From 1d5ae1026e831016fc29fd927877c86af904481f Mon Sep 17 00:00:00 2001
From: Dimitry Andric
Date: Wed, 23 Oct 2019 17:51:42 +0000
Subject: Vendor import of stripped llvm trunk r375505, the last commit before
 the upstream Subversion repository was made read-only, and the LLVM project
 migrated to GitHub:

 https://llvm.org/svn/llvm-project/llvm/trunk@375505

---
 include/llvm-c/Core.h | 22 +- include/llvm-c/DebugInfo.h | 47 +- include/llvm-c/Remarks.h | 17 +- include/llvm-c/Transforms/IPO.h | 18 + include/llvm-c/Transforms/Scalar.h | 6 + include/llvm-c/lto.h | 94 +- include/llvm/ADT/APFloat.h | 5 + include/llvm/ADT/APInt.h | 9 + include/llvm/ADT/Any.h | 4 +- include/llvm/ADT/ArrayRef.h | 6 + include/llvm/ADT/DenseMap.h | 57 +- include/llvm/ADT/DenseMapInfo.h | 13 +- include/llvm/ADT/DirectedGraph.h | 270 + include/llvm/ADT/Hashing.h | 1 - include/llvm/ADT/IntervalMap.h | 4 +- include/llvm/ADT/PointerIntPair.h | 11 +- include/llvm/ADT/PointerUnion.h | 30 +- include/llvm/ADT/STLExtras.h | 168 +- include/llvm/ADT/SmallBitVector.h | 2 +- include/llvm/ADT/Statistic.h | 102 +- include/llvm/ADT/StringExtras.h | 2 +- include/llvm/ADT/StringMap.h | 59 +- include/llvm/ADT/StringRef.h | 18 +- include/llvm/ADT/StringSet.h | 8 +- include/llvm/ADT/TinyPtrVector.h | 38 +- include/llvm/ADT/VariadicFunction.h | 330 -- include/llvm/ADT/iterator_range.h | 1 + include/llvm/Analysis/AliasAnalysis.h | 2 +- include/llvm/Analysis/AliasSetTracker.h | 5 +- include/llvm/Analysis/AssumptionCache.h | 4 +- include/llvm/Analysis/CFG.h | 2 + include/llvm/Analysis/CFLAndersAliasAnalysis.h | 5 +- include/llvm/Analysis/CFLSteensAliasAnalysis.h | 5 +- include/llvm/Analysis/CGSCCPassManager.h | 31 +- include/llvm/Analysis/CaptureTracking.h | 6 + include/llvm/Analysis/DDG.h | 430 ++ include/llvm/Analysis/DOTGraphTraitsPass.h | 4 +- include/llvm/Analysis/DependenceGraphBuilder.h | 119 + include/llvm/Analysis/DivergenceAnalysis.h | 16 +- include/llvm/Analysis/GlobalsModRef.h | 12 +- include/llvm/Analysis/InstructionSimplify.h | 36 +- include/llvm/Analysis/LazyCallGraph.h | 10 +- include/llvm/Analysis/LegacyDivergenceAnalysis.h | 16 +- include/llvm/Analysis/Loads.h | 22 +- include/llvm/Analysis/LoopAnalysisManager.h | 10 +- include/llvm/Analysis/LoopCacheAnalysis.h | 281 + include/llvm/Analysis/LoopInfo.h | 37 +- include/llvm/Analysis/LoopInfoImpl.h | 8 +- include/llvm/Analysis/MemoryBuiltins.h | 26 +- include/llvm/Analysis/MemoryDependenceAnalysis.h | 14 +- include/llvm/Analysis/MemorySSA.h | 4 +- include/llvm/Analysis/MemorySSAUpdater.h | 3 +- include/llvm/Analysis/MustExecute.h | 285 +- include/llvm/Analysis/Passes.h | 7 + include/llvm/Analysis/ProfileSummaryInfo.h | 23 + include/llvm/Analysis/RegionInfoImpl.h | 2 +- include/llvm/Analysis/ScalarEvolution.h | 6 +- include/llvm/Analysis/ScalarEvolutionExpander.h | 22 +- include/llvm/Analysis/TargetLibraryInfo.h | 17 +- include/llvm/Analysis/TargetTransformInfo.h | 180 +- include/llvm/Analysis/TargetTransformInfoImpl.h | 55 +- include/llvm/Analysis/TypeMetadataUtils.h | 2 + include/llvm/Analysis/Utils/Local.h | 22 +- include/llvm/Analysis/ValueTracking.h | 67 +- include/llvm/Analysis/VectorUtils.h | 144 +- include/llvm/BinaryFormat/Dwarf.def | 198 +- include/llvm/BinaryFormat/Dwarf.h | 125 +- include/llvm/BinaryFormat/ELF.h | 66 + include/llvm/BinaryFormat/ELFRelocs/AArch64.def | 7 +- include/llvm/BinaryFormat/MachO.h | 5 + include/llvm/BinaryFormat/Magic.h | 1 + include/llvm/BinaryFormat/Minidump.h | 68 + include/llvm/BinaryFormat/MinidumpConstants.def | 41 +- 
include/llvm/BinaryFormat/Wasm.h | 14 + include/llvm/BinaryFormat/XCOFF.h | 116 +- include/llvm/Bitcode/BitcodeAnalyzer.h | 1 + include/llvm/Bitcode/LLVMBitCodes.h | 2 +- include/llvm/Bitstream/BitCodes.h | 5 + include/llvm/Bitstream/BitstreamReader.h | 1 + include/llvm/CodeGen/AccelTable.h | 2 - include/llvm/CodeGen/AsmPrinter.h | 21 +- include/llvm/CodeGen/BasicTTIImpl.h | 73 +- include/llvm/CodeGen/CallingConvLower.h | 18 +- include/llvm/CodeGen/DFAPacketizer.h | 44 +- include/llvm/CodeGen/DIE.h | 12 + include/llvm/CodeGen/FastISel.h | 4 +- include/llvm/CodeGen/FunctionLoweringInfo.h | 2 +- include/llvm/CodeGen/GlobalISel/CallLowering.h | 127 +- include/llvm/CodeGen/GlobalISel/CombinerHelper.h | 127 +- include/llvm/CodeGen/GlobalISel/CombinerInfo.h | 15 +- .../CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h | 11 + include/llvm/CodeGen/GlobalISel/GISelKnownBits.h | 111 + include/llvm/CodeGen/GlobalISel/IRTranslator.h | 12 +- .../llvm/CodeGen/GlobalISel/InstructionSelector.h | 34 +- .../CodeGen/GlobalISel/InstructionSelectorImpl.h | 66 +- .../GlobalISel/LegalizationArtifactCombiner.h | 92 +- include/llvm/CodeGen/GlobalISel/LegalizerHelper.h | 20 + include/llvm/CodeGen/GlobalISel/LegalizerInfo.h | 61 +- include/llvm/CodeGen/GlobalISel/MIPatternMatch.h | 20 +- include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h | 93 +- include/llvm/CodeGen/GlobalISel/Utils.h | 22 +- include/llvm/CodeGen/ISDOpcodes.h | 41 +- include/llvm/CodeGen/LiveInterval.h | 6 +- include/llvm/CodeGen/LiveIntervals.h | 21 +- include/llvm/CodeGen/LiveRangeCalc.h | 295 + include/llvm/CodeGen/LiveRegUnits.h | 4 +- include/llvm/CodeGen/MIRYamlMapping.h | 1 + include/llvm/CodeGen/MachineBasicBlock.h | 36 +- include/llvm/CodeGen/MachineCombinerPattern.h | 21 + include/llvm/CodeGen/MachineDominators.h | 63 +- include/llvm/CodeGen/MachineFrameInfo.h | 62 +- include/llvm/CodeGen/MachineFunction.h | 61 +- include/llvm/CodeGen/MachineInstr.h | 99 +- include/llvm/CodeGen/MachineInstrBuilder.h | 62 +- include/llvm/CodeGen/MachineLoopUtils.h | 41 + include/llvm/CodeGen/MachineMemOperand.h | 7 - include/llvm/CodeGen/MachineModuleInfo.h | 52 +- include/llvm/CodeGen/MachineOperand.h | 49 +- include/llvm/CodeGen/MachinePipeliner.h | 80 +- include/llvm/CodeGen/MachinePostDominators.h | 46 +- include/llvm/CodeGen/MachineRegionInfo.h | 2 +- include/llvm/CodeGen/MachineRegisterInfo.h | 70 +- include/llvm/CodeGen/MachineScheduler.h | 1 + include/llvm/CodeGen/ModuloSchedule.h | 367 ++ include/llvm/CodeGen/PBQP/Math.h | 12 +- include/llvm/CodeGen/Passes.h | 4 + include/llvm/CodeGen/Register.h | 118 +- include/llvm/CodeGen/RegisterClassInfo.h | 2 +- include/llvm/CodeGen/RegisterPressure.h | 9 +- include/llvm/CodeGen/RegisterScavenging.h | 24 +- include/llvm/CodeGen/ScheduleDAGInstrs.h | 12 +- include/llvm/CodeGen/SelectionDAG.h | 95 +- include/llvm/CodeGen/SelectionDAGISel.h | 36 +- include/llvm/CodeGen/SelectionDAGNodes.h | 105 +- include/llvm/CodeGen/StackProtector.h | 6 + include/llvm/CodeGen/SwitchLoweringUtils.h | 5 +- include/llvm/CodeGen/TargetCallingConv.h | 23 +- include/llvm/CodeGen/TargetFrameLowering.h | 30 +- include/llvm/CodeGen/TargetInstrInfo.h | 102 +- include/llvm/CodeGen/TargetLowering.h | 399 +- .../llvm/CodeGen/TargetLoweringObjectFileImpl.h | 34 +- include/llvm/CodeGen/TargetPassConfig.h | 2 +- include/llvm/CodeGen/TargetRegisterInfo.h | 94 +- include/llvm/CodeGen/TargetSubtargetInfo.h | 10 +- include/llvm/CodeGen/ValueTypes.h | 4 +- include/llvm/CodeGen/ValueTypes.td | 247 +- include/llvm/CodeGen/VirtRegMap.h | 43 +- 
include/llvm/DebugInfo/CodeView/CVTypeVisitor.h | 4 - include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h | 13 +- .../llvm/DebugInfo/CodeView/CodeViewRegisters.def | 128 + include/llvm/DebugInfo/CodeView/EnumTables.h | 11 + .../llvm/DebugInfo/CodeView/SymbolDeserializer.h | 2 +- include/llvm/DebugInfo/CodeView/SymbolRecord.h | 304 +- include/llvm/DebugInfo/CodeView/TypeDeserializer.h | 2 +- .../llvm/DebugInfo/CodeView/TypeRecordMapping.h | 1 + .../CodeView/TypeVisitorCallbackPipeline.h | 5 - include/llvm/DebugInfo/DIContext.h | 14 +- .../DebugInfo/DWARF/DWARFAbbreviationDeclaration.h | 4 +- .../llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h | 68 +- include/llvm/DebugInfo/DWARF/DWARFAttribute.h | 2 +- include/llvm/DebugInfo/DWARF/DWARFContext.h | 8 +- include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h | 13 +- include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h | 6 +- include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h | 6 +- include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h | 4 +- include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h | 8 +- include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h | 2 +- include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h | 10 +- include/llvm/DebugInfo/DWARF/DWARFDebugLine.h | 27 +- include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h | 35 +- include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h | 4 +- include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h | 7 +- include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h | 2 +- include/llvm/DebugInfo/DWARF/DWARFDie.h | 2 +- include/llvm/DebugInfo/DWARF/DWARFExpression.h | 14 +- include/llvm/DebugInfo/DWARF/DWARFFormValue.h | 10 +- include/llvm/DebugInfo/DWARF/DWARFListTable.h | 77 +- include/llvm/DebugInfo/DWARF/DWARFObject.h | 30 +- include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h | 2 +- include/llvm/DebugInfo/DWARF/DWARFUnit.h | 51 +- include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h | 2 +- include/llvm/DebugInfo/DWARF/DWARFVerifier.h | 4 +- include/llvm/DebugInfo/GSYM/FileEntry.h | 7 +- include/llvm/DebugInfo/GSYM/FileWriter.h | 124 + include/llvm/DebugInfo/GSYM/FunctionInfo.h | 154 +- include/llvm/DebugInfo/GSYM/GsymCreator.h | 229 + include/llvm/DebugInfo/GSYM/GsymReader.h | 228 + include/llvm/DebugInfo/GSYM/Header.h | 129 + include/llvm/DebugInfo/GSYM/InlineInfo.h | 63 +- include/llvm/DebugInfo/GSYM/LineEntry.h | 7 +- include/llvm/DebugInfo/GSYM/LineTable.h | 198 + include/llvm/DebugInfo/GSYM/Range.h | 33 +- include/llvm/DebugInfo/GSYM/StringTable.h | 7 +- include/llvm/DebugInfo/PDB/GenericError.h | 2 +- include/llvm/DebugInfo/PDB/Native/SymbolCache.h | 2 +- include/llvm/DebugInfo/PDB/PDBSymbol.h | 2 +- include/llvm/DebugInfo/Symbolize/Symbolize.h | 1 + include/llvm/Demangle/Demangle.h | 9 +- include/llvm/Demangle/DemangleConfig.h | 7 - include/llvm/Demangle/ItaniumDemangle.h | 419 +- include/llvm/Demangle/MicrosoftDemangle.h | 1 + include/llvm/Demangle/MicrosoftDemangleNodes.h | 7 +- .../llvm/ExecutionEngine/JITLink/EHFrameSupport.h | 39 +- include/llvm/ExecutionEngine/JITLink/JITLink.h | 1244 ++-- .../ExecutionEngine/JITLink/JITLinkMemoryManager.h | 17 +- include/llvm/ExecutionEngine/JITLink/MachO_arm64.h | 60 + .../llvm/ExecutionEngine/JITLink/MachO_x86_64.h | 1 + include/llvm/ExecutionEngine/JITSymbol.h | 5 +- .../ExecutionEngine/Orc/CompileOnDemandLayer.h | 10 +- include/llvm/ExecutionEngine/Orc/Core.h | 137 +- include/llvm/ExecutionEngine/Orc/ExecutionUtils.h | 46 +- .../llvm/ExecutionEngine/Orc/IRTransformLayer.h | 3 + include/llvm/ExecutionEngine/Orc/LLJIT.h | 4 +- include/llvm/ExecutionEngine/Orc/LambdaResolver.h | 5 +- 
.../llvm/ExecutionEngine/Orc/LazyEmittingLayer.h | 40 +- include/llvm/ExecutionEngine/Orc/LazyReexports.h | 13 +- include/llvm/ExecutionEngine/Orc/Legacy.h | 2 +- .../llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h | 23 +- .../ExecutionEngine/Orc/OrcRemoteTargetClient.h | 4 +- .../llvm/ExecutionEngine/Orc/RPCSerialization.h | 12 +- include/llvm/ExecutionEngine/Orc/RPCUtils.h | 65 +- .../ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h | 6 +- .../llvm/ExecutionEngine/Orc/RemoteObjectLayer.h | 21 +- .../llvm/ExecutionEngine/Orc/SpeculateAnalyses.h | 84 + include/llvm/ExecutionEngine/Orc/Speculation.h | 207 + .../llvm/ExecutionEngine/Orc/ThreadSafeModule.h | 53 +- include/llvm/ExecutionEngine/RuntimeDyld.h | 23 +- include/llvm/IR/Attributes.h | 49 +- include/llvm/IR/AutoUpgrade.h | 10 +- include/llvm/IR/BasicBlock.h | 5 + include/llvm/IR/CallSite.h | 9 + include/llvm/IR/CallingConv.h | 13 + include/llvm/IR/Constant.h | 6 + include/llvm/IR/ConstantRange.h | 10 +- include/llvm/IR/DataLayout.h | 125 +- include/llvm/IR/DebugInfoFlags.def | 6 +- include/llvm/IR/DebugInfoMetadata.h | 4 +- include/llvm/IR/DerivedTypes.h | 75 +- include/llvm/IR/DiagnosticInfo.h | 25 +- include/llvm/IR/FixedMetadataKinds.def | 43 + include/llvm/IR/Function.h | 15 +- include/llvm/IR/GlobalAlias.h | 4 - include/llvm/IR/GlobalIFunc.h | 4 - include/llvm/IR/GlobalIndirectSymbol.h | 8 +- include/llvm/IR/GlobalObject.h | 26 +- include/llvm/IR/GlobalVariable.h | 1 + include/llvm/IR/IRBuilder.h | 90 +- include/llvm/IR/InlineAsm.h | 1 + include/llvm/IR/InstrTypes.h | 12 +- include/llvm/IR/Instruction.h | 10 + include/llvm/IR/Instructions.h | 99 +- include/llvm/IR/IntrinsicInst.h | 23 +- include/llvm/IR/Intrinsics.h | 10 +- include/llvm/IR/Intrinsics.td | 295 +- include/llvm/IR/IntrinsicsAArch64.td | 125 +- include/llvm/IR/IntrinsicsAMDGPU.td | 121 +- include/llvm/IR/IntrinsicsARM.td | 9 + include/llvm/IR/IntrinsicsBPF.td | 3 + include/llvm/IR/IntrinsicsMips.td | 16 +- include/llvm/IR/IntrinsicsNVVM.td | 125 +- include/llvm/IR/IntrinsicsWebAssembly.td | 58 + include/llvm/IR/IntrinsicsX86.td | 12 +- include/llvm/IR/LLVMContext.h | 31 +- include/llvm/IR/MDBuilder.h | 5 + include/llvm/IR/Metadata.h | 4 +- include/llvm/IR/Module.h | 1 + include/llvm/IR/ModuleSummaryIndex.h | 18 +- include/llvm/IR/ModuleSummaryIndexYAML.h | 2 +- include/llvm/IR/Operator.h | 21 +- include/llvm/IR/PassManager.h | 5 +- include/llvm/IR/PassManagerInternal.h | 2 +- include/llvm/IR/PatternMatch.h | 155 +- include/llvm/IR/RemarkStreamer.h | 28 +- include/llvm/IR/Type.h | 15 +- include/llvm/IR/User.h | 2 +- include/llvm/IR/Value.h | 58 +- include/llvm/IR/ValueMap.h | 15 +- include/llvm/InitializePasses.h | 10 +- include/llvm/LTO/Config.h | 2 +- include/llvm/LTO/LTO.h | 10 +- include/llvm/LTO/legacy/LTOCodeGenerator.h | 2 +- include/llvm/LinkAllPasses.h | 2 + include/llvm/MC/MCAsmInfo.h | 18 + include/llvm/MC/MCAsmInfoXCOFF.h | 5 + include/llvm/MC/MCAsmMacro.h | 11 +- include/llvm/MC/MCContext.h | 23 +- include/llvm/MC/MCDirectives.h | 1 + include/llvm/MC/MCDwarf.h | 3 +- include/llvm/MC/MCExpr.h | 8 +- include/llvm/MC/MCFixup.h | 119 +- include/llvm/MC/MCFragment.h | 16 +- include/llvm/MC/MCInstPrinter.h | 2 - include/llvm/MC/MCInstrAnalysis.h | 6 + include/llvm/MC/MCInstrDesc.h | 23 +- include/llvm/MC/MCLinkerOptimizationHint.h | 2 + include/llvm/MC/MCRegister.h | 110 + include/llvm/MC/MCRegisterInfo.h | 98 +- include/llvm/MC/MCSection.h | 7 +- include/llvm/MC/MCSectionXCOFF.h | 22 +- include/llvm/MC/MCStreamer.h | 41 +- include/llvm/MC/MCSubtargetInfo.h | 46 + 
include/llvm/MC/MCSymbolWasm.h | 7 + include/llvm/MC/MCSymbolXCOFF.h | 32 + include/llvm/MC/MCWasmObjectWriter.h | 4 +- include/llvm/MC/MCXCOFFStreamer.h | 2 + include/llvm/MC/StringTableBuilder.h | 2 +- include/llvm/MC/SubtargetFeature.h | 139 +- include/llvm/MCA/CodeEmitter.h | 72 + include/llvm/MCA/Context.h | 5 +- include/llvm/MCA/HardwareUnits/LSUnit.h | 18 +- include/llvm/MCA/HardwareUnits/RegisterFile.h | 2 +- include/llvm/MCA/HardwareUnits/ResourceManager.h | 51 +- include/llvm/MCA/HardwareUnits/RetireControlUnit.h | 33 +- include/llvm/MCA/HardwareUnits/Scheduler.h | 13 +- include/llvm/MCA/Instruction.h | 51 +- include/llvm/MCA/SourceMgr.h | 5 +- include/llvm/MCA/Stages/RetireStage.h | 6 +- include/llvm/Object/Archive.h | 7 +- include/llvm/Object/Binary.h | 16 +- include/llvm/Object/COFF.h | 36 +- include/llvm/Object/ELF.h | 112 +- include/llvm/Object/ELFObjectFile.h | 31 +- include/llvm/Object/ELFTypes.h | 6 +- include/llvm/Object/MachO.h | 1 + include/llvm/Object/MachOUniversal.h | 14 +- include/llvm/Object/Minidump.h | 77 +- include/llvm/Object/ObjectFile.h | 21 +- include/llvm/Object/StackMapParser.h | 4 +- include/llvm/Object/TapiFile.h | 60 + include/llvm/Object/TapiUniversal.h | 109 + include/llvm/Object/WindowsResource.h | 55 +- include/llvm/Object/XCOFFObjectFile.h | 132 +- include/llvm/ObjectYAML/DWARFYAML.h | 2 +- include/llvm/ObjectYAML/ELFYAML.h | 116 +- include/llvm/ObjectYAML/MachOYAML.h | 3 + include/llvm/ObjectYAML/MinidumpYAML.h | 64 +- include/llvm/ObjectYAML/WasmYAML.h | 2 +- include/llvm/ObjectYAML/yaml2obj.h | 67 + include/llvm/Pass.h | 5 + include/llvm/Passes/PassBuilder.h | 7 +- .../llvm/ProfileData/Coverage/CoverageMapping.h | 16 +- .../ProfileData/Coverage/CoverageMappingWriter.h | 3 +- include/llvm/ProfileData/InstrProf.h | 18 +- include/llvm/ProfileData/InstrProfReader.h | 12 +- include/llvm/ProfileData/SampleProf.h | 178 +- include/llvm/ProfileData/SampleProfReader.h | 272 +- include/llvm/ProfileData/SampleProfWriter.h | 118 +- include/llvm/Remarks/BitstreamRemarkContainer.h | 106 + include/llvm/Remarks/BitstreamRemarkParser.h | 116 + include/llvm/Remarks/BitstreamRemarkSerializer.h | 196 + include/llvm/Remarks/Remark.h | 36 +- include/llvm/Remarks/RemarkFormat.h | 4 +- include/llvm/Remarks/RemarkParser.h | 38 +- include/llvm/Remarks/RemarkSerializer.h | 70 +- include/llvm/Remarks/RemarkStringTable.h | 24 +- include/llvm/Remarks/YAMLRemarkSerializer.h | 108 + include/llvm/Support/AArch64TargetParser.def | 72 +- include/llvm/Support/AArch64TargetParser.h | 3 +- include/llvm/Support/ARMTargetParser.def | 2 + include/llvm/Support/ARMTargetParser.h | 20 +- include/llvm/Support/AlignOf.h | 134 +- include/llvm/Support/Alignment.h | 403 ++ include/llvm/Support/Allocator.h | 22 +- include/llvm/Support/Automaton.h | 253 + include/llvm/Support/BinaryStreamArray.h | 2 +- include/llvm/Support/BinaryStreamReader.h | 2 +- include/llvm/Support/CRC.h | 45 +- include/llvm/Support/CommandLine.h | 3 + include/llvm/Support/Compiler.h | 81 +- include/llvm/Support/DataExtractor.h | 196 +- include/llvm/Support/Endian.h | 10 +- include/llvm/Support/Error.h | 42 +- include/llvm/Support/FileCheck.h | 604 +- include/llvm/Support/FileCollector.h | 79 + include/llvm/Support/FileSystem.h | 30 +- include/llvm/Support/FileUtilities.h | 38 + include/llvm/Support/Format.h | 5 +- include/llvm/Support/GenericDomTree.h | 6 +- include/llvm/Support/GenericDomTreeConstruction.h | 8 +- include/llvm/Support/GlobPattern.h | 2 +- include/llvm/Support/Host.h | 28 - include/llvm/Support/JamCRC.h | 
48 - include/llvm/Support/MachineValueType.h | 419 +- include/llvm/Support/MathExtras.h | 187 +- include/llvm/Support/Mutex.h | 105 +- include/llvm/Support/MutexGuard.h | 40 - include/llvm/Support/OnDiskHashTable.h | 3 +- include/llvm/Support/Parallel.h | 27 - include/llvm/Support/RWMutex.h | 321 +- include/llvm/Support/Regex.h | 18 +- include/llvm/Support/Registry.h | 2 +- include/llvm/Support/SHA1.h | 2 +- include/llvm/Support/ScalableSize.h | 43 - include/llvm/Support/Signals.h | 11 + include/llvm/Support/SwapByteOrder.h | 38 +- include/llvm/Support/TargetOpcodes.def | 26 +- include/llvm/Support/TargetRegistry.h | 4 +- include/llvm/Support/TimeProfiler.h | 2 +- include/llvm/Support/TrailingObjects.h | 18 +- include/llvm/Support/TypeSize.h | 201 + include/llvm/Support/UnicodeCharRanges.h | 3 - include/llvm/Support/UniqueLock.h | 68 - include/llvm/Support/VirtualFileSystem.h | 16 +- include/llvm/Support/Win64EH.h | 4 +- include/llvm/Support/X86TargetParser.def | 4 +- include/llvm/Support/YAMLTraits.h | 11 +- include/llvm/Support/circular_raw_ostream.h | 4 + include/llvm/Support/raw_ostream.h | 27 +- include/llvm/Support/type_traits.h | 18 - include/llvm/TableGen/Automaton.td | 95 + include/llvm/TableGen/Error.h | 1 + include/llvm/TableGen/Record.h | 14 +- include/llvm/Target/GenericOpcodes.td | 87 +- include/llvm/Target/GlobalISel/Combine.td | 103 + .../llvm/Target/GlobalISel/SelectionDAGCompat.td | 25 + include/llvm/Target/Target.td | 33 +- include/llvm/Target/TargetCallingConv.td | 6 + include/llvm/Target/TargetItinerary.td | 11 + include/llvm/Target/TargetLoweringObjectFile.h | 3 +- include/llvm/Target/TargetMachine.h | 28 +- include/llvm/Target/TargetSchedule.td | 8 +- include/llvm/Target/TargetSelectionDAG.td | 146 +- include/llvm/TextAPI/MachO/Architecture.h | 4 + include/llvm/TextAPI/MachO/ArchitectureSet.h | 4 + include/llvm/TextAPI/MachO/InterfaceFile.h | 240 +- include/llvm/TextAPI/MachO/Platform.h | 45 + include/llvm/TextAPI/MachO/Symbol.h | 35 +- include/llvm/TextAPI/MachO/Target.h | 68 + include/llvm/TextAPI/MachO/TextAPIReader.h | 5 +- include/llvm/Transforms/IPO/Attributor.h | 1731 +++++- include/llvm/Transforms/IPO/GlobalDCE.h | 14 + include/llvm/Transforms/IPO/HotColdSplitting.h | 39 + include/llvm/Transforms/IPO/LowerTypeTests.h | 2 + include/llvm/Transforms/IPO/WholeProgramDevirt.h | 26 + include/llvm/Transforms/Instrumentation.h | 4 - .../Transforms/Instrumentation/InstrProfiling.h | 5 +- .../Transforms/Instrumentation/MemorySanitizer.h | 12 +- .../Transforms/Instrumentation/SanitizerCoverage.h | 47 + .../Transforms/Instrumentation/ThreadSanitizer.h | 2 + include/llvm/Transforms/Scalar.h | 9 +- include/llvm/Transforms/Scalar/CallSiteSplitting.h | 5 - include/llvm/Transforms/Scalar/ConstantHoisting.h | 10 +- include/llvm/Transforms/Scalar/Float2Int.h | 6 +- include/llvm/Transforms/Scalar/GVN.h | 7 +- include/llvm/Transforms/Scalar/GVNExpression.h | 9 +- include/llvm/Transforms/Scalar/LoopPassManager.h | 24 +- include/llvm/Transforms/Scalar/LoopUnrollPass.h | 14 + .../Transforms/Scalar/LowerConstantIntrinsics.h | 41 + .../llvm/Transforms/Scalar/MergedLoadStoreMotion.h | 18 +- include/llvm/Transforms/Scalar/Reassociate.h | 4 +- include/llvm/Transforms/Scalar/SCCP.h | 3 +- include/llvm/Transforms/Utils/BasicBlockUtils.h | 11 +- include/llvm/Transforms/Utils/BuildLibCalls.h | 27 +- include/llvm/Transforms/Utils/BypassSlowDivision.h | 13 +- include/llvm/Transforms/Utils/CodeExtractor.h | 57 +- include/llvm/Transforms/Utils/Local.h | 16 +- 
include/llvm/Transforms/Utils/LoopUtils.h | 5 + include/llvm/Transforms/Utils/MisExpect.h | 43 + include/llvm/Transforms/Utils/PredicateInfo.h | 10 +- include/llvm/Transforms/Utils/SimplifyLibCalls.h | 10 + include/llvm/Transforms/Utils/UnrollLoop.h | 8 +- include/llvm/Transforms/Utils/ValueMapper.h | 9 +- .../Vectorize/LoopVectorizationLegality.h | 48 +- include/llvm/Transforms/Vectorize/LoopVectorize.h | 8 + include/llvm/Transforms/Vectorize/SLPVectorizer.h | 9 +- include/llvm/XRay/FDRRecordProducer.h | 4 +- include/llvm/XRay/FDRRecords.h | 6 +- include/llvm/XRay/FileHeaderReader.h | 2 +- include/llvm/module.modulemap | 2 + lib/Analysis/AliasAnalysis.cpp | 4 +- lib/Analysis/AliasSetTracker.cpp | 12 +- lib/Analysis/Analysis.cpp | 1 + lib/Analysis/AssumptionCache.cpp | 12 +- lib/Analysis/BasicAliasAnalysis.cpp | 42 +- lib/Analysis/BranchProbabilityInfo.cpp | 19 +- lib/Analysis/CFG.cpp | 11 +- lib/Analysis/CFGPrinter.cpp | 2 +- lib/Analysis/CFLAndersAliasAnalysis.cpp | 19 +- lib/Analysis/CFLSteensAliasAnalysis.cpp | 20 +- lib/Analysis/CallGraph.cpp | 4 +- lib/Analysis/CaptureTracking.cpp | 46 +- lib/Analysis/ConstantFolding.cpp | 405 +- lib/Analysis/DDG.cpp | 203 + lib/Analysis/DependenceAnalysis.cpp | 8 +- lib/Analysis/DependenceGraphBuilder.cpp | 228 + lib/Analysis/DivergenceAnalysis.cpp | 10 + lib/Analysis/GlobalsModRef.cpp | 37 +- lib/Analysis/IVDescriptors.cpp | 3 +- lib/Analysis/IndirectCallPromotionAnalysis.cpp | 2 +- lib/Analysis/InlineCost.cpp | 23 +- lib/Analysis/InstructionSimplify.cpp | 320 +- lib/Analysis/LazyBranchProbabilityInfo.cpp | 5 +- lib/Analysis/LazyCallGraph.cpp | 13 +- lib/Analysis/LazyValueInfo.cpp | 37 +- lib/Analysis/LegacyDivergenceAnalysis.cpp | 36 +- lib/Analysis/Lint.cpp | 2 +- lib/Analysis/Loads.cpp | 238 +- lib/Analysis/LoopAccessAnalysis.cpp | 45 +- lib/Analysis/LoopAnalysisManager.cpp | 2 +- lib/Analysis/LoopCacheAnalysis.cpp | 625 +++ lib/Analysis/LoopInfo.cpp | 39 + lib/Analysis/LoopUnrollAnalyzer.cpp | 2 +- lib/Analysis/MemDerefPrinter.cpp | 4 +- lib/Analysis/MemoryBuiltins.cpp | 51 +- lib/Analysis/MemoryDependenceAnalysis.cpp | 21 +- lib/Analysis/MemorySSA.cpp | 95 +- lib/Analysis/MemorySSAUpdater.cpp | 323 +- lib/Analysis/ModuleSummaryAnalysis.cpp | 16 +- lib/Analysis/MustExecute.cpp | 118 + lib/Analysis/OptimizationRemarkEmitter.cpp | 4 +- lib/Analysis/OrderedInstructions.cpp | 2 +- lib/Analysis/ProfileSummaryInfo.cpp | 67 + lib/Analysis/ScalarEvolution.cpp | 89 +- lib/Analysis/ScalarEvolutionExpander.cpp | 19 +- lib/Analysis/StackSafetyAnalysis.cpp | 4 +- lib/Analysis/SyncDependenceAnalysis.cpp | 61 +- lib/Analysis/TargetLibraryInfo.cpp | 44 +- lib/Analysis/TargetTransformInfo.cpp | 64 +- lib/Analysis/TypeMetadataUtils.cpp | 32 + lib/Analysis/VFABIDemangling.cpp | 418 ++ lib/Analysis/ValueTracking.cpp | 658 ++- lib/Analysis/VectorUtils.cpp | 20 +- lib/AsmParser/LLLexer.cpp | 1 + lib/AsmParser/LLParser.cpp | 81 +- lib/AsmParser/LLParser.h | 4 +- lib/AsmParser/LLToken.h | 1 + lib/AsmParser/Parser.cpp | 8 +- lib/BinaryFormat/Dwarf.cpp | 22 +- lib/BinaryFormat/Magic.cpp | 5 + lib/Bitcode/Reader/BitcodeAnalyzer.cpp | 10 + lib/Bitcode/Reader/BitcodeReader.cpp | 84 +- lib/Bitcode/Reader/MetadataLoader.cpp | 6 +- lib/Bitcode/Writer/BitWriter.cpp | 2 +- lib/Bitcode/Writer/BitcodeWriter.cpp | 9 +- lib/CodeGen/AggressiveAntiDepBreaker.cpp | 16 +- lib/CodeGen/Analysis.cpp | 12 +- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 255 +- lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 3 +- lib/CodeGen/AsmPrinter/ByteStreamer.h | 12 +- 
lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 116 +- lib/CodeGen/AsmPrinter/CodeViewDebug.h | 3 +- .../AsmPrinter/DbgEntityHistoryCalculator.cpp | 12 +- lib/CodeGen/AsmPrinter/DebugLocStream.h | 19 +- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 176 +- lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 31 +- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 644 ++- lib/CodeGen/AsmPrinter/DwarfDebug.h | 22 +- lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 95 +- lib/CodeGen/AsmPrinter/DwarfExpression.h | 95 +- lib/CodeGen/AsmPrinter/DwarfFile.h | 19 +- lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 47 +- lib/CodeGen/AsmPrinter/DwarfUnit.h | 14 +- lib/CodeGen/AsmPrinter/EHStreamer.cpp | 6 +- lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 2 +- lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 4 +- lib/CodeGen/AsmPrinter/WinException.cpp | 3 +- lib/CodeGen/AtomicExpandPass.cpp | 12 +- lib/CodeGen/BranchFolding.cpp | 34 +- lib/CodeGen/BranchRelaxation.cpp | 22 +- lib/CodeGen/BreakFalseDeps.cpp | 23 +- lib/CodeGen/CalcSpillWeights.cpp | 22 +- lib/CodeGen/CallingConvLower.cpp | 42 +- lib/CodeGen/CodeGen.cpp | 5 +- lib/CodeGen/CodeGenPrepare.cpp | 122 +- lib/CodeGen/CriticalAntiDepBreaker.cpp | 9 +- lib/CodeGen/DFAPacketizer.cpp | 81 +- lib/CodeGen/DeadMachineInstructionElim.cpp | 12 +- lib/CodeGen/DetectDeadLanes.cpp | 56 +- lib/CodeGen/EarlyIfConversion.cpp | 345 +- lib/CodeGen/ExecutionDomainFix.cpp | 1 + lib/CodeGen/ExpandMemCmp.cpp | 2 +- lib/CodeGen/ExpandPostRAPseudos.cpp | 10 +- lib/CodeGen/GCMetadata.cpp | 2 +- lib/CodeGen/GCRootLowering.cpp | 4 +- lib/CodeGen/GlobalISel/CSEInfo.cpp | 7 +- lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp | 11 + lib/CodeGen/GlobalISel/CallLowering.cpp | 288 +- lib/CodeGen/GlobalISel/Combiner.cpp | 14 +- lib/CodeGen/GlobalISel/CombinerHelper.cpp | 919 ++- lib/CodeGen/GlobalISel/GISelKnownBits.cpp | 383 ++ lib/CodeGen/GlobalISel/IRTranslator.cpp | 392 +- lib/CodeGen/GlobalISel/InstructionSelect.cpp | 38 +- lib/CodeGen/GlobalISel/InstructionSelector.cpp | 2 +- lib/CodeGen/GlobalISel/Legalizer.cpp | 35 +- lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 978 +++- lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 42 +- lib/CodeGen/GlobalISel/Localizer.cpp | 11 +- lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 93 +- lib/CodeGen/GlobalISel/RegBankSelect.cpp | 13 +- lib/CodeGen/GlobalISel/RegisterBank.cpp | 1 + lib/CodeGen/GlobalISel/RegisterBankInfo.cpp | 17 +- lib/CodeGen/GlobalISel/Utils.cpp | 98 +- lib/CodeGen/GlobalMerge.cpp | 8 +- lib/CodeGen/HardwareLoops.cpp | 2 +- lib/CodeGen/IfConversion.cpp | 200 +- lib/CodeGen/ImplicitNullChecks.cpp | 8 +- lib/CodeGen/InlineSpiller.cpp | 22 +- lib/CodeGen/InterleavedLoadCombinePass.cpp | 4 +- lib/CodeGen/LLVMTargetMachine.cpp | 34 +- lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp | 6 +- lib/CodeGen/LexicalScopes.cpp | 1 + lib/CodeGen/LiveDebugValues.cpp | 510 +- lib/CodeGen/LiveDebugVariables.cpp | 257 +- lib/CodeGen/LiveInterval.cpp | 7 +- lib/CodeGen/LiveIntervals.cpp | 59 +- lib/CodeGen/LivePhysRegs.cpp | 20 +- lib/CodeGen/LiveRangeCalc.cpp | 5 +- lib/CodeGen/LiveRangeCalc.h | 297 - lib/CodeGen/LiveRangeEdit.cpp | 14 +- lib/CodeGen/LiveRangeShrink.cpp | 4 +- lib/CodeGen/LiveRegMatrix.cpp | 2 +- lib/CodeGen/LiveRegUnits.cpp | 12 +- lib/CodeGen/LiveStacks.cpp | 7 +- lib/CodeGen/LiveVariables.cpp | 29 +- lib/CodeGen/LocalStackSlotAllocation.cpp | 10 +- lib/CodeGen/LowerEmuTLS.cpp | 7 +- lib/CodeGen/MIRCanonicalizerPass.cpp | 359 +- lib/CodeGen/MIRNamerPass.cpp | 77 + lib/CodeGen/MIRParser/MILexer.cpp | 1 + lib/CodeGen/MIRParser/MILexer.h | 2 + lib/CodeGen/MIRParser/MIParser.cpp 
| 60 +- lib/CodeGen/MIRParser/MIRParser.cpp | 18 +- lib/CodeGen/MIRPrinter.cpp | 16 +- lib/CodeGen/MIRVRegNamerUtils.cpp | 348 ++ lib/CodeGen/MIRVRegNamerUtils.h | 91 + lib/CodeGen/MachineBasicBlock.cpp | 64 +- lib/CodeGen/MachineBlockPlacement.cpp | 28 +- lib/CodeGen/MachineCSE.cpp | 75 +- lib/CodeGen/MachineCombiner.cpp | 6 +- lib/CodeGen/MachineCopyPropagation.cpp | 78 +- lib/CodeGen/MachineDominators.cpp | 23 +- lib/CodeGen/MachineFrameInfo.cpp | 38 +- lib/CodeGen/MachineFunction.cpp | 58 +- lib/CodeGen/MachineFunctionPass.cpp | 6 +- lib/CodeGen/MachineInstr.cpp | 116 +- lib/CodeGen/MachineInstrBundle.cpp | 14 +- lib/CodeGen/MachineLICM.cpp | 61 +- lib/CodeGen/MachineLoopUtils.cpp | 132 + lib/CodeGen/MachineModuleInfo.cpp | 85 +- lib/CodeGen/MachineOperand.cpp | 70 +- lib/CodeGen/MachineOptimizationRemarkEmitter.cpp | 2 +- lib/CodeGen/MachineOutliner.cpp | 16 +- lib/CodeGen/MachinePipeliner.cpp | 1235 +--- lib/CodeGen/MachinePostDominators.cpp | 55 +- lib/CodeGen/MachineRegisterInfo.cpp | 12 +- lib/CodeGen/MachineSSAUpdater.cpp | 6 +- lib/CodeGen/MachineScheduler.cpp | 59 +- lib/CodeGen/MachineSink.cpp | 73 +- lib/CodeGen/MachineTraceMetrics.cpp | 24 +- lib/CodeGen/MachineVerifier.cpp | 163 +- lib/CodeGen/MacroFusion.cpp | 4 +- lib/CodeGen/ModuloSchedule.cpp | 2022 +++++++ lib/CodeGen/OptimizePHIs.cpp | 15 +- lib/CodeGen/PHIElimination.cpp | 43 +- lib/CodeGen/PatchableFunction.cpp | 2 +- lib/CodeGen/PeepholeOptimizer.cpp | 83 +- lib/CodeGen/PreISelIntrinsicLowering.cpp | 2 +- lib/CodeGen/ProcessImplicitDefs.cpp | 8 +- lib/CodeGen/PrologEpilogInserter.cpp | 2 +- lib/CodeGen/PseudoSourceValue.cpp | 6 +- lib/CodeGen/ReachingDefAnalysis.cpp | 1 + lib/CodeGen/RegAllocBase.cpp | 4 +- lib/CodeGen/RegAllocFast.cpp | 117 +- lib/CodeGen/RegAllocGreedy.cpp | 16 +- lib/CodeGen/RegAllocPBQP.cpp | 12 +- lib/CodeGen/RegUsageInfoCollector.cpp | 10 +- lib/CodeGen/RegUsageInfoPropagate.cpp | 6 +- lib/CodeGen/RegisterCoalescer.cpp | 71 +- lib/CodeGen/RegisterPressure.cpp | 36 +- lib/CodeGen/RegisterScavenging.cpp | 62 +- lib/CodeGen/RenameIndependentSubregs.cpp | 4 +- lib/CodeGen/SafeStack.cpp | 2 +- lib/CodeGen/ScalarizeMaskedMemIntrin.cpp | 167 +- lib/CodeGen/ScheduleDAGInstrs.cpp | 57 +- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1758 +++--- lib/CodeGen/SelectionDAG/FastISel.cpp | 67 +- lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 7 +- lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 77 +- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 222 +- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 430 +- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 510 +- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 56 +- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 61 +- lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 46 +- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 50 +- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 139 +- lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 2 +- lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 18 +- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 18 +- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 3 +- lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 9 +- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 283 +- .../SelectionDAG/SelectionDAGAddressAnalysis.cpp | 1 + lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 495 +- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 2 +- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 9 +- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 35 +- lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 34 +- lib/CodeGen/SelectionDAG/TargetLowering.cpp 
| 1406 ++++- lib/CodeGen/ShrinkWrap.cpp | 5 +- lib/CodeGen/SjLjEHPrepare.cpp | 5 +- lib/CodeGen/SplitKit.cpp | 6 +- lib/CodeGen/SplitKit.h | 2 +- lib/CodeGen/StackMaps.cpp | 8 +- lib/CodeGen/StackProtector.cpp | 67 +- lib/CodeGen/StackSlotColoring.cpp | 8 +- lib/CodeGen/SwiftErrorValueTracking.cpp | 3 +- lib/CodeGen/TailDuplicator.cpp | 22 +- lib/CodeGen/TargetFrameLoweringImpl.cpp | 19 +- lib/CodeGen/TargetInstrInfo.cpp | 82 +- lib/CodeGen/TargetLoweringBase.cpp | 95 +- lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 107 +- lib/CodeGen/TargetPassConfig.cpp | 24 +- lib/CodeGen/TargetRegisterInfo.cpp | 60 +- lib/CodeGen/TargetSchedule.cpp | 2 +- lib/CodeGen/TwoAddressInstructionPass.cpp | 90 +- lib/CodeGen/UnreachableBlockElim.cpp | 15 +- lib/CodeGen/ValueTypes.cpp | 150 +- lib/CodeGen/VirtRegMap.cpp | 71 +- lib/CodeGen/XRayInstrumentation.cpp | 2 +- lib/DebugInfo/CodeView/CVTypeVisitor.cpp | 15 - lib/DebugInfo/CodeView/CodeViewRecordIO.cpp | 8 +- lib/DebugInfo/CodeView/EnumTables.cpp | 166 + lib/DebugInfo/CodeView/SymbolDumper.cpp | 2 +- lib/DebugInfo/CodeView/SymbolRecordMapping.cpp | 2 +- lib/DebugInfo/CodeView/TypeRecordMapping.cpp | 238 +- .../DWARF/DWARFAbbreviationDeclaration.cpp | 8 +- lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp | 105 +- lib/DebugInfo/DWARF/DWARFCompileUnit.cpp | 10 +- lib/DebugInfo/DWARF/DWARFContext.cpp | 335 +- lib/DebugInfo/DWARF/DWARFDataExtractor.cpp | 13 +- lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp | 10 +- lib/DebugInfo/DWARF/DWARFDebugAddr.cpp | 28 +- lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp | 4 +- lib/DebugInfo/DWARF/DWARFDebugAranges.cpp | 12 +- lib/DebugInfo/DWARF/DWARFDebugFrame.cpp | 74 +- lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp | 8 +- lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 159 +- lib/DebugInfo/DWARF/DWARFDebugLoc.cpp | 257 +- lib/DebugInfo/DWARF/DWARFDebugMacro.cpp | 2 +- lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp | 6 +- lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp | 18 +- lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp | 30 +- lib/DebugInfo/DWARF/DWARFDie.cpp | 54 +- lib/DebugInfo/DWARF/DWARFExpression.cpp | 13 +- lib/DebugInfo/DWARF/DWARFFormValue.cpp | 9 +- lib/DebugInfo/DWARF/DWARFGdbIndex.cpp | 2 +- lib/DebugInfo/DWARF/DWARFListTable.cpp | 70 +- lib/DebugInfo/DWARF/DWARFTypeUnit.cpp | 14 +- lib/DebugInfo/DWARF/DWARFUnit.cpp | 231 +- lib/DebugInfo/DWARF/DWARFUnitIndex.cpp | 12 +- lib/DebugInfo/DWARF/DWARFVerifier.cpp | 120 +- lib/DebugInfo/GSYM/FileWriter.cpp | 78 + lib/DebugInfo/GSYM/FunctionInfo.cpp | 143 +- lib/DebugInfo/GSYM/GsymCreator.cpp | 275 + lib/DebugInfo/GSYM/GsymReader.cpp | 265 + lib/DebugInfo/GSYM/Header.cpp | 109 + lib/DebugInfo/GSYM/InlineInfo.cpp | 100 + lib/DebugInfo/GSYM/LineTable.cpp | 287 + lib/DebugInfo/GSYM/Range.cpp | 47 + lib/DebugInfo/MSF/MappedBlockStream.cpp | 6 +- lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp | 28 +- lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp | 2 +- lib/DebugInfo/PDB/DIA/DIASession.cpp | 46 +- lib/DebugInfo/PDB/GenericError.cpp | 4 +- .../PDB/Native/DbiModuleDescriptorBuilder.cpp | 4 +- lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp | 2 +- lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp | 4 +- lib/DebugInfo/PDB/Native/Hash.cpp | 5 +- .../PDB/Native/NativeEnumInjectedSources.cpp | 29 +- lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp | 24 +- lib/DebugInfo/PDB/Native/NativeSession.cpp | 10 +- lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp | 4 +- lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp | 6 +- lib/DebugInfo/PDB/Native/PDBFile.cpp | 18 +- lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp | 17 +- 
lib/DebugInfo/PDB/Native/TpiHashing.cpp | 6 +- lib/DebugInfo/PDB/Native/TpiStream.cpp | 2 +- lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp | 2 +- lib/DebugInfo/PDB/PDBSymbolFunc.cpp | 2 +- lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp | 2 +- lib/DebugInfo/PDB/UDTLayout.cpp | 14 +- lib/DebugInfo/Symbolize/DIPrinter.cpp | 17 +- lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp | 32 +- lib/DebugInfo/Symbolize/SymbolizableObjectFile.h | 7 +- lib/DebugInfo/Symbolize/Symbolize.cpp | 52 +- lib/Demangle/ItaniumDemangle.cpp | 10 + lib/Demangle/MicrosoftDemangle.cpp | 32 +- lib/Demangle/MicrosoftDemangleNodes.cpp | 51 +- lib/ExecutionEngine/ExecutionEngine.cpp | 34 +- lib/ExecutionEngine/GDBRegistrationListener.cpp | 8 +- .../Interpreter/ExternalFunctions.cpp | 4 +- .../JITLink/BasicGOTAndStubsBuilder.h | 35 +- lib/ExecutionEngine/JITLink/EHFrameSupport.cpp | 216 +- lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h | 50 +- lib/ExecutionEngine/JITLink/JITLink.cpp | 158 +- lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp | 385 +- lib/ExecutionEngine/JITLink/JITLinkGeneric.h | 185 +- .../JITLink/JITLinkMemoryManager.cpp | 63 +- lib/ExecutionEngine/JITLink/MachO.cpp | 3 + .../JITLink/MachOAtomGraphBuilder.cpp | 411 -- .../JITLink/MachOAtomGraphBuilder.h | 138 - .../JITLink/MachOLinkGraphBuilder.cpp | 535 ++ .../JITLink/MachOLinkGraphBuilder.h | 269 + lib/ExecutionEngine/JITLink/MachO_arm64.cpp | 736 +++ lib/ExecutionEngine/JITLink/MachO_x86_64.cpp | 279 +- lib/ExecutionEngine/MCJIT/MCJIT.cpp | 38 +- .../OProfileJIT/OProfileJITEventListener.cpp | 2 +- .../OProfileJIT/OProfileWrapper.cpp | 4 +- lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp | 127 +- lib/ExecutionEngine/Orc/CompileUtils.cpp | 2 +- lib/ExecutionEngine/Orc/Core.cpp | 506 +- lib/ExecutionEngine/Orc/ExecutionUtils.cpp | 92 +- lib/ExecutionEngine/Orc/IRCompileLayer.cpp | 4 +- lib/ExecutionEngine/Orc/IRTransformLayer.cpp | 2 +- lib/ExecutionEngine/Orc/IndirectionUtils.cpp | 27 +- .../Orc/JITTargetMachineBuilder.cpp | 17 +- lib/ExecutionEngine/Orc/LLJIT.cpp | 38 +- lib/ExecutionEngine/Orc/Layer.cpp | 26 +- lib/ExecutionEngine/Orc/LazyReexports.cpp | 18 +- lib/ExecutionEngine/Orc/Legacy.cpp | 5 +- lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp | 258 +- lib/ExecutionEngine/Orc/OrcCBindingsStack.h | 11 +- .../Orc/RTDyldObjectLinkingLayer.cpp | 24 +- lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp | 307 + lib/ExecutionEngine/Orc/Speculation.cpp | 146 + lib/ExecutionEngine/Orc/ThreadSafeModule.cpp | 58 +- .../PerfJITEvents/PerfJITEventListener.cpp | 8 +- lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 64 +- .../RuntimeDyld/RuntimeDyldCOFF.cpp | 8 +- .../RuntimeDyld/RuntimeDyldChecker.cpp | 2 +- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 54 +- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h | 2 +- .../RuntimeDyld/RuntimeDyldMachO.cpp | 17 +- .../RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h | 8 +- .../RuntimeDyld/Targets/RuntimeDyldMachOARM.h | 5 +- .../RuntimeDyld/Targets/RuntimeDyldMachOI386.h | 5 +- lib/FuzzMutate/FuzzerCLI.cpp | 2 +- lib/IR/AsmWriter.cpp | 11 +- lib/IR/AttributeImpl.h | 6 +- lib/IR/Attributes.cpp | 84 +- lib/IR/AutoUpgrade.cpp | 157 +- lib/IR/BasicBlock.cpp | 7 + lib/IR/ConstantFold.cpp | 25 +- lib/IR/ConstantRange.cpp | 76 +- lib/IR/Constants.cpp | 56 +- lib/IR/ConstantsContext.h | 12 +- lib/IR/Core.cpp | 114 +- lib/IR/DIBuilder.cpp | 2 +- lib/IR/DataLayout.cpp | 163 +- lib/IR/DebugInfo.cpp | 22 +- lib/IR/DebugInfoMetadata.cpp | 34 +- lib/IR/DiagnosticInfo.cpp | 11 + lib/IR/Function.cpp | 103 +- lib/IR/Globals.cpp | 51 
+- lib/IR/IRBuilder.cpp | 8 +- lib/IR/IRPrintingPasses.cpp | 18 +- lib/IR/InlineAsm.cpp | 10 + lib/IR/Instruction.cpp | 2 +- lib/IR/Instructions.cpp | 167 +- lib/IR/IntrinsicInst.cpp | 15 +- lib/IR/LLVMContext.cpp | 31 +- lib/IR/LLVMContextImpl.cpp | 2 +- lib/IR/LegacyPassManager.cpp | 20 +- lib/IR/MDBuilder.cpp | 12 + lib/IR/Metadata.cpp | 18 + lib/IR/Module.cpp | 2 +- lib/IR/RemarkStreamer.cpp | 72 +- lib/IR/SafepointIRVerifier.cpp | 4 +- lib/IR/Type.cpp | 27 +- lib/IR/Value.cpp | 111 +- lib/IR/Verifier.cpp | 167 +- lib/LTO/Caching.cpp | 4 +- lib/LTO/LTO.cpp | 63 +- lib/LTO/LTOBackend.cpp | 18 +- lib/LTO/LTOCodeGenerator.cpp | 13 +- lib/LTO/LTOModule.cpp | 3 +- lib/LTO/SummaryBasedOptimizations.cpp | 2 +- lib/LTO/ThinLTOCodeGenerator.cpp | 139 +- lib/Linker/IRMover.cpp | 112 +- lib/Linker/LinkModules.cpp | 3 +- lib/MC/ELFObjectWriter.cpp | 86 +- lib/MC/MCAsmBackend.cpp | 5 +- lib/MC/MCAsmInfoXCOFF.cpp | 17 + lib/MC/MCAsmMacro.cpp | 2 + lib/MC/MCAsmStreamer.cpp | 140 +- lib/MC/MCAssembler.cpp | 38 +- lib/MC/MCContext.cpp | 29 +- lib/MC/MCDwarf.cpp | 44 +- lib/MC/MCELFStreamer.cpp | 7 +- lib/MC/MCExpr.cpp | 46 +- lib/MC/MCInstPrinter.cpp | 31 +- lib/MC/MCInstrAnalysis.cpp | 6 + lib/MC/MCMachOStreamer.cpp | 1 + lib/MC/MCObjectFileInfo.cpp | 14 +- lib/MC/MCObjectStreamer.cpp | 4 +- lib/MC/MCParser/AsmParser.cpp | 142 +- lib/MC/MCParser/COFFAsmParser.cpp | 155 +- lib/MC/MCParser/DarwinAsmParser.cpp | 4 +- lib/MC/MCParser/WasmAsmParser.cpp | 1 + lib/MC/MCRegisterInfo.cpp | 48 +- lib/MC/MCSectionXCOFF.cpp | 50 +- lib/MC/MCStreamer.cpp | 84 +- lib/MC/MCSubtargetInfo.cpp | 25 + lib/MC/MCWasmObjectTargetWriter.cpp | 5 +- lib/MC/MCWasmStreamer.cpp | 2 +- lib/MC/MCWinCOFFStreamer.cpp | 18 +- lib/MC/MCXCOFFStreamer.cpp | 54 +- lib/MC/MachObjectWriter.cpp | 14 +- lib/MC/StringTableBuilder.cpp | 10 +- lib/MC/WasmObjectWriter.cpp | 77 +- lib/MC/WinCOFFObjectWriter.cpp | 10 +- lib/MC/XCOFFObjectWriter.cpp | 533 +- lib/MCA/CodeEmitter.cpp | 37 + lib/MCA/Context.cpp | 23 +- lib/MCA/HardwareUnits/LSUnit.cpp | 28 +- lib/MCA/HardwareUnits/RegisterFile.cpp | 16 +- lib/MCA/HardwareUnits/ResourceManager.cpp | 59 +- lib/MCA/HardwareUnits/RetireControlUnit.cpp | 65 +- lib/MCA/HardwareUnits/Scheduler.cpp | 12 +- lib/MCA/InstrBuilder.cpp | 44 +- lib/MCA/Instruction.cpp | 4 +- lib/MCA/Stages/DispatchStage.cpp | 19 +- lib/MCA/Stages/EntryStage.cpp | 2 +- lib/MCA/Stages/ExecuteStage.cpp | 22 +- lib/MCA/Stages/RetireStage.cpp | 8 +- lib/Object/Archive.cpp | 6 +- lib/Object/ArchiveWriter.cpp | 35 +- lib/Object/Binary.cpp | 3 + lib/Object/COFFObjectFile.cpp | 198 +- lib/Object/Decompressor.cpp | 15 +- lib/Object/ELF.cpp | 2 + lib/Object/ELFObjectFile.cpp | 38 +- lib/Object/MachOObjectFile.cpp | 48 +- lib/Object/MachOUniversal.cpp | 38 +- lib/Object/Minidump.cpp | 46 +- lib/Object/Object.cpp | 10 +- lib/Object/ObjectFile.cpp | 11 +- lib/Object/RelocationResolver.cpp | 67 +- lib/Object/SymbolicFile.cpp | 1 + lib/Object/TapiFile.cpp | 104 + lib/Object/TapiUniversal.cpp | 54 + lib/Object/WasmObjectFile.cpp | 13 +- lib/Object/WindowsResource.cpp | 346 +- lib/Object/XCOFFObjectFile.cpp | 240 +- lib/ObjectYAML/COFFEmitter.cpp | 622 ++ lib/ObjectYAML/CodeViewYAMLSymbols.cpp | 2 +- lib/ObjectYAML/ELFEmitter.cpp | 1152 ++++ lib/ObjectYAML/ELFYAML.cpp | 325 +- lib/ObjectYAML/MachOEmitter.cpp | 580 ++ lib/ObjectYAML/MachOYAML.cpp | 9 + lib/ObjectYAML/MinidumpEmitter.cpp | 247 + lib/ObjectYAML/MinidumpYAML.cpp | 331 +- lib/ObjectYAML/WasmEmitter.cpp | 633 +++ lib/ObjectYAML/WasmYAML.cpp | 4 +- lib/ObjectYAML/yaml2obj.cpp | 77 + 
lib/Option/ArgList.cpp | 8 +- lib/Passes/PassBuilder.cpp | 164 +- lib/Passes/PassRegistry.def | 14 +- lib/ProfileData/Coverage/CoverageMapping.cpp | 60 +- lib/ProfileData/Coverage/CoverageMappingReader.cpp | 20 +- lib/ProfileData/Coverage/CoverageMappingWriter.cpp | 10 + lib/ProfileData/GCOV.cpp | 12 +- lib/ProfileData/InstrProf.cpp | 18 +- lib/ProfileData/InstrProfReader.cpp | 44 +- lib/ProfileData/InstrProfWriter.cpp | 2 +- lib/ProfileData/ProfileSummaryBuilder.cpp | 4 +- lib/ProfileData/SampleProf.cpp | 56 +- lib/ProfileData/SampleProfReader.cpp | 447 +- lib/ProfileData/SampleProfWriter.cpp | 279 +- lib/Remarks/BitstreamRemarkParser.cpp | 597 ++ lib/Remarks/BitstreamRemarkParser.h | 83 + lib/Remarks/BitstreamRemarkSerializer.cpp | 386 ++ lib/Remarks/RemarkFormat.cpp | 4 +- lib/Remarks/RemarkParser.cpp | 72 +- lib/Remarks/RemarkSerializer.cpp | 54 + lib/Remarks/RemarkStringTable.cpp | 28 +- lib/Remarks/YAMLRemarkParser.cpp | 165 +- lib/Remarks/YAMLRemarkParser.h | 38 +- lib/Remarks/YAMLRemarkSerializer.cpp | 134 +- lib/Support/AArch64TargetParser.cpp | 4 +- lib/Support/ABIBreak.cpp | 24 + lib/Support/APInt.cpp | 52 + lib/Support/ARMTargetParser.cpp | 8 +- lib/Support/CRC.cpp | 113 +- lib/Support/CachePruning.cpp | 2 +- lib/Support/CodeGenCoverage.cpp | 4 +- lib/Support/CommandLine.cpp | 2 +- lib/Support/CrashRecoveryContext.cpp | 8 +- lib/Support/DataExtractor.cpp | 160 +- lib/Support/Error.cpp | 17 +- lib/Support/FileCheck.cpp | 356 +- lib/Support/FileCheckImpl.h | 624 +++ lib/Support/FileCollector.cpp | 268 + lib/Support/FileOutputBuffer.cpp | 6 +- lib/Support/FileUtilities.cpp | 66 + lib/Support/GlobPattern.cpp | 23 +- lib/Support/Host.cpp | 34 +- lib/Support/JSON.cpp | 2 +- lib/Support/JamCRC.cpp | 96 - lib/Support/ManagedStatic.cpp | 13 +- lib/Support/MemoryBuffer.cpp | 31 +- lib/Support/Mutex.cpp | 123 - lib/Support/Parallel.cpp | 31 +- lib/Support/Path.cpp | 6 +- lib/Support/PrettyStackTrace.cpp | 64 +- lib/Support/RWMutex.cpp | 58 +- lib/Support/Regex.cpp | 39 +- lib/Support/Signposts.cpp | 2 + lib/Support/SpecialCaseList.cpp | 4 +- lib/Support/Statistic.cpp | 27 +- lib/Support/StringExtras.cpp | 4 +- lib/Support/TimeProfiler.cpp | 63 +- lib/Support/Timer.cpp | 10 +- lib/Support/Unix/Memory.inc | 6 +- lib/Support/Unix/Mutex.inc | 42 - lib/Support/Unix/Path.inc | 73 +- lib/Support/Unix/Process.inc | 7 +- lib/Support/Unix/Program.inc | 4 +- lib/Support/Unix/RWMutex.inc | 50 - lib/Support/Unix/Signals.inc | 15 +- lib/Support/VirtualFileSystem.cpp | 102 +- lib/Support/Windows/Mutex.inc | 56 - lib/Support/Windows/Path.inc | 93 +- lib/Support/Windows/Program.inc | 2 +- lib/Support/Windows/RWMutex.inc | 128 - lib/Support/Windows/Signals.inc | 3 + lib/Support/Windows/WindowsSupport.h | 1 + lib/Support/Windows/explicit_symbols.inc | 6 - lib/Support/YAMLTraits.cpp | 16 +- lib/Support/Z3Solver.cpp | 2 +- lib/Support/raw_ostream.cpp | 35 +- lib/Support/regcomp.c | 7 +- lib/TableGen/Error.cpp | 2 + lib/TableGen/Main.cpp | 21 +- lib/TableGen/Record.cpp | 11 +- lib/TableGen/SetTheory.cpp | 22 +- lib/TableGen/TGLexer.cpp | 4 +- lib/TableGen/TGParser.cpp | 28 +- lib/Target/AArch64/AArch64.h | 6 +- lib/Target/AArch64/AArch64.td | 80 +- lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp | 12 +- lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp | 16 +- lib/Target/AArch64/AArch64AsmPrinter.cpp | 280 +- lib/Target/AArch64/AArch64CallLowering.cpp | 632 ++- lib/Target/AArch64/AArch64CallLowering.h | 29 +- lib/Target/AArch64/AArch64CallingConvention.cpp | 38 +- lib/Target/AArch64/AArch64CallingConvention.h | 
3 + lib/Target/AArch64/AArch64CallingConvention.td | 88 +- lib/Target/AArch64/AArch64CollectLOH.cpp | 22 +- lib/Target/AArch64/AArch64Combine.td | 18 + lib/Target/AArch64/AArch64CondBrTuning.cpp | 4 +- lib/Target/AArch64/AArch64ConditionalCompares.cpp | 6 +- .../AArch64/AArch64DeadRegisterDefinitionsPass.cpp | 4 +- lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp | 76 +- lib/Target/AArch64/AArch64FalkorHWPFFix.cpp | 2 +- lib/Target/AArch64/AArch64FastISel.cpp | 75 +- lib/Target/AArch64/AArch64FrameLowering.cpp | 301 +- lib/Target/AArch64/AArch64FrameLowering.h | 28 +- lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 45 +- lib/Target/AArch64/AArch64ISelLowering.cpp | 535 +- lib/Target/AArch64/AArch64ISelLowering.h | 37 +- lib/Target/AArch64/AArch64InstrAtomics.td | 65 +- lib/Target/AArch64/AArch64InstrFormats.td | 220 +- lib/Target/AArch64/AArch64InstrInfo.cpp | 1054 ++-- lib/Target/AArch64/AArch64InstrInfo.h | 12 +- lib/Target/AArch64/AArch64InstrInfo.td | 253 +- lib/Target/AArch64/AArch64InstructionSelector.cpp | 1096 +++- lib/Target/AArch64/AArch64LegalizerInfo.cpp | 111 +- lib/Target/AArch64/AArch64LegalizerInfo.h | 3 + lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 160 +- lib/Target/AArch64/AArch64MCInstLower.cpp | 2 + lib/Target/AArch64/AArch64MachineFunctionInfo.h | 17 + lib/Target/AArch64/AArch64PBQPRegAlloc.cpp | 16 +- lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp | 98 +- lib/Target/AArch64/AArch64RegisterBankInfo.cpp | 39 +- lib/Target/AArch64/AArch64RegisterInfo.cpp | 69 +- lib/Target/AArch64/AArch64SIMDInstrOpt.cpp | 8 +- lib/Target/AArch64/AArch64SVEInstrInfo.td | 264 +- lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 2 +- lib/Target/AArch64/AArch64SpeculationHardening.cpp | 13 +- lib/Target/AArch64/AArch64StackOffset.h | 138 + lib/Target/AArch64/AArch64StackTagging.cpp | 394 +- lib/Target/AArch64/AArch64StackTaggingPreRA.cpp | 209 + lib/Target/AArch64/AArch64StorePairSuppress.cpp | 2 +- lib/Target/AArch64/AArch64Subtarget.cpp | 50 +- lib/Target/AArch64/AArch64Subtarget.h | 48 +- lib/Target/AArch64/AArch64SystemOperands.td | 40 +- lib/Target/AArch64/AArch64TargetMachine.cpp | 35 +- lib/Target/AArch64/AArch64TargetObjectFile.cpp | 4 +- lib/Target/AArch64/AArch64TargetObjectFile.h | 3 +- lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 29 +- lib/Target/AArch64/AArch64TargetTransformInfo.h | 14 +- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 123 +- .../AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 13 +- .../MCTargetDesc/AArch64ELFObjectWriter.cpp | 22 +- .../AArch64/MCTargetDesc/AArch64InstPrinter.cpp | 3 +- .../AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 5 +- lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h | 2 +- lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp | 7 + lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h | 20 +- .../AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp | 2 +- .../MCTargetDesc/AArch64MachObjectWriter.cpp | 4 +- .../MCTargetDesc/AArch64WinCOFFObjectWriter.cpp | 2 +- lib/Target/AArch64/SVEInstrFormats.td | 366 +- lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 2 +- lib/Target/AArch64/Utils/AArch64BaseInfo.h | 25 +- lib/Target/AMDGPU/AMDGPU.h | 4 + lib/Target/AMDGPU/AMDGPU.td | 16 + lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp | 21 +- lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h | 4 +- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 78 +- lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 10 +- lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp | 345 +- lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 701 ++- lib/Target/AMDGPU/AMDGPUCallLowering.h | 29 +- 
lib/Target/AMDGPU/AMDGPUCallingConv.td | 27 +- lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 12 +- lib/Target/AMDGPU/AMDGPUFrameLowering.cpp | 6 +- lib/Target/AMDGPU/AMDGPUFrameLowering.h | 4 +- lib/Target/AMDGPU/AMDGPUGISel.td | 78 +- lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def | 80 +- lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp | 9 +- lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h | 2 +- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 272 +- lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 250 +- lib/Target/AMDGPU/AMDGPUISelLowering.h | 18 +- lib/Target/AMDGPU/AMDGPUInline.cpp | 2 +- lib/Target/AMDGPU/AMDGPUInstrInfo.td | 126 +- lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 1144 ++-- lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 48 +- lib/Target/AMDGPU/AMDGPUInstructions.td | 216 +- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 1132 +++- lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 52 +- lib/Target/AMDGPU/AMDGPULibCalls.cpp | 37 +- lib/Target/AMDGPU/AMDGPULibFunc.cpp | 14 +- lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp | 20 +- lib/Target/AMDGPU/AMDGPUMCInstLower.cpp | 4 + lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp | 38 +- lib/Target/AMDGPU/AMDGPUMachineFunction.cpp | 1 - lib/Target/AMDGPU/AMDGPUMachineFunction.h | 6 +- lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp | 592 ++ lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 2 +- lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 1234 +++- lib/Target/AMDGPU/AMDGPURegisterBankInfo.h | 56 +- lib/Target/AMDGPU/AMDGPURegisterBanks.td | 6 +- lib/Target/AMDGPU/AMDGPURegisterInfo.cpp | 66 +- lib/Target/AMDGPU/AMDGPURegisterInfo.h | 2 +- lib/Target/AMDGPU/AMDGPUSearchableTables.td | 4 + lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 64 +- lib/Target/AMDGPU/AMDGPUSubtarget.h | 67 +- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 29 +- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 90 +- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 20 +- lib/Target/AMDGPU/AMDILCFGStructurizer.cpp | 8 +- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 595 +- lib/Target/AMDGPU/BUFInstructions.td | 694 ++- lib/Target/AMDGPU/DSInstructions.td | 92 +- .../AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 4 +- lib/Target/AMDGPU/EvergreenInstructions.td | 60 +- lib/Target/AMDGPU/FLATInstructions.td | 196 +- lib/Target/AMDGPU/GCNDPPCombine.cpp | 88 +- lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 21 +- lib/Target/AMDGPU/GCNILPSched.cpp | 1 + lib/Target/AMDGPU/GCNIterativeScheduler.cpp | 2 +- lib/Target/AMDGPU/GCNNSAReassign.cpp | 8 +- lib/Target/AMDGPU/GCNRegBankReassign.cpp | 14 +- lib/Target/AMDGPU/GCNRegPressure.cpp | 26 +- lib/Target/AMDGPU/GCNRegPressure.h | 2 +- lib/Target/AMDGPU/GCNSchedStrategy.cpp | 31 +- lib/Target/AMDGPU/GCNSchedStrategy.h | 3 + .../AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp | 2 +- .../AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp | 2 +- .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 37 +- lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h | 6 +- .../AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp | 4 +- lib/Target/AMDGPU/MIMGInstructions.td | 4 +- lib/Target/AMDGPU/R600AsmPrinter.cpp | 2 +- lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp | 4 +- lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp | 22 +- lib/Target/AMDGPU/R600FrameLowering.h | 6 +- lib/Target/AMDGPU/R600ISelLowering.cpp | 7 +- lib/Target/AMDGPU/R600InstrInfo.cpp | 22 +- lib/Target/AMDGPU/R600MachineScheduler.cpp | 8 +- lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp | 12 +- lib/Target/AMDGPU/R600Packetizer.cpp | 4 +- lib/Target/AMDGPU/R600RegisterInfo.cpp | 2 +- lib/Target/AMDGPU/SIAddIMGInit.cpp 
| 4 +- lib/Target/AMDGPU/SIDefines.h | 6 +- lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 394 +- lib/Target/AMDGPU/SIFixupVectorISel.cpp | 3 +- lib/Target/AMDGPU/SIFoldOperands.cpp | 114 +- lib/Target/AMDGPU/SIFormMemoryClauses.cpp | 22 +- lib/Target/AMDGPU/SIFrameLowering.cpp | 34 +- lib/Target/AMDGPU/SIFrameLowering.h | 6 +- lib/Target/AMDGPU/SIISelLowering.cpp | 1052 ++-- lib/Target/AMDGPU/SIISelLowering.h | 54 +- lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 6 +- lib/Target/AMDGPU/SIInstrFormats.td | 5 + lib/Target/AMDGPU/SIInstrInfo.cpp | 558 +- lib/Target/AMDGPU/SIInstrInfo.h | 44 +- lib/Target/AMDGPU/SIInstrInfo.td | 320 +- lib/Target/AMDGPU/SIInstructions.td | 297 +- lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 1117 ++-- lib/Target/AMDGPU/SILowerControlFlow.cpp | 60 +- lib/Target/AMDGPU/SILowerI1Copies.cpp | 49 +- lib/Target/AMDGPU/SILowerSGPRSpills.cpp | 4 +- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 9 +- lib/Target/AMDGPU/SIMachineFunctionInfo.h | 11 +- lib/Target/AMDGPU/SIMachineScheduler.cpp | 16 +- lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 6 +- lib/Target/AMDGPU/SIModeRegister.cpp | 2 +- lib/Target/AMDGPU/SIOptimizeExecMasking.cpp | 2 +- lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp | 32 +- lib/Target/AMDGPU/SIPeepholeSDWA.cpp | 32 +- lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp | 12 +- lib/Target/AMDGPU/SIProgramInfo.h | 5 + lib/Target/AMDGPU/SIRegisterInfo.cpp | 445 +- lib/Target/AMDGPU/SIRegisterInfo.h | 15 +- lib/Target/AMDGPU/SIRegisterInfo.td | 464 +- lib/Target/AMDGPU/SIShrinkInstructions.cpp | 42 +- lib/Target/AMDGPU/SIWholeQuadMode.cpp | 35 +- lib/Target/AMDGPU/SMInstructions.td | 15 +- lib/Target/AMDGPU/SOPInstructions.td | 42 +- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 71 +- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 24 +- lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp | 2 + lib/Target/AMDGPU/VOP1Instructions.td | 65 +- lib/Target/AMDGPU/VOP2Instructions.td | 346 +- lib/Target/AMDGPU/VOP3Instructions.td | 69 +- lib/Target/AMDGPU/VOP3PInstructions.td | 2 + lib/Target/AMDGPU/VOPCInstructions.td | 30 +- lib/Target/AMDGPU/VOPInstructions.td | 12 +- lib/Target/ARC/ARCFrameLowering.h | 4 +- lib/Target/ARC/ARCISelLowering.cpp | 2 +- lib/Target/ARC/ARCMachineFunctionInfo.h | 4 +- lib/Target/ARC/ARCOptAddrMode.cpp | 16 +- lib/Target/ARC/ARCRegisterInfo.cpp | 2 +- lib/Target/ARC/ARCTargetMachine.cpp | 2 +- lib/Target/ARM/A15SDOptimizer.cpp | 54 +- lib/Target/ARM/ARM.h | 2 + lib/Target/ARM/ARM.td | 42 +- lib/Target/ARM/ARMAsmPrinter.cpp | 66 +- lib/Target/ARM/ARMBaseInstrInfo.cpp | 216 +- lib/Target/ARM/ARMBaseInstrInfo.h | 21 +- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 49 +- lib/Target/ARM/ARMBaseRegisterInfo.h | 5 +- lib/Target/ARM/ARMBasicBlockInfo.cpp | 16 +- lib/Target/ARM/ARMBasicBlockInfo.h | 31 +- lib/Target/ARM/ARMCallLowering.cpp | 54 +- lib/Target/ARM/ARMCallLowering.h | 5 +- lib/Target/ARM/ARMCallingConv.cpp | 2 +- lib/Target/ARM/ARMCodeGenPrepare.cpp | 88 +- lib/Target/ARM/ARMConstantIslandPass.cpp | 289 +- lib/Target/ARM/ARMConstantPoolValue.cpp | 1 + lib/Target/ARM/ARMExpandPseudoInsts.cpp | 96 +- lib/Target/ARM/ARMFastISel.cpp | 88 +- lib/Target/ARM/ARMFrameLowering.cpp | 65 +- lib/Target/ARM/ARMFrameLowering.h | 5 + lib/Target/ARM/ARMISelDAGToDAG.cpp | 224 +- lib/Target/ARM/ARMISelLowering.cpp | 2073 +++++-- lib/Target/ARM/ARMISelLowering.h | 45 +- lib/Target/ARM/ARMInstrFormats.td | 23 +- lib/Target/ARM/ARMInstrInfo.cpp | 2 +- lib/Target/ARM/ARMInstrInfo.td | 127 +- lib/Target/ARM/ARMInstrMVE.td | 1430 ++++- lib/Target/ARM/ARMInstrNEON.td | 191 +- 
lib/Target/ARM/ARMInstrThumb.td | 16 +- lib/Target/ARM/ARMInstrThumb2.td | 98 +- lib/Target/ARM/ARMInstrVFP.td | 96 +- lib/Target/ARM/ARMInstructionSelector.cpp | 41 +- lib/Target/ARM/ARMLegalizerInfo.cpp | 2 + lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 32 +- lib/Target/ARM/ARMLowOverheadLoops.cpp | 364 +- lib/Target/ARM/ARMMCInstLower.cpp | 4 +- lib/Target/ARM/ARMMachineFunctionInfo.h | 8 + lib/Target/ARM/ARMParallelDSP.cpp | 675 ++- lib/Target/ARM/ARMPredicates.td | 2 +- lib/Target/ARM/ARMRegisterInfo.td | 18 +- lib/Target/ARM/ARMScheduleA9.td | 4 +- lib/Target/ARM/ARMScheduleM4.td | 24 +- lib/Target/ARM/ARMSubtarget.cpp | 14 +- lib/Target/ARM/ARMSubtarget.h | 30 +- lib/Target/ARM/ARMTargetMachine.cpp | 13 +- lib/Target/ARM/ARMTargetTransformInfo.cpp | 370 +- lib/Target/ARM/ARMTargetTransformInfo.h | 24 +- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 251 +- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 31 +- lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h | 20 +- lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 14 +- lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h | 3 + lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 6 +- lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp | 12 +- lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h | 5 +- lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 21 +- .../ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 6 +- lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp | 4 +- .../ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp | 2 +- lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp | 10 - lib/Target/ARM/MLxExpansionPass.cpp | 42 +- lib/Target/ARM/MVETailPredication.cpp | 519 ++ lib/Target/ARM/MVEVPTBlockPass.cpp | 278 + lib/Target/ARM/Thumb1FrameLowering.cpp | 8 +- lib/Target/ARM/Thumb1InstrInfo.cpp | 17 +- lib/Target/ARM/Thumb2ITBlockPass.cpp | 134 +- lib/Target/ARM/Thumb2InstrInfo.cpp | 38 +- lib/Target/ARM/Thumb2SizeReduction.cpp | 28 +- lib/Target/ARM/ThumbRegisterInfo.cpp | 11 +- lib/Target/AVR/AVRAsmPrinter.cpp | 2 +- lib/Target/AVR/AVRExpandPseudoInsts.cpp | 12 +- lib/Target/AVR/AVRFrameLowering.cpp | 5 +- lib/Target/AVR/AVRISelDAGToDAG.cpp | 2 +- lib/Target/AVR/AVRISelLowering.cpp | 27 +- lib/Target/AVR/AVRISelLowering.h | 4 +- lib/Target/AVR/AVRRegisterInfo.cpp | 2 +- lib/Target/AVR/AVRTargetMachine.cpp | 2 +- lib/Target/AVR/AsmParser/AVRAsmParser.cpp | 8 +- lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp | 2 +- lib/Target/BPF/AsmParser/BPFAsmParser.cpp | 6 +- lib/Target/BPF/BPF.h | 4 +- lib/Target/BPF/BPFAbstractMemberAccess.cpp | 708 ++- lib/Target/BPF/BPFAsmPrinter.cpp | 2 +- lib/Target/BPF/BPFCORE.h | 14 +- lib/Target/BPF/BPFFrameLowering.h | 2 +- lib/Target/BPF/BPFISelDAGToDAG.cpp | 170 +- lib/Target/BPF/BPFISelLowering.cpp | 21 +- lib/Target/BPF/BPFInstrInfo.cpp | 6 +- lib/Target/BPF/BPFInstrInfo.td | 2 +- lib/Target/BPF/BPFMIChecking.cpp | 1 + lib/Target/BPF/BPFMIPeephole.cpp | 206 +- lib/Target/BPF/BPFMISimplifyPatchable.cpp | 27 +- lib/Target/BPF/BPFRegisterInfo.cpp | 6 +- lib/Target/BPF/BPFTargetMachine.cpp | 16 +- lib/Target/BPF/BTF.h | 54 +- lib/Target/BPF/BTFDebug.cpp | 281 +- lib/Target/BPF/BTFDebug.h | 29 +- lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp | 4 +- lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp | 2 +- lib/Target/Hexagon/BitTracker.cpp | 21 +- lib/Target/Hexagon/HexagonAsmPrinter.cpp | 2 +- lib/Target/Hexagon/HexagonBitSimplify.cpp | 71 +- lib/Target/Hexagon/HexagonBitTracker.cpp | 8 +- lib/Target/Hexagon/HexagonBlockRanges.cpp | 14 +- lib/Target/Hexagon/HexagonBranchRelaxation.cpp | 5 +- 
lib/Target/Hexagon/HexagonConstExtenders.cpp | 17 +- lib/Target/Hexagon/HexagonConstPropagation.cpp | 32 +- lib/Target/Hexagon/HexagonCopyToCombine.cpp | 32 +- lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td | 696 +-- lib/Target/Hexagon/HexagonDepOperands.td | 83 +- lib/Target/Hexagon/HexagonEarlyIfConv.cpp | 24 +- lib/Target/Hexagon/HexagonExpandCondsets.cpp | 30 +- lib/Target/Hexagon/HexagonFixupHwLoops.cpp | 5 +- lib/Target/Hexagon/HexagonFrameLowering.cpp | 58 +- lib/Target/Hexagon/HexagonFrameLowering.h | 2 +- lib/Target/Hexagon/HexagonGenExtract.cpp | 2 +- lib/Target/Hexagon/HexagonGenInsert.cpp | 27 +- lib/Target/Hexagon/HexagonGenMux.cpp | 6 +- lib/Target/Hexagon/HexagonGenPredicate.cpp | 14 +- lib/Target/Hexagon/HexagonHardwareLoops.cpp | 56 +- lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 2 +- lib/Target/Hexagon/HexagonISelLowering.cpp | 156 +- lib/Target/Hexagon/HexagonISelLowering.h | 15 +- lib/Target/Hexagon/HexagonISelLoweringHVX.cpp | 24 + lib/Target/Hexagon/HexagonInstrInfo.cpp | 273 +- lib/Target/Hexagon/HexagonInstrInfo.h | 22 +- lib/Target/Hexagon/HexagonIntrinsics.td | 46 +- lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp | 15 +- lib/Target/Hexagon/HexagonNewValueJump.cpp | 10 +- lib/Target/Hexagon/HexagonOptAddrMode.cpp | 12 +- lib/Target/Hexagon/HexagonPatterns.td | 194 +- lib/Target/Hexagon/HexagonPatternsHVX.td | 40 +- lib/Target/Hexagon/HexagonPeephole.cpp | 38 +- lib/Target/Hexagon/HexagonRegisterInfo.cpp | 6 +- .../Hexagon/HexagonSplitConst32AndConst64.cpp | 8 +- lib/Target/Hexagon/HexagonSplitDouble.cpp | 60 +- lib/Target/Hexagon/HexagonStoreWidening.cpp | 2 +- lib/Target/Hexagon/HexagonSubtarget.cpp | 19 +- lib/Target/Hexagon/HexagonSubtarget.h | 2 +- lib/Target/Hexagon/HexagonTargetMachine.cpp | 12 +- lib/Target/Hexagon/HexagonTargetTransformInfo.cpp | 2 + lib/Target/Hexagon/HexagonTargetTransformInfo.h | 4 +- lib/Target/Hexagon/HexagonVExtract.cpp | 12 +- lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 37 +- lib/Target/Hexagon/HexagonVLIWPacketizer.h | 3 +- .../Hexagon/MCTargetDesc/HexagonAsmBackend.cpp | 6 +- .../MCTargetDesc/HexagonELFObjectWriter.cpp | 4 +- .../Hexagon/MCTargetDesc/HexagonMCChecker.cpp | 7 +- .../Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp | 4 +- .../Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 10 +- lib/Target/Hexagon/RDFCopy.cpp | 4 +- lib/Target/Hexagon/RDFDeadCode.cpp | 1 + lib/Target/Hexagon/RDFGraph.cpp | 16 +- lib/Target/Hexagon/RDFLiveness.cpp | 8 +- lib/Target/Hexagon/RDFRegisters.cpp | 8 +- lib/Target/Hexagon/RDFRegisters.h | 8 +- lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp | 23 +- lib/Target/Lanai/LanaiAsmPrinter.cpp | 2 +- lib/Target/Lanai/LanaiDelaySlotFiller.cpp | 2 +- lib/Target/Lanai/LanaiFrameLowering.cpp | 4 +- lib/Target/Lanai/LanaiFrameLowering.h | 2 +- lib/Target/Lanai/LanaiISelLowering.cpp | 15 +- lib/Target/Lanai/LanaiISelLowering.h | 4 +- lib/Target/Lanai/LanaiInstrInfo.cpp | 9 +- lib/Target/Lanai/LanaiInstrInfo.h | 3 +- lib/Target/Lanai/LanaiRegisterInfo.cpp | 2 +- .../Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp | 2 +- lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp | 12 +- .../MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp | 4 +- lib/Target/MSP430/MSP430AsmPrinter.cpp | 8 +- lib/Target/MSP430/MSP430BranchSelector.cpp | 1 + lib/Target/MSP430/MSP430FrameLowering.h | 3 +- lib/Target/MSP430/MSP430ISelLowering.cpp | 27 +- lib/Target/MSP430/MSP430ISelLowering.h | 2 + lib/Target/MSP430/MSP430RegisterInfo.cpp | 2 +- lib/Target/MSP430/MSP430TargetMachine.cpp | 2 +- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 696 ++- 
lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 16 + lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 1 - lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h | 6 +- .../Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 4 +- lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 7 +- lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h | 5 +- lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp | 11 +- .../Mips/MCTargetDesc/MipsNaClELFStreamer.cpp | 2 +- lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp | 4 +- .../Mips/MCTargetDesc/MipsTargetStreamer.cpp | 96 +- lib/Target/Mips/MicroMipsDSPInstrInfo.td | 4 +- lib/Target/Mips/MicroMipsInstrInfo.td | 9 +- lib/Target/Mips/MicroMipsSizeReduction.cpp | 18 +- lib/Target/Mips/Mips.td | 12 + lib/Target/Mips/Mips16ISelDAGToDAG.cpp | 2 +- lib/Target/Mips/Mips16ISelLowering.cpp | 16 +- lib/Target/Mips/Mips16InstrInfo.cpp | 2 +- lib/Target/Mips/Mips64InstrInfo.td | 36 + lib/Target/Mips/MipsAsmPrinter.cpp | 12 +- lib/Target/Mips/MipsCallLowering.cpp | 150 +- lib/Target/Mips/MipsCallLowering.h | 8 +- lib/Target/Mips/MipsConstantIslandPass.cpp | 63 +- lib/Target/Mips/MipsDSPInstrInfo.td | 19 +- lib/Target/Mips/MipsExpandPseudo.cpp | 54 +- lib/Target/Mips/MipsFastISel.cpp | 12 +- lib/Target/Mips/MipsFrameLowering.h | 5 +- lib/Target/Mips/MipsISelDAGToDAG.cpp | 53 +- lib/Target/Mips/MipsISelDAGToDAG.h | 5 + lib/Target/Mips/MipsISelLowering.cpp | 164 +- lib/Target/Mips/MipsISelLowering.h | 13 +- lib/Target/Mips/MipsInstrInfo.cpp | 3 +- lib/Target/Mips/MipsInstrInfo.h | 2 +- lib/Target/Mips/MipsInstrInfo.td | 30 +- lib/Target/Mips/MipsInstructionSelector.cpp | 206 +- lib/Target/Mips/MipsLegalizerInfo.cpp | 244 +- lib/Target/Mips/MipsLegalizerInfo.h | 3 + lib/Target/Mips/MipsMSAInstrInfo.td | 55 +- lib/Target/Mips/MipsOptimizePICCall.cpp | 5 +- lib/Target/Mips/MipsPfmCounters.td | 18 + lib/Target/Mips/MipsPreLegalizerCombiner.cpp | 3 +- lib/Target/Mips/MipsRegisterBankInfo.cpp | 322 +- lib/Target/Mips/MipsRegisterBankInfo.h | 9 + lib/Target/Mips/MipsRegisterBanks.td | 2 +- lib/Target/Mips/MipsSEFrameLowering.cpp | 55 +- lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 54 +- lib/Target/Mips/MipsSEISelDAGToDAG.h | 6 +- lib/Target/Mips/MipsSEISelLowering.cpp | 124 +- lib/Target/Mips/MipsSEInstrInfo.cpp | 20 +- lib/Target/Mips/MipsSERegisterInfo.cpp | 8 +- lib/Target/Mips/MipsSubtarget.cpp | 17 +- lib/Target/Mips/MipsSubtarget.h | 15 +- lib/Target/Mips/MipsTargetMachine.cpp | 18 +- lib/Target/Mips/MipsTargetStreamer.h | 14 +- lib/Target/NVPTX/NVPTX.h | 2 +- lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 34 +- lib/Target/NVPTX/NVPTXAsmPrinter.h | 2 +- lib/Target/NVPTX/NVPTXFrameLowering.cpp | 2 +- lib/Target/NVPTX/NVPTXISelLowering.cpp | 58 +- lib/Target/NVPTX/NVPTXInstrInfo.td | 13 +- lib/Target/NVPTX/NVPTXIntrinsics.td | 169 +- lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp | 2 +- lib/Target/NVPTX/NVPTXLowerAlloca.cpp | 97 +- lib/Target/NVPTX/NVPTXLowerArgs.cpp | 2 +- lib/Target/NVPTX/NVPTXPeephole.cpp | 2 +- lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp | 2 +- lib/Target/NVPTX/NVPTXTargetMachine.cpp | 2 +- lib/Target/NVPTX/NVPTXUtilities.cpp | 13 +- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 10 +- .../PowerPC/Disassembler/PPCDisassembler.cpp | 6 - .../PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 6 +- lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp | 25 + lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 1 + lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 4 +- lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h | 14 +- .../PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp | 4 +- 
.../PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp | 2 +- lib/Target/PowerPC/P9InstrResources.td | 8 +- lib/Target/PowerPC/PPC.h | 8 +- lib/Target/PowerPC/PPCAsmPrinter.cpp | 482 +- lib/Target/PowerPC/PPCBranchCoalescing.cpp | 13 +- lib/Target/PowerPC/PPCBranchSelector.cpp | 29 +- lib/Target/PowerPC/PPCFastISel.cpp | 41 +- lib/Target/PowerPC/PPCFrameLowering.cpp | 71 +- lib/Target/PowerPC/PPCFrameLowering.h | 11 +- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 105 +- lib/Target/PowerPC/PPCISelLowering.cpp | 546 +- lib/Target/PowerPC/PPCISelLowering.h | 49 +- lib/Target/PowerPC/PPCInstr64Bit.td | 4 +- lib/Target/PowerPC/PPCInstrAltivec.td | 12 +- lib/Target/PowerPC/PPCInstrFormats.td | 9 +- lib/Target/PowerPC/PPCInstrInfo.cpp | 336 +- lib/Target/PowerPC/PPCInstrInfo.h | 42 +- lib/Target/PowerPC/PPCInstrInfo.td | 206 +- lib/Target/PowerPC/PPCInstrVSX.td | 180 +- lib/Target/PowerPC/PPCLoopPreIncPrep.cpp | 670 ++- lib/Target/PowerPC/PPCMCInstLower.cpp | 23 +- lib/Target/PowerPC/PPCMIPeephole.cpp | 82 +- lib/Target/PowerPC/PPCPreEmitPeephole.cpp | 106 +- lib/Target/PowerPC/PPCQPXLoadSplat.cpp | 6 +- lib/Target/PowerPC/PPCReduceCRLogicals.cpp | 15 +- lib/Target/PowerPC/PPCRegisterInfo.cpp | 39 +- lib/Target/PowerPC/PPCRegisterInfo.td | 22 +- lib/Target/PowerPC/PPCSubtarget.cpp | 18 +- lib/Target/PowerPC/PPCSubtarget.h | 24 +- lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 4 +- lib/Target/PowerPC/PPCTOCRegDeps.cpp | 9 +- lib/Target/PowerPC/PPCTargetMachine.cpp | 38 +- lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 68 +- lib/Target/PowerPC/PPCTargetTransformInfo.h | 12 +- lib/Target/PowerPC/PPCVSXCopy.cpp | 6 +- lib/Target/PowerPC/PPCVSXFMAMutate.cpp | 18 +- lib/Target/PowerPC/PPCVSXSwapRemoval.cpp | 32 +- lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp | 282 +- .../RISCV/Disassembler/RISCVDisassembler.cpp | 139 +- lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 17 +- .../RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp | 13 +- lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp | 41 + lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h | 8 +- lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp | 20 + lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h | 3 + .../RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp | 4 +- lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h | 1 + .../RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp | 4 +- lib/Target/RISCV/RISCV.h | 7 + lib/Target/RISCV/RISCV.td | 11 +- lib/Target/RISCV/RISCVCallLowering.cpp | 50 + lib/Target/RISCV/RISCVCallLowering.h | 42 + lib/Target/RISCV/RISCVCallingConv.td | 28 +- lib/Target/RISCV/RISCVExpandPseudoInsts.cpp | 54 +- lib/Target/RISCV/RISCVFrameLowering.cpp | 164 +- lib/Target/RISCV/RISCVFrameLowering.h | 9 +- lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 5 +- lib/Target/RISCV/RISCVISelLowering.cpp | 323 +- lib/Target/RISCV/RISCVISelLowering.h | 6 + lib/Target/RISCV/RISCVInstrInfo.cpp | 118 +- lib/Target/RISCV/RISCVInstrInfo.h | 18 +- lib/Target/RISCV/RISCVInstrInfo.td | 22 + lib/Target/RISCV/RISCVInstrInfoA.td | 34 +- lib/Target/RISCV/RISCVInstrInfoC.td | 124 +- lib/Target/RISCV/RISCVInstrInfoF.td | 6 + lib/Target/RISCV/RISCVInstructionSelector.cpp | 103 + lib/Target/RISCV/RISCVLegalizerInfo.cpp | 23 + lib/Target/RISCV/RISCVLegalizerInfo.h | 28 + lib/Target/RISCV/RISCVMergeBaseOffset.cpp | 16 +- lib/Target/RISCV/RISCVRegisterBankInfo.cpp | 26 + lib/Target/RISCV/RISCVRegisterBankInfo.h | 37 + lib/Target/RISCV/RISCVRegisterBanks.td | 13 + lib/Target/RISCV/RISCVRegisterInfo.cpp | 13 +- lib/Target/RISCV/RISCVRegisterInfo.h | 6 + lib/Target/RISCV/RISCVRegisterInfo.td | 100 +- 
lib/Target/RISCV/RISCVSubtarget.cpp | 30 +- lib/Target/RISCV/RISCVSubtarget.h | 20 + lib/Target/RISCV/RISCVTargetMachine.cpp | 31 +- lib/Target/RISCV/Utils/RISCVBaseInfo.h | 16 + lib/Target/Sparc/AsmParser/SparcAsmParser.cpp | 8 +- lib/Target/Sparc/DelaySlotFiller.cpp | 10 +- .../Sparc/MCTargetDesc/SparcELFObjectWriter.cpp | 6 +- lib/Target/Sparc/SparcFrameLowering.cpp | 3 +- lib/Target/Sparc/SparcISelDAGToDAG.cpp | 4 +- lib/Target/Sparc/SparcISelLowering.cpp | 28 +- lib/Target/Sparc/SparcISelLowering.h | 4 +- lib/Target/Sparc/SparcInstr64Bit.td | 2 +- lib/Target/Sparc/SparcInstrInfo.cpp | 4 +- lib/Target/Sparc/SparcInstrInfo.td | 8 +- lib/Target/Sparc/SparcRegisterInfo.cpp | 12 +- lib/Target/Sparc/SparcTargetMachine.cpp | 4 +- lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp | 12 +- .../SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp | 2 +- lib/Target/SystemZ/SystemZ.h | 1 - lib/Target/SystemZ/SystemZAsmPrinter.cpp | 20 + lib/Target/SystemZ/SystemZAsmPrinter.h | 1 + lib/Target/SystemZ/SystemZElimCompare.cpp | 9 +- lib/Target/SystemZ/SystemZExpandPseudo.cpp | 152 - lib/Target/SystemZ/SystemZFrameLowering.cpp | 6 +- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 11 +- lib/Target/SystemZ/SystemZISelLowering.cpp | 244 +- lib/Target/SystemZ/SystemZInstrFP.td | 32 +- lib/Target/SystemZ/SystemZInstrFormats.td | 166 +- lib/Target/SystemZ/SystemZInstrInfo.cpp | 168 +- lib/Target/SystemZ/SystemZInstrInfo.h | 29 +- lib/Target/SystemZ/SystemZInstrInfo.td | 22 +- lib/Target/SystemZ/SystemZInstrVector.td | 26 +- lib/Target/SystemZ/SystemZLongBranch.cpp | 26 +- lib/Target/SystemZ/SystemZMachineScheduler.cpp | 5 +- lib/Target/SystemZ/SystemZOperands.td | 121 +- lib/Target/SystemZ/SystemZOperators.td | 6 +- lib/Target/SystemZ/SystemZPatterns.td | 4 +- lib/Target/SystemZ/SystemZPostRewrite.cpp | 164 +- lib/Target/SystemZ/SystemZProcessors.td | 3 +- lib/Target/SystemZ/SystemZRegisterInfo.cpp | 19 +- lib/Target/SystemZ/SystemZRegisterInfo.h | 9 + lib/Target/SystemZ/SystemZSchedule.td | 2 +- lib/Target/SystemZ/SystemZScheduleArch13.td | 1695 ------ lib/Target/SystemZ/SystemZScheduleZ15.td | 1695 ++++++ lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 8 +- lib/Target/SystemZ/SystemZShortenInst.cpp | 4 +- lib/Target/SystemZ/SystemZTargetMachine.cpp | 11 +- lib/Target/SystemZ/SystemZTargetTransformInfo.cpp | 5 +- lib/Target/SystemZ/SystemZTargetTransformInfo.h | 8 +- lib/Target/TargetLoweringObjectFile.cpp | 1 + lib/Target/TargetMachine.cpp | 20 +- lib/Target/TargetMachineC.cpp | 2 +- .../WebAssembly/AsmParser/WebAssemblyAsmParser.cpp | 100 +- .../Disassembler/WebAssemblyDisassembler.cpp | 24 +- .../MCTargetDesc/WebAssemblyAsmBackend.cpp | 10 +- .../MCTargetDesc/WebAssemblyInstPrinter.cpp | 57 +- .../MCTargetDesc/WebAssemblyInstPrinter.h | 3 + .../MCTargetDesc/WebAssemblyMCCodeEmitter.cpp | 1 + .../MCTargetDesc/WebAssemblyMCTargetDesc.h | 74 +- .../MCTargetDesc/WebAssemblyTargetStreamer.cpp | 33 +- .../MCTargetDesc/WebAssemblyTargetStreamer.h | 3 - .../MCTargetDesc/WebAssemblyWasmObjectWriter.cpp | 11 +- lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp | 38 +- lib/Target/WebAssembly/WebAssemblyCFGSort.cpp | 5 +- lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp | 127 +- .../WebAssembly/WebAssemblyExplicitLocals.cpp | 22 +- lib/Target/WebAssembly/WebAssemblyFastISel.cpp | 36 +- .../WebAssembly/WebAssemblyFixFunctionBitcasts.cpp | 2 + .../WebAssemblyFixIrreducibleControlFlow.cpp | 3 +- .../WebAssembly/WebAssemblyFrameLowering.cpp | 8 +- lib/Target/WebAssembly/WebAssemblyFrameLowering.h | 4 +- 
lib/Target/WebAssembly/WebAssemblyISD.def | 2 + lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp | 131 +- lib/Target/WebAssembly/WebAssemblyISelLowering.cpp | 306 +- lib/Target/WebAssembly/WebAssemblyISelLowering.h | 2 +- lib/Target/WebAssembly/WebAssemblyInstrAtomics.td | 100 +- .../WebAssembly/WebAssemblyInstrBulkMemory.td | 4 +- lib/Target/WebAssembly/WebAssemblyInstrControl.td | 48 +- lib/Target/WebAssembly/WebAssemblyInstrConv.td | 17 + lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp | 4 +- lib/Target/WebAssembly/WebAssemblyInstrInfo.h | 2 +- lib/Target/WebAssembly/WebAssemblyInstrInfo.td | 3 +- lib/Target/WebAssembly/WebAssemblyInstrMemory.td | 51 - lib/Target/WebAssembly/WebAssemblyInstrSIMD.td | 221 +- .../WebAssembly/WebAssemblyLateEHPrepare.cpp | 12 +- .../WebAssembly/WebAssemblyLowerBrUnless.cpp | 4 +- .../WebAssemblyLowerEmscriptenEHSjLj.cpp | 119 +- .../WebAssembly/WebAssemblyLowerGlobalDtors.cpp | 2 +- lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp | 55 +- lib/Target/WebAssembly/WebAssemblyMCInstLower.h | 3 + .../WebAssembly/WebAssemblyMachineFunctionInfo.cpp | 12 +- .../WebAssembly/WebAssemblyMachineFunctionInfo.h | 13 +- .../WebAssembly/WebAssemblyMemIntrinsicResults.cpp | 7 +- .../WebAssemblyOptimizeLiveIntervals.cpp | 2 +- .../WebAssembly/WebAssemblyOptimizeReturned.cpp | 7 +- lib/Target/WebAssembly/WebAssemblyPeephole.cpp | 107 +- .../WebAssemblyPrepareForLiveIntervals.cpp | 2 +- lib/Target/WebAssembly/WebAssemblyRegColoring.cpp | 7 +- lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp | 2 +- lib/Target/WebAssembly/WebAssemblyRegStackify.cpp | 24 +- lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp | 6 +- .../WebAssembly/WebAssemblyTargetMachine.cpp | 2 +- .../WebAssembly/WebAssemblyTargetTransformInfo.cpp | 5 +- .../WebAssembly/WebAssemblyTargetTransformInfo.h | 2 +- lib/Target/WebAssembly/WebAssemblyUtilities.cpp | 21 +- lib/Target/X86/AsmParser/X86AsmParser.cpp | 170 + lib/Target/X86/AsmParser/X86AsmParserCommon.h | 4 + lib/Target/X86/AsmParser/X86Operand.h | 25 +- .../X86/Disassembler/X86DisassemblerDecoder.cpp | 5 +- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 6 +- lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp | 19 +- lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 2 + lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 3 + lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 61 +- lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h | 11 +- .../X86/MCTargetDesc/X86MachObjectWriter.cpp | 7 +- .../X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp | 2 +- lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp | 5 +- .../X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp | 2 +- lib/Target/X86/X86.h | 10 +- lib/Target/X86/X86.td | 56 +- lib/Target/X86/X86AsmPrinter.cpp | 8 +- lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp | 3 +- lib/Target/X86/X86AvoidTrailingCall.cpp | 108 + lib/Target/X86/X86CallFrameOptimization.cpp | 26 +- lib/Target/X86/X86CallLowering.cpp | 49 +- lib/Target/X86/X86CallLowering.h | 5 +- lib/Target/X86/X86CallingConv.td | 2 + lib/Target/X86/X86CmovConversion.cpp | 18 +- lib/Target/X86/X86CondBrFolding.cpp | 2 +- lib/Target/X86/X86DomainReassignment.cpp | 20 +- lib/Target/X86/X86EvexToVex.cpp | 2 +- lib/Target/X86/X86ExpandPseudo.cpp | 11 +- lib/Target/X86/X86FastISel.cpp | 15 +- lib/Target/X86/X86FixupBWInsts.cpp | 68 +- lib/Target/X86/X86FixupLEAs.cpp | 201 +- lib/Target/X86/X86FixupSetCC.cpp | 4 +- lib/Target/X86/X86FlagsCopyLowering.cpp | 13 +- lib/Target/X86/X86FloatingPoint.cpp | 6 +- lib/Target/X86/X86FrameLowering.cpp | 106 +- 
lib/Target/X86/X86FrameLowering.h | 4 +- lib/Target/X86/X86ISelDAGToDAG.cpp | 304 +- lib/Target/X86/X86ISelLowering.cpp | 5926 +++++++++++--------- lib/Target/X86/X86ISelLowering.h | 77 +- lib/Target/X86/X86IndirectBranchTracking.cpp | 2 +- lib/Target/X86/X86InsertPrefetch.cpp | 8 +- lib/Target/X86/X86InstrAVX512.td | 1457 ++--- lib/Target/X86/X86InstrArithmetic.td | 10 +- lib/Target/X86/X86InstrBuilder.h | 6 +- lib/Target/X86/X86InstrCMovSetCC.td | 33 +- lib/Target/X86/X86InstrCompiler.td | 139 +- lib/Target/X86/X86InstrControl.td | 85 +- lib/Target/X86/X86InstrExtension.td | 11 +- lib/Target/X86/X86InstrFoldTables.cpp | 287 + lib/Target/X86/X86InstrFoldTables.h | 39 +- lib/Target/X86/X86InstrFragmentsSIMD.td | 26 + lib/Target/X86/X86InstrInfo.cpp | 582 +- lib/Target/X86/X86InstrInfo.h | 28 +- lib/Target/X86/X86InstrInfo.td | 57 +- lib/Target/X86/X86InstrMMX.td | 33 +- lib/Target/X86/X86InstrMPX.td | 32 +- lib/Target/X86/X86InstrSSE.td | 551 +- lib/Target/X86/X86InstrSystem.td | 2 +- lib/Target/X86/X86InstrTSX.td | 2 +- lib/Target/X86/X86InstrXOP.td | 26 +- lib/Target/X86/X86InstructionSelector.cpp | 135 +- lib/Target/X86/X86IntrinsicsInfo.h | 6 +- lib/Target/X86/X86LegalizerInfo.cpp | 20 + lib/Target/X86/X86LegalizerInfo.h | 3 + lib/Target/X86/X86MCInstLower.cpp | 313 +- lib/Target/X86/X86MachineFunctionInfo.h | 8 + lib/Target/X86/X86OptimizeLEAs.cpp | 60 +- lib/Target/X86/X86RegisterBankInfo.cpp | 4 +- lib/Target/X86/X86RegisterInfo.cpp | 31 +- lib/Target/X86/X86RetpolineThunks.cpp | 8 +- lib/Target/X86/X86SchedBroadwell.td | 8 +- lib/Target/X86/X86SchedHaswell.td | 8 +- lib/Target/X86/X86SchedPredicates.td | 57 + lib/Target/X86/X86SchedSandyBridge.td | 8 +- lib/Target/X86/X86SchedSkylakeClient.td | 8 +- lib/Target/X86/X86SchedSkylakeServer.td | 8 +- lib/Target/X86/X86Schedule.td | 24 +- lib/Target/X86/X86ScheduleAtom.td | 6 +- lib/Target/X86/X86ScheduleBdVer2.td | 6 +- lib/Target/X86/X86ScheduleBtVer2.td | 257 +- lib/Target/X86/X86ScheduleSLM.td | 8 +- lib/Target/X86/X86ScheduleZnver1.td | 8 +- lib/Target/X86/X86SelectionDAGInfo.cpp | 2 +- lib/Target/X86/X86SpeculativeLoadHardening.cpp | 59 +- lib/Target/X86/X86Subtarget.cpp | 18 +- lib/Target/X86/X86Subtarget.h | 23 +- lib/Target/X86/X86TargetMachine.cpp | 49 +- lib/Target/X86/X86TargetMachine.h | 2 +- lib/Target/X86/X86TargetObjectFile.cpp | 4 +- lib/Target/X86/X86TargetObjectFile.h | 3 +- lib/Target/X86/X86TargetTransformInfo.cpp | 225 +- lib/Target/X86/X86TargetTransformInfo.h | 11 +- lib/Target/X86/X86VZeroUpper.cpp | 6 +- lib/Target/X86/X86WinAllocaExpander.cpp | 4 +- lib/Target/X86/X86WinEHState.cpp | 5 +- lib/Target/XCore/XCoreAsmPrinter.cpp | 4 +- lib/Target/XCore/XCoreFrameLowering.cpp | 6 +- lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp | 2 +- lib/Target/XCore/XCoreISelLowering.cpp | 21 +- lib/Target/XCore/XCoreRegisterInfo.cpp | 2 +- lib/Target/XCore/XCoreTargetMachine.cpp | 2 +- lib/Target/XCore/XCoreTargetTransformInfo.h | 3 +- lib/TextAPI/MachO/Architecture.cpp | 4 + lib/TextAPI/MachO/InterfaceFile.cpp | 80 +- lib/TextAPI/MachO/Platform.cpp | 91 + lib/TextAPI/MachO/Symbol.cpp | 9 + lib/TextAPI/MachO/Target.cpp | 75 + lib/TextAPI/MachO/TextStub.cpp | 606 +- lib/TextAPI/MachO/TextStubCommon.cpp | 93 +- lib/TextAPI/MachO/TextStubCommon.h | 8 +- lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp | 26 +- lib/ToolDrivers/llvm-lib/LibDriver.cpp | 236 +- .../AggressiveInstCombine.cpp | 78 +- lib/Transforms/Coroutines/CoroCleanup.cpp | 7 +- lib/Transforms/Coroutines/CoroEarly.cpp | 26 +- lib/Transforms/Coroutines/CoroElide.cpp | 2 
+- lib/Transforms/Coroutines/CoroFrame.cpp | 652 ++- lib/Transforms/Coroutines/CoroInstr.h | 205 +- lib/Transforms/Coroutines/CoroInternal.h | 162 +- lib/Transforms/Coroutines/CoroSplit.cpp | 1166 +++- lib/Transforms/Coroutines/Coroutines.cpp | 342 +- lib/Transforms/IPO/ArgumentPromotion.cpp | 2 +- lib/Transforms/IPO/Attributor.cpp | 5055 ++++++++++++++--- lib/Transforms/IPO/BlockExtractor.cpp | 5 +- lib/Transforms/IPO/ConstantMerge.cpp | 4 +- lib/Transforms/IPO/CrossDSOCFI.cpp | 10 +- lib/Transforms/IPO/FunctionAttrs.cpp | 38 +- lib/Transforms/IPO/FunctionImport.cpp | 43 +- lib/Transforms/IPO/GlobalDCE.cpp | 156 +- lib/Transforms/IPO/GlobalOpt.cpp | 176 +- lib/Transforms/IPO/HotColdSplitting.cpp | 61 +- lib/Transforms/IPO/IPO.cpp | 13 + lib/Transforms/IPO/InferFunctionAttrs.cpp | 20 +- lib/Transforms/IPO/Inliner.cpp | 21 +- lib/Transforms/IPO/LoopExtractor.cpp | 6 +- lib/Transforms/IPO/LowerTypeTests.cpp | 305 +- lib/Transforms/IPO/MergeFunctions.cpp | 4 +- lib/Transforms/IPO/PartialInlining.cpp | 20 +- lib/Transforms/IPO/PassManagerBuilder.cpp | 1 + lib/Transforms/IPO/SCCP.cpp | 18 +- lib/Transforms/IPO/SampleProfile.cpp | 238 +- lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp | 21 +- lib/Transforms/IPO/WholeProgramDevirt.cpp | 389 +- lib/Transforms/InstCombine/InstCombineAddSub.cpp | 268 +- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 278 +- .../InstCombine/InstCombineAtomicRMW.cpp | 4 +- lib/Transforms/InstCombine/InstCombineCalls.cpp | 121 +- lib/Transforms/InstCombine/InstCombineCasts.cpp | 102 +- lib/Transforms/InstCombine/InstCombineCompares.cpp | 870 ++- lib/Transforms/InstCombine/InstCombineInternal.h | 116 +- .../InstCombine/InstCombineLoadStoreAlloca.cpp | 93 +- .../InstCombine/InstCombineMulDivRem.cpp | 77 +- lib/Transforms/InstCombine/InstCombinePHI.cpp | 6 +- lib/Transforms/InstCombine/InstCombineSelect.cpp | 455 +- lib/Transforms/InstCombine/InstCombineShifts.cpp | 370 +- .../InstCombine/InstCombineSimplifyDemanded.cpp | 48 +- .../InstCombine/InstCombineVectorOps.cpp | 171 +- .../InstCombine/InstructionCombining.cpp | 67 +- .../Instrumentation/AddressSanitizer.cpp | 98 +- lib/Transforms/Instrumentation/BoundsChecking.cpp | 2 +- lib/Transforms/Instrumentation/CFGMST.h | 4 +- .../Instrumentation/ControlHeightReduction.cpp | 26 +- .../Instrumentation/DataFlowSanitizer.cpp | 2 +- lib/Transforms/Instrumentation/GCOVProfiling.cpp | 49 +- .../Instrumentation/HWAddressSanitizer.cpp | 376 +- .../Instrumentation/IndirectCallPromotion.cpp | 2 +- lib/Transforms/Instrumentation/InstrOrderFile.cpp | 3 +- lib/Transforms/Instrumentation/InstrProfiling.cpp | 65 +- lib/Transforms/Instrumentation/Instrumentation.cpp | 5 +- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 89 +- .../Instrumentation/PGOInstrumentation.cpp | 220 +- lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp | 6 +- .../Instrumentation/SanitizerCoverage.cpp | 164 +- lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 54 +- .../Instrumentation/ValueProfileCollector.cpp | 78 + .../Instrumentation/ValueProfileCollector.h | 79 + .../Instrumentation/ValueProfilePlugins.inc | 75 + lib/Transforms/ObjCARC/PtrState.cpp | 4 + lib/Transforms/Scalar/AlignmentFromAssumptions.cpp | 8 +- lib/Transforms/Scalar/CallSiteSplitting.cpp | 2 +- lib/Transforms/Scalar/ConstantHoisting.cpp | 24 +- lib/Transforms/Scalar/ConstantProp.cpp | 2 +- .../Scalar/CorrelatedValuePropagation.cpp | 180 +- lib/Transforms/Scalar/DCE.cpp | 31 +- lib/Transforms/Scalar/DeadStoreElimination.cpp | 7 +- lib/Transforms/Scalar/DivRemPairs.cpp | 219 +- 
lib/Transforms/Scalar/EarlyCSE.cpp | 22 +- lib/Transforms/Scalar/FlattenCFGPass.cpp | 24 +- lib/Transforms/Scalar/Float2Int.cpp | 47 +- lib/Transforms/Scalar/GVN.cpp | 201 +- lib/Transforms/Scalar/GVNHoist.cpp | 17 +- lib/Transforms/Scalar/GuardWidening.cpp | 2 +- lib/Transforms/Scalar/IndVarSimplify.cpp | 389 +- lib/Transforms/Scalar/InferAddressSpaces.cpp | 38 +- lib/Transforms/Scalar/InstSimplifyPass.cpp | 48 +- lib/Transforms/Scalar/JumpThreading.cpp | 18 +- lib/Transforms/Scalar/LICM.cpp | 55 +- lib/Transforms/Scalar/LoopDataPrefetch.cpp | 4 +- lib/Transforms/Scalar/LoopDeletion.cpp | 2 +- lib/Transforms/Scalar/LoopFuse.cpp | 640 ++- lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 890 ++- lib/Transforms/Scalar/LoopInstSimplify.cpp | 5 +- lib/Transforms/Scalar/LoopInterchange.cpp | 62 +- lib/Transforms/Scalar/LoopLoadElimination.cpp | 3 +- lib/Transforms/Scalar/LoopPredication.cpp | 2 +- lib/Transforms/Scalar/LoopRerollPass.cpp | 3 +- lib/Transforms/Scalar/LoopRotation.cpp | 10 +- lib/Transforms/Scalar/LoopSimplifyCFG.cpp | 4 +- lib/Transforms/Scalar/LoopSink.cpp | 9 +- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 20 +- lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp | 8 +- lib/Transforms/Scalar/LoopUnrollPass.cpp | 128 +- lib/Transforms/Scalar/LoopUnswitch.cpp | 87 +- lib/Transforms/Scalar/LoopVersioningLICM.cpp | 31 - lib/Transforms/Scalar/LowerConstantIntrinsics.cpp | 170 + lib/Transforms/Scalar/LowerExpectIntrinsic.cpp | 33 +- lib/Transforms/Scalar/MemCpyOptimizer.cpp | 110 +- lib/Transforms/Scalar/MergeICmps.cpp | 2 +- lib/Transforms/Scalar/MergedLoadStoreMotion.cpp | 167 +- lib/Transforms/Scalar/NaryReassociate.cpp | 2 +- lib/Transforms/Scalar/NewGVN.cpp | 25 +- lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp | 2 +- lib/Transforms/Scalar/PlaceSafepoints.cpp | 6 +- lib/Transforms/Scalar/Reassociate.cpp | 190 +- lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 6 +- lib/Transforms/Scalar/SCCP.cpp | 75 +- lib/Transforms/Scalar/SROA.cpp | 40 +- lib/Transforms/Scalar/Scalar.cpp | 9 + .../Scalar/SeparateConstOffsetFromGEP.cpp | 2 +- lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 25 +- lib/Transforms/Scalar/SpeculateAroundPHIs.cpp | 6 +- lib/Transforms/Scalar/StructurizeCFG.cpp | 2 +- lib/Transforms/Scalar/TailRecursionElimination.cpp | 2 +- lib/Transforms/Utils/BasicBlockUtils.cpp | 64 +- lib/Transforms/Utils/BuildLibCalls.cpp | 94 +- lib/Transforms/Utils/BypassSlowDivision.cpp | 8 +- lib/Transforms/Utils/CanonicalizeAliases.cpp | 1 + lib/Transforms/Utils/CloneFunction.cpp | 15 + lib/Transforms/Utils/CloneModule.cpp | 18 +- lib/Transforms/Utils/CodeExtractor.cpp | 309 +- lib/Transforms/Utils/EntryExitInstrumenter.cpp | 2 +- lib/Transforms/Utils/Evaluator.cpp | 2 +- lib/Transforms/Utils/FlattenCFG.cpp | 20 +- lib/Transforms/Utils/FunctionImportUtils.cpp | 2 +- .../Utils/ImportedFunctionsInliningStatistics.cpp | 6 +- lib/Transforms/Utils/LibCallsShrinkWrap.cpp | 2 +- lib/Transforms/Utils/Local.cpp | 209 +- lib/Transforms/Utils/LoopRotationUtils.cpp | 27 +- lib/Transforms/Utils/LoopSimplify.cpp | 15 +- lib/Transforms/Utils/LoopUnroll.cpp | 12 +- lib/Transforms/Utils/LoopUnrollAndJam.cpp | 6 +- lib/Transforms/Utils/LoopUnrollPeel.cpp | 161 +- lib/Transforms/Utils/LoopUtils.cpp | 56 + lib/Transforms/Utils/LoopVersioning.cpp | 4 +- lib/Transforms/Utils/MetaRenamer.cpp | 5 +- lib/Transforms/Utils/MisExpect.cpp | 177 + lib/Transforms/Utils/ModuleUtils.cpp | 2 +- lib/Transforms/Utils/PredicateInfo.cpp | 80 +- lib/Transforms/Utils/SimplifyCFG.cpp | 250 +- 
lib/Transforms/Utils/SimplifyLibCalls.cpp | 688 ++- lib/Transforms/Utils/SymbolRewriter.cpp | 12 +- lib/Transforms/Utils/VNCoercion.cpp | 2 +- lib/Transforms/Utils/ValueMapper.cpp | 60 +- lib/Transforms/Vectorize/LoadStoreVectorizer.cpp | 26 +- .../Vectorize/LoopVectorizationLegality.cpp | 186 +- .../Vectorize/LoopVectorizationPlanner.h | 4 +- lib/Transforms/Vectorize/LoopVectorize.cpp | 738 ++- lib/Transforms/Vectorize/SLPVectorizer.cpp | 820 ++- lib/Transforms/Vectorize/VPlan.cpp | 19 +- lib/Transforms/Vectorize/VPlan.h | 4 + lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp | 2 +- lib/Transforms/Vectorize/VPlanSLP.cpp | 13 +- lib/WindowsManifest/WindowsManifestMerger.cpp | 4 +- lib/XRay/FDRRecordProducer.cpp | 37 +- lib/XRay/FileHeaderReader.cpp | 14 +- lib/XRay/InstrumentationMap.cpp | 17 +- lib/XRay/Profile.cpp | 10 +- lib/XRay/RecordInitializer.cpp | 202 +- lib/XRay/Trace.cpp | 43 +- tools/bugpoint/BugDriver.h | 7 +- tools/bugpoint/ExtractFunction.cpp | 3 +- tools/bugpoint/OptimizerDriver.cpp | 12 +- tools/bugpoint/ToolRunner.cpp | 16 +- tools/bugpoint/bugpoint.cpp | 46 +- tools/llc/llc.cpp | 27 +- tools/lli/lli.cpp | 71 +- tools/llvm-ar/llvm-ar.cpp | 192 +- tools/llvm-as/llvm-as.cpp | 2 +- tools/llvm-cov/CodeCoverage.cpp | 24 +- tools/llvm-cov/SourceCoverageView.cpp | 8 +- tools/llvm-cov/TestingSupport.cpp | 10 +- tools/llvm-cxxdump/llvm-cxxdump.cpp | 6 +- tools/llvm-cxxmap/llvm-cxxmap.cpp | 2 +- tools/llvm-dis/llvm-dis.cpp | 4 +- tools/llvm-dwarfdump/Statistics.cpp | 263 +- tools/llvm-dwarfdump/llvm-dwarfdump.cpp | 2 +- tools/llvm-extract/llvm-extract.cpp | 16 +- tools/llvm-ifs/CMakeLists.txt | 10 + tools/llvm-ifs/LLVMBuild.txt | 21 + tools/llvm-ifs/llvm-ifs.cpp | 532 ++ tools/llvm-link/llvm-link.cpp | 6 +- tools/llvm-lto/llvm-lto.cpp | 20 +- tools/llvm-lto2/llvm-lto2.cpp | 12 +- tools/llvm-mc/Disassembler.cpp | 15 +- tools/llvm-mc/Disassembler.h | 10 +- tools/llvm-mc/llvm-mc.cpp | 27 +- tools/llvm-mca/CodeRegion.cpp | 6 +- tools/llvm-mca/CodeRegionGenerator.cpp | 2 + tools/llvm-mca/Views/BottleneckAnalysis.cpp | 40 +- tools/llvm-mca/Views/BottleneckAnalysis.h | 8 +- tools/llvm-mca/Views/InstructionInfoView.cpp | 31 +- tools/llvm-mca/Views/InstructionInfoView.h | 13 +- tools/llvm-mca/Views/TimelineView.cpp | 50 +- tools/llvm-mca/Views/TimelineView.h | 1 + tools/llvm-mca/llvm-mca.cpp | 113 +- tools/llvm-modextract/llvm-modextract.cpp | 2 +- tools/llvm-nm/llvm-nm.cpp | 51 +- tools/llvm-objcopy/COFF/COFFObjcopy.cpp | 88 +- tools/llvm-objcopy/COFF/Reader.cpp | 18 +- tools/llvm-objcopy/COFF/Writer.cpp | 4 +- tools/llvm-objcopy/CommonOpts.td | 123 + tools/llvm-objcopy/CopyConfig.cpp | 370 +- tools/llvm-objcopy/CopyConfig.h | 110 +- tools/llvm-objcopy/ELF/ELFConfig.cpp | 133 + tools/llvm-objcopy/ELF/ELFConfig.h | 44 + tools/llvm-objcopy/ELF/ELFObjcopy.cpp | 169 +- tools/llvm-objcopy/ELF/Object.cpp | 252 +- tools/llvm-objcopy/ELF/Object.h | 56 +- tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp | 350 ++ tools/llvm-objcopy/MachO/MachOLayoutBuilder.h | 50 + tools/llvm-objcopy/MachO/MachOObjcopy.cpp | 30 +- tools/llvm-objcopy/MachO/MachOReader.cpp | 45 +- tools/llvm-objcopy/MachO/MachOReader.h | 3 + tools/llvm-objcopy/MachO/MachOWriter.cpp | 305 +- tools/llvm-objcopy/MachO/MachOWriter.h | 19 +- tools/llvm-objcopy/MachO/Object.h | 27 + tools/llvm-objcopy/ObjcopyOpts.td | 141 +- tools/llvm-objcopy/StripOpts.td | 103 +- tools/llvm-objcopy/llvm-objcopy.cpp | 53 +- tools/llvm-objdump/COFFDump.cpp | 77 +- tools/llvm-objdump/ELFDump.cpp | 2 +- tools/llvm-objdump/MachODump.cpp | 375 +- 
tools/llvm-objdump/llvm-objdump.cpp | 543 +- tools/llvm-objdump/llvm-objdump.h | 36 +- tools/llvm-pdbutil/BytesOutputStyle.cpp | 2 +- tools/llvm-pdbutil/DumpOutputStyle.cpp | 9 +- tools/llvm-pdbutil/ExplainOutputStyle.cpp | 2 +- tools/llvm-pdbutil/InputFile.cpp | 17 +- tools/llvm-pdbutil/MinimalSymbolDumper.cpp | 5 +- tools/llvm-pdbutil/PrettyTypeDumper.cpp | 4 +- tools/llvm-pdbutil/llvm-pdbutil.cpp | 10 +- tools/llvm-profdata/llvm-profdata.cpp | 287 +- tools/llvm-readobj/ARMEHABIPrinter.h | 19 +- tools/llvm-readobj/ARMWinEHPrinter.cpp | 9 +- tools/llvm-readobj/COFFDumper.cpp | 362 +- tools/llvm-readobj/DwarfCFIEHPrinter.h | 54 +- tools/llvm-readobj/ELFDumper.cpp | 1648 ++++-- tools/llvm-readobj/MachODumper.cpp | 61 +- tools/llvm-readobj/ObjDumper.cpp | 32 +- tools/llvm-readobj/ObjDumper.h | 11 +- tools/llvm-readobj/WasmDumper.cpp | 7 +- tools/llvm-readobj/Win64EHDumper.cpp | 13 +- tools/llvm-readobj/WindowsResourceDumper.cpp | 8 +- tools/llvm-readobj/XCOFFDumper.cpp | 402 +- tools/llvm-readobj/llvm-readobj.cpp | 148 +- tools/llvm-readobj/llvm-readobj.h | 25 +- tools/llvm-reduce/CMakeLists.txt | 26 + tools/llvm-reduce/DeltaManager.h | 36 + tools/llvm-reduce/LLVMBuild.txt | 24 + tools/llvm-reduce/TestRunner.cpp | 42 + tools/llvm-reduce/TestRunner.h | 46 + tools/llvm-reduce/deltas/Delta.cpp | 162 + tools/llvm-reduce/deltas/Delta.h | 76 + tools/llvm-reduce/deltas/ReduceArguments.cpp | 125 + tools/llvm-reduce/deltas/ReduceArguments.h | 21 + tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp | 146 + tools/llvm-reduce/deltas/ReduceBasicBlocks.h | 20 + tools/llvm-reduce/deltas/ReduceFunctions.cpp | 77 + tools/llvm-reduce/deltas/ReduceFunctions.h | 20 + tools/llvm-reduce/deltas/ReduceGlobalVars.cpp | 74 + tools/llvm-reduce/deltas/ReduceGlobalVars.h | 20 + tools/llvm-reduce/deltas/ReduceInstructions.cpp | 65 + tools/llvm-reduce/deltas/ReduceInstructions.h | 20 + tools/llvm-reduce/deltas/ReduceMetadata.cpp | 138 + tools/llvm-reduce/deltas/ReduceMetadata.h | 18 + tools/llvm-reduce/llvm-reduce.cpp | 114 + tools/llvm-rtdyld/llvm-rtdyld.cpp | 104 +- tools/llvm-stress/llvm-stress.cpp | 4 +- tools/llvm-symbolizer/llvm-symbolizer.cpp | 6 + tools/llvm-xray/func-id-helper.cpp | 2 +- tools/llvm-xray/xray-account.cpp | 2 +- tools/llvm-xray/xray-converter.cpp | 4 +- tools/llvm-xray/xray-extract.cpp | 2 +- tools/llvm-xray/xray-fdr-dump.cpp | 2 +- tools/llvm-xray/xray-graph-diff.cpp | 2 +- tools/llvm-xray/xray-graph.cpp | 2 +- tools/opt/opt.cpp | 13 +- tools/vfabi-demangle-fuzzer/CMakeLists.txt | 7 + .../vfabi-demangler-fuzzer.cpp | 26 + utils/TableGen/AsmMatcherEmitter.cpp | 19 +- utils/TableGen/AsmWriterEmitter.cpp | 3 +- utils/TableGen/CallingConvEmitter.cpp | 4 + utils/TableGen/CodeEmitterGen.cpp | 305 +- utils/TableGen/CodeGenDAGPatterns.cpp | 42 +- utils/TableGen/CodeGenDAGPatterns.h | 4 + utils/TableGen/CodeGenInstruction.cpp | 1 + utils/TableGen/CodeGenInstruction.h | 1 + utils/TableGen/CodeGenIntrinsics.h | 8 + utils/TableGen/CodeGenMapTable.cpp | 12 +- utils/TableGen/CodeGenRegisters.cpp | 52 +- utils/TableGen/CodeGenRegisters.h | 26 +- utils/TableGen/CodeGenSchedule.cpp | 24 +- utils/TableGen/CodeGenTarget.cpp | 70 +- utils/TableGen/CodeGenTarget.h | 6 + utils/TableGen/DAGISelEmitter.cpp | 2 +- utils/TableGen/DAGISelMatcher.h | 8 +- utils/TableGen/DAGISelMatcherEmitter.cpp | 22 +- utils/TableGen/DAGISelMatcherGen.cpp | 10 +- utils/TableGen/DAGISelMatcherOpt.cpp | 9 +- utils/TableGen/DFAEmitter.cpp | 394 ++ utils/TableGen/DFAEmitter.h | 107 + utils/TableGen/DFAPacketizerEmitter.cpp | 657 +-- 
utils/TableGen/DisassemblerEmitter.cpp | 2 +- utils/TableGen/FixedLenDecoderEmitter.cpp | 93 +- utils/TableGen/GICombinerEmitter.cpp | 452 ++ utils/TableGen/GlobalISel/CMakeLists.txt | 7 + utils/TableGen/GlobalISel/CodeExpander.cpp | 93 + utils/TableGen/GlobalISel/CodeExpander.h | 55 + utils/TableGen/GlobalISel/CodeExpansions.h | 43 + utils/TableGen/GlobalISelEmitter.cpp | 775 ++- utils/TableGen/InfoByHwMode.cpp | 11 + utils/TableGen/InfoByHwMode.h | 5 + utils/TableGen/InstrDocsEmitter.cpp | 2 +- utils/TableGen/InstrInfoEmitter.cpp | 52 +- utils/TableGen/IntrinsicEmitter.cpp | 20 +- utils/TableGen/RISCVCompressInstEmitter.cpp | 13 +- utils/TableGen/RegisterInfoEmitter.cpp | 4 +- utils/TableGen/SearchableTableEmitter.cpp | 16 +- utils/TableGen/SubtargetEmitter.cpp | 8 +- utils/TableGen/SubtargetFeatureInfo.cpp | 12 +- utils/TableGen/TableGen.cpp | 157 +- utils/TableGen/TableGenBackends.h | 2 + utils/TableGen/WebAssemblyDisassemblerEmitter.cpp | 2 +- utils/TableGen/X86DisassemblerTables.cpp | 2 +- utils/TableGen/X86EVEX2VEXTablesEmitter.cpp | 1 + utils/TableGen/X86RecognizableInstr.cpp | 14 +- utils/add_argument_names.py | 82 + utils/llvm-locstats/CMakeLists.txt | 12 + utils/llvm-locstats/llvm-locstats.py | 209 + 2120 files changed, 122706 insertions(+), 50754 deletions(-) create mode 100644 include/llvm/ADT/DirectedGraph.h delete mode 100644 include/llvm/ADT/VariadicFunction.h create mode 100644 include/llvm/Analysis/DDG.h create mode 100644 include/llvm/Analysis/DependenceGraphBuilder.h create mode 100644 include/llvm/Analysis/LoopCacheAnalysis.h create mode 100644 include/llvm/CodeGen/GlobalISel/GISelKnownBits.h create mode 100644 include/llvm/CodeGen/LiveRangeCalc.h create mode 100644 include/llvm/CodeGen/MachineLoopUtils.h create mode 100644 include/llvm/CodeGen/ModuloSchedule.h create mode 100644 include/llvm/DebugInfo/GSYM/FileWriter.h create mode 100644 include/llvm/DebugInfo/GSYM/GsymCreator.h create mode 100644 include/llvm/DebugInfo/GSYM/GsymReader.h create mode 100644 include/llvm/DebugInfo/GSYM/Header.h create mode 100644 include/llvm/DebugInfo/GSYM/LineTable.h create mode 100644 include/llvm/ExecutionEngine/JITLink/MachO_arm64.h create mode 100644 include/llvm/ExecutionEngine/Orc/SpeculateAnalyses.h create mode 100644 include/llvm/ExecutionEngine/Orc/Speculation.h create mode 100644 include/llvm/IR/FixedMetadataKinds.def create mode 100644 include/llvm/MC/MCRegister.h create mode 100644 include/llvm/MCA/CodeEmitter.h create mode 100644 include/llvm/Object/TapiFile.h create mode 100644 include/llvm/Object/TapiUniversal.h create mode 100644 include/llvm/ObjectYAML/yaml2obj.h create mode 100644 include/llvm/Remarks/BitstreamRemarkContainer.h create mode 100644 include/llvm/Remarks/BitstreamRemarkParser.h create mode 100644 include/llvm/Remarks/BitstreamRemarkSerializer.h create mode 100644 include/llvm/Remarks/YAMLRemarkSerializer.h create mode 100644 include/llvm/Support/Alignment.h create mode 100644 include/llvm/Support/Automaton.h create mode 100644 include/llvm/Support/FileCollector.h delete mode 100644 include/llvm/Support/JamCRC.h delete mode 100644 include/llvm/Support/MutexGuard.h delete mode 100644 include/llvm/Support/ScalableSize.h create mode 100644 include/llvm/Support/TypeSize.h delete mode 100644 include/llvm/Support/UniqueLock.h create mode 100644 include/llvm/TableGen/Automaton.td create mode 100644 include/llvm/Target/GlobalISel/Combine.td create mode 100644 include/llvm/TextAPI/MachO/Platform.h create mode 100644 include/llvm/TextAPI/MachO/Target.h create mode 
100644 include/llvm/Transforms/Instrumentation/SanitizerCoverage.h create mode 100644 include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h create mode 100644 include/llvm/Transforms/Utils/MisExpect.h create mode 100644 lib/Analysis/DDG.cpp create mode 100644 lib/Analysis/DependenceGraphBuilder.cpp create mode 100644 lib/Analysis/LoopCacheAnalysis.cpp create mode 100644 lib/Analysis/VFABIDemangling.cpp create mode 100644 lib/CodeGen/GlobalISel/GISelKnownBits.cpp delete mode 100644 lib/CodeGen/LiveRangeCalc.h create mode 100644 lib/CodeGen/MIRNamerPass.cpp create mode 100644 lib/CodeGen/MIRVRegNamerUtils.cpp create mode 100644 lib/CodeGen/MIRVRegNamerUtils.h create mode 100644 lib/CodeGen/MachineLoopUtils.cpp create mode 100644 lib/CodeGen/ModuloSchedule.cpp create mode 100644 lib/DebugInfo/GSYM/FileWriter.cpp create mode 100644 lib/DebugInfo/GSYM/GsymCreator.cpp create mode 100644 lib/DebugInfo/GSYM/GsymReader.cpp create mode 100644 lib/DebugInfo/GSYM/Header.cpp create mode 100644 lib/DebugInfo/GSYM/LineTable.cpp delete mode 100644 lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.cpp delete mode 100644 lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.h create mode 100644 lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp create mode 100644 lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h create mode 100644 lib/ExecutionEngine/JITLink/MachO_arm64.cpp create mode 100644 lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp create mode 100644 lib/ExecutionEngine/Orc/Speculation.cpp create mode 100644 lib/MCA/CodeEmitter.cpp create mode 100644 lib/Object/TapiFile.cpp create mode 100644 lib/Object/TapiUniversal.cpp create mode 100644 lib/ObjectYAML/COFFEmitter.cpp create mode 100644 lib/ObjectYAML/ELFEmitter.cpp create mode 100644 lib/ObjectYAML/MachOEmitter.cpp create mode 100644 lib/ObjectYAML/MinidumpEmitter.cpp create mode 100644 lib/ObjectYAML/WasmEmitter.cpp create mode 100644 lib/ObjectYAML/yaml2obj.cpp create mode 100644 lib/Remarks/BitstreamRemarkParser.cpp create mode 100644 lib/Remarks/BitstreamRemarkParser.h create mode 100644 lib/Remarks/BitstreamRemarkSerializer.cpp create mode 100644 lib/Remarks/RemarkSerializer.cpp create mode 100644 lib/Support/ABIBreak.cpp create mode 100644 lib/Support/FileCheckImpl.h create mode 100644 lib/Support/FileCollector.cpp delete mode 100644 lib/Support/JamCRC.cpp delete mode 100644 lib/Support/Mutex.cpp delete mode 100644 lib/Support/Unix/Mutex.inc delete mode 100644 lib/Support/Unix/RWMutex.inc delete mode 100644 lib/Support/Windows/Mutex.inc delete mode 100644 lib/Support/Windows/RWMutex.inc create mode 100644 lib/Target/AArch64/AArch64Combine.td create mode 100644 lib/Target/AArch64/AArch64StackOffset.h create mode 100644 lib/Target/AArch64/AArch64StackTaggingPreRA.cpp create mode 100644 lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp create mode 100644 lib/Target/ARM/MVETailPredication.cpp create mode 100644 lib/Target/ARM/MVEVPTBlockPass.cpp create mode 100644 lib/Target/Mips/MipsPfmCounters.td create mode 100644 lib/Target/RISCV/RISCVCallLowering.cpp create mode 100644 lib/Target/RISCV/RISCVCallLowering.h create mode 100644 lib/Target/RISCV/RISCVInstructionSelector.cpp create mode 100644 lib/Target/RISCV/RISCVLegalizerInfo.cpp create mode 100644 lib/Target/RISCV/RISCVLegalizerInfo.h create mode 100644 lib/Target/RISCV/RISCVRegisterBankInfo.cpp create mode 100644 lib/Target/RISCV/RISCVRegisterBankInfo.h create mode 100644 lib/Target/RISCV/RISCVRegisterBanks.td delete mode 100644 lib/Target/SystemZ/SystemZExpandPseudo.cpp delete mode 100644 
lib/Target/SystemZ/SystemZScheduleArch13.td create mode 100644 lib/Target/SystemZ/SystemZScheduleZ15.td create mode 100644 lib/Target/X86/X86AvoidTrailingCall.cpp create mode 100644 lib/TextAPI/MachO/Platform.cpp create mode 100644 lib/TextAPI/MachO/Target.cpp create mode 100644 lib/Transforms/Instrumentation/ValueProfileCollector.cpp create mode 100644 lib/Transforms/Instrumentation/ValueProfileCollector.h create mode 100644 lib/Transforms/Instrumentation/ValueProfilePlugins.inc create mode 100644 lib/Transforms/Scalar/LowerConstantIntrinsics.cpp create mode 100644 lib/Transforms/Utils/MisExpect.cpp create mode 100644 tools/llvm-ifs/CMakeLists.txt create mode 100644 tools/llvm-ifs/LLVMBuild.txt create mode 100644 tools/llvm-ifs/llvm-ifs.cpp create mode 100644 tools/llvm-objcopy/CommonOpts.td create mode 100644 tools/llvm-objcopy/ELF/ELFConfig.cpp create mode 100644 tools/llvm-objcopy/ELF/ELFConfig.h create mode 100644 tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp create mode 100644 tools/llvm-objcopy/MachO/MachOLayoutBuilder.h create mode 100644 tools/llvm-reduce/CMakeLists.txt create mode 100644 tools/llvm-reduce/DeltaManager.h create mode 100644 tools/llvm-reduce/LLVMBuild.txt create mode 100644 tools/llvm-reduce/TestRunner.cpp create mode 100644 tools/llvm-reduce/TestRunner.h create mode 100644 tools/llvm-reduce/deltas/Delta.cpp create mode 100644 tools/llvm-reduce/deltas/Delta.h create mode 100644 tools/llvm-reduce/deltas/ReduceArguments.cpp create mode 100644 tools/llvm-reduce/deltas/ReduceArguments.h create mode 100644 tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp create mode 100644 tools/llvm-reduce/deltas/ReduceBasicBlocks.h create mode 100644 tools/llvm-reduce/deltas/ReduceFunctions.cpp create mode 100644 tools/llvm-reduce/deltas/ReduceFunctions.h create mode 100644 tools/llvm-reduce/deltas/ReduceGlobalVars.cpp create mode 100644 tools/llvm-reduce/deltas/ReduceGlobalVars.h create mode 100644 tools/llvm-reduce/deltas/ReduceInstructions.cpp create mode 100644 tools/llvm-reduce/deltas/ReduceInstructions.h create mode 100644 tools/llvm-reduce/deltas/ReduceMetadata.cpp create mode 100644 tools/llvm-reduce/deltas/ReduceMetadata.h create mode 100644 tools/llvm-reduce/llvm-reduce.cpp create mode 100644 tools/vfabi-demangle-fuzzer/CMakeLists.txt create mode 100644 tools/vfabi-demangle-fuzzer/vfabi-demangler-fuzzer.cpp create mode 100644 utils/TableGen/DFAEmitter.cpp create mode 100644 utils/TableGen/DFAEmitter.h create mode 100644 utils/TableGen/GICombinerEmitter.cpp create mode 100644 utils/TableGen/GlobalISel/CMakeLists.txt create mode 100644 utils/TableGen/GlobalISel/CodeExpander.cpp create mode 100644 utils/TableGen/GlobalISel/CodeExpander.h create mode 100644 utils/TableGen/GlobalISel/CodeExpansions.h create mode 100755 utils/add_argument_names.py create mode 100644 utils/llvm-locstats/CMakeLists.txt create mode 100755 utils/llvm-locstats/llvm-locstats.py diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h index cac2f297056d..b84970956666 100644 --- a/include/llvm-c/Core.h +++ b/include/llvm-c/Core.h @@ -370,9 +370,13 @@ typedef enum { LLVMAtomicRMWBinOpUMax, /**< Sets the value if it's greater than the original using an unsigned comparison and return the old one */ - LLVMAtomicRMWBinOpUMin /**< Sets the value if it's greater than the - original using an unsigned comparison and return - the old one */ + LLVMAtomicRMWBinOpUMin, /**< Sets the value if it's greater than the + original using an unsigned comparison and return + the old one */ + LLVMAtomicRMWBinOpFAdd, /**< Add a 
floating point value and return the + old one */ + LLVMAtomicRMWBinOpFSub /**< Subtract a floating point value and return the + old one */ } LLVMAtomicRMWBinOp; typedef enum { @@ -1539,6 +1543,7 @@ LLVMTypeRef LLVMX86MMXType(void); macro(GlobalVariable) \ macro(UndefValue) \ macro(Instruction) \ + macro(UnaryOperator) \ macro(BinaryOperator) \ macro(CallInst) \ macro(IntrinsicInst) \ @@ -1571,6 +1576,8 @@ LLVMTypeRef LLVMX86MMXType(void); macro(ResumeInst) \ macro(CleanupReturnInst) \ macro(CatchReturnInst) \ + macro(CatchSwitchInst) \ + macro(CallBrInst) \ macro(FuncletPadInst) \ macro(CatchPadInst) \ macro(CleanupPadInst) \ @@ -1592,7 +1599,10 @@ LLVMTypeRef LLVMX86MMXType(void); macro(ZExtInst) \ macro(ExtractValueInst) \ macro(LoadInst) \ - macro(VAArgInst) + macro(VAArgInst) \ + macro(AtomicCmpXchgInst) \ + macro(AtomicRMWInst) \ + macro(FenceInst) /** * @defgroup LLVMCCoreValueGeneral General APIs @@ -3807,8 +3817,12 @@ LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str, const char *Name); LLVMBool LLVMGetVolatile(LLVMValueRef MemoryAccessInst); void LLVMSetVolatile(LLVMValueRef MemoryAccessInst, LLVMBool IsVolatile); +LLVMBool LLVMGetWeak(LLVMValueRef CmpXchgInst); +void LLVMSetWeak(LLVMValueRef CmpXchgInst, LLVMBool IsWeak); LLVMAtomicOrdering LLVMGetOrdering(LLVMValueRef MemoryAccessInst); void LLVMSetOrdering(LLVMValueRef MemoryAccessInst, LLVMAtomicOrdering Ordering); +LLVMAtomicRMWBinOp LLVMGetAtomicRMWBinOp(LLVMValueRef AtomicRMWInst); +void LLVMSetAtomicRMWBinOp(LLVMValueRef AtomicRMWInst, LLVMAtomicRMWBinOp BinOp); /* Casts */ LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef, LLVMValueRef Val, diff --git a/include/llvm-c/DebugInfo.h b/include/llvm-c/DebugInfo.h index 33c8110a863c..41e9f96bbb92 100644 --- a/include/llvm-c/DebugInfo.h +++ b/include/llvm-c/DebugInfo.h @@ -32,7 +32,7 @@ typedef enum { LLVMDIFlagPublic = 3, LLVMDIFlagFwdDecl = 1 << 2, LLVMDIFlagAppleBlock = 1 << 3, - LLVMDIFlagBlockByrefStruct = 1 << 4, + LLVMDIFlagReservedBit4 = 1 << 4, LLVMDIFlagVirtual = 1 << 5, LLVMDIFlagArtificial = 1 << 6, LLVMDIFlagExplicit = 1 << 7, @@ -169,6 +169,19 @@ typedef unsigned LLVMMetadataKind; */ typedef unsigned LLVMDWARFTypeEncoding; +/** + * Describes the kind of macro declaration used for LLVMDIBuilderCreateMacro. + * @see llvm::dwarf::MacinfoRecordType + * @note Values are from DW_MACINFO_* constants in the DWARF specification. + */ +typedef enum { + LLVMDWARFMacinfoRecordTypeDefine = 0x01, + LLVMDWARFMacinfoRecordTypeMacro = 0x02, + LLVMDWARFMacinfoRecordTypeStartFile = 0x03, + LLVMDWARFMacinfoRecordTypeEndFile = 0x04, + LLVMDWARFMacinfoRecordTypeVendorExt = 0xff +} LLVMDWARFMacinfoRecordType; + /** * The current debug metadata version number. */ @@ -521,6 +534,38 @@ LLVMDIBuilderCreateSubroutineType(LLVMDIBuilderRef Builder, unsigned NumParameterTypes, LLVMDIFlags Flags); +/** + * Create debugging information entry for a macro. + * @param Builder The DIBuilder. + * @param ParentMacroFile Macro parent (could be NULL). + * @param Line Source line number where the macro is defined. + * @param RecordType DW_MACINFO_define or DW_MACINFO_undef. + * @param Name Macro name. + * @param NameLen Macro name length. + * @param Value Macro value. + * @param ValueLen Macro value length. 
+ */ +LLVMMetadataRef LLVMDIBuilderCreateMacro(LLVMDIBuilderRef Builder, + LLVMMetadataRef ParentMacroFile, + unsigned Line, + LLVMDWARFMacinfoRecordType RecordType, + const char *Name, size_t NameLen, + const char *Value, size_t ValueLen); + +/** + * Create debugging information temporary entry for a macro file. + * List of macro node direct children will be calculated by DIBuilder, + * using the \p ParentMacroFile relationship. + * @param Builder The DIBuilder. + * @param ParentMacroFile Macro parent (could be NULL). + * @param Line Source line number where the macro file is included. + * @param File File descriptor containing the name of the macro file. + */ +LLVMMetadataRef +LLVMDIBuilderCreateTempMacroFile(LLVMDIBuilderRef Builder, + LLVMMetadataRef ParentMacroFile, unsigned Line, + LLVMMetadataRef File); + /** * Create debugging information entry for an enumerator. * @param Builder The DIBuilder. diff --git a/include/llvm-c/Remarks.h b/include/llvm-c/Remarks.h index 88eb5120c57c..5444aebddd60 100644 --- a/include/llvm-c/Remarks.h +++ b/include/llvm-c/Remarks.h @@ -30,7 +30,8 @@ extern "C" { * @{ */ -#define REMARKS_API_VERSION 0 +// 0 -> 1: Bitstream remarks support. +#define REMARKS_API_VERSION 1 /** * The type of the emitted remark. @@ -240,6 +241,20 @@ typedef struct LLVMRemarkOpaqueParser *LLVMRemarkParserRef; extern LLVMRemarkParserRef LLVMRemarkParserCreateYAML(const void *Buf, uint64_t Size); +/** + * Creates a remark parser that can be used to parse the buffer located in \p + * Buf of size \p Size bytes. + * + * \p Buf cannot be `NULL`. + * + * This function should be paired with LLVMRemarkParserDispose() to avoid + * leaking resources. + * + * \since REMARKS_API_VERSION=1 + */ +extern LLVMRemarkParserRef LLVMRemarkParserCreateBitstream(const void *Buf, + uint64_t Size); + /** * Returns the next remark in the file. * diff --git a/include/llvm-c/Transforms/IPO.h b/include/llvm-c/Transforms/IPO.h index 7a82ed464141..51d007581283 100644 --- a/include/llvm-c/Transforms/IPO.h +++ b/include/llvm-c/Transforms/IPO.h @@ -34,6 +34,9 @@ void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM); /** See llvm::createConstantMergePass function. */ void LLVMAddConstantMergePass(LLVMPassManagerRef PM); +/** See llvm::createMergeFunctionsPass function. */ +void LLVMAddMergeFunctionsPass(LLVMPassManagerRef PM); + /** See llvm::createCalledValuePropagationPass function. */ void LLVMAddCalledValuePropagationPass(LLVMPassManagerRef PM); @@ -67,6 +70,21 @@ void LLVMAddIPSCCPPass(LLVMPassManagerRef PM); /** See llvm::createInternalizePass function. */ void LLVMAddInternalizePass(LLVMPassManagerRef, unsigned AllButMain); +/** + * Create and add the internalize pass to the given pass manager with the + * provided preservation callback. + * + * The context parameter is forwarded to the callback on each invocation. + * As such, it is the responsibility of the caller to extend its lifetime + * until execution of this pass has finished. + * + * @see llvm::createInternalizePass function. + */ +void LLVMAddInternalizePassWithMustPreservePredicate( + LLVMPassManagerRef PM, + void *Context, + LLVMBool (*MustPreserve)(LLVMValueRef, void *)); + /** See llvm::createStripDeadPrototypesPass function. 
*/ void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM); diff --git a/include/llvm-c/Transforms/Scalar.h b/include/llvm-c/Transforms/Scalar.h index 031cf98b2df2..6f3a3d8b3750 100644 --- a/include/llvm-c/Transforms/Scalar.h +++ b/include/llvm-c/Transforms/Scalar.h @@ -35,6 +35,9 @@ extern "C" { /** See llvm::createAggressiveDCEPass function. */ void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM); +/** See llvm::createDeadCodeEliminationPass function. */ +void LLVMAddDCEPass(LLVMPassManagerRef PM); + /** See llvm::createBitTrackingDCEPass function. */ void LLVMAddBitTrackingDCEPass(LLVMPassManagerRef PM); @@ -144,6 +147,9 @@ void LLVMAddEarlyCSEMemSSAPass(LLVMPassManagerRef PM); /** See llvm::createLowerExpectIntrinsicPass function */ void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM); +/** See llvm::createLowerConstantIntrinsicsPass function */ +void LLVMAddLowerConstantIntrinsicsPass(LLVMPassManagerRef PM); + /** See llvm::createTypeBasedAliasAnalysisPass function */ void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM); diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h index 2467722b1954..41e6067cf44f 100644 --- a/include/llvm-c/lto.h +++ b/include/llvm-c/lto.h @@ -44,7 +44,7 @@ typedef bool lto_bool_t; * @{ */ -#define LTO_API_VERSION 24 +#define LTO_API_VERSION 25 /** * \since prior to LTO_API_VERSION=3 @@ -550,6 +550,56 @@ extern void lto_codegen_set_should_embed_uselists(lto_code_gen_t cg, lto_bool_t ShouldEmbedUselists); +/** Opaque reference to an LTO input file */ +typedef struct LLVMOpaqueLTOInput *lto_input_t; + +/** + * Creates an LTO input file from a buffer. The path + * argument is used for diagnotics as this function + * otherwise does not know which file the given buffer + * is associated with. + * + * \since LTO_API_VERSION=24 + */ +extern lto_input_t lto_input_create(const void *buffer, + size_t buffer_size, + const char *path); + +/** + * Frees all memory internally allocated by the LTO input file. + * Upon return the lto_module_t is no longer valid. + * + * \since LTO_API_VERSION=24 + */ +extern void lto_input_dispose(lto_input_t input); + +/** + * Returns the number of dependent library specifiers + * for the given LTO input file. + * + * \since LTO_API_VERSION=24 + */ +extern unsigned lto_input_get_num_dependent_libraries(lto_input_t input); + +/** + * Returns the ith dependent library specifier + * for the given LTO input file. The returned + * string is not null-terminated. + * + * \since LTO_API_VERSION=24 + */ +extern const char * lto_input_get_dependent_library(lto_input_t input, + size_t index, + size_t *size); + +/** + * Returns the list of libcall symbols that can be generated by LTO + * that might not be visible from the symbol table of bitcode files. + * + * \since prior to LTO_API_VERSION=25 + */ +extern const char *const *lto_runtime_lib_symbols_list(size_t *size); + /** * @} // endgoup LLVMCLTO * @defgroup LLVMCTLTO ThinLTO @@ -846,48 +896,6 @@ thinlto_codegen_set_cache_size_megabytes(thinlto_code_gen_t cg, extern void thinlto_codegen_set_cache_size_files(thinlto_code_gen_t cg, unsigned max_size_files); -/** Opaque reference to an LTO input file */ -typedef struct LLVMOpaqueLTOInput *lto_input_t; - -/** - * Creates an LTO input file from a buffer. The path - * argument is used for diagnotics as this function - * otherwise does not know which file the given buffer - * is associated with. 
- * - * \since LTO_API_VERSION=24 - */ -extern lto_input_t lto_input_create(const void *buffer, - size_t buffer_size, - const char *path); - -/** - * Frees all memory internally allocated by the LTO input file. - * Upon return the lto_module_t is no longer valid. - * - * \since LTO_API_VERSION=24 - */ -extern void lto_input_dispose(lto_input_t input); - -/** - * Returns the number of dependent library specifiers - * for the given LTO input file. - * - * \since LTO_API_VERSION=24 - */ -extern unsigned lto_input_get_num_dependent_libraries(lto_input_t input); - -/** - * Returns the ith dependent library specifier - * for the given LTO input file. The returned - * string is not null-terminated. - * - * \since LTO_API_VERSION=24 - */ -extern const char * lto_input_get_dependent_library(lto_input_t input, - size_t index, - size_t *size); - /** * @} // endgroup LLVMCTLTO_CACHING */ diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h index a9648d35cf5d..1c4969733791 100644 --- a/include/llvm/ADT/APFloat.h +++ b/include/llvm/ADT/APFloat.h @@ -192,6 +192,11 @@ struct APFloatBase { /// IEEE-754R 7: Default exception handling. /// /// opUnderflow or opOverflow are always returned or-ed with opInexact. + /// + /// APFloat models this behavior specified by IEEE-754: + /// "For operations producing results in floating-point format, the default + /// result of an operation that signals the invalid operation exception + /// shall be a quiet NaN." enum opStatus { opOK = 0x00, opInvalidOp = 0x01, diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h index 2381b75e08b1..8dce5a621bb3 100644 --- a/include/llvm/ADT/APInt.h +++ b/include/llvm/ADT/APInt.h @@ -1467,6 +1467,13 @@ public: U.pVal[whichWord(BitPosition)] &= Mask; } + /// Set bottom loBits bits to 0. + void clearLowBits(unsigned loBits) { + assert(loBits <= BitWidth && "More bits than bitwidth"); + APInt Keep = getHighBitsSet(BitWidth, BitWidth - loBits); + *this &= Keep; + } + /// Set the sign bit to 0. void clearSignBit() { clearBit(BitWidth - 1); @@ -1496,9 +1503,11 @@ public: /// Insert the bits from a smaller APInt starting at bitPosition. void insertBits(const APInt &SubBits, unsigned bitPosition); + void insertBits(uint64_t SubBits, unsigned bitPosition, unsigned numBits); /// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits). APInt extractBits(unsigned numBits, unsigned bitPosition) const; + uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const; /// @} /// \name Value Characterization Functions diff --git a/include/llvm/ADT/Any.h b/include/llvm/ADT/Any.h index 5dcd6e73c54f..49657e02a991 100644 --- a/include/llvm/ADT/Any.h +++ b/include/llvm/ADT/Any.h @@ -38,7 +38,7 @@ class Any { explicit StorageImpl(T &&Value) : Value(std::move(Value)) {} std::unique_ptr clone() const override { - return llvm::make_unique>(Value); + return std::make_unique>(Value); } const void *id() const override { return &TypeId::Id; } @@ -78,7 +78,7 @@ public: int>::type = 0> Any(T &&Value) { using U = typename std::decay::type; - Storage = llvm::make_unique>(std::forward(Value)); + Storage = std::make_unique>(std::forward(Value)); } Any(Any &&Other) : Storage(std::move(Other.Storage)) {} diff --git a/include/llvm/ADT/ArrayRef.h b/include/llvm/ADT/ArrayRef.h index 773c88f7c9f9..f6455d3fa412 100644 --- a/include/llvm/ADT/ArrayRef.h +++ b/include/llvm/ADT/ArrayRef.h @@ -481,6 +481,12 @@ namespace llvm { return Vec; } + /// Construct an ArrayRef from a std::array. 
+ template + ArrayRef makeArrayRef(const std::array &Arr) { + return Arr; + } + /// Construct an ArrayRef from an ArrayRef (no-op) (const) template ArrayRef makeArrayRef(const ArrayRef &Vec) { return Vec; diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h index a05cf8130d3c..948a6e6bfb38 100644 --- a/include/llvm/ADT/DenseMap.h +++ b/include/llvm/ADT/DenseMap.h @@ -38,33 +38,7 @@ namespace detail { // implementation without requiring two members. template struct DenseMapPair : public std::pair { - - // FIXME: Switch to inheriting constructors when we drop support for older - // clang versions. - // NOTE: This default constructor is declared with '{}' rather than - // '= default' to work around a separate bug in clang-3.8. This can - // also go when we switch to inheriting constructors. - DenseMapPair() {} - - DenseMapPair(const KeyT &Key, const ValueT &Value) - : std::pair(Key, Value) {} - - DenseMapPair(KeyT &&Key, ValueT &&Value) - : std::pair(std::move(Key), std::move(Value)) {} - - template - DenseMapPair(AltKeyT &&AltKey, AltValueT &&AltValue, - typename std::enable_if< - std::is_convertible::value && - std::is_convertible::value>::type * = 0) - : std::pair(std::forward(AltKey), - std::forward(AltValue)) {} - - template - DenseMapPair(AltPairT &&AltPair, - typename std::enable_if>::value>::type * = nullptr) - : std::pair(std::forward(AltPair)) {} + using std::pair::pair; KeyT &getFirst() { return std::pair::first; } const KeyT &getFirst() const { return std::pair::first; } @@ -748,7 +722,7 @@ public: ~DenseMap() { this->destroyAll(); - operator delete(Buckets); + deallocate_buffer(Buckets, sizeof(BucketT) * NumBuckets, alignof(BucketT)); } void swap(DenseMap& RHS) { @@ -768,7 +742,7 @@ public: DenseMap& operator=(DenseMap &&other) { this->destroyAll(); - operator delete(Buckets); + deallocate_buffer(Buckets, sizeof(BucketT) * NumBuckets, alignof(BucketT)); init(0); swap(other); return *this; @@ -776,7 +750,7 @@ public: void copyFrom(const DenseMap& other) { this->destroyAll(); - operator delete(Buckets); + deallocate_buffer(Buckets, sizeof(BucketT) * NumBuckets, alignof(BucketT)); if (allocateBuckets(other.NumBuckets)) { this->BaseT::copyFrom(other); } else { @@ -809,10 +783,12 @@ public: this->moveFromOldBuckets(OldBuckets, OldBuckets+OldNumBuckets); // Free the old table. - operator delete(OldBuckets); + deallocate_buffer(OldBuckets, sizeof(BucketT) * OldNumBuckets, + alignof(BucketT)); } void shrink_and_clear() { + unsigned OldNumBuckets = NumBuckets; unsigned OldNumEntries = NumEntries; this->destroyAll(); @@ -825,7 +801,8 @@ public: return; } - operator delete(Buckets); + deallocate_buffer(Buckets, sizeof(BucketT) * OldNumBuckets, + alignof(BucketT)); init(NewNumBuckets); } @@ -861,7 +838,8 @@ private: return false; } - Buckets = static_cast(operator new(sizeof(BucketT) * NumBuckets)); + Buckets = static_cast( + allocate_buffer(sizeof(BucketT) * NumBuckets, alignof(BucketT))); return true; } }; @@ -1076,7 +1054,8 @@ public: this->moveFromOldBuckets(OldRep.Buckets, OldRep.Buckets+OldRep.NumBuckets); // Free the old table. 
- operator delete(OldRep.Buckets); + deallocate_buffer(OldRep.Buckets, sizeof(BucketT) * OldRep.NumBuckets, + alignof(BucketT)); } void shrink_and_clear() { @@ -1160,15 +1139,17 @@ private: if (Small) return; - operator delete(getLargeRep()->Buckets); + deallocate_buffer(getLargeRep()->Buckets, + sizeof(BucketT) * getLargeRep()->NumBuckets, + alignof(BucketT)); getLargeRep()->~LargeRep(); } LargeRep allocateBuckets(unsigned Num) { assert(Num > InlineBuckets && "Must allocate more buckets than are inline"); - LargeRep Rep = { - static_cast(operator new(sizeof(BucketT) * Num)), Num - }; + LargeRep Rep = {static_cast(allocate_buffer( + sizeof(BucketT) * Num, alignof(BucketT))), + Num}; return Rep; } }; diff --git a/include/llvm/ADT/DenseMapInfo.h b/include/llvm/ADT/DenseMapInfo.h index 5ef6f3ad1b04..bd4c60c8f13e 100644 --- a/include/llvm/ADT/DenseMapInfo.h +++ b/include/llvm/ADT/DenseMapInfo.h @@ -17,7 +17,7 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/PointerLikeTypeTraits.h" -#include "llvm/Support/ScalableSize.h" +#include "llvm/Support/TypeSize.h" #include #include #include @@ -67,6 +67,17 @@ template<> struct DenseMapInfo { } }; +// Provide DenseMapInfo for unsigned chars. +template <> struct DenseMapInfo { + static inline unsigned char getEmptyKey() { return ~0; } + static inline unsigned char getTombstoneKey() { return ~0 - 1; } + static unsigned getHashValue(const unsigned char &Val) { return Val * 37U; } + + static bool isEqual(const unsigned char &LHS, const unsigned char &RHS) { + return LHS == RHS; + } +}; + // Provide DenseMapInfo for unsigned shorts. template <> struct DenseMapInfo { static inline unsigned short getEmptyKey() { return 0xFFFF; } diff --git a/include/llvm/ADT/DirectedGraph.h b/include/llvm/ADT/DirectedGraph.h new file mode 100644 index 000000000000..f6a358d99cd2 --- /dev/null +++ b/include/llvm/ADT/DirectedGraph.h @@ -0,0 +1,270 @@ +//===- llvm/ADT/DirectedGraph.h - Directed Graph ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface and a base class implementation for a +// directed graph. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_DIRECTEDGRAPH_H +#define LLVM_ADT_DIRECTEDGRAPH_H + +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +/// Represent an edge in the directed graph. +/// The edge contains the target node it connects to. +template class DGEdge { +public: + DGEdge() = delete; + /// Create an edge pointing to the given node \p N. + explicit DGEdge(NodeType &N) : TargetNode(N) {} + explicit DGEdge(const DGEdge &E) + : TargetNode(E.TargetNode) {} + DGEdge &operator=(const DGEdge &E) { + TargetNode = E.TargetNode; + return *this; + } + + /// Static polymorphism: delegate implementation (via isEqualTo) to the + /// derived class. + bool operator==(const EdgeType &E) const { return getDerived().isEqualTo(E); } + bool operator!=(const EdgeType &E) const { return !operator==(E); } + + /// Retrieve the target node this edge connects to. 
+ const NodeType &getTargetNode() const { return TargetNode; } + NodeType &getTargetNode() { + return const_cast( + static_cast &>(*this).getTargetNode()); + } + +protected: + // As the default implementation use address comparison for equality. + bool isEqualTo(const EdgeType &E) const { return this == &E; } + + // Cast the 'this' pointer to the derived type and return a reference. + EdgeType &getDerived() { return *static_cast(this); } + const EdgeType &getDerived() const { + return *static_cast(this); + } + + // The target node this edge connects to. + NodeType &TargetNode; +}; + +/// Represent a node in the directed graph. +/// The node has a (possibly empty) list of outgoing edges. +template class DGNode { +public: + using EdgeListTy = SetVector; + using iterator = typename EdgeListTy::iterator; + using const_iterator = typename EdgeListTy::const_iterator; + + /// Create a node with a single outgoing edge \p E. + explicit DGNode(EdgeType &E) : Edges() { Edges.insert(&E); } + DGNode() = default; + + explicit DGNode(const DGNode &N) : Edges(N.Edges) {} + DGNode(DGNode &&N) : Edges(std::move(N.Edges)) {} + + DGNode &operator=(const DGNode &N) { + Edges = N.Edges; + return *this; + } + DGNode &operator=(const DGNode &&N) { + Edges = std::move(N.Edges); + return *this; + } + + /// Static polymorphism: delegate implementation (via isEqualTo) to the + /// derived class. + bool operator==(const NodeType &N) const { return getDerived().isEqualTo(N); } + bool operator!=(const NodeType &N) const { return !operator==(N); } + + const_iterator begin() const { return Edges.begin(); } + const_iterator end() const { return Edges.end(); } + iterator begin() { return Edges.begin(); } + iterator end() { return Edges.end(); } + const EdgeType &front() const { return *Edges.front(); } + EdgeType &front() { return *Edges.front(); } + const EdgeType &back() const { return *Edges.back(); } + EdgeType &back() { return *Edges.back(); } + + /// Collect in \p EL, all the edges from this node to \p N. + /// Return true if at least one edge was found, and false otherwise. + /// Note that this implementation allows more than one edge to connect + /// a given pair of nodes. + bool findEdgesTo(const NodeType &N, SmallVectorImpl &EL) const { + assert(EL.empty() && "Expected the list of edges to be empty."); + for (auto *E : Edges) + if (E->getTargetNode() == N) + EL.push_back(E); + return !EL.empty(); + } + + /// Add the given edge \p E to this node, if it doesn't exist already. Returns + /// true if the edge is added and false otherwise. + bool addEdge(EdgeType &E) { return Edges.insert(&E); } + + /// Remove the given edge \p E from this node, if it exists. + void removeEdge(EdgeType &E) { Edges.remove(&E); } + + /// Test whether there is an edge that goes from this node to \p N. + bool hasEdgeTo(const NodeType &N) const { + return (findEdgeTo(N) != Edges.end()); + } + + /// Retrieve the outgoing edges for the node. + const EdgeListTy &getEdges() const { return Edges; } + EdgeListTy &getEdges() { + return const_cast( + static_cast &>(*this).Edges); + } + + /// Clear the outgoing edges. + void clear() { Edges.clear(); } + +protected: + // As the default implementation use address comparison for equality. + bool isEqualTo(const NodeType &N) const { return this == &N; } + + // Cast the 'this' pointer to the derived type and return a reference. + NodeType &getDerived() { return *static_cast(this); } + const NodeType &getDerived() const { + return *static_cast(this); + } + + /// Find an edge to \p N. 
If more than one edge exists, this will return + /// the first one in the list of edges. + const_iterator findEdgeTo(const NodeType &N) const { + return llvm::find_if( + Edges, [&N](const EdgeType *E) { return E->getTargetNode() == N; }); + } + + // The list of outgoing edges. + EdgeListTy Edges; +}; + +/// Directed graph +/// +/// The graph is represented by a table of nodes. +/// Each node contains a (possibly empty) list of outgoing edges. +/// Each edge contains the target node it connects to. +template class DirectedGraph { +protected: + using NodeListTy = SmallVector; + using EdgeListTy = SmallVector; +public: + using iterator = typename NodeListTy::iterator; + using const_iterator = typename NodeListTy::const_iterator; + using DGraphType = DirectedGraph; + + DirectedGraph() = default; + explicit DirectedGraph(NodeType &N) : Nodes() { addNode(N); } + DirectedGraph(const DGraphType &G) : Nodes(G.Nodes) {} + DirectedGraph(DGraphType &&RHS) : Nodes(std::move(RHS.Nodes)) {} + DGraphType &operator=(const DGraphType &G) { + Nodes = G.Nodes; + return *this; + } + DGraphType &operator=(const DGraphType &&G) { + Nodes = std::move(G.Nodes); + return *this; + } + + const_iterator begin() const { return Nodes.begin(); } + const_iterator end() const { return Nodes.end(); } + iterator begin() { return Nodes.begin(); } + iterator end() { return Nodes.end(); } + const NodeType &front() const { return *Nodes.front(); } + NodeType &front() { return *Nodes.front(); } + const NodeType &back() const { return *Nodes.back(); } + NodeType &back() { return *Nodes.back(); } + + size_t size() const { return Nodes.size(); } + + /// Find the given node \p N in the table. + const_iterator findNode(const NodeType &N) const { + return llvm::find_if(Nodes, + [&N](const NodeType *Node) { return *Node == N; }); + } + iterator findNode(const NodeType &N) { + return const_cast( + static_cast(*this).findNode(N)); + } + + /// Add the given node \p N to the graph if it is not already present. + bool addNode(NodeType &N) { + if (findNode(N) != Nodes.end()) + return false; + Nodes.push_back(&N); + return true; + } + + /// Collect in \p EL all edges that are coming into node \p N. Return true + /// if at least one edge was found, and false otherwise. + bool findIncomingEdgesToNode(const NodeType &N, SmallVectorImpl &EL) const { + assert(EL.empty() && "Expected the list of edges to be empty."); + EdgeListTy TempList; + for (auto *Node : Nodes) { + if (*Node == N) + continue; + Node->findEdgesTo(N, TempList); + EL.insert(EL.end(), TempList.begin(), TempList.end()); + TempList.clear(); + } + return !EL.empty(); + } + + /// Remove the given node \p N from the graph. If the node has incoming or + /// outgoing edges, they are also removed. Return true if the node was found + /// and then removed, and false if the node was not found in the graph to + /// begin with. + bool removeNode(NodeType &N) { + iterator IT = findNode(N); + if (IT == Nodes.end()) + return false; + // Remove incoming edges. + EdgeListTy EL; + for (auto *Node : Nodes) { + if (*Node == N) + continue; + Node->findEdgesTo(N, EL); + for (auto *E : EL) + Node->removeEdge(*E); + EL.clear(); + } + N.clear(); + Nodes.erase(IT); + return true; + } + + /// Assuming nodes \p Src and \p Dst are already in the graph, connect node \p + /// Src to node \p Dst using the provided edge \p E. Return true if \p Src is + /// not already connected to \p Dst via \p E, and false otherwise. 
+ bool connect(NodeType &Src, NodeType &Dst, EdgeType &E) { + assert(findNode(Src) != Nodes.end() && "Src node should be present."); + assert(findNode(Dst) != Nodes.end() && "Dst node should be present."); + assert((E.getTargetNode() == Dst) && + "Target of the given edge does not match Dst."); + return Src.addEdge(E); + } + +protected: + // The list of nodes in the graph. + NodeListTy Nodes; +}; + +} // namespace llvm + +#endif // LLVM_ADT_DIRECTEDGRAPH_H diff --git a/include/llvm/ADT/Hashing.h b/include/llvm/ADT/Hashing.h index 008188bfa210..b22606bdb518 100644 --- a/include/llvm/ADT/Hashing.h +++ b/include/llvm/ADT/Hashing.h @@ -45,7 +45,6 @@ #define LLVM_ADT_HASHING_H #include "llvm/Support/DataTypes.h" -#include "llvm/Support/Host.h" #include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/type_traits.h" #include diff --git a/include/llvm/ADT/IntervalMap.h b/include/llvm/ADT/IntervalMap.h index 12828c4cfdab..a02876ee77f3 100644 --- a/include/llvm/ADT/IntervalMap.h +++ b/include/llvm/ADT/IntervalMap.h @@ -963,8 +963,8 @@ public: private: // The root data is either a RootLeaf or a RootBranchData instance. - LLVM_ALIGNAS(RootLeaf) LLVM_ALIGNAS(RootBranchData) - AlignedCharArrayUnion data; + alignas(RootLeaf) alignas(RootBranchData) + AlignedCharArrayUnion data; // Tree height. // 0: Leaves in root. diff --git a/include/llvm/ADT/PointerIntPair.h b/include/llvm/ADT/PointerIntPair.h index 24a2bb67a36e..fa6bf1504469 100644 --- a/include/llvm/ADT/PointerIntPair.h +++ b/include/llvm/ADT/PointerIntPair.h @@ -13,6 +13,7 @@ #ifndef LLVM_ADT_POINTERINTPAIR_H #define LLVM_ADT_POINTERINTPAIR_H +#include "llvm/Support/Compiler.h" #include "llvm/Support/PointerLikeTypeTraits.h" #include "llvm/Support/type_traits.h" #include @@ -59,19 +60,19 @@ public: IntType getInt() const { return (IntType)Info::getInt(Value); } - void setPointer(PointerTy PtrVal) { + void setPointer(PointerTy PtrVal) LLVM_LVALUE_FUNCTION { Value = Info::updatePointer(Value, PtrVal); } - void setInt(IntType IntVal) { + void setInt(IntType IntVal) LLVM_LVALUE_FUNCTION { Value = Info::updateInt(Value, static_cast(IntVal)); } - void initWithPointer(PointerTy PtrVal) { + void initWithPointer(PointerTy PtrVal) LLVM_LVALUE_FUNCTION { Value = Info::updatePointer(0, PtrVal); } - void setPointerAndInt(PointerTy PtrVal, IntType IntVal) { + void setPointerAndInt(PointerTy PtrVal, IntType IntVal) LLVM_LVALUE_FUNCTION { Value = Info::updateInt(Info::updatePointer(0, PtrVal), static_cast(IntVal)); } @@ -89,7 +90,7 @@ public: void *getOpaqueValue() const { return reinterpret_cast(Value); } - void setFromOpaqueValue(void *Val) { + void setFromOpaqueValue(void *Val) LLVM_LVALUE_FUNCTION { Value = reinterpret_cast(Val); } diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h index 2bcdf546c6e4..98c905775a77 100644 --- a/include/llvm/ADT/PointerUnion.h +++ b/include/llvm/ADT/PointerUnion.h @@ -54,21 +54,14 @@ struct PointerUnionTypeSelectorReturn< }; namespace pointer_union_detail { - constexpr int constexprMin(int a, int b) { return a < b ? a : b; } /// Determine the number of bits required to store integers with values < n. /// This is ceil(log2(n)). constexpr int bitsRequired(unsigned n) { return n > 1 ? 
1 + bitsRequired((n + 1) / 2) : 0; } - // FIXME: In C++14, replace this with - // std::min({PointerLikeTypeTraits::NumLowBitsAvailable...}) - template constexpr int lowBitsAvailable() { - return PointerLikeTypeTraits::NumLowBitsAvailable; - } - template - constexpr int lowBitsAvailable() { - return constexprMin(lowBitsAvailable(), lowBitsAvailable()); + template constexpr int lowBitsAvailable() { + return std::min({PointerLikeTypeTraits::NumLowBitsAvailable...}); } /// Find the index of a type in a list of types. TypeIndex::Index @@ -167,10 +160,11 @@ class PointerUnion void *, pointer_union_detail::bitsRequired(sizeof...(PTs)), int, pointer_union_detail::PointerUnionUIntTraits>, 0, PTs...> { - // The first type is special in some ways, but we don't want PointerUnion to - // be a 'template ' because it's much more - // convenient to have a name for the whole pack. So split off the first type - // here. + // The first type is special because we want to directly cast a pointer to a + // default-initialized union to a pointer to the first type. But we don't + // want PointerUnion to be a 'template ' + // because it's much more convenient to have a name for the whole pack. So + // split off the first type here. using First = typename pointer_union_detail::GetFirstType::type; using Base = typename PointerUnion::PointerUnionMembers; @@ -182,12 +176,7 @@ public: /// Test if the pointer held in the union is null, regardless of /// which type it is. - bool isNull() const { - // Convert from the void* to one of the pointer types, to make sure that - // we recursively strip off low bits if we have a nested PointerUnion. - return !PointerLikeTypeTraits::getFromVoidPointer( - this->Val.getPointer()); - } + bool isNull() const { return !this->Val.getPointer(); } explicit operator bool() const { return !isNull(); } @@ -226,7 +215,8 @@ public: First *getAddrOfPtr1() { assert(is() && "Val is not the first pointer"); assert( - get() == this->Val.getPointer() && + PointerLikeTypeTraits::getAsVoidPointer(get()) == + this->Val.getPointer() && "Can't get the address because PointerLikeTypeTraits changes the ptr"); return const_cast( reinterpret_cast(this->Val.getAddrOfPointer())); diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h index 81dce0168c79..274933bc5204 100644 --- a/include/llvm/ADT/STLExtras.h +++ b/include/llvm/ADT/STLExtras.h @@ -95,18 +95,6 @@ template struct identity { } }; -template struct less_ptr { - bool operator()(const Ty* left, const Ty* right) const { - return *left < *right; - } -}; - -template struct greater_ptr { - bool operator()(const Ty* left, const Ty* right) const { - return *right < *left; - } -}; - /// An efficient, type-erasing, non-owning reference to a callable. This is /// intended for use as the type of a function parameter that is not used /// after the function in question returns. 
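// A short sketch of the PointerUnion behaviour the simplifications above rely
// on: a default-constructed (or null-assigned) union is null, and
// is/get/dyn_cast select the active member. The element types here are
// illustrative only.
#include "llvm/ADT/PointerUnion.h"

static int valueOf(llvm::PointerUnion<int *, double *> PU) {
  if (PU.isNull())                      // nothing stored at all
    return 0;
  if (int *IP = PU.dyn_cast<int *>())   // active member is int*
    return *IP;
  return static_cast<int>(*PU.get<double *>());
}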
@@ -530,10 +518,6 @@ bool all_of(R &&range, UnaryPredicate P); template bool any_of(R &&range, UnaryPredicate P); -template struct index_sequence; - -template struct index_sequence_for; - namespace detail { using std::declval; @@ -568,38 +552,38 @@ struct zip_common : public zip_traits { std::tuple iterators; protected: - template value_type deref(index_sequence) const { + template value_type deref(std::index_sequence) const { return value_type(*std::get(iterators)...); } template - decltype(iterators) tup_inc(index_sequence) const { + decltype(iterators) tup_inc(std::index_sequence) const { return std::tuple(std::next(std::get(iterators))...); } template - decltype(iterators) tup_dec(index_sequence) const { + decltype(iterators) tup_dec(std::index_sequence) const { return std::tuple(std::prev(std::get(iterators))...); } public: zip_common(Iters &&... ts) : iterators(std::forward(ts)...) {} - value_type operator*() { return deref(index_sequence_for{}); } + value_type operator*() { return deref(std::index_sequence_for{}); } const value_type operator*() const { - return deref(index_sequence_for{}); + return deref(std::index_sequence_for{}); } ZipType &operator++() { - iterators = tup_inc(index_sequence_for{}); + iterators = tup_inc(std::index_sequence_for{}); return *reinterpret_cast(this); } ZipType &operator--() { static_assert(Base::IsBidirectional, "All inner iterators must be at least bidirectional."); - iterators = tup_dec(index_sequence_for{}); + iterators = tup_dec(std::index_sequence_for{}); return *reinterpret_cast(this); } }; @@ -618,7 +602,8 @@ struct zip_first : public zip_common, Iters...> { template class zip_shortest : public zip_common, Iters...> { template - bool test(const zip_shortest &other, index_sequence) const { + bool test(const zip_shortest &other, + std::index_sequence) const { return all_of(std::initializer_list{std::get(this->iterators) != std::get(other.iterators)...}, identity{}); @@ -630,7 +615,7 @@ public: zip_shortest(Iters &&... ts) : Base(std::forward(ts)...) {} bool operator==(const zip_shortest &other) const { - return !test(other, index_sequence_for{}); + return !test(other, std::index_sequence_for{}); } }; @@ -646,18 +631,21 @@ public: private: std::tuple ts; - template iterator begin_impl(index_sequence) const { + template + iterator begin_impl(std::index_sequence) const { return iterator(std::begin(std::get(ts))...); } - template iterator end_impl(index_sequence) const { + template iterator end_impl(std::index_sequence) const { return iterator(std::end(std::get(ts))...); } public: zippy(Args &&... ts_) : ts(std::forward(ts_)...) 
{} - iterator begin() const { return begin_impl(index_sequence_for{}); } - iterator end() const { return end_impl(index_sequence_for{}); } + iterator begin() const { + return begin_impl(std::index_sequence_for{}); + } + iterator end() const { return end_impl(std::index_sequence_for{}); } }; } // end namespace detail @@ -727,20 +715,20 @@ private: template bool test(const zip_longest_iterator &other, - index_sequence) const { + std::index_sequence) const { return llvm::any_of( std::initializer_list{std::get(this->iterators) != std::get(other.iterators)...}, identity{}); } - template value_type deref(index_sequence) const { + template value_type deref(std::index_sequence) const { return value_type( deref_or_none(std::get(iterators), std::get(end_iterators))...); } template - decltype(iterators) tup_inc(index_sequence) const { + decltype(iterators) tup_inc(std::index_sequence) const { return std::tuple( next_or_end(std::get(iterators), std::get(end_iterators))...); } @@ -750,17 +738,19 @@ public: : iterators(std::forward(ts.first)...), end_iterators(std::forward(ts.second)...) {} - value_type operator*() { return deref(index_sequence_for{}); } + value_type operator*() { return deref(std::index_sequence_for{}); } - value_type operator*() const { return deref(index_sequence_for{}); } + value_type operator*() const { + return deref(std::index_sequence_for{}); + } zip_longest_iterator &operator++() { - iterators = tup_inc(index_sequence_for{}); + iterators = tup_inc(std::index_sequence_for{}); return *this; } bool operator==(const zip_longest_iterator &other) const { - return !test(other, index_sequence_for{}); + return !test(other, std::index_sequence_for{}); } }; @@ -777,12 +767,13 @@ public: private: std::tuple ts; - template iterator begin_impl(index_sequence) const { + template + iterator begin_impl(std::index_sequence) const { return iterator(std::make_pair(adl_begin(std::get(ts)), adl_end(std::get(ts)))...); } - template iterator end_impl(index_sequence) const { + template iterator end_impl(std::index_sequence) const { return iterator(std::make_pair(adl_end(std::get(ts)), adl_end(std::get(ts)))...); } @@ -790,8 +781,10 @@ private: public: zip_longest_range(Args &&... ts_) : ts(std::forward(ts_)...) {} - iterator begin() const { return begin_impl(index_sequence_for{}); } - iterator end() const { return end_impl(index_sequence_for{}); } + iterator begin() const { + return begin_impl(std::index_sequence_for{}); + } + iterator end() const { return end_impl(std::index_sequence_for{}); } }; } // namespace detail @@ -847,7 +840,7 @@ class concat_iterator /// Increments the first non-end iterator. /// /// It is an error to call this with all iterators at the end. - template void increment(index_sequence) { + template void increment(std::index_sequence) { // Build a sequence of functions to increment each iterator if possible. bool (concat_iterator::*IncrementHelperFns[])() = { &concat_iterator::incrementHelper...}; @@ -876,7 +869,7 @@ class concat_iterator /// reference. /// /// It is an error to call this with all iterators at the end. - template ValueT &get(index_sequence) const { + template ValueT &get(std::index_sequence) const { // Build a sequence of functions to get from iterator if possible. 
ValueT *(concat_iterator::*GetHelperFns[])() const = { &concat_iterator::getHelper...}; @@ -901,11 +894,13 @@ public: using BaseT::operator++; concat_iterator &operator++() { - increment(index_sequence_for()); + increment(std::index_sequence_for()); return *this; } - ValueT &operator*() const { return get(index_sequence_for()); } + ValueT &operator*() const { + return get(std::index_sequence_for()); + } bool operator==(const concat_iterator &RHS) const { return Begins == RHS.Begins && Ends == RHS.Ends; @@ -928,10 +923,10 @@ public: private: std::tuple Ranges; - template iterator begin_impl(index_sequence) { + template iterator begin_impl(std::index_sequence) { return iterator(std::get(Ranges)...); } - template iterator end_impl(index_sequence) { + template iterator end_impl(std::index_sequence) { return iterator(make_range(std::end(std::get(Ranges)), std::end(std::get(Ranges)))...); } @@ -940,8 +935,8 @@ public: concat_range(RangeTs &&... Ranges) : Ranges(std::forward(Ranges)...) {} - iterator begin() { return begin_impl(index_sequence_for{}); } - iterator end() { return end_impl(index_sequence_for{}); } + iterator begin() { return begin_impl(std::index_sequence_for{}); } + iterator end() { return end_impl(std::index_sequence_for{}); } }; } // end namespace detail @@ -990,28 +985,6 @@ struct on_first { } }; -// A subset of N3658. More stuff can be added as-needed. - -/// Represents a compile-time sequence of integers. -template struct integer_sequence { - using value_type = T; - - static constexpr size_t size() { return sizeof...(I); } -}; - -/// Alias for the common case of a sequence of size_ts. -template -struct index_sequence : integer_sequence {}; - -template -struct build_index_impl : build_index_impl {}; -template -struct build_index_impl<0, I...> : index_sequence {}; - -/// Creates a compile-time integer sequence for a parameter pack. -template -struct index_sequence_for : build_index_impl {}; - /// Utility type to build an inheritance chain that makes it easy to rank /// overload candidates. template struct rank : rank {}; @@ -1391,41 +1364,6 @@ void replace(Container &Cont, typename Container::iterator ContIt, // Extra additions to //===----------------------------------------------------------------------===// -// Implement make_unique according to N3656. - -/// Constructs a `new T()` with the given args and returns a -/// `unique_ptr` which owns the object. -/// -/// Example: -/// -/// auto p = make_unique(); -/// auto p = make_unique>(0, 1); -template -typename std::enable_if::value, std::unique_ptr>::type -make_unique(Args &&... args) { - return std::unique_ptr(new T(std::forward(args)...)); -} - -/// Constructs a `new T[n]` with the given args and returns a -/// `unique_ptr` which owns the object. -/// -/// \param n size of the new array. -/// -/// Example: -/// -/// auto p = make_unique(2); // value-initializes the array with 0's. -template -typename std::enable_if::value && std::extent::value == 0, - std::unique_ptr>::type -make_unique(size_t n) { - return std::unique_ptr(new typename std::remove_extent::type[n]()); -} - -/// This function isn't used and is only here to provide better compile errors. -template -typename std::enable_if::value != 0>::type -make_unique(Args &&...) = delete; - struct FreeDeleter { void operator()(void* v) { ::free(v); @@ -1439,20 +1377,6 @@ struct pair_hash { } }; -/// A functor like C++14's std::less in its absence. 
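// The STLExtras hunks above replace LLVM's homegrown index_sequence machinery
// and llvm::make_unique with the std:: equivalents; the zip/concat range
// adaptors keep their interface. A rough usage sketch with illustrative data:
#include "llvm/ADT/STLExtras.h"
#include <vector>

static int zipAndConcat() {
  std::vector<int> A = {1, 2, 3}, B = {4, 5, 6};
  int Sum = 0;
  for (auto P : llvm::zip(A, B))          // pairs (1,4), (2,5), (3,6)
    Sum += std::get<0>(P) + std::get<1>(P);
  for (int X : llvm::concat<int>(A, B))   // 1, 2, 3, 4, 5, 6
    Sum += X;
  return Sum;                             // 42
}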
-struct less { - template bool operator()(A &&a, B &&b) const { - return std::forward(a) < std::forward(b); - } -}; - -/// A functor like C++14's std::equal in its absence. -struct equal { - template bool operator()(A &&a, B &&b) const { - return std::forward(a) == std::forward(b); - } -}; - /// Binary functor that adapts to any other binary functor after dereferencing /// operands. template struct deref { @@ -1580,7 +1504,7 @@ template detail::enumerator enumerate(R &&TheRange) { namespace detail { template -auto apply_tuple_impl(F &&f, Tuple &&t, index_sequence) +auto apply_tuple_impl(F &&f, Tuple &&t, std::index_sequence) -> decltype(std::forward(f)(std::get(std::forward(t))...)) { return std::forward(f)(std::get(std::forward(t))...); } @@ -1593,9 +1517,9 @@ auto apply_tuple_impl(F &&f, Tuple &&t, index_sequence) template auto apply_tuple(F &&f, Tuple &&t) -> decltype(detail::apply_tuple_impl( std::forward(f), std::forward(t), - build_index_impl< + std::make_index_sequence< std::tuple_size::type>::value>{})) { - using Indices = build_index_impl< + using Indices = std::make_index_sequence< std::tuple_size::type>::value>; return detail::apply_tuple_impl(std::forward(f), std::forward(t), diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h index 742450e6a951..61375c008022 100644 --- a/include/llvm/ADT/SmallBitVector.h +++ b/include/llvm/ADT/SmallBitVector.h @@ -290,7 +290,7 @@ public: ++Prev; uintptr_t Bits = getSmallBits(); // Mask in previous bits. - uintptr_t Mask = (1 << Prev) - 1; + uintptr_t Mask = (uintptr_t(1) << Prev) - 1; Bits |= Mask; if (Bits == ~uintptr_t(0) || Prev + 1 >= getSmallSize()) diff --git a/include/llvm/ADT/Statistic.h b/include/llvm/ADT/Statistic.h index 2ac59da596ef..b7387ddcf1c7 100644 --- a/include/llvm/ADT/Statistic.h +++ b/include/llvm/ADT/Statistic.h @@ -44,38 +44,39 @@ class raw_ostream; class raw_fd_ostream; class StringRef; -class Statistic { +class StatisticBase { public: const char *DebugType; const char *Name; const char *Desc; - std::atomic Value; - std::atomic Initialized; - unsigned getValue() const { return Value.load(std::memory_order_relaxed); } + StatisticBase(const char *DebugType, const char *Name, const char *Desc) + : DebugType(DebugType), Name(Name), Desc(Desc) {} + const char *getDebugType() const { return DebugType; } const char *getName() const { return Name; } const char *getDesc() const { return Desc; } +}; - /// construct - This should only be called for non-global statistics. - void construct(const char *debugtype, const char *name, const char *desc) { - DebugType = debugtype; - Name = name; - Desc = desc; - Value = 0; - Initialized = false; - } +class TrackingStatistic : public StatisticBase { +public: + std::atomic Value; + std::atomic Initialized; + + TrackingStatistic(const char *DebugType, const char *Name, const char *Desc) + : StatisticBase(DebugType, Name, Desc), Value(0), Initialized(false) {} + + unsigned getValue() const { return Value.load(std::memory_order_relaxed); } // Allow use of this class as the value itself. 
operator unsigned() const { return getValue(); } -#if LLVM_ENABLE_STATS - const Statistic &operator=(unsigned Val) { + const TrackingStatistic &operator=(unsigned Val) { Value.store(Val, std::memory_order_relaxed); return init(); } - const Statistic &operator++() { + const TrackingStatistic &operator++() { Value.fetch_add(1, std::memory_order_relaxed); return init(); } @@ -85,7 +86,7 @@ public: return Value.fetch_add(1, std::memory_order_relaxed); } - const Statistic &operator--() { + const TrackingStatistic &operator--() { Value.fetch_sub(1, std::memory_order_relaxed); return init(); } @@ -95,14 +96,14 @@ public: return Value.fetch_sub(1, std::memory_order_relaxed); } - const Statistic &operator+=(unsigned V) { + const TrackingStatistic &operator+=(unsigned V) { if (V == 0) return *this; Value.fetch_add(V, std::memory_order_relaxed); return init(); } - const Statistic &operator-=(unsigned V) { + const TrackingStatistic &operator-=(unsigned V) { if (V == 0) return *this; Value.fetch_sub(V, std::memory_order_relaxed); @@ -119,54 +120,57 @@ public: init(); } -#else // Statistics are disabled in release builds. - - const Statistic &operator=(unsigned Val) { +protected: + TrackingStatistic &init() { + if (!Initialized.load(std::memory_order_acquire)) + RegisterStatistic(); return *this; } - const Statistic &operator++() { - return *this; - } + void RegisterStatistic(); +}; - unsigned operator++(int) { - return 0; - } +class NoopStatistic : public StatisticBase { +public: + using StatisticBase::StatisticBase; - const Statistic &operator--() { - return *this; - } + unsigned getValue() const { return 0; } - unsigned operator--(int) { - return 0; - } + // Allow use of this class as the value itself. + operator unsigned() const { return 0; } - const Statistic &operator+=(const unsigned &V) { - return *this; - } + const NoopStatistic &operator=(unsigned Val) { return *this; } - const Statistic &operator-=(const unsigned &V) { - return *this; - } + const NoopStatistic &operator++() { return *this; } - void updateMax(unsigned V) {} + unsigned operator++(int) { return 0; } -#endif // LLVM_ENABLE_STATS + const NoopStatistic &operator--() { return *this; } -protected: - Statistic &init() { - if (!Initialized.load(std::memory_order_acquire)) - RegisterStatistic(); - return *this; - } + unsigned operator--(int) { return 0; } - void RegisterStatistic(); + const NoopStatistic &operator+=(const unsigned &V) { return *this; } + + const NoopStatistic &operator-=(const unsigned &V) { return *this; } + + void updateMax(unsigned V) {} }; +#if LLVM_ENABLE_STATS +using Statistic = TrackingStatistic; +#else +using Statistic = NoopStatistic; +#endif + // STATISTIC - A macro to make definition of statistics really simple. This // automatically passes the DEBUG_TYPE of the file into the statistic. #define STATISTIC(VARNAME, DESC) \ - static llvm::Statistic VARNAME = {DEBUG_TYPE, #VARNAME, DESC, {0}, {false}} + static llvm::Statistic VARNAME = {DEBUG_TYPE, #VARNAME, DESC} + +// ALWAYS_ENABLED_STATISTIC - A macro to define a statistic like STATISTIC but +// it is enabled even if LLVM_ENABLE_STATS is off. +#define ALWAYS_ENABLED_STATISTIC(VARNAME, DESC) \ + static llvm::TrackingStatistic VARNAME = {DEBUG_TYPE, #VARNAME, DESC} /// Enable the collection and printing of statistics. 
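// A brief usage sketch of the STATISTIC and new ALWAYS_ENABLED_STATISTIC
// macros defined above; the DEBUG_TYPE string and counter names are
// illustrative only.
#define DEBUG_TYPE "my-pass"
#include "llvm/ADT/Statistic.h"

STATISTIC(NumWidgetsFolded, "Number of widgets folded");
ALWAYS_ENABLED_STATISTIC(NumWidgetsSeen, "Number of widgets seen");

static void countWidget(bool Folded) {
  ++NumWidgetsSeen;       // counted even when LLVM_ENABLE_STATS is off
  if (Folded)
    ++NumWidgetsFolded;   // a no-op in builds without LLVM_ENABLE_STATS
}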
void EnableStatistics(bool PrintOnExit = true); diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h index 16ac90bd6c89..ef1a11e0619b 100644 --- a/include/llvm/ADT/StringExtras.h +++ b/include/llvm/ADT/StringExtras.h @@ -345,7 +345,7 @@ inline void join_items_impl(std::string &Result, Sep Separator, const Arg1 &A1, join_items_impl(Result, Separator, std::forward(Items)...); } -inline size_t join_one_item_size(char C) { return 1; } +inline size_t join_one_item_size(char) { return 1; } inline size_t join_one_item_size(const char *S) { return S ? ::strlen(S) : 0; } template inline size_t join_one_item_size(const T &Str) { diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h index 8a586fc26709..108185bd07b9 100644 --- a/include/llvm/ADT/StringMap.h +++ b/include/llvm/ADT/StringMap.h @@ -118,36 +118,59 @@ public: } }; -/// StringMapEntry - This is used to represent one value that is inserted into -/// a StringMap. It contains the Value itself and the key: the string length -/// and data. +/// StringMapEntryStorage - Holds the value in a StringMapEntry. +/// +/// Factored out into a separate base class to make it easier to specialize. +/// This is primarily intended to support StringSet, which doesn't need a value +/// stored at all. template -class StringMapEntry : public StringMapEntryBase { +class StringMapEntryStorage : public StringMapEntryBase { public: ValueTy second; - explicit StringMapEntry(size_t strLen) + explicit StringMapEntryStorage(size_t strLen) : StringMapEntryBase(strLen), second() {} template - StringMapEntry(size_t strLen, InitTy &&... InitVals) + StringMapEntryStorage(size_t strLen, InitTy &&... InitVals) : StringMapEntryBase(strLen), second(std::forward(InitVals)...) {} - StringMapEntry(StringMapEntry &E) = delete; - - StringRef getKey() const { - return StringRef(getKeyData(), getKeyLength()); - } + StringMapEntryStorage(StringMapEntryStorage &E) = delete; const ValueTy &getValue() const { return second; } ValueTy &getValue() { return second; } void setValue(const ValueTy &V) { second = V; } +}; + +template<> +class StringMapEntryStorage : public StringMapEntryBase { +public: + explicit StringMapEntryStorage(size_t strLen, NoneType none = None) + : StringMapEntryBase(strLen) {} + StringMapEntryStorage(StringMapEntryStorage &E) = delete; + + NoneType getValue() const { return None; } +}; + +/// StringMapEntry - This is used to represent one value that is inserted into +/// a StringMap. It contains the Value itself and the key: the string length +/// and data. +template +class StringMapEntry final : public StringMapEntryStorage { +public: + using StringMapEntryStorage::StringMapEntryStorage; + + StringRef getKey() const { + return StringRef(getKeyData(), this->getKeyLength()); + } /// getKeyData - Return the start of the string data that is the key for this /// value. The string data is always stored immediately after the /// StringMapEntry object. const char *getKeyData() const {return reinterpret_cast(this+1);} - StringRef first() const { return StringRef(getKeyData(), getKeyLength()); } + StringRef first() const { + return StringRef(getKeyData(), this->getKeyLength()); + } /// Create a StringMapEntry for the specified key construct the value using /// \p InitiVals. @@ -199,7 +222,7 @@ public: template void Destroy(AllocatorTy &Allocator) { // Free memory referenced by the item. 
- size_t AllocSize = sizeof(StringMapEntry) + getKeyLength() + 1; + size_t AllocSize = sizeof(StringMapEntry) + this->getKeyLength() + 1; this->~StringMapEntry(); Allocator.Deallocate(static_cast(this), AllocSize); } @@ -391,6 +414,16 @@ public: return try_emplace(KV.first, std::move(KV.second)); } + /// Inserts an element or assigns to the current element if the key already + /// exists. The return type is the same as try_emplace. + template + std::pair insert_or_assign(StringRef Key, V &&Val) { + auto Ret = try_emplace(Key, std::forward(Val)); + if (!Ret.second) + Ret.first->second = std::forward(Val); + return Ret; + } + /// Emplace a new element for the specified key into the map if the key isn't /// already in the map. The bool component of the returned pair is true /// if and only if the insertion takes place, and the iterator component of diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h index 4661b1e68b2f..52baab17bede 100644 --- a/include/llvm/ADT/StringRef.h +++ b/include/llvm/ADT/StringRef.h @@ -67,6 +67,20 @@ namespace llvm { return ::memcmp(Lhs,Rhs,Length); } + // Constexpr version of std::strlen. + static constexpr size_t strLen(const char *Str) { +#if __cplusplus > 201402L + return std::char_traits::length(Str); +#elif __has_builtin(__builtin_strlen) || defined(__GNUC__) + return __builtin_strlen(Str); +#else + const char *Begin = Str; + while (*Str != '\0') + ++Str; + return Str - Begin; +#endif + } + public: /// @name Constructors /// @{ @@ -79,8 +93,8 @@ namespace llvm { StringRef(std::nullptr_t) = delete; /// Construct a string ref from a cstring. - /*implicit*/ StringRef(const char *Str) - : Data(Str), Length(Str ? ::strlen(Str) : 0) {} + /*implicit*/ constexpr StringRef(const char *Str) + : Data(Str), Length(Str ? strLen(Str) : 0) {} /// Construct a string ref from a pointer and length. /*implicit*/ constexpr StringRef(const char *data, size_t length) diff --git a/include/llvm/ADT/StringSet.h b/include/llvm/ADT/StringSet.h index af3a44a7b32c..60be09d3c326 100644 --- a/include/llvm/ADT/StringSet.h +++ b/include/llvm/ADT/StringSet.h @@ -24,8 +24,8 @@ namespace llvm { /// StringSet - A wrapper for StringMap that provides set-like functionality. template - class StringSet : public StringMap { - using base = StringMap; + class StringSet : public StringMap { + using base = StringMap; public: StringSet() = default; @@ -37,13 +37,13 @@ namespace llvm { std::pair insert(StringRef Key) { assert(!Key.empty()); - return base::insert(std::make_pair(Key, '\0')); + return base::insert(std::make_pair(Key, None)); } template void insert(const InputIt &Begin, const InputIt &End) { for (auto It = Begin; It != End; ++It) - base::insert(std::make_pair(*It, '\0')); + base::insert(std::make_pair(*It, None)); } template diff --git a/include/llvm/ADT/TinyPtrVector.h b/include/llvm/ADT/TinyPtrVector.h index ac82451a9b21..6b76d35d4e92 100644 --- a/include/llvm/ADT/TinyPtrVector.h +++ b/include/llvm/ADT/TinyPtrVector.h @@ -31,6 +31,10 @@ class TinyPtrVector { public: using VecTy = SmallVector; using value_type = typename VecTy::value_type; + // EltTy must be the first pointer type so that is is true for the + // default-constructed PtrUnion. This allows an empty TinyPtrVector to + // naturally vend a begin/end iterator of type EltTy* without an additional + // check for the empty state. 
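// A short sketch of the StringMap::insert_or_assign helper added above, and
// of StringSet, which per the changes above now stores NoneType values rather
// than a dummy char. Keys and values are illustrative.
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSet.h"

static bool stringContainers() {
  llvm::StringMap<int> Counts;
  Counts.insert_or_assign("width", 2);   // inserts {"width", 2}
  Counts.insert_or_assign("width", 7);   // assigns over the existing entry
  llvm::StringSet<> Seen;
  Seen.insert("width");                  // set-like: keys only, no payload
  return Counts.lookup("width") == 7 && Seen.count("width") == 1;
}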
using PtrUnion = PointerUnion; private: @@ -96,14 +100,14 @@ public: if (RHS.Val.template is()) { V->clear(); V->push_back(RHS.front()); - RHS.Val = (EltTy)nullptr; + RHS.Val = EltTy(); return *this; } delete V; } Val = RHS.Val; - RHS.Val = (EltTy)nullptr; + RHS.Val = EltTy(); return *this; } @@ -213,9 +217,9 @@ public: EltTy operator[](unsigned i) const { assert(!Val.isNull() && "can't index into an empty vector"); - if (EltTy V = Val.template dyn_cast()) { + if (Val.template is()) { assert(i == 0 && "tinyvector index out of range"); - return V; + return Val.template get(); } assert(i < Val.template get()->size() && @@ -225,29 +229,29 @@ public: EltTy front() const { assert(!empty() && "vector empty"); - if (EltTy V = Val.template dyn_cast()) - return V; + if (Val.template is()) + return Val.template get(); return Val.template get()->front(); } EltTy back() const { assert(!empty() && "vector empty"); - if (EltTy V = Val.template dyn_cast()) - return V; + if (Val.template is()) + return Val.template get(); return Val.template get()->back(); } void push_back(EltTy NewVal) { - assert(NewVal && "Can't add a null value"); - // If we have nothing, add something. if (Val.isNull()) { Val = NewVal; + assert(!Val.isNull() && "Can't add a null value"); return; } // If we have a single value, convert to a vector. - if (EltTy V = Val.template dyn_cast()) { + if (Val.template is()) { + EltTy V = Val.template get(); Val = new VecTy(); Val.template get()->push_back(V); } @@ -267,7 +271,7 @@ public: void clear() { // If we have a single value, convert to empty. if (Val.template is()) { - Val = (EltTy)nullptr; + Val = EltTy(); } else if (VecTy *Vec = Val.template dyn_cast()) { // If we have a vector form, just clear it. Vec->clear(); @@ -282,7 +286,7 @@ public: // If we have a single value, convert to empty. if (Val.template is()) { if (I == begin()) - Val = (EltTy)nullptr; + Val = EltTy(); } else if (VecTy *Vec = Val.template dyn_cast()) { // multiple items in a vector; just do the erase, there is no // benefit to collapsing back to a pointer @@ -298,7 +302,7 @@ public: if (Val.template is()) { if (S == begin() && S != E) - Val = (EltTy)nullptr; + Val = EltTy(); } else if (VecTy *Vec = Val.template dyn_cast()) { return Vec->erase(S, E); } @@ -313,7 +317,8 @@ public: return std::prev(end()); } assert(!Val.isNull() && "Null value with non-end insert iterator."); - if (EltTy V = Val.template dyn_cast()) { + if (Val.template is()) { + EltTy V = Val.template get(); assert(I == begin()); Val = Elt; push_back(V); @@ -339,7 +344,8 @@ public: } Val = new VecTy(); - } else if (EltTy V = Val.template dyn_cast()) { + } else if (Val.template is()) { + EltTy V = Val.template get(); Val = new VecTy(); Val.template get()->push_back(V); } diff --git a/include/llvm/ADT/VariadicFunction.h b/include/llvm/ADT/VariadicFunction.h deleted file mode 100644 index 5aefb05ecdda..000000000000 --- a/include/llvm/ADT/VariadicFunction.h +++ /dev/null @@ -1,330 +0,0 @@ -//===- VariadicFunction.h - Variadic Functions ------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements compile-time type-safe variadic functions. 
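// A small sketch of TinyPtrVector, whose empty/single-element representation
// is what the PointerUnion-related changes above adjust: an empty vector holds
// a null EltTy, one element is stored inline, and further elements promote the
// storage to a heap-allocated SmallVector.
#include "llvm/ADT/TinyPtrVector.h"

static int collectTwo() {
  static int X = 1, Y = 2;
  llvm::TinyPtrVector<int *> V;   // empty state
  V.push_back(&X);                // single inline element
  V.push_back(&Y);                // promoted to the vector form
  int Sum = 0;
  for (int *P : V)
    Sum += *P;
  return Sum;                     // 3
}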
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ADT_VARIADICFUNCTION_H -#define LLVM_ADT_VARIADICFUNCTION_H - -#include "llvm/ADT/ArrayRef.h" - -namespace llvm { - -// Define macros to aid in expanding a comma separated series with the index of -// the series pasted onto the last token. -#define LLVM_COMMA_JOIN1(x) x ## 0 -#define LLVM_COMMA_JOIN2(x) LLVM_COMMA_JOIN1(x), x ## 1 -#define LLVM_COMMA_JOIN3(x) LLVM_COMMA_JOIN2(x), x ## 2 -#define LLVM_COMMA_JOIN4(x) LLVM_COMMA_JOIN3(x), x ## 3 -#define LLVM_COMMA_JOIN5(x) LLVM_COMMA_JOIN4(x), x ## 4 -#define LLVM_COMMA_JOIN6(x) LLVM_COMMA_JOIN5(x), x ## 5 -#define LLVM_COMMA_JOIN7(x) LLVM_COMMA_JOIN6(x), x ## 6 -#define LLVM_COMMA_JOIN8(x) LLVM_COMMA_JOIN7(x), x ## 7 -#define LLVM_COMMA_JOIN9(x) LLVM_COMMA_JOIN8(x), x ## 8 -#define LLVM_COMMA_JOIN10(x) LLVM_COMMA_JOIN9(x), x ## 9 -#define LLVM_COMMA_JOIN11(x) LLVM_COMMA_JOIN10(x), x ## 10 -#define LLVM_COMMA_JOIN12(x) LLVM_COMMA_JOIN11(x), x ## 11 -#define LLVM_COMMA_JOIN13(x) LLVM_COMMA_JOIN12(x), x ## 12 -#define LLVM_COMMA_JOIN14(x) LLVM_COMMA_JOIN13(x), x ## 13 -#define LLVM_COMMA_JOIN15(x) LLVM_COMMA_JOIN14(x), x ## 14 -#define LLVM_COMMA_JOIN16(x) LLVM_COMMA_JOIN15(x), x ## 15 -#define LLVM_COMMA_JOIN17(x) LLVM_COMMA_JOIN16(x), x ## 16 -#define LLVM_COMMA_JOIN18(x) LLVM_COMMA_JOIN17(x), x ## 17 -#define LLVM_COMMA_JOIN19(x) LLVM_COMMA_JOIN18(x), x ## 18 -#define LLVM_COMMA_JOIN20(x) LLVM_COMMA_JOIN19(x), x ## 19 -#define LLVM_COMMA_JOIN21(x) LLVM_COMMA_JOIN20(x), x ## 20 -#define LLVM_COMMA_JOIN22(x) LLVM_COMMA_JOIN21(x), x ## 21 -#define LLVM_COMMA_JOIN23(x) LLVM_COMMA_JOIN22(x), x ## 22 -#define LLVM_COMMA_JOIN24(x) LLVM_COMMA_JOIN23(x), x ## 23 -#define LLVM_COMMA_JOIN25(x) LLVM_COMMA_JOIN24(x), x ## 24 -#define LLVM_COMMA_JOIN26(x) LLVM_COMMA_JOIN25(x), x ## 25 -#define LLVM_COMMA_JOIN27(x) LLVM_COMMA_JOIN26(x), x ## 26 -#define LLVM_COMMA_JOIN28(x) LLVM_COMMA_JOIN27(x), x ## 27 -#define LLVM_COMMA_JOIN29(x) LLVM_COMMA_JOIN28(x), x ## 28 -#define LLVM_COMMA_JOIN30(x) LLVM_COMMA_JOIN29(x), x ## 29 -#define LLVM_COMMA_JOIN31(x) LLVM_COMMA_JOIN30(x), x ## 30 -#define LLVM_COMMA_JOIN32(x) LLVM_COMMA_JOIN31(x), x ## 31 - -/// Class which can simulate a type-safe variadic function. -/// -/// The VariadicFunction class template makes it easy to define -/// type-safe variadic functions where all arguments have the same -/// type. -/// -/// Suppose we need a variadic function like this: -/// -/// ResultT Foo(const ArgT &A_0, const ArgT &A_1, ..., const ArgT &A_N); -/// -/// Instead of many overloads of Foo(), we only need to define a helper -/// function that takes an array of arguments: -/// -/// ResultT FooImpl(ArrayRef Args) { -/// // 'Args[i]' is a pointer to the i-th argument passed to Foo(). -/// ... -/// } -/// -/// and then define Foo() like this: -/// -/// const VariadicFunction Foo; -/// -/// VariadicFunction takes care of defining the overloads of Foo(). -/// -/// Actually, Foo is a function object (i.e. functor) instead of a plain -/// function. This object is stateless and its constructor/destructor -/// does nothing, so it's safe to create global objects and call Foo(...) at -/// any time. -/// -/// Sometimes we need a variadic function to have some fixed leading -/// arguments whose types may be different from that of the optional -/// arguments. 
For example: -/// -/// bool FullMatch(const StringRef &S, const RE &Regex, -/// const ArgT &A_0, ..., const ArgT &A_N); -/// -/// VariadicFunctionN is for such cases, where N is the number of fixed -/// arguments. It is like VariadicFunction, except that it takes N more -/// template arguments for the types of the fixed arguments: -/// -/// bool FullMatchImpl(const StringRef &S, const RE &Regex, -/// ArrayRef Args) { ... } -/// const VariadicFunction2 -/// FullMatch; -/// -/// Currently VariadicFunction and friends support up-to 3 -/// fixed leading arguments and up-to 32 optional arguments. -template )> -struct VariadicFunction { - ResultT operator()() const { - return Func(None); - } - -#define LLVM_DEFINE_OVERLOAD(N) \ - ResultT operator()(LLVM_COMMA_JOIN ## N(const ArgT &A)) const { \ - const ArgT *const Args[] = { LLVM_COMMA_JOIN ## N(&A) }; \ - return Func(makeArrayRef(Args)); \ - } - LLVM_DEFINE_OVERLOAD(1) - LLVM_DEFINE_OVERLOAD(2) - LLVM_DEFINE_OVERLOAD(3) - LLVM_DEFINE_OVERLOAD(4) - LLVM_DEFINE_OVERLOAD(5) - LLVM_DEFINE_OVERLOAD(6) - LLVM_DEFINE_OVERLOAD(7) - LLVM_DEFINE_OVERLOAD(8) - LLVM_DEFINE_OVERLOAD(9) - LLVM_DEFINE_OVERLOAD(10) - LLVM_DEFINE_OVERLOAD(11) - LLVM_DEFINE_OVERLOAD(12) - LLVM_DEFINE_OVERLOAD(13) - LLVM_DEFINE_OVERLOAD(14) - LLVM_DEFINE_OVERLOAD(15) - LLVM_DEFINE_OVERLOAD(16) - LLVM_DEFINE_OVERLOAD(17) - LLVM_DEFINE_OVERLOAD(18) - LLVM_DEFINE_OVERLOAD(19) - LLVM_DEFINE_OVERLOAD(20) - LLVM_DEFINE_OVERLOAD(21) - LLVM_DEFINE_OVERLOAD(22) - LLVM_DEFINE_OVERLOAD(23) - LLVM_DEFINE_OVERLOAD(24) - LLVM_DEFINE_OVERLOAD(25) - LLVM_DEFINE_OVERLOAD(26) - LLVM_DEFINE_OVERLOAD(27) - LLVM_DEFINE_OVERLOAD(28) - LLVM_DEFINE_OVERLOAD(29) - LLVM_DEFINE_OVERLOAD(30) - LLVM_DEFINE_OVERLOAD(31) - LLVM_DEFINE_OVERLOAD(32) -#undef LLVM_DEFINE_OVERLOAD -}; - -template )> -struct VariadicFunction1 { - ResultT operator()(Param0T P0) const { - return Func(P0, None); - } - -#define LLVM_DEFINE_OVERLOAD(N) \ - ResultT operator()(Param0T P0, LLVM_COMMA_JOIN ## N(const ArgT &A)) const { \ - const ArgT *const Args[] = { LLVM_COMMA_JOIN ## N(&A) }; \ - return Func(P0, makeArrayRef(Args)); \ - } - LLVM_DEFINE_OVERLOAD(1) - LLVM_DEFINE_OVERLOAD(2) - LLVM_DEFINE_OVERLOAD(3) - LLVM_DEFINE_OVERLOAD(4) - LLVM_DEFINE_OVERLOAD(5) - LLVM_DEFINE_OVERLOAD(6) - LLVM_DEFINE_OVERLOAD(7) - LLVM_DEFINE_OVERLOAD(8) - LLVM_DEFINE_OVERLOAD(9) - LLVM_DEFINE_OVERLOAD(10) - LLVM_DEFINE_OVERLOAD(11) - LLVM_DEFINE_OVERLOAD(12) - LLVM_DEFINE_OVERLOAD(13) - LLVM_DEFINE_OVERLOAD(14) - LLVM_DEFINE_OVERLOAD(15) - LLVM_DEFINE_OVERLOAD(16) - LLVM_DEFINE_OVERLOAD(17) - LLVM_DEFINE_OVERLOAD(18) - LLVM_DEFINE_OVERLOAD(19) - LLVM_DEFINE_OVERLOAD(20) - LLVM_DEFINE_OVERLOAD(21) - LLVM_DEFINE_OVERLOAD(22) - LLVM_DEFINE_OVERLOAD(23) - LLVM_DEFINE_OVERLOAD(24) - LLVM_DEFINE_OVERLOAD(25) - LLVM_DEFINE_OVERLOAD(26) - LLVM_DEFINE_OVERLOAD(27) - LLVM_DEFINE_OVERLOAD(28) - LLVM_DEFINE_OVERLOAD(29) - LLVM_DEFINE_OVERLOAD(30) - LLVM_DEFINE_OVERLOAD(31) - LLVM_DEFINE_OVERLOAD(32) -#undef LLVM_DEFINE_OVERLOAD -}; - -template )> -struct VariadicFunction2 { - ResultT operator()(Param0T P0, Param1T P1) const { - return Func(P0, P1, None); - } - -#define LLVM_DEFINE_OVERLOAD(N) \ - ResultT operator()(Param0T P0, Param1T P1, \ - LLVM_COMMA_JOIN ## N(const ArgT &A)) const { \ - const ArgT *const Args[] = { LLVM_COMMA_JOIN ## N(&A) }; \ - return Func(P0, P1, makeArrayRef(Args)); \ - } - LLVM_DEFINE_OVERLOAD(1) - LLVM_DEFINE_OVERLOAD(2) - LLVM_DEFINE_OVERLOAD(3) - LLVM_DEFINE_OVERLOAD(4) - LLVM_DEFINE_OVERLOAD(5) - LLVM_DEFINE_OVERLOAD(6) - 
LLVM_DEFINE_OVERLOAD(7) - LLVM_DEFINE_OVERLOAD(8) - LLVM_DEFINE_OVERLOAD(9) - LLVM_DEFINE_OVERLOAD(10) - LLVM_DEFINE_OVERLOAD(11) - LLVM_DEFINE_OVERLOAD(12) - LLVM_DEFINE_OVERLOAD(13) - LLVM_DEFINE_OVERLOAD(14) - LLVM_DEFINE_OVERLOAD(15) - LLVM_DEFINE_OVERLOAD(16) - LLVM_DEFINE_OVERLOAD(17) - LLVM_DEFINE_OVERLOAD(18) - LLVM_DEFINE_OVERLOAD(19) - LLVM_DEFINE_OVERLOAD(20) - LLVM_DEFINE_OVERLOAD(21) - LLVM_DEFINE_OVERLOAD(22) - LLVM_DEFINE_OVERLOAD(23) - LLVM_DEFINE_OVERLOAD(24) - LLVM_DEFINE_OVERLOAD(25) - LLVM_DEFINE_OVERLOAD(26) - LLVM_DEFINE_OVERLOAD(27) - LLVM_DEFINE_OVERLOAD(28) - LLVM_DEFINE_OVERLOAD(29) - LLVM_DEFINE_OVERLOAD(30) - LLVM_DEFINE_OVERLOAD(31) - LLVM_DEFINE_OVERLOAD(32) -#undef LLVM_DEFINE_OVERLOAD -}; - -template )> -struct VariadicFunction3 { - ResultT operator()(Param0T P0, Param1T P1, Param2T P2) const { - return Func(P0, P1, P2, None); - } - -#define LLVM_DEFINE_OVERLOAD(N) \ - ResultT operator()(Param0T P0, Param1T P1, Param2T P2, \ - LLVM_COMMA_JOIN ## N(const ArgT &A)) const { \ - const ArgT *const Args[] = { LLVM_COMMA_JOIN ## N(&A) }; \ - return Func(P0, P1, P2, makeArrayRef(Args)); \ - } - LLVM_DEFINE_OVERLOAD(1) - LLVM_DEFINE_OVERLOAD(2) - LLVM_DEFINE_OVERLOAD(3) - LLVM_DEFINE_OVERLOAD(4) - LLVM_DEFINE_OVERLOAD(5) - LLVM_DEFINE_OVERLOAD(6) - LLVM_DEFINE_OVERLOAD(7) - LLVM_DEFINE_OVERLOAD(8) - LLVM_DEFINE_OVERLOAD(9) - LLVM_DEFINE_OVERLOAD(10) - LLVM_DEFINE_OVERLOAD(11) - LLVM_DEFINE_OVERLOAD(12) - LLVM_DEFINE_OVERLOAD(13) - LLVM_DEFINE_OVERLOAD(14) - LLVM_DEFINE_OVERLOAD(15) - LLVM_DEFINE_OVERLOAD(16) - LLVM_DEFINE_OVERLOAD(17) - LLVM_DEFINE_OVERLOAD(18) - LLVM_DEFINE_OVERLOAD(19) - LLVM_DEFINE_OVERLOAD(20) - LLVM_DEFINE_OVERLOAD(21) - LLVM_DEFINE_OVERLOAD(22) - LLVM_DEFINE_OVERLOAD(23) - LLVM_DEFINE_OVERLOAD(24) - LLVM_DEFINE_OVERLOAD(25) - LLVM_DEFINE_OVERLOAD(26) - LLVM_DEFINE_OVERLOAD(27) - LLVM_DEFINE_OVERLOAD(28) - LLVM_DEFINE_OVERLOAD(29) - LLVM_DEFINE_OVERLOAD(30) - LLVM_DEFINE_OVERLOAD(31) - LLVM_DEFINE_OVERLOAD(32) -#undef LLVM_DEFINE_OVERLOAD -}; - -// Cleanup the macro namespace. -#undef LLVM_COMMA_JOIN1 -#undef LLVM_COMMA_JOIN2 -#undef LLVM_COMMA_JOIN3 -#undef LLVM_COMMA_JOIN4 -#undef LLVM_COMMA_JOIN5 -#undef LLVM_COMMA_JOIN6 -#undef LLVM_COMMA_JOIN7 -#undef LLVM_COMMA_JOIN8 -#undef LLVM_COMMA_JOIN9 -#undef LLVM_COMMA_JOIN10 -#undef LLVM_COMMA_JOIN11 -#undef LLVM_COMMA_JOIN12 -#undef LLVM_COMMA_JOIN13 -#undef LLVM_COMMA_JOIN14 -#undef LLVM_COMMA_JOIN15 -#undef LLVM_COMMA_JOIN16 -#undef LLVM_COMMA_JOIN17 -#undef LLVM_COMMA_JOIN18 -#undef LLVM_COMMA_JOIN19 -#undef LLVM_COMMA_JOIN20 -#undef LLVM_COMMA_JOIN21 -#undef LLVM_COMMA_JOIN22 -#undef LLVM_COMMA_JOIN23 -#undef LLVM_COMMA_JOIN24 -#undef LLVM_COMMA_JOIN25 -#undef LLVM_COMMA_JOIN26 -#undef LLVM_COMMA_JOIN27 -#undef LLVM_COMMA_JOIN28 -#undef LLVM_COMMA_JOIN29 -#undef LLVM_COMMA_JOIN30 -#undef LLVM_COMMA_JOIN31 -#undef LLVM_COMMA_JOIN32 - -} // end namespace llvm - -#endif // LLVM_ADT_VARIADICFUNCTION_H diff --git a/include/llvm/ADT/iterator_range.h b/include/llvm/ADT/iterator_range.h index 774c7c4e3366..aa8830943cab 100644 --- a/include/llvm/ADT/iterator_range.h +++ b/include/llvm/ADT/iterator_range.h @@ -44,6 +44,7 @@ public: IteratorT begin() const { return begin_iterator; } IteratorT end() const { return end_iterator; } + bool empty() const { return begin_iterator == end_iterator; } }; /// Convenience function for iterating over sub-ranges. 
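// A minimal sketch of the new iterator_range::empty() helper shown above,
// using llvm::make_range to build the range; the half-range split is purely
// illustrative.
#include "llvm/ADT/iterator_range.h"
#include <vector>

static bool firstHalfEmpty(const std::vector<int> &V) {
  auto R = llvm::make_range(V.begin(), V.begin() + V.size() / 2);
  return R.empty();   // true when V has fewer than two elements
}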
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h index 948341554f23..282142f51bb3 100644 --- a/include/llvm/Analysis/AliasAnalysis.h +++ b/include/llvm/Analysis/AliasAnalysis.h @@ -949,7 +949,7 @@ template class AAResultBase { /// A pointer to the AAResults object that this AAResult is /// aggregated within. May be null if not aggregated. - AAResults *AAR; + AAResults *AAR = nullptr; /// Helper to dispatch calls back through the derived type. DerivedT &derived() { return static_cast(*this); } diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h index 34a509b7f4bb..187317e3831b 100644 --- a/include/llvm/Analysis/AliasSetTracker.h +++ b/include/llvm/Analysis/AliasSetTracker.h @@ -87,10 +87,11 @@ class AliasSet : public ilist_node { AAInfo = NewAAInfo; else { AAMDNodes Intersection(AAInfo.intersect(NewAAInfo)); - if (!Intersection) { + if (!Intersection.TBAA || !Intersection.Scope || + !Intersection.NoAlias) { // NewAAInfo conflicts with AAInfo. AAInfo = DenseMapInfo::getTombstoneKey(); - return SizeChanged; + SizeChanged = true; } AAInfo = Intersection; } diff --git a/include/llvm/Analysis/AssumptionCache.h b/include/llvm/Analysis/AssumptionCache.h index b42846472f2e..0efbd59023d6 100644 --- a/include/llvm/Analysis/AssumptionCache.h +++ b/include/llvm/Analysis/AssumptionCache.h @@ -73,8 +73,8 @@ class AssumptionCache { /// Get the vector of assumptions which affect a value from the cache. SmallVector &getOrInsertAffectedValues(Value *V); - /// Copy affected values in the cache for OV to be affected values for NV. - void copyAffectedValuesInCache(Value *OV, Value *NV); + /// Move affected values in the cache for OV to be affected values for NV. + void transferAffectedValuesInCache(Value *OV, Value *NV); /// Flag tracking whether we have scanned the function yet. /// diff --git a/include/llvm/Analysis/CFG.h b/include/llvm/Analysis/CFG.h index bb55e76ac86a..68f137ba622c 100644 --- a/include/llvm/Analysis/CFG.h +++ b/include/llvm/Analysis/CFG.h @@ -46,6 +46,8 @@ unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ); /// bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges = false); +bool isCriticalEdge(const Instruction *TI, const BasicBlock *Succ, + bool AllowIdenticalEdges = false); /// Determine whether instruction 'To' is reachable from 'From', without passing /// through any blocks in ExclusionSet, returning true if uncertain. diff --git a/include/llvm/Analysis/CFLAndersAliasAnalysis.h b/include/llvm/Analysis/CFLAndersAliasAnalysis.h index 7c8b42b1d8d2..5f5e52af3d88 100644 --- a/include/llvm/Analysis/CFLAndersAliasAnalysis.h +++ b/include/llvm/Analysis/CFLAndersAliasAnalysis.h @@ -41,7 +41,8 @@ class CFLAndersAAResult : public AAResultBase { class FunctionInfo; public: - explicit CFLAndersAAResult(const TargetLibraryInfo &TLI); + explicit CFLAndersAAResult( + std::function GetTLI); CFLAndersAAResult(CFLAndersAAResult &&RHS); ~CFLAndersAAResult(); @@ -74,7 +75,7 @@ private: /// Build summary for a given function FunctionInfo buildInfoFrom(const Function &); - const TargetLibraryInfo &TLI; + std::function GetTLI; /// Cached mapping of Functions to their StratifiedSets. 
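// The CFL alias-analysis results above now take a callback mapping a Function
// to its TargetLibraryInfo instead of a single TLI reference. A sketch of the
// callback shape, assuming the std::function parameter (whose template
// arguments are elided in the hunk above) is const TargetLibraryInfo &
// (Function &); here one TLI object is reused for every function purely for
// illustration.
#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"

static llvm::CFLAndersAAResult
makeCFLAnders(llvm::TargetLibraryInfo &TLI) {
  auto GetTLI = [&TLI](llvm::Function &) -> const llvm::TargetLibraryInfo & {
    return TLI;
  };
  return llvm::CFLAndersAAResult(GetTLI);
}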
/// If a function's sets are currently being built, it is marked diff --git a/include/llvm/Analysis/CFLSteensAliasAnalysis.h b/include/llvm/Analysis/CFLSteensAliasAnalysis.h index cc7a47cd9a5f..135321616b7c 100644 --- a/include/llvm/Analysis/CFLSteensAliasAnalysis.h +++ b/include/llvm/Analysis/CFLSteensAliasAnalysis.h @@ -42,7 +42,8 @@ class CFLSteensAAResult : public AAResultBase { class FunctionInfo; public: - explicit CFLSteensAAResult(const TargetLibraryInfo &TLI); + explicit CFLSteensAAResult( + std::function GetTLI); CFLSteensAAResult(CFLSteensAAResult &&Arg); ~CFLSteensAAResult(); @@ -90,7 +91,7 @@ public: } private: - const TargetLibraryInfo &TLI; + std::function GetTLI; /// Cached mapping of Functions to their StratifiedSets. /// If a function's sets are currently being built, it is marked diff --git a/include/llvm/Analysis/CGSCCPassManager.h b/include/llvm/Analysis/CGSCCPassManager.h index 8af5fb86995a..933f2210dafc 100644 --- a/include/llvm/Analysis/CGSCCPassManager.h +++ b/include/llvm/Analysis/CGSCCPassManager.h @@ -88,6 +88,7 @@ #ifndef LLVM_ANALYSIS_CGSCCPASSMANAGER_H #define LLVM_ANALYSIS_CGSCCPASSMANAGER_H +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/PriorityWorklist.h" #include "llvm/ADT/STLExtras.h" @@ -583,10 +584,12 @@ public: SmallVectorImpl &CallHandles) { assert(CallHandles.empty() && "Must start with a clear set of handles."); - SmallVector CallCounts; + SmallDenseMap CallCounts; + CallCount CountLocal = {0, 0}; for (LazyCallGraph::Node &N : C) { - CallCounts.push_back({0, 0}); - CallCount &Count = CallCounts.back(); + CallCount &Count = + CallCounts.insert(std::make_pair(&N.getFunction(), CountLocal)) + .first->second; for (Instruction &I : instructions(N.getFunction())) if (auto CS = CallSite(&I)) { if (CS.getCalledFunction()) { @@ -626,8 +629,6 @@ public: // Check that we didn't miss any update scenario. assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!"); assert(C->begin() != C->end() && "Cannot have an empty SCC!"); - assert((int)CallCounts.size() == C->size() && - "Cannot have changed the size of the SCC!"); // Check whether any of the handles were devirtualized. auto IsDevirtualizedHandle = [&](WeakTrackingVH &CallH) { @@ -642,7 +643,7 @@ public: if (!F) return false; - LLVM_DEBUG(dbgs() << "Found devirutalized call from " + LLVM_DEBUG(dbgs() << "Found devirtualized call from " << CS.getParent()->getParent()->getName() << " to " << F->getName() << "\n"); @@ -664,12 +665,20 @@ public: // manner of transformations such as DCE and other things, but seems to // work well in practice. if (!Devirt) - for (int i = 0, Size = C->size(); i < Size; ++i) - if (CallCounts[i].Indirect > NewCallCounts[i].Indirect && - CallCounts[i].Direct < NewCallCounts[i].Direct) { - Devirt = true; - break; + // Iterate over the keys in NewCallCounts, if Function also exists in + // CallCounts, make the check below. 
+ for (auto &Pair : NewCallCounts) { + auto &CallCountNew = Pair.second; + auto CountIt = CallCounts.find(Pair.first); + if (CountIt != CallCounts.end()) { + const auto &CallCountOld = CountIt->second; + if (CallCountOld.Indirect > CallCountNew.Indirect && + CallCountOld.Direct < CallCountNew.Direct) { + Devirt = true; + break; + } } + } if (!Devirt) { PA.intersect(std::move(PassPA)); diff --git a/include/llvm/Analysis/CaptureTracking.h b/include/llvm/Analysis/CaptureTracking.h index ca7abd34fea2..29921a51d5be 100644 --- a/include/llvm/Analysis/CaptureTracking.h +++ b/include/llvm/Analysis/CaptureTracking.h @@ -17,6 +17,7 @@ namespace llvm { class Value; class Use; + class DataLayout; class Instruction; class DominatorTree; class OrderedBasicBlock; @@ -83,6 +84,11 @@ namespace llvm { /// use U. Return true to stop the traversal or false to continue looking /// for more capturing instructions. virtual bool captured(const Use *U) = 0; + + /// isDereferenceableOrNull - Overload to allow clients with additional + /// knowledge about pointer dereferenceability to provide it and thereby + /// avoid conservative responses when a pointer is compared to null. + virtual bool isDereferenceableOrNull(Value *O, const DataLayout &DL); }; /// PointerMayBeCaptured - Visit the value and the values derived from it and diff --git a/include/llvm/Analysis/DDG.h b/include/llvm/Analysis/DDG.h new file mode 100644 index 000000000000..0e1eb9d2cda3 --- /dev/null +++ b/include/llvm/Analysis/DDG.h @@ -0,0 +1,430 @@ +//===- llvm/Analysis/DDG.h --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the Data-Dependence Graph (DDG). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_DDG_H +#define LLVM_ANALYSIS_DDG_H + +#include "llvm/ADT/DirectedGraph.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/DependenceGraphBuilder.h" +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/IR/Instructions.h" +#include + +namespace llvm { +class DDGNode; +class DDGEdge; +using DDGNodeBase = DGNode; +using DDGEdgeBase = DGEdge; +using DDGBase = DirectedGraph; +class LPMUpdater; + +/// Data Dependence Graph Node +/// The graph can represent the following types of nodes: +/// 1. Single instruction node containing just one instruction. +/// 2. Multiple instruction node where two or more instructions from +/// the same basic block are merged into one node. +/// 3. Root node is a special node that connects to all components such that +/// there is always a path from it to any node in the graph. 
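The node taxonomy documented above is exposed through LLVM's usual isa<>/dyn_cast<> machinery (see the classof definitions further down in this file). Here is a sketch of how client code might classify the nodes of a built graph; the helper name is hypothetical, and obtaining the DataDependenceGraph (for instance from the DDGAnalysis pass declared later in this header) is assumed:

#include "llvm/Analysis/DDG.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Hypothetical helper: count node kinds of an already-built DDG. Iterating
// the graph yields DDGNode pointers; the root is a RootDDGNode, everything
// else is a single- or multi-instruction SimpleDDGNode.
static void summarizeDDG(const DataDependenceGraph &G) {
  unsigned Roots = 0, Simple = 0;
  for (const DDGNode *N : G) {
    if (isa<RootDDGNode>(N))
      ++Roots;
    else if (isa<SimpleDDGNode>(N))
      ++Simple;
  }
  errs() << G.getName() << ": " << Roots << " root and " << Simple
         << " simple node(s)\n";
}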
+class DDGNode : public DDGNodeBase { +public: + using InstructionListType = SmallVectorImpl; + + enum class NodeKind { + Unknown, + SingleInstruction, + MultiInstruction, + Root, + }; + + DDGNode() = delete; + DDGNode(const NodeKind K) : DDGNodeBase(), Kind(K) {} + DDGNode(const DDGNode &N) : DDGNodeBase(N), Kind(N.Kind) {} + DDGNode(DDGNode &&N) : DDGNodeBase(std::move(N)), Kind(N.Kind) {} + virtual ~DDGNode() = 0; + + DDGNode &operator=(const DDGNode &N) { + DGNode::operator=(N); + Kind = N.Kind; + return *this; + } + + DDGNode &operator=(DDGNode &&N) { + DGNode::operator=(std::move(N)); + Kind = N.Kind; + return *this; + } + + /// Getter for the kind of this node. + NodeKind getKind() const { return Kind; } + + /// Collect a list of instructions, in \p IList, for which predicate \p Pred + /// evaluates to true when iterating over instructions of this node. Return + /// true if at least one instruction was collected, and false otherwise. + bool collectInstructions(llvm::function_ref const &Pred, + InstructionListType &IList) const; + +protected: + /// Setter for the kind of this node. + void setKind(NodeKind K) { Kind = K; } + +private: + NodeKind Kind; +}; + +/// Subclass of DDGNode representing the root node of the graph. +/// There should only be one such node in a given graph. +class RootDDGNode : public DDGNode { +public: + RootDDGNode() : DDGNode(NodeKind::Root) {} + RootDDGNode(const RootDDGNode &N) = delete; + RootDDGNode(RootDDGNode &&N) : DDGNode(std::move(N)) {} + ~RootDDGNode() {} + + /// Define classof to be able to use isa<>, cast<>, dyn_cast<>, etc. + static bool classof(const DDGNode *N) { + return N->getKind() == NodeKind::Root; + } + static bool classof(const RootDDGNode *N) { return true; } +}; + +/// Subclass of DDGNode representing single or multi-instruction nodes. +class SimpleDDGNode : public DDGNode { +public: + SimpleDDGNode() = delete; + SimpleDDGNode(Instruction &I); + SimpleDDGNode(const SimpleDDGNode &N); + SimpleDDGNode(SimpleDDGNode &&N); + ~SimpleDDGNode(); + + SimpleDDGNode &operator=(const SimpleDDGNode &N) { + DDGNode::operator=(N); + InstList = N.InstList; + return *this; + } + + SimpleDDGNode &operator=(SimpleDDGNode &&N) { + DDGNode::operator=(std::move(N)); + InstList = std::move(N.InstList); + return *this; + } + + /// Get the list of instructions in this node. + const InstructionListType &getInstructions() const { + assert(!InstList.empty() && "Instruction List is empty."); + return InstList; + } + InstructionListType &getInstructions() { + return const_cast( + static_cast(this)->getInstructions()); + } + + /// Get the first/last instruction in the node. + Instruction *getFirstInstruction() const { return getInstructions().front(); } + Instruction *getLastInstruction() const { return getInstructions().back(); } + + /// Define classof to be able to use isa<>, cast<>, dyn_cast<>, etc. + static bool classof(const DDGNode *N) { + return N->getKind() == NodeKind::SingleInstruction || + N->getKind() == NodeKind::MultiInstruction; + } + static bool classof(const SimpleDDGNode *N) { return true; } + +private: + /// Append the list of instructions in \p Input to this node. + void appendInstructions(const InstructionListType &Input) { + setKind((InstList.size() == 0 && Input.size() == 1) + ? 
NodeKind::SingleInstruction + : NodeKind::MultiInstruction); + InstList.insert(InstList.end(), Input.begin(), Input.end()); + } + void appendInstructions(const SimpleDDGNode &Input) { + appendInstructions(Input.getInstructions()); + } + + /// List of instructions associated with a single or multi-instruction node. + SmallVector InstList; +}; + +/// Data Dependency Graph Edge. +/// An edge in the DDG can represent a def-use relationship or +/// a memory dependence based on the result of DependenceAnalysis. +/// A rooted edge connects the root node to one of the components +/// of the graph. +class DDGEdge : public DDGEdgeBase { +public: + /// The kind of edge in the DDG + enum class EdgeKind { Unknown, RegisterDefUse, MemoryDependence, Rooted }; + + explicit DDGEdge(DDGNode &N) = delete; + DDGEdge(DDGNode &N, EdgeKind K) : DDGEdgeBase(N), Kind(K) {} + DDGEdge(const DDGEdge &E) : DDGEdgeBase(E), Kind(E.getKind()) {} + DDGEdge(DDGEdge &&E) : DDGEdgeBase(std::move(E)), Kind(E.Kind) {} + DDGEdge &operator=(const DDGEdge &E) { + DDGEdgeBase::operator=(E); + Kind = E.Kind; + return *this; + } + + DDGEdge &operator=(DDGEdge &&E) { + DDGEdgeBase::operator=(std::move(E)); + Kind = E.Kind; + return *this; + } + + /// Get the edge kind + EdgeKind getKind() const { return Kind; }; + + /// Return true if this is a def-use edge, and false otherwise. + bool isDefUse() const { return Kind == EdgeKind::RegisterDefUse; } + + /// Return true if this is a memory dependence edge, and false otherwise. + bool isMemoryDependence() const { return Kind == EdgeKind::MemoryDependence; } + + /// Return true if this is an edge stemming from the root node, and false + /// otherwise. + bool isRooted() const { return Kind == EdgeKind::Rooted; } + +private: + EdgeKind Kind; +}; + +/// Encapsulate some common data and functionality needed for different +/// variations of data dependence graphs. +template class DependenceGraphInfo { +public: + using DependenceList = SmallVector, 1>; + + DependenceGraphInfo() = delete; + DependenceGraphInfo(const DependenceGraphInfo &G) = delete; + DependenceGraphInfo(const std::string &N, const DependenceInfo &DepInfo) + : Name(N), DI(DepInfo), Root(nullptr) {} + DependenceGraphInfo(DependenceGraphInfo &&G) + : Name(std::move(G.Name)), DI(std::move(G.DI)), Root(G.Root) {} + virtual ~DependenceGraphInfo() {} + + /// Return the label that is used to name this graph. + const StringRef getName() const { return Name; } + + /// Return the root node of the graph. + NodeType &getRoot() const { + assert(Root && "Root node is not available yet. Graph construction may " + "still be in progress\n"); + return *Root; + } + +protected: + // Name of the graph. + std::string Name; + + // Store a copy of DependenceInfo in the graph, so that individual memory + // dependencies don't need to be stored. Instead when the dependence is + // queried it is recomputed using @DI. + const DependenceInfo DI; + + // A special node in the graph that has an edge to every connected component of + // the graph, to ensure all nodes are reachable in a graph walk. 
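The edge kinds above can be queried in the same way. A small sketch, using only the API declared in this header, that counts the memory-dependence edges leaving a node; the helper itself is hypothetical:

#include "llvm/Analysis/DDG.h"

using namespace llvm;

// Hypothetical helper: a DDGNode iterates over its outgoing DDGEdge objects,
// so the edge-kind predicates can be used to filter them.
static unsigned countMemoryDepEdges(const DDGNode &N) {
  unsigned Count = 0;
  for (const DDGEdge *E : N)
    if (E->isMemoryDependence())
      ++Count;
  return Count;
}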
+ NodeType *Root = nullptr; +}; + +using DDGInfo = DependenceGraphInfo; + +/// Data Dependency Graph +class DataDependenceGraph : public DDGBase, public DDGInfo { + friend class DDGBuilder; + +public: + using NodeType = DDGNode; + using EdgeType = DDGEdge; + + DataDependenceGraph() = delete; + DataDependenceGraph(const DataDependenceGraph &G) = delete; + DataDependenceGraph(DataDependenceGraph &&G) + : DDGBase(std::move(G)), DDGInfo(std::move(G)) {} + DataDependenceGraph(Function &F, DependenceInfo &DI); + DataDependenceGraph(const Loop &L, DependenceInfo &DI); + ~DataDependenceGraph(); + +protected: + /// Add node \p N to the graph, if it's not added yet, and keep track of + /// the root node. Return true if node is successfully added. + bool addNode(NodeType &N); + +}; + +/// Concrete implementation of a pure data dependence graph builder. This class +/// provides custom implementation for the pure-virtual functions used in the +/// generic dependence graph build algorithm. +/// +/// For information about time complexity of the build algorithm see the +/// comments near the declaration of AbstractDependenceGraphBuilder. +class DDGBuilder : public AbstractDependenceGraphBuilder { +public: + DDGBuilder(DataDependenceGraph &G, DependenceInfo &D, + const BasicBlockListType &BBs) + : AbstractDependenceGraphBuilder(G, D, BBs) {} + DDGNode &createRootNode() final override { + auto *RN = new RootDDGNode(); + assert(RN && "Failed to allocate memory for DDG root node."); + Graph.addNode(*RN); + return *RN; + } + DDGNode &createFineGrainedNode(Instruction &I) final override { + auto *SN = new SimpleDDGNode(I); + assert(SN && "Failed to allocate memory for simple DDG node."); + Graph.addNode(*SN); + return *SN; + } + DDGEdge &createDefUseEdge(DDGNode &Src, DDGNode &Tgt) final override { + auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::RegisterDefUse); + assert(E && "Failed to allocate memory for edge"); + Graph.connect(Src, Tgt, *E); + return *E; + } + DDGEdge &createMemoryEdge(DDGNode &Src, DDGNode &Tgt) final override { + auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::MemoryDependence); + assert(E && "Failed to allocate memory for edge"); + Graph.connect(Src, Tgt, *E); + return *E; + } + DDGEdge &createRootedEdge(DDGNode &Src, DDGNode &Tgt) final override { + auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::Rooted); + assert(E && "Failed to allocate memory for edge"); + assert(isa(Src) && "Expected root node"); + Graph.connect(Src, Tgt, *E); + return *E; + } + +}; + +raw_ostream &operator<<(raw_ostream &OS, const DDGNode &N); +raw_ostream &operator<<(raw_ostream &OS, const DDGNode::NodeKind K); +raw_ostream &operator<<(raw_ostream &OS, const DDGEdge &E); +raw_ostream &operator<<(raw_ostream &OS, const DDGEdge::EdgeKind K); +raw_ostream &operator<<(raw_ostream &OS, const DataDependenceGraph &G); + +//===--------------------------------------------------------------------===// +// DDG Analysis Passes +//===--------------------------------------------------------------------===// + +/// Analysis pass that builds the DDG for a loop. +class DDGAnalysis : public AnalysisInfoMixin { +public: + using Result = std::unique_ptr; + Result run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR); + +private: + friend AnalysisInfoMixin; + static AnalysisKey Key; +}; + +/// Textual printer pass for the DDG of a loop. 
+class DDGAnalysisPrinterPass : public PassInfoMixin { +public: + explicit DDGAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); + +private: + raw_ostream &OS; +}; + +//===--------------------------------------------------------------------===// +// GraphTraits specializations for the DDG +//===--------------------------------------------------------------------===// + +/// non-const versions of the grapth trait specializations for DDG +template <> struct GraphTraits { + using NodeRef = DDGNode *; + + static DDGNode *DDGGetTargetNode(DGEdge *P) { + return &P->getTargetNode(); + } + + // Provide a mapped iterator so that the GraphTrait-based implementations can + // find the target nodes without having to explicitly go through the edges. + using ChildIteratorType = + mapped_iterator; + using ChildEdgeIteratorType = DDGNode::iterator; + + static NodeRef getEntryNode(NodeRef N) { return N; } + static ChildIteratorType child_begin(NodeRef N) { + return ChildIteratorType(N->begin(), &DDGGetTargetNode); + } + static ChildIteratorType child_end(NodeRef N) { + return ChildIteratorType(N->end(), &DDGGetTargetNode); + } + + static ChildEdgeIteratorType child_edge_begin(NodeRef N) { + return N->begin(); + } + static ChildEdgeIteratorType child_edge_end(NodeRef N) { return N->end(); } +}; + +template <> +struct GraphTraits : public GraphTraits { + using nodes_iterator = DataDependenceGraph::iterator; + static NodeRef getEntryNode(DataDependenceGraph *DG) { + return &DG->getRoot(); + } + static nodes_iterator nodes_begin(DataDependenceGraph *DG) { + return DG->begin(); + } + static nodes_iterator nodes_end(DataDependenceGraph *DG) { return DG->end(); } +}; + +/// const versions of the grapth trait specializations for DDG +template <> struct GraphTraits { + using NodeRef = const DDGNode *; + + static const DDGNode *DDGGetTargetNode(const DGEdge *P) { + return &P->getTargetNode(); + } + + // Provide a mapped iterator so that the GraphTrait-based implementations can + // find the target nodes without having to explicitly go through the edges. 
+ using ChildIteratorType = + mapped_iterator; + using ChildEdgeIteratorType = DDGNode::const_iterator; + + static NodeRef getEntryNode(NodeRef N) { return N; } + static ChildIteratorType child_begin(NodeRef N) { + return ChildIteratorType(N->begin(), &DDGGetTargetNode); + } + static ChildIteratorType child_end(NodeRef N) { + return ChildIteratorType(N->end(), &DDGGetTargetNode); + } + + static ChildEdgeIteratorType child_edge_begin(NodeRef N) { + return N->begin(); + } + static ChildEdgeIteratorType child_edge_end(NodeRef N) { return N->end(); } +}; + +template <> +struct GraphTraits + : public GraphTraits { + using nodes_iterator = DataDependenceGraph::const_iterator; + static NodeRef getEntryNode(const DataDependenceGraph *DG) { + return &DG->getRoot(); + } + static nodes_iterator nodes_begin(const DataDependenceGraph *DG) { + return DG->begin(); + } + static nodes_iterator nodes_end(const DataDependenceGraph *DG) { + return DG->end(); + } +}; + +} // namespace llvm + +#endif // LLVM_ANALYSIS_DDG_H diff --git a/include/llvm/Analysis/DOTGraphTraitsPass.h b/include/llvm/Analysis/DOTGraphTraitsPass.h index 0410a3314659..c9e8df5db1c2 100644 --- a/include/llvm/Analysis/DOTGraphTraitsPass.h +++ b/include/llvm/Analysis/DOTGraphTraitsPass.h @@ -99,7 +99,7 @@ public: errs() << "Writing '" << Filename << "'..."; - raw_fd_ostream File(Filename, EC, sys::fs::F_Text); + raw_fd_ostream File(Filename, EC, sys::fs::OF_Text); std::string GraphName = DOTGraphTraits::getGraphName(Graph); std::string Title = GraphName + " for '" + F.getName().str() + "' function"; @@ -162,7 +162,7 @@ public: errs() << "Writing '" << Filename << "'..."; - raw_fd_ostream File(Filename, EC, sys::fs::F_Text); + raw_fd_ostream File(Filename, EC, sys::fs::OF_Text); std::string Title = DOTGraphTraits::getGraphName(Graph); if (!EC) diff --git a/include/llvm/Analysis/DependenceGraphBuilder.h b/include/llvm/Analysis/DependenceGraphBuilder.h new file mode 100644 index 000000000000..5f4bdb47043b --- /dev/null +++ b/include/llvm/Analysis/DependenceGraphBuilder.h @@ -0,0 +1,119 @@ +//===- llvm/Analysis/DependenceGraphBuilder.h -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a builder interface that can be used to populate dependence +// graphs such as DDG and PDG. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H +#define LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H + +#include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Instructions.h" + +namespace llvm { + +/// This abstract builder class defines a set of high-level steps for creating +/// DDG-like graphs. The client code is expected to inherit from this class and +/// define concrete implementation for each of the pure virtual functions used +/// in the high-level algorithm. 
+template class AbstractDependenceGraphBuilder { +protected: + using BasicBlockListType = SmallVectorImpl; + +private: + using NodeType = typename GraphType::NodeType; + using EdgeType = typename GraphType::EdgeType; + +public: + using ClassesType = EquivalenceClasses; + using NodeListType = SmallVector; + + AbstractDependenceGraphBuilder(GraphType &G, DependenceInfo &D, + const BasicBlockListType &BBs) + : Graph(G), DI(D), BBList(BBs) {} + virtual ~AbstractDependenceGraphBuilder() {} + + /// The main entry to the graph construction algorithm. It starts by + /// creating nodes in increasing order of granularity and then + /// adds def-use and memory edges. + /// + /// The algorithmic complexity of this implementation is O(V^2 * I^2), where V + /// is the number of vertecies (nodes) and I is the number of instructions in + /// each node. The total number of instructions, N, is equal to V * I, + /// therefore the worst-case time complexity is O(N^2). The average time + /// complexity is O((N^2)/2). + void populate() { + createFineGrainedNodes(); + createDefUseEdges(); + createMemoryDependencyEdges(); + createAndConnectRootNode(); + } + + /// Create fine grained nodes. These are typically atomic nodes that + /// consist of a single instruction. + void createFineGrainedNodes(); + + /// Analyze the def-use chains and create edges from the nodes containing + /// definitions to the nodes containing the uses. + void createDefUseEdges(); + + /// Analyze data dependencies that exist between memory loads or stores, + /// in the graph nodes and create edges between them. + void createMemoryDependencyEdges(); + + /// Create a root node and add edges such that each node in the graph is + /// reachable from the root. + void createAndConnectRootNode(); + +protected: + /// Create the root node of the graph. + virtual NodeType &createRootNode() = 0; + + /// Create an atomic node in the graph given a single instruction. + virtual NodeType &createFineGrainedNode(Instruction &I) = 0; + + /// Create a def-use edge going from \p Src to \p Tgt. + virtual EdgeType &createDefUseEdge(NodeType &Src, NodeType &Tgt) = 0; + + /// Create a memory dependence edge going from \p Src to \p Tgt. + virtual EdgeType &createMemoryEdge(NodeType &Src, NodeType &Tgt) = 0; + + /// Create a rooted edge going from \p Src to \p Tgt . + virtual EdgeType &createRootedEdge(NodeType &Src, NodeType &Tgt) = 0; + + /// Deallocate memory of edge \p E. + virtual void destroyEdge(EdgeType &E) { delete &E; } + + /// Deallocate memory of node \p N. + virtual void destroyNode(NodeType &N) { delete &N; } + + /// Map types to map instructions to nodes used when populating the graph. + using InstToNodeMap = DenseMap; + + /// Reference to the graph that gets built by a concrete implementation of + /// this builder. + GraphType &Graph; + + /// Dependence information used to create memory dependence edges in the + /// graph. + DependenceInfo &DI; + + /// The list of basic blocks to consider when building the graph. + const BasicBlockListType &BBList; + + /// A mapping from instructions to the corresponding nodes in the graph. 
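The populate() sequence documented above is what the concrete graph classes run internally. Below is a sketch of driving it through the DDGBuilder declared earlier in DDG.h; in-tree this is done by DataDependenceGraph's own constructors, so the free function and its parameters are assumptions for illustration only:

#include "llvm/Analysis/DDG.h"

using namespace llvm;

// Hypothetical driver mirroring what DataDependenceGraph's constructors do:
// create fine-grained nodes, add def-use and memory-dependence edges, then
// connect the root node so that every node is reachable from it.
static void buildDDG(DataDependenceGraph &G, DependenceInfo &DI,
                     const SmallVectorImpl<BasicBlock *> &BBs) {
  DDGBuilder(G, DI, BBs).populate();
}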
+ InstToNodeMap IMap; +}; + +} // namespace llvm + +#endif // LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H diff --git a/include/llvm/Analysis/DivergenceAnalysis.h b/include/llvm/Analysis/DivergenceAnalysis.h index 3cfb9d13df94..2fac9c8b4b34 100644 --- a/include/llvm/Analysis/DivergenceAnalysis.h +++ b/include/llvm/Analysis/DivergenceAnalysis.h @@ -73,9 +73,12 @@ public: /// operands bool isAlwaysUniform(const Value &Val) const; - /// \brief Whether \p Val is a divergent value + /// \brief Whether \p Val is divergent at its definition. bool isDivergent(const Value &Val) const; + /// \brief Whether \p U is divergent. Uses of a uniform value can be divergent. + bool isDivergentUse(const Use &U) const; + void print(raw_ostream &OS, const Module *) const; private: @@ -189,12 +192,19 @@ public: /// The GPU kernel this analysis result is for const Function &getFunction() const { return DA.getFunction(); } - /// Whether \p V is divergent. + /// Whether \p V is divergent at its definition. bool isDivergent(const Value &V) const; - /// Whether \p V is uniform/non-divergent + /// Whether \p U is divergent. Uses of a uniform value can be divergent. + bool isDivergentUse(const Use &U) const; + + /// Whether \p V is uniform/non-divergent. bool isUniform(const Value &V) const { return !isDivergent(V); } + /// Whether \p U is uniform/non-divergent. Uses of a uniform value can be + /// divergent. + bool isUniformUse(const Use &U) const { return !isDivergentUse(U); } + /// Print all divergent values in the kernel. void print(raw_ostream &OS, const Module *) const; }; diff --git a/include/llvm/Analysis/GlobalsModRef.h b/include/llvm/Analysis/GlobalsModRef.h index d3fcfc2d41ab..5d1c5a05206a 100644 --- a/include/llvm/Analysis/GlobalsModRef.h +++ b/include/llvm/Analysis/GlobalsModRef.h @@ -34,7 +34,7 @@ class GlobalsAAResult : public AAResultBase { class FunctionInfo; const DataLayout &DL; - const TargetLibraryInfo &TLI; + std::function GetTLI; /// The globals that do not have their addresses taken. SmallPtrSet NonAddressTakenGlobals; @@ -72,14 +72,18 @@ class GlobalsAAResult : public AAResultBase { /// could perform to the memory utilization here if this becomes a problem. std::list Handles; - explicit GlobalsAAResult(const DataLayout &DL, const TargetLibraryInfo &TLI); + explicit GlobalsAAResult( + const DataLayout &DL, + std::function GetTLI); public: GlobalsAAResult(GlobalsAAResult &&Arg); ~GlobalsAAResult(); - static GlobalsAAResult analyzeModule(Module &M, const TargetLibraryInfo &TLI, - CallGraph &CG); + static GlobalsAAResult + analyzeModule(Module &M, + std::function GetTLI, + CallGraph &CG); //------------------------------------------------ // Implement the AliasAnalysis API diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h index 054ffca7215e..a5ffca13046b 100644 --- a/include/llvm/Analysis/InstructionSimplify.h +++ b/include/llvm/Analysis/InstructionSimplify.h @@ -31,6 +31,7 @@ #ifndef LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H #define LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H +#include "llvm/ADT/SetVector.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Operator.h" #include "llvm/IR/User.h" @@ -141,6 +142,13 @@ Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF, Value *SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q); +/// Given operands for the multiplication of a FMA, fold the result or return +/// null. 
In contrast to SimplifyFMulInst, this function will not perform +/// simplifications whose unrounded results differ when rounded to the argument +/// type. +Value *SimplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); + /// Given operands for a Mul, fold the result or return null. Value *SimplifyMulInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); @@ -234,21 +242,19 @@ Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, /// Given operand for a UnaryOperator, fold the result or return null. Value *SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q); -/// Given operand for an FP UnaryOperator, fold the result or return null. -/// In contrast to SimplifyUnOp, try to use FastMathFlag when folding the -/// result. In case we don't need FastMathFlags, simply fall to SimplifyUnOp. -Value *SimplifyFPUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF, - const SimplifyQuery &Q); +/// Given operand for a UnaryOperator, fold the result or return null. +/// Try to use FastMathFlags when folding the result. +Value *SimplifyUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF, + const SimplifyQuery &Q); /// Given operands for a BinaryOperator, fold the result or return null. Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q); -/// Given operands for an FP BinaryOperator, fold the result or return null. -/// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the -/// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp. -Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, - FastMathFlags FMF, const SimplifyQuery &Q); +/// Given operands for a BinaryOperator, fold the result or return null. +/// Try to use FastMathFlags when folding the result. +Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + FastMathFlags FMF, const SimplifyQuery &Q); /// Given a callsite, fold the result or return null. Value *SimplifyCall(CallBase *Call, const SimplifyQuery &Q); @@ -263,12 +269,14 @@ Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, /// This first performs a normal RAUW of I with SimpleV. It then recursively /// attempts to simplify those users updated by the operation. The 'I' /// instruction must not be equal to the simplified value 'SimpleV'. +/// If UnsimplifiedUsers is provided, instructions that could not be simplified +/// are added to it. /// /// The function returns true if any simplifications were performed. -bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr); +bool replaceAndRecursivelySimplify( + Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr, + SmallSetVector *UnsimplifiedUsers = nullptr); /// Recursively attempt to simplify an instruction. /// diff --git a/include/llvm/Analysis/LazyCallGraph.h b/include/llvm/Analysis/LazyCallGraph.h index 2d83929211e2..20a35bef189b 100644 --- a/include/llvm/Analysis/LazyCallGraph.h +++ b/include/llvm/Analysis/LazyCallGraph.h @@ -931,7 +931,8 @@ public: /// This sets up the graph and computes all of the entry points of the graph. /// No function definitions are scanned until their nodes in the graph are /// requested during traversal. 
- LazyCallGraph(Module &M, TargetLibraryInfo &TLI); + LazyCallGraph(Module &M, + function_ref GetTLI); LazyCallGraph(LazyCallGraph &&G); LazyCallGraph &operator=(LazyCallGraph &&RHS); @@ -1267,7 +1268,12 @@ public: /// This just builds the set of entry points to the call graph. The rest is /// built lazily as it is walked. LazyCallGraph run(Module &M, ModuleAnalysisManager &AM) { - return LazyCallGraph(M, AM.getResult(M)); + FunctionAnalysisManager &FAM = + AM.getResult(M).getManager(); + auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult(F); + }; + return LazyCallGraph(M, GetTLI); } }; diff --git a/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/include/llvm/Analysis/LegacyDivergenceAnalysis.h index 0a338b816640..e33b8f4129f3 100644 --- a/include/llvm/Analysis/LegacyDivergenceAnalysis.h +++ b/include/llvm/Analysis/LegacyDivergenceAnalysis.h @@ -39,17 +39,18 @@ public: void print(raw_ostream &OS, const Module *) const override; // Returns true if V is divergent at its definition. - // - // Even if this function returns false, V may still be divergent when used - // in a different basic block. bool isDivergent(const Value *V) const; + // Returns true if U is divergent. Uses of a uniform value can be divergent. + bool isDivergentUse(const Use *U) const; + // Returns true if V is uniform/non-divergent. - // - // Even if this function returns true, V may still be divergent when used - // in a different basic block. bool isUniform(const Value *V) const { return !isDivergent(V); } + // Returns true if U is uniform/non-divergent. Uses of a uniform value can be + // divergent. + bool isUniformUse(const Use *U) const { return !isDivergentUse(U); } + // Keep the analysis results uptodate by removing an erased value. void removeValue(const Value *V) { DivergentValues.erase(V); } @@ -62,6 +63,9 @@ private: // Stores all divergent values. DenseSet DivergentValues; + + // Stores divergent uses of possibly uniform values. + DenseSet DivergentUses; }; } // End llvm namespace diff --git a/include/llvm/Analysis/Loads.h b/include/llvm/Analysis/Loads.h index 5df6bb02308d..9604b2521e89 100644 --- a/include/llvm/Analysis/Loads.h +++ b/include/llvm/Analysis/Loads.h @@ -20,7 +20,9 @@ namespace llvm { class DataLayout; +class Loop; class MDNode; +class ScalarEvolution; /// Return true if this is always a dereferenceable pointer. If the context /// instruction is specified perform context-sensitive analysis and return true @@ -35,7 +37,8 @@ bool isDereferenceablePointer(const Value *V, Type *Ty, /// performs context-sensitive analysis and returns true if the pointer is /// dereferenceable at the specified instruction. bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, - unsigned Align, const DataLayout &DL, + MaybeAlign Alignment, + const DataLayout &DL, const Instruction *CtxI = nullptr, const DominatorTree *DT = nullptr); @@ -43,7 +46,7 @@ bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, /// greater or equal than requested. If the context instruction is specified /// performs context-sensitive analysis and returns true if the pointer is /// dereferenceable at the specified instruction. 
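The dereferenceability helpers above now take the explicit Align/MaybeAlign types rather than a raw unsigned. A hedged usage sketch of the Type-based overload shown above; the helper name and the 4-byte alignment are illustrative assumptions:

#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Alignment.h"

using namespace llvm;

// Hypothetical helper: is a 4-byte-aligned i32 load through Ptr known to be
// dereferenceable regardless of context (no CtxI/DT passed)?
static bool isSafeI32Deref(const Value *Ptr, const DataLayout &DL) {
  Type *Int32Ty = Type::getInt32Ty(Ptr->getContext());
  return isDereferenceableAndAlignedPointer(Ptr, Int32Ty, MaybeAlign(4), DL);
}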
-bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, +bool isDereferenceableAndAlignedPointer(const Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, const Instruction *CtxI = nullptr, const DominatorTree *DT = nullptr); @@ -56,11 +59,22 @@ bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, /// If it is not obviously safe to load from the specified pointer, we do a /// quick local scan of the basic block containing ScanFrom, to determine if /// the address is already accessed. -bool isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, +bool isSafeToLoadUnconditionally(Value *V, MaybeAlign Alignment, APInt &Size, const DataLayout &DL, Instruction *ScanFrom = nullptr, const DominatorTree *DT = nullptr); +/// Return true if we can prove that the given load (which is assumed to be +/// within the specified loop) would access only dereferenceable memory, and +/// be properly aligned on every iteration of the specified loop regardless of +/// its placement within the loop. (i.e. does not require predication beyond +/// that required by the the header itself and could be hoisted into the header +/// if desired.) This is more powerful than the variants above when the +/// address loaded from is analyzeable by SCEV. +bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, + ScalarEvolution &SE, + DominatorTree &DT); + /// Return true if we know that executing a load from this value cannot trap. /// /// If DT and ScanFrom are specified this method performs context-sensitive @@ -69,7 +83,7 @@ bool isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, /// If it is not obviously safe to load from the specified pointer, we do a /// quick local scan of the basic block containing ScanFrom, to determine if /// the address is already accessed. -bool isSafeToLoadUnconditionally(Value *V, Type *Ty, unsigned Align, +bool isSafeToLoadUnconditionally(Value *V, Type *Ty, MaybeAlign Alignment, const DataLayout &DL, Instruction *ScanFrom = nullptr, const DominatorTree *DT = nullptr); diff --git a/include/llvm/Analysis/LoopAnalysisManager.h b/include/llvm/Analysis/LoopAnalysisManager.h index 368a810cfa67..a2e65a7310af 100644 --- a/include/llvm/Analysis/LoopAnalysisManager.h +++ b/include/llvm/Analysis/LoopAnalysisManager.h @@ -86,8 +86,9 @@ typedef InnerAnalysisManagerProxy template <> class LoopAnalysisManagerFunctionProxy::Result { public: explicit Result(LoopAnalysisManager &InnerAM, LoopInfo &LI) - : InnerAM(&InnerAM), LI(&LI) {} - Result(Result &&Arg) : InnerAM(std::move(Arg.InnerAM)), LI(Arg.LI) { + : InnerAM(&InnerAM), LI(&LI), MSSAUsed(false) {} + Result(Result &&Arg) + : InnerAM(std::move(Arg.InnerAM)), LI(Arg.LI), MSSAUsed(Arg.MSSAUsed) { // We have to null out the analysis manager in the moved-from state // because we are taking ownership of the responsibilty to clear the // analysis state. @@ -96,6 +97,7 @@ public: Result &operator=(Result &&RHS) { InnerAM = RHS.InnerAM; LI = RHS.LI; + MSSAUsed = RHS.MSSAUsed; // We have to null out the analysis manager in the moved-from state // because we are taking ownership of the responsibilty to clear the // analysis state. @@ -112,6 +114,9 @@ public: InnerAM->clear(); } + /// Mark MemorySSA as used so we can invalidate self if MSSA is invalidated. + void markMSSAUsed() { MSSAUsed = true; } + /// Accessor for the analysis manager. 
LoopAnalysisManager &getManager() { return *InnerAM; } @@ -130,6 +135,7 @@ public: private: LoopAnalysisManager *InnerAM; LoopInfo *LI; + bool MSSAUsed; }; /// Provide a specialized run method for the \c LoopAnalysisManagerFunctionProxy diff --git a/include/llvm/Analysis/LoopCacheAnalysis.h b/include/llvm/Analysis/LoopCacheAnalysis.h new file mode 100644 index 000000000000..ffec78b6db2c --- /dev/null +++ b/include/llvm/Analysis/LoopCacheAnalysis.h @@ -0,0 +1,281 @@ +//===- llvm/Analysis/LoopCacheAnalysis.h ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the interface for the loop cache analysis. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_LOOPCACHEANALYSIS_H +#define LLVM_ANALYSIS_LOOPCACHEANALYSIS_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +class LPMUpdater; +using CacheCostTy = int64_t; +using LoopVectorTy = SmallVector; + +/// Represents a memory reference as a base pointer and a set of indexing +/// operations. For example given the array reference A[i][2j+1][3k+2] in a +/// 3-dim loop nest: +/// for(i=0;i A +/// Subscripts -> [{0,+,1}<%for.i>][{1,+,2}<%for.j>][{2,+,3}<%for.k>] +/// Sizes -> [m][o][4] +class IndexedReference { + friend raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R); + +public: + /// Construct an indexed reference given a \p StoreOrLoadInst instruction. + IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI, + ScalarEvolution &SE); + + bool isValid() const { return IsValid; } + const SCEV *getBasePointer() const { return BasePointer; } + size_t getNumSubscripts() const { return Subscripts.size(); } + const SCEV *getSubscript(unsigned SubNum) const { + assert(SubNum < getNumSubscripts() && "Invalid subscript number"); + return Subscripts[SubNum]; + } + const SCEV *getFirstSubscript() const { + assert(!Subscripts.empty() && "Expecting non-empty container"); + return Subscripts.front(); + } + const SCEV *getLastSubscript() const { + assert(!Subscripts.empty() && "Expecting non-empty container"); + return Subscripts.back(); + } + + /// Return true/false if the current object and the indexed reference \p Other + /// are/aren't in the same cache line of size \p CLS. Two references are in + /// the same chace line iff the distance between them in the innermost + /// dimension is less than the cache line size. Return None if unsure. + Optional hasSpacialReuse(const IndexedReference &Other, unsigned CLS, + AliasAnalysis &AA) const; + + /// Return true if the current object and the indexed reference \p Other + /// have distance smaller than \p MaxDistance in the dimension associated with + /// the given loop \p L. Return false if the distance is not smaller than \p + /// MaxDistance and None if unsure. 
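To make the reuse conditions above concrete: with a 64-byte cache line and 8-byte elements, A[i] and A[i+1] are 8 bytes apart in the innermost dimension, so they fall in the same cache line and exhibit spatial reuse; the same numbers plug into the (TripCount * stride) / cache_line_size reference-cost model described further down in this header. A tiny arithmetic sketch in which all concrete sizes are assumptions:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t CLS = 64;      // assumed cache line size in bytes
  const uint64_t ElemSize = 8;  // e.g. sizeof(double)

  // A[i] vs. A[i+1]: distance in the innermost dimension.
  const uint64_t Distance = 1 * ElemSize;
  assert(Distance < CLS && "same cache line, hence spatial reuse");

  // Cost of a consecutive reference: (TripCount * stride) / cache_line_size.
  const uint64_t TripCount = 1024;
  const uint64_t RefCost = (TripCount * ElemSize) / CLS;
  assert(RefCost == 128);       // 1024 * 8 / 64 cache lines touched
  return 0;
}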
+ Optional hasTemporalReuse(const IndexedReference &Other, + unsigned MaxDistance, const Loop &L, + DependenceInfo &DI, AliasAnalysis &AA) const; + + /// Compute the cost of the reference w.r.t. the given loop \p L when it is + /// considered in the innermost position in the loop nest. + /// The cost is defined as: + /// - equal to one if the reference is loop invariant, or + /// - equal to '(TripCount * stride) / cache_line_size' if: + /// + the reference stride is less than the cache line size, and + /// + the coefficient of this loop's index variable used in all other + /// subscripts is zero + /// - or otherwise equal to 'TripCount'. + CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const; + +private: + /// Attempt to delinearize the indexed reference. + bool delinearize(const LoopInfo &LI); + + /// Return true if the index reference is invariant with respect to loop \p L. + bool isLoopInvariant(const Loop &L) const; + + /// Return true if the indexed reference is 'consecutive' in loop \p L. + /// An indexed reference is 'consecutive' if the only coefficient that uses + /// the loop induction variable is the rightmost one, and the access stride is + /// smaller than the cache line size \p CLS. + bool isConsecutive(const Loop &L, unsigned CLS) const; + + /// Return the coefficient used in the rightmost dimension. + const SCEV *getLastCoefficient() const; + + /// Return true if the coefficient corresponding to induction variable of + /// loop \p L in the given \p Subscript is zero or is loop invariant in \p L. + bool isCoeffForLoopZeroOrInvariant(const SCEV &Subscript, + const Loop &L) const; + + /// Verify that the given \p Subscript is 'well formed' (must be a simple add + /// recurrence). + bool isSimpleAddRecurrence(const SCEV &Subscript, const Loop &L) const; + + /// Return true if the given reference \p Other is definetely aliased with + /// the indexed reference represented by this class. + bool isAliased(const IndexedReference &Other, AliasAnalysis &AA) const; + +private: + /// True if the reference can be delinearized, false otherwise. + bool IsValid = false; + + /// Represent the memory reference instruction. + Instruction &StoreOrLoadInst; + + /// The base pointer of the memory reference. + const SCEV *BasePointer = nullptr; + + /// The subscript (indexes) of the memory reference. + SmallVector Subscripts; + + /// The dimensions of the memory reference. + SmallVector Sizes; + + ScalarEvolution &SE; +}; + +/// A reference group represents a set of memory references that exhibit +/// temporal or spacial reuse. Two references belong to the same +/// reference group with respect to a inner loop L iff: +/// 1. they have a loop independent dependency, or +/// 2. they have a loop carried dependence with a small dependence distance +/// (e.g. less than 2) carried by the inner loop, or +/// 3. they refer to the same array, and the subscript in their innermost +/// dimension is less than or equal to 'd' (where 'd' is less than the cache +/// line size) +/// +/// Intuitively a reference group represents memory references that access +/// the same cache line. Conditions 1,2 above account for temporal reuse, while +/// contition 3 accounts for spacial reuse. +using ReferenceGroupTy = SmallVector, 8>; +using ReferenceGroupsTy = SmallVector; + +/// \c CacheCost represents the estimated cost of a inner loop as the number of +/// cache lines used by the memory references it contains. 
+/// The 'cache cost' of a loop 'L' in a loop nest 'LN' is computed as the sum of +/// the cache costs of all of its reference groups when the loop is considered +/// to be in the innermost position in the nest. +/// A reference group represents memory references that fall into the same cache +/// line. Each reference group is analysed with respect to the innermost loop in +/// a loop nest. The cost of a reference is defined as follow: +/// - one if it is loop invariant w.r.t the innermost loop, +/// - equal to the loop trip count divided by the cache line times the +/// reference stride if the reference stride is less than the cache line +/// size (CLS), and the coefficient of this loop's index variable used in all +/// other subscripts is zero (e.g. RefCost = TripCount/(CLS/RefStride)) +/// - equal to the innermost loop trip count if the reference stride is greater +/// or equal to the cache line size CLS. +class CacheCost { + friend raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC); + using LoopTripCountTy = std::pair; + using LoopCacheCostTy = std::pair; + +public: + static CacheCostTy constexpr InvalidCost = -1; + + /// Construct a CacheCost object for the loop nest described by \p Loops. + /// The optional parameter \p TRT can be used to specify the max. distance + /// between array elements accessed in a loop so that the elements are + /// classified to have temporal reuse. + CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, + TargetTransformInfo &TTI, AliasAnalysis &AA, DependenceInfo &DI, + Optional TRT = None); + + /// Create a CacheCost for the loop nest rooted by \p Root. + /// The optional parameter \p TRT can be used to specify the max. distance + /// between array elements accessed in a loop so that the elements are + /// classified to have temporal reuse. + static std::unique_ptr + getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, + Optional TRT = None); + + /// Return the estimated cost of loop \p L if the given loop is part of the + /// loop nest associated with this object. Return -1 otherwise. + CacheCostTy getLoopCost(const Loop &L) const { + auto IT = std::find_if( + LoopCosts.begin(), LoopCosts.end(), + [&L](const LoopCacheCostTy &LCC) { return LCC.first == &L; }); + return (IT != LoopCosts.end()) ? (*IT).second : -1; + } + + /// Return the estimated ordered loop costs. + const ArrayRef getLoopCosts() const { return LoopCosts; } + +private: + /// Calculate the cache footprint of each loop in the nest (when it is + /// considered to be in the innermost position). + void calculateCacheFootprint(); + + /// Partition store/load instructions in the loop nest into reference groups. + /// Two or more memory accesses belong in the same reference group if they + /// share the same cache line. + bool populateReferenceGroups(ReferenceGroupsTy &RefGroups) const; + + /// Calculate the cost of the given loop \p L assuming it is the innermost + /// loop in nest. + CacheCostTy computeLoopCacheCost(const Loop &L, + const ReferenceGroupsTy &RefGroups) const; + + /// Compute the cost of a representative reference in reference group \p RG + /// when the given loop \p L is considered as the innermost loop in the nest. + /// The computed cost is an estimate for the number of cache lines used by the + /// reference group. 
The representative reference cost is defined as: + /// - equal to one if the reference is loop invariant, or + /// - equal to '(TripCount * stride) / cache_line_size' if (a) loop \p L's + /// induction variable is used only in the reference subscript associated + /// with loop \p L, and (b) the reference stride is less than the cache + /// line size, or + /// - TripCount otherwise + CacheCostTy computeRefGroupCacheCost(const ReferenceGroupTy &RG, + const Loop &L) const; + + /// Sort the LoopCosts vector by decreasing cache cost. + void sortLoopCosts() { + sort(LoopCosts, [](const LoopCacheCostTy &A, const LoopCacheCostTy &B) { + return A.second > B.second; + }); + } + +private: + /// Loops in the loop nest associated with this object. + LoopVectorTy Loops; + + /// Trip counts for the loops in the loop nest associated with this object. + SmallVector TripCounts; + + /// Cache costs for the loops in the loop nest associated with this object. + SmallVector LoopCosts; + + /// The max. distance between array elements accessed in a loop so that the + /// elements are classified to have temporal reuse. + Optional TRT; + + const LoopInfo &LI; + ScalarEvolution &SE; + TargetTransformInfo &TTI; + AliasAnalysis &AA; + DependenceInfo &DI; +}; + +raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R); +raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC); + +/// Printer pass for the \c CacheCost results. +class LoopCachePrinterPass : public PassInfoMixin { + raw_ostream &OS; + +public: + explicit LoopCachePrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); +}; + +} // namespace llvm + +#endif // LLVM_ANALYSIS_LOOPCACHEANALYSIS_H diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index 584eb3a8c854..abf3863b0601 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -30,6 +30,9 @@ // instance. In particular, a Loop might be inside such a non-loop SCC, or a // non-loop SCC might contain a sub-SCC which is a Loop. // +// For an overview of terminology used in this API (and thus all of our loop +// analyses or transforms), see docs/LoopTerminology.rst. +// //===----------------------------------------------------------------------===// #ifndef LLVM_ANALYSIS_LOOPINFO_H @@ -570,9 +573,9 @@ public: bool getIncomingAndBackEdge(BasicBlock *&Incoming, BasicBlock *&Backedge) const; - /// Below are some utilities to get loop bounds and induction variable, and - /// check if a given phinode is an auxiliary induction variable, as well as - /// checking if the loop is canonical. + /// Below are some utilities to get the loop guard, loop bounds and induction + /// variable, and to check if a given phinode is an auxiliary induction + /// variable, if the loop is guarded, and if the loop is canonical. /// /// Here is an example: /// \code @@ -604,6 +607,9 @@ public: /// /// - getInductionVariable --> i_1 /// - isAuxiliaryInductionVariable(x) --> true if x == i_1 + /// - getLoopGuardBranch() + /// --> `if (guardcmp) goto preheader; else goto afterloop` + /// - isGuarded() --> true /// - isCanonical --> false struct LoopBounds { /// Return the LoopBounds object if @@ -725,6 +731,31 @@ public: bool isAuxiliaryInductionVariable(PHINode &AuxIndVar, ScalarEvolution &SE) const; + /// Return the loop guard branch, if it exists. 
+ /// + /// This currently only works on simplified loop, as it requires a preheader + /// and a latch to identify the guard. It will work on loops of the form: + /// \code + /// GuardBB: + /// br cond1, Preheader, ExitSucc <== GuardBranch + /// Preheader: + /// br Header + /// Header: + /// ... + /// br Latch + /// Latch: + /// br cond2, Header, ExitBlock + /// ExitBlock: + /// br ExitSucc + /// ExitSucc: + /// \endcode + BranchInst *getLoopGuardBranch() const; + + /// Return true iff the loop is + /// - in simplify rotated form, and + /// - guarded by a loop guard branch. + bool isGuarded() const { return (getLoopGuardBranch() != nullptr); } + /// Return true if the loop induction variable starts at zero and increments /// by one each time through the loop. bool isCanonical(ScalarEvolution &SE) const; diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h index 4c33dac9e21e..8b11e848a195 100644 --- a/include/llvm/Analysis/LoopInfoImpl.h +++ b/include/llvm/Analysis/LoopInfoImpl.h @@ -85,9 +85,9 @@ template bool LoopBase::hasDedicatedExits() const { // Each predecessor of each exit block of a normal loop is contained // within the loop. - SmallVector ExitBlocks; - getExitBlocks(ExitBlocks); - for (BlockT *EB : ExitBlocks) + SmallVector UniqueExitBlocks; + getUniqueExitBlocks(UniqueExitBlocks); + for (BlockT *EB : UniqueExitBlocks) for (BlockT *Predecessor : children>(EB)) if (!contains(Predecessor)) return false; @@ -200,8 +200,6 @@ BlockT *LoopBase::getLoopPredecessor() const { } } - // Make sure there is only one exit out of the preheader. - assert(Out && "Header of loop has no predecessors from outside loop?"); return Out; } diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h index 49f9e58ffad7..a89d76b9e5bd 100644 --- a/include/llvm/Analysis/MemoryBuiltins.h +++ b/include/llvm/Analysis/MemoryBuiltins.h @@ -58,6 +58,9 @@ class Value; /// like). bool isAllocationFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast = false); +bool isAllocationFn(const Value *V, + function_ref GetTLI, + bool LookThroughBitCast = false); /// Tests if a value is a call or invoke to a function that returns a /// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions). @@ -68,6 +71,9 @@ bool isNoAliasFn(const Value *V, const TargetLibraryInfo *TLI, /// allocates uninitialized memory (such as malloc). bool isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast = false); +bool isMallocLikeFn(const Value *V, + function_ref GetTLI, + bool LookThroughBitCast = false); /// Tests if a value is a call or invoke to a library function that /// allocates zero-filled memory (such as calloc). @@ -93,6 +99,16 @@ bool isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, /// reallocates memory (e.g., realloc). bool isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI); +/// Tests if a value is a call or invoke to a library function that +/// allocates memory and throws if an allocation failed (e.g., new). +bool isOpNewLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast = false); + +/// Tests if a value is a call or invoke to a library function that +/// allocates memory (strdup, strndup). +bool isStrdupLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast = false); + //===----------------------------------------------------------------------===// // malloc Call Utility Functions. 
// @@ -100,9 +116,13 @@ bool isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI); /// extractMallocCall - Returns the corresponding CallInst if the instruction /// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we /// ignore InvokeInst here. -const CallInst *extractMallocCall(const Value *I, const TargetLibraryInfo *TLI); -inline CallInst *extractMallocCall(Value *I, const TargetLibraryInfo *TLI) { - return const_cast(extractMallocCall((const Value*)I, TLI)); +const CallInst * +extractMallocCall(const Value *I, + function_ref GetTLI); +inline CallInst * +extractMallocCall(Value *I, + function_ref GetTLI) { + return const_cast(extractMallocCall((const Value *)I, GetTLI)); } /// getMallocType - Returns the PointerType resulting from the malloc call. diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h index e2669c2fa601..e89e5690fad0 100644 --- a/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -362,11 +362,14 @@ private: PhiValues &PV; PredIteratorCache PredCache; + unsigned DefaultBlockScanLimit; + public: MemoryDependenceResults(AliasAnalysis &AA, AssumptionCache &AC, - const TargetLibraryInfo &TLI, - DominatorTree &DT, PhiValues &PV) - : AA(AA), AC(AC), TLI(TLI), DT(DT), PV(PV) {} + const TargetLibraryInfo &TLI, DominatorTree &DT, + PhiValues &PV, unsigned DefaultBlockScanLimit) + : AA(AA), AC(AC), TLI(TLI), DT(DT), PV(PV), + DefaultBlockScanLimit(DefaultBlockScanLimit) {} /// Handle invalidation in the new PM. bool invalidate(Function &F, const PreservedAnalyses &PA, @@ -511,9 +514,14 @@ class MemoryDependenceAnalysis static AnalysisKey Key; + unsigned DefaultBlockScanLimit; + public: using Result = MemoryDependenceResults; + MemoryDependenceAnalysis(); + MemoryDependenceAnalysis(unsigned DefaultBlockScanLimit) : DefaultBlockScanLimit(DefaultBlockScanLimit) { } + MemoryDependenceResults run(Function &F, FunctionAnalysisManager &AM); }; diff --git a/include/llvm/Analysis/MemorySSA.h b/include/llvm/Analysis/MemorySSA.h index b7730be75354..e89bf26a7234 100644 --- a/include/llvm/Analysis/MemorySSA.h +++ b/include/llvm/Analysis/MemorySSA.h @@ -793,6 +793,7 @@ protected: friend class MemorySSAPrinterLegacyPass; friend class MemorySSAUpdater; + void verifyPrevDefInPhis(Function &F) const; void verifyDefUses(Function &F) const; void verifyDomination(Function &F) const; void verifyOrdering(Function &F) const; @@ -830,7 +831,8 @@ protected: void insertIntoListsBefore(MemoryAccess *, const BasicBlock *, AccessList::iterator); MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *, - const MemoryUseOrDef *Template = nullptr); + const MemoryUseOrDef *Template = nullptr, + bool CreationMustSucceed = true); private: template class ClobberWalkerBase; diff --git a/include/llvm/Analysis/MemorySSAUpdater.h b/include/llvm/Analysis/MemorySSAUpdater.h index d4d8040c1ff6..1d34663721e3 100644 --- a/include/llvm/Analysis/MemorySSAUpdater.h +++ b/include/llvm/Analysis/MemorySSAUpdater.h @@ -99,7 +99,7 @@ public: /// load a /// Where a mayalias b, *does* require RenameUses be set to true. void insertDef(MemoryDef *Def, bool RenameUses = false); - void insertUse(MemoryUse *Use); + void insertUse(MemoryUse *Use, bool RenameUses = false); /// Update the MemoryPhi in `To` following an edge deletion between `From` and /// `To`. If `To` becomes unreachable, a call to removeBlocks should be made. 
void removeEdge(BasicBlock *From, BasicBlock *To); @@ -275,6 +275,7 @@ private: getPreviousDefRecursive(BasicBlock *, DenseMap> &); MemoryAccess *recursePhi(MemoryAccess *Phi); + MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi); template MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands); void tryRemoveTrivialPhis(ArrayRef UpdatedPHIs); diff --git a/include/llvm/Analysis/MustExecute.h b/include/llvm/Analysis/MustExecute.h index 3ef539c89d97..87cf9f85c7f1 100644 --- a/include/llvm/Analysis/MustExecute.h +++ b/include/llvm/Analysis/MustExecute.h @@ -7,10 +7,17 @@ //===----------------------------------------------------------------------===// /// \file /// Contains a collection of routines for determining if a given instruction is -/// guaranteed to execute if a given point in control flow is reached. The most +/// guaranteed to execute if a given point in control flow is reached. The most /// common example is an instruction within a loop being provably executed if we /// branch to the header of it's containing loop. /// +/// There are two interfaces available to determine if an instruction is +/// executed once a given point in the control flow is reached: +/// 1) A loop-centric one derived from LoopSafetyInfo. +/// 2) A "must be executed context"-based one implemented in the +/// MustBeExecutedContextExplorer. +/// Please refer to the class comments for more information. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ANALYSIS_MUSTEXECUTE_H @@ -164,6 +171,280 @@ public: virtual ~ICFLoopSafetyInfo() {}; }; -} +struct MustBeExecutedContextExplorer; + +/// Must be executed iterators visit stretches of instructions that are +/// guaranteed to be executed together, potentially with other instruction +/// executed in-between. +/// +/// Given the following code, and assuming all statements are single +/// instructions which transfer execution to the successor (see +/// isGuaranteedToTransferExecutionToSuccessor), there are two possible +/// outcomes. If we start the iterator at A, B, or E, we will visit only A, B, +/// and E. If we start at C or D, we will visit all instructions A-E. +/// +/// \code +/// A; +/// B; +/// if (...) { +/// C; +/// D; +/// } +/// E; +/// \endcode +/// +/// +/// Below is the example extneded with instructions F and G. Now we assume F +/// might not transfer execution to it's successor G. As a result we get the +/// following visit sets: +/// +/// Start Instruction | Visit Set +/// A | A, B, E, F +/// B | A, B, E, F +/// C | A, B, C, D, E, F +/// D | A, B, C, D, E, F +/// E | A, B, E, F +/// F | A, B, E, F +/// G | A, B, E, F, G +/// +/// +/// \code +/// A; +/// B; +/// if (...) { +/// C; +/// D; +/// } +/// E; +/// F; // Might not transfer execution to its successor G. +/// G; +/// \endcode +/// +/// +/// A more complex example involving conditionals, loops, break, and continue +/// is shown below. We again assume all instructions will transmit control to +/// the successor and we assume we can prove the inner loop to be finite. We +/// omit non-trivial branch conditions as the exploration is oblivious to them. +/// Constant branches are assumed to be unconditional in the CFG. The resulting +/// visist sets are shown in the table below. +/// +/// \code +/// A; +/// while (true) { +/// B; +/// if (...) +/// C; +/// if (...) +/// continue; +/// D; +/// if (...) +/// break; +/// do { +/// if (...) 
+/// continue; +/// E; +/// } while (...); +/// F; +/// } +/// G; +/// \endcode +/// +/// Start Instruction | Visit Set +/// A | A, B +/// B | A, B +/// C | A, B, C +/// D | A, B, D +/// E | A, B, D, E, F +/// F | A, B, D, F +/// G | A, B, D, G +/// +/// +/// Note that the examples show optimal visist sets but not necessarily the ones +/// derived by the explorer depending on the available CFG analyses (see +/// MustBeExecutedContextExplorer). Also note that we, depending on the options, +/// the visit set can contain instructions from other functions. +struct MustBeExecutedIterator { + /// Type declarations that make his class an input iterator. + ///{ + typedef const Instruction *value_type; + typedef std::ptrdiff_t difference_type; + typedef const Instruction **pointer; + typedef const Instruction *&reference; + typedef std::input_iterator_tag iterator_category; + ///} + + using ExplorerTy = MustBeExecutedContextExplorer; + + MustBeExecutedIterator(const MustBeExecutedIterator &Other) + : Visited(Other.Visited), Explorer(Other.Explorer), + CurInst(Other.CurInst) {} + + MustBeExecutedIterator(MustBeExecutedIterator &&Other) + : Visited(std::move(Other.Visited)), Explorer(Other.Explorer), + CurInst(Other.CurInst) {} + + MustBeExecutedIterator &operator=(MustBeExecutedIterator &&Other) { + if (this != &Other) { + std::swap(Visited, Other.Visited); + std::swap(CurInst, Other.CurInst); + } + return *this; + } + + ~MustBeExecutedIterator() {} + + /// Pre- and post-increment operators. + ///{ + MustBeExecutedIterator &operator++() { + CurInst = advance(); + return *this; + } + + MustBeExecutedIterator operator++(int) { + MustBeExecutedIterator tmp(*this); + operator++(); + return tmp; + } + ///} + + /// Equality and inequality operators. Note that we ignore the history here. + ///{ + bool operator==(const MustBeExecutedIterator &Other) const { + return CurInst == Other.CurInst; + } + + bool operator!=(const MustBeExecutedIterator &Other) const { + return !(*this == Other); + } + ///} + + /// Return the underlying instruction. + const Instruction *&operator*() { return CurInst; } + const Instruction *getCurrentInst() const { return CurInst; } + + /// Return true if \p I was encountered by this iterator already. + bool count(const Instruction *I) const { return Visited.count(I); } + +private: + using VisitedSetTy = DenseSet; + + /// Private constructors. + MustBeExecutedIterator(ExplorerTy &Explorer, const Instruction *I); + + /// Reset the iterator to its initial state pointing at \p I. + void reset(const Instruction *I); + + /// Try to advance one of the underlying positions (Head or Tail). + /// + /// \return The next instruction in the must be executed context, or nullptr + /// if none was found. + const Instruction *advance(); + + /// A set to track the visited instructions in order to deal with endless + /// loops and recursion. + VisitedSetTy Visited; + + /// A reference to the explorer that created this iterator. + ExplorerTy &Explorer; + + /// The instruction we are currently exposing to the user. There is always an + /// instruction that we know is executed with the given program point, + /// initially the program point itself. + const Instruction *CurInst; + + friend struct MustBeExecutedContextExplorer; +}; + +/// A "must be executed context" for a given program point PP is the set of +/// instructions, potentially before and after PP, that are executed always when +/// PP is reached. 
The MustBeExecutedContextExplorer an interface to explore +/// "must be executed contexts" in a module through the use of +/// MustBeExecutedIterator. +/// +/// The explorer exposes "must be executed iterators" that traverse the must be +/// executed context. There is little information sharing between iterators as +/// the expected use case involves few iterators for "far apart" instructions. +/// If that changes, we should consider caching more intermediate results. +struct MustBeExecutedContextExplorer { + + /// In the description of the parameters we use PP to denote a program point + /// for which the must be executed context is explored, or put differently, + /// for which the MustBeExecutedIterator is created. + /// + /// \param ExploreInterBlock Flag to indicate if instructions in blocks + /// other than the parent of PP should be + /// explored. + MustBeExecutedContextExplorer(bool ExploreInterBlock) + : ExploreInterBlock(ExploreInterBlock), EndIterator(*this, nullptr) {} + + /// Clean up the dynamically allocated iterators. + ~MustBeExecutedContextExplorer() { + DeleteContainerSeconds(InstructionIteratorMap); + } + + /// Iterator-based interface. \see MustBeExecutedIterator. + ///{ + using iterator = MustBeExecutedIterator; + using const_iterator = const MustBeExecutedIterator; + + /// Return an iterator to explore the context around \p PP. + iterator &begin(const Instruction *PP) { + auto *&It = InstructionIteratorMap[PP]; + if (!It) + It = new iterator(*this, PP); + return *It; + } + + /// Return an iterator to explore the cached context around \p PP. + const_iterator &begin(const Instruction *PP) const { + return *InstructionIteratorMap.lookup(PP); + } + + /// Return an universal end iterator. + ///{ + iterator &end() { return EndIterator; } + iterator &end(const Instruction *) { return EndIterator; } + + const_iterator &end() const { return EndIterator; } + const_iterator &end(const Instruction *) const { return EndIterator; } + ///} + + /// Return an iterator range to explore the context around \p PP. + llvm::iterator_range range(const Instruction *PP) { + return llvm::make_range(begin(PP), end(PP)); + } + + /// Return an iterator range to explore the cached context around \p PP. + llvm::iterator_range range(const Instruction *PP) const { + return llvm::make_range(begin(PP), end(PP)); + } + ///} + + /// Return the next instruction that is guaranteed to be executed after \p PP. + /// + /// \param It The iterator that is used to traverse the must be + /// executed context. + /// \param PP The program point for which the next instruction + /// that is guaranteed to execute is determined. + const Instruction * + getMustBeExecutedNextInstruction(MustBeExecutedIterator &It, + const Instruction *PP); + + /// Parameter that limit the performed exploration. See the constructor for + /// their meaning. + ///{ + const bool ExploreInterBlock; + ///} + +private: + /// Map from instructions to associated must be executed iterators. + DenseMap + InstructionIteratorMap; + + /// A unique end iterator. 
+ MustBeExecutedIterator EndIterator; +}; + +} // namespace llvm #endif diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h index d9c97dff8c6e..8562519fa7b1 100644 --- a/include/llvm/Analysis/Passes.h +++ b/include/llvm/Analysis/Passes.h @@ -103,6 +103,13 @@ namespace llvm { // FunctionPass *createMustExecutePrinter(); + //===--------------------------------------------------------------------===// + // + // createMustBeExecutedContextPrinter - This pass prints information about which + // instructions are guaranteed to execute together (run with -analyze). + // + ModulePass *createMustBeExecutedContextPrinter(); + } #endif diff --git a/include/llvm/Analysis/ProfileSummaryInfo.h b/include/llvm/Analysis/ProfileSummaryInfo.h index f309d344b8d1..6693e40ccf22 100644 --- a/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/include/llvm/Analysis/ProfileSummaryInfo.h @@ -52,6 +52,15 @@ private: // because the number of profile counts required to reach the hot // percentile is above a huge threshold. Optional HasHugeWorkingSetSize; + // True if the working set size of the code is considered large, + // because the number of profile counts required to reach the hot + // percentile is above a large threshold. + Optional HasLargeWorkingSetSize; + // Compute the threshold for a given cutoff. + Optional computeThreshold(int PercentileCutoff); + // The map that caches the threshold values. The keys are the percentile + // cutoff values and the values are the corresponding threshold values. + DenseMap ThresholdCache; public: ProfileSummaryInfo(Module &M) : M(M) {} @@ -96,6 +105,8 @@ public: bool AllowSynthetic = false); /// Returns true if the working set size of the code is considered huge. bool hasHugeWorkingSetSize(); + /// Returns true if the working set size of the code is considered large. + bool hasLargeWorkingSetSize(); /// Returns true if \p F has hot function entry. bool isFunctionEntryHot(const Function *F); /// Returns true if \p F contains hot code. @@ -104,14 +115,26 @@ public: bool isFunctionEntryCold(const Function *F); /// Returns true if \p F contains only cold code. bool isFunctionColdInCallGraph(const Function *F, BlockFrequencyInfo &BFI); + /// Returns true if \p F contains hot code with regard to a given hot + /// percentile cutoff value. + bool isFunctionHotInCallGraphNthPercentile(int PercentileCutoff, + const Function *F, + BlockFrequencyInfo &BFI); /// Returns true if count \p C is considered hot. bool isHotCount(uint64_t C); /// Returns true if count \p C is considered cold. bool isColdCount(uint64_t C); + /// Returns true if count \p C is considered hot with regard to a given + /// hot percentile cutoff value. + bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C); /// Returns true if BasicBlock \p BB is considered hot. bool isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI); /// Returns true if BasicBlock \p BB is considered cold. bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI); + /// Returns true if BasicBlock \p BB is considered hot with regard to a given + /// hot percentile cutoff value. + bool isHotBlockNthPercentile(int PercentileCutoff, + const BasicBlock *BB, BlockFrequencyInfo *BFI); /// Returns true if CallSite \p CS is considered hot. bool isHotCallSite(const CallSite &CS, BlockFrequencyInfo *BFI); /// Returns true if Callsite \p CS is considered cold. 
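
As a quick orientation, the following is a minimal sketch of how the MustBeExecutedContextExplorer added in the MustExecute.h hunk above might be driven. It assumes only what that hunk itself declares (the ExploreInterBlock constructor flag, begin()/end(), and the range() helper whose iterators yield const Instruction pointers); the printMustBeExecutedContext helper name is illustrative and not part of the patch.

#include "llvm/Analysis/MustExecute.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Sketch only: walk the "must be executed context" of a program point PP and
// print every instruction that is known to execute whenever PP executes.
static void printMustBeExecutedContext(const Instruction &PP) {
  // Allow the exploration to cross basic block boundaries.
  MustBeExecutedContextExplorer Explorer(/* ExploreInterBlock */ true);

  // range(PP) visits the context starting at PP itself; the iterator's
  // value_type is const Instruction *.
  for (const Instruction *I : Explorer.range(&PP))
    errs() << "  executes with PP: " << *I << '\n';
}

Per the Passes.h hunk above, equivalent output is available in-tree through the printer pass created by createMustBeExecutedContextPrinter(), run with -analyze.
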
diff --git a/include/llvm/Analysis/RegionInfoImpl.h b/include/llvm/Analysis/RegionInfoImpl.h index c59c09dd2095..6b5936680c37 100644 --- a/include/llvm/Analysis/RegionInfoImpl.h +++ b/include/llvm/Analysis/RegionInfoImpl.h @@ -365,7 +365,7 @@ typename Tr::RegionNodeT *RegionBase::getBBNode(BlockT *BB) const { auto Deconst = const_cast *>(this); typename BBNodeMapT::value_type V = { BB, - llvm::make_unique(static_cast(Deconst), BB)}; + std::make_unique(static_cast(Deconst), BB)}; at = BBNodeMap.insert(std::move(V)).first; } return at->second.get(); diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index 0bd98ef37e7a..9c55f7a5090f 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -468,6 +468,8 @@ template <> struct DenseMapInfo { /// can't do much with the SCEV objects directly, they must ask this class /// for services. class ScalarEvolution { + friend class ScalarEvolutionsTest; + public: /// An enum describing the relationship between a SCEV and a loop. enum LoopDisposition { @@ -777,10 +779,10 @@ public: /// to (i.e. a "conservative over-approximation") of the value returend by /// getBackedgeTakenCount. If such a value cannot be computed, it returns the /// SCEVCouldNotCompute object. - const SCEV *getMaxBackedgeTakenCount(const Loop *L); + const SCEV *getConstantMaxBackedgeTakenCount(const Loop *L); /// Return true if the backedge taken count is either the value returned by - /// getMaxBackedgeTakenCount or zero. + /// getConstantMaxBackedgeTakenCount or zero. bool isBackedgeTakenCountMaxOrZero(const Loop *L); /// Return true if the specified loop has an analyzable loop-invariant diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h index a519f93216b3..b4d727449fbe 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpander.h +++ b/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -77,9 +77,13 @@ namespace llvm { /// Phis that complete an IV chain. Reuse DenseSet> ChainedPhis; - /// When true, expressions are expanded in "canonical" form. In particular, - /// addrecs are expanded as arithmetic based on a canonical induction - /// variable. When false, expression are expanded in a more literal form. + /// When true, SCEVExpander tries to expand expressions in "canonical" form. + /// When false, expressions are expanded in a more literal form. + /// + /// In "canonical" form addrecs are expanded as arithmetic based on a + /// canonical induction variable. Note that CanonicalMode doesn't guarantee + /// that all expressions are expanded in "canonical" form. For some + /// expressions literal mode can be preferred. bool CanonicalMode; /// When invoked from LSR, the expander is in "strength reduction" mode. The @@ -275,8 +279,16 @@ namespace llvm { /// Clear the current insertion point. This is useful if the instruction /// that had been serving as the insertion point may have been deleted. - void clearInsertPoint() { - Builder.ClearInsertionPoint(); + void clearInsertPoint() { Builder.ClearInsertionPoint(); } + + /// Set location information used by debugging information. + void SetCurrentDebugLocation(DebugLoc L) { + Builder.SetCurrentDebugLocation(std::move(L)); + } + + /// Get location information used by debugging information. 
+ const DebugLoc &getCurrentDebugLocation() const { + return Builder.getCurrentDebugLocation(); } /// Return true if the specified instruction was inserted by the code diff --git a/include/llvm/Analysis/TargetLibraryInfo.h b/include/llvm/Analysis/TargetLibraryInfo.h index 4b5200f5a838..d4b223863c54 100644 --- a/include/llvm/Analysis/TargetLibraryInfo.h +++ b/include/llvm/Analysis/TargetLibraryInfo.h @@ -30,11 +30,12 @@ struct VecDesc { unsigned VectorizationFactor; }; - enum LibFunc { + enum LibFunc : unsigned { #define TLI_DEFINE_ENUM #include "llvm/Analysis/TargetLibraryInfo.def" - NumLibFuncs + NumLibFuncs, + NotLibFunc }; /// Implementation of the target library information. @@ -48,7 +49,7 @@ class TargetLibraryInfoImpl { unsigned char AvailableArray[(NumLibFuncs+3)/4]; llvm::DenseMap CustomNames; - static StringRef const StandardNames[NumLibFuncs]; + static StringLiteral const StandardNames[NumLibFuncs]; bool ShouldExtI32Param, ShouldExtI32Return, ShouldSignExtI32Param; enum AvailabilityState { @@ -359,7 +360,6 @@ public: TargetLibraryAnalysis(TargetLibraryInfoImpl PresetInfoImpl) : PresetInfoImpl(std::move(PresetInfoImpl)) {} - TargetLibraryInfo run(Module &M, ModuleAnalysisManager &); TargetLibraryInfo run(Function &F, FunctionAnalysisManager &); private: @@ -385,8 +385,13 @@ public: explicit TargetLibraryInfoWrapperPass(const Triple &T); explicit TargetLibraryInfoWrapperPass(const TargetLibraryInfoImpl &TLI); - TargetLibraryInfo &getTLI() { return TLI; } - const TargetLibraryInfo &getTLI() const { return TLI; } + TargetLibraryInfo &getTLI(const Function &F LLVM_ATTRIBUTE_UNUSED) { + return TLI; + } + const TargetLibraryInfo & + getTLI(const Function &F LLVM_ATTRIBUTE_UNUSED) const { + return TLI; + } }; } // end namespace llvm diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 7574b811bc1c..d6fa88411654 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -368,6 +368,20 @@ public: /// optimize away. unsigned getFlatAddressSpace() const; + /// Return any intrinsic address operand indexes which may be rewritten if + /// they use a flat address space pointer. + /// + /// \returns true if the intrinsic was handled. + bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, + Intrinsic::ID IID) const; + + /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p + /// NewV, which has a different address space. This should happen for every + /// operand index that collectFlatAddressOperands returned for the intrinsic. + /// \returns true if the intrinsic /// was handled. + bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, + Value *OldV, Value *NewV) const; + /// Test whether calls to a function lower to actual program function /// calls. /// @@ -469,12 +483,17 @@ public: bool Force; /// Allow using trip count upper bound to unroll loops. bool UpperBound; - /// Allow peeling off loop iterations for loops with low dynamic tripcount. + /// Allow peeling off loop iterations. bool AllowPeeling; /// Allow unrolling of all the iterations of the runtime loop remainder. bool UnrollRemainder; /// Allow unroll and jam. Used to enable unroll and jam for the target. bool UnrollAndJam; + /// Allow peeling basing on profile. Uses to enable peeling off all + /// iterations basing on provided profile. + /// If the value is true the peeling cost model can decide to peel only + /// some iterations and in this case it will set this to false. 
+ bool PeelProfiledIterations; /// Threshold for unroll and jam, for inner loop size. The 'Threshold' /// value above is used during unroll and jam for the outer loop size. /// This value is used in the same manner to limit the size of the inner @@ -555,15 +574,15 @@ public: /// modes that operate across loop iterations. bool shouldFavorBackedgeIndex(const Loop *L) const; - /// Return true if the target supports masked load. - bool isLegalMaskedStore(Type *DataType) const; /// Return true if the target supports masked store. - bool isLegalMaskedLoad(Type *DataType) const; + bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) const; + /// Return true if the target supports masked load. + bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) const; /// Return true if the target supports nontemporal store. - bool isLegalNTStore(Type *DataType, unsigned Alignment) const; + bool isLegalNTStore(Type *DataType, Align Alignment) const; /// Return true if the target supports nontemporal load. - bool isLegalNTLoad(Type *DataType, unsigned Alignment) const; + bool isLegalNTLoad(Type *DataType, Align Alignment) const; /// Return true if the target supports masked scatter. bool isLegalMaskedScatter(Type *DataType) const; @@ -622,12 +641,6 @@ public: /// Return true if this type is legal. bool isTypeLegal(Type *Ty) const; - /// Returns the target's jmp_buf alignment in bytes. - unsigned getJumpBufAlignment() const; - - /// Returns the target's jmp_buf size in bytes. - unsigned getJumpBufSize() const; - /// Return true if switches should be turned into lookup tables for the /// target. bool shouldBuildLookupTables() const; @@ -775,10 +788,23 @@ public: /// Additional properties of an operand's values. enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 }; - /// \return The number of scalar or vector registers that the target has. - /// If 'Vectors' is true, it returns the number of vector registers. If it is - /// set to false, it returns the number of scalar registers. - unsigned getNumberOfRegisters(bool Vector) const; + /// \return the number of registers in the target-provided register class. + unsigned getNumberOfRegisters(unsigned ClassID) const; + + /// \return the target-provided register class ID for the provided type, + /// accounting for type promotion and other type-legalization techniques that the target might apply. + /// However, it specifically does not account for the scalarization or splitting of vector types. + /// Should a vector type require scalarization or splitting into multiple underlying vector registers, + /// that type should be mapped to a register class containing no registers. + /// Specifically, this is designed to provide a simple, high-level view of the register allocation + /// later performed by the backend. These register classes don't necessarily map onto the + /// register classes used by the backend. + /// FIXME: It's not currently possible to determine how many registers + /// are used by the provided type. + unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const; + + /// \return the target-provided register class name + const char* getRegisterClassName(unsigned ClassID) const; /// \return The width of the largest scalar or vector register type. unsigned getRegisterBitWidth(bool Vector) const; @@ -824,18 +850,20 @@ public: /// \return The associativity of the cache level, if available. llvm::Optional getCacheAssociativity(CacheLevel Level) const; - /// \return How much before a load we should place the prefetch instruction. 
- /// This is currently measured in number of instructions. + /// \return How much before a load we should place the prefetch + /// instruction. This is currently measured in number of + /// instructions. unsigned getPrefetchDistance() const; - /// \return Some HW prefetchers can handle accesses up to a certain constant - /// stride. This is the minimum stride in bytes where it makes sense to start - /// adding SW prefetches. The default is 1, i.e. prefetch with any stride. + /// \return Some HW prefetchers can handle accesses up to a certain + /// constant stride. This is the minimum stride in bytes where it + /// makes sense to start adding SW prefetches. The default is 1, + /// i.e. prefetch with any stride. unsigned getMinPrefetchStride() const; - /// \return The maximum number of iterations to prefetch ahead. If the - /// required number of iterations is more than this number, no prefetching is - /// performed. + /// \return The maximum number of iterations to prefetch ahead. If + /// the required number of iterations is more than this number, no + /// prefetching is performed. unsigned getMaxPrefetchIterationsAhead() const; /// \return The maximum interleave factor that any transform should try to @@ -1155,6 +1183,10 @@ public: virtual bool isSourceOfDivergence(const Value *V) = 0; virtual bool isAlwaysUniform(const Value *V) = 0; virtual unsigned getFlatAddressSpace() = 0; + virtual bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, + Intrinsic::ID IID) const = 0; + virtual bool rewriteIntrinsicWithAddressSpace( + IntrinsicInst *II, Value *OldV, Value *NewV) const = 0; virtual bool isLoweredToCall(const Function *F) = 0; virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP) = 0; @@ -1177,10 +1209,10 @@ public: TargetLibraryInfo *LibInfo) = 0; virtual bool shouldFavorPostInc() const = 0; virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0; - virtual bool isLegalMaskedStore(Type *DataType) = 0; - virtual bool isLegalMaskedLoad(Type *DataType) = 0; - virtual bool isLegalNTStore(Type *DataType, unsigned Alignment) = 0; - virtual bool isLegalNTLoad(Type *DataType, unsigned Alignment) = 0; + virtual bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) = 0; + virtual bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) = 0; + virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0; + virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0; virtual bool isLegalMaskedScatter(Type *DataType) = 0; virtual bool isLegalMaskedGather(Type *DataType) = 0; virtual bool isLegalMaskedCompressStore(Type *DataType) = 0; @@ -1196,8 +1228,6 @@ public: virtual bool isProfitableToHoist(Instruction *I) = 0; virtual bool useAA() = 0; virtual bool isTypeLegal(Type *Ty) = 0; - virtual unsigned getJumpBufAlignment() = 0; - virtual unsigned getJumpBufSize() = 0; virtual bool shouldBuildLookupTables() = 0; virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0; virtual bool useColdCCForColdCall(Function &F) = 0; @@ -1228,19 +1258,35 @@ public: Type *Ty) = 0; virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) = 0; - virtual unsigned getNumberOfRegisters(bool Vector) = 0; + virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0; + virtual unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const = 0; + virtual const char* getRegisterClassName(unsigned ClassID) const = 0; virtual unsigned getRegisterBitWidth(bool Vector) const = 0; virtual unsigned 
getMinVectorRegisterBitWidth() = 0; virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0; virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0; virtual bool shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0; - virtual unsigned getCacheLineSize() = 0; - virtual llvm::Optional getCacheSize(CacheLevel Level) = 0; - virtual llvm::Optional getCacheAssociativity(CacheLevel Level) = 0; - virtual unsigned getPrefetchDistance() = 0; - virtual unsigned getMinPrefetchStride() = 0; - virtual unsigned getMaxPrefetchIterationsAhead() = 0; + virtual unsigned getCacheLineSize() const = 0; + virtual llvm::Optional getCacheSize(CacheLevel Level) const = 0; + virtual llvm::Optional getCacheAssociativity(CacheLevel Level) const = 0; + + /// \return How much before a load we should place the prefetch + /// instruction. This is currently measured in number of + /// instructions. + virtual unsigned getPrefetchDistance() const = 0; + + /// \return Some HW prefetchers can handle accesses up to a certain + /// constant stride. This is the minimum stride in bytes where it + /// makes sense to start adding SW prefetches. The default is 1, + /// i.e. prefetch with any stride. + virtual unsigned getMinPrefetchStride() const = 0; + + /// \return The maximum number of iterations to prefetch ahead. If + /// the required number of iterations is more than this number, no + /// prefetching is performed. + virtual unsigned getMaxPrefetchIterationsAhead() const = 0; + virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0; virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, @@ -1395,6 +1441,16 @@ public: return Impl.getFlatAddressSpace(); } + bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, + Intrinsic::ID IID) const override { + return Impl.collectFlatAddressOperands(OpIndexes, IID); + } + + bool rewriteIntrinsicWithAddressSpace( + IntrinsicInst *II, Value *OldV, Value *NewV) const override { + return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV); + } + bool isLoweredToCall(const Function *F) override { return Impl.isLoweredToCall(F); } @@ -1440,16 +1496,16 @@ public: bool shouldFavorBackedgeIndex(const Loop *L) const override { return Impl.shouldFavorBackedgeIndex(L); } - bool isLegalMaskedStore(Type *DataType) override { - return Impl.isLegalMaskedStore(DataType); + bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) override { + return Impl.isLegalMaskedStore(DataType, Alignment); } - bool isLegalMaskedLoad(Type *DataType) override { - return Impl.isLegalMaskedLoad(DataType); + bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) override { + return Impl.isLegalMaskedLoad(DataType, Alignment); } - bool isLegalNTStore(Type *DataType, unsigned Alignment) override { + bool isLegalNTStore(Type *DataType, Align Alignment) override { return Impl.isLegalNTStore(DataType, Alignment); } - bool isLegalNTLoad(Type *DataType, unsigned Alignment) override { + bool isLegalNTLoad(Type *DataType, Align Alignment) override { return Impl.isLegalNTLoad(DataType, Alignment); } bool isLegalMaskedScatter(Type *DataType) override { @@ -1490,8 +1546,6 @@ public: } bool useAA() override { return Impl.useAA(); } bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); } - unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); } - unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); } bool shouldBuildLookupTables() override { return 
Impl.shouldBuildLookupTables(); } @@ -1563,8 +1617,14 @@ public: Type *Ty) override { return Impl.getIntImmCost(IID, Idx, Imm, Ty); } - unsigned getNumberOfRegisters(bool Vector) override { - return Impl.getNumberOfRegisters(Vector); + unsigned getNumberOfRegisters(unsigned ClassID) const override { + return Impl.getNumberOfRegisters(ClassID); + } + unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const override { + return Impl.getRegisterClassForType(Vector, Ty); + } + const char* getRegisterClassName(unsigned ClassID) const override { + return Impl.getRegisterClassName(ClassID); } unsigned getRegisterBitWidth(bool Vector) const override { return Impl.getRegisterBitWidth(Vector); @@ -1583,22 +1643,36 @@ public: return Impl.shouldConsiderAddressTypePromotion( I, AllowPromotionWithoutCommonHeader); } - unsigned getCacheLineSize() override { + unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); } - llvm::Optional getCacheSize(CacheLevel Level) override { + llvm::Optional getCacheSize(CacheLevel Level) const override { return Impl.getCacheSize(Level); } - llvm::Optional getCacheAssociativity(CacheLevel Level) override { + llvm::Optional getCacheAssociativity(CacheLevel Level) const override { return Impl.getCacheAssociativity(Level); } - unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); } - unsigned getMinPrefetchStride() override { + + /// Return the preferred prefetch distance in terms of instructions. + /// + unsigned getPrefetchDistance() const override { + return Impl.getPrefetchDistance(); + } + + /// Return the minimum stride necessary to trigger software + /// prefetching. + /// + unsigned getMinPrefetchStride() const override { return Impl.getMinPrefetchStride(); } - unsigned getMaxPrefetchIterationsAhead() override { + + /// Return the maximum prefetch distance in terms of loop + /// iterations. + /// + unsigned getMaxPrefetchIterationsAhead() const override { return Impl.getMaxPrefetchIterationsAhead(); } + unsigned getMaxInterleaveFactor(unsigned VF) override { return Impl.getMaxInterleaveFactor(VF); } diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h index b99e1eb9adf0..a431fa0d458b 100644 --- a/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -156,6 +156,16 @@ public: return -1; } + bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, + Intrinsic::ID IID) const { + return false; + } + + bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, + Value *OldV, Value *NewV) const { + return false; + } + bool isLoweredToCall(const Function *F) { assert(F && "A concrete function must be provided to this routine."); @@ -233,18 +243,18 @@ public: bool shouldFavorBackedgeIndex(const Loop *L) const { return false; } - bool isLegalMaskedStore(Type *DataType) { return false; } + bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) { return false; } - bool isLegalMaskedLoad(Type *DataType) { return false; } + bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) { return false; } - bool isLegalNTStore(Type *DataType, unsigned Alignment) { + bool isLegalNTStore(Type *DataType, Align Alignment) { // By default, assume nontemporal memory stores are available for stores // that are aligned and have a size that is a power of 2. 
unsigned DataSize = DL.getTypeStoreSize(DataType); return Alignment >= DataSize && isPowerOf2_32(DataSize); } - bool isLegalNTLoad(Type *DataType, unsigned Alignment) { + bool isLegalNTLoad(Type *DataType, Align Alignment) { // By default, assume nontemporal memory loads are available for loads that // are aligned and have a size that is a power of 2. unsigned DataSize = DL.getTypeStoreSize(DataType); @@ -284,10 +294,6 @@ public: bool isTypeLegal(Type *Ty) { return false; } - unsigned getJumpBufAlignment() { return 0; } - - unsigned getJumpBufSize() { return 0; } - bool shouldBuildLookupTables() { return true; } bool shouldBuildLookupTablesForConstant(Constant *C) { return true; } @@ -348,7 +354,20 @@ public: return TTI::TCC_Free; } - unsigned getNumberOfRegisters(bool Vector) { return 8; } + unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; } + + unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const { + return Vector ? 1 : 0; + }; + + const char* getRegisterClassName(unsigned ClassID) const { + switch (ClassID) { + default: + return "Generic::Unknown Register Class"; + case 0: return "Generic::ScalarRC"; + case 1: return "Generic::VectorRC"; + } + } unsigned getRegisterBitWidth(bool Vector) const { return 32; } @@ -365,21 +384,20 @@ public: return false; } - unsigned getCacheLineSize() { return 0; } + unsigned getCacheLineSize() const { return 0; } - llvm::Optional getCacheSize(TargetTransformInfo::CacheLevel Level) { + llvm::Optional getCacheSize(TargetTransformInfo::CacheLevel Level) const { switch (Level) { case TargetTransformInfo::CacheLevel::L1D: LLVM_FALLTHROUGH; case TargetTransformInfo::CacheLevel::L2D: return llvm::Optional(); } - llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); } llvm::Optional getCacheAssociativity( - TargetTransformInfo::CacheLevel Level) { + TargetTransformInfo::CacheLevel Level) const { switch (Level) { case TargetTransformInfo::CacheLevel::L1D: LLVM_FALLTHROUGH; @@ -390,11 +408,9 @@ public: llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); } - unsigned getPrefetchDistance() { return 0; } - - unsigned getMinPrefetchStride() { return 1; } - - unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; } + unsigned getPrefetchDistance() const { return 0; } + unsigned getMinPrefetchStride() const { return 1; } + unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; } unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } @@ -830,6 +846,9 @@ public: if (isa(U)) return TTI::TCC_Free; // Model all PHI nodes as free. + if (isa(U)) + return TTI::TCC_Free; // Model all ExtractValue nodes as free. + // Static alloca doesn't generate target instructions. 
if (auto *A = dyn_cast(U)) if (A->isStaticAlloca()) diff --git a/include/llvm/Analysis/TypeMetadataUtils.h b/include/llvm/Analysis/TypeMetadataUtils.h index 82cf8efeea54..43ce26147c2e 100644 --- a/include/llvm/Analysis/TypeMetadataUtils.h +++ b/include/llvm/Analysis/TypeMetadataUtils.h @@ -50,6 +50,8 @@ void findDevirtualizableCallsForTypeCheckedLoad( SmallVectorImpl &LoadedPtrs, SmallVectorImpl &Preds, bool &HasNonCallUses, const CallInst *CI, DominatorTree &DT); + +Constant *getPointerAtOffset(Constant *I, uint64_t Offset, Module &M); } #endif diff --git a/include/llvm/Analysis/Utils/Local.h b/include/llvm/Analysis/Utils/Local.h index acbdf5dca32c..a63bcec9bc41 100644 --- a/include/llvm/Analysis/Utils/Local.h +++ b/include/llvm/Analysis/Utils/Local.h @@ -32,7 +32,7 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP, Value *Result = Constant::getNullValue(IntPtrTy); // If the GEP is inbounds, we know that none of the addressing operations will - // overflow in an unsigned sense. + // overflow in a signed sense. bool isInBounds = GEPOp->isInBounds() && !NoAssumptions; // Build a mask for high order bits. @@ -51,10 +51,7 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP, // Handle a struct index, which adds its field offset to the pointer. if (StructType *STy = GTI.getStructTypeOrNull()) { - if (OpC->getType()->isVectorTy()) - OpC = OpC->getSplatValue(); - - uint64_t OpValue = cast(OpC)->getZExtValue(); + uint64_t OpValue = OpC->getUniqueInteger().getZExtValue(); Size = DL.getStructLayout(STy)->getElementOffset(OpValue); if (Size) @@ -63,20 +60,31 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP, continue; } + // Splat the constant if needed. + if (IntPtrTy->isVectorTy() && !OpC->getType()->isVectorTy()) + OpC = ConstantVector::getSplat(IntPtrTy->getVectorNumElements(), OpC); + Constant *Scale = ConstantInt::get(IntPtrTy, Size); Constant *OC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/); - Scale = ConstantExpr::getMul(OC, Scale, isInBounds/*NUW*/); + Scale = + ConstantExpr::getMul(OC, Scale, false /*NUW*/, isInBounds /*NSW*/); // Emit an add instruction. Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs"); continue; } + + // Splat the index if needed. + if (IntPtrTy->isVectorTy() && !Op->getType()->isVectorTy()) + Op = Builder->CreateVectorSplat(IntPtrTy->getVectorNumElements(), Op); + // Convert to correct type. if (Op->getType() != IntPtrTy) Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c"); if (Size != 1) { // We'll let instcombine(mul) convert this to a shl if possible. Op = Builder->CreateMul(Op, ConstantInt::get(IntPtrTy, Size), - GEP->getName()+".idx", isInBounds /*NUW*/); + GEP->getName() + ".idx", false /*NUW*/, + isInBounds /*NSW*/); } // Emit an add instruction. diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index fa7e0e0eef7e..33b064fcf9d2 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -242,19 +242,21 @@ class Value; /// This is a wrapper around Value::stripAndAccumulateConstantOffsets that /// creates and later unpacks the required APInt. 
inline Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, - const DataLayout &DL) { + const DataLayout &DL, + bool AllowNonInbounds = true) { APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); Value *Base = - Ptr->stripAndAccumulateConstantOffsets(DL, OffsetAPInt, - /* AllowNonInbounds */ true); + Ptr->stripAndAccumulateConstantOffsets(DL, OffsetAPInt, AllowNonInbounds); + Offset = OffsetAPInt.getSExtValue(); return Base; } - inline const Value *GetPointerBaseWithConstantOffset(const Value *Ptr, - int64_t &Offset, - const DataLayout &DL) { - return GetPointerBaseWithConstantOffset(const_cast(Ptr), Offset, - DL); + inline const Value * + GetPointerBaseWithConstantOffset(const Value *Ptr, int64_t &Offset, + const DataLayout &DL, + bool AllowNonInbounds = true) { + return GetPointerBaseWithConstantOffset(const_cast(Ptr), Offset, DL, + AllowNonInbounds); } /// Returns true if the GEP is based on a pointer to a string (array of @@ -307,20 +309,26 @@ class Value; uint64_t GetStringLength(const Value *V, unsigned CharSize = 8); /// This function returns call pointer argument that is considered the same by - /// aliasing rules. You CAN'T use it to replace one value with another. - const Value *getArgumentAliasingToReturnedPointer(const CallBase *Call); - inline Value *getArgumentAliasingToReturnedPointer(CallBase *Call) { + /// aliasing rules. You CAN'T use it to replace one value with another. If + /// \p MustPreserveNullness is true, the call must preserve the nullness of + /// the pointer. + const Value *getArgumentAliasingToReturnedPointer(const CallBase *Call, + bool MustPreserveNullness); + inline Value * + getArgumentAliasingToReturnedPointer(CallBase *Call, + bool MustPreserveNullness) { return const_cast(getArgumentAliasingToReturnedPointer( - const_cast(Call))); + const_cast(Call), MustPreserveNullness)); } - // {launder,strip}.invariant.group returns pointer that aliases its argument, - // and it only captures pointer by returning it. - // These intrinsics are not marked as nocapture, because returning is - // considered as capture. The arguments are not marked as returned neither, - // because it would make it useless. + /// {launder,strip}.invariant.group returns pointer that aliases its argument, + /// and it only captures pointer by returning it. + /// These intrinsics are not marked as nocapture, because returning is + /// considered as capture. The arguments are not marked as returned neither, + /// because it would make it useless. If \p MustPreserveNullness is true, + /// the intrinsic must preserve the nullness of the pointer. bool isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( - const CallBase *Call); + const CallBase *Call, bool MustPreserveNullness); /// This method strips off any GEP address adjustments and pointer casts from /// the specified value, returning the original object being addressed. Note @@ -376,6 +384,13 @@ class Value; /// Return true if the only users of this pointer are lifetime markers. bool onlyUsedByLifetimeMarkers(const Value *V); + /// Return true if speculation of the given load must be suppressed to avoid + /// ordering or interfering with an active sanitizer. If not suppressed, + /// dereferenceability and alignment must be proven separately. Note: This + /// is only needed for raw reasoning; if you use the interface below + /// (isSafeToSpeculativelyExecute), this is handled internally. 
+ bool mustSuppressSpeculation(const LoadInst &LI); + /// Return true if the instruction does not have any effects besides /// calculating the result and does not have undefined behavior. /// @@ -605,12 +620,12 @@ class Value; SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp = nullptr, unsigned Depth = 0); + inline SelectPatternResult - matchSelectPattern(const Value *V, const Value *&LHS, const Value *&RHS, - Instruction::CastOps *CastOp = nullptr) { - Value *L = const_cast(LHS); - Value *R = const_cast(RHS); - auto Result = matchSelectPattern(const_cast(V), L, R); + matchSelectPattern(const Value *V, const Value *&LHS, const Value *&RHS) { + Value *L = const_cast(LHS); + Value *R = const_cast(RHS); + auto Result = matchSelectPattern(const_cast(V), L, R); LHS = L; RHS = R; return Result; @@ -654,6 +669,12 @@ class Value; Optional isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL); + + /// If Ptr1 is provably equal to Ptr2 plus a constant offset, return that + /// offset. For example, Ptr1 might be &A[42], and Ptr2 might be &A[40]. In + /// this case offset would be -8. + Optional isPointerOffset(const Value *Ptr1, const Value *Ptr2, + const DataLayout &DL); } // end namespace llvm #endif // LLVM_ANALYSIS_VALUETRACKING_H diff --git a/include/llvm/Analysis/VectorUtils.h b/include/llvm/Analysis/VectorUtils.h index d93d2bc4570b..4a61c2bc35c7 100644 --- a/include/llvm/Analysis/VectorUtils.h +++ b/include/llvm/Analysis/VectorUtils.h @@ -15,18 +15,129 @@ #include "llvm/ADT/MapVector.h" #include "llvm/Analysis/LoopAccessAnalysis.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/Support/CheckedArithmetic.h" namespace llvm { +/// Describes the type of Parameters +enum class VFParamKind { + Vector, // No semantic information. + OMP_Linear, // declare simd linear(i) + OMP_LinearRef, // declare simd linear(ref(i)) + OMP_LinearVal, // declare simd linear(val(i)) + OMP_LinearUVal, // declare simd linear(uval(i)) + OMP_LinearPos, // declare simd linear(i:c) uniform(c) + OMP_LinearValPos, // declare simd linear(val(i:c)) uniform(c) + OMP_LinearRefPos, // declare simd linear(ref(i:c)) uniform(c) + OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c + OMP_Uniform, // declare simd uniform(i) + GlobalPredicate, // Global logical predicate that acts on all lanes + // of the input and output mask concurrently. For + // example, it is implied by the `M` token in the + // Vector Function ABI mangled name. + Unknown +}; + +/// Describes the type of Instruction Set Architecture +enum class VFISAKind { + AdvancedSIMD, // AArch64 Advanced SIMD (NEON) + SVE, // AArch64 Scalable Vector Extension + SSE, // x86 SSE + AVX, // x86 AVX + AVX2, // x86 AVX2 + AVX512, // x86 AVX512 + Unknown // Unknown ISA +}; + +/// Encapsulates information needed to describe a parameter. +/// +/// The description of the parameter is not linked directly to +/// OpenMP or any other vector function description. This structure +/// is extendible to handle other paradigms that describe vector +/// functions and their parameters. +struct VFParameter { + unsigned ParamPos; // Parameter Position in Scalar Function. + VFParamKind ParamKind; // Kind of Parameter. + int LinearStepOrPos = 0; // Step or Position of the Parameter. + Align Alignment = Align(); // Optional aligment in bytes, defaulted to 1. + + // Comparison operator. 
+ bool operator==(const VFParameter &Other) const { + return std::tie(ParamPos, ParamKind, LinearStepOrPos, Alignment) == + std::tie(Other.ParamPos, Other.ParamKind, Other.LinearStepOrPos, + Other.Alignment); + } +}; + +/// Contains the information about the kind of vectorization +/// available. +/// +/// This object in independent on the paradigm used to +/// represent vector functions. in particular, it is not attached to +/// any target-specific ABI. +struct VFShape { + unsigned VF; // Vectorization factor. + bool IsScalable; // True if the function is a scalable function. + VFISAKind ISA; // Instruction Set Architecture. + SmallVector Parameters; // List of parameter informations. + // Comparison operator. + bool operator==(const VFShape &Other) const { + return std::tie(VF, IsScalable, ISA, Parameters) == + std::tie(Other.VF, Other.IsScalable, Other.ISA, Other.Parameters); + } +}; + +/// Holds the VFShape for a specific scalar to vector function mapping. +struct VFInfo { + VFShape Shape; // Classification of the vector function. + StringRef ScalarName; // Scalar Function Name. + StringRef VectorName; // Vector Function Name associated to this VFInfo. + + // Comparison operator. + bool operator==(const VFInfo &Other) const { + return std::tie(Shape, ScalarName, VectorName) == + std::tie(Shape, Other.ScalarName, Other.VectorName); + } +}; + +namespace VFABI { +/// Function to contruct a VFInfo out of a mangled names in the +/// following format: +/// +/// {()} +/// +/// where is the name of the vector function, mangled according +/// to the rules described in the Vector Function ABI of the target vector +/// extentsion (or from now on). The is in the following +/// format: +/// +/// _ZGV_[()] +/// +/// This methods support demangling rules for the following : +/// +/// * AArch64: https://developer.arm.com/docs/101129/latest +/// +/// * x86 (libmvec): https://sourceware.org/glibc/wiki/libmvec and +/// https://sourceware.org/glibc/wiki/libmvec?action=AttachFile&do=view&target=VectorABI.txt +/// +/// +/// +/// \param MangledName -> input string in the format +/// _ZGV_[()]. +Optional tryDemangleForVFABI(StringRef MangledName); + +/// Retrieve the `VFParamKind` from a string token. +VFParamKind getVFParamKindFromString(const StringRef Token); +} // end namespace VFABI + template class ArrayRef; class DemandedBits; class GetElementPtrInst; template class InterleaveGroup; class Loop; class ScalarEvolution; +class TargetLibraryInfo; class TargetTransformInfo; class Type; class Value; @@ -270,13 +381,12 @@ APInt possiblyDemandedEltsInMask(Value *Mask); /// the interleaved store group doesn't allow gaps. 
template class InterleaveGroup { public: - InterleaveGroup(uint32_t Factor, bool Reverse, uint32_t Align) - : Factor(Factor), Reverse(Reverse), Align(Align), InsertPos(nullptr) {} - - InterleaveGroup(InstTy *Instr, int32_t Stride, uint32_t Align) - : Align(Align), InsertPos(Instr) { - assert(Align && "The alignment should be non-zero"); + InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment) + : Factor(Factor), Reverse(Reverse), Alignment(Alignment), + InsertPos(nullptr) {} + InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment) + : Alignment(Alignment), InsertPos(Instr) { Factor = std::abs(Stride); assert(Factor > 1 && "Invalid interleave factor"); @@ -286,7 +396,7 @@ public: bool isReverse() const { return Reverse; } uint32_t getFactor() const { return Factor; } - uint32_t getAlignment() const { return Align; } + uint32_t getAlignment() const { return Alignment.value(); } uint32_t getNumMembers() const { return Members.size(); } /// Try to insert a new member \p Instr with index \p Index and @@ -294,9 +404,7 @@ public: /// negative if it is the new leader. /// /// \returns false if the instruction doesn't belong to the group. - bool insertMember(InstTy *Instr, int32_t Index, uint32_t NewAlign) { - assert(NewAlign && "The new member's alignment should be non-zero"); - + bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) { // Make sure the key fits in an int32_t. Optional MaybeKey = checkedAdd(Index, SmallestKey); if (!MaybeKey) @@ -328,7 +436,7 @@ public: } // It's always safe to select the minimum alignment. - Align = std::min(Align, NewAlign); + Alignment = std::min(Alignment, NewAlign); Members[Key] = Instr; return true; } @@ -387,7 +495,7 @@ public: private: uint32_t Factor; // Interleave Factor. bool Reverse; - uint32_t Align; + Align Alignment; DenseMap Members; int32_t SmallestKey = 0; int32_t LargestKey = 0; @@ -504,8 +612,8 @@ private: struct StrideDescriptor { StrideDescriptor() = default; StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size, - unsigned Align) - : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {} + Align Alignment) + : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {} // The access's stride. It is negative for a reverse access. int64_t Stride = 0; @@ -517,7 +625,7 @@ private: uint64_t Size = 0; // The alignment of this access. - unsigned Align = 0; + Align Alignment; }; /// A type for holding instructions and their stride descriptors. @@ -528,11 +636,11 @@ private: /// /// \returns the newly created interleave group. 
InterleaveGroup * - createInterleaveGroup(Instruction *Instr, int Stride, unsigned Align) { + createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) { assert(!InterleaveGroupMap.count(Instr) && "Already in an interleaved access group"); InterleaveGroupMap[Instr] = - new InterleaveGroup(Instr, Stride, Align); + new InterleaveGroup(Instr, Stride, Alignment); InterleaveGroups.insert(InterleaveGroupMap[Instr]); return InterleaveGroupMap[Instr]; } diff --git a/include/llvm/BinaryFormat/Dwarf.def b/include/llvm/BinaryFormat/Dwarf.def index b0f78d0fd61f..34a7410f7474 100644 --- a/include/llvm/BinaryFormat/Dwarf.def +++ b/include/llvm/BinaryFormat/Dwarf.def @@ -17,7 +17,7 @@ defined HANDLE_DW_VIRTUALITY || defined HANDLE_DW_DEFAULTED || \ defined HANDLE_DW_CC || defined HANDLE_DW_LNS || defined HANDLE_DW_LNE || \ defined HANDLE_DW_LNCT || defined HANDLE_DW_MACRO || \ - defined HANDLE_DW_RLE || \ + defined HANDLE_DW_RLE || defined HANDLE_DW_LLE || \ (defined HANDLE_DW_CFA && defined HANDLE_DW_CFA_PRED) || \ defined HANDLE_DW_APPLE_PROPERTY || defined HANDLE_DW_UT || \ defined HANDLE_DWARF_SECTION || defined HANDLE_DW_IDX || \ @@ -26,7 +26,17 @@ #endif #ifndef HANDLE_DW_TAG -#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR, KIND) +#endif + +// Note that DW_KIND is not a DWARF concept, but rather a way for us to +// generate a list of tags that belong together. +#ifndef DW_KIND_NONE +#define DW_KIND_NONE 0 +#endif + +#ifndef DW_KIND_TYPE +#define DW_KIND_TYPE 1 #endif #ifndef HANDLE_DW_AT @@ -81,6 +91,10 @@ #define HANDLE_DW_RLE(ID, NAME) #endif +#ifndef HANDLE_DW_LLE +#define HANDLE_DW_LLE(ID, NAME) +#endif + #ifndef HANDLE_DW_CFA #define HANDLE_DW_CFA(ID, NAME) #endif @@ -109,94 +123,94 @@ #define HANDLE_DW_END(ID, NAME) #endif -HANDLE_DW_TAG(0x0000, null, 2, DWARF) -HANDLE_DW_TAG(0x0001, array_type, 2, DWARF) -HANDLE_DW_TAG(0x0002, class_type, 2, DWARF) -HANDLE_DW_TAG(0x0003, entry_point, 2, DWARF) -HANDLE_DW_TAG(0x0004, enumeration_type, 2, DWARF) -HANDLE_DW_TAG(0x0005, formal_parameter, 2, DWARF) -HANDLE_DW_TAG(0x0008, imported_declaration, 2, DWARF) -HANDLE_DW_TAG(0x000a, label, 2, DWARF) -HANDLE_DW_TAG(0x000b, lexical_block, 2, DWARF) -HANDLE_DW_TAG(0x000d, member, 2, DWARF) -HANDLE_DW_TAG(0x000f, pointer_type, 2, DWARF) -HANDLE_DW_TAG(0x0010, reference_type, 2, DWARF) -HANDLE_DW_TAG(0x0011, compile_unit, 2, DWARF) -HANDLE_DW_TAG(0x0012, string_type, 2, DWARF) -HANDLE_DW_TAG(0x0013, structure_type, 2, DWARF) -HANDLE_DW_TAG(0x0015, subroutine_type, 2, DWARF) -HANDLE_DW_TAG(0x0016, typedef, 2, DWARF) -HANDLE_DW_TAG(0x0017, union_type, 2, DWARF) -HANDLE_DW_TAG(0x0018, unspecified_parameters, 2, DWARF) -HANDLE_DW_TAG(0x0019, variant, 2, DWARF) -HANDLE_DW_TAG(0x001a, common_block, 2, DWARF) -HANDLE_DW_TAG(0x001b, common_inclusion, 2, DWARF) -HANDLE_DW_TAG(0x001c, inheritance, 2, DWARF) -HANDLE_DW_TAG(0x001d, inlined_subroutine, 2, DWARF) -HANDLE_DW_TAG(0x001e, module, 2, DWARF) -HANDLE_DW_TAG(0x001f, ptr_to_member_type, 2, DWARF) -HANDLE_DW_TAG(0x0020, set_type, 2, DWARF) -HANDLE_DW_TAG(0x0021, subrange_type, 2, DWARF) -HANDLE_DW_TAG(0x0022, with_stmt, 2, DWARF) -HANDLE_DW_TAG(0x0023, access_declaration, 2, DWARF) -HANDLE_DW_TAG(0x0024, base_type, 2, DWARF) -HANDLE_DW_TAG(0x0025, catch_block, 2, DWARF) -HANDLE_DW_TAG(0x0026, const_type, 2, DWARF) -HANDLE_DW_TAG(0x0027, constant, 2, DWARF) -HANDLE_DW_TAG(0x0028, enumerator, 2, DWARF) -HANDLE_DW_TAG(0x0029, file_type, 2, DWARF) -HANDLE_DW_TAG(0x002a, friend, 2, DWARF) 
-HANDLE_DW_TAG(0x002b, namelist, 2, DWARF) -HANDLE_DW_TAG(0x002c, namelist_item, 2, DWARF) -HANDLE_DW_TAG(0x002d, packed_type, 2, DWARF) -HANDLE_DW_TAG(0x002e, subprogram, 2, DWARF) -HANDLE_DW_TAG(0x002f, template_type_parameter, 2, DWARF) -HANDLE_DW_TAG(0x0030, template_value_parameter, 2, DWARF) -HANDLE_DW_TAG(0x0031, thrown_type, 2, DWARF) -HANDLE_DW_TAG(0x0032, try_block, 2, DWARF) -HANDLE_DW_TAG(0x0033, variant_part, 2, DWARF) -HANDLE_DW_TAG(0x0034, variable, 2, DWARF) -HANDLE_DW_TAG(0x0035, volatile_type, 2, DWARF) +HANDLE_DW_TAG(0x0000, null, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0001, array_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0002, class_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0003, entry_point, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0004, enumeration_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0005, formal_parameter, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0008, imported_declaration, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x000a, label, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x000b, lexical_block, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x000d, member, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x000f, pointer_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0010, reference_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0011, compile_unit, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0012, string_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0013, structure_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0015, subroutine_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0016, typedef, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0017, union_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0018, unspecified_parameters, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0019, variant, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x001a, common_block, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x001b, common_inclusion, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x001c, inheritance, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x001d, inlined_subroutine, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x001e, module, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x001f, ptr_to_member_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0020, set_type, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0021, subrange_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0022, with_stmt, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0023, access_declaration, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0024, base_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0025, catch_block, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0026, const_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0027, constant, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0028, enumerator, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0029, file_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x002a, friend, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x002b, namelist, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x002c, namelist_item, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x002d, packed_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x002e, subprogram, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x002f, template_type_parameter, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0030, template_value_parameter, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0031, thrown_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0032, try_block, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0033, variant_part, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0034, variable, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0035, volatile_type, 2, DWARF, DW_KIND_TYPE) // New in DWARF v3: -HANDLE_DW_TAG(0x0036, dwarf_procedure, 3, DWARF) -HANDLE_DW_TAG(0x0037, 
restrict_type, 3, DWARF) -HANDLE_DW_TAG(0x0038, interface_type, 3, DWARF) -HANDLE_DW_TAG(0x0039, namespace, 3, DWARF) -HANDLE_DW_TAG(0x003a, imported_module, 3, DWARF) -HANDLE_DW_TAG(0x003b, unspecified_type, 3, DWARF) -HANDLE_DW_TAG(0x003c, partial_unit, 3, DWARF) -HANDLE_DW_TAG(0x003d, imported_unit, 3, DWARF) -HANDLE_DW_TAG(0x003f, condition, 3, DWARF) -HANDLE_DW_TAG(0x0040, shared_type, 3, DWARF) +HANDLE_DW_TAG(0x0036, dwarf_procedure, 3, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0037, restrict_type, 3, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0038, interface_type, 3, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0039, namespace, 3, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x003a, imported_module, 3, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x003b, unspecified_type, 3, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x003c, partial_unit, 3, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x003d, imported_unit, 3, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x003f, condition, 3, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0040, shared_type, 3, DWARF, DW_KIND_TYPE) // New in DWARF v4: -HANDLE_DW_TAG(0x0041, type_unit, 4, DWARF) -HANDLE_DW_TAG(0x0042, rvalue_reference_type, 4, DWARF) -HANDLE_DW_TAG(0x0043, template_alias, 4, DWARF) +HANDLE_DW_TAG(0x0041, type_unit, 4, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0042, rvalue_reference_type, 4, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0043, template_alias, 4, DWARF, DW_KIND_NONE) // New in DWARF v5: -HANDLE_DW_TAG(0x0044, coarray_type, 5, DWARF) -HANDLE_DW_TAG(0x0045, generic_subrange, 5, DWARF) -HANDLE_DW_TAG(0x0046, dynamic_type, 5, DWARF) -HANDLE_DW_TAG(0x0047, atomic_type, 5, DWARF) -HANDLE_DW_TAG(0x0048, call_site, 5, DWARF) -HANDLE_DW_TAG(0x0049, call_site_parameter, 5, DWARF) -HANDLE_DW_TAG(0x004a, skeleton_unit, 5, DWARF) -HANDLE_DW_TAG(0x004b, immutable_type, 5, DWARF) +HANDLE_DW_TAG(0x0044, coarray_type, 5, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0045, generic_subrange, 5, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0046, dynamic_type, 5, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0047, atomic_type, 5, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0048, call_site, 5, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0049, call_site_parameter, 5, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x004a, skeleton_unit, 5, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x004b, immutable_type, 5, DWARF, DW_KIND_TYPE) // Vendor extensions: -HANDLE_DW_TAG(0x4081, MIPS_loop, 0, MIPS) -HANDLE_DW_TAG(0x4101, format_label, 0, GNU) -HANDLE_DW_TAG(0x4102, function_template, 0, GNU) -HANDLE_DW_TAG(0x4103, class_template, 0, GNU) -HANDLE_DW_TAG(0x4106, GNU_template_template_param, 0, GNU) -HANDLE_DW_TAG(0x4107, GNU_template_parameter_pack, 0, GNU) -HANDLE_DW_TAG(0x4108, GNU_formal_parameter_pack, 0, GNU) -HANDLE_DW_TAG(0x4109, GNU_call_site, 0, GNU) -HANDLE_DW_TAG(0x410a, GNU_call_site_parameter, 0, GNU) -HANDLE_DW_TAG(0x4200, APPLE_property, 0, APPLE) -HANDLE_DW_TAG(0xb000, BORLAND_property, 0, BORLAND) -HANDLE_DW_TAG(0xb001, BORLAND_Delphi_string, 0, BORLAND) -HANDLE_DW_TAG(0xb002, BORLAND_Delphi_dynamic_array, 0, BORLAND) -HANDLE_DW_TAG(0xb003, BORLAND_Delphi_set, 0, BORLAND) -HANDLE_DW_TAG(0xb004, BORLAND_Delphi_variant, 0, BORLAND) +HANDLE_DW_TAG(0x4081, MIPS_loop, 0, MIPS, DW_KIND_NONE) +HANDLE_DW_TAG(0x4101, format_label, 0, GNU, DW_KIND_NONE) +HANDLE_DW_TAG(0x4102, function_template, 0, GNU, DW_KIND_NONE) +HANDLE_DW_TAG(0x4103, class_template, 0, GNU, DW_KIND_NONE) +HANDLE_DW_TAG(0x4106, GNU_template_template_param, 0, GNU, DW_KIND_NONE) +HANDLE_DW_TAG(0x4107, GNU_template_parameter_pack, 0, GNU, DW_KIND_NONE) +HANDLE_DW_TAG(0x4108, GNU_formal_parameter_pack, 0, 
GNU, DW_KIND_NONE) +HANDLE_DW_TAG(0x4109, GNU_call_site, 0, GNU, DW_KIND_NONE) +HANDLE_DW_TAG(0x410a, GNU_call_site_parameter, 0, GNU, DW_KIND_NONE) +HANDLE_DW_TAG(0x4200, APPLE_property, 0, APPLE, DW_KIND_NONE) +HANDLE_DW_TAG(0xb000, BORLAND_property, 0, BORLAND, DW_KIND_NONE) +HANDLE_DW_TAG(0xb001, BORLAND_Delphi_string, 0, BORLAND, DW_KIND_TYPE) +HANDLE_DW_TAG(0xb002, BORLAND_Delphi_dynamic_array, 0, BORLAND, DW_KIND_TYPE) +HANDLE_DW_TAG(0xb003, BORLAND_Delphi_set, 0, BORLAND, DW_KIND_TYPE) +HANDLE_DW_TAG(0xb004, BORLAND_Delphi_variant, 0, BORLAND, DW_KIND_TYPE) // Attributes. HANDLE_DW_AT(0x01, sibling, 2, DWARF) @@ -815,6 +829,17 @@ HANDLE_DW_RLE(0x05, base_address) HANDLE_DW_RLE(0x06, start_end) HANDLE_DW_RLE(0x07, start_length) +// DWARF v5 Loc List Entry encoding values. +HANDLE_DW_LLE(0x00, end_of_list) +HANDLE_DW_LLE(0x01, base_addressx) +HANDLE_DW_LLE(0x02, startx_endx) +HANDLE_DW_LLE(0x03, startx_length) +HANDLE_DW_LLE(0x04, offset_pair) +HANDLE_DW_LLE(0x05, default_location) +HANDLE_DW_LLE(0x06, base_address) +HANDLE_DW_LLE(0x07, start_end) +HANDLE_DW_LLE(0x08, start_length) + // Call frame instruction encodings. HANDLE_DW_CFA(0x00, nop) HANDLE_DW_CFA(0x40, advance_loc) @@ -929,6 +954,7 @@ HANDLE_DW_IDX(0x05, type_hash) #undef HANDLE_DW_LNCT #undef HANDLE_DW_MACRO #undef HANDLE_DW_RLE +#undef HANDLE_DW_LLE #undef HANDLE_DW_CFA #undef HANDLE_DW_CFA_PRED #undef HANDLE_DW_APPLE_PROPERTY diff --git a/include/llvm/BinaryFormat/Dwarf.h b/include/llvm/BinaryFormat/Dwarf.h index 76d9c365c0a8..1c6aee48661c 100644 --- a/include/llvm/BinaryFormat/Dwarf.h +++ b/include/llvm/BinaryFormat/Dwarf.h @@ -46,6 +46,11 @@ enum LLVMConstants : uint32_t { DW_VIRTUALITY_invalid = ~0U, // Virtuality for invalid results. DW_MACINFO_invalid = ~0U, // Macinfo type for invalid results. + // Special values for an initial length field. + DW_LENGTH_lo_reserved = 0xfffffff0, // Lower bound of the reserved range. + DW_LENGTH_DWARF64 = 0xffffffff, // Indicator of 64-bit DWARF format. + DW_LENGTH_hi_reserved = 0xffffffff, // Upper bound of the reserved range. + // Other constants. DWARF_VERSION = 4, // Default dwarf version we output. DW_PUBTYPES_VERSION = 2, // Section version number for .debug_pubtypes. 
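
Illustration (not part of the imported patch): the reserved initial-length values added above are what a DWARF reader inspects to tell 32-bit and 64-bit units apart. A minimal standalone sketch of that check, with the constants copied locally for clarity; real consumers would use the llvm::dwarf enumerators.

#include <cstdint>
#include <cstdio>

// Local mirrors of DW_LENGTH_lo_reserved and DW_LENGTH_DWARF64 above.
constexpr uint32_t LengthLoReserved = 0xfffffff0;
constexpr uint32_t LengthDWARF64 = 0xffffffff;

// Size in bytes of the unit-length field itself: 4 for DWARF32, 12 for
// DWARF64 (the 0xffffffff escape plus an 8-byte length), 0 if the first
// word is one of the other reserved values (malformed input).
static unsigned initialLengthSize(uint32_t FirstWord) {
  if (FirstWord == LengthDWARF64)
    return 12;
  if (FirstWord >= LengthLoReserved)
    return 0;
  return 4;
}

int main() {
  std::printf("%u\n", initialLengthSize(0x00000042u)); // 4  (DWARF32)
  std::printf("%u\n", initialLengthSize(0xffffffffu)); // 12 (DWARF64)
  std::printf("%u\n", initialLengthSize(0xfffffff2u)); // 0  (reserved)
}

The getUnitLengthFieldByteSize() helper added further down in Dwarf.h encodes the same 4-versus-12 distinction once the format is already known.
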
@@ -75,7 +80,7 @@ const uint64_t DW64_CIE_ID = UINT64_MAX; const uint32_t DW_INVALID_OFFSET = UINT32_MAX; enum Tag : uint16_t { -#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) DW_TAG_##NAME = ID, +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR, KIND) DW_TAG_##NAME = ID, #include "llvm/BinaryFormat/Dwarf.def" DW_TAG_lo_user = 0x4080, DW_TAG_hi_user = 0xffff, @@ -84,29 +89,12 @@ enum Tag : uint16_t { inline bool isType(Tag T) { switch (T) { - case DW_TAG_array_type: - case DW_TAG_class_type: - case DW_TAG_interface_type: - case DW_TAG_enumeration_type: - case DW_TAG_pointer_type: - case DW_TAG_reference_type: - case DW_TAG_rvalue_reference_type: - case DW_TAG_string_type: - case DW_TAG_structure_type: - case DW_TAG_subroutine_type: - case DW_TAG_union_type: - case DW_TAG_ptr_to_member_type: - case DW_TAG_set_type: - case DW_TAG_subrange_type: - case DW_TAG_base_type: - case DW_TAG_const_type: - case DW_TAG_file_type: - case DW_TAG_packed_type: - case DW_TAG_volatile_type: - case DW_TAG_typedef: - return true; default: return false; +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR, KIND) \ + case DW_TAG_##NAME: \ + return (KIND == DW_KIND_TYPE); +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -129,9 +117,10 @@ enum LocationAtom { #include "llvm/BinaryFormat/Dwarf.def" DW_OP_lo_user = 0xe0, DW_OP_hi_user = 0xff, - DW_OP_LLVM_fragment = 0x1000, ///< Only used in LLVM metadata. - DW_OP_LLVM_convert = 0x1001, ///< Only used in LLVM metadata. - DW_OP_LLVM_tag_offset = 0x1002, ///< Only used in LLVM metadata. + DW_OP_LLVM_fragment = 0x1000, ///< Only used in LLVM metadata. + DW_OP_LLVM_convert = 0x1001, ///< Only used in LLVM metadata. + DW_OP_LLVM_tag_offset = 0x1002, ///< Only used in LLVM metadata. + DW_OP_LLVM_entry_value = 0x1003, ///< Only used in LLVM metadata. }; enum TypeKind : uint8_t { @@ -192,6 +181,59 @@ enum SourceLanguage { DW_LANG_hi_user = 0xffff }; +inline bool isCPlusPlus(SourceLanguage S) { + // Deliberately enumerate all the language options so we get a warning when + // new language options are added (-Wswitch) that'll hopefully help keep this + // switch up-to-date when new C++ versions are added. + switch (S) { + case DW_LANG_C_plus_plus: + case DW_LANG_C_plus_plus_03: + case DW_LANG_C_plus_plus_11: + case DW_LANG_C_plus_plus_14: + return true; + case DW_LANG_C89: + case DW_LANG_C: + case DW_LANG_Ada83: + case DW_LANG_Cobol74: + case DW_LANG_Cobol85: + case DW_LANG_Fortran77: + case DW_LANG_Fortran90: + case DW_LANG_Pascal83: + case DW_LANG_Modula2: + case DW_LANG_Java: + case DW_LANG_C99: + case DW_LANG_Ada95: + case DW_LANG_Fortran95: + case DW_LANG_PLI: + case DW_LANG_ObjC: + case DW_LANG_ObjC_plus_plus: + case DW_LANG_UPC: + case DW_LANG_D: + case DW_LANG_Python: + case DW_LANG_OpenCL: + case DW_LANG_Go: + case DW_LANG_Modula3: + case DW_LANG_Haskell: + case DW_LANG_OCaml: + case DW_LANG_Rust: + case DW_LANG_C11: + case DW_LANG_Swift: + case DW_LANG_Julia: + case DW_LANG_Dylan: + case DW_LANG_Fortran03: + case DW_LANG_Fortran08: + case DW_LANG_RenderScript: + case DW_LANG_BLISS: + case DW_LANG_Mips_Assembler: + case DW_LANG_GOOGLE_RenderScript: + case DW_LANG_BORLAND_Delphi: + case DW_LANG_lo_user: + case DW_LANG_hi_user: + return false; + } + llvm_unreachable("Invalid source language"); +} + enum CaseSensitivity { // Identifier case codes DW_ID_case_sensitive = 0x00, @@ -267,11 +309,17 @@ enum MacroEntryType { }; /// DWARF v5 range list entry encoding values. 
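
Usage sketch (not part of the imported patch), assuming an LLVM build whose llvm/BinaryFormat/Dwarf.h contains the changes above:

#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  // isType() is now generated from the DW_KIND column in Dwarf.def, so it
  // stays in sync with the tag table automatically.
  const dwarf::Tag Tags[] = {dwarf::DW_TAG_base_type, dwarf::DW_TAG_subprogram,
                             dwarf::DW_TAG_rvalue_reference_type};
  for (dwarf::Tag T : Tags)
    outs() << dwarf::TagString(T) << " is a type tag: "
           << (dwarf::isType(T) ? "yes" : "no") << "\n";

  // isCPlusPlus() groups all of the DW_LANG_C_plus_plus* codes.
  outs() << "C++14 unit: "
         << (dwarf::isCPlusPlus(dwarf::DW_LANG_C_plus_plus_14) ? "yes" : "no")
         << "\n";
}
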
-enum RangeListEntries { +enum RnglistEntries { #define HANDLE_DW_RLE(ID, NAME) DW_RLE_##NAME = ID, #include "llvm/BinaryFormat/Dwarf.def" }; +/// DWARF v5 loc list entry encoding values. +enum LoclistEntries { +#define HANDLE_DW_LLE(ID, NAME) DW_LLE_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" +}; + /// Call frame instruction encodings. enum CallFrameInfo { #define HANDLE_DW_CFA(ID, NAME) DW_CFA_##NAME = ID, @@ -307,19 +355,6 @@ enum Constants { DW_EH_PE_indirect = 0x80 }; -/// Constants for location lists in DWARF v5. -enum LocationListEntry : unsigned char { - DW_LLE_end_of_list = 0x00, - DW_LLE_base_addressx = 0x01, - DW_LLE_startx_endx = 0x02, - DW_LLE_startx_length = 0x03, - DW_LLE_offset_pair = 0x04, - DW_LLE_default_location = 0x05, - DW_LLE_base_address = 0x06, - DW_LLE_start_end = 0x07, - DW_LLE_start_length = 0x08 -}; - /// Constants for the DW_APPLE_PROPERTY_attributes attribute. /// Keep this list in sync with clang's DeclSpec.h ObjCPropertyAttributeKind! enum ApplePropertyAttributes { @@ -434,6 +469,7 @@ StringRef LNStandardString(unsigned Standard); StringRef LNExtendedString(unsigned Encoding); StringRef MacinfoString(unsigned Encoding); StringRef RangeListEncodingString(unsigned Encoding); +StringRef LocListEncodingString(unsigned Encoding); StringRef CallFrameString(unsigned Encoding, Triple::ArchType Arch); StringRef ApplePropertyString(unsigned); StringRef UnitTypeString(unsigned); @@ -525,6 +561,17 @@ struct FormParams { explicit operator bool() const { return Version && AddrSize; } }; +/// Get the byte size of the unit length field depending on the DWARF format. +inline uint8_t getUnitLengthFieldByteSize(DwarfFormat Format) { + switch (Format) { + case DwarfFormat::DWARF32: + return 4; + case DwarfFormat::DWARF64: + return 12; + } + llvm_unreachable("Invalid Format value"); +} + /// Get the fixed byte size for a given form. /// /// If the form has a fixed byte size, then an Optional with a value will be diff --git a/include/llvm/BinaryFormat/ELF.h b/include/llvm/BinaryFormat/ELF.h index 2bd711137845..46edfb6260be 100644 --- a/include/llvm/BinaryFormat/ELF.h +++ b/include/llvm/BinaryFormat/ELF.h @@ -1356,6 +1356,72 @@ enum : unsigned { NT_GNU_BUILD_ATTRIBUTE_FUNC = 0x101, }; +// Core note types +enum : unsigned { + NT_PRSTATUS = 1, + NT_FPREGSET = 2, + NT_PRPSINFO = 3, + NT_TASKSTRUCT = 4, + NT_AUXV = 6, + NT_PSTATUS = 10, + NT_FPREGS = 12, + NT_PSINFO = 13, + NT_LWPSTATUS = 16, + NT_LWPSINFO = 17, + NT_WIN32PSTATUS = 18, + + NT_PPC_VMX = 0x100, + NT_PPC_VSX = 0x102, + NT_PPC_TAR = 0x103, + NT_PPC_PPR = 0x104, + NT_PPC_DSCR = 0x105, + NT_PPC_EBB = 0x106, + NT_PPC_PMU = 0x107, + NT_PPC_TM_CGPR = 0x108, + NT_PPC_TM_CFPR = 0x109, + NT_PPC_TM_CVMX = 0x10a, + NT_PPC_TM_CVSX = 0x10b, + NT_PPC_TM_SPR = 0x10c, + NT_PPC_TM_CTAR = 0x10d, + NT_PPC_TM_CPPR = 0x10e, + NT_PPC_TM_CDSCR = 0x10f, + + NT_386_TLS = 0x200, + NT_386_IOPERM = 0x201, + NT_X86_XSTATE = 0x202, + + NT_S390_HIGH_GPRS = 0x300, + NT_S390_TIMER = 0x301, + NT_S390_TODCMP = 0x302, + NT_S390_TODPREG = 0x303, + NT_S390_CTRS = 0x304, + NT_S390_PREFIX = 0x305, + NT_S390_LAST_BREAK = 0x306, + NT_S390_SYSTEM_CALL = 0x307, + NT_S390_TDB = 0x308, + NT_S390_VXRS_LOW = 0x309, + NT_S390_VXRS_HIGH = 0x30a, + NT_S390_GS_CB = 0x30b, + NT_S390_GS_BC = 0x30c, + + NT_ARM_VFP = 0x400, + NT_ARM_TLS = 0x401, + NT_ARM_HW_BREAK = 0x402, + NT_ARM_HW_WATCH = 0x403, + NT_ARM_SVE = 0x405, + NT_ARM_PAC_MASK = 0x406, + + NT_FILE = 0x46494c45, + NT_PRXFPREG = 0x46e62b7f, + NT_SIGINFO = 0x53494749, +}; + +// LLVM-specific notes. 
+enum { + NT_LLVM_HWASAN_GLOBALS = 3, +}; + +// GNU note types enum { NT_GNU_ABI_TAG = 1, NT_GNU_HWCAP = 2, diff --git a/include/llvm/BinaryFormat/ELFRelocs/AArch64.def b/include/llvm/BinaryFormat/ELFRelocs/AArch64.def index 4afcd7d1f093..c8364133e31f 100644 --- a/include/llvm/BinaryFormat/ELFRelocs/AArch64.def +++ b/include/llvm/BinaryFormat/ELFRelocs/AArch64.def @@ -124,8 +124,11 @@ ELF_RELOC(R_AARCH64_COPY, 0x400) ELF_RELOC(R_AARCH64_GLOB_DAT, 0x401) ELF_RELOC(R_AARCH64_JUMP_SLOT, 0x402) ELF_RELOC(R_AARCH64_RELATIVE, 0x403) -ELF_RELOC(R_AARCH64_TLS_DTPREL64, 0x404) -ELF_RELOC(R_AARCH64_TLS_DTPMOD64, 0x405) +// 0x404 and 0x405 are now R_AARCH64_TLS_IMPDEF1 and R_AARCH64_TLS_IMPDEF2 +// We follow GNU and define TLS_IMPDEF1 as TLS_DTPMOD64 and TLS_IMPDEF2 as +// TLS_DTPREL64 +ELF_RELOC(R_AARCH64_TLS_DTPMOD64, 0x404) +ELF_RELOC(R_AARCH64_TLS_DTPREL64, 0x405) ELF_RELOC(R_AARCH64_TLS_TPREL64, 0x406) ELF_RELOC(R_AARCH64_TLSDESC, 0x407) ELF_RELOC(R_AARCH64_IRELATIVE, 0x408) diff --git a/include/llvm/BinaryFormat/MachO.h b/include/llvm/BinaryFormat/MachO.h index a01393a3b303..fb50e549cb9d 100644 --- a/include/llvm/BinaryFormat/MachO.h +++ b/include/llvm/BinaryFormat/MachO.h @@ -581,6 +581,11 @@ struct section_64 { uint32_t reserved3; }; +inline bool isVirtualSection(uint8_t type) { + return (type == MachO::S_ZEROFILL || type == MachO::S_GB_ZEROFILL || + type == MachO::S_THREAD_LOCAL_ZEROFILL); +} + struct fvmlib { uint32_t name; uint32_t minor_version; diff --git a/include/llvm/BinaryFormat/Magic.h b/include/llvm/BinaryFormat/Magic.h index cd9833ec4d22..64c687262f4a 100644 --- a/include/llvm/BinaryFormat/Magic.h +++ b/include/llvm/BinaryFormat/Magic.h @@ -49,6 +49,7 @@ struct file_magic { xcoff_object_64, ///< 64-bit XCOFF object file wasm_object, ///< WebAssembly Object file pdb, ///< Windows PDB debug info file + tapi_file, ///< Text-based Dynamic Library Stub file }; bool is_object() const { return V != unknown; } diff --git a/include/llvm/BinaryFormat/Minidump.h b/include/llvm/BinaryFormat/Minidump.h index 65c17d1eb00c..89cd779951cf 100644 --- a/include/llvm/BinaryFormat/Minidump.h +++ b/include/llvm/BinaryFormat/Minidump.h @@ -18,12 +18,15 @@ #ifndef LLVM_BINARYFORMAT_MINIDUMP_H #define LLVM_BINARYFORMAT_MINIDUMP_H +#include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/Support/Endian.h" namespace llvm { namespace minidump { +LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); + /// The minidump header is the first part of a minidump file. It identifies the /// file as a minidump file, and gives the location of the stream directory. 
struct Header { @@ -67,6 +70,50 @@ struct MemoryDescriptor { }; static_assert(sizeof(MemoryDescriptor) == 16, ""); +struct MemoryInfoListHeader { + support::ulittle32_t SizeOfHeader; + support::ulittle32_t SizeOfEntry; + support::ulittle64_t NumberOfEntries; + + MemoryInfoListHeader() = default; + MemoryInfoListHeader(uint32_t SizeOfHeader, uint32_t SizeOfEntry, + uint64_t NumberOfEntries) + : SizeOfHeader(SizeOfHeader), SizeOfEntry(SizeOfEntry), + NumberOfEntries(NumberOfEntries) {} +}; +static_assert(sizeof(MemoryInfoListHeader) == 16, ""); + +enum class MemoryProtection : uint32_t { +#define HANDLE_MDMP_PROTECT(CODE, NAME, NATIVENAME) NAME = CODE, +#include "llvm/BinaryFormat/MinidumpConstants.def" + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/0xffffffffu), +}; + +enum class MemoryState : uint32_t { +#define HANDLE_MDMP_MEMSTATE(CODE, NAME, NATIVENAME) NAME = CODE, +#include "llvm/BinaryFormat/MinidumpConstants.def" + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/0xffffffffu), +}; + +enum class MemoryType : uint32_t { +#define HANDLE_MDMP_MEMTYPE(CODE, NAME, NATIVENAME) NAME = CODE, +#include "llvm/BinaryFormat/MinidumpConstants.def" + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/0xffffffffu), +}; + +struct MemoryInfo { + support::ulittle64_t BaseAddress; + support::ulittle64_t AllocationBase; + support::little_t AllocationProtect; + support::ulittle32_t Reserved0; + support::ulittle64_t RegionSize; + support::little_t State; + support::little_t Protect; + support::little_t Type; + support::ulittle32_t Reserved1; +}; +static_assert(sizeof(MemoryInfo) == 48, ""); + /// Specifies the location and type of a single stream in the minidump file. The /// minidump stream directory is an array of entries of this type, with its size /// given by Header.NumberOfStreams. 
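
Usage sketch (not part of the imported patch): the LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE and LLVM_MARK_AS_BITMASK_ENUM markers above are what make the bitwise operators below well-formed for these scoped enums; the particular flag combination shown is only an example.

#include "llvm/BinaryFormat/Minidump.h"
#include <cstdint>

using namespace llvm::minidump;

// True if the protection flags allow any form of write access.
static bool isWritable(MemoryProtection P) {
  const MemoryProtection WritableMask = MemoryProtection::ReadWrite |
                                        MemoryProtection::WriteCopy |
                                        MemoryProtection::ExecuteReadWrite;
  return static_cast<uint32_t>(P & WritableMask) != 0;
}

// True for a committed, writable region as described by a MemoryInfo entry.
static bool isUsableRegion(MemoryState State, MemoryProtection Protect) {
  return State == MemoryState::Commit && isWritable(Protect);
}
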
@@ -180,6 +227,27 @@ struct Thread { }; static_assert(sizeof(Thread) == 48, ""); +struct Exception { + static constexpr size_t MaxParameters = 15; + + support::ulittle32_t ExceptionCode; + support::ulittle32_t ExceptionFlags; + support::ulittle64_t ExceptionRecord; + support::ulittle64_t ExceptionAddress; + support::ulittle32_t NumberParameters; + support::ulittle32_t UnusedAlignment; + support::ulittle64_t ExceptionInformation[MaxParameters]; +}; +static_assert(sizeof(Exception) == 152, ""); + +struct ExceptionStream { + support::ulittle32_t ThreadId; + support::ulittle32_t UnusedAlignment; + Exception ExceptionRecord; + LocationDescriptor ThreadContext; +}; +static_assert(sizeof(ExceptionStream) == 168, ""); + } // namespace minidump template <> struct DenseMapInfo { diff --git a/include/llvm/BinaryFormat/MinidumpConstants.def b/include/llvm/BinaryFormat/MinidumpConstants.def index d4f13dd99217..aeef399af7a4 100644 --- a/include/llvm/BinaryFormat/MinidumpConstants.def +++ b/include/llvm/BinaryFormat/MinidumpConstants.def @@ -6,8 +6,9 @@ // //===----------------------------------------------------------------------===// -#if !(defined HANDLE_MDMP_STREAM_TYPE || defined HANDLE_MDMP_ARCH || \ - defined HANDLE_MDMP_PLATFORM) +#if !(defined(HANDLE_MDMP_STREAM_TYPE) || defined(HANDLE_MDMP_ARCH) || \ + defined(HANDLE_MDMP_PLATFORM) || defined(HANDLE_MDMP_PROTECT) || \ + defined(HANDLE_MDMP_MEMSTATE) || defined(HANDLE_MDMP_MEMTYPE)) #error "Missing HANDLE_MDMP definition" #endif @@ -23,6 +24,18 @@ #define HANDLE_MDMP_PLATFORM(CODE, NAME) #endif +#ifndef HANDLE_MDMP_PROTECT +#define HANDLE_MDMP_PROTECT(CODE, NAME, NATIVENAME) +#endif + +#ifndef HANDLE_MDMP_MEMSTATE +#define HANDLE_MDMP_MEMSTATE(CODE, NAME, NATIVENAME) +#endif + +#ifndef HANDLE_MDMP_MEMTYPE +#define HANDLE_MDMP_MEMTYPE(CODE, NAME, NATIVENAME) +#endif + HANDLE_MDMP_STREAM_TYPE(0x0003, ThreadList) HANDLE_MDMP_STREAM_TYPE(0x0004, ModuleList) HANDLE_MDMP_STREAM_TYPE(0x0005, MemoryList) @@ -102,6 +115,30 @@ HANDLE_MDMP_PLATFORM(0x8203, Android) // Android HANDLE_MDMP_PLATFORM(0x8204, PS3) // PS3 HANDLE_MDMP_PLATFORM(0x8205, NaCl) // Native Client (NaCl) +HANDLE_MDMP_PROTECT(0x01, NoAccess, PAGE_NO_ACCESS) +HANDLE_MDMP_PROTECT(0x02, ReadOnly, PAGE_READ_ONLY) +HANDLE_MDMP_PROTECT(0x04, ReadWrite, PAGE_READ_WRITE) +HANDLE_MDMP_PROTECT(0x08, WriteCopy, PAGE_WRITE_COPY) +HANDLE_MDMP_PROTECT(0x10, Execute, PAGE_EXECUTE) +HANDLE_MDMP_PROTECT(0x20, ExecuteRead, PAGE_EXECUTE_READ) +HANDLE_MDMP_PROTECT(0x40, ExecuteReadWrite, PAGE_EXECUTE_READ_WRITE) +HANDLE_MDMP_PROTECT(0x80, ExeciteWriteCopy, PAGE_EXECUTE_WRITE_COPY) +HANDLE_MDMP_PROTECT(0x100, Guard, PAGE_GUARD) +HANDLE_MDMP_PROTECT(0x200, NoCache, PAGE_NOCACHE) +HANDLE_MDMP_PROTECT(0x400, WriteCombine, PAGE_WRITECOMBINE) +HANDLE_MDMP_PROTECT(0x40000000, TargetsInvalid, PAGE_TARGETS_INVALID) + +HANDLE_MDMP_MEMSTATE(0x01000, Commit, MEM_COMMIT) +HANDLE_MDMP_MEMSTATE(0x02000, Reserve, MEM_RESERVE) +HANDLE_MDMP_MEMSTATE(0x10000, Free, MEM_FREE) + +HANDLE_MDMP_MEMTYPE(0x0020000, Private, MEM_PRIVATE) +HANDLE_MDMP_MEMTYPE(0x0040000, Mapped, MEM_MAPPED) +HANDLE_MDMP_MEMTYPE(0x1000000, Image, MEM_IMAGE) + #undef HANDLE_MDMP_STREAM_TYPE #undef HANDLE_MDMP_ARCH #undef HANDLE_MDMP_PLATFORM +#undef HANDLE_MDMP_PROTECT +#undef HANDLE_MDMP_MEMSTATE +#undef HANDLE_MDMP_MEMTYPE diff --git a/include/llvm/BinaryFormat/Wasm.h b/include/llvm/BinaryFormat/Wasm.h index 0f22bfe610c6..f550d880f68a 100644 --- a/include/llvm/BinaryFormat/Wasm.h +++ b/include/llvm/BinaryFormat/Wasm.h @@ -16,6 +16,7 @@ #include 
"llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" namespace llvm { namespace wasm { @@ -251,9 +252,21 @@ enum : unsigned { WASM_OPCODE_F32_CONST = 0x43, WASM_OPCODE_F64_CONST = 0x44, WASM_OPCODE_I32_ADD = 0x6a, +}; + +// Opcodes used in synthetic functions. +enum : unsigned { + WASM_OPCODE_IF = 0x04, + WASM_OPCODE_ELSE = 0x05, + WASM_OPCODE_DROP = 0x1a, WASM_OPCODE_MISC_PREFIX = 0xfc, WASM_OPCODE_MEMORY_INIT = 0x08, WASM_OPCODE_DATA_DROP = 0x09, + WASM_OPCODE_ATOMICS_PREFIX = 0xfe, + WASM_OPCODE_ATOMIC_NOTIFY = 0x00, + WASM_OPCODE_I32_ATOMIC_WAIT = 0x01, + WASM_OPCODE_I32_ATOMIC_STORE = 0x17, + WASM_OPCODE_I32_RMW_CMPXCHG = 0x48, }; enum : unsigned { @@ -318,6 +331,7 @@ const unsigned WASM_SYMBOL_VISIBILITY_HIDDEN = 0x4; const unsigned WASM_SYMBOL_UNDEFINED = 0x10; const unsigned WASM_SYMBOL_EXPORTED = 0x20; const unsigned WASM_SYMBOL_EXPLICIT_NAME = 0x40; +const unsigned WASM_SYMBOL_NO_STRIP = 0x80; #define WASM_RELOC(name, value) name = value, diff --git a/include/llvm/BinaryFormat/XCOFF.h b/include/llvm/BinaryFormat/XCOFF.h index 7774ab3ed24a..20a0f446272f 100644 --- a/include/llvm/BinaryFormat/XCOFF.h +++ b/include/llvm/BinaryFormat/XCOFF.h @@ -19,12 +19,13 @@ namespace llvm { namespace XCOFF { // Constants used in the XCOFF definition. -enum { SectionNameSize = 8, SymbolNameSize = 8 }; +enum { FileNamePadSize = 6, NameSize = 8, SymbolTableEntrySize = 18 }; + enum ReservedSectionNum { N_DEBUG = -2, N_ABS = -1, N_UNDEF = 0 }; // x_smclas field of x_csect from system header: /usr/include/syms.h /// Storage Mapping Class definitions. -enum StorageMappingClass { +enum StorageMappingClass : uint8_t { // READ ONLY CLASSES XMC_PR = 0, ///< Program Code XMC_RO = 1, ///< Read Only Constant @@ -139,6 +140,117 @@ enum StorageClass : uint8_t { C_TCSYM = 134 // Reserved }; +enum SymbolType { + XTY_ER = 0, ///< External reference. + XTY_SD = 1, ///< Csect definition for initialized storage. + XTY_LD = 2, ///< Label definition. + ///< Defines an entry point to an initialized csect. + XTY_CM = 3 ///< Common csect definition. For uninitialized storage. +}; + +// Relocation types, defined in `/usr/include/reloc.h`. +enum RelocationType : uint8_t { + R_POS = 0x00, ///< Positive relocation. Provides the address of the referenced + ///< symbol. + R_RL = 0x0c, ///< Positive indirect load relocation. Modifiable instruction. + R_RLA = 0x0d, ///< Positive load address relocation. Modifiable instruction. + + R_NEG = 0x01, ///< Negative relocation. Provides the negative of the address + ///< of the referenced symbol. + R_REL = 0x02, ///< Relative to self relocation. Provides a displacement value + ///< between the address of the referenced symbol and the + ///< address being relocated. + + R_TOC = 0x03, ///< Relative to the TOC relocation. Provides a displacement + ///< that is the difference between the address of the + ///< referenced symbol and the TOC anchor csect. + R_TRL = 0x12, ///< TOC relative indirect load relocation. Similar to R_TOC, + ///< but not modifiable instruction. + + R_TRLA = + 0x13, ///< Relative to the TOC or to the thread-local storage base + ///< relocation. Compilers are not permitted to generate this + ///< relocation type. It is the result of a reversible + ///< transformation by the linker of an R_TOC relation that turned a + ///< load instruction into an add-immediate instruction. + + R_GL = 0x05, ///< Global linkage-external TOC address relocation. 
Provides the + ///< address of the external TOC associated with a defined + ///< external symbol. + R_TCL = 0x06, ///< Local object TOC address relocation. Provides the address + ///< of the local TOC entry of a defined external symbol. + + R_REF = 0x0f, ///< A non-relocating relocation. Used to prevent the binder + ///< from garbage collecting a csect (such as code used for + ///< dynamic initialization of non-local statics) for which + ///< another csect has an implicit dependency. + + R_BA = 0x08, ///< Branch absolute relocation. Provides the address of the + ///< referenced symbol. References a non-modifiable instruction. + R_BR = 0x0a, ///< Branch relative to self relocation. Provides the + ///< displacement that is the difference between the address of + ///< the referenced symbol and the address of the referenced + ///< branch instruction. References a non-modifiable instruction. + R_RBA = 0x18, ///< Branch absolute relocation. Similar to R_BA but + ///< references a modifiable instruction. + R_RBR = 0x1a, ///< Branch relative to self relocation. Similar to the R_BR + ///< relocation type, but references a modifiable instruction. + + R_TLS = 0x20, ///< General-dynamic reference to TLS symbol. + R_TLS_IE = 0x21, ///< Initial-exec reference to TLS symbol. + R_TLS_LD = 0x22, ///< Local-dynamic reference to TLS symbol. + R_TLS_LE = 0x23, ///< Local-exec reference to TLS symbol. + R_TLSM = 0x24, ///< Module reference to TLS. Provides a handle for the module + ///< containing the referenced symbol. + R_TLSML = 0x25, ///< Module reference to the local TLS storage. + + R_TOCU = 0x30, ///< Relative to TOC upper. Specifies the high-order 16 bits of + ///< a large code model TOC-relative relocation. + R_TOCL = 0x31 ///< Relative to TOC lower. Specifies the low-order 16 bits of a + ///< large code model TOC-relative relocation. +}; + +struct FileHeader32 { + uint16_t Magic; + uint16_t NumberOfSections; + int32_t TimeStamp; + uint32_t SymbolTableFileOffset; + int32_t NumberOfSymbolTableEntries; + uint16_t AuxiliaryHeaderSize; + uint16_t Flags; +}; + +struct SectionHeader32 { + char Name[XCOFF::NameSize]; + uint32_t PhysicalAddress; + uint32_t VirtualAddress; + uint32_t Size; + uint32_t FileOffsetToData; + uint32_t FileOffsetToRelocations; + uint32_t FileOffsetToLineNumbers; + uint16_t NumberOfRelocations; + uint16_t NumberOfLineNumbers; + int32_t Flags; +}; + +enum CFileStringType : uint8_t { + XFT_FN = 0, ///< Specifies the source-file name. + XFT_CT = 1, ///< Specifies the compiler time stamp. + XFT_CV = 2, ///< Specifies the compiler version number. + XFT_CD = 128 ///< Specifies compiler-defined information. +}; + +enum CFileLangId : uint8_t { + TB_C = 0, ///< C language. + TB_CPLUSPLUS = 9 ///< C++ language. +}; + +enum CFileCpuId : uint8_t { + TCPU_PPC64 = 2, ///< PowerPC common architecture 64-bit mode. + TCPU_COM = 3, ///< POWER and PowerPC architecture common. + TCPU_970 = 19 ///< PPC970 - PowerPC 64-bit architecture. 
+}; + } // end namespace XCOFF } // end namespace llvm diff --git a/include/llvm/Bitcode/BitcodeAnalyzer.h b/include/llvm/Bitcode/BitcodeAnalyzer.h index cfdebd6fe6cb..5fb8bb26f255 100644 --- a/include/llvm/Bitcode/BitcodeAnalyzer.h +++ b/include/llvm/Bitcode/BitcodeAnalyzer.h @@ -30,6 +30,7 @@ enum CurStreamTypeType { LLVMIRBitstream, ClangSerializedASTBitstream, ClangSerializedDiagnosticsBitstream, + LLVMBitstreamRemarks }; struct BCDumpOptions { diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index decd4dd3a965..1a397068caf0 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -391,7 +391,7 @@ enum CastOpcodes { /// have no fixed relation to the LLVM IR enum values. Changing these will /// break compatibility with old files. enum UnaryOpcodes { - UNOP_NEG = 0 + UNOP_FNEG = 0 }; /// BinaryOpcodes - These are values used in the bitcode files to encode which diff --git a/include/llvm/Bitstream/BitCodes.h b/include/llvm/Bitstream/BitCodes.h index adf54ba96396..41a3de3b20ef 100644 --- a/include/llvm/Bitstream/BitCodes.h +++ b/include/llvm/Bitstream/BitCodes.h @@ -168,6 +168,11 @@ class BitCodeAbbrev { SmallVector OperandList; public: + BitCodeAbbrev() = default; + + explicit BitCodeAbbrev(std::initializer_list OperandList) + : OperandList(OperandList) {} + unsigned getNumOperandInfos() const { return static_cast(OperandList.size()); } diff --git a/include/llvm/Bitstream/BitstreamReader.h b/include/llvm/Bitstream/BitstreamReader.h index ee82e7ec1ba2..b49a969a2d8b 100644 --- a/include/llvm/Bitstream/BitstreamReader.h +++ b/include/llvm/Bitstream/BitstreamReader.h @@ -379,6 +379,7 @@ public: using SimpleBitstreamCursor::ReadVBR; using SimpleBitstreamCursor::ReadVBR64; using SimpleBitstreamCursor::SizeInBytes; + using SimpleBitstreamCursor::skipToEnd; /// Return the number of bits used to encode an abbrev #. unsigned getAbbrevIDWidth() const { return CurCodeSize; } diff --git a/include/llvm/CodeGen/AccelTable.h b/include/llvm/CodeGen/AccelTable.h index 734531a65d50..f8f6b5448f3f 100644 --- a/include/llvm/CodeGen/AccelTable.h +++ b/include/llvm/CodeGen/AccelTable.h @@ -101,8 +101,6 @@ /// /// An Apple Accelerator Table can be serialized by calling emitAppleAccelTable /// function. -/// -/// TODO: Add DWARF v5 emission code. namespace llvm { diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index d110f8b01cb5..a4580da5aec9 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -111,6 +111,10 @@ public: /// of each call to runOnMachineFunction(). MCSymbol *CurrentFnSym = nullptr; + /// The symbol for the current function descriptor on AIX. This is created + /// at the beginning of each call to SetupMachineFunction(). + MCSymbol *CurrentFnDescSym = nullptr; + /// The symbol used to represent the start of the current function for the /// purpose of calculating its size (e.g. using the .size directive). By /// default, this is equal to CurrentFnSym. @@ -304,7 +308,7 @@ public: /// This should be called when a new MachineFunction is being processed from /// runOnMachineFunction. - void SetupMachineFunction(MachineFunction &MF); + virtual void SetupMachineFunction(MachineFunction &MF); /// This method emits the body and trailer for a function. void EmitFunctionBody(); @@ -342,12 +346,11 @@ public: /// so, emit it and return true, otherwise do nothing and return false. 
bool EmitSpecialLLVMGlobal(const GlobalVariable *GV); - /// Emit an alignment directive to the specified power of two boundary. For - /// example, if you pass in 3 here, you will get an 8 byte alignment. If a + /// Emit an alignment directive to the specified power of two boundary. If a /// global value is specified, and if that global has an explicit alignment /// requested, it will override the alignment request if required for /// correctness. - void EmitAlignment(unsigned NumBits, const GlobalObject *GV = nullptr) const; + void EmitAlignment(Align Alignment, const GlobalObject *GV = nullptr) const; /// Lower the specified LLVM Constant to an MCExpr. virtual const MCExpr *lowerConstant(const Constant *CV); @@ -400,7 +403,7 @@ public: /// By default, this method prints the label for the specified /// MachineBasicBlock, an alignment (if present) and a comment describing it /// if appropriate. - virtual void EmitBasicBlockStart(const MachineBasicBlock &MBB) const; + virtual void EmitBasicBlockStart(const MachineBasicBlock &MBB); /// Targets can override this to emit stuff at the end of a basic block. virtual void EmitBasicBlockEnd(const MachineBasicBlock &MBB); @@ -415,6 +418,10 @@ public: virtual void EmitFunctionEntryLabel(); + virtual void EmitFunctionDescriptor() { + llvm_unreachable("Function descriptor is target-specific."); + } + virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV); /// Targets can override this to change how global constants that are part of @@ -635,6 +642,10 @@ public: /// supported by the target. void EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const; + /// Return the alignment for the specified \p GV. + static Align getGVAlignment(const GlobalValue *GV, const DataLayout &DL, + Align InAlign = Align::None()); + private: /// Private state for PrintSpecial() // Assign a unique ID to this machine instruction. diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index 70bf670fdf0b..2e57b4c9d332 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -190,6 +190,7 @@ private: protected: explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL) : BaseT(DL) {} + virtual ~BasicTTIImplBase() = default; using TargetTransformInfoImplBase::DL; @@ -215,6 +216,16 @@ public: return -1; } + bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, + Intrinsic::ID IID) const { + return false; + } + + bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, + Value *OldV, Value *NewV) const { + return false; + } + bool isLegalAddImmediate(int64_t imm) { return getTLI()->isLegalAddImmediate(imm); } @@ -317,7 +328,7 @@ public: unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JumpTableSize) { /// Try to find the estimated number of clusters. Note that the number of - /// clusters identified in this function could be different from the actural + /// clusters identified in this function could be different from the actual /// numbers found in lowering. This function ignore switches that are /// lowered with a mix of jump table / bit test / BTree. 
This function was /// initially intended to be used when estimating the cost of switch in @@ -371,10 +382,6 @@ public: return N; } - unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); } - - unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); } - bool shouldBuildLookupTables() { const TargetLoweringBase *TLI = getTLI(); return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || @@ -508,13 +515,44 @@ public: return BaseT::getInstructionLatency(I); } + virtual Optional + getCacheSize(TargetTransformInfo::CacheLevel Level) const { + return Optional( + getST()->getCacheSize(static_cast(Level))); + } + + virtual Optional + getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const { + Optional TargetResult = + getST()->getCacheAssociativity(static_cast(Level)); + + if (TargetResult) + return TargetResult; + + return BaseT::getCacheAssociativity(Level); + } + + virtual unsigned getCacheLineSize() const { + return getST()->getCacheLineSize(); + } + + virtual unsigned getPrefetchDistance() const { + return getST()->getPrefetchDistance(); + } + + virtual unsigned getMinPrefetchStride() const { + return getST()->getMinPrefetchStride(); + } + + virtual unsigned getMaxPrefetchIterationsAhead() const { + return getST()->getMaxPrefetchIterationsAhead(); + } + /// @} /// \name Vector TTI Implementations /// @{ - unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; } - unsigned getRegisterBitWidth(bool Vector) const { return 32; } /// Estimate the overhead of scalarizing an instruction. Insert and Extract @@ -1111,9 +1149,7 @@ public: OpPropsBW); // For non-rotates (X != Y) we must add shift-by-zero handling costs. if (X != Y) { - Type *CondTy = Type::getInt1Ty(RetTy->getContext()); - if (RetVF > 1) - CondTy = VectorType::get(CondTy, RetVF); + Type *CondTy = RetTy->getWithNewBitWidth(1); Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, nullptr); Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, @@ -1131,7 +1167,6 @@ public: unsigned getIntrinsicInstrCost( Intrinsic::ID IID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed = std::numeric_limits::max()) { - unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1); auto *ConcreteTTI = static_cast(this); SmallVector ISDs; @@ -1288,9 +1323,7 @@ public: /*IsUnsigned=*/false); case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: { - Type *CondTy = Type::getInt1Ty(RetTy->getContext()); - if (RetVF > 1) - CondTy = VectorType::get(CondTy, RetVF); + Type *CondTy = RetTy->getWithNewBitWidth(1); Type *OpTy = StructType::create({RetTy, CondTy}); Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat @@ -1310,9 +1343,7 @@ public: } case Intrinsic::uadd_sat: case Intrinsic::usub_sat: { - Type *CondTy = Type::getInt1Ty(RetTy->getContext()); - if (RetVF > 1) - CondTy = VectorType::get(CondTy, RetVF); + Type *CondTy = RetTy->getWithNewBitWidth(1); Type *OpTy = StructType::create({RetTy, CondTy}); Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat @@ -1329,9 +1360,7 @@ public: case Intrinsic::smul_fix: case Intrinsic::umul_fix: { unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; - Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); - if (RetVF > 1) - ExtTy = VectorType::get(ExtTy, RetVF); + Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); unsigned ExtOp = IID == Intrinsic::smul_fix ? 
Instruction::SExt : Instruction::ZExt; @@ -1395,9 +1424,7 @@ public: Type *MulTy = RetTy->getContainedType(0); Type *OverflowTy = RetTy->getContainedType(1); unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; - Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); - if (MulTy->isVectorTy()) - ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements() ); + Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); unsigned ExtOp = IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h index aa339e1cc913..a30ca638ee6d 100644 --- a/include/llvm/CodeGen/CallingConvLower.h +++ b/include/llvm/CodeGen/CallingConvLower.h @@ -20,6 +20,7 @@ #include "llvm/CodeGen/TargetCallingConv.h" #include "llvm/IR/CallingConv.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Alignment.h" namespace llvm { @@ -43,6 +44,7 @@ public: AExtUpper, // The value is in the upper bits of the location and should be // extended with undefined upper bits when retrieved. BCvt, // The value is bit-converted in the location. + Trunc, // The value is truncated in the location. VExt, // The value is vector-widened in the location. // FIXME: Not implemented yet. Code that uses AExt to mean // vector-widen should be fixed to use VExt instead. @@ -197,7 +199,7 @@ private: LLVMContext &Context; unsigned StackOffset; - unsigned MaxStackArgAlign; + Align MaxStackArgAlign; SmallVector UsedRegs; SmallVector PendingLocs; SmallVector PendingArgFlags; @@ -421,19 +423,19 @@ public: /// AllocateStack - Allocate a chunk of stack space with the specified size /// and alignment. - unsigned AllocateStack(unsigned Size, unsigned Align) { - assert(Align && ((Align - 1) & Align) == 0); // Align is power of 2. - StackOffset = alignTo(StackOffset, Align); + unsigned AllocateStack(unsigned Size, unsigned Alignment) { + const Align CheckedAlignment(Alignment); + StackOffset = alignTo(StackOffset, CheckedAlignment); unsigned Result = StackOffset; StackOffset += Size; - MaxStackArgAlign = std::max(Align, MaxStackArgAlign); - ensureMaxAlignment(Align); + MaxStackArgAlign = std::max(CheckedAlignment, MaxStackArgAlign); + ensureMaxAlignment(CheckedAlignment); return Result; } - void ensureMaxAlignment(unsigned Align) { + void ensureMaxAlignment(Align Alignment) { if (!AnalyzingMustTailForwardedRegs) - MF.getFrameInfo().ensureMaxAlignment(Align); + MF.getFrameInfo().ensureMaxAlignment(Alignment.value()); } /// Version of AllocateStack with extra register to be shadowed. diff --git a/include/llvm/CodeGen/DFAPacketizer.h b/include/llvm/CodeGen/DFAPacketizer.h index cf58ee0cabea..705465b15c4c 100644 --- a/include/llvm/CodeGen/DFAPacketizer.h +++ b/include/llvm/CodeGen/DFAPacketizer.h @@ -28,6 +28,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" +#include "llvm/Support/Automaton.h" #include #include #include @@ -76,26 +77,26 @@ using DFAStateInput = int64_t; class DFAPacketizer { private: - using UnsignPair = std::pair; - const InstrItineraryData *InstrItins; - int CurrentState = 0; - const DFAStateInput (*DFAStateInputTable)[2]; - const unsigned *DFAStateEntryTable; - - // CachedTable is a map from to ToState. - DenseMap CachedTable; - - // Read the DFA transition table and update CachedTable. 
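
Illustration (not part of the imported patch): the migration from raw unsigned alignments to the Align type, seen here in AllocateStack() and earlier in EmitAlignment() and createInterleaveGroup(), moves the power-of-two invariant into the type itself. A standalone sketch of the same rounding logic, assuming only llvm/Support/Alignment.h:

#include "llvm/Support/Alignment.h"
#include <cassert>

using namespace llvm;

// Align guarantees a power of two >= 1, so no manual assert is needed here,
// unlike the old unsigned-based AllocateStack().
static unsigned allocateSlot(unsigned &StackOffset, unsigned Size,
                             Align Alignment) {
  StackOffset = alignTo(StackOffset, Alignment); // round up to the boundary
  unsigned Result = StackOffset;
  StackOffset += Size;
  return Result;
}

int main() {
  unsigned Offset = 6;
  unsigned Slot = allocateSlot(Offset, 8, Align(4));
  assert(Slot == 8 && Offset == 16);
  (void)Slot;
}
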
- void ReadTable(unsigned state); + Automaton A; public: - DFAPacketizer(const InstrItineraryData *I, const DFAStateInput (*SIT)[2], - const unsigned *SET); + DFAPacketizer(const InstrItineraryData *InstrItins, Automaton a) : + InstrItins(InstrItins), A(std::move(a)) { + // Start off with resource tracking disabled. + A.enableTranscription(false); + } // Reset the current state to make all resources available. void clearResources() { - CurrentState = 0; + A.reset(); + } + + // Set whether this packetizer should track not just whether instructions + // can be packetized, but also which functional units each instruction ends up + // using after packetization. + void setTrackResources(bool Track) { + A.enableTranscription(Track); } // Return the DFAInput for an instruction class. @@ -120,6 +121,15 @@ public: // current state to reflect that change. void reserveResources(MachineInstr &MI); + // Return the resources used by the InstIdx'th instruction added to this + // packet. The resources are returned as a bitvector of functional units. + // + // Note that a bundle may be packed in multiple valid ways. This function + // returns one arbitary valid packing. + // + // Requires setTrackResources(true) to have been called. + unsigned getUsedResources(unsigned InstIdx); + const InstrItineraryData *getInstrItins() const { return InstrItins; } }; @@ -134,7 +144,7 @@ class VLIWPacketizerList { protected: MachineFunction &MF; const TargetInstrInfo *TII; - AliasAnalysis *AA; + AAResults *AA; // The VLIW Scheduler. DefaultVLIWScheduler *VLIWScheduler; @@ -146,9 +156,9 @@ protected: std::map MIToSUnit; public: - // The AliasAnalysis parameter can be nullptr. + // The AAResults parameter can be nullptr. VLIWPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, - AliasAnalysis *AA); + AAResults *AA); virtual ~VLIWPacketizerList(); diff --git a/include/llvm/CodeGen/DIE.h b/include/llvm/CodeGen/DIE.h index 684f9e40ca5a..e8e7504a6cda 100644 --- a/include/llvm/CodeGen/DIE.h +++ b/include/llvm/CodeGen/DIE.h @@ -550,6 +550,14 @@ public: return *static_cast(Last ? Last->Next.getPointer() : nullptr); } + void takeNodes(IntrusiveBackList &Other) { + for (auto &N : Other) { + N.Next.setPointerAndInt(&N, true); + push_back(N); + } + Other.Last = nullptr; + } + class const_iterator; class iterator : public iterator_facade_base { @@ -685,6 +693,10 @@ public: return addValue(Alloc, DIEValue(Attribute, Form, std::forward(Value))); } + /// Take ownership of the nodes in \p Other, and append them to the back of + /// the list. 
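
Usage sketch (not part of the imported patch) of the calling pattern the Automaton-backed packetizer API above appears to enable; the DFAPacketizer instance is assumed to come from the owning target, and the greedy packing policy is only illustrative.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Greedily fill one packet, then report which functional units each packed
// instruction ended up using (only meaningful after setTrackResources(true)).
static void packAndReport(DFAPacketizer &Packetizer,
                          ArrayRef<MachineInstr *> Candidates) {
  Packetizer.clearResources();
  Packetizer.setTrackResources(true);

  SmallVector<MachineInstr *, 4> Packet;
  for (MachineInstr *MI : Candidates) {
    if (!Packetizer.canReserveResources(*MI))
      break;                          // the current packet is full
    Packetizer.reserveResources(*MI); // commit MI to the packet
    Packet.push_back(MI);
  }

  for (unsigned I = 0, E = Packet.size(); I != E; ++I)
    errs() << "slot " << I << " uses FU mask "
           << Packetizer.getUsedResources(I) << "\n";
}
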
+ void takeValues(DIEValueList &Other) { List.takeNodes(Other.List); } + value_range values() { return make_range(value_iterator(List.begin()), value_iterator(List.end())); } diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h index f09b59daf4dd..03d681feb7aa 100644 --- a/include/llvm/CodeGen/FastISel.h +++ b/include/llvm/CodeGen/FastISel.h @@ -93,9 +93,9 @@ public: SmallVector OutVals; SmallVector OutFlags; - SmallVector OutRegs; + SmallVector OutRegs; SmallVector Ins; - SmallVector InRegs; + SmallVector InRegs; CallLoweringInfo() : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false), diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h index fb60191abd3a..f812a2f6c585 100644 --- a/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -20,7 +20,6 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -37,6 +36,7 @@ namespace llvm { class Argument; class BasicBlock; class BranchProbabilityInfo; +class LegacyDivergenceAnalysis; class Function; class Instruction; class MachineFunction; diff --git a/include/llvm/CodeGen/GlobalISel/CallLowering.h b/include/llvm/CodeGen/GlobalISel/CallLowering.h index d717121ad78e..4901a3748e4a 100644 --- a/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -45,18 +45,62 @@ class CallLowering { public: struct ArgInfo { SmallVector Regs; + // If the argument had to be split into multiple parts according to the + // target calling convention, then this contains the original vregs + // if the argument was an incoming arg. + SmallVector OrigRegs; Type *Ty; - ISD::ArgFlagsTy Flags; + SmallVector Flags; bool IsFixed; ArgInfo(ArrayRef Regs, Type *Ty, - ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy{}, bool IsFixed = true) - : Regs(Regs.begin(), Regs.end()), Ty(Ty), Flags(Flags), - IsFixed(IsFixed) { + ArrayRef Flags = ArrayRef(), + bool IsFixed = true) + : Regs(Regs.begin(), Regs.end()), Ty(Ty), + Flags(Flags.begin(), Flags.end()), IsFixed(IsFixed) { + if (!Regs.empty() && Flags.empty()) + this->Flags.push_back(ISD::ArgFlagsTy()); // FIXME: We should have just one way of saying "no register". assert((Ty->isVoidTy() == (Regs.empty() || Regs[0] == 0)) && "only void types should have no register"); } + + ArgInfo() : Ty(nullptr), IsFixed(false) {} + }; + + struct CallLoweringInfo { + /// Calling convention to be used for the call. + CallingConv::ID CallConv = CallingConv::C; + + /// Destination of the call. It should be either a register, globaladdress, + /// or externalsymbol. + MachineOperand Callee = MachineOperand::CreateImm(0); + + /// Descriptor for the return type of the function. + ArgInfo OrigRet; + + /// List of descriptors of the arguments passed to the function. + SmallVector OrigArgs; + + /// Valid if the call has a swifterror inout parameter, and contains the + /// vreg that the swifterror should be copied into after the call. + Register SwiftErrorVReg = 0; + + MDNode *KnownCallees = nullptr; + + /// True if the call must be tail call optimized. + bool IsMustTailCall = false; + + /// True if the call passes all target-independent checks for tail call + /// optimization. + bool IsTailCall = false; + + /// True if the call was lowered as a tail call. 
This is consumed by the + /// legalizer. This allows the legalizer to lower libcalls as tail calls. + bool LoweredTailCall = false; + + /// True if the call is to a vararg function. + bool IsVarArg = false; }; /// Argument handling is mostly uniform between the four places that @@ -72,9 +116,9 @@ public: virtual ~ValueHandler() = default; - /// Returns true if the handler is dealing with formal arguments, - /// not with return values etc. - virtual bool isArgumentHandler() const { return false; } + /// Returns true if the handler is dealing with incoming arguments, + /// i.e. those that move values from some physical location to vregs. + virtual bool isIncomingArgumentHandler() const = 0; /// Materialize a VReg containing the address of the specified /// stack-based object. This is either based on a FrameIndex or @@ -112,8 +156,8 @@ public: virtual bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, const ArgInfo &Info, - CCState &State) { - return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State); + ISD::ArgFlagsTy Flags, CCState &State) { + return AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State); } MachineIRBuilder &MIRBuilder; @@ -162,12 +206,42 @@ protected: /// \p Callback to move them to the assigned locations. /// /// \return True if everything has succeeded, false otherwise. - bool handleAssignments(MachineIRBuilder &MIRBuilder, ArrayRef Args, + bool handleAssignments(MachineIRBuilder &MIRBuilder, + SmallVectorImpl &Args, ValueHandler &Handler) const; bool handleAssignments(CCState &CCState, SmallVectorImpl &ArgLocs, - MachineIRBuilder &MIRBuilder, ArrayRef Args, + MachineIRBuilder &MIRBuilder, + SmallVectorImpl &Args, ValueHandler &Handler) const; + + /// Analyze passed or returned values from a call, supplied in \p ArgInfo, + /// incorporating info about the passed values into \p CCState. + /// + /// Used to check if arguments are suitable for tail call lowering. + bool analyzeArgInfo(CCState &CCState, SmallVectorImpl &Args, + CCAssignFn &AssignFnFixed, + CCAssignFn &AssignFnVarArg) const; + + /// \returns True if the calling convention for a callee and its caller pass + /// results in the same way. Typically used for tail call eligibility checks. + /// + /// \p Info is the CallLoweringInfo for the call. + /// \p MF is the MachineFunction for the caller. + /// \p InArgs contains the results of the call. + /// \p CalleeAssignFnFixed is the CCAssignFn to be used for the callee for + /// fixed arguments. + /// \p CalleeAssignFnVarArg is similar, but for varargs. + /// \p CallerAssignFnFixed is the CCAssignFn to be used for the caller for + /// fixed arguments. + /// \p CallerAssignFnVarArg is similar, but for varargs. + bool resultsCompatible(CallLoweringInfo &Info, MachineFunction &MF, + SmallVectorImpl &InArgs, + CCAssignFn &CalleeAssignFnFixed, + CCAssignFn &CalleeAssignFnVarArg, + CCAssignFn &CallerAssignFnFixed, + CCAssignFn &CallerAssignFnVarArg) const; + public: CallLowering(const TargetLowering *TLI) : TLI(TLI) {} virtual ~CallLowering() = default; @@ -223,37 +297,10 @@ public: /// This hook must be implemented to lower the given call instruction, /// including argument and return value marshalling. /// - /// \p CallConv is the calling convention to be used for the call. - /// - /// \p Callee is the destination of the call. It should be either a register, - /// globaladdress, or externalsymbol. - /// - /// \p OrigRet is a descriptor for the return type of the function. 
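
Illustrative sketch (not part of the imported patch) of how a target might fill in the consolidated CallLoweringInfo bundle that replaces the long lowerCall() parameter list; the "memcpy" symbol and the register/type values are placeholders.

#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/MachineOperand.h"

using namespace llvm;

static bool emitSimpleLibcall(CallLowering &CL, MachineIRBuilder &MIRBuilder,
                              Register RetReg, Type *RetTy, Register ArgReg,
                              Type *ArgTy) {
  CallLowering::CallLoweringInfo Info;
  Info.CallConv = CallingConv::C;
  Info.Callee = MachineOperand::CreateES("memcpy"); // externalsymbol callee
  Info.OrigRet = CallLowering::ArgInfo({RetReg}, RetTy);
  Info.OrigArgs.push_back(CallLowering::ArgInfo({ArgReg}, ArgTy));
  Info.IsTailCall = false;
  return CL.lowerCall(MIRBuilder, Info);
}

Compared with the removed overloads, everything a backend needs for a call, including the swifterror vreg and the tail-call flags, now travels in one structure.
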
- /// - /// \p OrigArgs is a list of descriptors of the arguments passed to the - /// function. - /// - /// \p SwiftErrorVReg is non-zero if the call has a swifterror inout - /// parameter, and contains the vreg that the swifterror should be copied into - /// after the call. /// /// \return true if the lowering succeeded, false otherwise. - virtual bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, - const MachineOperand &Callee, const ArgInfo &OrigRet, - ArrayRef OrigArgs, - Register SwiftErrorVReg) const { - if (!supportSwiftError()) { - assert(SwiftErrorVReg == 0 && "trying to use unsupported swifterror"); - return lowerCall(MIRBuilder, CallConv, Callee, OrigRet, OrigArgs); - } - return false; - } - - /// This hook behaves as the extended lowerCall function, but for targets that - /// do not support swifterror value promotion. - virtual bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, - const MachineOperand &Callee, const ArgInfo &OrigRet, - ArrayRef OrigArgs) const { + virtual bool lowerCall(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info) const { return false; } diff --git a/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 0c50c9c5e0cf..4c04dc52547d 100644 --- a/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -27,6 +27,8 @@ class MachineIRBuilder; class MachineRegisterInfo; class MachineInstr; class MachineOperand; +class GISelKnownBits; +class MachineDominatorTree; struct PreferredTuple { LLT Ty; // The result type of the extend. @@ -35,12 +37,17 @@ struct PreferredTuple { }; class CombinerHelper { +protected: MachineIRBuilder &Builder; MachineRegisterInfo &MRI; GISelChangeObserver &Observer; + GISelKnownBits *KB; + MachineDominatorTree *MDT; public: - CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B); + CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, + GISelKnownBits *KB = nullptr, + MachineDominatorTree *MDT = nullptr); /// MachineRegisterInfo::replaceRegWith() and inform the observer of the changes void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const; @@ -56,18 +63,132 @@ public: bool matchCombineCopy(MachineInstr &MI); void applyCombineCopy(MachineInstr &MI); + /// Returns true if \p DefMI precedes \p UseMI or they are the same + /// instruction. Both must be in the same basic block. + bool isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI); + + /// Returns true if \p DefMI dominates \p UseMI. By definition an + /// instruction dominates itself. + /// + /// If we haven't been provided with a MachineDominatorTree during + /// construction, this function returns a conservative result that tracks just + /// a single basic block. + bool dominates(MachineInstr &DefMI, MachineInstr &UseMI); + /// If \p MI is extend that consumes the result of a load, try to combine it. /// Returns true if MI changed. bool tryCombineExtendingLoads(MachineInstr &MI); bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo); void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo); - bool matchCombineBr(MachineInstr &MI); - bool tryCombineBr(MachineInstr &MI); + /// Combine \p MI into a pre-indexed or post-indexed load/store operation if + /// legal and the surrounding code makes it useful. 
+ bool tryCombineIndexedLoadStore(MachineInstr &MI); + + bool matchElideBrByInvertingCond(MachineInstr &MI); + void applyElideBrByInvertingCond(MachineInstr &MI); + bool tryElideBrByInvertingCond(MachineInstr &MI); + + /// If \p MI is G_CONCAT_VECTORS, try to combine it. + /// Returns true if MI changed. + /// Right now, we support: + /// - concat_vector(undef, undef) => undef + /// - concat_vector(build_vector(A, B), build_vector(C, D)) => + /// build_vector(A, B, C, D) + /// + /// \pre MI.getOpcode() == G_CONCAT_VECTORS. + bool tryCombineConcatVectors(MachineInstr &MI); + /// Check if the G_CONCAT_VECTORS \p MI is undef or if it + /// can be flattened into a build_vector. + /// In the first case \p IsUndef will be true. + /// In the second case \p Ops will contain the operands needed + /// to produce the flattened build_vector. + /// + /// \pre MI.getOpcode() == G_CONCAT_VECTORS. + bool matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef, + SmallVectorImpl &Ops); + /// Replace \p MI with a flattened build_vector with \p Ops or an + /// implicit_def if IsUndef is true. + void applyCombineConcatVectors(MachineInstr &MI, bool IsUndef, + const ArrayRef Ops); + + /// Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS. + /// Returns true if MI changed. + /// + /// \pre MI.getOpcode() == G_SHUFFLE_VECTOR. + bool tryCombineShuffleVector(MachineInstr &MI); + /// Check if the G_SHUFFLE_VECTOR \p MI can be replaced by a + /// concat_vectors. + /// \p Ops will contain the operands needed to produce the flattened + /// concat_vectors. + /// + /// \pre MI.getOpcode() == G_SHUFFLE_VECTOR. + bool matchCombineShuffleVector(MachineInstr &MI, + SmallVectorImpl &Ops); + /// Replace \p MI with a concat_vectors with \p Ops. + void applyCombineShuffleVector(MachineInstr &MI, + const ArrayRef Ops); + + /// Optimize memcpy intrinsics et al, e.g. constant len calls. + /// /p MaxLen if non-zero specifies the max length of a mem libcall to inline. + /// + /// For example (pre-indexed): + /// + /// $addr = G_GEP $base, $offset + /// [...] + /// $val = G_LOAD $addr + /// [...] + /// $whatever = COPY $addr + /// + /// --> + /// + /// $val, $addr = G_INDEXED_LOAD $base, $offset, 1 (IsPre) + /// [...] + /// $whatever = COPY $addr + /// + /// or (post-indexed): + /// + /// G_STORE $val, $base + /// [...] + /// $addr = G_GEP $base, $offset + /// [...] + /// $whatever = COPY $addr + /// + /// --> + /// + /// $addr = G_INDEXED_STORE $val, $base, $offset + /// [...] + /// $whatever = COPY $addr + bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0); /// Try to transform \p MI by using all of the above /// combine functions. Returns true if changed. bool tryCombine(MachineInstr &MI); + +private: + // Memcpy family optimization helpers. + bool optimizeMemcpy(MachineInstr &MI, Register Dst, Register Src, + unsigned KnownLen, unsigned DstAlign, unsigned SrcAlign, + bool IsVolatile); + bool optimizeMemmove(MachineInstr &MI, Register Dst, Register Src, + unsigned KnownLen, unsigned DstAlign, unsigned SrcAlign, + bool IsVolatile); + bool optimizeMemset(MachineInstr &MI, Register Dst, Register Val, + unsigned KnownLen, unsigned DstAlign, bool IsVolatile); + + /// Given a non-indexed load or store instruction \p MI, find an offset that + /// can be usefully and legally folded into it as a post-indexing operation. + /// + /// \returns true if a candidate is found. 
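
Hedged sketch (not part of the imported patch) of how a target combiner might dispatch into the helpers declared above; the KB/MDT pointers and the opcode selection are assumed context, not a prescribed pattern.

#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

static bool combineOne(GISelChangeObserver &Observer, MachineIRBuilder &B,
                       MachineInstr &MI, GISelKnownBits *KB,
                       MachineDominatorTree *MDT) {
  CombinerHelper Helper(Observer, B, KB, MDT);
  switch (MI.getOpcode()) {
  case TargetOpcode::G_CONCAT_VECTORS:
    return Helper.tryCombineConcatVectors(MI);
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return Helper.tryCombineShuffleVector(MI);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    // Fold a constant-offset address computation into a pre-/post-indexed
    // access where the target supports it.
    return Helper.tryCombineIndexedLoadStore(MI);
  default:
    return Helper.tryCombine(MI); // e.g. the extending-load combine
  }
}
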
+ bool findPostIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base, + Register &Offset); + + /// Given a non-indexed load or store instruction \p MI, find an offset that + /// can be usefully and legally folded into it as a pre-indexing operation. + /// + /// \returns true if a candidate is found. + bool findPreIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base, + Register &Offset); }; } // namespace llvm diff --git a/include/llvm/CodeGen/GlobalISel/CombinerInfo.h b/include/llvm/CodeGen/GlobalISel/CombinerInfo.h index 3b09a8e2b479..ad645a46bbe6 100644 --- a/include/llvm/CodeGen/GlobalISel/CombinerInfo.h +++ b/include/llvm/CodeGen/GlobalISel/CombinerInfo.h @@ -27,9 +27,11 @@ class MachineRegisterInfo; class CombinerInfo { public: CombinerInfo(bool AllowIllegalOps, bool ShouldLegalizeIllegal, - LegalizerInfo *LInfo) + LegalizerInfo *LInfo, bool OptEnabled, bool OptSize, + bool MinSize) : IllegalOpsAllowed(AllowIllegalOps), - LegalizeIllegalOps(ShouldLegalizeIllegal), LInfo(LInfo) { + LegalizeIllegalOps(ShouldLegalizeIllegal), LInfo(LInfo), + EnableOpt(OptEnabled), EnableOptSize(OptSize), EnableMinSize(MinSize) { assert(((AllowIllegalOps || !LegalizeIllegalOps) || LInfo) && "Expecting legalizerInfo when illegalops not allowed"); } @@ -43,6 +45,15 @@ public: bool LegalizeIllegalOps; // TODO: Make use of this. const LegalizerInfo *LInfo; + /// Whether optimizations should be enabled. This is to distinguish between + /// uses of the combiner unconditionally and only when optimizations are + /// specifically enabled/ + bool EnableOpt; + /// Whether we're optimizing for size. + bool EnableOptSize; + /// Whether we're optimizing for minsize (-Oz). + bool EnableMinSize; + /// Attempt to combine instructions using MI as the root. /// /// Use Observer to report the creation, modification, and erasure of diff --git a/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h b/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h index e817d9b4550e..df196bfbd437 100644 --- a/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h +++ b/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h @@ -54,6 +54,17 @@ public: return buildConstant(Dst, MaybeCst->getSExtValue()); break; } + case TargetOpcode::G_SEXT_INREG: { + assert(DstOps.size() == 1 && "Invalid dst ops"); + assert(SrcOps.size() == 2 && "Invalid src ops"); + const DstOp &Dst = DstOps[0]; + const SrcOp &Src0 = SrcOps[0]; + const SrcOp &Src1 = SrcOps[1]; + if (auto MaybeCst = + ConstantFoldExtOp(Opc, Src0.getReg(), Src1.getImm(), *getMRI())) + return buildConstant(Dst, MaybeCst->getSExtValue()); + break; + } } return MachineIRBuilder::buildInstr(Opc, DstOps, SrcOps); } diff --git a/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h b/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h new file mode 100644 index 000000000000..dfe5a7f3177d --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h @@ -0,0 +1,111 @@ +//===- llvm/CodeGen/GlobalISel/GISelKnownBits.h ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// Provides analysis for querying information about KnownBits during GISel +/// passes. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CODEGEN_GLOBALISEL_KNOWNBITSINFO_H +#define LLVM_CODEGEN_GLOBALISEL_KNOWNBITSINFO_H + +#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Register.h" +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/KnownBits.h" + +namespace llvm { + +class TargetLowering; +class DataLayout; + +class GISelKnownBits : public GISelChangeObserver { + MachineFunction &MF; + MachineRegisterInfo &MRI; + const TargetLowering &TL; + const DataLayout &DL; + +public: + GISelKnownBits(MachineFunction &MF); + virtual ~GISelKnownBits() = default; + void setMF(MachineFunction &MF); + virtual void computeKnownBitsImpl(Register R, KnownBits &Known, + const APInt &DemandedElts, + unsigned Depth = 0); + + // KnownBitsAPI + KnownBits getKnownBits(Register R); + // Calls getKnownBits for first operand def of MI. + KnownBits getKnownBits(MachineInstr &MI); + APInt getKnownZeroes(Register R); + APInt getKnownOnes(Register R); + + /// \return true if 'V & Mask' is known to be zero in DemandedElts. We use + /// this predicate to simplify operations downstream. + /// Mask is known to be zero for bits that V cannot have. + bool maskedValueIsZero(Register Val, const APInt &Mask) { + return Mask.isSubsetOf(getKnownBits(Val).Zero); + } + + /// \return true if the sign bit of Op is known to be zero. We use this + /// predicate to simplify operations downstream. + bool signBitIsZero(Register Op); + + // FIXME: Is this the right place for G_FRAME_INDEX? Should it be in + // TargetLowering? + void computeKnownBitsForFrameIndex(Register R, KnownBits &Known, + const APInt &DemandedElts, + unsigned Depth = 0); + static Align inferAlignmentForFrameIdx(int FrameIdx, int Offset, + const MachineFunction &MF); + static void computeKnownBitsForAlignment(KnownBits &Known, + MaybeAlign Alignment); + + // Try to infer alignment for MI. + static MaybeAlign inferPtrAlignment(const MachineInstr &MI); + + // Observer API. No-op for non-caching implementation. + void erasingInstr(MachineInstr &MI) override{}; + void createdInstr(MachineInstr &MI) override{}; + void changingInstr(MachineInstr &MI) override{}; + void changedInstr(MachineInstr &MI) override{}; + +protected: + unsigned getMaxDepth() const { return 6; } +}; + +/// To use KnownBitsInfo analysis in a pass, +/// KnownBitsInfo &Info = getAnalysis().get(MF); +/// Add to observer if the Info is caching. +/// WrapperObserver.addObserver(Info); + +/// Eventually add other features such as caching/ser/deserializing +/// to MIR etc. Those implementations can derive from GISelKnownBits +/// and override computeKnownBitsImpl. 
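For illustration only, a minimal sketch of the usage pattern described in the comment above: a hypothetical MachineFunction pass (ExampleKnownBitsUser and its body are not part of this patch; only the GISelKnownBits/GISelKnownBitsAnalysis calls come from the header added here) that requests the wrapper and queries known bits for generic virtual registers.

#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

namespace {
// Hypothetical consumer of the new analysis.
struct ExampleKnownBitsUser : public MachineFunctionPass {
  static char ID;
  ExampleKnownBitsUser() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<GISelKnownBitsAnalysis>(); // request the wrapper pass
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    GISelKnownBits &KB = getAnalysis<GISelKnownBitsAnalysis>().get(MF);
    MachineRegisterInfo &MRI = MF.getRegInfo();
    for (MachineBasicBlock &MBB : MF)
      for (MachineInstr &MI : MBB) {
        if (MI.getNumOperands() == 0 || !MI.getOperand(0).isReg())
          continue;
        Register Dst = MI.getOperand(0).getReg();
        // Only query generic virtual registers that carry an LLT.
        if (!Dst.isVirtual() || !MRI.getType(Dst).isValid())
          continue;
        // A combiner would use this to justify folds, e.g. rewriting a sign
        // extend as a zero extend when the sign bit is known to be clear.
        KnownBits Known = KB.getKnownBits(Dst);
        (void)Known;
      }
    return false; // analysis only; nothing was modified
  }
};
char ExampleKnownBitsUser::ID = 0;
} // end anonymous namespace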
+class GISelKnownBitsAnalysis : public MachineFunctionPass { + std::unique_ptr<GISelKnownBits> Info; + +public: + static char ID; + GISelKnownBitsAnalysis() : MachineFunctionPass(ID) { + initializeGISelKnownBitsAnalysisPass(*PassRegistry::getPassRegistry()); + } + GISelKnownBits &get(MachineFunction &MF) { + if (!Info) + Info = std::make_unique<GISelKnownBits>(MF); + return *Info.get(); + } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + void releaseMemory() override { Info.reset(); } +}; +} // namespace llvm + +#endif // ifdef diff --git a/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/include/llvm/CodeGen/GlobalISel/IRTranslator.h index 8654ba83f08d..bdb92aa4689d 100644 --- a/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -213,8 +213,8 @@ private: bool translateStore(const User &U, MachineIRBuilder &MIRBuilder); /// Translate an LLVM string intrinsic (memcpy, memset, ...). - bool translateMemfunc(const CallInst &CI, MachineIRBuilder &MIRBuilder, - unsigned ID); + bool translateMemFunc(const CallInst &CI, MachineIRBuilder &MIRBuilder, + Intrinsic::ID ID); void getStackGuard(Register DstReg, MachineIRBuilder &MIRBuilder); @@ -243,6 +243,10 @@ private: bool valueIsSplit(const Value &V, SmallVectorImpl<uint64_t> *Offsets = nullptr); + /// Common code for translating normal calls or invokes. + bool translateCallSite(const ImmutableCallSite &CS, + MachineIRBuilder &MIRBuilder); + /// Translate call instruction. /// \pre \p U is a call instruction. bool translateCall(const User &U, MachineIRBuilder &MIRBuilder); @@ -514,6 +518,10 @@ private: // function has the optnone attribute. bool EnableOpts = false; + /// True when the block contains a tail call. This allows the IRTranslator to + /// stop translating such blocks early. + bool HasTailCall = false; + /// Switch analysis and optimization. class GISelSwitchLowering : public SwitchCG::SwitchLowering { public: diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index e9b93be76754..fd3dc743000b 100644 --- a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -31,6 +31,7 @@ namespace llvm { class APInt; class APFloat; +class GISelKnownBits; class MachineInstr; class MachineInstrBuilder; class MachineFunction; @@ -148,6 +149,13 @@ enum { /// - AddrSpaceN+1 ... GIM_CheckMemoryAddressSpace, + /// Check the minimum alignment of the memory access for the given machine + /// memory operand. + /// - InsnID - Instruction ID + /// - MMOIdx - MMO index + /// - MinAlign - Minimum acceptable alignment + GIM_CheckMemoryAlignment, + /// Check the size of the memory access for the given machine memory operand /// against the size of an operand. /// - InsnID - Instruction ID @@ -201,11 +209,22 @@ enum { /// - Expected Intrinsic ID GIM_CheckIntrinsicID, + /// Check the operand is a specific predicate + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - Expected predicate + GIM_CheckCmpPredicate, + /// Check the specified operand is an MBB /// - InsnID - Instruction ID /// - OpIdx - Operand index GIM_CheckIsMBB, + /// Check the specified operand is an Imm + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + GIM_CheckIsImm, + /// Check if the specified operand is safe to fold into the current /// instruction.
/// - InsnID - Instruction ID @@ -365,7 +384,20 @@ public: /// if returns true: /// for I in all mutated/inserted instructions: /// !isPreISelGenericOpcode(I.getOpcode()) - virtual bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const = 0; + virtual bool select(MachineInstr &I) = 0; + + CodeGenCoverage *CoverageInfo = nullptr; + GISelKnownBits *KnownBits = nullptr; + MachineFunction *MF = nullptr; + + /// Setup per-MF selector state. + virtual void setupMF(MachineFunction &mf, + GISelKnownBits &KB, + CodeGenCoverage &covinfo) { + CoverageInfo = &covinfo; + KnownBits = &KB; + MF = &mf; + } protected: using ComplexRendererFns = diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h index e8ee4af0cb0b..08f2f54bcf90 100644 --- a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h @@ -98,7 +98,7 @@ bool InstructionSelector::executeMatchTable( return false; break; } - if (TRI.isPhysicalRegister(MO.getReg())) { + if (Register::isPhysicalRegister(MO.getReg())) { DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), dbgs() << CurrentIdx << ": Is a physical register\n"); if (handleReject() == RejectAndGiveUp) @@ -409,6 +409,30 @@ bool InstructionSelector::executeMatchTable( return false; break; } + case GIM_CheckMemoryAlignment: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t MMOIdx = MatchTable[CurrentIdx++]; + unsigned MinAlign = MatchTable[CurrentIdx++]; + + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + + if (State.MIs[InsnID]->getNumMemOperands() <= MMOIdx) { + if (handleReject() == RejectAndGiveUp) + return false; + break; + } + + MachineMemOperand *MMO + = *(State.MIs[InsnID]->memoperands_begin() + MMOIdx); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckMemoryAlignment" + << "(MIs[" << InsnID << "]->memoperands() + " << MMOIdx + << ")->getAlignment() >= " << MinAlign << ")\n"); + if (MMO->getAlignment() < MinAlign && handleReject() == RejectAndGiveUp) + return false; + + break; + } case GIM_CheckMemorySizeEqualTo: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t MMOIdx = MatchTable[CurrentIdx++]; @@ -638,7 +662,21 @@ bool InstructionSelector::executeMatchTable( return false; break; } - + case GIM_CheckCmpPredicate: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t Value = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckCmpPredicate(MIs[" + << InsnID << "]->getOperand(" << OpIdx + << "), Value=" << Value << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); + if (!MO.isPredicate() || MO.getPredicate() != Value) + if (handleReject() == RejectAndGiveUp) + return false; + break; + } case GIM_CheckIsMBB: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t OpIdx = MatchTable[CurrentIdx++]; @@ -652,7 +690,19 @@ bool InstructionSelector::executeMatchTable( } break; } - + case GIM_CheckIsImm: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckIsImm(MIs[" << InsnID + << "]->getOperand(" << OpIdx << "))\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + if (!State.MIs[InsnID]->getOperand(OpIdx).isImm()) { + if 
(handleReject() == RejectAndGiveUp) + return false; + } + break; + } case GIM_CheckIsSafeToFold: { int64_t InsnID = MatchTable[CurrentIdx++]; DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), @@ -792,11 +842,13 @@ bool InstructionSelector::executeMatchTable( case GIR_AddRegister: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t RegNum = MatchTable[CurrentIdx++]; + uint64_t RegFlags = MatchTable[CurrentIdx++]; assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); - OutMIs[InsnID].addReg(RegNum); - DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), - dbgs() << CurrentIdx << ": GIR_AddRegister(OutMIs[" - << InsnID << "], " << RegNum << ")\n"); + OutMIs[InsnID].addReg(RegNum, RegFlags); + DEBUG_WITH_TYPE( + TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_AddRegister(OutMIs[" + << InsnID << "], " << RegNum << ", " << RegFlags << ")\n"); break; } diff --git a/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index a22778b8848c..7f960e727846 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -47,8 +47,7 @@ public: bool tryCombineAnyExt(MachineInstr &MI, SmallVectorImpl &DeadInsts) { - if (MI.getOpcode() != TargetOpcode::G_ANYEXT) - return false; + assert(MI.getOpcode() == TargetOpcode::G_ANYEXT); Builder.setInstr(MI); Register DstReg = MI.getOperand(0).getReg(); @@ -93,9 +92,7 @@ public: bool tryCombineZExt(MachineInstr &MI, SmallVectorImpl &DeadInsts) { - - if (MI.getOpcode() != TargetOpcode::G_ZEXT) - return false; + assert(MI.getOpcode() == TargetOpcode::G_ZEXT); Builder.setInstr(MI); Register DstReg = MI.getOperand(0).getReg(); @@ -136,32 +133,24 @@ public: bool tryCombineSExt(MachineInstr &MI, SmallVectorImpl &DeadInsts) { - - if (MI.getOpcode() != TargetOpcode::G_SEXT) - return false; + assert(MI.getOpcode() == TargetOpcode::G_SEXT); Builder.setInstr(MI); Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); - // sext(trunc x) - > ashr (shl (aext/copy/trunc x), c), c + // sext(trunc x) - > (sext_inreg (aext/copy/trunc x), c) Register TruncSrc; if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) { LLT DstTy = MRI.getType(DstReg); - // Guess on the RHS shift amount type, which should be re-legalized if - // applicable. - if (isInstUnsupported({TargetOpcode::G_SHL, {DstTy, DstTy}}) || - isInstUnsupported({TargetOpcode::G_ASHR, {DstTy, DstTy}}) || - isConstantUnsupported(DstTy)) + if (isInstUnsupported({TargetOpcode::G_SEXT_INREG, {DstTy}})) return false; LLVM_DEBUG(dbgs() << ".. 
Combine MI: " << MI;); LLT SrcTy = MRI.getType(SrcReg); - unsigned ShAmt = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits(); - auto MIBShAmt = Builder.buildConstant(DstTy, ShAmt); - auto MIBShl = Builder.buildInstr( - TargetOpcode::G_SHL, {DstTy}, - {Builder.buildAnyExtOrTrunc(DstTy, TruncSrc), MIBShAmt}); - Builder.buildInstr(TargetOpcode::G_ASHR, {DstReg}, {MIBShl, MIBShAmt}); + uint64_t SizeInBits = SrcTy.getScalarSizeInBits(); + Builder.buildInstr( + TargetOpcode::G_SEXT_INREG, {DstReg}, + {Builder.buildAnyExtOrTrunc(DstTy, TruncSrc), SizeInBits}); markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); return true; } @@ -172,9 +161,8 @@ public: bool tryFoldImplicitDef(MachineInstr &MI, SmallVectorImpl &DeadInsts) { unsigned Opcode = MI.getOpcode(); - if (Opcode != TargetOpcode::G_ANYEXT && Opcode != TargetOpcode::G_ZEXT && - Opcode != TargetOpcode::G_SEXT) - return false; + assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT || + Opcode == TargetOpcode::G_SEXT); if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(), MRI)) { @@ -203,21 +191,38 @@ public: return false; } - static unsigned getMergeOpcode(LLT OpTy, LLT DestTy) { + static unsigned canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp, + LLT OpTy, LLT DestTy) { if (OpTy.isVector() && DestTy.isVector()) - return TargetOpcode::G_CONCAT_VECTORS; + return MergeOp == TargetOpcode::G_CONCAT_VECTORS; + + if (OpTy.isVector() && !DestTy.isVector()) { + if (MergeOp == TargetOpcode::G_BUILD_VECTOR) + return true; - if (OpTy.isVector() && !DestTy.isVector()) - return TargetOpcode::G_BUILD_VECTOR; + if (MergeOp == TargetOpcode::G_CONCAT_VECTORS) { + if (ConvertOp == 0) + return true; - return TargetOpcode::G_MERGE_VALUES; + const unsigned OpEltSize = OpTy.getElementType().getSizeInBits(); + + // Don't handle scalarization with a cast that isn't in the same + // direction as the vector cast. This could be handled, but it would + // require more intermediate unmerges. + if (ConvertOp == TargetOpcode::G_TRUNC) + return DestTy.getSizeInBits() <= OpEltSize; + return DestTy.getSizeInBits() >= OpEltSize; + } + + return false; + } + + return MergeOp == TargetOpcode::G_MERGE_VALUES; } bool tryCombineMerges(MachineInstr &MI, SmallVectorImpl &DeadInsts) { - - if (MI.getOpcode() != TargetOpcode::G_UNMERGE_VALUES) - return false; + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES); unsigned NumDefs = MI.getNumOperands() - 1; MachineInstr *SrcDef = @@ -237,16 +242,14 @@ public: MergeI = getDefIgnoringCopies(SrcDef->getOperand(1).getReg(), MRI); } - // FIXME: Handle scalarizing concat_vectors (scalar result type with vector - // source) - unsigned MergingOpcode = getMergeOpcode(OpTy, DestTy); - if (!MergeI || MergeI->getOpcode() != MergingOpcode) + if (!MergeI || !canFoldMergeOpcode(MergeI->getOpcode(), + ConvertOp, OpTy, DestTy)) return false; const unsigned NumMergeRegs = MergeI->getNumOperands() - 1; if (NumMergeRegs < NumDefs) { - if (ConvertOp != 0 || NumDefs % NumMergeRegs != 0) + if (NumDefs % NumMergeRegs != 0) return false; Builder.setInstr(MI); @@ -264,7 +267,22 @@ public: ++j, ++DefIdx) DstRegs.push_back(MI.getOperand(DefIdx).getReg()); - Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg()); + if (ConvertOp) { + SmallVector TmpRegs; + // This is a vector that is being scalarized and casted. Extract to + // the element type, and do the conversion on the scalars. 
+ LLT MergeEltTy + = MRI.getType(MergeI->getOperand(0).getReg()).getElementType(); + for (unsigned j = 0; j < NumMergeRegs; ++j) + TmpRegs.push_back(MRI.createGenericVirtualRegister(MergeEltTy)); + + Builder.buildUnmerge(TmpRegs, MergeI->getOperand(Idx + 1).getReg()); + + for (unsigned j = 0; j < NumMergeRegs; ++j) + Builder.buildInstr(ConvertOp, {DstRegs[j]}, {TmpRegs[j]}); + } else { + Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg()); + } } } else if (NumMergeRegs > NumDefs) { diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index a0f21e8b19d7..fbfe71255a38 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -200,6 +200,13 @@ public: LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy); + LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowTy); + LegalizeResult fewerElementsVectorBuildVector(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowTy); + LegalizeResult reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); @@ -219,9 +226,17 @@ public: LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI); LegalizeResult lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty); + LegalizeResult lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI); + LegalizeResult lowerFMad(MachineInstr &MI); + LegalizeResult lowerUnmergeValues(MachineInstr &MI); + LegalizeResult lowerShuffleVector(MachineInstr &MI); + LegalizeResult lowerDynStackAlloc(MachineInstr &MI); + LegalizeResult lowerExtract(MachineInstr &MI); + LegalizeResult lowerInsert(MachineInstr &MI); + LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI); private: MachineRegisterInfo &MRI; @@ -236,6 +251,11 @@ createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result, ArrayRef Args); +/// Create a libcall to memcpy et al. +LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, + MachineRegisterInfo &MRI, + MachineInstr &MI); + } // End namespace llvm. #endif diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index 513c98f2d23f..1cf62d1fde59 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -331,6 +331,8 @@ class LegalizeRuleSet { /// individually handled. 
SmallBitVector TypeIdxsCovered{MCOI::OPERAND_LAST_GENERIC - MCOI::OPERAND_FIRST_GENERIC + 2}; + SmallBitVector ImmIdxsCovered{MCOI::OPERAND_LAST_GENERIC_IMM - + MCOI::OPERAND_FIRST_GENERIC_IMM + 2}; #endif unsigned typeIdx(unsigned TypeIdx) { @@ -342,9 +344,21 @@ class LegalizeRuleSet { #endif return TypeIdx; } - void markAllTypeIdxsAsCovered() { + + unsigned immIdx(unsigned ImmIdx) { + assert(ImmIdx <= (MCOI::OPERAND_LAST_GENERIC_IMM - + MCOI::OPERAND_FIRST_GENERIC_IMM) && + "Imm Index is out of bounds"); +#ifndef NDEBUG + ImmIdxsCovered.set(ImmIdx); +#endif + return ImmIdx; + } + + void markAllIdxsAsCovered() { #ifndef NDEBUG TypeIdxsCovered.set(); + ImmIdxsCovered.set(); #endif } @@ -403,6 +417,15 @@ class LegalizeRuleSet { return actionIf(Action, typePairInSet(typeIdx(0), typeIdx(1), Types), Mutation); } + /// Use the given action when type index 0 is any type in the given list and + /// imm index 0 is anything. Action should not be an action that requires + /// mutation. + LegalizeRuleSet &actionForTypeWithAnyImm(LegalizeAction Action, + std::initializer_list Types) { + using namespace LegalityPredicates; + immIdx(0); // Inform verifier imm idx 0 is handled. + return actionIf(Action, typeInSet(typeIdx(0), Types)); + } /// Use the given action when type indexes 0 and 1 are both in the given list. /// That is, the type pair is in the cartesian product of the list. /// Action should not be an action that requires mutation. @@ -454,7 +477,7 @@ public: LegalizeRuleSet &legalIf(LegalityPredicate Predicate) { // We have no choice but conservatively assume that the free-form // user-provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::Legal, Predicate); } /// The instruction is legal when type index 0 is any type in the given list. @@ -466,6 +489,12 @@ public: LegalizeRuleSet &legalFor(std::initializer_list> Types) { return actionFor(LegalizeAction::Legal, Types); } + /// The instruction is legal when type index 0 is any type in the given list + /// and imm index 0 is anything. + LegalizeRuleSet &legalForTypeWithAnyImm(std::initializer_list Types) { + markAllIdxsAsCovered(); + return actionForTypeWithAnyImm(LegalizeAction::Legal, Types); + } /// The instruction is legal when type indexes 0 and 1 along with the memory /// size and minimum alignment is any type and size tuple in the given list. LegalizeRuleSet &legalForTypesWithMemDesc( @@ -497,7 +526,7 @@ public: LegalizeRuleSet &alwaysLegal() { using namespace LegalizeMutations; - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::Legal, always); } @@ -506,7 +535,7 @@ public: using namespace LegalizeMutations; // We have no choice but conservatively assume that predicate-less lowering // properly handles all type indices by design: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::Lower, always); } /// The instruction is lowered if predicate is true. Keep type index 0 as the @@ -515,7 +544,7 @@ public: using namespace LegalizeMutations; // We have no choice but conservatively assume that lowering with a // free-form user provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::Lower, Predicate); } /// The instruction is lowered if predicate is true. 
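As a rough illustration of the imm-aware rules added above, this is how a target's legalizer info might declare G_SEXT_INREG legal for any immediate width; the surrounding class and type choices are hypothetical, only legalForTypeWithAnyImm and the usual builder calls come from this header.

#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/Support/LowLevelTypeImpl.h"
using namespace llvm;

// Hypothetical legalizer-info fragment: G_SEXT_INREG has one type index and
// one immediate operand (the source width), so the rule only needs to
// enumerate the types and can accept any immediate.
struct ExampleLegalizerInfo : public LegalizerInfo {
  ExampleLegalizerInfo() {
    const LLT s32 = LLT::scalar(32);
    const LLT s64 = LLT::scalar(64);
    getActionDefinitionsBuilder(TargetOpcode::G_SEXT_INREG)
        .legalForTypeWithAnyImm({s32, s64}) // legal for any immediate width
        .lower();                           // everything else gets lowered
    computeTables();
  }
};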
@@ -523,7 +552,7 @@ public: LegalizeMutation Mutation) { // We have no choice but conservatively assume that lowering with a // free-form user provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::Lower, Predicate, Mutation); } /// The instruction is lowered when type index 0 is any type in the given @@ -571,7 +600,7 @@ public: LegalizeRuleSet &libcallIf(LegalityPredicate Predicate) { // We have no choice but conservatively assume that a libcall with a // free-form user provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::Libcall, Predicate); } LegalizeRuleSet &libcallFor(std::initializer_list Types) { @@ -597,7 +626,7 @@ public: LegalizeMutation Mutation) { // We have no choice but conservatively assume that an action with a // free-form user provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::WidenScalar, Predicate, Mutation); } /// Narrow the scalar to the one selected by the mutation if the predicate is @@ -606,7 +635,7 @@ public: LegalizeMutation Mutation) { // We have no choice but conservatively assume that an action with a // free-form user provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::NarrowScalar, Predicate, Mutation); } @@ -616,7 +645,7 @@ public: LegalizeMutation Mutation) { // We have no choice but conservatively assume that an action with a // free-form user provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::MoreElements, Predicate, Mutation); } /// Remove elements to reach the type selected by the mutation if the @@ -625,7 +654,7 @@ public: LegalizeMutation Mutation) { // We have no choice but conservatively assume that an action with a // free-form user provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::FewerElements, Predicate, Mutation); } @@ -640,11 +669,15 @@ public: return actionIf(LegalizeAction::Unsupported, LegalityPredicates::memSizeInBytesNotPow2(0)); } + LegalizeRuleSet &lowerIfMemSizeNotPow2() { + return actionIf(LegalizeAction::Lower, + LegalityPredicates::memSizeInBytesNotPow2(0)); + } LegalizeRuleSet &customIf(LegalityPredicate Predicate) { // We have no choice but conservatively assume that a custom action with a // free-form user provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::Custom, Predicate); } LegalizeRuleSet &customFor(std::initializer_list Types) { @@ -882,6 +915,10 @@ public: /// LegalizeRuleSet in any way at all. /// \pre Type indices of the opcode form a dense [0, \p NumTypeIdxs) set. bool verifyTypeIdxsCoverage(unsigned NumTypeIdxs) const; + /// Check if there is no imm index which is obviously not handled by the + /// LegalizeRuleSet in any way at all. + /// \pre Type indices of the opcode form a dense [0, \p NumTypeIdxs) set. + bool verifyImmIdxsCoverage(unsigned NumImmIdxs) const; /// Apply the ruleset to the given LegalityQuery. 
LegalizeActionStep apply(const LegalityQuery &Query) const; diff --git a/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h index 13eddd9539fa..be12341f5763 100644 --- a/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h +++ b/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h @@ -21,7 +21,7 @@ namespace llvm { namespace MIPatternMatch { template -bool mi_match(Reg R, MachineRegisterInfo &MRI, Pattern &&P) { +bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P) { return P.match(MRI, R); } @@ -30,7 +30,7 @@ template struct OneUse_match { SubPatternT SubPat; OneUse_match(const SubPatternT &SP) : SubPat(SP) {} - bool match(MachineRegisterInfo &MRI, unsigned Reg) { + bool match(const MachineRegisterInfo &MRI, unsigned Reg) { return MRI.hasOneUse(Reg) && SubPat.match(MRI, Reg); } }; @@ -71,7 +71,7 @@ inline operand_type_match m_Reg() { return operand_type_match(); } /// Matching combinators. template struct And { template - bool match(MachineRegisterInfo &MRI, MatchSrc &&src) { + bool match(const MachineRegisterInfo &MRI, MatchSrc &&src) { return true; } }; @@ -83,14 +83,14 @@ struct And : And { : And(std::forward(preds)...), P(std::forward(p)) { } template - bool match(MachineRegisterInfo &MRI, MatchSrc &&src) { + bool match(const MachineRegisterInfo &MRI, MatchSrc &&src) { return P.match(MRI, src) && And::match(MRI, src); } }; template struct Or { template - bool match(MachineRegisterInfo &MRI, MatchSrc &&src) { + bool match(const MachineRegisterInfo &MRI, MatchSrc &&src) { return false; } }; @@ -101,7 +101,7 @@ struct Or : Or { Or(Pred &&p, Preds &&... preds) : Or(std::forward(preds)...), P(std::forward(p)) {} template - bool match(MachineRegisterInfo &MRI, MatchSrc &&src) { + bool match(const MachineRegisterInfo &MRI, MatchSrc &&src) { return P.match(MRI, src) || Or::match(MRI, src); } }; @@ -175,7 +175,8 @@ struct BinaryOp_match { RHS_P R; BinaryOp_match(const LHS_P &LHS, const RHS_P &RHS) : L(LHS), R(RHS) {} - template bool match(MachineRegisterInfo &MRI, OpTy &&Op) { + template + bool match(const MachineRegisterInfo &MRI, OpTy &&Op) { MachineInstr *TmpMI; if (mi_match(Op, MRI, m_MInstr(TmpMI))) { if (TmpMI->getOpcode() == Opcode && TmpMI->getNumOperands() == 3) { @@ -242,7 +243,8 @@ template struct UnaryOp_match { SrcTy L; UnaryOp_match(const SrcTy &LHS) : L(LHS) {} - template bool match(MachineRegisterInfo &MRI, OpTy &&Op) { + template + bool match(const MachineRegisterInfo &MRI, OpTy &&Op) { MachineInstr *TmpMI; if (mi_match(Op, MRI, m_MInstr(TmpMI))) { if (TmpMI->getOpcode() == Opcode && TmpMI->getNumOperands() == 2) { @@ -323,7 +325,7 @@ struct CheckType { LLT Ty; CheckType(const LLT &Ty) : Ty(Ty) {} - bool match(MachineRegisterInfo &MRI, unsigned Reg) { + bool match(const MachineRegisterInfo &MRI, unsigned Reg) { return MRI.getType(Reg) == Ty; } }; diff --git a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 10d712176b1b..416f9c19f794 100644 --- a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -122,14 +122,22 @@ class SrcOp { MachineInstrBuilder SrcMIB; Register Reg; CmpInst::Predicate Pred; + int64_t Imm; }; public: - enum class SrcType { Ty_Reg, Ty_MIB, Ty_Predicate }; + enum class SrcType { Ty_Reg, Ty_MIB, Ty_Predicate, Ty_Imm }; SrcOp(Register R) : Reg(R), Ty(SrcType::Ty_Reg) {} SrcOp(const MachineOperand &Op) : Reg(Op.getReg()), Ty(SrcType::Ty_Reg) {} SrcOp(const MachineInstrBuilder &MIB) : 
SrcMIB(MIB), Ty(SrcType::Ty_MIB) {} SrcOp(const CmpInst::Predicate P) : Pred(P), Ty(SrcType::Ty_Predicate) {} + /// Use of registers held in unsigned integer variables (or more rarely signed + /// integers) is no longer permitted to avoid ambiguity with upcoming support + /// for immediates. + SrcOp(unsigned) = delete; + SrcOp(int) = delete; + SrcOp(uint64_t V) : Imm(V), Ty(SrcType::Ty_Imm) {} + SrcOp(int64_t V) : Imm(V), Ty(SrcType::Ty_Imm) {} void addSrcToMIB(MachineInstrBuilder &MIB) const { switch (Ty) { @@ -142,12 +150,16 @@ public: case SrcType::Ty_MIB: MIB.addUse(SrcMIB->getOperand(0).getReg()); break; + case SrcType::Ty_Imm: + MIB.addImm(Imm); + break; } } LLT getLLTTy(const MachineRegisterInfo &MRI) const { switch (Ty) { case SrcType::Ty_Predicate: + case SrcType::Ty_Imm: llvm_unreachable("Not a register operand"); case SrcType::Ty_Reg: return MRI.getType(Reg); @@ -160,6 +172,7 @@ public: Register getReg() const { switch (Ty) { case SrcType::Ty_Predicate: + case SrcType::Ty_Imm: llvm_unreachable("Not a register operand"); case SrcType::Ty_Reg: return Reg; @@ -178,6 +191,15 @@ public: } } + int64_t getImm() const { + switch (Ty) { + case SrcType::Ty_Imm: + return Imm; + default: + llvm_unreachable("Not an immediate"); + } + } + SrcType getSrcOpKind() const { return Ty; } private: @@ -348,6 +370,17 @@ public: /// given. Convert "llvm.dbg.label Label" to "DBG_LABEL Label". MachineInstrBuilder buildDbgLabel(const MDNode *Label); + /// Build and insert \p Res = G_DYN_STACKALLOC \p Size, \p Align + /// + /// G_DYN_STACKALLOC does a dynamic stack allocation and writes the address of + /// the allocated memory into \p Res. + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res must be a generic virtual register with pointer type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildDynStackAlloc(const DstOp &Res, const SrcOp &Size, + unsigned Align); + /// Build and insert \p Res = G_FRAME_INDEX \p Idx /// /// G_FRAME_INDEX materializes the address of an alloca value or other @@ -489,11 +522,21 @@ public: return buildInstr(TargetOpcode::G_PTRTOINT, {Dst}, {Src}); } + /// Build and insert a G_INTTOPTR instruction. + MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src) { + return buildInstr(TargetOpcode::G_INTTOPTR, {Dst}, {Src}); + } + /// Build and insert \p Dst = G_BITCAST \p Src MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src) { return buildInstr(TargetOpcode::G_BITCAST, {Dst}, {Src}); } + /// Build and insert \p Dst = G_ADDRSPACE_CAST \p Src + MachineInstrBuilder buildAddrSpaceCast(const DstOp &Dst, const SrcOp &Src) { + return buildInstr(TargetOpcode::G_ADDRSPACE_CAST, {Dst}, {Src}); + } + /// \return The opcode of the extension the target wants to use for boolean /// values. unsigned getBoolExtOp(bool IsVec, bool IsFP) const; @@ -867,7 +910,8 @@ public: /// /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, - const SrcOp &Op0, const SrcOp &Op1); + const SrcOp &Op0, const SrcOp &Op1, + Optional Flags = None); /// Build and insert a \p Res = G_SELECT \p Tst, \p Op0, \p Op1 /// @@ -880,7 +924,8 @@ public: /// /// \return a MachineInstrBuilder for the newly created instruction. 
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, - const SrcOp &Op0, const SrcOp &Op1); + const SrcOp &Op0, const SrcOp &Op1, + Optional Flags = None); /// Build and insert \p Res = G_INSERT_VECTOR_ELT \p Val, /// \p Elt, \p Idx @@ -961,8 +1006,8 @@ public: /// same type. /// /// \return a MachineInstrBuilder for the newly created instruction. - MachineInstrBuilder buildAtomicRMW(unsigned Opcode, Register OldValRes, - Register Addr, Register Val, + MachineInstrBuilder buildAtomicRMW(unsigned Opcode, const DstOp &OldValRes, + const SrcOp &Addr, const SrcOp &Val, MachineMemOperand &MMO); /// Build and insert `OldValRes = G_ATOMICRMW_XCHG Addr, Val, MMO`. @@ -1135,6 +1180,16 @@ public: MachineInstrBuilder buildAtomicRMWUmin(Register OldValRes, Register Addr, Register Val, MachineMemOperand &MMO); + /// Build and insert `OldValRes = G_ATOMICRMW_FADD Addr, Val, MMO`. + MachineInstrBuilder buildAtomicRMWFAdd( + const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val, + MachineMemOperand &MMO); + + /// Build and insert `OldValRes = G_ATOMICRMW_FSUB Addr, Val, MMO`. + MachineInstrBuilder buildAtomicRMWFSub( + const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val, + MachineMemOperand &MMO); + /// Build and insert `G_FENCE Ordering, Scope`. MachineInstrBuilder buildFence(unsigned Ordering, unsigned Scope); @@ -1210,6 +1265,12 @@ public: return buildInstr(TargetOpcode::G_SMULH, {Dst}, {Src0, Src1}, Flags); } + MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, + const SrcOp &Src1, + Optional Flags = None) { + return buildInstr(TargetOpcode::G_FMUL, {Dst}, {Src0, Src1}, Flags); + } + MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional Flags = None) { @@ -1300,8 +1361,9 @@ public: /// Build and insert \p Res = G_FADD \p Op0, \p Op1 MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, - const SrcOp &Src1) { - return buildInstr(TargetOpcode::G_FADD, {Dst}, {Src0, Src1}); + const SrcOp &Src1, + Optional Flags = None) { + return buildInstr(TargetOpcode::G_FADD, {Dst}, {Src0, Src1}, Flags); } /// Build and insert \p Res = G_FSUB \p Op0, \p Op1 @@ -1316,14 +1378,23 @@ public: return buildInstr(TargetOpcode::G_FMA, {Dst}, {Src0, Src1, Src2}); } + /// Build and insert \p Res = G_FMAD \p Op0, \p Op1, \p Op2 + MachineInstrBuilder buildFMAD(const DstOp &Dst, const SrcOp &Src0, + const SrcOp &Src1, const SrcOp &Src2, + Optional Flags = None) { + return buildInstr(TargetOpcode::G_FMAD, {Dst}, {Src0, Src1, Src2}, Flags); + } + /// Build and insert \p Res = G_FNEG \p Op0 - MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0) { - return buildInstr(TargetOpcode::G_FNEG, {Dst}, {Src0}); + MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, + Optional Flags = None) { + return buildInstr(TargetOpcode::G_FNEG, {Dst}, {Src0}, Flags); } /// Build and insert \p Res = G_FABS \p Op0 - MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0) { - return buildInstr(TargetOpcode::G_FABS, {Dst}, {Src0}); + MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0, + Optional Flags = None) { + return buildInstr(TargetOpcode::G_FABS, {Dst}, {Src0}, Flags); } /// Build and insert \p Dst = G_FCANONICALIZE \p Src0 diff --git a/include/llvm/CodeGen/GlobalISel/Utils.h b/include/llvm/CodeGen/GlobalISel/Utils.h index 4cdaa48fb689..8af2853473c2 100644 --- a/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/include/llvm/CodeGen/GlobalISel/Utils.h @@ -16,6 +16,8 @@ #include "llvm/ADT/StringRef.h" 
#include "llvm/CodeGen/Register.h" +#include "llvm/Support/LowLevelTypeImpl.h" +#include "llvm/Support/MachineValueType.h" namespace llvm { @@ -117,14 +119,16 @@ struct ValueAndVReg { unsigned VReg; }; /// If \p VReg is defined by a statically evaluable chain of -/// instructions rooted on a G_CONSTANT (\p LookThroughInstrs == true) -/// and that constant fits in int64_t, returns its value as well as -/// the virtual register defined by this G_CONSTANT. -/// When \p LookThroughInstrs == false, this function behaves like +/// instructions rooted on a G_F/CONSTANT (\p LookThroughInstrs == true) +/// and that constant fits in int64_t, returns its value as well as the +/// virtual register defined by this G_F/CONSTANT. +/// When \p LookThroughInstrs == false this function behaves like /// getConstantVRegVal. +/// When \p HandleFConstants == false the function bails on G_FCONSTANTs. Optional getConstantVRegValWithLookThrough(unsigned VReg, const MachineRegisterInfo &MRI, - bool LookThroughInstrs = true); + bool LookThroughInstrs = true, + bool HandleFConstants = true); const ConstantFP* getConstantFPVRegVal(unsigned VReg, const MachineRegisterInfo &MRI); @@ -151,6 +155,9 @@ Optional ConstantFoldBinOp(unsigned Opcode, const unsigned Op1, const unsigned Op2, const MachineRegisterInfo &MRI); +Optional ConstantFoldExtOp(unsigned Opcode, const unsigned Op1, + uint64_t Imm, const MachineRegisterInfo &MRI); + /// Returns true if \p Val can be assumed to never be a NaN. If \p SNaN is true, /// this returns if \p Val can be assumed to never be a signaling NaN. bool isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, @@ -161,5 +168,10 @@ inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI) { return isKnownNeverNaN(Val, MRI, true); } +/// Get a rough equivalent of an MVT for a given LLT. +MVT getMVTForLLT(LLT Ty); +/// Get a rough equivalent of an LLT for a given MVT. +LLT getLLTForMVT(MVT Ty); + } // End namespace llvm. #endif diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index acf27dcc5fab..658ad31fa2a6 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -281,7 +281,7 @@ namespace ISD { /// Same as the corresponding unsaturated fixed point instructions, but the /// result is clamped between the min and max values representable by the /// bits of the first 2 operands. - SMULFIXSAT, + SMULFIXSAT, UMULFIXSAT, /// Simple binary floating point operators. FADD, FSUB, FMUL, FDIV, FREM, @@ -301,6 +301,14 @@ namespace ISD { STRICT_FEXP, STRICT_FEXP2, STRICT_FLOG, STRICT_FLOG10, STRICT_FLOG2, STRICT_FRINT, STRICT_FNEARBYINT, STRICT_FMAXNUM, STRICT_FMINNUM, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND, STRICT_FTRUNC, + STRICT_LROUND, STRICT_LLROUND, STRICT_LRINT, STRICT_LLRINT, + + /// STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or + /// unsigned integer. These have the same semantics as fptosi and fptoui + /// in IR. + /// They are used to limit optimizations while the DAG is being optimized. + STRICT_FP_TO_SINT, + STRICT_FP_TO_UINT, /// X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating /// point type down to the precision of the destination VT. TRUNC is a @@ -398,6 +406,13 @@ namespace ISD { /// than the vector element type, and is implicitly truncated to it. SCALAR_TO_VECTOR, + /// SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL + /// duplicated in all lanes. 
The type of the operand must match the vector + /// element type, except when they are integer types. In this case the + /// operand is allowed to be wider than the vector element type, and is + /// implicitly truncated to it. + SPLAT_VECTOR, + /// MULHU/MULHS - Multiply high - Multiply two integers of type iN, /// producing an unsigned/signed value of type i[2*N], then return the top /// part. @@ -569,13 +584,6 @@ namespace ISD { /// 3 Round to -inf FLT_ROUNDS_, - /// X = FP_ROUND_INREG(Y, VT) - This operator takes an FP register, and - /// rounds it to a floating point value. It then promotes it and returns it - /// in a register of the same size. This operation effectively just - /// discards excess precision. The type to round down to is specified by - /// the VT operand, a VTSDNode. - FP_ROUND_INREG, - /// X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type. FP_EXTEND, @@ -957,6 +965,23 @@ namespace ISD { static const int LAST_INDEXED_MODE = POST_DEC + 1; + //===--------------------------------------------------------------------===// + /// MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's + /// index parameter when calculating addresses. + /// + /// SIGNED_SCALED Addr = Base + ((signed)Index * sizeof(element)) + /// SIGNED_UNSCALED Addr = Base + (signed)Index + /// UNSIGNED_SCALED Addr = Base + ((unsigned)Index * sizeof(element)) + /// UNSIGNED_UNSCALED Addr = Base + (unsigned)Index + enum MemIndexType { + SIGNED_SCALED = 0, + SIGNED_UNSCALED, + UNSIGNED_SCALED, + UNSIGNED_UNSCALED + }; + + static const int LAST_MEM_INDEX_TYPE = UNSIGNED_UNSCALED + 1; + //===--------------------------------------------------------------------===// /// LoadExtType enum - This enum defines the three variants of LOADEXT /// (load with extension). diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h index 8bb88165d3e1..290a2381d9c9 100644 --- a/include/llvm/CodeGen/LiveInterval.h +++ b/include/llvm/CodeGen/LiveInterval.h @@ -189,6 +189,10 @@ namespace llvm { return start == Other.start && end == Other.end; } + bool operator!=(const Segment &Other) const { + return !(*this == Other); + } + void dump() const; }; @@ -224,7 +228,7 @@ namespace llvm { /// Constructs a new LiveRange object. LiveRange(bool UseSegmentSet = false) - : segmentSet(UseSegmentSet ? llvm::make_unique() + : segmentSet(UseSegmentSet ? std::make_unique() : nullptr) {} /// Constructs a new LiveRange object by copying segments and valnos from diff --git a/include/llvm/CodeGen/LiveIntervals.h b/include/llvm/CodeGen/LiveIntervals.h index 588b0f9cf39c..888d72b87bd1 100644 --- a/include/llvm/CodeGen/LiveIntervals.h +++ b/include/llvm/CodeGen/LiveIntervals.h @@ -111,30 +111,31 @@ class VirtRegMap; const MachineBlockFrequencyInfo *MBFI, const MachineBasicBlock *MBB); - LiveInterval &getInterval(unsigned Reg) { + LiveInterval &getInterval(Register Reg) { if (hasInterval(Reg)) - return *VirtRegIntervals[Reg]; + return *VirtRegIntervals[Reg.id()]; else return createAndComputeVirtRegInterval(Reg); } - const LiveInterval &getInterval(unsigned Reg) const { + const LiveInterval &getInterval(Register Reg) const { return const_cast(this)->getInterval(Reg); } - bool hasInterval(unsigned Reg) const { - return VirtRegIntervals.inBounds(Reg) && VirtRegIntervals[Reg]; + bool hasInterval(Register Reg) const { + return VirtRegIntervals.inBounds(Reg.id()) && + VirtRegIntervals[Reg.id()]; } /// Interval creation. 
- LiveInterval &createEmptyInterval(unsigned Reg) { + LiveInterval &createEmptyInterval(Register Reg) { assert(!hasInterval(Reg) && "Interval already exists!"); - VirtRegIntervals.grow(Reg); - VirtRegIntervals[Reg] = createInterval(Reg); - return *VirtRegIntervals[Reg]; + VirtRegIntervals.grow(Reg.id()); + VirtRegIntervals[Reg.id()] = createInterval(Reg); + return *VirtRegIntervals[Reg.id()]; } - LiveInterval &createAndComputeVirtRegInterval(unsigned Reg) { + LiveInterval &createAndComputeVirtRegInterval(Register Reg) { LiveInterval &LI = createEmptyInterval(Reg); computeVirtRegInterval(LI); return LI; diff --git a/include/llvm/CodeGen/LiveRangeCalc.h b/include/llvm/CodeGen/LiveRangeCalc.h new file mode 100644 index 000000000000..08026c05733c --- /dev/null +++ b/include/llvm/CodeGen/LiveRangeCalc.h @@ -0,0 +1,295 @@ +//===- LiveRangeCalc.h - Calculate live ranges ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The LiveRangeCalc class can be used to compute live ranges from scratch. It +// caches information about values in the CFG to speed up repeated operations +// on the same live range. The cache can be shared by non-overlapping live +// ranges. SplitKit uses that when computing the live range of split products. +// +// A low-level interface is available to clients that know where a variable is +// live, but don't know which value it has at every point. LiveRangeCalc will +// propagate values down the dominator tree, and even insert PHI-defs where +// needed. SplitKit uses this faster interface when possible. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_LIVERANGECALC_H +#define LLVM_LIB_CODEGEN_LIVERANGECALC_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/MC/LaneBitmask.h" +#include <utility> + +namespace llvm { + +template <class NodeT> class DomTreeNodeBase; +class MachineDominatorTree; +class MachineFunction; +class MachineRegisterInfo; + +using MachineDomTreeNode = DomTreeNodeBase<MachineBasicBlock>; + +class LiveRangeCalc { + const MachineFunction *MF = nullptr; + const MachineRegisterInfo *MRI = nullptr; + SlotIndexes *Indexes = nullptr; + MachineDominatorTree *DomTree = nullptr; + VNInfo::Allocator *Alloc = nullptr; + + /// LiveOutPair - A value and the block that defined it. The domtree node is + /// redundant, it can be computed as: MDT[Indexes.getMBBFromIndex(VNI->def)]. + using LiveOutPair = std::pair<VNInfo *, MachineDomTreeNode *>; + + /// LiveOutMap - Map basic blocks to the value leaving the block. + using LiveOutMap = IndexedMap<LiveOutPair, MBB2NumberFunctor>; + + /// Bit vector of active entries in LiveOut, also used as a visited set by + /// findReachingDefs. One entry per basic block, indexed by block number. + /// This is kept as a separate bit vector because it can be cleared quickly + /// when switching live ranges. + BitVector Seen; + + /// Map LiveRange to sets of blocks (represented by bit vectors) that + /// in the live range are defined on entry and undefined on entry.
+ /// A block is defined on entry if there is a path from at least one of + /// the defs in the live range to the entry of the block, and conversely, + /// a block is undefined on entry, if there is no such path (i.e. no + /// definition reaches the entry of the block). A single LiveRangeCalc + /// object is used to track live-out information for multiple registers + /// in live range splitting (which is ok, since the live ranges of these + /// registers do not overlap), but the defined/undefined information must + /// be kept separate for each individual range. + /// By convention, EntryInfoMap[&LR] = { Defined, Undefined }. + using EntryInfoMap = DenseMap<LiveRange *, std::pair<BitVector, BitVector>>; + EntryInfoMap EntryInfos; + + /// Map each basic block where a live range is live out to the live-out value + /// and its defining block. + /// + /// For every basic block, MBB, one of these conditions shall be true: + /// + /// 1. !Seen.count(MBB->getNumber()) + /// Blocks without a Seen bit are ignored. + /// 2. LiveOut[MBB].second.getNode() == MBB + /// The live-out value is defined in MBB. + /// 3. forall P in preds(MBB): LiveOut[P] == LiveOut[MBB] + /// The live-out value passes through MBB. All predecessors must carry + /// the same value. + /// + /// The domtree node may be null, it can be computed. + /// + /// The map can be shared by multiple live ranges as long as no two are + /// live-out of the same block. + LiveOutMap Map; + + /// LiveInBlock - Information about a basic block where a live range is known + /// to be live-in, but the value has not yet been determined. + struct LiveInBlock { + // The live range set that is live-in to this block. The algorithms can + // handle multiple non-overlapping live ranges simultaneously. + LiveRange &LR; + + // DomNode - Dominator tree node for the block. + // Cleared when the final value has been determined and LI has been updated. + MachineDomTreeNode *DomNode; + + // Position in block where the live-in range ends, or SlotIndex() if the + // range passes through the block. When the final value has been + // determined, the range from the block start to Kill will be added to LI. + SlotIndex Kill; + + // Live-in value filled in by updateSSA once it is known. + VNInfo *Value = nullptr; + + LiveInBlock(LiveRange &LR, MachineDomTreeNode *node, SlotIndex kill) + : LR(LR), DomNode(node), Kill(kill) {} + }; + + /// LiveIn - Work list of blocks where the live-in value has yet to be + /// determined. This list is typically computed by findReachingDefs() and + /// used as a work list by updateSSA(). The low-level interface may also be + /// used to add entries directly. + SmallVector<LiveInBlock, 16> LiveIn; + + /// Check if the entry to block @p MBB can be reached by any of the defs + /// in @p LR. Return true if none of the defs reach the entry to @p MBB. + bool isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs, + MachineBasicBlock &MBB, BitVector &DefOnEntry, + BitVector &UndefOnEntry); + + /// Find the set of defs that can reach @p Kill. @p Kill must belong to + /// @p UseMBB. + /// + /// If exactly one def can reach @p UseMBB, and the def dominates @p Kill, + /// all paths from the def to @p UseMBB are added to @p LR, and the function + /// returns true. + /// + /// If multiple values can reach @p UseMBB, the blocks that need @p LR to be + /// live in are added to the LiveIn array, and the function returns false. + /// + /// The array @p Undef provides the locations where the range @p LR becomes + /// undefined by operands on other subranges.
If @p Undef + /// is non-empty and @p Kill is jointly dominated only by the entries of + /// @p Undef, the function returns false. + /// + /// PhysReg, when set, is used to verify live-in lists on basic blocks. + bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, SlotIndex Use, + unsigned PhysReg, ArrayRef Undefs); + + /// updateSSA - Compute the values that will be live in to all requested + /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form. + /// + /// Every live-in block must be jointly dominated by the added live-out + /// blocks. No values are read from the live ranges. + void updateSSA(); + + /// Transfer information from the LiveIn vector to the live ranges and update + /// the given @p LiveOuts. + void updateFromLiveIns(); + + /// Extend the live range of @p LR to reach all uses of Reg. + /// + /// If @p LR is a main range, or if @p LI is null, then all uses must be + /// jointly dominated by the definitions from @p LR. If @p LR is a subrange + /// of the live interval @p LI, corresponding to lane mask @p LaneMask, + /// all uses must be jointly dominated by the definitions from @p LR + /// together with definitions of other lanes where @p LR becomes undefined + /// (via operands). + /// If @p LR is a main range, the @p LaneMask should be set to ~0, i.e. + /// LaneBitmask::getAll(). + void extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask, + LiveInterval *LI = nullptr); + + /// Reset Map and Seen fields. + void resetLiveOutMap(); + +public: + LiveRangeCalc() = default; + + //===--------------------------------------------------------------------===// + // High-level interface. + //===--------------------------------------------------------------------===// + // + // Calculate live ranges from scratch. + // + + /// reset - Prepare caches for a new set of non-overlapping live ranges. The + /// caches must be reset before attempting calculations with a live range + /// that may overlap a previously computed live range, and before the first + /// live range in a function. If live ranges are not known to be + /// non-overlapping, call reset before each. + void reset(const MachineFunction *mf, SlotIndexes *SI, + MachineDominatorTree *MDT, VNInfo::Allocator *VNIA); + + //===--------------------------------------------------------------------===// + // Mid-level interface. + //===--------------------------------------------------------------------===// + // + // Modify existing live ranges. + // + + /// Extend the live range of @p LR to reach @p Use. + /// + /// The existing values in @p LR must be live so they jointly dominate @p Use. + /// If @p Use is not dominated by a single existing value, PHI-defs are + /// inserted as required to preserve SSA form. + /// + /// PhysReg, when set, is used to verify live-in lists on basic blocks. + void extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg, + ArrayRef Undefs); + + /// createDeadDefs - Create a dead def in LI for every def operand of Reg. + /// Each instruction defining Reg gets a new VNInfo with a corresponding + /// minimal live range. + void createDeadDefs(LiveRange &LR, unsigned Reg); + + /// Extend the live range of @p LR to reach all uses of Reg. + /// + /// All uses must be jointly dominated by existing liveness. PHI-defs are + /// inserted as needed to preserve SSA form. + void extendToUses(LiveRange &LR, unsigned PhysReg) { + extendToUses(LR, PhysReg, LaneBitmask::getAll()); + } + + /// Calculates liveness for the register specified in live interval @p LI. 
+ /// Creates subregister live ranges as needed if subreg liveness tracking is + /// enabled. + void calculate(LiveInterval &LI, bool TrackSubRegs); + + /// For live interval \p LI with correct SubRanges construct matching + /// information for the main live range. Expects the main live range to not + /// have any segments or value numbers. + void constructMainRangeFromSubranges(LiveInterval &LI); + + //===--------------------------------------------------------------------===// + // Low-level interface. + //===--------------------------------------------------------------------===// + // + // These functions can be used to compute live ranges where the live-in and + // live-out blocks are already known, but the SSA value in each block is + // unknown. + // + // After calling reset(), add known live-out values and known live-in blocks. + // Then call calculateValues() to compute the actual value that is + // live-in to each block, and add liveness to the live ranges. + // + + /// setLiveOutValue - Indicate that VNI is live out from MBB. The + /// calculateValues() function will not add liveness for MBB, the caller + /// should take care of that. + /// + /// VNI may be null only if MBB is a live-through block also passed to + /// addLiveInBlock(). + void setLiveOutValue(MachineBasicBlock *MBB, VNInfo *VNI) { + Seen.set(MBB->getNumber()); + Map[MBB] = LiveOutPair(VNI, nullptr); + } + + /// addLiveInBlock - Add a block with an unknown live-in value. This + /// function can only be called once per basic block. Once the live-in value + /// has been determined, calculateValues() will add liveness to LI. + /// + /// @param LR The live range that is live-in to the block. + /// @param DomNode The domtree node for the block. + /// @param Kill Index in block where LI is killed. If the value is + /// live-through, set Kill = SLotIndex() and also call + /// setLiveOutValue(MBB, 0). + void addLiveInBlock(LiveRange &LR, MachineDomTreeNode *DomNode, + SlotIndex Kill = SlotIndex()) { + LiveIn.push_back(LiveInBlock(LR, DomNode, Kill)); + } + + /// calculateValues - Calculate the value that will be live-in to each block + /// added with addLiveInBlock. Add PHI-def values as needed to preserve SSA + /// form. Add liveness to all live-in blocks up to the Kill point, or the + /// whole block for live-through blocks. + /// + /// Every predecessor of a live-in block must have been given a value with + /// setLiveOutValue, the value may be null for live-trough blocks. + void calculateValues(); + + /// A diagnostic function to check if the end of the block @p MBB is + /// jointly dominated by the blocks corresponding to the slot indices + /// in @p Defs. This function is mainly for use in self-verification + /// checks. + LLVM_ATTRIBUTE_UNUSED + static bool isJointlyDominated(const MachineBasicBlock *MBB, + ArrayRef Defs, + const SlotIndexes &Indexes); +}; + +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_LIVERANGECALC_H diff --git a/include/llvm/CodeGen/LiveRegUnits.h b/include/llvm/CodeGen/LiveRegUnits.h index 7dbb2feab8bf..314afad92970 100644 --- a/include/llvm/CodeGen/LiveRegUnits.h +++ b/include/llvm/CodeGen/LiveRegUnits.h @@ -53,8 +53,8 @@ public: ModifiedRegUnits.addRegsInMask(O->getRegMask()); if (!O->isReg()) continue; - unsigned Reg = O->getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = O->getReg(); + if (!Reg.isPhysical()) continue; if (O->isDef()) { // Some architectures (e.g. 
AArch64 XZR/WZR) have registers that are diff --git a/include/llvm/CodeGen/MIRYamlMapping.h b/include/llvm/CodeGen/MIRYamlMapping.h index 94e76a75e8da..069d0aa45095 100644 --- a/include/llvm/CodeGen/MIRYamlMapping.h +++ b/include/llvm/CodeGen/MIRYamlMapping.h @@ -314,6 +314,7 @@ struct ScalarEnumerationTraits { static void enumeration(yaml::IO &IO, TargetStackID::Value &ID) { IO.enumCase(ID, "default", TargetStackID::Default); IO.enumCase(ID, "sgpr-spill", TargetStackID::SGPRSpill); + IO.enumCase(ID, "sve-vec", TargetStackID::SVEVector); IO.enumCase(ID, "noalloc", TargetStackID::NoAlloc); } }; diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h index 333d0a78618c..ccdde78a0b22 100644 --- a/include/llvm/CodeGen/MachineBasicBlock.h +++ b/include/llvm/CodeGen/MachineBasicBlock.h @@ -103,9 +103,9 @@ private: using LiveInVector = std::vector; LiveInVector LiveIns; - /// Alignment of the basic block. Zero if the basic block does not need to be - /// aligned. The alignment is specified as log2(bytes). - unsigned Alignment = 0; + /// Alignment of the basic block. One if the basic block does not need to be + /// aligned. + Align Alignment; /// Indicate that this basic block is entered via an exception handler. bool IsEHPad = false; @@ -312,7 +312,7 @@ public: /// Adds the specified register as a live in. Note that it is an error to add /// the same register to the same set more than once unless the intention is /// to call sortUniqueLiveIns after all registers are added. - void addLiveIn(MCPhysReg PhysReg, + void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask = LaneBitmask::getAll()) { LiveIns.push_back(RegisterMaskPair(PhysReg, LaneMask)); } @@ -331,7 +331,7 @@ public: /// Add PhysReg as live in to this block, and ensure that there is a copy of /// PhysReg to a virtual register of class RC. Return the virtual register /// that is a copy of the live in PhysReg. - unsigned addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC); + unsigned addLiveIn(MCRegister PhysReg, const TargetRegisterClass *RC); /// Remove the specified register from the live in set. void removeLiveIn(MCPhysReg Reg, @@ -372,13 +372,11 @@ public: /// \see getBeginClobberMask() const uint32_t *getEndClobberMask(const TargetRegisterInfo *TRI) const; - /// Return alignment of the basic block. The alignment is specified as - /// log2(bytes). - unsigned getAlignment() const { return Alignment; } + /// Return alignment of the basic block. + Align getAlignment() const { return Alignment; } - /// Set alignment of the basic block. The alignment is specified as - /// log2(bytes). - void setAlignment(unsigned Align) { Alignment = Align; } + /// Set alignment of the basic block. + void setAlignment(Align A) { Alignment = A; } /// Returns true if the block is a landing pad. That is this basic block is /// entered via an exception handler. @@ -636,6 +634,18 @@ public: return Insts.insertAfter(I.getInstrIterator(), MI); } + /// If I is bundled then insert MI into the instruction list after the end of + /// the bundle, otherwise insert MI immediately after I. + instr_iterator insertAfterBundle(instr_iterator I, MachineInstr *MI) { + assert((I == instr_end() || I->getParent() == this) && + "iterator points outside of basic block"); + assert(!MI->isBundledWithPred() && !MI->isBundledWithSucc() && + "Cannot insert instruction with bundle flags"); + while (I->isBundledWithSucc()) + ++I; + return Insts.insertAfter(I, MI); + } + /// Remove an instruction from the instruction list and delete it. 
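The MachineBasicBlock alignment above now carries an llvm::Align in bytes instead of a raw log2 value. A small sketch of the updated call; the 16-byte request is purely illustrative:

  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/Support/Alignment.h"
  #include <cassert>
  using namespace llvm;

  void alignBlock(MachineBasicBlock &MBB) {
    // Previously this took log2(bytes), e.g. setAlignment(4) for 16 bytes;
    // with llvm::Align the byte alignment is passed directly.
    MBB.setAlignment(Align(16));
    assert(MBB.getAlignment() == Align(16));
  }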
/// /// If the instruction is part of a bundle, the other instructions in the @@ -723,6 +733,10 @@ public: /// CFG so that it branches to 'New' instead. void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New); + /// Update all phi nodes in this basic block to refer to basic block \p New + /// instead of basic block \p Old. + void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New); + /// Various pieces of code can cause excess edges in the CFG to be inserted. /// If we have proven that MBB can only branch to DestA and DestB, remove any /// other MBB successors from the CFG. DestA and DestB can be null. Besides diff --git a/include/llvm/CodeGen/MachineCombinerPattern.h b/include/llvm/CodeGen/MachineCombinerPattern.h index 4f4034baf801..503227222207 100644 --- a/include/llvm/CodeGen/MachineCombinerPattern.h +++ b/include/llvm/CodeGen/MachineCombinerPattern.h @@ -39,6 +39,10 @@ enum class MachineCombinerPattern { MULADDXI_OP1, MULSUBXI_OP1, // Floating Point + FMULADDH_OP1, + FMULADDH_OP2, + FMULSUBH_OP1, + FMULSUBH_OP2, FMULADDS_OP1, FMULADDS_OP2, FMULSUBS_OP1, @@ -47,16 +51,25 @@ enum class MachineCombinerPattern { FMULADDD_OP2, FMULSUBD_OP1, FMULSUBD_OP2, + FNMULSUBH_OP1, FNMULSUBS_OP1, FNMULSUBD_OP1, FMLAv1i32_indexed_OP1, FMLAv1i32_indexed_OP2, FMLAv1i64_indexed_OP1, FMLAv1i64_indexed_OP2, + FMLAv4f16_OP1, + FMLAv4f16_OP2, + FMLAv8f16_OP1, + FMLAv8f16_OP2, FMLAv2f32_OP2, FMLAv2f32_OP1, FMLAv2f64_OP1, FMLAv2f64_OP2, + FMLAv4i16_indexed_OP1, + FMLAv4i16_indexed_OP2, + FMLAv8i16_indexed_OP1, + FMLAv8i16_indexed_OP2, FMLAv2i32_indexed_OP1, FMLAv2i32_indexed_OP2, FMLAv2i64_indexed_OP1, @@ -67,10 +80,18 @@ enum class MachineCombinerPattern { FMLAv4i32_indexed_OP2, FMLSv1i32_indexed_OP2, FMLSv1i64_indexed_OP2, + FMLSv4f16_OP1, + FMLSv4f16_OP2, + FMLSv8f16_OP1, + FMLSv8f16_OP2, FMLSv2f32_OP1, FMLSv2f32_OP2, FMLSv2f64_OP1, FMLSv2f64_OP2, + FMLSv4i16_indexed_OP1, + FMLSv4i16_indexed_OP2, + FMLSv8i16_indexed_OP1, + FMLSv8i16_indexed_OP2, FMLSv2i32_indexed_OP1, FMLSv2i32_indexed_OP2, FMLSv2i64_indexed_OP1, diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h index d2200080b897..e4d7a02f8c48 100644 --- a/include/llvm/CodeGen/MachineDominators.h +++ b/include/llvm/CodeGen/MachineDominators.h @@ -44,6 +44,8 @@ using MachineDomTreeNode = DomTreeNodeBase; /// compute a normal dominator tree. /// class MachineDominatorTree : public MachineFunctionPass { + using DomTreeT = DomTreeBase; + /// Helper structure used to hold all the basic blocks /// involved in the split of a critical edge. struct CriticalEdge { @@ -65,8 +67,8 @@ class MachineDominatorTree : public MachineFunctionPass { /// such as BB == elt.NewBB. mutable SmallSet NewBBs; - /// The DominatorTreeBase that is used to compute a normal dominator tree - std::unique_ptr> DT; + /// The DominatorTreeBase that is used to compute a normal dominator tree. + std::unique_ptr DT; /// Apply all the recorded critical edges to the DT. /// This updates the underlying DT information in a way that uses @@ -80,8 +82,8 @@ public: MachineDominatorTree(); - DomTreeBase &getBase() { - if (!DT) DT.reset(new DomTreeBase()); + DomTreeT &getBase() { + if (!DT) DT.reset(new DomTreeT()); applySplitCriticalEdges(); return *DT; } @@ -92,31 +94,30 @@ public: /// multiple blocks if we are computing post dominators. For forward /// dominators, this will always be a single block (the entry node). 
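MachineDominatorTree gains a DomTreeT alias here; the query interface, continued below, is unchanged apart from dropping redundant inline specifiers. For context, a typical dominance check, purely illustrative:

  #include "llvm/CodeGen/MachineDominators.h"
  using namespace llvm;

  // True when every path from the entry block to MBB passes through Header.
  bool isDominatedByHeader(const MachineDominatorTree &MDT,
                           MachineBasicBlock *Header, MachineBasicBlock *MBB) {
    return MDT.dominates(Header, MBB);
  }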
/// - inline const SmallVectorImpl &getRoots() const { + const SmallVectorImpl &getRoots() const { applySplitCriticalEdges(); return DT->getRoots(); } - inline MachineBasicBlock *getRoot() const { + MachineBasicBlock *getRoot() const { applySplitCriticalEdges(); return DT->getRoot(); } - inline MachineDomTreeNode *getRootNode() const { + MachineDomTreeNode *getRootNode() const { applySplitCriticalEdges(); return DT->getRootNode(); } bool runOnMachineFunction(MachineFunction &F) override; - inline bool dominates(const MachineDomTreeNode* A, - const MachineDomTreeNode* B) const { + bool dominates(const MachineDomTreeNode *A, + const MachineDomTreeNode *B) const { applySplitCriticalEdges(); return DT->dominates(A, B); } - inline bool dominates(const MachineBasicBlock* A, - const MachineBasicBlock* B) const { + bool dominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const { applySplitCriticalEdges(); return DT->dominates(A, B); } @@ -133,36 +134,30 @@ public: for (; &*I != A && &*I != B; ++I) /*empty*/ ; - //if(!DT.IsPostDominators) { - // A dominates B if it is found first in the basic block. - return &*I == A; - //} else { - // // A post-dominates B if B is found first in the basic block. - // return &*I == B; - //} + return &*I == A; } - inline bool properlyDominates(const MachineDomTreeNode* A, - const MachineDomTreeNode* B) const { + bool properlyDominates(const MachineDomTreeNode *A, + const MachineDomTreeNode *B) const { applySplitCriticalEdges(); return DT->properlyDominates(A, B); } - inline bool properlyDominates(const MachineBasicBlock* A, - const MachineBasicBlock* B) const { + bool properlyDominates(const MachineBasicBlock *A, + const MachineBasicBlock *B) const { applySplitCriticalEdges(); return DT->properlyDominates(A, B); } /// findNearestCommonDominator - Find nearest common dominator basic block /// for basic block A and B. If there is no such block then return NULL. - inline MachineBasicBlock *findNearestCommonDominator(MachineBasicBlock *A, - MachineBasicBlock *B) { + MachineBasicBlock *findNearestCommonDominator(MachineBasicBlock *A, + MachineBasicBlock *B) { applySplitCriticalEdges(); return DT->findNearestCommonDominator(A, B); } - inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const { + MachineDomTreeNode *operator[](MachineBasicBlock *BB) const { applySplitCriticalEdges(); return DT->getNode(BB); } @@ -170,7 +165,7 @@ public: /// getNode - return the (Post)DominatorTree node for the specified basic /// block. This is the same as using operator[] on this class. /// - inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const { + MachineDomTreeNode *getNode(MachineBasicBlock *BB) const { applySplitCriticalEdges(); return DT->getNode(BB); } @@ -178,8 +173,8 @@ public: /// addNewBlock - Add a new node to the dominator tree information. This /// creates a new node as a child of DomBB dominator node,linking it into /// the children list of the immediate dominator. - inline MachineDomTreeNode *addNewBlock(MachineBasicBlock *BB, - MachineBasicBlock *DomBB) { + MachineDomTreeNode *addNewBlock(MachineBasicBlock *BB, + MachineBasicBlock *DomBB) { applySplitCriticalEdges(); return DT->addNewBlock(BB, DomBB); } @@ -187,14 +182,14 @@ public: /// changeImmediateDominator - This method is used to update the dominator /// tree information when a node's immediate dominator changes. 
/// - inline void changeImmediateDominator(MachineBasicBlock *N, - MachineBasicBlock* NewIDom) { + void changeImmediateDominator(MachineBasicBlock *N, + MachineBasicBlock *NewIDom) { applySplitCriticalEdges(); DT->changeImmediateDominator(N, NewIDom); } - inline void changeImmediateDominator(MachineDomTreeNode *N, - MachineDomTreeNode* NewIDom) { + void changeImmediateDominator(MachineDomTreeNode *N, + MachineDomTreeNode *NewIDom) { applySplitCriticalEdges(); DT->changeImmediateDominator(N, NewIDom); } @@ -202,14 +197,14 @@ public: /// eraseNode - Removes a node from the dominator tree. Block must not /// dominate any other blocks. Removes node from its immediate dominator's /// children list. Deletes dominator node associated with basic block BB. - inline void eraseNode(MachineBasicBlock *BB) { + void eraseNode(MachineBasicBlock *BB) { applySplitCriticalEdges(); DT->eraseNode(BB); } /// splitBlock - BB is split and now it has one successor. Update dominator /// tree to reflect this change. - inline void splitBlock(MachineBasicBlock* NewBB) { + void splitBlock(MachineBasicBlock* NewBB) { applySplitCriticalEdges(); DT->splitBlock(NewBB); } diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index 761735120a64..01fc50d14a7f 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -14,6 +14,7 @@ #define LLVM_CODEGEN_MACHINEFRAMEINFO_H #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/DataTypes.h" #include #include @@ -129,7 +130,7 @@ private: uint64_t Size; // The required alignment of this stack slot. - unsigned Alignment; + Align Alignment; // If true, the value of the stack object is set before // entering the function and is not modified inside the function. By @@ -180,17 +181,16 @@ private: uint8_t SSPLayout; - StackObject(uint64_t Size, unsigned Alignment, int64_t SPOffset, + StackObject(uint64_t Size, Align Alignment, int64_t SPOffset, bool IsImmutable, bool IsSpillSlot, const AllocaInst *Alloca, bool IsAliased, uint8_t StackID = 0) - : SPOffset(SPOffset), Size(Size), Alignment(Alignment), - isImmutable(IsImmutable), isSpillSlot(IsSpillSlot), - StackID(StackID), Alloca(Alloca), isAliased(IsAliased), - SSPLayout(SSPLK_None) {} + : SPOffset(SPOffset), Size(Size), Alignment(Alignment), + isImmutable(IsImmutable), isSpillSlot(IsSpillSlot), StackID(StackID), + Alloca(Alloca), isAliased(IsAliased), SSPLayout(SSPLK_None) {} }; /// The alignment of the stack. - unsigned StackAlignment; + Align StackAlignment; /// Can the stack be realigned. This can be false if the target does not /// support stack realignment, or if the user asks us not to realign the @@ -260,7 +260,7 @@ private: /// native alignment maintained by the compiler, dynamic alignment code will /// be needed. /// - unsigned MaxAlignment = 0; + Align MaxAlignment; /// Set to true if this function adjusts the stack -- e.g., /// when calling another function. This is only valid during and after @@ -304,7 +304,7 @@ private: /// Required alignment of the local object blob, which is the strictest /// alignment of any object in it. - unsigned LocalFrameMaxAlign = 0; + Align LocalFrameMaxAlign; /// Whether the local object blob needs to be allocated together. 
If not, /// PEI should ignore the isPreAllocated flags on the stack objects and @@ -338,8 +338,8 @@ private: public: explicit MachineFrameInfo(unsigned StackAlignment, bool StackRealignable, bool ForcedRealign) - : StackAlignment(StackAlignment), StackRealignable(StackRealignable), - ForcedRealign(ForcedRealign) {} + : StackAlignment(assumeAligned(StackAlignment)), + StackRealignable(StackRealignable), ForcedRealign(ForcedRealign) {} /// Return true if there are any stack objects in this function. bool hasStackObjects() const { return !Objects.empty(); } @@ -419,10 +419,12 @@ public: /// Required alignment of the local object blob, /// which is the strictest alignment of any object in it. - void setLocalFrameMaxAlign(unsigned Align) { LocalFrameMaxAlign = Align; } + void setLocalFrameMaxAlign(Align Alignment) { + LocalFrameMaxAlign = Alignment; + } /// Return the required alignment of the local object blob. - unsigned getLocalFrameMaxAlign() const { return LocalFrameMaxAlign; } + Align getLocalFrameMaxAlign() const { return LocalFrameMaxAlign; } /// Get whether the local allocation blob should be allocated together or /// let PEI allocate the locals in it directly. @@ -462,14 +464,14 @@ public: unsigned getObjectAlignment(int ObjectIdx) const { assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && "Invalid Object Idx!"); - return Objects[ObjectIdx+NumFixedObjects].Alignment; + return Objects[ObjectIdx + NumFixedObjects].Alignment.value(); } /// setObjectAlignment - Change the alignment of the specified stack object. void setObjectAlignment(int ObjectIdx, unsigned Align) { assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && "Invalid Object Idx!"); - Objects[ObjectIdx+NumFixedObjects].Alignment = Align; + Objects[ObjectIdx + NumFixedObjects].Alignment = assumeAligned(Align); // Only ensure max alignment for the default stack. if (getStackID(ObjectIdx) == 0) @@ -561,10 +563,14 @@ public: /// Return the alignment in bytes that this function must be aligned to, /// which is greater than the default stack alignment provided by the target. - unsigned getMaxAlignment() const { return MaxAlignment; } + unsigned getMaxAlignment() const { return MaxAlignment.value(); } /// Make sure the function is at least Align bytes aligned. - void ensureMaxAlignment(unsigned Align); + void ensureMaxAlignment(Align Alignment); + /// FIXME: Remove this once transition to Align is over. + inline void ensureMaxAlignment(unsigned Align) { + ensureMaxAlignment(assumeAligned(Align)); + } /// Return true if this function adjusts the stack -- e.g., /// when calling another function. This is only valid during and after @@ -728,12 +734,24 @@ public: /// Create a new statically sized stack object, returning /// a nonnegative identifier to represent it. - int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSpillSlot, + int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca = nullptr, uint8_t ID = 0); + /// FIXME: Remove this function when transition to Align is over. + inline int CreateStackObject(uint64_t Size, unsigned Alignment, + bool isSpillSlot, + const AllocaInst *Alloca = nullptr, + uint8_t ID = 0) { + return CreateStackObject(Size, assumeAligned(Alignment), isSpillSlot, + Alloca, ID); + } /// Create a new statically sized stack object that represents a spill slot, /// returning a nonnegative identifier to represent it. 
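CreateStackObject and ensureMaxAlignment now take llvm::Align, with the unsigned overloads kept only as transition shims. A minimal sketch of creating an aligned spill slot; the 16-byte figure and helper name are illustrative:

  #include "llvm/CodeGen/MachineFrameInfo.h"
  #include "llvm/Support/Alignment.h"
  #include <cstdint>
  using namespace llvm;

  int createAlignedSpillSlot(MachineFrameInfo &MFI, uint64_t Size) {
    // Prefer the Align overload; the unsigned form forwards via assumeAligned.
    int FI = MFI.CreateStackObject(Size, Align(16), /*isSpillSlot=*/true);
    MFI.ensureMaxAlignment(Align(16));
    return FI;
  }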
- int CreateSpillStackObject(uint64_t Size, unsigned Alignment); + int CreateSpillStackObject(uint64_t Size, Align Alignment); + /// FIXME: Remove this function when transition to Align is over. + inline int CreateSpillStackObject(uint64_t Size, unsigned Alignment) { + return CreateSpillStackObject(Size, assumeAligned(Alignment)); + } /// Remove or mark dead a statically sized stack object. void RemoveStackObject(int ObjectIdx) { @@ -744,7 +762,11 @@ public: /// Notify the MachineFrameInfo object that a variable sized object has been /// created. This must be created whenever a variable sized object is /// created, whether or not the index returned is actually used. - int CreateVariableSizedObject(unsigned Alignment, const AllocaInst *Alloca); + int CreateVariableSizedObject(Align Alignment, const AllocaInst *Alloca); + /// FIXME: Remove this function when transition to Align is over. + int CreateVariableSizedObject(unsigned Alignment, const AllocaInst *Alloca) { + return CreateVariableSizedObject(assumeAligned(Alignment), Alloca); + } /// Returns a reference to call saved info vector for the current function. const std::vector &getCalleeSavedInfo() const { diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h index 201c126ee52e..3a3176e51c51 100644 --- a/include/llvm/CodeGen/MachineFunction.h +++ b/include/llvm/CodeGen/MachineFunction.h @@ -36,6 +36,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Recycler.h" +#include "llvm/Target/TargetMachine.h" #include #include #include @@ -277,7 +278,7 @@ class MachineFunction { unsigned FunctionNumber; /// Alignment - The alignment of the function. - unsigned Alignment; + Align Alignment; /// ExposesReturnsTwice - True if the function calls setjmp or related /// functions with attribute "returns twice", but doesn't have @@ -322,7 +323,7 @@ class MachineFunction { std::vector> CodeViewAnnotations; /// CodeView heapallocsites. - std::vector> + std::vector> CodeViewHeapAllocSites; bool CallsEHReturn = false; @@ -400,6 +401,17 @@ private: /// Map a call instruction to call site arguments forwarding info. CallSiteInfoMap CallSitesInfo; + /// A helper function that returns call site info for a give call + /// instruction if debug entry value support is enabled. + CallSiteInfoMap::iterator getCallSiteInfo(const MachineInstr *MI) { + assert(MI->isCall() && + "Call site info refers only to call instructions!"); + + if (!Target.Options.EnableDebugEntryValues) + return CallSitesInfo.end(); + return CallSitesInfo.find(MI); + } + // Callbacks for insertion and removal. void handleInsertion(MachineInstr &MI); void handleRemoval(MachineInstr &MI); @@ -508,15 +520,16 @@ public: const WinEHFuncInfo *getWinEHFuncInfo() const { return WinEHInfo; } WinEHFuncInfo *getWinEHFuncInfo() { return WinEHInfo; } - /// getAlignment - Return the alignment (log2, not bytes) of the function. - unsigned getAlignment() const { return Alignment; } + /// getAlignment - Return the alignment of the function. + Align getAlignment() const { return Alignment; } - /// setAlignment - Set the alignment (log2, not bytes) of the function. - void setAlignment(unsigned A) { Alignment = A; } + /// setAlignment - Set the alignment of the function. + void setAlignment(Align A) { Alignment = A; } - /// ensureAlignment - Make sure the function is at least 1 << A bytes aligned. 
- void ensureAlignment(unsigned A) { - if (Alignment < A) Alignment = A; + /// ensureAlignment - Make sure the function is at least A bytes aligned. + void ensureAlignment(Align A) { + if (Alignment < A) + Alignment = A; } /// exposesReturnsTwice - Returns true if the function calls setjmp or @@ -935,10 +948,10 @@ public: } /// Record heapallocsites - void addCodeViewHeapAllocSite(MachineInstr *I, MDNode *MD); + void addCodeViewHeapAllocSite(MachineInstr *I, const MDNode *MD); - ArrayRef> - getCodeViewHeapAllocSites() const { + ArrayRef> + getCodeViewHeapAllocSites() const { return CodeViewHeapAllocSites; } @@ -976,12 +989,24 @@ public: return CallSitesInfo; } - /// Update call sites info by deleting entry for \p Old call instruction. - /// If \p New is present then transfer \p Old call info to it. This function - /// should be called before removing call instruction or before replacing - /// call instruction with new one. - void updateCallSiteInfo(const MachineInstr *Old, - const MachineInstr *New = nullptr); + /// Following functions update call site info. They should be called before + /// removing, replacing or copying call instruction. + + /// Move the call site info from \p Old to \New call site info. This function + /// is used when we are replacing one call instruction with another one to + /// the same callee. + void moveCallSiteInfo(const MachineInstr *Old, + const MachineInstr *New); + + /// Erase the call site info for \p MI. It is used to remove a call + /// instruction from the instruction stream. + void eraseCallSiteInfo(const MachineInstr *MI); + + /// Copy the call site info from \p Old to \ New. Its usage is when we are + /// making a copy of the instruction that will be inserted at different point + /// of the instruction stream. + void copyCallSiteInfo(const MachineInstr *Old, + const MachineInstr *New); }; //===--------------------------------------------------------------------===// diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h index c82c5b137507..c94ad292ec96 100644 --- a/include/llvm/CodeGen/MachineInstr.h +++ b/include/llvm/CodeGen/MachineInstr.h @@ -20,11 +20,9 @@ #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetOpcodes.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/InlineAsm.h" #include "llvm/MC/MCInstrDesc.h" @@ -38,6 +36,7 @@ namespace llvm { +class AAResults; template class ArrayRef; class DIExpression; class DILocalVariable; @@ -427,6 +426,22 @@ public: return getNumExplicitDefs() + MCID->getNumImplicitDefs(); } + /// Returns true if the instruction has implicit definition. + bool hasImplicitDef() const { + for (unsigned I = getNumExplicitOperands(), E = getNumOperands(); + I != E; ++I) { + const MachineOperand &MO = getOperand(I); + if (MO.isDef() && MO.isImplicit()) + return true; + } + return false; + } + + /// Returns the implicit operands number. + unsigned getNumImplicitOperands() const { + return getNumOperands() - getNumExplicitOperands(); + } + /// Return true if operand \p OpIdx is a subregister index. 
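The single updateCallSiteInfo entry point is split above into move, erase and copy operations that must run before the corresponding call instruction is rewritten, removed or duplicated. A sketch of the replace-call case; replaceCallWith and the surrounding names are invented for illustration:

  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineInstr.h"
  using namespace llvm;

  // NewMI is assumed to be a call to the same callee as OldMI.
  void replaceCallWith(MachineFunction &MF, MachineInstr *OldMI,
                       MachineInstr *NewMI) {
    MF.moveCallSiteInfo(OldMI, NewMI); // keep entry-value debug info intact
    OldMI->eraseFromParent();
  }

When a call is simply deleted rather than replaced, eraseCallSiteInfo(MI) plays the same role before eraseFromParent.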
bool isOperandSubregIdx(unsigned OpIdx) const { assert(getOperand(OpIdx).getType() == MachineOperand::MO_Immediate && @@ -602,6 +617,12 @@ public: return hasPropertyInBundle(1ULL << MCFlag, Type); } + /// Return true if this is an instruction that should go through the usual + /// legalization steps. + bool isPreISelOpcode(QueryType Type = IgnoreBundle) const { + return hasProperty(MCID::PreISelOpcode, Type); + } + /// Return true if this instruction can have a variable number of operands. /// In this case, the variable operands will be after the normal /// operands but before the implicit definitions and uses (if any are @@ -1020,15 +1041,13 @@ public: } /// A DBG_VALUE is an entry value iff its debug expression contains the - /// DW_OP_entry_value DWARF operation. - bool isDebugEntryValue() const { - return isDebugValue() && getDebugExpression()->isEntryValue(); - } + /// DW_OP_LLVM_entry_value operation. + bool isDebugEntryValue() const; /// Return true if the instruction is a debug value which describes a part of /// a variable as unavailable. bool isUndefDebugValue() const { - return isDebugValue() && getOperand(0).isReg() && !getOperand(0).getReg(); + return isDebugValue() && getOperand(0).isReg() && !getOperand(0).getReg().isValid(); } bool isPHI() const { @@ -1140,7 +1159,7 @@ public: /// is a read of a super-register. /// This does not count partial redefines of virtual registers as reads: /// %reg1024:6 = OP. - bool readsRegister(unsigned Reg, + bool readsRegister(Register Reg, const TargetRegisterInfo *TRI = nullptr) const { return findRegisterUseOperandIdx(Reg, false, TRI) != -1; } @@ -1148,20 +1167,20 @@ public: /// Return true if the MachineInstr reads the specified virtual register. /// Take into account that a partial define is a /// read-modify-write operation. - bool readsVirtualRegister(unsigned Reg) const { + bool readsVirtualRegister(Register Reg) const { return readsWritesVirtualRegister(Reg).first; } /// Return a pair of bools (reads, writes) indicating if this instruction /// reads or writes Reg. This also considers partial defines. /// If Ops is not null, all operand indices for Reg are added. - std::pair readsWritesVirtualRegister(unsigned Reg, + std::pair readsWritesVirtualRegister(Register Reg, SmallVectorImpl *Ops = nullptr) const; /// Return true if the MachineInstr kills the specified register. /// If TargetRegisterInfo is passed, then it also checks if there is /// a kill of a super-register. - bool killsRegister(unsigned Reg, + bool killsRegister(Register Reg, const TargetRegisterInfo *TRI = nullptr) const { return findRegisterUseOperandIdx(Reg, true, TRI) != -1; } @@ -1170,7 +1189,7 @@ public: /// If TargetRegisterInfo is passed, then it also checks /// if there is a def of a super-register. /// NOTE: It's ignoring subreg indices on virtual registers. - bool definesRegister(unsigned Reg, + bool definesRegister(Register Reg, const TargetRegisterInfo *TRI = nullptr) const { return findRegisterDefOperandIdx(Reg, false, false, TRI) != -1; } @@ -1178,38 +1197,38 @@ public: /// Return true if the MachineInstr modifies (fully define or partially /// define) the specified register. /// NOTE: It's ignoring subreg indices on virtual registers. - bool modifiesRegister(unsigned Reg, const TargetRegisterInfo *TRI) const { + bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const { return findRegisterDefOperandIdx(Reg, false, true, TRI) != -1; } /// Returns true if the register is dead in this machine instruction. 
/// If TargetRegisterInfo is passed, then it also checks /// if there is a dead def of a super-register. - bool registerDefIsDead(unsigned Reg, + bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI = nullptr) const { return findRegisterDefOperandIdx(Reg, true, false, TRI) != -1; } /// Returns true if the MachineInstr has an implicit-use operand of exactly /// the given register (not considering sub/super-registers). - bool hasRegisterImplicitUseOperand(unsigned Reg) const; + bool hasRegisterImplicitUseOperand(Register Reg) const; /// Returns the operand index that is a use of the specific register or -1 /// if it is not found. It further tightens the search criteria to a use /// that kills the register if isKill is true. - int findRegisterUseOperandIdx(unsigned Reg, bool isKill = false, + int findRegisterUseOperandIdx(Register Reg, bool isKill = false, const TargetRegisterInfo *TRI = nullptr) const; /// Wrapper for findRegisterUseOperandIdx, it returns /// a pointer to the MachineOperand rather than an index. - MachineOperand *findRegisterUseOperand(unsigned Reg, bool isKill = false, + MachineOperand *findRegisterUseOperand(Register Reg, bool isKill = false, const TargetRegisterInfo *TRI = nullptr) { int Idx = findRegisterUseOperandIdx(Reg, isKill, TRI); return (Idx == -1) ? nullptr : &getOperand(Idx); } const MachineOperand *findRegisterUseOperand( - unsigned Reg, bool isKill = false, + Register Reg, bool isKill = false, const TargetRegisterInfo *TRI = nullptr) const { return const_cast(this)-> findRegisterUseOperand(Reg, isKill, TRI); @@ -1221,14 +1240,14 @@ public: /// overlap the specified register. If TargetRegisterInfo is non-null, /// then it also checks if there is a def of a super-register. /// This may also return a register mask operand when Overlap is true. - int findRegisterDefOperandIdx(unsigned Reg, + int findRegisterDefOperandIdx(Register Reg, bool isDead = false, bool Overlap = false, const TargetRegisterInfo *TRI = nullptr) const; /// Wrapper for findRegisterDefOperandIdx, it returns /// a pointer to the MachineOperand rather than an index. MachineOperand * - findRegisterDefOperand(unsigned Reg, bool isDead = false, + findRegisterDefOperand(Register Reg, bool isDead = false, bool Overlap = false, const TargetRegisterInfo *TRI = nullptr) { int Idx = findRegisterDefOperandIdx(Reg, isDead, Overlap, TRI); @@ -1236,7 +1255,7 @@ public: } const MachineOperand * - findRegisterDefOperand(unsigned Reg, bool isDead = false, + findRegisterDefOperand(Register Reg, bool isDead = false, bool Overlap = false, const TargetRegisterInfo *TRI = nullptr) const { return const_cast(this)->findRegisterDefOperand( @@ -1283,7 +1302,7 @@ public: /// /// \pre CurRC must not be NULL. const TargetRegisterClass *getRegClassConstraintEffectForVReg( - unsigned Reg, const TargetRegisterClass *CurRC, + Register Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ExploreBundle = false) const; @@ -1346,39 +1365,39 @@ public: /// Replace all occurrences of FromReg with ToReg:SubIdx, /// properly composing subreg indices where necessary. - void substituteRegister(unsigned FromReg, unsigned ToReg, unsigned SubIdx, + void substituteRegister(Register FromReg, Register ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo); /// We have determined MI kills a register. Look for the /// operand that uses it and mark it as IsKill. If AddIfNotFound is true, /// add a implicit operand if it's not found. 
Returns true if the operand /// exists / is added. - bool addRegisterKilled(unsigned IncomingReg, + bool addRegisterKilled(Register IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound = false); /// Clear all kill flags affecting Reg. If RegInfo is provided, this includes /// all aliasing registers. - void clearRegisterKills(unsigned Reg, const TargetRegisterInfo *RegInfo); + void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo); /// We have determined MI defined a register without a use. /// Look for the operand that defines it and mark it as IsDead. If /// AddIfNotFound is true, add a implicit operand if it's not found. Returns /// true if the operand exists / is added. - bool addRegisterDead(unsigned Reg, const TargetRegisterInfo *RegInfo, + bool addRegisterDead(Register Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound = false); /// Clear all dead flags on operands defining register @p Reg. - void clearRegisterDeads(unsigned Reg); + void clearRegisterDeads(Register Reg); /// Mark all subregister defs of register @p Reg with the undef flag. /// This function is used when we determined to have a subregister def in an /// otherwise undefined super register. - void setRegisterDefReadUndef(unsigned Reg, bool IsUndef = true); + void setRegisterDefReadUndef(Register Reg, bool IsUndef = true); /// We have determined MI defines a register. Make sure there is an operand /// defining Reg. - void addRegisterDefined(unsigned Reg, + void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo = nullptr); /// Mark every physreg used by this instruction as @@ -1386,13 +1405,13 @@ public: /// /// On instructions with register mask operands, also add implicit-def /// operands for all registers in UsedRegs. - void setPhysRegsDeadExcept(ArrayRef UsedRegs, + void setPhysRegsDeadExcept(ArrayRef UsedRegs, const TargetRegisterInfo &TRI); /// Return true if it is safe to move this instruction. If /// SawStore is set to true, it means that there is a store (or call) between /// the instruction's location and its intended destination. - bool isSafeToMove(AliasAnalysis *AA, bool &SawStore) const; + bool isSafeToMove(AAResults *AA, bool &SawStore) const; /// Returns true if this instruction's memory access aliases the memory /// access of Other. @@ -1404,7 +1423,7 @@ public: /// @param AA Optional alias analysis, used to compare memory operands. /// @param Other MachineInstr to check aliasing against. /// @param UseTBAA Whether to pass TBAA information to alias analysis. - bool mayAlias(AliasAnalysis *AA, const MachineInstr &Other, bool UseTBAA) const; + bool mayAlias(AAResults *AA, const MachineInstr &Other, bool UseTBAA) const; /// Return true if this instruction may have an ordered /// or volatile memory reference, or if the information describing the memory @@ -1419,7 +1438,7 @@ public: /// argument area of a function (if it does not change). If the instruction /// does multiple loads, this returns true only if all of the loads are /// dereferenceable and invariant. - bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const; + bool isDereferenceableInvariantLoad(AAResults *AA) const; /// If the specified instruction is a PHI that always merges together the /// same virtual register, return the register, otherwise return 0. @@ -1603,9 +1622,15 @@ public: /// Scan instructions following MI and collect any matching DBG_VALUEs. 
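The register queries above now take llvm::Register, and the alias-analysis parameter is spelled AAResults rather than the AliasAnalysis typedef; callers only need a recompile. For illustration (the helper name is invented):

  #include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/CodeGen/MachineInstr.h"
  using namespace llvm;

  // True if MI is safe to move and neither reads nor writes Reg.
  bool canMoveIgnoring(MachineInstr &MI, Register Reg, AAResults *AA,
                       const TargetRegisterInfo *TRI) {
    bool SawStore = false;
    return MI.isSafeToMove(AA, SawStore) && !MI.readsRegister(Reg, TRI) &&
           !MI.definesRegister(Reg, TRI);
  }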
void collectDebugValues(SmallVectorImpl &DbgValues); - /// Find all DBG_VALUEs immediately following this instruction that point - /// to a register def in this instruction and point them to \p Reg instead. - void changeDebugValuesDefReg(unsigned Reg); + /// Find all DBG_VALUEs that point to the register def in this instruction + /// and point them to \p Reg instead. + void changeDebugValuesDefReg(Register Reg); + + /// Returns the Intrinsic::ID for this instruction. + /// \pre Must have an intrinsic ID operand. + unsigned getIntrinsicID() const { + return getOperand(getNumExplicitDefs()).getIntrinsicID(); + } private: /// If this instruction is embedded into a MachineFunction, return the @@ -1630,7 +1655,7 @@ private: /// this MI and the given operand index \p OpIdx. /// If the related operand does not constrained Reg, this returns CurRC. const TargetRegisterClass *getRegClassConstraintEffectForVRegImpl( - unsigned OpIdx, unsigned Reg, const TargetRegisterClass *CurRC, + unsigned OpIdx, Register Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const; }; diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h index 6d7fb72b6bd1..880d4829ac7e 100644 --- a/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/include/llvm/CodeGen/MachineInstrBuilder.h @@ -85,7 +85,7 @@ public: Register getReg(unsigned Idx) const { return MI->getOperand(Idx).getReg(); } /// Add a new virtual register operand. - const MachineInstrBuilder &addReg(unsigned RegNo, unsigned flags = 0, + const MachineInstrBuilder &addReg(Register RegNo, unsigned flags = 0, unsigned SubReg = 0) const { assert((flags & 0x1) == 0 && "Passing in 'true' to addReg is forbidden! Use enums instead."); @@ -104,14 +104,14 @@ public: } /// Add a virtual register definition operand. - const MachineInstrBuilder &addDef(unsigned RegNo, unsigned Flags = 0, + const MachineInstrBuilder &addDef(Register RegNo, unsigned Flags = 0, unsigned SubReg = 0) const { return addReg(RegNo, Flags | RegState::Define, SubReg); } /// Add a virtual register use operand. It is an error for Flags to contain /// `RegState::Define` when calling this function. 
- const MachineInstrBuilder &addUse(unsigned RegNo, unsigned Flags = 0, + const MachineInstrBuilder &addUse(Register RegNo, unsigned Flags = 0, unsigned SubReg = 0) const { assert(!(Flags & RegState::Define) && "Misleading addUse defines register, use addReg instead."); @@ -135,7 +135,7 @@ public: } const MachineInstrBuilder &addMBB(MachineBasicBlock *MBB, - unsigned char TargetFlags = 0) const { + unsigned TargetFlags = 0) const { MI->addOperand(*MF, MachineOperand::CreateMBB(MBB, TargetFlags)); return *this; } @@ -145,42 +145,42 @@ public: return *this; } - const MachineInstrBuilder &addConstantPoolIndex(unsigned Idx, - int Offset = 0, - unsigned char TargetFlags = 0) const { + const MachineInstrBuilder & + addConstantPoolIndex(unsigned Idx, int Offset = 0, + unsigned TargetFlags = 0) const { MI->addOperand(*MF, MachineOperand::CreateCPI(Idx, Offset, TargetFlags)); return *this; } const MachineInstrBuilder &addTargetIndex(unsigned Idx, int64_t Offset = 0, - unsigned char TargetFlags = 0) const { + unsigned TargetFlags = 0) const { MI->addOperand(*MF, MachineOperand::CreateTargetIndex(Idx, Offset, TargetFlags)); return *this; } const MachineInstrBuilder &addJumpTableIndex(unsigned Idx, - unsigned char TargetFlags = 0) const { + unsigned TargetFlags = 0) const { MI->addOperand(*MF, MachineOperand::CreateJTI(Idx, TargetFlags)); return *this; } const MachineInstrBuilder &addGlobalAddress(const GlobalValue *GV, int64_t Offset = 0, - unsigned char TargetFlags = 0) const { + unsigned TargetFlags = 0) const { MI->addOperand(*MF, MachineOperand::CreateGA(GV, Offset, TargetFlags)); return *this; } const MachineInstrBuilder &addExternalSymbol(const char *FnName, - unsigned char TargetFlags = 0) const { + unsigned TargetFlags = 0) const { MI->addOperand(*MF, MachineOperand::CreateES(FnName, TargetFlags)); return *this; } const MachineInstrBuilder &addBlockAddress(const BlockAddress *BA, int64_t Offset = 0, - unsigned char TargetFlags = 0) const { + unsigned TargetFlags = 0) const { MI->addOperand(*MF, MachineOperand::CreateBA(BA, Offset, TargetFlags)); return *this; } @@ -250,6 +250,11 @@ public: return *this; } + const MachineInstrBuilder &addShuffleMask(const Constant *Val) const { + MI->addOperand(*MF, MachineOperand::CreateShuffleMask(Val)); + return *this; + } + const MachineInstrBuilder &addSym(MCSymbol *Sym, unsigned char TargetFlags = 0) const { MI->addOperand(*MF, MachineOperand::CreateMCSymbol(Sym, TargetFlags)); @@ -316,7 +321,7 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, /// This version of the builder sets up the first operand as a /// destination virtual register. 
inline MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, - const MCInstrDesc &MCID, unsigned DestReg) { + const MCInstrDesc &MCID, Register DestReg) { return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, DL)) .addReg(DestReg, RegState::Define); } @@ -327,7 +332,7 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, const DebugLoc &DL, const MCInstrDesc &MCID, - unsigned DestReg) { + Register DestReg) { MachineFunction &MF = *BB.getParent(); MachineInstr *MI = MF.CreateMachineInstr(MCID, DL); BB.insert(I, MI); @@ -343,7 +348,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::instr_iterator I, const DebugLoc &DL, const MCInstrDesc &MCID, - unsigned DestReg) { + Register DestReg) { MachineFunction &MF = *BB.getParent(); MachineInstr *MI = MF.CreateMachineInstr(MCID, DL); BB.insert(I, MI); @@ -352,7 +357,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr &I, const DebugLoc &DL, const MCInstrDesc &MCID, - unsigned DestReg) { + Register DestReg) { // Calling the overload for instr_iterator is always correct. However, the // definition is not available in headers, so inline the check. if (I.isInsideBundle()) @@ -362,7 +367,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr &I, inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr *I, const DebugLoc &DL, const MCInstrDesc &MCID, - unsigned DestReg) { + Register DestReg) { return BuildMI(BB, *I, DL, MCID, DestReg); } @@ -416,7 +421,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, const DebugLoc &DL, /// end of the given MachineBasicBlock, and sets up the first operand as a /// destination virtual register. inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, const DebugLoc &DL, - const MCInstrDesc &MCID, unsigned DestReg) { + const MCInstrDesc &MCID, Register DestReg) { return BuildMI(*BB, BB->end(), DL, MCID, DestReg); } @@ -426,7 +431,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, const DebugLoc &DL, /// second operand is an immediate. MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID, bool IsIndirect, - unsigned Reg, const MDNode *Variable, + Register Reg, const MDNode *Variable, const MDNode *Expr); /// This version of the builder builds a DBG_VALUE intrinsic @@ -442,7 +447,7 @@ MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, const DebugLoc &DL, const MCInstrDesc &MCID, bool IsIndirect, - unsigned Reg, const MDNode *Variable, + Register Reg, const MDNode *Variable, const MDNode *Expr); /// This version of the builder builds a DBG_VALUE intrinsic @@ -490,16 +495,13 @@ inline unsigned getRenamableRegState(bool B) { /// Get all register state flags from machine operand \p RegOp. 
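The BuildMI overloads above accept llvm::Register destinations, so freshly created virtual registers can be passed without narrowing to unsigned. A minimal sketch; emitCopy is an invented helper:

  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/CodeGen/TargetInstrInfo.h"
  using namespace llvm;

  // Emit Dst = COPY Src before I and return the new virtual register.
  Register emitCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                    const DebugLoc &DL, const TargetInstrInfo &TII,
                    MachineRegisterInfo &MRI, const TargetRegisterClass *RC,
                    Register Src) {
    Register Dst = MRI.createVirtualRegister(RC);
    BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), Dst).addReg(Src);
    return Dst;
  }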
inline unsigned getRegState(const MachineOperand &RegOp) { assert(RegOp.isReg() && "Not a register operand"); - return getDefRegState(RegOp.isDef()) | - getImplRegState(RegOp.isImplicit()) | - getKillRegState(RegOp.isKill()) | - getDeadRegState(RegOp.isDead()) | - getUndefRegState(RegOp.isUndef()) | - getInternalReadRegState(RegOp.isInternalRead()) | - getDebugRegState(RegOp.isDebug()) | - getRenamableRegState( - TargetRegisterInfo::isPhysicalRegister(RegOp.getReg()) && - RegOp.isRenamable()); + return getDefRegState(RegOp.isDef()) | getImplRegState(RegOp.isImplicit()) | + getKillRegState(RegOp.isKill()) | getDeadRegState(RegOp.isDead()) | + getUndefRegState(RegOp.isUndef()) | + getInternalReadRegState(RegOp.isInternalRead()) | + getDebugRegState(RegOp.isDebug()) | + getRenamableRegState(Register::isPhysicalRegister(RegOp.getReg()) && + RegOp.isRenamable()); } /// Helper class for constructing bundles of MachineInstrs. diff --git a/include/llvm/CodeGen/MachineLoopUtils.h b/include/llvm/CodeGen/MachineLoopUtils.h new file mode 100644 index 000000000000..41379b75d00a --- /dev/null +++ b/include/llvm/CodeGen/MachineLoopUtils.h @@ -0,0 +1,41 @@ +//=- MachineLoopUtils.h - Helper functions for manipulating loops -*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_MACHINELOOPUTILS_H +#define LLVM_LIB_CODEGEN_MACHINELOOPUTILS_H + +namespace llvm { +class MachineBasicBlock; +class MachineRegisterInfo; +class TargetInstrInfo; + +enum LoopPeelDirection { + LPD_Front, ///< Peel the first iteration of the loop. + LPD_Back ///< Peel the last iteration of the loop. +}; + +/// Peels a single block loop. Loop must have two successors, one of which +/// must be itself. Similarly it must have two predecessors, one of which must +/// be itself. +/// +/// The loop block is copied and inserted into the CFG such that two copies of +/// the loop follow on from each other. The copy is inserted either before or +/// after the loop based on Direction. +/// +/// Phis are updated and an unconditional branch inserted at the end of the +/// clone so as to execute a single iteration. +/// +/// The trip count of Loop is not updated. +MachineBasicBlock *PeelSingleBlockLoop(LoopPeelDirection Direction, + MachineBasicBlock *Loop, + MachineRegisterInfo &MRI, + const TargetInstrInfo *TII); + +} // namespace llvm + +#endif // LLVM_LIB_CODEGEN_MACHINELOOPUTILS_H diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h index 65f706302bc2..33a48a235e18 100644 --- a/include/llvm/CodeGen/MachineMemOperand.h +++ b/include/llvm/CodeGen/MachineMemOperand.h @@ -293,8 +293,6 @@ public: /// Support for operator<<. 
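MachineLoopUtils.h is new in this import; PeelSingleBlockLoop clones a single-block loop so that one iteration runs outside it. A sketch of peeling the first iteration, assuming LoopBB satisfies the single-block precondition stated in the header:

  #include "llvm/CodeGen/MachineLoopUtils.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/CodeGen/TargetInstrInfo.h"
  using namespace llvm;

  MachineBasicBlock *peelFirstIteration(MachineBasicBlock *LoopBB,
                                        MachineRegisterInfo &MRI,
                                        const TargetInstrInfo *TII) {
    // The clone is inserted before the loop and ends in an unconditional
    // branch so it executes exactly one iteration; phis are rewritten by the
    // utility, and the trip count is not adjusted.
    return PeelSingleBlockLoop(LPD_Front, LoopBB, MRI, TII);
  }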
/// @{ - void print(raw_ostream &OS) const; - void print(raw_ostream &OS, ModuleSlotTracker &MST) const; void print(raw_ostream &OS, ModuleSlotTracker &MST, SmallVectorImpl &SSNs, const LLVMContext &Context, const MachineFrameInfo *MFI, const TargetInstrInfo *TII) const; @@ -319,11 +317,6 @@ public: } }; -inline raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MRO) { - MRO.print(OS); - return OS; -} - } // End llvm namespace #endif diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h index 4ff5c7fd013a..6902dada2423 100644 --- a/include/llvm/CodeGen/MachineModuleInfo.h +++ b/include/llvm/CodeGen/MachineModuleInfo.h @@ -33,6 +33,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/IR/PassManager.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Pass.h" @@ -74,7 +75,10 @@ protected: /// made by different debugging and exception handling schemes and reformated /// for specific use. /// -class MachineModuleInfo : public ImmutablePass { +class MachineModuleInfo { + friend class MachineModuleInfoWrapperPass; + friend class MachineModuleAnalysis; + const LLVMTargetMachine &TM; /// This is the MCContext used for the entire code generator. @@ -140,15 +144,17 @@ class MachineModuleInfo : public ImmutablePass { const Function *LastRequest = nullptr; ///< Used for shortcut/cache. MachineFunction *LastResult = nullptr; ///< Used for shortcut/cache. -public: - static char ID; // Pass identification, replacement for typeid + MachineModuleInfo &operator=(MachineModuleInfo &&MMII) = delete; +public: explicit MachineModuleInfo(const LLVMTargetMachine *TM = nullptr); - ~MachineModuleInfo() override; - // Initialization and Finalization - bool doInitialization(Module &) override; - bool doFinalization(Module &) override; + MachineModuleInfo(MachineModuleInfo &&MMII); + + ~MachineModuleInfo(); + + void initialize(); + void finalize(); const LLVMTargetMachine &getTarget() const { return TM; } @@ -254,6 +260,38 @@ public: /// \} }; // End class MachineModuleInfo +class MachineModuleInfoWrapperPass : public ImmutablePass { + MachineModuleInfo MMI; + +public: + static char ID; // Pass identification, replacement for typeid + explicit MachineModuleInfoWrapperPass(const LLVMTargetMachine *TM = nullptr); + + // Initialization and Finalization + bool doInitialization(Module &) override; + bool doFinalization(Module &) override; + + MachineModuleInfo &getMMI() { return MMI; } + const MachineModuleInfo &getMMI() const { return MMI; } +}; + +/// An analysis that produces \c MachineInfo for a module. +class MachineModuleAnalysis : public AnalysisInfoMixin { + friend AnalysisInfoMixin; + static AnalysisKey Key; + + const LLVMTargetMachine *TM; + +public: + /// Provide the result type for this analysis pass. + using Result = MachineModuleInfo; + + MachineModuleAnalysis(const LLVMTargetMachine *TM) : TM(TM) {} + + /// Run the analysis pass and produce machine module information. 
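MachineModuleInfo stops being an ImmutablePass above; legacy-PM code now reaches it through MachineModuleInfoWrapperPass, while the new pass manager uses MachineModuleAnalysis. A sketch of the legacy-PM side; MyModulePass is an invented placeholder:

  #include "llvm/CodeGen/MachineModuleInfo.h"
  #include "llvm/Pass.h"
  using namespace llvm;

  struct MyModulePass : ModulePass {
    static char ID;
    MyModulePass() : ModulePass(ID) {}

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<MachineModuleInfoWrapperPass>();
      AU.setPreservesAll();
    }

    bool runOnModule(Module &M) override {
      MachineModuleInfo &MMI =
          getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
      (void)MMI; // query per-function MachineFunctions, the MCContext, etc.
      return false;
    }
  };
  char MyModulePass::ID = 0;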
+ MachineModuleInfo run(Module &M, ModuleAnalysisManager &); +}; + } // end namespace llvm #endif // LLVM_CODEGEN_MACHINEMODULEINFO_H diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h index 2152c7582e5a..df914dc2d85e 100644 --- a/include/llvm/CodeGen/MachineOperand.h +++ b/include/llvm/CodeGen/MachineOperand.h @@ -23,6 +23,7 @@ namespace llvm { class BlockAddress; +class Constant; class ConstantFP; class ConstantInt; class GlobalValue; @@ -68,7 +69,8 @@ public: MO_CFIIndex, ///< MCCFIInstruction index. MO_IntrinsicID, ///< Intrinsic ID for ISel MO_Predicate, ///< Generic predicate for ISel - MO_Last = MO_Predicate, + MO_ShuffleMask, ///< Other IR Constant for ISel (shuffle masks) + MO_Last = MO_ShuffleMask }; private: @@ -172,6 +174,7 @@ private: unsigned CFIIndex; // For MO_CFI. Intrinsic::ID IntrinsicID; // For MO_IntrinsicID. unsigned Pred; // For MO_Predicate + const Constant *ShuffleMask; // For MO_ShuffleMask struct { // For MO_Register. // Register number is in SmallContents.RegNo. @@ -341,6 +344,7 @@ public: bool isCFIIndex() const { return OpKind == MO_CFIIndex; } bool isIntrinsicID() const { return OpKind == MO_IntrinsicID; } bool isPredicate() const { return OpKind == MO_Predicate; } + bool isShuffleMask() const { return OpKind == MO_ShuffleMask; } //===--------------------------------------------------------------------===// // Accessors for Register Operands //===--------------------------------------------------------------------===// @@ -455,7 +459,7 @@ public: /// Change the register this operand corresponds to. /// - void setReg(unsigned Reg); + void setReg(Register Reg); void setSubReg(unsigned subReg) { assert(isReg() && "Wrong MachineOperand mutator"); @@ -468,13 +472,13 @@ public: /// using TargetRegisterInfo to compose the subreg indices if necessary. /// Reg must be a virtual register, SubIdx can be 0. /// - void substVirtReg(unsigned Reg, unsigned SubIdx, const TargetRegisterInfo&); + void substVirtReg(Register Reg, unsigned SubIdx, const TargetRegisterInfo&); /// substPhysReg - Substitute the current register with the physical register /// Reg, taking any existing SubReg into account. For instance, /// substPhysReg(%eax) will change %reg1024:sub_8bit to %al. /// - void substPhysReg(unsigned Reg, const TargetRegisterInfo&); + void substPhysReg(MCRegister Reg, const TargetRegisterInfo&); void setIsUse(bool Val = true) { setIsDef(!Val); } @@ -579,6 +583,11 @@ public: return Contents.Pred; } + const Constant *getShuffleMask() const { + assert(isShuffleMask() && "Wrong MachineOperand accessor"); + return Contents.ShuffleMask; + } + /// Return the offset from the symbol in this operand. This always returns 0 /// for ExternalSymbol operands. int64_t getOffset() const { @@ -717,11 +726,11 @@ public: void ChangeToFPImmediate(const ConstantFP *FPImm); /// ChangeToES - Replace this operand with a new external symbol operand. - void ChangeToES(const char *SymName, unsigned char TargetFlags = 0); + void ChangeToES(const char *SymName, unsigned TargetFlags = 0); /// ChangeToGA - Replace this operand with a new global address operand. void ChangeToGA(const GlobalValue *GV, int64_t Offset, - unsigned char TargetFlags = 0); + unsigned TargetFlags = 0); /// ChangeToMCSymbol - Replace this operand with a new MC symbol operand. void ChangeToMCSymbol(MCSymbol *Sym); @@ -731,12 +740,12 @@ public: /// Replace this operand with a target index. 
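MO_ShuffleMask above lets instruction selection carry an IR shuffle-mask constant as a first-class operand (see also addShuffleMask on MachineInstrBuilder earlier in the patch). A trivial accessor sketch; the helper name is invented:

  #include "llvm/CodeGen/MachineOperand.h"
  using namespace llvm;

  // Returns the IR mask constant if MO is a shuffle-mask operand, else null.
  const Constant *getShuffleMaskOrNull(const MachineOperand &MO) {
    return MO.isShuffleMask() ? MO.getShuffleMask() : nullptr;
  }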
void ChangeToTargetIndex(unsigned Idx, int64_t Offset, - unsigned char TargetFlags = 0); + unsigned TargetFlags = 0); /// ChangeToRegister - Replace this operand with a new register operand of /// the specified value. If an operand is known to be an register already, /// the setReg method should be used. - void ChangeToRegister(unsigned Reg, bool isDef, bool isImp = false, + void ChangeToRegister(Register Reg, bool isDef, bool isImp = false, bool isKill = false, bool isDead = false, bool isUndef = false, bool isDebug = false); @@ -762,7 +771,7 @@ public: return Op; } - static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp = false, + static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp = false, bool isKill = false, bool isDead = false, bool isUndef = false, bool isEarlyClobber = false, @@ -788,7 +797,7 @@ public: return Op; } static MachineOperand CreateMBB(MachineBasicBlock *MBB, - unsigned char TargetFlags = 0) { + unsigned TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_MachineBasicBlock); Op.setMBB(MBB); Op.setTargetFlags(TargetFlags); @@ -800,7 +809,7 @@ public: return Op; } static MachineOperand CreateCPI(unsigned Idx, int Offset, - unsigned char TargetFlags = 0) { + unsigned TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_ConstantPoolIndex); Op.setIndex(Idx); Op.setOffset(Offset); @@ -808,21 +817,21 @@ public: return Op; } static MachineOperand CreateTargetIndex(unsigned Idx, int64_t Offset, - unsigned char TargetFlags = 0) { + unsigned TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_TargetIndex); Op.setIndex(Idx); Op.setOffset(Offset); Op.setTargetFlags(TargetFlags); return Op; } - static MachineOperand CreateJTI(unsigned Idx, unsigned char TargetFlags = 0) { + static MachineOperand CreateJTI(unsigned Idx, unsigned TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_JumpTableIndex); Op.setIndex(Idx); Op.setTargetFlags(TargetFlags); return Op; } static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, - unsigned char TargetFlags = 0) { + unsigned TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_GlobalAddress); Op.Contents.OffsetedInfo.Val.GV = GV; Op.setOffset(Offset); @@ -830,7 +839,7 @@ public: return Op; } static MachineOperand CreateES(const char *SymName, - unsigned char TargetFlags = 0) { + unsigned TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_ExternalSymbol); Op.Contents.OffsetedInfo.Val.SymbolName = SymName; Op.setOffset(0); // Offset is always 0. 
@@ -838,7 +847,7 @@ public: return Op; } static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset, - unsigned char TargetFlags = 0) { + unsigned TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_BlockAddress); Op.Contents.OffsetedInfo.Val.BA = BA; Op.setOffset(Offset); @@ -876,7 +885,7 @@ public: } static MachineOperand CreateMCSymbol(MCSymbol *Sym, - unsigned char TargetFlags = 0) { + unsigned TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_MCSymbol); Op.Contents.Sym = Sym; Op.setOffset(0); @@ -902,6 +911,12 @@ public: return Op; } + static MachineOperand CreateShuffleMask(const Constant *C) { + MachineOperand Op(MachineOperand::MO_ShuffleMask); + Op.Contents.ShuffleMask = C; + return Op; + } + friend class MachineInstr; friend class MachineRegisterInfo; diff --git a/include/llvm/CodeGen/MachinePipeliner.h b/include/llvm/CodeGen/MachinePipeliner.h index 03ca53072685..e9cf7e115bff 100644 --- a/include/llvm/CodeGen/MachinePipeliner.h +++ b/include/llvm/CodeGen/MachinePipeliner.h @@ -40,6 +40,8 @@ #ifndef LLVM_LIB_CODEGEN_MACHINEPIPELINER_H #define LLVM_LIB_CODEGEN_MACHINEPIPELINER_H +#include "llvm/Analysis/AliasAnalysis.h" + #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" @@ -148,7 +150,7 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs { /// We may create a new instruction, so remember it because it /// must be deleted when the pass is finished. - SmallPtrSet NewMIs; + DenseMap NewMIs; /// Ordered list of DAG postprocessing steps. std::vector> Mutations; @@ -200,7 +202,7 @@ public: RegClassInfo(rci), II_setByPragma(II), Topo(SUnits, &ExitSU) { P.MF->getSubtarget().getSMSMutations(Mutations); if (SwpEnableCopyToPhi) - Mutations.push_back(llvm::make_unique()); + Mutations.push_back(std::make_unique()); } void schedule() override; @@ -297,53 +299,8 @@ private: void computeNodeOrder(NodeSetType &NodeSets); void checkValidNodeOrder(const NodeSetType &Circuits) const; bool schedulePipeline(SMSchedule &Schedule); - void generatePipelinedLoop(SMSchedule &Schedule); - void generateProlog(SMSchedule &Schedule, unsigned LastStage, - MachineBasicBlock *KernelBB, ValueMapTy *VRMap, - MBBVectorTy &PrologBBs); - void generateEpilog(SMSchedule &Schedule, unsigned LastStage, - MachineBasicBlock *KernelBB, ValueMapTy *VRMap, - MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs); - void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1, - MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, - SMSchedule &Schedule, ValueMapTy *VRMap, - InstrMapTy &InstrMap, unsigned LastStageNum, - unsigned CurStageNum, bool IsLast); - void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1, - MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, - SMSchedule &Schedule, ValueMapTy *VRMap, - InstrMapTy &InstrMap, unsigned LastStageNum, - unsigned CurStageNum, bool IsLast); - void removeDeadInstructions(MachineBasicBlock *KernelBB, - MBBVectorTy &EpilogBBs); - void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs, - SMSchedule &Schedule); - void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs, - MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs, - SMSchedule &Schedule, ValueMapTy *VRMap); bool computeDelta(MachineInstr &MI, unsigned &Delta); - void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI, - unsigned Num); - MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum, - unsigned InstStageNum); - MachineInstr 
*cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum, - unsigned InstStageNum, - SMSchedule &Schedule); - void updateInstruction(MachineInstr *NewMI, bool LastDef, - unsigned CurStageNum, unsigned InstrStageNum, - SMSchedule &Schedule, ValueMapTy *VRMap); MachineInstr *findDefInLoop(unsigned Reg); - unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal, - unsigned LoopStage, ValueMapTy *VRMap, - MachineBasicBlock *BB); - void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum, - SMSchedule &Schedule, ValueMapTy *VRMap, - InstrMapTy &InstrMap); - void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule, - InstrMapTy &InstrMap, unsigned CurStageNum, - unsigned PhiNum, MachineInstr *Phi, - unsigned OldReg, unsigned NewReg, - unsigned PrevReg = 0); bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos, unsigned &OffsetPos, unsigned &NewBase, int64_t &NewOffset); @@ -529,12 +486,6 @@ private: /// Map from instruction to execution cycle. std::map InstrToCycle; - /// Map for each register and the max difference between its uses and def. - /// The first element in the pair is the max difference in stages. The - /// second is true if the register defines a Phi value and loop value is - /// scheduled before the Phi. - std::map> RegToStageDiff; - /// Keep track of the first cycle value in the schedule. It starts /// as zero, but the algorithm allows negative values. int FirstCycle = 0; @@ -560,7 +511,6 @@ public: void reset() { ScheduledInstrs.clear(); InstrToCycle.clear(); - RegToStageDiff.clear(); FirstCycle = 0; LastCycle = 0; InitiationInterval = 0; @@ -620,28 +570,6 @@ public: return (LastCycle - FirstCycle) / InitiationInterval; } - /// Return the max. number of stages/iterations that can occur between a - /// register definition and its uses. - unsigned getStagesForReg(int Reg, unsigned CurStage) { - std::pair Stages = RegToStageDiff[Reg]; - if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second) - return 1; - return Stages.first; - } - - /// The number of stages for a Phi is a little different than other - /// instructions. The minimum value computed in RegToStageDiff is 1 - /// because we assume the Phi is needed for at least 1 iteration. - /// This is not the case if the loop value is scheduled prior to the - /// Phi in the same stage. This function returns the number of stages - /// or iterations needed between the Phi definition and any uses. - unsigned getStagesForPhi(int Reg) { - std::pair Stages = RegToStageDiff[Reg]; - if (Stages.second) - return Stages.first; - return Stages.first - 1; - } - /// Return the instructions that are scheduled at the specified cycle. std::deque &getInstructions(int cycle) { return ScheduledInstrs[cycle]; diff --git a/include/llvm/CodeGen/MachinePostDominators.h b/include/llvm/CodeGen/MachinePostDominators.h index b67e6b52ac8f..cb258b5e7b21 100644 --- a/include/llvm/CodeGen/MachinePostDominators.h +++ b/include/llvm/CodeGen/MachinePostDominators.h @@ -16,68 +16,76 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include namespace llvm { /// -/// PostDominatorTree Class - Concrete subclass of DominatorTree that is used -/// to compute the post-dominator tree. +/// MachinePostDominatorTree - an analysis pass wrapper for DominatorTree +/// used to compute the post-dominator tree for MachineFunctions. 
/// -struct MachinePostDominatorTree : public MachineFunctionPass { -private: - PostDomTreeBase *DT; +class MachinePostDominatorTree : public MachineFunctionPass { + using PostDomTreeT = PostDomTreeBase; + std::unique_ptr PDT; public: static char ID; MachinePostDominatorTree(); - ~MachinePostDominatorTree() override; - FunctionPass *createMachinePostDominatorTreePass(); const SmallVectorImpl &getRoots() const { - return DT->getRoots(); + return PDT->getRoots(); } - MachineDomTreeNode *getRootNode() const { - return DT->getRootNode(); - } + MachineDomTreeNode *getRootNode() const { return PDT->getRootNode(); } MachineDomTreeNode *operator[](MachineBasicBlock *BB) const { - return DT->getNode(BB); + return PDT->getNode(BB); } MachineDomTreeNode *getNode(MachineBasicBlock *BB) const { - return DT->getNode(BB); + return PDT->getNode(BB); } bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const { - return DT->dominates(A, B); + return PDT->dominates(A, B); } bool dominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const { - return DT->dominates(A, B); + return PDT->dominates(A, B); } bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const { - return DT->properlyDominates(A, B); + return PDT->properlyDominates(A, B); } bool properlyDominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const { - return DT->properlyDominates(A, B); + return PDT->properlyDominates(A, B); + } + + bool isVirtualRoot(const MachineDomTreeNode *Node) const { + return PDT->isVirtualRoot(Node); } MachineBasicBlock *findNearestCommonDominator(MachineBasicBlock *A, - MachineBasicBlock *B) { - return DT->findNearestCommonDominator(A, B); + MachineBasicBlock *B) const { + return PDT->findNearestCommonDominator(A, B); } + /// Returns the nearest common dominator of the given blocks. + /// If that tree node is a virtual root, a nullptr will be returned. + MachineBasicBlock * + findNearestCommonDominator(ArrayRef Blocks) const; + bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override; + void releaseMemory() override { PDT.reset(nullptr); } + void verifyAnalysis() const override; void print(llvm::raw_ostream &OS, const Module *M = nullptr) const override; }; } //end of namespace llvm diff --git a/include/llvm/CodeGen/MachineRegionInfo.h b/include/llvm/CodeGen/MachineRegionInfo.h index 6d9fb9b9100a..eeb69fef2c6b 100644 --- a/include/llvm/CodeGen/MachineRegionInfo.h +++ b/include/llvm/CodeGen/MachineRegionInfo.h @@ -22,7 +22,7 @@ namespace llvm { -struct MachinePostDominatorTree; +class MachinePostDominatorTree; class MachineRegion; class MachineRegionNode; class MachineRegionInfo; diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h index b5deed1f5010..488a5a55a169 100644 --- a/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/include/llvm/CodeGen/MachineRegisterInfo.h @@ -107,16 +107,16 @@ private: /// getRegUseDefListHead - Return the head pointer for the register use/def /// list for the specified virtual or physical register. 
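A rough sketch of how the reworked MachinePostDominatorTree wrapper above is usually consumed. Only the MachinePostDominatorTree calls are taken from the declarations in this patch; the pass name PostDomQuerySketch and the surrounding boilerplate are illustrative assumptions, and pass registration is omitted.

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/CodeGen/MachineFunctionPass.h"
  #include "llvm/CodeGen/MachinePostDominators.h"

  using namespace llvm;

  namespace {
  // Hypothetical client pass, shown only to exercise the wrapper's API.
  struct PostDomQuerySketch : public MachineFunctionPass {
    static char ID;
    PostDomQuerySketch() : MachineFunctionPass(ID) {}

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<MachinePostDominatorTree>();
      AU.setPreservesAll();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    bool runOnMachineFunction(MachineFunction &MF) override {
      auto &MPDT = getAnalysis<MachinePostDominatorTree>();
      MachineBasicBlock &Entry = MF.front();
      SmallVector<MachineBasicBlock *, 8> Blocks;
      unsigned NumPostDominatingEntry = 0;
      for (MachineBasicBlock &MBB : MF) {
        // dominates(A, B) on a post-dominator tree asks "does A post-dominate
        // B", i.e. every path from B to the function exit passes through A.
        if (MPDT.dominates(&MBB, &Entry))
          ++NumPostDominatingEntry;
        Blocks.push_back(&MBB);
      }
      // Overload added by this patch: nearest common post-dominator of a set
      // of blocks; nullptr means the answer is the virtual root.
      MachineBasicBlock *NCD = MPDT.findNearestCommonDominator(Blocks);
      (void)NumPostDominatingEntry;
      (void)NCD;
      return false;
    }
  };
  } // end anonymous namespace

  char PostDomQuerySketch::ID = 0;

Since the analysis now owns its tree through a std::unique_ptr and drops it in releaseMemory(), clients should query through the wrapper each time rather than caching a raw PostDomTreeT pointer across runs.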
- MachineOperand *&getRegUseDefListHead(unsigned RegNo) { - if (TargetRegisterInfo::isVirtualRegister(RegNo)) - return VRegInfo[RegNo].second; - return PhysRegUseDefLists[RegNo]; + MachineOperand *&getRegUseDefListHead(Register RegNo) { + if (RegNo.isVirtual()) + return VRegInfo[RegNo.id()].second; + return PhysRegUseDefLists[RegNo.id()]; } - MachineOperand *getRegUseDefListHead(unsigned RegNo) const { - if (TargetRegisterInfo::isVirtualRegister(RegNo)) - return VRegInfo[RegNo].second; - return PhysRegUseDefLists[RegNo]; + MachineOperand *getRegUseDefListHead(Register RegNo) const { + if (RegNo.isVirtual()) + return VRegInfo[RegNo.id()].second; + return PhysRegUseDefLists[RegNo.id()]; } /// Get the next element in the use-def chain. @@ -214,8 +214,8 @@ public: bool shouldTrackSubRegLiveness(const TargetRegisterClass &RC) const { return subRegLivenessEnabled() && RC.HasDisjunctSubRegs; } - bool shouldTrackSubRegLiveness(unsigned VReg) const { - assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Must pass a VReg"); + bool shouldTrackSubRegLiveness(Register VReg) const { + assert(VReg.isVirtual() && "Must pass a VReg"); return shouldTrackSubRegLiveness(*getRegClass(VReg)); } bool subRegLivenessEnabled() const { @@ -326,7 +326,7 @@ public: /// of the specified register, skipping those marked as Debug. using reg_nodbg_iterator = defusechain_iterator; - reg_nodbg_iterator reg_nodbg_begin(unsigned RegNo) const { + reg_nodbg_iterator reg_nodbg_begin(Register RegNo) const { return reg_nodbg_iterator(getRegUseDefListHead(RegNo)); } static reg_nodbg_iterator reg_nodbg_end() { @@ -374,7 +374,7 @@ public: /// reg_nodbg_empty - Return true if the only instructions using or defining /// Reg are Debug instructions. - bool reg_nodbg_empty(unsigned RegNo) const { + bool reg_nodbg_empty(Register RegNo) const { return reg_nodbg_begin(RegNo) == reg_nodbg_end(); } @@ -628,10 +628,10 @@ public: /// Return the register class of the specified virtual register. /// This shouldn't be used directly unless \p Reg has a register class. /// \see getRegClassOrNull when this might happen. - const TargetRegisterClass *getRegClass(unsigned Reg) const { - assert(VRegInfo[Reg].first.is() && + const TargetRegisterClass *getRegClass(Register Reg) const { + assert(VRegInfo[Reg.id()].first.is() && "Register class not set, wrong accessor"); - return VRegInfo[Reg].first.get(); + return VRegInfo[Reg.id()].first.get(); } /// Return the register class of \p Reg, or null if Reg has not been assigned @@ -727,7 +727,7 @@ public: /// Get the low-level type of \p Reg or LLT{} if Reg is not a generic /// (target independent) virtual register. LLT getType(unsigned Reg) const { - if (TargetRegisterInfo::isVirtualRegister(Reg) && VRegToType.inBounds(Reg)) + if (Register::isVirtualRegister(Reg) && VRegToType.inBounds(Reg)) return VRegToType[Reg]; return LLT{}; } @@ -760,7 +760,7 @@ public: /// specified virtual register. This is typically used by target, and in case /// of an earlier hint it will be overwritten. void setRegAllocationHint(unsigned VReg, unsigned Type, unsigned PrefReg) { - assert(TargetRegisterInfo::isVirtualRegister(VReg)); + assert(Register::isVirtualRegister(VReg)); RegAllocHints[VReg].first = Type; RegAllocHints[VReg].second.clear(); RegAllocHints[VReg].second.push_back(PrefReg); @@ -769,7 +769,7 @@ public: /// addRegAllocationHint - Add a register allocation hint to the hints /// vector for VReg. 
void addRegAllocationHint(unsigned VReg, unsigned PrefReg) { - assert(TargetRegisterInfo::isVirtualRegister(VReg)); + assert(Register::isVirtualRegister(VReg)); RegAllocHints[VReg].second.push_back(PrefReg); } @@ -789,17 +789,18 @@ public: /// specified virtual register. If there are many hints, this returns the /// one with the greatest weight. std::pair - getRegAllocationHint(unsigned VReg) const { - assert(TargetRegisterInfo::isVirtualRegister(VReg)); - unsigned BestHint = (RegAllocHints[VReg].second.size() ? - RegAllocHints[VReg].second[0] : 0); - return std::pair(RegAllocHints[VReg].first, BestHint); + getRegAllocationHint(Register VReg) const { + assert(VReg.isVirtual()); + unsigned BestHint = (RegAllocHints[VReg.id()].second.size() ? + RegAllocHints[VReg.id()].second[0] : 0); + return std::pair(RegAllocHints[VReg.id()].first, + BestHint); } /// getSimpleHint - same as getRegAllocationHint except it will only return /// a target independent hint. - unsigned getSimpleHint(unsigned VReg) const { - assert(TargetRegisterInfo::isVirtualRegister(VReg)); + Register getSimpleHint(Register VReg) const { + assert(VReg.isVirtual()); std::pair Hint = getRegAllocationHint(VReg); return Hint.first ? 0 : Hint.second; } @@ -808,7 +809,7 @@ public: /// register allocation hints for VReg. const std::pair> &getRegAllocationHints(unsigned VReg) const { - assert(TargetRegisterInfo::isVirtualRegister(VReg)); + assert(Register::isVirtualRegister(VReg)); return RegAllocHints[VReg]; } @@ -817,6 +818,17 @@ public: /// deleted during LiveDebugVariables analysis. void markUsesInDebugValueAsUndef(unsigned Reg) const; + /// updateDbgUsersToReg - Update a collection of DBG_VALUE instructions + /// to refer to the designated register. + void updateDbgUsersToReg(unsigned Reg, + ArrayRef Users) const { + for (MachineInstr *MI : Users) { + assert(MI->isDebugInstr()); + assert(MI->getOperand(0).isReg()); + MI->getOperand(0).setReg(Reg); + } + } + /// Return true if the specified register is modified in this function. /// This checks that no defining machine operands exist for the register or /// any of its aliases. Definitions found on functions marked noreturn are @@ -882,8 +894,8 @@ public: /// /// Reserved registers may belong to an allocatable register class, but the /// target has explicitly requested that they are not used. - bool isReserved(unsigned PhysReg) const { - return getReservedRegs().test(PhysReg); + bool isReserved(Register PhysReg) const { + return getReservedRegs().test(PhysReg.id()); } /// Returns true when the given register unit is considered reserved. 
@@ -1164,7 +1176,7 @@ public: PSetIterator(unsigned RegUnit, const MachineRegisterInfo *MRI) { const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); - if (TargetRegisterInfo::isVirtualRegister(RegUnit)) { + if (Register::isVirtualRegister(RegUnit)) { const TargetRegisterClass *RC = MRI->getRegClass(RegUnit); PSet = TRI->getRegClassPressureSets(RC); Weight = TRI->getRegClassWeight(RC).RegWeight; diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h index 75a334f61ad0..333367943ac0 100644 --- a/include/llvm/CodeGen/MachineScheduler.h +++ b/include/llvm/CodeGen/MachineScheduler.h @@ -100,6 +100,7 @@ namespace llvm { extern cl::opt ForceTopDown; extern cl::opt ForceBottomUp; +extern cl::opt VerifyScheduling; class LiveIntervals; class MachineDominatorTree; diff --git a/include/llvm/CodeGen/ModuloSchedule.h b/include/llvm/CodeGen/ModuloSchedule.h new file mode 100644 index 000000000000..81a9b63b64ca --- /dev/null +++ b/include/llvm/CodeGen/ModuloSchedule.h @@ -0,0 +1,367 @@ +//===- ModuloSchedule.h - Software pipeline schedule expansion ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Software pipelining (SWP) is an instruction scheduling technique for loops +// that overlaps loop iterations and exploits ILP via compiler transformations. +// +// There are multiple methods for analyzing a loop and creating a schedule. +// An example algorithm is Swing Modulo Scheduling (implemented by the +// MachinePipeliner). The details of how a schedule is arrived at are irrelevant +// for the task of actually rewriting a loop to adhere to the schedule, which +// is what this file does. +// +// A schedule is, for every instruction in a block, a Cycle and a Stage. Note +// that we only support single-block loops, so "block" and "loop" can be used +// interchangably. +// +// The Cycle of an instruction defines a partial order of the instructions in +// the remapped loop. Instructions within a cycle must not consume the output +// of any instruction in the same cycle. Cycle information is assumed to have +// been calculated such that the processor will execute instructions in +// lock-step (for example in a VLIW ISA). +// +// The Stage of an instruction defines the mapping between logical loop +// iterations and pipelined loop iterations. An example (unrolled) pipeline +// may look something like: +// +// I0[0] Execute instruction I0 of iteration 0 +// I1[0], I0[1] Execute I0 of iteration 1 and I1 of iteration 1 +// I1[1], I0[2] +// I1[2], I0[3] +// +// In the schedule for this unrolled sequence we would say that I0 was scheduled +// in stage 0 and I1 in stage 1: +// +// loop: +// [stage 0] x = I0 +// [stage 1] I1 x (from stage 0) +// +// And to actually generate valid code we must insert a phi: +// +// loop: +// x' = phi(x) +// x = I0 +// I1 x' +// +// This is a simple example; the rules for how to generate correct code given +// an arbitrary schedule containing loop-carried values are complex. +// +// Note that these examples only mention the steady-state kernel of the +// generated loop; prologs and epilogs must be generated also that prime and +// flush the pipeline. Doing so is nontrivial. 
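The comment above describes the kernel, phi, prolog and epilog only schematically, so here is the same one-stage transformation written out at the C++ source level. This is purely an illustration of the shape of the rewrite: the expanders below operate on MachineInstrs, and the names f, g and pipelinedByOneStage are invented for the example.

  // f and g stand in for the two loop-body instructions I0 and I1 from the
  // comment above; their bodies are arbitrary placeholders.
  static int f(int i) { return 2 * i; } // "I0": produces a value
  static void g(int) {}                 // "I1": consumes the previous value

  // Original (unpipelined) loop:
  //   for (int i = 0; i < n; ++i) { int x = f(i); g(x); }
  //
  // The same loop pipelined by one stage:
  static void pipelinedByOneStage(int n) {
    if (n <= 0)
      return;
    int x = f(0);       // prolog: prime the pipeline with I0[0]
    for (int i = 1; i < n; ++i) {
      int x_prev = x;   // plays the role of the kernel phi (x' above)
      x = f(i);         // kernel, stage 0: I0 of iteration i
      g(x_prev);        // kernel, stage 1: I1 of iteration i - 1
    }
    g(x);               // epilog: flush the last in-flight value
  }

With two stages in flight, the value produced by stage 0 in iteration i is consumed by stage 1 only during iteration i + 1; that is exactly the loop-carried dependence the phi in the schematic models, and what the prolog and epilog prime and drain.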
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_MODULOSCHEDULE_H +#define LLVM_LIB_CODEGEN_MODULOSCHEDULE_H + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineLoopUtils.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include +#include + +namespace llvm { +class MachineBasicBlock; +class MachineInstr; +class LiveIntervals; + +/// Represents a schedule for a single-block loop. For every instruction we +/// maintain a Cycle and Stage. +class ModuloSchedule { +private: + /// The block containing the loop instructions. + MachineLoop *Loop; + + /// The instructions to be generated, in total order. Cycle provides a partial + /// order; the total order within cycles has been decided by the schedule + /// producer. + std::vector ScheduledInstrs; + + /// The cycle for each instruction. + DenseMap Cycle; + + /// The stage for each instruction. + DenseMap Stage; + + /// The number of stages in this schedule (Max(Stage) + 1). + int NumStages; + +public: + /// Create a new ModuloSchedule. + /// \arg ScheduledInstrs The new loop instructions, in total resequenced + /// order. + /// \arg Cycle Cycle index for all instructions in ScheduledInstrs. Cycle does + /// not need to start at zero. ScheduledInstrs must be partially ordered by + /// Cycle. + /// \arg Stage Stage index for all instructions in ScheduleInstrs. + ModuloSchedule(MachineFunction &MF, MachineLoop *Loop, + std::vector ScheduledInstrs, + DenseMap Cycle, + DenseMap Stage) + : Loop(Loop), ScheduledInstrs(ScheduledInstrs), Cycle(std::move(Cycle)), + Stage(std::move(Stage)) { + NumStages = 0; + for (auto &KV : this->Stage) + NumStages = std::max(NumStages, KV.second); + ++NumStages; + } + + /// Return the single-block loop being scheduled. + MachineLoop *getLoop() const { return Loop; } + + /// Return the number of stages contained in this schedule, which is the + /// largest stage index + 1. + int getNumStages() const { return NumStages; } + + /// Return the first cycle in the schedule, which is the cycle index of the + /// first instruction. + int getFirstCycle() { return Cycle[ScheduledInstrs.front()]; } + + /// Return the final cycle in the schedule, which is the cycle index of the + /// last instruction. + int getFinalCycle() { return Cycle[ScheduledInstrs.back()]; } + + /// Return the stage that MI is scheduled in, or -1. + int getStage(MachineInstr *MI) { + auto I = Stage.find(MI); + return I == Stage.end() ? -1 : I->second; + } + + /// Return the cycle that MI is scheduled at, or -1. + int getCycle(MachineInstr *MI) { + auto I = Cycle.find(MI); + return I == Cycle.end() ? -1 : I->second; + } + + /// Return the rescheduled instructions in order. + ArrayRef getInstructions() { return ScheduledInstrs; } + + void dump() { print(dbgs()); } + void print(raw_ostream &OS); +}; + +/// The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, +/// rewriting the old loop and inserting prologs and epilogs as required. 
+class ModuloScheduleExpander { +public: + using InstrChangesTy = DenseMap>; + +private: + using ValueMapTy = DenseMap; + using MBBVectorTy = SmallVectorImpl; + using InstrMapTy = DenseMap; + + ModuloSchedule &Schedule; + MachineFunction &MF; + const TargetSubtargetInfo &ST; + MachineRegisterInfo &MRI; + const TargetInstrInfo *TII; + LiveIntervals &LIS; + + MachineBasicBlock *BB; + MachineBasicBlock *Preheader; + MachineBasicBlock *NewKernel = nullptr; + std::unique_ptr LoopInfo; + + /// Map for each register and the max difference between its uses and def. + /// The first element in the pair is the max difference in stages. The + /// second is true if the register defines a Phi value and loop value is + /// scheduled before the Phi. + std::map> RegToStageDiff; + + /// Instructions to change when emitting the final schedule. + InstrChangesTy InstrChanges; + + void generatePipelinedLoop(); + void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB, + ValueMapTy *VRMap, MBBVectorTy &PrologBBs); + void generateEpilog(unsigned LastStage, MachineBasicBlock *KernelBB, + ValueMapTy *VRMap, MBBVectorTy &EpilogBBs, + MBBVectorTy &PrologBBs); + void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1, + MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, + ValueMapTy *VRMap, InstrMapTy &InstrMap, + unsigned LastStageNum, unsigned CurStageNum, + bool IsLast); + void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1, + MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, + ValueMapTy *VRMap, InstrMapTy &InstrMap, + unsigned LastStageNum, unsigned CurStageNum, bool IsLast); + void removeDeadInstructions(MachineBasicBlock *KernelBB, + MBBVectorTy &EpilogBBs); + void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs); + void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs, + MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs, + ValueMapTy *VRMap); + bool computeDelta(MachineInstr &MI, unsigned &Delta); + void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI, + unsigned Num); + MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum, + unsigned InstStageNum); + MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum, + unsigned InstStageNum); + void updateInstruction(MachineInstr *NewMI, bool LastDef, + unsigned CurStageNum, unsigned InstrStageNum, + ValueMapTy *VRMap); + MachineInstr *findDefInLoop(unsigned Reg); + unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal, + unsigned LoopStage, ValueMapTy *VRMap, + MachineBasicBlock *BB); + void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum, + ValueMapTy *VRMap, InstrMapTy &InstrMap); + void rewriteScheduledInstr(MachineBasicBlock *BB, InstrMapTy &InstrMap, + unsigned CurStageNum, unsigned PhiNum, + MachineInstr *Phi, unsigned OldReg, + unsigned NewReg, unsigned PrevReg = 0); + bool isLoopCarried(MachineInstr &Phi); + + /// Return the max. number of stages/iterations that can occur between a + /// register definition and its uses. + unsigned getStagesForReg(int Reg, unsigned CurStage) { + std::pair Stages = RegToStageDiff[Reg]; + if ((int)CurStage > Schedule.getNumStages() - 1 && Stages.first == 0 && + Stages.second) + return 1; + return Stages.first; + } + + /// The number of stages for a Phi is a little different than other + /// instructions. The minimum value computed in RegToStageDiff is 1 + /// because we assume the Phi is needed for at least 1 iteration. 
+ /// This is not the case if the loop value is scheduled prior to the + /// Phi in the same stage. This function returns the number of stages + /// or iterations needed between the Phi definition and any uses. + unsigned getStagesForPhi(int Reg) { + std::pair Stages = RegToStageDiff[Reg]; + if (Stages.second) + return Stages.first; + return Stages.first - 1; + } + +public: + /// Create a new ModuloScheduleExpander. + /// \arg InstrChanges Modifications to make to instructions with memory + /// operands. + /// FIXME: InstrChanges is opaque and is an implementation detail of an + /// optimization in MachinePipeliner that crosses abstraction boundaries. + ModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S, + LiveIntervals &LIS, InstrChangesTy InstrChanges) + : Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()), + TII(ST.getInstrInfo()), LIS(LIS), + InstrChanges(std::move(InstrChanges)) {} + + /// Performs the actual expansion. + void expand(); + /// Performs final cleanup after expansion. + void cleanup(); + + /// Returns the newly rewritten kernel block, or nullptr if this was + /// optimized away. + MachineBasicBlock *getRewrittenKernel() { return NewKernel; } +}; + +/// A reimplementation of ModuloScheduleExpander. It works by generating a +/// standalone kernel loop and peeling out the prologs and epilogs. +class PeelingModuloScheduleExpander { + ModuloSchedule &Schedule; + MachineFunction &MF; + const TargetSubtargetInfo &ST; + MachineRegisterInfo &MRI; + const TargetInstrInfo *TII; + LiveIntervals *LIS; + + /// The original loop block that gets rewritten in-place. + MachineBasicBlock *BB; + /// The original loop preheader. + MachineBasicBlock *Preheader; + /// All prolog and epilog blocks. + SmallVector Prologs, Epilogs; + /// For every block, the stages that are produced. + DenseMap LiveStages; + /// For every block, the stages that are available. A stage can be available + /// but not produced (in the epilog) or produced but not available (in the + /// prolog). + DenseMap AvailableStages; + + /// CanonicalMIs and BlockMIs form a bidirectional map between any of the + /// loop kernel clones. + DenseMap CanonicalMIs; + DenseMap, MachineInstr *> + BlockMIs; + + /// State passed from peelKernel to peelPrologAndEpilogs(). + std::deque PeeledFront, PeeledBack; + +public: + PeelingModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S, + LiveIntervals *LIS) + : Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()), + TII(ST.getInstrInfo()), LIS(LIS) {} + + void expand(); + + /// Runs ModuloScheduleExpander and treats it as a golden input to validate + /// aspects of the code generated by PeelingModuloScheduleExpander. + void validateAgainstModuloScheduleExpander(); + +protected: + /// Converts BB from the original loop body to the rewritten, pipelined + /// steady-state. + void rewriteKernel(); + +private: + /// Peels one iteration of the rewritten kernel (BB) in the specified + /// direction. + MachineBasicBlock *peelKernel(LoopPeelDirection LPD); + /// Peel the kernel forwards and backwards to produce prologs and epilogs, + /// and stitch them together. + void peelPrologAndEpilogs(); + /// All prolog and epilog blocks are clones of the kernel, so any produced + /// register in one block has an corollary in all other blocks. + Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB); + /// Change all users of MI, if MI is predicated out + /// (LiveStages[MI->getParent()] == false). 
+ void rewriteUsesOf(MachineInstr *MI); + /// Insert branches between prologs, kernel and epilogs. + void fixupBranches(); + /// Create a poor-man's LCSSA by cloning only the PHIs from the kernel block + /// to a block dominated by all prologs and epilogs. This allows us to treat + /// the loop exiting block as any other kernel clone. + MachineBasicBlock *CreateLCSSAExitingBlock(); + /// Helper to get the stage of an instruction in the schedule. + unsigned getStage(MachineInstr *MI) { + if (CanonicalMIs.count(MI)) + MI = CanonicalMIs[MI]; + return Schedule.getStage(MI); + } +}; + +/// Expander that simply annotates each scheduled instruction with a post-instr +/// symbol that can be consumed by the ModuloScheduleTest pass. +/// +/// The post-instr symbol is a way of annotating an instruction that can be +/// roundtripped in MIR. The syntax is: +/// MYINST %0, post-instr-symbol +class ModuloScheduleTestAnnotater { + MachineFunction &MF; + ModuloSchedule &S; + +public: + ModuloScheduleTestAnnotater(MachineFunction &MF, ModuloSchedule &S) + : MF(MF), S(S) {} + + /// Performs the annotation. + void annotate(); +}; + +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_MODULOSCHEDULE_H diff --git a/include/llvm/CodeGen/PBQP/Math.h b/include/llvm/CodeGen/PBQP/Math.h index 8b014ccbb07b..099ba788e9a2 100644 --- a/include/llvm/CodeGen/PBQP/Math.h +++ b/include/llvm/CodeGen/PBQP/Math.h @@ -28,17 +28,17 @@ class Vector { public: /// Construct a PBQP vector of the given size. explicit Vector(unsigned Length) - : Length(Length), Data(llvm::make_unique(Length)) {} + : Length(Length), Data(std::make_unique(Length)) {} /// Construct a PBQP vector with initializer. Vector(unsigned Length, PBQPNum InitVal) - : Length(Length), Data(llvm::make_unique(Length)) { + : Length(Length), Data(std::make_unique(Length)) { std::fill(Data.get(), Data.get() + Length, InitVal); } /// Copy construct a PBQP vector. Vector(const Vector &V) - : Length(V.Length), Data(llvm::make_unique(Length)) { + : Length(V.Length), Data(std::make_unique(Length)) { std::copy(V.Data.get(), V.Data.get() + Length, Data.get()); } @@ -125,21 +125,21 @@ private: public: /// Construct a PBQP Matrix with the given dimensions. Matrix(unsigned Rows, unsigned Cols) : - Rows(Rows), Cols(Cols), Data(llvm::make_unique(Rows * Cols)) { + Rows(Rows), Cols(Cols), Data(std::make_unique(Rows * Cols)) { } /// Construct a PBQP Matrix with the given dimensions and initial /// value. Matrix(unsigned Rows, unsigned Cols, PBQPNum InitVal) : Rows(Rows), Cols(Cols), - Data(llvm::make_unique(Rows * Cols)) { + Data(std::make_unique(Rows * Cols)) { std::fill(Data.get(), Data.get() + (Rows * Cols), InitVal); } /// Copy construct a PBQP matrix. Matrix(const Matrix &M) : Rows(M.Rows), Cols(M.Cols), - Data(llvm::make_unique(Rows * Cols)) { + Data(std::make_unique(Rows * Cols)) { std::copy(M.Data.get(), M.Data.get() + (Rows * Cols), Data.get()); } diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index d92ee93268e7..1e765ce51e4a 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -226,6 +226,10 @@ namespace llvm { /// inserting cmov instructions. extern char &EarlyIfConverterID; + /// EarlyIfPredicator - This pass performs if-conversion on SSA form by + /// predicating if/else block and insert select at the join point. + extern char &EarlyIfPredicatorID; + /// This pass performs instruction combining using trace metrics to estimate /// critical-path and resource depth. 
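An illustration of how the pieces of ModuloSchedule.h above fit together once a scheduler has assigned cycles and stages. Everything named here (expandSchedule, NoChanges, the parameter list) is invented for the sketch, and the DenseMap and std::vector element types are assumptions rather than quotes from the header; only the ModuloSchedule and ModuloScheduleExpander APIs themselves come from this patch.

  #include "llvm/ADT/DenseMap.h"
  #include "llvm/CodeGen/LiveIntervals.h"
  #include "llvm/CodeGen/MachineLoopInfo.h"
  #include "llvm/CodeGen/ModuloSchedule.h"
  #include <utility>
  #include <vector>

  using namespace llvm;

  static void expandSchedule(MachineFunction &MF, MachineLoop *Loop,
                             std::vector<MachineInstr *> InstrsInCycleOrder,
                             DenseMap<MachineInstr *, int> Cycle,
                             DenseMap<MachineInstr *, int> Stage,
                             LiveIntervals &LIS) {
    // Bundle the scheduler's decisions into a ModuloSchedule.
    ModuloSchedule MS(MF, Loop, std::move(InstrsInCycleOrder), std::move(Cycle),
                      std::move(Stage));

    // Classic in-place expansion: rewrites the loop body and emits the prolog
    // and epilog blocks. No instruction changes are requested here.
    ModuloScheduleExpander::InstrChangesTy NoChanges;
    ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NoChanges));
    MSE.expand();
    MSE.cleanup();
  }

The peeling variant is driven the same way but takes LiveIntervals by pointer and can cross-check itself against the classic expander via validateAgainstModuloScheduleExpander(); ModuloScheduleTestAnnotater only tags each instruction with a post-instr symbol so a schedule can be round-tripped through MIR.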
extern char &MachineCombinerID; diff --git a/include/llvm/CodeGen/Register.h b/include/llvm/CodeGen/Register.h index 907c1a99e56f..aa5173684e24 100644 --- a/include/llvm/CodeGen/Register.h +++ b/include/llvm/CodeGen/Register.h @@ -9,6 +9,7 @@ #ifndef LLVM_CODEGEN_REGISTER_H #define LLVM_CODEGEN_REGISTER_H +#include "llvm/MC/MCRegister.h" #include namespace llvm { @@ -20,41 +21,136 @@ class Register { public: Register(unsigned Val = 0): Reg(Val) {} + Register(MCRegister Val): Reg(Val) {} + + // Register numbers can represent physical registers, virtual registers, and + // sometimes stack slots. The unsigned values are divided into these ranges: + // + // 0 Not a register, can be used as a sentinel. + // [1;2^30) Physical registers assigned by TableGen. + // [2^30;2^31) Stack slots. (Rarely used.) + // [2^31;2^32) Virtual registers assigned by MachineRegisterInfo. + // + // Further sentinels can be allocated from the small negative integers. + // DenseMapInfo uses -1u and -2u. + + /// isStackSlot - Sometimes it is useful the be able to store a non-negative + /// frame index in a variable that normally holds a register. isStackSlot() + /// returns true if Reg is in the range used for stack slots. + /// + /// Note that isVirtualRegister() and isPhysicalRegister() cannot handle stack + /// slots, so if a variable may contains a stack slot, always check + /// isStackSlot() first. + /// + static bool isStackSlot(unsigned Reg) { + return MCRegister::isStackSlot(Reg); + } + + /// Compute the frame index from a register value representing a stack slot. + static int stackSlot2Index(unsigned Reg) { + assert(isStackSlot(Reg) && "Not a stack slot"); + return int(Reg - (1u << 30)); + } + + /// Convert a non-negative frame index to a stack slot register value. + static unsigned index2StackSlot(int FI) { + assert(FI >= 0 && "Cannot hold a negative frame index."); + return FI + (1u << 30); + } + + /// Return true if the specified register number is in + /// the physical register namespace. + static bool isPhysicalRegister(unsigned Reg) { + return MCRegister::isPhysicalRegister(Reg); + } + + /// Return true if the specified register number is in + /// the virtual register namespace. + static bool isVirtualRegister(unsigned Reg) { + assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first."); + return int(Reg) < 0; + } + + /// Convert a virtual register number to a 0-based index. + /// The first virtual register in a function will get the index 0. + static unsigned virtReg2Index(unsigned Reg) { + assert(isVirtualRegister(Reg) && "Not a virtual register"); + return Reg & ~(1u << 31); + } + + /// Convert a 0-based index to a virtual register number. + /// This is the inverse operation of VirtReg2IndexFunctor below. + static unsigned index2VirtReg(unsigned Index) { + return Index | (1u << 31); + } /// Return true if the specified register number is in the virtual register /// namespace. bool isVirtual() const { - return int(Reg) < 0; + return isVirtualRegister(Reg); } /// Return true if the specified register number is in the physical register /// namespace. bool isPhysical() const { - return int(Reg) > 0; + return isPhysicalRegister(Reg); } /// Convert a virtual register number to a 0-based index. The first virtual /// register in a function will get the index 0. unsigned virtRegIndex() const { - assert(isVirtual() && "Not a virtual register"); - return Reg & ~(1u << 31); - } - - /// Convert a 0-based index to a virtual register number. 
- /// This is the inverse operation of VirtReg2IndexFunctor below. - static Register index2VirtReg(unsigned Index) { - return Register(Index | (1u << 31)); + return virtReg2Index(Reg); } operator unsigned() const { return Reg; } + unsigned id() const { return Reg; } + + operator MCRegister() const { + return MCRegister(Reg); + } + bool isValid() const { return Reg != 0; } + + /// Comparisons between register objects + bool operator==(const Register &Other) const { return Reg == Other.Reg; } + bool operator!=(const Register &Other) const { return Reg != Other.Reg; } + bool operator==(const MCRegister &Other) const { return Reg == Other.id(); } + bool operator!=(const MCRegister &Other) const { return Reg != Other.id(); } + + /// Comparisons against register constants. E.g. + /// * R == AArch64::WZR + /// * R == 0 + /// * R == VirtRegMap::NO_PHYS_REG + bool operator==(unsigned Other) const { return Reg == Other; } + bool operator!=(unsigned Other) const { return Reg != Other; } + bool operator==(int Other) const { return Reg == unsigned(Other); } + bool operator!=(int Other) const { return Reg != unsigned(Other); } + // MSVC requires that we explicitly declare these two as well. + bool operator==(MCPhysReg Other) const { return Reg == unsigned(Other); } + bool operator!=(MCPhysReg Other) const { return Reg != unsigned(Other); } +}; + +// Provide DenseMapInfo for Register +template<> struct DenseMapInfo { + static inline unsigned getEmptyKey() { + return DenseMapInfo::getEmptyKey(); + } + static inline unsigned getTombstoneKey() { + return DenseMapInfo::getTombstoneKey(); + } + static unsigned getHashValue(const Register &Val) { + return DenseMapInfo::getHashValue(Val.id()); + } + static bool isEqual(const Register &LHS, const Register &RHS) { + return DenseMapInfo::isEqual(LHS.id(), RHS.id()); + } }; } -#endif +#endif // ifndef LLVM_CODEGEN_REGISTER_H diff --git a/include/llvm/CodeGen/RegisterClassInfo.h b/include/llvm/CodeGen/RegisterClassInfo.h index 14af5c4d090d..25b310c47621 100644 --- a/include/llvm/CodeGen/RegisterClassInfo.h +++ b/include/llvm/CodeGen/RegisterClassInfo.h @@ -110,7 +110,7 @@ public: /// getLastCalleeSavedAlias - Returns the last callee saved register that /// overlaps PhysReg, or 0 if Reg doesn't overlap a CalleeSavedAliases. unsigned getLastCalleeSavedAlias(unsigned PhysReg) const { - assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); + assert(Register::isPhysicalRegister(PhysReg)); if (PhysReg < CalleeSavedAliases.size()) return CalleeSavedAliases[PhysReg]; return 0; diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h index 5bbaa03fd751..92333b859f1b 100644 --- a/include/llvm/CodeGen/RegisterPressure.h +++ b/include/llvm/CodeGen/RegisterPressure.h @@ -129,6 +129,8 @@ public: bool operator==(const PressureChange &RHS) const { return PSetID == RHS.PSetID && UnitInc == RHS.UnitInc; } + + void dump() const; }; /// List of PressureChanges in order of increasing, unique PSetID. @@ -248,6 +250,7 @@ struct RegPressureDelta { bool operator!=(const RegPressureDelta &RHS) const { return !operator==(RHS); } + void dump() const; }; /// A set of live virtual registers and physical register units. 
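The comment block added to Register.h above pins down how one unsigned value encodes three different namespaces. The following snippet re-derives that layout with plain arithmetic so it can be compiled and run on its own; the local isVirtual/isStackSlot/index2VirtReg helpers are stand-ins that mirror the documented ranges, not the LLVM implementations themselves.

  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  // Ranges documented in Register.h:
  //   0                  not a register (sentinel)
  //   [1, 2^30)          physical registers assigned by TableGen
  //   [2^30, 2^31)       stack slots (frame indexes stored in a register slot)
  //   [2^31, 2^32)       virtual registers assigned by MachineRegisterInfo
  static const uint32_t StackSlotBase = 1u << 30;
  static const uint32_t VirtRegBase = 1u << 31;

  static bool isStackSlot(uint32_t Reg) {
    return Reg >= StackSlotBase && Reg < VirtRegBase;
  }
  static bool isVirtual(uint32_t Reg) { return (Reg & VirtRegBase) != 0; }
  static bool isPhysical(uint32_t Reg) { return Reg >= 1 && Reg < StackSlotBase; }

  static uint32_t index2VirtReg(uint32_t Index) { return Index | VirtRegBase; }
  static uint32_t virtReg2Index(uint32_t Reg) {
    assert(isVirtual(Reg) && "not a virtual register");
    return Reg & ~VirtRegBase;
  }
  static uint32_t index2StackSlot(int FI) {
    assert(FI >= 0 && "negative frame index");
    return uint32_t(FI) + StackSlotBase;
  }

  int main() {
    uint32_t V7 = index2VirtReg(7); // 0x80000007: the 8th vreg in a function
    assert(isVirtual(V7) && virtReg2Index(V7) == 7);
    assert(isPhysical(42) && !isVirtual(42)); // small values are physregs
    assert(isStackSlot(index2StackSlot(3)));  // frame index 3 in "register" form
    std::printf("vreg7 = 0x%08x, index %u\n", unsigned(V7),
                unsigned(virtReg2Index(V7)));
    return 0;
  }

This also makes the ordering caveat from the comment concrete: a stack-slot value has bit 30 set but bit 31 clear, so code must test isStackSlot() before isVirtualRegister() or isPhysicalRegister() whenever a variable may hold a frame index.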
@@ -273,15 +276,15 @@ private: unsigned NumRegUnits; unsigned getSparseIndexFromReg(unsigned Reg) const { - if (TargetRegisterInfo::isVirtualRegister(Reg)) - return TargetRegisterInfo::virtReg2Index(Reg) + NumRegUnits; + if (Register::isVirtualRegister(Reg)) + return Register::virtReg2Index(Reg) + NumRegUnits; assert(Reg < NumRegUnits); return Reg; } unsigned getRegFromSparseIndex(unsigned SparseIndex) const { if (SparseIndex >= NumRegUnits) - return TargetRegisterInfo::index2VirtReg(SparseIndex-NumRegUnits); + return Register::index2VirtReg(SparseIndex-NumRegUnits); return SparseIndex; } diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h index 9c48df82f07d..5b5a80a67e7f 100644 --- a/include/llvm/CodeGen/RegisterScavenging.h +++ b/include/llvm/CodeGen/RegisterScavenging.h @@ -51,7 +51,7 @@ class RegScavenger { /// If non-zero, the specific register is currently being /// scavenged. That is, it is spilled to this scavenging stack slot. - unsigned Reg = 0; + Register Reg; /// The instruction that restores the scavenged register from stack. const MachineInstr *Restore = nullptr; @@ -119,14 +119,14 @@ public: MachineBasicBlock::iterator getCurrentPosition() const { return MBBI; } /// Return if a specific register is currently used. - bool isRegUsed(unsigned Reg, bool includeReserved = true) const; + bool isRegUsed(Register Reg, bool includeReserved = true) const; /// Return all available registers in the register class in Mask. BitVector getRegsAvailable(const TargetRegisterClass *RC); /// Find an unused register of the specified register class. /// Return 0 if none is found. - unsigned FindUnusedReg(const TargetRegisterClass *RC) const; + Register FindUnusedReg(const TargetRegisterClass *RC) const; /// Add a scavenging frame index. void addScavengingFrameIndex(int FI) { @@ -160,10 +160,10 @@ public: /// /// If \p AllowSpill is false, fail if a spill is required to make the /// register available, and return NoRegister. - unsigned scavengeRegister(const TargetRegisterClass *RC, + Register scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj, bool AllowSpill = true); - unsigned scavengeRegister(const TargetRegisterClass *RegClass, int SPAdj, + Register scavengeRegister(const TargetRegisterClass *RegClass, int SPAdj, bool AllowSpill = true) { return scavengeRegister(RegClass, MBBI, SPAdj, AllowSpill); } @@ -177,17 +177,17 @@ public: /// /// If \p AllowSpill is false, fail if a spill is required to make the /// register available, and return NoRegister. - unsigned scavengeRegisterBackwards(const TargetRegisterClass &RC, + Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill = true); /// Tell the scavenger a register is used. - void setRegUsed(unsigned Reg, LaneBitmask LaneMask = LaneBitmask::getAll()); + void setRegUsed(Register Reg, LaneBitmask LaneMask = LaneBitmask::getAll()); private: /// Returns true if a register is reserved. It is never "unused". - bool isReserved(unsigned Reg) const { return MRI->isReserved(Reg); } + bool isReserved(Register Reg) const { return MRI->isReserved(Reg); } /// setUsed / setUnused - Mark the state of one or a number of register units. /// @@ -203,16 +203,16 @@ private: void determineKillsAndDefs(); /// Add all Reg Units that Reg contains to BV. 
- void addRegUnits(BitVector &BV, unsigned Reg); + void addRegUnits(BitVector &BV, Register Reg); /// Remove all Reg Units that \p Reg contains from \p BV. - void removeRegUnits(BitVector &BV, unsigned Reg); + void removeRegUnits(BitVector &BV, Register Reg); /// Return the candidate register that is unused for the longest after /// StartMI. UseMI is set to the instruction where the search stopped. /// /// No more than InstrLimit instructions are inspected. - unsigned findSurvivorReg(MachineBasicBlock::iterator StartMI, + Register findSurvivorReg(MachineBasicBlock::iterator StartMI, BitVector &Candidates, unsigned InstrLimit, MachineBasicBlock::iterator &UseMI); @@ -225,7 +225,7 @@ private: /// Spill a register after position \p After and reload it before position /// \p UseMI. - ScavengedInfo &spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj, + ScavengedInfo &spill(Register Reg, const TargetRegisterClass &RC, int SPAdj, MachineBasicBlock::iterator Before, MachineBasicBlock::iterator &UseMI); }; diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h index 3e3b604acbac..1eb9b9f322ba 100644 --- a/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -34,6 +34,7 @@ namespace llvm { + class AAResults; class LiveIntervals; class MachineFrameInfo; class MachineFunction; @@ -57,7 +58,7 @@ namespace llvm { : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {} unsigned getSparseSetIndex() const { - return TargetRegisterInfo::virtReg2Index(VirtReg); + return Register::virtReg2Index(VirtReg); } }; @@ -173,7 +174,7 @@ namespace llvm { /// Tracks the last instructions in this region using each virtual register. VReg2SUnitOperIdxMultiMap CurrentVRegUses; - AliasAnalysis *AAForDep = nullptr; + AAResults *AAForDep = nullptr; /// Remember a generic side-effecting instruction as we proceed. /// No other SU ever gets scheduled around it (except in the special @@ -201,7 +202,7 @@ namespace llvm { Value2SUsMap &loads, unsigned N); /// Adds a chain edge between SUa and SUb, but only if both - /// AliasAnalysis and Target fail to deny the dependency. + /// AAResults and Target fail to deny the dependency. void addChainDependency(SUnit *SUa, SUnit *SUb, unsigned Latency = 0); @@ -306,7 +307,7 @@ namespace llvm { /// If \p RPTracker is non-null, compute register pressure as a side effect. /// The DAG builder is an efficient place to do it because it already visits /// operands. - void buildSchedGraph(AliasAnalysis *AA, + void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker = nullptr, PressureDiffs *PDiffs = nullptr, LiveIntervals *LIS = nullptr, @@ -374,6 +375,9 @@ namespace llvm { /// Returns a mask for which lanes get read/written by the given (register) /// machine operand. LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const; + + /// Returns true if the def register in \p MO has no uses. + bool deadDefHasNoUse(const MachineOperand &MO); }; /// Creates a new SUnit and return a ptr to it. 
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 12a970847021..6b8e2dd803ba 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -26,8 +26,6 @@ #include "llvm/ADT/ilist.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -58,6 +56,7 @@ namespace llvm { +class AAResults; class BlockAddress; class Constant; class ConstantFP; @@ -66,6 +65,7 @@ class DataLayout; struct fltSemantics; class GlobalValue; struct KnownBits; +class LegacyDivergenceAnalysis; class LLVMContext; class MachineBasicBlock; class MachineConstantPoolValue; @@ -269,7 +269,13 @@ class SelectionDAG { using CallSiteInfo = MachineFunction::CallSiteInfo; using CallSiteInfoImpl = MachineFunction::CallSiteInfoImpl; - DenseMap SDCallSiteInfo; + + struct CallSiteDbgInfo { + CallSiteInfo CSInfo; + MDNode *HeapAllocSite = nullptr; + }; + + DenseMap SDCallSiteDbgInfo; uint16_t NextPersistentId = 0; @@ -382,7 +388,11 @@ private: Node->OperandList = nullptr; } void CreateTopologicalOrder(std::vector& Order); + public: + // Maximum depth for recursive analysis such as computeKnownBits, etc. + static constexpr unsigned MaxRecursionDepth = 6; + explicit SelectionDAG(const TargetMachine &TM, CodeGenOpt::Level); SelectionDAG(const SelectionDAG &) = delete; SelectionDAG &operator=(const SelectionDAG &) = delete; @@ -489,7 +499,7 @@ public: /// certain types of nodes together, or eliminating superfluous nodes. The /// Level argument controls whether Combine is allowed to produce nodes and /// types that are illegal on the target. 
- void Combine(CombineLevel Level, AliasAnalysis *AA, + void Combine(CombineLevel Level, AAResults *AA, CodeGenOpt::Level OptLevel); /// This transforms the SelectionDAG into a SelectionDAG that @@ -628,10 +638,9 @@ public: SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset = 0, bool isTargetGA = false, - unsigned char TargetFlags = 0); + unsigned TargetFlags = 0); SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, - int64_t offset = 0, - unsigned char TargetFlags = 0) { + int64_t offset = 0, unsigned TargetFlags = 0) { return getGlobalAddress(GV, DL, VT, offset, true, TargetFlags); } SDValue getFrameIndex(int FI, EVT VT, bool isTarget = false); @@ -639,28 +648,27 @@ public: return getFrameIndex(FI, VT, true); } SDValue getJumpTable(int JTI, EVT VT, bool isTarget = false, - unsigned char TargetFlags = 0); - SDValue getTargetJumpTable(int JTI, EVT VT, unsigned char TargetFlags = 0) { + unsigned TargetFlags = 0); + SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags = 0) { return getJumpTable(JTI, VT, true, TargetFlags); } - SDValue getConstantPool(const Constant *C, EVT VT, - unsigned Align = 0, int Offs = 0, bool isT=false, - unsigned char TargetFlags = 0); - SDValue getTargetConstantPool(const Constant *C, EVT VT, - unsigned Align = 0, int Offset = 0, - unsigned char TargetFlags = 0) { + SDValue getConstantPool(const Constant *C, EVT VT, unsigned Align = 0, + int Offs = 0, bool isT = false, + unsigned TargetFlags = 0); + SDValue getTargetConstantPool(const Constant *C, EVT VT, unsigned Align = 0, + int Offset = 0, unsigned TargetFlags = 0) { return getConstantPool(C, VT, Align, Offset, true, TargetFlags); } SDValue getConstantPool(MachineConstantPoolValue *C, EVT VT, unsigned Align = 0, int Offs = 0, bool isT=false, - unsigned char TargetFlags = 0); - SDValue getTargetConstantPool(MachineConstantPoolValue *C, - EVT VT, unsigned Align = 0, - int Offset = 0, unsigned char TargetFlags=0) { + unsigned TargetFlags = 0); + SDValue getTargetConstantPool(MachineConstantPoolValue *C, EVT VT, + unsigned Align = 0, int Offset = 0, + unsigned TargetFlags = 0) { return getConstantPool(C, VT, Align, Offset, true, TargetFlags); } SDValue getTargetIndex(int Index, EVT VT, int64_t Offset = 0, - unsigned char TargetFlags = 0); + unsigned TargetFlags = 0); // When generating a branch to a BB, we don't in general know enough // to provide debug info for the BB at that time, so keep this one around. 
SDValue getBasicBlock(MachineBasicBlock *MBB); @@ -668,7 +676,7 @@ public: SDValue getExternalSymbol(const char *Sym, EVT VT); SDValue getExternalSymbol(const char *Sym, const SDLoc &dl, EVT VT); SDValue getTargetExternalSymbol(const char *Sym, EVT VT, - unsigned char TargetFlags = 0); + unsigned TargetFlags = 0); SDValue getMCSymbol(MCSymbol *Sym, EVT VT); SDValue getValueType(EVT); @@ -677,12 +685,10 @@ public: SDValue getEHLabel(const SDLoc &dl, SDValue Root, MCSymbol *Label); SDValue getLabelNode(unsigned Opcode, const SDLoc &dl, SDValue Root, MCSymbol *Label); - SDValue getBlockAddress(const BlockAddress *BA, EVT VT, - int64_t Offset = 0, bool isTarget = false, - unsigned char TargetFlags = 0); + SDValue getBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset = 0, + bool isTarget = false, unsigned TargetFlags = 0); SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, - int64_t Offset = 0, - unsigned char TargetFlags = 0) { + int64_t Offset = 0, unsigned TargetFlags = 0) { return getBlockAddress(BA, VT, Offset, true, TargetFlags); } @@ -1035,7 +1041,7 @@ public: unsigned Align = 0, MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore, - unsigned Size = 0, + uint64_t Size = 0, const AAMDNodes &AAInfo = AAMDNodes()); SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, @@ -1117,9 +1123,11 @@ public: MachineMemOperand *MMO, bool IsTruncating = false, bool IsCompressing = false); SDValue getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, - ArrayRef Ops, MachineMemOperand *MMO); + ArrayRef Ops, MachineMemOperand *MMO, + ISD::MemIndexType IndexType); SDValue getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, - ArrayRef Ops, MachineMemOperand *MMO); + ArrayRef Ops, MachineMemOperand *MMO, + ISD::MemIndexType IndexType); /// Return (create a new or find existing) a target-specific node. /// TargetMemSDNode should be derived class from MemSDNode. @@ -1588,9 +1596,12 @@ public: /// Extract. The reduction must use one of the opcodes listed in /p /// CandidateBinOps and on success /p BinOp will contain the matching opcode. /// Returns the vector that is being reduced on, or SDValue() if a reduction - /// was not matched. + /// was not matched. If \p AllowPartials is set then in the case of a + /// reduction pattern that only matches the first few stages, the extracted + /// subvector of the start of the reduction is returned. SDValue matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, - ArrayRef CandidateBinOps); + ArrayRef CandidateBinOps, + bool AllowPartials = false); /// Utility function used by legalize and lowering to /// "unroll" a vector operation by splitting out the scalars and operating @@ -1664,16 +1675,28 @@ public: } void addCallSiteInfo(const SDNode *CallNode, CallSiteInfoImpl &&CallInfo) { - SDCallSiteInfo[CallNode] = std::move(CallInfo); + SDCallSiteDbgInfo[CallNode].CSInfo = std::move(CallInfo); } CallSiteInfo getSDCallSiteInfo(const SDNode *CallNode) { - auto I = SDCallSiteInfo.find(CallNode); - if (I != SDCallSiteInfo.end()) - return std::move(I->second); + auto I = SDCallSiteDbgInfo.find(CallNode); + if (I != SDCallSiteDbgInfo.end()) + return std::move(I->second).CSInfo; return CallSiteInfo(); } + void addHeapAllocSite(const SDNode *Node, MDNode *MD) { + SDCallSiteDbgInfo[Node].HeapAllocSite = MD; + } + + /// Return the HeapAllocSite type associated with the SDNode, if it exists. 
+ MDNode *getHeapAllocSite(const SDNode *Node) { + auto It = SDCallSiteDbgInfo.find(Node); + if (It == SDCallSiteDbgInfo.end()) + return nullptr; + return It->second.HeapAllocSite; + } + private: void InsertNode(SDNode *N); bool RemoveNodeFromCSEMaps(SDNode *N); @@ -1712,7 +1735,7 @@ private: std::map ExtendedValueTypeNodes; StringMap ExternalSymbols; - std::map,SDNode*> TargetExternalSymbols; + std::map, SDNode *> TargetExternalSymbols; DenseMap MCSymbols; }; diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h index 147c325342fc..de71a21d4671 100644 --- a/include/llvm/CodeGen/SelectionDAGISel.h +++ b/include/llvm/CodeGen/SelectionDAGISel.h @@ -22,22 +22,23 @@ #include namespace llvm { - class FastISel; - class SelectionDAGBuilder; - class SDValue; - class MachineRegisterInfo; - class MachineBasicBlock; - class MachineFunction; - class MachineInstr; - class OptimizationRemarkEmitter; - class TargetLowering; - class TargetLibraryInfo; - class FunctionLoweringInfo; - class ScheduleHazardRecognizer; - class SwiftErrorValueTracking; - class GCFunctionInfo; - class ScheduleDAGSDNodes; - class LoadInst; +class AAResults; +class FastISel; +class SelectionDAGBuilder; +class SDValue; +class MachineRegisterInfo; +class MachineBasicBlock; +class MachineFunction; +class MachineInstr; +class OptimizationRemarkEmitter; +class TargetLowering; +class TargetLibraryInfo; +class FunctionLoweringInfo; +class ScheduleHazardRecognizer; +class SwiftErrorValueTracking; +class GCFunctionInfo; +class ScheduleDAGSDNodes; +class LoadInst; /// SelectionDAGISel - This is the common base class used for SelectionDAG-based /// pattern-matching instruction selectors. @@ -51,7 +52,7 @@ public: MachineRegisterInfo *RegInfo; SelectionDAG *CurDAG; SelectionDAGBuilder *SDB; - AliasAnalysis *AA; + AAResults *AA; GCFunctionInfo *GFI; CodeGenOpt::Level OptLevel; const TargetInstrInfo *TII; @@ -162,6 +163,7 @@ public: OPC_EmitMergeInputChains1_1, OPC_EmitMergeInputChains1_2, OPC_EmitCopyToReg, + OPC_EmitCopyToReg2, OPC_EmitNodeXForm, OPC_EmitNode, // Space-optimized forms that implicitly encode number of result VTs. diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 5aab9643e09d..ceb8b72635a2 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -548,10 +548,15 @@ BEGIN_TWO_BYTE_PACK() class LSBaseSDNodeBitfields { friend class LSBaseSDNode; + friend class MaskedGatherScatterSDNode; uint16_t : NumMemSDNodeBits; - uint16_t AddressingMode : 3; // enum ISD::MemIndexedMode + // This storage is shared between disparate class hierarchies to hold an + // enumeration specific to the class hierarchy in use. + // LSBaseSDNode => enum ISD::MemIndexedMode + // MaskedGatherScatterSDNode => enum ISD::MemIndexType + uint16_t AddressingMode : 3; }; enum { NumLSBaseSDNodeBits = NumMemSDNodeBits + 3 }; @@ -696,14 +701,20 @@ public: case ISD::STRICT_FLOG: case ISD::STRICT_FLOG10: case ISD::STRICT_FLOG2: + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: case ISD::STRICT_FRINT: case ISD::STRICT_FNEARBYINT: case ISD::STRICT_FMAXNUM: case ISD::STRICT_FMINNUM: case ISD::STRICT_FCEIL: case ISD::STRICT_FFLOOR: + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::STRICT_FP_ROUND: case ISD::STRICT_FP_EXTEND: return true; @@ -1346,6 +1357,17 @@ public: /// store occurs. 
AtomicOrdering getOrdering() const { return MMO->getOrdering(); } + /// Return true if the memory operation ordering is Unordered or higher. + bool isAtomic() const { return MMO->isAtomic(); } + + /// Returns true if the memory operation doesn't imply any ordering + /// constraints on surrounding memory operations beyond the normal memory + /// aliasing rules. + bool isUnordered() const { return MMO->isUnordered(); } + + /// Returns true if the memory operation is neither atomic or volatile. + bool isSimple() const { return !isAtomic() && !isVolatile(); } + /// Return the type of the in-memory value. EVT getMemoryVT() const { return MemoryVT; } @@ -1702,16 +1724,16 @@ class GlobalAddressSDNode : public SDNode { const GlobalValue *TheGlobal; int64_t Offset; - unsigned char TargetFlags; + unsigned TargetFlags; GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, const GlobalValue *GA, EVT VT, int64_t o, - unsigned char TF); + unsigned TF); public: const GlobalValue *getGlobal() const { return TheGlobal; } int64_t getOffset() const { return Offset; } - unsigned char getTargetFlags() const { return TargetFlags; } + unsigned getTargetFlags() const { return TargetFlags; } // Return the address space this GlobalAddress belongs to. unsigned getAddressSpace() const; @@ -1778,16 +1800,16 @@ class JumpTableSDNode : public SDNode { friend class SelectionDAG; int JTI; - unsigned char TargetFlags; + unsigned TargetFlags; - JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned char TF) + JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned TF) : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable, 0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) { } public: int getIndex() const { return JTI; } - unsigned char getTargetFlags() const { return TargetFlags; } + unsigned getTargetFlags() const { return TargetFlags; } static bool classof(const SDNode *N) { return N->getOpcode() == ISD::JumpTable || @@ -1804,10 +1826,10 @@ class ConstantPoolSDNode : public SDNode { } Val; int Offset; // It's a MachineConstantPoolValue if top bit is set. unsigned Alignment; // Minimum alignment requirement of CP (not log2 value). - unsigned char TargetFlags; + unsigned TargetFlags; ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o, - unsigned Align, unsigned char TF) + unsigned Align, unsigned TF) : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0, DebugLoc(), getSDVTList(VT)), Offset(o), Alignment(Align), TargetFlags(TF) { @@ -1816,7 +1838,7 @@ class ConstantPoolSDNode : public SDNode { } ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v, - EVT VT, int o, unsigned Align, unsigned char TF) + EVT VT, int o, unsigned Align, unsigned TF) : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0, DebugLoc(), getSDVTList(VT)), Offset(o), Alignment(Align), TargetFlags(TF) { @@ -1847,7 +1869,7 @@ public: // Return the alignment of this constant pool object, which is either 0 (for // default alignment) or the desired value. 
unsigned getAlignment() const { return Alignment; } - unsigned char getTargetFlags() const { return TargetFlags; } + unsigned getTargetFlags() const { return TargetFlags; } Type *getType() const; @@ -1861,16 +1883,16 @@ public: class TargetIndexSDNode : public SDNode { friend class SelectionDAG; - unsigned char TargetFlags; + unsigned TargetFlags; int Index; int64_t Offset; public: - TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned char TF) - : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)), - TargetFlags(TF), Index(Idx), Offset(Ofs) {} + TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned TF) + : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)), + TargetFlags(TF), Index(Idx), Offset(Ofs) {} - unsigned char getTargetFlags() const { return TargetFlags; } + unsigned getTargetFlags() const { return TargetFlags; } int getIndex() const { return Index; } int64_t getOffset() const { return Offset; } @@ -2063,17 +2085,17 @@ class BlockAddressSDNode : public SDNode { const BlockAddress *BA; int64_t Offset; - unsigned char TargetFlags; + unsigned TargetFlags; BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba, - int64_t o, unsigned char Flags) + int64_t o, unsigned Flags) : SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)), BA(ba), Offset(o), TargetFlags(Flags) {} public: const BlockAddress *getBlockAddress() const { return BA; } int64_t getOffset() const { return Offset; } - unsigned char getTargetFlags() const { return TargetFlags; } + unsigned getTargetFlags() const { return TargetFlags; } static bool classof(const SDNode *N) { return N->getOpcode() == ISD::BlockAddress || @@ -2104,15 +2126,16 @@ class ExternalSymbolSDNode : public SDNode { friend class SelectionDAG; const char *Symbol; - unsigned char TargetFlags; + unsigned TargetFlags; - ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned char TF, EVT VT) - : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, - 0, DebugLoc(), getSDVTList(VT)), Symbol(Sym), TargetFlags(TF) {} + ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned TF, EVT VT) + : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, 0, + DebugLoc(), getSDVTList(VT)), + Symbol(Sym), TargetFlags(TF) {} public: const char *getSymbol() const { return Symbol; } - unsigned char getTargetFlags() const { return TargetFlags; } + unsigned getTargetFlags() const { return TargetFlags; } static bool classof(const SDNode *N) { return N->getOpcode() == ISD::ExternalSymbol || @@ -2181,8 +2204,6 @@ public: : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { LSBaseSDNodeBits.AddressingMode = AM; assert(getAddressingMode() == AM && "Value truncated"); - assert((!MMO->isAtomic() || MMO->isVolatile()) && - "use an AtomicSDNode instead for non-volatile atomics"); } const SDValue &getOffset() const { @@ -2362,8 +2383,24 @@ public: MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, - MachineMemOperand *MMO) - : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {} + MachineMemOperand *MMO, ISD::MemIndexType IndexType) + : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { + LSBaseSDNodeBits.AddressingMode = IndexType; + assert(getIndexType() == IndexType && "Value truncated"); + } + + /// How is Index applied to BasePtr when computing addresses. 
+ ISD::MemIndexType getIndexType() const { + return static_cast(LSBaseSDNodeBits.AddressingMode); + } + bool isIndexScaled() const { + return (getIndexType() == ISD::SIGNED_SCALED) || + (getIndexType() == ISD::UNSIGNED_SCALED); + } + bool isIndexSigned() const { + return (getIndexType() == ISD::SIGNED_SCALED) || + (getIndexType() == ISD::SIGNED_UNSCALED); + } // In the both nodes address is Op1, mask is Op2: // MaskedGatherSDNode (Chain, passthru, mask, base, index, scale) @@ -2387,8 +2424,10 @@ public: friend class SelectionDAG; MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, - EVT MemVT, MachineMemOperand *MMO) - : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO) {} + EVT MemVT, MachineMemOperand *MMO, + ISD::MemIndexType IndexType) + : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO, + IndexType) {} const SDValue &getPassThru() const { return getOperand(1); } @@ -2404,8 +2443,10 @@ public: friend class SelectionDAG; MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, - EVT MemVT, MachineMemOperand *MMO) - : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO) {} + EVT MemVT, MachineMemOperand *MMO, + ISD::MemIndexType IndexType) + : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO, + IndexType) {} const SDValue &getValue() const { return getOperand(1); } diff --git a/include/llvm/CodeGen/StackProtector.h b/include/llvm/CodeGen/StackProtector.h index 2bdf4425e24a..ed52db3e6269 100644 --- a/include/llvm/CodeGen/StackProtector.h +++ b/include/llvm/CodeGen/StackProtector.h @@ -61,6 +61,12 @@ private: /// protection when -fstack-protection is used. unsigned SSPBufferSize = 0; + /// VisitedPHIs - The set of PHI nodes visited when determining + /// if a variable's reference has been taken. This set + /// is maintained to ensure we don't visit the same PHI node multiple + /// times. + SmallPtrSet VisitedPHIs; + // A prologue is generated. bool HasPrologue = false; diff --git a/include/llvm/CodeGen/SwitchLoweringUtils.h b/include/llvm/CodeGen/SwitchLoweringUtils.h index 62134dc792f7..b8adcf759b19 100644 --- a/include/llvm/CodeGen/SwitchLoweringUtils.h +++ b/include/llvm/CodeGen/SwitchLoweringUtils.h @@ -212,16 +212,17 @@ struct BitTestBlock { BitTestInfo Cases; BranchProbability Prob; BranchProbability DefaultProb; + bool OmitRangeCheck; BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D, BitTestInfo C, BranchProbability Pr) : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D), - Cases(std::move(C)), Prob(Pr) {} + Cases(std::move(C)), Prob(Pr), OmitRangeCheck(false) {} }; -/// Return the range of value within a range. +/// Return the range of values within a range. 
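Editorial sketch: the isIndexScaled()/isIndexSigned() accessors above describe how a gather/scatter's Index operand combines with BasePtr for each ISD::MemIndexType. The helper below is illustrative only (laneAddress, IndexBits and the flat-pointer arithmetic are not part of the patch); it spells out the four cases: SIGNED_SCALED = Base + sext(Index)*Scale, SIGNED_UNSCALED = Base + sext(Index), UNSIGNED_SCALED = Base + zext(Index)*Scale, UNSIGNED_UNSCALED = Base + zext(Index).

#include <cstdint>

// IndexSigned/IndexScaled stand in for N->isIndexSigned()/N->isIndexScaled();
// IndexBits is the width of one index element (1..64).
uint64_t laneAddress(uint64_t Base, uint64_t RawIndex, unsigned IndexBits,
                     uint64_t Scale, bool IndexSigned, bool IndexScaled) {
  uint64_t Mask = IndexBits >= 64 ? ~0ULL : ((1ULL << IndexBits) - 1);
  uint64_t Idx = RawIndex & Mask;                        // zero-extend
  if (IndexSigned && IndexBits >= 1 && IndexBits < 64 &&
      (Idx >> (IndexBits - 1)) != 0)
    Idx |= ~Mask;                                        // sign-extend instead
  return Base + Idx * (IndexScaled ? Scale : 1);
}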
uint64_t getJumpTableRange(const CaseClusterVector &Clusters, unsigned First, unsigned Last); diff --git a/include/llvm/CodeGen/TargetCallingConv.h b/include/llvm/CodeGen/TargetCallingConv.h index aebeeecbe506..db3d1175afee 100644 --- a/include/llvm/CodeGen/TargetCallingConv.h +++ b/include/llvm/CodeGen/TargetCallingConv.h @@ -14,6 +14,7 @@ #define LLVM_CODEGEN_TARGETCALLINGCONV_H #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include @@ -120,16 +121,22 @@ namespace ISD { bool isPointer() const { return IsPointer; } void setPointer() { IsPointer = 1; } - unsigned getByValAlign() const { return (1U << ByValAlign) / 2; } - void setByValAlign(unsigned A) { - ByValAlign = Log2_32(A) + 1; - assert(getByValAlign() == A && "bitfield overflow"); + unsigned getByValAlign() const { + MaybeAlign A = decodeMaybeAlign(ByValAlign); + return A ? A->value() : 0; + } + void setByValAlign(Align A) { + ByValAlign = encode(A); + assert(getByValAlign() == A.value() && "bitfield overflow"); } - unsigned getOrigAlign() const { return (1U << OrigAlign) / 2; } - void setOrigAlign(unsigned A) { - OrigAlign = Log2_32(A) + 1; - assert(getOrigAlign() == A && "bitfield overflow"); + unsigned getOrigAlign() const { + MaybeAlign A = decodeMaybeAlign(OrigAlign); + return A ? A->value() : 0; + } + void setOrigAlign(Align A) { + OrigAlign = encode(A); + assert(getOrigAlign() == A.value() && "bitfield overflow"); } unsigned getByValSize() const { return ByValSize; } diff --git a/include/llvm/CodeGen/TargetFrameLowering.h b/include/llvm/CodeGen/TargetFrameLowering.h index 878c9ffd2b51..72edb27964c4 100644 --- a/include/llvm/CodeGen/TargetFrameLowering.h +++ b/include/llvm/CodeGen/TargetFrameLowering.h @@ -28,6 +28,7 @@ namespace TargetStackID { enum Value { Default = 0, SGPRSpill = 1, + SVEVector = 2, NoAlloc = 255 }; } @@ -53,15 +54,15 @@ public: }; private: StackDirection StackDir; - unsigned StackAlignment; - unsigned TransientStackAlignment; + Align StackAlignment; + Align TransientStackAlignment; int LocalAreaOffset; bool StackRealignable; public: - TargetFrameLowering(StackDirection D, unsigned StackAl, int LAO, - unsigned TransAl = 1, bool StackReal = true) - : StackDir(D), StackAlignment(StackAl), TransientStackAlignment(TransAl), - LocalAreaOffset(LAO), StackRealignable(StackReal) {} + TargetFrameLowering(StackDirection D, Align StackAl, int LAO, + Align TransAl = Align::None(), bool StackReal = true) + : StackDir(D), StackAlignment(StackAl), TransientStackAlignment(TransAl), + LocalAreaOffset(LAO), StackRealignable(StackReal) {} virtual ~TargetFrameLowering(); @@ -76,7 +77,7 @@ public: /// stack pointer must be aligned on entry to a function. Typically, this /// is the largest alignment for any data object in the target. /// - unsigned getStackAlignment() const { return StackAlignment; } + unsigned getStackAlignment() const { return StackAlignment.value(); } /// alignSPAdjust - This method aligns the stack adjustment to the correct /// alignment. @@ -95,7 +96,7 @@ public: /// calls. /// unsigned getTransientStackAlignment() const { - return TransientStackAlignment; + return TransientStackAlignment.value(); } /// isStackRealignable - This method returns whether the stack can be @@ -366,15 +367,10 @@ public: /// Check if given function is safe for not having callee saved registers. /// This is used when interprocedural register allocation is enabled. 
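Editorial sketch: the getByValAlign()/setByValAlign(Align) pair above now round-trips through the Support/Alignment.h encoding, where 0 means "unset" and any other value stores log2(alignment) + 1, matching the old (1U << N) / 2 convention. The stand-alone functions below are illustrative stand-ins for llvm::encode / llvm::decodeMaybeAlign, not the real helpers.

#include <cstdint>

unsigned encodeAlign(uint64_t Alignment) {   // Alignment must be a power of two
  unsigned Log2 = 0;
  while ((uint64_t(1) << Log2) < Alignment)
    ++Log2;
  return Log2 + 1;                           // 0 is reserved for "unset"
}
uint64_t decodeAlign(unsigned Encoded) {
  return Encoded == 0 ? 0 : (uint64_t(1) << (Encoded - 1));
}
// Example: setByValAlign(Align(16)) stores encodeAlign(16) == 5 in the
// bit-field, and getByValAlign() recovers decodeAlign(5) == 16.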
- static bool isSafeForNoCSROpt(const Function &F) { - if (!F.hasLocalLinkage() || F.hasAddressTaken() || - !F.hasFnAttribute(Attribute::NoRecurse)) - return false; - // Function should not be optimized as tail call. - for (const User *U : F.users()) - if (auto CS = ImmutableCallSite(U)) - if (CS.isTailCall()) - return false; + static bool isSafeForNoCSROpt(const Function &F); + + /// Check if the no-CSR optimisation is profitable for the given function. + virtual bool isProfitableForNoCSROpt(const Function &F) const { return true; } diff --git a/include/llvm/CodeGen/TargetInstrInfo.h b/include/llvm/CodeGen/TargetInstrInfo.h index 25b04f8c019a..5011cf34c0ee 100644 --- a/include/llvm/CodeGen/TargetInstrInfo.h +++ b/include/llvm/CodeGen/TargetInstrInfo.h @@ -22,7 +22,7 @@ #include "llvm/CodeGen/MachineCombinerPattern.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOutliner.h" #include "llvm/CodeGen/PseudoSourceValue.h" @@ -38,10 +38,12 @@ namespace llvm { +class AAResults; class DFAPacketizer; class InstrItineraryData; class LiveIntervals; class LiveVariables; +class MachineLoop; class MachineMemOperand; class MachineRegisterInfo; class MCAsmInfo; @@ -60,6 +62,8 @@ class TargetSubtargetInfo; template class SmallVectorImpl; +using ParamLoadedValue = std::pair; + //--------------------------------------------------------------------------- /// /// TargetInstrInfo - Interface to description of machine instruction set @@ -92,7 +96,7 @@ public: /// registers so that the instructions result is independent of the place /// in the function. bool isTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA = nullptr) const { + AAResults *AA = nullptr) const { return MI.getOpcode() == TargetOpcode::IMPLICIT_DEF || (MI.getDesc().isRematerializable() && (isReallyTriviallyReMaterializable(MI, AA) || @@ -108,7 +112,7 @@ protected: /// not always available. /// Requirements must be check as stated in isTriviallyReMaterializable() . virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA) const { + AAResults *AA) const { return false; } @@ -151,7 +155,7 @@ private: /// this function does target-independent tests to determine if the /// instruction is really trivially rematerializable. bool isReallyTriviallyReMaterializableGeneric(const MachineInstr &MI, - AliasAnalysis *AA) const; + AAResults *AA) const; public: /// These methods return the opcode of the frame setup/destroy instructions @@ -419,7 +423,8 @@ public: /// findCommutedOpIndices(MI, Op1, Op2); /// can be interpreted as a query asking to find an operand that would be /// commutable with the operand#1. - virtual bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, + virtual bool findCommutedOpIndices(const MachineInstr &MI, + unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const; /// A pair composed of a register and a sub-register index. @@ -659,6 +664,50 @@ public: BytesAdded); } + /// Object returned by analyzeLoopForPipelining. Allows software pipelining + /// implementations to query attributes of the loop being pipelined and to + /// apply target-specific updates to the loop once pipelining is complete. + class PipelinerLoopInfo { + public: + virtual ~PipelinerLoopInfo(); + /// Return true if the given instruction should not be pipelined and should + /// be ignored. 
An example could be a loop comparison, or induction variable + /// update with no users being pipelined. + virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const = 0; + + /// Create a condition to determine if the trip count of the loop is greater + /// than TC. + /// + /// If the trip count is statically known to be greater than TC, return + /// true. If the trip count is statically known to be not greater than TC, + /// return false. Otherwise return nullopt and fill out Cond with the test + /// condition. + virtual Optional + createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB, + SmallVectorImpl &Cond) = 0; + + /// Modify the loop such that the trip count is + /// OriginalTC + TripCountAdjust. + virtual void adjustTripCount(int TripCountAdjust) = 0; + + /// Called when the loop's preheader has been modified to NewPreheader. + virtual void setPreheader(MachineBasicBlock *NewPreheader) = 0; + + /// Called when the loop is being removed. Any instructions in the preheader + /// should be removed. + /// + /// Once this function is called, no other functions on this object are + /// valid; the loop has been removed. + virtual void disposed() = 0; + }; + + /// Analyze loop L, which must be a single-basic-block loop, and if the + /// conditions can be understood enough produce a PipelinerLoopInfo object. + virtual std::unique_ptr + analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const { + return nullptr; + } + /// Analyze the loop code, return true if it cannot be understoo. Upon /// success, this function returns false and returns information about the /// induction variable and compare instruction used at the end. @@ -730,6 +779,19 @@ public: return false; } + /// Return the increase in code size needed to predicate a contiguous run of + /// NumInsts instructions. + virtual unsigned extraSizeToPredicateInstructions(const MachineFunction &MF, + unsigned NumInsts) const { + return 0; + } + + /// Return an estimate for the code size reduction (in bytes) which will be + /// caused by removing the given branch instruction during if-conversion. + virtual unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const { + return getInstSizeInBytes(MI); + } + /// Return true if it's profitable to unpredicate /// one side of a 'diamond', i.e. two sides of if-else predicated on mutually /// exclusive predicates. @@ -1558,8 +1620,7 @@ public: /// function. virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, - const MachineInstr &MIb, - AliasAnalysis *AA = nullptr) const { + const MachineInstr &MIb) const { assert((MIa.mayLoad() || MIa.mayStore()) && "MIa must load from or modify a memory location"); assert((MIb.mayLoad() || MIb.mayStore()) && @@ -1636,6 +1697,28 @@ public: return false; } + /// During PHI eleimination lets target to make necessary checks and + /// insert the copy to the PHI destination register in a target specific + /// manner. + virtual MachineInstr *createPHIDestinationCopy( + MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, + const DebugLoc &DL, Register Src, Register Dst) const { + return BuildMI(MBB, InsPt, DL, get(TargetOpcode::COPY), Dst) + .addReg(Src); + } + + /// During PHI eleimination lets target to make necessary checks and + /// insert the copy to the PHI destination register in a target specific + /// manner. 
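Editorial sketch of a target-side implementation of the PipelinerLoopInfo interface introduced above. MyTargetPipelinerLoopInfo, LoopCompare and IVUpdate are invented names, and the Optional<bool> / SmallVectorImpl<MachineOperand> parameter types are assumed; all loop bookkeeping is elided.

class MyTargetPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
  MachineInstr *LoopCompare = nullptr; // backedge compare, found by analysis
  MachineInstr *IVUpdate = nullptr;    // induction-variable update

public:
  bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
    // Never pipeline the loop-control instructions themselves.
    return MI == LoopCompare || MI == IVUpdate;
  }

  Optional<bool>
  createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB,
                                  SmallVectorImpl<MachineOperand> &Cond) override {
    // With a compile-time trip count this could return KnownTC > TC directly;
    // otherwise emit a compare into MBB, describe it in Cond, and return None.
    return None;
  }

  void adjustTripCount(int TripCountAdjust) override { /* rewrite IVUpdate */ }
  void setPreheader(MachineBasicBlock *NewPreheader) override {}
  void disposed() override {}
};

// A target would hand back std::make_unique<MyTargetPipelinerLoopInfo>(...)
// (or nullptr on failure) from its analyzeLoopForPipelining override.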
+ virtual MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsPt, + const DebugLoc &DL, Register Src, + Register SrcSubReg, + Register Dst) const { + return BuildMI(MBB, InsPt, DL, get(TargetOpcode::COPY), Dst) + .addReg(Src, 0, SrcSubReg); + } + /// Returns a \p outliner::OutlinedFunction struct containing target-specific /// information for a set of outlining candidates. virtual outliner::OutlinedFunction getOutliningCandidateInfo( @@ -1691,6 +1774,11 @@ public: return false; } + /// Produce the expression describing the \p MI loading a value into + /// the parameter's forwarding register. + virtual Optional + describeLoadedValue(const MachineInstr &MI) const; + private: unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode; unsigned CatchRetOpcode; diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h index d5cca60bb1b2..a58fca7e73f5 100644 --- a/include/llvm/CodeGen/TargetLowering.h +++ b/include/llvm/CodeGen/TargetLowering.h @@ -28,7 +28,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/RuntimeLibcalls.h" @@ -48,6 +47,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" @@ -72,8 +72,10 @@ class Constant; class FastISel; class FunctionLoweringInfo; class GlobalValue; +class GISelKnownBits; class IntrinsicInst; struct KnownBits; +class LegacyDivergenceAnalysis; class LLVMContext; class MachineBasicBlock; class MachineFunction; @@ -122,8 +124,7 @@ public: TypeLegal, // The target natively supports this type. TypePromoteInteger, // Replace this integer with a larger one. TypeExpandInteger, // Split this integer into two of half the size. - TypeSoftenFloat, // Convert this float to a same size integer type, - // if an operation is not supported in target HW. + TypeSoftenFloat, // Convert this float to a same size integer type. TypeExpandFloat, // Split this float into two of half the size. TypeScalarizeVector, // Replace this one-element vector with its element. TypeSplitVector, // Split this vector into two of half the size. @@ -284,7 +285,7 @@ public: /// a constant pool load whose address depends on the select condition. The /// parameter may be used to differentiate a select with FP compare from /// integer compare. - virtual bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const { + virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const { return true; } @@ -539,6 +540,12 @@ public: return hasAndNotCompare(X); } + /// Return true if the target has a bit-test instruction: + /// (X & (1 << Y)) ==/!= 0 + /// This knowledge can be used to prevent breaking the pattern, + /// or creating it if it could be recognized. + virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; } + /// There are two ways to clear extreme bits (either low or high): /// Mask: x & (-1 << y) (the instcombine canonical form) /// Shifts: x >> y << y @@ -571,6 +578,38 @@ public: return false; } + /// Given the pattern + /// (X & (C l>>/<< Y)) ==/!= 0 + /// return true if it should be transformed into: + /// ((X <> Y) & C) ==/!= 0 + /// WARNING: if 'X' is a constant, the fold may deadlock! 
+ /// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat() + /// here because it can end up being not linked in. + virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, + unsigned OldShiftOpcode, unsigned NewShiftOpcode, + SelectionDAG &DAG) const { + if (hasBitTest(X, Y)) { + // One interesting pattern that we'd want to form is 'bit test': + // ((1 << Y) & C) ==/!= 0 + // But we also need to be careful not to try to reverse that fold. + + // Is this '1 << Y' ? + if (OldShiftOpcode == ISD::SHL && CC->isOne()) + return false; // Keep the 'bit test' pattern. + + // Will it be '1 << Y' after the transform ? + if (XC && NewShiftOpcode == ISD::SHL && XC->isOne()) + return true; // Do form the 'bit test' pattern. + } + + // If 'X' is a constant, and we transform, then we will immediately + // try to undo the fold, thus causing endless combine loop. + // So by default, let's assume everyone prefers the fold + // iff 'X' is not a constant. + return !XC; + } + /// These two forms are equivalent: /// sub %y, (xor %x, -1) /// add (add %x, 1), %y @@ -798,9 +837,9 @@ public: PointerUnion ptrVal; int offset = 0; // offset off of ptrVal - unsigned size = 0; // the size of the memory location + uint64_t size = 0; // the size of the memory location // (taken from memVT if zero) - unsigned align = 1; // alignment + MaybeAlign align = Align::None(); // alignment MachineMemOperand::Flags flags = MachineMemOperand::MONone; IntrinsicInfo() = default; @@ -884,6 +923,7 @@ public: case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: + case ISD::UMULFIXSAT: Supported = isSupportedFixedPointOperation(Op, VT, Scale); break; } @@ -891,6 +931,8 @@ public: return Supported ? Action : Expand; } + // If Op is a strict floating-point operation, return the result + // of getOperationAction for the equivalent non-strict operation. LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const { unsigned EqOpc; switch (Op) { @@ -911,26 +953,25 @@ public: case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break; case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break; case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break; + case ISD::STRICT_LRINT: EqOpc = ISD::LRINT; break; + case ISD::STRICT_LLRINT: EqOpc = ISD::LLRINT; break; case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break; case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break; case ISD::STRICT_FMAXNUM: EqOpc = ISD::FMAXNUM; break; case ISD::STRICT_FMINNUM: EqOpc = ISD::FMINNUM; break; case ISD::STRICT_FCEIL: EqOpc = ISD::FCEIL; break; case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break; + case ISD::STRICT_LROUND: EqOpc = ISD::LROUND; break; + case ISD::STRICT_LLROUND: EqOpc = ISD::LLROUND; break; case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break; case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break; + case ISD::STRICT_FP_TO_SINT: EqOpc = ISD::FP_TO_SINT; break; + case ISD::STRICT_FP_TO_UINT: EqOpc = ISD::FP_TO_UINT; break; case ISD::STRICT_FP_ROUND: EqOpc = ISD::FP_ROUND; break; case ISD::STRICT_FP_EXTEND: EqOpc = ISD::FP_EXTEND; break; } - auto Action = getOperationAction(EqOpc, VT); - - // We don't currently handle Custom or Promote for strict FP pseudo-ops. - // For now, we just expand for those cases. 
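Editorial worked example for the hoisting hook above, on 8-bit values; sameBit is an invented checker, not part of the patch. With C = 0x80 and a logical shift right of the constant, (X & (0x80 l>> Y)) != 0 tests bit (7 - Y) of X, and the rewritten form ((X << Y) & 0x80) != 0 tests the same bit with the constant un-shifted. The default implementation keeps an existing '1 << Y' bit-test shape, forms one when the transform would create it, and otherwise folds only when X is not a constant, to avoid an endless combine loop.

constexpr bool sameBit(unsigned X, unsigned Y) {
  return ((X & (0x80u >> Y)) != 0) == (((X << Y) & 0x80u) != 0);
}
static_assert(sameBit(0x40, 1) && sameBit(0x01, 7) && sameBit(0xff, 3),
              "both forms test the same bit of X");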
- if (Action != Legal) - Action = Expand; - - return Action; + return getOperationAction(EqOpc, VT); } /// Return true if the specified operation is legal on this target or can be @@ -1206,7 +1247,7 @@ public: EltTy = PointerTy.getTypeForEVT(Ty->getContext()); } return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false), - VTy->getNumElements()); + VTy->getElementCount()); } return EVT::getEVT(Ty, AllowUnknown); @@ -1316,9 +1357,9 @@ public: /// Certain targets have context senstive alignment requirements, where one /// type has the alignment requirement of another type. - virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy, - DataLayout DL) const { - return DL.getABITypeAlignment(ArgTy); + virtual Align getABIAlignmentForCallingConv(Type *ArgTy, + DataLayout DL) const { + return Align(DL.getABITypeAlignment(ArgTy)); } /// If true, then instruction selection should seek to shrink the FP constant @@ -1426,11 +1467,38 @@ public: return false; } + /// LLT handling variant. + virtual bool allowsMisalignedMemoryAccesses( + LLT, unsigned AddrSpace = 0, unsigned Align = 1, + MachineMemOperand::Flags Flags = MachineMemOperand::MONone, + bool * /*Fast*/ = nullptr) const { + return false; + } + + /// This function returns true if the memory access is aligned or if the + /// target allows this specific unaligned memory access. If the access is + /// allowed, the optional final parameter returns if the access is also fast + /// (as defined by the target). + bool allowsMemoryAccessForAlignment( + LLVMContext &Context, const DataLayout &DL, EVT VT, + unsigned AddrSpace = 0, unsigned Alignment = 1, + MachineMemOperand::Flags Flags = MachineMemOperand::MONone, + bool *Fast = nullptr) const; + + /// Return true if the memory access of this type is aligned or if the target + /// allows this specific unaligned access for the given MachineMemOperand. + /// If the access is allowed, the optional final parameter returns if the + /// access is also fast (as defined by the target). + bool allowsMemoryAccessForAlignment(LLVMContext &Context, + const DataLayout &DL, EVT VT, + const MachineMemOperand &MMO, + bool *Fast = nullptr) const; + /// Return true if the target supports a memory access of this type for the /// given address space and alignment. If the access is allowed, the optional /// final parameter returns if the access is also fast (as defined by the /// target). - bool + virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace = 0, unsigned Alignment = 1, MachineMemOperand::Flags Flags = MachineMemOperand::MONone, @@ -1463,6 +1531,16 @@ public: return MVT::Other; } + + /// LLT returning variant. + virtual LLT + getOptimalMemOpLLT(uint64_t /*Size*/, unsigned /*DstAlign*/, + unsigned /*SrcAlign*/, bool /*IsMemset*/, + bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/, + const AttributeList & /*FuncAttributes*/) const { + return LLT(); + } + /// Returns true if it's safe to use load / store of the specified type to /// expand memcpy / memset inline. /// @@ -1522,35 +1600,19 @@ public: report_fatal_error("Funclet EH is not implemented for this target"); } - /// Returns the target's jmp_buf size in bytes (if never set, the default is - /// 200) - unsigned getJumpBufSize() const { - return JumpBufSize; - } - - /// Returns the target's jmp_buf alignment in bytes (if never set, the default - /// is 0) - unsigned getJumpBufAlignment() const { - return JumpBufAlignment; - } - /// Return the minimum stack alignment of an argument. 
- unsigned getMinStackArgumentAlignment() const { + Align getMinStackArgumentAlignment() const { return MinStackArgumentAlignment; } /// Return the minimum function alignment. - unsigned getMinFunctionAlignment() const { - return MinFunctionAlignment; - } + Align getMinFunctionAlignment() const { return MinFunctionAlignment; } /// Return the preferred function alignment. - unsigned getPrefFunctionAlignment() const { - return PrefFunctionAlignment; - } + Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; } /// Return the preferred loop alignment. - virtual unsigned getPrefLoopAlignment(MachineLoop *ML = nullptr) const { + virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const { return PrefLoopAlignment; } @@ -1772,6 +1834,11 @@ public: return IsSigned; } + /// Returns true if arguments should be extended in lib calls. + virtual bool shouldExtendTypeInLibCall(EVT Type) const { + return true; + } + /// Returns how the given (atomic) load should be expanded by the /// IR-level AtomicExpand pass. virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const { @@ -1848,7 +1915,8 @@ public: /// This may be true if the target does not directly support the /// multiplication operation for the specified type or the sequence of simpler /// ops is faster than the multiply. - virtual bool decomposeMulByConstant(EVT VT, SDValue C) const { + virtual bool decomposeMulByConstant(LLVMContext &Context, + EVT VT, SDValue C) const { return false; } @@ -2056,40 +2124,25 @@ protected: TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7); } - /// Set the target's required jmp_buf buffer size (in bytes); default is 200 - void setJumpBufSize(unsigned Size) { - JumpBufSize = Size; - } - - /// Set the target's required jmp_buf buffer alignment (in bytes); default is - /// 0 - void setJumpBufAlignment(unsigned Align) { - JumpBufAlignment = Align; - } - - /// Set the target's minimum function alignment (in log2(bytes)) - void setMinFunctionAlignment(unsigned Align) { - MinFunctionAlignment = Align; + /// Set the target's minimum function alignment. + void setMinFunctionAlignment(Align Alignment) { + MinFunctionAlignment = Alignment; } /// Set the target's preferred function alignment. This should be set if - /// there is a performance benefit to higher-than-minimum alignment (in - /// log2(bytes)) - void setPrefFunctionAlignment(unsigned Align) { - PrefFunctionAlignment = Align; + /// there is a performance benefit to higher-than-minimum alignment + void setPrefFunctionAlignment(Align Alignment) { + PrefFunctionAlignment = Alignment; } - /// Set the target's preferred loop alignment. Default alignment is zero, it - /// means the target does not care about loop alignment. The alignment is - /// specified in log2(bytes). The target may also override - /// getPrefLoopAlignment to provide per-loop values. - void setPrefLoopAlignment(unsigned Align) { - PrefLoopAlignment = Align; - } + /// Set the target's preferred loop alignment. Default alignment is one, it + /// means the target does not care about loop alignment. The target may also + /// override getPrefLoopAlignment to provide per-loop values. + void setPrefLoopAlignment(Align Alignment) { PrefLoopAlignment = Alignment; } - /// Set the minimum stack alignment of an argument (in log2(bytes)). - void setMinStackArgumentAlignment(unsigned Align) { - MinStackArgumentAlignment = Align; + /// Set the minimum stack alignment of an argument. 
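Editorial sketch: since these hooks now take an Align rather than a log2(bytes) value, a target's TargetLowering constructor migrates roughly as below. The particular byte values and the old log2 arguments shown in the comments are illustrative only.

// Hypothetical MyTargetLowering constructor body:
setMinFunctionAlignment(Align(4));       // was setMinFunctionAlignment(2)
setPrefFunctionAlignment(Align(16));     // was setPrefFunctionAlignment(4)
setPrefLoopAlignment(Align(16));         // was setPrefLoopAlignment(4)
setMinStackArgumentAlignment(Align(8));  // was setMinStackArgumentAlignment(3)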
+ void setMinStackArgumentAlignment(Align Alignment) { + MinStackArgumentAlignment = Alignment; } /// Set the maximum atomic operation size supported by the @@ -2555,6 +2608,12 @@ public: // same blocks of its users. virtual bool shouldConsiderGEPOffsetSplit() const { return false; } + // Return the shift amount threshold for profitable transforms into shifts. + // Transforms creating shifts above the returned value will be avoided. + virtual unsigned getShiftAmountThreshold(EVT VT) const { + return VT.getScalarSizeInBits(); + } + //===--------------------------------------------------------------------===// // Runtime Library hooks // @@ -2650,25 +2709,19 @@ private: /// register usage. Sched::Preference SchedPreferenceInfo; - /// The size, in bytes, of the target's jmp_buf buffers - unsigned JumpBufSize; - - /// The alignment, in bytes, of the target's jmp_buf buffers - unsigned JumpBufAlignment; - /// The minimum alignment that any argument on the stack needs to have. - unsigned MinStackArgumentAlignment; + Align MinStackArgumentAlignment; /// The minimum function alignment (used when optimizing for size, and to /// prevent explicitly provided alignment from leading to incorrect code). - unsigned MinFunctionAlignment; + Align MinFunctionAlignment; /// The preferred function alignment (used when alignment unspecified and /// optimizing for speed). - unsigned PrefFunctionAlignment; + Align PrefFunctionAlignment; - /// The preferred loop alignment. - unsigned PrefLoopAlignment; + /// The preferred loop alignment (in log2 bot in bytes). + Align PrefLoopAlignment; /// Size in bits of the maximum atomics size the backend supports. /// Accesses larger than this will be expanded by AtomicExpandPass. @@ -2744,7 +2797,6 @@ private: /// up the MVT::LAST_VALUETYPE value to the next multiple of 8. uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 7) / 8]; -protected: ValueTypeActionImpl ValueTypeActions; private: @@ -2790,7 +2842,7 @@ protected: /// expected to be merged. unsigned GatherAllAliasesMaxDepth; - /// Specify maximum number of store instructions per memset call. + /// \brief Specify maximum number of store instructions per memset call. /// /// When lowering \@llvm.memset this field specifies the maximum number of /// store operations that may be substituted for the call to memset. Targets @@ -2801,12 +2853,10 @@ protected: /// with 16-bit alignment would result in four 2-byte stores and one 1-byte /// store. This only applies to setting a constant array of a constant size. unsigned MaxStoresPerMemset; - - /// Maximum number of stores operations that may be substituted for the call - /// to memset, used for functions with OptSize attribute. + /// Likewise for functions with the OptSize attribute. unsigned MaxStoresPerMemsetOptSize; - /// Specify maximum bytes of store instructions per memcpy call. + /// \brief Specify maximum number of store instructions per memcpy call. /// /// When lowering \@llvm.memcpy this field specifies the maximum number of /// store operations that may be substituted for a call to memcpy. Targets @@ -2818,8 +2868,8 @@ protected: /// and one 1-byte store. This only applies to copying a constant array of /// constant size. unsigned MaxStoresPerMemcpy; - - + /// Likewise for functions with the OptSize attribute. + unsigned MaxStoresPerMemcpyOptSize; /// \brief Specify max number of store instructions to glue in inlined memcpy. 
/// /// When memcpy is inlined based on MaxStoresPerMemcpy, specify maximum number @@ -2827,13 +2877,22 @@ protected: // vectorization later on. unsigned MaxGluedStoresPerMemcpy = 0; - /// Maximum number of store operations that may be substituted for a call to - /// memcpy, used for functions with OptSize attribute. - unsigned MaxStoresPerMemcpyOptSize; + /// \brief Specify maximum number of load instructions per memcmp call. + /// + /// When lowering \@llvm.memcmp this field specifies the maximum number of + /// pairs of load operations that may be substituted for a call to memcmp. + /// Targets must set this value based on the cost threshold for that target. + /// Targets should assume that the memcmp will be done using as many of the + /// largest load operations first, followed by smaller ones, if necessary, per + /// alignment restrictions. For example, loading 7 bytes on a 32-bit machine + /// with 32-bit alignment would result in one 4-byte load, a one 2-byte load + /// and one 1-byte load. This only applies to copying a constant array of + /// constant size. unsigned MaxLoadsPerMemcmp; + /// Likewise for functions with the OptSize attribute. unsigned MaxLoadsPerMemcmpOptSize; - /// Specify maximum bytes of store instructions per memmove call. + /// \brief Specify maximum number of store instructions per memmove call. /// /// When lowering \@llvm.memmove this field specifies the maximum number of /// store instructions that may be substituted for a call to memmove. Targets @@ -2844,9 +2903,7 @@ protected: /// with 8-bit alignment would result in nine 1-byte stores. This only /// applies to copying a constant array of constant size. unsigned MaxStoresPerMemmove; - - /// Maximum number of store instructions that may be substituted for a call to - /// memmove, used for functions with OptSize attribute. + /// Likewise for functions with the OptSize attribute. unsigned MaxStoresPerMemmoveOptSize; /// Tells the code generator that select is more expensive than a branch if @@ -2885,6 +2942,7 @@ protected: class TargetLowering : public TargetLoweringBase { public: struct DAGCombinerInfo; + struct MakeLibCallOptions; TargetLowering(const TargetLowering &) = delete; TargetLowering &operator=(const TargetLowering &) = delete; @@ -2925,6 +2983,14 @@ public: return false; } + /// Returns true if the specified base+offset is a legal indexed addressing + /// mode for this target. \p MI is the load or store instruction that is being + /// considered for transformation. + virtual bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset, + bool IsPre, MachineRegisterInfo &MRI) const { + return false; + } + /// Return the entry encoding for a jump table in the current function. The /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. virtual unsigned getJumpTableEncoding() const; @@ -2955,14 +3021,15 @@ public: void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - const SDLoc &DL) const; + const SDLoc &DL, const SDValue OldLHS, + const SDValue OldRHS) const; /// Returns a pair of (return value, chain). /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC. 
- std::pair makeLibCall( - SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef Ops, - bool isSigned, const SDLoc &dl, bool doesNotReturn = false, - bool isReturnValueUsed = true, bool isPostTypeLegalization = false) const; + std::pair makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, + EVT RetVT, ArrayRef Ops, + MakeLibCallOptions CallOptions, + const SDLoc &dl) const; /// Check whether parameters to a call that are passed in callee saved /// registers are the same as from the calling function. This needs to be @@ -3065,6 +3132,14 @@ public: bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, DAGCombinerInfo &DCI) const; + /// More limited version of SimplifyDemandedBits that can be used to "look + /// through" ops that don't contribute to the DemandedBits/DemandedElts - + /// bitwise ops etc. + SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, + const APInt &DemandedElts, + SelectionDAG &DAG, + unsigned Depth) const; + /// Look at Vector Op. At this point, we know that only the DemandedElts /// elements of the result of Op are ever used downstream. If we can use /// this information to simplify Op, create a new simplified DAG node and @@ -3099,6 +3174,15 @@ public: const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth = 0) const; + /// Determine which of the bits specified in Mask are known to be either zero + /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts + /// argument allows us to only collect the known bits that are shared by the + /// requested vector elements. This is for GISel. + virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, + Register R, KnownBits &Known, + const APInt &DemandedElts, + const MachineRegisterInfo &MRI, + unsigned Depth = 0) const; /// Determine which of the bits of FrameIndex \p FIOp are known to be 0. /// Default implementation computes low bits based on alignment @@ -3139,6 +3223,21 @@ public: TargetLoweringOpt &TLO, unsigned Depth = 0) const; + /// More limited version of SimplifyDemandedBits that can be used to "look + /// through" ops that don't contribute to the DemandedBits/DemandedElts - + /// bitwise ops etc. + virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode( + SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, + SelectionDAG &DAG, unsigned Depth) const; + + /// Tries to build a legal vector shuffle using the provided parameters + /// or equivalent variations. The Mask argument maybe be modified as the + /// function tries different variations. + /// Returns an empty SDValue if the operation fails. + SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, + SDValue N1, MutableArrayRef Mask, + SelectionDAG &DAG) const; + /// This method returns the constant pool value that will be loaded by LD. /// NOTE: You must check for implicit extensions of the constant by LD. virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const; @@ -3174,6 +3273,8 @@ public: SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true); SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true); + bool recursivelyDeleteUnusedNodes(SDNode *N); + void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO); }; @@ -3297,6 +3398,18 @@ public: llvm_unreachable("Not Implemented"); } + /// Return 1 if we can compute the negated form of the specified expression + /// for the same cost as the expression itself, or 2 if we can compute the + /// negated form more cheaply than the expression itself. 
Else return 0. + virtual char isNegatibleForFree(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, bool ForCodeSize, + unsigned Depth = 0) const; + + /// If isNegatibleForFree returns true, return the newly negated expression. + virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, bool ForCodeSize, + unsigned Depth = 0) const; + //===--------------------------------------------------------------------===// // Lowering methods - These methods must be implemented by targets so that // the SelectionDAGBuilder code knows how to lower these. @@ -3468,6 +3581,51 @@ public: } }; + /// This structure is used to pass arguments to makeLibCall function. + struct MakeLibCallOptions { + // By passing type list before soften to makeLibCall, the target hook + // shouldExtendTypeInLibCall can get the original type before soften. + ArrayRef OpsVTBeforeSoften; + EVT RetVTBeforeSoften; + bool IsSExt : 1; + bool DoesNotReturn : 1; + bool IsReturnValueUsed : 1; + bool IsPostTypeLegalization : 1; + bool IsSoften : 1; + + MakeLibCallOptions() + : IsSExt(false), DoesNotReturn(false), IsReturnValueUsed(true), + IsPostTypeLegalization(false), IsSoften(false) {} + + MakeLibCallOptions &setSExt(bool Value = true) { + IsSExt = Value; + return *this; + } + + MakeLibCallOptions &setNoReturn(bool Value = true) { + DoesNotReturn = Value; + return *this; + } + + MakeLibCallOptions &setDiscardResult(bool Value = true) { + IsReturnValueUsed = !Value; + return *this; + } + + MakeLibCallOptions &setIsPostTypeLegalization(bool Value = true) { + IsPostTypeLegalization = Value; + return *this; + } + + MakeLibCallOptions &setTypeListBeforeSoften(ArrayRef OpsVT, EVT RetVT, + bool Value = true) { + OpsVTBeforeSoften = OpsVT; + RetVTBeforeSoften = RetVT; + IsSoften = Value; + return *this; + } + }; + /// This function lowers an abstract call to a function into an actual call. /// This returns a pair of operands. The first element is the return value /// for the function (if RetTy is not VoidTy). The second element is the @@ -3537,8 +3695,8 @@ public: /// Return the register ID of the name passed in. Used by named register /// global variables extension. There is no target-independent behaviour /// so the default action is to bail. - virtual unsigned getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const { + virtual Register getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const { report_fatal_error("Named registers not implemented for this target"); } @@ -3597,6 +3755,25 @@ public: return MachineMemOperand::MONone; } + /// Should SelectionDAG lower an atomic store of the given kind as a normal + /// StoreSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to + /// eventually migrate all targets to the using StoreSDNodes, but porting is + /// being done target at a time. + virtual bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const { + assert(SI.isAtomic() && "violated precondition"); + return false; + } + + /// Should SelectionDAG lower an atomic load of the given kind as a normal + /// LoadSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to + /// eventually migrate all targets to the using LoadSDNodes, but porting is + /// being done target at a time. 
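Editorial sketch: the MakeLibCallOptions builder above replaces makeLibCall's previous list of boolean parameters. In the call site below, DAG, dl, TLI, the operand list Ops, its pre-softening type list OpsVT, and the choice of RTLIB::ADD_F64 are all illustrative assumptions.

TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true)                       // sign-extend integer operands
    .setTypeListBeforeSoften(OpsVT, MVT::f64);  // original types, consulted by
                                                // shouldExtendTypeInLibCall
std::pair<SDValue, SDValue> CallResult =
    TLI.makeLibCall(DAG, RTLIB::ADD_F64, MVT::f64, Ops, CallOptions, dl);
SDValue Value = CallResult.first;   // the libcall's result
SDValue Chain = CallResult.second;  // the output chain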
+ virtual bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const { + assert(LI.isAtomic() && "violated precondition"); + return false; + } + + /// This callback is invoked by the type legalizer to legalize nodes with an /// illegal operand type but legal result types. It replaces the /// LowerOperation callback in the type Legalizer. The reason we can not do @@ -3665,6 +3842,7 @@ public: C_Register, // Constraint represents specific register(s). C_RegisterClass, // Constraint represents any of register(s) in class. C_Memory, // Memory constraint. + C_Immediate, // Requires an immediate. C_Other, // Something else. C_Unknown // Unsupported constraint. }; @@ -3905,7 +4083,7 @@ public: /// \param N Node to expand /// \param Result output after conversion /// \returns True, if the expansion was successful, false otherwise - bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; + bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const; /// Expand UINT(i64) to double(f64) conversion /// \param N Node to expand @@ -3986,8 +4164,8 @@ public: /// method accepts integers as its arguments. SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const; - /// Method for building the DAG expansion of ISD::SMULFIX. This method accepts - /// integers as its arguments. + /// Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT]. This + /// method accepts integers as its arguments. SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const; /// Method for building the DAG expansion of ISD::U(ADD|SUB)O. Expansion @@ -4070,6 +4248,11 @@ private: DAGCombinerInfo &DCI, const SDLoc &DL) const; + // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <> Y) & C) ==/!= 0 + SDValue optimizeSetCCByHoistingAndByConstFromLogicalShift( + EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond, + DAGCombinerInfo &DCI, const SDLoc &DL) const; + SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode, ISD::CondCode Cond, DAGCombinerInfo &DCI, const SDLoc &DL, @@ -4077,6 +4260,14 @@ private: SDValue buildUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode, ISD::CondCode Cond, DAGCombinerInfo &DCI, const SDLoc &DL) const; + + SDValue prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, + SDValue CompTargetNode, ISD::CondCode Cond, + DAGCombinerInfo &DCI, const SDLoc &DL, + SmallVectorImpl &Created) const; + SDValue buildSREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode, + ISD::CondCode Cond, DAGCombinerInfo &DCI, + const SDLoc &DL) const; }; /// Given an LLVM IR type and return type attributes, compute the return value diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index a1fb81cb009d..59f5ddbd9dac 100644 --- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -14,6 +14,7 @@ #ifndef LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H #define LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H +#include "llvm/BinaryFormat/XCOFF.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCExpr.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -35,7 +36,7 @@ class TargetLoweringObjectFileELF : public TargetLoweringObjectFile { protected: MCSymbolRefExpr::VariantKind PLTRelativeVariantKind = MCSymbolRefExpr::VK_None; - const TargetMachine *TM; + const TargetMachine *TM = nullptr; public: TargetLoweringObjectFileELF() = default; @@ -126,7 +127,8 @@ public: MachineModuleInfo *MMI) const override; /// Get MachO PC 
relative GOT entry relocation - const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym, + const MCExpr *getIndirectSymViaGOTPCRel(const GlobalValue *GV, + const MCSymbol *Sym, const MCValue &MV, int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const override; @@ -206,6 +208,34 @@ public: const TargetMachine &TM) const override; }; +class TargetLoweringObjectFileXCOFF : public TargetLoweringObjectFile { +public: + TargetLoweringObjectFileXCOFF() = default; + ~TargetLoweringObjectFileXCOFF() override = default; + + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; + + bool shouldPutJumpTableInFunctionSection(bool UsesLabelDifference, + const Function &F) const override; + + MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind, + const TargetMachine &TM) const override; + + MCSection *getStaticCtorSection(unsigned Priority, + const MCSymbol *KeySym) const override; + MCSection *getStaticDtorSection(unsigned Priority, + const MCSymbol *KeySym) const override; + + const MCExpr *lowerRelativeReference(const GlobalValue *LHS, + const GlobalValue *RHS, + const TargetMachine &TM) const override; + + MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind, + const TargetMachine &TM) const override; + + static XCOFF::StorageClass getStorageClassForGlobal(const GlobalObject *GO); +}; + } // end namespace llvm #endif // LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H diff --git a/include/llvm/CodeGen/TargetPassConfig.h b/include/llvm/CodeGen/TargetPassConfig.h index 0bd82aafac37..d48fc664c1c3 100644 --- a/include/llvm/CodeGen/TargetPassConfig.h +++ b/include/llvm/CodeGen/TargetPassConfig.h @@ -280,7 +280,7 @@ public: /// /// This can also be used to plug a new MachineSchedStrategy into an instance /// of the standard ScheduleDAGMI: - /// return new ScheduleDAGMI(C, make_unique(C), /*RemoveKillFlags=*/false) + /// return new ScheduleDAGMI(C, std::make_unique(C), /*RemoveKillFlags=*/false) /// /// Return NULL to select the default (generic) machine scheduler. virtual ScheduleDAGInstrs * diff --git a/include/llvm/CodeGen/TargetRegisterInfo.h b/include/llvm/CodeGen/TargetRegisterInfo.h index ddbd677b3eaa..c42ca3ad6eb9 100644 --- a/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/include/llvm/CodeGen/TargetRegisterInfo.h @@ -87,11 +87,20 @@ public: /// Return true if the specified register is included in this register class. /// This does not include virtual registers. bool contains(unsigned Reg) const { + /// FIXME: Historically this function has returned false when given vregs + /// but it should probably only receive physical registers + if (!Register::isPhysicalRegister(Reg)) + return false; return MC->contains(Reg); } /// Return true if both registers are in this class. bool contains(unsigned Reg1, unsigned Reg2) const { + /// FIXME: Historically this function has returned false when given a vregs + /// but it should probably only receive physical registers + if (!Register::isPhysicalRegister(Reg1) || + !Register::isPhysicalRegister(Reg2)) + return false; return MC->contains(Reg1, Reg2); } @@ -258,57 +267,6 @@ public: // Further sentinels can be allocated from the small negative integers. // DenseMapInfo uses -1u and -2u. - /// isStackSlot - Sometimes it is useful the be able to store a non-negative - /// frame index in a variable that normally holds a register. isStackSlot() - /// returns true if Reg is in the range used for stack slots. 
- /// - /// Note that isVirtualRegister() and isPhysicalRegister() cannot handle stack - /// slots, so if a variable may contains a stack slot, always check - /// isStackSlot() first. - /// - static bool isStackSlot(unsigned Reg) { - return int(Reg) >= (1 << 30); - } - - /// Compute the frame index from a register value representing a stack slot. - static int stackSlot2Index(unsigned Reg) { - assert(isStackSlot(Reg) && "Not a stack slot"); - return int(Reg - (1u << 30)); - } - - /// Convert a non-negative frame index to a stack slot register value. - static unsigned index2StackSlot(int FI) { - assert(FI >= 0 && "Cannot hold a negative frame index."); - return FI + (1u << 30); - } - - /// Return true if the specified register number is in - /// the physical register namespace. - static bool isPhysicalRegister(unsigned Reg) { - assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first."); - return int(Reg) > 0; - } - - /// Return true if the specified register number is in - /// the virtual register namespace. - static bool isVirtualRegister(unsigned Reg) { - assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first."); - return int(Reg) < 0; - } - - /// Convert a virtual register number to a 0-based index. - /// The first virtual register in a function will get the index 0. - static unsigned virtReg2Index(unsigned Reg) { - assert(isVirtualRegister(Reg) && "Not a virtual register"); - return Reg & ~(1u << 31); - } - - /// Convert a 0-based index to a virtual register number. - /// This is the inverse operation of VirtReg2IndexFunctor below. - static unsigned index2VirtReg(unsigned Index) { - return Index | (1u << 31); - } - /// Return the size in bits of a register from class RC. unsigned getRegSizeInBits(const TargetRegisterClass &RC) const { return getRegClassInfo(RC).RegSize; @@ -419,9 +377,9 @@ public: /// Returns true if the two registers are equal or alias each other. /// The registers may be virtual registers. - bool regsOverlap(unsigned regA, unsigned regB) const { + bool regsOverlap(Register regA, Register regB) const { if (regA == regB) return true; - if (isVirtualRegister(regA) || isVirtualRegister(regB)) + if (regA.isVirtual() || regB.isVirtual()) return false; // Regunits are numerically ordered. Find a common unit. @@ -489,6 +447,14 @@ public: llvm_unreachable("target does not provide no preserved mask"); } + /// Return a list of all of the registers which are clobbered "inside" a call + /// to the given function. For example, these might be needed for PLT + /// sequences of long-branch veneers. + virtual ArrayRef + getIntraCallClobberedRegs(const MachineFunction *MF) const { + return {}; + } + /// Return true if all bits that are set in mask \p mask0 are also set in /// \p mask1. bool regmaskSubsetEqual(const uint32_t *mask0, const uint32_t *mask1) const; @@ -535,6 +501,11 @@ public: return false; } + /// This is a wrapper around getCallPreservedMask(). + /// Return true if the register is preserved after the call. + virtual bool isCalleeSavedPhysReg(unsigned PhysReg, + const MachineFunction &MF) const; + /// Prior to adding the live-out mask to a stackmap or patchpoint /// instruction, provide the target the opportunity to adjust it (mainly to /// remove pseudo-registers that should be ignored). @@ -709,13 +680,9 @@ public: /// Find the largest common subclass of A and B. /// Return NULL if there is no common subclass. - /// The common subclass should contain - /// simple value type SVT if it is not the Any type. 
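Editorial note: the static stack-slot and virtual-register helpers removed here now live on the llvm::Register class, which is why the contains()/regsOverlap() hunks above and VirtReg2IndexFunctor below go through it. A sketch of the call-site migration; vregIndexOrZero is a made-up example function.

// Before: TargetRegisterInfo::isVirtualRegister(Reg) / virtReg2Index(Reg).
// After:
unsigned vregIndexOrZero(Register R) {
  if (R.isVirtual())
    return Register::virtReg2Index(R); // 0-based index, inverse of index2VirtReg
  return 0;                            // physical register, stack slot, or no register
}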
const TargetRegisterClass * getCommonSubClass(const TargetRegisterClass *A, - const TargetRegisterClass *B, - const MVT::SimpleValueType SVT = - MVT::SimpleValueType::Any) const; + const TargetRegisterClass *B) const; /// Returns a TargetRegisterClass used for pointer values. /// If a target supports multiple different pointer register classes, @@ -1005,6 +972,13 @@ public: const MachineRegisterInfo &MRI) const { return nullptr; } + + /// Returns the physical register number of sub-register "Index" + /// for physical register RegNo. Return zero if the sub-register does not + /// exist. + inline Register getSubReg(MCRegister Reg, unsigned Idx) const { + return static_cast(this)->getSubReg(Reg, Idx); + } }; //===----------------------------------------------------------------------===// @@ -1156,7 +1130,7 @@ public: struct VirtReg2IndexFunctor { using argument_type = unsigned; unsigned operator()(unsigned Reg) const { - return TargetRegisterInfo::virtReg2Index(Reg); + return Register::virtReg2Index(Reg); } }; @@ -1170,7 +1144,7 @@ struct VirtReg2IndexFunctor { /// %physreg17 - a physical register when no TRI instance given. /// /// Usage: OS << printReg(Reg, TRI, SubRegIdx) << '\n'; -Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI = nullptr, +Printable printReg(Register Reg, const TargetRegisterInfo *TRI = nullptr, unsigned SubIdx = 0, const MachineRegisterInfo *MRI = nullptr); diff --git a/include/llvm/CodeGen/TargetSubtargetInfo.h b/include/llvm/CodeGen/TargetSubtargetInfo.h index 037fc3ed3243..56018eca8c27 100644 --- a/include/llvm/CodeGen/TargetSubtargetInfo.h +++ b/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -106,12 +106,10 @@ public: // us do things like a dedicated avx512 selector). However, we might want // to also specialize selectors by MachineFunction, which would let us be // aware of optsize/optnone and such. - virtual const InstructionSelector *getInstructionSelector() const { + virtual InstructionSelector *getInstructionSelector() const { return nullptr; } - virtual unsigned getHwMode() const { return 0; } - /// Target can subclass this hook to select a different DAG scheduler. virtual RegisterScheduler::FunctionPassCtor getDAGScheduler(CodeGenOpt::Level) const { @@ -274,6 +272,12 @@ public: /// scheduling, DAGCombine, etc.). virtual bool useAA() const; + /// \brief Sink addresses into blocks using GEP instructions rather than + /// pointer casts and arithmetic. + virtual bool addrSinkUsingGEPs() const { + return useAA(); + } + /// Enable the use of the early if conversion pass. virtual bool enableEarlyIfConversion() const { return false; } diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h index c540c94f79d9..cd4c4ca64081 100644 --- a/include/llvm/CodeGen/ValueTypes.h +++ b/include/llvm/CodeGen/ValueTypes.h @@ -81,7 +81,7 @@ namespace llvm { /// Returns the EVT that represents a vector EC.Min elements in length, /// where each element is of type VT. 
- static EVT getVectorVT(LLVMContext &Context, EVT VT, MVT::ElementCount EC) { + static EVT getVectorVT(LLVMContext &Context, EVT VT, ElementCount EC) { MVT M = MVT::getVectorVT(VT.V, EC); if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE) return M; @@ -277,7 +277,7 @@ namespace llvm { } // Given a (possibly scalable) vector type, return the ElementCount - MVT::ElementCount getVectorElementCount() const { + ElementCount getVectorElementCount() const { assert((isVector()) && "Invalid vector type!"); if (isSimple()) return V.getVectorElementCount(); diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td index 5818ac183fcc..16df565bc8b8 100644 --- a/include/llvm/CodeGen/ValueTypes.td +++ b/include/llvm/CodeGen/ValueTypes.td @@ -40,127 +40,132 @@ def v16i1 : ValueType<16, 18>; // 16 x i1 vector value def v32i1 : ValueType<32 , 19>; // 32 x i1 vector value def v64i1 : ValueType<64 , 20>; // 64 x i1 vector value def v128i1 : ValueType<128, 21>; // 128 x i1 vector value -def v512i1 : ValueType<512, 22>; // 512 x i1 vector value -def v1024i1: ValueType<1024,23>; //1024 x i1 vector value - -def v1i8 : ValueType<8, 24>; // 1 x i8 vector value -def v2i8 : ValueType<16 , 25>; // 2 x i8 vector value -def v4i8 : ValueType<32 , 26>; // 4 x i8 vector value -def v8i8 : ValueType<64 , 27>; // 8 x i8 vector value -def v16i8 : ValueType<128, 28>; // 16 x i8 vector value -def v32i8 : ValueType<256, 29>; // 32 x i8 vector value -def v64i8 : ValueType<512, 30>; // 64 x i8 vector value -def v128i8 : ValueType<1024,31>; //128 x i8 vector value -def v256i8 : ValueType<2048,32>; //256 x i8 vector value - -def v1i16 : ValueType<16 , 33>; // 1 x i16 vector value -def v2i16 : ValueType<32 , 34>; // 2 x i16 vector value -def v4i16 : ValueType<64 , 35>; // 4 x i16 vector value -def v8i16 : ValueType<128, 36>; // 8 x i16 vector value -def v16i16 : ValueType<256, 37>; // 16 x i16 vector value -def v32i16 : ValueType<512, 38>; // 32 x i16 vector value -def v64i16 : ValueType<1024,39>; // 64 x i16 vector value -def v128i16: ValueType<2048,40>; //128 x i16 vector value - -def v1i32 : ValueType<32 , 41>; // 1 x i32 vector value -def v2i32 : ValueType<64 , 42>; // 2 x i32 vector value -def v3i32 : ValueType<96 , 43>; // 3 x i32 vector value -def v4i32 : ValueType<128, 44>; // 4 x i32 vector value -def v5i32 : ValueType<160, 45>; // 5 x i32 vector value -def v8i32 : ValueType<256, 46>; // 8 x i32 vector value -def v16i32 : ValueType<512, 47>; // 16 x i32 vector value -def v32i32 : ValueType<1024,48>; // 32 x i32 vector value -def v64i32 : ValueType<2048,49>; // 64 x i32 vector value -def v128i32 : ValueType<4096,50>; // 128 x i32 vector value -def v256i32 : ValueType<8182,51>; // 256 x i32 vector value -def v512i32 : ValueType<16384,52>; // 512 x i32 vector value -def v1024i32 : ValueType<32768,53>; // 1024 x i32 vector value -def v2048i32 : ValueType<65536,54>; // 2048 x i32 vector value - -def v1i64 : ValueType<64 , 55>; // 1 x i64 vector value -def v2i64 : ValueType<128, 56>; // 2 x i64 vector value -def v4i64 : ValueType<256, 57>; // 4 x i64 vector value -def v8i64 : ValueType<512, 58>; // 8 x i64 vector value -def v16i64 : ValueType<1024,59>; // 16 x i64 vector value -def v32i64 : ValueType<2048,60>; // 32 x i64 vector value - -def v1i128 : ValueType<128, 61>; // 1 x i128 vector value - -def nxv1i1 : ValueType<1, 62>; // n x 1 x i1 vector value -def nxv2i1 : ValueType<2, 63>; // n x 2 x i1 vector value -def nxv4i1 : ValueType<4, 64>; // n x 4 x i1 vector value -def nxv8i1 : ValueType<8, 65>; // 
n x 8 x i1 vector value -def nxv16i1 : ValueType<16, 66>; // n x 16 x i1 vector value -def nxv32i1 : ValueType<32, 67>; // n x 32 x i1 vector value - -def nxv1i8 : ValueType<8, 68>; // n x 1 x i8 vector value -def nxv2i8 : ValueType<16, 69>; // n x 2 x i8 vector value -def nxv4i8 : ValueType<32, 70>; // n x 4 x i8 vector value -def nxv8i8 : ValueType<64, 71>; // n x 8 x i8 vector value -def nxv16i8 : ValueType<128, 72>; // n x 16 x i8 vector value -def nxv32i8 : ValueType<256, 73>; // n x 32 x i8 vector value - -def nxv1i16 : ValueType<16, 74>; // n x 1 x i16 vector value -def nxv2i16 : ValueType<32, 75>; // n x 2 x i16 vector value -def nxv4i16 : ValueType<64, 76>; // n x 4 x i16 vector value -def nxv8i16 : ValueType<128, 77>; // n x 8 x i16 vector value -def nxv16i16: ValueType<256, 78>; // n x 16 x i16 vector value -def nxv32i16: ValueType<512, 79>; // n x 32 x i16 vector value - -def nxv1i32 : ValueType<32, 80>; // n x 1 x i32 vector value -def nxv2i32 : ValueType<64, 81>; // n x 2 x i32 vector value -def nxv4i32 : ValueType<128, 82>; // n x 4 x i32 vector value -def nxv8i32 : ValueType<256, 83>; // n x 8 x i32 vector value -def nxv16i32: ValueType<512, 84>; // n x 16 x i32 vector value -def nxv32i32: ValueType<1024,85>; // n x 32 x i32 vector value - -def nxv1i64 : ValueType<64, 86>; // n x 1 x i64 vector value -def nxv2i64 : ValueType<128, 87>; // n x 2 x i64 vector value -def nxv4i64 : ValueType<256, 88>; // n x 4 x i64 vector value -def nxv8i64 : ValueType<512, 89>; // n x 8 x i64 vector value -def nxv16i64: ValueType<1024,90>; // n x 16 x i64 vector value -def nxv32i64: ValueType<2048,91>; // n x 32 x i64 vector value - -def v2f16 : ValueType<32 , 92>; // 2 x f16 vector value -def v4f16 : ValueType<64 , 93>; // 4 x f16 vector value -def v8f16 : ValueType<128, 94>; // 8 x f16 vector value -def v1f32 : ValueType<32 , 95>; // 1 x f32 vector value -def v2f32 : ValueType<64 , 96>; // 2 x f32 vector value -def v3f32 : ValueType<96 , 97>; // 3 x f32 vector value -def v4f32 : ValueType<128, 98>; // 4 x f32 vector value -def v5f32 : ValueType<160, 99>; // 5 x f32 vector value -def v8f32 : ValueType<256, 100>; // 8 x f32 vector value -def v16f32 : ValueType<512, 101>; // 16 x f32 vector value -def v32f32 : ValueType<1024, 102>; // 32 x f32 vector value -def v64f32 : ValueType<2048, 103>; // 64 x f32 vector value -def v128f32 : ValueType<4096, 104>; // 128 x f32 vector value -def v256f32 : ValueType<8182, 105>; // 256 x f32 vector value -def v512f32 : ValueType<16384, 106>; // 512 x f32 vector value -def v1024f32 : ValueType<32768, 107>; // 1024 x f32 vector value -def v2048f32 : ValueType<65536, 108>; // 2048 x f32 vector value -def v1f64 : ValueType<64, 109>; // 1 x f64 vector value -def v2f64 : ValueType<128, 110>; // 2 x f64 vector value -def v4f64 : ValueType<256, 111>; // 4 x f64 vector value -def v8f64 : ValueType<512, 112>; // 8 x f64 vector value - -def nxv2f16 : ValueType<32 , 113>; // n x 2 x f16 vector value -def nxv4f16 : ValueType<64 , 114>; // n x 4 x f16 vector value -def nxv8f16 : ValueType<128, 115>; // n x 8 x f16 vector value -def nxv1f32 : ValueType<32 , 116>; // n x 1 x f32 vector value -def nxv2f32 : ValueType<64 , 117>; // n x 2 x f32 vector value -def nxv4f32 : ValueType<128, 118>; // n x 4 x f32 vector value -def nxv8f32 : ValueType<256, 119>; // n x 8 x f32 vector value -def nxv16f32 : ValueType<512, 120>; // n x 16 x f32 vector value -def nxv1f64 : ValueType<64, 121>; // n x 1 x f64 vector value -def nxv2f64 : ValueType<128, 122>; // n x 2 x f64 vector value -def 
nxv4f64 : ValueType<256, 123>; // n x 4 x f64 vector value -def nxv8f64 : ValueType<512, 124>; // n x 8 x f64 vector value - -def x86mmx : ValueType<64 , 125>; // X86 MMX value -def FlagVT : ValueType<0 , 126>; // Pre-RA sched glue -def isVoid : ValueType<0 , 127>; // Produces no value -def untyped: ValueType<8 , 128>; // Produces an untyped value -def exnref: ValueType<0, 129>; // WebAssembly's exnref type +def v256i1 : ValueType<256, 22>; // 256 x i1 vector value +def v512i1 : ValueType<512, 23>; // 512 x i1 vector value +def v1024i1: ValueType<1024,24>; //1024 x i1 vector value + +def v1i8 : ValueType<8, 25>; // 1 x i8 vector value +def v2i8 : ValueType<16 , 26>; // 2 x i8 vector value +def v4i8 : ValueType<32 , 27>; // 4 x i8 vector value +def v8i8 : ValueType<64 , 28>; // 8 x i8 vector value +def v16i8 : ValueType<128, 29>; // 16 x i8 vector value +def v32i8 : ValueType<256, 30>; // 32 x i8 vector value +def v64i8 : ValueType<512, 31>; // 64 x i8 vector value +def v128i8 : ValueType<1024,32>; //128 x i8 vector value +def v256i8 : ValueType<2048,33>; //256 x i8 vector value + +def v1i16 : ValueType<16 , 34>; // 1 x i16 vector value +def v2i16 : ValueType<32 , 35>; // 2 x i16 vector value +def v3i16 : ValueType<48 , 36>; // 3 x i16 vector value +def v4i16 : ValueType<64 , 37>; // 4 x i16 vector value +def v8i16 : ValueType<128, 38>; // 8 x i16 vector value +def v16i16 : ValueType<256, 39>; // 16 x i16 vector value +def v32i16 : ValueType<512, 40>; // 32 x i16 vector value +def v64i16 : ValueType<1024,41>; // 64 x i16 vector value +def v128i16: ValueType<2048,42>; //128 x i16 vector value + +def v1i32 : ValueType<32 , 43>; // 1 x i32 vector value +def v2i32 : ValueType<64 , 44>; // 2 x i32 vector value +def v3i32 : ValueType<96 , 45>; // 3 x i32 vector value +def v4i32 : ValueType<128, 46>; // 4 x i32 vector value +def v5i32 : ValueType<160, 47>; // 5 x i32 vector value +def v8i32 : ValueType<256, 48>; // 8 x i32 vector value +def v16i32 : ValueType<512, 49>; // 16 x i32 vector value +def v32i32 : ValueType<1024,50>; // 32 x i32 vector value +def v64i32 : ValueType<2048,51>; // 64 x i32 vector value +def v128i32 : ValueType<4096,52>; // 128 x i32 vector value +def v256i32 : ValueType<8182,53>; // 256 x i32 vector value +def v512i32 : ValueType<16384,54>; // 512 x i32 vector value +def v1024i32 : ValueType<32768,55>; // 1024 x i32 vector value +def v2048i32 : ValueType<65536,56>; // 2048 x i32 vector value + +def v1i64 : ValueType<64 , 57>; // 1 x i64 vector value +def v2i64 : ValueType<128, 58>; // 2 x i64 vector value +def v4i64 : ValueType<256, 59>; // 4 x i64 vector value +def v8i64 : ValueType<512, 60>; // 8 x i64 vector value +def v16i64 : ValueType<1024,61>; // 16 x i64 vector value +def v32i64 : ValueType<2048,62>; // 32 x i64 vector value + +def v1i128 : ValueType<128, 63>; // 1 x i128 vector value + +def v2f16 : ValueType<32 , 64>; // 2 x f16 vector value +def v3f16 : ValueType<48 , 65>; // 3 x f16 vector value +def v4f16 : ValueType<64 , 66>; // 4 x f16 vector value +def v8f16 : ValueType<128, 67>; // 8 x f16 vector value +def v16f16 : ValueType<256, 68>; // 8 x f16 vector value +def v32f16 : ValueType<512, 69>; // 8 x f16 vector value +def v1f32 : ValueType<32 , 70>; // 1 x f32 vector value +def v2f32 : ValueType<64 , 71>; // 2 x f32 vector value +def v3f32 : ValueType<96 , 72>; // 3 x f32 vector value +def v4f32 : ValueType<128, 73>; // 4 x f32 vector value +def v5f32 : ValueType<160, 74>; // 5 x f32 vector value +def v8f32 : ValueType<256, 75>; // 8 x f32 vector value +def 
v16f32 : ValueType<512, 76>; // 16 x f32 vector value +def v32f32 : ValueType<1024, 77>; // 32 x f32 vector value +def v64f32 : ValueType<2048, 78>; // 64 x f32 vector value +def v128f32 : ValueType<4096, 79>; // 128 x f32 vector value +def v256f32 : ValueType<8182, 80>; // 256 x f32 vector value +def v512f32 : ValueType<16384, 81>; // 512 x f32 vector value +def v1024f32 : ValueType<32768, 82>; // 1024 x f32 vector value +def v2048f32 : ValueType<65536, 83>; // 2048 x f32 vector value +def v1f64 : ValueType<64, 84>; // 1 x f64 vector value +def v2f64 : ValueType<128, 85>; // 2 x f64 vector value +def v4f64 : ValueType<256, 86>; // 4 x f64 vector value +def v8f64 : ValueType<512, 87>; // 8 x f64 vector value + +def nxv1i1 : ValueType<1, 88>; // n x 1 x i1 vector value +def nxv2i1 : ValueType<2, 89>; // n x 2 x i1 vector value +def nxv4i1 : ValueType<4, 90>; // n x 4 x i1 vector value +def nxv8i1 : ValueType<8, 91>; // n x 8 x i1 vector value +def nxv16i1 : ValueType<16, 92>; // n x 16 x i1 vector value +def nxv32i1 : ValueType<32, 93>; // n x 32 x i1 vector value + +def nxv1i8 : ValueType<8, 94>; // n x 1 x i8 vector value +def nxv2i8 : ValueType<16, 95>; // n x 2 x i8 vector value +def nxv4i8 : ValueType<32, 96>; // n x 4 x i8 vector value +def nxv8i8 : ValueType<64, 97>; // n x 8 x i8 vector value +def nxv16i8 : ValueType<128, 98>; // n x 16 x i8 vector value +def nxv32i8 : ValueType<256, 99>; // n x 32 x i8 vector value + +def nxv1i16 : ValueType<16, 100>; // n x 1 x i16 vector value +def nxv2i16 : ValueType<32, 101>; // n x 2 x i16 vector value +def nxv4i16 : ValueType<64, 102>; // n x 4 x i16 vector value +def nxv8i16 : ValueType<128, 103>; // n x 8 x i16 vector value +def nxv16i16: ValueType<256, 104>; // n x 16 x i16 vector value +def nxv32i16: ValueType<512, 105>; // n x 32 x i16 vector value + +def nxv1i32 : ValueType<32, 106>; // n x 1 x i32 vector value +def nxv2i32 : ValueType<64, 107>; // n x 2 x i32 vector value +def nxv4i32 : ValueType<128, 108>; // n x 4 x i32 vector value +def nxv8i32 : ValueType<256, 109>; // n x 8 x i32 vector value +def nxv16i32: ValueType<512, 110>; // n x 16 x i32 vector value +def nxv32i32: ValueType<1024,111>; // n x 32 x i32 vector value + +def nxv1i64 : ValueType<64, 112>; // n x 1 x i64 vector value +def nxv2i64 : ValueType<128, 113>; // n x 2 x i64 vector value +def nxv4i64 : ValueType<256, 114>; // n x 4 x i64 vector value +def nxv8i64 : ValueType<512, 115>; // n x 8 x i64 vector value +def nxv16i64: ValueType<1024,116>; // n x 16 x i64 vector value +def nxv32i64: ValueType<2048,117>; // n x 32 x i64 vector value + +def nxv2f16 : ValueType<32 , 118>; // n x 2 x f16 vector value +def nxv4f16 : ValueType<64 , 119>; // n x 4 x f16 vector value +def nxv8f16 : ValueType<128, 120>; // n x 8 x f16 vector value +def nxv1f32 : ValueType<32 , 121>; // n x 1 x f32 vector value +def nxv2f32 : ValueType<64 , 122>; // n x 2 x f32 vector value +def nxv4f32 : ValueType<128, 123>; // n x 4 x f32 vector value +def nxv8f32 : ValueType<256, 124>; // n x 8 x f32 vector value +def nxv16f32 : ValueType<512, 125>; // n x 16 x f32 vector value +def nxv1f64 : ValueType<64, 126>; // n x 1 x f64 vector value +def nxv2f64 : ValueType<128, 127>; // n x 2 x f64 vector value +def nxv4f64 : ValueType<256, 128>; // n x 4 x f64 vector value +def nxv8f64 : ValueType<512, 129>; // n x 8 x f64 vector value + +def x86mmx : ValueType<64 , 130>; // X86 MMX value +def FlagVT : ValueType<0 , 131>; // Pre-RA sched glue +def isVoid : ValueType<0 , 132>; // Produces no value +def untyped: 
ValueType<8 , 133>; // Produces an untyped value +def exnref: ValueType<0, 134>; // WebAssembly's exnref type def token : ValueType<0 , 248>; // TokenTy def MetadataVT: ValueType<0, 249>; // Metadata diff --git a/include/llvm/CodeGen/VirtRegMap.h b/include/llvm/CodeGen/VirtRegMap.h index 70eb048f05eb..db25ed5c5116 100644 --- a/include/llvm/CodeGen/VirtRegMap.h +++ b/include/llvm/CodeGen/VirtRegMap.h @@ -49,7 +49,7 @@ class TargetInstrInfo; /// it; even spilled virtual registers (the register mapped to a /// spilled register is the temporary used to load it from the /// stack). - IndexedMap Virt2PhysMap; + IndexedMap Virt2PhysMap; /// Virt2StackSlotMap - This is virtual register to stack slot /// mapping. Each spilled virtual register has an entry in it @@ -93,7 +93,7 @@ class TargetInstrInfo; /// returns true if the specified virtual register is /// mapped to a physical register - bool hasPhys(unsigned virtReg) const { + bool hasPhys(Register virtReg) const { return getPhys(virtReg) != NO_PHYS_REG; } @@ -101,20 +101,20 @@ class TargetInstrInfo; /// virtual register Register getPhys(Register virtReg) const { assert(virtReg.isVirtual()); - return Virt2PhysMap[virtReg]; + return Virt2PhysMap[virtReg.id()]; } /// creates a mapping for the specified virtual register to /// the specified physical register - void assignVirt2Phys(unsigned virtReg, MCPhysReg physReg); + void assignVirt2Phys(Register virtReg, MCPhysReg physReg); /// clears the specified virtual register's, physical /// register mapping - void clearVirt(unsigned virtReg) { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - assert(Virt2PhysMap[virtReg] != NO_PHYS_REG && + void clearVirt(Register virtReg) { + assert(virtReg.isVirtual()); + assert(Virt2PhysMap[virtReg.id()] != NO_PHYS_REG && "attempt to clear a not assigned virtual register"); - Virt2PhysMap[virtReg] = NO_PHYS_REG; + Virt2PhysMap[virtReg.id()] = NO_PHYS_REG; } /// clears all virtual to physical register mappings @@ -124,21 +124,21 @@ class TargetInstrInfo; } /// returns true if VirtReg is assigned to its preferred physreg. - bool hasPreferredPhys(unsigned VirtReg); + bool hasPreferredPhys(Register VirtReg); /// returns true if VirtReg has a known preferred register. /// This returns false if VirtReg has a preference that is a virtual /// register that hasn't been assigned yet. - bool hasKnownPreference(unsigned VirtReg); + bool hasKnownPreference(Register VirtReg); /// records virtReg is a split live interval from SReg. - void setIsSplitFromReg(unsigned virtReg, unsigned SReg) { - Virt2SplitMap[virtReg] = SReg; + void setIsSplitFromReg(Register virtReg, unsigned SReg) { + Virt2SplitMap[virtReg.id()] = SReg; } /// returns the live interval virtReg is split from. - unsigned getPreSplitReg(unsigned virtReg) const { - return Virt2SplitMap[virtReg]; + unsigned getPreSplitReg(Register virtReg) const { + return Virt2SplitMap[virtReg.id()]; } /// getOriginal - Return the original virtual register that VirtReg descends @@ -152,28 +152,29 @@ class TargetInstrInfo; /// returns true if the specified virtual register is not /// mapped to a stack slot or rematerialized. - bool isAssignedReg(unsigned virtReg) const { + bool isAssignedReg(Register virtReg) const { if (getStackSlot(virtReg) == NO_STACK_SLOT) return true; // Split register can be assigned a physical register as well as a // stack slot or remat id. 
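Taken together, the VirtRegMap hunks above switch every query to Register and index the underlying maps through Register::id(). A minimal consumer sketch, assuming a pass that already holds the MachineRegisterInfo and the computed VirtRegMap; classifyAssignments is illustrative only:

#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/VirtRegMap.h"

// Walk every virtual register and classify its post-allocation location.
static void classifyAssignments(const llvm::MachineRegisterInfo &MRI,
                                const llvm::VirtRegMap &VRM) {
  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    llvm::Register VReg = llvm::Register::index2VirtReg(I);
    if (VRM.hasPhys(VReg)) {
      llvm::Register Phys = VRM.getPhys(VReg); // assigned a physical register
      (void)Phys;
    } else if (VRM.getStackSlot(VReg) != llvm::VirtRegMap::NO_STACK_SLOT) {
      // Spilled: VRM.getStackSlot(VReg) is the frame index.
    }
  }
}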
- return (Virt2SplitMap[virtReg] && Virt2PhysMap[virtReg] != NO_PHYS_REG); + return (Virt2SplitMap[virtReg.id()] && + Virt2PhysMap[virtReg.id()] != NO_PHYS_REG); } /// returns the stack slot mapped to the specified virtual /// register - int getStackSlot(unsigned virtReg) const { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - return Virt2StackSlotMap[virtReg]; + int getStackSlot(Register virtReg) const { + assert(virtReg.isVirtual()); + return Virt2StackSlotMap[virtReg.id()]; } /// create a mapping for the specifed virtual register to /// the next available stack slot - int assignVirt2StackSlot(unsigned virtReg); + int assignVirt2StackSlot(Register virtReg); /// create a mapping for the specified virtual register to /// the specified stack slot - void assignVirt2StackSlot(unsigned virtReg, int SS); + void assignVirt2StackSlot(Register virtReg, int SS); void print(raw_ostream &OS, const Module* M = nullptr) const override; void dump() const; diff --git a/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h b/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h index 7d20bb0a7bde..7538cb2c2548 100644 --- a/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h +++ b/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h @@ -11,7 +11,6 @@ #include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" -#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h" #include "llvm/Support/Error.h" namespace llvm { @@ -31,9 +30,6 @@ enum VisitorDataSource { Error visitTypeRecord(CVType &Record, TypeIndex Index, TypeVisitorCallbacks &Callbacks, VisitorDataSource Source = VDS_BytesPresent); -Error visitTypeRecord(CVType &Record, TypeIndex Index, - TypeVisitorCallbackPipeline &Callbacks, - VisitorDataSource Source = VDS_BytesPresent); Error visitTypeRecord(CVType &Record, TypeVisitorCallbacks &Callbacks, VisitorDataSource Source = VDS_BytesPresent); diff --git a/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h b/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h index 00fb0cf4cc90..60829a51dc25 100644 --- a/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h +++ b/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h @@ -33,6 +33,9 @@ public: virtual void EmitIntValue(uint64_t Value, unsigned Size) = 0; virtual void EmitBinaryData(StringRef Data) = 0; virtual void AddComment(const Twine &T) = 0; + virtual void AddRawComment(const Twine &T) = 0; + virtual bool isVerboseAsm() = 0; + virtual std::string getTypeName(TypeIndex TI) = 0; virtual ~CodeViewRecordStreamer() = default; }; @@ -206,6 +209,11 @@ public: return 0; } + void emitRawComment(const Twine &T) { + if (isStreaming() && Streamer->isVerboseAsm()) + Streamer->AddRawComment(T); + } + private: void emitEncodedSignedInteger(const int64_t &Value, const Twine &Comment = ""); @@ -225,9 +233,10 @@ private: } void emitComment(const Twine &Comment) { - if (isStreaming()) { + if (isStreaming() && Streamer->isVerboseAsm()) { Twine TComment(Comment); - Streamer->AddComment(TComment); + if (!TComment.isTriviallyEmpty()) + Streamer->AddComment(TComment); } } diff --git a/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def b/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def index 9767e49c44f5..ed5c143818e6 100644 --- a/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def +++ b/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def @@ -366,8 +366,134 @@ CV_REGISTER(AMD64_K7, 765) #endif // defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_X86) +#if defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_ARM) + +// ARM registers + 
+CV_REGISTER(ARM_NOREG, 0) + +// General purpose 32-bit integer regisers + +CV_REGISTER(ARM_R0, 10) +CV_REGISTER(ARM_R1, 11) +CV_REGISTER(ARM_R2, 12) +CV_REGISTER(ARM_R3, 13) +CV_REGISTER(ARM_R4, 14) +CV_REGISTER(ARM_R5, 15) +CV_REGISTER(ARM_R6, 16) +CV_REGISTER(ARM_R7, 17) +CV_REGISTER(ARM_R8, 18) +CV_REGISTER(ARM_R9, 19) +CV_REGISTER(ARM_R10, 20) +CV_REGISTER(ARM_R11, 21) +CV_REGISTER(ARM_R12, 22) +CV_REGISTER(ARM_SP, 23) +CV_REGISTER(ARM_LR, 24) +CV_REGISTER(ARM_PC, 25) + +// Status register + +CV_REGISTER(ARM_CPSR, 25) + +// ARM VFPv1 registers + +CV_REGISTER(ARM_FPSCR, 40) +CV_REGISTER(ARM_FPEXC, 41) + +// ARM VFPv3/NEON registers + +CV_REGISTER(ARM_FS32, 200) +CV_REGISTER(ARM_FS33, 201) +CV_REGISTER(ARM_FS34, 202) +CV_REGISTER(ARM_FS35, 203) +CV_REGISTER(ARM_FS36, 204) +CV_REGISTER(ARM_FS37, 205) +CV_REGISTER(ARM_FS38, 206) +CV_REGISTER(ARM_FS39, 207) +CV_REGISTER(ARM_FS40, 208) +CV_REGISTER(ARM_FS41, 209) +CV_REGISTER(ARM_FS42, 210) +CV_REGISTER(ARM_FS43, 211) +CV_REGISTER(ARM_FS44, 212) +CV_REGISTER(ARM_FS45, 213) +CV_REGISTER(ARM_FS46, 214) +CV_REGISTER(ARM_FS47, 215) +CV_REGISTER(ARM_FS48, 216) +CV_REGISTER(ARM_FS49, 217) +CV_REGISTER(ARM_FS50, 218) +CV_REGISTER(ARM_FS51, 219) +CV_REGISTER(ARM_FS52, 220) +CV_REGISTER(ARM_FS53, 221) +CV_REGISTER(ARM_FS54, 222) +CV_REGISTER(ARM_FS55, 223) +CV_REGISTER(ARM_FS56, 224) +CV_REGISTER(ARM_FS57, 225) +CV_REGISTER(ARM_FS58, 226) +CV_REGISTER(ARM_FS59, 227) +CV_REGISTER(ARM_FS60, 228) +CV_REGISTER(ARM_FS61, 229) +CV_REGISTER(ARM_FS62, 230) +CV_REGISTER(ARM_FS63, 231) + +CV_REGISTER(ARM_ND0, 300) +CV_REGISTER(ARM_ND1, 301) +CV_REGISTER(ARM_ND2, 302) +CV_REGISTER(ARM_ND3, 303) +CV_REGISTER(ARM_ND4, 304) +CV_REGISTER(ARM_ND5, 305) +CV_REGISTER(ARM_ND6, 306) +CV_REGISTER(ARM_ND7, 307) +CV_REGISTER(ARM_ND8, 308) +CV_REGISTER(ARM_ND9, 309) +CV_REGISTER(ARM_ND10, 310) +CV_REGISTER(ARM_ND11, 311) +CV_REGISTER(ARM_ND12, 312) +CV_REGISTER(ARM_ND13, 313) +CV_REGISTER(ARM_ND14, 314) +CV_REGISTER(ARM_ND15, 315) +CV_REGISTER(ARM_ND16, 316) +CV_REGISTER(ARM_ND17, 317) +CV_REGISTER(ARM_ND18, 318) +CV_REGISTER(ARM_ND19, 319) +CV_REGISTER(ARM_ND20, 320) +CV_REGISTER(ARM_ND21, 321) +CV_REGISTER(ARM_ND22, 322) +CV_REGISTER(ARM_ND23, 323) +CV_REGISTER(ARM_ND24, 324) +CV_REGISTER(ARM_ND25, 325) +CV_REGISTER(ARM_ND26, 326) +CV_REGISTER(ARM_ND27, 327) +CV_REGISTER(ARM_ND28, 328) +CV_REGISTER(ARM_ND29, 329) +CV_REGISTER(ARM_ND30, 330) +CV_REGISTER(ARM_ND31, 331) + +CV_REGISTER(ARM_NQ0, 400) +CV_REGISTER(ARM_NQ1, 401) +CV_REGISTER(ARM_NQ2, 402) +CV_REGISTER(ARM_NQ3, 403) +CV_REGISTER(ARM_NQ4, 404) +CV_REGISTER(ARM_NQ5, 405) +CV_REGISTER(ARM_NQ6, 406) +CV_REGISTER(ARM_NQ7, 407) +CV_REGISTER(ARM_NQ8, 408) +CV_REGISTER(ARM_NQ9, 409) +CV_REGISTER(ARM_NQ10, 410) +CV_REGISTER(ARM_NQ11, 411) +CV_REGISTER(ARM_NQ12, 412) +CV_REGISTER(ARM_NQ13, 413) +CV_REGISTER(ARM_NQ14, 414) +CV_REGISTER(ARM_NQ15, 415) + +#endif // defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_ARM) + #if defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_ARM64) +// arm64intr.h from MSVC defines ARM64_FPSR, which conflicts with +// these declarations. 
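The push_macro/pop_macro guard added in the following lines is the standard way to suspend a colliding object-like macro around declarations that reuse its name. A generic, self-contained illustration (FOO is a placeholder, not part of this patch):

// Suspend the macro, declare the identifier, then restore the includer's view.
#define FOO 1                      // stand-in for a macro from a system header
#pragma push_macro("FOO")
#undef FOO
enum Regs { FOO = 42 };            // safe: the macro is suspended here
#pragma pop_macro("FOO")
static_assert(FOO == 1, "original definition restored");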
+#pragma push_macro("ARM64_FPSR") +#undef ARM64_FPSR + // ARM64 registers CV_REGISTER(ARM64_NOREG, 0) @@ -556,4 +682,6 @@ CV_REGISTER(ARM64_Q31, 211) CV_REGISTER(ARM64_FPSR, 220) +#pragma pop_macro("ARM64_FPSR") + #endif // defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_ARM64) diff --git a/include/llvm/DebugInfo/CodeView/EnumTables.h b/include/llvm/DebugInfo/CodeView/EnumTables.h index ed126ed9e2ff..270cd4b8330c 100644 --- a/include/llvm/DebugInfo/CodeView/EnumTables.h +++ b/include/llvm/DebugInfo/CodeView/EnumTables.h @@ -37,6 +37,17 @@ ArrayRef> getThunkOrdinalNames(); ArrayRef> getTrampolineNames(); ArrayRef> getImageSectionCharacteristicNames(); +ArrayRef> getClassOptionNames(); +ArrayRef> getMemberAccessNames(); +ArrayRef> getMethodOptionNames(); +ArrayRef> getMemberKindNames(); +ArrayRef> getPtrKindNames(); +ArrayRef> getPtrModeNames(); +ArrayRef> getPtrMemberRepNames(); +ArrayRef> getTypeModifierNames(); +ArrayRef> getCallingConventions(); +ArrayRef> getFunctionOptionEnum(); +ArrayRef> getLabelTypeEnum(); } // end namespace codeview } // end namespace llvm diff --git a/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h b/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h index 62761cb87c81..108abb291498 100644 --- a/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h +++ b/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h @@ -62,7 +62,7 @@ public: Error visitSymbolBegin(CVSymbol &Record) override { assert(!Mapping && "Already in a symbol mapping!"); - Mapping = llvm::make_unique(Record.content(), Container); + Mapping = std::make_unique(Record.content(), Container); return Mapping->Mapping.visitSymbolBegin(Record); } Error visitSymbolEnd(CVSymbol &Record) override { diff --git a/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/include/llvm/DebugInfo/CodeView/SymbolRecord.h index 5e9a7432b9b6..1aafa3ca9f1d 100644 --- a/include/llvm/DebugInfo/CodeView/SymbolRecord.h +++ b/include/llvm/DebugInfo/CodeView/SymbolRecord.h @@ -73,17 +73,17 @@ public: Thunk32Sym(SymbolRecordKind Kind, uint32_t RecordOffset) : SymbolRecord(Kind), RecordOffset(RecordOffset) {} - uint32_t Parent; - uint32_t End; - uint32_t Next; - uint32_t Offset; - uint16_t Segment; - uint16_t Length; + uint32_t Parent = 0; + uint32_t End = 0; + uint32_t Next = 0; + uint32_t Offset = 0; + uint16_t Segment = 0; + uint16_t Length = 0; ThunkOrdinal Thunk; StringRef Name; ArrayRef VariantData; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_TRAMPOLINE @@ -94,13 +94,13 @@ public: : SymbolRecord(Kind), RecordOffset(RecordOffset) {} TrampolineType Type; - uint16_t Size; - uint32_t ThunkOffset; - uint32_t TargetOffset; - uint16_t ThunkSection; - uint16_t TargetSection; + uint16_t Size = 0; + uint32_t ThunkOffset = 0; + uint32_t TargetOffset = 0; + uint16_t ThunkSection = 0; + uint16_t TargetSection = 0; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_SECTION @@ -110,14 +110,14 @@ public: SectionSym(SymbolRecordKind Kind, uint32_t RecordOffset) : SymbolRecord(Kind), RecordOffset(RecordOffset) {} - uint16_t SectionNumber; - uint8_t Alignment; - uint32_t Rva; - uint32_t Length; - uint32_t Characteristics; + uint16_t SectionNumber = 0; + uint8_t Alignment = 0; + uint32_t Rva = 0; + uint32_t Length = 0; + uint32_t Characteristics = 0; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_COFFGROUP @@ -127,13 +127,13 @@ public: CoffGroupSym(SymbolRecordKind Kind, uint32_t RecordOffset) : SymbolRecord(Kind), RecordOffset(RecordOffset) {} - uint32_t Size; - uint32_t 
Characteristics; - uint32_t Offset; - uint16_t Segment; + uint32_t Size = 0; + uint32_t Characteristics = 0; + uint32_t Offset = 0; + uint16_t Segment = 0; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; class ScopeEndSym : public SymbolRecord { @@ -142,7 +142,7 @@ public: ScopeEndSym(SymbolRecordKind Kind, uint32_t RecordOffset) : SymbolRecord(Kind), RecordOffset(RecordOffset) {} - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; class CallerSym : public SymbolRecord { @@ -153,7 +153,7 @@ public: std::vector Indices; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; struct DecodedAnnotation { @@ -333,7 +333,7 @@ private: class InlineSiteSym : public SymbolRecord { public: explicit InlineSiteSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - InlineSiteSym(uint32_t RecordOffset) + explicit InlineSiteSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::InlineSiteSym), RecordOffset(RecordOffset) {} @@ -342,12 +342,12 @@ public: BinaryAnnotationIterator()); } - uint32_t Parent; - uint32_t End; + uint32_t Parent = 0; + uint32_t End = 0; TypeIndex Inlinee; std::vector AnnotationData; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_PUB32 @@ -371,7 +371,7 @@ public: class RegisterSym : public SymbolRecord { public: explicit RegisterSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - RegisterSym(uint32_t RecordOffset) + explicit RegisterSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::RegisterSym), RecordOffset(RecordOffset) {} @@ -379,7 +379,7 @@ public: RegisterId Register; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_PROCREF, S_LPROCREF @@ -390,13 +390,13 @@ public: : SymbolRecord(SymbolRecordKind::ProcRefSym), RecordOffset(RecordOffset) { } - uint32_t SumName; - uint32_t SymOffset; - uint16_t Module; + uint32_t SumName = 0; + uint32_t SymOffset = 0; + uint16_t Module = 0; StringRef Name; uint16_t modi() const { return Module - 1; } - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_LOCAL @@ -410,7 +410,7 @@ public: LocalSymFlags Flags; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; struct LocalVariableAddrRange { @@ -440,11 +440,11 @@ public: return RecordOffset + RelocationOffset; } - uint32_t Program; + uint32_t Program = 0; LocalVariableAddrRange Range; std::vector Gaps; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_DEFRANGE_SUBFIELD @@ -453,7 +453,7 @@ class DefRangeSubfieldSym : public SymbolRecord { public: explicit DefRangeSubfieldSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - DefRangeSubfieldSym(uint32_t RecordOffset) + explicit DefRangeSubfieldSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::DefRangeSubfieldSym), RecordOffset(RecordOffset) {} @@ -461,58 +461,62 @@ public: return RecordOffset + RelocationOffset; } - uint32_t Program; - uint16_t OffsetInParent; + uint32_t Program = 0; + uint16_t OffsetInParent = 0; LocalVariableAddrRange Range; std::vector Gaps; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; +}; + +struct DefRangeRegisterHeader { + ulittle16_t Register; + ulittle16_t MayHaveNoName; }; // S_DEFRANGE_REGISTER class DefRangeRegisterSym : public SymbolRecord { public: - struct Header { - ulittle16_t Register; - ulittle16_t MayHaveNoName; - }; - explicit DefRangeRegisterSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - DefRangeRegisterSym(uint32_t RecordOffset) + explicit DefRangeRegisterSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::DefRangeRegisterSym), 
RecordOffset(RecordOffset) {} - uint32_t getRelocationOffset() const { return RecordOffset + sizeof(Header); } + uint32_t getRelocationOffset() const { return RecordOffset + sizeof(DefRangeRegisterHeader); } - Header Hdr; + DefRangeRegisterHeader Hdr; LocalVariableAddrRange Range; std::vector Gaps; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; +}; + +struct DefRangeSubfieldRegisterHeader { + ulittle16_t Register; + ulittle16_t MayHaveNoName; + ulittle32_t OffsetInParent; }; // S_DEFRANGE_SUBFIELD_REGISTER class DefRangeSubfieldRegisterSym : public SymbolRecord { public: - struct Header { - ulittle16_t Register; - ulittle16_t MayHaveNoName; - ulittle32_t OffsetInParent; - }; - explicit DefRangeSubfieldRegisterSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - DefRangeSubfieldRegisterSym(uint32_t RecordOffset) + explicit DefRangeSubfieldRegisterSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::DefRangeSubfieldRegisterSym), RecordOffset(RecordOffset) {} - uint32_t getRelocationOffset() const { return RecordOffset + sizeof(Header); } + uint32_t getRelocationOffset() const { return RecordOffset + sizeof(DefRangeSubfieldRegisterHeader); } - Header Hdr; + DefRangeSubfieldRegisterHeader Hdr; LocalVariableAddrRange Range; std::vector Gaps; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; +}; + +struct DefRangeFramePointerRelHeader { + little32_t Offset; }; // S_DEFRANGE_FRAMEPOINTER_REL @@ -522,7 +526,7 @@ class DefRangeFramePointerRelSym : public SymbolRecord { public: explicit DefRangeFramePointerRelSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - DefRangeFramePointerRelSym(uint32_t RecordOffset) + explicit DefRangeFramePointerRelSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::DefRangeFramePointerRelSym), RecordOffset(RecordOffset) {} @@ -530,22 +534,22 @@ public: return RecordOffset + RelocationOffset; } - int32_t Offset; + DefRangeFramePointerRelHeader Hdr; LocalVariableAddrRange Range; std::vector Gaps; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; +}; + +struct DefRangeRegisterRelHeader { + ulittle16_t Register; + ulittle16_t Flags; + little32_t BasePointerOffset; }; // S_DEFRANGE_REGISTER_REL class DefRangeRegisterRelSym : public SymbolRecord { public: - struct Header { - ulittle16_t Register; - ulittle16_t Flags; - little32_t BasePointerOffset; - }; - explicit DefRangeRegisterRelSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} explicit DefRangeRegisterRelSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::DefRangeRegisterRelSym), @@ -563,13 +567,13 @@ public: bool hasSpilledUDTMember() const { return Hdr.Flags & IsSubfieldFlag; } uint16_t offsetInParent() const { return Hdr.Flags >> OffsetInParentShift; } - uint32_t getRelocationOffset() const { return RecordOffset + sizeof(Header); } + uint32_t getRelocationOffset() const { return RecordOffset + sizeof(DefRangeRegisterRelHeader); } - Header Hdr; + DefRangeRegisterRelHeader Hdr; LocalVariableAddrRange Range; std::vector Gaps; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_DEFRANGE_FRAMEPOINTER_REL_FULL_SCOPE @@ -581,9 +585,9 @@ public: : SymbolRecord(SymbolRecordKind::DefRangeFramePointerRelFullScopeSym), RecordOffset(RecordOffset) {} - int32_t Offset; + int32_t Offset = 0; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_BLOCK32 @@ -599,14 +603,14 @@ public: return RecordOffset + RelocationOffset; } - uint32_t Parent; - uint32_t End; - uint32_t CodeSize; - uint32_t CodeOffset; - uint16_t Segment; + uint32_t Parent = 0; + uint32_t End = 0; + uint32_t 
CodeSize = 0; + uint32_t CodeOffset = 0; + uint16_t Segment = 0; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_LABEL32 @@ -622,12 +626,12 @@ public: return RecordOffset + RelocationOffset; } - uint32_t CodeOffset; - uint16_t Segment; + uint32_t CodeOffset = 0; + uint16_t Segment = 0; ProcSymFlags Flags; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_OBJNAME @@ -635,82 +639,82 @@ class ObjNameSym : public SymbolRecord { public: explicit ObjNameSym() : SymbolRecord(SymbolRecordKind::ObjNameSym) {} explicit ObjNameSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - ObjNameSym(uint32_t RecordOffset) + explicit ObjNameSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::ObjNameSym), RecordOffset(RecordOffset) { } - uint32_t Signature; + uint32_t Signature = 0; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_ENVBLOCK class EnvBlockSym : public SymbolRecord { public: explicit EnvBlockSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - EnvBlockSym(uint32_t RecordOffset) + explicit EnvBlockSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::EnvBlockSym), RecordOffset(RecordOffset) {} std::vector Fields; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_EXPORT class ExportSym : public SymbolRecord { public: explicit ExportSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - ExportSym(uint32_t RecordOffset) + explicit ExportSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::ExportSym), RecordOffset(RecordOffset) {} - uint16_t Ordinal; + uint16_t Ordinal = 0; ExportFlags Flags; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_FILESTATIC class FileStaticSym : public SymbolRecord { public: explicit FileStaticSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - FileStaticSym(uint32_t RecordOffset) + explicit FileStaticSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::FileStaticSym), RecordOffset(RecordOffset) {} TypeIndex Index; - uint32_t ModFilenameOffset; + uint32_t ModFilenameOffset = 0; LocalSymFlags Flags; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_COMPILE2 class Compile2Sym : public SymbolRecord { public: explicit Compile2Sym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - Compile2Sym(uint32_t RecordOffset) + explicit Compile2Sym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::Compile2Sym), RecordOffset(RecordOffset) {} CompileSym2Flags Flags; CPUType Machine; - uint16_t VersionFrontendMajor; - uint16_t VersionFrontendMinor; - uint16_t VersionFrontendBuild; - uint16_t VersionBackendMajor; - uint16_t VersionBackendMinor; - uint16_t VersionBackendBuild; + uint16_t VersionFrontendMajor = 0; + uint16_t VersionFrontendMinor = 0; + uint16_t VersionFrontendBuild = 0; + uint16_t VersionBackendMajor = 0; + uint16_t VersionBackendMinor = 0; + uint16_t VersionBackendBuild = 0; StringRef Version; std::vector ExtraStrings; uint8_t getLanguage() const { return static_cast(Flags) & 0xFF; } uint32_t getFlags() const { return static_cast(Flags) & ~0xFF; } - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_COMPILE3 @@ -718,20 +722,20 @@ class Compile3Sym : public SymbolRecord { public: Compile3Sym() : SymbolRecord(SymbolRecordKind::Compile3Sym) {} explicit Compile3Sym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - Compile3Sym(uint32_t RecordOffset) + explicit Compile3Sym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::Compile3Sym), RecordOffset(RecordOffset) {} CompileSym3Flags Flags; 
CPUType Machine; - uint16_t VersionFrontendMajor; - uint16_t VersionFrontendMinor; - uint16_t VersionFrontendBuild; - uint16_t VersionFrontendQFE; - uint16_t VersionBackendMajor; - uint16_t VersionBackendMinor; - uint16_t VersionBackendBuild; - uint16_t VersionBackendQFE; + uint16_t VersionFrontendMajor = 0; + uint16_t VersionFrontendMinor = 0; + uint16_t VersionFrontendBuild = 0; + uint16_t VersionFrontendQFE = 0; + uint16_t VersionBackendMajor = 0; + uint16_t VersionBackendMinor = 0; + uint16_t VersionBackendBuild = 0; + uint16_t VersionBackendQFE = 0; StringRef Version; void setLanguage(SourceLanguage Lang) { @@ -750,7 +754,7 @@ public: (getFlags() & (CompileSym3Flags::PGO | CompileSym3Flags::LTCG)); } - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_FRAMEPROC @@ -761,12 +765,12 @@ public: : SymbolRecord(SymbolRecordKind::FrameProcSym), RecordOffset(RecordOffset) {} - uint32_t TotalFrameBytes; - uint32_t PaddingFrameBytes; - uint32_t OffsetToPadding; - uint32_t BytesOfCalleeSavedRegisters; - uint32_t OffsetOfExceptionHandler; - uint16_t SectionIdOfExceptionHandler; + uint32_t TotalFrameBytes = 0; + uint32_t PaddingFrameBytes = 0; + uint32_t OffsetToPadding = 0; + uint32_t BytesOfCalleeSavedRegisters = 0; + uint32_t OffsetOfExceptionHandler = 0; + uint16_t SectionIdOfExceptionHandler = 0; FrameProcedureOptions Flags; /// Extract the register this frame uses to refer to local variables. @@ -781,7 +785,7 @@ public: EncodedFramePtrReg((uint32_t(Flags) >> 16U) & 0x3U), CPU); } - uint32_t RecordOffset; + uint32_t RecordOffset = 0; private: }; @@ -799,11 +803,11 @@ public: return RecordOffset + RelocationOffset; } - uint32_t CodeOffset; - uint16_t Segment; + uint32_t CodeOffset = 0; + uint16_t Segment = 0; TypeIndex Type; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_HEAPALLOCSITE @@ -820,12 +824,12 @@ public: return RecordOffset + RelocationOffset; } - uint32_t CodeOffset; - uint16_t Segment; - uint16_t CallInstructionSize; + uint32_t CodeOffset = 0; + uint16_t Segment = 0; + uint16_t CallInstructionSize = 0; TypeIndex Type; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_FRAMECOOKIE @@ -841,12 +845,12 @@ public: return RecordOffset + RelocationOffset; } - uint32_t CodeOffset; - uint16_t Register; + uint32_t CodeOffset = 0; + uint16_t Register = 0; FrameCookieKind CookieKind; - uint8_t Flags; + uint8_t Flags = 0; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_UDT, S_COBOLUDT @@ -859,20 +863,20 @@ public: TypeIndex Type; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_BUILDINFO class BuildInfoSym : public SymbolRecord { public: explicit BuildInfoSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - BuildInfoSym(uint32_t RecordOffset) + explicit BuildInfoSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::BuildInfoSym), RecordOffset(RecordOffset) {} TypeIndex BuildId; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_BPREL32 @@ -883,11 +887,11 @@ public: : SymbolRecord(SymbolRecordKind::BPRelativeSym), RecordOffset(RecordOffset) {} - int32_t Offset; + int32_t Offset = 0; TypeIndex Type; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_REGREL32 @@ -898,19 +902,19 @@ public: : SymbolRecord(SymbolRecordKind::RegRelativeSym), RecordOffset(RecordOffset) {} - uint32_t Offset; + uint32_t Offset = 0; TypeIndex Type; RegisterId Register; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_CONSTANT, S_MANCONSTANT class ConstantSym : public 
SymbolRecord { public: explicit ConstantSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - ConstantSym(uint32_t RecordOffset) + explicit ConstantSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::ConstantSym), RecordOffset(RecordOffset) {} @@ -918,7 +922,7 @@ public: APSInt Value; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_LDATA32, S_GDATA32, S_LMANDATA, S_GMANDATA @@ -927,7 +931,7 @@ class DataSym : public SymbolRecord { public: explicit DataSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - DataSym(uint32_t RecordOffset) + explicit DataSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::DataSym), RecordOffset(RecordOffset) {} uint32_t getRelocationOffset() const { @@ -935,11 +939,11 @@ public: } TypeIndex Type; - uint32_t DataOffset; - uint16_t Segment; + uint32_t DataOffset = 0; + uint16_t Segment = 0; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_LTHREAD32, S_GTHREAD32 @@ -957,11 +961,11 @@ public: } TypeIndex Type; - uint32_t DataOffset; - uint16_t Segment; + uint32_t DataOffset = 0; + uint16_t Segment = 0; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_UNAMESPACE @@ -974,7 +978,7 @@ public: StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_ANNOTATION @@ -989,7 +993,7 @@ public: uint16_t Segment = 0; std::vector Strings; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; using CVSymbol = CVRecord; diff --git a/include/llvm/DebugInfo/CodeView/TypeDeserializer.h b/include/llvm/DebugInfo/CodeView/TypeDeserializer.h index 081de32dd02c..2b17f5ccb13b 100644 --- a/include/llvm/DebugInfo/CodeView/TypeDeserializer.h +++ b/include/llvm/DebugInfo/CodeView/TypeDeserializer.h @@ -66,7 +66,7 @@ public: Error visitTypeBegin(CVType &Record) override { assert(!Mapping && "Already in a type mapping!"); - Mapping = llvm::make_unique(Record.content()); + Mapping = std::make_unique(Record.content()); return Mapping->Mapping.visitTypeBegin(Record); } diff --git a/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h b/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h index 4c309c10ff0c..c6044d5138a8 100644 --- a/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h +++ b/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h @@ -10,6 +10,7 @@ #define LLVM_DEBUGINFO_CODEVIEW_TYPERECORDMAPPING_H #include "llvm/ADT/Optional.h" +#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h" #include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" #include "llvm/Support/Error.h" diff --git a/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h b/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h index 169715be2d52..fb0b579d6a06 100644 --- a/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h +++ b/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h @@ -82,11 +82,6 @@ public: Pipeline.push_back(&Callbacks); } - void addCallbackToPipelineFront(TypeVisitorCallbacks &Callbacks) { - auto CallBackItr = Pipeline.begin(); - Pipeline.insert(CallBackItr, &Callbacks); - } - #define TYPE_RECORD(EnumName, EnumVal, Name) \ Error visitKnownRecord(CVType &CVR, Name##Record &Record) override { \ return visitKnownRecordImpl(CVR, Record); \ diff --git a/include/llvm/DebugInfo/DIContext.h b/include/llvm/DebugInfo/DIContext.h index d2a5318179eb..fbebfe634b63 100644 --- a/include/llvm/DebugInfo/DIContext.h +++ b/include/llvm/DebugInfo/DIContext.h @@ -28,6 +28,10 @@ namespace llvm { /// A format-neutral 
container for source line information. struct DILineInfo { + // DILineInfo contains "" for function/filename it cannot fetch. + static constexpr const char *const BadString = ""; + // Use "??" instead of "" to make our output closer to addr2line. + static constexpr const char *const Addr2LineBadString = "??"; std::string FileName; std::string FunctionName; Optional Source; @@ -38,7 +42,7 @@ struct DILineInfo { // DWARF-specific. uint32_t Discriminator = 0; - DILineInfo() : FileName(""), FunctionName("") {} + DILineInfo() : FileName(BadString), FunctionName(BadString) {} bool operator==(const DILineInfo &RHS) const { return Line == RHS.Line && Column == RHS.Column && @@ -61,9 +65,9 @@ struct DILineInfo { void dump(raw_ostream &OS) { OS << "Line info: "; - if (FileName != "") + if (FileName != BadString) OS << "file '" << FileName << "', "; - if (FunctionName != "") + if (FunctionName != BadString) OS << "function '" << FunctionName << "', "; OS << "line " << Line << ", "; OS << "column " << Column << ", "; @@ -109,7 +113,7 @@ struct DIGlobal { uint64_t Start = 0; uint64_t Size = 0; - DIGlobal() : Name("") {} + DIGlobal() : Name(DILineInfo::BadString) {} }; struct DILocal { @@ -289,7 +293,7 @@ public: LoadedObjectInfoHelper(Ts &&... Args) : Base(std::forward(Args)...) {} std::unique_ptr clone() const override { - return llvm::make_unique(static_cast(*this)); + return std::make_unique(static_cast(*this)); } }; diff --git a/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h b/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h index ccf2891c2e21..39ae53c4e7fe 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h +++ b/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h @@ -130,11 +130,11 @@ public: /// \param Attr DWARF attribute to search for. /// \param U the DWARFUnit the contains the DIE. /// \returns Optional DWARF form value if the attribute was extracted. - Optional getAttributeValue(const uint32_t DIEOffset, + Optional getAttributeValue(const uint64_t DIEOffset, const dwarf::Attribute Attr, const DWARFUnit &U) const; - bool extract(DataExtractor Data, uint32_t* OffsetPtr); + bool extract(DataExtractor Data, uint64_t* OffsetPtr); void dump(raw_ostream &OS) const; // Return an optional byte size of all attribute data in this abbreviation diff --git a/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h b/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h index 303375703d2e..c9042e593260 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h +++ b/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h @@ -96,7 +96,7 @@ class AppleAcceleratorTable : public DWARFAcceleratorTable { using AtomType = uint16_t; using Form = dwarf::Form; - uint32_t DIEOffsetBase; + uint64_t DIEOffsetBase; SmallVector, 3> Atoms; Optional extractOffset(Optional Value) const; @@ -109,7 +109,7 @@ class AppleAcceleratorTable : public DWARFAcceleratorTable { /// Returns true if we should continue scanning for entries or false if we've /// reached the last (sentinel) entry of encountered a parsing error. bool dumpName(ScopedPrinter &W, SmallVectorImpl &AtomForms, - uint32_t *DataOffset) const; + uint64_t *DataOffset) const; public: /// Apple-specific implementation of an Accelerator Entry. 
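A recurring change in the DebugInfo/DWARF hunks here is widening section offsets from uint32_t to uint64_t, as needed for DWARF64 and very large debug sections. A minimal extraction loop under the new convention; walkULEBs and its input are illustrative, not part of the patch:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataExtractor.h"

// Decode consecutive ULEB128 values, carrying the cursor as a 64-bit offset.
static uint64_t walkULEBs(llvm::StringRef Contents) {
  llvm::DataExtractor Data(Contents, /*IsLittleEndian=*/true, /*AddressSize=*/8);
  uint64_t Offset = 0; // was a uint32_t cursor before this import
  uint64_t Sum = 0;
  while (Data.isValidOffset(Offset))
    Sum += Data.getULEB128(&Offset);
  return Sum;
}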
@@ -119,7 +119,7 @@ public: Entry(const HeaderData &Data); Entry() = default; - void extract(const AppleAcceleratorTable &AccelTable, uint32_t *Offset); + void extract(const AppleAcceleratorTable &AccelTable, uint64_t *Offset); public: Optional getCUOffset() const override; @@ -143,7 +143,7 @@ public: class ValueIterator : public std::iterator { const AppleAcceleratorTable *AccelTable = nullptr; Entry Current; ///< The current entry. - unsigned DataOffset = 0; ///< Offset into the section. + uint64_t DataOffset = 0; ///< Offset into the section. unsigned Data = 0; ///< Current data entry. unsigned NumData = 0; ///< Number of data entries. @@ -151,7 +151,7 @@ public: void Next(); public: /// Construct a new iterator for the entries at \p DataOffset. - ValueIterator(const AppleAcceleratorTable &AccelTable, unsigned DataOffset); + ValueIterator(const AppleAcceleratorTable &AccelTable, uint64_t DataOffset); /// End marker. ValueIterator() = default; @@ -193,7 +193,7 @@ public: /// DieOffset is the offset into the .debug_info section for the DIE /// related to the input hash data offset. /// DieTag is the tag of the DIE - std::pair readAtoms(uint32_t &HashDataOffset); + std::pair readAtoms(uint64_t *HashDataOffset); void dump(raw_ostream &OS) const override; /// Look up all entries in the accelerator table matching \c Key. @@ -245,7 +245,7 @@ public: struct Header : public HeaderPOD { SmallString<8> AugmentationString; - Error extract(const DWARFDataExtractor &AS, uint32_t *Offset); + Error extract(const DWARFDataExtractor &AS, uint64_t *Offset); void dump(ScopedPrinter &W) const; }; @@ -354,12 +354,12 @@ public: DataExtractor StrData; uint32_t Index; - uint32_t StringOffset; - uint32_t EntryOffset; + uint64_t StringOffset; + uint64_t EntryOffset; public: NameTableEntry(const DataExtractor &StrData, uint32_t Index, - uint32_t StringOffset, uint32_t EntryOffset) + uint64_t StringOffset, uint64_t EntryOffset) : StrData(StrData), Index(Index), StringOffset(StringOffset), EntryOffset(EntryOffset) {} @@ -367,17 +367,17 @@ public: uint32_t getIndex() const { return Index; } /// Returns the offset of the name of the described entities. - uint32_t getStringOffset() const { return StringOffset; } + uint64_t getStringOffset() const { return StringOffset; } /// Return the string referenced by this name table entry or nullptr if the /// string offset is not valid. const char *getString() const { - uint32_t Off = StringOffset; + uint64_t Off = StringOffset; return StrData.getCStr(&Off); } /// Returns the offset of the first Entry in the list. - uint32_t getEntryOffset() const { return EntryOffset; } + uint64_t getEntryOffset() const { return EntryOffset; } }; /// Represents a single accelerator table within the DWARF v5 .debug_names @@ -389,40 +389,40 @@ public: // Base of the whole unit and of various important tables, as offsets from // the start of the section. 
- uint32_t Base; - uint32_t CUsBase; - uint32_t BucketsBase; - uint32_t HashesBase; - uint32_t StringOffsetsBase; - uint32_t EntryOffsetsBase; - uint32_t EntriesBase; + uint64_t Base; + uint64_t CUsBase; + uint64_t BucketsBase; + uint64_t HashesBase; + uint64_t StringOffsetsBase; + uint64_t EntryOffsetsBase; + uint64_t EntriesBase; void dumpCUs(ScopedPrinter &W) const; void dumpLocalTUs(ScopedPrinter &W) const; void dumpForeignTUs(ScopedPrinter &W) const; void dumpAbbreviations(ScopedPrinter &W) const; - bool dumpEntry(ScopedPrinter &W, uint32_t *Offset) const; + bool dumpEntry(ScopedPrinter &W, uint64_t *Offset) const; void dumpName(ScopedPrinter &W, const NameTableEntry &NTE, Optional Hash) const; void dumpBucket(ScopedPrinter &W, uint32_t Bucket) const; - Expected extractAttributeEncoding(uint32_t *Offset); + Expected extractAttributeEncoding(uint64_t *Offset); Expected> - extractAttributeEncodings(uint32_t *Offset); + extractAttributeEncodings(uint64_t *Offset); - Expected extractAbbrev(uint32_t *Offset); + Expected extractAbbrev(uint64_t *Offset); public: - NameIndex(const DWARFDebugNames &Section, uint32_t Base) + NameIndex(const DWARFDebugNames &Section, uint64_t Base) : Section(Section), Base(Base) {} /// Reads offset of compilation unit CU. CU is 0-based. - uint32_t getCUOffset(uint32_t CU) const; + uint64_t getCUOffset(uint32_t CU) const; uint32_t getCUCount() const { return Hdr.CompUnitCount; } /// Reads offset of local type unit TU, TU is 0-based. - uint32_t getLocalTUOffset(uint32_t TU) const; + uint64_t getLocalTUOffset(uint32_t TU) const; uint32_t getLocalTUCount() const { return Hdr.LocalTypeUnitCount; } /// Reads signature of foreign type unit TU. TU is 0-based. @@ -451,7 +451,7 @@ public: return Abbrevs; } - Expected getEntry(uint32_t *Offset) const; + Expected getEntry(uint64_t *Offset) const; /// Look up all entries in this Name Index matching \c Key. iterator_range equal_range(StringRef Key) const; @@ -460,8 +460,8 @@ public: NameIterator end() const { return NameIterator(this, getNameCount() + 1); } Error extract(); - uint32_t getUnitOffset() const { return Base; } - uint32_t getNextUnitOffset() const { return Base + 4 + Hdr.UnitLength; } + uint64_t getUnitOffset() const { return Base; } + uint64_t getNextUnitOffset() const { return Base + 4 + Hdr.UnitLength; } void dump(ScopedPrinter &W) const; friend class DWARFDebugNames; @@ -479,12 +479,12 @@ public: bool IsLocal; Optional CurrentEntry; - unsigned DataOffset = 0; ///< Offset into the section. + uint64_t DataOffset = 0; ///< Offset into the section. std::string Key; ///< The Key we are searching for. Optional Hash; ///< Hash of Key, if it has been computed. bool getEntryAtCurrentOffset(); - Optional findEntryOffsetInCurrentIndex(); + Optional findEntryOffsetInCurrentIndex(); bool findInCurrentIndex(); void searchFromStartOfCurrentIndex(); void next(); @@ -572,7 +572,7 @@ public: private: SmallVector NameIndices; - DenseMap CUToNameIndex; + DenseMap CUToNameIndex; public: DWARFDebugNames(const DWARFDataExtractor &AccelSection, @@ -591,7 +591,7 @@ public: /// Return the Name Index covering the compile unit at CUOffset, or nullptr if /// there is no Name Index covering that unit. 
- const NameIndex *getCUNameIndex(uint32_t CUOffset); + const NameIndex *getCUNameIndex(uint64_t CUOffset); }; } // end namespace llvm diff --git a/include/llvm/DebugInfo/DWARF/DWARFAttribute.h b/include/llvm/DebugInfo/DWARF/DWARFAttribute.h index c8ad19ad6bf6..dfc778346dbe 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFAttribute.h +++ b/include/llvm/DebugInfo/DWARF/DWARFAttribute.h @@ -23,7 +23,7 @@ namespace llvm { /// attributes in a DWARFDie. struct DWARFAttribute { /// The debug info/types offset for this attribute. - uint32_t Offset = 0; + uint64_t Offset = 0; /// The debug info/types section byte size of the data for this attribute. uint32_t ByteSize = 0; /// The attribute enumeration of this attribute. diff --git a/include/llvm/DebugInfo/DWARF/DWARFContext.h b/include/llvm/DebugInfo/DWARF/DWARFContext.h index 23cf21c3523f..fae163622edb 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -225,10 +225,10 @@ public: DWARFCompileUnit *getDWOCompileUnitForHash(uint64_t Hash); /// Return the compile unit that includes an offset (relative to .debug_info). - DWARFCompileUnit *getCompileUnitForOffset(uint32_t Offset); + DWARFCompileUnit *getCompileUnitForOffset(uint64_t Offset); /// Get a DIE given an exact offset. - DWARFDie getDIEForOffset(uint32_t Offset); + DWARFDie getDIEForOffset(uint64_t Offset); unsigned getMaxVersion() { // Ensure info units have been parsed to discover MaxVersion @@ -301,10 +301,10 @@ public: std::function RecoverableErrorCallback); DataExtractor getStringExtractor() const { - return DataExtractor(DObj->getStringSection(), false, 0); + return DataExtractor(DObj->getStrSection(), false, 0); } DataExtractor getLineStringExtractor() const { - return DataExtractor(DObj->getLineStringSection(), false, 0); + return DataExtractor(DObj->getLineStrSection(), false, 0); } /// Wraps the returned DIEs for a given address. diff --git a/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h b/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h index 7c2a159b71fa..980724c525d2 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h @@ -35,20 +35,25 @@ public: /// Extracts a value and applies a relocation to the result if /// one exists for the given offset. - uint64_t getRelocatedValue(uint32_t Size, uint32_t *Off, - uint64_t *SectionIndex = nullptr) const; + uint64_t getRelocatedValue(uint32_t Size, uint64_t *Off, + uint64_t *SectionIndex = nullptr, + Error *Err = nullptr) const; /// Extracts an address-sized value and applies a relocation to the result if /// one exists for the given offset. - uint64_t getRelocatedAddress(uint32_t *Off, uint64_t *SecIx = nullptr) const { + uint64_t getRelocatedAddress(uint64_t *Off, uint64_t *SecIx = nullptr) const { return getRelocatedValue(getAddressSize(), Off, SecIx); } + uint64_t getRelocatedAddress(Cursor &C, uint64_t *SecIx = nullptr) const { + return getRelocatedValue(getAddressSize(), &getOffset(C), SecIx, + &getError(C)); + } /// Extracts a DWARF-encoded pointer in \p Offset using \p Encoding. /// There is a DWARF encoding that uses a PC-relative adjustment. /// For these values, \p AbsPosOffset is used to fix them, which should /// reflect the absolute address of this pointer. - Optional getEncodedPointer(uint32_t *Offset, uint8_t Encoding, + Optional getEncodedPointer(uint64_t *Offset, uint8_t Encoding, uint64_t AbsPosOffset = 0) const; size_t size() const { return Section == nullptr ? 
0 : Section->Data.size(); } diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h b/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h index 28fd8484b4a9..1398e16252a9 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h @@ -20,7 +20,7 @@ namespace llvm { class raw_ostream; class DWARFAbbreviationDeclarationSet { - uint32_t Offset; + uint64_t Offset; /// Code of the first abbreviation, if all abbreviations in the set have /// consecutive codes. UINT32_MAX otherwise. uint32_t FirstAbbrCode; @@ -32,9 +32,9 @@ class DWARFAbbreviationDeclarationSet { public: DWARFAbbreviationDeclarationSet(); - uint32_t getOffset() const { return Offset; } + uint64_t getOffset() const { return Offset; } void dump(raw_ostream &OS) const; - bool extract(DataExtractor Data, uint32_t *OffsetPtr); + bool extract(DataExtractor Data, uint64_t *OffsetPtr); const DWARFAbbreviationDeclaration * getAbbreviationDeclaration(uint32_t AbbrCode) const; diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h b/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h index a98bf282fe7c..4539b9c9d581 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h @@ -45,7 +45,7 @@ public: private: dwarf::DwarfFormat Format; - uint32_t HeaderOffset; + uint64_t HeaderOffset; Header HeaderData; uint32_t DataSize = 0; std::vector Addrs; @@ -54,11 +54,11 @@ public: void clear(); /// Extract an entire table, including all addresses. - Error extract(DWARFDataExtractor Data, uint32_t *OffsetPtr, + Error extract(DWARFDataExtractor Data, uint64_t *OffsetPtr, uint16_t Version, uint8_t AddrSize, std::function WarnCallback); - uint32_t getHeaderOffset() const { return HeaderOffset; } + uint64_t getHeaderOffset() const { return HeaderOffset; } uint8_t getAddrSize() const { return HeaderData.AddrSize; } void dump(raw_ostream &OS, DIDumpOptions DumpOpts = {}) const; diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h b/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h index 5b6c578bc3bf..ebe4ad6e24dd 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h @@ -49,7 +49,7 @@ private: using DescriptorColl = std::vector; using desc_iterator_range = iterator_range; - uint32_t Offset; + uint64_t Offset; Header HeaderData; DescriptorColl ArangeDescriptors; @@ -57,7 +57,7 @@ public: DWARFDebugArangeSet() { clear(); } void clear(); - bool extract(DataExtractor data, uint32_t *offset_ptr); + bool extract(DataExtractor data, uint64_t *offset_ptr); void dump(raw_ostream &OS) const; uint32_t getCompileUnitDIEOffset() const { return HeaderData.CuOffset; } diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h b/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h index 03223fbc80a9..172f1d2c9dbe 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h @@ -28,7 +28,7 @@ private: void extract(DataExtractor DebugArangesData); /// Call appendRange multiple times and then call construct. 
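Most of the uint32_t-to-uint64_t churn in this import comes down to one DWARF fact: DWARF64 section offsets are 8 bytes wide, so 32-bit offset variables silently truncate on large debug sections. A small standalone sketch (plain C++, independent of the LLVM classes above) of decoding the DWARF "initial length" field shows where the two formats diverge; it assumes little-endian input for brevity.

#include <cstdint>
#include <cstring>

enum class DwarfFormat { DWARF32, DWARF64 };

struct InitialLength {
  DwarfFormat Format;
  uint64_t Length;    // Length of the unit/table that follows.
  uint8_t FieldSize;  // Bytes consumed by the length field: 4 or 12.
};

// DWARF32 stores a 4-byte length below 0xfffffff0. The escape value
// 0xffffffff announces DWARF64: the real length follows as 8 bytes, and all
// section offsets within that contribution are 8 bytes wide as well.
static InitialLength readInitialLength(const uint8_t *Data) {
  uint32_t U32;
  std::memcpy(&U32, Data, sizeof(U32));
  if (U32 == 0xffffffffU) {
    uint64_t U64;
    std::memcpy(&U64, Data + 4, sizeof(U64));
    return {DwarfFormat::DWARF64, U64, 12};
  }
  return {DwarfFormat::DWARF32, U32, 4};
}

This is also the reason for the getUnitLengthFieldByteSize() helper introduced further down: the unit length field occupies 4 bytes in DWARF32 and 12 in DWARF64.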
- void appendRange(uint32_t CUOffset, uint64_t LowPC, uint64_t HighPC); + void appendRange(uint64_t CUOffset, uint64_t LowPC, uint64_t HighPC); void construct(); struct Range { @@ -60,10 +60,10 @@ private: struct RangeEndpoint { uint64_t Address; - uint32_t CUOffset; + uint64_t CUOffset; bool IsRangeStart; - RangeEndpoint(uint64_t Address, uint32_t CUOffset, bool IsRangeStart) + RangeEndpoint(uint64_t Address, uint64_t CUOffset, bool IsRangeStart) : Address(Address), CUOffset(CUOffset), IsRangeStart(IsRangeStart) {} bool operator<(const RangeEndpoint &Other) const { @@ -76,7 +76,7 @@ private: std::vector Endpoints; RangeColl Aranges; - DenseSet ParsedCUOffsets; + DenseSet ParsedCUOffsets; }; } // end namespace llvm diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h b/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h index d960f4bc9b1c..c6539df0d756 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h @@ -69,7 +69,7 @@ public: /// starting at *Offset and ending at EndOffset. *Offset is updated /// to EndOffset upon successful parsing, or indicates the offset /// where a problem occurred in case an error is returned. - Error parse(DataExtractor Data, uint32_t *Offset, uint32_t EndOffset); + Error parse(DWARFDataExtractor Data, uint64_t *Offset, uint64_t EndOffset); void dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH, unsigned IndentLevel = 1) const; diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h b/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h index f50063b24370..ded960337ec6 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h @@ -22,7 +22,7 @@ class DWARFUnit; /// DWARFDebugInfoEntry - A DIE with only the minimum required data. class DWARFDebugInfoEntry { /// Offset within the .debug_info of the start of this entry. - uint32_t Offset = 0; + uint64_t Offset = 0; /// The integer depth of this DIE within the compile unit DIEs where the /// compile/type unit DIE has a depth of zero. @@ -36,14 +36,14 @@ public: /// Extracts a debug info entry, which is a child of a given unit, /// starting at a given offset. If DIE can't be extracted, returns false and /// doesn't change OffsetPtr. - bool extractFast(const DWARFUnit &U, uint32_t *OffsetPtr); + bool extractFast(const DWARFUnit &U, uint64_t *OffsetPtr); /// High performance extraction should use this call. 
- bool extractFast(const DWARFUnit &U, uint32_t *OffsetPtr, - const DWARFDataExtractor &DebugInfoData, uint32_t UEndOffset, + bool extractFast(const DWARFUnit &U, uint64_t *OffsetPtr, + const DWARFDataExtractor &DebugInfoData, uint64_t UEndOffset, uint32_t Depth); - uint32_t getOffset() const { return Offset; } + uint64_t getOffset() const { return Offset; } uint32_t getDepth() const { return Depth; } dwarf::Tag getTag() const { diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index e7425c192373..c2be8304ad84 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -18,6 +18,7 @@ #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h" #include "llvm/Support/MD5.h" +#include "llvm/Support/Path.h" #include #include #include @@ -128,13 +129,15 @@ public: bool hasFileAtIndex(uint64_t FileIndex) const; - bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir, - DILineInfoSpecifier::FileLineInfoKind Kind, - std::string &Result) const; + bool + getFileNameByIndex(uint64_t FileIndex, StringRef CompDir, + DILineInfoSpecifier::FileLineInfoKind Kind, + std::string &Result, + sys::path::Style Style = sys::path::Style::native) const; void clear(); void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const; - Error parse(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr, + Error parse(const DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr, const DWARFContext &Ctx, const DWARFUnit *U = nullptr); }; @@ -278,7 +281,7 @@ public: /// Parse prologue and all rows. Error parse( - DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr, + DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr, const DWARFContext &Ctx, const DWARFUnit *U, std::function RecoverableErrorCallback, raw_ostream *OS = nullptr); @@ -305,9 +308,9 @@ public: std::vector &Result) const; }; - const LineTable *getLineTable(uint32_t Offset) const; + const LineTable *getLineTable(uint64_t Offset) const; Expected getOrParseLineTable( - DWARFDataExtractor &DebugLineData, uint32_t Offset, + DWARFDataExtractor &DebugLineData, uint64_t Offset, const DWARFContext &Ctx, const DWARFUnit *U, std::function RecoverableErrorCallback); @@ -350,17 +353,17 @@ public: bool done() const { return Done; } /// Get the offset the parser has reached. 
- uint32_t getOffset() const { return Offset; } + uint64_t getOffset() const { return Offset; } private: - DWARFUnit *prepareToParse(uint32_t Offset); - void moveToNextTable(uint32_t OldOffset, const Prologue &P); + DWARFUnit *prepareToParse(uint64_t Offset); + void moveToNextTable(uint64_t OldOffset, const Prologue &P); LineToUnitMap LineToUnit; DWARFDataExtractor &DebugLineData; const DWARFContext &Context; - uint32_t Offset = 0; + uint64_t Offset = 0; bool Done = false; }; @@ -377,7 +380,7 @@ private: struct Sequence Sequence; }; - using LineTableMapTy = std::map; + using LineTableMapTy = std::map; using LineTableIter = LineTableMapTy::iterator; using LineTableConstIter = LineTableMapTy::const_iterator; diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h index cced6048e811..c79d98e34f6e 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h @@ -11,6 +11,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include @@ -29,19 +30,20 @@ public: /// The ending address of the instruction range. uint64_t End; /// The location of the variable within the specified range. - SmallVector Loc; + SmallVector Loc; }; /// A list of locations that contain one variable. struct LocationList { /// The beginning offset where this location list is stored in the debug_loc /// section. - unsigned Offset; + uint64_t Offset; /// All the locations in which the variable is stored. SmallVector Entries; /// Dump this list on OS. - void dump(raw_ostream &OS, bool IsLittleEndian, unsigned AddressSize, - const MCRegisterInfo *MRI, DWARFUnit *U, uint64_t BaseAddress, + void dump(raw_ostream &OS, uint64_t BaseAddress, bool IsLittleEndian, + unsigned AddressSize, const MCRegisterInfo *MRI, DWARFUnit *U, + DIDumpOptions DumpOpts, unsigned Indent) const; }; @@ -58,7 +60,7 @@ private: public: /// Print the location lists found within the debug_loc section. - void dump(raw_ostream &OS, const MCRegisterInfo *RegInfo, + void dump(raw_ostream &OS, const MCRegisterInfo *RegInfo, DIDumpOptions DumpOpts, Optional Offset) const; /// Parse the debug_loc section accessible via the 'data' parameter using the @@ -68,25 +70,29 @@ public: /// Return the location list at the given offset or nullptr. 
LocationList const *getLocationListAtOffset(uint64_t Offset) const; - Optional parseOneLocationList(DWARFDataExtractor Data, - uint32_t *Offset); + Expected + parseOneLocationList(const DWARFDataExtractor &Data, uint64_t *Offset); }; class DWARFDebugLoclists { public: struct Entry { uint8_t Kind; + uint64_t Offset; uint64_t Value0; uint64_t Value1; - SmallVector Loc; + SmallVector Loc; + void dump(raw_ostream &OS, uint64_t &BaseAddr, bool IsLittleEndian, + unsigned AddressSize, const MCRegisterInfo *MRI, DWARFUnit *U, + DIDumpOptions DumpOpts, unsigned Indent, size_t MaxEncodingStringLength) const; }; struct LocationList { - unsigned Offset; + uint64_t Offset; SmallVector Entries; void dump(raw_ostream &OS, uint64_t BaseAddr, bool IsLittleEndian, unsigned AddressSize, const MCRegisterInfo *RegInfo, - DWARFUnit *U, unsigned Indent) const; + DWARFUnit *U, DIDumpOptions DumpOpts, unsigned Indent) const; }; private: @@ -99,15 +105,16 @@ private: bool IsLittleEndian; public: - void parse(DataExtractor data, unsigned Version); + void parse(DataExtractor data, uint64_t Offset, uint64_t EndOffset, uint16_t Version); void dump(raw_ostream &OS, uint64_t BaseAddr, const MCRegisterInfo *RegInfo, - Optional Offset) const; + DIDumpOptions DumpOpts, Optional Offset) const; /// Return the location list at the given offset or nullptr. LocationList const *getLocationListAtOffset(uint64_t Offset) const; - static Optional - parseOneLocationList(DataExtractor Data, unsigned *Offset, unsigned Version); + static Expected parseOneLocationList(const DataExtractor &Data, + uint64_t *Offset, + unsigned Version); }; } // end namespace llvm diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h b/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h index 99e91ca90319..ae57306b90e1 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h @@ -25,7 +25,7 @@ class DWARFDebugPubTable { public: struct Entry { /// Section offset from the beginning of the compilation unit. - uint32_t SecOffset; + uint64_t SecOffset; /// An entry of the various gnu_pub* debug sections. dwarf::PubIndexEntryDescriptor Descriptor; @@ -50,7 +50,7 @@ public: /// The offset from the beginning of the .debug_info section of the /// compilation unit header referenced by the set. - uint32_t Offset; + uint64_t Offset; /// The size in bytes of the contents of the .debug_info section generated /// to represent that compilation unit. diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h index a66f60292343..2f72c642a2d5 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h @@ -53,14 +53,13 @@ public: assert(AddressSize == 4 || AddressSize == 8); if (AddressSize == 4) return StartAddress == -1U; - else - return StartAddress == -1ULL; + return StartAddress == -1ULL; } }; private: /// Offset in .debug_ranges section. 
- uint32_t Offset; + uint64_t Offset; uint8_t AddressSize; std::vector Entries; @@ -69,7 +68,7 @@ public: void clear(); void dump(raw_ostream &OS) const; - Error extract(const DWARFDataExtractor &data, uint32_t *offset_ptr); + Error extract(const DWARFDataExtractor &data, uint64_t *offset_ptr); const std::vector &getEntries() { return Entries; } /// getAbsoluteRanges - Returns absolute address ranges defined by this range diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h b/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h index 167ddde3ec3d..952c41e188c7 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h @@ -34,7 +34,7 @@ struct RangeListEntry : public DWARFListEntryBase { uint64_t Value0; uint64_t Value1; - Error extract(DWARFDataExtractor Data, uint32_t End, uint32_t *OffsetPtr); + Error extract(DWARFDataExtractor Data, uint64_t End, uint64_t *OffsetPtr); void dump(raw_ostream &OS, uint8_t AddrSize, uint8_t MaxEncodingStringLength, uint64_t &CurrentBase, DIDumpOptions DumpOpts, llvm::function_ref(uint32_t)> diff --git a/include/llvm/DebugInfo/DWARF/DWARFDie.h b/include/llvm/DebugInfo/DWARF/DWARFDie.h index 21e68f983bb3..f7f08b4a499d 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDie.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDie.h @@ -63,7 +63,7 @@ public: /// Get the absolute offset into the debug info or types section. /// /// \returns the DIE offset or -1U if invalid. - uint32_t getOffset() const { + uint64_t getOffset() const { assert(isValid() && "must check validity prior to calling"); return Die->getOffset(); } diff --git a/include/llvm/DebugInfo/DWARF/DWARFExpression.h b/include/llvm/DebugInfo/DWARF/DWARFExpression.h index f066dd58d606..456d9df957ad 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFExpression.h +++ b/include/llvm/DebugInfo/DWARF/DWARFExpression.h @@ -77,18 +77,18 @@ public: uint8_t Opcode; ///< The Op Opcode, DW_OP_. 
Description Desc; bool Error; - uint32_t EndOffset; + uint64_t EndOffset; uint64_t Operands[2]; - uint32_t OperandEndOffsets[2]; + uint64_t OperandEndOffsets[2]; public: Description &getDescription() { return Desc; } uint8_t getCode() { return Opcode; } uint64_t getRawOperand(unsigned Idx) { return Operands[Idx]; } - uint32_t getOperandEndOffset(unsigned Idx) { return OperandEndOffsets[Idx]; } - uint32_t getEndOffset() { return EndOffset; } + uint64_t getOperandEndOffset(unsigned Idx) { return OperandEndOffsets[Idx]; } + uint64_t getEndOffset() { return EndOffset; } bool extract(DataExtractor Data, uint16_t Version, uint8_t AddressSize, - uint32_t Offset); + uint64_t Offset); bool isError() { return Error; } bool print(raw_ostream &OS, const DWARFExpression *Expr, const MCRegisterInfo *RegInfo, DWARFUnit *U, bool isEH); @@ -101,9 +101,9 @@ public: Operation> { friend class DWARFExpression; const DWARFExpression *Expr; - uint32_t Offset; + uint64_t Offset; Operation Op; - iterator(const DWARFExpression *Expr, uint32_t Offset) + iterator(const DWARFExpression *Expr, uint64_t Offset) : Expr(Expr), Offset(Offset) { Op.Error = Offset >= Expr->Data.getData().size() || diff --git a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h index 731e71ed9eae..6fec6fcb6b34 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h +++ b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h @@ -70,7 +70,7 @@ public: static DWARFFormValue createFromBlockValue(dwarf::Form F, ArrayRef D); static DWARFFormValue createFromUnit(dwarf::Form F, const DWARFUnit *Unit, - uint32_t *OffsetPtr); + uint64_t *OffsetPtr); dwarf::Form getForm() const { return Form; } uint64_t getRawUValue() const { return Value.uval; } @@ -87,12 +87,12 @@ public: /// in \p FormParams is needed to interpret some forms. The optional /// \p Context and \p Unit allows extracting information if the form refers /// to other sections (e.g., .debug_str). - bool extractValue(const DWARFDataExtractor &Data, uint32_t *OffsetPtr, + bool extractValue(const DWARFDataExtractor &Data, uint64_t *OffsetPtr, dwarf::FormParams FormParams, const DWARFContext *Context = nullptr, const DWARFUnit *Unit = nullptr); - bool extractValue(const DWARFDataExtractor &Data, uint32_t *OffsetPtr, + bool extractValue(const DWARFDataExtractor &Data, uint64_t *OffsetPtr, dwarf::FormParams FormParams, const DWARFUnit *U) { return extractValue(Data, OffsetPtr, FormParams, nullptr, U); } @@ -128,7 +128,7 @@ public: /// \param OffsetPtr A reference to the offset that will be updated. /// \param Params DWARF parameters to help interpret forms. /// \returns true on success, false if the form was not skipped. - bool skipValue(DataExtractor DebugInfoData, uint32_t *OffsetPtr, + bool skipValue(DataExtractor DebugInfoData, uint64_t *OffsetPtr, const dwarf::FormParams Params) const { return DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr, Params); } @@ -144,7 +144,7 @@ public: /// \param FormParams DWARF parameters to help interpret forms. /// \returns true on success, false if the form was not skipped. 
static bool skipValue(dwarf::Form Form, DataExtractor DebugInfoData, - uint32_t *OffsetPtr, + uint64_t *OffsetPtr, const dwarf::FormParams FormParams); private: diff --git a/include/llvm/DebugInfo/DWARF/DWARFListTable.h b/include/llvm/DebugInfo/DWARF/DWARFListTable.h index a1ea69b040f0..496fdb2477f9 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFListTable.h +++ b/include/llvm/DebugInfo/DWARF/DWARFListTable.h @@ -26,7 +26,7 @@ namespace llvm { /// entries. struct DWARFListEntryBase { /// The offset at which the entry is located in the section. - uint32_t Offset; + uint64_t Offset; /// The DWARF encoding (DW_RLE_* or DW_LLE_*). uint8_t EntryKind; /// The index of the section this entry belongs to. @@ -46,8 +46,8 @@ public: const ListEntries &getEntries() const { return Entries; } bool empty() const { return Entries.empty(); } void clear() { Entries.clear(); } - Error extract(DWARFDataExtractor Data, uint32_t HeaderOffset, uint32_t End, - uint32_t *OffsetPtr, StringRef SectionName, + Error extract(DWARFDataExtractor Data, uint64_t HeaderOffset, uint64_t End, + uint64_t *OffsetPtr, StringRef SectionName, StringRef ListStringName); }; @@ -57,7 +57,7 @@ class DWARFListTableHeader { struct Header { /// The total length of the entries for this table, not including the length /// field itself. - uint32_t Length = 0; + uint64_t Length = 0; /// The DWARF version number. uint16_t Version; /// The size in bytes of an address on the target architecture. For @@ -75,12 +75,12 @@ class DWARFListTableHeader { /// The offset table, which contains offsets to the individual list entries. /// It is used by forms such as DW_FORM_rnglistx. /// FIXME: Generate the table and use the appropriate forms. - std::vector Offsets; + std::vector Offsets; /// The table's format, either DWARF32 or DWARF64. dwarf::DwarfFormat Format; /// The offset at which the header (and hence the table) is located within /// its section. - uint32_t HeaderOffset; + uint64_t HeaderOffset; /// The name of the section the list is located in. StringRef SectionName; /// A characterization of the list for dumping purposes, e.g. "range" or @@ -95,28 +95,40 @@ public: HeaderData = {}; Offsets.clear(); } - uint32_t getHeaderOffset() const { return HeaderOffset; } + uint64_t getHeaderOffset() const { return HeaderOffset; } uint8_t getAddrSize() const { return HeaderData.AddrSize; } - uint32_t getLength() const { return HeaderData.Length; } + uint64_t getLength() const { return HeaderData.Length; } uint16_t getVersion() const { return HeaderData.Version; } StringRef getSectionName() const { return SectionName; } StringRef getListTypeString() const { return ListTypeString; } dwarf::DwarfFormat getFormat() const { return Format; } + /// Return the size of the table header including the length but not including + /// the offsets. + static uint8_t getHeaderSize(dwarf::DwarfFormat Format) { + switch (Format) { + case dwarf::DwarfFormat::DWARF32: + return 12; + case dwarf::DwarfFormat::DWARF64: + return 20; + } + llvm_unreachable("Invalid DWARF format (expected DWARF32 or DWARF64"); + } + void dump(raw_ostream &OS, DIDumpOptions DumpOpts = {}) const; - Optional getOffsetEntry(uint32_t Index) const { + Optional getOffsetEntry(uint32_t Index) const { if (Index < Offsets.size()) return Offsets[Index]; return None; } /// Extract the table header and the array of offsets. 
- Error extract(DWARFDataExtractor Data, uint32_t *OffsetPtr); + Error extract(DWARFDataExtractor Data, uint64_t *OffsetPtr); /// Returns the length of the table, including the length field, or 0 if the /// length has not been determined (e.g. because the table has not yet been /// parsed, or there was a problem in parsing). - uint32_t length() const; + uint64_t length() const; }; /// A class representing a table of lists as specified in the DWARF v5 @@ -128,7 +140,7 @@ template class DWARFListTableBase { DWARFListTableHeader Header; /// A mapping between file offsets and lists. It is used to find a particular /// list based on an offset (obtained from DW_AT_ranges, for example). - std::map ListMap; + std::map ListMap; /// This string is displayed as a heading before the list is dumped /// (e.g. "ranges:"). StringRef HeaderString; @@ -144,17 +156,18 @@ public: ListMap.clear(); } /// Extract the table header and the array of offsets. - Error extractHeaderAndOffsets(DWARFDataExtractor Data, uint32_t *OffsetPtr) { + Error extractHeaderAndOffsets(DWARFDataExtractor Data, uint64_t *OffsetPtr) { return Header.extract(Data, OffsetPtr); } /// Extract an entire table, including all list entries. - Error extract(DWARFDataExtractor Data, uint32_t *OffsetPtr); + Error extract(DWARFDataExtractor Data, uint64_t *OffsetPtr); /// Look up a list based on a given offset. Extract it and enter it into the /// list map if necessary. - Expected findList(DWARFDataExtractor Data, uint32_t Offset); + Expected findList(DWARFDataExtractor Data, uint64_t Offset); - uint32_t getHeaderOffset() const { return Header.getHeaderOffset(); } + uint64_t getHeaderOffset() const { return Header.getHeaderOffset(); } uint8_t getAddrSize() const { return Header.getAddrSize(); } + dwarf::DwarfFormat getFormat() const { return Header.getFormat(); } void dump(raw_ostream &OS, llvm::function_ref(uint32_t)> @@ -162,37 +175,31 @@ public: DIDumpOptions DumpOpts = {}) const; /// Return the contents of the offset entry designated by a given index. - Optional getOffsetEntry(uint32_t Index) const { + Optional getOffsetEntry(uint32_t Index) const { return Header.getOffsetEntry(Index); } /// Return the size of the table header including the length but not including /// the offsets. This is dependent on the table format, which is unambiguously /// derived from parsing the table. 
uint8_t getHeaderSize() const { - switch (Header.getFormat()) { - case dwarf::DwarfFormat::DWARF32: - return 12; - case dwarf::DwarfFormat::DWARF64: - return 20; - } - llvm_unreachable("Invalid DWARF format (expected DWARF32 or DWARF64"); + return DWARFListTableHeader::getHeaderSize(getFormat()); } - uint32_t length() { return Header.length(); } + uint64_t length() { return Header.length(); } }; template Error DWARFListTableBase::extract(DWARFDataExtractor Data, - uint32_t *OffsetPtr) { + uint64_t *OffsetPtr) { clear(); if (Error E = extractHeaderAndOffsets(Data, OffsetPtr)) return E; Data.setAddressSize(Header.getAddrSize()); - uint32_t End = getHeaderOffset() + Header.length(); + uint64_t End = getHeaderOffset() + Header.length(); while (*OffsetPtr < End) { DWARFListType CurrentList; - uint32_t Off = *OffsetPtr; + uint64_t Off = *OffsetPtr; if (Error E = CurrentList.extract(Data, getHeaderOffset(), End, OffsetPtr, Header.getSectionName(), Header.getListTypeString())) @@ -208,13 +215,13 @@ Error DWARFListTableBase::extract(DWARFDataExtractor Data, template Error DWARFListType::extract(DWARFDataExtractor Data, - uint32_t HeaderOffset, uint32_t End, - uint32_t *OffsetPtr, + uint64_t HeaderOffset, uint64_t End, + uint64_t *OffsetPtr, StringRef SectionName, StringRef ListTypeString) { if (*OffsetPtr < HeaderOffset || *OffsetPtr >= End) return createStringError(errc::invalid_argument, - "invalid %s list offset 0x%" PRIx32, + "invalid %s list offset 0x%" PRIx64, ListTypeString.data(), *OffsetPtr); Entries.clear(); while (*OffsetPtr < End) { @@ -227,7 +234,7 @@ Error DWARFListType::extract(DWARFDataExtractor Data, } return createStringError(errc::illegal_byte_sequence, "no end of list marker detected at end of %s table " - "starting at offset 0x%" PRIx32, + "starting at offset 0x%" PRIx64, SectionName.data(), HeaderOffset); } @@ -261,15 +268,15 @@ void DWARFListTableBase::dump( template Expected DWARFListTableBase::findList(DWARFDataExtractor Data, - uint32_t Offset) { + uint64_t Offset) { auto Entry = ListMap.find(Offset); if (Entry != ListMap.end()) return Entry->second; // Extract the list from the section and enter it into the list map. 
DWARFListType List; - uint32_t End = getHeaderOffset() + Header.length(); - uint32_t StartingOffset = Offset; + uint64_t End = getHeaderOffset() + Header.length(); + uint64_t StartingOffset = Offset; if (Error E = List.extract(Data, getHeaderOffset(), End, &Offset, Header.getSectionName(), Header.getListTypeString())) diff --git a/include/llvm/DebugInfo/DWARF/DWARFObject.h b/include/llvm/DebugInfo/DWARF/DWARFObject.h index 1bba74a25d0e..88fe3f434edc 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFObject.h +++ b/include/llvm/DebugInfo/DWARF/DWARFObject.h @@ -39,20 +39,20 @@ public: virtual StringRef getAbbrevSection() const { return ""; } virtual const DWARFSection &getLocSection() const { return Dummy; } virtual const DWARFSection &getLoclistsSection() const { return Dummy; } - virtual StringRef getARangeSection() const { return ""; } - virtual StringRef getDebugFrameSection() const { return ""; } - virtual StringRef getEHFrameSection() const { return ""; } + virtual StringRef getArangesSection() const { return ""; } + virtual const DWARFSection &getFrameSection() const { return Dummy; } + virtual const DWARFSection &getEHFrameSection() const { return Dummy; } virtual const DWARFSection &getLineSection() const { return Dummy; } - virtual StringRef getLineStringSection() const { return ""; } - virtual StringRef getStringSection() const { return ""; } - virtual const DWARFSection &getRangeSection() const { return Dummy; } + virtual StringRef getLineStrSection() const { return ""; } + virtual StringRef getStrSection() const { return ""; } + virtual const DWARFSection &getRangesSection() const { return Dummy; } virtual const DWARFSection &getRnglistsSection() const { return Dummy; } virtual StringRef getMacinfoSection() const { return ""; } - virtual const DWARFSection &getPubNamesSection() const { return Dummy; } - virtual const DWARFSection &getPubTypesSection() const { return Dummy; } - virtual const DWARFSection &getGnuPubNamesSection() const { return Dummy; } - virtual const DWARFSection &getGnuPubTypesSection() const { return Dummy; } - virtual const DWARFSection &getStringOffsetSection() const { return Dummy; } + virtual const DWARFSection &getPubnamesSection() const { return Dummy; } + virtual const DWARFSection &getPubtypesSection() const { return Dummy; } + virtual const DWARFSection &getGnuPubnamesSection() const { return Dummy; } + virtual const DWARFSection &getGnuPubtypesSection() const { return Dummy; } + virtual const DWARFSection &getStrOffsetsSection() const { return Dummy; } virtual void forEachInfoDWOSections(function_ref F) const {} virtual void @@ -60,11 +60,11 @@ public: virtual StringRef getAbbrevDWOSection() const { return ""; } virtual const DWARFSection &getLineDWOSection() const { return Dummy; } virtual const DWARFSection &getLocDWOSection() const { return Dummy; } - virtual StringRef getStringDWOSection() const { return ""; } - virtual const DWARFSection &getStringOffsetDWOSection() const { + virtual StringRef getStrDWOSection() const { return ""; } + virtual const DWARFSection &getStrOffsetsDWOSection() const { return Dummy; } - virtual const DWARFSection &getRangeDWOSection() const { return Dummy; } + virtual const DWARFSection &getRangesDWOSection() const { return Dummy; } virtual const DWARFSection &getRnglistsDWOSection() const { return Dummy; } virtual const DWARFSection &getAddrSection() const { return Dummy; } virtual const DWARFSection &getAppleNamesSection() const { return Dummy; } @@ -72,7 +72,7 @@ public: virtual const DWARFSection 
&getAppleNamespacesSection() const { return Dummy; } - virtual const DWARFSection &getDebugNamesSection() const { return Dummy; } + virtual const DWARFSection &getNamesSection() const { return Dummy; } virtual const DWARFSection &getAppleObjCSection() const { return Dummy; } virtual StringRef getCUIndexSection() const { return ""; } virtual StringRef getGdbIndexSection() const { return ""; } diff --git a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h index 90d89375fd35..c95bdcbd8a43 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h +++ b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h @@ -34,7 +34,7 @@ public: LS, LE, IsDWO, UnitVector) {} uint64_t getTypeHash() const { return getHeader().getTypeHash(); } - uint32_t getTypeOffset() const { return getHeader().getTypeOffset(); } + uint64_t getTypeOffset() const { return getHeader().getTypeOffset(); } void dump(raw_ostream &OS, DIDumpOptions DumpOpts = {}) override; // Enable LLVM-style RTTI. diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/include/llvm/DebugInfo/DWARF/DWARFUnit.h index f9f90db31890..51de114a3506 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFUnit.h +++ b/include/llvm/DebugInfo/DWARF/DWARFUnit.h @@ -45,7 +45,7 @@ class DWARFUnit; /// parse the header before deciding what specific kind of unit to construct. class DWARFUnitHeader { // Offset within section. - uint32_t Offset = 0; + uint64_t Offset = 0; // Version, address size, and DWARF format. dwarf::FormParams FormParams; uint64_t Length = 0; @@ -56,7 +56,7 @@ class DWARFUnitHeader { // For type units only. uint64_t TypeHash = 0; - uint32_t TypeOffset = 0; + uint64_t TypeOffset = 0; // For v5 split or skeleton compile units only. Optional DWOId; @@ -70,10 +70,10 @@ class DWARFUnitHeader { public: /// Parse a unit header from \p debug_info starting at \p offset_ptr. bool extract(DWARFContext &Context, const DWARFDataExtractor &debug_info, - uint32_t *offset_ptr, DWARFSectionKind Kind = DW_SECT_INFO, + uint64_t *offset_ptr, DWARFSectionKind Kind = DW_SECT_INFO, const DWARFUnitIndex *Index = nullptr, const DWARFUnitIndex::Entry *Entry = nullptr); - uint32_t getOffset() const { return Offset; } + uint64_t getOffset() const { return Offset; } const dwarf::FormParams &getFormParams() const { return FormParams; } uint16_t getVersion() const { return FormParams.Version; } dwarf::DwarfFormat getFormat() const { return FormParams.Format; } @@ -91,16 +91,17 @@ public: } const DWARFUnitIndex::Entry *getIndexEntry() const { return IndexEntry; } uint64_t getTypeHash() const { return TypeHash; } - uint32_t getTypeOffset() const { return TypeOffset; } + uint64_t getTypeOffset() const { return TypeOffset; } uint8_t getUnitType() const { return UnitType; } bool isTypeUnit() const { return UnitType == dwarf::DW_UT_type || UnitType == dwarf::DW_UT_split_type; } uint8_t getSize() const { return Size; } - uint32_t getNextUnitOffset() const { - return Offset + Length + - (FormParams.Format == llvm::dwarf::DwarfFormat::DWARF64 ? 4 : 0) + - FormParams.getDwarfOffsetByteSize(); + uint8_t getUnitLengthFieldByteSize() const { + return dwarf::getUnitLengthFieldByteSize(FormParams.Format); + } + uint64_t getNextUnitOffset() const { + return Offset + Length + getUnitLengthFieldByteSize(); } }; @@ -110,7 +111,7 @@ const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context, /// Describe a collection of units. Intended to hold all units either from /// .debug_info and .debug_types, or from .debug_info.dwo and .debug_types.dwo. 
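The DWARFUnitVector declared next maps a 64-bit section offset back to the unit containing it. As a rough illustration of that kind of lookup (not the actual implementation, which works on the parsed units themselves), a binary search over sorted unit ranges might look like this:

#include <algorithm>
#include <cstdint>
#include <vector>

struct UnitRange {
  uint64_t Start;      // Offset of the unit header within the section.
  uint64_t NextOffset; // Offset of the following unit (start + total size).
};

// Illustrative only: find the unit whose half-open range [Start, NextOffset)
// contains Offset, given units sorted by Start.
static const UnitRange *findUnitForOffset(const std::vector<UnitRange> &Units,
                                          uint64_t Offset) {
  auto It = std::upper_bound(
      Units.begin(), Units.end(), Offset,
      [](uint64_t Off, const UnitRange &U) { return Off < U.NextOffset; });
  if (It != Units.end() && Offset >= It->Start)
    return &*It;
  return nullptr;
}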
class DWARFUnitVector final : public SmallVector, 1> { - std::function(uint32_t, DWARFSectionKind, + std::function(uint64_t, DWARFSectionKind, const DWARFSection *, const DWARFUnitIndex::Entry *)> Parser; @@ -121,7 +122,7 @@ public: using iterator = typename UnitVector::iterator; using iterator_range = llvm::iterator_range; - DWARFUnit *getUnitForOffset(uint32_t Offset) const; + DWARFUnit *getUnitForOffset(uint64_t Offset) const; DWARFUnit *getUnitForIndexEntry(const DWARFUnitIndex::Entry &E); /// Read units from a .debug_info or .debug_types section. Calls made @@ -197,7 +198,7 @@ class DWARFUnit { DWARFUnitHeader Header; const DWARFDebugAbbrev *Abbrev; const DWARFSection *RangeSection; - uint32_t RangeSectionBase; + uint64_t RangeSectionBase; /// We either keep track of the location list section or its data, depending /// on whether we are handling a split DWARF section or not. union { @@ -275,7 +276,7 @@ public: const DWARFSection &getInfoSection() const { return InfoSection; } const DWARFSection *getLocSection() const { return LocSection; } StringRef getLocSectionData() const { return LocSectionData; } - uint32_t getOffset() const { return Header.getOffset(); } + uint64_t getOffset() const { return Header.getOffset(); } const dwarf::FormParams &getFormParams() const { return Header.getFormParams(); } @@ -285,10 +286,10 @@ public: uint8_t getDwarfOffsetByteSize() const { return Header.getDwarfOffsetByteSize(); } - uint32_t getLength() const { return Header.getLength(); } + uint64_t getLength() const { return Header.getLength(); } uint8_t getUnitType() const { return Header.getUnitType(); } bool isTypeUnit() const { return Header.isTypeUnit(); } - uint32_t getNextUnitOffset() const { return Header.getNextUnitOffset(); } + uint64_t getNextUnitOffset() const { return Header.getNextUnitOffset(); } const DWARFSection &getLineSection() const { return LineSection; } StringRef getStringSection() const { return StringSection; } const DWARFSection &getStringOffsetSection() const { @@ -303,7 +304,7 @@ public: /// Recursively update address to Die map. void updateAddressDieMap(DWARFDie Die); - void setRangesSection(const DWARFSection *RS, uint32_t Base) { + void setRangesSection(const DWARFSection *RS, uint64_t Base) { RangeSection = RS; RangeSectionBase = Base; } @@ -322,7 +323,7 @@ public: /// .debug_ranges section. If the extraction is unsuccessful, an error /// is returned. Successful extraction requires that the compile unit /// has already been extracted. - Error extractRangeList(uint32_t RangeListOffset, + Error extractRangeList(uint64_t RangeListOffset, DWARFDebugRangeList &RangeList) const; void clear(); @@ -405,7 +406,7 @@ public: /// Return a vector of address ranges resulting from a (possibly encoded) /// range list starting at a given offset in the appropriate ranges section. - Expected findRnglistFromOffset(uint32_t Offset); + Expected findRnglistFromOffset(uint64_t Offset); /// Return a vector of address ranges retrieved from an encoded range /// list whose offset is found via a table lookup given an index (DWARF v5 @@ -415,7 +416,7 @@ public: /// Return a rangelist's offset based on an index. The index designates /// an entry in the rangelist table's offset array and is supplied by /// DW_FORM_rnglistx. - Optional getRnglistOffset(uint32_t Index) { + Optional getRnglistOffset(uint32_t Index) { if (RngListTable) return RngListTable->getOffsetEntry(Index); return None; @@ -470,7 +471,7 @@ public: /// unit's DIE vector. 
/// /// The unit needs to have its DIEs extracted for this method to work. - DWARFDie getDIEForOffset(uint32_t Offset) { + DWARFDie getDIEForOffset(uint64_t Offset) { extractDIEsIfNeeded(false); assert(!DieArray.empty()); auto It = @@ -495,15 +496,19 @@ public: } virtual void dump(raw_ostream &OS, DIDumpOptions DumpOpts) = 0; + + Error tryExtractDIEsIfNeeded(bool CUDieOnly); + private: /// Size in bytes of the .debug_info data associated with this compile unit. size_t getDebugInfoSize() const { - return Header.getLength() + 4 - getHeaderSize(); + return Header.getLength() + Header.getUnitLengthFieldByteSize() - + getHeaderSize(); } /// extractDIEsIfNeeded - Parses a compile unit and indexes its DIEs if it - /// hasn't already been done. Returns the number of DIEs parsed at this call. - size_t extractDIEsIfNeeded(bool CUDieOnly); + /// hasn't already been done + void extractDIEsIfNeeded(bool CUDieOnly); /// extractDIEsToVector - Appends all parsed DIEs to a vector. void extractDIEsToVector(bool AppendCUDie, bool AppendNonCUDIEs, diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h b/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h index fc8c707c512e..684103aac2fc 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h +++ b/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h @@ -37,7 +37,7 @@ class DWARFUnitIndex { uint32_t NumUnits; uint32_t NumBuckets = 0; - bool parse(DataExtractor IndexData, uint32_t *OffsetPtr); + bool parse(DataExtractor IndexData, uint64_t *OffsetPtr); void dump(raw_ostream &OS) const; }; diff --git a/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/include/llvm/DebugInfo/DWARF/DWARFVerifier.h index f1268f220272..a4a3a11d441b 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFVerifier.h +++ b/include/llvm/DebugInfo/DWARF/DWARFVerifier.h @@ -94,7 +94,7 @@ private: /// A map that tracks all references (converted absolute references) so we /// can verify each reference points to a valid DIE and not an offset that /// lies between to valid DIEs. - std::map> ReferenceToDIEOffsets; + std::map> ReferenceToDIEOffsets; uint32_t NumDebugLineErrors = 0; // Used to relax some checks that do not currently work portably bool IsObjectFile; @@ -138,7 +138,7 @@ private: /// /// \returns true if the header is verified successfully, false otherwise. bool verifyUnitHeader(const DWARFDataExtractor DebugInfoData, - uint32_t *Offset, unsigned UnitIndex, uint8_t &UnitType, + uint64_t *Offset, unsigned UnitIndex, uint8_t &UnitType, bool &isUnitDWARF64); /// Verifies the header of a unit in a .debug_info or .debug_types section. diff --git a/include/llvm/DebugInfo/GSYM/FileEntry.h b/include/llvm/DebugInfo/GSYM/FileEntry.h index 228b4efa0656..49e7fc9c4291 100644 --- a/include/llvm/DebugInfo/GSYM/FileEntry.h +++ b/include/llvm/DebugInfo/GSYM/FileEntry.h @@ -1,9 +1,8 @@ //===- FileEntry.h ----------------------------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/include/llvm/DebugInfo/GSYM/FileWriter.h b/include/llvm/DebugInfo/GSYM/FileWriter.h new file mode 100644 index 000000000000..cd568765a4f2 --- /dev/null +++ b/include/llvm/DebugInfo/GSYM/FileWriter.h @@ -0,0 +1,124 @@ +//===- FileWriter.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_FILEWRITER_H +#define LLVM_DEBUGINFO_GSYM_FILEWRITER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/Endian.h" + +#include +#include +#include + +namespace llvm { +class raw_pwrite_stream; + +namespace gsym { + +/// A simplified binary data writer class that doesn't require targets, target +/// definitions, architectures, or require any other optional compile time +/// libraries to be enabled via the build process. This class needs the ability +/// to seek to different spots in the binary stream that is produces to fixup +/// offsets and sizes. +class FileWriter { + llvm::raw_pwrite_stream &OS; + llvm::support::endianness ByteOrder; +public: + FileWriter(llvm::raw_pwrite_stream &S, llvm::support::endianness B) + : OS(S), ByteOrder(B) {} + ~FileWriter(); + /// Write a single uint8_t value into the stream at the current file + /// position. + /// + /// \param Value The value to write into the stream. + void writeU8(uint8_t Value); + + /// Write a single uint16_t value into the stream at the current file + /// position. The value will be byte swapped if needed to match the byte + /// order specified during construction. + /// + /// \param Value The value to write into the stream. + void writeU16(uint16_t Value); + + /// Write a single uint32_t value into the stream at the current file + /// position. The value will be byte swapped if needed to match the byte + /// order specified during construction. + /// + /// \param Value The value to write into the stream. + void writeU32(uint32_t Value); + + /// Write a single uint64_t value into the stream at the current file + /// position. The value will be byte swapped if needed to match the byte + /// order specified during construction. + /// + /// \param Value The value to write into the stream. + void writeU64(uint64_t Value); + + /// Write the value into the stream encoded using signed LEB128 at the + /// current file position. + /// + /// \param Value The value to write into the stream. + void writeSLEB(int64_t Value); + + /// Write the value into the stream encoded using unsigned LEB128 at the + /// current file position. + /// + /// \param Value The value to write into the stream. + void writeULEB(uint64_t Value); + + /// Write an array of uint8_t values into the stream at the current file + /// position. + /// + /// \param Data An array of values to write into the stream. + void writeData(llvm::ArrayRef Data); + + /// Write a NULL terminated C string into the stream at the current file + /// position. The entire contents of Str will be written into the steam at + /// the current file position and then an extra NULL termation byte will be + /// written. 
It is up to the user to ensure that Str doesn't contain any NULL + /// characters unless the additional NULL characters are desired. + /// + /// \param Str The value to write into the stream. + void writeNullTerminated(llvm::StringRef Str); + + /// Fixup a uint32_t value at the specified offset in the stream. This + /// function will save the current file position, seek to the specified + /// offset, overwrite the data using Value, and then restore the file + /// position to the previous file position. + /// + /// \param Value The value to write into the stream. + /// \param Offset The offset at which to write the Value within the stream. + void fixup32(uint32_t Value, uint64_t Offset); + + /// Pad with zeroes at the current file position until the current file + /// position matches the specified alignment. + /// + /// \param Align An integer speciying the desired alignment. This does not + /// need to be a power of two. + void alignTo(size_t Align); + + /// Return the current offset within the file. + /// + /// \return The unsigned offset from the start of the file of the current + /// file position. + uint64_t tell(); + + llvm::raw_pwrite_stream &get_stream() { + return OS; + } + +private: + FileWriter(const FileWriter &rhs) = delete; + void operator=(const FileWriter &rhs) = delete; +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_FILEWRITER_H diff --git a/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/include/llvm/DebugInfo/GSYM/FunctionInfo.h index eedb1e638fd1..63e18bb2ecd5 100644 --- a/include/llvm/DebugInfo/GSYM/FunctionInfo.h +++ b/include/llvm/DebugInfo/GSYM/FunctionInfo.h @@ -1,17 +1,17 @@ //===- FunctionInfo.h -------------------------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H #define LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H +#include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/GSYM/InlineInfo.h" -#include "llvm/DebugInfo/GSYM/LineEntry.h" +#include "llvm/DebugInfo/GSYM/LineTable.h" #include "llvm/DebugInfo/GSYM/Range.h" #include "llvm/DebugInfo/GSYM/StringTable.h" #include @@ -21,41 +21,125 @@ namespace llvm { class raw_ostream; namespace gsym { -/// Function information in GSYM files encodes information for one -/// contiguous address range. The name of the function is encoded as -/// a string table offset and allows multiple functions with the same -/// name to share the name string in the string table. Line tables are -/// stored in a sorted vector of gsym::LineEntry objects and are split -/// into line tables for each function. If a function has a discontiguous -/// range, it will be split into two gsym::FunctionInfo objects. If the -/// function has inline functions, the information will be encoded in -/// the "Inline" member, see gsym::InlineInfo for more information. +/// Function information in GSYM files encodes information for one contiguous +/// address range. If a function has discontiguous address ranges, they will +/// need to be encoded using multiple FunctionInfo objects. 
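Before getting to the FunctionInfo encoding, here is a usage sketch for the FileWriter added above. It relies only on the interface declared in that header, plus the assumption that raw_svector_ostream can serve as the required raw_pwrite_stream; the size-then-fixup pattern is the one the GSYM encoders build on.

#include "llvm/ADT/SmallString.h"
#include "llvm/DebugInfo/GSYM/FileWriter.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Reserve a 32-bit size field, emit the payload, then patch in the number of
// bytes that follow the size field (payload plus alignment padding).
static void emitSizedBlob(gsym::FileWriter &O, ArrayRef<uint8_t> Payload) {
  const uint64_t SizeFieldOffset = O.tell();
  O.writeU32(0);        // Placeholder for the size field.
  O.writeData(Payload); // Payload bytes at the current file position.
  O.alignTo(4);         // Keep the following data 4-byte aligned.
  const uint64_t End = O.tell();
  O.fixup32(static_cast<uint32_t>(End - SizeFieldOffset - 4), SizeFieldOffset);
}

static void example() {
  SmallString<64> Buffer;
  raw_svector_ostream OS(Buffer); // A raw_pwrite_stream backed by memory.
  gsym::FileWriter FW(OS, support::endianness::little);
  const uint8_t Bytes[] = {1, 2, 3};
  emitSizedBlob(FW, Bytes);
}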
+/// +/// ENCODING +/// +/// The function information gets the function start address as an argument +/// to the FunctionInfo::decode(...) function. This information is calculated +/// from the GSYM header and an address offset from the GSYM address offsets +/// table. The encoded FunctionInfo information must be alinged to a 4 byte +/// boundary. +/// +/// The encoded data for a FunctionInfo starts with fixed data that all +/// function info objects have: +/// +/// ENCODING NAME DESCRIPTION +/// ========= =========== ==================================================== +/// uint32_t Size The size in bytes of this function. +/// uint32_t Name The string table offset of the function name. +/// +/// The optional data in a FunctionInfo object follows this fixed information +/// and consists of a stream of tuples that consist of: +/// +/// ENCODING NAME DESCRIPTION +/// ========= =========== ==================================================== +/// uint32_t InfoType An "InfoType" enumeration that describes the type +/// of optional data that is encoded. +/// uint32_t InfoLength The size in bytes of the encoded data that +/// immediately follows this length if this value is +/// greater than zero. +/// uint8_t[] InfoData Encoded bytes that represent the data for the +/// "InfoType". These bytes are only present if +/// "InfoLength" is greater than zero. +/// +/// The "InfoType" is an enumeration: +/// +/// enum InfoType { +/// EndOfList = 0u, +/// LineTableInfo = 1u, +/// InlineInfo = 2u +/// }; +/// +/// This stream of tuples is terminated by a "InfoType" whose value is +/// InfoType::EndOfList and a zero for "InfoLength". This signifies the end of +/// the optional information list. This format allows us to add new optional +/// information data to a FunctionInfo object over time and allows older +/// clients to still parse the format and skip over any data that they don't +/// understand or want to parse. +/// +/// So the function information encoding essientially looks like: +/// +/// struct { +/// uint32_t Size; +/// uint32_t Name; +/// struct { +/// uint32_t InfoType; +/// uint32_t InfoLength; +/// uint8_t InfoData[InfoLength]; +/// }[N]; +/// } +/// +/// Where "N" is the number of tuples. struct FunctionInfo { AddressRange Range; uint32_t Name; ///< String table offset in the string table. - std::vector Lines; - InlineInfo Inline; + llvm::Optional OptLineTable; + llvm::Optional Inline; FunctionInfo(uint64_t Addr = 0, uint64_t Size = 0, uint32_t N = 0) : Range(Addr, Addr + Size), Name(N) {} + /// Query if a FunctionInfo has rich debug info. + /// + /// \returns A bool that indicates if this object has something else than + /// range and name. When converting information from a symbol table and from + /// debug info, we might end up with multiple FunctionInfo objects for the + /// same range and we need to be able to tell which one is the better object + /// to use. bool hasRichInfo() const { - /// Returns whether we have something else than range and name. When - /// converting information from a symbol table and from debug info, we - /// might end up with multiple FunctionInfo objects for the same range - /// and we need to be able to tell which one is the better object to use. - return !Lines.empty() || Inline.isValid(); + return OptLineTable.hasValue() || Inline.hasValue(); } + /// Query if a FunctionInfo object is valid. 
+ /// + /// Address and size can be zero and there can be no line entries for a + /// symbol so the only indication this entry is valid is if the name is + /// not zero. This can happen when extracting information from symbol + /// tables that do not encode symbol sizes. In that case only the + /// address and name will be filled in. + /// + /// \returns A boolean indicating if this FunctionInfo is valid. bool isValid() const { - /// Address and size can be zero and there can be no line entries for a - /// symbol so the only indication this entry is valid is if the name is - /// not zero. This can happen when extracting information from symbol - /// tables that do not encode symbol sizes. In that case only the - /// address and name will be filled in. return Name != 0; } + /// Decode an object from a binary data stream. + /// + /// \param Data The binary stream to read the data from. This object must + /// have the data for the object starting at offset zero. The data + /// can contain more data than needed. + /// + /// \param BaseAddr The FunctionInfo's start address and will be used as the + /// base address when decoding any contained information like the line table + /// and the inline info. + /// + /// \returns An FunctionInfo or an error describing the issue that was + /// encountered during decoding. + static llvm::Expected decode(DataExtractor &Data, + uint64_t BaseAddr); + + /// Encode this object into FileWriter stream. + /// + /// \param O The binary stream to write the data to at the current file + /// position. + /// + /// \returns An error object that indicates failure or the offset of the + /// function info that was successfully written into the stream. + llvm::Expected encode(FileWriter &O) const; + uint64_t startAddress() const { return Range.Start; } uint64_t endAddress() const { return Range.End; } uint64_t size() const { return Range.size(); } @@ -66,14 +150,14 @@ struct FunctionInfo { void clear() { Range = {0, 0}; Name = 0; - Lines.clear(); - Inline.clear(); + OptLineTable = None; + Inline = None; } }; inline bool operator==(const FunctionInfo &LHS, const FunctionInfo &RHS) { return LHS.Range == RHS.Range && LHS.Name == RHS.Name && - LHS.Lines == RHS.Lines && LHS.Inline == RHS.Inline; + LHS.OptLineTable == RHS.OptLineTable && LHS.Inline == RHS.Inline; } inline bool operator!=(const FunctionInfo &LHS, const FunctionInfo &RHS) { return !(LHS == RHS); @@ -89,14 +173,10 @@ inline bool operator<(const FunctionInfo &LHS, const FunctionInfo &RHS) { return LHS.Range < RHS.Range; // Then sort by inline - if (LHS.Inline.isValid() != RHS.Inline.isValid()) - return RHS.Inline.isValid(); - - // If the number of lines is the same, then compare line table entries - if (LHS.Lines.size() == RHS.Lines.size()) - return LHS.Lines < RHS.Lines; - // Then sort by number of line table entries (more is better) - return LHS.Lines.size() < RHS.Lines.size(); + if (LHS.Inline.hasValue() != RHS.Inline.hasValue()) + return RHS.Inline.hasValue(); + + return LHS.OptLineTable < RHS.OptLineTable; } raw_ostream &operator<<(raw_ostream &OS, const FunctionInfo &R); diff --git a/include/llvm/DebugInfo/GSYM/GsymCreator.h b/include/llvm/DebugInfo/GSYM/GsymCreator.h new file mode 100644 index 000000000000..12c8187132ba --- /dev/null +++ b/include/llvm/DebugInfo/GSYM/GsymCreator.h @@ -0,0 +1,229 @@ +//===- GsymCreator.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H +#define LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H + +#include +#include +#include +#include +#include + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/GSYM/FileEntry.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/Range.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Path.h" + +namespace llvm { + +namespace gsym { +class FileWriter; + +/// GsymCreator is used to emit GSYM data to a stand alone file or section +/// within a file. +/// +/// The GsymCreator is designed to be used in 3 stages: +/// - Create FunctionInfo objects and add them +/// - Finalize the GsymCreator object +/// - Save to file or section +/// +/// The first stage involves creating FunctionInfo objects from another source +/// of information like compiler debug info metadata, DWARF or Breakpad files. +/// Any strings in the FunctionInfo or contained information, like InlineInfo +/// or LineTable objects, should get the string table offsets by calling +/// GsymCreator::insertString(...). Any file indexes that are needed should be +/// obtained by calling GsymCreator::insertFile(...). All of the function calls +/// in GsymCreator are thread safe. This allows multiple threads to create and +/// add FunctionInfo objects while parsing debug information. +/// +/// Once all of the FunctionInfo objects have been added, the +/// GsymCreator::finalize(...) must be called prior to saving. This function +/// will sort the FunctionInfo objects, finalize the string table, and do any +/// other passes on the information needed to prepare the information to be +/// saved. +/// +/// Once the object has been finalized, it can be saved to a file or section. +/// +/// ENCODING +/// +/// GSYM files are designed to be memory mapped into a process as shared, read +/// only data, and used as is. +/// +/// The GSYM file format when in a stand alone file consists of: +/// - Header +/// - Address Table +/// - Function Info Offsets +/// - File Table +/// - String Table +/// - Function Info Data +/// +/// HEADER +/// +/// The header is fully described in "llvm/DebugInfo/GSYM/Header.h". +/// +/// ADDRESS TABLE +/// +/// The address table immediately follows the header in the file and consists +/// of Header.NumAddresses address offsets. These offsets are sorted and can be +/// binary searched for efficient lookups. Addresses in the address table are +/// stored as offsets from a 64 bit base address found in Header.BaseAddress. +/// This allows the address table to contain 8, 16, or 32 offsets. This allows +/// the address table to not require full 64 bit addresses for each address. +/// The resulting GSYM size is smaller and causes fewer pages to be touched +/// during address lookups when the address table is smaller. The size of the +/// address offsets in the address table is specified in the header in +/// Header.AddrOffSize. The first offset in the address table is alinged to +/// Header.AddrOffSize alignement to ensure efficient access when loaded into +/// memory. 
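The address-table lookup sketched in the comment above boils down to a binary search over base-relative offsets. A standalone sketch follows, assuming the 4-byte offset flavour (the real reader also handles the other offset widths and validates the match against the function's range); the resulting index selects the entry to read from the function info offsets table described next.

#include <algorithm>
#include <cstdint>
#include <vector>

// Sketch only: return the index of the last address-table entry whose address
// is <= Addr, or -1 if Addr precedes the table or cannot be encoded as a
// 4-byte offset from the base address.
static int64_t findAddressIndex(uint64_t BaseAddress,
                                const std::vector<uint32_t> &AddrOffsets,
                                uint64_t Addr) {
  if (Addr < BaseAddress || Addr - BaseAddress > UINT32_MAX)
    return -1;
  const uint32_t Delta = static_cast<uint32_t>(Addr - BaseAddress);
  auto It = std::upper_bound(AddrOffsets.begin(), AddrOffsets.end(), Delta);
  if (It == AddrOffsets.begin())
    return -1; // Addr is below the first entry.
  return static_cast<int64_t>(std::distance(AddrOffsets.begin(), It)) - 1;
}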
+/// +/// FUNCTION INFO OFFSETS TABLE +/// +/// The function info offsets table immediately follows the address table and +/// consists of Header.NumAddresses 32 bit file offsets: one for each address +/// in the address table. This data is algined to a 4 byte boundary. The +/// offsets in this table are the relative offsets from the start offset of the +/// GSYM header and point to the function info data for each address in the +/// address table. Keeping this data separate from the address table helps to +/// reduce the number of pages that are touched when address lookups occur on a +/// GSYM file. +/// +/// FILE TABLE +/// +/// The file table immediately follows the function info offsets table. The +/// encoding of the FileTable is: +/// +/// struct FileTable { +/// uint32_t Count; +/// FileEntry Files[]; +/// }; +/// +/// The file table starts with a 32 bit count of the number of files that are +/// used in all of the function info, followed by that number of FileEntry +/// structures. The file table is aligned to a 4 byte boundary, Each file in +/// the file table is represented with a FileEntry structure. +/// See "llvm/DebugInfo/GSYM/FileEntry.h" for details. +/// +/// STRING TABLE +/// +/// The string table follows the file table in stand alone GSYM files and +/// contains all strings for everything contained in the GSYM file. Any string +/// data should be added to the string table and any references to strings +/// inside GSYM information must be stored as 32 bit string table offsets into +/// this string table. The string table always starts with an empty string at +/// offset zero and is followed by any strings needed by the GSYM information. +/// The start of the string table is not aligned to any boundary. +/// +/// FUNCTION INFO DATA +/// +/// The function info data is the payload that contains information about the +/// address that is being looked up. It contains all of the encoded +/// FunctionInfo objects. Each encoded FunctionInfo's data is pointed to by an +/// entry in the Function Info Offsets Table. For details on the exact encoding +/// of FunctionInfo objects, see "llvm/DebugInfo/GSYM/FunctionInfo.h". +class GsymCreator { + // Private member variables require Mutex protections + mutable std::recursive_mutex Mutex; + std::vector Funcs; + StringTableBuilder StrTab; + DenseMap FileEntryToIndex; + std::vector Files; + std::vector UUID; + bool Finalized = false; + +public: + + GsymCreator(); + + /// Save a GSYM file to a stand alone file. + /// + /// \param Path The file path to save the GSYM file to. + /// \param ByteOrder The endianness to use when saving the file. + /// \returns An error object that indicates success or failure of the save. + llvm::Error save(StringRef Path, llvm::support::endianness ByteOrder) const; + + /// Encode a GSYM into the file writer stream at the current position. + /// + /// \param O The stream to save the binary data to + /// \returns An error object that indicates success or failure of the save. + llvm::Error encode(FileWriter &O) const; + + /// Insert a string into the GSYM string table. + /// + /// All strings used by GSYM files must be uniqued by adding them to this + /// string pool and using the returned offset for any string values. + /// + /// \param S The string to insert into the string table. + /// \returns The unique 32 bit offset into the string table. + uint32_t insertString(StringRef S); + + /// Insert a file into this GSYM creator. 
+ /// + /// Inserts a file by adding a FileEntry into the "Files" member variable if + /// the file has not already been added. The file path is split into + /// directory and filename which are both added to the string table. This + /// allows paths to be stored efficiently by reusing the directories that are + /// common between multiple files. + /// + /// \param Path The path to the file to insert. + /// \param Style The path style for the "Path" parameter. + /// \returns The unique file index for the inserted file. + uint32_t insertFile(StringRef Path, + sys::path::Style Style = sys::path::Style::native); + + /// Add a function info to this GSYM creator. + /// + /// All information in the FunctionInfo object must use the + /// GsymCreator::insertString(...) function when creating string table + /// offsets for names and other strings. + /// + /// \param FI The function info object to emplace into our functions list. + void addFunctionInfo(FunctionInfo &&FI); + + /// Finalize the data in the GSYM creator prior to saving the data out. + /// + /// Finalize must be called after all FunctionInfo objects have been added + /// and before GsymCreator::save() is called. + /// + /// \param OS Output stream to report duplicate function infos, overlapping + /// function infos, and function infos that were merged or removed. + /// \returns An error object that indicates success or failure of the + /// finalize. + llvm::Error finalize(llvm::raw_ostream &OS); + + /// Set the UUID value. + /// + /// \param UUIDBytes The new UUID bytes. + void setUUID(llvm::ArrayRef UUIDBytes) { + UUID.assign(UUIDBytes.begin(), UUIDBytes.end()); + } + + /// Thread safe iteration over all function infos. + /// + /// \param Callback A callback function that will get called with each + /// FunctionInfo. If the callback returns false, stop iterating. + void forEachFunctionInfo( + std::function const &Callback); + + /// Thread safe const iteration over all function infos. + /// + /// \param Callback A callback function that will get called with each + /// FunctionInfo. If the callback returns false, stop iterating. + void forEachFunctionInfo( + std::function const &Callback) const; + +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H diff --git a/include/llvm/DebugInfo/GSYM/GsymReader.h b/include/llvm/DebugInfo/GSYM/GsymReader.h new file mode 100644 index 000000000000..113bcee9c9a3 --- /dev/null +++ b/include/llvm/DebugInfo/GSYM/GsymReader.h @@ -0,0 +1,228 @@ +//===- GsymReader.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H +#define LLVM_DEBUGINFO_GSYM_GSYMREADER_H + + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/GSYM/FileEntry.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/Header.h" +#include "llvm/DebugInfo/GSYM/LineEntry.h" +#include "llvm/DebugInfo/GSYM/StringTable.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorOr.h" + +#include +#include +#include +#include +#include + +namespace llvm { +class MemoryBuffer; +class raw_ostream; + +namespace gsym { + +/// GsymReader is used to read GSYM data from a file or buffer. 
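Before the reader internals, a small end-to-end sketch of the creator/reader API declared in this patch may help; it follows the three stages documented for GsymCreator above. The address range, symbol name, file path, output path and endianness choice are made-up example values, and error handling is reduced to propagating llvm::Error.

#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
#include "llvm/DebugInfo/GSYM/GsymCreator.h"
#include "llvm/DebugInfo/GSYM/GsymReader.h"
#include "llvm/DebugInfo/GSYM/LineEntry.h"
#include "llvm/DebugInfo/GSYM/LineTable.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

static llvm::Error buildAndReadBack() {
  using namespace llvm;
  using namespace llvm::gsym;

  GsymCreator GC;
  // Stage 1: create FunctionInfo objects. All strings go through
  // insertString() and all paths through insertFile().
  FunctionInfo FI;
  FI.Range = {0x1000, 0x1050};
  FI.Name = GC.insertString("main");
  const uint32_t FileIdx = GC.insertFile("/tmp/main.c");
  LineTable LT;
  LT.push(LineEntry(0x1000, FileIdx, 10)); // addr, file index, source line
  FI.OptLineTable = LT;
  GC.addFunctionInfo(std::move(FI));

  // Stage 2: finalize (sorts function infos, finalizes the string table).
  if (Error Err = GC.finalize(errs()))
    return Err;

  // Stage 3: save to a stand alone GSYM file.
  if (Error Err = GC.save("/tmp/a.out.gsym", support::endianness::native))
    return Err;

  // Read it back and look up an address inside "main".
  Expected<GsymReader> GR = GsymReader::openFile("/tmp/a.out.gsym");
  if (!GR)
    return GR.takeError();
  Expected<FunctionInfo> LookedUp = GR->getFunctionInfo(0x1010);
  if (!LookedUp)
    return LookedUp.takeError();
  return Error::success();
}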
+///
+/// This class is optimized for very quick lookups when the endianness matches
+/// the host system. The Header, address table, address info offsets, and file
+/// table are designed to be mmap'ed as read only into memory and used without
+/// any parsing needed. If the endianness doesn't match, we swap these objects
+/// and tables into GsymReader::SwappedData and then point our header and
+/// ArrayRefs to this swapped internal data.
+///
+/// GsymReader objects must use one of the static functions to create an
+/// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...).
+
+class GsymReader {
+  GsymReader(std::unique_ptr<MemoryBuffer> Buffer);
+  llvm::Error parse();
+
+  std::unique_ptr<MemoryBuffer> MemBuffer;
+  StringRef GsymBytes;
+  llvm::support::endianness Endian;
+  const Header *Hdr = nullptr;
+  ArrayRef<uint8_t> AddrOffsets;
+  ArrayRef<uint32_t> AddrInfoOffsets;
+  ArrayRef<FileEntry> Files;
+  StringTable StrTab;
+  /// When the GSYM file's endianness doesn't match the host system then
+  /// we must decode all data structures that need to be swapped into
+  /// local storage and point the ArrayRef objects above to these swapped
+  /// copies.
+  struct SwappedData {
+    Header Hdr;
+    std::vector<uint8_t> AddrOffsets;
+    std::vector<uint32_t> AddrInfoOffsets;
+    std::vector<FileEntry> Files;
+  };
+  std::unique_ptr<SwappedData> Swap;
+
+public:
+  GsymReader(GsymReader &&RHS);
+  ~GsymReader();
+
+  /// Construct a GsymReader from a file on disk.
+  ///
+  /// \param Path The file path of the GSYM file to read.
+  /// \returns An expected GsymReader that contains the object or an error
+  /// object that indicates reason for failing to read the GSYM.
+  static llvm::Expected<GsymReader> openFile(StringRef Path);
+
+  /// Construct a GsymReader from a buffer.
+  ///
+  /// \param Bytes A set of bytes that will be copied and owned by the
+  /// returned object on success.
+  /// \returns An expected GsymReader that contains the object or an error
+  /// object that indicates reason for failing to read the GSYM.
+  static llvm::Expected<GsymReader> copyBuffer(StringRef Bytes);
+
+  /// Access the GSYM header.
+  /// \returns A native endian version of the GSYM header.
+  const Header &getHeader() const;
+
+  /// Get the full function info for an address.
+  ///
+  /// \param Addr A virtual address from the original object file to lookup.
+  /// \returns An expected FunctionInfo that contains the function info object
+  /// or an error object that indicates reason for failing to lookup the
+  /// address.
+  llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const;
+
+  /// Get a string from the string table.
+  ///
+  /// \param Offset The string table offset for the string to retrieve.
+  /// \returns The string from the string table.
+  StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
+
+protected:
+  /// Gets an address from the address table.
+  ///
+  /// Addresses are stored as offsets from the gsym::Header::BaseAddress.
+  ///
+  /// \param Index An index into the address table.
+  /// \returns A resolved virtual address for the address in the address table
+  /// or llvm::None if Index is out of bounds.
+  Optional<uint64_t> getAddress(size_t Index) const;
+
+  /// Get the file entry for the supplied file index.
+  ///
+  /// Used to convert any file indexes in the FunctionInfo data back into
+  /// files. This function can be used for iteration, but is more commonly used
+  /// for random access when doing lookups.
+  ///
+  /// \param Index An index into the file table.
+ /// \returns An optional FileInfo that will be valid if the file index is + /// valid, or llvm::None if the file index is out of bounds, + Optional getFile(uint32_t Index) const { + if (Index < Files.size()) + return Files[Index]; + return llvm::None; + } + + /// Get an appropriate address info offsets array. + /// + /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8 + /// byte offsets from the The gsym::Header::BaseAddress. The table is stored + /// internally as a array of bytes that are in the correct endianness. When + /// we access this table we must get an array that matches those sizes. This + /// templatized helper function is used when accessing address offsets in the + /// AddrOffsets member variable. + /// + /// \returns An ArrayRef of an appropriate address offset size. + template ArrayRef + getAddrOffsets() const { + return ArrayRef(reinterpret_cast(AddrOffsets.data()), + AddrOffsets.size()/sizeof(T)); + } + + /// Get an appropriate address from the address table. + /// + /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8 + /// byte address offsets from the The gsym::Header::BaseAddress. The table is + /// stored internally as a array of bytes that are in the correct endianness. + /// In order to extract an address from the address table we must access the + /// address offset using the correct size and then add it to the BaseAddress + /// in the header. + /// + /// \param Index An index into the AddrOffsets array. + /// \returns An virtual address that matches the original object file for the + /// address as the specified index, or llvm::None if Index is out of bounds. + template Optional + addressForIndex(size_t Index) const { + ArrayRef AIO = getAddrOffsets(); + if (Index < AIO.size()) + return AIO[Index] + Hdr->BaseAddress; + return llvm::None; + } + /// Lookup an address offset in the AddrOffsets table. + /// + /// Given an address offset, look it up using a binary search of the + /// AddrOffsets table. + /// + /// \param AddrOffset An address offset, that has already been computed by + /// subtracting the gsym::Header::BaseAddress. + /// \returns The matching address offset index. This index will be used to + /// extract the FunctionInfo data's offset from the AddrInfoOffsets array. + template + uint64_t getAddressOffsetIndex(const uint64_t AddrOffset) const { + ArrayRef AIO = getAddrOffsets(); + const auto Begin = AIO.begin(); + const auto End = AIO.end(); + auto Iter = std::lower_bound(Begin, End, AddrOffset); + if (Iter == End || AddrOffset < *Iter) + --Iter; + return std::distance(Begin, Iter); + } + + /// Create a GSYM from a memory buffer. + /// + /// Called by both openFile() and copyBuffer(), this function does all of the + /// work of parsing the GSYM file and returning an error. + /// + /// \param MemBuffer A memory buffer that will transfer ownership into the + /// GsymReader. + /// \returns An expected GsymReader that contains the object or an error + /// object that indicates reason for failing to read the GSYM. + static llvm::Expected + create(std::unique_ptr &MemBuffer); + + + /// Given an address, find the address index. + /// + /// Binary search the address table and find the matching address index. + /// + /// \param Addr A virtual address that matches the original object file + /// to lookup. + /// \returns An index into the address table. This index can be used to + /// extract the FunctionInfo data's offset from the AddrInfoOffsets array. 
+ /// Returns an error if the address isn't in the GSYM with details of why. + Expected getAddressIndex(const uint64_t Addr) const; + + /// Given an address index, get the offset for the FunctionInfo. + /// + /// Looking up an address is done by finding the corresponding address + /// index for the address. This index is then used to get the offset of the + /// FunctionInfo data that we will decode using this function. + /// + /// \param Index An index into the address table. + /// \returns An optional GSYM data offset for the offset of the FunctionInfo + /// that needs to be decoded. + Optional getAddressInfoOffset(size_t Index) const; +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H diff --git a/include/llvm/DebugInfo/GSYM/Header.h b/include/llvm/DebugInfo/GSYM/Header.h new file mode 100644 index 000000000000..6652c59c97a6 --- /dev/null +++ b/include/llvm/DebugInfo/GSYM/Header.h @@ -0,0 +1,129 @@ +//===- Header.h -------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_HEADER_H +#define LLVM_DEBUGINFO_GSYM_HEADER_H + +#include "llvm/Support/Error.h" + +#include +#include + +namespace llvm { +class raw_ostream; +class DataExtractor; + +namespace gsym { +class FileWriter; + +constexpr uint32_t GSYM_MAGIC = 0x4753594d; // 'GSYM' +constexpr uint32_t GSYM_CIGAM = 0x4d595347; // 'MYSG' +constexpr uint32_t GSYM_VERSION = 1; +constexpr size_t GSYM_MAX_UUID_SIZE = 20; + +/// The GSYM header. +/// +/// The GSYM header is found at the start of a stand alone GSYM file, or as +/// the first bytes in a section when GSYM is contained in a section of an +/// executable file (ELF, mach-o, COFF). +/// +/// The structure is encoded exactly as it appears in the structure definition +/// with no gaps between members. Alignment should not change from system to +/// system as the members were laid out so that they shouldn't align +/// differently on different architectures. +/// +/// When endianness of the system loading a GSYM file matches, the file can +/// be mmap'ed in and a pointer to the header can be cast to the first bytes +/// of the file (stand alone GSYM file) or section data (GSYM in a section). +/// When endianness is swapped, the Header::decode() function should be used to +/// decode the header. +struct Header { + /// The magic bytes should be set to GSYM_MAGIC. This helps detect if a file + /// is a GSYM file by scanning the first 4 bytes of a file or section. + /// This value might appear byte swapped + uint32_t Magic; + /// The version can number determines how the header is decoded and how each + /// InfoType in FunctionInfo is encoded/decoded. As version numbers increase, + /// "Magic" and "Version" members should always appear at offset zero and 4 + /// respectively to ensure clients figure out if they can parse the format. + uint16_t Version; + /// The size in bytes of each address offset in the address offsets table. + uint8_t AddrOffSize; + /// The size in bytes of the UUID encoded in the "UUID" member. + uint8_t UUIDSize; + /// The 64 bit base address that all address offsets in the address offsets + /// table are relative to. 
Storing a full 64 bit address allows our address
+  /// offsets table to be smaller on disk.
+  uint64_t BaseAddress;
+  /// The number of addresses stored in the address offsets table.
+  uint32_t NumAddresses;
+  /// The file relative offset of the start of the string table for strings
+  /// contained in the GSYM file. If the GSYM is contained in a stand alone
+  /// file this will be the file offset of the start of the string table. If
+  /// the GSYM is contained in a section within an executable file, this can
+  /// be the offset of the first string used in the GSYM file and can possibly
+  /// span one or more executable string tables. This allows the strings to
+  /// share string tables in an ELF or mach-o file.
+  uint32_t StrtabOffset;
+  /// The size in bytes of the string table. For a stand alone GSYM file, this
+  /// will be the exact size in bytes of the string table. When the GSYM data
+  /// is in a section within an executable file, this size can span one or more
+  /// sections that contain strings. This allows any strings that are already
+  /// stored in the executable file to be re-used, and any extra strings could
+  /// be added to another string table and the string table offset and size
+  /// can be set to span all needed string tables.
+  uint32_t StrtabSize;
+  /// The UUID of the original executable file. This is stored to allow
+  /// matching a GSYM file to an executable file when symbolication is
+  /// required. Only the first "UUIDSize" bytes of the UUID are valid. Any
+  /// bytes in the UUID value that appear after the first UUIDSize bytes should
+  /// be set to zero.
+  uint8_t UUID[GSYM_MAX_UUID_SIZE];
+
+  /// Check if a header is valid and return an error if anything is wrong.
+  ///
+  /// This function can be used prior to encoding a header to ensure it is
+  /// valid, or after decoding a header to ensure it is valid and supported.
+  ///
+  /// Check a correctly byte swapped header for errors:
+  ///   - check magic value
+  ///   - check that version number is supported
+  ///   - check that the address offset size is supported
+  ///   - check that the UUID size is valid
+  ///
+  /// \returns An error if anything is wrong in the header, or Error::success()
+  /// if there are no errors.
+  llvm::Error checkForError() const;
+
+  /// Decode an object from a binary data stream.
+  ///
+  /// \param Data The binary stream to read the data from. This object must
+  /// have the data for the object starting at offset zero. The data
+  /// can contain more data than needed.
+  ///
+  /// \returns A Header or an error describing the issue that was
+  /// encountered during decoding.
+  static llvm::Expected<Header>
decode(DataExtractor &Data); + + /// Encode this object into FileWriter stream. + /// + /// \param O The binary stream to write the data to at the current file + /// position. + /// + /// \returns An error object that indicates success or failure of the + /// encoding process. + llvm::Error encode(FileWriter &O) const; +}; + +bool operator==(const Header &LHS, const Header &RHS); +raw_ostream &operator<<(raw_ostream &OS, const llvm::gsym::Header &H); + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_HEADER_H diff --git a/include/llvm/DebugInfo/GSYM/InlineInfo.h b/include/llvm/DebugInfo/GSYM/InlineInfo.h index 222430622932..48fd9a7c1308 100644 --- a/include/llvm/DebugInfo/GSYM/InlineInfo.h +++ b/include/llvm/DebugInfo/GSYM/InlineInfo.h @@ -1,9 +1,8 @@ //===- InlineInfo.h ---------------------------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -12,6 +11,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/GSYM/Range.h" +#include "llvm/Support/Error.h" #include #include @@ -31,6 +31,30 @@ namespace gsym { /// Any clients that encode information will need to ensure the ranges are /// all contined correctly or lookups could fail. Add ranges in these objects /// must be contained in the top level FunctionInfo address ranges as well. +/// +/// ENCODING +/// +/// When saved to disk, the inline info encodes all ranges to be relative to +/// a parent address range. This will be the FunctionInfo's start address if +/// the InlineInfo is directly contained in a FunctionInfo, or a the start +/// address of the containing parent InlineInfo's first "Ranges" member. This +/// allows address ranges to be efficiently encoded using ULEB128 encodings as +/// we encode the offset and size of each range instead of full addresses. This +/// also makes any encoded addresses easy to relocate as we just need to +/// relocate the FunctionInfo's start address. +/// +/// - The AddressRanges member "Ranges" is encoded using an approriate base +/// address as described above. +/// - UINT8 boolean value that specifies if the InlineInfo object has children. +/// - UINT32 string table offset that points to the name of the inline +/// function. +/// - ULEB128 integer that specifies the file of the call site that called +/// this function. +/// - ULEB128 integer that specifies the source line of the call site that +/// called this function. +/// - if this object has children, enocode each child InlineInfo using the +/// the first address range's start address as the base address. +/// struct InlineInfo { uint32_t Name; ///< String table offset in the string table. @@ -62,6 +86,37 @@ struct InlineInfo { /// \returns optional vector of InlineInfo objects that describe the /// inline call stack for a given address, false otherwise. llvm::Optional getInlineStack(uint64_t Addr) const; + + /// Decode an InlineInfo object from a binary data stream. + /// + /// \param Data The binary stream to read the data from. This object must + /// have the data for the InlineInfo object starting at offset zero. The data + /// can contain more data than needed. 
+ /// + /// \param BaseAddr The base address to use when decoding all address ranges. + /// This will be the FunctionInfo's start address if this object is directly + /// contained in a FunctionInfo object, or the start address of the first + /// address range in an InlineInfo object of this object is a child of + /// another InlineInfo object. + /// \returns An InlineInfo or an error describing the issue that was + /// encountered during decoding. + static llvm::Expected decode(DataExtractor &Data, + uint64_t BaseAddr); + + /// Encode this InlineInfo object into FileWriter stream. + /// + /// \param O The binary stream to write the data to at the current file + /// position. + /// + /// \param BaseAddr The base address to use when encoding all address ranges. + /// This will be the FunctionInfo's start address if this object is directly + /// contained in a FunctionInfo object, or the start address of the first + /// address range in an InlineInfo object of this object is a child of + /// another InlineInfo object. + /// + /// \returns An error object that indicates success or failure or the + /// encoding process. + llvm::Error encode(FileWriter &O, uint64_t BaseAddr) const; }; inline bool operator==(const InlineInfo &LHS, const InlineInfo &RHS) { diff --git a/include/llvm/DebugInfo/GSYM/LineEntry.h b/include/llvm/DebugInfo/GSYM/LineEntry.h index 6b9380940bd3..aac7c48e067e 100644 --- a/include/llvm/DebugInfo/GSYM/LineEntry.h +++ b/include/llvm/DebugInfo/GSYM/LineEntry.h @@ -1,9 +1,8 @@ //===- LineEntry.h ----------------------------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/include/llvm/DebugInfo/GSYM/LineTable.h b/include/llvm/DebugInfo/GSYM/LineTable.h new file mode 100644 index 000000000000..3cdbccb08ced --- /dev/null +++ b/include/llvm/DebugInfo/GSYM/LineTable.h @@ -0,0 +1,198 @@ +//===- LineTable.h ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_LINETABLE_H +#define LLVM_DEBUGINFO_GSYM_LINETABLE_H + +#include "llvm/DebugInfo/GSYM/LineEntry.h" +#include "llvm/Support/Error.h" +#include +#include + +namespace llvm { +namespace gsym { + +struct FunctionInfo; +class FileWriter; + +/// LineTable class contains deserialized versions of line tables for each +/// function's address ranges. +/// +/// When saved to disk, the line table is encoded using a modified version of +/// the DWARF line tables that only tracks address to source file and line. +/// +/// ENCODING +/// +/// The line table starts with a small prolog that contains the following +/// values: +/// +/// ENCODING NAME DESCRIPTION +/// ======== =========== ==================================================== +/// SLEB MinDelta The min line delta for special opcodes that advance +/// the address and line number. 
+/// SLEB MaxDelta The max line delta for single byte opcodes that +/// advance the address and line number. +/// ULEB FirstLine The value of the first source line number to +/// initialize the LineEntry with. +/// +/// Once these prolog items are read, we initialize a LineEntry struct with +/// the start address of the function from the FunctionInfo's address range, +/// a default file index of 1, and the line number set to "FirstLine" from +/// the prolog above: +/// +/// LineEntry Row(BaseAddr, 1, FirstLine); +/// +/// The line table state machine is now initialized and ready to be parsed. +/// The stream that follows this encodes the line entries in a compact +/// form. Some opcodes cause "Row" to be modified and some opcodes may also +/// push "Row" onto the end of the "LineTable.Lines" vector. The end result +/// is a vector of LineEntry structs that is sorted in ascending address +/// order. +/// +/// NORMAL OPCODES +/// +/// The opcodes 0 through 3 are normal in opcodes. Their encoding and +/// descriptions are listed below: +/// +/// ENCODING ENUMERATION VALUE DESCRIPTION +/// ======== ================ ===== ======================================== +/// LTOC_EndSequence 0x00 Parsing is done. +/// ULEB LTOC_SetFile 0x01 Row.File = ULEB +/// ULEB LTOC_AdvancePC 0x02 Row.Addr += ULEB, push "Row". +/// SLEB LTOC_AdvanceLine 0x03 Row.Line += SLEB +/// LTOC_FirstSpecial 0x04 First special opcode (see SPECIAL +/// OPCODES below). +/// +/// SPECIAL OPCODES +/// +/// Opcodes LTOC_FirstSpecial through 255 are special opcodes that always +/// increment both the Row.Addr and Row.Line and push "Row" onto the +/// LineEntry.Lines array. They do this by using some of the bits to +/// increment/decrement the source line number, and some of the bits to +/// increment the address. Line numbers can go up or down when making line +/// tables, where addresses always only increase since line tables are sorted +/// by address. +/// +/// In order to calculate the amount to increment the line and address for +/// these special opcodes, we calculate the number of values reserved for the +/// line increment/decrement using the "MinDelta" and "MaxDelta" from the +/// prolog: +/// +/// const int64_t LineRange = MaxDelta - MinDelta + 1; +/// +/// Then we can adjust the opcode to not include any of the normal opcodes: +/// +/// const uint8_t AdjustedOp = Opcode - LTOC_FirstSpecial; +/// +/// And we can calculate the line offset, and address offset: +/// +/// const int64_t LineDelta = MinDelta + (AdjustedOp % LineRange); +/// const uint64_t AddrDelta = (AdjustedOp / LineRange); +/// +/// And use these to modify our "Row": +/// +/// Row.Line += LineDelta; +/// Row.Addr += AddrDelta; +/// +/// And push a row onto the line table: +/// +/// Lines.push_back(Row); +/// +/// This is verify similar to the way that DWARF encodes its line tables. The +/// only difference is the DWARF line tables have more normal opcodes and the +/// "Row" contains more members, like source column number, bools for end of +/// prologue, beginnging of epilogue, is statement and many others. There are +/// also more complex rules that happen for the extra normal opcodes. By +/// leaving these extra opcodes out, we leave more bits for the special +/// opcodes that allows us to encode line tables in fewer bytes than standard +/// DWARF encodings. +/// +/// Opcodes that will push "Row" onto the LineEntry.Lines include the +/// LTOC_AdvancePC opcode and all special opcodes. 
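As a worked example of the special opcode math described above (the MinDelta and MaxDelta values here are invented for illustration; a real table reads them from the SLEB prolog):

#include <cstdint>

// Worked decode of one special opcode, following the formulas above.
struct ExampleRow { uint64_t Addr; uint32_t File; uint32_t Line; };

static void applySpecialOpcode(ExampleRow &Row, uint8_t Opcode,
                               int64_t MinDelta, int64_t MaxDelta) {
  const uint8_t LTOC_FirstSpecial = 0x04;
  const int64_t LineRange = MaxDelta - MinDelta + 1;
  const uint8_t AdjustedOp = Opcode - LTOC_FirstSpecial;
  const int64_t LineDelta = MinDelta + (AdjustedOp % LineRange);
  const uint64_t AddrDelta = AdjustedOp / LineRange;
  Row.Line = static_cast<uint32_t>(int64_t(Row.Line) + LineDelta);
  Row.Addr += AddrDelta;
  // Example: with MinDelta = -4 and MaxDelta = 10, LineRange is 15.
  // Opcode 0x33 gives AdjustedOp = 47, so LineDelta = -4 + (47 % 15) = -2
  // and AddrDelta = 47 / 15 = 3: the row advances 3 address units, moves
  // back 2 source lines, and is then pushed onto the line table.
}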
All other opcodes +/// only modify the current "Row", or cause the line table to end. +class LineTable { + typedef std::vector Collection; + Collection Lines; ///< All line entries in the line table. +public: + static LineEntry lookup(DataExtractor &Data, uint64_t BaseAddr, + uint64_t Addr); + + /// Decode an LineTable object from a binary data stream. + /// + /// \param Data The binary stream to read the data from. This object must + /// have the data for the LineTable object starting at offset zero. The data + /// can contain more data than needed. + /// + /// \param BaseAddr The base address to use when decoding the line table. + /// This will be the FunctionInfo's start address and will be used to + /// initialize the line table row prior to parsing any opcodes. + /// + /// \returns An LineTable or an error describing the issue that was + /// encountered during decoding. + static llvm::Expected decode(DataExtractor &Data, + uint64_t BaseAddr); + /// Encode this LineTable object into FileWriter stream. + /// + /// \param O The binary stream to write the data to at the current file + /// position. + /// + /// \param BaseAddr The base address to use when decoding the line table. + /// This will be the FunctionInfo's start address. + /// + /// \returns An error object that indicates success or failure or the + /// encoding process. + llvm::Error encode(FileWriter &O, uint64_t BaseAddr) const; + bool empty() const { return Lines.empty(); } + void clear() { Lines.clear(); } + void push(const LineEntry &LE) { + Lines.push_back(LE); + } + size_t isValid() const { + return !Lines.empty(); + } + size_t size() const { + return Lines.size(); + } + LineEntry &get(size_t i) { + assert(i < Lines.size()); + return Lines[i]; + } + const LineEntry &get(size_t i) const { + assert(i < Lines.size()); + return Lines[i]; + } + LineEntry &operator[](size_t i) { + return get(i); + } + const LineEntry &operator[](size_t i) const { + return get(i); + } + bool operator==(const LineTable &RHS) const { + return Lines == RHS.Lines; + } + bool operator!=(const LineTable &RHS) const { + return Lines != RHS.Lines; + } + bool operator<(const LineTable &RHS) const { + const auto LHSSize = Lines.size(); + const auto RHSSize = RHS.Lines.size(); + if (LHSSize == RHSSize) + return Lines < RHS.Lines; + return LHSSize < RHSSize; + } + Collection::const_iterator begin() const { return Lines.begin(); } + Collection::const_iterator end() const { return Lines.end(); } + +}; + +raw_ostream &operator<<(raw_ostream &OS, const gsym::LineTable <); + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_LINETABLE_H diff --git a/include/llvm/DebugInfo/GSYM/Range.h b/include/llvm/DebugInfo/GSYM/Range.h index 772ff244c5b7..37cfec713f26 100644 --- a/include/llvm/DebugInfo/GSYM/Range.h +++ b/include/llvm/DebugInfo/GSYM/Range.h @@ -1,9 +1,8 @@ -//===- AddressRange.h -------------------------------------------*- C++ -*-===// +//===- Range.h --------------------------------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -21,10 +20,13 @@ #define HEX64(v) llvm::format_hex(v, 18) namespace llvm { +class DataExtractor; class raw_ostream; namespace gsym { +class FileWriter; + /// A class that represents an address range. The range is specified using /// a start and an end address. struct AddressRange { @@ -47,6 +49,18 @@ struct AddressRange { bool operator<(const AddressRange &R) const { return std::make_pair(Start, End) < std::make_pair(R.Start, R.End); } + /// AddressRange objects are encoded and decoded to be relative to a base + /// address. This will be the FunctionInfo's start address if the AddressRange + /// is directly contained in a FunctionInfo, or a base address of the + /// containing parent AddressRange or AddressRanges. This allows address + /// ranges to be efficiently encoded using ULEB128 encodings as we encode the + /// offset and size of each range instead of full addresses. This also makes + /// encoded addresses easy to relocate as we just need to relocate one base + /// address. + /// @{ + void decode(DataExtractor &Data, uint64_t BaseAddr, uint64_t &Offset); + void encode(FileWriter &O, uint64_t BaseAddr) const; + /// @} }; raw_ostream &operator<<(raw_ostream &OS, const AddressRange &R); @@ -66,6 +80,7 @@ public: void clear() { Ranges.clear(); } bool empty() const { return Ranges.empty(); } bool contains(uint64_t Addr) const; + bool contains(AddressRange Range) const; void insert(AddressRange Range); size_t size() const { return Ranges.size(); } bool operator==(const AddressRanges &RHS) const { @@ -77,6 +92,14 @@ public: } Collection::const_iterator begin() const { return Ranges.begin(); } Collection::const_iterator end() const { return Ranges.end(); } + + /// Address ranges are decoded and encoded to be relative to a base address. + /// See the AddressRange comment for the encode and decode methods for full + /// details. + /// @{ + void decode(DataExtractor &Data, uint64_t BaseAddr, uint64_t &Offset); + void encode(FileWriter &O, uint64_t BaseAddr) const; + /// @} }; raw_ostream &operator<<(raw_ostream &OS, const AddressRanges &AR); diff --git a/include/llvm/DebugInfo/GSYM/StringTable.h b/include/llvm/DebugInfo/GSYM/StringTable.h index 0001b8b82743..a96ae5899da3 100644 --- a/include/llvm/DebugInfo/GSYM/StringTable.h +++ b/include/llvm/DebugInfo/GSYM/StringTable.h @@ -1,9 +1,8 @@ //===- StringTable.h --------------------------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/include/llvm/DebugInfo/PDB/GenericError.h b/include/llvm/DebugInfo/PDB/GenericError.h index ec85d92d2a92..af93be931b8e 100644 --- a/include/llvm/DebugInfo/PDB/GenericError.h +++ b/include/llvm/DebugInfo/PDB/GenericError.h @@ -20,7 +20,7 @@ enum class pdb_error_code { dia_sdk_not_present, dia_failed_loading, signature_out_of_date, - external_cmdline_ref, + no_matching_pch, unspecified, }; } // namespace pdb diff --git a/include/llvm/DebugInfo/PDB/Native/SymbolCache.h b/include/llvm/DebugInfo/PDB/Native/SymbolCache.h index 0b15ab474f71..4adf3b394c2e 100644 --- a/include/llvm/DebugInfo/PDB/Native/SymbolCache.h +++ b/include/llvm/DebugInfo/PDB/Native/SymbolCache.h @@ -87,7 +87,7 @@ public: // Initial construction must not access the cache, since it must be done // atomically. - auto Result = llvm::make_unique( + auto Result = std::make_unique( Session, Id, std::forward(ConstructorArgs)...); Result->SymbolId = Id; diff --git a/include/llvm/DebugInfo/PDB/PDBSymbol.h b/include/llvm/DebugInfo/PDB/PDBSymbol.h index d9004a8894d9..0d95a2467556 100644 --- a/include/llvm/DebugInfo/PDB/PDBSymbol.h +++ b/include/llvm/DebugInfo/PDB/PDBSymbol.h @@ -131,7 +131,7 @@ public: auto BaseIter = RawSymbol->findChildren(T::Tag); if (!BaseIter) return nullptr; - return llvm::make_unique>(std::move(BaseIter)); + return std::make_unique>(std::move(BaseIter)); } std::unique_ptr findAllChildren(PDB_SymType Type) const; std::unique_ptr findAllChildren() const; diff --git a/include/llvm/DebugInfo/Symbolize/Symbolize.h b/include/llvm/DebugInfo/Symbolize/Symbolize.h index d3da28ca0b7b..11599fc1797d 100644 --- a/include/llvm/DebugInfo/Symbolize/Symbolize.h +++ b/include/llvm/DebugInfo/Symbolize/Symbolize.h @@ -39,6 +39,7 @@ public: bool UseSymbolTable = true; bool Demangle = true; bool RelativeAddresses = false; + bool UntagAddresses = false; std::string DefaultArch; std::vector DsymHints; std::string FallbackDebugPath; diff --git a/include/llvm/Demangle/Demangle.h b/include/llvm/Demangle/Demangle.h index 6fea7ef13f11..7b85b9a9ccf7 100644 --- a/include/llvm/Demangle/Demangle.h +++ b/include/llvm/Demangle/Demangle.h @@ -32,7 +32,14 @@ char *itaniumDemangle(const char *mangled_name, char *buf, size_t *n, int *status); -enum MSDemangleFlags { MSDF_None = 0, MSDF_DumpBackrefs = 1 << 0 }; +enum MSDemangleFlags { + MSDF_None = 0, + MSDF_DumpBackrefs = 1 << 0, + MSDF_NoAccessSpecifier = 1 << 1, + MSDF_NoCallingConvention = 1 << 2, + MSDF_NoReturnType = 1 << 3, + MSDF_NoMemberType = 1 << 4, +}; char *microsoftDemangle(const char *mangled_name, char *buf, size_t *n, int *status, MSDemangleFlags Flags = MSDF_None); diff --git a/include/llvm/Demangle/DemangleConfig.h b/include/llvm/Demangle/DemangleConfig.h index 73f89d357c85..b7b7dbd24c7f 100644 --- a/include/llvm/Demangle/DemangleConfig.h +++ b/include/llvm/Demangle/DemangleConfig.h @@ -15,13 +15,6 @@ #ifndef LLVM_DEMANGLE_COMPILER_H #define LLVM_DEMANGLE_COMPILER_H -#ifdef _MSC_VER -// snprintf is implemented in VS 2015 -#if _MSC_VER < 1900 -#define snprintf _snprintf_s -#endif -#endif - #ifndef __has_feature #define __has_feature(x) 0 #endif diff --git a/include/llvm/Demangle/ItaniumDemangle.h b/include/llvm/Demangle/ItaniumDemangle.h index aaccb27e17a3..7784e842bfeb 100644 --- a/include/llvm/Demangle/ItaniumDemangle.h +++ b/include/llvm/Demangle/ItaniumDemangle.h @@ -57,6 +57,11 @@ X(LocalName) \ X(VectorType) \ 
X(PixelVectorType) \ + X(SyntheticTemplateParamName) \ + X(TypeTemplateParamDecl) \ + X(NonTypeTemplateParamDecl) \ + X(TemplateTemplateParamDecl) \ + X(TemplateParamPackDecl) \ X(ParameterPack) \ X(TemplateArgumentPack) \ X(ParameterPackExpansion) \ @@ -91,6 +96,8 @@ X(ThrowExpr) \ X(UUIDOfExpr) \ X(BoolExpr) \ + X(StringLiteral) \ + X(LambdaExpr) \ X(IntegerCastExpr) \ X(IntegerLiteral) \ X(FloatLiteral) \ @@ -303,7 +310,7 @@ inline Qualifiers operator|=(Qualifiers &Q1, Qualifiers Q2) { return Q1 = static_cast(Q1 | Q2); } -class QualType : public Node { +class QualType final : public Node { protected: const Qualifiers Quals; const Node *Child; @@ -964,6 +971,127 @@ public: } }; +enum class TemplateParamKind { Type, NonType, Template }; + +/// An invented name for a template parameter for which we don't have a +/// corresponding template argument. +/// +/// This node is created when parsing the for a lambda with +/// explicit template arguments, which might be referenced in the parameter +/// types appearing later in the . +class SyntheticTemplateParamName final : public Node { + TemplateParamKind Kind; + unsigned Index; + +public: + SyntheticTemplateParamName(TemplateParamKind Kind_, unsigned Index_) + : Node(KSyntheticTemplateParamName), Kind(Kind_), Index(Index_) {} + + template void match(Fn F) const { F(Kind, Index); } + + void printLeft(OutputStream &S) const override { + switch (Kind) { + case TemplateParamKind::Type: + S += "$T"; + break; + case TemplateParamKind::NonType: + S += "$N"; + break; + case TemplateParamKind::Template: + S += "$TT"; + break; + } + if (Index > 0) + S << Index - 1; + } +}; + +/// A template type parameter declaration, 'typename T'. +class TypeTemplateParamDecl final : public Node { + Node *Name; + +public: + TypeTemplateParamDecl(Node *Name_) + : Node(KTypeTemplateParamDecl, Cache::Yes), Name(Name_) {} + + template void match(Fn F) const { F(Name); } + + void printLeft(OutputStream &S) const override { + S += "typename "; + } + + void printRight(OutputStream &S) const override { + Name->print(S); + } +}; + +/// A non-type template parameter declaration, 'int N'. +class NonTypeTemplateParamDecl final : public Node { + Node *Name; + Node *Type; + +public: + NonTypeTemplateParamDecl(Node *Name_, Node *Type_) + : Node(KNonTypeTemplateParamDecl, Cache::Yes), Name(Name_), Type(Type_) {} + + template void match(Fn F) const { F(Name, Type); } + + void printLeft(OutputStream &S) const override { + Type->printLeft(S); + if (!Type->hasRHSComponent(S)) + S += " "; + } + + void printRight(OutputStream &S) const override { + Name->print(S); + Type->printRight(S); + } +}; + +/// A template template parameter declaration, +/// 'template typename N'. +class TemplateTemplateParamDecl final : public Node { + Node *Name; + NodeArray Params; + +public: + TemplateTemplateParamDecl(Node *Name_, NodeArray Params_) + : Node(KTemplateTemplateParamDecl, Cache::Yes), Name(Name_), + Params(Params_) {} + + template void match(Fn F) const { F(Name, Params); } + + void printLeft(OutputStream &S) const override { + S += "template<"; + Params.printWithComma(S); + S += "> typename "; + } + + void printRight(OutputStream &S) const override { + Name->print(S); + } +}; + +/// A template parameter pack declaration, 'typename ...T'. 
+class TemplateParamPackDecl final : public Node { + Node *Param; + +public: + TemplateParamPackDecl(Node *Param_) + : Node(KTemplateParamPackDecl, Cache::Yes), Param(Param_) {} + + template void match(Fn F) const { F(Param); } + + void printLeft(OutputStream &S) const override { + Param->printLeft(S); + S += "..."; + } + + void printRight(OutputStream &S) const override { + Param->printRight(S); + } +}; + /// An unexpanded parameter pack (either in the expression or type context). If /// this AST is correct, this node will have a ParameterPackExpansion node above /// it. @@ -1410,21 +1538,36 @@ public: }; class ClosureTypeName : public Node { + NodeArray TemplateParams; NodeArray Params; StringView Count; public: - ClosureTypeName(NodeArray Params_, StringView Count_) - : Node(KClosureTypeName), Params(Params_), Count(Count_) {} + ClosureTypeName(NodeArray TemplateParams_, NodeArray Params_, + StringView Count_) + : Node(KClosureTypeName), TemplateParams(TemplateParams_), + Params(Params_), Count(Count_) {} + + template void match(Fn F) const { + F(TemplateParams, Params, Count); + } - template void match(Fn F) const { F(Params, Count); } + void printDeclarator(OutputStream &S) const { + if (!TemplateParams.empty()) { + S += "<"; + TemplateParams.printWithComma(S); + S += ">"; + } + S += "("; + Params.printWithComma(S); + S += ")"; + } void printLeft(OutputStream &S) const override { S += "\'lambda"; S += Count; - S += "\'("; - Params.printWithComma(S); - S += ")"; + S += "\'"; + printDeclarator(S); } }; @@ -1902,6 +2045,37 @@ public: } }; +class StringLiteral : public Node { + const Node *Type; + +public: + StringLiteral(const Node *Type_) : Node(KStringLiteral), Type(Type_) {} + + template void match(Fn F) const { F(Type); } + + void printLeft(OutputStream &S) const override { + S += "\"<"; + Type->print(S); + S += ">\""; + } +}; + +class LambdaExpr : public Node { + const Node *Type; + +public: + LambdaExpr(const Node *Type_) : Node(KLambdaExpr), Type(Type_) {} + + template void match(Fn F) const { F(Type); } + + void printLeft(OutputStream &S) const override { + S += "[]"; + if (Type->getKind() == KClosureTypeName) + static_cast(Type)->printDeclarator(S); + S += "{...}"; + } +}; + class IntegerCastExpr : public Node { // ty(integer) const Node *Ty; @@ -2167,10 +2341,36 @@ template struct AbstractManglingParser { // table. PODSmallVector Subs; + using TemplateParamList = PODSmallVector; + + class ScopedTemplateParamList { + AbstractManglingParser *Parser; + size_t OldNumTemplateParamLists; + TemplateParamList Params; + + public: + ScopedTemplateParamList(AbstractManglingParser *Parser) + : Parser(Parser), + OldNumTemplateParamLists(Parser->TemplateParams.size()) { + Parser->TemplateParams.push_back(&Params); + } + ~ScopedTemplateParamList() { + assert(Parser->TemplateParams.size() >= OldNumTemplateParamLists); + Parser->TemplateParams.dropBack(OldNumTemplateParamLists); + } + }; + // Template parameter table. Like the above, but referenced like "T42_". // This has a smaller size compared to Subs and Names because it can be // stored on the stack. - PODSmallVector TemplateParams; + TemplateParamList OuterTemplateParams; + + // Lists of template parameters indexed by template parameter depth, + // referenced like "TL2_4_". If nonempty, element 0 is always + // OuterTemplateParams; inner elements are always template parameter lists of + // lambda expressions. For a generic lambda with no explicit template + // parameter list, the corresponding parameter list pointer will be null. 
+ PODSmallVector TemplateParams; // Set of unresolved forward references. These can occur in a // conversion operator's type, and are resolved in the enclosing . @@ -2178,7 +2378,9 @@ template struct AbstractManglingParser { bool TryToParseTemplateArgs = true; bool PermitForwardTemplateReferences = false; - bool ParsingLambdaParams = false; + size_t ParsingLambdaParamsAtLevel = (size_t)-1; + + unsigned NumSyntheticTemplateParameters[3] = {}; Alloc ASTAllocator; @@ -2193,9 +2395,11 @@ template struct AbstractManglingParser { Names.clear(); Subs.clear(); TemplateParams.clear(); - ParsingLambdaParams = false; + ParsingLambdaParamsAtLevel = (size_t)-1; TryToParseTemplateArgs = true; PermitForwardTemplateReferences = false; + for (int I = 0; I != 3; ++I) + NumSyntheticTemplateParameters[I] = 0; ASTAllocator.reset(); } @@ -2253,6 +2457,7 @@ template struct AbstractManglingParser { bool parseSeqId(size_t *Out); Node *parseSubstitution(); Node *parseTemplateParam(); + Node *parseTemplateParamDecl(); Node *parseTemplateArgs(bool TagTemplates = false); Node *parseTemplateArg(); @@ -2301,9 +2506,10 @@ template struct AbstractManglingParser { size_t E = ForwardTemplateRefs.size(); for (; I < E; ++I) { size_t Idx = ForwardTemplateRefs[I]->Index; - if (Idx >= TemplateParams.size()) + if (TemplateParams.empty() || !TemplateParams[0] || + Idx >= TemplateParams[0]->size()) return true; - ForwardTemplateRefs[I]->Ref = TemplateParams[Idx]; + ForwardTemplateRefs[I]->Ref = (*TemplateParams[0])[Idx]; } ForwardTemplateRefs.dropBack(State.ForwardTemplateRefsBegin); return false; @@ -2470,7 +2676,12 @@ AbstractManglingParser::parseUnqualifiedName(NameState *State) { // ::= + # Parameter types or "v" if the lambda has no parameters template Node * -AbstractManglingParser::parseUnnamedTypeName(NameState *) { +AbstractManglingParser::parseUnnamedTypeName(NameState *State) { + // refer to the innermost . Clear out any + // outer args that we may have inserted into TemplateParams. + if (State != nullptr) + TemplateParams.clear(); + if (consumeIf("Ut")) { StringView Count = parseNumber(); if (!consumeIf('_')) @@ -2478,22 +2689,59 @@ AbstractManglingParser::parseUnnamedTypeName(NameState *) { return make(Count); } if (consumeIf("Ul")) { - NodeArray Params; - SwapAndRestore SwapParams(ParsingLambdaParams, true); + SwapAndRestore SwapParams(ParsingLambdaParamsAtLevel, + TemplateParams.size()); + ScopedTemplateParamList LambdaTemplateParams(this); + + size_t ParamsBegin = Names.size(); + while (look() == 'T' && + StringView("yptn").find(look(1)) != StringView::npos) { + Node *T = parseTemplateParamDecl(); + if (!T) + return nullptr; + Names.push_back(T); + } + NodeArray TempParams = popTrailingNodeArray(ParamsBegin); + + // FIXME: If TempParams is empty and none of the function parameters + // includes 'auto', we should remove LambdaTemplateParams from the + // TemplateParams list. Unfortunately, we don't find out whether there are + // any 'auto' parameters until too late in an example such as: + // + // template void f( + // decltype([](decltype([](T v) {}), + // auto) {})) {} + // template void f( + // decltype([](decltype([](T w) {}), + // int) {})) {} + // + // Here, the type of v is at level 2 but the type of w is at level 1. We + // don't find this out until we encounter the type of the next parameter. + // + // However, compilers can't actually cope with the former example in + // practice, and it's likely to be made ill-formed in future, so we don't + // need to support it here. 
+ // + // If we encounter an 'auto' in the function parameter types, we will + // recreate a template parameter scope for it, but any intervening lambdas + // will be parsed in the 'wrong' template parameter depth. + if (TempParams.empty()) + TemplateParams.pop_back(); + if (!consumeIf("vE")) { - size_t ParamsBegin = Names.size(); do { Node *P = getDerived().parseType(); if (P == nullptr) return nullptr; Names.push_back(P); } while (!consumeIf('E')); - Params = popTrailingNodeArray(ParamsBegin); } + NodeArray Params = popTrailingNodeArray(ParamsBegin); + StringView Count = parseNumber(); if (!consumeIf('_')) return nullptr; - return make(Params, Count); + return make(TempParams, Params, Count); } if (consumeIf("Ub")) { (void)parseNumber(); @@ -3949,6 +4197,7 @@ Node *AbstractManglingParser::parseConversionExpr() { // ::= L E # floating literal // ::= L E # string literal // ::= L E # nullptr literal (i.e., "LDnE") +// ::= L E # lambda expression // FIXME: ::= L _ E # complex floating point literal (C 2000) // ::= L E # external name template @@ -4020,24 +4269,43 @@ Node *AbstractManglingParser::parseExprPrimary() { return R; } return nullptr; + case 'A': { + Node *T = getDerived().parseType(); + if (T == nullptr) + return nullptr; + // FIXME: We need to include the string contents in the mangling. + if (consumeIf('E')) + return make(T); + return nullptr; + } + case 'D': + if (consumeIf("DnE")) + return make("nullptr"); + return nullptr; case 'T': // Invalid mangled name per // http://sourcerytools.com/pipermail/cxx-abi-dev/2011-August/002422.html return nullptr; + case 'U': { + // FIXME: Should we support LUb... for block literals? + if (look(1) != 'l') + return nullptr; + Node *T = parseUnnamedTypeName(nullptr); + if (!T || !consumeIf('E')) + return nullptr; + return make(T); + } default: { // might be named type Node *T = getDerived().parseType(); if (T == nullptr) return nullptr; StringView N = parseNumber(); - if (!N.empty()) { - if (!consumeIf('E')) - return nullptr; - return make(T, N); - } - if (consumeIf('E')) - return T; - return nullptr; + if (N.empty()) + return nullptr; + if (!consumeIf('E')) + return nullptr; + return make(T, N); } } } @@ -5062,11 +5330,22 @@ Node *AbstractManglingParser::parseSubstitution() { // ::= T_ # first template parameter // ::= T _ +// ::= TL __ +// ::= TL _ _ template Node *AbstractManglingParser::parseTemplateParam() { if (!consumeIf('T')) return nullptr; + size_t Level = 0; + if (consumeIf('L')) { + if (parsePositiveInteger(&Level)) + return nullptr; + ++Level; + if (!consumeIf('_')) + return nullptr; + } + size_t Index = 0; if (!consumeIf('_')) { if (parsePositiveInteger(&Index)) @@ -5076,15 +5355,11 @@ Node *AbstractManglingParser::parseTemplateParam() { return nullptr; } - // Itanium ABI 5.1.8: In a generic lambda, uses of auto in the parameter list - // are mangled as the corresponding artificial template type parameter. - if (ParsingLambdaParams) - return make("auto"); - // If we're in a context where this refers to a // further ahead in the mangled name (currently just conversion // operator types), then we should only look it up in the right context. - if (PermitForwardTemplateReferences) { + // This can only happen at the outermost level. 
+ if (PermitForwardTemplateReferences && Level == 0) { Node *ForwardRef = make(Index); if (!ForwardRef) return nullptr; @@ -5094,9 +5369,78 @@ Node *AbstractManglingParser::parseTemplateParam() { return ForwardRef; } - if (Index >= TemplateParams.size()) + if (Level >= TemplateParams.size() || !TemplateParams[Level] || + Index >= TemplateParams[Level]->size()) { + // Itanium ABI 5.1.8: In a generic lambda, uses of auto in the parameter + // list are mangled as the corresponding artificial template type parameter. + if (ParsingLambdaParamsAtLevel == Level && Level <= TemplateParams.size()) { + // This will be popped by the ScopedTemplateParamList in + // parseUnnamedTypeName. + if (Level == TemplateParams.size()) + TemplateParams.push_back(nullptr); + return make("auto"); + } + return nullptr; - return TemplateParams[Index]; + } + + return (*TemplateParams[Level])[Index]; +} + +// ::= Ty # type parameter +// ::= Tn # non-type parameter +// ::= Tt * E # template parameter +// ::= Tp # parameter pack +template +Node *AbstractManglingParser::parseTemplateParamDecl() { + auto InventTemplateParamName = [&](TemplateParamKind Kind) { + unsigned Index = NumSyntheticTemplateParameters[(int)Kind]++; + Node *N = make(Kind, Index); + if (N) TemplateParams.back()->push_back(N); + return N; + }; + + if (consumeIf("Ty")) { + Node *Name = InventTemplateParamName(TemplateParamKind::Type); + if (!Name) + return nullptr; + return make(Name); + } + + if (consumeIf("Tn")) { + Node *Name = InventTemplateParamName(TemplateParamKind::NonType); + if (!Name) + return nullptr; + Node *Type = parseType(); + if (!Type) + return nullptr; + return make(Name, Type); + } + + if (consumeIf("Tt")) { + Node *Name = InventTemplateParamName(TemplateParamKind::Template); + if (!Name) + return nullptr; + size_t ParamsBegin = Names.size(); + ScopedTemplateParamList TemplateTemplateParamParams(this); + while (!consumeIf("E")) { + Node *P = parseTemplateParamDecl(); + if (!P) + return nullptr; + Names.push_back(P); + } + NodeArray Params = popTrailingNodeArray(ParamsBegin); + return make(Name, Params); + } + + if (consumeIf("Tp")) { + Node *P = parseTemplateParamDecl(); + if (!P) + return nullptr; + return make(P); + } + + return nullptr; } // ::= # type or template @@ -5153,8 +5497,11 @@ AbstractManglingParser::parseTemplateArgs(bool TagTemplates) { // refer to the innermost . Clear out any // outer args that we may have inserted into TemplateParams. 
- if (TagTemplates) + if (TagTemplates) { TemplateParams.clear(); + TemplateParams.push_back(&OuterTemplateParams); + OuterTemplateParams.clear(); + } size_t ArgsBegin = Names.size(); while (!consumeIf('E')) { @@ -5172,7 +5519,7 @@ AbstractManglingParser::parseTemplateArgs(bool TagTemplates) { if (!TableEntry) return nullptr; } - TemplateParams.push_back(TableEntry); + TemplateParams.back()->push_back(TableEntry); } else { Node *Arg = getDerived().parseTemplateArg(); if (Arg == nullptr) diff --git a/include/llvm/Demangle/MicrosoftDemangle.h b/include/llvm/Demangle/MicrosoftDemangle.h index 382e79401c43..c6f26061bedd 100644 --- a/include/llvm/Demangle/MicrosoftDemangle.h +++ b/include/llvm/Demangle/MicrosoftDemangle.h @@ -158,6 +158,7 @@ private: QualifiedNameNode *QN); SymbolNode *demangleDeclarator(StringView &MangledName); SymbolNode *demangleMD5Name(StringView &MangledName); + SymbolNode *demangleTypeinfoName(StringView &MangledName); VariableSymbolNode *demangleVariableEncoding(StringView &MangledName, StorageClass SC); diff --git a/include/llvm/Demangle/MicrosoftDemangleNodes.h b/include/llvm/Demangle/MicrosoftDemangleNodes.h index da9d9d5bfdc0..81b279fe237d 100644 --- a/include/llvm/Demangle/MicrosoftDemangleNodes.h +++ b/include/llvm/Demangle/MicrosoftDemangleNodes.h @@ -16,6 +16,8 @@ #include "llvm/Demangle/DemangleConfig.h" #include "llvm/Demangle/StringView.h" #include +#include +#include namespace llvm { namespace itanium_demangle { @@ -73,6 +75,9 @@ enum OutputFlags { OF_Default = 0, OF_NoCallingConvention = 1, OF_NoTagSpecifier = 2, + OF_NoAccessSpecifier = 4, + OF_NoMemberType = 8, + OF_NoReturnType = 16, }; // Types @@ -301,8 +306,6 @@ struct TypeNode : public Node { outputPost(OS, Flags); } - void outputQuals(bool SpaceBefore, bool SpaceAfter) const; - Qualifiers Quals = Q_None; }; diff --git a/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h b/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h index 8d2f641254b3..72687682f606 100644 --- a/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h +++ b/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h @@ -22,17 +22,21 @@ namespace llvm { namespace jitlink { /// Registers all FDEs in the given eh-frame section with the current process. -Error registerEHFrameSection(const void *EHFrameSectionAddr); +Error registerEHFrameSection(const void *EHFrameSectionAddr, + size_t EHFrameSectionSize); /// Deregisters all FDEs in the given eh-frame section with the current process. -Error deregisterEHFrameSection(const void *EHFrameSectionAddr); +Error deregisterEHFrameSection(const void *EHFrameSectionAddr, + size_t EHFrameSectionSize); /// Supports registration/deregistration of EH-frames in a target process. class EHFrameRegistrar { public: virtual ~EHFrameRegistrar(); - virtual Error registerEHFrames(JITTargetAddress EHFrameSectionAddr) = 0; - virtual Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr) = 0; + virtual Error registerEHFrames(JITTargetAddress EHFrameSectionAddr, + size_t EHFrameSectionSize) = 0; + virtual Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr, + size_t EHFrameSectionSize) = 0; }; /// Registers / Deregisters EH-frames in the current process. 
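Since EH-frame registration now carries both the section address and its size, out-of-tree registrars have to adopt the two-argument interface. Below is a hedged sketch of a custom EHFrameRegistrar written against the declarations above; the logging-only bodies are placeholders, not how InProcessEHFrameRegistrar is implemented.

#include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h"
#include "llvm/Support/raw_ostream.h"

namespace {
// Sketch of adapting a custom registrar to the new (address, size) API.
class LoggingEHFrameRegistrar : public llvm::jitlink::EHFrameRegistrar {
public:
  llvm::Error registerEHFrames(llvm::JITTargetAddress EHFrameSectionAddr,
                               size_t EHFrameSectionSize) override {
    llvm::errs() << "register eh-frame [" << EHFrameSectionAddr << ", +"
                 << EHFrameSectionSize << ")\n";
    return llvm::Error::success();
  }
  llvm::Error deregisterEHFrames(llvm::JITTargetAddress EHFrameSectionAddr,
                                 size_t EHFrameSectionSize) override {
    llvm::errs() << "deregister eh-frame [" << EHFrameSectionAddr << ", +"
                 << EHFrameSectionSize << ")\n";
    return llvm::Error::success();
  }
};
} // end anonymous namespace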
@@ -48,31 +52,38 @@ public: InProcessEHFrameRegistrar(InProcessEHFrameRegistrar &&) = delete; InProcessEHFrameRegistrar &operator=(InProcessEHFrameRegistrar &&) = delete; - Error registerEHFrames(JITTargetAddress EHFrameSectionAddr) override { + Error registerEHFrames(JITTargetAddress EHFrameSectionAddr, + size_t EHFrameSectionSize) override { return registerEHFrameSection( - jitTargetAddressToPointer(EHFrameSectionAddr)); + jitTargetAddressToPointer(EHFrameSectionAddr), + EHFrameSectionSize); } - Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr) override { + Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr, + size_t EHFrameSectionSize) override { return deregisterEHFrameSection( - jitTargetAddressToPointer(EHFrameSectionAddr)); + jitTargetAddressToPointer(EHFrameSectionAddr), + EHFrameSectionSize); } private: InProcessEHFrameRegistrar(); }; -using StoreFrameAddressFunction = std::function; +using StoreFrameRangeFunction = + std::function; -/// Creates a pass that records the address of the EH frame section. If no -/// eh-frame section is found, it will set EHFrameAddr to zero. +/// Creates a pass that records the address and size of the EH frame section. +/// If no eh-frame section is found then the address and size will both be given +/// as zero. /// /// Authors of JITLinkContexts can use this function to register a post-fixup -/// pass that records the address of the eh-frame section. This address can +/// pass that records the range of the eh-frame section. This range can /// be used after finalization to register and deregister the frame. -AtomGraphPassFunction +LinkGraphPassFunction createEHFrameRecorderPass(const Triple &TT, - StoreFrameAddressFunction StoreFrameAddress); + StoreFrameRangeFunction StoreFrameRange); } // end namespace jitlink } // end namespace llvm diff --git a/include/llvm/ExecutionEngine/JITLink/JITLink.h b/include/llvm/ExecutionEngine/JITLink/JITLink.h index be80d44ccf51..b531127cf892 100644 --- a/include/llvm/ExecutionEngine/JITLink/JITLink.h +++ b/include/llvm/ExecutionEngine/JITLink/JITLink.h @@ -34,6 +34,9 @@ namespace llvm { namespace jitlink { +class Symbol; +class Section; + /// Base class for errors originating in JIT linker, e.g. missing relocation /// support. class JITLinkError : public ErrorInfo { @@ -50,27 +53,22 @@ private: std::string ErrMsg; }; -// Forward declare the Atom class. -class Atom; - -/// Edge class. Represents both object file relocations, as well as layout and -/// keep-alive constraints. +/// Represents fixups and constraints in the LinkGraph. class Edge { public: using Kind = uint8_t; - using GenericEdgeKind = enum : Kind { + enum GenericEdgeKind : Kind { Invalid, // Invalid edge value. FirstKeepAlive, // Keeps target alive. Offset/addend zero. KeepAlive = FirstKeepAlive, // Tag first edge kind that preserves liveness. - LayoutNext, // Layout constraint. Offset/Addend zero. FirstRelocation // First architecture specific relocation. 
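// [Editor's note] Hedged sketch, not part of the patch: appending the new
// eh-frame recorder pass, which now reports an (address, size) range, to a
// PassConfiguration. The Triple value and the two out-parameters are
// illustrative assumptions, not names defined by this header.
#include "llvm/ADT/Triple.h"
#include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h"
#include "llvm/ExecutionEngine/JITLink/JITLink.h"

void addEHFrameRecorder(llvm::jitlink::PassConfiguration &Config,
                        llvm::JITTargetAddress &EHFrameAddr,
                        size_t &EHFrameSize) {
  llvm::Triple TT("x86_64-apple-darwin"); // assumed target, for illustration
  Config.PostFixupPasses.push_back(llvm::jitlink::createEHFrameRecorderPass(
      TT, [&](llvm::JITTargetAddress Addr, size_t Size) {
        // Both values are zero if the object has no eh-frame section.
        EHFrameAddr = Addr;
        EHFrameSize = Size;
      }));
}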
}; using OffsetT = uint32_t; using AddendT = int64_t; - Edge(Kind K, OffsetT Offset, Atom &Target, AddendT Addend) + Edge(Kind K, OffsetT Offset, Symbol &Target, AddendT Addend) : Target(&Target), Offset(Offset), Addend(Addend), K(K) {} OffsetT getOffset() const { return Offset; } @@ -82,461 +80,637 @@ public: return K - FirstRelocation; } bool isKeepAlive() const { return K >= FirstKeepAlive; } - Atom &getTarget() const { return *Target; } - void setTarget(Atom &Target) { this->Target = &Target; } + Symbol &getTarget() const { return *Target; } + void setTarget(Symbol &Target) { this->Target = &Target; } AddendT getAddend() const { return Addend; } void setAddend(AddendT Addend) { this->Addend = Addend; } private: - Atom *Target; - OffsetT Offset; - AddendT Addend; + Symbol *Target = nullptr; + OffsetT Offset = 0; + AddendT Addend = 0; Kind K = 0; }; -using EdgeVector = std::vector; +/// Returns the string name of the given generic edge kind, or "unknown" +/// otherwise. Useful for debugging. +const char *getGenericEdgeKindName(Edge::Kind K); -const StringRef getGenericEdgeKindName(Edge::Kind K); - -/// Base Atom class. Used by absolute and undefined atoms. -class Atom { - friend class AtomGraph; +/// Base class for Addressable entities (externals, absolutes, blocks). +class Addressable { + friend class LinkGraph; protected: - /// Create a named (as yet unresolved) atom. - Atom(StringRef Name) - : Name(Name), IsDefined(false), IsLive(false), ShouldDiscard(false), - IsGlobal(false), IsAbsolute(false), IsCallable(false), - IsExported(false), IsWeak(false), HasLayoutNext(false), - IsCommon(false) {} - - /// Create an absolute symbol atom. - Atom(StringRef Name, JITTargetAddress Address) - : Name(Name), Address(Address), IsDefined(true), IsLive(false), - ShouldDiscard(false), IsGlobal(false), IsAbsolute(false), - IsCallable(false), IsExported(false), IsWeak(false), - HasLayoutNext(false), IsCommon(false) {} + Addressable(JITTargetAddress Address, bool IsDefined) + : Address(Address), IsDefined(IsDefined), IsAbsolute(false) {} -public: - /// Returns true if this atom has a name. - bool hasName() const { return Name != StringRef(); } + Addressable(JITTargetAddress Address) + : Address(Address), IsDefined(false), IsAbsolute(true) { + assert(!(IsDefined && IsAbsolute) && + "Block cannot be both defined and absolute"); + } - /// Returns the name of this atom. - StringRef getName() const { return Name; } +public: + Addressable(const Addressable &) = delete; + Addressable &operator=(const Addressable &) = default; + Addressable(Addressable &&) = delete; + Addressable &operator=(Addressable &&) = default; - /// Returns the current target address of this atom. - /// The initial target address (for atoms that have one) will be taken from - /// the input object file's virtual address space. During the layout phase - /// of JIT linking the atom's address will be updated to point to its final - /// address in the JIT'd process. JITTargetAddress getAddress() const { return Address; } - - /// Set the current target address of this atom. void setAddress(JITTargetAddress Address) { this->Address = Address; } - /// Returns true if this is a defined atom. - bool isDefined() const { return IsDefined; } + /// Returns true if this is a defined addressable, in which case you + /// can downcast this to a . + bool isDefined() const { return static_cast(IsDefined); } + bool isAbsolute() const { return static_cast(IsAbsolute); } - /// Returns true if this atom is marked as live. 
- bool isLive() const { return IsLive; } +private: + JITTargetAddress Address = 0; + uint64_t IsDefined : 1; + uint64_t IsAbsolute : 1; +}; - /// Mark this atom as live. - /// - /// Note: Only defined and absolute atoms can be marked live. - void setLive(bool IsLive) { - assert((IsDefined || IsAbsolute || !IsLive) && - "Only defined and absolute atoms can be marked live"); - this->IsLive = IsLive; - } +using BlockOrdinal = unsigned; +using SectionOrdinal = unsigned; - /// Returns true if this atom should be discarded during pruning. - bool shouldDiscard() const { return ShouldDiscard; } +/// An Addressable with content and edges. +class Block : public Addressable { + friend class LinkGraph; - /// Mark this atom to be discarded. - /// - /// Note: Only defined and absolute atoms can be marked live. - void setShouldDiscard(bool ShouldDiscard) { - assert((IsDefined || IsAbsolute || !ShouldDiscard) && - "Only defined and absolute atoms can be marked live"); - this->ShouldDiscard = ShouldDiscard; +private: + /// Create a zero-fill defined addressable. + Block(Section &Parent, BlockOrdinal Ordinal, JITTargetAddress Size, + JITTargetAddress Address, uint64_t Alignment, uint64_t AlignmentOffset) + : Addressable(Address, true), Parent(Parent), Size(Size), + Ordinal(Ordinal) { + assert(isPowerOf2_64(Alignment) && "Alignment must be power of 2"); + assert(AlignmentOffset < Alignment && + "Alignment offset cannot exceed alignment"); + assert(AlignmentOffset <= MaxAlignmentOffset && + "Alignment offset exceeds maximum"); + P2Align = Alignment ? countTrailingZeros(Alignment) : 0; + this->AlignmentOffset = AlignmentOffset; } - /// Returns true if this definition is global (i.e. visible outside this - /// linkage unit). - /// - /// Note: This is distict from Exported, which means visibile outside the - /// JITDylib that this graph is being linked in to. - bool isGlobal() const { return IsGlobal; } + /// Create a defined addressable for the given content. + Block(Section &Parent, BlockOrdinal Ordinal, StringRef Content, + JITTargetAddress Address, uint64_t Alignment, uint64_t AlignmentOffset) + : Addressable(Address, true), Parent(Parent), Data(Content.data()), + Size(Content.size()), Ordinal(Ordinal) { + assert(isPowerOf2_64(Alignment) && "Alignment must be power of 2"); + assert(AlignmentOffset < Alignment && + "Alignment offset cannot exceed alignment"); + assert(AlignmentOffset <= MaxAlignmentOffset && + "Alignment offset exceeds maximum"); + P2Align = Alignment ? countTrailingZeros(Alignment) : 0; + this->AlignmentOffset = AlignmentOffset; + } - /// Mark this atom as global. - void setGlobal(bool IsGlobal) { this->IsGlobal = IsGlobal; } +public: + using EdgeVector = std::vector; + using edge_iterator = EdgeVector::iterator; + using const_edge_iterator = EdgeVector::const_iterator; - /// Returns true if this atom represents an absolute symbol. - bool isAbsolute() const { return IsAbsolute; } + Block(const Block &) = delete; + Block &operator=(const Block &) = delete; + Block(Block &&) = delete; + Block &operator=(Block &&) = delete; - /// Returns true if this atom is known to be callable. + /// Return the parent section for this block. + Section &getSection() const { return Parent; } + + /// Return the ordinal for this block. + BlockOrdinal getOrdinal() const { return Ordinal; } + + /// Returns true if this is a zero-fill block. /// - /// Primarily provided for easy interoperability with ORC, which uses the - /// JITSymbolFlags::Common flag to identify symbols that can be interposed - /// with stubs. 
- bool isCallable() const { return IsCallable; } + /// If true, getSize is callable but getContent is not (the content is + /// defined to be a sequence of zero bytes of length Size). + bool isZeroFill() const { return !Data; } + + /// Returns the size of this defined addressable. + size_t getSize() const { return Size; } + + /// Get the content for this block. Block must not be a zero-fill block. + StringRef getContent() const { + assert(Data && "Section does not contain content"); + return StringRef(Data, Size); + } - /// Mark this atom as callable. - void setCallable(bool IsCallable) { - assert((IsDefined || IsAbsolute || !IsCallable) && - "Callable atoms must be defined or absolute"); - this->IsCallable = IsCallable; + /// Set the content for this block. + /// Caller is responsible for ensuring the underlying bytes are not + /// deallocated while pointed to by this block. + void setContent(StringRef Content) { + Data = Content.data(); + Size = Content.size(); } - /// Returns true if this atom should appear in the symbol table of a final - /// linked image. - bool isExported() const { return IsExported; } + /// Get the alignment for this content. + uint64_t getAlignment() const { return 1ull << P2Align; } + + /// Get the alignment offset for this content. + uint64_t getAlignmentOffset() const { return AlignmentOffset; } - /// Mark this atom as exported. - void setExported(bool IsExported) { - assert((!IsExported || ((IsDefined || IsAbsolute) && hasName())) && - "Exported atoms must have names"); - this->IsExported = IsExported; + /// Add an edge to this block. + void addEdge(Edge::Kind K, Edge::OffsetT Offset, Symbol &Target, + Edge::AddendT Addend) { + Edges.push_back(Edge(K, Offset, Target, Addend)); } - /// Returns true if this is a weak symbol. - bool isWeak() const { return IsWeak; } + /// Return the list of edges attached to this content. + iterator_range edges() { + return make_range(Edges.begin(), Edges.end()); + } - /// Mark this atom as weak. - void setWeak(bool IsWeak) { this->IsWeak = IsWeak; } + /// Returns the list of edges attached to this content. + iterator_range edges() const { + return make_range(Edges.begin(), Edges.end()); + } -private: - StringRef Name; - JITTargetAddress Address = 0; + /// Return the size of the edges list. + size_t edges_size() const { return Edges.size(); } - bool IsDefined : 1; - bool IsLive : 1; - bool ShouldDiscard : 1; + /// Returns true if the list of edges is empty. + bool edges_empty() const { return Edges.empty(); } - bool IsGlobal : 1; - bool IsAbsolute : 1; - bool IsCallable : 1; - bool IsExported : 1; - bool IsWeak : 1; +private: + static constexpr uint64_t MaxAlignmentOffset = (1ULL << 57) - 1; -protected: - // These flags only make sense for DefinedAtom, but we can minimize the size - // of DefinedAtom by defining them here. - bool HasLayoutNext : 1; - bool IsCommon : 1; + uint64_t P2Align : 5; + uint64_t AlignmentOffset : 57; + Section &Parent; + const char *Data = nullptr; + size_t Size = 0; + BlockOrdinal Ordinal = 0; + std::vector Edges; }; -// Forward declare DefinedAtom. -class DefinedAtom; +/// Describes symbol linkage. This can be used to make resolve definition +/// clashes. +enum class Linkage : uint8_t { + Strong, + Weak, +}; -raw_ostream &operator<<(raw_ostream &OS, const Atom &A); -void printEdge(raw_ostream &OS, const Atom &FixupAtom, const Edge &E, - StringRef EdgeKindName); +/// For errors and debugging output. 
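// [Editor's note] Hedged sketch, not part of the patch: attaching and
// inspecting edges on a Block through the interface above. The Block and the
// target Symbol are assumed to come from an existing LinkGraph; KeepAlive is
// the generic liveness edge kind declared on Edge.
#include "llvm/ExecutionEngine/JITLink/JITLink.h"

void keepTargetAlive(llvm::jitlink::Block &B, llvm::jitlink::Symbol &Target) {
  // A keep-alive edge pins Target's liveness; it carries no offset or addend.
  B.addEdge(llvm::jitlink::Edge::KeepAlive, 0, Target, 0);
}

size_t countKeepAliveEdges(const llvm::jitlink::Block &B) {
  size_t N = 0;
  for (const llvm::jitlink::Edge &E : B.edges())
    if (E.isKeepAlive())
      ++N;
  return N;
}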
+const char *getLinkageName(Linkage L); + +/// Defines the scope in which this symbol should be visible: +/// Default -- Visible in the public interface of the linkage unit. +/// Hidden -- Visible within the linkage unit, but not exported from it. +/// Local -- Visible only within the LinkGraph. +enum class Scope : uint8_t { Default, Hidden, Local }; + +/// For debugging output. +const char *getScopeName(Scope S); + +raw_ostream &operator<<(raw_ostream &OS, const Block &B); + +/// Symbol representation. +/// +/// Symbols represent locations within Addressable objects. +/// They can be either Named or Anonymous. +/// Anonymous symbols have neither linkage nor visibility, and must point at +/// ContentBlocks. +/// Named symbols may be in one of four states: +/// - Null: Default initialized. Assignable, but otherwise unusable. +/// - Defined: Has both linkage and visibility and points to a ContentBlock +/// - Common: Has both linkage and visibility, points to a null Addressable. +/// - External: Has neither linkage nor visibility, points to an external +/// Addressable. +/// +class Symbol { + friend class LinkGraph; + +private: + Symbol(Addressable &Base, JITTargetAddress Offset, StringRef Name, + JITTargetAddress Size, Linkage L, Scope S, bool IsLive, + bool IsCallable) + : Name(Name), Base(&Base), Offset(Offset), Size(Size) { + setLinkage(L); + setScope(S); + setLive(IsLive); + setCallable(IsCallable); + } + + static Symbol &constructCommon(void *SymStorage, Block &Base, StringRef Name, + JITTargetAddress Size, Scope S, bool IsLive) { + assert(SymStorage && "Storage cannot be null"); + assert(!Name.empty() && "Common symbol name cannot be empty"); + assert(Base.isDefined() && + "Cannot create common symbol from undefined block"); + assert(static_cast(Base).getSize() == Size && + "Common symbol size should match underlying block size"); + auto *Sym = reinterpret_cast(SymStorage); + new (Sym) Symbol(Base, 0, Name, Size, Linkage::Weak, S, IsLive, false); + return *Sym; + } + + static Symbol &constructExternal(void *SymStorage, Addressable &Base, + StringRef Name, JITTargetAddress Size) { + assert(SymStorage && "Storage cannot be null"); + assert(!Base.isDefined() && + "Cannot create external symbol from defined block"); + assert(!Name.empty() && "External symbol name cannot be empty"); + auto *Sym = reinterpret_cast(SymStorage); + new (Sym) Symbol(Base, 0, Name, Size, Linkage::Strong, Scope::Default, + false, false); + return *Sym; + } + + static Symbol &constructAbsolute(void *SymStorage, Addressable &Base, + StringRef Name, JITTargetAddress Size, + Linkage L, Scope S, bool IsLive) { + assert(SymStorage && "Storage cannot be null"); + assert(!Base.isDefined() && + "Cannot create absolute symbol from a defined block"); + auto *Sym = reinterpret_cast(SymStorage); + new (Sym) Symbol(Base, 0, Name, Size, L, S, IsLive, false); + return *Sym; + } + + static Symbol &constructAnonDef(void *SymStorage, Block &Base, + JITTargetAddress Offset, + JITTargetAddress Size, bool IsCallable, + bool IsLive) { + assert(SymStorage && "Storage cannot be null"); + auto *Sym = reinterpret_cast(SymStorage); + new (Sym) Symbol(Base, Offset, StringRef(), Size, Linkage::Strong, + Scope::Local, IsLive, IsCallable); + return *Sym; + } + + static Symbol &constructNamedDef(void *SymStorage, Block &Base, + JITTargetAddress Offset, StringRef Name, + JITTargetAddress Size, Linkage L, Scope S, + bool IsLive, bool IsCallable) { + assert(SymStorage && "Storage cannot be null"); + assert(!Name.empty() && "Name cannot be empty"); + 
auto *Sym = reinterpret_cast(SymStorage); + new (Sym) Symbol(Base, Offset, Name, Size, L, S, IsLive, IsCallable); + return *Sym; + } -/// Represents a section address range via a pair of DefinedAtom pointers to -/// the first and last atoms in the section. -class SectionRange { public: - SectionRange() = default; - SectionRange(DefinedAtom *First, DefinedAtom *Last) - : First(First), Last(Last) {} - DefinedAtom *getFirstAtom() const { - assert((!Last || First) && "First can not be null if end is non-null"); - return First; + /// Create a null Symbol. This allows Symbols to be default initialized for + /// use in containers (e.g. as map values). Null symbols are only useful for + /// assigning to. + Symbol() = default; + + // Symbols are not movable or copyable. + Symbol(const Symbol &) = delete; + Symbol &operator=(const Symbol &) = delete; + Symbol(Symbol &&) = delete; + Symbol &operator=(Symbol &&) = delete; + + /// Returns true if this symbol has a name. + bool hasName() const { return !Name.empty(); } + + /// Returns the name of this symbol (empty if the symbol is anonymous). + StringRef getName() const { + assert((!Name.empty() || getScope() == Scope::Local) && + "Anonymous symbol has non-local scope"); + return Name; } - DefinedAtom *getLastAtom() const { - assert((First || !Last) && "Last can not be null if start is non-null"); - return Last; + + /// Returns true if this Symbol has content (potentially) defined within this + /// object file (i.e. is anything but an external or absolute symbol). + bool isDefined() const { + assert(Base && "Attempt to access null symbol"); + return Base->isDefined(); } - bool isEmpty() const { - assert((First || !Last) && "Last can not be null if start is non-null"); - return !First; + + /// Returns true if this symbol is live (i.e. should be treated as a root for + /// dead stripping). + bool isLive() const { + assert(Base && "Attempting to access null symbol"); + return IsLive; } - JITTargetAddress getStart() const; - JITTargetAddress getEnd() const; - uint64_t getSize() const; -private: - DefinedAtom *First = nullptr; - DefinedAtom *Last = nullptr; -}; + /// Set this symbol's live bit. + void setLive(bool IsLive) { this->IsLive = IsLive; } -/// Represents an object file section. -class Section { - friend class AtomGraph; + /// Returns true is this symbol is callable. + bool isCallable() const { return IsCallable; } -private: - Section(StringRef Name, uint32_t Alignment, sys::Memory::ProtectionFlags Prot, - unsigned Ordinal, bool IsZeroFill) - : Name(Name), Alignment(Alignment), Prot(Prot), Ordinal(Ordinal), - IsZeroFill(IsZeroFill) { - assert(isPowerOf2_32(Alignment) && "Alignments must be a power of 2"); + /// Set this symbol's callable bit. + void setCallable(bool IsCallable) { this->IsCallable = IsCallable; } + + /// Returns true if the underlying addressable is an unresolved external. + bool isExternal() const { + assert(Base && "Attempt to access null symbol"); + return !Base->isDefined() && !Base->isAbsolute(); } - using DefinedAtomSet = DenseSet; + /// Returns true if the underlying addressable is an absolute symbol. + bool isAbsolute() const { + assert(Base && "Attempt to access null symbol"); + return !Base->isDefined() && Base->isAbsolute(); + } -public: - using atom_iterator = DefinedAtomSet::iterator; - using const_atom_iterator = DefinedAtomSet::const_iterator; + /// Return the addressable that this symbol points to. 
+ Addressable &getAddressable() { + assert(Base && "Cannot get underlying addressable for null symbol"); + return *Base; + } - ~Section(); - StringRef getName() const { return Name; } - uint32_t getAlignment() const { return Alignment; } - sys::Memory::ProtectionFlags getProtectionFlags() const { return Prot; } - unsigned getSectionOrdinal() const { return Ordinal; } - size_t getNextAtomOrdinal() { return ++NextAtomOrdinal; } + /// Return the addressable that thsi symbol points to. + const Addressable &getAddressable() const { + assert(Base && "Cannot get underlying addressable for null symbol"); + return *Base; + } - bool isZeroFill() const { return IsZeroFill; } + /// Return the Block for this Symbol (Symbol must be defined). + Block &getBlock() { + assert(Base && "Cannot get block for null symbol"); + assert(Base->isDefined() && "Not a defined symbol"); + return static_cast(*Base); + } - /// Returns an iterator over the atoms in the section (in no particular - /// order). - iterator_range atoms() { - return make_range(DefinedAtoms.begin(), DefinedAtoms.end()); + /// Return the Block for this Symbol (Symbol must be defined). + const Block &getBlock() const { + assert(Base && "Cannot get block for null symbol"); + assert(Base->isDefined() && "Not a defined symbol"); + return static_cast(*Base); } - /// Returns an iterator over the atoms in the section (in no particular - /// order). - iterator_range atoms() const { - return make_range(DefinedAtoms.begin(), DefinedAtoms.end()); + /// Returns the offset for this symbol within the underlying addressable. + JITTargetAddress getOffset() const { return Offset; } + + /// Returns the address of this symbol. + JITTargetAddress getAddress() const { return Base->getAddress() + Offset; } + + /// Returns the size of this symbol. + JITTargetAddress getSize() const { return Size; } + + /// Returns true if this symbol is backed by a zero-fill block. + /// This method may only be called on defined symbols. + bool isSymbolZeroFill() const { return getBlock().isZeroFill(); } + + /// Returns the content in the underlying block covered by this symbol. + /// This method may only be called on defined non-zero-fill symbols. + StringRef getSymbolContent() const { + return getBlock().getContent().substr(Offset, Size); } - /// Return the number of atoms in this section. - DefinedAtomSet::size_type atoms_size() { return DefinedAtoms.size(); } + /// Get the linkage for this Symbol. + Linkage getLinkage() const { return static_cast(L); } - /// Return true if this section contains no atoms. - bool atoms_empty() const { return DefinedAtoms.empty(); } + /// Set the linkage for this Symbol. + void setLinkage(Linkage L) { + assert((L == Linkage::Strong || (Base->isDefined() && !Name.empty())) && + "Linkage can only be applied to defined named symbols"); + this->L = static_cast(L); + } - /// Returns the range of this section as the pair of atoms with the lowest - /// and highest target address. This operation is expensive, as it - /// must traverse all atoms in the section. - /// - /// Note: If the section is empty, both values will be null. The section - /// address will evaluate to null, and the size to zero. If the section - /// contains a single atom both values will point to it, the address will - /// evaluate to the address of that atom, and the size will be the size of - /// that atom. - SectionRange getRange() const; + /// Get the visibility for this Symbol. 
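// [Editor's note] Hedged sketch, not part of the patch: reading a Symbol
// through the accessors above. The Symbol is assumed to come from an existing
// LinkGraph; the output format is illustrative only.
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/Support/raw_ostream.h"

void describeSymbol(const llvm::jitlink::Symbol &Sym) {
  llvm::StringRef Name = Sym.hasName() ? Sym.getName() : llvm::StringRef("<anonymous>");
  llvm::outs() << Name << " addr=" << Sym.getAddress()
               << " size=" << Sym.getSize();
  if (Sym.isDefined() && !Sym.isSymbolZeroFill())
    llvm::outs() << " content-bytes=" << Sym.getSymbolContent().size();
  llvm::outs() << "\n";
}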
+ Scope getScope() const { return static_cast(S); } -private: - void addAtom(DefinedAtom &DA) { - assert(!DefinedAtoms.count(&DA) && "Atom is already in this section"); - DefinedAtoms.insert(&DA); + /// Set the visibility for this Symbol. + void setScope(Scope S) { + assert((S == Scope::Default || Base->isDefined() || Base->isAbsolute()) && + "Invalid visibility for symbol type"); + this->S = static_cast(S); } - void removeAtom(DefinedAtom &DA) { - assert(DefinedAtoms.count(&DA) && "Atom is not in this section"); - DefinedAtoms.erase(&DA); +private: + void makeExternal(Addressable &A) { + assert(!A.isDefined() && "Attempting to make external with defined block"); + Base = &A; + Offset = 0; + setLinkage(Linkage::Strong); + setScope(Scope::Default); + IsLive = 0; + // note: Size and IsCallable fields left unchanged. } + static constexpr uint64_t MaxOffset = (1ULL << 59) - 1; + + // FIXME: A char* or SymbolStringPtr may pack better. StringRef Name; - uint32_t Alignment = 0; - sys::Memory::ProtectionFlags Prot; - unsigned Ordinal = 0; - unsigned NextAtomOrdinal = 0; - bool IsZeroFill = false; - DefinedAtomSet DefinedAtoms; + Addressable *Base = nullptr; + uint64_t Offset : 59; + uint64_t L : 1; + uint64_t S : 2; + uint64_t IsLive : 1; + uint64_t IsCallable : 1; + JITTargetAddress Size = 0; }; -/// Defined atom class. Suitable for use by defined named and anonymous -/// atoms. -class DefinedAtom : public Atom { - friend class AtomGraph; +raw_ostream &operator<<(raw_ostream &OS, const Symbol &A); + +void printEdge(raw_ostream &OS, const Block &B, const Edge &E, + StringRef EdgeKindName); + +/// Represents an object file section. +class Section { + friend class LinkGraph; private: - DefinedAtom(Section &Parent, JITTargetAddress Address, uint32_t Alignment) - : Atom("", Address), Parent(Parent), Ordinal(Parent.getNextAtomOrdinal()), - Alignment(Alignment) { - assert(isPowerOf2_32(Alignment) && "Alignments must be a power of two"); - } + Section(StringRef Name, sys::Memory::ProtectionFlags Prot, + SectionOrdinal SecOrdinal) + : Name(Name), Prot(Prot), SecOrdinal(SecOrdinal) {} - DefinedAtom(Section &Parent, StringRef Name, JITTargetAddress Address, - uint32_t Alignment) - : Atom(Name, Address), Parent(Parent), - Ordinal(Parent.getNextAtomOrdinal()), Alignment(Alignment) { - assert(isPowerOf2_32(Alignment) && "Alignments must be a power of two"); - } + using SymbolSet = DenseSet; + using BlockSet = DenseSet; public: - using edge_iterator = EdgeVector::iterator; + using symbol_iterator = SymbolSet::iterator; + using const_symbol_iterator = SymbolSet::const_iterator; - Section &getSection() const { return Parent; } + using block_iterator = BlockSet::iterator; + using const_block_iterator = BlockSet::const_iterator; - uint64_t getSize() const { return Size; } + ~Section(); - StringRef getContent() const { - assert(!Parent.isZeroFill() && "Trying to get content for zero-fill atom"); - assert(Size <= std::numeric_limits::max() && - "Content size too large"); - return {ContentPtr, static_cast(Size)}; - } - void setContent(StringRef Content) { - assert(!Parent.isZeroFill() && "Calling setContent on zero-fill atom?"); - ContentPtr = Content.data(); - Size = Content.size(); - } + /// Returns the name of this section. + StringRef getName() const { return Name; } + + /// Returns the protection flags for this section. + sys::Memory::ProtectionFlags getProtectionFlags() const { return Prot; } - bool isZeroFill() const { return Parent.isZeroFill(); } + /// Returns the ordinal for this section. 
+ SectionOrdinal getOrdinal() const { return SecOrdinal; } - void setZeroFill(uint64_t Size) { - assert(Parent.isZeroFill() && !ContentPtr && - "Can't set zero-fill length of a non zero-fill atom"); - this->Size = Size; + /// Returns an iterator over the symbols defined in this section. + iterator_range symbols() { + return make_range(Symbols.begin(), Symbols.end()); } - uint64_t getZeroFillSize() const { - assert(Parent.isZeroFill() && - "Can't get zero-fill length of a non zero-fill atom"); - return Size; + /// Returns an iterator over the symbols defined in this section. + iterator_range symbols() const { + return make_range(Symbols.begin(), Symbols.end()); } - uint32_t getAlignment() const { return Alignment; } + /// Return the number of symbols in this section. + SymbolSet::size_type symbols_size() { return Symbols.size(); } - bool hasLayoutNext() const { return HasLayoutNext; } - void setLayoutNext(DefinedAtom &Next) { - assert(!HasLayoutNext && "Atom already has layout-next constraint"); - HasLayoutNext = true; - Edges.push_back(Edge(Edge::LayoutNext, 0, Next, 0)); - } - DefinedAtom &getLayoutNext() { - assert(HasLayoutNext && "Atom does not have a layout-next constraint"); - DefinedAtom *Next = nullptr; - for (auto &E : edges()) - if (E.getKind() == Edge::LayoutNext) { - assert(E.getTarget().isDefined() && - "layout-next target atom must be a defined atom"); - Next = static_cast(&E.getTarget()); - break; - } - assert(Next && "Missing LayoutNext edge"); - return *Next; - } + /// Return true if this section contains no symbols. + bool symbols_empty() const { return Symbols.empty(); } - bool isCommon() const { return IsCommon; } + /// Returns the ordinal for the next block. + BlockOrdinal getNextBlockOrdinal() { return NextBlockOrdinal++; } - void addEdge(Edge::Kind K, Edge::OffsetT Offset, Atom &Target, - Edge::AddendT Addend) { - assert(K != Edge::LayoutNext && - "Layout edges should be added via setLayoutNext"); - Edges.push_back(Edge(K, Offset, Target, Addend)); +private: + void addSymbol(Symbol &Sym) { + assert(!Symbols.count(&Sym) && "Symbol is already in this section"); + Symbols.insert(&Sym); } - iterator_range edges() { - return make_range(Edges.begin(), Edges.end()); + void removeSymbol(Symbol &Sym) { + assert(Symbols.count(&Sym) && "symbol is not in this section"); + Symbols.erase(&Sym); } - size_t edges_size() const { return Edges.size(); } - bool edges_empty() const { return Edges.empty(); } - unsigned getOrdinal() const { return Ordinal; } + StringRef Name; + sys::Memory::ProtectionFlags Prot; + SectionOrdinal SecOrdinal = 0; + BlockOrdinal NextBlockOrdinal = 0; + SymbolSet Symbols; +}; -private: - void setCommon(uint64_t Size) { - assert(ContentPtr == 0 && "Atom already has content?"); - IsCommon = true; - setZeroFill(Size); +/// Represents a section address range via a pair of Block pointers +/// to the first and last Blocks in the section. 
+class SectionRange { +public: + SectionRange() = default; + SectionRange(const Section &Sec) { + if (Sec.symbols_empty()) + return; + First = Last = *Sec.symbols().begin(); + for (auto *Sym : Sec.symbols()) { + if (Sym->getAddress() < First->getAddress()) + First = Sym; + if (Sym->getAddress() > Last->getAddress()) + Last = Sym; + } + } + Symbol *getFirstSymbol() const { + assert((!Last || First) && "First can not be null if end is non-null"); + return First; + } + Symbol *getLastSymbol() const { + assert((First || !Last) && "Last can not be null if start is non-null"); + return Last; + } + bool isEmpty() const { + assert((First || !Last) && "Last can not be null if start is non-null"); + return !First; + } + JITTargetAddress getStart() const { + return First ? First->getBlock().getAddress() : 0; + } + JITTargetAddress getEnd() const { + return Last ? Last->getBlock().getAddress() + Last->getBlock().getSize() + : 0; } + uint64_t getSize() const { return getEnd() - getStart(); } - EdgeVector Edges; - uint64_t Size = 0; - Section &Parent; - const char *ContentPtr = nullptr; - unsigned Ordinal = 0; - uint32_t Alignment = 0; +private: + Symbol *First = nullptr; + Symbol *Last = nullptr; }; -inline JITTargetAddress SectionRange::getStart() const { - return First ? First->getAddress() : 0; -} +class LinkGraph { +private: + using SectionList = std::vector>; + using ExternalSymbolSet = DenseSet; + using BlockSet = DenseSet; + + template + Addressable &createAddressable(ArgTs &&... Args) { + Addressable *A = + reinterpret_cast(Allocator.Allocate()); + new (A) Addressable(std::forward(Args)...); + return *A; + } -inline JITTargetAddress SectionRange::getEnd() const { - return Last ? Last->getAddress() + Last->getSize() : 0; -} + void destroyAddressable(Addressable &A) { + A.~Addressable(); + Allocator.Deallocate(&A); + } -inline uint64_t SectionRange::getSize() const { return getEnd() - getStart(); } + template Block &createBlock(ArgTs &&... Args) { + Block *B = reinterpret_cast(Allocator.Allocate()); + new (B) Block(std::forward(Args)...); + Blocks.insert(B); + return *B; + } -inline SectionRange Section::getRange() const { - if (atoms_empty()) - return SectionRange(); - DefinedAtom *First = *DefinedAtoms.begin(), *Last = *DefinedAtoms.begin(); - for (auto *DA : atoms()) { - if (DA->getAddress() < First->getAddress()) - First = DA; - if (DA->getAddress() > Last->getAddress()) - Last = DA; + void destroyBlock(Block &B) { + Blocks.erase(&B); + B.~Block(); + Allocator.Deallocate(&B); } - return SectionRange(First, Last); -} -class AtomGraph { -private: - using SectionList = std::vector>; - using AddressToAtomMap = std::map; - using NamedAtomMap = DenseMap; - using ExternalAtomSet = DenseSet; + void destroySymbol(Symbol &S) { + S.~Symbol(); + Allocator.Deallocate(&S); + } public: - using external_atom_iterator = ExternalAtomSet::iterator; + using external_symbol_iterator = ExternalSymbolSet::iterator; + + using block_iterator = BlockSet::iterator; using section_iterator = pointee_iterator; using const_section_iterator = pointee_iterator; - template - class defined_atom_iterator_impl + template + class defined_symbol_iterator_impl : public iterator_facade_base< - defined_atom_iterator_impl, + defined_symbol_iterator_impl, std::forward_iterator_tag, T> { public: - defined_atom_iterator_impl() = default; + defined_symbol_iterator_impl() = default; - defined_atom_iterator_impl(SecItrT SI, SecItrT SE) - : SI(SI), SE(SE), - AI(SI != SE ? 
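// [Editor's note] Hedged sketch, not part of the patch: computing the address
// range covered by a Section via the SectionRange helper above. The Section
// is assumed to come from an existing LinkGraph.
#include <cstdint>
#include "llvm/ExecutionEngine/JITLink/JITLink.h"

uint64_t sectionSizeInBytes(const llvm::jitlink::Section &Sec) {
  llvm::jitlink::SectionRange Range(Sec);
  if (Range.isEmpty())
    return 0;
  // getStart()/getEnd() are derived from the first and last symbols' blocks.
  return Range.getSize();
}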
SI->atoms().begin() : Section::atom_iterator()) { - moveToNextAtomOrEnd(); + defined_symbol_iterator_impl(SectionItrT SecI, SectionItrT SecE) + : SecI(SecI), SecE(SecE), + SymI(SecI != SecE ? SecI->symbols().begin() : SymbolItrT()) { + moveToNextSymbolOrEnd(); } - bool operator==(const defined_atom_iterator_impl &RHS) const { - return (SI == RHS.SI) && (AI == RHS.AI); + bool operator==(const defined_symbol_iterator_impl &RHS) const { + return (SecI == RHS.SecI) && (SymI == RHS.SymI); } T operator*() const { - assert(AI != SI->atoms().end() && "Dereferencing end?"); - return *AI; + assert(SymI != SecI->symbols().end() && "Dereferencing end?"); + return *SymI; } - defined_atom_iterator_impl operator++() { - ++AI; - moveToNextAtomOrEnd(); + defined_symbol_iterator_impl operator++() { + ++SymI; + moveToNextSymbolOrEnd(); return *this; } private: - void moveToNextAtomOrEnd() { - while (SI != SE && AI == SI->atoms().end()) { - ++SI; - if (SI == SE) - AI = Section::atom_iterator(); - else - AI = SI->atoms().begin(); + void moveToNextSymbolOrEnd() { + while (SecI != SecE && SymI == SecI->symbols().end()) { + ++SecI; + SymI = SecI == SecE ? SymbolItrT() : SecI->symbols().begin(); } } - SecItrT SI, SE; - AtomItrT AI; + SectionItrT SecI, SecE; + SymbolItrT SymI; }; - using defined_atom_iterator = - defined_atom_iterator_impl; + using defined_symbol_iterator = + defined_symbol_iterator_impl; - using const_defined_atom_iterator = - defined_atom_iterator_impl; + using const_defined_symbol_iterator = defined_symbol_iterator_impl< + const_section_iterator, Section::const_symbol_iterator, const Symbol *>; - AtomGraph(std::string Name, unsigned PointerSize, + LinkGraph(std::string Name, unsigned PointerSize, support::endianness Endianness) : Name(std::move(Name)), PointerSize(PointerSize), Endianness(Endianness) {} + ~LinkGraph(); + /// Returns the name of this graph (usually the name of the original /// underlying MemoryBuffer). const std::string &getName() { return Name; } @@ -544,84 +718,83 @@ public: /// Returns the pointer size for use in this graph. unsigned getPointerSize() const { return PointerSize; } - /// Returns the endianness of atom-content in this graph. + /// Returns the endianness of content in this graph. support::endianness getEndianness() const { return Endianness; } /// Create a section with the given name, protection flags, and alignment. - Section &createSection(StringRef Name, uint32_t Alignment, - sys::Memory::ProtectionFlags Prot, bool IsZeroFill) { - std::unique_ptr
Sec( - new Section(Name, Alignment, Prot, Sections.size(), IsZeroFill)); + Section &createSection(StringRef Name, sys::Memory::ProtectionFlags Prot) { + std::unique_ptr
Sec(new Section(Name, Prot, Sections.size())); Sections.push_back(std::move(Sec)); return *Sections.back(); } - /// Add an external atom representing an undefined symbol in this graph. - Atom &addExternalAtom(StringRef Name) { - assert(!NamedAtoms.count(Name) && "Duplicate named atom inserted"); - Atom *A = reinterpret_cast( - AtomAllocator.Allocate(sizeof(Atom), alignof(Atom))); - new (A) Atom(Name); - ExternalAtoms.insert(A); - NamedAtoms[Name] = A; - return *A; + /// Create a content block. + Block &createContentBlock(Section &Parent, StringRef Content, + uint64_t Address, uint64_t Alignment, + uint64_t AlignmentOffset) { + return createBlock(Parent, Parent.getNextBlockOrdinal(), Content, Address, + Alignment, AlignmentOffset); } - /// Add an external atom representing an absolute symbol. - Atom &addAbsoluteAtom(StringRef Name, JITTargetAddress Addr) { - assert(!NamedAtoms.count(Name) && "Duplicate named atom inserted"); - Atom *A = reinterpret_cast( - AtomAllocator.Allocate(sizeof(Atom), alignof(Atom))); - new (A) Atom(Name, Addr); - AbsoluteAtoms.insert(A); - NamedAtoms[Name] = A; - return *A; + /// Create a zero-fill block. + Block &createZeroFillBlock(Section &Parent, uint64_t Size, uint64_t Address, + uint64_t Alignment, uint64_t AlignmentOffset) { + return createBlock(Parent, Parent.getNextBlockOrdinal(), Size, Address, + Alignment, AlignmentOffset); } - /// Add an anonymous defined atom to the graph. - /// - /// Anonymous atoms have content but no name. They must have an address. - DefinedAtom &addAnonymousAtom(Section &Parent, JITTargetAddress Address, - uint32_t Alignment) { - DefinedAtom *A = reinterpret_cast( - AtomAllocator.Allocate(sizeof(DefinedAtom), alignof(DefinedAtom))); - new (A) DefinedAtom(Parent, Address, Alignment); - Parent.addAtom(*A); - getAddrToAtomMap()[A->getAddress()] = A; - return *A; + /// Add an external symbol. + /// Some formats (e.g. ELF) allow Symbols to have sizes. For Symbols whose + /// size is not known, you should substitute '0'. + Symbol &addExternalSymbol(StringRef Name, uint64_t Size) { + auto &Sym = Symbol::constructExternal( + Allocator.Allocate(), createAddressable(0, false), Name, Size); + ExternalSymbols.insert(&Sym); + return Sym; } - /// Add a defined atom to the graph. - /// - /// Allocates and constructs a DefinedAtom instance with the given parent, - /// name, address, and alignment. - DefinedAtom &addDefinedAtom(Section &Parent, StringRef Name, - JITTargetAddress Address, uint32_t Alignment) { - assert(!NamedAtoms.count(Name) && "Duplicate named atom inserted"); - DefinedAtom *A = reinterpret_cast( - AtomAllocator.Allocate(sizeof(DefinedAtom), alignof(DefinedAtom))); - new (A) DefinedAtom(Parent, Name, Address, Alignment); - Parent.addAtom(*A); - getAddrToAtomMap()[A->getAddress()] = A; - NamedAtoms[Name] = A; - return *A; + /// Add an absolute symbol. + Symbol &addAbsoluteSymbol(StringRef Name, JITTargetAddress Address, + uint64_t Size, Linkage L, Scope S, bool IsLive) { + auto &Sym = Symbol::constructAbsolute(Allocator.Allocate(), + createAddressable(Address), Name, + Size, L, S, IsLive); + AbsoluteSymbols.insert(&Sym); + return Sym; } - /// Add a common symbol atom to the graph. - /// - /// Adds a common-symbol atom to the graph with the given parent, name, - /// address, alignment and size. 
- DefinedAtom &addCommonAtom(Section &Parent, StringRef Name, - JITTargetAddress Address, uint32_t Alignment, - uint64_t Size) { - assert(!NamedAtoms.count(Name) && "Duplicate named atom inserted"); - DefinedAtom *A = reinterpret_cast( - AtomAllocator.Allocate(sizeof(DefinedAtom), alignof(DefinedAtom))); - new (A) DefinedAtom(Parent, Name, Address, Alignment); - A->setCommon(Size); - Parent.addAtom(*A); - NamedAtoms[Name] = A; - return *A; + /// Convenience method for adding a weak zero-fill symbol. + Symbol &addCommonSymbol(StringRef Name, Scope S, Section &Section, + JITTargetAddress Address, uint64_t Size, + uint64_t Alignment, bool IsLive) { + auto &Sym = Symbol::constructCommon( + Allocator.Allocate(), + createBlock(Section, Section.getNextBlockOrdinal(), Address, Size, + Alignment, 0), + Name, Size, S, IsLive); + Section.addSymbol(Sym); + return Sym; + } + + /// Add an anonymous symbol. + Symbol &addAnonymousSymbol(Block &Content, JITTargetAddress Offset, + JITTargetAddress Size, bool IsCallable, + bool IsLive) { + auto &Sym = Symbol::constructAnonDef(Allocator.Allocate(), Content, + Offset, Size, IsCallable, IsLive); + Content.getSection().addSymbol(Sym); + return Sym; + } + + /// Add a named symbol. + Symbol &addDefinedSymbol(Block &Content, JITTargetAddress Offset, + StringRef Name, JITTargetAddress Size, Linkage L, + Scope S, bool IsCallable, bool IsLive) { + auto &Sym = + Symbol::constructNamedDef(Allocator.Allocate(), Content, Offset, + Name, Size, L, S, IsLive, IsCallable); + Content.getSection().addSymbol(Sym); + return Sym; } iterator_range sections() { @@ -638,135 +811,79 @@ public: return nullptr; } - iterator_range external_atoms() { - return make_range(ExternalAtoms.begin(), ExternalAtoms.end()); + iterator_range external_symbols() { + return make_range(ExternalSymbols.begin(), ExternalSymbols.end()); } - iterator_range absolute_atoms() { - return make_range(AbsoluteAtoms.begin(), AbsoluteAtoms.end()); + iterator_range absolute_symbols() { + return make_range(AbsoluteSymbols.begin(), AbsoluteSymbols.end()); } - iterator_range defined_atoms() { - return make_range(defined_atom_iterator(Sections.begin(), Sections.end()), - defined_atom_iterator(Sections.end(), Sections.end())); + iterator_range defined_symbols() { + return make_range(defined_symbol_iterator(Sections.begin(), Sections.end()), + defined_symbol_iterator(Sections.end(), Sections.end())); } - iterator_range defined_atoms() const { + iterator_range defined_symbols() const { return make_range( - const_defined_atom_iterator(Sections.begin(), Sections.end()), - const_defined_atom_iterator(Sections.end(), Sections.end())); - } - - /// Returns the atom with the given name, which must exist in this graph. - Atom &getAtomByName(StringRef Name) { - auto I = NamedAtoms.find(Name); - assert(I != NamedAtoms.end() && "Name not in NamedAtoms map"); - return *I->second; - } - - /// Returns the atom with the given name, which must exist in this graph and - /// be a DefinedAtom. - DefinedAtom &getDefinedAtomByName(StringRef Name) { - auto &A = getAtomByName(Name); - assert(A.isDefined() && "Atom is not a defined atom"); - return static_cast(A); - } - - /// Search for the given atom by name. - /// Returns the atom (if found) or an error (if no atom with this name - /// exists). - Expected findAtomByName(StringRef Name) { - auto I = NamedAtoms.find(Name); - if (I == NamedAtoms.end()) - return make_error("No atom named " + Name); - return *I->second; - } - - /// Search for the given defined atom by name. 
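// [Editor's note] Hedged sketch, not part of the patch: building a tiny
// LinkGraph by hand with the methods above. The section name, content bytes,
// addresses, alignments and the use of the generic FirstRelocation edge kind
// are illustrative assumptions; real graphs are produced by the format
// specific linkers and use architecture specific edge kinds.
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Memory.h"

void buildToyGraph() {
  using namespace llvm;
  using namespace llvm::jitlink;

  LinkGraph G("toy.o", /*PointerSize=*/8, support::little);

  auto Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
                                                        sys::Memory::MF_EXEC);
  Section &Text = G.createSection("__text", Prot);

  // Content blocks only reference their bytes; the caller keeps them alive.
  static const char Code[] = "\xc3";
  Block &B = G.createContentBlock(Text, StringRef(Code, sizeof(Code) - 1),
                                  /*Address=*/0, /*Alignment=*/16,
                                  /*AlignmentOffset=*/0);

  Symbol &Main = G.addDefinedSymbol(B, /*Offset=*/0, "main", B.getSize(),
                                    Linkage::Strong, Scope::Default,
                                    /*IsCallable=*/true, /*IsLive=*/true);
  Symbol &Ext = G.addExternalSymbol("puts", /*Size=*/0);

  // Edges now target Symbols rather than atoms.
  B.addEdge(Edge::FirstRelocation, /*Offset=*/0, Ext, /*Addend=*/0);
  (void)Main;
}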
- /// Returns the defined atom (if found) or an error (if no atom with this - /// name exists, or if one exists but is not a defined atom). - Expected findDefinedAtomByName(StringRef Name) { - auto I = NamedAtoms.find(Name); - if (I == NamedAtoms.end()) - return make_error("No atom named " + Name); - if (!I->second->isDefined()) - return make_error("Atom " + Name + - " exists but is not a " - "defined atom"); - return static_cast(*I->second); - } - - /// Returns the atom covering the given address, or an error if no such atom - /// exists. - /// - /// Returns null if no atom exists at the given address. - DefinedAtom *getAtomByAddress(JITTargetAddress Address) { - refreshAddrToAtomCache(); - - // If there are no defined atoms, bail out early. - if (AddrToAtomCache->empty()) - return nullptr; - - // Find the atom *after* the given address. - auto I = AddrToAtomCache->upper_bound(Address); - - // If this address falls before any known atom, bail out. - if (I == AddrToAtomCache->begin()) - return nullptr; - - // The atom we're looking for is the one before the atom we found. - --I; - - // Otherwise range check the atom that was found. - assert(!I->second->getContent().empty() && "Atom content not set"); - if (Address >= I->second->getAddress() + I->second->getContent().size()) - return nullptr; + const_defined_symbol_iterator(Sections.begin(), Sections.end()), + const_defined_symbol_iterator(Sections.end(), Sections.end())); + } - return I->second; + iterator_range blocks() { + return make_range(Blocks.begin(), Blocks.end()); } - /// Like getAtomByAddress, but returns an Error if the given address is not - /// covered by an atom, rather than a null pointer. - Expected findAtomByAddress(JITTargetAddress Address) { - if (auto *DA = getAtomByAddress(Address)) - return *DA; - return make_error("No atom at address " + - formatv("{0:x16}", Address)); + /// Turn a defined symbol into an external one. + void makeExternal(Symbol &Sym) { + if (Sym.getAddressable().isAbsolute()) { + assert(AbsoluteSymbols.count(&Sym) && + "Sym is not in the absolute symbols set"); + AbsoluteSymbols.erase(&Sym); + } else { + assert(Sym.isDefined() && "Sym is not a defined symbol"); + Section &Sec = Sym.getBlock().getSection(); + Sec.removeSymbol(Sym); + } + Sym.makeExternal(createAddressable(false)); + ExternalSymbols.insert(&Sym); } - // Remove the given external atom from the graph. - void removeExternalAtom(Atom &A) { - assert(!A.isDefined() && !A.isAbsolute() && "A is not an external atom"); - assert(ExternalAtoms.count(&A) && "A is not in the external atoms set"); - ExternalAtoms.erase(&A); - A.~Atom(); + /// Removes an external symbol. Also removes the underlying Addressable. + void removeExternalSymbol(Symbol &Sym) { + assert(!Sym.isDefined() && !Sym.isAbsolute() && + "Sym is not an external symbol"); + assert(ExternalSymbols.count(&Sym) && "Symbol is not in the externals set"); + ExternalSymbols.erase(&Sym); + Addressable &Base = *Sym.Base; + destroySymbol(Sym); + destroyAddressable(Base); } - /// Remove the given absolute atom from the graph. - void removeAbsoluteAtom(Atom &A) { - assert(A.isAbsolute() && "A is not an absolute atom"); - assert(AbsoluteAtoms.count(&A) && "A is not in the absolute atoms set"); - AbsoluteAtoms.erase(&A); - A.~Atom(); + /// Remove an absolute symbol. Also removes the underlying Addressable. 
+ void removeAbsoluteSymbol(Symbol &Sym) { + assert(!Sym.isDefined() && Sym.isAbsolute() && + "Sym is not an absolute symbol"); + assert(AbsoluteSymbols.count(&Sym) && + "Symbol is not in the absolute symbols set"); + AbsoluteSymbols.erase(&Sym); + Addressable &Base = *Sym.Base; + destroySymbol(Sym); + destroyAddressable(Base); } - /// Remove the given defined atom from the graph. - void removeDefinedAtom(DefinedAtom &DA) { - if (AddrToAtomCache) { - assert(AddrToAtomCache->count(DA.getAddress()) && - "Cache exists, but does not contain atom"); - AddrToAtomCache->erase(DA.getAddress()); - } - if (DA.hasName()) { - assert(NamedAtoms.count(DA.getName()) && "Named atom not in map"); - NamedAtoms.erase(DA.getName()); - } - DA.getSection().removeAtom(DA); - DA.~DefinedAtom(); + /// Removes defined symbols. Does not remove the underlying block. + void removeDefinedSymbol(Symbol &Sym) { + assert(Sym.isDefined() && "Sym is not a defined symbol"); + Sym.getBlock().getSection().removeSymbol(Sym); + destroySymbol(Sym); } - /// Invalidate the atom-to-address map. - void invalidateAddrToAtomMap() { AddrToAtomCache = None; } + /// Remove a block. + void removeBlock(Block &B) { + Blocks.erase(&B); + destroyBlock(B); + } /// Dump the graph. /// @@ -778,87 +895,84 @@ public: std::function()); private: - AddressToAtomMap &getAddrToAtomMap() { - refreshAddrToAtomCache(); - return *AddrToAtomCache; - } - - const AddressToAtomMap &getAddrToAtomMap() const { - refreshAddrToAtomCache(); - return *AddrToAtomCache; - } - - void refreshAddrToAtomCache() const { - if (!AddrToAtomCache) { - AddrToAtomCache = AddressToAtomMap(); - for (auto *DA : defined_atoms()) - (*AddrToAtomCache)[DA->getAddress()] = const_cast(DA); - } - } - - // Put the BumpPtrAllocator first so that we don't free any of the atoms in - // it until all of their destructors have been run. - BumpPtrAllocator AtomAllocator; + // Put the BumpPtrAllocator first so that we don't free any of the underlying + // memory until the Symbol/Addressable destructors have been run. + BumpPtrAllocator Allocator; std::string Name; unsigned PointerSize; support::endianness Endianness; + BlockSet Blocks; SectionList Sections; - NamedAtomMap NamedAtoms; - ExternalAtomSet ExternalAtoms; - ExternalAtomSet AbsoluteAtoms; - mutable Optional AddrToAtomCache; + ExternalSymbolSet ExternalSymbols; + ExternalSymbolSet AbsoluteSymbols; }; -/// A function for mutating AtomGraphs. -using AtomGraphPassFunction = std::function; +/// A function for mutating LinkGraphs. +using LinkGraphPassFunction = std::function; -/// A list of atom graph passes. -using AtomGraphPassList = std::vector; +/// A list of LinkGraph passes. +using LinkGraphPassList = std::vector; -/// An atom graph pass configuration, consisting of a list of pre-prune, +/// An LinkGraph pass configuration, consisting of a list of pre-prune, /// post-prune, and post-fixup passes. struct PassConfiguration { /// Pre-prune passes. /// /// These passes are called on the graph after it is built, and before any - /// atoms have been pruned. + /// symbols have been pruned. /// - /// Notable use cases: Marking atoms live or should-discard. - AtomGraphPassList PrePrunePasses; + /// Notable use cases: Marking symbols live or should-discard. + LinkGraphPassList PrePrunePasses; /// Post-prune passes. /// - /// These passes are called on the graph after dead and should-discard atoms - /// have been removed, but before fixups are applied. 
+ /// These passes are called on the graph after dead stripping, but before + /// fixups are applied. /// - /// Notable use cases: Building GOT, stub, and TLV atoms. - AtomGraphPassList PostPrunePasses; + /// Notable use cases: Building GOT, stub, and TLV symbols. + LinkGraphPassList PostPrunePasses; /// Post-fixup passes. /// - /// These passes are called on the graph after atom contents has been copied + /// These passes are called on the graph after block contents has been copied /// to working memory, and fixups applied. /// /// Notable use cases: Testing and validation. - AtomGraphPassList PostFixupPasses; + LinkGraphPassList PostFixupPasses; }; /// A map of symbol names to resolved addresses. using AsyncLookupResult = DenseMap; -/// A function to call with a resolved symbol map (See AsyncLookupResult) or an -/// error if resolution failed. -using JITLinkAsyncLookupContinuation = - std::function LR)>; +/// A function object to call with a resolved symbol map (See AsyncLookupResult) +/// or an error if resolution failed. +class JITLinkAsyncLookupContinuation { +public: + virtual ~JITLinkAsyncLookupContinuation() {} + virtual void run(Expected LR) = 0; + +private: + virtual void anchor(); +}; + +/// Create a lookup continuation from a function object. +template +std::unique_ptr +createLookupContinuation(Continuation Cont) { -/// An asynchronous symbol lookup. Performs a search (possibly asynchronously) -/// for the given symbols, calling the given continuation with either the result -/// (if the lookup succeeds), or an error (if the lookup fails). -using JITLinkAsyncLookupFunction = - std::function &Symbols, - JITLinkAsyncLookupContinuation LookupContinuation)>; + class Impl final : public JITLinkAsyncLookupContinuation { + public: + Impl(Continuation C) : C(std::move(C)) {} + void run(Expected LR) override { C(std::move(LR)); } + + private: + Continuation C; + }; + + return std::make_unique(std::move(Cont)); +} /// Holds context for a single jitLink invocation. class JITLinkContext { @@ -881,13 +995,13 @@ public: /// lookup continutation which it must call with a result to continue the /// linking process. virtual void lookup(const DenseSet &Symbols, - JITLinkAsyncLookupContinuation LookupContinuation) = 0; + std::unique_ptr LC) = 0; - /// Called by JITLink once all defined atoms in the graph have been assigned - /// their final memory locations in the target process. At this point he - /// atom graph can be, inspected to build a symbol table however the atom + /// Called by JITLink once all defined symbols in the graph have been assigned + /// their final memory locations in the target process. At this point the + /// LinkGraph can be inspected to build a symbol table, however the block /// content will not generally have been copied to the target location yet. - virtual void notifyResolved(AtomGraph &G) = 0; + virtual void notifyResolved(LinkGraph &G) = 0; /// Called by JITLink to notify the context that the object has been /// finalized (i.e. emitted to memory and memory permissions set). If all of @@ -904,20 +1018,20 @@ public: /// Returns the mark-live pass to be used for this link. If no pass is /// returned (the default) then the target-specific linker implementation will - /// choose a conservative default (usually marking all atoms live). + /// choose a conservative default (usually marking all symbols live). 
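// [Editor's note] Hedged sketch, not part of the patch: wrapping a lambda as
// the new JITLinkAsyncLookupContinuation interface via createLookupContinuation.
// The logging body is illustrative; a real JITLinkContext implementation would
// hand the continuation to whatever performs its symbol resolution.
#include <memory>
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

std::unique_ptr<llvm::jitlink::JITLinkAsyncLookupContinuation>
makeLoggingContinuation() {
  using namespace llvm;
  return jitlink::createLookupContinuation(
      [](Expected<jitlink::AsyncLookupResult> LR) {
        if (!LR) {
          logAllUnhandledErrors(LR.takeError(), errs(), "lookup failed: ");
          return;
        }
        errs() << "lookup resolved " << LR->size() << " symbols\n";
      });
}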
/// This function is only called if shouldAddDefaultTargetPasses returns true, /// otherwise the JITContext is responsible for adding a mark-live pass in /// modifyPassConfig. - virtual AtomGraphPassFunction getMarkLivePass(const Triple &TT) const; + virtual LinkGraphPassFunction getMarkLivePass(const Triple &TT) const; /// Called by JITLink to modify the pass pipeline prior to linking. /// The default version performs no modification. virtual Error modifyPassConfig(const Triple &TT, PassConfiguration &Config); }; -/// Marks all atoms in a graph live. This can be used as a default, conservative -/// mark-live implementation. -Error markAllAtomsLive(AtomGraph &G); +/// Marks all symbols in a graph live. This can be used as a default, +/// conservative mark-live implementation. +Error markAllSymbolsLive(LinkGraph &G); /// Basic JITLink implementation. /// diff --git a/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h b/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h index 9d0b37fe4a4d..ac5a593bb77b 100644 --- a/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h +++ b/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h @@ -33,20 +33,19 @@ public: class SegmentRequest { public: SegmentRequest() = default; - SegmentRequest(size_t ContentSize, unsigned ContentAlign, - uint64_t ZeroFillSize, unsigned ZeroFillAlign) - : ContentSize(ContentSize), ZeroFillSize(ZeroFillSize), - ContentAlign(ContentAlign), ZeroFillAlign(ZeroFillAlign) {} + SegmentRequest(uint64_t Alignment, size_t ContentSize, + uint64_t ZeroFillSize) + : Alignment(Alignment), ContentSize(ContentSize), + ZeroFillSize(ZeroFillSize) { + assert(isPowerOf2_32(Alignment) && "Alignment must be power of 2"); + } + uint64_t getAlignment() const { return Alignment; } size_t getContentSize() const { return ContentSize; } - unsigned getContentAlignment() const { return ContentAlign; } uint64_t getZeroFillSize() const { return ZeroFillSize; } - unsigned getZeroFillAlignment() const { return ZeroFillAlign; } - private: + uint64_t Alignment = 0; size_t ContentSize = 0; uint64_t ZeroFillSize = 0; - unsigned ContentAlign = 0; - unsigned ZeroFillAlign = 0; }; using SegmentsRequestMap = DenseMap; diff --git a/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h b/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h new file mode 100644 index 000000000000..d70b545fff86 --- /dev/null +++ b/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h @@ -0,0 +1,60 @@ +//===---- MachO_arm64.h - JIT link functions for MachO/arm64 ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// jit-link functions for MachO/arm64. 
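// [Editor's note] Hedged sketch, not part of the patch: the SegmentRequest
// constructor now takes (Alignment, ContentSize, ZeroFillSize), with one
// alignment shared by the content and zero-fill portions. The nested-name
// spelling below assumes SegmentRequest remains a member of
// JITLinkMemoryManager, as in this header.
#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"

llvm::jitlink::JITLinkMemoryManager::SegmentRequest makeTextSegmentRequest() {
  // A 4 KiB-aligned segment: 4096 bytes of content plus 128 bytes zero-fill.
  return llvm::jitlink::JITLinkMemoryManager::SegmentRequest(
      /*Alignment=*/4096, /*ContentSize=*/4096, /*ZeroFillSize=*/128);
}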
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_MACHO_ARM64_H +#define LLVM_EXECUTIONENGINE_JITLINK_MACHO_ARM64_H + +#include "llvm/ExecutionEngine/JITLink/JITLink.h" + +namespace llvm { +namespace jitlink { + +namespace MachO_arm64_Edges { + +enum MachOARM64RelocationKind : Edge::Kind { + Branch26 = Edge::FirstRelocation, + Pointer32, + Pointer64, + Pointer64Anon, + Page21, + PageOffset12, + GOTPage21, + GOTPageOffset12, + PointerToGOT, + PairedAddend, + LDRLiteral19, + Delta32, + Delta64, + NegDelta32, + NegDelta64, +}; + +} // namespace MachO_arm64_Edges + +/// jit-link the given object buffer, which must be a MachO arm64 object file. +/// +/// If PrePrunePasses is empty then a default mark-live pass will be inserted +/// that will mark all exported atoms live. If PrePrunePasses is not empty, the +/// caller is responsible for including a pass to mark atoms as live. +/// +/// If PostPrunePasses is empty then a default GOT-and-stubs insertion pass will +/// be inserted. If PostPrunePasses is not empty then the caller is responsible +/// for including a pass to insert GOT and stub edges. +void jitLink_MachO_arm64(std::unique_ptr Ctx); + +/// Return the string name of the given MachO arm64 edge kind. +StringRef getMachOARM64RelocationKindName(Edge::Kind R); + +} // end namespace jitlink +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_JITLINK_MACHO_ARM64_H diff --git a/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h b/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h index 1d5b586afc32..00a7feb86e83 100644 --- a/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h +++ b/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h @@ -22,6 +22,7 @@ namespace MachO_x86_64_Edges { enum MachOX86RelocationKind : Edge::Kind { Branch32 = Edge::FirstRelocation, + Pointer32, Pointer64, Pointer64Anon, PCRel32, diff --git a/include/llvm/ExecutionEngine/JITSymbol.h b/include/llvm/ExecutionEngine/JITSymbol.h index b14154c5b5e8..c0f1ca4b9876 100644 --- a/include/llvm/ExecutionEngine/JITSymbol.h +++ b/include/llvm/ExecutionEngine/JITSymbol.h @@ -23,6 +23,7 @@ #include #include "llvm/ADT/BitmaskEnum.h" +#include "llvm/ADT/FunctionExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" @@ -217,7 +218,7 @@ private: /// Represents a symbol in the JIT. class JITSymbol { public: - using GetAddressFtor = std::function()>; + using GetAddressFtor = unique_function()>; /// Create a 'null' symbol, used to represent a "symbol not found" /// result from a successful (non-erroneous) lookup. @@ -325,7 +326,7 @@ class JITSymbolResolver { public: using LookupSet = std::set; using LookupResult = std::map; - using OnResolvedFunction = std::function)>; + using OnResolvedFunction = unique_function)>; virtual ~JITSymbolResolver() = default; diff --git a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h index 5f593a27cad6..7946b5b7b209 100644 --- a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h +++ b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h @@ -26,6 +26,7 @@ #include "llvm/ExecutionEngine/Orc/LazyReexports.h" #include "llvm/ExecutionEngine/Orc/Legacy.h" #include "llvm/ExecutionEngine/Orc/OrcError.h" +#include "llvm/ExecutionEngine/Orc/Speculation.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constant.h" @@ -91,6 +92,8 @@ public: /// Sets the partition function. 
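// [Editor's note] Hedged sketch, not part of the patch: mapping one of the
// MachO/arm64 edge kinds declared above to its printable name, e.g. when
// dumping a LinkGraph or reporting an unsupported relocation.
#include "llvm/ExecutionEngine/JITLink/MachO_arm64.h"
#include "llvm/Support/raw_ostream.h"

void printBranch26KindName() {
  llvm::StringRef Name = llvm::jitlink::getMachOARM64RelocationKindName(
      llvm::jitlink::MachO_arm64_Edges::Branch26);
  llvm::outs() << Name << "\n";
}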
void setPartitionFunction(PartitionFunction Partition); + /// Sets the ImplSymbolMap + void setImplMap(ImplSymbolMap *Imp); /// Emits the given module. This should not be called by clients: it will be /// called by the JIT when a definition added via the add method is requested. void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override; @@ -128,6 +131,7 @@ private: PerDylibResourcesMap DylibResources; PartitionFunction Partition = compileRequested; SymbolLinkagePromoter PromoteSymbols; + ImplSymbolMap *AliaseeImpls = nullptr; }; /// Compile-on-demand layer. @@ -187,7 +191,7 @@ private: std::unique_ptr> wrapOwnership(ResourcePtrT ResourcePtr) { using RO = ResourceOwnerImpl; - return llvm::make_unique(std::move(ResourcePtr)); + return std::make_unique(std::move(ResourcePtr)); } struct LogicalDylib { @@ -440,7 +444,7 @@ private: return Error::success(); // Create the GlobalValues module. - auto GVsM = llvm::make_unique((SrcM.getName() + ".globals").str(), + auto GVsM = std::make_unique((SrcM.getName() + ".globals").str(), SrcM.getContext()); GVsM->setDataLayout(DL); @@ -633,7 +637,7 @@ private: NewName += F->getName(); } - auto M = llvm::make_unique(NewName, SrcM.getContext()); + auto M = std::make_unique(NewName, SrcM.getContext()); M->setDataLayout(SrcM.getDataLayout()); ValueToValueMapTy VMap; diff --git a/include/llvm/ExecutionEngine/Orc/Core.h b/include/llvm/ExecutionEngine/Orc/Core.h index 94a5618233e4..4f22a4c38796 100644 --- a/include/llvm/ExecutionEngine/Orc/Core.h +++ b/include/llvm/ExecutionEngine/Orc/Core.h @@ -14,6 +14,7 @@ #define LLVM_EXECUTIONENGINE_ORC_CORE_H #include "llvm/ADT/BitmaskEnum.h" +#include "llvm/ADT/FunctionExtras.h" #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/Orc/SymbolStringPool.h" #include "llvm/ExecutionEngine/OrcV1Deprecation.h" @@ -51,8 +52,7 @@ using SymbolMap = DenseMap; /// A map from symbol names (as SymbolStringPtrs) to JITSymbolFlags. using SymbolFlagsMap = DenseMap; -/// A base class for materialization failures that allows the failing -/// symbols to be obtained for logging. +/// A map from JITDylibs to sets of symbols. using SymbolDependenceMap = DenseMap; /// A list of (JITDylib*, bool) pairs. @@ -108,7 +108,7 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolAliasMap &Aliases); raw_ostream &operator<<(raw_ostream &OS, const SymbolState &S); /// Callback to notify client that symbols have been resolved. -using SymbolsResolvedCallback = std::function)>; +using SymbolsResolvedCallback = unique_function)>; /// Callback to register the dependencies for a given query. using RegisterDependenciesFunction = @@ -124,13 +124,13 @@ class FailedToMaterialize : public ErrorInfo { public: static char ID; - FailedToMaterialize(SymbolNameSet Symbols); + FailedToMaterialize(std::shared_ptr Symbols); std::error_code convertToErrorCode() const override; void log(raw_ostream &OS) const override; - const SymbolNameSet &getSymbols() const { return Symbols; } + const SymbolDependenceMap &getSymbols() const { return *Symbols; } private: - SymbolNameSet Symbols; + std::shared_ptr Symbols; }; /// Used to notify clients when symbols can not be found during a lookup. @@ -205,12 +205,26 @@ public: /// symbols must be ones covered by this MaterializationResponsibility /// instance. Individual calls to this method may resolve a subset of the /// symbols, but all symbols must have been resolved prior to calling emit. 
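The hunk that follows changes notifyResolved and notifyEmitted to return Error. A minimal sketch of driving the new API from inside a materializer, assuming R is the MaterializationResponsibility and ResolvedSymbols a SymbolMap it has produced (both names are placeholders):

#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

static void finalize(llvm::orc::MaterializationResponsibility &R,
                     const llvm::orc::SymbolMap &ResolvedSymbols) {
  using namespace llvm;
  // With no registered dependencies these calls are guaranteed to succeed and
  // could be wrapped in cantFail(); otherwise log and fail the materialization.
  if (auto Err = R.notifyResolved(ResolvedSymbols)) {
    logAllUnhandledErrors(std::move(Err), errs(), "notifyResolved: ");
    R.failMaterialization();
    return;
  }
  if (auto Err = R.notifyEmitted()) {
    logAllUnhandledErrors(std::move(Err), errs(), "notifyEmitted: ");
    R.failMaterialization();
    return;
  }
}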
- void notifyResolved(const SymbolMap &Symbols); + /// + /// This method will return an error if any symbols being resolved have been + /// moved to the error state due to the failure of a dependency. If this + /// method returns an error then clients should log it and call + /// failMaterialize. If no dependencies have been registered for the + /// symbols covered by this MaterializationResponsibiility then this method + /// is guaranteed to return Error::success() and can be wrapped with cantFail. + Error notifyResolved(const SymbolMap &Symbols); /// Notifies the target JITDylib (and any pending queries on that JITDylib) /// that all symbols covered by this MaterializationResponsibility instance /// have been emitted. - void notifyEmitted(); + /// + /// This method will return an error if any symbols being resolved have been + /// moved to the error state due to the failure of a dependency. If this + /// method returns an error then clients should log it and call + /// failMaterialize. If no dependencies have been registered for the + /// symbols covered by this MaterializationResponsibiility then this method + /// is guaranteed to return Error::success() and can be wrapped with cantFail. + Error notifyEmitted(); /// Adds new symbols to the JITDylib and this responsibility instance. /// JITDylib entries start out in the materializing state. @@ -346,7 +360,7 @@ private: /// inline std::unique_ptr absoluteSymbols(SymbolMap Symbols, VModuleKey K = VModuleKey()) { - return llvm::make_unique( + return std::make_unique( std::move(Symbols), std::move(K)); } @@ -390,7 +404,7 @@ private: /// \endcode inline std::unique_ptr symbolAliases(SymbolAliasMap Aliases, VModuleKey K = VModuleKey()) { - return llvm::make_unique( + return std::make_unique( nullptr, true, std::move(Aliases), std::move(K)); } @@ -402,7 +416,7 @@ symbolAliases(SymbolAliasMap Aliases, VModuleKey K = VModuleKey()) { inline std::unique_ptr reexports(JITDylib &SourceJD, SymbolAliasMap Aliases, bool MatchNonExported = false, VModuleKey K = VModuleKey()) { - return llvm::make_unique( + return std::make_unique( &SourceJD, MatchNonExported, std::move(Aliases), std::move(K)); } @@ -411,32 +425,13 @@ reexports(JITDylib &SourceJD, SymbolAliasMap Aliases, Expected buildSimpleReexportsAliasMap(JITDylib &SourceJD, const SymbolNameSet &Symbols); -/// ReexportsGenerator can be used with JITDylib::setGenerator to automatically -/// re-export a subset of the source JITDylib's symbols in the target. -class ReexportsGenerator { -public: - using SymbolPredicate = std::function; - - /// Create a reexports generator. If an Allow predicate is passed, only - /// symbols for which the predicate returns true will be reexported. If no - /// Allow predicate is passed, all symbols will be exported. - ReexportsGenerator(JITDylib &SourceJD, bool MatchNonExported = false, - SymbolPredicate Allow = SymbolPredicate()); - - Expected operator()(JITDylib &JD, const SymbolNameSet &Names); - -private: - JITDylib &SourceJD; - bool MatchNonExported = false; - SymbolPredicate Allow; -}; - /// Represents the state that a symbol has reached during materialization. enum class SymbolState : uint8_t { Invalid, /// No symbol should be in this state. NeverSearched, /// Added to the symbol table, never queried. Materializing, /// Queried, materialization begun. Resolved, /// Assigned address, still materializing. + Emitted, /// Emitted to memory, but waiting on transitive dependencies. Ready = 0x3f /// Ready and safe for clients to access. 
}; @@ -502,8 +497,12 @@ class JITDylib { friend class ExecutionSession; friend class MaterializationResponsibility; public: - using GeneratorFunction = std::function( - JITDylib &Parent, const SymbolNameSet &Names)>; + class DefinitionGenerator { + public: + virtual ~DefinitionGenerator(); + virtual Expected + tryToGenerate(JITDylib &Parent, const SymbolNameSet &Names) = 0; + }; using AsynchronousSymbolQuerySet = std::set>; @@ -519,13 +518,20 @@ public: /// Get a reference to the ExecutionSession for this JITDylib. ExecutionSession &getExecutionSession() const { return ES; } - /// Set a definition generator. If set, whenever a symbol fails to resolve - /// within this JITDylib, lookup and lookupFlags will pass the unresolved - /// symbols set to the definition generator. The generator can optionally - /// add a definition for the unresolved symbols to the dylib. - void setGenerator(GeneratorFunction DefGenerator) { - this->DefGenerator = std::move(DefGenerator); - } + /// Adds a definition generator to this JITDylib and returns a referenece to + /// it. + /// + /// When JITDylibs are searched during lookup, if no existing definition of + /// a symbol is found, then any generators that have been added are run (in + /// the order that they were added) to potentially generate a definition. + template + GeneratorT &addGenerator(std::unique_ptr DefGenerator); + + /// Remove a definition generator from this JITDylib. + /// + /// The given generator must exist in this JITDylib's generators list (i.e. + /// have been added and not yet removed). + void removeGenerator(DefinitionGenerator &G); /// Set the search order to be used when fixing up definitions in JITDylib. /// This will replace the previous search order, and apply to any symbol @@ -633,17 +639,17 @@ private: struct MaterializingInfo { SymbolDependenceMap Dependants; SymbolDependenceMap UnemittedDependencies; - bool IsEmitted = false; void addQuery(std::shared_ptr Q); void removeQuery(const AsynchronousSymbolQuery &Q); AsynchronousSymbolQueryList takeQueriesMeeting(SymbolState RequiredState); - AsynchronousSymbolQueryList takeAllQueries(); + AsynchronousSymbolQueryList takeAllPendingQueries() { + return std::move(PendingQueries); + } bool hasQueriesPending() const { return !PendingQueries.empty(); } const AsynchronousSymbolQueryList &pendingQueries() const { return PendingQueries; } - private: AsynchronousSymbolQueryList PendingQueries; }; @@ -710,9 +716,9 @@ private: SymbolNameSet &Unresolved, bool MatchNonExported, MaterializationUnitList &MUs); - void lodgeQueryImpl(std::shared_ptr &Q, - SymbolNameSet &Unresolved, bool MatchNonExported, - MaterializationUnitList &MUs); + Error lodgeQueryImpl(std::shared_ptr &Q, + SymbolNameSet &Unresolved, bool MatchNonExported, + MaterializationUnitList &MUs); bool lookupImpl(std::shared_ptr &Q, std::vector> &MUs, @@ -734,18 +740,20 @@ private: void addDependencies(const SymbolStringPtr &Name, const SymbolDependenceMap &Dependants); - void resolve(const SymbolMap &Resolved); + Error resolve(const SymbolMap &Resolved); - void emit(const SymbolFlagsMap &Emitted); + Error emit(const SymbolFlagsMap &Emitted); - void notifyFailed(const SymbolNameSet &FailedSymbols); + using FailedSymbolsWorklist = + std::vector>; + static void notifyFailed(FailedSymbolsWorklist FailedSymbols); ExecutionSession &ES; std::string JITDylibName; SymbolTable Symbols; UnmaterializedInfosMap UnmaterializedInfos; MaterializingInfosMap MaterializingInfos; - GeneratorFunction DefGenerator; + std::vector> DefGenerators; 
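The DefinitionGenerator interface above replaces the old GeneratorFunction std::function. A minimal generator sketch; the class name and its do-nothing behaviour are illustrative, and a real generator would add definitions to the JITDylib and return the subset of the requested names it now covers:

#include "llvm/ExecutionEngine/Orc/Core.h"

class NullGenerator : public llvm::orc::JITDylib::DefinitionGenerator {
public:
  llvm::Expected<llvm::orc::SymbolNameSet>
  tryToGenerate(llvm::orc::JITDylib &JD,
                const llvm::orc::SymbolNameSet &Names) override {
    // Generate nothing: lookups simply fall through to any later generators.
    // A real generator would call JD.define(...) for the names it can supply
    // and return that subset of Names.
    return llvm::orc::SymbolNameSet();
  }
};

// Registration and removal go through the new JITDylib members:
//   auto &G = JD.addGenerator(std::make_unique<NullGenerator>());
//   JD.removeGenerator(G);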
JITDylibSearchList SearchOrder; }; @@ -933,6 +941,14 @@ private: OutstandingMUs; }; +template +GeneratorT &JITDylib::addGenerator(std::unique_ptr DefGenerator) { + auto &G = *DefGenerator; + ES.runSessionLocked( + [&]() { DefGenerators.push_back(std::move(DefGenerator)); }); + return G; +} + template auto JITDylib::withSearchOrderDo(Func &&F) -> decltype(F(std::declval())) { @@ -972,6 +988,27 @@ Error JITDylib::define(std::unique_ptr &MU) { }); } +/// ReexportsGenerator can be used with JITDylib::setGenerator to automatically +/// re-export a subset of the source JITDylib's symbols in the target. +class ReexportsGenerator : public JITDylib::DefinitionGenerator { +public: + using SymbolPredicate = std::function; + + /// Create a reexports generator. If an Allow predicate is passed, only + /// symbols for which the predicate returns true will be reexported. If no + /// Allow predicate is passed, all symbols will be exported. + ReexportsGenerator(JITDylib &SourceJD, bool MatchNonExported = false, + SymbolPredicate Allow = SymbolPredicate()); + + Expected tryToGenerate(JITDylib &JD, + const SymbolNameSet &Names) override; + +private: + JITDylib &SourceJD; + bool MatchNonExported = false; + SymbolPredicate Allow; +}; + /// Mangles symbol names then uniques them in the context of an /// ExecutionSession. class MangleAndInterner { diff --git a/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h index 75865920c741..cf0a428662ef 100644 --- a/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h +++ b/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h @@ -19,6 +19,7 @@ #include "llvm/ExecutionEngine/Orc/Core.h" #include "llvm/ExecutionEngine/Orc/OrcError.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" +#include "llvm/Object/Archive.h" #include "llvm/Support/DynamicLibrary.h" #include #include @@ -37,6 +38,8 @@ class Value; namespace orc { +class ObjectLayer; + /// This iterator provides a convenient way to iterate over the elements /// of an llvm.global_ctors/llvm.global_dtors instance. /// @@ -237,7 +240,7 @@ public: /// If an instance of this class is attached to a JITDylib as a fallback /// definition generator, then any symbol found in the given DynamicLibrary that /// passes the 'Allow' predicate will be added to the JITDylib. -class DynamicLibrarySearchGenerator { +class DynamicLibrarySearchGenerator : public JITDylib::DefinitionGenerator { public: using SymbolPredicate = std::function; @@ -253,19 +256,20 @@ public: /// Permanently loads the library at the given path and, on success, returns /// a DynamicLibrarySearchGenerator that will search it for symbol definitions /// in the library. On failure returns the reason the library failed to load. - static Expected + static Expected> Load(const char *FileName, char GlobalPrefix, SymbolPredicate Allow = SymbolPredicate()); /// Creates a DynamicLibrarySearchGenerator that searches for symbols in /// the current process. - static Expected + static Expected> GetForCurrentProcess(char GlobalPrefix, SymbolPredicate Allow = SymbolPredicate()) { return Load(nullptr, GlobalPrefix, std::move(Allow)); } - Expected operator()(JITDylib &JD, const SymbolNameSet &Names); + Expected tryToGenerate(JITDylib &JD, + const SymbolNameSet &Names) override; private: sys::DynamicLibrary Dylib; @@ -273,6 +277,40 @@ private: char GlobalPrefix; }; +/// A utility class to expose symbols from a static library. 
+/// +/// If an instance of this class is attached to a JITDylib as a fallback +/// definition generator, then any symbol found in the archive will result in +/// the containing object being added to the JITDylib. +class StaticLibraryDefinitionGenerator : public JITDylib::DefinitionGenerator { +public: + /// Try to create a StaticLibraryDefinitionGenerator from the given path. + /// + /// This call will succeed if the file at the given path is a valid static + /// library archive, otherwise it will return an error. + static Expected> + Load(ObjectLayer &L, const char *FileName); + + /// Try to create a StaticLibraryDefinitionGenerator from the given memory buffer. + /// This call will succeed if the buffer contains a valid archive, otherwise + /// it will return an error. + static Expected> + Create(ObjectLayer &L, std::unique_ptr ArchiveBuffer); + + Expected tryToGenerate(JITDylib &JD, + const SymbolNameSet &Names) override; + +private: + StaticLibraryDefinitionGenerator(ObjectLayer &L, + std::unique_ptr ArchiveBuffer, + Error &Err); + + ObjectLayer &L; + std::unique_ptr ArchiveBuffer; + object::Archive Archive; + size_t UnrealizedObjects = 0; +}; + +} // end namespace orc +} // end namespace llvm diff --git a/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h b/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h index 1b4c8b6cd95f..b71e5b339711 100644 --- a/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h +++ b/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h @@ -22,6 +22,9 @@ namespace llvm { class Module; namespace orc { +/// A layer that applies a transform to emitted modules. +/// The transform function is responsible for locking the ThreadSafeContext +/// before operating on the module. class IRTransformLayer : public IRLayer { public: using TransformFunction = std::function( diff --git a/include/llvm/ExecutionEngine/Orc/LLJIT.h b/include/llvm/ExecutionEngine/Orc/LLJIT.h index 0aac1916423f..b1e47d77557c 100644 --- a/include/llvm/ExecutionEngine/Orc/LLJIT.h +++ b/include/llvm/ExecutionEngine/Orc/LLJIT.h @@ -184,8 +184,8 @@ private: class LLJITBuilderState { public: - using ObjectLinkingLayerCreator = - std::function(ExecutionSession &)>; + using ObjectLinkingLayerCreator = std::function( + ExecutionSession &, const Triple &TT)>; using CompileFunctionCreator = std::function( diff --git a/include/llvm/ExecutionEngine/Orc/LambdaResolver.h b/include/llvm/ExecutionEngine/Orc/LambdaResolver.h index 855e31b33549..b31914f12a0d 100644 --- a/include/llvm/ExecutionEngine/Orc/LambdaResolver.h +++ b/include/llvm/ExecutionEngine/Orc/LambdaResolver.h @@ -16,6 +16,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/OrcV1Deprecation.h" #include namespace llvm { @@ -62,7 +63,7 @@ std::shared_ptr> createLambdaResolver(DylibLookupFtorT DylibLookupFtor, ExternalLookupFtorT ExternalLookupFtor) { using LR = LambdaResolver; - return make_unique(std::move(DylibLookupFtor), + return std::make_unique(std::move(DylibLookupFtor), std::move(ExternalLookupFtor)); } @@ -72,7 +73,7 @@ createLambdaResolver(ORCv1DeprecationAcknowledgement, DylibLookupFtorT DylibLookupFtor, ExternalLookupFtorT ExternalLookupFtor) { using LR = LambdaResolver; - return make_unique(AcknowledgeORCv1Deprecation, + return std::make_unique(AcknowledgeORCv1Deprecation, std::move(DylibLookupFtor), std::move(ExternalLookupFtor)); } diff --git a/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h b/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h index
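Both stock generators above now come back as Expected<std::unique_ptr<...>>, so they can be handed straight to JITDylib::addGenerator. A wiring sketch, assuming JD is a JITDylib, ObjLayer an ObjectLayer, and DL the process DataLayout; the archive path is purely illustrative:

#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/IR/DataLayout.h"

static llvm::Error addProcessAndArchiveSymbols(llvm::orc::JITDylib &JD,
                                               llvm::orc::ObjectLayer &ObjLayer,
                                               const llvm::DataLayout &DL) {
  using namespace llvm;
  using namespace llvm::orc;

  // Expose symbols already present in the host process.
  auto ProcessSyms =
      DynamicLibrarySearchGenerator::GetForCurrentProcess(DL.getGlobalPrefix());
  if (!ProcessSyms)
    return ProcessSyms.takeError();
  JD.addGenerator(std::move(*ProcessSyms));

  // Pull objects out of a static archive on demand (the path is hypothetical).
  auto ArchiveGen =
      StaticLibraryDefinitionGenerator::Load(ObjLayer, "libexample.a");
  if (!ArchiveGen)
    return ArchiveGen.takeError();
  JD.addGenerator(std::move(*ArchiveGen));

  return Error::success();
}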
16202d89f861..b67a9feed523 100644 --- a/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h @@ -49,28 +49,24 @@ private: switch (EmitState) { case NotEmitted: if (auto GV = searchGVs(Name, ExportedSymbolsOnly)) { - // Create a std::string version of Name to capture here - the argument - // (a StringRef) may go away before the lambda is executed. - // FIXME: Use capture-init when we move to C++14. - std::string PName = Name; JITSymbolFlags Flags = JITSymbolFlags::fromGlobalValue(*GV); - auto GetAddress = - [this, ExportedSymbolsOnly, PName, &B]() -> Expected { - if (this->EmitState == Emitting) - return 0; - else if (this->EmitState == NotEmitted) { - this->EmitState = Emitting; - if (auto Err = this->emitToBaseLayer(B)) - return std::move(Err); - this->EmitState = Emitted; - } - if (auto Sym = B.findSymbolIn(K, PName, ExportedSymbolsOnly)) - return Sym.getAddress(); - else if (auto Err = Sym.takeError()) + auto GetAddress = [this, ExportedSymbolsOnly, Name = Name.str(), + &B]() -> Expected { + if (this->EmitState == Emitting) + return 0; + else if (this->EmitState == NotEmitted) { + this->EmitState = Emitting; + if (auto Err = this->emitToBaseLayer(B)) return std::move(Err); - else - llvm_unreachable("Successful symbol lookup should return " - "definition address here"); + this->EmitState = Emitted; + } + if (auto Sym = B.findSymbolIn(K, Name, ExportedSymbolsOnly)) + return Sym.getAddress(); + else if (auto Err = Sym.takeError()) + return std::move(Err); + else + llvm_unreachable("Successful symbol lookup should return " + "definition address here"); }; return JITSymbol(std::move(GetAddress), Flags); } else @@ -171,7 +167,7 @@ private: bool ExportedSymbolsOnly) const { assert(!MangledSymbols && "Mangled symbols map already exists?"); - auto Symbols = llvm::make_unique>(); + auto Symbols = std::make_unique>(); Mangler Mang; @@ -209,7 +205,7 @@ public: Error addModule(VModuleKey K, std::unique_ptr M) { assert(!ModuleMap.count(K) && "VModuleKey K already in use"); ModuleMap[K] = - llvm::make_unique(std::move(K), std::move(M)); + std::make_unique(std::move(K), std::move(M)); return Error::success(); } diff --git a/include/llvm/ExecutionEngine/Orc/LazyReexports.h b/include/llvm/ExecutionEngine/Orc/LazyReexports.h index 9fdd1d15f782..311ed59b1549 100644 --- a/include/llvm/ExecutionEngine/Orc/LazyReexports.h +++ b/include/llvm/ExecutionEngine/Orc/LazyReexports.h @@ -18,6 +18,7 @@ #include "llvm/ExecutionEngine/Orc/Core.h" #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" +#include "llvm/ExecutionEngine/Orc/Speculation.h" namespace llvm { @@ -70,7 +71,7 @@ public: template static std::unique_ptr createNotifyResolvedFunction(NotifyResolvedImpl NotifyResolved) { - return llvm::make_unique>( + return std::make_unique>( std::move(NotifyResolved)); } @@ -159,7 +160,7 @@ public: IndirectStubsManager &ISManager, JITDylib &SourceJD, SymbolAliasMap CallableAliases, - VModuleKey K); + ImplSymbolMap *SrcJDLoc, VModuleKey K); StringRef getName() const override; @@ -174,6 +175,7 @@ private: SymbolAliasMap CallableAliases; std::shared_ptr NotifyResolved; + ImplSymbolMap *AliaseeTable; }; /// Define lazy-reexports based on the given SymbolAliasMap. 
Each lazy re-export @@ -182,9 +184,10 @@ private: inline std::unique_ptr lazyReexports(LazyCallThroughManager &LCTManager, IndirectStubsManager &ISManager, JITDylib &SourceJD, - SymbolAliasMap CallableAliases, VModuleKey K = VModuleKey()) { - return llvm::make_unique( - LCTManager, ISManager, SourceJD, std::move(CallableAliases), + SymbolAliasMap CallableAliases, ImplSymbolMap *SrcJDLoc = nullptr, + VModuleKey K = VModuleKey()) { + return std::make_unique( + LCTManager, ISManager, SourceJD, std::move(CallableAliases), SrcJDLoc, std::move(K)); } diff --git a/include/llvm/ExecutionEngine/Orc/Legacy.h b/include/llvm/ExecutionEngine/Orc/Legacy.h index f9cbbf6ff180..148e260c9569 100644 --- a/include/llvm/ExecutionEngine/Orc/Legacy.h +++ b/include/llvm/ExecutionEngine/Orc/Legacy.h @@ -84,7 +84,7 @@ createSymbolResolver(GetResponsibilitySetFn &&GetResponsibilitySet, typename std::remove_reference::type>::type, typename std::remove_cv< typename std::remove_reference::type>::type>; - return llvm::make_unique( + return std::make_unique( std::forward(GetResponsibilitySet), std::forward(Lookup)); } diff --git a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h index c1e7d27f446e..caf8e707516d 100644 --- a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h @@ -73,6 +73,9 @@ public: virtual Error notifyRemovingAllModules() { return Error::success(); } }; + using ReturnObjectBufferFunction = + std::function)>; + /// Construct an ObjectLinkingLayer with the given NotifyLoaded, /// and NotifyEmitted functors. ObjectLinkingLayer(ExecutionSession &ES, @@ -81,6 +84,13 @@ public: /// Destruct an ObjectLinkingLayer. ~ObjectLinkingLayer(); + /// Set an object buffer return function. By default object buffers are + /// deleted once the JIT has linked them. If a return function is set then + /// it will be called to transfer ownership of the buffer instead. + void setReturnObjectBuffer(ReturnObjectBufferFunction ReturnObjectBuffer) { + this->ReturnObjectBuffer = std::move(ReturnObjectBuffer); + } + /// Add a pass-config modifier. 
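The setReturnObjectBuffer hook above lets a client reclaim object buffers after linking instead of having the layer delete them. A short sketch, assuming ObjLinkingLayer is the ObjectLinkingLayer instance; what is done with the returned buffer is entirely up to the client:

#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include "llvm/Support/MemoryBuffer.h"

static void keepLinkedObjects(llvm::orc::ObjectLinkingLayer &ObjLinkingLayer) {
  ObjLinkingLayer.setReturnObjectBuffer(
      [](std::unique_ptr<llvm::MemoryBuffer> Buf) {
        // Ownership of the linked object buffer comes back here; a client
        // might stash it in an object cache or write it out (illustrative).
      });
}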
ObjectLinkingLayer &addPlugin(std::unique_ptr P) { std::lock_guard Lock(LayerMutex); @@ -138,6 +148,7 @@ private: jitlink::JITLinkMemoryManager &MemMgr; bool OverrideObjectFlags = false; bool AutoClaimObjectSymbols = false; + ReturnObjectBufferFunction ReturnObjectBuffer; DenseMap TrackedAllocs; std::vector UntrackedAllocs; std::vector> Plugins; @@ -153,10 +164,16 @@ public: Error notifyRemovingAllModules() override; private: + + struct EHFrameRange { + JITTargetAddress Addr = 0; + size_t Size; + }; + jitlink::EHFrameRegistrar &Registrar; - DenseMap InProcessLinks; - DenseMap TrackedEHFrameAddrs; - std::vector UntrackedEHFrameAddrs; + DenseMap InProcessLinks; + DenseMap TrackedEHFrameRanges; + std::vector UntrackedEHFrameRanges; }; } // end namespace orc diff --git a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h index 8b875b7906e1..86e8d5df3ad9 100644 --- a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h +++ b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h @@ -493,7 +493,7 @@ public: ExecutionSession &ES, JITTargetAddress ErrorHandlerAddress) : JITCompileCallbackManager( - llvm::make_unique(Client), ES, + std::make_unique(Client), ES, ErrorHandlerAddress) {} }; @@ -553,7 +553,7 @@ public: auto Id = IndirectStubOwnerIds.getNext(); if (auto Err = callB(Id)) return std::move(Err); - return llvm::make_unique(*this, Id); + return std::make_unique(*this, Id); } Expected diff --git a/include/llvm/ExecutionEngine/Orc/RPCSerialization.h b/include/llvm/ExecutionEngine/Orc/RPCSerialization.h index 07c7471afc6a..752a0a34e0a1 100644 --- a/include/llvm/ExecutionEngine/Orc/RPCSerialization.h +++ b/include/llvm/ExecutionEngine/Orc/RPCSerialization.h @@ -359,9 +359,9 @@ public: { assert(KeyName != nullptr && "No keyname pointer"); std::lock_guard Lock(SerializersMutex); - // FIXME: Move capture Serialize once we have C++14. Serializers[ErrorInfoT::classID()] = - [KeyName, Serialize](ChannelT &C, const ErrorInfoBase &EIB) -> Error { + [KeyName, Serialize = std::move(Serialize)]( + ChannelT &C, const ErrorInfoBase &EIB) -> Error { assert(EIB.dynamicClassID() == ErrorInfoT::classID() && "Serializer called for wrong error type"); if (auto Err = serializeSeq(C, *KeyName)) @@ -551,26 +551,26 @@ public: /// RPC channel serialization for std::tuple. static Error serialize(ChannelT &C, const std::tuple &V) { - return serializeTupleHelper(C, V, llvm::index_sequence_for()); + return serializeTupleHelper(C, V, std::index_sequence_for()); } /// RPC channel deserialization for std::tuple. static Error deserialize(ChannelT &C, std::tuple &V) { - return deserializeTupleHelper(C, V, llvm::index_sequence_for()); + return deserializeTupleHelper(C, V, std::index_sequence_for()); } private: // Serialization helper for std::tuple. template static Error serializeTupleHelper(ChannelT &C, const std::tuple &V, - llvm::index_sequence _) { + std::index_sequence _) { return serializeSeq(C, std::get(V)...); } // Serialization helper for std::tuple. template static Error deserializeTupleHelper(ChannelT &C, std::tuple &V, - llvm::index_sequence _) { + std::index_sequence _) { return deserializeSeq(C, std::get(V)...); } }; diff --git a/include/llvm/ExecutionEngine/Orc/RPCUtils.h b/include/llvm/ExecutionEngine/Orc/RPCUtils.h index 3b11e1b283de..ee9c2cc69c30 100644 --- a/include/llvm/ExecutionEngine/Orc/RPCUtils.h +++ b/include/llvm/ExecutionEngine/Orc/RPCUtils.h @@ -338,7 +338,9 @@ public: return Err; // Close the response message. 
- return C.endSendMessage(); + if (auto Err = C.endSendMessage()) + return Err; + return C.send(); } template @@ -350,7 +352,9 @@ public: return Err2; if (auto Err2 = serializeSeq(C, std::move(Err))) return Err2; - return C.endSendMessage(); + if (auto Err2 = C.endSendMessage()) + return Err2; + return C.send(); } }; @@ -378,8 +382,11 @@ public: C, *ResultOrErr)) return Err; - // Close the response message. - return C.endSendMessage(); + // End the response message. + if (auto Err = C.endSendMessage()) + return Err; + + return C.send(); } template @@ -389,7 +396,9 @@ public: return Err; if (auto Err2 = C.startSendMessage(ResponseId, SeqNo)) return Err2; - return C.endSendMessage(); + if (auto Err2 = C.endSendMessage()) + return Err2; + return C.send(); } }; @@ -502,7 +511,7 @@ public: static typename WrappedHandlerReturn::Type unpackAndRun(HandlerT &Handler, std::tuple &Args) { return unpackAndRunHelper(Handler, Args, - llvm::index_sequence_for()); + std::index_sequence_for()); } // Call the given handler with the given arguments. @@ -510,7 +519,7 @@ public: static Error unpackAndRunAsync(HandlerT &Handler, ResponderT &Responder, std::tuple &Args) { return unpackAndRunAsyncHelper(Handler, Responder, Args, - llvm::index_sequence_for()); + std::index_sequence_for()); } // Call the given handler with the given arguments. @@ -540,14 +549,13 @@ public: // Deserialize arguments from the channel. template static Error deserializeArgs(ChannelT &C, std::tuple &Args) { - return deserializeArgsHelper(C, Args, - llvm::index_sequence_for()); + return deserializeArgsHelper(C, Args, std::index_sequence_for()); } private: template static Error deserializeArgsHelper(ChannelT &C, std::tuple &Args, - llvm::index_sequence _) { + std::index_sequence _) { return SequenceSerialization::deserialize( C, std::get(Args)...); } @@ -556,18 +564,16 @@ private: static typename WrappedHandlerReturn< typename HandlerTraits::ReturnType>::Type unpackAndRunHelper(HandlerT &Handler, ArgTuple &Args, - llvm::index_sequence) { + std::index_sequence) { return run(Handler, std::move(std::get(Args))...); } - template static typename WrappedHandlerReturn< typename HandlerTraits::ReturnType>::Type unpackAndRunAsyncHelper(HandlerT &Handler, ResponderT &Responder, - ArgTuple &Args, - llvm::index_sequence) { + ArgTuple &Args, std::index_sequence) { return run(Handler, Responder, std::move(std::get(Args))...); } }; @@ -743,11 +749,15 @@ public: // to the user defined handler. Error handleResponse(ChannelT &C) override { Error Result = Error::success(); - if (auto Err = - SerializationTraits::deserialize(C, Result)) + if (auto Err = SerializationTraits::deserialize( + C, Result)) { + consumeError(std::move(Result)); return Err; - if (auto Err = C.endReceiveMessage()) + } + if (auto Err = C.endReceiveMessage()) { + consumeError(std::move(Result)); return Err; + } return Handler(std::move(Result)); } @@ -767,7 +777,7 @@ private: // Create a ResponseHandler from a given user handler. template std::unique_ptr> createResponseHandler(HandlerT H) { - return llvm::make_unique>( + return std::make_unique>( std::move(H)); } @@ -1403,14 +1413,12 @@ public: using ErrorReturn = typename RTraits::ErrorReturnType; using ErrorReturnPromise = typename RTraits::ReturnPromiseType; - // FIXME: Stack allocate and move this into the handler once LLVM builds - // with C++14. 
- auto Promise = std::make_shared(); - auto FutureResult = Promise->get_future(); + ErrorReturnPromise Promise; + auto FutureResult = Promise.get_future(); if (auto Err = this->template appendCallAsync( - [Promise](ErrorReturn RetOrErr) { - Promise->set_value(std::move(RetOrErr)); + [Promise = std::move(Promise)](ErrorReturn RetOrErr) mutable { + Promise.set_value(std::move(RetOrErr)); return Error::success(); }, Args...)) { @@ -1523,6 +1531,12 @@ public: return std::move(Err); } + if (auto Err = this->C.send()) { + detail::ResultTraits::consumeAbandoned( + std::move(Result)); + return std::move(Err); + } + while (!ReceivedResponse) { if (auto Err = this->handleOne()) { detail::ResultTraits::consumeAbandoned( @@ -1582,8 +1596,7 @@ public: // outstanding calls count, then poke the condition variable. using ArgType = typename detail::ResponseHandlerArg< typename detail::HandlerTraits::Type>::ArgType; - // FIXME: Move handler into wrapped handler once we have C++14. - auto WrappedHandler = [this, Handler](ArgType Arg) { + auto WrappedHandler = [this, Handler = std::move(Handler)](ArgType Arg) { auto Err = Handler(std::move(Arg)); std::unique_lock Lock(M); --NumOutstandingCalls; diff --git a/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h index d9535ce5f21f..c5106cf09ecc 100644 --- a/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h @@ -216,7 +216,7 @@ private: : K(std::move(K)), Parent(Parent), MemMgr(std::move(MemMgr)), - PFC(llvm::make_unique( + PFC(std::make_unique( std::move(Obj), std::move(Resolver), ProcessAllSections)) { buildInitialSymbolTable(PFC->Obj); @@ -234,7 +234,7 @@ private: JITSymbolResolverAdapter ResolverAdapter(Parent.ES, *PFC->Resolver, nullptr); - PFC->RTDyld = llvm::make_unique(*MemMgr, ResolverAdapter); + PFC->RTDyld = std::make_unique(*MemMgr, ResolverAdapter); PFC->RTDyld->setProcessAllSections(PFC->ProcessAllSections); Finalized = true; @@ -338,7 +338,7 @@ private: std::shared_ptr Resolver, bool ProcessAllSections) { using LOS = ConcreteLinkedObject; - return llvm::make_unique(Parent, std::move(K), std::move(Obj), + return std::make_unique(Parent, std::move(K), std::move(Obj), std::move(MemMgr), std::move(Resolver), ProcessAllSections); } diff --git a/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h b/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h index b87cf697a81e..d7304cfcf931 100644 --- a/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h +++ b/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h @@ -137,17 +137,12 @@ protected: RemoteSymbolId Id) : C(C), Id(Id) {} - RemoteSymbolMaterializer(const RemoteSymbolMaterializer &Other) - : C(Other.C), Id(Other.Id) { - // FIXME: This is a horrible, auto_ptr-style, copy-as-move operation. - // It should be removed as soon as LLVM has C++14's generalized - // lambda capture (at which point the materializer can be moved - // into the lambda in remoteToJITSymbol below). - const_cast(Other).Id = 0; + RemoteSymbolMaterializer(RemoteSymbolMaterializer &&Other) + : C(Other.C), Id(Other.Id) { + Other.Id = 0; } - RemoteSymbolMaterializer& - operator=(const RemoteSymbolMaterializer&) = delete; + RemoteSymbolMaterializer &operator=(RemoteSymbolMaterializer &&) = delete; /// Release the remote symbol. ~RemoteSymbolMaterializer() { @@ -218,9 +213,9 @@ protected: return nullptr; // else... 
RemoteSymbolMaterializer RSM(*this, RemoteSym.first); - auto Sym = - JITSymbol([RSM]() mutable { return RSM.materialize(); }, - RemoteSym.second); + auto Sym = JITSymbol( + [RSM = std::move(RSM)]() mutable { return RSM.materialize(); }, + RemoteSym.second); return Sym; } else return RemoteSymOrErr.takeError(); @@ -472,7 +467,7 @@ private: } Expected addObject(std::string ObjBuffer) { - auto Buffer = llvm::make_unique(std::move(ObjBuffer)); + auto Buffer = std::make_unique(std::move(ObjBuffer)); auto Id = HandleIdMgr.getNext(); assert(!BaseLayerHandles.count(Id) && "Id already in use?"); diff --git a/include/llvm/ExecutionEngine/Orc/SpeculateAnalyses.h b/include/llvm/ExecutionEngine/Orc/SpeculateAnalyses.h new file mode 100644 index 000000000000..cf57b63b6448 --- /dev/null +++ b/include/llvm/ExecutionEngine/Orc/SpeculateAnalyses.h @@ -0,0 +1,84 @@ +//===-- SpeculateAnalyses.h --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// \file +/// Contains the Analyses and Result Interpretation to select likely functions +/// to Speculatively compile before they are called. [Purely Experimentation] +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_SPECULATEANALYSES_H +#define LLVM_EXECUTIONENGINE_ORC_SPECULATEANALYSES_H + +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/Speculation.h" + +#include + +namespace llvm { + +namespace orc { + +// Provides common code. +class SpeculateQuery { +protected: + void findCalles(const BasicBlock *, DenseSet &); + bool isStraightLine(const Function &F); + +public: + using ResultTy = Optional>>; +}; + +// Direct calls in high frequency basic blocks are extracted. +class BlockFreqQuery : public SpeculateQuery { + size_t numBBToGet(size_t); + +public: + // Find likely next executables based on IR Block Frequency + ResultTy operator()(Function &F); +}; + +// This Query generates a sequence of basic blocks which follows the order of +// execution. +// A handful of BB with higher block frequencies are taken, then path to entry +// and end BB are discovered by traversing up & down the CFG. 
+class SequenceBBQuery : public SpeculateQuery { + struct WalkDirection { + bool Upward = true, Downward = true; + // the block associated contain a call + bool CallerBlock = false; + }; + +public: + using VisitedBlocksInfoTy = DenseMap; + using BlockListTy = SmallVector; + using BackEdgesInfoTy = + SmallVector, 8>; + using BlockFreqInfoTy = + SmallVector, 8>; + +private: + std::size_t getHottestBlocks(std::size_t TotalBlocks); + BlockListTy rearrangeBB(const Function &, const BlockListTy &); + BlockListTy queryCFG(Function &, const BlockListTy &); + void traverseToEntryBlock(const BasicBlock *, const BlockListTy &, + const BackEdgesInfoTy &, + const BranchProbabilityInfo *, + VisitedBlocksInfoTy &); + void traverseToExitBlock(const BasicBlock *, const BlockListTy &, + const BackEdgesInfoTy &, + const BranchProbabilityInfo *, + VisitedBlocksInfoTy &); + +public: + ResultTy operator()(Function &F); +}; + +} // namespace orc +} // namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_SPECULATEANALYSES_H diff --git a/include/llvm/ExecutionEngine/Orc/Speculation.h b/include/llvm/ExecutionEngine/Orc/Speculation.h new file mode 100644 index 000000000000..766a6b070f12 --- /dev/null +++ b/include/llvm/ExecutionEngine/Orc/Speculation.h @@ -0,0 +1,207 @@ +//===-- Speculation.h - Speculative Compilation --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Contains the definition to support speculative compilation when laziness is +// enabled. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_SPECULATION_H +#define LLVM_EXECUTIONENGINE_ORC_SPECULATION_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/Support/Debug.h" + +#include +#include +#include +#include + +namespace llvm { +namespace orc { + +class Speculator; + +// Track the Impls (JITDylib,Symbols) of Symbols while lazy call through +// trampolines are created. 
Operations are guarded by locks tp ensure that Imap +// stays in consistent state after read/write + +class ImplSymbolMap { + friend class Speculator; + +public: + using AliaseeDetails = std::pair; + using Alias = SymbolStringPtr; + using ImapTy = DenseMap; + void trackImpls(SymbolAliasMap ImplMaps, JITDylib *SrcJD); + +private: + // FIX ME: find a right way to distinguish the pre-compile Symbols, and update + // the callsite + Optional getImplFor(const SymbolStringPtr &StubSymbol) { + std::lock_guard Lockit(ConcurrentAccess); + auto Position = Maps.find(StubSymbol); + if (Position != Maps.end()) + return Position->getSecond(); + else + return None; + } + + std::mutex ConcurrentAccess; + ImapTy Maps; +}; + +// Defines Speculator Concept, +class Speculator { +public: + using TargetFAddr = JITTargetAddress; + using FunctionCandidatesMap = DenseMap; + using StubAddrLikelies = DenseMap; + +private: + void registerSymbolsWithAddr(TargetFAddr ImplAddr, + SymbolNameSet likelySymbols) { + std::lock_guard Lockit(ConcurrentAccess); + GlobalSpecMap.insert({ImplAddr, std::move(likelySymbols)}); + } + + void launchCompile(JITTargetAddress FAddr) { + SymbolNameSet CandidateSet; + // Copy CandidateSet is necessary, to avoid unsynchronized access to + // the datastructure. + { + std::lock_guard Lockit(ConcurrentAccess); + auto It = GlobalSpecMap.find(FAddr); + if (It == GlobalSpecMap.end()) + return; + CandidateSet = It->getSecond(); + } + + SymbolDependenceMap SpeculativeLookUpImpls; + + for (auto &Callee : CandidateSet) { + auto ImplSymbol = AliaseeImplTable.getImplFor(Callee); + // try to distinguish already compiled & library symbols + if (!ImplSymbol.hasValue()) + continue; + const auto &ImplSymbolName = ImplSymbol.getPointer()->first; + JITDylib *ImplJD = ImplSymbol.getPointer()->second; + auto &SymbolsInJD = SpeculativeLookUpImpls[ImplJD]; + SymbolsInJD.insert(ImplSymbolName); + } + + DEBUG_WITH_TYPE("orc", for (auto &I + : SpeculativeLookUpImpls) { + llvm::dbgs() << "\n In " << I.first->getName() << " JITDylib "; + for (auto &N : I.second) + llvm::dbgs() << "\n Likely Symbol : " << N; + }); + + // for a given symbol, there may be no symbol qualified for speculatively + // compile try to fix this before jumping to this code if possible. + for (auto &LookupPair : SpeculativeLookUpImpls) + ES.lookup(JITDylibSearchList({{LookupPair.first, true}}), + LookupPair.second, SymbolState::Ready, + [this](Expected Result) { + if (auto Err = Result.takeError()) + ES.reportError(std::move(Err)); + }, + NoDependenciesToRegister); + } + +public: + Speculator(ImplSymbolMap &Impl, ExecutionSession &ref) + : AliaseeImplTable(Impl), ES(ref), GlobalSpecMap(0) {} + Speculator(const Speculator &) = delete; + Speculator(Speculator &&) = delete; + Speculator &operator=(const Speculator &) = delete; + Speculator &operator=(Speculator &&) = delete; + + /// Define symbols for this Speculator object (__orc_speculator) and the + /// speculation runtime entry point symbol (__orc_speculate_for) in the + /// given JITDylib. + Error addSpeculationRuntime(JITDylib &JD, MangleAndInterner &Mangle); + + // Speculatively compile likely functions for the given Stub Address. + // destination of __orc_speculate_for jump + void speculateFor(TargetFAddr StubAddr) { launchCompile(StubAddr); } + + // FIXME : Register with Stub Address, after JITLink Fix. 
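Putting the speculation pieces together: the ImplSymbolMap is shared between the Speculator, the lazy-reexports path, and the CompileOnDemandLayer (setImplMap, further up). A wiring sketch under the interfaces shown in this patch; ES, DL, MainJD, CODLayer, ImplMap and Spec are all assumed to exist in the surrounding ORC setup:

#include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h"
#include "llvm/ExecutionEngine/Orc/Speculation.h"

static llvm::Error enableSpeculation(llvm::orc::ExecutionSession &ES,
                                     const llvm::DataLayout &DL,
                                     llvm::orc::JITDylib &MainJD,
                                     llvm::orc::CompileOnDemandLayer &CODLayer,
                                     llvm::orc::ImplSymbolMap &ImplMap,
                                     llvm::orc::Speculator &Spec) {
  using namespace llvm::orc;
  MangleAndInterner Mangle(ES, DL);

  // Expose __orc_speculator / __orc_speculate_for to JIT'd code.
  if (auto Err = Spec.addSpeculationRuntime(MainJD, Mangle))
    return Err;

  // Route lazily compiled symbols through the impl table so the speculator
  // can find their bodies; pass &ImplMap to lazyReexports(...) as well so
  // stub targets get tracked.
  CODLayer.setImplMap(&ImplMap);
  return llvm::Error::success();
}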
+ void registerSymbols(FunctionCandidatesMap Candidates, JITDylib *JD) { + for (auto &SymPair : Candidates) { + auto Target = SymPair.first; + auto Likely = SymPair.second; + + auto OnReadyFixUp = [Likely, Target, + this](Expected ReadySymbol) { + if (ReadySymbol) { + auto RAddr = (*ReadySymbol)[Target].getAddress(); + registerSymbolsWithAddr(RAddr, std::move(Likely)); + } else + this->getES().reportError(ReadySymbol.takeError()); + }; + // Include non-exported symbols also. + ES.lookup(JITDylibSearchList({{JD, true}}), SymbolNameSet({Target}), + SymbolState::Ready, OnReadyFixUp, NoDependenciesToRegister); + } + } + + ExecutionSession &getES() { return ES; } + +private: + static void speculateForEntryPoint(Speculator *Ptr, uint64_t StubId); + std::mutex ConcurrentAccess; + ImplSymbolMap &AliaseeImplTable; + ExecutionSession &ES; + StubAddrLikelies GlobalSpecMap; +}; + +class IRSpeculationLayer : public IRLayer { +public: + using IRlikiesStrRef = Optional>>; + using ResultEval = std::function; + using TargetAndLikelies = DenseMap; + + IRSpeculationLayer(ExecutionSession &ES, IRCompileLayer &BaseLayer, + Speculator &Spec, MangleAndInterner &Mangle, + ResultEval Interpreter) + : IRLayer(ES), NextLayer(BaseLayer), S(Spec), Mangle(Mangle), + QueryAnalysis(Interpreter) {} + + void emit(MaterializationResponsibility R, ThreadSafeModule TSM); + +private: + TargetAndLikelies + internToJITSymbols(DenseMap> IRNames) { + assert(!IRNames.empty() && "No IRNames received to Intern?"); + TargetAndLikelies InternedNames; + DenseSet TargetJITNames; + for (auto &NamePair : IRNames) { + for (auto &TargetNames : NamePair.second) + TargetJITNames.insert(Mangle(TargetNames)); + + InternedNames[Mangle(NamePair.first)] = std::move(TargetJITNames); + } + return InternedNames; + } + + IRCompileLayer &NextLayer; + Speculator &S; + MangleAndInterner &Mangle; + ResultEval QueryAnalysis; +}; + +} // namespace orc +} // namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_SPECULATION_H diff --git a/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h b/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h index 5787500387c4..2347faed37a2 100644 --- a/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h +++ b/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h @@ -38,17 +38,12 @@ private: public: // RAII based lock for ThreadSafeContext. class LLVM_NODISCARD Lock { - private: - using UnderlyingLock = std::lock_guard; - public: - Lock(std::shared_ptr S) - : S(std::move(S)), - L(llvm::make_unique(this->S->Mutex)) {} + Lock(std::shared_ptr S) : S(std::move(S)), L(this->S->Mutex) {} private: std::shared_ptr S; - std::unique_ptr L; + std::unique_lock L; }; /// Construct a null context. @@ -69,7 +64,7 @@ public: /// instance, or null if the instance was default constructed. const LLVMContext *getContext() const { return S ? S->Ctx.get() : nullptr; } - Lock getLock() { + Lock getLock() const { assert(S && "Can not lock an empty ThreadSafeContext"); return Lock(S); } @@ -95,7 +90,7 @@ public: // We also need to lock the context to make sure the module tear-down // does not overlap any other work on the context. if (M) { - auto L = getContextLock(); + auto L = TSCtx.getLock(); M = nullptr; } M = std::move(Other.M); @@ -117,23 +112,14 @@ public: ~ThreadSafeModule() { // We need to lock the context while we destruct the module. if (M) { - auto L = getContextLock(); + auto L = TSCtx.getLock(); M = nullptr; } } - /// Get the module wrapped by this ThreadSafeModule. 
- Module *getModule() { return M.get(); } - - /// Get the module wrapped by this ThreadSafeModule. - const Module *getModule() const { return M.get(); } - - /// Take out a lock on the ThreadSafeContext for this module. - ThreadSafeContext::Lock getContextLock() { return TSCtx.getLock(); } - /// Boolean conversion: This ThreadSafeModule will evaluate to true if it /// wraps a non-null module. - explicit operator bool() { + explicit operator bool() const { if (M) { assert(TSCtx.getContext() && "Non-null module must have non-null context"); @@ -142,6 +128,33 @@ public: return false; } + /// Locks the associated ThreadSafeContext and calls the given function + /// on the contained Module. + template + auto withModuleDo(Func &&F) -> decltype(F(std::declval())) { + assert(M && "Can not call on null module"); + auto Lock = TSCtx.getLock(); + return F(*M); + } + + /// Locks the associated ThreadSafeContext and calls the given function + /// on the contained Module. + template + auto withModuleDo(Func &&F) const + -> decltype(F(std::declval())) { + auto Lock = TSCtx.getLock(); + return F(*M); + } + + /// Get a raw pointer to the contained module without locking the context. + Module *getModuleUnlocked() { return M.get(); } + + /// Get a raw pointer to the contained module without locking the context. + const Module *getModuleUnlocked() const { return M.get(); } + + /// Returns the context for this ThreadSafeModule. + ThreadSafeContext getContext() const { return TSCtx; } + private: std::unique_ptr M; ThreadSafeContext TSCtx; diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h index b2b4eba47074..ce7024a7f19b 100644 --- a/include/llvm/ExecutionEngine/RuntimeDyld.h +++ b/include/llvm/ExecutionEngine/RuntimeDyld.h @@ -13,6 +13,7 @@ #ifndef LLVM_EXECUTIONENGINE_RUNTIMEDYLD_H #define LLVM_EXECUTIONENGINE_RUNTIMEDYLD_H +#include "llvm/ADT/FunctionExtras.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/DIContext.h" @@ -271,10 +272,10 @@ private: std::unique_ptr UnderlyingBuffer, RuntimeDyld::MemoryManager &MemMgr, JITSymbolResolver &Resolver, bool ProcessAllSections, - std::function, - std::map)> + unique_function, + std::map)> OnLoaded, - std::function OnEmitted); + unique_function OnEmitted); // RuntimeDyldImpl is the actual class. RuntimeDyld is just the public // interface. @@ -291,14 +292,14 @@ private: // but ORC's RTDyldObjectLinkingLayer2. Internally it constructs a RuntimeDyld // instance and uses continuation passing to perform the fix-up and finalize // steps asynchronously. 
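With getModule and getContextLock removed above, module access goes through withModuleDo (which takes the context lock around the callback) or getModuleUnlocked. A small usage sketch, assuming TSM is a non-null ThreadSafeModule:

#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
#include "llvm/IR/Module.h"

static std::string moduleName(llvm::orc::ThreadSafeModule &TSM) {
  // withModuleDo locks the ThreadSafeContext for the duration of the callback.
  return TSM.withModuleDo(
      [](llvm::Module &M) { return M.getModuleIdentifier(); });
}

static void touchUnlocked(llvm::orc::ThreadSafeModule &TSM) {
  // getModuleUnlocked skips locking; the caller takes the lock explicitly and
  // must ensure no other thread is using the underlying context.
  auto Lock = TSM.getContext().getLock();
  llvm::Module *M = TSM.getModuleUnlocked();
  (void)M;
}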
-void jitLinkForORC(object::ObjectFile &Obj, - std::unique_ptr UnderlyingBuffer, - RuntimeDyld::MemoryManager &MemMgr, - JITSymbolResolver &Resolver, bool ProcessAllSections, - std::function, - std::map)> - OnLoaded, - std::function OnEmitted); +void jitLinkForORC( + object::ObjectFile &Obj, std::unique_ptr UnderlyingBuffer, + RuntimeDyld::MemoryManager &MemMgr, JITSymbolResolver &Resolver, + bool ProcessAllSections, + unique_function, + std::map)> + OnLoaded, + unique_function OnEmitted); } // end namespace llvm diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h index 06cc09e1cfc7..e6b280465f72 100644 --- a/include/llvm/IR/Attributes.h +++ b/include/llvm/IR/Attributes.h @@ -22,6 +22,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Config/llvm-config.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/PointerLikeTypeTraits.h" #include #include @@ -94,8 +95,8 @@ public: /// Return a uniquified Attribute object that has the specific /// alignment set. - static Attribute getWithAlignment(LLVMContext &Context, uint64_t Align); - static Attribute getWithStackAlignment(LLVMContext &Context, uint64_t Align); + static Attribute getWithAlignment(LLVMContext &Context, Align Alignment); + static Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment); static Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes); static Attribute getWithDereferenceableOrNullBytes(LLVMContext &Context, @@ -150,11 +151,11 @@ public: /// Returns the alignment field of an attribute as a byte alignment /// value. - unsigned getAlignment() const; + MaybeAlign getAlignment() const; /// Returns the stack alignment field of an attribute as a byte /// alignment value. - unsigned getStackAlignment() const; + MaybeAlign getStackAlignment() const; /// Returns the number of dereferenceable bytes from the /// dereferenceable attribute. @@ -284,8 +285,8 @@ public: /// Return the target-dependent attribute object. Attribute getAttribute(StringRef Kind) const; - unsigned getAlignment() const; - unsigned getStackAlignment() const; + MaybeAlign getAlignment() const; + MaybeAlign getStackAlignment() const; uint64_t getDereferenceableBytes() const; uint64_t getDereferenceableOrNullBytes() const; Type *getByValType() const; @@ -603,16 +604,16 @@ public: } /// Return the alignment of the return value. - unsigned getRetAlignment() const; + MaybeAlign getRetAlignment() const; /// Return the alignment for the specified function parameter. - unsigned getParamAlignment(unsigned ArgNo) const; + MaybeAlign getParamAlignment(unsigned ArgNo) const; /// Return the byval type for the specified function parameter. Type *getParamByValType(unsigned ArgNo) const; /// Get the stack alignment. - unsigned getStackAlignment(unsigned Index) const; + MaybeAlign getStackAlignment(unsigned Index) const; /// Get the number of dereferenceable bytes (or zero if unknown). uint64_t getDereferenceableBytes(unsigned Index) const; @@ -704,9 +705,9 @@ template <> struct DenseMapInfo { /// equality, presence of attributes, etc. class AttrBuilder { std::bitset Attrs; - std::map TargetDepAttrs; - uint64_t Alignment = 0; - uint64_t StackAlignment = 0; + std::map> TargetDepAttrs; + MaybeAlign Alignment; + MaybeAlign StackAlignment; uint64_t DerefBytes = 0; uint64_t DerefOrNullBytes = 0; uint64_t AllocSizeArgs = 0; @@ -773,10 +774,10 @@ public: bool hasAlignmentAttr() const; /// Retrieve the alignment attribute, if it exists. 
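Alignment attributes now traffic in Align/MaybeAlign rather than raw integers (getWithAlignment above, the AttrBuilder accessors below). A sketch of the updated calls, assuming Ctx is an LLVMContext; the specific alignments are illustrative:

#include "llvm/IR/Attributes.h"
#include "llvm/Support/Alignment.h"

static llvm::AttrBuilder alignedParamAttrs(llvm::LLVMContext &Ctx) {
  using namespace llvm;
  AttrBuilder B;
  B.addAlignmentAttr(Align(16));           // MaybeAlign overload; no-op if empty
  B.addStackAlignmentAttr(MaybeAlign(8));
  if (MaybeAlign A = B.getAlignment())
    (void)A->value();                      // raw byte alignment, here 16
  // The uniqued Attribute constructors now take Align directly:
  (void)Attribute::getWithAlignment(Ctx, Align(16));
  return B;
}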
- uint64_t getAlignment() const { return Alignment; } + MaybeAlign getAlignment() const { return Alignment; } /// Retrieve the stack alignment attribute, if it exists. - uint64_t getStackAlignment() const { return StackAlignment; } + MaybeAlign getStackAlignment() const { return StackAlignment; } /// Retrieve the number of dereferenceable bytes, if the /// dereferenceable attribute exists (zero is returned otherwise). @@ -793,13 +794,29 @@ public: /// doesn't exist, pair(0, 0) is returned. std::pair> getAllocSizeArgs() const; + /// This turns an alignment into the form used internally in Attribute. + /// This call has no effect if Align is not set. + AttrBuilder &addAlignmentAttr(MaybeAlign Align); + /// This turns an int alignment (which must be a power of 2) into the /// form used internally in Attribute. - AttrBuilder &addAlignmentAttr(unsigned Align); + /// This call has no effect if Align is 0. + /// Deprecated, use the version using a MaybeAlign. + inline AttrBuilder &addAlignmentAttr(unsigned Align) { + return addAlignmentAttr(MaybeAlign(Align)); + } + + /// This turns a stack alignment into the form used internally in Attribute. + /// This call has no effect if Align is not set. + AttrBuilder &addStackAlignmentAttr(MaybeAlign Align); /// This turns an int stack alignment (which must be a power of 2) into /// the form used internally in Attribute. - AttrBuilder &addStackAlignmentAttr(unsigned Align); + /// This call has no effect if Align is 0. + /// Deprecated, use the version using a MaybeAlign. + inline AttrBuilder &addStackAlignmentAttr(unsigned Align) { + return addStackAlignmentAttr(MaybeAlign(Align)); + } /// This turns the number of dereferenceable bytes into the form used /// internally in Attribute. diff --git a/include/llvm/IR/AutoUpgrade.h b/include/llvm/IR/AutoUpgrade.h index 017ad93d8a2a..66f38e5b55d1 100644 --- a/include/llvm/IR/AutoUpgrade.h +++ b/include/llvm/IR/AutoUpgrade.h @@ -54,9 +54,9 @@ namespace llvm { /// module is modified. bool UpgradeModuleFlags(Module &M); - /// This checks for objc retain release marker which should be upgraded. It - /// returns true if module is modified. - bool UpgradeRetainReleaseMarker(Module &M); + /// Convert calls to ARC runtime functions to intrinsic calls and upgrade the + /// old retain release marker to new module flag format. + void UpgradeARCRuntime(Module &M); void UpgradeSectionAttributes(Module &M); @@ -87,6 +87,10 @@ namespace llvm { /// Upgrade the loop attachment metadata node. MDNode *upgradeInstructionLoopAttachment(MDNode &N); + /// Upgrade the datalayout string by adding a section for address space + /// pointers. + std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple); + } // End llvm namespace #endif diff --git a/include/llvm/IR/BasicBlock.h b/include/llvm/IR/BasicBlock.h index 69555af50e1f..d594145f8636 100644 --- a/include/llvm/IR/BasicBlock.h +++ b/include/llvm/IR/BasicBlock.h @@ -192,6 +192,11 @@ public: std::function>> instructionsWithoutDebug(); + /// Return the size of the basic block ignoring debug instructions + filter_iterator>::difference_type + sizeWithoutDebug() const; + /// Unlink 'this' from the containing function, but do not delete it. void removeFromParent(); diff --git a/include/llvm/IR/CallSite.h b/include/llvm/IR/CallSite.h index b47a96c5d5fa..13b1ae8d0e32 100644 --- a/include/llvm/IR/CallSite.h +++ b/include/llvm/IR/CallSite.h @@ -854,6 +854,15 @@ public: return CI.ParameterEncoding[0]; } + /// Return the use of the callee value in the underlying instruction. 
Only + /// valid for callback calls! + const Use &getCalleeUseForCallback() const { + int CalleeArgIdx = getCallArgOperandNoForCallee(); + assert(CalleeArgIdx >= 0 && + unsigned(CalleeArgIdx) < getInstruction()->getNumOperands()); + return getInstruction()->getOperandUse(CalleeArgIdx); + } + /// Return the pointer to function that is being called. Value *getCalledValue() const { if (isDirectCall()) diff --git a/include/llvm/IR/CallingConv.h b/include/llvm/IR/CallingConv.h index 399c6ad521fa..c1c979c2e2ab 100644 --- a/include/llvm/IR/CallingConv.h +++ b/include/llvm/IR/CallingConv.h @@ -75,6 +75,11 @@ namespace CallingConv { // CXX_FAST_TLS - Calling convention for access functions. CXX_FAST_TLS = 17, + /// Tail - This calling convention attemps to make calls as fast as + /// possible while guaranteeing that tail call optimization can always + /// be performed. + Tail = 18, + // Target - This is the start of the target-specific calling conventions, // e.g. fastcall and thiscall on X86. FirstTargetCC = 64, @@ -222,6 +227,14 @@ namespace CallingConv { // Calling convention between AArch64 Advanced SIMD functions AArch64_VectorCall = 97, + /// Calling convention between AArch64 SVE functions + AArch64_SVE_VectorCall = 98, + + /// Calling convention for emscripten __invoke_* functions. The first + /// argument is required to be the function ptr being indirectly called. + /// The remainder matches the regular calling convention. + WASM_EmscriptenInvoke = 99, + /// The highest possible calling convention ID. Must be some 2^k - 1. MaxID = 1023 }; diff --git a/include/llvm/IR/Constant.h b/include/llvm/IR/Constant.h index 931576651224..2b6a6e4141b9 100644 --- a/include/llvm/IR/Constant.h +++ b/include/llvm/IR/Constant.h @@ -86,6 +86,12 @@ public: /// floating-point constant with all NaN elements. bool isNaN() const; + /// Return true if this constant and a constant 'Y' are element-wise equal. + /// This is identical to just comparing the pointers, with the exception that + /// for vectors, if only one of the constants has an `undef` element in some + /// lane, the constants still match. + bool isElementWiseEqual(Value *Y) const; + /// Return true if this is a vector constant that includes any undefined /// elements. bool containsUndefElement() const; diff --git a/include/llvm/IR/ConstantRange.h b/include/llvm/IR/ConstantRange.h index 91f3f31abe17..964f9e8e9bc9 100644 --- a/include/llvm/IR/ConstantRange.h +++ b/include/llvm/IR/ConstantRange.h @@ -330,9 +330,13 @@ public: /// from an addition of a value in this range and a value in \p Other. ConstantRange add(const ConstantRange &Other) const; - /// Return a new range representing the possible values resulting from a - /// known NSW addition of a value in this range and \p Other constant. - ConstantRange addWithNoSignedWrap(const APInt &Other) const; + /// Return a new range representing the possible values resulting + /// from an addition with wrap type \p NoWrapKind of a value in this + /// range and a value in \p Other. + /// If the result range is disjoint, the preferred range is determined by the + /// \p PreferredRangeType. + ConstantRange addWithNoWrap(const ConstantRange &Other, unsigned NoWrapKind, + PreferredRangeType RangeType = Smallest) const; /// Return a new range representing the possible values resulting /// from a subtraction of a value in this range and a value in \p Other. 
diff --git a/include/llvm/IR/DataLayout.h b/include/llvm/IR/DataLayout.h index ac9770a15120..85093dd218f8 100644 --- a/include/llvm/IR/DataLayout.h +++ b/include/llvm/IR/DataLayout.h @@ -25,10 +25,11 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" -#include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/TypeSize.h" #include #include #include @@ -71,11 +72,11 @@ struct LayoutAlignElem { /// Alignment type from \c AlignTypeEnum unsigned AlignType : 8; unsigned TypeBitWidth : 24; - unsigned ABIAlign : 16; - unsigned PrefAlign : 16; + Align ABIAlign; + Align PrefAlign; - static LayoutAlignElem get(AlignTypeEnum align_type, unsigned abi_align, - unsigned pref_align, uint32_t bit_width); + static LayoutAlignElem get(AlignTypeEnum align_type, Align abi_align, + Align pref_align, uint32_t bit_width); bool operator==(const LayoutAlignElem &rhs) const; }; @@ -87,15 +88,15 @@ struct LayoutAlignElem { /// \note The unusual order of elements in the structure attempts to reduce /// padding and make the structure slightly more cache friendly. struct PointerAlignElem { - unsigned ABIAlign; - unsigned PrefAlign; + Align ABIAlign; + Align PrefAlign; uint32_t TypeByteWidth; uint32_t AddressSpace; uint32_t IndexWidth; /// Initializer - static PointerAlignElem get(uint32_t AddressSpace, unsigned ABIAlign, - unsigned PrefAlign, uint32_t TypeByteWidth, + static PointerAlignElem get(uint32_t AddressSpace, Align ABIAlign, + Align PrefAlign, uint32_t TypeByteWidth, uint32_t IndexWidth); bool operator==(const PointerAlignElem &rhs) const; @@ -120,10 +121,10 @@ private: bool BigEndian; unsigned AllocaAddrSpace; - unsigned StackNaturalAlign; + MaybeAlign StackNaturalAlign; unsigned ProgramAddrSpace; - unsigned FunctionPtrAlign; + MaybeAlign FunctionPtrAlign; FunctionPtrAlignType TheFunctionPtrAlignType; enum ManglingModeT { @@ -172,16 +173,15 @@ private: /// well-defined bitwise representation. SmallVector NonIntegralAddressSpaces; - void setAlignment(AlignTypeEnum align_type, unsigned abi_align, - unsigned pref_align, uint32_t bit_width); - unsigned getAlignmentInfo(AlignTypeEnum align_type, uint32_t bit_width, - bool ABIAlign, Type *Ty) const; - void setPointerAlignment(uint32_t AddrSpace, unsigned ABIAlign, - unsigned PrefAlign, uint32_t TypeByteWidth, - uint32_t IndexWidth); + void setAlignment(AlignTypeEnum align_type, Align abi_align, Align pref_align, + uint32_t bit_width); + Align getAlignmentInfo(AlignTypeEnum align_type, uint32_t bit_width, + bool ABIAlign, Type *Ty) const; + void setPointerAlignment(uint32_t AddrSpace, Align ABIAlign, Align PrefAlign, + uint32_t TypeByteWidth, uint32_t IndexWidth); /// Internal helper method that returns requested alignment for type. - unsigned getAlignment(Type *Ty, bool abi_or_pref) const; + Align getAlignment(Type *Ty, bool abi_or_pref) const; /// Parses a target data specification string. Assert if the string is /// malformed. @@ -261,17 +261,21 @@ public: bool isIllegalInteger(uint64_t Width) const { return !isLegalInteger(Width); } /// Returns true if the given alignment exceeds the natural stack alignment. 
- bool exceedsNaturalStackAlignment(unsigned Align) const { - return (StackNaturalAlign != 0) && (Align > StackNaturalAlign); + bool exceedsNaturalStackAlignment(Align Alignment) const { + return StackNaturalAlign && (Alignment > StackNaturalAlign); + } + + Align getStackAlignment() const { + assert(StackNaturalAlign && "StackNaturalAlign must be defined"); + return *StackNaturalAlign; } - unsigned getStackAlignment() const { return StackNaturalAlign; } unsigned getAllocaAddrSpace() const { return AllocaAddrSpace; } /// Returns the alignment of function pointers, which may or may not be /// related to the alignment of functions. /// \see getFunctionPtrAlignType - unsigned getFunctionPtrAlign() const { return FunctionPtrAlign; } + MaybeAlign getFunctionPtrAlign() const { return FunctionPtrAlign; } /// Return the type of function pointer alignment. /// \see getFunctionPtrAlign @@ -344,12 +348,12 @@ public: } /// Layout pointer alignment - unsigned getPointerABIAlignment(unsigned AS) const; + Align getPointerABIAlignment(unsigned AS) const; /// Return target's alignment for stack-based pointers /// FIXME: The defaults need to be removed once all of /// the backends/clients are updated. - unsigned getPointerPrefAlignment(unsigned AS = 0) const; + Align getPointerPrefAlignment(unsigned AS = 0) const; /// Layout pointer size /// FIXME: The defaults need to be removed once all of @@ -433,23 +437,33 @@ public: /// Returns the number of bits necessary to hold the specified type. /// + /// If Ty is a scalable vector type, the scalable property will be set and + /// the runtime size will be a positive integer multiple of the base size. + /// /// For example, returns 36 for i36 and 80 for x86_fp80. The type passed must /// have a size (Type::isSized() must return true). - uint64_t getTypeSizeInBits(Type *Ty) const; + TypeSize getTypeSizeInBits(Type *Ty) const; /// Returns the maximum number of bytes that may be overwritten by /// storing the specified type. /// + /// If Ty is a scalable vector type, the scalable property will be set and + /// the runtime size will be a positive integer multiple of the base size. + /// /// For example, returns 5 for i36 and 10 for x86_fp80. - uint64_t getTypeStoreSize(Type *Ty) const { - return (getTypeSizeInBits(Ty) + 7) / 8; + TypeSize getTypeStoreSize(Type *Ty) const { + TypeSize BaseSize = getTypeSizeInBits(Ty); + return { (BaseSize.getKnownMinSize() + 7) / 8, BaseSize.isScalable() }; } /// Returns the maximum number of bits that may be overwritten by /// storing the specified type; always a multiple of 8. /// + /// If Ty is a scalable vector type, the scalable property will be set and + /// the runtime size will be a positive integer multiple of the base size. + /// /// For example, returns 40 for i36 and 80 for x86_fp80. - uint64_t getTypeStoreSizeInBits(Type *Ty) const { + TypeSize getTypeStoreSizeInBits(Type *Ty) const { return 8 * getTypeStoreSize(Ty); } @@ -464,9 +478,12 @@ public: /// Returns the offset in bytes between successive objects of the /// specified type, including alignment padding. /// + /// If Ty is a scalable vector type, the scalable property will be set and + /// the runtime size will be a positive integer multiple of the base size. + /// /// This is the amount that alloca reserves for this type. For example, /// returns 12 or 16 for x86_fp80, depending on alignment. - uint64_t getTypeAllocSize(Type *Ty) const { + TypeSize getTypeAllocSize(Type *Ty) const { // Round up to the next alignment boundary. 
return alignTo(getTypeStoreSize(Ty), getABITypeAlignment(Ty)); } @@ -474,18 +491,28 @@ public: /// Returns the offset in bits between successive objects of the /// specified type, including alignment padding; always a multiple of 8. /// + /// If Ty is a scalable vector type, the scalable property will be set and + /// the runtime size will be a positive integer multiple of the base size. + /// /// This is the amount that alloca reserves for this type. For example, /// returns 96 or 128 for x86_fp80, depending on alignment. - uint64_t getTypeAllocSizeInBits(Type *Ty) const { + TypeSize getTypeAllocSizeInBits(Type *Ty) const { return 8 * getTypeAllocSize(Ty); } /// Returns the minimum ABI-required alignment for the specified type. unsigned getABITypeAlignment(Type *Ty) const; + /// Helper function to return `Alignment` if it's set or the result of + /// `getABITypeAlignment(Ty)`, in any case the result is a valid alignment. + inline Align getValueOrABITypeAlignment(MaybeAlign Alignment, + Type *Ty) const { + return Alignment ? *Alignment : Align(getABITypeAlignment(Ty)); + } + /// Returns the minimum ABI-required alignment for an integer type of /// the specified bitwidth. - unsigned getABIIntegerTypeAlignment(unsigned BitWidth) const; + Align getABIIntegerTypeAlignment(unsigned BitWidth) const; /// Returns the preferred stack/global alignment for the specified /// type. @@ -493,10 +520,6 @@ public: /// This is always at least as good as the ABI alignment. unsigned getPrefTypeAlignment(Type *Ty) const; - /// Returns the preferred alignment for the specified type, returned as - /// log2 of the value (a shift amount). - unsigned getPreferredTypeAlignmentShift(Type *Ty) const; - /// Returns an integer type with size at least as big as that of a /// pointer in the given address space. IntegerType *getIntPtrType(LLVMContext &C, unsigned AddressSpace = 0) const; @@ -561,7 +584,7 @@ inline LLVMTargetDataRef wrap(const DataLayout *P) { /// based on the DataLayout structure. class StructLayout { uint64_t StructSize; - unsigned StructAlignment; + Align StructAlignment; unsigned IsPadded : 1; unsigned NumElements : 31; uint64_t MemberOffsets[1]; // variable sized array! @@ -571,7 +594,7 @@ public: uint64_t getSizeInBits() const { return 8 * StructSize; } - unsigned getAlignment() const { return StructAlignment; } + Align getAlignment() const { return StructAlignment; } /// Returns whether the struct has padding or not between its fields. /// NB: Padding in nested element is not taken into account. @@ -598,13 +621,13 @@ private: // The implementation of this method is provided inline as it is particularly // well suited to constant folding when called on a specific Type subclass. -inline uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const { +inline TypeSize DataLayout::getTypeSizeInBits(Type *Ty) const { assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!"); switch (Ty->getTypeID()) { case Type::LabelTyID: - return getPointerSizeInBits(0); + return TypeSize::Fixed(getPointerSizeInBits(0)); case Type::PointerTyID: - return getPointerSizeInBits(Ty->getPointerAddressSpace()); + return TypeSize::Fixed(getPointerSizeInBits(Ty->getPointerAddressSpace())); case Type::ArrayTyID: { ArrayType *ATy = cast(Ty); return ATy->getNumElements() * @@ -612,26 +635,30 @@ inline uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const { } case Type::StructTyID: // Get the layout annotation... which is lazily created on demand. 
- return getStructLayout(cast(Ty))->getSizeInBits(); + return TypeSize::Fixed( + getStructLayout(cast(Ty))->getSizeInBits()); case Type::IntegerTyID: - return Ty->getIntegerBitWidth(); + return TypeSize::Fixed(Ty->getIntegerBitWidth()); case Type::HalfTyID: - return 16; + return TypeSize::Fixed(16); case Type::FloatTyID: - return 32; + return TypeSize::Fixed(32); case Type::DoubleTyID: case Type::X86_MMXTyID: - return 64; + return TypeSize::Fixed(64); case Type::PPC_FP128TyID: case Type::FP128TyID: - return 128; + return TypeSize::Fixed(128); // In memory objects this is always aligned to a higher boundary, but // only 80 bits contain information. case Type::X86_FP80TyID: - return 80; + return TypeSize::Fixed(80); case Type::VectorTyID: { VectorType *VTy = cast(Ty); - return VTy->getNumElements() * getTypeSizeInBits(VTy->getElementType()); + auto EltCnt = VTy->getElementCount(); + uint64_t MinBits = EltCnt.Min * + getTypeSizeInBits(VTy->getElementType()).getFixedSize(); + return TypeSize(MinBits, EltCnt.Scalable); } default: llvm_unreachable("DataLayout::getTypeSizeInBits(): Unsupported type"); diff --git a/include/llvm/IR/DebugInfoFlags.def b/include/llvm/IR/DebugInfoFlags.def index 07e3d6bdc9e5..f90c580f10ef 100644 --- a/include/llvm/IR/DebugInfoFlags.def +++ b/include/llvm/IR/DebugInfoFlags.def @@ -31,7 +31,8 @@ HANDLE_DI_FLAG(2, Protected) HANDLE_DI_FLAG(3, Public) HANDLE_DI_FLAG((1 << 2), FwdDecl) HANDLE_DI_FLAG((1 << 3), AppleBlock) -HANDLE_DI_FLAG((1 << 4), BlockByrefStruct) +// Used to be BlockByRef, can be reused for anything except DICompositeType. +HANDLE_DI_FLAG((1 << 4), ReservedBit4) HANDLE_DI_FLAG((1 << 5), Virtual) HANDLE_DI_FLAG((1 << 6), Artificial) HANDLE_DI_FLAG((1 << 7), Explicit) @@ -42,8 +43,7 @@ HANDLE_DI_FLAG((1 << 11), Vector) HANDLE_DI_FLAG((1 << 12), StaticMember) HANDLE_DI_FLAG((1 << 13), LValueReference) HANDLE_DI_FLAG((1 << 14), RValueReference) -// 15 was formerly ExternalTypeRef, but this was never used. -HANDLE_DI_FLAG((1 << 15), Reserved) +HANDLE_DI_FLAG((1 << 15), ExportSymbols) HANDLE_DI_FLAG((1 << 16), SingleInheritance) HANDLE_DI_FLAG((2 << 16), MultipleInheritance) HANDLE_DI_FLAG((3 << 16), VirtualInheritance) diff --git a/include/llvm/IR/DebugInfoMetadata.h b/include/llvm/IR/DebugInfoMetadata.h index 9dc6dfbb0f68..28a59576b7c6 100644 --- a/include/llvm/IR/DebugInfoMetadata.h +++ b/include/llvm/IR/DebugInfoMetadata.h @@ -650,7 +650,6 @@ public: } bool isForwardDecl() const { return getFlags() & FlagFwdDecl; } bool isAppleBlockExtension() const { return getFlags() & FlagAppleBlock; } - bool isBlockByrefStruct() const { return getFlags() & FlagBlockByrefStruct; } bool isVirtual() const { return getFlags() & FlagVirtual; } bool isArtificial() const { return getFlags() & FlagArtificial; } bool isObjectPointer() const { return getFlags() & FlagObjectPointer; } @@ -668,6 +667,7 @@ public: } bool isBigEndian() const { return getFlags() & FlagBigEndian; } bool isLittleEndian() const { return getFlags() & FlagLittleEndian; } + bool getExportSymbols() const { return getFlags() & FlagExportSymbols; } static bool classof(const Metadata *MD) { switch (MD->getMetadataID()) { @@ -2569,7 +2569,7 @@ public: /// (This is the only configuration of entry values that is supported.) 
bool isEntryValue() const { return getNumElements() > 0 && - getElement(0) == dwarf::DW_OP_entry_value; + getElement(0) == dwarf::DW_OP_LLVM_entry_value; } }; diff --git a/include/llvm/IR/DerivedTypes.h b/include/llvm/IR/DerivedTypes.h index 3c1d4278905f..20097ef3f31a 100644 --- a/include/llvm/IR/DerivedTypes.h +++ b/include/llvm/IR/DerivedTypes.h @@ -23,7 +23,7 @@ #include "llvm/IR/Type.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/ScalableSize.h" +#include "llvm/Support/TypeSize.h" #include #include @@ -62,6 +62,11 @@ public: /// Get or create an IntegerType instance. static IntegerType *get(LLVMContext &C, unsigned NumBits); + /// Returns type twice as wide the input type. + IntegerType *getExtendedType() const { + return Type::getIntNTy(getContext(), 2 * getScalarSizeInBits()); + } + /// Get the number of bits in this IntegerType unsigned getBitWidth() const { return getSubclassData(); } @@ -470,21 +475,47 @@ public: /// This static method is like getInteger except that the element types are /// twice as wide as the elements in the input type. static VectorType *getExtendedElementVectorType(VectorType *VTy) { - unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); - Type *EltTy = IntegerType::get(VTy->getContext(), EltBits * 2); - return VectorType::get(EltTy, VTy->getElementCount()); + assert(VTy->isIntOrIntVectorTy() && "VTy expected to be a vector of ints."); + auto *EltTy = cast(VTy->getElementType()); + return VectorType::get(EltTy->getExtendedType(), VTy->getElementCount()); } - /// This static method is like getInteger except that the element types are - /// half as wide as the elements in the input type. + // This static method gets a VectorType with the same number of elements as + // the input type, and the element type is an integer or float type which + // is half as wide as the elements in the input type. static VectorType *getTruncatedElementVectorType(VectorType *VTy) { - unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); - assert((EltBits & 1) == 0 && - "Cannot truncate vector element with odd bit-width"); - Type *EltTy = IntegerType::get(VTy->getContext(), EltBits / 2); + Type *EltTy; + if (VTy->getElementType()->isFloatingPointTy()) { + switch(VTy->getElementType()->getTypeID()) { + case DoubleTyID: + EltTy = Type::getFloatTy(VTy->getContext()); + break; + case FloatTyID: + EltTy = Type::getHalfTy(VTy->getContext()); + break; + default: + llvm_unreachable("Cannot create narrower fp vector element type"); + } + } else { + unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); + assert((EltBits & 1) == 0 && + "Cannot truncate vector element with odd bit-width"); + EltTy = IntegerType::get(VTy->getContext(), EltBits / 2); + } return VectorType::get(EltTy, VTy->getElementCount()); } + // This static method returns a VectorType with a smaller number of elements + // of a larger type than the input element type. For example, a <16 x i8> + // subdivided twice would return <4 x i32> + static VectorType *getSubdividedVectorType(VectorType *VTy, int NumSubdivs) { + for (int i = 0; i < NumSubdivs; ++i) { + VTy = VectorType::getDoubleElementsVectorType(VTy); + VTy = VectorType::getTruncatedElementVectorType(VTy); + } + return VTy; + } + /// This static method returns a VectorType with half as many elements as the /// input type and the same element type. 
static VectorType *getHalfElementsVectorType(VectorType *VTy) { @@ -540,6 +571,10 @@ bool Type::getVectorIsScalable() const { return cast(this)->isScalable(); } +ElementCount Type::getVectorElementCount() const { + return cast(this)->getElementCount(); +} + /// Class to represent pointers. class PointerType : public Type { explicit PointerType(Type *ElType, unsigned AddrSpace); @@ -577,6 +612,26 @@ public: } }; +Type *Type::getExtendedType() const { + assert( + isIntOrIntVectorTy() && + "Original type expected to be a vector of integers or a scalar integer."); + if (auto *VTy = dyn_cast(this)) + return VectorType::getExtendedElementVectorType( + const_cast(VTy)); + return cast(this)->getExtendedType(); +} + +Type *Type::getWithNewBitWidth(unsigned NewBitWidth) const { + assert( + isIntOrIntVectorTy() && + "Original type expected to be a vector of integers or a scalar integer."); + Type *NewType = getIntNTy(getContext(), NewBitWidth); + if (isVectorTy()) + NewType = VectorType::get(NewType, getVectorElementCount()); + return NewType; +} + unsigned Type::getPointerAddressSpace() const { return cast(getScalarType())->getAddressSpace(); } diff --git a/include/llvm/IR/DiagnosticInfo.h b/include/llvm/IR/DiagnosticInfo.h index 373663289dbd..ec469982d378 100644 --- a/include/llvm/IR/DiagnosticInfo.h +++ b/include/llvm/IR/DiagnosticInfo.h @@ -74,8 +74,10 @@ enum DiagnosticKind { DK_LastMachineRemark = DK_MachineOptimizationRemarkAnalysis, DK_MIRParser, DK_PGOProfile, + DK_MisExpect, DK_Unsupported, - DK_FirstPluginKind + DK_FirstPluginKind // Must be last value to work with + // getNextAvailablePluginDiagnosticKind }; /// Get the next available kind ID for a plugin diagnostic. @@ -663,7 +665,7 @@ public: private: /// The IR value (currently basic block) that the optimization operates on. /// This is currently used to provide run-time hotness information with PGO. - const Value *CodeRegion; + const Value *CodeRegion = nullptr; }; /// Diagnostic information for applied optimization remarks. @@ -1002,6 +1004,25 @@ public: void print(DiagnosticPrinter &DP) const override; }; +/// Diagnostic information for MisExpect analysis. +class DiagnosticInfoMisExpect : public DiagnosticInfoWithLocationBase { +public: + DiagnosticInfoMisExpect(const Instruction *Inst, Twine &Msg); + + /// \see DiagnosticInfo::print. + void print(DiagnosticPrinter &DP) const override; + + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == DK_MisExpect; + } + + const Twine &getMsg() const { return Msg; } + +private: + /// Message to report. + const Twine &Msg; +}; + } // end namespace llvm #endif // LLVM_IR_DIAGNOSTICINFO_H diff --git a/include/llvm/IR/FixedMetadataKinds.def b/include/llvm/IR/FixedMetadataKinds.def new file mode 100644 index 000000000000..0e1ffef58672 --- /dev/null +++ b/include/llvm/IR/FixedMetadataKinds.def @@ -0,0 +1,43 @@ +/*===-- FixedMetadataKinds.def - Fixed metadata kind IDs -------*- C++ -*-=== *\ +|* +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +|* See https://llvm.org/LICENSE.txt for license information. +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +|* +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_FIXED_MD_KIND +#error "LLVM_FIXED_MD_KIND(EnumID, Name, Value) is not defined." 
+#endif + +LLVM_FIXED_MD_KIND(MD_dbg, "dbg", 0) +LLVM_FIXED_MD_KIND(MD_tbaa, "tbaa", 1) +LLVM_FIXED_MD_KIND(MD_prof, "prof", 2) +LLVM_FIXED_MD_KIND(MD_fpmath, "fpmath", 3) +LLVM_FIXED_MD_KIND(MD_range, "range", 4) +LLVM_FIXED_MD_KIND(MD_tbaa_struct, "tbaa.struct", 5) +LLVM_FIXED_MD_KIND(MD_invariant_load, "invariant.load", 6) +LLVM_FIXED_MD_KIND(MD_alias_scope, "alias.scope", 7) +LLVM_FIXED_MD_KIND(MD_noalias, "noalias", 8) +LLVM_FIXED_MD_KIND(MD_nontemporal, "nontemporal", 9) +LLVM_FIXED_MD_KIND(MD_mem_parallel_loop_access, + "llvm.mem.parallel_loop_access", 10) +LLVM_FIXED_MD_KIND(MD_nonnull, "nonnull", 11) +LLVM_FIXED_MD_KIND(MD_dereferenceable, "dereferenceable", 12) +LLVM_FIXED_MD_KIND(MD_dereferenceable_or_null, "dereferenceable_or_null", 13) +LLVM_FIXED_MD_KIND(MD_make_implicit, "make.implicit", 14) +LLVM_FIXED_MD_KIND(MD_unpredictable, "unpredictable", 15) +LLVM_FIXED_MD_KIND(MD_invariant_group, "invariant.group", 16) +LLVM_FIXED_MD_KIND(MD_align, "align", 17) +LLVM_FIXED_MD_KIND(MD_loop, "llvm.loop", 18) +LLVM_FIXED_MD_KIND(MD_type, "type", 19) +LLVM_FIXED_MD_KIND(MD_section_prefix, "section_prefix", 20) +LLVM_FIXED_MD_KIND(MD_absolute_symbol, "absolute_symbol", 21) +LLVM_FIXED_MD_KIND(MD_associated, "associated", 22) +LLVM_FIXED_MD_KIND(MD_callees, "callees", 23) +LLVM_FIXED_MD_KIND(MD_irr_loop, "irr_loop", 24) +LLVM_FIXED_MD_KIND(MD_access_group, "llvm.access.group", 25) +LLVM_FIXED_MD_KIND(MD_callback, "callback", 26) +LLVM_FIXED_MD_KIND(MD_preserve_access_index, "llvm.preserve.access.index", 27) +LLVM_FIXED_MD_KIND(MD_misexpect, "misexpect", 28) +LLVM_FIXED_MD_KIND(MD_vcall_visibility, "vcall_visibility", 29) diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h index 7fa61e12f431..d586a9460d2b 100644 --- a/include/llvm/IR/Function.h +++ b/include/llvm/IR/Function.h @@ -343,7 +343,10 @@ public: unsigned getFnStackAlignment() const { if (!hasFnAttribute(Attribute::StackAlignment)) return 0; - return AttributeSets.getStackAlignment(AttributeList::FunctionIndex); + if (const auto MA = + AttributeSets.getStackAlignment(AttributeList::FunctionIndex)) + return MA->value(); + return 0; } /// hasGC/getGC/setGC/clearGC - The name of the garbage collection algorithm @@ -433,7 +436,9 @@ public: /// Extract the alignment for a call or parameter (0=unknown). unsigned getParamAlignment(unsigned ArgNo) const { - return AttributeSets.getParamAlignment(ArgNo); + if (const auto MA = AttributeSets.getParamAlignment(ArgNo)) + return MA->value(); + return 0; } /// Extract the byval type for a parameter. @@ -710,6 +715,12 @@ public: return Arguments + NumArgs; } + Argument* getArg(unsigned i) const { + assert (i < NumArgs && "getArg() out of range!"); + CheckLazyArguments(); + return Arguments + i; + } + iterator_range args() { return make_range(arg_begin(), arg_end()); } diff --git a/include/llvm/IR/GlobalAlias.h b/include/llvm/IR/GlobalAlias.h index 3cd405701300..f2d9b9676ec9 100644 --- a/include/llvm/IR/GlobalAlias.h +++ b/include/llvm/IR/GlobalAlias.h @@ -58,10 +58,6 @@ public: // Linkage, Type, Parent and AddressSpace taken from the Aliasee. static GlobalAlias *create(const Twine &Name, GlobalValue *Aliasee); - void copyAttributesFrom(const GlobalValue *Src) { - GlobalValue::copyAttributesFrom(Src); - } - /// removeFromParent - This method unlinks 'this' from the containing module, /// but does not delete it. 
/// diff --git a/include/llvm/IR/GlobalIFunc.h b/include/llvm/IR/GlobalIFunc.h index bc0d3c053cce..0fdae917878a 100644 --- a/include/llvm/IR/GlobalIFunc.h +++ b/include/llvm/IR/GlobalIFunc.h @@ -46,10 +46,6 @@ public: LinkageTypes Linkage, const Twine &Name, Constant *Resolver, Module *Parent); - void copyAttributesFrom(const GlobalIFunc *Src) { - GlobalValue::copyAttributesFrom(Src); - } - /// This method unlinks 'this' from the containing module, but does not /// delete it. void removeFromParent(); diff --git a/include/llvm/IR/GlobalIndirectSymbol.h b/include/llvm/IR/GlobalIndirectSymbol.h index 8bc3f90b94aa..d996237aa3ef 100644 --- a/include/llvm/IR/GlobalIndirectSymbol.h +++ b/include/llvm/IR/GlobalIndirectSymbol.h @@ -42,6 +42,10 @@ public: /// Provide fast operand accessors DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); + void copyAttributesFrom(const GlobalValue *Src) { + GlobalValue::copyAttributesFrom(Src); + } + /// These methods set and retrieve indirect symbol. void setIndirectSymbol(Constant *Symbol) { setOperand(0, Symbol); @@ -54,9 +58,7 @@ public: static_cast(this)->getIndirectSymbol()); } - const GlobalObject *getBaseObject() const { - return dyn_cast(getIndirectSymbol()->stripInBoundsOffsets()); - } + const GlobalObject *getBaseObject() const; GlobalObject *getBaseObject() { return const_cast( static_cast(this)->getBaseObject()); diff --git a/include/llvm/IR/GlobalObject.h b/include/llvm/IR/GlobalObject.h index b8ab6140ebe7..ce81eb9f0719 100644 --- a/include/llvm/IR/GlobalObject.h +++ b/include/llvm/IR/GlobalObject.h @@ -17,6 +17,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Value.h" +#include "llvm/Support/Alignment.h" #include #include @@ -27,6 +28,20 @@ class MDNode; class Metadata; class GlobalObject : public GlobalValue { +public: + // VCallVisibility - values for visibility metadata attached to vtables. This + // describes the scope in which a virtual call could end up being dispatched + // through this vtable. + enum VCallVisibility { + // Type is potentially visible to external code. + VCallVisibilityPublic = 0, + // Type is only visible to code which will be in the current Module after + // LTO internalization. + VCallVisibilityLinkageUnit = 1, + // Type is only visible to code in the current Module. + VCallVisibilityTranslationUnit = 2, + }; + protected: GlobalObject(Type *Ty, ValueTy VTy, Use *Ops, unsigned NumOps, LinkageTypes Linkage, const Twine &Name, @@ -58,9 +73,14 @@ public: unsigned getAlignment() const { unsigned Data = getGlobalValueSubClassData(); unsigned AlignmentData = Data & AlignmentMask; - return (1u << AlignmentData) >> 1; + MaybeAlign Align = decodeMaybeAlign(AlignmentData); + return Align ? Align->value() : 0; } - void setAlignment(unsigned Align); + + /// FIXME: Remove this setter once the migration to MaybeAlign is over. 
+ LLVM_ATTRIBUTE_DEPRECATED(void setAlignment(unsigned Align), + "Please use `void setAlignment(MaybeAlign Align)`"); + void setAlignment(MaybeAlign Align); unsigned getGlobalObjectSubClassData() const { unsigned ValueData = getGlobalValueSubClassData(); @@ -158,6 +178,8 @@ public: void copyMetadata(const GlobalObject *Src, unsigned Offset); void addTypeMetadata(unsigned Offset, Metadata *TypeID); + void addVCallVisibilityMetadata(VCallVisibility Visibility); + VCallVisibility getVCallVisibility() const; protected: void copyAttributesFrom(const GlobalObject *Src); diff --git a/include/llvm/IR/GlobalVariable.h b/include/llvm/IR/GlobalVariable.h index 2e2c8c477913..2c730bc312e4 100644 --- a/include/llvm/IR/GlobalVariable.h +++ b/include/llvm/IR/GlobalVariable.h @@ -243,6 +243,7 @@ public: bool hasImplicitSection() const { return getAttributes().hasAttribute("bss-section") || getAttributes().hasAttribute("data-section") || + getAttributes().hasAttribute("relro-section") || getAttributes().hasAttribute("rodata-section"); } diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h index a74364dffb2e..d1ddb75cde9b 100644 --- a/include/llvm/IR/IRBuilder.h +++ b/include/llvm/IR/IRBuilder.h @@ -1461,7 +1461,7 @@ public: if (Value *V = foldConstant(Opc, LHS, RHS, Name)) return V; Instruction *BinOp = BinaryOperator::Create(Opc, LHS, RHS); if (isa(BinOp)) - BinOp = setFPAttrs(BinOp, FPMathTag, FMF); + setFPAttrs(BinOp, FPMathTag, FMF); return Insert(BinOp, Name); } @@ -1479,7 +1479,8 @@ public: CallInst *C = CreateIntrinsic(ID, {L->getType()}, {L, R, RoundingV, ExceptV}, nullptr, Name); - return cast(setFPAttrs(C, FPMathTag, UseFMF)); + setFPAttrs(C, FPMathTag, UseFMF); + return C; } Value *CreateNeg(Value *V, const Twine &Name = "", @@ -1504,7 +1505,7 @@ public: MDNode *FPMathTag = nullptr) { if (auto *VC = dyn_cast(V)) return Insert(Folder.CreateFNeg(VC), Name); - return Insert(setFPAttrs(BinaryOperator::CreateFNeg(V), FPMathTag, FMF), + return Insert(setFPAttrs(UnaryOperator::CreateFNeg(V), FPMathTag, FMF), Name); } @@ -1514,9 +1515,7 @@ public: const Twine &Name = "") { if (auto *VC = dyn_cast(V)) return Insert(Folder.CreateFNeg(VC), Name); - // TODO: This should return UnaryOperator::CreateFNeg(...) once we are - // confident that they are optimized sufficiently. 
- return Insert(setFPAttrs(BinaryOperator::CreateFNeg(V), nullptr, + return Insert(setFPAttrs(UnaryOperator::CreateFNeg(V), nullptr, FMFSource->getFastMathFlags()), Name); } @@ -1534,7 +1533,7 @@ public: return Insert(Folder.CreateUnOp(Opc, VC), Name); Instruction *UnOp = UnaryOperator::Create(Opc, V); if (isa(UnOp)) - UnOp = setFPAttrs(UnOp, FPMathTag, FMF); + setFPAttrs(UnOp, FPMathTag, FMF); return Insert(UnOp, Name); } @@ -1612,19 +1611,19 @@ public: LoadInst *CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align, const char *Name) { LoadInst *LI = CreateLoad(Ty, Ptr, Name); - LI->setAlignment(Align); + LI->setAlignment(MaybeAlign(Align)); return LI; } LoadInst *CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align, const Twine &Name = "") { LoadInst *LI = CreateLoad(Ty, Ptr, Name); - LI->setAlignment(Align); + LI->setAlignment(MaybeAlign(Align)); return LI; } LoadInst *CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align, bool isVolatile, const Twine &Name = "") { LoadInst *LI = CreateLoad(Ty, Ptr, isVolatile, Name); - LI->setAlignment(Align); + LI->setAlignment(MaybeAlign(Align)); return LI; } @@ -1649,7 +1648,7 @@ public: StoreInst *CreateAlignedStore(Value *Val, Value *Ptr, unsigned Align, bool isVolatile = false) { StoreInst *SI = CreateStore(Val, Ptr, isVolatile); - SI->setAlignment(Align); + SI->setAlignment(MaybeAlign(Align)); return SI; } @@ -1913,11 +1912,17 @@ public: return V; } - Value *CreateFPToUI(Value *V, Type *DestTy, const Twine &Name = ""){ + Value *CreateFPToUI(Value *V, Type *DestTy, const Twine &Name = "") { + if (IsFPConstrained) + return CreateConstrainedFPCast(Intrinsic::experimental_constrained_fptoui, + V, DestTy, nullptr, Name); return CreateCast(Instruction::FPToUI, V, DestTy, Name); } - Value *CreateFPToSI(Value *V, Type *DestTy, const Twine &Name = ""){ + Value *CreateFPToSI(Value *V, Type *DestTy, const Twine &Name = "") { + if (IsFPConstrained) + return CreateConstrainedFPCast(Intrinsic::experimental_constrained_fptosi, + V, DestTy, nullptr, Name); return CreateCast(Instruction::FPToSI, V, DestTy, Name); } @@ -1931,10 +1936,17 @@ public: Value *CreateFPTrunc(Value *V, Type *DestTy, const Twine &Name = "") { + if (IsFPConstrained) + return CreateConstrainedFPCast( + Intrinsic::experimental_constrained_fptrunc, V, DestTy, nullptr, + Name); return CreateCast(Instruction::FPTrunc, V, DestTy, Name); } Value *CreateFPExt(Value *V, Type *DestTy, const Twine &Name = "") { + if (IsFPConstrained) + return CreateConstrainedFPCast(Intrinsic::experimental_constrained_fpext, + V, DestTy, nullptr, Name); return CreateCast(Instruction::FPExt, V, DestTy, Name); } @@ -2046,6 +2058,37 @@ public: return Insert(CastInst::CreateFPCast(V, DestTy), Name); } + CallInst *CreateConstrainedFPCast( + Intrinsic::ID ID, Value *V, Type *DestTy, + Instruction *FMFSource = nullptr, const Twine &Name = "", + MDNode *FPMathTag = nullptr, + Optional Rounding = None, + Optional Except = None) { + Value *ExceptV = getConstrainedFPExcept(Except); + + FastMathFlags UseFMF = FMF; + if (FMFSource) + UseFMF = FMFSource->getFastMathFlags(); + + CallInst *C; + switch (ID) { + default: { + Value *RoundingV = getConstrainedFPRounding(Rounding); + C = CreateIntrinsic(ID, {DestTy, V->getType()}, {V, RoundingV, ExceptV}, + nullptr, Name); + } break; + case Intrinsic::experimental_constrained_fpext: + case Intrinsic::experimental_constrained_fptoui: + case Intrinsic::experimental_constrained_fptosi: + C = CreateIntrinsic(ID, {DestTy, V->getType()}, {V, ExceptV}, nullptr, + Name); + break; + } + if 
(isa(C)) + setFPAttrs(C, FPMathTag, UseFMF); + return C; + } + // Provided to resolve 'CreateIntCast(Ptr, Ptr, "...")', giving a // compile time error, instead of converting the string to bool for the // isSigned parameter. @@ -2187,7 +2230,10 @@ public: PHINode *CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name = "") { - return Insert(PHINode::Create(Ty, NumReservedValues), Name); + PHINode *Phi = PHINode::Create(Ty, NumReservedValues); + if (isa(Phi)) + setFPAttrs(Phi, nullptr /* MDNode* */, FMF); + return Insert(Phi, Name); } CallInst *CreateCall(FunctionType *FTy, Value *Callee, @@ -2195,7 +2241,7 @@ public: MDNode *FPMathTag = nullptr) { CallInst *CI = CallInst::Create(FTy, Callee, Args, DefaultOperandBundles); if (isa(CI)) - CI = cast(setFPAttrs(CI, FPMathTag, FMF)); + setFPAttrs(CI, FPMathTag, FMF); return Insert(CI, Name); } @@ -2204,7 +2250,7 @@ public: const Twine &Name = "", MDNode *FPMathTag = nullptr) { CallInst *CI = CallInst::Create(FTy, Callee, Args, OpBundles); if (isa(CI)) - CI = cast(setFPAttrs(CI, FPMathTag, FMF)); + setFPAttrs(CI, FPMathTag, FMF); return Insert(CI, Name); } @@ -2252,7 +2298,7 @@ public: Sel = addBranchMetadata(Sel, Prof, Unpred); } if (isa(Sel)) - Sel = cast(setFPAttrs(Sel, nullptr /* MDNode* */, FMF)); + setFPAttrs(Sel, nullptr /* MDNode* */, FMF); return Insert(Sel, Name); } @@ -2454,7 +2500,7 @@ public: } Value *CreatePreserveArrayAccessIndex(Value *Base, unsigned Dimension, - unsigned LastIndex) { + unsigned LastIndex, MDNode *DbgInfo) { assert(isa(Base->getType()) && "Invalid Base ptr type for preserve.array.access.index."); auto *BaseType = Base->getType(); @@ -2476,6 +2522,8 @@ public: Value *DimV = getInt32(Dimension); CallInst *Fn = CreateCall(FnPreserveArrayAccessIndex, {Base, DimV, LastIndexV}); + if (DbgInfo) + Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo); return Fn; } @@ -2493,7 +2541,8 @@ public: Value *DIIndex = getInt32(FieldIndex); CallInst *Fn = CreateCall(FnPreserveUnionAccessIndex, {Base, DIIndex}); - Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo); + if (DbgInfo) + Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo); return Fn; } @@ -2516,7 +2565,8 @@ public: Value *DIIndex = getInt32(FieldIndex); CallInst *Fn = CreateCall(FnPreserveStructAccessIndex, {Base, GEPIndex, DIIndex}); - Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo); + if (DbgInfo) + Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo); return Fn; } diff --git a/include/llvm/IR/InlineAsm.h b/include/llvm/IR/InlineAsm.h index 2aac807623a9..72d8ad1501ae 100644 --- a/include/llvm/IR/InlineAsm.h +++ b/include/llvm/IR/InlineAsm.h @@ -244,6 +244,7 @@ public: Constraint_m, Constraint_o, Constraint_v, + Constraint_A, Constraint_Q, Constraint_R, Constraint_S, diff --git a/include/llvm/IR/InstrTypes.h b/include/llvm/IR/InstrTypes.h index ca419b50da6b..7fb94e9d8c22 100644 --- a/include/llvm/IR/InstrTypes.h +++ b/include/llvm/IR/InstrTypes.h @@ -975,7 +975,7 @@ public: static Type* makeCmpResultType(Type* opnd_type) { if (VectorType* vt = dyn_cast(opnd_type)) { return VectorType::get(Type::getInt1Ty(opnd_type->getContext()), - vt->getNumElements()); + vt->getElementCount()); } return Type::getInt1Ty(opnd_type->getContext()); } @@ -1567,11 +1567,17 @@ public: } /// Extract the alignment of the return value. 
- unsigned getRetAlignment() const { return Attrs.getRetAlignment(); } + unsigned getRetAlignment() const { + if (const auto MA = Attrs.getRetAlignment()) + return MA->value(); + return 0; + } /// Extract the alignment for a call or parameter (0=unknown). unsigned getParamAlignment(unsigned ArgNo) const { - return Attrs.getParamAlignment(ArgNo); + if (const auto MA = Attrs.getParamAlignment(ArgNo)) + return MA->value(); + return 0; } /// Extract the byval type for a call or parameter. diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h index 6a9a74bd16f0..803f6977b32c 100644 --- a/include/llvm/IR/Instruction.h +++ b/include/llvm/IR/Instruction.h @@ -229,6 +229,16 @@ public: return hasMetadataHashEntry(); } + /// Return true if this instruction has the given type of metadata attached. + bool hasMetadata(unsigned KindID) const { + return getMetadata(KindID) != nullptr; + } + + /// Return true if this instruction has the given type of metadata attached. + bool hasMetadata(StringRef Kind) const { + return getMetadata(Kind) != nullptr; + } + /// Get the metadata of given kind attached to this Instruction. /// If the metadata is not found then return null. MDNode *getMetadata(unsigned KindID) const { diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h index 215ce45c7b75..fa980df03ef0 100644 --- a/include/llvm/IR/Instructions.h +++ b/include/llvm/IR/Instructions.h @@ -110,9 +110,11 @@ public: /// Return the alignment of the memory that is being allocated by the /// instruction. unsigned getAlignment() const { - return (1u << (getSubclassDataFromInstruction() & 31)) >> 1; + if (const auto MA = decodeMaybeAlign(getSubclassDataFromInstruction() & 31)) + return MA->value(); + return 0; } - void setAlignment(unsigned Align); + void setAlignment(MaybeAlign Align); /// Return true if this alloca is in the entry block of the function and is a /// constant size. 
If so, the code generator will fold it into the @@ -182,15 +184,15 @@ public: LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile, BasicBlock *InsertAtEnd); LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile, - unsigned Align, Instruction *InsertBefore = nullptr); + MaybeAlign Align, Instruction *InsertBefore = nullptr); LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile, - unsigned Align, BasicBlock *InsertAtEnd); + MaybeAlign Align, BasicBlock *InsertAtEnd); LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile, - unsigned Align, AtomicOrdering Order, + MaybeAlign Align, AtomicOrdering Order, SyncScope::ID SSID = SyncScope::System, Instruction *InsertBefore = nullptr); LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile, - unsigned Align, AtomicOrdering Order, SyncScope::ID SSID, + MaybeAlign Align, AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd); // Deprecated [opaque pointer types] @@ -209,20 +211,20 @@ public: BasicBlock *InsertAtEnd) : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr, isVolatile, InsertAtEnd) {} - LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align, + LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, MaybeAlign Align, Instruction *InsertBefore = nullptr) : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr, isVolatile, Align, InsertBefore) {} - LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align, + LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, MaybeAlign Align, BasicBlock *InsertAtEnd) : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr, isVolatile, Align, InsertAtEnd) {} - LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align, + LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, MaybeAlign Align, AtomicOrdering Order, SyncScope::ID SSID = SyncScope::System, Instruction *InsertBefore = nullptr) : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr, isVolatile, Align, Order, SSID, InsertBefore) {} - LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align, + LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, MaybeAlign Align, AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd) : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr, isVolatile, Align, Order, SSID, InsertAtEnd) {} @@ -238,10 +240,13 @@ public: /// Return the alignment of the access that is being performed. unsigned getAlignment() const { - return (1 << ((getSubclassDataFromInstruction() >> 1) & 31)) >> 1; + if (const auto MA = + decodeMaybeAlign((getSubclassDataFromInstruction() >> 1) & 31)) + return MA->value(); + return 0; } - void setAlignment(unsigned Align); + void setAlignment(MaybeAlign Align); /// Returns the ordering constraint of this load instruction. 
AtomicOrdering getOrdering() const { @@ -332,17 +337,15 @@ public: StoreInst(Value *Val, Value *Ptr, bool isVolatile = false, Instruction *InsertBefore = nullptr); StoreInst(Value *Val, Value *Ptr, bool isVolatile, BasicBlock *InsertAtEnd); - StoreInst(Value *Val, Value *Ptr, bool isVolatile, - unsigned Align, Instruction *InsertBefore = nullptr); - StoreInst(Value *Val, Value *Ptr, bool isVolatile, - unsigned Align, BasicBlock *InsertAtEnd); - StoreInst(Value *Val, Value *Ptr, bool isVolatile, - unsigned Align, AtomicOrdering Order, - SyncScope::ID SSID = SyncScope::System, + StoreInst(Value *Val, Value *Ptr, bool isVolatile, MaybeAlign Align, Instruction *InsertBefore = nullptr); - StoreInst(Value *Val, Value *Ptr, bool isVolatile, - unsigned Align, AtomicOrdering Order, SyncScope::ID SSID, + StoreInst(Value *Val, Value *Ptr, bool isVolatile, MaybeAlign Align, BasicBlock *InsertAtEnd); + StoreInst(Value *Val, Value *Ptr, bool isVolatile, MaybeAlign Align, + AtomicOrdering Order, SyncScope::ID SSID = SyncScope::System, + Instruction *InsertBefore = nullptr); + StoreInst(Value *Val, Value *Ptr, bool isVolatile, MaybeAlign Align, + AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd); // allocate space for exactly two operands void *operator new(size_t s) { @@ -363,10 +366,13 @@ public: /// Return the alignment of the access that is being performed unsigned getAlignment() const { - return (1 << ((getSubclassDataFromInstruction() >> 1) & 31)) >> 1; + if (const auto MA = + decodeMaybeAlign((getSubclassDataFromInstruction() >> 1) & 31)) + return MA->value(); + return 0; } - void setAlignment(unsigned Align); + void setAlignment(MaybeAlign Align); /// Returns the ordering constraint of this store instruction. AtomicOrdering getOrdering() const { @@ -1764,6 +1770,10 @@ public: void setTrueValue(Value *V) { Op<1>() = V; } void setFalseValue(Value *V) { Op<2>() = V; } + /// Swap the true and false values of the select instruction. + /// This doesn't swap prof metadata. + void swapValues() { Op<1>().swap(Op<2>()); } + /// Return a string if the specified operands are invalid /// for a select operation, otherwise return null. static const char *areInvalidOperands(Value *Cond, Value *True, Value *False); @@ -3455,16 +3465,7 @@ public: class SwitchInstProfUpdateWrapper { SwitchInst &SI; Optional > Weights = None; - - // Sticky invalid state is needed to safely ignore operations with prof data - // in cases where SwitchInstProfUpdateWrapper is created from SwitchInst - // with inconsistent prof data. TODO: once we fix all prof data - // inconsistencies we can turn invalid state to assertions. - enum { - Invalid, - Initialized, - Changed - } State = Invalid; + bool Changed = false; protected: static MDNode *getProfBranchWeightsMD(const SwitchInst &SI); @@ -3482,7 +3483,7 @@ public: SwitchInstProfUpdateWrapper(SwitchInst &SI) : SI(SI) { init(); } ~SwitchInstProfUpdateWrapper() { - if (State == Changed) + if (Changed) SI.setMetadata(LLVMContext::MD_prof, buildProfBranchWeightsMD()); } @@ -3938,6 +3939,9 @@ class CallBrInst : public CallBase { ArrayRef IndirectDests, ArrayRef Args, ArrayRef Bundles, const Twine &NameStr); + /// Should the Indirect Destinations change, scan + update the Arg list. + void updateArgBlockAddresses(unsigned i, BasicBlock *B); + /// Compute the number of operands to allocate. 
static int ComputeNumOperands(int NumArgs, int NumIndirectDests, int NumBundleInputs = 0) { @@ -4075,7 +4079,7 @@ public: return cast(*(&Op<-1>() - getNumIndirectDests() - 1)); } BasicBlock *getIndirectDest(unsigned i) const { - return cast(*(&Op<-1>() - getNumIndirectDests() + i)); + return cast_or_null(*(&Op<-1>() - getNumIndirectDests() + i)); } SmallVector getIndirectDests() const { SmallVector IndirectDests; @@ -4087,6 +4091,7 @@ public: *(&Op<-1>() - getNumIndirectDests() - 1) = reinterpret_cast(B); } void setIndirectDest(unsigned i, BasicBlock *B) { + updateArgBlockAddresses(i, B); *(&Op<-1>() - getNumIndirectDests() + i) = reinterpret_cast(B); } @@ -4096,11 +4101,10 @@ public: return i == 0 ? getDefaultDest() : getIndirectDest(i - 1); } - void setSuccessor(unsigned idx, BasicBlock *NewSucc) { - assert(idx < getNumIndirectDests() + 1 && + void setSuccessor(unsigned i, BasicBlock *NewSucc) { + assert(i < getNumIndirectDests() + 1 && "Successor # out of range for callbr!"); - *(&Op<-1>() - getNumIndirectDests() -1 + idx) = - reinterpret_cast(NewSucc); + return i == 0 ? setDefaultDest(NewSucc) : setIndirectDest(i - 1, NewSucc); } unsigned getNumSuccessors() const { return getNumIndirectDests() + 1; } @@ -5251,31 +5255,38 @@ public: /// A helper function that returns the pointer operand of a load or store /// instruction. Returns nullptr if not load or store. -inline Value *getLoadStorePointerOperand(Value *V) { +inline const Value *getLoadStorePointerOperand(const Value *V) { if (auto *Load = dyn_cast(V)) return Load->getPointerOperand(); if (auto *Store = dyn_cast(V)) return Store->getPointerOperand(); return nullptr; } +inline Value *getLoadStorePointerOperand(Value *V) { + return const_cast( + getLoadStorePointerOperand(static_cast(V))); +} /// A helper function that returns the pointer operand of a load, store /// or GEP instruction. Returns nullptr if not load, store, or GEP. -inline Value *getPointerOperand(Value *V) { +inline const Value *getPointerOperand(const Value *V) { if (auto *Ptr = getLoadStorePointerOperand(V)) return Ptr; if (auto *Gep = dyn_cast(V)) return Gep->getPointerOperand(); return nullptr; } +inline Value *getPointerOperand(Value *V) { + return const_cast(getPointerOperand(static_cast(V))); +} /// A helper function that returns the alignment of load or store instruction. 
-inline unsigned getLoadStoreAlignment(Value *I) { +inline MaybeAlign getLoadStoreAlignment(Value *I) { assert((isa(I) || isa(I)) && "Expected Load or Store instruction"); if (auto *LI = dyn_cast(I)) - return LI->getAlignment(); - return cast(I)->getAlignment(); + return MaybeAlign(LI->getAlignment()); + return MaybeAlign(cast(I)->getAlignment()); } /// A helper function that returns the address space of the pointer operand of diff --git a/include/llvm/IR/IntrinsicInst.h b/include/llvm/IR/IntrinsicInst.h index 438bdb29b706..c989b4a2e72a 100644 --- a/include/llvm/IR/IntrinsicInst.h +++ b/include/llvm/IR/IntrinsicInst.h @@ -259,6 +259,8 @@ namespace llvm { case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: case Intrinsic::experimental_constrained_fptrunc: case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: @@ -271,12 +273,16 @@ namespace llvm { case Intrinsic::experimental_constrained_log: case Intrinsic::experimental_constrained_log10: case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_lrint: + case Intrinsic::experimental_constrained_llrint: case Intrinsic::experimental_constrained_rint: case Intrinsic::experimental_constrained_nearbyint: case Intrinsic::experimental_constrained_maxnum: case Intrinsic::experimental_constrained_minnum: case Intrinsic::experimental_constrained_ceil: case Intrinsic::experimental_constrained_floor: + case Intrinsic::experimental_constrained_lround: + case Intrinsic::experimental_constrained_llround: case Intrinsic::experimental_constrained_round: case Intrinsic::experimental_constrained_trunc: return true; @@ -405,11 +411,11 @@ namespace llvm { setArgOperand(ARG_DEST, Ptr); } - void setDestAlignment(unsigned Align) { + void setDestAlignment(unsigned Alignment) { removeParamAttr(ARG_DEST, Attribute::Alignment); - if (Align > 0) - addParamAttr(ARG_DEST, - Attribute::getWithAlignment(getContext(), Align)); + if (Alignment > 0) + addParamAttr(ARG_DEST, Attribute::getWithAlignment(getContext(), + Align(Alignment))); } void setLength(Value *L) { @@ -454,11 +460,12 @@ namespace llvm { BaseCL::setArgOperand(ARG_SOURCE, Ptr); } - void setSourceAlignment(unsigned Align) { + void setSourceAlignment(unsigned Alignment) { BaseCL::removeParamAttr(ARG_SOURCE, Attribute::Alignment); - if (Align > 0) - BaseCL::addParamAttr(ARG_SOURCE, Attribute::getWithAlignment( - BaseCL::getContext(), Align)); + if (Alignment > 0) + BaseCL::addParamAttr(ARG_SOURCE, + Attribute::getWithAlignment(BaseCL::getContext(), + Align(Alignment))); } }; diff --git a/include/llvm/IR/Intrinsics.h b/include/llvm/IR/Intrinsics.h index f38f92022d21..9e4ebd915afc 100644 --- a/include/llvm/IR/Intrinsics.h +++ b/include/llvm/IR/Intrinsics.h @@ -100,7 +100,8 @@ namespace Intrinsic { Integer, Vector, Pointer, Struct, Argument, ExtendArgument, TruncArgument, HalfVecArgument, SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfAnyPtrsToElt, - VecElementArgument + VecElementArgument, ScalableVecArgument, Subdivide2Argument, + Subdivide4Argument, VecOfBitcastsToInt } Kind; union { @@ -125,14 +126,17 @@ namespace Intrinsic { assert(Kind == Argument || Kind == ExtendArgument || Kind == TruncArgument || Kind == HalfVecArgument || Kind == SameVecWidthArgument || Kind == PtrToArgument || - Kind == PtrToElt || Kind == VecElementArgument); + Kind == 
PtrToElt || Kind == VecElementArgument || + Kind == Subdivide2Argument || Kind == Subdivide4Argument || + Kind == VecOfBitcastsToInt); return Argument_Info >> 3; } ArgKind getArgumentKind() const { assert(Kind == Argument || Kind == ExtendArgument || Kind == TruncArgument || Kind == HalfVecArgument || Kind == SameVecWidthArgument || Kind == PtrToArgument || - Kind == VecElementArgument); + Kind == VecElementArgument || Kind == Subdivide2Argument || + Kind == Subdivide4Argument || Kind == VecOfBitcastsToInt); return (ArgKind)(Argument_Info & 7); } diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td index d660f8278437..7a0263f88c2a 100644 --- a/include/llvm/IR/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -63,6 +63,12 @@ class NoCapture : IntrinsicProperty { int ArgNo = argNo; } +// NoAlias - The specified argument pointer is not aliasing other "noalias" pointer +// arguments of the intrinsic wrt. the intrinsic scope. +class NoAlias : IntrinsicProperty { + int ArgNo = argNo; +} + // Returned - The specified argument is always the return value of the // intrinsic. class Returned : IntrinsicProperty { @@ -181,6 +187,16 @@ class LLVMVectorElementType : LLVMMatchType; // vector type, but change the element count to be half as many class LLVMHalfElementsVectorType : LLVMMatchType; +// Match the type of another intrinsic parameter that is expected to be a +// vector type (i.e. ) but with each element subdivided to +// form a vector with more elements that are smaller than the original. +class LLVMSubdivide2VectorType : LLVMMatchType; +class LLVMSubdivide4VectorType : LLVMMatchType; + +// Match the element count and bit width of another intrinsic parameter, but +// change the element type to an integer. +class LLVMVectorOfBitcastsToInt : LLVMMatchType; + def llvm_void_ty : LLVMType; let isAny = 1 in { def llvm_any_ty : LLVMType; @@ -407,9 +423,9 @@ def int_objc_arc_annotation_bottomup_bbend : Intrinsic<[], //===--------------------- Code Generator Intrinsics ----------------------===// // def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>; -def int_addressofreturnaddress : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; -def int_frameaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>; -def int_sponentry : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; +def int_addressofreturnaddress : Intrinsic<[llvm_anyptr_ty], [], [IntrNoMem]>; +def int_frameaddress : Intrinsic<[llvm_anyptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>; +def int_sponentry : Intrinsic<[llvm_anyptr_ty], [], [IntrNoMem]>; def int_read_register : Intrinsic<[llvm_anyint_ty], [llvm_metadata_ty], [IntrReadMem], "llvm.read_register">; def int_write_register : Intrinsic<[], [llvm_metadata_ty, llvm_anyint_ty], @@ -451,8 +467,8 @@ def int_thread_pointer : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>, // from being reordered overly much with respect to nearby access to the same // memory while not impeding optimization. 
def int_prefetch - : Intrinsic<[], [ llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ], - [ IntrInaccessibleMemOrArgMemOnly, ReadOnly<0>, NoCapture<0>, + : Intrinsic<[], [ llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ], + [ IntrInaccessibleMemOrArgMemOnly, IntrWillReturn, ReadOnly<0>, NoCapture<0>, ImmArg<1>, ImmArg<2>]>; def int_pcmarker : Intrinsic<[], [llvm_i32_ty]>; @@ -460,7 +476,7 @@ def int_readcyclecounter : Intrinsic<[llvm_i64_ty]>; // The assume intrinsic is marked as arbitrarily writing so that proper // control dependencies will be maintained. -def int_assume : Intrinsic<[], [llvm_i1_ty], []>; +def int_assume : Intrinsic<[], [llvm_i1_ty], [IntrWillReturn]>; // Stack Protector Intrinsic - The stackprotector intrinsic writes the stack // guard to the correct place on the stack frame. @@ -493,23 +509,23 @@ def int_instrprof_value_profile : Intrinsic<[], def int_memcpy : Intrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty], - [IntrArgMemOnly, NoCapture<0>, NoCapture<1>, - WriteOnly<0>, ReadOnly<1>, ImmArg<3>]>; + [IntrArgMemOnly, IntrWillReturn, NoCapture<0>, NoCapture<1>, + NoAlias<0>, NoAlias<1>, WriteOnly<0>, ReadOnly<1>, ImmArg<3>]>; def int_memmove : Intrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty], - [IntrArgMemOnly, NoCapture<0>, NoCapture<1>, + [IntrArgMemOnly, IntrWillReturn, NoCapture<0>, NoCapture<1>, ReadOnly<1>, ImmArg<3>]>; def int_memset : Intrinsic<[], [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i1_ty], - [IntrArgMemOnly, NoCapture<0>, WriteOnly<0>, + [IntrArgMemOnly, IntrWillReturn, NoCapture<0>, WriteOnly<0>, ImmArg<3>]>; // FIXME: Add version of these floating point intrinsics which allow non-default // rounding modes and FP exception handling. -let IntrProperties = [IntrNoMem, IntrSpeculatable] in { +let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { def int_fma : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; @@ -551,19 +567,19 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in { def int_minnum : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable, Commutative] + [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative] >; def int_maxnum : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable, Commutative] + [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative] >; def int_minimum : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable, Commutative] + [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative] >; def int_maximum : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable, Commutative] + [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative] >; // NOTE: these are internal interfaces. 
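On the IR side these property changes surface as attributes on the intrinsic declarations. A small sketch of how they can be observed, assuming the TableGen properties lower to the usual attributes (IntrWillReturn to willreturn, NoAlias<0>/NoAlias<1> to noalias on the destination and source pointers) and using placeholder names:

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    bool memcpyHasNewProperties(Module &M) {
      LLVMContext &Ctx = M.getContext();
      // llvm.memcpy is overloaded on the two pointer types and the length type,
      // e.g. llvm.memcpy.p0i8.p0i8.i64.
      Function *Memcpy = Intrinsic::getDeclaration(
          &M, Intrinsic::memcpy,
          {Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx), Type::getInt64Ty(Ctx)});
      return Memcpy->hasFnAttribute(Attribute::WillReturn) &&
             Memcpy->hasParamAttribute(0, Attribute::NoAlias) && // dst
             Memcpy->hasParamAttribute(1, Attribute::NoAlias);   // src
    }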
@@ -576,13 +592,13 @@ def int_siglongjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrNoReturn]>; def int_objectsize : Intrinsic<[llvm_anyint_ty], [llvm_anyptr_ty, llvm_i1_ty, llvm_i1_ty, llvm_i1_ty], - [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>, ImmArg<3>]>, + [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<1>, ImmArg<2>, ImmArg<3>]>, GCCBuiltin<"__builtin_object_size">; //===--------------- Constrained Floating Point Intrinsics ----------------===// // -let IntrProperties = [IntrInaccessibleMemOnly] in { +let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in { def int_experimental_constrained_fadd : Intrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, @@ -616,6 +632,14 @@ let IntrProperties = [IntrInaccessibleMemOnly] in { llvm_metadata_ty, llvm_metadata_ty ]>; + def int_experimental_constrained_fptosi : Intrinsic<[ llvm_anyint_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty ]>; + + def int_experimental_constrained_fptoui : Intrinsic<[ llvm_anyint_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_fptrunc : Intrinsic<[ llvm_anyfloat_ty ], [ llvm_anyfloat_ty, llvm_metadata_ty, @@ -679,6 +703,14 @@ let IntrProperties = [IntrInaccessibleMemOnly] in { [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; + def int_experimental_constrained_lrint : Intrinsic<[ llvm_anyint_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_llrint : Intrinsic<[ llvm_anyint_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty, + llvm_metadata_ty ]>; def int_experimental_constrained_maxnum : Intrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, @@ -697,6 +729,12 @@ let IntrProperties = [IntrInaccessibleMemOnly] in { [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; + def int_experimental_constrained_lround : Intrinsic<[ llvm_anyint_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_llround : Intrinsic<[ llvm_anyint_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty ]>; def int_experimental_constrained_round : Intrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty, @@ -706,18 +744,19 @@ let IntrProperties = [IntrInaccessibleMemOnly] in { llvm_metadata_ty, llvm_metadata_ty ]>; } -// FIXME: Add intrinsics for fcmp, fptoui and fptosi. +// FIXME: Add intrinsic for fcmp. +// FIXME: Consider maybe adding intrinsics for sitofp, uitofp. //===------------------------- Expect Intrinsics --------------------------===// // def int_expect : Intrinsic<[llvm_anyint_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; + [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, IntrWillReturn]>; //===-------------------- Bit Manipulation Intrinsics ---------------------===// // // None of these intrinsics accesses memory at all. 
-let IntrProperties = [IntrNoMem, IntrSpeculatable] in { +let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { def int_bswap: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; def int_bitreverse : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; @@ -727,7 +766,7 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in { [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; } -let IntrProperties = [IntrNoMem, IntrSpeculatable, ImmArg<1>] in { +let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<1>] in { def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; } @@ -739,7 +778,7 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, ImmArg<1>] in { // mean the optimizers can change them aggressively. Special handling // needed in a few places. These synthetic intrinsics have no // side-effects and just mark information about their operands. -let IntrProperties = [IntrNoMem, IntrSpeculatable] in { +let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { def int_dbg_declare : Intrinsic<[], [llvm_metadata_ty, llvm_metadata_ty, @@ -796,21 +835,21 @@ def int_eh_sjlj_setup_dispatch : Intrinsic<[], []>; def int_var_annotation : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], - [], "llvm.var.annotation">; + [IntrWillReturn], "llvm.var.annotation">; def int_ptr_annotation : Intrinsic<[LLVMAnyPointerType], [LLVMMatchType<0>, llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], - [], "llvm.ptr.annotation">; + [IntrWillReturn], "llvm.ptr.annotation">; def int_annotation : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], - [], "llvm.annotation">; + [IntrWillReturn], "llvm.annotation">; // Annotates the current program point with metadata strings which are emitted // as CodeView debug info records. This is expensive, as it disables inlining // and is modelled as having side effects. def int_codeview_annotation : Intrinsic<[], [llvm_metadata_ty], - [IntrInaccessibleMemOnly, IntrNoDuplicate], + [IntrInaccessibleMemOnly, IntrNoDuplicate, IntrWillReturn], "llvm.codeview.annotation">; //===------------------------ Trampoline Intrinsics -----------------------===// @@ -828,79 +867,77 @@ def int_adjust_trampoline : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], // // Expose the carry flag from add operations on two integrals. 
-def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; -def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; - -def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; -def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; - -def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; -def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; - +let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { + def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [LLVMMatchType<0>, LLVMMatchType<0>]>; + def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [LLVMMatchType<0>, LLVMMatchType<0>]>; + + def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [LLVMMatchType<0>, LLVMMatchType<0>]>; + def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [LLVMMatchType<0>, LLVMMatchType<0>]>; + + def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [LLVMMatchType<0>, LLVMMatchType<0>]>; + def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [LLVMMatchType<0>, LLVMMatchType<0>]>; +} //===------------------------- Saturation Arithmetic Intrinsics ---------------------===// // def int_sadd_sat : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable, Commutative]>; + [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]>; def int_uadd_sat : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable, Commutative]>; + [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]>; def int_ssub_sat : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; def int_usub_sat : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; //===------------------------- Fixed Point Arithmetic Intrinsics ---------------------===// // def int_smul_fix : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, Commutative, ImmArg<2>]>; + [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative, ImmArg<2>]>; def int_umul_fix : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, Commutative, ImmArg<2>]>; + [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative, ImmArg<2>]>; //===------------------- Fixed Point Saturation Arithmetic Intrinsics ----------------===// // def int_smul_fix_sat : Intrinsic<[llvm_anyint_ty], 
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, Commutative, ImmArg<2>]>; + [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative, ImmArg<2>]>; +def int_umul_fix_sat : Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative, ImmArg<2>]>; //===------------------------- Memory Use Markers -------------------------===// // def int_lifetime_start : Intrinsic<[], [llvm_i64_ty, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture<1>, ImmArg<0>]>; + [IntrArgMemOnly, IntrWillReturn, NoCapture<1>, ImmArg<0>]>; def int_lifetime_end : Intrinsic<[], [llvm_i64_ty, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture<1>, ImmArg<0>]>; + [IntrArgMemOnly, IntrWillReturn, NoCapture<1>, ImmArg<0>]>; def int_invariant_start : Intrinsic<[llvm_descriptor_ty], [llvm_i64_ty, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture<1>, ImmArg<0>]>; + [IntrArgMemOnly, IntrWillReturn, NoCapture<1>, ImmArg<0>]>; def int_invariant_end : Intrinsic<[], [llvm_descriptor_ty, llvm_i64_ty, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture<2>, ImmArg<1>]>; + [IntrArgMemOnly, IntrWillReturn, NoCapture<2>, ImmArg<1>]>; // launder.invariant.group can't be marked with 'readnone' (IntrNoMem), // because it would cause CSE of two barriers with the same argument. @@ -916,12 +953,12 @@ def int_invariant_end : Intrinsic<[], // might change in the future. def int_launder_invariant_group : Intrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>], - [IntrInaccessibleMemOnly, IntrSpeculatable]>; + [IntrInaccessibleMemOnly, IntrSpeculatable, IntrWillReturn]>; def int_strip_invariant_group : Intrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>], - [IntrSpeculatable, IntrNoMem]>; + [IntrSpeculatable, IntrNoMem, IntrWillReturn]>; //===------------------------ Stackmap Intrinsics -------------------------===// // @@ -964,6 +1001,14 @@ def int_coro_id : Intrinsic<[llvm_token_ty], [llvm_i32_ty, llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], [IntrArgMemOnly, IntrReadMem, ReadNone<1>, ReadOnly<2>, NoCapture<2>]>; +def int_coro_id_retcon : Intrinsic<[llvm_token_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty, + llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], + []>; +def int_coro_id_retcon_once : Intrinsic<[llvm_token_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty, + llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], + []>; def int_coro_alloc : Intrinsic<[llvm_i1_ty], [llvm_token_ty], []>; def int_coro_begin : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_ptr_ty], [WriteOnly<1>]>; @@ -979,6 +1024,13 @@ def int_coro_size : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; def int_coro_save : Intrinsic<[llvm_token_ty], [llvm_ptr_ty], []>; def int_coro_suspend : Intrinsic<[llvm_i8_ty], [llvm_token_ty, llvm_i1_ty], []>; +def int_coro_suspend_retcon : Intrinsic<[llvm_any_ty], [llvm_vararg_ty], []>; +def int_coro_prepare_retcon : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], + [IntrNoMem]>; +def int_coro_alloca_alloc : Intrinsic<[llvm_token_ty], + [llvm_anyint_ty, llvm_i32_ty], []>; +def int_coro_alloca_get : Intrinsic<[llvm_ptr_ty], [llvm_token_ty], []>; +def int_coro_alloca_free : Intrinsic<[], [llvm_token_ty], []>; def int_coro_param : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_ptr_ty], [IntrNoMem, ReadNone<0>, ReadNone<1>]>; @@ -1018,19 +1070,19 @@ def int_experimental_guard : Intrinsic<[], [llvm_i1_ty, llvm_vararg_ty], // Supports widenable conditions for guards represented as explicit branches. 
def int_experimental_widenable_condition : Intrinsic<[llvm_i1_ty], [], - [IntrInaccessibleMemOnly]>; + [IntrInaccessibleMemOnly, IntrWillReturn]>; // NOP: calls/invokes to this intrinsic are removed by codegen -def int_donothing : Intrinsic<[], [], [IntrNoMem]>; +def int_donothing : Intrinsic<[], [], [IntrNoMem, IntrWillReturn]>; // This instruction has no actual effect, though it is treated by the optimizer // has having opaque side effects. This may be inserted into loops to ensure // that they are not removed even if they turn out to be empty, for languages // which specify that infinite loops must be preserved. -def int_sideeffect : Intrinsic<[], [], [IntrInaccessibleMemOnly]>; +def int_sideeffect : Intrinsic<[], [], [IntrInaccessibleMemOnly, IntrWillReturn]>; -// Intrisics to support half precision floating point format -let IntrProperties = [IntrNoMem] in { +// Intrinsics to support half precision floating point format +let IntrProperties = [IntrNoMem, IntrWillReturn] in { def int_convert_to_fp16 : Intrinsic<[llvm_i16_ty], [llvm_anyfloat_ty]>; def int_convert_from_fp16 : Intrinsic<[llvm_anyfloat_ty], [llvm_i16_ty]>; } @@ -1041,7 +1093,11 @@ def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], [], "llvm.clear_cache">; // Intrinsic to detect whether its argument is a constant. -def int_is_constant : Intrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem], "llvm.is.constant">; +def int_is_constant : Intrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem, IntrWillReturn], "llvm.is.constant">; + +// Intrinsic to mask out bits of a pointer. +def int_ptrmask: Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty, llvm_anyint_ty], + [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; //===-------------------------- Masked Intrinsics -------------------------===// // @@ -1049,45 +1105,45 @@ def int_masked_store : Intrinsic<[], [llvm_anyvector_ty, LLVMAnyPointerType>, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [IntrArgMemOnly, ImmArg<2>]>; + [IntrArgMemOnly, IntrWillReturn, ImmArg<2>]>; def int_masked_load : Intrinsic<[llvm_anyvector_ty], [LLVMAnyPointerType>, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>], - [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>; + [IntrReadMem, IntrArgMemOnly, IntrWillReturn, ImmArg<1>]>; def int_masked_gather: Intrinsic<[llvm_anyvector_ty], [LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>], - [IntrReadMem, ImmArg<1>]>; + [IntrReadMem, IntrWillReturn, ImmArg<1>]>; def int_masked_scatter: Intrinsic<[], [llvm_anyvector_ty, LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [ImmArg<2>]>; + [IntrWillReturn, ImmArg<2>]>; def int_masked_expandload: Intrinsic<[llvm_anyvector_ty], [LLVMPointerToElt<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>], - [IntrReadMem]>; + [IntrReadMem, IntrWillReturn]>; def int_masked_compressstore: Intrinsic<[], [llvm_anyvector_ty, LLVMPointerToElt<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [IntrArgMemOnly]>; + [IntrArgMemOnly, IntrWillReturn]>; // Test whether a pointer is associated with a type metadata identifier. def int_type_test : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_metadata_ty], - [IntrNoMem]>; + [IntrNoMem, IntrWillReturn]>; // Safely loads a function pointer from a virtual table pointer using type metadata. 
def int_type_checked_load : Intrinsic<[llvm_ptr_ty, llvm_i1_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_metadata_ty], - [IntrNoMem]>; + [IntrNoMem, IntrWillReturn]>; // Create a branch funnel that implements an indirect call to a limited set of // callees. This needs to be a musttail call. @@ -1098,6 +1154,8 @@ def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty], def int_hwasan_check_memaccess : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], [IntrInaccessibleMemOnly, ImmArg<2>]>; +def int_hwasan_check_memaccess_shortgranules : + Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], [IntrInaccessibleMemOnly, ImmArg<2>]>; // Xray intrinsics //===----------------------------------------------------------------------===// @@ -1121,7 +1179,7 @@ def int_memcpy_element_unordered_atomic llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i32_ty ], [ - IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>, + IntrArgMemOnly, IntrWillReturn, NoCapture<0>, NoCapture<1>, WriteOnly<0>, ReadOnly<1>, ImmArg<3> ]>; @@ -1132,58 +1190,47 @@ def int_memmove_element_unordered_atomic llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i32_ty ], [ - IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>, + IntrArgMemOnly, IntrWillReturn, NoCapture<0>, NoCapture<1>, WriteOnly<0>, ReadOnly<1>, ImmArg<3> ]>; // @llvm.memset.element.unordered.atomic.*(dest, value, length, elementsize) def int_memset_element_unordered_atomic : Intrinsic<[], [ llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i32_ty ], - [ IntrArgMemOnly, NoCapture<0>, WriteOnly<0>, ImmArg<3> ]>; + [ IntrArgMemOnly, IntrWillReturn, NoCapture<0>, WriteOnly<0>, ImmArg<3> ]>; //===------------------------ Reduction Intrinsics ------------------------===// // -def int_experimental_vector_reduce_v2_fadd : Intrinsic<[llvm_anyfloat_ty], - [LLVMMatchType<0>, - llvm_anyvector_ty], - [IntrNoMem]>; -def int_experimental_vector_reduce_v2_fmul : Intrinsic<[llvm_anyfloat_ty], - [LLVMMatchType<0>, - llvm_anyvector_ty], - [IntrNoMem]>; -def int_experimental_vector_reduce_add : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty], - [IntrNoMem]>; -def int_experimental_vector_reduce_mul : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty], - [IntrNoMem]>; -def int_experimental_vector_reduce_and : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty], - [IntrNoMem]>; -def int_experimental_vector_reduce_or : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty], - [IntrNoMem]>; -def int_experimental_vector_reduce_xor : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty], - [IntrNoMem]>; -def int_experimental_vector_reduce_smax : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty], - [IntrNoMem]>; -def int_experimental_vector_reduce_smin : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty], - [IntrNoMem]>; -def int_experimental_vector_reduce_umax : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty], - [IntrNoMem]>; -def int_experimental_vector_reduce_umin : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty], - [IntrNoMem]>; -def int_experimental_vector_reduce_fmax : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty], - [IntrNoMem]>; -def int_experimental_vector_reduce_fmin : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty], - [IntrNoMem]>; +let IntrProperties = [IntrNoMem, IntrWillReturn] in { + def int_experimental_vector_reduce_v2_fadd : Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, + llvm_anyvector_ty]>; + def 
int_experimental_vector_reduce_v2_fmul : Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, + llvm_anyvector_ty]>; + def int_experimental_vector_reduce_add : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_experimental_vector_reduce_mul : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_experimental_vector_reduce_and : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_experimental_vector_reduce_or : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_experimental_vector_reduce_xor : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_experimental_vector_reduce_smax : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_experimental_vector_reduce_smin : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_experimental_vector_reduce_umax : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_experimental_vector_reduce_umin : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_experimental_vector_reduce_fmax : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_experimental_vector_reduce_fmin : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; +} //===---------- Intrinsics to control hardware supported loops ----------===// diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td index 832aca4fd30f..db01700f409f 100644 --- a/include/llvm/IR/IntrinsicsAArch64.td +++ b/include/llvm/IR/IntrinsicsAArch64.td @@ -691,7 +691,7 @@ def int_aarch64_crc32cx : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], // Memory Tagging Extensions (MTE) Intrinsics let TargetPrefix = "aarch64" in { def int_aarch64_irg : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty], - [IntrInaccessibleMemOnly]>; + [IntrNoMem, IntrHasSideEffects]>; def int_aarch64_addg : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrNoMem]>; def int_aarch64_gmi : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], @@ -707,7 +707,7 @@ def int_aarch64_subp : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty], // Generate a randomly tagged stack base pointer. def int_aarch64_irg_sp : Intrinsic<[llvm_ptr_ty], [llvm_i64_ty], - [IntrInaccessibleMemOnly]>; + [IntrNoMem, IntrHasSideEffects]>; // Transfer pointer tag with offset. 
// ptr1 = tagp(ptr0, baseptr, tag_offset) returns a pointer where @@ -733,3 +733,124 @@ def int_aarch64_settag_zero : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], def int_aarch64_stgp : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty], [IntrWriteMem, IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>; } + +// Transactional Memory Extension (TME) Intrinsics +let TargetPrefix = "aarch64" in { +def int_aarch64_tstart : GCCBuiltin<"__builtin_arm_tstart">, + Intrinsic<[llvm_i64_ty]>; + +def int_aarch64_tcommit : GCCBuiltin<"__builtin_arm_tcommit">, Intrinsic<[]>; + +def int_aarch64_tcancel : GCCBuiltin<"__builtin_arm_tcancel">, + Intrinsic<[], [llvm_i64_ty], [ImmArg<0>]>; + +def int_aarch64_ttest : GCCBuiltin<"__builtin_arm_ttest">, + Intrinsic<[llvm_i64_ty], [], + [IntrNoMem, IntrHasSideEffects]>; +} + +def llvm_nxv2i1_ty : LLVMType; +def llvm_nxv4i1_ty : LLVMType; +def llvm_nxv8i1_ty : LLVMType; +def llvm_nxv16i1_ty : LLVMType; +def llvm_nxv16i8_ty : LLVMType; +def llvm_nxv4i32_ty : LLVMType; +def llvm_nxv2i64_ty : LLVMType; +def llvm_nxv8f16_ty : LLVMType; +def llvm_nxv4f32_ty : LLVMType; +def llvm_nxv2f64_ty : LLVMType; + +let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". + class AdvSIMD_Merged1VectorArg_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>], + [IntrNoMem]>; + + class AdvSIMD_SVE_CNT_Intrinsic + : Intrinsic<[LLVMVectorOfBitcastsToInt<0>], + [LLVMVectorOfBitcastsToInt<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyvector_ty], + [IntrNoMem]>; + + class AdvSIMD_SVE_Unpack_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMSubdivide2VectorType<0>], + [IntrNoMem]>; + + class AdvSIMD_SVE_PUNPKHI_Intrinsic + : Intrinsic<[LLVMHalfElementsVectorType<0>], + [llvm_anyvector_ty], + [IntrNoMem]>; + + class AdvSIMD_SVE_DOT_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMSubdivide4VectorType<0>, + LLVMSubdivide4VectorType<0>], + [IntrNoMem]>; + + class AdvSIMD_SVE_DOT_Indexed_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMSubdivide4VectorType<0>, + LLVMSubdivide4VectorType<0>, + llvm_i32_ty], + [IntrNoMem]>; + + // This class of intrinsics are not intended to be useful within LLVM IR but + // are instead here to support some of the more regid parts of the ACLE. + class Builtin_SVCVT + : GCCBuiltin<"__builtin_sve_" # name>, + Intrinsic<[OUT], [OUT, llvm_nxv16i1_ty, IN], [IntrNoMem]>; +} + +//===----------------------------------------------------------------------===// +// SVE + +let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
+ +// +// Integer arithmetic +// + +def int_aarch64_sve_abs : AdvSIMD_Merged1VectorArg_Intrinsic; +def int_aarch64_sve_neg : AdvSIMD_Merged1VectorArg_Intrinsic; + +def int_aarch64_sve_sdot : AdvSIMD_SVE_DOT_Intrinsic; +def int_aarch64_sve_sdot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic; + +def int_aarch64_sve_udot : AdvSIMD_SVE_DOT_Intrinsic; +def int_aarch64_sve_udot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic; + +// +// Counting bits +// + +def int_aarch64_sve_cnt : AdvSIMD_SVE_CNT_Intrinsic; + +// +// Permutations and selection +// + +def int_aarch64_sve_sunpkhi : AdvSIMD_SVE_Unpack_Intrinsic; +def int_aarch64_sve_sunpklo : AdvSIMD_SVE_Unpack_Intrinsic; + +def int_aarch64_sve_uunpkhi : AdvSIMD_SVE_Unpack_Intrinsic; +def int_aarch64_sve_uunpklo : AdvSIMD_SVE_Unpack_Intrinsic; + +// +// Floating-point comparisons +// + +def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>; + +// +// Predicate operations +// + +def int_aarch64_sve_punpkhi : AdvSIMD_SVE_PUNPKHI_Intrinsic; +def int_aarch64_sve_punpklo : AdvSIMD_SVE_PUNPKHI_Intrinsic; +} diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td index 3982444b5401..ab6ee7f92dd1 100644 --- a/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/include/llvm/IR/IntrinsicsAMDGPU.td @@ -175,6 +175,7 @@ def int_amdgcn_implicit_buffer_ptr : // Set EXEC to the 64-bit value given. // This is always moved to the beginning of the basic block. +// FIXME: Should be mangled for wave size. def int_amdgcn_init_exec : Intrinsic<[], [llvm_i64_ty], // 64-bit literal constant [IntrConvergent, ImmArg<0>]>; @@ -185,7 +186,7 @@ def int_amdgcn_init_exec : Intrinsic<[], def int_amdgcn_init_exec_from_input : Intrinsic<[], [llvm_i32_ty, // 32-bit SGPR input llvm_i32_ty], // bit offset of the thread count - [IntrConvergent]>; + [IntrConvergent, ImmArg<1>]>; def int_amdgcn_wavefrontsize : GCCBuiltin<"__builtin_amdgcn_wavefrontsize">, @@ -199,12 +200,14 @@ def int_amdgcn_wavefrontsize : // The first parameter is s_sendmsg immediate (i16), // the second one is copied to m0 def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">, - Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, IntrInaccessibleMemOnly]>; + Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], + [ImmArg<0>, IntrNoMem, IntrHasSideEffects]>; def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">, - Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, IntrInaccessibleMemOnly]>; + Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], + [ImmArg<0>, IntrNoMem, IntrHasSideEffects]>; def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">, - Intrinsic<[], [], [IntrConvergent]>; + Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent]>; def int_amdgcn_wave_barrier : GCCBuiltin<"__builtin_amdgcn_wave_barrier">, Intrinsic<[], [], [IntrConvergent]>; @@ -835,9 +838,6 @@ defset list AMDGPUImageDimAtomicIntrinsics = { defm int_amdgcn_image_atomic_and : AMDGPUImageDimAtomic<"ATOMIC_AND">; defm int_amdgcn_image_atomic_or : AMDGPUImageDimAtomic<"ATOMIC_OR">; defm int_amdgcn_image_atomic_xor : AMDGPUImageDimAtomic<"ATOMIC_XOR">; - - // TODO: INC/DEC are weird: they seem to have a vdata argument in hardware, - // even though it clearly shouldn't be needed defm int_amdgcn_image_atomic_inc : AMDGPUImageDimAtomic<"ATOMIC_INC">; defm int_amdgcn_image_atomic_dec : AMDGPUImageDimAtomic<"ATOMIC_DEC">; @@ -854,8 +854,8 @@ let TargetPrefix = "amdgcn" in { defset list AMDGPUBufferIntrinsics = { -class AMDGPUBufferLoad : Intrinsic < - 
[llvm_any_ty], +class AMDGPUBufferLoad : Intrinsic < + [data_ty], [llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) @@ -863,7 +863,7 @@ class AMDGPUBufferLoad : Intrinsic < llvm_i1_ty], // slc(imm) [IntrReadMem, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; -def int_amdgcn_buffer_load_format : AMDGPUBufferLoad; +def int_amdgcn_buffer_load_format : AMDGPUBufferLoad; def int_amdgcn_buffer_load : AMDGPUBufferLoad; def int_amdgcn_s_buffer_load : Intrinsic < @@ -874,9 +874,9 @@ def int_amdgcn_s_buffer_load : Intrinsic < [IntrNoMem, ImmArg<2>]>, AMDGPURsrcIntrinsic<0>; -class AMDGPUBufferStore : Intrinsic < +class AMDGPUBufferStore : Intrinsic < [], - [llvm_any_ty, // vdata(VGPR) + [data_ty, // vdata(VGPR) llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) @@ -884,7 +884,7 @@ class AMDGPUBufferStore : Intrinsic < llvm_i1_ty], // slc(imm) [IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; -def int_amdgcn_buffer_store_format : AMDGPUBufferStore; +def int_amdgcn_buffer_store_format : AMDGPUBufferStore; def int_amdgcn_buffer_store : AMDGPUBufferStore; // New buffer intrinsics with separate raw and struct variants. The raw @@ -894,56 +894,68 @@ def int_amdgcn_buffer_store : AMDGPUBufferStore; // and swizzling changes depending on whether idxen is set in the instruction. // These new instrinsics also keep the offset and soffset arguments separate as // they behave differently in bounds checking and swizzling. -class AMDGPURawBufferLoad : Intrinsic < - [llvm_any_ty], +class AMDGPURawBufferLoad : Intrinsic < + [data_ty], [llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+) + llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, + // bit 1 = slc, + // bit 2 = dlc on gfx10+), + // swizzled buffer (bit 3 = swz)) [IntrReadMem, ImmArg<3>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; -def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad; +def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad; def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad; -class AMDGPUStructBufferLoad : Intrinsic < - [llvm_any_ty], +class AMDGPUStructBufferLoad : Intrinsic < + [data_ty], [llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+) + llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, + // bit 1 = slc, + // bit 2 = dlc on gfx10+), + // swizzled buffer (bit 3 = swz)) [IntrReadMem, ImmArg<4>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; -def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad; +def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad; def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad; -class AMDGPURawBufferStore : Intrinsic < +class AMDGPURawBufferStore : Intrinsic < [], - [llvm_any_ty, // vdata(VGPR) + [data_ty, // vdata(VGPR) llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty], // cachepolicy(imm; 
bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+) + llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, + // bit 1 = slc, + // bit 2 = dlc on gfx10+), + // swizzled buffer (bit 3 = swz)) [IntrWriteMem, ImmArg<4>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; -def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore; +def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore; def int_amdgcn_raw_buffer_store : AMDGPURawBufferStore; -class AMDGPUStructBufferStore : Intrinsic < +class AMDGPUStructBufferStore : Intrinsic < [], - [llvm_any_ty, // vdata(VGPR) + [data_ty, // vdata(VGPR) llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+) + llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, + // bit 1 = slc, + // bit 2 = dlc on gfx10+), + // swizzled buffer (bit 3 = swz)) [IntrWriteMem, ImmArg<5>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; -def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore; +def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore; def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore; -class AMDGPURawBufferAtomic : Intrinsic < - [llvm_anyint_ty], +class AMDGPURawBufferAtomic : Intrinsic < + [data_ty], [LLVMMatchType<0>, // vdata(VGPR) llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) @@ -961,6 +973,8 @@ def int_amdgcn_raw_buffer_atomic_umax : AMDGPURawBufferAtomic; def int_amdgcn_raw_buffer_atomic_and : AMDGPURawBufferAtomic; def int_amdgcn_raw_buffer_atomic_or : AMDGPURawBufferAtomic; def int_amdgcn_raw_buffer_atomic_xor : AMDGPURawBufferAtomic; +def int_amdgcn_raw_buffer_atomic_inc : AMDGPURawBufferAtomic; +def int_amdgcn_raw_buffer_atomic_dec : AMDGPURawBufferAtomic; def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic< [llvm_anyint_ty], [LLVMMatchType<0>, // src(VGPR) @@ -972,8 +986,8 @@ def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic< [ImmArg<5>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<2, 0>; -class AMDGPUStructBufferAtomic : Intrinsic < - [llvm_anyint_ty], +class AMDGPUStructBufferAtomic : Intrinsic < + [data_ty], [LLVMMatchType<0>, // vdata(VGPR) llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) @@ -992,6 +1006,8 @@ def int_amdgcn_struct_buffer_atomic_umax : AMDGPUStructBufferAtomic; def int_amdgcn_struct_buffer_atomic_and : AMDGPUStructBufferAtomic; def int_amdgcn_struct_buffer_atomic_or : AMDGPUStructBufferAtomic; def int_amdgcn_struct_buffer_atomic_xor : AMDGPUStructBufferAtomic; +def int_amdgcn_struct_buffer_atomic_inc : AMDGPUStructBufferAtomic; +def int_amdgcn_struct_buffer_atomic_dec : AMDGPUStructBufferAtomic; def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic< [llvm_anyint_ty], [LLVMMatchType<0>, // src(VGPR) @@ -1046,7 +1062,10 @@ def int_amdgcn_raw_tbuffer_load : Intrinsic < llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) - llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+) + llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, + // bit 1 = slc, + // bit 2 = dlc on gfx10+), + // swizzled buffer (bit 3 = swz)) [IntrReadMem, ImmArg<3>, ImmArg<4>], "", 
[SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; @@ -1057,7 +1076,10 @@ def int_amdgcn_raw_tbuffer_store : Intrinsic < llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) - llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+) + llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, + // bit 1 = slc, + // bit 2 = dlc on gfx10+), + // swizzled buffer (bit 3 = swz)) [IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; @@ -1068,7 +1090,10 @@ def int_amdgcn_struct_tbuffer_load : Intrinsic < llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) - llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+) + llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, + // bit 1 = slc, + // bit 2 = dlc on gfx10+), + // swizzled buffer (bit 3 = swz)) [IntrReadMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; @@ -1080,7 +1105,10 @@ def int_amdgcn_struct_tbuffer_store : Intrinsic < llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) - llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+) + llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, + // bit 1 = slc, + // bit 2 = dlc on gfx10+), + // swizzled buffer (bit 3 = swz)) [IntrWriteMem, ImmArg<5>, ImmArg<6>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; @@ -1431,6 +1459,13 @@ def int_amdgcn_wqm : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; +// Copies the source value to the destination value, such that the source +// is computed as if the entire program were executed in WQM if any other +// program code executes in WQM. +def int_amdgcn_softwqm : Intrinsic<[llvm_any_ty], + [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] +>; + // Return true if at least one thread within the pixel quad passes true into // the function. def int_amdgcn_wqm_vote : Intrinsic<[llvm_i1_ty], @@ -1459,6 +1494,18 @@ def int_amdgcn_set_inactive : LLVMMatchType<0>], // value for the inactive lanes to take [IntrNoMem, IntrConvergent]>; +// Return if the given flat pointer points to a local memory address. +def int_amdgcn_is_shared : GCCBuiltin<"__builtin_amdgcn_is_shared">, + Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], + [IntrNoMem, IntrSpeculatable, NoCapture<0>] +>; + +// Return if the given flat pointer points to a prvate memory address. 
+def int_amdgcn_is_private : GCCBuiltin<"__builtin_amdgcn_is_private">, + Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], + [IntrNoMem, IntrSpeculatable, NoCapture<0>] +>; + //===----------------------------------------------------------------------===// // CI+ Intrinsics //===----------------------------------------------------------------------===// diff --git a/include/llvm/IR/IntrinsicsARM.td b/include/llvm/IR/IntrinsicsARM.td index 4792af097d95..e13da6157e04 100644 --- a/include/llvm/IR/IntrinsicsARM.td +++ b/include/llvm/IR/IntrinsicsARM.td @@ -777,5 +777,14 @@ class Neon_Dot_Intrinsic def int_arm_neon_udot : Neon_Dot_Intrinsic; def int_arm_neon_sdot : Neon_Dot_Intrinsic; +def int_arm_vctp8 : Intrinsic<[llvm_v16i1_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_arm_vctp16 : Intrinsic<[llvm_v8i1_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_arm_vctp32 : Intrinsic<[llvm_v4i1_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_arm_vctp64 : Intrinsic<[llvm_v2i1_ty], [llvm_i32_ty], [IntrNoMem]>; + +// GNU eabi mcount +def int_arm_gnu_eabi_mcount : Intrinsic<[], + [], + [IntrReadMem, IntrWriteMem]>; } // end TargetPrefix diff --git a/include/llvm/IR/IntrinsicsBPF.td b/include/llvm/IR/IntrinsicsBPF.td index d7595a2a7700..3618cc6a4128 100644 --- a/include/llvm/IR/IntrinsicsBPF.td +++ b/include/llvm/IR/IntrinsicsBPF.td @@ -20,4 +20,7 @@ let TargetPrefix = "bpf" in { // All intrinsics start with "llvm.bpf." Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>; def int_bpf_pseudo : GCCBuiltin<"__builtin_bpf_pseudo">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty]>; + def int_bpf_preserve_field_info : GCCBuiltin<"__builtin_bpf_preserve_field_info">, + Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty, llvm_i64_ty], + [IntrNoMem, ImmArg<1>]>; } diff --git a/include/llvm/IR/IntrinsicsMips.td b/include/llvm/IR/IntrinsicsMips.td index 6393a9ca35d5..bfcdd80a52d5 100644 --- a/include/llvm/IR/IntrinsicsMips.td +++ b/include/llvm/IR/IntrinsicsMips.td @@ -1260,16 +1260,16 @@ def int_mips_insve_d : GCCBuiltin<"__builtin_msa_insve_d">, def int_mips_ld_b : GCCBuiltin<"__builtin_msa_ld_b">, Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>; + [IntrReadMem, IntrArgMemOnly]>; def int_mips_ld_h : GCCBuiltin<"__builtin_msa_ld_h">, Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>; + [IntrReadMem, IntrArgMemOnly]>; def int_mips_ld_w : GCCBuiltin<"__builtin_msa_ld_w">, Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>; + [IntrReadMem, IntrArgMemOnly]>; def int_mips_ld_d : GCCBuiltin<"__builtin_msa_ld_d">, Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>; + [IntrReadMem, IntrArgMemOnly]>; def int_mips_ldi_b : GCCBuiltin<"__builtin_msa_ldi_b">, Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>; @@ -1684,16 +1684,16 @@ def int_mips_srlri_d : GCCBuiltin<"__builtin_msa_srlri_d">, def int_mips_st_b : GCCBuiltin<"__builtin_msa_st_b">, Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrArgMemOnly, ImmArg<2>]>; + [IntrArgMemOnly]>; def int_mips_st_h : GCCBuiltin<"__builtin_msa_st_h">, Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrArgMemOnly, ImmArg<2>]>; + [IntrArgMemOnly]>; def int_mips_st_w : GCCBuiltin<"__builtin_msa_st_w">, Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrArgMemOnly, ImmArg<2>]>; + [IntrArgMemOnly]>; def int_mips_st_d : GCCBuiltin<"__builtin_msa_st_d">, 
Intrinsic<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrArgMemOnly, ImmArg<2>]>; + [IntrArgMemOnly]>; def int_mips_subs_s_b : GCCBuiltin<"__builtin_msa_subs_s_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; diff --git a/include/llvm/IR/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td index dba7dd76c4ff..0483d965ba64 100644 --- a/include/llvm/IR/IntrinsicsNVVM.td +++ b/include/llvm/IR/IntrinsicsNVVM.td @@ -276,6 +276,26 @@ class NVVM_MMA_SUPPORTED frags, string layout_a, string layout_b ); } +class SHFL_INFO { + string Suffix = !if(sync, "sync_", "") + # mode # "_" + # type + # !if(return_pred, "p", ""); + + string Name = "int_nvvm_shfl_" # Suffix; + string Builtin = "__nvvm_shfl_" # Suffix; + string IntrName = "llvm.nvvm.shfl." # !subst("_",".", Suffix); + list withGccBuiltin = !if(return_pred, [], [1]); + list withoutGccBuiltin = !if(return_pred, [1], []); + LLVMType OpType = !cond( + !eq(type,"i32"): llvm_i32_ty, + !eq(type,"f32"): llvm_float_ty); + list RetTy = !if(return_pred, [OpType, llvm_i1_ty], [OpType]); + list ArgsTy = !if(sync, + [llvm_i32_ty, OpType, llvm_i32_ty, llvm_i32_ty], + [OpType, llvm_i32_ty, llvm_i32_ty]); +} + let TargetPrefix = "nvvm" in { def int_nvvm_prmt : GCCBuiltin<"__nvvm_prmt">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], @@ -3955,90 +3975,27 @@ def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">; // // SHUFFLE // - -// shfl.down.b32 dest, val, offset, mask_and_clamp -def int_nvvm_shfl_down_i32 : - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.down.i32">, - GCCBuiltin<"__nvvm_shfl_down_i32">; -def int_nvvm_shfl_down_f32 : - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.down.f32">, - GCCBuiltin<"__nvvm_shfl_down_f32">; - -// shfl.up.b32 dest, val, offset, mask_and_clamp -def int_nvvm_shfl_up_i32 : - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.up.i32">, - GCCBuiltin<"__nvvm_shfl_up_i32">; -def int_nvvm_shfl_up_f32 : - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.up.f32">, - GCCBuiltin<"__nvvm_shfl_up_f32">; - -// shfl.bfly.b32 dest, val, offset, mask_and_clamp -def int_nvvm_shfl_bfly_i32 : - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.bfly.i32">, - GCCBuiltin<"__nvvm_shfl_bfly_i32">; -def int_nvvm_shfl_bfly_f32 : - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.bfly.f32">, - GCCBuiltin<"__nvvm_shfl_bfly_f32">; - -// shfl.idx.b32 dest, val, lane, mask_and_clamp -def int_nvvm_shfl_idx_i32 : - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.idx.i32">, - GCCBuiltin<"__nvvm_shfl_idx_i32">; -def int_nvvm_shfl_idx_f32 : - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.idx.f32">, - GCCBuiltin<"__nvvm_shfl_idx_f32">; - -// Synchronizing shfl variants available in CUDA-9. -// On sm_70 these don't have to be convergent, so we may eventually want to -// implement non-convergent variant of this intrinsic. 
- -// shfl.sync.down.b32 dest, threadmask, val, offset , mask_and_clamp -def int_nvvm_shfl_sync_down_i32 : - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.down.i32">, - GCCBuiltin<"__nvvm_shfl_sync_down_i32">; -def int_nvvm_shfl_sync_down_f32 : - Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.down.f32">, - GCCBuiltin<"__nvvm_shfl_sync_down_f32">; - -// shfl.sync.up.b32 dest, threadmask, val, offset, mask_and_clamp -def int_nvvm_shfl_sync_up_i32 : - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.up.i32">, - GCCBuiltin<"__nvvm_shfl_sync_up_i32">; -def int_nvvm_shfl_sync_up_f32 : - Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.up.f32">, - GCCBuiltin<"__nvvm_shfl_sync_up_f32">; - -// shfl.sync.bfly.b32 dest, threadmask, val, offset, mask_and_clamp -def int_nvvm_shfl_sync_bfly_i32 : - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.bfly.i32">, - GCCBuiltin<"__nvvm_shfl_sync_bfly_i32">; -def int_nvvm_shfl_sync_bfly_f32 : - Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.bfly.f32">, - GCCBuiltin<"__nvvm_shfl_sync_bfly_f32">; - -// shfl.sync.idx.b32 dest, threadmask, val, lane, mask_and_clamp -def int_nvvm_shfl_sync_idx_i32 : - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.idx.i32">, - GCCBuiltin<"__nvvm_shfl_sync_idx_i32">; -def int_nvvm_shfl_sync_idx_f32 : - Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.idx.f32">, - GCCBuiltin<"__nvvm_shfl_sync_idx_f32">; +// Generate intrinsics for all variants of shfl instruction. 
+foreach sync = [0, 1] in { + foreach mode = ["up", "down", "bfly", "idx"] in { + foreach type = ["i32", "f32"] in { + foreach return_pred = [0, 1] in { + foreach i = [SHFL_INFO] in { + foreach _ = i.withGccBuiltin in { + def i.Name : GCCBuiltin, + Intrinsic; + } + foreach _ = i.withoutGccBuiltin in { + def i.Name : Intrinsic; + } + } + } + } + } +} // // VOTE diff --git a/include/llvm/IR/IntrinsicsWebAssembly.td b/include/llvm/IR/IntrinsicsWebAssembly.td index 1b892727547d..810979b99934 100644 --- a/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/include/llvm/IR/IntrinsicsWebAssembly.td @@ -23,6 +23,17 @@ def int_wasm_memory_grow : Intrinsic<[llvm_anyint_ty], [llvm_i32_ty, LLVMMatchType<0>], []>; +//===----------------------------------------------------------------------===// +// Trapping float-to-int conversions +//===----------------------------------------------------------------------===// + +def int_wasm_trunc_signed : Intrinsic<[llvm_anyint_ty], + [llvm_anyfloat_ty], + [IntrNoMem]>; +def int_wasm_trunc_unsigned : Intrinsic<[llvm_anyint_ty], + [llvm_anyfloat_ty], + [IntrNoMem]>; + //===----------------------------------------------------------------------===// // Saturating float-to-int conversions //===----------------------------------------------------------------------===// @@ -89,6 +100,10 @@ def int_wasm_atomic_notify: // SIMD intrinsics //===----------------------------------------------------------------------===// +def int_wasm_swizzle : + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_sub_saturate_signed : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>], @@ -109,6 +124,39 @@ def int_wasm_alltrue : Intrinsic<[llvm_i32_ty], [llvm_anyvector_ty], [IntrNoMem, IntrSpeculatable]>; +def int_wasm_qfma : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_qfms : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_narrow_signed : + Intrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty, LLVMMatchType<1>], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_narrow_unsigned : + Intrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty, LLVMMatchType<1>], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_widen_low_signed : + Intrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_widen_high_signed : + Intrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_widen_low_unsigned : + Intrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_widen_high_unsigned : + Intrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty], + [IntrNoMem, IntrSpeculatable]>; + //===----------------------------------------------------------------------===// // Bulk memory intrinsics @@ -133,4 +181,14 @@ def int_wasm_tls_size : [], [IntrNoMem, IntrSpeculatable]>; +def int_wasm_tls_align : + Intrinsic<[llvm_anyint_ty], + [], + [IntrNoMem, IntrSpeculatable]>; + +def int_wasm_tls_base : + Intrinsic<[llvm_ptr_ty], + [], + [IntrReadMem]>; + } // TargetPrefix = "wasm" diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 236d312d7d78..5796686dd79f 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -2091,16 +2091,20 @@ let TargetPrefix = "x86" in { // All intrinsics start with 
"llvm.x86.". Intrinsic<[llvm_ptr_ty], [], []>; def int_x86_lwpins32 : GCCBuiltin<"__builtin_ia32_lwpins32">, - Intrinsic<[llvm_i8_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; + Intrinsic<[llvm_i8_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [ImmArg<2>]>; def int_x86_lwpins64 : GCCBuiltin<"__builtin_ia32_lwpins64">, - Intrinsic<[llvm_i8_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; + Intrinsic<[llvm_i8_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], + [ImmArg<2>]>; def int_x86_lwpval32 : GCCBuiltin<"__builtin_ia32_lwpval32">, - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [ImmArg<2>]>; def int_x86_lwpval64 : GCCBuiltin<"__builtin_ia32_lwpval64">, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], + [ImmArg<2>]>; } //===----------------------------------------------------------------------===// diff --git a/include/llvm/IR/LLVMContext.h b/include/llvm/IR/LLVMContext.h index c80504500418..91bd57dc5ac0 100644 --- a/include/llvm/IR/LLVMContext.h +++ b/include/llvm/IR/LLVMContext.h @@ -72,34 +72,9 @@ public: // Pinned metadata names, which always have the same value. This is a // compile-time performance optimization, not a correctness optimization. enum : unsigned { - MD_dbg = 0, // "dbg" - MD_tbaa = 1, // "tbaa" - MD_prof = 2, // "prof" - MD_fpmath = 3, // "fpmath" - MD_range = 4, // "range" - MD_tbaa_struct = 5, // "tbaa.struct" - MD_invariant_load = 6, // "invariant.load" - MD_alias_scope = 7, // "alias.scope" - MD_noalias = 8, // "noalias", - MD_nontemporal = 9, // "nontemporal" - MD_mem_parallel_loop_access = 10, // "llvm.mem.parallel_loop_access" - MD_nonnull = 11, // "nonnull" - MD_dereferenceable = 12, // "dereferenceable" - MD_dereferenceable_or_null = 13, // "dereferenceable_or_null" - MD_make_implicit = 14, // "make.implicit" - MD_unpredictable = 15, // "unpredictable" - MD_invariant_group = 16, // "invariant.group" - MD_align = 17, // "align" - MD_loop = 18, // "llvm.loop" - MD_type = 19, // "type" - MD_section_prefix = 20, // "section_prefix" - MD_absolute_symbol = 21, // "absolute_symbol" - MD_associated = 22, // "associated" - MD_callees = 23, // "callees" - MD_irr_loop = 24, // "irr_loop" - MD_access_group = 25, // "llvm.access.group" - MD_callback = 26, // "callback" - MD_preserve_access_index = 27, // "llvm.preserve.*.access.index" +#define LLVM_FIXED_MD_KIND(EnumID, Name, Value) EnumID = Value, +#include "llvm/IR/FixedMetadataKinds.def" +#undef LLVM_FIXED_MD_KIND }; /// Known operand bundle tag IDs, which always have the same value. All diff --git a/include/llvm/IR/MDBuilder.h b/include/llvm/IR/MDBuilder.h index 3a2b1bddf45d..11e2e2623257 100644 --- a/include/llvm/IR/MDBuilder.h +++ b/include/llvm/IR/MDBuilder.h @@ -16,6 +16,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringRef.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/DataTypes.h" #include @@ -75,6 +76,10 @@ public: /// Return metadata containing the section prefix for a function. MDNode *createFunctionSectionPrefix(StringRef Prefix); + /// return metadata containing expected value + MDNode *createMisExpect(uint64_t Index, uint64_t LikelyWeight, + uint64_t UnlikelyWeight); + //===------------------------------------------------------------------===// // Range metadata. 
//===------------------------------------------------------------------===// diff --git a/include/llvm/IR/Metadata.h b/include/llvm/IR/Metadata.h index 7ca2540181ba..f62b1e246cca 100644 --- a/include/llvm/IR/Metadata.h +++ b/include/llvm/IR/Metadata.h @@ -601,7 +601,7 @@ dyn_extract_or_null(Y &&MD) { /// These are used to efficiently contain a byte sequence for metadata. /// MDString is always unnamed. class MDString : public Metadata { - friend class StringMapEntry; + friend class StringMapEntryStorage; StringMapEntry *Entry = nullptr; @@ -806,7 +806,7 @@ public: /// Ensure that this has RAUW support, and then return it. ReplaceableMetadataImpl *getOrCreateReplaceableUses() { if (!hasReplaceableUses()) - makeReplaceable(llvm::make_unique(getContext())); + makeReplaceable(std::make_unique(getContext())); return getReplaceableUses(); } diff --git a/include/llvm/IR/Module.h b/include/llvm/IR/Module.h index f458680cfe15..59331142766a 100644 --- a/include/llvm/IR/Module.h +++ b/include/llvm/IR/Module.h @@ -46,6 +46,7 @@ class FunctionType; class GVMaterializer; class LLVMContext; class MemoryBuffer; +class Pass; class RandomNumberGenerator; template class SmallPtrSetImpl; class StructType; diff --git a/include/llvm/IR/ModuleSummaryIndex.h b/include/llvm/IR/ModuleSummaryIndex.h index aacf8cfc089f..be60447abd87 100644 --- a/include/llvm/IR/ModuleSummaryIndex.h +++ b/include/llvm/IR/ModuleSummaryIndex.h @@ -119,7 +119,7 @@ class GlobalValueSummary; using GlobalValueSummaryList = std::vector>; -struct LLVM_ALIGNAS(8) GlobalValueSummaryInfo { +struct alignas(8) GlobalValueSummaryInfo { union NameOrGV { NameOrGV(bool HaveGVs) { if (HaveGVs) @@ -603,7 +603,7 @@ public: if (!TypeTests.empty() || !TypeTestAssumeVCalls.empty() || !TypeCheckedLoadVCalls.empty() || !TypeTestAssumeConstVCalls.empty() || !TypeCheckedLoadConstVCalls.empty()) - TIdInfo = llvm::make_unique(TypeIdInfo{ + TIdInfo = std::make_unique(TypeIdInfo{ std::move(TypeTests), std::move(TypeTestAssumeVCalls), std::move(TypeCheckedLoadVCalls), std::move(TypeTestAssumeConstVCalls), @@ -632,6 +632,8 @@ public: /// Return the list of pairs. ArrayRef calls() const { return CallGraphEdgeList; } + void addCall(EdgeTy E) { CallGraphEdgeList.push_back(E); } + /// Returns the list of type identifiers used by this function in /// llvm.type.test intrinsics other than by an llvm.assume intrinsic, /// represented as GUIDs. @@ -680,7 +682,7 @@ public: /// were unable to devirtualize a checked call. 
void addTypeTest(GlobalValue::GUID Guid) { if (!TIdInfo) - TIdInfo = llvm::make_unique(); + TIdInfo = std::make_unique(); TIdInfo->TypeTests.push_back(Guid); } @@ -780,7 +782,7 @@ public: void setVTableFuncs(VTableFuncList Funcs) { assert(!VTableFuncs); - VTableFuncs = llvm::make_unique(std::move(Funcs)); + VTableFuncs = std::make_unique(std::move(Funcs)); } ArrayRef vTableFuncs() const { @@ -1293,6 +1295,12 @@ public: return nullptr; } + TypeIdSummary *getTypeIdSummary(StringRef TypeId) { + return const_cast( + static_cast(this)->getTypeIdSummary( + TypeId)); + } + const std::map & typeIdCompatibleVtableMap() const { return TypeIdCompatibleVtableMap; @@ -1411,7 +1419,7 @@ template <> struct GraphTraits : public GraphTraits { static NodeRef getEntryNode(ModuleSummaryIndex *I) { std::unique_ptr Root = - make_unique(I->calculateCallGraphRoot()); + std::make_unique(I->calculateCallGraphRoot()); GlobalValueSummaryInfo G(I->haveGVs()); G.SummaryList.push_back(std::move(Root)); static auto P = diff --git a/include/llvm/IR/ModuleSummaryIndexYAML.h b/include/llvm/IR/ModuleSummaryIndexYAML.h index 26d9c43fabf1..4d4a67c75172 100644 --- a/include/llvm/IR/ModuleSummaryIndexYAML.h +++ b/include/llvm/IR/ModuleSummaryIndexYAML.h @@ -220,7 +220,7 @@ template <> struct CustomMappingTraits { V.emplace(RefGUID, /*IsAnalysis=*/false); Refs.push_back(ValueInfo(/*IsAnalysis=*/false, &*V.find(RefGUID))); } - Elem.SummaryList.push_back(llvm::make_unique( + Elem.SummaryList.push_back(std::make_unique( GlobalValueSummary::GVFlags( static_cast(FSum.Linkage), FSum.NotEligibleToImport, FSum.Live, FSum.IsLocal, FSum.CanAutoHide), diff --git a/include/llvm/IR/Operator.h b/include/llvm/IR/Operator.h index 8199c65ca8a0..037f5aed03ee 100644 --- a/include/llvm/IR/Operator.h +++ b/include/llvm/IR/Operator.h @@ -379,16 +379,25 @@ public: return false; switch (Opcode) { + case Instruction::FNeg: + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + // FIXME: To clean up and correct the semantics of fast-math-flags, FCmp + // should not be treated as a math op, but the other opcodes should. + // This would make things consistent with Select/PHI (FP value type + // determines whether they are math ops and, therefore, capable of + // having fast-math-flags). 
case Instruction::FCmp: return true; - // non math FP Operators (no FMF) - case Instruction::ExtractElement: - case Instruction::ShuffleVector: - case Instruction::InsertElement: case Instruction::PHI: - return false; - default: + case Instruction::Select: + case Instruction::Call: return V->getType()->isFPOrFPVectorTy(); + default: + return false; } } }; diff --git a/include/llvm/IR/PassManager.h b/include/llvm/IR/PassManager.h index 37fe2a5b01ad..1e1f4a92f844 100644 --- a/include/llvm/IR/PassManager.h +++ b/include/llvm/IR/PassManager.h @@ -45,6 +45,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PassInstrumentation.h" #include "llvm/IR/PassManagerInternal.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/TypeName.h" #include "llvm/Support/raw_ostream.h" @@ -418,7 +419,7 @@ template Args, - llvm::index_sequence) { + std::index_sequence) { (void)Args; return AM.template getResult(IR, std::get(Args)...); } @@ -435,7 +436,7 @@ getAnalysisResult(AnalysisManager &AM, IRUnitT &IR, std::tuple Args) { return (getAnalysisResultUnpackTuple< PassT, IRUnitT>)(AM, IR, Args, - llvm::index_sequence_for{}); + std::index_sequence_for{}); } } // namespace detail diff --git a/include/llvm/IR/PassManagerInternal.h b/include/llvm/IR/PassManagerInternal.h index 58198bf67b11..c602c0b5cc20 100644 --- a/include/llvm/IR/PassManagerInternal.h +++ b/include/llvm/IR/PassManagerInternal.h @@ -289,7 +289,7 @@ struct AnalysisPassModel : AnalysisPassConcept> run(IRUnitT &IR, AnalysisManager &AM, ExtraArgTs... ExtraArgs) override { - return llvm::make_unique( + return std::make_unique( Pass.run(IR, AM, std::forward(ExtraArgs)...)); } diff --git a/include/llvm/IR/PatternMatch.h b/include/llvm/IR/PatternMatch.h index 0f03d7cc56b8..2851b24c05ae 100644 --- a/include/llvm/IR/PatternMatch.h +++ b/include/llvm/IR/PatternMatch.h @@ -88,6 +88,25 @@ inline class_match m_Undef() { return class_match(); } /// Match an arbitrary Constant and ignore it. inline class_match m_Constant() { return class_match(); } +/// Match an arbitrary basic block value and ignore it. +inline class_match m_BasicBlock() { + return class_match(); +} + +/// Inverting matcher +template struct match_unless { + Ty M; + + match_unless(const Ty &Matcher) : M(Matcher) {} + + template bool match(ITy *V) { return !M.match(V); } +}; + +/// Match if the inner matcher does *NOT* match. +template inline match_unless m_Unless(const Ty &M) { + return match_unless(M); +} + /// Matching combinators template struct match_combine_or { LTy L; @@ -300,6 +319,15 @@ template struct cstfp_pred_ty : public Predicate { // /////////////////////////////////////////////////////////////////////////////// +struct is_any_apint { + bool isValue(const APInt &C) { return true; } +}; +/// Match an integer or vector with any integral constant. +/// For vectors, this includes constants with undefined elements. +inline cst_pred_ty m_AnyIntegralConstant() { + return cst_pred_ty(); +} + struct is_all_ones { bool isValue(const APInt &C) { return C.isAllOnesValue(); } }; @@ -388,6 +416,18 @@ inline api_pred_ty m_Power2(const APInt *&V) { return V; } +struct is_negated_power2 { + bool isValue(const APInt &C) { return (-C).isPowerOf2(); } +}; +/// Match a integer or vector negated power-of-2. +/// For vectors, this includes constants with undefined elements. 
+inline cst_pred_ty m_NegatedPower2() { + return cst_pred_ty(); +} +inline api_pred_ty m_NegatedPower2(const APInt *&V) { + return V; +} + struct is_power2_or_zero { bool isValue(const APInt &C) { return !C || C.isPowerOf2(); } }; @@ -528,6 +568,12 @@ inline bind_ty m_Constant(Constant *&C) { return C; } /// Match a ConstantFP, capturing the value if we match. inline bind_ty m_ConstantFP(ConstantFP *&C) { return C; } +/// Match a basic block value, capturing it if we match. +inline bind_ty m_BasicBlock(BasicBlock *&V) { return V; } +inline bind_ty m_BasicBlock(const BasicBlock *&V) { + return V; +} + /// Match a specified Value*. struct specificval_ty { const Value *Val; @@ -597,11 +643,11 @@ struct bind_const_intval_ty { }; /// Match a specified integer value or vector of all elements of that -// value. +/// value. struct specific_intval { - uint64_t Val; + APInt Val; - specific_intval(uint64_t V) : Val(V) {} + specific_intval(APInt V) : Val(std::move(V)) {} template bool match(ITy *V) { const auto *CI = dyn_cast(V); @@ -609,18 +655,50 @@ struct specific_intval { if (const auto *C = dyn_cast(V)) CI = dyn_cast_or_null(C->getSplatValue()); - return CI && CI->getValue() == Val; + return CI && APInt::isSameValue(CI->getValue(), Val); } }; /// Match a specific integer value or vector with all elements equal to /// the value. -inline specific_intval m_SpecificInt(uint64_t V) { return specific_intval(V); } +inline specific_intval m_SpecificInt(APInt V) { + return specific_intval(std::move(V)); +} + +inline specific_intval m_SpecificInt(uint64_t V) { + return m_SpecificInt(APInt(64, V)); +} /// Match a ConstantInt and bind to its value. This does not match /// ConstantInts wider than 64-bits. inline bind_const_intval_ty m_ConstantInt(uint64_t &V) { return V; } +/// Match a specified basic block value. +struct specific_bbval { + BasicBlock *Val; + + specific_bbval(BasicBlock *Val) : Val(Val) {} + + template bool match(ITy *V) { + const auto *BB = dyn_cast(V); + return BB && BB == Val; + } +}; + +/// Match a specific basic block value. +inline specific_bbval m_SpecificBB(BasicBlock *BB) { + return specific_bbval(BB); +} + +/// A commutative-friendly version of m_Specific(). +inline deferredval_ty m_Deferred(BasicBlock *const &BB) { + return BB; +} +inline deferredval_ty +m_Deferred(const BasicBlock *const &BB) { + return BB; +} + //===----------------------------------------------------------------------===// // Matcher for any binary operator. // @@ -968,6 +1046,12 @@ struct is_idiv_op { } }; +struct is_irem_op { + bool isOpType(unsigned Opcode) { + return Opcode == Instruction::SRem || Opcode == Instruction::URem; + } +}; + /// Matches shift operations. template inline BinOpPred_match m_Shift(const LHS &L, @@ -1003,6 +1087,13 @@ inline BinOpPred_match m_IDiv(const LHS &L, return BinOpPred_match(L, R); } +/// Matches integer remainder operations. +template +inline BinOpPred_match m_IRem(const LHS &L, + const RHS &R) { + return BinOpPred_match(L, R); +} + //===----------------------------------------------------------------------===// // Class that matches exact binary ops. // @@ -1210,6 +1301,12 @@ inline CastClass_match m_Trunc(const OpTy &Op) { return CastClass_match(Op); } +template +inline match_combine_or, OpTy> +m_TruncOrSelf(const OpTy &Op) { + return m_CombineOr(m_Trunc(Op), Op); +} + /// Matches SExt. 
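
With m_SpecificInt now comparing through APInt, a 64-bit literal matches a constant of any bit width with the same value, and the new m_IRem matcher folds SRem and URem into one pattern. A small usage sketch (the helper name and the constant 8 are illustrative only):

#include "llvm/IR/PatternMatch.h"

// Matches "urem X, 8" or "srem X, 8" and captures X.
static bool isRemByEight(llvm::Value *V, llvm::Value *&X) {
  using namespace llvm::PatternMatch;
  return match(V, m_IRem(m_Value(X), m_SpecificInt(8)));
}
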
template inline CastClass_match m_SExt(const OpTy &Op) { @@ -1222,6 +1319,18 @@ inline CastClass_match m_ZExt(const OpTy &Op) { return CastClass_match(Op); } +template +inline match_combine_or, OpTy> +m_ZExtOrSelf(const OpTy &Op) { + return m_CombineOr(m_ZExt(Op), Op); +} + +template +inline match_combine_or, OpTy> +m_SExtOrSelf(const OpTy &Op) { + return m_CombineOr(m_SExt(Op), Op); +} + template inline match_combine_or, CastClass_match> @@ -1229,6 +1338,15 @@ m_ZExtOrSExt(const OpTy &Op) { return m_CombineOr(m_ZExt(Op), m_SExt(Op)); } +template +inline match_combine_or< + match_combine_or, + CastClass_match>, + OpTy> +m_ZExtOrSExtOrSelf(const OpTy &Op) { + return m_CombineOr(m_ZExtOrSExt(Op), Op); +} + /// Matches UIToFP. template inline CastClass_match m_UIToFP(const OpTy &Op) { @@ -1274,27 +1392,34 @@ struct br_match { inline br_match m_UnconditionalBr(BasicBlock *&Succ) { return br_match(Succ); } -template struct brc_match { +template +struct brc_match { Cond_t Cond; - BasicBlock *&T, *&F; + TrueBlock_t T; + FalseBlock_t F; - brc_match(const Cond_t &C, BasicBlock *&t, BasicBlock *&f) + brc_match(const Cond_t &C, const TrueBlock_t &t, const FalseBlock_t &f) : Cond(C), T(t), F(f) {} template bool match(OpTy *V) { if (auto *BI = dyn_cast(V)) - if (BI->isConditional() && Cond.match(BI->getCondition())) { - T = BI->getSuccessor(0); - F = BI->getSuccessor(1); - return true; - } + if (BI->isConditional() && Cond.match(BI->getCondition())) + return T.match(BI->getSuccessor(0)) && F.match(BI->getSuccessor(1)); return false; } }; template -inline brc_match m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F) { - return brc_match(C, T, F); +inline brc_match, bind_ty> +m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F) { + return brc_match, bind_ty>( + C, m_BasicBlock(T), m_BasicBlock(F)); +} + +template +inline brc_match +m_Br(const Cond_t &C, const TrueBlock_t &T, const FalseBlock_t &F) { + return brc_match(C, T, F); } //===----------------------------------------------------------------------===// diff --git a/include/llvm/IR/RemarkStreamer.h b/include/llvm/IR/RemarkStreamer.h index f34cc660b2fb..2abf6f99cb08 100644 --- a/include/llvm/IR/RemarkStreamer.h +++ b/include/llvm/IR/RemarkStreamer.h @@ -25,12 +25,12 @@ namespace llvm { /// Streamer for remarks. class RemarkStreamer { - /// The filename that the remark diagnostics are emitted to. - const std::string Filename; /// The regex used to filter remarks based on the passes that emit them. Optional PassFilter; /// The object used to serialize the remarks to a specific format. - std::unique_ptr Serializer; + std::unique_ptr RemarkSerializer; + /// The filename that the remark diagnostics are emitted to. + const Optional Filename; /// Convert diagnostics into remark objects. /// The lifetime of the members of the result is bound to the lifetime of @@ -38,14 +38,16 @@ class RemarkStreamer { remarks::Remark toRemark(const DiagnosticInfoOptimizationBase &Diag); public: - RemarkStreamer(StringRef Filename, - std::unique_ptr Serializer); + RemarkStreamer(std::unique_ptr RemarkSerializer, + Optional Filename = None); /// Return the filename that the remark diagnostics are emitted to. - StringRef getFilename() const { return Filename; } + Optional getFilename() const { + return Filename ? Optional(*Filename) : None; + } /// Return stream that the remark diagnostics are emitted to. - raw_ostream &getStream() { return Serializer->OS; } + raw_ostream &getStream() { return RemarkSerializer->OS; } /// Return the serializer used for this stream. 
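
With the generalized brc_match above, m_Br can take arbitrary matchers for the successor blocks instead of only bind-out parameters. A sketch combining the new m_BasicBlock and m_SpecificBB matchers (helper name hypothetical; V is assumed to be a branch terminator):

#include "llvm/IR/PatternMatch.h"

// Matches a conditional branch whose false edge targets Else, capturing the
// condition and the true successor.
static bool branchesToElseOnFalse(llvm::Value *V, llvm::BasicBlock *Else,
                                  llvm::Value *&Cond,
                                  llvm::BasicBlock *&TrueBB) {
  using namespace llvm::PatternMatch;
  return match(V, m_Br(m_Value(Cond), m_BasicBlock(TrueBB), m_SpecificBB(Else)));
}
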
- remarks::Serializer &getSerializer() { return *Serializer; } + remarks::RemarkSerializer &getSerializer() { return *RemarkSerializer; } /// Set a pass filter based on a regex \p Filter. /// Returns an error if the regex is invalid. Error setFilter(StringRef Filter); @@ -84,13 +86,21 @@ struct RemarkSetupFormatError : RemarkSetupErrorInfo { using RemarkSetupErrorInfo::RemarkSetupErrorInfo; }; -/// Setup optimization remarks. +/// Setup optimization remarks that output to a file. Expected> setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename, StringRef RemarksPasses, StringRef RemarksFormat, bool RemarksWithHotness, unsigned RemarksHotnessThreshold = 0); +/// Setup optimization remarks that output directly to a raw_ostream. +/// \p OS is managed by the caller and should be open for writing as long as \p +/// Context is streaming remarks to it. +Error setupOptimizationRemarks(LLVMContext &Context, raw_ostream &OS, + StringRef RemarksPasses, StringRef RemarksFormat, + bool RemarksWithHotness, + unsigned RemarksHotnessThreshold = 0); + } // end namespace llvm #endif // LLVM_IR_REMARKSTREAMER_H diff --git a/include/llvm/IR/Type.h b/include/llvm/IR/Type.h index f2aa49030aaa..d0961dac833d 100644 --- a/include/llvm/IR/Type.h +++ b/include/llvm/IR/Type.h @@ -21,6 +21,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TypeSize.h" #include #include #include @@ -281,12 +282,15 @@ public: /// This will return zero if the type does not have a size or is not a /// primitive type. /// + /// If this is a scalable vector type, the scalable property will be set and + /// the runtime size will be a positive integer multiple of the base size. + /// /// Note that this may not reflect the size of memory allocated for an /// instance of the type or the number of bytes that are written when an /// instance of the type is stored to memory. The DataLayout class provides /// additional query functions to provide this information. /// - unsigned getPrimitiveSizeInBits() const LLVM_READONLY; + TypeSize getPrimitiveSizeInBits() const LLVM_READONLY; /// If this is a vector type, return the getPrimitiveSizeInBits value for the /// element type. Otherwise return the getPrimitiveSizeInBits value for this @@ -368,6 +372,7 @@ public: inline bool getVectorIsScalable() const; inline unsigned getVectorNumElements() const; + inline ElementCount getVectorElementCount() const; Type *getVectorElementType() const { assert(getTypeID() == VectorTyID); return ContainedTys[0]; @@ -378,6 +383,14 @@ public: return ContainedTys[0]; } + /// Given an integer or vector type, change the lane bitwidth to NewBitwidth, + /// whilst keeping the old number of lanes. + inline Type *getWithNewBitWidth(unsigned NewBitWidth) const; + + /// Given scalar/vector integer type, returns a type with elements twice as + /// wide as in the original type. For vectors, preserves element count. + inline Type *getExtendedType() const; + /// Get the address space of this pointer or pointer vector type. inline unsigned getPointerAddressSpace() const; diff --git a/include/llvm/IR/User.h b/include/llvm/IR/User.h index 19d87c5c621d..850ee72a0387 100644 --- a/include/llvm/IR/User.h +++ b/include/llvm/IR/User.h @@ -111,7 +111,7 @@ public: #endif } /// Placement delete - required by std, called if the ctor throws. 
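
Since getPrimitiveSizeInBits now returns a TypeSize, callers that only handle fixed-width types have to check for scalable vectors instead of silently truncating. A sketch under the assumption that TypeSize exposes isScalable() and getFixedSize() accessors (names as I recall them from the TypeSize work; the helper is hypothetical):

#include <cstdint>
#include "llvm/IR/Type.h"
#include "llvm/Support/TypeSize.h"

// Exact bit width for fixed-size types, 0 for scalable vectors whose runtime
// size is only known as a multiple of vscale.
static uint64_t fixedSizeInBitsOrZero(llvm::Type *Ty) {
  llvm::TypeSize TS = Ty->getPrimitiveSizeInBits();
  return TS.isScalable() ? 0 : TS.getFixedSize();
}
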
- void operator delete(void *Usr, unsigned, bool) { + void operator delete(void *Usr, unsigned, unsigned) { // Note: If a subclass manipulates the information which is required to calculate the // Usr memory pointer, e.g. NumUserOperands, the operator delete of that subclass has // to restore the changed information to the original value, since the dtor of that class diff --git a/include/llvm/IR/Value.h b/include/llvm/IR/Value.h index b2d8e7ac4741..f2c4b3b3f203 100644 --- a/include/llvm/IR/Value.h +++ b/include/llvm/IR/Value.h @@ -14,8 +14,10 @@ #define LLVM_IR_VALUE_H #include "llvm-c/Types.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/Use.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Casting.h" #include @@ -292,10 +294,29 @@ public: /// "V" instead of "this". This function skips metadata entries in the list. void replaceNonMetadataUsesWith(Value *V); + /// Go through the uses list for this definition and make each use point + /// to "V" if the callback ShouldReplace returns true for the given Use. + /// Unlike replaceAllUsesWith() this function does not support basic block + /// values or constant users. + void replaceUsesWithIf(Value *New, + llvm::function_ref ShouldReplace) { + assert(New && "Value::replaceUsesWithIf() is invalid!"); + assert(New->getType() == getType() && + "replaceUses of value with new value of different type!"); + + for (use_iterator UI = use_begin(), E = use_end(); UI != E;) { + Use &U = *UI; + ++UI; + if (!ShouldReplace(U)) + continue; + U.set(New); + } + } + /// replaceUsesOutsideBlock - Go through the uses list for this definition and /// make each use point to "V" instead of "this" when the use is outside the /// block. 'This's use list is expected to have at least one element. - /// Unlike replaceAllUsesWith this function does not support basic block + /// Unlike replaceAllUsesWith() this function does not support basic block /// values or constant users. void replaceUsesOutsideBlock(Value *V, BasicBlock *BB); @@ -493,17 +514,27 @@ public: /// swifterror attribute. bool isSwiftError() const; - /// Strip off pointer casts, all-zero GEPs, address space casts, and aliases. + /// Strip off pointer casts, all-zero GEPs and address space casts. /// /// Returns the original uncasted value. If this is called on a non-pointer /// value, it returns 'this'. const Value *stripPointerCasts() const; Value *stripPointerCasts() { return const_cast( - static_cast(this)->stripPointerCasts()); + static_cast(this)->stripPointerCasts()); } - /// Strip off pointer casts, all-zero GEPs, address space casts, and aliases + /// Strip off pointer casts, all-zero GEPs, address space casts, and aliases. + /// + /// Returns the original uncasted value. If this is called on a non-pointer + /// value, it returns 'this'. + const Value *stripPointerCastsAndAliases() const; + Value *stripPointerCastsAndAliases() { + return const_cast( + static_cast(this)->stripPointerCastsAndAliases()); + } + + /// Strip off pointer casts, all-zero GEPs and address space casts /// but ensures the representation of the result stays the same. /// /// Returns the original uncasted value with the same representation. If this @@ -514,26 +545,15 @@ public: ->stripPointerCastsSameRepresentation()); } - /// Strip off pointer casts, all-zero GEPs, aliases and invariant group - /// info. + /// Strip off pointer casts, all-zero GEPs and invariant group info. /// /// Returns the original uncasted value. 
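
replaceUsesWithIf above is a filtered form of replaceAllUsesWith. A minimal sketch that rewrites only the uses located in one function (helper name and parameters are illustrative):

#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"

// Point every use of Old that sits inside F at New, leaving other users alone.
static void replaceUsesInFunction(llvm::Value *Old, llvm::Value *New,
                                  llvm::Function *F) {
  Old->replaceUsesWithIf(New, [F](llvm::Use &U) {
    auto *I = llvm::dyn_cast<llvm::Instruction>(U.getUser());
    return I && I->getFunction() == F;
  });
}
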
If this is called on a non-pointer /// value, it returns 'this'. This function should be used only in /// Alias analysis. const Value *stripPointerCastsAndInvariantGroups() const; Value *stripPointerCastsAndInvariantGroups() { - return const_cast( - static_cast(this)->stripPointerCastsAndInvariantGroups()); - } - - /// Strip off pointer casts and all-zero GEPs. - /// - /// Returns the original uncasted value. If this is called on a non-pointer - /// value, it returns 'this'. - const Value *stripPointerCastsNoFollowAliases() const; - Value *stripPointerCastsNoFollowAliases() { - return const_cast( - static_cast(this)->stripPointerCastsNoFollowAliases()); + return const_cast(static_cast(this) + ->stripPointerCastsAndInvariantGroups()); } /// Strip off pointer casts and all-constant inbounds GEPs. @@ -612,7 +632,7 @@ public: /// /// Returns an alignment which is either specified explicitly, e.g. via /// align attribute of a function argument, or guaranteed by DataLayout. - unsigned getPointerAlignment(const DataLayout &DL) const; + MaybeAlign getPointerAlignment(const DataLayout &DL) const; /// Translate PHI node to its predecessor from the given basic block. /// diff --git a/include/llvm/IR/ValueMap.h b/include/llvm/IR/ValueMap.h index 6a79b1d387f3..fb5440d5efe8 100644 --- a/include/llvm/IR/ValueMap.h +++ b/include/llvm/IR/ValueMap.h @@ -33,11 +33,11 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Mutex.h" -#include "llvm/Support/UniqueLock.h" #include #include #include #include +#include #include #include @@ -93,7 +93,6 @@ class ValueMap { MapT Map; Optional MDMap; ExtraData Data; - bool MayMapMetadata = true; public: using key_type = KeyT; @@ -120,10 +119,6 @@ public: } Optional &getMDMap() { return MDMap; } - bool mayMapMetadata() const { return MayMapMetadata; } - void enableMapMetadata() { MayMapMetadata = true; } - void disableMapMetadata() { MayMapMetadata = false; } - /// Get the mapped metadata, if it's in the map. Optional getMappedMD(const Metadata *MD) const { if (!MDMap) @@ -266,9 +261,9 @@ public: // Make a copy that won't get changed even when *this is destroyed. ValueMapCallbackVH Copy(*this); typename Config::mutex_type *M = Config::getMutex(Copy.Map->Data); - unique_lock Guard; + std::unique_lock Guard; if (M) - Guard = unique_lock(*M); + Guard = std::unique_lock(*M); Config::onDelete(Copy.Map->Data, Copy.Unwrap()); // May destroy *this. Copy.Map->Map.erase(Copy); // Definitely destroys *this. } @@ -279,9 +274,9 @@ public: // Make a copy that won't get changed even when *this is destroyed. 
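
Because getPointerAlignment now returns MaybeAlign, an unknown alignment is an empty Optional rather than a zero. A sketch of the caller-side handling (hypothetical helper with a conservative 1-byte fallback):

#include <cstdint>
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Alignment.h"

// Byte alignment of Ptr if something is known, otherwise 1.
static uint64_t knownAlignmentOrOne(const llvm::Value *Ptr,
                                    const llvm::DataLayout &DL) {
  if (llvm::MaybeAlign A = Ptr->getPointerAlignment(DL))
    return A->value();
  return 1;
}
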
ValueMapCallbackVH Copy(*this); typename Config::mutex_type *M = Config::getMutex(Copy.Map->Data); - unique_lock Guard; + std::unique_lock Guard; if (M) - Guard = unique_lock(*M); + Guard = std::unique_lock(*M); KeyT typed_new_key = cast(new_key); // Can destroy *this: diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 164d0be2855a..49f69340c828 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -132,6 +132,7 @@ void initializeDwarfEHPreparePass(PassRegistry&); void initializeEarlyCSELegacyPassPass(PassRegistry&); void initializeEarlyCSEMemSSALegacyPassPass(PassRegistry&); void initializeEarlyIfConverterPass(PassRegistry&); +void initializeEarlyIfPredicatorPass(PassRegistry &); void initializeEarlyMachineLICMPass(PassRegistry&); void initializeEarlyTailDuplicatePass(PassRegistry&); void initializeEdgeBundlesPass(PassRegistry&); @@ -202,6 +203,7 @@ void initializeLegacyLICMPassPass(PassRegistry&); void initializeLegacyLoopSinkPassPass(PassRegistry&); void initializeLegalizerPass(PassRegistry&); void initializeGISelCSEAnalysisWrapperPassPass(PassRegistry &); +void initializeGISelKnownBitsAnalysisPass(PassRegistry &); void initializeLibCallsShrinkWrapLegacyPassPass(PassRegistry&); void initializeLintPass(PassRegistry&); void initializeLiveDebugValuesPass(PassRegistry&); @@ -241,6 +243,7 @@ void initializeLoopVectorizePass(PassRegistry&); void initializeLoopVersioningLICMPass(PassRegistry&); void initializeLoopVersioningPassPass(PassRegistry&); void initializeLowerAtomicLegacyPassPass(PassRegistry&); +void initializeLowerConstantIntrinsicsPass(PassRegistry&); void initializeLowerEmuTLSPass(PassRegistry&); void initializeLowerExpectIntrinsicPass(PassRegistry&); void initializeLowerGuardIntrinsicLegacyPassPass(PassRegistry&); @@ -250,6 +253,7 @@ void initializeLowerInvokeLegacyPassPass(PassRegistry&); void initializeLowerSwitchPass(PassRegistry&); void initializeLowerTypeTestsPass(PassRegistry&); void initializeMIRCanonicalizerPass(PassRegistry &); +void initializeMIRNamerPass(PassRegistry &); void initializeMIRPrintingPassPass(PassRegistry&); void initializeMachineBlockFrequencyInfoPass(PassRegistry&); void initializeMachineBlockPlacementPass(PassRegistry&); @@ -263,7 +267,7 @@ void initializeMachineDominatorTreePass(PassRegistry&); void initializeMachineFunctionPrinterPassPass(PassRegistry&); void initializeMachineLICMPass(PassRegistry&); void initializeMachineLoopInfoPass(PassRegistry&); -void initializeMachineModuleInfoPass(PassRegistry&); +void initializeMachineModuleInfoWrapperPassPass(PassRegistry &); void initializeMachineOptimizationRemarkEmitterPassPass(PassRegistry&); void initializeMachineOutlinerPass(PassRegistry&); void initializeMachinePipelinerPass(PassRegistry&); @@ -286,7 +290,9 @@ void initializeMergedLoadStoreMotionLegacyPassPass(PassRegistry&); void initializeMetaRenamerPass(PassRegistry&); void initializeModuleDebugInfoPrinterPass(PassRegistry&); void initializeModuleSummaryIndexWrapperPassPass(PassRegistry&); +void initializeModuloScheduleTestPass(PassRegistry&); void initializeMustExecutePrinterPass(PassRegistry&); +void initializeMustBeExecutedContextPrinterPass(PassRegistry&); void initializeNameAnonGlobalLegacyPassPass(PassRegistry&); void initializeNaryReassociateLegacyPassPass(PassRegistry&); void initializeNewGVNLegacyPassPass(PassRegistry&); @@ -360,7 +366,7 @@ void initializeSROALegacyPassPass(PassRegistry&); void initializeSafeStackLegacyPassPass(PassRegistry&); void 
initializeSafepointIRVerifierPass(PassRegistry&); void initializeSampleProfileLoaderLegacyPassPass(PassRegistry&); -void initializeSanitizerCoverageModulePass(PassRegistry&); +void initializeModuleSanitizerCoverageLegacyPassPass(PassRegistry &); void initializeScalarEvolutionWrapperPassPass(PassRegistry&); void initializeScalarizeMaskedMemIntrinPass(PassRegistry&); void initializeScalarizerLegacyPassPass(PassRegistry&); diff --git a/include/llvm/LTO/Config.h b/include/llvm/LTO/Config.h index fb107e3fbe02..daa6585b1113 100644 --- a/include/llvm/LTO/Config.h +++ b/include/llvm/LTO/Config.h @@ -226,7 +226,7 @@ struct LTOLLVMContext : LLVMContext { setDiscardValueNames(C.ShouldDiscardValueNames); enableDebugTypeODRUniquing(); setDiagnosticHandler( - llvm::make_unique(&DiagHandler), true); + std::make_unique(&DiagHandler), true); } DiagnosticHandlerFunction DiagHandler; }; diff --git a/include/llvm/LTO/LTO.h b/include/llvm/LTO/LTO.h index ca0a8b64523a..0a1e3e1d0e42 100644 --- a/include/llvm/LTO/LTO.h +++ b/include/llvm/LTO/LTO.h @@ -59,7 +59,9 @@ void thinLTOResolvePrevailingInIndex( /// must apply the changes to the Module via thinLTOInternalizeModule. void thinLTOInternalizeAndPromoteInIndex( ModuleSummaryIndex &Index, - function_ref isExported); + function_ref isExported, + function_ref + isPrevailing); /// Computes a unique hash for the Module considering the current list of /// export/import and other global analysis results. @@ -296,6 +298,10 @@ public: /// Cache) for each task identifier. Error run(AddStreamFn AddStream, NativeObjectCache Cache = nullptr); + /// Static method that returns a list of libcall symbols that can be generated + /// by LTO but might not be visible from bitcode symbol table. + static ArrayRef getRuntimeLibcallSymbols(); + private: Config Conf; @@ -303,7 +309,7 @@ private: RegularLTOState(unsigned ParallelCodeGenParallelismLevel, Config &Conf); struct CommonResolution { uint64_t Size = 0; - unsigned Align = 0; + MaybeAlign Align; /// Record if at least one instance of the common was marked as prevailing bool Prevailing = false; }; diff --git a/include/llvm/LTO/legacy/LTOCodeGenerator.h b/include/llvm/LTO/legacy/LTOCodeGenerator.h index d3cb4c8b79a0..8718df4b88e6 100644 --- a/include/llvm/LTO/legacy/LTOCodeGenerator.h +++ b/include/llvm/LTO/legacy/LTOCodeGenerator.h @@ -113,7 +113,7 @@ struct LTOCodeGenerator { ShouldRestoreGlobalsLinkage = Value; } - void addMustPreserveSymbol(StringRef Sym) { MustPreserveSymbols[Sym] = 1; } + void addMustPreserveSymbol(StringRef Sym) { MustPreserveSymbols.insert(Sym); } /// Pass options to the driver and optimization passes. 
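
addMustPreserveSymbol above now inserts into a StringSet, so a legacy LTO driver can feed it symbol names directly and repeatedly. A trivial driver-side sketch (the symbol list parameter is hypothetical):

#include <string>
#include <vector>
#include "llvm/LTO/legacy/LTOCodeGenerator.h"

// Keep every symbol the linker says is referenced from regular (non-LTO)
// objects; duplicate names are harmless with a StringSet.
static void preserveSymbols(llvm::LTOCodeGenerator &CG,
                            const std::vector<std::string> &Referenced) {
  for (const std::string &Name : Referenced)
    CG.addMustPreserveSymbol(Name);
}
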
/// diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index 675d179eb22a..ac88165845d3 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -140,6 +140,7 @@ namespace { (void) llvm::createLoopVersioningLICMPass(); (void) llvm::createLoopIdiomPass(); (void) llvm::createLoopRotatePass(); + (void) llvm::createLowerConstantIntrinsicsPass(); (void) llvm::createLowerExpectIntrinsicPass(); (void) llvm::createLowerInvokePass(); (void) llvm::createLowerSwitchPass(); @@ -219,6 +220,7 @@ namespace { (void) llvm::createStraightLineStrengthReducePass(); (void) llvm::createMemDerefPrinter(); (void) llvm::createMustExecutePrinter(); + (void) llvm::createMustBeExecutedContextPrinter(); (void) llvm::createFloat2IntPass(); (void) llvm::createEliminateAvailableExternallyPass(); (void) llvm::createScalarizeMaskedMemIntrinPass(); diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h index 971e9354da8c..3261c483e0d8 100644 --- a/include/llvm/MC/MCAsmInfo.h +++ b/include/llvm/MC/MCAsmInfo.h @@ -165,6 +165,10 @@ protected: /// instead. bool UseDataRegionDirectives = false; + /// True if .align is to be used for alignment. Only power-of-two + /// alignment is supported. + bool UseDotAlignForAlignment = false; + //===--- Data Emission Directives -------------------------------------===// /// This should be set to the directive used to get some number of zero bytes @@ -313,6 +317,10 @@ protected: /// Defaults to false. bool HasLinkOnceDirective = false; + /// True if we have a .lglobl directive, which is used to emit the information + /// of a static symbol into the symbol table. Defaults to false. + bool HasDotLGloblDirective = false; + /// This attribute, if not MCSA_Invalid, is used to declare a symbol as having /// hidden visibility. Defaults to MCSA_Hidden. MCSymbolAttr HiddenVisibilityAttr = MCSA_Hidden; @@ -388,6 +396,9 @@ protected: // %hi(), and similar unary operators. bool HasMipsExpressions = false; + // If true, emit function descriptor symbol on AIX. + bool NeedsFunctionDescriptors = false; + public: explicit MCAsmInfo(); virtual ~MCAsmInfo(); @@ -520,6 +531,10 @@ public: return UseDataRegionDirectives; } + bool useDotAlignForAlignment() const { + return UseDotAlignForAlignment; + } + const char *getZeroDirective() const { return ZeroDirective; } const char *getAsciiDirective() const { return AsciiDirective; } const char *getAscizDirective() const { return AscizDirective; } @@ -557,6 +572,8 @@ public: bool hasLinkOnceDirective() const { return HasLinkOnceDirective; } + bool hasDotLGloblDirective() const { return HasDotLGloblDirective; } + MCSymbolAttr getHiddenVisibilityAttr() const { return HiddenVisibilityAttr; } MCSymbolAttr getHiddenDeclarationVisibilityAttr() const { @@ -639,6 +656,7 @@ public: bool canRelaxRelocations() const { return RelaxELFRelocations; } void setRelaxELFRelocations(bool V) { RelaxELFRelocations = V; } bool hasMipsExpressions() const { return HasMipsExpressions; } + bool needsFunctionDescriptors() const { return NeedsFunctionDescriptors; } }; } // end namespace llvm diff --git a/include/llvm/MC/MCAsmInfoXCOFF.h b/include/llvm/MC/MCAsmInfoXCOFF.h index 2a72ba7398a7..4a3bacc954e0 100644 --- a/include/llvm/MC/MCAsmInfoXCOFF.h +++ b/include/llvm/MC/MCAsmInfoXCOFF.h @@ -18,6 +18,11 @@ class MCAsmInfoXCOFF : public MCAsmInfo { protected: MCAsmInfoXCOFF(); + +public: + // Return true only when the identifier Name does not need quotes to be + // syntactically correct for XCOFF. 
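
The new MCAsmInfo knobs above are plain protected booleans, so a target opts in from its MCAsmInfo subclass constructor. A hypothetical subclass for illustration (not a real target in this patch):

#include "llvm/MC/MCAsmInfo.h"

namespace {
class ExampleTargetAsmInfo : public llvm::MCAsmInfo {
public:
  ExampleTargetAsmInfo() {
    UseDotAlignForAlignment = true;  // assembler only accepts power-of-two .align
    HasDotLGloblDirective = true;    // statics reach the symbol table via .lglobl
    NeedsFunctionDescriptors = true; // AIX-style function descriptors
  }
};
} // end anonymous namespace
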
+ bool isValidUnquotedName(StringRef Name) const override; }; } // end namespace llvm diff --git a/include/llvm/MC/MCAsmMacro.h b/include/llvm/MC/MCAsmMacro.h index 364d3b5f3666..7eecce0faf64 100644 --- a/include/llvm/MC/MCAsmMacro.h +++ b/include/llvm/MC/MCAsmMacro.h @@ -124,7 +124,6 @@ public: } void dump(raw_ostream &OS) const; - void dump() const { dump(dbgs()); } }; struct MCAsmMacroParameter { @@ -133,10 +132,10 @@ struct MCAsmMacroParameter { bool Required = false; bool Vararg = false; - MCAsmMacroParameter() = default; - +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void dump() const { dump(dbgs()); } - void dump(raw_ostream &OS) const; + LLVM_DUMP_METHOD void dump(raw_ostream &OS) const; +#endif }; typedef std::vector MCAsmMacroParameters; @@ -149,8 +148,10 @@ public: MCAsmMacro(StringRef N, StringRef B, MCAsmMacroParameters P) : Name(N), Body(B), Parameters(std::move(P)) {} +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void dump() const { dump(dbgs()); } - void dump(raw_ostream &OS) const; + LLVM_DUMP_METHOD void dump(raw_ostream &OS) const; +#endif }; } // namespace llvm diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h index 5c2124cc0d15..b925f3218883 100644 --- a/include/llvm/MC/MCContext.h +++ b/include/llvm/MC/MCContext.h @@ -22,6 +22,7 @@ #include "llvm/MC/MCAsmMacro.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" @@ -112,6 +113,9 @@ namespace llvm { /// number of section symbols with the same name). StringMap UsedNames; + /// Keeps track of labels that are used in inline assembly. + SymbolTable InlineAsmUsedLabelNames; + /// The next ID to dole out to an unnamed assembler temporary symbol with /// a given prefix. StringMap NextID; @@ -275,6 +279,8 @@ namespace llvm { /// Do automatic reset in destructor bool AutoReset; + MCTargetOptions const *TargetOptions; + bool HadError = false; MCSymbol *createSymbolImpl(const StringMapEntry *Name, @@ -298,7 +304,9 @@ namespace llvm { public: explicit MCContext(const MCAsmInfo *MAI, const MCRegisterInfo *MRI, const MCObjectFileInfo *MOFI, - const SourceMgr *Mgr = nullptr, bool DoAutoReset = true); + const SourceMgr *Mgr = nullptr, + MCTargetOptions const *TargetOpts = nullptr, + bool DoAutoReset = true); MCContext(const MCContext &) = delete; MCContext &operator=(const MCContext &) = delete; ~MCContext(); @@ -377,6 +385,16 @@ namespace llvm { /// APIs. const SymbolTable &getSymbols() const { return Symbols; } + /// isInlineAsmLabel - Return true if the name is a label referenced in + /// inline assembly. + MCSymbol *getInlineAsmLabel(StringRef Name) const { + return InlineAsmUsedLabelNames.lookup(Name); + } + + /// registerInlineAsmLabel - Records that the name is a label referenced in + /// inline assembly. + void registerInlineAsmLabel(MCSymbol *Sym); + /// @} /// \name Section Management @@ -490,6 +508,8 @@ namespace llvm { MCSectionXCOFF *getXCOFFSection(StringRef Section, XCOFF::StorageMappingClass MappingClass, + XCOFF::SymbolType CSectType, + XCOFF::StorageClass StorageClass, SectionKind K, const char *BeginSymName = nullptr); @@ -659,6 +679,7 @@ namespace llvm { bool hadError() { return HadError; } void reportError(SMLoc L, const Twine &Msg); + void reportWarning(SMLoc L, const Twine &Msg); // Unrecoverable error has occurred. Display the best diagnostic we can // and bail via exit(1). For now, most MC backend errors are unrecoverable. 
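
getInlineAsmLabel and registerInlineAsmLabel above give MCContext a side table for labels defined inside inline-asm blobs. A sketch of the lookup-or-register pattern a parser might use (helper name hypothetical):

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"

// Reuse an already-registered inline-asm label, or create and register one.
static llvm::MCSymbol *getOrCreateInlineAsmLabel(llvm::MCContext &Ctx,
                                                 llvm::StringRef Name) {
  if (llvm::MCSymbol *Sym = Ctx.getInlineAsmLabel(Name))
    return Sym;
  llvm::MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
  Ctx.registerInlineAsmLabel(Sym);
  return Sym;
}
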
// FIXME: We should really do something about that. diff --git a/include/llvm/MC/MCDirectives.h b/include/llvm/MC/MCDirectives.h index 4029264c2026..ea79e68674e5 100644 --- a/include/llvm/MC/MCDirectives.h +++ b/include/llvm/MC/MCDirectives.h @@ -28,6 +28,7 @@ enum MCSymbolAttr { MCSA_ELF_TypeNoType, ///< .type _foo, STT_NOTYPE # aka @notype MCSA_ELF_TypeGnuUniqueObject, /// .type _foo, @gnu_unique_object MCSA_Global, ///< .globl + MCSA_LGlobal, ///< .lglobl (XCOFF) MCSA_Hidden, ///< .hidden (ELF) MCSA_IndirectSymbol, ///< .indirect_symbol (MachO) MCSA_Internal, ///< .internal (ELF) diff --git a/include/llvm/MC/MCDwarf.h b/include/llvm/MC/MCDwarf.h index 1a37aafd0654..a33b4b31bb06 100644 --- a/include/llvm/MC/MCDwarf.h +++ b/include/llvm/MC/MCDwarf.h @@ -629,7 +629,8 @@ public: static void Emit(MCObjectStreamer &streamer, MCAsmBackend *MAB, bool isEH); static void EmitAdvanceLoc(MCObjectStreamer &Streamer, uint64_t AddrDelta); static void EncodeAdvanceLoc(MCContext &Context, uint64_t AddrDelta, - raw_ostream &OS); + raw_ostream &OS, uint32_t *Offset = nullptr, + uint32_t *Size = nullptr); }; } // end namespace llvm diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h index fb23c0114c76..eb2786501f84 100644 --- a/include/llvm/MC/MCExpr.h +++ b/include/llvm/MC/MCExpr.h @@ -46,10 +46,6 @@ private: ExprKind Kind; SMLoc Loc; - bool evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm, - const MCAsmLayout *Layout, - const SectionAddrMap *Addrs) const; - bool evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm, const MCAsmLayout *Layout, const SectionAddrMap *Addrs, bool InSet) const; @@ -136,7 +132,7 @@ class MCConstantExpr : public MCExpr { int64_t Value; bool PrintInHex = false; - MCConstantExpr(int64_t Value) + explicit MCConstantExpr(int64_t Value) : MCExpr(MCExpr::Constant, SMLoc()), Value(Value) {} MCConstantExpr(int64_t Value, bool PrintInHex) @@ -239,6 +235,8 @@ public: VK_PPC_TOC_LO, // symbol@toc@l VK_PPC_TOC_HI, // symbol@toc@h VK_PPC_TOC_HA, // symbol@toc@ha + VK_PPC_U, // symbol@u + VK_PPC_L, // symbol@l VK_PPC_DTPMOD, // symbol@dtpmod VK_PPC_TPREL_LO, // symbol@tprel@l VK_PPC_TPREL_HI, // symbol@tprel@h diff --git a/include/llvm/MC/MCFixup.h b/include/llvm/MC/MCFixup.h index accffb7f2247..29e321e2354c 100644 --- a/include/llvm/MC/MCFixup.h +++ b/include/llvm/MC/MCFixup.h @@ -20,35 +20,38 @@ class MCExpr; /// Extensible enumeration to represent the type of a fixup. enum MCFixupKind { - FK_NONE = 0, ///< A no-op fixup. - FK_Data_1, ///< A one-byte fixup. - FK_Data_2, ///< A two-byte fixup. - FK_Data_4, ///< A four-byte fixup. - FK_Data_8, ///< A eight-byte fixup. - FK_PCRel_1, ///< A one-byte pc relative fixup. - FK_PCRel_2, ///< A two-byte pc relative fixup. - FK_PCRel_4, ///< A four-byte pc relative fixup. - FK_PCRel_8, ///< A eight-byte pc relative fixup. - FK_GPRel_1, ///< A one-byte gp relative fixup. - FK_GPRel_2, ///< A two-byte gp relative fixup. - FK_GPRel_4, ///< A four-byte gp relative fixup. - FK_GPRel_8, ///< A eight-byte gp relative fixup. - FK_DTPRel_4, ///< A four-byte dtp relative fixup. - FK_DTPRel_8, ///< A eight-byte dtp relative fixup. - FK_TPRel_4, ///< A four-byte tp relative fixup. - FK_TPRel_8, ///< A eight-byte tp relative fixup. - FK_SecRel_1, ///< A one-byte section relative fixup. - FK_SecRel_2, ///< A two-byte section relative fixup. - FK_SecRel_4, ///< A four-byte section relative fixup. - FK_SecRel_8, ///< A eight-byte section relative fixup. - FK_Data_Add_1, ///< A one-byte add fixup. - FK_Data_Add_2, ///< A two-byte add fixup. 
- FK_Data_Add_4, ///< A four-byte add fixup. - FK_Data_Add_8, ///< A eight-byte add fixup. - FK_Data_Sub_1, ///< A one-byte sub fixup. - FK_Data_Sub_2, ///< A two-byte sub fixup. - FK_Data_Sub_4, ///< A four-byte sub fixup. - FK_Data_Sub_8, ///< A eight-byte sub fixup. + FK_NONE = 0, ///< A no-op fixup. + FK_Data_1, ///< A one-byte fixup. + FK_Data_2, ///< A two-byte fixup. + FK_Data_4, ///< A four-byte fixup. + FK_Data_8, ///< A eight-byte fixup. + FK_Data_6b, ///< A six-bits fixup. + FK_PCRel_1, ///< A one-byte pc relative fixup. + FK_PCRel_2, ///< A two-byte pc relative fixup. + FK_PCRel_4, ///< A four-byte pc relative fixup. + FK_PCRel_8, ///< A eight-byte pc relative fixup. + FK_GPRel_1, ///< A one-byte gp relative fixup. + FK_GPRel_2, ///< A two-byte gp relative fixup. + FK_GPRel_4, ///< A four-byte gp relative fixup. + FK_GPRel_8, ///< A eight-byte gp relative fixup. + FK_DTPRel_4, ///< A four-byte dtp relative fixup. + FK_DTPRel_8, ///< A eight-byte dtp relative fixup. + FK_TPRel_4, ///< A four-byte tp relative fixup. + FK_TPRel_8, ///< A eight-byte tp relative fixup. + FK_SecRel_1, ///< A one-byte section relative fixup. + FK_SecRel_2, ///< A two-byte section relative fixup. + FK_SecRel_4, ///< A four-byte section relative fixup. + FK_SecRel_8, ///< A eight-byte section relative fixup. + FK_Data_Add_1, ///< A one-byte add fixup. + FK_Data_Add_2, ///< A two-byte add fixup. + FK_Data_Add_4, ///< A four-byte add fixup. + FK_Data_Add_8, ///< A eight-byte add fixup. + FK_Data_Add_6b, ///< A six-bits add fixup. + FK_Data_Sub_1, ///< A one-byte sub fixup. + FK_Data_Sub_2, ///< A two-byte sub fixup. + FK_Data_Sub_4, ///< A four-byte sub fixup. + FK_Data_Sub_8, ///< A eight-byte sub fixup. + FK_Data_Sub_6b, ///< A six-bits sub fixup. FirstTargetFixupKind = 128, @@ -75,25 +78,25 @@ class MCFixup { /// The value to put into the fixup location. The exact interpretation of the /// expression is target dependent, usually it will be one of the operands to /// an instruction or an assembler directive. - const MCExpr *Value; + const MCExpr *Value = nullptr; /// The byte index of start of the relocation inside the MCFragment. - uint32_t Offset; + uint32_t Offset = 0; /// The target dependent kind of fixup item this is. The kind is used to /// determine how the operand value should be encoded into the instruction. - unsigned Kind; + MCFixupKind Kind = FK_NONE; /// The source location which gave rise to the fixup, if any. 
SMLoc Loc; public: static MCFixup create(uint32_t Offset, const MCExpr *Value, MCFixupKind Kind, SMLoc Loc = SMLoc()) { - assert(unsigned(Kind) < MaxTargetFixupKind && "Kind out of range!"); + assert(Kind < MaxTargetFixupKind && "Kind out of range!"); MCFixup FI; FI.Value = Value; FI.Offset = Offset; - FI.Kind = unsigned(Kind); + FI.Kind = Kind; FI.Loc = Loc; return FI; } @@ -104,7 +107,7 @@ public: MCFixup FI; FI.Value = Fixup.getValue(); FI.Offset = Fixup.getOffset(); - FI.Kind = (unsigned)getAddKindForKind(Fixup.getKind()); + FI.Kind = getAddKindForKind(Fixup.getKind()); FI.Loc = Fixup.getLoc(); return FI; } @@ -115,12 +118,14 @@ public: MCFixup FI; FI.Value = Fixup.getValue(); FI.Offset = Fixup.getOffset(); - FI.Kind = (unsigned)getSubKindForKind(Fixup.getKind()); + FI.Kind = getSubKindForKind(Fixup.getKind()); FI.Loc = Fixup.getLoc(); return FI; } - MCFixupKind getKind() const { return MCFixupKind(Kind); } + MCFixupKind getKind() const { return Kind; } + + unsigned getTargetKind() const { return Kind; } uint32_t getOffset() const { return Offset; } void setOffset(uint32_t Value) { Offset = Value; } @@ -129,37 +134,63 @@ public: /// Return the generic fixup kind for a value with the given size. It /// is an error to pass an unsupported size. - static MCFixupKind getKindForSize(unsigned Size, bool isPCRel) { + static MCFixupKind getKindForSize(unsigned Size, bool IsPCRel) { switch (Size) { default: llvm_unreachable("Invalid generic fixup size!"); - case 1: return isPCRel ? FK_PCRel_1 : FK_Data_1; - case 2: return isPCRel ? FK_PCRel_2 : FK_Data_2; - case 4: return isPCRel ? FK_PCRel_4 : FK_Data_4; - case 8: return isPCRel ? FK_PCRel_8 : FK_Data_8; + case 1: + return IsPCRel ? FK_PCRel_1 : FK_Data_1; + case 2: + return IsPCRel ? FK_PCRel_2 : FK_Data_2; + case 4: + return IsPCRel ? FK_PCRel_4 : FK_Data_4; + case 8: + return IsPCRel ? FK_PCRel_8 : FK_Data_8; + } + } + + /// Return the generic fixup kind for a value with the given size in bits. + /// It is an error to pass an unsupported size. + static MCFixupKind getKindForSizeInBits(unsigned Size, bool IsPCRel) { + switch (Size) { + default: + llvm_unreachable("Invalid generic fixup size!"); + case 6: + assert(!IsPCRel && "Invalid pc-relative fixup size!"); + return FK_Data_6b; + case 8: + return IsPCRel ? FK_PCRel_1 : FK_Data_1; + case 16: + return IsPCRel ? FK_PCRel_2 : FK_Data_2; + case 32: + return IsPCRel ? FK_PCRel_4 : FK_Data_4; + case 64: + return IsPCRel ? FK_PCRel_8 : FK_Data_8; } } /// Return the generic fixup kind for an addition with a given size. It /// is an error to pass an unsupported size. - static MCFixupKind getAddKindForKind(unsigned Kind) { + static MCFixupKind getAddKindForKind(MCFixupKind Kind) { switch (Kind) { default: llvm_unreachable("Unknown type to convert!"); case FK_Data_1: return FK_Data_Add_1; case FK_Data_2: return FK_Data_Add_2; case FK_Data_4: return FK_Data_Add_4; case FK_Data_8: return FK_Data_Add_8; + case FK_Data_6b: return FK_Data_Add_6b; } } /// Return the generic fixup kind for an subtraction with a given size. It /// is an error to pass an unsupported size. 
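
getKindForSizeInBits above complements getKindForSize for targets that describe fixup widths in bits, including the new six-bit data fixups (which cannot be pc-relative). A small sketch (helper name hypothetical):

#include <cstdint>
#include "llvm/MC/MCFixup.h"

// Build a data fixup for a field of SizeInBits bits starting at Offset.
static llvm::MCFixup makeDataFixup(uint32_t Offset, const llvm::MCExpr *Value,
                                   unsigned SizeInBits) {
  return llvm::MCFixup::create(
      Offset, Value,
      llvm::MCFixup::getKindForSizeInBits(SizeInBits, /*IsPCRel=*/false));
}
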
- static MCFixupKind getSubKindForKind(unsigned Kind) { + static MCFixupKind getSubKindForKind(MCFixupKind Kind) { switch (Kind) { default: llvm_unreachable("Unknown type to convert!"); case FK_Data_1: return FK_Data_Sub_1; case FK_Data_2: return FK_Data_Sub_2; case FK_Data_4: return FK_Data_Sub_4; case FK_Data_8: return FK_Data_Sub_8; + case FK_Data_6b: return FK_Data_Sub_6b; } } diff --git a/include/llvm/MC/MCFragment.h b/include/llvm/MC/MCFragment.h index aadf2ce725ea..b0def566c46a 100644 --- a/include/llvm/MC/MCFragment.h +++ b/include/llvm/MC/MCFragment.h @@ -149,6 +149,7 @@ public: case MCFragment::FT_CompactEncodedInst: case MCFragment::FT_Data: case MCFragment::FT_Dwarf: + case MCFragment::FT_DwarfFrame: return true; } } @@ -232,7 +233,8 @@ public: static bool classof(const MCFragment *F) { MCFragment::FragmentType Kind = F->getKind(); return Kind == MCFragment::FT_Relaxable || Kind == MCFragment::FT_Data || - Kind == MCFragment::FT_CVDefRange || Kind == MCFragment::FT_Dwarf;; + Kind == MCFragment::FT_CVDefRange || Kind == MCFragment::FT_Dwarf || + Kind == MCFragment::FT_DwarfFrame; } }; @@ -543,27 +545,21 @@ public: } }; -class MCDwarfCallFrameFragment : public MCFragment { +class MCDwarfCallFrameFragment : public MCEncodedFragmentWithFixups<8, 1> { /// AddrDelta - The expression for the difference of the two symbols that /// make up the address delta between two .cfi_* dwarf directives. const MCExpr *AddrDelta; - SmallString<8> Contents; - public: MCDwarfCallFrameFragment(const MCExpr &AddrDelta, MCSection *Sec = nullptr) - : MCFragment(FT_DwarfFrame, false, Sec), AddrDelta(&AddrDelta) { - Contents.push_back(0); - } + : MCEncodedFragmentWithFixups<8, 1>(FT_DwarfFrame, false, Sec), + AddrDelta(&AddrDelta) {} /// \name Accessors /// @{ const MCExpr &getAddrDelta() const { return *AddrDelta; } - SmallString<8> &getContents() { return Contents; } - const SmallString<8> &getContents() const { return Contents; } - /// @} static bool classof(const MCFragment *F) { diff --git a/include/llvm/MC/MCInstPrinter.h b/include/llvm/MC/MCInstPrinter.h index 6bbc4bc2903b..4501ce3084c8 100644 --- a/include/llvm/MC/MCInstPrinter.h +++ b/include/llvm/MC/MCInstPrinter.h @@ -87,12 +87,10 @@ public: /// Utility functions to make adding mark ups simpler. StringRef markup(StringRef s) const; - StringRef markup(StringRef a, StringRef b) const; bool getPrintImmHex() const { return PrintImmHex; } void setPrintImmHex(bool Value) { PrintImmHex = Value; } - HexStyle::Style getPrintHexStyle() const { return PrintHexStyle; } void setPrintHexStyle(HexStyle::Style Value) { PrintHexStyle = Value; } /// Utility function to print immediates in decimal or hex. diff --git a/include/llvm/MC/MCInstrAnalysis.h b/include/llvm/MC/MCInstrAnalysis.h index dfefd7e72777..898ca47b13b8 100644 --- a/include/llvm/MC/MCInstrAnalysis.h +++ b/include/llvm/MC/MCInstrAnalysis.h @@ -152,6 +152,12 @@ public: evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, uint64_t &Target) const; + /// Given an instruction tries to get the address of a memory operand. Returns + /// the address on success. + virtual Optional evaluateMemoryOperandAddress(const MCInst &Inst, + uint64_t Addr, + uint64_t Size) const; + /// Returns (PLT virtual address, GOT virtual address) pairs for PLT entries. 
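
The evaluateMemoryOperandAddress hook added in the MCInstrAnalysis.h hunk above lets disassembler-based tools ask the target whether an instruction's memory operand resolves to a known address. A caller sketch (auto is used since the Optional element type is elided in the hunk as shown; the helper is hypothetical):

#include <cstdint>
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrAnalysis.h"

// Addr is the instruction address, Size its byte length, as in evaluateBranch.
static void noteMemoryTarget(const llvm::MCInstrAnalysis &MIA,
                             const llvm::MCInst &Inst, uint64_t Addr,
                             uint64_t Size) {
  if (auto Target = MIA.evaluateMemoryOperandAddress(Inst, Addr, Size)) {
    // e.g. hand *Target to a symbolizer to annotate the disassembly
    (void)*Target;
  }
}
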
virtual std::vector> findPltEntries(uint64_t PltSectionVA, ArrayRef PltContents, diff --git a/include/llvm/MC/MCInstrDesc.h b/include/llvm/MC/MCInstrDesc.h index 0aa586dfc901..e75a27614a22 100644 --- a/include/llvm/MC/MCInstrDesc.h +++ b/include/llvm/MC/MCInstrDesc.h @@ -56,7 +56,11 @@ enum OperandType { OPERAND_GENERIC_5 = 11, OPERAND_LAST_GENERIC = 11, - OPERAND_FIRST_TARGET = 12, + OPERAND_FIRST_GENERIC_IMM = 12, + OPERAND_GENERIC_IMM_0 = 12, + OPERAND_LAST_GENERIC_IMM = 12, + + OPERAND_FIRST_TARGET = 13, }; } @@ -103,6 +107,16 @@ public: assert(isGenericType() && "non-generic types don't have an index"); return OperandType - MCOI::OPERAND_FIRST_GENERIC; } + + bool isGenericImm() const { + return OperandType >= MCOI::OPERAND_FIRST_GENERIC_IMM && + OperandType <= MCOI::OPERAND_LAST_GENERIC_IMM; + } + + unsigned getGenericImmIndex() const { + assert(isGenericImm() && "non-generic immediates don't have an index"); + return OperandType - MCOI::OPERAND_FIRST_GENERIC_IMM; + } }; //===----------------------------------------------------------------------===// @@ -115,7 +129,8 @@ namespace MCID { /// not use these directly. These all correspond to bitfields in the /// MCInstrDesc::Flags field. enum Flag { - Variadic = 0, + PreISelOpcode = 0, + Variadic, HasOptionalDef, Pseudo, Return, @@ -228,6 +243,10 @@ public: /// Return flags of this instruction. uint64_t getFlags() const { return Flags; } + /// \returns true if this instruction is emitted before instruction selection + /// and should be legalized/regbankselected/selected. + bool isPreISelOpcode() const { return Flags & (1ULL << MCID::PreISelOpcode); } + /// Return true if this instruction can have a variable number of /// operands. In this case, the variable operands will be after the normal /// operands but before the implicit definitions and uses (if any are diff --git a/include/llvm/MC/MCLinkerOptimizationHint.h b/include/llvm/MC/MCLinkerOptimizationHint.h index f2a1364ad884..003491f32f75 100644 --- a/include/llvm/MC/MCLinkerOptimizationHint.h +++ b/include/llvm/MC/MCLinkerOptimizationHint.h @@ -61,6 +61,7 @@ static inline int MCLOHNameToId(StringRef Name) { MCLOHCaseNameToId(AdrpAdd) MCLOHCaseNameToId(AdrpLdrGot) .Default(-1); +#undef MCLOHCaseNameToId } static inline StringRef MCLOHIdToName(MCLOHType Kind) { @@ -76,6 +77,7 @@ static inline StringRef MCLOHIdToName(MCLOHType Kind) { MCLOHCaseIdToName(AdrpLdrGot); } return StringRef(); +#undef MCLOHCaseIdToName } static inline int MCLOHIdToNbArgs(MCLOHType Kind) { diff --git a/include/llvm/MC/MCRegister.h b/include/llvm/MC/MCRegister.h new file mode 100644 index 000000000000..8372947a4ba1 --- /dev/null +++ b/include/llvm/MC/MCRegister.h @@ -0,0 +1,110 @@ +//===-- llvm/MC/Register.h --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_REGISTER_H +#define LLVM_MC_REGISTER_H + +#include "llvm/ADT/DenseMapInfo.h" +#include + +namespace llvm { + +/// An unsigned integer type large enough to represent all physical registers, +/// but not necessarily virtual registers. +using MCPhysReg = uint16_t; + +/// Wrapper class representing physical registers. Should be passed by value. 
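
The MCInstrDesc additions above expose a pre-ISel flag and a generic-immediate operand kind to generic code. A query sketch (MII and Opcode are assumed to come from the usual MC setup; helper name hypothetical):

#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"

// True if Opcode is a pre-ISel (GlobalISel generic) opcode with at least one
// generic immediate operand.
static bool isGenericOpcodeWithImm(const llvm::MCInstrInfo &MII,
                                   unsigned Opcode) {
  const llvm::MCInstrDesc &Desc = MII.get(Opcode);
  if (!Desc.isPreISelOpcode())
    return false;
  for (const llvm::MCOperandInfo &OpInfo : Desc.operands())
    if (OpInfo.isGenericImm())
      return true;
  return false;
}
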
+class MCRegister { + unsigned Reg; + +public: + MCRegister(unsigned Val = 0): Reg(Val) {} + + // Register numbers can represent physical registers, virtual registers, and + // sometimes stack slots. The unsigned values are divided into these ranges: + // + // 0 Not a register, can be used as a sentinel. + // [1;2^30) Physical registers assigned by TableGen. + // [2^30;2^31) Stack slots. (Rarely used.) + // [2^31;2^32) Virtual registers assigned by MachineRegisterInfo. + // + // Further sentinels can be allocated from the small negative integers. + // DenseMapInfo uses -1u and -2u. + + /// This is the portion of the positive number space that is not a physical + /// register. StackSlot values do not exist in the MC layer, see + /// Register::isStackSlot() for the more information on them. + /// + /// Note that isVirtualRegister() and isPhysicalRegister() cannot handle stack + /// slots, so if a variable may contains a stack slot, always check + /// isStackSlot() first. + static bool isStackSlot(unsigned Reg) { + return int(Reg) >= (1 << 30); + } + + /// Return true if the specified register number is in + /// the physical register namespace. + static bool isPhysicalRegister(unsigned Reg) { + assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first."); + return int(Reg) > 0; + } + + /// Return true if the specified register number is in the physical register + /// namespace. + bool isPhysical() const { + return isPhysicalRegister(Reg); + } + + operator unsigned() const { + return Reg; + } + + unsigned id() const { + return Reg; + } + + bool isValid() const { + return Reg != 0; + } + + /// Comparisons between register objects + bool operator==(const MCRegister &Other) const { return Reg == Other.Reg; } + bool operator!=(const MCRegister &Other) const { return Reg != Other.Reg; } + + /// Comparisons against register constants. E.g. + /// * R == AArch64::WZR + /// * R == 0 + /// * R == VirtRegMap::NO_PHYS_REG + bool operator==(unsigned Other) const { return Reg == Other; } + bool operator!=(unsigned Other) const { return Reg != Other; } + bool operator==(int Other) const { return Reg == unsigned(Other); } + bool operator!=(int Other) const { return Reg != unsigned(Other); } + // MSVC requires that we explicitly declare these two as well. + bool operator==(MCPhysReg Other) const { return Reg == unsigned(Other); } + bool operator!=(MCPhysReg Other) const { return Reg != unsigned(Other); } +}; + +// Provide DenseMapInfo for MCRegister +template<> struct DenseMapInfo { + static inline unsigned getEmptyKey() { + return DenseMapInfo::getEmptyKey(); + } + static inline unsigned getTombstoneKey() { + return DenseMapInfo::getTombstoneKey(); + } + static unsigned getHashValue(const MCRegister &Val) { + return DenseMapInfo::getHashValue(Val.id()); + } + static bool isEqual(const MCRegister &LHS, const MCRegister &RHS) { + return DenseMapInfo::isEqual(LHS.id(), RHS.id()); + } +}; + +} + +#endif // ifndef LLVM_MC_REGISTER_H diff --git a/include/llvm/MC/MCRegisterInfo.h b/include/llvm/MC/MCRegisterInfo.h index 92d39c3fcfb7..c7dc56ea588e 100644 --- a/include/llvm/MC/MCRegisterInfo.h +++ b/include/llvm/MC/MCRegisterInfo.h @@ -18,16 +18,13 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/iterator_range.h" #include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCRegister.h" #include #include #include namespace llvm { -/// An unsigned integer type large enough to represent all physical registers, -/// but not necessarily virtual registers. 
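
MCRegister is deliberately a thin value type: implicitly convertible to unsigned, comparable against register enum constants, and (via the DenseMapInfo specialization above) usable directly as a DenseMap key. A small sketch (the counting helper is illustrative):

#include "llvm/ADT/DenseMap.h"
#include "llvm/MC/MCRegister.h"

// Count how often each physical register is seen; MCRegister() is the
// "no register" sentinel and is skipped.
static void countReg(llvm::MCRegister Reg,
                     llvm::DenseMap<llvm::MCRegister, unsigned> &Counts) {
  if (!Reg.isValid())
    return;
  ++Counts[Reg];
}
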
-using MCPhysReg = uint16_t; - /// MCRegisterClass - Base class of TargetRegisterClass. class MCRegisterClass { public: @@ -65,16 +62,17 @@ public: /// contains - Return true if the specified register is included in this /// register class. This does not include virtual registers. - bool contains(unsigned Reg) const { - unsigned InByte = Reg % 8; - unsigned Byte = Reg / 8; + bool contains(MCRegister Reg) const { + unsigned RegNo = unsigned(Reg); + unsigned InByte = RegNo % 8; + unsigned Byte = RegNo / 8; if (Byte >= RegSetSize) return false; return (RegSet[Byte] & (1 << InByte)) != 0; } /// contains - Return true if both registers are in this class. - bool contains(unsigned Reg1, unsigned Reg2) const { + bool contains(MCRegister Reg1, MCRegister Reg2) const { return contains(Reg1) && contains(Reg2); } @@ -148,8 +146,8 @@ public: private: const MCRegisterDesc *Desc; // Pointer to the descriptor array unsigned NumRegs; // Number of entries in the array - unsigned RAReg; // Return address register - unsigned PCReg; // Program counter register + MCRegister RAReg; // Return address register + MCRegister PCReg; // Program counter register const MCRegisterClass *Classes; // Pointer to the regclass array unsigned NumClasses; // Number of entries in the array unsigned NumRegUnits; // Number of regunits. @@ -175,8 +173,8 @@ private: const DwarfLLVMRegPair *EHL2DwarfRegs; // LLVM to Dwarf regs mapping EH const DwarfLLVMRegPair *Dwarf2LRegs; // Dwarf to LLVM regs mapping const DwarfLLVMRegPair *EHDwarf2LRegs; // Dwarf to LLVM regs mapping EH - DenseMap L2SEHRegs; // LLVM to SEH regs mapping - DenseMap L2CVRegs; // LLVM to CV regs mapping + DenseMap L2SEHRegs; // LLVM to SEH regs mapping + DenseMap L2CVRegs; // LLVM to CV regs mapping public: /// DiffListIterator - Base iterator class that can traverse the @@ -202,7 +200,7 @@ public: /// advance - Move to the next list position, return the applied /// differential. This function does not detect the end of the list, that /// is the caller's responsibility (by checking for a 0 return value). - unsigned advance() { + MCRegister advance() { assert(isValid() && "Cannot move off the end of the list."); MCPhysReg D = *List++; Val += D; @@ -214,7 +212,7 @@ public: bool isValid() const { return List; } /// Dereference the iterator to get the value at the current position. - unsigned operator*() const { return Val; } + MCRegister operator*() const { return Val; } /// Pre-increment to move to the next position. void operator++() { @@ -309,26 +307,26 @@ public: /// as the LLVM register number. /// FIXME: TableGen these numbers. Currently this requires target specific /// initialization code. - void mapLLVMRegToSEHReg(unsigned LLVMReg, int SEHReg) { + void mapLLVMRegToSEHReg(MCRegister LLVMReg, int SEHReg) { L2SEHRegs[LLVMReg] = SEHReg; } - void mapLLVMRegToCVReg(unsigned LLVMReg, int CVReg) { + void mapLLVMRegToCVReg(MCRegister LLVMReg, int CVReg) { L2CVRegs[LLVMReg] = CVReg; } /// This method should return the register where the return /// address can be found. - unsigned getRARegister() const { + MCRegister getRARegister() const { return RAReg; } /// Return the register which is the program counter. 
- unsigned getProgramCounter() const { + MCRegister getProgramCounter() const { return PCReg; } - const MCRegisterDesc &operator[](unsigned RegNo) const { + const MCRegisterDesc &operator[](MCRegister RegNo) const { assert(RegNo < NumRegs && "Attempting to access record for invalid register number!"); return Desc[RegNo]; @@ -336,24 +334,24 @@ public: /// Provide a get method, equivalent to [], but more useful with a /// pointer to this object. - const MCRegisterDesc &get(unsigned RegNo) const { + const MCRegisterDesc &get(MCRegister RegNo) const { return operator[](RegNo); } /// Returns the physical register number of sub-register "Index" /// for physical register RegNo. Return zero if the sub-register does not /// exist. - unsigned getSubReg(unsigned Reg, unsigned Idx) const; + MCRegister getSubReg(MCRegister Reg, unsigned Idx) const; /// Return a super-register of the specified register /// Reg so its sub-register of index SubIdx is Reg. - unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx, - const MCRegisterClass *RC) const; + MCRegister getMatchingSuperReg(MCRegister Reg, unsigned SubIdx, + const MCRegisterClass *RC) const; /// For a given register pair, return the sub-register index /// if the second register is a sub-register of the first. Return zero /// otherwise. - unsigned getSubRegIndex(unsigned RegNo, unsigned SubRegNo) const; + unsigned getSubRegIndex(MCRegister RegNo, MCRegister SubRegNo) const; /// Get the size of the bit range covered by a sub-register index. /// If the index isn't continuous, return the sum of the sizes of its parts. @@ -367,7 +365,7 @@ public: /// Return the human-readable symbolic target-specific name for the /// specified physical register. - const char *getName(unsigned RegNo) const { + const char *getName(MCRegister RegNo) const { return RegStrings + get(RegNo).Name; } @@ -395,15 +393,11 @@ public: /// number. Returns -1 if there is no equivalent value. The second /// parameter allows targets to use different numberings for EH info and /// debugging info. - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - - /// Map a dwarf register back to a target register. - int getLLVMRegNum(unsigned RegNum, bool isEH) const; + int getDwarfRegNum(MCRegister RegNum, bool isEH) const; - /// Map a DWARF EH register back to a target register (same as - /// getLLVMRegNum(RegNum, true)) but return -1 if there is no mapping, - /// rather than asserting that there must be one. - int getLLVMRegNumFromEH(unsigned RegNum) const; + /// Map a dwarf register back to a target register. Returns None is there is + /// no mapping. + Optional getLLVMRegNum(unsigned RegNum, bool isEH) const; /// Map a target EH register number to an equivalent DWARF register /// number. @@ -411,11 +405,11 @@ public: /// Map a target register to an equivalent SEH register /// number. Returns LLVM register number if there is no equivalent value. - int getSEHRegNum(unsigned RegNum) const; + int getSEHRegNum(MCRegister RegNum) const; /// Map a target register to an equivalent CodeView register /// number. 
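
getLLVMRegNum above now returns an Optional instead of asserting (and the separate EH variant is gone), so unwinder and debug-info consumers must handle unknown DWARF numbers explicitly. A sketch using auto for the elided Optional element type (helper hypothetical):

#include "llvm/MC/MCRegisterInfo.h"

// Translate a DWARF register number, falling back to the "no register" value.
static llvm::MCRegister dwarfToLLVMRegOrNone(const llvm::MCRegisterInfo &MRI,
                                             unsigned DwarfReg, bool IsEH) {
  if (auto LLVMReg = MRI.getLLVMRegNum(DwarfReg, IsEH))
    return llvm::MCRegister(*LLVMReg);
  return llvm::MCRegister();
}
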
- int getCodeViewRegNum(unsigned RegNum) const; + int getCodeViewRegNum(MCRegister RegNum) const; regclass_iterator regclass_begin() const { return Classes; } regclass_iterator regclass_end() const { return Classes+NumClasses; } @@ -439,34 +433,34 @@ public: } /// Returns the encoding for RegNo - uint16_t getEncodingValue(unsigned RegNo) const { + uint16_t getEncodingValue(MCRegister RegNo) const { assert(RegNo < NumRegs && "Attempting to get encoding for invalid register number!"); return RegEncodingTable[RegNo]; } /// Returns true if RegB is a sub-register of RegA. - bool isSubRegister(unsigned RegA, unsigned RegB) const { + bool isSubRegister(MCRegister RegA, MCRegister RegB) const { return isSuperRegister(RegB, RegA); } /// Returns true if RegB is a super-register of RegA. - bool isSuperRegister(unsigned RegA, unsigned RegB) const; + bool isSuperRegister(MCRegister RegA, MCRegister RegB) const; /// Returns true if RegB is a sub-register of RegA or if RegB == RegA. - bool isSubRegisterEq(unsigned RegA, unsigned RegB) const { + bool isSubRegisterEq(MCRegister RegA, MCRegister RegB) const { return isSuperRegisterEq(RegB, RegA); } /// Returns true if RegB is a super-register of RegA or if /// RegB == RegA. - bool isSuperRegisterEq(unsigned RegA, unsigned RegB) const { + bool isSuperRegisterEq(MCRegister RegA, MCRegister RegB) const { return RegA == RegB || isSuperRegister(RegA, RegB); } /// Returns true if RegB is a super-register or sub-register of RegA /// or if RegB == RegA. - bool isSuperOrSubRegisterEq(unsigned RegA, unsigned RegB) const { + bool isSuperOrSubRegisterEq(MCRegister RegA, MCRegister RegB) const { return isSubRegisterEq(RegA, RegB) || isSuperRegister(RegA, RegB); } }; @@ -482,8 +476,8 @@ public: /// If IncludeSelf is set, Reg itself is included in the list. class MCSubRegIterator : public MCRegisterInfo::DiffListIterator { public: - MCSubRegIterator(unsigned Reg, const MCRegisterInfo *MCRI, - bool IncludeSelf = false) { + MCSubRegIterator(MCRegister Reg, const MCRegisterInfo *MCRI, + bool IncludeSelf = false) { init(Reg, MCRI->DiffLists + MCRI->get(Reg).SubRegs); // Initially, the iterator points to Reg itself. if (!IncludeSelf) @@ -500,13 +494,13 @@ class MCSubRegIndexIterator { public: /// Constructs an iterator that traverses subregisters and their /// associated subregister indices. - MCSubRegIndexIterator(unsigned Reg, const MCRegisterInfo *MCRI) + MCSubRegIndexIterator(MCRegister Reg, const MCRegisterInfo *MCRI) : SRIter(Reg, MCRI) { SRIndex = MCRI->SubRegIndices + MCRI->get(Reg).SubRegIndices; } /// Returns current sub-register. - unsigned getSubReg() const { + MCRegister getSubReg() const { return *SRIter; } @@ -531,7 +525,7 @@ class MCSuperRegIterator : public MCRegisterInfo::DiffListIterator { public: MCSuperRegIterator() = default; - MCSuperRegIterator(unsigned Reg, const MCRegisterInfo *MCRI, + MCSuperRegIterator(MCRegister Reg, const MCRegisterInfo *MCRI, bool IncludeSelf = false) { init(Reg, MCRI->DiffLists + MCRI->get(Reg).SuperRegs); // Initially, the iterator points to Reg itself. @@ -542,7 +536,7 @@ public: // Definition for isSuperRegister. Put it down here since it needs the // iterator defined above in addition to the MCRegisterInfo class itself. -inline bool MCRegisterInfo::isSuperRegister(unsigned RegA, unsigned RegB) const{ +inline bool MCRegisterInfo::isSuperRegister(MCRegister RegA, MCRegister RegB) const{ for (MCSuperRegIterator I(RegA, this); I.isValid(); ++I) if (*I == RegB) return true; @@ -569,7 +563,7 @@ public: /// in Reg. 
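
The register iterators above now take MCRegister, but their shape is unchanged: construct with a register and an MCRegisterInfo, then walk until isValid() goes false. A sketch over sub-registers (helper hypothetical):

#include "llvm/MC/MCRegisterInfo.h"

// True if RC contains Reg or any sub-register of Reg.
static bool classContainsSubRegOf(const llvm::MCRegisterClass &RC,
                                  llvm::MCRegister Reg,
                                  const llvm::MCRegisterInfo &MRI) {
  for (llvm::MCSubRegIterator SubReg(Reg, &MRI, /*IncludeSelf=*/true);
       SubReg.isValid(); ++SubReg)
    if (RC.contains(*SubReg))
      return true;
  return false;
}
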
MCRegUnitIterator() = default; - MCRegUnitIterator(unsigned Reg, const MCRegisterInfo *MCRI) { + MCRegUnitIterator(MCRegister Reg, const MCRegisterInfo *MCRI) { assert(Reg && "Null register has no regunits"); // Decode the RegUnits MCRegisterDesc field. unsigned RU = MCRI->get(Reg).RegUnits; @@ -600,7 +594,7 @@ public: /// Constructs an iterator that traverses the register units and their /// associated LaneMasks in Reg. - MCRegUnitMaskIterator(unsigned Reg, const MCRegisterInfo *MCRI) + MCRegUnitMaskIterator(MCRegister Reg, const MCRegisterInfo *MCRI) : RUIter(Reg, MCRI) { uint16_t Idx = MCRI->get(Reg).RegUnitLaneMasks; MaskListIter = &MCRI->RegUnitMaskSequences[Idx]; @@ -667,7 +661,7 @@ public: /// any ordering or that entries are unique. class MCRegAliasIterator { private: - unsigned Reg; + MCRegister Reg; const MCRegisterInfo *MCRI; bool IncludeSelf; @@ -676,7 +670,7 @@ private: MCSuperRegIterator SI; public: - MCRegAliasIterator(unsigned Reg, const MCRegisterInfo *MCRI, + MCRegAliasIterator(MCRegister Reg, const MCRegisterInfo *MCRI, bool IncludeSelf) : Reg(Reg), MCRI(MCRI), IncludeSelf(IncludeSelf) { // Initialize the iterators. @@ -692,7 +686,7 @@ public: bool isValid() const { return RI.isValid(); } - unsigned operator*() const { + MCRegister operator*() const { assert(SI.isValid() && "Cannot dereference an invalid iterator."); return *SI; } diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h index 6fad1ec2069c..d057feda87d8 100644 --- a/include/llvm/MC/MCSection.h +++ b/include/llvm/MC/MCSection.h @@ -17,6 +17,7 @@ #include "llvm/ADT/ilist.h" #include "llvm/MC/MCFragment.h" #include "llvm/MC/SectionKind.h" +#include "llvm/Support/Alignment.h" #include #include @@ -58,7 +59,7 @@ private: MCSymbol *Begin; MCSymbol *End = nullptr; /// The alignment requirement of this section. - unsigned Alignment = 1; + Align Alignment; /// The section index in the assemblers section list. unsigned Ordinal = 0; /// The index of this section in the layout order. @@ -117,8 +118,8 @@ public: MCSymbol *getEndSymbol(MCContext &Ctx); bool hasEnded() const; - unsigned getAlignment() const { return Alignment; } - void setAlignment(unsigned Value) { Alignment = Value; } + unsigned getAlignment() const { return Alignment.value(); } + void setAlignment(Align Value) { Alignment = Value; } unsigned getOrdinal() const { return Ordinal; } void setOrdinal(unsigned Value) { Ordinal = Value; } diff --git a/include/llvm/MC/MCSectionXCOFF.h b/include/llvm/MC/MCSectionXCOFF.h index 2a3f391fd3e2..ee302ed5ecec 100644 --- a/include/llvm/MC/MCSectionXCOFF.h +++ b/include/llvm/MC/MCSectionXCOFF.h @@ -23,16 +23,30 @@ class MCSymbol; // This class represents an XCOFF `Control Section`, more commonly referred to // as a csect. A csect represents the smallest possible unit of data/code which -// will be relocated as a single block. +// will be relocated as a single block. A csect can either be: +// 1) Initialized: The Type will be XTY_SD, and the symbols inside the csect +// will have a label definition representing their offset within the csect. +// 2) Uninitialized: The Type will be XTY_CM, it will contain a single symbol, +// and may not contain label definitions. +// 3) An external reference providing a symbol table entry for a symbol +// contained in another XCOFF object file. External reference csects are not +// implemented yet. 
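MCSection now stores its alignment as an Align while the getter still reports raw bytes. A minimal sketch of the updated pair; Sec is a placeholder for a section obtained from MCContext elsewhere:

  #include "llvm/MC/MCSection.h"
  #include "llvm/Support/Alignment.h"
  using namespace llvm;

  // Request 16-byte alignment; the setter takes an Align, the getter still
  // returns the alignment in bytes.
  void alignTo16(MCSection &Sec) {
    Sec.setAlignment(Align(16));
    unsigned Bytes = Sec.getAlignment(); // 16
    (void)Bytes;
  }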
class MCSectionXCOFF final : public MCSection { friend class MCContext; StringRef Name; XCOFF::StorageMappingClass MappingClass; + XCOFF::SymbolType Type; + XCOFF::StorageClass StorageClass; MCSectionXCOFF(StringRef Section, XCOFF::StorageMappingClass SMC, - SectionKind K, MCSymbol *Begin) - : MCSection(SV_XCOFF, K, Begin), Name(Section), MappingClass(SMC) {} + XCOFF::SymbolType ST, XCOFF::StorageClass SC, SectionKind K, + MCSymbol *Begin) + : MCSection(SV_XCOFF, K, Begin), Name(Section), MappingClass(SMC), + Type(ST), StorageClass(SC) { + assert((ST == XCOFF::XTY_SD || ST == XCOFF::XTY_CM) && + "Invalid or unhandled type for csect."); + } public: ~MCSectionXCOFF(); @@ -43,6 +57,8 @@ public: StringRef getSectionName() const { return Name; } XCOFF::StorageMappingClass getMappingClass() const { return MappingClass; } + XCOFF::StorageClass getStorageClass() const { return StorageClass; } + XCOFF::SymbolType getCSectType() const { return Type; } void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, raw_ostream &OS, diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h index 731e7515448c..6b48580ae57c 100644 --- a/include/llvm/MC/MCStreamer.h +++ b/include/llvm/MC/MCStreamer.h @@ -46,6 +46,7 @@ struct MCDwarfFrameInfo; class MCExpr; class MCInst; class MCInstPrinter; +class MCRegister; class MCSection; class MCStreamer; class MCSymbolRefExpr; @@ -53,6 +54,13 @@ class MCSubtargetInfo; class raw_ostream; class Twine; +namespace codeview { +struct DefRangeRegisterRelHeader; +struct DefRangeSubfieldRegisterHeader; +struct DefRangeRegisterHeader; +struct DefRangeFramePointerRelHeader; +} + using MCSectionSubPair = std::pair; /// Target specific streamer interface. This is used so that targets can @@ -536,6 +544,15 @@ public: /// \param Symbol - Symbol the image relative relocation should point to. virtual void EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset); + /// Emits an lcomm directive with XCOFF csect information. + /// + /// \param Symbol - The symbol we are emiting. + /// \param Size - The size of the block of storage. + /// \param ByteAlignment - The alignment of the symbol in bytes. Must be a power + /// of 2. + virtual void EmitXCOFFLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + /// Emit an ELF .size directive. /// /// This corresponds to an assembler statement such as: @@ -860,6 +877,22 @@ public: ArrayRef> Ranges, StringRef FixedSizePortion); + virtual void EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeRegisterRelHeader DRHdr); + + virtual void EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeSubfieldRegisterHeader DRHdr); + + virtual void EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeRegisterHeader DRHdr); + + virtual void EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeFramePointerRelHeader DRHdr); + /// This implements the CodeView '.cv_stringtable' assembler directive. 
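A short caller-side sketch of the EmitXCOFFLocalCommonSymbol hook declared in this hunk, assuming a streamer and symbol owned by the caller; per the doc comment, the byte alignment must be a power of 2:

  #include "llvm/MC/MCStreamer.h"
  using namespace llvm;

  // Emit 64 bytes of local common (BSS-style) storage for Sym, aligned to 8.
  void emitLocalCommon(MCStreamer &Streamer, MCSymbol *Sym) {
    Streamer.EmitXCOFFLocalCommonSymbol(Sym, /*Size=*/64, /*ByteAlignment=*/8);
  }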
virtual void EmitCVStringTableDirective() {} @@ -917,13 +950,13 @@ public: virtual void EmitWinCFIFuncletOrFuncEnd(SMLoc Loc = SMLoc()); virtual void EmitWinCFIStartChained(SMLoc Loc = SMLoc()); virtual void EmitWinCFIEndChained(SMLoc Loc = SMLoc()); - virtual void EmitWinCFIPushReg(unsigned Register, SMLoc Loc = SMLoc()); - virtual void EmitWinCFISetFrame(unsigned Register, unsigned Offset, + virtual void EmitWinCFIPushReg(MCRegister Register, SMLoc Loc = SMLoc()); + virtual void EmitWinCFISetFrame(MCRegister Register, unsigned Offset, SMLoc Loc = SMLoc()); virtual void EmitWinCFIAllocStack(unsigned Size, SMLoc Loc = SMLoc()); - virtual void EmitWinCFISaveReg(unsigned Register, unsigned Offset, + virtual void EmitWinCFISaveReg(MCRegister Register, unsigned Offset, SMLoc Loc = SMLoc()); - virtual void EmitWinCFISaveXMM(unsigned Register, unsigned Offset, + virtual void EmitWinCFISaveXMM(MCRegister Register, unsigned Offset, SMLoc Loc = SMLoc()); virtual void EmitWinCFIPushFrame(bool Code, SMLoc Loc = SMLoc()); virtual void EmitWinCFIEndProlog(SMLoc Loc = SMLoc()); diff --git a/include/llvm/MC/MCSubtargetInfo.h b/include/llvm/MC/MCSubtargetInfo.h index 9490a6ecedad..09130c4641ef 100644 --- a/include/llvm/MC/MCSubtargetInfo.h +++ b/include/llvm/MC/MCSubtargetInfo.h @@ -221,6 +221,52 @@ public: auto Found = std::lower_bound(ProcDesc.begin(), ProcDesc.end(), CPU); return Found != ProcDesc.end() && StringRef(Found->Key) == CPU; } + + virtual unsigned getHwMode() const { return 0; } + + /// Return the cache size in bytes for the given level of cache. + /// Level is zero-based, so a value of zero means the first level of + /// cache. + /// + virtual Optional getCacheSize(unsigned Level) const; + + /// Return the cache associatvity for the given level of cache. + /// Level is zero-based, so a value of zero means the first level of + /// cache. + /// + virtual Optional getCacheAssociativity(unsigned Level) const; + + /// Return the target cache line size in bytes at a given level. + /// + virtual Optional getCacheLineSize(unsigned Level) const; + + /// Return the target cache line size in bytes. By default, return + /// the line size for the bottom-most level of cache. This provides + /// a more convenient interface for the common case where all cache + /// levels have the same line size. Return zero if there is no + /// cache model. + /// + virtual unsigned getCacheLineSize() const { + Optional Size = getCacheLineSize(0); + if (Size) + return *Size; + + return 0; + } + + /// Return the preferred prefetch distance in terms of instructions. + /// + virtual unsigned getPrefetchDistance() const; + + /// Return the maximum prefetch distance in terms of loop + /// iterations. + /// + virtual unsigned getMaxPrefetchIterationsAhead() const; + + /// Return the minimum stride necessary to trigger software + /// prefetching. 
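The cache queries added to MCSubtargetInfo return Optional values (their element types are elided in this rendering; the sketch assumes unsigned), while the no-argument getCacheLineSize() falls back to 0 when the target has no cache model. STI and reportL1 are illustrative names:

  #include "llvm/ADT/Optional.h"
  #include "llvm/MC/MCSubtargetInfo.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  // Report the first-level cache geometry when the subtarget models it.
  // Cache levels are zero-based, so level 0 is the L1 cache.
  void reportL1(const MCSubtargetInfo &STI) {
    if (Optional<unsigned> Size = STI.getCacheSize(0))
      outs() << "L1 size: " << *Size << " bytes\n";
    outs() << "line size: " << STI.getCacheLineSize() << " bytes\n"; // 0 if unmodelled
  }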
+ /// + virtual unsigned getMinPrefetchStride() const; }; } // end namespace llvm diff --git a/include/llvm/MC/MCSymbolWasm.h b/include/llvm/MC/MCSymbolWasm.h index c50cd0ee4709..95beebe3f75a 100644 --- a/include/llvm/MC/MCSymbolWasm.h +++ b/include/llvm/MC/MCSymbolWasm.h @@ -54,6 +54,13 @@ public: modifyFlags(wasm::WASM_SYMBOL_EXPORTED, wasm::WASM_SYMBOL_EXPORTED); } + bool isNoStrip() const { + return getFlags() & wasm::WASM_SYMBOL_NO_STRIP; + } + void setNoStrip() const { + modifyFlags(wasm::WASM_SYMBOL_NO_STRIP, wasm::WASM_SYMBOL_NO_STRIP); + } + bool isWeak() const { return IsWeak; } void setWeak(bool isWeak) { IsWeak = isWeak; } diff --git a/include/llvm/MC/MCSymbolXCOFF.h b/include/llvm/MC/MCSymbolXCOFF.h index 0a1fe1475138..98ecd2466926 100644 --- a/include/llvm/MC/MCSymbolXCOFF.h +++ b/include/llvm/MC/MCSymbolXCOFF.h @@ -8,17 +8,49 @@ #ifndef LLVM_MC_MCSYMBOLXCOFF_H #define LLVM_MC_MCSYMBOLXCOFF_H +#include "llvm/ADT/Optional.h" #include "llvm/BinaryFormat/XCOFF.h" #include "llvm/MC/MCSymbol.h" namespace llvm { +class MCSectionXCOFF; + class MCSymbolXCOFF : public MCSymbol { public: MCSymbolXCOFF(const StringMapEntry *Name, bool isTemporary) : MCSymbol(SymbolKindXCOFF, Name, isTemporary) {} static bool classof(const MCSymbol *S) { return S->isXCOFF(); } + + void setStorageClass(XCOFF::StorageClass SC) { + assert((!StorageClass.hasValue() || StorageClass.getValue() == SC) && + "Redefining StorageClass of XCOFF MCSymbol."); + StorageClass = SC; + }; + + XCOFF::StorageClass getStorageClass() const { + assert(StorageClass.hasValue() && + "StorageClass not set on XCOFF MCSymbol."); + return StorageClass.getValue(); + } + + void setContainingCsect(MCSectionXCOFF *C) { + assert((!ContainingCsect || ContainingCsect == C) && + "Trying to set a containing csect that doesn't match the one that" + "this symbol is already mapped to."); + ContainingCsect = C; + } + + MCSectionXCOFF *getContainingCsect() const { + assert(ContainingCsect && + "Trying to get containing csect but none was set."); + return ContainingCsect; + } + +private: + Optional StorageClass; + MCSectionXCOFF *ContainingCsect = nullptr; }; } // end namespace llvm diff --git a/include/llvm/MC/MCWasmObjectWriter.h b/include/llvm/MC/MCWasmObjectWriter.h index 4adbca28f116..fbb68549b503 100644 --- a/include/llvm/MC/MCWasmObjectWriter.h +++ b/include/llvm/MC/MCWasmObjectWriter.h @@ -20,9 +20,10 @@ class raw_pwrite_stream; class MCWasmObjectTargetWriter : public MCObjectTargetWriter { const unsigned Is64Bit : 1; + const unsigned IsEmscripten : 1; protected: - explicit MCWasmObjectTargetWriter(bool Is64Bit_); + explicit MCWasmObjectTargetWriter(bool Is64Bit_, bool IsEmscripten); public: virtual ~MCWasmObjectTargetWriter(); @@ -38,6 +39,7 @@ public: /// \name Accessors /// @{ bool is64Bit() const { return Is64Bit; } + bool isEmscripten() const { return IsEmscripten; } /// @} }; diff --git a/include/llvm/MC/MCXCOFFStreamer.h b/include/llvm/MC/MCXCOFFStreamer.h index 159ae4818749..b13b0031d18e 100644 --- a/include/llvm/MC/MCXCOFFStreamer.h +++ b/include/llvm/MC/MCXCOFFStreamer.h @@ -26,6 +26,8 @@ public: uint64_t Size = 0, unsigned ByteAlignment = 0, SMLoc Loc = SMLoc()) override; void EmitInstToData(const MCInst &Inst, const MCSubtargetInfo &) override; + void EmitXCOFFLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlign) override; }; } // end namespace llvm diff --git a/include/llvm/MC/StringTableBuilder.h b/include/llvm/MC/StringTableBuilder.h index c83eca4e512d..c8d4c3bbc262 100644 --- 
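A minimal sketch of tying an XCOFF symbol to its storage class and containing csect through the accessors declared above; Sym and Csect are caller-provided placeholders, and both setters assert if later called with a conflicting value:

  #include "llvm/MC/MCSectionXCOFF.h"
  #include "llvm/MC/MCSymbolXCOFF.h"
  using namespace llvm;

  // Record the storage class and owning csect on an XCOFF symbol.
  void bindToCsect(MCSymbolXCOFF &Sym, MCSectionXCOFF *Csect) {
    Sym.setStorageClass(XCOFF::C_EXT); // asserts on a conflicting redefinition
    Sym.setContainingCsect(Csect);     // likewise asserts if remapped
  }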
a/include/llvm/MC/StringTableBuilder.h +++ b/include/llvm/MC/StringTableBuilder.h @@ -22,7 +22,7 @@ class raw_ostream; /// Utility for building string tables with deduplicated suffixes. class StringTableBuilder { public: - enum Kind { ELF, WinCOFF, MachO, RAW, DWARF }; + enum Kind { ELF, WinCOFF, MachO, RAW, DWARF, XCOFF }; private: DenseMap StringIndexMap; diff --git a/include/llvm/MC/SubtargetFeature.h b/include/llvm/MC/SubtargetFeature.h index fc9565ceafad..defbc3c64720 100644 --- a/include/llvm/MC/SubtargetFeature.h +++ b/include/llvm/MC/SubtargetFeature.h @@ -18,6 +18,7 @@ #define LLVM_MC_SUBTARGETFEATURE_H #include "llvm/ADT/StringRef.h" +#include "llvm/Support/MathExtras.h" #include #include #include @@ -33,20 +34,123 @@ const unsigned MAX_SUBTARGET_WORDS = 3; const unsigned MAX_SUBTARGET_FEATURES = MAX_SUBTARGET_WORDS * 64; /// Container class for subtarget features. -/// This is convenient because std::bitset does not have a constructor -/// with an initializer list of set bits. -class FeatureBitset : public std::bitset { -public: - // Cannot inherit constructors because it's not supported by VC++.. - FeatureBitset() = default; - - FeatureBitset(const bitset& B) : bitset(B) {} +/// This is a constexpr reimplementation of a subset of std::bitset. It would be +/// nice to use std::bitset directly, but it doesn't support constant +/// initialization. +class FeatureBitset { + static_assert((MAX_SUBTARGET_FEATURES % 64) == 0, + "Should be a multiple of 64!"); + // This cannot be a std::array, operator[] is not constexpr until C++17. + uint64_t Bits[MAX_SUBTARGET_WORDS] = {}; + +protected: + constexpr FeatureBitset(const std::array &B) { + for (unsigned I = 0; I != B.size(); ++I) + Bits[I] = B[I]; + } - FeatureBitset(std::initializer_list Init) { +public: + constexpr FeatureBitset() = default; + constexpr FeatureBitset(std::initializer_list Init) { for (auto I : Init) set(I); } + FeatureBitset &set() { + std::fill(std::begin(Bits), std::end(Bits), -1ULL); + return *this; + } + + constexpr FeatureBitset &set(unsigned I) { + // GCC <6.2 crashes if this is written in a single statement. + uint64_t NewBits = Bits[I / 64] | (uint64_t(1) << (I % 64)); + Bits[I / 64] = NewBits; + return *this; + } + + constexpr FeatureBitset &reset(unsigned I) { + // GCC <6.2 crashes if this is written in a single statement. + uint64_t NewBits = Bits[I / 64] & ~(uint64_t(1) << (I % 64)); + Bits[I / 64] = NewBits; + return *this; + } + + constexpr FeatureBitset &flip(unsigned I) { + // GCC <6.2 crashes if this is written in a single statement. 
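set(), reset() and flip() in this hunk all rely on the same word-and-bit decomposition. A standalone restatement of that arithmetic, in plain C++ and independent of FeatureBitset; the helper names are illustrative:

  #include <cstdint>

  // Feature I lives in 64-bit word I / 64, at bit position I % 64.
  constexpr unsigned featureWord(unsigned I) { return I / 64; }
  constexpr uint64_t featureMask(unsigned I) { return uint64_t(1) << (I % 64); }

  static_assert(featureWord(70) == 1 && featureMask(70) == (uint64_t(1) << 6),
                "feature 70 maps to bit 6 of word 1");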
+ uint64_t NewBits = Bits[I / 64] ^ (uint64_t(1) << (I % 64)); + Bits[I / 64] = NewBits; + return *this; + } + + constexpr bool operator[](unsigned I) const { + uint64_t Mask = uint64_t(1) << (I % 64); + return (Bits[I / 64] & Mask) != 0; + } + + constexpr bool test(unsigned I) const { return (*this)[I]; } + + constexpr size_t size() const { return MAX_SUBTARGET_FEATURES; } + + bool any() const { + return llvm::any_of(Bits, [](uint64_t I) { return I != 0; }); + } + bool none() const { return !any(); } + size_t count() const { + size_t Count = 0; + for (auto B : Bits) + Count += countPopulation(B); + return Count; + } + + constexpr FeatureBitset &operator^=(const FeatureBitset &RHS) { + for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I) { + Bits[I] ^= RHS.Bits[I]; + } + return *this; + } + constexpr FeatureBitset operator^(const FeatureBitset &RHS) const { + FeatureBitset Result = *this; + Result ^= RHS; + return Result; + } + + constexpr FeatureBitset &operator&=(const FeatureBitset &RHS) { + for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I) { + Bits[I] &= RHS.Bits[I]; + } + return *this; + } + constexpr FeatureBitset operator&(const FeatureBitset &RHS) const { + FeatureBitset Result = *this; + Result &= RHS; + return Result; + } + + constexpr FeatureBitset &operator|=(const FeatureBitset &RHS) { + for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I) { + Bits[I] |= RHS.Bits[I]; + } + return *this; + } + constexpr FeatureBitset operator|(const FeatureBitset &RHS) const { + FeatureBitset Result = *this; + Result |= RHS; + return Result; + } + + constexpr FeatureBitset operator~() const { + FeatureBitset Result = *this; + for (auto &B : Result.Bits) + B = ~B; + return Result; + } + + bool operator==(const FeatureBitset &RHS) const { + return std::equal(std::begin(Bits), std::end(Bits), std::begin(RHS.Bits)); + } + + bool operator!=(const FeatureBitset &RHS) const { return !(*this == RHS); } + bool operator < (const FeatureBitset &Other) const { for (unsigned I = 0, E = size(); I != E; ++I) { bool LHS = test(I), RHS = Other.test(I); @@ -58,23 +162,12 @@ public: }; /// Class used to store the subtarget bits in the tables created by tablegen. -/// The std::initializer_list constructor of FeatureBitset can't be done at -/// compile time and requires a static constructor to run at startup. -class FeatureBitArray { - std::array Bits; - +class FeatureBitArray : public FeatureBitset { public: constexpr FeatureBitArray(const std::array &B) - : Bits(B) {} - - FeatureBitset getAsBitset() const { - FeatureBitset Result; - - for (unsigned i = 0, e = Bits.size(); i != e; ++i) - Result |= FeatureBitset(Bits[i]) << (64 * i); + : FeatureBitset(B) {} - return Result; - } + const FeatureBitset &getAsBitset() const { return *this; } }; //===----------------------------------------------------------------------===// diff --git a/include/llvm/MCA/CodeEmitter.h b/include/llvm/MCA/CodeEmitter.h new file mode 100644 index 000000000000..c8d222bd8c2f --- /dev/null +++ b/include/llvm/MCA/CodeEmitter.h @@ -0,0 +1,72 @@ +//===--------------------- CodeEmitter.h ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// A utility class used to compute instruction encodings. 
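A small usage sketch of the constexpr construction and bitwise operators shown in this hunk, assuming a C++14 (or newer) build so the loops in the constexpr constructors are legal; the feature indices 3 and 70 are arbitrary examples:

  #include "llvm/MC/SubtargetFeature.h"
  using namespace llvm;

  // Two single-feature sets combined at compile time.
  constexpr FeatureBitset A = {3};
  constexpr FeatureBitset B = {70};
  constexpr FeatureBitset Both = A | B;
  static_assert(Both[3] && Both[70] && !Both[5],
                "operator| keeps the bits of both operands");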
It buffers encodings +/// for later usage. It exposes a simple API to compute and get the encodings as +/// StringRef. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_CODEEMITTER_H +#define LLVM_MCA_CODEEMITTER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MCA/Instruction.h" +#include "llvm/MCA/Support.h" +#include "llvm/Support/raw_ostream.h" + +#include + +namespace llvm { +namespace mca { + +/// A utility class used to compute instruction encodings for a code region. +/// +/// It provides a simple API to compute and return instruction encodings as +/// strings. Encodings are cached internally for later usage. +class CodeEmitter { + const MCSubtargetInfo &STI; + const MCAsmBackend &MAB; + const MCCodeEmitter &MCE; + + SmallString<256> Code; + raw_svector_ostream VecOS; + ArrayRef Sequence; + + // An EncodingInfo pair stores information. Base (i.e. first) + // is an index to the `Code`. Length (i.e. second) is the encoding size. + using EncodingInfo = std::pair; + + // A cache of encodings. + SmallVector Encodings; + + EncodingInfo getOrCreateEncodingInfo(unsigned MCID); + +public: + CodeEmitter(const MCSubtargetInfo &ST, const MCAsmBackend &AB, + const MCCodeEmitter &CE, ArrayRef S) + : STI(ST), MAB(AB), MCE(CE), VecOS(Code), Sequence(S), + Encodings(S.size()) {} + + StringRef getEncoding(unsigned MCID) { + EncodingInfo EI = getOrCreateEncodingInfo(MCID); + return StringRef(&Code[EI.first], EI.second); + } +}; + +} // namespace mca +} // namespace llvm + +#endif // LLVM_MCA_CODEEMITTER_H diff --git a/include/llvm/MCA/Context.h b/include/llvm/MCA/Context.h index 503d780d4947..af3cb8e1e837 100644 --- a/include/llvm/MCA/Context.h +++ b/include/llvm/MCA/Context.h @@ -20,7 +20,6 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MCA/HardwareUnits/HardwareUnit.h" -#include "llvm/MCA/InstrBuilder.h" #include "llvm/MCA/Pipeline.h" #include "llvm/MCA/SourceMgr.h" #include @@ -58,6 +57,9 @@ public: Context(const Context &C) = delete; Context &operator=(const Context &C) = delete; + const MCRegisterInfo &getMCRegisterInfo() const { return MRI; } + const MCSubtargetInfo &getMCSubtargetInfo() const { return STI; } + void addHardwareUnit(std::unique_ptr H) { Hardware.push_back(std::move(H)); } @@ -65,7 +67,6 @@ public: /// Construct a basic pipeline for simulating an out-of-order pipeline. /// This pipeline consists of Fetch, Dispatch, Execute, and Retire stages. 
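A sketch of driving mca::CodeEmitter with the constructor and getEncoding() shown above. STI, MAB, MCE and Insts are placeholders owned by the caller, and the sketch assumes the lookup key passed to getEncoding() is the instruction's index in the sequence, consistent with the cache being sized to the sequence:

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/MCA/CodeEmitter.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  // Print the encoded size of every instruction in a region; encodings are
  // computed lazily and cached inside the CodeEmitter.
  void printEncodingSizes(const MCSubtargetInfo &STI, const MCAsmBackend &MAB,
                          const MCCodeEmitter &MCE, ArrayRef<MCInst> Insts) {
    mca::CodeEmitter CE(STI, MAB, MCE, Insts);
    for (unsigned I = 0, E = Insts.size(); I != E; ++I)
      outs() << "inst " << I << ": " << CE.getEncoding(I).size() << " bytes\n";
  }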
std::unique_ptr createDefaultPipeline(const PipelineOptions &Opts, - InstrBuilder &IB, SourceMgr &SrcMgr); }; diff --git a/include/llvm/MCA/HardwareUnits/LSUnit.h b/include/llvm/MCA/HardwareUnits/LSUnit.h index ae9a49c64855..34903794db4a 100644 --- a/include/llvm/MCA/HardwareUnits/LSUnit.h +++ b/include/llvm/MCA/HardwareUnits/LSUnit.h @@ -209,8 +209,10 @@ public: unsigned getUsedLQEntries() const { return UsedLQEntries; } unsigned getUsedSQEntries() const { return UsedSQEntries; } - unsigned assignLQSlot() { return UsedLQEntries++; } - unsigned assignSQSlot() { return UsedSQEntries++; } + void acquireLQSlot() { ++UsedLQEntries; } + void acquireSQSlot() { ++UsedSQEntries; } + void releaseLQSlot() { --UsedLQEntries; } + void releaseSQSlot() { --UsedSQEntries; } bool assumeNoAlias() const { return NoAlias; } @@ -285,13 +287,18 @@ public: unsigned createMemoryGroup() { Groups.insert( - std::make_pair(NextGroupID, llvm::make_unique())); + std::make_pair(NextGroupID, std::make_unique())); return NextGroupID++; } - // Instruction executed event handlers. virtual void onInstructionExecuted(const InstRef &IR); + // Loads are tracked by the LDQ (load queue) from dispatch until completion. + // Stores are tracked by the STQ (store queue) from dispatch until commitment. + // By default we conservatively assume that the LDQ receives a load at + // dispatch. Loads leave the LDQ at retirement stage. + virtual void onInstructionRetired(const InstRef &IR); + virtual void onInstructionIssued(const InstRef &IR) { unsigned GroupID = IR.getInstruction()->getLSUTokenID(); Groups[GroupID]->onInstructionIssued(IR); @@ -436,9 +443,6 @@ public: /// 6. A store has to wait until an older store barrier is fully executed. unsigned dispatch(const InstRef &IR) override; - // FIXME: For simplicity, we optimistically assume a similar behavior for - // store instructions. In practice, store operations don't tend to leave the - // store queue until they reach the 'Retired' stage (See PR39830). void onInstructionExecuted(const InstRef &IR) override; }; diff --git a/include/llvm/MCA/HardwareUnits/RegisterFile.h b/include/llvm/MCA/HardwareUnits/RegisterFile.h index 36506327bd29..cd7718d98744 100644 --- a/include/llvm/MCA/HardwareUnits/RegisterFile.h +++ b/include/llvm/MCA/HardwareUnits/RegisterFile.h @@ -220,7 +220,7 @@ public: // // Current implementation can simulate up to 32 register files (including the // special register file at index #0). - unsigned isAvailable(ArrayRef Regs) const; + unsigned isAvailable(ArrayRef Regs) const; // Returns the number of PRFs implemented by this processor. unsigned getNumRegisterFiles() const { return RegisterFiles.size(); } diff --git a/include/llvm/MCA/HardwareUnits/ResourceManager.h b/include/llvm/MCA/HardwareUnits/ResourceManager.h index 2f91185516fb..917af3750044 100644 --- a/include/llvm/MCA/HardwareUnits/ResourceManager.h +++ b/include/llvm/MCA/HardwareUnits/ResourceManager.h @@ -33,8 +33,7 @@ namespace mca { /// with a buffer size of -1 is always available if it is not reserved. /// /// Values of type ResourceStateEvent are returned by method -/// ResourceState::isBufferAvailable(), which is used to query the internal -/// state of a resource. +/// ResourceManager::canBeDispatched() /// /// The naming convention for resource state events is: /// * Event names start with prefix RS_ @@ -263,16 +262,26 @@ public: /// Returns RS_BUFFER_UNAVAILABLE if there are no available slots. ResourceStateEvent isBufferAvailable() const; - /// Reserve a slot in the buffer. 
- void reserveBuffer() { - if (AvailableSlots) - AvailableSlots--; + /// Reserve a buffer slot. + /// + /// Returns true if the buffer is not full. + /// It always returns true if BufferSize is set to zero. + bool reserveBuffer() { + if (BufferSize <= 0) + return true; + + --AvailableSlots; + assert(AvailableSlots <= static_cast(BufferSize)); + return AvailableSlots; } - /// Release a slot in the buffer. + /// Releases a slot in the buffer. void releaseBuffer() { - if (BufferSize > 0) - AvailableSlots++; + // Ignore dispatch hazards or invalid buffer sizes. + if (BufferSize <= 0) + return; + + ++AvailableSlots; assert(AvailableSlots <= static_cast(BufferSize)); } @@ -351,9 +360,16 @@ class ResourceManager { // Set of processor resource units that are available during this cycle. uint64_t AvailableProcResUnits; - // Set of processor resource groups that are currently reserved. + // Set of processor resources that are currently reserved. uint64_t ReservedResourceGroups; + // Set of unavailable scheduler buffer resources. This is used internally to + // speedup `canBeDispatched()` queries. + uint64_t AvailableBuffers; + + // Set of dispatch hazard buffer resources that are currently unavailable. + uint64_t ReservedBuffers; + // Returns the actual resource unit that will be used. ResourceRef selectPipe(uint64_t ResourceID); @@ -382,17 +398,20 @@ public: // Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if // there are enough available slots in the buffers. - ResourceStateEvent canBeDispatched(ArrayRef Buffers) const; + ResourceStateEvent canBeDispatched(uint64_t ConsumedBuffers) const; // Return the processor resource identifier associated to this Mask. unsigned resolveResourceMask(uint64_t Mask) const; - // Consume a slot in every buffered resource from array 'Buffers'. Resource - // units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved. - void reserveBuffers(ArrayRef Buffers); + // Acquires a slot from every buffered resource in mask `ConsumedBuffers`. + // Units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved. + void reserveBuffers(uint64_t ConsumedBuffers); - // Release buffer entries previously allocated by method reserveBuffers. - void releaseBuffers(ArrayRef Buffers); + // Releases a slot from every buffered resource in mask `ConsumedBuffers`. + // ConsumedBuffers is a bitmask of previously acquired buffers (using method + // `reserveBuffers`). Units that are dispatch hazards (i.e. BufferSize=0) are + // not automatically unreserved by this method. + void releaseBuffers(uint64_t ConsumedBuffers); // Reserve a processor resource. A reserved resource is not available for // instruction issue until it is released. diff --git a/include/llvm/MCA/HardwareUnits/RetireControlUnit.h b/include/llvm/MCA/HardwareUnits/RetireControlUnit.h index 06290141739e..acbd4543bd4a 100644 --- a/include/llvm/MCA/HardwareUnits/RetireControlUnit.h +++ b/include/llvm/MCA/HardwareUnits/RetireControlUnit.h @@ -57,34 +57,43 @@ struct RetireControlUnit : public HardwareUnit { private: unsigned NextAvailableSlotIdx; unsigned CurrentInstructionSlotIdx; - unsigned AvailableSlots; + unsigned NumROBEntries; + unsigned AvailableEntries; unsigned MaxRetirePerCycle; // 0 means no limit. 
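reserveBuffers() and releaseBuffers() now take a bitmask of consumed buffer resources rather than a list. A generic, self-contained sketch of the loop shape a consumer of such masks would use (not the ResourceManager implementation itself; the names are illustrative):

  #include <cstdint>

  // Visit each set bit of a buffer mask, lowest first. Process is a
  // stand-in callback for the per-resource work.
  template <typename Callback>
  void forEachSetBit(uint64_t Mask, Callback Process) {
    while (Mask) {
      uint64_t Bit = Mask & (~Mask + 1); // isolate the lowest set bit
      Process(Bit);
      Mask ^= Bit;                       // clear it and continue
    }
  }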
std::vector Queue; -public: - RetireControlUnit(const MCSchedModel &SM); - - bool isEmpty() const { return AvailableSlots == Queue.size(); } - bool isAvailable(unsigned Quantity = 1) const { + unsigned normalizeQuantity(unsigned Quantity) const { // Some instructions may declare a number of uOps which exceeds the size // of the reorder buffer. To avoid problems, cap the amount of slots to // the size of the reorder buffer. - Quantity = std::min(Quantity, static_cast(Queue.size())); + Quantity = std::min(Quantity, NumROBEntries); // Further normalize the number of micro opcodes for instructions that // declare zero opcodes. This should match the behavior of method // reserveSlot(). - Quantity = std::max(Quantity, 1U); - return AvailableSlots >= Quantity; + return std::max(Quantity, 1U); + } + + unsigned computeNextSlotIdx() const; + +public: + RetireControlUnit(const MCSchedModel &SM); + + bool isEmpty() const { return AvailableEntries == NumROBEntries; } + + bool isAvailable(unsigned Quantity = 1) const { + return AvailableEntries >= normalizeQuantity(Quantity); } unsigned getMaxRetirePerCycle() const { return MaxRetirePerCycle; } - // Reserves a number of slots, and returns a new token. - unsigned reserveSlot(const InstRef &IS, unsigned NumMicroOps); + // Reserves a number of slots, and returns a new token reference. + unsigned dispatch(const InstRef &IS); // Return the current token from the RCU's circular token queue. - const RUToken &peekCurrentToken() const; + const RUToken &getCurrentToken() const; + + const RUToken &peekNextToken() const; // Advance the pointer to the next token in the circular token queue. void consumeCurrentToken(); diff --git a/include/llvm/MCA/HardwareUnits/Scheduler.h b/include/llvm/MCA/HardwareUnits/Scheduler.h index 27beb842dfd2..6c196757e571 100644 --- a/include/llvm/MCA/HardwareUnits/Scheduler.h +++ b/include/llvm/MCA/HardwareUnits/Scheduler.h @@ -68,7 +68,7 @@ public: /// instructions from the dispatch stage, until the write-back stage. /// class Scheduler : public HardwareUnit { - LSUnit &LSU; + LSUnitBase &LSU; // Instruction selection strategy for this Scheduler. std::unique_ptr Strategy; @@ -154,15 +154,15 @@ class Scheduler : public HardwareUnit { bool promoteToPendingSet(SmallVectorImpl &Pending); public: - Scheduler(const MCSchedModel &Model, LSUnit &Lsu) + Scheduler(const MCSchedModel &Model, LSUnitBase &Lsu) : Scheduler(Model, Lsu, nullptr) {} - Scheduler(const MCSchedModel &Model, LSUnit &Lsu, + Scheduler(const MCSchedModel &Model, LSUnitBase &Lsu, std::unique_ptr SelectStrategy) - : Scheduler(make_unique(Model), Lsu, + : Scheduler(std::make_unique(Model), Lsu, std::move(SelectStrategy)) {} - Scheduler(std::unique_ptr RM, LSUnit &Lsu, + Scheduler(std::unique_ptr RM, LSUnitBase &Lsu, std::unique_ptr SelectStrategy) : LSU(Lsu), Resources(std::move(RM)), BusyResourceUnits(0), NumDispatchedToThePendingSet(0), HadTokenStall(false) { @@ -228,6 +228,9 @@ public: SmallVectorImpl &Ready); /// Convert a resource mask into a valid llvm processor resource identifier. + /// + /// Only the most significant bit of the Mask is used by this method to + /// identify the processor resource. 
unsigned getResourceID(uint64_t Mask) const { return Resources->resolveResourceMask(Mask); } diff --git a/include/llvm/MCA/Instruction.h b/include/llvm/MCA/Instruction.h index d4d3f22797f7..c97cb463d0f5 100644 --- a/include/llvm/MCA/Instruction.h +++ b/include/llvm/MCA/Instruction.h @@ -18,6 +18,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCRegister.h" // definition of MCPhysReg. #include "llvm/Support/MathExtras.h" #ifndef NDEBUG @@ -42,7 +43,7 @@ struct WriteDescriptor { unsigned Latency; // This field is set to a value different than zero only if this // is an implicit definition. - unsigned RegisterID; + MCPhysReg RegisterID; // Instruction itineraries would set this field to the SchedClass ID. // Otherwise, it defaults to the WriteResourceID from the MCWriteLatencyEntry // element associated to this write. @@ -70,7 +71,7 @@ struct ReadDescriptor { // uses always come first in the sequence of uses. unsigned UseIndex; // This field is only set if this is an implicit read. - unsigned RegisterID; + MCPhysReg RegisterID; // Scheduling Class Index. It is used to query the scheduling model for the // MCSchedClassDesc object. unsigned SchedClassID; @@ -85,7 +86,7 @@ class ReadState; /// Field RegID is set to the invalid register for memory dependencies. struct CriticalDependency { unsigned IID; - unsigned RegID; + MCPhysReg RegID; unsigned Cycles; }; @@ -106,7 +107,7 @@ class WriteState { // to speedup queries on the register file. // For implicit writes, this field always matches the value of // field RegisterID from WD. - unsigned RegisterID; + MCPhysReg RegisterID; // Physical register file that serves register RegisterID. unsigned PRFID; @@ -146,7 +147,7 @@ class WriteState { SmallVector, 4> Users; public: - WriteState(const WriteDescriptor &Desc, unsigned RegID, + WriteState(const WriteDescriptor &Desc, MCPhysReg RegID, bool clearsSuperRegs = false, bool writesZero = false) : WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), PRFID(0), ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero), @@ -158,7 +159,7 @@ public: int getCyclesLeft() const { return CyclesLeft; } unsigned getWriteResourceID() const { return WD->SClassOrWriteResourceID; } - unsigned getRegisterID() const { return RegisterID; } + MCPhysReg getRegisterID() const { return RegisterID; } unsigned getRegisterFileID() const { return PRFID; } unsigned getLatency() const { return WD->Latency; } unsigned getDependentWriteCyclesLeft() const { @@ -200,7 +201,7 @@ public: } void setDependentWrite(const WriteState *Other) { DependentWrite = Other; } - void writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles); + void writeStartEvent(unsigned IID, MCPhysReg RegID, unsigned Cycles); void setWriteZero() { WritesZero = true; } void setEliminated() { assert(Users.empty() && "Write is in an inconsistent state."); @@ -226,7 +227,7 @@ public: class ReadState { const ReadDescriptor *RD; // Physical register identified associated to this read. - unsigned RegisterID; + MCPhysReg RegisterID; // Physical register file that serves register RegisterID. unsigned PRFID; // Number of writes that contribute to the definition of RegisterID. 
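The comment above notes that only the most significant bit of a resource mask identifies the processor resource. An isolated helper showing that extraction; it is not the ResourceManager implementation, just an illustration of the convention:

  #include "llvm/Support/MathExtras.h"
  #include <cstdint>

  // Return the highest set bit of Mask, i.e. the bit that names the
  // processor resource; 0 for an empty mask.
  inline uint64_t highestResourceBit(uint64_t Mask) {
    return Mask ? (uint64_t(1) << llvm::Log2_64(Mask)) : 0;
  }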
@@ -253,14 +254,14 @@ class ReadState { bool IndependentFromDef; public: - ReadState(const ReadDescriptor &Desc, unsigned RegID) + ReadState(const ReadDescriptor &Desc, MCPhysReg RegID) : RD(&Desc), RegisterID(RegID), PRFID(0), DependentWrites(0), CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), CRD(), IsReady(true), IsZero(false), IndependentFromDef(false) {} const ReadDescriptor &getDescriptor() const { return *RD; } unsigned getSchedClass() const { return RD->SchedClassID; } - unsigned getRegisterID() const { return RegisterID; } + MCPhysReg getRegisterID() const { return RegisterID; } unsigned getRegisterFileID() const { return PRFID; } const CriticalDependency &getCriticalRegDep() const { return CRD; } @@ -272,7 +273,7 @@ public: void setIndependentFromDef() { IndependentFromDef = true; } void cycleEvent(); - void writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles); + void writeStartEvent(unsigned IID, MCPhysReg RegID, unsigned Cycles); void setDependentWrites(unsigned Writes) { DependentWrites = Writes; IsReady = !Writes; @@ -352,11 +353,14 @@ struct InstrDesc { // reports the number of "consumed cycles". SmallVector, 4> Resources; - // A list of buffered resources consumed by this instruction. - SmallVector Buffers; + // A bitmask of used hardware buffers. + uint64_t UsedBuffers; - unsigned UsedProcResUnits; - unsigned UsedProcResGroups; + // A bitmask of used processor resource units. + uint64_t UsedProcResUnits; + + // A bitmask of used processor resource groups. + uint64_t UsedProcResGroups; unsigned MaxLatency; // Number of MicroOps for this instruction. @@ -414,6 +418,7 @@ public: const InstrDesc &getDesc() const { return Desc; } unsigned getLatency() const { return Desc.MaxLatency; } + unsigned getNumMicroOps() const { return Desc.NumMicroOps; } bool hasDependentUsers() const { return any_of(Defs, @@ -463,6 +468,12 @@ class Instruction : public InstructionBase { // operation. unsigned LSUTokenID; + // A resource mask which identifies buffered resources consumed by this + // instruction at dispatch stage. In the absence of macro-fusion, this value + // should always match the value of field `UsedBuffers` from the instruction + // descriptor (see field InstrBase::Desc). + uint64_t UsedBuffers; + // Critical register dependency. 
CriticalDependency CriticalRegDep; @@ -480,12 +491,18 @@ class Instruction : public InstructionBase { public: Instruction(const InstrDesc &D) : InstructionBase(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES), - RCUTokenID(0), LSUTokenID(0), CriticalRegDep(), CriticalMemDep(), - CriticalResourceMask(0), IsEliminated(false) {} + RCUTokenID(0), LSUTokenID(0), UsedBuffers(D.UsedBuffers), + CriticalRegDep(), CriticalMemDep(), CriticalResourceMask(0), + IsEliminated(false) {} unsigned getRCUTokenID() const { return RCUTokenID; } unsigned getLSUTokenID() const { return LSUTokenID; } void setLSUTokenID(unsigned LSUTok) { LSUTokenID = LSUTok; } + + uint64_t getUsedBuffers() const { return UsedBuffers; } + void setUsedBuffers(uint64_t Mask) { UsedBuffers = Mask; } + void clearUsedBuffers() { UsedBuffers = 0ULL; } + int getCyclesLeft() const { return CyclesLeft; } // Transition to the dispatch stage, and assign a RCUToken to this diff --git a/include/llvm/MCA/SourceMgr.h b/include/llvm/MCA/SourceMgr.h index dbe31db1b1dd..e844171bdcab 100644 --- a/include/llvm/MCA/SourceMgr.h +++ b/include/llvm/MCA/SourceMgr.h @@ -16,12 +16,13 @@ #define LLVM_MCA_SOURCEMGR_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/MCA/Instruction.h" namespace llvm { namespace mca { -class Instruction; - +// MSVC >= 19.15, < 19.20 need to see the definition of class Instruction to +// prevent compiler error C2139 about intrinsic type trait '__is_assignable'. typedef std::pair SourceRef; class SourceMgr { diff --git a/include/llvm/MCA/Stages/RetireStage.h b/include/llvm/MCA/Stages/RetireStage.h index 08c216ac7bf4..f4713688d25f 100644 --- a/include/llvm/MCA/Stages/RetireStage.h +++ b/include/llvm/MCA/Stages/RetireStage.h @@ -16,6 +16,7 @@ #ifndef LLVM_MCA_RETIRE_STAGE_H #define LLVM_MCA_RETIRE_STAGE_H +#include "llvm/MCA/HardwareUnits/LSUnit.h" #include "llvm/MCA/HardwareUnits/RegisterFile.h" #include "llvm/MCA/HardwareUnits/RetireControlUnit.h" #include "llvm/MCA/Stages/Stage.h" @@ -27,13 +28,14 @@ class RetireStage final : public Stage { // Owner will go away when we move listeners/eventing to the stages. RetireControlUnit &RCU; RegisterFile &PRF; + LSUnitBase &LSU; RetireStage(const RetireStage &Other) = delete; RetireStage &operator=(const RetireStage &Other) = delete; public: - RetireStage(RetireControlUnit &R, RegisterFile &F) - : Stage(), RCU(R), PRF(F) {} + RetireStage(RetireControlUnit &R, RegisterFile &F, LSUnitBase &LS) + : Stage(), RCU(R), PRF(F), LSU(LS) {} bool hasWorkToComplete() const override { return !RCU.isEmpty(); } Error cycleStart() override; diff --git a/include/llvm/Object/Archive.h b/include/llvm/Object/Archive.h index c40278a4f923..c3f36bdd9d1a 100644 --- a/include/llvm/Object/Archive.h +++ b/include/llvm/Object/Archive.h @@ -48,8 +48,7 @@ public: /// Get the name looking up long names. Expected getName(uint64_t Size) const; - /// Members are not larger than 4GB. 
- Expected getSize() const; + Expected getSize() const; Expected getAccessMode() const; Expected> getLastModified() const; @@ -136,6 +135,7 @@ public: Expected getBuffer() const; uint64_t getChildOffset() const; + uint64_t getDataOffset() const { return getChildOffset() + StartOfFile; } Expected getMemoryBufferRef() const; @@ -221,6 +221,9 @@ public: Archive(MemoryBufferRef Source, Error &Err); static Expected> create(MemoryBufferRef Source); + /// Size field is 10 decimal digits long + static const uint64_t MaxMemberSize = 9999999999; + enum Kind { K_GNU, K_GNU64, diff --git a/include/llvm/Object/Binary.h b/include/llvm/Object/Binary.h index 3c3e977baff4..aa5e718f5e9b 100644 --- a/include/llvm/Object/Binary.h +++ b/include/llvm/Object/Binary.h @@ -42,7 +42,9 @@ protected: ID_Archive, ID_MachOUniversalBinary, ID_COFFImportFile, - ID_IR, // LLVM IR + ID_IR, // LLVM IR + ID_TapiUniversal, // Text-based Dynamic Library Stub file. + ID_TapiFile, // Text-based Dynamic Library Stub file. ID_Minidump, @@ -101,16 +103,18 @@ public: return TypeID > ID_StartObjects && TypeID < ID_EndObjects; } - bool isSymbolic() const { return isIR() || isObject() || isCOFFImportFile(); } - - bool isArchive() const { - return TypeID == ID_Archive; + bool isSymbolic() const { + return isIR() || isObject() || isCOFFImportFile() || isTapiFile(); } + bool isArchive() const { return TypeID == ID_Archive; } + bool isMachOUniversalBinary() const { return TypeID == ID_MachOUniversalBinary; } + bool isTapiUniversal() const { return TypeID == ID_TapiUniversal; } + bool isELF() const { return TypeID >= ID_ELF32L && TypeID <= ID_ELF64B; } @@ -137,6 +141,8 @@ public: bool isMinidump() const { return TypeID == ID_Minidump; } + bool isTapiFile() const { return TypeID == ID_TapiFile; } + bool isLittleEndian() const { return !(TypeID == ID_ELF32B || TypeID == ID_ELF64B || TypeID == ID_MachO32B || TypeID == ID_MachO64B); diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h index c53cbc46c747..b91ee5887fec 100644 --- a/include/llvm/Object/COFF.h +++ b/include/llvm/Object/COFF.h @@ -314,7 +314,10 @@ public: return CS16 ? CS16->Name.Offset : CS32->Name.Offset; } - uint32_t getValue() const { return CS16 ? CS16->Value : CS32->Value; } + uint32_t getValue() const { + assert(isSet() && "COFFSymbolRef points to nothing!"); + return CS16 ? 
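Child::getSize() keeps its Expected return (the wrapped integer type is elided in this rendering; the sketch assumes it is now 64-bit, matching the dropped 4GB comment just above). A small error-propagating walk over an archive's members; totalMemberSize is an illustrative name:

  #include "llvm/Object/Archive.h"
  using namespace llvm;
  using namespace llvm::object;

  // Sum the sizes of all archive members, propagating any parse error.
  Expected<uint64_t> totalMemberSize(const Archive &A) {
    uint64_t Total = 0;
    Error Err = Error::success();
    for (const Archive::Child &C : A.children(Err)) {
      Expected<uint64_t> Size = C.getSize();
      if (!Size)
        return Size.takeError();
      Total += *Size;
    }
    if (Err)
      return std::move(Err);
    return Total;
  }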
CS16->Value : CS32->Value; + } int32_t getSectionNumber() const { assert(isSet() && "COFFSymbolRef points to nothing!"); @@ -969,11 +972,14 @@ public: return nullptr; return reinterpret_cast(base()); } - std::error_code getCOFFHeader(const coff_file_header *&Res) const; - std::error_code - getCOFFBigObjHeader(const coff_bigobj_file_header *&Res) const; - std::error_code getPE32Header(const pe32_header *&Res) const; - std::error_code getPE32PlusHeader(const pe32plus_header *&Res) const; + + const coff_file_header *getCOFFHeader() const { return COFFHeader; } + const coff_bigobj_file_header *getCOFFBigObjHeader() const { + return COFFBigObjHeader; + } + const pe32_header *getPE32Header() const { return PE32Header; } + const pe32plus_header *getPE32PlusHeader() const { return PE32PlusHeader; } + std::error_code getDataDirectory(uint32_t index, const data_directory *&Res) const; std::error_code getSection(int32_t index, const coff_section *&Res) const; @@ -1201,16 +1207,34 @@ public: ResourceSectionRef() = default; explicit ResourceSectionRef(StringRef Ref) : BBS(Ref, support::little) {} + Error load(const COFFObjectFile *O); + Error load(const COFFObjectFile *O, const SectionRef &S); + Expected> getEntryNameString(const coff_resource_dir_entry &Entry); Expected getEntrySubDir(const coff_resource_dir_entry &Entry); + Expected + getEntryData(const coff_resource_dir_entry &Entry); Expected getBaseTable(); + Expected + getTableEntry(const coff_resource_dir_table &Table, uint32_t Index); + + Expected getContents(const coff_resource_data_entry &Entry); private: BinaryByteStream BBS; + SectionRef Section; + const COFFObjectFile *Obj; + + std::vector Relocs; + Expected getTableAtOffset(uint32_t Offset); + Expected + getTableEntryAtOffset(uint32_t Offset); + Expected + getDataEntryAtOffset(uint32_t Offset); Expected> getDirStringAtOffset(uint32_t Offset); }; diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h index cf8e4529bad9..28b00c8413de 100644 --- a/include/llvm/Object/ELF.h +++ b/include/llvm/Object/ELF.h @@ -64,6 +64,10 @@ std::string getSecIndexForError(const ELFFile *Obj, return "[unknown index]"; } +static inline Error defaultWarningHandler(const Twine &Msg) { + return createError(Msg); +} + template class ELFFile { public: @@ -95,6 +99,13 @@ public: using Elf_Relr_Range = typename ELFT::RelrRange; using Elf_Phdr_Range = typename ELFT::PhdrRange; + // This is a callback that can be passed to a number of functions. + // It can be used to ignore non-critical errors (warnings), which is + // useful for dumpers, like llvm-readobj. + // It accepts a warning message string and returns a success + // when the warning should be ignored or an error otherwise. 
+ using WarningHandler = llvm::function_ref; + const uint8_t *base() const { return Buf.bytes_begin(); } size_t getBufSize() const { return Buf.size(); } @@ -114,7 +125,9 @@ public: template Expected getEntry(const Elf_Shdr *Section, uint32_t Entry) const; - Expected getStringTable(const Elf_Shdr *Section) const; + Expected + getStringTable(const Elf_Shdr *Section, + WarningHandler WarnHandler = &defaultWarningHandler) const; Expected getStringTableForSymtab(const Elf_Shdr &Section) const; Expected getStringTableForSymtab(const Elf_Shdr &Section, Elf_Shdr_Range Sections) const; @@ -137,15 +150,16 @@ public: static Expected create(StringRef Object); + bool isLE() const { + return getHeader()->getDataEncoding() == ELF::ELFDATA2LSB; + } + bool isMipsELF64() const { return getHeader()->e_machine == ELF::EM_MIPS && getHeader()->getFileClass() == ELF::ELFCLASS64; } - bool isMips64EL() const { - return isMipsELF64() && - getHeader()->getDataEncoding() == ELF::ELFDATA2LSB; - } + bool isMips64EL() const { return isMipsELF64() && isLE(); } Expected sections() const; @@ -261,7 +275,9 @@ public: return make_range(notes_begin(Shdr, Err), notes_end()); } - Expected getSectionStringTable(Elf_Shdr_Range Sections) const; + Expected getSectionStringTable( + Elf_Shdr_Range Sections, + WarningHandler WarnHandler = &defaultWarningHandler) const; Expected getSectionIndex(const Elf_Sym *Sym, Elf_Sym_Range Syms, ArrayRef ShndxTable) const; Expected getSection(const Elf_Sym *Sym, @@ -271,12 +287,13 @@ public: Elf_Sym_Range Symtab, ArrayRef ShndxTable) const; Expected getSection(uint32_t Index) const; - Expected getSection(const StringRef SectionName) const; Expected getSymbol(const Elf_Shdr *Sec, uint32_t Index) const; - Expected getSectionName(const Elf_Shdr *Section) const; + Expected + getSectionName(const Elf_Shdr *Section, + WarningHandler WarnHandler = &defaultWarningHandler) const; Expected getSectionName(const Elf_Shdr *Section, StringRef DotShstrtab) const; template @@ -459,18 +476,18 @@ ELFFile::getRelocationSymbol(const Elf_Rel *Rel, template Expected -ELFFile::getSectionStringTable(Elf_Shdr_Range Sections) const { +ELFFile::getSectionStringTable(Elf_Shdr_Range Sections, + WarningHandler WarnHandler) const { uint32_t Index = getHeader()->e_shstrndx; if (Index == ELF::SHN_XINDEX) Index = Sections[0].sh_link; if (!Index) // no section string table. return ""; - // TODO: Test a case when the sh_link of the section with index 0 is broken. if (Index >= Sections.size()) return createError("section header string table index " + Twine(Index) + " does not exist"); - return getStringTable(&Sections[Index]); + return getStringTable(&Sections[Index], WarnHandler); } template ELFFile::ELFFile(StringRef Object) : Buf(Object) {} @@ -495,7 +512,8 @@ Expected ELFFile::sections() const { Twine(getHeader()->e_shentsize)); const uint64_t FileSize = Buf.size(); - if (SectionTableOffset + sizeof(Elf_Shdr) > FileSize) + if (SectionTableOffset + sizeof(Elf_Shdr) > FileSize || + SectionTableOffset + (uintX_t)sizeof(Elf_Shdr) < SectionTableOffset) return createError( "section header table goes past the end of the file: e_shoff = 0x" + Twine::utohexstr(SectionTableOffset)); @@ -513,15 +531,22 @@ Expected ELFFile::sections() const { NumSections = First->sh_size; if (NumSections > UINT64_MAX / sizeof(Elf_Shdr)) - // TODO: this error is untested. 
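The WarningHandler callback lets a dumper downgrade non-critical problems to warnings. A sketch of passing a lenient handler to getSectionName(); the function and parameter names are illustrative, and returning Error::success() from the lambda means the warning is ignored and parsing continues, per the contract described above:

  #include "llvm/ADT/Twine.h"
  #include "llvm/Object/ELF.h"
  #include "llvm/Support/WithColor.h"
  using namespace llvm;
  using namespace llvm::object;

  // Resolve a section name, printing (rather than failing on) recoverable
  // inconsistencies such as an unexpected sh_type on the string table.
  template <class ELFT>
  Expected<StringRef> sectionNameLenient(const ELFFile<ELFT> &Obj,
                                         const typename ELFT::Shdr *Shdr) {
    auto Warn = [](const Twine &Msg) -> Error {
      WithColor::warning() << Msg << "\n";
      return Error::success();
    };
    return Obj.getSectionName(Shdr, Warn);
  }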
- return createError("section table goes past the end of file"); + return createError("invalid number of sections specified in the NULL " + "section's sh_size field (" + + Twine(NumSections) + ")"); const uint64_t SectionTableSize = NumSections * sizeof(Elf_Shdr); + if (SectionTableOffset + SectionTableSize < SectionTableOffset) + return createError( + "invalid section header table offset (e_shoff = 0x" + + Twine::utohexstr(SectionTableOffset) + + ") or invalid number of sections specified in the first section " + "header's sh_size field (0x" + + Twine::utohexstr(NumSections) + ")"); // Section table goes past end of file! if (SectionTableOffset + SectionTableSize > FileSize) return createError("section table goes past the end of file"); - return makeArrayRef(First, NumSections); } @@ -540,8 +565,9 @@ template Expected ELFFile::getEntry(const Elf_Shdr *Section, uint32_t Entry) const { if (sizeof(T) != Section->sh_entsize) - // TODO: this error is untested. - return createError("invalid sh_entsize"); + return createError("section " + getSecIndexForError(this, Section) + + " has invalid sh_entsize: expected " + Twine(sizeof(T)) + + ", but got " + Twine(Section->sh_entsize)); size_t Pos = Section->sh_offset + Entry * sizeof(T); if (Pos + sizeof(T) > Buf.size()) return createError("unable to access section " + @@ -560,43 +586,27 @@ ELFFile::getSection(uint32_t Index) const { return object::getSection(*TableOrErr, Index); } -template -Expected -ELFFile::getSection(const StringRef SectionName) const { - auto TableOrErr = sections(); - if (!TableOrErr) - return TableOrErr.takeError(); - for (auto &Sec : *TableOrErr) { - auto SecNameOrErr = getSectionName(&Sec); - if (!SecNameOrErr) - return SecNameOrErr.takeError(); - if (*SecNameOrErr == SectionName) - return &Sec; - } - // TODO: this error is untested. - return createError("invalid section name"); -} - template Expected -ELFFile::getStringTable(const Elf_Shdr *Section) const { +ELFFile::getStringTable(const Elf_Shdr *Section, + WarningHandler WarnHandler) const { if (Section->sh_type != ELF::SHT_STRTAB) - return createError("invalid sh_type for string table section " + - getSecIndexForError(this, Section) + - ": expected SHT_STRTAB, but got " + - object::getELFSectionTypeName(getHeader()->e_machine, - Section->sh_type)); + if (Error E = WarnHandler("invalid sh_type for string table section " + + getSecIndexForError(this, Section) + + ": expected SHT_STRTAB, but got " + + object::getELFSectionTypeName( + getHeader()->e_machine, Section->sh_type))) + return std::move(E); + auto V = getSectionContentsAsArray(Section); if (!V) return V.takeError(); ArrayRef Data = *V; if (Data.empty()) - // TODO: this error is untested. - return createError("empty string table"); + return createError("SHT_STRTAB string table section " + + getSecIndexForError(this, Section) + " is empty"); if (Data.back() != '\0') - return createError(object::getELFSectionTypeName(getHeader()->e_machine, - Section->sh_type) + - " string table section " + + return createError("SHT_STRTAB string table section " + getSecIndexForError(this, Section) + " is non-null terminated"); return StringRef(Data.begin(), Data.size()); @@ -626,8 +636,11 @@ ELFFile::getSHNDXTable(const Elf_Shdr &Section, const Elf_Shdr &SymTable = **SymTableOrErr; if (SymTable.sh_type != ELF::SHT_SYMTAB && SymTable.sh_type != ELF::SHT_DYNSYM) - // TODO: this error is untested. 
- return createError("invalid sh_type"); + return createError("SHT_SYMTAB_SHNDX section is linked with " + + object::getELFSectionTypeName(getHeader()->e_machine, + SymTable.sh_type) + + " section (expected SHT_SYMTAB/SHT_DYNSYM)"); + if (V.size() != (SymTable.sh_size / sizeof(Elf_Sym))) return createError("SHT_SYMTAB_SHNDX section has sh_size (" + Twine(SymTable.sh_size) + @@ -662,11 +675,12 @@ ELFFile::getStringTableForSymtab(const Elf_Shdr &Sec, template Expected -ELFFile::getSectionName(const Elf_Shdr *Section) const { +ELFFile::getSectionName(const Elf_Shdr *Section, + WarningHandler WarnHandler) const { auto SectionsOrErr = sections(); if (!SectionsOrErr) return SectionsOrErr.takeError(); - auto Table = getSectionStringTable(*SectionsOrErr); + auto Table = getSectionStringTable(*SectionsOrErr, WarnHandler); if (!Table) return Table.takeError(); return getSectionName(Section, *Table); diff --git a/include/llvm/Object/ELFObjectFile.h b/include/llvm/Object/ELFObjectFile.h index 86c015efd704..424289a9ccaa 100644 --- a/include/llvm/Object/ELFObjectFile.h +++ b/include/llvm/Object/ELFObjectFile.h @@ -41,7 +41,7 @@ namespace llvm { namespace object { -constexpr int NumElfSymbolTypes = 8; +constexpr int NumElfSymbolTypes = 16; extern const llvm::EnumEntry ElfSymbolTypes[NumElfSymbolTypes]; class elf_symbol_iterator; @@ -239,6 +239,10 @@ public: using Elf_Rela = typename ELFT::Rela; using Elf_Dyn = typename ELFT::Dyn; + SectionRef toSectionRef(const Elf_Shdr *Sec) const { + return SectionRef(toDRI(Sec), this); + } + private: ELFObjectFile(MemoryBufferRef Object, ELFFile EF, const Elf_Shdr *DotDynSymSec, const Elf_Shdr *DotSymtabSec, @@ -284,7 +288,8 @@ protected: relocation_iterator section_rel_begin(DataRefImpl Sec) const override; relocation_iterator section_rel_end(DataRefImpl Sec) const override; std::vector dynamic_relocation_sections() const override; - section_iterator getRelocatedSection(DataRefImpl Sec) const override; + Expected + getRelocatedSection(DataRefImpl Sec) const override; void moveRelocationNext(DataRefImpl &Rel) const override; uint64_t getRelocationOffset(DataRefImpl Rel) const override; @@ -461,13 +466,15 @@ Expected ELFObjectFile::getSymbolName(DataRefImpl Sym) const { if (!SymStrTabOrErr) return SymStrTabOrErr.takeError(); Expected Name = ESym->getName(*SymStrTabOrErr); + if (Name && !Name->empty()) + return Name; // If the symbol name is empty use the section name. 
- if ((!Name || Name->empty()) && ESym->getType() == ELF::STT_SECTION) { - StringRef SecName; - Expected Sec = getSymbolSection(Sym); - if (Sec && !(*Sec)->getName(SecName)) - return SecName; + if (ESym->getType() == ELF::STT_SECTION) { + if (Expected SecOrErr = getSymbolSection(Sym)) { + consumeError(Name.takeError()); + return (*SecOrErr)->getName(); + } } return Name; } @@ -835,7 +842,7 @@ ELFObjectFile::section_rel_end(DataRefImpl Sec) const { } template -section_iterator +Expected ELFObjectFile::getRelocatedSection(DataRefImpl Sec) const { if (EF.getHeader()->e_type != ELF::ET_REL) return section_end(); @@ -845,10 +852,10 @@ ELFObjectFile::getRelocatedSection(DataRefImpl Sec) const { if (Type != ELF::SHT_REL && Type != ELF::SHT_RELA) return section_end(); - auto R = EF.getSection(EShdr->sh_info); - if (!R) - report_fatal_error(errorToErrorCode(R.takeError()).message()); - return section_iterator(SectionRef(toDRI(*R), this)); + Expected SecOrErr = EF.getSection(EShdr->sh_info); + if (!SecOrErr) + return SecOrErr.takeError(); + return section_iterator(SectionRef(toDRI(*SecOrErr), this)); } // Relocations diff --git a/include/llvm/Object/ELFTypes.h b/include/llvm/Object/ELFTypes.h index 5552208b1f8a..7d1ade4d5437 100644 --- a/include/llvm/Object/ELFTypes.h +++ b/include/llvm/Object/ELFTypes.h @@ -248,7 +248,11 @@ template Expected Elf_Sym_Impl::getName(StringRef StrTab) const { uint32_t Offset = this->st_name; if (Offset >= StrTab.size()) - return errorCodeToError(object_error::parse_failed); + return createStringError(object_error::parse_failed, + "st_name (0x%" PRIx32 + ") is past the end of the string table" + " of size 0x%zx", + Offset, StrTab.size()); return StringRef(StrTab.data() + Offset); } diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h index ca9512f21706..76be8049a7d4 100644 --- a/include/llvm/Object/MachO.h +++ b/include/llvm/Object/MachO.h @@ -297,6 +297,7 @@ public: uint64_t getSectionAddress(DataRefImpl Sec) const override; uint64_t getSectionIndex(DataRefImpl Sec) const override; uint64_t getSectionSize(DataRefImpl Sec) const override; + ArrayRef getSectionContents(uint32_t Offset, uint64_t Size) const; Expected> getSectionContents(DataRefImpl Sec) const override; uint64_t getSectionAlignment(DataRefImpl Sec) const override; diff --git a/include/llvm/Object/MachOUniversal.h b/include/llvm/Object/MachOUniversal.h index 5bf724f2c8b2..eb45aff4480b 100644 --- a/include/llvm/Object/MachOUniversal.h +++ b/include/llvm/Object/MachOUniversal.h @@ -31,6 +31,8 @@ class MachOUniversalBinary : public Binary { uint32_t Magic; uint32_t NumberOfObjects; public: + static constexpr uint32_t MaxSectionAlignment = 15; /* 2**15 or 0x8000 */ + class ObjectForArch { const MachOUniversalBinary *Parent; /// Index of object in the universal binary. 
@@ -64,13 +66,13 @@ public: else // Parent->getMagic() == MachO::FAT_MAGIC_64 return Header64.cpusubtype; } - uint32_t getOffset() const { + uint64_t getOffset() const { if (Parent->getMagic() == MachO::FAT_MAGIC) return Header.offset; else // Parent->getMagic() == MachO::FAT_MAGIC_64 return Header64.offset; } - uint32_t getSize() const { + uint64_t getSize() const { if (Parent->getMagic() == MachO::FAT_MAGIC) return Header.size; else // Parent->getMagic() == MachO::FAT_MAGIC_64 @@ -157,8 +159,14 @@ public: return V->isMachOUniversalBinary(); } - Expected> + Expected getObjectForArch(StringRef ArchName) const; + + Expected> + getMachOObjectForArch(StringRef ArchName) const; + + Expected> + getArchiveForArch(StringRef ArchName) const; }; } diff --git a/include/llvm/Object/Minidump.h b/include/llvm/Object/Minidump.h index 470008d552e7..4429493aff45 100644 --- a/include/llvm/Object/Minidump.h +++ b/include/llvm/Object/Minidump.h @@ -11,6 +11,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/iterator.h" #include "llvm/BinaryFormat/Minidump.h" #include "llvm/Object/Binary.h" #include "llvm/Support/Error.h" @@ -80,16 +81,65 @@ public: return getListStream(minidump::StreamType::ThreadList); } - /// Returns the list of memory ranges embedded in the MemoryList stream. An - /// error is returned if the file does not contain this stream, or if the - /// stream is not large enough to contain the number of memory descriptors - /// declared in the stream header. The consistency of the MemoryDescriptor - /// entries themselves is not checked in any way. + /// Returns the contents of the Exception stream. An error is returned if the + /// file does not contain this stream, or the stream is smaller than the size + /// of the ExceptionStream structure. The internal consistency of the stream + /// is not checked in any way. + Expected getExceptionStream() const { + return getStream( + minidump::StreamType::Exception); + } + + /// Returns the list of descriptors embedded in the MemoryList stream. The + /// descriptors provide the content of interesting regions of memory at the + /// time the minidump was taken. An error is returned if the file does not + /// contain this stream, or if the stream is not large enough to contain the + /// number of memory descriptors declared in the stream header. The + /// consistency of the MemoryDescriptor entries themselves is not checked in + /// any way. Expected> getMemoryList() const { return getListStream( minidump::StreamType::MemoryList); } + class MemoryInfoIterator + : public iterator_facade_base { + public: + MemoryInfoIterator(ArrayRef Storage, size_t Stride) + : Storage(Storage), Stride(Stride) { + assert(Storage.size() % Stride == 0); + } + + bool operator==(const MemoryInfoIterator &R) const { + return Storage.size() == R.Storage.size(); + } + + const minidump::MemoryInfo &operator*() const { + assert(Storage.size() >= sizeof(minidump::MemoryInfo)); + return *reinterpret_cast(Storage.data()); + } + + MemoryInfoIterator &operator++() { + Storage = Storage.drop_front(Stride); + return *this; + } + + private: + ArrayRef Storage; + size_t Stride; + }; + + /// Returns the list of descriptors embedded in the MemoryInfoList stream. The + /// descriptors provide properties (e.g. permissions) of interesting regions + /// of memory at the time the minidump was taken. 
An error is returned if the + /// file does not contain this stream, or if the stream is not large enough to + /// contain the number of memory descriptors declared in the stream header. + /// The consistency of the MemoryInfoList entries themselves is not checked + /// in any way. + Expected> getMemoryInfoList() const; + private: static Error createError(StringRef Str) { return make_error(Str, object_error::parse_failed); @@ -137,10 +187,10 @@ private: }; template -Expected MinidumpFile::getStream(minidump::StreamType Stream) const { - if (auto OptionalStream = getRawStream(Stream)) { - if (OptionalStream->size() >= sizeof(T)) - return *reinterpret_cast(OptionalStream->data()); +Expected MinidumpFile::getStream(minidump::StreamType Type) const { + if (Optional> Stream = getRawStream(Type)) { + if (Stream->size() >= sizeof(T)) + return *reinterpret_cast(Stream->data()); return createEOFError(); } return createError("No such stream"); @@ -153,10 +203,11 @@ Expected> MinidumpFile::getDataSliceAs(ArrayRef Data, // Check for overflow. if (Count > std::numeric_limits::max() / sizeof(T)) return createEOFError(); - auto ExpectedArray = getDataSlice(Data, Offset, sizeof(T) * Count); - if (!ExpectedArray) - return ExpectedArray.takeError(); - return ArrayRef(reinterpret_cast(ExpectedArray->data()), Count); + Expected> Slice = + getDataSlice(Data, Offset, sizeof(T) * Count); + if (!Slice) + return Slice.takeError(); + return ArrayRef(reinterpret_cast(Slice->data()), Count); } } // end namespace object diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h index 483a3486bd72..adc9dbc189af 100644 --- a/include/llvm/Object/ObjectFile.h +++ b/include/llvm/Object/ObjectFile.h @@ -94,7 +94,7 @@ public: void moveNext(); - std::error_code getName(StringRef &Result) const; + Expected getName() const; uint64_t getAddress() const; uint64_t getIndex() const; uint64_t getSize() const; @@ -130,18 +130,13 @@ public: iterator_range relocations() const { return make_range(relocation_begin(), relocation_end()); } - section_iterator getRelocatedSection() const; + Expected getRelocatedSection() const; DataRefImpl getRawDataRefImpl() const; const ObjectFile *getObject() const; }; struct SectionedAddress { - // TODO: constructors could be removed when C++14 would be adopted. - SectionedAddress() {} - SectionedAddress(uint64_t Addr, uint64_t SectIdx) - : Address(Addr), SectionIndex(SectIdx) {} - const static uint64_t UndefSection = UINT64_MAX; uint64_t Address = 0; @@ -277,7 +272,7 @@ protected: virtual bool isBerkeleyData(DataRefImpl Sec) const; virtual relocation_iterator section_rel_begin(DataRefImpl Sec) const = 0; virtual relocation_iterator section_rel_end(DataRefImpl Sec) const = 0; - virtual section_iterator getRelocatedSection(DataRefImpl Sec) const; + virtual Expected getRelocatedSection(DataRefImpl Sec) const; // Same as above for RelocationRef. 
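Editorial illustration, not part of the patch: the minidump accessors above all funnel through the Expected-returning getStream/getDataSlice helpers. A minimal caller-side sketch, assuming only the interfaces shown in this hunk plus the MemoryDescriptor::StartOfMemoryRange field from llvm/BinaryFormat/Minidump.h; the function name dumpMemoryRanges is hypothetical.

#include "llvm/Object/Minidump.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::object;

static Error dumpMemoryRanges(const MinidumpFile &File) {
  // getMemoryList() yields the MemoryDescriptor entries, or an Error if the
  // MemoryList stream is absent or too small for its declared entry count.
  auto ListOrErr = File.getMemoryList();
  if (!ListOrErr)
    return ListOrErr.takeError();
  for (const minidump::MemoryDescriptor &MD : *ListOrErr)
    outs() << format_hex(MD.StartOfMemoryRange, 18) << "\n";
  return Error::success();
}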
friend class RelocationRef; @@ -434,12 +429,8 @@ inline void SectionRef::moveNext() { return OwningObject->moveSectionNext(SectionPimpl); } -inline std::error_code SectionRef::getName(StringRef &Result) const { - Expected NameOrErr = OwningObject->getSectionName(SectionPimpl); - if (!NameOrErr) - return errorToErrorCode(NameOrErr.takeError()); - Result = *NameOrErr; - return std::error_code(); +inline Expected SectionRef::getName() const { + return OwningObject->getSectionName(SectionPimpl); } inline uint64_t SectionRef::getAddress() const { @@ -510,7 +501,7 @@ inline relocation_iterator SectionRef::relocation_end() const { return OwningObject->section_rel_end(SectionPimpl); } -inline section_iterator SectionRef::getRelocatedSection() const { +inline Expected SectionRef::getRelocatedSection() const { return OwningObject->getRelocatedSection(SectionPimpl); } diff --git a/include/llvm/Object/StackMapParser.h b/include/llvm/Object/StackMapParser.h index ed44efbf80b9..b408f4041034 100644 --- a/include/llvm/Object/StackMapParser.h +++ b/include/llvm/Object/StackMapParser.h @@ -19,7 +19,7 @@ namespace llvm { -/// A parser for the latest stackmap format. At the moment, latest=V2. +/// A parser for the latest stackmap format. At the moment, latest=V3. template class StackMapParser { public: @@ -299,7 +299,7 @@ public: const uint8_t *P; }; - /// Construct a parser for a version-2 stackmap. StackMap data will be read + /// Construct a parser for a version-3 stackmap. StackMap data will be read /// from the given array. StackMapParser(ArrayRef StackMapSection) : StackMapSection(StackMapSection) { diff --git a/include/llvm/Object/TapiFile.h b/include/llvm/Object/TapiFile.h new file mode 100644 index 000000000000..bc2e04e1cc96 --- /dev/null +++ b/include/llvm/Object/TapiFile.h @@ -0,0 +1,60 @@ +//===- TapiFile.h - Text-based Dynamic Library Stub -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the TapiFile interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_TAPI_FILE_H +#define LLVM_OBJECT_TAPI_FILE_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/TextAPI/MachO/InterfaceFile.h" + +namespace llvm { +namespace object { + +class TapiFile : public SymbolicFile { +public: + TapiFile(MemoryBufferRef Source, const MachO::InterfaceFile &interface, + MachO::Architecture Arch); + ~TapiFile() override; + + void moveSymbolNext(DataRefImpl &DRI) const override; + + Error printSymbolName(raw_ostream &OS, DataRefImpl DRI) const override; + + uint32_t getSymbolFlags(DataRefImpl DRI) const override; + + basic_symbol_iterator symbol_begin() const override; + + basic_symbol_iterator symbol_end() const override; + + static bool classof(const Binary *v) { return v->isTapiFile(); } + +private: + struct Symbol { + StringRef Prefix; + StringRef Name; + uint32_t Flags; + + constexpr Symbol(StringRef Prefix, StringRef Name, uint32_t Flags) + : Prefix(Prefix), Name(Name), Flags(Flags) {} + }; + + std::vector Symbols; +}; + +} // end namespace object. +} // end namespace llvm. 
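Editorial illustration, not part of the patch: SectionRef::getName() now hands back Expected<StringRef> instead of filling an out-parameter and returning std::error_code. A minimal migration sketch for a caller, using only ObjectFile::sections() and the accessor declared above; printSectionNames is a hypothetical helper.

#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::object;

static void printSectionNames(const ObjectFile &Obj) {
  for (const SectionRef &Sec : Obj.sections()) {
    Expected<StringRef> NameOrErr = Sec.getName();
    if (!NameOrErr) {
      // Errors carried by Expected must be consumed or propagated; silently
      // dropping them, as the old error_code interface allowed, trips the
      // unchecked-Error assertion in builds with assertions enabled.
      consumeError(NameOrErr.takeError());
      continue;
    }
    outs() << *NameOrErr << "\n";
  }
}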
+ +#endif // LLVM_OBJECT_TAPI_FILE_H diff --git a/include/llvm/Object/TapiUniversal.h b/include/llvm/Object/TapiUniversal.h new file mode 100644 index 000000000000..4931183852ad --- /dev/null +++ b/include/llvm/Object/TapiUniversal.h @@ -0,0 +1,109 @@ +//===-- TapiUniversal.h - Text-based Dynamic Library Stub -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the TapiUniversal interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_TAPI_UNIVERSAL_H +#define LLVM_OBJECT_TAPI_UNIVERSAL_H + +#include "llvm/Object/Binary.h" +#include "llvm/Object/TapiFile.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/TextAPI/MachO/Architecture.h" +#include "llvm/TextAPI/MachO/InterfaceFile.h" + +namespace llvm { +namespace object { + +class TapiUniversal : public Binary { +public: + class ObjectForArch { + const TapiUniversal *Parent; + int Index; + + public: + ObjectForArch(const TapiUniversal *Parent, int Index) + : Parent(Parent), Index(Index) {} + + ObjectForArch getNext() const { return ObjectForArch(Parent, Index + 1); } + + bool operator==(const ObjectForArch &Other) const { + return (Parent == Other.Parent) && (Index == Other.Index); + } + + uint32_t getCPUType() const { + auto Result = + MachO::getCPUTypeFromArchitecture(Parent->Architectures[Index]); + return Result.first; + } + + uint32_t getCPUSubType() const { + auto Result = + MachO::getCPUTypeFromArchitecture(Parent->Architectures[Index]); + return Result.second; + } + + std::string getArchFlagName() const { + return MachO::getArchitectureName(Parent->Architectures[Index]); + } + + Expected> getAsObjectFile() const; + }; + + class object_iterator { + ObjectForArch Obj; + + public: + object_iterator(const ObjectForArch &Obj) : Obj(Obj) {} + const ObjectForArch *operator->() const { return &Obj; } + const ObjectForArch &operator*() const { return Obj; } + + bool operator==(const object_iterator &Other) const { + return Obj == Other.Obj; + } + bool operator!=(const object_iterator &Other) const { + return !(*this == Other); + } + + object_iterator &operator++() { // Preincrement + Obj = Obj.getNext(); + return *this; + } + }; + + TapiUniversal(MemoryBufferRef Source, Error &Err); + static Expected> + create(MemoryBufferRef Source); + ~TapiUniversal() override; + + object_iterator begin_objects() const { return ObjectForArch(this, 0); } + object_iterator end_objects() const { + return ObjectForArch(this, Architectures.size()); + } + + iterator_range objects() const { + return make_range(begin_objects(), end_objects()); + } + + uint32_t getNumberOfObjects() const { return Architectures.size(); } + + // Cast methods. + static bool classof(const Binary *v) { return v->isTapiUniversal(); } + +private: + std::unique_ptr ParsedFile; + std::vector Architectures; +}; + +} // end namespace object. +} // end namespace llvm. 
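Editorial illustration, not part of the patch: a short, hypothetical sketch of how the TapiUniversal interface above would typically be consumed, using only members declared in this header (objects(), getArchFlagName(), getCPUType(), getCPUSubType()).

#include "llvm/Object/TapiUniversal.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::object;

// Hypothetical helper: print one line per architecture slice of a TAPI
// universal stub.
static void listSlices(const TapiUniversal &TU) {
  for (const auto &Slice : TU.objects())
    outs() << Slice.getArchFlagName() << " cputype=" << Slice.getCPUType()
           << " cpusubtype=" << Slice.getCPUSubType() << "\n";
}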
+ +#endif // LLVM_OBJECT_TAPI_UNIVERSAL_H diff --git a/include/llvm/Object/WindowsResource.h b/include/llvm/Object/WindowsResource.h index 356dcb03abba..a0d658491cb9 100644 --- a/include/llvm/Object/WindowsResource.h +++ b/include/llvm/Object/WindowsResource.h @@ -31,6 +31,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/Object/Binary.h" +#include "llvm/Object/COFF.h" #include "llvm/Object/Error.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/BinaryStreamReader.h" @@ -48,6 +49,7 @@ class ScopedPrinter; namespace object { class WindowsResource; +class ResourceSectionRef; const size_t WIN_RES_MAGIC_SIZE = 16; const size_t WIN_RES_NULL_ENTRY_SIZE = 16; @@ -151,8 +153,11 @@ private: class WindowsResourceParser { public: class TreeNode; - WindowsResourceParser(); + WindowsResourceParser(bool MinGW = false); Error parse(WindowsResource *WR, std::vector &Duplicates); + Error parse(ResourceSectionRef &RSR, StringRef Filename, + std::vector &Duplicates); + void cleanUpManifests(std::vector &Duplicates); void printTree(raw_ostream &OS) const; const TreeNode &getTree() const { return Root; } const ArrayRef> getData() const { return Data; } @@ -181,32 +186,38 @@ public: private: friend class WindowsResourceParser; - static uint32_t StringCount; - static uint32_t DataCount; - - static std::unique_ptr createStringNode(); + // Index is the StringTable vector index for this node's name. + static std::unique_ptr createStringNode(uint32_t Index); static std::unique_ptr createIDNode(); + // DataIndex is the Data vector index that the data node points at. static std::unique_ptr createDataNode(uint16_t MajorVersion, uint16_t MinorVersion, uint32_t Characteristics, - uint32_t Origin); + uint32_t Origin, + uint32_t DataIndex); - explicit TreeNode(bool IsStringNode); + explicit TreeNode(uint32_t StringIndex); TreeNode(uint16_t MajorVersion, uint16_t MinorVersion, - uint32_t Characteristics, uint32_t Origin); + uint32_t Characteristics, uint32_t Origin, uint32_t DataIndex); bool addEntry(const ResourceEntryRef &Entry, uint32_t Origin, - bool &IsNewTypeString, bool &IsNewNameString, + std::vector> &Data, + std::vector> &StringTable, TreeNode *&Result); - TreeNode &addTypeNode(const ResourceEntryRef &Entry, bool &IsNewTypeString); - TreeNode &addNameNode(const ResourceEntryRef &Entry, bool &IsNewNameString); + TreeNode &addTypeNode(const ResourceEntryRef &Entry, + std::vector> &StringTable); + TreeNode &addNameNode(const ResourceEntryRef &Entry, + std::vector> &StringTable); bool addLanguageNode(const ResourceEntryRef &Entry, uint32_t Origin, + std::vector> &Data, TreeNode *&Result); bool addDataChild(uint32_t ID, uint16_t MajorVersion, uint16_t MinorVersion, uint32_t Characteristics, uint32_t Origin, - TreeNode *&Result); + uint32_t DataIndex, TreeNode *&Result); TreeNode &addIDChild(uint32_t ID); - TreeNode &addNameChild(ArrayRef NameRef, bool &IsNewString); + TreeNode &addNameChild(ArrayRef NameRef, + std::vector> &StringTable); + void shiftDataIndexDown(uint32_t Index); bool IsDataNode = false; uint32_t StringIndex; @@ -222,12 +233,30 @@ public: uint32_t Origin; }; + struct StringOrID { + bool IsString; + ArrayRef String; + uint32_t ID; + + StringOrID(uint32_t ID) : IsString(false), ID(ID) {} + StringOrID(ArrayRef String) : IsString(true), String(String) {} + }; + private: + Error addChildren(TreeNode &Node, ResourceSectionRef &RSR, + const coff_resource_dir_table &Table, uint32_t Origin, + std::vector &Context, + std::vector &Duplicates); + bool 
shouldIgnoreDuplicate(const ResourceEntryRef &Entry) const; + bool shouldIgnoreDuplicate(const std::vector &Context) const; + TreeNode Root; std::vector> Data; std::vector> StringTable; std::vector InputFilenames; + + bool MinGW; }; Expected> diff --git a/include/llvm/Object/XCOFFObjectFile.h b/include/llvm/Object/XCOFFObjectFile.h index cdee7129a2ab..84073ce5f6cf 100644 --- a/include/llvm/Object/XCOFFObjectFile.h +++ b/include/llvm/Object/XCOFFObjectFile.h @@ -13,23 +13,8 @@ #ifndef LLVM_OBJECT_XCOFFOBJECTFILE_H #define LLVM_OBJECT_XCOFFOBJECTFILE_H -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/iterator_range.h" -#include "llvm/BinaryFormat/Magic.h" #include "llvm/BinaryFormat/XCOFF.h" -#include "llvm/MC/SubtargetFeature.h" -#include "llvm/Object/Binary.h" -#include "llvm/Object/Error.h" #include "llvm/Object/ObjectFile.h" -#include "llvm/Object/SymbolicFile.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/MemoryBuffer.h" -#include -#include -#include -#include namespace llvm { namespace object { @@ -63,7 +48,7 @@ struct XCOFFFileHeader64 { }; struct XCOFFSectionHeader32 { - char Name[XCOFF::SectionNameSize]; + char Name[XCOFF::NameSize]; support::ubig32_t PhysicalAddress; support::ubig32_t VirtualAddress; support::ubig32_t SectionSize; @@ -78,7 +63,7 @@ struct XCOFFSectionHeader32 { }; struct XCOFFSectionHeader64 { - char Name[XCOFF::SectionNameSize]; + char Name[XCOFF::NameSize]; support::ubig64_t PhysicalAddress; support::ubig64_t VirtualAddress; support::ubig64_t SectionSize; @@ -106,7 +91,7 @@ struct XCOFFSymbolEntry { } CFileLanguageIdAndTypeIdType; union { - char SymbolName[XCOFF::SymbolNameSize]; + char SymbolName[XCOFF::NameSize]; NameInStrTblType NameInStrTbl; }; @@ -127,6 +112,75 @@ struct XCOFFStringTable { const char *Data; }; +struct XCOFFCsectAuxEnt32 { + support::ubig32_t + SectionOrLength; // If the symbol type is XTY_SD or XTY_CM, the csect + // length. + // If the symbol type is XTY_LD, the symbol table + // index of the containing csect. + // If the symbol type is XTY_ER, 0. + support::ubig32_t ParameterHashIndex; + support::ubig16_t TypeChkSectNum; + uint8_t SymbolAlignmentAndType; + XCOFF::StorageMappingClass StorageMappingClass; + support::ubig32_t StabInfoIndex; + support::ubig16_t StabSectNum; +}; + +struct XCOFFFileAuxEnt { + typedef struct { + support::big32_t Magic; // Zero indicates name in string table. + support::ubig32_t Offset; + char NamePad[XCOFF::FileNamePadSize]; + } NameInStrTblType; + union { + char Name[XCOFF::NameSize + XCOFF::FileNamePadSize]; + NameInStrTblType NameInStrTbl; + }; + XCOFF::CFileStringType Type; + uint8_t ReservedZeros[2]; + uint8_t AuxType; // 64-bit XCOFF file only. +}; + +struct XCOFFSectAuxEntForStat { + support::ubig32_t SectionLength; + support::ubig16_t NumberOfRelocEnt; + support::ubig16_t NumberOfLineNum; + uint8_t Pad[10]; +}; + +struct XCOFFRelocation32 { + // Masks for packing/unpacking the r_rsize field of relocations. + + // The msb is used to indicate if the bits being relocated are signed or + // unsigned. + static constexpr uint8_t XR_SIGN_INDICATOR_MASK = 0x80; + + // The 2nd msb is used to indicate that the binder has replaced/modified the + // original instruction. + static constexpr uint8_t XR_FIXUP_INDICATOR_MASK = 0x40; + + // The remaining bits specify the bit length of the relocatable reference + // minus one. 
+ static constexpr uint8_t XR_BIASED_LENGTH_MASK = 0x3f; + +public: + support::ubig32_t VirtualAddress; + support::ubig32_t SymbolIndex; + + // Packed field, see XR_* masks for details of packing. + uint8_t Info; + + XCOFF::RelocationType Type; + +public: + bool isRelocationSigned() const; + bool isFixupIndicated() const; + + // Returns the number of bits being relocated. + uint8_t getRelocatedLength() const; +}; + class XCOFFObjectFile : public ObjectFile { private: const void *FileHeader = nullptr; @@ -146,18 +200,18 @@ private: const XCOFFSectionHeader32 *toSection32(DataRefImpl Ref) const; const XCOFFSectionHeader64 *toSection64(DataRefImpl Ref) const; - void checkSectionAddress(uintptr_t Addr, uintptr_t TableAddr) const; uintptr_t getSectionHeaderTableAddress() const; + uintptr_t getEndOfSymbolTableAddress() const; // This returns a pointer to the start of the storage for the name field of // the 32-bit or 64-bit SectionHeader struct. This string is *not* necessarily // null-terminated. const char *getSectionNameInternal(DataRefImpl Sec) const; - int32_t getSectionFlags(DataRefImpl Sec) const; + // This function returns string table entry. + Expected getStringTableEntry(uint32_t Offset) const; static bool isReservedSectionNumber(int16_t SectionNumber); - Expected getSectionByNum(int16_t Num) const; // Constructor and "create" factory function. The constructor is only a thin // wrapper around the base constructor. The "create" function fills out the @@ -175,6 +229,8 @@ private: friend Expected> ObjectFile::createXCOFFObjectFile(MemoryBufferRef Object, unsigned FileType); + void checkSectionAddress(uintptr_t Addr, uintptr_t TableAddr) const; + public: // Interface inherited from base classes. void moveSymbolNext(DataRefImpl &Symb) const override; @@ -253,15 +309,49 @@ public: uint32_t getLogicalNumberOfSymbolTableEntries32() const; uint32_t getNumberOfSymbolTableEntries64() const; + uint32_t getSymbolIndex(uintptr_t SymEntPtr) const; + Expected getSymbolNameByIndex(uint32_t SymbolTableIndex) const; + Expected getCFileName(const XCOFFFileAuxEnt *CFileEntPtr) const; uint16_t getOptionalHeaderSize() const; uint16_t getFlags() const; // Section header table related interfaces. ArrayRef sections32() const; ArrayRef sections64() const; + + int32_t getSectionFlags(DataRefImpl Sec) const; + Expected getSectionByNum(int16_t Num) const; + + void checkSymbolEntryPointer(uintptr_t SymbolEntPtr) const; + + // Relocation-related interfaces. 
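Editorial illustration, not part of the patch: the comments above describe how XCOFFRelocation32 packs three fields into the single Info byte. The members isRelocationSigned(), isFixupIndicated() and getRelocatedLength() are only declared here and defined elsewhere; the free functions below are a hypothetical decoding of the documented bit layout, not the actual implementation.

#include <cstdint>

namespace {
constexpr uint8_t SignMask = 0x80;   // msb: relocated bits are signed.
constexpr uint8_t FixupMask = 0x40;  // 2nd msb: binder modified the instruction.
constexpr uint8_t LengthMask = 0x3f; // remaining bits: bit length minus one.

bool infoIsSigned(uint8_t Info) { return Info & SignMask; }
bool infoIsFixup(uint8_t Info) { return Info & FixupMask; }
// The stored length is biased by one, so add one to recover the bit count.
uint8_t infoRelocatedLength(uint8_t Info) { return (Info & LengthMask) + 1; }
} // namespace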
+ Expected + getLogicalNumberOfRelocationEntries(const XCOFFSectionHeader32 &Sec) const; + + Expected> + relocations(const XCOFFSectionHeader32 &) const; }; // XCOFFObjectFile +class XCOFFSymbolRef { + const DataRefImpl SymEntDataRef; + const XCOFFObjectFile *const OwningObjectPtr; + +public: + XCOFFSymbolRef(DataRefImpl SymEntDataRef, + const XCOFFObjectFile *OwningObjectPtr) + : SymEntDataRef(SymEntDataRef), OwningObjectPtr(OwningObjectPtr){}; + + XCOFF::StorageClass getStorageClass() const; + uint8_t getNumberOfAuxEntries() const; + const XCOFFCsectAuxEnt32 *getXCOFFCsectAuxEnt32() const; + uint16_t getType() const; + int16_t getSectionNumber() const; + + bool hasCsectAuxEnt() const; + bool isFunction() const; +}; + } // namespace object } // namespace llvm diff --git a/include/llvm/ObjectYAML/DWARFYAML.h b/include/llvm/ObjectYAML/DWARFYAML.h index 78d736c3ef05..525fd9a89242 100644 --- a/include/llvm/ObjectYAML/DWARFYAML.h +++ b/include/llvm/ObjectYAML/DWARFYAML.h @@ -234,7 +234,7 @@ template <> struct MappingTraits { static void mapping(IO &IO, DWARFYAML::InitialLength &DWARF); }; -#define HANDLE_DW_TAG(unused, name, unused2, unused3) \ +#define HANDLE_DW_TAG(unused, name, unused2, unused3, unused4) \ io.enumCase(value, "DW_TAG_" #name, dwarf::DW_TAG_##name); template <> struct ScalarEnumerationTraits { diff --git a/include/llvm/ObjectYAML/ELFYAML.h b/include/llvm/ObjectYAML/ELFYAML.h index f4212516f486..0898a0e7d532 100644 --- a/include/llvm/ObjectYAML/ELFYAML.h +++ b/include/llvm/ObjectYAML/ELFYAML.h @@ -25,6 +25,8 @@ namespace llvm { namespace ELFYAML { +StringRef dropUniqueSuffix(StringRef S); + // These types are invariant across 32/64-bit ELF, so for simplicity just // directly give them their exact sizes. We don't need to worry about // endianness because these are just the types in the YAMLIO structures, @@ -54,8 +56,6 @@ LLVM_YAML_STRONG_TYPEDEF(uint64_t, ELF_SHF) LLVM_YAML_STRONG_TYPEDEF(uint16_t, ELF_SHN) LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_STB) LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_STT) -LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_STV) -LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_STO) LLVM_YAML_STRONG_TYPEDEF(uint8_t, MIPS_AFL_REG) LLVM_YAML_STRONG_TYPEDEF(uint8_t, MIPS_ABI_FP) @@ -77,7 +77,7 @@ struct FileHeader { llvm::yaml::Hex64 Entry; Optional SHEntSize; - Optional SHOffset; + Optional SHOff; Optional SHNum; Optional SHStrNdx; }; @@ -107,7 +107,7 @@ struct Symbol { ELF_STB Binding; llvm::yaml::Hex64 Value; llvm::yaml::Hex64 Size; - uint8_t Other; + Optional Other; }; struct SectionOrType { @@ -119,6 +119,11 @@ struct DynamicEntry { llvm::yaml::Hex64 Val; }; +struct StackSizeEntry { + llvm::yaml::Hex64 Address; + llvm::yaml::Hex64 Size; +}; + struct Section { enum class SectionKind { Dynamic, @@ -126,10 +131,14 @@ struct Section { RawContent, Relocation, NoBits, + Hash, Verdef, Verneed, + StackSizes, + SymtabShndxSection, Symver, - MipsABIFlags + MipsABIFlags, + Addrsig }; SectionKind Kind; StringRef Name; @@ -140,16 +149,44 @@ struct Section { llvm::yaml::Hex64 AddressAlign; Optional EntSize; + // Usually sections are not created implicitly, but loaded from YAML. + // When they are, this flag is used to signal about that. + bool IsImplicit; + + Section(SectionKind Kind, bool IsImplicit = false) + : Kind(Kind), IsImplicit(IsImplicit) {} + virtual ~Section(); + + // The following members are used to override section fields which is + // useful for creating invalid objects. + + // This can be used to override the offset stored in the sh_name field. 
+ // It does not affect the name stored in the string table. + Optional ShName; + // This can be used to override the sh_offset field. It does not place the - // section data at the offset specified. Useful for creating invalid objects. + // section data at the offset specified. Optional ShOffset; // This can be used to override the sh_size field. It does not affect the // content written. Optional ShSize; +}; - Section(SectionKind Kind) : Kind(Kind) {} - virtual ~Section(); +struct StackSizesSection : Section { + Optional Content; + Optional Size; + Optional> Entries; + + StackSizesSection() : Section(SectionKind::StackSizes) {} + + static bool classof(const Section *S) { + return S->Kind == SectionKind::StackSizes; + } + + static bool nameMatches(StringRef Name) { + return Name == ".stack_sizes"; + } }; struct DynamicSection : Section { @@ -185,6 +222,17 @@ struct NoBitsSection : Section { } }; +struct HashSection : Section { + Optional Content; + Optional Size; + Optional> Bucket; + Optional> Chain; + + HashSection() : Section(SectionKind::Hash) {} + + static bool classof(const Section *S) { return S->Kind == SectionKind::Hash; } +}; + struct VernauxEntry { uint32_t Hash; uint16_t Flags; @@ -209,6 +257,26 @@ struct VerneedSection : Section { } }; +struct AddrsigSymbol { + AddrsigSymbol(StringRef N) : Name(N), Index(None) {} + AddrsigSymbol(llvm::yaml::Hex32 Ndx) : Name(None), Index(Ndx) {} + AddrsigSymbol() : Name(None), Index(None) {} + + Optional Name; + Optional Index; +}; + +struct AddrsigSection : Section { + Optional Content; + Optional Size; + Optional> Symbols; + + AddrsigSection() : Section(SectionKind::Addrsig) {} + static bool classof(const Section *S) { + return S->Kind == SectionKind::Addrsig; + } +}; + struct SymverSection : Section { std::vector Entries; @@ -269,6 +337,16 @@ struct RelocationSection : Section { } }; +struct SymtabShndxSection : Section { + std::vector Entries; + + SymtabShndxSection() : Section(SectionKind::SymtabShndxSection) {} + + static bool classof(const Section *S) { + return S->Kind == SectionKind::SymtabShndxSection; + } +}; + // Represents .MIPS.abiflags section struct MipsABIFlags : Section { llvm::yaml::Hex16 Version; @@ -298,13 +376,15 @@ struct Object { // cleaner and nicer if we read them from the YAML as a separate // top-level key, which automatically ensures that invariants like there // being a single SHT_SYMTAB section are upheld. 
- std::vector Symbols; + Optional> Symbols; std::vector DynamicSymbols; }; } // end namespace ELFYAML } // end namespace llvm +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::AddrsigSymbol) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::StackSizeEntry) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::DynamicEntry) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::ProgramHeader) LLVM_YAML_IS_SEQUENCE_VECTOR(std::unique_ptr) @@ -380,16 +460,6 @@ struct ScalarEnumerationTraits { static void enumeration(IO &IO, ELFYAML::ELF_STT &Value); }; -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &IO, ELFYAML::ELF_STV &Value); -}; - -template <> -struct ScalarBitSetTraits { - static void bitset(IO &IO, ELFYAML::ELF_STO &Value); -}; - template <> struct ScalarEnumerationTraits { static void enumeration(IO &IO, ELFYAML::ELF_REL &Value); @@ -450,6 +520,10 @@ struct MappingTraits { static StringRef validate(IO &IO, ELFYAML::Symbol &Symbol); }; +template <> struct MappingTraits { + static void mapping(IO &IO, ELFYAML::StackSizeEntry &Rel); +}; + template <> struct MappingTraits { static void mapping(IO &IO, ELFYAML::DynamicEntry &Rel); }; @@ -466,6 +540,10 @@ template <> struct MappingTraits { static void mapping(IO &IO, ELFYAML::VernauxEntry &E); }; +template <> struct MappingTraits { + static void mapping(IO &IO, ELFYAML::AddrsigSymbol &Sym); +}; + template <> struct MappingTraits { static void mapping(IO &IO, ELFYAML::Relocation &Rel); }; diff --git a/include/llvm/ObjectYAML/MachOYAML.h b/include/llvm/ObjectYAML/MachOYAML.h index d7e1c033f43b..327c3b9f892b 100644 --- a/include/llvm/ObjectYAML/MachOYAML.h +++ b/include/llvm/ObjectYAML/MachOYAML.h @@ -18,6 +18,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/MachO.h" #include "llvm/ObjectYAML/DWARFYAML.h" +#include "llvm/ObjectYAML/YAML.h" #include "llvm/Support/YAMLTraits.h" #include #include @@ -39,6 +40,7 @@ struct Section { llvm::yaml::Hex32 reserved1; llvm::yaml::Hex32 reserved2; llvm::yaml::Hex32 reserved3; + Optional content; }; struct FileHeader { @@ -198,6 +200,7 @@ template <> struct MappingTraits { template <> struct MappingTraits { static void mapping(IO &IO, MachOYAML::Section &Section); + static StringRef validate(IO &io, MachOYAML::Section &Section); }; template <> struct MappingTraits { diff --git a/include/llvm/ObjectYAML/MinidumpYAML.h b/include/llvm/ObjectYAML/MinidumpYAML.h index 39fdd62e017b..c1711a28dd84 100644 --- a/include/llvm/ObjectYAML/MinidumpYAML.h +++ b/include/llvm/ObjectYAML/MinidumpYAML.h @@ -26,6 +26,8 @@ namespace MinidumpYAML { /// from Types to Kinds is fixed and given by the static getKind function. struct Stream { enum class StreamKind { + Exception, + MemoryInfoList, MemoryList, ModuleList, RawContent, @@ -102,6 +104,45 @@ using ModuleListStream = detail::ListStream; using ThreadListStream = detail::ListStream; using MemoryListStream = detail::ListStream; +/// ExceptionStream minidump stream. 
+struct ExceptionStream : public Stream { + minidump::ExceptionStream MDExceptionStream; + yaml::BinaryRef ThreadContext; + + ExceptionStream() + : Stream(StreamKind::Exception, minidump::StreamType::Exception), + MDExceptionStream({}) {} + + explicit ExceptionStream(const minidump::ExceptionStream &MDExceptionStream, + ArrayRef ThreadContext) + : Stream(StreamKind::Exception, minidump::StreamType::Exception), + MDExceptionStream(MDExceptionStream), ThreadContext(ThreadContext) {} + + static bool classof(const Stream *S) { + return S->Kind == StreamKind::Exception; + } +}; + +/// A structure containing the list of MemoryInfo entries comprising a +/// MemoryInfoList stream. +struct MemoryInfoListStream : public Stream { + std::vector Infos; + + MemoryInfoListStream() + : Stream(StreamKind::MemoryInfoList, + minidump::StreamType::MemoryInfoList) {} + + explicit MemoryInfoListStream( + iterator_range Range) + : Stream(StreamKind::MemoryInfoList, + minidump::StreamType::MemoryInfoList), + Infos(Range.begin(), Range.end()) {} + + static bool classof(const Stream *S) { + return S->Kind == StreamKind::MemoryInfoList; + } +}; + /// A minidump stream represented as a sequence of hex bytes. This is used as a /// fallback when no other stream kind is suitable. struct RawContentStream : public Stream { @@ -122,16 +163,16 @@ struct SystemInfoStream : public Stream { minidump::SystemInfo Info; std::string CSDVersion; - explicit SystemInfoStream(const minidump::SystemInfo &Info, - std::string CSDVersion) - : Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo), - Info(Info), CSDVersion(std::move(CSDVersion)) {} - SystemInfoStream() : Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo) { memset(&Info, 0, sizeof(Info)); } + explicit SystemInfoStream(const minidump::SystemInfo &Info, + std::string CSDVersion) + : Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo), + Info(Info), CSDVersion(std::move(CSDVersion)) {} + static bool classof(const Stream *S) { return S->Kind == StreamKind::SystemInfo; } @@ -177,12 +218,6 @@ struct Object { static Expected create(const object::MinidumpFile &File); }; -/// Serialize the minidump file represented by Obj to OS in binary form. -void writeAsBinary(Object &Obj, raw_ostream &OS); - -/// Serialize the yaml string as a minidump file to OS in binary form. 
-Error writeAsBinary(StringRef Yaml, raw_ostream &OS); - } // namespace MinidumpYAML namespace yaml { @@ -213,6 +248,10 @@ template <> struct MappingContextTraits { } // namespace llvm +LLVM_YAML_DECLARE_BITSET_TRAITS(llvm::minidump::MemoryProtection) +LLVM_YAML_DECLARE_BITSET_TRAITS(llvm::minidump::MemoryState) +LLVM_YAML_DECLARE_BITSET_TRAITS(llvm::minidump::MemoryType) + LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::minidump::ProcessorArchitecture) LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::minidump::OSPlatform) LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::minidump::StreamType) @@ -220,6 +259,8 @@ LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::minidump::StreamType) LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::CPUInfo::ArmInfo) LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::CPUInfo::OtherInfo) LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::CPUInfo::X86Info) +LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::Exception) +LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::MemoryInfo) LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::VSFixedFileInfo) LLVM_YAML_DECLARE_MAPPING_TRAITS( @@ -233,6 +274,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(std::unique_ptr) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MinidumpYAML::MemoryListStream::entry_type) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MinidumpYAML::ModuleListStream::entry_type) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MinidumpYAML::ThreadListStream::entry_type) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::minidump::MemoryInfo) LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::MinidumpYAML::Object) diff --git a/include/llvm/ObjectYAML/WasmYAML.h b/include/llvm/ObjectYAML/WasmYAML.h index 2411dc7ac17d..15a8cc215020 100644 --- a/include/llvm/ObjectYAML/WasmYAML.h +++ b/include/llvm/ObjectYAML/WasmYAML.h @@ -145,7 +145,7 @@ struct Signature { uint32_t Index; SignatureForm Form = wasm::WASM_TYPE_FUNC; std::vector ParamTypes; - ValueType ReturnType; + std::vector ReturnTypes; }; struct SymbolInfo { diff --git a/include/llvm/ObjectYAML/yaml2obj.h b/include/llvm/ObjectYAML/yaml2obj.h new file mode 100644 index 000000000000..386551337d86 --- /dev/null +++ b/include/llvm/ObjectYAML/yaml2obj.h @@ -0,0 +1,67 @@ +//===--- yaml2obj.h - -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// Common declarations for yaml2obj +//===----------------------------------------------------------------------===// +#ifndef LLVM_TOOLS_YAML2OBJ_YAML2OBJ_H +#define LLVM_TOOLS_YAML2OBJ_YAML2OBJ_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include + +namespace llvm { +class raw_ostream; +template class SmallVectorImpl; +template class Expected; + +namespace object { +class ObjectFile; +} + +namespace COFFYAML { +struct Object; +} + +namespace ELFYAML { +struct Object; +} + +namespace MinidumpYAML { +struct Object; +} + +namespace WasmYAML { +struct Object; +} + +namespace yaml { +class Input; +struct YamlObjectFile; + +using ErrorHandler = llvm::function_ref; + +bool yaml2coff(COFFYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH); +bool yaml2elf(ELFYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH); +bool yaml2macho(YamlObjectFile &Doc, raw_ostream &Out, ErrorHandler EH); +bool yaml2minidump(MinidumpYAML::Object &Doc, raw_ostream &Out, + ErrorHandler EH); +bool yaml2wasm(WasmYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH); + +bool convertYAML(Input &YIn, raw_ostream &Out, ErrorHandler ErrHandler, + unsigned DocNum = 1); + +/// Convenience function for tests. +std::unique_ptr +yaml2ObjectFile(SmallVectorImpl &Storage, StringRef Yaml, + ErrorHandler ErrHandler); + +} // namespace yaml +} // namespace llvm + +#endif diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h index 329f7eaba73d..1d53ae32cf37 100644 --- a/include/llvm/Pass.h +++ b/include/llvm/Pass.h @@ -306,6 +306,9 @@ protected: }; //===----------------------------------------------------------------------===// +/// Deprecated - do not create new passes as BasicBlockPasses. Use FunctionPass +/// with a loop over the BasicBlocks instead. +// /// BasicBlockPass class - This class is used to implement most local /// optimizations. Optimizations should subclass this class if they /// meet the following constraints: @@ -338,6 +341,8 @@ public: /// do any post processing needed after all passes have run. virtual bool doFinalization(Function &); + void preparePassManager(PMStack &PMS) override; + void assignPassManager(PMStack &PMS, PassManagerType T) override; /// Return what kind of Pass Manager can manage this pass. diff --git a/include/llvm/Passes/PassBuilder.h b/include/llvm/Passes/PassBuilder.h index 5e6660599f93..f73e4b42dd4b 100644 --- a/include/llvm/Passes/PassBuilder.h +++ b/include/llvm/Passes/PassBuilder.h @@ -629,6 +629,12 @@ public: TopLevelPipelineParsingCallbacks.push_back(C); } + /// Add PGOInstrumenation passes for O0 only. 
+ void addPGOInstrPassesForO0(ModulePassManager &MPM, bool DebugLogging, + bool RunProfileGen, bool IsCS, + std::string ProfileFile, + std::string ProfileRemappingFile); + private: static Optional> parsePipelineText(StringRef Text); @@ -660,7 +666,6 @@ private: OptimizationLevel Level, bool RunProfileGen, bool IsCS, std::string ProfileFile, std::string ProfileRemappingFile); - void invokePeepholeEPCallbacks(FunctionPassManager &, OptimizationLevel); // Extension Point callbacks diff --git a/include/llvm/ProfileData/Coverage/CoverageMapping.h b/include/llvm/ProfileData/Coverage/CoverageMapping.h index 11758ac4cf2f..0dd0c7ec8065 100644 --- a/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -301,7 +301,12 @@ public: struct FunctionRecord { /// Raw function name. std::string Name; - /// Associated files. + /// Mapping from FileID (i.e. vector index) to filename. Used to support + /// macro expansions within a function in which the macro and function are + /// defined in separate files. + /// + /// TODO: Uniquing filenames across all function records may be a performance + /// optimization. std::vector Filenames; /// Regions in the function along with their counts. std::vector CountedRegions; @@ -508,6 +513,7 @@ public: class CoverageMapping { DenseMap> RecordProvenance; std::vector Functions; + DenseMap> FilenameHash2RecordIndices; std::vector> FuncHashMismatches; CoverageMapping() = default; @@ -516,6 +522,13 @@ class CoverageMapping { Error loadFunctionRecord(const CoverageMappingRecord &Record, IndexedInstrProfReader &ProfileReader); + /// Look up the indices for function records which are at least partially + /// defined in the specified file. This is guaranteed to return a superset of + /// such records: extra records not in the file may be included if there is + /// a hash collision on the filename. Clients must be robust to collisions. + ArrayRef + getImpreciseRecordIndicesForFilename(StringRef Filename) const; + public: CoverageMapping(const CoverageMapping &) = delete; CoverageMapping &operator=(const CoverageMapping &) = delete; @@ -527,6 +540,7 @@ public: /// Load the coverage mapping from the given object files and profile. If /// \p Arches is non-empty, it must specify an architecture for each object. + /// Ignores non-instrumented object files unless all are not instrumented. static Expected> load(ArrayRef ObjectFilenames, StringRef ProfileFilename, ArrayRef Arches = None); diff --git a/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h b/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h index 5f88cacdfcbb..6fcd8a09a494 100644 --- a/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h +++ b/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h @@ -30,8 +30,7 @@ class CoverageFilenamesSectionWriter { ArrayRef Filenames; public: - CoverageFilenamesSectionWriter(ArrayRef Filenames) - : Filenames(Filenames) {} + CoverageFilenamesSectionWriter(ArrayRef Filenames); /// Write encoded filenames to the given output stream. 
void write(raw_ostream &OS); diff --git a/include/llvm/ProfileData/InstrProf.h b/include/llvm/ProfileData/InstrProf.h index c7d764ade30d..c26f76949992 100644 --- a/include/llvm/ProfileData/InstrProf.h +++ b/include/llvm/ProfileData/InstrProf.h @@ -93,10 +93,6 @@ inline StringRef getInstrProfValuesVarPrefix() { return "__profvp_"; } /// Return the name of value profile node array variables: inline StringRef getInstrProfVNodesVarName() { return "__llvm_prf_vnodes"; } -/// Return the name prefix of the COMDAT group for instrumentation variables -/// associated with a COMDAT function. -inline StringRef getInstrProfComdatPrefix() { return "__profv_"; } - /// Return the name of the variable holding the strings (possibly compressed) /// of all function's PGO names. inline StringRef getInstrProfNamesVarName() { @@ -634,8 +630,8 @@ struct OverlapStats { FuncHash = Hash; } - Error accumuateCounts(const std::string &BaseFilename, - const std::string &TestFilename, bool IsCS); + Error accumulateCounts(const std::string &BaseFilename, + const std::string &TestFilename, bool IsCS); void addOneMismatch(const CountSumOrPercent &MismatchFunc); void addOneUnique(const CountSumOrPercent &UniqueFunc); @@ -695,7 +691,7 @@ struct InstrProfRecord { InstrProfRecord(const InstrProfRecord &RHS) : Counts(RHS.Counts), ValueData(RHS.ValueData - ? llvm::make_unique(*RHS.ValueData) + ? std::make_unique(*RHS.ValueData) : nullptr) {} InstrProfRecord &operator=(InstrProfRecord &&) = default; InstrProfRecord &operator=(const InstrProfRecord &RHS) { @@ -705,7 +701,7 @@ struct InstrProfRecord { return *this; } if (!ValueData) - ValueData = llvm::make_unique(*RHS.ValueData); + ValueData = std::make_unique(*RHS.ValueData); else *ValueData = *RHS.ValueData; return *this; @@ -772,7 +768,7 @@ struct InstrProfRecord { void clearValueData() { ValueData = nullptr; } /// Compute the sums of all counts and store in Sum. - void accumuateCounts(CountSumOrPercent &Sum) const; + void accumulateCounts(CountSumOrPercent &Sum) const; /// Compute the overlap b/w this IntrprofRecord and Other. void overlap(InstrProfRecord &Other, OverlapStats &Overlap, @@ -817,7 +813,7 @@ private: std::vector & getOrCreateValueSitesForKind(uint32_t ValueKind) { if (!ValueData) - ValueData = llvm::make_unique(); + ValueData = std::make_unique(); switch (ValueKind) { case IPVK_IndirectCallTarget: return ValueData->IndirectCallSites; @@ -897,7 +893,7 @@ InstrProfRecord::getValueForSite(uint32_t ValueKind, uint32_t Site, return std::unique_ptr(nullptr); } - auto VD = llvm::make_unique(N); + auto VD = std::make_unique(N); TotalCount = getValueForSite(VD.get(), ValueKind, Site); return VD; diff --git a/include/llvm/ProfileData/InstrProfReader.h b/include/llvm/ProfileData/InstrProfReader.h index 73751faab88e..f5f552672bf0 100644 --- a/include/llvm/ProfileData/InstrProfReader.h +++ b/include/llvm/ProfileData/InstrProfReader.h @@ -92,7 +92,7 @@ public: virtual InstrProfSymtab &getSymtab() = 0; /// Compute the sum of counts and return in Sum. - void accumuateCounts(CountSumOrPercent &Sum, bool IsCS); + void accumulateCounts(CountSumOrPercent &Sum, bool IsCS); protected: std::unique_ptr Symtab; @@ -268,8 +268,14 @@ private: return (const char *)ValueDataStart; } - const uint64_t *getCounter(IntPtrT CounterPtr) const { - ptrdiff_t Offset = (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t); + /// Get the offset of \p CounterPtr from the start of the counters section of + /// the profile. The offset has units of "number of counters", i.e. 
increasing + /// the offset by 1 corresponds to an increase in the *byte offset* by 8. + ptrdiff_t getCounterOffset(IntPtrT CounterPtr) const { + return (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t); + } + + const uint64_t *getCounter(ptrdiff_t Offset) const { return CountersStart + Offset; } diff --git a/include/llvm/ProfileData/SampleProf.h b/include/llvm/ProfileData/SampleProf.h index 7fbc857b7230..55418d9d0f9c 100644 --- a/include/llvm/ProfileData/SampleProf.h +++ b/include/llvm/ProfileData/SampleProf.h @@ -18,15 +18,18 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include #include #include +#include #include #include #include @@ -49,7 +52,10 @@ enum class sampleprof_error { truncated_name_table, not_implemented, counter_overflow, - ostream_seek_unsupported + ostream_seek_unsupported, + compress_failed, + uncompress_failed, + zlib_unavailable }; inline std::error_code make_error_code(sampleprof_error E) { @@ -83,6 +89,7 @@ enum SampleProfileFormat { SPF_Text = 0x1, SPF_Compact_Binary = 0x2, SPF_GCC = 0x3, + SPF_Ext_Binary = 0x4, SPF_Binary = 0xff }; @@ -105,6 +112,61 @@ static inline StringRef getRepInFormat(StringRef Name, static inline uint64_t SPVersion() { return 103; } +// Section Type used by SampleProfileExtBinaryBaseReader and +// SampleProfileExtBinaryBaseWriter. Never change the existing +// value of enum. Only append new ones. +enum SecType { + SecInValid = 0, + SecProfSummary = 1, + SecNameTable = 2, + SecProfileSymbolList = 3, + SecFuncOffsetTable = 4, + // marker for the first type of profile. + SecFuncProfileFirst = 32, + SecLBRProfile = SecFuncProfileFirst +}; + +static inline std::string getSecName(SecType Type) { + switch (Type) { + case SecInValid: + return "InvalidSection"; + case SecProfSummary: + return "ProfileSummarySection"; + case SecNameTable: + return "NameTableSection"; + case SecProfileSymbolList: + return "ProfileSymbolListSection"; + case SecFuncOffsetTable: + return "FuncOffsetTableSection"; + case SecLBRProfile: + return "LBRProfileSection"; + } + llvm_unreachable("A SecType has no name for output"); +} + +// Entry type of section header table used by SampleProfileExtBinaryBaseReader +// and SampleProfileExtBinaryBaseWriter. +struct SecHdrTableEntry { + SecType Type; + uint64_t Flags; + uint64_t Offset; + uint64_t Size; +}; + +enum SecFlags { SecFlagInValid = 0, SecFlagCompress = (1 << 0) }; + +static inline void addSecFlags(SecHdrTableEntry &Entry, uint64_t Flags) { + Entry.Flags |= Flags; +} + +static inline void removeSecFlags(SecHdrTableEntry &Entry, uint64_t Flags) { + Entry.Flags &= ~Flags; +} + +static inline bool hasSecFlag(SecHdrTableEntry &Entry, SecFlags Flag) { + return Entry.Flags & Flag; +} + /// Represents the relative location of an instruction. /// /// Instruction locations are specified by the line offset from the @@ -143,8 +205,18 @@ raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc); /// will be a list of one or more functions. 
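Editorial illustration, not part of the patch: the extensible-binary profile machinery above keys per-section behaviour off SecHdrTableEntry::Flags. A small sketch of how the flag helpers compose; the offset and size values are invented and makeCompressedLBREntry is hypothetical.

#include "llvm/ProfileData/SampleProf.h"
#include <cassert>

using namespace llvm::sampleprof;

static SecHdrTableEntry makeCompressedLBREntry(uint64_t Offset, uint64_t Size) {
  SecHdrTableEntry Entry = {SecLBRProfile, SecFlagInValid, Offset, Size};
  // Mark the section payload as compressed; a reader would check this flag
  // before attempting to decompress the section bytes.
  addSecFlags(Entry, SecFlagCompress);
  assert(hasSecFlag(Entry, SecFlagCompress));
  return Entry;
}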
class SampleRecord { public: - using CallTargetMap = StringMap; + using CallTarget = std::pair; + struct CallTargetComparator { + bool operator()(const CallTarget &LHS, const CallTarget &RHS) const { + if (LHS.second != RHS.second) + return LHS.second > RHS.second; + + return LHS.first < RHS.first; + } + }; + using SortedCallTargetSet = std::set; + using CallTargetMap = StringMap; SampleRecord() = default; /// Increment the number of samples for this record by \p S. @@ -179,6 +251,18 @@ public: uint64_t getSamples() const { return NumSamples; } const CallTargetMap &getCallTargets() const { return CallTargets; } + const SortedCallTargetSet getSortedCallTargets() const { + return SortCallTargets(CallTargets); + } + + /// Sort call targets in descending order of call frequency. + static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets) { + SortedCallTargetSet SortedTargets; + for (const auto &I : Targets) { + SortedTargets.emplace(I.first(), I.second); + } + return SortedTargets; + } /// Merge the samples in \p Other into this record. /// Optionally scale sample counts by \p Weight. @@ -205,7 +289,7 @@ class FunctionSamples; using BodySampleMap = std::map; // NOTE: Using a StringMap here makes parsed profiles consume around 17% more // memory, which is *very* significant for large profiles. -using FunctionSamplesMap = std::map; +using FunctionSamplesMap = std::map>; using CallsiteSampleMap = std::map; /// Representation of the samples collected for a function. @@ -447,11 +531,10 @@ public: StringRef getNameInModule(StringRef Name, const Module *M) const { if (Format != SPF_Compact_Binary) return Name; - // Expect CurrentModule to be initialized by GUIDToFuncNameMapper. - if (M != CurrentModule) - llvm_unreachable("Input Module should be the same as CurrentModule"); - auto iter = GUIDToFuncNameMap.find(std::stoull(Name.data())); - if (iter == GUIDToFuncNameMap.end()) + + assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be popluated first"); + auto iter = GUIDToFuncNameMap->find(std::stoull(Name.data())); + if (iter == GUIDToFuncNameMap->end()) return StringRef(); return iter->second; } @@ -472,42 +555,10 @@ public: const FunctionSamples *findFunctionSamples(const DILocation *DIL) const; static SampleProfileFormat Format; - /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for - /// all the function symbols defined or declared in CurrentModule. - static DenseMap GUIDToFuncNameMap; - static Module *CurrentModule; - - class GUIDToFuncNameMapper { - public: - GUIDToFuncNameMapper(Module &M) { - if (Format != SPF_Compact_Binary) - return; - - for (const auto &F : M) { - StringRef OrigName = F.getName(); - GUIDToFuncNameMap.insert({Function::getGUID(OrigName), OrigName}); - /// Local to global var promotion used by optimization like thinlto - /// will rename the var and add suffix like ".llvm.xxx" to the - /// original local name. In sample profile, the suffixes of function - /// names are all stripped. Since it is possible that the mapper is - /// built in post-thin-link phase and var promotion has been done, - /// we need to add the substring of function name without the suffix - /// into the GUIDToFuncNameMap. 
- StringRef CanonName = getCanonicalFnName(F); - if (CanonName != OrigName) - GUIDToFuncNameMap.insert({Function::getGUID(CanonName), CanonName}); - } - CurrentModule = &M; - } - - ~GUIDToFuncNameMapper() { - if (Format != SPF_Compact_Binary) - return; - GUIDToFuncNameMap.clear(); - CurrentModule = nullptr; - } - }; + /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for + /// all the function symbols defined or declared in current module. + DenseMap *GUIDToFuncNameMap = nullptr; // Assume the input \p Name is a name coming from FunctionSamples itself. // If the format is SPF_Compact_Binary, the name is already a GUID and we @@ -583,6 +634,47 @@ private: SamplesWithLocList V; }; +/// ProfileSymbolList records the list of function symbols shown up +/// in the binary used to generate the profile. It is useful to +/// to discriminate a function being so cold as not to shown up +/// in the profile and a function newly added. +class ProfileSymbolList { +public: + /// copy indicates whether we need to copy the underlying memory + /// for the input Name. + void add(StringRef Name, bool copy = false) { + if (!copy) { + Syms.insert(Name); + return; + } + Syms.insert(Name.copy(Allocator)); + } + + bool contains(StringRef Name) { return Syms.count(Name); } + + void merge(const ProfileSymbolList &List) { + for (auto Sym : List.Syms) + add(Sym, true); + } + + unsigned size() { return Syms.size(); } + + void setToCompress(bool TC) { ToCompress = TC; } + bool toCompress() { return ToCompress; } + + std::error_code read(const uint8_t *Data, uint64_t ListSize); + std::error_code write(raw_ostream &OS); + void dump(raw_ostream &OS = dbgs()) const; + +private: + // Determine whether or not to compress the symbol list when + // writing it into profile. The variable is unused when the symbol + // list is read from an existing profile. + bool ToCompress = false; + DenseSet Syms; + BumpPtrAllocator Allocator; +}; + } // end namespace sampleprof } // end namespace llvm diff --git a/include/llvm/ProfileData/SampleProfReader.h b/include/llvm/ProfileData/SampleProfReader.h index 969cdea859c9..5a5d4cfde224 100644 --- a/include/llvm/ProfileData/SampleProfReader.h +++ b/include/llvm/ProfileData/SampleProfReader.h @@ -235,6 +235,62 @@ class raw_ostream; namespace sampleprof { +class SampleProfileReader; + +/// SampleProfileReaderItaniumRemapper remaps the profile data from a +/// sample profile data reader, by applying a provided set of equivalences +/// between components of the symbol names in the profile. +class SampleProfileReaderItaniumRemapper { +public: + SampleProfileReaderItaniumRemapper(std::unique_ptr B, + std::unique_ptr SRR, + SampleProfileReader &R) + : Buffer(std::move(B)), Remappings(std::move(SRR)), Reader(R) { + assert(Remappings && "Remappings cannot be nullptr"); + } + + /// Create a remapper from the given remapping file. The remapper will + /// be used for profile read in by Reader. + static ErrorOr> + create(const std::string Filename, SampleProfileReader &Reader, + LLVMContext &C); + + /// Create a remapper from the given Buffer. The remapper will + /// be used for profile read in by Reader. + static ErrorOr> + create(std::unique_ptr &B, SampleProfileReader &Reader, + LLVMContext &C); + + /// Apply remappings to the profile read by Reader. + void applyRemapping(LLVMContext &Ctx); + + bool hasApplied() { return RemappingApplied; } + + /// Insert function name into remapper. 
+ void insert(StringRef FunctionName) { Remappings->insert(FunctionName); } + + /// Query whether there is equivalent in the remapper which has been + /// inserted. + bool exist(StringRef FunctionName) { + return Remappings->lookup(FunctionName); + } + + /// Return the samples collected for function \p F if remapper knows + /// it is present in SampleMap. + FunctionSamples *getSamplesFor(StringRef FunctionName); + +private: + // The buffer holding the content read from remapping file. + std::unique_ptr Buffer; + std::unique_ptr Remappings; + DenseMap SampleMap; + // The Reader the remapper is servicing. + SampleProfileReader &Reader; + // Indicate whether remapping has been applied to the profile read + // by Reader -- by calling applyRemapping. + bool RemappingApplied = false; +}; + /// Sample-based profile reader. /// /// Each profile contains sample counts for all the functions @@ -273,13 +329,22 @@ public: /// Read and validate the file header. virtual std::error_code readHeader() = 0; - /// Read sample profiles from the associated file. - virtual std::error_code read() = 0; + /// The interface to read sample profiles from the associated file. + std::error_code read() { + if (std::error_code EC = readImpl()) + return EC; + if (Remapper) + Remapper->applyRemapping(Ctx); + return sampleprof_error::success; + } + + /// The implementaion to read sample profiles from the associated file. + virtual std::error_code readImpl() = 0; /// Print the profile for \p FName on stream \p OS. void dumpFunctionProfile(StringRef FName, raw_ostream &OS = dbgs()); - virtual void collectFuncsToUse(const Module &M) {} + virtual void collectFuncsFrom(const Module &M) {} /// Print all the profiles on stream \p OS. void dump(raw_ostream &OS = dbgs()); @@ -295,6 +360,10 @@ public: /// Return the samples collected for function \p F. virtual FunctionSamples *getSamplesFor(StringRef Fname) { + if (Remapper) { + if (auto FS = Remapper->getSamplesFor(Fname)) + return FS; + } std::string FGUID; Fname = getRepInFormat(Fname, getFormat(), FGUID); auto It = Profiles.find(Fname); @@ -313,18 +382,33 @@ public: } /// Create a sample profile reader appropriate to the file format. + /// Create a remapper underlying if RemapFilename is not empty. static ErrorOr> - create(const Twine &Filename, LLVMContext &C); + create(const std::string Filename, LLVMContext &C, + const std::string RemapFilename = ""); /// Create a sample profile reader from the supplied memory buffer. + /// Create a remapper underlying if RemapFilename is not empty. static ErrorOr> - create(std::unique_ptr &B, LLVMContext &C); + create(std::unique_ptr &B, LLVMContext &C, + const std::string RemapFilename = ""); /// Return the profile summary. - ProfileSummary &getSummary() { return *(Summary.get()); } + ProfileSummary &getSummary() const { return *(Summary.get()); } + + MemoryBuffer *getBuffer() const { return Buffer.get(); } /// \brief Return the profile format. - SampleProfileFormat getFormat() { return Format; } + SampleProfileFormat getFormat() const { return Format; } + + virtual std::unique_ptr getProfileSymbolList() { + return nullptr; + }; + + /// It includes all the names that have samples either in outline instance + /// or inline instance. + virtual std::vector *getNameTable() { return nullptr; } + virtual bool dumpSectionInfo(raw_ostream &OS = dbgs()) { return false; }; protected: /// Map every function to its associated profile. @@ -352,6 +436,8 @@ protected: /// Compute summary for this profile. 
void computeSummary(); + std::unique_ptr Remapper; + /// \brief The format of sample. SampleProfileFormat Format = SPF_None; }; @@ -365,7 +451,7 @@ public: std::error_code readHeader() override { return sampleprof_error::success; } /// Read sample profiles from the associated file. - std::error_code read() override; + std::error_code readImpl() override; /// Return true if \p Buffer is in the format supported by this class. static bool hasFormat(const MemoryBuffer &Buffer); @@ -381,7 +467,11 @@ public: virtual std::error_code readHeader() override; /// Read sample profiles from the associated file. - std::error_code read() override; + std::error_code readImpl() override; + + /// It includes all the names that have samples either in outline instance + /// or inline instance. + virtual std::vector *getNameTable() override { return &NameTable; } protected: /// Read a numeric value of type T from the profile. @@ -411,46 +501,134 @@ protected: bool at_eof() const { return Data >= End; } /// Read the next function profile instance. - std::error_code readFuncProfile(); + std::error_code readFuncProfile(const uint8_t *Start); /// Read the contents of the given profile instance. std::error_code readProfile(FunctionSamples &FProfile); + /// Read the contents of Magic number and Version number. + std::error_code readMagicIdent(); + + /// Read profile summary. + std::error_code readSummary(); + + /// Read the whole name table. + virtual std::error_code readNameTable(); + /// Points to the current location in the buffer. const uint8_t *Data = nullptr; /// Points to the end of the buffer. const uint8_t *End = nullptr; + /// Function name table. + std::vector NameTable; + + /// Read a string indirectly via the name table. + virtual ErrorOr readStringFromTable(); + private: std::error_code readSummaryEntry(std::vector &Entries); virtual std::error_code verifySPMagic(uint64_t Magic) = 0; +}; - /// Read profile summary. - std::error_code readSummary(); +class SampleProfileReaderRawBinary : public SampleProfileReaderBinary { +private: + virtual std::error_code verifySPMagic(uint64_t Magic) override; - /// Read the whole name table. - virtual std::error_code readNameTable() = 0; +public: + SampleProfileReaderRawBinary(std::unique_ptr B, LLVMContext &C, + SampleProfileFormat Format = SPF_Binary) + : SampleProfileReaderBinary(std::move(B), C, Format) {} - /// Read a string indirectly via the name table. - virtual ErrorOr readStringFromTable() = 0; + /// \brief Return true if \p Buffer is in the format supported by this class. + static bool hasFormat(const MemoryBuffer &Buffer); }; -class SampleProfileReaderRawBinary : public SampleProfileReaderBinary { +/// SampleProfileReaderExtBinaryBase/SampleProfileWriterExtBinaryBase defines +/// the basic structure of the extensible binary format. +/// The format is organized in sections except the magic and version number +/// at the beginning. There is a section table before all the sections, and +/// each entry in the table describes the entry type, start, size and +/// attributes. The format in each section is defined by the section itself. +/// +/// It is easy to add a new section while maintaining the backward +/// compatibility of the profile. Nothing extra needs to be done. 
If we want +/// to extend an existing section, like add cache misses information in +/// addition to the sample count in the profile body, we can add a new section +/// with the extension and retire the existing section, and we could choose +/// to keep the parser of the old section if we want the reader to be able +/// to read both new and old format profile. +/// +/// SampleProfileReaderExtBinary/SampleProfileWriterExtBinary define the +/// commonly used sections of a profile in extensible binary format. It is +/// possible to define other types of profile inherited from +/// SampleProfileReaderExtBinaryBase/SampleProfileWriterExtBinaryBase. +class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary { +private: + std::error_code decompressSection(const uint8_t *SecStart, + const uint64_t SecSize, + const uint8_t *&DecompressBuf, + uint64_t &DecompressBufSize); + + BumpPtrAllocator Allocator; + +protected: + std::vector SecHdrTable; + std::unique_ptr ProfSymList; + std::error_code readSecHdrTableEntry(); + std::error_code readSecHdrTable(); + virtual std::error_code readHeader() override; + virtual std::error_code verifySPMagic(uint64_t Magic) override = 0; + virtual std::error_code readOneSection(const uint8_t *Start, uint64_t Size, + SecType Type) = 0; + +public: + SampleProfileReaderExtBinaryBase(std::unique_ptr B, + LLVMContext &C, SampleProfileFormat Format) + : SampleProfileReaderBinary(std::move(B), C, Format) {} + + /// Read sample profiles in extensible format from the associated file. + std::error_code readImpl() override; + + /// Get the total size of all \p Type sections. + uint64_t getSectionSize(SecType Type); + /// Get the total size of header and all sections. + uint64_t getFileSize(); + virtual bool dumpSectionInfo(raw_ostream &OS = dbgs()) override; +}; + +class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase { private: - /// Function name table. - std::vector NameTable; virtual std::error_code verifySPMagic(uint64_t Magic) override; - virtual std::error_code readNameTable() override; - /// Read a string indirectly via the name table. - virtual ErrorOr readStringFromTable() override; + virtual std::error_code readOneSection(const uint8_t *Start, uint64_t Size, + SecType Type) override; + std::error_code readProfileSymbolList(); + std::error_code readFuncOffsetTable(); + std::error_code readFuncProfiles(); + + /// The table mapping from function name to the offset of its FunctionSample + /// towards file start. + DenseMap FuncOffsetTable; + /// The set containing the functions to use when compiling a module. + DenseSet FuncsToUse; + /// Use all functions from the input profile. + bool UseAllFuncs = true; public: - SampleProfileReaderRawBinary(std::unique_ptr B, LLVMContext &C) - : SampleProfileReaderBinary(std::move(B), C, SPF_Binary) {} + SampleProfileReaderExtBinary(std::unique_ptr B, LLVMContext &C, + SampleProfileFormat Format = SPF_Ext_Binary) + : SampleProfileReaderExtBinaryBase(std::move(B), C, Format) {} /// \brief Return true if \p Buffer is in the format supported by this class. static bool hasFormat(const MemoryBuffer &Buffer); + + virtual std::unique_ptr getProfileSymbolList() override { + return std::move(ProfSymList); + }; + + /// Collect functions with definitions in Module \p M. 
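/// Sketch of the intended call order (assumed caller code, not part of this
/// interface):
/// \code
///   Reader->collectFuncsFrom(M); // restrict loading to functions defined in M
///   Reader->read();              // profiles for other functions can be skipped
/// \endcode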
+ void collectFuncsFrom(const Module &M) override; }; class SampleProfileReaderCompactBinary : public SampleProfileReaderBinary { @@ -462,6 +640,8 @@ private: DenseMap FuncOffsetTable; /// The set containing the functions to use when compiling a module. DenseSet FuncsToUse; + /// Use all functions from the input profile. + bool UseAllFuncs = true; virtual std::error_code verifySPMagic(uint64_t Magic) override; virtual std::error_code readNameTable() override; /// Read a string indirectly via the name table. @@ -478,10 +658,10 @@ public: static bool hasFormat(const MemoryBuffer &Buffer); /// Read samples only for functions to use. - std::error_code read() override; + std::error_code readImpl() override; /// Collect functions to be used when compiling Module \p M. - void collectFuncsToUse(const Module &M) override; + void collectFuncsFrom(const Module &M) override; }; using InlineCallStack = SmallVector; @@ -509,7 +689,7 @@ public: std::error_code readHeader() override; /// Read sample profiles from the associated file. - std::error_code read() override; + std::error_code readImpl() override; /// Return true if \p Buffer is in the format supported by this class. static bool hasFormat(const MemoryBuffer &Buffer); @@ -537,44 +717,6 @@ protected: static const uint32_t GCOVTagAFDOFunction = 0xac000000; }; -/// A profile data reader proxy that remaps the profile data from another -/// sample profile data reader, by applying a provided set of equivalences -/// between components of the symbol names in the profile. -class SampleProfileReaderItaniumRemapper : public SampleProfileReader { -public: - SampleProfileReaderItaniumRemapper( - std::unique_ptr B, LLVMContext &C, - std::unique_ptr Underlying) - : SampleProfileReader(std::move(B), C, Underlying->getFormat()) { - Profiles = std::move(Underlying->getProfiles()); - Summary = takeSummary(*Underlying); - // Keep the underlying reader alive; the profile data may contain - // StringRefs referencing names in its name table. - UnderlyingReader = std::move(Underlying); - } - - /// Create a remapped sample profile from the given remapping file and - /// underlying samples. - static ErrorOr> - create(const Twine &Filename, LLVMContext &C, - std::unique_ptr Underlying); - - /// Read and validate the file header. - std::error_code readHeader() override { return sampleprof_error::success; } - - /// Read remapping file and apply it to the sample profile. - std::error_code read() override; - - /// Return the samples collected for function \p F. - FunctionSamples *getSamplesFor(StringRef FunctionName) override; - using SampleProfileReader::getSamplesFor; - -private: - SymbolRemappingReader Remappings; - DenseMap SampleMap; - std::unique_ptr UnderlyingReader; -}; - } // end namespace sampleprof } // end namespace llvm diff --git a/include/llvm/ProfileData/SampleProfWriter.h b/include/llvm/ProfileData/SampleProfWriter.h index 81e6e3ab0b4a..cc951594c9e2 100644 --- a/include/llvm/ProfileData/SampleProfWriter.h +++ b/include/llvm/ProfileData/SampleProfWriter.h @@ -36,7 +36,7 @@ public: /// Write sample profiles in \p S. /// /// \returns status code of the file update operation. - virtual std::error_code write(const FunctionSamples &S) = 0; + virtual std::error_code writeSample(const FunctionSamples &S) = 0; /// Write all the sample profiles in the given map of samples. 
/// @@ -56,6 +56,8 @@ public: static ErrorOr> create(std::unique_ptr &OS, SampleProfileFormat Format); + virtual void setProfileSymbolList(ProfileSymbolList *PSL) {} + protected: SampleProfileWriter(std::unique_ptr &OS) : OutputStream(std::move(OS)) {} @@ -64,6 +66,10 @@ protected: virtual std::error_code writeHeader(const StringMap &ProfileMap) = 0; + // Write function profiles to the profile file. + virtual std::error_code + writeFuncProfiles(const StringMap &ProfileMap); + /// Output stream where to emit the profile to. std::unique_ptr OutputStream; @@ -72,12 +78,15 @@ protected: /// Compute summary for this profile. void computeSummary(const StringMap &ProfileMap); + + /// Profile format. + SampleProfileFormat Format; }; /// Sample-based profile writer (text format). class SampleProfileWriterText : public SampleProfileWriter { public: - std::error_code write(const FunctionSamples &S) override; + std::error_code writeSample(const FunctionSamples &S) override; protected: SampleProfileWriterText(std::unique_ptr &OS) @@ -102,13 +111,14 @@ private: /// Sample-based profile writer (binary format). class SampleProfileWriterBinary : public SampleProfileWriter { public: - virtual std::error_code write(const FunctionSamples &S) override; SampleProfileWriterBinary(std::unique_ptr &OS) : SampleProfileWriter(OS) {} + virtual std::error_code writeSample(const FunctionSamples &S) override; + protected: - virtual std::error_code writeNameTable() = 0; - virtual std::error_code writeMagicIdent() = 0; + virtual std::error_code writeMagicIdent(SampleProfileFormat Format); + virtual std::error_code writeNameTable(); virtual std::error_code writeHeader(const StringMap &ProfileMap) override; std::error_code writeSummary(); @@ -118,10 +128,10 @@ protected: MapVector NameTable; -private: void addName(StringRef FName); void addNames(const FunctionSamples &S); +private: friend ErrorOr> SampleProfileWriter::create(std::unique_ptr &OS, SampleProfileFormat Format); @@ -129,10 +139,99 @@ private: class SampleProfileWriterRawBinary : public SampleProfileWriterBinary { using SampleProfileWriterBinary::SampleProfileWriterBinary; +}; + +class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary { + using SampleProfileWriterBinary::SampleProfileWriterBinary; +public: + virtual std::error_code + write(const StringMap &ProfileMap) override; + + void setToCompressAllSections(); + void setToCompressSection(SecType Type); protected: - virtual std::error_code writeNameTable() override; - virtual std::error_code writeMagicIdent() override; + uint64_t markSectionStart(SecType Type); + std::error_code addNewSection(SecType Sec, uint64_t SectionStart); + virtual void initSectionHdrLayout() = 0; + virtual std::error_code + writeSections(const StringMap &ProfileMap) = 0; + + // Specifiy the order of sections in section header table. Note + // the order of sections in the profile may be different that the + // order in SectionHdrLayout. sample Reader will follow the order + // in SectionHdrLayout to read each section. + SmallVector SectionHdrLayout; + +private: + void allocSecHdrTable(); + std::error_code writeSecHdrTable(); + virtual std::error_code + writeHeader(const StringMap &ProfileMap) override; + void addSectionFlags(SecType Type, SecFlags Flags); + SecHdrTableEntry &getEntryInLayout(SecType Type); + std::error_code compressAndOutput(); + + // We will swap the raw_ostream held by LocalBufStream and that + // held by OutputStream if we try to add a section which needs + // compression. 
After the swap, all the data written to output + // will be temporarily buffered into the underlying raw_string_ostream + // originally held by LocalBufStream. After the data writing for the + // section is completed, compress the data in the local buffer, + // swap the raw_ostream back and write the compressed data to the + // real output. + std::unique_ptr LocalBufStream; + // The location where the output stream starts. + uint64_t FileStart; + // The location in the output stream where the SecHdrTable should be + // written to. + uint64_t SecHdrTableOffset; + // Initial Section Flags setting. + std::vector SecHdrTable; +}; + +class SampleProfileWriterExtBinary : public SampleProfileWriterExtBinaryBase { +public: + SampleProfileWriterExtBinary(std::unique_ptr &OS) + : SampleProfileWriterExtBinaryBase(OS) { + initSectionHdrLayout(); + } + + virtual std::error_code writeSample(const FunctionSamples &S) override; + virtual void setProfileSymbolList(ProfileSymbolList *PSL) override { + ProfSymList = PSL; + }; + +private: + virtual void initSectionHdrLayout() override { + // Note that SecFuncOffsetTable section is written after SecLBRProfile + // in the profile, but is put before SecLBRProfile in SectionHdrLayout. + // + // This is because sample reader follows the order of SectionHdrLayout to + // read each section, to read function profiles on demand sample reader + // need to get the offset of each function profile first. + // + // SecFuncOffsetTable section is written after SecLBRProfile in the + // profile because FuncOffsetTable needs to be populated while section + // SecLBRProfile is written. + SectionHdrLayout = {{SecProfSummary, 0, 0, 0}, + {SecNameTable, 0, 0, 0}, + {SecFuncOffsetTable, 0, 0, 0}, + {SecLBRProfile, 0, 0, 0}, + {SecProfileSymbolList, 0, 0, 0}}; + }; + virtual std::error_code + writeSections(const StringMap &ProfileMap) override; + ProfileSymbolList *ProfSymList = nullptr; + + // Save the start of SecLBRProfile so we can compute the offset to the + // start of SecLBRProfile for each Function's Profile and will keep it + // in FuncOffsetTable. + uint64_t SecLBRProfileStart; + // FuncOffsetTable maps function name to its profile offset in SecLBRProfile + // section. It is used to load function profile on demand. + MapVector FuncOffsetTable; + std::error_code writeFuncOffsetTable(); }; // CompactBinary is a compact format of binary profile which both reduces @@ -169,7 +268,7 @@ class SampleProfileWriterCompactBinary : public SampleProfileWriterBinary { using SampleProfileWriterBinary::SampleProfileWriterBinary; public: - virtual std::error_code write(const FunctionSamples &S) override; + virtual std::error_code writeSample(const FunctionSamples &S) override; virtual std::error_code write(const StringMap &ProfileMap) override; @@ -181,7 +280,6 @@ protected: /// towards profile start. uint64_t TableOffset; virtual std::error_code writeNameTable() override; - virtual std::error_code writeMagicIdent() override; virtual std::error_code writeHeader(const StringMap &ProfileMap) override; std::error_code writeFuncOffsetTable(); diff --git a/include/llvm/Remarks/BitstreamRemarkContainer.h b/include/llvm/Remarks/BitstreamRemarkContainer.h new file mode 100644 index 000000000000..a2282fca04ab --- /dev/null +++ b/include/llvm/Remarks/BitstreamRemarkContainer.h @@ -0,0 +1,106 @@ +//===-- BitstreamRemarkContainer.h - Container for remarks --------------*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides declarations for things used in the various types of +// remark containers. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_REMARKS_REMARK_CONTAINER_H +#define LLVM_REMARKS_REMARK_CONTAINER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Bitstream/BitCodes.h" +#include + +namespace llvm { +namespace remarks { + +/// The current version of the remark container. +/// Note: this is different from the version of the remark entry. +constexpr uint64_t CurrentContainerVersion = 0; +/// The magic number used for identifying remark blocks. +constexpr StringLiteral ContainerMagic("RMRK"); + +/// Type of the remark container. +/// The remark container has two modes: +/// * separate: the metadata is separate from the remarks and points to the +/// auxiliary file that contains the remarks. +/// * standalone: the metadata and the remarks are emitted together. +enum class BitstreamRemarkContainerType { + /// The metadata emitted separately. + /// This will contain the following: + /// * Container version and type + /// * String table + /// * External file + SeparateRemarksMeta, + /// The remarks emitted separately. + /// This will contain the following: + /// * Container version and type + /// * Remark version + SeparateRemarksFile, + /// Everything is emitted together. + /// This will contain the following: + /// * Container version and type + /// * Remark version + /// * String table + Standalone, + First = SeparateRemarksMeta, + Last = Standalone, +}; + +/// The possible blocks that will be encountered in a bitstream remark +/// container. +enum BlockIDs { + /// The metadata block is mandatory. It should always come after the + /// BLOCKINFO_BLOCK, and contains metadata that should be used when parsing + /// REMARK_BLOCKs. + /// There should always be only one META_BLOCK. + META_BLOCK_ID = bitc::FIRST_APPLICATION_BLOCKID, + /// One remark entry is represented using a REMARK_BLOCK. There can be + /// multiple REMARK_BLOCKs in the same file. + REMARK_BLOCK_ID +}; + +constexpr StringRef MetaBlockName = StringRef("Meta", 4); +constexpr StringRef RemarkBlockName = StringRef("Remark", 6); + +/// The possible records that can be encountered in the previously described +/// blocks. +enum RecordIDs { + // Meta block records. + RECORD_META_CONTAINER_INFO = 1, + RECORD_META_REMARK_VERSION, + RECORD_META_STRTAB, + RECORD_META_EXTERNAL_FILE, + // Remark block records. + RECORD_REMARK_HEADER, + RECORD_REMARK_DEBUG_LOC, + RECORD_REMARK_HOTNESS, + RECORD_REMARK_ARG_WITH_DEBUGLOC, + RECORD_REMARK_ARG_WITHOUT_DEBUGLOC, + // Helpers. 
+ RECORD_FIRST = RECORD_META_CONTAINER_INFO, + RECORD_LAST = RECORD_REMARK_ARG_WITHOUT_DEBUGLOC +}; + +constexpr StringRef MetaContainerInfoName = StringRef("Container info", 14); +constexpr StringRef MetaRemarkVersionName = StringRef("Remark version", 14); +constexpr StringRef MetaStrTabName = StringRef("String table", 12); +constexpr StringRef MetaExternalFileName = StringRef("External File", 13); +constexpr StringRef RemarkHeaderName = StringRef("Remark header", 13); +constexpr StringRef RemarkDebugLocName = StringRef("Remark debug location", 21); +constexpr StringRef RemarkHotnessName = StringRef("Remark hotness", 14); +constexpr StringRef RemarkArgWithDebugLocName = + StringRef("Argument with debug location", 28); +constexpr StringRef RemarkArgWithoutDebugLocName = StringRef("Argument", 8); + +} // end namespace remarks +} // end namespace llvm + +#endif /* LLVM_REMARKS_REMARK_CONTAINER_H */ diff --git a/include/llvm/Remarks/BitstreamRemarkParser.h b/include/llvm/Remarks/BitstreamRemarkParser.h new file mode 100644 index 000000000000..7ebd731693b2 --- /dev/null +++ b/include/llvm/Remarks/BitstreamRemarkParser.h @@ -0,0 +1,116 @@ +//===-- BitstreamRemarkParser.h - Bitstream parser --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides an implementation of the remark parser using the LLVM +// Bitstream format. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_REMARKS_BITSTREAM_REMARK_PARSER_H +#define LLVM_REMARKS_BITSTREAM_REMARK_PARSER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Bitstream/BitstreamReader.h" +#include "llvm/Remarks/BitstreamRemarkContainer.h" +#include "llvm/Remarks/Remark.h" +#include "llvm/Remarks/RemarkParser.h" +#include "llvm/Support/Error.h" +#include + +namespace llvm { +namespace remarks { + +/// Helper to parse a META_BLOCK for a bitstream remark container. +struct BitstreamMetaParserHelper { + /// The Bitstream reader. + BitstreamCursor &Stream; + /// Reference to the storage for the block info. + BitstreamBlockInfo &BlockInfo; + /// The parsed content: depending on the container type, some fields might be + /// empty. + Optional ContainerVersion; + Optional ContainerType; + Optional StrTabBuf; + Optional ExternalFilePath; + Optional RemarkVersion; + + /// Continue parsing with \p Stream. \p Stream is expected to contain a + /// ENTER_SUBBLOCK to the META_BLOCK at the current position. + /// \p Stream is expected to have a BLOCKINFO_BLOCK set. + BitstreamMetaParserHelper(BitstreamCursor &Stream, + BitstreamBlockInfo &BlockInfo); + + /// Parse the META_BLOCK and fill the available entries. + /// This helper does not check for the validity of the fields. + Error parse(); +}; + +/// Helper to parse a REMARK_BLOCK for a bitstream remark container. +struct BitstreamRemarkParserHelper { + /// The Bitstream reader. + BitstreamCursor &Stream; + /// The parsed content: depending on the remark, some fields might be empty. 
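/// For instance, a caller might consume these after a successful parse()
/// (a sketch; `Stream` is an assumed BitstreamCursor positioned at a
/// REMARK_BLOCK and `StrTab` an assumed ParsedStringTable from the meta block):
/// \code
///   BitstreamRemarkParserHelper Helper(Stream);
///   if (Error E = Helper.parse())
///     return std::move(E);
///   if (Helper.RemarkNameIdx)
///     Expected<StringRef> Name = StrTab[*Helper.RemarkNameIdx];
/// \endcode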
+ Optional Type; + Optional RemarkNameIdx; + Optional PassNameIdx; + Optional FunctionNameIdx; + Optional SourceFileNameIdx; + Optional SourceLine; + Optional SourceColumn; + Optional Hotness; + struct Argument { + Optional KeyIdx; + Optional ValueIdx; + Optional SourceFileNameIdx; + Optional SourceLine; + Optional SourceColumn; + }; + Optional> Args; + /// Avoid re-allocating a vector every time. + SmallVector TmpArgs; + + /// Continue parsing with \p Stream. \p Stream is expected to contain a + /// ENTER_SUBBLOCK to the REMARK_BLOCK at the current position. + /// \p Stream is expected to have a BLOCKINFO_BLOCK set and to have already + /// parsed the META_BLOCK. + BitstreamRemarkParserHelper(BitstreamCursor &Stream); + + /// Parse the REMARK_BLOCK and fill the available entries. + /// This helper does not check for the validity of the fields. + Error parse(); +}; + +/// Helper to parse any bitstream remark container. +struct BitstreamParserHelper { + /// The Bitstream reader. + BitstreamCursor Stream; + /// The block info block. + BitstreamBlockInfo BlockInfo; + /// Start parsing at \p Buffer. + BitstreamParserHelper(StringRef Buffer); + /// Parse the magic number. + Expected> parseMagic(); + /// Parse the block info block containing all the abbrevs. + /// This needs to be called before calling any other parsing function. + Error parseBlockInfoBlock(); + /// Return true if the next block is a META_BLOCK. This function does not move + /// the cursor. + Expected isMetaBlock(); + /// Return true if the next block is a REMARK_BLOCK. This function does not + /// move the cursor. + Expected isRemarkBlock(); + /// Return true if the parser reached the end of the stream. + bool atEndOfStream() { return Stream.AtEndOfStream(); } + /// Jump to the end of the stream, skipping everything. + void skipToEnd() { return Stream.skipToEnd(); } +}; + +} // end namespace remarks +} // end namespace llvm + +#endif /* LLVM_REMARKS_BITSTREAM_REMARK_PARSER_H */ diff --git a/include/llvm/Remarks/BitstreamRemarkSerializer.h b/include/llvm/Remarks/BitstreamRemarkSerializer.h new file mode 100644 index 000000000000..62a175a1db0b --- /dev/null +++ b/include/llvm/Remarks/BitstreamRemarkSerializer.h @@ -0,0 +1,196 @@ +//===-- BitstreamRemarkSerializer.h - Bitstream serializer ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides an implementation of the serializer using the LLVM +// Bitstream format. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_REMARKS_BITSTREAM_REMARK_SERIALIZER_H +#define LLVM_REMARKS_BITSTREAM_REMARK_SERIALIZER_H + +#include "llvm/Bitstream/BitstreamWriter.h" +#include "llvm/Remarks/BitstreamRemarkContainer.h" +#include "llvm/Remarks/RemarkSerializer.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace remarks { + +/// Serialize the remarks to LLVM bitstream. +/// This class provides ways to emit remarks in the LLVM bitstream format and +/// its associated metadata. +/// +/// * The separate model: +/// Separate meta: | Container info +/// | String table +/// | External file +/// +/// Separate remarks: | Container info +/// | Remark version +/// | Remark0 +/// | Remark1 +/// | Remark2 +/// | ... 
+/// +/// * The standalone model: | Container info +/// | String table +/// | Remark version +/// | Remark0 +/// | Remark1 +/// | Remark2 +/// | ... +/// +struct BitstreamRemarkSerializerHelper { + /// Buffer used for encoding the bitstream before writing it to the final + /// stream. + SmallVector Encoded; + /// Buffer used to construct records and pass to the bitstream writer. + SmallVector R; + /// The Bitstream writer. + BitstreamWriter Bitstream; + /// The type of the container we are serializing. + BitstreamRemarkContainerType ContainerType; + + /// Abbrev IDs initialized in the block info block. + /// Note: depending on the container type, some IDs might be uninitialized. + /// Warning: When adding more abbrev IDs, make sure to update the + /// BlockCodeSize (in the call to EnterSubblock). + uint64_t RecordMetaContainerInfoAbbrevID = 0; + uint64_t RecordMetaRemarkVersionAbbrevID = 0; + uint64_t RecordMetaStrTabAbbrevID = 0; + uint64_t RecordMetaExternalFileAbbrevID = 0; + uint64_t RecordRemarkHeaderAbbrevID = 0; + uint64_t RecordRemarkDebugLocAbbrevID = 0; + uint64_t RecordRemarkHotnessAbbrevID = 0; + uint64_t RecordRemarkArgWithDebugLocAbbrevID = 0; + uint64_t RecordRemarkArgWithoutDebugLocAbbrevID = 0; + + BitstreamRemarkSerializerHelper(BitstreamRemarkContainerType ContainerType); + + // Disable copy and move: Bitstream points to Encoded, which needs special + // handling during copy/move, but moving the vectors is probably useless + // anyway. + BitstreamRemarkSerializerHelper(const BitstreamRemarkSerializerHelper &) = + delete; + BitstreamRemarkSerializerHelper & + operator=(const BitstreamRemarkSerializerHelper &) = delete; + BitstreamRemarkSerializerHelper(BitstreamRemarkSerializerHelper &&) = delete; + BitstreamRemarkSerializerHelper & + operator=(BitstreamRemarkSerializerHelper &&) = delete; + + /// Set up the necessary block info entries according to the container type. + void setupBlockInfo(); + + /// Set up the block info for the metadata block. + void setupMetaBlockInfo(); + /// The remark version in the metadata block. + void setupMetaRemarkVersion(); + void emitMetaRemarkVersion(uint64_t RemarkVersion); + /// The strtab in the metadata block. + void setupMetaStrTab(); + void emitMetaStrTab(const StringTable &StrTab); + /// The external file in the metadata block. + void setupMetaExternalFile(); + void emitMetaExternalFile(StringRef Filename); + + /// The block info for the remarks block. + void setupRemarkBlockInfo(); + + /// Emit the metadata for the remarks. + void emitMetaBlock(uint64_t ContainerVersion, + Optional RemarkVersion, + Optional StrTab = None, + Optional Filename = None); + + /// Emit a remark block. The string table is required. + void emitRemarkBlock(const Remark &Remark, StringTable &StrTab); + /// Finalize the writing to \p OS. + void flushToStream(raw_ostream &OS); + /// Finalize the writing to a buffer. + /// The contents of the buffer remain valid for the lifetime of the object. + /// Any call to any other function in this class will invalidate the buffer. + StringRef getBuffer(); +}; + +/// Implementation of the remark serializer using LLVM bitstream. +struct BitstreamRemarkSerializer : public RemarkSerializer { + /// The file should contain: + /// 1) The block info block that describes how to read the blocks. + /// 2) The metadata block that contains various information about the remarks + /// in the file. + /// 3) A number of remark blocks. + + /// We need to set up 1) and 2) first, so that we can emit 3) after. 
This flag + /// is used to emit the first two blocks only once. + bool DidSetUp = false; + /// The helper to emit bitstream. + BitstreamRemarkSerializerHelper Helper; + + /// Construct a serializer that will create its own string table. + BitstreamRemarkSerializer(raw_ostream &OS, SerializerMode Mode); + /// Construct a serializer with a pre-filled string table. + BitstreamRemarkSerializer(raw_ostream &OS, SerializerMode Mode, + StringTable StrTab); + + /// Emit a remark to the stream. This also emits the metadata associated to + /// the remarks based on the SerializerMode specified at construction. + /// This writes the serialized output to the provided stream. + void emit(const Remark &Remark) override; + /// The metadata serializer associated to this remark serializer. Based on the + /// container type of the current serializer, the container type of the + /// metadata serializer will change. + std::unique_ptr + metaSerializer(raw_ostream &OS, + Optional ExternalFilename = None) override; + + static bool classof(const RemarkSerializer *S) { + return S->SerializerFormat == Format::Bitstream; + } +}; + +/// Serializer of metadata for bitstream remarks. +struct BitstreamMetaSerializer : public MetaSerializer { + /// This class can be used with [1] a pre-constructed + /// BitstreamRemarkSerializerHelper, or with [2] one that is owned by the meta + /// serializer. In case of [1], we need to be able to store a reference to the + /// object, while in case of [2] we need to store the whole object. + Optional TmpHelper; + /// The actual helper, that can point to \p TmpHelper or to an external helper + /// object. + BitstreamRemarkSerializerHelper *Helper = nullptr; + + Optional StrTab; + Optional ExternalFilename; + + /// Create a new meta serializer based on \p ContainerType. + BitstreamMetaSerializer(raw_ostream &OS, + BitstreamRemarkContainerType ContainerType, + Optional StrTab = None, + Optional ExternalFilename = None) + : MetaSerializer(OS), TmpHelper(None), Helper(nullptr), StrTab(StrTab), + ExternalFilename(ExternalFilename) { + TmpHelper.emplace(ContainerType); + Helper = &*TmpHelper; + } + + /// Create a new meta serializer based on a previously built \p Helper. + BitstreamMetaSerializer(raw_ostream &OS, + BitstreamRemarkSerializerHelper &Helper, + Optional StrTab = None, + Optional ExternalFilename = None) + : MetaSerializer(OS), TmpHelper(None), Helper(&Helper), StrTab(StrTab), + ExternalFilename(ExternalFilename) {} + + void emit() override; +}; + +} // end namespace remarks +} // end namespace llvm + +#endif /* LLVM_REMARKS_BITSTREAM_REMARK_SERIALIZER_H */ diff --git a/include/llvm/Remarks/Remark.h b/include/llvm/Remarks/Remark.h index 05d0ea60accd..1243311fb8c5 100644 --- a/include/llvm/Remarks/Remark.h +++ b/include/llvm/Remarks/Remark.h @@ -23,7 +23,8 @@ namespace llvm { namespace remarks { -constexpr uint64_t Version = 0; +/// The current version of the remark entry. +constexpr uint64_t CurrentRemarkVersion = 0; /// The debug location used to track a remark back to the source file. struct RemarkLocation { @@ -58,7 +59,8 @@ enum class Type { AnalysisFPCommute, AnalysisAliasing, Failure, - LastTypeValue = Failure + First = Unknown, + Last = Failure }; /// A remark type used for both emission and parsing. @@ -107,6 +109,36 @@ private: // Create wrappers for C Binding types (see CBindingWrapping.h). DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Remark, LLVMRemarkEntryRef) +/// Comparison operators for Remark objects and dependent objects. 
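/// These are field-wise comparisons, e.g. (illustrative):
/// \code
///   RemarkLocation A{"file.c", 3, 12};
///   RemarkLocation B{"file.c", 3, 12};
///   assert(A == B && !(A != B));
/// \endcode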
+inline bool operator==(const RemarkLocation &LHS, const RemarkLocation &RHS) { + return LHS.SourceFilePath == RHS.SourceFilePath && + LHS.SourceLine == RHS.SourceLine && + LHS.SourceColumn == RHS.SourceColumn; +} + +inline bool operator!=(const RemarkLocation &LHS, const RemarkLocation &RHS) { + return !(LHS == RHS); +} + +inline bool operator==(const Argument &LHS, const Argument &RHS) { + return LHS.Key == RHS.Key && LHS.Val == RHS.Val && LHS.Loc == RHS.Loc; +} + +inline bool operator!=(const Argument &LHS, const Argument &RHS) { + return !(LHS == RHS); +} + +inline bool operator==(const Remark &LHS, const Remark &RHS) { + return LHS.RemarkType == RHS.RemarkType && LHS.PassName == RHS.PassName && + LHS.RemarkName == RHS.RemarkName && + LHS.FunctionName == RHS.FunctionName && LHS.Loc == RHS.Loc && + LHS.Hotness == RHS.Hotness && LHS.Args == RHS.Args; +} + +inline bool operator!=(const Remark &LHS, const Remark &RHS) { + return !(LHS == RHS); +} + } // end namespace remarks } // end namespace llvm diff --git a/include/llvm/Remarks/RemarkFormat.h b/include/llvm/Remarks/RemarkFormat.h index e167d99d2517..6dd32b226099 100644 --- a/include/llvm/Remarks/RemarkFormat.h +++ b/include/llvm/Remarks/RemarkFormat.h @@ -19,10 +19,10 @@ namespace llvm { namespace remarks { -constexpr StringRef Magic("REMARKS", 7); +constexpr StringLiteral Magic("REMARKS"); /// The format used for serializing/deserializing remarks. -enum class Format { Unknown, YAML }; +enum class Format { Unknown, YAML, YAMLStrTab, Bitstream }; /// Parse and validate a string for the remark format. Expected parseFormat(StringRef FormatStr); diff --git a/include/llvm/Remarks/RemarkParser.h b/include/llvm/Remarks/RemarkParser.h index 671e1abe5ec7..d6b1fddb06ff 100644 --- a/include/llvm/Remarks/RemarkParser.h +++ b/include/llvm/Remarks/RemarkParser.h @@ -23,9 +23,6 @@ namespace llvm { namespace remarks { -struct ParserImpl; -struct ParsedStringTable; - class EndOfFileError : public ErrorInfo { public: static char ID; @@ -39,11 +36,13 @@ public: }; /// Parser used to parse a raw buffer to remarks::Remark objects. -struct Parser { +struct RemarkParser { /// The format of the parser. Format ParserFormat; + /// Path to prepend when opening an external remark file. + std::string ExternalFilePrependPath; - Parser(Format ParserFormat) : ParserFormat(ParserFormat) {} + RemarkParser(Format ParserFormat) : ParserFormat(ParserFormat) {} /// If no error occurs, this returns a valid Remark object. /// If an error of type EndOfFileError occurs, it is safe to recover from it @@ -52,7 +51,7 @@ struct Parser { /// The pointer should never be null. virtual Expected> next() = 0; - virtual ~Parser() = default; + virtual ~RemarkParser() = default; }; /// In-memory representation of the string table parsed from a buffer (e.g. the @@ -60,16 +59,33 @@ struct Parser { struct ParsedStringTable { /// The buffer mapped from the section contents. StringRef Buffer; - /// Collection of offsets in the buffer for each string entry. - SmallVector Offsets; + /// This object has high changes to be std::move'd around, so don't use a + /// SmallVector for once. + std::vector Offsets; - Expected operator[](size_t Index) const; ParsedStringTable(StringRef Buffer); + /// Disable copy. + ParsedStringTable(const ParsedStringTable &) = delete; + ParsedStringTable &operator=(const ParsedStringTable &) = delete; + /// Should be movable. 
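/// Typical iteration over a parsed table, as a sketch (`Buf` is an assumed
/// StringRef over the serialized table and `consume` is a placeholder):
/// \code
///   ParsedStringTable StrTab(Buf);
///   for (size_t I = 0, E = StrTab.size(); I != E; ++I) {
///     Expected<StringRef> Str = StrTab[I];
///     if (!Str)
///       return Str.takeError();
///     consume(*Str);
///   }
/// \endcode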
+ ParsedStringTable(ParsedStringTable &&) = default; + ParsedStringTable &operator=(ParsedStringTable &&) = default; + + size_t size() const { return Offsets.size(); } + Expected operator[](size_t Index) const; }; -Expected> +Expected> createRemarkParser(Format ParserFormat, + StringRef Buf); + +Expected> createRemarkParser(Format ParserFormat, StringRef Buf, - Optional StrTab = None); + ParsedStringTable StrTab); + +Expected> +createRemarkParserFromMeta(Format ParserFormat, StringRef Buf, + Optional StrTab = None, + Optional ExternalFilePrependPath = None); } // end namespace remarks } // end namespace llvm diff --git a/include/llvm/Remarks/RemarkSerializer.h b/include/llvm/Remarks/RemarkSerializer.h index def5c2e16620..35752cd5f6fb 100644 --- a/include/llvm/Remarks/RemarkSerializer.h +++ b/include/llvm/Remarks/RemarkSerializer.h @@ -14,54 +14,74 @@ #define LLVM_REMARKS_REMARK_SERIALIZER_H #include "llvm/Remarks/Remark.h" +#include "llvm/Remarks/RemarkFormat.h" #include "llvm/Remarks/RemarkStringTable.h" -#include "llvm/Support/YAMLTraits.h" #include "llvm/Support/raw_ostream.h" namespace llvm { namespace remarks { +enum class SerializerMode { + Separate, // A mode where the metadata is serialized separately from the + // remarks. Typically, this is used when the remarks need to be + // streamed to a side file and the metadata is embedded into the + // final result of the compilation. + Standalone // A mode where everything can be retrieved in the same + // file/buffer. Typically, this is used for storing remarks for + // later use. +}; + +struct MetaSerializer; + /// This is the base class for a remark serializer. /// It includes support for using a string table while emitting. -struct Serializer { +struct RemarkSerializer { + /// The format of the serializer. + Format SerializerFormat; /// The open raw_ostream that the remark diagnostics are emitted to. raw_ostream &OS; + /// The serialization mode. + SerializerMode Mode; /// The string table containing all the unique strings used in the output. /// The table can be serialized to be consumed after the compilation. Optional StrTab; - Serializer(raw_ostream &OS) : OS(OS), StrTab() {} + RemarkSerializer(Format SerializerFormat, raw_ostream &OS, + SerializerMode Mode) + : SerializerFormat(SerializerFormat), OS(OS), Mode(Mode), StrTab() {} /// This is just an interface. - virtual ~Serializer() = default; + virtual ~RemarkSerializer() = default; + /// Emit a remark to the stream. virtual void emit(const Remark &Remark) = 0; + /// Return the corresponding metadata serializer. + virtual std::unique_ptr + metaSerializer(raw_ostream &OS, + Optional ExternalFilename = None) = 0; }; -/// Wether the serializer should use a string table while emitting. -enum class UseStringTable { No, Yes }; - -/// Serialize the remarks to YAML. One remark entry looks like this: -/// --- ! -/// Pass: -/// Name: -/// DebugLoc: { File: , Line: , -/// Column: } -/// Function: -/// Args: -/// - : -/// DebugLoc: { File: , Line: , Column: } -/// ... -struct YAMLSerializer : public Serializer { - /// The YAML streamer. - yaml::Output YAMLOutput; +/// This is the base class for a remark metadata serializer. +struct MetaSerializer { + /// The open raw_ostream that the metadata is emitted to. + raw_ostream &OS; - YAMLSerializer(raw_ostream &OS, - UseStringTable UseStringTable = remarks::UseStringTable::No); + MetaSerializer(raw_ostream &OS) : OS(OS) {} - /// Emit a remark to the stream. - void emit(const Remark &Remark) override; + /// This is just an interface. 
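/// In the separate model, a driver would typically emit all remarks first and
/// then the metadata, along these lines (a sketch; the stream names, the
/// external file name, and the `Remarks` range are assumptions):
/// \code
///   auto SerializerOrErr = createRemarkSerializer(
///       Format::YAMLStrTab, SerializerMode::Separate, RemarksOS);
///   if (!SerializerOrErr)
///     return SerializerOrErr.takeError();
///   std::unique_ptr<RemarkSerializer> S = std::move(*SerializerOrErr);
///   for (const Remark &R : Remarks)
///     S->emit(R);
///   S->metaSerializer(MetaOS, StringRef("remarks.yaml"))->emit();
/// \endcode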
+ virtual ~MetaSerializer() = default; + virtual void emit() = 0; }; +/// Create a remark serializer. +Expected> +createRemarkSerializer(Format RemarksFormat, SerializerMode Mode, + raw_ostream &OS); + +/// Create a remark serializer that uses a pre-filled string table. +Expected> +createRemarkSerializer(Format RemarksFormat, SerializerMode Mode, + raw_ostream &OS, remarks::StringTable StrTab); + } // end namespace remarks } // end namespace llvm diff --git a/include/llvm/Remarks/RemarkStringTable.h b/include/llvm/Remarks/RemarkStringTable.h index f9b4fdbbfb8d..4ce27ee884c8 100644 --- a/include/llvm/Remarks/RemarkStringTable.h +++ b/include/llvm/Remarks/RemarkStringTable.h @@ -18,7 +18,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/Allocator.h" +#include "llvm/Remarks/Remark.h" #include namespace llvm { @@ -27,21 +27,35 @@ class raw_ostream; namespace remarks { +struct ParsedStringTable; + /// The string table used for serializing remarks. /// This table can be for example serialized in a section to be consumed after /// the compilation. struct StringTable { - /// Allocator holding all the memory used by the map. - BumpPtrAllocator Allocator; /// The string table containing all the unique strings used in the output. /// It maps a string to an unique ID. - StringMap StrTab; + StringMap StrTab; /// Total size of the string table when serialized. size_t SerializedSize = 0; - StringTable() : Allocator(), StrTab(Allocator) {} + StringTable() = default; + + /// Disable copy. + StringTable(const StringTable &) = delete; + StringTable &operator=(const StringTable &) = delete; + /// Should be movable. + StringTable(StringTable &&) = default; + StringTable &operator=(StringTable &&) = default; + + /// Construct a string table from a ParsedStringTable. + StringTable(const ParsedStringTable &Other); + /// Add a string to the table. It returns an unique ID of the string. std::pair add(StringRef Str); + /// Modify \p R to use strings from this string table. If the string table + /// does not contain the strings, it adds them. + void internalize(Remark &R); /// Serialize the string table to a stream. It is serialized as a little /// endian uint64 (the size of the table in bytes) followed by a sequence of /// NULL-terminated strings, where the N-th string is the string with the ID N diff --git a/include/llvm/Remarks/YAMLRemarkSerializer.h b/include/llvm/Remarks/YAMLRemarkSerializer.h new file mode 100644 index 000000000000..f1213beab15d --- /dev/null +++ b/include/llvm/Remarks/YAMLRemarkSerializer.h @@ -0,0 +1,108 @@ +//===-- YAMLRemarkSerializer.h - YAML Remark serialization ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides an interface for serializing remarks to YAML. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_REMARKS_YAML_REMARK_SERIALIZER_H +#define LLVM_REMARKS_YAML_REMARK_SERIALIZER_H + +#include "llvm/Remarks/RemarkSerializer.h" +#include "llvm/Support/YAMLTraits.h" + +namespace llvm { +namespace remarks { + +/// Serialize the remarks to YAML. One remark entry looks like this: +/// --- ! 
+/// Pass: +/// Name: +/// DebugLoc: { File: , Line: , +/// Column: } +/// Function: +/// Args: +/// - : +/// DebugLoc: { File: , Line: , Column: } +/// ... +struct YAMLRemarkSerializer : public RemarkSerializer { + /// The YAML streamer. + yaml::Output YAMLOutput; + + YAMLRemarkSerializer(raw_ostream &OS, SerializerMode Mode, + Optional StrTab = None); + + void emit(const Remark &Remark) override; + std::unique_ptr + metaSerializer(raw_ostream &OS, + Optional ExternalFilename = None) override; + + static bool classof(const RemarkSerializer *S) { + return S->SerializerFormat == Format::YAML; + } + +protected: + YAMLRemarkSerializer(Format SerializerFormat, raw_ostream &OS, + SerializerMode Mode, + Optional StrTab = None); +}; + +struct YAMLMetaSerializer : public MetaSerializer { + Optional ExternalFilename; + + YAMLMetaSerializer(raw_ostream &OS, Optional ExternalFilename) + : MetaSerializer(OS), ExternalFilename(ExternalFilename) {} + + void emit() override; +}; + +/// Serialize the remarks to YAML using a string table. An remark entry looks +/// like the regular YAML remark but instead of string entries it's using +/// numbers that map to an index in the string table. +struct YAMLStrTabRemarkSerializer : public YAMLRemarkSerializer { + /// Wether we already emitted the metadata in standalone mode. + /// This should be set to true after the first invocation of `emit`. + bool DidEmitMeta = false; + + YAMLStrTabRemarkSerializer(raw_ostream &OS, SerializerMode Mode) + : YAMLRemarkSerializer(Format::YAMLStrTab, OS, Mode) { + // We always need a string table for this type of serializer. + StrTab.emplace(); + } + YAMLStrTabRemarkSerializer(raw_ostream &OS, SerializerMode Mode, + StringTable StrTab) + : YAMLRemarkSerializer(Format::YAMLStrTab, OS, Mode, std::move(StrTab)) {} + + /// Override to emit the metadata if necessary. + void emit(const Remark &Remark) override; + + std::unique_ptr + metaSerializer(raw_ostream &OS, + Optional ExternalFilename = None) override; + + static bool classof(const RemarkSerializer *S) { + return S->SerializerFormat == Format::YAMLStrTab; + } +}; + +struct YAMLStrTabMetaSerializer : public YAMLMetaSerializer { + /// The string table is part of the metadata. 
+ const StringTable &StrTab; + + YAMLStrTabMetaSerializer(raw_ostream &OS, + Optional ExternalFilename, + const StringTable &StrTab) + : YAMLMetaSerializer(OS, ExternalFilename), StrTab(StrTab) {} + + void emit() override; +}; + +} // end namespace remarks +} // end namespace llvm + +#endif /* LLVM_REMARKS_REMARK_SERIALIZER_H */ diff --git a/include/llvm/Support/AArch64TargetParser.def b/include/llvm/Support/AArch64TargetParser.def index e152f383b3ec..15737265dfc3 100644 --- a/include/llvm/Support/AArch64TargetParser.def +++ b/include/llvm/Support/AArch64TargetParser.def @@ -50,35 +50,36 @@ AARCH64_ARCH("armv8.5-a", ARMV8_5A, "8.5-A", "v8.5a", #define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) #endif // FIXME: This would be nicer were it tablegen -AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr) -AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr) -AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc") -AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse") -AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm") -AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto") -AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4") -AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3") -AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2") -AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes") -AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod") -AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8") -AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon") -AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16") -AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml") -AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe") -AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras") -AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve") -AARCH64_ARCH_EXT_NAME("sve2", AArch64::AEK_SVE2, "+sve2", "-sve2") -AARCH64_ARCH_EXT_NAME("sve2-aes", AArch64::AEK_SVE2AES, "+sve2-aes", "-sve2-aes") -AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", "-sve2-sm4") -AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3") -AARCH64_ARCH_EXT_NAME("bitperm", AArch64::AEK_BITPERM, "+bitperm", "-bitperm") -AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc") -AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand") -AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte") -AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs") -AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb") -AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres") +AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr) +AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr) +AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc") +AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse") +AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm") +AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto") +AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4") +AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3") +AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2") +AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes") +AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, 
"+dotprod","-dotprod") +AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8") +AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon") +AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16") +AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml") +AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe") +AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras") +AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve") +AARCH64_ARCH_EXT_NAME("sve2", AArch64::AEK_SVE2, "+sve2", "-sve2") +AARCH64_ARCH_EXT_NAME("sve2-aes", AArch64::AEK_SVE2AES, "+sve2-aes", "-sve2-aes") +AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", "-sve2-sm4") +AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3") +AARCH64_ARCH_EXT_NAME("sve2-bitperm", AArch64::AEK_SVE2BITPERM, "+sve2-bitperm", "-sve2-bitperm") +AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc") +AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand") +AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte") +AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs") +AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb") +AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres") +AARCH64_ARCH_EXT_NAME("tme", AArch64::AEK_TME, "+tme", "-tme") #undef AARCH64_ARCH_EXT_NAME #ifndef AARCH64_CPU_NAME @@ -92,6 +93,12 @@ AARCH64_CPU_NAME("cortex-a55", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC)) AARCH64_CPU_NAME("cortex-a57", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_CRC)) +AARCH64_CPU_NAME("cortex-a65", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, + (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RAS | + AArch64::AEK_RCPC | AArch64::AEK_SSBS)) +AARCH64_CPU_NAME("cortex-a65ae", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, + (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RAS | + AArch64::AEK_RCPC | AArch64::AEK_SSBS)) AARCH64_CPU_NAME("cortex-a72", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_CRC)) AARCH64_CPU_NAME("cortex-a73", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, @@ -104,6 +111,13 @@ AARCH64_CPU_NAME("cortex-a76", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, AARCH64_CPU_NAME("cortex-a76ae", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | AArch64::AEK_SSBS)) +AARCH64_CPU_NAME("neoverse-e1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, + (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RAS | + AArch64::AEK_RCPC | AArch64::AEK_SSBS)) +AARCH64_CPU_NAME("neoverse-n1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, + (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | + AArch64::AEK_PROFILE | AArch64::AEK_RAS | AArch64::AEK_RCPC | + AArch64::AEK_SSBS)) AARCH64_CPU_NAME("cyclone", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_NONE)) AARCH64_CPU_NAME("exynos-m1", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, diff --git a/include/llvm/Support/AArch64TargetParser.h b/include/llvm/Support/AArch64TargetParser.h index 965d38535e74..94f341c83260 100644 --- a/include/llvm/Support/AArch64TargetParser.h +++ b/include/llvm/Support/AArch64TargetParser.h @@ -53,7 +53,8 @@ enum ArchExtKind : unsigned { AEK_SVE2AES = 1 << 24, AEK_SVE2SM4 = 1 << 25, AEK_SVE2SHA3 = 1 << 26, - AEK_BITPERM = 1 << 27, + AEK_SVE2BITPERM = 1 << 27, + AEK_TME = 1 << 28, }; enum class ArchKind { diff --git a/include/llvm/Support/ARMTargetParser.def 
b/include/llvm/Support/ARMTargetParser.def index f466b3252748..3e77e20762c1 100644 --- a/include/llvm/Support/ARMTargetParser.def +++ b/include/llvm/Support/ARMTargetParser.def @@ -274,6 +274,8 @@ ARM_CPU_NAME("cortex-a76", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (ARM::AEK_FP16 | ARM::AEK_DOTPROD)) ARM_CPU_NAME("cortex-a76ae", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (ARM::AEK_FP16 | ARM::AEK_DOTPROD)) +ARM_CPU_NAME("neoverse-n1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, + (ARM::AEK_FP16 | ARM::AEK_DOTPROD)) ARM_CPU_NAME("cyclone", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) ARM_CPU_NAME("exynos-m1", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) ARM_CPU_NAME("exynos-m2", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) diff --git a/include/llvm/Support/ARMTargetParser.h b/include/llvm/Support/ARMTargetParser.h index 4b9070dea596..02d4c975129f 100644 --- a/include/llvm/Support/ARMTargetParser.h +++ b/include/llvm/Support/ARMTargetParser.h @@ -39,19 +39,13 @@ enum ArchExtKind : unsigned { AEK_DSP = 1 << 10, AEK_FP16 = 1 << 11, AEK_RAS = 1 << 12, - AEK_SVE = 1 << 13, - AEK_DOTPROD = 1 << 14, - AEK_SHA2 = 1 << 15, - AEK_AES = 1 << 16, - AEK_FP16FML = 1 << 17, - AEK_SB = 1 << 18, - AEK_SVE2 = 1 << 19, - AEK_SVE2AES = 1 << 20, - AEK_SVE2SM4 = 1 << 21, - AEK_SVE2SHA3 = 1 << 22, - AEK_BITPERM = 1 << 23, - AEK_FP_DP = 1 << 24, - AEK_LOB = 1 << 25, + AEK_DOTPROD = 1 << 13, + AEK_SHA2 = 1 << 14, + AEK_AES = 1 << 15, + AEK_FP16FML = 1 << 16, + AEK_SB = 1 << 17, + AEK_FP_DP = 1 << 18, + AEK_LOB = 1 << 19, // Unsupported extensions. AEK_OS = 0x8000000, AEK_IWMMXT = 0x10000000, diff --git a/include/llvm/Support/AlignOf.h b/include/llvm/Support/AlignOf.h index d12401f0eb49..eb42542b777f 100644 --- a/include/llvm/Support/AlignOf.h +++ b/include/llvm/Support/AlignOf.h @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This file defines the AlignedCharArray and AlignedCharArrayUnion classes. +// This file defines the AlignedCharArrayUnion class. // //===----------------------------------------------------------------------===// @@ -18,128 +18,38 @@ namespace llvm { -/// \struct AlignedCharArray -/// Helper for building an aligned character array type. -/// -/// This template is used to explicitly build up a collection of aligned -/// character array types. We have to build these up using a macro and explicit -/// specialization to cope with MSVC (at least till 2015) where only an -/// integer literal can be used to specify an alignment constraint. Once built -/// up here, we can then begin to indirect between these using normal C++ -/// template parameters. - -// MSVC requires special handling here. -#ifndef _MSC_VER - -template -struct AlignedCharArray { - alignas(Alignment) char buffer[Size]; -}; - -#else // _MSC_VER - -/// Create a type with an aligned char buffer. -template -struct AlignedCharArray; - -// We provide special variations of this template for the most common -// alignments because __declspec(align(...)) doesn't actually work when it is -// a member of a by-value function argument in MSVC, even if the alignment -// request is something reasonably like 8-byte or 16-byte. Note that we can't -// even include the declspec with the union that forces the alignment because -// MSVC warns on the existence of the declspec despite the union member forcing -// proper alignment. 
- -template -struct AlignedCharArray<1, Size> { - union { - char aligned; - char buffer[Size]; - }; -}; - -template -struct AlignedCharArray<2, Size> { - union { - short aligned; - char buffer[Size]; - }; -}; - -template -struct AlignedCharArray<4, Size> { - union { - int aligned; - char buffer[Size]; - }; -}; +namespace detail { -template -struct AlignedCharArray<8, Size> { - union { - double aligned; - char buffer[Size]; - }; +template class AlignerImpl { + T t; + AlignerImpl rest; + AlignerImpl() = delete; }; - -// The rest of these are provided with a __declspec(align(...)) and we simply -// can't pass them by-value as function arguments on MSVC. - -#define LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(x) \ - template \ - struct AlignedCharArray { \ - __declspec(align(x)) char buffer[Size]; \ - }; - -LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(16) -LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(32) -LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(64) -LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(128) - -#undef LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT - -#endif // _MSC_VER - -namespace detail { -template -class AlignerImpl { - T1 t1; T2 t2; T3 t3; T4 t4; T5 t5; T6 t6; T7 t7; T8 t8; T9 t9; T10 t10; - +template class AlignerImpl { + T t; AlignerImpl() = delete; }; -template -union SizerImpl { - char arr1[sizeof(T1)], arr2[sizeof(T2)], arr3[sizeof(T3)], arr4[sizeof(T4)], - arr5[sizeof(T5)], arr6[sizeof(T6)], arr7[sizeof(T7)], arr8[sizeof(T8)], - arr9[sizeof(T9)], arr10[sizeof(T10)]; +template union SizerImpl { + char arr[sizeof(T)]; + SizerImpl rest; }; + +template union SizerImpl { char arr[sizeof(T)]; }; } // end namespace detail -/// This union template exposes a suitably aligned and sized character -/// array member which can hold elements of any of up to ten types. +/// A suitably aligned and sized character array member which can hold elements +/// of any type. /// -/// These types may be arrays, structs, or any other types. The goal is to -/// expose a char array buffer member which can be used as suitable storage for -/// a placement new of any of these types. Support for more than ten types can -/// be added at the cost of more boilerplate. -template -struct AlignedCharArrayUnion : llvm::AlignedCharArray< - alignof(llvm::detail::AlignerImpl), - sizeof(::llvm::detail::SizerImpl)> { +/// These types may be arrays, structs, or any other types. This exposes a +/// `buffer` member which can be used as suitable storage for a placement new of +/// any of these types. +template struct AlignedCharArrayUnion { + alignas(::llvm::detail::AlignerImpl) char buffer[sizeof( + llvm::detail::SizerImpl)]; }; + } // end namespace llvm #endif // LLVM_SUPPORT_ALIGNOF_H diff --git a/include/llvm/Support/Alignment.h b/include/llvm/Support/Alignment.h new file mode 100644 index 000000000000..72fad87dd0d4 --- /dev/null +++ b/include/llvm/Support/Alignment.h @@ -0,0 +1,403 @@ +//===-- llvm/Support/Alignment.h - Useful alignment functions ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains types to represent alignments. +// They are instrumented to guarantee some invariants are preserved and prevent +// invalid manipulations. 
+// +// - Align represents an alignment in bytes, it is always set and always a valid +// power of two, its minimum value is 1 which means no alignment requirements. +// +// - MaybeAlign is an optional type, it may be undefined or set. When it's set +// you can get the underlying Align type by using the getValue() method. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_ALIGNMENT_H_ +#define LLVM_SUPPORT_ALIGNMENT_H_ + +#include "llvm/ADT/Optional.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/MathExtras.h" +#include +#include + +namespace llvm { + +#define ALIGN_CHECK_ISPOSITIVE(decl) \ + assert(decl > 0 && (#decl " should be defined")) +#define ALIGN_CHECK_ISSET(decl) \ + assert(decl.hasValue() && (#decl " should be defined")) + +/// This struct is a compact representation of a valid (non-zero power of two) +/// alignment. +/// It is suitable for use as static global constants. +struct Align { +private: + uint8_t ShiftValue = 0; /// The log2 of the required alignment. + /// ShiftValue is less than 64 by construction. + + friend struct MaybeAlign; + friend unsigned Log2(Align); + friend bool operator==(Align Lhs, Align Rhs); + friend bool operator!=(Align Lhs, Align Rhs); + friend bool operator<=(Align Lhs, Align Rhs); + friend bool operator>=(Align Lhs, Align Rhs); + friend bool operator<(Align Lhs, Align Rhs); + friend bool operator>(Align Lhs, Align Rhs); + friend unsigned encode(struct MaybeAlign A); + friend struct MaybeAlign decodeMaybeAlign(unsigned Value); + + /// A trivial type to allow construction of constexpr Align. + /// This is currently needed to workaround a bug in GCC 5.3 which prevents + /// definition of constexpr assign operators. + /// https://stackoverflow.com/questions/46756288/explicitly-defaulted-function-cannot-be-declared-as-constexpr-because-the-implic + /// FIXME: Remove this, make all assign operators constexpr and introduce user + /// defined literals when we don't have to support GCC 5.3 anymore. + /// https://llvm.org/docs/GettingStarted.html#getting-a-modern-host-c-toolchain + struct LogValue { + uint8_t Log; + }; + +public: + /// Default is byte-aligned. + constexpr Align() = default; + /// Do not perform checks in case of copy/move construct/assign, because the + /// checks have been performed when building `Other`. + constexpr Align(const Align &Other) = default; + constexpr Align(Align &&Other) = default; + Align &operator=(const Align &Other) = default; + Align &operator=(Align &&Other) = default; + + explicit Align(uint64_t Value) { + assert(Value > 0 && "Value must not be 0"); + assert(llvm::isPowerOf2_64(Value) && "Alignment is not a power of 2"); + ShiftValue = Log2_64(Value); + assert(ShiftValue < 64 && "Broken invariant"); + } + + /// This is a hole in the type system and should not be abused. + /// Needed to interact with C for instance. + uint64_t value() const { return uint64_t(1) << ShiftValue; } + + /// Returns a default constructed Align which corresponds to no alignment. + /// This is useful to test for unalignment as it conveys clear semantic. + /// `if (A != Align::None())` + /// would be better than + /// `if (A > Align(1))` + constexpr static const Align None() { return Align(); } + + /// Allow constructions of constexpr Align. + template constexpr static LogValue Constant() { + return LogValue{static_cast(CTLog2())}; + } + + /// Allow constructions of constexpr Align from types. + /// Compile time equivalent to Align(alignof(T)). 
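A minimal sketch of constructing Align values from the pieces above: the runtime constructor asserts that its argument is a non-zero power of two, while Align::Constant<N>() (and Align::Of<T>(), whose body follows below) produce constexpr constants via the LogValue workaround.

#include "llvm/Support/Alignment.h"

void alignSketch() {
  llvm::Align A(16);           // asserts that 16 is a non-zero power of two
  auto Bytes = A.value();      // 16
  (void)Bytes;

  constexpr llvm::Align B = llvm::Align::Constant<8>(); // compile-time constant
  (void)B;

  if (A != llvm::Align::None()) {
    // A carries a real alignment requirement, i.e. more than byte alignment.
  }
}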
+ template constexpr static LogValue Of() { + return Constant::value>(); + } + + /// Constexpr constructor from LogValue type. + constexpr Align(LogValue CA) : ShiftValue(CA.Log) {} +}; + +/// Treats the value 0 as a 1, so Align is always at least 1. +inline Align assumeAligned(uint64_t Value) { + return Value ? Align(Value) : Align(); +} + +/// This struct is a compact representation of a valid (power of two) or +/// undefined (0) alignment. +struct MaybeAlign : public llvm::Optional { +private: + using UP = llvm::Optional; + +public: + /// Default is undefined. + MaybeAlign() = default; + /// Do not perform checks in case of copy/move construct/assign, because the + /// checks have been performed when building `Other`. + MaybeAlign(const MaybeAlign &Other) = default; + MaybeAlign &operator=(const MaybeAlign &Other) = default; + MaybeAlign(MaybeAlign &&Other) = default; + MaybeAlign &operator=(MaybeAlign &&Other) = default; + + /// Use llvm::Optional constructor. + using UP::UP; + + explicit MaybeAlign(uint64_t Value) { + assert((Value == 0 || llvm::isPowerOf2_64(Value)) && + "Alignment is neither 0 nor a power of 2"); + if (Value) + emplace(Value); + } + + /// For convenience, returns a valid alignment or 1 if undefined. + Align valueOrOne() const { return hasValue() ? getValue() : Align(); } +}; + +/// Checks that SizeInBytes is a multiple of the alignment. +inline bool isAligned(Align Lhs, uint64_t SizeInBytes) { + return SizeInBytes % Lhs.value() == 0; +} + +/// Checks that SizeInBytes is a multiple of the alignment. +/// Returns false if the alignment is undefined. +inline bool isAligned(MaybeAlign Lhs, uint64_t SizeInBytes) { + ALIGN_CHECK_ISSET(Lhs); + return SizeInBytes % (*Lhs).value() == 0; +} + +/// Checks that Addr is a multiple of the alignment. +inline bool isAddrAligned(Align Lhs, const void *Addr) { + return isAligned(Lhs, reinterpret_cast(Addr)); +} + +/// Returns a multiple of A needed to store `Size` bytes. +inline uint64_t alignTo(uint64_t Size, Align A) { + const uint64_t value = A.value(); + // The following line is equivalent to `(Size + value - 1) / value * value`. + + // The division followed by a multiplication can be thought of as a right + // shift followed by a left shift which zeros out the extra bits produced in + // the bump; `~(value - 1)` is a mask where all those bits being zeroed out + // are just zero. + + // Most compilers can generate this code but the pattern may be missed when + // multiple functions gets inlined. + return (Size + value - 1) & ~(value - 1); +} + +/// Returns a multiple of A needed to store `Size` bytes. +/// Returns `Size` if current alignment is undefined. +inline uint64_t alignTo(uint64_t Size, MaybeAlign A) { + return A ? alignTo(Size, A.getValue()) : Size; +} + +/// Aligns `Addr` to `Alignment` bytes, rounding up. +inline uintptr_t alignAddr(const void *Addr, Align Alignment) { + uintptr_t ArithAddr = reinterpret_cast(Addr); + assert(static_cast(ArithAddr + Alignment.value() - 1) >= + ArithAddr && "Overflow"); + return alignTo(ArithAddr, Alignment); +} + +/// Returns the offset to the next integer (mod 2**64) that is greater than +/// or equal to \p Value and is a multiple of \p Align. +inline uint64_t offsetToAlignment(uint64_t Value, Align Alignment) { + return alignTo(Value, Alignment) - Value; +} + +/// Returns the necessary adjustment for aligning `Addr` to `Alignment` +/// bytes, rounding up. 
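A minimal sketch of the optional flavour and the rounding helpers defined above: MaybeAlign(0) means the alignment is unspecified, valueOrOne() falls back to byte alignment, and alignTo() rounds a size up to the next multiple of the alignment.

#include "llvm/Support/Alignment.h"
#include <cstdint>

void maybeAlignSketch() {
  llvm::MaybeAlign Unspecified(0);       // undefined alignment
  llvm::MaybeAlign Sixteen(16);          // defined, 16 bytes

  llvm::Align A = Sixteen.valueOrOne();  // Align(16); Align(1) if it were unset
  uint64_t Rounded = llvm::alignTo(100, A); // 112
  bool Ok = llvm::isAligned(A, Rounded);    // true

  (void)Unspecified; (void)Rounded; (void)Ok;
}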
+inline uint64_t offsetToAlignedAddr(const void *Addr, Align Alignment) { + return offsetToAlignment(reinterpret_cast(Addr), Alignment); +} + +/// Returns the log2 of the alignment. +inline unsigned Log2(Align A) { return A.ShiftValue; } + +/// Returns the log2 of the alignment. +/// \pre A must be defined. +inline unsigned Log2(MaybeAlign A) { + ALIGN_CHECK_ISSET(A); + return Log2(A.getValue()); +} + +/// Returns the alignment that satisfies both alignments. +/// Same semantic as MinAlign. +inline Align commonAlignment(Align A, Align B) { return std::min(A, B); } + +/// Returns the alignment that satisfies both alignments. +/// Same semantic as MinAlign. +inline Align commonAlignment(Align A, uint64_t Offset) { + return Align(MinAlign(A.value(), Offset)); +} + +/// Returns the alignment that satisfies both alignments. +/// Same semantic as MinAlign. +inline MaybeAlign commonAlignment(MaybeAlign A, MaybeAlign B) { + return A && B ? commonAlignment(*A, *B) : A ? A : B; +} + +/// Returns the alignment that satisfies both alignments. +/// Same semantic as MinAlign. +inline MaybeAlign commonAlignment(MaybeAlign A, uint64_t Offset) { + return MaybeAlign(MinAlign((*A).value(), Offset)); +} + +/// Returns a representation of the alignment that encodes undefined as 0. +inline unsigned encode(MaybeAlign A) { return A ? A->ShiftValue + 1 : 0; } + +/// Dual operation of the encode function above. +inline MaybeAlign decodeMaybeAlign(unsigned Value) { + if (Value == 0) + return MaybeAlign(); + Align Out; + Out.ShiftValue = Value - 1; + return Out; +} + +/// Returns a representation of the alignment, the encoded value is positive by +/// definition. +inline unsigned encode(Align A) { return encode(MaybeAlign(A)); } + +/// Comparisons between Align and scalars. Rhs must be positive. +inline bool operator==(Align Lhs, uint64_t Rhs) { + ALIGN_CHECK_ISPOSITIVE(Rhs); + return Lhs.value() == Rhs; +} +inline bool operator!=(Align Lhs, uint64_t Rhs) { + ALIGN_CHECK_ISPOSITIVE(Rhs); + return Lhs.value() != Rhs; +} +inline bool operator<=(Align Lhs, uint64_t Rhs) { + ALIGN_CHECK_ISPOSITIVE(Rhs); + return Lhs.value() <= Rhs; +} +inline bool operator>=(Align Lhs, uint64_t Rhs) { + ALIGN_CHECK_ISPOSITIVE(Rhs); + return Lhs.value() >= Rhs; +} +inline bool operator<(Align Lhs, uint64_t Rhs) { + ALIGN_CHECK_ISPOSITIVE(Rhs); + return Lhs.value() < Rhs; +} +inline bool operator>(Align Lhs, uint64_t Rhs) { + ALIGN_CHECK_ISPOSITIVE(Rhs); + return Lhs.value() > Rhs; +} + +/// Comparisons between MaybeAlign and scalars. +inline bool operator==(MaybeAlign Lhs, uint64_t Rhs) { + return Lhs ? (*Lhs).value() == Rhs : Rhs == 0; +} +inline bool operator!=(MaybeAlign Lhs, uint64_t Rhs) { + return Lhs ? (*Lhs).value() != Rhs : Rhs != 0; +} +inline bool operator<=(MaybeAlign Lhs, uint64_t Rhs) { + ALIGN_CHECK_ISSET(Lhs); + ALIGN_CHECK_ISPOSITIVE(Rhs); + return (*Lhs).value() <= Rhs; +} +inline bool operator>=(MaybeAlign Lhs, uint64_t Rhs) { + ALIGN_CHECK_ISSET(Lhs); + ALIGN_CHECK_ISPOSITIVE(Rhs); + return (*Lhs).value() >= Rhs; +} +inline bool operator<(MaybeAlign Lhs, uint64_t Rhs) { + ALIGN_CHECK_ISSET(Lhs); + ALIGN_CHECK_ISPOSITIVE(Rhs); + return (*Lhs).value() < Rhs; +} +inline bool operator>(MaybeAlign Lhs, uint64_t Rhs) { + ALIGN_CHECK_ISSET(Lhs); + ALIGN_CHECK_ISPOSITIVE(Rhs); + return (*Lhs).value() > Rhs; +} + +/// Comparisons operators between Align. 
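A minimal sketch of the folding and packing helpers above: commonAlignment() keeps the weaker (smaller) of two alignments, and encode()/decodeMaybeAlign() round-trip an optional alignment through a small integer in which 0 stands for undefined.

#include "llvm/Support/Alignment.h"
#include <cassert>

void encodeSketch() {
  // The strongest guarantee both sides can make, same semantics as MinAlign.
  llvm::Align C = llvm::commonAlignment(llvm::Align(8), llvm::Align(32)); // 8
  (void)C;

  unsigned Packed = llvm::encode(llvm::MaybeAlign(16)); // 0 would mean undefined
  llvm::MaybeAlign Restored = llvm::decodeMaybeAlign(Packed);
  assert(Restored.hasValue() && Restored.getValue().value() == 16);
  (void)Restored;
}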
+inline bool operator==(Align Lhs, Align Rhs) { + return Lhs.ShiftValue == Rhs.ShiftValue; +} +inline bool operator!=(Align Lhs, Align Rhs) { + return Lhs.ShiftValue != Rhs.ShiftValue; +} +inline bool operator<=(Align Lhs, Align Rhs) { + return Lhs.ShiftValue <= Rhs.ShiftValue; +} +inline bool operator>=(Align Lhs, Align Rhs) { + return Lhs.ShiftValue >= Rhs.ShiftValue; +} +inline bool operator<(Align Lhs, Align Rhs) { + return Lhs.ShiftValue < Rhs.ShiftValue; +} +inline bool operator>(Align Lhs, Align Rhs) { + return Lhs.ShiftValue > Rhs.ShiftValue; +} + +/// Comparisons operators between Align and MaybeAlign. +inline bool operator==(Align Lhs, MaybeAlign Rhs) { + ALIGN_CHECK_ISSET(Rhs); + return Lhs.value() == (*Rhs).value(); +} +inline bool operator!=(Align Lhs, MaybeAlign Rhs) { + ALIGN_CHECK_ISSET(Rhs); + return Lhs.value() != (*Rhs).value(); +} +inline bool operator<=(Align Lhs, MaybeAlign Rhs) { + ALIGN_CHECK_ISSET(Rhs); + return Lhs.value() <= (*Rhs).value(); +} +inline bool operator>=(Align Lhs, MaybeAlign Rhs) { + ALIGN_CHECK_ISSET(Rhs); + return Lhs.value() >= (*Rhs).value(); +} +inline bool operator<(Align Lhs, MaybeAlign Rhs) { + ALIGN_CHECK_ISSET(Rhs); + return Lhs.value() < (*Rhs).value(); +} +inline bool operator>(Align Lhs, MaybeAlign Rhs) { + ALIGN_CHECK_ISSET(Rhs); + return Lhs.value() > (*Rhs).value(); +} + +/// Comparisons operators between MaybeAlign and Align. +inline bool operator==(MaybeAlign Lhs, Align Rhs) { + ALIGN_CHECK_ISSET(Lhs); + return Lhs && (*Lhs).value() == Rhs.value(); +} +inline bool operator!=(MaybeAlign Lhs, Align Rhs) { + ALIGN_CHECK_ISSET(Lhs); + return Lhs && (*Lhs).value() != Rhs.value(); +} +inline bool operator<=(MaybeAlign Lhs, Align Rhs) { + ALIGN_CHECK_ISSET(Lhs); + return Lhs && (*Lhs).value() <= Rhs.value(); +} +inline bool operator>=(MaybeAlign Lhs, Align Rhs) { + ALIGN_CHECK_ISSET(Lhs); + return Lhs && (*Lhs).value() >= Rhs.value(); +} +inline bool operator<(MaybeAlign Lhs, Align Rhs) { + ALIGN_CHECK_ISSET(Lhs); + return Lhs && (*Lhs).value() < Rhs.value(); +} +inline bool operator>(MaybeAlign Lhs, Align Rhs) { + ALIGN_CHECK_ISSET(Lhs); + return Lhs && (*Lhs).value() > Rhs.value(); +} + +inline Align operator/(Align Lhs, uint64_t Divisor) { + assert(llvm::isPowerOf2_64(Divisor) && + "Divisor must be positive and a power of 2"); + assert(Lhs != 1 && "Can't halve byte alignment"); + return Align(Lhs.value() / Divisor); +} + +inline MaybeAlign operator/(MaybeAlign Lhs, uint64_t Divisor) { + assert(llvm::isPowerOf2_64(Divisor) && + "Divisor must be positive and a power of 2"); + return Lhs ? Lhs.getValue() / Divisor : MaybeAlign(); +} + +inline Align max(MaybeAlign Lhs, Align Rhs) { + return Lhs && *Lhs > Rhs ? *Lhs : Rhs; +} + +inline Align max(Align Lhs, MaybeAlign Rhs) { + return Rhs && *Rhs > Lhs ? *Rhs : Lhs; +} + +#undef ALIGN_CHECK_ISPOSITIVE +#undef ALIGN_CHECK_ISSET + +} // namespace llvm + +#endif // LLVM_SUPPORT_ALIGNMENT_H_ diff --git a/include/llvm/Support/Allocator.h b/include/llvm/Support/Allocator.h index 09e967b98abc..106b90c35bf5 100644 --- a/include/llvm/Support/Allocator.h +++ b/include/llvm/Support/Allocator.h @@ -22,6 +22,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -211,13 +212,11 @@ public: /// Allocate space at the specified alignment. 
LLVM_ATTRIBUTE_RETURNS_NONNULL LLVM_ATTRIBUTE_RETURNS_NOALIAS void * - Allocate(size_t Size, size_t Alignment) { - assert(Alignment > 0 && "0-byte alignnment is not allowed. Use 1 instead."); - + Allocate(size_t Size, Align Alignment) { // Keep track of how many bytes we've allocated. BytesAllocated += Size; - size_t Adjustment = alignmentAdjustment(CurPtr, Alignment); + size_t Adjustment = offsetToAlignedAddr(CurPtr, Alignment); assert(Adjustment + Size >= Size && "Adjustment + Size must not overflow"); size_t SizeToAllocate = Size; @@ -240,7 +239,7 @@ public: } // If Size is really big, allocate a separate slab for it. - size_t PaddedSize = SizeToAllocate + Alignment - 1; + size_t PaddedSize = SizeToAllocate + Alignment.value() - 1; if (PaddedSize > SizeThreshold) { void *NewSlab = Allocator.Allocate(PaddedSize, 0); // We own the new slab and don't want anyone reading anyting other than @@ -268,6 +267,12 @@ public: return AlignedPtr; } + inline LLVM_ATTRIBUTE_RETURNS_NONNULL LLVM_ATTRIBUTE_RETURNS_NOALIAS void * + Allocate(size_t Size, size_t Alignment) { + assert(Alignment > 0 && "0-byte alignnment is not allowed. Use 1 instead."); + return Allocate(Size, Align(Alignment)); + } + // Pull in base class overloads. using AllocatorBase::Allocate; @@ -461,7 +466,7 @@ public: /// all memory allocated so far. void DestroyAll() { auto DestroyElements = [](char *Begin, char *End) { - assert(Begin == (char *)alignAddr(Begin, alignof(T))); + assert(Begin == (char *)alignAddr(Begin, Align::Of())); for (char *Ptr = Begin; Ptr + sizeof(T) <= End; Ptr += sizeof(T)) reinterpret_cast(Ptr)->~T(); }; @@ -470,7 +475,7 @@ public: ++I) { size_t AllocatedSlabSize = BumpPtrAllocator::computeSlabSize( std::distance(Allocator.Slabs.begin(), I)); - char *Begin = (char *)alignAddr(*I, alignof(T)); + char *Begin = (char *)alignAddr(*I, Align::Of()); char *End = *I == Allocator.Slabs.back() ? Allocator.CurPtr : (char *)*I + AllocatedSlabSize; @@ -480,7 +485,8 @@ public: for (auto &PtrAndSize : Allocator.CustomSizedSlabs) { void *Ptr = PtrAndSize.first; size_t Size = PtrAndSize.second; - DestroyElements((char *)alignAddr(Ptr, alignof(T)), (char *)Ptr + Size); + DestroyElements((char *)alignAddr(Ptr, Align::Of()), + (char *)Ptr + Size); } Allocator.Reset(); diff --git a/include/llvm/Support/Automaton.h b/include/llvm/Support/Automaton.h new file mode 100644 index 000000000000..7c13a698e492 --- /dev/null +++ b/include/llvm/Support/Automaton.h @@ -0,0 +1,253 @@ +//===-- Automaton.h - Support for driving TableGen-produced DFAs ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements class that drive and introspect deterministic finite- +// state automata (DFAs) as generated by TableGen's -gen-automata backend. +// +// For a description of how to define an automaton, see +// include/llvm/TableGen/Automaton.td. +// +// One important detail is that these deterministic automata are created from +// (potentially) nondeterministic definitions. Therefore a unique sequence of +// input symbols will produce one path through the DFA but multiple paths +// through the original NFA. An automaton by default only returns "accepted" or +// "not accepted", but frequently we want to analyze what NFA path was taken. 
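Before moving on to Automaton.h, a minimal sketch of the Allocator.h change above: the typed Align overload of BumpPtrAllocator::Allocate is now the primary entry point, and the raw size_t form merely forwards to it.

#include "llvm/Support/Allocator.h"

void allocatorSketch() {
  llvm::BumpPtrAllocator Alloc;

  // Preferred form: a typed alignment.
  void *P = Alloc.Allocate(64, llvm::Align(16));

  // Still accepted: a raw size_t alignment, forwarded to the Align overload
  // (and asserted to be non-zero).
  void *Q = Alloc.Allocate(64, /*Alignment=*/8);

  (void)P; (void)Q;
}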
+// Finding a path through the NFA states that results in a DFA state can help +// answer *what* the solution to a problem was, not just that there exists a +// solution. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_AUTOMATON_H +#define LLVM_SUPPORT_AUTOMATON_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Allocator.h" +#include +#include +#include +#include +#include + +namespace llvm { + +using NfaPath = SmallVector; + +/// Forward define the pair type used by the automata transition info tables. +/// +/// Experimental results with large tables have shown a significant (multiple +/// orders of magnitude) parsing speedup by using a custom struct here with a +/// trivial constructor rather than std::pair. +struct NfaStatePair { + uint64_t FromDfaState, ToDfaState; + + bool operator<(const NfaStatePair &Other) const { + return std::make_tuple(FromDfaState, ToDfaState) < + std::make_tuple(Other.FromDfaState, Other.ToDfaState); + } +}; + +namespace internal { +/// The internal class that maintains all possible paths through an NFA based +/// on a path through the DFA. +class NfaTranscriber { +private: + /// Cached transition table. This is a table of NfaStatePairs that contains + /// zero-terminated sequences pointed to by DFA transitions. + ArrayRef TransitionInfo; + + /// A simple linked-list of traversed states that can have a shared tail. The + /// traversed path is stored in reverse order with the latest state as the + /// head. + struct PathSegment { + uint64_t State; + PathSegment *Tail; + }; + + /// We allocate segment objects frequently. Allocate them upfront and dispose + /// at the end of a traversal rather than hammering the system allocator. + SpecificBumpPtrAllocator Allocator; + + /// Heads of each tracked path. These are not ordered. + std::deque Heads; + + /// The returned paths. This is populated during getPaths. + SmallVector Paths; + + /// Create a new segment and return it. + PathSegment *makePathSegment(uint64_t State, PathSegment *Tail) { + PathSegment *P = Allocator.Allocate(); + *P = {State, Tail}; + return P; + } + + /// Pairs defines a sequence of possible NFA transitions for a single DFA + /// transition. + void transition(ArrayRef Pairs) { + // Iterate over all existing heads. We will mutate the Heads deque during + // iteration. + unsigned NumHeads = Heads.size(); + for (unsigned I = 0; I < NumHeads; ++I) { + PathSegment *Head = Heads[I]; + // The sequence of pairs is sorted. Select the set of pairs that + // transition from the current head state. + auto PI = lower_bound(Pairs, NfaStatePair{Head->State, 0ULL}); + auto PE = upper_bound(Pairs, NfaStatePair{Head->State, INT64_MAX}); + // For every transition from the current head state, add a new path + // segment. + for (; PI != PE; ++PI) + if (PI->FromDfaState == Head->State) + Heads.push_back(makePathSegment(PI->ToDfaState, Head)); + } + // Now we've iterated over all the initial heads and added new ones, + // dispose of the original heads. + Heads.erase(Heads.begin(), std::next(Heads.begin(), NumHeads)); + } + +public: + NfaTranscriber(ArrayRef TransitionInfo) + : TransitionInfo(TransitionInfo) { + reset(); + } + + void reset() { + Paths.clear(); + Heads.clear(); + Allocator.DestroyAll(); + // The initial NFA state is 0. 
+ Heads.push_back(makePathSegment(0ULL, nullptr)); + } + + void transition(unsigned TransitionInfoIdx) { + unsigned EndIdx = TransitionInfoIdx; + while (TransitionInfo[EndIdx].ToDfaState != 0) + ++EndIdx; + ArrayRef Pairs(&TransitionInfo[TransitionInfoIdx], + EndIdx - TransitionInfoIdx); + transition(Pairs); + } + + ArrayRef getPaths() { + Paths.clear(); + for (auto *Head : Heads) { + NfaPath P; + while (Head->State != 0) { + P.push_back(Head->State); + Head = Head->Tail; + } + std::reverse(P.begin(), P.end()); + Paths.push_back(std::move(P)); + } + return Paths; + } +}; +} // namespace internal + +/// A deterministic finite-state automaton. The automaton is defined in +/// TableGen; this object drives an automaton defined by tblgen-emitted tables. +/// +/// An automaton accepts a sequence of input tokens ("actions"). This class is +/// templated on the type of these actions. +template class Automaton { + /// Map from {State, Action} to {NewState, TransitionInfoIdx}. + /// TransitionInfoIdx is used by the DfaTranscriber to analyze the transition. + /// FIXME: This uses a std::map because ActionT can be a pair type including + /// an enum. In particular DenseMapInfo must be defined to use + /// DenseMap here. + /// This is a shared_ptr to allow very quick copy-construction of Automata; this + /// state is immutable after construction so this is safe. + using MapTy = std::map, std::pair>; + std::shared_ptr M; + /// An optional transcription object. This uses much more state than simply + /// traversing the DFA for acceptance, so is heap allocated. + std::shared_ptr Transcriber; + /// The initial DFA state is 1. + uint64_t State = 1; + /// True if we should transcribe and false if not (even if Transcriber is defined). + bool Transcribe; + +public: + /// Create an automaton. + /// \param Transitions The Transitions table as created by TableGen. Note that + /// because the action type differs per automaton, the + /// table type is templated as ArrayRef. + /// \param TranscriptionTable The TransitionInfo table as created by TableGen. + /// + /// Providing the TranscriptionTable argument as non-empty will enable the + /// use of transcription, which analyzes the possible paths in the original + /// NFA taken by the DFA. NOTE: This is substantially more work than simply + /// driving the DFA, so unless you require the getPaths() method leave this + /// empty. + template + Automaton(ArrayRef Transitions, + ArrayRef TranscriptionTable = {}) { + if (!TranscriptionTable.empty()) + Transcriber = + std::make_shared(TranscriptionTable); + Transcribe = Transcriber != nullptr; + M = std::make_shared(); + for (const auto &I : Transitions) + // Greedily read and cache the transition table. + M->emplace(std::make_pair(I.FromDfaState, I.Action), + std::make_pair(I.ToDfaState, I.InfoIdx)); + } + Automaton(const Automaton &) = default; + + /// Reset the automaton to its initial state. + void reset() { + State = 1; + if (Transcriber) + Transcriber->reset(); + } + + /// Enable or disable transcription. Transcription is only available if + /// TranscriptionTable was provided to the constructor. + void enableTranscription(bool Enable = true) { + assert(Transcriber && + "Transcription is only available if TranscriptionTable was provided " + "to the Automaton constructor"); + Transcribe = Enable; + } + + /// Transition the automaton based on input symbol A. Return true if the + /// automaton transitioned to a valid state, false if the automaton + /// transitioned to an invalid state. 
+ /// + /// If this function returns false, all methods are undefined until reset() is + /// called. + bool add(const ActionT &A) { + auto I = M->find({State, A}); + if (I == M->end()) + return false; + if (Transcriber && Transcribe) + Transcriber->transition(I->second.second); + State = I->second.first; + return true; + } + + /// Return true if the automaton can be transitioned based on input symbol A. + bool canAdd(const ActionT &A) { + auto I = M->find({State, A}); + return I != M->end(); + } + + /// Obtain a set of possible paths through the input nondeterministic + /// automaton that could be obtained from the sequence of input actions + /// presented to this deterministic automaton. + ArrayRef getNfaPaths() { + assert(Transcriber && Transcribe && + "Can only obtain NFA paths if transcribing!"); + return Transcriber->getPaths(); + } +}; + +} // namespace llvm + +#endif // LLVM_SUPPORT_AUTOMATON_H diff --git a/include/llvm/Support/BinaryStreamArray.h b/include/llvm/Support/BinaryStreamArray.h index 96d09db69ae5..67ba2e4189be 100644 --- a/include/llvm/Support/BinaryStreamArray.h +++ b/include/llvm/Support/BinaryStreamArray.h @@ -286,7 +286,7 @@ public: // an exact multiple of the element size. consumeError(std::move(EC)); } - assert(llvm::alignmentAdjustment(Data.data(), alignof(T)) == 0); + assert(isAddrAligned(Align::Of(), Data.data())); return *reinterpret_cast(Data.data()); } diff --git a/include/llvm/Support/BinaryStreamReader.h b/include/llvm/Support/BinaryStreamReader.h index d8fddde66bfa..9e16ce227ff8 100644 --- a/include/llvm/Support/BinaryStreamReader.h +++ b/include/llvm/Support/BinaryStreamReader.h @@ -198,7 +198,7 @@ public: if (auto EC = readBytes(Bytes, NumElements * sizeof(T))) return EC; - assert(alignmentAdjustment(Bytes.data(), alignof(T)) == 0 && + assert(isAddrAligned(Align::Of(), Bytes.data()) && "Reading at invalid alignment!"); Array = ArrayRef(reinterpret_cast(Bytes.data()), NumElements); diff --git a/include/llvm/Support/CRC.h b/include/llvm/Support/CRC.h index 6ea8e3edcea4..210890ae06d4 100644 --- a/include/llvm/Support/CRC.h +++ b/include/llvm/Support/CRC.h @@ -6,20 +6,55 @@ // //===----------------------------------------------------------------------===// // -// This file contains basic functions for calculating Cyclic Redundancy Check -// or CRC. +// This file contains implementations of CRC functions. // //===----------------------------------------------------------------------===// #ifndef LLVM_SUPPORT_CRC_H #define LLVM_SUPPORT_CRC_H -#include "llvm/ADT/StringRef.h" #include "llvm/Support/DataTypes.h" namespace llvm { -/// zlib independent CRC32 calculation. -uint32_t crc32(uint32_t CRC, StringRef S); +template class ArrayRef; + +// Compute the CRC-32 of Data. +uint32_t crc32(ArrayRef Data); + +// Compute the running CRC-32 of Data, with CRC being the previous value of the +// checksum. +uint32_t crc32(uint32_t CRC, ArrayRef Data); + +// Class for computing the JamCRC. +// +// We will use the "Rocksoft^tm Model CRC Algorithm" to describe the properties +// of this CRC: +// Width : 32 +// Poly : 04C11DB7 +// Init : FFFFFFFF +// RefIn : True +// RefOut : True +// XorOut : 00000000 +// Check : 340BC6D9 (result of CRC for "123456789") +// +// In other words, this is the same as CRC-32, except that XorOut is 0 instead +// of FFFFFFFF. +// +// N.B. We permit flexibility of the "Init" value. Some consumers of this need +// it to be zero. +class JamCRC { +public: + JamCRC(uint32_t Init = 0xFFFFFFFFU) : CRC(Init) {} + + // Update the CRC calculation with Data. 
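Putting the Automaton pieces above together: the transition tables are normally emitted by TableGen's -gen-automata backend, but a hand-written table of the same shape shows how the driver is used. The MyTransition record and the two-state DFA below are hypothetical.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Automaton.h"
#include <cstdint>

// Hypothetical stand-in for a tblgen-emitted transition record; the field
// names are the ones the Automaton constructor reads.
struct MyTransition {
  uint64_t FromDfaState;
  uint64_t Action;       // the ActionT of the automaton
  uint64_t ToDfaState;
  unsigned InfoIdx;      // index into the transcription table (unused here)
};

// A two-state DFA over the actions {1, 2}: 1 --1--> 2 and 2 --2--> 1.
static const MyTransition Transitions[] = {
    {1, 1, 2, 0},
    {2, 2, 1, 0},
};

bool automatonSketch() {
  llvm::Automaton<uint64_t> A(llvm::makeArrayRef(Transitions));
  bool Ok = A.add(1);         // state 1 -> 2
  Ok &= A.add(2);             // state 2 -> 1
  bool Rejected = !A.add(2);  // no edge for action 2 from state 1
  A.reset();                  // back to the initial state
  return Ok && Rejected;
}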
+ void update(ArrayRef Data); + + uint32_t getCRC() const { return CRC; } + +private: + uint32_t CRC; +}; + } // end namespace llvm #endif diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h index 3cc2c3c0121b..63784463e171 100644 --- a/include/llvm/Support/CommandLine.h +++ b/include/llvm/Support/CommandLine.h @@ -2000,6 +2000,9 @@ void ResetAllOptionOccurrences(); /// where no options are supported. void ResetCommandLineParser(); +/// Parses `Arg` into the option handler `Handler`. +bool ProvidePositionalOption(Option *Handler, StringRef Arg, int i); + } // end namespace cl } // end namespace llvm diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h index 3f4f465f3960..cb7e57d4cd21 100644 --- a/include/llvm/Support/Compiler.h +++ b/include/llvm/Support/Compiler.h @@ -7,7 +7,8 @@ //===----------------------------------------------------------------------===// // // This file defines several macros, based on the current compiler. This allows -// use of compiler-specific features in a way that remains portable. +// use of compiler-specific features in a way that remains portable. This header +// can be included from either C or C++. // //===----------------------------------------------------------------------===// @@ -16,7 +17,9 @@ #include "llvm/Config/llvm-config.h" +#ifdef __cplusplus #include +#endif #include #if defined(_MSC_VER) @@ -35,14 +38,20 @@ # define __has_attribute(x) 0 #endif -#ifndef __has_cpp_attribute -# define __has_cpp_attribute(x) 0 -#endif - #ifndef __has_builtin # define __has_builtin(x) 0 #endif +// Only use __has_cpp_attribute in C++ mode. GCC defines __has_cpp_attribute in +// C mode, but the :: in __has_cpp_attribute(scoped::attribute) is invalid. +#ifndef LLVM_HAS_CPP_ATTRIBUTE +#if defined(__cplusplus) && defined(__has_cpp_attribute) +# define LLVM_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) +#else +# define LLVM_HAS_CPP_ATTRIBUTE(x) 0 +#endif +#endif + /// \macro LLVM_GNUC_PREREQ /// Extend the default __GNUC_PREREQ even if glibc's features.h isn't /// available. @@ -62,13 +71,21 @@ /// \macro LLVM_MSC_PREREQ /// Is the compiler MSVC of at least the specified version? /// The common \param version values to check for are: -/// * 1900: Microsoft Visual Studio 2015 / 14.0 +/// * 1910: VS2017, version 15.1 & 15.2 +/// * 1911: VS2017, version 15.3 & 15.4 +/// * 1912: VS2017, version 15.5 +/// * 1913: VS2017, version 15.6 +/// * 1914: VS2017, version 15.7 +/// * 1915: VS2017, version 15.8 +/// * 1916: VS2017, version 15.9 +/// * 1920: VS2019, version 16.0 +/// * 1921: VS2019, version 16.1 #ifdef _MSC_VER #define LLVM_MSC_PREREQ(version) (_MSC_VER >= (version)) -// We require at least MSVC 2015. -#if !LLVM_MSC_PREREQ(1900) -#error LLVM requires at least MSVC 2015. +// We require at least MSVC 2017. +#if !LLVM_MSC_PREREQ(1910) +#error LLVM requires at least MSVC 2017. #endif #else @@ -120,14 +137,18 @@ #endif /// LLVM_NODISCARD - Warn if a type or return value is discarded. -#if __cplusplus > 201402L && __has_cpp_attribute(nodiscard) + +// Use the 'nodiscard' attribute in C++17 or newer mode. +#if __cplusplus > 201402L && LLVM_HAS_CPP_ATTRIBUTE(nodiscard) #define LLVM_NODISCARD [[nodiscard]] -#elif !__cplusplus -// Workaround for llvm.org/PR23435, since clang 3.6 and below emit a spurious -// error when __has_cpp_attribute is given a scoped attribute in C mode. 
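A minimal sketch of the reworked CRC interface above: the free crc32() functions now take a byte ArrayRef instead of a StringRef, the two-argument form threads a running checksum through successive chunks, and JamCRC accumulates the same polynomial without the final xor-out.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/CRC.h"
#include <cassert>
#include <cstdint>

uint32_t crcSketch() {
  const uint8_t Bytes[] = {'1', '2', '3', '4', '5', '6', '7', '8', '9'};

  // One-shot CRC-32; the check value for "123456789" is 0xCBF43926.
  uint32_t Whole = llvm::crc32(Bytes);

  // The same checksum built up chunk by chunk with the running form.
  uint32_t Running = llvm::crc32(0, llvm::makeArrayRef(Bytes, 4));
  Running = llvm::crc32(Running, llvm::makeArrayRef(Bytes + 4, 5));
  assert(Running == Whole && "chunked and one-shot results agree");
  (void)Whole; (void)Running;

  llvm::JamCRC J;     // Init defaults to 0xFFFFFFFF
  J.update(Bytes);
  return J.getCRC();  // 0x340BC6D9 for this input, per the comment above
}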
-#define LLVM_NODISCARD -#elif __has_cpp_attribute(clang::warn_unused_result) +#elif LLVM_HAS_CPP_ATTRIBUTE(clang::warn_unused_result) #define LLVM_NODISCARD [[clang::warn_unused_result]] +// Clang in C++14 mode claims that it has the 'nodiscard' attribute, but also +// warns in the pedantic mode that 'nodiscard' is a C++17 extension (PR33518). +// Use the 'nodiscard' attribute in C++14 mode only with GCC. +// TODO: remove this workaround when PR33518 is resolved. +#elif defined(__GNUC__) && LLVM_HAS_CPP_ATTRIBUTE(nodiscard) +#define LLVM_NODISCARD [[nodiscard]] #else #define LLVM_NODISCARD #endif @@ -139,7 +160,7 @@ // The clang-tidy check bugprone-use-after-move recognizes this attribute as a // marker that a moved-from object has left the indeterminate state and can be // reused. -#if __has_cpp_attribute(clang::reinitializes) +#if LLVM_HAS_CPP_ATTRIBUTE(clang::reinitializes) #define LLVM_ATTRIBUTE_REINITIALIZES [[clang::reinitializes]] #else #define LLVM_ATTRIBUTE_REINITIALIZES @@ -240,15 +261,13 @@ #endif /// LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements. -#if __cplusplus > 201402L && __has_cpp_attribute(fallthrough) +#if __cplusplus > 201402L && LLVM_HAS_CPP_ATTRIBUTE(fallthrough) #define LLVM_FALLTHROUGH [[fallthrough]] -#elif __has_cpp_attribute(gnu::fallthrough) +#elif LLVM_HAS_CPP_ATTRIBUTE(gnu::fallthrough) #define LLVM_FALLTHROUGH [[gnu::fallthrough]] -#elif !__cplusplus -// Workaround for llvm.org/PR23435, since clang 3.6 and below emit a spurious -// error when __has_cpp_attribute is given a scoped attribute in C mode. -#define LLVM_FALLTHROUGH -#elif __has_cpp_attribute(clang::fallthrough) +#elif __has_attribute(fallthrough) +#define LLVM_FALLTHROUGH __attribute__((fallthrough)) +#elif LLVM_HAS_CPP_ATTRIBUTE(clang::fallthrough) #define LLVM_FALLTHROUGH [[clang::fallthrough]] #else #define LLVM_FALLTHROUGH @@ -256,7 +275,7 @@ /// LLVM_REQUIRE_CONSTANT_INITIALIZATION - Apply this to globals to ensure that /// they are constant initialized. -#if __has_cpp_attribute(clang::require_constant_initialization) +#if LLVM_HAS_CPP_ATTRIBUTE(clang::require_constant_initialization) #define LLVM_REQUIRE_CONSTANT_INITIALIZATION \ [[clang::require_constant_initialization]] #else @@ -338,14 +357,6 @@ # define LLVM_ASSUME_ALIGNED(p, a) (p) #endif -/// \macro LLVM_ALIGNAS -/// Used to specify a minimum alignment for a structure or variable. -#if __GNUC__ && !__has_feature(cxx_alignas) && !LLVM_GNUC_PREREQ(4, 8, 1) -# define LLVM_ALIGNAS(x) __attribute__((aligned(x))) -#else -# define LLVM_ALIGNAS(x) alignas(x) -#endif - /// \macro LLVM_PACKED /// Used to specify a packed structure. /// LLVM_PACKED( @@ -376,8 +387,8 @@ /// \macro LLVM_PTR_SIZE /// A constant integer equivalent to the value of sizeof(void*). -/// Generally used in combination with LLVM_ALIGNAS or when doing computation in -/// the preprocessor. +/// Generally used in combination with alignas or when doing computation in the +/// preprocessor. #ifdef __SIZEOF_POINTER__ # define LLVM_PTR_SIZE __SIZEOF_POINTER__ #elif defined(_WIN64) @@ -527,6 +538,7 @@ void AnnotateIgnoreWritesEnd(const char *file, int line); #define LLVM_ENABLE_EXCEPTIONS 1 #endif +#ifdef __cplusplus namespace llvm { /// Allocate a buffer of memory with the given size and alignment. 
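A minimal sketch of the attribute macros touched above, which now route through LLVM_HAS_CPP_ATTRIBUTE internally; with LLVM_ALIGNAS removed, the standard alignas keyword is written directly.

#include "llvm/Support/Compiler.h"

// Callers should not ignore the result.
LLVM_NODISCARD bool tryReserve(unsigned N) { return N < 1024; }

// LLVM_ALIGNAS(16) is gone; use the language keyword.
struct alignas(16) Vec4 {
  float Lanes[4];
};

int classify(int Kind, int &Count) {
  switch (Kind) {
  case 0:
    ++Count; // do some work, then deliberately fall through
    LLVM_FALLTHROUGH;
  case 1:
    return Count;
  default:
    return 0;
  }
}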
@@ -569,4 +581,5 @@ inline void deallocate_buffer(void *Ptr, size_t Size, size_t Alignment) { } // End namespace llvm +#endif // __cplusplus #endif diff --git a/include/llvm/Support/DataExtractor.h b/include/llvm/Support/DataExtractor.h index 6b08a2a2a445..f590a1e104fb 100644 --- a/include/llvm/Support/DataExtractor.h +++ b/include/llvm/Support/DataExtractor.h @@ -11,6 +11,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/Error.h" namespace llvm { @@ -42,6 +43,38 @@ class DataExtractor { uint8_t IsLittleEndian; uint8_t AddressSize; public: + /// A class representing a position in a DataExtractor, as well as any error + /// encountered during extraction. It enables one to extract a sequence of + /// values without error-checking and then checking for errors in bulk at the + /// end. The class holds an Error object, so failing to check the result of + /// the parse will result in a runtime error. The error flag is sticky and + /// will cause all subsequent extraction functions to fail without even + /// attempting to parse and without updating the Cursor offset. After clearing + /// the error flag, one can again use the Cursor object for parsing. + class Cursor { + uint64_t Offset; + Error Err; + + friend class DataExtractor; + + public: + /// Construct a cursor for extraction from the given offset. + explicit Cursor(uint64_t Offset) : Offset(Offset), Err(Error::success()) {} + + /// Checks whether the cursor is valid (i.e. no errors were encountered). In + /// case of errors, this does not clear the error flag -- one must call + /// takeError() instead. + explicit operator bool() { return !Err; } + + /// Return the current position of this Cursor. In the error state this is + /// the position of the Cursor before the first error was encountered. + uint64_t tell() const { return Offset; } + + /// Return error contained inside this Cursor, if any. Clears the internal + /// Cursor state. + Error takeError() { return std::move(Err); } + }; + /// Construct with a buffer that is owned by the caller. /// /// This constructor allows us to use data that is owned by the @@ -49,6 +82,11 @@ public: /// valid. DataExtractor(StringRef Data, bool IsLittleEndian, uint8_t AddressSize) : Data(Data), IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {} + DataExtractor(ArrayRef Data, bool IsLittleEndian, + uint8_t AddressSize) + : Data(StringRef(reinterpret_cast(Data.data()), + Data.size())), + IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {} /// Get the data pointed to by this extractor. StringRef getData() const { return Data; } @@ -79,17 +117,17 @@ public: /// pointed to by \a offset_ptr is out of bounds, or if the /// offset plus the length of the C string is out of bounds, /// NULL will be returned. - const char *getCStr(uint32_t *offset_ptr) const; + const char *getCStr(uint64_t *offset_ptr) const; - /// Extract a C string from \a *OffsetPtr. + /// Extract a C string from \a *offset_ptr. /// /// Returns a StringRef for the C String from the data at the offset - /// pointed to by \a OffsetPtr. A variable length NULL terminated C - /// string will be extracted and the \a OffsetPtr will be + /// pointed to by \a offset_ptr. A variable length NULL terminated C + /// string will be extracted and the \a offset_ptr will be /// updated with the offset of the byte that follows the NULL /// terminator byte. 
/// - /// \param[in,out] OffsetPtr + /// \param[in,out] offset_ptr /// A pointer to an offset within the data that will be advanced /// by the appropriate number of bytes if the value is extracted /// correctly. If the offset is out of bounds or there are not @@ -98,10 +136,10 @@ public: /// /// \return /// A StringRef for the C string value in the data. If the offset - /// pointed to by \a OffsetPtr is out of bounds, or if the + /// pointed to by \a offset_ptr is out of bounds, or if the /// offset plus the length of the C string is out of bounds, /// a default-initialized StringRef will be returned. - StringRef getCStrRef(uint32_t *OffsetPtr) const; + StringRef getCStrRef(uint64_t *offset_ptr) const; /// Extract an unsigned integer of size \a byte_size from \a /// *offset_ptr. @@ -124,10 +162,24 @@ public: /// @param[in] byte_size /// The size in byte of the integer to extract. /// + /// @param[in,out] Err + /// A pointer to an Error object. Upon return the Error object is set to + /// indicate the result (success/failure) of the function. If the Error + /// object is already set when calling this function, no extraction is + /// performed. + /// /// @return /// The unsigned integer value that was extracted, or zero on /// failure. - uint64_t getUnsigned(uint32_t *offset_ptr, uint32_t byte_size) const; + uint64_t getUnsigned(uint64_t *offset_ptr, uint32_t byte_size, + Error *Err = nullptr) const; + + /// Extract an unsigned integer of the given size from the location given by + /// the cursor. In case of an extraction error, or if the cursor is already in + /// an error state, zero is returned. + uint64_t getUnsigned(Cursor &C, uint32_t Size) const { + return getUnsigned(&C.Offset, Size, &C.Err); + } /// Extract an signed integer of size \a byte_size from \a *offset_ptr. /// @@ -152,7 +204,7 @@ public: /// @return /// The sign extended signed integer value that was extracted, /// or zero on failure. - int64_t getSigned(uint32_t *offset_ptr, uint32_t size) const; + int64_t getSigned(uint64_t *offset_ptr, uint32_t size) const; //------------------------------------------------------------------ /// Extract an pointer from \a *offset_ptr. @@ -171,10 +223,15 @@ public: /// /// @return /// The extracted pointer value as a 64 integer. - uint64_t getAddress(uint32_t *offset_ptr) const { + uint64_t getAddress(uint64_t *offset_ptr) const { return getUnsigned(offset_ptr, AddressSize); } + /// Extract a pointer-sized unsigned integer from the location given by the + /// cursor. In case of an extraction error, or if the cursor is already in + /// an error state, zero is returned. + uint64_t getAddress(Cursor &C) const { return getUnsigned(C, AddressSize); } + /// Extract a uint8_t value from \a *offset_ptr. /// /// Extract a single uint8_t from the binary data at the offset @@ -187,9 +244,20 @@ public: /// enough bytes to extract this value, the offset will be left /// unmodified. /// + /// @param[in,out] Err + /// A pointer to an Error object. Upon return the Error object is set to + /// indicate the result (success/failure) of the function. If the Error + /// object is already set when calling this function, no extraction is + /// performed. + /// /// @return /// The extracted uint8_t value. - uint8_t getU8(uint32_t *offset_ptr) const; + uint8_t getU8(uint64_t *offset_ptr, Error *Err = nullptr) const; + + /// Extract a single uint8_t value from the location given by the cursor. In + /// case of an extraction error, or if the cursor is already in an error + /// state, zero is returned. 
+ uint8_t getU8(Cursor &C) const { return getU8(&C.Offset, &C.Err); } /// Extract \a count uint8_t values from \a *offset_ptr. /// @@ -214,7 +282,27 @@ public: /// @return /// \a dst if all values were properly extracted and copied, /// NULL otherise. - uint8_t *getU8(uint32_t *offset_ptr, uint8_t *dst, uint32_t count) const; + uint8_t *getU8(uint64_t *offset_ptr, uint8_t *dst, uint32_t count) const; + + /// Extract \a Count uint8_t values from the location given by the cursor and + /// store them into the destination buffer. In case of an extraction error, or + /// if the cursor is already in an error state, a nullptr is returned and the + /// destination buffer is left unchanged. + uint8_t *getU8(Cursor &C, uint8_t *Dst, uint32_t Count) const; + + /// Extract \a Count uint8_t values from the location given by the cursor and + /// store them into the destination vector. The vector is resized to fit the + /// extracted data. In case of an extraction error, or if the cursor is + /// already in an error state, the destination vector is left unchanged and + /// cursor is placed into an error state. + void getU8(Cursor &C, SmallVectorImpl &Dst, uint32_t Count) const { + if (isValidOffsetForDataOfSize(C.Offset, Count)) + Dst.resize(Count); + + // This relies on the fact that getU8 will not attempt to write to the + // buffer if isValidOffsetForDataOfSize(C.Offset, Count) is false. + getU8(C, Dst.data(), Count); + } //------------------------------------------------------------------ /// Extract a uint16_t value from \a *offset_ptr. @@ -229,10 +317,21 @@ public: /// enough bytes to extract this value, the offset will be left /// unmodified. /// + /// @param[in,out] Err + /// A pointer to an Error object. Upon return the Error object is set to + /// indicate the result (success/failure) of the function. If the Error + /// object is already set when calling this function, no extraction is + /// performed. + /// /// @return /// The extracted uint16_t value. //------------------------------------------------------------------ - uint16_t getU16(uint32_t *offset_ptr) const; + uint16_t getU16(uint64_t *offset_ptr, Error *Err = nullptr) const; + + /// Extract a single uint16_t value from the location given by the cursor. In + /// case of an extraction error, or if the cursor is already in an error + /// state, zero is returned. + uint16_t getU16(Cursor &C) const { return getU16(&C.Offset, &C.Err); } /// Extract \a count uint16_t values from \a *offset_ptr. /// @@ -257,7 +356,7 @@ public: /// @return /// \a dst if all values were properly extracted and copied, /// NULL otherise. - uint16_t *getU16(uint32_t *offset_ptr, uint16_t *dst, uint32_t count) const; + uint16_t *getU16(uint64_t *offset_ptr, uint16_t *dst, uint32_t count) const; /// Extract a 24-bit unsigned value from \a *offset_ptr and return it /// in a uint32_t. @@ -274,7 +373,7 @@ public: /// /// @return /// The extracted 24-bit value represented in a uint32_t. - uint32_t getU24(uint32_t *offset_ptr) const; + uint32_t getU24(uint64_t *offset_ptr) const; /// Extract a uint32_t value from \a *offset_ptr. /// @@ -288,9 +387,20 @@ public: /// enough bytes to extract this value, the offset will be left /// unmodified. /// + /// @param[in,out] Err + /// A pointer to an Error object. Upon return the Error object is set to + /// indicate the result (success/failure) of the function. If the Error + /// object is already set when calling this function, no extraction is + /// performed. + /// /// @return /// The extracted uint32_t value. 
- uint32_t getU32(uint32_t *offset_ptr) const; + uint32_t getU32(uint64_t *offset_ptr, Error *Err = nullptr) const; + + /// Extract a single uint32_t value from the location given by the cursor. In + /// case of an extraction error, or if the cursor is already in an error + /// state, zero is returned. + uint32_t getU32(Cursor &C) const { return getU32(&C.Offset, &C.Err); } /// Extract \a count uint32_t values from \a *offset_ptr. /// @@ -315,7 +425,7 @@ public: /// @return /// \a dst if all values were properly extracted and copied, /// NULL otherise. - uint32_t *getU32(uint32_t *offset_ptr, uint32_t *dst, uint32_t count) const; + uint32_t *getU32(uint64_t *offset_ptr, uint32_t *dst, uint32_t count) const; /// Extract a uint64_t value from \a *offset_ptr. /// @@ -329,9 +439,20 @@ public: /// enough bytes to extract this value, the offset will be left /// unmodified. /// + /// @param[in,out] Err + /// A pointer to an Error object. Upon return the Error object is set to + /// indicate the result (success/failure) of the function. If the Error + /// object is already set when calling this function, no extraction is + /// performed. + /// /// @return /// The extracted uint64_t value. - uint64_t getU64(uint32_t *offset_ptr) const; + uint64_t getU64(uint64_t *offset_ptr, Error *Err = nullptr) const; + + /// Extract a single uint64_t value from the location given by the cursor. In + /// case of an extraction error, or if the cursor is already in an error + /// state, zero is returned. + uint64_t getU64(Cursor &C) const { return getU64(&C.Offset, &C.Err); } /// Extract \a count uint64_t values from \a *offset_ptr. /// @@ -356,7 +477,7 @@ public: /// @return /// \a dst if all values were properly extracted and copied, /// NULL otherise. - uint64_t *getU64(uint32_t *offset_ptr, uint64_t *dst, uint32_t count) const; + uint64_t *getU64(uint64_t *offset_ptr, uint64_t *dst, uint32_t count) const; /// Extract a signed LEB128 value from \a *offset_ptr. /// @@ -374,7 +495,7 @@ public: /// /// @return /// The extracted signed integer value. - int64_t getSLEB128(uint32_t *offset_ptr) const; + int64_t getSLEB128(uint64_t *offset_ptr) const; /// Extract a unsigned LEB128 value from \a *offset_ptr. /// @@ -390,23 +511,44 @@ public: /// enough bytes to extract this value, the offset will be left /// unmodified. /// + /// @param[in,out] Err + /// A pointer to an Error object. Upon return the Error object is set to + /// indicate the result (success/failure) of the function. If the Error + /// object is already set when calling this function, no extraction is + /// performed. + /// /// @return /// The extracted unsigned integer value. - uint64_t getULEB128(uint32_t *offset_ptr) const; + uint64_t getULEB128(uint64_t *offset_ptr, llvm::Error *Err = nullptr) const; + + /// Extract an unsigned ULEB128 value from the location given by the cursor. + /// In case of an extraction error, or if the cursor is already in an error + /// state, zero is returned. + uint64_t getULEB128(Cursor &C) const { return getULEB128(&C.Offset, &C.Err); } + + /// Advance the Cursor position by the given number of bytes. No-op if the + /// cursor is in an error state. + void skip(Cursor &C, uint64_t Length) const; + + /// Return true iff the cursor is at the end of the buffer, regardless of the + /// error state of the cursor. The only way both eof and error states can be + /// true is if one attempts a read while the cursor is at the very end of the + /// data buffer. 
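A minimal sketch of the widened 64-bit offsets and the new Cursor-based error handling described above: several values are extracted without intermediate checks, and the sticky error is examined once at the end. The record layout parsed here is hypothetical.

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Error.h"
#include <cstdint>

// Parses a hypothetical little-endian record: u16 tag, u32 length, u8 kind.
bool parseRecord(llvm::StringRef Bytes) {
  llvm::DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);

  // Offset-pointer style, now with 64-bit offsets.
  uint64_t Offset = 0;
  uint16_t Tag = DE.getU16(&Offset);
  (void)Tag;

  // Cursor style: the cursor carries both the offset and a sticky Error.
  llvm::DataExtractor::Cursor C(0);
  uint16_t Tag2 = DE.getU16(C);
  uint32_t Length = DE.getU32(C);
  uint8_t Kind = DE.getU8(C);
  (void)Tag2; (void)Length; (void)Kind;

  if (!C) {
    // The first failure was recorded in the cursor; consume or report it.
    llvm::consumeError(C.takeError());
    return false;
  }
  return true; // testing the cursor above also marks the success state checked
}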
+ bool eof(const Cursor &C) const { return Data.size() == C.Offset; } /// Test the validity of \a offset. /// /// @return /// \b true if \a offset is a valid offset into the data in this /// object, \b false otherwise. - bool isValidOffset(uint32_t offset) const { return Data.size() > offset; } + bool isValidOffset(uint64_t offset) const { return Data.size() > offset; } /// Test the availability of \a length bytes of data from \a offset. /// /// @return /// \b true if \a offset is a valid offset and there are \a /// length bytes available at that offset, \b false otherwise. - bool isValidOffsetForDataOfSize(uint32_t offset, uint32_t length) const { + bool isValidOffsetForDataOfSize(uint64_t offset, uint64_t length) const { return offset + length >= offset && isValidOffset(offset + length - 1); } @@ -417,9 +559,15 @@ public: /// \b true if \a offset is a valid offset and there are enough /// bytes for a pointer available at that offset, \b false /// otherwise. - bool isValidOffsetForAddress(uint32_t offset) const { + bool isValidOffsetForAddress(uint64_t offset) const { return isValidOffsetForDataOfSize(offset, AddressSize); } + +protected: + // Make it possible for subclasses to access these fields without making them + // public. + static uint64_t &getOffset(Cursor &C) { return C.Offset; } + static Error &getError(Cursor &C) { return C.Err; } }; } // namespace llvm diff --git a/include/llvm/Support/Endian.h b/include/llvm/Support/Endian.h index d8be94427d7e..87aecedd3a4b 100644 --- a/include/llvm/Support/Endian.h +++ b/include/llvm/Support/Endian.h @@ -203,9 +203,8 @@ inline void writeAtBitAlignment(void *memory, value_type value, namespace detail { -template +template ::value> struct packed_endian_specific_integral { using value_type = ValueType; static constexpr endianness endian = Endian; @@ -246,8 +245,9 @@ struct packed_endian_specific_integral { } private: - AlignedCharArray::value, - sizeof(value_type)> Value; + struct { + alignas(ALIGN) char buffer[sizeof(value_type)]; + } Value; public: struct ref { diff --git a/include/llvm/Support/Error.h b/include/llvm/Support/Error.h index 299fce7a1368..350877a219bf 100644 --- a/include/llvm/Support/Error.h +++ b/include/llvm/Support/Error.h @@ -328,7 +328,7 @@ inline ErrorSuccess Error::success() { return ErrorSuccess(); } /// Make a Error instance representing failure using the given error info /// type. template Error make_error(ArgTs &&... Args) { - return Error(llvm::make_unique(std::forward(Args)...)); + return Error(std::make_unique(std::forward(Args)...)); } /// Base class for user error types. Users should declare their error types @@ -548,7 +548,7 @@ public: /// Take ownership of the stored error. /// After calling this the Expected is in an indeterminate state that can /// only be safely destructed. No further calls (beside the destructor) should - /// be made on the Expected vaule. + /// be made on the Expected value. 
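The Endian.h hunk above only swaps the storage of packed_endian_specific_integral from AlignedCharArray to a plain alignas buffer; usage through the existing aliases such as llvm::support::ulittle32_t (assumed here to be the unaligned little-endian variant, as in prior releases) is unchanged. A minimal sketch:

#include "llvm/Support/Endian.h"
#include <cstdint>

uint32_t readLittle32(const uint8_t *Bytes) {
  // The packed integral reads with the declared byte order and alignment
  // regardless of the host, so {0x78, 0x56, 0x34, 0x12} yields 0x12345678.
  return *reinterpret_cast<const llvm::support::ulittle32_t *>(Bytes);
}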
Error takeError() { #if LLVM_ENABLE_ABI_BREAKING_CHECKS Unchecked = false; @@ -704,6 +704,12 @@ inline void cantFail(Error Err, const char *Msg = nullptr) { if (Err) { if (!Msg) Msg = "Failure value returned from cantFail wrapped call"; +#ifndef NDEBUG + std::string Str; + raw_string_ostream OS(Str); + OS << Msg << "\n" << Err; + Msg = OS.str().c_str(); +#endif llvm_unreachable(Msg); } } @@ -728,6 +734,13 @@ T cantFail(Expected ValOrErr, const char *Msg = nullptr) { else { if (!Msg) Msg = "Failure value returned from cantFail wrapped call"; +#ifndef NDEBUG + std::string Str; + raw_string_ostream OS(Str); + auto E = ValOrErr.takeError(); + OS << Msg << "\n" << E; + Msg = OS.str().c_str(); +#endif llvm_unreachable(Msg); } } @@ -752,6 +765,13 @@ T& cantFail(Expected ValOrErr, const char *Msg = nullptr) { else { if (!Msg) Msg = "Failure value returned from cantFail wrapped call"; +#ifndef NDEBUG + std::string Str; + raw_string_ostream OS(Str); + auto E = ValOrErr.takeError(); + OS << Msg << "\n" << E; + Msg = OS.str().c_str(); +#endif llvm_unreachable(Msg); } } @@ -982,6 +1002,20 @@ inline void consumeError(Error Err) { handleAllErrors(std::move(Err), [](const ErrorInfoBase &) {}); } +/// Convert an Expected to an Optional without doing anything. This method +/// should be used only where an error can be considered a reasonable and +/// expected return value. +/// +/// Uses of this method are potentially indicative of problems: perhaps the +/// error should be propagated further, or the error-producer should just +/// return an Optional in the first place. +template Optional expectedToOptional(Expected &&E) { + if (E) + return std::move(*E); + consumeError(E.takeError()); + return None; +} + /// Helper for converting an Error to a bool. /// /// This method returns true if Err is in an error state, or false if it is @@ -1170,6 +1204,10 @@ inline Error createStringError(std::error_code EC, char const *Fmt, Error createStringError(std::error_code EC, char const *Msg); +inline Error createStringError(std::error_code EC, const Twine &S) { + return createStringError(EC, S.str().c_str()); +} + template inline Error createStringError(std::errc EC, char const *Fmt, const Ts &... Vals) { diff --git a/include/llvm/Support/FileCheck.h b/include/llvm/Support/FileCheck.h index 0cd25a71a3b3..2547449246a8 100644 --- a/include/llvm/Support/FileCheck.h +++ b/include/llvm/Support/FileCheck.h @@ -13,12 +13,12 @@ #ifndef LLVM_SUPPORT_FILECHECK_H #define LLVM_SUPPORT_FILECHECK_H -#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Regex.h" #include "llvm/Support/SourceMgr.h" +#include #include -#include namespace llvm { @@ -30,6 +30,7 @@ struct FileCheckRequest { std::vector GlobalDefines; bool AllowEmptyInput = false; bool MatchFullLines = false; + bool IgnoreCase = false; bool EnableVarScope = false; bool AllowDeprecatedDagOverlap = false; bool Verbose = false; @@ -37,217 +38,7 @@ struct FileCheckRequest { }; //===----------------------------------------------------------------------===// -// Numeric substitution handling code. -//===----------------------------------------------------------------------===// - -/// Base class representing the AST of a given expression. -class FileCheckExpressionAST { -public: - virtual ~FileCheckExpressionAST() = default; - - /// Evaluates and \returns the value of the expression represented by this - /// AST or an error if evaluation fails. 
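A minimal sketch of the two small Error.h additions above: createStringError() now accepts a Twine directly, and expectedToOptional() consumes the error when failure is an acceptable outcome. The parseLevel() helper is hypothetical.

#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Error.h"

// Hypothetical parser used only to demonstrate the helpers.
llvm::Expected<int> parseLevel(llvm::StringRef S) {
  if (S == "debug")
    return 3;
  // The Twine overload avoids going through a format string.
  return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                 "unknown level: " + S);
}

int levelOrDefault(llvm::StringRef S) {
  // Any error returned by parseLevel is consumed inside expectedToOptional.
  llvm::Optional<int> L = llvm::expectedToOptional(parseLevel(S));
  return L ? *L : 0;
}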
- virtual Expected eval() const = 0; -}; - -/// Class representing an unsigned literal in the AST of an expression. -class FileCheckExpressionLiteral : public FileCheckExpressionAST { -private: - /// Actual value of the literal. - uint64_t Value; - -public: - /// Constructs a literal with the specified value. - FileCheckExpressionLiteral(uint64_t Val) : Value(Val) {} - - /// \returns the literal's value. - Expected eval() const { return Value; } -}; - -/// Class to represent an undefined variable error, which quotes that -/// variable's name when printed. -class FileCheckUndefVarError : public ErrorInfo { -private: - StringRef VarName; - -public: - static char ID; - - FileCheckUndefVarError(StringRef VarName) : VarName(VarName) {} - - StringRef getVarName() const { return VarName; } - - std::error_code convertToErrorCode() const override { - return inconvertibleErrorCode(); - } - - /// Print name of variable associated with this error. - void log(raw_ostream &OS) const override { - OS << "\""; - OS.write_escaped(VarName) << "\""; - } -}; - -/// Class representing a numeric variable and its associated current value. -class FileCheckNumericVariable { -private: - /// Name of the numeric variable. - StringRef Name; - - /// Value of numeric variable, if defined, or None otherwise. - Optional Value; - - /// Line number where this variable is defined, or None if defined before - /// input is parsed. Used to determine whether a variable is defined on the - /// same line as a given use. - Optional DefLineNumber; - -public: - /// Constructor for a variable \p Name defined at line \p DefLineNumber or - /// defined before input is parsed if DefLineNumber is None. - FileCheckNumericVariable(StringRef Name, - Optional DefLineNumber = None) - : Name(Name), DefLineNumber(DefLineNumber) {} - - /// \returns name of this numeric variable. - StringRef getName() const { return Name; } - - /// \returns this variable's value. - Optional getValue() const { return Value; } - - /// Sets value of this numeric variable, if undefined. Triggers an assertion - /// failure if the variable is actually defined. - void setValue(uint64_t Value); - - /// Clears value of this numeric variable, regardless of whether it is - /// currently defined or not. - void clearValue(); - - /// \returns the line number where this variable is defined, if any, or None - /// if defined before input is parsed. - Optional getDefLineNumber() { return DefLineNumber; } -}; - -/// Class representing the use of a numeric variable in the AST of an -/// expression. -class FileCheckNumericVariableUse : public FileCheckExpressionAST { -private: - /// Name of the numeric variable. - StringRef Name; - - /// Pointer to the class instance for the variable this use is about. - FileCheckNumericVariable *NumericVariable; - -public: - FileCheckNumericVariableUse(StringRef Name, - FileCheckNumericVariable *NumericVariable) - : Name(Name), NumericVariable(NumericVariable) {} - - /// \returns the value of the variable referenced by this instance. - Expected eval() const; -}; - -/// Type of functions evaluating a given binary operation. -using binop_eval_t = uint64_t (*)(uint64_t, uint64_t); - -/// Class representing a single binary operation in the AST of an expression. -class FileCheckASTBinop : public FileCheckExpressionAST { -private: - /// Left operand. - std::unique_ptr LeftOperand; - - /// Right operand. - std::unique_ptr RightOperand; - - /// Pointer to function that can evaluate this binary operation. 
- binop_eval_t EvalBinop; - -public: - FileCheckASTBinop(binop_eval_t EvalBinop, - std::unique_ptr LeftOp, - std::unique_ptr RightOp) - : EvalBinop(EvalBinop) { - LeftOperand = std::move(LeftOp); - RightOperand = std::move(RightOp); - } - - /// Evaluates the value of the binary operation represented by this AST, - /// using EvalBinop on the result of recursively evaluating the operands. - /// \returns the expression value or an error if an undefined numeric - /// variable is used in one of the operands. - Expected eval() const; -}; - -class FileCheckPatternContext; - -/// Class representing a substitution to perform in the RegExStr string. -class FileCheckSubstitution { -protected: - /// Pointer to a class instance holding, among other things, the table with - /// the values of live string variables at the start of any given CHECK line. - /// Used for substituting string variables with the text they were defined - /// as. Expressions are linked to the numeric variables they use at - /// parse time and directly access the value of the numeric variable to - /// evaluate their value. - FileCheckPatternContext *Context; - - /// The string that needs to be substituted for something else. For a - /// string variable this is its name, otherwise this is the whole expression. - StringRef FromStr; - - // Index in RegExStr of where to do the substitution. - size_t InsertIdx; - -public: - FileCheckSubstitution(FileCheckPatternContext *Context, StringRef VarName, - size_t InsertIdx) - : Context(Context), FromStr(VarName), InsertIdx(InsertIdx) {} - - virtual ~FileCheckSubstitution() = default; - - /// \returns the string to be substituted for something else. - StringRef getFromString() const { return FromStr; } - - /// \returns the index where the substitution is to be performed in RegExStr. - size_t getIndex() const { return InsertIdx; } - - /// \returns a string containing the result of the substitution represented - /// by this class instance or an error if substitution failed. - virtual Expected getResult() const = 0; -}; - -class FileCheckStringSubstitution : public FileCheckSubstitution { -public: - FileCheckStringSubstitution(FileCheckPatternContext *Context, - StringRef VarName, size_t InsertIdx) - : FileCheckSubstitution(Context, VarName, InsertIdx) {} - - /// \returns the text that the string variable in this substitution matched - /// when defined, or an error if the variable is undefined. - Expected getResult() const override; -}; - -class FileCheckNumericSubstitution : public FileCheckSubstitution { -private: - /// Pointer to the class representing the expression whose value is to be - /// substituted. - std::unique_ptr ExpressionAST; - -public: - FileCheckNumericSubstitution(FileCheckPatternContext *Context, StringRef Expr, - std::unique_ptr ExprAST, - size_t InsertIdx) - : FileCheckSubstitution(Context, Expr, InsertIdx) { - ExpressionAST = std::move(ExprAST); - } - - /// \returns a string containing the result of evaluating the expression in - /// this substitution, or an error if evaluation failed. - Expected getResult() const override; -}; - -//===----------------------------------------------------------------------===// -// Pattern handling code. +// Summary of a FileCheck diagnostic. 
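The expression-AST classes removed above leave this public header, but the design they describe is easiest to see with a tiny sketch against the pre-change declarations: a numeric substitution such as [[#N+1]] is, conceptually, a binop node over a variable use and a literal. The uint64_t template arguments and the add helper are reconstructions for illustration only.

#include "llvm/Support/FileCheck.h" // pre-change header
#include <memory>
#include <utility>

static uint64_t add(uint64_t L, uint64_t R) { return L + R; }

static void sketchNumericSubstitutionAST(llvm::FileCheckNumericVariable &N) {
  auto Use =
      std::make_unique<llvm::FileCheckNumericVariableUse>(N.getName(), &N);
  auto One = std::make_unique<llvm::FileCheckExpressionLiteral>(1);
  llvm::FileCheckASTBinop Plus(add, std::move(Use), std::move(One));

  if (llvm::Expected<uint64_t> V = Plus.eval())
    (void)*V;                          // N's current value plus one
  else
    llvm::consumeError(V.takeError()); // N is undefined on this line
}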
//===----------------------------------------------------------------------===// namespace Check { @@ -291,325 +82,6 @@ public: }; } // namespace Check -struct FileCheckDiag; - -/// Class holding the FileCheckPattern global state, shared by all patterns: -/// tables holding values of variables and whether they are defined or not at -/// any given time in the matching process. -class FileCheckPatternContext { - friend class FileCheckPattern; - -private: - /// When matching a given pattern, this holds the value of all the string - /// variables defined in previous patterns. In a pattern, only the last - /// definition for a given variable is recorded in this table. - /// Back-references are used for uses after any the other definition. - StringMap GlobalVariableTable; - - /// Map of all string variables defined so far. Used at parse time to detect - /// a name conflict between a numeric variable and a string variable when - /// the former is defined on a later line than the latter. - StringMap DefinedVariableTable; - - /// When matching a given pattern, this holds the pointers to the classes - /// representing the numeric variables defined in previous patterns. When - /// matching a pattern all definitions for that pattern are recorded in the - /// NumericVariableDefs table in the FileCheckPattern instance of that - /// pattern. - StringMap GlobalNumericVariableTable; - - /// Pointer to the class instance representing the @LINE pseudo variable for - /// easily updating its value. - FileCheckNumericVariable *LineVariable = nullptr; - - /// Vector holding pointers to all parsed numeric variables. Used to - /// automatically free them once they are guaranteed to no longer be used. - std::vector> NumericVariables; - - /// Vector holding pointers to all substitutions. Used to automatically free - /// them once they are guaranteed to no longer be used. - std::vector> Substitutions; - -public: - /// \returns the value of string variable \p VarName or an error if no such - /// variable has been defined. - Expected getPatternVarValue(StringRef VarName); - - /// Defines string and numeric variables from definitions given on the - /// command line, passed as a vector of [#]VAR=VAL strings in - /// \p CmdlineDefines. \returns an error list containing diagnostics against - /// \p SM for all definition parsing failures, if any, or Success otherwise. - Error defineCmdlineVariables(std::vector &CmdlineDefines, - SourceMgr &SM); - - /// Create @LINE pseudo variable. Value is set when pattern are being - /// matched. - void createLineVariable(); - - /// Undefines local variables (variables whose name does not start with a '$' - /// sign), i.e. removes them from GlobalVariableTable and from - /// GlobalNumericVariableTable and also clears the value of numeric - /// variables. - void clearLocalVars(); - -private: - /// Makes a new numeric variable and registers it for destruction when the - /// context is destroyed. - template - FileCheckNumericVariable *makeNumericVariable(Types... args); - - /// Makes a new string substitution and registers it for destruction when the - /// context is destroyed. - FileCheckSubstitution *makeStringSubstitution(StringRef VarName, - size_t InsertIdx); - - /// Makes a new numeric substitution and registers it for destruction when - /// the context is destroyed. 
- FileCheckSubstitution * - makeNumericSubstitution(StringRef ExpressionStr, - std::unique_ptr ExpressionAST, - size_t InsertIdx); -}; - -/// Class to represent an error holding a diagnostic with location information -/// used when printing it. -class FileCheckErrorDiagnostic : public ErrorInfo { -private: - SMDiagnostic Diagnostic; - -public: - static char ID; - - FileCheckErrorDiagnostic(SMDiagnostic &&Diag) : Diagnostic(Diag) {} - - std::error_code convertToErrorCode() const override { - return inconvertibleErrorCode(); - } - - /// Print diagnostic associated with this error when printing the error. - void log(raw_ostream &OS) const override { Diagnostic.print(nullptr, OS); } - - static Error get(const SourceMgr &SM, SMLoc Loc, const Twine &ErrMsg) { - return make_error( - SM.GetMessage(Loc, SourceMgr::DK_Error, ErrMsg)); - } - - static Error get(const SourceMgr &SM, StringRef Buffer, const Twine &ErrMsg) { - return get(SM, SMLoc::getFromPointer(Buffer.data()), ErrMsg); - } -}; - -class FileCheckNotFoundError : public ErrorInfo { -public: - static char ID; - - std::error_code convertToErrorCode() const override { - return inconvertibleErrorCode(); - } - - /// Print diagnostic associated with this error when printing the error. - void log(raw_ostream &OS) const override { - OS << "String not found in input"; - } -}; - -class FileCheckPattern { - SMLoc PatternLoc; - - /// A fixed string to match as the pattern or empty if this pattern requires - /// a regex match. - StringRef FixedStr; - - /// A regex string to match as the pattern or empty if this pattern requires - /// a fixed string to match. - std::string RegExStr; - - /// Entries in this vector represent a substitution of a string variable or - /// an expression in the RegExStr regex at match time. For example, in the - /// case of a CHECK directive with the pattern "foo[[bar]]baz[[#N+1]]", - /// RegExStr will contain "foobaz" and we'll get two entries in this vector - /// that tells us to insert the value of string variable "bar" at offset 3 - /// and the value of expression "N+1" at offset 6. - std::vector Substitutions; - - /// Maps names of string variables defined in a pattern to the number of - /// their parenthesis group in RegExStr capturing their last definition. - /// - /// E.g. for the pattern "foo[[bar:.*]]baz([[bar]][[QUUX]][[bar:.*]])", - /// RegExStr will be "foo(.*)baz(\1(.*))" where is - /// the value captured for QUUX on the earlier line where it was defined, and - /// VariableDefs will map "bar" to the third parenthesis group which captures - /// the second definition of "bar". - /// - /// Note: uses std::map rather than StringMap to be able to get the key when - /// iterating over values. - std::map VariableDefs; - - /// Structure representing the definition of a numeric variable in a pattern. - /// It holds the pointer to the class representing the numeric variable whose - /// value is being defined and the number of the parenthesis group in - /// RegExStr to capture that value. - struct FileCheckNumericVariableMatch { - /// Pointer to class representing the numeric variable whose value is being - /// defined. - FileCheckNumericVariable *DefinedNumericVariable; - - /// Number of the parenthesis group in RegExStr that captures the value of - /// this numeric variable definition. - unsigned CaptureParenGroup; - }; - - /// Holds the number of the parenthesis group in RegExStr and pointer to the - /// corresponding FileCheckNumericVariable class instance of all numeric - /// variable definitions. 
Used to set the matched value of all those - /// variables. - StringMap NumericVariableDefs; - - /// Pointer to a class instance holding the global state shared by all - /// patterns: - /// - separate tables with the values of live string and numeric variables - /// respectively at the start of any given CHECK line; - /// - table holding whether a string variable has been defined at any given - /// point during the parsing phase. - FileCheckPatternContext *Context; - - Check::FileCheckType CheckTy; - - /// Line number for this CHECK pattern or None if it is an implicit pattern. - /// Used to determine whether a variable definition is made on an earlier - /// line to the one with this CHECK. - Optional LineNumber; - -public: - FileCheckPattern(Check::FileCheckType Ty, FileCheckPatternContext *Context, - Optional Line = None) - : Context(Context), CheckTy(Ty), LineNumber(Line) {} - - /// \returns the location in source code. - SMLoc getLoc() const { return PatternLoc; } - - /// \returns the pointer to the global state for all patterns in this - /// FileCheck instance. - FileCheckPatternContext *getContext() const { return Context; } - - /// \returns whether \p C is a valid first character for a variable name. - static bool isValidVarNameStart(char C); - - /// Parsing information about a variable. - struct VariableProperties { - StringRef Name; - bool IsPseudo; - }; - - /// Parses the string at the start of \p Str for a variable name. \returns - /// a VariableProperties structure holding the variable name and whether it - /// is the name of a pseudo variable, or an error holding a diagnostic - /// against \p SM if parsing fail. If parsing was successful, also strips - /// \p Str from the variable name. - static Expected parseVariable(StringRef &Str, - const SourceMgr &SM); - /// Parses \p Expr for the name of a numeric variable to be defined at line - /// \p LineNumber or before input is parsed if \p LineNumber is None. - /// \returns a pointer to the class instance representing that variable, - /// creating it if needed, or an error holding a diagnostic against \p SM - /// should defining such a variable be invalid. - static Expected parseNumericVariableDefinition( - StringRef &Expr, FileCheckPatternContext *Context, - Optional LineNumber, const SourceMgr &SM); - /// Parses \p Expr for a numeric substitution block. Parameter - /// \p IsLegacyLineExpr indicates whether \p Expr should be a legacy @LINE - /// expression. \returns a pointer to the class instance representing the AST - /// of the expression whose value must be substituted, or an error holding a - /// diagnostic against \p SM if parsing fails. If substitution was - /// successful, sets \p DefinedNumericVariable to point to the class - /// representing the numeric variable being defined in this numeric - /// substitution block, or None if this block does not define any variable. - Expected> - parseNumericSubstitutionBlock( - StringRef Expr, - Optional &DefinedNumericVariable, - bool IsLegacyLineExpr, const SourceMgr &SM) const; - /// Parses the pattern in \p PatternStr and initializes this FileCheckPattern - /// instance accordingly. - /// - /// \p Prefix provides which prefix is being matched, \p Req describes the - /// global options that influence the parsing such as whitespace - /// canonicalization, \p SM provides the SourceMgr used for error reports. - /// \returns true in case of an error, false otherwise. 
- bool parsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, - const FileCheckRequest &Req); - /// Matches the pattern string against the input buffer \p Buffer - /// - /// \returns the position that is matched or an error indicating why matching - /// failed. If there is a match, updates \p MatchLen with the size of the - /// matched string. - /// - /// The GlobalVariableTable StringMap in the FileCheckPatternContext class - /// instance provides the current values of FileCheck string variables and - /// is updated if this match defines new values. Likewise, the - /// GlobalNumericVariableTable StringMap in the same class provides the - /// current values of FileCheck numeric variables and is updated if this - /// match defines new numeric values. - Expected match(StringRef Buffer, size_t &MatchLen, - const SourceMgr &SM) const; - /// Prints the value of successful substitutions or the name of the undefined - /// string or numeric variables preventing a successful substitution. - void printSubstitutions(const SourceMgr &SM, StringRef Buffer, - SMRange MatchRange = None) const; - void printFuzzyMatch(const SourceMgr &SM, StringRef Buffer, - std::vector *Diags) const; - - bool hasVariable() const { - return !(Substitutions.empty() && VariableDefs.empty()); - } - - Check::FileCheckType getCheckTy() const { return CheckTy; } - - int getCount() const { return CheckTy.getCount(); } - -private: - bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); - void AddBackrefToRegEx(unsigned BackrefNum); - /// Computes an arbitrary estimate for the quality of matching this pattern - /// at the start of \p Buffer; a distance of zero should correspond to a - /// perfect match. - unsigned computeMatchDistance(StringRef Buffer) const; - /// Finds the closing sequence of a regex variable usage or definition. - /// - /// \p Str has to point in the beginning of the definition (right after the - /// opening sequence). \p SM holds the SourceMgr used for error repporting. - /// \returns the offset of the closing sequence within Str, or npos if it - /// was not found. - size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); - - /// Parses \p Name as a (pseudo if \p IsPseudo is true) numeric variable use. - /// \returns the pointer to the class instance representing that variable if - /// successful, or an error holding a diagnostic against \p SM otherwise. - Expected> - parseNumericVariableUse(StringRef Name, bool IsPseudo, - const SourceMgr &SM) const; - enum class AllowedOperand { LineVar, Literal, Any }; - /// Parses \p Expr for use of a numeric operand. Accepts both literal values - /// and numeric variables, depending on the value of \p AO. \returns the - /// class representing that operand in the AST of the expression or an error - /// holding a diagnostic against \p SM otherwise. - Expected> - parseNumericOperand(StringRef &Expr, AllowedOperand AO, - const SourceMgr &SM) const; - /// Parses \p Expr for a binary operation. The left operand of this binary - /// operation is given in \p LeftOp and \p IsLegacyLineExpr indicates whether - /// we are parsing a legacy @LINE expression. \returns the class representing - /// the binary operation in the AST of the expression, or an error holding a - /// diagnostic against \p SM otherwise. - Expected> - parseBinop(StringRef &Expr, std::unique_ptr LeftOp, - bool IsLegacyLineExpr, const SourceMgr &SM) const; -}; - -//===----------------------------------------------------------------------===// -/// Summary of a FileCheck diagnostic. 
-//===----------------------------------------------------------------------===// - struct FileCheckDiag { /// What is the FileCheck directive for this diagnostic? Check::FileCheckType CheckTy; @@ -659,61 +131,20 @@ struct FileCheckDiag { SMLoc CheckLoc, MatchType MatchTy, SMRange InputRange); }; -//===----------------------------------------------------------------------===// -// Check Strings. -//===----------------------------------------------------------------------===// - -/// A check that we found in the input file. -struct FileCheckString { - /// The pattern to match. - FileCheckPattern Pat; - - /// Which prefix name this check matched. - StringRef Prefix; - - /// The location in the match file that the check string was specified. - SMLoc Loc; - - /// All of the strings that are disallowed from occurring between this match - /// string and the previous one (or start of file). - std::vector DagNotStrings; - - FileCheckString(const FileCheckPattern &P, StringRef S, SMLoc L) - : Pat(P), Prefix(S), Loc(L) {} - - /// Matches check string and its "not strings" and/or "dag strings". - size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, - size_t &MatchLen, FileCheckRequest &Req, - std::vector *Diags) const; - - /// Verifies that there is a single line in the given \p Buffer. Errors are - /// reported against \p SM. - bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; - /// Verifies that there is no newline in the given \p Buffer. Errors are - /// reported against \p SM. - bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; - /// Verifies that none of the strings in \p NotStrings are found in the given - /// \p Buffer. Errors are reported against \p SM and diagnostics recorded in - /// \p Diags according to the verbosity level set in \p Req. - bool CheckNot(const SourceMgr &SM, StringRef Buffer, - const std::vector &NotStrings, - const FileCheckRequest &Req, - std::vector *Diags) const; - /// Matches "dag strings" and their mixed "not strings". - size_t CheckDag(const SourceMgr &SM, StringRef Buffer, - std::vector &NotStrings, - const FileCheckRequest &Req, - std::vector *Diags) const; -}; +class FileCheckPatternContext; +struct FileCheckString; /// FileCheck class takes the request and exposes various methods that /// use information from the request. class FileCheck { FileCheckRequest Req; - FileCheckPatternContext PatternContext; + std::unique_ptr PatternContext; + // C++17 TODO: make this a plain std::vector. + std::unique_ptr> CheckStrings; public: - FileCheck(FileCheckRequest Req) : Req(Req) {} + explicit FileCheck(FileCheckRequest Req); + ~FileCheck(); // Combines the check prefixes into a single regex so that we can efficiently // scan for any of the set. @@ -723,13 +154,11 @@ public: Regex buildCheckPrefixRegex(); /// Reads the check file from \p Buffer and records the expected strings it - /// contains in the \p CheckStrings vector. Errors are reported against - /// \p SM. + /// contains. Errors are reported against \p SM. /// /// Only expected strings whose prefix is one of those listed in \p PrefixRE /// are recorded. \returns true in case of an error, false otherwise. 
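A minimal driver sketch for the reshaped FileCheck class declared in this hunk (readCheckFile and checkInput replace the old free-standing CheckStrings plumbing). CheckFileText and InputText are hypothetical, already-loaded buffers; a real driver, such as the FileCheck tool, also canonicalizes the buffers and validates the prefixes first.

#include "llvm/Support/FileCheck.h"

static bool runFileCheck(llvm::StringRef CheckFileText,
                         llvm::StringRef InputText) {
  llvm::FileCheckRequest Req;
  Req.CheckPrefixes.push_back("CHECK");        // field not shown in this hunk
  Req.IgnoreCase = true;                       // option added in this hunk
  Req.GlobalDefines.push_back("NAME=value");   // -D style definition

  llvm::FileCheck FC(Req);
  llvm::SourceMgr SM;
  SM.AddNewSourceBuffer(
      llvm::MemoryBuffer::getMemBuffer(CheckFileText, "checks"), llvm::SMLoc());
  SM.AddNewSourceBuffer(
      llvm::MemoryBuffer::getMemBuffer(InputText, "input"), llvm::SMLoc());

  llvm::Regex PrefixRE = FC.buildCheckPrefixRegex();
  if (FC.readCheckFile(SM, CheckFileText, PrefixRE))
    return false;                      // error while parsing the check file
  return FC.checkInput(SM, InputText); // true iff the input satisfies the checks
}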
- bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, - std::vector &CheckStrings); + bool readCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE); bool ValidateCheckPrefixes(); @@ -739,13 +168,14 @@ public: SmallVectorImpl &OutputBuffer); /// Checks the input to FileCheck provided in the \p Buffer against the - /// \p CheckStrings read from the check file and record diagnostics emitted + /// expected strings read from the check file and record diagnostics emitted /// in \p Diags. Errors are recorded against \p SM. /// /// \returns false if the input fails to satisfy the checks. - bool CheckInput(SourceMgr &SM, StringRef Buffer, - ArrayRef CheckStrings, + bool checkInput(SourceMgr &SM, StringRef Buffer, std::vector *Diags = nullptr); }; + } // namespace llvm + #endif diff --git a/include/llvm/Support/FileCollector.h b/include/llvm/Support/FileCollector.h new file mode 100644 index 000000000000..19429bd3e9b4 --- /dev/null +++ b/include/llvm/Support/FileCollector.h @@ -0,0 +1,79 @@ +//===-- FileCollector.h -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_FILE_COLLECTOR_H +#define LLVM_SUPPORT_FILE_COLLECTOR_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/VirtualFileSystem.h" + +#include + +namespace llvm { + +/// Collects files into a directory and generates a mapping that can be used by +/// the VFS. +class FileCollector { +public: + FileCollector(std::string Root, std::string OverlayRoot); + + void addFile(const Twine &file); + + /// Write the yaml mapping (for the VFS) to the given file. + std::error_code writeMapping(StringRef mapping_file); + + /// Copy the files into the root directory. + /// + /// When StopOnError is true (the default) we abort as soon as one file + /// cannot be copied. This is relatively common, for example when a file was + /// removed after it was added to the mapping. + std::error_code copyFiles(bool StopOnError = true); + + /// Create a VFS that collects all the paths that might be looked at by the + /// file system accesses. + static IntrusiveRefCntPtr + createCollectorVFS(IntrusiveRefCntPtr BaseFS, + std::shared_ptr Collector); + +private: + void addFileImpl(StringRef SrcPath); + + bool markAsSeen(StringRef Path) { return Seen.insert(Path).second; } + + bool getRealPath(StringRef SrcPath, SmallVectorImpl &Result); + + void addFileToMapping(StringRef VirtualPath, StringRef RealPath) { + VFSWriter.addFileMapping(VirtualPath, RealPath); + } + +protected: + /// Synchronizes adding files. + std::mutex Mutex; + + /// The root directory where files are copied. + std::string Root; + + /// The root directory where the VFS overlay lives. + std::string OverlayRoot; + + /// Tracks already seen files so they can be skipped. + StringSet<> Seen; + + /// The yaml mapping writer. + vfs::YAMLVFSWriter VFSWriter; + + /// Caches RealPath calls when resolving symlinks. 
+ StringMap SymlinkMap; +}; + +} // end namespace llvm + +#endif // LLVM_SUPPORT_FILE_COLLECTOR_H diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h index 1bec27bddad9..a29a9d787947 100644 --- a/include/llvm/Support/FileSystem.h +++ b/include/llvm/Support/FileSystem.h @@ -991,29 +991,27 @@ file_t getStdoutHandle(); /// Returns kInvalidFile when the stream is closed. file_t getStderrHandle(); -/// Reads \p Buf.size() bytes from \p FileHandle into \p Buf. The number of -/// bytes actually read is returned in \p BytesRead. On Unix, this is equivalent -/// to `*BytesRead = ::read(FD, Buf.data(), Buf.size())`, with error reporting. -/// BytesRead will contain zero when reaching EOF. +/// Reads \p Buf.size() bytes from \p FileHandle into \p Buf. Returns the number +/// of bytes actually read. On Unix, this is equivalent to `return ::read(FD, +/// Buf.data(), Buf.size())`, with error reporting. Returns 0 when reaching EOF. /// /// @param FileHandle File to read from. /// @param Buf Buffer to read into. -/// @param BytesRead Output parameter of the number of bytes read. -/// @returns The error, if any, or errc::success. -std::error_code readNativeFile(file_t FileHandle, MutableArrayRef Buf, - size_t *BytesRead); +/// @returns The number of bytes read, or error. +Expected readNativeFile(file_t FileHandle, MutableArrayRef Buf); /// Reads \p Buf.size() bytes from \p FileHandle at offset \p Offset into \p /// Buf. If 'pread' is available, this will use that, otherwise it will use -/// 'lseek'. Bytes requested beyond the end of the file will be zero -/// initialized. +/// 'lseek'. Returns the number of bytes actually read. Returns 0 when reaching +/// EOF. /// /// @param FileHandle File to read from. /// @param Buf Buffer to read into. /// @param Offset Offset into the file at which the read should occur. -/// @returns The error, if any, or errc::success. -std::error_code readNativeFileSlice(file_t FileHandle, - MutableArrayRef Buf, size_t Offset); +/// @returns The number of bytes read, or error. +Expected readNativeFileSlice(file_t FileHandle, + MutableArrayRef Buf, + uint64_t Offset); /// @brief Opens the file with the given name in a write-only or read-write /// mode, returning its open file descriptor. If the file does not exist, it @@ -1217,9 +1215,9 @@ class directory_entry { // that whole structure, callers end up paying for a stat(). // std::filesystem::directory_entry may be a better model. std::string Path; - file_type Type; // Most platforms can provide this. - bool FollowSymlinks; // Affects the behavior of status(). - basic_file_status Status; // If available. + file_type Type = file_type::type_unknown; // Most platforms can provide this. + bool FollowSymlinks = true; // Affects the behavior of status(). + basic_file_status Status; // If available. public: explicit directory_entry(const Twine &Path, bool FollowSymlinks = true, diff --git a/include/llvm/Support/FileUtilities.h b/include/llvm/Support/FileUtilities.h index 16b2206924c3..04efdced32a4 100644 --- a/include/llvm/Support/FileUtilities.h +++ b/include/llvm/Support/FileUtilities.h @@ -14,6 +14,9 @@ #ifndef LLVM_SUPPORT_FILEUTILITIES_H #define LLVM_SUPPORT_FILEUTILITIES_H +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" @@ -72,6 +75,41 @@ namespace llvm { /// will not be removed when the object is destroyed. 
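The FileSystem.h hunk above changes readNativeFile and readNativeFileSlice to return Expected<size_t> (the byte count, 0 at EOF) instead of an error_code plus an out-parameter. Below is a sketch of a read loop under the new convention; getStdinHandle comes from the same header, and drainStdin is a hypothetical name.

#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include <string>

static llvm::Expected<std::string> drainStdin() {
  std::string Data;
  char Chunk[4096];
  llvm::sys::fs::file_t In = llvm::sys::fs::getStdinHandle();
  for (;;) {
    llvm::Expected<size_t> NumRead = llvm::sys::fs::readNativeFile(
        In, llvm::MutableArrayRef<char>(Chunk, sizeof(Chunk)));
    if (!NumRead)
      return NumRead.takeError(); // propagate the read error
    if (*NumRead == 0)
      break;                      // EOF
    Data.append(Chunk, *NumRead);
  }
  return Data;
}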
void releaseFile() { DeleteIt = false; } }; + + enum class atomic_write_error { + failed_to_create_uniq_file = 0, + output_stream_error, + failed_to_rename_temp_file + }; + + class AtomicFileWriteError : public llvm::ErrorInfo { + public: + AtomicFileWriteError(atomic_write_error Error) : Error(Error) {} + + void log(raw_ostream &OS) const override; + + const atomic_write_error Error; + static char ID; + + private: + // Users are not expected to use error_code. + std::error_code convertToErrorCode() const override { + return llvm::inconvertibleErrorCode(); + } + }; + + // atomic_write_error + whatever the Writer can return + + /// Creates a unique file with name according to the given \p TempPathModel, + /// writes content of \p Buffer to the file and renames it to \p FinalPath. + /// + /// \returns \c AtomicFileWriteError in case of error. + llvm::Error writeFileAtomically(StringRef TempPathModel, StringRef FinalPath, + StringRef Buffer); + + llvm::Error + writeFileAtomically(StringRef TempPathModel, StringRef FinalPath, + std::function Writer); } // End llvm namespace #endif diff --git a/include/llvm/Support/Format.h b/include/llvm/Support/Format.h index 77dcbaebf1a3..9dd7b401b46a 100644 --- a/include/llvm/Support/Format.h +++ b/include/llvm/Support/Format.h @@ -29,6 +29,7 @@ #include #include #include +#include namespace llvm { @@ -91,7 +92,7 @@ class format_object final : public format_object_base { template int snprint_tuple(char *Buffer, unsigned BufferSize, - index_sequence) const { + std::index_sequence) const { #ifdef _MSC_VER return _snprintf(Buffer, BufferSize, Fmt, std::get(Vals)...); #else @@ -106,7 +107,7 @@ public: } int snprint(char *Buffer, unsigned BufferSize) const override { - return snprint_tuple(Buffer, BufferSize, index_sequence_for()); + return snprint_tuple(Buffer, BufferSize, std::index_sequence_for()); } }; diff --git a/include/llvm/Support/GenericDomTree.h b/include/llvm/Support/GenericDomTree.h index 99620802505b..9169379f746d 100644 --- a/include/llvm/Support/GenericDomTree.h +++ b/include/llvm/Support/GenericDomTree.h @@ -242,7 +242,7 @@ protected: using DomTreeNodeMapType = DenseMap>>; DomTreeNodeMapType DomTreeNodes; - DomTreeNodeBase *RootNode; + DomTreeNodeBase *RootNode = nullptr; ParentPtr Parent = nullptr; mutable bool DFSInfoValid = false; @@ -571,7 +571,7 @@ protected: assert(IDomNode && "Not immediate dominator specified for block!"); DFSInfoValid = false; return (DomTreeNodes[BB] = IDomNode->addChild( - llvm::make_unique>(BB, IDomNode))).get(); + std::make_unique>(BB, IDomNode))).get(); } /// Add a new node to the forward dominator tree and make it a new root. @@ -585,7 +585,7 @@ protected: "Cannot change root of post-dominator tree"); DFSInfoValid = false; DomTreeNodeBase *NewNode = (DomTreeNodes[BB] = - llvm::make_unique>(BB, nullptr)).get(); + std::make_unique>(BB, nullptr)).get(); if (Roots.empty()) { addRoot(BB); } else { diff --git a/include/llvm/Support/GenericDomTreeConstruction.h b/include/llvm/Support/GenericDomTreeConstruction.h index ccceba881718..7c0278e8770e 100644 --- a/include/llvm/Support/GenericDomTreeConstruction.h +++ b/include/llvm/Support/GenericDomTreeConstruction.h @@ -186,7 +186,7 @@ struct SemiNCAInfo { // Add a new tree node for this NodeT, and link it as a child of // IDomNode return (DT.DomTreeNodes[BB] = IDomNode->addChild( - llvm::make_unique>(BB, IDomNode))) + std::make_unique>(BB, IDomNode))) .get(); } @@ -586,7 +586,7 @@ struct SemiNCAInfo { NodePtr Root = IsPostDom ? 
nullptr : DT.Roots[0]; DT.RootNode = (DT.DomTreeNodes[Root] = - llvm::make_unique>(Root, nullptr)) + std::make_unique>(Root, nullptr)) .get(); SNCA.attachNewSubtree(DT, DT.RootNode); } @@ -611,7 +611,7 @@ struct SemiNCAInfo { // Add a new tree node for this BasicBlock, and link it as a child of // IDomNode. DT.DomTreeNodes[W] = IDomNode->addChild( - llvm::make_unique>(W, IDomNode)); + std::make_unique>(W, IDomNode)); } } @@ -663,7 +663,7 @@ struct SemiNCAInfo { TreeNodePtr VirtualRoot = DT.getNode(nullptr); FromTN = (DT.DomTreeNodes[From] = VirtualRoot->addChild( - llvm::make_unique>(From, VirtualRoot))) + std::make_unique>(From, VirtualRoot))) .get(); DT.Roots.push_back(From); } diff --git a/include/llvm/Support/GlobPattern.h b/include/llvm/Support/GlobPattern.h index 66a4cd94c12a..0098ac65fd30 100644 --- a/include/llvm/Support/GlobPattern.h +++ b/include/llvm/Support/GlobPattern.h @@ -21,7 +21,7 @@ #include // This class represents a glob pattern. Supported metacharacters -// are "*", "?", "[]" and "[^]". +// are "*", "?", "\", "[]", "[^]", and "[!]". namespace llvm { class BitVector; template class ArrayRef; diff --git a/include/llvm/Support/Host.h b/include/llvm/Support/Host.h index b37cc514c92e..44f543c363db 100644 --- a/include/llvm/Support/Host.h +++ b/include/llvm/Support/Host.h @@ -15,39 +15,11 @@ #include "llvm/ADT/StringMap.h" -#if defined(__linux__) || defined(__GNU__) || defined(__HAIKU__) -#include -#elif defined(_AIX) -#include -#elif defined(__sun) -/* Solaris provides _BIG_ENDIAN/_LITTLE_ENDIAN selector in sys/types.h */ -#include -#define BIG_ENDIAN 4321 -#define LITTLE_ENDIAN 1234 -#if defined(_BIG_ENDIAN) -#define BYTE_ORDER BIG_ENDIAN -#else -#define BYTE_ORDER LITTLE_ENDIAN -#endif -#else -#if !defined(BYTE_ORDER) && !defined(_WIN32) -#include -#endif -#endif - #include namespace llvm { namespace sys { -#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN -constexpr bool IsBigEndianHost = true; -#else -constexpr bool IsBigEndianHost = false; -#endif - - static const bool IsLittleEndianHost = !IsBigEndianHost; - /// getDefaultTargetTriple() - Return the default target triple the compiler /// has been configured to produce code for. /// diff --git a/include/llvm/Support/JamCRC.h b/include/llvm/Support/JamCRC.h deleted file mode 100644 index b6fc4e7b9b03..000000000000 --- a/include/llvm/Support/JamCRC.h +++ /dev/null @@ -1,48 +0,0 @@ -//===-- llvm/Support/JamCRC.h - Cyclic Redundancy Check ---------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains an implementation of JamCRC. -// -// We will use the "Rocksoft^tm Model CRC Algorithm" to describe the properties -// of this CRC: -// Width : 32 -// Poly : 04C11DB7 -// Init : FFFFFFFF -// RefIn : True -// RefOut : True -// XorOut : 00000000 -// Check : 340BC6D9 (result of CRC for "123456789") -// -// N.B. We permit flexibility of the "Init" value. Some consumers of this need -// it to be zero. 
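The GlobPattern.h comment above now also documents the "\" escape and "[!]" negation. A minimal matching sketch follows; GlobPattern::create and match are the class's pre-existing interface, assumed here rather than shown in this diff, and endsInNonHeaderExtension is a hypothetical name.

#include "llvm/Support/Error.h"
#include "llvm/Support/GlobPattern.h"

static bool endsInNonHeaderExtension(llvm::StringRef Name) {
  // "[!h]" matches any single character except 'h', so "foo.o" matches while
  // "foo.h" does not.
  llvm::Expected<llvm::GlobPattern> Pat = llvm::GlobPattern::create("*.[!h]");
  if (!Pat) {
    llvm::consumeError(Pat.takeError()); // malformed pattern
    return false;
  }
  return Pat->match(Name);
}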
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_SUPPORT_JAMCRC_H -#define LLVM_SUPPORT_JAMCRC_H - -#include "llvm/Support/DataTypes.h" - -namespace llvm { -template class ArrayRef; - -class JamCRC { -public: - JamCRC(uint32_t Init = 0xFFFFFFFFU) : CRC(Init) {} - - // Update the CRC calculation with Data. - void update(ArrayRef Data); - - uint32_t getCRC() const { return CRC; } - -private: - uint32_t CRC; -}; -} // End of namespace llvm - -#endif diff --git a/include/llvm/Support/MachineValueType.h b/include/llvm/Support/MachineValueType.h index b94d2c4836cc..7f9f0b85c55e 100644 --- a/include/llvm/Support/MachineValueType.h +++ b/include/llvm/Support/MachineValueType.h @@ -17,6 +17,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/TypeSize.h" #include namespace llvm { @@ -64,152 +65,162 @@ namespace llvm { v32i1 = 19, // 32 x i1 v64i1 = 20, // 64 x i1 v128i1 = 21, // 128 x i1 - v512i1 = 22, // 512 x i1 - v1024i1 = 23, // 1024 x i1 - - v1i8 = 24, // 1 x i8 - v2i8 = 25, // 2 x i8 - v4i8 = 26, // 4 x i8 - v8i8 = 27, // 8 x i8 - v16i8 = 28, // 16 x i8 - v32i8 = 29, // 32 x i8 - v64i8 = 30, // 64 x i8 - v128i8 = 31, //128 x i8 - v256i8 = 32, //256 x i8 - - v1i16 = 33, // 1 x i16 - v2i16 = 34, // 2 x i16 - v4i16 = 35, // 4 x i16 - v8i16 = 36, // 8 x i16 - v16i16 = 37, // 16 x i16 - v32i16 = 38, // 32 x i16 - v64i16 = 39, // 64 x i16 - v128i16 = 40, //128 x i16 - - v1i32 = 41, // 1 x i32 - v2i32 = 42, // 2 x i32 - v3i32 = 43, // 3 x i32 - v4i32 = 44, // 4 x i32 - v5i32 = 45, // 5 x i32 - v8i32 = 46, // 8 x i32 - v16i32 = 47, // 16 x i32 - v32i32 = 48, // 32 x i32 - v64i32 = 49, // 64 x i32 - v128i32 = 50, // 128 x i32 - v256i32 = 51, // 256 x i32 - v512i32 = 52, // 512 x i32 - v1024i32 = 53, // 1024 x i32 - v2048i32 = 54, // 2048 x i32 - - v1i64 = 55, // 1 x i64 - v2i64 = 56, // 2 x i64 - v4i64 = 57, // 4 x i64 - v8i64 = 58, // 8 x i64 - v16i64 = 59, // 16 x i64 - v32i64 = 60, // 32 x i64 - - v1i128 = 61, // 1 x i128 - - // Scalable integer types - nxv1i1 = 62, // n x 1 x i1 - nxv2i1 = 63, // n x 2 x i1 - nxv4i1 = 64, // n x 4 x i1 - nxv8i1 = 65, // n x 8 x i1 - nxv16i1 = 66, // n x 16 x i1 - nxv32i1 = 67, // n x 32 x i1 - - nxv1i8 = 68, // n x 1 x i8 - nxv2i8 = 69, // n x 2 x i8 - nxv4i8 = 70, // n x 4 x i8 - nxv8i8 = 71, // n x 8 x i8 - nxv16i8 = 72, // n x 16 x i8 - nxv32i8 = 73, // n x 32 x i8 - - nxv1i16 = 74, // n x 1 x i16 - nxv2i16 = 75, // n x 2 x i16 - nxv4i16 = 76, // n x 4 x i16 - nxv8i16 = 77, // n x 8 x i16 - nxv16i16 = 78, // n x 16 x i16 - nxv32i16 = 79, // n x 32 x i16 - - nxv1i32 = 80, // n x 1 x i32 - nxv2i32 = 81, // n x 2 x i32 - nxv4i32 = 82, // n x 4 x i32 - nxv8i32 = 83, // n x 8 x i32 - nxv16i32 = 84, // n x 16 x i32 - nxv32i32 = 85, // n x 32 x i32 - - nxv1i64 = 86, // n x 1 x i64 - nxv2i64 = 87, // n x 2 x i64 - nxv4i64 = 88, // n x 4 x i64 - nxv8i64 = 89, // n x 8 x i64 - nxv16i64 = 90, // n x 16 x i64 - nxv32i64 = 91, // n x 32 x i64 - - FIRST_INTEGER_VECTOR_VALUETYPE = v1i1, - LAST_INTEGER_VECTOR_VALUETYPE = nxv32i64, - - FIRST_INTEGER_SCALABLE_VALUETYPE = nxv1i1, - LAST_INTEGER_SCALABLE_VALUETYPE = nxv32i64, - - v2f16 = 92, // 2 x f16 - v4f16 = 93, // 4 x f16 - v8f16 = 94, // 8 x f16 - v1f32 = 95, // 1 x f32 - v2f32 = 96, // 2 x f32 - v3f32 = 97, // 3 x f32 - v4f32 = 98, // 4 x f32 - v5f32 = 99, // 5 x f32 - v8f32 = 100, // 8 x f32 - v16f32 = 101, // 16 x f32 - v32f32 = 102, // 32 x f32 - v64f32 = 103, // 64 x f32 - 
v128f32 = 104, // 128 x f32 - v256f32 = 105, // 256 x f32 - v512f32 = 106, // 512 x f32 - v1024f32 = 107, // 1024 x f32 - v2048f32 = 108, // 2048 x f32 - v1f64 = 109, // 1 x f64 - v2f64 = 110, // 2 x f64 - v4f64 = 111, // 4 x f64 - v8f64 = 112, // 8 x f64 - - nxv2f16 = 113, // n x 2 x f16 - nxv4f16 = 114, // n x 4 x f16 - nxv8f16 = 115, // n x 8 x f16 - nxv1f32 = 116, // n x 1 x f32 - nxv2f32 = 117, // n x 2 x f32 - nxv4f32 = 118, // n x 4 x f32 - nxv8f32 = 119, // n x 8 x f32 - nxv16f32 = 120, // n x 16 x f32 - nxv1f64 = 121, // n x 1 x f64 - nxv2f64 = 122, // n x 2 x f64 - nxv4f64 = 123, // n x 4 x f64 - nxv8f64 = 124, // n x 8 x f64 - - FIRST_FP_VECTOR_VALUETYPE = v2f16, - LAST_FP_VECTOR_VALUETYPE = nxv8f64, - - FIRST_FP_SCALABLE_VALUETYPE = nxv2f16, - LAST_FP_SCALABLE_VALUETYPE = nxv8f64, + v256i1 = 22, // 256 x i1 + v512i1 = 23, // 512 x i1 + v1024i1 = 24, // 1024 x i1 + + v1i8 = 25, // 1 x i8 + v2i8 = 26, // 2 x i8 + v4i8 = 27, // 4 x i8 + v8i8 = 28, // 8 x i8 + v16i8 = 29, // 16 x i8 + v32i8 = 30, // 32 x i8 + v64i8 = 31, // 64 x i8 + v128i8 = 32, //128 x i8 + v256i8 = 33, //256 x i8 + + v1i16 = 34, // 1 x i16 + v2i16 = 35, // 2 x i16 + v3i16 = 36, // 3 x i16 + v4i16 = 37, // 4 x i16 + v8i16 = 38, // 8 x i16 + v16i16 = 39, // 16 x i16 + v32i16 = 40, // 32 x i16 + v64i16 = 41, // 64 x i16 + v128i16 = 42, //128 x i16 + + v1i32 = 43, // 1 x i32 + v2i32 = 44, // 2 x i32 + v3i32 = 45, // 3 x i32 + v4i32 = 46, // 4 x i32 + v5i32 = 47, // 5 x i32 + v8i32 = 48, // 8 x i32 + v16i32 = 49, // 16 x i32 + v32i32 = 50, // 32 x i32 + v64i32 = 51, // 64 x i32 + v128i32 = 52, // 128 x i32 + v256i32 = 53, // 256 x i32 + v512i32 = 54, // 512 x i32 + v1024i32 = 55, // 1024 x i32 + v2048i32 = 56, // 2048 x i32 + + v1i64 = 57, // 1 x i64 + v2i64 = 58, // 2 x i64 + v4i64 = 59, // 4 x i64 + v8i64 = 60, // 8 x i64 + v16i64 = 61, // 16 x i64 + v32i64 = 62, // 32 x i64 + + v1i128 = 63, // 1 x i128 + + FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i1, + LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i128, + + v2f16 = 64, // 2 x f16 + v3f16 = 65, // 3 x f16 + v4f16 = 66, // 4 x f16 + v8f16 = 67, // 8 x f16 + v16f16 = 68, // 16 x f16 + v32f16 = 69, // 32 x f16 + v1f32 = 70, // 1 x f32 + v2f32 = 71, // 2 x f32 + v3f32 = 72, // 3 x f32 + v4f32 = 73, // 4 x f32 + v5f32 = 74, // 5 x f32 + v8f32 = 75, // 8 x f32 + v16f32 = 76, // 16 x f32 + v32f32 = 77, // 32 x f32 + v64f32 = 78, // 64 x f32 + v128f32 = 79, // 128 x f32 + v256f32 = 80, // 256 x f32 + v512f32 = 81, // 512 x f32 + v1024f32 = 82, // 1024 x f32 + v2048f32 = 83, // 2048 x f32 + v1f64 = 84, // 1 x f64 + v2f64 = 85, // 2 x f64 + v4f64 = 86, // 4 x f64 + v8f64 = 87, // 8 x f64 + + FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE = v2f16, + LAST_FP_FIXEDLEN_VECTOR_VALUETYPE = v8f64, + + FIRST_FIXEDLEN_VECTOR_VALUETYPE = v1i1, + LAST_FIXEDLEN_VECTOR_VALUETYPE = v8f64, + + nxv1i1 = 88, // n x 1 x i1 + nxv2i1 = 89, // n x 2 x i1 + nxv4i1 = 90, // n x 4 x i1 + nxv8i1 = 91, // n x 8 x i1 + nxv16i1 = 92, // n x 16 x i1 + nxv32i1 = 93, // n x 32 x i1 + + nxv1i8 = 94, // n x 1 x i8 + nxv2i8 = 95, // n x 2 x i8 + nxv4i8 = 96, // n x 4 x i8 + nxv8i8 = 97, // n x 8 x i8 + nxv16i8 = 98, // n x 16 x i8 + nxv32i8 = 99, // n x 32 x i8 + + nxv1i16 = 100, // n x 1 x i16 + nxv2i16 = 101, // n x 2 x i16 + nxv4i16 = 102, // n x 4 x i16 + nxv8i16 = 103, // n x 8 x i16 + nxv16i16 = 104, // n x 16 x i16 + nxv32i16 = 105, // n x 32 x i16 + + nxv1i32 = 106, // n x 1 x i32 + nxv2i32 = 107, // n x 2 x i32 + nxv4i32 = 108, // n x 4 x i32 + nxv8i32 = 109, // n x 8 x i32 + nxv16i32 = 110, // n x 16 x i32 + 
nxv32i32 = 111, // n x 32 x i32 + + nxv1i64 = 112, // n x 1 x i64 + nxv2i64 = 113, // n x 2 x i64 + nxv4i64 = 114, // n x 4 x i64 + nxv8i64 = 115, // n x 8 x i64 + nxv16i64 = 116, // n x 16 x i64 + nxv32i64 = 117, // n x 32 x i64 + + FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv1i1, + LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv32i64, + + nxv2f16 = 118, // n x 2 x f16 + nxv4f16 = 119, // n x 4 x f16 + nxv8f16 = 120, // n x 8 x f16 + nxv1f32 = 121, // n x 1 x f32 + nxv2f32 = 122, // n x 2 x f32 + nxv4f32 = 123, // n x 4 x f32 + nxv8f32 = 124, // n x 8 x f32 + nxv16f32 = 125, // n x 16 x f32 + nxv1f64 = 126, // n x 1 x f64 + nxv2f64 = 127, // n x 2 x f64 + nxv4f64 = 128, // n x 4 x f64 + nxv8f64 = 129, // n x 8 x f64 + + FIRST_FP_SCALABLE_VECTOR_VALUETYPE = nxv2f16, + LAST_FP_SCALABLE_VECTOR_VALUETYPE = nxv8f64, + + FIRST_SCALABLE_VECTOR_VALUETYPE = nxv1i1, + LAST_SCALABLE_VECTOR_VALUETYPE = nxv8f64, FIRST_VECTOR_VALUETYPE = v1i1, LAST_VECTOR_VALUETYPE = nxv8f64, - x86mmx = 125, // This is an X86 MMX value + x86mmx = 130, // This is an X86 MMX value - Glue = 126, // This glues nodes together during pre-RA sched + Glue = 131, // This glues nodes together during pre-RA sched - isVoid = 127, // This has no value + isVoid = 132, // This has no value - Untyped = 128, // This value takes a register, but has + Untyped = 133, // This value takes a register, but has // unspecified type. The register class // will be determined by the opcode. - exnref = 129, // WebAssembly's exnref type + exnref = 134, // WebAssembly's exnref type FIRST_VALUETYPE = 1, // This is always the beginning of the list. - LAST_VALUETYPE = 130, // This always remains at the end of the list. + LAST_VALUETYPE = 135, // This always remains at the end of the list. // This is the current maximum for LAST_VALUETYPE. // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors @@ -253,41 +264,6 @@ namespace llvm { SimpleValueType SimpleTy = INVALID_SIMPLE_VALUE_TYPE; - // A class to represent the number of elements in a vector - // - // For fixed-length vectors, the total number of elements is equal to 'Min' - // For scalable vectors, the total number of elements is a multiple of 'Min' - class ElementCount { - public: - unsigned Min; - bool Scalable; - - ElementCount(unsigned Min, bool Scalable) - : Min(Min), Scalable(Scalable) {} - - ElementCount operator*(unsigned RHS) { - return { Min * RHS, Scalable }; - } - - ElementCount& operator*=(unsigned RHS) { - Min *= RHS; - return *this; - } - - ElementCount operator/(unsigned RHS) { - return { Min / RHS, Scalable }; - } - - ElementCount& operator/=(unsigned RHS) { - Min /= RHS; - return *this; - } - - bool operator==(const ElementCount& RHS) { - return Min == RHS.Min && Scalable == RHS.Scalable; - } - }; - constexpr MVT() = default; constexpr MVT(SimpleValueType SVT) : SimpleTy(SVT) {} @@ -308,16 +284,20 @@ namespace llvm { bool isFloatingPoint() const { return ((SimpleTy >= MVT::FIRST_FP_VALUETYPE && SimpleTy <= MVT::LAST_FP_VALUETYPE) || - (SimpleTy >= MVT::FIRST_FP_VECTOR_VALUETYPE && - SimpleTy <= MVT::LAST_FP_VECTOR_VALUETYPE)); + (SimpleTy >= MVT::FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE && + SimpleTy <= MVT::LAST_FP_FIXEDLEN_VECTOR_VALUETYPE) || + (SimpleTy >= MVT::FIRST_FP_SCALABLE_VECTOR_VALUETYPE && + SimpleTy <= MVT::LAST_FP_SCALABLE_VECTOR_VALUETYPE)); } /// Return true if this is an integer or a vector integer type. 
bool isInteger() const { return ((SimpleTy >= MVT::FIRST_INTEGER_VALUETYPE && SimpleTy <= MVT::LAST_INTEGER_VALUETYPE) || - (SimpleTy >= MVT::FIRST_INTEGER_VECTOR_VALUETYPE && - SimpleTy <= MVT::LAST_INTEGER_VECTOR_VALUETYPE)); + (SimpleTy >= MVT::FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE && + SimpleTy <= MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE) || + (SimpleTy >= MVT::FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE && + SimpleTy <= MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE)); } /// Return true if this is an integer, not including vectors. @@ -335,10 +315,13 @@ namespace llvm { /// Return true if this is a vector value type where the /// runtime length is machine dependent bool isScalableVector() const { - return ((SimpleTy >= MVT::FIRST_INTEGER_SCALABLE_VALUETYPE && - SimpleTy <= MVT::LAST_INTEGER_SCALABLE_VALUETYPE) || - (SimpleTy >= MVT::FIRST_FP_SCALABLE_VALUETYPE && - SimpleTy <= MVT::LAST_FP_SCALABLE_VALUETYPE)); + return (SimpleTy >= MVT::FIRST_SCALABLE_VECTOR_VALUETYPE && + SimpleTy <= MVT::LAST_SCALABLE_VECTOR_VALUETYPE); + } + + bool isFixedLengthVector() const { + return (SimpleTy >= MVT::FIRST_FIXEDLEN_VECTOR_VALUETYPE && + SimpleTy <= MVT::LAST_FIXEDLEN_VECTOR_VALUETYPE); } /// Return true if this is a 16-bit vector type. @@ -373,17 +356,18 @@ namespace llvm { /// Return true if this is a 256-bit vector type. bool is256BitVector() const { - return (SimpleTy == MVT::v8f32 || SimpleTy == MVT::v4f64 || - SimpleTy == MVT::v32i8 || SimpleTy == MVT::v16i16 || - SimpleTy == MVT::v8i32 || SimpleTy == MVT::v4i64); + return (SimpleTy == MVT::v16f16 || SimpleTy == MVT::v8f32 || + SimpleTy == MVT::v4f64 || SimpleTy == MVT::v32i8 || + SimpleTy == MVT::v16i16 || SimpleTy == MVT::v8i32 || + SimpleTy == MVT::v4i64 || SimpleTy == MVT::v256i1); } /// Return true if this is a 512-bit vector type. bool is512BitVector() const { - return (SimpleTy == MVT::v16f32 || SimpleTy == MVT::v8f64 || - SimpleTy == MVT::v512i1 || SimpleTy == MVT::v64i8 || - SimpleTy == MVT::v32i16 || SimpleTy == MVT::v16i32 || - SimpleTy == MVT::v8i64); + return (SimpleTy == MVT::v32f16 || SimpleTy == MVT::v16f32 || + SimpleTy == MVT::v8f64 || SimpleTy == MVT::v512i1 || + SimpleTy == MVT::v64i8 || SimpleTy == MVT::v32i16 || + SimpleTy == MVT::v16i32 || SimpleTy == MVT::v8i64); } /// Return true if this is a 1024-bit vector type. @@ -406,6 +390,15 @@ namespace llvm { SimpleTy==MVT::vAny || SimpleTy==MVT::iPTRAny); } + /// Return a VT for a vector type with the same element type but + /// half the number of elements. + MVT getHalfNumVectorElementsVT() const { + MVT EltVT = getVectorElementType(); + auto EltCnt = getVectorElementCount(); + assert(!(EltCnt.Min & 1) && "Splitting vector, but not in half!"); + return getVectorVT(EltVT, EltCnt / 2); + } + /// Returns true if the given vector is a power of 2. 
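A small sketch of the MVT queries touched above: the fixed-length versus scalable split, getHalfNumVectorElementsVT, and the getVectorVT overload that now takes llvm::ElementCount from TypeSize.h (whose (Min, Scalable) constructor is assumed here). mvtSketch is a hypothetical name.

#include "llvm/Support/MachineValueType.h"
#include <cassert>

static void mvtSketch() {
  llvm::MVT V4F32 = llvm::MVT::getVectorVT(
      llvm::MVT::f32, llvm::ElementCount(4, /*Scalable=*/false));
  assert(V4F32 == llvm::MVT::v4f32 && V4F32.isFixedLengthVector());

  // Halving a vector type keeps the element type and splits the count.
  llvm::MVT V2F32 = V4F32.getHalfNumVectorElementsVT();
  assert(V2F32 == llvm::MVT::v2f32);

  llvm::MVT NXV4I32 = llvm::MVT::getVectorVT(
      llvm::MVT::i32, llvm::ElementCount(4, /*Scalable=*/true));
  assert(NXV4I32.isScalableVector() && !NXV4I32.isFixedLengthVector());
  (void)V2F32;
  (void)NXV4I32;
}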
bool isPow2VectorType() const { unsigned NElts = getVectorNumElements(); @@ -440,6 +433,7 @@ namespace llvm { case v32i1: case v64i1: case v128i1: + case v256i1: case v512i1: case v1024i1: case nxv1i1: @@ -465,6 +459,7 @@ namespace llvm { case nxv32i8: return i8; case v1i16: case v2i16: + case v3i16: case v4i16: case v8i16: case v16i16: @@ -511,8 +506,11 @@ namespace llvm { case nxv32i64: return i64; case v1i128: return i128; case v2f16: + case v3f16: case v4f16: case v8f16: + case v16f16: + case v32f16: case nxv2f16: case nxv4f16: case nxv8f16: return f16; @@ -558,6 +556,7 @@ namespace llvm { case v512i1: case v512i32: case v512f32: return 512; + case v256i1: case v256i8: case v256i32: case v256f32: return 256; @@ -576,6 +575,7 @@ namespace llvm { case v32i16: case v32i32: case v32i64: + case v32f16: case v32f32: case nxv32i1: case nxv32i8: @@ -587,6 +587,7 @@ namespace llvm { case v16i16: case v16i32: case v16i64: + case v16f16: case v16f32: case nxv16i1: case nxv16i8: @@ -628,7 +629,9 @@ namespace llvm { case nxv4f16: case nxv4f32: case nxv4f64: return 4; + case v3i16: case v3i32: + case v3f16: case v3f32: return 3; case v2i1: case v2i8: @@ -664,7 +667,7 @@ namespace llvm { } } - MVT::ElementCount getVectorElementCount() const { + ElementCount getVectorElementCount() const { return { getVectorNumElements(), isScalableVector() }; } @@ -721,6 +724,8 @@ namespace llvm { case nxv1i32: case nxv2f16: case nxv1f32: return 32; + case v3i16: + case v3f16: return 48; case x86mmx: case f64 : case i64 : @@ -763,10 +768,12 @@ namespace llvm { case nxv2f64: return 128; case v5i32: case v5f32: return 160; + case v256i1: case v32i8: case v16i16: case v8i32: case v4i64: + case v16f16: case v8f32: case v4f64: case nxv32i8: @@ -780,6 +787,7 @@ namespace llvm { case v32i16: case v16i32: case v8i64: + case v32f16: case v16f32: case v8f64: case nxv32i16: @@ -900,6 +908,7 @@ namespace llvm { if (NumElements == 32) return MVT::v32i1; if (NumElements == 64) return MVT::v64i1; if (NumElements == 128) return MVT::v128i1; + if (NumElements == 256) return MVT::v256i1; if (NumElements == 512) return MVT::v512i1; if (NumElements == 1024) return MVT::v1024i1; break; @@ -917,6 +926,7 @@ namespace llvm { case MVT::i16: if (NumElements == 1) return MVT::v1i16; if (NumElements == 2) return MVT::v2i16; + if (NumElements == 3) return MVT::v3i16; if (NumElements == 4) return MVT::v4i16; if (NumElements == 8) return MVT::v8i16; if (NumElements == 16) return MVT::v16i16; @@ -953,8 +963,11 @@ namespace llvm { break; case MVT::f16: if (NumElements == 2) return MVT::v2f16; + if (NumElements == 3) return MVT::v3f16; if (NumElements == 4) return MVT::v4f16; if (NumElements == 8) return MVT::v8f16; + if (NumElements == 16) return MVT::v16f16; + if (NumElements == 32) return MVT::v32f16; break; case MVT::f32: if (NumElements == 1) return MVT::v1f32; @@ -1054,7 +1067,7 @@ namespace llvm { return getVectorVT(VT, NumElements); } - static MVT getVectorVT(MVT VT, MVT::ElementCount EC) { + static MVT getVectorVT(MVT VT, ElementCount EC) { if (EC.Scalable) return getScalableVectorVT(VT, EC.Min); return getVectorVT(VT, EC.Min); @@ -1108,26 +1121,40 @@ namespace llvm { (MVT::SimpleValueType)(MVT::LAST_VECTOR_VALUETYPE + 1)); } - static mvt_range integer_vector_valuetypes() { + static mvt_range fixedlen_vector_valuetypes() { return mvt_range( - MVT::FIRST_INTEGER_VECTOR_VALUETYPE, - (MVT::SimpleValueType)(MVT::LAST_INTEGER_VECTOR_VALUETYPE + 1)); + MVT::FIRST_FIXEDLEN_VECTOR_VALUETYPE, + 
(MVT::SimpleValueType)(MVT::LAST_FIXEDLEN_VECTOR_VALUETYPE + 1)); } - static mvt_range fp_vector_valuetypes() { + static mvt_range scalable_vector_valuetypes() { return mvt_range( - MVT::FIRST_FP_VECTOR_VALUETYPE, - (MVT::SimpleValueType)(MVT::LAST_FP_VECTOR_VALUETYPE + 1)); + MVT::FIRST_SCALABLE_VECTOR_VALUETYPE, + (MVT::SimpleValueType)(MVT::LAST_SCALABLE_VECTOR_VALUETYPE + 1)); + } + + static mvt_range integer_fixedlen_vector_valuetypes() { + return mvt_range( + MVT::FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE, + (MVT::SimpleValueType)(MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE + 1)); + } + + static mvt_range fp_fixedlen_vector_valuetypes() { + return mvt_range( + MVT::FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE, + (MVT::SimpleValueType)(MVT::LAST_FP_FIXEDLEN_VECTOR_VALUETYPE + 1)); } static mvt_range integer_scalable_vector_valuetypes() { - return mvt_range(MVT::FIRST_INTEGER_SCALABLE_VALUETYPE, - (MVT::SimpleValueType)(MVT::LAST_INTEGER_SCALABLE_VALUETYPE + 1)); + return mvt_range( + MVT::FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE, + (MVT::SimpleValueType)(MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE + 1)); } static mvt_range fp_scalable_vector_valuetypes() { - return mvt_range(MVT::FIRST_FP_SCALABLE_VALUETYPE, - (MVT::SimpleValueType)(MVT::LAST_FP_SCALABLE_VALUETYPE + 1)); + return mvt_range( + MVT::FIRST_FP_SCALABLE_VECTOR_VALUETYPE, + (MVT::SimpleValueType)(MVT::LAST_FP_SCALABLE_VECTOR_VALUETYPE + 1)); } /// @} }; diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h index 249139e824b5..004a6f5f6eb8 100644 --- a/include/llvm/Support/MathExtras.h +++ b/include/llvm/Support/MathExtras.h @@ -39,6 +39,7 @@ unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask); #endif namespace llvm { + /// The behavior an operation has on an input of 0. enum ZeroBehavior { /// The returned value is undefined. @@ -49,6 +50,42 @@ enum ZeroBehavior { ZB_Width }; +/// Mathematical constants. +namespace numbers { +// TODO: Track C++20 std::numbers. +// TODO: Favor using the hexadecimal FP constants (requires C++17). 
+constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113 + egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620 + ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162 + ln10 = 2.3025850929940456840, // (0x1.24bb1bbb55516P+1) https://oeis.org/A002392 + log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0) + log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2) + pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796 + inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541 + sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161 + inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197 + sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A00219 + inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1) + sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194 + inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1) + phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622 +constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113 + egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620 + ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162 + ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392 + log2ef = 1.44269504F, // (0x1.715476P+0) + log10ef = .434294482F, // (0x1.bcb7b2P-2) + pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796 + inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541 + sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161 + inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197 + sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193 + inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1) + sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194 + inv_sqrt3f = .577350269F, // (0x1.279a74P-1) + phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622 +} // namespace numbers + namespace detail { template struct TrailingZerosCounter { static unsigned count(T Val, ZeroBehavior) { @@ -73,13 +110,13 @@ template struct TrailingZerosCounter { } }; -#if __GNUC__ >= 4 || defined(_MSC_VER) +#if defined(__GNUC__) || defined(_MSC_VER) template struct TrailingZerosCounter { static unsigned count(T Val, ZeroBehavior ZB) { if (ZB != ZB_Undefined && Val == 0) return 32; -#if __has_builtin(__builtin_ctz) || LLVM_GNUC_PREREQ(4, 0, 0) +#if __has_builtin(__builtin_ctz) || defined(__GNUC__) return __builtin_ctz(Val); #elif defined(_MSC_VER) unsigned long Index; @@ -95,7 +132,7 @@ template struct TrailingZerosCounter { if (ZB != ZB_Undefined && Val == 0) return 64; -#if __has_builtin(__builtin_ctzll) || LLVM_GNUC_PREREQ(4, 0, 0) +#if __has_builtin(__builtin_ctzll) || defined(__GNUC__) return __builtin_ctzll(Val); #elif defined(_MSC_VER) unsigned long Index; @@ -142,13 +179,13 @@ template struct LeadingZerosCounter { } }; -#if __GNUC__ >= 4 || defined(_MSC_VER) +#if defined(__GNUC__) || defined(_MSC_VER) template struct LeadingZerosCounter { static unsigned count(T Val, ZeroBehavior ZB) { if (ZB != ZB_Undefined && Val == 0) return 32; -#if __has_builtin(__builtin_clz) || LLVM_GNUC_PREREQ(4, 0, 0) +#if __has_builtin(__builtin_clz) || defined(__GNUC__) return __builtin_clz(Val); #elif defined(_MSC_VER) unsigned long Index; @@ -164,7 +201,7 @@ template struct LeadingZerosCounter { 
if (ZB != ZB_Undefined && Val == 0) return 64; -#if __has_builtin(__builtin_clzll) || LLVM_GNUC_PREREQ(4, 0, 0) +#if __has_builtin(__builtin_clzll) || defined(__GNUC__) return __builtin_clzll(Val); #elif defined(_MSC_VER) unsigned long Index; @@ -486,7 +523,7 @@ template struct PopulationCounter { static unsigned count(T Value) { // Generic version, forward to 32 bits. static_assert(SizeOfT <= 4, "Not implemented!"); -#if __GNUC__ >= 4 +#if defined(__GNUC__) return __builtin_popcount(Value); #else uint32_t v = Value; @@ -499,7 +536,7 @@ template struct PopulationCounter { template struct PopulationCounter { static unsigned count(T Value) { -#if __GNUC__ >= 4 +#if defined(__GNUC__) return __builtin_popcountll(Value); #else uint64_t v = Value; @@ -523,6 +560,16 @@ inline unsigned countPopulation(T Value) { return detail::PopulationCounter::count(Value); } +/// Compile time Log2. +/// Valid only for positive powers of two. +template constexpr inline size_t CTLog2() { + static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue), + "Value is not a valid power of 2"); + return 1 + CTLog2(); +} + +template <> constexpr inline size_t CTLog2<1>() { return 0; } + /// Return the log base 2 of the specified value. inline double Log2(double Value) { #if defined(__ANDROID_API__) && __ANDROID_API__ < 18 @@ -620,25 +667,6 @@ constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) { return (A | B) & (1 + ~(A | B)); } -/// Aligns \c Addr to \c Alignment bytes, rounding up. -/// -/// Alignment should be a power of two. This method rounds up, so -/// alignAddr(7, 4) == 8 and alignAddr(8, 4) == 8. -inline uintptr_t alignAddr(const void *Addr, size_t Alignment) { - assert(Alignment && isPowerOf2_64((uint64_t)Alignment) && - "Alignment is not a power of two!"); - - assert((uintptr_t)Addr + Alignment - 1 >= (uintptr_t)Addr); - - return (((uintptr_t)Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1)); -} - -/// Returns the necessary adjustment for aligning \c Ptr to \c Alignment -/// bytes, rounding up. -inline size_t alignmentAdjustment(const void *Ptr, size_t Alignment) { - return alignAddr(Ptr, Alignment) - (uintptr_t)Ptr; -} - /// Returns the next power of two (in 64-bits) that is strictly greater than A. /// Returns zero on overflow. inline uint64_t NextPowerOf2(uint64_t A) { @@ -704,19 +732,6 @@ inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) { return alignTo(Numerator, Denominator) / Denominator; } -/// \c alignTo for contexts where a constant expression is required. -/// \sa alignTo -/// -/// \todo FIXME: remove when \c constexpr becomes really \c constexpr -template -struct AlignTo { - static_assert(Align != 0u, "Align must be non-zero"); - template - struct from_value { - static const uint64_t value = (Value + Align - 1) / Align * Align; - }; -}; - /// Returns the largest uint64_t less than or equal to \p Value and is /// \p Skew mod \p Align. \p Align must be non-zero inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { @@ -725,13 +740,6 @@ inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { return (Value - Skew) / Align * Align + Skew; } -/// Returns the offset to the next integer (mod 2**64) that is greater than -/// or equal to \p Value and is a multiple of \p Align. \p Align must be -/// non-zero. -inline uint64_t OffsetToAlignment(uint64_t Value, uint64_t Align) { - return alignTo(Value, Align) - Value; -} - /// Sign-extend the number in the bottom B bits of X to a 32-bit integer. /// Requires 0 < B <= 32. 
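A tiny sketch of the MathExtras.h additions above: the llvm::numbers constants and the compile-time CTLog2 helper, which is valid only for powers of two. circleArea is a hypothetical name.

#include "llvm/Support/MathExtras.h"

static_assert(llvm::CTLog2<64>() == 6, "64 is 2^6");
static_assert(llvm::CTLog2<1>() == 0, "base case of the recursion");

static double circleArea(double Radius) {
  // Prefer the shared constant over a local #define of PI.
  return llvm::numbers::pi * Radius * Radius;
}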
template constexpr inline int32_t SignExtend32(uint32_t X) { @@ -853,6 +861,91 @@ SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) { /// Use this rather than HUGE_VALF; the latter causes warnings on MSVC. extern const float huge_valf; + + +/// Add two signed integers, computing the two's complement truncated result, +/// returning true if overflow occured. +template +typename std::enable_if::value, T>::type +AddOverflow(T X, T Y, T &Result) { +#if __has_builtin(__builtin_add_overflow) + return __builtin_add_overflow(X, Y, &Result); +#else + // Perform the unsigned addition. + using U = typename std::make_unsigned::type; + const U UX = static_cast(X); + const U UY = static_cast(Y); + const U UResult = UX + UY; + + // Convert to signed. + Result = static_cast(UResult); + + // Adding two positive numbers should result in a positive number. + if (X > 0 && Y > 0) + return Result <= 0; + // Adding two negatives should result in a negative number. + if (X < 0 && Y < 0) + return Result >= 0; + return false; +#endif +} + +/// Subtract two signed integers, computing the two's complement truncated +/// result, returning true if an overflow ocurred. +template +typename std::enable_if::value, T>::type +SubOverflow(T X, T Y, T &Result) { +#if __has_builtin(__builtin_sub_overflow) + return __builtin_sub_overflow(X, Y, &Result); +#else + // Perform the unsigned addition. + using U = typename std::make_unsigned::type; + const U UX = static_cast(X); + const U UY = static_cast(Y); + const U UResult = UX - UY; + + // Convert to signed. + Result = static_cast(UResult); + + // Subtracting a positive number from a negative results in a negative number. + if (X <= 0 && Y > 0) + return Result >= 0; + // Subtracting a negative number from a positive results in a positive number. + if (X >= 0 && Y < 0) + return Result <= 0; + return false; +#endif +} + + +/// Multiply two signed integers, computing the two's complement truncated +/// result, returning true if an overflow ocurred. +template +typename std::enable_if::value, T>::type +MulOverflow(T X, T Y, T &Result) { + // Perform the unsigned multiplication on absolute values. + using U = typename std::make_unsigned::type; + const U UX = X < 0 ? (0 - static_cast(X)) : static_cast(X); + const U UY = Y < 0 ? (0 - static_cast(Y)) : static_cast(Y); + const U UResult = UX * UY; + + // Convert to signed. + const bool IsNegative = (X < 0) ^ (Y < 0); + Result = IsNegative ? (0 - UResult) : UResult; + + // If any of the args was 0, result is 0 and no overflow occurs. + if (UX == 0 || UY == 0) + return false; + + // UX and UY are in [1, 2^n], where n is the number of digits. + // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for + // positive) divided by an argument compares to the other. + if (IsNegative) + return UX > (static_cast(std::numeric_limits::max()) + U(1)) / UY; + else + return UX > (static_cast(std::numeric_limits::max())) / UY; +} + } // End llvm namespace #endif diff --git a/include/llvm/Support/Mutex.h b/include/llvm/Support/Mutex.h index c3abfc7a7806..1d8a0d3c87cb 100644 --- a/include/llvm/Support/Mutex.h +++ b/include/llvm/Support/Mutex.h @@ -13,97 +13,31 @@ #ifndef LLVM_SUPPORT_MUTEX_H #define LLVM_SUPPORT_MUTEX_H -#include "llvm/Config/llvm-config.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Threading.h" #include +#include namespace llvm { namespace sys { - /// Platform agnostic Mutex class. 
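// Editor's note (illustrative sketch, not part of the imported patch): usage of
// the checked signed-arithmetic helpers added above. Each returns a nonzero
// value when the two's complement result overflowed; Result always receives the
// wrapped (truncated) value.
#include "llvm/Support/MathExtras.h"
#include <cstdint>
#include <limits>

inline bool overflowDemo() {
  int32_t Result;
  // INT32_MAX + 1 overflows; Result wraps around to INT32_MIN.
  bool AddOvf = llvm::AddOverflow<int32_t>(std::numeric_limits<int32_t>::max(), 1, Result);
  // 40 * 1000 fits in 32 bits, so no overflow and Result == 40000.
  bool MulOvf = llvm::MulOverflow<int32_t>(40, 1000, Result);
  return AddOvf && !MulOvf;
}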
- class MutexImpl - { - /// @name Constructors - /// @{ - public: - - /// Initializes the lock but doesn't acquire it. if \p recursive is set - /// to false, the lock will not be recursive which makes it cheaper but - /// also more likely to deadlock (same thread can't acquire more than - /// once). - /// Default Constructor. - explicit MutexImpl(bool recursive = true); - - /// Releases and removes the lock - /// Destructor - ~MutexImpl(); - - /// @} - /// @name Methods - /// @{ - public: - - /// Attempts to unconditionally acquire the lock. If the lock is held by - /// another thread, this method will wait until it can acquire the lock. - /// @returns false if any kind of error occurs, true otherwise. - /// Unconditionally acquire the lock. - bool acquire(); - - /// Attempts to release the lock. If the lock is held by the current - /// thread, the lock is released allowing other threads to acquire the - /// lock. - /// @returns false if any kind of error occurs, true otherwise. - /// Unconditionally release the lock. - bool release(); - - /// Attempts to acquire the lock without blocking. If the lock is not - /// available, this function returns false quickly (without blocking). If - /// the lock is available, it is acquired. - /// @returns false if any kind of error occurs or the lock is not - /// available, true otherwise. - /// Try to acquire the lock. - bool tryacquire(); - - //@} - /// @name Platform Dependent Data - /// @{ - private: -#if defined(LLVM_ENABLE_THREADS) && LLVM_ENABLE_THREADS != 0 - void* data_; ///< We don't know what the data will be -#endif - - /// @} - /// @name Do Not Implement - /// @{ - private: - MutexImpl(const MutexImpl &) = delete; - void operator=(const MutexImpl &) = delete; - /// @} - }; - - /// SmartMutex - A mutex with a compile time constant parameter that /// indicates whether this mutex should become a no-op when we're not /// running in multithreaded mode. template class SmartMutex { - MutexImpl impl; - unsigned acquired; - bool recursive; - public: - explicit SmartMutex(bool rec = true) : - impl(rec), acquired(0), recursive(rec) { } + std::recursive_mutex impl; + unsigned acquired = 0; + public: bool lock() { if (!mt_only || llvm_is_multithreaded()) { - return impl.acquire(); + impl.lock(); + return true; } else { // Single-threaded debugging code. This would be racy in // multithreaded mode, but provides not sanity checks in single // threaded mode. - assert((recursive || acquired == 0) && "Lock already acquired!!"); ++acquired; return true; } @@ -111,13 +45,13 @@ namespace llvm bool unlock() { if (!mt_only || llvm_is_multithreaded()) { - return impl.release(); + impl.unlock(); + return true; } else { // Single-threaded debugging code. This would be racy in // multithreaded mode, but provides not sanity checks in single // threaded mode. - assert(((recursive && acquired) || (acquired == 1)) && - "Lock not acquired before release!"); + assert(acquired && "Lock not acquired before release!"); --acquired; return true; } @@ -125,31 +59,16 @@ namespace llvm bool try_lock() { if (!mt_only || llvm_is_multithreaded()) - return impl.tryacquire(); + return impl.try_lock(); else return true; } - - private: - SmartMutex(const SmartMutex & original); - void operator=(const SmartMutex &); }; /// Mutex - A standard, always enforced mutex. 
typedef SmartMutex Mutex; - template - class SmartScopedLock { - SmartMutex& mtx; - - public: - SmartScopedLock(SmartMutex& m) : mtx(m) { - mtx.lock(); - } - - ~SmartScopedLock() { - mtx.unlock(); - } - }; + template + using SmartScopedLock = std::lock_guard>; typedef SmartScopedLock ScopedLock; } diff --git a/include/llvm/Support/MutexGuard.h b/include/llvm/Support/MutexGuard.h deleted file mode 100644 index d86ced145816..000000000000 --- a/include/llvm/Support/MutexGuard.h +++ /dev/null @@ -1,40 +0,0 @@ -//===-- Support/MutexGuard.h - Acquire/Release Mutex In Scope ---*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines a guard for a block of code that ensures a Mutex is locked -// upon construction and released upon destruction. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_SUPPORT_MUTEXGUARD_H -#define LLVM_SUPPORT_MUTEXGUARD_H - -#include "llvm/Support/Mutex.h" - -namespace llvm { - /// Instances of this class acquire a given Mutex Lock when constructed and - /// hold that lock until destruction. The intention is to instantiate one of - /// these on the stack at the top of some scope to be assured that C++ - /// destruction of the object will always release the Mutex and thus avoid - /// a host of nasty multi-threading problems in the face of exceptions, etc. - /// Guard a section of code with a Mutex. - class MutexGuard { - sys::Mutex &M; - MutexGuard(const MutexGuard &) = delete; - void operator=(const MutexGuard &) = delete; - public: - MutexGuard(sys::Mutex &m) : M(m) { M.lock(); } - ~MutexGuard() { M.unlock(); } - /// holds - Returns true if this locker instance holds the specified lock. - /// This is mostly used in assertions to validate that the correct mutex - /// is held. - bool holds(const sys::Mutex& lock) const { return &M == &lock; } - }; -} - -#endif // LLVM_SUPPORT_MUTEXGUARD_H diff --git a/include/llvm/Support/OnDiskHashTable.h b/include/llvm/Support/OnDiskHashTable.h index d84da92aab9b..11dc0de0f354 100644 --- a/include/llvm/Support/OnDiskHashTable.h +++ b/include/llvm/Support/OnDiskHashTable.h @@ -13,6 +13,7 @@ #ifndef LLVM_SUPPORT_ONDISKHASHTABLE_H #define LLVM_SUPPORT_ONDISKHASHTABLE_H +#include "llvm/Support/Alignment.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/EndianStream.h" @@ -207,7 +208,7 @@ public: // Pad with zeros so that we can start the hashtable at an aligned address. 
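// Editor's note (illustrative sketch, not part of the imported patch): with
// MutexGuard.h removed, the equivalent RAII pattern uses llvm::sys::ScopedLock,
// now an alias for std::lock_guard<llvm::sys::Mutex>. The names below are made
// up for illustration.
#include "llvm/Support/Mutex.h"

static llvm::sys::Mutex CounterLock;
static int Counter = 0;

inline void incrementCounter() {
  llvm::sys::ScopedLock Guard(CounterLock); // locks here, unlocks at scope exit
  ++Counter;
}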
offset_type TableOff = Out.tell(); - uint64_t N = llvm::OffsetToAlignment(TableOff, alignof(offset_type)); + uint64_t N = offsetToAlignment(TableOff, Align(alignof(offset_type))); TableOff += N; while (N--) LE.write(0); diff --git a/include/llvm/Support/Parallel.h b/include/llvm/Support/Parallel.h index eab9b492c4a5..3c0ed2c11127 100644 --- a/include/llvm/Support/Parallel.h +++ b/include/llvm/Support/Parallel.h @@ -18,14 +18,6 @@ #include #include -#if defined(_MSC_VER) && LLVM_ENABLE_THREADS -#pragma warning(push) -#pragma warning(disable : 4530) -#include -#include -#pragma warning(pop) -#endif - namespace llvm { namespace parallel { @@ -84,23 +76,6 @@ public: void sync() const { L.sync(); } }; -#if defined(_MSC_VER) -template -void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End, - const Comparator &Comp) { - concurrency::parallel_sort(Start, End, Comp); -} -template -void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) { - concurrency::parallel_for_each(Begin, End, Fn); -} - -template -void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) { - concurrency::parallel_for(Begin, End, Fn); -} - -#else const ptrdiff_t MinParallelSize = 1024; /// Inclusive median. @@ -188,8 +163,6 @@ void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) { #endif -#endif - template using DefComparator = std::less::value_type>; diff --git a/include/llvm/Support/RWMutex.h b/include/llvm/Support/RWMutex.h index 9cd57cbd65a1..150bc7dbbce1 100644 --- a/include/llvm/Support/RWMutex.h +++ b/include/llvm/Support/RWMutex.h @@ -16,161 +16,184 @@ #include "llvm/Config/llvm-config.h" #include "llvm/Support/Threading.h" #include +#include +#include + +// std::shared_timed_mutex is only availble on macOS 10.12 and later. +#if defined(__APPLE__) && defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) +#if __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101200 +#define LLVM_USE_RW_MUTEX_IMPL +#endif +#endif namespace llvm { namespace sys { - /// Platform agnostic RWMutex class. - class RWMutexImpl - { - /// @name Constructors - /// @{ - public: - - /// Initializes the lock but doesn't acquire it. - /// Default Constructor. - explicit RWMutexImpl(); - - /// @} - /// @name Do Not Implement - /// @{ - RWMutexImpl(const RWMutexImpl & original) = delete; - RWMutexImpl &operator=(const RWMutexImpl &) = delete; - /// @} - - /// Releases and removes the lock - /// Destructor - ~RWMutexImpl(); - - /// @} - /// @name Methods - /// @{ - public: - - /// Attempts to unconditionally acquire the lock in reader mode. If the - /// lock is held by a writer, this method will wait until it can acquire - /// the lock. - /// @returns false if any kind of error occurs, true otherwise. - /// Unconditionally acquire the lock in reader mode. - bool reader_acquire(); - - /// Attempts to release the lock in reader mode. - /// @returns false if any kind of error occurs, true otherwise. - /// Unconditionally release the lock in reader mode. - bool reader_release(); - - /// Attempts to unconditionally acquire the lock in reader mode. If the - /// lock is held by any readers, this method will wait until it can - /// acquire the lock. - /// @returns false if any kind of error occurs, true otherwise. - /// Unconditionally acquire the lock in writer mode. - bool writer_acquire(); - - /// Attempts to release the lock in writer mode. - /// @returns false if any kind of error occurs, true otherwise. - /// Unconditionally release the lock in write mode. 
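// Editor's note (illustrative sketch, not part of the imported patch): the
// OnDiskHashTable change above swaps the removed llvm::OffsetToAlignment for
// the Align-based offsetToAlignment from llvm/Support/Alignment.h; the result
// is the padding needed to reach the next aligned offset.
#include "llvm/Support/Alignment.h"
#include <cstdint>

inline uint64_t paddingToEight(uint64_t Offset) {
  return llvm::offsetToAlignment(Offset, llvm::Align(8)); // e.g. 5 -> 3, 16 -> 0
}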
- bool writer_release(); - - //@} - /// @name Platform Dependent Data - /// @{ - private: +#if defined(LLVM_USE_RW_MUTEX_IMPL) +/// Platform agnostic RWMutex class. +class RWMutexImpl { + /// @name Constructors + /// @{ +public: + /// Initializes the lock but doesn't acquire it. + /// Default Constructor. + explicit RWMutexImpl(); + + /// @} + /// @name Do Not Implement + /// @{ + RWMutexImpl(const RWMutexImpl &original) = delete; + RWMutexImpl &operator=(const RWMutexImpl &) = delete; + /// @} + + /// Releases and removes the lock + /// Destructor + ~RWMutexImpl(); + + /// @} + /// @name Methods + /// @{ +public: + /// Attempts to unconditionally acquire the lock in reader mode. If the + /// lock is held by a writer, this method will wait until it can acquire + /// the lock. + /// @returns false if any kind of error occurs, true otherwise. + /// Unconditionally acquire the lock in reader mode. + bool lock_shared(); + + /// Attempts to release the lock in reader mode. + /// @returns false if any kind of error occurs, true otherwise. + /// Unconditionally release the lock in reader mode. + bool unlock_shared(); + + /// Attempts to unconditionally acquire the lock in reader mode. If the + /// lock is held by any readers, this method will wait until it can + /// acquire the lock. + /// @returns false if any kind of error occurs, true otherwise. + /// Unconditionally acquire the lock in writer mode. + bool lock(); + + /// Attempts to release the lock in writer mode. + /// @returns false if any kind of error occurs, true otherwise. + /// Unconditionally release the lock in write mode. + bool unlock(); + + //@} + /// @name Platform Dependent Data + /// @{ +private: #if defined(LLVM_ENABLE_THREADS) && LLVM_ENABLE_THREADS != 0 - void* data_ = nullptr; ///< We don't know what the data will be + void *data_ = nullptr; ///< We don't know what the data will be +#endif +}; +#endif + +/// SmartMutex - An R/W mutex with a compile time constant parameter that +/// indicates whether this mutex should become a no-op when we're not +/// running in multithreaded mode. +template class SmartRWMutex { + // shared_mutex (C++17) is more efficient than shared_timed_mutex (C++14) + // on Windows and always available on MSVC. +#if defined(_MSC_VER) || __cplusplus > 201402L + std::shared_mutex impl; +#else +#if !defined(LLVM_USE_RW_MUTEX_IMPL) + std::shared_timed_mutex impl; +#else + RWMutexImpl impl; +#endif +#endif + unsigned readers = 0; + unsigned writers = 0; + +public: + bool lock_shared() { + if (!mt_only || llvm_is_multithreaded()) { + impl.lock_shared(); + return true; + } + + // Single-threaded debugging code. This would be racy in multithreaded + // mode, but provides not sanity checks in single threaded mode. + ++readers; + return true; + } + + bool unlock_shared() { + if (!mt_only || llvm_is_multithreaded()) { + impl.unlock_shared(); + return true; + } + + // Single-threaded debugging code. This would be racy in multithreaded + // mode, but provides not sanity checks in single threaded mode. + assert(readers > 0 && "Reader lock not acquired before release!"); + --readers; + return true; + } + + bool lock() { + if (!mt_only || llvm_is_multithreaded()) { + impl.lock(); + return true; + } + + // Single-threaded debugging code. This would be racy in multithreaded + // mode, but provides not sanity checks in single threaded mode. 
+ assert(writers == 0 && "Writer lock already acquired!"); + ++writers; + return true; + } + + bool unlock() { + if (!mt_only || llvm_is_multithreaded()) { + impl.unlock(); + return true; + } + + // Single-threaded debugging code. This would be racy in multithreaded + // mode, but provides not sanity checks in single threaded mode. + assert(writers == 1 && "Writer lock not acquired before release!"); + --writers; + return true; + } +}; + +typedef SmartRWMutex RWMutex; + +/// ScopedReader - RAII acquisition of a reader lock +#if !defined(LLVM_USE_RW_MUTEX_IMPL) +template +using SmartScopedReader = const std::shared_lock>; +#else +template struct SmartScopedReader { + SmartRWMutex &mutex; + + explicit SmartScopedReader(SmartRWMutex &m) : mutex(m) { + mutex.lock_shared(); + } + + ~SmartScopedReader() { mutex.unlock_shared(); } +}; +#endif +typedef SmartScopedReader ScopedReader; + +/// ScopedWriter - RAII acquisition of a writer lock +#if !defined(LLVM_USE_RW_MUTEX_IMPL) +template +using SmartScopedWriter = std::lock_guard>; +#else +template struct SmartScopedWriter { + SmartRWMutex &mutex; + + explicit SmartScopedWriter(SmartRWMutex &m) : mutex(m) { + mutex.lock(); + } + + ~SmartScopedWriter() { mutex.unlock(); } +}; #endif - }; - - /// SmartMutex - An R/W mutex with a compile time constant parameter that - /// indicates whether this mutex should become a no-op when we're not - /// running in multithreaded mode. - template - class SmartRWMutex { - RWMutexImpl impl; - unsigned readers = 0; - unsigned writers = 0; - - public: - explicit SmartRWMutex() = default; - SmartRWMutex(const SmartRWMutex & original) = delete; - SmartRWMutex &operator=(const SmartRWMutex &) = delete; - - bool lock_shared() { - if (!mt_only || llvm_is_multithreaded()) - return impl.reader_acquire(); - - // Single-threaded debugging code. This would be racy in multithreaded - // mode, but provides not sanity checks in single threaded mode. - ++readers; - return true; - } - - bool unlock_shared() { - if (!mt_only || llvm_is_multithreaded()) - return impl.reader_release(); - - // Single-threaded debugging code. This would be racy in multithreaded - // mode, but provides not sanity checks in single threaded mode. - assert(readers > 0 && "Reader lock not acquired before release!"); - --readers; - return true; - } - - bool lock() { - if (!mt_only || llvm_is_multithreaded()) - return impl.writer_acquire(); - - // Single-threaded debugging code. This would be racy in multithreaded - // mode, but provides not sanity checks in single threaded mode. - assert(writers == 0 && "Writer lock already acquired!"); - ++writers; - return true; - } - - bool unlock() { - if (!mt_only || llvm_is_multithreaded()) - return impl.writer_release(); - - // Single-threaded debugging code. This would be racy in multithreaded - // mode, but provides not sanity checks in single threaded mode. 
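// Editor's note (illustrative sketch, not part of the imported patch): the
// reader/writer RAII helpers declared above in use with llvm::sys::RWMutex
// (backed by std::shared_timed_mutex, or std::shared_mutex where available).
// The names below are made up for illustration.
#include "llvm/Support/RWMutex.h"

static llvm::sys::RWMutex TableLock;
static int Table[16];

inline int readEntry(unsigned I) {
  llvm::sys::ScopedReader R(TableLock); // shared (reader) lock
  return Table[I & 15];
}

inline void writeEntry(unsigned I, int V) {
  llvm::sys::ScopedWriter W(TableLock); // exclusive (writer) lock
  Table[I & 15] = V;
}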
- assert(writers == 1 && "Writer lock not acquired before release!"); - --writers; - return true; - } - }; - - typedef SmartRWMutex RWMutex; - - /// ScopedReader - RAII acquisition of a reader lock - template - struct SmartScopedReader { - SmartRWMutex& mutex; - - explicit SmartScopedReader(SmartRWMutex& m) : mutex(m) { - mutex.lock_shared(); - } - - ~SmartScopedReader() { - mutex.unlock_shared(); - } - }; - - typedef SmartScopedReader ScopedReader; - - /// ScopedWriter - RAII acquisition of a writer lock - template - struct SmartScopedWriter { - SmartRWMutex& mutex; - - explicit SmartScopedWriter(SmartRWMutex& m) : mutex(m) { - mutex.lock(); - } - - ~SmartScopedWriter() { - mutex.unlock(); - } - }; - - typedef SmartScopedWriter ScopedWriter; +typedef SmartScopedWriter ScopedWriter; } // end namespace sys } // end namespace llvm diff --git a/include/llvm/Support/Regex.h b/include/llvm/Support/Regex.h index 2d19b10fd890..b2620ab4cfc9 100644 --- a/include/llvm/Support/Regex.h +++ b/include/llvm/Support/Regex.h @@ -44,6 +44,9 @@ namespace llvm { Regex(); /// Compiles the given regular expression \p Regex. + /// + /// \param Regex - referenced string is no longer needed after this + /// constructor does finish. Only its compiled form is kept stored. Regex(StringRef Regex, unsigned Flags = NoFlags); Regex(const Regex &) = delete; Regex &operator=(Regex regex) { @@ -54,9 +57,10 @@ namespace llvm { Regex(Regex &®ex); ~Regex(); - /// isValid - returns the error encountered during regex compilation, or - /// matching, if any. + /// isValid - returns the error encountered during regex compilation, if + /// any. bool isValid(std::string &Error) const; + bool isValid() const { return !error; } /// getNumMatches - In a valid regex, return the number of parenthesized /// matches it contains. The number filled in by match will include this @@ -69,8 +73,12 @@ namespace llvm { /// with references to the matched group expressions (inside \p String), /// the first group is always the entire pattern. /// + /// \param Error - If non-null, any errors in the matching will be recorded + /// as a non-empty string. If there is no error, it will be an empty string. + /// /// This returns true on a successful match. - bool match(StringRef String, SmallVectorImpl *Matches = nullptr); + bool match(StringRef String, SmallVectorImpl *Matches = nullptr, + std::string *Error = nullptr) const; /// sub - Return the result of replacing the first match of the regex in /// \p String with the \p Repl string. Backreferences like "\0" in the @@ -81,9 +89,9 @@ namespace llvm { /// /// \param Error If non-null, any errors in the substitution (invalid /// backreferences, trailing backslashes) will be recorded as a non-empty - /// string. + /// string. If there is no error, it will be an empty string. std::string sub(StringRef Repl, StringRef String, - std::string *Error = nullptr); + std::string *Error = nullptr) const; /// If this function returns true, ^Str$ is an extended regular /// expression that matches Str and only Str. 
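// Editor's note (illustrative sketch, not part of the imported patch): the
// revised llvm::Regex API shown above -- match() is now const and can report
// matching errors through an optional std::string out-parameter. The pattern
// and helper below are made up for illustration.
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Regex.h"
#include <string>

inline bool findVersionMajor(llvm::StringRef Line, llvm::StringRef &Major) {
  llvm::Regex RE("^version ([0-9]+)");
  std::string CompileError;
  if (!RE.isValid(CompileError))
    return false; // pattern failed to compile
  llvm::SmallVector<llvm::StringRef, 2> Matches;
  std::string MatchError;
  if (!RE.match(Line, &Matches, &MatchError) || !MatchError.empty())
    return false;
  Major = Matches[1]; // group 0 is the whole match, group 1 the paren group
  return true;
}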
diff --git a/include/llvm/Support/Registry.h b/include/llvm/Support/Registry.h index 4d8aa5f1470d..5bb6a254a47f 100644 --- a/include/llvm/Support/Registry.h +++ b/include/llvm/Support/Registry.h @@ -115,7 +115,7 @@ namespace llvm { entry Entry; node Node; - static std::unique_ptr CtorFn() { return make_unique(); } + static std::unique_ptr CtorFn() { return std::make_unique(); } public: Add(StringRef Name, StringRef Desc) diff --git a/include/llvm/Support/SHA1.h b/include/llvm/Support/SHA1.h index 87fe94bbd5cd..2cfbd2179364 100644 --- a/include/llvm/Support/SHA1.h +++ b/include/llvm/Support/SHA1.h @@ -16,13 +16,13 @@ #define LLVM_SUPPORT_SHA1_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" #include #include namespace llvm { template class ArrayRef; -class StringRef; /// A class that wrap the SHA1 algorithm. class SHA1 { diff --git a/include/llvm/Support/ScalableSize.h b/include/llvm/Support/ScalableSize.h deleted file mode 100644 index 96bf043773a0..000000000000 --- a/include/llvm/Support/ScalableSize.h +++ /dev/null @@ -1,43 +0,0 @@ -//===- ScalableSize.h - Scalable vector size info ---------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file provides a struct that can be used to query the size of IR types -// which may be scalable vectors. It provides convenience operators so that -// it can be used in much the same way as a single scalar value. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_SUPPORT_SCALABLESIZE_H -#define LLVM_SUPPORT_SCALABLESIZE_H - -namespace llvm { - -class ElementCount { -public: - unsigned Min; // Minimum number of vector elements. - bool Scalable; // If true, NumElements is a multiple of 'Min' determined - // at runtime rather than compile time. - - ElementCount(unsigned Min, bool Scalable) - : Min(Min), Scalable(Scalable) {} - - ElementCount operator*(unsigned RHS) { - return { Min * RHS, Scalable }; - } - ElementCount operator/(unsigned RHS) { - return { Min / RHS, Scalable }; - } - - bool operator==(const ElementCount& RHS) const { - return Min == RHS.Min && Scalable == RHS.Scalable; - } -}; - -} // end namespace llvm - -#endif // LLVM_SUPPORT_SCALABLESIZE_H diff --git a/include/llvm/Support/Signals.h b/include/llvm/Support/Signals.h index a6b215a24311..a4f1fad22dd5 100644 --- a/include/llvm/Support/Signals.h +++ b/include/llvm/Support/Signals.h @@ -84,6 +84,17 @@ namespace sys { /// function. Note also that the handler may be executed on a different /// thread on some platforms. void SetInfoSignalFunction(void (*Handler)()); + + /// Registers a function to be called when a "pipe" signal is delivered to + /// the process. + /// + /// The "pipe" signal typically indicates a failed write to a pipe (SIGPIPE). + /// The default installed handler calls `exit(EX_IOERR)`, causing the process + /// to immediately exit with an IO error exit code. + /// + /// This function is only applicable on POSIX systems. 
+ void SetPipeSignalFunction(void (*Handler)()); + } // End sys namespace } // End llvm namespace diff --git a/include/llvm/Support/SwapByteOrder.h b/include/llvm/Support/SwapByteOrder.h index 06a447a27c2a..6cec87006c02 100644 --- a/include/llvm/Support/SwapByteOrder.h +++ b/include/llvm/Support/SwapByteOrder.h @@ -22,9 +22,37 @@ #include #endif +#if defined(__linux__) || defined(__GNU__) || defined(__HAIKU__) +#include +#elif defined(_AIX) +#include +#elif defined(__sun) +/* Solaris provides _BIG_ENDIAN/_LITTLE_ENDIAN selector in sys/types.h */ +#include +#define BIG_ENDIAN 4321 +#define LITTLE_ENDIAN 1234 +#if defined(_BIG_ENDIAN) +#define BYTE_ORDER BIG_ENDIAN +#else +#define BYTE_ORDER LITTLE_ENDIAN +#endif +#else +#if !defined(BYTE_ORDER) && !defined(_WIN32) +#include +#endif +#endif + namespace llvm { namespace sys { +#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN +constexpr bool IsBigEndianHost = true; +#else +constexpr bool IsBigEndianHost = false; +#endif + +static const bool IsLittleEndianHost = !IsBigEndianHost; + /// SwapByteOrder_16 - This function returns a byte-swapped representation of /// the 16-bit argument. inline uint16_t SwapByteOrder_16(uint16_t value) { @@ -39,10 +67,9 @@ inline uint16_t SwapByteOrder_16(uint16_t value) { #endif } -/// SwapByteOrder_32 - This function returns a byte-swapped representation of -/// the 32-bit argument. +/// This function returns a byte-swapped representation of the 32-bit argument. inline uint32_t SwapByteOrder_32(uint32_t value) { -#if defined(__llvm__) || (LLVM_GNUC_PREREQ(4, 3, 0) && !defined(__ICC)) +#if defined(__llvm__) || (defined(__GNUC__) && !defined(__ICC)) return __builtin_bswap32(value); #elif defined(_MSC_VER) && !defined(_DEBUG) return _byteswap_ulong(value); @@ -55,10 +82,9 @@ inline uint32_t SwapByteOrder_32(uint32_t value) { #endif } -/// SwapByteOrder_64 - This function returns a byte-swapped representation of -/// the 64-bit argument. +/// This function returns a byte-swapped representation of the 64-bit argument. inline uint64_t SwapByteOrder_64(uint64_t value) { -#if defined(__llvm__) || (LLVM_GNUC_PREREQ(4, 3, 0) && !defined(__ICC)) +#if defined(__llvm__) || (defined(__GNUC__) && !defined(__ICC)) return __builtin_bswap64(value); #elif defined(_MSC_VER) && !defined(_DEBUG) return _byteswap_uint64(value); diff --git a/include/llvm/Support/TargetOpcodes.def b/include/llvm/Support/TargetOpcodes.def index 598c1064efd0..11731ac35415 100644 --- a/include/llvm/Support/TargetOpcodes.def +++ b/include/llvm/Support/TargetOpcodes.def @@ -294,9 +294,21 @@ HANDLE_TARGET_OPCODE(G_SEXTLOAD) /// Generic zeroext load HANDLE_TARGET_OPCODE(G_ZEXTLOAD) +/// Generic indexed load (including anyext load) +HANDLE_TARGET_OPCODE(G_INDEXED_LOAD) + +/// Generic indexed signext load +HANDLE_TARGET_OPCODE(G_INDEXED_SEXTLOAD) + +/// Generic indexed zeroext load +HANDLE_TARGET_OPCODE(G_INDEXED_ZEXTLOAD) + /// Generic store. HANDLE_TARGET_OPCODE(G_STORE) +/// Generic indexed store. +HANDLE_TARGET_OPCODE(G_INDEXED_STORE) + /// Generic atomic cmpxchg with internal success check. 
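// Editor's note (illustrative sketch, not part of the imported patch): the host
// endianness constants and byte-swap helpers from SwapByteOrder.h above make
// explicit endianness conversions straightforward.
#include "llvm/Support/SwapByteOrder.h"
#include <cstdint>

inline uint32_t toLittleEndian32(uint32_t HostValue) {
  // Big-endian hosts must swap to produce a little-endian on-disk layout;
  // little-endian hosts already have the bytes in the right order.
  return llvm::sys::IsBigEndianHost ? llvm::sys::SwapByteOrder_32(HostValue)
                                    : HostValue;
}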
HANDLE_TARGET_OPCODE(G_ATOMIC_CMPXCHG_WITH_SUCCESS) @@ -315,6 +327,8 @@ HANDLE_TARGET_OPCODE(G_ATOMICRMW_MAX) HANDLE_TARGET_OPCODE(G_ATOMICRMW_MIN) HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMAX) HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMIN) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_FADD) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_FSUB) // Generic atomic fence HANDLE_TARGET_OPCODE(G_FENCE) @@ -354,6 +368,7 @@ HANDLE_TARGET_OPCODE(G_VAARG) // Generic sign extend HANDLE_TARGET_OPCODE(G_SEXT) +HANDLE_TARGET_OPCODE(G_SEXT_INREG) // Generic zero extend HANDLE_TARGET_OPCODE(G_ZEXT) @@ -436,6 +451,9 @@ HANDLE_TARGET_OPCODE(G_FMUL) /// Generic FMA multiplication. Behaves like llvm fma intrinsic HANDLE_TARGET_OPCODE(G_FMA) +/// Generic FP multiply and add. Behaves as separate fmul and fadd. +HANDLE_TARGET_OPCODE(G_FMAD) + /// Generic FP division. HANDLE_TARGET_OPCODE(G_FDIV) @@ -557,6 +575,9 @@ HANDLE_TARGET_OPCODE(G_CTPOP) /// Generic byte swap. HANDLE_TARGET_OPCODE(G_BSWAP) +/// Generic bit reverse. +HANDLE_TARGET_OPCODE(G_BITREVERSE) + /// Floating point ceil. HANDLE_TARGET_OPCODE(G_FCEIL) @@ -587,12 +608,15 @@ HANDLE_TARGET_OPCODE(G_BLOCK_ADDR) /// Generic jump table address HANDLE_TARGET_OPCODE(G_JUMP_TABLE) +/// Generic dynamic stack allocation. +HANDLE_TARGET_OPCODE(G_DYN_STACKALLOC) + // TODO: Add more generic opcodes as we move along. /// Marker for the end of the generic opcode. /// This is used to check if an opcode is in the range of the /// generic opcodes. -HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_JUMP_TABLE) +HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_DYN_STACKALLOC) /// BUILTIN_OP_END - This must be the last enum value in this list. /// The target-specific post-isel opcode values start here. diff --git a/include/llvm/Support/TargetRegistry.h b/include/llvm/Support/TargetRegistry.h index bf75650760d0..f4bc26b858c8 100644 --- a/include/llvm/Support/TargetRegistry.h +++ b/include/llvm/Support/TargetRegistry.h @@ -510,8 +510,8 @@ public: std::move(Emitter), RelaxAll); break; case Triple::XCOFF: - S = createXCOFFStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + S = createXCOFFStreamer(Ctx, std::move(TAB), std::move(OW), + std::move(Emitter), RelaxAll); break; } if (ObjectTargetStreamerCtorFn) diff --git a/include/llvm/Support/TimeProfiler.h b/include/llvm/Support/TimeProfiler.h index 72b6f7180bde..8cc430d0bc72 100644 --- a/include/llvm/Support/TimeProfiler.h +++ b/include/llvm/Support/TimeProfiler.h @@ -19,7 +19,7 @@ extern TimeTraceProfiler *TimeTraceProfilerInstance; /// Initialize the time trace profiler. /// This sets up the global \p TimeTraceProfilerInstance /// variable to be the profiler instance. -void timeTraceProfilerInitialize(); +void timeTraceProfilerInitialize(unsigned TimeTraceGranularity); /// Cleanup the time trace profiler, if it was initialized. void timeTraceProfilerCleanup(); diff --git a/include/llvm/Support/TrailingObjects.h b/include/llvm/Support/TrailingObjects.h index 8cf4f7aed7f8..49be89613c43 100644 --- a/include/llvm/Support/TrailingObjects.h +++ b/include/llvm/Support/TrailingObjects.h @@ -47,6 +47,7 @@ #define LLVM_SUPPORT_TRAILINGOBJECTS_H #include "llvm/Support/AlignOf.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/type_traits.h" @@ -87,11 +88,6 @@ protected: template struct OverloadToken {}; }; -/// This helper template works-around MSVC 2013's lack of useful -/// alignas() support. 
The argument to alignas(), in MSVC, is -/// required to be a literal integer. But, you *can* use template -/// specialization to select between a bunch of different alignas() -/// expressions... template class TrailingObjectsAligner : public TrailingObjectsBase {}; template <> @@ -172,7 +168,7 @@ protected: if (requiresRealignment()) return reinterpret_cast( - llvm::alignAddr(Ptr, alignof(NextTy))); + alignAddr(Ptr, Align::Of())); else return reinterpret_cast(Ptr); } @@ -186,7 +182,7 @@ protected: Obj, TrailingObjectsBase::OverloadToken()); if (requiresRealignment()) - return reinterpret_cast(llvm::alignAddr(Ptr, alignof(NextTy))); + return reinterpret_cast(alignAddr(Ptr, Align::Of())); else return reinterpret_cast(Ptr); } @@ -254,9 +250,7 @@ class TrailingObjects : private trailing_objects_internal::TrailingObjectsImpl< // because BaseTy isn't complete at class instantiation time, but // will be by the time this function is instantiated. static void verifyTrailingObjectsAssertions() { -#ifdef LLVM_IS_FINAL - static_assert(LLVM_IS_FINAL(BaseTy), "BaseTy must be final."); -#endif + static_assert(std::is_final(), "BaseTy must be final."); } // These two methods are the base of the recursion for this method. @@ -369,7 +363,9 @@ public: template struct FixedSizeStorage { template struct with_counts { enum { Size = totalSizeToAlloc(Counts...) }; - typedef llvm::AlignedCharArray type; + struct type { + alignas(BaseTy) char buffer[Size]; + }; }; }; diff --git a/include/llvm/Support/TypeSize.h b/include/llvm/Support/TypeSize.h new file mode 100644 index 000000000000..711679cdcacb --- /dev/null +++ b/include/llvm/Support/TypeSize.h @@ -0,0 +1,201 @@ +//===- TypeSize.h - Wrapper around type sizes -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides a struct that can be used to query the size of IR types +// which may be scalable vectors. It provides convenience operators so that +// it can be used in much the same way as a single scalar value. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_TYPESIZE_H +#define LLVM_SUPPORT_TYPESIZE_H + +#include +#include + +namespace llvm { + +class ElementCount { +public: + unsigned Min; // Minimum number of vector elements. + bool Scalable; // If true, NumElements is a multiple of 'Min' determined + // at runtime rather than compile time. + + ElementCount(unsigned Min, bool Scalable) + : Min(Min), Scalable(Scalable) {} + + ElementCount operator*(unsigned RHS) { + return { Min * RHS, Scalable }; + } + ElementCount operator/(unsigned RHS) { + return { Min / RHS, Scalable }; + } + + bool operator==(const ElementCount& RHS) const { + return Min == RHS.Min && Scalable == RHS.Scalable; + } + bool operator!=(const ElementCount& RHS) const { + return !(*this == RHS); + } +}; + +// This class is used to represent the size of types. If the type is of fixed +// size, it will represent the exact size. If the type is a scalable vector, +// it will represent the known minimum size. +class TypeSize { + uint64_t MinSize; // The known minimum size. + bool IsScalable; // If true, then the runtime size is an integer multiple + // of MinSize. 
+ +public: + constexpr TypeSize(uint64_t MinSize, bool Scalable) + : MinSize(MinSize), IsScalable(Scalable) {} + + static constexpr TypeSize Fixed(uint64_t Size) { + return TypeSize(Size, /*IsScalable=*/false); + } + + static constexpr TypeSize Scalable(uint64_t MinSize) { + return TypeSize(MinSize, /*IsScalable=*/true); + } + + // Scalable vector types with the same minimum size as a fixed size type are + // not guaranteed to be the same size at runtime, so they are never + // considered to be equal. + friend bool operator==(const TypeSize &LHS, const TypeSize &RHS) { + return std::tie(LHS.MinSize, LHS.IsScalable) == + std::tie(RHS.MinSize, RHS.IsScalable); + } + + friend bool operator!=(const TypeSize &LHS, const TypeSize &RHS) { + return !(LHS == RHS); + } + + // For many cases, size ordering between scalable and fixed size types cannot + // be determined at compile time, so such comparisons aren't allowed. + // + // e.g. could be bigger than <4 x i32> with a runtime + // vscale >= 5, equal sized with a vscale of 4, and smaller with + // a vscale <= 3. + // + // If the scalable flags match, just perform the requested comparison + // between the minimum sizes. + friend bool operator<(const TypeSize &LHS, const TypeSize &RHS) { + assert(LHS.IsScalable == RHS.IsScalable && + "Ordering comparison of scalable and fixed types"); + + return LHS.MinSize < RHS.MinSize; + } + + friend bool operator>(const TypeSize &LHS, const TypeSize &RHS) { + return RHS < LHS; + } + + friend bool operator<=(const TypeSize &LHS, const TypeSize &RHS) { + return !(RHS < LHS); + } + + friend bool operator>=(const TypeSize &LHS, const TypeSize& RHS) { + return !(LHS < RHS); + } + + // Convenience operators to obtain relative sizes independently of + // the scalable flag. + TypeSize operator*(unsigned RHS) const { + return { MinSize * RHS, IsScalable }; + } + + friend TypeSize operator*(const unsigned LHS, const TypeSize &RHS) { + return { LHS * RHS.MinSize, RHS.IsScalable }; + } + + TypeSize operator/(unsigned RHS) const { + return { MinSize / RHS, IsScalable }; + } + + // Return the minimum size with the assumption that the size is exact. + // Use in places where a scalable size doesn't make sense (e.g. non-vector + // types, or vectors in backends which don't support scalable vectors). + uint64_t getFixedSize() const { + assert(!IsScalable && "Request for a fixed size on a scalable object"); + return MinSize; + } + + // Return the known minimum size. Use in places where the scalable property + // doesn't matter (e.g. determining alignment) or in conjunction with the + // isScalable method below. + uint64_t getKnownMinSize() const { + return MinSize; + } + + // Return whether or not the size is scalable. + bool isScalable() const { + return IsScalable; + } + + // Casts to a uint64_t if this is a fixed-width size. + // + // NOTE: This interface is obsolete and will be removed in a future version + // of LLVM in favour of calling getFixedSize() directly. + operator uint64_t() const { + return getFixedSize(); + } + + // Additional convenience operators needed to avoid ambiguous parses. + // TODO: Make uint64_t the default operator? 
+ TypeSize operator*(uint64_t RHS) const { + return { MinSize * RHS, IsScalable }; + } + + TypeSize operator*(int RHS) const { + return { MinSize * RHS, IsScalable }; + } + + TypeSize operator*(int64_t RHS) const { + return { MinSize * RHS, IsScalable }; + } + + friend TypeSize operator*(const uint64_t LHS, const TypeSize &RHS) { + return { LHS * RHS.MinSize, RHS.IsScalable }; + } + + friend TypeSize operator*(const int LHS, const TypeSize &RHS) { + return { LHS * RHS.MinSize, RHS.IsScalable }; + } + + friend TypeSize operator*(const int64_t LHS, const TypeSize &RHS) { + return { LHS * RHS.MinSize, RHS.IsScalable }; + } + + TypeSize operator/(uint64_t RHS) const { + return { MinSize / RHS, IsScalable }; + } + + TypeSize operator/(int RHS) const { + return { MinSize / RHS, IsScalable }; + } + + TypeSize operator/(int64_t RHS) const { + return { MinSize / RHS, IsScalable }; + } +}; + +/// Returns a TypeSize with a known minimum size that is the next integer +/// (mod 2**64) that is greater than or equal to \p Value and is a multiple +/// of \p Align. \p Align must be non-zero. +/// +/// Similar to the alignTo functions in MathExtras.h +inline TypeSize alignTo(TypeSize Size, uint64_t Align) { + assert(Align != 0u && "Align must be non-zero"); + return {(Size.getKnownMinSize() + Align - 1) / Align * Align, + Size.isScalable()}; +} + +} // end namespace llvm + +#endif // LLVM_SUPPORT_TypeSize_H diff --git a/include/llvm/Support/UnicodeCharRanges.h b/include/llvm/Support/UnicodeCharRanges.h index 4b59f8a92b76..73d3603b74df 100644 --- a/include/llvm/Support/UnicodeCharRanges.h +++ b/include/llvm/Support/UnicodeCharRanges.h @@ -9,11 +9,8 @@ #define LLVM_SUPPORT_UNICODECHARRANGES_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Mutex.h" -#include "llvm/Support/MutexGuard.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/include/llvm/Support/UniqueLock.h b/include/llvm/Support/UniqueLock.h deleted file mode 100644 index 0a887ad5965d..000000000000 --- a/include/llvm/Support/UniqueLock.h +++ /dev/null @@ -1,68 +0,0 @@ -//===- Support/UniqueLock.h - Acquire/Release Mutex In Scope ----*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines a guard for a block of code that ensures a Mutex is locked -// upon construction and released upon destruction. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_SUPPORT_UNIQUE_LOCK_H -#define LLVM_SUPPORT_UNIQUE_LOCK_H - -#include - -namespace llvm { - - /// A pared-down imitation of std::unique_lock from C++11. Contrary to the - /// name, it's really more of a wrapper for a lock. It may or may not have - /// an associated mutex, which is guaranteed to be locked upon creation - /// and unlocked after destruction. unique_lock can also unlock the mutex - /// and re-lock it freely during its lifetime. - /// Guard a section of code with a mutex. 
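// Editor's note (illustrative sketch, not part of the imported patch): the new
// TypeSize keeps fixed and scalable sizes distinct; equality across different
// scalability is always false, and ordering is only defined when the
// scalability matches.
#include "llvm/Support/TypeSize.h"
#include <cstdint>

inline void typeSizeDemo() {
  llvm::TypeSize FixedV4I32 = llvm::TypeSize::Fixed(128);    // e.g. <4 x i32>
  llvm::TypeSize ScalV4I32 = llvm::TypeSize::Scalable(128);  // <vscale x 4 x i32>

  bool Same = (FixedV4I32 == ScalV4I32);          // false: scalability differs
  uint64_t Bits = FixedV4I32.getFixedSize();      // 128
  uint64_t MinBits = ScalV4I32.getKnownMinSize(); // 128, scaled at runtime
  llvm::TypeSize Doubled = ScalV4I32 * 2;         // still scalable, min 256
  (void)Same; (void)Bits; (void)MinBits; (void)Doubled;
}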
- template - class unique_lock { - MutexT *M = nullptr; - bool locked = false; - - public: - unique_lock() = default; - explicit unique_lock(MutexT &m) : M(&m), locked(true) { M->lock(); } - unique_lock(const unique_lock &) = delete; - unique_lock &operator=(const unique_lock &) = delete; - - void operator=(unique_lock &&o) { - if (owns_lock()) - M->unlock(); - M = o.M; - locked = o.locked; - o.M = nullptr; - o.locked = false; - } - - ~unique_lock() { if (owns_lock()) M->unlock(); } - - void lock() { - assert(!locked && "mutex already locked!"); - assert(M && "no associated mutex!"); - M->lock(); - locked = true; - } - - void unlock() { - assert(locked && "unlocking a mutex that isn't locked!"); - assert(M && "no associated mutex!"); - M->unlock(); - locked = false; - } - - bool owns_lock() { return locked; } - }; - -} // end namespace llvm - -#endif // LLVM_SUPPORT_UNIQUE_LOCK_H diff --git a/include/llvm/Support/VirtualFileSystem.h b/include/llvm/Support/VirtualFileSystem.h index 31c9e851daed..c844d9d194f0 100644 --- a/include/llvm/Support/VirtualFileSystem.h +++ b/include/llvm/Support/VirtualFileSystem.h @@ -647,9 +647,19 @@ private: friend class VFSFromYamlDirIterImpl; friend class RedirectingFileSystemParser; + bool shouldUseExternalFS() const { + return ExternalFSValidWD && IsFallthrough; + } + /// The root(s) of the virtual file system. std::vector> Roots; + /// The current working directory of the file system. + std::string WorkingDirectory; + + /// Whether the current working directory is valid for the external FS. + bool ExternalFSValidWD = false; + /// The file system to use for external references. IntrusiveRefCntPtr ExternalFS; @@ -689,8 +699,7 @@ private: true; #endif - RedirectingFileSystem(IntrusiveRefCntPtr ExternalFS) - : ExternalFS(std::move(ExternalFS)) {} + RedirectingFileSystem(IntrusiveRefCntPtr ExternalFS); /// Looks up the path [Start, End) in \p From, possibly /// recursing into the contents of \p From if it is a directory. @@ -730,9 +739,10 @@ public: StringRef getExternalContentsPrefixDir() const; + void dump(raw_ostream &OS) const; + void dumpEntry(raw_ostream &OS, Entry *E, int NumSpaces = 0) const; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void dump() const; - LLVM_DUMP_METHOD void dumpEntry(Entry *E, int NumSpaces = 0) const; #endif }; diff --git a/include/llvm/Support/Win64EH.h b/include/llvm/Support/Win64EH.h index bdd23b41594e..8220131e5be9 100644 --- a/include/llvm/Support/Win64EH.h +++ b/include/llvm/Support/Win64EH.h @@ -30,7 +30,9 @@ enum UnwindOpcodes { UOP_SetFPReg, UOP_SaveNonVol, UOP_SaveNonVolBig, - UOP_SaveXMM128 = 8, + UOP_Epilog, + UOP_SpareCode, + UOP_SaveXMM128, UOP_SaveXMM128Big, UOP_PushMachFrame, // The following set of unwind opcodes is for ARM64. 
They are documented at diff --git a/include/llvm/Support/X86TargetParser.def b/include/llvm/Support/X86TargetParser.def index 1749be3b3ae2..4ebf2d79cb8d 100644 --- a/include/llvm/Support/X86TargetParser.def +++ b/include/llvm/Support/X86TargetParser.def @@ -112,6 +112,7 @@ X86_CPU_SUBTYPE ("k6-2", AMDPENTIUM_K62) X86_CPU_SUBTYPE ("k6-3", AMDPENTIUM_K63) X86_CPU_SUBTYPE ("geode", AMDPENTIUM_GEODE) X86_CPU_SUBTYPE ("cooperlake", INTEL_COREI7_COOPERLAKE) +X86_CPU_SUBTYPE ("tigerlake", INTEL_COREI7_TIGERLAKE) #undef X86_CPU_SUBTYPE_COMPAT #undef X86_CPU_SUBTYPE @@ -160,12 +161,13 @@ X86_FEATURE_COMPAT(32, FEATURE_GFNI, "gfni") X86_FEATURE_COMPAT(33, FEATURE_VPCLMULQDQ, "vpclmulqdq") X86_FEATURE_COMPAT(34, FEATURE_AVX512VNNI, "avx512vnni") X86_FEATURE_COMPAT(35, FEATURE_AVX512BITALG, "avx512bitalg") +X86_FEATURE_COMPAT(36, FEATURE_AVX512BF16, "avx512bf16") // Features below here are not in libgcc/compiler-rt. X86_FEATURE (64, FEATURE_MOVBE) X86_FEATURE (65, FEATURE_ADX) X86_FEATURE (66, FEATURE_EM64T) X86_FEATURE (67, FEATURE_CLFLUSHOPT) X86_FEATURE (68, FEATURE_SHA) -X86_FEATURE (69, FEATURE_AVX512BF16) +X86_FEATURE (69, FEATURE_AVX512VP2INTERSECT) #undef X86_FEATURE_COMPAT #undef X86_FEATURE diff --git a/include/llvm/Support/YAMLTraits.h b/include/llvm/Support/YAMLTraits.h index 5181dc56d81d..a3bfa7dc4678 100644 --- a/include/llvm/Support/YAMLTraits.h +++ b/include/llvm/Support/YAMLTraits.h @@ -649,7 +649,8 @@ inline bool isBool(StringRef S) { inline QuotingType needsQuotes(StringRef S) { if (S.empty()) return QuotingType::Single; - if (isspace(S.front()) || isspace(S.back())) + if (isspace(static_cast(S.front())) || + isspace(static_cast(S.back()))) return QuotingType::Single; if (isNull(S)) return QuotingType::Single; @@ -748,7 +749,7 @@ public: IO(void *Ctxt = nullptr); virtual ~IO(); - virtual bool outputting() = 0; + virtual bool outputting() const = 0; virtual unsigned beginSequence() = 0; virtual bool preflightElement(unsigned, void *&) = 0; @@ -842,7 +843,7 @@ public: Val = Val | ConstVal; } - void *getContext(); + void *getContext() const; void setContext(void *); template void mapRequired(const char *Key, T &Val) { @@ -1402,7 +1403,7 @@ public: std::error_code error(); private: - bool outputting() override; + bool outputting() const override; bool mapTag(StringRef, bool) override; void beginMapping() override; void endMapping() override; @@ -1549,7 +1550,7 @@ public: /// anyway. void setWriteDefaultValues(bool Write) { WriteDefaultValues = Write; } - bool outputting() override; + bool outputting() const override; bool mapTag(StringRef, bool) override; void beginMapping() override; void endMapping() override; diff --git a/include/llvm/Support/circular_raw_ostream.h b/include/llvm/Support/circular_raw_ostream.h index 4ecdb17376f1..a72acd4fe002 100644 --- a/include/llvm/Support/circular_raw_ostream.h +++ b/include/llvm/Support/circular_raw_ostream.h @@ -122,6 +122,10 @@ namespace llvm { delete[] BufferArray; } + bool is_displayed() const override { + return TheStream->is_displayed(); + } + /// setStream - Tell the circular_raw_ostream to output a /// different stream. 
"Owns" tells circular_raw_ostream whether /// it should take responsibility for managing the underlying diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h index 48bb623b0638..0debc5da7a68 100644 --- a/include/llvm/Support/raw_ostream.h +++ b/include/llvm/Support/raw_ostream.h @@ -72,7 +72,7 @@ private: public: // color order matches ANSI escape sequence, don't change - enum Colors { + enum class Colors { BLACK = 0, RED, GREEN, @@ -81,9 +81,21 @@ public: MAGENTA, CYAN, WHITE, - SAVEDCOLOR + SAVEDCOLOR, + RESET, }; + static const Colors BLACK = Colors::BLACK; + static const Colors RED = Colors::RED; + static const Colors GREEN = Colors::GREEN; + static const Colors YELLOW = Colors::YELLOW; + static const Colors BLUE = Colors::BLUE; + static const Colors MAGENTA = Colors::MAGENTA; + static const Colors CYAN = Colors::CYAN; + static const Colors WHITE = Colors::WHITE; + static const Colors SAVEDCOLOR = Colors::SAVEDCOLOR; + static const Colors RESET = Colors::RESET; + explicit raw_ostream(bool unbuffered = false) : BufferMode(unbuffered ? Unbuffered : InternalBuffer) { // Start out ready to flush. @@ -214,6 +226,9 @@ public: /// Output \p N in hexadecimal, without any prefix or padding. raw_ostream &write_hex(unsigned long long N); + // Change the foreground color of text. + raw_ostream &operator<<(Colors C); + /// Output a formatted UUID with dash separators. using uuid_t = uint8_t[16]; raw_ostream &write_uuid(const uuid_t UUID); @@ -277,6 +292,10 @@ public: /// This function determines if this stream is displayed and supports colors. virtual bool has_colors() const { return is_displayed(); } + // Enable or disable colors. Once disable_colors() is called, + // changeColor() has no effect until enable_colors() is called. + virtual void enable_colors(bool /*enable*/) {} + //===--------------------------------------------------------------------===// // Subclass Interface //===--------------------------------------------------------------------===// @@ -365,8 +384,8 @@ public: class raw_fd_ostream : public raw_pwrite_stream { int FD; bool ShouldClose; - bool SupportsSeeking; + bool ColorEnabled = true; #ifdef _WIN32 /// True if this fd refers to a Windows console device. Mintty and other @@ -442,6 +461,8 @@ public: bool has_colors() const override; + void enable_colors(bool enable) override { ColorEnabled = enable; } + std::error_code error() const { return EC; } /// Return the value of the flag in this raw_fd_ostream indicating whether an diff --git a/include/llvm/Support/type_traits.h b/include/llvm/Support/type_traits.h index c8c6a76a90f1..b7d48e8e1ade 100644 --- a/include/llvm/Support/type_traits.h +++ b/include/llvm/Support/type_traits.h @@ -17,11 +17,6 @@ #include #include -#ifndef __has_feature -#define LLVM_DEFINED_HAS_FEATURE -#define __has_feature(x) 0 -#endif - namespace llvm { @@ -194,17 +189,4 @@ class is_trivially_copyable : public std::true_type { } // end namespace llvm -// If the compiler supports detecting whether a class is final, define -// an LLVM_IS_FINAL macro. If it cannot be defined properly, this -// macro will be left undefined. 
-#if __cplusplus >= 201402L || defined(_MSC_VER) -#define LLVM_IS_FINAL(Ty) std::is_final() -#elif __has_feature(is_final) || LLVM_GNUC_PREREQ(4, 7, 0) -#define LLVM_IS_FINAL(Ty) __is_final(Ty) -#endif - -#ifdef LLVM_DEFINED_HAS_FEATURE -#undef __has_feature -#endif - #endif // LLVM_SUPPORT_TYPE_TRAITS_H diff --git a/include/llvm/TableGen/Automaton.td b/include/llvm/TableGen/Automaton.td new file mode 100644 index 000000000000..13ced2a0e784 --- /dev/null +++ b/include/llvm/TableGen/Automaton.td @@ -0,0 +1,95 @@ +//===- Automaton.td ----------------------------------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the key top-level classes needed to produce a reasonably +// generic finite-state automaton. +// +//===----------------------------------------------------------------------===// + +// Define a record inheriting from GenericAutomaton to generate a reasonably +// generic finite-state automaton over a set of actions and states. +// +// This automaton is defined by: +// 1) a state space (explicit, always bits<32>). +// 2) a set of input symbols (actions, explicit) and +// 3) a transition function from state + action -> state. +// +// A theoretical automaton is defined by : +// Q: A set of possible states. +// S: (sigma) The input alphabet. +// d: (delta) The transition function f(q in Q, s in S) -> q' in Q. +// F: The set of final (accepting) states. +// +// Because generating all possible states is tedious, we instead define the +// transition function only and crawl all reachable states starting from the +// initial state with all inputs under all transitions until termination. +// +// We define F = S, that is, all valid states are accepting. +// +// To ensure the generation of the automaton terminates, the state transitions +// are defined as a lattice (meaning every transitioned-to state is more +// specific than the transitioned-from state, for some definition of specificity). +// Concretely a transition may set one or more bits in the state that were +// previously zero to one. If any bit was not zero, the transition is invalid. +// +// Instead of defining all possible states (which would be cumbersome), the user +// provides a set of possible Transitions from state A, consuming an input +// symbol A to state B. The Transition object transforms state A to state B and +// acts as a predicate. This means the state space can be discovered by crawling +// all the possible transitions until none are valid. +// +// This automaton is considered to be nondeterministic, meaning that multiple +// transitions can occur from any (state, action) pair. The generated automaton +// is determinized, meaning that is executes in O(k) time where k is the input +// sequence length. +// +// In addition to a generated automaton that determines if a sequence of inputs +// is accepted or not, a table is emitted that allows determining a plausible +// sequence of states traversed to accept that input. +class GenericAutomaton { + // Name of a class that inherits from Transition. All records inheriting from + // this class will be considered when constructing the automaton. + string TransitionClass; + + // Names of fields within TransitionClass that define the action symbol. 
This + // defines the action as an N-tuple. + // + // Each symbol field can be of class, int, string or code type. + // If the type of a field is a class, the Record's name is used verbatim + // in C++ and the class name is used as the C++ type name. + // If the type of a field is a string, code or int, that is also used + // verbatim in C++. + // + // To override the C++ type name for field F, define a field called TypeOf_F. + // This should be a string that will be used verbatim in C++. + // + // As an example, to define a 2-tuple with an enum and a string, one might: + // def MyTransition : Transition { + // MyEnum S1; + // int S2; + // } + // def MyAutomaton : GenericAutomaton }{ + // let TransitionClass = "Transition"; + // let SymbolFields = ["S1", "S2"]; + // let TypeOf_S1 = "MyEnumInCxxKind"; + // } + list SymbolFields; +} + +// All transitions inherit from Transition. +class Transition { + // A transition S' = T(S) is valid if, for every set bit in NewState, the + // corresponding bit in S is clear. That is: + // def T(S): + // S' = S | NewState + // return S' if S' != S else Failure + // + // The automaton generator uses this property to crawl the set of possible + // transitions from a starting state of 0b0. + bits<32> NewState; +} diff --git a/include/llvm/TableGen/Error.h b/include/llvm/TableGen/Error.h index 7c83b6298620..cf990427f577 100644 --- a/include/llvm/TableGen/Error.h +++ b/include/llvm/TableGen/Error.h @@ -18,6 +18,7 @@ namespace llvm { +void PrintNote(const Twine &Msg); void PrintNote(ArrayRef NoteLoc, const Twine &Msg); void PrintWarning(ArrayRef WarningLoc, const Twine &Msg); diff --git a/include/llvm/TableGen/Record.h b/include/llvm/TableGen/Record.h index bf7f02208c28..73ed342a6101 100644 --- a/include/llvm/TableGen/Record.h +++ b/include/llvm/TableGen/Record.h @@ -1263,7 +1263,14 @@ class FieldInit : public TypedInit { FieldInit(Init *R, StringInit *FN) : TypedInit(IK_FieldInit, R->getFieldType(FN)), Rec(R), FieldName(FN) { - assert(getType() && "FieldInit with non-record type!"); +#ifndef NDEBUG + if (!getType()) { + llvm::errs() << "In Record = " << Rec->getAsString() + << ", got FieldName = " << *FieldName + << " with non-record type!\n"; + llvm_unreachable("FieldInit with non-record type!"); + } +#endif } public: @@ -1323,6 +1330,7 @@ public: void Profile(FoldingSetNodeID &ID) const; Init *getOperator() const { return Val; } + Record *getOperatorAsDef(ArrayRef Loc) const; StringInit *getName() const { return ValName; } @@ -1680,10 +1688,10 @@ raw_ostream &operator<<(raw_ostream &OS, const Record &R); class RecordKeeper { friend class RecordRecTy; - using RecordMap = std::map>; + using RecordMap = std::map, std::less<>>; RecordMap Classes, Defs; FoldingSet RecordTypePool; - std::map ExtraGlobals; + std::map> ExtraGlobals; unsigned AnonCounter = 0; public: diff --git a/include/llvm/Target/GenericOpcodes.td b/include/llvm/Target/GenericOpcodes.td index 45718327b4a7..4b49dfd4dd18 100644 --- a/include/llvm/Target/GenericOpcodes.td +++ b/include/llvm/Target/GenericOpcodes.td @@ -15,7 +15,9 @@ // Unary ops. //------------------------------------------------------------------------------ -class GenericInstruction : StandardPseudoInstruction; +class GenericInstruction : StandardPseudoInstruction { + let isPreISelOpcode = 1; +} // Extend the underlying scalar type of an operation, leaving the high bits // unspecified. 
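// Editor's note (illustrative sketch, not part of the imported patch): the
// lattice rule described in the Automaton.td comments above ("S' = S | NewState,
// fail if S' == S") can be expressed directly over the 32-bit state word.
#include <cstdint>

inline bool applyTransition(uint32_t State, uint32_t NewState, uint32_t &Result) {
  uint32_t Next = State | NewState;
  if (Next == State)
    return false; // no previously-clear bit was set: transition is invalid
  Result = Next;  // strictly more specific state, so crawling terminates
  return true;
}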
@@ -33,6 +35,20 @@ def G_SEXT : GenericInstruction { let hasSideEffects = 0; } +// Sign extend the a value from an arbitrary bit position, copying the sign bit +// into all bits above it. This is equivalent to a shl + ashr pair with an +// appropriate shift amount. $sz is an immediate (MachineOperand::isImm() +// returns true) to allow targets to have some bitwidths legal and others +// lowered. This opcode is particularly useful if the target has sign-extension +// instructions that are cheaper than the constituent shifts as the optimizer is +// able to make decisions on whether it's better to hang on to the G_SEXT_INREG +// or to lower it and optimize the individual shifts. +def G_SEXT_INREG : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src, untyped_imm_0:$sz); + let hasSideEffects = 0; +} + // Zero extend the underlying scalar type of an operation, putting zero bits // into the newly-created space. def G_ZEXT : GenericInstruction { @@ -157,6 +173,12 @@ def G_BSWAP : GenericInstruction { let hasSideEffects = 0; } +def G_BITREVERSE : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src); + let hasSideEffects = 0; +} + def G_ADDRSPACE_CAST : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); @@ -175,6 +197,12 @@ def G_JUMP_TABLE : GenericInstruction { let hasSideEffects = 0; } +def G_DYN_STACKALLOC : GenericInstruction { + let OutOperandList = (outs ptype0:$dst); + let InOperandList = (ins type1:$size, i32imm:$align); + let hasSideEffects = 1; +} + //------------------------------------------------------------------------------ // Binary ops. //------------------------------------------------------------------------------ @@ -598,6 +626,15 @@ def G_FMA : GenericInstruction { let isCommutable = 0; } +/// Generic FP multiply and add. Perform a * b + c, while getting the +/// same result as the separately rounded operations, unlike G_FMA. +def G_FMAD : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3); + let hasSideEffects = 0; + let isCommutable = 0; +} + // Generic FP division. def G_FDIV : GenericInstruction { let OutOperandList = (outs type0:$dst); @@ -725,7 +762,11 @@ def G_INTRINSIC_ROUND : GenericInstruction { // Memory ops //------------------------------------------------------------------------------ -// Generic load. Expects a MachineMemOperand in addition to explicit operands. +// Generic load. Expects a MachineMemOperand in addition to explicit +// operands. If the result size is larger than the memory size, the +// high bits are undefined. If the result is a vector type and larger +// than the memory size, the high elements are undefined (i.e. this is +// not a per-element, vector anyextload) def G_LOAD : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins ptype1:$addr); @@ -749,6 +790,32 @@ def G_ZEXTLOAD : GenericInstruction { let mayLoad = 1; } +// Generic indexed load. Combines a GEP with a load. $newaddr is set to $base + $offset. +// If $am is 0 (post-indexed), then the value is loaded from $base; if $am is 1 (pre-indexed) +// then the value is loaded from $newaddr. 
+def G_INDEXED_LOAD : GenericInstruction { + let OutOperandList = (outs type0:$dst, ptype1:$newaddr); + let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am); + let hasSideEffects = 0; + let mayLoad = 1; +} + +// Same as G_INDEXED_LOAD except that the load performed is sign-extending, as with G_SEXTLOAD. +def G_INDEXED_SEXTLOAD : GenericInstruction { + let OutOperandList = (outs type0:$dst, ptype1:$newaddr); + let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am); + let hasSideEffects = 0; + let mayLoad = 1; +} + +// Same as G_INDEXED_LOAD except that the load performed is zero-extending, as with G_ZEXTLOAD. +def G_INDEXED_ZEXTLOAD : GenericInstruction { + let OutOperandList = (outs type0:$dst, ptype1:$newaddr); + let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am); + let hasSideEffects = 0; + let mayLoad = 1; +} + // Generic store. Expects a MachineMemOperand in addition to explicit operands. def G_STORE : GenericInstruction { let OutOperandList = (outs); @@ -757,6 +824,15 @@ def G_STORE : GenericInstruction { let mayStore = 1; } +// Combines a store with a GEP. See description of G_INDEXED_LOAD for indexing behaviour. +def G_INDEXED_STORE : GenericInstruction { + let OutOperandList = (outs ptype0:$newaddr); + let InOperandList = (ins type1:$src, ptype0:$base, ptype2:$offset, + unknown:$am); + let hasSideEffects = 0; + let mayStore = 1; +} + // Generic atomic cmpxchg with internal success check. Expects a // MachineMemOperand in addition to explicit operands. def G_ATOMIC_CMPXCHG_WITH_SUCCESS : GenericInstruction { @@ -798,6 +874,8 @@ def G_ATOMICRMW_MAX : G_ATOMICRMW_OP; def G_ATOMICRMW_MIN : G_ATOMICRMW_OP; def G_ATOMICRMW_UMAX : G_ATOMICRMW_OP; def G_ATOMICRMW_UMIN : G_ATOMICRMW_OP; +def G_ATOMICRMW_FADD : G_ATOMICRMW_OP; +def G_ATOMICRMW_FSUB : G_ATOMICRMW_OP; def G_FENCE : GenericInstruction { let OutOperandList = (outs); @@ -947,9 +1025,12 @@ def G_EXTRACT_VECTOR_ELT : GenericInstruction { } // Generic shufflevector. +// +// The mask operand should be an IR Constant which exactly matches the +// corresponding mask for the IR shufflevector instruction. def G_SHUFFLE_VECTOR: GenericInstruction { let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type1:$v1, type1:$v2, type2:$mask); + let InOperandList = (ins type1:$v1, type1:$v2, unknown:$mask); let hasSideEffects = 0; } diff --git a/include/llvm/Target/GlobalISel/Combine.td b/include/llvm/Target/GlobalISel/Combine.td new file mode 100644 index 000000000000..dcac399fd693 --- /dev/null +++ b/include/llvm/Target/GlobalISel/Combine.td @@ -0,0 +1,103 @@ +//===- Combine.td - Combine rule definitions ---------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Declare GlobalISel combine rules and provide mechanisms to opt-out. +// +//===----------------------------------------------------------------------===// + +// Common base class for GICombineRule and GICombineGroup. +class GICombine { + // See GICombineGroup. We only declare it here to make the tablegen pass + // simpler. + list Rules = ?; +} + +// A group of combine rules that can be added to a GICombiner or another group. +class GICombineGroup rules> : GICombine { + // The rules contained in this group. 
The rules in a group are flattened into + // a single list and sorted into whatever order is most efficient. However, + // they will never be re-ordered such that behaviour differs from the + // specified order. It is therefore possible to use the order of rules in this + // list to describe priorities. + let Rules = rules; +} + +// Declares a combiner helper class +class GICombinerHelper rules> + : GICombineGroup { + // The class name to use in the generated output. + string Classname = classname; + // The name of a run-time compiler option that will be generated to disable + // specific rules within this combiner. + string DisableRuleOption = ?; +} +class GICombineRule : GICombine { + /// Defines the external interface of the match rule. This includes: + /// * The names of the root nodes (requires at least one) + /// See GIDefKind for details. + dag Defs = defs; + + /// Defines the things which must be true for the pattern to match + /// See GIMatchKind for details. + dag Match = match; + + /// Defines the things which happen after the decision is made to apply a + /// combine rule. + /// See GIApplyKind for details. + dag Apply = apply; +} + +/// The operator at the root of a GICombineRule.Defs dag. +def defs; + +/// All arguments of the defs operator must be subclasses of GIDefKind or +/// sub-dags whose operator is GIDefKindWithArgs. +class GIDefKind; +class GIDefKindWithArgs; +/// Declare a root node. There must be at least one of these in every combine +/// rule. +/// TODO: The plan is to elide `root` definitions and determine it from the DAG +/// itself with an overide for situations where the usual determination +/// is incorrect. +def root : GIDefKind; + +/// The operator at the root of a GICombineRule.Match dag. +def match; +/// All arguments of the match operator must be either: +/// * A subclass of GIMatchKind +/// * A subclass of GIMatchKindWithArgs +/// * A MIR code block (deprecated) +/// The GIMatchKind and GIMatchKindWithArgs cases are described in more detail +/// in their definitions below. +/// For the Instruction case, these are collected into a DAG where operand names +/// that occur multiple times introduce edges. +class GIMatchKind; +class GIMatchKindWithArgs; + +/// The operator at the root of a GICombineRule.Apply dag. +def apply; +/// All arguments of the apply operator must be subclasses of GIApplyKind, or +/// sub-dags whose operator is GIApplyKindWithArgs, or an MIR block +/// (deprecated). +class GIApplyKind; +class GIApplyKindWithArgs; + +def copy_prop : GICombineRule< + (defs root:$d), + (match [{ return Helper.matchCombineCopy(${d}); }]), + (apply [{ Helper.applyCombineCopy(${d}); }])>; +def trivial_combines : GICombineGroup<[copy_prop]>; + +// FIXME: Is there a reason this wasn't in tryCombine? I've left it out of +// all_combines because it wasn't there. +def elide_br_by_inverting_cond : GICombineRule< + (defs root:$d), + (match [{ return Helper.matchElideBrByInvertingCond(${d}); }]), + (apply [{ Helper.applyElideBrByInvertingCond(${d}); }])>; + +def all_combines : GICombineGroup<[trivial_combines]>; diff --git a/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index 6cc58d6521da..b846d2252b8d 100644 --- a/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -27,6 +27,7 @@ class GINodeEquiv { // (ISD::LOAD, ISD::ATOMIC_LOAD, ISD::STORE, ISD::ATOMIC_STORE) but GlobalISel // stores this information in the MachineMemoryOperand. 
bit CheckMMOIsNonAtomic = 0; + bit CheckMMOIsAtomic = 0; // SelectionDAG has one node for all loads and uses predicates to // differentiate them. GlobalISel on the other hand uses separate opcodes. @@ -34,6 +35,10 @@ class GINodeEquiv { // depending on the predicates on the node. Instruction IfSignExtend = ?; Instruction IfZeroExtend = ?; + + // SelectionDAG has one setcc for all compares. This differentiates + // for G_ICMP and G_FCMP. + Instruction IfFloatingPoint = ?; } // These are defined in the same order as the G_* instructions. @@ -46,6 +51,7 @@ def : GINodeEquiv; // G_PTRTOINT - SelectionDAG has no equivalent. def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; @@ -72,6 +78,7 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; @@ -85,6 +92,7 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; @@ -100,10 +108,15 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; // Broadly speaking G_LOAD is equivalent to ISD::LOAD but there are some // complications that tablegen must take care of. For example, Predicates such @@ -117,6 +130,11 @@ def : GINodeEquiv { let IfSignExtend = G_SEXTLOAD; let IfZeroExtend = G_ZEXTLOAD; } + +def : GINodeEquiv { + let IfFloatingPoint = G_FCMP; +} + // Broadly speaking G_STORE is equivalent to ISD::STORE but there are some // complications that tablegen must take care of. For example, predicates such // as isTruncStore require that this is not a perfect 1:1 mapping since a @@ -126,6 +144,11 @@ def : GINodeEquiv { // G_STORE with a non-atomic MachineMemOperand. def : GINodeEquiv { let CheckMMOIsNonAtomic = 1; } +def : GINodeEquiv { + let CheckMMOIsNonAtomic = 0; + let CheckMMOIsAtomic = 1; +} + def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; @@ -138,6 +161,8 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; // Specifies the GlobalISel equivalents for SelectionDAG's ComplexPattern. diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index d58662e128e0..dd8679661b9a 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -351,7 +351,11 @@ def interleave; // RegisterTuples instances can be used in other set operations to form // register classes and so on. This is the only way of using the generated // registers. -class RegisterTuples Indices, list Regs> { +// +// RegNames may be specified to supply asm names for the generated tuples. +// If used must have the same size as the list of produced registers. +class RegisterTuples Indices, list Regs, + list RegNames = []> { // SubRegs - N lists of registers to be zipped up. Super-registers are // synthesized from the first element of each SubRegs list, the second // element and so on. @@ -360,6 +364,9 @@ class RegisterTuples Indices, list Regs> { // SubRegIndices - N SubRegIndex instances. This provides the names of the // sub-registers in the synthesized super-registers. list SubRegIndices = Indices; + + // List of asm names for the generated tuple registers. 
+ list RegAsmNames = RegNames; } @@ -436,6 +443,15 @@ class InstructionEncoding { bit hasCompleteDecoder = 1; } +// Allows specifying an InstructionEncoding by HwMode. If an Instruction specifies +// an EncodingByHwMode, its Inst and Size members are ignored and Ts are used +// to encode and decode based on HwMode. +class EncodingByHwMode Ms = [], list Ts = []> + : HwModeSelect { + // The length of this list must be the same as the length of Ms. + list Objects = Ts; +} + //===----------------------------------------------------------------------===// // Instruction set description - These classes correspond to the C++ classes in // the Target/TargetInstrInfo.h file. @@ -447,6 +463,10 @@ class Instruction : InstructionEncoding { dag InOperandList; // An dag containing the MI use operand list. string AsmString = ""; // The .s format to print the instruction with. + // Allows specifying a canonical InstructionEncoding by HwMode. If non-empty, + // the Inst member of this Instruction is ignored. + EncodingByHwMode EncodingInfos; + // Pattern - Set to the DAG pattern for this instruction, if we know of one, // otherwise, uninitialized. list Pattern; @@ -472,6 +492,10 @@ class Instruction : InstructionEncoding { // Added complexity passed onto matching pattern. int AddedComplexity = 0; + // Indicates if this is a pre-isel opcode that should be + // legalized/regbankselected/selected. + bit isPreISelOpcode = 0; + // These bits capture information about the high-level semantics of the // instruction. bit isReturn = 0; // Is this instruction a return instruction? @@ -834,6 +858,7 @@ def f64imm : Operand; class TypedOperand : Operand { let OperandType = Ty; bit IsPointer = 0; + bit IsImmediate = 0; } def type0 : TypedOperand<"OPERAND_GENERIC_0">; @@ -852,6 +877,12 @@ let IsPointer = 1 in { def ptype5 : TypedOperand<"OPERAND_GENERIC_5">; } +// untyped_imm is for operands where isImm() will be true. It currently has no +// special behaviour and is only used for clarity. +def untyped_imm_0 : TypedOperand<"OPERAND_GENERIC_IMM_0"> { + let IsImmediate = 1; +} + /// zero_reg definition - Special node to stand for the zero register. /// def zero_reg; diff --git a/include/llvm/Target/TargetCallingConv.td b/include/llvm/Target/TargetCallingConv.td index 1bc03cf8a49d..7b1973cc3828 100644 --- a/include/llvm/Target/TargetCallingConv.td +++ b/include/llvm/Target/TargetCallingConv.td @@ -152,6 +152,12 @@ class CCBitConvertToType : CCAction { ValueType DestTy = destTy; } +/// CCTruncToType - If applied, this truncates the specified current value to +/// the specified type. +class CCTruncToType : CCAction { + ValueType DestTy = destTy; +} + /// CCPassIndirect - If applied, this stores the value to stack and passes the pointer /// as normal argument. class CCPassIndirect : CCAction { diff --git a/include/llvm/Target/TargetItinerary.td b/include/llvm/Target/TargetItinerary.td index b68ed045520c..89e5abd947d0 100644 --- a/include/llvm/Target/TargetItinerary.td +++ b/include/llvm/Target/TargetItinerary.td @@ -127,6 +127,17 @@ class ProcessorItineraries fu, list bp, list FU = fu; list BP = bp; list IID = iid; + // The packetizer automaton to use for this itinerary. By default all + // itineraries for a target are bundled up into the same automaton. This only + // works correctly when there are no conflicts in functional unit IDs between + // itineraries. 
For example, given two itineraries A<[SLOT_A]>, B<[SLOT_B]>, + // SLOT_A and SLOT_B will be assigned the same functional unit index, and + // the generated packetizer will confuse instructions referencing these slots. + // + // To avoid this, setting PacketizerNamespace to non-"" will cause this + // itinerary to be generated in a different automaton. The subtarget will need + // to declare a method "create##Namespace##DFAPacketizer()". + string PacketizerNamespace = ""; } // NoItineraries - A marker that can be used by processors without schedule diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h index 3a2497bff11e..d74341b23fb1 100644 --- a/include/llvm/Target/TargetLoweringObjectFile.h +++ b/include/llvm/Target/TargetLoweringObjectFile.h @@ -191,7 +191,8 @@ public: } /// Get the target specific PC relative GOT entry relocation - virtual const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym, + virtual const MCExpr *getIndirectSymViaGOTPCRel(const GlobalValue *GV, + const MCSymbol *Sym, const MCValue &MV, int64_t Offset, MachineModuleInfo *MMI, diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h index cdf9f8bfd5ea..285c0ec0fb90 100644 --- a/include/llvm/Target/TargetMachine.h +++ b/include/llvm/Target/TargetMachine.h @@ -25,7 +25,7 @@ namespace llvm { class Function; class GlobalValue; -class MachineModuleInfo; +class MachineModuleInfoWrapperPass; class Mangler; class MCAsmInfo; class MCContext; @@ -284,12 +284,13 @@ public: /// emitted. Typically this will involve several steps of code generation. /// This method should return true if emission of this file type is not /// supported, or false on success. - /// \p MMI is an optional parameter that, if set to non-nullptr, + /// \p MMIWP is an optional parameter that, if set to non-nullptr, /// will be used to set the MachineModuloInfo for this PM. - virtual bool addPassesToEmitFile(PassManagerBase &, raw_pwrite_stream &, - raw_pwrite_stream *, CodeGenFileType, - bool /*DisableVerify*/ = true, - MachineModuleInfo *MMI = nullptr) { + virtual bool + addPassesToEmitFile(PassManagerBase &, raw_pwrite_stream &, + raw_pwrite_stream *, CodeGenFileType, + bool /*DisableVerify*/ = true, + MachineModuleInfoWrapperPass *MMIWP = nullptr) { return true; } @@ -341,12 +342,13 @@ public: /// Add passes to the specified pass manager to get the specified file /// emitted. Typically this will involve several steps of code generation. - /// \p MMI is an optional parameter that, if set to non-nullptr, - /// will be used to set the MachineModuloInfofor this PM. - bool addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out, - raw_pwrite_stream *DwoOut, CodeGenFileType FileType, - bool DisableVerify = true, - MachineModuleInfo *MMI = nullptr) override; + /// \p MMIWP is an optional parameter that, if set to non-nullptr, + /// will be used to set the MachineModuloInfo for this PM. + bool + addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out, + raw_pwrite_stream *DwoOut, CodeGenFileType FileType, + bool DisableVerify = true, + MachineModuleInfoWrapperPass *MMIWP = nullptr) override; /// Add passes to the specified pass manager to get machine code emitted with /// the MCJIT. This method returns true if machine code is not supported. It @@ -365,7 +367,7 @@ public: /// Adds an AsmPrinter pass to the pipeline that prints assembly or /// machine code from the MI representation. 
bool addAsmPrinter(PassManagerBase &PM, raw_pwrite_stream &Out, - raw_pwrite_stream *DwoOut, CodeGenFileType FileTYpe, + raw_pwrite_stream *DwoOut, CodeGenFileType FileType, MCContext &Context); /// True if the target uses physical regs at Prolog/Epilog insertion diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td index a36d259df831..24f37e94da91 100644 --- a/include/llvm/Target/TargetSchedule.td +++ b/include/llvm/Target/TargetSchedule.td @@ -563,10 +563,10 @@ class RetireControlUnit { // Base class for Load/StoreQueue. It is used to identify processor resources // which describe load/store queues in the LS unit. -class MemoryQueue { - ProcResource QueueDescriptor = PR; +class MemoryQueue { + ProcResourceKind QueueDescriptor = PR; SchedMachineModel SchedModel = ?; } -class LoadQueue : MemoryQueue; -class StoreQueue : MemoryQueue; +class LoadQueue : MemoryQueue; +class StoreQueue : MemoryQueue; diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index b913a054ac2c..441f3d7d118d 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -137,9 +137,12 @@ def SDTFPSignOp : SDTypeProfile<1, 2, [ // fcopysign. def SDTFPTernaryOp : SDTypeProfile<1, 3, [ // fmadd, fnmsub, etc. SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisFP<0> ]>; -def SDTIntUnaryOp : SDTypeProfile<1, 1, [ // ctlz, cttz +def SDTIntUnaryOp : SDTypeProfile<1, 1, [ // bitreverse SDTCisSameAs<0, 1>, SDTCisInt<0> ]>; +def SDTIntBitCountUnaryOp : SDTypeProfile<1, 1, [ // ctlz, cttz + SDTCisInt<0>, SDTCisInt<1> +]>; def SDTIntExtendOp : SDTypeProfile<1, 1, [ // sext, zext, anyext SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisSameNumEltsAs<0, 1> ]>; @@ -239,6 +242,9 @@ def SDTVecExtract : SDTypeProfile<1, 2, [ // vector extract def SDTVecInsert : SDTypeProfile<1, 3, [ // vector insert SDTCisEltOfVec<2, 1>, SDTCisSameAs<0, 1>, SDTCisPtrTy<3> ]>; +def SDTVecReduce : SDTypeProfile<1, 1, [ // vector reduction + SDTCisInt<0>, SDTCisVec<1> +]>; def SDTSubVecExtract : SDTypeProfile<1, 2, [// subvector extract SDTCisSubVecOfVec<0,1>, SDTCisInt<2> @@ -393,6 +399,7 @@ def usubsat : SDNode<"ISD::USUBSAT" , SDTIntBinOp>; def smulfix : SDNode<"ISD::SMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>; def smulfixsat : SDNode<"ISD::SMULFIXSAT", SDTIntScaledBinOp, [SDNPCommutative]>; def umulfix : SDNode<"ISD::UMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>; +def umulfixsat : SDNode<"ISD::UMULFIXSAT", SDTIntScaledBinOp, [SDNPCommutative]>; def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>; def sext_invec : SDNode<"ISD::SIGN_EXTEND_VECTOR_INREG", SDTExtInvec>; @@ -401,11 +408,11 @@ def zext_invec : SDNode<"ISD::ZERO_EXTEND_VECTOR_INREG", SDTExtInvec>; def abs : SDNode<"ISD::ABS" , SDTIntUnaryOp>; def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>; def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>; -def ctlz : SDNode<"ISD::CTLZ" , SDTIntUnaryOp>; -def cttz : SDNode<"ISD::CTTZ" , SDTIntUnaryOp>; -def ctpop : SDNode<"ISD::CTPOP" , SDTIntUnaryOp>; -def ctlz_zero_undef : SDNode<"ISD::CTLZ_ZERO_UNDEF", SDTIntUnaryOp>; -def cttz_zero_undef : SDNode<"ISD::CTTZ_ZERO_UNDEF", SDTIntUnaryOp>; +def ctlz : SDNode<"ISD::CTLZ" , SDTIntBitCountUnaryOp>; +def cttz : SDNode<"ISD::CTTZ" , SDTIntBitCountUnaryOp>; +def ctpop : SDNode<"ISD::CTPOP" , SDTIntBitCountUnaryOp>; +def ctlz_zero_undef : SDNode<"ISD::CTLZ_ZERO_UNDEF", SDTIntBitCountUnaryOp>; +def cttz_zero_undef : 
SDNode<"ISD::CTTZ_ZERO_UNDEF", SDTIntBitCountUnaryOp>; def sext : SDNode<"ISD::SIGN_EXTEND", SDTIntExtendOp>; def zext : SDNode<"ISD::ZERO_EXTEND", SDTIntExtendOp>; def anyext : SDNode<"ISD::ANY_EXTEND" , SDTIntExtendOp>; @@ -415,6 +422,12 @@ def addrspacecast : SDNode<"ISD::ADDRSPACECAST", SDTUnaryOp>; def extractelt : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDTVecExtract>; def insertelt : SDNode<"ISD::INSERT_VECTOR_ELT", SDTVecInsert>; +def vecreduce_add : SDNode<"ISD::VECREDUCE_ADD", SDTVecReduce>; +def vecreduce_smax : SDNode<"ISD::VECREDUCE_SMAX", SDTVecReduce>; +def vecreduce_umax : SDNode<"ISD::VECREDUCE_UMAX", SDTVecReduce>; +def vecreduce_smin : SDNode<"ISD::VECREDUCE_SMIN", SDTVecReduce>; +def vecreduce_umin : SDNode<"ISD::VECREDUCE_UMIN", SDTVecReduce>; + def fadd : SDNode<"ISD::FADD" , SDTFPBinOp, [SDNPCommutative]>; def fsub : SDNode<"ISD::FSUB" , SDTFPBinOp>; def fmul : SDNode<"ISD::FMUL" , SDTFPBinOp, [SDNPCommutative]>; @@ -493,12 +506,20 @@ def strict_flog2 : SDNode<"ISD::STRICT_FLOG2", SDTFPUnaryOp, [SDNPHasChain]>; def strict_frint : SDNode<"ISD::STRICT_FRINT", SDTFPUnaryOp, [SDNPHasChain]>; +def strict_lrint : SDNode<"ISD::STRICT_LRINT", + SDTFPToIntOp, [SDNPHasChain]>; +def strict_llrint : SDNode<"ISD::STRICT_LLRINT", + SDTFPToIntOp, [SDNPHasChain]>; def strict_fnearbyint : SDNode<"ISD::STRICT_FNEARBYINT", SDTFPUnaryOp, [SDNPHasChain]>; def strict_fceil : SDNode<"ISD::STRICT_FCEIL", SDTFPUnaryOp, [SDNPHasChain]>; def strict_ffloor : SDNode<"ISD::STRICT_FFLOOR", SDTFPUnaryOp, [SDNPHasChain]>; +def strict_lround : SDNode<"ISD::STRICT_LROUND", + SDTFPToIntOp, [SDNPHasChain]>; +def strict_llround : SDNode<"ISD::STRICT_LLROUND", + SDTFPToIntOp, [SDNPHasChain]>; def strict_fround : SDNode<"ISD::STRICT_FROUND", SDTFPUnaryOp, [SDNPHasChain]>; def strict_ftrunc : SDNode<"ISD::STRICT_FTRUNC", @@ -513,6 +534,10 @@ def strict_fpround : SDNode<"ISD::STRICT_FP_ROUND", SDTFPRoundOp, [SDNPHasChain]>; def strict_fpextend : SDNode<"ISD::STRICT_FP_EXTEND", SDTFPExtendOp, [SDNPHasChain]>; +def strict_fp_to_sint : SDNode<"ISD::STRICT_FP_TO_SINT", + SDTFPToIntOp, [SDNPHasChain]>; +def strict_fp_to_uint : SDNode<"ISD::STRICT_FP_TO_UINT", + SDTFPToIntOp, [SDNPHasChain]>; def setcc : SDNode<"ISD::SETCC" , SDTSetCC>; def select : SDNode<"ISD::SELECT" , SDTSelect>; @@ -638,16 +663,32 @@ def assertzext : SDNode<"ISD::AssertZext", SDT_assertext>; //===----------------------------------------------------------------------===// // Selection DAG Condition Codes -class CondCode; // ISD::CondCode enums -def SETOEQ : CondCode; def SETOGT : CondCode; -def SETOGE : CondCode; def SETOLT : CondCode; def SETOLE : CondCode; -def SETONE : CondCode; def SETO : CondCode; def SETUO : CondCode; -def SETUEQ : CondCode; def SETUGT : CondCode; def SETUGE : CondCode; -def SETULT : CondCode; def SETULE : CondCode; def SETUNE : CondCode; - -def SETEQ : CondCode; def SETGT : CondCode; def SETGE : CondCode; -def SETLT : CondCode; def SETLE : CondCode; def SETNE : CondCode; - +class CondCode { + string ICmpPredicate = icmpName; + string FCmpPredicate = fcmpName; +} + +// ISD::CondCode enums, and mapping to CmpInst::Predicate names +def SETOEQ : CondCode<"FCMP_OEQ">; +def SETOGT : CondCode<"FCMP_OGT">; +def SETOGE : CondCode<"FCMP_OGE">; +def SETOLT : CondCode<"FCMP_OLT">; +def SETOLE : CondCode<"FCMP_OLE">; +def SETONE : CondCode<"FCMP_ONE">; +def SETO : CondCode<"FCMP_ORD">; +def SETUO : CondCode<"FCMP_UNO">; +def SETUEQ : CondCode<"FCMP_UEQ">; +def SETUGT : CondCode<"FCMP_UGT", "ICMP_UGT">; +def SETUGE : CondCode<"FCMP_UGE", 
"ICMP_UGE">; +def SETULT : CondCode<"FCMP_ULT", "ICMP_ULT">; +def SETULE : CondCode<"FCMP_ULE", "ICMP_ULE">; +def SETUNE : CondCode<"FCMP_UNE">; +def SETEQ : CondCode<"", "ICMP_EQ">; +def SETGT : CondCode<"", "ICMP_SGT">; +def SETGE : CondCode<"", "ICMP_SGE">; +def SETLT : CondCode<"", "ICMP_SLT">; +def SETLE : CondCode<"", "ICMP_SLE">; +def SETNE : CondCode<"", "ICMP_NE">; //===----------------------------------------------------------------------===// // Selection DAG Node Transformation Functions. @@ -741,6 +782,10 @@ class PatFrags frags, code pred = [{}], // If this empty, accept any address space. list AddressSpaces = ?; + // cast(N)->getAlignment() >= + // If this is empty, accept any alignment. + int MinAlignment = ?; + // cast(N)->getOrdering() == AtomicOrdering::Monotonic bit IsAtomicOrderingMonotonic = ?; // cast(N)->getOrdering() == AtomicOrdering::Acquire @@ -766,8 +811,6 @@ class PatFrags frags, code pred = [{}], // cast(N)->getMemoryVT().getScalarType() == MVT::; // cast(N)->getMemoryVT().getScalarType() == MVT::; ValueType ScalarMemoryVT = ?; - - // TODO: Add alignment } // PatFrag - A version of PatFrags matching only a single fragment. @@ -813,6 +856,11 @@ class ImmLeaf : ImmLeaf; + // An ImmLeaf except that Imm is an APInt. This is useful when you need to // zero-extend the immediate instead of sign-extend it. // @@ -1111,6 +1159,16 @@ def pre_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset), let IsStore = 1; let MemoryVT = f32; } +def pre_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset), + (pre_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let ScalarMemoryVT = i8; +} +def pre_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset), + (pre_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let ScalarMemoryVT = i16; +} def post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), (istore node:$val, node:$ptr, node:$offset), [{ @@ -1148,14 +1206,26 @@ def post_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset), let IsStore = 1; let MemoryVT = f32; } +def post_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset), + (post_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let ScalarMemoryVT = i8; +} +def post_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset), + (post_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let ScalarMemoryVT = i16; +} -def nonvolatile_load : PatFrag<(ops node:$ptr), - (load node:$ptr), [{ - return !cast(N)->isVolatile(); +// TODO: Split these into volatile and unordered flavors to enable +// selectively legal optimizations for each. (See D66309) +def simple_load : PatFrag<(ops node:$ptr), + (load node:$ptr), [{ + return cast(N)->isSimple(); }]>; -def nonvolatile_store : PatFrag<(ops node:$val, node:$ptr), - (store node:$val, node:$ptr), [{ - return !cast(N)->isVolatile(); +def simple_store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->isSimple(); }]>; // nontemporal store fragments. 
@@ -1277,6 +1347,12 @@ def any_flog2 : PatFrags<(ops node:$src), def any_frint : PatFrags<(ops node:$src), [(strict_frint node:$src), (frint node:$src)]>; +def any_lrint : PatFrags<(ops node:$src), + [(strict_lrint node:$src), + (lrint node:$src)]>; +def any_llrint : PatFrags<(ops node:$src), + [(strict_llrint node:$src), + (llrint node:$src)]>; def any_fnearbyint : PatFrags<(ops node:$src), [(strict_fnearbyint node:$src), (fnearbyint node:$src)]>; @@ -1286,6 +1362,12 @@ def any_fceil : PatFrags<(ops node:$src), def any_ffloor : PatFrags<(ops node:$src), [(strict_ffloor node:$src), (ffloor node:$src)]>; +def any_lround : PatFrags<(ops node:$src), + [(strict_lround node:$src), + (lround node:$src)]>; +def any_llround : PatFrags<(ops node:$src), + [(strict_llround node:$src), + (llround node:$src)]>; def any_fround : PatFrags<(ops node:$src), [(strict_fround node:$src), (fround node:$src)]>; @@ -1310,6 +1392,12 @@ def any_extloadf32 : PatFrags<(ops node:$ptr), def any_extloadf64 : PatFrags<(ops node:$ptr), [(strict_extloadf64 node:$ptr), (extloadf64 node:$ptr)]>; +def any_fp_to_sint : PatFrags<(ops node:$src), + [(strict_fp_to_sint node:$src), + (fp_to_sint node:$src)]>; +def any_fp_to_uint : PatFrags<(ops node:$src), + [(strict_fp_to_uint node:$src), + (fp_to_uint node:$src)]>; multiclass binary_atomic_op_ord { def #NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val), @@ -1367,26 +1455,26 @@ multiclass ternary_atomic_op_ord { } } -multiclass binary_atomic_op { +multiclass binary_atomic_op { def _8 : PatFrag<(ops node:$ptr, node:$val), (atomic_op node:$ptr, node:$val)> { let IsAtomic = 1; - let MemoryVT = i8; + let MemoryVT = !if(IsInt, i8, ?); } def _16 : PatFrag<(ops node:$ptr, node:$val), (atomic_op node:$ptr, node:$val)> { let IsAtomic = 1; - let MemoryVT = i16; + let MemoryVT = !if(IsInt, i16, f16); } def _32 : PatFrag<(ops node:$ptr, node:$val), (atomic_op node:$ptr, node:$val)> { let IsAtomic = 1; - let MemoryVT = i32; + let MemoryVT = !if(IsInt, i32, f32); } def _64 : PatFrag<(ops node:$ptr, node:$val), (atomic_op node:$ptr, node:$val)> { let IsAtomic = 1; - let MemoryVT = i64; + let MemoryVT = !if(IsInt, i64, f64); } defm NAME#_8 : binary_atomic_op_ord; diff --git a/include/llvm/TextAPI/MachO/Architecture.h b/include/llvm/TextAPI/MachO/Architecture.h index 055baeb0c0f0..3898cbada68f 100644 --- a/include/llvm/TextAPI/MachO/Architecture.h +++ b/include/llvm/TextAPI/MachO/Architecture.h @@ -14,6 +14,7 @@ #define LLVM_TEXTAPI_MACHO_ARCHITECTURE_H #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" #include "llvm/Support/raw_ostream.h" namespace llvm { @@ -39,6 +40,9 @@ StringRef getArchitectureName(Architecture Arch); /// Convert an architecture slice to a CPU Type and Subtype pair. std::pair getCPUTypeFromArchitecture(Architecture Arch); +/// Convert a target to an architecture slice. +Architecture mapToArchitecture(const llvm::Triple &Target); + raw_ostream &operator<<(raw_ostream &OS, Architecture Arch); } // end namespace MachO. 
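The Architecture.h hunk above only declares the new mapToArchitecture() helper. A minimal usage sketch follows; it is not part of the patch, and the example triple string, the main() wrapper, and the use of llvm::outs() are illustrative assumptions. Only mapToArchitecture() and the pre-existing getArchitectureName() are taken from the header shown above.

    // Sketch: map a parsed llvm::Triple to a TextAPI architecture slice and
    // print its canonical name. Assumes an LLVM build of roughly this vintage.
    #include "llvm/ADT/Triple.h"
    #include "llvm/Support/raw_ostream.h"
    #include "llvm/TextAPI/MachO/Architecture.h"

    int main() {
      // Example triple (assumption, not from the diff).
      llvm::Triple T("x86_64-apple-macosx10.15");
      // New helper declared in the hunk above.
      llvm::MachO::Architecture Arch = llvm::MachO::mapToArchitecture(T);
      // getArchitectureName() already exists in Architecture.h and returns the
      // canonical slice name (e.g. "x86_64").
      llvm::outs() << llvm::MachO::getArchitectureName(Arch) << "\n";
      return 0;
    }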
diff --git a/include/llvm/TextAPI/MachO/ArchitectureSet.h b/include/llvm/TextAPI/MachO/ArchitectureSet.h index d8dfc7f1af21..6e4ede6275b4 100644 --- a/include/llvm/TextAPI/MachO/ArchitectureSet.h +++ b/include/llvm/TextAPI/MachO/ArchitectureSet.h @@ -59,6 +59,10 @@ public: ArchSetType rawValue() const { return ArchSet; } + bool hasX86() const { + return has(AK_i386) || has(AK_x86_64) || has(AK_x86_64h); + } + template class arch_iterator : public std::iterator { diff --git a/include/llvm/TextAPI/MachO/InterfaceFile.h b/include/llvm/TextAPI/MachO/InterfaceFile.h index e722449d52f1..bd434e04b693 100644 --- a/include/llvm/TextAPI/MachO/InterfaceFile.h +++ b/include/llvm/TextAPI/MachO/InterfaceFile.h @@ -26,21 +26,13 @@ #include "llvm/TextAPI/MachO/Architecture.h" #include "llvm/TextAPI/MachO/ArchitectureSet.h" #include "llvm/TextAPI/MachO/PackedVersion.h" +#include "llvm/TextAPI/MachO/Platform.h" #include "llvm/TextAPI/MachO/Symbol.h" +#include "llvm/TextAPI/MachO/Target.h" namespace llvm { namespace MachO { -/// Defines the list of MachO platforms. -enum class PlatformKind : unsigned { - unknown, - macOS = MachO::PLATFORM_MACOS, - iOS = MachO::PLATFORM_IOS, - tvOS = MachO::PLATFORM_TVOS, - watchOS = MachO::PLATFORM_WATCHOS, - bridgeOS = MachO::PLATFORM_BRIDGEOS, -}; - /// Defines a list of Objective-C constraints. enum class ObjCConstraintType : unsigned { /// No constraint. @@ -75,6 +67,9 @@ enum FileType : unsigned { /// Text-based stub file (.tbd) version 3.0 TBD_V3 = 1U << 2, + /// Text-based stub file (.tbd) version 4.0 + TBD_V4 = 1U << 3, + All = ~0U, LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/All), @@ -89,29 +84,42 @@ public: InterfaceFileRef(StringRef InstallName) : InstallName(InstallName) {} - InterfaceFileRef(StringRef InstallName, ArchitectureSet Archs) - : InstallName(InstallName), Architectures(Archs) {} + InterfaceFileRef(StringRef InstallName, const TargetList Targets) + : InstallName(InstallName), Targets(std::move(Targets)) {} StringRef getInstallName() const { return InstallName; }; - void addArchitectures(ArchitectureSet Archs) { Architectures |= Archs; } - ArchitectureSet getArchitectures() const { return Architectures; } - bool hasArchitecture(Architecture Arch) const { - return Architectures.has(Arch); + + void addTarget(const Target &Target); + template void addTargets(RangeT &&Targets) { + for (const auto &Target : Targets) + addTarget(Target(Target)); } + using const_target_iterator = TargetList::const_iterator; + using const_target_range = llvm::iterator_range; + const_target_range targets() const { return {Targets}; } + + ArchitectureSet getArchitectures() const { + return mapToArchitectureSet(Targets); + } + + PlatformSet getPlatforms() const { return mapToPlatformSet(Targets); } + bool operator==(const InterfaceFileRef &O) const { - return std::tie(InstallName, Architectures) == - std::tie(O.InstallName, O.Architectures); + return std::tie(InstallName, Targets) == std::tie(O.InstallName, O.Targets); + } + + bool operator!=(const InterfaceFileRef &O) const { + return std::tie(InstallName, Targets) != std::tie(O.InstallName, O.Targets); } bool operator<(const InterfaceFileRef &O) const { - return std::tie(InstallName, Architectures) < - std::tie(O.InstallName, O.Architectures); + return std::tie(InstallName, Targets) < std::tie(O.InstallName, O.Targets); } private: std::string InstallName; - ArchitectureSet Architectures; + TargetList Targets; }; } // end namespace MachO. @@ -170,27 +178,43 @@ public: /// \return The file type. 
FileType getFileType() const { return FileKind; } - /// Set the platform. - void setPlatform(PlatformKind Platform_) { Platform = Platform_; } + /// Get the architectures. + /// + /// \return The applicable architectures. + ArchitectureSet getArchitectures() const { + return mapToArchitectureSet(Targets); + } - /// Get the platform. - PlatformKind getPlatform() const { return Platform; } + /// Get the platforms. + /// + /// \return The applicable platforms. + PlatformSet getPlatforms() const { return mapToPlatformSet(Targets); } - /// Specify the set of supported architectures by this file. - void setArchitectures(ArchitectureSet Architectures_) { - Architectures = Architectures_; - } + /// Set and add target. + /// + /// \param Target the target to add into. + void addTarget(const Target &Target); - /// Add the set of supported architectures by this file. - void addArchitectures(ArchitectureSet Architectures_) { - Architectures |= Architectures_; + /// Set and add targets. + /// + /// Add the subset of llvm::triples that is supported by Tapi + /// + /// \param Targets the collection of targets. + template void addTargets(RangeT &&Targets) { + for (const auto &Target_ : Targets) + addTarget(Target(Target_)); } - /// Add supported architecture by this file.. - void addArch(Architecture Arch) { Architectures.set(Arch); } + using const_target_iterator = TargetList::const_iterator; + using const_target_range = llvm::iterator_range; + const_target_range targets() const { return {Targets}; } - /// Get the set of supported architectures. - ArchitectureSet getArchitectures() const { return Architectures; } + using const_filtered_target_iterator = + llvm::filter_iterator>; + using const_filtered_target_range = + llvm::iterator_range; + const_filtered_target_range targets(ArchitectureSet Archs) const; /// Set the install name of the library. void setInstallName(StringRef InstallName_) { InstallName = InstallName_; } @@ -244,11 +268,18 @@ public: /// Check if this file was generated during InstallAPI. bool isInstallAPI() const { return IsInstallAPI; } - /// Set the parent umbrella framework. - void setParentUmbrella(StringRef Parent) { ParentUmbrella = Parent; } + /// Set the parent umbrella frameworks. + /// \param Target_ The target applicable to Parent + /// \param Parent The name of Parent + void addParentUmbrella(const Target &Target_, StringRef Parent); + const std::vector> &umbrellas() const { + return ParentUmbrellas; + } /// Get the parent umbrella framework. - StringRef getParentUmbrella() const { return ParentUmbrella; } + const std::vector> getParentUmbrellas() const { + return ParentUmbrellas; + } /// Add an allowable client. /// @@ -257,9 +288,9 @@ public: /// that is being generated needs to match one of the allowable clients or the /// linker refuses to link this library. /// - /// \param Name The name of the client that is allowed to link this library. - /// \param Architectures The set of architecture for which this applies. - void addAllowableClient(StringRef Name, ArchitectureSet Architectures); + /// \param InstallName The name of the client that is allowed to link this library. + /// \param Target The target triple for which this applies. + void addAllowableClient(StringRef InstallName, const Target &Target); /// Get the list of allowable clients. /// @@ -271,9 +302,8 @@ public: /// Add a re-exported library. /// /// \param InstallName The name of the library to re-export. - /// \param Architectures The set of architecture for which this applies. 
- void addReexportedLibrary(StringRef InstallName, - ArchitectureSet Architectures); + /// \param Target The target triple for which this applies. + void addReexportedLibrary(StringRef InstallName, const Target &Target); /// Get the list of re-exported libraries. /// @@ -282,27 +312,27 @@ public: return ReexportedLibraries; } - /// Add an architecture/UUID pair. + /// Add an Target/UUID pair. /// - /// \param Arch The architecture for which this applies. + /// \param Target The target triple for which this applies. /// \param UUID The UUID of the library for the specified architecture. - void addUUID(Architecture Arch, StringRef UUID); + void addUUID(const Target &Target, StringRef UUID); - /// Add an architecture/UUID pair. + /// Add an Target/UUID pair. /// - /// \param Arch The architecture for which this applies. + /// \param Target The target triple for which this applies. /// \param UUID The UUID of the library for the specified architecture. - void addUUID(Architecture Arch, uint8_t UUID[16]); + void addUUID(const Target &Target, uint8_t UUID[16]); - /// Get the list of architecture/UUID pairs. + /// Get the list of Target/UUID pairs. /// - /// \return Returns a list of architecture/UUID pairs. - const std::vector> &uuids() const { + /// \return Returns a list of Target/UUID pairs. + const std::vector> &uuids() const { return UUIDs; } /// Add a symbol to the symbols list or extend an existing one. - void addSymbol(SymbolKind Kind, StringRef Name, ArchitectureSet Architectures, + void addSymbol(SymbolKind Kind, StringRef Name, const TargetList &Targets, SymbolFlags Flags = SymbolFlags::None); using SymbolMapType = DenseMap; @@ -320,84 +350,35 @@ public: reference operator*() const { return I->second; } pointer operator->() const { return I->second; } }; - using const_symbol_range = iterator_range; - - // Custom iterator to return only exported symbols. - struct const_export_iterator - : public iterator_adaptor_base< - const_export_iterator, const_symbol_iterator, - std::forward_iterator_tag, const Symbol *> { - const_symbol_iterator _end; - - void skipToNextSymbol() { - while (I != _end && I->isUndefined()) - ++I; - } - - const_export_iterator() = default; - template - const_export_iterator(U &&it, U &&end) - : iterator_adaptor_base(std::forward(it)), - _end(std::forward(end)) { - skipToNextSymbol(); - } - - const_export_iterator &operator++() { - ++I; - skipToNextSymbol(); - return *this; - } - - const_export_iterator operator++(int) { - const_export_iterator tmp(*this); - ++(*this); - return tmp; - } - }; - using const_export_range = llvm::iterator_range; - - // Custom iterator to return only undefined symbols. 
- struct const_undefined_iterator - : public iterator_adaptor_base< - const_undefined_iterator, const_symbol_iterator, - std::forward_iterator_tag, const Symbol *> { - const_symbol_iterator _end; - void skipToNextSymbol() { - while (I != _end && !I->isUndefined()) - ++I; - } + using const_symbol_range = iterator_range; - const_undefined_iterator() = default; - template - const_undefined_iterator(U &&it, U &&end) - : iterator_adaptor_base(std::forward(it)), - _end(std::forward(end)) { - skipToNextSymbol(); - } - - const_undefined_iterator &operator++() { - ++I; - skipToNextSymbol(); - return *this; - } - - const_undefined_iterator operator++(int) { - const_undefined_iterator tmp(*this); - ++(*this); - return tmp; - } - }; - using const_undefined_range = llvm::iterator_range; + using const_filtered_symbol_iterator = + filter_iterator>; + using const_filtered_symbol_range = + iterator_range; const_symbol_range symbols() const { return {Symbols.begin(), Symbols.end()}; } - const_export_range exports() const { - return {{Symbols.begin(), Symbols.end()}, {Symbols.end(), Symbols.end()}}; + + const_filtered_symbol_range exports() const { + std::function fn = [](const Symbol *Symbol) { + return !Symbol->isUndefined(); + }; + return make_filter_range( + make_range({Symbols.begin()}, {Symbols.end()}), + fn); } - const_undefined_range undefineds() const { - return {{Symbols.begin(), Symbols.end()}, {Symbols.end(), Symbols.end()}}; + + const_filtered_symbol_range undefineds() const { + std::function fn = [](const Symbol *Symbol) { + return Symbol->isUndefined(); + }; + return make_filter_range( + make_range({Symbols.begin()}, {Symbols.end()}), + fn); } private: @@ -411,10 +392,9 @@ private: return StringRef(reinterpret_cast(Ptr), String.size()); } + TargetList Targets; std::string Path; FileType FileKind; - PlatformKind Platform; - ArchitectureSet Architectures; std::string InstallName; PackedVersion CurrentVersion; PackedVersion CompatibilityVersion; @@ -423,10 +403,10 @@ private: bool IsAppExtensionSafe{false}; bool IsInstallAPI{false}; ObjCConstraintType ObjcConstraint = ObjCConstraintType::None; - std::string ParentUmbrella; + std::vector> ParentUmbrellas; std::vector AllowableClients; std::vector ReexportedLibraries; - std::vector> UUIDs; + std::vector> UUIDs; SymbolMapType Symbols; }; diff --git a/include/llvm/TextAPI/MachO/Platform.h b/include/llvm/TextAPI/MachO/Platform.h new file mode 100644 index 000000000000..a22aae9b7dce --- /dev/null +++ b/include/llvm/TextAPI/MachO/Platform.h @@ -0,0 +1,45 @@ +//===- llvm/TextAPI/MachO/Platform.h - Platform -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines the Platforms supported by Tapi and helpers. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_TEXTAPI_MACHO_PLATFORM_H +#define LLVM_TEXTAPI_MACHO_PLATFORM_H + +#include "llvm/ADT/SmallSet.h" +#include "llvm/BinaryFormat/MachO.h" + +namespace llvm { +namespace MachO { + +/// Defines the list of MachO platforms. 
+enum class PlatformKind : unsigned { + unknown, + macOS = MachO::PLATFORM_MACOS, + iOS = MachO::PLATFORM_IOS, + tvOS = MachO::PLATFORM_TVOS, + watchOS = MachO::PLATFORM_WATCHOS, + bridgeOS = MachO::PLATFORM_BRIDGEOS, + macCatalyst = MachO::PLATFORM_MACCATALYST, + iOSSimulator = MachO::PLATFORM_IOSSIMULATOR, + tvOSSimulator = MachO::PLATFORM_TVOSSIMULATOR, + watchOSSimulator = MachO::PLATFORM_WATCHOSSIMULATOR +}; + +using PlatformSet = SmallSet; + +PlatformKind mapToPlatformKind(PlatformKind Platform, bool WantSim); +PlatformKind mapToPlatformKind(const Triple &Target); +PlatformSet mapToPlatformSet(ArrayRef Targets); +StringRef getPlatformName(PlatformKind Platform); + +} // end namespace MachO. +} // end namespace llvm. + +#endif // LLVM_TEXTAPI_MACHO_PLATFORM_H \ No newline at end of file diff --git a/include/llvm/TextAPI/MachO/Symbol.h b/include/llvm/TextAPI/MachO/Symbol.h index 3c7ff5e0f4ea..1b1632c599c4 100644 --- a/include/llvm/TextAPI/MachO/Symbol.h +++ b/include/llvm/TextAPI/MachO/Symbol.h @@ -14,6 +14,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TextAPI/MachO/ArchitectureSet.h" +#include "llvm/TextAPI/MachO/Target.h" namespace llvm { namespace MachO { @@ -37,7 +38,10 @@ enum class SymbolFlags : uint8_t { /// Undefined Undefined = 1U << 3, - LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/Undefined), + /// Rexported + Rexported = 1U << 4, + + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/Rexported), }; // clang-format on @@ -49,16 +53,18 @@ enum class SymbolKind : uint8_t { ObjectiveCInstanceVariable, }; +using TargetList = SmallVector; class Symbol { public: - constexpr Symbol(SymbolKind Kind, StringRef Name, - ArchitectureSet Architectures, SymbolFlags Flags) - : Name(Name), Architectures(Architectures), Kind(Kind), Flags(Flags) {} + Symbol(SymbolKind Kind, StringRef Name, TargetList Targets, SymbolFlags Flags) + : Name(Name), Targets(std::move(Targets)), Kind(Kind), Flags(Flags) {} + void addTarget(Target target) { Targets.emplace_back(target); } SymbolKind getKind() const { return Kind; } StringRef getName() const { return Name; } - ArchitectureSet getArchitectures() const { return Architectures; } - void addArchitectures(ArchitectureSet Archs) { Architectures |= Archs; } + ArchitectureSet getArchitectures() const { + return mapToArchitectureSet(Targets); + } SymbolFlags getFlags() const { return Flags; } bool isWeakDefined() const { @@ -78,6 +84,21 @@ public: return (Flags & SymbolFlags::Undefined) == SymbolFlags::Undefined; } + bool isReexported() const { + return (Flags & SymbolFlags::Rexported) == SymbolFlags::Rexported; + } + + using const_target_iterator = TargetList::const_iterator; + using const_target_range = llvm::iterator_range; + const_target_range targets() const { return {Targets}; } + + using const_filtered_target_iterator = + llvm::filter_iterator>; + using const_filtered_target_range = + llvm::iterator_range; + const_filtered_target_range targets(ArchitectureSet architectures) const; + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void dump(raw_ostream &OS) const; void dump() const { dump(llvm::errs()); } @@ -85,7 +106,7 @@ public: private: StringRef Name; - ArchitectureSet Architectures; + TargetList Targets; SymbolKind Kind; SymbolFlags Flags; }; diff --git a/include/llvm/TextAPI/MachO/Target.h b/include/llvm/TextAPI/MachO/Target.h new file mode 100644 index 000000000000..5fe44cb7d366 --- /dev/null +++ b/include/llvm/TextAPI/MachO/Target.h @@ -0,0 +1,68 @@ +//===- llvm/TextAPI/Target.h - TAPI Target 
----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TEXTAPI_MACHO_TARGET_H +#define LLVM_TEXTAPI_MACHO_TARGET_H + +#include "llvm/ADT/Triple.h" +#include "llvm/Support/Error.h" +#include "llvm/TextAPI/MachO/Architecture.h" +#include "llvm/TextAPI/MachO/ArchitectureSet.h" +#include "llvm/TextAPI/MachO/Platform.h" + +namespace llvm { +namespace MachO { + +// This is similar to a llvm Triple, but the triple doesn't have all the +// information we need. For example there is no enum value for x86_64h. The +// only way to get that information is to parse the triple string. +class Target { +public: + Target() = default; + Target(Architecture Arch, PlatformKind Platform) + : Arch(Arch), Platform(Platform) {} + explicit Target(const llvm::Triple &Triple) + : Arch(mapToArchitecture(Triple)), Platform(mapToPlatformKind(Triple)) {} + + static llvm::Expected create(StringRef Target); + + operator std::string() const; + + Architecture Arch; + PlatformKind Platform; +}; + +inline bool operator==(const Target &LHS, const Target &RHS) { + return std::tie(LHS.Arch, LHS.Platform) == std::tie(RHS.Arch, RHS.Platform); +} + +inline bool operator!=(const Target &LHS, const Target &RHS) { + return std::tie(LHS.Arch, LHS.Platform) != std::tie(RHS.Arch, RHS.Platform); +} + +inline bool operator<(const Target &LHS, const Target &RHS) { + return std::tie(LHS.Arch, LHS.Platform) < std::tie(RHS.Arch, RHS.Platform); +} + +inline bool operator==(const Target &LHS, const Architecture &RHS) { + return LHS.Arch == RHS; +} + +inline bool operator!=(const Target &LHS, const Architecture &RHS) { + return LHS.Arch != RHS; +} + +PlatformSet mapToPlatformSet(ArrayRef Targets); +ArchitectureSet mapToArchitectureSet(ArrayRef Targets); + +raw_ostream &operator<<(raw_ostream &OS, const Target &Target); + +} // namespace MachO +} // namespace llvm + +#endif // LLVM_TEXTAPI_MACHO_TARGET_H diff --git a/include/llvm/TextAPI/MachO/TextAPIReader.h b/include/llvm/TextAPI/MachO/TextAPIReader.h index 6d9c09de5294..c551f0454e8e 100644 --- a/include/llvm/TextAPI/MachO/TextAPIReader.h +++ b/include/llvm/TextAPI/MachO/TextAPIReader.h @@ -20,10 +20,7 @@ class InterfaceFile; class TextAPIReader { public: static Expected> - get(std::unique_ptr InputBuffer); - - static Expected> - getUnmanaged(llvm::MemoryBuffer *InputBuffer); + get(MemoryBufferRef InputBuffer); TextAPIReader() = delete; }; diff --git a/include/llvm/Transforms/IPO/Attributor.h b/include/llvm/Transforms/IPO/Attributor.h index 5dbe21ac5e4e..3dbe0fcd76ea 100644 --- a/include/llvm/Transforms/IPO/Attributor.h +++ b/include/llvm/Transforms/IPO/Attributor.h @@ -60,13 +60,12 @@ // manifest their result in the IR for passes to come. // // Attribute manifestation is not mandatory. If desired, there is support to -// generate a single LLVM-IR attribute already in the AbstractAttribute base -// class. In the simplest case, a subclass overloads -// `AbstractAttribute::getManifestPosition()` and -// `AbstractAttribute::getAttrKind()` to return the appropriate values. The -// Attributor manifestation framework will then create and place a new attribute -// if it is allowed to do so (based on the abstract state). Other use cases can -// be achieved by overloading other abstract attribute methods. 
+// generate a single or multiple LLVM-IR attributes already in the helper struct +// IRAttribute. In the simplest case, a subclass inherits from IRAttribute with +// a proper Attribute::AttrKind as template parameter. The Attributor +// manifestation framework will then create and place a new attribute if it is +// allowed to do so (based on the abstract state). Other use cases can be +// achieved by overloading AbstractAttribute or IRAttribute methods. // // // The "mechanics" of adding a new "abstract attribute": @@ -97,7 +96,13 @@ #ifndef LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H #define LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H -#include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/MustExecute.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/PassManager.h" @@ -105,6 +110,7 @@ namespace llvm { struct AbstractAttribute; struct InformationCache; +struct AAIsDead; class Function; @@ -120,6 +126,563 @@ ChangeStatus operator|(ChangeStatus l, ChangeStatus r); ChangeStatus operator&(ChangeStatus l, ChangeStatus r); ///} +/// Helper to describe and deal with positions in the LLVM-IR. +/// +/// A position in the IR is described by an anchor value and an "offset" that +/// could be the argument number, for call sites and arguments, or an indicator +/// of the "position kind". The kinds, specified in the Kind enum below, include +/// the locations in the attribute list, i.a., function scope and return value, +/// as well as a distinction between call sites and functions. Finally, there +/// are floating values that do not have a corresponding attribute list +/// position. +struct IRPosition { + virtual ~IRPosition() {} + + /// The positions we distinguish in the IR. + /// + /// The values are chosen such that the KindOrArgNo member has a value >= 1 + /// if it is an argument or call site argument while a value < 1 indicates the + /// respective kind of that value. + enum Kind : int { + IRP_INVALID = -6, ///< An invalid position. + IRP_FLOAT = -5, ///< A position that is not associated with a spot suitable + ///< for attributes. This could be any value or instruction. + IRP_RETURNED = -4, ///< An attribute for the function return value. + IRP_CALL_SITE_RETURNED = -3, ///< An attribute for a call site return value. + IRP_FUNCTION = -2, ///< An attribute for a function (scope). + IRP_CALL_SITE = -1, ///< An attribute for a call site (function scope). + IRP_ARGUMENT = 0, ///< An attribute for a function argument. + IRP_CALL_SITE_ARGUMENT = 1, ///< An attribute for a call site argument. + }; + + /// Default constructor available to create invalid positions implicitly. All + /// other positions need to be created explicitly through the appropriate + /// static member function. + IRPosition() : AnchorVal(nullptr), KindOrArgNo(IRP_INVALID) { verify(); } + + /// Create a position describing the value of \p V. + static const IRPosition value(const Value &V) { + if (auto *Arg = dyn_cast(&V)) + return IRPosition::argument(*Arg); + if (auto *CB = dyn_cast(&V)) + return IRPosition::callsite_returned(*CB); + return IRPosition(const_cast(V), IRP_FLOAT); + } + + /// Create a position describing the function scope of \p F. + static const IRPosition function(const Function &F) { + return IRPosition(const_cast(F), IRP_FUNCTION); + } + + /// Create a position describing the returned value of \p F. 
+ static const IRPosition returned(const Function &F) { + return IRPosition(const_cast(F), IRP_RETURNED); + } + + /// Create a position describing the argument \p Arg. + static const IRPosition argument(const Argument &Arg) { + return IRPosition(const_cast(Arg), Kind(Arg.getArgNo())); + } + + /// Create a position describing the function scope of \p CB. + static const IRPosition callsite_function(const CallBase &CB) { + return IRPosition(const_cast(CB), IRP_CALL_SITE); + } + + /// Create a position describing the returned value of \p CB. + static const IRPosition callsite_returned(const CallBase &CB) { + return IRPosition(const_cast(CB), IRP_CALL_SITE_RETURNED); + } + + /// Create a position describing the argument of \p CB at position \p ArgNo. + static const IRPosition callsite_argument(const CallBase &CB, + unsigned ArgNo) { + return IRPosition(const_cast(CB), Kind(ArgNo)); + } + + /// Create a position describing the function scope of \p ICS. + static const IRPosition callsite_function(ImmutableCallSite ICS) { + return IRPosition::callsite_function(cast(*ICS.getInstruction())); + } + + /// Create a position describing the returned value of \p ICS. + static const IRPosition callsite_returned(ImmutableCallSite ICS) { + return IRPosition::callsite_returned(cast(*ICS.getInstruction())); + } + + /// Create a position describing the argument of \p ICS at position \p ArgNo. + static const IRPosition callsite_argument(ImmutableCallSite ICS, + unsigned ArgNo) { + return IRPosition::callsite_argument(cast(*ICS.getInstruction()), + ArgNo); + } + + /// Create a position describing the argument of \p ACS at position \p ArgNo. + static const IRPosition callsite_argument(AbstractCallSite ACS, + unsigned ArgNo) { + int CSArgNo = ACS.getCallArgOperandNo(ArgNo); + if (CSArgNo >= 0) + return IRPosition::callsite_argument( + cast(*ACS.getInstruction()), CSArgNo); + return IRPosition(); + } + + /// Create a position with function scope matching the "context" of \p IRP. + /// If \p IRP is a call site (see isAnyCallSitePosition()) then the result + /// will be a call site position, otherwise the function position of the + /// associated function. + static const IRPosition function_scope(const IRPosition &IRP) { + if (IRP.isAnyCallSitePosition()) { + return IRPosition::callsite_function( + cast(IRP.getAnchorValue())); + } + assert(IRP.getAssociatedFunction()); + return IRPosition::function(*IRP.getAssociatedFunction()); + } + + bool operator==(const IRPosition &RHS) const { + return (AnchorVal == RHS.AnchorVal) && (KindOrArgNo == RHS.KindOrArgNo); + } + bool operator!=(const IRPosition &RHS) const { return !(*this == RHS); } + + /// Return the value this abstract attribute is anchored with. + /// + /// The anchor value might not be the associated value if the latter is not + /// sufficient to determine where arguments will be manifested. This is, so + /// far, only the case for call site arguments as the value is not sufficient + /// to pinpoint them. Instead, we can use the call site as an anchor. + /// + ///{ + Value &getAnchorValue() { + assert(KindOrArgNo != IRP_INVALID && + "Invalid position does not have an anchor value!"); + return *AnchorVal; + } + const Value &getAnchorValue() const { + return const_cast(this)->getAnchorValue(); + } + ///} + + /// Return the associated function, if any. 
+ /// + ///{ + Function *getAssociatedFunction() { + if (auto *CB = dyn_cast(AnchorVal)) + return CB->getCalledFunction(); + assert(KindOrArgNo != IRP_INVALID && + "Invalid position does not have an anchor scope!"); + Value &V = getAnchorValue(); + if (isa(V)) + return &cast(V); + if (isa(V)) + return cast(V).getParent(); + if (isa(V)) + return cast(V).getFunction(); + return nullptr; + } + const Function *getAssociatedFunction() const { + return const_cast(this)->getAssociatedFunction(); + } + ///} + + /// Return the associated argument, if any. + /// + ///{ + Argument *getAssociatedArgument() { + if (auto *Arg = dyn_cast(&getAnchorValue())) + return Arg; + int ArgNo = getArgNo(); + if (ArgNo < 0) + return nullptr; + Function *AssociatedFn = getAssociatedFunction(); + if (!AssociatedFn || AssociatedFn->arg_size() <= unsigned(ArgNo)) + return nullptr; + return AssociatedFn->arg_begin() + ArgNo; + } + const Argument *getAssociatedArgument() const { + return const_cast(this)->getAssociatedArgument(); + } + ///} + + /// Return true if the position refers to a function interface, that is the + /// function scope, the function return, or an argumnt. + bool isFnInterfaceKind() const { + switch (getPositionKind()) { + case IRPosition::IRP_FUNCTION: + case IRPosition::IRP_RETURNED: + case IRPosition::IRP_ARGUMENT: + return true; + default: + return false; + } + } + + /// Return the Function surrounding the anchor value. + /// + ///{ + Function *getAnchorScope() { + Value &V = getAnchorValue(); + if (isa(V)) + return &cast(V); + if (isa(V)) + return cast(V).getParent(); + if (isa(V)) + return cast(V).getFunction(); + return nullptr; + } + const Function *getAnchorScope() const { + return const_cast(this)->getAnchorScope(); + } + ///} + + /// Return the context instruction, if any. + /// + ///{ + Instruction *getCtxI() { + Value &V = getAnchorValue(); + if (auto *I = dyn_cast(&V)) + return I; + if (auto *Arg = dyn_cast(&V)) + if (!Arg->getParent()->isDeclaration()) + return &Arg->getParent()->getEntryBlock().front(); + if (auto *F = dyn_cast(&V)) + if (!F->isDeclaration()) + return &(F->getEntryBlock().front()); + return nullptr; + } + const Instruction *getCtxI() const { + return const_cast(this)->getCtxI(); + } + ///} + + /// Return the value this abstract attribute is associated with. + /// + ///{ + Value &getAssociatedValue() { + assert(KindOrArgNo != IRP_INVALID && + "Invalid position does not have an associated value!"); + if (getArgNo() < 0 || isa(AnchorVal)) + return *AnchorVal; + assert(isa(AnchorVal) && "Expected a call base!"); + return *cast(AnchorVal)->getArgOperand(getArgNo()); + } + const Value &getAssociatedValue() const { + return const_cast(this)->getAssociatedValue(); + } + ///} + + /// Return the argument number of the associated value if it is an argument or + /// call site argument, otherwise a negative value. + int getArgNo() const { return KindOrArgNo; } + + /// Return the index in the attribute list for this position. 
+ unsigned getAttrIdx() const { + switch (getPositionKind()) { + case IRPosition::IRP_INVALID: + case IRPosition::IRP_FLOAT: + break; + case IRPosition::IRP_FUNCTION: + case IRPosition::IRP_CALL_SITE: + return AttributeList::FunctionIndex; + case IRPosition::IRP_RETURNED: + case IRPosition::IRP_CALL_SITE_RETURNED: + return AttributeList::ReturnIndex; + case IRPosition::IRP_ARGUMENT: + case IRPosition::IRP_CALL_SITE_ARGUMENT: + return KindOrArgNo + AttributeList::FirstArgIndex; + } + llvm_unreachable( + "There is no attribute index for a floating or invalid position!"); + } + + /// Return the associated position kind. + Kind getPositionKind() const { + if (getArgNo() >= 0) { + assert(((isa(getAnchorValue()) && + isa(getAssociatedValue())) || + isa(getAnchorValue())) && + "Expected argument or call base due to argument number!"); + if (isa(getAnchorValue())) + return IRP_CALL_SITE_ARGUMENT; + return IRP_ARGUMENT; + } + + assert(KindOrArgNo < 0 && + "Expected (call site) arguments to never reach this point!"); + return Kind(KindOrArgNo); + } + + /// TODO: Figure out if the attribute related helper functions should live + /// here or somewhere else. + + /// Return true if any kind in \p AKs existing in the IR at a position that + /// will affect this one. See also getAttrs(...). + /// \param IgnoreSubsumingPositions Flag to determine if subsuming positions, + /// e.g., the function position if this is an + /// argument position, should be ignored. + bool hasAttr(ArrayRef AKs, + bool IgnoreSubsumingPositions = false) const; + + /// Return the attributes of any kind in \p AKs existing in the IR at a + /// position that will affect this one. While each position can only have a + /// single attribute of any kind in \p AKs, there are "subsuming" positions + /// that could have an attribute as well. This method returns all attributes + /// found in \p Attrs. + void getAttrs(ArrayRef AKs, + SmallVectorImpl &Attrs) const; + + /// Return the attribute of kind \p AK existing in the IR at this position. + Attribute getAttr(Attribute::AttrKind AK) const { + if (getPositionKind() == IRP_INVALID || getPositionKind() == IRP_FLOAT) + return Attribute(); + + AttributeList AttrList; + if (ImmutableCallSite ICS = ImmutableCallSite(&getAnchorValue())) + AttrList = ICS.getAttributes(); + else + AttrList = getAssociatedFunction()->getAttributes(); + + if (AttrList.hasAttribute(getAttrIdx(), AK)) + return AttrList.getAttribute(getAttrIdx(), AK); + return Attribute(); + } + + /// Remove the attribute of kind \p AKs existing in the IR at this position. + void removeAttrs(ArrayRef AKs) { + if (getPositionKind() == IRP_INVALID || getPositionKind() == IRP_FLOAT) + return; + + AttributeList AttrList; + CallSite CS = CallSite(&getAnchorValue()); + if (CS) + AttrList = CS.getAttributes(); + else + AttrList = getAssociatedFunction()->getAttributes(); + + LLVMContext &Ctx = getAnchorValue().getContext(); + for (Attribute::AttrKind AK : AKs) + AttrList = AttrList.removeAttribute(Ctx, getAttrIdx(), AK); + + if (CS) + CS.setAttributes(AttrList); + else + getAssociatedFunction()->setAttributes(AttrList); + } + + bool isAnyCallSitePosition() const { + switch (getPositionKind()) { + case IRPosition::IRP_CALL_SITE: + case IRPosition::IRP_CALL_SITE_RETURNED: + case IRPosition::IRP_CALL_SITE_ARGUMENT: + return true; + default: + return false; + } + } + + /// Special DenseMap key values. 
+ /// + ///{ + static const IRPosition EmptyKey; + static const IRPosition TombstoneKey; + ///} + +private: + /// Private constructor for special values only! + explicit IRPosition(int KindOrArgNo) + : AnchorVal(0), KindOrArgNo(KindOrArgNo) {} + + /// IRPosition anchored at \p AnchorVal with kind/argument numbet \p PK. + explicit IRPosition(Value &AnchorVal, Kind PK) + : AnchorVal(&AnchorVal), KindOrArgNo(PK) { + verify(); + } + + /// Verify internal invariants. + void verify(); + + /// The value this position is anchored at. + Value *AnchorVal; + + /// The argument number, if non-negative, or the position "kind". + int KindOrArgNo; +}; + +/// Helper that allows IRPosition as a key in a DenseMap. +template <> struct DenseMapInfo { + static inline IRPosition getEmptyKey() { return IRPosition::EmptyKey; } + static inline IRPosition getTombstoneKey() { + return IRPosition::TombstoneKey; + } + static unsigned getHashValue(const IRPosition &IRP) { + return (DenseMapInfo::getHashValue(&IRP.getAnchorValue()) << 4) ^ + (unsigned(IRP.getArgNo())); + } + static bool isEqual(const IRPosition &LHS, const IRPosition &RHS) { + return LHS == RHS; + } +}; + +/// A visitor class for IR positions. +/// +/// Given a position P, the SubsumingPositionIterator allows to visit "subsuming +/// positions" wrt. attributes/information. Thus, if a piece of information +/// holds for a subsuming position, it also holds for the position P. +/// +/// The subsuming positions always include the initial position and then, +/// depending on the position kind, additionally the following ones: +/// - for IRP_RETURNED: +/// - the function (IRP_FUNCTION) +/// - for IRP_ARGUMENT: +/// - the function (IRP_FUNCTION) +/// - for IRP_CALL_SITE: +/// - the callee (IRP_FUNCTION), if known +/// - for IRP_CALL_SITE_RETURNED: +/// - the callee (IRP_RETURNED), if known +/// - the call site (IRP_FUNCTION) +/// - the callee (IRP_FUNCTION), if known +/// - for IRP_CALL_SITE_ARGUMENT: +/// - the argument of the callee (IRP_ARGUMENT), if known +/// - the callee (IRP_FUNCTION), if known +/// - the position the call site argument is associated with if it is not +/// anchored to the call site, e.g., if it is an arugment then the argument +/// (IRP_ARGUMENT) +class SubsumingPositionIterator { + SmallVector IRPositions; + using iterator = decltype(IRPositions)::iterator; + +public: + SubsumingPositionIterator(const IRPosition &IRP); + iterator begin() { return IRPositions.begin(); } + iterator end() { return IRPositions.end(); } +}; + +/// Wrapper for FunctoinAnalysisManager. +struct AnalysisGetter { + template + typename Analysis::Result *getAnalysis(const Function &F) { + if (!MAM || !F.getParent()) + return nullptr; + auto &FAM = MAM->getResult( + const_cast(*F.getParent())) + .getManager(); + return &FAM.getResult(const_cast(F)); + } + + template + typename Analysis::Result *getAnalysis(const Module &M) { + if (!MAM) + return nullptr; + return &MAM->getResult(const_cast(M)); + } + AnalysisGetter(ModuleAnalysisManager &MAM) : MAM(&MAM) {} + AnalysisGetter() {} + +private: + ModuleAnalysisManager *MAM = nullptr; +}; + +/// Data structure to hold cached (LLVM-IR) information. +/// +/// All attributes are given an InformationCache object at creation time to +/// avoid inspection of the IR by all of them individually. This default +/// InformationCache will hold information required by 'default' attributes, +/// thus the ones deduced when Attributor::identifyDefaultAbstractAttributes(..) +/// is called. 
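Editor's illustration, not part of the imported header: a minimal sketch of constructing IRPosition values and using them as DenseMap keys with the declarations above. The helper positionExamples is hypothetical and assumes F has at least one parameter and CB at least one call argument.

#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Transforms/IPO/Attributor.h"
using namespace llvm;

static void positionExamples(Function &F, CallBase &CB) {
  // Positions that correspond to attribute-list slots.
  const IRPosition FnPos  = IRPosition::function(F);              // function scope
  const IRPosition RetPos = IRPosition::returned(F);              // return value
  const IRPosition ArgPos = IRPosition::argument(*F.arg_begin()); // argument #0
  const IRPosition CSArg  = IRPosition::callsite_argument(CB, /* ArgNo */ 0);

  // A generic value position; arguments and call bases are canonicalized to
  // their dedicated kinds by IRPosition::value.
  const IRPosition ValPos = IRPosition::value(CB);

  // The DenseMapInfo specialization above makes IRPosition usable as a key,
  // and SubsumingPositionIterator visits every position whose information
  // also applies to CSArg.
  DenseMap<IRPosition, unsigned> Visited;
  for (const IRPosition &Subsuming : SubsumingPositionIterator(CSArg))
    ++Visited[Subsuming];

  (void)FnPos; (void)RetPos; (void)ArgPos; (void)ValPos;
}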
+/// +/// If custom abstract attributes, registered manually through +/// Attributor::registerAA(...), need more information, especially if it is not +/// reusable, it is advised to inherit from the InformationCache and cast the +/// instance down in the abstract attributes. +struct InformationCache { + InformationCache(const Module &M, AnalysisGetter &AG) + : DL(M.getDataLayout()), Explorer(/* ExploreInterBlock */ true), AG(AG) { + + CallGraph *CG = AG.getAnalysis(M); + if (!CG) + return; + + DenseMap SccSize; + for (scc_iterator I = scc_begin(CG); !I.isAtEnd(); ++I) { + for (CallGraphNode *Node : *I) + SccSize[Node->getFunction()] = I->size(); + } + SccSizeOpt = std::move(SccSize); + } + + /// A map type from opcodes to instructions with this opcode. + using OpcodeInstMapTy = DenseMap>; + + /// Return the map that relates "interesting" opcodes with all instructions + /// with that opcode in \p F. + OpcodeInstMapTy &getOpcodeInstMapForFunction(const Function &F) { + return FuncInstOpcodeMap[&F]; + } + + /// A vector type to hold instructions. + using InstructionVectorTy = std::vector; + + /// Return the instructions in \p F that may read or write memory. + InstructionVectorTy &getReadOrWriteInstsForFunction(const Function &F) { + return FuncRWInstsMap[&F]; + } + + /// Return MustBeExecutedContextExplorer + MustBeExecutedContextExplorer &getMustBeExecutedContextExplorer() { + return Explorer; + } + + /// Return TargetLibraryInfo for function \p F. + TargetLibraryInfo *getTargetLibraryInfoForFunction(const Function &F) { + return AG.getAnalysis(F); + } + + /// Return AliasAnalysis Result for function \p F. + AAResults *getAAResultsForFunction(const Function &F) { + return AG.getAnalysis(F); + } + + /// Return SCC size on call graph for function \p F. + unsigned getSccSize(const Function &F) { + if (!SccSizeOpt.hasValue()) + return 0; + return (SccSizeOpt.getValue())[&F]; + } + + /// Return datalayout used in the module. + const DataLayout &getDL() { return DL; } + +private: + /// A map type from functions to opcode to instruction maps. + using FuncInstOpcodeMapTy = DenseMap; + + /// A map type from functions to their read or write instructions. + using FuncRWInstsMapTy = DenseMap; + + /// A nested map that remembers all instructions in a function with a certain + /// instruction opcode (Instruction::getOpcode()). + FuncInstOpcodeMapTy FuncInstOpcodeMap; + + /// A map from functions to their instructions that may read or write memory. + FuncRWInstsMapTy FuncRWInstsMap; + + /// The datalayout used in the module. + const DataLayout &DL; + + /// MustBeExecutedContextExplorer + MustBeExecutedContextExplorer Explorer; + + /// Getters for analysis. + AnalysisGetter &AG; + + /// Cache result for scc size in the call graph + Optional> SccSizeOpt; + + /// Give the Attributor access to the members so + /// Attributor::identifyDefaultAbstractAttributes(...) can initialize them. + friend struct Attributor; +}; + /// The fixpoint analysis framework that orchestrates the attribute deduction. /// /// The Attributor provides a general abstract analysis framework (guided @@ -148,6 +711,18 @@ ChangeStatus operator&(ChangeStatus l, ChangeStatus r); /// NOTE: The mechanics of adding a new "concrete" abstract attribute are /// described in the file comment. struct Attributor { + /// Constructor + /// + /// \param InfoCache Cache to hold various information accessible for + /// the abstract attributes. 
+ /// \param DepRecomputeInterval Number of iterations until the dependences + /// between abstract attributes are recomputed. + /// \param Whitelist If not null, a set limiting the attribute opportunities. + Attributor(InformationCache &InfoCache, unsigned DepRecomputeInterval, + DenseSet *Whitelist = nullptr) + : InfoCache(InfoCache), DepRecomputeInterval(DepRecomputeInterval), + Whitelist(Whitelist) {} + ~Attributor() { DeleteContainerPointers(AllAbstractAttributes); } /// Run the analyses until a fixpoint is reached or enforced (timeout). @@ -156,12 +731,13 @@ struct Attributor { /// as the Attributor is not destroyed (it owns the attributes now). /// /// \Returns CHANGED if the IR was changed, otherwise UNCHANGED. - ChangeStatus run(); + ChangeStatus run(Module &M); - /// Lookup an abstract attribute of type \p AAType anchored at value \p V and - /// argument number \p ArgNo. If no attribute is found and \p V is a call base - /// instruction, the called function is tried as a value next. Thus, the - /// returned abstract attribute might be anchored at the callee of \p V. + /// Lookup an abstract attribute of type \p AAType at position \p IRP. While + /// no abstract attribute is found equivalent positions are checked, see + /// SubsumingPositionIterator. Thus, the returned abstract attribute + /// might be anchored at a different position, e.g., the callee if \p IRP is a + /// call base. /// /// This method is the only (supported) way an abstract attribute can retrieve /// information from another abstract attribute. As an example, take an @@ -170,51 +746,29 @@ struct Attributor { /// most optimistic information for other abstract attributes in-flight, e.g. /// the one reasoning about the "captured" state for the argument or the one /// reasoning on the memory access behavior of the function as a whole. + /// + /// If the flag \p TrackDependence is set to false the dependence from + /// \p QueryingAA to the return abstract attribute is not automatically + /// recorded. This should only be used if the caller will record the + /// dependence explicitly if necessary, thus if it the returned abstract + /// attribute is used for reasoning. To record the dependences explicitly use + /// the `Attributor::recordDependence` method. template - const AAType *getAAFor(AbstractAttribute &QueryingAA, const Value &V, - int ArgNo = -1) { - static_assert(std::is_base_of::value, - "Cannot query an attribute with a type not derived from " - "'AbstractAttribute'!"); - assert(AAType::ID != Attribute::None && - "Cannot lookup generic abstract attributes!"); - - // Determine the argument number automatically for llvm::Arguments if none - // is set. Do not override a given one as it could be a use of the argument - // in a call site. - if (ArgNo == -1) - if (auto *Arg = dyn_cast(&V)) - ArgNo = Arg->getArgNo(); - - // If a function was given together with an argument number, perform the - // lookup for the actual argument instead. Don't do it for variadic - // arguments. - if (ArgNo >= 0 && isa(&V) && - cast(&V)->arg_size() > (size_t)ArgNo) - return getAAFor( - QueryingAA, *(cast(&V)->arg_begin() + ArgNo), ArgNo); - - // Lookup the abstract attribute of type AAType. If found, return it after - // registering a dependence of QueryingAA on the one returned attribute. - const auto &KindToAbstractAttributeMap = AAMap.lookup({&V, ArgNo}); - if (AAType *AA = static_cast( - KindToAbstractAttributeMap.lookup(AAType::ID))) { - // Do not return an attribute with an invalid state. 
This minimizes checks - // at the calls sites and allows the fallback below to kick in. - if (AA->getState().isValidState()) { - QueryMap[AA].insert(&QueryingAA); - return AA; - } - } - - // If no abstract attribute was found and we look for a call site argument, - // defer to the actual argument instead. - ImmutableCallSite ICS(&V); - if (ICS && ICS.getCalledValue()) - return getAAFor(QueryingAA, *ICS.getCalledValue(), ArgNo); + const AAType &getAAFor(const AbstractAttribute &QueryingAA, + const IRPosition &IRP, bool TrackDependence = true) { + return getOrCreateAAFor(IRP, &QueryingAA, TrackDependence); + } - // No matching attribute found - return nullptr; + /// Explicitly record a dependence from \p FromAA to \p ToAA, that is if + /// \p FromAA changes \p ToAA should be updated as well. + /// + /// This method should be used in conjunction with the `getAAFor` method and + /// with the TrackDependence flag passed to the method set to false. This can + /// be beneficial to avoid false dependences but it requires the users of + /// `getAAFor` to explicitly record true dependences through this method. + void recordDependence(const AbstractAttribute &FromAA, + const AbstractAttribute &ToAA) { + QueryMap[&FromAA].insert(const_cast(&ToAA)); } /// Introduce a new abstract attribute into the fixpoint analysis. @@ -222,126 +776,242 @@ struct Attributor { /// Note that ownership of the attribute is given to the Attributor. It will /// invoke delete for the Attributor on destruction of the Attributor. /// - /// Attributes are identified by - /// (1) their anchored value (see AA.getAnchoredValue()), - /// (2) their argument number (\p ArgNo, or Argument::getArgNo()), and - /// (3) their default attribute kind (see AAType::ID). - template AAType ®isterAA(AAType &AA, int ArgNo = -1) { + /// Attributes are identified by their IR position (AAType::getIRPosition()) + /// and the address of their static member (see AAType::ID). + template AAType ®isterAA(AAType &AA) { static_assert(std::is_base_of::value, "Cannot register an attribute with a type not derived from " "'AbstractAttribute'!"); - - // Determine the anchor value and the argument number which are used to - // lookup the attribute together with AAType::ID. If passed an argument, - // use its argument number but do not override a given one as it could be a - // use of the argument at a call site. - Value &AnchoredVal = AA.getAnchoredValue(); - if (ArgNo == -1) - if (auto *Arg = dyn_cast(&AnchoredVal)) - ArgNo = Arg->getArgNo(); - // Put the attribute in the lookup map structure and the container we use to // keep track of all attributes. - AAMap[{&AnchoredVal, ArgNo}][AAType::ID] = &AA; + IRPosition &IRP = AA.getIRPosition(); + auto &KindToAbstractAttributeMap = AAMap[IRP]; + assert(!KindToAbstractAttributeMap.count(&AAType::ID) && + "Attribute already in map!"); + KindToAbstractAttributeMap[&AAType::ID] = &AA; AllAbstractAttributes.push_back(&AA); return AA; } + /// Return the internal information cache. + InformationCache &getInfoCache() { return InfoCache; } + /// Determine opportunities to derive 'default' attributes in \p F and create /// abstract attribute objects for them. /// /// \param F The function that is checked for attribute opportunities. - /// \param InfoCache A cache for information queryable by the new attributes. - /// \param Whitelist If not null, a set limiting the attribute opportunities. /// /// Note that abstract attribute instances are generally created even if the /// IR already contains the information they would deduce. 
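Editor's illustration, not part of the imported header: a sketch of the two dependence-tracking modes of getAAFor, as seen from the update method of a hypothetical abstract attribute AAExample that is assumed to be built on the IRAttribute/StateWrapper helpers declared further below.

ChangeStatus AAExample::updateImpl(Attributor &A) {
  const IRPosition FnPos = IRPosition::function(*getAnchorScope());

  // Default mode: the dependence of this AA on NoUnwindAA is recorded, so a
  // later change of NoUnwindAA re-triggers this update.
  const auto &NoUnwindAA = A.getAAFor<AANoUnwind>(*this, FnPos);
  if (!NoUnwindAA.isAssumedNoUnwind())
    return indicatePessimisticFixpoint();

  // Manual mode: look up without recording, then record the dependence only
  // if the optimistic information was actually used for reasoning.
  const auto &NoSyncAA =
      A.getAAFor<AANoSync>(*this, FnPos, /* TrackDependence */ false);
  if (NoSyncAA.isAssumedNoSync())
    A.recordDependence(NoSyncAA, *this);

  return ChangeStatus::UNCHANGED;
}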
The most important /// reason for this is the single interface, the one of the abstract attribute /// instance, which can be queried without the need to look at the IR in /// various places. - void identifyDefaultAbstractAttributes( - Function &F, InformationCache &InfoCache, - DenseSet *Whitelist = nullptr); + void identifyDefaultAbstractAttributes(Function &F); + + /// Initialize the information cache for queries regarding function \p F. + /// + /// This method needs to be called for all function that might be looked at + /// through the information cache interface *prior* to looking at them. + void initializeInformationCache(Function &F); + + /// Mark the internal function \p F as live. + /// + /// This will trigger the identification and initialization of attributes for + /// \p F. + void markLiveInternalFunction(const Function &F) { + assert(F.hasLocalLinkage() && + "Only local linkage is assumed dead initially."); + + identifyDefaultAbstractAttributes(const_cast(F)); + } + + /// Record that \p I is deleted after information was manifested. + void deleteAfterManifest(Instruction &I) { ToBeDeletedInsts.insert(&I); } + + /// Record that \p BB is deleted after information was manifested. + void deleteAfterManifest(BasicBlock &BB) { ToBeDeletedBlocks.insert(&BB); } + + /// Record that \p F is deleted after information was manifested. + void deleteAfterManifest(Function &F) { ToBeDeletedFunctions.insert(&F); } + + /// Return true if \p AA (or its context instruction) is assumed dead. + /// + /// If \p LivenessAA is not provided it is queried. + bool isAssumedDead(const AbstractAttribute &AA, const AAIsDead *LivenessAA); /// Check \p Pred on all function call sites. /// /// This method will evaluate \p Pred on call sites and return /// true if \p Pred holds in every call sites. However, this is only possible /// all call sites are known, hence the function has internal linkage. - bool checkForAllCallSites(Function &F, std::function &Pred, + bool checkForAllCallSites(const function_ref &Pred, + const AbstractAttribute &QueryingAA, bool RequireAllCallSites); + /// Check \p Pred on all values potentially returned by \p F. + /// + /// This method will evaluate \p Pred on all values potentially returned by + /// the function associated with \p QueryingAA. The returned values are + /// matched with their respective return instructions. Returns true if \p Pred + /// holds on all of them. + bool checkForAllReturnedValuesAndReturnInsts( + const function_ref &)> + &Pred, + const AbstractAttribute &QueryingAA); + + /// Check \p Pred on all values potentially returned by the function + /// associated with \p QueryingAA. + /// + /// This is the context insensitive version of the method above. + bool checkForAllReturnedValues(const function_ref &Pred, + const AbstractAttribute &QueryingAA); + + /// Check \p Pred on all instructions with an opcode present in \p Opcodes. + /// + /// This method will evaluate \p Pred on all instructions with an opcode + /// present in \p Opcode and return true if \p Pred holds on all of them. + bool checkForAllInstructions(const function_ref &Pred, + const AbstractAttribute &QueryingAA, + const ArrayRef &Opcodes); + + /// Check \p Pred on all call-like instructions (=CallBased derived). + /// + /// See checkForAllCallLikeInstructions(...) for more information. 
+ bool + checkForAllCallLikeInstructions(const function_ref &Pred, + const AbstractAttribute &QueryingAA) { + return checkForAllInstructions(Pred, QueryingAA, + {(unsigned)Instruction::Invoke, + (unsigned)Instruction::CallBr, + (unsigned)Instruction::Call}); + } + + /// Check \p Pred on all Read/Write instructions. + /// + /// This method will evaluate \p Pred on all instructions that read or write + /// to memory present in the information cache and return true if \p Pred + /// holds on all of them. + bool checkForAllReadWriteInstructions( + const llvm::function_ref &Pred, + AbstractAttribute &QueryingAA); + + /// Return the data layout associated with the anchor scope. + const DataLayout &getDataLayout() const { return InfoCache.DL; } + private: + /// Check \p Pred on all call sites of \p Fn. + /// + /// This method will evaluate \p Pred on call sites and return + /// true if \p Pred holds in every call sites. However, this is only possible + /// all call sites are known, hence the function has internal linkage. + bool checkForAllCallSites(const function_ref &Pred, + const Function &Fn, bool RequireAllCallSites, + const AbstractAttribute *QueryingAA); + + /// The private version of getAAFor that allows to omit a querying abstract + /// attribute. See also the public getAAFor method. + template + const AAType &getOrCreateAAFor(const IRPosition &IRP, + const AbstractAttribute *QueryingAA = nullptr, + bool TrackDependence = false) { + if (const AAType *AAPtr = + lookupAAFor(IRP, QueryingAA, TrackDependence)) + return *AAPtr; + + // No matching attribute found, create one. + // Use the static create method. + auto &AA = AAType::createForPosition(IRP, *this); + registerAA(AA); + + // For now we ignore naked and optnone functions. + bool Invalidate = Whitelist && !Whitelist->count(&AAType::ID); + if (const Function *Fn = IRP.getAnchorScope()) + Invalidate |= Fn->hasFnAttribute(Attribute::Naked) || + Fn->hasFnAttribute(Attribute::OptimizeNone); + + // Bootstrap the new attribute with an initial update to propagate + // information, e.g., function -> call site. If it is not on a given + // whitelist we will not perform updates at all. + if (Invalidate) { + AA.getState().indicatePessimisticFixpoint(); + return AA; + } + + AA.initialize(*this); + AA.update(*this); + + if (TrackDependence && AA.getState().isValidState()) + QueryMap[&AA].insert(const_cast(QueryingAA)); + return AA; + } + + /// Return the attribute of \p AAType for \p IRP if existing. + template + const AAType *lookupAAFor(const IRPosition &IRP, + const AbstractAttribute *QueryingAA = nullptr, + bool TrackDependence = false) { + static_assert(std::is_base_of::value, + "Cannot query an attribute with a type not derived from " + "'AbstractAttribute'!"); + assert((QueryingAA || !TrackDependence) && + "Cannot track dependences without a QueryingAA!"); + + // Lookup the abstract attribute of type AAType. If found, return it after + // registering a dependence of QueryingAA on the one returned attribute. + const auto &KindToAbstractAttributeMap = AAMap.lookup(IRP); + if (AAType *AA = static_cast( + KindToAbstractAttributeMap.lookup(&AAType::ID))) { + // Do not register a dependence on an attribute with an invalid state. + if (TrackDependence && AA->getState().isValidState()) + QueryMap[AA].insert(const_cast(QueryingAA)); + return AA; + } + return nullptr; + } + /// The set of all abstract attributes. 
///{ using AAVector = SmallVector; AAVector AllAbstractAttributes; ///} - /// A nested map to lookup abstract attributes based on the anchored value and - /// an argument positions (or -1) on the outer level, and attribute kinds - /// (Attribute::AttrKind) on the inner level. + /// A nested map to lookup abstract attributes based on the argument position + /// on the outer level, and the addresses of the static member (AAType::ID) on + /// the inner level. ///{ - using KindToAbstractAttributeMap = DenseMap; - DenseMap, KindToAbstractAttributeMap> AAMap; + using KindToAbstractAttributeMap = + DenseMap; + DenseMap AAMap; ///} /// A map from abstract attributes to the ones that queried them through calls /// to the getAAFor<...>(...) method. ///{ using QueryMapTy = - DenseMap>; + MapVector>; QueryMapTy QueryMap; - ///} -}; - -/// Data structure to hold cached (LLVM-IR) information. -/// -/// All attributes are given an InformationCache object at creation time to -/// avoid inspection of the IR by all of them individually. This default -/// InformationCache will hold information required by 'default' attributes, -/// thus the ones deduced when Attributor::identifyDefaultAbstractAttributes(..) -/// is called. -/// -/// If custom abstract attributes, registered manually through -/// Attributor::registerAA(...), need more information, especially if it is not -/// reusable, it is advised to inherit from the InformationCache and cast the -/// instance down in the abstract attributes. -struct InformationCache { - /// A map type from opcodes to instructions with this opcode. - using OpcodeInstMapTy = DenseMap>; - - /// Return the map that relates "interesting" opcodes with all instructions - /// with that opcode in \p F. - OpcodeInstMapTy &getOpcodeInstMapForFunction(Function &F) { - return FuncInstOpcodeMap[&F]; - } - - /// A vector type to hold instructions. - using InstructionVectorTy = std::vector; - - /// Return the instructions in \p F that may read or write memory. - InstructionVectorTy &getReadOrWriteInstsForFunction(Function &F) { - return FuncRWInstsMap[&F]; - } + ///} -private: - /// A map type from functions to opcode to instruction maps. - using FuncInstOpcodeMapTy = DenseMap; + /// The information cache that holds pre-processed (LLVM-IR) information. + InformationCache &InfoCache; - /// A map type from functions to their read or write instructions. - using FuncRWInstsMapTy = DenseMap; + /// Number of iterations until the dependences between abstract attributes are + /// recomputed. + const unsigned DepRecomputeInterval; - /// A nested map that remembers all instructions in a function with a certain - /// instruction opcode (Instruction::getOpcode()). - FuncInstOpcodeMapTy FuncInstOpcodeMap; + /// If not null, a set limiting the attribute opportunities. + const DenseSet *Whitelist; - /// A map from functions to their instructions that may read or write memory. - FuncRWInstsMapTy FuncRWInstsMap; + /// A set to remember the functions we already assume to be live and visited. + DenseSet VisitedFunctions; - /// Give the Attributor access to the members so - /// Attributor::identifyDefaultAbstractAttributes(...) can initialize them. - friend struct Attributor; + /// Functions, blocks, and instructions we delete after manifest is done. + /// + ///{ + SmallPtrSet ToBeDeletedFunctions; + SmallPtrSet ToBeDeletedBlocks; + SmallPtrSet ToBeDeletedInsts; + ///} }; /// An interface to query the internal state of an abstract attribute. 
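Editor's illustration, not part of the imported header: a simplified driver showing how the pieces above fit together, roughly what the pass wrappers around the Attributor do. The DepRecomputeInterval value is arbitrary, and the in-tree setup additionally handles internal linkage, deletion bookkeeping, and iteration limits that are omitted here.

#include "llvm/IR/Module.h"
#include "llvm/Transforms/IPO/Attributor.h"
using namespace llvm;

static bool runAttributorOnModule(Module &M, AnalysisGetter &AG) {
  InformationCache InfoCache(M, AG);
  Attributor A(InfoCache, /* DepRecomputeInterval */ 4);

  // The information cache must be populated for every function that may be
  // inspected, before any abstract attribute looks at it.
  for (Function &F : M)
    if (!F.isDeclaration())
      A.initializeInformationCache(F);

  // Seed the default abstract attributes for every definition.
  for (Function &F : M)
    if (!F.isDeclaration())
      A.identifyDefaultAbstractAttributes(F);

  // Iterate to a fixpoint and manifest what was deduced.
  return A.run(M) == ChangeStatus::CHANGED;
}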
@@ -375,13 +1045,17 @@ struct AbstractState { /// /// This will usually make the optimistically assumed state the known to be /// true state. - virtual void indicateOptimisticFixpoint() = 0; + /// + /// \returns ChangeStatus::UNCHANGED as the assumed value should not change. + virtual ChangeStatus indicateOptimisticFixpoint() = 0; /// Indicate that the abstract state should converge to the pessimistic state. /// /// This will usually revert the optimistically assumed state to the known to /// be true state. - virtual void indicatePessimisticFixpoint() = 0; + /// + /// \returns ChangeStatus::CHANGED as the assumed value may change. + virtual ChangeStatus indicatePessimisticFixpoint() = 0; }; /// Simple state with integers encoding. @@ -412,10 +1086,16 @@ struct IntegerState : public AbstractState { bool isAtFixpoint() const override { return Assumed == Known; } /// See AbstractState::indicateOptimisticFixpoint(...) - void indicateOptimisticFixpoint() override { Known = Assumed; } + ChangeStatus indicateOptimisticFixpoint() override { + Known = Assumed; + return ChangeStatus::UNCHANGED; + } /// See AbstractState::indicatePessimisticFixpoint(...) - void indicatePessimisticFixpoint() override { Assumed = Known; } + ChangeStatus indicatePessimisticFixpoint() override { + Assumed = Known; + return ChangeStatus::CHANGED; + } /// Return the known state encoding base_t getKnown() const { return Known; } @@ -448,6 +1128,12 @@ struct IntegerState : public AbstractState { return *this; } + /// Remove the bits in \p BitsEncoding from the "known bits". + IntegerState &removeKnownBits(base_t BitsEncoding) { + Known = (Known & ~BitsEncoding); + return *this; + } + /// Keep only "assumed bits" also set in \p BitsEncoding but all known ones. IntegerState &intersectAssumedBits(base_t BitsEncoding) { // Make sure we never loose any "known bits". @@ -455,6 +1141,62 @@ struct IntegerState : public AbstractState { return *this; } + /// Take minimum of assumed and \p Value. + IntegerState &takeAssumedMinimum(base_t Value) { + // Make sure we never loose "known value". + Assumed = std::max(std::min(Assumed, Value), Known); + return *this; + } + + /// Take maximum of known and \p Value. + IntegerState &takeKnownMaximum(base_t Value) { + // Make sure we never loose "known value". + Assumed = std::max(Value, Assumed); + Known = std::max(Value, Known); + return *this; + } + + /// Equality for IntegerState. + bool operator==(const IntegerState &R) const { + return this->getAssumed() == R.getAssumed() && + this->getKnown() == R.getKnown(); + } + + /// Inequality for IntegerState. + bool operator!=(const IntegerState &R) const { return !(*this == R); } + + /// "Clamp" this state with \p R. The result is the minimum of the assumed + /// information but not less than what was known before. + /// + /// TODO: Consider replacing the operator with a call or using it only when + /// we can also take the maximum of the known information, thus when + /// \p R is not dependent on additional assumed state. + IntegerState operator^=(const IntegerState &R) { + takeAssumedMinimum(R.Assumed); + return *this; + } + + /// "Clamp" this state with \p R. The result is the maximum of the known + /// information but not more than what was assumed before. + IntegerState operator+=(const IntegerState &R) { + takeKnownMaximum(R.Known); + return *this; + } + + /// Make this the minimum, known and assumed, of this state and \p R. 
+ IntegerState operator&=(const IntegerState &R) { + Known = std::min(Known, R.Known); + Assumed = std::min(Assumed, R.Assumed); + return *this; + } + + /// Make this the maximum, known and assumed, of this state and \p R. + IntegerState operator|=(const IntegerState &R) { + Known = std::max(Known, R.Known); + Assumed = std::max(Assumed, R.Assumed); + return *this; + } + private: /// The known state encoding in an integer of type base_t. base_t Known = getWorstState(); @@ -468,6 +1210,77 @@ struct BooleanState : public IntegerState { BooleanState() : IntegerState(1){}; }; +/// Helper struct necessary as the modular build fails if the virtual method +/// IRAttribute::manifest is defined in the Attributor.cpp. +struct IRAttributeManifest { + static ChangeStatus manifestAttrs(Attributor &A, IRPosition &IRP, + const ArrayRef &DeducedAttrs); +}; + +/// Helper to tie a abstract state implementation to an abstract attribute. +template +struct StateWrapper : public StateTy, public Base { + /// Provide static access to the type of the state. + using StateType = StateTy; + + /// See AbstractAttribute::getState(...). + StateType &getState() override { return *this; } + + /// See AbstractAttribute::getState(...). + const AbstractState &getState() const override { return *this; } +}; + +/// Helper class that provides common functionality to manifest IR attributes. +template +struct IRAttribute : public IRPosition, public Base { + IRAttribute(const IRPosition &IRP) : IRPosition(IRP) {} + ~IRAttribute() {} + + /// See AbstractAttribute::initialize(...). + virtual void initialize(Attributor &A) override { + if (hasAttr(getAttrKind())) { + this->getState().indicateOptimisticFixpoint(); + return; + } + + const IRPosition &IRP = this->getIRPosition(); + bool IsFnInterface = IRP.isFnInterfaceKind(); + const Function *FnScope = IRP.getAnchorScope(); + // TODO: Not all attributes require an exact definition. Find a way to + // enable deduction for some but not all attributes in case the + // definition might be changed at runtime, see also + // http://lists.llvm.org/pipermail/llvm-dev/2018-February/121275.html. + // TODO: We could always determine abstract attributes and if sufficient + // information was found we could duplicate the functions that do not + // have an exact definition. + if (IsFnInterface && (!FnScope || !FnScope->hasExactDefinition())) + this->getState().indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + SmallVector DeducedAttrs; + getDeducedAttributes(getAnchorValue().getContext(), DeducedAttrs); + return IRAttributeManifest::manifestAttrs(A, getIRPosition(), DeducedAttrs); + } + + /// Return the kind that identifies the abstract attribute implementation. + Attribute::AttrKind getAttrKind() const { return AK; } + + /// Return the deduced attributes in \p Attrs. + virtual void getDeducedAttributes(LLVMContext &Ctx, + SmallVectorImpl &Attrs) const { + Attrs.emplace_back(Attribute::get(Ctx, getAttrKind())); + } + + /// Return an IR position, see struct IRPosition. + /// + ///{ + IRPosition &getIRPosition() override { return *this; } + const IRPosition &getIRPosition() const override { return *this; } + ///} +}; + /// Base struct for all "concrete attribute" deductions. 
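Editor's illustration, not part of the imported header: a sketch of what a concrete deduction class for one of the interfaces declared below (here AANoSync) might look like, built on the IRAttribute/StateWrapper helpers above. The real implementations live in lib/Transforms/IPO/Attributor.cpp and are considerably more involved; AANoSyncExample and its predicate are simplifications.

struct AANoSyncExample final : public AANoSync {
  AANoSyncExample(const IRPosition &IRP) : AANoSync(IRP) {}

  // Transfer function: stay optimistic only while no potentially
  // synchronizing instruction is seen, otherwise give up. The in-tree logic
  // additionally handles volatile accesses, convergent calls, intrinsics, etc.
  ChangeStatus updateImpl(Attributor &A) override {
    auto NoSyncPred = [](Instruction &I) { return !I.isAtomic(); };

    if (!A.checkForAllCallLikeInstructions(NoSyncPred, *this) ||
        !A.checkForAllReadWriteInstructions(NoSyncPred, *this))
      return indicatePessimisticFixpoint();
    return ChangeStatus::UNCHANGED;
  }

  // Required hook; statistics are omitted in this sketch.
  void trackStatistics() const override {}
};

// Manual registration hands ownership to the Attributor; this assumes no
// AANoSync has already been seeded at the same position:
//   A.registerAA(*new AANoSyncExample(IRPosition::function(F)));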
/// /// The abstract attribute is a minimal interface that allows the Attributor to @@ -512,29 +1325,7 @@ struct BooleanState : public IntegerState { /// NOTE: The mechanics of adding a new "concrete" abstract attribute are /// described in the file comment. struct AbstractAttribute { - - /// The positions attributes can be manifested in. - enum ManifestPosition { - MP_ARGUMENT, ///< An attribute for a function argument. - MP_CALL_SITE_ARGUMENT, ///< An attribute for a call site argument. - MP_FUNCTION, ///< An attribute for a function as a whole. - MP_RETURNED, ///< An attribute for the function return value. - }; - - /// An abstract attribute associated with \p AssociatedVal and anchored at - /// \p AnchoredVal. - /// - /// \param AssociatedVal The value this abstract attribute is associated with. - /// \param AnchoredVal The value this abstract attributes is anchored at. - /// \param InfoCache Cached information accessible to the abstract attribute. - AbstractAttribute(Value *AssociatedVal, Value &AnchoredVal, - InformationCache &InfoCache) - : AssociatedVal(AssociatedVal), AnchoredVal(AnchoredVal), - InfoCache(InfoCache) {} - - /// An abstract attribute associated with and anchored at \p V. - AbstractAttribute(Value &V, InformationCache &InfoCache) - : AbstractAttribute(&V, V, InfoCache) {} + using StateType = AbstractState; /// Virtual destructor. virtual ~AbstractAttribute() {} @@ -550,47 +1341,11 @@ struct AbstractAttribute { virtual void initialize(Attributor &A) {} /// Return the internal abstract state for inspection. - virtual const AbstractState &getState() const = 0; - - /// Return the value this abstract attribute is anchored with. - /// - /// The anchored value might not be the associated value if the latter is not - /// sufficient to determine where arguments will be manifested. This is mostly - /// the case for call site arguments as the value is not sufficient to - /// pinpoint them. Instead, we can use the call site as an anchor. - /// - ///{ - Value &getAnchoredValue() { return AnchoredVal; } - const Value &getAnchoredValue() const { return AnchoredVal; } - ///} - - /// Return the llvm::Function surrounding the anchored value. - /// - ///{ - Function &getAnchorScope(); - const Function &getAnchorScope() const; - ///} - - /// Return the value this abstract attribute is associated with. - /// - /// The abstract state usually represents this value. - /// - ///{ - virtual Value *getAssociatedValue() { return AssociatedVal; } - virtual const Value *getAssociatedValue() const { return AssociatedVal; } - ///} - - /// Return the position this abstract state is manifested in. - virtual ManifestPosition getManifestPosition() const = 0; - - /// Return the kind that identifies the abstract attribute implementation. - virtual Attribute::AttrKind getAttrKind() const = 0; + virtual StateType &getState() = 0; + virtual const StateType &getState() const = 0; - /// Return the deduced attributes in \p Attrs. - virtual void getDeducedAttributes(SmallVectorImpl &Attrs) const { - LLVMContext &Ctx = AnchoredVal.getContext(); - Attrs.emplace_back(Attribute::get(Ctx, getAttrKind())); - } + /// Return an IR position, see struct IRPosition. + virtual const IRPosition &getIRPosition() const = 0; /// Helper functions, for debug purposes only. ///{ @@ -617,10 +1372,19 @@ protected: /// represented by the abstract attribute in the LLVM-IR. /// /// \Return CHANGED if the IR was altered, otherwise UNCHANGED. 
- virtual ChangeStatus manifest(Attributor &A); + virtual ChangeStatus manifest(Attributor &A) { + return ChangeStatus::UNCHANGED; + } - /// Return the internal abstract state for careful modification. - virtual AbstractState &getState() = 0; + /// Hook to enable custom statistic tracking, called after manifest that + /// resulted in a change if statistics are enabled. + /// + /// We require subclasses to provide an implementation so we remember to + /// add statistics for them. + virtual void trackStatistics() const = 0; + + /// Return an IR position, see struct IRPosition. + virtual IRPosition &getIRPosition() = 0; /// The actual update/transfer function which has to be implemented by the /// derived classes. @@ -630,15 +1394,6 @@ protected: /// /// \Return CHANGED if the internal state changed, otherwise UNCHANGED. virtual ChangeStatus updateImpl(Attributor &A) = 0; - - /// The value this abstract attribute is associated with. - Value *AssociatedVal; - - /// The value this abstract attribute is anchored at. - Value &AnchoredVal; - - /// The information cache accessible to this abstract attribute. - InformationCache &InfoCache; }; /// Forward declarations of output streams for debug purposes. @@ -646,8 +1401,10 @@ protected: ///{ raw_ostream &operator<<(raw_ostream &OS, const AbstractAttribute &AA); raw_ostream &operator<<(raw_ostream &OS, ChangeStatus S); -raw_ostream &operator<<(raw_ostream &OS, AbstractAttribute::ManifestPosition); +raw_ostream &operator<<(raw_ostream &OS, IRPosition::Kind); +raw_ostream &operator<<(raw_ostream &OS, const IRPosition &); raw_ostream &operator<<(raw_ostream &OS, const AbstractState &State); +raw_ostream &operator<<(raw_ostream &OS, const IntegerState &S); ///} struct AttributorPass : public PassInfoMixin { @@ -661,129 +1418,531 @@ Pass *createAttributorLegacyPass(); /// ---------------------------------------------------------------------------- /// An abstract attribute for the returned values of a function. -struct AAReturnedValues : public AbstractAttribute { - /// See AbstractAttribute::AbstractAttribute(...). - AAReturnedValues(Function &F, InformationCache &InfoCache) - : AbstractAttribute(F, InfoCache) {} +struct AAReturnedValues + : public IRAttribute { + AAReturnedValues(const IRPosition &IRP) : IRAttribute(IRP) {} + + /// Return an assumed unique return value if a single candidate is found. If + /// there cannot be one, return a nullptr. If it is not clear yet, return the + /// Optional::NoneType. + Optional getAssumedUniqueReturnValue(Attributor &A) const; /// Check \p Pred on all returned values. /// /// This method will evaluate \p Pred on returned values and return /// true if (1) all returned values are known, and (2) \p Pred returned true /// for all returned values. - virtual bool - checkForallReturnedValues(std::function &Pred) const = 0; - - /// See AbstractAttribute::getAttrKind() - Attribute::AttrKind getAttrKind() const override { return ID; } - - /// The identifier used by the Attributor for this class of attributes. - static constexpr Attribute::AttrKind ID = Attribute::Returned; + /// + /// Note: Unlike the Attributor::checkForAllReturnedValuesAndReturnInsts + /// method, this one will not filter dead return instructions. 
+ virtual bool checkForAllReturnedValuesAndReturnInsts( + const function_ref &)> + &Pred) const = 0; + + using iterator = + MapVector>::iterator; + using const_iterator = + MapVector>::const_iterator; + virtual llvm::iterator_range returned_values() = 0; + virtual llvm::iterator_range returned_values() const = 0; + + virtual size_t getNumReturnValues() const = 0; + virtual const SmallSetVector &getUnresolvedCalls() const = 0; + + /// Create an abstract attribute view for the position \p IRP. + static AAReturnedValues &createForPosition(const IRPosition &IRP, + Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; }; -struct AANoUnwind : public AbstractAttribute { - /// An abstract interface for all nosync attributes. - AANoUnwind(Value &V, InformationCache &InfoCache) - : AbstractAttribute(V, InfoCache) {} - - /// See AbstractAttribute::getAttrKind()/ - Attribute::AttrKind getAttrKind() const override { return ID; } - - static constexpr Attribute::AttrKind ID = Attribute::NoUnwind; +struct AANoUnwind + : public IRAttribute> { + AANoUnwind(const IRPosition &IRP) : IRAttribute(IRP) {} /// Returns true if nounwind is assumed. - virtual bool isAssumedNoUnwind() const = 0; + bool isAssumedNoUnwind() const { return getAssumed(); } /// Returns true if nounwind is known. - virtual bool isKnownNoUnwind() const = 0; -}; + bool isKnownNoUnwind() const { return getKnown(); } -struct AANoSync : public AbstractAttribute { - /// An abstract interface for all nosync attributes. - AANoSync(Value &V, InformationCache &InfoCache) - : AbstractAttribute(V, InfoCache) {} + /// Create an abstract attribute view for the position \p IRP. + static AANoUnwind &createForPosition(const IRPosition &IRP, Attributor &A); - /// See AbstractAttribute::getAttrKind(). - Attribute::AttrKind getAttrKind() const override { return ID; } + /// Unique ID (due to the unique address) + static const char ID; +}; - static constexpr Attribute::AttrKind ID = - Attribute::AttrKind(Attribute::NoSync); +struct AANoSync + : public IRAttribute> { + AANoSync(const IRPosition &IRP) : IRAttribute(IRP) {} /// Returns true if "nosync" is assumed. - virtual bool isAssumedNoSync() const = 0; + bool isAssumedNoSync() const { return getAssumed(); } /// Returns true if "nosync" is known. - virtual bool isKnownNoSync() const = 0; -}; + bool isKnownNoSync() const { return getKnown(); } -/// An abstract interface for all nonnull attributes. -struct AANonNull : public AbstractAttribute { + /// Create an abstract attribute view for the position \p IRP. + static AANoSync &createForPosition(const IRPosition &IRP, Attributor &A); - /// See AbstractAttribute::AbstractAttribute(...). - AANonNull(Value &V, InformationCache &InfoCache) - : AbstractAttribute(V, InfoCache) {} + /// Unique ID (due to the unique address) + static const char ID; +}; - /// See AbstractAttribute::AbstractAttribute(...). - AANonNull(Value *AssociatedVal, Value &AnchoredValue, - InformationCache &InfoCache) - : AbstractAttribute(AssociatedVal, AnchoredValue, InfoCache) {} +/// An abstract interface for all nonnull attributes. +struct AANonNull + : public IRAttribute> { + AANonNull(const IRPosition &IRP) : IRAttribute(IRP) {} /// Return true if we assume that the underlying value is nonnull. - virtual bool isAssumedNonNull() const = 0; + bool isAssumedNonNull() const { return getAssumed(); } /// Return true if we know that underlying value is nonnull. 
- virtual bool isKnownNonNull() const = 0; + bool isKnownNonNull() const { return getKnown(); } - /// See AbastractState::getAttrKind(). - Attribute::AttrKind getAttrKind() const override { return ID; } + /// Create an abstract attribute view for the position \p IRP. + static AANonNull &createForPosition(const IRPosition &IRP, Attributor &A); - /// The identifier used by the Attributor for this class of attributes. - static constexpr Attribute::AttrKind ID = Attribute::NonNull; + /// Unique ID (due to the unique address) + static const char ID; }; /// An abstract attribute for norecurse. -struct AANoRecurse : public AbstractAttribute { +struct AANoRecurse + : public IRAttribute> { + AANoRecurse(const IRPosition &IRP) : IRAttribute(IRP) {} - /// See AbstractAttribute::AbstractAttribute(...). - AANoRecurse(Value &V, InformationCache &InfoCache) - : AbstractAttribute(V, InfoCache) {} - - /// See AbstractAttribute::getAttrKind() - virtual Attribute::AttrKind getAttrKind() const override { - return Attribute::NoRecurse; - } + /// Return true if "norecurse" is assumed. + bool isAssumedNoRecurse() const { return getAssumed(); } /// Return true if "norecurse" is known. - virtual bool isKnownNoRecurse() const = 0; + bool isKnownNoRecurse() const { return getKnown(); } - /// Return true if "norecurse" is assumed. - virtual bool isAssumedNoRecurse() const = 0; + /// Create an abstract attribute view for the position \p IRP. + static AANoRecurse &createForPosition(const IRPosition &IRP, Attributor &A); - /// The identifier used by the Attributor for this class of attributes. - static constexpr Attribute::AttrKind ID = Attribute::NoRecurse; + /// Unique ID (due to the unique address) + static const char ID; }; /// An abstract attribute for willreturn. -struct AAWillReturn : public AbstractAttribute { +struct AAWillReturn + : public IRAttribute> { + AAWillReturn(const IRPosition &IRP) : IRAttribute(IRP) {} + + /// Return true if "willreturn" is assumed. + bool isAssumedWillReturn() const { return getAssumed(); } + + /// Return true if "willreturn" is known. + bool isKnownWillReturn() const { return getKnown(); } + + /// Create an abstract attribute view for the position \p IRP. + static AAWillReturn &createForPosition(const IRPosition &IRP, Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; +}; + +/// An abstract interface for all noalias attributes. +struct AANoAlias + : public IRAttribute> { + AANoAlias(const IRPosition &IRP) : IRAttribute(IRP) {} + + /// Return true if we assume that the underlying value is alias. + bool isAssumedNoAlias() const { return getAssumed(); } + + /// Return true if we know that underlying value is noalias. + bool isKnownNoAlias() const { return getKnown(); } - /// See AbstractAttribute::AbstractAttribute(...). - AAWillReturn(Value &V, InformationCache &InfoCache) - : AbstractAttribute(V, InfoCache) {} + /// Create an abstract attribute view for the position \p IRP. + static AANoAlias &createForPosition(const IRPosition &IRP, Attributor &A); - /// See AbstractAttribute::getAttrKind() - virtual Attribute::AttrKind getAttrKind() const override { - return Attribute::WillReturn; + /// Unique ID (due to the unique address) + static const char ID; +}; + +/// An AbstractAttribute for nofree. +struct AANoFree + : public IRAttribute> { + AANoFree(const IRPosition &IRP) : IRAttribute(IRP) {} + + /// Return true if "nofree" is assumed. + bool isAssumedNoFree() const { return getAssumed(); } + + /// Return true if "nofree" is known. 
+ bool isKnownNoFree() const { return getKnown(); } + + /// Create an abstract attribute view for the position \p IRP. + static AANoFree &createForPosition(const IRPosition &IRP, Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; +}; + +/// An AbstractAttribute for noreturn. +struct AANoReturn + : public IRAttribute> { + AANoReturn(const IRPosition &IRP) : IRAttribute(IRP) {} + + /// Return true if the underlying object is assumed to never return. + bool isAssumedNoReturn() const { return getAssumed(); } + + /// Return true if the underlying object is known to never return. + bool isKnownNoReturn() const { return getKnown(); } + + /// Create an abstract attribute view for the position \p IRP. + static AANoReturn &createForPosition(const IRPosition &IRP, Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; +}; + +/// An abstract interface for liveness abstract attribute. +struct AAIsDead : public StateWrapper, + public IRPosition { + AAIsDead(const IRPosition &IRP) : IRPosition(IRP) {} + + /// Returns true if \p BB is assumed dead. + virtual bool isAssumedDead(const BasicBlock *BB) const = 0; + + /// Returns true if \p BB is known dead. + virtual bool isKnownDead(const BasicBlock *BB) const = 0; + + /// Returns true if \p I is assumed dead. + virtual bool isAssumedDead(const Instruction *I) const = 0; + + /// Returns true if \p I is known dead. + virtual bool isKnownDead(const Instruction *I) const = 0; + + /// This method is used to check if at least one instruction in a collection + /// of instructions is live. + template bool isLiveInstSet(T begin, T end) const { + for (const auto &I : llvm::make_range(begin, end)) { + assert(I->getFunction() == getIRPosition().getAssociatedFunction() && + "Instruction must be in the same anchor scope function."); + + if (!isAssumedDead(I)) + return true; + } + + return false; } - /// Return true if "willreturn" is known. - virtual bool isKnownWillReturn() const = 0; + /// Return an IR position, see struct IRPosition. + /// + ///{ + IRPosition &getIRPosition() override { return *this; } + const IRPosition &getIRPosition() const override { return *this; } + ///} - /// Return true if "willreturn" is assumed. - virtual bool isAssumedWillReturn() const = 0; + /// Create an abstract attribute view for the position \p IRP. + static AAIsDead &createForPosition(const IRPosition &IRP, Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; +}; + +/// State for dereferenceable attribute +struct DerefState : AbstractState { + + /// State representing for dereferenceable bytes. + IntegerState DerefBytesState; + + /// State representing that whether the value is globaly dereferenceable. + BooleanState GlobalState; + + /// See AbstractState::isValidState() + bool isValidState() const override { return DerefBytesState.isValidState(); } + + /// See AbstractState::isAtFixpoint() + bool isAtFixpoint() const override { + return !isValidState() || + (DerefBytesState.isAtFixpoint() && GlobalState.isAtFixpoint()); + } + + /// See AbstractState::indicateOptimisticFixpoint(...) + ChangeStatus indicateOptimisticFixpoint() override { + DerefBytesState.indicateOptimisticFixpoint(); + GlobalState.indicateOptimisticFixpoint(); + return ChangeStatus::UNCHANGED; + } + + /// See AbstractState::indicatePessimisticFixpoint(...) 
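Editor's illustration, not part of the imported header: a sketch of consulting AAIsDead from a hypothetical helper of a custom abstract attribute, so that instructions the Attributor already assumes dead do not invalidate an optimistic assumption. All instructions passed in must belong to the associated function.

bool AAExample::allAssumedDead(Attributor &A, ArrayRef<Instruction *> Insts) {
  const auto &LivenessAA = A.getAAFor<AAIsDead>(
      *this, IRPosition::function(*getAnchorScope()),
      /* TrackDependence */ false);

  // isLiveInstSet(...) is true if at least one instruction in the range is
  // still assumed live; here we want to know that none of them is.
  return !LivenessAA.isLiveInstSet(Insts.begin(), Insts.end());
}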
+ ChangeStatus indicatePessimisticFixpoint() override { + DerefBytesState.indicatePessimisticFixpoint(); + GlobalState.indicatePessimisticFixpoint(); + return ChangeStatus::CHANGED; + } + + /// Update known dereferenceable bytes. + void takeKnownDerefBytesMaximum(uint64_t Bytes) { + DerefBytesState.takeKnownMaximum(Bytes); + } + + /// Update assumed dereferenceable bytes. + void takeAssumedDerefBytesMinimum(uint64_t Bytes) { + DerefBytesState.takeAssumedMinimum(Bytes); + } + + /// Equality for DerefState. + bool operator==(const DerefState &R) { + return this->DerefBytesState == R.DerefBytesState && + this->GlobalState == R.GlobalState; + } + + /// Inequality for IntegerState. + bool operator!=(const DerefState &R) { return !(*this == R); } + + /// See IntegerState::operator^= + DerefState operator^=(const DerefState &R) { + DerefBytesState ^= R.DerefBytesState; + GlobalState ^= R.GlobalState; + return *this; + } + + /// See IntegerState::operator+= + DerefState operator+=(const DerefState &R) { + DerefBytesState += R.DerefBytesState; + GlobalState += R.GlobalState; + return *this; + } + + /// See IntegerState::operator&= + DerefState operator&=(const DerefState &R) { + DerefBytesState &= R.DerefBytesState; + GlobalState &= R.GlobalState; + return *this; + } + + /// See IntegerState::operator|= + DerefState operator|=(const DerefState &R) { + DerefBytesState |= R.DerefBytesState; + GlobalState |= R.GlobalState; + return *this; + } + +protected: + const AANonNull *NonNullAA = nullptr; +}; + +/// An abstract interface for all dereferenceable attribute. +struct AADereferenceable + : public IRAttribute> { + AADereferenceable(const IRPosition &IRP) : IRAttribute(IRP) {} + + /// Return true if we assume that the underlying value is nonnull. + bool isAssumedNonNull() const { + return NonNullAA && NonNullAA->isAssumedNonNull(); + } + + /// Return true if we know that the underlying value is nonnull. + bool isKnownNonNull() const { + return NonNullAA && NonNullAA->isKnownNonNull(); + } + + /// Return true if we assume that underlying value is + /// dereferenceable(_or_null) globally. + bool isAssumedGlobal() const { return GlobalState.getAssumed(); } + + /// Return true if we know that underlying value is + /// dereferenceable(_or_null) globally. + bool isKnownGlobal() const { return GlobalState.getKnown(); } + + /// Return assumed dereferenceable bytes. + uint32_t getAssumedDereferenceableBytes() const { + return DerefBytesState.getAssumed(); + } + + /// Return known dereferenceable bytes. + uint32_t getKnownDereferenceableBytes() const { + return DerefBytesState.getKnown(); + } + + /// Create an abstract attribute view for the position \p IRP. + static AADereferenceable &createForPosition(const IRPosition &IRP, + Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; +}; + +/// An abstract interface for all align attributes. +struct AAAlign + : public IRAttribute> { + AAAlign(const IRPosition &IRP) : IRAttribute(IRP) {} + + /// Return assumed alignment. + unsigned getAssumedAlign() const { return getAssumed(); } + + /// Return known alignemnt. + unsigned getKnownAlign() const { return getKnown(); } + + /// Create an abstract attribute view for the position \p IRP. + static AAAlign &createForPosition(const IRPosition &IRP, Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; +}; + +/// An abstract interface for all nocapture attributes. 
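DerefState pairs an integer state for the dereferenceable byte count with a boolean state for global dereferenceability, and its two update helpers move in opposite directions: known bytes only ever grow, assumed bytes only ever shrink, but never below what is known. The following is a minimal sketch of that lattice behaviour using a simplified integer state rather than the real IntegerState.

// Illustrative sketch only: a simplified integer state, not llvm::IntegerState.
#include <algorithm>
#include <cassert>
#include <cstdint>

struct SketchIntegerState {
  uint64_t Known = 0;             // proven bound, may only increase
  uint64_t Assumed = UINT64_MAX;  // optimistic bound, may only decrease
  void takeKnownMaximum(uint64_t V) {
    Known = std::max(Known, V);
    Assumed = std::max(Assumed, Known); // assumed is never worse than known
  }
  void takeAssumedMinimum(uint64_t V) {
    Assumed = std::max(Known, std::min(Assumed, V));
  }
};

int main() {
  SketchIntegerState DerefBytes;      // plays the role of DerefBytesState
  DerefBytes.takeKnownMaximum(8);     // an existing attribute guarantees 8 bytes
  DerefBytes.takeAssumedMinimum(32);  // one use site justifies assuming at most 32
  DerefBytes.takeAssumedMinimum(16);  // another use site only justifies 16
  assert(DerefBytes.Known == 8 && DerefBytes.Assumed == 16);
  DerefBytes.takeAssumedMinimum(4);   // the assumption never drops below the known bound
  assert(DerefBytes.Assumed == 8);
}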
+struct AANoCapture + : public IRAttribute> { + AANoCapture(const IRPosition &IRP) : IRAttribute(IRP) {} + + /// State encoding bits. A set bit in the state means the property holds. + /// NO_CAPTURE is the best possible state, 0 the worst possible state. + enum { + NOT_CAPTURED_IN_MEM = 1 << 0, + NOT_CAPTURED_IN_INT = 1 << 1, + NOT_CAPTURED_IN_RET = 1 << 2, + + /// If we do not capture the value in memory or through integers we can only + /// communicate it back as a derived pointer. + NO_CAPTURE_MAYBE_RETURNED = NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT, + + /// If we do not capture the value in memory, through integers, or as a + /// derived pointer we know it is not captured. + NO_CAPTURE = + NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT | NOT_CAPTURED_IN_RET, + }; + + /// Return true if we know that the underlying value is not captured in its + /// respective scope. + bool isKnownNoCapture() const { return isKnown(NO_CAPTURE); } + + /// Return true if we assume that the underlying value is not captured in its + /// respective scope. + bool isAssumedNoCapture() const { return isAssumed(NO_CAPTURE); } + + /// Return true if we know that the underlying value is not captured in its + /// respective scope but we allow it to escape through a "return". + bool isKnownNoCaptureMaybeReturned() const { + return isKnown(NO_CAPTURE_MAYBE_RETURNED); + } + + /// Return true if we assume that the underlying value is not captured in its + /// respective scope but we allow it to escape through a "return". + bool isAssumedNoCaptureMaybeReturned() const { + return isAssumed(NO_CAPTURE_MAYBE_RETURNED); + } + + /// Create an abstract attribute view for the position \p IRP. + static AANoCapture &createForPosition(const IRPosition &IRP, Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; +}; + +/// An abstract interface for value simplify abstract attribute. +struct AAValueSimplify : public StateWrapper, + public IRPosition { + AAValueSimplify(const IRPosition &IRP) : IRPosition(IRP) {} + + /// Return an IR position, see struct IRPosition. + /// + ///{ + IRPosition &getIRPosition() { return *this; } + const IRPosition &getIRPosition() const { return *this; } + ///} + + /// Return an assumed simplified value if a single candidate is found. If + /// there cannot be one, return original value. If it is not clear yet, return + /// the Optional::NoneType. + virtual Optional getAssumedSimplifiedValue(Attributor &A) const = 0; + + /// Create an abstract attribute view for the position \p IRP. + static AAValueSimplify &createForPosition(const IRPosition &IRP, + Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; +}; + +struct AAHeapToStack : public StateWrapper, + public IRPosition { + AAHeapToStack(const IRPosition &IRP) : IRPosition(IRP) {} + + /// Returns true if HeapToStack conversion is assumed to be possible. + bool isAssumedHeapToStack() const { return getAssumed(); } + + /// Returns true if HeapToStack conversion is known to be possible. + bool isKnownHeapToStack() const { return getKnown(); } + + /// Return an IR position, see struct IRPosition. + /// + ///{ + IRPosition &getIRPosition() { return *this; } + const IRPosition &getIRPosition() const { return *this; } + ///} + + /// Create an abstract attribute view for the position \p IRP. 
+ static AAHeapToStack &createForPosition(const IRPosition &IRP, Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; +}; + +/// An abstract interface for all memory related attributes. +struct AAMemoryBehavior + : public IRAttribute> { + AAMemoryBehavior(const IRPosition &IRP) : IRAttribute(IRP) {} + + /// State encoding bits. A set bit in the state means the property holds. + /// BEST_STATE is the best possible state, 0 the worst possible state. + enum { + NO_READS = 1 << 0, + NO_WRITES = 1 << 1, + NO_ACCESSES = NO_READS | NO_WRITES, + + BEST_STATE = NO_ACCESSES, + }; - /// The identifier used by the Attributor for this class of attributes. - static constexpr Attribute::AttrKind ID = Attribute::WillReturn; + /// Return true if we know that the underlying value is not read or accessed + /// in its respective scope. + bool isKnownReadNone() const { return isKnown(NO_ACCESSES); } + + /// Return true if we assume that the underlying value is not read or accessed + /// in its respective scope. + bool isAssumedReadNone() const { return isAssumed(NO_ACCESSES); } + + /// Return true if we know that the underlying value is not accessed + /// (=written) in its respective scope. + bool isKnownReadOnly() const { return isKnown(NO_WRITES); } + + /// Return true if we assume that the underlying value is not accessed + /// (=written) in its respective scope. + bool isAssumedReadOnly() const { return isAssumed(NO_WRITES); } + + /// Return true if we know that the underlying value is not read in its + /// respective scope. + bool isKnownWriteOnly() const { return isKnown(NO_READS); } + + /// Return true if we assume that the underlying value is not read in its + /// respective scope. + bool isAssumedWriteOnly() const { return isAssumed(NO_READS); } + + /// Create an abstract attribute view for the position \p IRP. + static AAMemoryBehavior &createForPosition(const IRPosition &IRP, + Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; }; + } // end namespace llvm #endif // LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H diff --git a/include/llvm/Transforms/IPO/GlobalDCE.h b/include/llvm/Transforms/IPO/GlobalDCE.h index c434484d1ae3..0a6851849e7e 100644 --- a/include/llvm/Transforms/IPO/GlobalDCE.h +++ b/include/llvm/Transforms/IPO/GlobalDCE.h @@ -43,11 +43,25 @@ private: /// Comdat -> Globals in that Comdat section. std::unordered_multimap ComdatMembers; + /// !type metadata -> set of (vtable, offset) pairs + DenseMap, 4>> + TypeIdMap; + + // Global variables which are vtables, and which we have enough information + // about to safely do dead virtual function elimination. + SmallPtrSet VFESafeVTables; + void UpdateGVDependencies(GlobalValue &GV); void MarkLive(GlobalValue &GV, SmallVectorImpl *Updates = nullptr); bool RemoveUnusedGlobalValue(GlobalValue &GV); + // Dead virtual function elimination. 
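The HotColdSplitting helper introduced above receives every analysis it needs through callbacks, so a pass wrapper only has to provide thin lambdas. The glue below is a rough sketch of how a new-pass-manager caller could feed it; the analysis names and the proxy lookup are assumptions based on common LLVM patterns, not taken from this patch.

// Hypothetical glue code, not part of this patch.
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/IPO/HotColdSplitting.h"
#include <functional>
using namespace llvm;

static bool runHotColdSplittingSketch(Module &M, ModuleAnalysisManager &AM) {
  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);

  auto GBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
    return FAM.getResult<BlockFrequencyAnalysis>(F);
  };
  auto GTTI = [&FAM](Function &F) -> TargetTransformInfo & {
    return FAM.getResult<TargetIRAnalysis>(F);
  };
  auto LookupAC = [&FAM](Function &F) -> AssumptionCache * {
    return FAM.getCachedResult<AssumptionAnalysis>(F);
  };
  std::function<OptimizationRemarkEmitter &(Function &)> GORE =
      [&FAM](Function &F) -> OptimizationRemarkEmitter & {
    return FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  };

  return HotColdSplitting(PSI, GBFI, GTTI, &GORE, LookupAC).run(M);
}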
+ void AddVirtualFunctionDependencies(Module &M); + void ScanVTables(Module &M); + void ScanTypeCheckedLoadIntrinsics(Module &M); + void ScanVTableLoad(Function *Caller, Metadata *TypeId, uint64_t CallOffset); + void ComputeDependencies(Value *V, SmallPtrSetImpl &U); }; diff --git a/include/llvm/Transforms/IPO/HotColdSplitting.h b/include/llvm/Transforms/IPO/HotColdSplitting.h index 73668844590d..8c3049fbaac4 100644 --- a/include/llvm/Transforms/IPO/HotColdSplitting.h +++ b/include/llvm/Transforms/IPO/HotColdSplitting.h @@ -17,6 +17,45 @@ namespace llvm { class Module; +class ProfileSummaryInfo; +class BlockFrequencyInfo; +class TargetTransformInfo; +class OptimizationRemarkEmitter; +class AssumptionCache; +class DominatorTree; +class CodeExtractorAnalysisCache; + +/// A sequence of basic blocks. +/// +/// A 0-sized SmallVector is slightly cheaper to move than a std::vector. +using BlockSequence = SmallVector; + +class HotColdSplitting { +public: + HotColdSplitting(ProfileSummaryInfo *ProfSI, + function_ref GBFI, + function_ref GTTI, + std::function *GORE, + function_ref LAC) + : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE), LookupAC(LAC) {} + bool run(Module &M); + +private: + bool isFunctionCold(const Function &F) const; + bool shouldOutlineFrom(const Function &F) const; + bool outlineColdRegions(Function &F, bool HasProfileSummary); + Function *extractColdRegion(const BlockSequence &Region, + const CodeExtractorAnalysisCache &CEAC, + DominatorTree &DT, BlockFrequencyInfo *BFI, + TargetTransformInfo &TTI, + OptimizationRemarkEmitter &ORE, + AssumptionCache *AC, unsigned Count); + ProfileSummaryInfo *PSI; + function_ref GetBFI; + function_ref GetTTI; + std::function *GetORE; + function_ref LookupAC; +}; /// Pass to outline cold regions. class HotColdSplittingPass : public PassInfoMixin { diff --git a/include/llvm/Transforms/IPO/LowerTypeTests.h b/include/llvm/Transforms/IPO/LowerTypeTests.h index 39b23f5957db..3c2bb65b9552 100644 --- a/include/llvm/Transforms/IPO/LowerTypeTests.h +++ b/include/llvm/Transforms/IPO/LowerTypeTests.h @@ -193,6 +193,8 @@ struct ByteArrayBuilder { uint64_t &AllocByteOffset, uint8_t &AllocMask); }; +bool isJumpTableCanonical(Function *F); + } // end namespace lowertypetests class LowerTypeTestsPass : public PassInfoMixin { diff --git a/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/include/llvm/Transforms/IPO/WholeProgramDevirt.h index 509fcc867060..22435e4ed1e5 100644 --- a/include/llvm/Transforms/IPO/WholeProgramDevirt.h +++ b/include/llvm/Transforms/IPO/WholeProgramDevirt.h @@ -16,8 +16,10 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/Transforms/IPO/FunctionImport.h" #include #include +#include #include #include @@ -28,6 +30,7 @@ template class MutableArrayRef; class Function; class GlobalVariable; class ModuleSummaryIndex; +struct ValueInfo; namespace wholeprogramdevirt { @@ -228,6 +231,29 @@ struct WholeProgramDevirtPass : public PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &); }; +struct VTableSlotSummary { + StringRef TypeID; + uint64_t ByteOffset; +}; + +/// Perform index-based whole program devirtualization on the \p Summary +/// index. Any devirtualized targets used by a type test in another module +/// are added to the \p ExportedGUIDs set. 
For any local devirtualized targets +/// only used within the defining module, the information necessary for +/// locating the corresponding WPD resolution is recorded for the ValueInfo +/// in case it is exported by cross module importing (in which case the +/// devirtualized target name will need adjustment). +void runWholeProgramDevirtOnIndex( + ModuleSummaryIndex &Summary, std::set &ExportedGUIDs, + std::map> &LocalWPDTargetsMap); + +/// Call after cross-module importing to update the recorded single impl +/// devirt target names for any locals that were exported. +void updateIndexWPDForExports( + ModuleSummaryIndex &Summary, + function_ref isExported, + std::map> &LocalWPDTargetsMap); + } // end namespace llvm #endif // LLVM_TRANSFORMS_IPO_WHOLEPROGRAMDEVIRT_H diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h index 8b70d2926ae9..fcad1e11895f 100644 --- a/include/llvm/Transforms/Instrumentation.h +++ b/include/llvm/Transforms/Instrumentation.h @@ -181,10 +181,6 @@ struct SanitizerCoverageOptions { SanitizerCoverageOptions() = default; }; -// Insert SanitizerCoverage instrumentation. -ModulePass *createSanitizerCoverageModulePass( - const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()); - /// Calculate what to divide by to scale counts. /// /// Given the maximum count, calculate a divisor that will scale all the diff --git a/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/include/llvm/Transforms/Instrumentation/InstrProfiling.h index 8f76d4a1ce55..2e0fae527b15 100644 --- a/include/llvm/Transforms/Instrumentation/InstrProfiling.h +++ b/include/llvm/Transforms/Instrumentation/InstrProfiling.h @@ -39,13 +39,14 @@ public: : Options(Options), IsCS(IsCS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); - bool run(Module &M, const TargetLibraryInfo &TLI); + bool run(Module &M, + std::function GetTLI); private: InstrProfOptions Options; Module *M; Triple TT; - const TargetLibraryInfo *TLI; + std::function GetTLI; struct PerFunctionProfileData { uint32_t NumValueSites[IPVK_Last + 1]; GlobalVariable *RegionCounters = nullptr; diff --git a/include/llvm/Transforms/Instrumentation/MemorySanitizer.h b/include/llvm/Transforms/Instrumentation/MemorySanitizer.h index 0739d9e58a61..01a86ee3f1fd 100644 --- a/include/llvm/Transforms/Instrumentation/MemorySanitizer.h +++ b/include/llvm/Transforms/Instrumentation/MemorySanitizer.h @@ -19,12 +19,11 @@ namespace llvm { struct MemorySanitizerOptions { - MemorySanitizerOptions() = default; - MemorySanitizerOptions(int TrackOrigins, bool Recover, bool Kernel) - : TrackOrigins(TrackOrigins), Recover(Recover), Kernel(Kernel) {} - int TrackOrigins = 0; - bool Recover = false; - bool Kernel = false; + MemorySanitizerOptions() : MemorySanitizerOptions(0, false, false){}; + MemorySanitizerOptions(int TrackOrigins, bool Recover, bool Kernel); + bool Kernel; + int TrackOrigins; + bool Recover; }; // Insert MemorySanitizer instrumentation (detection of uninitialized reads) @@ -41,6 +40,7 @@ struct MemorySanitizerPass : public PassInfoMixin { MemorySanitizerPass(MemorySanitizerOptions Options) : Options(Options) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); private: MemorySanitizerOptions Options; diff --git a/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h b/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h new file mode 100644 index 000000000000..85a43ff86f2e --- /dev/null 
+++ b/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h @@ -0,0 +1,47 @@ +//===--------- Definition of the SanitizerCoverage class --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the SanitizerCoverage class which is a port of the legacy +// SanitizerCoverage pass to use the new PassManager infrastructure. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_SANITIZERCOVERAGE_H +#define LLVM_TRANSFORMS_INSTRUMENTATION_SANITIZERCOVERAGE_H + +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Instrumentation.h" + +namespace llvm { + +/// This is the ModuleSanitizerCoverage pass used in the new pass manager. The +/// pass instruments functions for coverage, adds initialization calls to the +/// module for trace PC guards and 8bit counters if they are requested, and +/// appends globals to llvm.compiler.used. +class ModuleSanitizerCoveragePass + : public PassInfoMixin { +public: + explicit ModuleSanitizerCoveragePass( + SanitizerCoverageOptions Options = SanitizerCoverageOptions()) + : Options(Options) {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + +private: + SanitizerCoverageOptions Options; +}; + +// Insert SanitizerCoverage instrumentation. +ModulePass *createModuleSanitizerCoverageLegacyPassPass( + const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()); + +} // namespace llvm + +#endif diff --git a/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h b/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h index b4e7d9924ff6..ce0e46745abb 100644 --- a/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h +++ b/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h @@ -27,6 +27,8 @@ FunctionPass *createThreadSanitizerLegacyPassPass(); /// yet, the pass inserts the declarations. Otherwise the existing globals are struct ThreadSanitizerPass : public PassInfoMixin { PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; + } // namespace llvm #endif /* LLVM_TRANSFORMS_INSTRUMENTATION_THREADSANITIZER_H */ diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index f9360b5ee2c8..f06230b6f366 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -308,7 +308,7 @@ FunctionPass *createGVNSinkPass(); // MergedLoadStoreMotion - This pass merges loads and stores in diamonds. Loads // are hoisted into the header, while stores sink into the footer. // -FunctionPass *createMergedLoadStoreMotionPass(); +FunctionPass *createMergedLoadStoreMotionPass(bool SplitFooterBB = false); //===----------------------------------------------------------------------===// // @@ -395,6 +395,13 @@ extern char &InferAddressSpacesID; // "block_weights" metadata. FunctionPass *createLowerExpectIntrinsicPass(); +//===----------------------------------------------------------------------===// +// +// LowerConstantIntrinsicss - Expand any remaining llvm.objectsize and +// llvm.is.constant intrinsic calls, even for the unknown cases. 
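With the legacy factory moved out of Instrumentation.h, the new header above carries both the new-pass-manager pass and the legacy factory. A short scheduling sketch follows; the option field and enumerator names are assumptions about SanitizerCoverageOptions and are not shown in this patch.

// Hypothetical usage, not part of this patch.
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
using namespace llvm;

static void addSanCovSketch(ModulePassManager &MPM) {
  SanitizerCoverageOptions Opts;
  Opts.CoverageType = SanitizerCoverageOptions::SCK_Edge; // assumed field/enum names
  Opts.TracePCGuard = true;                               // assumed field name
  MPM.addPass(ModuleSanitizerCoveragePass(Opts));
}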
+// +FunctionPass *createLowerConstantIntrinsicsPass(); + //===----------------------------------------------------------------------===// // // PartiallyInlineLibCalls - Tries to inline the fast path of library diff --git a/include/llvm/Transforms/Scalar/CallSiteSplitting.h b/include/llvm/Transforms/Scalar/CallSiteSplitting.h index b6055639e8a8..74cbf84b64b2 100644 --- a/include/llvm/Transforms/Scalar/CallSiteSplitting.h +++ b/include/llvm/Transforms/Scalar/CallSiteSplitting.h @@ -9,13 +9,8 @@ #ifndef LLVM_TRANSFORMS_SCALAR_CALLSITESPLITTING__H #define LLVM_TRANSFORMS_SCALAR_CALLSITESPLITTING__H -#include "llvm/ADT/SetVector.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" -#include "llvm/Support/Compiler.h" -#include namespace llvm { diff --git a/include/llvm/Transforms/Scalar/ConstantHoisting.h b/include/llvm/Transforms/Scalar/ConstantHoisting.h index 6b0fc9c1dd07..39039b093241 100644 --- a/include/llvm/Transforms/Scalar/ConstantHoisting.h +++ b/include/llvm/Transforms/Scalar/ConstantHoisting.h @@ -37,7 +37,9 @@ #define LLVM_TRANSFORMS_SCALAR_CONSTANTHOISTING_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/PassManager.h" @@ -154,21 +156,21 @@ private: /// Keeps track of constant candidates found in the function. using ConstCandVecType = std::vector; - using GVCandVecMapType = DenseMap; + using GVCandVecMapType = MapVector; ConstCandVecType ConstIntCandVec; GVCandVecMapType ConstGEPCandMap; /// These are the final constants we decided to hoist. using ConstInfoVecType = SmallVector; - using GVInfoVecMapType = DenseMap; + using GVInfoVecMapType = MapVector; ConstInfoVecType ConstIntInfoVec; GVInfoVecMapType ConstGEPInfoMap; /// Keep track of cast instructions we already cloned. - SmallDenseMap ClonedCastMap; + MapVector ClonedCastMap; Instruction *findMatInsertPt(Instruction *Inst, unsigned Idx = ~0U) const; - SmallPtrSet + SetVector findConstantInsertionPoint(const consthoist::ConstantInfo &ConstInfo) const; void collectConstantCandidates(ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx, diff --git a/include/llvm/Transforms/Scalar/Float2Int.h b/include/llvm/Transforms/Scalar/Float2Int.h index 06aeb8322527..f04b98a19d82 100644 --- a/include/llvm/Transforms/Scalar/Float2Int.h +++ b/include/llvm/Transforms/Scalar/Float2Int.h @@ -17,6 +17,7 @@ #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/MapVector.h" #include "llvm/IR/ConstantRange.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" @@ -26,10 +27,11 @@ public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); // Glue for old PM. 
- bool runImpl(Function &F); + bool runImpl(Function &F, const DominatorTree &DT); private: - void findRoots(Function &F, SmallPtrSet &Roots); + void findRoots(Function &F, const DominatorTree &DT, + SmallPtrSet &Roots); void seen(Instruction *I, ConstantRange R); ConstantRange badRange(); ConstantRange unknownRange(); diff --git a/include/llvm/Transforms/Scalar/GVN.h b/include/llvm/Transforms/Scalar/GVN.h index 9fe00a9e7f2d..8a64768af6b5 100644 --- a/include/llvm/Transforms/Scalar/GVN.h +++ b/include/llvm/Transforms/Scalar/GVN.h @@ -120,6 +120,8 @@ public: uint32_t lookupOrAddCall(CallInst *C); uint32_t phiTranslateImpl(const BasicBlock *BB, const BasicBlock *PhiBlock, uint32_t Num, GVN &Gvn); + bool areCallValsEqual(uint32_t Num, uint32_t NewNum, const BasicBlock *Pred, + const BasicBlock *PhiBlock, GVN &Gvn); std::pair assignExpNewValueNum(Expression &exp); bool areAllValsInBB(uint32_t num, const BasicBlock *BB, GVN &Gvn); @@ -159,6 +161,7 @@ private: SetVector DeadBlocks; OptimizationRemarkEmitter *ORE; ImplicitControlFlowTracking *ICF; + LoopInfo *LI; ValueTable VN; @@ -175,7 +178,7 @@ private: // Block-local map of equivalent values to their leader, does not // propagate to any successors. Entries added mid-block are applied // to the remaining instructions in the block. - SmallMapVector ReplaceWithConstMap; + SmallMapVector ReplaceOperandsWithMap; SmallVector InstrsToErase; // Map the block to reversed postorder traversal number. It is used to @@ -280,7 +283,7 @@ private: void verifyRemoved(const Instruction *I) const; bool splitCriticalEdges(); BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ); - bool replaceOperandsWithConsts(Instruction *I) const; + bool replaceOperandsForInBlockEquality(Instruction *I) const; bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root, bool DominatesByEdge); bool processFoldableCondBr(BranchInst *BI); diff --git a/include/llvm/Transforms/Scalar/GVNExpression.h b/include/llvm/Transforms/Scalar/GVNExpression.h index 3dc4515f85a1..1600d1af3242 100644 --- a/include/llvm/Transforms/Scalar/GVNExpression.h +++ b/include/llvm/Transforms/Scalar/GVNExpression.h @@ -323,7 +323,7 @@ public: class LoadExpression final : public MemoryExpression { private: LoadInst *Load; - unsigned Alignment; + MaybeAlign Alignment; public: LoadExpression(unsigned NumOperands, LoadInst *L, @@ -333,7 +333,8 @@ public: LoadExpression(enum ExpressionType EType, unsigned NumOperands, LoadInst *L, const MemoryAccess *MemoryLeader) : MemoryExpression(NumOperands, EType, MemoryLeader), Load(L) { - Alignment = L ? 
L->getAlignment() : 0; + if (L) + Alignment = MaybeAlign(L->getAlignment()); } LoadExpression() = delete; @@ -348,8 +349,8 @@ public: LoadInst *getLoadInst() const { return Load; } void setLoadInst(LoadInst *L) { Load = L; } - unsigned getAlignment() const { return Alignment; } - void setAlignment(unsigned Align) { Alignment = Align; } + MaybeAlign getAlignment() const { return Alignment; } + void setAlignment(MaybeAlign Align) { Alignment = Align; } bool equals(const Expression &Other) const override; bool exactlyEquals(const Expression &Other) const override { diff --git a/include/llvm/Transforms/Scalar/LoopPassManager.h b/include/llvm/Transforms/Scalar/LoopPassManager.h index 61ec58585fd0..aed764855b2e 100644 --- a/include/llvm/Transforms/Scalar/LoopPassManager.h +++ b/include/llvm/Transforms/Scalar/LoopPassManager.h @@ -263,8 +263,10 @@ template class FunctionToLoopPassAdaptor : public PassInfoMixin> { public: - explicit FunctionToLoopPassAdaptor(LoopPassT Pass, bool DebugLogging = false) - : Pass(std::move(Pass)), LoopCanonicalizationFPM(DebugLogging) { + explicit FunctionToLoopPassAdaptor(LoopPassT Pass, bool UseMemorySSA = false, + bool DebugLogging = false) + : Pass(std::move(Pass)), LoopCanonicalizationFPM(DebugLogging), + UseMemorySSA(UseMemorySSA) { LoopCanonicalizationFPM.addPass(LoopSimplifyPass()); LoopCanonicalizationFPM.addPass(LCSSAPass()); } @@ -293,7 +295,7 @@ public: return PA; // Get the analysis results needed by loop passes. - MemorySSA *MSSA = EnableMSSALoopDependency + MemorySSA *MSSA = UseMemorySSA ? (&AM.getResult(F).getMSSA()) : nullptr; LoopStandardAnalysisResults LAR = {AM.getResult(F), @@ -310,8 +312,10 @@ public: // LoopStandardAnalysisResults object. The loop analyses cached in this // manager have access to those analysis results and so it must invalidate // itself when they go away. - LoopAnalysisManager &LAM = - AM.getResult(F).getManager(); + auto &LAMFP = AM.getResult(F); + if (UseMemorySSA) + LAMFP.markMSSAUsed(); + LoopAnalysisManager &LAM = LAMFP.getManager(); // A postorder worklist of loops to process. SmallPriorityWorklist Worklist; @@ -382,7 +386,7 @@ public: PA.preserve(); PA.preserve(); PA.preserve(); - if (EnableMSSALoopDependency) + if (UseMemorySSA) PA.preserve(); // FIXME: What we really want to do here is preserve an AA category, but // that concept doesn't exist yet. @@ -397,14 +401,18 @@ private: LoopPassT Pass; FunctionPassManager LoopCanonicalizationFPM; + + bool UseMemorySSA = false; }; /// A function to deduce a loop pass type and wrap it in the templated /// adaptor. template FunctionToLoopPassAdaptor -createFunctionToLoopPassAdaptor(LoopPassT Pass, bool DebugLogging = false) { - return FunctionToLoopPassAdaptor(std::move(Pass), DebugLogging); +createFunctionToLoopPassAdaptor(LoopPassT Pass, bool UseMemorySSA = false, + bool DebugLogging = false) { + return FunctionToLoopPassAdaptor(std::move(Pass), UseMemorySSA, + DebugLogging); } /// Pass for printing a loop's contents as textual IR. 
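Since MemorySSA is now requested per adaptor instead of through the global EnableMSSALoopDependency flag, a pipeline opts in explicitly when it schedules a MemorySSA-capable loop pass. A sketch is below; the choice of LICM as the loop pass is illustrative and not taken from this patch.

// Hypothetical pipeline snippet, not part of this patch.
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
using namespace llvm;

static void addLoopPassesSketch(FunctionPassManager &FPM, bool DebugLogging) {
  // Ask this adaptor (and only this adaptor) to provide MemorySSA; the adaptor
  // also marks the LoopAnalysisManager proxy so invalidation stays correct.
  FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(),
                                              /*UseMemorySSA=*/true,
                                              DebugLogging));
}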
diff --git a/include/llvm/Transforms/Scalar/LoopUnrollPass.h b/include/llvm/Transforms/Scalar/LoopUnrollPass.h index a84d889a83ad..afeb1f1da029 100644 --- a/include/llvm/Transforms/Scalar/LoopUnrollPass.h +++ b/include/llvm/Transforms/Scalar/LoopUnrollPass.h @@ -62,6 +62,8 @@ struct LoopUnrollOptions { Optional AllowPeeling; Optional AllowRuntime; Optional AllowUpperBound; + Optional AllowProfileBasedPeeling; + Optional FullUnrollMaxCount; int OptLevel; /// If false, use a cost model to determine whether unrolling of a loop is @@ -110,6 +112,18 @@ struct LoopUnrollOptions { OptLevel = O; return *this; } + + // Enables or disables loop peeling basing on profile. + LoopUnrollOptions &setProfileBasedPeeling(int O) { + AllowProfileBasedPeeling = O; + return *this; + } + + // Sets the max full unroll count. + LoopUnrollOptions &setFullUnrollMaxCount(unsigned O) { + FullUnrollMaxCount = O; + return *this; + } }; /// Loop unroll pass that will support both full and partial unrolling. diff --git a/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h b/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h new file mode 100644 index 000000000000..a5ad4a2192a0 --- /dev/null +++ b/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h @@ -0,0 +1,41 @@ +//===- LowerConstantIntrinsics.h - Lower constant int. pass -*- C++ -*-========// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// The header file for the LowerConstantIntrinsics pass as used by the new pass +/// manager. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_LOWERCONSTANTINTRINSICS_H +#define LLVM_TRANSFORMS_SCALAR_LOWERCONSTANTINTRINSICS_H + +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +struct LowerConstantIntrinsicsPass : + PassInfoMixin { +public: + explicit LowerConstantIntrinsicsPass() {} + + /// Run the pass over the function. + /// + /// This will lower all remaining 'objectsize' and 'is.constant'` + /// intrinsic calls in this function, even when the argument has no known + /// size or is not a constant respectively. The resulting constant is + /// propagated and conditional branches are resolved where possible. + /// This complements the Instruction Simplification and + /// Instruction Combination passes of the optimized pass chain. 
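The two new LoopUnrollOptions setters compose with the existing builder-style interface. A configuration sketch follows; constructing LoopUnrollPass from the options object is assumed to work as before and is not part of this hunk.

// Hypothetical configuration, not part of this patch.
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
using namespace llvm;

static void addUnrollSketch(FunctionPassManager &FPM) {
  LoopUnrollOptions Opts = LoopUnrollOptions()
                               .setOptLevel(2)
                               .setProfileBasedPeeling(true) // new in this patch
                               .setFullUnrollMaxCount(64);   // illustrative limit
  FPM.addPass(LoopUnrollPass(Opts));
}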
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &); +}; + +} + +#endif diff --git a/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h b/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h index 9071a56532f8..c5f6d6e0e8bd 100644 --- a/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h +++ b/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h @@ -27,12 +27,28 @@ #include "llvm/IR/PassManager.h" namespace llvm { +struct MergedLoadStoreMotionOptions { + bool SplitFooterBB; + MergedLoadStoreMotionOptions(bool SplitFooterBB = false) + : SplitFooterBB(SplitFooterBB) {} + + MergedLoadStoreMotionOptions &splitFooterBB(bool SFBB) { + SplitFooterBB = SFBB; + return *this; + } +}; + class MergedLoadStoreMotionPass : public PassInfoMixin { + MergedLoadStoreMotionOptions Options; + public: + MergedLoadStoreMotionPass() + : MergedLoadStoreMotionPass(MergedLoadStoreMotionOptions()) {} + MergedLoadStoreMotionPass(const MergedLoadStoreMotionOptions &PassOptions) + : Options(PassOptions) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; - } #endif // LLVM_TRANSFORMS_SCALAR_MERGEDLOADSTOREMOTION_H diff --git a/include/llvm/Transforms/Scalar/Reassociate.h b/include/llvm/Transforms/Scalar/Reassociate.h index 2db8d8ce309c..d5b175eff0e6 100644 --- a/include/llvm/Transforms/Scalar/Reassociate.h +++ b/include/llvm/Transforms/Scalar/Reassociate.h @@ -122,7 +122,9 @@ private: void EraseInst(Instruction *I); void RecursivelyEraseDeadInsts(Instruction *I, OrderedSet &Insts); void OptimizeInst(Instruction *I); - Instruction *canonicalizeNegConstExpr(Instruction *I); + Instruction *canonicalizeNegFPConstantsForOp(Instruction *I, Instruction *Op, + Value *OtherOp); + Instruction *canonicalizeNegFPConstants(Instruction *I); void BuildPairMap(ReversePostOrderTraversal &RPOT); }; diff --git a/include/llvm/Transforms/Scalar/SCCP.h b/include/llvm/Transforms/Scalar/SCCP.h index 0ffd983eb3e0..45e674a20a16 100644 --- a/include/llvm/Transforms/Scalar/SCCP.h +++ b/include/llvm/Transforms/Scalar/SCCP.h @@ -45,7 +45,8 @@ struct AnalysisResultsForFn { PostDominatorTree *PDT; }; -bool runIPSCCP(Module &M, const DataLayout &DL, const TargetLibraryInfo *TLI, +bool runIPSCCP(Module &M, const DataLayout &DL, + std::function GetTLI, function_ref getAnalysis); } // end namespace llvm diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h index 4d861ffe9a31..698e57fd0394 100644 --- a/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -83,10 +83,16 @@ bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI = nullptr); /// Attempts to merge a block into its predecessor, if possible. The return /// value indicates success or failure. +/// By default do not merge blocks if BB's predecessor has multiple successors. +/// If PredecessorWithTwoSuccessors = true, the blocks can only be merged +/// if BB's Pred has a branch to BB and to AnotherBB, and BB has a single +/// successor Sing. In this case the branch will be updated with Sing instead of +/// BB, and BB will still be merged into its predecessor and removed. 
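MergedLoadStoreMotion now takes an options struct that mirrors the SplitFooterBB flag added to the legacy factory earlier in this patch. A minimal usage sketch:

// Hypothetical usage, not part of this patch.
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
using namespace llvm;

static void addMLSMSketch(FunctionPassManager &FPM) {
  // Allow the pass to split the footer block when sinking stores requires it.
  MergedLoadStoreMotionOptions Opts;
  FPM.addPass(MergedLoadStoreMotionPass(Opts.splitFooterBB(true)));
}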
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU = nullptr, LoopInfo *LI = nullptr, MemorySSAUpdater *MSSAU = nullptr, - MemoryDependenceResults *MemDep = nullptr); + MemoryDependenceResults *MemDep = nullptr, + bool PredecessorWithTwoSuccessors = false); /// Replace all uses of an instruction (specified by BI) with a value, then /// remove and delete the original instruction. @@ -222,7 +228,8 @@ BasicBlock *SplitEdge(BasicBlock *From, BasicBlock *To, /// info is updated. BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT = nullptr, LoopInfo *LI = nullptr, - MemorySSAUpdater *MSSAU = nullptr); + MemorySSAUpdater *MSSAU = nullptr, + const Twine &BBName = ""); /// This method introduces at least one new basic block into the function and /// moves some of the predecessors of BB to be predecessors of the new block. diff --git a/include/llvm/Transforms/Utils/BuildLibCalls.h b/include/llvm/Transforms/Utils/BuildLibCalls.h index 8421c31a36da..3d15b2a7bf2a 100644 --- a/include/llvm/Transforms/Utils/BuildLibCalls.h +++ b/include/llvm/Transforms/Utils/BuildLibCalls.h @@ -30,17 +30,16 @@ namespace llvm { bool inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI); bool inferLibFuncAttributes(Module *M, StringRef Name, const TargetLibraryInfo &TLI); - /// Check whether the overloaded unary floating point function + /// Check whether the overloaded floating point function /// corresponding to \a Ty is available. - bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, - LibFunc DoubleFn, LibFunc FloatFn, - LibFunc LongDoubleFn); + bool hasFloatFn(const TargetLibraryInfo *TLI, Type *Ty, + LibFunc DoubleFn, LibFunc FloatFn, LibFunc LongDoubleFn); - /// Get the name of the overloaded unary floating point function + /// Get the name of the overloaded floating point function /// corresponding to \a Ty. - StringRef getUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, - LibFunc DoubleFn, LibFunc FloatFn, - LibFunc LongDoubleFn); + StringRef getFloatFnName(const TargetLibraryInfo *TLI, Type *Ty, + LibFunc DoubleFn, LibFunc FloatFn, + LibFunc LongDoubleFn); /// Return V if it is an i8*, otherwise cast it to i8*. Value *castToCStr(Value *V, IRBuilder<> &B); @@ -51,6 +50,11 @@ namespace llvm { Value *emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL, const TargetLibraryInfo *TLI); + /// Emit a call to the strdup function to the builder, for the specified + /// pointer. Ptr is required to be some pointer type, and the return value has + /// 'i8*' type. + Value *emitStrDup(Value *Ptr, IRBuilder<> &B, const TargetLibraryInfo *TLI); + /// Emit a call to the strnlen function to the builder, for the specified /// pointer. Ptr is required to be some pointer type, MaxLen must be of size_t /// type, and the return value has 'intptr_t' type. @@ -164,6 +168,13 @@ namespace llvm { Value *emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, IRBuilder<> &B, const AttributeList &Attrs); + /// Emit a call to the binary function DoubleFn, FloatFn or LongDoubleFn, + /// depending of the type of Op1. + Value *emitBinaryFloatFnCall(Value *Op1, Value *Op2, + const TargetLibraryInfo *TLI, LibFunc DoubleFn, + LibFunc FloatFn, LibFunc LongDoubleFn, + IRBuilder<> &B, const AttributeList &Attrs); + /// Emit a call to the putchar function. This assumes that Char is an integer. 
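The renamed helpers drop the "unary" restriction, so binary folders can use the same availability check together with the TLI-aware emitter added above. Below is a sketch of a pow-style fold; the LibFunc_pow/powf/powl enumerators are assumed from the usual TargetLibraryInfo naming and are not spelled out in this patch.

// Hypothetical folding helper, not part of this patch.
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
using namespace llvm;

static Value *emitPowSketch(Value *Base, Value *Exp, const TargetLibraryInfo *TLI,
                            IRBuilder<> &B, const AttributeList &Attrs) {
  Type *Ty = Base->getType();
  // Bail out unless pow, powf or powl is actually available for this type.
  if (!hasFloatFn(TLI, Ty, LibFunc_pow, LibFunc_powf, LibFunc_powl))
    return nullptr;
  // Pick the correctly-typed variant and emit the call.
  return emitBinaryFloatFnCall(Base, Exp, TLI, LibFunc_pow, LibFunc_powf,
                               LibFunc_powl, B, Attrs);
}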
Value *emitPutChar(Value *Char, IRBuilder<> &B, const TargetLibraryInfo *TLI); diff --git a/include/llvm/Transforms/Utils/BypassSlowDivision.h b/include/llvm/Transforms/Utils/BypassSlowDivision.h index 471055921fa8..bd98c902d1ab 100644 --- a/include/llvm/Transforms/Utils/BypassSlowDivision.h +++ b/include/llvm/Transforms/Utils/BypassSlowDivision.h @@ -19,6 +19,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" +#include "llvm/IR/ValueHandle.h" #include namespace llvm { @@ -28,8 +29,10 @@ class Value; struct DivRemMapKey { bool SignedOp; - Value *Dividend; - Value *Divisor; + AssertingVH Dividend; + AssertingVH Divisor; + + DivRemMapKey() = default; DivRemMapKey(bool InSignedOp, Value *InDividend, Value *InDivisor) : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} @@ -50,8 +53,10 @@ template <> struct DenseMapInfo { } static unsigned getHashValue(const DivRemMapKey &Val) { - return (unsigned)(reinterpret_cast(Val.Dividend) ^ - reinterpret_cast(Val.Divisor)) ^ + return (unsigned)(reinterpret_cast( + static_cast(Val.Dividend)) ^ + reinterpret_cast( + static_cast(Val.Divisor))) ^ (unsigned)Val.SignedOp; } }; diff --git a/include/llvm/Transforms/Utils/CodeExtractor.h b/include/llvm/Transforms/Utils/CodeExtractor.h index 9d79ee1633f6..8a1ab796734e 100644 --- a/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/include/llvm/Transforms/Utils/CodeExtractor.h @@ -22,6 +22,7 @@ namespace llvm { +class AllocaInst; class BasicBlock; class BlockFrequency; class BlockFrequencyInfo; @@ -36,6 +37,38 @@ class Module; class Type; class Value; +/// A cache for the CodeExtractor analysis. The operation \ref +/// CodeExtractor::extractCodeRegion is guaranteed not to invalidate this +/// object. This object should conservatively be considered invalid if any +/// other mutating operations on the IR occur. +/// +/// Constructing this object is O(n) in the size of the function. +class CodeExtractorAnalysisCache { + /// The allocas in the function. + SmallVector Allocas; + + /// Base memory addresses of load/store instructions, grouped by block. + DenseMap> BaseMemAddrs; + + /// Blocks which contain instructions which may have unknown side-effects + /// on memory. + DenseSet SideEffectingBlocks; + + void findSideEffectInfoForBlock(BasicBlock &BB); + +public: + CodeExtractorAnalysisCache(Function &F); + + /// Get the allocas in the function at the time the analysis was created. + /// Note that some of these allocas may no longer be present in the function, + /// due to \ref CodeExtractor::extractCodeRegion. + ArrayRef getAllocas() const { return Allocas; } + + /// Check whether \p BB contains an instruction thought to load from, store + /// to, or otherwise clobber the alloca \p Addr. + bool doesBlockContainClobberOfAddr(BasicBlock &BB, AllocaInst *Addr) const; +}; + /// Utility class for extracting code into a new function. /// /// This utility provides a simple interface for extracting some sequence of @@ -104,13 +137,21 @@ class Value; /// /// Returns zero when called on a CodeExtractor instance where isEligible /// returns false. - Function *extractCodeRegion(); + Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC); + + /// Verify that assumption cache isn't stale after a region is extracted. + /// Returns false when verifier finds errors. AssumptionCache is passed as + /// parameter to make this function stateless. + static bool verifyAssumptionCache(const Function& F, AssumptionCache *AC); /// Test whether this code extractor is eligible. 
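Because extractCodeRegion is documented not to invalidate the new CodeExtractorAnalysisCache, a caller can build the cache once per function and reuse it across extractions. A minimal sketch; the CodeExtractor constructor arguments are the pre-existing ones and are assumed unchanged by this patch.

// Hypothetical outlining snippet, not part of this patch.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
using namespace llvm;

static Function *outlineRegionSketch(Function &F, ArrayRef<BasicBlock *> Blocks,
                                     DominatorTree *DT) {
  CodeExtractorAnalysisCache CEAC(F); // O(n) in the size of F, built once
  CodeExtractor CE(Blocks, DT);
  if (!CE.isEligible())               // now also rejects stray va_start/va_end
    return nullptr;
  return CE.extractCodeRegion(CEAC);  // guaranteed not to invalidate CEAC
}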
/// /// Based on the blocks used when constructing the code extractor, /// determine whether it is eligible for extraction. - bool isEligible() const { return !Blocks.empty(); } + /// + /// Checks that varargs handling (with vastart and vaend) is only done in + /// the outlined blocks. + bool isEligible() const; /// Compute the set of input values and output values for the code. /// @@ -127,7 +168,9 @@ class Value; /// region. /// /// Returns true if it is safe to do the code motion. - bool isLegalToShrinkwrapLifetimeMarkers(Instruction *AllocaAddr) const; + bool + isLegalToShrinkwrapLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC, + Instruction *AllocaAddr) const; /// Find the set of allocas whose life ranges are contained within the /// outlined region. @@ -137,7 +180,8 @@ class Value; /// are used by the lifetime markers are also candidates for shrink- /// wrapping. The instructions that need to be sunk are collected in /// 'Allocas'. - void findAllocas(ValueSet &SinkCands, ValueSet &HoistCands, + void findAllocas(const CodeExtractorAnalysisCache &CEAC, + ValueSet &SinkCands, ValueSet &HoistCands, BasicBlock *&ExitBlock) const; /// Find or create a block within the outline region for placing hoisted @@ -158,8 +202,9 @@ class Value; Instruction *LifeEnd = nullptr; }; - LifetimeMarkerInfo getLifetimeMarkers(Instruction *Addr, - BasicBlock *ExitBlock) const; + LifetimeMarkerInfo + getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC, + Instruction *Addr, BasicBlock *ExitBlock) const; void severSplitPHINodesOfEntry(BasicBlock *&Header); void severSplitPHINodesOfExits(const SmallPtrSetImpl &Exits); diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index ff516f230979..9fcb2f64d79b 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -271,6 +271,15 @@ inline unsigned getKnownAlignment(Value *V, const DataLayout &DL, return getOrEnforceKnownAlignment(V, 0, DL, CxtI, AC, DT); } +/// Create a call that matches the invoke \p II in terms of arguments, +/// attributes, debug information, etc. The call is not placed in a block and it +/// will not have a name. The invoke instruction is not removed, nor are the +/// uses replaced by the new call. +CallInst *createCallMatchingInvoke(InvokeInst *II); + +/// This function converts the specified invoek into a normall call. +void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr); + ///===---------------------------------------------------------------------===// /// Dbg Intrinsic utilities /// @@ -403,8 +412,7 @@ void removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU = nullptr); /// Remove all blocks that can not be reached from the function's entry. /// /// Returns true if any basic block was removed. -bool removeUnreachableBlocks(Function &F, LazyValueInfo *LVI = nullptr, - DomTreeUpdater *DTU = nullptr, +bool removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU = nullptr, MemorySSAUpdater *MSSAU = nullptr); /// Combine the metadata of two instructions so that K can replace J. Some @@ -424,6 +432,10 @@ void combineMetadata(Instruction *K, const Instruction *J, void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove); +/// Copy the metadata from the source instruction to the destination (the +/// replacement for the source instruction). +void copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source); + /// Patch the replacement so that it is not more restrictive than the value /// being replaced. 
It assumes that the replacement does not get moved from /// its original position. diff --git a/include/llvm/Transforms/Utils/LoopUtils.h b/include/llvm/Transforms/Utils/LoopUtils.h index 68bdded5cf93..d32f08717e9b 100644 --- a/include/llvm/Transforms/Utils/LoopUtils.h +++ b/include/llvm/Transforms/Utils/LoopUtils.h @@ -215,6 +215,9 @@ makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef FollowupAttrs, /// Look for the loop attribute that disables all transformation heuristic. bool hasDisableAllTransformsHint(const Loop *L); +/// Look for the loop attribute that disables the LICM transformation heuristics. +bool hasDisableLICMTransformsHint(const Loop *L); + /// The mode sets how eager a transformation should be applied. enum TransformationMode { /// The pass can use heuristics to determine whether a transformation should @@ -252,6 +255,8 @@ TransformationMode hasLICMVersioningTransformation(Loop *L); /// @} /// Set input string into loop metadata by keeping other values intact. +/// If the string is already in loop metadata update value if it is +/// different. void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V = 0); diff --git a/include/llvm/Transforms/Utils/MisExpect.h b/include/llvm/Transforms/Utils/MisExpect.h new file mode 100644 index 000000000000..1dbe8cb95936 --- /dev/null +++ b/include/llvm/Transforms/Utils/MisExpect.h @@ -0,0 +1,43 @@ +//===--- MisExpect.h - Check the use of llvm.expect with PGO data ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit warnings for potentially incorrect usage of the +// llvm.expect intrinsic. This utility extracts the threshold values from +// metadata associated with the instrumented Branch or Switch instruction. The +// threshold values are then used to determine if a warning should be emmited. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" + +namespace llvm { +namespace misexpect { + +/// verifyMisExpect - compares PGO counters to the thresholds used for +/// llvm.expect and warns if the PGO counters are outside of the expected +/// range. +/// \param I The Instruction being checked +/// \param Weights A vector of profile weights for each target block +/// \param Ctx The current LLVM context +void verifyMisExpect(llvm::Instruction *I, + const llvm::SmallVector &Weights, + llvm::LLVMContext &Ctx); + +/// checkClangInstrumentation - verify if llvm.expect matches PGO profile +/// This function checks the frontend instrumentation in the backend when +/// lowering llvm.expect intrinsics. It checks for existing metadata, and +/// then validates the use of llvm.expect against the assigned branch weights. 
+// +/// \param I the Instruction being checked +void checkFrontendInstrumentation(Instruction &I); + +} // namespace misexpect +} // namespace llvm diff --git a/include/llvm/Transforms/Utils/PredicateInfo.h b/include/llvm/Transforms/Utils/PredicateInfo.h index da4a5dcc28c0..7c7a8eb04a2c 100644 --- a/include/llvm/Transforms/Utils/PredicateInfo.h +++ b/include/llvm/Transforms/Utils/PredicateInfo.h @@ -229,10 +229,10 @@ protected: private: void buildPredicateInfo(); - void processAssume(IntrinsicInst *, BasicBlock *, SmallPtrSetImpl &); - void processBranch(BranchInst *, BasicBlock *, SmallPtrSetImpl &); - void processSwitch(SwitchInst *, BasicBlock *, SmallPtrSetImpl &); - void renameUses(SmallPtrSetImpl &); + void processAssume(IntrinsicInst *, BasicBlock *, SmallVectorImpl &); + void processBranch(BranchInst *, BasicBlock *, SmallVectorImpl &); + void processSwitch(SwitchInst *, BasicBlock *, SmallVectorImpl &); + void renameUses(SmallVectorImpl &); using ValueDFS = PredicateInfoClasses::ValueDFS; typedef SmallVectorImpl ValueDFSStack; void convertUsesToDFSOrdered(Value *, SmallVectorImpl &); @@ -240,7 +240,7 @@ private: bool stackIsInScope(const ValueDFSStack &, const ValueDFS &) const; void popStackUntilDFSScope(ValueDFSStack &, const ValueDFS &); ValueInfo &getOrCreateValueInfo(Value *); - void addInfoFor(SmallPtrSetImpl &OpsToRename, Value *Op, + void addInfoFor(SmallVectorImpl &OpsToRename, Value *Op, PredicateBase *PB); const ValueInfo &getValueInfo(Value *) const; Function &F; diff --git a/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/include/llvm/Transforms/Utils/SimplifyLibCalls.h index 2572094ddac8..88c2ef787ad8 100644 --- a/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -126,6 +126,12 @@ private: /// Erase an instruction from its parent with our eraser. void eraseFromParent(Instruction *I); + /// Replace an instruction with a value and erase it from its parent. 
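Below is a hedged sketch of a MisExpect call site when lowering llvm.expect on a two-way branch. The element type and inline size of the weight vector are assumptions, since the template arguments of the declaration are not legible in this copy of the patch.

// Hypothetical call site, not part of this patch; the vector type is assumed.
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/MisExpect.h"
using namespace llvm;

static void checkExpectSketch(BranchInst &BI, uint32_t TrueWeight,
                              uint32_t FalseWeight) {
  SmallVector<uint32_t, 4> Weights{TrueWeight, FalseWeight};
  // Warn if the PGO counters contradict the llvm.expect annotation.
  misexpect::verifyMisExpect(&BI, Weights, BI.getContext());
}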
+ void substituteInParent(Instruction *I, Value *With) { + replaceAllUsesWith(I, With); + eraseFromParent(I); + } + Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B); public: @@ -154,6 +160,7 @@ private: Value *optimizeStrRChr(CallInst *CI, IRBuilder<> &B); Value *optimizeStrCmp(CallInst *CI, IRBuilder<> &B); Value *optimizeStrNCmp(CallInst *CI, IRBuilder<> &B); + Value *optimizeStrNDup(CallInst *CI, IRBuilder<> &B); Value *optimizeStrCpy(CallInst *CI, IRBuilder<> &B); Value *optimizeStpCpy(CallInst *CI, IRBuilder<> &B); Value *optimizeStrNCpy(CallInst *CI, IRBuilder<> &B); @@ -164,14 +171,17 @@ private: Value *optimizeStrCSpn(CallInst *CI, IRBuilder<> &B); Value *optimizeStrStr(CallInst *CI, IRBuilder<> &B); Value *optimizeMemChr(CallInst *CI, IRBuilder<> &B); + Value *optimizeMemRChr(CallInst *CI, IRBuilder<> &B); Value *optimizeMemCmp(CallInst *CI, IRBuilder<> &B); Value *optimizeBCmp(CallInst *CI, IRBuilder<> &B); Value *optimizeMemCmpBCmpCommon(CallInst *CI, IRBuilder<> &B); + Value *optimizeMemPCpy(CallInst *CI, IRBuilder<> &B); Value *optimizeMemCpy(CallInst *CI, IRBuilder<> &B); Value *optimizeMemMove(CallInst *CI, IRBuilder<> &B); Value *optimizeMemSet(CallInst *CI, IRBuilder<> &B); Value *optimizeRealloc(CallInst *CI, IRBuilder<> &B); Value *optimizeWcslen(CallInst *CI, IRBuilder<> &B); + Value *optimizeBCopy(CallInst *CI, IRBuilder<> &B); // Wrapper for all String/Memory Library Call Optimizations Value *optimizeStringMemoryLibCall(CallInst *CI, IRBuilder<> &B); diff --git a/include/llvm/Transforms/Utils/UnrollLoop.h b/include/llvm/Transforms/Utils/UnrollLoop.h index 593ca26feb98..02b81b4b7ee2 100644 --- a/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/include/llvm/Transforms/Utils/UnrollLoop.h @@ -114,8 +114,8 @@ bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl &EphValues, OptimizationRemarkEmitter *ORE, unsigned &TripCount, - unsigned MaxTripCount, unsigned &TripMultiple, - unsigned LoopSize, + unsigned MaxTripCount, bool MaxOrZero, + unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound); @@ -132,7 +132,9 @@ TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel, Optional UserThreshold, Optional UserCount, Optional UserAllowPartial, Optional UserRuntime, - Optional UserUpperBound, Optional UserAllowPeeling); + Optional UserUpperBound, Optional UserAllowPeeling, + Optional UserAllowProfileBasedPeeling, + Optional UserFullUnrollMaxCount); unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, diff --git a/include/llvm/Transforms/Utils/ValueMapper.h b/include/llvm/Transforms/Utils/ValueMapper.h index 1952a210291e..ff5bfc609586 100644 --- a/include/llvm/Transforms/Utils/ValueMapper.h +++ b/include/llvm/Transforms/Utils/ValueMapper.h @@ -22,7 +22,7 @@ namespace llvm { class Constant; class Function; -class GlobalAlias; +class GlobalIndirectSymbol; class GlobalVariable; class Instruction; class MDNode; @@ -120,7 +120,7 @@ inline RemapFlags operator|(RemapFlags LHS, RemapFlags RHS) { /// instance: /// - \a scheduleMapGlobalInitializer() /// - \a scheduleMapAppendingVariable() -/// - \a scheduleMapGlobalAliasee() +/// - \a scheduleMapGlobalIndirectSymbol() /// - \a scheduleRemapFunction() /// /// Sometimes a callback needs a different mapping context. 
Such a context can @@ -180,8 +180,9 @@ public: bool IsOldCtorDtor, ArrayRef NewMembers, unsigned MappingContextID = 0); - void scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee, - unsigned MappingContextID = 0); + void scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS, + Constant &Target, + unsigned MappingContextID = 0); void scheduleRemapFunction(Function &F, unsigned MappingContextID = 0); }; diff --git a/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index b144006e2628..d1e7acc877bf 100644 --- a/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -33,18 +33,6 @@ namespace llvm { -/// Create an analysis remark that explains why vectorization failed -/// -/// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p -/// RemarkName is the identifier for the remark. If \p I is passed it is an -/// instruction that prevents vectorization. Otherwise \p TheLoop is used for -/// the location of the remark. \return the remark object that can be -/// streamed to. -OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName, - StringRef RemarkName, - Loop *TheLoop, - Instruction *I = nullptr); - /// Utility class for getting and setting loop vectorizer hints in the form /// of loop metadata. /// This class keeps a number of loop annotations locally (as member variables) @@ -55,7 +43,8 @@ OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName, /// for example 'force', means a decision has been made. So, we need to be /// careful NOT to add them if the user hasn't specifically asked so. class LoopVectorizeHints { - enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED }; + enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED, + HK_PREDICATE }; /// Hint - associates name and validation with the hint value. struct Hint { @@ -81,6 +70,9 @@ class LoopVectorizeHints { /// Already Vectorized Hint IsVectorized; + /// Vector Predicate + Hint Predicate; + /// Return the loop metadata prefix. static StringRef Prefix() { return "llvm.loop."; } @@ -109,6 +101,7 @@ public: unsigned getWidth() const { return Width.Value; } unsigned getInterleave() const { return Interleave.Value; } unsigned getIsVectorized() const { return IsVectorized.Value; } + unsigned getPredicate() const { return Predicate.Value; } enum ForceKind getForce() const { if ((ForceKind)Force.Value == FK_Undefined && hasDisableAllTransformsHint(TheLoop)) @@ -235,8 +228,8 @@ public: bool canVectorize(bool UseVPlanNativePath); /// Return true if we can vectorize this loop while folding its tail by - /// masking. - bool canFoldTailByMasking(); + /// masking, and mark all respective loads/stores for masking. + bool prepareToFoldTailByMasking(); /// Returns the primary induction variable. PHINode *getPrimaryInduction() { return PrimaryInduction; } @@ -362,9 +355,16 @@ private: bool canVectorizeOuterLoop(); /// Return true if all of the instructions in the block can be speculatively - /// executed. \p SafePtrs is a list of addresses that are known to be legal - /// and we know that we can read from them without segfault. - bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl &SafePtrs); + /// executed, and record the loads/stores that require masking. If's that + /// guard loads can be ignored under "assume safety" unless \p PreserveGuards + /// is true. 
This can happen when we introduces guards for which the original + /// "unguarded-loads are safe" assumption does not hold. For example, the + /// vectorizer's fold-tail transformation changes the loop to execute beyond + /// its original trip-count, under a proper guard, which should be preserved. + /// \p SafePtrs is a list of addresses that are known to be legal and we know + /// that we can read from them without segfault. + bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl &SafePtrs, + bool PreserveGuards = false); /// Updates the vectorization state by adding \p Phi to the inductions list. /// This can set \p Phi as the main induction of the loop if \p Phi is a @@ -382,14 +382,6 @@ private: return LAI ? &LAI->getSymbolicStrides() : nullptr; } - /// Reports a vectorization illegality: print \p DebugMsg for debugging - /// purposes along with the corresponding optimization remark \p RemarkName. - /// If \p I is passed it is an instruction that prevents vectorization. - /// Otherwise the loop is used for the location of the remark. - void reportVectorizationFailure(const StringRef DebugMsg, - const StringRef OREMsg, const StringRef ORETag, - Instruction *I = nullptr) const; - /// The loop that we evaluate. Loop *TheLoop; @@ -452,8 +444,8 @@ private: /// Holds the widest induction type encountered. Type *WidestIndTy = nullptr; - /// Allowed outside users. This holds the induction and reduction - /// vars which can be accessed from outside the loop. + /// Allowed outside users. This holds the variables that can be accessed from + /// outside the loop. SmallPtrSet AllowedExit; /// Can we assume the absence of NaNs. diff --git a/include/llvm/Transforms/Vectorize/LoopVectorize.h b/include/llvm/Transforms/Vectorize/LoopVectorize.h index d1ec06afb02a..d824e2903ef3 100644 --- a/include/llvm/Transforms/Vectorize/LoopVectorize.h +++ b/include/llvm/Transforms/Vectorize/LoopVectorize.h @@ -155,6 +155,14 @@ struct LoopVectorizePass : public PassInfoMixin { bool processLoop(Loop *L); }; +/// Reports a vectorization failure: print \p DebugMsg for debugging +/// purposes along with the corresponding optimization remark \p RemarkName. +/// If \p I is passed, it is an instruction that prevents vectorization. +/// Otherwise, the loop \p TheLoop is used for the location of the remark. +void reportVectorizationFailure(const StringRef DebugMsg, + const StringRef OREMsg, const StringRef ORETag, + OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr); + } // end namespace llvm #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H diff --git a/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/include/llvm/Transforms/Vectorize/SLPVectorizer.h index ac6afb761d4d..32ccc8a46380 100644 --- a/include/llvm/Transforms/Vectorize/SLPVectorizer.h +++ b/include/llvm/Transforms/Vectorize/SLPVectorizer.h @@ -24,7 +24,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/PassManager.h" -#include "llvm/IR/ValueHandle.h" namespace llvm { @@ -60,8 +59,8 @@ extern cl::opt RunSLPVectorization; struct SLPVectorizerPass : public PassInfoMixin { using StoreList = SmallVector; using StoreListMap = MapVector; - using WeakTrackingVHList = SmallVector; - using WeakTrackingVHListMap = MapVector; + using GEPList = SmallVector; + using GEPListMap = MapVector; ScalarEvolution *SE = nullptr; TargetTransformInfo *TTI = nullptr; @@ -131,7 +130,7 @@ private: /// Tries to vectorize constructs started from CmpInst, InsertValueInst or /// InsertElementInst instructions. 
- bool vectorizeSimpleInstructions(SmallVectorImpl &Instructions, + bool vectorizeSimpleInstructions(SmallVectorImpl &Instructions, BasicBlock *BB, slpvectorizer::BoUpSLP &R); /// Scan the basic block and look for patterns that are likely to start @@ -147,7 +146,7 @@ private: StoreListMap Stores; /// The getelementptr instructions in a basic block organized by base pointer. - WeakTrackingVHListMap GEPs; + GEPListMap GEPs; }; } // end namespace llvm diff --git a/include/llvm/XRay/FDRRecordProducer.h b/include/llvm/XRay/FDRRecordProducer.h index b530a85bc7e1..043d91568f4e 100644 --- a/include/llvm/XRay/FDRRecordProducer.h +++ b/include/llvm/XRay/FDRRecordProducer.h @@ -27,7 +27,7 @@ public: class FileBasedRecordProducer : public RecordProducer { const XRayFileHeader &Header; DataExtractor &E; - uint32_t &OffsetPtr; + uint64_t &OffsetPtr; uint32_t CurrentBufferBytes = 0; // Helper function which gets the next record by speculatively reading through @@ -36,7 +36,7 @@ class FileBasedRecordProducer : public RecordProducer { public: FileBasedRecordProducer(const XRayFileHeader &FH, DataExtractor &DE, - uint32_t &OP) + uint64_t &OP) : Header(FH), E(DE), OffsetPtr(OP) {} /// This producer encapsulates the logic for loading a File-backed diff --git a/include/llvm/XRay/FDRRecords.h b/include/llvm/XRay/FDRRecords.h index a8ce74bd88fb..e3e16f71e2fe 100644 --- a/include/llvm/XRay/FDRRecords.h +++ b/include/llvm/XRay/FDRRecords.h @@ -417,16 +417,16 @@ public: class RecordInitializer : public RecordVisitor { DataExtractor &E; - uint32_t &OffsetPtr; + uint64_t &OffsetPtr; uint16_t Version; public: static constexpr uint16_t DefaultVersion = 5u; - explicit RecordInitializer(DataExtractor &DE, uint32_t &OP, uint16_t V) + explicit RecordInitializer(DataExtractor &DE, uint64_t &OP, uint16_t V) : RecordVisitor(), E(DE), OffsetPtr(OP), Version(V) {} - explicit RecordInitializer(DataExtractor &DE, uint32_t &OP) + explicit RecordInitializer(DataExtractor &DE, uint64_t &OP) : RecordInitializer(DE, OP, DefaultVersion) {} Error visit(BufferExtents &) override; diff --git a/include/llvm/XRay/FileHeaderReader.h b/include/llvm/XRay/FileHeaderReader.h index 1c9681cfd9af..30878f3e99e8 100644 --- a/include/llvm/XRay/FileHeaderReader.h +++ b/include/llvm/XRay/FileHeaderReader.h @@ -24,7 +24,7 @@ namespace xray { /// Convenience function for loading the file header given a data extractor at a /// specified offset. Expected readBinaryFormatHeader(DataExtractor &HeaderExtractor, - uint32_t &OffsetPtr); + uint64_t &OffsetPtr); } // namespace xray } // namespace llvm diff --git a/include/llvm/module.modulemap b/include/llvm/module.modulemap index 9c4668e1473c..ecb3b37004fd 100644 --- a/include/llvm/module.modulemap +++ b/include/llvm/module.modulemap @@ -253,6 +253,7 @@ module LLVM_IR { textual header "IR/DebugInfoFlags.def" textual header "IR/Instruction.def" textual header "IR/Metadata.def" + textual header "IR/FixedMetadataKinds.def" textual header "IR/Value.def" textual header "IR/RuntimeLibcalls.def" } @@ -331,6 +332,7 @@ module LLVM_TableGen { module LLVM_Transforms { requires cplusplus umbrella "Transforms" + module * { export * } } diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index 32241e355eb8..55dd9a4cda08 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -784,7 +784,7 @@ bool AAResultsWrapperPass::runOnFunction(Function &F) { // previous object first, in this case replacing it with an empty one, before // registering new results. 
AAR.reset( - new AAResults(getAnalysis().getTLI())); + new AAResults(getAnalysis().getTLI(F))); // BasicAA is always available for function analyses. Also, we add it first // so that it can trump TBAA results when it proves MustAlias. @@ -840,7 +840,7 @@ void AAResultsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F, BasicAAResult &BAR) { - AAResults AAR(P.getAnalysis().getTLI()); + AAResults AAR(P.getAnalysis().getTLI(F)); // Add in our explicitly constructed BasicAA results. if (!DisableBasicAA) diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index a6e5b9fab558..79fbcd464c1b 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -119,6 +119,12 @@ void AliasSetTracker::removeAliasSet(AliasSet *AS) { TotalMayAliasSetSize -= AS->size(); AliasSets.erase(AS); + // If we've removed the saturated alias set, set saturated marker back to + // nullptr and ensure this tracker is empty. + if (AS == AliasAnyAS) { + AliasAnyAS = nullptr; + assert(AliasSets.empty() && "Tracker not empty"); + } } void AliasSet::removeFromTracker(AliasSetTracker &AST) { @@ -690,8 +696,10 @@ void AliasSet::print(raw_ostream &OS) const { } void AliasSetTracker::print(raw_ostream &OS) const { - OS << "Alias Set Tracker: " << AliasSets.size() << " alias sets for " - << PointerMap.size() << " pointer values.\n"; + OS << "Alias Set Tracker: " << AliasSets.size(); + if (AliasAnyAS) + OS << " (Saturated)"; + OS << " alias sets for " << PointerMap.size() << " pointer values.\n"; for (const AliasSet &AS : *this) AS.print(OS); OS << "\n"; diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index d46a8d8e306c..af718526684b 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -65,6 +65,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeModuleDebugInfoPrinterPass(Registry); initializeModuleSummaryIndexWrapperPassPass(Registry); initializeMustExecutePrinterPass(Registry); + initializeMustBeExecutedContextPrinterPass(Registry); initializeObjCARCAAWrapperPassPass(Registry); initializeOptimizationRemarkEmitterWrapperPassPass(Registry); initializePhiValuesWrapperPassPass(Registry); diff --git a/lib/Analysis/AssumptionCache.cpp b/lib/Analysis/AssumptionCache.cpp index cf2f845dee0a..129944743c5e 100644 --- a/lib/Analysis/AssumptionCache.cpp +++ b/lib/Analysis/AssumptionCache.cpp @@ -130,7 +130,10 @@ void AssumptionCache::unregisterAssumption(CallInst *CI) { if (AVI != AffectedValues.end()) AffectedValues.erase(AVI); } - remove_if(AssumeHandles, [CI](WeakTrackingVH &VH) { return CI == VH; }); + + AssumeHandles.erase( + remove_if(AssumeHandles, [CI](WeakTrackingVH &VH) { return CI == VH; }), + AssumeHandles.end()); } void AssumptionCache::AffectedValueCallbackVH::deleted() { @@ -140,7 +143,7 @@ void AssumptionCache::AffectedValueCallbackVH::deleted() { // 'this' now dangles! 
} -void AssumptionCache::copyAffectedValuesInCache(Value *OV, Value *NV) { +void AssumptionCache::transferAffectedValuesInCache(Value *OV, Value *NV) { auto &NAVV = getOrInsertAffectedValues(NV); auto AVI = AffectedValues.find(OV); if (AVI == AffectedValues.end()) @@ -149,6 +152,7 @@ void AssumptionCache::copyAffectedValuesInCache(Value *OV, Value *NV) { for (auto &A : AVI->second) if (std::find(NAVV.begin(), NAVV.end(), A) == NAVV.end()) NAVV.push_back(A); + AffectedValues.erase(OV); } void AssumptionCache::AffectedValueCallbackVH::allUsesReplacedWith(Value *NV) { @@ -157,7 +161,7 @@ void AssumptionCache::AffectedValueCallbackVH::allUsesReplacedWith(Value *NV) { // Any assumptions that affected this value now affect the new value. - AC->copyAffectedValuesInCache(getValPtr(), NV); + AC->transferAffectedValuesInCache(getValPtr(), NV); // 'this' now might dangle! If the AffectedValues map was resized to add an // entry for NV then this object might have been destroyed in favor of some // copy in the grown map. @@ -252,7 +256,7 @@ AssumptionCache &AssumptionCacheTracker::getAssumptionCache(Function &F) { // Ok, build a new cache by scanning the function, insert it and the value // handle into our map, and return the newly populated cache. auto IP = AssumptionCaches.insert(std::make_pair( - FunctionCallbackVH(&F, this), llvm::make_unique(F))); + FunctionCallbackVH(&F, this), std::make_unique(F))); assert(IP.second && "Scanning function already in the map?"); return *IP.first->second; } diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 3721c99883b8..f3c30c258c19 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -233,6 +233,26 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size, return ObjectSize != MemoryLocation::UnknownSize && ObjectSize < Size; } +/// Return the minimal extent from \p V to the end of the underlying object, +/// assuming the result is used in an aliasing query. E.g., we do use the query +/// location size and the fact that null pointers cannot alias here. +static uint64_t getMinimalExtentFrom(const Value &V, + const LocationSize &LocSize, + const DataLayout &DL, + bool NullIsValidLoc) { + // If we have dereferenceability information we know a lower bound for the + // extent as accesses for a lower offset would be valid. We need to exclude + // the "or null" part if null is a valid pointer. + bool CanBeNull; + uint64_t DerefBytes = V.getPointerDereferenceableBytes(DL, CanBeNull); + DerefBytes = (CanBeNull && NullIsValidLoc) ? 0 : DerefBytes; + // If queried with a precise location size, we assume that location size to be + // accessed, thus valid. + if (LocSize.isPrecise()) + DerefBytes = std::max(DerefBytes, LocSize.getValue()); + return DerefBytes; +} + /// Returns true if we can prove that the object specified by V has size Size. static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL, const TargetLibraryInfo &TLI, bool NullIsValidLoc) { @@ -481,7 +501,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V, // because it should be in sync with CaptureTracking. Not using it may // cause weird miscompilations where 2 aliasing pointers are assumed to // noalias. 
- if (auto *RP = getArgumentAliasingToReturnedPointer(Call)) { + if (auto *RP = getArgumentAliasingToReturnedPointer(Call, false)) { V = RP; continue; } @@ -1792,10 +1812,12 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size, // If the size of one access is larger than the entire object on the other // side, then we know such behavior is undefined and can assume no alias. bool NullIsValidLocation = NullPointerIsDefined(&F); - if ((V1Size.isPrecise() && isObjectSmallerThan(O2, V1Size.getValue(), DL, TLI, - NullIsValidLocation)) || - (V2Size.isPrecise() && isObjectSmallerThan(O1, V2Size.getValue(), DL, TLI, - NullIsValidLocation))) + if ((isObjectSmallerThan( + O2, getMinimalExtentFrom(*V1, V1Size, DL, NullIsValidLocation), DL, + TLI, NullIsValidLocation)) || + (isObjectSmallerThan( + O1, getMinimalExtentFrom(*V2, V2Size, DL, NullIsValidLocation), DL, + TLI, NullIsValidLocation))) return NoAlias; // Check the cache before climbing up use-def chains. This also terminates @@ -2053,8 +2075,9 @@ bool BasicAAWrapperPass::runOnFunction(Function &F) { auto *LIWP = getAnalysisIfAvailable(); auto *PVWP = getAnalysisIfAvailable(); - Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), F, TLIWP.getTLI(), - ACT.getAssumptionCache(F), &DTWP.getDomTree(), + Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), F, + TLIWP.getTLI(F), ACT.getAssumptionCache(F), + &DTWP.getDomTree(), LIWP ? &LIWP->getLoopInfo() : nullptr, PVWP ? &PVWP->getResult() : nullptr)); @@ -2071,8 +2094,7 @@ void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { BasicAAResult llvm::createLegacyPMBasicAAResult(Pass &P, Function &F) { return BasicAAResult( - F.getParent()->getDataLayout(), - F, - P.getAnalysis().getTLI(), + F.getParent()->getDataLayout(), F, + P.getAnalysis().getTLI(F), P.getAnalysis().getAssumptionCache(F)); } diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 5eb95003f5d8..a06ee096d54c 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -118,6 +118,13 @@ static const uint32_t ZH_NONTAKEN_WEIGHT = 12; static const uint32_t FPH_TAKEN_WEIGHT = 20; static const uint32_t FPH_NONTAKEN_WEIGHT = 12; +/// This is the probability for an ordered floating point comparison. +static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1; +/// This is the probability for an unordered floating point comparison, it means +/// one or two of the operands are NaN. Usually it is used to test for an +/// exceptional case, so the result is unlikely. 
+static const uint32_t FPH_UNO_WEIGHT = 1; + /// Invoke-terminating normal branch taken weight /// /// This is the weight for branching to the normal destination of an invoke @@ -778,6 +785,8 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) { if (!FCmp) return false; + uint32_t TakenWeight = FPH_TAKEN_WEIGHT; + uint32_t NontakenWeight = FPH_NONTAKEN_WEIGHT; bool isProb; if (FCmp->isEquality()) { // f1 == f2 -> Unlikely @@ -786,9 +795,13 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) { } else if (FCmp->getPredicate() == FCmpInst::FCMP_ORD) { // !isnan -> Likely isProb = true; + TakenWeight = FPH_ORD_WEIGHT; + NontakenWeight = FPH_UNO_WEIGHT; } else if (FCmp->getPredicate() == FCmpInst::FCMP_UNO) { // isnan -> Unlikely isProb = false; + TakenWeight = FPH_ORD_WEIGHT; + NontakenWeight = FPH_UNO_WEIGHT; } else { return false; } @@ -798,8 +811,7 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) { if (!isProb) std::swap(TakenIdx, NonTakenIdx); - BranchProbability TakenProb(FPH_TAKEN_WEIGHT, - FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT); + BranchProbability TakenProb(TakenWeight, TakenWeight + NontakenWeight); setEdgeProbability(BB, TakenIdx, TakenProb); setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl()); return true; @@ -1014,7 +1026,8 @@ void BranchProbabilityInfoWrapperPass::getAnalysisUsage( bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) { const LoopInfo &LI = getAnalysis().getLoopInfo(); - const TargetLibraryInfo &TLI = getAnalysis().getTLI(); + const TargetLibraryInfo &TLI = + getAnalysis().getTLI(F); BPI.calculate(F, LI, &TLI); return false; } diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp index 18b83d6838cc..8215b4ecbb03 100644 --- a/lib/Analysis/CFG.cpp +++ b/lib/Analysis/CFG.cpp @@ -87,11 +87,18 @@ unsigned llvm::GetSuccessorNumber(const BasicBlock *BB, /// with multiple predecessors. bool llvm::isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges) { - assert(TI->isTerminator() && "Must be a terminator to have successors!"); assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!"); + return isCriticalEdge(TI, TI->getSuccessor(SuccNum), AllowIdenticalEdges); +} + +bool llvm::isCriticalEdge(const Instruction *TI, const BasicBlock *Dest, + bool AllowIdenticalEdges) { + assert(TI->isTerminator() && "Must be a terminator to have successors!"); if (TI->getNumSuccessors() == 1) return false; - const BasicBlock *Dest = TI->getSuccessor(SuccNum); + assert(find(predecessors(Dest), TI->getParent()) != pred_end(Dest) && + "No edge between TI's block and Dest."); + const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest); // If there is more than one predecessor, this is a critical edge... 
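The lib/Analysis/CFG.cpp hunk above splits isCriticalEdge() so callers can name the destination block directly instead of carrying a successor index. Below is a minimal sketch (not part of the patch) of how a caller might drive the new overload; it assumes the usual LLVM headers are available, and the helper findCriticalEdges and its output vector are purely illustrative, not LLVM API.

// Sketch only: walk one block's terminator and record the critical edges
// leaving it. Both overloads are exercised; per the hunk above, the
// index-based form now simply looks up the successor and forwards to the
// destination-based form.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include <cassert>
#include <utility>

using namespace llvm;

static void findCriticalEdges(
    const BasicBlock &BB,
    SmallVectorImpl<std::pair<const Instruction *, const BasicBlock *>> &Out) {
  const Instruction *TI = BB.getTerminator();
  for (unsigned SuccNum = 0, E = TI->getNumSuccessors(); SuccNum != E;
       ++SuccNum) {
    const BasicBlock *Dest = TI->getSuccessor(SuccNum);
    // The two queries should agree, since Dest was taken from SuccNum.
    assert(isCriticalEdge(TI, SuccNum, /*AllowIdenticalEdges=*/false) ==
           isCriticalEdge(TI, Dest, /*AllowIdenticalEdges=*/false));
    if (isCriticalEdge(TI, Dest, /*AllowIdenticalEdges=*/false))
      Out.push_back({TI, Dest});
  }
}

The one new precondition is that Dest must actually be a successor of TI's parent block, which the hunk above enforces with an assertion over predecessors(Dest).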
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp index 619b675b58d8..4f4103fefa25 100644 --- a/lib/Analysis/CFGPrinter.cpp +++ b/lib/Analysis/CFGPrinter.cpp @@ -99,7 +99,7 @@ static void writeCFGToDotFile(Function &F, bool CFGOnly = false) { errs() << "Writing '" << Filename << "'..."; std::error_code EC; - raw_fd_ostream File(Filename, EC, sys::fs::F_Text); + raw_fd_ostream File(Filename, EC, sys::fs::OF_Text); if (!EC) WriteGraph(File, (const Function*)&F, CFGOnly); diff --git a/lib/Analysis/CFLAndersAliasAnalysis.cpp b/lib/Analysis/CFLAndersAliasAnalysis.cpp index 690e514d4f5c..fd90bd1521d6 100644 --- a/lib/Analysis/CFLAndersAliasAnalysis.cpp +++ b/lib/Analysis/CFLAndersAliasAnalysis.cpp @@ -88,9 +88,11 @@ using namespace llvm::cflaa; #define DEBUG_TYPE "cfl-anders-aa" -CFLAndersAAResult::CFLAndersAAResult(const TargetLibraryInfo &TLI) : TLI(TLI) {} +CFLAndersAAResult::CFLAndersAAResult( + std::function GetTLI) + : GetTLI(std::move(GetTLI)) {} CFLAndersAAResult::CFLAndersAAResult(CFLAndersAAResult &&RHS) - : AAResultBase(std::move(RHS)), TLI(RHS.TLI) {} + : AAResultBase(std::move(RHS)), GetTLI(std::move(RHS.GetTLI)) {} CFLAndersAAResult::~CFLAndersAAResult() = default; namespace { @@ -779,7 +781,7 @@ static AliasAttrMap buildAttrMap(const CFLGraph &Graph, CFLAndersAAResult::FunctionInfo CFLAndersAAResult::buildInfoFrom(const Function &Fn) { CFLGraphBuilder GraphBuilder( - *this, TLI, + *this, GetTLI(const_cast(Fn)), // Cast away the constness here due to GraphBuilder's API requirement const_cast(Fn)); auto &Graph = GraphBuilder.getCFLGraph(); @@ -898,7 +900,10 @@ AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA, AnalysisKey CFLAndersAA::Key; CFLAndersAAResult CFLAndersAA::run(Function &F, FunctionAnalysisManager &AM) { - return CFLAndersAAResult(AM.getResult(F)); + auto GetTLI = [&AM](Function &F) -> TargetLibraryInfo & { + return AM.getResult(F); + }; + return CFLAndersAAResult(GetTLI); } char CFLAndersAAWrapperPass::ID = 0; @@ -914,8 +919,10 @@ CFLAndersAAWrapperPass::CFLAndersAAWrapperPass() : ImmutablePass(ID) { } void CFLAndersAAWrapperPass::initializePass() { - auto &TLIWP = getAnalysis(); - Result.reset(new CFLAndersAAResult(TLIWP.getTLI())); + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis().getTLI(F); + }; + Result.reset(new CFLAndersAAResult(GetTLI)); } void CFLAndersAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/lib/Analysis/CFLSteensAliasAnalysis.cpp b/lib/Analysis/CFLSteensAliasAnalysis.cpp index 44b1834f70bf..b87aa4065392 100644 --- a/lib/Analysis/CFLSteensAliasAnalysis.cpp +++ b/lib/Analysis/CFLSteensAliasAnalysis.cpp @@ -60,10 +60,11 @@ using namespace llvm::cflaa; #define DEBUG_TYPE "cfl-steens-aa" -CFLSteensAAResult::CFLSteensAAResult(const TargetLibraryInfo &TLI) - : AAResultBase(), TLI(TLI) {} +CFLSteensAAResult::CFLSteensAAResult( + std::function GetTLI) + : AAResultBase(), GetTLI(std::move(GetTLI)) {} CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg) - : AAResultBase(std::move(Arg)), TLI(Arg.TLI) {} + : AAResultBase(std::move(Arg)), GetTLI(std::move(Arg.GetTLI)) {} CFLSteensAAResult::~CFLSteensAAResult() = default; /// Information we have about a function and would like to keep around. @@ -181,7 +182,7 @@ CFLSteensAAResult::FunctionInfo::FunctionInfo( // Builds the graph + StratifiedSets for a function. 
CFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) { - CFLGraphBuilder GraphBuilder(*this, TLI, *Fn); + CFLGraphBuilder GraphBuilder(*this, GetTLI(*Fn), *Fn); StratifiedSetsBuilder SetBuilder; // Add all CFLGraph nodes and all Dereference edges to StratifiedSets @@ -331,7 +332,10 @@ AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA, AnalysisKey CFLSteensAA::Key; CFLSteensAAResult CFLSteensAA::run(Function &F, FunctionAnalysisManager &AM) { - return CFLSteensAAResult(AM.getResult(F)); + auto GetTLI = [&AM](Function &F) -> const TargetLibraryInfo & { + return AM.getResult(F); + }; + return CFLSteensAAResult(GetTLI); } char CFLSteensAAWrapperPass::ID = 0; @@ -347,8 +351,10 @@ CFLSteensAAWrapperPass::CFLSteensAAWrapperPass() : ImmutablePass(ID) { } void CFLSteensAAWrapperPass::initializePass() { - auto &TLIWP = getAnalysis(); - Result.reset(new CFLSteensAAResult(TLIWP.getTLI())); + auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & { + return this->getAnalysis().getTLI(F); + }; + Result.reset(new CFLSteensAAResult(GetTLI)); } void CFLSteensAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/lib/Analysis/CallGraph.cpp b/lib/Analysis/CallGraph.cpp index ec5e94d499be..70aeb1a688ee 100644 --- a/lib/Analysis/CallGraph.cpp +++ b/lib/Analysis/CallGraph.cpp @@ -29,7 +29,7 @@ using namespace llvm; CallGraph::CallGraph(Module &M) : M(M), ExternalCallingNode(getOrInsertFunction(nullptr)), - CallsExternalNode(llvm::make_unique(nullptr)) { + CallsExternalNode(std::make_unique(nullptr)) { // Add every function to the call graph. for (Function &F : M) addToCallGraph(&F); @@ -150,7 +150,7 @@ CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) { return CGN.get(); assert((!F || F->getParent() == &M) && "Function not in current module!"); - CGN = llvm::make_unique(const_cast(F)); + CGN = std::make_unique(const_cast(F)); return CGN.get(); } diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index adaa83a6c443..20e2f06540a3 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -33,6 +33,22 @@ CaptureTracker::~CaptureTracker() {} bool CaptureTracker::shouldExplore(const Use *U) { return true; } +bool CaptureTracker::isDereferenceableOrNull(Value *O, const DataLayout &DL) { + // An inbounds GEP can either be a valid pointer (pointing into + // or to the end of an allocation), or be null in the default + // address space. So for an inbounds GEP there is no way to let + // the pointer escape using clever GEP hacking because doing so + // would make the pointer point outside of the allocated object + // and thus make the GEP result a poison value. Similarly, other + // dereferenceable pointers cannot be manipulated without producing + // poison. + if (auto *GEP = dyn_cast(O)) + if (GEP->isInBounds()) + return true; + bool CanBeNull; + return O->getPointerDereferenceableBytes(DL, CanBeNull); +} + namespace { struct SimpleCaptureTracker : public CaptureTracker { explicit SimpleCaptureTracker(bool ReturnCaptures) @@ -251,7 +267,8 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, // marked with nocapture do not capture. This means that places like // GetUnderlyingObject in ValueTracking or DecomposeGEPExpression // in BasicAA also need to know about this property. 
- if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call)) { + if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call, + true)) { AddUses(Call); break; } @@ -330,7 +347,9 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, AddUses(I); break; case Instruction::ICmp: { - if (auto *CPN = dyn_cast(I->getOperand(1))) { + unsigned Idx = (I->getOperand(0) == V) ? 0 : 1; + unsigned OtherIdx = 1 - Idx; + if (auto *CPN = dyn_cast(I->getOperand(OtherIdx))) { // Don't count comparisons of a no-alias return value against null as // captures. This allows us to ignore comparisons of malloc results // with null, for example. @@ -338,29 +357,18 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, if (isNoAliasCall(V->stripPointerCasts())) break; if (!I->getFunction()->nullPointerIsDefined()) { - auto *O = I->getOperand(0)->stripPointerCastsSameRepresentation(); - // An inbounds GEP can either be a valid pointer (pointing into - // or to the end of an allocation), or be null in the default - // address space. So for an inbounds GEPs there is no way to let - // the pointer escape using clever GEP hacking because doing so - // would make the pointer point outside of the allocated object - // and thus make the GEP result a poison value. - if (auto *GEP = dyn_cast(O)) - if (GEP->isInBounds()) - break; - // Comparing a dereferenceable_or_null argument against null - // cannot lead to pointer escapes, because if it is not null it - // must be a valid (in-bounds) pointer. - bool CanBeNull; - if (O->getPointerDereferenceableBytes(I->getModule()->getDataLayout(), CanBeNull)) + auto *O = I->getOperand(Idx)->stripPointerCastsSameRepresentation(); + // Comparing a dereferenceable_or_null pointer against null cannot + // lead to pointer escapes, because if it is not null it must be a + // valid (in-bounds) pointer. + if (Tracker->isDereferenceableOrNull(O, I->getModule()->getDataLayout())) break; } } // Comparison against value stored in global variable. Given the pointer // does not escape, its value cannot be guessed and stored separately in a // global variable. - unsigned OtherIndex = (I->getOperand(0) == V) ? 1 : 0; - auto *LI = dyn_cast(I->getOperand(OtherIndex)); + auto *LI = dyn_cast(I->getOperand(OtherIdx)); if (LI && isa(LI->getPointerOperand())) break; // Otherwise, be conservative. There are crazy ways to capture pointers diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 20231ca78b45..8dbcf7034fda 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -93,6 +93,9 @@ static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy, /// This always returns a non-null constant, but it may be a /// ConstantExpr if unfoldable. Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { + assert(CastInst::castIsValid(Instruction::BitCast, C, DestTy) && + "Invalid constantexpr bitcast!"); + // Catch the obvious splat cases. 
if (C->isNullValue() && !DestTy->isX86_MMXTy()) return Constant::getNullValue(DestTy); @@ -521,8 +524,23 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy, return nullptr; C = FoldBitCast(C, MapTy->getPointerTo(AS), DL); - if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, MapTy, DL)) - return FoldBitCast(Res, LoadTy, DL); + if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, MapTy, DL)) { + if (Res->isNullValue() && !LoadTy->isX86_MMXTy()) + // Materializing a zero can be done trivially without a bitcast + return Constant::getNullValue(LoadTy); + Type *CastTy = LoadTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(LoadTy) : LoadTy; + Res = FoldBitCast(Res, CastTy, DL); + if (LoadTy->isPtrOrPtrVectorTy()) { + // For vector of pointer, we needed to first convert to a vector of integer, then do vector inttoptr + if (Res->isNullValue() && !LoadTy->isX86_MMXTy()) + return Constant::getNullValue(LoadTy); + if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) + // Be careful not to replace a load of an addrspace value with an inttoptr here + return nullptr; + Res = ConstantExpr::getCast(Instruction::IntToPtr, Res, LoadTy); + } + return Res; + } return nullptr; } @@ -544,7 +562,7 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy, int64_t InitializerSize = DL.getTypeAllocSize(GV->getInitializer()->getType()); // If we're not accessing anything in this constant, the result is undefined. - if (Offset + BytesLoaded <= 0) + if (Offset <= -1 * static_cast(BytesLoaded)) return UndefValue::get(IntType); // If we're not accessing anything in this constant, the result is undefined. @@ -781,10 +799,10 @@ Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef Ops, } /// Strip the pointer casts, but preserve the address space information. -Constant* StripPtrCastKeepAS(Constant* Ptr, Type *&ElemTy) { +Constant *StripPtrCastKeepAS(Constant *Ptr, Type *&ElemTy) { assert(Ptr->getType()->isPointerTy() && "Not a pointer type"); auto *OldPtrTy = cast(Ptr->getType()); - Ptr = Ptr->stripPointerCasts(); + Ptr = cast(Ptr->stripPointerCasts()); auto *NewPtrTy = cast(Ptr->getType()); ElemTy = NewPtrTy->getPointerElementType(); @@ -1038,7 +1056,7 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode, return ConstantExpr::getExtractElement(Ops[0], Ops[1]); case Instruction::ExtractValue: return ConstantExpr::getExtractValue( - Ops[0], dyn_cast(InstOrCE)->getIndices()); + Ops[0], cast(InstOrCE)->getIndices()); case Instruction::InsertElement: return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]); case Instruction::ShuffleVector: @@ -1464,40 +1482,50 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { if (!F->hasName()) return false; - StringRef Name = F->getName(); // In these cases, the check of the length is required. We don't want to // return true for a name like "cos\0blah" which strcmp would return equal to // "cos", but has length 8. 
+ StringRef Name = F->getName(); switch (Name[0]) { default: return false; case 'a': - return Name == "acos" || Name == "asin" || Name == "atan" || - Name == "atan2" || Name == "acosf" || Name == "asinf" || - Name == "atanf" || Name == "atan2f"; + return Name == "acos" || Name == "acosf" || + Name == "asin" || Name == "asinf" || + Name == "atan" || Name == "atanf" || + Name == "atan2" || Name == "atan2f"; case 'c': - return Name == "ceil" || Name == "cos" || Name == "cosh" || - Name == "ceilf" || Name == "cosf" || Name == "coshf"; + return Name == "ceil" || Name == "ceilf" || + Name == "cos" || Name == "cosf" || + Name == "cosh" || Name == "coshf"; case 'e': - return Name == "exp" || Name == "exp2" || Name == "expf" || Name == "exp2f"; + return Name == "exp" || Name == "expf" || + Name == "exp2" || Name == "exp2f"; case 'f': - return Name == "fabs" || Name == "floor" || Name == "fmod" || - Name == "fabsf" || Name == "floorf" || Name == "fmodf"; + return Name == "fabs" || Name == "fabsf" || + Name == "floor" || Name == "floorf" || + Name == "fmod" || Name == "fmodf"; case 'l': - return Name == "log" || Name == "log10" || Name == "logf" || - Name == "log10f"; + return Name == "log" || Name == "logf" || + Name == "log2" || Name == "log2f" || + Name == "log10" || Name == "log10f"; + case 'n': + return Name == "nearbyint" || Name == "nearbyintf"; case 'p': return Name == "pow" || Name == "powf"; case 'r': - return Name == "round" || Name == "roundf"; + return Name == "rint" || Name == "rintf" || + Name == "round" || Name == "roundf"; case 's': - return Name == "sin" || Name == "sinh" || Name == "sqrt" || - Name == "sinf" || Name == "sinhf" || Name == "sqrtf"; + return Name == "sin" || Name == "sinf" || + Name == "sinh" || Name == "sinhf" || + Name == "sqrt" || Name == "sqrtf"; case 't': - return Name == "tan" || Name == "tanh" || Name == "tanf" || Name == "tanhf"; + return Name == "tan" || Name == "tanf" || + Name == "tanh" || Name == "tanhf" || + Name == "trunc" || Name == "truncf"; case '_': - // Check for various function names that get used for the math functions // when the header files are preprocessed with the macro // __FINITE_MATH_ONLY__ enabled. @@ -1713,40 +1741,37 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) return nullptr; - if (IntrinsicID == Intrinsic::round) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmNearestTiesToAway); - return ConstantFP::get(Ty->getContext(), V); + // Use internal versions of these intrinsics. 
+ APFloat U = Op->getValueAPF(); + + if (IntrinsicID == Intrinsic::nearbyint || IntrinsicID == Intrinsic::rint) { + U.roundToIntegral(APFloat::rmNearestTiesToEven); + return ConstantFP::get(Ty->getContext(), U); } - if (IntrinsicID == Intrinsic::floor) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmTowardNegative); - return ConstantFP::get(Ty->getContext(), V); + if (IntrinsicID == Intrinsic::round) { + U.roundToIntegral(APFloat::rmNearestTiesToAway); + return ConstantFP::get(Ty->getContext(), U); } if (IntrinsicID == Intrinsic::ceil) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmTowardPositive); - return ConstantFP::get(Ty->getContext(), V); + U.roundToIntegral(APFloat::rmTowardPositive); + return ConstantFP::get(Ty->getContext(), U); } - if (IntrinsicID == Intrinsic::trunc) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmTowardZero); - return ConstantFP::get(Ty->getContext(), V); + if (IntrinsicID == Intrinsic::floor) { + U.roundToIntegral(APFloat::rmTowardNegative); + return ConstantFP::get(Ty->getContext(), U); } - if (IntrinsicID == Intrinsic::rint) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmNearestTiesToEven); - return ConstantFP::get(Ty->getContext(), V); + if (IntrinsicID == Intrinsic::trunc) { + U.roundToIntegral(APFloat::rmTowardZero); + return ConstantFP::get(Ty->getContext(), U); } - if (IntrinsicID == Intrinsic::nearbyint) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmNearestTiesToEven); - return ConstantFP::get(Ty->getContext(), V); + if (IntrinsicID == Intrinsic::fabs) { + U.clearSign(); + return ConstantFP::get(Ty->getContext(), U); } /// We only fold functions with finite arguments. Folding NaN and inf is @@ -1763,18 +1788,19 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, switch (IntrinsicID) { default: break; - case Intrinsic::fabs: - return ConstantFoldFP(fabs, V, Ty); - case Intrinsic::log2: - return ConstantFoldFP(Log2, V, Ty); case Intrinsic::log: return ConstantFoldFP(log, V, Ty); + case Intrinsic::log2: + // TODO: What about hosts that lack a C99 library? + return ConstantFoldFP(Log2, V, Ty); case Intrinsic::log10: + // TODO: What about hosts that lack a C99 library? return ConstantFoldFP(log10, V, Ty); case Intrinsic::exp: return ConstantFoldFP(exp, V, Ty); case Intrinsic::exp2: - return ConstantFoldFP(exp2, V, Ty); + // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library. 
+ return ConstantFoldBinaryFP(pow, 2.0, V, Ty); case Intrinsic::sin: return ConstantFoldFP(sin, V, Ty); case Intrinsic::cos: @@ -1786,104 +1812,150 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, if (!TLI) return nullptr; - char NameKeyChar = Name[0]; - if (Name[0] == '_' && Name.size() > 2 && Name[1] == '_') - NameKeyChar = Name[2]; - - switch (NameKeyChar) { - case 'a': - if ((Name == "acos" && TLI->has(LibFunc_acos)) || - (Name == "acosf" && TLI->has(LibFunc_acosf)) || - (Name == "__acos_finite" && TLI->has(LibFunc_acos_finite)) || - (Name == "__acosf_finite" && TLI->has(LibFunc_acosf_finite))) + LibFunc Func = NotLibFunc; + TLI->getLibFunc(Name, Func); + switch (Func) { + default: + break; + case LibFunc_acos: + case LibFunc_acosf: + case LibFunc_acos_finite: + case LibFunc_acosf_finite: + if (TLI->has(Func)) return ConstantFoldFP(acos, V, Ty); - else if ((Name == "asin" && TLI->has(LibFunc_asin)) || - (Name == "asinf" && TLI->has(LibFunc_asinf)) || - (Name == "__asin_finite" && TLI->has(LibFunc_asin_finite)) || - (Name == "__asinf_finite" && TLI->has(LibFunc_asinf_finite))) + break; + case LibFunc_asin: + case LibFunc_asinf: + case LibFunc_asin_finite: + case LibFunc_asinf_finite: + if (TLI->has(Func)) return ConstantFoldFP(asin, V, Ty); - else if ((Name == "atan" && TLI->has(LibFunc_atan)) || - (Name == "atanf" && TLI->has(LibFunc_atanf))) + break; + case LibFunc_atan: + case LibFunc_atanf: + if (TLI->has(Func)) return ConstantFoldFP(atan, V, Ty); break; - case 'c': - if ((Name == "ceil" && TLI->has(LibFunc_ceil)) || - (Name == "ceilf" && TLI->has(LibFunc_ceilf))) - return ConstantFoldFP(ceil, V, Ty); - else if ((Name == "cos" && TLI->has(LibFunc_cos)) || - (Name == "cosf" && TLI->has(LibFunc_cosf))) + case LibFunc_ceil: + case LibFunc_ceilf: + if (TLI->has(Func)) { + U.roundToIntegral(APFloat::rmTowardPositive); + return ConstantFP::get(Ty->getContext(), U); + } + break; + case LibFunc_cos: + case LibFunc_cosf: + if (TLI->has(Func)) return ConstantFoldFP(cos, V, Ty); - else if ((Name == "cosh" && TLI->has(LibFunc_cosh)) || - (Name == "coshf" && TLI->has(LibFunc_coshf)) || - (Name == "__cosh_finite" && TLI->has(LibFunc_cosh_finite)) || - (Name == "__coshf_finite" && TLI->has(LibFunc_coshf_finite))) + break; + case LibFunc_cosh: + case LibFunc_coshf: + case LibFunc_cosh_finite: + case LibFunc_coshf_finite: + if (TLI->has(Func)) return ConstantFoldFP(cosh, V, Ty); break; - case 'e': - if ((Name == "exp" && TLI->has(LibFunc_exp)) || - (Name == "expf" && TLI->has(LibFunc_expf)) || - (Name == "__exp_finite" && TLI->has(LibFunc_exp_finite)) || - (Name == "__expf_finite" && TLI->has(LibFunc_expf_finite))) + case LibFunc_exp: + case LibFunc_expf: + case LibFunc_exp_finite: + case LibFunc_expf_finite: + if (TLI->has(Func)) return ConstantFoldFP(exp, V, Ty); - if ((Name == "exp2" && TLI->has(LibFunc_exp2)) || - (Name == "exp2f" && TLI->has(LibFunc_exp2f)) || - (Name == "__exp2_finite" && TLI->has(LibFunc_exp2_finite)) || - (Name == "__exp2f_finite" && TLI->has(LibFunc_exp2f_finite))) - // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a - // C99 library. + break; + case LibFunc_exp2: + case LibFunc_exp2f: + case LibFunc_exp2_finite: + case LibFunc_exp2f_finite: + if (TLI->has(Func)) + // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library. 
return ConstantFoldBinaryFP(pow, 2.0, V, Ty); break; - case 'f': - if ((Name == "fabs" && TLI->has(LibFunc_fabs)) || - (Name == "fabsf" && TLI->has(LibFunc_fabsf))) - return ConstantFoldFP(fabs, V, Ty); - else if ((Name == "floor" && TLI->has(LibFunc_floor)) || - (Name == "floorf" && TLI->has(LibFunc_floorf))) - return ConstantFoldFP(floor, V, Ty); + case LibFunc_fabs: + case LibFunc_fabsf: + if (TLI->has(Func)) { + U.clearSign(); + return ConstantFP::get(Ty->getContext(), U); + } break; - case 'l': - if ((Name == "log" && V > 0 && TLI->has(LibFunc_log)) || - (Name == "logf" && V > 0 && TLI->has(LibFunc_logf)) || - (Name == "__log_finite" && V > 0 && - TLI->has(LibFunc_log_finite)) || - (Name == "__logf_finite" && V > 0 && - TLI->has(LibFunc_logf_finite))) + case LibFunc_floor: + case LibFunc_floorf: + if (TLI->has(Func)) { + U.roundToIntegral(APFloat::rmTowardNegative); + return ConstantFP::get(Ty->getContext(), U); + } + break; + case LibFunc_log: + case LibFunc_logf: + case LibFunc_log_finite: + case LibFunc_logf_finite: + if (V > 0.0 && TLI->has(Func)) return ConstantFoldFP(log, V, Ty); - else if ((Name == "log10" && V > 0 && TLI->has(LibFunc_log10)) || - (Name == "log10f" && V > 0 && TLI->has(LibFunc_log10f)) || - (Name == "__log10_finite" && V > 0 && - TLI->has(LibFunc_log10_finite)) || - (Name == "__log10f_finite" && V > 0 && - TLI->has(LibFunc_log10f_finite))) + break; + case LibFunc_log2: + case LibFunc_log2f: + case LibFunc_log2_finite: + case LibFunc_log2f_finite: + if (V > 0.0 && TLI->has(Func)) + // TODO: What about hosts that lack a C99 library? + return ConstantFoldFP(Log2, V, Ty); + break; + case LibFunc_log10: + case LibFunc_log10f: + case LibFunc_log10_finite: + case LibFunc_log10f_finite: + if (V > 0.0 && TLI->has(Func)) + // TODO: What about hosts that lack a C99 library? 
return ConstantFoldFP(log10, V, Ty); break; - case 'r': - if ((Name == "round" && TLI->has(LibFunc_round)) || - (Name == "roundf" && TLI->has(LibFunc_roundf))) - return ConstantFoldFP(round, V, Ty); + case LibFunc_nearbyint: + case LibFunc_nearbyintf: + case LibFunc_rint: + case LibFunc_rintf: + if (TLI->has(Func)) { + U.roundToIntegral(APFloat::rmNearestTiesToEven); + return ConstantFP::get(Ty->getContext(), U); + } break; - case 's': - if ((Name == "sin" && TLI->has(LibFunc_sin)) || - (Name == "sinf" && TLI->has(LibFunc_sinf))) + case LibFunc_round: + case LibFunc_roundf: + if (TLI->has(Func)) { + U.roundToIntegral(APFloat::rmNearestTiesToAway); + return ConstantFP::get(Ty->getContext(), U); + } + break; + case LibFunc_sin: + case LibFunc_sinf: + if (TLI->has(Func)) return ConstantFoldFP(sin, V, Ty); - else if ((Name == "sinh" && TLI->has(LibFunc_sinh)) || - (Name == "sinhf" && TLI->has(LibFunc_sinhf)) || - (Name == "__sinh_finite" && TLI->has(LibFunc_sinh_finite)) || - (Name == "__sinhf_finite" && TLI->has(LibFunc_sinhf_finite))) + break; + case LibFunc_sinh: + case LibFunc_sinhf: + case LibFunc_sinh_finite: + case LibFunc_sinhf_finite: + if (TLI->has(Func)) return ConstantFoldFP(sinh, V, Ty); - else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc_sqrt)) || - (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc_sqrtf))) + break; + case LibFunc_sqrt: + case LibFunc_sqrtf: + if (V >= 0.0 && TLI->has(Func)) return ConstantFoldFP(sqrt, V, Ty); break; - case 't': - if ((Name == "tan" && TLI->has(LibFunc_tan)) || - (Name == "tanf" && TLI->has(LibFunc_tanf))) + case LibFunc_tan: + case LibFunc_tanf: + if (TLI->has(Func)) return ConstantFoldFP(tan, V, Ty); - else if ((Name == "tanh" && TLI->has(LibFunc_tanh)) || - (Name == "tanhf" && TLI->has(LibFunc_tanhf))) + break; + case LibFunc_tanh: + case LibFunc_tanhf: + if (TLI->has(Func)) return ConstantFoldFP(tanh, V, Ty); break; - default: + case LibFunc_trunc: + case LibFunc_truncf: + if (TLI->has(Func)) { + U.roundToIntegral(APFloat::rmTowardZero); + return ConstantFP::get(Ty->getContext(), U); + } break; } return nullptr; @@ -2002,19 +2074,35 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, if (!TLI) return nullptr; - if ((Name == "pow" && TLI->has(LibFunc_pow)) || - (Name == "powf" && TLI->has(LibFunc_powf)) || - (Name == "__pow_finite" && TLI->has(LibFunc_pow_finite)) || - (Name == "__powf_finite" && TLI->has(LibFunc_powf_finite))) - return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); - if ((Name == "fmod" && TLI->has(LibFunc_fmod)) || - (Name == "fmodf" && TLI->has(LibFunc_fmodf))) - return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty); - if ((Name == "atan2" && TLI->has(LibFunc_atan2)) || - (Name == "atan2f" && TLI->has(LibFunc_atan2f)) || - (Name == "__atan2_finite" && TLI->has(LibFunc_atan2_finite)) || - (Name == "__atan2f_finite" && TLI->has(LibFunc_atan2f_finite))) - return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); + + LibFunc Func = NotLibFunc; + TLI->getLibFunc(Name, Func); + switch (Func) { + default: + break; + case LibFunc_pow: + case LibFunc_powf: + case LibFunc_pow_finite: + case LibFunc_powf_finite: + if (TLI->has(Func)) + return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); + break; + case LibFunc_fmod: + case LibFunc_fmodf: + if (TLI->has(Func)) { + APFloat V = Op1->getValueAPF(); + if (APFloat::opStatus::opOK == V.mod(Op2->getValueAPF())) + return ConstantFP::get(Ty->getContext(), V); + } + break; + case LibFunc_atan2: + case LibFunc_atan2f: + case LibFunc_atan2_finite: + case LibFunc_atan2f_finite: + if (TLI->has(Func)) + 
return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); + break; + } } else if (auto *Op2C = dyn_cast(Operands[1])) { if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy()) return ConstantFP::get(Ty->getContext(), @@ -2041,20 +2129,27 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, switch (IntrinsicID) { default: break; + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: + // X - undef -> { undef, false } + // undef - X -> { undef, false } + // X + undef -> { undef, false } + // undef + x -> { undef, false } + if (!C0 || !C1) { + return ConstantStruct::get( + cast(Ty), + {UndefValue::get(Ty->getStructElementType(0)), + Constant::getNullValue(Ty->getStructElementType(1))}); + } + LLVM_FALLTHROUGH; case Intrinsic::smul_with_overflow: - case Intrinsic::umul_with_overflow: - // Even if both operands are undef, we cannot fold muls to undef - // in the general case. For example, on i2 there are no inputs - // that would produce { i2 -1, i1 true } as the result. + case Intrinsic::umul_with_overflow: { + // undef * X -> { 0, false } + // X * undef -> { 0, false } if (!C0 || !C1) return Constant::getNullValue(Ty); - LLVM_FALLTHROUGH; - case Intrinsic::sadd_with_overflow: - case Intrinsic::uadd_with_overflow: - case Intrinsic::ssub_with_overflow: - case Intrinsic::usub_with_overflow: { - if (!C0 || !C1) - return UndefValue::get(Ty); APInt Res; bool Overflow; @@ -2194,13 +2289,9 @@ static Constant *ConstantFoldScalarCall3(StringRef Name, case Intrinsic::fma: case Intrinsic::fmuladd: { APFloat V = Op1->getValueAPF(); - APFloat::opStatus s = V.fusedMultiplyAdd(Op2->getValueAPF(), - Op3->getValueAPF(), - APFloat::rmNearestTiesToEven); - if (s != APFloat::opInvalidOp) - return ConstantFP::get(Ty->getContext(), V); - - return nullptr; + V.fusedMultiplyAdd(Op2->getValueAPF(), Op3->getValueAPF(), + APFloat::rmNearestTiesToEven); + return ConstantFP::get(Ty->getContext(), V); } } } diff --git a/lib/Analysis/DDG.cpp b/lib/Analysis/DDG.cpp new file mode 100644 index 000000000000..b5c3c761ad98 --- /dev/null +++ b/lib/Analysis/DDG.cpp @@ -0,0 +1,203 @@ +//===- DDG.cpp - Data Dependence Graph -------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The implementation for the data dependence graph. 
+//===----------------------------------------------------------------------===// +#include "llvm/Analysis/DDG.h" +#include "llvm/Analysis/LoopInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "ddg" + +template class llvm::DGEdge; +template class llvm::DGNode; +template class llvm::DirectedGraph; + +//===--------------------------------------------------------------------===// +// DDGNode implementation +//===--------------------------------------------------------------------===// +DDGNode::~DDGNode() {} + +bool DDGNode::collectInstructions( + llvm::function_ref const &Pred, + InstructionListType &IList) const { + assert(IList.empty() && "Expected the IList to be empty on entry."); + if (isa(this)) { + for (auto *I : cast(this)->getInstructions()) + if (Pred(I)) + IList.push_back(I); + } else + llvm_unreachable("unimplemented type of node"); + return !IList.empty(); +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode::NodeKind K) { + const char *Out; + switch (K) { + case DDGNode::NodeKind::SingleInstruction: + Out = "single-instruction"; + break; + case DDGNode::NodeKind::MultiInstruction: + Out = "multi-instruction"; + break; + case DDGNode::NodeKind::Root: + Out = "root"; + break; + case DDGNode::NodeKind::Unknown: + Out = "??"; + break; + } + OS << Out; + return OS; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode &N) { + OS << "Node Address:" << &N << ":" << N.getKind() << "\n"; + if (isa(N)) { + OS << " Instructions:\n"; + for (auto *I : cast(N).getInstructions()) + OS.indent(2) << *I << "\n"; + } else if (!isa(N)) + llvm_unreachable("unimplemented type of node"); + + OS << (N.getEdges().empty() ? " Edges:none!\n" : " Edges:\n"); + for (auto &E : N.getEdges()) + OS.indent(2) << *E; + return OS; +} + +//===--------------------------------------------------------------------===// +// SimpleDDGNode implementation +//===--------------------------------------------------------------------===// + +SimpleDDGNode::SimpleDDGNode(Instruction &I) + : DDGNode(NodeKind::SingleInstruction), InstList() { + assert(InstList.empty() && "Expected empty list."); + InstList.push_back(&I); +} + +SimpleDDGNode::SimpleDDGNode(const SimpleDDGNode &N) + : DDGNode(N), InstList(N.InstList) { + assert(((getKind() == NodeKind::SingleInstruction && InstList.size() == 1) || + (getKind() == NodeKind::MultiInstruction && InstList.size() > 1)) && + "constructing from invalid simple node."); +} + +SimpleDDGNode::SimpleDDGNode(SimpleDDGNode &&N) + : DDGNode(std::move(N)), InstList(std::move(N.InstList)) { + assert(((getKind() == NodeKind::SingleInstruction && InstList.size() == 1) || + (getKind() == NodeKind::MultiInstruction && InstList.size() > 1)) && + "constructing from invalid simple node."); +} + +SimpleDDGNode::~SimpleDDGNode() { InstList.clear(); } + +//===--------------------------------------------------------------------===// +// DDGEdge implementation +//===--------------------------------------------------------------------===// + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGEdge::EdgeKind K) { + const char *Out; + switch (K) { + case DDGEdge::EdgeKind::RegisterDefUse: + Out = "def-use"; + break; + case DDGEdge::EdgeKind::MemoryDependence: + Out = "memory"; + break; + case DDGEdge::EdgeKind::Rooted: + Out = "rooted"; + break; + case DDGEdge::EdgeKind::Unknown: + Out = "??"; + break; + } + OS << Out; + return OS; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGEdge &E) { + OS << "[" << E.getKind() << "] to " << &E.getTargetNode() << "\n"; + 
return OS; +} + +//===--------------------------------------------------------------------===// +// DataDependenceGraph implementation +//===--------------------------------------------------------------------===// +using BasicBlockListType = SmallVector; + +DataDependenceGraph::DataDependenceGraph(Function &F, DependenceInfo &D) + : DependenceGraphInfo(F.getName().str(), D) { + BasicBlockListType BBList; + for (auto &BB : F.getBasicBlockList()) + BBList.push_back(&BB); + DDGBuilder(*this, D, BBList).populate(); +} + +DataDependenceGraph::DataDependenceGraph(const Loop &L, DependenceInfo &D) + : DependenceGraphInfo(Twine(L.getHeader()->getParent()->getName() + "." + + L.getHeader()->getName()) + .str(), + D) { + BasicBlockListType BBList; + for (BasicBlock *BB : L.blocks()) + BBList.push_back(BB); + DDGBuilder(*this, D, BBList).populate(); +} + +DataDependenceGraph::~DataDependenceGraph() { + for (auto *N : Nodes) { + for (auto *E : *N) + delete E; + delete N; + } +} + +bool DataDependenceGraph::addNode(DDGNode &N) { + if (!DDGBase::addNode(N)) + return false; + + // In general, if the root node is already created and linked, it is not safe + // to add new nodes since they may be unreachable by the root. + // TODO: Allow adding Pi-block nodes after root is created. Pi-blocks are an + // exception because they represent components that are already reachable by + // root. + assert(!Root && "Root node is already added. No more nodes can be added."); + if (isa(N)) + Root = &N; + + return true; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DataDependenceGraph &G) { + for (auto *Node : G) + OS << *Node << "\n"; + return OS; +} + +//===--------------------------------------------------------------------===// +// DDG Analysis Passes +//===--------------------------------------------------------------------===// + +/// DDG as a loop pass. +DDGAnalysis::Result DDGAnalysis::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR) { + Function *F = L.getHeader()->getParent(); + DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI); + return std::make_unique(L, DI); +} +AnalysisKey DDGAnalysis::Key; + +PreservedAnalyses DDGAnalysisPrinterPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + OS << "'DDG' for loop '" << L.getHeader()->getName() << "':\n"; + OS << *AM.getResult(L, AR); + return PreservedAnalyses::all(); +} diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp index 75f269e84f9d..0038c9fb9ce4 100644 --- a/lib/Analysis/DependenceAnalysis.cpp +++ b/lib/Analysis/DependenceAnalysis.cpp @@ -254,7 +254,7 @@ FullDependence::FullDependence(Instruction *Source, Instruction *Destination, LoopIndependent(PossiblyLoopIndependent) { Consistent = true; if (CommonLevels) - DV = make_unique(CommonLevels); + DV = std::make_unique(CommonLevels); } // The rest are simple getters that hide the implementation. @@ -3415,7 +3415,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { // can only analyze simple loads and stores, i.e., no calls, invokes, etc. LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n"); - return make_unique(Src, Dst); + return std::make_unique(Src, Dst); } assert(isLoadOrStore(Src) && "instruction is not load or store"); @@ -3430,7 +3430,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, case PartialAlias: // cannot analyse objects if we don't understand their aliasing. 
LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n"); - return make_unique(Src, Dst); + return std::make_unique(Src, Dst); case NoAlias: // If the objects noalias, they are distinct, accesses are independent. LLVM_DEBUG(dbgs() << "no alias\n"); @@ -3777,7 +3777,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, return nullptr; } - return make_unique(std::move(Result)); + return std::make_unique(std::move(Result)); } diff --git a/lib/Analysis/DependenceGraphBuilder.cpp b/lib/Analysis/DependenceGraphBuilder.cpp new file mode 100644 index 000000000000..ed1d8351b2f0 --- /dev/null +++ b/lib/Analysis/DependenceGraphBuilder.cpp @@ -0,0 +1,228 @@ +//===- DependenceGraphBuilder.cpp ------------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This file implements common steps of the build algorithm for construction +// of dependence graphs such as DDG and PDG. +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/DependenceGraphBuilder.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DDG.h" + +using namespace llvm; + +#define DEBUG_TYPE "dgb" + +STATISTIC(TotalGraphs, "Number of dependence graphs created."); +STATISTIC(TotalDefUseEdges, "Number of def-use edges created."); +STATISTIC(TotalMemoryEdges, "Number of memory dependence edges created."); +STATISTIC(TotalFineGrainedNodes, "Number of fine-grained nodes created."); +STATISTIC(TotalConfusedEdges, + "Number of confused memory dependencies between two nodes."); +STATISTIC(TotalEdgeReversals, + "Number of times the source and sink of dependence was reversed to " + "expose cycles in the graph."); + +using InstructionListType = SmallVector; + +//===--------------------------------------------------------------------===// +// AbstractDependenceGraphBuilder implementation +//===--------------------------------------------------------------------===// + +template +void AbstractDependenceGraphBuilder::createFineGrainedNodes() { + ++TotalGraphs; + assert(IMap.empty() && "Expected empty instruction map at start"); + for (BasicBlock *BB : BBList) + for (Instruction &I : *BB) { + auto &NewNode = createFineGrainedNode(I); + IMap.insert(std::make_pair(&I, &NewNode)); + ++TotalFineGrainedNodes; + } +} + +template +void AbstractDependenceGraphBuilder::createAndConnectRootNode() { + // Create a root node that connects to every connected component of the graph. + // This is done to allow graph iterators to visit all the disjoint components + // of the graph, in a single walk. + // + // This algorithm works by going through each node of the graph and for each + // node N, do a DFS starting from N. A rooted edge is established between the + // root node and N (if N is not yet visited). All the nodes reachable from N + // are marked as visited and are skipped in the DFS of subsequent nodes. + // + // Note: This algorithm tries to limit the number of edges out of the root + // node to some extent, but there may be redundant edges created depending on + // the iteration order. For example for a graph {A -> B}, an edge from the + // root node is added to both nodes if B is visited before A. 
While it does + // not result in minimal number of edges, this approach saves compile-time + // while keeping the number of edges in check. + auto &RootNode = createRootNode(); + df_iterator_default_set Visited; + for (auto *N : Graph) { + if (*N == RootNode) + continue; + for (auto I : depth_first_ext(N, Visited)) + if (I == N) + createRootedEdge(RootNode, *N); + } +} + +template void AbstractDependenceGraphBuilder::createDefUseEdges() { + for (NodeType *N : Graph) { + InstructionListType SrcIList; + N->collectInstructions([](const Instruction *I) { return true; }, SrcIList); + + // Use a set to mark the targets that we link to N, so we don't add + // duplicate def-use edges when more than one instruction in a target node + // use results of instructions that are contained in N. + SmallPtrSet VisitedTargets; + + for (Instruction *II : SrcIList) { + for (User *U : II->users()) { + Instruction *UI = dyn_cast(U); + if (!UI) + continue; + NodeType *DstNode = nullptr; + if (IMap.find(UI) != IMap.end()) + DstNode = IMap.find(UI)->second; + + // In the case of loops, the scope of the subgraph is all the + // basic blocks (and instructions within them) belonging to the loop. We + // simply ignore all the edges coming from (or going into) instructions + // or basic blocks outside of this range. + if (!DstNode) { + LLVM_DEBUG( + dbgs() + << "skipped def-use edge since the sink" << *UI + << " is outside the range of instructions being considered.\n"); + continue; + } + + // Self dependencies are ignored because they are redundant and + // uninteresting. + if (DstNode == N) { + LLVM_DEBUG(dbgs() + << "skipped def-use edge since the sink and the source (" + << N << ") are the same.\n"); + continue; + } + + if (VisitedTargets.insert(DstNode).second) { + createDefUseEdge(*N, *DstNode); + ++TotalDefUseEdges; + } + } + } + } +} + +template +void AbstractDependenceGraphBuilder::createMemoryDependencyEdges() { + using DGIterator = typename G::iterator; + auto isMemoryAccess = [](const Instruction *I) { + return I->mayReadOrWriteMemory(); + }; + for (DGIterator SrcIt = Graph.begin(), E = Graph.end(); SrcIt != E; ++SrcIt) { + InstructionListType SrcIList; + (*SrcIt)->collectInstructions(isMemoryAccess, SrcIList); + if (SrcIList.empty()) + continue; + + for (DGIterator DstIt = SrcIt; DstIt != E; ++DstIt) { + if (**SrcIt == **DstIt) + continue; + InstructionListType DstIList; + (*DstIt)->collectInstructions(isMemoryAccess, DstIList); + if (DstIList.empty()) + continue; + bool ForwardEdgeCreated = false; + bool BackwardEdgeCreated = false; + for (Instruction *ISrc : SrcIList) { + for (Instruction *IDst : DstIList) { + auto D = DI.depends(ISrc, IDst, true); + if (!D) + continue; + + // If we have a dependence with its left-most non-'=' direction + // being '>' we need to reverse the direction of the edge, because + // the source of the dependence cannot occur after the sink. For + // confused dependencies, we will create edges in both directions to + // represent the possibility of a cycle. 
+ + auto createConfusedEdges = [&](NodeType &Src, NodeType &Dst) { + if (!ForwardEdgeCreated) { + createMemoryEdge(Src, Dst); + ++TotalMemoryEdges; + } + if (!BackwardEdgeCreated) { + createMemoryEdge(Dst, Src); + ++TotalMemoryEdges; + } + ForwardEdgeCreated = BackwardEdgeCreated = true; + ++TotalConfusedEdges; + }; + + auto createForwardEdge = [&](NodeType &Src, NodeType &Dst) { + if (!ForwardEdgeCreated) { + createMemoryEdge(Src, Dst); + ++TotalMemoryEdges; + } + ForwardEdgeCreated = true; + }; + + auto createBackwardEdge = [&](NodeType &Src, NodeType &Dst) { + if (!BackwardEdgeCreated) { + createMemoryEdge(Dst, Src); + ++TotalMemoryEdges; + } + BackwardEdgeCreated = true; + }; + + if (D->isConfused()) + createConfusedEdges(**SrcIt, **DstIt); + else if (D->isOrdered() && !D->isLoopIndependent()) { + bool ReversedEdge = false; + for (unsigned Level = 1; Level <= D->getLevels(); ++Level) { + if (D->getDirection(Level) == Dependence::DVEntry::EQ) + continue; + else if (D->getDirection(Level) == Dependence::DVEntry::GT) { + createBackwardEdge(**SrcIt, **DstIt); + ReversedEdge = true; + ++TotalEdgeReversals; + break; + } else if (D->getDirection(Level) == Dependence::DVEntry::LT) + break; + else { + createConfusedEdges(**SrcIt, **DstIt); + break; + } + } + if (!ReversedEdge) + createForwardEdge(**SrcIt, **DstIt); + } else + createForwardEdge(**SrcIt, **DstIt); + + // Avoid creating duplicate edges. + if (ForwardEdgeCreated && BackwardEdgeCreated) + break; + } + + // If we've created edges in both directions, there is no more + // unique edge that we can create between these two nodes, so we + // can exit early. + if (ForwardEdgeCreated && BackwardEdgeCreated) + break; + } + } + } +} + +template class llvm::AbstractDependenceGraphBuilder; +template class llvm::DependenceGraphInfo; diff --git a/lib/Analysis/DivergenceAnalysis.cpp b/lib/Analysis/DivergenceAnalysis.cpp index 0ccd59ef2bfd..3d1be1e1cce0 100644 --- a/lib/Analysis/DivergenceAnalysis.cpp +++ b/lib/Analysis/DivergenceAnalysis.cpp @@ -412,6 +412,12 @@ bool DivergenceAnalysis::isDivergent(const Value &V) const { return DivergentValues.find(&V) != DivergentValues.end(); } +bool DivergenceAnalysis::isDivergentUse(const Use &U) const { + Value &V = *U.get(); + Instruction &I = *cast(U.getUser()); + return isDivergent(V) || isTemporalDivergent(*I.getParent(), V); +} + void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const { if (DivergentValues.empty()) return; @@ -449,6 +455,10 @@ bool GPUDivergenceAnalysis::isDivergent(const Value &val) const { return DA.isDivergent(val); } +bool GPUDivergenceAnalysis::isDivergentUse(const Use &use) const { + return DA.isDivergentUse(use); +} + void GPUDivergenceAnalysis::print(raw_ostream &OS, const Module *mod) const { OS << "Divergence of kernel " << DA.getFunction().getName() << " {\n"; DA.print(OS, mod); diff --git a/lib/Analysis/GlobalsModRef.cpp b/lib/Analysis/GlobalsModRef.cpp index 0d6c0ffb18a8..efdf9706ba3c 100644 --- a/lib/Analysis/GlobalsModRef.cpp +++ b/lib/Analysis/GlobalsModRef.cpp @@ -370,7 +370,8 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V, // passing into the function. if (Call->isDataOperand(&U)) { // Detect calls to free. 
- if (Call->isArgOperand(&U) && isFreeCall(I, &TLI)) { + if (Call->isArgOperand(&U) && + isFreeCall(I, &GetTLI(*Call->getFunction()))) { if (Writers) Writers->insert(Call->getParent()->getParent()); } else { @@ -432,7 +433,7 @@ bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) { Value *Ptr = GetUnderlyingObject(SI->getOperand(0), GV->getParent()->getDataLayout()); - if (!isAllocLikeFn(Ptr, &TLI)) + if (!isAllocLikeFn(Ptr, &GetTLI(*SI->getFunction()))) return false; // Too hard to analyze. // Analyze all uses of the allocation. If any of them are used in a @@ -576,6 +577,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { // We handle calls specially because the graph-relevant aspects are // handled above. if (auto *Call = dyn_cast(&I)) { + auto &TLI = GetTLI(*Node->getFunction()); if (isAllocationFn(Call, &TLI) || isFreeCall(Call, &TLI)) { // FIXME: It is completely unclear why this is necessary and not // handled by the above graph code. @@ -937,12 +939,13 @@ ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call, return intersectModRef(Known, AAResultBase::getModRefInfo(Call, Loc, AAQI)); } -GlobalsAAResult::GlobalsAAResult(const DataLayout &DL, - const TargetLibraryInfo &TLI) - : AAResultBase(), DL(DL), TLI(TLI) {} +GlobalsAAResult::GlobalsAAResult( + const DataLayout &DL, + std::function GetTLI) + : AAResultBase(), DL(DL), GetTLI(std::move(GetTLI)) {} GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg) - : AAResultBase(std::move(Arg)), DL(Arg.DL), TLI(Arg.TLI), + : AAResultBase(std::move(Arg)), DL(Arg.DL), GetTLI(std::move(Arg.GetTLI)), NonAddressTakenGlobals(std::move(Arg.NonAddressTakenGlobals)), IndirectGlobals(std::move(Arg.IndirectGlobals)), AllocsForIndirectGlobals(std::move(Arg.AllocsForIndirectGlobals)), @@ -957,10 +960,10 @@ GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg) GlobalsAAResult::~GlobalsAAResult() {} -/*static*/ GlobalsAAResult -GlobalsAAResult::analyzeModule(Module &M, const TargetLibraryInfo &TLI, - CallGraph &CG) { - GlobalsAAResult Result(M.getDataLayout(), TLI); +/*static*/ GlobalsAAResult GlobalsAAResult::analyzeModule( + Module &M, std::function GetTLI, + CallGraph &CG) { + GlobalsAAResult Result(M.getDataLayout(), GetTLI); // Discover which functions aren't recursive, to feed into AnalyzeGlobals. 
Result.CollectSCCMembership(CG); @@ -977,8 +980,12 @@ GlobalsAAResult::analyzeModule(Module &M, const TargetLibraryInfo &TLI, AnalysisKey GlobalsAA::Key; GlobalsAAResult GlobalsAA::run(Module &M, ModuleAnalysisManager &AM) { - return GlobalsAAResult::analyzeModule(M, - AM.getResult(M), + FunctionAnalysisManager &FAM = + AM.getResult(M).getManager(); + auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult(F); + }; + return GlobalsAAResult::analyzeModule(M, GetTLI, AM.getResult(M)); } @@ -999,9 +1006,11 @@ GlobalsAAWrapperPass::GlobalsAAWrapperPass() : ModulePass(ID) { } bool GlobalsAAWrapperPass::runOnModule(Module &M) { + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis().getTLI(F); + }; Result.reset(new GlobalsAAResult(GlobalsAAResult::analyzeModule( - M, getAnalysis().getTLI(), - getAnalysis().getCallGraph()))); + M, GetTLI, getAnalysis().getCallGraph()))); return false; } diff --git a/lib/Analysis/IVDescriptors.cpp b/lib/Analysis/IVDescriptors.cpp index ce285f82f720..6fb600114bc6 100644 --- a/lib/Analysis/IVDescriptors.cpp +++ b/lib/Analysis/IVDescriptors.cpp @@ -300,7 +300,8 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); if (!ReduxDesc.isRecurrence()) return false; - if (isa(ReduxDesc.getPatternInst())) + // FIXME: FMF is allowed on phi, but propagation is not handled correctly. + if (isa(ReduxDesc.getPatternInst()) && !IsAPhi) FMF &= ReduxDesc.getPatternInst()->getFastMathFlags(); } diff --git a/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/lib/Analysis/IndirectCallPromotionAnalysis.cpp index 6ff840efcb64..68153de8219f 100644 --- a/lib/Analysis/IndirectCallPromotionAnalysis.cpp +++ b/lib/Analysis/IndirectCallPromotionAnalysis.cpp @@ -53,7 +53,7 @@ static cl::opt "call callsite")); ICallPromotionAnalysis::ICallPromotionAnalysis() { - ValueDataArray = llvm::make_unique(MaxNumPromotions); + ValueDataArray = std::make_unique(MaxNumPromotions); } bool ICallPromotionAnalysis::isPromotionProfitable(uint64_t Count, diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 0dec146e0465..89811ec0e377 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -436,7 +436,8 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) { if (auto *AllocSize = dyn_cast_or_null(Size)) { Type *Ty = I.getAllocatedType(); AllocatedSize = SaturatingMultiplyAdd( - AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize); + AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty).getFixedSize(), + AllocatedSize); return Base::visitAlloca(I); } } @@ -444,7 +445,8 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) { // Accumulate the allocated size. if (I.isStaticAlloca()) { Type *Ty = I.getAllocatedType(); - AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize); + AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty).getFixedSize(), + AllocatedSize); } // We will happily inline static alloca instructions. @@ -1070,8 +1072,8 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { Value *SimpleV = nullptr; if (auto FI = dyn_cast(&I)) - SimpleV = SimplifyFPBinOp(I.getOpcode(), CLHS ? CLHS : LHS, - CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL); + SimpleV = SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, + CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL); else SimpleV = SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? 
CRHS : RHS, DL); @@ -1453,19 +1455,6 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { // Maximum valid cost increased in this function. int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1; - // Exit early for a large switch, assuming one case needs at least one - // instruction. - // FIXME: This is not true for a bit test, but ignore such case for now to - // save compile-time. - int64_t CostLowerBound = - std::min((int64_t)CostUpperBound, - (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost); - - if (CostLowerBound > Threshold && !ComputeFullInlineCost) { - addCost((int64_t)SI.getNumCases() * InlineConstants::InstrCost); - return false; - } - unsigned JumpTableSize = 0; unsigned NumCaseCluster = TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize); diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index e34bf6f4e43f..cb8987721700 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -56,8 +56,8 @@ static Value *simplifyFPUnOp(unsigned, Value *, const FastMathFlags &, const SimplifyQuery &, unsigned); static Value *SimplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &, unsigned); -static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &, - const SimplifyQuery &, unsigned); +static Value *SimplifyBinOp(unsigned, Value *, Value *, const FastMathFlags &, + const SimplifyQuery &, unsigned); static Value *SimplifyCmpInst(unsigned, Value *, Value *, const SimplifyQuery &, unsigned); static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, @@ -1371,7 +1371,8 @@ Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, /// Commuted variants are assumed to be handled by calling this function again /// with the parameters swapped. static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, - ICmpInst *UnsignedICmp, bool IsAnd) { + ICmpInst *UnsignedICmp, bool IsAnd, + const SimplifyQuery &Q) { Value *X, *Y; ICmpInst::Predicate EqPred; @@ -1380,6 +1381,59 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, return nullptr; ICmpInst::Predicate UnsignedPred; + + Value *A, *B; + // Y = (A - B); + if (match(Y, m_Sub(m_Value(A), m_Value(B)))) { + if (match(UnsignedICmp, + m_c_ICmp(UnsignedPred, m_Specific(A), m_Specific(B))) && + ICmpInst::isUnsigned(UnsignedPred)) { + if (UnsignedICmp->getOperand(0) != A) + UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred); + + // A >=/<= B || (A - B) != 0 <--> true + if ((UnsignedPred == ICmpInst::ICMP_UGE || + UnsignedPred == ICmpInst::ICMP_ULE) && + EqPred == ICmpInst::ICMP_NE && !IsAnd) + return ConstantInt::getTrue(UnsignedICmp->getType()); + // A B && (A - B) == 0 <--> false + if ((UnsignedPred == ICmpInst::ICMP_ULT || + UnsignedPred == ICmpInst::ICMP_UGT) && + EqPred == ICmpInst::ICMP_EQ && IsAnd) + return ConstantInt::getFalse(UnsignedICmp->getType()); + + // A B && (A - B) != 0 <--> A B + // A B || (A - B) != 0 <--> (A - B) != 0 + if (EqPred == ICmpInst::ICMP_NE && (UnsignedPred == ICmpInst::ICMP_ULT || + UnsignedPred == ICmpInst::ICMP_UGT)) + return IsAnd ? UnsignedICmp : ZeroICmp; + + // A <=/>= B && (A - B) == 0 <--> (A - B) == 0 + // A <=/>= B || (A - B) == 0 <--> A <=/>= B + if (EqPred == ICmpInst::ICMP_EQ && (UnsignedPred == ICmpInst::ICMP_ULE || + UnsignedPred == ICmpInst::ICMP_UGE)) + return IsAnd ? 
ZeroICmp : UnsignedICmp; + } + + // Given Y = (A - B) + // Y >= A && Y != 0 --> Y >= A iff B != 0 + // Y < A || Y == 0 --> Y < A iff B != 0 + if (match(UnsignedICmp, + m_c_ICmp(UnsignedPred, m_Specific(Y), m_Specific(A)))) { + if (UnsignedICmp->getOperand(0) != Y) + UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred); + + if (UnsignedPred == ICmpInst::ICMP_UGE && IsAnd && + EqPred == ICmpInst::ICMP_NE && + isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) + return UnsignedICmp; + if (UnsignedPred == ICmpInst::ICMP_ULT && !IsAnd && + EqPred == ICmpInst::ICMP_EQ && + isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) + return UnsignedICmp; + } + } + if (match(UnsignedICmp, m_ICmp(UnsignedPred, m_Value(X), m_Specific(Y))) && ICmpInst::isUnsigned(UnsignedPred)) ; @@ -1395,19 +1449,33 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE) return IsAnd ? UnsignedICmp : ZeroICmp; - // X >= Y || Y != 0 --> true + // X <= Y && Y != 0 --> X <= Y iff X != 0 + // X <= Y || Y != 0 --> Y != 0 iff X != 0 + if (UnsignedPred == ICmpInst::ICMP_ULE && EqPred == ICmpInst::ICMP_NE && + isKnownNonZero(X, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) + return IsAnd ? UnsignedICmp : ZeroICmp; + + // X >= Y && Y == 0 --> Y == 0 // X >= Y || Y == 0 --> X >= Y - if (UnsignedPred == ICmpInst::ICMP_UGE && !IsAnd) { - if (EqPred == ICmpInst::ICMP_NE) - return getTrue(UnsignedICmp->getType()); - return UnsignedICmp; - } + if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_EQ) + return IsAnd ? ZeroICmp : UnsignedICmp; + + // X > Y && Y == 0 --> Y == 0 iff X != 0 + // X > Y || Y == 0 --> X > Y iff X != 0 + if (UnsignedPred == ICmpInst::ICMP_UGT && EqPred == ICmpInst::ICMP_EQ && + isKnownNonZero(X, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) + return IsAnd ? 
ZeroICmp : UnsignedICmp; // X < Y && Y == 0 --> false if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_EQ && IsAnd) return getFalse(UnsignedICmp->getType()); + // X >= Y || Y != 0 --> true + if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_NE && + !IsAnd) + return getTrue(UnsignedICmp->getType()); + return nullptr; } @@ -1587,10 +1655,10 @@ static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1, } static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1, - const InstrInfoQuery &IIQ) { - if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true)) + const SimplifyQuery &Q) { + if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true, Q)) return X; - if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true)) + if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true, Q)) return X; if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1)) @@ -1604,9 +1672,9 @@ static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1, if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, true)) return X; - if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1, IIQ)) + if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1, Q.IIQ)) return X; - if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0, IIQ)) + if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0, Q.IIQ)) return X; return nullptr; @@ -1660,10 +1728,10 @@ static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1, } static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1, - const InstrInfoQuery &IIQ) { - if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false)) + const SimplifyQuery &Q) { + if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false, Q)) return X; - if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false)) + if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false, Q)) return X; if (Value *X = simplifyOrOfICmpsWithSameOperands(Op0, Op1)) @@ -1677,9 +1745,9 @@ static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1, if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, false)) return X; - if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1, IIQ)) + if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1, Q.IIQ)) return X; - if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0, IIQ)) + if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0, Q.IIQ)) return X; return nullptr; @@ -1738,8 +1806,8 @@ static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q, auto *ICmp0 = dyn_cast(Op0); auto *ICmp1 = dyn_cast(Op1); if (ICmp0 && ICmp1) - V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1, Q.IIQ) - : simplifyOrOfICmps(ICmp0, ICmp1, Q.IIQ); + V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1, Q) + : simplifyOrOfICmps(ICmp0, ICmp1, Q); auto *FCmp0 = dyn_cast(Op0); auto *FCmp1 = dyn_cast(Op1); @@ -1759,6 +1827,77 @@ static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q, return nullptr; } +/// Check that the Op1 is in expected form, i.e.: +/// %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???) +/// %Op1 = extractvalue { i4, i1 } %Agg, 1 +static bool omitCheckForZeroBeforeMulWithOverflowInternal(Value *Op1, + Value *X) { + auto *Extract = dyn_cast(Op1); + // We should only be extracting the overflow bit. + if (!Extract || !Extract->getIndices().equals(1)) + return false; + Value *Agg = Extract->getAggregateOperand(); + // This should be a multiplication-with-overflow intrinsic. 
+ if (!match(Agg, m_CombineOr(m_Intrinsic(), + m_Intrinsic()))) + return false; + // One of its multipliers should be the value we checked for zero before. + if (!match(Agg, m_CombineOr(m_Argument<0>(m_Specific(X)), + m_Argument<1>(m_Specific(X))))) + return false; + return true; +} + +/// The @llvm.[us]mul.with.overflow intrinsic could have been folded from some +/// other form of check, e.g. one that was using division; it may have been +/// guarded against division-by-zero. We can drop that check now. +/// Look for: +/// %Op0 = icmp ne i4 %X, 0 +/// %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???) +/// %Op1 = extractvalue { i4, i1 } %Agg, 1 +/// %??? = and i1 %Op0, %Op1 +/// We can just return %Op1 +static Value *omitCheckForZeroBeforeMulWithOverflow(Value *Op0, Value *Op1) { + ICmpInst::Predicate Pred; + Value *X; + if (!match(Op0, m_ICmp(Pred, m_Value(X), m_Zero())) || + Pred != ICmpInst::Predicate::ICMP_NE) + return nullptr; + // Is Op1 in expected form? + if (!omitCheckForZeroBeforeMulWithOverflowInternal(Op1, X)) + return nullptr; + // Can omit 'and', and just return the overflow bit. + return Op1; +} + +/// The @llvm.[us]mul.with.overflow intrinsic could have been folded from some +/// other form of check, e.g. one that was using division; it may have been +/// guarded against division-by-zero. We can drop that check now. +/// Look for: +/// %Op0 = icmp eq i4 %X, 0 +/// %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???) +/// %Op1 = extractvalue { i4, i1 } %Agg, 1 +/// %NotOp1 = xor i1 %Op1, true +/// %or = or i1 %Op0, %NotOp1 +/// We can just return %NotOp1 +static Value *omitCheckForZeroBeforeInvertedMulWithOverflow(Value *Op0, + Value *NotOp1) { + ICmpInst::Predicate Pred; + Value *X; + if (!match(Op0, m_ICmp(Pred, m_Value(X), m_Zero())) || + Pred != ICmpInst::Predicate::ICMP_EQ) + return nullptr; + // We expect the other hand of an 'or' to be a 'not'. + Value *Op1; + if (!match(NotOp1, m_Not(m_Value(Op1)))) + return nullptr; + // Is Op1 in expected form? + if (!omitCheckForZeroBeforeMulWithOverflowInternal(Op1, X)) + return nullptr; + // Can omit 'and', and just return the inverted overflow bit. + return NotOp1; +} + /// Given operands for an And, see if we can fold the result. /// If not, this returns null. static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, @@ -1813,6 +1952,14 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return Op0; } + // If we have a multiplication overflow check that is being 'and'ed with a + // check that one of the multipliers is not zero, we can omit the 'and', and + // only keep the overflow check. + if (Value *V = omitCheckForZeroBeforeMulWithOverflow(Op0, Op1)) + return V; + if (Value *V = omitCheckForZeroBeforeMulWithOverflow(Op1, Op0)) + return V; + // A & (-A) = A if A is a power of two or zero. if (match(Op0, m_Neg(m_Specific(Op1))) || match(Op1, m_Neg(m_Specific(Op0)))) { @@ -1987,6 +2134,14 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false)) return V; + // If we have a multiplication overflow check that is being 'and'ed with a + // check that one of the multipliers is not zero, we can omit the 'and', and + // only keep the overflow check. 
+ if (Value *V = omitCheckForZeroBeforeInvertedMulWithOverflow(Op0, Op1)) + return V; + if (Value *V = omitCheckForZeroBeforeInvertedMulWithOverflow(Op1, Op0)) + return V; + // Try some generic simplifications for associative operations. if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q, MaxRecurse)) @@ -3529,6 +3684,9 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, // %sel = select i1 %cmp, i32 -2147483648, i32 %add // // We can't replace %sel with %add unless we strip away the flags. + // TODO: This is an unusual limitation because better analysis results in + // worse simplification. InstCombine can do this fold more generally + // by dropping the flags. Remove this fold to save compile-time? if (isa(B)) if (Q.IIQ.hasNoSignedWrap(B) || Q.IIQ.hasNoUnsignedWrap(B)) return nullptr; @@ -4324,14 +4482,16 @@ static Constant *propagateNaN(Constant *In) { return In; } -static Constant *simplifyFPBinop(Value *Op0, Value *Op1) { - if (isa(Op0) || isa(Op1)) - return ConstantFP::getNaN(Op0->getType()); +/// Perform folds that are common to any floating-point operation. This implies +/// transforms based on undef/NaN because the operation itself makes no +/// difference to the result. +static Constant *simplifyFPOp(ArrayRef Ops) { + if (any_of(Ops, [](Value *V) { return isa(V); })) + return ConstantFP::getNaN(Ops[0]->getType()); - if (match(Op0, m_NaN())) - return propagateNaN(cast(Op0)); - if (match(Op1, m_NaN())) - return propagateNaN(cast(Op1)); + for (Value *V : Ops) + if (match(V, m_NaN())) + return propagateNaN(cast(V)); return nullptr; } @@ -4343,7 +4503,7 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q)) return C; - if (Constant *C = simplifyFPBinop(Op0, Op1)) + if (Constant *C = simplifyFPOp({Op0, Op1})) return C; // fadd X, -0 ==> X @@ -4390,7 +4550,7 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q)) return C; - if (Constant *C = simplifyFPBinop(Op0, Op1)) + if (Constant *C = simplifyFPOp({Op0, Op1})) return C; // fsub X, +0 ==> X @@ -4430,23 +4590,27 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, return nullptr; } -/// Given the operands for an FMul, see if we can fold the result -static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const SimplifyQuery &Q, unsigned MaxRecurse) { - if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q)) - return C; - - if (Constant *C = simplifyFPBinop(Op0, Op1)) +static Value *SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q, unsigned MaxRecurse) { + if (Constant *C = simplifyFPOp({Op0, Op1})) return C; // fmul X, 1.0 ==> X if (match(Op1, m_FPOne())) return Op0; + // fmul 1.0, X ==> X + if (match(Op0, m_FPOne())) + return Op1; + // fmul nnan nsz X, 0 ==> 0 if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZeroFP())) return ConstantFP::getNullValue(Op0->getType()); + // fmul nnan nsz 0, X ==> 0 + if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZeroFP())) + return ConstantFP::getNullValue(Op1->getType()); + // sqrt(X) * sqrt(X) --> X, if we can: // 1. Remove the intermediate rounding (reassociate). // 2. Ignore non-zero negative numbers because sqrt would produce NAN. 
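// Illustrative sketch, not from the patch: the arithmetic fact that the
// omitCheckForZeroBeforeMulWithOverflow folds above rely on. A 'X != 0' guard
// next to a multiplication-overflow check is redundant, because a zero
// multiplier can never overflow. __builtin_mul_overflow is used here only as a
// stand-in for @llvm.umul.with.overflow.
#include <cstdint>

bool guardedOverflowCheck(uint32_t X, uint32_t Y) {
  uint32_t Product;
  bool Overflow = __builtin_mul_overflow(X, Y, &Product);
  return X != 0 && Overflow; // If X == 0 the product cannot overflow...
}

bool simplifiedOverflowCheck(uint32_t X, uint32_t Y) {
  uint32_t Product;
  return __builtin_mul_overflow(X, Y, &Product); // ...so only this remains.
}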
@@ -4459,6 +4623,16 @@ static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, return nullptr; } +/// Given the operands for an FMul, see if we can fold the result +static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q, unsigned MaxRecurse) { + if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q)) + return C; + + // Now apply simplifications that do not require rounding. + return SimplifyFMAFMul(Op0, Op1, FMF, Q, MaxRecurse); +} + Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q) { return ::SimplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit); @@ -4475,12 +4649,17 @@ Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, return ::SimplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit); } +Value *llvm::SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q) { + return ::SimplifyFMAFMul(Op0, Op1, FMF, Q, RecursionLimit); +} + static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q, unsigned) { if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q)) return C; - if (Constant *C = simplifyFPBinop(Op0, Op1)) + if (Constant *C = simplifyFPOp({Op0, Op1})) return C; // X / 1.0 -> X @@ -4525,7 +4704,7 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q)) return C; - if (Constant *C = simplifyFPBinop(Op0, Op1)) + if (Constant *C = simplifyFPOp({Op0, Op1})) return C; // Unlike fdiv, the result of frem always matches the sign of the dividend. @@ -4564,8 +4743,7 @@ static Value *simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q, /// Given the operand for a UnaryOperator, see if we can fold the result. /// If not, this returns null. -/// In contrast to SimplifyUnOp, try to use FastMathFlag when folding the -/// result. In case we don't need FastMathFlags, simply fall to SimplifyUnOp. +/// Try to use FastMathFlags when folding the result. static Value *simplifyFPUnOp(unsigned Opcode, Value *Op, const FastMathFlags &FMF, const SimplifyQuery &Q, unsigned MaxRecurse) { @@ -4581,8 +4759,8 @@ Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q) { return ::simplifyUnOp(Opcode, Op, Q, RecursionLimit); } -Value *llvm::SimplifyFPUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF, - const SimplifyQuery &Q) { +Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF, + const SimplifyQuery &Q) { return ::simplifyFPUnOp(Opcode, Op, FMF, Q, RecursionLimit); } @@ -4634,11 +4812,10 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, /// Given operands for a BinaryOperator, see if we can fold the result. /// If not, this returns null. -/// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the -/// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp. -static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const FastMathFlags &FMF, const SimplifyQuery &Q, - unsigned MaxRecurse) { +/// Try to use FastMathFlags when folding the result. 
+static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + const FastMathFlags &FMF, const SimplifyQuery &Q, + unsigned MaxRecurse) { switch (Opcode) { case Instruction::FAdd: return SimplifyFAddInst(LHS, RHS, FMF, Q, MaxRecurse); @@ -4658,9 +4835,9 @@ Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, return ::SimplifyBinOp(Opcode, LHS, RHS, Q, RecursionLimit); } -Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, - FastMathFlags FMF, const SimplifyQuery &Q) { - return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit); +Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + FastMathFlags FMF, const SimplifyQuery &Q) { + return ::SimplifyBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit); } /// Given operands for a CmpInst, see if we can fold the result. @@ -5009,6 +5186,15 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { } return nullptr; } + case Intrinsic::fma: + case Intrinsic::fmuladd: { + Value *Op0 = Call->getArgOperand(0); + Value *Op1 = Call->getArgOperand(1); + Value *Op2 = Call->getArgOperand(2); + if (Value *V = simplifyFPOp({ Op0, Op1, Op2 })) + return V; + return nullptr; + } default: return nullptr; } @@ -5221,14 +5407,16 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ, /// If we have a pre-simplified value in 'SimpleV', that is forcibly used to /// replace the instruction 'I'. Otherwise, we simply add 'I' to the list of /// instructions to process and attempt to simplify it using -/// InstructionSimplify. +/// InstructionSimplify. Recursively visited users which could not be +/// simplified themselves are to the optional UnsimplifiedUsers set for +/// further processing by the caller. /// /// This routine returns 'true' only when *it* simplifies something. The passed /// in simplified value does not count toward this. -static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, - AssumptionCache *AC) { +static bool replaceAndRecursivelySimplifyImpl( + Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI, + const DominatorTree *DT, AssumptionCache *AC, + SmallSetVector *UnsimplifiedUsers = nullptr) { bool Simplified = false; SmallSetVector Worklist; const DataLayout &DL = I->getModule()->getDataLayout(); @@ -5258,8 +5446,11 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, // See if this instruction simplifies. 
SimpleV = SimplifyInstruction(I, {DL, TLI, DT, AC}); - if (!SimpleV) + if (!SimpleV) { + if (UnsimplifiedUsers) + UnsimplifiedUsers->insert(I); continue; + } Simplified = true; @@ -5285,16 +5476,17 @@ bool llvm::recursivelySimplifyInstruction(Instruction *I, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC) { - return replaceAndRecursivelySimplifyImpl(I, nullptr, TLI, DT, AC); + return replaceAndRecursivelySimplifyImpl(I, nullptr, TLI, DT, AC, nullptr); } -bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, - AssumptionCache *AC) { +bool llvm::replaceAndRecursivelySimplify( + Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI, + const DominatorTree *DT, AssumptionCache *AC, + SmallSetVector *UnsimplifiedUsers) { assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!"); assert(SimpleV && "Must provide a simplified value."); - return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC); + return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC, + UnsimplifiedUsers); } namespace llvm { @@ -5302,7 +5494,7 @@ const SimplifyQuery getBestSimplifyQuery(Pass &P, Function &F) { auto *DTWP = P.getAnalysisIfAvailable(); auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; auto *TLIWP = P.getAnalysisIfAvailable(); - auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr; + auto *TLI = TLIWP ? &TLIWP->getTLI(F) : nullptr; auto *ACWP = P.getAnalysisIfAvailable(); auto *AC = ACWP ? &ACWP->getAssumptionCache(F) : nullptr; return {F.getParent()->getDataLayout(), TLI, DT, AC}; diff --git a/lib/Analysis/LazyBranchProbabilityInfo.cpp b/lib/Analysis/LazyBranchProbabilityInfo.cpp index f2592c26b373..e727de468a0d 100644 --- a/lib/Analysis/LazyBranchProbabilityInfo.cpp +++ b/lib/Analysis/LazyBranchProbabilityInfo.cpp @@ -55,8 +55,9 @@ void LazyBranchProbabilityInfoPass::releaseMemory() { LBPI.reset(); } bool LazyBranchProbabilityInfoPass::runOnFunction(Function &F) { LoopInfo &LI = getAnalysis().getLoopInfo(); - TargetLibraryInfo &TLI = getAnalysis().getTLI(); - LBPI = llvm::make_unique(&F, &LI, &TLI); + TargetLibraryInfo &TLI = + getAnalysis().getTLI(F); + LBPI = std::make_unique(&F, &LI, &TLI); return false; } diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp index 797fcf516429..ef31c1e0ba8c 100644 --- a/lib/Analysis/LazyCallGraph.cpp +++ b/lib/Analysis/LazyCallGraph.cpp @@ -150,7 +150,8 @@ static bool isKnownLibFunction(Function &F, TargetLibraryInfo &TLI) { return TLI.getLibFunc(F, LF) || TLI.isFunctionVectorizable(F.getName()); } -LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) { +LazyCallGraph::LazyCallGraph( + Module &M, function_ref GetTLI) { LLVM_DEBUG(dbgs() << "Building CG for module: " << M.getModuleIdentifier() << "\n"); for (Function &F : M) { @@ -159,7 +160,7 @@ LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) { // If this function is a known lib function to LLVM then we want to // synthesize reference edges to it to model the fact that LLVM can turn // arbitrary code into a library function call. - if (isKnownLibFunction(F, TLI)) + if (isKnownLibFunction(F, GetTLI(F))) LibFunctions.insert(&F); if (F.hasLocalLinkage()) @@ -631,7 +632,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall( // If the merge range is empty, then adding the edge didn't actually form any // new cycles. We're done. 
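// Illustrative sketch of the recurring API change in the GlobalsAA,
// LazyCallGraph and LazyValueInfo hunks above: analyses now take a
// per-function TargetLibraryInfo callback instead of a single module-wide TLI
// reference. The types below are simplified stand-ins, not the real LLVM
// classes.
#include <functional>

namespace sketch {
struct Function {};
struct TargetLibraryInfo {};

class AnalysisResult {
  std::function<TargetLibraryInfo &(Function &)> GetTLI;

public:
  explicit AnalysisResult(std::function<TargetLibraryInfo &(Function &)> GetTLI)
      : GetTLI(std::move(GetTLI)) {}

  void visit(Function &F) {
    // The TLI is looked up lazily, per function, at the point of use.
    TargetLibraryInfo &TLI = GetTLI(F);
    (void)TLI;
  }
};
} // namespace sketch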
- if (empty(MergeRange)) { + if (MergeRange.empty()) { // Now that the SCC structure is finalized, flip the kind to call. SourceN->setEdgeKind(TargetN, Edge::Call); return false; // No new cycle. @@ -1751,16 +1752,14 @@ static void printNode(raw_ostream &OS, LazyCallGraph::Node &N) { } static void printSCC(raw_ostream &OS, LazyCallGraph::SCC &C) { - ptrdiff_t Size = size(C); - OS << " SCC with " << Size << " functions:\n"; + OS << " SCC with " << C.size() << " functions:\n"; for (LazyCallGraph::Node &N : C) OS << " " << N.getFunction().getName() << "\n"; } static void printRefSCC(raw_ostream &OS, LazyCallGraph::RefSCC &C) { - ptrdiff_t Size = size(C); - OS << " RefSCC with " << Size << " call SCCs:\n"; + OS << " RefSCC with " << C.size() << " call SCCs:\n"; for (LazyCallGraph::SCC &InnerC : C) printSCC(OS, InnerC); diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 542ff709d475..96722f32e355 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -188,7 +188,7 @@ namespace { else { auto It = ValueCache.find_as(Val); if (It == ValueCache.end()) { - ValueCache[Val] = make_unique(Val, this); + ValueCache[Val] = std::make_unique(Val, this); It = ValueCache.find_as(Val); assert(It != ValueCache.end() && "Val was just added to the map!"); } @@ -434,6 +434,8 @@ namespace { ValueLatticeElement &BBLV, WithOverflowInst *WO, BasicBlock *BB); bool solveBlockValueIntrinsic(ValueLatticeElement &BBLV, IntrinsicInst *II, BasicBlock *BB); + bool solveBlockValueExtractValue(ValueLatticeElement &BBLV, + ExtractValueInst *EVI, BasicBlock *BB); void intersectAssumeOrGuardBlockValueConstantRange(Value *Val, ValueLatticeElement &BBLV, Instruction *BBI); @@ -648,9 +650,7 @@ bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res, return solveBlockValueBinaryOp(Res, BO, BB); if (auto *EVI = dyn_cast(BBI)) - if (auto *WO = dyn_cast(EVI->getAggregateOperand())) - if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 0) - return solveBlockValueOverflowIntrinsic(Res, WO, BB); + return solveBlockValueExtractValue(Res, EVI, BB); if (auto *II = dyn_cast(BBI)) return solveBlockValueIntrinsic(Res, II, BB); @@ -1135,6 +1135,33 @@ bool LazyValueInfoImpl::solveBlockValueIntrinsic( } } +bool LazyValueInfoImpl::solveBlockValueExtractValue( + ValueLatticeElement &BBLV, ExtractValueInst *EVI, BasicBlock *BB) { + if (auto *WO = dyn_cast(EVI->getAggregateOperand())) + if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 0) + return solveBlockValueOverflowIntrinsic(BBLV, WO, BB); + + // Handle extractvalue of insertvalue to allow further simplification + // based on replaced with.overflow intrinsics. + if (Value *V = SimplifyExtractValueInst( + EVI->getAggregateOperand(), EVI->getIndices(), + EVI->getModule()->getDataLayout())) { + if (!hasBlockValue(V, BB)) { + if (pushBlockValue({ BB, V })) + return false; + BBLV = ValueLatticeElement::getOverdefined(); + return true; + } + BBLV = getBlockValue(V, BB); + return true; + } + + LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined (unknown extractvalue).\n"); + BBLV = ValueLatticeElement::getOverdefined(); + return true; +} + static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI, bool isTrueDest) { Value *LHS = ICI->getOperand(0); @@ -1575,7 +1602,7 @@ bool LazyValueInfoWrapperPass::runOnFunction(Function &F) { DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable(); Info.DT = DTWP ? 
&DTWP->getDomTree() : nullptr; - Info.TLI = &getAnalysis().getTLI(); + Info.TLI = &getAnalysis().getTLI(F); if (Info.PImpl) getImpl(Info.PImpl, Info.AC, &DL, Info.DT).clear(); diff --git a/lib/Analysis/LegacyDivergenceAnalysis.cpp b/lib/Analysis/LegacyDivergenceAnalysis.cpp index 52212e1c42aa..7de9d2cbfddb 100644 --- a/lib/Analysis/LegacyDivergenceAnalysis.cpp +++ b/lib/Analysis/LegacyDivergenceAnalysis.cpp @@ -93,8 +93,9 @@ namespace { class DivergencePropagator { public: DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT, - PostDominatorTree &PDT, DenseSet &DV) - : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {} + PostDominatorTree &PDT, DenseSet &DV, + DenseSet &DU) + : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV), DU(DU) {} void populateWithSourcesOfDivergence(); void propagate(); @@ -118,11 +119,14 @@ private: PostDominatorTree &PDT; std::vector Worklist; // Stack for DFS. DenseSet &DV; // Stores all divergent values. + DenseSet &DU; // Stores divergent uses of possibly uniform + // values. }; void DivergencePropagator::populateWithSourcesOfDivergence() { Worklist.clear(); DV.clear(); + DU.clear(); for (auto &I : instructions(F)) { if (TTI.isSourceOfDivergence(&I)) { Worklist.push_back(&I); @@ -197,8 +201,10 @@ void DivergencePropagator::exploreSyncDependency(Instruction *TI) { // dominators of TI until it is outside the influence region. BasicBlock *InfluencedBB = ThisBB; while (InfluenceRegion.count(InfluencedBB)) { - for (auto &I : *InfluencedBB) - findUsersOutsideInfluenceRegion(I, InfluenceRegion); + for (auto &I : *InfluencedBB) { + if (!DV.count(&I)) + findUsersOutsideInfluenceRegion(I, InfluenceRegion); + } DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom(); if (IDomNode == nullptr) break; @@ -208,9 +214,10 @@ void DivergencePropagator::exploreSyncDependency(Instruction *TI) { void DivergencePropagator::findUsersOutsideInfluenceRegion( Instruction &I, const DenseSet &InfluenceRegion) { - for (User *U : I.users()) { - Instruction *UserInst = cast(U); + for (Use &Use : I.uses()) { + Instruction *UserInst = cast(Use.getUser()); if (!InfluenceRegion.count(UserInst->getParent())) { + DU.insert(&Use); if (DV.insert(UserInst).second) Worklist.push_back(UserInst); } @@ -250,9 +257,8 @@ void DivergencePropagator::computeInfluenceRegion( void DivergencePropagator::exploreDataDependency(Value *V) { // Follow def-use chains of V. 
for (User *U : V->users()) { - Instruction *UserInst = cast(U); - if (!TTI.isAlwaysUniform(U) && DV.insert(UserInst).second) - Worklist.push_back(UserInst); + if (!TTI.isAlwaysUniform(U) && DV.insert(U).second) + Worklist.push_back(U); } } @@ -320,6 +326,7 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) { return false; DivergentValues.clear(); + DivergentUses.clear(); gpuDA = nullptr; auto &DT = getAnalysis().getDomTree(); @@ -328,11 +335,11 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) { if (shouldUseGPUDivergenceAnalysis(F)) { // run the new GPU divergence analysis auto &LI = getAnalysis().getLoopInfo(); - gpuDA = llvm::make_unique(F, DT, PDT, LI, TTI); + gpuDA = std::make_unique(F, DT, PDT, LI, TTI); } else { // run LLVM's existing DivergenceAnalysis - DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues); + DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues, DivergentUses); DP.populateWithSourcesOfDivergence(); DP.propagate(); } @@ -351,6 +358,13 @@ bool LegacyDivergenceAnalysis::isDivergent(const Value *V) const { return DivergentValues.count(V); } +bool LegacyDivergenceAnalysis::isDivergentUse(const Use *U) const { + if (gpuDA) { + return gpuDA->isDivergentUse(*U); + } + return DivergentValues.count(U->get()) || DivergentUses.count(U); +} + void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const { if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty()) return; diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index d28b8a189d4b..db18716c64cf 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -205,7 +205,7 @@ bool Lint::runOnFunction(Function &F) { AA = &getAnalysis().getAAResults(); AC = &getAnalysis().getAssumptionCache(F); DT = &getAnalysis().getDomTree(); - TLI = &getAnalysis().getTLI(); + TLI = &getAnalysis().getTLI(F); visit(F); dbgs() << MessagesStr.str(); Messages.clear(); diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index 31da4e9ec783..641e92eac781 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -12,6 +12,9 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalAlias.h" @@ -24,34 +27,30 @@ using namespace llvm; -static bool isAligned(const Value *Base, const APInt &Offset, unsigned Align, - const DataLayout &DL) { - APInt BaseAlign(Offset.getBitWidth(), Base->getPointerAlignment(DL)); - - if (!BaseAlign) { - Type *Ty = Base->getType()->getPointerElementType(); - if (!Ty->isSized()) - return false; - BaseAlign = DL.getABITypeAlignment(Ty); - } - - APInt Alignment(Offset.getBitWidth(), Align); - - assert(Alignment.isPowerOf2() && "must be a power of 2!"); - return BaseAlign.uge(Alignment) && !(Offset & (Alignment-1)); +static MaybeAlign getBaseAlign(const Value *Base, const DataLayout &DL) { + if (const MaybeAlign PA = Base->getPointerAlignment(DL)) + return *PA; + Type *const Ty = Base->getType()->getPointerElementType(); + if (!Ty->isSized()) + return None; + return Align(DL.getABITypeAlignment(Ty)); } -static bool isAligned(const Value *Base, unsigned Align, const DataLayout &DL) { - Type *Ty = Base->getType(); - assert(Ty->isSized() && "must be sized"); - APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0); - return isAligned(Base, Offset, Align, DL); +static bool isAligned(const Value *Base, const 
APInt &Offset, Align Alignment, + const DataLayout &DL) { + if (MaybeAlign BA = getBaseAlign(Base, DL)) { + const APInt APBaseAlign(Offset.getBitWidth(), BA->value()); + const APInt APAlign(Offset.getBitWidth(), Alignment.value()); + assert(APAlign.isPowerOf2() && "must be a power of 2!"); + return APBaseAlign.uge(APAlign) && !(Offset & (APAlign - 1)); + } + return false; } /// Test if V is always a pointer to allocated and suitably aligned memory for /// a simple load or store. static bool isDereferenceableAndAlignedPointer( - const Value *V, unsigned Align, const APInt &Size, const DataLayout &DL, + const Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT, SmallPtrSetImpl &Visited) { // Already visited? Bail out, we've likely hit unreachable code. @@ -63,17 +62,22 @@ static bool isDereferenceableAndAlignedPointer( // bitcast instructions are no-ops as far as dereferenceability is concerned. if (const BitCastOperator *BC = dyn_cast(V)) - return isDereferenceableAndAlignedPointer(BC->getOperand(0), Align, Size, - DL, CtxI, DT, Visited); + return isDereferenceableAndAlignedPointer(BC->getOperand(0), Alignment, + Size, DL, CtxI, DT, Visited); bool CheckForNonNull = false; APInt KnownDerefBytes(Size.getBitWidth(), V->getPointerDereferenceableBytes(DL, CheckForNonNull)); - if (KnownDerefBytes.getBoolValue()) { - if (KnownDerefBytes.uge(Size)) - if (!CheckForNonNull || isKnownNonZero(V, DL, 0, nullptr, CtxI, DT)) - return isAligned(V, Align, DL); - } + if (KnownDerefBytes.getBoolValue() && KnownDerefBytes.uge(Size)) + if (!CheckForNonNull || isKnownNonZero(V, DL, 0, nullptr, CtxI, DT)) { + // As we recursed through GEPs to get here, we've incrementally checked + // that each step advanced by a multiple of the alignment. If our base is + // properly aligned, then the original offset accessed must also be. + Type *Ty = V->getType(); + assert(Ty->isSized() && "must be sized"); + APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0); + return isAligned(V, Offset, Alignment, DL); + } // For GEPs, determine if the indexing lands within the allocated object. if (const GEPOperator *GEP = dyn_cast(V)) { @@ -81,7 +85,8 @@ static bool isDereferenceableAndAlignedPointer( APInt Offset(DL.getIndexTypeSizeInBits(GEP->getType()), 0); if (!GEP->accumulateConstantOffset(DL, Offset) || Offset.isNegative() || - !Offset.urem(APInt(Offset.getBitWidth(), Align)).isMinValue()) + !Offset.urem(APInt(Offset.getBitWidth(), Alignment.value())) + .isMinValue()) return false; // If the base pointer is dereferenceable for Offset+Size bytes, then the @@ -93,67 +98,69 @@ static bool isDereferenceableAndAlignedPointer( // Offset and Size may have different bit widths if we have visited an // addrspacecast, so we can't do arithmetic directly on the APInt values. 
return isDereferenceableAndAlignedPointer( - Base, Align, Offset + Size.sextOrTrunc(Offset.getBitWidth()), - DL, CtxI, DT, Visited); + Base, Alignment, Offset + Size.sextOrTrunc(Offset.getBitWidth()), DL, + CtxI, DT, Visited); } // For gc.relocate, look through relocations if (const GCRelocateInst *RelocateInst = dyn_cast(V)) return isDereferenceableAndAlignedPointer( - RelocateInst->getDerivedPtr(), Align, Size, DL, CtxI, DT, Visited); + RelocateInst->getDerivedPtr(), Alignment, Size, DL, CtxI, DT, Visited); if (const AddrSpaceCastInst *ASC = dyn_cast(V)) - return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, Size, - DL, CtxI, DT, Visited); + return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Alignment, + Size, DL, CtxI, DT, Visited); if (const auto *Call = dyn_cast(V)) - if (auto *RP = getArgumentAliasingToReturnedPointer(Call)) - return isDereferenceableAndAlignedPointer(RP, Align, Size, DL, CtxI, DT, - Visited); + if (auto *RP = getArgumentAliasingToReturnedPointer(Call, true)) + return isDereferenceableAndAlignedPointer(RP, Alignment, Size, DL, CtxI, + DT, Visited); // If we don't know, assume the worst. return false; } -bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, +bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT) { + // Note: At the moment, Size can be zero. This ends up being interpreted as + // a query of whether [Base, V] is dereferenceable and V is aligned (since + // that's what the implementation happened to do). It's unclear if this is + // the desired semantic, but at least SelectionDAG does exercise this case. + SmallPtrSet Visited; - return ::isDereferenceableAndAlignedPointer(V, Align, Size, DL, CtxI, DT, + return ::isDereferenceableAndAlignedPointer(V, Alignment, Size, DL, CtxI, DT, Visited); } bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, - unsigned Align, + MaybeAlign MA, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT) { + if (!Ty->isSized()) + return false; + // When dereferenceability information is provided by a dereferenceable // attribute, we know exactly how many bytes are dereferenceable. If we can // determine the exact offset to the attributed variable, we can use that // information here. // Require ABI alignment for loads without alignment specification - if (Align == 0) - Align = DL.getABITypeAlignment(Ty); - - if (!Ty->isSized()) - return false; - - SmallPtrSet Visited; - return ::isDereferenceableAndAlignedPointer( - V, Align, - APInt(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty)), - DL, CtxI, DT, Visited); + const Align Alignment = DL.getValueOrABITypeAlignment(MA, Ty); + APInt AccessSize(DL.getIndexTypeSizeInBits(V->getType()), + DL.getTypeStoreSize(Ty)); + return isDereferenceableAndAlignedPointer(V, Alignment, AccessSize, DL, CtxI, + DT); } bool llvm::isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT) { - return isDereferenceableAndAlignedPointer(V, Ty, 1, DL, CtxI, DT); + return isDereferenceableAndAlignedPointer(V, Ty, Align::None(), DL, CtxI, DT); } /// Test if A and B will obviously have the same value. 
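// Illustrative sketch (simplified stand-ins, not llvm::Align/llvm::MaybeAlign)
// of the convention the Loads.cpp changes above move to: "no alignment given"
// is an empty optional rather than the magic value 0, and it resolves to the
// type's ABI alignment at the point of use.
#include <cstdint>
#include <optional>

using AlignSketch = uint64_t;                        // Always a power of two.
using MaybeAlignSketch = std::optional<AlignSketch>;

AlignSketch valueOrABITypeAlignment(MaybeAlignSketch MA,
                                    AlignSketch ABITypeAlign) {
  // An explicit alignment wins; otherwise fall back to the ABI alignment.
  return MA ? *MA : ABITypeAlign;
}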
@@ -187,6 +194,60 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) { return false; } +bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, + ScalarEvolution &SE, + DominatorTree &DT) { + auto &DL = LI->getModule()->getDataLayout(); + Value *Ptr = LI->getPointerOperand(); + + APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()), + DL.getTypeStoreSize(LI->getType())); + const Align Alignment = DL.getValueOrABITypeAlignment( + MaybeAlign(LI->getAlignment()), LI->getType()); + + Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI(); + + // If given a uniform (i.e. non-varying) address, see if we can prove the + // access is safe within the loop w/o needing predication. + if (L->isLoopInvariant(Ptr)) + return isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL, + HeaderFirstNonPHI, &DT); + + // Otherwise, check to see if we have a repeating access pattern where we can + // prove that all accesses are well aligned and dereferenceable. + auto *AddRec = dyn_cast(SE.getSCEV(Ptr)); + if (!AddRec || AddRec->getLoop() != L || !AddRec->isAffine()) + return false; + auto* Step = dyn_cast(AddRec->getStepRecurrence(SE)); + if (!Step) + return false; + // TODO: generalize to access patterns which have gaps + if (Step->getAPInt() != EltSize) + return false; + + // TODO: If the symbolic trip count has a small bound (max count), we might + // be able to prove safety. + auto TC = SE.getSmallConstantTripCount(L); + if (!TC) + return false; + + const APInt AccessSize = TC * EltSize; + + auto *StartS = dyn_cast(AddRec->getStart()); + if (!StartS) + return false; + assert(SE.isLoopInvariant(StartS, L) && "implied by addrec definition"); + Value *Base = StartS->getValue(); + + // For the moment, restrict ourselves to the case where the access size is a + // multiple of the requested alignment and the base is aligned. + // TODO: generalize if a case found which warrants + if (EltSize.urem(Alignment.value()) != 0) + return false; + return isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL, + HeaderFirstNonPHI, &DT); +} + /// Check if executing a load of this pointer value cannot trap. /// /// If DT and ScanFrom are specified this method performs context-sensitive @@ -198,64 +259,25 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) { /// /// This uses the pointee type to determine how many bytes need to be safe to /// load from the pointer. -bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, +bool llvm::isSafeToLoadUnconditionally(Value *V, MaybeAlign MA, APInt &Size, const DataLayout &DL, Instruction *ScanFrom, const DominatorTree *DT) { // Zero alignment means that the load has the ABI alignment for the target - if (Align == 0) - Align = DL.getABITypeAlignment(V->getType()->getPointerElementType()); - assert(isPowerOf2_32(Align)); + const Align Alignment = + DL.getValueOrABITypeAlignment(MA, V->getType()->getPointerElementType()); // If DT is not specified we can't make context-sensitive query const Instruction* CtxI = DT ? 
ScanFrom : nullptr; - if (isDereferenceableAndAlignedPointer(V, Align, Size, DL, CtxI, DT)) + if (isDereferenceableAndAlignedPointer(V, Alignment, Size, DL, CtxI, DT)) return true; - int64_t ByteOffset = 0; - Value *Base = V; - Base = GetPointerBaseWithConstantOffset(V, ByteOffset, DL); - - if (ByteOffset < 0) // out of bounds + if (!ScanFrom) return false; - Type *BaseType = nullptr; - unsigned BaseAlign = 0; - if (const AllocaInst *AI = dyn_cast(Base)) { - // An alloca is safe to load from as load as it is suitably aligned. - BaseType = AI->getAllocatedType(); - BaseAlign = AI->getAlignment(); - } else if (const GlobalVariable *GV = dyn_cast(Base)) { - // Global variables are not necessarily safe to load from if they are - // interposed arbitrarily. Their size may change or they may be weak and - // require a test to determine if they were in fact provided. - if (!GV->isInterposable()) { - BaseType = GV->getType()->getElementType(); - BaseAlign = GV->getAlignment(); - } - } - - PointerType *AddrTy = cast(V->getType()); - uint64_t LoadSize = DL.getTypeStoreSize(AddrTy->getElementType()); - - // If we found a base allocated type from either an alloca or global variable, - // try to see if we are definitively within the allocated region. We need to - // know the size of the base type and the loaded type to do anything in this - // case. - if (BaseType && BaseType->isSized()) { - if (BaseAlign == 0) - BaseAlign = DL.getPrefTypeAlignment(BaseType); - - if (Align <= BaseAlign) { - // Check if the load is within the bounds of the underlying object. - if (ByteOffset + LoadSize <= DL.getTypeAllocSize(BaseType) && - ((ByteOffset % Align) == 0)) - return true; - } - } - - if (!ScanFrom) + if (Size.getBitWidth() > 64) return false; + const uint64_t LoadSize = Size.getZExtValue(); // Otherwise, be a little bit aggressive by scanning the local block where we // want to check to see if the pointer is already being loaded or stored @@ -279,7 +301,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, return false; Value *AccessedPtr; - unsigned AccessedAlign; + MaybeAlign MaybeAccessedAlign; if (LoadInst *LI = dyn_cast(BBI)) { // Ignore volatile loads. The execution of a volatile load cannot // be used to prove an address is backed by regular memory; it can, @@ -287,24 +309,26 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, if (LI->isVolatile()) continue; AccessedPtr = LI->getPointerOperand(); - AccessedAlign = LI->getAlignment(); + MaybeAccessedAlign = MaybeAlign(LI->getAlignment()); } else if (StoreInst *SI = dyn_cast(BBI)) { // Ignore volatile stores (see comment for loads). if (SI->isVolatile()) continue; AccessedPtr = SI->getPointerOperand(); - AccessedAlign = SI->getAlignment(); + MaybeAccessedAlign = MaybeAlign(SI->getAlignment()); } else continue; Type *AccessedTy = AccessedPtr->getType()->getPointerElementType(); - if (AccessedAlign == 0) - AccessedAlign = DL.getABITypeAlignment(AccessedTy); - if (AccessedAlign < Align) + + const Align AccessedAlign = + DL.getValueOrABITypeAlignment(MaybeAccessedAlign, AccessedTy); + if (AccessedAlign < Alignment) continue; // Handle trivial cases. 
- if (AccessedPtr == V) + if (AccessedPtr == V && + LoadSize <= DL.getTypeStoreSize(AccessedTy)) return true; if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) && @@ -314,12 +338,12 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, return false; } -bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, unsigned Align, +bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, MaybeAlign Alignment, const DataLayout &DL, Instruction *ScanFrom, const DominatorTree *DT) { APInt Size(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty)); - return isSafeToLoadUnconditionally(V, Align, Size, DL, ScanFrom, DT); + return isSafeToLoadUnconditionally(V, Alignment, Size, DL, ScanFrom, DT); } /// DefMaxInstsToScan - the default number of maximum instructions diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index 36bd9a8b7ea7..3d8f77675f3a 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -1189,18 +1189,31 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, unsigned IdxWidth = DL.getIndexSizeInBits(ASA); Type *Ty = cast(PtrA->getType())->getElementType(); - APInt Size(IdxWidth, DL.getTypeStoreSize(Ty)); APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0); PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); + // Retrieve the address space again as pointer stripping now tracks through + // `addrspacecast`. + ASA = cast(PtrA->getType())->getAddressSpace(); + ASB = cast(PtrB->getType())->getAddressSpace(); + // Check that the address spaces match and that the pointers are valid. + if (ASA != ASB) + return false; + + IdxWidth = DL.getIndexSizeInBits(ASA); + OffsetA = OffsetA.sextOrTrunc(IdxWidth); + OffsetB = OffsetB.sextOrTrunc(IdxWidth); + + APInt Size(IdxWidth, DL.getTypeStoreSize(Ty)); + // OffsetDelta = OffsetB - OffsetA; const SCEV *OffsetSCEVA = SE.getConstant(OffsetA); const SCEV *OffsetSCEVB = SE.getConstant(OffsetB); const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA); - const SCEVConstant *OffsetDeltaC = dyn_cast(OffsetDeltaSCEV); - const APInt &OffsetDelta = OffsetDeltaC->getAPInt(); + const APInt &OffsetDelta = cast(OffsetDeltaSCEV)->getAPInt(); + // Check if they are based on the same pointer. That makes the offsets // sufficient. if (PtrA == PtrB) @@ -1641,13 +1654,21 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, // Check every access pair. while (AI != AE) { Visited.insert(*AI); - EquivalenceClasses::member_iterator OI = std::next(AI); + bool AIIsWrite = AI->getInt(); + // Check loads only against next equivalent class, but stores also against + // other stores in the same equivalence class - to the same address. + EquivalenceClasses::member_iterator OI = + (AIIsWrite ? AI : std::next(AI)); while (OI != AE) { // Check every accessing instruction pair in program order. for (std::vector::iterator I1 = Accesses[*AI].begin(), I1E = Accesses[*AI].end(); I1 != I1E; ++I1) - for (std::vector::iterator I2 = Accesses[*OI].begin(), - I2E = Accesses[*OI].end(); I2 != I2E; ++I2) { + // Scan all accesses of another equivalence class, but only the next + // accesses of the same equivalent class. + for (std::vector::iterator + I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()), + I2E = (OI == AI ? 
I1E : Accesses[*OI].end()); + I2 != I2E; ++I2) { auto A = std::make_pair(&*AI, *I1); auto B = std::make_pair(&*OI, *I2); @@ -2078,7 +2099,7 @@ OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName, DL = I->getDebugLoc(); } - Report = make_unique(DEBUG_TYPE, RemarkName, DL, + Report = std::make_unique(DEBUG_TYPE, RemarkName, DL, CodeRegion); return *Report; } @@ -2323,9 +2344,9 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI) - : PSE(llvm::make_unique(*SE, *L)), - PtrRtChecking(llvm::make_unique(SE)), - DepChecker(llvm::make_unique(*PSE, L)), TheLoop(L), + : PSE(std::make_unique(*SE, *L)), + PtrRtChecking(std::make_unique(SE)), + DepChecker(std::make_unique(*PSE, L)), TheLoop(L), NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false), HasConvergentOp(false), HasDependenceInvolvingLoopInvariantAddress(false) { @@ -2380,7 +2401,7 @@ const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) { auto &LAI = LoopAccessInfoMap[L]; if (!LAI) - LAI = llvm::make_unique(L, SE, TLI, AA, DT, LI); + LAI = std::make_unique(L, SE, TLI, AA, DT, LI); return *LAI.get(); } @@ -2399,7 +2420,7 @@ void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const { bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) { SE = &getAnalysis().getSE(); auto *TLIP = getAnalysisIfAvailable(); - TLI = TLIP ? &TLIP->getTLI() : nullptr; + TLI = TLIP ? &TLIP->getTLI(F) : nullptr; AA = &getAnalysis().getAAResults(); DT = &getAnalysis().getDomTree(); LI = &getAnalysis().getLoopInfo(); diff --git a/lib/Analysis/LoopAnalysisManager.cpp b/lib/Analysis/LoopAnalysisManager.cpp index a10a87ce113b..02d40fb8d72a 100644 --- a/lib/Analysis/LoopAnalysisManager.cpp +++ b/lib/Analysis/LoopAnalysisManager.cpp @@ -46,7 +46,7 @@ bool LoopAnalysisManagerFunctionProxy::Result::invalidate( // invalidation logic below to act on that. auto PAC = PA.getChecker(); bool invalidateMemorySSAAnalysis = false; - if (EnableMSSALoopDependency) + if (MSSAUsed) invalidateMemorySSAAnalysis = Inv.invalidate(F, PA); if (!(PAC.preserved() || PAC.preservedSet>()) || Inv.invalidate(F, PA) || diff --git a/lib/Analysis/LoopCacheAnalysis.cpp b/lib/Analysis/LoopCacheAnalysis.cpp new file mode 100644 index 000000000000..10d2fe07884a --- /dev/null +++ b/lib/Analysis/LoopCacheAnalysis.cpp @@ -0,0 +1,625 @@ +//===- LoopCacheAnalysis.cpp - Loop Cache Analysis -------------------------==// +// +// The LLVM Compiler Infrastructure +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the implementation for the loop cache analysis. +/// The implementation is largely based on the following paper: +/// +/// Compiler Optimizations for Improving Data Locality +/// By: Steve Carr, Katherine S. McKinley, Chau-Wen Tseng +/// http://www.cs.utexas.edu/users/mckinley/papers/asplos-1994.pdf +/// +/// The general approach taken to estimate the number of cache lines used by the +/// memory references in an inner loop is: +/// 1. Partition memory references that exhibit temporal or spacial reuse +/// into reference groups. +/// 2. For each loop L in the a loop nest LN: +/// a. 
Compute the cost of the reference group +/// b. Compute the loop cost by summing up the reference groups costs +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LoopCacheAnalysis.h" +#include "llvm/ADT/BreadthFirstIterator.h" +#include "llvm/ADT/Sequence.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "loop-cache-cost" + +static cl::opt DefaultTripCount( + "default-trip-count", cl::init(100), cl::Hidden, + cl::desc("Use this to specify the default trip count of a loop")); + +// In this analysis two array references are considered to exhibit temporal +// reuse if they access either the same memory location, or a memory location +// with distance smaller than a configurable threshold. +static cl::opt TemporalReuseThreshold( + "temporal-reuse-threshold", cl::init(2), cl::Hidden, + cl::desc("Use this to specify the max. distance between array elements " + "accessed in a loop so that the elements are classified to have " + "temporal reuse")); + +/// Retrieve the innermost loop in the given loop nest \p Loops. It returns a +/// nullptr if any loops in the loop vector supplied has more than one sibling. +/// The loop vector is expected to contain loops collected in breadth-first +/// order. +static Loop *getInnerMostLoop(const LoopVectorTy &Loops) { + assert(!Loops.empty() && "Expecting a non-empy loop vector"); + + Loop *LastLoop = Loops.back(); + Loop *ParentLoop = LastLoop->getParentLoop(); + + if (ParentLoop == nullptr) { + assert(Loops.size() == 1 && "Expecting a single loop"); + return LastLoop; + } + + return (std::is_sorted(Loops.begin(), Loops.end(), + [](const Loop *L1, const Loop *L2) { + return L1->getLoopDepth() < L2->getLoopDepth(); + })) + ? LastLoop + : nullptr; +} + +static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize, + const Loop &L, ScalarEvolution &SE) { + const SCEVAddRecExpr *AR = dyn_cast(&AccessFn); + if (!AR || !AR->isAffine()) + return false; + + assert(AR->getLoop() && "AR should have a loop"); + + // Check that start and increment are not add recurrences. + const SCEV *Start = AR->getStart(); + const SCEV *Step = AR->getStepRecurrence(SE); + if (isa(Start) || isa(Step)) + return false; + + // Check that start and increment are both invariant in the loop. + if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L)) + return false; + + return AR->getStepRecurrence(SE) == &ElemSize; +} + +/// Compute the trip count for the given loop \p L. Return the SCEV expression +/// for the trip count or nullptr if it cannot be computed. 
+static const SCEV *computeTripCount(const Loop &L, ScalarEvolution &SE) { + const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(&L); + if (isa(BackedgeTakenCount) || + !isa(BackedgeTakenCount)) + return nullptr; + + return SE.getAddExpr(BackedgeTakenCount, + SE.getOne(BackedgeTakenCount->getType())); +} + +//===----------------------------------------------------------------------===// +// IndexedReference implementation +// +raw_ostream &llvm::operator<<(raw_ostream &OS, const IndexedReference &R) { + if (!R.IsValid) { + OS << R.StoreOrLoadInst; + OS << ", IsValid=false."; + return OS; + } + + OS << *R.BasePointer; + for (const SCEV *Subscript : R.Subscripts) + OS << "[" << *Subscript << "]"; + + OS << ", Sizes: "; + for (const SCEV *Size : R.Sizes) + OS << "[" << *Size << "]"; + + return OS; +} + +IndexedReference::IndexedReference(Instruction &StoreOrLoadInst, + const LoopInfo &LI, ScalarEvolution &SE) + : StoreOrLoadInst(StoreOrLoadInst), SE(SE) { + assert((isa(StoreOrLoadInst) || isa(StoreOrLoadInst)) && + "Expecting a load or store instruction"); + + IsValid = delinearize(LI); + if (IsValid) + LLVM_DEBUG(dbgs().indent(2) << "Succesfully delinearized: " << *this + << "\n"); +} + +Optional IndexedReference::hasSpacialReuse(const IndexedReference &Other, + unsigned CLS, + AliasAnalysis &AA) const { + assert(IsValid && "Expecting a valid reference"); + + if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) { + LLVM_DEBUG(dbgs().indent(2) + << "No spacial reuse: different base pointers\n"); + return false; + } + + unsigned NumSubscripts = getNumSubscripts(); + if (NumSubscripts != Other.getNumSubscripts()) { + LLVM_DEBUG(dbgs().indent(2) + << "No spacial reuse: different number of subscripts\n"); + return false; + } + + // all subscripts must be equal, except the leftmost one (the last one). + for (auto SubNum : seq(0, NumSubscripts - 1)) { + if (getSubscript(SubNum) != Other.getSubscript(SubNum)) { + LLVM_DEBUG(dbgs().indent(2) << "No spacial reuse, different subscripts: " + << "\n\t" << *getSubscript(SubNum) << "\n\t" + << *Other.getSubscript(SubNum) << "\n"); + return false; + } + } + + // the difference between the last subscripts must be less than the cache line + // size. 
+ const SCEV *LastSubscript = getLastSubscript(); + const SCEV *OtherLastSubscript = Other.getLastSubscript(); + const SCEVConstant *Diff = dyn_cast( + SE.getMinusSCEV(LastSubscript, OtherLastSubscript)); + + if (Diff == nullptr) { + LLVM_DEBUG(dbgs().indent(2) + << "No spacial reuse, difference between subscript:\n\t" + << *LastSubscript << "\n\t" << OtherLastSubscript + << "\nis not constant.\n"); + return None; + } + + bool InSameCacheLine = (Diff->getValue()->getSExtValue() < CLS); + + LLVM_DEBUG({ + if (InSameCacheLine) + dbgs().indent(2) << "Found spacial reuse.\n"; + else + dbgs().indent(2) << "No spacial reuse.\n"; + }); + + return InSameCacheLine; +} + +Optional IndexedReference::hasTemporalReuse(const IndexedReference &Other, + unsigned MaxDistance, + const Loop &L, + DependenceInfo &DI, + AliasAnalysis &AA) const { + assert(IsValid && "Expecting a valid reference"); + + if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) { + LLVM_DEBUG(dbgs().indent(2) + << "No temporal reuse: different base pointer\n"); + return false; + } + + std::unique_ptr D = + DI.depends(&StoreOrLoadInst, &Other.StoreOrLoadInst, true); + + if (D == nullptr) { + LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: no dependence\n"); + return false; + } + + if (D->isLoopIndependent()) { + LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n"); + return true; + } + + // Check the dependence distance at every loop level. There is temporal reuse + // if the distance at the given loop's depth is small (|d| <= MaxDistance) and + // it is zero at every other loop level. + int LoopDepth = L.getLoopDepth(); + int Levels = D->getLevels(); + for (int Level = 1; Level <= Levels; ++Level) { + const SCEV *Distance = D->getDistance(Level); + const SCEVConstant *SCEVConst = dyn_cast_or_null(Distance); + + if (SCEVConst == nullptr) { + LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: distance unknown\n"); + return None; + } + + const ConstantInt &CI = *SCEVConst->getValue(); + if (Level != LoopDepth && !CI.isZero()) { + LLVM_DEBUG(dbgs().indent(2) + << "No temporal reuse: distance is not zero at depth=" << Level + << "\n"); + return false; + } else if (Level == LoopDepth && CI.getSExtValue() > MaxDistance) { + LLVM_DEBUG( + dbgs().indent(2) + << "No temporal reuse: distance is greater than MaxDistance at depth=" + << Level << "\n"); + return false; + } + } + + LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n"); + return true; +} + +CacheCostTy IndexedReference::computeRefCost(const Loop &L, + unsigned CLS) const { + assert(IsValid && "Expecting a valid reference"); + LLVM_DEBUG({ + dbgs().indent(2) << "Computing cache cost for:\n"; + dbgs().indent(4) << *this << "\n"; + }); + + // If the indexed reference is loop invariant the cost is one. + if (isLoopInvariant(L)) { + LLVM_DEBUG(dbgs().indent(4) << "Reference is loop invariant: RefCost=1\n"); + return 1; + } + + const SCEV *TripCount = computeTripCount(L, SE); + if (!TripCount) { + LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName() + << " could not be computed, using DefaultTripCount\n"); + const SCEV *ElemSize = Sizes.back(); + TripCount = SE.getConstant(ElemSize->getType(), DefaultTripCount); + } + LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n"); + + // If the indexed reference is 'consecutive' the cost is + // (TripCount*Stride)/CLS, otherwise the cost is TripCount. 
+ const SCEV *RefCost = TripCount; + + if (isConsecutive(L, CLS)) { + const SCEV *Coeff = getLastCoefficient(); + const SCEV *ElemSize = Sizes.back(); + const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize); + const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS); + const SCEV *Numerator = SE.getMulExpr(Stride, TripCount); + RefCost = SE.getUDivExpr(Numerator, CacheLineSize); + LLVM_DEBUG(dbgs().indent(4) + << "Access is consecutive: RefCost=(TripCount*Stride)/CLS=" + << *RefCost << "\n"); + } else + LLVM_DEBUG(dbgs().indent(4) + << "Access is not consecutive: RefCost=TripCount=" << *RefCost + << "\n"); + + // Attempt to fold RefCost into a constant. + if (auto ConstantCost = dyn_cast(RefCost)) + return ConstantCost->getValue()->getSExtValue(); + + LLVM_DEBUG(dbgs().indent(4) + << "RefCost is not a constant! Setting to RefCost=InvalidCost " + "(invalid value).\n"); + + return CacheCost::InvalidCost; +} + +bool IndexedReference::delinearize(const LoopInfo &LI) { + assert(Subscripts.empty() && "Subscripts should be empty"); + assert(Sizes.empty() && "Sizes should be empty"); + assert(!IsValid && "Should be called once from the constructor"); + LLVM_DEBUG(dbgs() << "Delinearizing: " << StoreOrLoadInst << "\n"); + + const SCEV *ElemSize = SE.getElementSize(&StoreOrLoadInst); + const BasicBlock *BB = StoreOrLoadInst.getParent(); + + for (Loop *L = LI.getLoopFor(BB); L != nullptr; L = L->getParentLoop()) { + const SCEV *AccessFn = + SE.getSCEVAtScope(getPointerOperand(&StoreOrLoadInst), L); + + BasePointer = dyn_cast(SE.getPointerBase(AccessFn)); + if (BasePointer == nullptr) { + LLVM_DEBUG( + dbgs().indent(2) + << "ERROR: failed to delinearize, can't identify base pointer\n"); + return false; + } + + AccessFn = SE.getMinusSCEV(AccessFn, BasePointer); + + LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName() + << "', AccessFn: " << *AccessFn << "\n"); + + SE.delinearize(AccessFn, Subscripts, Sizes, + SE.getElementSize(&StoreOrLoadInst)); + + if (Subscripts.empty() || Sizes.empty() || + Subscripts.size() != Sizes.size()) { + // Attempt to determine whether we have a single dimensional array access. + // before giving up. + if (!isOneDimensionalArray(*AccessFn, *ElemSize, *L, SE)) { + LLVM_DEBUG(dbgs().indent(2) + << "ERROR: failed to delinearize reference\n"); + Subscripts.clear(); + Sizes.clear(); + break; + } + + const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize); + Subscripts.push_back(Div); + Sizes.push_back(ElemSize); + } + + return all_of(Subscripts, [&](const SCEV *Subscript) { + return isSimpleAddRecurrence(*Subscript, *L); + }); + } + + return false; +} + +bool IndexedReference::isLoopInvariant(const Loop &L) const { + Value *Addr = getPointerOperand(&StoreOrLoadInst); + assert(Addr != nullptr && "Expecting either a load or a store instruction"); + assert(SE.isSCEVable(Addr->getType()) && "Addr should be SCEVable"); + + if (SE.isLoopInvariant(SE.getSCEV(Addr), &L)) + return true; + + // The indexed reference is loop invariant if none of the coefficients use + // the loop induction variable. + bool allCoeffForLoopAreZero = all_of(Subscripts, [&](const SCEV *Subscript) { + return isCoeffForLoopZeroOrInvariant(*Subscript, L); + }); + + return allCoeffForLoopAreZero; +} + +bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const { + // The indexed reference is 'consecutive' if the only coefficient that uses + // the loop induction variable is the last one... 
+ const SCEV *LastSubscript = Subscripts.back(); + for (const SCEV *Subscript : Subscripts) { + if (Subscript == LastSubscript) + continue; + if (!isCoeffForLoopZeroOrInvariant(*Subscript, L)) + return false; + } + + // ...and the access stride is less than the cache line size. + const SCEV *Coeff = getLastCoefficient(); + const SCEV *ElemSize = Sizes.back(); + const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize); + const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS); + + return SE.isKnownPredicate(ICmpInst::ICMP_ULT, Stride, CacheLineSize); +} + +const SCEV *IndexedReference::getLastCoefficient() const { + const SCEV *LastSubscript = getLastSubscript(); + assert(isa(LastSubscript) && + "Expecting a SCEV add recurrence expression"); + const SCEVAddRecExpr *AR = dyn_cast(LastSubscript); + return AR->getStepRecurrence(SE); +} + +bool IndexedReference::isCoeffForLoopZeroOrInvariant(const SCEV &Subscript, + const Loop &L) const { + const SCEVAddRecExpr *AR = dyn_cast(&Subscript); + return (AR != nullptr) ? AR->getLoop() != &L + : SE.isLoopInvariant(&Subscript, &L); +} + +bool IndexedReference::isSimpleAddRecurrence(const SCEV &Subscript, + const Loop &L) const { + if (!isa(Subscript)) + return false; + + const SCEVAddRecExpr *AR = cast(&Subscript); + assert(AR->getLoop() && "AR should have a loop"); + + if (!AR->isAffine()) + return false; + + const SCEV *Start = AR->getStart(); + const SCEV *Step = AR->getStepRecurrence(SE); + + if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L)) + return false; + + return true; +} + +bool IndexedReference::isAliased(const IndexedReference &Other, + AliasAnalysis &AA) const { + const auto &Loc1 = MemoryLocation::get(&StoreOrLoadInst); + const auto &Loc2 = MemoryLocation::get(&Other.StoreOrLoadInst); + return AA.isMustAlias(Loc1, Loc2); +} + +//===----------------------------------------------------------------------===// +// CacheCost implementation +// +raw_ostream &llvm::operator<<(raw_ostream &OS, const CacheCost &CC) { + for (const auto &LC : CC.LoopCosts) { + const Loop *L = LC.first; + OS << "Loop '" << L->getName() << "' has cost = " << LC.second << "\n"; + } + return OS; +} + +CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, + ScalarEvolution &SE, TargetTransformInfo &TTI, + AliasAnalysis &AA, DependenceInfo &DI, + Optional TRT) + : Loops(Loops), TripCounts(), LoopCosts(), + TRT(TRT == None ? Optional(TemporalReuseThreshold) : TRT), + LI(LI), SE(SE), TTI(TTI), AA(AA), DI(DI) { + assert(!Loops.empty() && "Expecting a non-empty loop vector."); + + for (const Loop *L : Loops) { + unsigned TripCount = SE.getSmallConstantTripCount(L); + TripCount = (TripCount == 0) ? 
DefaultTripCount : TripCount; + TripCounts.push_back({L, TripCount}); + } + + calculateCacheFootprint(); +} + +std::unique_ptr +CacheCost::getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, + DependenceInfo &DI, Optional TRT) { + if (Root.getParentLoop()) { + LLVM_DEBUG(dbgs() << "Expecting the outermost loop in a loop nest\n"); + return nullptr; + } + + LoopVectorTy Loops; + for (Loop *L : breadth_first(&Root)) + Loops.push_back(L); + + if (!getInnerMostLoop(Loops)) { + LLVM_DEBUG(dbgs() << "Cannot compute cache cost of loop nest with more " + "than one innermost loop\n"); + return nullptr; + } + + return std::make_unique(Loops, AR.LI, AR.SE, AR.TTI, AR.AA, DI, TRT); +} + +void CacheCost::calculateCacheFootprint() { + LLVM_DEBUG(dbgs() << "POPULATING REFERENCE GROUPS\n"); + ReferenceGroupsTy RefGroups; + if (!populateReferenceGroups(RefGroups)) + return; + + LLVM_DEBUG(dbgs() << "COMPUTING LOOP CACHE COSTS\n"); + for (const Loop *L : Loops) { + assert((std::find_if(LoopCosts.begin(), LoopCosts.end(), + [L](const LoopCacheCostTy &LCC) { + return LCC.first == L; + }) == LoopCosts.end()) && + "Should not add duplicate element"); + CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups); + LoopCosts.push_back(std::make_pair(L, LoopCost)); + } + + sortLoopCosts(); + RefGroups.clear(); +} + +bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const { + assert(RefGroups.empty() && "Reference groups should be empty"); + + unsigned CLS = TTI.getCacheLineSize(); + Loop *InnerMostLoop = getInnerMostLoop(Loops); + assert(InnerMostLoop != nullptr && "Expecting a valid innermost loop"); + + for (BasicBlock *BB : InnerMostLoop->getBlocks()) { + for (Instruction &I : *BB) { + if (!isa(I) && !isa(I)) + continue; + + std::unique_ptr R(new IndexedReference(I, LI, SE)); + if (!R->isValid()) + continue; + + bool Added = false; + for (ReferenceGroupTy &RefGroup : RefGroups) { + const IndexedReference &Representative = *RefGroup.front().get(); + LLVM_DEBUG({ + dbgs() << "References:\n"; + dbgs().indent(2) << *R << "\n"; + dbgs().indent(2) << Representative << "\n"; + }); + + Optional HasTemporalReuse = + R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA); + Optional HasSpacialReuse = + R->hasSpacialReuse(Representative, CLS, AA); + + if ((HasTemporalReuse.hasValue() && *HasTemporalReuse) || + (HasSpacialReuse.hasValue() && *HasSpacialReuse)) { + RefGroup.push_back(std::move(R)); + Added = true; + break; + } + } + + if (!Added) { + ReferenceGroupTy RG; + RG.push_back(std::move(R)); + RefGroups.push_back(std::move(RG)); + } + } + } + + if (RefGroups.empty()) + return false; + + LLVM_DEBUG({ + dbgs() << "\nIDENTIFIED REFERENCE GROUPS:\n"; + int n = 1; + for (const ReferenceGroupTy &RG : RefGroups) { + dbgs().indent(2) << "RefGroup " << n << ":\n"; + for (const auto &IR : RG) + dbgs().indent(4) << *IR << "\n"; + n++; + } + dbgs() << "\n"; + }); + + return true; +} + +CacheCostTy +CacheCost::computeLoopCacheCost(const Loop &L, + const ReferenceGroupsTy &RefGroups) const { + if (!L.isLoopSimplifyForm()) + return InvalidCost; + + LLVM_DEBUG(dbgs() << "Considering loop '" << L.getName() + << "' as innermost loop.\n"); + + // Compute the product of the trip counts of each other loop in the nest. 
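Editorial note: before the hunk continues with the code that computes this product, here is a compact standalone sketch of the whole aggregation performed by computeLoopCacheCost: each reference group's cost is scaled by the iterations of every other loop in the nest, and the results are summed. loopCacheCost and LoopTC are illustrative stand-ins, not the analysis types.

    #include <cstdint>
    #include <vector>

    struct LoopTC { const void *Loop; uint64_t TripCount; };

    // Cost of treating loop L as the innermost loop of the nest.
    uint64_t loopCacheCost(const void *L, const std::vector<LoopTC> &TripCounts,
                           const std::vector<uint64_t> &RefGroupCosts) {
      uint64_t OtherTripCountsProduct = 1;
      for (const LoopTC &TC : TripCounts)
        if (TC.Loop != L)
          OtherTripCountsProduct *= TC.TripCount;

      uint64_t LoopCost = 0;
      for (uint64_t RefGroupCost : RefGroupCosts)
        LoopCost += RefGroupCost * OtherTripCountsProduct;
      return LoopCost;
    }
    // Example: in a 2-deep nest with trip counts {100, 100}, a group costing 12
    // lines per full inner-loop execution contributes 12 * 100 when that loop is
    // the innermost candidate; comparing candidates ranks loop orders by cost.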
+ CacheCostTy TripCountsProduct = 1; + for (const auto &TC : TripCounts) { + if (TC.first == &L) + continue; + TripCountsProduct *= TC.second; + } + + CacheCostTy LoopCost = 0; + for (const ReferenceGroupTy &RG : RefGroups) { + CacheCostTy RefGroupCost = computeRefGroupCacheCost(RG, L); + LoopCost += RefGroupCost * TripCountsProduct; + } + + LLVM_DEBUG(dbgs().indent(2) << "Loop '" << L.getName() + << "' has cost=" << LoopCost << "\n"); + + return LoopCost; +} + +CacheCostTy CacheCost::computeRefGroupCacheCost(const ReferenceGroupTy &RG, + const Loop &L) const { + assert(!RG.empty() && "Reference group should have at least one member."); + + const IndexedReference *Representative = RG.front().get(); + return Representative->computeRefCost(L, TTI.getCacheLineSize()); +} + +//===----------------------------------------------------------------------===// +// LoopCachePrinterPass implementation +// +PreservedAnalyses LoopCachePrinterPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + Function *F = L.getHeader()->getParent(); + DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI); + + if (auto CC = CacheCost::getCacheCost(L, AR, DI)) + OS << *CC; + + return PreservedAnalyses::all(); +} diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index aa5da0859805..dbab5db7dbc2 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -359,6 +359,45 @@ bool Loop::isAuxiliaryInductionVariable(PHINode &AuxIndVar, return SE.isLoopInvariant(IndDesc.getStep(), this); } +BranchInst *Loop::getLoopGuardBranch() const { + if (!isLoopSimplifyForm()) + return nullptr; + + BasicBlock *Preheader = getLoopPreheader(); + BasicBlock *Latch = getLoopLatch(); + assert(Preheader && Latch && + "Expecting a loop with valid preheader and latch"); + + // Loop should be in rotate form. + if (!isLoopExiting(Latch)) + return nullptr; + + // Disallow loops with more than one unique exit block, as we do not verify + // that GuardOtherSucc post dominates all exit blocks. + BasicBlock *ExitFromLatch = getUniqueExitBlock(); + if (!ExitFromLatch) + return nullptr; + + BasicBlock *ExitFromLatchSucc = ExitFromLatch->getUniqueSuccessor(); + if (!ExitFromLatchSucc) + return nullptr; + + BasicBlock *GuardBB = Preheader->getUniquePredecessor(); + if (!GuardBB) + return nullptr; + + assert(GuardBB->getTerminator() && "Expecting valid guard terminator"); + + BranchInst *GuardBI = dyn_cast(GuardBB->getTerminator()); + if (!GuardBI || GuardBI->isUnconditional()) + return nullptr; + + BasicBlock *GuardOtherSucc = (GuardBI->getSuccessor(0) == Preheader) + ? GuardBI->getSuccessor(1) + : GuardBI->getSuccessor(0); + return (GuardOtherSucc == ExitFromLatchSucc) ? 
GuardBI : nullptr; +} + bool Loop::isCanonical(ScalarEvolution &SE) const { InductionDescriptor IndDesc; if (!getInductionDescriptor(SE, IndDesc)) diff --git a/lib/Analysis/LoopUnrollAnalyzer.cpp b/lib/Analysis/LoopUnrollAnalyzer.cpp index 1728b5e9f6d2..762623de41e9 100644 --- a/lib/Analysis/LoopUnrollAnalyzer.cpp +++ b/lib/Analysis/LoopUnrollAnalyzer.cpp @@ -78,7 +78,7 @@ bool UnrolledInstAnalyzer::visitBinaryOperator(BinaryOperator &I) { const DataLayout &DL = I.getModule()->getDataLayout(); if (auto FI = dyn_cast(&I)) SimpleV = - SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL); + SimplifyBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL); else SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL); diff --git a/lib/Analysis/MemDerefPrinter.cpp b/lib/Analysis/MemDerefPrinter.cpp index 77ebf89d9a08..5cf516a538b5 100644 --- a/lib/Analysis/MemDerefPrinter.cpp +++ b/lib/Analysis/MemDerefPrinter.cpp @@ -55,8 +55,8 @@ bool MemDerefPrinter::runOnFunction(Function &F) { Value *PO = LI->getPointerOperand(); if (isDereferenceablePointer(PO, LI->getType(), DL)) Deref.push_back(PO); - if (isDereferenceableAndAlignedPointer(PO, LI->getType(), - LI->getAlignment(), DL)) + if (isDereferenceableAndAlignedPointer( + PO, LI->getType(), MaybeAlign(LI->getAlignment()), DL)) DerefAndAligned.insert(PO); } } diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 729dad463657..172c86eb4646 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -180,6 +180,19 @@ static Optional getAllocationData(const Value *V, AllocType AllocTy, return None; } +static Optional +getAllocationData(const Value *V, AllocType AllocTy, + function_ref GetTLI, + bool LookThroughBitCast = false) { + bool IsNoBuiltinCall; + if (const Function *Callee = + getCalledFunction(V, LookThroughBitCast, IsNoBuiltinCall)) + if (!IsNoBuiltinCall) + return getAllocationDataForFunction( + Callee, AllocTy, &GetTLI(const_cast(*Callee))); + return None; +} + static Optional getAllocationSize(const Value *V, const TargetLibraryInfo *TLI) { bool IsNoBuiltinCall; @@ -223,6 +236,11 @@ bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast) { return getAllocationData(V, AnyAlloc, TLI, LookThroughBitCast).hasValue(); } +bool llvm::isAllocationFn( + const Value *V, function_ref GetTLI, + bool LookThroughBitCast) { + return getAllocationData(V, AnyAlloc, GetTLI, LookThroughBitCast).hasValue(); +} /// Tests if a value is a call or invoke to a function that returns a /// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions). @@ -240,6 +258,12 @@ bool llvm::isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast) { return getAllocationData(V, MallocLike, TLI, LookThroughBitCast).hasValue(); } +bool llvm::isMallocLikeFn( + const Value *V, function_ref GetTLI, + bool LookThroughBitCast) { + return getAllocationData(V, MallocLike, GetTLI, LookThroughBitCast) + .hasValue(); +} /// Tests if a value is a call or invoke to a library function that /// allocates zero-filled memory (such as calloc). @@ -276,12 +300,27 @@ bool llvm::isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI) { return getAllocationDataForFunction(F, ReallocLike, TLI).hasValue(); } +/// Tests if a value is a call or invoke to a library function that +/// allocates memory and throws if an allocation failed (e.g., new). 
+bool llvm::isOpNewLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, OpNewLike, TLI, LookThroughBitCast).hasValue(); +} + +/// Tests if a value is a call or invoke to a library function that +/// allocates memory (strdup, strndup). +bool llvm::isStrdupLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, StrDupLike, TLI, LookThroughBitCast).hasValue(); +} + /// extractMallocCall - Returns the corresponding CallInst if the instruction /// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we /// ignore InvokeInst here. -const CallInst *llvm::extractMallocCall(const Value *I, - const TargetLibraryInfo *TLI) { - return isMallocLikeFn(I, TLI) ? dyn_cast(I) : nullptr; +const CallInst *llvm::extractMallocCall( + const Value *I, + function_ref GetTLI) { + return isMallocLikeFn(I, GetTLI) ? dyn_cast(I) : nullptr; } static Value *computeArraySize(const CallInst *CI, const DataLayout &DL, @@ -521,9 +560,9 @@ STATISTIC(ObjectVisitorArgument, STATISTIC(ObjectVisitorLoad, "Number of load instructions with unsolved size and offset"); -APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) { - if (Options.RoundToAlign && Align) - return APInt(IntTyBits, alignTo(Size.getZExtValue(), Align)); +APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Alignment) { + if (Options.RoundToAlign && Alignment) + return APInt(IntTyBits, alignTo(Size.getZExtValue(), Align(Alignment))); return Size; } diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index b25b655165d7..884587e020bb 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -183,7 +183,7 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, MemDepResult MemoryDependenceResults::getCallDependencyFrom( CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt, BasicBlock *BB) { - unsigned Limit = BlockScanLimit; + unsigned Limit = getDefaultBlockScanLimit(); // Walk backwards through the block, looking for dependencies. while (ScanIt != BB->begin()) { @@ -356,7 +356,7 @@ MemDepResult MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, BasicBlock *BB) { - if (!LI->getMetadata(LLVMContext::MD_invariant_group)) + if (!LI->hasMetadata(LLVMContext::MD_invariant_group)) return MemDepResult::getUnknown(); // Take the ptr operand after all casts and geps 0. This way we can search @@ -417,7 +417,7 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, // same pointer operand) we can assume that value pointed by pointer // operand didn't change. if ((isa(U) || isa(U)) && - U->getMetadata(LLVMContext::MD_invariant_group) != nullptr) + U->hasMetadata(LLVMContext::MD_invariant_group)) ClosestDependency = GetClosestDependency(ClosestDependency, U); } } @@ -443,7 +443,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( OrderedBasicBlock *OBB) { bool isInvariantLoad = false; - unsigned DefaultLimit = BlockScanLimit; + unsigned DefaultLimit = getDefaultBlockScanLimit(); if (!Limit) Limit = &DefaultLimit; @@ -481,7 +481,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( // Arguably, this logic should be pushed inside AliasAnalysis itself. 
if (isLoad && QueryInst) { LoadInst *LI = dyn_cast(QueryInst); - if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr) + if (LI && LI->hasMetadata(LLVMContext::MD_invariant_load)) isInvariantLoad = true; } @@ -1746,6 +1746,9 @@ void MemoryDependenceResults::verifyRemoved(Instruction *D) const { AnalysisKey MemoryDependenceAnalysis::Key; +MemoryDependenceAnalysis::MemoryDependenceAnalysis() + : DefaultBlockScanLimit(BlockScanLimit) {} + MemoryDependenceResults MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) { auto &AA = AM.getResult(F); @@ -1753,7 +1756,7 @@ MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) { auto &TLI = AM.getResult(F); auto &DT = AM.getResult(F); auto &PV = AM.getResult(F); - return MemoryDependenceResults(AA, AC, TLI, DT, PV); + return MemoryDependenceResults(AA, AC, TLI, DT, PV, DefaultBlockScanLimit); } char MemoryDependenceWrapperPass::ID = 0; @@ -1807,15 +1810,15 @@ bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &P } unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const { - return BlockScanLimit; + return DefaultBlockScanLimit; } bool MemoryDependenceWrapperPass::runOnFunction(Function &F) { auto &AA = getAnalysis().getAAResults(); auto &AC = getAnalysis().getAssumptionCache(F); - auto &TLI = getAnalysis().getTLI(); + auto &TLI = getAnalysis().getTLI(F); auto &DT = getAnalysis().getDomTree(); auto &PV = getAnalysis().getResult(); - MemDep.emplace(AA, AC, TLI, DT, PV); + MemDep.emplace(AA, AC, TLI, DT, PV, BlockScanLimit); return false; } diff --git a/lib/Analysis/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp index 17f5d9b9f0ad..cfb8b7e7dcb5 100644 --- a/lib/Analysis/MemorySSA.cpp +++ b/lib/Analysis/MemorySSA.cpp @@ -49,6 +49,7 @@ #include "llvm/Support/raw_ostream.h" #include #include +#include #include #include #include @@ -83,7 +84,7 @@ bool llvm::VerifyMemorySSA = false; #endif /// Enables memory ssa as a dependency for loop passes in legacy pass manager. cl::opt llvm::EnableMSSALoopDependency( - "enable-mssa-loop-dependency", cl::Hidden, cl::init(false), + "enable-mssa-loop-dependency", cl::Hidden, cl::init(true), cl::desc("Enable MemorySSA dependency for loop pass manager")); static cl::opt @@ -284,6 +285,11 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc, case Intrinsic::invariant_end: case Intrinsic::assume: return {false, NoAlias}; + case Intrinsic::dbg_addr: + case Intrinsic::dbg_declare: + case Intrinsic::dbg_label: + case Intrinsic::dbg_value: + llvm_unreachable("debuginfo shouldn't have associated defs!"); default: break; } @@ -369,7 +375,7 @@ static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA, const Instruction *I) { // If the memory can't be changed, then loads of the memory can't be // clobbered. - return isa(I) && (I->getMetadata(LLVMContext::MD_invariant_load) || + return isa(I) && (I->hasMetadata(LLVMContext::MD_invariant_load) || AA.pointsToConstantMemory(MemoryLocation( cast(I)->getPointerOperand()))); } @@ -867,6 +873,7 @@ template class ClobberWalker { if (!DefChainEnd) for (auto *MA : def_chain(const_cast(Target))) DefChainEnd = MA; + assert(DefChainEnd && "Failed to find dominating phi/liveOnEntry"); // If any of the terminated paths don't dominate the phi we'll try to // optimize, we need to figure out what they are and quit. 
@@ -1087,9 +1094,14 @@ void MemorySSA::renameSuccessorPhis(BasicBlock *BB, MemoryAccess *IncomingVal, AccessList *Accesses = It->second.get(); auto *Phi = cast(&Accesses->front()); if (RenameAllUses) { - int PhiIndex = Phi->getBasicBlockIndex(BB); - assert(PhiIndex != -1 && "Incomplete phi during partial rename"); - Phi->setIncomingValue(PhiIndex, IncomingVal); + bool ReplacementDone = false; + for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) + if (Phi->getIncomingBlock(I) == BB) { + Phi->setIncomingValue(I, IncomingVal); + ReplacementDone = true; + } + (void) ReplacementDone; + assert(ReplacementDone && "Incomplete phi during partial rename"); } else Phi->addIncoming(IncomingVal, BB); } @@ -1237,7 +1249,7 @@ MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) { auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr)); if (Res.second) - Res.first->second = llvm::make_unique(); + Res.first->second = std::make_unique(); return Res.first->second.get(); } @@ -1245,7 +1257,7 @@ MemorySSA::DefsList *MemorySSA::getOrCreateDefsList(const BasicBlock *BB) { auto Res = PerBlockDefs.insert(std::make_pair(BB, nullptr)); if (Res.second) - Res.first->second = llvm::make_unique(); + Res.first->second = std::make_unique(); return Res.first->second.get(); } @@ -1554,10 +1566,10 @@ MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() { if (!WalkerBase) WalkerBase = - llvm::make_unique>(this, AA, DT); + std::make_unique>(this, AA, DT); Walker = - llvm::make_unique>(this, WalkerBase.get()); + std::make_unique>(this, WalkerBase.get()); return Walker.get(); } @@ -1567,10 +1579,10 @@ MemorySSAWalker *MemorySSA::getSkipSelfWalker() { if (!WalkerBase) WalkerBase = - llvm::make_unique>(this, AA, DT); + std::make_unique>(this, AA, DT); SkipWalker = - llvm::make_unique>(this, WalkerBase.get()); + std::make_unique>(this, WalkerBase.get()); return SkipWalker.get(); } @@ -1687,13 +1699,15 @@ MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) { MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I, MemoryAccess *Definition, - const MemoryUseOrDef *Template) { + const MemoryUseOrDef *Template, + bool CreationMustSucceed) { assert(!isa(I) && "Cannot create a defined access for a PHI"); MemoryUseOrDef *NewAccess = createNewAccess(I, AA, Template); - assert( - NewAccess != nullptr && - "Tried to create a memory access for a non-memory touching instruction"); - NewAccess->setDefiningAccess(Definition); + if (CreationMustSucceed) + assert(NewAccess != nullptr && "Tried to create a memory access for a " + "non-memory touching instruction"); + if (NewAccess) + NewAccess->setDefiningAccess(Definition); return NewAccess; } @@ -1717,13 +1731,21 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I, AliasAnalysisType *AAP, const MemoryUseOrDef *Template) { // The assume intrinsic has a control dependency which we model by claiming - // that it writes arbitrarily. Ignore that fake memory dependency here. + // that it writes arbitrarily. Debuginfo intrinsics may be considered + // clobbers when we have a nonstandard AA pipeline. Ignore these fake memory + // dependencies here. // FIXME: Replace this special casing with a more accurate modelling of // assume's control dependency. if (IntrinsicInst *II = dyn_cast(I)) if (II->getIntrinsicID() == Intrinsic::assume) return nullptr; + // Using a nonstandard AA pipelines might leave us with unexpected modref + // results for I, so add a check to not model instructions that may not read + // from or write to memory. 
This is necessary for correctness. + if (!I->mayReadFromMemory() && !I->mayWriteToMemory()) + return nullptr; + bool Def, Use; if (Template) { Def = dyn_cast_or_null(Template) != nullptr; @@ -1850,6 +1872,7 @@ void MemorySSA::verifyMemorySSA() const { verifyDomination(F); verifyOrdering(F); verifyDominationNumbers(F); + verifyPrevDefInPhis(F); // Previously, the verification used to also verify that the clobberingAccess // cached by MemorySSA is the same as the clobberingAccess found at a later // query to AA. This does not hold true in general due to the current fragility @@ -1862,6 +1885,40 @@ void MemorySSA::verifyMemorySSA() const { // example, see test4 added in D51960. } +void MemorySSA::verifyPrevDefInPhis(Function &F) const { +#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS) + for (const BasicBlock &BB : F) { + if (MemoryPhi *Phi = getMemoryAccess(&BB)) { + for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) { + auto *Pred = Phi->getIncomingBlock(I); + auto *IncAcc = Phi->getIncomingValue(I); + // If Pred has no unreachable predecessors, get last def looking at + // IDoms. If, while walkings IDoms, any of these has an unreachable + // predecessor, then the incoming def can be any access. + if (auto *DTNode = DT->getNode(Pred)) { + while (DTNode) { + if (auto *DefList = getBlockDefs(DTNode->getBlock())) { + auto *LastAcc = &*(--DefList->end()); + assert(LastAcc == IncAcc && + "Incorrect incoming access into phi."); + break; + } + DTNode = DTNode->getIDom(); + } + } else { + // If Pred has unreachable predecessors, but has at least a Def, the + // incoming access can be the last Def in Pred, or it could have been + // optimized to LoE. After an update, though, the LoE may have been + // replaced by another access, so IncAcc may be any access. + // If Pred has unreachable predecessors and no Defs, incoming access + // should be LoE; However, after an update, it may be any access. + } + } + } + } +#endif +} + /// Verify that all of the blocks we believe to have valid domination numbers /// actually have valid domination numbers. void MemorySSA::verifyDominationNumbers(const Function &F) const { @@ -2005,7 +2062,7 @@ void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const { /// accesses and verifying that, for each use, it appears in the /// appropriate def's use list void MemorySSA::verifyDefUses(Function &F) const { -#ifndef NDEBUG +#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS) for (BasicBlock &B : F) { // Phi nodes are attached to basic blocks if (MemoryPhi *Phi = getMemoryAccess(&B)) { @@ -2212,7 +2269,7 @@ MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F, FunctionAnalysisManager &AM) { auto &DT = AM.getResult(F); auto &AA = AM.getResult(F); - return MemorySSAAnalysis::Result(llvm::make_unique(F, &AA, &DT)); + return MemorySSAAnalysis::Result(std::make_unique(F, &AA, &DT)); } bool MemorySSAAnalysis::Result::invalidate( diff --git a/lib/Analysis/MemorySSAUpdater.cpp b/lib/Analysis/MemorySSAUpdater.cpp index 4c1feee7fd9a..f2d56b05d968 100644 --- a/lib/Analysis/MemorySSAUpdater.cpp +++ b/lib/Analysis/MemorySSAUpdater.cpp @@ -44,11 +44,15 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive( // First, do a cache lookup. Without this cache, certain CFG structures // (like a series of if statements) take exponential time to visit. 
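Editorial note: the cache consulted in the lines that follow is what keeps this walk linear; without memoizing a per-block answer, a ladder of N if-then-else diamonds would be revisited on the order of 2^N times. A toy standalone illustration of the same memoization pattern follows; Block and previousDefBlock are invented for the sketch, and the real routine additionally inserts phis when predecessors disagree and copes with cycles.

    #include <unordered_map>
    #include <vector>

    struct Block {
      std::vector<const Block *> Preds;
      bool HasDef = false; // stand-in for "this block contains a MemoryDef"
    };

    // Walk backwards (over an acyclic toy CFG) to the nearest block with a
    // definition, caching the answer per block so repeated join points are
    // visited once instead of once per path.
    const Block *previousDefBlock(
        const Block *BB,
        std::unordered_map<const Block *, const Block *> &Cache) {
      auto It = Cache.find(BB);
      if (It != Cache.end())
        return It->second;
      const Block *Result = nullptr;
      if (BB->HasDef)
        Result = BB;
      else
        for (const Block *Pred : BB->Preds)
          if ((Result = previousDefBlock(Pred, Cache)))
            break; // toy rule: take the first predecessor that reaches a def
      Cache.emplace(BB, Result);
      return Result;
    }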
auto Cached = CachedPreviousDef.find(BB); - if (Cached != CachedPreviousDef.end()) { + if (Cached != CachedPreviousDef.end()) return Cached->second; - } - if (BasicBlock *Pred = BB->getSinglePredecessor()) { + // If this method is called from an unreachable block, return LoE. + if (!MSSA->DT->isReachableFromEntry(BB)) + return MSSA->getLiveOnEntryDef(); + + if (BasicBlock *Pred = BB->getUniquePredecessor()) { + VisitedBlocks.insert(BB); // Single predecessor case, just recurse, we can only have one definition. MemoryAccess *Result = getPreviousDefFromEnd(Pred, CachedPreviousDef); CachedPreviousDef.insert({BB, Result}); @@ -71,11 +75,19 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive( // Recurse to get the values in our predecessors for placement of a // potential phi node. This will insert phi nodes if we cycle in order to // break the cycle and have an operand. - for (auto *Pred : predecessors(BB)) - if (MSSA->DT->isReachableFromEntry(Pred)) - PhiOps.push_back(getPreviousDefFromEnd(Pred, CachedPreviousDef)); - else + bool UniqueIncomingAccess = true; + MemoryAccess *SingleAccess = nullptr; + for (auto *Pred : predecessors(BB)) { + if (MSSA->DT->isReachableFromEntry(Pred)) { + auto *IncomingAccess = getPreviousDefFromEnd(Pred, CachedPreviousDef); + if (!SingleAccess) + SingleAccess = IncomingAccess; + else if (IncomingAccess != SingleAccess) + UniqueIncomingAccess = false; + PhiOps.push_back(IncomingAccess); + } else PhiOps.push_back(MSSA->getLiveOnEntryDef()); + } // Now try to simplify the ops to avoid placing a phi. // This may return null if we never created a phi yet, that's okay @@ -84,7 +96,15 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive( // See if we can avoid the phi by simplifying it. auto *Result = tryRemoveTrivialPhi(Phi, PhiOps); // If we couldn't simplify, we may have to create a phi - if (Result == Phi) { + if (Result == Phi && UniqueIncomingAccess && SingleAccess) { + // A concrete Phi only exists if we created an empty one to break a cycle. + if (Phi) { + assert(Phi->operands().empty() && "Expected empty Phi"); + Phi->replaceAllUsesWith(SingleAccess); + removeMemoryAccess(Phi); + } + Result = SingleAccess; + } else if (Result == Phi && !(UniqueIncomingAccess && SingleAccess)) { if (!Phi) Phi = MSSA->createMemoryPhi(BB); @@ -173,12 +193,9 @@ MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) { TrackingVH Res(Phi); SmallVector, 8> Uses; std::copy(Phi->user_begin(), Phi->user_end(), std::back_inserter(Uses)); - for (auto &U : Uses) { - if (MemoryPhi *UsePhi = dyn_cast(&*U)) { - auto OperRange = UsePhi->operands(); - tryRemoveTrivialPhi(UsePhi, OperRange); - } - } + for (auto &U : Uses) + if (MemoryPhi *UsePhi = dyn_cast(&*U)) + tryRemoveTrivialPhi(UsePhi); return Res; } @@ -187,6 +204,11 @@ MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) { // argument. // IE phi(a, a) or b = phi(a, b) or c = phi(a, a, c) // We recursively try to remove them. 
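Editorial note: the new single-argument tryRemoveTrivialPhi overload added below forwards to the existing template; the underlying rule is the usual one from on-the-fly SSA construction: a phi whose operands are all either the phi itself or one single value is redundant and can be replaced by that value. A minimal standalone sketch of just that test; Node and trivialPhiReplacement are illustrative, and the recursive cleanup of the phi's users is omitted.

    #include <vector>

    struct Node {
      std::vector<Node *> Operands; // only phis have operands in this toy model
    };

    // Returns the unique non-self operand if the phi is trivial, i.e. of the
    // form phi(a, a), phi(a, phi), phi(a, a, phi), ...; returns nullptr when at
    // least two distinct values flow in and the phi is genuinely needed.
    Node *trivialPhiReplacement(Node *Phi) {
      Node *Same = nullptr;
      for (Node *Op : Phi->Operands) {
        if (Op == Phi || Op == Same)
          continue;       // self-reference or a repeat of the candidate value
        if (Same)
          return nullptr; // two different incoming values
        Same = Op;
      }
      return Same;        // may be nullptr for an operand-less placeholder phi
    }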
+MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi) { + assert(Phi && "Can only remove concrete Phi."); + auto OperRange = Phi->operands(); + return tryRemoveTrivialPhi(Phi, OperRange); +} template MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands) { @@ -218,17 +240,49 @@ MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi, return recursePhi(Same); } -void MemorySSAUpdater::insertUse(MemoryUse *MU) { +void MemorySSAUpdater::insertUse(MemoryUse *MU, bool RenameUses) { InsertedPHIs.clear(); MU->setDefiningAccess(getPreviousDef(MU)); - // Unlike for defs, there is no extra work to do. Because uses do not create - // new may-defs, there are only two cases: - // + + // In cases without unreachable blocks, because uses do not create new + // may-defs, there are only two cases: // 1. There was a def already below us, and therefore, we should not have // created a phi node because it was already needed for the def. // // 2. There is no def below us, and therefore, there is no extra renaming work // to do. + + // In cases with unreachable blocks, where the unnecessary Phis were + // optimized out, adding the Use may re-insert those Phis. Hence, when + // inserting Uses outside of the MSSA creation process, and new Phis were + // added, rename all uses if we are asked. + + if (!RenameUses && !InsertedPHIs.empty()) { + auto *Defs = MSSA->getBlockDefs(MU->getBlock()); + (void)Defs; + assert((!Defs || (++Defs->begin() == Defs->end())) && + "Block may have only a Phi or no defs"); + } + + if (RenameUses && InsertedPHIs.size()) { + SmallPtrSet Visited; + BasicBlock *StartBlock = MU->getBlock(); + + if (auto *Defs = MSSA->getWritableBlockDefs(StartBlock)) { + MemoryAccess *FirstDef = &*Defs->begin(); + // Convert to incoming value if it's a memorydef. A phi *is* already an + // incoming value. + if (auto *MD = dyn_cast(FirstDef)) + FirstDef = MD->getDefiningAccess(); + + MSSA->renamePass(MU->getBlock(), FirstDef, Visited); + } + // We just inserted a phi into this block, so the incoming value will + // become the phi anyway, so it does not matter what we pass. + for (auto &MP : InsertedPHIs) + if (MemoryPhi *Phi = cast_or_null(MP)) + MSSA->renamePass(Phi->getBlock(), nullptr, Visited); + } } // Set every incoming edge {BB, MP->getBlock()} of MemoryPhi MP to NewDef. @@ -260,33 +314,35 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { // See if we had a local def, and if not, go hunting. MemoryAccess *DefBefore = getPreviousDef(MD); - bool DefBeforeSameBlock = DefBefore->getBlock() == MD->getBlock(); + bool DefBeforeSameBlock = false; + if (DefBefore->getBlock() == MD->getBlock() && + !(isa(DefBefore) && + std::find(InsertedPHIs.begin(), InsertedPHIs.end(), DefBefore) != + InsertedPHIs.end())) + DefBeforeSameBlock = true; // There is a def before us, which means we can replace any store/phi uses // of that thing with us, since we are in the way of whatever was there // before. // We now define that def's memorydefs and memoryphis if (DefBeforeSameBlock) { - for (auto UI = DefBefore->use_begin(), UE = DefBefore->use_end(); - UI != UE;) { - Use &U = *UI++; + DefBefore->replaceUsesWithIf(MD, [MD](Use &U) { // Leave the MemoryUses alone. // Also make sure we skip ourselves to avoid self references. 
- if (isa(U.getUser()) || U.getUser() == MD) - continue; + User *Usr = U.getUser(); + return !isa(Usr) && Usr != MD; // Defs are automatically unoptimized when the user is set to MD below, // because the isOptimized() call will fail to find the same ID. - U.set(MD); - } + }); } // and that def is now our defining access. MD->setDefiningAccess(DefBefore); - // Remember the index where we may insert new phis below. - unsigned NewPhiIndex = InsertedPHIs.size(); - SmallVector FixupList(InsertedPHIs.begin(), InsertedPHIs.end()); + + // Remember the index where we may insert new phis. + unsigned NewPhiIndex = InsertedPHIs.size(); if (!DefBeforeSameBlock) { // If there was a local def before us, we must have the same effect it // did. Because every may-def is the same, any phis/etc we would create, it @@ -302,46 +358,54 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { // If this is the first def in the block and this insert is in an arbitrary // place, compute IDF and place phis. + SmallPtrSet DefiningBlocks; + + // If this is the last Def in the block, also compute IDF based on MD, since + // this may a new Def added, and we may need additional Phis. auto Iter = MD->getDefsIterator(); ++Iter; auto IterEnd = MSSA->getBlockDefs(MD->getBlock())->end(); - if (Iter == IterEnd) { - ForwardIDFCalculator IDFs(*MSSA->DT); - SmallVector IDFBlocks; - SmallPtrSet DefiningBlocks; + if (Iter == IterEnd) DefiningBlocks.insert(MD->getBlock()); - IDFs.setDefiningBlocks(DefiningBlocks); - IDFs.calculate(IDFBlocks); - SmallVector, 4> NewInsertedPHIs; - for (auto *BBIDF : IDFBlocks) - if (!MSSA->getMemoryAccess(BBIDF)) { - auto *MPhi = MSSA->createMemoryPhi(BBIDF); - NewInsertedPHIs.push_back(MPhi); - // Add the phis created into the IDF blocks to NonOptPhis, so they are - // not optimized out as trivial by the call to getPreviousDefFromEnd - // below. Once they are complete, all these Phis are added to the - // FixupList, and removed from NonOptPhis inside fixupDefs(). - NonOptPhis.insert(MPhi); - } - for (auto &MPhi : NewInsertedPHIs) { - auto *BBIDF = MPhi->getBlock(); - for (auto *Pred : predecessors(BBIDF)) { - DenseMap> CachedPreviousDef; - MPhi->addIncoming(getPreviousDefFromEnd(Pred, CachedPreviousDef), - Pred); - } + for (const auto &VH : InsertedPHIs) + if (const auto *RealPHI = cast_or_null(VH)) + DefiningBlocks.insert(RealPHI->getBlock()); + ForwardIDFCalculator IDFs(*MSSA->DT); + SmallVector IDFBlocks; + IDFs.setDefiningBlocks(DefiningBlocks); + IDFs.calculate(IDFBlocks); + SmallVector, 4> NewInsertedPHIs; + for (auto *BBIDF : IDFBlocks) { + auto *MPhi = MSSA->getMemoryAccess(BBIDF); + if (!MPhi) { + MPhi = MSSA->createMemoryPhi(BBIDF); + NewInsertedPHIs.push_back(MPhi); } - - // Re-take the index where we're adding the new phis, because the above - // call to getPreviousDefFromEnd, may have inserted into InsertedPHIs. - NewPhiIndex = InsertedPHIs.size(); - for (auto &MPhi : NewInsertedPHIs) { - InsertedPHIs.push_back(&*MPhi); - FixupList.push_back(&*MPhi); + // Add the phis created into the IDF blocks to NonOptPhis, so they are not + // optimized out as trivial by the call to getPreviousDefFromEnd below. + // Once they are complete, all these Phis are added to the FixupList, and + // removed from NonOptPhis inside fixupDefs(). Existing Phis in IDF may + // need fixing as well, and potentially be trivial before this insertion, + // hence add all IDF Phis. See PR43044. 
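Editorial note: the phis added in this hunk are placed using the iterated dominance frontier of the defining blocks, as computed by ForwardIDFCalculator. As a standalone reminder of what that computes, the textbook worklist formulation is sketched below, taking a precomputed dominance-frontier map as input; BlockName, DomFrontier, and iteratedDominanceFrontier are hypothetical helpers, not the LLVM calculator.

    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    using BlockName = std::string;
    using DomFrontier = std::map<BlockName, std::set<BlockName>>;

    // Iterated dominance frontier: keep adding the frontier of newly placed phi
    // blocks until the set stabilizes; those blocks are where phis are required.
    std::set<BlockName> iteratedDominanceFrontier(const std::set<BlockName> &Defs,
                                                  const DomFrontier &DF) {
      std::set<BlockName> PhiBlocks;
      std::vector<BlockName> Worklist(Defs.begin(), Defs.end());
      while (!Worklist.empty()) {
        BlockName B = Worklist.back();
        Worklist.pop_back();
        auto It = DF.find(B);
        if (It == DF.end())
          continue;
        for (const BlockName &F : It->second)
          if (PhiBlocks.insert(F).second) // a newly placed phi is itself a def
            Worklist.push_back(F);
      }
      return PhiBlocks;
    }
    // For a diamond entry -> {then, else} -> join with a def in "then",
    // DF["then"] = {"join"}, so the phi lands in "join".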
+ NonOptPhis.insert(MPhi); + } + for (auto &MPhi : NewInsertedPHIs) { + auto *BBIDF = MPhi->getBlock(); + for (auto *Pred : predecessors(BBIDF)) { + DenseMap> CachedPreviousDef; + MPhi->addIncoming(getPreviousDefFromEnd(Pred, CachedPreviousDef), Pred); } } + // Re-take the index where we're adding the new phis, because the above call + // to getPreviousDefFromEnd, may have inserted into InsertedPHIs. + NewPhiIndex = InsertedPHIs.size(); + for (auto &MPhi : NewInsertedPHIs) { + InsertedPHIs.push_back(&*MPhi); + FixupList.push_back(&*MPhi); + } + FixupList.push_back(MD); } @@ -458,8 +522,7 @@ void MemorySSAUpdater::fixupDefs(const SmallVectorImpl &Vars) { void MemorySSAUpdater::removeEdge(BasicBlock *From, BasicBlock *To) { if (MemoryPhi *MPhi = MSSA->getMemoryAccess(To)) { MPhi->unorderedDeleteIncomingBlock(From); - if (MPhi->getNumIncomingValues() == 1) - removeMemoryAccess(MPhi); + tryRemoveTrivialPhi(MPhi); } } @@ -475,34 +538,51 @@ void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(const BasicBlock *From, Found = true; return false; }); - if (MPhi->getNumIncomingValues() == 1) - removeMemoryAccess(MPhi); + tryRemoveTrivialPhi(MPhi); + } +} + +static MemoryAccess *getNewDefiningAccessForClone(MemoryAccess *MA, + const ValueToValueMapTy &VMap, + PhiToDefMap &MPhiMap, + bool CloneWasSimplified, + MemorySSA *MSSA) { + MemoryAccess *InsnDefining = MA; + if (MemoryDef *DefMUD = dyn_cast(InsnDefining)) { + if (!MSSA->isLiveOnEntryDef(DefMUD)) { + Instruction *DefMUDI = DefMUD->getMemoryInst(); + assert(DefMUDI && "Found MemoryUseOrDef with no Instruction."); + if (Instruction *NewDefMUDI = + cast_or_null(VMap.lookup(DefMUDI))) { + InsnDefining = MSSA->getMemoryAccess(NewDefMUDI); + if (!CloneWasSimplified) + assert(InsnDefining && "Defining instruction cannot be nullptr."); + else if (!InsnDefining || isa(InsnDefining)) { + // The clone was simplified, it's no longer a MemoryDef, look up. + auto DefIt = DefMUD->getDefsIterator(); + // Since simplified clones only occur in single block cloning, a + // previous definition must exist, otherwise NewDefMUDI would not + // have been found in VMap. 
+ assert(DefIt != MSSA->getBlockDefs(DefMUD->getBlock())->begin() && + "Previous def must exist"); + InsnDefining = getNewDefiningAccessForClone( + &*(--DefIt), VMap, MPhiMap, CloneWasSimplified, MSSA); + } + } + } + } else { + MemoryPhi *DefPhi = cast(InsnDefining); + if (MemoryAccess *NewDefPhi = MPhiMap.lookup(DefPhi)) + InsnDefining = NewDefPhi; } + assert(InsnDefining && "Defining instruction cannot be nullptr."); + return InsnDefining; } void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB, const ValueToValueMapTy &VMap, PhiToDefMap &MPhiMap, bool CloneWasSimplified) { - auto GetNewDefiningAccess = [&](MemoryAccess *MA) -> MemoryAccess * { - MemoryAccess *InsnDefining = MA; - if (MemoryUseOrDef *DefMUD = dyn_cast(InsnDefining)) { - if (!MSSA->isLiveOnEntryDef(DefMUD)) { - Instruction *DefMUDI = DefMUD->getMemoryInst(); - assert(DefMUDI && "Found MemoryUseOrDef with no Instruction."); - if (Instruction *NewDefMUDI = - cast_or_null(VMap.lookup(DefMUDI))) - InsnDefining = MSSA->getMemoryAccess(NewDefMUDI); - } - } else { - MemoryPhi *DefPhi = cast(InsnDefining); - if (MemoryAccess *NewDefPhi = MPhiMap.lookup(DefPhi)) - InsnDefining = NewDefPhi; - } - assert(InsnDefining && "Defining instruction cannot be nullptr."); - return InsnDefining; - }; - const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB); if (!Acc) return; @@ -519,9 +599,13 @@ void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB, if (Instruction *NewInsn = dyn_cast_or_null(VMap.lookup(Insn))) { MemoryAccess *NewUseOrDef = MSSA->createDefinedAccess( - NewInsn, GetNewDefiningAccess(MUD->getDefiningAccess()), - CloneWasSimplified ? nullptr : MUD); - MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End); + NewInsn, + getNewDefiningAccessForClone(MUD->getDefiningAccess(), VMap, + MPhiMap, CloneWasSimplified, MSSA), + /*Template=*/CloneWasSimplified ? nullptr : MUD, + /*CreationMustSucceed=*/CloneWasSimplified ? false : true); + if (NewUseOrDef) + MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End); } } } @@ -563,8 +647,7 @@ void MemorySSAUpdater::updatePhisWhenInsertingUniqueBackedgeBlock( // If NewMPhi is a trivial phi, remove it. Its use in the header MPhi will be // replaced with the unique value. - if (HasUniqueIncomingValue) - removeMemoryAccess(NewMPhi); + tryRemoveTrivialPhi(NewMPhi); } void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, @@ -770,6 +853,9 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef Updates, } else { // Single predecessor, BB cannot be dead. GetLastDef of Pred. assert(Count == 1 && Pred && "Single predecessor expected."); + // BB can be unreachable though, return LoE if that is the case. + if (!DT.getNode(BB)) + return MSSA->getLiveOnEntryDef(); BB = Pred; } }; @@ -1010,7 +1096,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef Updates, for (; UI != E;) { Use &U = *UI; ++UI; - MemoryAccess *Usr = dyn_cast(U.getUser()); + MemoryAccess *Usr = cast(U.getUser()); if (MemoryPhi *UsrPhi = dyn_cast(Usr)) { BasicBlock *DominatedBlock = UsrPhi->getIncomingBlock(U); if (!DT.dominates(DominatingBlock, DominatedBlock)) @@ -1052,9 +1138,9 @@ void MemorySSAUpdater::moveTo(MemoryUseOrDef *What, BasicBlock *BB, // Now reinsert it into the IR and do whatever fixups needed. if (auto *MD = dyn_cast(What)) - insertDef(MD); + insertDef(MD, /*RenameUses=*/true); else - insertUse(cast(What)); + insertUse(cast(What), /*RenameUses=*/true); // Clear dangling pointers. 
We added all MemoryPhi users, but not all // of them are removed by fixupDefs(). @@ -1084,25 +1170,32 @@ void MemorySSAUpdater::moveAllAccesses(BasicBlock *From, BasicBlock *To, if (!Accs) return; + assert(Start->getParent() == To && "Incorrect Start instruction"); MemoryAccess *FirstInNew = nullptr; for (Instruction &I : make_range(Start->getIterator(), To->end())) if ((FirstInNew = MSSA->getMemoryAccess(&I))) break; - if (!FirstInNew) - return; + if (FirstInNew) { + auto *MUD = cast(FirstInNew); + do { + auto NextIt = ++MUD->getIterator(); + MemoryUseOrDef *NextMUD = (!Accs || NextIt == Accs->end()) + ? nullptr + : cast(&*NextIt); + MSSA->moveTo(MUD, To, MemorySSA::End); + // Moving MUD from Accs in the moveTo above, may delete Accs, so we need + // to retrieve it again. + Accs = MSSA->getWritableBlockAccesses(From); + MUD = NextMUD; + } while (MUD); + } - auto *MUD = cast(FirstInNew); - do { - auto NextIt = ++MUD->getIterator(); - MemoryUseOrDef *NextMUD = (!Accs || NextIt == Accs->end()) - ? nullptr - : cast(&*NextIt); - MSSA->moveTo(MUD, To, MemorySSA::End); - // Moving MUD from Accs in the moveTo above, may delete Accs, so we need to - // retrieve it again. - Accs = MSSA->getWritableBlockAccesses(From); - MUD = NextMUD; - } while (MUD); + // If all accesses were moved and only a trivial Phi remains, we try to remove + // that Phi. This is needed when From is going to be deleted. + auto *Defs = MSSA->getWritableBlockDefs(From); + if (Defs && !Defs->empty()) + if (auto *Phi = dyn_cast(&*Defs->begin())) + tryRemoveTrivialPhi(Phi); } void MemorySSAUpdater::moveAllAfterSpliceBlocks(BasicBlock *From, @@ -1118,7 +1211,7 @@ void MemorySSAUpdater::moveAllAfterSpliceBlocks(BasicBlock *From, void MemorySSAUpdater::moveAllAfterMergeBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start) { - assert(From->getSinglePredecessor() == To && + assert(From->getUniquePredecessor() == To && "From block is expected to have a single predecessor (To)."); moveAllAccesses(From, To, Start); for (BasicBlock *Succ : successors(From)) @@ -1173,8 +1266,7 @@ void MemorySSAUpdater::wireOldPredecessorsToNewImmediatePredecessor( return false; }); Phi->addIncoming(NewPhi, New); - if (onlySingleValue(NewPhi)) - removeMemoryAccess(NewPhi); + tryRemoveTrivialPhi(NewPhi); } } @@ -1239,10 +1331,8 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA, bool OptimizePhis) { unsigned PhisSize = PhisToOptimize.size(); while (PhisSize-- > 0) if (MemoryPhi *MP = - cast_or_null(PhisToOptimize.pop_back_val())) { - auto OperRange = MP->operands(); - tryRemoveTrivialPhi(MP, OperRange); - } + cast_or_null(PhisToOptimize.pop_back_val())) + tryRemoveTrivialPhi(MP); } } @@ -1256,8 +1346,7 @@ void MemorySSAUpdater::removeBlocks( if (!DeadBlocks.count(Succ)) if (MemoryPhi *MP = MSSA->getMemoryAccess(Succ)) { MP->unorderedDeleteIncomingBlock(BB); - if (MP->getNumIncomingValues() == 1) - removeMemoryAccess(MP); + tryRemoveTrivialPhi(MP); } // Drop all references of all accesses in BB if (MemorySSA::AccessList *Acc = MSSA->getWritableBlockAccesses(BB)) @@ -1281,10 +1370,8 @@ void MemorySSAUpdater::removeBlocks( void MemorySSAUpdater::tryRemoveTrivialPhis(ArrayRef UpdatedPHIs) { for (auto &VH : UpdatedPHIs) - if (auto *MPhi = cast_or_null(VH)) { - auto OperRange = MPhi->operands(); - tryRemoveTrivialPhi(MPhi, OperRange); - } + if (auto *MPhi = cast_or_null(VH)) + tryRemoveTrivialPhi(MPhi); } void MemorySSAUpdater::changeToUnreachable(const Instruction *I) { diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp 
b/lib/Analysis/ModuleSummaryAnalysis.cpp index e25eb290a665..8232bf07cafc 100644 --- a/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -319,7 +319,7 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, auto *CalledValue = CS.getCalledValue(); auto *CalledFunction = CS.getCalledFunction(); if (CalledValue && !CalledFunction) { - CalledValue = CalledValue->stripPointerCastsNoFollowAliases(); + CalledValue = CalledValue->stripPointerCasts(); // Stripping pointer casts can reveal a called function. CalledFunction = dyn_cast(CalledValue); } @@ -467,7 +467,7 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, // FIXME: refactor this to use the same code that inliner is using. // Don't try to import functions with noinline attribute. F.getAttributes().hasFnAttribute(Attribute::NoInline)}; - auto FuncSummary = llvm::make_unique( + auto FuncSummary = std::make_unique( Flags, NumInsts, FunFlags, /*EntryCount=*/0, std::move(Refs), CallGraphEdges.takeVector(), TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(), @@ -598,7 +598,7 @@ static void computeVariableSummary(ModuleSummaryIndex &Index, !V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() && !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass(); GlobalVarSummary::GVarFlags VarFlags(CanBeInternalized, CanBeInternalized); - auto GVarSummary = llvm::make_unique(Flags, VarFlags, + auto GVarSummary = std::make_unique(Flags, VarFlags, RefEdges.takeVector()); if (NonRenamableLocal) CantBePromoted.insert(V.getGUID()); @@ -616,7 +616,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal, /* Live = */ false, A.isDSOLocal(), A.hasLinkOnceODRLinkage() && A.hasGlobalUnnamedAddr()); - auto AS = llvm::make_unique(Flags); + auto AS = std::make_unique(Flags); auto *Aliasee = A.getBaseObject(); auto AliaseeVI = Index.getValueInfo(Aliasee->getGUID()); assert(AliaseeVI && "Alias expects aliasee summary to be available"); @@ -696,7 +696,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( // Create the appropriate summary type. 
if (Function *F = dyn_cast(GV)) { std::unique_ptr Summary = - llvm::make_unique( + std::make_unique( GVFlags, /*InstCount=*/0, FunctionSummary::FFlags{ F->hasFnAttribute(Attribute::ReadNone), @@ -714,7 +714,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( Index.addGlobalValueSummary(*GV, std::move(Summary)); } else { std::unique_ptr Summary = - llvm::make_unique( + std::make_unique( GVFlags, GlobalVarSummary::GVarFlags(false, false), ArrayRef{}); Index.addGlobalValueSummary(*GV, std::move(Summary)); @@ -741,7 +741,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( else if (F.hasProfileData()) { LoopInfo LI{DT}; BranchProbabilityInfo BPI{F, LI}; - BFIPtr = llvm::make_unique(F, BPI, LI); + BFIPtr = std::make_unique(F, BPI, LI); BFI = BFIPtr.get(); } @@ -813,7 +813,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( if (!ModuleSummaryDotFile.empty()) { std::error_code EC; - raw_fd_ostream OSDot(ModuleSummaryDotFile, EC, sys::fs::OpenFlags::F_None); + raw_fd_ostream OSDot(ModuleSummaryDotFile, EC, sys::fs::OpenFlags::OF_None); if (EC) report_fatal_error(Twine("Failed to open dot file ") + ModuleSummaryDotFile + ": " + EC.message() + "\n"); diff --git a/lib/Analysis/MustExecute.cpp b/lib/Analysis/MustExecute.cpp index b616cd6f762b..44527773115d 100644 --- a/lib/Analysis/MustExecute.cpp +++ b/lib/Analysis/MustExecute.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/MustExecute.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/Passes.h" @@ -19,8 +21,11 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; +#define DEBUG_TYPE "must-execute" + const DenseMap & LoopSafetyInfo::getBlockColors() const { return BlockColors; @@ -306,6 +311,17 @@ namespace { } bool runOnFunction(Function &F) override; }; + struct MustBeExecutedContextPrinter : public ModulePass { + static char ID; + + MustBeExecutedContextPrinter() : ModulePass(ID) { + initializeMustBeExecutedContextPrinterPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + bool runOnModule(Module &M) override; + }; } char MustExecutePrinter::ID = 0; @@ -320,6 +336,36 @@ FunctionPass *llvm::createMustExecutePrinter() { return new MustExecutePrinter(); } +char MustBeExecutedContextPrinter::ID = 0; +INITIALIZE_PASS_BEGIN( + MustBeExecutedContextPrinter, "print-must-be-executed-contexts", + "print the must-be-executed-contexed for all instructions", false, true) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_END(MustBeExecutedContextPrinter, + "print-must-be-executed-contexts", + "print the must-be-executed-contexed for all instructions", + false, true) + +ModulePass *llvm::createMustBeExecutedContextPrinter() { + return new MustBeExecutedContextPrinter(); +} + +bool MustBeExecutedContextPrinter::runOnModule(Module &M) { + MustBeExecutedContextExplorer Explorer(true); + for (Function &F : M) { + for (Instruction &I : instructions(F)) { + dbgs() << "-- Explore context of: " << I << "\n"; + for (const Instruction *CI : Explorer.range(&I)) + dbgs() << " [F: " << CI->getFunction()->getName() << "] " << *CI + << "\n"; + } + } + + return false; 
+} + static bool isMustExecuteIn(const Instruction &I, Loop *L, DominatorTree *DT) { // TODO: merge these two routines. For the moment, we display the best // result obtained by *either* implementation. This is a bit unfair since no @@ -396,3 +442,75 @@ bool MustExecutePrinter::runOnFunction(Function &F) { return false; } + +const Instruction * +MustBeExecutedContextExplorer::getMustBeExecutedNextInstruction( + MustBeExecutedIterator &It, const Instruction *PP) { + if (!PP) + return PP; + LLVM_DEBUG(dbgs() << "Find next instruction for " << *PP << "\n"); + + // If we explore only inside a given basic block we stop at terminators. + if (!ExploreInterBlock && PP->isTerminator()) { + LLVM_DEBUG(dbgs() << "\tReached terminator in intra-block mode, done\n"); + return nullptr; + } + + // If we do not traverse the call graph we check if we can make progress in + // the current function. First, check if the instruction is guaranteed to + // transfer execution to the successor. + bool TransfersExecution = isGuaranteedToTransferExecutionToSuccessor(PP); + if (!TransfersExecution) + return nullptr; + + // If this is not a terminator we know that there is a single instruction + // after this one that is executed next if control is transfered. If not, + // we can try to go back to a call site we entered earlier. If none exists, we + // do not know any instruction that has to be executd next. + if (!PP->isTerminator()) { + const Instruction *NextPP = PP->getNextNode(); + LLVM_DEBUG(dbgs() << "\tIntermediate instruction does transfer control\n"); + return NextPP; + } + + // Finally, we have to handle terminators, trivial ones first. + assert(PP->isTerminator() && "Expected a terminator!"); + + // A terminator without a successor is not handled yet. + if (PP->getNumSuccessors() == 0) { + LLVM_DEBUG(dbgs() << "\tUnhandled terminator\n"); + return nullptr; + } + + // A terminator with a single successor, we will continue at the beginning of + // that one. + if (PP->getNumSuccessors() == 1) { + LLVM_DEBUG( + dbgs() << "\tUnconditional terminator, continue with successor\n"); + return &PP->getSuccessor(0)->front(); + } + + LLVM_DEBUG(dbgs() << "\tNo join point found\n"); + return nullptr; +} + +MustBeExecutedIterator::MustBeExecutedIterator( + MustBeExecutedContextExplorer &Explorer, const Instruction *I) + : Explorer(Explorer), CurInst(I) { + reset(I); +} + +void MustBeExecutedIterator::reset(const Instruction *I) { + CurInst = I; + Visited.clear(); + Visited.insert(I); +} + +const Instruction *MustBeExecutedIterator::advance() { + assert(CurInst && "Cannot advance an end iterator!"); + const Instruction *Next = + Explorer.getMustBeExecutedNextInstruction(*this, CurInst); + if (Next && !Visited.insert(Next).second) + Next = nullptr; + return Next; +} diff --git a/lib/Analysis/OptimizationRemarkEmitter.cpp b/lib/Analysis/OptimizationRemarkEmitter.cpp index 72c40a0be232..07a5619a35b9 100644 --- a/lib/Analysis/OptimizationRemarkEmitter.cpp +++ b/lib/Analysis/OptimizationRemarkEmitter.cpp @@ -39,7 +39,7 @@ OptimizationRemarkEmitter::OptimizationRemarkEmitter(const Function *F) BPI.calculate(*F, LI); // Finally compute BFI. 
- OwnedBFI = llvm::make_unique(*F, BPI, LI); + OwnedBFI = std::make_unique(*F, BPI, LI); BFI = OwnedBFI.get(); } @@ -97,7 +97,7 @@ bool OptimizationRemarkEmitterWrapperPass::runOnFunction(Function &Fn) { else BFI = nullptr; - ORE = llvm::make_unique(&Fn, BFI); + ORE = std::make_unique(&Fn, BFI); return false; } diff --git a/lib/Analysis/OrderedInstructions.cpp b/lib/Analysis/OrderedInstructions.cpp index 458c0a7de6c2..e947e5e388a8 100644 --- a/lib/Analysis/OrderedInstructions.cpp +++ b/lib/Analysis/OrderedInstructions.cpp @@ -21,7 +21,7 @@ bool OrderedInstructions::localDominates(const Instruction *InstA, const BasicBlock *IBB = InstA->getParent(); auto OBB = OBBMap.find(IBB); if (OBB == OBBMap.end()) - OBB = OBBMap.insert({IBB, make_unique(IBB)}).first; + OBB = OBBMap.insert({IBB, std::make_unique(IBB)}).first; return OBB->second->dominates(InstA, InstB); } diff --git a/lib/Analysis/ProfileSummaryInfo.cpp b/lib/Analysis/ProfileSummaryInfo.cpp index dce19d6d546e..b99b75715025 100644 --- a/lib/Analysis/ProfileSummaryInfo.cpp +++ b/lib/Analysis/ProfileSummaryInfo.cpp @@ -45,6 +45,13 @@ static cl::opt ProfileSummaryHugeWorkingSetSizeThreshold( " blocks required to reach the -profile-summary-cutoff-hot" " percentile exceeds this count.")); +static cl::opt ProfileSummaryLargeWorkingSetSizeThreshold( + "profile-summary-large-working-set-size-threshold", cl::Hidden, + cl::init(12500), cl::ZeroOrMore, + cl::desc("The code working set size is considered large if the number of" + " blocks required to reach the -profile-summary-cutoff-hot" + " percentile exceeds this count.")); + // The next two options override the counts derived from summary computation and // are useful for debugging purposes. static cl::opt ProfileSummaryHotCount( @@ -186,6 +193,31 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F, return true; } +// Like isFunctionHotInCallGraph but for a given cutoff. +bool ProfileSummaryInfo::isFunctionHotInCallGraphNthPercentile( + int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) { + if (!F || !computeSummary()) + return false; + if (auto FunctionCount = F->getEntryCount()) + if (isHotCountNthPercentile(PercentileCutoff, FunctionCount.getCount())) + return true; + + if (hasSampleProfile()) { + uint64_t TotalCallCount = 0; + for (const auto &BB : *F) + for (const auto &I : BB) + if (isa(I) || isa(I)) + if (auto CallCount = getProfileCount(&I, nullptr)) + TotalCallCount += CallCount.getValue(); + if (isHotCountNthPercentile(PercentileCutoff, TotalCallCount)) + return true; + } + for (const auto &BB : *F) + if (isHotBlockNthPercentile(PercentileCutoff, &BB, &BFI)) + return true; + return false; +} + /// Returns true if the function's entry is a cold. If it returns false, it /// either means it is not cold or it is unknown whether it is cold or not (for /// example, no profile data is available). 
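The percentile-based queries added in this file (isFunctionHotInCallGraphNthPercentile, isHotCountNthPercentile, isHotBlockNthPercentile) take a caller-supplied cutoff instead of the fixed -profile-summary-cutoff-hot value. A minimal usage sketch follows; it is not part of this change, and the 990000 cutoff (99%, in the parts-per-million scale used by the profile summary) and the helper name are illustrative only.

#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/Function.h"

// Returns true when F is hot at the chosen percentile of the profile summary.
// Per the implementation above, this returns false when no summary exists.
static bool isHotAt99thPercentile(llvm::ProfileSummaryInfo &PSI,
                                  llvm::BlockFrequencyInfo &BFI,
                                  const llvm::Function &F) {
  return PSI.isFunctionHotInCallGraphNthPercentile(/*PercentileCutoff=*/990000,
                                                   &F, BFI);
}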
@@ -222,6 +254,23 @@ void ProfileSummaryInfo::computeThresholds() { "Cold count threshold cannot exceed hot count threshold!"); HasHugeWorkingSetSize = HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold; + HasLargeWorkingSetSize = + HotEntry.NumCounts > ProfileSummaryLargeWorkingSetSizeThreshold; +} + +Optional ProfileSummaryInfo::computeThreshold(int PercentileCutoff) { + if (!computeSummary()) + return None; + auto iter = ThresholdCache.find(PercentileCutoff); + if (iter != ThresholdCache.end()) { + return iter->second; + } + auto &DetailedSummary = Summary->getDetailedSummary(); + auto &Entry = + getEntryForPercentile(DetailedSummary, PercentileCutoff); + uint64_t CountThreshold = Entry.MinCount; + ThresholdCache[PercentileCutoff] = CountThreshold; + return CountThreshold; } bool ProfileSummaryInfo::hasHugeWorkingSetSize() { @@ -230,6 +279,12 @@ bool ProfileSummaryInfo::hasHugeWorkingSetSize() { return HasHugeWorkingSetSize && HasHugeWorkingSetSize.getValue(); } +bool ProfileSummaryInfo::hasLargeWorkingSetSize() { + if (!HasLargeWorkingSetSize) + computeThresholds(); + return HasLargeWorkingSetSize && HasLargeWorkingSetSize.getValue(); +} + bool ProfileSummaryInfo::isHotCount(uint64_t C) { if (!HotCountThreshold) computeThresholds(); @@ -242,6 +297,11 @@ bool ProfileSummaryInfo::isColdCount(uint64_t C) { return ColdCountThreshold && C <= ColdCountThreshold.getValue(); } +bool ProfileSummaryInfo::isHotCountNthPercentile(int PercentileCutoff, uint64_t C) { + auto CountThreshold = computeThreshold(PercentileCutoff); + return CountThreshold && C >= CountThreshold.getValue(); +} + uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() { if (!HotCountThreshold) computeThresholds(); @@ -265,6 +325,13 @@ bool ProfileSummaryInfo::isColdBlock(const BasicBlock *BB, return Count && isColdCount(*Count); } +bool ProfileSummaryInfo::isHotBlockNthPercentile(int PercentileCutoff, + const BasicBlock *BB, + BlockFrequencyInfo *BFI) { + auto Count = BFI->getBlockProfileCount(BB); + return Count && isHotCountNthPercentile(PercentileCutoff, *Count); +} + bool ProfileSummaryInfo::isHotCallSite(const CallSite &CS, BlockFrequencyInfo *BFI) { auto C = getProfileCount(CS.getInstruction(), BFI); diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index bc2cfd6fcc42..5ce0a1adeaa0 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -148,6 +148,7 @@ STATISTIC(NumBruteForceTripCountsComputed, static cl::opt MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, + cl::ZeroOrMore, cl::desc("Maximum number of iterations SCEV will " "symbolically execute a constant " "derived loop"), @@ -157,6 +158,9 @@ MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, static cl::opt VerifySCEV( "verify-scev", cl::Hidden, cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); +static cl::opt VerifySCEVStrict( + "verify-scev-strict", cl::Hidden, + cl::desc("Enable stricter verification with -verify-scev is passed")); static cl::opt VerifySCEVMap("verify-scev-maps", cl::Hidden, cl::desc("Verify no dangling value in ScalarEvolution's " @@ -1707,7 +1711,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { // in infinite recursion. In the later case, the analysis code will // cope with a conservative value, and it will take care to purge // that value once it has finished. 
- const SCEV *MaxBECount = getMaxBackedgeTakenCount(L); + const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L); if (!isa(MaxBECount)) { // Manually compute the final value for AR, checking for // overflow. @@ -2051,7 +2055,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { // in infinite recursion. In the later case, the analysis code will // cope with a conservative value, and it will take care to purge // that value once it has finished. - const SCEV *MaxBECount = getMaxBackedgeTakenCount(L); + const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L); if (!isa(MaxBECount)) { // Manually compute the final value for AR, checking for // overflow. @@ -3421,7 +3425,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl &Operands, return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X } - // It's tempting to want to call getMaxBackedgeTakenCount count here and + // It's tempting to want to call getConstantMaxBackedgeTakenCount count here and // use that information to infer NUW and NSW flags. However, computing a // BE count requires calling getAddRecExpr, so we may not yet have a // meaningful BE count at this point (and if we don't, we'd be stuck @@ -4991,7 +4995,7 @@ const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN, // overflow. if (auto *BEInst = dyn_cast(BEValueV)) if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L)) - (void)getAddRecExpr(getAddExpr(StartVal, Accum, Flags), Accum, L, Flags); + (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags); return PHISCEV; } @@ -5596,6 +5600,22 @@ ScalarEvolution::getRangeRef(const SCEV *S, ConservativeResult.intersectWith(X, RangeType)); } + if (const SCEVSMinExpr *SMin = dyn_cast(S)) { + ConstantRange X = getRangeRef(SMin->getOperand(0), SignHint); + for (unsigned i = 1, e = SMin->getNumOperands(); i != e; ++i) + X = X.smin(getRangeRef(SMin->getOperand(i), SignHint)); + return setRange(SMin, SignHint, + ConservativeResult.intersectWith(X, RangeType)); + } + + if (const SCEVUMinExpr *UMin = dyn_cast(S)) { + ConstantRange X = getRangeRef(UMin->getOperand(0), SignHint); + for (unsigned i = 1, e = UMin->getNumOperands(); i != e; ++i) + X = X.umin(getRangeRef(UMin->getOperand(i), SignHint)); + return setRange(UMin, SignHint, + ConservativeResult.intersectWith(X, RangeType)); + } + if (const SCEVUDivExpr *UDiv = dyn_cast(S)) { ConstantRange X = getRangeRef(UDiv->getLHS(), SignHint); ConstantRange Y = getRangeRef(UDiv->getRHS(), SignHint); @@ -5654,7 +5674,7 @@ ScalarEvolution::getRangeRef(const SCEV *S, // TODO: non-affine addrec if (AddRec->isAffine()) { - const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop()); + const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(AddRec->getLoop()); if (!isa(MaxBECount) && getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) { auto RangeFromAffine = getRangeForAffineAR( @@ -6523,7 +6543,7 @@ unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L, unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) { const auto *MaxExitCount = - dyn_cast(getMaxBackedgeTakenCount(L)); + dyn_cast(getConstantMaxBackedgeTakenCount(L)); return getConstantTripCount(MaxExitCount); } @@ -6599,7 +6619,7 @@ const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) { /// Similar to getBackedgeTakenCount, except return the least SCEV value that is /// known never to be less than the actual backedge taken count. 
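The accessor documented just above is renamed to getConstantMaxBackedgeTakenCount in the hunk that follows, spelling out that it returns a constant upper bound on the backedge-taken count (or SCEVCouldNotCompute). A hedged usage sketch, not taken from this patch, with an illustrative helper name:

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include <cstdint>

// True when SE can prove the loop's backedge executes fewer than Limit times;
// non-constant or unknown bounds conservatively return false.
static bool backedgeTakenLessThan(llvm::ScalarEvolution &SE,
                                  const llvm::Loop *L, uint64_t Limit) {
  const llvm::SCEV *MaxBE = SE.getConstantMaxBackedgeTakenCount(L);
  if (const auto *C = llvm::dyn_cast<llvm::SCEVConstant>(MaxBE))
    return C->getAPInt().ult(Limit);
  return false; // SCEVCouldNotCompute or otherwise non-constant
}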
-const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) { +const SCEV *ScalarEvolution::getConstantMaxBackedgeTakenCount(const Loop *L) { return getBackedgeTakenInfo(L).getMax(this); } @@ -9833,6 +9853,10 @@ Optional ScalarEvolution::computeConstantDifference(const SCEV *More, // We avoid subtracting expressions here because this function is usually // fairly deep in the call stack (i.e. is called many times). + // X - X = 0. + if (More == Less) + return APInt(getTypeSizeInBits(More->getType()), 0); + if (isa(Less) && isa(More)) { const auto *LAR = cast(Less); const auto *MAR = cast(More); @@ -10314,10 +10338,43 @@ bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred, return false; } +static bool isKnownPredicateExtendIdiom(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + // zext x u<= sext x, sext x s<= zext x + switch (Pred) { + case ICmpInst::ICMP_SGE: + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ICmpInst::ICMP_SLE: { + // If operand >=s 0 then ZExt == SExt. If operand (LHS); + const SCEVZeroExtendExpr *ZExt = dyn_cast(RHS); + if (SExt && ZExt && SExt->getOperand() == ZExt->getOperand()) + return true; + break; + } + case ICmpInst::ICMP_UGE: + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ICmpInst::ICMP_ULE: { + // If operand >=s 0 then ZExt == SExt. If operand (LHS); + const SCEVSignExtendExpr *SExt = dyn_cast(RHS); + if (SExt && ZExt && SExt->getOperand() == ZExt->getOperand()) + return true; + break; + } + default: + break; + }; + return false; +} + bool ScalarEvolution::isKnownViaNonRecursiveReasoning(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { - return isKnownPredicateViaConstantRanges(Pred, LHS, RHS) || + return isKnownPredicateExtendIdiom(Pred, LHS, RHS) || + isKnownPredicateViaConstantRanges(Pred, LHS, RHS) || IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) || IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) || isKnownPredicateViaNoOverflow(Pred, LHS, RHS); @@ -11434,8 +11491,8 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; - if (!isa(SE->getMaxBackedgeTakenCount(L))) { - OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L); + if (!isa(SE->getConstantMaxBackedgeTakenCount(L))) { + OS << "max backedge-taken count is " << *SE->getConstantMaxBackedgeTakenCount(L); if (SE->isBackedgeTakenCountMaxOrZero(L)) OS << ", actual taken count either this or zero."; } else { @@ -11901,14 +11958,14 @@ void ScalarEvolution::verify() const { SE.getTypeSizeInBits(NewBECount->getType())) CurBECount = SE2.getZeroExtendExpr(CurBECount, NewBECount->getType()); - auto *ConstantDelta = - dyn_cast(SE2.getMinusSCEV(CurBECount, NewBECount)); + const SCEV *Delta = SE2.getMinusSCEV(CurBECount, NewBECount); - if (ConstantDelta && ConstantDelta->getAPInt() != 0) { - dbgs() << "Trip Count Changed!\n"; + // Unless VerifySCEVStrict is set, we only compare constant deltas. 
+ if ((VerifySCEVStrict || isa(Delta)) && !Delta->isZero()) { + dbgs() << "Trip Count for " << *L << " Changed!\n"; dbgs() << "Old: " << *CurBECount << "\n"; dbgs() << "New: " << *NewBECount << "\n"; - dbgs() << "Delta: " << *ConstantDelta << "\n"; + dbgs() << "Delta: " << *Delta << "\n"; std::abort(); } } @@ -11959,7 +12016,7 @@ ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) { bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) { SE.reset(new ScalarEvolution( - F, getAnalysis().getTLI(), + F, getAnalysis().getTLI(F), getAnalysis().getAssumptionCache(F), getAnalysis().getDomTree(), getAnalysis().getLoopInfo())); diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index e8a95d35482c..bceec921188e 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -240,9 +240,6 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, /// division. If so, update S with Factor divided out and return true. /// S need not be evenly divisible if a reasonable remainder can be /// computed. -/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made -/// unnecessary; in its place, just signed-divide Ops[i] by the scale and -/// check to see if the divide was folded. static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder, const SCEV *Factor, ScalarEvolution &SE, const DataLayout &DL) { @@ -1486,7 +1483,18 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { } Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { - if (!CanonicalMode) return expandAddRecExprLiterally(S); + // In canonical mode we compute the addrec as an expression of a canonical IV + // using evaluateAtIteration and expand the resulting SCEV expression. This + // way we avoid introducing new IVs to carry on the comutation of the addrec + // throughout the loop. + // + // For nested addrecs evaluateAtIteration might need a canonical IV of a + // type wider than the addrec itself. Emitting a canonical IV of the + // proper type might produce non-legal types, for example expanding an i64 + // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall + // back to non-canonical mode for nested addrecs. + if (!CanonicalMode || (S->getNumOperands() > 2)) + return expandAddRecExprLiterally(S); Type *Ty = SE.getEffectiveSCEVType(S->getType()); const Loop *L = S->getLoop(); @@ -2094,11 +2102,10 @@ SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At, for (BasicBlock *BB : ExitingBlocks) { ICmpInst::Predicate Pred; Instruction *LHS, *RHS; - BasicBlock *TrueBB, *FalseBB; if (!match(BB->getTerminator(), m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)), - TrueBB, FalseBB))) + m_BasicBlock(), m_BasicBlock()))) continue; if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At)) diff --git a/lib/Analysis/StackSafetyAnalysis.cpp b/lib/Analysis/StackSafetyAnalysis.cpp index 4cf235db86eb..1b3638698950 100644 --- a/lib/Analysis/StackSafetyAnalysis.cpp +++ b/lib/Analysis/StackSafetyAnalysis.cpp @@ -333,8 +333,8 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(const Value *Ptr, UseInfo &US) { // FIXME: consult devirt? // Do not follow aliases, otherwise we could inadvertently follow // dso_preemptable aliases or aliases with interposable linkage. 
- const GlobalValue *Callee = dyn_cast( - CS.getCalledValue()->stripPointerCastsNoFollowAliases()); + const GlobalValue *Callee = + dyn_cast(CS.getCalledValue()->stripPointerCasts()); if (!Callee) { US.updateRange(UnknownRange); return false; diff --git a/lib/Analysis/SyncDependenceAnalysis.cpp b/lib/Analysis/SyncDependenceAnalysis.cpp index 3cf248a31142..8447dc87069d 100644 --- a/lib/Analysis/SyncDependenceAnalysis.cpp +++ b/lib/Analysis/SyncDependenceAnalysis.cpp @@ -218,9 +218,11 @@ struct DivergencePropagator { template std::unique_ptr computeJoinPoints(const BasicBlock &RootBlock, - SuccessorIterable NodeSuccessors, const Loop *ParentLoop, const BasicBlock * PdBoundBlock) { + SuccessorIterable NodeSuccessors, const Loop *ParentLoop) { assert(JoinBlocks); + LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints. Parent loop: " << (ParentLoop ? ParentLoop->getName() : "") << "\n" ); + // bootstrap with branch targets for (const auto *SuccBlock : NodeSuccessors) { DefMap.emplace(SuccBlock, SuccBlock); @@ -228,13 +230,19 @@ struct DivergencePropagator { if (ParentLoop && !ParentLoop->contains(SuccBlock)) { // immediate loop exit from node. ReachedLoopExits.insert(SuccBlock); - continue; } else { // regular successor PendingUpdates.insert(SuccBlock); } } + LLVM_DEBUG( + dbgs() << "SDA: rpo order:\n"; + for (const auto * RpoBlock : FuncRPOT) { + dbgs() << "- " << RpoBlock->getName() << "\n"; + } + ); + auto ItBeginRPO = FuncRPOT.begin(); // skip until term (TODO RPOT won't let us start at @term directly) @@ -245,16 +253,18 @@ struct DivergencePropagator { // propagate definitions at the immediate successors of the node in RPO auto ItBlockRPO = ItBeginRPO; - while (++ItBlockRPO != ItEndRPO && *ItBlockRPO != PdBoundBlock) { + while ((++ItBlockRPO != ItEndRPO) && + !PendingUpdates.empty()) { const auto *Block = *ItBlockRPO; + LLVM_DEBUG(dbgs() << "SDA::joins. visiting " << Block->getName() << "\n"); - // skip @block if not pending update + // skip Block if not pending update auto ItPending = PendingUpdates.find(Block); if (ItPending == PendingUpdates.end()) continue; PendingUpdates.erase(ItPending); - // propagate definition at @block to its successors + // propagate definition at Block to its successors auto ItDef = DefMap.find(Block); const auto *DefBlock = ItDef->second; assert(DefBlock); @@ -278,6 +288,8 @@ struct DivergencePropagator { } } + LLVM_DEBUG(dbgs() << "SDA::joins. After propagation:\n"; printDefs(dbgs())); + // We need to know the definition at the parent loop header to decide // whether the definition at the header is different from the definition at // the loop exits, which would indicate a divergent loop exits. @@ -292,24 +304,17 @@ struct DivergencePropagator { // | // proper exit from both loops // - // D post-dominates B as it is the only proper exit from the "A loop". - // If C has a divergent branch, propagation will therefore stop at D. - // That implies that B will never receive a definition. - // But that definition can only be the same as at D (D itself in thise case) - // because all paths to anywhere have to pass through D. - // - const BasicBlock *ParentLoopHeader = - ParentLoop ? ParentLoop->getHeader() : nullptr; - if (ParentLoop && ParentLoop->contains(PdBoundBlock)) { - DefMap[ParentLoopHeader] = DefMap[PdBoundBlock]; - } - // analyze reached loop exits if (!ReachedLoopExits.empty()) { + const BasicBlock *ParentLoopHeader = + ParentLoop ? 
ParentLoop->getHeader() : nullptr; + assert(ParentLoop); - const auto *HeaderDefBlock = DefMap[ParentLoopHeader]; + auto ItHeaderDef = DefMap.find(ParentLoopHeader); + const auto *HeaderDefBlock = (ItHeaderDef == DefMap.end()) ? nullptr : ItHeaderDef->second; + LLVM_DEBUG(printDefs(dbgs())); - assert(HeaderDefBlock && "no definition in header of carrying loop"); + assert(HeaderDefBlock && "no definition at header of carrying loop"); for (const auto *ExitBlock : ReachedLoopExits) { auto ItExitDef = DefMap.find(ExitBlock); @@ -339,19 +344,10 @@ const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) { return *ItCached->second; } - // dont propagte beyond the immediate post dom of the loop - const auto *PdNode = PDT.getNode(const_cast(Loop.getHeader())); - const auto *IpdNode = PdNode->getIDom(); - const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr; - while (PdBoundBlock && Loop.contains(PdBoundBlock)) { - IpdNode = IpdNode->getIDom(); - PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr; - } - // compute all join points DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI}; auto JoinBlocks = Propagator.computeJoinPoints( - *Loop.getHeader(), LoopExits, Loop.getParentLoop(), PdBoundBlock); + *Loop.getHeader(), LoopExits, Loop.getParentLoop()); auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks)); assert(ItInserted.second); @@ -370,16 +366,11 @@ SyncDependenceAnalysis::join_blocks(const Instruction &Term) { if (ItCached != CachedBranchJoins.end()) return *ItCached->second; - // dont propagate beyond the immediate post dominator of the branch - const auto *PdNode = PDT.getNode(const_cast(Term.getParent())); - const auto *IpdNode = PdNode->getIDom(); - const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr; - // compute all join points DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI}; const auto &TermBlock = *Term.getParent(); auto JoinBlocks = Propagator.computeJoinPoints( - TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock), PdBoundBlock); + TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock)); auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks)); assert(ItInserted.second); diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp index ef139d3257d2..230969698054 100644 --- a/lib/Analysis/TargetLibraryInfo.cpp +++ b/lib/Analysis/TargetLibraryInfo.cpp @@ -28,7 +28,8 @@ static cl::opt ClVectorLibrary( clEnumValN(TargetLibraryInfoImpl::SVML, "SVML", "Intel SVML library"))); -StringRef const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = { +StringLiteral const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = + { #define TLI_DEFINE_STRING #include "llvm/Analysis/TargetLibraryInfo.def" }; @@ -58,14 +59,14 @@ static bool hasBcmp(const Triple &TT) { return TT.isGNUEnvironment() || TT.isMusl(); // Both NetBSD and OpenBSD are planning to remove the function. Windows does // not have it. - return TT.isOSFreeBSD() || TT.isOSSolaris() || TT.isOSDarwin(); + return TT.isOSFreeBSD() || TT.isOSSolaris(); } /// Initialize the set of available library functions based on the specified /// target triple. This should be carefully written so that a missing target /// triple gets a sane set of defaults. static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, - ArrayRef StandardNames) { + ArrayRef StandardNames) { // Verify that the StandardNames array is in alphabetical order. 
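The alphabetical order checked by the assert just below is what lets the rewritten getLibFunc() further down in this file drop its hand-written comparator and use a plain std::lower_bound over the StringLiteral table. Here is a standalone sketch of that lookup pattern; the three names and the helper are placeholders, not the real TLI table:

#include "llvm/ADT/StringRef.h"
#include <algorithm>
#include <iterator>

static const llvm::StringLiteral DemoNames[] = {"cos", "memcpy", "sin"};

// Binary-search a sorted name table; in the real getLibFunc() the resulting
// index is cast to the corresponding LibFunc enumerator.
static bool demoLookup(llvm::StringRef Name, unsigned &Index) {
  const auto *Start = std::begin(DemoNames);
  const auto *End = std::end(DemoNames);
  const auto *I = std::lower_bound(Start, End, Name);
  if (I != End && *I == Name) {
    Index = unsigned(I - Start);
    return true;
  }
  return false;
}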
assert(std::is_sorted(StandardNames.begin(), StandardNames.end(), [](StringRef LHS, StringRef RHS) { @@ -104,19 +105,10 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setShouldSignExtI32Param(ShouldSignExtI32Param); if (T.getArch() == Triple::r600 || - T.getArch() == Triple::amdgcn) { - TLI.setUnavailable(LibFunc_ldexp); - TLI.setUnavailable(LibFunc_ldexpf); - TLI.setUnavailable(LibFunc_ldexpl); - TLI.setUnavailable(LibFunc_exp10); - TLI.setUnavailable(LibFunc_exp10f); - TLI.setUnavailable(LibFunc_exp10l); - TLI.setUnavailable(LibFunc_log10); - TLI.setUnavailable(LibFunc_log10f); - TLI.setUnavailable(LibFunc_log10l); - } + T.getArch() == Triple::amdgcn) + TLI.disableAllFunctions(); - // There are no library implementations of mempcy and memset for AMD gpus and + // There are no library implementations of memcpy and memset for AMD gpus and // these can be difficult to lower in the backend. if (T.getArch() == Triple::r600 || T.getArch() == Triple::amdgcn) { @@ -623,19 +615,14 @@ static StringRef sanitizeFunctionName(StringRef funcName) { return GlobalValue::dropLLVMManglingEscape(funcName); } -bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, - LibFunc &F) const { - StringRef const *Start = &StandardNames[0]; - StringRef const *End = &StandardNames[NumLibFuncs]; - +bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, LibFunc &F) const { funcName = sanitizeFunctionName(funcName); if (funcName.empty()) return false; - StringRef const *I = std::lower_bound( - Start, End, funcName, [](StringRef LHS, StringRef RHS) { - return LHS < RHS; - }); + const auto *Start = std::begin(StandardNames); + const auto *End = std::end(StandardNames); + const auto *I = std::lower_bound(Start, End, funcName); if (I != End && *I == funcName) { F = (LibFunc)(I - Start); return true; @@ -1481,6 +1468,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, return false; } case LibFunc::NumLibFuncs: + case LibFunc::NotLibFunc: break; } @@ -1599,14 +1587,6 @@ StringRef TargetLibraryInfoImpl::getScalarizedFunction(StringRef F, return I->ScalarFnName; } -TargetLibraryInfo TargetLibraryAnalysis::run(Module &M, - ModuleAnalysisManager &) { - if (PresetInfoImpl) - return TargetLibraryInfo(*PresetInfoImpl); - - return TargetLibraryInfo(lookupInfoImpl(Triple(M.getTargetTriple()))); -} - TargetLibraryInfo TargetLibraryAnalysis::run(Function &F, FunctionAnalysisManager &) { if (PresetInfoImpl) diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index eb04c34453fb..c9c294873ea6 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -9,6 +9,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -59,11 +60,7 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE, SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); - for (SmallVectorImpl::iterator I = ExitingBlocks.begin(), - IE = ExitingBlocks.end(); - I != IE; ++I) { - BasicBlock *BB = *I; - + for (BasicBlock *BB : ExitingBlocks) { // If we pass the updated counter back through a phi, we need to know // which latch the updated value will be coming from. 
if (!L->isLoopLatch(BB)) { @@ -97,13 +94,11 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE, // For this to be true, we must dominate all blocks with backedges. Such // blocks are in-loop predecessors to the header block. bool NotAlways = false; - for (pred_iterator PI = pred_begin(L->getHeader()), - PIE = pred_end(L->getHeader()); - PI != PIE; ++PI) { - if (!L->contains(*PI)) + for (BasicBlock *Pred : predecessors(L->getHeader())) { + if (!L->contains(Pred)) continue; - if (!DT.dominates(*I, *PI)) { + if (!DT.dominates(BB, Pred)) { NotAlways = true; break; } @@ -127,7 +122,7 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE, // Note that this block may not be the loop latch block, even if the loop // has a latch block. - ExitBlock = *I; + ExitBlock = BB; ExitCount = EC; break; } @@ -227,6 +222,16 @@ unsigned TargetTransformInfo::getFlatAddressSpace() const { return TTIImpl->getFlatAddressSpace(); } +bool TargetTransformInfo::collectFlatAddressOperands( + SmallVectorImpl &OpIndexes, Intrinsic::ID IID) const { + return TTIImpl->collectFlatAddressOperands(OpIndexes, IID); +} + +bool TargetTransformInfo::rewriteIntrinsicWithAddressSpace( + IntrinsicInst *II, Value *OldV, Value *NewV) const { + return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV); +} + bool TargetTransformInfo::isLoweredToCall(const Function *F) const { return TTIImpl->isLoweredToCall(F); } @@ -283,21 +288,22 @@ bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const { return TTIImpl->shouldFavorBackedgeIndex(L); } -bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const { - return TTIImpl->isLegalMaskedStore(DataType); +bool TargetTransformInfo::isLegalMaskedStore(Type *DataType, + MaybeAlign Alignment) const { + return TTIImpl->isLegalMaskedStore(DataType, Alignment); } -bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const { - return TTIImpl->isLegalMaskedLoad(DataType); +bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType, + MaybeAlign Alignment) const { + return TTIImpl->isLegalMaskedLoad(DataType, Alignment); } bool TargetTransformInfo::isLegalNTStore(Type *DataType, - unsigned Alignment) const { + Align Alignment) const { return TTIImpl->isLegalNTStore(DataType, Alignment); } -bool TargetTransformInfo::isLegalNTLoad(Type *DataType, - unsigned Alignment) const { +bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const { return TTIImpl->isLegalNTLoad(DataType, Alignment); } @@ -359,14 +365,6 @@ bool TargetTransformInfo::isTypeLegal(Type *Ty) const { return TTIImpl->isTypeLegal(Ty); } -unsigned TargetTransformInfo::getJumpBufAlignment() const { - return TTIImpl->getJumpBufAlignment(); -} - -unsigned TargetTransformInfo::getJumpBufSize() const { - return TTIImpl->getJumpBufSize(); -} - bool TargetTransformInfo::shouldBuildLookupTables() const { return TTIImpl->shouldBuildLookupTables(); } @@ -470,8 +468,16 @@ int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx, return Cost; } -unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const { - return TTIImpl->getNumberOfRegisters(Vector); +unsigned TargetTransformInfo::getNumberOfRegisters(unsigned ClassID) const { + return TTIImpl->getNumberOfRegisters(ClassID); +} + +unsigned TargetTransformInfo::getRegisterClassForType(bool Vector, Type *Ty) const { + return TTIImpl->getRegisterClassForType(Vector, Ty); +} + +const char* TargetTransformInfo::getRegisterClassName(unsigned ClassID) const { + return 
TTIImpl->getRegisterClassName(ClassID); } unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { @@ -1276,6 +1282,8 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { return getVectorInstrCost(I->getOpcode(), IE->getType(), Idx); } + case Instruction::ExtractValue: + return 0; // Model all ExtractValue nodes as free. case Instruction::ShuffleVector: { const ShuffleVectorInst *Shuffle = cast(I); Type *Ty = Shuffle->getType(); diff --git a/lib/Analysis/TypeMetadataUtils.cpp b/lib/Analysis/TypeMetadataUtils.cpp index 9311dfbc6eba..072d291f3f93 100644 --- a/lib/Analysis/TypeMetadataUtils.cpp +++ b/lib/Analysis/TypeMetadataUtils.cpp @@ -127,3 +127,35 @@ void llvm::findDevirtualizableCallsForTypeCheckedLoad( findCallsAtConstantOffset(DevirtCalls, &HasNonCallUses, LoadedPtr, Offset->getZExtValue(), CI, DT); } + +Constant *llvm::getPointerAtOffset(Constant *I, uint64_t Offset, Module &M) { + if (I->getType()->isPointerTy()) { + if (Offset == 0) + return I; + return nullptr; + } + + const DataLayout &DL = M.getDataLayout(); + + if (auto *C = dyn_cast(I)) { + const StructLayout *SL = DL.getStructLayout(C->getType()); + if (Offset >= SL->getSizeInBytes()) + return nullptr; + + unsigned Op = SL->getElementContainingOffset(Offset); + return getPointerAtOffset(cast(I->getOperand(Op)), + Offset - SL->getElementOffset(Op), M); + } + if (auto *C = dyn_cast(I)) { + ArrayType *VTableTy = C->getType(); + uint64_t ElemSize = DL.getTypeAllocSize(VTableTy->getElementType()); + + unsigned Op = Offset / ElemSize; + if (Op >= C->getNumOperands()) + return nullptr; + + return getPointerAtOffset(cast(I->getOperand(Op)), + Offset % ElemSize, M); + } + return nullptr; +} diff --git a/lib/Analysis/VFABIDemangling.cpp b/lib/Analysis/VFABIDemangling.cpp new file mode 100644 index 000000000000..6fd8ae63f5f0 --- /dev/null +++ b/lib/Analysis/VFABIDemangling.cpp @@ -0,0 +1,418 @@ +//===- VFABIDemangling.cpp - Vector Function ABI demangling utilities. ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/VectorUtils.h" + +using namespace llvm; + +namespace { +/// Utilities for the Vector Function ABI name parser. + +/// Return types for the parser functions. +enum class ParseRet { + OK, // Found. + None, // Not found. + Error // Syntax error. +}; + +/// Extracts the `` information from the mangled string, and +/// sets the `ISA` accordingly. +ParseRet tryParseISA(StringRef &MangledName, VFISAKind &ISA) { + if (MangledName.empty()) + return ParseRet::Error; + + ISA = StringSwitch(MangledName.take_front(1)) + .Case("n", VFISAKind::AdvancedSIMD) + .Case("s", VFISAKind::SVE) + .Case("b", VFISAKind::SSE) + .Case("c", VFISAKind::AVX) + .Case("d", VFISAKind::AVX2) + .Case("e", VFISAKind::AVX512) + .Default(VFISAKind::Unknown); + + MangledName = MangledName.drop_front(1); + + return ParseRet::OK; +} + +/// Extracts the `` information from the mangled string, and +/// sets `IsMasked` accordingly. The input string `MangledName` is +/// left unmodified. 
+ParseRet tryParseMask(StringRef &MangledName, bool &IsMasked) { + if (MangledName.consume_front("M")) { + IsMasked = true; + return ParseRet::OK; + } + + if (MangledName.consume_front("N")) { + IsMasked = false; + return ParseRet::OK; + } + + return ParseRet::Error; +} + +/// Extract the `` information from the mangled string, and +/// sets `VF` accordingly. A ` == "x"` token is interpreted as a scalable +/// vector length. On success, the `` token is removed from +/// the input string `ParseString`. +/// +ParseRet tryParseVLEN(StringRef &ParseString, unsigned &VF, bool &IsScalable) { + if (ParseString.consume_front("x")) { + VF = 0; + IsScalable = true; + return ParseRet::OK; + } + + if (ParseString.consumeInteger(10, VF)) + return ParseRet::Error; + + IsScalable = false; + return ParseRet::OK; +} + +/// The function looks for the following strings at the beginning of +/// the input string `ParseString`: +/// +/// +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the correspondent enum value, sets `Pos` to +/// , and return success. On a syntax error, it return a +/// parsing error. If nothing is parsed, it returns None. +/// +/// The function expects to be one of "ls", "Rs", "Us" or +/// "Ls". +ParseRet tryParseLinearTokenWithRuntimeStep(StringRef &ParseString, + VFParamKind &PKind, int &Pos, + const StringRef Token) { + if (ParseString.consume_front(Token)) { + PKind = VFABI::getVFParamKindFromString(Token); + if (ParseString.consumeInteger(10, Pos)) + return ParseRet::Error; + return ParseRet::OK; + } + + return ParseRet::None; +} + +/// The function looks for the following stringt at the beginning of +/// the input string `ParseString`: +/// +/// +/// +/// is one of "ls", "Rs", "Us" or "Ls". +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the correspondent enum value, sets `StepOrPos` to +/// , and return success. On a syntax error, it return a +/// parsing error. If nothing is parsed, it returns None. +ParseRet tryParseLinearWithRuntimeStep(StringRef &ParseString, + VFParamKind &PKind, int &StepOrPos) { + ParseRet Ret; + + // "ls" + Ret = tryParseLinearTokenWithRuntimeStep(ParseString, PKind, StepOrPos, "ls"); + if (Ret != ParseRet::None) + return Ret; + + // "Rs" + Ret = tryParseLinearTokenWithRuntimeStep(ParseString, PKind, StepOrPos, "Rs"); + if (Ret != ParseRet::None) + return Ret; + + // "Ls" + Ret = tryParseLinearTokenWithRuntimeStep(ParseString, PKind, StepOrPos, "Ls"); + if (Ret != ParseRet::None) + return Ret; + + // "Us" + Ret = tryParseLinearTokenWithRuntimeStep(ParseString, PKind, StepOrPos, "Us"); + if (Ret != ParseRet::None) + return Ret; + + return ParseRet::None; +} + +/// The function looks for the following strings at the beginning of +/// the input string `ParseString`: +/// +/// {"n"} +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the correspondent enum value, sets `LinearStep` to +/// , and return success. On a syntax error, it return a +/// parsing error. If nothing is parsed, it returns None. +/// +/// The function expects to be one of "l", "R", "U" or +/// "L". 
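These tryParse* token helpers feed VFABI::tryDemangleForVFABI, defined near the end of this new file. As a hedged illustration of the overall scheme (not part of the patch): the mangled name "_ZGVnN2v_foo" encodes the AdvancedSIMD ISA ('n'), an unmasked variant ('N'), two lanes ('2'), one vector parameter ('v'), and the scalar function "foo". The field names below (ScalarName, VectorName, Shape.VF) are assumed to follow the VFInfo/VFShape structs this import adds to VectorUtils.h.

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Support/raw_ostream.h"

// Decode a sample Vector Function ABI name and print what was recovered.
static void demoDemangle() {
  llvm::Optional<llvm::VFInfo> Info =
      llvm::VFABI::tryDemangleForVFABI("_ZGVnN2v_foo");
  if (!Info)
    return;
  llvm::errs() << "scalar: " << Info->ScalarName        // "foo"
               << " vector: " << Info->VectorName       // "_ZGVnN2v_foo"
               << " lanes: " << Info->Shape.VF << "\n"; // 2
}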
+ParseRet tryParseCompileTimeLinearToken(StringRef &ParseString, + VFParamKind &PKind, int &LinearStep, + const StringRef Token) { + if (ParseString.consume_front(Token)) { + PKind = VFABI::getVFParamKindFromString(Token); + const bool Negate = ParseString.consume_front("n"); + if (ParseString.consumeInteger(10, LinearStep)) + LinearStep = 1; + if (Negate) + LinearStep *= -1; + return ParseRet::OK; + } + + return ParseRet::None; +} + +/// The function looks for the following strings at the beginning of +/// the input string `ParseString`: +/// +/// ["l" | "R" | "U" | "L"] {"n"} +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the correspondent enum value, sets `LinearStep` to +/// , and return success. On a syntax error, it return a +/// parsing error. If nothing is parsed, it returns None. +ParseRet tryParseLinearWithCompileTimeStep(StringRef &ParseString, + VFParamKind &PKind, int &StepOrPos) { + // "l" {"n"} + if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "l") == + ParseRet::OK) + return ParseRet::OK; + + // "R" {"n"} + if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "R") == + ParseRet::OK) + return ParseRet::OK; + + // "L" {"n"} + if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "L") == + ParseRet::OK) + return ParseRet::OK; + + // "U" {"n"} + if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "U") == + ParseRet::OK) + return ParseRet::OK; + + return ParseRet::None; +} + +/// The function looks for the following strings at the beginning of +/// the input string `ParseString`: +/// +/// "u" +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the correspondent enum value, sets `Pos` to +/// , and return success. On a syntax error, it return a +/// parsing error. If nothing is parsed, it returns None. +ParseRet tryParseUniform(StringRef &ParseString, VFParamKind &PKind, int &Pos) { + // "u" + const char *UniformToken = "u"; + if (ParseString.consume_front(UniformToken)) { + PKind = VFABI::getVFParamKindFromString(UniformToken); + if (ParseString.consumeInteger(10, Pos)) + return ParseRet::Error; + + return ParseRet::OK; + } + return ParseRet::None; +} + +/// Looks into the part of the mangled name in search +/// for valid paramaters at the beginning of the string +/// `ParseString`. +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the correspondent enum value, sets `StepOrPos` +/// accordingly, and return success. On a syntax error, it return a +/// parsing error. If nothing is parsed, it returns None. +ParseRet tryParseParameter(StringRef &ParseString, VFParamKind &PKind, + int &StepOrPos) { + if (ParseString.consume_front("v")) { + PKind = VFParamKind::Vector; + StepOrPos = 0; + return ParseRet::OK; + } + + const ParseRet HasLinearRuntime = + tryParseLinearWithRuntimeStep(ParseString, PKind, StepOrPos); + if (HasLinearRuntime != ParseRet::None) + return HasLinearRuntime; + + const ParseRet HasLinearCompileTime = + tryParseLinearWithCompileTimeStep(ParseString, PKind, StepOrPos); + if (HasLinearCompileTime != ParseRet::None) + return HasLinearCompileTime; + + const ParseRet HasUniform = tryParseUniform(ParseString, PKind, StepOrPos); + if (HasUniform != ParseRet::None) + return HasUniform; + + return ParseRet::None; +} + +/// Looks into the part of the mangled name in search +/// of a valid 'aligned' clause. 
The function should be invoked +/// after parsing a parameter via `tryParseParameter`. +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the correspondent enum value, sets `StepOrPos` +/// accordingly, and return success. On a syntax error, it return a +/// parsing error. If nothing is parsed, it returns None. +ParseRet tryParseAlign(StringRef &ParseString, Align &Alignment) { + uint64_t Val; + // "a" + if (ParseString.consume_front("a")) { + if (ParseString.consumeInteger(10, Val)) + return ParseRet::Error; + + if (!isPowerOf2_64(Val)) + return ParseRet::Error; + + Alignment = Align(Val); + + return ParseRet::OK; + } + + return ParseRet::None; +} +} // namespace + +// Format of the ABI name: +// _ZGV_[()] +Optional VFABI::tryDemangleForVFABI(StringRef MangledName) { + // Assume there is no custom name , and therefore the + // vector name consists of + // _ZGV_. + StringRef VectorName = MangledName; + + // Parse the fixed size part of the manled name + if (!MangledName.consume_front("_ZGV")) + return None; + + // Extract ISA. An unknow ISA is also supported, so we accept all + // values. + VFISAKind ISA; + if (tryParseISA(MangledName, ISA) != ParseRet::OK) + return None; + + // Extract . + bool IsMasked; + if (tryParseMask(MangledName, IsMasked) != ParseRet::OK) + return None; + + // Parse the variable size, starting from . + unsigned VF; + bool IsScalable; + if (tryParseVLEN(MangledName, VF, IsScalable) != ParseRet::OK) + return None; + + // Parse the . + ParseRet ParamFound; + SmallVector Parameters; + do { + const unsigned ParameterPos = Parameters.size(); + VFParamKind PKind; + int StepOrPos; + ParamFound = tryParseParameter(MangledName, PKind, StepOrPos); + + // Bail off if there is a parsing error in the parsing of the parameter. + if (ParamFound == ParseRet::Error) + return None; + + if (ParamFound == ParseRet::OK) { + Align Alignment; + // Look for the alignment token "a ". + const ParseRet AlignFound = tryParseAlign(MangledName, Alignment); + // Bail off if there is a syntax error in the align token. + if (AlignFound == ParseRet::Error) + return None; + + // Add the parameter. + Parameters.push_back({ParameterPos, PKind, StepOrPos, Alignment}); + } + } while (ParamFound == ParseRet::OK); + + // A valid MangledName mus have at least one valid entry in the + // . + if (Parameters.empty()) + return None; + + // Check for the and the optional , which + // are separated from the prefix with "_" + if (!MangledName.consume_front("_")) + return None; + + // The rest of the string must be in the format: + // [()] + const StringRef ScalarName = + MangledName.take_while([](char In) { return In != '('; }); + + if (ScalarName.empty()) + return None; + + // Reduce MangledName to [()]. + MangledName = MangledName.ltrim(ScalarName); + // Find the optional custom name redirection. + if (MangledName.consume_front("(")) { + if (!MangledName.consume_back(")")) + return None; + // Update the vector variant with the one specified by the user. + VectorName = MangledName; + // If the vector name is missing, bail out. + if (VectorName.empty()) + return None; + } + + // When is "M", we need to add a parameter that is used as + // global predicate for the function. + if (IsMasked) { + const unsigned Pos = Parameters.size(); + Parameters.push_back({Pos, VFParamKind::GlobalPredicate}); + } + + // Asserts for parameters of type `VFParamKind::GlobalPredicate`, as + // prescribed by the Vector Function ABI specifications supported by + // this parser: + // 1. 
Uniqueness. + // 2. Must be the last in the parameter list. + const auto NGlobalPreds = std::count_if( + Parameters.begin(), Parameters.end(), [](const VFParameter PK) { + return PK.ParamKind == VFParamKind::GlobalPredicate; + }); + assert(NGlobalPreds < 2 && "Cannot have more than one global predicate."); + if (NGlobalPreds) + assert(Parameters.back().ParamKind == VFParamKind::GlobalPredicate && + "The global predicate must be the last parameter"); + + const VFShape Shape({VF, IsScalable, ISA, Parameters}); + return VFInfo({Shape, ScalarName, VectorName}); +} + +VFParamKind VFABI::getVFParamKindFromString(const StringRef Token) { + const VFParamKind ParamKind = StringSwitch(Token) + .Case("v", VFParamKind::Vector) + .Case("l", VFParamKind::OMP_Linear) + .Case("R", VFParamKind::OMP_LinearRef) + .Case("L", VFParamKind::OMP_LinearVal) + .Case("U", VFParamKind::OMP_LinearUVal) + .Case("ls", VFParamKind::OMP_LinearPos) + .Case("Ls", VFParamKind::OMP_LinearValPos) + .Case("Rs", VFParamKind::OMP_LinearRefPos) + .Case("Us", VFParamKind::OMP_LinearUValPos) + .Case("u", VFParamKind::OMP_Uniform) + .Default(VFParamKind::Unknown); + + if (ParamKind != VFParamKind::Unknown) + return ParamKind; + + // This function should never be invoked with an invalid input. + llvm_unreachable("This fuction should be invoken only on parameters" + " that have a textual representation in the mangled name" + " of the Vector Function ABI"); +} diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index c70906dcc629..bbf389991836 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -558,12 +558,18 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv, return true; } + // Don't let an assume affect itself - this would cause the problems + // `isEphemeralValueOf` is trying to prevent, and it would also make + // the loop below go out of bounds. + if (Inv == CxtI) + return false; + // The context comes first, but they're both in the same block. Make sure // there is nothing in between that might interrupt the control flow. for (BasicBlock::const_iterator I = std::next(BasicBlock::const_iterator(CxtI)), IE(Inv); I != IE; ++I) - if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I)) + if (!isGuaranteedToTransferExecutionToSuccessor(&*I)) return false; return !isEphemeralValueOf(Inv, CxtI); @@ -1049,7 +1055,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, break; } case Instruction::Select: { - const Value *LHS, *RHS; + const Value *LHS = nullptr, *RHS = nullptr; SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor; if (SelectPatternResult::isMinOrMax(SPF)) { computeKnownBits(RHS, Known, Depth + 1, Q); @@ -1095,7 +1101,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, // RHS from matchSelectPattern returns the negation part of abs pattern. // If the negate has an NSW flag we can assume the sign bit of the result // will be 0 because that makes abs(INT_MIN) undefined. - if (Q.IIQ.hasNoSignedWrap(cast(RHS))) + if (match(RHS, m_Neg(m_Specific(LHS))) && + Q.IIQ.hasNoSignedWrap(cast(RHS))) MaxHighZeros = 1; } @@ -1366,7 +1373,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, else if (LR == I) L = LL; else - break; + continue; // Check for recurrence with L and R flipped. // Ok, we have a PHI of the form L op= R. Check for low // zero bits. 
computeKnownBits(R, Known2, Depth + 1, Q); @@ -1714,9 +1721,9 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, // Aligned pointers have trailing zeros - refine Known.Zero set if (V->getType()->isPointerTy()) { - unsigned Align = V->getPointerAlignment(Q.DL); + const MaybeAlign Align = V->getPointerAlignment(Q.DL); if (Align) - Known.Zero.setLowBits(countTrailingZeros(Align)); + Known.Zero.setLowBits(countTrailingZeros(Align->value())); } // computeKnownBitsFromAssume strictly refines Known. @@ -2066,7 +2073,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { if (const auto *Call = dyn_cast(V)) { if (Call->isReturnNonNull()) return true; - if (const auto *RP = getArgumentAliasingToReturnedPointer(Call)) + if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true)) return isKnownNonZero(RP, Depth, Q); } } @@ -2300,7 +2307,7 @@ static bool isSignedMinMaxClamp(const Value *Select, const Value *&In, cast(Select)->getOpcode() == Instruction::Select && "Input should be a Select!"); - const Value *LHS, *RHS, *LHS2, *RHS2; + const Value *LHS = nullptr, *RHS = nullptr; SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor; if (SPF != SPF_SMAX && SPF != SPF_SMIN) return false; @@ -2308,6 +2315,7 @@ static bool isSignedMinMaxClamp(const Value *Select, const Value *&In, if (!match(RHS, m_APInt(CLow))) return false; + const Value *LHS2 = nullptr, *RHS2 = nullptr; SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor; if (getInverseMinMaxFlavor(SPF) != SPF2) return false; @@ -2384,253 +2392,256 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, if (Depth == MaxDepth) return 1; // Limit search depth. - const Operator *U = dyn_cast(V); - switch (Operator::getOpcode(V)) { - default: break; - case Instruction::SExt: - Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); - return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp; + if (auto *U = dyn_cast(V)) { + switch (Operator::getOpcode(V)) { + default: break; + case Instruction::SExt: + Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); + return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp; - case Instruction::SDiv: { - const APInt *Denominator; - // sdiv X, C -> adds log(C) sign bits. - if (match(U->getOperand(1), m_APInt(Denominator))) { + case Instruction::SDiv: { + const APInt *Denominator; + // sdiv X, C -> adds log(C) sign bits. + if (match(U->getOperand(1), m_APInt(Denominator))) { - // Ignore non-positive denominator. - if (!Denominator->isStrictlyPositive()) - break; + // Ignore non-positive denominator. + if (!Denominator->isStrictlyPositive()) + break; - // Calculate the incoming numerator bits. - unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + // Calculate the incoming numerator bits. + unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - // Add floor(log(C)) bits to the numerator bits. - return std::min(TyBits, NumBits + Denominator->logBase2()); + // Add floor(log(C)) bits to the numerator bits. + return std::min(TyBits, NumBits + Denominator->logBase2()); + } + break; } - break; - } - - case Instruction::SRem: { - const APInt *Denominator; - // srem X, C -> we know that the result is within [-C+1,C) when C is a - // positive constant. This let us put a lower bound on the number of sign - // bits. - if (match(U->getOperand(1), m_APInt(Denominator))) { - - // Ignore non-positive denominator. 
- if (!Denominator->isStrictlyPositive()) - break; - // Calculate the incoming numerator bits. SRem by a positive constant - // can't lower the number of sign bits. - unsigned NumrBits = - ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + case Instruction::SRem: { + const APInt *Denominator; + // srem X, C -> we know that the result is within [-C+1,C) when C is a + // positive constant. This let us put a lower bound on the number of sign + // bits. + if (match(U->getOperand(1), m_APInt(Denominator))) { - // Calculate the leading sign bit constraints by examining the - // denominator. Given that the denominator is positive, there are two - // cases: - // - // 1. the numerator is positive. The result range is [0,C) and [0,C) u< - // (1 << ceilLogBase2(C)). - // - // 2. the numerator is negative. Then the result range is (-C,0] and - // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)). - // - // Thus a lower bound on the number of sign bits is `TyBits - - // ceilLogBase2(C)`. + // Ignore non-positive denominator. + if (!Denominator->isStrictlyPositive()) + break; - unsigned ResBits = TyBits - Denominator->ceilLogBase2(); - return std::max(NumrBits, ResBits); + // Calculate the incoming numerator bits. SRem by a positive constant + // can't lower the number of sign bits. + unsigned NumrBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + + // Calculate the leading sign bit constraints by examining the + // denominator. Given that the denominator is positive, there are two + // cases: + // + // 1. the numerator is positive. The result range is [0,C) and [0,C) u< + // (1 << ceilLogBase2(C)). + // + // 2. the numerator is negative. Then the result range is (-C,0] and + // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)). + // + // Thus a lower bound on the number of sign bits is `TyBits - + // ceilLogBase2(C)`. + + unsigned ResBits = TyBits - Denominator->ceilLogBase2(); + return std::max(NumrBits, ResBits); + } + break; } - break; - } - case Instruction::AShr: { - Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - // ashr X, C -> adds C sign bits. Vectors too. - const APInt *ShAmt; - if (match(U->getOperand(1), m_APInt(ShAmt))) { - if (ShAmt->uge(TyBits)) - break; // Bad shift. - unsigned ShAmtLimited = ShAmt->getZExtValue(); - Tmp += ShAmtLimited; - if (Tmp > TyBits) Tmp = TyBits; - } - return Tmp; - } - case Instruction::Shl: { - const APInt *ShAmt; - if (match(U->getOperand(1), m_APInt(ShAmt))) { - // shl destroys sign bits. + case Instruction::AShr: { Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - if (ShAmt->uge(TyBits) || // Bad shift. - ShAmt->uge(Tmp)) break; // Shifted all sign bits out. - Tmp2 = ShAmt->getZExtValue(); - return Tmp - Tmp2; + // ashr X, C -> adds C sign bits. Vectors too. + const APInt *ShAmt; + if (match(U->getOperand(1), m_APInt(ShAmt))) { + if (ShAmt->uge(TyBits)) + break; // Bad shift. + unsigned ShAmtLimited = ShAmt->getZExtValue(); + Tmp += ShAmtLimited; + if (Tmp > TyBits) Tmp = TyBits; + } + return Tmp; } - break; - } - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: // NOT is handled here. - // Logical binary ops preserve the number of sign bits at the worst. - Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - if (Tmp != 1) { - Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); - FirstAnswer = std::min(Tmp, Tmp2); - // We computed what we know about the sign bits as our first - // answer. 
Now proceed to the generic code that uses - // computeKnownBits, and pick whichever answer is better. + case Instruction::Shl: { + const APInt *ShAmt; + if (match(U->getOperand(1), m_APInt(ShAmt))) { + // shl destroys sign bits. + Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + if (ShAmt->uge(TyBits) || // Bad shift. + ShAmt->uge(Tmp)) break; // Shifted all sign bits out. + Tmp2 = ShAmt->getZExtValue(); + return Tmp - Tmp2; + } + break; } - break; - - case Instruction::Select: { - // If we have a clamp pattern, we know that the number of sign bits will be - // the minimum of the clamp min/max range. - const Value *X; - const APInt *CLow, *CHigh; - if (isSignedMinMaxClamp(U, X, CLow, CHigh)) - return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits()); - - Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); - if (Tmp == 1) break; - Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q); - return std::min(Tmp, Tmp2); - } - - case Instruction::Add: - // Add can have at most one carry bit. Thus we know that the output - // is, at worst, one more bit than the inputs. - Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - if (Tmp == 1) break; - - // Special case decrementing a value (ADD X, -1): - if (const auto *CRHS = dyn_cast(U->getOperand(1))) - if (CRHS->isAllOnesValue()) { - KnownBits Known(TyBits); - computeKnownBits(U->getOperand(0), Known, Depth + 1, Q); - - // If the input is known to be 0 or 1, the output is 0/-1, which is all - // sign bits set. - if ((Known.Zero | 1).isAllOnesValue()) - return TyBits; - - // If we are subtracting one from a positive number, there is no carry - // out of the result. - if (Known.isNonNegative()) - return Tmp; + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: // NOT is handled here. + // Logical binary ops preserve the number of sign bits at the worst. + Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + if (Tmp != 1) { + Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); + FirstAnswer = std::min(Tmp, Tmp2); + // We computed what we know about the sign bits as our first + // answer. Now proceed to the generic code that uses + // computeKnownBits, and pick whichever answer is better. } + break; - Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); - if (Tmp2 == 1) break; - return std::min(Tmp, Tmp2)-1; + case Instruction::Select: { + // If we have a clamp pattern, we know that the number of sign bits will + // be the minimum of the clamp min/max range. + const Value *X; + const APInt *CLow, *CHigh; + if (isSignedMinMaxClamp(U, X, CLow, CHigh)) + return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits()); + + Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); + if (Tmp == 1) break; + Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q); + return std::min(Tmp, Tmp2); + } - case Instruction::Sub: - Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); - if (Tmp2 == 1) break; - - // Handle NEG. - if (const auto *CLHS = dyn_cast(U->getOperand(0))) - if (CLHS->isNullValue()) { - KnownBits Known(TyBits); - computeKnownBits(U->getOperand(1), Known, Depth + 1, Q); - // If the input is known to be 0 or 1, the output is 0/-1, which is all - // sign bits set. - if ((Known.Zero | 1).isAllOnesValue()) - return TyBits; - - // If the input is known to be positive (the sign bit is known clear), - // the output of the NEG has the same number of sign bits as the input. - if (Known.isNonNegative()) - return Tmp2; - - // Otherwise, we treat this like a SUB. 
- } + case Instruction::Add: + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + if (Tmp == 1) break; + + // Special case decrementing a value (ADD X, -1): + if (const auto *CRHS = dyn_cast(U->getOperand(1))) + if (CRHS->isAllOnesValue()) { + KnownBits Known(TyBits); + computeKnownBits(U->getOperand(0), Known, Depth + 1, Q); + + // If the input is known to be 0 or 1, the output is 0/-1, which is + // all sign bits set. + if ((Known.Zero | 1).isAllOnesValue()) + return TyBits; + + // If we are subtracting one from a positive number, there is no carry + // out of the result. + if (Known.isNonNegative()) + return Tmp; + } - // Sub can have at most one carry bit. Thus we know that the output - // is, at worst, one more bit than the inputs. - Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - if (Tmp == 1) break; - return std::min(Tmp, Tmp2)-1; + Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); + if (Tmp2 == 1) break; + return std::min(Tmp, Tmp2) - 1; - case Instruction::Mul: { - // The output of the Mul can be at most twice the valid bits in the inputs. - unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - if (SignBitsOp0 == 1) break; - unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); - if (SignBitsOp1 == 1) break; - unsigned OutValidBits = - (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1); - return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1; - } + case Instruction::Sub: + Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); + if (Tmp2 == 1) break; + + // Handle NEG. + if (const auto *CLHS = dyn_cast(U->getOperand(0))) + if (CLHS->isNullValue()) { + KnownBits Known(TyBits); + computeKnownBits(U->getOperand(1), Known, Depth + 1, Q); + // If the input is known to be 0 or 1, the output is 0/-1, which is + // all sign bits set. + if ((Known.Zero | 1).isAllOnesValue()) + return TyBits; + + // If the input is known to be positive (the sign bit is known clear), + // the output of the NEG has the same number of sign bits as the + // input. + if (Known.isNonNegative()) + return Tmp2; + + // Otherwise, we treat this like a SUB. + } - case Instruction::PHI: { - const PHINode *PN = cast(U); - unsigned NumIncomingValues = PN->getNumIncomingValues(); - // Don't analyze large in-degree PHIs. - if (NumIncomingValues > 4) break; - // Unreachable blocks may have zero-operand PHI nodes. - if (NumIncomingValues == 0) break; - - // Take the minimum of all incoming values. This can't infinitely loop - // because of our depth threshold. - Tmp = ComputeNumSignBits(PN->getIncomingValue(0), Depth + 1, Q); - for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) { - if (Tmp == 1) return Tmp; - Tmp = std::min( - Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, Q)); + // Sub can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + if (Tmp == 1) break; + return std::min(Tmp, Tmp2) - 1; + + case Instruction::Mul: { + // The output of the Mul can be at most twice the valid bits in the + // inputs. 
+ unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + if (SignBitsOp0 == 1) break; + unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); + if (SignBitsOp1 == 1) break; + unsigned OutValidBits = + (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1); + return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1; } - return Tmp; - } - case Instruction::Trunc: - // FIXME: it's tricky to do anything useful for this, but it is an important - // case for targets like X86. - break; + case Instruction::PHI: { + const PHINode *PN = cast(U); + unsigned NumIncomingValues = PN->getNumIncomingValues(); + // Don't analyze large in-degree PHIs. + if (NumIncomingValues > 4) break; + // Unreachable blocks may have zero-operand PHI nodes. + if (NumIncomingValues == 0) break; + + // Take the minimum of all incoming values. This can't infinitely loop + // because of our depth threshold. + Tmp = ComputeNumSignBits(PN->getIncomingValue(0), Depth + 1, Q); + for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) { + if (Tmp == 1) return Tmp; + Tmp = std::min( + Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, Q)); + } + return Tmp; + } - case Instruction::ExtractElement: - // Look through extract element. At the moment we keep this simple and skip - // tracking the specific element. But at least we might find information - // valid for all elements of the vector (for example if vector is sign - // extended, shifted, etc). - return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - - case Instruction::ShuffleVector: { - // TODO: This is copied almost directly from the SelectionDAG version of - // ComputeNumSignBits. It would be better if we could share common - // code. If not, make sure that changes are translated to the DAG. - - // Collect the minimum number of sign bits that are shared by every vector - // element referenced by the shuffle. - auto *Shuf = cast(U); - int NumElts = Shuf->getOperand(0)->getType()->getVectorNumElements(); - int NumMaskElts = Shuf->getMask()->getType()->getVectorNumElements(); - APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); - for (int i = 0; i != NumMaskElts; ++i) { - int M = Shuf->getMaskValue(i); - assert(M < NumElts * 2 && "Invalid shuffle mask constant"); - // For undef elements, we don't know anything about the common state of - // the shuffle result. - if (M == -1) - return 1; - if (M < NumElts) - DemandedLHS.setBit(M % NumElts); - else - DemandedRHS.setBit(M % NumElts); + case Instruction::Trunc: + // FIXME: it's tricky to do anything useful for this, but it is an + // important case for targets like X86. + break; + + case Instruction::ExtractElement: + // Look through extract element. At the moment we keep this simple and + // skip tracking the specific element. But at least we might find + // information valid for all elements of the vector (for example if vector + // is sign extended, shifted, etc). + return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + + case Instruction::ShuffleVector: { + // TODO: This is copied almost directly from the SelectionDAG version of + // ComputeNumSignBits. It would be better if we could share common + // code. If not, make sure that changes are translated to the DAG. + + // Collect the minimum number of sign bits that are shared by every vector + // element referenced by the shuffle. 
+ auto *Shuf = cast(U); + int NumElts = Shuf->getOperand(0)->getType()->getVectorNumElements(); + int NumMaskElts = Shuf->getMask()->getType()->getVectorNumElements(); + APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); + for (int i = 0; i != NumMaskElts; ++i) { + int M = Shuf->getMaskValue(i); + assert(M < NumElts * 2 && "Invalid shuffle mask constant"); + // For undef elements, we don't know anything about the common state of + // the shuffle result. + if (M == -1) + return 1; + if (M < NumElts) + DemandedLHS.setBit(M % NumElts); + else + DemandedRHS.setBit(M % NumElts); + } + Tmp = std::numeric_limits::max(); + if (!!DemandedLHS) + Tmp = ComputeNumSignBits(Shuf->getOperand(0), Depth + 1, Q); + if (!!DemandedRHS) { + Tmp2 = ComputeNumSignBits(Shuf->getOperand(1), Depth + 1, Q); + Tmp = std::min(Tmp, Tmp2); + } + // If we don't know anything, early out and try computeKnownBits + // fall-back. + if (Tmp == 1) + break; + assert(Tmp <= V->getType()->getScalarSizeInBits() && + "Failed to determine minimum sign bits"); + return Tmp; } - Tmp = std::numeric_limits::max(); - if (!!DemandedLHS) - Tmp = ComputeNumSignBits(Shuf->getOperand(0), Depth + 1, Q); - if (!!DemandedRHS) { - Tmp2 = ComputeNumSignBits(Shuf->getOperand(1), Depth + 1, Q); - Tmp = std::min(Tmp, Tmp2); } - // If we don't know anything, early out and try computeKnownBits fall-back. - if (Tmp == 1) - break; - assert(Tmp <= V->getType()->getScalarSizeInBits() && - "Failed to determine minimum sign bits"); - return Tmp; - } } // Finally, if we can prove that the top bits of the result are 0's or 1's, @@ -2655,8 +2666,6 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, /// through SExt instructions only if LookThroughSExt is true. bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, bool LookThroughSExt, unsigned Depth) { - const unsigned MaxDepth = 6; - assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); assert(V->getType()->isIntegerTy() && "Not integer or pointer type!"); @@ -3651,23 +3660,28 @@ uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) { return Len == ~0ULL ? 1 : Len; } -const Value *llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call) { +const Value * +llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call, + bool MustPreserveNullness) { assert(Call && "getArgumentAliasingToReturnedPointer only works on nonnull calls"); if (const Value *RV = Call->getReturnedArgOperand()) return RV; // This can be used only as a aliasing property. - if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call)) + if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( + Call, MustPreserveNullness)) return Call->getArgOperand(0); return nullptr; } bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( - const CallBase *Call) { + const CallBase *Call, bool MustPreserveNullness) { return Call->getIntrinsicID() == Intrinsic::launder_invariant_group || Call->getIntrinsicID() == Intrinsic::strip_invariant_group || Call->getIntrinsicID() == Intrinsic::aarch64_irg || - Call->getIntrinsicID() == Intrinsic::aarch64_tagp; + Call->getIntrinsicID() == Intrinsic::aarch64_tagp || + (!MustPreserveNullness && + Call->getIntrinsicID() == Intrinsic::ptrmask); } /// \p PN defines a loop-variant pointer to an object. Check if the @@ -3725,7 +3739,7 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL, // because it should be in sync with CaptureTracking. 
Not using it may // cause weird miscompilations where 2 aliasing pointers are assumed to // noalias. - if (auto *RP = getArgumentAliasingToReturnedPointer(Call)) { + if (auto *RP = getArgumentAliasingToReturnedPointer(Call, false)) { V = RP; continue; } @@ -3865,6 +3879,18 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) { return true; } +bool llvm::mustSuppressSpeculation(const LoadInst &LI) { + if (!LI.isUnordered()) + return true; + const Function &F = *LI.getFunction(); + // Speculative load may create a race that did not exist in the source. + return F.hasFnAttribute(Attribute::SanitizeThread) || + // Speculative load may load data from dirty regions. + F.hasFnAttribute(Attribute::SanitizeAddress) || + F.hasFnAttribute(Attribute::SanitizeHWAddress); +} + + bool llvm::isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI, const DominatorTree *DT) { @@ -3909,17 +3935,12 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, } case Instruction::Load: { const LoadInst *LI = cast(Inst); - if (!LI->isUnordered() || - // Speculative load may create a race that did not exist in the source. - LI->getFunction()->hasFnAttribute(Attribute::SanitizeThread) || - // Speculative load may load data from dirty regions. - LI->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) || - LI->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress)) + if (mustSuppressSpeculation(*LI)) return false; const DataLayout &DL = LI->getModule()->getDataLayout(); - return isDereferenceableAndAlignedPointer(LI->getPointerOperand(), - LI->getType(), LI->getAlignment(), - DL, CtxI, DT); + return isDereferenceableAndAlignedPointer( + LI->getPointerOperand(), LI->getType(), MaybeAlign(LI->getAlignment()), + DL, CtxI, DT); } case Instruction::Call: { auto *CI = cast(Inst); @@ -4221,22 +4242,9 @@ OverflowResult llvm::computeOverflowForSignedAdd(const Value *LHS, } bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { - // A memory operation returns normally if it isn't volatile. A volatile - // operation is allowed to trap. - // - // An atomic operation isn't guaranteed to return in a reasonable amount of - // time because it's possible for another thread to interfere with it for an + // Note: An atomic operation isn't guaranteed to return in a reasonable amount + // of time because it's possible for another thread to interfere with it for an // arbitrary length of time, but programs aren't allowed to rely on that. - if (const LoadInst *LI = dyn_cast(I)) - return !LI->isVolatile(); - if (const StoreInst *SI = dyn_cast(I)) - return !SI->isVolatile(); - if (const AtomicCmpXchgInst *CXI = dyn_cast(I)) - return !CXI->isVolatile(); - if (const AtomicRMWInst *RMWI = dyn_cast(I)) - return !RMWI->isVolatile(); - if (const MemIntrinsic *MII = dyn_cast(I)) - return !MII->isVolatile(); // If there is no successor, then execution can't transfer to it. if (const auto *CRI = dyn_cast(I)) @@ -4277,10 +4285,7 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { // FIXME: This isn't aggressive enough; a call which only writes to a global // is guaranteed to return. - return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory() || - match(I, m_Intrinsic()) || - match(I, m_Intrinsic()) || - match(I, m_Intrinsic()); + return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory(); } // Other instructions return normally. @@ -4572,12 +4577,12 @@ static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred, // TODO: Allow FP min/max with nnan/nsz. 
assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison"); - Value *A, *B; + Value *A = nullptr, *B = nullptr; SelectPatternResult L = matchSelectPattern(TVal, A, B, nullptr, Depth + 1); if (!SelectPatternResult::isMinOrMax(L.Flavor)) return {SPF_UNKNOWN, SPNB_NA, false}; - Value *C, *D; + Value *C = nullptr, *D = nullptr; SelectPatternResult R = matchSelectPattern(FVal, C, D, nullptr, Depth + 1); if (L.Flavor != R.Flavor) return {SPF_UNKNOWN, SPNB_NA, false}; @@ -5627,8 +5632,8 @@ static void setLimitsForIntrinsic(const IntrinsicInst &II, APInt &Lower, } static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower, - APInt &Upper) { - const Value *LHS, *RHS; + APInt &Upper, const InstrInfoQuery &IIQ) { + const Value *LHS = nullptr, *RHS = nullptr; SelectPatternResult R = matchSelectPattern(&SI, LHS, RHS); if (R.Flavor == SPF_UNKNOWN) return; @@ -5640,7 +5645,8 @@ static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower, // then the result of abs(X) is [0..SIGNED_MAX], // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN. Lower = APInt::getNullValue(BitWidth); - if (cast(RHS)->hasNoSignedWrap()) + if (match(RHS, m_Neg(m_Specific(LHS))) && + IIQ.hasNoSignedWrap(cast(RHS))) Upper = APInt::getSignedMaxValue(BitWidth) + 1; else Upper = APInt::getSignedMinValue(BitWidth) + 1; @@ -5694,7 +5700,7 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo) { else if (auto *II = dyn_cast(V)) setLimitsForIntrinsic(*II, Lower, Upper); else if (auto *SI = dyn_cast(V)) - setLimitsForSelectPattern(*SI, Lower, Upper); + setLimitsForSelectPattern(*SI, Lower, Upper, IIQ); ConstantRange CR = ConstantRange::getNonEmpty(Lower, Upper); @@ -5704,3 +5710,111 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo) { return CR; } + +static Optional +getOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, const DataLayout &DL) { + // Skip over the first indices. + gep_type_iterator GTI = gep_type_begin(GEP); + for (unsigned i = 1; i != Idx; ++i, ++GTI) + /*skip along*/; + + // Compute the offset implied by the rest of the indices. + int64_t Offset = 0; + for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { + ConstantInt *OpC = dyn_cast(GEP->getOperand(i)); + if (!OpC) + return None; + if (OpC->isZero()) + continue; // No offset. + + // Handle struct indices, which add their field offset to the pointer. + if (StructType *STy = GTI.getStructTypeOrNull()) { + Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); + continue; + } + + // Otherwise, we have a sequential type like an array or vector. Multiply + // the index by the ElementSize. + uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()); + Offset += Size * OpC->getSExtValue(); + } + + return Offset; +} + +Optional llvm::isPointerOffset(const Value *Ptr1, const Value *Ptr2, + const DataLayout &DL) { + Ptr1 = Ptr1->stripPointerCasts(); + Ptr2 = Ptr2->stripPointerCasts(); + + // Handle the trivial case first. + if (Ptr1 == Ptr2) { + return 0; + } + + const GEPOperator *GEP1 = dyn_cast(Ptr1); + const GEPOperator *GEP2 = dyn_cast(Ptr2); + + // If one pointer is a GEP see if the GEP is a constant offset from the base, + // as in "P" and "gep P, 1". + // Also do this iteratively to handle the the following case: + // Ptr_t1 = GEP Ptr1, c1 + // Ptr_t2 = GEP Ptr_t1, c2 + // Ptr2 = GEP Ptr_t2, c3 + // where we will return c1+c2+c3. 
+ // TODO: Handle the case when both Ptr1 and Ptr2 are GEPs of some common base + // -- replace getOffsetFromBase with getOffsetAndBase, check that the bases + // are the same, and return the difference between offsets. + auto getOffsetFromBase = [&DL](const GEPOperator *GEP, + const Value *Ptr) -> Optional { + const GEPOperator *GEP_T = GEP; + int64_t OffsetVal = 0; + bool HasSameBase = false; + while (GEP_T) { + auto Offset = getOffsetFromIndex(GEP_T, 1, DL); + if (!Offset) + return None; + OffsetVal += *Offset; + auto Op0 = GEP_T->getOperand(0)->stripPointerCasts(); + if (Op0 == Ptr) { + HasSameBase = true; + break; + } + GEP_T = dyn_cast(Op0); + } + if (!HasSameBase) + return None; + return OffsetVal; + }; + + if (GEP1) { + auto Offset = getOffsetFromBase(GEP1, Ptr2); + if (Offset) + return -*Offset; + } + if (GEP2) { + auto Offset = getOffsetFromBase(GEP2, Ptr1); + if (Offset) + return Offset; + } + + // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical + // base. After that base, they may have some number of common (and + // potentially variable) indices. After that they handle some constant + // offset, which determines their offset from each other. At this point, we + // handle no other case. + if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0)) + return None; + + // Skip any common indices and track the GEP types. + unsigned Idx = 1; + for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx) + if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx)) + break; + + auto Offset1 = getOffsetFromIndex(GEP1, Idx, DL); + auto Offset2 = getOffsetFromIndex(GEP2, Idx, DL); + if (!Offset1 || !Offset2) + return None; + return *Offset2 - *Offset1; +} diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp index 986756eb2627..600f57ab9d71 100644 --- a/lib/Analysis/VectorUtils.cpp +++ b/lib/Analysis/VectorUtils.cpp @@ -56,6 +56,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { case Intrinsic::smul_fix: case Intrinsic::smul_fix_sat: case Intrinsic::umul_fix: + case Intrinsic::umul_fix_sat: case Intrinsic::sqrt: // Begin floating-point. case Intrinsic::sin: case Intrinsic::cos: @@ -98,6 +99,7 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, case Intrinsic::smul_fix: case Intrinsic::smul_fix_sat: case Intrinsic::umul_fix: + case Intrinsic::umul_fix_sat: return (ScalarOpdIdx == 2); default: return false; @@ -830,15 +832,15 @@ void InterleavedAccessInfo::collectConstStrideAccesses( /*Assume=*/true, /*ShouldCheckWrap=*/false); const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); - PointerType *PtrTy = dyn_cast(Ptr->getType()); + PointerType *PtrTy = cast(Ptr->getType()); uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType()); // An alignment of 0 means target ABI alignment. 
- unsigned Align = getLoadStoreAlignment(&I); - if (!Align) - Align = DL.getABITypeAlignment(PtrTy->getElementType()); + MaybeAlign Alignment = MaybeAlign(getLoadStoreAlignment(&I)); + if (!Alignment) + Alignment = Align(DL.getABITypeAlignment(PtrTy->getElementType())); - AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align); + AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, *Alignment); } } @@ -925,7 +927,7 @@ void InterleavedAccessInfo::analyzeInterleaving( if (!Group) { LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B << '\n'); - Group = createInterleaveGroup(B, DesB.Stride, DesB.Align); + Group = createInterleaveGroup(B, DesB.Stride, DesB.Alignment); } if (B->mayWriteToMemory()) StoreGroups.insert(Group); @@ -964,6 +966,10 @@ void InterleavedAccessInfo::analyzeInterleaving( // instructions that precede it. if (isInterleaved(A)) { InterleaveGroup *StoreGroup = getInterleaveGroup(A); + + LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to " + "dependence between " << *A << " and "<< *B << '\n'); + StoreGroups.remove(StoreGroup); releaseGroup(StoreGroup); } @@ -1028,7 +1034,7 @@ void InterleavedAccessInfo::analyzeInterleaving( Group->getIndex(B) + DistanceToB / static_cast(DesB.Size); // Try to insert A into B's group. - if (Group->insertMember(A, IndexA, DesA.Align)) { + if (Group->insertMember(A, IndexA, DesA.Alignment)) { LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n' << " into the interleave group with" << *B << '\n'); diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 72d2357c2933..5292b0e62744 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -622,6 +622,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(amdgpu_ps); KEYWORD(amdgpu_cs); KEYWORD(amdgpu_kernel); + KEYWORD(tailcc); KEYWORD(cc); KEYWORD(c); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 87dff6468f2d..594537307d00 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -1122,7 +1122,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, if (ParseToken(lltok::StringConstant, "expected partition string")) return true; } else if (Lex.getKind() == lltok::kw_align) { - unsigned Alignment; + MaybeAlign Alignment; if (ParseOptionalAlignment(Alignment)) return true; GV->setAlignment(Alignment); } else if (Lex.getKind() == lltok::MetadataVar) { @@ -1229,12 +1229,13 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, // As a hack, we allow function alignment to be initially parsed as an // attribute on a function declaration/definition or added to an attribute // group and later moved to the alignment field. 
- unsigned Alignment; + MaybeAlign Alignment; if (inAttrGrp) { Lex.Lex(); - if (ParseToken(lltok::equal, "expected '=' here") || - ParseUInt32(Alignment)) + uint32_t Value = 0; + if (ParseToken(lltok::equal, "expected '=' here") || ParseUInt32(Value)) return true; + Alignment = Align(Value); } else { if (ParseOptionalAlignment(Alignment)) return true; @@ -1603,7 +1604,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { continue; } case lltok::kw_align: { - unsigned Alignment; + MaybeAlign Alignment; if (ParseOptionalAlignment(Alignment)) return true; B.addAlignmentAttr(Alignment); @@ -1720,7 +1721,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { continue; } case lltok::kw_align: { - unsigned Alignment; + MaybeAlign Alignment; if (ParseOptionalAlignment(Alignment)) return true; B.addAlignmentAttr(Alignment); @@ -1955,6 +1956,7 @@ void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) { /// ::= 'amdgpu_ps' /// ::= 'amdgpu_cs' /// ::= 'amdgpu_kernel' +/// ::= 'tailcc' /// ::= 'cc' UINT /// bool LLParser::ParseOptionalCallingConv(unsigned &CC) { @@ -2000,6 +2002,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) { case lltok::kw_amdgpu_ps: CC = CallingConv::AMDGPU_PS; break; case lltok::kw_amdgpu_cs: CC = CallingConv::AMDGPU_CS; break; case lltok::kw_amdgpu_kernel: CC = CallingConv::AMDGPU_KERNEL; break; + case lltok::kw_tailcc: CC = CallingConv::Tail; break; case lltok::kw_cc: { Lex.Lex(); return ParseUInt32(CC); @@ -2067,16 +2070,19 @@ bool LLParser::ParseOptionalFunctionMetadata(Function &F) { /// ParseOptionalAlignment /// ::= /* empty */ /// ::= 'align' 4 -bool LLParser::ParseOptionalAlignment(unsigned &Alignment) { - Alignment = 0; +bool LLParser::ParseOptionalAlignment(MaybeAlign &Alignment) { + Alignment = None; if (!EatIfPresent(lltok::kw_align)) return false; LocTy AlignLoc = Lex.getLoc(); - if (ParseUInt32(Alignment)) return true; - if (!isPowerOf2_32(Alignment)) + uint32_t Value = 0; + if (ParseUInt32(Value)) + return true; + if (!isPowerOf2_32(Value)) return Error(AlignLoc, "alignment is not a power of two"); - if (Alignment > Value::MaximumAlignment) + if (Value > Value::MaximumAlignment) return Error(AlignLoc, "huge alignments are not supported yet"); + Alignment = Align(Value); return false; } @@ -2113,7 +2119,7 @@ bool LLParser::ParseOptionalDerefAttrBytes(lltok::Kind AttrKind, /// /// This returns with AteExtraComma set to true if it ate an excess comma at the /// end. -bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment, +bool LLParser::ParseOptionalCommaAlign(MaybeAlign &Alignment, bool &AteExtraComma) { AteExtraComma = false; while (EatIfPresent(lltok::comma)) { @@ -2551,6 +2557,7 @@ bool LLParser::ParseOptionalOperandBundles( /// bool LLParser::ParseArgumentList(SmallVectorImpl &ArgList, bool &isVarArg){ + unsigned CurValID = 0; isVarArg = false; assert(Lex.getKind() == lltok::lparen); Lex.Lex(); // eat the (. 
@@ -2575,6 +2582,12 @@ bool LLParser::ParseArgumentList(SmallVectorImpl &ArgList, if (Lex.getKind() == lltok::LocalVar) { Name = Lex.getStrVal(); Lex.Lex(); + } else if (Lex.getKind() == lltok::LocalVarID) { + if (Lex.getUIntVal() != CurValID) + return Error(TypeLoc, "argument expected to be numbered '%" + + Twine(CurValID) + "'"); + ++CurValID; + Lex.Lex(); } if (!FunctionType::isValidArgumentType(ArgTy)) @@ -2602,6 +2615,13 @@ bool LLParser::ParseArgumentList(SmallVectorImpl &ArgList, Name = Lex.getStrVal(); Lex.Lex(); } else { + if (Lex.getKind() == lltok::LocalVarID) { + if (Lex.getUIntVal() != CurValID) + return Error(TypeLoc, "argument expected to be numbered '%" + + Twine(CurValID) + "'"); + Lex.Lex(); + } + ++CurValID; Name = ""; } @@ -3093,7 +3113,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { ParseToken(lltok::rbrace, "expected end of struct constant")) return true; - ID.ConstantStructElts = make_unique(Elts.size()); + ID.ConstantStructElts = std::make_unique(Elts.size()); ID.UIntVal = Elts.size(); memcpy(ID.ConstantStructElts.get(), Elts.data(), Elts.size() * sizeof(Elts[0])); @@ -3115,7 +3135,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { return true; if (isPackedStruct) { - ID.ConstantStructElts = make_unique(Elts.size()); + ID.ConstantStructElts = std::make_unique(Elts.size()); memcpy(ID.ConstantStructElts.get(), Elts.data(), Elts.size() * sizeof(Elts[0])); ID.UIntVal = Elts.size(); @@ -5354,7 +5374,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { LocTy BuiltinLoc; std::string Section; std::string Partition; - unsigned Alignment; + MaybeAlign Alignment; std::string GC; GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::UnnamedAddr::None; unsigned AddrSpace = 0; @@ -5471,7 +5491,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { Fn->setCallingConv(CC); Fn->setAttributes(PAL); Fn->setUnnamedAddr(UnnamedAddr); - Fn->setAlignment(Alignment); + Fn->setAlignment(MaybeAlign(Alignment)); Fn->setSection(Section); Fn->setPartition(Partition); Fn->setComdat(C); @@ -5788,7 +5808,19 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, case lltok::kw_extractelement: return ParseExtractElement(Inst, PFS); case lltok::kw_insertelement: return ParseInsertElement(Inst, PFS); case lltok::kw_shufflevector: return ParseShuffleVector(Inst, PFS); - case lltok::kw_phi: return ParsePHI(Inst, PFS); + case lltok::kw_phi: { + FastMathFlags FMF = EatFastMathFlagsIfPresent(); + int Res = ParsePHI(Inst, PFS); + if (Res != 0) + return Res; + if (FMF.any()) { + if (!Inst->getType()->isFPOrFPVectorTy()) + return Error(Loc, "fast-math-flags specified for phi without " + "floating-point scalar or vector return type"); + Inst->setFastMathFlags(FMF); + } + return 0; + } case lltok::kw_landingpad: return ParseLandingPad(Inst, PFS); // Call. 
case lltok::kw_call: return ParseCall(Inst, PFS, CallInst::TCK_None); @@ -6837,7 +6869,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) { Value *Size = nullptr; LocTy SizeLoc, TyLoc, ASLoc; - unsigned Alignment = 0; + MaybeAlign Alignment; unsigned AddrSpace = 0; Type *Ty = nullptr; @@ -6885,7 +6917,8 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) { if (Size && !Size->getType()->isIntegerTy()) return Error(SizeLoc, "element count must have integer type"); - AllocaInst *AI = new AllocaInst(Ty, AddrSpace, Size, Alignment); + AllocaInst *AI = + new AllocaInst(Ty, AddrSpace, Size, Alignment ? Alignment->value() : 0); AI->setUsedWithInAlloca(IsInAlloca); AI->setSwiftError(IsSwiftError); Inst = AI; @@ -6898,7 +6931,7 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) { /// 'singlethread'? AtomicOrdering (',' 'align' i32)? int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { Value *Val; LocTy Loc; - unsigned Alignment = 0; + MaybeAlign Alignment; bool AteExtraComma = false; bool isAtomic = false; AtomicOrdering Ordering = AtomicOrdering::NotAtomic; @@ -6947,7 +6980,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { /// 'singlethread'? AtomicOrdering (',' 'align' i32)? int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) { Value *Val, *Ptr; LocTy Loc, PtrLoc; - unsigned Alignment = 0; + MaybeAlign Alignment; bool AteExtraComma = false; bool isAtomic = false; AtomicOrdering Ordering = AtomicOrdering::NotAtomic; @@ -8074,7 +8107,7 @@ bool LLParser::ParseFunctionSummary(std::string Name, GlobalValue::GUID GUID, if (ParseToken(lltok::rparen, "expected ')' here")) return true; - auto FS = llvm::make_unique( + auto FS = std::make_unique( GVFlags, InstCount, FFlags, /*EntryCount=*/0, std::move(Refs), std::move(Calls), std::move(TypeIdInfo.TypeTests), std::move(TypeIdInfo.TypeTestAssumeVCalls), @@ -8134,7 +8167,7 @@ bool LLParser::ParseVariableSummary(std::string Name, GlobalValue::GUID GUID, return true; auto GS = - llvm::make_unique(GVFlags, GVarFlags, std::move(Refs)); + std::make_unique(GVFlags, GVarFlags, std::move(Refs)); GS->setModulePath(ModulePath); GS->setVTableFuncs(std::move(VTableFuncs)); @@ -8175,7 +8208,7 @@ bool LLParser::ParseAliasSummary(std::string Name, GlobalValue::GUID GUID, if (ParseToken(lltok::rparen, "expected ')' here")) return true; - auto AS = llvm::make_unique(GVFlags); + auto AS = std::make_unique(GVFlags); AS->setModulePath(ModulePath); diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 610e2e262008..abc423b4e3cd 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -281,14 +281,14 @@ namespace llvm { void ParseOptionalVisibility(unsigned &Res); void ParseOptionalDLLStorageClass(unsigned &Res); bool ParseOptionalCallingConv(unsigned &CC); - bool ParseOptionalAlignment(unsigned &Alignment); + bool ParseOptionalAlignment(MaybeAlign &Alignment); bool ParseOptionalDerefAttrBytes(lltok::Kind AttrKind, uint64_t &Bytes); bool ParseScopeAndOrdering(bool isAtomic, SyncScope::ID &SSID, AtomicOrdering &Ordering); bool ParseScope(SyncScope::ID &SSID); bool ParseOrdering(AtomicOrdering &Ordering); bool ParseOptionalStackAlignment(unsigned &Alignment); - bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma); + bool ParseOptionalCommaAlign(MaybeAlign &Alignment, bool &AteExtraComma); bool ParseOptionalCommaAddrSpace(unsigned &AddrSpace, LocTy &Loc, 
bool &AteExtraComma); bool ParseOptionalCommaInAlloca(bool &IsInAlloca); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 0e9ba4db4742..f49feb2dc14d 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -168,6 +168,7 @@ enum Kind { kw_amdgpu_ps, kw_amdgpu_cs, kw_amdgpu_kernel, + kw_tailcc, // Attributes: kw_attributes, diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp index b13c6237f411..b7f552a6fccb 100644 --- a/lib/AsmParser/Parser.cpp +++ b/lib/AsmParser/Parser.cpp @@ -42,7 +42,7 @@ llvm::parseAssembly(MemoryBufferRef F, SMDiagnostic &Err, LLVMContext &Context, SlotMapping *Slots, bool UpgradeDebugInfo, StringRef DataLayoutString) { std::unique_ptr M = - make_unique(F.getBufferIdentifier(), Context); + std::make_unique(F.getBufferIdentifier(), Context); if (parseAssemblyInto(F, M.get(), nullptr, Err, Slots, UpgradeDebugInfo, DataLayoutString)) @@ -71,9 +71,9 @@ ParsedModuleAndIndex llvm::parseAssemblyWithIndex( MemoryBufferRef F, SMDiagnostic &Err, LLVMContext &Context, SlotMapping *Slots, bool UpgradeDebugInfo, StringRef DataLayoutString) { std::unique_ptr M = - make_unique(F.getBufferIdentifier(), Context); + std::make_unique(F.getBufferIdentifier(), Context); std::unique_ptr Index = - make_unique(/*HaveGVs=*/true); + std::make_unique(/*HaveGVs=*/true); if (parseAssemblyInto(F, M.get(), Index.get(), Err, Slots, UpgradeDebugInfo, DataLayoutString)) @@ -123,7 +123,7 @@ static bool parseSummaryIndexAssemblyInto(MemoryBufferRef F, std::unique_ptr llvm::parseSummaryIndexAssembly(MemoryBufferRef F, SMDiagnostic &Err) { std::unique_ptr Index = - make_unique(/*HaveGVs=*/false); + std::make_unique(/*HaveGVs=*/false); if (parseSummaryIndexAssemblyInto(F, *Index, Err)) return nullptr; diff --git a/lib/BinaryFormat/Dwarf.cpp b/lib/BinaryFormat/Dwarf.cpp index eb6bd33ce583..d06cccdf0dfd 100644 --- a/lib/BinaryFormat/Dwarf.cpp +++ b/lib/BinaryFormat/Dwarf.cpp @@ -22,7 +22,7 @@ StringRef llvm::dwarf::TagString(unsigned Tag) { switch (Tag) { default: return StringRef(); -#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR, KIND) \ case DW_TAG_##NAME: \ return "DW_TAG_" #NAME; #include "llvm/BinaryFormat/Dwarf.def" @@ -31,7 +31,7 @@ StringRef llvm::dwarf::TagString(unsigned Tag) { unsigned llvm::dwarf::getTag(StringRef TagString) { return StringSwitch(TagString) -#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR, KIND) \ .Case("DW_TAG_" #NAME, DW_TAG_##NAME) #include "llvm/BinaryFormat/Dwarf.def" .Default(DW_TAG_invalid); @@ -41,7 +41,7 @@ unsigned llvm::dwarf::TagVersion(dwarf::Tag Tag) { switch (Tag) { default: return 0; -#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR, KIND) \ case DW_TAG_##NAME: \ return VERSION; #include "llvm/BinaryFormat/Dwarf.def" @@ -52,7 +52,7 @@ unsigned llvm::dwarf::TagVendor(dwarf::Tag Tag) { switch (Tag) { default: return 0; -#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR, KIND) \ case DW_TAG_##NAME: \ return DWARF_VENDOR_##VENDOR; #include "llvm/BinaryFormat/Dwarf.def" @@ -149,6 +149,8 @@ StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) { return "DW_OP_LLVM_fragment"; case DW_OP_LLVM_tag_offset: return "DW_OP_LLVM_tag_offset"; + case DW_OP_LLVM_entry_value: + return "DW_OP_LLVM_entry_value"; } } @@ -160,6 +162,7 @@ unsigned llvm::dwarf::getOperationEncoding(StringRef OperationEncodingString) { 
.Case("DW_OP_LLVM_convert", DW_OP_LLVM_convert) .Case("DW_OP_LLVM_fragment", DW_OP_LLVM_fragment) .Case("DW_OP_LLVM_tag_offset", DW_OP_LLVM_tag_offset) + .Case("DW_OP_LLVM_entry_value", DW_OP_LLVM_entry_value) .Default(0); } @@ -472,6 +475,17 @@ StringRef llvm::dwarf::RangeListEncodingString(unsigned Encoding) { } } +StringRef llvm::dwarf::LocListEncodingString(unsigned Encoding) { + switch (Encoding) { + default: + return StringRef(); +#define HANDLE_DW_LLE(ID, NAME) \ + case DW_LLE_##NAME: \ + return "DW_LLE_" #NAME; +#include "llvm/BinaryFormat/Dwarf.def" + } +} + StringRef llvm::dwarf::CallFrameString(unsigned Encoding, Triple::ArchType Arch) { assert(Arch != llvm::Triple::ArchType::UnknownArch); diff --git a/lib/BinaryFormat/Magic.cpp b/lib/BinaryFormat/Magic.cpp index 7dfe23690a50..bbcbbabededb 100644 --- a/lib/BinaryFormat/Magic.cpp +++ b/lib/BinaryFormat/Magic.cpp @@ -210,6 +210,11 @@ file_magic llvm::identify_magic(StringRef Magic) { return file_magic::coff_object; break; + case 0x2d: // YAML '-' + if (startswith(Magic, "--- !tapi") || startswith(Magic, "---\narchs:")) + return file_magic::tapi_file; + break; + default: break; } diff --git a/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/lib/Bitcode/Reader/BitcodeAnalyzer.cpp index 9c30d563a314..e70caa83c8c1 100644 --- a/lib/Bitcode/Reader/BitcodeAnalyzer.cpp +++ b/lib/Bitcode/Reader/BitcodeAnalyzer.cpp @@ -434,6 +434,13 @@ static Expected ReadSignature(BitstreamCursor &Stream) { return std::move(Err); if (Signature[2] == 'A' && Signature[3] == 'G') return ClangSerializedDiagnosticsBitstream; + } else if (Signature[0] == 'R' && Signature[1] == 'M') { + if (Error Err = tryRead(Signature[2], 8)) + return std::move(Err); + if (Error Err = tryRead(Signature[3], 8)) + return std::move(Err); + if (Signature[2] == 'R' && Signature[3] == 'K') + return LLVMBitstreamRemarks; } else { if (Error Err = tryRead(Signature[2], 4)) return std::move(Err); @@ -627,6 +634,9 @@ void BitcodeAnalyzer::printStats(BCDumpOptions O, case ClangSerializedDiagnosticsBitstream: O.OS << "Clang Serialized Diagnostics\n"; break; + case LLVMBitstreamRemarks: + O.OS << "LLVM Remarks\n"; + break; } O.OS << " # Toplevel Blocks: " << NumTopBlocks << "\n"; O.OS << "\n"; diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 29dc7f616392..15eead1de31a 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -722,7 +722,7 @@ private: /// Converts alignment exponent (i.e. power of two (or zero)) to the /// corresponding alignment to use. If alignment is too large, returns /// a corresponding error code. - Error parseAlignmentValue(uint64_t Exponent, unsigned &Alignment); + Error parseAlignmentValue(uint64_t Exponent, MaybeAlign &Alignment); Error parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind); Error parseModule(uint64_t ResumeBit, bool ShouldLazyLoadMetadata = false); @@ -1063,7 +1063,7 @@ static int getDecodedUnaryOpcode(unsigned Val, Type *Ty) { switch (Val) { default: return -1; - case bitc::UNOP_NEG: + case bitc::UNOP_FNEG: return IsFP ? Instruction::FNeg : -1; } } @@ -1544,12 +1544,12 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { } Error BitcodeReader::parseAlignmentValue(uint64_t Exponent, - unsigned &Alignment) { + MaybeAlign &Alignment) { // Note: Alignment in bitcode files is incremented by 1, so that zero // can be used for default alignment. 
if (Exponent > Value::MaxAlignmentExponent + 1) return error("Invalid alignment value"); - Alignment = (1 << static_cast(Exponent)) >> 1; + Alignment = decodeMaybeAlign(Exponent); return Error::success(); } @@ -2377,6 +2377,8 @@ Error BitcodeReader::parseConstants() { CurTy = flattenPointerTypes(CurFullTy); continue; // Skip the ValueList manipulation. case bitc::CST_CODE_NULL: // NULL + if (CurTy->isVoidTy() || CurTy->isFunctionTy() || CurTy->isLabelTy()) + return error("Invalid type for a constant null value"); V = Constant::getNullValue(CurTy); break; case bitc::CST_CODE_INTEGER: // INTEGER: [intval] @@ -3110,7 +3112,7 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef Record) { uint64_t RawLinkage = Record[3]; GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage); - unsigned Alignment; + MaybeAlign Alignment; if (Error Err = parseAlignmentValue(Record[4], Alignment)) return Err; std::string Section; @@ -3241,7 +3243,7 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef Record) { Context, getPointerElementFlatType(PTy))); } - unsigned Alignment; + MaybeAlign Alignment; if (Error Err = parseAlignmentValue(Record[5], Alignment)) return Err; Func->setAlignment(Alignment); @@ -3646,6 +3648,11 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit, break; } Record.clear(); + + // Upgrade data layout string. + std::string DL = llvm::UpgradeDataLayoutString( + TheModule->getDataLayoutStr(), TheModule->getTargetTriple()); + TheModule->setDataLayout(DL); } } @@ -4622,31 +4629,48 @@ Error BitcodeReader::parseFunctionBody(Function *F) { InstructionList.push_back(I); break; case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...] - if (Record.size() < 1 || ((Record.size()-1)&1)) + if (Record.size() < 1) return error("Invalid record"); + // The first record specifies the type. FullTy = getFullyStructuredTypeByID(Record[0]); Type *Ty = flattenPointerTypes(FullTy); if (!Ty) return error("Invalid record"); - PHINode *PN = PHINode::Create(Ty, (Record.size()-1)/2); + // Phi arguments are pairs of records of [value, basic block]. + // There is an optional final record for fast-math-flags if this phi has a + // floating-point type. + size_t NumArgs = (Record.size() - 1) / 2; + if ((Record.size() - 1) % 2 == 1 && !Ty->isFPOrFPVectorTy()) + return error("Invalid record"); + + PHINode *PN = PHINode::Create(Ty, NumArgs); InstructionList.push_back(PN); - for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) { + for (unsigned i = 0; i != NumArgs; i++) { Value *V; // With the new function encoding, it is possible that operands have // negative IDs (for forward references). Use a signed VBR // representation to keep the encoding small. if (UseRelativeIDs) - V = getValueSigned(Record, 1+i, NextValueNo, Ty); + V = getValueSigned(Record, i * 2 + 1, NextValueNo, Ty); else - V = getValue(Record, 1+i, NextValueNo, Ty); - BasicBlock *BB = getBasicBlock(Record[2+i]); + V = getValue(Record, i * 2 + 1, NextValueNo, Ty); + BasicBlock *BB = getBasicBlock(Record[i * 2 + 2]); if (!V || !BB) return error("Invalid record"); PN->addIncoming(V, BB); } I = PN; + + // If there are an even number of records, the final record must be FMF. 
+ if (Record.size() % 2 == 0) { + assert(isa(I) && "Unexpected phi type"); + FastMathFlags FMF = getDecodedFastMathFlags(Record[Record.size() - 1]); + if (FMF.any()) + I->setFastMathFlags(FMF); + } + break; } @@ -4726,7 +4750,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { } Type *OpTy = getTypeByID(Record[1]); Value *Size = getFnValueByID(Record[2], OpTy); - unsigned Align; + MaybeAlign Align; if (Error Err = parseAlignmentValue(AlignRecord & ~FlagMask, Align)) { return Err; } @@ -4737,7 +4761,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { const DataLayout &DL = TheModule->getDataLayout(); unsigned AS = DL.getAllocaAddrSpace(); - AllocaInst *AI = new AllocaInst(Ty, AS, Size, Align); + AllocaInst *AI = new AllocaInst(Ty, AS, Size, Align ? Align->value() : 0); AI->setUsedWithInAlloca(InAlloca); AI->setSwiftError(SwiftError); I = AI; @@ -4765,7 +4789,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { if (Error Err = typeCheckLoadStoreInst(Ty, Op->getType())) return Err; - unsigned Align; + MaybeAlign Align; if (Error Err = parseAlignmentValue(Record[OpNum], Align)) return Err; I = new LoadInst(Ty, Op, "", Record[OpNum + 1], Align); @@ -4802,7 +4826,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { return error("Invalid record"); SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]); - unsigned Align; + MaybeAlign Align; if (Error Err = parseAlignmentValue(Record[OpNum], Align)) return Err; I = new LoadInst(Ty, Op, "", Record[OpNum + 1], Align, Ordering, SSID); @@ -4824,10 +4848,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) { if (Error Err = typeCheckLoadStoreInst(Val->getType(), Ptr->getType())) return Err; - unsigned Align; + MaybeAlign Align; if (Error Err = parseAlignmentValue(Record[OpNum], Align)) return Err; - I = new StoreInst(Val, Ptr, Record[OpNum+1], Align); + I = new StoreInst(Val, Ptr, Record[OpNum + 1], Align); InstructionList.push_back(I); break; } @@ -4857,10 +4881,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) { if (Ordering != AtomicOrdering::NotAtomic && Record[OpNum] == 0) return error("Invalid record"); - unsigned Align; + MaybeAlign Align; if (Error Err = parseAlignmentValue(Record[OpNum], Align)) return Err; - I = new StoreInst(Val, Ptr, Record[OpNum+1], Align, Ordering, SSID); + I = new StoreInst(Val, Ptr, Record[OpNum + 1], Align, Ordering, SSID); InstructionList.push_back(I); break; } @@ -5312,7 +5336,7 @@ Error BitcodeReader::materializeModule() { UpgradeModuleFlags(*TheModule); - UpgradeRetainReleaseMarker(*TheModule); + UpgradeARCRuntime(*TheModule); return Error::success(); } @@ -5874,7 +5898,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { ArrayRef(Record).slice(CallGraphEdgeStartIndex), IsOldProfileFormat, HasProfile, HasRelBF); setSpecialRefs(Refs, NumRORefs, NumWORefs); - auto FS = llvm::make_unique( + auto FS = std::make_unique( Flags, InstCount, getDecodedFFlags(RawFunFlags), /*EntryCount=*/0, std::move(Refs), std::move(Calls), std::move(PendingTypeTests), std::move(PendingTypeTestAssumeVCalls), @@ -5900,7 +5924,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { uint64_t RawFlags = Record[1]; unsigned AliaseeID = Record[2]; auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); - auto AS = llvm::make_unique(Flags); + auto AS = std::make_unique(Flags); // The module path string ref set in the summary must be owned by the // index's module string table. 
Since we don't have a module path // string table section in the per-module index, we create a single @@ -5934,7 +5958,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { std::vector Refs = makeRefList(ArrayRef(Record).slice(RefArrayStart)); auto FS = - llvm::make_unique(Flags, GVF, std::move(Refs)); + std::make_unique(Flags, GVF, std::move(Refs)); FS->setModulePath(getThisModule()->first()); auto GUID = getValueInfoFromValueId(ValueID); FS->setOriginalName(GUID.second); @@ -5961,7 +5985,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { VTableFuncs.push_back({Callee, Offset}); } auto VS = - llvm::make_unique(Flags, GVF, std::move(Refs)); + std::make_unique(Flags, GVF, std::move(Refs)); VS->setModulePath(getThisModule()->first()); VS->setVTableFuncs(VTableFuncs); auto GUID = getValueInfoFromValueId(ValueID); @@ -6019,7 +6043,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { IsOldProfileFormat, HasProfile, false); ValueInfo VI = getValueInfoFromValueId(ValueID).first; setSpecialRefs(Refs, NumRORefs, NumWORefs); - auto FS = llvm::make_unique( + auto FS = std::make_unique( Flags, InstCount, getDecodedFFlags(RawFunFlags), EntryCount, std::move(Refs), std::move(Edges), std::move(PendingTypeTests), std::move(PendingTypeTestAssumeVCalls), @@ -6046,7 +6070,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { uint64_t RawFlags = Record[2]; unsigned AliaseeValueId = Record[3]; auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); - auto AS = llvm::make_unique(Flags); + auto AS = std::make_unique(Flags); LastSeenSummary = AS.get(); AS->setModulePath(ModuleIdMap[ModuleId]); @@ -6075,7 +6099,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { std::vector Refs = makeRefList(ArrayRef(Record).slice(RefArrayStart)); auto FS = - llvm::make_unique(Flags, GVF, std::move(Refs)); + std::make_unique(Flags, GVF, std::move(Refs)); LastSeenSummary = FS.get(); FS->setModulePath(ModuleIdMap[ModuleId]); ValueInfo VI = getValueInfoFromValueId(ValueID).first; @@ -6438,7 +6462,7 @@ BitcodeModule::getModuleImpl(LLVMContext &Context, bool MaterializeAll, Context); std::unique_ptr M = - llvm::make_unique(ModuleIdentifier, Context); + std::make_unique(ModuleIdentifier, Context); M->setMaterializer(R); // Delay parsing Metadata if ShouldLazyLoadMetadata is true. 
@@ -6485,7 +6509,7 @@ Expected> BitcodeModule::getSummary() { if (Error JumpFailed = Stream.JumpToBit(ModuleBit)) return std::move(JumpFailed); - auto Index = llvm::make_unique(/*HaveGVs=*/false); + auto Index = std::make_unique(/*HaveGVs=*/false); ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, *Index, ModuleIdentifier, 0); diff --git a/lib/Bitcode/Reader/MetadataLoader.cpp b/lib/Bitcode/Reader/MetadataLoader.cpp index 108f71189585..4da51dda8b74 100644 --- a/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/lib/Bitcode/Reader/MetadataLoader.cpp @@ -515,7 +515,7 @@ class MetadataLoader::MetadataLoaderImpl { GV.getMetadata(LLVMContext::MD_dbg, MDs); GV.eraseMetadata(LLVMContext::MD_dbg); for (auto *MD : MDs) - if (auto *DGV = dyn_cast_or_null(MD)) { + if (auto *DGV = dyn_cast(MD)) { auto *DGVE = DIGlobalVariableExpression::getDistinct( Context, DGV, DIExpression::get(Context, {})); GV.addMetadata(LLVMContext::MD_dbg, *DGVE); @@ -987,7 +987,7 @@ void MetadataLoader::MetadataLoaderImpl::lazyLoadOneMetadata( assert(ID >= MDStringRef.size() && "Unexpected lazy-loading of MDString"); // Lookup first if the metadata hasn't already been loaded. if (auto *MD = MetadataList.lookup(ID)) { - auto *N = dyn_cast_or_null(MD); + auto *N = cast(MD); if (!N->isTemporary()) return; } @@ -2133,7 +2133,7 @@ MetadataLoader::MetadataLoader(BitstreamCursor &Stream, Module &TheModule, BitcodeReaderValueList &ValueList, bool IsImporting, std::function getTypeByID) - : Pimpl(llvm::make_unique( + : Pimpl(std::make_unique( Stream, TheModule, ValueList, std::move(getTypeByID), IsImporting)) {} Error MetadataLoader::parseMetadata(bool ModuleLevel) { diff --git a/lib/Bitcode/Writer/BitWriter.cpp b/lib/Bitcode/Writer/BitWriter.cpp index 76ca89147e52..be59c1f92836 100644 --- a/lib/Bitcode/Writer/BitWriter.cpp +++ b/lib/Bitcode/Writer/BitWriter.cpp @@ -19,7 +19,7 @@ using namespace llvm; int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) { std::error_code EC; - raw_fd_ostream OS(Path, EC, sys::fs::F_None); + raw_fd_ostream OS(Path, EC, sys::fs::OF_None); if (EC) return -1; diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 5c7b970a3a75..deb4019ea8ba 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -86,7 +86,7 @@ static cl::opt cl::desc("Number of metadatas above which we emit an index " "to enable lazy-loading")); -cl::opt WriteRelBFToSummary( +static cl::opt WriteRelBFToSummary( "write-relbf-to-summary", cl::Hidden, cl::init(false), cl::desc("Write relative block frequency to function summary ")); @@ -520,7 +520,7 @@ static unsigned getEncodedCastOpcode(unsigned Opcode) { static unsigned getEncodedUnaryOpcode(unsigned Opcode) { switch (Opcode) { default: llvm_unreachable("Unknown binary instruction!"); - case Instruction::FNeg: return bitc::UNOP_NEG; + case Instruction::FNeg: return bitc::UNOP_FNEG; } } @@ -2880,6 +2880,11 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, pushValueSigned(PN.getIncomingValue(i), InstID, Vals64); Vals64.push_back(VE.getValueID(PN.getIncomingBlock(i))); } + + uint64_t Flags = getOptimizationFlags(&I); + if (Flags != 0) + Vals64.push_back(Flags); + // Emit a Vals64 vector and exit. 
Stream.EmitRecord(Code, Vals64, AbbrevToUse); Vals64.clear(); diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 444f618d8b8c..f64b775a8b77 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -232,7 +232,7 @@ bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr &MI, if (!MO.isReg() || !MO.isImplicit()) return false; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) return false; @@ -252,7 +252,7 @@ void AggressiveAntiDepBreaker::GetPassthruRegs( if (!MO.isReg()) continue; if ((MO.isDef() && MI.isRegTiedToUseOperand(i)) || IsImplicitDefUse(MI, MO)) { - const unsigned Reg = MO.getReg(); + const Register Reg = MO.getReg(); for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) PassthruRegs.insert(*SubRegs); @@ -365,7 +365,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction( for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n"); @@ -375,7 +375,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction( for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" @@ -418,7 +418,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction( for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; // Ignore KILLs and passthru registers for liveness... 
if (MI.isKill() || (PassthruRegs.count(Reg) != 0)) @@ -471,7 +471,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI, for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" @@ -506,7 +506,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI, for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; if (FirstReg != 0) { @@ -790,7 +790,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( CriticalPathSU = SU; } } - + assert(CriticalPathSU && "Failed to find SUnit critical path"); CriticalPathMI = CriticalPathSU->getInstr(); } diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index d158e70b86ac..4f24f077d120 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -309,7 +309,8 @@ static const Value *getNoopInput(const Value *V, NoopInput = Op; } else if (isa(I) && TLI.allowTruncateForTailCall(Op->getType(), I->getType())) { - DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits()); + DataBits = std::min((uint64_t)DataBits, + I->getType()->getPrimitiveSizeInBits().getFixedSize()); NoopInput = Op; } else if (auto CS = ImmutableCallSite(I)) { const Value *ReturnedOp = CS.getReturnedArgOperand(); @@ -523,7 +524,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) { // longjmp on x86), it can end up causing miscompilation that has not // been fully understood. if (!Ret && - (!TM.Options.GuaranteedTailCallOpt || !isa(Term))) + ((!TM.Options.GuaranteedTailCallOpt && + CS.getCallingConv() != CallingConv::Tail) || !isa(Term))) return false; // If I will have a chain, make sure no other instruction that will have a @@ -536,9 +538,11 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) { // Debug info intrinsics do not get in the way of tail call optimization. if (isa(BBI)) continue; - // A lifetime end intrinsic should not stop tail call optimization. + // A lifetime end or assume intrinsic should not stop tail call + // optimization. if (const IntrinsicInst *II = dyn_cast(BBI)) - if (II->getIntrinsicID() == Intrinsic::lifetime_end) + if (II->getIntrinsicID() == Intrinsic::lifetime_end || + II->getIntrinsicID() == Intrinsic::assume) continue; if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || !isSafeToSpeculativelyExecute(&*BBI)) diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 54f6cc2d5571..73c53d6c4af5 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -91,10 +91,12 @@ #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSectionXCOFF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCSymbolXCOFF.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/SectionKind.h" @@ -159,30 +161,30 @@ static gcp_map_type &getGCMap(void *&P) { return *(gcp_map_type*)P; } -/// getGVAlignmentLog2 - Return the alignment to use for the specified global -/// value in log2 form. 
This rounds up to the preferred alignment if possible -/// and legal. -static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &DL, - unsigned InBits = 0) { - unsigned NumBits = 0; +/// getGVAlignment - Return the alignment to use for the specified global +/// value. This rounds up to the preferred alignment if possible and legal. +Align AsmPrinter::getGVAlignment(const GlobalValue *GV, const DataLayout &DL, + Align InAlign) { + Align Alignment; if (const GlobalVariable *GVar = dyn_cast(GV)) - NumBits = DL.getPreferredAlignmentLog(GVar); + Alignment = Align(DL.getPreferredAlignment(GVar)); - // If InBits is specified, round it to it. - if (InBits > NumBits) - NumBits = InBits; + // If InAlign is specified, round it to it. + if (InAlign > Alignment) + Alignment = InAlign; // If the GV has a specified alignment, take it into account. - if (GV->getAlignment() == 0) - return NumBits; + const MaybeAlign GVAlign(GV->getAlignment()); + if (!GVAlign) + return Alignment; - unsigned GVAlign = Log2_32(GV->getAlignment()); + assert(GVAlign && "GVAlign must be set"); // If the GVAlign is larger than NumBits, or if we are required to obey // NumBits because the GV has an assigned section, obey it. - if (GVAlign > NumBits || GV->hasSection()) - NumBits = GVAlign; - return NumBits; + if (*GVAlign > Alignment || GV->hasSection()) + Alignment = *GVAlign; + return Alignment; } AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr Streamer) @@ -248,13 +250,14 @@ const MCSection *AsmPrinter::getCurrentSection() const { void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); } bool AsmPrinter::doInitialization(Module &M) { - MMI = getAnalysisIfAvailable(); + auto *MMIWP = getAnalysisIfAvailable(); + MMI = MMIWP ? &MMIWP->getMMI() : nullptr; // Initialize TargetLoweringObjectFile. const_cast(getObjFileLowering()) @@ -311,7 +314,7 @@ bool AsmPrinter::doInitialization(Module &M) { if (MAI->doesSupportDebugInformation()) { bool EmitCodeView = MMI->getModule()->getCodeViewFlag(); if (EmitCodeView && TM.getTargetTriple().isOSWindows()) { - Handlers.emplace_back(llvm::make_unique(this), + Handlers.emplace_back(std::make_unique(this), DbgTimerName, DbgTimerDescription, CodeViewLineTablesGroupName, CodeViewLineTablesGroupDescription); @@ -380,7 +383,7 @@ bool AsmPrinter::doInitialization(Module &M) { if (mdconst::extract_or_null( MMI->getModule()->getModuleFlag("cfguardtable"))) - Handlers.emplace_back(llvm::make_unique(this), CFGuardName, + Handlers.emplace_back(std::make_unique(this), CFGuardName, CFGuardDescription, DWARFGroupName, DWARFGroupDescription); @@ -425,7 +428,10 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global); return; case GlobalValue::PrivateLinkage: + return; case GlobalValue::InternalLinkage: + if (MAI->hasDotLGloblDirective()) + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_LGlobal); return; case GlobalValue::AppendingLinkage: case GlobalValue::AvailableExternallyLinkage: @@ -501,7 +507,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // If the alignment is specified, we *must* obey it. Overaligning a global // with a specified alignment is a prompt way to break globals emitted to // sections and expected to be contiguous (e.g. ObjC metadata). 
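The rewrite above replaces log2-of-alignment bookkeeping (getGVAlignmentLog2 returning a bit count) with a dedicated alignment value type, so callers can no longer confuse an exponent with a byte count. A minimal sketch of that idea follows, assuming nothing beyond the standard library; ByteAlign and pickAlignment are illustrative stand-ins, not LLVM's llvm::Align or the real helper.

// Sketch: a tiny alignment wrapper that stores the byte value and
// enforces the power-of-two invariant, instead of passing raw log2
// exponents around. Illustrative only; not the real llvm::Align.
#include <cassert>
#include <cstdint>

class ByteAlign {
  uint64_t Value; // always a power of two, in bytes
public:
  explicit ByteAlign(uint64_t V) : Value(V) {
    assert(V != 0 && (V & (V - 1)) == 0 && "alignment must be a power of two");
  }
  uint64_t value() const { return Value; }
  bool operator>(ByteAlign RHS) const { return Value > RHS.Value; }
  bool operator==(ByteAlign RHS) const { return Value == RHS.Value; }
};

// Mirrors the shape of the rewritten helper: start from a preferred
// alignment, round up to a requested minimum, honour an explicit override.
ByteAlign pickAlignment(ByteAlign Preferred, ByteAlign Minimum,
                        uint64_t ExplicitOrZero) {
  ByteAlign A = Preferred;
  if (Minimum > A)
    A = Minimum;
  if (ExplicitOrZero != 0 && ByteAlign(ExplicitOrZero) > A)
    A = ByteAlign(ExplicitOrZero);
  return A;
}

int main() {
  return pickAlignment(ByteAlign(4), ByteAlign(8), 16) == ByteAlign(16) ? 0 : 1;
}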
- unsigned AlignLog = getGVAlignmentLog2(GV, DL); + const Align Alignment = getGVAlignment(GV, DL); for (const HandlerInfo &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerDescription, @@ -513,12 +519,11 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Handle common symbols if (GVKind.isCommon()) { if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. - unsigned Align = 1 << AlignLog; - if (!getObjFileLowering().getCommDirectiveSupportsAlignment()) - Align = 0; - // .comm _foo, 42, 4 - OutStreamer->EmitCommonSymbol(GVSym, Size, Align); + const bool SupportsAlignment = + getObjFileLowering().getCommDirectiveSupportsAlignment(); + OutStreamer->EmitCommonSymbol(GVSym, Size, + SupportsAlignment ? Alignment.value() : 0); return; } @@ -531,10 +536,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { TheSection->isVirtualSection()) { if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined. - unsigned Align = 1 << AlignLog; EmitLinkage(GV, GVSym); // .zerofill __DATA, __bss, _foo, 400, 5 - OutStreamer->EmitZerofill(TheSection, GVSym, Size, Align); + OutStreamer->EmitZerofill(TheSection, GVSym, Size, Alignment.value()); return; } @@ -544,7 +548,6 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { getObjFileLowering().getBSSSection() == TheSection) { if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. - unsigned Align = 1 << AlignLog; // Use .lcomm only if it supports user-specified alignment. // Otherwise, while it would still be correct to use .lcomm in some @@ -554,17 +557,17 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Prefer to simply fall back to .local / .comm in this case. if (MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) { // .lcomm _foo, 42 - OutStreamer->EmitLocalCommonSymbol(GVSym, Size, Align); + OutStreamer->EmitLocalCommonSymbol(GVSym, Size, Alignment.value()); return; } - if (!getObjFileLowering().getCommDirectiveSupportsAlignment()) - Align = 0; - // .local _foo OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Local); // .comm _foo, 42, 4 - OutStreamer->EmitCommonSymbol(GVSym, Size, Align); + const bool SupportsAlignment = + getObjFileLowering().getCommDirectiveSupportsAlignment(); + OutStreamer->EmitCommonSymbol(GVSym, Size, + SupportsAlignment ? 
Alignment.value() : 0); return; } @@ -585,11 +588,11 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (GVKind.isThreadBSS()) { TheSection = getObjFileLowering().getTLSBSSSection(); - OutStreamer->EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog); + OutStreamer->EmitTBSSSymbol(TheSection, MangSym, Size, Alignment.value()); } else if (GVKind.isThreadData()) { OutStreamer->SwitchSection(TheSection); - EmitAlignment(AlignLog, GV); + EmitAlignment(Alignment, GV); OutStreamer->EmitLabel(MangSym); EmitGlobalConstant(GV->getParent()->getDataLayout(), @@ -625,7 +628,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer->SwitchSection(TheSection); EmitLinkage(GV, EmittedInitSym); - EmitAlignment(AlignLog, GV); + EmitAlignment(Alignment, GV); OutStreamer->EmitLabel(EmittedInitSym); @@ -664,6 +667,10 @@ void AsmPrinter::EmitFunctionHeader() { OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(&F, TM)); EmitVisibility(CurrentFnSym, F.getVisibility()); + if (MAI->needsFunctionDescriptors() && + F.getLinkage() != GlobalValue::InternalLinkage) + EmitLinkage(&F, CurrentFnDescSym); + EmitLinkage(&F, CurrentFnSym); if (MAI->hasFunctionAlignment()) EmitAlignment(MF->getAlignment(), &F); @@ -699,8 +706,13 @@ void AsmPrinter::EmitFunctionHeader() { } } - // Emit the CurrentFnSym. This is a virtual function to allow targets to - // do their wild and crazy things as required. + // Emit the function descriptor. This is a virtual function to allow targets + // to emit their specific function descriptor. + if (MAI->needsFunctionDescriptors()) + EmitFunctionDescriptor(); + + // Emit the CurrentFnSym. This is a virtual function to allow targets to do + // their wild and crazy things as required. EmitFunctionEntryLabel(); // If the function had address-taken blocks that got deleted, then we have @@ -783,7 +795,7 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { /// emitImplicitDef - This method emits the specified machine instruction /// that is an implicit def. void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const { - unsigned RegNo = MI->getOperand(0).getReg(); + Register RegNo = MI->getOperand(0).getReg(); SmallString<128> Str; raw_svector_ostream OS(Str); @@ -910,7 +922,8 @@ static bool emitDebugLabelComment(const MachineInstr *MI, AsmPrinter &AP) { OS << "DEBUG_LABEL: "; const DILabel *V = MI->getDebugLabel(); - if (auto *SP = dyn_cast(V->getScope())) { + if (auto *SP = dyn_cast( + V->getScope()->getNonLexicalBlockFileScope())) { StringRef Name = SP->getName(); if (!Name.empty()) OS << Name << ":"; @@ -1024,7 +1037,7 @@ void AsmPrinter::EmitFunctionBody() { // Get MachineDominatorTree or compute it on the fly if it's unavailable MDT = getAnalysisIfAvailable(); if (!MDT) { - OwnedMDT = make_unique(); + OwnedMDT = std::make_unique(); OwnedMDT->getBase().recalculate(*MF); MDT = OwnedMDT.get(); } @@ -1032,7 +1045,7 @@ void AsmPrinter::EmitFunctionBody() { // Get MachineLoopInfo or compute it on the fly if it's unavailable MLI = getAnalysisIfAvailable(); if (!MLI) { - OwnedMLI = make_unique(); + OwnedMLI = std::make_unique(); OwnedMLI->getBase().analyze(MDT->getBase()); MLI = OwnedMLI.get(); } @@ -1052,9 +1065,13 @@ void AsmPrinter::EmitFunctionBody() { ++NumInstsInFunction; } - // If there is a pre-instruction symbol, emit a label for it here. + // If there is a pre-instruction symbol, emit a label for it here. 
If the + // instruction was duplicated and the label has already been emitted, + // don't re-emit the same label. + // FIXME: Consider strengthening that to an assertion. if (MCSymbol *S = MI.getPreInstrSymbol()) - OutStreamer->EmitLabel(S); + if (S->isUndefined()) + OutStreamer->EmitLabel(S); if (ShouldPrintDebugScopes) { for (const HandlerInfo &HI : Handlers) { @@ -1107,9 +1124,13 @@ void AsmPrinter::EmitFunctionBody() { break; } - // If there is a post-instruction symbol, emit a label for it here. + // If there is a post-instruction symbol, emit a label for it here. If + // the instruction was duplicated and the label has already been emitted, + // don't re-emit the same label. + // FIXME: Consider strengthening that to an assertion. if (MCSymbol *S = MI.getPostInstrSymbol()) - OutStreamer->EmitLabel(S); + if (S->isUndefined()) + OutStreamer->EmitLabel(S); if (ShouldPrintDebugScopes) { for (const HandlerInfo &HI : Handlers) { @@ -1313,11 +1334,10 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M, // Set the symbol type to function if the alias has a function type. // This affects codegen when the aliasee is not a function. - if (IsFunction) { - OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeFunction); - if (isa(GIS)) - OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeIndFunction); - } + if (IsFunction) + OutStreamer->EmitSymbolAttribute(Name, isa(GIS) + ? MCSA_ELF_TypeIndFunction + : MCSA_ELF_TypeFunction); EmitVisibility(Name, GIS.getVisibility()); @@ -1349,60 +1369,28 @@ void AsmPrinter::emitRemarksSection(Module &M) { RemarkStreamer *RS = M.getContext().getRemarkStreamer(); if (!RS) return; - const remarks::Serializer &Serializer = RS->getSerializer(); + remarks::RemarkSerializer &RemarkSerializer = RS->getSerializer(); + + Optional> Filename; + if (Optional FilenameRef = RS->getFilename()) { + Filename = *FilenameRef; + sys::fs::make_absolute(*Filename); + assert(!Filename->empty() && "The filename can't be empty."); + } + + std::string Buf; + raw_string_ostream OS(Buf); + std::unique_ptr MetaSerializer = + Filename ? RemarkSerializer.metaSerializer(OS, StringRef(*Filename)) + : RemarkSerializer.metaSerializer(OS); + MetaSerializer->emit(); // Switch to the right section: .remarks/__remarks. MCSection *RemarksSection = OutContext.getObjectFileInfo()->getRemarksSection(); OutStreamer->SwitchSection(RemarksSection); - // Emit the magic number. - OutStreamer->EmitBytes(remarks::Magic); - // Explicitly emit a '\0'. - OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1); - - // Emit the version number: little-endian uint64_t. - // The version number is located at the offset 0x0 in the section. - std::array Version; - support::endian::write64le(Version.data(), remarks::Version); - OutStreamer->EmitBinaryData(StringRef(Version.data(), Version.size())); - - // Emit the string table in the section. - // Note: we need to use the streamer here to emit it in the section. We can't - // just use the serialize function with a raw_ostream because of the way - // MCStreamers work. - uint64_t StrTabSize = - Serializer.StrTab ? Serializer.StrTab->SerializedSize : 0; - // Emit the total size of the string table (the size itself excluded): - // little-endian uint64_t. - // The total size is located after the version number. - // Note: even if no string table is used, emit 0. 
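The remarks change above drops the hand-rolled field-by-field emission in favour of serializing the metadata into an in-memory buffer and handing the finished bytes to the streamer in a single EmitBinaryData call. The sketch below illustrates that buffer-then-emit pattern along with the little-endian size prefix and null-terminated strings the removed code produced by hand; writeLE64, Streamer, and serializeMeta are mock-ups under those assumptions, not the real remarks serializer.

// Sketch: build a section payload in memory, then emit it as one blob.
// writeLE64 mimics the little-endian size prefix the old code emitted
// field by field. The Streamer type is a stand-in, not MCStreamer.
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

static void writeLE64(std::string &Out, uint64_t V) {
  for (int I = 0; I < 8; ++I)
    Out.push_back(static_cast<char>((V >> (8 * I)) & 0xff));
}

struct Streamer {
  std::string Section;
  void emitBinaryData(const std::string &Bytes) { Section += Bytes; }
};

std::string serializeMeta(const std::vector<std::string> &Strings,
                          const std::string &AbsPath) {
  std::string Buf;
  std::string Table;
  for (const std::string &S : Strings) {
    Table += S;
    Table.push_back('\0');          // null-terminated entries
  }
  writeLE64(Buf, Table.size());      // size prefix, 0 if the table is empty
  Buf += Table;
  Buf += AbsPath;                    // null-terminated file name
  Buf.push_back('\0');
  return Buf;
}

int main() {
  Streamer S;
  S.emitBinaryData(serializeMeta({"inline", "loop-vectorize"}, "/tmp/foo.yaml"));
  std::cout << S.Section.size() << " bytes emitted\n";
  return 0;
}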
- std::array StrTabSizeBuf; - support::endian::write64le(StrTabSizeBuf.data(), StrTabSize); - OutStreamer->EmitBinaryData( - StringRef(StrTabSizeBuf.data(), StrTabSizeBuf.size())); - - if (const Optional &StrTab = Serializer.StrTab) { - std::vector StrTabStrings = StrTab->serialize(); - // Emit a list of null-terminated strings. - // Note: the order is important here: the ID used in the remarks corresponds - // to the position of the string in the section. - for (StringRef Str : StrTabStrings) { - OutStreamer->EmitBytes(Str); - // Explicitly emit a '\0'. - OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1); - } - } - - // Emit the null-terminated absolute path to the remark file. - // The path is located at the offset 0x4 in the section. - StringRef FilenameRef = RS->getFilename(); - SmallString<128> Filename = FilenameRef; - sys::fs::make_absolute(Filename); - assert(!Filename.empty() && "The filename can't be empty."); - OutStreamer->EmitBytes(Filename); - // Explicitly emit a '\0'. - OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1); + OutStreamer->EmitBinaryData(OS.str()); } bool AsmPrinter::doFinalization(Module &M) { @@ -1455,7 +1443,7 @@ bool AsmPrinter::doFinalization(Module &M) { OutStreamer->SwitchSection(TLOF.getDataSection()); const DataLayout &DL = M.getDataLayout(); - EmitAlignment(Log2_32(DL.getPointerSize())); + EmitAlignment(Align(DL.getPointerSize())); for (const auto &Stub : Stubs) { OutStreamer->EmitLabel(Stub.first); OutStreamer->EmitSymbolValue(Stub.second.getPointer(), @@ -1482,7 +1470,7 @@ bool AsmPrinter::doFinalization(Module &M) { COFF::IMAGE_SCN_LNK_COMDAT, SectionKind::getReadOnly(), Stub.first->getName(), COFF::IMAGE_COMDAT_SELECT_ANY)); - EmitAlignment(Log2_32(DL.getPointerSize())); + EmitAlignment(Align(DL.getPointerSize())); OutStreamer->EmitSymbolAttribute(Stub.first, MCSA_Global); OutStreamer->EmitLabel(Stub.first); OutStreamer->EmitSymbolValue(Stub.second.getPointer(), @@ -1607,8 +1595,7 @@ bool AsmPrinter::doFinalization(Module &M) { "expected llvm.used to be an array type"); if (const auto *A = cast(LU->getInitializer())) { for (const Value *Op : A->operands()) { - const auto *GV = - cast(Op->stripPointerCastsNoFollowAliases()); + const auto *GV = cast(Op->stripPointerCasts()); // Global symbols with internal or private linkage are not visible to // the linker, and thus would cause an error when the linker tried to // preserve the symbol due to the `/include:` directive. @@ -1679,8 +1666,27 @@ MCSymbol *AsmPrinter::getCurExceptionSym() { void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { this->MF = &MF; + // Get the function symbol. - CurrentFnSym = getSymbol(&MF.getFunction()); + if (MAI->needsFunctionDescriptors()) { + assert(TM.getTargetTriple().isOSAIX() && "Function descriptor is only" + " supported on AIX."); + assert(CurrentFnDescSym && "The function descriptor symbol needs to be" + " initalized first."); + + // Get the function entry point symbol. + CurrentFnSym = + OutContext.getOrCreateSymbol("." + CurrentFnDescSym->getName()); + + const Function &F = MF.getFunction(); + MCSectionXCOFF *FnEntryPointSec = + cast(getObjFileLowering().SectionForGlobal(&F, TM)); + // Set the containing csect. 
+ cast(CurrentFnSym)->setContainingCsect(FnEntryPointSec); + } else { + CurrentFnSym = getSymbol(&MF.getFunction()); + } + CurrentFnSymForSize = CurrentFnSym; CurrentFnBegin = nullptr; CurExceptionSym = nullptr; @@ -1765,7 +1771,7 @@ void AsmPrinter::EmitConstantPool() { if (CurSection != CPSections[i].S) { OutStreamer->SwitchSection(CPSections[i].S); - EmitAlignment(Log2_32(CPSections[i].Alignment)); + EmitAlignment(Align(CPSections[i].Alignment)); CurSection = CPSections[i].S; Offset = 0; } @@ -1812,7 +1818,7 @@ void AsmPrinter::EmitJumpTableInfo() { OutStreamer->SwitchSection(ReadOnlySection); } - EmitAlignment(Log2_32(MJTI->getEntryAlignment(DL))); + EmitAlignment(Align(MJTI->getEntryAlignment(DL))); // Jump tables in code sections are marked with a data_region directive // where that's supported. @@ -2025,10 +2031,10 @@ void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List, } // Emit the function pointers in the target-specific order - unsigned Align = Log2_32(DL.getPointerPrefAlignment()); llvm::stable_sort(Structors, [](const Structor &L, const Structor &R) { return L.Priority < R.Priority; }); + const Align Align = DL.getPointerPrefAlignment(); for (Structor &S : Structors) { const TargetLoweringObjectFile &Obj = getObjFileLowering(); const MCSymbol *KeySym = nullptr; @@ -2149,23 +2155,20 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, //===----------------------------------------------------------------------===// // EmitAlignment - Emit an alignment directive to the specified power of -// two boundary. For example, if you pass in 3 here, you will get an 8 -// byte alignment. If a global value is specified, and if that global has +// two boundary. If a global value is specified, and if that global has // an explicit alignment requested, it will override the alignment request // if required for correctness. -void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const { +void AsmPrinter::EmitAlignment(Align Alignment, const GlobalObject *GV) const { if (GV) - NumBits = getGVAlignmentLog2(GV, GV->getParent()->getDataLayout(), NumBits); + Alignment = getGVAlignment(GV, GV->getParent()->getDataLayout(), Alignment); - if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment. + if (Alignment == Align::None()) + return; // 1-byte aligned: no need to emit alignment. 
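EmitAlignment now takes an alignment value directly and returns early for 1-byte alignment, since padding to a 1-byte boundary never does anything. For reference, the arithmetic an alignment directive ultimately boils down to is shown below as a small self-contained example; alignTo here is a generic helper, not the streamer API.

// Sketch: rounding an offset up to an alignment boundary, the arithmetic
// behind an alignment directive. A 1-byte alignment never adds padding,
// so emitting a directive for it would be a no-op.
#include <cassert>
#include <cstdint>
#include <iostream>

uint64_t alignTo(uint64_t Offset, uint64_t Alignment) {
  assert(Alignment && (Alignment & (Alignment - 1)) == 0 &&
         "alignment must be a power of two");
  return (Offset + Alignment - 1) & ~(Alignment - 1);
}

int main() {
  std::cout << alignTo(13, 1) << "\n";  // 13: no padding needed
  std::cout << alignTo(13, 8) << "\n";  // 16: three bytes of padding
  std::cout << alignTo(16, 8) << "\n";  // 16: already aligned
  return 0;
}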
- assert(NumBits < - static_cast(std::numeric_limits::digits) && - "undefined behavior"); if (getCurrentSection()->getKind().isText()) - OutStreamer->EmitCodeAlignment(1u << NumBits); + OutStreamer->EmitCodeAlignment(Alignment.value()); else - OutStreamer->EmitValueToAlignment(1u << NumBits); + OutStreamer->EmitValueToAlignment(Alignment.value()); } //===----------------------------------------------------------------------===// @@ -2481,6 +2484,7 @@ static void emitGlobalConstantStruct(const DataLayout &DL, } static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP) { + assert(ET && "Unknown float type"); APInt API = APF.bitcastToAPInt(); // First print a comment with what we think the original floating-point value @@ -2488,11 +2492,7 @@ static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP) { if (AP.isVerbose()) { SmallString<8> StrVal; APF.toString(StrVal); - - if (ET) - ET->print(AP.OutStreamer->GetCommentOS()); - else - AP.OutStreamer->GetCommentOS() << "Printing Type"; + ET->print(AP.OutStreamer->GetCommentOS()); AP.OutStreamer->GetCommentOS() << ' ' << StrVal << '\n'; } @@ -2670,7 +2670,7 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME, const GlobalValue *FinalGV = dyn_cast(GV->getOperand(0)); const MCSymbol *FinalSym = AP.getSymbol(FinalGV); *ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel( - FinalSym, MV, Offset, AP.MMI, *AP.OutStreamer); + FinalGV, FinalSym, MV, Offset, AP.MMI, *AP.OutStreamer); // Update GOT equivalent usage information --NumUses; @@ -2930,7 +2930,7 @@ void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB, /// EmitBasicBlockStart - This method prints the label for the specified /// MachineBasicBlock, an alignment (if present) and a comment describing /// it if appropriate. -void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { +void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) { // End the previous funclet and start a new one. if (MBB.isEHFuncletEntry()) { for (const HandlerInfo &HI : Handlers) { @@ -2940,8 +2940,9 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { } // Emit an alignment directive for this block, if needed. 
- if (unsigned Align = MBB.getAlignment()) - EmitAlignment(Align); + const Align Alignment = MBB.getAlignment(); + if (Alignment != Align::None()) + EmitAlignment(Alignment); MCCodePaddingContext Context; setupCodePaddingContext(MBB, Context); OutStreamer->EmitCodePaddingBasicBlockStart(Context); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 7721e996aca5..420df26a2b8b 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -72,7 +72,7 @@ static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { unsigned AsmPrinter::addInlineAsmDiagBuffer(StringRef AsmStr, const MDNode *LocMDNode) const { if (!DiagInfo) { - DiagInfo = make_unique(); + DiagInfo = std::make_unique(); MCContext &Context = MMI->getContext(); Context.setInlineSourceManager(&DiagInfo->SrcMgr); @@ -432,6 +432,7 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress(); MCSymbol *Sym = AP->GetBlockAddressSymbol(BA); Sym->print(OS, AP->MAI); + MMI->getContext().registerInlineAsmLabel(Sym); } else if (MI->getOperand(OpNo).isMBB()) { const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol(); Sym->print(OS, AP->MAI); diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h index db2ff458eb2e..09f7496cd4ef 100644 --- a/lib/CodeGen/AsmPrinter/ByteStreamer.h +++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h @@ -73,18 +73,18 @@ class HashingByteStreamer final : public ByteStreamer { class BufferByteStreamer final : public ByteStreamer { private: SmallVectorImpl &Buffer; - SmallVectorImpl &Comments; + std::vector &Comments; +public: /// Only verbose textual output needs comments. This will be set to /// true for that case, and false otherwise. If false, comments passed in to /// the emit methods will be ignored. 
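The BufferByteStreamer change that follows keeps the emitted bytes in one container and, only when verbose output is requested, a parallel container of comments to print next to them later. A minimal sketch of that shape, assuming illustrative types rather than the real ByteStreamer hierarchy:

// Sketch: a byte streamer that records bytes and, only in verbose mode,
// a parallel vector of comments to print alongside them later.
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

class BufferingStreamer {
  std::vector<uint8_t> Bytes;
  std::vector<std::string> Comments;
  const bool GenerateComments; // collect comments for verbose output only

public:
  explicit BufferingStreamer(bool Verbose) : GenerateComments(Verbose) {}

  void emitInt8(uint8_t Byte, const std::string &Comment) {
    Bytes.push_back(Byte);
    if (GenerateComments)
      Comments.push_back(Comment);
  }

  void dump() const {
    for (std::size_t I = 0; I != Bytes.size(); ++I) {
      std::cout << std::hex << unsigned(Bytes[I]);
      if (I < Comments.size())
        std::cout << "\t# " << Comments[I];
      std::cout << "\n";
    }
  }
};

int main() {
  BufferingStreamer S(/*Verbose=*/true);
  S.emitInt8(0x2a, "first payload byte");
  S.emitInt8(0x07, "second payload byte");
  S.dump();
  return 0;
}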
- bool GenerateComments; + const bool GenerateComments; -public: BufferByteStreamer(SmallVectorImpl &Buffer, - SmallVectorImpl &Comments, - bool GenerateComments) - : Buffer(Buffer), Comments(Comments), GenerateComments(GenerateComments) {} + std::vector &Comments, bool GenerateComments) + : Buffer(Buffer), Comments(Comments), GenerateComments(GenerateComments) { + } void EmitInt8(uint8_t Byte, const Twine &Comment) override { Buffer.push_back(Byte); if (GenerateComments) diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 932959c311fa..c6457f3626d1 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -98,7 +98,8 @@ using namespace llvm::codeview; namespace { class CVMCAdapter : public CodeViewRecordStreamer { public: - CVMCAdapter(MCStreamer &OS) : OS(&OS) {} + CVMCAdapter(MCStreamer &OS, TypeCollection &TypeTable) + : OS(&OS), TypeTable(TypeTable) {} void EmitBytes(StringRef Data) { OS->EmitBytes(Data); } @@ -110,8 +111,24 @@ public: void AddComment(const Twine &T) { OS->AddComment(T); } + void AddRawComment(const Twine &T) { OS->emitRawComment(T); } + + bool isVerboseAsm() { return OS->isVerboseAsm(); } + + std::string getTypeName(TypeIndex TI) { + std::string TypeName; + if (!TI.isNoneType()) { + if (TI.isSimple()) + TypeName = TypeIndex::simpleTypeName(TI); + else + TypeName = TypeTable.getTypeName(TI); + } + return TypeName; + } + private: MCStreamer *OS = nullptr; + TypeCollection &TypeTable; }; } // namespace @@ -617,13 +634,6 @@ emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S, OS.EmitBytes(NullTerminatedString); } -static StringRef getTypeLeafName(TypeLeafKind TypeKind) { - for (const EnumEntry &EE : getTypeLeafNames()) - if (EE.Value == TypeKind) - return EE.Name; - return ""; -} - void CodeViewDebug::emitTypeInformation() { if (TypeTable.empty()) return; @@ -632,30 +642,11 @@ void CodeViewDebug::emitTypeInformation() { OS.SwitchSection(Asm->getObjFileLowering().getCOFFDebugTypesSection()); emitCodeViewMagicVersion(); - SmallString<8> CommentPrefix; - if (OS.isVerboseAsm()) { - CommentPrefix += '\t'; - CommentPrefix += Asm->MAI->getCommentString(); - CommentPrefix += ' '; - } - TypeTableCollection Table(TypeTable.records()); - SmallString<512> CommentBlock; - raw_svector_ostream CommentOS(CommentBlock); - std::unique_ptr SP; - std::unique_ptr TDV; TypeVisitorCallbackPipeline Pipeline; - if (OS.isVerboseAsm()) { - // To construct block comment describing the type record for readability. - SP = llvm::make_unique(CommentOS); - SP->setPrefix(CommentPrefix); - TDV = llvm::make_unique(Table, SP.get(), false); - Pipeline.addCallbackToPipeline(*TDV); - } - // To emit type record using Codeview MCStreamer adapter - CVMCAdapter CVMCOS(OS); + CVMCAdapter CVMCOS(OS, Table); TypeRecordMapping typeMapping(CVMCOS); Pipeline.addCallbackToPipeline(typeMapping); @@ -664,17 +655,6 @@ void CodeViewDebug::emitTypeInformation() { // This will fail if the record data is invalid. 
CVType Record = Table.getType(*B); - CommentBlock.clear(); - - auto RecordLen = Record.length(); - auto RecordKind = Record.kind(); - if (OS.isVerboseAsm()) - CVMCOS.AddComment("Record length"); - CVMCOS.EmitIntValue(RecordLen - 2, 2); - if (OS.isVerboseAsm()) - CVMCOS.AddComment("Record kind: " + getTypeLeafName(RecordKind)); - CVMCOS.EmitIntValue(RecordKind, sizeof(RecordKind)); - Error E = codeview::visitTypeRecord(Record, *B, Pipeline); if (E) { @@ -682,13 +662,6 @@ void CodeViewDebug::emitTypeInformation() { llvm_unreachable("produced malformed type record"); } - if (OS.isVerboseAsm()) { - // emitRawComment will insert its own tab and comment string before - // the first line, so strip off our first one. It also prints its own - // newline. - OS.emitRawComment( - CommentOS.str().drop_front(CommentPrefix.size() - 1).rtrim()); - } B = Table.getNext(*B); } } @@ -1135,7 +1108,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, if (!BeginLabel->isDefined() || !EndLabel->isDefined()) continue; - DIType *DITy = std::get<2>(HeapAllocSite); + const DIType *DITy = std::get<2>(HeapAllocSite); MCSymbol *HeapAllocEnd = beginSymbolRecord(SymbolKind::S_HEAPALLOCSITE); OS.AddComment("Call site offset"); OS.EmitCOFFSecRel32(BeginLabel, /*Offset=*/0); @@ -1363,7 +1336,7 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) { const TargetRegisterInfo *TRI = TSI.getRegisterInfo(); const MachineFrameInfo &MFI = MF->getFrameInfo(); const Function &GV = MF->getFunction(); - auto Insertion = FnDebugInfo.insert({&GV, llvm::make_unique()}); + auto Insertion = FnDebugInfo.insert({&GV, std::make_unique()}); assert(Insertion.second && "function already has info"); CurFn = Insertion.first->second.get(); CurFn->FuncId = NextFuncId++; @@ -2633,17 +2606,6 @@ void CodeViewDebug::emitLocalVariableList(const FunctionInfo &FI, emitLocalVariable(FI, L); } -/// Only call this on endian-specific types like ulittle16_t and little32_t, or -/// structs composed of them. -template -static void copyBytesForDefRange(SmallString<20> &BytePrefix, - SymbolKind SymKind, const T &DefRangeHeader) { - BytePrefix.resize(2 + sizeof(T)); - ulittle16_t SymKindLE = ulittle16_t(SymKind); - memcpy(&BytePrefix[0], &SymKindLE, 2); - memcpy(&BytePrefix[2], &DefRangeHeader, sizeof(T)); -} - void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, const LocalVariable &Var) { // LocalSym record, see SymbolRecord.h for more info. @@ -2692,8 +2654,9 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, (bool(Flags & LocalSymFlags::IsParameter) ? 
(EncFP == FI.EncodedParamFramePtrReg) : (EncFP == FI.EncodedLocalFramePtrReg))) { - little32_t FPOffset = little32_t(Offset); - copyBytesForDefRange(BytePrefix, S_DEFRANGE_FRAMEPOINTER_REL, FPOffset); + DefRangeFramePointerRelHeader DRHdr; + DRHdr.Offset = Offset; + OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr); } else { uint16_t RegRelFlags = 0; if (DefRange.IsSubfield) { @@ -2701,28 +2664,27 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, (DefRange.StructOffset << DefRangeRegisterRelSym::OffsetInParentShift); } - DefRangeRegisterRelSym::Header DRHdr; + DefRangeRegisterRelHeader DRHdr; DRHdr.Register = Reg; DRHdr.Flags = RegRelFlags; DRHdr.BasePointerOffset = Offset; - copyBytesForDefRange(BytePrefix, S_DEFRANGE_REGISTER_REL, DRHdr); + OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr); } } else { assert(DefRange.DataOffset == 0 && "unexpected offset into register"); if (DefRange.IsSubfield) { - DefRangeSubfieldRegisterSym::Header DRHdr; + DefRangeSubfieldRegisterHeader DRHdr; DRHdr.Register = DefRange.CVRegister; DRHdr.MayHaveNoName = 0; DRHdr.OffsetInParent = DefRange.StructOffset; - copyBytesForDefRange(BytePrefix, S_DEFRANGE_SUBFIELD_REGISTER, DRHdr); + OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr); } else { - DefRangeRegisterSym::Header DRHdr; + DefRangeRegisterHeader DRHdr; DRHdr.Register = DefRange.CVRegister; DRHdr.MayHaveNoName = 0; - copyBytesForDefRange(BytePrefix, S_DEFRANGE_REGISTER, DRHdr); + OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr); } } - OS.EmitCVDefRangeDirective(DefRange.Ranges, BytePrefix); } } @@ -2896,6 +2858,14 @@ void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) { CurFn = nullptr; } +// Usable locations are valid with non-zero line numbers. A line number of zero +// corresponds to optimized code that doesn't have a distinct source location. +// In this case, we try to use the previous or next source location depending on +// the context. +static bool isUsableDebugLoc(DebugLoc DL) { + return DL && DL.getLine() != 0; +} + void CodeViewDebug::beginInstruction(const MachineInstr *MI) { DebugHandlerBase::beginInstruction(MI); @@ -2907,19 +2877,21 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) { // If the first instruction of a new MBB has no location, find the first // instruction with a location and use that. DebugLoc DL = MI->getDebugLoc(); - if (!DL && MI->getParent() != PrevInstBB) { + if (!isUsableDebugLoc(DL) && MI->getParent() != PrevInstBB) { for (const auto &NextMI : *MI->getParent()) { if (NextMI.isDebugInstr()) continue; DL = NextMI.getDebugLoc(); - if (DL) + if (isUsableDebugLoc(DL)) break; } + // FIXME: Handle the case where the BB has no valid locations. This would + // probably require doing a real dataflow analysis. } PrevInstBB = MI->getParent(); // If we still don't have a debug location, don't record a location. - if (!DL) + if (!isUsableDebugLoc(DL)) return; maybeRecordLocation(DL, Asm->MF); @@ -3026,7 +2998,7 @@ void CodeViewDebug::collectGlobalVariableInfo() { auto Insertion = ScopeGlobals.insert( {Scope, std::unique_ptr()}); if (Insertion.second) - Insertion.first->second = llvm::make_unique(); + Insertion.first->second = std::make_unique(); VariableList = Insertion.first->second.get(); } else if (GV->hasComdat()) // Emit this global variable into a COMDAT section. 
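The isUsableDebugLoc change above treats line 0 as "no distinct source location", and the caller falls forward to the first usable location in the block when the current instruction has none. A small standalone sketch of that lookup policy, with a mock instruction list instead of MachineInstrs:

// Sketch: "usable" source locations have a non-zero line number; line 0
// marks compiler-generated code with no distinct location. When the
// current slot has no usable location, fall forward to the next one.
#include <iostream>
#include <vector>

struct Loc {
  unsigned Line = 0;      // 0 means "no distinct source location"
  bool usable() const { return Line != 0; }
};

Loc findLocation(const std::vector<Loc> &Insts, std::size_t Idx) {
  if (Insts[Idx].usable())
    return Insts[Idx];
  for (std::size_t I = Idx + 1; I < Insts.size(); ++I)
    if (Insts[I].usable())
      return Insts[I];
  return Loc{};           // nothing usable: record no location at all
}

int main() {
  std::vector<Loc> Block = {{0}, {0}, {42}, {43}};
  std::cout << findLocation(Block, 0).Line << "\n"; // 42
  std::cout << findLocation(Block, 3).Line << "\n"; // 43
  return 0;
}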
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h index ce57b789d7fa..7ffd77926cf7 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -148,7 +148,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { SmallVector ChildBlocks; std::vector> Annotations; - std::vector> HeapAllocSites; + std::vector> + HeapAllocSites; const MCSymbol *Begin = nullptr; const MCSymbol *End = nullptr; diff --git a/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp index ddd60575b6c0..7f9d6c618ad3 100644 --- a/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp +++ b/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp @@ -41,7 +41,7 @@ using EntryIndex = DbgValueHistoryMap::EntryIndex; static Register isDescribedByReg(const MachineInstr &MI) { assert(MI.isDebugValue()); assert(MI.getNumOperands() == 4); - // If the location of variable is an entry value (DW_OP_entry_value) + // If the location of variable is an entry value (DW_OP_LLVM_entry_value) // do not consider it as a register location. if (MI.getDebugExpression()->isEntryValue()) return 0; @@ -177,13 +177,13 @@ static void handleNewDebugValue(InlinedEntity Var, const MachineInstr &DV, IndicesToErase.push_back(Index); Entry.endEntry(NewIndex); } - if (unsigned Reg = isDescribedByReg(DV)) + if (Register Reg = isDescribedByReg(DV)) TrackedRegs[Reg] |= !Overlaps; } // If the new debug value is described by a register, add tracking of // that register if it is not already tracked. - if (unsigned NewReg = isDescribedByReg(DV)) { + if (Register NewReg = isDescribedByReg(DV)) { if (!TrackedRegs.count(NewReg)) addRegDescribedVar(RegVars, NewReg, Var); LiveEntries[Var].insert(NewIndex); @@ -234,7 +234,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF, DbgLabelInstrMap &DbgLabels) { const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); - unsigned FrameReg = TRI->getFrameRegister(*MF); + Register FrameReg = TRI->getFrameRegister(*MF); RegDescribedVarsMap RegVars; DbgValueEntriesMap LiveEntries; for (const auto &MBB : *MF) { @@ -275,7 +275,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF, continue; // If this is a virtual register, only clobber it since it doesn't // have aliases. - if (TRI->isVirtualRegister(MO.getReg())) + if (Register::isVirtualRegister(MO.getReg())) clobberRegisterUses(RegVars, MO.getReg(), DbgValues, LiveEntries, MI); // If this is a register def operand, it may end a debug value @@ -296,7 +296,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF, // Don't consider SP to be clobbered by register masks. 
for (auto It : RegVars) { unsigned int Reg = It.first; - if (Reg != SP && TRI->isPhysicalRegister(Reg) && + if (Reg != SP && Register::isPhysicalRegister(Reg) && MO.clobbersPhysReg(Reg)) RegsToClobber.push_back(Reg); } diff --git a/lib/CodeGen/AsmPrinter/DebugLocStream.h b/lib/CodeGen/AsmPrinter/DebugLocStream.h index 789291771b5a..0db86b09d19a 100644 --- a/lib/CodeGen/AsmPrinter/DebugLocStream.h +++ b/lib/CodeGen/AsmPrinter/DebugLocStream.h @@ -38,21 +38,18 @@ public: : CU(CU), EntryOffset(EntryOffset) {} }; struct Entry { - const MCSymbol *BeginSym; - const MCSymbol *EndSym; + const MCSymbol *Begin; + const MCSymbol *End; size_t ByteOffset; size_t CommentOffset; - Entry(const MCSymbol *BeginSym, const MCSymbol *EndSym, size_t ByteOffset, - size_t CommentOffset) - : BeginSym(BeginSym), EndSym(EndSym), ByteOffset(ByteOffset), - CommentOffset(CommentOffset) {} }; private: SmallVector Lists; SmallVector Entries; SmallString<256> DWARFBytes; - SmallVector Comments; + std::vector Comments; + MCSymbol *Sym; /// Only verbose textual output needs comments. This will be set to /// true for that case, and false otherwise. @@ -63,6 +60,12 @@ public: size_t getNumLists() const { return Lists.size(); } const List &getList(size_t LI) const { return Lists[LI]; } ArrayRef getLists() const { return Lists; } + MCSymbol *getSym() const { + return Sym; + } + void setSym(MCSymbol *Sym) { + this->Sym = Sym; + } class ListBuilder; class EntryBuilder; @@ -93,7 +96,7 @@ private: /// Until the next call, bytes added to the stream will be added to this /// entry. void startEntry(const MCSymbol *BeginSym, const MCSymbol *EndSym) { - Entries.emplace_back(BeginSym, EndSym, DWARFBytes.size(), Comments.size()); + Entries.push_back({BeginSym, EndSym, DWARFBytes.size(), Comments.size()}); } /// Finalize a .debug_loc entry, deleting if it's empty. diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 9548ad9918c1..a61c98ec1c18 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -208,7 +208,7 @@ void DwarfCompileUnit::addLocationAttribute( if (!Loc) { addToAccelTable = true; Loc = new (DIEValueAllocator) DIELoc; - DwarfExpr = llvm::make_unique(*Asm, *this, *Loc); + DwarfExpr = std::make_unique(*Asm, *this, *Loc); } if (Expr) { @@ -326,14 +326,13 @@ void DwarfCompileUnit::addRange(RangeSpan Range) { // emitted into and the subprogram was contained within. If these are the // same then extend our current range, otherwise add this as a new range. 
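These hunks continue the switch from TRI->isVirtualRegister/isPhysicalRegister to the static Register::isVirtualRegister/isPhysicalRegister helpers: the classification is a property of how the register number is encoded, so no target info is required. The sketch below illustrates the idea with a top-bit tag for virtual registers; it mirrors the concept only and is not necessarily LLVM's exact encoding.

// Sketch: classifying registers purely from the number's encoding, so no
// TargetRegisterInfo is needed. Here the top bit tags virtual registers;
// 0 stays reserved for "no register". Simplified illustration only.
#include <cstdint>
#include <iostream>

struct Reg {
  uint32_t Id = 0;
  static constexpr uint32_t VirtualBit = 1u << 31;

  static Reg makeVirtual(uint32_t Index) { return {Index | VirtualBit}; }
  static Reg makePhysical(uint32_t Num)  { return {Num}; }

  bool isValid() const    { return Id != 0; }
  bool isVirtual() const  { return (Id & VirtualBit) != 0; }
  bool isPhysical() const { return isValid() && !isVirtual(); }
};

int main() {
  Reg R0 = Reg::makePhysical(3);
  Reg V0 = Reg::makeVirtual(0);
  std::cout << R0.isPhysical() << V0.isVirtual() << Reg{}.isValid() << "\n"; // 110
  return 0;
}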
if (CURanges.empty() || !SameAsPrevCU || - (&CURanges.back().getEnd()->getSection() != - &Range.getEnd()->getSection())) { + (&CURanges.back().End->getSection() != + &Range.End->getSection())) { CURanges.push_back(Range); - DD->addSectionLabel(Range.getStart()); return; } - CURanges.back().setEnd(Range.getEnd()); + CURanges.back().End = Range.End; } void DwarfCompileUnit::initStmtList() { @@ -399,7 +398,7 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) { } else { const TargetRegisterInfo *RI = Asm->MF->getSubtarget().getRegisterInfo(); MachineLocation Location(RI->getFrameRegister(*Asm->MF)); - if (RI->isPhysicalRegister(Location.getReg())) + if (Register::isPhysicalRegister(Location.getReg())) addAddress(*SPDie, dwarf::DW_AT_frame_base, Location); } } @@ -468,14 +467,6 @@ void DwarfCompileUnit::constructScopeDIE( void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, SmallVector Range) { - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - - // Emit the offset into .debug_ranges or .debug_rnglists as a relocatable - // label. emitDIE() will handle emitting it appropriately. - const MCSymbol *RangeSectionSym = - DD->getDwarfVersion() >= 5 - ? TLOF.getDwarfRnglistsSection()->getBeginSymbol() - : TLOF.getDwarfRangesSection()->getBeginSymbol(); HasRangeLists = true; @@ -494,12 +485,17 @@ void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, // (DW_RLE_startx_endx etc.). if (DD->getDwarfVersion() >= 5) addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_rnglistx, Index); - else if (isDwoUnit()) - addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), - RangeSectionSym); - else - addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), - RangeSectionSym); + else { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + const MCSymbol *RangeSectionSym = + TLOF.getDwarfRangesSection()->getBeginSymbol(); + if (isDwoUnit()) + addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), + RangeSectionSym); + else + addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), + RangeSectionSym); + } } void DwarfCompileUnit::attachRangesOrLowHighPC( @@ -507,7 +503,7 @@ void DwarfCompileUnit::attachRangesOrLowHighPC( if (Ranges.size() == 1 || !DD->useRangesSection()) { const RangeSpan &Front = Ranges.front(); const RangeSpan &Back = Ranges.back(); - attachLowHighPC(Die, Front.getStart(), Back.getEnd()); + attachLowHighPC(Die, Front.Begin, Back.End); } else addScopeRangeList(Die, std::move(Ranges)); } @@ -517,8 +513,8 @@ void DwarfCompileUnit::attachRangesOrLowHighPC( SmallVector List; List.reserve(Ranges.size()); for (const InsnRange &R : Ranges) - List.push_back(RangeSpan(DD->getLabelBeforeInsn(R.first), - DD->getLabelAfterInsn(R.second))); + List.push_back( + {DD->getLabelBeforeInsn(R.first), DD->getLabelAfterInsn(R.second)}); attachRangesOrLowHighPC(Die, std::move(List)); } @@ -647,8 +643,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg); DwarfExpr.addFragmentOffset(Expr); SmallVector Ops; - Ops.push_back(dwarf::DW_OP_plus_uconst); - Ops.push_back(Offset); + DIExpression::appendOffset(Ops, Offset); // According to // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf // cuda-gdb requires DW_AT_address_class for all variables to be able to @@ -892,32 +887,117 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( ContextCU->addDIEEntry(*AbsDef, 
dwarf::DW_AT_object_pointer, *ObjectPointer); } -DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE, - const DISubprogram &CalleeSP, - bool IsTail, - const MCExpr *PCOffset) { +/// Whether to use the GNU analog for a DWARF5 tag, attribute, or location atom. +static bool useGNUAnalogForDwarf5Feature(DwarfDebug *DD) { + return DD->getDwarfVersion() == 4 && DD->tuneForGDB(); +} + +dwarf::Tag DwarfCompileUnit::getDwarf5OrGNUTag(dwarf::Tag Tag) const { + if (!useGNUAnalogForDwarf5Feature(DD)) + return Tag; + switch (Tag) { + case dwarf::DW_TAG_call_site: + return dwarf::DW_TAG_GNU_call_site; + case dwarf::DW_TAG_call_site_parameter: + return dwarf::DW_TAG_GNU_call_site_parameter; + default: + llvm_unreachable("DWARF5 tag with no GNU analog"); + } +} + +dwarf::Attribute +DwarfCompileUnit::getDwarf5OrGNUAttr(dwarf::Attribute Attr) const { + if (!useGNUAnalogForDwarf5Feature(DD)) + return Attr; + switch (Attr) { + case dwarf::DW_AT_call_all_calls: + return dwarf::DW_AT_GNU_all_call_sites; + case dwarf::DW_AT_call_target: + return dwarf::DW_AT_GNU_call_site_target; + case dwarf::DW_AT_call_origin: + return dwarf::DW_AT_abstract_origin; + case dwarf::DW_AT_call_pc: + return dwarf::DW_AT_low_pc; + case dwarf::DW_AT_call_value: + return dwarf::DW_AT_GNU_call_site_value; + case dwarf::DW_AT_call_tail_call: + return dwarf::DW_AT_GNU_tail_call; + default: + llvm_unreachable("DWARF5 attribute with no GNU analog"); + } +} + +dwarf::LocationAtom +DwarfCompileUnit::getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const { + if (!useGNUAnalogForDwarf5Feature(DD)) + return Loc; + switch (Loc) { + case dwarf::DW_OP_entry_value: + return dwarf::DW_OP_GNU_entry_value; + default: + llvm_unreachable("DWARF5 location atom with no GNU analog"); + } +} + +DIE &DwarfCompileUnit::constructCallSiteEntryDIE( + DIE &ScopeDIE, const DISubprogram *CalleeSP, bool IsTail, + const MCSymbol *PCAddr, const MCExpr *PCOffset, unsigned CallReg) { // Insert a call site entry DIE within ScopeDIE. - DIE &CallSiteDIE = - createAndAddDIE(dwarf::DW_TAG_call_site, ScopeDIE, nullptr); + DIE &CallSiteDIE = createAndAddDIE(getDwarf5OrGNUTag(dwarf::DW_TAG_call_site), + ScopeDIE, nullptr); - // For the purposes of showing tail call frames in backtraces, a key piece of - // information is DW_AT_call_origin, a pointer to the callee DIE. - DIE *CalleeDIE = getOrCreateSubprogramDIE(&CalleeSP); - assert(CalleeDIE && "Could not create DIE for call site entry origin"); - addDIEEntry(CallSiteDIE, dwarf::DW_AT_call_origin, *CalleeDIE); + if (CallReg) { + // Indirect call. + addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target), + MachineLocation(CallReg)); + } else { + DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP); + assert(CalleeDIE && "Could not create DIE for call site entry origin"); + addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin), + *CalleeDIE); + } - if (IsTail) { + if (IsTail) // Attach DW_AT_call_tail_call to tail calls for standards compliance. - addFlag(CallSiteDIE, dwarf::DW_AT_call_tail_call); - } else { - // Attach the return PC to allow the debugger to disambiguate call paths - // from one function to another. + addFlag(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_tail_call)); + + // Attach the return PC to allow the debugger to disambiguate call paths + // from one function to another. 
+ if (DD->getDwarfVersion() == 4 && DD->tuneForGDB()) { + assert(PCAddr && "Missing PC information for a call"); + addLabelAddress(CallSiteDIE, dwarf::DW_AT_low_pc, PCAddr); + } else if (!IsTail || DD->tuneForGDB()) { assert(PCOffset && "Missing return PC information for a call"); addAddressExpr(CallSiteDIE, dwarf::DW_AT_call_return_pc, PCOffset); } + return CallSiteDIE; } +void DwarfCompileUnit::constructCallSiteParmEntryDIEs( + DIE &CallSiteDIE, SmallVector &Params) { + for (const auto &Param : Params) { + unsigned Register = Param.getRegister(); + auto CallSiteDieParam = + DIE::get(DIEValueAllocator, + getDwarf5OrGNUTag(dwarf::DW_TAG_call_site_parameter)); + insertDIE(CallSiteDieParam); + addAddress(*CallSiteDieParam, dwarf::DW_AT_location, + MachineLocation(Register)); + + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); + DwarfExpr.setCallSiteParamValueFlag(); + + DwarfDebug::emitDebugLocValue(*Asm, nullptr, Param.getValue(), DwarfExpr); + + addBlock(*CallSiteDieParam, getDwarf5OrGNUAttr(dwarf::DW_AT_call_value), + DwarfExpr.finalize()); + + CallSiteDIE.addChild(CallSiteDieParam); + } +} + DIE *DwarfCompileUnit::constructImportedEntityDIE( const DIImportedEntity *Module) { DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag()); @@ -997,11 +1077,11 @@ void DwarfCompileUnit::createAbstractEntity(const DINode *Node, assert(Scope && Scope->isAbstractScope()); auto &Entity = getAbstractEntities()[Node]; if (isa(Node)) { - Entity = llvm::make_unique( + Entity = std::make_unique( cast(Node), nullptr /* IA */);; DU->addScopeVariable(Scope, cast(Entity.get())); } else if (isa(Node)) { - Entity = llvm::make_unique( + Entity = std::make_unique( cast(Node), nullptr /* IA */); DU->addScopeLabel(Scope, cast(Entity.get())); } @@ -1081,16 +1161,8 @@ void DwarfCompileUnit::addGlobalTypeUnitType(const DIType *Ty, GlobalTypes.insert(std::make_pair(std::move(FullName), &getUnitDie())); } -/// addVariableAddress - Add DW_AT_location attribute for a -/// DbgVariable based on provided MachineLocation. void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die, MachineLocation Location) { - // addBlockByrefAddress is obsolete and will be removed soon. - // The clang frontend always generates block byref variables with a - // complex expression that encodes exactly what addBlockByrefAddress - // would do. - assert((!DV.isBlockByrefVariable() || DV.hasComplexAddress()) && - "block byref variable without a complex expression"); if (DV.hasComplexAddress()) addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); else @@ -1133,7 +1205,7 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, if (DIExpr->isEntryValue()) { DwarfExpr.setEntryValueFlag(); - DwarfExpr.addEntryValueExpression(Cursor); + DwarfExpr.beginEntryValueExpression(Cursor); } const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo(); diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index ea980dfda17e..1b7ea2673ac0 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -227,12 +227,35 @@ public: void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); + /// This takes a DWARF 5 tag and returns it or a GNU analog. + dwarf::Tag getDwarf5OrGNUTag(dwarf::Tag Tag) const; + + /// This takes a DWARF 5 attribute and returns it or a GNU analog. 
+ dwarf::Attribute getDwarf5OrGNUAttr(dwarf::Attribute Attr) const; + + /// This takes a DWARF 5 location atom and either returns it or a GNU analog. + dwarf::LocationAtom getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const; + /// Construct a call site entry DIE describing a call within \p Scope to a - /// callee described by \p CalleeSP. \p IsTail specifies whether the call is - /// a tail call. \p PCOffset must be non-zero for non-tail calls or be the + /// callee described by \p CalleeSP. + /// \p IsTail specifies whether the call is a tail call. + /// \p PCAddr (used for GDB + DWARF 4 tuning) points to the PC value after + /// the call instruction. + /// \p PCOffset (used for cases other than GDB + DWARF 4 tuning) must be + /// non-zero for non-tail calls (in the case of non-gdb tuning, since for + /// GDB + DWARF 5 tuning we still generate PC info for tail calls) or be the /// function-local offset to PC value after the call instruction. - DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram &CalleeSP, - bool IsTail, const MCExpr *PCOffset); + /// \p CallReg is a register location for an indirect call. For direct calls + /// the \p CallReg is set to 0. + DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram *CalleeSP, + bool IsTail, const MCSymbol *PCAddr, + const MCExpr *PCOffset, unsigned CallReg); + /// Construct call site parameter DIEs for the \p CallSiteDIE. The \p Params + /// were collected by the \ref collectCallSiteParameters. + /// Note: The order of parameters does not matter, since debuggers recognize + /// call site parameters by the DW_AT_location attribute. + void constructCallSiteParmEntryDIEs(DIE &CallSiteDIE, + SmallVector &Params); /// Construct import_module DIE. DIE *constructImportedEntityDIE(const DIImportedEntity *Module); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 71bb2b0858cc..c505e77e5acd 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" @@ -39,6 +40,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/DebugInfo/DWARF/DWARFExpression.h" @@ -83,6 +85,8 @@ using namespace llvm; #define DEBUG_TYPE "dwarfdebug" +STATISTIC(NumCSParams, "Number of dbg call site params created"); + static cl::opt DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, cl::desc("Disable debug info printing")); @@ -166,26 +170,26 @@ static const char *const DbgTimerDescription = "DWARF Debug Writer"; static constexpr unsigned ULEB128PadSize = 4; void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) { - BS.EmitInt8( + getActiveStreamer().EmitInt8( Op, Comment ? 
Twine(Comment) + " " + dwarf::OperationEncodingString(Op) : dwarf::OperationEncodingString(Op)); } void DebugLocDwarfExpression::emitSigned(int64_t Value) { - BS.EmitSLEB128(Value, Twine(Value)); + getActiveStreamer().EmitSLEB128(Value, Twine(Value)); } void DebugLocDwarfExpression::emitUnsigned(uint64_t Value) { - BS.EmitULEB128(Value, Twine(Value)); + getActiveStreamer().EmitULEB128(Value, Twine(Value)); } void DebugLocDwarfExpression::emitData1(uint8_t Value) { - BS.EmitInt8(Value, Twine(Value)); + getActiveStreamer().EmitInt8(Value, Twine(Value)); } void DebugLocDwarfExpression::emitBaseTypeRef(uint64_t Idx) { assert(Idx < (1ULL << (ULEB128PadSize * 7)) && "Idx wont fit"); - BS.EmitULEB128(Idx, Twine(Idx), ULEB128PadSize); + getActiveStreamer().EmitULEB128(Idx, Twine(Idx), ULEB128PadSize); } bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, @@ -194,54 +198,34 @@ bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, return false; } -bool DbgVariable::isBlockByrefVariable() const { - assert(getVariable() && "Invalid complex DbgVariable!"); - return getVariable()->getType()->isBlockByrefStruct(); +void DebugLocDwarfExpression::enableTemporaryBuffer() { + assert(!IsBuffering && "Already buffering?"); + if (!TmpBuf) + TmpBuf = std::make_unique(OutBS.GenerateComments); + IsBuffering = true; } -const DIType *DbgVariable::getType() const { - DIType *Ty = getVariable()->getType(); - // FIXME: isBlockByrefVariable should be reformulated in terms of complex - // addresses instead. - if (Ty->isBlockByrefStruct()) { - /* Byref variables, in Blocks, are declared by the programmer as - "SomeType VarName;", but the compiler creates a - __Block_byref_x_VarName struct, and gives the variable VarName - either the struct, or a pointer to the struct, as its type. This - is necessary for various behind-the-scenes things the compiler - needs to do with by-reference variables in blocks. - - However, as far as the original *programmer* is concerned, the - variable should still have type 'SomeType', as originally declared. - - The following function dives into the __Block_byref_x_VarName - struct to find the original type of the variable. This will be - passed back to the code generating the type for the Debug - Information Entry for the variable 'VarName'. 'VarName' will then - have the original type 'SomeType' in its debug information. - - The original type 'SomeType' will be the type of the field named - 'VarName' inside the __Block_byref_x_VarName struct. - - NOTE: In order for this to not completely fail on the debugger - side, the Debug Information Entry for the variable VarName needs to - have a DW_AT_location that tells the debugger how to unwind through - the pointers and __Block_byref_x_VarName struct to find the actual - value of the variable. The function addBlockByrefType does this. */ - DIType *subType = Ty; - uint16_t tag = Ty->getTag(); - - if (tag == dwarf::DW_TAG_pointer_type) - subType = cast(Ty)->getBaseType(); - - auto Elements = cast(subType)->getElements(); - for (unsigned i = 0, N = Elements.size(); i < N; ++i) { - auto *DT = cast(Elements[i]); - if (getName() == DT->getName()) - return DT->getBaseType(); - } +void DebugLocDwarfExpression::disableTemporaryBuffer() { IsBuffering = false; } + +unsigned DebugLocDwarfExpression::getTemporaryBufferSize() { + return TmpBuf ? 
TmpBuf->Bytes.size() : 0;
+}
+
+void DebugLocDwarfExpression::commitTemporaryBuffer() {
+  if (!TmpBuf)
+    return;
+  for (auto Byte : enumerate(TmpBuf->Bytes)) {
+    const char *Comment = (Byte.index() < TmpBuf->Comments.size())
+                              ? TmpBuf->Comments[Byte.index()].c_str()
+                              : "";
+    OutBS.EmitInt8(Byte.value(), Comment);
   }
-  return Ty;
+  TmpBuf->Bytes.clear();
+  TmpBuf->Comments.clear();
+}
+
+const DIType *DbgVariable::getType() const {
+  return getVariable()->getType();
 }
 
 /// Get .debug_loc entry for the instruction range starting at MI.
@@ -275,7 +259,7 @@ void DbgVariable::initializeDbgValue(const MachineInstr *DbgValue) {
   assert(getInlinedAt() == DbgValue->getDebugLoc()->getInlinedAt() &&
          "Wrong inlined-at");
 
-  ValueLoc = llvm::make_unique<DbgValueLoc>(getDebugLocValue(DbgValue));
+  ValueLoc = std::make_unique<DbgValueLoc>(getDebugLocValue(DbgValue));
   if (auto *E = DbgValue->getDebugExpression())
     if (E->getNumElements())
       FrameIndexExprs.push_back({0, E});
@@ -551,6 +535,157 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU,
   }
 }
 
+/// Try to interpret values loaded into registers that forward parameters
+/// for \p CallMI. Store parameters with interpreted values into \p Params.
+static void collectCallSiteParameters(const MachineInstr *CallMI,
+                                      ParamSet &Params) {
+  auto *MF = CallMI->getMF();
+  auto CalleesMap = MF->getCallSitesInfo();
+  auto CallFwdRegsInfo = CalleesMap.find(CallMI);
+
+  // There is no call site information for this call instruction.
+  if (CallFwdRegsInfo == CalleesMap.end())
+    return;
+
+  auto *MBB = CallMI->getParent();
+  const auto &TRI = MF->getSubtarget().getRegisterInfo();
+  const auto &TII = MF->getSubtarget().getInstrInfo();
+  const auto &TLI = MF->getSubtarget().getTargetLowering();
+
+  // Skip the call instruction.
+  auto I = std::next(CallMI->getReverseIterator());
+
+  DenseSet<unsigned> ForwardedRegWorklist;
+  // Add all the forwarding registers into the ForwardedRegWorklist.
+  for (auto ArgReg : CallFwdRegsInfo->second) {
+    bool InsertedReg = ForwardedRegWorklist.insert(ArgReg.Reg).second;
+    assert(InsertedReg && "Single register used to forward two arguments?");
+    (void)InsertedReg;
+  }
+
+  // We erase a forwarding register from the ForwardedRegWorklist once we have
+  // successfully described a loaded value for it (using describeLoadedValue()).
+  // For the arguments that remain in the worklist, i.e. those whose loaded
+  // value we could not describe, we try to generate an entry value expression
+  // for their call site value description, provided the call is within the
+  // entry MBB. RegsForEntryValues maps a forwarding register to the register
+  // holding its entry value.
+  // TODO: Handle situations where the call site parameter value can be
+  // described as the entry value within basic blocks other than the first one.
+  bool ShouldTryEmitEntryVals = MBB->getIterator() == MF->begin();
+  DenseMap<unsigned, unsigned> RegsForEntryValues;
+
+  // If the MI is an instruction defining one or more parameters' forwarding
+  // registers, add those defines. We can currently only describe forwarded
+  // registers that are explicitly defined, but keep track of implicit defines
+  // also to remove those registers from the work list.
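
Before the lambda that the preceding comment introduces, here is a rough, standalone sketch of the overall worklist pattern this function follows: walk backwards from the call, and drop a forwarding register from the worklist as soon as an instruction defining it is found. The types below (Instr, LoadedValue) are simplified placeholders, not the LLVM API; the real implementation continues in the patch below.

#include <cstdint>
#include <optional>
#include <set>
#include <utility>
#include <vector>

struct Instr {
  bool IsCall = false;
  std::vector<unsigned> Defs;          // registers written by this instruction
  std::optional<int64_t> LoadedValue;  // value it loads, if describable
};

// Reverse scan over the instructions preceding the call. Once a forwarding
// register is defined it leaves the worklist, whether or not its value could
// be described.
std::vector<std::pair<unsigned, int64_t>>
collectParams(const std::vector<Instr> &BlockBeforeCall,
              std::set<unsigned> Worklist) {
  std::vector<std::pair<unsigned, int64_t>> Params;
  for (auto It = BlockBeforeCall.rbegin(); It != BlockBeforeCall.rend(); ++It) {
    if (It->IsCall || Worklist.empty())
      break; // cannot interpret past another call / nothing left to describe
    for (unsigned Def : It->Defs) {
      if (!Worklist.erase(Def))
        continue; // not a forwarding register
      if (It->LoadedValue)
        Params.push_back({Def, *It->LoadedValue});
    }
  }
  return Params;
}
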
+  auto getForwardingRegsDefinedByMI = [&](const MachineInstr &MI,
+                                          SmallVectorImpl<unsigned> &Explicit,
+                                          SmallVectorImpl<unsigned> &Implicit) {
+    if (MI.isDebugInstr())
+      return;
+
+    for (const MachineOperand &MO : MI.operands()) {
+      if (MO.isReg() && MO.isDef() &&
+          Register::isPhysicalRegister(MO.getReg())) {
+        for (auto FwdReg : ForwardedRegWorklist) {
+          if (TRI->regsOverlap(FwdReg, MO.getReg())) {
+            if (MO.isImplicit())
+              Implicit.push_back(FwdReg);
+            else
+              Explicit.push_back(FwdReg);
+            break;
+          }
+        }
+      }
+    }
+  };
+
+  auto finishCallSiteParam = [&](DbgValueLoc DbgLocVal, unsigned Reg) {
+    unsigned FwdReg = Reg;
+    if (ShouldTryEmitEntryVals) {
+      auto EntryValReg = RegsForEntryValues.find(Reg);
+      if (EntryValReg != RegsForEntryValues.end())
+        FwdReg = EntryValReg->second;
+    }
+
+    DbgCallSiteParam CSParm(FwdReg, DbgLocVal);
+    Params.push_back(CSParm);
+    ++NumCSParams;
+  };
+
+  // Search for values loaded into the parameters' forwarding registers.
+  for (; I != MBB->rend(); ++I) {
+    // Stop if we reach another call; we cannot interpret the parameters'
+    // forwarding registers across it. Also stop once every parameter has
+    // been interpreted.
+    if (I->isCall())
+      return;
+
+    if (ForwardedRegWorklist.empty())
+      return;
+
+    SmallVector<unsigned, 4> ExplicitFwdRegDefs;
+    SmallVector<unsigned, 4> ImplicitFwdRegDefs;
+    getForwardingRegsDefinedByMI(*I, ExplicitFwdRegDefs, ImplicitFwdRegDefs);
+    if (ExplicitFwdRegDefs.empty() && ImplicitFwdRegDefs.empty())
+      continue;
+
+    // If the MI clobbers more than one forwarding register, we must remove
+    // all of them from the work list.
+    for (auto Reg : concat<unsigned>(ExplicitFwdRegDefs, ImplicitFwdRegDefs))
+      ForwardedRegWorklist.erase(Reg);
+
+    // The describeLoadedValue() hook currently does not have any information
+    // about which register it should describe in case of multiple defines, so
+    // for now we only handle instructions where a forwarded register is (at
+    // least partially) defined by the instruction's single explicit define.
+    if (I->getNumExplicitDefs() != 1 || ExplicitFwdRegDefs.empty())
+      continue;
+    unsigned Reg = ExplicitFwdRegDefs[0];
+
+    if (auto ParamValue = TII->describeLoadedValue(*I)) {
+      if (ParamValue->first.isImm()) {
+        int64_t Val = ParamValue->first.getImm();
+        DbgValueLoc DbgLocVal(ParamValue->second, Val);
+        finishCallSiteParam(DbgLocVal, Reg);
+      } else if (ParamValue->first.isReg()) {
+        Register RegLoc = ParamValue->first.getReg();
+        unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+        Register FP = TRI->getFrameRegister(*MF);
+        bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP);
+        if (TRI->isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) {
+          DbgValueLoc DbgLocVal(ParamValue->second,
+                                MachineLocation(RegLoc,
+                                                /*IsIndirect=*/IsSPorFP));
+          finishCallSiteParam(DbgLocVal, Reg);
+        } else if (ShouldTryEmitEntryVals) {
+          ForwardedRegWorklist.insert(RegLoc);
+          RegsForEntryValues[RegLoc] = Reg;
+        }
+      }
+    }
+  }
+
+  // Emit the remaining call site parameters' values as entry values.
+  if (ShouldTryEmitEntryVals) {
+    // Create an expression where the register's entry value is used.
+ DIExpression *EntryExpr = DIExpression::get( + MF->getFunction().getContext(), {dwarf::DW_OP_LLVM_entry_value, 1}); + for (auto RegEntry : ForwardedRegWorklist) { + unsigned FwdReg = RegEntry; + auto EntryValReg = RegsForEntryValues.find(RegEntry); + if (EntryValReg != RegsForEntryValues.end()) + FwdReg = EntryValReg->second; + + DbgValueLoc DbgLocVal(EntryExpr, MachineLocation(RegEntry)); + DbgCallSiteParam CSParm(FwdReg, DbgLocVal); + Params.push_back(CSParm); + ++NumCSParams; + } + } +} + void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU, DIE &ScopeDIE, const MachineFunction &MF) { @@ -563,10 +698,11 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, // for both tail and non-tail calls. Don't use DW_AT_call_all_source_calls // because one of its requirements is not met: call site entries for // optimized-out calls are elided. - CU.addFlag(ScopeDIE, dwarf::DW_AT_call_all_calls); + CU.addFlag(ScopeDIE, CU.getDwarf5OrGNUAttr(dwarf::DW_AT_call_all_calls)); const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); assert(TII && "TargetInstrInfo not found: cannot label tail calls"); + bool ApplyGNUExtensions = getDwarfVersion() == 4 && tuneForGDB(); // Emit call site entries for each call or tail call in the function. for (const MachineBasicBlock &MBB : MF) { @@ -581,30 +717,66 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, return; // If this is a direct call, find the callee's subprogram. + // In the case of an indirect call find the register that holds + // the callee. const MachineOperand &CalleeOp = MI.getOperand(0); - if (!CalleeOp.isGlobal()) - continue; - const Function *CalleeDecl = dyn_cast(CalleeOp.getGlobal()); - if (!CalleeDecl || !CalleeDecl->getSubprogram()) + if (!CalleeOp.isGlobal() && !CalleeOp.isReg()) continue; + unsigned CallReg = 0; + const DISubprogram *CalleeSP = nullptr; + const Function *CalleeDecl = nullptr; + if (CalleeOp.isReg()) { + CallReg = CalleeOp.getReg(); + if (!CallReg) + continue; + } else { + CalleeDecl = dyn_cast(CalleeOp.getGlobal()); + if (!CalleeDecl || !CalleeDecl->getSubprogram()) + continue; + CalleeSP = CalleeDecl->getSubprogram(); + } + // TODO: Omit call site entries for runtime calls (objc_msgSend, etc). - // TODO: Add support for indirect calls. bool IsTail = TII->isTailCall(MI); - // For tail calls, no return PC information is needed. For regular calls, - // the return PC is needed to disambiguate paths in the call graph which - // could lead to some target function. + // For tail calls, for non-gdb tuning, no return PC information is needed. + // For regular calls (and tail calls in GDB tuning), the return PC + // is needed to disambiguate paths in the call graph which could lead to + // some target function. const MCExpr *PCOffset = - IsTail ? nullptr : getFunctionLocalOffsetAfterInsn(&MI); + (IsTail && !tuneForGDB()) ? nullptr + : getFunctionLocalOffsetAfterInsn(&MI); + + // Address of a call-like instruction for a normal call or a jump-like + // instruction for a tail call. This is needed for GDB + DWARF 4 tuning. + const MCSymbol *PCAddr = + ApplyGNUExtensions ? const_cast(getLabelAfterInsn(&MI)) + : nullptr; + + assert((IsTail || PCOffset || PCAddr) && + "Call without return PC information"); - assert((IsTail || PCOffset) && "Call without return PC information"); LLVM_DEBUG(dbgs() << "CallSiteEntry: " << MF.getName() << " -> " - << CalleeDecl->getName() << (IsTail ? 
" [tail]" : "") - << "\n"); - CU.constructCallSiteEntryDIE(ScopeDIE, *CalleeDecl->getSubprogram(), - IsTail, PCOffset); + << (CalleeDecl ? CalleeDecl->getName() + : StringRef(MF.getSubtarget() + .getRegisterInfo() + ->getName(CallReg))) + << (IsTail ? " [IsTail]" : "") << "\n"); + + DIE &CallSiteDIE = + CU.constructCallSiteEntryDIE(ScopeDIE, CalleeSP, IsTail, PCAddr, + PCOffset, CallReg); + + // GDB and LLDB support call site parameter debug info. + if (Asm->TM.Options.EnableDebugEntryValues && + (tuneForGDB() || tuneForLLDB())) { + ParamSet Params; + // Try to interpret values of call site parameters. + collectCallSiteParameters(&MI, Params); + CU.constructCallSiteParmEntryDIEs(CallSiteDIE, Params); + } } } } @@ -680,7 +852,7 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { CompilationDir = DIUnit->getDirectory(); - auto OwnedUnit = llvm::make_unique( + auto OwnedUnit = std::make_unique( InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder); DwarfCompileUnit &NewCU = *OwnedUnit; InfoHolder.addUnit(std::move(OwnedUnit)); @@ -793,8 +965,6 @@ void DwarfDebug::beginModule() { DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; Holder.setRnglistsTableBaseSym( Asm->createTempSymbol("rnglists_table_base")); - Holder.setLoclistsTableBaseSym( - Asm->createTempSymbol("loclists_table_base")); if (useSplitDwarf()) InfoHolder.setRnglistsTableBaseSym( @@ -907,7 +1077,7 @@ void DwarfDebug::finalizeModuleInfo() { // If we're splitting the dwarf out now that we've got the entire // CU then add the dwo id to it. auto *SkCU = TheCU.getSkeleton(); - if (useSplitDwarf() && !empty(TheCU.getUnitDie().children())) { + if (useSplitDwarf() && !TheCU.getUnitDie().children().empty()) { finishUnitAttributes(TheCU.getCUNode(), TheCU); TheCU.addString(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_name, Asm->TM.Options.MCOptions.SplitDwarfFile); @@ -951,7 +1121,7 @@ void DwarfDebug::finalizeModuleInfo() { // 2.17.3). U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0); else - U.setBaseAddress(TheCU.getRanges().front().getStart()); + U.setBaseAddress(TheCU.getRanges().front().Begin); U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges()); } @@ -959,15 +1129,19 @@ void DwarfDebug::finalizeModuleInfo() { // is a bit pessimistic under LTO. 
if (!AddrPool.isEmpty() && (getDwarfVersion() >= 5 || - (SkCU && !empty(TheCU.getUnitDie().children())))) + (SkCU && !TheCU.getUnitDie().children().empty()))) U.addAddrTableBase(); if (getDwarfVersion() >= 5) { if (U.hasRangeLists()) U.addRnglistsBase(); - if (!DebugLocs.getLists().empty() && !useSplitDwarf()) - U.addLoclistsBase(); + if (!DebugLocs.getLists().empty() && !useSplitDwarf()) { + DebugLocs.setSym(Asm->createTempSymbol("loclists_table_base")); + U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_loclists_base, + DebugLocs.getSym(), + TLOF.getDwarfLoclistsSection()->getBeginSymbol()); + } } auto *CUNode = cast(P.first); @@ -1105,7 +1279,7 @@ void DwarfDebug::collectVariableInfoFromMFTable( continue; ensureAbstractEntityIsCreatedIfScoped(TheCU, Var.first, Scope->getScopeNode()); - auto RegVar = llvm::make_unique( + auto RegVar = std::make_unique( cast(Var.first), Var.second); RegVar->initializeMMI(VI.Expr, VI.Slot); if (DbgVariable *DbgVar = MFVars.lookup(Var)) @@ -1316,13 +1490,13 @@ DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU, ensureAbstractEntityIsCreatedIfScoped(TheCU, Node, Scope.getScopeNode()); if (isa(Node)) { ConcreteEntities.push_back( - llvm::make_unique(cast(Node), + std::make_unique(cast(Node), Location)); InfoHolder.addScopeVariable(&Scope, cast(ConcreteEntities.back().get())); } else if (isa(Node)) { ConcreteEntities.push_back( - llvm::make_unique(cast(Node), + std::make_unique(cast(Node), Location, Sym)); InfoHolder.addScopeLabel(&Scope, cast(ConcreteEntities.back().get())); @@ -1419,11 +1593,14 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU, LexicalScope *Scope = nullptr; const DILabel *Label = cast(IL.first); + // The scope could have an extra lexical block file. + const DILocalScope *LocalScope = + Label->getScope()->getNonLexicalBlockFileScope(); // Get inlined DILocation if it is inlined label. if (const DILocation *IA = IL.second) - Scope = LScopes.findInlinedScope(Label->getScope(), IA); + Scope = LScopes.findInlinedScope(LocalScope, IA); else - Scope = LScopes.findLexicalScope(Label->getScope()); + Scope = LScopes.findLexicalScope(LocalScope); // If label scope is not found then skip this label. if (!Scope) continue; @@ -1607,6 +1784,9 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) { if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) return; + SectionLabels.insert(std::make_pair(&Asm->getFunctionBegin()->getSection(), + Asm->getFunctionBegin())); + DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit()); // Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function @@ -1654,7 +1834,7 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) { collectEntityInfo(TheCU, SP, Processed); // Add the range of this function to the list of ranges for the CU. - TheCU.addRange(RangeSpan(Asm->getFunctionBegin(), Asm->getFunctionEnd())); + TheCU.addRange({Asm->getFunctionBegin(), Asm->getFunctionEnd()}); // Under -gmlt, skip building the subprogram if there are no inlined // subroutines inside it. But with -fdebug-info-for-profiling, the subprogram @@ -1836,9 +2016,10 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU, case dwarf::DW_TAG_union_type: case dwarf::DW_TAG_enumeration_type: return dwarf::PubIndexEntryDescriptor( - dwarf::GIEK_TYPE, CU->getLanguage() != dwarf::DW_LANG_C_plus_plus - ? dwarf::GIEL_STATIC - : dwarf::GIEL_EXTERNAL); + dwarf::GIEK_TYPE, + dwarf::isCPlusPlus((dwarf::SourceLanguage)CU->getLanguage()) + ? 
dwarf::GIEL_EXTERNAL + : dwarf::GIEL_STATIC); case dwarf::DW_TAG_typedef: case dwarf::DW_TAG_base_type: case dwarf::DW_TAG_subrange_type: @@ -1967,7 +2148,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, DWARFExpression Expr(Data, getDwarfVersion(), PtrSize); using Encoding = DWARFExpression::Operation::Encoding; - uint32_t Offset = 0; + uint64_t Offset = 0; for (auto &Op : Expr) { assert(Op.getCode() != dwarf::DW_OP_const_type && "3 operand ops not yet supported"); @@ -1990,7 +2171,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, if (Comment != End) Comment++; } else { - for (uint32_t J = Offset; J < Op.getOperandEndOffset(I); ++J) + for (uint64_t J = Offset; J < Op.getOperandEndOffset(I); ++J) Streamer.EmitInt8(Data.getData()[J], Comment != End ? *(Comment++) : ""); } Offset = Op.getOperandEndOffset(I); @@ -2020,7 +2201,7 @@ void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, if (DIExpr->isEntryValue()) { DwarfExpr.setEntryValueFlag(); - DwarfExpr.addEntryValueExpression(Cursor); + DwarfExpr.beginEntryValueExpression(Cursor); } const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo(); @@ -2083,7 +2264,7 @@ void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry, } // Emit the common part of the DWARF 5 range/locations list tables header. -static void emitListsTableHeaderStart(AsmPrinter *Asm, const DwarfFile &Holder, +static void emitListsTableHeaderStart(AsmPrinter *Asm, MCSymbol *TableStart, MCSymbol *TableEnd) { // Build the table header, which starts with the length field. @@ -2108,7 +2289,7 @@ static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm, const DwarfFile &Holder) { MCSymbol *TableStart = Asm->createTempSymbol("debug_rnglist_table_start"); MCSymbol *TableEnd = Asm->createTempSymbol("debug_rnglist_table_end"); - emitListsTableHeaderStart(Asm, Holder, TableStart, TableEnd); + emitListsTableHeaderStart(Asm, TableStart, TableEnd); Asm->OutStreamer->AddComment("Offset entry count"); Asm->emitInt32(Holder.getRangeLists().size()); @@ -2125,94 +2306,147 @@ static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm, // designates the end of the table for the caller to emit when the table is // complete. static MCSymbol *emitLoclistsTableHeader(AsmPrinter *Asm, - const DwarfFile &Holder) { + const DwarfDebug &DD) { MCSymbol *TableStart = Asm->createTempSymbol("debug_loclist_table_start"); MCSymbol *TableEnd = Asm->createTempSymbol("debug_loclist_table_end"); - emitListsTableHeaderStart(Asm, Holder, TableStart, TableEnd); + emitListsTableHeaderStart(Asm, TableStart, TableEnd); + + const auto &DebugLocs = DD.getDebugLocs(); // FIXME: Generate the offsets table and use DW_FORM_loclistx with the // DW_AT_loclists_base attribute. Until then set the number of offsets to 0. Asm->OutStreamer->AddComment("Offset entry count"); Asm->emitInt32(0); - Asm->OutStreamer->EmitLabel(Holder.getLoclistsTableBaseSym()); + Asm->OutStreamer->EmitLabel(DebugLocs.getSym()); return TableEnd; } -// Emit locations into the .debug_loc/.debug_rnglists section. 
-void DwarfDebug::emitDebugLoc() { - if (DebugLocs.getLists().empty()) - return; +template +static void emitRangeList( + DwarfDebug &DD, AsmPrinter *Asm, MCSymbol *Sym, const Ranges &R, + const DwarfCompileUnit &CU, unsigned BaseAddressx, unsigned OffsetPair, + unsigned StartxLength, unsigned EndOfList, + StringRef (*StringifyEnum)(unsigned), + bool ShouldUseBaseAddress, + PayloadEmitter EmitPayload) { - bool IsLocLists = getDwarfVersion() >= 5; - MCSymbol *TableEnd = nullptr; - if (IsLocLists) { - Asm->OutStreamer->SwitchSection( - Asm->getObjFileLowering().getDwarfLoclistsSection()); - TableEnd = emitLoclistsTableHeader(Asm, useSplitDwarf() ? SkeletonHolder - : InfoHolder); - } else { - Asm->OutStreamer->SwitchSection( - Asm->getObjFileLowering().getDwarfLocSection()); - } + auto Size = Asm->MAI->getCodePointerSize(); + bool UseDwarf5 = DD.getDwarfVersion() >= 5; - unsigned char Size = Asm->MAI->getCodePointerSize(); - for (const auto &List : DebugLocs.getLists()) { - Asm->OutStreamer->EmitLabel(List.Label); + // Emit our symbol so we can find the beginning of the range. + Asm->OutStreamer->EmitLabel(Sym); - const DwarfCompileUnit *CU = List.CU; - const MCSymbol *Base = CU->getBaseAddress(); - for (const auto &Entry : DebugLocs.getEntries(List)) { + // Gather all the ranges that apply to the same section so they can share + // a base address entry. + MapVector> SectionRanges; + + for (const auto &Range : R) + SectionRanges[&Range.Begin->getSection()].push_back(&Range); + + const MCSymbol *CUBase = CU.getBaseAddress(); + bool BaseIsSet = false; + for (const auto &P : SectionRanges) { + auto *Base = CUBase; + if (!Base && ShouldUseBaseAddress) { + const MCSymbol *Begin = P.second.front()->Begin; + const MCSymbol *NewBase = DD.getSectionLabel(&Begin->getSection()); + if (!UseDwarf5) { + Base = NewBase; + BaseIsSet = true; + Asm->OutStreamer->EmitIntValue(-1, Size); + Asm->OutStreamer->AddComment(" base address"); + Asm->OutStreamer->EmitSymbolValue(Base, Size); + } else if (NewBase != Begin || P.second.size() > 1) { + // Only use a base address if + // * the existing pool address doesn't match (NewBase != Begin) + // * or, there's more than one entry to share the base address + Base = NewBase; + BaseIsSet = true; + Asm->OutStreamer->AddComment(StringifyEnum(BaseAddressx)); + Asm->emitInt8(BaseAddressx); + Asm->OutStreamer->AddComment(" base address index"); + Asm->EmitULEB128(DD.getAddressPool().getIndex(Base)); + } + } else if (BaseIsSet && !UseDwarf5) { + BaseIsSet = false; + assert(!Base); + Asm->OutStreamer->EmitIntValue(-1, Size); + Asm->OutStreamer->EmitIntValue(0, Size); + } + + for (const auto *RS : P.second) { + const MCSymbol *Begin = RS->Begin; + const MCSymbol *End = RS->End; + assert(Begin && "Range without a begin symbol?"); + assert(End && "Range without an end symbol?"); if (Base) { - // Set up the range. This range is relative to the entry point of the - // compile unit. This is a hard coded 0 for low_pc when we're emitting - // ranges, or the DW_AT_low_pc on the compile unit otherwise. - if (IsLocLists) { - Asm->OutStreamer->AddComment("DW_LLE_offset_pair"); - Asm->OutStreamer->EmitIntValue(dwarf::DW_LLE_offset_pair, 1); + if (UseDwarf5) { + // Emit offset_pair when we have a base. 
+ Asm->OutStreamer->AddComment(StringifyEnum(OffsetPair)); + Asm->emitInt8(OffsetPair); Asm->OutStreamer->AddComment(" starting offset"); - Asm->EmitLabelDifferenceAsULEB128(Entry.BeginSym, Base); + Asm->EmitLabelDifferenceAsULEB128(Begin, Base); Asm->OutStreamer->AddComment(" ending offset"); - Asm->EmitLabelDifferenceAsULEB128(Entry.EndSym, Base); + Asm->EmitLabelDifferenceAsULEB128(End, Base); } else { - Asm->EmitLabelDifference(Entry.BeginSym, Base, Size); - Asm->EmitLabelDifference(Entry.EndSym, Base, Size); + Asm->EmitLabelDifference(Begin, Base, Size); + Asm->EmitLabelDifference(End, Base, Size); } - - emitDebugLocEntryLocation(Entry, CU); - continue; - } - - // We have no base address. - if (IsLocLists) { - // TODO: Use DW_LLE_base_addressx + DW_LLE_offset_pair, or - // DW_LLE_startx_length in case if there is only a single range. - // That should reduce the size of the debug data emited. - // For now just use the DW_LLE_startx_length for all cases. - Asm->OutStreamer->AddComment("DW_LLE_startx_length"); - Asm->emitInt8(dwarf::DW_LLE_startx_length); - Asm->OutStreamer->AddComment(" start idx"); - Asm->EmitULEB128(AddrPool.getIndex(Entry.BeginSym)); + } else if (UseDwarf5) { + Asm->OutStreamer->AddComment(StringifyEnum(StartxLength)); + Asm->emitInt8(StartxLength); + Asm->OutStreamer->AddComment(" start index"); + Asm->EmitULEB128(DD.getAddressPool().getIndex(Begin)); Asm->OutStreamer->AddComment(" length"); - Asm->EmitLabelDifferenceAsULEB128(Entry.EndSym, Entry.BeginSym); + Asm->EmitLabelDifferenceAsULEB128(End, Begin); } else { - Asm->OutStreamer->EmitSymbolValue(Entry.BeginSym, Size); - Asm->OutStreamer->EmitSymbolValue(Entry.EndSym, Size); + Asm->OutStreamer->EmitSymbolValue(Begin, Size); + Asm->OutStreamer->EmitSymbolValue(End, Size); } - - emitDebugLocEntryLocation(Entry, CU); + EmitPayload(*RS); } + } - if (IsLocLists) { - // .debug_loclists section ends with DW_LLE_end_of_list. - Asm->OutStreamer->AddComment("DW_LLE_end_of_list"); - Asm->OutStreamer->EmitIntValue(dwarf::DW_LLE_end_of_list, 1); - } else { - // Terminate the .debug_loc list with two 0 values. - Asm->OutStreamer->EmitIntValue(0, Size); - Asm->OutStreamer->EmitIntValue(0, Size); - } + if (UseDwarf5) { + Asm->OutStreamer->AddComment(StringifyEnum(EndOfList)); + Asm->emitInt8(EndOfList); + } else { + // Terminate the list with two 0 values. + Asm->OutStreamer->EmitIntValue(0, Size); + Asm->OutStreamer->EmitIntValue(0, Size); } +} + +static void emitLocList(DwarfDebug &DD, AsmPrinter *Asm, const DebugLocStream::List &List) { + emitRangeList( + DD, Asm, List.Label, DD.getDebugLocs().getEntries(List), *List.CU, + dwarf::DW_LLE_base_addressx, dwarf::DW_LLE_offset_pair, + dwarf::DW_LLE_startx_length, dwarf::DW_LLE_end_of_list, + llvm::dwarf::LocListEncodingString, + /* ShouldUseBaseAddress */ true, + [&](const DebugLocStream::Entry &E) { + DD.emitDebugLocEntryLocation(E, List.CU); + }); +} + +// Emit locations into the .debug_loc/.debug_rnglists section. 
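
The unified emitRangeList above only pays for a base-address entry when it can actually be shared. A conceptual sketch of that decision, with illustrative names and plain addresses instead of MCSymbols (not the LLVM API), assuming the per-section range vector is non-empty:

#include <cstdint>
#include <vector>

struct RangeEntry { uint64_t Begin, End; };

enum class EntryForm { OffsetPair, StartxLength };

// DWARF v5 case: offset_pair is used whenever a base address is in effect
// (either the CU base or a freshly emitted base_addressx); otherwise each
// range is emitted on its own as startx_length.
EntryForm chooseForm(bool HaveCUBase, uint64_t PoolBase,
                     const std::vector<RangeEntry> &SectionRanges) {
  if (HaveCUBase)
    return EntryForm::OffsetPair; // offsets relative to the CU base
  bool BaseWorthwhile =
      PoolBase != SectionRanges.front().Begin || SectionRanges.size() > 1;
  return BaseWorthwhile ? EntryForm::OffsetPair    // base_addressx + offset_pair
                        : EntryForm::StartxLength; // single startx_length entry
}
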
+void DwarfDebug::emitDebugLoc() { + if (DebugLocs.getLists().empty()) + return; + + MCSymbol *TableEnd = nullptr; + if (getDwarfVersion() >= 5) { + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfLoclistsSection()); + TableEnd = emitLoclistsTableHeader(Asm, *this); + } else { + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfLocSection()); + } + + for (const auto &List : DebugLocs.getLists()) + emitLocList(*this, Asm, List); if (TableEnd) Asm->OutStreamer->EmitLabel(TableEnd); @@ -2232,9 +2466,9 @@ void DwarfDebug::emitDebugLocDWO() { // Ideally/in v5, this could use SectionLabels to reuse existing addresses // in the address pool to minimize object size/relocations. Asm->emitInt8(dwarf::DW_LLE_startx_length); - unsigned idx = AddrPool.getIndex(Entry.BeginSym); + unsigned idx = AddrPool.getIndex(Entry.Begin); Asm->EmitULEB128(idx); - Asm->EmitLabelDifference(Entry.EndSym, Entry.BeginSym, 4); + Asm->EmitLabelDifference(Entry.End, Entry.Begin, 4); emitDebugLocEntryLocation(Entry, List.CU); } @@ -2360,7 +2594,7 @@ void DwarfDebug::emitDebugARanges() { // 7.20 in the Dwarf specs requires the table to be aligned to a tuple. unsigned Padding = - OffsetToAlignment(sizeof(int32_t) + ContentSize, TupleSize); + offsetToAlignment(sizeof(int32_t) + ContentSize, Align(TupleSize)); ContentSize += Padding; ContentSize += (List.size() + 1) * TupleSize; @@ -2405,93 +2639,13 @@ void DwarfDebug::emitDebugARanges() { /// Emit a single range list. We handle both DWARF v5 and earlier. static void emitRangeList(DwarfDebug &DD, AsmPrinter *Asm, const RangeSpanList &List) { - - auto DwarfVersion = DD.getDwarfVersion(); - // Emit our symbol so we can find the beginning of the range. - Asm->OutStreamer->EmitLabel(List.getSym()); - // Gather all the ranges that apply to the same section so they can share - // a base address entry. - MapVector> SectionRanges; - // Size for our labels. - auto Size = Asm->MAI->getCodePointerSize(); - - for (const RangeSpan &Range : List.getRanges()) - SectionRanges[&Range.getStart()->getSection()].push_back(&Range); - - const DwarfCompileUnit &CU = List.getCU(); - const MCSymbol *CUBase = CU.getBaseAddress(); - bool BaseIsSet = false; - for (const auto &P : SectionRanges) { - // Don't bother with a base address entry if there's only one range in - // this section in this range list - for example ranges for a CU will - // usually consist of single regions from each of many sections - // (-ffunction-sections, or just C++ inline functions) except under LTO - // or optnone where there may be holes in a single CU's section - // contributions. - auto *Base = CUBase; - if (!Base && (P.second.size() > 1 || DwarfVersion < 5) && - (CU.getCUNode()->getRangesBaseAddress() || DwarfVersion >= 5)) { - BaseIsSet = true; - // FIXME/use care: This may not be a useful base address if it's not - // the lowest address/range in this object. 
- Base = P.second.front()->getStart(); - if (DwarfVersion >= 5) { - Base = DD.getSectionLabel(&Base->getSection()); - Asm->OutStreamer->AddComment("DW_RLE_base_addressx"); - Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_base_addressx, 1); - Asm->OutStreamer->AddComment(" base address index"); - Asm->EmitULEB128(DD.getAddressPool().getIndex(Base)); - } else { - Asm->OutStreamer->EmitIntValue(-1, Size); - Asm->OutStreamer->AddComment(" base address"); - Asm->OutStreamer->EmitSymbolValue(Base, Size); - } - } else if (BaseIsSet && DwarfVersion < 5) { - BaseIsSet = false; - assert(!Base); - Asm->OutStreamer->EmitIntValue(-1, Size); - Asm->OutStreamer->EmitIntValue(0, Size); - } - - for (const auto *RS : P.second) { - const MCSymbol *Begin = RS->getStart(); - const MCSymbol *End = RS->getEnd(); - assert(Begin && "Range without a begin symbol?"); - assert(End && "Range without an end symbol?"); - if (Base) { - if (DwarfVersion >= 5) { - // Emit DW_RLE_offset_pair when we have a base. - Asm->OutStreamer->AddComment("DW_RLE_offset_pair"); - Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_offset_pair, 1); - Asm->OutStreamer->AddComment(" starting offset"); - Asm->EmitLabelDifferenceAsULEB128(Begin, Base); - Asm->OutStreamer->AddComment(" ending offset"); - Asm->EmitLabelDifferenceAsULEB128(End, Base); - } else { - Asm->EmitLabelDifference(Begin, Base, Size); - Asm->EmitLabelDifference(End, Base, Size); - } - } else if (DwarfVersion >= 5) { - Asm->OutStreamer->AddComment("DW_RLE_startx_length"); - Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_startx_length, 1); - Asm->OutStreamer->AddComment(" start index"); - Asm->EmitULEB128(DD.getAddressPool().getIndex(Begin)); - Asm->OutStreamer->AddComment(" length"); - Asm->EmitLabelDifferenceAsULEB128(End, Begin); - } else { - Asm->OutStreamer->EmitSymbolValue(Begin, Size); - Asm->OutStreamer->EmitSymbolValue(End, Size); - } - } - } - if (DwarfVersion >= 5) { - Asm->OutStreamer->AddComment("DW_RLE_end_of_list"); - Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_end_of_list, 1); - } else { - // Terminate the list with two 0 values. 
- Asm->OutStreamer->EmitIntValue(0, Size); - Asm->OutStreamer->EmitIntValue(0, Size); - } + emitRangeList(DD, Asm, List.getSym(), List.getRanges(), List.getCU(), + dwarf::DW_RLE_base_addressx, dwarf::DW_RLE_offset_pair, + dwarf::DW_RLE_startx_length, dwarf::DW_RLE_end_of_list, + llvm::dwarf::RangeListEncodingString, + List.getCU().getCUNode()->getRangesBaseAddress() || + DD.getDwarfVersion() >= 5, + [](auto) {}); } static void emitDebugRangesImpl(DwarfDebug &DD, AsmPrinter *Asm, @@ -2637,7 +2791,7 @@ void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { - auto OwnedUnit = llvm::make_unique( + auto OwnedUnit = std::make_unique( CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder); DwarfCompileUnit &NewCU = *OwnedUnit; NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection()); @@ -2737,7 +2891,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, bool TopLevelType = TypeUnitsUnderConstruction.empty(); AddrPool.resetUsedFlag(); - auto OwnedUnit = llvm::make_unique(CU, Asm, this, &InfoHolder, + auto OwnedUnit = std::make_unique(CU, Asm, this, &InfoHolder, getDwoLineTable(CU)); DwarfTypeUnit &NewTU = *OwnedUnit; DIE &UnitDie = NewTU.getUnitDie(); @@ -2879,10 +3033,6 @@ uint16_t DwarfDebug::getDwarfVersion() const { return Asm->OutStreamer->getContext().getDwarfVersion(); } -void DwarfDebug::addSectionLabel(const MCSymbol *Sym) { - SectionLabels.insert(std::make_pair(&Sym->getSection(), Sym)); -} - const MCSymbol *DwarfDebug::getSectionLabel(const MCSection *S) { return SectionLabels.find(S)->second; } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 3ac474e2bdda..c8c511f67c2a 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -153,7 +153,7 @@ public: assert(!ValueLoc && "Already initialized?"); assert(!Value.getExpression()->isFragment() && "Fragments not supported."); - ValueLoc = llvm::make_unique(Value); + ValueLoc = std::make_unique(Value); if (auto *E = ValueLoc->getExpression()) if (E->getNumElements()) FrameIndexExprs.push_back({0, E}); @@ -216,7 +216,6 @@ public: return !FrameIndexExprs.empty(); } - bool isBlockByrefVariable() const; const DIType *getType() const; static bool classof(const DbgEntity *N) { @@ -254,6 +253,25 @@ public: } }; +/// Used for tracking debug info about call site parameters. +class DbgCallSiteParam { +private: + unsigned Register; ///< Parameter register at the callee entry point. + DbgValueLoc Value; ///< Corresponding location for the parameter value at + ///< the call site. +public: + DbgCallSiteParam(unsigned Reg, DbgValueLoc Val) + : Register(Reg), Value(Val) { + assert(Reg && "Parameter register cannot be undef"); + } + + unsigned getRegister() const { return Register; } + DbgValueLoc getValue() const { return Value; } +}; + +/// Collection used for storing debug call site parameters. +using ParamSet = SmallVector; + /// Helper used to pair up a symbol and its DWARF compile unit. 
struct SymbolCU { SymbolCU(DwarfCompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {} diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 2858afaa1cf1..1c5a244d7c5d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/Support/ErrorHandling.h" @@ -97,7 +98,7 @@ void DwarfExpression::addAnd(unsigned Mask) { bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, unsigned MachineReg, unsigned MaxSize) { - if (!TRI.isPhysicalRegister(MachineReg)) { + if (!llvm::Register::isPhysicalRegister(MachineReg)) { if (isFrameRegister(TRI, MachineReg)) { DwarfRegs.push_back({-1, 0, nullptr}); return true; @@ -241,15 +242,22 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, return false; } - // Handle simple register locations. - if (!isMemoryLocation() && !HasComplexExpression) { + // Handle simple register locations. If we are supposed to emit + // a call site parameter expression and if that expression is just a register + // location, emit it with addBReg and offset 0, because we should emit a DWARF + // expression representing a value, rather than a location. + if (!isMemoryLocation() && !HasComplexExpression && + (!isParameterValue() || isEntryValue())) { for (auto &Reg : DwarfRegs) { if (Reg.DwarfRegNo >= 0) addReg(Reg.DwarfRegNo, Reg.Comment); addOpPiece(Reg.Size); } - if (isEntryValue() && DwarfVersion >= 4) + if (isEntryValue()) + finalizeEntryValue(); + + if (isEntryValue() && !isParameterValue() && DwarfVersion >= 4) emitOp(dwarf::DW_OP_stack_value); DwarfRegs.clear(); @@ -275,19 +283,27 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, // Pattern-match combinations for which more efficient representations exist. // [Reg, DW_OP_plus_uconst, Offset] --> [DW_OP_breg, Offset]. if (Op && (Op->getOp() == dwarf::DW_OP_plus_uconst)) { - SignedOffset = Op->getArg(0); - ExprCursor.take(); + uint64_t Offset = Op->getArg(0); + uint64_t IntMax = static_cast(std::numeric_limits::max()); + if (Offset <= IntMax) { + SignedOffset = Offset; + ExprCursor.take(); + } } // [Reg, DW_OP_constu, Offset, DW_OP_plus] --> [DW_OP_breg, Offset] // [Reg, DW_OP_constu, Offset, DW_OP_minus] --> [DW_OP_breg,-Offset] // If Reg is a subregister we need to mask it out before subtracting. if (Op && Op->getOp() == dwarf::DW_OP_constu) { + uint64_t Offset = Op->getArg(0); + uint64_t IntMax = static_cast(std::numeric_limits::max()); auto N = ExprCursor.peekNext(); - if (N && (N->getOp() == dwarf::DW_OP_plus || - (N->getOp() == dwarf::DW_OP_minus && !SubRegisterSizeInBits))) { - int Offset = Op->getArg(0); - SignedOffset = (N->getOp() == dwarf::DW_OP_minus) ? 
-Offset : Offset; + if (N && N->getOp() == dwarf::DW_OP_plus && Offset <= IntMax) { + SignedOffset = Offset; + ExprCursor.consume(2); + } else if (N && N->getOp() == dwarf::DW_OP_minus && + !SubRegisterSizeInBits && Offset <= IntMax + 1) { + SignedOffset = -static_cast(Offset); ExprCursor.consume(2); } } @@ -300,17 +316,34 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, return true; } -void DwarfExpression::addEntryValueExpression(DIExpressionCursor &ExprCursor) { +void DwarfExpression::beginEntryValueExpression( + DIExpressionCursor &ExprCursor) { auto Op = ExprCursor.take(); - assert(Op && Op->getOp() == dwarf::DW_OP_entry_value); + (void)Op; + assert(Op && Op->getOp() == dwarf::DW_OP_LLVM_entry_value); assert(!isMemoryLocation() && "We don't support entry values of memory locations yet"); + assert(!IsEmittingEntryValue && "Already emitting entry value?"); + assert(Op->getArg(0) == 1 && + "Can currently only emit entry values covering a single operation"); - if (DwarfVersion >= 5) - emitOp(dwarf::DW_OP_entry_value); - else - emitOp(dwarf::DW_OP_GNU_entry_value); - emitUnsigned(Op->getArg(0)); + emitOp(CU.getDwarf5OrGNULocationAtom(dwarf::DW_OP_entry_value)); + IsEmittingEntryValue = true; + enableTemporaryBuffer(); +} + +void DwarfExpression::finalizeEntryValue() { + assert(IsEmittingEntryValue && "Entry value not open?"); + disableTemporaryBuffer(); + + // Emit the entry value's size operand. + unsigned Size = getTemporaryBufferSize(); + emitUnsigned(Size); + + // Emit the entry value's DWARF block operand. + commitTemporaryBuffer(); + + IsEmittingEntryValue = false; } /// Assuming a well-formed expression, match "DW_OP_deref* DW_OP_LLVM_fragment?". @@ -340,7 +373,17 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, while (ExprCursor) { auto Op = ExprCursor.take(); - switch (Op->getOp()) { + uint64_t OpNum = Op->getOp(); + + if (OpNum >= dwarf::DW_OP_reg0 && OpNum <= dwarf::DW_OP_reg31) { + emitOp(OpNum); + continue; + } else if (OpNum >= dwarf::DW_OP_breg0 && OpNum <= dwarf::DW_OP_breg31) { + addBReg(OpNum - dwarf::DW_OP_breg0, Op->getArg(0)); + continue; + } + + switch (OpNum) { case dwarf::DW_OP_LLVM_fragment: { unsigned SizeInBits = Op->getArg(1); unsigned FragmentOffset = Op->getArg(0); @@ -389,10 +432,13 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, case dwarf::DW_OP_lit0: case dwarf::DW_OP_not: case dwarf::DW_OP_dup: - emitOp(Op->getOp()); + emitOp(OpNum); break; case dwarf::DW_OP_deref: assert(!isRegisterLocation()); + // For more detailed explanation see llvm.org/PR43343. + assert(!isParameterValue() && "Parameter entry values should not be " + "dereferenced due to safety reasons."); if (!isMemoryLocation() && ::isMemoryLocation(ExprCursor)) // Turning this into a memory location description makes the deref // implicit. @@ -458,12 +504,21 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, case dwarf::DW_OP_LLVM_tag_offset: TagOffset = Op->getArg(0); break; + case dwarf::DW_OP_regx: + emitOp(dwarf::DW_OP_regx); + emitUnsigned(Op->getArg(0)); + break; + case dwarf::DW_OP_bregx: + emitOp(dwarf::DW_OP_bregx); + emitUnsigned(Op->getArg(0)); + emitSigned(Op->getArg(1)); + break; default: llvm_unreachable("unhandled opcode found in expression"); } } - if (isImplicitLocation()) + if (isImplicitLocation() && !isParameterValue()) // Turn this into an implicit location description. 
addStackValue(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h index ec2ef6e575f7..1ad46669f9b2 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -13,6 +13,7 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H +#include "ByteStreamer.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" @@ -26,7 +27,6 @@ namespace llvm { class AsmPrinter; class APInt; -class ByteStreamer; class DwarfCompileUnit; class DIELoc; class TargetRegisterInfo; @@ -95,6 +95,13 @@ public: /// Base class containing the logic for constructing DWARF expressions /// independently of whether they are emitted into a DIE or into a .debug_loc /// entry. +/// +/// Some DWARF operations, e.g. DW_OP_entry_value, need to calculate the size +/// of a succeeding DWARF block before the latter is emitted to the output. +/// To handle such cases, data can conditionally be emitted to a temporary +/// buffer, which can later on be committed to the main output. The size of the +/// temporary buffer is queryable, allowing for the size of the data to be +/// emitted before the data is committed. class DwarfExpression { protected: /// Holds information about all subregisters comprising a register location. @@ -104,6 +111,9 @@ protected: const char *Comment; }; + /// Whether we are currently emitting an entry value operation. + bool IsEmittingEntryValue = false; + DwarfCompileUnit &CU; /// The register location, if any. @@ -120,7 +130,7 @@ protected: enum { Unknown = 0, Register, Memory, Implicit }; /// The flags of location description being produced. - enum { EntryValue = 1 }; + enum { EntryValue = 1, CallSiteParamValue }; unsigned LocationKind : 3; unsigned LocationFlags : 2; @@ -147,6 +157,10 @@ public: return LocationFlags & EntryValue; } + bool isParameterValue() { + return LocationFlags & CallSiteParamValue; + } + Optional TagOffset; protected: @@ -174,6 +188,22 @@ protected: virtual void emitBaseTypeRef(uint64_t Idx) = 0; + /// Start emitting data to the temporary buffer. The data stored in the + /// temporary buffer can be committed to the main output using + /// commitTemporaryBuffer(). + virtual void enableTemporaryBuffer() = 0; + + /// Disable emission to the temporary buffer. This does not commit data + /// in the temporary buffer to the main output. + virtual void disableTemporaryBuffer() = 0; + + /// Return the emitted size, in number of bytes, for the data stored in the + /// temporary buffer. + virtual unsigned getTemporaryBufferSize() = 0; + + /// Commit the data stored in the temporary buffer to the main output. + virtual void commitTemporaryBuffer() = 0; + /// Emit a normalized unsigned constant. void emitConstu(uint64_t Value); @@ -233,6 +263,10 @@ protected: /// expression. See PR21176 for more details. void addStackValue(); + /// Finalize an entry value by emitting its size operand, and committing the + /// DWARF block which has been emitted to the temporary buffer. + void finalizeEntryValue(); + ~DwarfExpression() = default; public: @@ -264,6 +298,11 @@ public: LocationFlags |= EntryValue; } + /// Lock this down to become a call site parameter location. + void setCallSiteParamValueFlag() { + LocationFlags |= CallSiteParamValue; + } + /// Emit a machine register location. 
As an optimization this may also consume /// the prefix of a DwarfExpression if a more efficient representation for /// combining the register location and the first operation exists. @@ -278,8 +317,11 @@ public: DIExpressionCursor &Expr, unsigned MachineReg, unsigned FragmentOffsetInBits = 0); - /// Emit entry value dwarf operation. - void addEntryValueExpression(DIExpressionCursor &ExprCursor); + /// Begin emission of an entry value dwarf operation. The entry value's + /// first operand is the size of the DWARF block (its second operand), + /// which needs to be calculated at time of emission, so we don't emit + /// any operands here. + void beginEntryValueExpression(DIExpressionCursor &ExprCursor); /// Emit all remaining operations in the DIExpressionCursor. /// @@ -299,31 +341,62 @@ public: /// DwarfExpression implementation for .debug_loc entries. class DebugLocDwarfExpression final : public DwarfExpression { - ByteStreamer &BS; + + struct TempBuffer { + SmallString<32> Bytes; + std::vector Comments; + BufferByteStreamer BS; + + TempBuffer(bool GenerateComments) : BS(Bytes, Comments, GenerateComments) {} + }; + + std::unique_ptr TmpBuf; + BufferByteStreamer &OutBS; + bool IsBuffering = false; + + /// Return the byte streamer that currently is being emitted to. + ByteStreamer &getActiveStreamer() { return IsBuffering ? TmpBuf->BS : OutBS; } void emitOp(uint8_t Op, const char *Comment = nullptr) override; void emitSigned(int64_t Value) override; void emitUnsigned(uint64_t Value) override; void emitData1(uint8_t Value) override; void emitBaseTypeRef(uint64_t Idx) override; + + void enableTemporaryBuffer() override; + void disableTemporaryBuffer() override; + unsigned getTemporaryBufferSize() override; + void commitTemporaryBuffer() override; + bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) override; - public: - DebugLocDwarfExpression(unsigned DwarfVersion, ByteStreamer &BS, DwarfCompileUnit &CU) - : DwarfExpression(DwarfVersion, CU), BS(BS) {} + DebugLocDwarfExpression(unsigned DwarfVersion, BufferByteStreamer &BS, + DwarfCompileUnit &CU) + : DwarfExpression(DwarfVersion, CU), OutBS(BS) {} }; /// DwarfExpression implementation for singular DW_AT_location. class DIEDwarfExpression final : public DwarfExpression { -const AsmPrinter &AP; - DIELoc &DIE; + const AsmPrinter &AP; + DIELoc &OutDIE; + DIELoc TmpDIE; + bool IsBuffering = false; + + /// Return the DIE that currently is being emitted to. + DIELoc &getActiveDIE() { return IsBuffering ? TmpDIE : OutDIE; } void emitOp(uint8_t Op, const char *Comment = nullptr) override; void emitSigned(int64_t Value) override; void emitUnsigned(uint64_t Value) override; void emitData1(uint8_t Value) override; void emitBaseTypeRef(uint64_t Idx) override; + + void enableTemporaryBuffer() override; + void disableTemporaryBuffer() override; + unsigned getTemporaryBufferSize() override; + void commitTemporaryBuffer() override; + bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) override; public: @@ -331,7 +404,7 @@ public: DIELoc *finalize() { DwarfExpression::finalize(); - return &DIE; + return &OutDIE; } }; diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h index 244678ce9dc1..35fa51fb24c4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -32,15 +32,9 @@ class LexicalScope; class MCSection; // Data structure to hold a range for range lists. 
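
The DwarfExpression changes above revolve around one detail of the encoding: DW_OP_entry_value is followed by the size of its operand block, so that block has to be staged in a side buffer and only committed once its length is known. A minimal, self-contained sketch of that buffer-then-commit pattern, using a hypothetical emitter rather than the LLVM classes:

#include <cstdint>
#include <vector>

struct TinyEmitter {
  std::vector<uint8_t> Out;  // main output
  std::vector<uint8_t> Tmp;  // temporary buffer
  bool Buffering = false;

  void emitByte(uint8_t B) { (Buffering ? Tmp : Out).push_back(B); }

  // Standard ULEB128 encoding.
  void emitULEB(uint64_t V) {
    do {
      uint8_t B = V & 0x7f;
      V >>= 7;
      emitByte(V ? (B | 0x80) : B);
    } while (V);
  }

  void emitEntryValue(const std::vector<uint8_t> &BlockOps,
                      uint8_t EntryValueOp) {
    Buffering = true;
    for (uint8_t Op : BlockOps)  // stage the block to learn its size
      emitByte(Op);
    Buffering = false;
    emitByte(EntryValueOp);      // e.g. DW_OP_entry_value or the GNU analog
    emitULEB(Tmp.size());        // the size operand must precede the block
    Out.insert(Out.end(), Tmp.begin(), Tmp.end());
    Tmp.clear();
  }
};
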
-class RangeSpan { -public: - RangeSpan(MCSymbol *S, MCSymbol *E) : Start(S), End(E) {} - const MCSymbol *getStart() const { return Start; } - const MCSymbol *getEnd() const { return End; } - void setEnd(const MCSymbol *E) { End = E; } - -private: - const MCSymbol *Start, *End; +struct RangeSpan { + const MCSymbol *Begin; + const MCSymbol *End; }; class RangeSpanList { @@ -86,10 +80,6 @@ class DwarfFile { /// The table is shared by all units. MCSymbol *RnglistsTableBaseSym = nullptr; - /// DWARF v5: The symbol that designates the base of the locations list table. - /// The table is shared by all units. - MCSymbol *LoclistsTableBaseSym = nullptr; - /// The variables of a lexical scope. struct ScopeVars { /// We need to sort Args by ArgNo and check for duplicates. This could also @@ -167,9 +157,6 @@ public: MCSymbol *getRnglistsTableBaseSym() const { return RnglistsTableBaseSym; } void setRnglistsTableBaseSym(MCSymbol *Sym) { RnglistsTableBaseSym = Sym; } - MCSymbol *getLoclistsTableBaseSym() const { return LoclistsTableBaseSym; } - void setLoclistsTableBaseSym(MCSymbol *Sym) { LoclistsTableBaseSym = Sym; } - /// \returns false if the variable was merged with a previous one. bool addScopeVariable(LexicalScope *LS, DbgVariable *Var); diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 991ab94b50ab..37c68c085792 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -47,31 +47,42 @@ using namespace llvm; #define DEBUG_TYPE "dwarfdebug" DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, - DwarfCompileUnit &CU, - DIELoc &DIE) - : DwarfExpression(AP.getDwarfVersion(), CU), AP(AP), - DIE(DIE) {} + DwarfCompileUnit &CU, DIELoc &DIE) + : DwarfExpression(AP.getDwarfVersion(), CU), AP(AP), OutDIE(DIE) {} void DIEDwarfExpression::emitOp(uint8_t Op, const char* Comment) { - CU.addUInt(DIE, dwarf::DW_FORM_data1, Op); + CU.addUInt(getActiveDIE(), dwarf::DW_FORM_data1, Op); } void DIEDwarfExpression::emitSigned(int64_t Value) { - CU.addSInt(DIE, dwarf::DW_FORM_sdata, Value); + CU.addSInt(getActiveDIE(), dwarf::DW_FORM_sdata, Value); } void DIEDwarfExpression::emitUnsigned(uint64_t Value) { - CU.addUInt(DIE, dwarf::DW_FORM_udata, Value); + CU.addUInt(getActiveDIE(), dwarf::DW_FORM_udata, Value); } void DIEDwarfExpression::emitData1(uint8_t Value) { - CU.addUInt(DIE, dwarf::DW_FORM_data1, Value); + CU.addUInt(getActiveDIE(), dwarf::DW_FORM_data1, Value); } void DIEDwarfExpression::emitBaseTypeRef(uint64_t Idx) { - CU.addBaseTypeRef(DIE, Idx); + CU.addBaseTypeRef(getActiveDIE(), Idx); } +void DIEDwarfExpression::enableTemporaryBuffer() { + assert(!IsBuffering && "Already buffering?"); + IsBuffering = true; +} + +void DIEDwarfExpression::disableTemporaryBuffer() { IsBuffering = false; } + +unsigned DIEDwarfExpression::getTemporaryBufferSize() { + return TmpDIE.ComputeSize(&AP); +} + +void DIEDwarfExpression::commitTemporaryBuffer() { OutDIE.takeValues(TmpDIE); } + bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) { return MachineReg == TRI.getFrameRegister(*AP.MF); @@ -205,6 +216,10 @@ void DwarfUnit::insertDIE(const DINode *Desc, DIE *D) { MDNodeToDieMap.insert(std::make_pair(Desc, D)); } +void DwarfUnit::insertDIE(DIE *D) { + MDNodeToDieMap.insert(std::make_pair(nullptr, D)); +} + void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) { if (DD->getDwarfVersion() >= 4) Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_flag_present, @@ -718,7 +733,7 @@ 
std::string DwarfUnit::getParentContextString(const DIScope *Context) const { return ""; // FIXME: Decide whether to implement this for non-C++ languages. - if (getLanguage() != dwarf::DW_LANG_C_plus_plus) + if (!dwarf::isCPlusPlus((dwarf::SourceLanguage)getLanguage())) return ""; std::string CS; @@ -942,6 +957,9 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { if (CTy->isAppleBlockExtension()) addFlag(Buffer, dwarf::DW_AT_APPLE_block); + if (CTy->getExportSymbols()) + addFlag(Buffer, dwarf::DW_AT_export_symbols); + // This is outside the DWARF spec, but GDB expects a DW_AT_containing_type // inside C++ composite types to point to the base class with the vtable. // Rust uses DW_AT_containing_type to link a vtable to the type @@ -1696,15 +1714,6 @@ void DwarfUnit::addRnglistsBase() { TLOF.getDwarfRnglistsSection()->getBeginSymbol()); } -void DwarfUnit::addLoclistsBase() { - assert(DD->getDwarfVersion() >= 5 && - "DW_AT_loclists_base requires DWARF version 5 or later"); - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - addSectionLabel(getUnitDie(), dwarf::DW_AT_loclists_base, - DU->getLoclistsTableBaseSym(), - TLOF.getDwarfLoclistsSection()->getBeginSymbol()); -} - void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) { addFlag(D, dwarf::DW_AT_declaration); StringRef Name = CTy->getName(); diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index 56c934a35ae8..46c52a1faf4b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -127,6 +127,8 @@ public: /// the mappings are kept in DwarfDebug. void insertDIE(const DINode *Desc, DIE *D); + void insertDIE(DIE *D); + /// Add a flag that is true to the DIE. void addFlag(DIE &Die, dwarf::Attribute Attribute); @@ -214,15 +216,6 @@ public: /// Add thrown types. void addThrownTypes(DIE &Die, DINodeArray ThrownTypes); - // FIXME: Should be reformulated in terms of addComplexAddress. - /// Start with the address based on the location provided, and generate the - /// DWARF information necessary to find the actual Block variable (navigating - /// the Block struct) based on the starting location. Add the DWARF - /// information to the die. Obsolete, please use addComplexAddress instead. - void addBlockByrefAddress(const DbgVariable &DV, DIE &Die, - dwarf::Attribute Attribute, - const MachineLocation &Location); - /// Add a new type attribute to the specified entity. /// /// This takes and attribute parameter because DW_AT_friend attributes are @@ -279,9 +272,6 @@ public: /// Add the DW_AT_rnglists_base attribute to the unit DIE. void addRnglistsBase(); - /// Add the DW_AT_loclists_base attribute to the unit DIE. - void addLoclistsBase(); - virtual DwarfCompileUnit &getCU() = 0; void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy); diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 99e3687b36b8..31dfaaac836e 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -426,7 +426,7 @@ MCSymbol *EHStreamer::emitExceptionTable() { // EHABI). In this case LSDASection will be NULL. if (LSDASection) Asm->OutStreamer->SwitchSection(LSDASection); - Asm->EmitAlignment(2); + Asm->EmitAlignment(Align(4)); // Emit the LSDA. 
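
The EmitAlignment hunks in this and the following GC-printer files are mechanical: the old argument was a log2 exponent, while the new llvm::Align carries the byte value itself, so 2 becomes Align(4) and 3 becomes Align(8). A trivial sanity check of that mapping (plain C++, not the LLVM helpers):

#include <cassert>

int main() {
  unsigned OldLog2Small = 2, OldLog2Large = 3;
  assert((1u << OldLog2Small) == 4); // EmitAlignment(2) -> EmitAlignment(Align(4))
  assert((1u << OldLog2Large) == 8); // EmitAlignment(3) -> EmitAlignment(Align(8))
  return 0;
}
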
MCSymbol *GCCETSym = @@ -602,11 +602,11 @@ MCSymbol *EHStreamer::emitExceptionTable() { } if (HaveTTData) { - Asm->EmitAlignment(2); + Asm->EmitAlignment(Align(4)); emitTypeInfos(TTypeEncoding, TTBaseLabel); } - Asm->EmitAlignment(2); + Asm->EmitAlignment(Align(4)); return GCCETSym; } diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp index 39392b79e960..3849644d1584 100644 --- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -72,7 +72,7 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info, **/ // Align to address width. - AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3); + AP.EmitAlignment(IntPtrSize == 4 ? Align(4) : Align(8)); // Emit PointCount. OS.AddComment("safe point count"); diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 3145cc90dc73..b4eda5fa8c58 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -129,7 +129,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, report_fatal_error(" Too much descriptor for ocaml GC"); } AP.emitInt16(NumDescriptors); - AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3); + AP.EmitAlignment(IntPtrSize == 4 ? Align(4) : Align(8)); for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(), IE = Info.funcinfo_end(); @@ -180,7 +180,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, AP.emitInt16(K->StackOffset); } - AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3); + AP.EmitAlignment(IntPtrSize == 4 ? Align(4) : Align(8)); } } } diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp index 155e91ce61a1..0398675577cd 100644 --- a/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/lib/CodeGen/AsmPrinter/WinException.cpp @@ -982,8 +982,7 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) { OS.EmitValueToAlignment(4); OS.EmitLabel(LSDALabel); - const Function *Per = - dyn_cast(F.getPersonalityFn()->stripPointerCasts()); + const auto *Per = cast(F.getPersonalityFn()->stripPointerCasts()); StringRef PerName = Per->getName(); int BaseState = -1; if (PerName == "_except_handler4") { diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp index dc7eaf6a5fe7..27b298dcf6af 100644 --- a/lib/CodeGen/AtomicExpandPass.cpp +++ b/lib/CodeGen/AtomicExpandPass.cpp @@ -382,7 +382,7 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { Value *NewAddr = Builder.CreateBitCast(Addr, PT); auto *NewLI = Builder.CreateLoad(NewTy, NewAddr); - NewLI->setAlignment(LI->getAlignment()); + NewLI->setAlignment(MaybeAlign(LI->getAlignment())); NewLI->setVolatile(LI->isVolatile()); NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID()); LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n"); @@ -469,7 +469,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { Value *NewAddr = Builder.CreateBitCast(Addr, PT); StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr); - NewSI->setAlignment(SI->getAlignment()); + NewSI->setAlignment(MaybeAlign(SI->getAlignment())); NewSI->setVolatile(SI->isVolatile()); NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID()); LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n"); @@ -1376,7 +1376,7 @@ Value *AtomicExpand::insertRMWCmpXchgLoop( Builder.SetInsertPoint(BB); LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr); // Atomics 
require at least natural alignment. - InitLoaded->setAlignment(ResultTy->getPrimitiveSizeInBits() / 8); + InitLoaded->setAlignment(MaybeAlign(ResultTy->getPrimitiveSizeInBits() / 8)); Builder.CreateBr(LoopBB); // Start the main loop block now that we've taken care of the preliminaries. @@ -1711,7 +1711,7 @@ bool AtomicExpand::expandAtomicOpToLibcall( // 'expected' argument, if present. if (CASExpected) { AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType()); - AllocaCASExpected->setAlignment(AllocaAlignment); + AllocaCASExpected->setAlignment(MaybeAlign(AllocaAlignment)); unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace(); AllocaCASExpected_i8 = @@ -1730,7 +1730,7 @@ bool AtomicExpand::expandAtomicOpToLibcall( Args.push_back(IntValue); } else { AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType()); - AllocaValue->setAlignment(AllocaAlignment); + AllocaValue->setAlignment(MaybeAlign(AllocaAlignment)); AllocaValue_i8 = Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx)); Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64); @@ -1742,7 +1742,7 @@ bool AtomicExpand::expandAtomicOpToLibcall( // 'ret' argument. if (!CASExpected && HasResult && !UseSizedLibcall) { AllocaResult = AllocaBuilder.CreateAlloca(I->getType()); - AllocaResult->setAlignment(AllocaAlignment); + AllocaResult->setAlignment(MaybeAlign(AllocaAlignment)); unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace(); AllocaResult_i8 = Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS)); diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index fb54b5d6c8d8..455916eeb82f 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -129,9 +129,10 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { getAnalysis()); BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo, getAnalysis()); - return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(), - MF.getSubtarget().getRegisterInfo(), - getAnalysisIfAvailable()); + auto *MMIWP = getAnalysisIfAvailable(); + return Folder.OptimizeFunction( + MF, MF.getSubtarget().getInstrInfo(), MF.getSubtarget().getRegisterInfo(), + MMIWP ? &MMIWP->getMMI() : nullptr); } BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, @@ -161,6 +162,11 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { // Avoid matching if this pointer gets reused. TriedMerging.erase(MBB); + // Update call site info. + std::for_each(MBB->begin(), MBB->end(), [MF](const MachineInstr &MI) { + if (MI.isCall(MachineInstr::IgnoreBundle)) + MF->eraseCallSiteInfo(&MI); + }); // Remove the block. MF->erase(MBB); EHScopeMembership.erase(MBB); @@ -1306,6 +1312,8 @@ static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) { /// result in infinite loops. static bool IsBetterFallthrough(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2) { + assert(MBB1 && MBB2 && "Unknown MachineBasicBlock"); + // Right now, we use a simple heuristic. If MBB2 ends with a call, and // MBB1 doesn't, we prefer to fall through into MBB1. 
This allows us to // optimize branches that branch to either a return block or an assert block @@ -1843,7 +1851,7 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, template static void addRegAndItsAliases(unsigned Reg, const TargetRegisterInfo *TRI, Container &Set) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) Set.insert(*AI); } else { @@ -1871,7 +1879,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, for (const MachineOperand &MO : Loc->operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; if (MO.isUse()) { @@ -1909,7 +1917,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, return Loc; if (!MO.isReg() || MO.isUse()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; if (Uses.count(Reg)) { @@ -1937,14 +1945,14 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, for (const MachineOperand &MO : PI->operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; if (MO.isUse()) { addRegAndItsAliases(Reg, TRI, Uses); } else { if (Uses.erase(Reg)) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) Uses.erase(*SubRegs); // Use sub-registers to be conservative } @@ -2010,7 +2018,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { } if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; if (MO.isDef()) { @@ -2060,13 +2068,13 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { for (const MachineOperand &MO : TIB->operands()) { if (!MO.isReg() || !MO.isUse() || !MO.isKill()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; if (!AllDefsSet.count(Reg)) { continue; } - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) ActiveDefsSet.erase(*AI); } else { @@ -2078,8 +2086,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { for (const MachineOperand &MO : TIB->operands()) { if (!MO.isReg() || !MO.isDef() || MO.isDead()) continue; - unsigned Reg = MO.getReg(); - if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Reg || Register::isVirtualRegister(Reg)) continue; addRegAndItsAliases(Reg, TRI, ActiveDefsSet); addRegAndItsAliases(Reg, TRI, AllDefsSet); diff --git a/lib/CodeGen/BranchRelaxation.cpp b/lib/CodeGen/BranchRelaxation.cpp index 3ad6266d4f35..6efdc9efa968 100644 --- a/lib/CodeGen/BranchRelaxation.cpp +++ b/lib/CodeGen/BranchRelaxation.cpp @@ -64,19 +64,18 @@ class BranchRelaxation : public MachineFunctionPass { /// Compute the offset immediately following this block. \p MBB is the next /// block. 
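Note on the alignment changes in this and the surrounding hunks: the patch moves from log2-encoded alignments (EmitAlignment(2) meaning 1 << 2 bytes) to the explicit llvm::Align byte type (EmitAlignment(Align(4))), and postOffset() below is rewritten in the same style. As a rough illustration of the padding arithmetic involved, here is a minimal standalone sketch; the helper name is an assumption for illustration, not part of this patch:

#include <cstdint>

// paddingTo: bytes of padding needed to raise Offset to the next multiple of
// Alignment (a power of two given in bytes, not as a log2 exponent).
static uint64_t paddingTo(uint64_t Offset, uint64_t Alignment) {
  return (Alignment - (Offset % Alignment)) % Alignment;
}
// e.g. paddingTo(10, 4) == 2, which is what offsetToAlignment(PO, Align(4))
// is expected to contribute in the rewritten postOffset() below.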
unsigned postOffset(const MachineBasicBlock &MBB) const { - unsigned PO = Offset + Size; - unsigned Align = MBB.getAlignment(); - if (Align == 0) + const unsigned PO = Offset + Size; + const Align Alignment = MBB.getAlignment(); + if (Alignment == 1) return PO; - unsigned AlignAmt = 1 << Align; - unsigned ParentAlign = MBB.getParent()->getAlignment(); - if (Align <= ParentAlign) - return PO + OffsetToAlignment(PO, AlignAmt); + const Align ParentAlign = MBB.getParent()->getAlignment(); + if (Alignment <= ParentAlign) + return PO + offsetToAlignment(PO, Alignment); // The alignment of this MBB is larger than the function's alignment, so we // can't tell whether or not it will insert nops. Assume that it will. - return PO + AlignAmt + OffsetToAlignment(PO, AlignAmt); + return PO + Alignment.value() + offsetToAlignment(PO, Alignment); } }; @@ -128,9 +127,8 @@ void BranchRelaxation::verify() { #ifndef NDEBUG unsigned PrevNum = MF->begin()->getNumber(); for (MachineBasicBlock &MBB : *MF) { - unsigned Align = MBB.getAlignment(); - unsigned Num = MBB.getNumber(); - assert(BlockInfo[Num].Offset % (1u << Align) == 0); + const unsigned Num = MBB.getNumber(); + assert(isAligned(MBB.getAlignment(), BlockInfo[Num].Offset)); assert(!Num || BlockInfo[PrevNum].postOffset(MBB) <= BlockInfo[Num].Offset); assert(BlockInfo[Num].Size == computeBlockSize(MBB)); PrevNum = Num; @@ -143,7 +141,7 @@ void BranchRelaxation::verify() { LLVM_DUMP_METHOD void BranchRelaxation::dumpBBs() { for (auto &MBB : *MF) { const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()]; - dbgs() << format("%bb.%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset) + dbgs() << format("%%bb.%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset) << format("size=%#x\n", BBI.Size); } } diff --git a/lib/CodeGen/BreakFalseDeps.cpp b/lib/CodeGen/BreakFalseDeps.cpp index cc4b2caa9bed..709164e5f178 100644 --- a/lib/CodeGen/BreakFalseDeps.cpp +++ b/lib/CodeGen/BreakFalseDeps.cpp @@ -9,12 +9,11 @@ /// \file Break False Dependency pass. /// /// Some instructions have false dependencies which cause unnecessary stalls. -/// For exmaple, instructions that only write part of a register, and implicitly -/// need to read the other parts of the register. This may cause unwanted +/// For example, instructions may write part of a register and implicitly +/// need to read the other parts of the register. This may cause unwanted /// stalls preventing otherwise unrelated instructions from executing in /// parallel in an out-of-order CPU. -/// This pass is aimed at identifying and avoiding these depepndencies when -/// possible. +/// This pass is aimed at identifying and avoiding these dependencies. // //===----------------------------------------------------------------------===// @@ -24,6 +23,7 @@ #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" using namespace llvm; @@ -109,7 +109,7 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, MachineOperand &MO = MI->getOperand(OpIdx); assert(MO.isUndef() && "Expected undef machine operand"); - unsigned OriginalReg = MO.getReg(); + Register OriginalReg = MO.getReg(); // Update only undef operands that have reg units that are mapped to one root. 
for (MCRegUnitIterator Unit(OriginalReg, TRI); Unit.isValid(); ++Unit) { @@ -162,7 +162,7 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, unsigned Pref) { - unsigned reg = MI->getOperand(OpIdx).getReg(); + Register reg = MI->getOperand(OpIdx).getReg(); unsigned Clearance = RDA->getClearance(MI, reg); LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); @@ -178,6 +178,7 @@ void BreakFalseDeps::processDefs(MachineInstr *MI) { assert(!MI->isDebugInstr() && "Won't process debug values"); // Break dependence on undef uses. Do this before updating LiveRegs below. + // This can remove a false dependence with no additional instructions. unsigned OpNum; unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI); if (Pref) { @@ -189,6 +190,11 @@ void BreakFalseDeps::processDefs(MachineInstr *MI) { UndefReads.push_back(std::make_pair(MI, OpNum)); } + // The code below allows the target to create a new instruction to break the + // dependence. That opposes the goal of minimizing size, so bail out now. + if (MF->getFunction().hasMinSize()) + return; + const MCInstrDesc &MCID = MI->getDesc(); for (unsigned i = 0, e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); @@ -209,6 +215,11 @@ void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) { if (UndefReads.empty()) return; + // The code below allows the target to create a new instruction to break the + // dependence. That opposes the goal of minimizing size, so bail out now. + if (MF->getFunction().hasMinSize()) + return; + // Collect this block's live out register units. LiveRegSet.init(*TRI); // We do not need to care about pristine registers as they are just preserved diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 7164fdfb7886..bf97aaee3665 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -40,7 +40,7 @@ void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS, MachineRegisterInfo &MRI = MF.getRegInfo(); VirtRegAuxInfo VRAI(MF, LIS, VRM, MLI, MBFI, norm); for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (MRI.reg_nodbg_empty(Reg)) continue; VRAI.calculateSpillWeightAndHint(LIS.getInterval(Reg)); @@ -48,10 +48,11 @@ void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS, } // Return the preferred allocation register for reg, given a COPY instruction. -static unsigned copyHint(const MachineInstr *mi, unsigned reg, +static Register copyHint(const MachineInstr *mi, unsigned reg, const TargetRegisterInfo &tri, const MachineRegisterInfo &mri) { - unsigned sub, hreg, hsub; + unsigned sub, hsub; + Register hreg; if (mi->getOperand(0).getReg() == reg) { sub = mi->getOperand(0).getSubReg(); hreg = mi->getOperand(1).getReg(); @@ -65,11 +66,11 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg, if (!hreg) return 0; - if (TargetRegisterInfo::isVirtualRegister(hreg)) - return sub == hsub ? hreg : 0; + if (Register::isVirtualRegister(hreg)) + return sub == hsub ? hreg : Register(); const TargetRegisterClass *rc = mri.getRegClass(reg); - unsigned CopiedPReg = (hsub ? tri.getSubReg(hreg, hsub) : hreg); + Register CopiedPReg = (hsub ? 
tri.getSubReg(hreg, hsub) : hreg); if (rc->contains(CopiedPReg)) return CopiedPReg; @@ -112,7 +113,7 @@ static bool isRematerializable(const LiveInterval &LI, // If the original (pre-splitting) registers match this // copy came from a split. - if (!TargetRegisterInfo::isVirtualRegister(Reg) || + if (!Register::isVirtualRegister(Reg) || VRM->getOriginal(Reg) != Original) return false; @@ -243,7 +244,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, // Get allocation hints from copies. if (!mi->isCopy()) continue; - unsigned hint = copyHint(mi, li.reg, tri, mri); + Register hint = copyHint(mi, li.reg, tri, mri); if (!hint) continue; // Force hweight onto the stack so that x86 doesn't add hidden precision, @@ -251,8 +252,9 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, // // FIXME: we probably shouldn't use floats at all. volatile float hweight = Hint[hint] += weight; - if (TargetRegisterInfo::isVirtualRegister(hint) || mri.isAllocatable(hint)) - CopyHints.insert(CopyHint(hint, hweight, tri.isPhysicalRegister(hint))); + if (Register::isVirtualRegister(hint) || mri.isAllocatable(hint)) + CopyHints.insert( + CopyHint(hint, hweight, Register::isPhysicalRegister(hint))); } Hint.clear(); diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index 497fcb147849..a397039180a4 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -32,7 +32,6 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, TRI(*MF.getSubtarget().getRegisterInfo()), Locs(locs), Context(C) { // No stack is used. StackOffset = 0; - MaxStackArgAlign = 1; clearByValRegsInfo(); UsedRegs.resize((TRI.getNumRegs()+31)/32); @@ -41,20 +40,21 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, /// Allocate space on the stack large enough to pass an argument by value. /// The size and alignment information of the argument is encoded in /// its parameter attribute. 
-void CCState::HandleByVal(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, - int MinSize, int MinAlign, - ISD::ArgFlagsTy ArgFlags) { - unsigned Align = ArgFlags.getByValAlign(); +void CCState::HandleByVal(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, int MinSize, + int MinAlignment, ISD::ArgFlagsTy ArgFlags) { + Align MinAlign(MinAlignment); + Align Alignment(ArgFlags.getByValAlign()); unsigned Size = ArgFlags.getByValSize(); if (MinSize > (int)Size) Size = MinSize; - if (MinAlign > (int)Align) - Align = MinAlign; - ensureMaxAlignment(Align); - MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size, Align); + if (MinAlign > Alignment) + Alignment = MinAlign; + ensureMaxAlignment(Alignment); + MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size, + Alignment.value()); Size = unsigned(alignTo(Size, MinAlign)); - unsigned Offset = AllocateStack(Size, Align); + unsigned Offset = AllocateStack(Size, Alignment.value()); addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); } @@ -90,13 +90,8 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl &Ins, for (unsigned i = 0; i != NumArgs; ++i) { MVT ArgVT = Ins[i].VT; ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; - if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { -#ifndef NDEBUG - dbgs() << "Formal argument #" << i << " has unhandled type " - << EVT(ArgVT).getEVTString() << '\n'; -#endif - llvm_unreachable(nullptr); - } + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) + report_fatal_error("unable to allocate function argument #" + Twine(i)); } } @@ -122,13 +117,8 @@ void CCState::AnalyzeReturn(const SmallVectorImpl &Outs, for (unsigned i = 0, e = Outs.size(); i != e; ++i) { MVT VT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) { -#ifndef NDEBUG - dbgs() << "Return operand #" << i << " has unhandled type " - << EVT(VT).getEVTString() << '\n'; -#endif - llvm_unreachable(nullptr); - } + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) + report_fatal_error("unable to allocate function return #" + Twine(i)); } } @@ -209,7 +199,7 @@ static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) { void CCState::getRemainingRegParmsForType(SmallVectorImpl &Regs, MVT VT, CCAssignFn Fn) { unsigned SavedStackOffset = StackOffset; - unsigned SavedMaxStackArgAlign = MaxStackArgAlign; + Align SavedMaxStackArgAlign = MaxStackArgAlign; unsigned NumLocs = Locs.size(); // Set the 'inreg' flag if it is used for this calling convention. 
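For orientation, a rough model of the byval slot computation in the rewritten HandleByVal above; the names are illustrative assumptions and MinAlign is assumed to be nonzero:

#include <algorithm>
#include <cstdint>

// Honour both the byval attribute's size/alignment and the calling
// convention's minimums, then round the size up so following slots stay
// aligned, as AllocateStack(Size, Alignment) expects.
struct ByValSlot { uint64_t Size; uint64_t Alignment; };

static ByValSlot computeByValSlot(uint64_t ByValSize, uint64_t ByValAlign,
                                  uint64_t MinSize, uint64_t MinAlign) {
  uint64_t Size = std::max(ByValSize, MinSize);
  uint64_t Alignment = std::max(ByValAlign, MinAlign);
  Size = (Size + MinAlign - 1) / MinAlign * MinAlign; // alignTo(Size, MinAlign)
  return {Size, Alignment};
}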
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index c37ed57781d4..ad9525f927e8 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -28,6 +28,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeDetectDeadLanesPass(Registry); initializeDwarfEHPreparePass(Registry); initializeEarlyIfConverterPass(Registry); + initializeEarlyIfPredicatorPass(Registry); initializeEarlyMachineLICMPass(Registry); initializeEarlyTailDuplicatePass(Registry); initializeExpandMemCmpPassPass(Registry); @@ -53,6 +54,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeLocalStackSlotPassPass(Registry); initializeLowerIntrinsicsPass(Registry); initializeMIRCanonicalizerPass(Registry); + initializeMIRNamerPass(Registry); initializeMachineBlockFrequencyInfoPass(Registry); initializeMachineBlockPlacementPass(Registry); initializeMachineBlockPlacementStatsPass(Registry); @@ -63,10 +65,11 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeMachineFunctionPrinterPassPass(Registry); initializeMachineLICMPass(Registry); initializeMachineLoopInfoPass(Registry); - initializeMachineModuleInfoPass(Registry); + initializeMachineModuleInfoWrapperPassPass(Registry); initializeMachineOptimizationRemarkEmitterPassPass(Registry); initializeMachineOutlinerPass(Registry); initializeMachinePipelinerPass(Registry); + initializeModuloScheduleTestPass(Registry); initializeMachinePostDominatorTreePass(Registry); initializeMachineRegionInfoPassPass(Registry); initializeMachineSchedulerPass(Registry); diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 52b4bbea012b..fa4432ea23ec 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -344,7 +344,7 @@ class TypePromotionTransaction; // Get the DominatorTree, building if necessary. DominatorTree &getDT(Function &F) { if (!DT) - DT = llvm::make_unique(F); + DT = std::make_unique(F); return *DT; } @@ -424,7 +424,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { TLI = SubtargetInfo->getTargetLowering(); TRI = SubtargetInfo->getRegisterInfo(); } - TLInfo = &getAnalysis().getTLI(); + TLInfo = &getAnalysis().getTLI(F); TTI = &getAnalysis().getTTI(F); LI = &getAnalysis().getLoopInfo(); BPI.reset(new BranchProbabilityInfo(F, *LI)); @@ -1524,7 +1524,7 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, const TargetLowering &TLI, const DataLayout &DL) { BasicBlock *UserBB = User->getParent(); DenseMap InsertedTruncs; - TruncInst *TruncI = dyn_cast(User); + auto *TruncI = cast(User); bool MadeChange = false; for (Value::user_iterator TruncUI = TruncI->user_begin(), @@ -1682,10 +1682,11 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, TheUse = InsertedShift; } - // If we removed all uses, nuke the shift. + // If we removed all uses, or there are none, nuke the shift. if (ShiftI->use_empty()) { salvageDebugInfo(*ShiftI); ShiftI->eraseFromParent(); + MadeChange = true; } return MadeChange; @@ -1811,7 +1812,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { AllocaInst *AI; if ((AI = dyn_cast(Val)) && AI->getAlignment() < PrefAlign && DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) - AI->setAlignment(PrefAlign); + AI->setAlignment(MaybeAlign(PrefAlign)); // Global variables can only be aligned if they are defined in this // object (i.e. 
they are uniquely initialized in this object), and // over-aligning global variables that have an explicit section is @@ -1821,7 +1822,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { GV->getPointerAlignment(*DL) < PrefAlign && DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2) - GV->setAlignment(PrefAlign); + GV->setAlignment(MaybeAlign(PrefAlign)); } // If this is a memcpy (or similar) then we may be able to improve the // alignment @@ -1867,24 +1868,10 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { }); return true; } - case Intrinsic::objectsize: { - // Lower all uses of llvm.objectsize.* - Value *RetVal = - lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true); - - resetIteratorIfInvalidatedWhileCalling(BB, [&]() { - replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); - }); - return true; - } - case Intrinsic::is_constant: { - // If is_constant hasn't folded away yet, lower it to false now. - Constant *RetVal = ConstantInt::get(II->getType(), 0); - resetIteratorIfInvalidatedWhileCalling(BB, [&]() { - replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); - }); - return true; - } + case Intrinsic::objectsize: + llvm_unreachable("llvm.objectsize.* should have been lowered already"); + case Intrinsic::is_constant: + llvm_unreachable("llvm.is.constant.* should have been lowered already"); case Intrinsic::aarch64_stlxr: case Intrinsic::aarch64_stxr: { ZExtInst *ExtVal = dyn_cast(CI->getArgOperand(0)); @@ -2024,17 +2011,18 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail /// call. const Function *F = BB->getParent(); - SmallVector TailCalls; + SmallVector TailCallBBs; if (PN) { for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { // Look through bitcasts. Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts(); CallInst *CI = dyn_cast(IncomingVal); + BasicBlock *PredBB = PN->getIncomingBlock(I); // Make sure the phi value is indeed produced by the tail call. - if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && + if (CI && CI->hasOneUse() && CI->getParent() == PredBB && TLI->mayBeEmittedAsTailCall(CI) && attributesPermitTailCall(F, CI, RetI, *TLI)) - TailCalls.push_back(CI); + TailCallBBs.push_back(PredBB); } } else { SmallPtrSet VisitedBBs; @@ -2052,24 +2040,20 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT CallInst *CI = dyn_cast(&*RI); if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) && attributesPermitTailCall(F, CI, RetI, *TLI)) - TailCalls.push_back(CI); + TailCallBBs.push_back(*PI); } } bool Changed = false; - for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) { - CallInst *CI = TailCalls[i]; - CallSite CS(CI); - + for (auto const &TailCallBB : TailCallBBs) { // Make sure the call instruction is followed by an unconditional branch to // the return block. - BasicBlock *CallBB = CI->getParent(); - BranchInst *BI = dyn_cast(CallBB->getTerminator()); + BranchInst *BI = dyn_cast(TailCallBB->getTerminator()); if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) continue; - // Duplicate the return into CallBB. - (void)FoldReturnIntoUncondBranch(RetI, BB, CallBB); + // Duplicate the return into TailCallBB. 
+ (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB); ModifiedDT = Changed = true; ++NumRetsDup; } @@ -2683,26 +2667,26 @@ private: void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx, Value *NewVal) { - Actions.push_back(llvm::make_unique( + Actions.push_back(std::make_unique( Inst, Idx, NewVal)); } void TypePromotionTransaction::eraseInstruction(Instruction *Inst, Value *NewVal) { Actions.push_back( - llvm::make_unique( + std::make_unique( Inst, RemovedInsts, NewVal)); } void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst, Value *New) { Actions.push_back( - llvm::make_unique(Inst, New)); + std::make_unique(Inst, New)); } void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) { Actions.push_back( - llvm::make_unique(Inst, NewTy)); + std::make_unique(Inst, NewTy)); } Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, @@ -2732,7 +2716,7 @@ Value *TypePromotionTransaction::createZExt(Instruction *Inst, void TypePromotionTransaction::moveBefore(Instruction *Inst, Instruction *Before) { Actions.push_back( - llvm::make_unique( + std::make_unique( Inst, Before)); } @@ -3048,7 +3032,7 @@ public: To = dyn_cast(OldReplacement); OldReplacement = Get(From); } - assert(Get(To) == To && "Replacement PHI node is already replaced."); + assert(To && Get(To) == To && "Replacement PHI node is already replaced."); Put(From, To); From->replaceAllUsesWith(To); AllPhiNodes.erase(From); @@ -3334,7 +3318,7 @@ private: // So the values are different and does not match. So we need them to // match. (But we register no more than one match per PHI node, so that // we won't later try to replace them twice.) - if (!MatchedPHIs.insert(FirstPhi).second) + if (MatchedPHIs.insert(FirstPhi).second) Matcher.insert({ FirstPhi, SecondPhi }); // But me must check it. WorkList.push_back({ FirstPhi, SecondPhi }); @@ -3412,11 +3396,10 @@ private: Select->setFalseValue(ST.Get(Map[FalseValue])); } else { // Must be a Phi node then. - PHINode *PHI = cast(V); - auto *CurrentPhi = dyn_cast(Current); + auto *PHI = cast(V); // Fill the Phi node with values from predecessors. for (auto B : predecessors(PHI->getParent())) { - Value *PV = CurrentPhi->getIncomingValueForBlock(B); + Value *PV = cast(Current)->getIncomingValueForBlock(B); assert(Map.find(PV) != Map.end() && "No predecessor Value!"); PHI->addIncoming(ST.Get(Map[PV]), B); } @@ -3785,13 +3768,11 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst, // poisoned value regular value // It should be OK since undef covers valid value. 
if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) { - const Instruction *ExtInst = - dyn_cast(*Inst->user_begin()); + const auto *ExtInst = cast(*Inst->user_begin()); if (ExtInst->hasOneUse()) { - const Instruction *AndInst = - dyn_cast(*ExtInst->user_begin()); + const auto *AndInst = dyn_cast(*ExtInst->user_begin()); if (AndInst && AndInst->getOpcode() == Instruction::And) { - const ConstantInt *Cst = dyn_cast(AndInst->getOperand(1)); + const auto *Cst = dyn_cast(AndInst->getOperand(1)); if (Cst && Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth())) return true; @@ -4793,8 +4774,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, << " for " << *MemoryInst << "\n"); if (SunkAddr->getType() != Addr->getType()) SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); - } else if (AddrSinkUsingGEPs || - (!AddrSinkUsingGEPs.getNumOccurrences() && TM && TTI->useAA())) { + } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && + TM && SubtargetInfo->addrSinkUsingGEPs())) { // By default, we use the GEP-based method when AA is used later. This // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode @@ -5816,7 +5797,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { return false; IRBuilder<> Builder(Load->getNextNode()); - auto *NewAnd = dyn_cast( + auto *NewAnd = cast( Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits))); // Mark this instruction as "inserted by CGP", so that other // optimizations don't touch it. @@ -6193,35 +6174,49 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) { // OpsToSink can contain multiple uses in a use chain (e.g. // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating - // uses must come first, which means they are sunk first, temporarily creating - // invalid IR. This will be fixed once their dominated users are sunk and - // updated. + // uses must come first, so we process the ops in reverse order so as to not + // create invalid IR. BasicBlock *TargetBB = I->getParent(); bool Changed = false; SmallVector ToReplace; - for (Use *U : OpsToSink) { + for (Use *U : reverse(OpsToSink)) { auto *UI = cast(U->get()); if (UI->getParent() == TargetBB || isa(UI)) continue; ToReplace.push_back(U); } - SmallPtrSet MaybeDead; + SetVector MaybeDead; + DenseMap NewInstructions; + Instruction *InsertPoint = I; for (Use *U : ToReplace) { auto *UI = cast(U->get()); Instruction *NI = UI->clone(); + NewInstructions[UI] = NI; MaybeDead.insert(UI); LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n"); - NI->insertBefore(I); + NI->insertBefore(InsertPoint); + InsertPoint = NI; InsertedInsts.insert(NI); - U->set(NI); + + // Update the use for the new instruction, making sure that we update the + // sunk instruction uses, if it is part of a chain that has already been + // sunk. + Instruction *OldI = cast(U->getUser()); + if (NewInstructions.count(OldI)) + NewInstructions[OldI]->setOperand(U->getOperandNo(), NI); + else + U->set(NI); Changed = true; } // Remove instructions that are dead after sinking. 
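The tryToSinkFreeOperands change above keeps a NewInstructions map so that, when a whole use chain is sunk, each clone reads from the clone of its operand rather than from the original. A toy sketch of that bookkeeping, not LLVM API and with ownership kept raw for brevity:

#include <map>
#include <vector>

struct ToyInst { std::vector<ToyInst *> Ops; };

// Chain is given oldest-first (defs before their users); the caller owns the
// returned clones.
static ToyInst *cloneChain(const std::vector<ToyInst *> &Chain) {
  std::map<ToyInst *, ToyInst *> CloneOf;
  ToyInst *Last = nullptr;
  for (ToyInst *Old : Chain) {
    ToyInst *New = new ToyInst(*Old);
    for (ToyInst *&Op : New->Ops) {
      auto It = CloneOf.find(Op);
      if (It != CloneOf.end())
        Op = It->second; // rewire to the already-cloned operand
    }
    CloneOf[Old] = New;
    Last = New;
  }
  return Last;
}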
- for (auto *I : MaybeDead) - if (!I->hasNUsesOrMore(1)) + for (auto *I : MaybeDead) { + if (!I->hasNUsesOrMore(1)) { + LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n"); I->eraseFromParent(); + } + } return Changed; } @@ -7106,7 +7101,6 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) { for (auto &I : reverse(BB)) { if (makeBitReverse(I, *DL, *TLI)) { MadeBitReverse = MadeChange = true; - ModifiedDT = true; break; } } diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 4144c243a341..702e7e244bce 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -187,7 +187,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) { for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; const TargetRegisterClass *NewRC = nullptr; @@ -272,7 +272,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) { } if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; if (!MO.isDef()) continue; @@ -303,7 +303,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) { for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; if (!MO.isUse()) continue; @@ -457,6 +457,7 @@ BreakAntiDependencies(const std::vector &SUnits, if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency) Max = SU; } + assert(Max && "Failed to find bottom of the critical path"); #ifndef NDEBUG { @@ -612,7 +613,7 @@ BreakAntiDependencies(const std::vector &SUnits, for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) { AntiDepReg = 0; diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp index b99be5d7a87c..a169c3cb16b2 100644 --- a/lib/CodeGen/DFAPacketizer.cpp +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -23,6 +23,8 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBundle.h" @@ -71,39 +73,13 @@ static DFAInput getDFAInsnInput(const std::vector &InsnClass) { // -------------------------------------------------------------------- -DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, - const DFAStateInput (*SIT)[2], - const unsigned *SET): - InstrItins(I), DFAStateInputTable(SIT), DFAStateEntryTable(SET) { - // Make sure DFA types are large enough for the number of terms & resources. - static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= - (8 * sizeof(DFAInput)), - "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput"); - static_assert( - (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)), - "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput"); -} - -// Read the DFA transition table and update CachedTable. 
-// -// Format of the transition tables: -// DFAStateInputTable[][2] = pairs of for all valid -// transitions -// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable -// for the ith state -// -void DFAPacketizer::ReadTable(unsigned int state) { - unsigned ThisState = DFAStateEntryTable[state]; - unsigned NextStateInTable = DFAStateEntryTable[state+1]; - // Early exit in case CachedTable has already contains this - // state's transitions. - if (CachedTable.count(UnsignPair(state, DFAStateInputTable[ThisState][0]))) - return; - - for (unsigned i = ThisState; i < NextStateInTable; i++) - CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] = - DFAStateInputTable[i][1]; -} +// Make sure DFA types are large enough for the number of terms & resources. +static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= + (8 * sizeof(DFAInput)), + "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput"); +static_assert( + (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)), + "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput"); // Return the DFAInput for an instruction class. DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) { @@ -129,9 +105,7 @@ DFAInput DFAPacketizer::getInsnInput(const std::vector &InsnClass) { bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) { unsigned InsnClass = MID->getSchedClass(); DFAInput InsnInput = getInsnInput(InsnClass); - UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); - ReadTable(CurrentState); - return CachedTable.count(StateTrans) != 0; + return A.canAdd(InsnInput); } // Reserve the resources occupied by a MCInstrDesc and change the current @@ -139,10 +113,7 @@ bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) { void DFAPacketizer::reserveResources(const MCInstrDesc *MID) { unsigned InsnClass = MID->getSchedClass(); DFAInput InsnInput = getInsnInput(InsnClass); - UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); - ReadTable(CurrentState); - assert(CachedTable.count(StateTrans) != 0); - CurrentState = CachedTable[StateTrans]; + A.add(InsnInput); } // Check if the resources occupied by a machine instruction are available @@ -159,19 +130,33 @@ void DFAPacketizer::reserveResources(MachineInstr &MI) { reserveResources(&MID); } +unsigned DFAPacketizer::getUsedResources(unsigned InstIdx) { + ArrayRef NfaPaths = A.getNfaPaths(); + assert(!NfaPaths.empty() && "Invalid bundle!"); + const NfaPath &RS = NfaPaths.front(); + + // RS stores the cumulative resources used up to and including the I'th + // instruction. The 0th instruction is the base case. + if (InstIdx == 0) + return RS[0]; + // Return the difference between the cumulative resources used by InstIdx and + // its predecessor. + return RS[InstIdx] ^ RS[InstIdx - 1]; +} + namespace llvm { // This class extends ScheduleDAGInstrs and overrides the schedule method // to build the dependence graph. class DefaultVLIWScheduler : public ScheduleDAGInstrs { private: - AliasAnalysis *AA; + AAResults *AA; /// Ordered list of DAG postprocessing steps. std::vector> Mutations; public: DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, - AliasAnalysis *AA); + AAResults *AA); // Actual scheduling work. 
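The new getUsedResources above relies on each NFA path storing cumulative resource masks. A minimal sketch of that trick, with the path modelled as plain 64-bit masks (an illustrative simplification):

#include <cstdint>
#include <vector>

// Path[i] is the bitmask of all resources consumed by instructions 0..i; since
// each step only adds bits, XOR with the previous entry recovers the resources
// used by instruction i alone.
static uint64_t resourcesUsedBy(const std::vector<uint64_t> &Path, unsigned I) {
  return I == 0 ? Path[0] : Path[I] ^ Path[I - 1];
}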
void schedule() override; @@ -189,7 +174,7 @@ protected: DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, - AliasAnalysis *AA) + AAResults *AA) : ScheduleDAGInstrs(MF, &MLI), AA(AA) { CanHandleTerminators = true; } @@ -207,9 +192,10 @@ void DefaultVLIWScheduler::schedule() { } VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf, - MachineLoopInfo &mli, AliasAnalysis *aa) + MachineLoopInfo &mli, AAResults *aa) : MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa) { ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget()); + ResourceTracker->setTrackResources(true); VLIWScheduler = new DefaultVLIWScheduler(MF, mli, AA); } @@ -224,8 +210,11 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, LLVM_DEBUG({ if (!CurrentPacketMIs.empty()) { dbgs() << "Finalizing packet:\n"; - for (MachineInstr *MI : CurrentPacketMIs) - dbgs() << " * " << *MI; + unsigned Idx = 0; + for (MachineInstr *MI : CurrentPacketMIs) { + unsigned R = ResourceTracker->getUsedResources(Idx++); + dbgs() << " * [res:0x" << utohexstr(R) << "] " << *MI; + } } }); if (CurrentPacketMIs.size() > 1) { diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 049ce7063307..9a537c859a67 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -75,8 +75,8 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isDef()) { - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) { // Don't delete live physreg defs, or any reserved register defs. if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg)) return false; @@ -140,8 +140,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isDef()) { - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) { // Check the subreg set, not the alias set, because a def // of a super-register may still be partially live after // this def. 
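The hunks in this file, like many others in the patch, replace bare unsigned register numbers and the old TargetRegisterInfo static helpers with the llvm::Register wrapper. A simplified model of that wrapper, for illustration only (the tag bit and details below are assumptions, not the real implementation):

#include <cassert>

class RegisterModel {
  unsigned Reg;
  static const unsigned VirtualBit = 1u << 31;

public:
  RegisterModel(unsigned R = 0) : Reg(R) {}
  operator unsigned() const { return Reg; } // keeps 'unsigned Reg = MO.getReg()' call sites working

  static bool isVirtualRegister(unsigned R) { return (R & VirtualBit) != 0; }
  static bool isPhysicalRegister(unsigned R) { return R != 0 && !isVirtualRegister(R); }
  static unsigned virtReg2Index(unsigned R) {
    assert(isVirtualRegister(R) && "not a virtual register");
    return R & ~VirtualBit;
  }
  static unsigned index2VirtReg(unsigned Index) { return Index | VirtualBit; }
};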
@@ -159,8 +159,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isUse()) { - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) { for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) LivePhysRegs.set(*AI); } diff --git a/lib/CodeGen/DetectDeadLanes.cpp b/lib/CodeGen/DetectDeadLanes.cpp index fe78acf4d80a..6d5306c1dc0c 100644 --- a/lib/CodeGen/DetectDeadLanes.cpp +++ b/lib/CodeGen/DetectDeadLanes.cpp @@ -154,7 +154,7 @@ static bool isCrossCopy(const MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC, const MachineOperand &MO) { assert(lowersToCopies(MI)); - unsigned SrcReg = MO.getReg(); + Register SrcReg = MO.getReg(); const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); if (DstRC == SrcRC) return false; @@ -194,8 +194,8 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO, LaneBitmask UsedLanes) { if (!MO.readsReg()) return; - unsigned MOReg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(MOReg)) + Register MOReg = MO.getReg(); + if (!Register::isVirtualRegister(MOReg)) return; unsigned MOSubReg = MO.getSubReg(); @@ -203,7 +203,7 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO, UsedLanes = TRI->composeSubRegIndexLaneMask(MOSubReg, UsedLanes); UsedLanes &= MRI->getMaxLaneMaskForVReg(MOReg); - unsigned MORegIdx = TargetRegisterInfo::virtReg2Index(MOReg); + unsigned MORegIdx = Register::virtReg2Index(MOReg); VRegInfo &MORegInfo = VRegInfos[MORegIdx]; LaneBitmask PrevUsedLanes = MORegInfo.UsedLanes; // Any change at all? 
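For context on the DetectDeadLanes hunks that follow, a toy version of the lane-mask reasoning, with LaneBitmask modelled as a plain 32-bit mask (a simplification of the real predicates):

#include <cstdint>

using ToyLaneMask = uint32_t;

// A (sub-register) def is dead when none of the lanes it writes are ever read.
static bool isDeadLaneDef(ToyLaneMask DefinedLanes, ToyLaneMask UsedLanes) {
  return (DefinedLanes & UsedLanes) == 0;
}
// e.g. a def of lanes 0b0011 is dead when UsedLanes == 0b1100.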
@@ -219,7 +219,7 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO, void DetectDeadLanes::transferUsedLanesStep(const MachineInstr &MI, LaneBitmask UsedLanes) { for (const MachineOperand &MO : MI.uses()) { - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) continue; LaneBitmask UsedOnMO = transferUsedLanes(MI, UsedLanes, MO); addUsedLanesOnOperand(MO, UsedOnMO); @@ -230,8 +230,8 @@ LaneBitmask DetectDeadLanes::transferUsedLanes(const MachineInstr &MI, LaneBitmask UsedLanes, const MachineOperand &MO) const { unsigned OpNum = MI.getOperandNo(&MO); - assert(lowersToCopies(MI) && DefinedByCopy[ - TargetRegisterInfo::virtReg2Index(MI.getOperand(0).getReg())]); + assert(lowersToCopies(MI) && + DefinedByCopy[Register::virtReg2Index(MI.getOperand(0).getReg())]); switch (MI.getOpcode()) { case TargetOpcode::COPY: @@ -250,7 +250,7 @@ LaneBitmask DetectDeadLanes::transferUsedLanes(const MachineInstr &MI, return MO2UsedLanes; const MachineOperand &Def = MI.getOperand(0); - unsigned DefReg = Def.getReg(); + Register DefReg = Def.getReg(); const TargetRegisterClass *RC = MRI->getRegClass(DefReg); LaneBitmask MO1UsedLanes; if (RC->CoveredBySubRegs) @@ -285,10 +285,10 @@ void DetectDeadLanes::transferDefinedLanesStep(const MachineOperand &Use, if (MI.getOpcode() == TargetOpcode::PATCHPOINT) return; const MachineOperand &Def = *MI.defs().begin(); - unsigned DefReg = Def.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DefReg)) + Register DefReg = Def.getReg(); + if (!Register::isVirtualRegister(DefReg)) return; - unsigned DefRegIdx = TargetRegisterInfo::virtReg2Index(DefReg); + unsigned DefRegIdx = Register::virtReg2Index(DefReg); if (!DefinedByCopy.test(DefRegIdx)) return; @@ -360,7 +360,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) { if (lowersToCopies(DefMI)) { // Start optimisatically with no used or defined lanes for copy // instructions. The following dataflow analysis will add more bits. - unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg); + unsigned RegIdx = Register::virtReg2Index(Reg); DefinedByCopy.set(RegIdx); PutInWorklist(RegIdx); @@ -377,17 +377,17 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) { for (const MachineOperand &MO : DefMI.uses()) { if (!MO.isReg() || !MO.readsReg()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (!MOReg) continue; LaneBitmask MODefinedLanes; - if (TargetRegisterInfo::isPhysicalRegister(MOReg)) { + if (Register::isPhysicalRegister(MOReg)) { MODefinedLanes = LaneBitmask::getAll(); } else if (isCrossCopy(*MRI, DefMI, DefRC, MO)) { MODefinedLanes = LaneBitmask::getAll(); } else { - assert(TargetRegisterInfo::isVirtualRegister(MOReg)); + assert(Register::isVirtualRegister(MOReg)); if (MRI->hasOneDef(MOReg)) { const MachineOperand &MODef = *MRI->def_begin(MOReg); const MachineInstr &MODefMI = *MODef.getParent(); @@ -428,10 +428,10 @@ LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) { if (lowersToCopies(UseMI)) { assert(UseMI.getDesc().getNumDefs() == 1); const MachineOperand &Def = *UseMI.defs().begin(); - unsigned DefReg = Def.getReg(); + Register DefReg = Def.getReg(); // The used lanes of COPY-like instruction operands are determined by the // following dataflow analysis. - if (TargetRegisterInfo::isVirtualRegister(DefReg)) { + if (Register::isVirtualRegister(DefReg)) { // But ignore copies across incompatible register classes. 
bool CrossCopy = false; if (lowersToCopies(UseMI)) { @@ -470,10 +470,10 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO, if (!lowersToCopies(MI)) return false; const MachineOperand &Def = MI.getOperand(0); - unsigned DefReg = Def.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DefReg)) + Register DefReg = Def.getReg(); + if (!Register::isVirtualRegister(DefReg)) return false; - unsigned DefRegIdx = TargetRegisterInfo::virtReg2Index(DefReg); + unsigned DefRegIdx = Register::virtReg2Index(DefReg); if (!DefinedByCopy.test(DefRegIdx)) return false; @@ -482,8 +482,8 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO, if (UsedLanes.any()) return false; - unsigned MOReg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(MOReg)) { + Register MOReg = MO.getReg(); + if (Register::isVirtualRegister(MOReg)) { const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg); *CrossCopy = isCrossCopy(*MRI, MI, DstRC, MO); } @@ -494,7 +494,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) { // First pass: Populate defs/uses of vregs with initial values unsigned NumVirtRegs = MRI->getNumVirtRegs(); for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); + unsigned Reg = Register::index2VirtReg(RegIdx); // Determine used/defined lanes and add copy instructions to worklist. VRegInfo &Info = VRegInfos[RegIdx]; @@ -508,7 +508,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) { Worklist.pop_front(); WorklistMembers.reset(RegIdx); VRegInfo &Info = VRegInfos[RegIdx]; - unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); + unsigned Reg = Register::index2VirtReg(RegIdx); // Transfer UsedLanes to operands of DefMI (backwards dataflow). MachineOperand &Def = *MRI->def_begin(Reg); @@ -522,7 +522,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "Defined/Used lanes:\n"; for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); + unsigned Reg = Register::index2VirtReg(RegIdx); const VRegInfo &Info = VRegInfos[RegIdx]; dbgs() << printReg(Reg, nullptr) << " Used: " << PrintLaneMask(Info.UsedLanes) @@ -536,10 +536,10 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) { for (MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; - unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg); + unsigned RegIdx = Register::virtReg2Index(Reg); const VRegInfo &RegInfo = VRegInfos[RegIdx]; if (MO.isDef() && !MO.isDead() && RegInfo.UsedLanes.none()) { LLVM_DEBUG(dbgs() diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index 0a83760befaa..e5694218b5c3 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineTraceMetrics.h" @@ -140,6 +141,18 @@ private: /// speculated. bool canSpeculateInstrs(MachineBasicBlock *MBB); + /// Return true if all non-terminator instructions in MBB can be safely + /// predicated. 
+ bool canPredicateInstrs(MachineBasicBlock *MBB); + + /// Scan through instruction dependencies and update InsertAfter array. + /// Return false if any dependency is incompatible with if conversion. + bool InstrDependenciesAllowIfConv(MachineInstr *I); + + /// Predicate all instructions of the basic block with current condition + /// except for terminators. Reverse the condition if ReversePredicate is set. + void PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate); + /// Find a valid insertion point in Head. bool findInsertionPoint(); @@ -163,11 +176,14 @@ public: /// canConvertIf - If the sub-CFG headed by MBB can be if-converted, /// initialize the internal state, and return true. - bool canConvertIf(MachineBasicBlock *MBB); + /// If predicate is set try to predicate the block otherwise try to + /// speculatively execute it. + bool canConvertIf(MachineBasicBlock *MBB, bool Predicate = false); /// convertIf - If-convert the last block passed to canConvertIf(), assuming /// it is possible. Add any erased blocks to RemovedBlocks. - void convertIf(SmallVectorImpl &RemovedBlocks); + void convertIf(SmallVectorImpl &RemovedBlocks, + bool Predicate = false); }; } // end anonymous namespace @@ -225,37 +241,112 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { } // Check for any dependencies on Head instructions. - for (const MachineOperand &MO : I->operands()) { - if (MO.isRegMask()) { - LLVM_DEBUG(dbgs() << "Won't speculate regmask: " << *I); - return false; - } - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); + if (!InstrDependenciesAllowIfConv(&(*I))) + return false; + } + return true; +} - // Remember clobbered regunits. - if (MO.isDef() && TargetRegisterInfo::isPhysicalRegister(Reg)) - for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) - ClobberedRegUnits.set(*Units); +/// Check that there is no dependencies preventing if conversion. +/// +/// If instruction uses any values that are defined in the head basic block, +/// the defining instructions are added to InsertAfter. +bool SSAIfConv::InstrDependenciesAllowIfConv(MachineInstr *I) { + for (const MachineOperand &MO : I->operands()) { + if (MO.isRegMask()) { + LLVM_DEBUG(dbgs() << "Won't speculate regmask: " << *I); + return false; + } + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); - if (!MO.readsReg() || !TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - MachineInstr *DefMI = MRI->getVRegDef(Reg); - if (!DefMI || DefMI->getParent() != Head) - continue; - if (InsertAfter.insert(DefMI).second) - LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " depends on " - << *DefMI); - if (DefMI->isTerminator()) { - LLVM_DEBUG(dbgs() << "Can't insert instructions below terminator.\n"); - return false; - } + // Remember clobbered regunits. + if (MO.isDef() && Register::isPhysicalRegister(Reg)) + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + ClobberedRegUnits.set(*Units); + + if (!MO.readsReg() || !Register::isVirtualRegister(Reg)) + continue; + MachineInstr *DefMI = MRI->getVRegDef(Reg); + if (!DefMI || DefMI->getParent() != Head) + continue; + if (InsertAfter.insert(DefMI).second) + LLVM_DEBUG(dbgs() << printMBBReference(*I->getParent()) << " depends on " + << *DefMI); + if (DefMI->isTerminator()) { + LLVM_DEBUG(dbgs() << "Can't insert instructions below terminator.\n"); + return false; } } return true; } +/// canPredicateInstrs - Returns true if all the instructions in MBB can safely +/// be predicates. The terminators are not considered. 
+/// +/// If instructions use any values that are defined in the head basic block, +/// the defining instructions are added to InsertAfter. +/// +/// Any clobbered regunits are added to ClobberedRegUnits. +/// +bool SSAIfConv::canPredicateInstrs(MachineBasicBlock *MBB) { + // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to + // get right. + if (!MBB->livein_empty()) { + LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n"); + return false; + } + + unsigned InstrCount = 0; + + // Check all instructions, except the terminators. It is assumed that + // terminators never have side effects or define any used register values. + for (MachineBasicBlock::iterator I = MBB->begin(), + E = MBB->getFirstTerminator(); + I != E; ++I) { + if (I->isDebugInstr()) + continue; + + if (++InstrCount > BlockInstrLimit && !Stress) { + LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than " + << BlockInstrLimit << " instructions.\n"); + return false; + } + + // There shouldn't normally be any phis in a single-predecessor block. + if (I->isPHI()) { + LLVM_DEBUG(dbgs() << "Can't predicate: " << *I); + return false; + } + + // Check that instruction is predicable and that it is not already + // predicated. + if (!TII->isPredicable(*I) || TII->isPredicated(*I)) { + return false; + } + + // Check for any dependencies on Head instructions. + if (!InstrDependenciesAllowIfConv(&(*I))) + return false; + } + return true; +} + +// Apply predicate to all instructions in the machine block. +void SSAIfConv::PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate) { + auto Condition = Cond; + if (ReversePredicate) + TII->reverseBranchCondition(Condition); + // Terminators don't need to be predicated as they will be removed. + for (MachineBasicBlock::iterator I = MBB->begin(), + E = MBB->getFirstTerminator(); + I != E; ++I) { + if (I->isDebugInstr()) + continue; + TII->PredicateInstruction(*I, Condition); + } +} /// Find an insertion point in Head for the speculated instructions. The /// insertion point must be: @@ -288,8 +379,8 @@ bool SSAIfConv::findInsertionPoint() { // We're ignoring regmask operands. That is conservatively correct. if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; // I clobbers Reg, so it isn't live before I. if (MO.isDef()) @@ -337,7 +428,7 @@ bool SSAIfConv::findInsertionPoint() { /// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is /// a potential candidate for if-conversion. Fill out the internal state. /// -bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { +bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) { Head = MBB; TBB = FBB = Tail = nullptr; @@ -378,8 +469,9 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { } // This is a triangle or a diamond. - // If Tail doesn't have any phis, there must be side effects. - if (Tail->empty() || !Tail->front().isPHI()) { + // Skip if we cannot predicate and there are no phis skip as there must be + // side effects that can only be handled with predication. 
+ if (!Predicate && (Tail->empty() || !Tail->front().isPHI())) { LLVM_DEBUG(dbgs() << "No phis in tail.\n"); return false; } @@ -423,8 +515,8 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { if (PI.PHI->getOperand(i+1).getMBB() == FPred) PI.FReg = PI.PHI->getOperand(i).getReg(); } - assert(TargetRegisterInfo::isVirtualRegister(PI.TReg) && "Bad PHI"); - assert(TargetRegisterInfo::isVirtualRegister(PI.FReg) && "Bad PHI"); + assert(Register::isVirtualRegister(PI.TReg) && "Bad PHI"); + assert(Register::isVirtualRegister(PI.FReg) && "Bad PHI"); // Get target information. if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg, @@ -437,10 +529,17 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { // Check that the conditional instructions can be speculated. InsertAfter.clear(); ClobberedRegUnits.reset(); - if (TBB != Tail && !canSpeculateInstrs(TBB)) - return false; - if (FBB != Tail && !canSpeculateInstrs(FBB)) - return false; + if (Predicate) { + if (TBB != Tail && !canPredicateInstrs(TBB)) + return false; + if (FBB != Tail && !canPredicateInstrs(FBB)) + return false; + } else { + if (TBB != Tail && !canSpeculateInstrs(TBB)) + return false; + if (FBB != Tail && !canSpeculateInstrs(FBB)) + return false; + } // Try to find a valid insertion point for the speculated instructions in the // head basic block. @@ -467,7 +566,7 @@ void SSAIfConv::replacePHIInstrs() { for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { PHIInfo &PI = PHIs[i]; LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI); - unsigned DstReg = PI.PHI->getOperand(0).getReg(); + Register DstReg = PI.PHI->getOperand(0).getReg(); TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); PI.PHI->eraseFromParent(); @@ -494,7 +593,7 @@ void SSAIfConv::rewritePHIOperands() { // equal. DstReg = PI.TReg; } else { - unsigned PHIDst = PI.PHI->getOperand(0).getReg(); + Register PHIDst = PI.PHI->getOperand(0).getReg(); DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst)); TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); @@ -521,7 +620,8 @@ void SSAIfConv::rewritePHIOperands() { /// /// Any basic blocks erased will be added to RemovedBlocks. /// -void SSAIfConv::convertIf(SmallVectorImpl &RemovedBlocks) { +void SSAIfConv::convertIf(SmallVectorImpl &RemovedBlocks, + bool Predicate) { assert(Head && Tail && TBB && FBB && "Call canConvertIf first."); // Update statistics. @@ -531,11 +631,16 @@ void SSAIfConv::convertIf(SmallVectorImpl &RemovedBlocks) { ++NumDiamondsConv; // Move all instructions into Head, except for the terminators. - if (TBB != Tail) + if (TBB != Tail) { + if (Predicate) + PredicateBlock(TBB, /*ReversePredicate=*/false); Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator()); - if (FBB != Tail) + } + if (FBB != Tail) { + if (Predicate) + PredicateBlock(FBB, /*ReversePredicate=*/true); Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator()); - + } // Are there extra Tail predecessors? 
bool ExtraPreds = Tail->pred_size() != 2; if (ExtraPreds) @@ -587,7 +692,6 @@ void SSAIfConv::convertIf(SmallVectorImpl &RemovedBlocks) { LLVM_DEBUG(dbgs() << *Head); } - //===----------------------------------------------------------------------===// // EarlyIfConverter Pass //===----------------------------------------------------------------------===// @@ -613,8 +717,6 @@ public: private: bool tryConvertIf(MachineBasicBlock*); - void updateDomTree(ArrayRef Removed); - void updateLoops(ArrayRef Removed); void invalidateTraces(); bool shouldConvertIf(); }; @@ -642,32 +744,36 @@ void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } +namespace { /// Update the dominator tree after if-conversion erased some blocks. -void EarlyIfConverter::updateDomTree(ArrayRef Removed) { +void updateDomTree(MachineDominatorTree *DomTree, const SSAIfConv &IfConv, + ArrayRef Removed) { // convertIf can remove TBB, FBB, and Tail can be merged into Head. // TBB and FBB should not dominate any blocks. // Tail children should be transferred to Head. MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head); - for (unsigned i = 0, e = Removed.size(); i != e; ++i) { - MachineDomTreeNode *Node = DomTree->getNode(Removed[i]); + for (auto B : Removed) { + MachineDomTreeNode *Node = DomTree->getNode(B); assert(Node != HeadNode && "Cannot erase the head node"); while (Node->getNumChildren()) { assert(Node->getBlock() == IfConv.Tail && "Unexpected children"); DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode); } - DomTree->eraseNode(Removed[i]); + DomTree->eraseNode(B); } } /// Update LoopInfo after if-conversion. -void EarlyIfConverter::updateLoops(ArrayRef Removed) { +void updateLoops(MachineLoopInfo *Loops, + ArrayRef Removed) { if (!Loops) return; // If-conversion doesn't change loop structure, and it doesn't mess with back // edges, so updating LoopInfo is simply removing the dead blocks. - for (unsigned i = 0, e = Removed.size(); i != e; ++i) - Loops->removeBlock(Removed[i]); + for (auto B : Removed) + Loops->removeBlock(B); } +} // namespace /// Invalidate MachineTraceMetrics before if-conversion. 
void EarlyIfConverter::invalidateTraces() { @@ -783,8 +889,8 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { SmallVector RemovedBlocks; IfConv.convertIf(RemovedBlocks); Changed = true; - updateDomTree(RemovedBlocks); - updateLoops(RemovedBlocks); + updateDomTree(DomTree, IfConv, RemovedBlocks); + updateLoops(Loops, RemovedBlocks); } return Changed; } @@ -822,3 +928,132 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { return Changed; } + +//===----------------------------------------------------------------------===// +// EarlyIfPredicator Pass +//===----------------------------------------------------------------------===// + +namespace { +class EarlyIfPredicator : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + TargetSchedModel SchedModel; + MachineRegisterInfo *MRI; + MachineDominatorTree *DomTree; + MachineLoopInfo *Loops; + SSAIfConv IfConv; + +public: + static char ID; + EarlyIfPredicator() : MachineFunctionPass(ID) {} + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + StringRef getPassName() const override { return "Early If-predicator"; } + +protected: + bool tryConvertIf(MachineBasicBlock *); + bool shouldConvertIf(); +}; +} // end anonymous namespace + +#undef DEBUG_TYPE +#define DEBUG_TYPE "early-if-predicator" + +char EarlyIfPredicator::ID = 0; +char &llvm::EarlyIfPredicatorID = EarlyIfPredicator::ID; + +INITIALIZE_PASS_BEGIN(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator", + false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator", false, + false) + +void EarlyIfPredicator::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/// Apply the target heuristic to decide if the transformation is profitable. +bool EarlyIfPredicator::shouldConvertIf() { + if (IfConv.isTriangle()) { + MachineBasicBlock &IfBlock = + (IfConv.TBB == IfConv.Tail) ? *IfConv.FBB : *IfConv.TBB; + + unsigned ExtraPredCost = 0; + unsigned Cycles = 0; + for (MachineInstr &I : IfBlock) { + unsigned NumCycles = SchedModel.computeInstrLatency(&I, false); + if (NumCycles > 1) + Cycles += NumCycles - 1; + ExtraPredCost += TII->getPredicationCost(I); + } + + return TII->isProfitableToIfCvt(IfBlock, Cycles, ExtraPredCost, + BranchProbability::getUnknown()); + } + unsigned TExtra = 0; + unsigned FExtra = 0; + unsigned TCycle = 0; + unsigned FCycle = 0; + for (MachineInstr &I : *IfConv.TBB) { + unsigned NumCycles = SchedModel.computeInstrLatency(&I, false); + if (NumCycles > 1) + TCycle += NumCycles - 1; + TExtra += TII->getPredicationCost(I); + } + for (MachineInstr &I : *IfConv.FBB) { + unsigned NumCycles = SchedModel.computeInstrLatency(&I, false); + if (NumCycles > 1) + FCycle += NumCycles - 1; + FExtra += TII->getPredicationCost(I); + } + return TII->isProfitableToIfCvt(*IfConv.TBB, TCycle, TExtra, *IfConv.FBB, + FCycle, FExtra, + BranchProbability::getUnknown()); +} + +/// Attempt repeated if-conversion on MBB, return true if successful. +/// +bool EarlyIfPredicator::tryConvertIf(MachineBasicBlock *MBB) { + bool Changed = false; + while (IfConv.canConvertIf(MBB, /*Predicate*/ true) && shouldConvertIf()) { + // If-convert MBB and update analyses. 
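// --- Illustrative sketch (not part of the upstream change) ------------------
// The per-block cost accumulation used by the profitability check above,
// reduced to plain integers; latency and predication-cost values are invented.
#include <vector>
struct InstrCost { unsigned Latency; unsigned PredCost; };
static void sumBlockCost(const std::vector<InstrCost> &Block, unsigned &Cycles,
                         unsigned &ExtraPred) {
  Cycles = ExtraPred = 0;
  for (const InstrCost &I : Block) {
    if (I.Latency > 1)
      Cycles += I.Latency - 1; // only cycles beyond a single-cycle op count
    ExtraPred += I.PredCost;   // cost of carrying the predicate
  }
}
// Two single-cycle ops plus one 3-cycle op, each with PredCost 1, give
// Cycles == 2 and ExtraPred == 3, which isProfitableToIfCvt then weighs
// against the cost of the branch being removed.
// --- end of sketch ----------------------------------------------------------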
+ SmallVector RemovedBlocks; + IfConv.convertIf(RemovedBlocks, /*Predicate*/ true); + Changed = true; + updateDomTree(DomTree, IfConv, RemovedBlocks); + updateLoops(Loops, RemovedBlocks); + } + return Changed; +} + +bool EarlyIfPredicator::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "********** EARLY IF-PREDICATOR **********\n" + << "********** Function: " << MF.getName() << '\n'); + if (skipFunction(MF.getFunction())) + return false; + + const TargetSubtargetInfo &STI = MF.getSubtarget(); + TII = STI.getInstrInfo(); + TRI = STI.getRegisterInfo(); + MRI = &MF.getRegInfo(); + SchedModel.init(&STI); + DomTree = &getAnalysis(); + Loops = getAnalysisIfAvailable(); + + bool Changed = false; + IfConv.runOnMachineFunction(MF); + + // Visit blocks in dominator tree post-order. The post-order enables nested + // if-conversion in a single pass. The tryConvertIf() function may erase + // blocks, but only blocks dominated by the head block. This makes it safe to + // update the dominator tree while the post-order iterator is still active. + for (auto DomNode : post_order(DomTree)) + if (tryConvertIf(DomNode->getBlock())) + Changed = true; + + return Changed; +} diff --git a/lib/CodeGen/ExecutionDomainFix.cpp b/lib/CodeGen/ExecutionDomainFix.cpp index a2dd5eee33b7..2cca05ea6f55 100644 --- a/lib/CodeGen/ExecutionDomainFix.cpp +++ b/lib/CodeGen/ExecutionDomainFix.cpp @@ -9,6 +9,7 @@ #include "llvm/CodeGen/ExecutionDomainFix.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" using namespace llvm; diff --git a/lib/CodeGen/ExpandMemCmp.cpp b/lib/CodeGen/ExpandMemCmp.cpp index b425482e6adf..9916f2de0414 100644 --- a/lib/CodeGen/ExpandMemCmp.cpp +++ b/lib/CodeGen/ExpandMemCmp.cpp @@ -795,7 +795,7 @@ public: TPC->getTM().getSubtargetImpl(F)->getTargetLowering(); const TargetLibraryInfo *TLI = - &getAnalysis().getTLI(); + &getAnalysis().getTLI(F); const TargetTransformInfo *TTI = &getAnalysis().getTTI(F); auto PA = runImpl(F, TLI, TTI, TL); diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index 0ab70aff7dc4..1fc57fac1489 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -79,17 +79,17 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) && MI->getOperand(3).isImm() && "Invalid subreg_to_reg"); - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned InsReg = MI->getOperand(2).getReg(); + Register DstReg = MI->getOperand(0).getReg(); + Register InsReg = MI->getOperand(2).getReg(); assert(!MI->getOperand(2).getSubReg() && "SubIdx on physreg?"); unsigned SubIdx = MI->getOperand(3).getImm(); assert(SubIdx != 0 && "Invalid index for insert_subreg"); - unsigned DstSubReg = TRI->getSubReg(DstReg, SubIdx); + Register DstSubReg = TRI->getSubReg(DstReg, SubIdx); - assert(TargetRegisterInfo::isPhysicalRegister(DstReg) && + assert(Register::isPhysicalRegister(DstReg) && "Insert destination must be in a physical register"); - assert(TargetRegisterInfo::isPhysicalRegister(InsReg) && + assert(Register::isPhysicalRegister(InsReg) && "Inserted value must be in a physical register"); LLVM_DEBUG(dbgs() << "subreg: CONVERTING: " << *MI); diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index 9c53550eaa9d..c1d22ef89195 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -72,7 +72,7 @@ GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) { 
return *I->second; GCStrategy *S = getGCStrategy(F.getGC()); - Functions.push_back(llvm::make_unique(F, *S)); + Functions.push_back(std::make_unique(F, *S)); GCFunctionInfo *GFI = Functions.back().get(); FInfoMap[&F] = GFI; return *GFI; diff --git a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp index 90571d090bfb..0dc0a5bce747 100644 --- a/lib/CodeGen/GCRootLowering.cpp +++ b/lib/CodeGen/GCRootLowering.cpp @@ -249,7 +249,7 @@ GCMachineCodeAnalysis::GCMachineCodeAnalysis() : MachineFunctionPass(ID) {} void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); AU.setPreservesAll(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); } @@ -310,7 +310,7 @@ bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { return false; FI = &getAnalysis().getFunctionInfo(MF.getFunction()); - MMI = &getAnalysis(); + MMI = &getAnalysis().getMMI(); TII = MF.getSubtarget().getInstrInfo(); // Find the size of the stack frame. There may be no correct static frame diff --git a/lib/CodeGen/GlobalISel/CSEInfo.cpp b/lib/CodeGen/GlobalISel/CSEInfo.cpp index 4518dbee1a9f..7d9d812d34bc 100644 --- a/lib/CodeGen/GlobalISel/CSEInfo.cpp +++ b/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -52,6 +52,7 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) { case TargetOpcode::G_ANYEXT: case TargetOpcode::G_UNMERGE_VALUES: case TargetOpcode::G_TRUNC: + case TargetOpcode::G_GEP: return true; } return false; @@ -65,9 +66,9 @@ std::unique_ptr llvm::getStandardCSEConfigForOpt(CodeGenOpt::Level Level) { std::unique_ptr Config; if (Level == CodeGenOpt::None) - Config = make_unique(); + Config = std::make_unique(); else - Config = make_unique(); + Config = std::make_unique(); return Config; } @@ -332,7 +333,7 @@ GISelInstProfileBuilder::addNodeIDFlag(unsigned Flag) const { const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand( const MachineOperand &MO) const { if (MO.isReg()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!MO.isDef()) addNodeIDRegNum(Reg); LLT Ty = MRI.getType(Reg); diff --git a/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp index 461bc6038c2c..51a74793f029 100644 --- a/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -162,6 +162,17 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc, return buildConstant(DstOps[0], Cst->getSExtValue()); break; } + case TargetOpcode::G_SEXT_INREG: { + assert(DstOps.size() == 1 && "Invalid dst ops"); + assert(SrcOps.size() == 2 && "Invalid src ops"); + const DstOp &Dst = DstOps[0]; + const SrcOp &Src0 = SrcOps[0]; + const SrcOp &Src1 = SrcOps[1]; + if (auto MaybeCst = + ConstantFoldExtOp(Opc, Src0.getReg(), Src1.getImm(), *getMRI())) + return buildConstant(Dst, MaybeCst->getSExtValue()); + break; + } } bool CanCopy = checkCopyToDefsPossible(DstOps); if (!canPerformCSEForOpc(Opc)) diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp index a5d8205a34a8..cdad92f7db4f 100644 --- a/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -11,14 +11,16 @@ /// //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include 
"llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #define DEBUG_TYPE "call-lowering" @@ -32,66 +34,70 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, ImmutableCallSite CS, ArrayRef> ArgRegs, Register SwiftErrorVReg, std::function GetCalleeReg) const { + CallLoweringInfo Info; auto &DL = CS.getParent()->getParent()->getParent()->getDataLayout(); // First step is to marshall all the function's parameters into the correct // physregs and memory locations. Gather the sequence of argument types that // we'll pass to the assigner function. - SmallVector OrigArgs; unsigned i = 0; unsigned NumFixedArgs = CS.getFunctionType()->getNumParams(); for (auto &Arg : CS.args()) { ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{}, i < NumFixedArgs}; setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CS); - // We don't currently support swiftself args. - if (OrigArg.Flags.isSwiftSelf()) - return false; - OrigArgs.push_back(OrigArg); + Info.OrigArgs.push_back(OrigArg); ++i; } - MachineOperand Callee = MachineOperand::CreateImm(0); if (const Function *F = CS.getCalledFunction()) - Callee = MachineOperand::CreateGA(F, 0); + Info.Callee = MachineOperand::CreateGA(F, 0); else - Callee = MachineOperand::CreateReg(GetCalleeReg(), false); - - ArgInfo OrigRet{ResRegs, CS.getType(), ISD::ArgFlagsTy{}}; - if (!OrigRet.Ty->isVoidTy()) - setArgFlags(OrigRet, AttributeList::ReturnIndex, DL, CS); - - return lowerCall(MIRBuilder, CS.getCallingConv(), Callee, OrigRet, OrigArgs, - SwiftErrorVReg); + Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false); + + Info.OrigRet = ArgInfo{ResRegs, CS.getType(), ISD::ArgFlagsTy{}}; + if (!Info.OrigRet.Ty->isVoidTy()) + setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CS); + + Info.KnownCallees = + CS.getInstruction()->getMetadata(LLVMContext::MD_callees); + Info.CallConv = CS.getCallingConv(); + Info.SwiftErrorVReg = SwiftErrorVReg; + Info.IsMustTailCall = CS.isMustTailCall(); + Info.IsTailCall = CS.isTailCall() && + isInTailCallPosition(CS, MIRBuilder.getMF().getTarget()); + Info.IsVarArg = CS.getFunctionType()->isVarArg(); + return lowerCall(MIRBuilder, Info); } template void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, const FuncInfoTy &FuncInfo) const { + auto &Flags = Arg.Flags[0]; const AttributeList &Attrs = FuncInfo.getAttributes(); if (Attrs.hasAttribute(OpIdx, Attribute::ZExt)) - Arg.Flags.setZExt(); + Flags.setZExt(); if (Attrs.hasAttribute(OpIdx, Attribute::SExt)) - Arg.Flags.setSExt(); + Flags.setSExt(); if (Attrs.hasAttribute(OpIdx, Attribute::InReg)) - Arg.Flags.setInReg(); + Flags.setInReg(); if (Attrs.hasAttribute(OpIdx, Attribute::StructRet)) - Arg.Flags.setSRet(); + Flags.setSRet(); if (Attrs.hasAttribute(OpIdx, Attribute::SwiftSelf)) - Arg.Flags.setSwiftSelf(); + Flags.setSwiftSelf(); if (Attrs.hasAttribute(OpIdx, Attribute::SwiftError)) - Arg.Flags.setSwiftError(); + Flags.setSwiftError(); if (Attrs.hasAttribute(OpIdx, Attribute::ByVal)) - Arg.Flags.setByVal(); + Flags.setByVal(); if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca)) - Arg.Flags.setInAlloca(); + Flags.setInAlloca(); - if (Arg.Flags.isByVal() || Arg.Flags.isInAlloca()) { + if (Flags.isByVal() || Flags.isInAlloca()) { Type *ElementTy = cast(Arg.Ty)->getElementType(); auto Ty = Attrs.getAttribute(OpIdx, 
Attribute::ByVal).getValueAsType(); - Arg.Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy)); + Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. @@ -100,11 +106,11 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, FrameAlign = FuncInfo.getParamAlignment(OpIdx - 2); else FrameAlign = getTLI()->getByValTypeAlignment(ElementTy, DL); - Arg.Flags.setByValAlign(FrameAlign); + Flags.setByValAlign(Align(FrameAlign)); } if (Attrs.hasAttribute(OpIdx, Attribute::Nest)) - Arg.Flags.setNest(); - Arg.Flags.setOrigAlign(DL.getABITypeAlignment(Arg.Ty)); + Flags.setNest(); + Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty))); } template void @@ -159,7 +165,7 @@ void CallLowering::unpackRegs(ArrayRef DstRegs, Register SrcReg, } bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder, - ArrayRef Args, + SmallVectorImpl &Args, ValueHandler &Handler) const { MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); @@ -171,7 +177,7 @@ bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder, bool CallLowering::handleAssignments(CCState &CCInfo, SmallVectorImpl &ArgLocs, MachineIRBuilder &MIRBuilder, - ArrayRef Args, + SmallVectorImpl &Args, ValueHandler &Handler) const { MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); @@ -180,14 +186,99 @@ bool CallLowering::handleAssignments(CCState &CCInfo, unsigned NumArgs = Args.size(); for (unsigned i = 0; i != NumArgs; ++i) { MVT CurVT = MVT::getVT(Args[i].Ty); - if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo)) { - // Try to use the register type if we couldn't assign the VT. - if (!Handler.isArgumentHandler() || !CurVT.isValid()) + if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], + Args[i].Flags[0], CCInfo)) { + if (!CurVT.isValid()) return false; - CurVT = TLI->getRegisterTypeForCallingConv( + MVT NewVT = TLI->getRegisterTypeForCallingConv( F.getContext(), F.getCallingConv(), EVT(CurVT)); - if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo)) - return false; + + // If we need to split the type over multiple regs, check it's a scenario + // we currently support. + unsigned NumParts = TLI->getNumRegistersForCallingConv( + F.getContext(), F.getCallingConv(), CurVT); + if (NumParts > 1) { + // For now only handle exact splits. + if (NewVT.getSizeInBits() * NumParts != CurVT.getSizeInBits()) + return false; + } + + // For incoming arguments (physregs to vregs), we could have values in + // physregs (or memlocs) which we want to extract and copy to vregs. + // During this, we might have to deal with the LLT being split across + // multiple regs, so we have to record this information for later. + // + // If we have outgoing args, then we have the opposite case. We have a + // vreg with an LLT which we want to assign to a physical location, and + // we might have to record that the value has to be split later. + if (Handler.isIncomingArgumentHandler()) { + if (NumParts == 1) { + // Try to use the register type if we couldn't assign the VT. + if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], + Args[i].Flags[0], CCInfo)) + return false; + } else { + // We're handling an incoming arg which is split over multiple regs. + // E.g. passing an s128 on AArch64. 
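// --- Illustrative sketch (not part of the upstream change) ------------------
// What the incoming-argument split above amounts to for an s128 passed in two
// 64-bit registers: the parts arrive in separate vregs and are merged back
// into the wide value (the buildMerge call further down). The Clang/GCC
// __int128 extension is used purely for illustration.
#include <cstdint>
static unsigned __int128 mergeTwoParts(uint64_t Lo, uint64_t Hi) {
  return (static_cast<unsigned __int128>(Hi) << 64) | Lo; // low part first
}
// --- end of sketch ----------------------------------------------------------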
+ ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; + Args[i].OrigRegs.push_back(Args[i].Regs[0]); + Args[i].Regs.clear(); + Args[i].Flags.clear(); + LLT NewLLT = getLLTForMVT(NewVT); + // For each split register, create and assign a vreg that will store + // the incoming component of the larger value. These will later be + // merged to form the final vreg. + for (unsigned Part = 0; Part < NumParts; ++Part) { + Register Reg = + MIRBuilder.getMRI()->createGenericVirtualRegister(NewLLT); + ISD::ArgFlagsTy Flags = OrigFlags; + if (Part == 0) { + Flags.setSplit(); + } else { + Flags.setOrigAlign(Align::None()); + if (Part == NumParts - 1) + Flags.setSplitEnd(); + } + Args[i].Regs.push_back(Reg); + Args[i].Flags.push_back(Flags); + if (Handler.assignArg(i + Part, NewVT, NewVT, CCValAssign::Full, + Args[i], Args[i].Flags[Part], CCInfo)) { + // Still couldn't assign this smaller part type for some reason. + return false; + } + } + } + } else { + // Handling an outgoing arg that might need to be split. + if (NumParts < 2) + return false; // Don't know how to deal with this type combination. + + // This type is passed via multiple registers in the calling convention. + // We need to extract the individual parts. + Register LargeReg = Args[i].Regs[0]; + LLT SmallTy = LLT::scalar(NewVT.getSizeInBits()); + auto Unmerge = MIRBuilder.buildUnmerge(SmallTy, LargeReg); + assert(Unmerge->getNumOperands() == NumParts + 1); + ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; + // We're going to replace the regs and flags with the split ones. + Args[i].Regs.clear(); + Args[i].Flags.clear(); + for (unsigned PartIdx = 0; PartIdx < NumParts; ++PartIdx) { + ISD::ArgFlagsTy Flags = OrigFlags; + if (PartIdx == 0) { + Flags.setSplit(); + } else { + Flags.setOrigAlign(Align::None()); + if (PartIdx == NumParts - 1) + Flags.setSplitEnd(); + } + Args[i].Regs.push_back(Unmerge.getReg(PartIdx)); + Args[i].Flags.push_back(Flags); + if (Handler.assignArg(i + PartIdx, NewVT, NewVT, CCValAssign::Full, + Args[i], Args[i].Flags[PartIdx], CCInfo)) + return false; + } + } } } @@ -202,18 +293,32 @@ bool CallLowering::handleAssignments(CCState &CCInfo, continue; } - assert(Args[i].Regs.size() == 1 && - "Can't handle multiple virtual regs yet"); - // FIXME: Pack registers if we have more than one. Register ArgReg = Args[i].Regs[0]; + MVT OrigVT = MVT::getVT(Args[i].Ty); + MVT VAVT = VA.getValVT(); if (VA.isRegLoc()) { - MVT OrigVT = MVT::getVT(Args[i].Ty); - MVT VAVT = VA.getValVT(); - if (Handler.isArgumentHandler() && VAVT != OrigVT) { - if (VAVT.getSizeInBits() < OrigVT.getSizeInBits()) - return false; // Can't handle this type of arg yet. + if (Handler.isIncomingArgumentHandler() && VAVT != OrigVT) { + if (VAVT.getSizeInBits() < OrigVT.getSizeInBits()) { + // Expected to be multiple regs for a single incoming arg. + unsigned NumArgRegs = Args[i].Regs.size(); + if (NumArgRegs < 2) + return false; + + assert((j + (NumArgRegs - 1)) < ArgLocs.size() && + "Too many regs for number of args"); + for (unsigned Part = 0; Part < NumArgRegs; ++Part) { + // There should be Regs.size() ArgLocs per argument. + VA = ArgLocs[j + Part]; + Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA); + } + j += NumArgRegs - 1; + // Merge the split registers into the expected larger result vreg + // of the original call. 
+ MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs); + continue; + } const LLT VATy(VAVT); Register NewReg = MIRBuilder.getMRI()->createGenericVirtualRegister(VATy); @@ -234,10 +339,28 @@ bool CallLowering::handleAssignments(CCState &CCInfo, } else { MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0); } + } else if (!Handler.isIncomingArgumentHandler()) { + assert((j + (Args[i].Regs.size() - 1)) < ArgLocs.size() && + "Too many regs for number of args"); + // This is an outgoing argument that might have been split. + for (unsigned Part = 0; Part < Args[i].Regs.size(); ++Part) { + // There should be Regs.size() ArgLocs per argument. + VA = ArgLocs[j + Part]; + Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA); + } + j += Args[i].Regs.size() - 1; } else { Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA); } } else if (VA.isMemLoc()) { + // Don't currently support loading/storing a type that needs to be split + // to the stack. Should be easy, just not implemented yet. + if (Args[i].Regs.size() > 1) { + LLVM_DEBUG( + dbgs() + << "Load/store a split arg to/from the stack not implemented yet"); + return false; + } MVT VT = MVT::getVT(Args[i].Ty); unsigned Size = VT == MVT::iPTR ? DL.getPointerSize() : alignTo(VT.getSizeInBits(), 8) / 8; @@ -253,6 +376,81 @@ bool CallLowering::handleAssignments(CCState &CCInfo, return true; } +bool CallLowering::analyzeArgInfo(CCState &CCState, + SmallVectorImpl &Args, + CCAssignFn &AssignFnFixed, + CCAssignFn &AssignFnVarArg) const { + for (unsigned i = 0, e = Args.size(); i < e; ++i) { + MVT VT = MVT::getVT(Args[i].Ty); + CCAssignFn &Fn = Args[i].IsFixed ? AssignFnFixed : AssignFnVarArg; + if (Fn(i, VT, VT, CCValAssign::Full, Args[i].Flags[0], CCState)) { + // Bail out on anything we can't handle. + LLVM_DEBUG(dbgs() << "Cannot analyze " << EVT(VT).getEVTString() + << " (arg number = " << i << "\n"); + return false; + } + } + return true; +} + +bool CallLowering::resultsCompatible(CallLoweringInfo &Info, + MachineFunction &MF, + SmallVectorImpl &InArgs, + CCAssignFn &CalleeAssignFnFixed, + CCAssignFn &CalleeAssignFnVarArg, + CCAssignFn &CallerAssignFnFixed, + CCAssignFn &CallerAssignFnVarArg) const { + const Function &F = MF.getFunction(); + CallingConv::ID CalleeCC = Info.CallConv; + CallingConv::ID CallerCC = F.getCallingConv(); + + if (CallerCC == CalleeCC) + return true; + + SmallVector ArgLocs1; + CCState CCInfo1(CalleeCC, false, MF, ArgLocs1, F.getContext()); + if (!analyzeArgInfo(CCInfo1, InArgs, CalleeAssignFnFixed, + CalleeAssignFnVarArg)) + return false; + + SmallVector ArgLocs2; + CCState CCInfo2(CallerCC, false, MF, ArgLocs2, F.getContext()); + if (!analyzeArgInfo(CCInfo2, InArgs, CallerAssignFnFixed, + CalleeAssignFnVarArg)) + return false; + + // We need the argument locations to match up exactly. If there's more in + // one than the other, then we are done. + if (ArgLocs1.size() != ArgLocs2.size()) + return false; + + // Make sure that each location is passed in exactly the same way. + for (unsigned i = 0, e = ArgLocs1.size(); i < e; ++i) { + const CCValAssign &Loc1 = ArgLocs1[i]; + const CCValAssign &Loc2 = ArgLocs2[i]; + + // We need both of them to be the same. So if one is a register and one + // isn't, we're done. + if (Loc1.isRegLoc() != Loc2.isRegLoc()) + return false; + + if (Loc1.isRegLoc()) { + // If they don't have the same register location, we're done. + if (Loc1.getLocReg() != Loc2.getLocReg()) + return false; + + // They matched, so we can move to the next ArgLoc. 
+ continue; + } + + // Loc1 wasn't a RegLoc, so they both must be MemLocs. Check if they match. + if (Loc1.getLocMemOffset() != Loc2.getLocMemOffset()) + return false; + } + + return true; +} + Register CallLowering::ValueHandler::extendRegister(Register ValReg, CCValAssign &VA) { LLT LocTy{VA.getLocVT()}; diff --git a/lib/CodeGen/GlobalISel/Combiner.cpp b/lib/CodeGen/GlobalISel/Combiner.cpp index 31cb1dbbc9b5..b4562a5c6601 100644 --- a/lib/CodeGen/GlobalISel/Combiner.cpp +++ b/lib/CodeGen/GlobalISel/Combiner.cpp @@ -27,6 +27,18 @@ using namespace llvm; +namespace llvm { +cl::OptionCategory GICombinerOptionCategory( + "GlobalISel Combiner", + "Control the rules which are enabled. These options all take a comma " + "separated list of rules to disable and may be specified by number " + "or number range (e.g. 1-10)." +#ifndef NDEBUG + " They may also be specified by name." +#endif +); +} // end namespace llvm + namespace { /// This class acts as the glue the joins the CombinerHelper to the overall /// Combine algorithm. The CombinerHelper is intended to report the @@ -92,7 +104,7 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF, return false; Builder = - CSEInfo ? make_unique() : make_unique(); + CSEInfo ? std::make_unique() : std::make_unique(); MRI = &MF.getRegInfo(); Builder->setMF(MF); if (CSEInfo) diff --git a/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 9cbf3dd83ff1..854769d283f7 100644 --- a/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -8,19 +8,36 @@ #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" #include "llvm/CodeGen/GlobalISel/Combiner.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" #define DEBUG_TYPE "gi-combiner" using namespace llvm; +// Option to allow testing of the combiner while no targets know about indexed +// addressing. +static cl::opt + ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), + cl::desc("Force all indexed operations to be " + "legal for the GlobalISel combiner")); + + CombinerHelper::CombinerHelper(GISelChangeObserver &Observer, - MachineIRBuilder &B) - : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer) {} + MachineIRBuilder &B, GISelKnownBits *KB, + MachineDominatorTree *MDT) + : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), + KB(KB), MDT(MDT) { + (void)this->KB; +} void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const { @@ -55,8 +72,8 @@ bool CombinerHelper::tryCombineCopy(MachineInstr &MI) { bool CombinerHelper::matchCombineCopy(MachineInstr &MI) { if (MI.getOpcode() != TargetOpcode::COPY) return false; - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); LLT DstTy = MRI.getType(DstReg); LLT SrcTy = MRI.getType(SrcReg); // Simple Copy Propagation. 
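// --- Illustrative sketch (not part of the upstream change) ------------------
// The arithmetic behind folding G_SEXT_INREG of a constant, as in the
// CSEMIRBuilder hunk above: sign-extend the low B bits of a 64-bit value.
// Assumes 1 <= B <= 64 and two's-complement shift semantics.
#include <cstdint>
static int64_t signExtendInReg(int64_t Val, unsigned B) {
  unsigned Shift = 64 - B;
  return static_cast<int64_t>(static_cast<uint64_t>(Val) << Shift) >> Shift;
}
// signExtendInReg(0xFF, 8) == -1 and signExtendInReg(0x7F, 8) == 127.
// --- end of sketch ----------------------------------------------------------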
@@ -66,12 +83,183 @@ bool CombinerHelper::matchCombineCopy(MachineInstr &MI) { return false; } void CombinerHelper::applyCombineCopy(MachineInstr &MI) { - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); MI.eraseFromParent(); replaceRegWith(MRI, DstReg, SrcReg); } +bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) { + bool IsUndef = false; + SmallVector Ops; + if (matchCombineConcatVectors(MI, IsUndef, Ops)) { + applyCombineConcatVectors(MI, IsUndef, Ops); + return true; + } + return false; +} + +bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef, + SmallVectorImpl &Ops) { + assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && + "Invalid instruction"); + IsUndef = true; + MachineInstr *Undef = nullptr; + + // Walk over all the operands of concat vectors and check if they are + // build_vector themselves or undef. + // Then collect their operands in Ops. + for (const MachineOperand &MO : MI.operands()) { + // Skip the instruction definition. + if (MO.isDef()) + continue; + Register Reg = MO.getReg(); + MachineInstr *Def = MRI.getVRegDef(Reg); + assert(Def && "Operand not defined"); + switch (Def->getOpcode()) { + case TargetOpcode::G_BUILD_VECTOR: + IsUndef = false; + // Remember the operands of the build_vector to fold + // them into the yet-to-build flattened concat vectors. + for (const MachineOperand &BuildVecMO : Def->operands()) { + // Skip the definition. + if (BuildVecMO.isDef()) + continue; + Ops.push_back(BuildVecMO.getReg()); + } + break; + case TargetOpcode::G_IMPLICIT_DEF: { + LLT OpType = MRI.getType(Reg); + // Keep one undef value for all the undef operands. + if (!Undef) { + Builder.setInsertPt(*MI.getParent(), MI); + Undef = Builder.buildUndef(OpType.getScalarType()); + } + assert(MRI.getType(Undef->getOperand(0).getReg()) == + OpType.getScalarType() && + "All undefs should have the same type"); + // Break the undef vector in as many scalar elements as needed + // for the flattening. + for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements(); + EltIdx != EltEnd; ++EltIdx) + Ops.push_back(Undef->getOperand(0).getReg()); + break; + } + default: + return false; + } + } + return true; +} +void CombinerHelper::applyCombineConcatVectors( + MachineInstr &MI, bool IsUndef, const ArrayRef Ops) { + // We determined that the concat_vectors can be flatten. + // Generate the flattened build_vector. + Register DstReg = MI.getOperand(0).getReg(); + Builder.setInsertPt(*MI.getParent(), MI); + Register NewDstReg = MRI.cloneVirtualRegister(DstReg); + + // Note: IsUndef is sort of redundant. We could have determine it by + // checking that at all Ops are undef. Alternatively, we could have + // generate a build_vector of undefs and rely on another combine to + // clean that up. For now, given we already gather this information + // in tryCombineConcatVectors, just save compile time and issue the + // right thing. 
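// --- Illustrative sketch (not part of the upstream change) ------------------
// The flattening performed by the concat_vectors combine above, modelled on
// plain containers: the operands of each inner build_vector become the
// operands of a single wider build_vector.
#include <vector>
static std::vector<int> flattenConcat(const std::vector<std::vector<int>> &Parts) {
  std::vector<int> Flat;
  for (const auto &Part : Parts)
    Flat.insert(Flat.end(), Part.begin(), Part.end());
  return Flat;
}
// flattenConcat({{1, 2}, {3, 4}}) == {1, 2, 3, 4}
// --- end of sketch ----------------------------------------------------------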
+ if (IsUndef) + Builder.buildUndef(NewDstReg); + else + Builder.buildBuildVector(NewDstReg, Ops); + MI.eraseFromParent(); + replaceRegWith(MRI, DstReg, NewDstReg); +} + +bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) { + SmallVector Ops; + if (matchCombineShuffleVector(MI, Ops)) { + applyCombineShuffleVector(MI, Ops); + return true; + } + return false; +} + +bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI, + SmallVectorImpl &Ops) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && + "Invalid instruction kind"); + LLT DstType = MRI.getType(MI.getOperand(0).getReg()); + Register Src1 = MI.getOperand(1).getReg(); + LLT SrcType = MRI.getType(Src1); + unsigned DstNumElts = DstType.getNumElements(); + unsigned SrcNumElts = SrcType.getNumElements(); + + // If the resulting vector is smaller than the size of the source + // vectors being concatenated, we won't be able to replace the + // shuffle vector into a concat_vectors. + // + // Note: We may still be able to produce a concat_vectors fed by + // extract_vector_elt and so on. It is less clear that would + // be better though, so don't bother for now. + if (DstNumElts < 2 * SrcNumElts) + return false; + + // Check that the shuffle mask can be broken evenly between the + // different sources. + if (DstNumElts % SrcNumElts != 0) + return false; + + // Mask length is a multiple of the source vector length. + // Check if the shuffle is some kind of concatenation of the input + // vectors. + unsigned NumConcat = DstNumElts / SrcNumElts; + SmallVector ConcatSrcs(NumConcat, -1); + SmallVector Mask; + ShuffleVectorInst::getShuffleMask(MI.getOperand(3).getShuffleMask(), Mask); + for (unsigned i = 0; i != DstNumElts; ++i) { + int Idx = Mask[i]; + // Undef value. + if (Idx < 0) + continue; + // Ensure the indices in each SrcType sized piece are sequential and that + // the same source is used for the whole piece. + if ((Idx % SrcNumElts != (i % SrcNumElts)) || + (ConcatSrcs[i / SrcNumElts] >= 0 && + ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) + return false; + // Remember which source this index came from. + ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts; + } + + // The shuffle is concatenating multiple vectors together. + // Collect the different operands for that. + Register UndefReg; + Register Src2 = MI.getOperand(2).getReg(); + for (auto Src : ConcatSrcs) { + if (Src < 0) { + if (!UndefReg) { + Builder.setInsertPt(*MI.getParent(), MI); + UndefReg = Builder.buildUndef(SrcType).getReg(0); + } + Ops.push_back(UndefReg); + } else if (Src == 0) + Ops.push_back(Src1); + else + Ops.push_back(Src2); + } + return true; +} + +void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI, + const ArrayRef Ops) { + Register DstReg = MI.getOperand(0).getReg(); + Builder.setInsertPt(*MI.getParent(), MI); + Register NewDstReg = MRI.cloneVirtualRegister(DstReg); + + Builder.buildConcatVectors(NewDstReg, Ops); + + MI.eraseFromParent(); + replaceRegWith(MRI, DstReg, NewDstReg); +} + namespace { /// Select a preference between two uses. CurrentUse is the current preference @@ -279,7 +467,7 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI, // up the type and extend so that it uses the preferred use. 
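// --- Illustrative sketch (not part of the upstream change) ------------------
// The mask test performed in matchCombineShuffleVector above, on a plain
// std::vector: a shuffle is a concatenation when every SrcNumElts-wide piece
// of the mask is sequential and drawn from a single source (negative entries
// are undef lanes).
#include <vector>
static bool isConcatMask(const std::vector<int> &Mask, int SrcNumElts) {
  if ((int)Mask.size() < 2 * SrcNumElts || (int)Mask.size() % SrcNumElts != 0)
    return false;
  std::vector<int> ConcatSrcs(Mask.size() / SrcNumElts, -1);
  for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
    int Idx = Mask[i];
    if (Idx < 0)
      continue;                  // undef lane, matches anything
    if (Idx % SrcNumElts != i % SrcNumElts)
      return false;              // lanes within a piece must stay in order
    int &Src = ConcatSrcs[i / SrcNumElts];
    if (Src >= 0 && Src != Idx / SrcNumElts)
      return false;              // a piece may not mix two sources
    Src = Idx / SrcNumElts;
  }
  return true;
}
// isConcatMask({0, 1, 2, 3, 4, 5, 6, 7}, 4) == true (src1 followed by src2).
// --- end of sketch ----------------------------------------------------------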
if (UseMI->getOpcode() == Preferred.ExtendOpcode || UseMI->getOpcode() == TargetOpcode::G_ANYEXT) { - unsigned UseDstReg = UseMI->getOperand(0).getReg(); + Register UseDstReg = UseMI->getOperand(0).getReg(); MachineOperand &UseSrcMO = UseMI->getOperand(1); const LLT &UseDstTy = MRI.getType(UseDstReg); if (UseDstReg != ChosenDstReg) { @@ -342,8 +530,212 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI, Observer.changedInstr(MI); } -bool CombinerHelper::matchCombineBr(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_BR && "Expected a G_BR"); +bool CombinerHelper::isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI) { + assert(DefMI.getParent() == UseMI.getParent()); + if (&DefMI == &UseMI) + return false; + + // Loop through the basic block until we find one of the instructions. + MachineBasicBlock::const_iterator I = DefMI.getParent()->begin(); + for (; &*I != &DefMI && &*I != &UseMI; ++I) + return &*I == &DefMI; + + llvm_unreachable("Block must contain instructions"); +} + +bool CombinerHelper::dominates(MachineInstr &DefMI, MachineInstr &UseMI) { + if (MDT) + return MDT->dominates(&DefMI, &UseMI); + else if (DefMI.getParent() != UseMI.getParent()) + return false; + + return isPredecessor(DefMI, UseMI); +} + +bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, + Register &Base, Register &Offset) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + +#ifndef NDEBUG + unsigned Opcode = MI.getOpcode(); + assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD || + Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE); +#endif + + Base = MI.getOperand(1).getReg(); + MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base); + if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) + return false; + + LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI); + + for (auto &Use : MRI.use_instructions(Base)) { + if (Use.getOpcode() != TargetOpcode::G_GEP) + continue; + + Offset = Use.getOperand(2).getReg(); + if (!ForceLegalIndexing && + !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) { + LLVM_DEBUG(dbgs() << " Ignoring candidate with illegal addrmode: " + << Use); + continue; + } + + // Make sure the offset calculation is before the potentially indexed op. + // FIXME: we really care about dependency here. The offset calculation might + // be movable. + MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset); + if (!OffsetDef || !dominates(*OffsetDef, MI)) { + LLVM_DEBUG(dbgs() << " Ignoring candidate with offset after mem-op: " + << Use); + continue; + } + + // FIXME: check whether all uses of Base are load/store with foldable + // addressing modes. If so, using the normal addr-modes is better than + // forming an indexed one. 
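// --- Illustrative sketch (not part of the upstream change) ------------------
// The source-level shape findPostIndexCandidate looks for: a load or store
// followed by a G_GEP that advances the same base pointer. Targets such as
// AArch64 can fold the pair into a single post-indexed access, e.g.
// "ldr x0, [x1], #8".
static long loadThenAdvance(long *&Ptr) {
  long Val = *Ptr; // the memory operation
  Ptr += 1;        // the G_GEP that becomes the post-increment
  return Val;
}
// --- end of sketch ----------------------------------------------------------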
+ + bool MemOpDominatesAddrUses = true; + for (auto &GEPUse : MRI.use_instructions(Use.getOperand(0).getReg())) { + if (!dominates(MI, GEPUse)) { + MemOpDominatesAddrUses = false; + break; + } + } + + if (!MemOpDominatesAddrUses) { + LLVM_DEBUG( + dbgs() << " Ignoring candidate as memop does not dominate uses: " + << Use); + continue; + } + + LLVM_DEBUG(dbgs() << " Found match: " << Use); + Addr = Use.getOperand(0).getReg(); + return true; + } + + return false; +} + +bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr, + Register &Base, Register &Offset) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + +#ifndef NDEBUG + unsigned Opcode = MI.getOpcode(); + assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD || + Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE); +#endif + + Addr = MI.getOperand(1).getReg(); + MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_GEP, Addr, MRI); + if (!AddrDef || MRI.hasOneUse(Addr)) + return false; + + Base = AddrDef->getOperand(1).getReg(); + Offset = AddrDef->getOperand(2).getReg(); + + LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI); + + if (!ForceLegalIndexing && + !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) { + LLVM_DEBUG(dbgs() << " Skipping, not legal for target"); + return false; + } + + MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI); + if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { + LLVM_DEBUG(dbgs() << " Skipping, frame index would need copy anyway."); + return false; + } + + if (MI.getOpcode() == TargetOpcode::G_STORE) { + // Would require a copy. + if (Base == MI.getOperand(0).getReg()) { + LLVM_DEBUG(dbgs() << " Skipping, storing base so need copy anyway."); + return false; + } + + // We're expecting one use of Addr in MI, but it could also be the + // value stored, which isn't actually dominated by the instruction. + if (MI.getOperand(0).getReg() == Addr) { + LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses"); + return false; + } + } + + // FIXME: check whether all uses of the base pointer are constant GEPs. That + // might allow us to end base's liveness here by adjusting the constant. 
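// --- Illustrative sketch (not part of the upstream change) ------------------
// The pre-indexed counterpart handled by findPreIndexCandidate: the address is
// advanced first and the memory operation uses the updated pointer, which maps
// to a single writeback access such as "ldr x0, [x1, #8]!" on AArch64.
static long advanceThenLoad(long *&Ptr) {
  Ptr += 1;    // the G_GEP producing the new address
  return *Ptr; // the load from the updated pointer
}
// --- end of sketch ----------------------------------------------------------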
+ + for (auto &UseMI : MRI.use_instructions(Addr)) { + if (!dominates(MI, UseMI)) { + LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses."); + return false; + } + } + + return true; +} + +bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) { + unsigned Opcode = MI.getOpcode(); + if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD && + Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE) + return false; + + bool IsStore = Opcode == TargetOpcode::G_STORE; + Register Addr, Base, Offset; + bool IsPre = findPreIndexCandidate(MI, Addr, Base, Offset); + if (!IsPre && !findPostIndexCandidate(MI, Addr, Base, Offset)) + return false; + + + unsigned NewOpcode; + switch (Opcode) { + case TargetOpcode::G_LOAD: + NewOpcode = TargetOpcode::G_INDEXED_LOAD; + break; + case TargetOpcode::G_SEXTLOAD: + NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD; + break; + case TargetOpcode::G_ZEXTLOAD: + NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD; + break; + case TargetOpcode::G_STORE: + NewOpcode = TargetOpcode::G_INDEXED_STORE; + break; + default: + llvm_unreachable("Unknown load/store opcode"); + } + + MachineInstr &AddrDef = *MRI.getUniqueVRegDef(Addr); + MachineIRBuilder MIRBuilder(MI); + auto MIB = MIRBuilder.buildInstr(NewOpcode); + if (IsStore) { + MIB.addDef(Addr); + MIB.addUse(MI.getOperand(0).getReg()); + } else { + MIB.addDef(MI.getOperand(0).getReg()); + MIB.addDef(Addr); + } + + MIB.addUse(Base); + MIB.addUse(Offset); + MIB.addImm(IsPre); + MI.eraseFromParent(); + AddrDef.eraseFromParent(); + + LLVM_DEBUG(dbgs() << " Combinined to indexed operation"); + return true; +} + +bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) { + if (MI.getOpcode() != TargetOpcode::G_BR) + return false; + // Try to match the following: // bb1: // %c(s32) = G_ICMP pred, %a, %b @@ -380,9 +772,14 @@ bool CombinerHelper::matchCombineBr(MachineInstr &MI) { return true; } -bool CombinerHelper::tryCombineBr(MachineInstr &MI) { - if (!matchCombineBr(MI)) +bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) { + if (!matchElideBrByInvertingCond(MI)) return false; + applyElideBrByInvertingCond(MI); + return true; +} + +void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) { MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB(); MachineBasicBlock::iterator BrIt(MI); MachineInstr *BrCond = &*std::prev(BrIt); @@ -401,11 +798,509 @@ bool CombinerHelper::tryCombineBr(MachineInstr &MI) { BrCond->getOperand(1).setMBB(BrTarget); Observer.changedInstr(*BrCond); MI.eraseFromParent(); +} + +static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { + // On Darwin, -Os means optimize for size without hurting performance, so + // only really optimize for size when -Oz (MinSize) is used. + if (MF.getTarget().getTargetTriple().isOSDarwin()) + return MF.getFunction().hasMinSize(); + return MF.getFunction().hasOptSize(); +} + +// Returns a list of types to use for memory op lowering in MemOps. A partial +// port of findOptimalMemOpLowering in TargetLowering. +static bool findGISelOptimalMemOpLowering( + std::vector &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign, + unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, + bool AllowOverlap, unsigned DstAS, unsigned SrcAS, + const AttributeList &FuncAttributes, const TargetLowering &TLI) { + // If 'SrcAlign' is zero, that means the memory operation does not need to + // load the value, i.e. memset or memcpy from constant string. 
Otherwise, + // it's the inferred alignment of the source. 'DstAlign', on the other hand, + // is the specified alignment of the memory operation. If it is zero, that + // means it's possible to change the alignment of the destination. + // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does + // not need to be loaded. + if (SrcAlign != 0 && SrcAlign < DstAlign) + return false; + + LLT Ty = TLI.getOptimalMemOpLLT(Size, DstAlign, SrcAlign, IsMemset, + ZeroMemset, MemcpyStrSrc, FuncAttributes); + + if (Ty == LLT()) { + // Use the largest scalar type whose alignment constraints are satisfied. + // We only need to check DstAlign here as SrcAlign is always greater or + // equal to DstAlign (or zero). + Ty = LLT::scalar(64); + while (DstAlign && DstAlign < Ty.getSizeInBytes() && + !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, DstAlign)) + Ty = LLT::scalar(Ty.getSizeInBytes()); + assert(Ty.getSizeInBits() > 0 && "Could not find valid type"); + // FIXME: check for the largest legal type we can load/store to. + } + + unsigned NumMemOps = 0; + while (Size != 0) { + unsigned TySize = Ty.getSizeInBytes(); + while (TySize > Size) { + // For now, only use non-vector load / store's for the left-over pieces. + LLT NewTy = Ty; + // FIXME: check for mem op safety and legality of the types. Not all of + // SDAGisms map cleanly to GISel concepts. + if (NewTy.isVector()) + NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32); + NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1)); + unsigned NewTySize = NewTy.getSizeInBytes(); + assert(NewTySize > 0 && "Could not find appropriate type"); + + // If the new LLT cannot cover all of the remaining bits, then consider + // issuing a (or a pair of) unaligned and overlapping load / store. + bool Fast; + // Need to get a VT equivalent for allowMisalignedMemoryAccesses(). + MVT VT = getMVTForLLT(Ty); + if (NumMemOps && AllowOverlap && NewTySize < Size && + TLI.allowsMisalignedMemoryAccesses( + VT, DstAS, DstAlign, MachineMemOperand::MONone, &Fast) && + Fast) + TySize = Size; + else { + Ty = NewTy; + TySize = NewTySize; + } + } + + if (++NumMemOps > Limit) + return false; + + MemOps.push_back(Ty); + Size -= TySize; + } + + return true; +} + +static Type *getTypeForLLT(LLT Ty, LLVMContext &C) { + if (Ty.isVector()) + return VectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()), + Ty.getNumElements()); + return IntegerType::get(C, Ty.getSizeInBits()); +} + +// Get a vectorized representation of the memset value operand, GISel edition. +static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { + MachineRegisterInfo &MRI = *MIB.getMRI(); + unsigned NumBits = Ty.getScalarSizeInBits(); + auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI); + if (!Ty.isVector() && ValVRegAndVal) { + unsigned KnownVal = ValVRegAndVal->Value; + APInt Scalar = APInt(8, KnownVal); + APInt SplatVal = APInt::getSplat(NumBits, Scalar); + return MIB.buildConstant(Ty, SplatVal).getReg(0); + } + // FIXME: for vector types create a G_BUILD_VECTOR. + if (Ty.isVector()) + return Register(); + + // Extend the byte value to the larger type, and then multiply by a magic + // value 0x010101... in order to replicate it across every byte. 
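// --- Illustrative sketch (not part of the upstream change) ------------------
// The "magic multiply" described in the comment above, shown for an 8-bit fill
// value widened to a 32-bit store type.
#include <cstdint>
static uint32_t splatByteTo32(uint8_t Byte) {
  return static_cast<uint32_t>(Byte) * 0x01010101u; // copies Byte into every byte
}
// splatByteTo32(0xAB) == 0xABABABABu
// --- end of sketch ----------------------------------------------------------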
+ LLT ExtType = Ty.getScalarType(); + auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val); + if (NumBits > 8) { + APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); + auto MagicMI = MIB.buildConstant(ExtType, Magic); + Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0); + } + + assert(ExtType == Ty && "Vector memset value type not supported yet"); + return Val; +} + +bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val, + unsigned KnownLen, unsigned Align, + bool IsVolatile) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + auto &DL = MF.getDataLayout(); + LLVMContext &C = MF.getFunction().getContext(); + + assert(KnownLen != 0 && "Have a zero length memset length!"); + + bool DstAlignCanChange = false; + MachineFrameInfo &MFI = MF.getFrameInfo(); + bool OptSize = shouldLowerMemFuncForSize(MF); + + MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); + if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) + DstAlignCanChange = true; + + unsigned Limit = TLI.getMaxStoresPerMemset(OptSize); + std::vector MemOps; + + const auto &DstMMO = **MI.memoperands_begin(); + MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); + + auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI); + bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0; + + if (!findGISelOptimalMemOpLowering( + MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Align), 0, + /*IsMemset=*/true, + /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false, + /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), ~0u, + MF.getFunction().getAttributes(), TLI)) + return false; + + if (DstAlignCanChange) { + // Get an estimate of the type from the LLT. + Type *IRTy = getTypeForLLT(MemOps[0], C); + unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy); + if (NewAlign > Align) { + Align = NewAlign; + unsigned FI = FIDef->getOperand(1).getIndex(); + // Give the stack frame object a larger alignment if needed. + if (MFI.getObjectAlignment(FI) < Align) + MFI.setObjectAlignment(FI, Align); + } + } + + MachineIRBuilder MIB(MI); + // Find the largest store and generate the bit pattern for it. + LLT LargestTy = MemOps[0]; + for (unsigned i = 1; i < MemOps.size(); i++) + if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits()) + LargestTy = MemOps[i]; + + // The memset stored value is always defined as an s8, so in order to make it + // work with larger store types we need to repeat the bit pattern across the + // wider type. + Register MemSetValue = getMemsetValue(Val, LargestTy, MIB); + + if (!MemSetValue) + return false; + + // Generate the stores. For each store type in the list, we generate the + // matching store of that type to the destination address. + LLT PtrTy = MRI.getType(Dst); + unsigned DstOff = 0; + unsigned Size = KnownLen; + for (unsigned I = 0; I < MemOps.size(); I++) { + LLT Ty = MemOps[I]; + unsigned TySize = Ty.getSizeInBytes(); + if (TySize > Size) { + // Issuing an unaligned load / store pair that overlaps with the previous + // pair. Adjust the offset accordingly. + assert(I == MemOps.size() - 1 && I != 0); + DstOff -= TySize - Size; + } + + // If this store is smaller than the largest store see whether we can get + // the smaller value for free with a truncate. 
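// --- Illustrative sketch (not part of the upstream change) ------------------
// A simplified model of how a known length is broken into store sizes, in the
// spirit of the MemOps list walked above. Real lowering also considers
// alignment, type legality and overlapping accesses, which are ignored here;
// MaxBytes is assumed to be a power of two >= 1.
#include <cstdint>
#include <vector>
static std::vector<unsigned> greedyStoreSizes(uint64_t Size, unsigned MaxBytes) {
  std::vector<unsigned> Sizes;
  for (unsigned Bytes = MaxBytes; Size != 0;) {
    if (Bytes <= Size) {
      Sizes.push_back(Bytes);
      Size -= Bytes;
    } else {
      Bytes /= 2; // fall back to the next smaller power of two
    }
  }
  return Sizes;
}
// greedyStoreSizes(13, 8) == {8, 4, 1}: one 8-byte, one 4-byte and one 1-byte store.
// --- end of sketch ----------------------------------------------------------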
+ Register Value = MemSetValue; + if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) { + MVT VT = getMVTForLLT(Ty); + MVT LargestVT = getMVTForLLT(LargestTy); + if (!LargestTy.isVector() && !Ty.isVector() && + TLI.isTruncateFree(LargestVT, VT)) + Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0); + else + Value = getMemsetValue(Val, Ty, MIB); + if (!Value) + return false; + } + + auto *StoreMMO = + MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes()); + + Register Ptr = Dst; + if (DstOff != 0) { + auto Offset = + MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff); + Ptr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0); + } + + MIB.buildStore(Value, Ptr, *StoreMMO); + DstOff += Ty.getSizeInBytes(); + Size -= TySize; + } + + MI.eraseFromParent(); + return true; +} + + +bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, + Register Src, unsigned KnownLen, + unsigned DstAlign, unsigned SrcAlign, + bool IsVolatile) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + auto &DL = MF.getDataLayout(); + LLVMContext &C = MF.getFunction().getContext(); + + assert(KnownLen != 0 && "Have a zero length memcpy length!"); + + bool DstAlignCanChange = false; + MachineFrameInfo &MFI = MF.getFrameInfo(); + bool OptSize = shouldLowerMemFuncForSize(MF); + unsigned Alignment = MinAlign(DstAlign, SrcAlign); + + MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); + if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) + DstAlignCanChange = true; + + // FIXME: infer better src pointer alignment like SelectionDAG does here. + // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining + // if the memcpy is in a tail call position. + + unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize); + std::vector MemOps; + + const auto &DstMMO = **MI.memoperands_begin(); + const auto &SrcMMO = **std::next(MI.memoperands_begin()); + MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); + MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo(); + + if (!findGISelOptimalMemOpLowering( + MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment), + SrcAlign, + /*IsMemset=*/false, + /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, + /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), + SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI)) + return false; + + if (DstAlignCanChange) { + // Get an estimate of the type from the LLT. + Type *IRTy = getTypeForLLT(MemOps[0], C); + unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy); + + // Don't promote to an alignment that would require dynamic stack + // realignment. + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (!TRI->needsStackRealignment(MF)) + while (NewAlign > Alignment && + DL.exceedsNaturalStackAlignment(Align(NewAlign))) + NewAlign /= 2; + + if (NewAlign > Alignment) { + Alignment = NewAlign; + unsigned FI = FIDef->getOperand(1).getIndex(); + // Give the stack frame object a larger alignment if needed. + if (MFI.getObjectAlignment(FI) < Alignment) + MFI.setObjectAlignment(FI, Alignment); + } + } + + LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n"); + + MachineIRBuilder MIB(MI); + // Now we need to emit a pair of load and stores for each of the types we've + // collected. I.e. for each type, generate a load from the source pointer of + // that type width, and then generate a corresponding store to the dest buffer + // of that value loaded. 
This can result in a sequence of loads and stores + // mixed types, depending on what the target specifies as good types to use. + unsigned CurrOffset = 0; + LLT PtrTy = MRI.getType(Src); + unsigned Size = KnownLen; + for (auto CopyTy : MemOps) { + // Issuing an unaligned load / store pair that overlaps with the previous + // pair. Adjust the offset accordingly. + if (CopyTy.getSizeInBytes() > Size) + CurrOffset -= CopyTy.getSizeInBytes() - Size; + + // Construct MMOs for the accesses. + auto *LoadMMO = + MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes()); + auto *StoreMMO = + MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes()); + + // Create the load. + Register LoadPtr = Src; + Register Offset; + if (CurrOffset != 0) { + Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset) + .getReg(0); + LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0); + } + auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO); + + // Create the store. + Register StorePtr = + CurrOffset == 0 ? Dst : MIB.buildGEP(PtrTy, Dst, Offset).getReg(0); + MIB.buildStore(LdVal, StorePtr, *StoreMMO); + CurrOffset += CopyTy.getSizeInBytes(); + Size -= CopyTy.getSizeInBytes(); + } + + MI.eraseFromParent(); return true; } +bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, + Register Src, unsigned KnownLen, + unsigned DstAlign, unsigned SrcAlign, + bool IsVolatile) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + auto &DL = MF.getDataLayout(); + LLVMContext &C = MF.getFunction().getContext(); + + assert(KnownLen != 0 && "Have a zero length memmove length!"); + + bool DstAlignCanChange = false; + MachineFrameInfo &MFI = MF.getFrameInfo(); + bool OptSize = shouldLowerMemFuncForSize(MF); + unsigned Alignment = MinAlign(DstAlign, SrcAlign); + + MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); + if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) + DstAlignCanChange = true; + + unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize); + std::vector MemOps; + + const auto &DstMMO = **MI.memoperands_begin(); + const auto &SrcMMO = **std::next(MI.memoperands_begin()); + MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); + MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo(); + + // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due + // to a bug in it's findOptimalMemOpLowering implementation. For now do the + // same thing here. + if (!findGISelOptimalMemOpLowering( + MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment), + SrcAlign, + /*IsMemset=*/false, + /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, + /*AllowOverlap=*/false, DstPtrInfo.getAddrSpace(), + SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI)) + return false; + + if (DstAlignCanChange) { + // Get an estimate of the type from the LLT. + Type *IRTy = getTypeForLLT(MemOps[0], C); + unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy); + + // Don't promote to an alignment that would require dynamic stack + // realignment. + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (!TRI->needsStackRealignment(MF)) + while (NewAlign > Alignment && + DL.exceedsNaturalStackAlignment(Align(NewAlign))) + NewAlign /= 2; + + if (NewAlign > Alignment) { + Alignment = NewAlign; + unsigned FI = FIDef->getOperand(1).getIndex(); + // Give the stack frame object a larger alignment if needed. 
+ if (MFI.getObjectAlignment(FI) < Alignment) + MFI.setObjectAlignment(FI, Alignment); + } + } + + LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n"); + + MachineIRBuilder MIB(MI); + // Memmove requires that we perform the loads first before issuing the stores. + // Apart from that, this loop is pretty much doing the same thing as the + // memcpy codegen function. + unsigned CurrOffset = 0; + LLT PtrTy = MRI.getType(Src); + SmallVector LoadVals; + for (auto CopyTy : MemOps) { + // Construct MMO for the load. + auto *LoadMMO = + MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes()); + + // Create the load. + Register LoadPtr = Src; + if (CurrOffset != 0) { + auto Offset = + MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); + LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0); + } + LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0)); + CurrOffset += CopyTy.getSizeInBytes(); + } + + CurrOffset = 0; + for (unsigned I = 0; I < MemOps.size(); ++I) { + LLT CopyTy = MemOps[I]; + // Now store the values loaded. + auto *StoreMMO = + MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes()); + + Register StorePtr = Dst; + if (CurrOffset != 0) { + auto Offset = + MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); + StorePtr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0); + } + MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO); + CurrOffset += CopyTy.getSizeInBytes(); + } + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { + // This combine is fairly complex so it's not written with a separate + // matcher function. + assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS); + Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID(); + assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove || + ID == Intrinsic::memset) && + "Expected a memcpy like intrinsic"); + + auto MMOIt = MI.memoperands_begin(); + const MachineMemOperand *MemOp = *MMOIt; + bool IsVolatile = MemOp->isVolatile(); + // Don't try to optimize volatile. + if (IsVolatile) + return false; + + unsigned DstAlign = MemOp->getBaseAlignment(); + unsigned SrcAlign = 0; + Register Dst = MI.getOperand(1).getReg(); + Register Src = MI.getOperand(2).getReg(); + Register Len = MI.getOperand(3).getReg(); + + if (ID != Intrinsic::memset) { + assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI"); + MemOp = *(++MMOIt); + SrcAlign = MemOp->getBaseAlignment(); + } + + // See if this is a constant length copy + auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI); + if (!LenVRegAndVal) + return false; // Leave it to the legalizer to lower it to a libcall. 
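// --- Illustrative sketch (not part of the upstream change) ------------------
// The length gating applied above, on plain values: only a known, non-zero
// length no larger than the optional cap is inlined; a zero length lets the
// intrinsic be erased outright, and anything else is left for a libcall.
#include <cstdint>
#include <optional>
enum class MemOpAction { Erase, Inline, Libcall };
static MemOpAction classifyMemOp(std::optional<uint64_t> KnownLen,
                                 uint64_t MaxLen) {
  if (!KnownLen)
    return MemOpAction::Libcall; // length is not a compile-time constant
  if (*KnownLen == 0)
    return MemOpAction::Erase;   // nothing to copy or set
  if (MaxLen && *KnownLen > MaxLen)
    return MemOpAction::Libcall; // over the inlining threshold
  return MemOpAction::Inline;
}
// --- end of sketch ----------------------------------------------------------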
+ unsigned KnownLen = LenVRegAndVal->Value; + + if (KnownLen == 0) { + MI.eraseFromParent(); + return true; + } + + if (MaxLen && KnownLen > MaxLen) + return false; + + if (ID == Intrinsic::memcpy) + return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); + if (ID == Intrinsic::memmove) + return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); + if (ID == Intrinsic::memset) + return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile); + return false; +} + bool CombinerHelper::tryCombine(MachineInstr &MI) { if (tryCombineCopy(MI)) return true; - return tryCombineExtendingLoads(MI); + if (tryCombineExtendingLoads(MI)) + return true; + if (tryCombineIndexedLoadStore(MI)) + return true; + return false; } diff --git a/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/lib/CodeGen/GlobalISel/GISelKnownBits.cpp new file mode 100644 index 000000000000..be8efa8795f3 --- /dev/null +++ b/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -0,0 +1,383 @@ +//===- lib/CodeGen/GlobalISel/GISelKnownBits.cpp --------------*- C++ *-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// Provides analysis for querying information about KnownBits during GISel +/// passes. +// +//===------------------ +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" + +#define DEBUG_TYPE "gisel-known-bits" + +using namespace llvm; + +char llvm::GISelKnownBitsAnalysis::ID = 0; + +INITIALIZE_PASS_BEGIN(GISelKnownBitsAnalysis, DEBUG_TYPE, + "Analysis for ComputingKnownBits", false, true) +INITIALIZE_PASS_END(GISelKnownBitsAnalysis, DEBUG_TYPE, + "Analysis for ComputingKnownBits", false, true) + +GISelKnownBits::GISelKnownBits(MachineFunction &MF) + : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()), + DL(MF.getFunction().getParent()->getDataLayout()) {} + +Align GISelKnownBits::inferAlignmentForFrameIdx(int FrameIdx, int Offset, + const MachineFunction &MF) { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + return commonAlignment(Align(MFI.getObjectAlignment(FrameIdx)), Offset); + // TODO: How to handle cases with Base + Offset? +} + +MaybeAlign GISelKnownBits::inferPtrAlignment(const MachineInstr &MI) { + if (MI.getOpcode() == TargetOpcode::G_FRAME_INDEX) { + int FrameIdx = MI.getOperand(1).getIndex(); + return inferAlignmentForFrameIdx(FrameIdx, 0, *MI.getMF()); + } + return None; +} + +void GISelKnownBits::computeKnownBitsForFrameIndex(Register R, KnownBits &Known, + const APInt &DemandedElts, + unsigned Depth) { + const MachineInstr &MI = *MRI.getVRegDef(R); + computeKnownBitsForAlignment(Known, inferPtrAlignment(MI)); +} + +void GISelKnownBits::computeKnownBitsForAlignment(KnownBits &Known, + MaybeAlign Alignment) { + if (Alignment) + // The low bits are known zero if the pointer is aligned. 
+ Known.Zero.setLowBits(Log2(Alignment)); +} + +KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) { + return getKnownBits(MI.getOperand(0).getReg()); +} + +KnownBits GISelKnownBits::getKnownBits(Register R) { + KnownBits Known; + LLT Ty = MRI.getType(R); + APInt DemandedElts = + Ty.isVector() ? APInt::getAllOnesValue(Ty.getNumElements()) : APInt(1, 1); + computeKnownBitsImpl(R, Known, DemandedElts); + return Known; +} + +bool GISelKnownBits::signBitIsZero(Register R) { + LLT Ty = MRI.getType(R); + unsigned BitWidth = Ty.getScalarSizeInBits(); + return maskedValueIsZero(R, APInt::getSignMask(BitWidth)); +} + +APInt GISelKnownBits::getKnownZeroes(Register R) { + return getKnownBits(R).Zero; +} + +APInt GISelKnownBits::getKnownOnes(Register R) { return getKnownBits(R).One; } + +void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, + const APInt &DemandedElts, + unsigned Depth) { + MachineInstr &MI = *MRI.getVRegDef(R); + unsigned Opcode = MI.getOpcode(); + LLT DstTy = MRI.getType(R); + + // Handle the case where this is called on a register that does not have a + // type constraint (i.e. it has a register class constraint instead). This is + // unlikely to occur except by looking through copies but it is possible for + // the initial register being queried to be in this state. + if (!DstTy.isValid()) { + Known = KnownBits(); + return; + } + + unsigned BitWidth = DstTy.getSizeInBits(); + Known = KnownBits(BitWidth); // Don't know anything + + if (DstTy.isVector()) + return; // TODO: Handle vectors. + + if (Depth == getMaxDepth()) + return; + + if (!DemandedElts) + return; // No demanded elts, better to assume we don't know anything. + + KnownBits Known2; + + switch (Opcode) { + default: + TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI, + Depth); + break; + case TargetOpcode::COPY: { + MachineOperand Dst = MI.getOperand(0); + MachineOperand Src = MI.getOperand(1); + // Look through trivial copies but don't look through trivial copies of the + // form `%1:(s32) = OP %0:gpr32` known-bits analysis is currently unable to + // determine the bit width of a register class. + // + // We can't use NoSubRegister by name as it's defined by each target but + // it's always defined to be 0 by tablegen. + if (Dst.getSubReg() == 0 /*NoSubRegister*/ && Src.getReg().isVirtual() && + Src.getSubReg() == 0 /*NoSubRegister*/ && + MRI.getType(Src.getReg()).isValid()) { + // Don't increment Depth for this one since we didn't do any work. + computeKnownBitsImpl(Src.getReg(), Known, DemandedElts, Depth); + } + break; + } + case TargetOpcode::G_CONSTANT: { + auto CstVal = getConstantVRegVal(R, MRI); + if (!CstVal) + break; + Known.One = *CstVal; + Known.Zero = ~Known.One; + break; + } + case TargetOpcode::G_FRAME_INDEX: { + computeKnownBitsForFrameIndex(R, Known, DemandedElts); + break; + } + case TargetOpcode::G_SUB: { + // If low bits are known to be zero in both operands, then we know they are + // going to be 0 in the result. Both addition and complement operations + // preserve the low zero bits. 
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + unsigned KnownZeroLow = Known2.countMinTrailingZeros(); + if (KnownZeroLow == 0) + break; + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts, + Depth + 1); + KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros()); + Known.Zero.setLowBits(KnownZeroLow); + break; + } + case TargetOpcode::G_XOR: { + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero); + Known.Zero = KnownZeroOut; + break; + } + case TargetOpcode::G_GEP: { + // G_GEP is like G_ADD. FIXME: Is this true for all targets? + LLT Ty = MRI.getType(MI.getOperand(1).getReg()); + if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace())) + break; + LLVM_FALLTHROUGH; + } + case TargetOpcode::G_ADD: { + // Output known-0 bits are known if clear or set in both the low clear bits + // common to both LHS & RHS. For example, 8+(X<<3) is known to have the + // low 3 bits clear. + // Output known-0 bits are also known if the top bits of each input are + // known to be clear. For example, if one input has the top 10 bits clear + // and the other has the top 8 bits clear, we know the top 7 bits of the + // output must be clear. + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + unsigned KnownZeroHigh = Known2.countMinLeadingZeros(); + unsigned KnownZeroLow = Known2.countMinTrailingZeros(); + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts, + Depth + 1); + KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros()); + KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros()); + Known.Zero.setLowBits(KnownZeroLow); + if (KnownZeroHigh > 1) + Known.Zero.setHighBits(KnownZeroHigh - 1); + break; + } + case TargetOpcode::G_AND: { + // If either the LHS or the RHS are Zero, the result is zero. + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + + // Output known-1 bits are only known if set in both the LHS & RHS. + Known.One &= Known2.One; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + Known.Zero |= Known2.Zero; + break; + } + case TargetOpcode::G_OR: { + // If either the LHS or the RHS are Zero, the result is zero. + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + + // Output known-0 bits are only known if clear in both the LHS & RHS. + Known.Zero &= Known2.Zero; + // Output known-1 are known to be set if set in either the LHS | RHS. + Known.One |= Known2.One; + break; + } + case TargetOpcode::G_MUL: { + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + // If low bits are zero in either operand, output low known-0 bits. + // Also compute a conservative estimate for high known-0 bits. 
+ // More trickiness is possible, but this is sufficient for the + // interesting case of alignment computation. + unsigned TrailZ = + Known.countMinTrailingZeros() + Known2.countMinTrailingZeros(); + unsigned LeadZ = + std::max(Known.countMinLeadingZeros() + Known2.countMinLeadingZeros(), + BitWidth) - + BitWidth; + + Known.resetAll(); + Known.Zero.setLowBits(std::min(TrailZ, BitWidth)); + Known.Zero.setHighBits(std::min(LeadZ, BitWidth)); + break; + } + case TargetOpcode::G_SELECT: { + computeKnownBitsImpl(MI.getOperand(3).getReg(), Known, DemandedElts, + Depth + 1); + // If we don't know any bits, early out. + if (Known.isUnknown()) + break; + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts, + Depth + 1); + // Only known if known in both the LHS and RHS. + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + break; + } + case TargetOpcode::G_FCMP: + case TargetOpcode::G_ICMP: { + if (TL.getBooleanContents(DstTy.isVector(), + Opcode == TargetOpcode::G_FCMP) == + TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) + Known.Zero.setBitsFrom(1); + break; + } + case TargetOpcode::G_SEXT: { + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + Depth + 1); + // If the sign bit is known to be zero or one, then sext will extend + // it to the top bits, else it will just zext. + Known = Known.sext(BitWidth); + break; + } + case TargetOpcode::G_ANYEXT: { + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + Depth + 1); + Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */); + break; + } + case TargetOpcode::G_LOAD: { + if (MI.hasOneMemOperand()) { + const MachineMemOperand *MMO = *MI.memoperands_begin(); + if (const MDNode *Ranges = MMO->getRanges()) { + computeKnownBitsFromRangeMetadata(*Ranges, Known); + } + } + break; + } + case TargetOpcode::G_ZEXTLOAD: { + // Everything above the retrieved bits is zero + if (MI.hasOneMemOperand()) + Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits()); + break; + } + case TargetOpcode::G_ASHR: + case TargetOpcode::G_LSHR: + case TargetOpcode::G_SHL: { + KnownBits RHSKnown; + computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts, + Depth + 1); + if (!RHSKnown.isConstant()) { + LLVM_DEBUG( + MachineInstr *RHSMI = MRI.getVRegDef(MI.getOperand(2).getReg()); + dbgs() << '[' << Depth << "] Shift not known constant: " << *RHSMI); + break; + } + uint64_t Shift = RHSKnown.getConstant().getZExtValue(); + LLVM_DEBUG(dbgs() << '[' << Depth << "] Shift is " << Shift << '\n'); + + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + Depth + 1); + + switch (Opcode) { + case TargetOpcode::G_ASHR: + Known.Zero = Known.Zero.ashr(Shift); + Known.One = Known.One.ashr(Shift); + break; + case TargetOpcode::G_LSHR: + Known.Zero = Known.Zero.lshr(Shift); + Known.One = Known.One.lshr(Shift); + Known.Zero.setBitsFrom(Known.Zero.getBitWidth() - Shift); + break; + case TargetOpcode::G_SHL: + Known.Zero = Known.Zero.shl(Shift); + Known.One = Known.One.shl(Shift); + Known.Zero.setBits(0, Shift); + break; + } + break; + } + case TargetOpcode::G_INTTOPTR: + case TargetOpcode::G_PTRTOINT: + // Fall through and handle them the same as zext/trunc. + LLVM_FALLTHROUGH; + case TargetOpcode::G_ZEXT: + case TargetOpcode::G_TRUNC: { + Register SrcReg = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + unsigned SrcBitWidth = SrcTy.isPointer() + ? 
DL.getIndexSizeInBits(SrcTy.getAddressSpace()) + : SrcTy.getSizeInBits(); + assert(SrcBitWidth && "SrcBitWidth can't be zero"); + Known = Known.zextOrTrunc(SrcBitWidth, true); + computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); + Known = Known.zextOrTrunc(BitWidth, true); + if (BitWidth > SrcBitWidth) + Known.Zero.setBitsFrom(SrcBitWidth); + break; + } + } + + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + LLVM_DEBUG(dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" + << Depth << "] Computed for: " << MI << "[" << Depth + << "] Known: 0x" + << (Known.Zero | Known.One).toString(16, false) << "\n" + << "[" << Depth << "] Zero: 0x" + << Known.Zero.toString(16, false) << "\n" + << "[" << Depth << "] One: 0x" + << Known.One.toString(16, false) << "\n"); +} + +void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool GISelKnownBitsAnalysis::runOnMachineFunction(MachineFunction &MF) { + return false; +} diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index 6e99bdbd8264..45cef4aca888 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -32,6 +32,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -334,7 +335,7 @@ bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) { bool IRTranslator::translateCompare(const User &U, MachineIRBuilder &MIRBuilder) { - const CmpInst *CI = dyn_cast(&U); + auto *CI = dyn_cast(&U); Register Op0 = getOrCreateVReg(*U.getOperand(0)); Register Op1 = getOrCreateVReg(*U.getOperand(1)); Register Res = getOrCreateVReg(U); @@ -345,11 +346,12 @@ bool IRTranslator::translateCompare(const User &U, MIRBuilder.buildICmp(Pred, Res, Op0, Op1); else if (Pred == CmpInst::FCMP_FALSE) MIRBuilder.buildCopy( - Res, getOrCreateVReg(*Constant::getNullValue(CI->getType()))); + Res, getOrCreateVReg(*Constant::getNullValue(U.getType()))); else if (Pred == CmpInst::FCMP_TRUE) MIRBuilder.buildCopy( - Res, getOrCreateVReg(*Constant::getAllOnesValue(CI->getType()))); + Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType()))); else { + assert(CI && "Instruction should be CmpInst"); MIRBuilder.buildInstr(TargetOpcode::G_FCMP, {Res}, {Pred, Op0, Op1}, MachineInstr::copyFlagsFromInstruction(*CI)); } @@ -588,8 +590,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, Register CondRHS = getOrCreateVReg(*CB.CmpRHS); Cond = MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0); } else { - assert(CB.PredInfo.Pred == CmpInst::ICMP_ULE && - "Can only handle ULE ranges"); + assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE && + "Can only handle SLE ranges"); const APInt& Low = cast(CB.CmpLHS)->getValue(); const APInt& High = cast(CB.CmpRHS)->getValue(); @@ -598,7 +600,7 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, if (cast(CB.CmpLHS)->isMinValue(true)) { Register CondRHS = getOrCreateVReg(*CB.CmpRHS); Cond = - MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, CmpOpReg, CondRHS).getReg(0); + MIB.buildICmp(CmpInst::ICMP_SLE, i1Ty, CmpOpReg, CondRHS).getReg(0); } else { const LLT &CmpTy = MRI->getType(CmpOpReg); auto Sub = MIB.buildSub({CmpTy}, CmpOpReg, CondLHS); @@ 
-728,7 +730,7 @@ bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I, MHS = nullptr; } else { // Check I->Low <= Cond <= I->High. - Pred = CmpInst::ICMP_ULE; + Pred = CmpInst::ICMP_SLE; LHS = I->Low; MHS = Cond; RHS = I->High; @@ -879,7 +881,8 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { return true; } - + const MDNode *Ranges = + Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr; for (unsigned i = 0; i < Regs.size(); ++i) { Register Addr; MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8); @@ -888,7 +891,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { unsigned BaseAlign = getMemOpAlignment(LI); auto MMO = MF->getMachineMemOperand( Ptr, Flags, (MRI->getType(Regs[i]).getSizeInBits() + 7) / 8, - MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr, + MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), Ranges, LI.getSyncScopeID(), LI.getOrdering()); MIRBuilder.buildLoad(Regs[i], Addr, *MMO); } @@ -1075,36 +1078,29 @@ bool IRTranslator::translateGetElementPtr(const User &U, } if (Offset != 0) { - Register NewBaseReg = MRI->createGenericVirtualRegister(PtrTy); LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL); auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset); - MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetMIB.getReg(0)); - - BaseReg = NewBaseReg; + BaseReg = + MIRBuilder.buildGEP(PtrTy, BaseReg, OffsetMIB.getReg(0)).getReg(0); Offset = 0; } Register IdxReg = getOrCreateVReg(*Idx); - if (MRI->getType(IdxReg) != OffsetTy) { - Register NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy); - MIRBuilder.buildSExtOrTrunc(NewIdxReg, IdxReg); - IdxReg = NewIdxReg; - } + if (MRI->getType(IdxReg) != OffsetTy) + IdxReg = MIRBuilder.buildSExtOrTrunc(OffsetTy, IdxReg).getReg(0); // N = N + Idx * ElementSize; // Avoid doing it for ElementSize of 1. Register GepOffsetReg; if (ElementSize != 1) { - GepOffsetReg = MRI->createGenericVirtualRegister(OffsetTy); auto ElementSizeMIB = MIRBuilder.buildConstant( getLLTForType(*OffsetIRTy, *DL), ElementSize); - MIRBuilder.buildMul(GepOffsetReg, ElementSizeMIB.getReg(0), IdxReg); + GepOffsetReg = + MIRBuilder.buildMul(OffsetTy, ElementSizeMIB, IdxReg).getReg(0); } else GepOffsetReg = IdxReg; - Register NewBaseReg = MRI->createGenericVirtualRegister(PtrTy); - MIRBuilder.buildGEP(NewBaseReg, BaseReg, GepOffsetReg); - BaseReg = NewBaseReg; + BaseReg = MIRBuilder.buildGEP(PtrTy, BaseReg, GepOffsetReg).getReg(0); } } @@ -1119,54 +1115,51 @@ bool IRTranslator::translateGetElementPtr(const User &U, return true; } -bool IRTranslator::translateMemfunc(const CallInst &CI, +bool IRTranslator::translateMemFunc(const CallInst &CI, MachineIRBuilder &MIRBuilder, - unsigned ID) { + Intrinsic::ID ID) { // If the source is undef, then just emit a nop. 
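// Standalone sketch of the address arithmetic that the getelementptr
// translation above emits as G_CONSTANT / G_MUL / G_GEP: the result is the
// base pointer plus the accumulated constant offset plus index * element
// size. All values below are made-up illustration numbers.
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t Base = 0x1000;        // incoming pointer value
  const uint64_t ConstOffset = 8;      // e.g. offset of a struct field
  const int64_t Idx = 3;               // dynamic index, sign-extended
  const int64_t ElementSize = 16;      // bytes per indexed element
  const uint64_t Addr = Base + ConstOffset + (uint64_t)(Idx * ElementSize);
  std::printf("gep result: 0x%llx\n", (unsigned long long)Addr);
  return 0;
}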
- if (isa(CI.getArgOperand(1))) { - switch (ID) { - case Intrinsic::memmove: - case Intrinsic::memcpy: - case Intrinsic::memset: - return true; - default: - break; - } - } - - LLT SizeTy = getLLTForType(*CI.getArgOperand(2)->getType(), *DL); - Type *DstTy = CI.getArgOperand(0)->getType(); - if (cast(DstTy)->getAddressSpace() != 0 || - SizeTy.getSizeInBits() != DL->getPointerSizeInBits(0)) - return false; + if (isa(CI.getArgOperand(1))) + return true; - SmallVector Args; - for (int i = 0; i < 3; ++i) { - const auto &Arg = CI.getArgOperand(i); - Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType()); + ArrayRef Res; + auto ICall = MIRBuilder.buildIntrinsic(ID, Res, true); + for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI) + ICall.addUse(getOrCreateVReg(**AI)); + + unsigned DstAlign = 0, SrcAlign = 0; + unsigned IsVol = + cast(CI.getArgOperand(CI.getNumArgOperands() - 1)) + ->getZExtValue(); + + if (auto *MCI = dyn_cast(&CI)) { + DstAlign = std::max(MCI->getDestAlignment(), 1); + SrcAlign = std::max(MCI->getSourceAlignment(), 1); + } else if (auto *MMI = dyn_cast(&CI)) { + DstAlign = std::max(MMI->getDestAlignment(), 1); + SrcAlign = std::max(MMI->getSourceAlignment(), 1); + } else { + auto *MSI = cast(&CI); + DstAlign = std::max(MSI->getDestAlignment(), 1); } - const char *Callee; - switch (ID) { - case Intrinsic::memmove: - case Intrinsic::memcpy: { - Type *SrcTy = CI.getArgOperand(1)->getType(); - if(cast(SrcTy)->getAddressSpace() != 0) - return false; - Callee = ID == Intrinsic::memcpy ? "memcpy" : "memmove"; - break; - } - case Intrinsic::memset: - Callee = "memset"; - break; - default: - return false; - } + // We need to propagate the tail call flag from the IR inst as an argument. + // Otherwise, we have to pessimize and assume later that we cannot tail call + // any memory intrinsics. + ICall.addImm(CI.isTailCall() ? 1 : 0); - return CLI->lowerCall(MIRBuilder, CI.getCallingConv(), - MachineOperand::CreateES(Callee), - CallLowering::ArgInfo({0}, CI.getType()), Args); + // Create mem operands to store the alignment and volatile info. + auto VolFlag = IsVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; + ICall.addMemOperand(MF->getMachineMemOperand( + MachinePointerInfo(CI.getArgOperand(0)), + MachineMemOperand::MOStore | VolFlag, 1, DstAlign)); + if (ID != Intrinsic::memset) + ICall.addMemOperand(MF->getMachineMemOperand( + MachinePointerInfo(CI.getArgOperand(1)), + MachineMemOperand::MOLoad | VolFlag, 1, SrcAlign)); + + return true; } void IRTranslator::getStackGuard(Register DstReg, @@ -1186,7 +1179,7 @@ void IRTranslator::getStackGuard(Register DstReg, MachineMemOperand::MODereferenceable; MachineMemOperand *MemRef = MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8, - DL->getPointerABIAlignment(0)); + DL->getPointerABIAlignment(0).value()); MIB.setMemRefs({MemRef}); } @@ -1208,6 +1201,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { break; case Intrinsic::bswap: return TargetOpcode::G_BSWAP; + case Intrinsic::bitreverse: + return TargetOpcode::G_BITREVERSE; case Intrinsic::ceil: return TargetOpcode::G_FCEIL; case Intrinsic::cos: @@ -1383,16 +1378,17 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. 
- MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression()); + MIRBuilder.buildDirectDbgValue(0, DI.getVariable(), DI.getExpression()); } else if (const auto *CI = dyn_cast(V)) { MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression()); } else { - Register Reg = getOrCreateVReg(*V); - // FIXME: This does not handle register-indirect values at offset 0. The - // direct/indirect thing shouldn't really be handled by something as - // implicit as reg+noreg vs reg+imm in the first palce, but it seems - // pretty baked in right now. - MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression()); + for (Register Reg : getOrCreateVRegs(*V)) { + // FIXME: This does not handle register-indirect values at offset 0. The + // direct/indirect thing shouldn't really be handled by something as + // implicit as reg+noreg vs reg+imm in the first place, but it seems + // pretty baked in right now. + MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression()); + } } return true; } @@ -1433,7 +1429,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: - return translateMemfunc(CI, MIRBuilder, ID); + return translateMemFunc(CI, MIRBuilder, ID); case Intrinsic::eh_typeid_for: { GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0)); Register Reg = getOrCreateVReg(CI); @@ -1441,18 +1437,12 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, MIRBuilder.buildConstant(Reg, TypeID); return true; } - case Intrinsic::objectsize: { - // If we don't know by now, we're never going to know. - const ConstantInt *Min = cast(CI.getArgOperand(1)); + case Intrinsic::objectsize: + llvm_unreachable("llvm.objectsize.* should have been lowered already"); - MIRBuilder.buildConstant(getOrCreateVReg(CI), Min->isZero() ? -1ULL : 0); - return true; - } case Intrinsic::is_constant: - // If this wasn't constant-folded away by now, then it's not a - // constant. - MIRBuilder.buildConstant(getOrCreateVReg(CI), 0); - return true; + llvm_unreachable("llvm.is.constant.* should have been lowered already"); + case Intrinsic::stackguard: getStackGuard(getOrCreateVReg(CI), MIRBuilder); return true; @@ -1551,6 +1541,46 @@ bool IRTranslator::translateInlineAsm(const CallInst &CI, return true; } +bool IRTranslator::translateCallSite(const ImmutableCallSite &CS, + MachineIRBuilder &MIRBuilder) { + const Instruction &I = *CS.getInstruction(); + ArrayRef Res = getOrCreateVRegs(I); + + SmallVector, 8> Args; + Register SwiftInVReg = 0; + Register SwiftErrorVReg = 0; + for (auto &Arg : CS.args()) { + if (CLI->supportSwiftError() && isSwiftError(Arg)) { + assert(SwiftInVReg == 0 && "Expected only one swift error argument"); + LLT Ty = getLLTForType(*Arg->getType(), *DL); + SwiftInVReg = MRI->createGenericVirtualRegister(Ty); + MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt( + &I, &MIRBuilder.getMBB(), Arg)); + Args.emplace_back(makeArrayRef(SwiftInVReg)); + SwiftErrorVReg = + SwiftError.getOrCreateVRegDefAt(&I, &MIRBuilder.getMBB(), Arg); + continue; + } + Args.push_back(getOrCreateVRegs(*Arg)); + } + + // We don't set HasCalls on MFI here yet because call lowering may decide to + // optimize into tail calls. Instead, we defer that to selection where a final + // scan is done to check if any instructions are calls. 
+ bool Success = + CLI->lowerCall(MIRBuilder, CS, Res, Args, SwiftErrorVReg, + [&]() { return getOrCreateVReg(*CS.getCalledValue()); }); + + // Check if we just inserted a tail call. + if (Success) { + assert(!HasTailCall && "Can't tail call return twice from block?"); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + HasTailCall = TII->isTailCall(*std::prev(MIRBuilder.getInsertPt())); + } + + return Success; +} + bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { const CallInst &CI = cast(U); auto TII = MF->getTarget().getIntrinsicInfo(); @@ -1570,34 +1600,8 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { ID = static_cast(TII->getIntrinsicID(F)); } - if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) { - ArrayRef Res = getOrCreateVRegs(CI); - - SmallVector, 8> Args; - Register SwiftInVReg = 0; - Register SwiftErrorVReg = 0; - for (auto &Arg: CI.arg_operands()) { - if (CLI->supportSwiftError() && isSwiftError(Arg)) { - assert(SwiftInVReg == 0 && "Expected only one swift error argument"); - LLT Ty = getLLTForType(*Arg->getType(), *DL); - SwiftInVReg = MRI->createGenericVirtualRegister(Ty); - MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt( - &CI, &MIRBuilder.getMBB(), Arg)); - Args.emplace_back(makeArrayRef(SwiftInVReg)); - SwiftErrorVReg = - SwiftError.getOrCreateVRegDefAt(&CI, &MIRBuilder.getMBB(), Arg); - continue; - } - Args.push_back(getOrCreateVRegs(*Arg)); - } - - MF->getFrameInfo().setHasCalls(true); - bool Success = - CLI->lowerCall(MIRBuilder, &CI, Res, Args, SwiftErrorVReg, - [&]() { return getOrCreateVReg(*CI.getCalledValue()); }); - - return Success; - } + if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) + return translateCallSite(&CI, MIRBuilder); assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic"); @@ -1615,14 +1619,29 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { if (isa(CI)) MIB->copyIRFlags(CI); - for (auto &Arg : CI.arg_operands()) { + for (auto &Arg : enumerate(CI.arg_operands())) { // Some intrinsics take metadata parameters. Reject them. - if (isa(Arg)) - return false; - ArrayRef VRegs = getOrCreateVRegs(*Arg); - if (VRegs.size() > 1) + if (isa(Arg.value())) return false; - MIB.addUse(VRegs[0]); + + // If this is required to be an immediate, don't materialize it in a + // register. + if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) { + if (ConstantInt *CI = dyn_cast(Arg.value())) { + // imm arguments are more convenient than cimm (and realistically + // probably sufficient), so use them. + assert(CI->getBitWidth() <= 64 && + "large intrinsic immediates not handled"); + MIB.addImm(CI->getSExtValue()); + } else { + MIB.addFPImm(cast(Arg.value())); + } + } else { + ArrayRef VRegs = getOrCreateVRegs(*Arg.value()); + if (VRegs.size() > 1) + return false; + MIB.addUse(VRegs[0]); + } } // Add a MachineMemOperand if it is a target mem intrinsic. @@ -1630,13 +1649,14 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { TargetLowering::IntrinsicInfo Info; // TODO: Add a GlobalISel version of getTgtMemIntrinsic. 
if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) { - unsigned Align = Info.align; - if (Align == 0) - Align = DL->getABITypeAlignment(Info.memVT.getTypeForEVT(F->getContext())); + MaybeAlign Align = Info.align; + if (!Align) + Align = MaybeAlign( + DL->getABITypeAlignment(Info.memVT.getTypeForEVT(F->getContext()))); uint64_t Size = Info.memVT.getStoreSize(); - MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal), - Info.flags, Size, Align)); + MIB.addMemOperand(MF->getMachineMemOperand( + MachinePointerInfo(Info.ptrVal), Info.flags, Size, Align->value())); } return true; @@ -1672,30 +1692,7 @@ bool IRTranslator::translateInvoke(const User &U, MCSymbol *BeginSymbol = Context.createTempSymbol(); MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol); - ArrayRef Res; - if (!I.getType()->isVoidTy()) - Res = getOrCreateVRegs(I); - SmallVector, 8> Args; - Register SwiftErrorVReg = 0; - Register SwiftInVReg = 0; - for (auto &Arg : I.arg_operands()) { - if (CLI->supportSwiftError() && isSwiftError(Arg)) { - assert(SwiftInVReg == 0 && "Expected only one swift error argument"); - LLT Ty = getLLTForType(*Arg->getType(), *DL); - SwiftInVReg = MRI->createGenericVirtualRegister(Ty); - MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt( - &I, &MIRBuilder.getMBB(), Arg)); - Args.push_back(makeArrayRef(SwiftInVReg)); - SwiftErrorVReg = - SwiftError.getOrCreateVRegDefAt(&I, &MIRBuilder.getMBB(), Arg); - continue; - } - - Args.push_back(getOrCreateVRegs(*Arg)); - } - - if (!CLI->lowerCall(MIRBuilder, &I, Res, Args, SwiftErrorVReg, - [&]() { return getOrCreateVReg(*I.getCalledValue()); })) + if (!translateCallSite(&I, MIRBuilder)) return false; MCSymbol *EndSymbol = Context.createTempSymbol(); @@ -1811,36 +1808,25 @@ bool IRTranslator::translateAlloca(const User &U, Register AllocSize = MRI->createGenericVirtualRegister(IntPtrTy); Register TySize = - getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, -DL->getTypeAllocSize(Ty))); + getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, DL->getTypeAllocSize(Ty))); MIRBuilder.buildMul(AllocSize, NumElts, TySize); - LLT PtrTy = getLLTForType(*AI.getType(), *DL); - auto &TLI = *MF->getSubtarget().getTargetLowering(); - Register SPReg = TLI.getStackPointerRegisterToSaveRestore(); - - Register SPTmp = MRI->createGenericVirtualRegister(PtrTy); - MIRBuilder.buildCopy(SPTmp, SPReg); - - Register AllocTmp = MRI->createGenericVirtualRegister(PtrTy); - MIRBuilder.buildGEP(AllocTmp, SPTmp, AllocSize); - - // Handle alignment. We have to realign if the allocation granule was smaller - // than stack alignment, or the specific alloca requires more than stack - // alignment. unsigned StackAlign = MF->getSubtarget().getFrameLowering()->getStackAlignment(); - Align = std::max(Align, StackAlign); - if (Align > StackAlign || DL->getTypeAllocSize(Ty) % StackAlign != 0) { - // Round the size of the allocation up to the stack alignment size - // by add SA-1 to the size. This doesn't overflow because we're computing - // an address inside an alloca. - Register AlignedAlloc = MRI->createGenericVirtualRegister(PtrTy); - MIRBuilder.buildPtrMask(AlignedAlloc, AllocTmp, Log2_32(Align)); - AllocTmp = AlignedAlloc; - } + if (Align <= StackAlign) + Align = 0; + + // Round the size of the allocation up to the stack alignment size + // by add SA-1 to the size. This doesn't overflow because we're computing + // an address inside an alloca. 
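// Standalone sketch of the rounding performed by the add/and pair that
// follows: adding StackAlign - 1 and masking with ~(StackAlign - 1) rounds a
// dynamic alloca size up to the next multiple of the stack alignment. The
// alignment and sizes are example values.
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t StackAlign = 16;
  const uint64_t Sizes[] = {1, 16, 23, 32};
  for (uint64_t Size : Sizes) {
    const uint64_t Rounded = (Size + StackAlign - 1) & ~(StackAlign - 1);
    std::printf("size %llu -> %llu\n", (unsigned long long)Size,
                (unsigned long long)Rounded);
  }
  return 0;
}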
+ auto SAMinusOne = MIRBuilder.buildConstant(IntPtrTy, StackAlign - 1); + auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne, + MachineInstr::NoUWrap); + auto AlignCst = + MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign - 1)); + auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst); - MIRBuilder.buildCopy(SPReg, AllocTmp); - MIRBuilder.buildCopy(getOrCreateVReg(AI), AllocTmp); + MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Align); MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, &AI); assert(MF->getFrameInfo().hasVarSizedObjects()); @@ -1926,7 +1912,7 @@ bool IRTranslator::translateShuffleVector(const User &U, .addDef(getOrCreateVReg(U)) .addUse(getOrCreateVReg(*U.getOperand(0))) .addUse(getOrCreateVReg(*U.getOperand(1))) - .addUse(getOrCreateVReg(*U.getOperand(2))); + .addShuffleMask(cast(U.getOperand(2))); return true; } @@ -1991,7 +1977,6 @@ bool IRTranslator::translateAtomicRMW(const User &U, unsigned Opcode = 0; switch (I.getOperation()) { default: - llvm_unreachable("Unknown atomicrmw op"); return false; case AtomicRMWInst::Xchg: Opcode = TargetOpcode::G_ATOMICRMW_XCHG; @@ -2026,6 +2011,12 @@ bool IRTranslator::translateAtomicRMW(const User &U, case AtomicRMWInst::UMin: Opcode = TargetOpcode::G_ATOMICRMW_UMIN; break; + case AtomicRMWInst::FAdd: + Opcode = TargetOpcode::G_ATOMICRMW_FADD; + break; + case AtomicRMWInst::FSub: + Opcode = TargetOpcode::G_ATOMICRMW_FSUB; + break; } MIRBuilder.buildAtomicRMW( @@ -2197,6 +2188,20 @@ void IRTranslator::finalizeFunction() { FuncInfo.clear(); } +/// Returns true if a BasicBlock \p BB within a variadic function contains a +/// variadic musttail call. +static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) { + if (!IsVarArg) + return false; + + // Walk the block backwards, because tail calls usually only appear at the end + // of a block. 
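// Simplified model of the reverse block scan implemented just below: walk the
// instructions backwards and stop at the first musttail call. Plain structs
// stand in for IR instructions here.
#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  struct Inst { bool IsMustTailCall; };
  const std::vector<Inst> Block = {{false}, {false}, {true}, {false}};
  const bool Found =
      std::any_of(Block.rbegin(), Block.rend(),
                  [](const Inst &I) { return I.IsMustTailCall; });
  std::printf("block %s a musttail call\n",
              Found ? "contains" : "does not contain");
  return 0;
}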
+ return std::any_of(BB.rbegin(), BB.rend(), [](const Instruction &I) { + const auto *CI = dyn_cast(&I); + return CI && CI->isMustTailCall(); + }); +} + bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { MF = &CurMF; const Function &F = MF->getFunction(); @@ -2212,26 +2217,26 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { : TPC->isGISelCSEEnabled(); if (EnableCSE) { - EntryBuilder = make_unique(CurMF); + EntryBuilder = std::make_unique(CurMF); CSEInfo = &Wrapper.get(TPC->getCSEConfig()); EntryBuilder->setCSEInfo(CSEInfo); - CurBuilder = make_unique(CurMF); + CurBuilder = std::make_unique(CurMF); CurBuilder->setCSEInfo(CSEInfo); } else { - EntryBuilder = make_unique(); - CurBuilder = make_unique(); + EntryBuilder = std::make_unique(); + CurBuilder = std::make_unique(); } CLI = MF->getSubtarget().getCallLowering(); CurBuilder->setMF(*MF); EntryBuilder->setMF(*MF); MRI = &MF->getRegInfo(); DL = &F.getParent()->getDataLayout(); - ORE = llvm::make_unique(&F); + ORE = std::make_unique(&F); FuncInfo.MF = MF; FuncInfo.BPI = nullptr; const auto &TLI = *MF->getSubtarget().getTargetLowering(); const TargetMachine &TM = MF->getTarget(); - SL = make_unique(this, FuncInfo); + SL = std::make_unique(this, FuncInfo); SL->init(TLI, TM, *DL); EnableOpts = TM.getOptLevel() != CodeGenOpt::None && !skipFunction(F); @@ -2258,6 +2263,9 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { SwiftError.setFunction(CurMF); SwiftError.createEntriesInEntryBlock(DbgLoc); + bool IsVarArg = F.isVarArg(); + bool HasMustTailInVarArgFn = false; + // Create all blocks, in IR order, to preserve the layout. for (const BasicBlock &BB: F) { auto *&MBB = BBToMBB[&BB]; @@ -2267,8 +2275,13 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { if (BB.hasAddressTaken()) MBB->setHasAddressTaken(); + + if (!HasMustTailInVarArgFn) + HasMustTailInVarArgFn = checkForMustTailInVarArgFn(IsVarArg, BB); } + MF->getFrameInfo().setHasMustTailInVarArgFunc(HasMustTailInVarArgFn); + // Make our arguments/constants entry block fallthrough to the IR entry block. EntryBB->addSuccessor(&getMBB(F.front())); @@ -2286,18 +2299,6 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { } } - // We don't currently support translating swifterror or swiftself functions. - for (auto &Arg : F.args()) { - if (Arg.hasSwiftSelfAttr()) { - OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", - F.getSubprogram(), &F.getEntryBlock()); - R << "unable to lower arguments due to swiftself: " - << ore::NV("Prototype", F.getType()); - reportTranslationError(*MF, *TPC, *ORE, R); - return false; - } - } - if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs)) { OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", F.getSubprogram(), &F.getEntryBlock()); @@ -2322,8 +2323,15 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { // Set the insertion point of all the following translations to // the end of this basic block. CurBuilder->setMBB(MBB); - + HasTailCall = false; for (const Instruction &Inst : *BB) { + // If we translated a tail call in the last step, then we know + // everything after the call is either a return, or something that is + // handled by the call itself. (E.g. a lifetime marker or assume + // intrinsic.) In this case, we should stop translating the block and + // move on. 
+ if (HasTailCall) + break; #ifndef NDEBUG Verifier.setCurrentInst(&Inst); #endif // ifndef NDEBUG diff --git a/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/lib/CodeGen/GlobalISel/InstructionSelect.cpp index 70694fe6b6c8..7c4fd2d140d3 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -12,11 +12,14 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -45,6 +48,7 @@ INITIALIZE_PASS_BEGIN(InstructionSelect, DEBUG_TYPE, "Select target instructions out of generic instructions", false, false) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE, "Select target instructions out of generic instructions", false, false) @@ -53,6 +57,8 @@ InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) { } void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); getSelectionDAGFallbackAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } @@ -64,11 +70,13 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { return false; LLVM_DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n'); + GISelKnownBits &KB = getAnalysis().get(MF); const TargetPassConfig &TPC = getAnalysis(); - const InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector(); + InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector(); CodeGenCoverage CoverageInfo; assert(ISel && "Cannot work without InstructionSelector"); + ISel->setupMF(MF, KB, CoverageInfo); // An optimization remark emitter. Used to report failures. MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); @@ -124,7 +132,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { continue; } - if (!ISel->select(MI, CoverageInfo)) { + if (!ISel->select(MI)) { // FIXME: It would be nice to dump all inserted instructions. It's // not obvious how, esp. considering select() can insert after MI. reportGISelFailure(MF, TPC, MORE, "gisel-select", "cannot select", MI); @@ -159,10 +167,10 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { --MII; if (MI.getOpcode() != TargetOpcode::COPY) continue; - unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned DstReg = MI.getOperand(0).getReg(); - if (TargetRegisterInfo::isVirtualRegister(SrcReg) && - TargetRegisterInfo::isVirtualRegister(DstReg)) { + Register SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + if (Register::isVirtualRegister(SrcReg) && + Register::isVirtualRegister(DstReg)) { auto SrcRC = MRI.getRegClass(SrcReg); auto DstRC = MRI.getRegClass(DstReg); if (SrcRC == DstRC) { @@ -179,7 +187,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { // that the size of the now-constrained vreg is unchanged and that it has a // register class. 
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - unsigned VReg = TargetRegisterInfo::index2VirtReg(I); + unsigned VReg = Register::index2VirtReg(I); MachineInstr *MI = nullptr; if (!MRI.def_empty(VReg)) @@ -217,6 +225,22 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { auto &TLI = *MF.getSubtarget().getTargetLowering(); TLI.finalizeLowering(MF); + // Determine if there are any calls in this machine function. Ported from + // SelectionDAG. + MachineFrameInfo &MFI = MF.getFrameInfo(); + for (const auto &MBB : MF) { + if (MFI.hasCalls() && MF.hasInlineAsm()) + break; + + for (const auto &MI : MBB) { + if ((MI.isCall() && !MI.isReturn()) || MI.isStackAligningInlineAsm()) + MFI.setHasCalls(true); + if (MI.isInlineAsm()) + MF.setHasInlineAsm(true); + } + } + + LLVM_DEBUG({ dbgs() << "Rules covered by selecting function: " << MF.getName() << ":"; for (auto RuleID : CoverageInfo.covered()) diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 2ad35b3a72c9..28143b30d4e8 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -79,5 +79,5 @@ bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI, return true; return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() && - !MI.hasUnmodeledSideEffects() && empty(MI.implicit_operands()); + !MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty(); } diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp index b5b26bff34bb..1593e21fe07e 100644 --- a/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -184,11 +184,11 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { : TPC.isGISelCSEEnabled(); if (EnableCSE) { - MIRBuilder = make_unique(); + MIRBuilder = std::make_unique(); CSEInfo = &Wrapper.get(TPC.getCSEConfig()); MIRBuilder->setCSEInfo(CSEInfo); } else - MIRBuilder = make_unique(); + MIRBuilder = std::make_unique(); // This observer keeps the worklist updated. LegalizerWorkListManager WorkListObserver(InstList, ArtifactList); // We want both WorkListObserver as well as CSEInfo to observe all changes. @@ -206,8 +206,16 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { auto RemoveDeadInstFromLists = [&WrapperObserver](MachineInstr *DeadMI) { WrapperObserver.erasingInstr(*DeadMI); }; + auto stopLegalizing = [&](MachineInstr &MI) { + Helper.MIRBuilder.stopObservingChanges(); + reportGISelFailure(MF, TPC, MORE, "gisel-legalize", + "unable to legalize instruction", MI); + }; bool Changed = false; + SmallVector RetryList; do { + assert(RetryList.empty() && "Expected no instructions in RetryList"); + unsigned NumArtifacts = ArtifactList.size(); while (!InstList.empty()) { MachineInstr &MI = *InstList.pop_back_val(); assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode"); @@ -222,14 +230,31 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { // Error out if we couldn't legalize this instruction. We may want to // fall back to DAG ISel instead in the future. if (Res == LegalizerHelper::UnableToLegalize) { - Helper.MIRBuilder.stopObservingChanges(); - reportGISelFailure(MF, TPC, MORE, "gisel-legalize", - "unable to legalize instruction", MI); + // Move illegal artifacts to RetryList instead of aborting because + // legalizing InstList may generate artifacts that allow + // ArtifactCombiner to combine away them. 
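// Simplified control-flow model (plain C++, not MIR) of the retry policy
// introduced above: artifacts that cannot be legalized yet are parked on a
// retry list; if the main loop produced new artifacts they are re-queued for
// another combine attempt, otherwise legalization gives up on the first
// parked instruction. Ids and flags are illustrative.
#include <cstdio>
#include <vector>

struct Item { int Id; bool LegalizableNow; };

int main() {
  const std::vector<Item> InstList = {{0, true}, {1, false}};
  std::vector<Item> ArtifactList, RetryList;
  const unsigned NumArtifacts = ArtifactList.size();
  for (const Item &I : InstList) {
    if (!I.LegalizableNow) { RetryList.push_back(I); continue; }
    ArtifactList.push_back({100 + I.Id, true}); // legalizing may emit artifacts
  }
  if (!RetryList.empty()) {
    if (ArtifactList.size() > NumArtifacts) {
      for (const Item &I : RetryList)
        ArtifactList.push_back(I);              // new artifacts: try again
    } else {
      std::printf("unable to legalize instruction %d\n", RetryList.front().Id);
      return 1;
    }
  }
  std::printf("%zu instructions queued for artifact combining\n",
              ArtifactList.size());
  return 0;
}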
+ if (isArtifact(MI)) { + RetryList.push_back(&MI); + continue; + } + stopLegalizing(MI); return false; } WorkListObserver.printNewInstrs(); Changed |= Res == LegalizerHelper::Legalized; } + // Try to combine the instructions in RetryList again if there + // are new artifacts. If not, stop legalizing. + if (!RetryList.empty()) { + if (ArtifactList.size() > NumArtifacts) { + while (!RetryList.empty()) + ArtifactList.insert(RetryList.pop_back_val()); + } else { + MachineInstr *MI = *RetryList.begin(); + stopLegalizing(*MI); + return false; + } + } while (!ArtifactList.empty()) { MachineInstr &MI = *ArtifactList.pop_back_val(); assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode"); diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index f5cf7fc9bd9b..21512e543878 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -171,6 +172,26 @@ bool LegalizerHelper::extractParts(Register Reg, LLT RegTy, return true; } +static LLT getGCDType(LLT OrigTy, LLT TargetTy) { + if (OrigTy.isVector() && TargetTy.isVector()) { + assert(OrigTy.getElementType() == TargetTy.getElementType()); + int GCD = greatestCommonDivisor(OrigTy.getNumElements(), + TargetTy.getNumElements()); + return LLT::scalarOrVector(GCD, OrigTy.getElementType()); + } + + if (OrigTy.isVector() && !TargetTy.isVector()) { + assert(OrigTy.getElementType() == TargetTy); + return TargetTy; + } + + assert(!OrigTy.isVector() && !TargetTy.isVector()); + + int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(), + TargetTy.getSizeInBits()); + return LLT::scalar(GCD); +} + void LegalizerHelper::insertParts(Register DstReg, LLT ResultTy, LLT PartTy, ArrayRef PartRegs, @@ -219,11 +240,29 @@ void LegalizerHelper::insertParts(Register DstReg, static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { switch (Opcode) { case TargetOpcode::G_SDIV: - assert((Size == 32 || Size == 64) && "Unsupported size"); - return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32; + assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); + switch (Size) { + case 32: + return RTLIB::SDIV_I32; + case 64: + return RTLIB::SDIV_I64; + case 128: + return RTLIB::SDIV_I128; + default: + llvm_unreachable("unexpected size"); + } case TargetOpcode::G_UDIV: - assert((Size == 32 || Size == 64) && "Unsupported size"); - return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32; + assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); + switch (Size) { + case 32: + return RTLIB::UDIV_I32; + case 64: + return RTLIB::UDIV_I64; + case 128: + return RTLIB::UDIV_I128; + default: + llvm_unreachable("unexpected size"); + } case TargetOpcode::G_SREM: assert((Size == 32 || Size == 64) && "Unsupported size"); return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32; @@ -288,6 +327,35 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { llvm_unreachable("Unknown libcall function"); } +/// True if an instruction is in tail position in its caller. Intended for +/// legalizing libcalls as tail calls when possible. 
+static bool isLibCallInTailPosition(MachineInstr &MI) { + const Function &F = MI.getParent()->getParent()->getFunction(); + + // Conservatively require the attributes of the call to match those of + // the return. Ignore NoAlias and NonNull because they don't affect the + // call sequence. + AttributeList CallerAttrs = F.getAttributes(); + if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex) + .removeAttribute(Attribute::NoAlias) + .removeAttribute(Attribute::NonNull) + .hasAttributes()) + return false; + + // It's not safe to eliminate the sign / zero extension of the return value. + if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) || + CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt)) + return false; + + // Only tail call if the following instruction is a standard return. + auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); + MachineInstr *Next = MI.getNextNode(); + if (!Next || TII.isTailCall(*Next) || !Next->isReturn()) + return false; + + return true; +} + LegalizerHelper::LegalizeResult llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result, @@ -296,9 +364,12 @@ llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); const char *Name = TLI.getLibcallName(Libcall); - MIRBuilder.getMF().getFrameInfo().setHasCalls(true); - if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall), - MachineOperand::CreateES(Name), Result, Args)) + CallLowering::CallLoweringInfo Info; + Info.CallConv = TLI.getLibcallCallingConv(Libcall); + Info.Callee = MachineOperand::CreateES(Name); + Info.OrigRet = Result; + std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs)); + if (!CLI.lowerCall(MIRBuilder, Info)) return LegalizerHelper::UnableToLegalize; return LegalizerHelper::Legalized; @@ -317,6 +388,74 @@ simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Args); } +LegalizerHelper::LegalizeResult +llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, + MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS); + auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); + + SmallVector Args; + // Add all the args, except for the last which is an imm denoting 'tail'. + for (unsigned i = 1; i < MI.getNumOperands() - 1; i++) { + Register Reg = MI.getOperand(i).getReg(); + + // Need to derive an IR type for call lowering.
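// Simplified model of the type mapping described above and performed by the
// code that follows: pointer-typed operands become i8* in their address
// space, anything else becomes an integer type of the same bit width. The
// tiny struct below only models that decision; it is not LLVM's LLT or Type.
#include <cstdio>
#include <string>

struct SimpleLLT { bool IsPointer; unsigned SizeInBits; unsigned AddrSpace; };

static std::string irTypeFor(const SimpleLLT &T) {
  if (T.IsPointer)
    return "i8 addrspace(" + std::to_string(T.AddrSpace) + ")*";
  return "i" + std::to_string(T.SizeInBits);
}

int main() {
  std::printf("%s\n", irTypeFor({true, 64, 0}).c_str());  // a pointer operand
  std::printf("%s\n", irTypeFor({false, 64, 0}).c_str()); // the length operand
  return 0;
}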
+ LLT OpLLT = MRI.getType(Reg); + Type *OpTy = nullptr; + if (OpLLT.isPointer()) + OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace()); + else + OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits()); + Args.push_back({Reg, OpTy}); + } + + auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); + auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); + Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID(); + RTLIB::Libcall RTLibcall; + switch (ID) { + case Intrinsic::memcpy: + RTLibcall = RTLIB::MEMCPY; + break; + case Intrinsic::memset: + RTLibcall = RTLIB::MEMSET; + break; + case Intrinsic::memmove: + RTLibcall = RTLIB::MEMMOVE; + break; + default: + return LegalizerHelper::UnableToLegalize; + } + const char *Name = TLI.getLibcallName(RTLibcall); + + MIRBuilder.setInstr(MI); + + CallLowering::CallLoweringInfo Info; + Info.CallConv = TLI.getLibcallCallingConv(RTLibcall); + Info.Callee = MachineOperand::CreateES(Name); + Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx)); + Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() == 1 && + isLibCallInTailPosition(MI); + + std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs)); + if (!CLI.lowerCall(MIRBuilder, Info)) + return LegalizerHelper::UnableToLegalize; + + if (Info.LoweredTailCall) { + assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?"); + // We must have a return following the call to get past + // isLibCallInTailPosition. + assert(MI.getNextNode() && MI.getNextNode()->isReturn() && + "Expected instr following MI to be a return?"); + + // We lowered a tail call, so the call is now the return from the block. + // Delete the old return. + MI.getNextNode()->eraseFromParent(); + } + + return LegalizerHelper::Legalized; +} + static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType) { auto ToMVT = MVT::getVT(ToType); @@ -518,6 +657,65 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_SEXT: { + if (TypeIdx != 0) + return UnableToLegalize; + + Register SrcReg = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + + // FIXME: support the general case where the requested NarrowTy may not be + // the same as the source type. E.g. s128 = sext(s32) + if ((SrcTy.getSizeInBits() != SizeOp0 / 2) || + SrcTy.getSizeInBits() != NarrowTy.getSizeInBits()) { + LLVM_DEBUG(dbgs() << "Can't narrow sext to type " << NarrowTy << "\n"); + return UnableToLegalize; + } + + // Shift the sign bit of the low register through the high register. + auto ShiftAmt = + MIRBuilder.buildConstant(LLT::scalar(64), NarrowTy.getSizeInBits() - 1); + auto Shift = MIRBuilder.buildAShr(NarrowTy, SrcReg, ShiftAmt); + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {SrcReg, Shift.getReg(0)}); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_ZEXT: { + if (TypeIdx != 0) + return UnableToLegalize; + + LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + uint64_t SizeOp1 = SrcTy.getSizeInBits(); + if (SizeOp0 % SizeOp1 != 0) + return UnableToLegalize; + + // Generate a merge where the bottom bits are taken from the source, and + // zero everything else. 
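// Standalone model of the merge described above and built by the buildMerge
// call that follows: a zero-extension whose destination width is a multiple
// of the source width becomes the source part followed by all-zero parts
// (modelled here with 32-bit words and an example value).
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const uint32_t Src = 0xDEADBEEF;
  const unsigned NumParts = 4;            // e.g. s32 source, s128 destination
  std::vector<uint32_t> Parts = {Src};    // bottom bits come from the source
  for (unsigned P = 1; P < NumParts; ++P)
    Parts.push_back(0);                   // everything else is zero
  for (unsigned P = NumParts; P-- > 0;)   // print as one wide value, MSB first
    std::printf("%08x", Parts[P]);
  std::printf("\n");
  return 0;
}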
+ Register ZeroReg = MIRBuilder.buildConstant(SrcTy, 0).getReg(0); + unsigned NumParts = SizeOp0 / SizeOp1; + SmallVector Srcs = {MI.getOperand(1).getReg()}; + for (unsigned Part = 1; Part < NumParts; ++Part) + Srcs.push_back(ZeroReg); + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Srcs); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_TRUNC: { + if (TypeIdx != 1) + return UnableToLegalize; + + uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + if (NarrowTy.getSizeInBits() * 2 != SizeOp1) { + LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n"); + return UnableToLegalize; + } + + auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg()); + MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Unmerge.getReg(0)); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_ADD: { // FIXME: add support for when SizeOp0 isn't an exact multiple of // NarrowSize. @@ -530,15 +728,17 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); - Register CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1)); - MIRBuilder.buildConstant(CarryIn, 0); - + Register CarryIn; for (int i = 0; i < NumParts; ++i) { Register DstReg = MRI.createGenericVirtualRegister(NarrowTy); Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); - MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i], - Src2Regs[i], CarryIn); + if (i == 0) + MIRBuilder.buildUAddo(DstReg, CarryOut, Src1Regs[i], Src2Regs[i]); + else { + MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i], + Src2Regs[i], CarryIn); + } DstRegs.push_back(DstReg); CarryIn = CarryOut; @@ -730,7 +930,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, for (unsigned j = 1; j < MI.getNumOperands(); j += 2) MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1)); } - MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI()); + MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI()); MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); Observer.changedInstr(MI); MI.eraseFromParent(); @@ -763,6 +963,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, CmpInst::Predicate Pred = static_cast(MI.getOperand(1).getPredicate()); + LLT ResTy = MRI.getType(MI.getOperand(0).getReg()); if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) { MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL); @@ -771,18 +972,109 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0); MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero); } else { - const LLT s1 = LLT::scalar(1); - MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, s1, LHSH, RHSH); + MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH); MachineInstrBuilder CmpHEQ = - MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, s1, LHSH, RHSH); + MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH); MachineInstrBuilder CmpLU = MIRBuilder.buildICmp( - ICmpInst::getUnsignedPredicate(Pred), s1, LHSL, RHSL); + ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL); MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH); } Observer.changedInstr(MI); MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_SEXT_INREG: { + if (TypeIdx 
!= 0) + return UnableToLegalize; + + if (!MI.getOperand(2).isImm()) + return UnableToLegalize; + int64_t SizeInBits = MI.getOperand(2).getImm(); + + // So long as the new type has more bits than the bits we're extending we + // don't need to break it apart. + if (NarrowTy.getScalarSizeInBits() >= SizeInBits) { + Observer.changingInstr(MI); + // We don't lose any non-extension bits by truncating the src and + // sign-extending the dst. + MachineOperand &MO1 = MI.getOperand(1); + auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1.getReg()); + MO1.setReg(TruncMIB->getOperand(0).getReg()); + + MachineOperand &MO2 = MI.getOperand(0); + Register DstExt = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); + MIRBuilder.buildInstr(TargetOpcode::G_SEXT, {MO2.getReg()}, {DstExt}); + MO2.setReg(DstExt); + Observer.changedInstr(MI); + return Legalized; + } + + // Break it apart. Components below the extension point are unmodified. The + // component containing the extension point becomes a narrower SEXT_INREG. + // Components above it are ashr'd from the component containing the + // extension point. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp0 / NarrowSize; + + // List the registers where the destination will be scattered. + SmallVector DstRegs; + // List the registers where the source will be split. + SmallVector SrcRegs; + + // Create all the temporary registers. + for (int i = 0; i < NumParts; ++i) { + Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy); + + SrcRegs.push_back(SrcReg); + } + + // Explode the big arguments into smaller chunks. + MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1).getReg()); + + Register AshrCstReg = + MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1) + ->getOperand(0) + .getReg(); + Register FullExtensionReg = 0; + Register PartialExtensionReg = 0; + + // Do the operation on each small part. + for (int i = 0; i < NumParts; ++i) { + if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits) + DstRegs.push_back(SrcRegs[i]); + else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) { + assert(PartialExtensionReg && + "Expected to visit partial extension before full"); + if (FullExtensionReg) { + DstRegs.push_back(FullExtensionReg); + continue; + } + DstRegs.push_back(MIRBuilder + .buildInstr(TargetOpcode::G_ASHR, {NarrowTy}, + {PartialExtensionReg, AshrCstReg}) + ->getOperand(0) + .getReg()); + FullExtensionReg = DstRegs.back(); + } else { + DstRegs.push_back( + MIRBuilder + .buildInstr( + TargetOpcode::G_SEXT_INREG, {NarrowTy}, + {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()}) + ->getOperand(0) + .getReg()); + PartialExtensionReg = DstRegs.back(); + } + } + + // Gather the destination registers into the final destination. + Register DstReg = MI.getOperand(0).getReg(); + MIRBuilder.buildMerge(DstReg, DstRegs); + MI.eraseFromParent(); + return Legalized; + } } } @@ -892,7 +1184,7 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg); - Register NextResult = I + 1 == NumOps && WideSize == DstSize ? DstReg : + Register NextResult = I + 1 == NumOps && WideTy == DstTy ? 
DstReg : MRI.createGenericVirtualRegister(WideTy); auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset); @@ -903,6 +1195,8 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, if (WideSize > DstSize) MIRBuilder.buildTrunc(DstReg, ResultReg); + else if (DstTy.isPointer()) + MIRBuilder.buildIntToPtr(DstReg, ResultReg); MI.eraseFromParent(); return Legalized; @@ -1218,6 +1512,24 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_BITREVERSE: { + Observer.changingInstr(MI); + + Register DstReg = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(DstReg); + unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits(); + + Register DstExt = MRI.createGenericVirtualRegister(WideTy); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + MI.getOperand(0).setReg(DstExt); + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); + + auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits); + auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt); + MIRBuilder.buildTrunc(DstReg, Shift); + Observer.changedInstr(MI); + return Legalized; + } case TargetOpcode::G_ADD: case TargetOpcode::G_AND: case TargetOpcode::G_MUL: @@ -1310,13 +1622,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FPTOSI: case TargetOpcode::G_FPTOUI: - if (TypeIdx != 0) - return UnableToLegalize; Observer.changingInstr(MI); - widenScalarDst(MI, WideTy); + + if (TypeIdx == 0) + widenScalarDst(MI, WideTy); + else + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT); + Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_SITOFP: if (TypeIdx != 1) return UnableToLegalize; @@ -1483,6 +1797,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FMUL: case TargetOpcode::G_FSUB: case TargetOpcode::G_FMA: + case TargetOpcode::G_FMAD: case TargetOpcode::G_FNEG: case TargetOpcode::G_FABS: case TargetOpcode::G_FCANONICALIZE: @@ -1553,6 +1868,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_SEXT_INREG: + if (TypeIdx != 0) + return UnableToLegalize; + + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC); + Observer.changedInstr(MI); + return Legalized; } } @@ -1579,6 +1903,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_SADDO: + case TargetOpcode::G_SSUBO: + return lowerSADDO_SSUBO(MI); case TargetOpcode::G_SMULO: case TargetOpcode::G_UMULO: { // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the @@ -1669,6 +1996,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_FMAD: + return lowerFMad(MI); case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { Register OldValRes = MI.getOperand(0).getReg(); Register SuccessRes = MI.getOperand(1).getReg(); @@ -1690,11 +2019,57 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { LLT DstTy = MRI.getType(DstReg); auto &MMO = **MI.memoperands_begin(); - if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) { - // In the case of G_LOAD, this was a non-extending load already and we're - // about to lower to the 
same instruction. - if (MI.getOpcode() == TargetOpcode::G_LOAD) + if (DstTy.getSizeInBits() == MMO.getSizeInBits()) { + if (MI.getOpcode() == TargetOpcode::G_LOAD) { + // This load needs splitting into power of 2 sized loads. + if (DstTy.isVector()) return UnableToLegalize; + if (isPowerOf2_32(DstTy.getSizeInBits())) + return UnableToLegalize; // Don't know what we're being asked to do. + + // Our strategy here is to generate anyextending loads for the smaller + // types up to next power-2 result type, and then combine the two larger + // result values together, before truncating back down to the non-pow-2 + // type. + // E.g. v1 = i24 load => + // v2 = i32 load (2 byte) + // v3 = i32 load (1 byte) + // v4 = i32 shl v3, 16 + // v5 = i32 or v4, v2 + // v1 = i24 trunc v5 + // By doing this we generate the correct truncate which should get + // combined away as an artifact with a matching extend. + uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits()); + uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize; + + MachineFunction &MF = MIRBuilder.getMF(); + MachineMemOperand *LargeMMO = + MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); + MachineMemOperand *SmallMMO = MF.getMachineMemOperand( + &MMO, LargeSplitSize / 8, SmallSplitSize / 8); + + LLT PtrTy = MRI.getType(PtrReg); + unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits()); + LLT AnyExtTy = LLT::scalar(AnyExtSize); + Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy); + Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy); + auto LargeLoad = + MIRBuilder.buildLoad(LargeLdReg, PtrReg, *LargeMMO); + + auto OffsetCst = + MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8); + Register GEPReg = MRI.createGenericVirtualRegister(PtrTy); + auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0)); + auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0), + *SmallMMO); + + auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize); + auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt); + auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad); + MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)}); + MI.eraseFromParent(); + return Legalized; + } MIRBuilder.buildLoad(DstReg, PtrReg, MMO); MI.eraseFromParent(); return Legalized; @@ -1723,6 +2098,51 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return UnableToLegalize; } + case TargetOpcode::G_STORE: { + // Lower a non-power of 2 store into multiple pow-2 stores. + // E.g. split an i24 store into an i16 store + i8 store. + // We do this by first extending the stored value to the next largest power + // of 2 type, and then using truncating stores to store the components. + // By doing this, likewise with G_LOAD, generate an extend that can be + // artifact-combined away instead of leaving behind extracts. + Register SrcReg = MI.getOperand(0).getReg(); + Register PtrReg = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + MachineMemOperand &MMO = **MI.memoperands_begin(); + if (SrcTy.getSizeInBits() != MMO.getSizeInBits()) + return UnableToLegalize; + if (SrcTy.isVector()) + return UnableToLegalize; + if (isPowerOf2_32(SrcTy.getSizeInBits())) + return UnableToLegalize; // Don't know what we're being asked to do. + + // Extend to the next pow-2. + const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits())); + auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg); + + // Obtain the smaller value by shifting away the larger value. 
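
The splitting strategy spelled out in the comments above (an i24 load becomes a 2-byte and a 1-byte anyextending load that are shifted and OR'd together, and an i24 store becomes a truncating 2-byte store plus a 1-byte store of the value shifted right) can be modelled in plain C++. A minimal standalone sketch, assuming a little-endian host; the helper names are illustrative and nothing here is LLVM API:

#include <cstdint>
#include <cstring>
#include <cassert>

// Load a 24-bit little-endian value as one 16-bit load plus one 8-bit load,
// widened to 32 bits, shifted and OR'd together, then masked back to 24 bits
// (the mask plays the role of the final G_TRUNC).
uint32_t load_i24(const uint8_t *p) {
  uint16_t large;                        // "LargeSplitSize" = 16 bits
  uint8_t small;                         // "SmallSplitSize" = 8 bits
  std::memcpy(&large, p, 2);             // load at offset 0
  std::memcpy(&small, p + 2, 1);         // load at offset LargeSplitSize/8
  uint32_t merged = (uint32_t)small << 16 | large;
  return merged & 0xFFFFFFu;
}

// Store a 24-bit value as a truncating 16-bit store plus an 8-bit store of
// the value shifted right by 16 ("shift away the larger part").
void store_i24(uint8_t *p, uint32_t v) {
  uint16_t large = (uint16_t)v;          // low 16 bits
  uint8_t small = (uint8_t)(v >> 16);    // remaining 8 bits
  std::memcpy(p, &large, 2);
  std::memcpy(p + 2, &small, 1);
}

int main() {
  uint8_t buf[3];
  store_i24(buf, 0xABCDEF);
  assert(load_i24(buf) == 0xABCDEF);
}
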
+ uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits()); + uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize; + auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize); + auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt); + + // Generate the GEP and truncating stores. + LLT PtrTy = MRI.getType(PtrReg); + auto OffsetCst = + MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8); + Register GEPReg = MRI.createGenericVirtualRegister(PtrTy); + auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0)); + + MachineFunction &MF = MIRBuilder.getMF(); + MachineMemOperand *LargeMMO = + MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); + MachineMemOperand *SmallMMO = + MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8); + MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO); + MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO); + MI.eraseFromParent(); + return Legalized; + } case TargetOpcode::G_CTLZ_ZERO_UNDEF: case TargetOpcode::G_CTTZ_ZERO_UNDEF: case TargetOpcode::G_CTLZ: @@ -1797,6 +2217,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return lowerUITOFP(MI, TypeIdx, Ty); case G_SITOFP: return lowerSITOFP(MI, TypeIdx, Ty); + case G_FPTOUI: + return lowerFPTOUI(MI, TypeIdx, Ty); case G_SMIN: case G_SMAX: case G_UMIN: @@ -1807,6 +2229,31 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { case G_FMINNUM: case G_FMAXNUM: return lowerFMinNumMaxNum(MI); + case G_UNMERGE_VALUES: + return lowerUnmergeValues(MI); + case TargetOpcode::G_SEXT_INREG: { + assert(MI.getOperand(2).isImm() && "Expected immediate"); + int64_t SizeInBits = MI.getOperand(2).getImm(); + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + Register TmpRes = MRI.createGenericVirtualRegister(DstTy); + + auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits); + MIRBuilder.buildInstr(TargetOpcode::G_SHL, {TmpRes}, {SrcReg, MIBSz->getOperand(0).getReg()}); + MIRBuilder.buildInstr(TargetOpcode::G_ASHR, {DstReg}, {TmpRes, MIBSz->getOperand(0).getReg()}); + MI.eraseFromParent(); + return Legalized; + } + case G_SHUFFLE_VECTOR: + return lowerShuffleVector(MI); + case G_DYN_STACKALLOC: + return lowerDynStackAlloc(MI); + case G_EXTRACT: + return lowerExtract(MI); + case G_INSERT: + return lowerInsert(MI); } } @@ -2282,6 +2729,105 @@ LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, return Legalized; } +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 1) + return UnableToLegalize; + + const int NumDst = MI.getNumOperands() - 1; + const Register SrcReg = MI.getOperand(NumDst).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + // TODO: Create sequence of extracts. + if (DstTy == NarrowTy) + return UnableToLegalize; + + LLT GCDTy = getGCDType(SrcTy, NarrowTy); + if (DstTy == GCDTy) { + // This would just be a copy of the same unmerge. + // TODO: Create extracts, pad with undef and create intermediate merges. 
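
The G_SEXT_INREG lowering above reduces to a shift pair: move the field to the top of the register with G_SHL, then arithmetic-shift it back down so its sign bit fills the upper bits. A minimal standalone model, assuming a 64-bit register and two's-complement arithmetic right shifts (guaranteed from C++20); the function name is illustrative:

#include <cstdint>
#include <cassert>

// Model of the shift-pair lowering: shift the field to the top of the
// register (G_SHL), then bring it back down (G_ASHR) so the field's sign bit
// is replicated into the upper bits.
int64_t sext_inreg64(uint64_t x, unsigned size_in_bits) {
  const unsigned shift = 64 - size_in_bits;   // DstSize - SizeInBits
  return (int64_t)(x << shift) >> shift;      // G_SHL, then G_ASHR
}

int main() {
  assert(sext_inreg64(0xFFu, 8) == -1);       // 8-bit 0xFF is -1
  assert(sext_inreg64(0x7Fu, 8) == 0x7F);     // sign bit clear: unchanged
  assert(sext_inreg64(0x8000u, 16) == -32768);
}
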
+ return UnableToLegalize; + } + + auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg); + const int NumUnmerge = Unmerge->getNumOperands() - 1; + const int PartsPerUnmerge = NumDst / NumUnmerge; + + for (int I = 0; I != NumUnmerge; ++I) { + auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); + + for (int J = 0; J != PartsPerUnmerge; ++J) + MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg()); + MIB.addUse(Unmerge.getReg(I)); + } + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowTy) { + assert(TypeIdx == 0 && "not a vector type index"); + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = DstTy.getElementType(); + + int DstNumElts = DstTy.getNumElements(); + int NarrowNumElts = NarrowTy.getNumElements(); + int NumConcat = (DstNumElts + NarrowNumElts - 1) / NarrowNumElts; + LLT WidenedDstTy = LLT::vector(NarrowNumElts * NumConcat, SrcTy); + + SmallVector ConcatOps; + SmallVector SubBuildVector; + + Register UndefReg; + if (WidenedDstTy != DstTy) + UndefReg = MIRBuilder.buildUndef(SrcTy).getReg(0); + + // Create a G_CONCAT_VECTORS of NarrowTy pieces, padding with undef as + // necessary. + // + // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2 + // -> <2 x s16> + // + // %4:_(s16) = G_IMPLICIT_DEF + // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1 + // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4 + // %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6 + // %3:_(<3 x s16>) = G_EXTRACT %7, 0 + for (int I = 0; I != NumConcat; ++I) { + for (int J = 0; J != NarrowNumElts; ++J) { + int SrcIdx = NarrowNumElts * I + J; + + if (SrcIdx < DstNumElts) { + Register SrcReg = MI.getOperand(SrcIdx + 1).getReg(); + SubBuildVector.push_back(SrcReg); + } else + SubBuildVector.push_back(UndefReg); + } + + auto BuildVec = MIRBuilder.buildBuildVector(NarrowTy, SubBuildVector); + ConcatOps.push_back(BuildVec.getReg(0)); + SubBuildVector.clear(); + } + + if (DstTy == WidenedDstTy) + MIRBuilder.buildConcatVectors(DstReg, ConcatOps); + else { + auto Concat = MIRBuilder.buildConcatVectors(WidenedDstTy, ConcatOps); + MIRBuilder.buildExtract(DstReg, Concat, 0); + } + + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { @@ -2395,6 +2941,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_FDIV: case G_FREM: case G_FMA: + case G_FMAD: case G_FPOW: case G_FEXP: case G_FEXP2: @@ -2411,6 +2958,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_FSIN: case G_FSQRT: case G_BSWAP: + case G_BITREVERSE: case G_SDIV: case G_SMIN: case G_SMAX: @@ -2453,6 +3001,10 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy); case G_PHI: return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy); + case G_UNMERGE_VALUES: + return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy); + case G_BUILD_VECTOR: + return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy); case G_LOAD: case G_STORE: return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); @@ -2604,11 +3156,11 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, switch (MI.getOpcode()) { case TargetOpcode::G_SHL: { // Short: ShAmt < NewBitSize - auto LoS = MIRBuilder.buildShl(HalfTy, InH, Amt); + auto LoS = MIRBuilder.buildShl(HalfTy, InL, 
Amt); - auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt); - auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack); - auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); + auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack); + auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt); + auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr); // Long: ShAmt >= NewBitSize auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero. @@ -2622,41 +3174,25 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, ResultRegs[1] = Hi.getReg(0); break; } - case TargetOpcode::G_LSHR: { - // Short: ShAmt < NewBitSize - auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt); - - auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt); - auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack); - auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); - - // Long: ShAmt >= NewBitSize - auto HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero. - auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part. - - auto Lo = MIRBuilder.buildSelect( - HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL)); - auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL); - - ResultRegs[0] = Lo.getReg(0); - ResultRegs[1] = Hi.getReg(0); - break; - } + case TargetOpcode::G_LSHR: case TargetOpcode::G_ASHR: { // Short: ShAmt < NewBitSize - auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt); + auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt}); - auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt); - auto OrRHS = MIRBuilder.buildLShr(HalfTy, InH, AmtLack); - auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); + auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt); + auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack); + auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr); // Long: ShAmt >= NewBitSize - - // Sign of Hi part. - auto HiL = MIRBuilder.buildAShr( - HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1)); - - auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part. + MachineInstrBuilder HiL; + if (MI.getOpcode() == TargetOpcode::G_LSHR) { + HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero. + } else { + auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1); + HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part. + } + auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, + {InH, AmtExcess}); // Lo from Hi part. 
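
The short/long split used by narrowScalarShift above can be checked in isolation. A standalone sketch for a 64-bit G_SHL narrowed to 32-bit halves, mirroring the IsZero/IsShort selects; it models the emitted MIR rather than calling any LLVM API:

#include <cstdint>
#include <cassert>

// "Short" shifts (< 32) combine both halves; "long" shifts (>= 32) move the
// low half into the high half; a zero amount passes the input through.
uint64_t shl64_via_32(uint32_t lo, uint32_t hi, unsigned amt) {
  uint32_t new_lo, new_hi;
  if (amt == 0) {                               // IsZero: pass through
    new_lo = lo;
    new_hi = hi;
  } else if (amt < 32) {                        // IsShort: ShAmt < NewBitSize
    new_lo = lo << amt;
    new_hi = (hi << amt) | (lo >> (32 - amt));  // HiS = (InH << Amt) | (InL >> AmtLack)
  } else {                                      // long: ShAmt >= NewBitSize
    new_lo = 0;                                 // Lo part is zero
    new_hi = lo << (amt - 32);                  // Hi from Lo, shifted by AmtExcess
  }
  return ((uint64_t)new_hi << 32) | new_lo;
}

int main() {
  uint64_t x = 0x0123456789ABCDEFULL;
  for (unsigned amt = 0; amt < 64; ++amt)
    assert(shl64_via_32((uint32_t)x, (uint32_t)(x >> 32), amt) == (x << amt));
}
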
auto Lo = MIRBuilder.buildSelect( HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL)); @@ -2701,12 +3237,22 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, MIRBuilder.setInstr(MI); unsigned Opc = MI.getOpcode(); switch (Opc) { - case TargetOpcode::G_IMPLICIT_DEF: { + case TargetOpcode::G_IMPLICIT_DEF: + case TargetOpcode::G_LOAD: { + if (TypeIdx != 0) + return UnableToLegalize; Observer.changingInstr(MI); moreElementsVectorDst(MI, MoreTy, 0); Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_STORE: + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + moreElementsVectorSrc(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; case TargetOpcode::G_AND: case TargetOpcode::G_OR: case TargetOpcode::G_XOR: @@ -2748,6 +3294,26 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, moreElementsVectorDst(MI, MoreTy, 0); Observer.changedInstr(MI); return Legalized; + case TargetOpcode::G_UNMERGE_VALUES: { + if (TypeIdx != 1) + return UnableToLegalize; + + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + int NumDst = MI.getNumOperands() - 1; + moreElementsVectorSrc(MI, MoreTy, NumDst); + + auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); + for (int I = 0; I != NumDst; ++I) + MIB.addDef(MI.getOperand(I).getReg()); + + int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits(); + for (int I = NumDst; I != NewNumDst; ++I) + MIB.addDef(MRI.createGenericVirtualRegister(DstTy)); + + MIB.addUse(MI.getOperand(NumDst).getReg()); + MI.eraseFromParent(); + return Legalized; + } case TargetOpcode::G_PHI: return moreElementsVectorPhi(MI, TypeIdx, MoreTy); default: @@ -3310,6 +3876,48 @@ LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return UnableToLegalize; } +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Src); + const LLT S64 = LLT::scalar(64); + const LLT S32 = LLT::scalar(32); + + if (SrcTy != S64 && SrcTy != S32) + return UnableToLegalize; + if (DstTy != S32 && DstTy != S64) + return UnableToLegalize; + + // FPTOSI gives same result as FPTOUI for positive signed integers. + // FPTOUI needs to deal with fp values that convert to unsigned integers + // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp. + + APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits()); + APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle() + : APFloat::IEEEdouble(), + APInt::getNullValue(SrcTy.getSizeInBits())); + TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven); + + MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src); + + MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP); + // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on + // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1. 
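
A standalone sketch of the threshold trick described in the comment above, for float to u32 and assuming IEEE single precision: values below 2^31 go through a plain signed conversion, larger values are rebased by 2^31, converted, and get the top bit XOR'd back in, matching the FSub/FPTOSI/XOR/select sequence built next:

#include <cstdint>
#include <cassert>

// Model of the FPTOUI lowering for f32 -> u32.
uint32_t fptoui32(float x) {
  const float threshold = 2147483648.0f;        // 2^31, exactly representable
  if (x < threshold)                            // FCMP_ULT Src, Threshold
    return (uint32_t)(int32_t)x;                // plain FPTOSI
  int32_t rebased = (int32_t)(x - threshold);   // FPTOSI(Src - 2^31)
  return (uint32_t)rebased ^ 0x80000000u;       // XOR the sign-mask bit back in
}

int main() {
  assert(fptoui32(7.0f) == 7u);
  assert(fptoui32(3000000000.0f) == 3000000000u);  // above 2^31
}
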
+ MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold); + MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub); + MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt); + MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit); + + MachineInstrBuilder FCMP = + MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, DstTy, Src, Threshold); + MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res); + + MI.eraseFromParent(); + return Legalized; +} + static CmpInst::Predicate minMaxToCompare(unsigned Opc) { switch (Opc) { case TargetOpcode::G_SMIN: @@ -3419,3 +4027,251 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) { MI.eraseFromParent(); return Legalized; } + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) { + // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c + Register DstReg = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(DstReg); + unsigned Flags = MI.getFlags(); + + auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2), + Flags); + MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) { + const unsigned NumDst = MI.getNumOperands() - 1; + const Register SrcReg = MI.getOperand(NumDst).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + + Register Dst0Reg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(Dst0Reg); + + + // Expand scalarizing unmerge as bitcast to integer and shift. + if (!DstTy.isVector() && SrcTy.isVector() && + SrcTy.getElementType() == DstTy) { + LLT IntTy = LLT::scalar(SrcTy.getSizeInBits()); + Register Cast = MIRBuilder.buildBitcast(IntTy, SrcReg).getReg(0); + + MIRBuilder.buildTrunc(Dst0Reg, Cast); + + const unsigned DstSize = DstTy.getSizeInBits(); + unsigned Offset = DstSize; + for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) { + auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset); + auto Shift = MIRBuilder.buildLShr(IntTy, Cast, ShiftAmt); + MIRBuilder.buildTrunc(MI.getOperand(I), Shift); + } + + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { + Register DstReg = MI.getOperand(0).getReg(); + Register Src0Reg = MI.getOperand(1).getReg(); + Register Src1Reg = MI.getOperand(2).getReg(); + LLT Src0Ty = MRI.getType(Src0Reg); + LLT DstTy = MRI.getType(DstReg); + LLT IdxTy = LLT::scalar(32); + + const Constant *ShufMask = MI.getOperand(3).getShuffleMask(); + + SmallVector Mask; + ShuffleVectorInst::getShuffleMask(ShufMask, Mask); + + if (DstTy.isScalar()) { + if (Src0Ty.isVector()) + return UnableToLegalize; + + // This is just a SELECT. + assert(Mask.size() == 1 && "Expected a single mask element"); + Register Val; + if (Mask[0] < 0 || Mask[0] > 1) + Val = MIRBuilder.buildUndef(DstTy).getReg(0); + else + Val = Mask[0] == 0 ? Src0Reg : Src1Reg; + MIRBuilder.buildCopy(DstReg, Val); + MI.eraseFromParent(); + return Legalized; + } + + Register Undef; + SmallVector BuildVec; + LLT EltTy = DstTy.getElementType(); + + for (int Idx : Mask) { + if (Idx < 0) { + if (!Undef.isValid()) + Undef = MIRBuilder.buildUndef(EltTy).getReg(0); + BuildVec.push_back(Undef); + continue; + } + + if (Src0Ty.isScalar()) { + BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg); + } else { + int NumElts = Src0Ty.getNumElements(); + Register SrcVec = Idx < NumElts ? 
Src0Reg : Src1Reg; + int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts; + auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx); + auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK); + BuildVec.push_back(Extract.getReg(0)); + } + } + + MIRBuilder.buildBuildVector(DstReg, BuildVec); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register AllocSize = MI.getOperand(1).getReg(); + unsigned Align = MI.getOperand(2).getImm(); + + const auto &MF = *MI.getMF(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + + LLT PtrTy = MRI.getType(Dst); + LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits()); + + Register SPReg = TLI.getStackPointerRegisterToSaveRestore(); + auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg); + SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp); + + // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't + // have to generate an extra instruction to negate the alloc and then use + // G_GEP to add the negative offset. + auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize); + if (Align) { + APInt AlignMask(IntPtrTy.getSizeInBits(), Align, true); + AlignMask.negate(); + auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask); + Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst); + } + + SPTmp = MIRBuilder.buildCast(PtrTy, Alloc); + MIRBuilder.buildCopy(SPReg, SPTmp); + MIRBuilder.buildCopy(Dst, SPTmp); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerExtract(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + unsigned Offset = MI.getOperand(2).getImm(); + + LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Src); + + if (DstTy.isScalar() && + (SrcTy.isScalar() || + (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) { + LLT SrcIntTy = SrcTy; + if (!SrcTy.isScalar()) { + SrcIntTy = LLT::scalar(SrcTy.getSizeInBits()); + Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0); + } + + if (Offset == 0) + MIRBuilder.buildTrunc(Dst, Src); + else { + auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset); + auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt); + MIRBuilder.buildTrunc(Dst, Shr); + } + + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + Register InsertSrc = MI.getOperand(2).getReg(); + uint64_t Offset = MI.getOperand(3).getImm(); + + LLT DstTy = MRI.getType(Src); + LLT InsertTy = MRI.getType(InsertSrc); + + if (InsertTy.isScalar() && + (DstTy.isScalar() || + (DstTy.isVector() && DstTy.getElementType() == InsertTy))) { + LLT IntDstTy = DstTy; + if (!DstTy.isScalar()) { + IntDstTy = LLT::scalar(DstTy.getSizeInBits()); + Src = MIRBuilder.buildBitcast(IntDstTy, Src).getReg(0); + } + + Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0); + if (Offset != 0) { + auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset); + ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0); + } + + APInt MaskVal = ~APInt::getBitsSet(DstTy.getSizeInBits(), Offset, + InsertTy.getSizeInBits()); + + auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal); + auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask); + auto Or = 
MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc); + + MIRBuilder.buildBitcast(Dst, Or); + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) { + Register Dst0 = MI.getOperand(0).getReg(); + Register Dst1 = MI.getOperand(1).getReg(); + Register LHS = MI.getOperand(2).getReg(); + Register RHS = MI.getOperand(3).getReg(); + const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO; + + LLT Ty = MRI.getType(Dst0); + LLT BoolTy = MRI.getType(Dst1); + + if (IsAdd) + MIRBuilder.buildAdd(Dst0, LHS, RHS); + else + MIRBuilder.buildSub(Dst0, LHS, RHS); + + // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow. + + auto Zero = MIRBuilder.buildConstant(Ty, 0); + + // For an addition, the result should be less than one of the operands (LHS) + // if and only if the other operand (RHS) is negative, otherwise there will + // be overflow. + // For a subtraction, the result should be less than one of the operands + // (LHS) if and only if the other operand (RHS) is (non-zero) positive, + // otherwise there will be overflow. + auto ResultLowerThanLHS = + MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS); + auto ConditionRHS = MIRBuilder.buildICmp( + IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero); + + MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS); + MI.eraseFromParent(); + return Legalized; +} diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 6e1de95b3277..70045512fae5 100644 --- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -215,7 +215,30 @@ bool LegalizeRuleSet::verifyTypeIdxsCoverage(unsigned NumTypeIdxs) const { return true; } const bool AllCovered = (FirstUncovered >= NumTypeIdxs); - LLVM_DEBUG(dbgs() << ".. the first uncovered type index: " << FirstUncovered + if (NumTypeIdxs > 0) + LLVM_DEBUG(dbgs() << ".. the first uncovered type index: " << FirstUncovered + << ", " << (AllCovered ? "OK" : "FAIL") << "\n"); + return AllCovered; +#else + return true; +#endif +} + +bool LegalizeRuleSet::verifyImmIdxsCoverage(unsigned NumImmIdxs) const { +#ifndef NDEBUG + if (Rules.empty()) { + LLVM_DEBUG( + dbgs() << ".. imm index coverage check SKIPPED: no rules defined\n"); + return true; + } + const int64_t FirstUncovered = ImmIdxsCovered.find_first_unset(); + if (FirstUncovered < 0) { + LLVM_DEBUG(dbgs() << ".. imm index coverage check SKIPPED:" + " user-defined predicate detected\n"); + return true; + } + const bool AllCovered = (FirstUncovered >= NumImmIdxs); + LLVM_DEBUG(dbgs() << ".. the first uncovered imm index: " << FirstUncovered << ", " << (AllCovered ? "OK" : "FAIL") << "\n"); return AllCovered; #else @@ -387,8 +410,6 @@ unsigned LegalizerInfo::getActionDefinitionsIdx(unsigned Opcode) const { LLVM_DEBUG(dbgs() << ".. opcode " << Opcode << " is aliased to " << Alias << "\n"); OpcodeIdx = getOpcodeIdxForOpcode(Alias); - LLVM_DEBUG(dbgs() << ".. 
opcode " << Alias << " is aliased to " - << RulesForOpcode[OpcodeIdx].getAlias() << "\n"); assert(RulesForOpcode[OpcodeIdx].getAlias() == 0 && "Cannot chain aliases"); } @@ -412,7 +433,7 @@ LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder( std::initializer_list Opcodes) { unsigned Representative = *Opcodes.begin(); - assert(!empty(Opcodes) && Opcodes.begin() + 1 != Opcodes.end() && + assert(!llvm::empty(Opcodes) && Opcodes.begin() + 1 != Opcodes.end() && "Initializer list must have at least two opcodes"); for (auto I = Opcodes.begin() + 1, E = Opcodes.end(); I != E; ++I) @@ -677,12 +698,23 @@ void LegalizerInfo::verify(const MCInstrInfo &MII) const { ? std::max(OpInfo.getGenericTypeIndex() + 1U, Acc) : Acc; }); + const unsigned NumImmIdxs = std::accumulate( + MCID.opInfo_begin(), MCID.opInfo_end(), 0U, + [](unsigned Acc, const MCOperandInfo &OpInfo) { + return OpInfo.isGenericImm() + ? std::max(OpInfo.getGenericImmIndex() + 1U, Acc) + : Acc; + }); LLVM_DEBUG(dbgs() << MII.getName(Opcode) << " (opcode " << Opcode << "): " << NumTypeIdxs << " type ind" - << (NumTypeIdxs == 1 ? "ex" : "ices") << "\n"); + << (NumTypeIdxs == 1 ? "ex" : "ices") << ", " + << NumImmIdxs << " imm ind" + << (NumImmIdxs == 1 ? "ex" : "ices") << "\n"); const LegalizeRuleSet &RuleSet = getActionDefinitions(Opcode); if (!RuleSet.verifyTypeIdxsCoverage(NumTypeIdxs)) FailedOpcodes.push_back(Opcode); + else if (!RuleSet.verifyImmIdxsCoverage(NumImmIdxs)) + FailedOpcodes.push_back(Opcode); } if (!FailedOpcodes.empty()) { errs() << "The following opcodes have ill-defined legalization rules:"; diff --git a/lib/CodeGen/GlobalISel/Localizer.cpp b/lib/CodeGen/GlobalISel/Localizer.cpp index 3592409710a7..f882ecbf5db3 100644 --- a/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/lib/CodeGen/GlobalISel/Localizer.cpp @@ -79,7 +79,7 @@ bool Localizer::shouldLocalize(const MachineInstr &MI) { return true; case TargetOpcode::G_GLOBAL_VALUE: { unsigned RematCost = TTI->getGISelRematGlobalCost(); - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); unsigned MaxUses = maxUses(RematCost); if (MaxUses == UINT_MAX) return true; // Remats are "free" so always localize. @@ -121,7 +121,7 @@ bool Localizer::localizeInterBlock(MachineFunction &MF, LLVM_DEBUG(dbgs() << "Should localize: " << MI); assert(MI.getDesc().getNumDefs() == 1 && "More than one definition not supported yet"); - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); // Check if all the users of MI are local. // We are going to invalidation the list of use operands, so we // can't use range iterator. @@ -151,7 +151,7 @@ bool Localizer::localizeInterBlock(MachineFunction &MF, LocalizedMI); // Set a new register for the definition. - unsigned NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg)); + Register NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg)); MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg)); LocalizedMI->getOperand(0).setReg(NewReg); NewVRegIt = @@ -177,7 +177,7 @@ bool Localizer::localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs) { // many users, but this case may be better served by regalloc improvements. for (MachineInstr *MI : LocalizedInstrs) { - unsigned Reg = MI->getOperand(0).getReg(); + Register Reg = MI->getOperand(0).getReg(); MachineBasicBlock &MBB = *MI->getParent(); // All of the user MIs of this reg. 
SmallPtrSet Users; @@ -220,5 +220,6 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) { LocalizedSetVecT LocalizedInstrs; bool Changed = localizeInterBlock(MF, LocalizedInstrs); - return Changed |= localizeIntraBlock(LocalizedInstrs); + Changed |= localizeIntraBlock(LocalizedInstrs); + return Changed; } diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index b7a73326b85c..df770f6664ca 100644 --- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -107,9 +107,13 @@ MachineIRBuilder::buildIndirectDbgValue(Register Reg, const MDNode *Variable, assert( cast(Variable)->isValidLocationForIntrinsic(getDL()) && "Expected inlined-at fields to agree"); + // DBG_VALUE insts now carry IR-level indirection in their DIExpression + // rather than encoding it in the instruction itself. + const DIExpression *DIExpr = cast(Expr); + DIExpr = DIExpression::append(DIExpr, {dwarf::DW_OP_deref}); return insertInstr(BuildMI(getMF(), getDL(), getTII().get(TargetOpcode::DBG_VALUE), - /*IsIndirect*/ true, Reg, Variable, Expr)); + /*IsIndirect*/ false, Reg, Variable, DIExpr)); } MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI, @@ -120,11 +124,15 @@ MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI, assert( cast(Variable)->isValidLocationForIntrinsic(getDL()) && "Expected inlined-at fields to agree"); + // DBG_VALUE insts now carry IR-level indirection in their DIExpression + // rather than encoding it in the instruction itself. + const DIExpression *DIExpr = cast(Expr); + DIExpr = DIExpression::append(DIExpr, {dwarf::DW_OP_deref}); return buildInstr(TargetOpcode::DBG_VALUE) .addFrameIndex(FI) - .addImm(0) + .addReg(0) .addMetadata(Variable) - .addMetadata(Expr); + .addMetadata(DIExpr); } MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C, @@ -148,7 +156,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C, MIB.addReg(0U); } - return MIB.addImm(0).addMetadata(Variable).addMetadata(Expr); + return MIB.addReg(0).addMetadata(Variable).addMetadata(Expr); } MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) { @@ -160,6 +168,17 @@ MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) { return MIB.addMetadata(Label); } +MachineInstrBuilder MachineIRBuilder::buildDynStackAlloc(const DstOp &Res, + const SrcOp &Size, + unsigned Align) { + assert(Res.getLLTTy(*getMRI()).isPointer() && "expected ptr dst type"); + auto MIB = buildInstr(TargetOpcode::G_DYN_STACKALLOC); + Res.addDefToMIB(*getMRI(), MIB); + Size.addSrcToMIB(MIB); + MIB.addImm(Align); + return MIB; +} + MachineInstrBuilder MachineIRBuilder::buildFrameIndex(const DstOp &Res, int Idx) { assert(Res.getLLTTy(*getMRI()).isPointer() && "invalid operand type"); @@ -207,11 +226,7 @@ MachineInstrBuilder MachineIRBuilder::buildGEP(const DstOp &Res, Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch"); assert(Op1.getLLTTy(*getMRI()).isScalar() && "invalid offset type"); - auto MIB = buildInstr(TargetOpcode::G_GEP); - Res.addDefToMIB(*getMRI(), MIB); - Op0.addSrcToMIB(MIB); - Op1.addSrcToMIB(MIB); - return MIB; + return buildInstr(TargetOpcode::G_GEP, {Res}, {Op0, Op1}); } Optional @@ -697,17 +712,19 @@ MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred, MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, - const SrcOp &Op1) { + const SrcOp &Op1, + Optional 
Flags) { - return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1}); + return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1}, Flags); } MachineInstrBuilder MachineIRBuilder::buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, - const SrcOp &Op1) { + const SrcOp &Op1, + Optional Flags) { - return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1}); + return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1}, Flags); } MachineInstrBuilder @@ -774,26 +791,28 @@ MachineIRBuilder::buildAtomicCmpXchg(Register OldValRes, Register Addr, .addMemOperand(&MMO); } -MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(unsigned Opcode, - Register OldValRes, - Register Addr, - Register Val, - MachineMemOperand &MMO) { +MachineInstrBuilder MachineIRBuilder::buildAtomicRMW( + unsigned Opcode, const DstOp &OldValRes, + const SrcOp &Addr, const SrcOp &Val, + MachineMemOperand &MMO) { + #ifndef NDEBUG - LLT OldValResTy = getMRI()->getType(OldValRes); - LLT AddrTy = getMRI()->getType(Addr); - LLT ValTy = getMRI()->getType(Val); + LLT OldValResTy = OldValRes.getLLTTy(*getMRI()); + LLT AddrTy = Addr.getLLTTy(*getMRI()); + LLT ValTy = Val.getLLTTy(*getMRI()); assert(OldValResTy.isScalar() && "invalid operand type"); assert(AddrTy.isPointer() && "invalid operand type"); assert(ValTy.isValid() && "invalid operand type"); assert(OldValResTy == ValTy && "type mismatch"); + assert(MMO.isAtomic() && "not atomic mem operand"); #endif - return buildInstr(Opcode) - .addDef(OldValRes) - .addUse(Addr) - .addUse(Val) - .addMemOperand(&MMO); + auto MIB = buildInstr(Opcode); + OldValRes.addDefToMIB(*getMRI(), MIB); + Addr.addSrcToMIB(MIB); + Val.addSrcToMIB(MIB); + MIB.addMemOperand(&MMO); + return MIB; } MachineInstrBuilder @@ -864,6 +883,21 @@ MachineIRBuilder::buildAtomicRMWUmin(Register OldValRes, Register Addr, MMO); } +MachineInstrBuilder +MachineIRBuilder::buildAtomicRMWFAdd( + const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val, + MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FADD, OldValRes, Addr, Val, + MMO); +} + +MachineInstrBuilder +MachineIRBuilder::buildAtomicRMWFSub(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val, + MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FSUB, OldValRes, Addr, Val, + MMO); +} + MachineInstrBuilder MachineIRBuilder::buildFence(unsigned Ordering, unsigned Scope) { return buildInstr(TargetOpcode::G_FENCE) @@ -1037,8 +1071,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, "input operands do not cover output register"); if (SrcOps.size() == 1) return buildCast(DstOps[0], SrcOps[0]); - if (DstOps[0].getLLTTy(*getMRI()).isVector()) - return buildInstr(TargetOpcode::G_CONCAT_VECTORS, DstOps, SrcOps); + if (DstOps[0].getLLTTy(*getMRI()).isVector()) { + if (SrcOps[0].getLLTTy(*getMRI()).isVector()) + return buildInstr(TargetOpcode::G_CONCAT_VECTORS, DstOps, SrcOps); + return buildInstr(TargetOpcode::G_BUILD_VECTOR, DstOps, SrcOps); + } break; } case TargetOpcode::G_EXTRACT_VECTOR_ELT: { diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp index 42be88fcf947..f0e35c65c53b 100644 --- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -92,7 +92,7 @@ void RegBankSelect::init(MachineFunction &MF) { MBPI = nullptr; } MIRBuilder.setMF(MF); - MORE = llvm::make_unique(MF, MBFI); + MORE = std::make_unique(MF, MBFI); } void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const { @@ 
-139,7 +139,7 @@ bool RegBankSelect::repairReg( "need new vreg for each breakdown"); // An empty range of new register means no repairing. - assert(!empty(NewVRegs) && "We should not have to repair"); + assert(!NewVRegs.empty() && "We should not have to repair"); MachineInstr *MI; if (ValMapping.NumBreakDowns == 1) { @@ -154,7 +154,7 @@ bool RegBankSelect::repairReg( std::swap(Src, Dst); assert((RepairPt.getNumInsertPoints() == 1 || - TargetRegisterInfo::isPhysicalRegister(Dst)) && + Register::isPhysicalRegister(Dst)) && "We are about to create several defs for Dst"); // Build the instruction used to repair, then clone it at the right @@ -398,7 +398,7 @@ void RegBankSelect::tryAvoidingSplit( // Check if this is a physical or virtual register. Register Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { // We are going to split every outgoing edges. // Check that this is possible. // FIXME: The machine representation is currently broken @@ -687,8 +687,9 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) { // iterator before hand. MachineInstr &MI = *MII++; - // Ignore target-specific instructions: they should use proper regclasses. - if (isTargetSpecificOpcode(MI.getOpcode())) + // Ignore target-specific post-isel instructions: they should use proper + // regclasses. + if (isTargetSpecificOpcode(MI.getOpcode()) && !MI.isPreISelOpcode()) continue; if (!assignInstr(MI)) { diff --git a/lib/CodeGen/GlobalISel/RegisterBank.cpp b/lib/CodeGen/GlobalISel/RegisterBank.cpp index 4e41f338934d..fc9c802693ab 100644 --- a/lib/CodeGen/GlobalISel/RegisterBank.cpp +++ b/lib/CodeGen/GlobalISel/RegisterBank.cpp @@ -12,6 +12,7 @@ #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/Config/llvm-config.h" +#include "llvm/Support/Debug.h" #define DEBUG_TYPE "registerbank" diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index 159422e38878..3fcc55286beb 100644 --- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -82,7 +82,7 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const { const RegisterBank * RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI)); assert(Reg && "NoRegister does not have a register bank"); @@ -97,8 +97,7 @@ RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterClass & RegisterBankInfo::getMinimalPhysRegClass(Register Reg, const TargetRegisterInfo &TRI) const { - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && - "Reg must be a physreg"); + assert(Register::isPhysicalRegister(Reg) && "Reg must be a physreg"); const auto &RegRCIt = PhysRegMinimalRCs.find(Reg); if (RegRCIt != PhysRegMinimalRCs.end()) return *RegRCIt->second; @@ -284,7 +283,7 @@ RegisterBankInfo::getPartialMapping(unsigned StartIdx, unsigned Length, ++NumPartialMappingsCreated; auto &PartMapping = MapOfPartialMappings[Hash]; - PartMapping = llvm::make_unique(StartIdx, Length, RegBank); + PartMapping = std::make_unique(StartIdx, Length, RegBank); return *PartMapping; } @@ -318,7 +317,7 @@ RegisterBankInfo::getValueMapping(const PartialMapping *BreakDown, ++NumValueMappingsCreated; auto &ValMapping = 
MapOfValueMappings[Hash]; - ValMapping = llvm::make_unique(BreakDown, NumBreakDowns); + ValMapping = std::make_unique(BreakDown, NumBreakDowns); return *ValMapping; } @@ -342,7 +341,7 @@ RegisterBankInfo::getOperandsMapping(Iterator Begin, Iterator End) const { // mapping, because we use the pointer of the ValueMapping // to hash and we expect them to uniquely identify an instance // of value mapping. - Res = llvm::make_unique(std::distance(Begin, End)); + Res = std::make_unique(std::distance(Begin, End)); unsigned Idx = 0; for (Iterator It = Begin; It != End; ++It, ++Idx) { const ValueMapping *ValMap = *It; @@ -392,7 +391,7 @@ RegisterBankInfo::getInstructionMappingImpl( ++NumInstructionMappingsCreated; auto &InstrMapping = MapOfInstructionMappings[Hash]; - InstrMapping = llvm::make_unique( + InstrMapping = std::make_unique( ID, Cost, OperandsMapping, NumOperands); return *InstrMapping; } @@ -456,7 +455,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { "This mapping is too complex for this function"); iterator_range::const_iterator> NewRegs = OpdMapper.getVRegs(OpIdx); - if (empty(NewRegs)) { + if (NewRegs.empty()) { LLVM_DEBUG(dbgs() << " has not been repaired, nothing to be done\n"); continue; } @@ -489,7 +488,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { unsigned RegisterBankInfo::getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { // The size is not directly available for physical registers. // Instead, we need to access a register class that contains Reg and // get the size of that register class. diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp index 766ea1d60bac..45618d7992ad 100644 --- a/lib/CodeGen/GlobalISel/Utils.cpp +++ b/lib/CodeGen/GlobalISel/Utils.cpp @@ -43,10 +43,9 @@ unsigned llvm::constrainOperandRegClass( const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, const MachineOperand &RegMO, unsigned OpIdx) { - unsigned Reg = RegMO.getReg(); + Register Reg = RegMO.getReg(); // Assume physical registers are properly constrained. - assert(TargetRegisterInfo::isVirtualRegister(Reg) && - "PhysReg not implemented"); + assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented"); unsigned ConstrainedReg = constrainRegToClass(MRI, TII, RBI, Reg, RegClass); // If we created a new virtual register because the class is not compatible @@ -73,10 +72,9 @@ unsigned llvm::constrainOperandRegClass( MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II, const MachineOperand &RegMO, unsigned OpIdx) { - unsigned Reg = RegMO.getReg(); + Register Reg = RegMO.getReg(); // Assume physical registers are properly constrained. - assert(TargetRegisterInfo::isVirtualRegister(Reg) && - "PhysReg not implemented"); + assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented"); const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF); // Some of the target independent instructions, like COPY, may not impose any @@ -130,9 +128,9 @@ bool llvm::constrainSelectedInstRegOperands(MachineInstr &I, LLVM_DEBUG(dbgs() << "Converting operand: " << MO << '\n'); assert(MO.isReg() && "Unsupported non-reg operand"); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // Physical registers don't need to be constrained. 
- if (TRI.isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) continue; // Register operands with a value of 0 (e.g. predicate operands) don't need @@ -170,9 +168,8 @@ bool llvm::isTriviallyDead(const MachineInstr &MI, if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg) || - !MRI.use_nodbg_empty(Reg)) + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg) || !MRI.use_nodbg_empty(Reg)) return false; } return true; @@ -219,11 +216,33 @@ Optional llvm::getConstantVRegVal(unsigned VReg, } Optional llvm::getConstantVRegValWithLookThrough( - unsigned VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) { + unsigned VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs, + bool HandleFConstant) { SmallVector, 4> SeenOpcodes; MachineInstr *MI; - while ((MI = MRI.getVRegDef(VReg)) && - MI->getOpcode() != TargetOpcode::G_CONSTANT && LookThroughInstrs) { + auto IsConstantOpcode = [HandleFConstant](unsigned Opcode) { + return Opcode == TargetOpcode::G_CONSTANT || + (HandleFConstant && Opcode == TargetOpcode::G_FCONSTANT); + }; + auto GetImmediateValue = [HandleFConstant, + &MRI](const MachineInstr &MI) -> Optional { + const MachineOperand &CstVal = MI.getOperand(1); + if (!CstVal.isImm() && !CstVal.isCImm() && + (!HandleFConstant || !CstVal.isFPImm())) + return None; + if (!CstVal.isFPImm()) { + unsigned BitWidth = + MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + APInt Val = CstVal.isImm() ? APInt(BitWidth, CstVal.getImm()) + : CstVal.getCImm()->getValue(); + assert(Val.getBitWidth() == BitWidth && + "Value bitwidth doesn't match definition type"); + return Val; + } + return CstVal.getFPImm()->getValueAPF().bitcastToAPInt(); + }; + while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI->getOpcode()) && + LookThroughInstrs) { switch (MI->getOpcode()) { case TargetOpcode::G_TRUNC: case TargetOpcode::G_SEXT: @@ -235,7 +254,7 @@ Optional llvm::getConstantVRegValWithLookThrough( break; case TargetOpcode::COPY: VReg = MI->getOperand(1).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(VReg)) + if (Register::isPhysicalRegister(VReg)) return None; break; case TargetOpcode::G_INTTOPTR: @@ -245,16 +264,13 @@ Optional llvm::getConstantVRegValWithLookThrough( return None; } } - if (!MI || MI->getOpcode() != TargetOpcode::G_CONSTANT || - (!MI->getOperand(1).isImm() && !MI->getOperand(1).isCImm())) + if (!MI || !IsConstantOpcode(MI->getOpcode())) return None; - const MachineOperand &CstVal = MI->getOperand(1); - unsigned BitWidth = MRI.getType(MI->getOperand(0).getReg()).getSizeInBits(); - APInt Val = CstVal.isImm() ? 
APInt(BitWidth, CstVal.getImm()) - : CstVal.getCImm()->getValue(); - assert(Val.getBitWidth() == BitWidth && - "Value bitwidth doesn't match definition type"); + Optional MaybeVal = GetImmediateValue(*MI); + if (!MaybeVal) + return None; + APInt &Val = *MaybeVal; while (!SeenOpcodes.empty()) { std::pair OpcodeAndSize = SeenOpcodes.pop_back_val(); switch (OpcodeAndSize.first) { @@ -291,7 +307,7 @@ llvm::MachineInstr *llvm::getDefIgnoringCopies(Register Reg, if (!DstTy.isValid()) return nullptr; while (DefMI->getOpcode() == TargetOpcode::COPY) { - unsigned SrcReg = DefMI->getOperand(1).getReg(); + Register SrcReg = DefMI->getOperand(1).getReg(); auto SrcTy = MRI.getType(SrcReg); if (!SrcTy.isValid() || SrcTy != DstTy) break; @@ -395,6 +411,40 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, return false; } +Optional llvm::ConstantFoldExtOp(unsigned Opcode, const unsigned Op1, + uint64_t Imm, + const MachineRegisterInfo &MRI) { + auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI); + if (MaybeOp1Cst) { + LLT Ty = MRI.getType(Op1); + APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true); + switch (Opcode) { + default: + break; + case TargetOpcode::G_SEXT_INREG: + return C1.trunc(Imm).sext(C1.getBitWidth()); + } + } + return None; +} + void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) { AU.addPreserved(); } + +MVT llvm::getMVTForLLT(LLT Ty) { + if (!Ty.isVector()) + return MVT::getIntegerVT(Ty.getSizeInBits()); + + return MVT::getVectorVT( + MVT::getIntegerVT(Ty.getElementType().getSizeInBits()), + Ty.getNumElements()); +} + +LLT llvm::getLLTForMVT(MVT Ty) { + if (!Ty.isVector()) + return LLT::scalar(Ty.getSizeInBits()); + + return LLT::vector(Ty.getVectorNumElements(), + Ty.getVectorElementType().getSizeInBits()); +} diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp index 09201c2e7bae..d4fa45fcb405 100644 --- a/lib/CodeGen/GlobalMerge.cpp +++ b/lib/CodeGen/GlobalMerge.cpp @@ -456,14 +456,14 @@ bool GlobalMerge::doMerge(const SmallVectorImpl &Globals, bool HasExternal = false; StringRef FirstExternalName; - unsigned MaxAlign = 1; + Align MaxAlign; unsigned CurIdx = 0; for (j = i; j != -1; j = GlobalSet.find_next(j)) { Type *Ty = Globals[j]->getValueType(); // Make sure we use the same alignment AsmPrinter would use. - unsigned Align = DL.getPreferredAlignment(Globals[j]); - unsigned Padding = alignTo(MergedSize, Align) - MergedSize; + Align Alignment(DL.getPreferredAlignment(Globals[j])); + unsigned Padding = alignTo(MergedSize, Alignment) - MergedSize; MergedSize += Padding; MergedSize += DL.getTypeAllocSize(Ty); if (MergedSize > MaxOffset) { @@ -478,7 +478,7 @@ bool GlobalMerge::doMerge(const SmallVectorImpl &Globals, Inits.push_back(Globals[j]->getInitializer()); StructIdxs.push_back(CurIdx++); - MaxAlign = std::max(MaxAlign, Align); + MaxAlign = std::max(MaxAlign, Alignment); if (Globals[j]->hasExternalLinkage() && !HasExternal) { HasExternal = true; diff --git a/lib/CodeGen/HardwareLoops.cpp b/lib/CodeGen/HardwareLoops.cpp index 5f57cabbe865..6a0f98d2e2b4 100644 --- a/lib/CodeGen/HardwareLoops.cpp +++ b/lib/CodeGen/HardwareLoops.cpp @@ -183,7 +183,7 @@ bool HardwareLoops::runOnFunction(Function &F) { TTI = &getAnalysis().getTTI(F); DL = &F.getParent()->getDataLayout(); auto *TLIP = getAnalysisIfAvailable(); - LibInfo = TLIP ? &TLIP->getTLI() : nullptr; + LibInfo = TLIP ? 
&TLIP->getTLI(F) : nullptr; PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); AC = &getAnalysis().getAssumptionCache(F); M = F.getParent(); diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index b17a253fe23f..d9caa5660695 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -285,14 +285,113 @@ namespace { Prediction); } - bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, - unsigned TCycle, unsigned TExtra, - MachineBasicBlock &FBB, - unsigned FCycle, unsigned FExtra, - BranchProbability Prediction) const { - return TCycle > 0 && FCycle > 0 && - TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra, - Prediction); + bool MeetIfcvtSizeLimit(BBInfo &TBBInfo, BBInfo &FBBInfo, + MachineBasicBlock &CommBB, unsigned Dups, + BranchProbability Prediction, bool Forked) const { + const MachineFunction &MF = *TBBInfo.BB->getParent(); + if (MF.getFunction().hasMinSize()) { + MachineBasicBlock::iterator TIB = TBBInfo.BB->begin(); + MachineBasicBlock::iterator FIB = FBBInfo.BB->begin(); + MachineBasicBlock::iterator TIE = TBBInfo.BB->end(); + MachineBasicBlock::iterator FIE = FBBInfo.BB->end(); + + unsigned Dups1, Dups2; + if (!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2, + *TBBInfo.BB, *FBBInfo.BB, + /*SkipUnconditionalBranches*/ true)) + llvm_unreachable("should already have been checked by ValidDiamond"); + + unsigned BranchBytes = 0; + unsigned CommonBytes = 0; + + // Count common instructions at the start of the true and false blocks. + for (auto &I : make_range(TBBInfo.BB->begin(), TIB)) { + LLVM_DEBUG(dbgs() << "Common inst: " << I); + CommonBytes += TII->getInstSizeInBytes(I); + } + for (auto &I : make_range(FBBInfo.BB->begin(), FIB)) { + LLVM_DEBUG(dbgs() << "Common inst: " << I); + CommonBytes += TII->getInstSizeInBytes(I); + } + + // Count instructions at the end of the true and false blocks, after + // the ones we plan to predicate. Analyzable branches will be removed + // (unless this is a forked diamond), and all other instructions are + // common between the two blocks. + for (auto &I : make_range(TIE, TBBInfo.BB->end())) { + if (I.isBranch() && TBBInfo.IsBrAnalyzable && !Forked) { + LLVM_DEBUG(dbgs() << "Saving branch: " << I); + BranchBytes += TII->predictBranchSizeForIfCvt(I); + } else { + LLVM_DEBUG(dbgs() << "Common inst: " << I); + CommonBytes += TII->getInstSizeInBytes(I); + } + } + for (auto &I : make_range(FIE, FBBInfo.BB->end())) { + if (I.isBranch() && FBBInfo.IsBrAnalyzable && !Forked) { + LLVM_DEBUG(dbgs() << "Saving branch: " << I); + BranchBytes += TII->predictBranchSizeForIfCvt(I); + } else { + LLVM_DEBUG(dbgs() << "Common inst: " << I); + CommonBytes += TII->getInstSizeInBytes(I); + } + } + for (auto &I : CommBB.terminators()) { + if (I.isBranch()) { + LLVM_DEBUG(dbgs() << "Saving branch: " << I); + BranchBytes += TII->predictBranchSizeForIfCvt(I); + } + } + + // The common instructions in one branch will be eliminated, halving + // their code size. + CommonBytes /= 2; + + // Count the instructions which we need to predicate. + unsigned NumPredicatedInstructions = 0; + for (auto &I : make_range(TIB, TIE)) { + if (!I.isDebugInstr()) { + LLVM_DEBUG(dbgs() << "Predicating: " << I); + NumPredicatedInstructions++; + } + } + for (auto &I : make_range(FIB, FIE)) { + if (!I.isDebugInstr()) { + LLVM_DEBUG(dbgs() << "Predicating: " << I); + NumPredicatedInstructions++; + } + } + + // Even though we're optimising for size at the expense of performance, + // avoid creating really long predicated blocks. 
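
A rough standalone model of the minsize profitability test above: if-conversion deletes the analyzable branches and one copy of the instructions common to both sides, and pays for whatever it costs to predicate the remaining instructions (e.g. Thumb2 IT instructions). The struct and the byte counts in main are illustrative, not measured values:

#include <cassert>

struct DiamondSizes {
  unsigned branch_bytes;          // branches removed by the conversion
  unsigned common_bytes;          // shared instructions, counted in both blocks
  unsigned extra_predicate_bytes; // cost of making the blocks predicated
};

// Convert only when the bytes saved exceed the bytes added.
bool profitable_at_minsize(const DiamondSizes &d) {
  unsigned saved = d.branch_bytes + d.common_bytes / 2;  // one shared copy goes away
  return saved > d.extra_predicate_bytes;
}

int main() {
  assert(profitable_at_minsize({4, 8, 2}));    // save 4 + 4 bytes, pay 2
  assert(!profitable_at_minsize({2, 0, 4}));   // predication costs more than it saves
}
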
+ if (NumPredicatedInstructions > 15) + return false; + + // Some targets (e.g. Thumb2) need to insert extra instructions to + // start predicated blocks. + unsigned ExtraPredicateBytes = TII->extraSizeToPredicateInstructions( + MF, NumPredicatedInstructions); + + LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(BranchBytes=" << BranchBytes + << ", CommonBytes=" << CommonBytes + << ", NumPredicatedInstructions=" + << NumPredicatedInstructions + << ", ExtraPredicateBytes=" << ExtraPredicateBytes + << ")\n"); + return (BranchBytes + CommonBytes) > ExtraPredicateBytes; + } else { + unsigned TCycle = TBBInfo.NonPredSize + TBBInfo.ExtraCost - Dups; + unsigned FCycle = FBBInfo.NonPredSize + FBBInfo.ExtraCost - Dups; + bool Res = TCycle > 0 && FCycle > 0 && + TII->isProfitableToIfCvt( + *TBBInfo.BB, TCycle, TBBInfo.ExtraCost2, *FBBInfo.BB, + FCycle, FBBInfo.ExtraCost2, Prediction); + LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(TCycle=" << TCycle + << ", FCycle=" << FCycle + << ", TExtra=" << TBBInfo.ExtraCost2 << ", FExtra=" + << FBBInfo.ExtraCost2 << ") = " << Res << "\n"); + return Res; + } } /// Returns true if Block ends without a terminator. @@ -356,8 +455,10 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { if (!PreRegAlloc) { // Tail merge tend to expose more if-conversion opportunities. BranchFolder BF(true, false, MBFI, *MBPI); - BFChange = BF.OptimizeFunction(MF, TII, ST.getRegisterInfo(), - getAnalysisIfAvailable()); + auto *MMIWP = getAnalysisIfAvailable(); + BFChange = BF.OptimizeFunction( + MF, TII, ST.getRegisterInfo(), + MMIWP ? &MMIWP->getMMI() : nullptr); } LLVM_DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'" @@ -496,8 +597,10 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { if (MadeChange && IfCvtBranchFold) { BranchFolder BF(false, false, MBFI, *MBPI); - BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), - getAnalysisIfAvailable()); + auto *MMIWP = getAnalysisIfAvailable(); + BF.OptimizeFunction( + MF, TII, MF.getSubtarget().getRegisterInfo(), + MMIWP ? 
&MMIWP->getMMI() : nullptr); } MadeChange |= BFChange; @@ -569,6 +672,9 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, bool FalseBranch, unsigned &Dups, BranchProbability Prediction) const { Dups = 0; + if (TrueBBI.BB == FalseBBI.BB) + return false; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) return false; @@ -835,6 +941,8 @@ bool IfConverter::ValidForkedDiamond( TrueBBICalc.BB = TrueBBI.BB; FalseBBICalc.BB = FalseBBI.BB; + TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable; + FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable; if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc)) return false; @@ -892,6 +1000,8 @@ bool IfConverter::ValidDiamond( TrueBBICalc.BB = TrueBBI.BB; FalseBBICalc.BB = FalseBBI.BB; + TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable; + FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable; if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc)) return false; // The size is used to decide whether to if-convert, and the shared portions @@ -912,6 +1022,12 @@ void IfConverter::AnalyzeBranches(BBInfo &BBI) { BBI.BrCond.clear(); BBI.IsBrAnalyzable = !TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond); + if (!BBI.IsBrAnalyzable) { + BBI.TrueBB = nullptr; + BBI.FalseBB = nullptr; + BBI.BrCond.clear(); + } + SmallVector RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); BBI.IsBrReversible = (RevCond.size() == 0) || !TII->reverseBranchCondition(RevCond); @@ -1173,13 +1289,9 @@ void IfConverter::AnalyzeBlock( if (CanRevCond) { BBInfo TrueBBICalc, FalseBBICalc; - auto feasibleDiamond = [&]() { - bool MeetsSize = MeetIfcvtSizeLimit( - *TrueBBI.BB, (TrueBBICalc.NonPredSize - (Dups + Dups2) + - TrueBBICalc.ExtraCost), TrueBBICalc.ExtraCost2, - *FalseBBI.BB, (FalseBBICalc.NonPredSize - (Dups + Dups2) + - FalseBBICalc.ExtraCost), FalseBBICalc.ExtraCost2, - Prediction); + auto feasibleDiamond = [&](bool Forked) { + bool MeetsSize = MeetIfcvtSizeLimit(TrueBBICalc, FalseBBICalc, *BB, + Dups + Dups2, Prediction, Forked); bool TrueFeasible = FeasibilityAnalysis(TrueBBI, BBI.BrCond, /* IsTriangle */ false, /* RevCond */ false, /* hasCommonTail */ true); @@ -1191,7 +1303,7 @@ void IfConverter::AnalyzeBlock( if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2, TrueBBICalc, FalseBBICalc)) { - if (feasibleDiamond()) { + if (feasibleDiamond(false)) { // Diamond: // EBB // / \_ @@ -1200,14 +1312,14 @@ void IfConverter::AnalyzeBlock( // \ / // TailBB // Note TailBB can be empty. - Tokens.push_back(llvm::make_unique( + Tokens.push_back(std::make_unique( BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2, (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred)); Enqueued = true; } } else if (ValidForkedDiamond(TrueBBI, FalseBBI, Dups, Dups2, TrueBBICalc, FalseBBICalc)) { - if (feasibleDiamond()) { + if (feasibleDiamond(true)) { // ForkedDiamond: // if TBB and FBB have a common tail that includes their conditional // branch instructions, then we can If Convert this pattern. 
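The new MeetIfcvtSizeLimit overload in the hunks above swaps the cycle-count test for a pure code-size test when the function carries minsize: the bytes freed by deleting branches and by keeping only one copy of the instructions common to both sides must outweigh the bytes the target needs to set up predication, and anything with more than 15 instructions left to predicate is rejected outright. A small stand-alone model of that decision, deliberately free of LLVM types (the struct, field and constant names below are illustrative assumptions, not part of the patch):

  #include <cstdio>

  // Toy model of the minsize if-conversion size check described above.
  // Nothing here uses LLVM; the numbers are illustrative only.
  struct IfcvtSizeModel {
    unsigned BranchBytes;         // bytes of branches that predication removes
    unsigned CommonBytes;         // bytes duplicated on both sides (one copy survives)
    unsigned PredicatedInsts;     // number of instructions that must be predicated
    unsigned ExtraPredicateBytes; // target setup cost, e.g. IT blocks on Thumb2
  };

  static bool profitableAtMinSize(const IfcvtSizeModel &M) {
    // Mirror the cap in the patch: never build very long predicated blocks,
    // even when optimising purely for size.
    if (M.PredicatedInsts > 15)
      return false;
    // The patch halves CommonBytes before comparing, since only one of the
    // two duplicated copies is actually eliminated.
    return (M.BranchBytes + M.CommonBytes / 2) > M.ExtraPredicateBytes;
  }

  int main() {
    IfcvtSizeModel M{/*BranchBytes=*/4, /*CommonBytes=*/8,
                     /*PredicatedInsts=*/3, /*ExtraPredicateBytes=*/2};
    std::printf("if-convert at minsize: %s\n",
                profitableAtMinSize(M) ? "yes" : "no");
    return 0;
  }

The non-minsize path in the same function keeps the previous behaviour, only now computing TCycle/FCycle from the BBInfo pair instead of taking them as parameters.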
@@ -1218,7 +1330,7 @@ void IfConverter::AnalyzeBlock( // / \ / \ // FalseBB TrueBB FalseBB // - Tokens.push_back(llvm::make_unique( + Tokens.push_back(std::make_unique( BBI, ICForkedDiamond, TNeedSub | FNeedSub, Dups, Dups2, (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred)); Enqueued = true; @@ -1238,7 +1350,7 @@ void IfConverter::AnalyzeBlock( // | / // FBB Tokens.push_back( - llvm::make_unique(BBI, ICTriangle, TNeedSub, Dups)); + std::make_unique(BBI, ICTriangle, TNeedSub, Dups)); Enqueued = true; } @@ -1247,7 +1359,7 @@ void IfConverter::AnalyzeBlock( TrueBBI.ExtraCost2, Prediction) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) { Tokens.push_back( - llvm::make_unique(BBI, ICTriangleRev, TNeedSub, Dups)); + std::make_unique(BBI, ICTriangleRev, TNeedSub, Dups)); Enqueued = true; } @@ -1263,7 +1375,7 @@ void IfConverter::AnalyzeBlock( // | // FBB Tokens.push_back( - llvm::make_unique(BBI, ICSimple, TNeedSub, Dups)); + std::make_unique(BBI, ICSimple, TNeedSub, Dups)); Enqueued = true; } @@ -1275,7 +1387,7 @@ void IfConverter::AnalyzeBlock( FalseBBI.NonPredSize + FalseBBI.ExtraCost, FalseBBI.ExtraCost2, Prediction.getCompl()) && FeasibilityAnalysis(FalseBBI, RevCond, true)) { - Tokens.push_back(llvm::make_unique(BBI, ICTriangleFalse, + Tokens.push_back(std::make_unique(BBI, ICTriangleFalse, FNeedSub, Dups)); Enqueued = true; } @@ -1287,7 +1399,7 @@ void IfConverter::AnalyzeBlock( FalseBBI.ExtraCost2, Prediction.getCompl()) && FeasibilityAnalysis(FalseBBI, RevCond, true, true)) { Tokens.push_back( - llvm::make_unique(BBI, ICTriangleFRev, FNeedSub, Dups)); + std::make_unique(BBI, ICTriangleFRev, FNeedSub, Dups)); Enqueued = true; } @@ -1297,7 +1409,7 @@ void IfConverter::AnalyzeBlock( FalseBBI.ExtraCost2, Prediction.getCompl()) && FeasibilityAnalysis(FalseBBI, RevCond)) { Tokens.push_back( - llvm::make_unique(BBI, ICSimpleFalse, FNeedSub, Dups)); + std::make_unique(BBI, ICSimpleFalse, FNeedSub, Dups)); Enqueued = true; } } @@ -1730,6 +1842,11 @@ bool IfConverter::IfConvertDiamondCommon( ++i; } while (NumDups1 != 0) { + // Since this instruction is going to be deleted, update call + // site info state if the instruction is call instruction. + if (DI2->isCall(MachineInstr::IgnoreBundle)) + MBB2.getParent()->eraseCallSiteInfo(&*DI2); + ++DI2; if (DI2 == MBB2.end()) break; @@ -1758,14 +1875,27 @@ bool IfConverter::IfConvertDiamondCommon( if (!BBI1->IsBrAnalyzable) verifySameBranchInstructions(&MBB1, &MBB2); #endif - BBI1->NonPredSize -= TII->removeBranch(*BBI1->BB); - // Remove duplicated instructions. + // Remove duplicated instructions from the tail of MBB1: any branch + // instructions, and the common instructions counted by NumDups2. DI1 = MBB1.end(); + while (DI1 != MBB1.begin()) { + MachineBasicBlock::iterator Prev = std::prev(DI1); + if (!Prev->isBranch() && !Prev->isDebugInstr()) + break; + DI1 = Prev; + } for (unsigned i = 0; i != NumDups2; ) { // NumDups2 only counted non-dbg_value instructions, so this won't // run off the head of the list. assert(DI1 != MBB1.begin()); + --DI1; + + // Since this instruction is going to be deleted, update call + // site info state if the instruction is call instruction. 
+ if (DI1->isCall(MachineInstr::IgnoreBundle)) + MBB1.getParent()->eraseCallSiteInfo(&*DI1); + // skip dbg_value instructions if (!DI1->isDebugInstr()) ++i; @@ -1815,7 +1945,7 @@ bool IfConverter::IfConvertDiamondCommon( for (const MachineOperand &MO : FI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; if (MO.isDef()) { @@ -1983,7 +2113,7 @@ static bool MaySpeculate(const MachineInstr &MI, for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; if (MO.isDef() && !LaterRedefs.count(Reg)) @@ -2050,6 +2180,10 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, break; MachineInstr *MI = MF.CloneMachineInstr(&I); + // Make a copy of the call site info. + if (MI->isCall(MachineInstr::IgnoreBundle)) + MF.copyCallSiteInfo(&I,MI); + ToBBI.BB->insert(ToBBI.BB->end(), MI); ToBBI.NonPredSize++; unsigned ExtraPredCost = TII->getPredicationCost(I); diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp index 1e82ea659617..b7dcaec90106 100644 --- a/lib/CodeGen/ImplicitNullChecks.cpp +++ b/lib/CodeGen/ImplicitNullChecks.cpp @@ -278,12 +278,12 @@ bool ImplicitNullChecks::canReorder(const MachineInstr *A, if (!(MOA.isReg() && MOA.getReg())) continue; - unsigned RegA = MOA.getReg(); + Register RegA = MOA.getReg(); for (auto MOB : B->operands()) { if (!(MOB.isReg() && MOB.getReg())) continue; - unsigned RegB = MOB.getReg(); + Register RegB = MOB.getReg(); if (TRI->regsOverlap(RegA, RegB) && (MOA.isDef() || MOB.isDef())) return false; @@ -517,7 +517,7 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // // we must ensure that there are no instructions between the 'test' and // conditional jump that modify %rax. 
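The ImplicitNullChecks changes in this file are mechanical (unsigned becomes Register), but the invariant the surrounding comments describe is worth spelling out: two instructions may only be reordered, and a load may only be hoisted to the faulting position, if they do not touch a common register with at least one of the accesses being a write. A tiny stand-alone model of that check (plain C++; Operand and Instr are illustrative stand-ins, and plain register equality stands in for TRI->regsOverlap, which also handles aliasing sub- and super-registers):

  #include <vector>

  // Illustrative stand-in for a machine operand: a register number
  // (0 means no register) and whether the operand writes it.
  struct Operand { unsigned Reg; bool IsDef; };
  using Instr = std::vector<Operand>;

  // Reordering A and B is unsafe as soon as they share a register and either
  // access is a definition.
  static bool canReorder(const Instr &A, const Instr &B) {
    for (const Operand &OA : A) {
      if (!OA.Reg)
        continue;
      for (const Operand &OB : B) {
        if (!OB.Reg)
          continue;
        if (OA.Reg == OB.Reg && (OA.IsDef || OB.IsDef))
          return false;
      }
    }
    return true;
  }

  int main() {
    Instr DefR1 = {{1, true}};   // writes r1
    Instr UseR1 = {{1, false}};  // reads r1
    return canReorder(DefR1, UseR1) ? 1 : 0; // exits 0: may not be reordered
  }

Under this rule, the load of the tested pointer can be moved to the faulting slot only when nothing between the compare and the conditional branch redefines the pointer register, which is exactly the %rax constraint the comment above spells out.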
- const unsigned PointerReg = MBP.LHS.getReg(); + const Register PointerReg = MBP.LHS.getReg(); assert(MBP.ConditionDef->getParent() == &MBB && "Should be in basic block"); @@ -689,7 +689,7 @@ void ImplicitNullChecks::rewriteNullChecks( for (const MachineOperand &MO : FaultingInstr->operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg || MBB->isLiveIn(Reg)) continue; MBB->addLiveIn(Reg); diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 41ae8061a917..2408f18678e4 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "LiveRangeCalc.h" #include "Spiller.h" #include "SplitKit.h" #include "llvm/ADT/ArrayRef.h" @@ -26,6 +25,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveRangeCalc.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStacks.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -346,8 +346,7 @@ void InlineSpiller::collectRegsToSpill() { } bool InlineSpiller::isSibling(unsigned Reg) { - return TargetRegisterInfo::isVirtualRegister(Reg) && - VRM.getOriginal(Reg) == Original; + return Register::isVirtualRegister(Reg) && VRM.getOriginal(Reg) == Original; } /// It is beneficial to spill to earlier place in the same BB in case @@ -377,7 +376,7 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI, assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy"); #endif - unsigned SrcReg = CopyMI.getOperand(1).getReg(); + Register SrcReg = CopyMI.getOperand(1).getReg(); LiveInterval &SrcLI = LIS.getInterval(SrcReg); VNInfo *SrcVNI = SrcLI.getVNInfoAt(Idx); LiveQueryResult SrcQ = SrcLI.Query(Idx); @@ -845,9 +844,8 @@ foldMemoryOperand(ArrayRef> Ops, for (MIBundleOperands MO(*MI); MO.isValid(); ++MO) { if (!MO->isReg()) continue; - unsigned Reg = MO->getReg(); - if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || - MRI.isReserved(Reg)) { + Register Reg = MO->getReg(); + if (!Reg || Register::isVirtualRegister(Reg) || MRI.isReserved(Reg)) { continue; } // Skip non-Defs, including undef uses and internal reads. @@ -869,7 +867,7 @@ foldMemoryOperand(ArrayRef> Ops, --NumSpills; LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI); if (MI->isCall()) - MI->getMF()->updateCallSiteInfo(MI, FoldMI); + MI->getMF()->moveCallSiteInfo(MI, FoldMI); MI->eraseFromParent(); // Insert any new instructions other than FoldMI into the LIS maps. @@ -1111,8 +1109,8 @@ void InlineSpiller::spillAll() { void InlineSpiller::spill(LiveRangeEdit &edit) { ++NumSpilledRanges; Edit = &edit; - assert(!TargetRegisterInfo::isStackSlot(edit.getReg()) - && "Trying to spill a stack slot."); + assert(!Register::isStackSlot(edit.getReg()) && + "Trying to spill a stack slot."); // Share a stack slot among all descendants of Original. Original = VRM.getOriginal(edit.getReg()); StackSlot = VRM.getStackSlot(Original); @@ -1147,7 +1145,7 @@ void HoistSpillHelper::addToMergeableSpills(MachineInstr &Spill, int StackSlot, // save a copy of LiveInterval in StackSlotToOrigLI because the original // LiveInterval may be cleared after all its references are spilled. 
if (StackSlotToOrigLI.find(StackSlot) == StackSlotToOrigLI.end()) { - auto LI = llvm::make_unique(OrigLI.reg, OrigLI.weight); + auto LI = std::make_unique(OrigLI.reg, OrigLI.weight); LI->assign(OrigLI, Allocator); StackSlotToOrigLI[StackSlot] = std::move(LI); } @@ -1459,7 +1457,7 @@ void HoistSpillHelper::hoistAllSpills() { LiveRangeEdit Edit(nullptr, NewVRegs, MF, LIS, &VRM, this); for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); unsigned Original = VRM.getPreSplitReg(Reg); if (!MRI.def_empty(Reg)) Virt2SiblingsMap[Original].insert(Reg); diff --git a/lib/CodeGen/InterleavedLoadCombinePass.cpp b/lib/CodeGen/InterleavedLoadCombinePass.cpp index 9525da849e2a..770c4952d169 100644 --- a/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -940,8 +940,8 @@ public: /// \param V input value /// \param Result result polynomial static void computePolynomial(Value &V, Polynomial &Result) { - if (isa(&V)) - computePolynomialBinOp(*dyn_cast(&V), Result); + if (auto *BO = dyn_cast(&V)) + computePolynomialBinOp(*BO, Result); else Result = Polynomial(&V); } diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 886ae7e94adb..1c362aec6e67 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -96,14 +96,15 @@ LLVMTargetMachine::getTargetTransformInfo(const Function &F) { /// addPassesToX helper drives creation and initialization of TargetPassConfig. static TargetPassConfig * addPassesToGenerateCode(LLVMTargetMachine &TM, PassManagerBase &PM, - bool DisableVerify, MachineModuleInfo &MMI) { + bool DisableVerify, + MachineModuleInfoWrapperPass &MMIWP) { // Targets may override createPassConfig to provide a target-specific // subclass. TargetPassConfig *PassConfig = TM.createPassConfig(PM); // Set PassConfig options provided by TargetMachine. PassConfig->setDisableVerify(DisableVerify); PM.add(PassConfig); - PM.add(&MMI); + PM.add(&MMIWP); if (PassConfig->addISelPasses()) return nullptr; @@ -139,7 +140,7 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, std::unique_ptr MAB( getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions)); - auto FOut = llvm::make_unique(Out); + auto FOut = std::make_unique(Out); MCStreamer *S = getTarget().createAsmStreamer( Context, std::move(FOut), Options.MCOptions.AsmVerbose, Options.MCOptions.MCUseDwarfDirectory, InstPrinter, std::move(MCE), @@ -186,17 +187,15 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, return false; } -bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, - raw_pwrite_stream &Out, - raw_pwrite_stream *DwoOut, - CodeGenFileType FileType, - bool DisableVerify, - MachineModuleInfo *MMI) { +bool LLVMTargetMachine::addPassesToEmitFile( + PassManagerBase &PM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, + CodeGenFileType FileType, bool DisableVerify, + MachineModuleInfoWrapperPass *MMIWP) { // Add common CodeGen passes. 
- if (!MMI) - MMI = new MachineModuleInfo(this); + if (!MMIWP) + MMIWP = new MachineModuleInfoWrapperPass(this); TargetPassConfig *PassConfig = - addPassesToGenerateCode(*this, PM, DisableVerify, *MMI); + addPassesToGenerateCode(*this, PM, DisableVerify, *MMIWP); if (!PassConfig) return true; @@ -206,12 +205,13 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, // testing to be meaningful, we need to ensure that the symbols created // are MCSymbolXCOFF variants, which requires that // the TargetLoweringObjectFile instance has been initialized. - MCContext &Ctx = MMI->getContext(); + MCContext &Ctx = MMIWP->getMMI().getContext(); const_cast(*this->getObjFileLowering()) .Initialize(Ctx, *this); } PM.add(createPrintMIRPass(Out)); - } else if (addAsmPrinter(PM, Out, DwoOut, FileType, MMI->getContext())) + } else if (addAsmPrinter(PM, Out, DwoOut, FileType, + MMIWP->getMMI().getContext())) return true; PM.add(createFreeMachineFunctionPass()); @@ -227,15 +227,15 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, raw_pwrite_stream &Out, bool DisableVerify) { // Add common CodeGen passes. - MachineModuleInfo *MMI = new MachineModuleInfo(this); + MachineModuleInfoWrapperPass *MMIWP = new MachineModuleInfoWrapperPass(this); TargetPassConfig *PassConfig = - addPassesToGenerateCode(*this, PM, DisableVerify, *MMI); + addPassesToGenerateCode(*this, PM, DisableVerify, *MMIWP); if (!PassConfig) return true; assert(TargetPassConfig::willCompleteCodeGenPipeline() && "Cannot emit MC with limited codegen pipeline"); - Ctx = &MMI->getContext(); + Ctx = &MMIWP->getMMI().getContext(); if (Options.MCOptions.MCSaveTempLabels) Ctx->setAllowTemporaryLabels(false); diff --git a/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp index 200ac0ba15bf..cef5085ae079 100644 --- a/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp +++ b/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp @@ -73,18 +73,18 @@ LazyMachineBlockFrequencyInfoPass::calculateIfNotAvailable() const { if (!MDT) { LLVM_DEBUG(dbgs() << "Building DominatorTree on the fly\n"); - OwnedMDT = make_unique(); + OwnedMDT = std::make_unique(); OwnedMDT->getBase().recalculate(*MF); MDT = OwnedMDT.get(); } // Generate LoopInfo from it. - OwnedMLI = make_unique(); + OwnedMLI = std::make_unique(); OwnedMLI->getBase().analyze(MDT->getBase()); MLI = OwnedMLI.get(); } - OwnedMBFI = make_unique(); + OwnedMBFI = std::make_unique(); OwnedMBFI->calculate(*MF, MBPI, *MLI); return *OwnedMBFI.get(); } diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index 503821537ed9..ac3ef0e709f3 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp index a669e64692b9..f1b237d83e8c 100644 --- a/lib/CodeGen/LiveDebugValues.cpp +++ b/lib/CodeGen/LiveDebugValues.cpp @@ -7,14 +7,23 @@ //===----------------------------------------------------------------------===// /// /// This pass implements a data flow analysis that propagates debug location -/// information by inserting additional DBG_VALUE instructions into the machine -/// instruction stream. 
The pass internally builds debug location liveness -/// ranges to determine the points where additional DBG_VALUEs need to be -/// inserted. +/// information by inserting additional DBG_VALUE insts into the machine +/// instruction stream. Before running, each DBG_VALUE inst corresponds to a +/// source assignment of a variable. Afterwards, a DBG_VALUE inst specifies a +/// variable location for the current basic block (see SourceLevelDebugging.rst). /// /// This is a separate pass from DbgValueHistoryCalculator to facilitate /// testing and improve modularity. /// +/// Each variable location is represented by a VarLoc object that identifies the +/// source variable, its current machine-location, and the DBG_VALUE inst that +/// specifies the location. Each VarLoc is indexed in the (function-scope) +/// VarLocMap, giving each VarLoc a unique index. Rather than operate directly +/// on machine locations, the dataflow analysis in this pass identifies +/// locations by their index in the VarLocMap, meaning all the variable +/// locations in a block can be described by a sparse vector of VarLocMap +/// indexes. +/// //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" @@ -68,6 +77,7 @@ using namespace llvm; #define DEBUG_TYPE "livedebugvalues" STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted"); +STATISTIC(NumRemoved, "Number of DBG_VALUE instructions removed"); // If @MI is a DBG_VALUE with debug value described by a defined // register, returns the number of this register. In the other case, returns 0. @@ -179,8 +189,16 @@ private: } }; + /// Identity of the variable at this location. const DebugVariable Var; - const MachineInstr &MI; ///< Only used for cloning a new DBG_VALUE. + + /// The expression applied to this location. + const DIExpression *Expr; + + /// DBG_VALUE to clone var/expr information from if this location + /// is moved. + const MachineInstr &MI; + mutable UserValueScopes UVS; enum VarLocKind { InvalidKind = 0, @@ -201,9 +219,9 @@ private: const ConstantInt *CImm; } Loc; - VarLoc(const MachineInstr &MI, LexicalScopes &LS, - VarLocKind K = InvalidKind) - : Var(MI), MI(MI), UVS(MI.getDebugLoc(), LS){ + VarLoc(const MachineInstr &MI, LexicalScopes &LS) + : Var(MI), Expr(MI.getDebugExpression()), MI(MI), + UVS(MI.getDebugLoc(), LS) { static_assert((sizeof(Loc) == sizeof(uint64_t)), "hash does not cover all members of Loc"); assert(MI.isDebugValue() && "not a DBG_VALUE"); @@ -225,17 +243,78 @@ private: "entry values must be register locations"); } - /// The constructor for spill locations. - VarLoc(const MachineInstr &MI, unsigned SpillBase, int SpillOffset, - LexicalScopes &LS) - : Var(MI), MI(MI), UVS(MI.getDebugLoc(), LS) { - assert(MI.isDebugValue() && "not a DBG_VALUE"); - assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE"); - Kind = SpillLocKind; - Loc.SpillLocation = {SpillBase, SpillOffset}; + /// Take the variable and machine-location in DBG_VALUE MI, and build an + /// entry location using the given expression. + static VarLoc CreateEntryLoc(const MachineInstr &MI, LexicalScopes &LS, + const DIExpression *EntryExpr) { + VarLoc VL(MI, LS); + VL.Kind = EntryValueKind; + VL.Expr = EntryExpr; + return VL; + } + + /// Copy the register location in DBG_VALUE MI, updating the register to + /// be NewReg. 
+ static VarLoc CreateCopyLoc(const MachineInstr &MI, LexicalScopes &LS, + unsigned NewReg) { + VarLoc VL(MI, LS); + assert(VL.Kind == RegisterKind); + VL.Loc.RegNo = NewReg; + return VL; + } + + /// Take the variable described by DBG_VALUE MI, and create a VarLoc + /// locating it in the specified spill location. + static VarLoc CreateSpillLoc(const MachineInstr &MI, unsigned SpillBase, + int SpillOffset, LexicalScopes &LS) { + VarLoc VL(MI, LS); + assert(VL.Kind == RegisterKind); + VL.Kind = SpillLocKind; + VL.Loc.SpillLocation = {SpillBase, SpillOffset}; + return VL; } - // Is the Loc field a constant or constant object? + /// Create a DBG_VALUE representing this VarLoc in the given function. + /// Copies variable-specific information such as DILocalVariable and + /// inlining information from the original DBG_VALUE instruction, which may + /// have been several transfers ago. + MachineInstr *BuildDbgValue(MachineFunction &MF) const { + const DebugLoc &DbgLoc = MI.getDebugLoc(); + bool Indirect = MI.isIndirectDebugValue(); + const auto &IID = MI.getDesc(); + const DILocalVariable *Var = MI.getDebugVariable(); + const DIExpression *DIExpr = MI.getDebugExpression(); + + switch (Kind) { + case EntryValueKind: + // An entry value is a register location -- but with an updated + // expression. + return BuildMI(MF, DbgLoc, IID, Indirect, Loc.RegNo, Var, Expr); + case RegisterKind: + // Register locations are like the source DBG_VALUE, but with the + // register number from this VarLoc. + return BuildMI(MF, DbgLoc, IID, Indirect, Loc.RegNo, Var, DIExpr); + case SpillLocKind: { + // Spills are indirect DBG_VALUEs, with a base register and offset. + // Use the original DBG_VALUEs expression to build the spilt location + // on top of. FIXME: spill locations created before this pass runs + // are not recognized, and not handled here. + auto *SpillExpr = DIExpression::prepend( + DIExpr, DIExpression::ApplyOffset, Loc.SpillLocation.SpillOffset); + unsigned Base = Loc.SpillLocation.SpillBase; + return BuildMI(MF, DbgLoc, IID, true, Base, Var, SpillExpr); + } + case ImmediateKind: { + MachineOperand MO = MI.getOperand(0); + return BuildMI(MF, DbgLoc, IID, Indirect, MO, Var, DIExpr); + } + case InvalidKind: + llvm_unreachable("Tried to produce DBG_VALUE for invalid VarLoc"); + } + llvm_unreachable("Unrecognized LiveDebugValues.VarLoc.Kind enum"); + } + + /// Is the Loc field a constant or constant object? bool isConstant() const { return Kind == ImmediateKind; } /// If this variable is described by a register, return it, @@ -251,18 +330,42 @@ private: bool dominates(MachineBasicBlock &MBB) const { return UVS.dominates(&MBB); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - LLVM_DUMP_METHOD void dump() const { MI.dump(); } + // TRI can be null. + void dump(const TargetRegisterInfo *TRI, raw_ostream &Out = dbgs()) const { + dbgs() << "VarLoc("; + switch (Kind) { + case RegisterKind: + case EntryValueKind: + dbgs() << printReg(Loc.RegNo, TRI); + break; + case SpillLocKind: + dbgs() << printReg(Loc.SpillLocation.SpillBase, TRI); + dbgs() << "[" << Loc.SpillLocation.SpillOffset << "]"; + break; + case ImmediateKind: + dbgs() << Loc.Immediate; + break; + case InvalidKind: + llvm_unreachable("Invalid VarLoc in dump method"); + } + + dbgs() << ", \"" << Var.getVar()->getName() << "\", " << *Expr << ", "; + if (Var.getInlinedAt()) + dbgs() << "!" 
<< Var.getInlinedAt()->getMetadataID() << ")\n"; + else + dbgs() << "(null))\n"; + } #endif bool operator==(const VarLoc &Other) const { return Kind == Other.Kind && Var == Other.Var && - Loc.Hash == Other.Loc.Hash; + Loc.Hash == Other.Loc.Hash && Expr == Other.Expr; } /// This operator guarantees that VarLocs are sorted by Variable first. bool operator<(const VarLoc &Other) const { - return std::tie(Var, Kind, Loc.Hash) < - std::tie(Other.Var, Other.Kind, Other.Loc.Hash); + return std::tie(Var, Kind, Loc.Hash, Expr) < + std::tie(Other.Var, Other.Kind, Other.Loc.Hash, Other.Expr); } }; @@ -271,8 +374,8 @@ private: using VarLocSet = SparseBitVector<>; using VarLocInMBB = SmallDenseMap; struct TransferDebugPair { - MachineInstr *TransferInst; - MachineInstr *DebugInst; + MachineInstr *TransferInst; /// Instruction where this transfer occurs. + unsigned LocationID; /// Location number for the transfer dest. }; using TransferMap = SmallVector; @@ -320,6 +423,14 @@ private: Vars.insert({Var, VarLocID}); } + /// Insert a set of ranges. + void insertFromLocSet(const VarLocSet &ToLoad, const VarLocMap &Map) { + for (unsigned Id : ToLoad) { + const VarLoc &Var = Map[Id]; + insert(Id, Var.Var); + } + } + /// Empty the set. void clear() { VarLocs.clear(); @@ -333,8 +444,18 @@ private: } }; - bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF, - unsigned &Reg); + /// Tests whether this instruction is a spill to a stack location. + bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF); + + /// Decide if @MI is a spill instruction and return true if it is. We use 2 + /// criteria to make this decision: + /// - Is this instruction a store to a spill slot? + /// - Is there a register operand that is both used and killed? + /// TODO: Store optimization can fold spills into other stores (including + /// other spills). We do not handle this yet (more than one memory operand). + bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF, + unsigned &Reg); + /// If a given instruction is identified as a spill, return the spill location /// and set \p Reg to the spilled register. Optional isRestoreInstruction(const MachineInstr &MI, @@ -361,13 +482,13 @@ private: void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers, DebugParamMap &DebugEntryVals); - bool transferTerminatorInst(MachineInstr &MI, OpenRangesSet &OpenRanges, - VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs); + bool transferTerminator(MachineBasicBlock *MBB, OpenRangesSet &OpenRanges, + VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs); - bool process(MachineInstr &MI, OpenRangesSet &OpenRanges, + void process(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocInMBB &OutLocs, VarLocMap &VarLocIDs, TransferMap &Transfers, DebugParamMap &DebugEntryVals, - bool transferChanges, OverlapMap &OverlapFragments, + OverlapMap &OverlapFragments, VarToFragments &SeenFragments); void accumulateFragmentMap(MachineInstr &MI, VarToFragments &SeenFragments, @@ -376,7 +497,12 @@ private: bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs, const VarLocMap &VarLocIDs, SmallPtrSet &Visited, - SmallPtrSetImpl &ArtificialBlocks); + SmallPtrSetImpl &ArtificialBlocks, + VarLocInMBB &PendingInLocs); + + /// Create DBG_VALUE insts for inlocs that have been propagated but + /// had their instruction creation deferred. 
+ void flushPendingLocs(VarLocInMBB &PendingInLocs, VarLocMap &VarLocIDs); bool ExtendRanges(MachineFunction &MF); @@ -518,7 +644,7 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF, const VarLoc &VL = VarLocIDs[VLL]; Out << " Var: " << VL.Var.getVar()->getName(); Out << " MI: "; - VL.dump(); + VL.dump(TRI, Out); } } Out << "\n"; @@ -567,11 +693,7 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI, ID = VarLocIDs.insert(VL); OpenRanges.insert(ID, VL.Var); } else if (MI.hasOneMemOperand()) { - // It's a stack spill -- fetch spill base and offset. - VarLoc::SpillLoc SpillLocation = extractSpillBaseRegAndOffset(MI); - VarLoc VL(MI, SpillLocation.SpillBase, SpillLocation.SpillOffset, LS); - ID = VarLocIDs.insert(VL); - OpenRanges.insert(ID, VL.Var); + llvm_unreachable("DBG_VALUE with mem operand encountered after regalloc?"); } else { // This must be an undefined location. We should leave OpenRanges closed. assert(MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == 0 && @@ -585,7 +707,6 @@ void LiveDebugValues::emitEntryValues(MachineInstr &MI, TransferMap &Transfers, DebugParamMap &DebugEntryVals, SparseBitVector<> &KillSet) { - MachineFunction *MF = MI.getParent()->getParent(); for (unsigned ID : KillSet) { if (!VarLocIDs[ID].Var.getVar()->isParameter()) continue; @@ -600,20 +721,12 @@ void LiveDebugValues::emitEntryValues(MachineInstr &MI, auto ParamDebugInstr = DebugEntryVals[CurrDebugInstr->getDebugVariable()]; DIExpression *NewExpr = DIExpression::prepend( ParamDebugInstr->getDebugExpression(), DIExpression::EntryValue); - MachineInstr *EntryValDbgMI = - BuildMI(*MF, ParamDebugInstr->getDebugLoc(), ParamDebugInstr->getDesc(), - ParamDebugInstr->isIndirectDebugValue(), - ParamDebugInstr->getOperand(0).getReg(), - ParamDebugInstr->getDebugVariable(), NewExpr); - - if (ParamDebugInstr->isIndirectDebugValue()) - EntryValDbgMI->getOperand(1).setImm( - ParamDebugInstr->getOperand(1).getImm()); - - Transfers.push_back({&MI, EntryValDbgMI}); - VarLoc VL(*EntryValDbgMI, LS); - unsigned EntryValLocID = VarLocIDs.insert(VL); - OpenRanges.insert(EntryValLocID, VL.Var); + + VarLoc EntryLoc = VarLoc::CreateEntryLoc(*ParamDebugInstr, LS, NewExpr); + + unsigned EntryValLocID = VarLocIDs.insert(EntryLoc); + Transfers.push_back({&MI, EntryValLocID}); + OpenRanges.insert(EntryValLocID, EntryLoc.Var); } } @@ -627,21 +740,19 @@ void LiveDebugValues::insertTransferDebugPair( VarLocMap &VarLocIDs, unsigned OldVarID, TransferKind Kind, unsigned NewReg) { const MachineInstr *DebugInstr = &VarLocIDs[OldVarID].MI; - MachineFunction *MF = MI.getParent()->getParent(); - MachineInstr *NewDebugInstr; auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &DebugInstr, - &VarLocIDs](VarLoc &VL, MachineInstr *NewDebugInstr) { + &VarLocIDs](VarLoc &VL) { unsigned LocId = VarLocIDs.insert(VL); // Close this variable's previous location range. DebugVariable V(*DebugInstr); OpenRanges.erase(V); + // Record the new location as an open range, and a postponed transfer + // inserting a DBG_VALUE for this location. OpenRanges.insert(LocId, VL.Var); - // The newly created DBG_VALUE instruction NewDebugInstr must be inserted - // after MI. Keep track of the pairing. - TransferDebugPair MIP = {&MI, NewDebugInstr}; + TransferDebugPair MIP = {&MI, LocId}; Transfers.push_back(MIP); }; @@ -653,37 +764,25 @@ void LiveDebugValues::insertTransferDebugPair( "No register supplied when handling a copy of a debug value"); // Create a DBG_VALUE instruction to describe the Var in its new // register location. 
- NewDebugInstr = BuildMI( - *MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), - DebugInstr->isIndirectDebugValue(), NewReg, - DebugInstr->getDebugVariable(), DebugInstr->getDebugExpression()); - if (DebugInstr->isIndirectDebugValue()) - NewDebugInstr->getOperand(1).setImm(DebugInstr->getOperand(1).getImm()); - VarLoc VL(*NewDebugInstr, LS); - ProcessVarLoc(VL, NewDebugInstr); - LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register copy: "; - NewDebugInstr->print(dbgs(), /*IsStandalone*/false, - /*SkipOpers*/false, /*SkipDebugLoc*/false, - /*AddNewLine*/true, TII)); + VarLoc VL = VarLoc::CreateCopyLoc(*DebugInstr, LS, NewReg); + ProcessVarLoc(VL); + LLVM_DEBUG({ + dbgs() << "Creating VarLoc for register copy:"; + VL.dump(TRI); + }); return; } case TransferKind::TransferSpill: { // Create a DBG_VALUE instruction to describe the Var in its spilled // location. VarLoc::SpillLoc SpillLocation = extractSpillBaseRegAndOffset(MI); - auto *SpillExpr = DIExpression::prepend(DebugInstr->getDebugExpression(), - DIExpression::ApplyOffset, - SpillLocation.SpillOffset); - NewDebugInstr = BuildMI( - *MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), true, - SpillLocation.SpillBase, DebugInstr->getDebugVariable(), SpillExpr); - VarLoc VL(*NewDebugInstr, SpillLocation.SpillBase, - SpillLocation.SpillOffset, LS); - ProcessVarLoc(VL, NewDebugInstr); - LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for spill: "; - NewDebugInstr->print(dbgs(), /*IsStandalone*/false, - /*SkipOpers*/false, /*SkipDebugLoc*/false, - /*AddNewLine*/true, TII)); + VarLoc VL = VarLoc::CreateSpillLoc(*DebugInstr, SpillLocation.SpillBase, + SpillLocation.SpillOffset, LS); + ProcessVarLoc(VL); + LLVM_DEBUG({ + dbgs() << "Creating VarLoc for spill:"; + VL.dump(TRI); + }); return; } case TransferKind::TransferRestore: { @@ -691,15 +790,14 @@ void LiveDebugValues::insertTransferDebugPair( "No register supplied when handling a restore of a debug value"); MachineFunction *MF = MI.getMF(); DIBuilder DIB(*const_cast(MF->getFunction()).getParent()); - NewDebugInstr = - BuildMI(*MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), false, - NewReg, DebugInstr->getDebugVariable(), DIB.createExpression()); - VarLoc VL(*NewDebugInstr, LS); - ProcessVarLoc(VL, NewDebugInstr); - LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register restore: "; - NewDebugInstr->print(dbgs(), /*IsStandalone*/false, - /*SkipOpers*/false, /*SkipDebugLoc*/false, - /*AddNewLine*/true, TII)); + // DebugInstr refers to the pre-spill location, therefore we can reuse + // its expression. + VarLoc VL = VarLoc::CreateCopyLoc(*DebugInstr, LS, NewReg); + ProcessVarLoc(VL); + LLVM_DEBUG({ + dbgs() << "Creating VarLoc for restore:"; + VL.dump(TRI); + }); return; } } @@ -719,7 +817,7 @@ void LiveDebugValues::transferRegisterDef( // instructions never clobber SP, because some backends (e.g., AArch64) // never list SP in the regmask. if (MO.isReg() && MO.isDef() && MO.getReg() && - TRI->isPhysicalRegister(MO.getReg()) && + Register::isPhysicalRegister(MO.getReg()) && !(MI.isCall() && MO.getReg() == SP)) { // Remove ranges of all aliased registers. for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI) @@ -748,16 +846,8 @@ void LiveDebugValues::transferRegisterDef( } } -/// Decide if @MI is a spill instruction and return true if it is. We use 2 -/// criteria to make this decision: -/// - Is this instruction a store to a spill slot? -/// - Is there a register operand that is both used and killed? 
-/// TODO: Store optimization can fold spills into other stores (including -/// other spills). We do not handle this yet (more than one memory operand). bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI, - MachineFunction *MF, unsigned &Reg) { - SmallVector Accesses; - + MachineFunction *MF) { // TODO: Handle multiple stores folded into one. if (!MI.hasOneMemOperand()) return false; @@ -766,6 +856,14 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI, return false; // This is not a spill instruction, since no valid size was // returned from either function. + return true; +} + +bool LiveDebugValues::isLocationSpill(const MachineInstr &MI, + MachineFunction *MF, unsigned &Reg) { + if (!isSpillInstruction(MI, MF)) + return false; + auto isKilledReg = [&](const MachineOperand MO, unsigned &Reg) { if (!MO.isReg() || !MO.isUse()) { Reg = 0; @@ -834,7 +932,37 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI, LLVM_DEBUG(dbgs() << "Examining instruction: "; MI.dump();); - if (isSpillInstruction(MI, MF, Reg)) { + // First, if there are any DBG_VALUEs pointing at a spill slot that is + // written to, then close the variable location. The value in memory + // will have changed. + VarLocSet KillSet; + if (isSpillInstruction(MI, MF)) { + Loc = extractSpillBaseRegAndOffset(MI); + for (unsigned ID : OpenRanges.getVarLocs()) { + const VarLoc &VL = VarLocIDs[ID]; + if (VL.Kind == VarLoc::SpillLocKind && VL.Loc.SpillLocation == *Loc) { + // This location is overwritten by the current instruction -- terminate + // the open range, and insert an explicit DBG_VALUE $noreg. + // + // Doing this at a later stage would require re-interpreting all + // DBG_VALUes and DIExpressions to identify whether they point at + // memory, and then analysing all memory writes to see if they + // overwrite that memory, which is expensive. + // + // At this stage, we already know which DBG_VALUEs are for spills and + // where they are located; it's best to fix handle overwrites now. + KillSet.set(ID); + VarLoc UndefVL = VarLoc::CreateCopyLoc(VL.MI, LS, 0); + unsigned UndefLocID = VarLocIDs.insert(UndefVL); + Transfers.push_back({&MI, UndefLocID}); + } + } + OpenRanges.erase(KillSet, VarLocIDs); + } + + // Try to recognise spill and restore instructions that may create a new + // variable location. + if (isLocationSpill(MI, MF, Reg)) { TKind = TransferKind::TransferSpill; LLVM_DEBUG(dbgs() << "Recognized as spill: "; MI.dump();); LLVM_DEBUG(dbgs() << "Register: " << Reg << " " << printReg(Reg, TRI) @@ -854,6 +982,7 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI, LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '(' << VarLocIDs[ID].Var.getVar()->getName() << ")\n"); } else if (TKind == TransferKind::TransferRestore && + VarLocIDs[ID].Kind == VarLoc::SpillLocKind && VarLocIDs[ID].Loc.SpillLocation == *Loc) { LLVM_DEBUG(dbgs() << "Restoring Register " << printReg(Reg, TRI) << '(' << VarLocIDs[ID].Var.getVar()->getName() << ")\n"); @@ -885,8 +1014,8 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI, return false; }; - unsigned SrcReg = SrcRegOp->getReg(); - unsigned DestReg = DestRegOp->getReg(); + Register SrcReg = SrcRegOp->getReg(); + Register DestReg = DestRegOp->getReg(); // We want to recognize instructions where destination register is callee // saved register. 
If register that could be clobbered by the call is @@ -906,26 +1035,20 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI, } /// Terminate all open ranges at the end of the current basic block. -bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI, - OpenRangesSet &OpenRanges, - VarLocInMBB &OutLocs, - const VarLocMap &VarLocIDs) { +bool LiveDebugValues::transferTerminator(MachineBasicBlock *CurMBB, + OpenRangesSet &OpenRanges, + VarLocInMBB &OutLocs, + const VarLocMap &VarLocIDs) { bool Changed = false; - const MachineBasicBlock *CurMBB = MI.getParent(); - if (!(MI.isTerminator() || (&MI == &CurMBB->back()))) - return false; - - if (OpenRanges.empty()) - return false; LLVM_DEBUG(for (unsigned ID : OpenRanges.getVarLocs()) { // Copy OpenRanges to OutLocs, if not already present. dbgs() << "Add to OutLocs in MBB #" << CurMBB->getNumber() << ": "; - VarLocIDs[ID].dump(); + VarLocIDs[ID].dump(TRI); }); VarLocSet &VLS = OutLocs[CurMBB]; - Changed = VLS |= OpenRanges.getVarLocs(); + Changed = VLS != OpenRanges.getVarLocs(); // New OutLocs set may be different due to spill, restore or register // copy instruction processing. if (Changed) @@ -995,26 +1118,17 @@ void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI, } /// This routine creates OpenRanges and OutLocs. -bool LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges, +void LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocInMBB &OutLocs, VarLocMap &VarLocIDs, - TransferMap &Transfers, DebugParamMap &DebugEntryVals, - bool transferChanges, + TransferMap &Transfers, + DebugParamMap &DebugEntryVals, OverlapMap &OverlapFragments, VarToFragments &SeenFragments) { - bool Changed = false; transferDebugValue(MI, OpenRanges, VarLocIDs); transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers, DebugEntryVals); - if (transferChanges) { - transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers); - transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers); - } else { - // Build up a map of overlapping fragments on the first run through. - if (MI.isDebugValue()) - accumulateFragmentMap(MI, SeenFragments, OverlapFragments); - } - Changed = transferTerminatorInst(MI, OpenRanges, OutLocs, VarLocIDs); - return Changed; + transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers); + transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers); } /// This routine joins the analysis results of all incoming edges in @MBB by @@ -1024,7 +1138,8 @@ bool LiveDebugValues::join( MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs, const VarLocMap &VarLocIDs, SmallPtrSet &Visited, - SmallPtrSetImpl &ArtificialBlocks) { + SmallPtrSetImpl &ArtificialBlocks, + VarLocInMBB &PendingInLocs) { LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n"); bool Changed = false; @@ -1034,9 +1149,11 @@ bool LiveDebugValues::join( // can be joined. int NumVisited = 0; for (auto p : MBB.predecessors()) { - // Ignore unvisited predecessor blocks. As we are processing - // the blocks in reverse post-order any unvisited block can - // be considered to not remove any incoming values. + // Ignore backedges if we have not visited the predecessor yet. As the + // predecessor hasn't yet had locations propagated into it, most locations + // will not yet be valid, so treat them as all being uninitialized and + // potentially valid. If a location guessed to be correct here is + // invalidated later, we will remove it when we revisit this block. 
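The comment above captures the optimistic join this version of LiveDebugValues performs: a block's live-in variable locations are the intersection of the live-out sets of the predecessors visited so far, back edges from not-yet-visited blocks are skipped rather than treated as killing everything, and locations a later revisit proves wrong are pulled back out of both the in-set and the pending set. A compact stand-alone model of the join itself, using std::set where the pass uses SparseBitVector (the function name is an assumption):

  #include <cstddef>
  #include <set>
  #include <vector>

  using VarLocSet = std::set<unsigned>; // stand-in for a SparseBitVector of VarLoc IDs

  // Optimistic join: intersect the out-sets of the predecessors that have
  // already been visited; unvisited predecessors (back edges) place no
  // constraint yet and are accounted for when the block is revisited.
  static VarLocSet joinInLocs(const std::vector<const VarLocSet *> &VisitedPredOuts) {
    if (VisitedPredOuts.empty())
      return {};
    VarLocSet Result = *VisitedPredOuts.front();
    for (std::size_t I = 1; I < VisitedPredOuts.size(); ++I) {
      VarLocSet Keep;
      for (unsigned ID : Result)
        if (VisitedPredOuts[I]->count(ID))
          Keep.insert(ID);
      Result = std::move(Keep);
    }
    return Result;
  }

  int main() {
    VarLocSet A = {1, 2, 3}, B = {2, 3, 4};
    VarLocSet In = joinInLocs({&A, &B}); // {2, 3}
    return In.size() == 2 ? 0 : 1;
  }

IDs that drop out of the result on a revisit correspond to the new Removed/NumRemoved handling in the hunk above; the IDs that survive are what flushPendingLocs later materialises as DBG_VALUEs at the block entry.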
if (!Visited.count(p)) { LLVM_DEBUG(dbgs() << " ignoring unvisited pred MBB: " << p->getNumber() << "\n"); @@ -1086,44 +1203,59 @@ bool LiveDebugValues::join( // is the entry block which has no predecessor. assert((NumVisited || MBB.pred_empty()) && "Should have processed at least one predecessor"); - if (InLocsT.empty()) - return false; VarLocSet &ILS = InLocs[&MBB]; + VarLocSet &Pending = PendingInLocs[&MBB]; - // Insert DBG_VALUE instructions, if not already inserted. + // New locations will have DBG_VALUE insts inserted at the start of the + // block, after location propagation has finished. Record the insertions + // that we need to perform in the Pending set. VarLocSet Diff = InLocsT; Diff.intersectWithComplement(ILS); for (auto ID : Diff) { - // This VarLoc is not found in InLocs i.e. it is not yet inserted. So, a - // new range is started for the var from the mbb's beginning by inserting - // a new DBG_VALUE. process() will end this range however appropriate. - const VarLoc &DiffIt = VarLocIDs[ID]; - const MachineInstr *DebugInstr = &DiffIt.MI; - MachineInstr *MI = nullptr; - if (DiffIt.isConstant()) { - MachineOperand MO(DebugInstr->getOperand(0)); - MI = BuildMI(MBB, MBB.instr_begin(), DebugInstr->getDebugLoc(), - DebugInstr->getDesc(), false, MO, - DebugInstr->getDebugVariable(), - DebugInstr->getDebugExpression()); - } else { - MI = BuildMI(MBB, MBB.instr_begin(), DebugInstr->getDebugLoc(), - DebugInstr->getDesc(), DebugInstr->isIndirectDebugValue(), - DebugInstr->getOperand(0).getReg(), - DebugInstr->getDebugVariable(), - DebugInstr->getDebugExpression()); - if (DebugInstr->isIndirectDebugValue()) - MI->getOperand(1).setImm(DebugInstr->getOperand(1).getImm()); - } - LLVM_DEBUG(dbgs() << "Inserted: "; MI->dump();); + Pending.set(ID); ILS.set(ID); ++NumInserted; Changed = true; } + + // We may have lost locations by learning about a predecessor that either + // loses or moves a variable. Find any locations in ILS that are not in the + // new in-locations, and delete those. + VarLocSet Removed = ILS; + Removed.intersectWithComplement(InLocsT); + for (auto ID : Removed) { + Pending.reset(ID); + ILS.reset(ID); + ++NumRemoved; + Changed = true; + } + return Changed; } +void LiveDebugValues::flushPendingLocs(VarLocInMBB &PendingInLocs, + VarLocMap &VarLocIDs) { + // PendingInLocs records all locations propagated into blocks, which have + // not had DBG_VALUE insts created. Go through and create those insts now. + for (auto &Iter : PendingInLocs) { + // Map is keyed on a constant pointer, unwrap it so we can insert insts. + auto &MBB = const_cast(*Iter.first); + VarLocSet &Pending = Iter.second; + + for (unsigned ID : Pending) { + // The ID location is live-in to MBB -- work out what kind of machine + // location it is and create a DBG_VALUE. + const VarLoc &DiffIt = VarLocIDs[ID]; + MachineInstr *MI = DiffIt.BuildDbgValue(*MBB.getParent()); + MBB.insert(MBB.instr_begin(), MI); + + (void)MI; + LLVM_DEBUG(dbgs() << "Inserted: "; MI->dump();); + } + } +} + /// Calculate the liveness information for the given machine function and /// extend ranges across basic blocks. bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { @@ -1140,6 +1272,9 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { VarLocInMBB OutLocs; // Ranges that exist beyond bb. VarLocInMBB InLocs; // Ranges that are incoming after joining. TransferMap Transfers; // DBG_VALUEs associated with spills. 
+ VarLocInMBB PendingInLocs; // Ranges that are incoming after joining, but + // that we have deferred creating DBG_VALUE insts + // for immediately. VarToFragments SeenFragments; @@ -1156,8 +1291,6 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { std::greater> Pending; - enum : bool { dontTransferChanges = false, transferChanges = true }; - // Besides parameter's modification, check whether a DBG_VALUE is inlined // in order to deduce whether the variable that it tracks comes from // a different function. If that is the case we can't track its entry value. @@ -1169,7 +1302,7 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { const TargetLowering *TLI = MF.getSubtarget().getTargetLowering(); unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); - unsigned FP = TRI->getFrameRegister(MF); + Register FP = TRI->getFrameRegister(MF); auto IsRegOtherThanSPAndFP = [&](const MachineOperand &Op) -> bool { return Op.isReg() && Op.getReg() != SP && Op.getReg() != FP; }; @@ -1195,23 +1328,14 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { !MI.getDebugExpression()->isFragment()) DebugEntryVals[MI.getDebugVariable()] = &MI; - // Initialize every mbb with OutLocs. - // We are not looking at any spill instructions during the initial pass - // over the BBs. The LiveDebugVariables pass has already created DBG_VALUE - // instructions for spills of registers that are known to be user variables - // within the BB in which the spill occurs. + // Initialize per-block structures and scan for fragment overlaps. for (auto &MBB : MF) { + PendingInLocs[&MBB] = VarLocSet(); + for (auto &MI : MBB) { - process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers, DebugEntryVals, - dontTransferChanges, OverlapFragments, SeenFragments); - } - // Add any entry DBG_VALUE instructions necessitated by parameter - // clobbering. - for (auto &TR : Transfers) { - MBB.insertAfter(MachineBasicBlock::iterator(*TR.TransferInst), - TR.DebugInst); + if (MI.isDebugValue()) + accumulateFragmentMap(MI, SeenFragments, OverlapFragments); } - Transfers.clear(); } auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool { @@ -1248,26 +1372,21 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { while (!Worklist.empty()) { MachineBasicBlock *MBB = OrderToBB[Worklist.top()]; Worklist.pop(); - MBBJoined = - join(*MBB, OutLocs, InLocs, VarLocIDs, Visited, ArtificialBlocks); - Visited.insert(MBB); + MBBJoined = join(*MBB, OutLocs, InLocs, VarLocIDs, Visited, + ArtificialBlocks, PendingInLocs); + MBBJoined |= Visited.insert(MBB).second; if (MBBJoined) { MBBJoined = false; Changed = true; // Now that we have started to extend ranges across BBs we need to // examine spill instructions to see whether they spill registers that // correspond to user variables. + // First load any pending inlocs. + OpenRanges.insertFromLocSet(PendingInLocs[MBB], VarLocIDs); for (auto &MI : *MBB) - OLChanged |= process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers, - DebugEntryVals, transferChanges, OverlapFragments, - SeenFragments); - - // Add any DBG_VALUE instructions necessitated by spills. 
- for (auto &TR : Transfers) - MBB->insertAfter(MachineBasicBlock::iterator(*TR.TransferInst), - TR.DebugInst); - Transfers.clear(); + DebugEntryVals, OverlapFragments, SeenFragments); + OLChanged |= transferTerminator(MBB, OpenRanges, OutLocs, VarLocIDs); LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "OutLocs after propagating", dbgs())); @@ -1289,6 +1408,19 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { assert(Pending.empty() && "Pending should be empty"); } + // Add any DBG_VALUE instructions created by location transfers. + for (auto &TR : Transfers) { + MachineBasicBlock *MBB = TR.TransferInst->getParent(); + const VarLoc &VL = VarLocIDs[TR.LocationID]; + MachineInstr *MI = VL.BuildDbgValue(MF); + MBB->insertAfterBundle(TR.TransferInst->getIterator(), MI); + } + Transfers.clear(); + + // Deferred inlocs will not have had any DBG_VALUE insts created; do + // that now. + flushPendingLocs(PendingInLocs, VarLocIDs); + LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "Final OutLocs", dbgs())); LLVM_DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs, "Final InLocs", dbgs())); return Changed; @@ -1308,7 +1440,7 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); TFI = MF.getSubtarget().getFrameLowering(); TFI->determineCalleeSaves(MF, CalleeSavedRegs, - make_unique().get()); + std::make_unique().get()); LS.initialize(MF); bool Changed = ExtendRanges(MF); diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 656ec7d4bdfd..2dd462fc72b3 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -99,28 +99,27 @@ enum : unsigned { UndefLocNo = ~0U }; /// usage of the location. class DbgValueLocation { public: - DbgValueLocation(unsigned LocNo, bool WasIndirect) - : LocNo(LocNo), WasIndirect(WasIndirect) { + DbgValueLocation(unsigned LocNo) + : LocNo(LocNo) { static_assert(sizeof(*this) == sizeof(unsigned), "bad bitfield packing"); assert(locNo() == LocNo && "location truncation"); } - DbgValueLocation() : LocNo(0), WasIndirect(0) {} + DbgValueLocation() : LocNo(0) {} unsigned locNo() const { // Fix up the undef location number, which gets truncated. return LocNo == INT_MAX ? UndefLocNo : LocNo; } - bool wasIndirect() const { return WasIndirect; } bool isUndef() const { return locNo() == UndefLocNo; } DbgValueLocation changeLocNo(unsigned NewLocNo) const { - return DbgValueLocation(NewLocNo, WasIndirect); + return DbgValueLocation(NewLocNo); } friend inline bool operator==(const DbgValueLocation &LHS, const DbgValueLocation &RHS) { - return LHS.LocNo == RHS.LocNo && LHS.WasIndirect == RHS.WasIndirect; + return LHS.LocNo == RHS.LocNo; } friend inline bool operator!=(const DbgValueLocation &LHS, @@ -129,8 +128,7 @@ public: } private: - unsigned LocNo : 31; - unsigned WasIndirect : 1; + unsigned LocNo; }; /// Map of where a user value is live, and its location. @@ -144,22 +142,51 @@ namespace { class LDVImpl; +/// A UserValue is uniquely identified by the source variable it refers to +/// (Variable), the expression describing how to get the value (Expression) and +/// the specific usage (InlinedAt). InlinedAt differentiates both between +/// inline and non-inline functions, and multiple inlined instances in the same +/// scope. FIXME: The only part of the Expression which matters for UserValue +/// identification is the fragment part. +class UserValueIdentity { +private: + /// The debug info variable we are part of. 
+ const DILocalVariable *Variable; + /// Any complex address expression. + const DIExpression *Expression; + /// Function usage identification. + const DILocation *InlinedAt; + +public: + UserValueIdentity(const DILocalVariable *Var, const DIExpression *Expr, + const DILocation *IA) + : Variable(Var), Expression(Expr), InlinedAt(IA) {} + + bool match(const DILocalVariable *Var, const DIExpression *Expr, + const DILocation *IA) const { + // FIXME: The fragment should be part of the identity, but not + // other things in the expression like stack values. + return Var == Variable && Expr == Expression && IA == InlinedAt; + } + + bool match(const UserValueIdentity &Other) const { + return match(Other.Variable, Other.Expression, Other.InlinedAt); + } + + unsigned hash_value() const { + return hash_combine(Variable, Expression, InlinedAt); + } +}; + /// A user value is a part of a debug info user variable. /// /// A DBG_VALUE instruction notes that (a sub-register of) a virtual register /// holds part of a user variable. The part is identified by a byte offset. -/// -/// UserValues are grouped into equivalence classes for easier searching. Two -/// user values are related if they refer to the same variable, or if they are -/// held by the same virtual register. The equivalence class is the transitive -/// closure of that relation. class UserValue { const DILocalVariable *Variable; ///< The debug info variable we are part of. const DIExpression *Expression; ///< Any complex address expression. DebugLoc dl; ///< The debug location for the variable. This is ///< used by dwarf writer to find lexical scope. - UserValue *leader; ///< Equivalence class leader. - UserValue *next = nullptr; ///< Next value in equivalence class, or null. /// Numbered locations referenced by locmap. SmallVector locations; @@ -180,49 +207,15 @@ class UserValue { LiveIntervals &LIS); public: + UserValue(const UserValue &) = delete; + /// Create a new UserValue. UserValue(const DILocalVariable *var, const DIExpression *expr, DebugLoc L, LocMap::Allocator &alloc) - : Variable(var), Expression(expr), dl(std::move(L)), leader(this), - locInts(alloc) {} - - /// Get the leader of this value's equivalence class. - UserValue *getLeader() { - UserValue *l = leader; - while (l != l->leader) - l = l->leader; - return leader = l; - } + : Variable(var), Expression(expr), dl(std::move(L)), locInts(alloc) {} - /// Return the next UserValue in the equivalence class. - UserValue *getNext() const { return next; } - - /// Does this UserValue match the parameters? - bool match(const DILocalVariable *Var, const DIExpression *Expr, - const DILocation *IA) const { - // FIXME: The fragment should be part of the equivalence class, but not - // other things in the expression like stack values. - return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA; - } - - /// Merge equivalence classes. - static UserValue *merge(UserValue *L1, UserValue *L2) { - L2 = L2->getLeader(); - if (!L1) - return L2; - L1 = L1->getLeader(); - if (L1 == L2) - return L1; - // Splice L2 before L1's members. - UserValue *End = L2; - while (End->next) { - End->leader = L1; - End = End->next; - } - End->leader = L1; - End->next = L1->next; - L1->next = L2; - return L1; + UserValueIdentity getId() { + return UserValueIdentity(Variable, Expression, dl->getInlinedAt()); } /// Return the location number that matches Loc. @@ -261,8 +254,8 @@ public: void mapVirtRegs(LDVImpl *LDV); /// Add a definition point to this value. 
- void addDef(SlotIndex Idx, const MachineOperand &LocMO, bool IsIndirect) { - DbgValueLocation Loc(getLocationNo(LocMO), IsIndirect); + void addDef(SlotIndex Idx, const MachineOperand &LocMO) { + DbgValueLocation Loc(getLocationNo(LocMO)); // Add a singular (Idx,Idx) -> Loc mapping. LocMap::iterator I = locInts.find(Idx); if (!I.valid() || I.start() != Idx) @@ -297,11 +290,10 @@ public: /// /// \param LI Scan for copies of the value in LI->reg. /// \param LocNo Location number of LI->reg. - /// \param WasIndirect Indicates if the original use of LI->reg was indirect /// \param Kills Points where the range of LocNo could be extended. /// \param [in,out] NewDefs Append (Idx, LocNo) of inserted defs here. void addDefsFromCopies( - LiveInterval *LI, unsigned LocNo, bool WasIndirect, + LiveInterval *LI, unsigned LocNo, const SmallVectorImpl &Kills, SmallVectorImpl> &NewDefs, MachineRegisterInfo &MRI, LiveIntervals &LIS); @@ -335,7 +327,29 @@ public: void print(raw_ostream &, const TargetRegisterInfo *); }; +} // namespace +namespace llvm { +template <> struct DenseMapInfo { + static UserValueIdentity getEmptyKey() { + auto Key = DenseMapInfo::getEmptyKey(); + return UserValueIdentity(Key, nullptr, nullptr); + } + static UserValueIdentity getTombstoneKey() { + auto Key = DenseMapInfo::getTombstoneKey(); + return UserValueIdentity(Key, nullptr, nullptr); + } + static unsigned getHashValue(const UserValueIdentity &Val) { + return Val.hash_value(); + } + static bool isEqual(const UserValueIdentity &LHS, + const UserValueIdentity &RHS) { + return LHS.match(RHS); + } +}; +} // namespace llvm + +namespace { /// A user label is a part of a debug info user label. class UserLabel { const DILabel *Label; ///< The debug info label we are part of. @@ -387,20 +401,20 @@ class LDVImpl { /// All allocated UserLabel instances. SmallVector, 2> userLabels; - /// Map virtual register to eq class leader. - using VRMap = DenseMap; - VRMap virtRegToEqClass; + /// Map virtual register to UserValues which use it. + using VRMap = DenseMap>; + VRMap VirtRegToUserVals; - /// Map user variable to eq class leader. - using UVMap = DenseMap; - UVMap userVarMap; + /// Map unique UserValue identity to UserValue. + using UVMap = DenseMap; + UVMap UserVarMap; /// Find or create a UserValue. UserValue *getUserValue(const DILocalVariable *Var, const DIExpression *Expr, const DebugLoc &DL); - /// Find the EC leader for VirtReg or null. - UserValue *lookupVirtReg(unsigned VirtReg); + /// Find the UserValues for VirtReg or null. + SmallVectorImpl *lookupVirtReg(unsigned VirtReg); /// Add DBG_VALUE instruction to our maps. /// @@ -440,8 +454,8 @@ public: MF = nullptr; userValues.clear(); userLabels.clear(); - virtRegToEqClass.clear(); - userVarMap.clear(); + VirtRegToUserVals.clear(); + UserVarMap.clear(); // Make sure we call emitDebugValues if the machine function was modified. assert((!ModifiedMF || EmitDone) && "Dbg values are not emitted in LDV"); @@ -449,8 +463,8 @@ public: ModifiedMF = false; } - /// Map virtual register to an equivalence class. - void mapVirtReg(unsigned VirtReg, UserValue *EC); + /// Map virtual register to a UserValue. + void mapVirtReg(unsigned VirtReg, UserValue *UV); /// Replace all references to OldReg with NewRegs. 
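// Illustrative sketch (not part of the patch): the LiveDebugVariables hunks
// here replace the old leader/next equivalence-class bookkeeping with a plain
// map keyed by a (Variable, Expression, InlinedAt) identity. The find-or-create
// shape that LDVImpl::getUserValue takes further down looks like the following
// self-contained example; VarKey, Value, findOrCreate and std::unordered_map
// are placeholders, while the real code uses UserValueIdentity, UserValue and
// llvm::DenseMap with the DenseMapInfo specialization added in this hunk.
#include <memory>
#include <unordered_map>
#include <vector>

struct VarKey {
  const void *Var;
  const void *Expr;
  const void *InlinedAt;
  bool operator==(const VarKey &O) const {
    return Var == O.Var && Expr == O.Expr && InlinedAt == O.InlinedAt;
  }
};

struct VarKeyHash {
  std::size_t operator()(const VarKey &K) const {
    // Combine the three pointer hashes; the real code uses llvm::hash_combine.
    std::hash<const void *> H;
    return H(K.Var) ^ (H(K.Expr) * 31) ^ (H(K.InlinedAt) * 131);
  }
};

struct Value {}; // stands in for UserValue

// One hash lookup, then create on a miss -- the same shape as looking up
// UserVarMap[Ident] and allocating a new UserValue when the slot is empty.
Value *findOrCreate(std::unordered_map<VarKey, Value *, VarKeyHash> &Map,
                    std::vector<std::unique_ptr<Value>> &Pool,
                    const VarKey &K) {
  Value *&Slot = Map[K];
  if (Slot)
    return Slot;
  Pool.push_back(std::make_unique<Value>());
  return Slot = Pool.back().get();
}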
void splitRegister(unsigned OldReg, ArrayRef NewRegs); @@ -521,8 +535,6 @@ void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { OS << "undef"; else { OS << I.value().locNo(); - if (I.value().wasIndirect()) - OS << " ind"; } } for (unsigned i = 0, e = locations.size(); i != e; ++i) { @@ -554,37 +566,33 @@ void LDVImpl::print(raw_ostream &OS) { void UserValue::mapVirtRegs(LDVImpl *LDV) { for (unsigned i = 0, e = locations.size(); i != e; ++i) if (locations[i].isReg() && - TargetRegisterInfo::isVirtualRegister(locations[i].getReg())) + Register::isVirtualRegister(locations[i].getReg())) LDV->mapVirtReg(locations[i].getReg(), this); } UserValue *LDVImpl::getUserValue(const DILocalVariable *Var, const DIExpression *Expr, const DebugLoc &DL) { - UserValue *&Leader = userVarMap[Var]; - if (Leader) { - UserValue *UV = Leader->getLeader(); - Leader = UV; - for (; UV; UV = UV->getNext()) - if (UV->match(Var, Expr, DL->getInlinedAt())) - return UV; - } + auto Ident = UserValueIdentity(Var, Expr, DL->getInlinedAt()); + UserValue *&UVEntry = UserVarMap[Ident]; - userValues.push_back( - llvm::make_unique(Var, Expr, DL, allocator)); - UserValue *UV = userValues.back().get(); - Leader = UserValue::merge(Leader, UV); - return UV; + if (UVEntry) + return UVEntry; + + userValues.push_back(std::make_unique(Var, Expr, DL, allocator)); + return UVEntry = userValues.back().get(); } -void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) { - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Only map VirtRegs"); - UserValue *&Leader = virtRegToEqClass[VirtReg]; - Leader = UserValue::merge(Leader, EC); +void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *UV) { + assert(Register::isVirtualRegister(VirtReg) && "Only map VirtRegs"); + assert(UserVarMap.find(UV->getId()) != UserVarMap.end() && + "UserValue should exist in UserVarMap"); + VirtRegToUserVals[VirtReg].push_back(UV); } -UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) { - if (UserValue *UV = virtRegToEqClass.lookup(VirtReg)) - return UV->getLeader(); +SmallVectorImpl *LDVImpl::lookupVirtReg(unsigned VirtReg) { + VRMap::iterator Itr = VirtRegToUserVals.find(VirtReg); + if (Itr != VirtRegToUserVals.end()) + return &Itr->getSecond(); return nullptr; } @@ -606,8 +614,8 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { // could be removed or replaced by asserts. bool Discard = false; if (MI.getOperand(0).isReg() && - TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg())) { - const unsigned Reg = MI.getOperand(0).getReg(); + Register::isVirtualRegister(MI.getOperand(0).getReg())) { + const Register Reg = MI.getOperand(0).getReg(); if (!LIS->hasInterval(Reg)) { // The DBG_VALUE is described by a virtual register that does not have a // live interval. Discard the DBG_VALUE. @@ -631,19 +639,18 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { } // Get or create the UserValue for (variable,offset) here. 
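// Illustrative sketch (not part of the patch): with equivalence classes gone,
// one virtual register can be referenced by several independent UserValues, so
// the old virtRegToEqClass leader map becomes the register -> vector-of-users
// map (VirtRegToUserVals) declared above. A simplified stand-in with standard
// containers; Value again plays the role of UserValue, and mapReg/lookupReg
// are made-up names for illustration only.
#include <unordered_map>
#include <vector>

struct Value {};
using UserMap = std::unordered_map<unsigned, std::vector<Value *>>;

void mapReg(UserMap &M, unsigned VirtReg, Value *UV) {
  M[VirtReg].push_back(UV); // append; no leader merging any more
}

const std::vector<Value *> *lookupReg(const UserMap &M, unsigned VirtReg) {
  auto It = M.find(VirtReg);
  return It == M.end() ? nullptr : &It->second;
}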
- bool IsIndirect = MI.getOperand(1).isImm(); - if (IsIndirect) - assert(MI.getOperand(1).getImm() == 0 && "DBG_VALUE with nonzero offset"); + assert(!MI.getOperand(1).isImm() && "DBG_VALUE with indirect flag before " + "LiveDebugVariables"); const DILocalVariable *Var = MI.getDebugVariable(); const DIExpression *Expr = MI.getDebugExpression(); UserValue *UV = getUserValue(Var, Expr, MI.getDebugLoc()); if (!Discard) - UV->addDef(Idx, MI.getOperand(0), IsIndirect); + UV->addDef(Idx, MI.getOperand(0)); else { MachineOperand MO = MachineOperand::CreateReg(0U, false); MO.setIsDebug(); - UV->addDef(Idx, MO, false); + UV->addDef(Idx, MO); } return true; } @@ -666,7 +673,7 @@ bool LDVImpl::handleDebugLabel(MachineInstr &MI, SlotIndex Idx) { } } if (!Found) - userLabels.push_back(llvm::make_unique(Label, DL, Idx)); + userLabels.push_back(std::make_unique(Label, DL, Idx)); return true; } @@ -751,14 +758,14 @@ void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR, } void UserValue::addDefsFromCopies( - LiveInterval *LI, unsigned LocNo, bool WasIndirect, + LiveInterval *LI, unsigned LocNo, const SmallVectorImpl &Kills, SmallVectorImpl> &NewDefs, MachineRegisterInfo &MRI, LiveIntervals &LIS) { if (Kills.empty()) return; // Don't track copies from physregs, there are too many uses. - if (!TargetRegisterInfo::isVirtualRegister(LI->reg)) + if (!Register::isVirtualRegister(LI->reg)) return; // Collect all the (vreg, valno) pairs that are copies of LI. @@ -768,13 +775,13 @@ void UserValue::addDefsFromCopies( // Copies of the full value. if (MO.getSubReg() || !MI->isCopy()) continue; - unsigned DstReg = MI->getOperand(0).getReg(); + Register DstReg = MI->getOperand(0).getReg(); // Don't follow copies to physregs. These are usually setting up call // arguments, and the argument registers are always call clobbered. We are // better off in the source register which could be a callee-saved register, // or it could be spilled. - if (!TargetRegisterInfo::isVirtualRegister(DstReg)) + if (!Register::isVirtualRegister(DstReg)) continue; // Is LocNo extended to reach this copy? If not, another def may be blocking @@ -815,7 +822,7 @@ void UserValue::addDefsFromCopies( MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def); assert(CopyMI && CopyMI->isCopy() && "Bad copy value"); unsigned LocNo = getLocationNo(CopyMI->getOperand(0)); - DbgValueLocation NewLoc(LocNo, WasIndirect); + DbgValueLocation NewLoc(LocNo); I.insert(Idx, Idx.getNextSlot(), NewLoc); NewDefs.push_back(std::make_pair(Idx, NewLoc)); break; @@ -845,7 +852,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, } // Register locations are constrained to where the register value is live. - if (TargetRegisterInfo::isVirtualRegister(LocMO.getReg())) { + if (Register::isVirtualRegister(LocMO.getReg())) { LiveInterval *LI = nullptr; const VNInfo *VNI = nullptr; if (LIS.hasInterval(LocMO.getReg())) { @@ -863,8 +870,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, // sub-register in that regclass). For now, simply skip handling copies if // a sub-register is involved. 
if (LI && !LocMO.getSubReg()) - addDefsFromCopies(LI, Loc.locNo(), Loc.wasIndirect(), Kills, Defs, MRI, - LIS); + addDefsFromCopies(LI, Loc.locNo(), Kills, Defs, MRI, LIS); continue; } @@ -1123,16 +1129,18 @@ UserValue::splitRegister(unsigned OldReg, ArrayRef NewRegs, void LDVImpl::splitRegister(unsigned OldReg, ArrayRef NewRegs) { bool DidChange = false; - for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext()) - DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS); + if (auto *UserVals = lookupVirtReg(OldReg)) + for (auto *UV : *UserVals) + DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS); if (!DidChange) return; // Map all of the new virtual registers. - UserValue *UV = lookupVirtReg(OldReg); - for (unsigned i = 0; i != NewRegs.size(); ++i) - mapVirtReg(NewRegs[i], UV); + if (auto *UserVals = lookupVirtReg(OldReg)) + for (auto *UV : *UserVals) + for (unsigned i = 0; i != NewRegs.size(); ++i) + mapVirtReg(NewRegs[i], UV); } void LiveDebugVariables:: @@ -1161,10 +1169,10 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const MachineFunction &MF, MachineOperand Loc = locations[I]; // Only virtual registers are rewritten. if (Loc.isReg() && Loc.getReg() && - TargetRegisterInfo::isVirtualRegister(Loc.getReg())) { - unsigned VirtReg = Loc.getReg(); + Register::isVirtualRegister(Loc.getReg())) { + Register VirtReg = Loc.getReg(); if (VRM.isAssignedReg(VirtReg) && - TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) { + Register::isPhysicalRegister(VRM.getPhys(VirtReg))) { // This can create a %noreg operand in rare cases when the sub-register // index is no longer available. That means the user value is in a // non-existent sub-register, and %noreg is exactly what we want. @@ -1258,7 +1266,7 @@ findNextInsertLocation(MachineBasicBlock *MBB, const TargetRegisterInfo &TRI) { if (!LocMO.isReg()) return MBB->instr_end(); - unsigned Reg = LocMO.getReg(); + Register Reg = LocMO.getReg(); // Find the next instruction in the MBB that define the register Reg. while (I != MBB->end() && !I->isTerminator()) { @@ -1302,21 +1310,14 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx, // that the original virtual register was a pointer. Also, add the stack slot // offset for the spilled register to the expression. const DIExpression *Expr = Expression; - uint8_t DIExprFlags = DIExpression::ApplyOffset; - bool IsIndirect = Loc.wasIndirect(); - if (Spilled) { - if (IsIndirect) - DIExprFlags |= DIExpression::DerefAfter; - Expr = - DIExpression::prepend(Expr, DIExprFlags, SpillOffset); - IsIndirect = true; - } + if (Spilled) + Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset, SpillOffset); assert((!Spilled || MO.isFI()) && "a spilled location must be a frame index"); do { BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE), - IsIndirect, MO, Variable, Expr); + Spilled, MO, Variable, Expr); // Continue and insert DBG_VALUES after every redefinition of register // associated with the debug value within the range diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 70b2a77fe800..54ac46f2e7ce 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -886,7 +886,7 @@ static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR, const TargetRegisterInfo &TRI) { // Phys reg should not be tracked at subreg level. // Same for noreg (Reg == 0). 
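// Illustrative sketch (not part of the patch): the mechanical change repeated
// throughout these hunks moves the static register-number predicates from
// TargetRegisterInfo to the Register value type and retypes "unsigned Reg"
// locals as Register. A minimal example, assuming the llvm/CodeGen/Register.h
// interface imported in this revision; classify() is a made-up name, not an
// LLVM API.
#include "llvm/CodeGen/Register.h"

enum class RegKind { None, Virtual, Physical, Other };

static RegKind classify(llvm::Register Reg) {
  if (Reg == 0) // %noreg
    return RegKind::None;
  if (llvm::Register::isVirtualRegister(Reg)) // was TargetRegisterInfo::...
    return RegKind::Virtual;
  if (llvm::Register::isPhysicalRegister(Reg)) // was TargetRegisterInfo::...
    return RegKind::Physical;
  return RegKind::Other; // e.g. stack slots
}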
- if (!TargetRegisterInfo::isVirtualRegister(Reg) || !Reg) + if (!Register::isVirtualRegister(Reg) || !Reg) return; // Remove the values that don't define those lanes. SmallVector ToBeRemoved; @@ -917,7 +917,8 @@ static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR, for (VNInfo *VNI : ToBeRemoved) SR.removeValNo(VNI); - assert(!SR.empty() && "At least one value should be defined by this mask"); + // If the subrange is empty at this point, the MIR is invalid. Do not assert + // and let the verifier catch this case. } void LiveInterval::refineSubRanges( @@ -967,7 +968,7 @@ void LiveInterval::computeSubRangeUndefs(SmallVectorImpl &Undefs, LaneBitmask LaneMask, const MachineRegisterInfo &MRI, const SlotIndexes &Indexes) const { - assert(TargetRegisterInfo::isVirtualRegister(reg)); + assert(Register::isVirtualRegister(reg)); LaneBitmask VRegMask = MRI.getMaxLaneMaskForVReg(reg); assert((VRegMask & LaneMask).any()); const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); diff --git a/lib/CodeGen/LiveIntervals.cpp b/lib/CodeGen/LiveIntervals.cpp index aa85569063b3..2989930ad093 100644 --- a/lib/CodeGen/LiveIntervals.cpp +++ b/lib/CodeGen/LiveIntervals.cpp @@ -14,7 +14,6 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveIntervals.h" -#include "LiveRangeCalc.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" @@ -22,6 +21,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveRangeCalc.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -108,7 +108,7 @@ LiveIntervals::~LiveIntervals() { void LiveIntervals::releaseMemory() { // Free the live intervals themselves. for (unsigned i = 0, e = VirtRegIntervals.size(); i != e; ++i) - delete VirtRegIntervals[TargetRegisterInfo::index2VirtReg(i)]; + delete VirtRegIntervals[Register::index2VirtReg(i)]; VirtRegIntervals.clear(); RegMaskSlots.clear(); RegMaskBits.clear(); @@ -161,7 +161,7 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const { // Dump the virtregs. for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (hasInterval(Reg)) OS << getInterval(Reg) << '\n'; } @@ -186,7 +186,7 @@ LLVM_DUMP_METHOD void LiveIntervals::dumpInstrs() const { #endif LiveInterval* LiveIntervals::createInterval(unsigned reg) { - float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? huge_valf : 0.0F; + float Weight = Register::isPhysicalRegister(reg) ? 
huge_valf : 0.0F; return new LiveInterval(reg, Weight); } @@ -201,7 +201,7 @@ void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { void LiveIntervals::computeVirtRegs() { for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) continue; createAndComputeVirtRegInterval(Reg); @@ -441,8 +441,8 @@ void LiveIntervals::extendSegmentsToUses(LiveRange &Segments, bool LiveIntervals::shrinkToUses(LiveInterval *li, SmallVectorImpl *dead) { LLVM_DEBUG(dbgs() << "Shrink: " << *li << '\n'); - assert(TargetRegisterInfo::isVirtualRegister(li->reg) - && "Can only shrink virtual registers"); + assert(Register::isVirtualRegister(li->reg) && + "Can only shrink virtual registers"); // Shrink subregister live ranges. bool NeedsCleanup = false; @@ -541,8 +541,8 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI, void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) { LLVM_DEBUG(dbgs() << "Shrink: " << SR << '\n'); - assert(TargetRegisterInfo::isVirtualRegister(Reg) - && "Can only shrink virtual registers"); + assert(Register::isVirtualRegister(Reg) && + "Can only shrink virtual registers"); // Find all the values used, including PHI kills. ShrinkToUsesWorkList WorkList; @@ -688,7 +688,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { LiveRange::const_iterator>, 4> SRs; for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) continue; const LiveInterval &LI = getInterval(Reg); @@ -986,10 +986,10 @@ public: MO.setIsKill(false); } - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { LiveInterval &LI = LIS.getInterval(Reg); if (LI.hasSubRanges()) { unsigned SubReg = MO.getSubReg(); @@ -1023,7 +1023,7 @@ private: return; LLVM_DEBUG({ dbgs() << " "; - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { dbgs() << printReg(Reg); if (LaneMask.any()) dbgs() << " L" << PrintLaneMask(LaneMask); @@ -1288,6 +1288,20 @@ private: const SlotIndex SplitPos = NewIdxDef; OldIdxVNI = OldIdxIn->valno; + SlotIndex NewDefEndPoint = std::next(NewIdxIn)->end; + LiveRange::iterator Prev = std::prev(OldIdxIn); + if (OldIdxIn != LR.begin() && + SlotIndex::isEarlierInstr(NewIdx, Prev->end)) { + // If the segment before OldIdx read a value defined earlier than + // NewIdx, the moved instruction also reads and forwards that + // value. Extend the lifetime of the new def point. + + // Extend to where the previous range started, unless there is + // another redef first. + NewDefEndPoint = std::min(OldIdxIn->start, + std::next(NewIdxOut)->start); + } + // Merge the OldIdxIn and OldIdxOut segments into OldIdxOut. OldIdxOut->valno->def = OldIdxIn->start; *OldIdxOut = LiveRange::Segment(OldIdxIn->start, OldIdxOut->end, @@ -1305,7 +1319,8 @@ private: // There is no gap between NewSegment and its predecessor. 
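// Illustrative sketch (not part of the patch): several LiveIntervals loops
// above walk every virtual register by converting a dense index back to a
// register number; that helper also moves from TargetRegisterInfo::index2VirtReg
// to Register::index2VirtReg. Helper shown for illustration only --
// visitVirtRegs is not an LLVM API, and the MachineRegisterInfo calls are the
// ones used in the hunks above.
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"

template <typename Fn>
static void visitVirtRegs(const llvm::MachineRegisterInfo &MRI, Fn Visit) {
  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    unsigned Reg = llvm::Register::index2VirtReg(I);
    if (MRI.reg_nodbg_empty(Reg)) // skip vregs with no non-debug uses
      continue;
    Visit(Reg);
  }
}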
*NewSegment = LiveRange::Segment(Next->start, SplitPos, Next->valno); - *Next = LiveRange::Segment(SplitPos, Next->end, OldIdxVNI); + + *Next = LiveRange::Segment(SplitPos, NewDefEndPoint, OldIdxVNI); Next->valno->def = SplitPos; } else { // There is a gap between NewSegment and its predecessor @@ -1384,7 +1399,7 @@ private: // Return the last use of reg between NewIdx and OldIdx. SlotIndex findLastUseBefore(SlotIndex Before, unsigned Reg, LaneBitmask LaneMask) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { SlotIndex LastUse = Before; for (MachineOperand &MO : MRI.use_nodbg_operands(Reg)) { if (MO.isUndef()) @@ -1429,7 +1444,7 @@ private: // Check if MII uses Reg. for (MIBundleOperands MO(*MII); MO.isValid(); ++MO) if (MO->isReg() && !MO->isUndef() && - TargetRegisterInfo::isPhysicalRegister(MO->getReg()) && + Register::isPhysicalRegister(MO->getReg()) && TRI.hasRegUnit(MO->getReg(), Reg)) return Idx.getRegSlot(); } @@ -1439,7 +1454,10 @@ private: }; void LiveIntervals::handleMove(MachineInstr &MI, bool UpdateFlags) { - assert(!MI.isBundled() && "Can't handle bundled instructions yet."); + // It is fine to move a bundle as a whole, but not an individual instruction + // inside it. + assert((!MI.isBundled() || MI.getOpcode() == TargetOpcode::BUNDLE) && + "Cannot move instruction in bundle"); SlotIndex OldIndex = Indexes->getInstructionIndex(MI); Indexes->removeMachineInstrFromMaps(MI); SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(MI); @@ -1582,8 +1600,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(), MOE = MI.operands_end(); MOI != MOE; ++MOI) { - if (MOI->isReg() && - TargetRegisterInfo::isVirtualRegister(MOI->getReg()) && + if (MOI->isReg() && Register::isVirtualRegister(MOI->getReg()) && !hasInterval(MOI->getReg())) { createAndComputeVirtRegInterval(MOI->getReg()); } @@ -1591,7 +1608,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, } for (unsigned Reg : OrigRegs) { - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) continue; LiveInterval &LI = getInterval(Reg); @@ -1642,7 +1659,7 @@ void LiveIntervals::splitSeparateComponents(LiveInterval &LI, unsigned Reg = LI.reg; const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); for (unsigned I = 1; I < NumComp; ++I) { - unsigned NewVReg = MRI->createVirtualRegister(RegClass); + Register NewVReg = MRI->createVirtualRegister(RegClass); LiveInterval &NewLI = createEmptyInterval(NewVReg); SplitLIs.push_back(&NewLI); } diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp index cd3d248ac878..c2a1cc7c6490 100644 --- a/lib/CodeGen/LivePhysRegs.cpp +++ b/lib/CodeGen/LivePhysRegs.cpp @@ -46,8 +46,8 @@ void LivePhysRegs::removeDefs(const MachineInstr &MI) { if (O->isReg()) { if (!O->isDef() || O->isDebug()) continue; - unsigned Reg = O->getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = O->getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; removeReg(Reg); } else if (O->isRegMask()) @@ -60,8 +60,8 @@ void LivePhysRegs::addUses(const MachineInstr &MI) { for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { if (!O->isReg() || !O->readsReg() || O->isDebug()) continue; - unsigned Reg = O->getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = O->getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; addReg(Reg); } @@ -86,8 +86,8 @@ void LivePhysRegs::stepForward(const 
MachineInstr &MI, // Remove killed registers from the set. for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { if (O->isReg() && !O->isDebug()) { - unsigned Reg = O->getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = O->getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; if (O->isDef()) { // Note, dead defs are still recorded. The caller should decide how to @@ -292,10 +292,10 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) { if (!MO->isReg() || !MO->isDef() || MO->isDebug()) continue; - unsigned Reg = MO->getReg(); + Register Reg = MO->getReg(); if (Reg == 0) continue; - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + assert(Register::isPhysicalRegister(Reg)); bool IsNotLive = LiveRegs.available(MRI, Reg); MO->setIsDead(IsNotLive); @@ -309,10 +309,10 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) { if (!MO->isReg() || !MO->readsReg() || MO->isDebug()) continue; - unsigned Reg = MO->getReg(); + Register Reg = MO->getReg(); if (Reg == 0) continue; - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + assert(Register::isPhysicalRegister(Reg)); bool IsNotLive = LiveRegs.available(MRI, Reg); MO->setIsKill(IsNotLive); diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index d670f28df6ba..24b57be0da00 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -10,7 +10,7 @@ // //===----------------------------------------------------------------------===// -#include "LiveRangeCalc.h" +#include "llvm/CodeGen/LiveRangeCalc.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -372,8 +372,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, report_fatal_error("Use not jointly dominated by defs."); } - if (TargetRegisterInfo::isPhysicalRegister(PhysReg) && - !MBB->isLiveIn(PhysReg)) { + if (Register::isPhysicalRegister(PhysReg) && !MBB->isLiveIn(PhysReg)) { MBB->getParent()->verify(); const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); errs() << "The register " << printReg(PhysReg, TRI) diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h deleted file mode 100644 index 11aea5a3b016..000000000000 --- a/lib/CodeGen/LiveRangeCalc.h +++ /dev/null @@ -1,297 +0,0 @@ -//===- LiveRangeCalc.h - Calculate live ranges ------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// The LiveRangeCalc class can be used to compute live ranges from scratch. It -// caches information about values in the CFG to speed up repeated operations -// on the same live range. The cache can be shared by non-overlapping live -// ranges. SplitKit uses that when computing the live range of split products. -// -// A low-level interface is available to clients that know where a variable is -// live, but don't know which value it has as every point. LiveRangeCalc will -// propagate values down the dominator tree, and even insert PHI-defs where -// needed. SplitKit uses this faster interface when possible. 
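// Illustrative sketch (not part of the patch): this deletion appears to be a
// move rather than a removal -- the header becomes the public
// llvm/CodeGen/LiveRangeCalc.h, as the include changes in LiveIntervals.cpp
// and LiveRangeCalc.cpp above show. In-tree users only switch include paths:
//
//   #include "LiveRangeCalc.h"              // before: private to lib/CodeGen
//   #include "llvm/CodeGen/LiveRangeCalc.h" // after: public CodeGen header
//
// A minimal usage shape, assuming the interface reproduced in the removed copy
// below (reset() once per family of non-overlapping live ranges, then
// calculate()); recomputeFromScratch is a made-up name.
#include "llvm/CodeGen/LiveRangeCalc.h"

void recomputeFromScratch(llvm::LiveInterval &LI,
                          const llvm::MachineFunction *MF,
                          llvm::SlotIndexes *SI,
                          llvm::MachineDominatorTree *MDT,
                          llvm::VNInfo::Allocator *Alloc) {
  llvm::LiveRangeCalc LRC;
  LRC.reset(MF, SI, MDT, Alloc);             // prepare caches
  LRC.calculate(LI, /*TrackSubRegs=*/false); // compute LI's ranges from scratch
}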
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_CODEGEN_LIVERANGECALC_H -#define LLVM_LIB_CODEGEN_LIVERANGECALC_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/SlotIndexes.h" -#include "llvm/MC/LaneBitmask.h" -#include - -namespace llvm { - -template class DomTreeNodeBase; -class MachineDominatorTree; -class MachineFunction; -class MachineRegisterInfo; - -using MachineDomTreeNode = DomTreeNodeBase; - -class LiveRangeCalc { - const MachineFunction *MF = nullptr; - const MachineRegisterInfo *MRI = nullptr; - SlotIndexes *Indexes = nullptr; - MachineDominatorTree *DomTree = nullptr; - VNInfo::Allocator *Alloc = nullptr; - - /// LiveOutPair - A value and the block that defined it. The domtree node is - /// redundant, it can be computed as: MDT[Indexes.getMBBFromIndex(VNI->def)]. - using LiveOutPair = std::pair; - - /// LiveOutMap - Map basic blocks to the value leaving the block. - using LiveOutMap = IndexedMap; - - /// Bit vector of active entries in LiveOut, also used as a visited set by - /// findReachingDefs. One entry per basic block, indexed by block number. - /// This is kept as a separate bit vector because it can be cleared quickly - /// when switching live ranges. - BitVector Seen; - - /// Map LiveRange to sets of blocks (represented by bit vectors) that - /// in the live range are defined on entry and undefined on entry. - /// A block is defined on entry if there is a path from at least one of - /// the defs in the live range to the entry of the block, and conversely, - /// a block is undefined on entry, if there is no such path (i.e. no - /// definition reaches the entry of the block). A single LiveRangeCalc - /// object is used to track live-out information for multiple registers - /// in live range splitting (which is ok, since the live ranges of these - /// registers do not overlap), but the defined/undefined information must - /// be kept separate for each individual range. - /// By convention, EntryInfoMap[&LR] = { Defined, Undefined }. - using EntryInfoMap = DenseMap>; - EntryInfoMap EntryInfos; - - /// Map each basic block where a live range is live out to the live-out value - /// and its defining block. - /// - /// For every basic block, MBB, one of these conditions shall be true: - /// - /// 1. !Seen.count(MBB->getNumber()) - /// Blocks without a Seen bit are ignored. - /// 2. LiveOut[MBB].second.getNode() == MBB - /// The live-out value is defined in MBB. - /// 3. forall P in preds(MBB): LiveOut[P] == LiveOut[MBB] - /// The live-out value passses through MBB. All predecessors must carry - /// the same value. - /// - /// The domtree node may be null, it can be computed. - /// - /// The map can be shared by multiple live ranges as long as no two are - /// live-out of the same block. - LiveOutMap Map; - - /// LiveInBlock - Information about a basic block where a live range is known - /// to be live-in, but the value has not yet been determined. - struct LiveInBlock { - // The live range set that is live-in to this block. The algorithms can - // handle multiple non-overlapping live ranges simultaneously. - LiveRange &LR; - - // DomNode - Dominator tree node for the block. - // Cleared when the final value has been determined and LI has been updated. 
- MachineDomTreeNode *DomNode; - - // Position in block where the live-in range ends, or SlotIndex() if the - // range passes through the block. When the final value has been - // determined, the range from the block start to Kill will be added to LI. - SlotIndex Kill; - - // Live-in value filled in by updateSSA once it is known. - VNInfo *Value = nullptr; - - LiveInBlock(LiveRange &LR, MachineDomTreeNode *node, SlotIndex kill) - : LR(LR), DomNode(node), Kill(kill) {} - }; - - /// LiveIn - Work list of blocks where the live-in value has yet to be - /// determined. This list is typically computed by findReachingDefs() and - /// used as a work list by updateSSA(). The low-level interface may also be - /// used to add entries directly. - SmallVector LiveIn; - - /// Check if the entry to block @p MBB can be reached by any of the defs - /// in @p LR. Return true if none of the defs reach the entry to @p MBB. - bool isDefOnEntry(LiveRange &LR, ArrayRef Undefs, - MachineBasicBlock &MBB, BitVector &DefOnEntry, - BitVector &UndefOnEntry); - - /// Find the set of defs that can reach @p Kill. @p Kill must belong to - /// @p UseMBB. - /// - /// If exactly one def can reach @p UseMBB, and the def dominates @p Kill, - /// all paths from the def to @p UseMBB are added to @p LR, and the function - /// returns true. - /// - /// If multiple values can reach @p UseMBB, the blocks that need @p LR to be - /// live in are added to the LiveIn array, and the function returns false. - /// - /// The array @p Undef provides the locations where the range @p LR becomes - /// undefined by operands on other subranges. If @p Undef - /// is non-empty and @p Kill is jointly dominated only by the entries of - /// @p Undef, the function returns false. - /// - /// PhysReg, when set, is used to verify live-in lists on basic blocks. - bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, - SlotIndex Use, unsigned PhysReg, - ArrayRef Undefs); - - /// updateSSA - Compute the values that will be live in to all requested - /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form. - /// - /// Every live-in block must be jointly dominated by the added live-out - /// blocks. No values are read from the live ranges. - void updateSSA(); - - /// Transfer information from the LiveIn vector to the live ranges and update - /// the given @p LiveOuts. - void updateFromLiveIns(); - - /// Extend the live range of @p LR to reach all uses of Reg. - /// - /// If @p LR is a main range, or if @p LI is null, then all uses must be - /// jointly dominated by the definitions from @p LR. If @p LR is a subrange - /// of the live interval @p LI, corresponding to lane mask @p LaneMask, - /// all uses must be jointly dominated by the definitions from @p LR - /// together with definitions of other lanes where @p LR becomes undefined - /// (via operands). - /// If @p LR is a main range, the @p LaneMask should be set to ~0, i.e. - /// LaneBitmask::getAll(). - void extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask, - LiveInterval *LI = nullptr); - - /// Reset Map and Seen fields. - void resetLiveOutMap(); - -public: - LiveRangeCalc() = default; - - //===--------------------------------------------------------------------===// - // High-level interface. - //===--------------------------------------------------------------------===// - // - // Calculate live ranges from scratch. - // - - /// reset - Prepare caches for a new set of non-overlapping live ranges. 
The - /// caches must be reset before attempting calculations with a live range - /// that may overlap a previously computed live range, and before the first - /// live range in a function. If live ranges are not known to be - /// non-overlapping, call reset before each. - void reset(const MachineFunction *mf, SlotIndexes *SI, - MachineDominatorTree *MDT, VNInfo::Allocator *VNIA); - - //===--------------------------------------------------------------------===// - // Mid-level interface. - //===--------------------------------------------------------------------===// - // - // Modify existing live ranges. - // - - /// Extend the live range of @p LR to reach @p Use. - /// - /// The existing values in @p LR must be live so they jointly dominate @p Use. - /// If @p Use is not dominated by a single existing value, PHI-defs are - /// inserted as required to preserve SSA form. - /// - /// PhysReg, when set, is used to verify live-in lists on basic blocks. - void extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg, - ArrayRef Undefs); - - /// createDeadDefs - Create a dead def in LI for every def operand of Reg. - /// Each instruction defining Reg gets a new VNInfo with a corresponding - /// minimal live range. - void createDeadDefs(LiveRange &LR, unsigned Reg); - - /// Extend the live range of @p LR to reach all uses of Reg. - /// - /// All uses must be jointly dominated by existing liveness. PHI-defs are - /// inserted as needed to preserve SSA form. - void extendToUses(LiveRange &LR, unsigned PhysReg) { - extendToUses(LR, PhysReg, LaneBitmask::getAll()); - } - - /// Calculates liveness for the register specified in live interval @p LI. - /// Creates subregister live ranges as needed if subreg liveness tracking is - /// enabled. - void calculate(LiveInterval &LI, bool TrackSubRegs); - - /// For live interval \p LI with correct SubRanges construct matching - /// information for the main live range. Expects the main live range to not - /// have any segments or value numbers. - void constructMainRangeFromSubranges(LiveInterval &LI); - - //===--------------------------------------------------------------------===// - // Low-level interface. - //===--------------------------------------------------------------------===// - // - // These functions can be used to compute live ranges where the live-in and - // live-out blocks are already known, but the SSA value in each block is - // unknown. - // - // After calling reset(), add known live-out values and known live-in blocks. - // Then call calculateValues() to compute the actual value that is - // live-in to each block, and add liveness to the live ranges. - // - - /// setLiveOutValue - Indicate that VNI is live out from MBB. The - /// calculateValues() function will not add liveness for MBB, the caller - /// should take care of that. - /// - /// VNI may be null only if MBB is a live-through block also passed to - /// addLiveInBlock(). - void setLiveOutValue(MachineBasicBlock *MBB, VNInfo *VNI) { - Seen.set(MBB->getNumber()); - Map[MBB] = LiveOutPair(VNI, nullptr); - } - - /// addLiveInBlock - Add a block with an unknown live-in value. This - /// function can only be called once per basic block. Once the live-in value - /// has been determined, calculateValues() will add liveness to LI. - /// - /// @param LR The live range that is live-in to the block. - /// @param DomNode The domtree node for the block. - /// @param Kill Index in block where LI is killed. 
If the value is - /// live-through, set Kill = SLotIndex() and also call - /// setLiveOutValue(MBB, 0). - void addLiveInBlock(LiveRange &LR, - MachineDomTreeNode *DomNode, - SlotIndex Kill = SlotIndex()) { - LiveIn.push_back(LiveInBlock(LR, DomNode, Kill)); - } - - /// calculateValues - Calculate the value that will be live-in to each block - /// added with addLiveInBlock. Add PHI-def values as needed to preserve SSA - /// form. Add liveness to all live-in blocks up to the Kill point, or the - /// whole block for live-through blocks. - /// - /// Every predecessor of a live-in block must have been given a value with - /// setLiveOutValue, the value may be null for live-trough blocks. - void calculateValues(); - - /// A diagnostic function to check if the end of the block @p MBB is - /// jointly dominated by the blocks corresponding to the slot indices - /// in @p Defs. This function is mainly for use in self-verification - /// checks. - LLVM_ATTRIBUTE_UNUSED - static bool isJointlyDominated(const MachineBasicBlock *MBB, - ArrayRef Defs, - const SlotIndexes &Indexes); -}; - -} // end namespace llvm - -#endif // LLVM_LIB_CODEGEN_LIVERANGECALC_H diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 882e562ba95c..34bac082bcd7 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -32,7 +32,7 @@ void LiveRangeEdit::Delegate::anchor() { } LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg, bool createSubRanges) { - unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); + Register VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); if (VRM) VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); @@ -52,7 +52,7 @@ LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg, } unsigned LiveRangeEdit::createFrom(unsigned OldReg) { - unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); + Register VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); if (VRM) { VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); } @@ -114,7 +114,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, continue; // We can't remat physreg uses, unless it is a constant. - if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + if (Register::isPhysicalRegister(MO.getReg())) { if (MRI.isConstantPhysReg(MO.getReg())) continue; return false; @@ -232,7 +232,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, LLVM_DEBUG(dbgs() << " folded: " << *FoldMI); LIS.ReplaceMachineInstrInMaps(*UseMI, *FoldMI); if (UseMI->isCall()) - UseMI->getMF()->updateCallSiteInfo(UseMI, FoldMI); + UseMI->getMF()->moveCallSiteInfo(UseMI, FoldMI); UseMI->eraseFromParent(); DefMI->addRegisterDead(LI->reg, nullptr); Dead.push_back(DefMI); @@ -308,8 +308,8 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, MOE = MI->operands_end(); MOI != MOE; ++MOI) { if (!MOI->isReg()) continue; - unsigned Reg = MOI->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + Register Reg = MOI->getReg(); + if (!Register::isVirtualRegister(Reg)) { // Check if MI reads any unreserved physregs. if (Reg && MOI->readsReg() && !MRI.isReserved(Reg)) ReadsPhysRegs = true; @@ -349,7 +349,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, // Remove all operands that aren't physregs. 
for (unsigned i = MI->getNumOperands(); i; --i) { const MachineOperand &MO = MI->getOperand(i-1); - if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) continue; MI->RemoveOperand(i-1); } diff --git a/lib/CodeGen/LiveRangeShrink.cpp b/lib/CodeGen/LiveRangeShrink.cpp index 8818f1ce0ad9..cbf112ee2bd5 100644 --- a/lib/CodeGen/LiveRangeShrink.cpp +++ b/lib/CodeGen/LiveRangeShrink.cpp @@ -172,10 +172,10 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) { for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || MO.isDead() || MO.isDebug()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // Do not move the instruction if it def/uses a physical register, // unless it is a constant physical register or a noreg. - if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + if (!Register::isVirtualRegister(Reg)) { if (!Reg || MRI.isConstantPhysReg(Reg)) continue; Insert = nullptr; diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp index ce99e5535c25..72c79e5f8a75 100644 --- a/lib/CodeGen/LiveRegMatrix.cpp +++ b/lib/CodeGen/LiveRegMatrix.cpp @@ -118,7 +118,7 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) { } void LiveRegMatrix::unassign(LiveInterval &VirtReg) { - unsigned PhysReg = VRM->getPhys(VirtReg.reg); + Register PhysReg = VRM->getPhys(VirtReg.reg); LLVM_DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg, TRI) << " from " << printReg(PhysReg, TRI) << ':'); VRM->clearVirt(VirtReg.reg); diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp index 6afb7fb7aa11..97763def1f40 100644 --- a/lib/CodeGen/LiveRegUnits.cpp +++ b/lib/CodeGen/LiveRegUnits.cpp @@ -47,8 +47,8 @@ void LiveRegUnits::stepBackward(const MachineInstr &MI) { if (O->isReg()) { if (!O->isDef() || O->isDebug()) continue; - unsigned Reg = O->getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = O->getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; removeReg(Reg); } else if (O->isRegMask()) @@ -59,8 +59,8 @@ void LiveRegUnits::stepBackward(const MachineInstr &MI) { for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { if (!O->isReg() || !O->readsReg() || O->isDebug()) continue; - unsigned Reg = O->getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = O->getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; addReg(Reg); } @@ -70,8 +70,8 @@ void LiveRegUnits::accumulate(const MachineInstr &MI) { // Add defs, uses and regmask clobbers to the set. 
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { if (O->isReg()) { - unsigned Reg = O->getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = O->getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; if (!O->isDef() && !O->readsReg()) continue; diff --git a/lib/CodeGen/LiveStacks.cpp b/lib/CodeGen/LiveStacks.cpp index f55977d72723..8df84ebf4f06 100644 --- a/lib/CodeGen/LiveStacks.cpp +++ b/lib/CodeGen/LiveStacks.cpp @@ -58,9 +58,10 @@ LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) { assert(Slot >= 0 && "Spill slot indice must be >= 0"); SS2IntervalMap::iterator I = S2IMap.find(Slot); if (I == S2IMap.end()) { - I = S2IMap.emplace(std::piecewise_construct, std::forward_as_tuple(Slot), - std::forward_as_tuple( - TargetRegisterInfo::index2StackSlot(Slot), 0.0F)) + I = S2IMap + .emplace( + std::piecewise_construct, std::forward_as_tuple(Slot), + std::forward_as_tuple(Register::index2StackSlot(Slot), 0.0F)) .first; S2RCMap.insert(std::make_pair(Slot, RC)); } else { diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index aaff982ef1b0..9bd55c6f750f 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -26,6 +26,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -82,7 +83,7 @@ LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const { /// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg. LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) { - assert(TargetRegisterInfo::isVirtualRegister(RegIdx) && + assert(Register::isVirtualRegister(RegIdx) && "getVarInfo: not a virtual register!"); VirtRegInfo.grow(RegIdx); return VirtRegInfo[RegIdx]; @@ -214,7 +215,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, MachineOperand &MO = LastDef->getOperand(i); if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0) continue; - unsigned DefReg = MO.getReg(); + Register DefReg = MO.getReg(); if (TRI->isSubRegister(Reg, DefReg)) { for (MCSubRegIterator SubRegs(DefReg, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) @@ -519,10 +520,9 @@ void LiveVariables::runOnInstr(MachineInstr &MI, } if (!MO.isReg() || MO.getReg() == 0) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (MO.isUse()) { - if (!(TargetRegisterInfo::isPhysicalRegister(MOReg) && - MRI->isReserved(MOReg))) + if (!(Register::isPhysicalRegister(MOReg) && MRI->isReserved(MOReg))) MO.setIsKill(false); if (MO.readsReg()) UseRegs.push_back(MOReg); @@ -530,8 +530,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI, assert(MO.isDef()); // FIXME: We should not remove any dead flags. However the MIPS RDDSP // instruction needs it at the moment: http://llvm.org/PR27116. - if (TargetRegisterInfo::isPhysicalRegister(MOReg) && - !MRI->isReserved(MOReg)) + if (Register::isPhysicalRegister(MOReg) && !MRI->isReserved(MOReg)) MO.setIsDead(false); DefRegs.push_back(MOReg); } @@ -541,7 +540,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI, // Process all uses. 
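// Illustrative sketch (not part of the patch): the LiveStacks::getOrCreateInterval
// hunk above keeps the piecewise-construct emplace idiom -- the mapped
// LiveInterval is built in place from (register, weight) -- and only swaps
// index2StackSlot over to Register. A generic, self-contained illustration of
// that idiom with a placeholder value type; Interval and getOrCreate are
// made-up names.
#include <map>
#include <tuple>
#include <utility>

struct Interval {
  Interval(unsigned Reg, float Weight) : Reg(Reg), Weight(Weight) {}
  unsigned Reg;
  float Weight;
};

Interval &getOrCreate(std::map<int, Interval> &M, int Slot, unsigned Reg) {
  auto It = M.find(Slot);
  if (It == M.end())
    It = M.emplace(std::piecewise_construct,
                   std::forward_as_tuple(Slot),      // key ctor args
                   std::forward_as_tuple(Reg, 0.0F)) // value ctor args
             .first;
  return It->second;
}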
for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) { unsigned MOReg = UseRegs[i]; - if (TargetRegisterInfo::isVirtualRegister(MOReg)) + if (Register::isVirtualRegister(MOReg)) HandleVirtRegUse(MOReg, MBB, MI); else if (!MRI->isReserved(MOReg)) HandlePhysRegUse(MOReg, MI); @@ -554,7 +553,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI, // Process all defs. for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) { unsigned MOReg = DefRegs[i]; - if (TargetRegisterInfo::isVirtualRegister(MOReg)) + if (Register::isVirtualRegister(MOReg)) HandleVirtRegDef(MOReg, MI); else if (!MRI->isReserved(MOReg)) HandlePhysRegDef(MOReg, &MI, Defs); @@ -566,7 +565,7 @@ void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) { // Mark live-in registers as live-in. SmallVector Defs; for (const auto &LI : MBB->liveins()) { - assert(TargetRegisterInfo::isPhysicalRegister(LI.PhysReg) && + assert(Register::isPhysicalRegister(LI.PhysReg) && "Cannot have a live-in virtual register!"); HandlePhysRegDef(LI.PhysReg, nullptr, Defs); } @@ -654,7 +653,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // Convert and transfer the dead / killed information we have gathered into // VirtRegInfo onto MI's. for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) { - const unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + const unsigned Reg = Register::index2VirtReg(i); for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j) if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg)) VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI); @@ -692,8 +691,8 @@ void LiveVariables::removeVirtualRegistersKilled(MachineInstr &MI) { MachineOperand &MO = MI.getOperand(i); if (MO.isReg() && MO.isKill()) { MO.setIsKill(false); - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isVirtualRegister(Reg)) { bool removed = getVarInfo(Reg).removeKill(MI); assert(removed && "kill not in register's VarInfo?"); (void)removed; @@ -783,7 +782,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB, for (; BBI != BBE; ++BBI) { for (MachineInstr::mop_iterator I = BBI->operands_begin(), E = BBI->operands_end(); I != E; ++I) { - if (I->isReg() && TargetRegisterInfo::isVirtualRegister(I->getReg())) { + if (I->isReg() && Register::isVirtualRegister(I->getReg())) { if (I->isDef()) Defs.insert(I->getReg()); else if (I->isKill()) @@ -794,7 +793,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB, // Update info for all live variables for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); // If the Defs is defined in the successor it can't be live in BB. 
if (Defs.count(Reg)) diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index b14d76a585f7..2392d4d00b56 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -261,7 +261,7 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { // Remember how big this blob of stack space is MFI.setLocalFrameSize(Offset); - MFI.setLocalFrameMaxAlign(MaxAlign); + MFI.setLocalFrameMaxAlign(assumeAligned(MaxAlign)); } static inline bool @@ -351,6 +351,14 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { assert(MFI.isObjectPreAllocated(FrameIdx) && "Only pre-allocated locals expected!"); + // We need to keep the references to the stack protector slot through frame + // index operands so that it gets resolved by PEI rather than this pass. + // This avoids accesses to the stack protector though virtual base + // registers, and forces PEI to address it using fp/sp/bp. + if (MFI.hasStackProtectorIndex() && + FrameIdx == MFI.getStackProtectorIndex()) + continue; + LLVM_DEBUG(dbgs() << "Considering: " << MI); unsigned idx = 0; diff --git a/lib/CodeGen/LowerEmuTLS.cpp b/lib/CodeGen/LowerEmuTLS.cpp index c8cf6abda4fc..ed48365b0102 100644 --- a/lib/CodeGen/LowerEmuTLS.cpp +++ b/lib/CodeGen/LowerEmuTLS.cpp @@ -142,7 +142,7 @@ bool LowerEmuTLS::addEmuTlsVar(Module &M, const GlobalVariable *GV) { assert(EmuTlsTmplVar && "Failed to create emualted TLS initializer"); EmuTlsTmplVar->setConstant(true); EmuTlsTmplVar->setInitializer(const_cast(InitValue)); - EmuTlsTmplVar->setAlignment(GVAlignment); + EmuTlsTmplVar->setAlignment(Align(GVAlignment)); copyLinkageVisibility(M, GV, EmuTlsTmplVar); } @@ -155,9 +155,8 @@ bool LowerEmuTLS::addEmuTlsVar(Module &M, const GlobalVariable *GV) { ArrayRef ElementValueArray(ElementValues, 4); EmuTlsVar->setInitializer( ConstantStruct::get(EmuTlsVarType, ElementValueArray)); - unsigned MaxAlignment = std::max( - DL.getABITypeAlignment(WordType), - DL.getABITypeAlignment(VoidPtrType)); + Align MaxAlignment(std::max(DL.getABITypeAlignment(WordType), + DL.getABITypeAlignment(VoidPtrType))); EmuTlsVar->setAlignment(MaxAlignment); return true; } diff --git a/lib/CodeGen/MIRCanonicalizerPass.cpp b/lib/CodeGen/MIRCanonicalizerPass.cpp index f49bc854e23f..c9bb5461aa3c 100644 --- a/lib/CodeGen/MIRCanonicalizerPass.cpp +++ b/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -23,12 +23,14 @@ // //===----------------------------------------------------------------------===// +#include "MIRVRegNamerUtils.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include @@ -71,28 +73,6 @@ public: } // end anonymous namespace -enum VRType { RSE_Reg = 0, RSE_FrameIndex, RSE_NewCandidate }; -class TypedVReg { - VRType type; - unsigned reg; - -public: - TypedVReg(unsigned reg) : type(RSE_Reg), reg(reg) {} - TypedVReg(VRType type) : type(type), reg(~0U) { - assert(type != RSE_Reg && "Expected a non-register type."); - } - - bool isReg() const { return type == RSE_Reg; } - bool isFrameIndex() const { return type == RSE_FrameIndex; } - bool isCandidate() const { return type == RSE_NewCandidate; } - - VRType getType() const { return type; } - unsigned getReg() const { - assert(this->isReg() && "Expected a virtual or 
physical register."); - return reg; - } -}; - char MIRCanonicalizer::ID; char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID; @@ -190,7 +170,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, if (!MO.isReg()) continue; - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (Register::isVirtualRegister(MO.getReg())) continue; if (!MO.isDef()) @@ -207,7 +187,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, continue; MachineOperand &MO = II->getOperand(0); - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) continue; if (!MO.isDef()) continue; @@ -220,7 +200,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, } if (II->getOperand(i).isReg()) { - if (!TargetRegisterInfo::isVirtualRegister(II->getOperand(i).getReg())) + if (!Register::isVirtualRegister(II->getOperand(i).getReg())) if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) == PhysRegDefs.end()) { continue; @@ -340,12 +320,12 @@ static bool propagateLocalCopies(MachineBasicBlock *MBB) { if (!MI->getOperand(1).isReg()) continue; - const unsigned Dst = MI->getOperand(0).getReg(); - const unsigned Src = MI->getOperand(1).getReg(); + const Register Dst = MI->getOperand(0).getReg(); + const Register Src = MI->getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Dst)) + if (!Register::isVirtualRegister(Dst)) continue; - if (!TargetRegisterInfo::isVirtualRegister(Src)) + if (!Register::isVirtualRegister(Src)) continue; // Not folding COPY instructions if regbankselect has not set the RCs. // Why are we only considering Register Classes? Because the verifier @@ -370,258 +350,6 @@ static bool propagateLocalCopies(MachineBasicBlock *MBB) { return Changed; } -/// Here we find our candidates. What makes an interesting candidate? -/// An candidate for a canonicalization tree root is normally any kind of -/// instruction that causes side effects such as a store to memory or a copy to -/// a physical register or a return instruction. We use these as an expression -/// tree root that we walk inorder to build a canonical walk which should result -/// in canoncal vreg renaming. 
-static std::vector populateCandidates(MachineBasicBlock *MBB) { - std::vector Candidates; - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - - for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { - MachineInstr *MI = &*II; - - bool DoesMISideEffect = false; - - if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) { - const unsigned Dst = MI->getOperand(0).getReg(); - DoesMISideEffect |= !TargetRegisterInfo::isVirtualRegister(Dst); - - for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) { - if (DoesMISideEffect) - break; - DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent()); - } - } - - if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect) - continue; - - LLVM_DEBUG(dbgs() << "Found Candidate: "; MI->dump();); - Candidates.push_back(MI); - } - - return Candidates; -} - -static void doCandidateWalk(std::vector &VRegs, - std::queue &RegQueue, - std::vector &VisitedMIs, - const MachineBasicBlock *MBB) { - - const MachineFunction &MF = *MBB->getParent(); - const MachineRegisterInfo &MRI = MF.getRegInfo(); - - while (!RegQueue.empty()) { - - auto TReg = RegQueue.front(); - RegQueue.pop(); - - if (TReg.isFrameIndex()) { - LLVM_DEBUG(dbgs() << "Popping frame index.\n";); - VRegs.push_back(TypedVReg(RSE_FrameIndex)); - continue; - } - - assert(TReg.isReg() && "Expected vreg or physreg."); - unsigned Reg = TReg.getReg(); - - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - LLVM_DEBUG({ - dbgs() << "Popping vreg "; - MRI.def_begin(Reg)->dump(); - dbgs() << "\n"; - }); - - if (!llvm::any_of(VRegs, [&](const TypedVReg &TR) { - return TR.isReg() && TR.getReg() == Reg; - })) { - VRegs.push_back(TypedVReg(Reg)); - } - } else { - LLVM_DEBUG(dbgs() << "Popping physreg.\n";); - VRegs.push_back(TypedVReg(Reg)); - continue; - } - - for (auto RI = MRI.def_begin(Reg), RE = MRI.def_end(); RI != RE; ++RI) { - MachineInstr *Def = RI->getParent(); - - if (Def->getParent() != MBB) - continue; - - if (llvm::any_of(VisitedMIs, - [&](const MachineInstr *VMI) { return Def == VMI; })) { - break; - } - - LLVM_DEBUG({ - dbgs() << "\n========================\n"; - dbgs() << "Visited MI: "; - Def->dump(); - dbgs() << "BB Name: " << Def->getParent()->getName() << "\n"; - dbgs() << "\n========================\n"; - }); - VisitedMIs.push_back(Def); - for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) { - - MachineOperand &MO = Def->getOperand(I); - if (MO.isFI()) { - LLVM_DEBUG(dbgs() << "Pushing frame index.\n";); - RegQueue.push(TypedVReg(RSE_FrameIndex)); - } - - if (!MO.isReg()) - continue; - RegQueue.push(TypedVReg(MO.getReg())); - } - } - } -} - -namespace { -class NamedVRegCursor { - MachineRegisterInfo &MRI; - unsigned virtualVRegNumber; - -public: - NamedVRegCursor(MachineRegisterInfo &MRI) : MRI(MRI), virtualVRegNumber(0) {} - - void SkipVRegs() { - unsigned VRegGapIndex = 1; - if (!virtualVRegNumber) { - VRegGapIndex = 0; - virtualVRegNumber = MRI.createIncompleteVirtualRegister(); - } - const unsigned VR_GAP = (++VRegGapIndex * 1000); - - unsigned I = virtualVRegNumber; - const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP; - - virtualVRegNumber = E; - } - - unsigned getVirtualVReg() const { return virtualVRegNumber; } - - unsigned incrementVirtualVReg(unsigned incr = 1) { - virtualVRegNumber += incr; - return virtualVRegNumber; - } - - unsigned createVirtualRegister(unsigned VReg) { - if (!virtualVRegNumber) - SkipVRegs(); - std::string S; - raw_string_ostream OS(S); - OS << "namedVReg" << (virtualVRegNumber & ~0x80000000); - 
OS.flush(); - virtualVRegNumber++; - if (auto RC = MRI.getRegClassOrNull(VReg)) - return MRI.createVirtualRegister(RC, OS.str()); - return MRI.createGenericVirtualRegister(MRI.getType(VReg), OS.str()); - } -}; -} // namespace - -static std::map -GetVRegRenameMap(const std::vector &VRegs, - const std::vector &renamedInOtherBB, - MachineRegisterInfo &MRI, NamedVRegCursor &NVC) { - std::map VRegRenameMap; - bool FirstCandidate = true; - - for (auto &vreg : VRegs) { - if (vreg.isFrameIndex()) { - // We skip one vreg for any frame index because there is a good chance - // (especially when comparing SelectionDAG to GlobalISel generated MIR) - // that in the other file we are just getting an incoming vreg that comes - // from a copy from a frame index. So it's safe to skip by one. - unsigned LastRenameReg = NVC.incrementVirtualVReg(); - (void)LastRenameReg; - LLVM_DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";); - continue; - } else if (vreg.isCandidate()) { - - // After the first candidate, for every subsequent candidate, we skip mod - // 10 registers so that the candidates are more likely to start at the - // same vreg number making it more likely that the canonical walk from the - // candidate insruction. We don't need to skip from the first candidate of - // the BasicBlock because we already skip ahead several vregs for each BB. - unsigned LastRenameReg = NVC.getVirtualVReg(); - if (FirstCandidate) - NVC.incrementVirtualVReg(LastRenameReg % 10); - FirstCandidate = false; - continue; - } else if (!TargetRegisterInfo::isVirtualRegister(vreg.getReg())) { - unsigned LastRenameReg = NVC.incrementVirtualVReg(); - (void)LastRenameReg; - LLVM_DEBUG({ - dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n"; - }); - continue; - } - - auto Reg = vreg.getReg(); - if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) { - LLVM_DEBUG(dbgs() << "Vreg " << Reg - << " already renamed in other BB.\n";); - continue; - } - - auto Rename = NVC.createVirtualRegister(Reg); - - if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) { - LLVM_DEBUG(dbgs() << "Mapping vreg ";); - if (MRI.reg_begin(Reg) != MRI.reg_end()) { - LLVM_DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump();); - } else { - LLVM_DEBUG(dbgs() << Reg;); - } - LLVM_DEBUG(dbgs() << " to ";); - if (MRI.reg_begin(Rename) != MRI.reg_end()) { - LLVM_DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump();); - } else { - LLVM_DEBUG(dbgs() << Rename;); - } - LLVM_DEBUG(dbgs() << "\n";); - - VRegRenameMap.insert(std::pair(Reg, Rename)); - } - } - - return VRegRenameMap; -} - -static bool doVRegRenaming(std::vector &RenamedInOtherBB, - const std::map &VRegRenameMap, - MachineRegisterInfo &MRI) { - bool Changed = false; - for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) { - - auto VReg = I->first; - auto Rename = I->second; - - RenamedInOtherBB.push_back(Rename); - - std::vector RenameMOs; - for (auto &MO : MRI.reg_operands(VReg)) { - RenameMOs.push_back(&MO); - } - - for (auto *MO : RenameMOs) { - Changed = true; - MO->setReg(Rename); - - if (!MO->isDef()) - MO->setIsKill(false); - } - } - - return Changed; -} - static bool doDefKillClear(MachineBasicBlock *MBB) { bool Changed = false; @@ -646,9 +374,7 @@ static bool doDefKillClear(MachineBasicBlock *MBB) { static bool runOnBasicBlock(MachineBasicBlock *MBB, std::vector &bbNames, - std::vector &renamedInOtherBB, - unsigned &basicBlockNum, unsigned &VRegGapIndex, - NamedVRegCursor &NVC) { + unsigned &basicBlockNum, NamedVRegCursor &NVC) { if 
(CanonicalizeBasicBlockNumber != ~0U) { if (CanonicalizeBasicBlockNumber != basicBlockNum++) @@ -687,74 +413,20 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB, Changed |= rescheduleCanonically(IdempotentInstCount, MBB); LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump();); - std::vector Candidates = populateCandidates(MBB); - std::vector VisitedMIs; - llvm::copy(Candidates, std::back_inserter(VisitedMIs)); - - std::vector VRegs; - for (auto candidate : Candidates) { - VRegs.push_back(TypedVReg(RSE_NewCandidate)); - - std::queue RegQueue; - - // Here we walk the vreg operands of a non-root node along our walk. - // The root nodes are the original candidates (stores normally). - // These are normally not the root nodes (except for the case of copies to - // physical registers). - for (unsigned i = 1; i < candidate->getNumOperands(); i++) { - if (candidate->mayStore() || candidate->isBranch()) - break; - - MachineOperand &MO = candidate->getOperand(i); - if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) - continue; - - LLVM_DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";); - RegQueue.push(TypedVReg(MO.getReg())); - } - - // Here we walk the root candidates. We start from the 0th operand because - // the root is normally a store to a vreg. - for (unsigned i = 0; i < candidate->getNumOperands(); i++) { - - if (!candidate->mayStore() && !candidate->isBranch()) - break; - - MachineOperand &MO = candidate->getOperand(i); - - // TODO: Do we want to only add vregs here? - if (!MO.isReg() && !MO.isFI()) - continue; - - LLVM_DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";); - - RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg()) - : TypedVReg(RSE_FrameIndex)); - } - - doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB); - } - - // If we have populated no vregs to rename then bail. - // The rest of this function does the vreg remaping. - if (VRegs.size() == 0) - return Changed; - - auto VRegRenameMap = GetVRegRenameMap(VRegs, renamedInOtherBB, MRI, NVC); - Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI); + Changed |= NVC.renameVRegs(MBB); // Here we renumber the def vregs for the idempotent instructions from the top // of the MachineBasicBlock so that they are named in the order that we sorted // them alphabetically. Eventually we wont need SkipVRegs because we will use // named vregs instead. 
if (IdempotentInstCount) - NVC.SkipVRegs(); + NVC.skipVRegs(); auto MII = MBB->begin(); for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) { MachineInstr &MI = *MII++; Changed = true; - unsigned vRegToRename = MI.getOperand(0).getReg(); + Register vRegToRename = MI.getOperand(0).getReg(); auto Rename = NVC.createVirtualRegister(vRegToRename); std::vector RenameMOs; @@ -799,9 +471,7 @@ bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { << "\n\n================================================\n\n";); std::vector BBNames; - std::vector RenamedInOtherBB; - unsigned GapIdx = 0; unsigned BBNum = 0; bool Changed = false; @@ -809,8 +479,7 @@ bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); NamedVRegCursor NVC(MRI); for (auto MBB : RPOList) - Changed |= - runOnBasicBlock(MBB, BBNames, RenamedInOtherBB, BBNum, GapIdx, NVC); + Changed |= runOnBasicBlock(MBB, BBNames, BBNum, NVC); return Changed; } diff --git a/lib/CodeGen/MIRNamerPass.cpp b/lib/CodeGen/MIRNamerPass.cpp new file mode 100644 index 000000000000..9d719f3917ce --- /dev/null +++ b/lib/CodeGen/MIRNamerPass.cpp @@ -0,0 +1,77 @@ +//===----------------------- MIRNamer.cpp - MIR Namer ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The purpose of this pass is to rename virtual register operands with the goal +// of making it easier to author easier to read tests for MIR. This pass reuses +// the vreg renamer used by MIRCanonicalizerPass. 
+// +// Basic Usage: +// +// llc -o - -run-pass mir-namer example.mir +// +//===----------------------------------------------------------------------===// + +#include "MIRVRegNamerUtils.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" + +using namespace llvm; + +namespace llvm { +extern char &MIRNamerID; +} // namespace llvm + +#define DEBUG_TYPE "mir-namer" + +namespace { + +class MIRNamer : public MachineFunctionPass { +public: + static char ID; + MIRNamer() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { + return "Rename virtual register operands"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override { + bool Changed = false; + + if (MF.empty()) + return Changed; + + NamedVRegCursor NVC(MF.getRegInfo()); + + ReversePostOrderTraversal RPOT(&*MF.begin()); + for (auto &MBB : RPOT) + Changed |= NVC.renameVRegs(MBB); + + return Changed; + } +}; + +} // end anonymous namespace + +char MIRNamer::ID; + +char &llvm::MIRNamerID = MIRNamer::ID; + +INITIALIZE_PASS_BEGIN(MIRNamer, "mir-namer", "Rename Register Operands", false, + false) + +INITIALIZE_PASS_END(MIRNamer, "mir-namer", "Rename Register Operands", false, + false) diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp index 4899bd3f5811..ad5c617623f2 100644 --- a/lib/CodeGen/MIRParser/MILexer.cpp +++ b/lib/CodeGen/MIRParser/MILexer.cpp @@ -249,6 +249,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("successors", MIToken::kw_successors) .Case("floatpred", MIToken::kw_floatpred) .Case("intpred", MIToken::kw_intpred) + .Case("shufflemask", MIToken::kw_shufflemask) .Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol) .Case("post-instr-symbol", MIToken::kw_post_instr_symbol) .Case("unknown-size", MIToken::kw_unknown_size) diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h index 0fe3f9f706db..200f9d026cc8 100644 --- a/lib/CodeGen/MIRParser/MILexer.h +++ b/lib/CodeGen/MIRParser/MILexer.h @@ -117,6 +117,7 @@ struct MIToken { kw_successors, kw_floatpred, kw_intpred, + kw_shufflemask, kw_pre_instr_symbol, kw_post_instr_symbol, kw_unknown_size, @@ -146,6 +147,7 @@ struct MIToken { IntegerLiteral, FloatingPointLiteral, HexLiteral, + VectorLiteral, VirtualRegister, ConstantPoolItem, JumpTableIndex, diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp index c0b800a0b870..6498acc9fa51 100644 --- a/lib/CodeGen/MIRParser/MIParser.cpp +++ b/lib/CodeGen/MIRParser/MIParser.cpp @@ -451,6 +451,7 @@ public: bool parseBlockAddressOperand(MachineOperand &Dest); bool parseIntrinsicOperand(MachineOperand &Dest); bool parsePredicateOperand(MachineOperand &Dest); + bool parseShuffleMaskOperand(MachineOperand &Dest); bool parseTargetIndexOperand(MachineOperand &Dest); bool parseCustomRegisterMaskOperand(MachineOperand &Dest); bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest); @@ -640,7 +641,7 @@ bool MIParser::parseBasicBlockDefinition( return error(Loc, Twine("redefinition of machine basic block with id #") + Twine(ID)); if (Alignment) - MBB->setAlignment(Alignment); + MBB->setAlignment(Align(Alignment)); if (HasAddressTaken) MBB->setHasAddressTaken(); 
MBB->setIsEHPad(IsLandingPad); @@ -1078,7 +1079,7 @@ static const char *printImplicitRegisterFlag(const MachineOperand &MO) { static std::string getRegisterName(const TargetRegisterInfo *TRI, unsigned Reg) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "expected phys reg"); + assert(Register::isPhysicalRegister(Reg) && "expected phys reg"); return StringRef(TRI->getName(Reg)).lower(); } @@ -1408,11 +1409,11 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest, if (Token.is(MIToken::dot)) { if (parseSubRegisterIndex(SubReg)) return true; - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return error("subregister index expects a virtual register"); } if (Token.is(MIToken::colon)) { - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return error("register class specification expects a virtual register"); lex(); if (parseRegisterClassOrBank(*RegInfo)) @@ -1436,12 +1437,13 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest, if (MRI.getType(Reg).isValid() && MRI.getType(Reg) != Ty) return error("inconsistent type for generic virtual register"); + MRI.setRegClassOrRegBank(Reg, static_cast(nullptr)); MRI.setType(Reg, Ty); } } } else if (consumeIfPresent(MIToken::lparen)) { // Virtual registers may have a tpe with GlobalISel. - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return error("unexpected type on physical register"); LLT Ty; @@ -1454,8 +1456,9 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest, if (MRI.getType(Reg).isValid() && MRI.getType(Reg) != Ty) return error("inconsistent type for generic virtual register"); + MRI.setRegClassOrRegBank(Reg, static_cast(nullptr)); MRI.setType(Reg, Ty); - } else if (TargetRegisterInfo::isVirtualRegister(Reg)) { + } else if (Register::isVirtualRegister(Reg)) { // Generic virtual registers must have a type. // If we end up here this means the type hasn't been specified and // this is bad! @@ -2285,6 +2288,49 @@ bool MIParser::parsePredicateOperand(MachineOperand &Dest) { return false; } +bool MIParser::parseShuffleMaskOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::kw_shufflemask)); + + lex(); + if (expectAndConsume(MIToken::lparen)) + return error("expected syntax shufflemask(, ...)"); + + SmallVector ShufMask; + LLVMContext &Ctx = MF.getFunction().getContext(); + Type *I32Ty = Type::getInt32Ty(Ctx); + + bool AllZero = true; + bool AllUndef = true; + + do { + if (Token.is(MIToken::kw_undef)) { + ShufMask.push_back(UndefValue::get(I32Ty)); + AllZero = false; + } else if (Token.is(MIToken::IntegerLiteral)) { + AllUndef = false; + const APSInt &Int = Token.integerValue(); + if (!Int.isNullValue()) + AllZero = false; + ShufMask.push_back(ConstantInt::get(I32Ty, Int.getExtValue())); + } else + return error("expected integer constant"); + + lex(); + } while (consumeIfPresent(MIToken::comma)); + + if (expectAndConsume(MIToken::rparen)) + return error("shufflemask should be terminated by ')'."); + + if (AllZero || AllUndef) { + VectorType *VT = VectorType::get(I32Ty, ShufMask.size()); + Constant *C = AllZero ? 
Constant::getNullValue(VT) : UndefValue::get(VT); + Dest = MachineOperand::CreateShuffleMask(C); + } else + Dest = MachineOperand::CreateShuffleMask(ConstantVector::get(ShufMask)); + + return false; +} + bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) { assert(Token.is(MIToken::kw_target_index)); lex(); @@ -2432,6 +2478,8 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest, case MIToken::kw_floatpred: case MIToken::kw_intpred: return parsePredicateOperand(Dest); + case MIToken::kw_shufflemask: + return parseShuffleMaskOperand(Dest); case MIToken::Error: return true; case MIToken::Identifier: diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp index b242934def80..55fac93d8991 100644 --- a/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/lib/CodeGen/MIRParser/MIRParser.cpp @@ -216,7 +216,7 @@ std::unique_ptr MIRParserImpl::parseIRModule() { return nullptr; // Create an empty module when the MIR file is empty. NoMIRDocuments = true; - return llvm::make_unique(Filename, Context); + return std::make_unique(Filename, Context); } std::unique_ptr M; @@ -236,7 +236,7 @@ std::unique_ptr MIRParserImpl::parseIRModule() { NoMIRDocuments = true; } else { // Create an new, empty module. - M = llvm::make_unique(Filename, Context); + M = std::make_unique(Filename, Context); NoLLVMIR = true; } return M; @@ -306,7 +306,7 @@ bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) { static bool isSSA(const MachineFunction &MF) { const MachineRegisterInfo &MRI = MF.getRegInfo(); for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + unsigned Reg = Register::index2VirtReg(I); if (!MRI.hasOneDef(Reg) && !MRI.def_empty(Reg)) return false; } @@ -355,10 +355,10 @@ bool MIRParserImpl::initializeCallSiteInfo( if (MILoc.Offset >= CallB->size()) return error(Twine(MF.getName()) + Twine(" call instruction offset out of range.") + - "Unable to reference instruction at bb: " + + " Unable to reference instruction at bb: " + Twine(MILoc.BlockNum) + " at offset:" + Twine(MILoc.Offset)); - auto CallI = std::next(CallB->begin(), MILoc.Offset); - if (!CallI->isCall()) + auto CallI = std::next(CallB->instr_begin(), MILoc.Offset); + if (!CallI->isCall(MachineInstr::IgnoreBundle)) return error(Twine(MF.getName()) + Twine(" call site info should reference call " "instruction. 
Instruction at bb:") + @@ -393,7 +393,7 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, } if (YamlMF.Alignment) - MF.setAlignment(YamlMF.Alignment); + MF.setAlignment(Align(YamlMF.Alignment)); MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice); MF.setHasWinCFI(YamlMF.HasWinCFI); @@ -949,6 +949,6 @@ llvm::createMIRParser(std::unique_ptr Contents, "Can't read MIR with a Context that discards named Values"))); return nullptr; } - return llvm::make_unique( - llvm::make_unique(std::move(Contents), Filename, Context)); + return std::make_unique( + std::make_unique(std::move(Contents), Filename, Context)); } diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp index 0a95a0ced0f5..1a4e21ac06a9 100644 --- a/lib/CodeGen/MIRPrinter.cpp +++ b/lib/CodeGen/MIRPrinter.cpp @@ -197,7 +197,7 @@ void MIRPrinter::print(const MachineFunction &MF) { yaml::MachineFunction YamlMF; YamlMF.Name = MF.getName(); - YamlMF.Alignment = MF.getAlignment(); + YamlMF.Alignment = MF.getAlignment().value(); YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice(); YamlMF.HasWinCFI = MF.hasWinCFI(); @@ -290,7 +290,7 @@ void MIRPrinter::convert(yaml::MachineFunction &MF, // Print the virtual register definitions. for (unsigned I = 0, E = RegInfo.getNumVirtRegs(); I < E; ++I) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + unsigned Reg = Register::index2VirtReg(I); yaml::VirtualRegisterDefinition VReg; VReg.ID = I; if (RegInfo.getVRegName(Reg) != "") @@ -473,10 +473,11 @@ void MIRPrinter::convertCallSiteObjects(yaml::MachineFunction &YMF, yaml::CallSiteInfo::MachineInstrLoc CallLocation; // Prepare instruction position. - MachineBasicBlock::const_iterator CallI = CSInfo.first->getIterator(); + MachineBasicBlock::const_instr_iterator CallI = CSInfo.first->getIterator(); CallLocation.BlockNum = CallI->getParent()->getNumber(); // Get call instruction offset from the beginning of block. - CallLocation.Offset = std::distance(CallI->getParent()->begin(), CallI); + CallLocation.Offset = + std::distance(CallI->getParent()->instr_begin(), CallI); YmlCS.CallLocation = CallLocation; // Construct call arguments and theirs forwarding register info. for (auto ArgReg : CSInfo.second) { @@ -628,9 +629,9 @@ void MIPrinter::print(const MachineBasicBlock &MBB) { OS << "landing-pad"; HasAttributes = true; } - if (MBB.getAlignment()) { + if (MBB.getAlignment() != Align::None()) { OS << (HasAttributes ? ", " : " ("); - OS << "align " << MBB.getAlignment(); + OS << "align " << MBB.getAlignment().value(); HasAttributes = true; } if (HasAttributes) @@ -842,7 +843,8 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx, case MachineOperand::MO_CFIIndex: case MachineOperand::MO_IntrinsicID: case MachineOperand::MO_Predicate: - case MachineOperand::MO_BlockAddress: { + case MachineOperand::MO_BlockAddress: + case MachineOperand::MO_ShuffleMask: { unsigned TiedOperandIdx = 0; if (ShouldPrintRegisterTies && Op.isReg() && Op.isTied() && !Op.isDef()) TiedOperandIdx = Op.getParent()->findTiedOperandIdx(OpIdx); diff --git a/lib/CodeGen/MIRVRegNamerUtils.cpp b/lib/CodeGen/MIRVRegNamerUtils.cpp new file mode 100644 index 000000000000..6629000f468f --- /dev/null +++ b/lib/CodeGen/MIRVRegNamerUtils.cpp @@ -0,0 +1,348 @@ +//===---------- MIRVRegNamerUtils.cpp - MIR VReg Renaming Utilities -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MIRVRegNamerUtils.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "mir-vregnamer-utils" + +namespace { + +// TypedVReg and VRType are used to tell the renamer what to do at points in a +// sequence of values to be renamed. A TypedVReg can either contain +// an actual VReg, a FrameIndex, or it could just be a barrier for the next +// candidate (side-effecting instruction). This tells the renamer to increment +// to the next vreg name, or to skip modulo some skip-gap value. +enum VRType { RSE_Reg = 0, RSE_FrameIndex, RSE_NewCandidate }; +class TypedVReg { + VRType Type; + Register Reg; + +public: + TypedVReg(Register Reg) : Type(RSE_Reg), Reg(Reg) {} + TypedVReg(VRType Type) : Type(Type), Reg(~0U) { + assert(Type != RSE_Reg && "Expected a non-Register Type."); + } + + bool isReg() const { return Type == RSE_Reg; } + bool isFrameIndex() const { return Type == RSE_FrameIndex; } + bool isCandidate() const { return Type == RSE_NewCandidate; } + + VRType getType() const { return Type; } + Register getReg() const { + assert(this->isReg() && "Expected a virtual or physical Register."); + return Reg; + } +}; + +/// Here we find our candidates. What makes an interesting candidate? +/// A candidate for a canonicalization tree root is normally any kind of +/// instruction that causes side effects such as a store to memory or a copy to +/// a physical register or a return instruction. We use these as an expression +/// tree root that we walk in order to build a canonical walk which should +/// result in canonical vreg renaming. +std::vector populateCandidates(MachineBasicBlock *MBB) { + std::vector Candidates; + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { + MachineInstr *MI = &*II; + + bool DoesMISideEffect = false; + + if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) { + const Register Dst = MI->getOperand(0).getReg(); + DoesMISideEffect |= !Register::isVirtualRegister(Dst); + + for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) { + if (DoesMISideEffect) + break; + DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent()); + } + } + + if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect) + continue; + + LLVM_DEBUG(dbgs() << "Found Candidate: "; MI->dump();); + Candidates.push_back(MI); + } + + return Candidates; +} + +void doCandidateWalk(std::vector &VRegs, + std::queue &RegQueue, + std::vector &VisitedMIs, + const MachineBasicBlock *MBB) { + + const MachineFunction &MF = *MBB->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + while (!RegQueue.empty()) { + + auto TReg = RegQueue.front(); + RegQueue.pop(); + + if (TReg.isFrameIndex()) { + LLVM_DEBUG(dbgs() << "Popping frame index.\n";); + VRegs.push_back(TypedVReg(RSE_FrameIndex)); + continue; + } + + assert(TReg.isReg() && "Expected vreg or physreg."); + Register Reg = TReg.getReg(); + + if (Register::isVirtualRegister(Reg)) { + LLVM_DEBUG({ + dbgs() << "Popping vreg "; + MRI.def_begin(Reg)->dump(); + dbgs() << "\n"; + }); + + if (!llvm::any_of(VRegs, [&](const TypedVReg &TR) { + return TR.isReg() && TR.getReg() == Reg; + })) { + VRegs.push_back(TypedVReg(Reg)); + } + } else { + LLVM_DEBUG(dbgs() << "Popping physreg.\n";); + VRegs.push_back(TypedVReg(Reg)); + continue; + } + + for (auto RI = MRI.def_begin(Reg), RE = 
MRI.def_end(); RI != RE; ++RI) { + MachineInstr *Def = RI->getParent(); + + if (Def->getParent() != MBB) + continue; + + if (llvm::any_of(VisitedMIs, + [&](const MachineInstr *VMI) { return Def == VMI; })) { + break; + } + + LLVM_DEBUG({ + dbgs() << "\n========================\n"; + dbgs() << "Visited MI: "; + Def->dump(); + dbgs() << "BB Name: " << Def->getParent()->getName() << "\n"; + dbgs() << "\n========================\n"; + }); + VisitedMIs.push_back(Def); + for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) { + + MachineOperand &MO = Def->getOperand(I); + if (MO.isFI()) { + LLVM_DEBUG(dbgs() << "Pushing frame index.\n";); + RegQueue.push(TypedVReg(RSE_FrameIndex)); + } + + if (!MO.isReg()) + continue; + RegQueue.push(TypedVReg(MO.getReg())); + } + } + } +} + +std::map +getVRegRenameMap(const std::vector &VRegs, + const std::vector &renamedInOtherBB, + MachineRegisterInfo &MRI, NamedVRegCursor &NVC) { + std::map VRegRenameMap; + bool FirstCandidate = true; + + for (auto &vreg : VRegs) { + if (vreg.isFrameIndex()) { + // We skip one vreg for any frame index because there is a good chance + // (especially when comparing SelectionDAG to GlobalISel generated MIR) + // that in the other file we are just getting an incoming vreg that comes + // from a copy from a frame index. So it's safe to skip by one. + unsigned LastRenameReg = NVC.incrementVirtualVReg(); + (void)LastRenameReg; + LLVM_DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";); + continue; + } else if (vreg.isCandidate()) { + + // After the first candidate, for every subsequent candidate, we skip mod + // 10 registers so that the candidates are more likely to start at the + // same vreg number making it more likely that the canonical walk from the + // candidate insruction. We don't need to skip from the first candidate of + // the BasicBlock because we already skip ahead several vregs for each BB. 
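[Toy illustration, not part of the patch; the helper name simulateCursorSkip is invented. Per the comments and code in getVRegRenameMap(), the cursor bookkeeping amounts to: burn one name for every frame index or physical register, and pad the cursor by cursor % 10 when the first candidate of a block is reached, so two compilations of the same program are more likely to land on matching names.]

// Models only the "skip" cases of getVRegRenameMap(); actual virtual registers
// instead consume one name via NamedVRegCursor::createVirtualRegister().
static unsigned simulateCursorSkip(unsigned Cursor, bool IsFrameIndex,
                                   bool IsPhysReg, bool IsFirstCandidate) {
  if (IsFrameIndex || IsPhysReg)
    return Cursor + 1;             // reserve one name, rename nothing
  if (IsFirstCandidate)
    return Cursor + (Cursor % 10); // pad: incrementVirtualVReg(Cursor % 10)
  return Cursor;                   // subsequent candidates leave the cursor alone
}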
+ unsigned LastRenameReg = NVC.getVirtualVReg(); + if (FirstCandidate) + NVC.incrementVirtualVReg(LastRenameReg % 10); + FirstCandidate = false; + continue; + } else if (!Register::isVirtualRegister(vreg.getReg())) { + unsigned LastRenameReg = NVC.incrementVirtualVReg(); + (void)LastRenameReg; + LLVM_DEBUG({ + dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n"; + }); + continue; + } + + auto Reg = vreg.getReg(); + if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) { + LLVM_DEBUG(dbgs() << "Vreg " << Reg + << " already renamed in other BB.\n";); + continue; + } + + auto Rename = NVC.createVirtualRegister(Reg); + + if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) { + LLVM_DEBUG(dbgs() << "Mapping vreg ";); + if (MRI.reg_begin(Reg) != MRI.reg_end()) { + LLVM_DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump();); + } else { + LLVM_DEBUG(dbgs() << Reg;); + } + LLVM_DEBUG(dbgs() << " to ";); + if (MRI.reg_begin(Rename) != MRI.reg_end()) { + LLVM_DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump();); + } else { + LLVM_DEBUG(dbgs() << Rename;); + } + LLVM_DEBUG(dbgs() << "\n";); + + VRegRenameMap.insert(std::pair(Reg, Rename)); + } + } + + return VRegRenameMap; +} + +bool doVRegRenaming(std::vector &renamedInOtherBB, + const std::map &VRegRenameMap, + MachineRegisterInfo &MRI) { + bool Changed = false; + for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) { + + auto VReg = I->first; + auto Rename = I->second; + + renamedInOtherBB.push_back(Rename); + + std::vector RenameMOs; + for (auto &MO : MRI.reg_operands(VReg)) { + RenameMOs.push_back(&MO); + } + + for (auto *MO : RenameMOs) { + Changed = true; + MO->setReg(Rename); + + if (!MO->isDef()) + MO->setIsKill(false); + } + } + + return Changed; +} + +bool renameVRegs(MachineBasicBlock *MBB, + std::vector &renamedInOtherBB, + NamedVRegCursor &NVC) { + bool Changed = false; + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + std::vector Candidates = populateCandidates(MBB); + std::vector VisitedMIs; + llvm::copy(Candidates, std::back_inserter(VisitedMIs)); + + std::vector VRegs; + for (auto candidate : Candidates) { + VRegs.push_back(TypedVReg(RSE_NewCandidate)); + + std::queue RegQueue; + + // Here we walk the vreg operands of a non-root node along our walk. + // The root nodes are the original candidates (stores normally). + // These are normally not the root nodes (except for the case of copies to + // physical registers). + for (unsigned i = 1; i < candidate->getNumOperands(); i++) { + if (candidate->mayStore() || candidate->isBranch()) + break; + + MachineOperand &MO = candidate->getOperand(i); + if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg()))) + continue; + + LLVM_DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";); + RegQueue.push(TypedVReg(MO.getReg())); + } + + // Here we walk the root candidates. We start from the 0th operand because + // the root is normally a store to a vreg. + for (unsigned i = 0; i < candidate->getNumOperands(); i++) { + + if (!candidate->mayStore() && !candidate->isBranch()) + break; + + MachineOperand &MO = candidate->getOperand(i); + + // TODO: Do we want to only add vregs here? + if (!MO.isReg() && !MO.isFI()) + continue; + + LLVM_DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";); + + RegQueue.push(MO.isReg() ? 
TypedVReg(MO.getReg()) + : TypedVReg(RSE_FrameIndex)); + } + + doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB); + } + + // If we have populated no vregs to rename then bail. + // The rest of this function does the vreg remaping. + if (VRegs.size() == 0) + return Changed; + + auto VRegRenameMap = getVRegRenameMap(VRegs, renamedInOtherBB, MRI, NVC); + Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI); + return Changed; +} +} // anonymous namespace + +void NamedVRegCursor::skipVRegs() { + unsigned VRegGapIndex = 1; + if (!virtualVRegNumber) { + VRegGapIndex = 0; + virtualVRegNumber = MRI.createIncompleteVirtualRegister(); + } + const unsigned VR_GAP = (++VRegGapIndex * SkipGapSize); + + unsigned I = virtualVRegNumber; + const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP; + + virtualVRegNumber = E; +} + +unsigned NamedVRegCursor::createVirtualRegister(unsigned VReg) { + if (!virtualVRegNumber) + skipVRegs(); + std::string S; + raw_string_ostream OS(S); + OS << "namedVReg" << (virtualVRegNumber & ~0x80000000); + OS.flush(); + virtualVRegNumber++; + if (auto RC = MRI.getRegClassOrNull(VReg)) + return MRI.createVirtualRegister(RC, OS.str()); + return MRI.createGenericVirtualRegister(MRI.getType(VReg), OS.str()); +} + +bool NamedVRegCursor::renameVRegs(MachineBasicBlock *MBB) { + return ::renameVRegs(MBB, RenamedInOtherBB, *this); +} diff --git a/lib/CodeGen/MIRVRegNamerUtils.h b/lib/CodeGen/MIRVRegNamerUtils.h new file mode 100644 index 000000000000..c5b52a968538 --- /dev/null +++ b/lib/CodeGen/MIRVRegNamerUtils.h @@ -0,0 +1,91 @@ + +//===------------ MIRVRegNamerUtils.h - MIR VReg Renaming Utilities -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The purpose of these utilities is to abstract out parts of the MIRCanon pass +// that are responsible for renaming virtual registers with the purpose of +// sharing code with a MIRVRegNamer pass that could be the analog of the +// opt -instnamer pass. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_MIRVREGNAMERUTILS_H +#define LLVM_LIB_CODEGEN_MIRVREGNAMERUTILS_H + +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/raw_ostream.h" + +#include + +namespace llvm { + +/// NamedVRegCursor - The cursor is an object that keeps track of what the next +/// vreg name should be. It does book keeping to determine when to skip the +/// index value and by how much, or if the next vreg name should be an increment +/// from the previous. +class NamedVRegCursor { + MachineRegisterInfo &MRI; + + /// virtualVRegNumber - Book keeping of the last vreg position. + unsigned virtualVRegNumber; + + /// SkipGapSize - Used to calculate a modulo amount to skip by after every + /// sequence of instructions starting from a given side-effecting + /// MachineInstruction for a given MachineBasicBlock. 
The general idea is that + /// for a given program compiled with two different opt pipelines, there + /// shouldn't be greater than SkipGapSize difference in how many vregs are in + /// play between the two and for every def-use graph of vregs we rename we + /// will round up to the next SkipGapSize'th number so that we have a high + /// change of landing on the same name for two given matching side-effects + /// for the two compilation outcomes. + const unsigned SkipGapSize; + + /// RenamedInOtherBB - VRegs that we already renamed: ie breadcrumbs. + std::vector RenamedInOtherBB; + +public: + NamedVRegCursor() = delete; + /// 1000 for the SkipGapSize was a good heuristic at the time of the writing + /// of the MIRCanonicalizerPass. Adjust as needed. + NamedVRegCursor(MachineRegisterInfo &MRI, unsigned SkipGapSize = 1000) + : MRI(MRI), virtualVRegNumber(0), SkipGapSize(SkipGapSize) {} + + /// SkipGapSize - Skips modulo a gap value of indices. Indices are used to + /// produce the next vreg name. + void skipVRegs(); + + unsigned getVirtualVReg() const { return virtualVRegNumber; } + + /// incrementVirtualVReg - This increments an index value that us used to + /// create a new vreg name. This is not a Register. + unsigned incrementVirtualVReg(unsigned incr = 1) { + virtualVRegNumber += incr; + return virtualVRegNumber; + } + + /// createVirtualRegister - Given an existing vreg, create a named vreg to + /// take its place. + unsigned createVirtualRegister(unsigned VReg); + + /// renameVRegs - For a given MachineBasicBlock, scan for side-effecting + /// instructions, walk the def-use from each side-effecting root (in sorted + /// root order) and rename the encountered vregs in the def-use graph in a + /// canonical ordering. This method maintains book keeping for which vregs + /// were already renamed in RenamedInOtherBB. + // @return changed + bool renameVRegs(MachineBasicBlock *MBB); +}; + +} // namespace llvm + +#endif diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 4d29e883d879..854bef3aab05 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -39,6 +39,12 @@ using namespace llvm; #define DEBUG_TYPE "codegen" +static cl::opt PrintSlotIndexes( + "print-slotindexes", + cl::desc("When printing machine IR, annotate instructions and blocks with " + "SlotIndexes when available"), + cl::init(true), cl::Hidden); + MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B) : BB(B), Number(-1), xParent(&MF) { Insts.Parent = this; @@ -291,7 +297,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, return; } - if (Indexes) + if (Indexes && PrintSlotIndexes) OS << Indexes->getMBBStartIdx(this) << '\t'; OS << "bb." << getNumber(); @@ -320,9 +326,9 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "landing-pad"; HasAttributes = true; } - if (getAlignment()) { + if (getAlignment() != Align::None()) { OS << (HasAttributes ? 
", " : " ("); - OS << "align " << getAlignment(); + OS << "align " << Log2(getAlignment()); HasAttributes = true; } if (HasAttributes) @@ -402,7 +408,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, bool IsInBundle = false; for (const MachineInstr &MI : instrs()) { - if (Indexes) { + if (Indexes && PrintSlotIndexes) { if (Indexes->hasIndex(MI)) OS << Indexes->getInstructionIndex(MI); OS << '\t'; @@ -484,9 +490,9 @@ void MachineBasicBlock::sortUniqueLiveIns() { } unsigned -MachineBasicBlock::addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC) { +MachineBasicBlock::addLiveIn(MCRegister PhysReg, const TargetRegisterClass *RC) { assert(getParent() && "MBB must be inserted in function"); - assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Expected physreg"); + assert(PhysReg.isPhysical() && "Expected physreg"); assert(RC && "Register class is required"); assert((isEHPad() || this == &getParent()->front()) && "Only the entry block and landing pads can have physreg live ins"); @@ -500,14 +506,14 @@ MachineBasicBlock::addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC) { if (LiveIn) for (;I != E && I->isCopy(); ++I) if (I->getOperand(1).getReg() == PhysReg) { - unsigned VirtReg = I->getOperand(0).getReg(); + Register VirtReg = I->getOperand(0).getReg(); if (!MRI.constrainRegClass(VirtReg, RC)) llvm_unreachable("Incompatible live-in register class."); return VirtReg; } // No luck, create a virtual register. - unsigned VirtReg = MRI.createVirtualRegister(RC); + Register VirtReg = MRI.createVirtualRegister(RC); BuildMI(*this, I, DebugLoc(), TII.get(TargetOpcode::COPY), VirtReg) .addReg(PhysReg, RegState::Kill); if (!LiveIn) @@ -772,7 +778,8 @@ void MachineBasicBlock::transferSuccessors(MachineBasicBlock *FromMBB) { while (!FromMBB->succ_empty()) { MachineBasicBlock *Succ = *FromMBB->succ_begin(); - // If probability list is empty it means we don't use it (disabled optimization). + // If probability list is empty it means we don't use it (disabled + // optimization). if (!FromMBB->Probs.empty()) { auto Prob = *FromMBB->Probs.begin(); addSuccessor(Succ, Prob); @@ -798,13 +805,7 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB) { FromMBB->removeSuccessor(Succ); // Fix up any PHI nodes in the successor. 
- for (MachineBasicBlock::instr_iterator MI = Succ->instr_begin(), - ME = Succ->instr_end(); MI != ME && MI->isPHI(); ++MI) - for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) { - MachineOperand &MO = MI->getOperand(i); - if (MO.getMBB() == FromMBB) - MO.setMBB(this); - } + Succ->replacePhiUsesWith(FromMBB, this); } normalizeSuccProbs(); } @@ -907,8 +908,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, if (!OI->isReg() || OI->getReg() == 0 || !OI->isUse() || !OI->isKill() || OI->isUndef()) continue; - unsigned Reg = OI->getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg) || + Register Reg = OI->getReg(); + if (Register::isPhysicalRegister(Reg) || LV->getVarInfo(Reg).removeKill(*MI)) { KilledRegs.push_back(Reg); LLVM_DEBUG(dbgs() << "Removing terminator kill: " << *MI); @@ -928,7 +929,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, if (!OI->isReg() || OI->getReg() == 0) continue; - unsigned Reg = OI->getReg(); + Register Reg = OI->getReg(); if (!is_contained(UsedRegs, Reg)) UsedRegs.push_back(Reg); } @@ -979,13 +980,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, } } - // Fix PHI nodes in Succ so they refer to NMBB instead of this - for (MachineBasicBlock::instr_iterator - i = Succ->instr_begin(),e = Succ->instr_end(); - i != e && i->isPHI(); ++i) - for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2) - if (i->getOperand(ni+1).getMBB() == this) - i->getOperand(ni+1).setMBB(NMBB); + // Fix PHI nodes in Succ so they refer to NMBB instead of this. + Succ->replacePhiUsesWith(this, NMBB); // Inherit live-ins from the successor for (const auto &LI : Succ->liveins()) @@ -1000,7 +996,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) { if (!(--I)->addRegisterKilled(Reg, TRI, /* AddIfNotFound= */ false)) continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) LV->getVarInfo(Reg).Kills.push_back(&*I); LLVM_DEBUG(dbgs() << "Restored terminator kill: " << *I); break; @@ -1033,7 +1029,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) { if (I->getOperand(ni+1).getMBB() == NMBB) { MachineOperand &MO = I->getOperand(ni); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); PHISrcRegs.insert(Reg); if (MO.isUndef()) continue; @@ -1049,7 +1045,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, MachineRegisterInfo *MRI = &getParent()->getRegInfo(); for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg)) continue; @@ -1217,6 +1213,16 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old, replaceSuccessor(Old, New); } +void MachineBasicBlock::replacePhiUsesWith(MachineBasicBlock *Old, + MachineBasicBlock *New) { + for (MachineInstr &MI : phis()) + for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) { + MachineOperand &MO = MI.getOperand(i); + if (MO.getMBB() == Old) + MO.setMBB(New); + } +} + /// Various pieces of code can cause excess edges in the CFG to be inserted. If /// we have proven that MBB can only branch to DestA and DestB, remove any other /// MBB successors from the CFG. 
DestA and DestB can be null. diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 639b588766a1..ac19bc0bd8ea 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -79,16 +79,17 @@ STATISTIC(CondBranchTakenFreq, STATISTIC(UncondBranchTakenFreq, "Potential frequency of taking unconditional branches"); -static cl::opt AlignAllBlock("align-all-blocks", - cl::desc("Force the alignment of all " - "blocks in the function."), - cl::init(0), cl::Hidden); +static cl::opt AlignAllBlock( + "align-all-blocks", + cl::desc("Force the alignment of all blocks in the function in log2 format " + "(e.g 4 means align on 16B boundaries)."), + cl::init(0), cl::Hidden); static cl::opt AlignAllNonFallThruBlocks( "align-all-nofallthru-blocks", - cl::desc("Force the alignment of all " - "blocks that have no fall-through predecessors (i.e. don't add " - "nops that are executed)."), + cl::desc("Force the alignment of all blocks that have no fall-through " + "predecessors (i.e. don't add nops that are executed). In log2 " + "format (e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden); // FIXME: Find a good default for this flag and remove the flag. @@ -2763,8 +2764,8 @@ void MachineBlockPlacement::alignBlocks() { if (!L) continue; - unsigned Align = TLI->getPrefLoopAlignment(L); - if (!Align) + const Align Align = TLI->getPrefLoopAlignment(L); + if (Align == 1) continue; // Don't care about loop alignment. // If the block is cold relative to the function entry don't waste space @@ -2981,7 +2982,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { F = &MF; MBPI = &getAnalysis(); - MBFI = llvm::make_unique( + MBFI = std::make_unique( getAnalysis()); MLI = &getAnalysis(); TII = MF.getSubtarget().getInstrInfo(); @@ -3038,8 +3039,9 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI, *MBPI, TailMergeSize); + auto *MMIWP = getAnalysisIfAvailable(); if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), - getAnalysisIfAvailable(), MLI, + MMIWP ? &MMIWP->getMMI() : nullptr, MLI, /*AfterPlacement=*/true)) { // Redo the layout if tail merging creates/removes/moves blocks. BlockToChain.clear(); @@ -3062,14 +3064,14 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (AlignAllBlock) // Align all of the blocks in the function to a specific alignment. for (MachineBasicBlock &MBB : MF) - MBB.setAlignment(AlignAllBlock); + MBB.setAlignment(Align(1ULL << AlignAllBlock)); else if (AlignAllNonFallThruBlocks) { // Align all of the blocks that have no fall-through predecessors to a // specific alignment. 
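[A small sketch to make the new log2 interpretation of these flags concrete; illustrative only, not part of the patch, and the helper name is invented. The same conversion is applied in the loop that continues right after this note.]

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;

// -align-all-blocks now takes a log2 value, so 4 requests 16-byte alignment.
static void forceBlockAlignment(MachineFunction &MF, unsigned Log2Value) {
  const Align BlockAlign(1ULL << Log2Value); // Align(16) when Log2Value == 4
  for (MachineBasicBlock &MBB : MF)
    MBB.setAlignment(BlockAlign);            // BlockAlign.value() == 16, Log2(BlockAlign) == 4
}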
for (auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) { auto LayoutPred = std::prev(MBI); if (!LayoutPred->isSuccessor(&*MBI)) - MBI->setAlignment(AlignAllNonFallThruBlocks); + MBI->setAlignment(Align(1ULL << AlignAllNonFallThruBlocks)); } } if (ViewBlockLayoutWithBFI != GVDT_None && diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 2df6d40d9293..d9bd32b2fbab 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -66,6 +67,7 @@ namespace { AliasAnalysis *AA; MachineDominatorTree *DT; MachineRegisterInfo *MRI; + MachineBlockFrequencyInfo *MBFI; public: static char ID; // Pass identification @@ -83,6 +85,8 @@ namespace { AU.addPreservedID(MachineLoopInfoID); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); } void releaseMemory() override { @@ -133,6 +137,11 @@ namespace { bool isPRECandidate(MachineInstr *MI); bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB); bool PerformSimplePRE(MachineDominatorTree *DT); + /// Heuristics to see if it's profitable to move common computations of MBB + /// and MBB1 to CandidateBB. + bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB, + MachineBasicBlock *MBB, + MachineBasicBlock *MBB1); }; } // end anonymous namespace @@ -158,15 +167,15 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI, for (MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isUse()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg); MachineInstr *DefMI = MRI->getVRegDef(Reg); if (!DefMI->isCopy()) continue; - unsigned SrcReg = DefMI->getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + Register SrcReg = DefMI->getOperand(1).getReg(); + if (!Register::isVirtualRegister(SrcReg)) continue; if (DefMI->getOperand(0).getSubReg()) continue; @@ -189,14 +198,16 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI, LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI); LLVM_DEBUG(dbgs() << "*** to: " << *MI); - // Update matching debug values. - DefMI->changeDebugValuesDefReg(SrcReg); - // Propagate SrcReg of copies to MI. MO.setReg(SrcReg); MRI->clearKillFlags(SrcReg); // Coalesce single use copies. if (OnlyOneUse) { + // If (and only if) we've eliminated all uses of the copy, also + // copy-propagate to any debug-users of MI, or they'll be left using + // an undefined value. + DefMI->changeDebugValuesDefReg(SrcReg); + DefMI->eraseFromParent(); ++NumCoalesces; } @@ -271,10 +282,10 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) continue; // Reading either caller preserved or constant physregs is ok. 
if (!isCallerPreservedOrConstPhysReg(Reg, *MI->getMF(), *TRI)) @@ -290,10 +301,10 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, const MachineOperand &MO = MOP.value(); if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) continue; // Check against PhysRefs even if the def is "dead". if (PhysRefs.count(Reg)) @@ -367,8 +378,8 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, return false; if (!MO.isReg() || !MO.isDef()) continue; - unsigned MOReg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(MOReg)) + Register MOReg = MO.getReg(); + if (Register::isVirtualRegister(MOReg)) continue; if (PhysRefs.count(MOReg)) return false; @@ -424,8 +435,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, // If CSReg is used at all uses of Reg, CSE should not increase register // pressure of CSReg. bool MayIncreasePressure = true; - if (TargetRegisterInfo::isVirtualRegister(CSReg) && - TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(CSReg) && Register::isVirtualRegister(Reg)) { MayIncreasePressure = false; SmallPtrSet CSUses; for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) { @@ -453,8 +463,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, // of the redundant computation are copies, do not cse. bool HasVRegUse = false; for (const MachineOperand &MO : MI->operands()) { - if (MO.isReg() && MO.isUse() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (MO.isReg() && MO.isUse() && Register::isVirtualRegister(MO.getReg())) { HasVRegUse = true; break; } @@ -586,8 +595,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - unsigned OldReg = MO.getReg(); - unsigned NewReg = CSMI->getOperand(i).getReg(); + Register OldReg = MO.getReg(); + Register NewReg = CSMI->getOperand(i).getReg(); // Go through implicit defs of CSMI and MI, if a def is not dead at MI, // we should make sure it is not dead at CSMI. @@ -604,8 +613,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) { continue; } - assert(TargetRegisterInfo::isVirtualRegister(OldReg) && - TargetRegisterInfo::isVirtualRegister(NewReg) && + assert(Register::isVirtualRegister(OldReg) && + Register::isVirtualRegister(NewReg) && "Do not CSE physical register defs!"); if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), MI)) { @@ -769,11 +778,11 @@ bool MachineCSE::isPRECandidate(MachineInstr *MI) { return false; for (auto def : MI->defs()) - if (!TRI->isVirtualRegister(def.getReg())) + if (!Register::isVirtualRegister(def.getReg())) return false; for (auto use : MI->uses()) - if (use.isReg() && !TRI->isVirtualRegister(use.getReg())) + if (use.isReg() && !Register::isVirtualRegister(use.getReg())) return false; return true; @@ -802,6 +811,9 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT, if (!CMBB->isLegalToHoistInto()) continue; + if (!isProfitableToHoistInto(CMBB, MBB, MBB1)) + continue; + // Two instrs are partial redundant if their basic blocks are reachable // from one to another but one doesn't dominate another. 
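[The profitability gate added just above (implemented as MachineCSE::isProfitableToHoistInto() later in this hunk) reduces to a block-frequency comparison. Below is a stand-alone sketch with invented names and plain integers standing in for BlockFrequency; the partial-redundancy check that the comment above introduces continues right after this note.]

#include <cstdint>

// Shape of the heuristic: always hoist when optimizing for minimum size,
// otherwise only hoist when the candidate block is no hotter than the two
// original placements combined.
static bool worthHoistingInto(uint64_t CandidateFreq, uint64_t FreqA,
                              uint64_t FreqB, bool HasMinSize) {
  if (HasMinSize)
    return true;
  return CandidateFreq <= FreqA + FreqB;
}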
if (CMBB != MBB1) { @@ -812,8 +824,8 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT, assert(MI->getOperand(0).isDef() && "First operand of instr with one explicit def must be this def"); - unsigned VReg = MI->getOperand(0).getReg(); - unsigned NewReg = MRI->cloneVirtualRegister(VReg); + Register VReg = MI->getOperand(0).getReg(); + Register NewReg = MRI->cloneVirtualRegister(VReg); if (!isProfitableToCSE(NewReg, VReg, CMBB, MI)) continue; MachineInstr &NewMI = @@ -854,6 +866,18 @@ bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) { return Changed; } +bool MachineCSE::isProfitableToHoistInto(MachineBasicBlock *CandidateBB, + MachineBasicBlock *MBB, + MachineBasicBlock *MBB1) { + if (CandidateBB->getParent()->getFunction().hasMinSize()) + return true; + assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB"); + assert(DT->dominates(CandidateBB, MBB1) && + "CandidateBB should dominate MBB1"); + return MBFI->getBlockFreq(CandidateBB) <= + MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1); +} + bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -863,6 +887,7 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); AA = &getAnalysis().getAAResults(); DT = &getAnalysis(); + MBFI = &getAnalysis(); LookAheadLimit = TII->getMachineCSELookAheadLimit(); bool ChangedPRE, ChangedCSE; ChangedPRE = PerformSimplePRE(DT); diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp index 0584ec0bd2b3..e9f462fd1b37 100644 --- a/lib/CodeGen/MachineCombiner.cpp +++ b/lib/CodeGen/MachineCombiner.cpp @@ -137,7 +137,7 @@ void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const { MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) { MachineInstr *DefInstr = nullptr; // We need a virtual register definition. - if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) DefInstr = MRI->getUniqueVRegDef(MO.getReg()); // PHI's have no depth etc. if (DefInstr && DefInstr->isPHI()) @@ -168,7 +168,7 @@ MachineCombiner::getDepth(SmallVectorImpl &InsInstrs, unsigned IDepth = 0; for (const MachineOperand &MO : InstrPtr->operands()) { // Check for virtual register operand. - if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) + if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg()))) continue; if (!MO.isUse()) continue; @@ -223,7 +223,7 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, for (const MachineOperand &MO : NewRoot->operands()) { // Check for virtual register operand. - if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) + if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg()))) continue; if (!MO.isDef()) continue; diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index 9fc12ac89e12..ebe76e31dca9 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -119,8 +119,8 @@ public: void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI) { assert(MI->isCopy() && "Tracking non-copy?"); - unsigned Def = MI->getOperand(0).getReg(); - unsigned Src = MI->getOperand(1).getReg(); + Register Def = MI->getOperand(0).getReg(); + Register Src = MI->getOperand(1).getReg(); // Remember Def is defined by the copy. 
for (MCRegUnitIterator RUI(Def, &TRI); RUI.isValid(); ++RUI) @@ -163,8 +163,8 @@ public: // Check that the available copy isn't clobbered by any regmasks between // itself and the destination. - unsigned AvailSrc = AvailCopy->getOperand(1).getReg(); - unsigned AvailDef = AvailCopy->getOperand(0).getReg(); + Register AvailSrc = AvailCopy->getOperand(1).getReg(); + Register AvailDef = AvailCopy->getOperand(0).getReg(); for (const MachineInstr &MI : make_range(AvailCopy->getIterator(), DestCopy.getIterator())) for (const MachineOperand &MO : MI.operands()) @@ -205,8 +205,11 @@ public: } private: + typedef enum { DebugUse = false, RegularUse = true } DebugType; + void ClobberRegister(unsigned Reg); - void ReadRegister(unsigned Reg); + void ReadRegister(unsigned Reg, MachineInstr &Reader, + DebugType DT); void CopyPropagateBlock(MachineBasicBlock &MBB); bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def); void forwardUses(MachineInstr &MI); @@ -217,6 +220,9 @@ private: /// Candidates for deletion. SmallSetVector MaybeDeadCopies; + /// Multimap tracking debug users in current BB + DenseMap> CopyDbgUsers; + CopyTracker Tracker; bool Changed; @@ -231,13 +237,19 @@ char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID; INITIALIZE_PASS(MachineCopyPropagation, DEBUG_TYPE, "Machine Copy Propagation Pass", false, false) -void MachineCopyPropagation::ReadRegister(unsigned Reg) { +void MachineCopyPropagation::ReadRegister(unsigned Reg, MachineInstr &Reader, + DebugType DT) { // If 'Reg' is defined by a copy, the copy is no longer a candidate - // for elimination. + // for elimination. If a copy is "read" by a debug user, record the user + // for propagation. for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) { if (MachineInstr *Copy = Tracker.findCopyForUnit(*RUI, *TRI)) { - LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: "; Copy->dump()); - MaybeDeadCopies.remove(Copy); + if (DT == RegularUse) { + LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: "; Copy->dump()); + MaybeDeadCopies.remove(Copy); + } else { + CopyDbgUsers[Copy].push_back(&Reader); + } } } } @@ -250,8 +262,8 @@ void MachineCopyPropagation::ReadRegister(unsigned Reg) { /// isNopCopy("ecx = COPY eax", AH, CL) == false static bool isNopCopy(const MachineInstr &PreviousCopy, unsigned Src, unsigned Def, const TargetRegisterInfo *TRI) { - unsigned PreviousSrc = PreviousCopy.getOperand(1).getReg(); - unsigned PreviousDef = PreviousCopy.getOperand(0).getReg(); + Register PreviousSrc = PreviousCopy.getOperand(1).getReg(); + Register PreviousDef = PreviousCopy.getOperand(0).getReg(); if (Src == PreviousSrc) { assert(Def == PreviousDef); return true; @@ -288,7 +300,7 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src, // Copy was redundantly redefining either Src or Def. Remove earlier kill // flags between Copy and PrevCopy because the value will be reused now. assert(Copy.isCopy()); - unsigned CopyDef = Copy.getOperand(0).getReg(); + Register CopyDef = Copy.getOperand(0).getReg(); assert(CopyDef == Src || CopyDef == Def); for (MachineInstr &MI : make_range(PrevCopy->getIterator(), Copy.getIterator())) @@ -307,7 +319,7 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx) { - unsigned CopySrcReg = Copy.getOperand(1).getReg(); + Register CopySrcReg = Copy.getOperand(1).getReg(); // If the new register meets the opcode register constraints, then allow // forwarding. 
@@ -398,9 +410,9 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) { if (!Copy) continue; - unsigned CopyDstReg = Copy->getOperand(0).getReg(); + Register CopyDstReg = Copy->getOperand(0).getReg(); const MachineOperand &CopySrc = Copy->getOperand(1); - unsigned CopySrcReg = CopySrc.getReg(); + Register CopySrcReg = CopySrc.getReg(); // FIXME: Don't handle partial uses of wider COPYs yet. if (MOUse.getReg() != CopyDstReg) { @@ -456,11 +468,11 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // Analyze copies (which don't overlap themselves). if (MI->isCopy() && !TRI->regsOverlap(MI->getOperand(0).getReg(), MI->getOperand(1).getReg())) { - unsigned Def = MI->getOperand(0).getReg(); - unsigned Src = MI->getOperand(1).getReg(); + Register Def = MI->getOperand(0).getReg(); + Register Src = MI->getOperand(1).getReg(); - assert(!TargetRegisterInfo::isVirtualRegister(Def) && - !TargetRegisterInfo::isVirtualRegister(Src) && + assert(!Register::isVirtualRegister(Def) && + !Register::isVirtualRegister(Src) && "MachineCopyPropagation should be run after register allocation!"); // The two copies cancel out and the source of the first copy @@ -488,14 +500,14 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // If Src is defined by a previous copy, the previous copy cannot be // eliminated. - ReadRegister(Src); + ReadRegister(Src, *MI, RegularUse); for (const MachineOperand &MO : MI->implicit_operands()) { if (!MO.isReg() || !MO.readsReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - ReadRegister(Reg); + ReadRegister(Reg, *MI, RegularUse); } LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump()); @@ -515,7 +527,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { for (const MachineOperand &MO : MI->implicit_operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; Tracker.clobberRegister(Reg, *TRI); @@ -529,12 +541,12 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // Clobber any earlyclobber regs first. for (const MachineOperand &MO : MI->operands()) if (MO.isReg() && MO.isEarlyClobber()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // If we have a tied earlyclobber, that means it is also read by this // instruction, so we need to make sure we don't remove it as dead // later. if (MO.isTied()) - ReadRegister(Reg); + ReadRegister(Reg, *MI, RegularUse); Tracker.clobberRegister(Reg, *TRI); } @@ -548,18 +560,18 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { RegMask = &MO; if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - assert(!TargetRegisterInfo::isVirtualRegister(Reg) && + assert(!Register::isVirtualRegister(Reg) && "MachineCopyPropagation should be run after register allocation!"); if (MO.isDef() && !MO.isEarlyClobber()) { Defs.push_back(Reg); continue; - } else if (!MO.isDebug() && MO.readsReg()) - ReadRegister(Reg); + } else if (MO.readsReg()) + ReadRegister(Reg, *MI, MO.isDebug() ? 
DebugUse : RegularUse); } // The instruction has a register mask operand which means that it clobbers @@ -571,7 +583,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { MaybeDeadCopies.begin(); DI != MaybeDeadCopies.end();) { MachineInstr *MaybeDead = *DI; - unsigned Reg = MaybeDead->getOperand(0).getReg(); + Register Reg = MaybeDead->getOperand(0).getReg(); assert(!MRI->isReserved(Reg)); if (!RegMask->clobbersPhysReg(Reg)) { @@ -609,9 +621,10 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { MaybeDead->dump()); assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg())); - // Update matching debug values. + // Update matching debug values, if any. assert(MaybeDead->isCopy()); - MaybeDead->changeDebugValuesDefReg(MaybeDead->getOperand(1).getReg()); + unsigned SrcReg = MaybeDead->getOperand(1).getReg(); + MRI->updateDbgUsersToReg(SrcReg, CopyDbgUsers[MaybeDead]); MaybeDead->eraseFromParent(); Changed = true; @@ -620,6 +633,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { } MaybeDeadCopies.clear(); + CopyDbgUsers.clear(); Tracker.clear(); } diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp index 1dfba8638c22..706c706d7527 100644 --- a/lib/CodeGen/MachineDominators.cpp +++ b/lib/CodeGen/MachineDominators.cpp @@ -18,12 +18,15 @@ using namespace llvm; +namespace llvm { // Always verify dominfo if expensive checking is enabled. #ifdef EXPENSIVE_CHECKS -static bool VerifyMachineDomInfo = true; +bool VerifyMachineDomInfo = true; #else -static bool VerifyMachineDomInfo = false; +bool VerifyMachineDomInfo = false; #endif +} // namespace llvm + static cl::opt VerifyMachineDomInfoX( "verify-machine-dom-info", cl::location(VerifyMachineDomInfo), cl::Hidden, cl::desc("Verify machine dominator info (time consuming)")); @@ -64,21 +67,11 @@ void MachineDominatorTree::releaseMemory() { } void MachineDominatorTree::verifyAnalysis() const { - if (DT && VerifyMachineDomInfo) { - MachineFunction &F = *getRoot()->getParent(); - - DomTreeBase OtherDT; - OtherDT.recalculate(F); - if (getRootNode()->getBlock() != OtherDT.getRootNode()->getBlock() || - DT->compare(OtherDT)) { - errs() << "MachineDominatorTree for function " << F.getName() - << " is not up to date!\nComputed:\n"; - DT->print(errs()); - errs() << "\nActual:\n"; - OtherDT.print(errs()); + if (DT && VerifyMachineDomInfo) + if (!DT->verify(DomTreeT::VerificationLevel::Basic)) { + errs() << "MachineDominatorTree verification failed\n"; abort(); } - } } void MachineDominatorTree::print(raw_ostream &OS, const Module*) const { diff --git a/lib/CodeGen/MachineFrameInfo.cpp b/lib/CodeGen/MachineFrameInfo.cpp index bae3a4333bda..604f5145b1a0 100644 --- a/lib/CodeGen/MachineFrameInfo.cpp +++ b/lib/CodeGen/MachineFrameInfo.cpp @@ -28,25 +28,26 @@ using namespace llvm; -void MachineFrameInfo::ensureMaxAlignment(unsigned Align) { +void MachineFrameInfo::ensureMaxAlignment(Align Alignment) { if (!StackRealignable) - assert(Align <= StackAlignment && - "For targets without stack realignment, Align is out of limit!"); - if (MaxAlignment < Align) MaxAlignment = Align; + assert(Alignment <= StackAlignment && + "For targets without stack realignment, Alignment is out of limit!"); + if (MaxAlignment < Alignment) + MaxAlignment = Alignment; } /// Clamp the alignment if requested and emit a warning. 
-static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, - unsigned StackAlign) { - if (!ShouldClamp || Align <= StackAlign) - return Align; - LLVM_DEBUG(dbgs() << "Warning: requested alignment " << Align - << " exceeds the stack alignment " << StackAlign +static inline Align clampStackAlignment(bool ShouldClamp, Align Alignment, + Align StackAlignment) { + if (!ShouldClamp || Alignment <= StackAlignment) + return Alignment; + LLVM_DEBUG(dbgs() << "Warning: requested alignment " << Alignment.value() + << " exceeds the stack alignment " << StackAlignment.value() << " when stack realignment is off" << '\n'); - return StackAlign; + return StackAlignment; } -int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, +int MachineFrameInfo::CreateStackObject(uint64_t Size, Align Alignment, bool IsSpillSlot, const AllocaInst *Alloca, uint8_t StackID) { @@ -61,8 +62,7 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, return Index; } -int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, - unsigned Alignment) { +int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, Align Alignment) { Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); CreateStackObject(Size, Alignment, true); int Index = (int)Objects.size() - NumFixedObjects - 1; @@ -70,7 +70,7 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, return Index; } -int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment, +int MachineFrameInfo::CreateVariableSizedObject(Align Alignment, const AllocaInst *Alloca) { HasVarSizedObjects = true; Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); @@ -88,7 +88,8 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, // object is 16-byte aligned. Note that unlike the non-fixed case, if the // stack needs realignment, we can't assume that the stack will in fact be // aligned. - unsigned Alignment = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); + Align Alignment = + commonAlignment(ForcedRealign ? Align::None() : StackAlignment, SPOffset); Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); Objects.insert(Objects.begin(), StackObject(Size, Alignment, SPOffset, IsImmutable, @@ -100,7 +101,8 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, bool IsImmutable) { - unsigned Alignment = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); + Align Alignment = + commonAlignment(ForcedRealign ? 
Align::None() : StackAlignment, SPOffset); Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); Objects.insert(Objects.begin(), StackObject(Size, Alignment, SPOffset, IsImmutable, @@ -232,7 +234,7 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ OS << "variable sized"; else OS << "size=" << SO.Size; - OS << ", align=" << SO.Alignment; + OS << ", align=" << SO.Alignment.value(); if (i < NumFixedObjects) OS << ", fixed"; diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 4df5ce2dcedc..7d2ee230ca9f 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -78,10 +78,11 @@ using namespace llvm; #define DEBUG_TYPE "codegen" -static cl::opt -AlignAllFunctions("align-all-functions", - cl::desc("Force the alignment of all functions."), - cl::init(0), cl::Hidden); +static cl::opt AlignAllFunctions( + "align-all-functions", + cl::desc("Force the alignment of all functions in log2 format (e.g. 4 " + "means align on 16B boundaries)."), + cl::init(0), cl::Hidden); static const char *getPropertyName(MachineFunctionProperties::Property Prop) { using P = MachineFunctionProperties::Property; @@ -181,7 +182,7 @@ void MachineFunction::init() { STI->getTargetLowering()->getPrefFunctionAlignment()); if (AlignAllFunctions) - Alignment = AlignAllFunctions; + Alignment = Align(1ULL << AlignAllFunctions); JumpTableInfo = nullptr; @@ -200,7 +201,7 @@ void MachineFunction::init() { "Target-incompatible DataLayout attached\n"); PSVManager = - llvm::make_unique(*(getSubtarget(). + std::make_unique(*(getSubtarget(). getInstrInfo())); } @@ -823,30 +824,47 @@ try_next:; return FilterID; } -void MachineFunction::addCodeViewHeapAllocSite(MachineInstr *I, MDNode *MD) { +void MachineFunction::addCodeViewHeapAllocSite(MachineInstr *I, + const MDNode *MD) { MCSymbol *BeginLabel = Ctx.createTempSymbol("heapallocsite", true); MCSymbol *EndLabel = Ctx.createTempSymbol("heapallocsite", true); I->setPreInstrSymbol(*this, BeginLabel); I->setPostInstrSymbol(*this, EndLabel); - DIType *DI = dyn_cast(MD); + const DIType *DI = dyn_cast(MD); CodeViewHeapAllocSites.push_back(std::make_tuple(BeginLabel, EndLabel, DI)); } -void MachineFunction::updateCallSiteInfo(const MachineInstr *Old, - const MachineInstr *New) { - if (!Target.Options.EnableDebugEntryValues || Old == New) - return; +void MachineFunction::moveCallSiteInfo(const MachineInstr *Old, + const MachineInstr *New) { + assert(New->isCall() && "Call site info refers only to call instructions!"); - assert(Old->isCall() && (!New || New->isCall()) && - "Call site info referes only to call instructions!"); - CallSiteInfoMap::iterator CSIt = CallSitesInfo.find(Old); + CallSiteInfoMap::iterator CSIt = getCallSiteInfo(Old); if (CSIt == CallSitesInfo.end()) return; + CallSiteInfo CSInfo = std::move(CSIt->second); CallSitesInfo.erase(CSIt); - if (New) - CallSitesInfo[New] = CSInfo; + CallSitesInfo[New] = CSInfo; +} + +void MachineFunction::eraseCallSiteInfo(const MachineInstr *MI) { + CallSiteInfoMap::iterator CSIt = getCallSiteInfo(MI); + if (CSIt == CallSitesInfo.end()) + return; + CallSitesInfo.erase(CSIt); +} + +void MachineFunction::copyCallSiteInfo(const MachineInstr *Old, + const MachineInstr *New) { + assert(New->isCall() && "Call site info refers only to call instructions!"); + + CallSiteInfoMap::iterator CSIt = getCallSiteInfo(Old); + if (CSIt == CallSitesInfo.end()) + return; + + CallSiteInfo CSInfo = CSIt->second; + CallSitesInfo[New] = CSInfo; } /// \} @@ 
-881,13 +899,13 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const { // alignment. switch (getEntryKind()) { case MachineJumpTableInfo::EK_BlockAddress: - return TD.getPointerABIAlignment(0); + return TD.getPointerABIAlignment(0).value(); case MachineJumpTableInfo::EK_GPRel64BlockAddress: - return TD.getABIIntegerTypeAlignment(64); + return TD.getABIIntegerTypeAlignment(64).value(); case MachineJumpTableInfo::EK_GPRel32BlockAddress: case MachineJumpTableInfo::EK_LabelDifference32: case MachineJumpTableInfo::EK_Custom32: - return TD.getABIIntegerTypeAlignment(32); + return TD.getABIIntegerTypeAlignment(32).value(); case MachineJumpTableInfo::EK_Inline: return 1; } diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp index 0da4cf3fc90c..03149aa7db4a 100644 --- a/lib/CodeGen/MachineFunctionPass.cpp +++ b/lib/CodeGen/MachineFunctionPass.cpp @@ -41,7 +41,7 @@ bool MachineFunctionPass::runOnFunction(Function &F) { if (F.hasAvailableExternallyLinkage()) return false; - MachineModuleInfo &MMI = getAnalysis(); + MachineModuleInfo &MMI = getAnalysis().getMMI(); MachineFunction &MF = MMI.getOrCreateMachineFunction(F); MachineFunctionProperties &MFProps = MF.getProperties(); @@ -101,8 +101,8 @@ bool MachineFunctionPass::runOnFunction(Function &F) { } void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); // MachineFunctionPass preserves all LLVM IR passes, but there's no // high-level way to express this. Instead, just list a bunch of diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index e5c398a2d10c..fec20b2b1a05 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -636,8 +636,8 @@ bool MachineInstr::isIdenticalTo(const MachineInstr &Other, if (Check == IgnoreDefs) continue; else if (Check == IgnoreVRegDefs) { - if (!TargetRegisterInfo::isVirtualRegister(MO.getReg()) || - !TargetRegisterInfo::isVirtualRegister(OMO.getReg())) + if (!Register::isVirtualRegister(MO.getReg()) || + !Register::isVirtualRegister(OMO.getReg())) if (!MO.isIdenticalTo(OMO)) return false; } else { @@ -692,8 +692,8 @@ void MachineInstr::eraseFromParentAndMarkDBGValuesForRemoval() { for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Reg.isVirtual()) continue; MRI.markUsesInDebugValueAsUndef(Reg); } @@ -832,6 +832,10 @@ const DIExpression *MachineInstr::getDebugExpression() const { return cast(getOperand(3).getMetadata()); } +bool MachineInstr::isDebugEntryValue() const { + return isDebugValue() && getDebugExpression()->isEntryValue(); +} + const TargetRegisterClass* MachineInstr::getRegClassConstraint(unsigned OpIdx, const TargetInstrInfo *TII, @@ -873,7 +877,7 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, } const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVReg( - unsigned Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII, + Register Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ExploreBundle) const { // Check every operands inside the bundle if we have // been asked to. 
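// Illustrative sketch, not part of the upstream change: the MachineFrameInfo and
// jump-table hunks above migrate raw 'unsigned' alignments to the Align type, and
// MinAlign(SPOffset, StackAlignment) becomes commonAlignment(StackAlignment, SPOffset).
// Both expressions compute the largest power of two guaranteed for an object placed at
// a given offset from a base with a known alignment. A standalone illustration of that
// arithmetic, using plain integers instead of the Align class:
#include <cassert>
#include <cstdint>

// Largest power of two guaranteed by an Alignment (itself a power of two) at Offset.
uint64_t commonAlignmentSketch(uint64_t Alignment, uint64_t Offset) {
  assert(Alignment != 0 && (Alignment & (Alignment - 1)) == 0 &&
         "alignment must be a power of two");
  uint64_t Bits = Alignment | Offset;
  return Bits & (~Bits + 1); // isolate the lowest set bit
}

int main() {
  // A fixed object at SP+12 on a 16-byte-aligned stack is only 4-byte aligned.
  assert(commonAlignmentSketch(16, 12) == 4);
  // At SP+0 the full stack alignment survives.
  assert(commonAlignmentSketch(16, 0) == 16);
  // With forced realignment the code above starts from Align::None(), i.e. 1.
  assert(commonAlignmentSketch(1, 12) == 1);
  return 0;
}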
@@ -890,7 +894,7 @@ const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVReg( } const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVRegImpl( - unsigned OpIdx, unsigned Reg, const TargetRegisterClass *CurRC, + unsigned OpIdx, Register Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const { assert(CurRC && "Invalid initial register class"); // Check if Reg is constrained by some of its use/def from MI. @@ -933,7 +937,7 @@ unsigned MachineInstr::getBundleSize() const { /// Returns true if the MachineInstr has an implicit-use operand of exactly /// the given register (not considering sub/super-registers). -bool MachineInstr::hasRegisterImplicitUseOperand(unsigned Reg) const { +bool MachineInstr::hasRegisterImplicitUseOperand(Register Reg) const { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == Reg) @@ -946,12 +950,12 @@ bool MachineInstr::hasRegisterImplicitUseOperand(unsigned Reg) const { /// the specific register or -1 if it is not found. It further tightens /// the search criteria to a use that kills the register if isKill is true. int MachineInstr::findRegisterUseOperandIdx( - unsigned Reg, bool isKill, const TargetRegisterInfo *TRI) const { + Register Reg, bool isKill, const TargetRegisterInfo *TRI) const { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (!MOReg) continue; if (MOReg == Reg || (TRI && Reg && MOReg && TRI->regsOverlap(MOReg, Reg))) @@ -965,7 +969,7 @@ int MachineInstr::findRegisterUseOperandIdx( /// indicating if this instruction reads or writes Reg. This also considers /// partial defines. std::pair -MachineInstr::readsWritesVirtualRegister(unsigned Reg, +MachineInstr::readsWritesVirtualRegister(Register Reg, SmallVectorImpl *Ops) const { bool PartDef = false; // Partial redefine. bool FullDef = false; // Full define. @@ -994,9 +998,9 @@ MachineInstr::readsWritesVirtualRegister(unsigned Reg, /// that are not dead are skipped. If TargetRegisterInfo is non-null, then it /// also checks if there is a def of a super-register. int -MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap, +MachineInstr::findRegisterDefOperandIdx(Register Reg, bool isDead, bool Overlap, const TargetRegisterInfo *TRI) const { - bool isPhys = TargetRegisterInfo::isPhysicalRegister(Reg); + bool isPhys = Register::isPhysicalRegister(Reg); for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); // Accept regmask operands when Overlap is set. 
@@ -1005,10 +1009,9 @@ MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap, return i; if (!MO.isReg() || !MO.isDef()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); bool Found = (MOReg == Reg); - if (!Found && TRI && isPhys && - TargetRegisterInfo::isPhysicalRegister(MOReg)) { + if (!Found && TRI && isPhys && Register::isPhysicalRegister(MOReg)) { if (Overlap) Found = TRI->regsOverlap(MOReg, Reg); else @@ -1142,10 +1145,10 @@ void MachineInstr::clearKillInfo() { } } -void MachineInstr::substituteRegister(unsigned FromReg, unsigned ToReg, +void MachineInstr::substituteRegister(Register FromReg, Register ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo) { - if (TargetRegisterInfo::isPhysicalRegister(ToReg)) { + if (Register::isPhysicalRegister(ToReg)) { if (SubIdx) ToReg = RegInfo.getSubReg(ToReg, SubIdx); for (MachineOperand &MO : operands()) { @@ -1165,7 +1168,7 @@ void MachineInstr::substituteRegister(unsigned FromReg, unsigned ToReg, /// isSafeToMove - Return true if it is safe to move this instruction. If /// SawStore is set to true, it means that there is a store (or call) between /// the instruction's location and its intended destination. -bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const { +bool MachineInstr::isSafeToMove(AAResults *AA, bool &SawStore) const { // Ignore stuff that we obviously can't move. // // Treat volatile loads as stores. This is not strictly necessary for @@ -1194,7 +1197,7 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const { return true; } -bool MachineInstr::mayAlias(AliasAnalysis *AA, const MachineInstr &Other, +bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other, bool UseTBAA) const { const MachineFunction *MF = getMF(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); @@ -1206,7 +1209,7 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, const MachineInstr &Other, return false; // Let the target decide if memory accesses cannot possibly overlap. - if (TII->areMemAccessesTriviallyDisjoint(*this, Other, AA)) + if (TII->areMemAccessesTriviallyDisjoint(*this, Other)) return false; // FIXME: Need to handle multiple memory operands to support all targets. @@ -1312,7 +1315,7 @@ bool MachineInstr::hasOrderedMemoryRef() const { /// isDereferenceableInvariantLoad - Return true if this instruction will never /// trap and is loading from a location whose value is invariant across a run of /// this function. -bool MachineInstr::isDereferenceableInvariantLoad(AliasAnalysis *AA) const { +bool MachineInstr::isDereferenceableInvariantLoad(AAResults *AA) const { // If the instruction doesn't load at all, it isn't an invariant load. 
if (!mayLoad()) return false; @@ -1364,7 +1367,7 @@ unsigned MachineInstr::isConstantValuePHI() const { assert(getNumOperands() >= 3 && "It's illegal to have a PHI without source operands"); - unsigned Reg = getOperand(1).getReg(); + Register Reg = getOperand(1).getReg(); for (unsigned i = 3, e = getNumOperands(); i < e; i += 2) if (getOperand(i).getReg() != Reg) return 0; @@ -1726,7 +1729,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, MFI = &MF->getFrameInfo(); Context = &MF->getFunction().getContext(); } else { - CtxPtr = llvm::make_unique(); + CtxPtr = std::make_unique(); Context = CtxPtr.get(); } @@ -1780,10 +1783,10 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << '\n'; } -bool MachineInstr::addRegisterKilled(unsigned IncomingReg, +bool MachineInstr::addRegisterKilled(Register IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound) { - bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg); + bool isPhysReg = Register::isPhysicalRegister(IncomingReg); bool hasAliases = isPhysReg && MCRegAliasIterator(IncomingReg, RegInfo, false).isValid(); bool Found = false; @@ -1799,7 +1802,7 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg, if (MO.isDebug()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; @@ -1814,8 +1817,7 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg, MO.setIsKill(); Found = true; } - } else if (hasAliases && MO.isKill() && - TargetRegisterInfo::isPhysicalRegister(Reg)) { + } else if (hasAliases && MO.isKill() && Register::isPhysicalRegister(Reg)) { // A super-register kill already exists. if (RegInfo->isSuperRegister(IncomingReg, Reg)) return true; @@ -1847,23 +1849,23 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg, return Found; } -void MachineInstr::clearRegisterKills(unsigned Reg, +void MachineInstr::clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo) { - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + if (!Register::isPhysicalRegister(Reg)) RegInfo = nullptr; for (MachineOperand &MO : operands()) { if (!MO.isReg() || !MO.isUse() || !MO.isKill()) continue; - unsigned OpReg = MO.getReg(); + Register OpReg = MO.getReg(); if ((RegInfo && RegInfo->regsOverlap(Reg, OpReg)) || Reg == OpReg) MO.setIsKill(false); } } -bool MachineInstr::addRegisterDead(unsigned Reg, +bool MachineInstr::addRegisterDead(Register Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound) { - bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(Reg); + bool isPhysReg = Register::isPhysicalRegister(Reg); bool hasAliases = isPhysReg && MCRegAliasIterator(Reg, RegInfo, false).isValid(); bool Found = false; @@ -1872,7 +1874,7 @@ bool MachineInstr::addRegisterDead(unsigned Reg, MachineOperand &MO = getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (!MOReg) continue; @@ -1880,7 +1882,7 @@ bool MachineInstr::addRegisterDead(unsigned Reg, MO.setIsDead(); Found = true; } else if (hasAliases && MO.isDead() && - TargetRegisterInfo::isPhysicalRegister(MOReg)) { + Register::isPhysicalRegister(MOReg)) { // There exists a super-register that's marked dead. 
if (RegInfo->isSuperRegister(Reg, MOReg)) return true; @@ -1913,7 +1915,7 @@ bool MachineInstr::addRegisterDead(unsigned Reg, return true; } -void MachineInstr::clearRegisterDeads(unsigned Reg) { +void MachineInstr::clearRegisterDeads(Register Reg) { for (MachineOperand &MO : operands()) { if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg) continue; @@ -1921,7 +1923,7 @@ void MachineInstr::clearRegisterDeads(unsigned Reg) { } } -void MachineInstr::setRegisterDefReadUndef(unsigned Reg, bool IsUndef) { +void MachineInstr::setRegisterDefReadUndef(Register Reg, bool IsUndef) { for (MachineOperand &MO : operands()) { if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg || MO.getSubReg() == 0) continue; @@ -1929,9 +1931,9 @@ void MachineInstr::setRegisterDefReadUndef(unsigned Reg, bool IsUndef) { } } -void MachineInstr::addRegisterDefined(unsigned Reg, +void MachineInstr::addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { MachineOperand *MO = findRegisterDefOperand(Reg, false, false, RegInfo); if (MO) return; @@ -1947,7 +1949,7 @@ void MachineInstr::addRegisterDefined(unsigned Reg, true /*IsImp*/)); } -void MachineInstr::setPhysRegsDeadExcept(ArrayRef UsedRegs, +void MachineInstr::setPhysRegsDeadExcept(ArrayRef UsedRegs, const TargetRegisterInfo &TRI) { bool HasRegMask = false; for (MachineOperand &MO : operands()) { @@ -1956,18 +1958,19 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef UsedRegs, continue; } if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + Register Reg = MO.getReg(); + if (!Reg.isPhysical()) + continue; // If there are no uses, including partial uses, the def is dead. if (llvm::none_of(UsedRegs, - [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); })) + [&](MCRegister Use) { return TRI.regsOverlap(Use, Reg); })) MO.setIsDead(); } // This is a call with a register mask operand. // Mask clobbers are always dead, so add defs for the non-dead defines. if (HasRegMask) - for (ArrayRef::iterator I = UsedRegs.begin(), E = UsedRegs.end(); + for (ArrayRef::iterator I = UsedRegs.begin(), E = UsedRegs.end(); I != E; ++I) addRegisterDefined(*I, &TRI); } @@ -1979,8 +1982,7 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { HashComponents.reserve(MI->getNumOperands() + 1); HashComponents.push_back(MI->getOpcode()); for (const MachineOperand &MO : MI->operands()) { - if (MO.isReg() && MO.isDef() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (MO.isReg() && MO.isDef() && Register::isVirtualRegister(MO.getReg())) continue; // Skip virtual register defs. 
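// Illustrative sketch, not part of the upstream change: the mechanical rewrite running
// through MachineInstr above replaces 'unsigned Reg' with the Register value type, so
// the virtual/physical distinction is queried through Register (Reg.isVirtual(),
// Reg.isPhysical(), Register::isVirtualRegister(...)) instead of static helpers on
// TargetRegisterInfo. A heavily simplified stand-in for such a wrapper, assuming the
// usual convention that 0 means "no register" and virtual registers occupy the upper
// numbering range; this is not LLVM's actual Register class.
#include <cassert>
#include <cstdint>

class RegisterSketch {
  uint32_t Reg;
  static constexpr uint32_t VirtualBit = 1u << 31;

public:
  explicit RegisterSketch(uint32_t R = 0) : Reg(R) {}
  operator uint32_t() const { return Reg; } // still usable where a number is expected
  bool isValid() const { return Reg != 0; }
  bool isVirtual() const { return (Reg & VirtualBit) != 0; }
  bool isPhysical() const { return Reg != 0 && !isVirtual(); }
  static bool isVirtualRegister(uint32_t R) { return (R & VirtualBit) != 0; }
  static bool isPhysicalRegister(uint32_t R) { return R != 0 && !(R & VirtualBit); }
};

int main() {
  RegisterSketch Phys(5), Virt((1u << 31) + 7), None;
  assert(Phys.isPhysical() && !Phys.isVirtual());
  assert(Virt.isVirtual() && !Virt.isPhysical());
  assert(!None.isValid());
  return 0;
}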
HashComponents.push_back(hash_value(MO)); @@ -2012,7 +2014,7 @@ void MachineInstr::emitError(StringRef Msg) const { MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID, bool IsIndirect, - unsigned Reg, const MDNode *Variable, + Register Reg, const MDNode *Variable, const MDNode *Expr) { assert(isa(Variable) && "not a variable"); assert(cast(Expr)->isValid() && "not an expression"); @@ -2048,7 +2050,7 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL, MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, const DebugLoc &DL, const MCInstrDesc &MCID, - bool IsIndirect, unsigned Reg, + bool IsIndirect, Register Reg, const MDNode *Variable, const MDNode *Expr) { MachineFunction &MF = *BB.getParent(); MachineInstr *MI = BuildMI(MF, DL, MCID, IsIndirect, Reg, Variable, Expr); @@ -2118,10 +2120,24 @@ void MachineInstr::collectDebugValues( } } -void MachineInstr::changeDebugValuesDefReg(unsigned Reg) { +void MachineInstr::changeDebugValuesDefReg(Register Reg) { // Collect matching debug values. SmallVector DbgValues; - collectDebugValues(DbgValues); + + if (!getOperand(0).isReg()) + return; + + unsigned DefReg = getOperand(0).getReg(); + auto *MRI = getRegInfo(); + for (auto &MO : MRI->use_operands(DefReg)) { + auto *DI = MO.getParent(); + if (!DI->isDebugValue()) + continue; + if (DI->getOperand(0).isReg() && + DI->getOperand(0).getReg() == DefReg){ + DbgValues.push_back(DI); + } + } // Propagate Reg to debug value instructions. for (auto *DBI : DbgValues) diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp index 32e266e9401e..feb849ced353 100644 --- a/lib/CodeGen/MachineInstrBundle.cpp +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -154,10 +154,10 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, continue; } - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + if (LocalDefSet.count(Reg)) { MO.setIsInternalRead(); if (MO.isKill()) @@ -177,7 +177,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, for (unsigned i = 0, e = Defs.size(); i != e; ++i) { MachineOperand &MO = *Defs[i]; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; @@ -194,7 +194,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, DeadDefSet.erase(Reg); } - if (!MO.isDead()) { + if (!MO.isDead() && Register::isPhysicalRegister(Reg)) { for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { unsigned SubReg = *SubRegs; if (LocalDefSet.insert(SubReg).second) @@ -316,7 +316,7 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg, bool AllDefsDead = true; PhysRegInfo PRI = {false, false, false, false, false, false, false, false}; - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + assert(Register::isPhysicalRegister(Reg) && "analyzePhysReg not given a physical register!"); for (; isValid(); ++*this) { MachineOperand &MO = deref(); @@ -329,8 +329,8 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg, if (!MO.isReg()) continue; - unsigned MOReg = MO.getReg(); - if (!MOReg || !TargetRegisterInfo::isPhysicalRegister(MOReg)) + Register MOReg = MO.getReg(); + if (!MOReg || !Register::isPhysicalRegister(MOReg)) continue; if (!TRI->regsOverlap(MOReg, Reg)) diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 1107e609c258..6a898ff6ef88 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -153,7 +153,6 @@ 
namespace { AU.addRequired(); AU.addRequired(); AU.addPreserved(); - AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -424,10 +423,10 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI, if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + assert(Register::isPhysicalRegister(Reg) && "Not expecting virtual register!"); if (!MO.isDef()) { @@ -526,7 +525,7 @@ void MachineLICMBase::HoistRegionPostRA() { for (const MachineOperand &MO : TI->operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) @@ -554,7 +553,7 @@ void MachineLICMBase::HoistRegionPostRA() { for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || MO.isDef() || !MO.getReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)) { // If it's using a non-loop-invariant register, then it's obviously @@ -852,8 +851,8 @@ MachineLICMBase::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || MO.isImplicit()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; // FIXME: It seems bad to use RegSeen only for some of these calculations. @@ -922,12 +921,12 @@ static bool isInvariantStore(const MachineInstr &MI, // Check that all register operands are caller-preserved physical registers. for (const MachineOperand &MO : MI.operands()) { if (MO.isReg()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // If operand is a virtual register, check if it comes from a copy of a // physical register. - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) Reg = TRI->lookThruCopyLike(MO.getReg(), MRI); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) return false; if (!TRI->isCallerPreservedPhysReg(Reg, *MI.getMF())) return false; @@ -955,17 +954,17 @@ static bool isCopyFeedingInvariantStore(const MachineInstr &MI, const MachineFunction *MF = MI.getMF(); // Check that we are copying a constant physical register. - unsigned CopySrcReg = MI.getOperand(1).getReg(); - if (TargetRegisterInfo::isVirtualRegister(CopySrcReg)) + Register CopySrcReg = MI.getOperand(1).getReg(); + if (Register::isVirtualRegister(CopySrcReg)) return false; if (!TRI->isCallerPreservedPhysReg(CopySrcReg, *MF)) return false; - unsigned CopyDstReg = MI.getOperand(0).getReg(); + Register CopyDstReg = MI.getOperand(0).getReg(); // Check if any of the uses of the copy are invariant stores. - assert (TargetRegisterInfo::isVirtualRegister(CopyDstReg) && - "copy dst is not a virtual reg"); + assert(Register::isVirtualRegister(CopyDstReg) && + "copy dst is not a virtual reg"); for (MachineInstr &UseMI : MRI->use_instructions(CopyDstReg)) { if (UseMI.mayStore() && isInvariantStore(UseMI, TRI, MRI)) @@ -1010,11 +1009,11 @@ bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; // Don't hoist an instruction that uses or defines a physical register. 
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { if (MO.isUse()) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, @@ -1061,8 +1060,8 @@ bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const { for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { // A PHI may cause a copy to be inserted. @@ -1104,7 +1103,7 @@ bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI, const MachineOperand &MO = UseMI.getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (MOReg != Reg) continue; @@ -1132,8 +1131,8 @@ bool MachineLICMBase::IsCheapInstruction(MachineInstr &MI) const { if (!DefMO.isReg() || !DefMO.isDef()) continue; --NumDefs; - unsigned Reg = DefMO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = DefMO.getReg(); + if (Register::isPhysicalRegister(Reg)) continue; if (!TII->hasLowDefLatency(SchedModel, MI, i)) @@ -1225,8 +1224,8 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || MO.isImplicit()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) { LLVM_DEBUG(dbgs() << "Hoist High Latency: " << MI); @@ -1304,7 +1303,7 @@ MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) { MachineFunction &MF = *MI->getMF(); const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF); // Ok, we're unfolding. Create a temporary register and do the unfold. - unsigned Reg = MRI->createVirtualRegister(RC); + Register Reg = MRI->createVirtualRegister(RC); SmallVector NewMIs; bool Success = TII->unfoldMemoryOperand(MF, *MI, Reg, @@ -1378,20 +1377,20 @@ bool MachineLICMBase::EliminateCSE(MachineInstr *MI, // Physical registers may not differ here. 
assert((!MO.isReg() || MO.getReg() == 0 || - !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || + !Register::isPhysicalRegister(MO.getReg()) || MO.getReg() == Dup->getOperand(i).getReg()) && "Instructions with different phys regs are not identical!"); if (MO.isReg() && MO.isDef() && - !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + !Register::isPhysicalRegister(MO.getReg())) Defs.push_back(i); } SmallVector OrigRCs; for (unsigned i = 0, e = Defs.size(); i != e; ++i) { unsigned Idx = Defs[i]; - unsigned Reg = MI->getOperand(Idx).getReg(); - unsigned DupReg = Dup->getOperand(Idx).getReg(); + Register Reg = MI->getOperand(Idx).getReg(); + Register DupReg = Dup->getOperand(Idx).getReg(); OrigRCs.push_back(MRI->getRegClass(DupReg)); if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) { @@ -1403,8 +1402,8 @@ bool MachineLICMBase::EliminateCSE(MachineInstr *MI, } for (unsigned Idx : Defs) { - unsigned Reg = MI->getOperand(Idx).getReg(); - unsigned DupReg = Dup->getOperand(Idx).getReg(); + Register Reg = MI->getOperand(Idx).getReg(); + Register DupReg = Dup->getOperand(Idx).getReg(); MRI->replaceRegWith(Reg, DupReg); MRI->clearKillFlags(DupReg); } diff --git a/lib/CodeGen/MachineLoopUtils.cpp b/lib/CodeGen/MachineLoopUtils.cpp new file mode 100644 index 000000000000..e074b76082f0 --- /dev/null +++ b/lib/CodeGen/MachineLoopUtils.cpp @@ -0,0 +1,132 @@ +//=- MachineLoopUtils.cpp - Functions for manipulating loops ----------------=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineLoopUtils.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +using namespace llvm; + +namespace { +// MI's parent and BB are clones of each other. Find the equivalent copy of MI +// in BB. +MachineInstr &findEquivalentInstruction(MachineInstr &MI, + MachineBasicBlock *BB) { + MachineBasicBlock *PB = MI.getParent(); + unsigned Offset = std::distance(PB->instr_begin(), MachineBasicBlock::instr_iterator(MI)); + return *std::next(BB->instr_begin(), Offset); +} +} // namespace + +MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction, + MachineBasicBlock *Loop, + MachineRegisterInfo &MRI, + const TargetInstrInfo *TII) { + MachineFunction &MF = *Loop->getParent(); + MachineBasicBlock *Preheader = *Loop->pred_begin(); + if (Preheader == Loop) + Preheader = *std::next(Loop->pred_begin()); + MachineBasicBlock *Exit = *Loop->succ_begin(); + if (Exit == Loop) + Exit = *std::next(Loop->succ_begin()); + + MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(Loop->getBasicBlock()); + if (Direction == LPD_Front) + MF.insert(Loop->getIterator(), NewBB); + else + MF.insert(std::next(Loop->getIterator()), NewBB); + + // FIXME: Add DenseMapInfo trait for Register so we can use it as a key. 
+ DenseMap Remaps; + auto InsertPt = NewBB->end(); + for (MachineInstr &MI : *Loop) { + MachineInstr *NewMI = MF.CloneMachineInstr(&MI); + NewBB->insert(InsertPt, NewMI); + for (MachineOperand &MO : NewMI->defs()) { + Register OrigR = MO.getReg(); + if (OrigR.isPhysical()) + continue; + Register &R = Remaps[OrigR]; + R = MRI.createVirtualRegister(MRI.getRegClass(OrigR)); + MO.setReg(R); + + if (Direction == LPD_Back) { + // Replace all uses outside the original loop with the new register. + // FIXME: is the use_iterator stable enough to mutate register uses + // while iterating? + SmallVector Uses; + for (auto &Use : MRI.use_operands(OrigR)) + if (Use.getParent()->getParent() != Loop) + Uses.push_back(&Use); + for (auto *Use : Uses) { + MRI.constrainRegClass(R, MRI.getRegClass(Use->getReg())); + Use->setReg(R); + } + } + } + } + + for (auto I = NewBB->getFirstNonPHI(); I != NewBB->end(); ++I) + for (MachineOperand &MO : I->uses()) + if (MO.isReg() && Remaps.count(MO.getReg())) + MO.setReg(Remaps[MO.getReg()]); + + for (auto I = NewBB->begin(); I->isPHI(); ++I) { + MachineInstr &MI = *I; + unsigned LoopRegIdx = 3, InitRegIdx = 1; + if (MI.getOperand(2).getMBB() != Preheader) + std::swap(LoopRegIdx, InitRegIdx); + MachineInstr &OrigPhi = findEquivalentInstruction(MI, Loop); + assert(OrigPhi.isPHI()); + if (Direction == LPD_Front) { + // When peeling front, we are only left with the initial value from the + // preheader. + Register R = MI.getOperand(LoopRegIdx).getReg(); + if (Remaps.count(R)) + R = Remaps[R]; + OrigPhi.getOperand(InitRegIdx).setReg(R); + MI.RemoveOperand(LoopRegIdx + 1); + MI.RemoveOperand(LoopRegIdx + 0); + } else { + // When peeling back, the initial value is the loop-carried value from + // the original loop. + Register LoopReg = OrigPhi.getOperand(LoopRegIdx).getReg(); + MI.getOperand(LoopRegIdx).setReg(LoopReg); + MI.RemoveOperand(InitRegIdx + 1); + MI.RemoveOperand(InitRegIdx + 0); + } + } + + DebugLoc DL; + if (Direction == LPD_Front) { + Preheader->replaceSuccessor(Loop, NewBB); + NewBB->addSuccessor(Loop); + Loop->replacePhiUsesWith(Preheader, NewBB); + if (TII->removeBranch(*Preheader) > 0) + TII->insertBranch(*Preheader, NewBB, nullptr, {}, DL); + TII->removeBranch(*NewBB); + TII->insertBranch(*NewBB, Loop, nullptr, {}, DL); + } else { + Loop->replaceSuccessor(Exit, NewBB); + Exit->replacePhiUsesWith(Loop, NewBB); + NewBB->addSuccessor(Exit); + + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector Cond; + bool CanAnalyzeBr = !TII->analyzeBranch(*Loop, TBB, FBB, Cond); + (void)CanAnalyzeBr; + assert(CanAnalyzeBr && "Must be able to analyze the loop branch!"); + TII->removeBranch(*Loop); + TII->insertBranch(*Loop, TBB == Exit ? NewBB : TBB, + FBB == Exit ? NewBB : FBB, Cond, DL); + if (TII->removeBranch(*NewBB) > 0) + TII->insertBranch(*NewBB, Exit, nullptr, {}, DL); + } + + return NewBB; +} diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index aadcd7319799..e0b4e9cac229 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -36,11 +36,6 @@ using namespace llvm; using namespace llvm::dwarf; -// Handle the Pass registration stuff necessary to use DataLayout's. -INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo", - "Machine Module Information", false, false) -char MachineModuleInfo::ID = 0; - // Out of line virtual method. 
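// Illustrative sketch, not part of the upstream change: PeelSingleBlockLoop above
// clones the single loop block either in front of the loop (LPD_Front, so one
// iteration runs before the loop is entered) or behind it (LPD_Back, so the final
// iteration runs after the loop exits), remapping the cloned virtual registers and
// rewiring PHIs and branches. At the source level the two peeling directions
// correspond roughly to the rewrites below; this is only an analogy, since the pass
// itself works on MachineIR rather than on C++ loops.
#include <cstddef>
#include <vector>

int sumFrontPeeled(const std::vector<int> &V) {
  int Sum = 0;
  std::size_t I = 0;
  if (I != V.size()) { // peeled first iteration (LPD_Front)
    Sum += V[I];
    ++I;
  }
  for (; I != V.size(); ++I) // remaining iterations stay in the loop
    Sum += V[I];
  return Sum;
}

int sumBackPeeled(const std::vector<int> &V) {
  int Sum = 0;
  std::size_t I = 0;
  for (; I + 1 < V.size(); ++I) // the loop runs all but the last iteration
    Sum += V[I];
  if (I != V.size()) // peeled last iteration (LPD_Back)
    Sum += V[I];
  return Sum;
}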
MachineModuleInfoImpl::~MachineModuleInfoImpl() = default; @@ -121,7 +116,7 @@ ArrayRef MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { BBCallbacks.back().setMap(this); Entry.Index = BBCallbacks.size() - 1; Entry.Fn = BB->getParent(); - Entry.Symbols.push_back(Context.createTempSymbol()); + Entry.Symbols.push_back(Context.createTempSymbol(!BB->hasAddressTaken())); return Entry.Symbols; } @@ -193,27 +188,15 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { Map->UpdateForRAUWBlock(cast(getValPtr()), cast(V2)); } -MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM) - : ImmutablePass(ID), TM(*TM), - Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(), - TM->getObjFileLowering(), nullptr, false) { - initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); -} - -MachineModuleInfo::~MachineModuleInfo() = default; - -bool MachineModuleInfo::doInitialization(Module &M) { +void MachineModuleInfo::initialize() { ObjFileMMI = nullptr; CurCallSite = 0; UsesMSVCFloatingPoint = UsesMorestackAddr = false; HasSplitStack = HasNosplitStack = false; AddrLabelSymbols = nullptr; - TheModule = &M; - DbgInfoAvailable = !llvm::empty(M.debug_compile_units()); - return false; } -bool MachineModuleInfo::doFinalization(Module &M) { +void MachineModuleInfo::finalize() { Personalities.clear(); delete AddrLabelSymbols; @@ -223,10 +206,30 @@ bool MachineModuleInfo::doFinalization(Module &M) { delete ObjFileMMI; ObjFileMMI = nullptr; +} - return false; +MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI) + : TM(std::move(MMI.TM)), + Context(MMI.TM.getMCAsmInfo(), MMI.TM.getMCRegisterInfo(), + MMI.TM.getObjFileLowering(), nullptr, nullptr, false) { + ObjFileMMI = MMI.ObjFileMMI; + CurCallSite = MMI.CurCallSite; + UsesMSVCFloatingPoint = MMI.UsesMSVCFloatingPoint; + UsesMorestackAddr = MMI.UsesMorestackAddr; + HasSplitStack = MMI.HasSplitStack; + HasNosplitStack = MMI.HasNosplitStack; + AddrLabelSymbols = MMI.AddrLabelSymbols; + TheModule = MMI.TheModule; } +MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM) + : TM(*TM), Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(), + TM->getObjFileLowering(), nullptr, nullptr, false) { + initialize(); +} + +MachineModuleInfo::~MachineModuleInfo() { finalize(); } + //===- Address of Block Management ----------------------------------------===// ArrayRef @@ -305,12 +308,13 @@ public: FreeMachineFunction() : FunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); } bool runOnFunction(Function &F) override { - MachineModuleInfo &MMI = getAnalysis(); + MachineModuleInfo &MMI = + getAnalysis().getMMI(); MMI.deleteMachineFunctionFor(F); return true; } @@ -327,3 +331,36 @@ char FreeMachineFunction::ID; FunctionPass *llvm::createFreeMachineFunctionPass() { return new FreeMachineFunction(); } + +MachineModuleInfoWrapperPass::MachineModuleInfoWrapperPass( + const LLVMTargetMachine *TM) + : ImmutablePass(ID), MMI(TM) { + initializeMachineModuleInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +// Handle the Pass registration stuff necessary to use DataLayout's. 
+INITIALIZE_PASS(MachineModuleInfoWrapperPass, "machinemoduleinfo", + "Machine Module Information", false, false) +char MachineModuleInfoWrapperPass::ID = 0; + +bool MachineModuleInfoWrapperPass::doInitialization(Module &M) { + MMI.initialize(); + MMI.TheModule = &M; + MMI.DbgInfoAvailable = !M.debug_compile_units().empty(); + return false; +} + +bool MachineModuleInfoWrapperPass::doFinalization(Module &M) { + MMI.finalize(); + return false; +} + +AnalysisKey MachineModuleAnalysis::Key; + +MachineModuleInfo MachineModuleAnalysis::run(Module &M, + ModuleAnalysisManager &) { + MachineModuleInfo MMI(TM); + MMI.TheModule = &M; + MMI.DbgInfoAvailable = !M.debug_compile_units().empty(); + return MMI; +} diff --git a/lib/CodeGen/MachineOperand.cpp b/lib/CodeGen/MachineOperand.cpp index 4fa4ea7f6cf5..8b19501ec3cf 100644 --- a/lib/CodeGen/MachineOperand.cpp +++ b/lib/CodeGen/MachineOperand.cpp @@ -49,7 +49,7 @@ static MachineFunction *getMFIfAvailable(MachineOperand &MO) { getMFIfAvailable(const_cast(MO))); } -void MachineOperand::setReg(unsigned Reg) { +void MachineOperand::setReg(Register Reg) { if (getReg() == Reg) return; // No change. @@ -71,9 +71,9 @@ void MachineOperand::setReg(unsigned Reg) { SmallContents.RegNo = Reg; } -void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx, +void MachineOperand::substVirtReg(Register Reg, unsigned SubIdx, const TargetRegisterInfo &TRI) { - assert(TargetRegisterInfo::isVirtualRegister(Reg)); + assert(Reg.isVirtual()); if (SubIdx && getSubReg()) SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg()); setReg(Reg); @@ -81,8 +81,8 @@ void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx, setSubReg(SubIdx); } -void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); +void MachineOperand::substPhysReg(MCRegister Reg, const TargetRegisterInfo &TRI) { + assert(Reg.isPhysical()); if (getSubReg()) { Reg = TRI.getSubReg(Reg, getSubReg()); // Note that getSubReg() may return 0 if the sub-register doesn't exist. 
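// Illustrative sketch, not part of the upstream change: the MachineModuleInfo rework
// above turns MMI into a plain data object. Legacy pass-manager clients now reach it
// through MachineModuleInfoWrapperPass (hence the getAnalysis<...>().getMMI() call
// sites elsewhere in this patch), and the new pass manager builds it with
// MachineModuleAnalysis. A stripped-down version of that "wrapper pass owns the
// result" pattern; all names below are invented for the example and none of the real
// pass-manager plumbing is shown.
#include <string>

struct ModuleInfoResultSketch {     // plays the role of MachineModuleInfo
  std::string ModuleName;
  bool DbgInfoAvailable = false;
};

class ModuleInfoWrapperPassSketch { // plays the role of the legacy wrapper pass
  ModuleInfoResultSketch MMI;

public:
  bool doInitialization(const std::string &ModuleName) {
    MMI.ModuleName = ModuleName;    // set up the owned result once per module
    return false;                   // "did not modify the module"
  }
  ModuleInfoResultSketch &getMMI() { return MMI; }
};

// A machine-function pass body would then be written against the wrapper:
bool runOnFunctionSketch(ModuleInfoWrapperPassSketch &WP) {
  ModuleInfoResultSketch &MMI = WP.getMMI(); // analogous to getAnalysis<...>().getMMI()
  return !MMI.ModuleName.empty();
}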
@@ -114,7 +114,7 @@ void MachineOperand::setIsDef(bool Val) { bool MachineOperand::isRenamable() const { assert(isReg() && "Wrong MachineOperand accessor"); - assert(TargetRegisterInfo::isPhysicalRegister(getReg()) && + assert(Register::isPhysicalRegister(getReg()) && "isRenamable should only be checked on physical registers"); if (!IsRenamable) return false; @@ -132,7 +132,7 @@ bool MachineOperand::isRenamable() const { void MachineOperand::setIsRenamable(bool Val) { assert(isReg() && "Wrong MachineOperand accessor"); - assert(TargetRegisterInfo::isPhysicalRegister(getReg()) && + assert(Register::isPhysicalRegister(getReg()) && "setIsRenamable should only be called on physical registers"); IsRenamable = Val; } @@ -169,7 +169,7 @@ void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) { } void MachineOperand::ChangeToES(const char *SymName, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((!isReg() || !isTied()) && "Cannot change a tied operand into an external symbol"); @@ -182,7 +182,7 @@ void MachineOperand::ChangeToES(const char *SymName, } void MachineOperand::ChangeToGA(const GlobalValue *GV, int64_t Offset, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((!isReg() || !isTied()) && "Cannot change a tied operand into a global address"); @@ -215,7 +215,7 @@ void MachineOperand::ChangeToFrameIndex(int Idx) { } void MachineOperand::ChangeToTargetIndex(unsigned Idx, int64_t Offset, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((!isReg() || !isTied()) && "Cannot change a tied operand into a FrameIndex"); @@ -230,7 +230,7 @@ void MachineOperand::ChangeToTargetIndex(unsigned Idx, int64_t Offset, /// ChangeToRegister - Replace this operand with a new register operand of /// the specified value. If an operand is known to be an register already, /// the setReg method should be used. 
-void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, +void MachineOperand::ChangeToRegister(Register Reg, bool isDef, bool isImp, bool isKill, bool isDead, bool isUndef, bool isDebug) { MachineRegisterInfo *RegInfo = nullptr; @@ -333,6 +333,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { return getIntrinsicID() == Other.getIntrinsicID(); case MachineOperand::MO_Predicate: return getPredicate() == Other.getPredicate(); + case MachineOperand::MO_ShuffleMask: + return getShuffleMask() == Other.getShuffleMask(); } llvm_unreachable("Invalid machine operand type"); } @@ -381,6 +383,8 @@ hash_code llvm::hash_value(const MachineOperand &MO) { return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID()); case MachineOperand::MO_Predicate: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate()); + case MachineOperand::MO_ShuffleMask: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getShuffleMask()); } llvm_unreachable("Invalid machine operand type"); } @@ -425,12 +429,10 @@ static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS, return; } - int Reg = TRI->getLLVMRegNum(DwarfReg, true); - if (Reg == -1) { + if (Optional Reg = TRI->getLLVMRegNum(DwarfReg, true)) + OS << printReg(*Reg, TRI); + else OS << ""; - return; - } - OS << printReg(Reg, TRI); } static void printIRBlockReference(raw_ostream &OS, const BasicBlock &BB, @@ -746,7 +748,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, printTargetFlags(OS, *this); switch (getType()) { case MachineOperand::MO_Register: { - unsigned Reg = getReg(); + Register Reg = getReg(); if (isImplicit()) OS << (isDef() ? "implicit-def " : "implicit "); else if (PrintDef && isDef()) @@ -762,13 +764,13 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "undef "; if (isEarlyClobber()) OS << "early-clobber "; - if (TargetRegisterInfo::isPhysicalRegister(getReg()) && isRenamable()) + if (Register::isPhysicalRegister(getReg()) && isRenamable()) OS << "renamable "; // isDebug() is exactly true for register operands of a DBG_VALUE. So we // simply infer it when parsing and do not need to print it. const MachineRegisterInfo *MRI = nullptr; - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { if (const MachineFunction *MF = getMFIfAvailable(*this)) { MRI = &MF->getRegInfo(); } @@ -783,7 +785,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << ".subreg" << SubReg; } // Print the register class / bank. 
- if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { if (const MachineFunction *MF = getMFIfAvailable(*this)) { const MachineRegisterInfo &MRI = MF->getRegInfo(); if (IsStandalone || !PrintDef || MRI.def_empty(Reg)) { @@ -936,6 +938,20 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, << CmpInst::getPredicateName(Pred) << ')'; break; } + case MachineOperand::MO_ShuffleMask: + OS << "shufflemask("; + const Constant* C = getShuffleMask(); + const int NumElts = C->getType()->getVectorNumElements(); + + StringRef Separator; + for (int I = 0; I != NumElts; ++I) { + OS << Separator; + C->getAggregateElement(I)->printAsOperand(OS, false, MST); + Separator = ", "; + } + + OS << ')'; + break; } } @@ -963,7 +979,8 @@ bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C, return false; return isDereferenceableAndAlignedPointer( - BasePtr, 1, APInt(DL.getPointerSizeInBits(), Offset + Size), DL); + BasePtr, Align::None(), APInt(DL.getPointerSizeInBits(), Offset + Size), + DL); } /// getConstantPool - Return a MachinePointerInfo record that refers to the @@ -1049,17 +1066,6 @@ uint64_t MachineMemOperand::getAlignment() const { return MinAlign(getBaseAlignment(), getOffset()); } -void MachineMemOperand::print(raw_ostream &OS) const { - ModuleSlotTracker DummyMST(nullptr); - print(OS, DummyMST); -} - -void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const { - SmallVector SSNs; - LLVMContext Ctx; - print(OS, MST, SSNs, Ctx, nullptr, nullptr); -} - void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, SmallVectorImpl &SSNs, const LLVMContext &Context, diff --git a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp index 27db9106b337..b82403ae1b85 100644 --- a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp +++ b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp @@ -76,7 +76,7 @@ bool MachineOptimizationRemarkEmitterPass::runOnMachineFunction( else MBFI = nullptr; - ORE = llvm::make_unique(MF, MBFI); + ORE = std::make_unique(MF, MBFI); return false; } diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp index 80a235aeaa5c..8cd66825a58a 100644 --- a/lib/CodeGen/MachineOutliner.cpp +++ b/lib/CodeGen/MachineOutliner.cpp @@ -846,8 +846,8 @@ struct MachineOutliner : public ModulePass { StringRef getPassName() const override { return "Machine Outliner"; } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); AU.setPreservesAll(); ModulePass::getAnalysisUsage(AU); } @@ -1128,7 +1128,7 @@ MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF, IRBuilder<> Builder(EntryBB); Builder.CreateRetVoid(); - MachineModuleInfo &MMI = getAnalysis(); + MachineModuleInfo &MMI = getAnalysis().getMMI(); MachineFunction &MF = MMI.getOrCreateMachineFunction(*F); MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock(); const TargetSubtargetInfo &STI = MF.getSubtarget(); @@ -1260,7 +1260,7 @@ bool MachineOutliner::outline(Module &M, true /* isImp = true */)); } if (MI.isCall()) - MI.getMF()->updateCallSiteInfo(&MI); + MI.getMF()->eraseCallSiteInfo(&MI); }; // Copy over the defs in the outlined range. 
// First inst in outlined range <-- Anything that's defined in this @@ -1303,6 +1303,12 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M, if (F.empty()) continue; + // Disable outlining from noreturn functions right now. Noreturn requires + // special handling for the case where what we are outlining could be a + // tail call. + if (F.hasFnAttribute(Attribute::NoReturn)) + continue; + // There's something in F. Check if it has a MachineFunction associated with // it. MachineFunction *MF = MMI.getMachineFunction(F); @@ -1421,7 +1427,7 @@ bool MachineOutliner::runOnModule(Module &M) { if (M.empty()) return false; - MachineModuleInfo &MMI = getAnalysis(); + MachineModuleInfo &MMI = getAnalysis().getMMI(); // If the user passed -enable-machine-outliner=always or // -enable-machine-outliner, the pass will run on all functions in the module. diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp index 54df522d371a..89c9f6093a97 100644 --- a/lib/CodeGen/MachinePipeliner.cpp +++ b/lib/CodeGen/MachinePipeliner.cpp @@ -56,6 +56,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePipeliner.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ModuloSchedule.h" #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" @@ -153,6 +154,17 @@ static cl::opt SwpShowResMask("pipeliner-show-mask", cl::Hidden, static cl::opt SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false)); +static cl::opt EmitTestAnnotations( + "pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), + cl::desc("Instead of emitting the pipelined code, annotate instructions " + "with the generated schedule for feeding into the " + "-modulo-schedule-test pass")); + +static cl::opt ExperimentalCodeGen( + "pipeliner-experimental-cg", cl::Hidden, cl::init(false), + cl::desc( + "Use the experimental peeling code generator for software pipelining")); + namespace llvm { // A command line option to enable the CopyToPhi DAG mutation. @@ -314,7 +326,7 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) { LI.LoopInductionVar = nullptr; LI.LoopCompare = nullptr; - if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare)) { + if (!TII->analyzeLoopForPipelining(L.getTopBlock())) { LLVM_DEBUG( dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n"); NumFailLoop++; @@ -349,7 +361,7 @@ void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) { // If the operand uses a subregister, replace it with a new register // without subregisters, and generate a copy to the new register. - unsigned NewReg = MRI.createVirtualRegister(RC); + Register NewReg = MRI.createVirtualRegister(RC); MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB(); MachineBasicBlock::iterator At = PredB.getFirstTerminator(); const DebugLoc &DL = PredB.findDebugLoc(At); @@ -515,14 +527,49 @@ void SwingSchedulerDAG::schedule() { return; } - generatePipelinedLoop(Schedule); + // Generate the schedule as a ModuloSchedule. 
+ DenseMap Cycles, Stages; + std::vector OrderedInsts; + for (int Cycle = Schedule.getFirstCycle(); Cycle <= Schedule.getFinalCycle(); + ++Cycle) { + for (SUnit *SU : Schedule.getInstructions(Cycle)) { + OrderedInsts.push_back(SU->getInstr()); + Cycles[SU->getInstr()] = Cycle; + Stages[SU->getInstr()] = Schedule.stageScheduled(SU); + } + } + DenseMap> NewInstrChanges; + for (auto &KV : NewMIs) { + Cycles[KV.first] = Cycles[KV.second]; + Stages[KV.first] = Stages[KV.second]; + NewInstrChanges[KV.first] = InstrChanges[getSUnit(KV.first)]; + } + + ModuloSchedule MS(MF, &Loop, std::move(OrderedInsts), std::move(Cycles), + std::move(Stages)); + if (EmitTestAnnotations) { + assert(NewInstrChanges.empty() && + "Cannot serialize a schedule with InstrChanges!"); + ModuloScheduleTestAnnotater MSTI(MF, MS); + MSTI.annotate(); + return; + } + // The experimental code generator can't work if there are InstChanges. + if (ExperimentalCodeGen && NewInstrChanges.empty()) { + PeelingModuloScheduleExpander MSE(MF, MS, &LIS); + MSE.expand(); + } else { + ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges)); + MSE.expand(); + MSE.cleanup(); + } ++NumPipelined; } /// Clean up after the software pipeliner runs. void SwingSchedulerDAG::finishBlock() { - for (MachineInstr *I : NewMIs) - MF.DeleteMachineInstr(I); + for (auto &KV : NewMIs) + MF.DeleteMachineInstr(KV.second); NewMIs.clear(); // Call the superclass. @@ -546,14 +593,6 @@ static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure."); } -/// Return the Phi register value that comes from the incoming block. -static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) { - for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) - if (Phi.getOperand(i + 1).getMBB() != LoopBB) - return Phi.getOperand(i).getReg(); - return 0; -} - /// Return the Phi register value that comes the loop block. static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) { for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) @@ -658,7 +697,7 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) { if (BaseOp1->isIdenticalTo(*BaseOp2) && (int)Offset1 < (int)Offset2) { - assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) && + assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI) && "What happened to the chain edge?"); SDep Dep(Load, SDep::Barrier); Dep.setLatency(1); @@ -730,7 +769,7 @@ void SwingSchedulerDAG::updatePhiDependences() { MOI != MOE; ++MOI) { if (!MOI->isReg()) continue; - unsigned Reg = MOI->getReg(); + Register Reg = MOI->getReg(); if (MOI->isDef()) { // If the register is used by a Phi, then create an anti dependence. for (MachineRegisterInfo::use_instr_iterator @@ -809,7 +848,7 @@ void SwingSchedulerDAG::changeDependences() { continue; // Get the MI and SUnit for the instruction that defines the original base. 
- unsigned OrigBase = I.getInstr()->getOperand(BasePos).getReg(); + Register OrigBase = I.getInstr()->getOperand(BasePos).getReg(); MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase); if (!DefMI) continue; @@ -958,7 +997,7 @@ struct FuncUnitSorter { unsigned F1 = 0, F2 = 0; unsigned MFUs1 = minFuncUnits(IS1, F1); unsigned MFUs2 = minFuncUnits(IS2, F2); - if (MFUs1 == 1 && MFUs2 == 1) + if (MFUs1 == MFUs2) return Resources.lookup(F1) < Resources.lookup(F2); return MFUs1 > MFUs2; } @@ -1514,8 +1553,8 @@ static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, continue; for (const MachineOperand &MO : MI->operands()) if (MO.isReg() && MO.isUse()) { - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (Register::isVirtualRegister(Reg)) Uses.insert(Reg); else if (MRI.isAllocatable(Reg)) for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) @@ -1525,8 +1564,8 @@ static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, for (SUnit *SU : NS) for (const MachineOperand &MO : SU->getInstr()->operands()) if (MO.isReg() && MO.isDef() && !MO.isDead()) { - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isVirtualRegister(Reg)) { if (!Uses.count(Reg)) LiveOutRegs.push_back(RegisterMaskPair(Reg, LaneBitmask::getNone())); @@ -2012,836 +2051,6 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { return scheduleFound && Schedule.getMaxStageCount() > 0; } -/// Given a schedule for the loop, generate a new version of the loop, -/// and replace the old version. This function generates a prolog -/// that contains the initial iterations in the pipeline, and kernel -/// loop, and the epilogue that contains the code for the final -/// iterations. -void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) { - // Create a new basic block for the kernel and add it to the CFG. - MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); - - unsigned MaxStageCount = Schedule.getMaxStageCount(); - - // Remember the registers that are used in different stages. The index is - // the iteration, or stage, that the instruction is scheduled in. This is - // a map between register names in the original block and the names created - // in each stage of the pipelined loop. - ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2]; - InstrMapTy InstrMap; - - SmallVector PrologBBs; - - MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader(); - assert(PreheaderBB != nullptr && - "Need to add code to handle loops w/o preheader"); - // Generate the prolog instructions that set up the pipeline. - generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs); - MF.insert(BB->getIterator(), KernelBB); - - // Rearrange the instructions to generate the new, pipelined loop, - // and update register names as needed. - for (int Cycle = Schedule.getFirstCycle(), - LastCycle = Schedule.getFinalCycle(); - Cycle <= LastCycle; ++Cycle) { - std::deque &CycleInstrs = Schedule.getInstructions(Cycle); - // This inner loop schedules each instruction in the cycle. 
- for (SUnit *CI : CycleInstrs) { - if (CI->getInstr()->isPHI()) - continue; - unsigned StageNum = Schedule.stageScheduled(getSUnit(CI->getInstr())); - MachineInstr *NewMI = cloneInstr(CI->getInstr(), MaxStageCount, StageNum); - updateInstruction(NewMI, false, MaxStageCount, StageNum, Schedule, VRMap); - KernelBB->push_back(NewMI); - InstrMap[NewMI] = CI->getInstr(); - } - } - - // Copy any terminator instructions to the new kernel, and update - // names as needed. - for (MachineBasicBlock::iterator I = BB->getFirstTerminator(), - E = BB->instr_end(); - I != E; ++I) { - MachineInstr *NewMI = MF.CloneMachineInstr(&*I); - updateInstruction(NewMI, false, MaxStageCount, 0, Schedule, VRMap); - KernelBB->push_back(NewMI); - InstrMap[NewMI] = &*I; - } - - KernelBB->transferSuccessors(BB); - KernelBB->replaceSuccessor(BB, KernelBB); - - generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, - VRMap, InstrMap, MaxStageCount, MaxStageCount, false); - generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap, - InstrMap, MaxStageCount, MaxStageCount, false); - - LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump();); - - SmallVector EpilogBBs; - // Generate the epilog instructions to complete the pipeline. - generateEpilog(Schedule, MaxStageCount, KernelBB, VRMap, EpilogBBs, - PrologBBs); - - // We need this step because the register allocation doesn't handle some - // situations well, so we insert copies to help out. - splitLifetimes(KernelBB, EpilogBBs, Schedule); - - // Remove dead instructions due to loop induction variables. - removeDeadInstructions(KernelBB, EpilogBBs); - - // Add branches between prolog and epilog blocks. - addBranches(*PreheaderBB, PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap); - - // Remove the original loop since it's no longer referenced. - for (auto &I : *BB) - LIS.RemoveMachineInstrFromMaps(I); - BB->clear(); - BB->eraseFromParent(); - - delete[] VRMap; -} - -/// Generate the pipeline prolog code. -void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage, - MachineBasicBlock *KernelBB, - ValueMapTy *VRMap, - MBBVectorTy &PrologBBs) { - MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader(); - assert(PreheaderBB != nullptr && - "Need to add code to handle loops w/o preheader"); - MachineBasicBlock *PredBB = PreheaderBB; - InstrMapTy InstrMap; - - // Generate a basic block for each stage, not including the last stage, - // which will be generated in the kernel. Each basic block may contain - // instructions from multiple stages/iterations. - for (unsigned i = 0; i < LastStage; ++i) { - // Create and insert the prolog basic block prior to the original loop - // basic block. The original loop is removed later. - MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); - PrologBBs.push_back(NewBB); - MF.insert(BB->getIterator(), NewBB); - NewBB->transferSuccessors(PredBB); - PredBB->addSuccessor(NewBB); - PredBB = NewBB; - - // Generate instructions for each appropriate stage. Process instructions - // in original program order. 
- for (int StageNum = i; StageNum >= 0; --StageNum) { - for (MachineBasicBlock::iterator BBI = BB->instr_begin(), - BBE = BB->getFirstTerminator(); - BBI != BBE; ++BBI) { - if (Schedule.isScheduledAtStage(getSUnit(&*BBI), (unsigned)StageNum)) { - if (BBI->isPHI()) - continue; - MachineInstr *NewMI = - cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum, Schedule); - updateInstruction(NewMI, false, i, (unsigned)StageNum, Schedule, - VRMap); - NewBB->push_back(NewMI); - InstrMap[NewMI] = &*BBI; - } - } - } - rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap); - LLVM_DEBUG({ - dbgs() << "prolog:\n"; - NewBB->dump(); - }); - } - - PredBB->replaceSuccessor(BB, KernelBB); - - // Check if we need to remove the branch from the preheader to the original - // loop, and replace it with a branch to the new loop. - unsigned numBranches = TII->removeBranch(*PreheaderBB); - if (numBranches) { - SmallVector Cond; - TII->insertBranch(*PreheaderBB, PrologBBs[0], nullptr, Cond, DebugLoc()); - } -} - -/// Generate the pipeline epilog code. The epilog code finishes the iterations -/// that were started in either the prolog or the kernel. We create a basic -/// block for each stage that needs to complete. -void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage, - MachineBasicBlock *KernelBB, - ValueMapTy *VRMap, - MBBVectorTy &EpilogBBs, - MBBVectorTy &PrologBBs) { - // We need to change the branch from the kernel to the first epilog block, so - // this call to analyze branch uses the kernel rather than the original BB. - MachineBasicBlock *TBB = nullptr, *FBB = nullptr; - SmallVector Cond; - bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond); - assert(!checkBranch && "generateEpilog must be able to analyze the branch"); - if (checkBranch) - return; - - MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin(); - if (*LoopExitI == KernelBB) - ++LoopExitI; - assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor"); - MachineBasicBlock *LoopExitBB = *LoopExitI; - - MachineBasicBlock *PredBB = KernelBB; - MachineBasicBlock *EpilogStart = LoopExitBB; - InstrMapTy InstrMap; - - // Generate a basic block for each stage, not including the last stage, - // which was generated for the kernel. Each basic block may contain - // instructions from multiple stages/iterations. - int EpilogStage = LastStage + 1; - for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) { - MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(); - EpilogBBs.push_back(NewBB); - MF.insert(BB->getIterator(), NewBB); - - PredBB->replaceSuccessor(LoopExitBB, NewBB); - NewBB->addSuccessor(LoopExitBB); - - if (EpilogStart == LoopExitBB) - EpilogStart = NewBB; - - // Add instructions to the epilog depending on the current block. - // Process instructions in original program order. - for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) { - for (auto &BBI : *BB) { - if (BBI.isPHI()) - continue; - MachineInstr *In = &BBI; - if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) { - // Instructions with memoperands in the epilog are updated with - // conservative values. 
- MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0); - updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap); - NewBB->push_back(NewMI); - InstrMap[NewMI] = In; - } - } - } - generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, - VRMap, InstrMap, LastStage, EpilogStage, i == 1); - generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, VRMap, - InstrMap, LastStage, EpilogStage, i == 1); - PredBB = NewBB; - - LLVM_DEBUG({ - dbgs() << "epilog:\n"; - NewBB->dump(); - }); - } - - // Fix any Phi nodes in the loop exit block. - for (MachineInstr &MI : *LoopExitBB) { - if (!MI.isPHI()) - break; - for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) { - MachineOperand &MO = MI.getOperand(i); - if (MO.getMBB() == BB) - MO.setMBB(PredBB); - } - } - - // Create a branch to the new epilog from the kernel. - // Remove the original branch and add a new branch to the epilog. - TII->removeBranch(*KernelBB); - TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc()); - // Add a branch to the loop exit. - if (EpilogBBs.size() > 0) { - MachineBasicBlock *LastEpilogBB = EpilogBBs.back(); - SmallVector Cond1; - TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc()); - } -} - -/// Replace all uses of FromReg that appear outside the specified -/// basic block with ToReg. -static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, - MachineBasicBlock *MBB, - MachineRegisterInfo &MRI, - LiveIntervals &LIS) { - for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg), - E = MRI.use_end(); - I != E;) { - MachineOperand &O = *I; - ++I; - if (O.getParent()->getParent() != MBB) - O.setReg(ToReg); - } - if (!LIS.hasInterval(ToReg)) - LIS.createEmptyInterval(ToReg); -} - -/// Return true if the register has a use that occurs outside the -/// specified loop. -static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, - MachineRegisterInfo &MRI) { - for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg), - E = MRI.use_end(); - I != E; ++I) - if (I->getParent()->getParent() != BB) - return true; - return false; -} - -/// Generate Phis for the specific block in the generated pipelined code. -/// This function looks at the Phis from the original code to guide the -/// creation of new Phis. -void SwingSchedulerDAG::generateExistingPhis( - MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, - MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap, - InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum, - bool IsLast) { - // Compute the stage number for the initial value of the Phi, which - // comes from the prolog. The prolog to use depends on to which kernel/ - // epilog that we're adding the Phi. - unsigned PrologStage = 0; - unsigned PrevStage = 0; - bool InKernel = (LastStageNum == CurStageNum); - if (InKernel) { - PrologStage = LastStageNum - 1; - PrevStage = CurStageNum; - } else { - PrologStage = LastStageNum - (CurStageNum - LastStageNum); - PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1; - } - - for (MachineBasicBlock::iterator BBI = BB->instr_begin(), - BBE = BB->getFirstNonPHI(); - BBI != BBE; ++BBI) { - unsigned Def = BBI->getOperand(0).getReg(); - - unsigned InitVal = 0; - unsigned LoopVal = 0; - getPhiRegs(*BBI, BB, InitVal, LoopVal); - - unsigned PhiOp1 = 0; - // The Phi value from the loop body typically is defined in the loop, but - // not always. So, we need to check if the value is defined in the loop. 
- unsigned PhiOp2 = LoopVal; - if (VRMap[LastStageNum].count(LoopVal)) - PhiOp2 = VRMap[LastStageNum][LoopVal]; - - int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI)); - int LoopValStage = - Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal))); - unsigned NumStages = Schedule.getStagesForReg(Def, CurStageNum); - if (NumStages == 0) { - // We don't need to generate a Phi anymore, but we need to rename any uses - // of the Phi value. - unsigned NewReg = VRMap[PrevStage][LoopVal]; - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI, - Def, InitVal, NewReg); - if (VRMap[CurStageNum].count(LoopVal)) - VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal]; - } - // Adjust the number of Phis needed depending on the number of prologs left, - // and the distance from where the Phi is first scheduled. The number of - // Phis cannot exceed the number of prolog stages. Each stage can - // potentially define two values. - unsigned MaxPhis = PrologStage + 2; - if (!InKernel && (int)PrologStage <= LoopValStage) - MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1); - unsigned NumPhis = std::min(NumStages, MaxPhis); - - unsigned NewReg = 0; - unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled; - // In the epilog, we may need to look back one stage to get the correct - // Phi name because the epilog and prolog blocks execute the same stage. - // The correct name is from the previous block only when the Phi has - // been completely scheduled prior to the epilog, and Phi value is not - // needed in multiple stages. - int StageDiff = 0; - if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 && - NumPhis == 1) - StageDiff = 1; - // Adjust the computations below when the phi and the loop definition - // are scheduled in different stages. - if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage) - StageDiff = StageScheduled - LoopValStage; - for (unsigned np = 0; np < NumPhis; ++np) { - // If the Phi hasn't been scheduled, then use the initial Phi operand - // value. Otherwise, use the scheduled version of the instruction. This - // is a little complicated when a Phi references another Phi. - if (np > PrologStage || StageScheduled >= (int)LastStageNum) - PhiOp1 = InitVal; - // Check if the Phi has already been scheduled in a prolog stage. - else if (PrologStage >= AccessStage + StageDiff + np && - VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0) - PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal]; - // Check if the Phi has already been scheduled, but the loop instruction - // is either another Phi, or doesn't occur in the loop. - else if (PrologStage >= AccessStage + StageDiff + np) { - // If the Phi references another Phi, we need to examine the other - // Phi to get the correct value. - PhiOp1 = LoopVal; - MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1); - int Indirects = 1; - while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) { - int PhiStage = Schedule.stageScheduled(getSUnit(InstOp1)); - if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects) - PhiOp1 = getInitPhiReg(*InstOp1, BB); - else - PhiOp1 = getLoopPhiReg(*InstOp1, BB); - InstOp1 = MRI.getVRegDef(PhiOp1); - int PhiOpStage = Schedule.stageScheduled(getSUnit(InstOp1)); - int StageAdj = (PhiOpStage != -1 ? 
PhiStage - PhiOpStage : 0); - if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np && - VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) { - PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1]; - break; - } - ++Indirects; - } - } else - PhiOp1 = InitVal; - // If this references a generated Phi in the kernel, get the Phi operand - // from the incoming block. - if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) - if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB) - PhiOp1 = getInitPhiReg(*InstOp1, KernelBB); - - MachineInstr *PhiInst = MRI.getVRegDef(LoopVal); - bool LoopDefIsPhi = PhiInst && PhiInst->isPHI(); - // In the epilog, a map lookup is needed to get the value from the kernel, - // or previous epilog block. How is does this depends on if the - // instruction is scheduled in the previous block. - if (!InKernel) { - int StageDiffAdj = 0; - if (LoopValStage != -1 && StageScheduled > LoopValStage) - StageDiffAdj = StageScheduled - LoopValStage; - // Use the loop value defined in the kernel, unless the kernel - // contains the last definition of the Phi. - if (np == 0 && PrevStage == LastStageNum && - (StageScheduled != 0 || LoopValStage != 0) && - VRMap[PrevStage - StageDiffAdj].count(LoopVal)) - PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal]; - // Use the value defined by the Phi. We add one because we switch - // from looking at the loop value to the Phi definition. - else if (np > 0 && PrevStage == LastStageNum && - VRMap[PrevStage - np + 1].count(Def)) - PhiOp2 = VRMap[PrevStage - np + 1][Def]; - // Use the loop value defined in the kernel. - else if (static_cast(LoopValStage) > PrologStage + 1 && - VRMap[PrevStage - StageDiffAdj - np].count(LoopVal)) - PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal]; - // Use the value defined by the Phi, unless we're generating the first - // epilog and the Phi refers to a Phi in a different stage. - else if (VRMap[PrevStage - np].count(Def) && - (!LoopDefIsPhi || (PrevStage != LastStageNum) || (LoopValStage == StageScheduled))) - PhiOp2 = VRMap[PrevStage - np][Def]; - } - - // Check if we can reuse an existing Phi. This occurs when a Phi - // references another Phi, and the other Phi is scheduled in an - // earlier stage. We can try to reuse an existing Phi up until the last - // stage of the current Phi. - if (LoopDefIsPhi) { - if (static_cast(PrologStage - np) >= StageScheduled) { - int LVNumStages = Schedule.getStagesForPhi(LoopVal); - int StageDiff = (StageScheduled - LoopValStage); - LVNumStages -= StageDiff; - // Make sure the loop value Phi has been processed already. - if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) { - NewReg = PhiOp2; - unsigned ReuseStage = CurStageNum; - if (Schedule.isLoopCarried(this, *PhiInst)) - ReuseStage -= LVNumStages; - // Check if the Phi to reuse has been generated yet. If not, then - // there is nothing to reuse. - if (VRMap[ReuseStage - np].count(LoopVal)) { - NewReg = VRMap[ReuseStage - np][LoopVal]; - - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, - &*BBI, Def, NewReg); - // Update the map with the new Phi name. 
- VRMap[CurStageNum - np][Def] = NewReg; - PhiOp2 = NewReg; - if (VRMap[LastStageNum - np - 1].count(LoopVal)) - PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal]; - - if (IsLast && np == NumPhis - 1) - replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); - continue; - } - } - } - if (InKernel && StageDiff > 0 && - VRMap[CurStageNum - StageDiff - np].count(LoopVal)) - PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal]; - } - - const TargetRegisterClass *RC = MRI.getRegClass(Def); - NewReg = MRI.createVirtualRegister(RC); - - MachineInstrBuilder NewPhi = - BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(), - TII->get(TargetOpcode::PHI), NewReg); - NewPhi.addReg(PhiOp1).addMBB(BB1); - NewPhi.addReg(PhiOp2).addMBB(BB2); - if (np == 0) - InstrMap[NewPhi] = &*BBI; - - // We define the Phis after creating the new pipelined code, so - // we need to rename the Phi values in scheduled instructions. - - unsigned PrevReg = 0; - if (InKernel && VRMap[PrevStage - np].count(LoopVal)) - PrevReg = VRMap[PrevStage - np][LoopVal]; - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI, - Def, NewReg, PrevReg); - // If the Phi has been scheduled, use the new name for rewriting. - if (VRMap[CurStageNum - np].count(Def)) { - unsigned R = VRMap[CurStageNum - np][Def]; - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI, - R, NewReg); - } - - // Check if we need to rename any uses that occurs after the loop. The - // register to replace depends on whether the Phi is scheduled in the - // epilog. - if (IsLast && np == NumPhis - 1) - replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); - - // In the kernel, a dependent Phi uses the value from this Phi. - if (InKernel) - PhiOp2 = NewReg; - - // Update the map with the new Phi name. - VRMap[CurStageNum - np][Def] = NewReg; - } - - while (NumPhis++ < NumStages) { - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, NumPhis, - &*BBI, Def, NewReg, 0); - } - - // Check if we need to rename a Phi that has been eliminated due to - // scheduling. - if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal)) - replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS); - } -} - -/// Generate Phis for the specified block in the generated pipelined code. -/// These are new Phis needed because the definition is scheduled after the -/// use in the pipelined sequence. -void SwingSchedulerDAG::generatePhis( - MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, - MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap, - InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum, - bool IsLast) { - // Compute the stage number that contains the initial Phi value, and - // the Phi from the previous stage. 
- unsigned PrologStage = 0; - unsigned PrevStage = 0; - unsigned StageDiff = CurStageNum - LastStageNum; - bool InKernel = (StageDiff == 0); - if (InKernel) { - PrologStage = LastStageNum - 1; - PrevStage = CurStageNum; - } else { - PrologStage = LastStageNum - StageDiff; - PrevStage = LastStageNum + StageDiff - 1; - } - - for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(), - BBE = BB->instr_end(); - BBI != BBE; ++BBI) { - for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = BBI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; - - int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI)); - assert(StageScheduled != -1 && "Expecting scheduled instruction."); - unsigned Def = MO.getReg(); - unsigned NumPhis = Schedule.getStagesForReg(Def, CurStageNum); - // An instruction scheduled in stage 0 and is used after the loop - // requires a phi in the epilog for the last definition from either - // the kernel or prolog. - if (!InKernel && NumPhis == 0 && StageScheduled == 0 && - hasUseAfterLoop(Def, BB, MRI)) - NumPhis = 1; - if (!InKernel && (unsigned)StageScheduled > PrologStage) - continue; - - unsigned PhiOp2 = VRMap[PrevStage][Def]; - if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2)) - if (InstOp2->isPHI() && InstOp2->getParent() == NewBB) - PhiOp2 = getLoopPhiReg(*InstOp2, BB2); - // The number of Phis can't exceed the number of prolog stages. The - // prolog stage number is zero based. - if (NumPhis > PrologStage + 1 - StageScheduled) - NumPhis = PrologStage + 1 - StageScheduled; - for (unsigned np = 0; np < NumPhis; ++np) { - unsigned PhiOp1 = VRMap[PrologStage][Def]; - if (np <= PrologStage) - PhiOp1 = VRMap[PrologStage - np][Def]; - if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) { - if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB) - PhiOp1 = getInitPhiReg(*InstOp1, KernelBB); - if (InstOp1->isPHI() && InstOp1->getParent() == NewBB) - PhiOp1 = getInitPhiReg(*InstOp1, NewBB); - } - if (!InKernel) - PhiOp2 = VRMap[PrevStage - np][Def]; - - const TargetRegisterClass *RC = MRI.getRegClass(Def); - unsigned NewReg = MRI.createVirtualRegister(RC); - - MachineInstrBuilder NewPhi = - BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(), - TII->get(TargetOpcode::PHI), NewReg); - NewPhi.addReg(PhiOp1).addMBB(BB1); - NewPhi.addReg(PhiOp2).addMBB(BB2); - if (np == 0) - InstrMap[NewPhi] = &*BBI; - - // Rewrite uses and update the map. The actions depend upon whether - // we generating code for the kernel or epilog blocks. - if (InKernel) { - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, - &*BBI, PhiOp1, NewReg); - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, - &*BBI, PhiOp2, NewReg); - - PhiOp2 = NewReg; - VRMap[PrevStage - np - 1][Def] = NewReg; - } else { - VRMap[CurStageNum - np][Def] = NewReg; - if (np == NumPhis - 1) - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, - &*BBI, Def, NewReg); - } - if (IsLast && np == NumPhis - 1) - replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); - } - } - } -} - -/// Remove instructions that generate values with no uses. -/// Typically, these are induction variable operations that generate values -/// used in the loop itself. A dead instruction has a definition with -/// no uses, or uses that occur in the original loop only. 
-void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB, - MBBVectorTy &EpilogBBs) { - // For each epilog block, check that the value defined by each instruction - // is used. If not, delete it. - for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(), - MBE = EpilogBBs.rend(); - MBB != MBE; ++MBB) - for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(), - ME = (*MBB)->instr_rend(); - MI != ME;) { - // From DeadMachineInstructionElem. Don't delete inline assembly. - if (MI->isInlineAsm()) { - ++MI; - continue; - } - bool SawStore = false; - // Check if it's safe to remove the instruction due to side effects. - // We can, and want to, remove Phis here. - if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) { - ++MI; - continue; - } - bool used = true; - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); - MOI != MOE; ++MOI) { - if (!MOI->isReg() || !MOI->isDef()) - continue; - unsigned reg = MOI->getReg(); - // Assume physical registers are used, unless they are marked dead. - if (TargetRegisterInfo::isPhysicalRegister(reg)) { - used = !MOI->isDead(); - if (used) - break; - continue; - } - unsigned realUses = 0; - for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg), - EI = MRI.use_end(); - UI != EI; ++UI) { - // Check if there are any uses that occur only in the original - // loop. If so, that's not a real use. - if (UI->getParent()->getParent() != BB) { - realUses++; - used = true; - break; - } - } - if (realUses > 0) - break; - used = false; - } - if (!used) { - LIS.RemoveMachineInstrFromMaps(*MI); - MI++->eraseFromParent(); - continue; - } - ++MI; - } - // In the kernel block, check if we can remove a Phi that generates a value - // used in an instruction removed in the epilog block. - for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(), - BBE = KernelBB->getFirstNonPHI(); - BBI != BBE;) { - MachineInstr *MI = &*BBI; - ++BBI; - unsigned reg = MI->getOperand(0).getReg(); - if (MRI.use_begin(reg) == MRI.use_end()) { - LIS.RemoveMachineInstrFromMaps(*MI); - MI->eraseFromParent(); - } - } -} - -/// For loop carried definitions, we split the lifetime of a virtual register -/// that has uses past the definition in the next iteration. A copy with a new -/// virtual register is inserted before the definition, which helps with -/// generating a better register assignment. -/// -/// v1 = phi(a, v2) v1 = phi(a, v2) -/// v2 = phi(b, v3) v2 = phi(b, v3) -/// v3 = .. v4 = copy v1 -/// .. = V1 v3 = .. -/// .. = v4 -void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB, - MBBVectorTy &EpilogBBs, - SMSchedule &Schedule) { - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - for (auto &PHI : KernelBB->phis()) { - unsigned Def = PHI.getOperand(0).getReg(); - // Check for any Phi definition that used as an operand of another Phi - // in the same block. - for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def), - E = MRI.use_instr_end(); - I != E; ++I) { - if (I->isPHI() && I->getParent() == KernelBB) { - // Get the loop carried definition. - unsigned LCDef = getLoopPhiReg(PHI, KernelBB); - if (!LCDef) - continue; - MachineInstr *MI = MRI.getVRegDef(LCDef); - if (!MI || MI->getParent() != KernelBB || MI->isPHI()) - continue; - // Search through the rest of the block looking for uses of the Phi - // definition. If one occurs, then split the lifetime. 
- unsigned SplitReg = 0; - for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI), - KernelBB->instr_end())) - if (BBJ.readsRegister(Def)) { - // We split the lifetime when we find the first use. - if (SplitReg == 0) { - SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def)); - BuildMI(*KernelBB, MI, MI->getDebugLoc(), - TII->get(TargetOpcode::COPY), SplitReg) - .addReg(Def); - } - BBJ.substituteRegister(Def, SplitReg, 0, *TRI); - } - if (!SplitReg) - continue; - // Search through each of the epilog blocks for any uses to be renamed. - for (auto &Epilog : EpilogBBs) - for (auto &I : *Epilog) - if (I.readsRegister(Def)) - I.substituteRegister(Def, SplitReg, 0, *TRI); - break; - } - } - } -} - -/// Remove the incoming block from the Phis in a basic block. -static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) { - for (MachineInstr &MI : *BB) { - if (!MI.isPHI()) - break; - for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) - if (MI.getOperand(i + 1).getMBB() == Incoming) { - MI.RemoveOperand(i + 1); - MI.RemoveOperand(i); - break; - } - } -} - -/// Create branches from each prolog basic block to the appropriate epilog -/// block. These edges are needed if the loop ends before reaching the -/// kernel. -void SwingSchedulerDAG::addBranches(MachineBasicBlock &PreheaderBB, - MBBVectorTy &PrologBBs, - MachineBasicBlock *KernelBB, - MBBVectorTy &EpilogBBs, - SMSchedule &Schedule, ValueMapTy *VRMap) { - assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch"); - MachineInstr *IndVar = Pass.LI.LoopInductionVar; - MachineInstr *Cmp = Pass.LI.LoopCompare; - MachineBasicBlock *LastPro = KernelBB; - MachineBasicBlock *LastEpi = KernelBB; - - // Start from the blocks connected to the kernel and work "out" - // to the first prolog and the last epilog blocks. - SmallVector PrevInsts; - unsigned MaxIter = PrologBBs.size() - 1; - unsigned LC = UINT_MAX; - unsigned LCMin = UINT_MAX; - for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) { - // Add branches to the prolog that go to the corresponding - // epilog, and the fall-thru prolog/kernel block. - MachineBasicBlock *Prolog = PrologBBs[j]; - MachineBasicBlock *Epilog = EpilogBBs[i]; - // We've executed one iteration, so decrement the loop count and check for - // the loop end. - SmallVector Cond; - // Check if the LOOP0 has already been removed. If so, then there is no need - // to reduce the trip count. - if (LC != 0) - LC = TII->reduceLoopCount(*Prolog, PreheaderBB, IndVar, *Cmp, Cond, - PrevInsts, j, MaxIter); - - // Record the value of the first trip count, which is used to determine if - // branches and blocks can be removed for constant trip counts. - if (LCMin == UINT_MAX) - LCMin = LC; - - unsigned numAdded = 0; - if (TargetRegisterInfo::isVirtualRegister(LC)) { - Prolog->addSuccessor(Epilog); - numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc()); - } else if (j >= LCMin) { - Prolog->addSuccessor(Epilog); - Prolog->removeSuccessor(LastPro); - LastEpi->removeSuccessor(Epilog); - numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc()); - removePhis(Epilog, LastEpi); - // Remove the blocks that are no longer referenced. 
- if (LastPro != LastEpi) { - LastEpi->clear(); - LastEpi->eraseFromParent(); - } - LastPro->clear(); - LastPro->eraseFromParent(); - } else { - numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc()); - removePhis(Epilog, Prolog); - } - LastPro = Prolog; - LastEpi = Epilog; - for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(), - E = Prolog->instr_rend(); - I != E && numAdded > 0; ++I, --numAdded) - updateInstruction(&*I, false, j, 0, Schedule, VRMap); - } -} - /// Return true if we can compute the amount the instruction changes /// during each iteration. Set Delta to the amount of the change. bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) { @@ -2854,7 +2063,7 @@ bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) { if (!BaseOp->isReg()) return false; - unsigned BaseReg = BaseOp->getReg(); + Register BaseReg = BaseOp->getReg(); MachineRegisterInfo &MRI = MF.getRegInfo(); // Check if there is a Phi. If so, get the definition in the loop. @@ -2874,261 +2083,6 @@ bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) { return true; } -/// Update the memory operand with a new offset when the pipeliner -/// generates a new copy of the instruction that refers to a -/// different memory location. -void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI, - MachineInstr &OldMI, unsigned Num) { - if (Num == 0) - return; - // If the instruction has memory operands, then adjust the offset - // when the instruction appears in different stages. - if (NewMI.memoperands_empty()) - return; - SmallVector NewMMOs; - for (MachineMemOperand *MMO : NewMI.memoperands()) { - // TODO: Figure out whether isAtomic is really necessary (see D57601). - if (MMO->isVolatile() || MMO->isAtomic() || - (MMO->isInvariant() && MMO->isDereferenceable()) || - (!MMO->getValue())) { - NewMMOs.push_back(MMO); - continue; - } - unsigned Delta; - if (Num != UINT_MAX && computeDelta(OldMI, Delta)) { - int64_t AdjOffset = Delta * Num; - NewMMOs.push_back( - MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize())); - } else { - NewMMOs.push_back( - MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize)); - } - } - NewMI.setMemRefs(MF, NewMMOs); -} - -/// Clone the instruction for the new pipelined loop and update the -/// memory operands, if needed. -MachineInstr *SwingSchedulerDAG::cloneInstr(MachineInstr *OldMI, - unsigned CurStageNum, - unsigned InstStageNum) { - MachineInstr *NewMI = MF.CloneMachineInstr(OldMI); - // Check for tied operands in inline asm instructions. This should be handled - // elsewhere, but I'm not sure of the best solution. - if (OldMI->isInlineAsm()) - for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) { - const auto &MO = OldMI->getOperand(i); - if (MO.isReg() && MO.isUse()) - break; - unsigned UseIdx; - if (OldMI->isRegTiedToUseOperand(i, &UseIdx)) - NewMI->tieOperands(i, UseIdx); - } - updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum); - return NewMI; -} - -/// Clone the instruction for the new pipelined loop. If needed, this -/// function updates the instruction using the values saved in the -/// InstrChanges structure. 
-MachineInstr *SwingSchedulerDAG::cloneAndChangeInstr(MachineInstr *OldMI, - unsigned CurStageNum, - unsigned InstStageNum, - SMSchedule &Schedule) { - MachineInstr *NewMI = MF.CloneMachineInstr(OldMI); - DenseMap>::iterator It = - InstrChanges.find(getSUnit(OldMI)); - if (It != InstrChanges.end()) { - std::pair RegAndOffset = It->second; - unsigned BasePos, OffsetPos; - if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos)) - return nullptr; - int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm(); - MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first); - if (Schedule.stageScheduled(getSUnit(LoopDef)) > (signed)InstStageNum) - NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum); - NewMI->getOperand(OffsetPos).setImm(NewOffset); - } - updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum); - return NewMI; -} - -/// Update the machine instruction with new virtual registers. This -/// function may change the defintions and/or uses. -void SwingSchedulerDAG::updateInstruction(MachineInstr *NewMI, bool LastDef, - unsigned CurStageNum, - unsigned InstrStageNum, - SMSchedule &Schedule, - ValueMapTy *VRMap) { - for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = NewMI->getOperand(i); - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; - unsigned reg = MO.getReg(); - if (MO.isDef()) { - // Create a new virtual register for the definition. - const TargetRegisterClass *RC = MRI.getRegClass(reg); - unsigned NewReg = MRI.createVirtualRegister(RC); - MO.setReg(NewReg); - VRMap[CurStageNum][reg] = NewReg; - if (LastDef) - replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS); - } else if (MO.isUse()) { - MachineInstr *Def = MRI.getVRegDef(reg); - // Compute the stage that contains the last definition for instruction. - int DefStageNum = Schedule.stageScheduled(getSUnit(Def)); - unsigned StageNum = CurStageNum; - if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) { - // Compute the difference in stages between the defintion and the use. - unsigned StageDiff = (InstrStageNum - DefStageNum); - // Make an adjustment to get the last definition. - StageNum -= StageDiff; - } - if (VRMap[StageNum].count(reg)) - MO.setReg(VRMap[StageNum][reg]); - } - } -} - -/// Return the instruction in the loop that defines the register. -/// If the definition is a Phi, then follow the Phi operand to -/// the instruction in the loop. -MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) { - SmallPtrSet Visited; - MachineInstr *Def = MRI.getVRegDef(Reg); - while (Def->isPHI()) { - if (!Visited.insert(Def).second) - break; - for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) - if (Def->getOperand(i + 1).getMBB() == BB) { - Def = MRI.getVRegDef(Def->getOperand(i).getReg()); - break; - } - } - return Def; -} - -/// Return the new name for the value from the previous stage. -unsigned SwingSchedulerDAG::getPrevMapVal(unsigned StageNum, unsigned PhiStage, - unsigned LoopVal, unsigned LoopStage, - ValueMapTy *VRMap, - MachineBasicBlock *BB) { - unsigned PrevVal = 0; - if (StageNum > PhiStage) { - MachineInstr *LoopInst = MRI.getVRegDef(LoopVal); - if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal)) - // The name is defined in the previous stage. - PrevVal = VRMap[StageNum - 1][LoopVal]; - else if (VRMap[StageNum].count(LoopVal)) - // The previous name is defined in the current stage when the instruction - // order is swapped. 
- PrevVal = VRMap[StageNum][LoopVal]; - else if (!LoopInst->isPHI() || LoopInst->getParent() != BB) - // The loop value hasn't yet been scheduled. - PrevVal = LoopVal; - else if (StageNum == PhiStage + 1) - // The loop value is another phi, which has not been scheduled. - PrevVal = getInitPhiReg(*LoopInst, BB); - else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB) - // The loop value is another phi, which has been scheduled. - PrevVal = - getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB), - LoopStage, VRMap, BB); - } - return PrevVal; -} - -/// Rewrite the Phi values in the specified block to use the mappings -/// from the initial operand. Once the Phi is scheduled, we switch -/// to using the loop value instead of the Phi value, so those names -/// do not need to be rewritten. -void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB, - unsigned StageNum, - SMSchedule &Schedule, - ValueMapTy *VRMap, - InstrMapTy &InstrMap) { - for (auto &PHI : BB->phis()) { - unsigned InitVal = 0; - unsigned LoopVal = 0; - getPhiRegs(PHI, BB, InitVal, LoopVal); - unsigned PhiDef = PHI.getOperand(0).getReg(); - - unsigned PhiStage = - (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef))); - unsigned LoopStage = - (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal))); - unsigned NumPhis = Schedule.getStagesForPhi(PhiDef); - if (NumPhis > StageNum) - NumPhis = StageNum; - for (unsigned np = 0; np <= NumPhis; ++np) { - unsigned NewVal = - getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB); - if (!NewVal) - NewVal = InitVal; - rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &PHI, - PhiDef, NewVal); - } - } -} - -/// Rewrite a previously scheduled instruction to use the register value -/// from the new instruction. Make sure the instruction occurs in the -/// basic block, and we don't change the uses in the new instruction. -void SwingSchedulerDAG::rewriteScheduledInstr( - MachineBasicBlock *BB, SMSchedule &Schedule, InstrMapTy &InstrMap, - unsigned CurStageNum, unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, - unsigned NewReg, unsigned PrevReg) { - bool InProlog = (CurStageNum < Schedule.getMaxStageCount()); - int StagePhi = Schedule.stageScheduled(getSUnit(Phi)) + PhiNum; - // Rewrite uses that have been scheduled already to use the new - // Phi register. - for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg), - EI = MRI.use_end(); - UI != EI;) { - MachineOperand &UseOp = *UI; - MachineInstr *UseMI = UseOp.getParent(); - ++UI; - if (UseMI->getParent() != BB) - continue; - if (UseMI->isPHI()) { - if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg) - continue; - if (getLoopPhiReg(*UseMI, BB) != OldReg) - continue; - } - InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI); - assert(OrigInstr != InstrMap.end() && "Instruction not scheduled."); - SUnit *OrigMISU = getSUnit(OrigInstr->second); - int StageSched = Schedule.stageScheduled(OrigMISU); - int CycleSched = Schedule.cycleScheduled(OrigMISU); - unsigned ReplaceReg = 0; - // This is the stage for the scheduled instruction. 
- if (StagePhi == StageSched && Phi->isPHI()) { - int CyclePhi = Schedule.cycleScheduled(getSUnit(Phi)); - if (PrevReg && InProlog) - ReplaceReg = PrevReg; - else if (PrevReg && !Schedule.isLoopCarried(this, *Phi) && - (CyclePhi <= CycleSched || OrigMISU->getInstr()->isPHI())) - ReplaceReg = PrevReg; - else - ReplaceReg = NewReg; - } - // The scheduled instruction occurs before the scheduled Phi, and the - // Phi is not loop carried. - if (!InProlog && StagePhi + 1 == StageSched && - !Schedule.isLoopCarried(this, *Phi)) - ReplaceReg = NewReg; - if (StagePhi > StageSched && Phi->isPHI()) - ReplaceReg = NewReg; - if (!InProlog && !Phi->isPHI() && StagePhi < StageSched) - ReplaceReg = NewReg; - if (ReplaceReg) { - MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg)); - UseOp.setReg(ReplaceReg); - } - } -} - /// Check if we can change the instruction to use an offset value from the /// previous iteration. If so, return true and set the base and offset values /// so that we can rewrite the load, if necessary. @@ -3147,7 +2101,7 @@ bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI, unsigned BasePosLd, OffsetPosLd; if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd)) return false; - unsigned BaseReg = MI->getOperand(BasePosLd).getReg(); + Register BaseReg = MI->getOperand(BasePosLd).getReg(); // Look for the Phi instruction. MachineRegisterInfo &MRI = MI->getMF()->getRegInfo(); @@ -3202,7 +2156,7 @@ void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI, unsigned BasePos, OffsetPos; if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) return; - unsigned BaseReg = MI->getOperand(BasePos).getReg(); + Register BaseReg = MI->getOperand(BasePos).getReg(); MachineInstr *LoopDef = findDefInLoop(BaseReg); int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef)); int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef)); @@ -3221,11 +2175,29 @@ void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI, NewMI->getOperand(OffsetPos).setImm(NewOffset); SU->setInstr(NewMI); MISUnitMap[NewMI] = SU; - NewMIs.insert(NewMI); + NewMIs[MI] = NewMI; } } } +/// Return the instruction in the loop that defines the register. +/// If the definition is a Phi, then follow the Phi operand to +/// the instruction in the loop. +MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) { + SmallPtrSet Visited; + MachineInstr *Def = MRI.getVRegDef(Reg); + while (Def->isPHI()) { + if (!Visited.insert(Def).second) + break; + for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) + if (Def->getOperand(i + 1).getMBB() == BB) { + Def = MRI.getVRegDef(Def->getOperand(i).getReg()); + break; + } + } + return Def; +} + /// Return true for an order or output dependence that is loop carried /// potentially. A dependence is loop carried if the destination defines a valu /// that may be used or defined by the source in a subsequent iteration. 
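As a concrete illustration of the loop-carried dependence described in the comment above (a sketch written for this note, not code from the patch): each iteration below reads the value the previous iteration wrote, so the accesses cannot be reordered across iterations once the pipeliner overlaps them.

  #include <cstddef>

  // A[I] depends on the A[I - 1] produced one iteration earlier, i.e. the
  // dependence is carried by the loop rather than contained in one iteration.
  void accumulate(int *A, std::size_t N) {
    for (std::size_t I = 1; I < N; ++I)
      A[I] = A[I - 1] + 1;
  }
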
@@ -3499,10 +2471,10 @@ void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, ++I, ++Pos) { for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); unsigned BasePos, OffsetPos; if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) if (MI->getOperand(BasePos).getReg() == Reg) @@ -3676,7 +2648,7 @@ bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) { assert(StageDef != -1 && "Instruction should have been scheduled."); for (auto &SI : SU.Succs) if (SI.isAssignedRegDep()) - if (ST.getRegisterInfo()->isPhysicalRegister(SI.getReg())) + if (Register::isPhysicalRegister(SI.getReg())) if (stageScheduled(SI.getSUnit()) != StageDef) return false; } @@ -3810,7 +2782,7 @@ void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque &Instrs) { NewMI->getOperand(OffsetPos).setImm(NewOffset); SU->setInstr(NewMI); MISUnitMap[NewMI] = SU; - NewMIs.insert(NewMI); + NewMIs[MI] = NewMI; } } OverlapReg = 0; @@ -3847,40 +2819,6 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) { ScheduledInstrs[cycle].push_front(*I); } } - // Iterate over the definitions in each instruction, and compute the - // stage difference for each use. Keep the maximum value. - for (auto &I : InstrToCycle) { - int DefStage = stageScheduled(I.first); - MachineInstr *MI = I.first->getInstr(); - for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { - MachineOperand &Op = MI->getOperand(i); - if (!Op.isReg() || !Op.isDef()) - continue; - - unsigned Reg = Op.getReg(); - unsigned MaxDiff = 0; - bool PhiIsSwapped = false; - for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg), - EI = MRI.use_end(); - UI != EI; ++UI) { - MachineOperand &UseOp = *UI; - MachineInstr *UseMI = UseOp.getParent(); - SUnit *SUnitUse = SSD->getSUnit(UseMI); - int UseStage = stageScheduled(SUnitUse); - unsigned Diff = 0; - if (UseStage != -1 && UseStage >= DefStage) - Diff = UseStage - DefStage; - if (MI->isPHI()) { - if (isLoopCarried(SSD, *MI)) - ++Diff; - else - PhiIsSwapped = true; - } - MaxDiff = std::max(Diff, MaxDiff); - } - RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped); - } - } // Erase all the elements in the later stages. Only one iteration should // remain in the scheduled list, and it contains all the instructions. 
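The hunks above show the register-class migration that recurs throughout this import: raw unsigned register numbers and the static predicates on TargetRegisterInfo give way to the Register wrapper and its own predicates. A minimal sketch of the resulting usage (illustrative only; classifyReg is a made-up helper, not part of the patch):

  #include "llvm/CodeGen/Register.h"

  using namespace llvm;

  // Register converts implicitly from the old unsigned encoding, and the
  // virtual/physical checks now live on Register itself.
  static const char *classifyReg(Register Reg) {
    if (Register::isVirtualRegister(Reg))
      return "virtual";
    if (Register::isPhysicalRegister(Reg))
      return "physical";
    return "neither (e.g. the null register)";
  }
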
@@ -4085,4 +3023,3 @@ void ResourceManager::clearResources() {
     return DFAResources->clearResources();
   std::fill(ProcResourceCount.begin(), ProcResourceCount.end(), 0);
 }
-
diff --git a/lib/CodeGen/MachinePostDominators.cpp b/lib/CodeGen/MachinePostDominators.cpp
index 7f220ed1fd8f..f4daff667e86 100644
--- a/lib/CodeGen/MachinePostDominators.cpp
+++ b/lib/CodeGen/MachinePostDominators.cpp
@@ -17,7 +17,9 @@ using namespace llvm;
 
 namespace llvm {
 template class DominatorTreeBase<MachineBasicBlock, true>; // PostDomTreeBase
-}
+
+extern bool VerifyMachineDomInfo;
+} // namespace llvm
 
 char MachinePostDominatorTree::ID = 0;
 
@@ -25,33 +27,52 @@ char MachinePostDominatorTree::ID = 0;
 INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree",
                 "MachinePostDominator Tree Construction", true, true)
 
-MachinePostDominatorTree::MachinePostDominatorTree() : MachineFunctionPass(ID) {
+MachinePostDominatorTree::MachinePostDominatorTree()
+    : MachineFunctionPass(ID), PDT(nullptr) {
   initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry());
-  DT = new PostDomTreeBase<MachineBasicBlock>();
 }
 
-FunctionPass *
-MachinePostDominatorTree::createMachinePostDominatorTreePass() {
+FunctionPass *MachinePostDominatorTree::createMachinePostDominatorTreePass() {
   return new MachinePostDominatorTree();
 }
 
-bool
-MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
-  DT->recalculate(F);
+bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
+  PDT = std::make_unique<PostDomTreeT>();
+  PDT->recalculate(F);
   return false;
 }
 
-MachinePostDominatorTree::~MachinePostDominatorTree() {
-  delete DT;
-}
-
-void
-MachinePostDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
+void MachinePostDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
-void
-MachinePostDominatorTree::print(llvm::raw_ostream &OS, const Module *M) const {
-  DT->print(OS);
+MachineBasicBlock *MachinePostDominatorTree::findNearestCommonDominator(
+    ArrayRef<MachineBasicBlock *> Blocks) const {
+  assert(!Blocks.empty());
+
+  MachineBasicBlock *NCD = Blocks.front();
+  for (MachineBasicBlock *BB : Blocks.drop_front()) {
+    NCD = PDT->findNearestCommonDominator(NCD, BB);
+
+    // Stop when the root is reached.
+ if (PDT->isVirtualRoot(PDT->getNode(NCD))) + return nullptr; + } + + return NCD; +} + +void MachinePostDominatorTree::verifyAnalysis() const { + if (PDT && VerifyMachineDomInfo) + if (!PDT->verify(PostDomTreeT::VerificationLevel::Basic)) { + errs() << "MachinePostDominatorTree verification failed\n"; + + abort(); + } +} + +void MachinePostDominatorTree::print(llvm::raw_ostream &OS, + const Module *M) const { + PDT->print(OS); } diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index f0fd0405d69d..b88d4ea462ef 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -144,7 +144,7 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg) { } unsigned MachineRegisterInfo::createIncompleteVirtualRegister(StringRef Name) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs()); + unsigned Reg = Register::index2VirtReg(getNumVirtRegs()); VRegInfo.grow(Reg); RegAllocHints.grow(Reg); insertVRegByName(Name, Reg); @@ -202,7 +202,7 @@ void MachineRegisterInfo::clearVirtRegTypes() { VRegToType.clear(); } void MachineRegisterInfo::clearVirtRegs() { #ifndef NDEBUG for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (!VRegInfo[Reg].second) continue; verifyUseList(Reg); @@ -255,7 +255,7 @@ void MachineRegisterInfo::verifyUseList(unsigned Reg) const { void MachineRegisterInfo::verifyUseLists() const { #ifndef NDEBUG for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) - verifyUseList(TargetRegisterInfo::index2VirtReg(i)); + verifyUseList(Register::index2VirtReg(i)); for (unsigned i = 1, e = getTargetRegisterInfo()->getNumRegs(); i != e; ++i) verifyUseList(i); #endif @@ -386,7 +386,7 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) { MachineOperand &O = *I; ++I; - if (TargetRegisterInfo::isPhysicalRegister(ToReg)) { + if (Register::isPhysicalRegister(ToReg)) { O.substPhysReg(ToReg, *TRI); } else { O.setReg(ToReg); @@ -498,7 +498,7 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, LaneBitmask MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const { // Lane masks are only defined for vregs. 
- assert(TargetRegisterInfo::isVirtualRegister(Reg)); + assert(Register::isVirtualRegister(Reg)); const TargetRegisterClass &TRC = *getRegClass(Reg); return TRC.getLaneMask(); } @@ -517,7 +517,7 @@ void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) { } bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg) const { - assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); + assert(Register::isPhysicalRegister(PhysReg)); const TargetRegisterInfo *TRI = getTargetRegisterInfo(); if (TRI->isConstantPhysReg(PhysReg)) diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index e8b42047b49f..258a5f9e0482 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -95,7 +95,7 @@ unsigned LookForIdenticalPHI(MachineBasicBlock *BB, while (I != BB->end() && I->isPHI()) { bool Same = true; for (unsigned i = 1, e = I->getNumOperands(); i != e; i += 2) { - unsigned SrcReg = I->getOperand(i).getReg(); + Register SrcReg = I->getOperand(i).getReg(); MachineBasicBlock *SrcBB = I->getOperand(i+1).getMBB(); if (AVals[SrcBB] != SrcReg) { Same = false; @@ -118,7 +118,7 @@ MachineInstrBuilder InsertNewDef(unsigned Opcode, const TargetRegisterClass *RC, MachineRegisterInfo *MRI, const TargetInstrInfo *TII) { - unsigned NewVR = MRI->createVirtualRegister(RC); + Register NewVR = MRI->createVirtualRegister(RC); return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR); } @@ -292,7 +292,7 @@ public: MachineSSAUpdater *Updater) { // Insert an implicit_def to represent an undef value. MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF, - BB, BB->getFirstTerminator(), + BB, BB->getFirstNonPHI(), Updater->VRC, Updater->MRI, Updater->TII); return NewDef->getOperand(0).getReg(); diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index ae1170ad1be6..f0721ea3b76d 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -82,6 +82,10 @@ cl::opt DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout")); +cl::opt VerifyScheduling( + "verify-misched", cl::Hidden, + cl::desc("Verify machine instrs before and after machine scheduling")); + } // end namespace llvm #ifndef NDEBUG @@ -122,9 +126,6 @@ static cl::opt EnableMemOpCluster("misched-cluster", cl::Hidden, cl::desc("Enable memop clustering."), cl::init(true)); -static cl::opt VerifyScheduling("verify-misched", cl::Hidden, - cl::desc("Verify machine instrs before and after machine scheduling")); - // DAG subtrees must have at least this many nodes. 
static const unsigned MinSubtreeSize = 8; @@ -198,6 +199,7 @@ char &llvm::MachineSchedulerID = MachineScheduler::ID; INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE, "Machine Instruction Scheduler", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) @@ -210,7 +212,7 @@ MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) { void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequiredID(MachineDominatorsID); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -234,7 +236,7 @@ PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) { void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequiredID(MachineDominatorsID); + AU.addRequired(); AU.addRequired(); AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); @@ -933,8 +935,8 @@ void ScheduleDAGMILive::collectVRegUses(SUnit &SU) { if (TrackLaneMasks && !MO.isUse()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; // Ignore re-defs. @@ -985,7 +987,7 @@ void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb, "ShouldTrackLaneMasks requires ShouldTrackPressure"); } -// Setup the register pressure trackers for the top scheduled top and bottom +// Setup the register pressure trackers for the top scheduled and bottom // scheduled regions. void ScheduleDAGMILive::initRegPressure() { VRegUses.clear(); @@ -1095,7 +1097,7 @@ void ScheduleDAGMILive::updatePressureDiffs( for (const RegisterMaskPair &P : LiveUses) { unsigned Reg = P.RegUnit; /// FIXME: Currently assuming single-use physregs. - if (!TRI->isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) continue; if (ShouldTrackLaneMasks) { @@ -1319,8 +1321,8 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { // Visit each live out vreg def to find def/use pairs that cross iterations. for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) { unsigned Reg = P.RegUnit; - if (!TRI->isVirtualRegister(Reg)) - continue; + if (!Register::isVirtualRegister(Reg)) + continue; const LiveInterval &LI = LIS->getInterval(Reg); const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); if (!DefVNI) @@ -1538,14 +1540,14 @@ namespace llvm { std::unique_ptr createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - return EnableMemOpCluster ? llvm::make_unique(TII, TRI) + return EnableMemOpCluster ? std::make_unique(TII, TRI) : nullptr; } std::unique_ptr createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - return EnableMemOpCluster ? llvm::make_unique(TII, TRI) + return EnableMemOpCluster ? std::make_unique(TII, TRI) : nullptr; } @@ -1657,7 +1659,7 @@ namespace llvm { std::unique_ptr createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - return llvm::make_unique(TII, TRI); + return std::make_unique(TII, TRI); } } // end namespace llvm @@ -1687,13 +1689,13 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { // Check for pure vreg copies. 
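The MachineScheduler hunk above adds INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) and switches getAnalysisUsage from the legacy addRequiredID(MachineDominatorsID) form to the typed addRequired<> form, so the pass can later fetch the tree with getAnalysis<>. Below is a hedged sketch of that pattern; the required analysis is assumed to be MachineDominatorTree based on the new pass dependency, and ExamplePass is a hypothetical pass written only to show the shape, not code from this patch.

// Sketch of declaring a typed analysis requirement in a MachineFunctionPass.
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;

namespace {
class ExamplePass : public MachineFunctionPass {
public:
  static char ID;
  ExamplePass() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<MachineDominatorTree>(); // declare the dependency by type
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    // The declared requirement is what makes this lookup legal.
    MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
    (void)MDT;
    return false;
  }
};
} // end anonymous namespace

char ExamplePass::ID = 0;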
const MachineOperand &SrcOp = Copy->getOperand(1); - unsigned SrcReg = SrcOp.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg()) + Register SrcReg = SrcOp.getReg(); + if (!Register::isVirtualRegister(SrcReg) || !SrcOp.readsReg()) return; const MachineOperand &DstOp = Copy->getOperand(0); - unsigned DstReg = DstOp.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead()) + Register DstReg = DstOp.getReg(); + if (!Register::isVirtualRegister(DstReg) || DstOp.isDead()) return; // Check if either the dest or source is local. If it's live across a back @@ -2914,14 +2916,12 @@ int biasPhysReg(const SUnit *SU, bool isTop) { unsigned UnscheduledOper = isTop ? 0 : 1; // If we have already scheduled the physreg produce/consumer, immediately // schedule the copy. - if (TargetRegisterInfo::isPhysicalRegister( - MI->getOperand(ScheduledOper).getReg())) + if (Register::isPhysicalRegister(MI->getOperand(ScheduledOper).getReg())) return 1; // If the physreg is at the boundary, defer it. Otherwise schedule it // immediately to free the dependent. We can hoist the copy later. bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft; - if (TargetRegisterInfo::isPhysicalRegister( - MI->getOperand(UnscheduledOper).getReg())) + if (Register::isPhysicalRegister(MI->getOperand(UnscheduledOper).getReg())) return AtBoundary ? -1 : 1; } @@ -2931,7 +2931,7 @@ int biasPhysReg(const SUnit *SU, bool isTop) { // physical registers. bool DoBias = true; for (const MachineOperand &Op : MI->defs()) { - if (Op.isReg() && !TargetRegisterInfo::isPhysicalRegister(Op.getReg())) { + if (Op.isReg() && !Register::isPhysicalRegister(Op.getReg())) { DoBias = false; break; } @@ -3259,7 +3259,8 @@ void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) { // Find already scheduled copies with a single physreg dependence and move // them just above the scheduled instruction. for (SDep &Dep : Deps) { - if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg())) + if (Dep.getKind() != SDep::Data || + !Register::isPhysicalRegister(Dep.getReg())) continue; SUnit *DepSU = Dep.getSUnit(); if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1) @@ -3298,7 +3299,7 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { /// default scheduler if the target does not set a default. ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) { ScheduleDAGMILive *DAG = - new ScheduleDAGMILive(C, llvm::make_unique(C)); + new ScheduleDAGMILive(C, std::make_unique(C)); // Register DAG post-processors. 
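Several hunks in this file replace llvm::make_unique with std::make_unique, reflecting the move to a C++14 baseline where the standard helper is available. A minimal standalone illustration of the same pattern, with Mutation as a stand-in type invented for the example:

// Before: return Enabled ? llvm::make_unique<Mutation>("cluster") : nullptr;
// After the C++14 switch, the standard helper is used directly.
#include <memory>
#include <string>

struct Mutation {
  std::string Name;
  explicit Mutation(std::string N) : Name(std::move(N)) {}
};

std::unique_ptr<Mutation> makeMutation(bool Enabled) {
  return Enabled ? std::make_unique<Mutation>("cluster") : nullptr;
}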
// // FIXME: extend the mutation API to allow earlier mutations to instantiate @@ -3450,7 +3451,7 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { } ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) { - return new ScheduleDAGMI(C, llvm::make_unique(C), + return new ScheduleDAGMI(C, std::make_unique(C), /*RemoveKillFlags=*/true); } @@ -3561,10 +3562,10 @@ public: } // end anonymous namespace static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { - return new ScheduleDAGMILive(C, llvm::make_unique(true)); + return new ScheduleDAGMILive(C, std::make_unique(true)); } static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { - return new ScheduleDAGMILive(C, llvm::make_unique(false)); + return new ScheduleDAGMILive(C, std::make_unique(false)); } static MachineSchedRegistry ILPMaxRegistry( @@ -3658,7 +3659,7 @@ static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { assert((TopDown || !ForceTopDown) && "-misched-topdown incompatible with -misched-bottomup"); return new ScheduleDAGMILive( - C, llvm::make_unique(Alternate, TopDown)); + C, std::make_unique(Alternate, TopDown)); } static MachineSchedRegistry ShufflerRegistry( diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 41db2c88ce50..27a2e7023f22 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -36,8 +36,9 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" @@ -114,15 +115,12 @@ namespace { bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); - AU.addPreserved(); - AU.addPreserved(); AU.addPreserved(); if (UseBlockFreqInfo) AU.addRequired(); @@ -195,11 +193,10 @@ bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI, if (!MI.isCopy()) return false; - unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned DstReg = MI.getOperand(0).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || - !TargetRegisterInfo::isVirtualRegister(DstReg) || - !MRI->hasOneNonDBGUse(SrcReg)) + Register SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + if (!Register::isVirtualRegister(SrcReg) || + !Register::isVirtualRegister(DstReg) || !MRI->hasOneNonDBGUse(SrcReg)) return false; const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg); @@ -233,8 +230,7 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *DefMBB, bool &BreakPHIEdge, bool &LocalUse) const { - assert(TargetRegisterInfo::isVirtualRegister(Reg) && - "Only makes sense for vregs"); + assert(Register::isVirtualRegister(Reg) && "Only makes sense for vregs"); // Ignore debug uses because debug info doesn't affect the code. 
if (MRI->use_nodbg_empty(Reg)) @@ -416,13 +412,13 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI, const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; // We don't move live definitions of physical registers, // so sinking their uses won't enable any opportunities. - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) continue; // If this instruction is the only user of a virtual register, @@ -615,10 +611,10 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; // Ignore non-register operands. - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { if (MO.isUse()) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, @@ -817,8 +813,9 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { const MachineOperand &MO = MI.getOperand(I); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + Register Reg = MO.getReg(); + if (Reg == 0 || !Register::isPhysicalRegister(Reg)) + continue; if (SuccToSinkTo->isLiveIn(Reg)) return false; } @@ -958,8 +955,9 @@ private: /// Track which register units have been modified and used. LiveRegUnits ModifiedRegUnits, UsedRegUnits; - /// Track DBG_VALUEs of (unmodified) register units. - DenseMap> SeenDbgInstrs; + /// Track DBG_VALUEs of (unmodified) register units. Each DBG_VALUE has an + /// entry in this map for each unit it touches. + DenseMap> SeenDbgInstrs; /// Sink Copy instructions unused in the same block close to their uses in /// successors. 
@@ -1030,7 +1028,7 @@ static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB, const TargetRegisterInfo *TRI) { for (auto U : UsedOpsInCopy) { MachineOperand &MO = MI->getOperand(U); - unsigned SrcReg = MO.getReg(); + Register SrcReg = MO.getReg(); if (!UsedRegUnits.available(SrcReg)) { MachineBasicBlock::iterator NI = std::next(MI->getIterator()); for (MachineInstr &UI : make_range(NI, CurBB.end())) { @@ -1053,7 +1051,7 @@ static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB, for (MCSubRegIterator S(DefReg, TRI, true); S.isValid(); ++S) SuccBB->removeLiveIn(*S); for (auto U : UsedOpsInCopy) { - unsigned Reg = MI->getOperand(U).getReg(); + Register Reg = MI->getOperand(U).getReg(); if (!SuccBB->isLiveIn(Reg)) SuccBB->addLiveIn(Reg); } @@ -1069,7 +1067,7 @@ static bool hasRegisterDependency(MachineInstr *MI, MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; if (MO.isDef()) { @@ -1094,6 +1092,14 @@ static bool hasRegisterDependency(MachineInstr *MI, return HasRegDependency; } +static SmallSet getRegUnits(unsigned Reg, + const TargetRegisterInfo *TRI) { + SmallSet RegUnits; + for (auto RI = MCRegUnitIterator(Reg, TRI); RI.isValid(); ++RI) + RegUnits.insert(*RI); + return RegUnits; +} + bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, MachineFunction &MF, const TargetRegisterInfo *TRI, @@ -1130,15 +1136,17 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, // for DBG_VALUEs later, record them when they're encountered. if (MI->isDebugValue()) { auto &MO = MI->getOperand(0); - if (MO.isReg() && TRI->isPhysicalRegister(MO.getReg())) { + if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) { // Bail if we can already tell the sink would be rejected, rather // than needlessly accumulating lots of DBG_VALUEs. if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy, ModifiedRegUnits, UsedRegUnits)) continue; - // Record debug use of this register. - SeenDbgInstrs[MO.getReg()].push_back(MI); + // Record debug use of each reg unit. + SmallSet Units = getRegUnits(MO.getReg(), TRI); + for (unsigned Reg : Units) + SeenDbgInstrs[Reg].push_back(MI); } continue; } @@ -1177,15 +1185,22 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) && "Unexpected predecessor"); - // Collect DBG_VALUEs that must sink with this copy. + // Collect DBG_VALUEs that must sink with this copy. We've previously + // recorded which reg units that DBG_VALUEs read, if this instruction + // writes any of those units then the corresponding DBG_VALUEs must sink. + SetVector DbgValsToSinkSet; SmallVector DbgValsToSink; for (auto &MO : MI->operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned reg = MO.getReg(); - for (auto *MI : SeenDbgInstrs.lookup(reg)) - DbgValsToSink.push_back(MI); + + SmallSet Units = getRegUnits(MO.getReg(), TRI); + for (unsigned Reg : Units) + for (auto *MI : SeenDbgInstrs.lookup(Reg)) + DbgValsToSinkSet.insert(MI); } + DbgValsToSink.insert(DbgValsToSink.begin(), DbgValsToSinkSet.begin(), + DbgValsToSinkSet.end()); // Clear the kill flag if SrcReg is killed between MI and the end of the // block. 
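The PostRAMachineSinking change above keys SeenDbgInstrs by register unit instead of by register and deduplicates the collected DBG_VALUEs through a set, so a copy that clobbers any aliasing register still drags the right debug values along when it sinks. A standalone sketch of that bookkeeping under an invented aliasing table; real targets enumerate units through MCRegUnitIterator and MCRegisterInfo.

// Sketch: record debug values per register unit, then gather them for any
// later def that overlaps one of those units. The unit table is invented
// purely for illustration.
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

using RegUnit = int;

// Hypothetical aliasing: W0 and X0 share unit 0; D0 covers units 10 and 11.
std::vector<RegUnit> getRegUnits(const std::string &Reg) {
  static const std::map<std::string, std::vector<RegUnit>> Table = {
      {"W0", {0}}, {"X0", {0}}, {"D0", {10, 11}}, {"S0", {10}}};
  auto It = Table.find(Reg);
  return It == Table.end() ? std::vector<RegUnit>{} : It->second;
}

int main() {
  // Record a debug value that reads W0 under every unit it touches.
  std::map<RegUnit, std::vector<std::string>> SeenDbgInstrs;
  for (RegUnit U : getRegUnits("W0"))
    SeenDbgInstrs[U].push_back("DBG_VALUE $w0, !\"x\"");

  // A copy defining X0 overlaps unit 0, so that DBG_VALUE must sink too.
  std::set<std::string> DbgValsToSink;
  for (RegUnit U : getRegUnits("X0"))
    for (const std::string &DV : SeenDbgInstrs[U])
      DbgValsToSink.insert(DV);

  for (const std::string &DV : DbgValsToSink)
    std::cout << DV << '\n';
}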
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index f9505df4e7f4..66a3bc2f8cc4 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -634,7 +634,7 @@ struct DataDep { /// Create a DataDep from an SSA form virtual register. DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp) : UseOp(UseOp) { - assert(TargetRegisterInfo::isVirtualRegister(VirtReg)); + assert(Register::isVirtualRegister(VirtReg)); MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg); assert(!DefI.atEnd() && "Register has no defs"); DefMI = DefI->getParent(); @@ -660,10 +660,10 @@ static bool getDataDeps(const MachineInstr &UseMI, const MachineOperand &MO = *I; if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { HasPhysRegs = true; continue; } @@ -687,7 +687,7 @@ static void getPHIDeps(const MachineInstr &UseMI, assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI"); for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) { if (UseMI.getOperand(i + 1).getMBB() == Pred) { - unsigned Reg = UseMI.getOperand(i).getReg(); + Register Reg = UseMI.getOperand(i).getReg(); Deps.push_back(DataDep(MRI, Reg, i)); return; } @@ -708,8 +708,8 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI, const MachineOperand &MO = *MI; if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; // Track live defs and kills for updating RegUnits. if (MO.isDef()) { @@ -765,7 +765,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { assert(TBI.HasValidInstrHeights && "Missing height info"); unsigned MaxLen = 0; for (const LiveInReg &LIR : TBI.LiveIns) { - if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg)) + if (!Register::isVirtualRegister(LIR.Reg)) continue; const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); // Ignore dependencies outside the current trace. @@ -902,8 +902,8 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height, const MachineOperand &MO = *MOI; if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; if (MO.readsReg()) ReadOps.push_back(MI.getOperandNo(MOI)); @@ -930,7 +930,7 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height, // Now we know the height of MI. Update any regunits read. for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) { - unsigned Reg = MI.getOperand(ReadOps[i]).getReg(); + Register Reg = MI.getOperand(ReadOps[i]).getReg(); for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { LiveRegUnit &LRU = RegUnits[*Units]; // Set the height to the highest reader of the unit. @@ -979,7 +979,7 @@ addLiveIns(const MachineInstr *DefMI, unsigned DefOp, ArrayRef Trace) { assert(!Trace.empty() && "Trace should contain at least one block"); unsigned Reg = DefMI->getOperand(DefOp).getReg(); - assert(TargetRegisterInfo::isVirtualRegister(Reg)); + assert(Register::isVirtualRegister(Reg)); const MachineBasicBlock *DefMBB = DefMI->getParent(); // Reg is live-in to all blocks in Trace that follow DefMBB. 
@@ -1026,7 +1026,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) { if (MBB) { TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; for (LiveInReg &LI : TBI.LiveIns) { - if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) { + if (Register::isVirtualRegister(LI.Reg)) { // For virtual registers, the def latency is included. unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)]; if (Height < LI.Height) diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 0ad792ac62cf..969743edca52 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -22,7 +22,6 @@ // the verifier errors. //===----------------------------------------------------------------------===// -#include "LiveRangeCalc.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -37,6 +36,7 @@ #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveRangeCalc.h" #include "llvm/CodeGen/LiveStacks.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -122,7 +122,7 @@ namespace { // Add Reg and any sub-registers to RV void addRegWithSubRegs(RegVector &RV, unsigned Reg) { RV.push_back(Reg); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) RV.push_back(*SubRegs); } @@ -159,7 +159,7 @@ namespace { // Add register to vregsPassed if it belongs there. Return true if // anything changed. bool addPassed(unsigned Reg) { - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return false; if (regsKilled.count(Reg) || regsLiveOut.count(Reg)) return false; @@ -178,7 +178,7 @@ namespace { // Add register to vregsRequired if it belongs there. Return true if // anything changed. bool addRequired(unsigned Reg) { - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return false; if (regsLiveOut.count(Reg)) return false; @@ -552,7 +552,7 @@ void MachineVerifier::report_context_vreg(unsigned VReg) const { } void MachineVerifier::report_context_vreg_regunit(unsigned VRegOrUnit) const { - if (TargetRegisterInfo::isVirtualRegister(VRegOrUnit)) { + if (Register::isVirtualRegister(VRegOrUnit)) { report_context_vreg(VRegOrUnit); } else { errs() << "- regunit: " << printRegUnit(VRegOrUnit, TRI) << '\n'; @@ -797,7 +797,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { regsLive.clear(); if (MRI->tracksLiveness()) { for (const auto &LI : MBB->liveins()) { - if (!TargetRegisterInfo::isPhysicalRegister(LI.PhysReg)) { + if (!Register::isPhysicalRegister(LI.PhysReg)) { report("MBB live-in list contains non-physical register", MBB); continue; } @@ -957,7 +957,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { // Generic opcodes must not have physical register operands. 
for (unsigned I = 0; I < MI->getNumOperands(); ++I) { const MachineOperand *MO = &MI->getOperand(I); - if (MO->isReg() && TargetRegisterInfo::isPhysicalRegister(MO->getReg())) + if (MO->isReg() && Register::isPhysicalRegister(MO->getReg())) report("Generic instruction cannot have physical register", MO, I); } @@ -1368,7 +1368,108 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } } + switch (IntrID) { + case Intrinsic::memcpy: + if (MI->getNumOperands() != 5) + report("Expected memcpy intrinsic to have 5 operands", MI); + break; + case Intrinsic::memmove: + if (MI->getNumOperands() != 5) + report("Expected memmove intrinsic to have 5 operands", MI); + break; + case Intrinsic::memset: + if (MI->getNumOperands() != 5) + report("Expected memset intrinsic to have 5 operands", MI); + break; + } + break; + } + case TargetOpcode::G_SEXT_INREG: { + if (!MI->getOperand(2).isImm()) { + report("G_SEXT_INREG expects an immediate operand #2", MI); + break; + } + + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); + verifyVectorElementMatch(DstTy, SrcTy, MI); + + int64_t Imm = MI->getOperand(2).getImm(); + if (Imm <= 0) + report("G_SEXT_INREG size must be >= 1", MI); + if (Imm >= SrcTy.getScalarSizeInBits()) + report("G_SEXT_INREG size must be less than source bit width", MI); + break; + } + case TargetOpcode::G_SHUFFLE_VECTOR: { + const MachineOperand &MaskOp = MI->getOperand(3); + if (!MaskOp.isShuffleMask()) { + report("Incorrect mask operand type for G_SHUFFLE_VECTOR", MI); + break; + } + + const Constant *Mask = MaskOp.getShuffleMask(); + auto *MaskVT = dyn_cast(Mask->getType()); + if (!MaskVT || !MaskVT->getElementType()->isIntegerTy(32)) { + report("Invalid shufflemask constant type", MI); + break; + } + + if (!Mask->getAggregateElement(0u)) { + report("Invalid shufflemask constant type", MI); + break; + } + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT Src0Ty = MRI->getType(MI->getOperand(1).getReg()); + LLT Src1Ty = MRI->getType(MI->getOperand(2).getReg()); + + if (Src0Ty != Src1Ty) + report("Source operands must be the same type", MI); + + if (Src0Ty.getScalarType() != DstTy.getScalarType()) + report("G_SHUFFLE_VECTOR cannot change element type", MI); + + // Don't check that all operands are vector because scalars are used in + // place of 1 element vectors. + int SrcNumElts = Src0Ty.isVector() ? Src0Ty.getNumElements() : 1; + int DstNumElts = DstTy.isVector() ? 
DstTy.getNumElements() : 1; + + SmallVector MaskIdxes; + ShuffleVectorInst::getShuffleMask(Mask, MaskIdxes); + + if (static_cast(MaskIdxes.size()) != DstNumElts) + report("Wrong result type for shufflemask", MI); + + for (int Idx : MaskIdxes) { + if (Idx < 0) + continue; + + if (Idx >= 2 * SrcNumElts) + report("Out of bounds shuffle index", MI); + } + + break; + } + case TargetOpcode::G_DYN_STACKALLOC: { + const MachineOperand &DstOp = MI->getOperand(0); + const MachineOperand &AllocOp = MI->getOperand(1); + const MachineOperand &AlignOp = MI->getOperand(2); + + if (!DstOp.isReg() || !MRI->getType(DstOp.getReg()).isPointer()) { + report("dst operand 0 must be a pointer type", MI); + break; + } + + if (!AllocOp.isReg() || !MRI->getType(AllocOp.getReg()).isScalar()) { + report("src operand 1 must be a scalar reg type", MI); + break; + } + + if (!AlignOp.isImm()) { + report("src operand 2 must be an immediate type", MI); + break; + } break; } default: @@ -1525,11 +1626,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { report("Operand should be tied", MO, MONum); else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum)) report("Tied def doesn't match MCInstrDesc", MO, MONum); - else if (TargetRegisterInfo::isPhysicalRegister(MO->getReg())) { + else if (Register::isPhysicalRegister(MO->getReg())) { const MachineOperand &MOTied = MI->getOperand(TiedTo); if (!MOTied.isReg()) report("Tied counterpart must be a register", &MOTied, TiedTo); - else if (TargetRegisterInfo::isPhysicalRegister(MOTied.getReg()) && + else if (Register::isPhysicalRegister(MOTied.getReg()) && MO->getReg() != MOTied.getReg()) report("Tied physical registers must match.", &MOTied, TiedTo); } @@ -1543,7 +1644,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { switch (MO->getType()) { case MachineOperand::MO_Register: { - const unsigned Reg = MO->getReg(); + const Register Reg = MO->getReg(); if (!Reg) return; if (MRI->tracksLiveness() && !MI->isDebugValue()) @@ -1581,7 +1682,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { // Check register classes. unsigned SubIdx = MO->getSubReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { if (SubIdx) { report("Illegal subregister index for physical register", MO, MONum); return; @@ -1817,7 +1918,7 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO, if (MO->isDead()) { LiveQueryResult LRQ = LR.Query(DefIdx); if (!LRQ.isDeadDef()) { - assert(TargetRegisterInfo::isVirtualRegister(VRegOrUnit) && + assert(Register::isVirtualRegister(VRegOrUnit) && "Expecting a virtual register."); // A dead subreg def only tells us that the specific subreg is dead. There // could be other non-dead defs of other subregs, or we could have other @@ -1845,8 +1946,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { addRegWithSubRegs(regsKilled, Reg); // Check that LiveVars knows this kill. 
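The verifier hunks above add structural checks for two generic opcodes: G_SEXT_INREG (the immediate width must be at least 1 and strictly less than the source scalar width) and G_SHUFFLE_VECTOR (one mask index per destination element, each index either negative/undef or below twice the source element count). A standalone sketch of the semantics those checks protect, using plain C++ stand-ins for the LLT and operand machinery:

// Sketch of sign-extend-in-register and shuffle-mask validation.
#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

// G_SEXT_INREG %dst, %src, Bits: only meaningful for 1 <= Bits < width(src).
int64_t sextInReg(int64_t Src, unsigned Bits, unsigned SrcWidth = 64) {
  assert(Bits >= 1 && Bits < SrcWidth && "the verifier would reject this");
  unsigned Shift = SrcWidth - Bits;
  // Shift the chosen low bits up, then arithmetic-shift back to replicate
  // the sign bit (implementation-typical behavior for signed right shift).
  return (int64_t)((uint64_t)Src << Shift) >> Shift;
}

// G_SHUFFLE_VECTOR mask: one index per destination element; each index is
// either undef (-1) or selects a lane from the two concatenated sources.
bool isValidShuffleMask(const std::vector<int> &Mask, int SrcNumElts,
                        int DstNumElts) {
  if ((int)Mask.size() != DstNumElts)
    return false;
  for (int Idx : Mask)
    if (Idx != -1 && (Idx < 0 || Idx >= 2 * SrcNumElts))
      return false;
  return true;
}

int main() {
  std::cout << sextInReg(0xFF, 8) << '\n';                     // -1
  std::cout << isValidShuffleMask({0, 4, 1, 5}, 4, 4) << '\n'; // 1
  std::cout << isValidShuffleMask({0, 9}, 4, 2) << '\n';       // 0
}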
- if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) && - MO->isKill()) { + if (LiveVars && Register::isVirtualRegister(Reg) && MO->isKill()) { LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); if (!is_contained(VI.Kills, MI)) report("Kill missing from LiveVariables", MO, MONum); @@ -1856,7 +1956,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (LiveInts && !LiveInts->isNotInMIMap(*MI)) { SlotIndex UseIdx = LiveInts->getInstructionIndex(*MI); // Check the cached regunit intervals. - if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) { + if (Register::isPhysicalRegister(Reg) && !isReserved(Reg)) { for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { if (MRI->isReservedRegUnit(*Units)) continue; @@ -1865,7 +1965,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { } } - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { if (LiveInts->hasInterval(Reg)) { // This is a virtual register interval. const LiveInterval &LI = LiveInts->getInterval(Reg); @@ -1900,7 +2000,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { // Use of a dead register. if (!regsLive.count(Reg)) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { // Reserved registers may be used even when 'dead'. bool Bad = !isReserved(Reg); // We are fine if just any subregister has a defined value. @@ -1922,7 +2022,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (!MOP.isReg() || !MOP.isImplicit()) continue; - if (!TargetRegisterInfo::isPhysicalRegister(MOP.getReg())) + if (!Register::isPhysicalRegister(MOP.getReg())) continue; for (MCSubRegIterator SubRegs(MOP.getReg(), TRI); SubRegs.isValid(); @@ -1960,7 +2060,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { addRegWithSubRegs(regsDefined, Reg); // Verify SSA form. 
- if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) && + if (MRI->isSSA() && Register::isVirtualRegister(Reg) && std::next(MRI->def_begin(Reg)) != MRI->def_end()) report("Multiple virtual register defs in SSA form", MO, MONum); @@ -1969,7 +2069,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { SlotIndex DefIdx = LiveInts->getInstructionIndex(*MI); DefIdx = DefIdx.getRegSlot(MO->isEarlyClobber()); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { if (LiveInts->hasInterval(Reg)) { const LiveInterval &LI = LiveInts->getInterval(Reg); checkLivenessAtDef(MO, MONum, DefIdx, LI, Reg); @@ -2007,7 +2107,7 @@ void MachineVerifier::visitMachineBundleAfter(const MachineInstr *MI) { while (!regMasks.empty()) { const uint32_t *Mask = regMasks.pop_back_val(); for (RegSet::iterator I = regsLive.begin(), E = regsLive.end(); I != E; ++I) - if (TargetRegisterInfo::isPhysicalRegister(*I) && + if (Register::isPhysicalRegister(*I) && MachineOperand::clobbersPhysReg(Mask, *I)) regsDead.push_back(*I); } @@ -2119,8 +2219,8 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock &MBB) { if (MODef.isTied() || MODef.isImplicit() || MODef.isInternalRead() || MODef.isEarlyClobber() || MODef.isDebug()) report("Unexpected flag on PHI operand", &MODef, 0); - unsigned DefReg = MODef.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DefReg)) + Register DefReg = MODef.getReg(); + if (!Register::isVirtualRegister(DefReg)) report("Expected first PHI operand to be a virtual register", &MODef, 0); for (unsigned I = 1, E = Phi.getNumOperands(); I != E; I += 2) { @@ -2212,7 +2312,7 @@ void MachineVerifier::visitMachineFunctionAfter() { void MachineVerifier::verifyLiveVariables() { assert(LiveVars && "Don't call verifyLiveVariables without LiveVars"); for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); for (const auto &MBB : *MF) { BBInfo &MInfo = MBBInfoMap[&MBB]; @@ -2238,7 +2338,7 @@ void MachineVerifier::verifyLiveVariables() { void MachineVerifier::verifyLiveIntervals() { assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts"); for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); // Spilling and splitting may leave unused registers around. Skip them. if (MRI->reg_nodbg_empty(Reg)) @@ -2315,11 +2415,11 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) { if (!MOI->isReg() || !MOI->isDef()) continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { if (MOI->getReg() != Reg) continue; } else { - if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || + if (!Register::isPhysicalRegister(MOI->getReg()) || !TRI->hasRegUnit(MOI->getReg(), Reg)) continue; } @@ -2402,7 +2502,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, return; // RegUnit intervals are allowed dead phis. 
- if (!TargetRegisterInfo::isVirtualRegister(Reg) && VNI->isPHIDef() && + if (!Register::isVirtualRegister(Reg) && VNI->isPHIDef() && S.start == VNI->def && S.end == VNI->def.getDeadSlot()) return; @@ -2446,7 +2546,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // The following checks only apply to virtual registers. Physreg liveness // is too weird to check. - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { // A live segment can end with either a redefinition, a kill flag on a // use, or a dead flag on a def. bool hasRead = false; @@ -2519,8 +2619,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, while (true) { assert(LiveInts->isLiveInToMBB(LR, &*MFI)); // We don't know how to track physregs into a landing pad. - if (!TargetRegisterInfo::isVirtualRegister(Reg) && - MFI->isEHPad()) { + if (!Register::isVirtualRegister(Reg) && MFI->isEHPad()) { if (&*MFI == EndMBB) break; ++MFI; @@ -2580,7 +2679,7 @@ void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg, void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { unsigned Reg = LI.reg; - assert(TargetRegisterInfo::isVirtualRegister(Reg)); + assert(Register::isVirtualRegister(Reg)); verifyLiveRange(LI, Reg); LaneBitmask Mask; diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp index 2db1e86905a4..d21eae222af0 100644 --- a/lib/CodeGen/MacroFusion.cpp +++ b/lib/CodeGen/MacroFusion.cpp @@ -176,7 +176,7 @@ std::unique_ptr llvm::createMacroFusionDAGMutation( ShouldSchedulePredTy shouldScheduleAdjacent) { if(EnableMacroFusion) - return llvm::make_unique(shouldScheduleAdjacent, true); + return std::make_unique(shouldScheduleAdjacent, true); return nullptr; } @@ -184,6 +184,6 @@ std::unique_ptr llvm::createBranchMacroFusionDAGMutation( ShouldSchedulePredTy shouldScheduleAdjacent) { if(EnableMacroFusion) - return llvm::make_unique(shouldScheduleAdjacent, false); + return std::make_unique(shouldScheduleAdjacent, false); return nullptr; } diff --git a/lib/CodeGen/ModuloSchedule.cpp b/lib/CodeGen/ModuloSchedule.cpp new file mode 100644 index 000000000000..7ce3c5861801 --- /dev/null +++ b/lib/CodeGen/ModuloSchedule.cpp @@ -0,0 +1,2022 @@ +//===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ModuloSchedule.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopUtils.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "pipeliner" +using namespace llvm; + +void ModuloSchedule::print(raw_ostream &OS) { + for (MachineInstr *MI : ScheduledInstrs) + OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI; +} + +//===----------------------------------------------------------------------===// +// ModuloScheduleExpander implementation +//===----------------------------------------------------------------------===// + +/// Return the register values for the operands of a Phi instruction. +/// This function assume the instruction is a Phi. +static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, + unsigned &InitVal, unsigned &LoopVal) { + assert(Phi.isPHI() && "Expecting a Phi."); + + InitVal = 0; + LoopVal = 0; + for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) + if (Phi.getOperand(i + 1).getMBB() != Loop) + InitVal = Phi.getOperand(i).getReg(); + else + LoopVal = Phi.getOperand(i).getReg(); + + assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure."); +} + +/// Return the Phi register value that comes from the incoming block. +static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) { + for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) + if (Phi.getOperand(i + 1).getMBB() != LoopBB) + return Phi.getOperand(i).getReg(); + return 0; +} + +/// Return the Phi register value that comes the loop block. +static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) { + for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) + if (Phi.getOperand(i + 1).getMBB() == LoopBB) + return Phi.getOperand(i).getReg(); + return 0; +} + +void ModuloScheduleExpander::expand() { + BB = Schedule.getLoop()->getTopBlock(); + Preheader = *BB->pred_begin(); + if (Preheader == BB) + Preheader = *std::next(BB->pred_begin()); + + // Iterate over the definitions in each instruction, and compute the + // stage difference for each use. Keep the maximum value. 
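getPhiRegs, getInitPhiReg and getLoopPhiReg above all rely on the MIR PHI operand layout: operand 0 is the def, followed by (incoming value, predecessor block) pairs. A standalone sketch with simplified stand-in types, purely for illustration:

// Sketch of picking the preheader-side and back-edge-side values of a PHI.
#include <iostream>
#include <string>
#include <utility>
#include <vector>

struct PhiSketch {
  unsigned DefReg;
  // Each incoming value is a (register, predecessor block name) pair.
  std::vector<std::pair<unsigned, std::string>> Incoming;
};

// Value flowing in from outside the loop (the preheader side).
unsigned getInitPhiReg(const PhiSketch &Phi, const std::string &LoopBB) {
  for (const auto &In : Phi.Incoming)
    if (In.second != LoopBB)
      return In.first;
  return 0;
}

// Value produced by the previous loop iteration (the back-edge side).
unsigned getLoopPhiReg(const PhiSketch &Phi, const std::string &LoopBB) {
  for (const auto &In : Phi.Incoming)
    if (In.second == LoopBB)
      return In.first;
  return 0;
}

int main() {
  // %5 = PHI %2, %bb.preheader, %7, %bb.loop
  PhiSketch Phi{5, {{2, "bb.preheader"}, {7, "bb.loop"}}};
  std::cout << getInitPhiReg(Phi, "bb.loop") << ' '
            << getLoopPhiReg(Phi, "bb.loop") << '\n'; // prints: 2 7
}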
+ for (MachineInstr *MI : Schedule.getInstructions()) { + int DefStage = Schedule.getStage(MI); + for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { + MachineOperand &Op = MI->getOperand(i); + if (!Op.isReg() || !Op.isDef()) + continue; + + Register Reg = Op.getReg(); + unsigned MaxDiff = 0; + bool PhiIsSwapped = false; + for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg), + EI = MRI.use_end(); + UI != EI; ++UI) { + MachineOperand &UseOp = *UI; + MachineInstr *UseMI = UseOp.getParent(); + int UseStage = Schedule.getStage(UseMI); + unsigned Diff = 0; + if (UseStage != -1 && UseStage >= DefStage) + Diff = UseStage - DefStage; + if (MI->isPHI()) { + if (isLoopCarried(*MI)) + ++Diff; + else + PhiIsSwapped = true; + } + MaxDiff = std::max(Diff, MaxDiff); + } + RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped); + } + } + + generatePipelinedLoop(); +} + +void ModuloScheduleExpander::generatePipelinedLoop() { + LoopInfo = TII->analyzeLoopForPipelining(BB); + assert(LoopInfo && "Must be able to analyze loop!"); + + // Create a new basic block for the kernel and add it to the CFG. + MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); + + unsigned MaxStageCount = Schedule.getNumStages() - 1; + + // Remember the registers that are used in different stages. The index is + // the iteration, or stage, that the instruction is scheduled in. This is + // a map between register names in the original block and the names created + // in each stage of the pipelined loop. + ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2]; + InstrMapTy InstrMap; + + SmallVector PrologBBs; + + // Generate the prolog instructions that set up the pipeline. + generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs); + MF.insert(BB->getIterator(), KernelBB); + + // Rearrange the instructions to generate the new, pipelined loop, + // and update register names as needed. + for (MachineInstr *CI : Schedule.getInstructions()) { + if (CI->isPHI()) + continue; + unsigned StageNum = Schedule.getStage(CI); + MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum); + updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap); + KernelBB->push_back(NewMI); + InstrMap[NewMI] = CI; + } + + // Copy any terminator instructions to the new kernel, and update + // names as needed. + for (MachineBasicBlock::iterator I = BB->getFirstTerminator(), + E = BB->instr_end(); + I != E; ++I) { + MachineInstr *NewMI = MF.CloneMachineInstr(&*I); + updateInstruction(NewMI, false, MaxStageCount, 0, VRMap); + KernelBB->push_back(NewMI); + InstrMap[NewMI] = &*I; + } + + NewKernel = KernelBB; + KernelBB->transferSuccessors(BB); + KernelBB->replaceSuccessor(BB, KernelBB); + + generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, + InstrMap, MaxStageCount, MaxStageCount, false); + generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, InstrMap, + MaxStageCount, MaxStageCount, false); + + LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump();); + + SmallVector EpilogBBs; + // Generate the epilog instructions to complete the pipeline. + generateEpilog(MaxStageCount, KernelBB, VRMap, EpilogBBs, PrologBBs); + + // We need this step because the register allocation doesn't handle some + // situations well, so we insert copies to help out. + splitLifetimes(KernelBB, EpilogBBs); + + // Remove dead instructions due to loop induction variables. + removeDeadInstructions(KernelBB, EpilogBBs); + + // Add branches between prolog and epilog blocks. 
+ addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap); + + delete[] VRMap; +} + +void ModuloScheduleExpander::cleanup() { + // Remove the original loop since it's no longer referenced. + for (auto &I : *BB) + LIS.RemoveMachineInstrFromMaps(I); + BB->clear(); + BB->eraseFromParent(); +} + +/// Generate the pipeline prolog code. +void ModuloScheduleExpander::generateProlog(unsigned LastStage, + MachineBasicBlock *KernelBB, + ValueMapTy *VRMap, + MBBVectorTy &PrologBBs) { + MachineBasicBlock *PredBB = Preheader; + InstrMapTy InstrMap; + + // Generate a basic block for each stage, not including the last stage, + // which will be generated in the kernel. Each basic block may contain + // instructions from multiple stages/iterations. + for (unsigned i = 0; i < LastStage; ++i) { + // Create and insert the prolog basic block prior to the original loop + // basic block. The original loop is removed later. + MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); + PrologBBs.push_back(NewBB); + MF.insert(BB->getIterator(), NewBB); + NewBB->transferSuccessors(PredBB); + PredBB->addSuccessor(NewBB); + PredBB = NewBB; + + // Generate instructions for each appropriate stage. Process instructions + // in original program order. + for (int StageNum = i; StageNum >= 0; --StageNum) { + for (MachineBasicBlock::iterator BBI = BB->instr_begin(), + BBE = BB->getFirstTerminator(); + BBI != BBE; ++BBI) { + if (Schedule.getStage(&*BBI) == StageNum) { + if (BBI->isPHI()) + continue; + MachineInstr *NewMI = + cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum); + updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap); + NewBB->push_back(NewMI); + InstrMap[NewMI] = &*BBI; + } + } + } + rewritePhiValues(NewBB, i, VRMap, InstrMap); + LLVM_DEBUG({ + dbgs() << "prolog:\n"; + NewBB->dump(); + }); + } + + PredBB->replaceSuccessor(BB, KernelBB); + + // Check if we need to remove the branch from the preheader to the original + // loop, and replace it with a branch to the new loop. + unsigned numBranches = TII->removeBranch(*Preheader); + if (numBranches) { + SmallVector Cond; + TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc()); + } +} + +/// Generate the pipeline epilog code. The epilog code finishes the iterations +/// that were started in either the prolog or the kernel. We create a basic +/// block for each stage that needs to complete. +void ModuloScheduleExpander::generateEpilog(unsigned LastStage, + MachineBasicBlock *KernelBB, + ValueMapTy *VRMap, + MBBVectorTy &EpilogBBs, + MBBVectorTy &PrologBBs) { + // We need to change the branch from the kernel to the first epilog block, so + // this call to analyze branch uses the kernel rather than the original BB. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector Cond; + bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond); + assert(!checkBranch && "generateEpilog must be able to analyze the branch"); + if (checkBranch) + return; + + MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin(); + if (*LoopExitI == KernelBB) + ++LoopExitI; + assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor"); + MachineBasicBlock *LoopExitBB = *LoopExitI; + + MachineBasicBlock *PredBB = KernelBB; + MachineBasicBlock *EpilogStart = LoopExitBB; + InstrMapTy InstrMap; + + // Generate a basic block for each stage, not including the last stage, + // which was generated for the kernel. Each basic block may contain + // instructions from multiple stages/iterations. 
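generateProlog and generateEpilog above build the surrounding structure of the expanded pipeline: with S stages there are S-1 prolog blocks that fill the pipeline, one kernel block that runs a full set of overlapped stages, and S-1 epilog blocks that drain it. The sketch below simulates that layout for an invented 3-stage schedule over 5 iterations; the time model (stage s of iteration i runs at step i + s) is a simplification chosen for illustration, not a statement about the expander's exact schedule.

// Which (iteration, stage) pairs land in each prolog/kernel/epilog block.
#include <iostream>

int main() {
  const int Stages = 3;     // stages per iteration (S)
  const int Iterations = 5; // trip count of the original loop (N)

  // Time step T runs stage S of iteration T - S for every valid S.
  for (int T = 0; T < Iterations + Stages - 1; ++T) {
    if (T < Stages - 1)
      std::cout << "prolog " << T << ":";
    else if (T < Iterations)
      std::cout << "kernel  :";
    else
      std::cout << "epilog " << (T - Iterations) << ":";
    for (int S = 0; S < Stages; ++S) {
      int Iter = T - S;
      if (Iter >= 0 && Iter < Iterations)
        std::cout << "  (iter " << Iter << ", stage " << S << ")";
    }
    std::cout << '\n';
  }
}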
+ int EpilogStage = LastStage + 1; + for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) { + MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(); + EpilogBBs.push_back(NewBB); + MF.insert(BB->getIterator(), NewBB); + + PredBB->replaceSuccessor(LoopExitBB, NewBB); + NewBB->addSuccessor(LoopExitBB); + + if (EpilogStart == LoopExitBB) + EpilogStart = NewBB; + + // Add instructions to the epilog depending on the current block. + // Process instructions in original program order. + for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) { + for (auto &BBI : *BB) { + if (BBI.isPHI()) + continue; + MachineInstr *In = &BBI; + if ((unsigned)Schedule.getStage(In) == StageNum) { + // Instructions with memoperands in the epilog are updated with + // conservative values. + MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0); + updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap); + NewBB->push_back(NewMI); + InstrMap[NewMI] = In; + } + } + } + generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, + InstrMap, LastStage, EpilogStage, i == 1); + generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, InstrMap, + LastStage, EpilogStage, i == 1); + PredBB = NewBB; + + LLVM_DEBUG({ + dbgs() << "epilog:\n"; + NewBB->dump(); + }); + } + + // Fix any Phi nodes in the loop exit block. + LoopExitBB->replacePhiUsesWith(BB, PredBB); + + // Create a branch to the new epilog from the kernel. + // Remove the original branch and add a new branch to the epilog. + TII->removeBranch(*KernelBB); + TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc()); + // Add a branch to the loop exit. + if (EpilogBBs.size() > 0) { + MachineBasicBlock *LastEpilogBB = EpilogBBs.back(); + SmallVector Cond1; + TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc()); + } +} + +/// Replace all uses of FromReg that appear outside the specified +/// basic block with ToReg. +static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, + MachineBasicBlock *MBB, + MachineRegisterInfo &MRI, + LiveIntervals &LIS) { + for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg), + E = MRI.use_end(); + I != E;) { + MachineOperand &O = *I; + ++I; + if (O.getParent()->getParent() != MBB) + O.setReg(ToReg); + } + if (!LIS.hasInterval(ToReg)) + LIS.createEmptyInterval(ToReg); +} + +/// Return true if the register has a use that occurs outside the +/// specified loop. +static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, + MachineRegisterInfo &MRI) { + for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg), + E = MRI.use_end(); + I != E; ++I) + if (I->getParent()->getParent() != BB) + return true; + return false; +} + +/// Generate Phis for the specific block in the generated pipelined code. +/// This function looks at the Phis from the original code to guide the +/// creation of new Phis. +void ModuloScheduleExpander::generateExistingPhis( + MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, + MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap, + unsigned LastStageNum, unsigned CurStageNum, bool IsLast) { + // Compute the stage number for the initial value of the Phi, which + // comes from the prolog. The prolog to use depends on to which kernel/ + // epilog that we're adding the Phi. 
+ unsigned PrologStage = 0; + unsigned PrevStage = 0; + bool InKernel = (LastStageNum == CurStageNum); + if (InKernel) { + PrologStage = LastStageNum - 1; + PrevStage = CurStageNum; + } else { + PrologStage = LastStageNum - (CurStageNum - LastStageNum); + PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1; + } + + for (MachineBasicBlock::iterator BBI = BB->instr_begin(), + BBE = BB->getFirstNonPHI(); + BBI != BBE; ++BBI) { + Register Def = BBI->getOperand(0).getReg(); + + unsigned InitVal = 0; + unsigned LoopVal = 0; + getPhiRegs(*BBI, BB, InitVal, LoopVal); + + unsigned PhiOp1 = 0; + // The Phi value from the loop body typically is defined in the loop, but + // not always. So, we need to check if the value is defined in the loop. + unsigned PhiOp2 = LoopVal; + if (VRMap[LastStageNum].count(LoopVal)) + PhiOp2 = VRMap[LastStageNum][LoopVal]; + + int StageScheduled = Schedule.getStage(&*BBI); + int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal)); + unsigned NumStages = getStagesForReg(Def, CurStageNum); + if (NumStages == 0) { + // We don't need to generate a Phi anymore, but we need to rename any uses + // of the Phi value. + unsigned NewReg = VRMap[PrevStage][LoopVal]; + rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def, + InitVal, NewReg); + if (VRMap[CurStageNum].count(LoopVal)) + VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal]; + } + // Adjust the number of Phis needed depending on the number of prologs left, + // and the distance from where the Phi is first scheduled. The number of + // Phis cannot exceed the number of prolog stages. Each stage can + // potentially define two values. + unsigned MaxPhis = PrologStage + 2; + if (!InKernel && (int)PrologStage <= LoopValStage) + MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1); + unsigned NumPhis = std::min(NumStages, MaxPhis); + + unsigned NewReg = 0; + unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled; + // In the epilog, we may need to look back one stage to get the correct + // Phi name because the epilog and prolog blocks execute the same stage. + // The correct name is from the previous block only when the Phi has + // been completely scheduled prior to the epilog, and Phi value is not + // needed in multiple stages. + int StageDiff = 0; + if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 && + NumPhis == 1) + StageDiff = 1; + // Adjust the computations below when the phi and the loop definition + // are scheduled in different stages. + if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage) + StageDiff = StageScheduled - LoopValStage; + for (unsigned np = 0; np < NumPhis; ++np) { + // If the Phi hasn't been scheduled, then use the initial Phi operand + // value. Otherwise, use the scheduled version of the instruction. This + // is a little complicated when a Phi references another Phi. + if (np > PrologStage || StageScheduled >= (int)LastStageNum) + PhiOp1 = InitVal; + // Check if the Phi has already been scheduled in a prolog stage. + else if (PrologStage >= AccessStage + StageDiff + np && + VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0) + PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal]; + // Check if the Phi has already been scheduled, but the loop instruction + // is either another Phi, or doesn't occur in the loop. + else if (PrologStage >= AccessStage + StageDiff + np) { + // If the Phi references another Phi, we need to examine the other + // Phi to get the correct value. 
+ PhiOp1 = LoopVal; + MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1); + int Indirects = 1; + while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) { + int PhiStage = Schedule.getStage(InstOp1); + if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects) + PhiOp1 = getInitPhiReg(*InstOp1, BB); + else + PhiOp1 = getLoopPhiReg(*InstOp1, BB); + InstOp1 = MRI.getVRegDef(PhiOp1); + int PhiOpStage = Schedule.getStage(InstOp1); + int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0); + if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np && + VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) { + PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1]; + break; + } + ++Indirects; + } + } else + PhiOp1 = InitVal; + // If this references a generated Phi in the kernel, get the Phi operand + // from the incoming block. + if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) + if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB) + PhiOp1 = getInitPhiReg(*InstOp1, KernelBB); + + MachineInstr *PhiInst = MRI.getVRegDef(LoopVal); + bool LoopDefIsPhi = PhiInst && PhiInst->isPHI(); + // In the epilog, a map lookup is needed to get the value from the kernel, + // or previous epilog block. How is does this depends on if the + // instruction is scheduled in the previous block. + if (!InKernel) { + int StageDiffAdj = 0; + if (LoopValStage != -1 && StageScheduled > LoopValStage) + StageDiffAdj = StageScheduled - LoopValStage; + // Use the loop value defined in the kernel, unless the kernel + // contains the last definition of the Phi. + if (np == 0 && PrevStage == LastStageNum && + (StageScheduled != 0 || LoopValStage != 0) && + VRMap[PrevStage - StageDiffAdj].count(LoopVal)) + PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal]; + // Use the value defined by the Phi. We add one because we switch + // from looking at the loop value to the Phi definition. + else if (np > 0 && PrevStage == LastStageNum && + VRMap[PrevStage - np + 1].count(Def)) + PhiOp2 = VRMap[PrevStage - np + 1][Def]; + // Use the loop value defined in the kernel. + else if (static_cast(LoopValStage) > PrologStage + 1 && + VRMap[PrevStage - StageDiffAdj - np].count(LoopVal)) + PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal]; + // Use the value defined by the Phi, unless we're generating the first + // epilog and the Phi refers to a Phi in a different stage. + else if (VRMap[PrevStage - np].count(Def) && + (!LoopDefIsPhi || (PrevStage != LastStageNum) || + (LoopValStage == StageScheduled))) + PhiOp2 = VRMap[PrevStage - np][Def]; + } + + // Check if we can reuse an existing Phi. This occurs when a Phi + // references another Phi, and the other Phi is scheduled in an + // earlier stage. We can try to reuse an existing Phi up until the last + // stage of the current Phi. + if (LoopDefIsPhi) { + if (static_cast(PrologStage - np) >= StageScheduled) { + int LVNumStages = getStagesForPhi(LoopVal); + int StageDiff = (StageScheduled - LoopValStage); + LVNumStages -= StageDiff; + // Make sure the loop value Phi has been processed already. + if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) { + NewReg = PhiOp2; + unsigned ReuseStage = CurStageNum; + if (isLoopCarried(*PhiInst)) + ReuseStage -= LVNumStages; + // Check if the Phi to reuse has been generated yet. If not, then + // there is nothing to reuse. 
+ if (VRMap[ReuseStage - np].count(LoopVal)) { + NewReg = VRMap[ReuseStage - np][LoopVal]; + + rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, + Def, NewReg); + // Update the map with the new Phi name. + VRMap[CurStageNum - np][Def] = NewReg; + PhiOp2 = NewReg; + if (VRMap[LastStageNum - np - 1].count(LoopVal)) + PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal]; + + if (IsLast && np == NumPhis - 1) + replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); + continue; + } + } + } + if (InKernel && StageDiff > 0 && + VRMap[CurStageNum - StageDiff - np].count(LoopVal)) + PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal]; + } + + const TargetRegisterClass *RC = MRI.getRegClass(Def); + NewReg = MRI.createVirtualRegister(RC); + + MachineInstrBuilder NewPhi = + BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(), + TII->get(TargetOpcode::PHI), NewReg); + NewPhi.addReg(PhiOp1).addMBB(BB1); + NewPhi.addReg(PhiOp2).addMBB(BB2); + if (np == 0) + InstrMap[NewPhi] = &*BBI; + + // We define the Phis after creating the new pipelined code, so + // we need to rename the Phi values in scheduled instructions. + + unsigned PrevReg = 0; + if (InKernel && VRMap[PrevStage - np].count(LoopVal)) + PrevReg = VRMap[PrevStage - np][LoopVal]; + rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def, + NewReg, PrevReg); + // If the Phi has been scheduled, use the new name for rewriting. + if (VRMap[CurStageNum - np].count(Def)) { + unsigned R = VRMap[CurStageNum - np][Def]; + rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R, + NewReg); + } + + // Check if we need to rename any uses that occurs after the loop. The + // register to replace depends on whether the Phi is scheduled in the + // epilog. + if (IsLast && np == NumPhis - 1) + replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); + + // In the kernel, a dependent Phi uses the value from this Phi. + if (InKernel) + PhiOp2 = NewReg; + + // Update the map with the new Phi name. + VRMap[CurStageNum - np][Def] = NewReg; + } + + while (NumPhis++ < NumStages) { + rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def, + NewReg, 0); + } + + // Check if we need to rename a Phi that has been eliminated due to + // scheduling. + if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal)) + replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS); + } +} + +/// Generate Phis for the specified block in the generated pipelined code. +/// These are new Phis needed because the definition is scheduled after the +/// use in the pipelined sequence. +void ModuloScheduleExpander::generatePhis( + MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, + MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap, + unsigned LastStageNum, unsigned CurStageNum, bool IsLast) { + // Compute the stage number that contains the initial Phi value, and + // the Phi from the previous stage. 
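The VRMap array threaded through generateExistingPhis and generatePhis records, per emitted stage, the new virtual register that stands for each register defined in the original loop body; the rewriting code then looks values up as VRMap[Stage][OrigReg]. A standalone sketch of that bookkeeping with invented register numbers, purely for illustration:

// One map per emitted stage: original vreg -> the copy created for that stage.
#include <iostream>
#include <map>
#include <vector>

using ValueMap = std::map<unsigned, unsigned>;

int main() {
  const unsigned NumStages = 3;
  std::vector<ValueMap> VRMap(NumStages);

  unsigned NextVReg = 100;    // pretend virtual-register allocator
  const unsigned OrigDef = 7; // a def in the original loop body

  // Cloning the defining instruction once per stage creates a new name each
  // time; the map lets later uses in that stage find the right one.
  for (unsigned Stage = 0; Stage < NumStages; ++Stage)
    VRMap[Stage][OrigDef] = NextVReg++;

  // A use scheduled one stage after its def reads the previous stage's copy.
  unsigned UseStage = 2;
  unsigned Rewritten = VRMap[UseStage - 1][OrigDef];
  std::cout << "use of %" << OrigDef << " in stage " << UseStage
            << " is rewritten to %" << Rewritten << '\n'; // %101
}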
+ unsigned PrologStage = 0; + unsigned PrevStage = 0; + unsigned StageDiff = CurStageNum - LastStageNum; + bool InKernel = (StageDiff == 0); + if (InKernel) { + PrologStage = LastStageNum - 1; + PrevStage = CurStageNum; + } else { + PrologStage = LastStageNum - StageDiff; + PrevStage = LastStageNum + StageDiff - 1; + } + + for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(), + BBE = BB->instr_end(); + BBI != BBE; ++BBI) { + for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = BBI->getOperand(i); + if (!MO.isReg() || !MO.isDef() || + !Register::isVirtualRegister(MO.getReg())) + continue; + + int StageScheduled = Schedule.getStage(&*BBI); + assert(StageScheduled != -1 && "Expecting scheduled instruction."); + Register Def = MO.getReg(); + unsigned NumPhis = getStagesForReg(Def, CurStageNum); + // An instruction scheduled in stage 0 and is used after the loop + // requires a phi in the epilog for the last definition from either + // the kernel or prolog. + if (!InKernel && NumPhis == 0 && StageScheduled == 0 && + hasUseAfterLoop(Def, BB, MRI)) + NumPhis = 1; + if (!InKernel && (unsigned)StageScheduled > PrologStage) + continue; + + unsigned PhiOp2 = VRMap[PrevStage][Def]; + if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2)) + if (InstOp2->isPHI() && InstOp2->getParent() == NewBB) + PhiOp2 = getLoopPhiReg(*InstOp2, BB2); + // The number of Phis can't exceed the number of prolog stages. The + // prolog stage number is zero based. + if (NumPhis > PrologStage + 1 - StageScheduled) + NumPhis = PrologStage + 1 - StageScheduled; + for (unsigned np = 0; np < NumPhis; ++np) { + unsigned PhiOp1 = VRMap[PrologStage][Def]; + if (np <= PrologStage) + PhiOp1 = VRMap[PrologStage - np][Def]; + if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) { + if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB) + PhiOp1 = getInitPhiReg(*InstOp1, KernelBB); + if (InstOp1->isPHI() && InstOp1->getParent() == NewBB) + PhiOp1 = getInitPhiReg(*InstOp1, NewBB); + } + if (!InKernel) + PhiOp2 = VRMap[PrevStage - np][Def]; + + const TargetRegisterClass *RC = MRI.getRegClass(Def); + Register NewReg = MRI.createVirtualRegister(RC); + + MachineInstrBuilder NewPhi = + BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(), + TII->get(TargetOpcode::PHI), NewReg); + NewPhi.addReg(PhiOp1).addMBB(BB1); + NewPhi.addReg(PhiOp2).addMBB(BB2); + if (np == 0) + InstrMap[NewPhi] = &*BBI; + + // Rewrite uses and update the map. The actions depend upon whether + // we generating code for the kernel or epilog blocks. + if (InKernel) { + rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1, + NewReg); + rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2, + NewReg); + + PhiOp2 = NewReg; + VRMap[PrevStage - np - 1][Def] = NewReg; + } else { + VRMap[CurStageNum - np][Def] = NewReg; + if (np == NumPhis - 1) + rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def, + NewReg); + } + if (IsLast && np == NumPhis - 1) + replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); + } + } + } +} + +/// Remove instructions that generate values with no uses. +/// Typically, these are induction variable operations that generate values +/// used in the loop itself. A dead instruction has a definition with +/// no uses, or uses that occur in the original loop only. +void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB, + MBBVectorTy &EpilogBBs) { + // For each epilog block, check that the value defined by each instruction + // is used. If not, delete it. 
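// Illustrative note (not from the original sources; opcode and register names
// are hypothetical): the typical dead value removed below is an induction
// update that was cloned into an epilog, e.g.
//   %iv.next.epilog = ADD %iv, 8
// whose only readers sit in the original loop block BB; since BB is discarded
// once expansion finishes, such a clone has no real uses.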
+ for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(), + MBE = EpilogBBs.rend(); + MBB != MBE; ++MBB) + for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(), + ME = (*MBB)->instr_rend(); + MI != ME;) { + // From DeadMachineInstructionElem. Don't delete inline assembly. + if (MI->isInlineAsm()) { + ++MI; + continue; + } + bool SawStore = false; + // Check if it's safe to remove the instruction due to side effects. + // We can, and want to, remove Phis here. + if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) { + ++MI; + continue; + } + bool used = true; + for (MachineInstr::mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); + MOI != MOE; ++MOI) { + if (!MOI->isReg() || !MOI->isDef()) + continue; + Register reg = MOI->getReg(); + // Assume physical registers are used, unless they are marked dead. + if (Register::isPhysicalRegister(reg)) { + used = !MOI->isDead(); + if (used) + break; + continue; + } + unsigned realUses = 0; + for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg), + EI = MRI.use_end(); + UI != EI; ++UI) { + // Check if there are any uses that occur only in the original + // loop. If so, that's not a real use. + if (UI->getParent()->getParent() != BB) { + realUses++; + used = true; + break; + } + } + if (realUses > 0) + break; + used = false; + } + if (!used) { + LIS.RemoveMachineInstrFromMaps(*MI); + MI++->eraseFromParent(); + continue; + } + ++MI; + } + // In the kernel block, check if we can remove a Phi that generates a value + // used in an instruction removed in the epilog block. + for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(), + BBE = KernelBB->getFirstNonPHI(); + BBI != BBE;) { + MachineInstr *MI = &*BBI; + ++BBI; + Register reg = MI->getOperand(0).getReg(); + if (MRI.use_begin(reg) == MRI.use_end()) { + LIS.RemoveMachineInstrFromMaps(*MI); + MI->eraseFromParent(); + } + } +} + +/// For loop carried definitions, we split the lifetime of a virtual register +/// that has uses past the definition in the next iteration. A copy with a new +/// virtual register is inserted before the definition, which helps with +/// generating a better register assignment. +/// +/// v1 = phi(a, v2) v1 = phi(a, v2) +/// v2 = phi(b, v3) v2 = phi(b, v3) +/// v3 = .. v4 = copy v1 +/// .. = V1 v3 = .. +/// .. = v4 +void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB, + MBBVectorTy &EpilogBBs) { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + for (auto &PHI : KernelBB->phis()) { + Register Def = PHI.getOperand(0).getReg(); + // Check for any Phi definition that used as an operand of another Phi + // in the same block. + for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def), + E = MRI.use_instr_end(); + I != E; ++I) { + if (I->isPHI() && I->getParent() == KernelBB) { + // Get the loop carried definition. + unsigned LCDef = getLoopPhiReg(PHI, KernelBB); + if (!LCDef) + continue; + MachineInstr *MI = MRI.getVRegDef(LCDef); + if (!MI || MI->getParent() != KernelBB || MI->isPHI()) + continue; + // Search through the rest of the block looking for uses of the Phi + // definition. If one occurs, then split the lifetime. + unsigned SplitReg = 0; + for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI), + KernelBB->instr_end())) + if (BBJ.readsRegister(Def)) { + // We split the lifetime when we find the first use. 
+ if (SplitReg == 0) { + SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def)); + BuildMI(*KernelBB, MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), SplitReg) + .addReg(Def); + } + BBJ.substituteRegister(Def, SplitReg, 0, *TRI); + } + if (!SplitReg) + continue; + // Search through each of the epilog blocks for any uses to be renamed. + for (auto &Epilog : EpilogBBs) + for (auto &I : *Epilog) + if (I.readsRegister(Def)) + I.substituteRegister(Def, SplitReg, 0, *TRI); + break; + } + } + } +} + +/// Remove the incoming block from the Phis in a basic block. +static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) { + for (MachineInstr &MI : *BB) { + if (!MI.isPHI()) + break; + for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) + if (MI.getOperand(i + 1).getMBB() == Incoming) { + MI.RemoveOperand(i + 1); + MI.RemoveOperand(i); + break; + } + } +} + +/// Create branches from each prolog basic block to the appropriate epilog +/// block. These edges are needed if the loop ends before reaching the +/// kernel. +void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB, + MBBVectorTy &PrologBBs, + MachineBasicBlock *KernelBB, + MBBVectorTy &EpilogBBs, + ValueMapTy *VRMap) { + assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch"); + MachineBasicBlock *LastPro = KernelBB; + MachineBasicBlock *LastEpi = KernelBB; + + // Start from the blocks connected to the kernel and work "out" + // to the first prolog and the last epilog blocks. + SmallVector PrevInsts; + unsigned MaxIter = PrologBBs.size() - 1; + for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) { + // Add branches to the prolog that go to the corresponding + // epilog, and the fall-thru prolog/kernel block. + MachineBasicBlock *Prolog = PrologBBs[j]; + MachineBasicBlock *Epilog = EpilogBBs[i]; + + SmallVector Cond; + Optional StaticallyGreater = + LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond); + unsigned numAdded = 0; + if (!StaticallyGreater.hasValue()) { + Prolog->addSuccessor(Epilog); + numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc()); + } else if (*StaticallyGreater == false) { + Prolog->addSuccessor(Epilog); + Prolog->removeSuccessor(LastPro); + LastEpi->removeSuccessor(Epilog); + numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc()); + removePhis(Epilog, LastEpi); + // Remove the blocks that are no longer referenced. + if (LastPro != LastEpi) { + LastEpi->clear(); + LastEpi->eraseFromParent(); + } + if (LastPro == KernelBB) { + LoopInfo->disposed(); + NewKernel = nullptr; + } + LastPro->clear(); + LastPro->eraseFromParent(); + } else { + numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc()); + removePhis(Epilog, Prolog); + } + LastPro = Prolog; + LastEpi = Epilog; + for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(), + E = Prolog->instr_rend(); + I != E && numAdded > 0; ++I, --numAdded) + updateInstruction(&*I, false, j, 0, VRMap); + } + + if (NewKernel) { + LoopInfo->setPreheader(PrologBBs[MaxIter]); + LoopInfo->adjustTripCount(-(MaxIter + 1)); + } +} + +/// Return true if we can compute the amount the instruction changes +/// during each iteration. Set Delta to the amount of the change. 
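// Worked example (hedged; the opcode shown is hypothetical): if the memory
// access below uses %base and the loop advances it once per iteration with
//   %base.next = ADD %base, 8
// then following the phi for %base to that update yields Delta = 8, i.e. the
// accessed address moves by 8 bytes each iteration.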
+bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const MachineOperand *BaseOp; + int64_t Offset; + if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) + return false; + + if (!BaseOp->isReg()) + return false; + + Register BaseReg = BaseOp->getReg(); + + MachineRegisterInfo &MRI = MF.getRegInfo(); + // Check if there is a Phi. If so, get the definition in the loop. + MachineInstr *BaseDef = MRI.getVRegDef(BaseReg); + if (BaseDef && BaseDef->isPHI()) { + BaseReg = getLoopPhiReg(*BaseDef, MI.getParent()); + BaseDef = MRI.getVRegDef(BaseReg); + } + if (!BaseDef) + return false; + + int D = 0; + if (!TII->getIncrementValue(*BaseDef, D) && D >= 0) + return false; + + Delta = D; + return true; +} + +/// Update the memory operand with a new offset when the pipeliner +/// generates a new copy of the instruction that refers to a +/// different memory location. +void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI, + MachineInstr &OldMI, + unsigned Num) { + if (Num == 0) + return; + // If the instruction has memory operands, then adjust the offset + // when the instruction appears in different stages. + if (NewMI.memoperands_empty()) + return; + SmallVector NewMMOs; + for (MachineMemOperand *MMO : NewMI.memoperands()) { + // TODO: Figure out whether isAtomic is really necessary (see D57601). + if (MMO->isVolatile() || MMO->isAtomic() || + (MMO->isInvariant() && MMO->isDereferenceable()) || + (!MMO->getValue())) { + NewMMOs.push_back(MMO); + continue; + } + unsigned Delta; + if (Num != UINT_MAX && computeDelta(OldMI, Delta)) { + int64_t AdjOffset = Delta * Num; + NewMMOs.push_back( + MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize())); + } else { + NewMMOs.push_back( + MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize)); + } + } + NewMI.setMemRefs(MF, NewMMOs); +} + +/// Clone the instruction for the new pipelined loop and update the +/// memory operands, if needed. +MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI, + unsigned CurStageNum, + unsigned InstStageNum) { + MachineInstr *NewMI = MF.CloneMachineInstr(OldMI); + // Check for tied operands in inline asm instructions. This should be handled + // elsewhere, but I'm not sure of the best solution. + if (OldMI->isInlineAsm()) + for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) { + const auto &MO = OldMI->getOperand(i); + if (MO.isReg() && MO.isUse()) + break; + unsigned UseIdx; + if (OldMI->isRegTiedToUseOperand(i, &UseIdx)) + NewMI->tieOperands(i, UseIdx); + } + updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum); + return NewMI; +} + +/// Clone the instruction for the new pipelined loop. If needed, this +/// function updates the instruction using the values saved in the +/// InstrChanges structure. 
+MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
+    MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
+  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
+  auto It = InstrChanges.find(OldMI);
+  if (It != InstrChanges.end()) {
+    std::pair<unsigned, int64_t> RegAndOffset = It->second;
+    unsigned BasePos, OffsetPos;
+    if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
+      return nullptr;
+    int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
+    MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
+    if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
+      NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
+    NewMI->getOperand(OffsetPos).setImm(NewOffset);
+  }
+  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
+  return NewMI;
+}
+
+/// Update the machine instruction with new virtual registers. This
+/// function may change the definitions and/or uses.
+void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
+                                               bool LastDef,
+                                               unsigned CurStageNum,
+                                               unsigned InstrStageNum,
+                                               ValueMapTy *VRMap) {
+  for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = NewMI->getOperand(i);
+    if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
+      continue;
+    Register reg = MO.getReg();
+    if (MO.isDef()) {
+      // Create a new virtual register for the definition.
+      const TargetRegisterClass *RC = MRI.getRegClass(reg);
+      Register NewReg = MRI.createVirtualRegister(RC);
+      MO.setReg(NewReg);
+      VRMap[CurStageNum][reg] = NewReg;
+      if (LastDef)
+        replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
+    } else if (MO.isUse()) {
+      MachineInstr *Def = MRI.getVRegDef(reg);
+      // Compute the stage that contains the last definition for this
+      // instruction.
+      int DefStageNum = Schedule.getStage(Def);
+      unsigned StageNum = CurStageNum;
+      if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
+        // Compute the difference in stages between the definition and the use.
+        unsigned StageDiff = (InstrStageNum - DefStageNum);
+        // Make an adjustment to get the last definition.
+        StageNum -= StageDiff;
+      }
+      if (VRMap[StageNum].count(reg))
+        MO.setReg(VRMap[StageNum][reg]);
+    }
+  }
+}
+
+/// Return the instruction in the loop that defines the register.
+/// If the definition is a Phi, then follow the Phi operand to
+/// the instruction in the loop.
+MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
+  SmallPtrSet<MachineInstr *, 8> Visited;
+  MachineInstr *Def = MRI.getVRegDef(Reg);
+  while (Def->isPHI()) {
+    if (!Visited.insert(Def).second)
+      break;
+    for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
+      if (Def->getOperand(i + 1).getMBB() == BB) {
+        Def = MRI.getVRegDef(Def->getOperand(i).getReg());
+        break;
+      }
+  }
+  return Def;
+}
+
+/// Return the new name for the value from the previous stage.
+unsigned ModuloScheduleExpander::getPrevMapVal(
+    unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
+    ValueMapTy *VRMap, MachineBasicBlock *BB) {
+  unsigned PrevVal = 0;
+  if (StageNum > PhiStage) {
+    MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
+    if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
+      // The name is defined in the previous stage.
+      PrevVal = VRMap[StageNum - 1][LoopVal];
+    else if (VRMap[StageNum].count(LoopVal))
+      // The previous name is defined in the current stage when the
+      // instruction order is swapped.
+      PrevVal = VRMap[StageNum][LoopVal];
+    else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
+      // The loop value hasn't yet been scheduled.
+ PrevVal = LoopVal; + else if (StageNum == PhiStage + 1) + // The loop value is another phi, which has not been scheduled. + PrevVal = getInitPhiReg(*LoopInst, BB); + else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB) + // The loop value is another phi, which has been scheduled. + PrevVal = + getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB), + LoopStage, VRMap, BB); + } + return PrevVal; +} + +/// Rewrite the Phi values in the specified block to use the mappings +/// from the initial operand. Once the Phi is scheduled, we switch +/// to using the loop value instead of the Phi value, so those names +/// do not need to be rewritten. +void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB, + unsigned StageNum, + ValueMapTy *VRMap, + InstrMapTy &InstrMap) { + for (auto &PHI : BB->phis()) { + unsigned InitVal = 0; + unsigned LoopVal = 0; + getPhiRegs(PHI, BB, InitVal, LoopVal); + Register PhiDef = PHI.getOperand(0).getReg(); + + unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef)); + unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal)); + unsigned NumPhis = getStagesForPhi(PhiDef); + if (NumPhis > StageNum) + NumPhis = StageNum; + for (unsigned np = 0; np <= NumPhis; ++np) { + unsigned NewVal = + getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB); + if (!NewVal) + NewVal = InitVal; + rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef, + NewVal); + } + } +} + +/// Rewrite a previously scheduled instruction to use the register value +/// from the new instruction. Make sure the instruction occurs in the +/// basic block, and we don't change the uses in the new instruction. +void ModuloScheduleExpander::rewriteScheduledInstr( + MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum, + unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg, + unsigned PrevReg) { + bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1); + int StagePhi = Schedule.getStage(Phi) + PhiNum; + // Rewrite uses that have been scheduled already to use the new + // Phi register. + for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg), + EI = MRI.use_end(); + UI != EI;) { + MachineOperand &UseOp = *UI; + MachineInstr *UseMI = UseOp.getParent(); + ++UI; + if (UseMI->getParent() != BB) + continue; + if (UseMI->isPHI()) { + if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg) + continue; + if (getLoopPhiReg(*UseMI, BB) != OldReg) + continue; + } + InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI); + assert(OrigInstr != InstrMap.end() && "Instruction not scheduled."); + MachineInstr *OrigMI = OrigInstr->second; + int StageSched = Schedule.getStage(OrigMI); + int CycleSched = Schedule.getCycle(OrigMI); + unsigned ReplaceReg = 0; + // This is the stage for the scheduled instruction. + if (StagePhi == StageSched && Phi->isPHI()) { + int CyclePhi = Schedule.getCycle(Phi); + if (PrevReg && InProlog) + ReplaceReg = PrevReg; + else if (PrevReg && !isLoopCarried(*Phi) && + (CyclePhi <= CycleSched || OrigMI->isPHI())) + ReplaceReg = PrevReg; + else + ReplaceReg = NewReg; + } + // The scheduled instruction occurs before the scheduled Phi, and the + // Phi is not loop carried. 
+    if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
+      ReplaceReg = NewReg;
+    if (StagePhi > StageSched && Phi->isPHI())
+      ReplaceReg = NewReg;
+    if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
+      ReplaceReg = NewReg;
+    if (ReplaceReg) {
+      MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
+      UseOp.setReg(ReplaceReg);
+    }
+  }
+}
+
+bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
+  if (!Phi.isPHI())
+    return false;
+  unsigned DefCycle = Schedule.getCycle(&Phi);
+  int DefStage = Schedule.getStage(&Phi);
+
+  unsigned InitVal = 0;
+  unsigned LoopVal = 0;
+  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
+  MachineInstr *Use = MRI.getVRegDef(LoopVal);
+  if (!Use || Use->isPHI())
+    return true;
+  unsigned LoopCycle = Schedule.getCycle(Use);
+  int LoopStage = Schedule.getStage(Use);
+  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
+}
+
+//===----------------------------------------------------------------------===//
+// PeelingModuloScheduleExpander implementation
+//===----------------------------------------------------------------------===//
+// This is a reimplementation of ModuloScheduleExpander that works by creating
+// a fully correct steady-state kernel and peeling off the prolog and epilogs.
+//===----------------------------------------------------------------------===//
+
+namespace {
+// Remove any dead phis in MBB. Dead phis either have only one block as input
+// (in which case they are the identity) or have no uses.
+void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
+                       LiveIntervals *LIS) {
+  bool Changed = true;
+  while (Changed) {
+    Changed = false;
+    for (auto I = MBB->begin(); I != MBB->getFirstNonPHI();) {
+      MachineInstr &MI = *I++;
+      assert(MI.isPHI());
+      if (MRI.use_empty(MI.getOperand(0).getReg())) {
+        if (LIS)
+          LIS->RemoveMachineInstrFromMaps(MI);
+        MI.eraseFromParent();
+        Changed = true;
+      } else if (MI.getNumExplicitOperands() == 3) {
+        MRI.constrainRegClass(MI.getOperand(1).getReg(),
+                              MRI.getRegClass(MI.getOperand(0).getReg()));
+        MRI.replaceRegWith(MI.getOperand(0).getReg(),
+                           MI.getOperand(1).getReg());
+        if (LIS)
+          LIS->RemoveMachineInstrFromMaps(MI);
+        MI.eraseFromParent();
+        Changed = true;
+      }
+    }
+  }
+}
+
+/// Rewrites the kernel block in-place to adhere to the given schedule.
+/// KernelRewriter holds all of the state required to perform the rewriting.
+class KernelRewriter {
+  ModuloSchedule &S;
+  MachineBasicBlock *BB;
+  MachineBasicBlock *PreheaderBB, *ExitBB;
+  MachineRegisterInfo &MRI;
+  const TargetInstrInfo *TII;
+  LiveIntervals *LIS;
+
+  // Map from register class to canonical undef register for that class.
+  DenseMap<const TargetRegisterClass *, Register> Undefs;
+  // Map from <LoopReg, InitReg> to phi register for all created phis. Note
+  // that this map is only used when InitReg is non-undef.
+  DenseMap<std::pair<unsigned, unsigned>, Register> Phis;
+  // Map from LoopReg to phi register where the InitReg is undef.
+  DenseMap<unsigned, Register> UndefPhis;
+
+  // Reg is used by MI. Return the new register MI should use to adhere to the
+  // schedule. Insert phis as necessary.
+  Register remapUse(Register Reg, MachineInstr &MI);
+  // Insert a phi that carries LoopReg from the loop body and InitReg
+  // otherwise. If InitReg is not given it is chosen arbitrarily. It will
+  // either be undef or will be chosen so as to share another phi.
+  Register phi(Register LoopReg, Optional<Register> InitReg = {},
+               const TargetRegisterClass *RC = nullptr);
+  // Create an undef register of the given register class.
+ Register undef(const TargetRegisterClass *RC); + +public: + KernelRewriter(MachineLoop &L, ModuloSchedule &S, + LiveIntervals *LIS = nullptr); + void rewrite(); +}; +} // namespace + +KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S, + LiveIntervals *LIS) + : S(S), BB(L.getTopBlock()), PreheaderBB(L.getLoopPreheader()), + ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()), + TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) { + PreheaderBB = *BB->pred_begin(); + if (PreheaderBB == BB) + PreheaderBB = *std::next(BB->pred_begin()); +} + +void KernelRewriter::rewrite() { + // Rearrange the loop to be in schedule order. Note that the schedule may + // contain instructions that are not owned by the loop block (InstrChanges and + // friends), so we gracefully handle unowned instructions and delete any + // instructions that weren't in the schedule. + auto InsertPt = BB->getFirstTerminator(); + MachineInstr *FirstMI = nullptr; + for (MachineInstr *MI : S.getInstructions()) { + if (MI->isPHI()) + continue; + if (MI->getParent()) + MI->removeFromParent(); + BB->insert(InsertPt, MI); + if (!FirstMI) + FirstMI = MI; + } + assert(FirstMI && "Failed to find first MI in schedule"); + + // At this point all of the scheduled instructions are between FirstMI + // and the end of the block. Kill from the first non-phi to FirstMI. + for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) { + if (LIS) + LIS->RemoveMachineInstrFromMaps(*I); + (I++)->eraseFromParent(); + } + + // Now remap every instruction in the loop. + for (MachineInstr &MI : *BB) { + if (MI.isPHI() || MI.isTerminator()) + continue; + for (MachineOperand &MO : MI.uses()) { + if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit()) + continue; + Register Reg = remapUse(MO.getReg(), MI); + MO.setReg(Reg); + } + } + EliminateDeadPhis(BB, MRI, LIS); + + // Ensure a phi exists for all instructions that are either referenced by + // an illegal phi or by an instruction outside the loop. This allows us to + // treat remaps of these values the same as "normal" values that come from + // loop-carried phis. + for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) { + if (MI->isPHI()) { + Register R = MI->getOperand(0).getReg(); + phi(R); + continue; + } + + for (MachineOperand &Def : MI->defs()) { + for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) { + if (MI.getParent() != BB) { + phi(Def.getReg()); + break; + } + } + } + } +} + +Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) { + MachineInstr *Producer = MRI.getUniqueVRegDef(Reg); + if (!Producer) + return Reg; + + int ConsumerStage = S.getStage(&MI); + if (!Producer->isPHI()) { + // Non-phi producers are simple to remap. Insert as many phis as the + // difference between the consumer and producer stages. + if (Producer->getParent() != BB) + // Producer was not inside the loop. Use the register as-is. + return Reg; + int ProducerStage = S.getStage(Producer); + assert(ConsumerStage != -1 && + "In-loop consumer should always be scheduled!"); + assert(ConsumerStage >= ProducerStage); + unsigned StageDiff = ConsumerStage - ProducerStage; + + for (unsigned I = 0; I < StageDiff; ++I) + Reg = phi(Reg); + return Reg; + } + + // First, dive through the phi chain to find the defaults for the generated + // phis. 
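// Worked example (virtual register names are hypothetical): with kernel phis
//   %p1 = PHI %init1, %preheader, %p2, %bb
//   %p2 = PHI %init2, %preheader, %v,  %bb
// a use of %p1 dives down to LoopReg = %v and records Defaults = [%init1,
// %init2]; the code below then rebuilds an equivalent chain, creating (or
// reusing) one phi per remaining default.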
+  SmallVector<Optional<Register>, 4> Defaults;
+  Register LoopReg = Reg;
+  auto LoopProducer = Producer;
+  while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
+    LoopReg = getLoopPhiReg(*LoopProducer, BB);
+    Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
+    LoopProducer = MRI.getUniqueVRegDef(LoopReg);
+    assert(LoopProducer);
+  }
+  int LoopProducerStage = S.getStage(LoopProducer);
+
+  Optional<Register> IllegalPhiDefault;
+
+  if (LoopProducerStage == -1) {
+    // Do nothing.
+  } else if (LoopProducerStage > ConsumerStage) {
+    // This schedule is only representable if ProducerStage == ConsumerStage+1.
+    // In addition, Consumer's cycle must be scheduled after Producer in the
+    // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
+    // functions.
+#ifndef NDEBUG // Silence unused variables in non-asserts mode.
+    int LoopProducerCycle = S.getCycle(LoopProducer);
+    int ConsumerCycle = S.getCycle(&MI);
+#endif
+    assert(LoopProducerCycle <= ConsumerCycle);
+    assert(LoopProducerStage == ConsumerStage + 1);
+    // Peel off the first phi from Defaults and insert a phi between producer
+    // and consumer. This phi will not be at the front of the block so we
+    // consider it illegal. It will only exist during the rewrite process; it
+    // needs to exist while we peel off prologs because these could take the
+    // default value. After that we can replace all uses with the loop
+    // producer value.
+    IllegalPhiDefault = Defaults.front();
+    Defaults.erase(Defaults.begin());
+  } else {
+    assert(ConsumerStage >= LoopProducerStage);
+    int StageDiff = ConsumerStage - LoopProducerStage;
+    if (StageDiff > 0) {
+      LLVM_DEBUG(dbgs() << " -- padding defaults array from "
+                        << Defaults.size() << " to "
+                        << (Defaults.size() + StageDiff) << "\n");
+      // If we need more phis than we have defaults for, pad out with undefs
+      // for the earliest phis, which are at the end of the defaults chain
+      // (the chain is in reverse order).
+      Defaults.resize(Defaults.size() + StageDiff,
+                      Defaults.empty() ? Optional<Register>()
+                                       : Defaults.back());
+    }
+  }
+
+  // Now we know the number of stages to jump back, insert the phi chain.
+  auto DefaultI = Defaults.rbegin();
+  while (DefaultI != Defaults.rend())
+    LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
+
+  if (IllegalPhiDefault.hasValue()) {
+    // The consumer optionally consumes LoopProducer in the same iteration
+    // (because the producer is scheduled at an earlier cycle than the
+    // consumer) or the initial value. To facilitate this we create an illegal
+    // block here by embedding a phi in the middle of the block. We will fix
+    // this up immediately prior to pruning.
+    auto RC = MRI.getRegClass(Reg);
+    Register R = MRI.createVirtualRegister(RC);
+    BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
+        .addReg(IllegalPhiDefault.getValue())
+        .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
+        .addReg(LoopReg)
+        .addMBB(BB); // Block choice is arbitrary and has no effect.
+    return R;
+  }
+
+  return LoopReg;
+}
+
+Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg,
+                             const TargetRegisterClass *RC) {
+  // If the init register is not undef, try and find an existing phi.
+  if (InitReg.hasValue()) {
+    auto I = Phis.find({LoopReg, InitReg.getValue()});
+    if (I != Phis.end())
+      return I->second;
+  } else {
+    for (auto &KV : Phis) {
+      if (KV.first.first == LoopReg)
+        return KV.second;
+    }
+  }
+
+  // InitReg is either undef or no existing phi takes InitReg as input. Try
+  // and find a phi that takes undef as input.
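// Illustrative case (register names hypothetical): if an earlier call already
// created
//   %r = PHI undef, %preheader, %loop, %bb
// for this LoopReg, the lookup below reuses %r and simply rewrites its first
// register operand to InitReg rather than emitting a second, parallel phi.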
+ auto I = UndefPhis.find(LoopReg); + if (I != UndefPhis.end()) { + Register R = I->second; + if (!InitReg.hasValue()) + // Found a phi taking undef as input, and this input is undef so return + // without any more changes. + return R; + // Found a phi taking undef as input, so rewrite it to take InitReg. + MachineInstr *MI = MRI.getVRegDef(R); + MI->getOperand(1).setReg(InitReg.getValue()); + Phis.insert({{LoopReg, InitReg.getValue()}, R}); + MRI.constrainRegClass(R, MRI.getRegClass(InitReg.getValue())); + UndefPhis.erase(I); + return R; + } + + // Failed to find any existing phi to reuse, so create a new one. + if (!RC) + RC = MRI.getRegClass(LoopReg); + Register R = MRI.createVirtualRegister(RC); + if (InitReg.hasValue()) + MRI.constrainRegClass(R, MRI.getRegClass(*InitReg)); + BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R) + .addReg(InitReg.hasValue() ? *InitReg : undef(RC)) + .addMBB(PreheaderBB) + .addReg(LoopReg) + .addMBB(BB); + if (!InitReg.hasValue()) + UndefPhis[LoopReg] = R; + else + Phis[{LoopReg, *InitReg}] = R; + return R; +} + +Register KernelRewriter::undef(const TargetRegisterClass *RC) { + Register &R = Undefs[RC]; + if (R == 0) { + // Create an IMPLICIT_DEF that defines this register if we need it. + // All uses of this should be removed by the time we have finished unrolling + // prologs and epilogs. + R = MRI.createVirtualRegister(RC); + auto *InsertBB = &PreheaderBB->getParent()->front(); + BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(), + TII->get(TargetOpcode::IMPLICIT_DEF), R); + } + return R; +} + +namespace { +/// Describes an operand in the kernel of a pipelined loop. Characteristics of +/// the operand are discovered, such as how many in-loop PHIs it has to jump +/// through and defaults for these phis. +class KernelOperandInfo { + MachineBasicBlock *BB; + MachineRegisterInfo &MRI; + SmallVector PhiDefaults; + MachineOperand *Source; + MachineOperand *Target; + +public: + KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI, + const SmallPtrSetImpl &IllegalPhis) + : MRI(MRI) { + Source = MO; + BB = MO->getParent()->getParent(); + while (isRegInLoop(MO)) { + MachineInstr *MI = MRI.getVRegDef(MO->getReg()); + if (MI->isFullCopy()) { + MO = &MI->getOperand(1); + continue; + } + if (!MI->isPHI()) + break; + // If this is an illegal phi, don't count it in distance. + if (IllegalPhis.count(MI)) { + MO = &MI->getOperand(3); + continue; + } + + Register Default = getInitPhiReg(*MI, BB); + MO = MI->getOperand(2).getMBB() == BB ? 
&MI->getOperand(1) + : &MI->getOperand(3); + PhiDefaults.push_back(Default); + } + Target = MO; + } + + bool operator==(const KernelOperandInfo &Other) const { + return PhiDefaults.size() == Other.PhiDefaults.size(); + } + + void print(raw_ostream &OS) const { + OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in " + << *Source->getParent(); + } + +private: + bool isRegInLoop(MachineOperand *MO) { + return MO->isReg() && MO->getReg().isVirtual() && + MRI.getVRegDef(MO->getReg())->getParent() == BB; + } +}; +} // namespace + +MachineBasicBlock * +PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) { + MachineBasicBlock *NewBB = PeelSingleBlockLoop(LPD, BB, MRI, TII); + if (LPD == LPD_Front) + PeeledFront.push_back(NewBB); + else + PeeledBack.push_front(NewBB); + for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator(); + ++I, ++NI) { + CanonicalMIs[&*I] = &*I; + CanonicalMIs[&*NI] = &*I; + BlockMIs[{NewBB, &*I}] = &*NI; + BlockMIs[{BB, &*I}] = &*I; + } + return NewBB; +} + +void PeelingModuloScheduleExpander::peelPrologAndEpilogs() { + BitVector LS(Schedule.getNumStages(), true); + BitVector AS(Schedule.getNumStages(), true); + LiveStages[BB] = LS; + AvailableStages[BB] = AS; + + // Peel out the prologs. + LS.reset(); + for (int I = 0; I < Schedule.getNumStages() - 1; ++I) { + LS[I] = 1; + Prologs.push_back(peelKernel(LPD_Front)); + LiveStages[Prologs.back()] = LS; + AvailableStages[Prologs.back()] = LS; + } + + // Create a block that will end up as the new loop exiting block (dominated by + // all prologs and epilogs). It will only contain PHIs, in the same order as + // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property + // that the exiting block is a (sub) clone of BB. This in turn gives us the + // property that any value deffed in BB but used outside of BB is used by a + // PHI in the exiting block. + MachineBasicBlock *ExitingBB = CreateLCSSAExitingBlock(); + + // Push out the epilogs, again in reverse order. + // We can't assume anything about the minumum loop trip count at this point, + // so emit a fairly complex epilog: + // K[0, 1, 2] // Kernel runs stages 0, 1, 2 + // E0[2] <- P1 // Epilog runs stage 2 only, so the state after is [0]. + // E1[1, 2] <- P0 // Epilog 1 moves the last item from stage 0 to stage 2. + // + // This creates a single-successor single-predecessor sequence of blocks for + // each epilog, which are kept this way for simplicity at this stage and + // cleaned up by the optimizer later. + for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) { + Epilogs.push_back(nullptr); + for (int J = Schedule.getNumStages() - 1; J >= I; --J) { + LS.reset(); + LS[J] = 1; + Epilogs.back() = peelKernel(LPD_Back); + LiveStages[Epilogs.back()] = LS; + AvailableStages[Epilogs.back()] = AS; + } + } + + // Now we've defined all the prolog and epilog blocks as a fallthrough + // sequence, add the edges that will be followed if the loop trip count is + // lower than the number of stages (connecting prologs directly with epilogs). 
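// Sketch for a 3-stage schedule (block names are illustrative only):
//   P0 -> P1 -> K -> epilogs -> exit        (fall-through chain built above)
// The loop below adds one extra edge from each prolog directly to its paired
// epilog, taken when the trip count is too small to ever reach the kernel,
// and gives each epilog phi an incoming value for that new predecessor.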
+ auto PI = Prologs.begin(); + auto EI = Epilogs.begin(); + assert(Prologs.size() == Epilogs.size()); + for (; PI != Prologs.end(); ++PI, ++EI) { + MachineBasicBlock *Pred = *(*EI)->pred_begin(); + (*PI)->addSuccessor(*EI); + for (MachineInstr &MI : (*EI)->phis()) { + Register Reg = MI.getOperand(1).getReg(); + MachineInstr *Use = MRI.getUniqueVRegDef(Reg); + if (Use && Use->getParent() == Pred) + Reg = getEquivalentRegisterIn(Reg, *PI); + MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false)); + MI.addOperand(MachineOperand::CreateMBB(*PI)); + } + } + + // Create a list of all blocks in order. + SmallVector Blocks; + llvm::copy(PeeledFront, std::back_inserter(Blocks)); + Blocks.push_back(BB); + llvm::copy(PeeledBack, std::back_inserter(Blocks)); + + // Iterate in reverse order over all instructions, remapping as we go. + for (MachineBasicBlock *B : reverse(Blocks)) { + for (auto I = B->getFirstInstrTerminator()->getReverseIterator(); + I != std::next(B->getFirstNonPHI()->getReverseIterator());) { + MachineInstr *MI = &*I++; + rewriteUsesOf(MI); + } + } + // Now all remapping has been done, we're free to optimize the generated code. + for (MachineBasicBlock *B : reverse(Blocks)) + EliminateDeadPhis(B, MRI, LIS); + EliminateDeadPhis(ExitingBB, MRI, LIS); +} + +MachineBasicBlock *PeelingModuloScheduleExpander::CreateLCSSAExitingBlock() { + MachineFunction &MF = *BB->getParent(); + MachineBasicBlock *Exit = *BB->succ_begin(); + if (Exit == BB) + Exit = *std::next(BB->succ_begin()); + + MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); + MF.insert(std::next(BB->getIterator()), NewBB); + + // Clone all phis in BB into NewBB and rewrite. + for (MachineInstr &MI : BB->phis()) { + auto RC = MRI.getRegClass(MI.getOperand(0).getReg()); + Register OldR = MI.getOperand(3).getReg(); + Register R = MRI.createVirtualRegister(RC); + SmallVector Uses; + for (MachineInstr &Use : MRI.use_instructions(OldR)) + if (Use.getParent() != BB) + Uses.push_back(&Use); + for (MachineInstr *Use : Uses) + Use->substituteRegister(OldR, R, /*SubIdx=*/0, + *MRI.getTargetRegisterInfo()); + MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R) + .addReg(OldR) + .addMBB(BB); + BlockMIs[{NewBB, &MI}] = NI; + CanonicalMIs[NI] = &MI; + } + BB->replaceSuccessor(Exit, NewBB); + Exit->replacePhiUsesWith(BB, NewBB); + NewBB->addSuccessor(Exit); + + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector Cond; + bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond); + (void)CanAnalyzeBr; + assert(CanAnalyzeBr && "Must be able to analyze the loop branch!"); + TII->removeBranch(*BB); + TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB, + Cond, DebugLoc()); + TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc()); + return NewBB; +} + +Register +PeelingModuloScheduleExpander::getEquivalentRegisterIn(Register Reg, + MachineBasicBlock *BB) { + MachineInstr *MI = MRI.getUniqueVRegDef(Reg); + unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg); + return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg(); +} + +void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) { + if (MI->isPHI()) { + // This is an illegal PHI. The loop-carried (desired) value is operand 3, + // and it is produced by this block. 
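// Operand layout of such an illegal phi, as built by KernelRewriter::remapUse
// (register names illustrative):
//   %r = PHI %default, %preheader, %loopval, %bb
// so operand 1 holds the default/initial value and operand 3 the in-block,
// loop-carried value that normally replaces all uses of %r here.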
+    Register PhiR = MI->getOperand(0).getReg();
+    Register R = MI->getOperand(3).getReg();
+    int RMIStage = getStage(MRI.getUniqueVRegDef(R));
+    if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
+      R = MI->getOperand(1).getReg();
+    MRI.setRegClass(R, MRI.getRegClass(PhiR));
+    MRI.replaceRegWith(PhiR, R);
+    if (LIS)
+      LIS->RemoveMachineInstrFromMaps(*MI);
+    MI->eraseFromParent();
+    return;
+  }
+
+  int Stage = getStage(MI);
+  if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
+      LiveStages[MI->getParent()].test(Stage))
+    // Instruction is live, no rewriting to do.
+    return;
+
+  for (MachineOperand &DefMO : MI->defs()) {
+    SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
+    for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
+      // Only PHIs can use values from this block by construction.
+      // Match with the equivalent PHI in B.
+      assert(UseMI.isPHI());
+      Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
+                                             MI->getParent());
+      Subs.emplace_back(&UseMI, Reg);
+    }
+    for (auto &Sub : Subs)
+      Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
+                                    *MRI.getTargetRegisterInfo());
+  }
+  if (LIS)
+    LIS->RemoveMachineInstrFromMaps(*MI);
+  MI->eraseFromParent();
+}
+
+void PeelingModuloScheduleExpander::fixupBranches() {
+  std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> Info =
+      TII->analyzeLoopForPipelining(BB);
+  assert(Info);
+
+  // Work outwards from the kernel.
+  bool KernelDisposed = false;
+  int TC = Schedule.getNumStages() - 1;
+  for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
+       ++PI, ++EI, --TC) {
+    MachineBasicBlock *Prolog = *PI;
+    MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
+    MachineBasicBlock *Epilog = *EI;
+    SmallVector<MachineOperand, 4> Cond;
+    TII->removeBranch(*Prolog);
+    Optional<bool> StaticallyGreater =
+        Info->createTripCountGreaterCondition(TC, *Prolog, Cond);
+    if (!StaticallyGreater.hasValue()) {
+      LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
+      // Dynamically branch based on Cond.
+      TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
+    } else if (*StaticallyGreater == false) {
+      LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
+      // Prolog never falls through; branch to epilog and orphan interior
+      // blocks. Leave it to unreachable-block-elim to clean up.
+      Prolog->removeSuccessor(Fallthrough);
+      for (MachineInstr &P : Fallthrough->phis()) {
+        P.RemoveOperand(2);
+        P.RemoveOperand(1);
+      }
+      TII->insertUnconditionalBranch(*Prolog, Epilog, DebugLoc());
+      KernelDisposed = true;
+    } else {
+      LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
+      // Prolog always falls through; remove incoming values in epilog.
+ Prolog->removeSuccessor(Epilog); + for (MachineInstr &P : Epilog->phis()) { + P.RemoveOperand(4); + P.RemoveOperand(3); + } + } + } + + if (!KernelDisposed) { + Info->adjustTripCount(-(Schedule.getNumStages() - 1)); + Info->setPreheader(Prologs.back()); + } else { + Info->disposed(); + } +} + +void PeelingModuloScheduleExpander::rewriteKernel() { + KernelRewriter KR(*Schedule.getLoop(), Schedule); + KR.rewrite(); +} + +void PeelingModuloScheduleExpander::expand() { + BB = Schedule.getLoop()->getTopBlock(); + Preheader = Schedule.getLoop()->getLoopPreheader(); + LLVM_DEBUG(Schedule.dump()); + + rewriteKernel(); + peelPrologAndEpilogs(); + fixupBranches(); +} + +void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() { + BB = Schedule.getLoop()->getTopBlock(); + Preheader = Schedule.getLoop()->getLoopPreheader(); + + // Dump the schedule before we invalidate and remap all its instructions. + // Stash it in a string so we can print it if we found an error. + std::string ScheduleDump; + raw_string_ostream OS(ScheduleDump); + Schedule.print(OS); + OS.flush(); + + // First, run the normal ModuleScheduleExpander. We don't support any + // InstrChanges. + assert(LIS && "Requires LiveIntervals!"); + ModuloScheduleExpander MSE(MF, Schedule, *LIS, + ModuloScheduleExpander::InstrChangesTy()); + MSE.expand(); + MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel(); + if (!ExpandedKernel) { + // The expander optimized away the kernel. We can't do any useful checking. + MSE.cleanup(); + return; + } + // Before running the KernelRewriter, re-add BB into the CFG. + Preheader->addSuccessor(BB); + + // Now run the new expansion algorithm. + KernelRewriter KR(*Schedule.getLoop(), Schedule); + KR.rewrite(); + peelPrologAndEpilogs(); + + // Collect all illegal phis that the new algorithm created. We'll give these + // to KernelOperandInfo. + SmallPtrSet IllegalPhis; + for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) { + if (NI->isPHI()) + IllegalPhis.insert(&*NI); + } + + // Co-iterate across both kernels. We expect them to be identical apart from + // phis and full COPYs (we look through both). + SmallVector, 8> KOIs; + auto OI = ExpandedKernel->begin(); + auto NI = BB->begin(); + for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) { + while (OI->isPHI() || OI->isFullCopy()) + ++OI; + while (NI->isPHI() || NI->isFullCopy()) + ++NI; + assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!"); + // Analyze every operand separately. + for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin(); + OOpI != OI->operands_end(); ++OOpI, ++NOpI) + KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis), + KernelOperandInfo(&*NOpI, MRI, IllegalPhis)); + } + + bool Failed = false; + for (auto &OldAndNew : KOIs) { + if (OldAndNew.first == OldAndNew.second) + continue; + Failed = true; + errs() << "Modulo kernel validation error: [\n"; + errs() << " [golden] "; + OldAndNew.first.print(errs()); + errs() << " "; + OldAndNew.second.print(errs()); + errs() << "]\n"; + } + + if (Failed) { + errs() << "Golden reference kernel:\n"; + ExpandedKernel->print(errs()); + errs() << "New kernel:\n"; + BB->print(errs()); + errs() << ScheduleDump; + report_fatal_error( + "Modulo kernel validation (-pipeliner-experimental-cg) failed"); + } + + // Cleanup by removing BB from the CFG again as the original + // ModuloScheduleExpander intended. 
+  Preheader->removeSuccessor(BB);
+  MSE.cleanup();
+}
+
+//===----------------------------------------------------------------------===//
+// ModuloScheduleTestPass implementation
+//===----------------------------------------------------------------------===//
+// This pass constructs a ModuloSchedule from its module and runs
+// ModuloScheduleExpander.
+//
+// The module is expected to contain a single-block analyzable loop.
+// The total order of instructions is taken from the loop as-is.
+// Instructions are expected to be annotated with a PostInstrSymbol.
+// This PostInstrSymbol must have the following format:
+// "Stage-%d_Cycle-%d".
+//===----------------------------------------------------------------------===//
+
+namespace {
+class ModuloScheduleTest : public MachineFunctionPass {
+public:
+  static char ID;
+
+  ModuloScheduleTest() : MachineFunctionPass(ID) {
+    initializeModuloScheduleTestPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  void runOnLoop(MachineFunction &MF, MachineLoop &L);
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<MachineLoopInfo>();
+    AU.addRequired<LiveIntervals>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+} // namespace
+
+char ModuloScheduleTest::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
+                      "Modulo Schedule test pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
+                    "Modulo Schedule test pass", false, false)
+
+bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
+  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+  for (auto *L : MLI) {
+    if (L->getTopBlock() != L->getBottomBlock())
+      continue;
+    runOnLoop(MF, *L);
+    return false;
+  }
+  return false;
+}
+
+static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
+  std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
+  std::pair<StringRef, StringRef> StageTokenAndValue =
+      getToken(StageAndCycle.first, "-");
+  std::pair<StringRef, StringRef> CycleTokenAndValue =
+      getToken(StageAndCycle.second, "-");
+  if (StageTokenAndValue.first != "Stage" ||
+      CycleTokenAndValue.first != "_Cycle") {
+    llvm_unreachable(
+        "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
+    return;
+  }
+
+  StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
+  CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
+
+  dbgs() << "  Stage=" << Stage << ", Cycle=" << Cycle << "\n";
+}
+
+void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
+  LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+  MachineBasicBlock *BB = L.getTopBlock();
+  dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
+
+  DenseMap<MachineInstr *, int> Cycle, Stage;
+  std::vector<MachineInstr *> Instrs;
+  for (MachineInstr &MI : *BB) {
+    if (MI.isTerminator())
+      continue;
+    Instrs.push_back(&MI);
+    if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
+      dbgs() << "Parsing post-instr symbol for " << MI;
+      parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
+    }
+  }
+
+  ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
+                    std::move(Stage));
+  ModuloScheduleExpander MSE(
+      MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
+  MSE.expand();
+  MSE.cleanup();
+}
+
+//===----------------------------------------------------------------------===//
+// ModuloScheduleTestAnnotater implementation
+//===----------------------------------------------------------------------===//
+
+void ModuloScheduleTestAnnotater::annotate() {
+  for
(MachineInstr *MI : S.getInstructions()) { + SmallVector SV; + raw_svector_ostream OS(SV); + OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI); + MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str()); + MI->setPostInstrSymbol(MF, Sym); + } +} diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp index c70b62252139..1a493964e678 100644 --- a/lib/CodeGen/OptimizePHIs.cpp +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -97,7 +97,7 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, unsigned &SingleValReg, InstrSet &PHIsInCycle) { assert(MI->isPHI() && "IsSingleValuePHICycle expects a PHI instruction"); - unsigned DstReg = MI->getOperand(0).getReg(); + Register DstReg = MI->getOperand(0).getReg(); // See if we already saw this register. if (!PHIsInCycle.insert(MI).second) @@ -109,16 +109,15 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, // Scan the PHI operands. for (unsigned i = 1; i != MI->getNumOperands(); i += 2) { - unsigned SrcReg = MI->getOperand(i).getReg(); + Register SrcReg = MI->getOperand(i).getReg(); if (SrcReg == DstReg) continue; MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); // Skip over register-to-register moves. - if (SrcMI && SrcMI->isCopy() && - !SrcMI->getOperand(0).getSubReg() && + if (SrcMI && SrcMI->isCopy() && !SrcMI->getOperand(0).getSubReg() && !SrcMI->getOperand(1).getSubReg() && - TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg())) { + Register::isVirtualRegister(SrcMI->getOperand(1).getReg())) { SrcReg = SrcMI->getOperand(1).getReg(); SrcMI = MRI->getVRegDef(SrcReg); } @@ -142,8 +141,8 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, /// other PHIs in a cycle. bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) { assert(MI->isPHI() && "IsDeadPHICycle expects a PHI instruction"); - unsigned DstReg = MI->getOperand(0).getReg(); - assert(TargetRegisterInfo::isVirtualRegister(DstReg) && + Register DstReg = MI->getOperand(0).getReg(); + assert(Register::isVirtualRegister(DstReg) && "PHI destination is not a virtual register"); // See if we already saw this register. @@ -177,7 +176,7 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) { InstrSet PHIsInCycle; if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) && SingleValReg != 0) { - unsigned OldReg = MI->getOperand(0).getReg(); + Register OldReg = MI->getOperand(0).getReg(); if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg))) continue; diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 948a5835438c..4dd4c4b1084e 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -31,7 +31,9 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Pass.h" @@ -168,7 +170,7 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { // Remove dead IMPLICIT_DEF instructions. 
for (MachineInstr *DefMI : ImpDefs) { - unsigned DefReg = DefMI->getOperand(0).getReg(); + Register DefReg = DefMI->getOperand(0).getReg(); if (MRI->use_nodbg_empty(DefReg)) { if (LIS) LIS->RemoveMachineInstrFromMaps(*DefMI); @@ -183,6 +185,11 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { MF.DeleteMachineInstr(I.first); } + // TODO: we should use the incremental DomTree updater here. + if (Changed) + if (auto *MDT = getAnalysisIfAvailable()) + MDT->getBase().recalculate(MF); + LoweredPHIs.clear(); ImpDefs.clear(); VRegPHIUseCount.clear(); @@ -240,7 +247,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, MachineInstr *MPhi = MBB.remove(&*MBB.begin()); unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2; - unsigned DestReg = MPhi->getOperand(0).getReg(); + Register DestReg = MPhi->getOperand(0).getReg(); assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs"); bool isDead = MPhi->getOperand(0).isDead(); @@ -252,11 +259,12 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // Insert a register to register copy at the top of the current block (but // after any remaining phi nodes) which copies the new incoming register // into the phi node destination. + MachineInstr *PHICopy = nullptr; const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); if (allPhiOperandsUndefined(*MPhi, *MRI)) // If all sources of a PHI node are implicit_def or undef uses, just emit an // implicit_def instead of a copy. - BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), + PHICopy = BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), DestReg); else { // Can we reuse an earlier PHI node? This only happens for critical edges, @@ -273,15 +281,13 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg); entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC); } - BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), - TII->get(TargetOpcode::COPY), DestReg) - .addReg(IncomingReg); + // Give the target possiblity to handle special cases fallthrough otherwise + PHICopy = TII->createPHIDestinationCopy(MBB, AfterPHIsIt, MPhi->getDebugLoc(), + IncomingReg, DestReg); } // Update live variable information if there is any. if (LV) { - MachineInstr &PHICopy = *std::prev(AfterPHIsIt); - if (IncomingReg) { LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg); @@ -302,7 +308,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // killed. Note that because the value is defined in several places (once // each for each incoming block), the "def" block and instruction fields // for the VarInfo is not filled in. - LV->addVirtualRegisterKilled(IncomingReg, PHICopy); + LV->addVirtualRegisterKilled(IncomingReg, *PHICopy); } // Since we are going to be deleting the PHI node, if it is the last use of @@ -312,15 +318,14 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // If the result is dead, update LV. if (isDead) { - LV->addVirtualRegisterDead(DestReg, PHICopy); + LV->addVirtualRegisterDead(DestReg, *PHICopy); LV->removeVirtualRegisterDead(DestReg, *MPhi); } } // Update LiveIntervals for the new copy or implicit def. 
if (LIS) { - SlotIndex DestCopyIndex = - LIS->InsertMachineInstrInMaps(*std::prev(AfterPHIsIt)); + SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(*PHICopy); SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB); if (IncomingReg) { @@ -368,11 +373,11 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // IncomingReg register in the corresponding predecessor basic block. SmallPtrSet MBBsInsertedInto; for (int i = NumSrcs - 1; i >= 0; --i) { - unsigned SrcReg = MPhi->getOperand(i*2+1).getReg(); + Register SrcReg = MPhi->getOperand(i * 2 + 1).getReg(); unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg(); bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() || isImplicitlyDefined(SrcReg, *MRI); - assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && + assert(Register::isVirtualRegister(SrcReg) && "Machine PHI Operands must all be virtual registers!"); // Get the MachineBasicBlock equivalent of the BasicBlock that is the source @@ -406,9 +411,9 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, if (DefMI->isImplicitDef()) ImpDefs.insert(DefMI); } else { - NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), - TII->get(TargetOpcode::COPY), IncomingReg) - .addReg(SrcReg, 0, SrcSubReg); + NewSrcInstr = + TII->createPHISourceCopy(opBlock, InsertPos, MPhi->getDebugLoc(), + SrcReg, SrcSubReg, IncomingReg); } } @@ -457,7 +462,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, } } else { // We just inserted this copy. - KillInst = std::prev(InsertPos); + KillInst = NewSrcInstr; } } assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction"); @@ -567,7 +572,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end(); BBI != BBE && BBI->isPHI(); ++BBI) { for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { - unsigned Reg = BBI->getOperand(i).getReg(); + Register Reg = BBI->getOperand(i).getReg(); MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB(); // Is there a critical edge from PreMBB to MBB? 
if (PreMBB->succ_size() == 1) diff --git a/lib/CodeGen/PatchableFunction.cpp b/lib/CodeGen/PatchableFunction.cpp index a3fa1b0ad8ed..529fde84e39a 100644 --- a/lib/CodeGen/PatchableFunction.cpp +++ b/lib/CodeGen/PatchableFunction.cpp @@ -78,7 +78,7 @@ bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) { MIB.add(MO); FirstActualI->eraseFromParent(); - MF.ensureAlignment(4); + MF.ensureAlignment(Align(16)); return true; } diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index b918396aa8c5..54f1d38ed106 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -418,7 +418,7 @@ namespace { const MachineRegisterInfo &MRI, const TargetInstrInfo *TII = nullptr) : DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) { - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (!Register::isPhysicalRegister(Reg)) { Def = MRI.getVRegDef(Reg); DefIdx = MRI.def_begin(Reg).getOperandNo(); } @@ -460,8 +460,8 @@ optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB, if (!TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx)) return false; - if (TargetRegisterInfo::isPhysicalRegister(DstReg) || - TargetRegisterInfo::isPhysicalRegister(SrcReg)) + if (Register::isPhysicalRegister(DstReg) || + Register::isPhysicalRegister(SrcReg)) return false; if (MRI->hasOneNonDBGUse(SrcReg)) @@ -581,7 +581,7 @@ optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB, MRI->constrainRegClass(DstReg, DstRC); } - unsigned NewVR = MRI->createVirtualRegister(RC); + Register NewVR = MRI->createVirtualRegister(RC); MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(DstReg, 0, SubIdx); @@ -609,8 +609,8 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) { unsigned SrcReg, SrcReg2; int CmpMask, CmpValue; if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) || - TargetRegisterInfo::isPhysicalRegister(SrcReg) || - (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2))) + Register::isPhysicalRegister(SrcReg) || + (SrcReg2 != 0 && Register::isPhysicalRegister(SrcReg2))) return false; // Attempt to optimize the comparison instruction. @@ -663,7 +663,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg, // Thus, instead of maintaining untested code, we will revisit that if // that changes at some point. unsigned Reg = RegSubReg.Reg; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return false; const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); @@ -675,7 +675,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg, do { CurSrcPair = SrcToLook.pop_back_val(); // As explained above, do not handle physical registers - if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg)) + if (Register::isPhysicalRegister(CurSrcPair.Reg)) return false; ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII); @@ -723,7 +723,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg, // constraints to the register allocator. Moreover, if we want to extend // the live-range of a physical register, unlike SSA virtual register, // we will have to check that they aren't redefine before the related use. - if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg)) + if (Register::isPhysicalRegister(CurSrcPair.Reg)) return false; // Keep following the chain if the value isn't any better yet. 
@@ -761,7 +761,7 @@ insertPHI(MachineRegisterInfo &MRI, const TargetInstrInfo &TII, // NewRC is only correct if no subregisters are involved. findNextSource() // should have rejected those cases already. assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand"); - unsigned NewVR = MRI.createVirtualRegister(NewRC); + Register NewVR = MRI.createVirtualRegister(NewRC); MachineBasicBlock *MBB = OrigPHI.getParent(); MachineInstrBuilder MIB = BuildMI(*MBB, &OrigPHI, OrigPHI.getDebugLoc(), TII.get(TargetOpcode::PHI), NewVR); @@ -1170,7 +1170,7 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) { "Coalescer can understand multiple defs?!"); const MachineOperand &MODef = MI.getOperand(0); // Do not rewrite physical definitions. - if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) + if (Register::isPhysicalRegister(MODef.getReg())) return false; bool Changed = false; @@ -1221,7 +1221,7 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) { MachineInstr & PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike, RegSubRegPair Def, RewriteMapTy &RewriteMap) { - assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && + assert(!Register::isPhysicalRegister(Def.Reg) && "We do not rewrite physical registers"); // Find the new source to use in the COPY rewrite. @@ -1229,7 +1229,7 @@ PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike, // Insert the COPY. const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg); - unsigned NewVReg = MRI->createVirtualRegister(DefRC); + Register NewVReg = MRI->createVirtualRegister(DefRC); MachineInstr *NewCopy = BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(), @@ -1280,7 +1280,7 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy( while (CpyRewriter.getNextRewritableSource(Src, Def)) { // If a physical register is here, this is probably for a good reason. // Do not rewrite that. - if (TargetRegisterInfo::isPhysicalRegister(Def.Reg)) + if (Register::isPhysicalRegister(Def.Reg)) return false; // If we do not know how to rewrite this definition, there is no point @@ -1315,12 +1315,11 @@ bool PeepholeOptimizer::isLoadFoldable( if (MCID.getNumDefs() != 1) return false; - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); // To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting // loads. It should be checked when processing uses of the load, since // uses can be removed during peephole. - if (!MI.getOperand(0).getSubReg() && - TargetRegisterInfo::isVirtualRegister(Reg) && + if (!MI.getOperand(0).getSubReg() && Register::isVirtualRegister(Reg) && MRI->hasOneNonDBGUser(Reg)) { FoldAsLoadDefCandidates.insert(Reg); return true; @@ -1336,8 +1335,8 @@ bool PeepholeOptimizer::isMoveImmediate( return false; if (MCID.getNumDefs() != 1) return false; - unsigned Reg = MI.getOperand(0).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + Register Reg = MI.getOperand(0).getReg(); + if (Register::isVirtualRegister(Reg)) { ImmDefMIs.insert(std::make_pair(Reg, &MI)); ImmDefRegs.insert(Reg); return true; @@ -1359,8 +1358,8 @@ bool PeepholeOptimizer::foldImmediate(MachineInstr &MI, // Ignore dead implicit defs. 
if (MO.isImplicit() && MO.isDead()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; if (ImmDefRegs.count(Reg) == 0) continue; @@ -1393,12 +1392,12 @@ bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI, DenseMap &CopyMIs) { assert(MI.isCopy() && "expected a COPY machine instruction"); - unsigned SrcReg = MI.getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + Register SrcReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(SrcReg)) return false; - unsigned DstReg = MI.getOperand(0).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DstReg)) + Register DstReg = MI.getOperand(0).getReg(); + if (!Register::isVirtualRegister(DstReg)) return false; if (CopySrcRegs.insert(SrcReg).second) { @@ -1416,7 +1415,7 @@ bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI, if (SrcSubReg != PrevSrcSubReg) return false; - unsigned PrevDstReg = PrevCopy->getOperand(0).getReg(); + Register PrevDstReg = PrevCopy->getOperand(0).getReg(); // Only replace if the copy register class is the same. // @@ -1433,8 +1432,7 @@ bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI, } bool PeepholeOptimizer::isNAPhysCopy(unsigned Reg) { - return TargetRegisterInfo::isPhysicalRegister(Reg) && - !MRI->isAllocatable(Reg); + return Register::isPhysicalRegister(Reg) && !MRI->isAllocatable(Reg); } bool PeepholeOptimizer::foldRedundantNAPhysCopy( @@ -1444,9 +1442,9 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy( if (DisableNAPhysCopyOpt) return false; - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); - if (isNAPhysCopy(SrcReg) && TargetRegisterInfo::isVirtualRegister(DstReg)) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + if (isNAPhysCopy(SrcReg) && Register::isVirtualRegister(DstReg)) { // %vreg = COPY %physreg // Avoid using a datastructure which can track multiple live non-allocatable // phys->virt copies since LLVM doesn't seem to do this. @@ -1454,7 +1452,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy( return false; } - if (!(TargetRegisterInfo::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg))) + if (!(Register::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg))) return false; // %physreg = COPY %vreg @@ -1467,7 +1465,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy( return false; } - unsigned PrevDstReg = PrevCopy->second->getOperand(0).getReg(); + Register PrevDstReg = PrevCopy->second->getOperand(0).getReg(); if (PrevDstReg == SrcReg) { // Remove the virt->phys copy: we saw the virtual register definition, and // the non-allocatable physical register's state hasn't changed since then. @@ -1489,7 +1487,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy( static bool isVirtualRegisterOperand(MachineOperand &MO) { if (!MO.isReg()) return false; - return TargetRegisterInfo::isVirtualRegister(MO.getReg()); + return Register::isVirtualRegister(MO.getReg()); } bool PeepholeOptimizer::findTargetRecurrence( @@ -1662,7 +1660,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { for (const MachineOperand &MO : MI->operands()) { // Visit all operands: definitions can be implicit or explicit. 
if (MO.isReg()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (MO.isDef() && isNAPhysCopy(Reg)) { const auto &Def = NAPhysToVirtMIs.find(Reg); if (Def != NAPhysToVirtMIs.end()) { @@ -1778,7 +1776,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { LocalMIs.erase(DefMI); LocalMIs.insert(FoldMI); if (MI->isCall()) - MI->getMF()->updateCallSiteInfo(MI, FoldMI); + MI->getMF()->moveCallSiteInfo(MI, FoldMI); MI->eraseFromParent(); DefMI->eraseFromParent(); MRI->markUsesInDebugValueAsUndef(FoldedReg); @@ -1810,7 +1808,11 @@ ValueTrackerResult ValueTracker::getNextSourceFromCopy() { assert(Def->isCopy() && "Invalid definition"); // Copy instruction are supposed to be: Def = Src. // If someone breaks this assumption, bad things will happen everywhere. - assert(Def->getNumOperands() == 2 && "Invalid number of operands"); + // There may be implicit uses preventing the copy to be moved across + // some target specific register definitions + assert(Def->getNumOperands() - Def->getNumImplicitOperands() == 2 && + "Invalid number of operands"); + assert(!Def->hasImplicitDef() && "Only implicit uses are allowed"); if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) // If we look for a different subreg, it means we want a subreg of src. @@ -1855,6 +1857,11 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() { SrcIdx = OpIdx; } + // In some rare case, Def has no input, SrcIdx is out of bound, + // getOperand(SrcIdx) will fail below. + if (SrcIdx >= Def->getNumOperands()) + return ValueTrackerResult(); + // Stop when any user of the bitcast is a SUBREG_TO_REG, replacing with a COPY // will break the assumed guarantees for the upper bits. for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(DefOp.getReg())) { @@ -2087,7 +2094,7 @@ ValueTrackerResult ValueTracker::getNextSource() { // If we can still move up in the use-def chain, move to the next // definition. - if (!TargetRegisterInfo::isPhysicalRegister(Reg) && OneRegSrc) { + if (!Register::isPhysicalRegister(Reg) && OneRegSrc) { MachineRegisterInfo::def_iterator DI = MRI.def_begin(Reg); if (DI != MRI.def_end()) { Def = DI->getParent(); diff --git a/lib/CodeGen/PreISelIntrinsicLowering.cpp b/lib/CodeGen/PreISelIntrinsicLowering.cpp index 2752e186875c..0d2f6f99ca96 100644 --- a/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -76,7 +76,7 @@ static bool lowerObjCCall(Function &F, const char *NewFn, } for (auto I = F.use_begin(), E = F.use_end(); I != E;) { - auto *CI = dyn_cast(I->getUser()); + auto *CI = cast(I->getUser()); assert(CI->getCalledFunction() && "Cannot lower an indirect call!"); ++I; diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index b38987ad1c90..11bff45f9ad5 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -73,9 +73,9 @@ bool ProcessImplicitDefs::canTurnIntoImplicitDef(MachineInstr *MI) { void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { LLVM_DEBUG(dbgs() << "Processing " << *MI); - unsigned Reg = MI->getOperand(0).getReg(); + Register Reg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { // For virtual registers, mark all uses as , and convert users to // implicit-def when possible. 
for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { @@ -100,8 +100,8 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { for (MachineOperand &MO : UserMI->operands()) { if (!MO.isReg()) continue; - unsigned UserReg = MO.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(UserReg) || + Register UserReg = MO.getReg(); + if (!Register::isPhysicalRegister(UserReg) || !TRI->regsOverlap(Reg, UserReg)) continue; // UserMI uses or redefines Reg. Set flags on all uses. diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index d463bee67595..729f06dda62b 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -898,7 +898,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // frame index registers. Functions which don't want/need this optimization // will continue to use the existing code path. if (MFI.getUseLocalStackAllocationBlock()) { - unsigned Align = MFI.getLocalFrameMaxAlign(); + unsigned Align = MFI.getLocalFrameMaxAlign().value(); // Adjust to alignment boundary. Offset = alignTo(Offset, Align, Skew); diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index da3ef4b771f3..74e721dbd138 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -129,7 +129,7 @@ const PseudoSourceValue * PseudoSourceValueManager::getFixedStack(int FI) { std::unique_ptr &V = FSValues[FI]; if (!V) - V = llvm::make_unique(FI, TII); + V = std::make_unique(FI, TII); return V.get(); } @@ -138,7 +138,7 @@ PseudoSourceValueManager::getGlobalValueCallEntry(const GlobalValue *GV) { std::unique_ptr &E = GlobalCallEntries[GV]; if (!E) - E = llvm::make_unique(GV, TII); + E = std::make_unique(GV, TII); return E.get(); } @@ -147,6 +147,6 @@ PseudoSourceValueManager::getExternalSymbolCallEntry(const char *ES) { std::unique_ptr &E = ExternalCallEntries[ES]; if (!E) - E = llvm::make_unique(ES, TII); + E = std::make_unique(ES, TII); return E.get(); } diff --git a/lib/CodeGen/ReachingDefAnalysis.cpp b/lib/CodeGen/ReachingDefAnalysis.cpp index f05c97ad621e..2850033e6419 100644 --- a/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/lib/CodeGen/ReachingDefAnalysis.cpp @@ -9,6 +9,7 @@ #include "llvm/CodeGen/ReachingDefAnalysis.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Support/Debug.h" using namespace llvm; diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp index 1cbe75c27d13..156daaa03bb5 100644 --- a/lib/CodeGen/RegAllocBase.cpp +++ b/lib/CodeGen/RegAllocBase.cpp @@ -73,7 +73,7 @@ void RegAllocBase::seedLiveRegs() { NamedRegionTimer T("seed", "Seed Live Regs", TimerGroupName, TimerGroupDescription, TimePassesIsEnabled); for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) continue; enqueue(&LIS->getInterval(Reg)); @@ -154,7 +154,7 @@ void RegAllocBase::allocatePhysRegs() { continue; } LLVM_DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); - assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) && + assert(Register::isVirtualRegister(SplitVirtReg->reg) && "expect split value in virtual register"); enqueue(SplitVirtReg); ++NumNewQueued; diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 2ffa5e389f89..44d0233604e7 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ 
b/lib/CodeGen/RegAllocFast.cpp @@ -90,7 +90,7 @@ namespace { explicit LiveReg(unsigned VirtReg) : VirtReg(VirtReg) {} unsigned getSparseSetIndex() const { - return TargetRegisterInfo::virtReg2Index(VirtReg); + return Register::virtReg2Index(VirtReg); } }; @@ -200,11 +200,11 @@ namespace { void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg); LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) { - return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); + return LiveVirtRegs.find(Register::virtReg2Index(VirtReg)); } LiveRegMap::const_iterator findLiveVirtReg(unsigned VirtReg) const { - return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); + return LiveVirtRegs.find(Register::virtReg2Index(VirtReg)); } void allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint); @@ -264,7 +264,7 @@ int RegAllocFast::getStackSpaceFor(unsigned VirtReg) { /// Returns false if \p VirtReg is known to not live out of the current block. bool RegAllocFast::mayLiveOut(unsigned VirtReg) { - if (MayLiveAcrossBlocks.test(TargetRegisterInfo::virtReg2Index(VirtReg))) { + if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) { // Cannot be live-out if there are no successors. return !MBB->succ_empty(); } @@ -272,7 +272,7 @@ bool RegAllocFast::mayLiveOut(unsigned VirtReg) { // If this block loops back to itself, it would be necessary to check whether // the use comes after the def. if (MBB->isSuccessor(MBB)) { - MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg)); + MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); return true; } @@ -282,7 +282,7 @@ bool RegAllocFast::mayLiveOut(unsigned VirtReg) { unsigned C = 0; for (const MachineInstr &UseInst : MRI->reg_nodbg_instructions(VirtReg)) { if (UseInst.getParent() != MBB || ++C >= Limit) { - MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg)); + MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); // Cannot be live-out if there are no successors. return !MBB->succ_empty(); } @@ -293,7 +293,7 @@ bool RegAllocFast::mayLiveOut(unsigned VirtReg) { /// Returns false if \p VirtReg is known to not be live into the current block. bool RegAllocFast::mayLiveIn(unsigned VirtReg) { - if (MayLiveAcrossBlocks.test(TargetRegisterInfo::virtReg2Index(VirtReg))) + if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) return !MBB->pred_empty(); // See if the first \p Limit def of the register are all in the current block. @@ -301,7 +301,7 @@ bool RegAllocFast::mayLiveIn(unsigned VirtReg) { unsigned C = 0; for (const MachineInstr &DefInst : MRI->def_instructions(VirtReg)) { if (DefInst.getParent() != MBB || ++C >= Limit) { - MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg)); + MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); return !MBB->pred_empty(); } } @@ -394,7 +394,7 @@ void RegAllocFast::killVirtReg(LiveReg &LR) { /// Mark virtreg as no longer available. void RegAllocFast::killVirtReg(unsigned VirtReg) { - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + assert(Register::isVirtualRegister(VirtReg) && "killVirtReg needs a virtual register"); LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); if (LRI != LiveVirtRegs.end() && LRI->PhysReg) @@ -405,7 +405,7 @@ void RegAllocFast::killVirtReg(unsigned VirtReg) { /// stack slot if needed. 
void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) { - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + assert(Register::isVirtualRegister(VirtReg) && "Spilling a physical register is illegal!"); LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && @@ -455,9 +455,8 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) { if (MO.isUndef()) return; - unsigned PhysReg = MO.getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && - "Bad usePhysReg operand"); + Register PhysReg = MO.getReg(); + assert(Register::isPhysicalRegister(PhysReg) && "Bad usePhysReg operand"); markRegUsedInInstr(PhysReg); switch (PhysRegState[PhysReg]) { @@ -626,9 +625,9 @@ unsigned RegAllocFast::traceCopyChain(unsigned Reg) const { static const unsigned ChainLengthLimit = 3; unsigned C = 0; do { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return Reg; - assert(TargetRegisterInfo::isVirtualRegister(Reg)); + assert(Register::isVirtualRegister(Reg)); MachineInstr *VRegDef = MRI->getUniqueVRegDef(Reg); if (!VRegDef || !isCoalescable(*VRegDef)) @@ -646,7 +645,7 @@ unsigned RegAllocFast::traceCopies(unsigned VirtReg) const { unsigned C = 0; for (const MachineInstr &MI : MRI->def_instructions(VirtReg)) { if (isCoalescable(MI)) { - unsigned Reg = MI.getOperand(1).getReg(); + Register Reg = MI.getOperand(1).getReg(); Reg = traceCopyChain(Reg); if (Reg != 0) return Reg; @@ -662,7 +661,7 @@ unsigned RegAllocFast::traceCopies(unsigned VirtReg) const { void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) { const unsigned VirtReg = LR.VirtReg; - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + assert(Register::isVirtualRegister(VirtReg) && "Can only allocate virtual registers"); const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); @@ -671,8 +670,8 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) { << " with hint " << printReg(Hint0, TRI) << '\n'); // Take hint when possible. - if (TargetRegisterInfo::isPhysicalRegister(Hint0) && - MRI->isAllocatable(Hint0) && RC.contains(Hint0)) { + if (Register::isPhysicalRegister(Hint0) && MRI->isAllocatable(Hint0) && + RC.contains(Hint0)) { // Ignore the hint if we would have to spill a dirty register. unsigned Cost = calcSpillCost(Hint0); if (Cost < spillDirty) { @@ -692,9 +691,8 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) { // Try other hint. unsigned Hint1 = traceCopies(VirtReg); - if (TargetRegisterInfo::isPhysicalRegister(Hint1) && - MRI->isAllocatable(Hint1) && RC.contains(Hint1) && - !isRegUsedInInstr(Hint1)) { + if (Register::isPhysicalRegister(Hint1) && MRI->isAllocatable(Hint1) && + RC.contains(Hint1) && !isRegUsedInInstr(Hint1)) { // Ignore the hint if we would have to spill a dirty register. 
unsigned Cost = calcSpillCost(Hint1); if (Cost < spillDirty) { @@ -752,8 +750,8 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) { void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) { assert(MO.isUndef() && "expected undef use"); - unsigned VirtReg = MO.getReg(); - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Expected virtreg"); + Register VirtReg = MO.getReg(); + assert(Register::isVirtualRegister(VirtReg) && "Expected virtreg"); LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); MCPhysReg PhysReg; @@ -778,14 +776,13 @@ void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) { /// Allocates a register for VirtReg and mark it as dirty. MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg, unsigned Hint) { - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "Not a virtual register"); + assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator LRI; bool New; std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); if (!LRI->PhysReg) { // If there is no hint, peek at the only use of this register. - if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) && + if ((!Hint || !Register::isPhysicalRegister(Hint)) && MRI->hasOneNonDBGUse(VirtReg)) { const MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(VirtReg); // It's a copy, use the destination register as a hint. @@ -812,8 +809,7 @@ RegAllocFast::LiveReg &RegAllocFast::reloadVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg, unsigned Hint) { - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "Not a virtual register"); + assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator LRI; bool New; std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); @@ -866,7 +862,7 @@ bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO, } // Handle subregister index. - MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0); + MO.setReg(PhysReg ? 
TRI->getSubReg(PhysReg, MO.getSubReg()) : Register()); MO.setIsRenamable(true); MO.setSubReg(0); @@ -893,8 +889,8 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, SmallSet ThroughRegs; for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; if (MO.isEarlyClobber() || (MO.isUse() && MO.isTied()) || (MO.getSubReg() && MI.readsVirtualRegister(Reg))) { @@ -908,8 +904,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, LLVM_DEBUG(dbgs() << "\nChecking for physdef collisions.\n"); for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + Register Reg = MO.getReg(); + if (!Reg || !Register::isPhysicalRegister(Reg)) + continue; markRegUsedInInstr(Reg); for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { if (ThroughRegs.count(PhysRegState[*AI])) @@ -922,8 +919,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { MachineOperand &MO = MI.getOperand(I); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) + continue; if (MO.isUse()) { if (!MO.isTied()) continue; LLVM_DEBUG(dbgs() << "Operand " << I << "(" << MO @@ -947,8 +945,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { const MachineOperand &MO = MI.getOperand(I); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) + continue; if (!MO.isEarlyClobber()) continue; // Note: defineVirtReg may invalidate MO. 
@@ -961,8 +960,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, UsedInInstr.clear(); for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; - unsigned Reg = MO.getReg(); - if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + Register Reg = MO.getReg(); + if (!Reg || !Register::isPhysicalRegister(Reg)) + continue; LLVM_DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI) << " as used in instr\n"); markRegUsedInInstr(Reg); @@ -1002,10 +1002,8 @@ void RegAllocFast::dumpState() { e = LiveVirtRegs.end(); i != e; ++i) { if (!i->PhysReg) continue; - assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) && - "Bad map key"); - assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) && - "Bad map value"); + assert(Register::isVirtualRegister(i->VirtReg) && "Bad map key"); + assert(Register::isPhysicalRegister(i->PhysReg) && "Bad map value"); assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map"); } } @@ -1045,9 +1043,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { continue; } if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { VirtOpEnd = i+1; if (MO.isUse()) { hasTiedOps = hasTiedOps || @@ -1096,8 +1094,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { for (unsigned I = 0; I != VirtOpEnd; ++I) { MachineOperand &MO = MI.getOperand(I); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) + continue; if (MO.isUse()) { if (MO.isUndef()) { HasUndefUse = true; @@ -1124,8 +1123,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { for (MachineOperand &MO : MI.uses()) { if (!MO.isReg() || !MO.isUse()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; assert(MO.isUndef() && "Should only have undef virtreg uses left"); @@ -1139,8 +1138,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { if (hasEarlyClobbers) { for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + Register Reg = MO.getReg(); + if (!Reg || !Register::isPhysicalRegister(Reg)) + continue; // Look for physreg defs and tied uses. if (!MO.isDef() && !MO.isTied()) continue; markRegUsedInInstr(Reg); @@ -1166,10 +1166,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { const MachineOperand &MO = MI.getOperand(I); if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); - if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg) || - !MRI->isAllocatable(Reg)) + if (!Reg || !Register::isPhysicalRegister(Reg) || !MRI->isAllocatable(Reg)) continue; definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved); } @@ -1180,10 +1179,10 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { const MachineOperand &MO = MI.getOperand(I); if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // We have already dealt with phys regs in the previous scan. 
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) continue; MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg); if (setPhysReg(MI, MI.getOperand(I), PhysReg)) { @@ -1215,8 +1214,8 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) { // mostly constants and frame indices. if (!MO.isReg()) return; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) return; // See if this virtual register has already been allocated to a physical diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 771fc46415db..d27db678f02a 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -685,7 +685,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // The queue holds (size, reg) pairs. const unsigned Size = LI->getSize(); const unsigned Reg = LI->reg; - assert(TargetRegisterInfo::isVirtualRegister(Reg) && + assert(Register::isVirtualRegister(Reg) && "Can only enqueue virtual registers"); unsigned Prio; @@ -899,7 +899,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, // Check if any interfering live range is heavier than MaxWeight. for (unsigned i = Q.interferingVRegs().size(); i; --i) { LiveInterval *Intf = Q.interferingVRegs()[i - 1]; - assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) && + assert(Register::isVirtualRegister(Intf->reg) && "Only expecting virtual register interference from query"); // Do not allow eviction of a virtual register if we are in the middle @@ -984,7 +984,7 @@ bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg, continue; // Cannot evict non virtual reg interference. - if (!TargetRegisterInfo::isVirtualRegister(Intf->reg)) + if (!Register::isVirtualRegister(Intf->reg)) return false; // Never evict spill products. They cannot split or spill. if (getStage(*Intf) == RS_Done) @@ -2881,7 +2881,7 @@ void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) { continue; } // Get the current assignment. - Register OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg) + Register OtherPhysReg = Register::isPhysicalRegister(OtherReg) ? OtherReg : VRM->getPhys(OtherReg); // Push the collected information. @@ -2919,7 +2919,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { SmallVector RecoloringCandidates; HintsInfo Info; unsigned Reg = VirtReg.reg; - unsigned PhysReg = VRM->getPhys(Reg); + Register PhysReg = VRM->getPhys(Reg); // Start the recoloring algorithm from the input live-interval, then // it will propagate to the ones that are copy-related with it. Visited.insert(Reg); @@ -2932,7 +2932,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { Reg = RecoloringCandidates.pop_back_val(); // We cannot recolor physical register. - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) continue; assert(VRM->hasPhys(Reg) && "We have unallocated variable!!"); @@ -2940,7 +2940,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { // Get the live interval mapped with this virtual register to be able // to check for the interference with the new color. LiveInterval &LI = LIS->getInterval(Reg); - unsigned CurrPhys = VRM->getPhys(Reg); + Register CurrPhys = VRM->getPhys(Reg); // Check that the new color matches the register class constraints and // that it is free for this live range. 
if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) || @@ -3021,7 +3021,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { /// getting rid of 2 copies. void RAGreedy::tryHintsRecoloring() { for (LiveInterval *LI : SetOfBrokenHints) { - assert(TargetRegisterInfo::isVirtualRegister(LI->reg) && + assert(Register::isVirtualRegister(LI->reg) && "Recoloring is possible only for virtual registers"); // Some dead defs may be around (e.g., because of debug uses). // Ignore those. diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 7a5a6c148ed4..3c4a46b12f99 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -558,7 +558,7 @@ void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF, // Iterate over all live ranges. for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + unsigned Reg = Register::index2VirtReg(I); if (MRI.reg_nodbg_empty(Reg)) continue; VRegsToAlloc.insert(Reg); @@ -824,11 +824,11 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { if (!VRegsToAlloc.empty()) { const TargetSubtargetInfo &Subtarget = MF.getSubtarget(); std::unique_ptr ConstraintsRoot = - llvm::make_unique(); - ConstraintsRoot->addConstraint(llvm::make_unique()); - ConstraintsRoot->addConstraint(llvm::make_unique()); + std::make_unique(); + ConstraintsRoot->addConstraint(std::make_unique()); + ConstraintsRoot->addConstraint(std::make_unique()); if (PBQPCoalescing) - ConstraintsRoot->addConstraint(llvm::make_unique()); + ConstraintsRoot->addConstraint(std::make_unique()); ConstraintsRoot->addConstraint(Subtarget.getCustomPBQPConstraints()); bool PBQPAllocComplete = false; @@ -848,7 +848,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { std::string GraphFileName = FullyQualifiedName + "." + RS.str() + ".pbqpgraph"; std::error_code EC; - raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text); + raw_fd_ostream OS(GraphFileName, EC, sys::fs::OF_Text); LLVM_DEBUG(dbgs() << "Dumping graph for round " << Round << " to \"" << GraphFileName << "\"\n"); G.dump(OS); diff --git a/lib/CodeGen/RegUsageInfoCollector.cpp b/lib/CodeGen/RegUsageInfoCollector.cpp index b37dfada7101..757ff0e44953 100644 --- a/lib/CodeGen/RegUsageInfoCollector.cpp +++ b/lib/CodeGen/RegUsageInfoCollector.cpp @@ -142,6 +142,13 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) { auto SetRegAsDefined = [&RegMask] (unsigned Reg) { RegMask[Reg / 32] &= ~(1u << Reg % 32); }; + + // Some targets can clobber registers "inside" a call, typically in + // linker-generated code. + for (const MCPhysReg Reg : TRI->getIntraCallClobberedRegs(&MF)) + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + SetRegAsDefined(*AI); + // Scan all the physical registers. When a register is defined in the current // function set it and all the aliasing registers as defined in the regmask. // FIXME: Rewrite to use regunits. 
@@ -164,7 +171,8 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) { SetRegAsDefined(PReg); } - if (TargetFrameLowering::isSafeForNoCSROpt(F)) { + if (TargetFrameLowering::isSafeForNoCSROpt(F) && + MF.getSubtarget().getFrameLowering()->isProfitableForNoCSROpt(F)) { ++NumCSROpt; LLVM_DEBUG(dbgs() << MF.getName() << " function optimized for not having CSR.\n"); diff --git a/lib/CodeGen/RegUsageInfoPropagate.cpp b/lib/CodeGen/RegUsageInfoPropagate.cpp index fc4be82d215e..0205e6193741 100644 --- a/lib/CodeGen/RegUsageInfoPropagate.cpp +++ b/lib/CodeGen/RegUsageInfoPropagate.cpp @@ -130,7 +130,11 @@ bool RegUsageInfoPropagation::runOnMachineFunction(MachineFunction &MF) { }; if (const Function *F = findCalledFunction(M, MI)) { - UpdateRegMask(*F); + if (F->isDefinitionExact()) { + UpdateRegMask(*F); + } else { + LLVM_DEBUG(dbgs() << "Function definition is not exact\n"); + } } else { LLVM_DEBUG(dbgs() << "Failed to find call target function\n"); } diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 2db6ab454cea..6ff5ddbc023d 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -406,8 +406,8 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { Partial = SrcSub || DstSub; // If one register is a physreg, it must be Dst. - if (TargetRegisterInfo::isPhysicalRegister(Src)) { - if (TargetRegisterInfo::isPhysicalRegister(Dst)) + if (Register::isPhysicalRegister(Src)) { + if (Register::isPhysicalRegister(Dst)) return false; std::swap(Src, Dst); std::swap(SrcSub, DstSub); @@ -416,7 +416,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo(); - if (TargetRegisterInfo::isPhysicalRegister(Dst)) { + if (Register::isPhysicalRegister(Dst)) { // Eliminate DstSub on a physreg. if (DstSub) { Dst = TRI.getSubReg(Dst, DstSub); @@ -474,8 +474,8 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { CrossClass = NewRC != DstRC || NewRC != SrcRC; } // Check our invariants - assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual"); - assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) && + assert(Register::isVirtualRegister(Src) && "Src must be virtual"); + assert(!(Register::isPhysicalRegister(Dst) && DstSub) && "Cannot have a physical SubIdx"); SrcReg = Src; DstReg = Dst; @@ -483,7 +483,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { } bool CoalescerPair::flip() { - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + if (Register::isPhysicalRegister(DstReg)) return false; std::swap(SrcReg, DstReg); std::swap(SrcIdx, DstIdx); @@ -507,8 +507,8 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { } // Now check that Dst matches DstReg. - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { - if (!TargetRegisterInfo::isPhysicalRegister(Dst)) + if (Register::isPhysicalRegister(DstReg)) { + if (!Register::isPhysicalRegister(Dst)) return false; assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state."); // DstSub could be set for a physreg from INSERT_SUBREG. 
@@ -802,7 +802,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, return { false, false }; MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); - unsigned NewReg = NewDstMO.getReg(); + Register NewReg = NewDstMO.getReg(); if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill()) return { false, false }; @@ -835,8 +835,8 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx); if (!NewMI) return { false, false }; - if (TargetRegisterInfo::isVirtualRegister(IntA.reg) && - TargetRegisterInfo::isVirtualRegister(IntB.reg) && + if (Register::isVirtualRegister(IntA.reg) && + Register::isVirtualRegister(IntB.reg) && !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg))) return { false, false }; if (NewMI != DefMI) { @@ -877,7 +877,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, continue; // Kill flags are no longer accurate. They are recomputed after RA. UseMO.setIsKill(false); - if (TargetRegisterInfo::isPhysicalRegister(NewReg)) + if (Register::isPhysicalRegister(NewReg)) UseMO.substPhysReg(NewReg, *TRI); else UseMO.setReg(NewReg); @@ -1188,7 +1188,7 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, /// Returns true if @p MI defines the full vreg @p Reg, as opposed to just /// defining a subregister. static bool definesFullReg(const MachineInstr &MI, unsigned Reg) { - assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && + assert(!Register::isPhysicalRegister(Reg) && "This code cannot handle physreg aliasing"); for (const MachineOperand &Op : MI.operands()) { if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) @@ -1209,7 +1209,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx(); unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg(); unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx(); - if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) + if (Register::isPhysicalRegister(SrcReg)) return false; LiveInterval &SrcInt = LIS->getInterval(SrcReg); @@ -1240,7 +1240,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, return false; // Only support subregister destinations when the def is read-undef. MachineOperand &DstOperand = CopyMI->getOperand(0); - unsigned CopyDstReg = DstOperand.getReg(); + Register CopyDstReg = DstOperand.getReg(); if (DstOperand.getSubReg() && !DstOperand.isUndef()) return false; @@ -1254,7 +1254,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF); if (!DefMI->isImplicitDef()) { - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { + if (Register::isPhysicalRegister(DstReg)) { unsigned NewDstReg = DstReg; unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), @@ -1269,7 +1269,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, } else { // Theoretically, some stack frame reference could exist. Just make sure // it hasn't actually happened. - assert(TargetRegisterInfo::isVirtualRegister(DstReg) && + assert(Register::isVirtualRegister(DstReg) && "Only expect to deal with virtual or physical registers"); } } @@ -1317,7 +1317,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, if (MO.isReg()) { assert(MO.isImplicit() && "No explicit operands after implicit operands."); // Discard VReg implicit defs. 
- if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + if (Register::isPhysicalRegister(MO.getReg())) ImplicitOps.push_back(MO); } } @@ -1336,12 +1336,12 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, MachineOperand &MO = NewMI.getOperand(i); if (MO.isReg() && MO.isDef()) { assert(MO.isImplicit() && MO.isDead() && - TargetRegisterInfo::isPhysicalRegister(MO.getReg())); + Register::isPhysicalRegister(MO.getReg())); NewMIImplDefs.push_back(MO.getReg()); } } - if (TargetRegisterInfo::isVirtualRegister(DstReg)) { + if (Register::isVirtualRegister(DstReg)) { unsigned NewIdx = NewMI.getOperand(0).getSubReg(); if (DefRC != nullptr) { @@ -1428,7 +1428,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, } else if (NewMI.getOperand(0).getReg() != CopyDstReg) { // The New instruction may be defining a sub-register of what's actually // been asked for. If so it must implicitly define the whole thing. - assert(TargetRegisterInfo::isPhysicalRegister(DstReg) && + assert(Register::isPhysicalRegister(DstReg) && "Only expect virtual or physical registers in remat"); NewMI.getOperand(0).setIsDead(true); NewMI.addOperand(MachineOperand::CreateReg( @@ -1480,7 +1480,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) { MachineInstr *UseMI = UseMO.getParent(); if (UseMI->isDebugValue()) { - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + if (Register::isPhysicalRegister(DstReg)) UseMO.substPhysReg(DstReg, *TRI); else UseMO.setReg(DstReg); @@ -1651,7 +1651,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx) { - bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); + bool DstIsPhys = Register::isPhysicalRegister(DstReg); LiveInterval *DstInt = DstIsPhys ? 
nullptr : &LIS->getInterval(DstReg); if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) { @@ -2411,8 +2411,8 @@ std::pair JoinVals::followCopyChain( assert(MI && "No defining instruction"); if (!MI->isFullCopy()) return std::make_pair(VNI, TrackReg); - unsigned SrcReg = MI->getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + Register SrcReg = MI->getOperand(1).getReg(); + if (!Register::isVirtualRegister(SrcReg)) return std::make_pair(VNI, TrackReg); const LiveInterval &LI = LIS->getInterval(SrcReg); @@ -3189,9 +3189,9 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl &ErasedInstrs, MachineInstr *MI = Indexes->getInstructionFromIndex(Def); assert(MI && "No instruction to erase"); if (MI->isCopy()) { - unsigned Reg = MI->getOperand(1).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg) && - Reg != CP.getSrcReg() && Reg != CP.getDstReg()) + Register Reg = MI->getOperand(1).getReg(); + if (Register::isVirtualRegister(Reg) && Reg != CP.getSrcReg() && + Reg != CP.getDstReg()) ShrinkRegs.push_back(Reg); } ErasedInstrs.insert(MI); @@ -3463,10 +3463,10 @@ static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) { if (Copy->getOperand(1).isUndef()) return false; - unsigned SrcReg = Copy->getOperand(1).getReg(); - unsigned DstReg = Copy->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(SrcReg) - || TargetRegisterInfo::isPhysicalRegister(DstReg)) + Register SrcReg = Copy->getOperand(1).getReg(); + Register DstReg = Copy->getOperand(0).getReg(); + if (Register::isPhysicalRegister(SrcReg) || + Register::isPhysicalRegister(DstReg)) return false; return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg)) @@ -3526,12 +3526,11 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg)) return false; // Check if the destination of this copy has any other affinity. - if (TargetRegisterInfo::isPhysicalRegister(DstReg) || + if (Register::isPhysicalRegister(DstReg) || // If SrcReg is a physical register, the copy won't be coalesced. // Ignoring it may have other side effect (like missing // rematerialization). So keep it. - TargetRegisterInfo::isPhysicalRegister(SrcReg) || - !isTerminalReg(DstReg, Copy, MRI)) + Register::isPhysicalRegister(SrcReg) || !isTerminalReg(DstReg, Copy, MRI)) return false; // DstReg is a terminal node. Check if it interferes with any other @@ -3554,7 +3553,7 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { if (OtherReg == SrcReg) OtherReg = OtherSrcReg; // Check if OtherReg is a non-terminal. - if (TargetRegisterInfo::isPhysicalRegister(OtherReg) || + if (Register::isPhysicalRegister(OtherReg) || isTerminalReg(OtherReg, MI, MRI)) continue; // Check that OtherReg interfere with DstReg. 
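The bulk of the churn in the register-allocator and coalescer hunks above is mechanical: unsigned register locals become Register, and the static TargetRegisterInfo::isVirtualRegister / isPhysicalRegister / index2VirtReg / virtReg2Index helpers are replaced by the equivalents on the Register class. The stand-alone sketch below is only an illustrative model of that encoding, assuming the usual scheme in which virtual register numbers occupy the top half of the 32-bit space; it is not the actual llvm/CodeGen/Register.h header.

// register_model.cpp -- minimal stand-in for llvm::Register, for illustration only.
#include <cassert>
#include <iostream>

class Register {
  unsigned Reg = 0;
  // Assumed encoding: virtual register numbers have the top bit set,
  // physical register numbers do not.
  static const unsigned FirstVirtual = 1u << 31;

public:
  Register(unsigned R = 0) : Reg(R) {}

  static bool isVirtualRegister(unsigned R) { return (R & FirstVirtual) != 0; }
  static bool isPhysicalRegister(unsigned R) { return R != 0 && (R & FirstVirtual) == 0; }
  static unsigned index2VirtReg(unsigned Index) { return Index | FirstVirtual; }
  static unsigned virtReg2Index(unsigned R) {
    assert(isVirtualRegister(R) && "not a virtual register");
    return R & ~FirstVirtual;
  }

  bool isVirtual() const { return isVirtualRegister(Reg); }
  bool isPhysical() const { return isPhysicalRegister(Reg); }

  // Implicit conversion keeps old call sites that expect a raw unsigned working.
  operator unsigned() const { return Reg; }
};

int main() {
  Register V = Register::index2VirtReg(5); // the sixth virtual register
  Register P = 42;                         // some physical register number
  std::cout << V.isVirtual() << ' ' << Register::virtReg2Index(V) << '\n'; // 1 5
  std::cout << P.isPhysical() << '\n';                                     // 1
}

Because the class converts implicitly to and from unsigned, a variable's declared type can change without touching the expressions that use it, which is why most of these hunks are one-line substitutions.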
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index 7d9b3aa9b2d7..bf192d1c530d 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -134,6 +134,22 @@ void PressureDiff::dump(const TargetRegisterInfo &TRI) const { } dbgs() << '\n'; } + +LLVM_DUMP_METHOD +void PressureChange::dump() const { + dbgs() << "[" << getPSetOrMax() << ", " << getUnitInc() << "]\n"; +} + +void RegPressureDelta::dump() const { + dbgs() << "[Excess="; + Excess.dump(); + dbgs() << ", CriticalMax="; + CriticalMax.dump(); + dbgs() << ", CurrentMax="; + CurrentMax.dump(); + dbgs() << "]\n"; +} + #endif void RegPressureTracker::increaseRegPressure(unsigned RegUnit, @@ -219,7 +235,7 @@ void LiveRegSet::clear() { } static const LiveRange *getLiveRange(const LiveIntervals &LIS, unsigned Reg) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) return &LIS.getInterval(Reg); return LIS.getCachedRegUnit(Reg); } @@ -345,7 +361,7 @@ void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) { assert(isBottomClosed() && "need bottom-up tracking to intialize."); for (const RegisterMaskPair &Pair : P.LiveOutRegs) { unsigned RegUnit = Pair.RegUnit; - if (TargetRegisterInfo::isVirtualRegister(RegUnit) + if (Register::isVirtualRegister(RegUnit) && !RPTracker.hasUntiedDef(RegUnit)) increaseSetPressure(LiveThruPressure, *MRI, RegUnit, LaneBitmask::getNone(), Pair.LaneMask); @@ -406,7 +422,7 @@ static LaneBitmask getLanesWithProperty(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, bool TrackLaneMasks, unsigned RegUnit, SlotIndex Pos, LaneBitmask SafeDefault, bool(*Property)(const LiveRange &LR, SlotIndex Pos)) { - if (TargetRegisterInfo::isVirtualRegister(RegUnit)) { + if (Register::isVirtualRegister(RegUnit)) { const LiveInterval &LI = LIS.getInterval(RegUnit); LaneBitmask Result; if (TrackLaneMasks && LI.hasSubRanges()) { @@ -483,7 +499,7 @@ class RegisterOperandsCollector { void collectOperand(const MachineOperand &MO) const { if (!MO.isReg() || !MO.getReg()) return; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (MO.isUse()) { if (!MO.isUndef() && !MO.isInternalRead()) pushReg(Reg, RegOpers.Uses); @@ -503,7 +519,7 @@ class RegisterOperandsCollector { void pushReg(unsigned Reg, SmallVectorImpl &RegUnits) const { - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneBitmask::getAll())); } else if (MRI.isAllocatable(Reg)) { for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) @@ -514,7 +530,7 @@ class RegisterOperandsCollector { void collectOperandLanes(const MachineOperand &MO) const { if (!MO.isReg() || !MO.getReg()) return; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); unsigned SubRegIdx = MO.getSubReg(); if (MO.isUse()) { if (!MO.isUndef() && !MO.isInternalRead()) @@ -535,7 +551,7 @@ class RegisterOperandsCollector { void pushRegLanes(unsigned Reg, unsigned SubRegIdx, SmallVectorImpl &RegUnits) const { - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { LaneBitmask LaneMask = SubRegIdx != 0 ? TRI.getSubRegIndexLaneMask(SubRegIdx) : MRI.getMaxLaneMaskForVReg(Reg); @@ -590,7 +606,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, // If the def is all that is live after the instruction, then in case // of a subregister def we need a read-undef flag. 
unsigned RegUnit = I->RegUnit; - if (TargetRegisterInfo::isVirtualRegister(RegUnit) && + if (Register::isVirtualRegister(RegUnit) && AddFlagsMI != nullptr && (LiveAfter & ~I->LaneMask).none()) AddFlagsMI->setRegisterDefReadUndef(RegUnit); @@ -616,7 +632,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, if (AddFlagsMI != nullptr) { for (const RegisterMaskPair &P : DeadDefs) { unsigned RegUnit = P.RegUnit; - if (!TargetRegisterInfo::isVirtualRegister(RegUnit)) + if (!Register::isVirtualRegister(RegUnit)) continue; LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, RegUnit, Pos.getDeadSlot()); @@ -825,7 +841,7 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers, if (TrackUntiedDefs) { for (const RegisterMaskPair &Def : RegOpers.Defs) { unsigned RegUnit = Def.RegUnit; - if (TargetRegisterInfo::isVirtualRegister(RegUnit) && + if (Register::isVirtualRegister(RegUnit) && (LiveRegs.contains(RegUnit) & Def.LaneMask).none()) UntiedDefs.insert(RegUnit); } diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index bb19110e6d70..ec0868acab38 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -49,7 +49,7 @@ using namespace llvm; STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); -void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) { +void RegScavenger::setRegUsed(Register Reg, LaneBitmask LaneMask) { LiveUnits.addRegMasked(Reg, LaneMask); } @@ -96,12 +96,12 @@ void RegScavenger::enterBasicBlockEnd(MachineBasicBlock &MBB) { } } -void RegScavenger::addRegUnits(BitVector &BV, unsigned Reg) { +void RegScavenger::addRegUnits(BitVector &BV, Register Reg) { for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) BV.set(*RUI); } -void RegScavenger::removeRegUnits(BitVector &BV, unsigned Reg) { +void RegScavenger::removeRegUnits(BitVector &BV, Register Reg) { for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) BV.reset(*RUI); } @@ -133,8 +133,8 @@ void RegScavenger::determineKillsAndDefs() { } if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg) || isReserved(Reg)) + Register Reg = MO.getReg(); + if (!Register::isPhysicalRegister(Reg) || isReserved(Reg)) continue; if (MO.isUse()) { @@ -204,8 +204,8 @@ void RegScavenger::forward() { for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg) || isReserved(Reg)) + Register Reg = MO.getReg(); + if (!Register::isPhysicalRegister(Reg) || isReserved(Reg)) continue; if (MO.isUse()) { if (MO.isUndef()) @@ -278,14 +278,14 @@ void RegScavenger::backward() { --MBBI; } -bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const { +bool RegScavenger::isRegUsed(Register Reg, bool includeReserved) const { if (isReserved(Reg)) return includeReserved; return !LiveUnits.available(Reg); } -unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { - for (unsigned Reg : *RC) { +Register RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { + for (Register Reg : *RC) { if (!isRegUsed(Reg)) { LLVM_DEBUG(dbgs() << "Scavenger found unused reg: " << printReg(Reg, TRI) << "\n"); @@ -297,13 +297,13 @@ unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) { BitVector Mask(TRI->getNumRegs()); - for (unsigned Reg : *RC) + for (Register Reg : *RC) if 
(!isRegUsed(Reg)) Mask.set(Reg); return Mask; } -unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, +Register RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, BitVector &Candidates, unsigned InstrLimit, MachineBasicBlock::iterator &UseMI) { @@ -329,7 +329,7 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, Candidates.clearBitsNotInMask(MO.getRegMask()); if (!MO.isReg() || MO.isUndef() || !MO.getReg()) continue; - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (Register::isVirtualRegister(MO.getReg())) { if (MO.isDef()) isVirtDefInsn = true; else if (MO.isKill()) @@ -430,7 +430,7 @@ findSurvivorBackwards(const MachineRegisterInfo &MRI, // be usefull for this other vreg as well later. bool FoundVReg = false; for (const MachineOperand &MO : MI.operands()) { - if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) { FoundVReg = true; break; } @@ -457,7 +457,7 @@ static unsigned getFrameIndexOperandNum(MachineInstr &MI) { } RegScavenger::ScavengedInfo & -RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj, +RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj, MachineBasicBlock::iterator Before, MachineBasicBlock::iterator &UseMI) { // Find an available scavenging slot with size and alignment matching @@ -531,7 +531,7 @@ RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj, return Scavenged[SI]; } -unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, +Register RegScavenger::scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj, bool AllowSpill) { MachineInstr &MI = *I; @@ -542,7 +542,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // Exclude all the registers being used by the instruction. for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) && - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + !Register::isVirtualRegister(MO.getReg())) for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) Candidates.reset(*AI); } @@ -556,7 +556,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // Find the register whose use is furthest away. MachineBasicBlock::iterator UseMI; - unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI); + Register SReg = findSurvivorReg(I, Candidates, 25, UseMI); // If we found an unused register there is no reason to spill it. if (!isRegUsed(SReg)) { @@ -576,7 +576,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, return SReg; } -unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC, +Register RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill) { @@ -620,8 +620,8 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC, /// \p ReserveAfter controls whether the scavenged register needs to be reserved /// after the current instruction, otherwise it will only be reserved before the /// current instruction. 
-static unsigned scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS, - unsigned VReg, bool ReserveAfter) { +static Register scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS, + Register VReg, bool ReserveAfter) { const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); #ifndef NDEBUG // Verify that all definitions and uses are in the same basic block. @@ -664,7 +664,7 @@ static unsigned scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS, // spill/reload if necessary. int SPAdj = 0; const TargetRegisterClass &RC = *MRI.getRegClass(VReg); - unsigned SReg = RS.scavengeRegisterBackwards(RC, DefMI.getIterator(), + Register SReg = RS.scavengeRegisterBackwards(RC, DefMI.getIterator(), ReserveAfter, SPAdj); MRI.replaceRegWith(VReg, SReg); ++NumScavengedRegs; @@ -694,17 +694,17 @@ static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI, for (const MachineOperand &MO : NMI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // We only care about virtual registers and ignore virtual registers // created by the target callbacks in the process (those will be handled // in a scavenging round). - if (!TargetRegisterInfo::isVirtualRegister(Reg) || - TargetRegisterInfo::virtReg2Index(Reg) >= InitialNumVirtRegs) + if (!Register::isVirtualRegister(Reg) || + Register::virtReg2Index(Reg) >= InitialNumVirtRegs) continue; if (!MO.readsReg()) continue; - unsigned SReg = scavengeVReg(MRI, RS, Reg, true); + Register SReg = scavengeVReg(MRI, RS, Reg, true); N->addRegisterKilled(SReg, &TRI, false); RS.setRegUsed(SReg); } @@ -716,10 +716,10 @@ static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI, for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // Only vregs, no newly created vregs (see above). - if (!TargetRegisterInfo::isVirtualRegister(Reg) || - TargetRegisterInfo::virtReg2Index(Reg) >= InitialNumVirtRegs) + if (!Register::isVirtualRegister(Reg) || + Register::virtReg2Index(Reg) >= InitialNumVirtRegs) continue; // We have to look at all operands anyway so we can precalculate here // whether there is a reading operand. 
This allows us to skip the use @@ -730,14 +730,14 @@ static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI, NextInstructionReadsVReg = true; } if (MO.isDef()) { - unsigned SReg = scavengeVReg(MRI, RS, Reg, false); + Register SReg = scavengeVReg(MRI, RS, Reg, false); I->addRegisterDead(SReg, &TRI, false); } } } #ifndef NDEBUG for (const MachineOperand &MO : MBB.front().operands()) { - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) continue; assert(!MO.isInternalRead() && "Cannot assign inside bundles"); assert((!MO.isUndef() || MO.isDef()) && "Cannot handle undef uses"); diff --git a/lib/CodeGen/RenameIndependentSubregs.cpp b/lib/CodeGen/RenameIndependentSubregs.cpp index 22cff48c3051..e3f5abb6301f 100644 --- a/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/lib/CodeGen/RenameIndependentSubregs.cpp @@ -138,7 +138,7 @@ bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const { LLVM_DEBUG(dbgs() << printReg(Reg) << ": Splitting into newly created:"); for (unsigned I = 1, NumClasses = Classes.getNumClasses(); I < NumClasses; ++I) { - unsigned NewVReg = MRI->createVirtualRegister(RegClass); + Register NewVReg = MRI->createVirtualRegister(RegClass); LiveInterval &NewLI = LIS->createEmptyInterval(NewVReg); Intervals.push_back(&NewLI); LLVM_DEBUG(dbgs() << ' ' << printReg(NewVReg)); @@ -390,7 +390,7 @@ bool RenameIndependentSubregs::runOnMachineFunction(MachineFunction &MF) { // there can't be any further splitting. bool Changed = false; for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + unsigned Reg = Register::index2VirtReg(I); if (!LIS->hasInterval(Reg)) continue; LiveInterval &LI = LIS->getInterval(Reg); diff --git a/lib/CodeGen/SafeStack.cpp b/lib/CodeGen/SafeStack.cpp index a6bc7330e2cc..ddbbd0f8d6e9 100644 --- a/lib/CodeGen/SafeStack.cpp +++ b/lib/CodeGen/SafeStack.cpp @@ -871,7 +871,7 @@ public: report_fatal_error("TargetLowering instance is required"); auto *DL = &F.getParent()->getDataLayout(); - auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); auto &ACT = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); // Compute DT and LI only for functions that have the attribute. diff --git a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp index 7776dffb4e9c..b4037499d7d1 100644 --- a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp +++ b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp @@ -173,15 +173,30 @@ static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) { return; } + // If the mask is not v1i1, use scalar bit test operations. This generates + // better results on X86 at least. 
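+ // For example, an illustrative sketch of the loop below: with a <8 x i1>
+ // mask, the mask is first bitcast to an i8 %scalar_mask, and lane Idx is
+ // then tested with
+ //   %bit  = and i8 %scalar_mask, (1 << Idx)
+ //   %cond = icmp ne i8 %bit, 0
+ // instead of extracting each i1 lane with extractelement.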
+ Value *SclrMask; + if (VectorWidth != 1) { + Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); + SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] - // %mask_1 = extractelement <16 x i1> %mask, i32 Idx + // %mask_1 = and i16 %scalar_mask, i32 1 << Idx + // %cond = icmp ne i16 %mask_1, 0 // br i1 %mask_1, label %cond.load, label %else // - - Value *Predicate = Builder.CreateExtractElement(Mask, Idx); + Value *Predicate; + if (VectorWidth != 1) { + Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), + Builder.getIntN(VectorWidth, 0)); + } else { + Predicate = Builder.CreateExtractElement(Mask, Idx); + } // Create "cond" block // @@ -290,13 +305,29 @@ static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) { return; } + // If the mask is not v1i1, use scalar bit test operations. This generates + // better results on X86 at least. + Value *SclrMask; + if (VectorWidth != 1) { + Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); + SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // - // %mask_1 = extractelement <16 x i1> %mask, i32 Idx + // %mask_1 = and i16 %scalar_mask, i32 1 << Idx + // %cond = icmp ne i16 %mask_1, 0 // br i1 %mask_1, label %cond.store, label %else // - Value *Predicate = Builder.CreateExtractElement(Mask, Idx); + Value *Predicate; + if (VectorWidth != 1) { + Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), + Builder.getIntN(VectorWidth, 0)); + } else { + Predicate = Builder.CreateExtractElement(Mask, Idx); + } // Create "cond" block // @@ -392,15 +423,30 @@ static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) { return; } + // If the mask is not v1i1, use scalar bit test operations. This generates + // better results on X86 at least. + Value *SclrMask; + if (VectorWidth != 1) { + Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); + SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // - // %Mask1 = extractelement <16 x i1> %Mask, i32 1 + // %Mask1 = and i16 %scalar_mask, i32 1 << Idx + // %cond = icmp ne i16 %mask_1, 0 // br i1 %Mask1, label %cond.load, label %else // - Value *Predicate = - Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); + Value *Predicate; + if (VectorWidth != 1) { + Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), + Builder.getIntN(VectorWidth, 0)); + } else { + Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); + } // Create "cond" block // @@ -499,14 +545,29 @@ static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) { return; } + // If the mask is not v1i1, use scalar bit test operations. This generates + // better results on X86 at least. 
+ Value *SclrMask; + if (VectorWidth != 1) { + Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); + SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // - // %Mask1 = extractelement <16 x i1> %Mask, i32 Idx + // %Mask1 = and i16 %scalar_mask, i32 1 << Idx + // %cond = icmp ne i16 %mask_1, 0 // br i1 %Mask1, label %cond.store, label %else // - Value *Predicate = - Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); + Value *Predicate; + if (VectorWidth != 1) { + Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), + Builder.getIntN(VectorWidth, 0)); + } else { + Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); + } // Create "cond" block // @@ -555,6 +616,32 @@ static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) { // The result vector Value *VResult = PassThru; + // Shorten the way if the mask is a vector of constants. + if (isConstantIntVector(Mask)) { + unsigned MemIndex = 0; + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast(Mask)->getAggregateElement(Idx)->isNullValue()) + continue; + Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex); + LoadInst *Load = + Builder.CreateAlignedLoad(EltTy, NewPtr, 1, "Load" + Twine(Idx)); + VResult = + Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx)); + ++MemIndex; + } + CI->replaceAllUsesWith(VResult); + CI->eraseFromParent(); + return; + } + + // If the mask is not v1i1, use scalar bit test operations. This generates + // better results on X86 at least. + Value *SclrMask; + if (VectorWidth != 1) { + Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); + SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // @@ -563,8 +650,14 @@ static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) { // br i1 %mask_1, label %cond.load, label %else // - Value *Predicate = - Builder.CreateExtractElement(Mask, Idx); + Value *Predicate; + if (VectorWidth != 1) { + Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), + Builder.getIntN(VectorWidth, 0)); + } else { + Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); + } // Create "cond" block // @@ -633,13 +726,44 @@ static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) { unsigned VectorWidth = VecType->getNumElements(); + // Shorten the way if the mask is a vector of constants. + if (isConstantIntVector(Mask)) { + unsigned MemIndex = 0; + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast(Mask)->getAggregateElement(Idx)->isNullValue()) + continue; + Value *OneElt = + Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); + Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex); + Builder.CreateAlignedStore(OneElt, NewPtr, 1); + ++MemIndex; + } + CI->eraseFromParent(); + return; + } + + // If the mask is not v1i1, use scalar bit test operations. This generates + // better results on X86 at least. 
+ Value *SclrMask; + if (VectorWidth != 1) { + Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); + SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // // %mask_1 = extractelement <16 x i1> %mask, i32 Idx // br i1 %mask_1, label %cond.store, label %else // - Value *Predicate = Builder.CreateExtractElement(Mask, Idx); + Value *Predicate; + if (VectorWidth != 1) { + Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), + Builder.getIntN(VectorWidth, 0)); + } else { + Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); + } // Create "cond" block // @@ -727,17 +851,24 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI, switch (II->getIntrinsicID()) { default: break; - case Intrinsic::masked_load: + case Intrinsic::masked_load: { // Scalarize unsupported vector masked load - if (TTI->isLegalMaskedLoad(CI->getType())) + unsigned Alignment = + cast(CI->getArgOperand(1))->getZExtValue(); + if (TTI->isLegalMaskedLoad(CI->getType(), MaybeAlign(Alignment))) return false; scalarizeMaskedLoad(CI, ModifiedDT); return true; - case Intrinsic::masked_store: - if (TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) + } + case Intrinsic::masked_store: { + unsigned Alignment = + cast(CI->getArgOperand(2))->getZExtValue(); + if (TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(), + MaybeAlign(Alignment))) return false; scalarizeMaskedStore(CI, ModifiedDT); return true; + } case Intrinsic::masked_gather: if (TTI->isLegalMaskedGather(CI->getType())) return false; diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index d5ad7e92299d..96a1f86c3e04 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -18,7 +18,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseSet.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LivePhysRegs.h" @@ -205,10 +204,10 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() { if (ExitMI) { for (const MachineOperand &MO : ExitMI->operands()) { if (!MO.isReg() || MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) { Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg)); - } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) { + } else if (Register::isVirtualRegister(Reg) && MO.readsReg()) { addVRegUseDeps(&ExitSU, ExitMI->getOperandNo(&MO)); } } @@ -285,7 +284,7 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { MachineInstr *MI = SU->getInstr(); MachineOperand &MO = MI->getOperand(OperIdx); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // We do not need to track any dependencies for constant registers. if (MRI.isConstantPhysReg(Reg)) return; @@ -361,7 +360,7 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // No point in tracking lanemasks if we don't have interesting subregisters. 
const TargetRegisterClass &RC = *MRI.getRegClass(Reg); if (!RC.HasDisjunctSubRegs) @@ -373,6 +372,13 @@ LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const return TRI->getSubRegIndexLaneMask(SubReg); } +bool ScheduleDAGInstrs::deadDefHasNoUse(const MachineOperand &MO) { + auto RegUse = CurrentVRegUses.find(MO.getReg()); + if (RegUse == CurrentVRegUses.end()) + return true; + return (RegUse->LaneMask & getLaneMaskForMO(MO)).none(); +} + /// Adds register output and data dependencies from this SUnit to instructions /// that occur later in the same scheduling region if they read from or write to /// the virtual register defined at OperIdx. @@ -382,7 +388,7 @@ LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { MachineInstr *MI = SU->getInstr(); MachineOperand &MO = MI->getOperand(OperIdx); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); LaneBitmask DefLaneMask; LaneBitmask KillLaneMask; @@ -393,6 +399,18 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { // earlier instruction. KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask; + if (MO.getSubReg() != 0 && MO.isUndef()) { + // There may be other subregister defs on the same instruction of the same + // register in later operands. The lanes of other defs will now be live + // after this instruction, so these should not be treated as killed by the + // instruction even though they appear to be killed in this one operand. + for (int I = OperIdx + 1, E = MI->getNumOperands(); I != E; ++I) { + const MachineOperand &OtherMO = MI->getOperand(I); + if (OtherMO.isReg() && OtherMO.isDef() && OtherMO.getReg() == Reg) + KillLaneMask &= ~getLaneMaskForMO(OtherMO); + } + } + // Clear undef flag, we'll re-add it later once we know which subregister // Def is first. MO.setIsUndef(false); @@ -402,8 +420,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { } if (MO.isDead()) { - assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() && - "Dead defs should have no uses"); + assert(deadDefHasNoUse(MO) && "Dead defs should have no uses"); } else { // Add data dependence to all uses we found so far. const TargetSubtargetInfo &ST = MF.getSubtarget(); @@ -491,7 +508,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { const MachineInstr *MI = SU->getInstr(); const MachineOperand &MO = MI->getOperand(OperIdx); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // Remember the use. Data dependencies will be added when we find the def. LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) @@ -514,7 +531,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { /// Returns true if MI is an instruction we are unable to reason about /// (like a call or something with unmodeled side effects). 
-static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) { +static inline bool isGlobalMemoryObject(AAResults *AA, MachineInstr *MI) { return MI->isCall() || MI->hasUnmodeledSideEffects() || (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA)); } @@ -701,7 +718,7 @@ void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) { map.reComputeSize(); } -void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, +void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker, PressureDiffs *PDiffs, LiveIntervals *LIS, @@ -821,10 +838,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, const MachineOperand &MO = MI.getOperand(j); if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) { addPhysRegDeps(SU, j); - } else if (TargetRegisterInfo::isVirtualRegister(Reg)) { + } else if (Register::isVirtualRegister(Reg)) { HasVRegDef = true; addVRegDefDeps(SU, j); } @@ -838,10 +855,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // additional use dependencies. if (!MO.isReg() || !MO.isUse()) continue; - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) { addPhysRegDeps(SU, j); - } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) { + } else if (Register::isVirtualRegister(Reg) && MO.readsReg()) { addVRegUseDeps(SU, j); } } @@ -1071,7 +1088,7 @@ static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, for (MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.readsReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; @@ -1102,7 +1119,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) { if (MO.isReg()) { if (!MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; LiveRegs.removeReg(Reg); diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 49c922f560fa..e8950b58d42d 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -24,7 +24,6 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -111,10 +110,20 @@ static cl::opt MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads")); +static cl::opt + EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), + cl::desc("DAG combiner enable merging multiple stores " + "into a wider store")); + static cl::opt TokenFactorInlineLimit( "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors")); +static cl::opt StoreMergeDependenceLimit( + "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), + cl::desc("Limit the number of times for the same StoreNode and RootNode " + "to bail out in store merging dependence check")); + namespace { class DAGCombiner { @@ -152,6 +161,14 @@ namespace { /// which have not yet been combined to the worklist. SmallPtrSet CombinedNodes; + /// Map from candidate StoreNode to the pair of RootNode and count. 
+ /// The count is used to track how many times we have seen the StoreNode + /// with the same RootNode bail out in dependence check. If we have seen + /// the bail out for the same pair many times over a limit, we won't + /// consider the StoreNode with the same RootNode as store merging + /// candidate again. + DenseMap> StoreRootCountMap; + // AA - Used for DAG load/store alias analysis. AliasAnalysis *AA; @@ -236,6 +253,7 @@ namespace { void removeFromWorklist(SDNode *N) { CombinedNodes.erase(N); PruningList.remove(N); + StoreRootCountMap.erase(N); auto It = WorklistMap.find(N); if (It == WorklistMap.end()) @@ -361,6 +379,7 @@ namespace { SDValue visitSUBE(SDNode *N); SDValue visitSUBCARRY(SDNode *N); SDValue visitMUL(SDNode *N); + SDValue visitMULFIX(SDNode *N); SDValue useDivRem(SDNode *N); SDValue visitSDIV(SDNode *N); SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N); @@ -421,7 +440,6 @@ namespace { SDValue visitFP_TO_SINT(SDNode *N); SDValue visitFP_TO_UINT(SDNode *N); SDValue visitFP_ROUND(SDNode *N); - SDValue visitFP_ROUND_INREG(SDNode *N); SDValue visitFP_EXTEND(SDNode *N); SDValue visitFNEG(SDNode *N); SDValue visitFABS(SDNode *N); @@ -470,7 +488,7 @@ namespace { SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1, SDNodeFlags Flags); - SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); + SDValue visitShiftByConstant(SDNode *N); SDValue foldSelectOfConstants(SDNode *N); SDValue foldVSelectOfConstants(SDNode *N); @@ -497,6 +515,7 @@ namespace { bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, SDValue &CC) const; bool isOneUseSetCC(SDValue N) const; + bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y); SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); @@ -510,7 +529,7 @@ namespace { SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); SDValue BuildLogBase2(SDValue V, const SDLoc &DL); - SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags); + SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags); SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags); SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags); SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip); @@ -521,11 +540,11 @@ namespace { SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); - SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, + SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL); - SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); + SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); SDValue MatchLoadCombine(SDNode *N); SDValue MatchStoreCombine(StoreSDNode *N); SDValue ReduceLoadWidth(SDNode *N); @@ -742,6 +761,11 @@ CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); } +bool TargetLowering::DAGCombinerInfo:: +recursivelyDeleteUnusedNodes(SDNode *N) { + return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N); +} + void TargetLowering::DAGCombinerInfo:: CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); @@ -766,195 +790,6 @@ void DAGCombiner::deleteAndRecombine(SDNode *N) { DAG.DeleteNode(N); } -/// Return 1 if we can compute the negated form of the specified expression 
for -/// the same cost as the expression itself, or 2 if we can compute the negated -/// form more cheaply than the expression itself. -static char isNegatibleForFree(SDValue Op, bool LegalOperations, - const TargetLowering &TLI, - const TargetOptions *Options, - bool ForCodeSize, - unsigned Depth = 0) { - // fneg is removable even if it has multiple uses. - if (Op.getOpcode() == ISD::FNEG) - return 2; - - // Don't allow anything with multiple uses unless we know it is free. - EVT VT = Op.getValueType(); - const SDNodeFlags Flags = Op->getFlags(); - if (!Op.hasOneUse() && - !(Op.getOpcode() == ISD::FP_EXTEND && - TLI.isFPExtFree(VT, Op.getOperand(0).getValueType()))) - return 0; - - // Don't recurse exponentially. - if (Depth > 6) - return 0; - - switch (Op.getOpcode()) { - default: return false; - case ISD::ConstantFP: { - if (!LegalOperations) - return 1; - - // Don't invert constant FP values after legalization unless the target says - // the negated constant is legal. - return TLI.isOperationLegal(ISD::ConstantFP, VT) || - TLI.isFPImmLegal(neg(cast(Op)->getValueAPF()), VT, - ForCodeSize); - } - case ISD::BUILD_VECTOR: { - // Only permit BUILD_VECTOR of constants. - if (llvm::any_of(Op->op_values(), [&](SDValue N) { - return !N.isUndef() && !isa(N); - })) - return 0; - if (!LegalOperations) - return 1; - if (TLI.isOperationLegal(ISD::ConstantFP, VT) && - TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) - return 1; - return llvm::all_of(Op->op_values(), [&](SDValue N) { - return N.isUndef() || - TLI.isFPImmLegal(neg(cast(N)->getValueAPF()), VT, - ForCodeSize); - }); - } - case ISD::FADD: - if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros()) - return 0; - - // After operation legalization, it might not be legal to create new FSUBs. - if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) - return 0; - - // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) - if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, - Options, ForCodeSize, Depth + 1)) - return V; - // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) - return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, - ForCodeSize, Depth + 1); - case ISD::FSUB: - // We can't turn -(A-B) into B-A when we honor signed zeros. - if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) - return 0; - - // fold (fneg (fsub A, B)) -> (fsub B, A) - return 1; - - case ISD::FMUL: - case ISD::FDIV: - // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) - if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, - Options, ForCodeSize, Depth + 1)) - return V; - - return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, - ForCodeSize, Depth + 1); - - case ISD::FP_EXTEND: - case ISD::FP_ROUND: - case ISD::FSIN: - return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, - ForCodeSize, Depth + 1); - } -} - -/// If isNegatibleForFree returns true, return the newly negated expression. -static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, - bool LegalOperations, bool ForCodeSize, - unsigned Depth = 0) { - // fneg is removable even if it has multiple uses. 
- if (Op.getOpcode() == ISD::FNEG) - return Op.getOperand(0); - - assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); - const TargetOptions &Options = DAG.getTarget().Options; - const SDNodeFlags Flags = Op->getFlags(); - - switch (Op.getOpcode()) { - default: llvm_unreachable("Unknown code"); - case ISD::ConstantFP: { - APFloat V = cast(Op)->getValueAPF(); - V.changeSign(); - return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); - } - case ISD::BUILD_VECTOR: { - SmallVector Ops; - for (SDValue C : Op->op_values()) { - if (C.isUndef()) { - Ops.push_back(C); - continue; - } - APFloat V = cast(C)->getValueAPF(); - V.changeSign(); - Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType())); - } - return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops); - } - case ISD::FADD: - assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros()); - - // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) - if (isNegatibleForFree(Op.getOperand(0), LegalOperations, - DAG.getTargetLoweringInfo(), &Options, ForCodeSize, - Depth + 1)) - return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), - GetNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1), - Op.getOperand(1), Flags); - // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) - return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), - GetNegatedExpression(Op.getOperand(1), DAG, - LegalOperations, ForCodeSize, - Depth + 1), - Op.getOperand(0), Flags); - case ISD::FSUB: - // fold (fneg (fsub 0, B)) -> B - if (ConstantFPSDNode *N0CFP = - isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true)) - if (N0CFP->isZero()) - return Op.getOperand(1); - - // fold (fneg (fsub A, B)) -> (fsub B, A) - return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), - Op.getOperand(1), Op.getOperand(0), Flags); - - case ISD::FMUL: - case ISD::FDIV: - // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) - if (isNegatibleForFree(Op.getOperand(0), LegalOperations, - DAG.getTargetLoweringInfo(), &Options, ForCodeSize, - Depth + 1)) - return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), - GetNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1), - Op.getOperand(1), Flags); - - // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) - return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), - Op.getOperand(0), - GetNegatedExpression(Op.getOperand(1), DAG, - LegalOperations, ForCodeSize, - Depth + 1), Flags); - - case ISD::FP_EXTEND: - case ISD::FSIN: - return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), - GetNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1)); - case ISD::FP_ROUND: - return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), - GetNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1), - Op.getOperand(1)); - } -} - // APInts must be the same size for most operations, this helper // function zero extends the shorter of the pair so that they match. // We provide an Offset so that we can create bitwidths that won't overflow. 
@@ -1124,7 +959,6 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); if (!OpNode.getNode()) return SDValue(); - AddToWorklist(OpNode.getNode()); return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); } } @@ -1438,7 +1272,6 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { SDValue RV = DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1)); - AddToWorklist(N0.getNode()); if (Replace) ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); @@ -1591,8 +1424,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) { bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); for (SDNode *LN : UpdatedNodes) { - AddToWorklist(LN); AddUsersToWorklist(LN); + AddToWorklist(LN); } if (!NIsValid) continue; @@ -1673,6 +1506,10 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ADDCARRY: return visitADDCARRY(N); case ISD::SUBE: return visitSUBE(N); case ISD::SUBCARRY: return visitSUBCARRY(N); + case ISD::SMULFIX: + case ISD::SMULFIXSAT: + case ISD::UMULFIX: + case ISD::UMULFIXSAT: return visitMULFIX(N); case ISD::MUL: return visitMUL(N); case ISD::SDIV: return visitSDIV(N); case ISD::UDIV: return visitUDIV(N); @@ -1736,7 +1573,6 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); case ISD::FP_ROUND: return visitFP_ROUND(N); - case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N); case ISD::FP_EXTEND: return visitFP_EXTEND(N); case ISD::FNEG: return visitFNEG(N); case ISD::FABS: return visitFABS(N); @@ -3308,6 +3144,18 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } } + if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) { + // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry) + if (SDValue Carry = getAsCarry(TLI, N0)) { + SDValue X = N1; + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X); + return DAG.getNode(ISD::ADDCARRY, DL, + DAG.getVTList(VT, Carry.getValueType()), NegX, Zero, + Carry); + } + } + return SDValue(); } @@ -3442,6 +3290,30 @@ SDValue DAGCombiner::visitSUBCARRY(SDNode *N) { return SDValue(); } +// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and +// UMULFIXSAT here. 
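+// Roughly speaking, these nodes compute (x * y) >> Scale on fixed-point
+// operands (saturating for the *SAT variants), so the simple constant folds
+// below mirror the ordinary ISD::MUL case.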
+SDValue DAGCombiner::visitMULFIX(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue Scale = N->getOperand(2); + EVT VT = N0.getValueType(); + + // fold (mulfix x, undef, scale) -> 0 + if (N0.isUndef() || N1.isUndef()) + return DAG.getConstant(0, SDLoc(N), VT); + + // Canonicalize constant to RHS (vector doesn't have to splat) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale); + + // fold (mulfix x, 0, scale) -> 0 + if (isNullConstant(N1)) + return DAG.getConstant(0, SDLoc(N), VT); + + return SDValue(); +} + SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -3537,7 +3409,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // x * 15 --> (x << 4) - x // x * -33 --> -((x << 5) + x) // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4) - if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) { + if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) { // TODO: We could handle more general decomposition of any constant by // having the target set a limit on number of ops and making a // callback to determine that sequence (similar to sqrt expansion). @@ -4083,10 +3955,10 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { if (VT.isVector()) { // fold (mulhs x, 0) -> 0 - if (ISD::isBuildVectorAllZeros(N1.getNode())) - return N1; - if (ISD::isBuildVectorAllZeros(N0.getNode())) - return N0; + // do not return N0/N1, because undef node may exist. + if (ISD::isBuildVectorAllZeros(N0.getNode()) || + ISD::isBuildVectorAllZeros(N1.getNode())) + return DAG.getConstant(0, DL, VT); } // fold (mulhs x, 0) -> 0 @@ -4095,7 +3967,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { // fold (mulhs x, 1) -> (sra x, size(x)-1) if (isOneConstant(N1)) return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, - DAG.getConstant(N0.getValueSizeInBits() - 1, DL, + DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); // fold (mulhs x, undef) -> 0 @@ -4130,10 +4002,10 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { if (VT.isVector()) { // fold (mulhu x, 0) -> 0 - if (ISD::isBuildVectorAllZeros(N1.getNode())) - return N1; - if (ISD::isBuildVectorAllZeros(N0.getNode())) - return N0; + // do not return N0/N1, because undef node may exist. + if (ISD::isBuildVectorAllZeros(N0.getNode()) || + ISD::isBuildVectorAllZeros(N1.getNode())) + return DAG.getConstant(0, DL, VT); } // fold (mulhu x, 0) -> 0 @@ -4265,6 +4137,18 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); + // (umul_lohi N0, 0) -> (0, 0) + if (isNullConstant(N->getOperand(1))) { + SDValue Zero = DAG.getConstant(0, DL, VT); + return CombineTo(N, Zero, Zero); + } + + // (umul_lohi N0, 1) -> (N0, 0) + if (isOneConstant(N->getOperand(1))) { + SDValue Zero = DAG.getConstant(0, DL, VT); + return CombineTo(N, N->getOperand(0), Zero); + } + // If the type is twice as wide is legal, transform the mulhu to a wider // multiply plus a shift. if (VT.isSimple() && !VT.isVector()) { @@ -4290,13 +4174,29 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { } SDValue DAGCombiner::visitMULO(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); bool IsSigned = (ISD::SMULO == N->getOpcode()); + EVT CarryVT = N->getValueType(1); + SDLoc DL(N); + + // canonicalize constant to RHS. 
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0); + + // fold (mulo x, 0) -> 0 + no carry out + if (isNullOrNullSplat(N1)) + return CombineTo(N, DAG.getConstant(0, DL, VT), + DAG.getConstant(0, DL, CarryVT)); + + // (mulo x, 2) -> (addo x, x) - if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1))) + if (ConstantSDNode *C2 = isConstOrConstSplat(N1)) if (C2->getAPIntValue() == 2) - return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N), - N->getVTList(), N->getOperand(0), N->getOperand(0)); + return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL, + N->getVTList(), N0, N0); return SDValue(); } @@ -4444,7 +4344,9 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) { if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) && Level <= AfterLegalizeTypes) { // Input types must be integer and the same. - if (XVT.isInteger() && XVT == Y.getValueType()) { + if (XVT.isInteger() && XVT == Y.getValueType() && + !(VT.isVector() && TLI.isTypeLegal(VT) && + !XVT.isVector() && !TLI.isTypeLegal(XVT))) { SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y); return DAG.getNode(HandOpcode, DL, VT, Logic); } @@ -4770,8 +4672,8 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, return true; } - // Do not change the width of a volatile load. - if (LoadN->isVolatile()) + // Do not change the width of a volatile or atomic load. + if (!LoadN->isSimple()) return false; // Do not generate loads of non-round integer types since these can @@ -4803,15 +4705,15 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST, if (!MemVT.isRound()) return false; - // Don't change the width of a volatile load. - if (LDST->isVolatile()) + // Don't change the width of a volatile or atomic load. + if (!LDST->isSimple()) return false; // Verify that we are actually reducing a load width here. if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits()) return false; - // Ensure that this isn't going to produce an unsupported unaligned access. + // Ensure that this isn't going to produce an unsupported memory access. if (ShAmt && !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, LDST->getAddressSpace(), ShAmt / 8, @@ -5076,6 +4978,59 @@ SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) { return T1; } +/// Try to replace shift/logic that tests if a bit is clear with mask + setcc. +/// For a target with a bit test, this is expected to become test + set and save +/// at least 1 instruction. +static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) { + assert(And->getOpcode() == ISD::AND && "Expected an 'and' op"); + + // This is probably not worthwhile without a supported type. + EVT VT = And->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isTypeLegal(VT)) + return SDValue(); + + // Look through an optional extension and find a 'not'. + // TODO: Should we favor test+set even without the 'not' op? + SDValue Not = And->getOperand(0), And1 = And->getOperand(1); + if (Not.getOpcode() == ISD::ANY_EXTEND) + Not = Not.getOperand(0); + if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1)) + return SDValue(); + + // Look through an optional truncation. The source operand may not be the same + // type as the original 'and', but that is ok because we are masking off + // everything but the low bit. 
+ SDValue Srl = Not.getOperand(0); + if (Srl.getOpcode() == ISD::TRUNCATE) + Srl = Srl.getOperand(0); + + // Match a shift-right by constant. + if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() || + !isa(Srl.getOperand(1))) + return SDValue(); + + // We might have looked through casts that make this transform invalid. + // TODO: If the source type is wider than the result type, do the mask and + // compare in the source type. + const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1); + unsigned VTBitWidth = VT.getSizeInBits(); + if (ShiftAmt.uge(VTBitWidth)) + return SDValue(); + + // Turn this into a bit-test pattern using mask op + setcc: + // and (not (srl X, C)), 1 --> (and X, 1<getOperand(0); SDValue N1 = N->getOperand(1); @@ -5163,6 +5118,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } } + // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must // already be zero by virtue of the width of the base type of the load. @@ -5337,7 +5293,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { unsigned MemBitSize = MemVT.getScalarSizeInBits(); APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize); if (DAG.MaskedValueIsZero(N1, ExtBits) && - ((!LegalOperations && !LN0->isVolatile()) || + ((!LegalOperations && LN0->isSimple()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), @@ -5358,6 +5314,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N)) return Shifts; + if (TLI.hasBitTest(N0, N1)) + if (SDValue V = combineShiftAnd1ToBitTest(N, DAG)) + return V; + return SDValue(); } @@ -5564,6 +5524,23 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef Parts) { return true; } +// Match 2 elements of a packed halfword bswap. +static bool isBSwapHWordPair(SDValue N, MutableArrayRef Parts) { + if (N.getOpcode() == ISD::OR) + return isBSwapHWordElement(N.getOperand(0), Parts) && + isBSwapHWordElement(N.getOperand(1), Parts); + + if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) { + ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1)); + if (!C || C->getAPIntValue() != 16) + return false; + Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode(); + return true; + } + + return false; +} + /// Match a 32-bit packed halfword bswap. 
That is /// ((x & 0x000000ff) << 8) | /// ((x & 0x0000ff00) >> 8) | @@ -5581,43 +5558,26 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { return SDValue(); // Look for either - // (or (or (and), (and)), (or (and), (and))) - // (or (or (or (and), (and)), (and)), (and)) - if (N0.getOpcode() != ISD::OR) - return SDValue(); - SDValue N00 = N0.getOperand(0); - SDValue N01 = N0.getOperand(1); + // (or (bswaphpair), (bswaphpair)) + // (or (or (bswaphpair), (and)), (and)) + // (or (or (and), (bswaphpair)), (and)) SDNode *Parts[4] = {}; - if (N1.getOpcode() == ISD::OR && - N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { + if (isBSwapHWordPair(N0, Parts)) { // (or (or (and), (and)), (or (and), (and))) - if (!isBSwapHWordElement(N00, Parts)) + if (!isBSwapHWordPair(N1, Parts)) return SDValue(); - - if (!isBSwapHWordElement(N01, Parts)) - return SDValue(); - SDValue N10 = N1.getOperand(0); - if (!isBSwapHWordElement(N10, Parts)) - return SDValue(); - SDValue N11 = N1.getOperand(1); - if (!isBSwapHWordElement(N11, Parts)) - return SDValue(); - } else { + } else if (N0.getOpcode() == ISD::OR) { // (or (or (or (and), (and)), (and)), (and)) if (!isBSwapHWordElement(N1, Parts)) return SDValue(); - if (!isBSwapHWordElement(N01, Parts)) - return SDValue(); - if (N00.getOpcode() != ISD::OR) - return SDValue(); - SDValue N000 = N00.getOperand(0); - if (!isBSwapHWordElement(N000, Parts)) - return SDValue(); - SDValue N001 = N00.getOperand(1); - if (!isBSwapHWordElement(N001, Parts)) + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) && + !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts))) return SDValue(); - } + } else + return SDValue(); // Make sure the parts are all coming from the same node. if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) @@ -5791,15 +5751,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) { SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0); SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0); - bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT); - if (!LegalMask) { - std::swap(NewLHS, NewRHS); - ShuffleVectorSDNode::commuteMask(Mask); - LegalMask = TLI.isShuffleMaskLegal(Mask, VT); - } - - if (LegalMask) - return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask); + SDValue LegalShuffle = + TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, + Mask, DAG); + if (LegalShuffle) + return LegalShuffle; } } } @@ -5867,8 +5823,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return V; // See if this is some rotate idiom. - if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) - return SDValue(Rot, 0); + if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N))) + return Rot; if (SDValue Load = MatchLoadCombine(N)) return Load; @@ -5914,6 +5870,9 @@ static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift, /// Otherwise, returns an expansion of \p ExtractFrom based on the following /// patterns: /// +/// (or (add v v) (shrl v bitwidth-1)): +/// expands (add v v) -> (shl v 1) +/// /// (or (mul v c0) (shrl (mul v c1) c2)): /// expands (mul v c0) -> (shl (mul v c1) c3) /// @@ -5936,6 +5895,23 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, "Existing shift must be valid as a rotate half"); ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask); + + // Value and Type of the shift. 
+ SDValue OppShiftLHS = OppShift.getOperand(0); + EVT ShiftedVT = OppShiftLHS.getValueType(); + + // Amount of the existing shift. + ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1)); + + // (add v v) -> (shl v 1) + if (OppShift.getOpcode() == ISD::SRL && OppShiftCst && + ExtractFrom.getOpcode() == ISD::ADD && + ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) && + ExtractFrom.getOperand(0) == OppShiftLHS && + OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1) + return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS, + DAG.getShiftAmountConstant(1, ShiftedVT, DL)); + // Preconditions: // (or (op0 v c0) (shiftl/r (op0 v c1) c2)) // @@ -5959,15 +5935,11 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, // op0 must be the same opcode on both sides, have the same LHS argument, // and produce the same value type. - SDValue OppShiftLHS = OppShift.getOperand(0); - EVT ShiftedVT = OppShiftLHS.getValueType(); if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() || OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) || ShiftedVT != ExtractFrom.getValueType()) return SDValue(); - // Amount of the existing shift. - ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1)); // Constant mul/udiv/shift amount from the RHS of the shift's LHS op. ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1)); // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op. @@ -6137,7 +6109,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the // former being preferred if supported. InnerPos and InnerNeg are Pos and // Neg with outer conversions stripped away. -SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, +SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL) { @@ -6152,32 +6124,33 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) { bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, - HasPos ? Pos : Neg).getNode(); + HasPos ? Pos : Neg); } - return nullptr; + return SDValue(); } // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate // a rot[lr]. -SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { +SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // Must be a legal type. Expanded 'n promoted things won't work with rotates. EVT VT = LHS.getValueType(); - if (!TLI.isTypeLegal(VT)) return nullptr; + if (!TLI.isTypeLegal(VT)) + return SDValue(); // The target must have at least one rotate flavor. bool HasROTL = hasOperation(ISD::ROTL, VT); bool HasROTR = hasOperation(ISD::ROTR, VT); - if (!HasROTL && !HasROTR) return nullptr; + if (!HasROTL && !HasROTR) + return SDValue(); // Check for truncated rotate. 
if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE && LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) { assert(LHS.getValueType() == RHS.getValueType()); - if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) { - return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), - SDValue(Rot, 0)).getNode(); + if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) { + return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot); } } @@ -6192,7 +6165,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // If neither side matched a rotate half, bail if (!LHSShift && !RHSShift) - return nullptr; + return SDValue(); // InstCombine may have combined a constant shl, srl, mul, or udiv with one // side of the rotate, so try to handle that here. In all cases we need to @@ -6215,15 +6188,15 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // If a side is still missing, nothing else we can do. if (!RHSShift || !LHSShift) - return nullptr; + return SDValue(); // At this point we've matched or extracted a shift op on each side. if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) - return nullptr; // Not shifting the same value. + return SDValue(); // Not shifting the same value. if (LHSShift.getOpcode() == RHSShift.getOpcode()) - return nullptr; // Shifts must disagree. + return SDValue(); // Shifts must disagree. // Canonicalize shl to left side in a shl/srl pair. if (RHSShift.getOpcode() == ISD::SHL) { @@ -6267,13 +6240,13 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask); } - return Rot.getNode(); + return Rot; } // If there is a mask here, and we have a variable shift, we can't be sure // that we're masking out the right stuff. if (LHSMask.getNode() || RHSMask.getNode()) - return nullptr; + return SDValue(); // If the shift amount is sign/zext/any-extended just peel it off. 
SDValue LExtOp0 = LHSShiftAmt; @@ -6290,17 +6263,17 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { RExtOp0 = RHSShiftAmt.getOperand(0); } - SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, + SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL); if (TryL) return TryL; - SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, + SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL); if (TryR) return TryR; - return nullptr; + return SDValue(); } namespace { @@ -6415,7 +6388,7 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, Depth + 1); case ISD::LOAD: { auto L = cast(Op.getNode()); - if (L->isVolatile() || L->isIndexed()) + if (!L->isSimple() || L->isIndexed()) return None; unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits(); @@ -6504,8 +6477,9 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) { SDValue Chain; SmallVector Stores; for (StoreSDNode *Store = N; Store; Store = dyn_cast(Chain)) { + // TODO: Allow unordered atomics when wider type is legal (see D66309) if (Store->getMemoryVT() != MVT::i8 || - Store->isVolatile() || Store->isIndexed()) + !Store->isSimple() || Store->isIndexed()) return SDValue(); Stores.push_back(Store); Chain = Store->getChain(); @@ -6716,7 +6690,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { return SDValue(); LoadSDNode *L = P->Load; - assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() && + assert(L->hasNUsesOfValue(1, 0) && L->isSimple() && + !L->isIndexed() && "Must be enforced by calculateByteProvider"); assert(L->getOffset().isUndef() && "Unindexed load must have undef offset"); @@ -6958,25 +6933,25 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() && (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) { - SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); - if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { + SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1); + if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) { unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND; - LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS - RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS - AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); - return DAG.getNode(NewOpcode, DL, VT, LHS, RHS); + N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00 + N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01 + AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode()); + return DAG.getNode(NewOpcode, DL, VT, N00, N01); } } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants if (isAllOnesConstant(N1) && N0.hasOneUse() && (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) { - SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); - if (isa(RHS) || isa(LHS)) { + SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1); + if (isa(N01) || isa(N00)) { unsigned NewOpcode = N0Opcode == ISD::AND ? 
ISD::OR : ISD::AND; - LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS - RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS - AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); - return DAG.getNode(NewOpcode, DL, VT, LHS, RHS); + N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00 + N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01 + AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode()); + return DAG.getNode(NewOpcode, DL, VT, N00, N01); } } @@ -7079,26 +7054,103 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return SDValue(); } +/// If we have a shift-by-constant of a bitwise logic op that itself has a +/// shift-by-constant operand with identical opcode, we may be able to convert +/// that into 2 independent shifts followed by the logic op. This is a +/// throughput improvement. +static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) { + // Match a one-use bitwise logic op. + SDValue LogicOp = Shift->getOperand(0); + if (!LogicOp.hasOneUse()) + return SDValue(); + + unsigned LogicOpcode = LogicOp.getOpcode(); + if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR && + LogicOpcode != ISD::XOR) + return SDValue(); + + // Find a matching one-use shift by constant. + unsigned ShiftOpcode = Shift->getOpcode(); + SDValue C1 = Shift->getOperand(1); + ConstantSDNode *C1Node = isConstOrConstSplat(C1); + assert(C1Node && "Expected a shift with constant operand"); + const APInt &C1Val = C1Node->getAPIntValue(); + auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp, + const APInt *&ShiftAmtVal) { + if (V.getOpcode() != ShiftOpcode || !V.hasOneUse()) + return false; + + ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1)); + if (!ShiftCNode) + return false; + + // Capture the shifted operand and shift amount value. + ShiftOp = V.getOperand(0); + ShiftAmtVal = &ShiftCNode->getAPIntValue(); + + // Shift amount types do not have to match their operand type, so check that + // the constants are the same width. + if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth()) + return false; + + // The fold is not valid if the sum of the shift values exceeds bitwidth. + if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits())) + return false; + + return true; + }; + + // Logic ops are commutative, so check each operand for a match. + SDValue X, Y; + const APInt *C0Val; + if (matchFirstShift(LogicOp.getOperand(0), X, C0Val)) + Y = LogicOp.getOperand(1); + else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val)) + Y = LogicOp.getOperand(0); + else + return SDValue(); + + // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1) + SDLoc DL(Shift); + EVT VT = Shift->getValueType(0); + EVT ShiftAmtVT = Shift->getOperand(1).getValueType(); + SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT); + SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC); + SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1); + return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2); +} + /// Handle transforms common to the three shifts, when the shift amount is a /// constant. 
/// We are looking for: (shift being one of shl/sra/srl) /// shift (binop X, C0), C1 /// And want to transform into: /// binop (shift X, C1), (shift C0, C1) -SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { +SDValue DAGCombiner::visitShiftByConstant(SDNode *N) { + assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand"); + // Do not turn a 'not' into a regular xor. if (isBitwiseNot(N->getOperand(0))) return SDValue(); // The inner binop must be one-use, since we want to replace it. - SDNode *LHS = N->getOperand(0).getNode(); - if (!LHS->hasOneUse()) return SDValue(); + SDValue LHS = N->getOperand(0); + if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level)) + return SDValue(); + + // TODO: This is limited to early combining because it may reveal regressions + // otherwise. But since we just checked a target hook to see if this is + // desirable, that should have filtered out cases where this interferes + // with some other pattern matching. + if (!LegalTypes) + if (SDValue R = combineShiftOfShiftedLogic(N, DAG)) + return R; // We want to pull some binops through shifts, so that we have (and (shift)) // instead of (shift (and)), likewise for add, or, xor, etc. This sort of // thing happens with address calculations, so it's important to canonicalize // it. - switch (LHS->getOpcode()) { + switch (LHS.getOpcode()) { default: return SDValue(); case ISD::OR: @@ -7112,14 +7164,14 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { } // We require the RHS of the binop to be a constant and not opaque as well. - ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1)); + ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1)); if (!BinOpCst) return SDValue(); // FIXME: disable this unless the input to the binop is a shift by a constant // or is copy/select. Enable this in other cases when figure out it's exactly // profitable. - SDValue BinOpLHSVal = LHS->getOperand(0); + SDValue BinOpLHSVal = LHS.getOperand(0); bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL || BinOpLHSVal.getOpcode() == ISD::SRA || BinOpLHSVal.getOpcode() == ISD::SRL) && @@ -7133,24 +7185,16 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { if (IsCopyOrSelect && N->hasOneUse()) return SDValue(); - EVT VT = N->getValueType(0); - - if (!TLI.isDesirableToCommuteWithShift(N, Level)) - return SDValue(); - // Fold the constants, shifting the binop RHS by the shift amount. - SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)), - N->getValueType(0), - LHS->getOperand(1), N->getOperand(1)); + SDLoc DL(N); + EVT VT = N->getValueType(0); + SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1), + N->getOperand(1)); assert(isa(NewRHS) && "Folding was not successful!"); - // Create the new shift. - SDValue NewShift = DAG.getNode(N->getOpcode(), - SDLoc(LHS->getOperand(0)), - VT, LHS->getOperand(0), N->getOperand(1)); - - // Create the new binop. 
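// [Illustrative aside, not part of the patch] A standalone sketch of the plain
// integer identities behind combineShiftOfShiftedLogic() and
// visitShiftByConstant() above, checked on uint32_t values. The helper names
// below are hypothetical and exist only for this sketch.
#include <cassert>
#include <cstdint>

// shift (logic (shift X, C0), Y), C1 --> logic (shift X, C0+C1), (shift Y, C1)
// (valid while C0 + C1 stays below the bit width).
static bool shiftOfShiftedLogicHolds(uint32_t X, uint32_t Y, unsigned C0,
                                     unsigned C1) {
  uint32_t Fused = ((X << C0) ^ Y) << C1;
  uint32_t Split = (X << (C0 + C1)) ^ (Y << C1);
  return Fused == Split;
}

// shift (binop X, C0), C1 --> binop (shift X, C1), (shift C0, C1)
// shown here for the 'or' and 'add' cases with a left shift.
static bool shiftByConstantHolds(uint32_t X, uint32_t C0, unsigned C1) {
  bool OrOk  = ((X | C0) << C1) == ((X << C1) | (C0 << C1));
  bool AddOk = ((X + C0) << C1) == ((X << C1) + (C0 << C1)); // wraps mod 2^32
  return OrOk && AddOk;
}

int main() {
  assert(shiftOfShiftedLogicHolds(0x12345678u, 0x0F0F0F0Fu, 3, 5));
  assert(shiftByConstantHolds(0xDEADBEEFu, 0xFFu, 4));
  return 0;
}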
- return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS); + SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0), + N->getOperand(1)); + return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS); } SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { @@ -7478,7 +7522,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } if (N1C && !N1C->isOpaque()) - if (SDValue NewSHL = visitShiftByConstant(N, N1C)) + if (SDValue NewSHL = visitShiftByConstant(N)) return NewSHL; return SDValue(); @@ -7597,6 +7641,37 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { } } + // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper. + // sra (add (shl X, N1C), AddC), N1C --> + // sext (add (trunc X to (width - N1C)), AddC') + if (!LegalTypes && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C && + N0.getOperand(0).getOpcode() == ISD::SHL && + N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) { + if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) { + SDValue Shl = N0.getOperand(0); + // Determine what the truncate's type would be and ask the target if that + // is a free operation. + LLVMContext &Ctx = *DAG.getContext(); + unsigned ShiftAmt = N1C->getZExtValue(); + EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt); + if (VT.isVector()) + TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements()); + + // TODO: The simple type check probably belongs in the default hook + // implementation and/or target-specific overrides (because + // non-simple types likely require masking when legalized), but that + // restriction may conflict with other transforms. + if (TruncVT.isSimple() && TLI.isTruncateFree(VT, TruncVT)) { + SDLoc DL(N); + SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT); + SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt). + trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT); + SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC); + return DAG.getSExtOrTrunc(Add, DL, VT); + } + } + } + // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { @@ -7638,7 +7713,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); if (N1C && !N1C->isOpaque()) - if (SDValue NewSRA = visitShiftByConstant(N, N1C)) + if (SDValue NewSRA = visitShiftByConstant(N)) return NewSRA; return SDValue(); @@ -7819,7 +7894,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return SDValue(N, 0); if (N1C && !N1C->isOpaque()) - if (SDValue NewSRL = visitShiftByConstant(N, N1C)) + if (SDValue NewSRL = visitShiftByConstant(N)) return NewSRL; // Attempt to convert a srl of a load into a narrower zero-extending load. @@ -8100,6 +8175,43 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, } } +/// If a (v)select has a condition value that is a sign-bit test, try to smear +/// the condition operand sign-bit across the value width and use it as a mask. 
+static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) { + SDValue Cond = N->getOperand(0); + SDValue C1 = N->getOperand(1); + SDValue C2 = N->getOperand(2); + assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) && + "Expected select-of-constants"); + + EVT VT = N->getValueType(0); + if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() || + VT != Cond.getOperand(0).getValueType()) + return SDValue(); + + // The inverted-condition + commuted-select variants of these patterns are + // canonicalized to these forms in IR. + SDValue X = Cond.getOperand(0); + SDValue CondC = Cond.getOperand(1); + ISD::CondCode CC = cast(Cond.getOperand(2))->get(); + if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) && + isAllOnesOrAllOnesSplat(C2)) { + // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1 + SDLoc DL(N); + SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC); + return DAG.getNode(ISD::OR, DL, VT, Sra, C1); + } + if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) { + // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1 + SDLoc DL(N); + SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC); + return DAG.getNode(ISD::AND, DL, VT, Sra, C1); + } + return SDValue(); +} + SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { SDValue Cond = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -8148,22 +8260,36 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { return Cond; } - // For any constants that differ by 1, we can transform the select into an - // extend and add. Use a target hook because some targets may prefer to - // transform in the other direction. + // Use a target hook because some targets may prefer to transform in the + // other direction. if (TLI.convertSelectOfConstantsToMath(VT)) { - if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) { + // For any constants that differ by 1, we can transform the select into an + // extend and add. + const APInt &C1Val = C1->getAPIntValue(); + const APInt &C2Val = C2->getAPIntValue(); + if (C1Val - 1 == C2Val) { // select Cond, C1, C1-1 --> add (zext Cond), C1-1 if (VT != MVT::i1) Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); } - if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) { + if (C1Val + 1 == C2Val) { // select Cond, C1, C1+1 --> add (sext Cond), C1+1 if (VT != MVT::i1) Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); } + + // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) + if (C1Val.isPowerOf2() && C2Val.isNullValue()) { + if (VT != MVT::i1) + Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); + SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC); + } + + if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) + return V; } return SDValue(); @@ -8381,23 +8507,6 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return SDValue(); } -static -std::pair SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { - SDLoc DL(N); - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - - // Split the inputs. 
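// [Illustrative aside, not part of the patch] Standalone sketch of the
// select-of-constants identities used by foldSelectOfConstantsUsingSra() and
// the new power-of-two case in foldSelectOfConstants() above. It assumes the
// usual arithmetic right shift for signed values; the helper names are
// hypothetical.
#include <cassert>
#include <cstdint>

static int32_t sraSmearGT(int32_t X, int32_t C1) {
  // X > -1 ? C1 : -1  -->  (X >>s 31) | C1
  return (X >> 31) | C1;
}

static int32_t sraSmearLT(int32_t X, int32_t C1) {
  // X < 0 ? C1 : 0  -->  (X >>s 31) & C1
  return (X >> 31) & C1;
}

static uint32_t selectPow2(bool Cond, unsigned Log2Pow2) {
  // select Cond, Pow2, 0  -->  (zext Cond) << log2(Pow2)
  return static_cast<uint32_t>(Cond) << Log2Pow2;
}

int main() {
  for (int32_t X : {-1000, -1, 0, 1, 1000}) {
    assert(sraSmearGT(X, 42) == (X > -1 ? 42 : -1));
    assert(sraSmearLT(X, 42) == (X < 0 ? 42 : 0));
  }
  for (bool Cond : {false, true})
    assert(selectPow2(Cond, 4) == (Cond ? 16u : 0u));
  return 0;
}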
- SDValue Lo, Hi, LL, LH, RL, RH; - std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); - std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); - - Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); - Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); - - return std::make_pair(Lo, Hi); -} - // This function assumes all the vselect's arguments are CONCAT_VECTOR // nodes and that the condition is a BV of ConstantSDNodes (or undefs). static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { @@ -8456,7 +8565,6 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { SDValue DAGCombiner::visitMSCATTER(SDNode *N) { MaskedScatterSDNode *MSC = cast(N); SDValue Mask = MSC->getMask(); - SDValue Data = MSC->getValue(); SDValue Chain = MSC->getChain(); SDLoc DL(N); @@ -8464,123 +8572,19 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return Chain; - if (Level >= AfterLegalizeTypes) - return SDValue(); - - // If the MSCATTER data type requires splitting and the mask is provided by a - // SETCC, then split both nodes and its operands before legalization. This - // prevents the type legalizer from unrolling SETCC into scalar comparisons - // and enables future optimizations (e.g. min/max pattern matching on X86). - if (Mask.getOpcode() != ISD::SETCC) - return SDValue(); - - // Check if any splitting is required. - if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != - TargetLowering::TypeSplitVector) - return SDValue(); - SDValue MaskLo, MaskHi; - std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0)); - - EVT MemoryVT = MSC->getMemoryVT(); - unsigned Alignment = MSC->getOriginalAlignment(); - - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - - SDValue DataLo, DataHi; - std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); - - SDValue Scale = MSC->getScale(); - SDValue BasePtr = MSC->getBasePtr(); - SDValue IndexLo, IndexHi; - std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL); - - MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MSC->getPointerInfo(), - MachineMemOperand::MOStore, LoMemVT.getStoreSize(), - Alignment, MSC->getAAInfo(), MSC->getRanges()); - - SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale }; - SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), - DataLo.getValueType(), DL, OpsLo, MMO); - - // The order of the Scatter operation after split is well defined. The "Hi" - // part comes after the "Lo". So these two operations should be chained one - // after another. - SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale }; - return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), - DL, OpsHi, MMO); + return SDValue(); } SDValue DAGCombiner::visitMSTORE(SDNode *N) { MaskedStoreSDNode *MST = cast(N); SDValue Mask = MST->getMask(); - SDValue Data = MST->getValue(); SDValue Chain = MST->getChain(); - EVT VT = Data.getValueType(); SDLoc DL(N); // Zap masked stores with a zero mask. if (ISD::isBuildVectorAllZeros(Mask.getNode())) return Chain; - if (Level >= AfterLegalizeTypes) - return SDValue(); - - // If the MSTORE data type requires splitting and the mask is provided by a - // SETCC, then split both nodes and its operands before legalization. 
This - // prevents the type legalizer from unrolling SETCC into scalar comparisons - // and enables future optimizations (e.g. min/max pattern matching on X86). - if (Mask.getOpcode() == ISD::SETCC) { - // Check if any splitting is required. - if (TLI.getTypeAction(*DAG.getContext(), VT) != - TargetLowering::TypeSplitVector) - return SDValue(); - - SDValue MaskLo, MaskHi, Lo, Hi; - std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - - SDValue Ptr = MST->getBasePtr(); - - EVT MemoryVT = MST->getMemoryVT(); - unsigned Alignment = MST->getOriginalAlignment(); - - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - - SDValue DataLo, DataHi; - std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); - - MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MST->getPointerInfo(), - MachineMemOperand::MOStore, LoMemVT.getStoreSize(), - Alignment, MST->getAAInfo(), MST->getRanges()); - - Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, - MST->isTruncatingStore(), - MST->isCompressingStore()); - - Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, - MST->isCompressingStore()); - unsigned HiOffset = LoMemVT.getStoreSize(); - - MMO = DAG.getMachineFunction().getMachineMemOperand( - MST->getPointerInfo().getWithOffset(HiOffset), - MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment, - MST->getAAInfo(), MST->getRanges()); - - Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, - MST->isTruncatingStore(), - MST->isCompressingStore()); - - AddToWorklist(Lo.getNode()); - AddToWorklist(Hi.getNode()); - - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); - } return SDValue(); } @@ -8593,76 +8597,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return CombineTo(N, MGT->getPassThru(), MGT->getChain()); - if (Level >= AfterLegalizeTypes) - return SDValue(); - - // If the MGATHER result requires splitting and the mask is provided by a - // SETCC, then split both nodes and its operands before legalization. This - // prevents the type legalizer from unrolling SETCC into scalar comparisons - // and enables future optimizations (e.g. min/max pattern matching on X86). - - if (Mask.getOpcode() != ISD::SETCC) - return SDValue(); - - EVT VT = N->getValueType(0); - - // Check if any splitting is required. - if (TLI.getTypeAction(*DAG.getContext(), VT) != - TargetLowering::TypeSplitVector) - return SDValue(); - - SDValue MaskLo, MaskHi, Lo, Hi; - std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - - SDValue PassThru = MGT->getPassThru(); - SDValue PassThruLo, PassThruHi; - std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL); - - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); - - SDValue Chain = MGT->getChain(); - EVT MemoryVT = MGT->getMemoryVT(); - unsigned Alignment = MGT->getOriginalAlignment(); - - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - - SDValue Scale = MGT->getScale(); - SDValue BasePtr = MGT->getBasePtr(); - SDValue Index = MGT->getIndex(); - SDValue IndexLo, IndexHi; - std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); - - MachineMemOperand *MMO = DAG.getMachineFunction(). 
- getMachineMemOperand(MGT->getPointerInfo(), - MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), - Alignment, MGT->getAAInfo(), MGT->getRanges()); - - SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale }; - Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo, - MMO); - - SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale }; - Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi, - MMO); - - AddToWorklist(Lo.getNode()); - AddToWorklist(Hi.getNode()); - - // Build a factor node to remember that this load is independent of the - // other one. - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - // Legalized the chain result - switch anything that used the old chain to - // use the new one. - DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain); - - SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); - - SDValue RetOps[] = { GatherRes, Chain }; - return DAG.getMergeValues(RetOps, DL); + return SDValue(); } SDValue DAGCombiner::visitMLOAD(SDNode *N) { @@ -8674,76 +8609,6 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return CombineTo(N, MLD->getPassThru(), MLD->getChain()); - if (Level >= AfterLegalizeTypes) - return SDValue(); - - // If the MLOAD result requires splitting and the mask is provided by a - // SETCC, then split both nodes and its operands before legalization. This - // prevents the type legalizer from unrolling SETCC into scalar comparisons - // and enables future optimizations (e.g. min/max pattern matching on X86). - if (Mask.getOpcode() == ISD::SETCC) { - EVT VT = N->getValueType(0); - - // Check if any splitting is required. - if (TLI.getTypeAction(*DAG.getContext(), VT) != - TargetLowering::TypeSplitVector) - return SDValue(); - - SDValue MaskLo, MaskHi, Lo, Hi; - std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - - SDValue PassThru = MLD->getPassThru(); - SDValue PassThruLo, PassThruHi; - std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL); - - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); - - SDValue Chain = MLD->getChain(); - SDValue Ptr = MLD->getBasePtr(); - EVT MemoryVT = MLD->getMemoryVT(); - unsigned Alignment = MLD->getOriginalAlignment(); - - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - - MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MLD->getPointerInfo(), - MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), - Alignment, MLD->getAAInfo(), MLD->getRanges()); - - Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT, - MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad()); - - Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, - MLD->isExpandingLoad()); - unsigned HiOffset = LoMemVT.getStoreSize(); - - MMO = DAG.getMachineFunction().getMachineMemOperand( - MLD->getPointerInfo().getWithOffset(HiOffset), - MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment, - MLD->getAAInfo(), MLD->getRanges()); - - Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT, - MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad()); - - AddToWorklist(Lo.getNode()); - AddToWorklist(Hi.getNode()); - - // Build a factor node to remember that this load is independent of the - // other one. 
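// [Illustrative aside, not part of the patch] The zero-mask folds that remain
// in visitMSTORE/visitMLOAD above rely on the per-element semantics of masked
// memory ops: a lane is only touched when its mask bit is set. A tiny scalar
// emulation (hypothetical helpers, fixed 4-wide vectors) shows why an
// all-false mask makes the store a no-op and the load just its pass-through.
#include <array>
#include <cassert>

using Vec4 = std::array<int, 4>;
using Mask4 = std::array<bool, 4>;

static void maskedStore(int *Mem, const Vec4 &Data, const Mask4 &M) {
  for (int i = 0; i < 4; ++i)
    if (M[i])
      Mem[i] = Data[i];
}

static Vec4 maskedLoad(const int *Mem, const Vec4 &PassThru, const Mask4 &M) {
  Vec4 R = PassThru;
  for (int i = 0; i < 4; ++i)
    if (M[i])
      R[i] = Mem[i];
  return R;
}

int main() {
  int Mem[4] = {1, 2, 3, 4};
  const Mask4 AllFalse = {false, false, false, false};
  maskedStore(Mem, {9, 9, 9, 9}, AllFalse);                 // no lane written
  assert(Mem[0] == 1 && Mem[3] == 4);
  Vec4 PassThru = {7, 7, 7, 7};
  assert(maskedLoad(Mem, PassThru, AllFalse) == PassThru);  // just PassThru
  return 0;
}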
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - // Legalized the chain result - switch anything that used the old chain to - // use the new one. - DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain); - - SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); - - SDValue RetOps[] = { LoadRes, Chain }; - return DAG.getMergeValues(RetOps, DL); - } return SDValue(); } @@ -8791,6 +8656,18 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2); } + // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C) + APInt Pow2C; + if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() && + isNullOrNullSplat(N2)) { + SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT); + SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC); + } + + if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) + return V; + // The general case for select-of-constants: // vselect Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2 // ...but that only makes sense if a vselect is slower than 2 logic ops, so @@ -8832,13 +8709,12 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); if (isAbs) { - EVT VT = LHS.getValueType(); if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) return DAG.getNode(ISD::ABS, DL, VT, LHS); - SDValue Shift = DAG.getNode( - ISD::SRA, DL, VT, LHS, - DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT)); + SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS, + DAG.getConstant(VT.getScalarSizeInBits() - 1, + DL, getShiftAmountTy(VT))); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); AddToWorklist(Shift.getNode()); AddToWorklist(Add.getNode()); @@ -8851,10 +8727,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { // This is OK if we don't care about what happens if either operand is a // NaN. 
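// [Illustrative aside, not part of the patch] When ISD::ABS is not legal, the
// vselect-based abs pattern above is expanded with an arithmetic shift and an
// add; together with a final xor this is the classic branchless absolute-value
// identity (assuming arithmetic right shift for signed values):
//   abs(x) == (x + (x >> 31)) ^ (x >> 31)   for 32-bit x other than INT_MIN
#include <cassert>
#include <cstdint>
#include <cstdlib>

static int32_t branchlessAbs(int32_t X) {
  int32_t Sign = X >> 31;      // 0 if X >= 0, all-ones if X < 0
  return (X + Sign) ^ Sign;    // negates X exactly when Sign is all-ones
}

int main() {
  for (int32_t X : {-123456, -1, 0, 1, 123456})
    assert(branchlessAbs(X) == std::abs(X));
  return 0;
}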
// - if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0), - N0.getOperand(1), TLI)) { - if (SDValue FMinMax = combineMinNumMaxNum( - DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG)) + if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) { + if (SDValue FMinMax = + combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG)) return FMinMax; } @@ -9209,8 +9084,9 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { LoadSDNode *LN0 = cast(N0); if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) || - !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() || - !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) + !N0.hasOneUse() || !LN0->isSimple() || + !DstVT.isVector() || !DstVT.isPow2VectorType() || + !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) return SDValue(); SmallVector SetCCs; @@ -9411,7 +9287,8 @@ static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, LoadSDNode *LN0 = cast(N0); EVT MemVT = LN0->getMemoryVT(); - if ((LegalOperations || LN0->isVolatile() || VT.isVector()) && + if ((LegalOperations || !LN0->isSimple() || + VT.isVector()) && !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT)) return SDValue(); @@ -9436,7 +9313,7 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()) || ((LegalOperations || VT.isVector() || - cast(N0)->isVolatile()) && + !cast(N0)->isSimple()) && !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))) return {}; @@ -9468,6 +9345,35 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, return SDValue(N, 0); // Return N so it doesn't get rechecked! } +static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, + const TargetLowering &TLI, EVT VT, + SDNode *N, SDValue N0, + ISD::LoadExtType ExtLoadType, + ISD::NodeType ExtOpc) { + if (!N0.hasOneUse()) + return SDValue(); + + MaskedLoadSDNode *Ld = dyn_cast(N0); + if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD) + return SDValue(); + + if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0))) + return SDValue(); + + if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0))) + return SDValue(); + + SDLoc dl(Ld); + SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru()); + SDValue NewLoad = DAG.getMaskedLoad(VT, dl, Ld->getChain(), + Ld->getBasePtr(), Ld->getMask(), + PassThru, Ld->getMemoryVT(), + Ld->getMemOperand(), ExtLoadType, + Ld->isExpandingLoad()); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1)); + return NewLoad; +} + static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations) { assert((N->getOpcode() == ISD::SIGN_EXTEND || @@ -9568,6 +9474,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { ISD::SEXTLOAD, ISD::SIGN_EXTEND)) return foldedExt; + if (SDValue foldedExt = + tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD, + ISD::SIGN_EXTEND)) + return foldedExt; + // fold (sext (load x)) to multiple smaller sextloads. // Only on illegal but splittable vectors. if (SDValue ExtLoad = CombineExtLoad(N)) @@ -9856,6 +9767,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { ISD::ZEXTLOAD, ISD::ZERO_EXTEND)) return foldedExt; + if (SDValue foldedExt = + tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD, + ISD::ZERO_EXTEND)) + return foldedExt; + // fold (zext (load x)) to multiple smaller zextloads. // Only on illegal but splittable vectors. 
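// [Illustrative aside, not part of the patch] Why the min/max combine above is
// gated on "don't care about NaNs": the compare+select form and an
// fminnum-style operation disagree when the second operand is a NaN.
// std::fmin follows the IEEE-754 minNum rule (return the non-NaN operand),
// while the select form returns whichever operand the false branch names.
#include <cassert>
#include <cmath>

int main() {
  double A = 1.0, NaN = std::nan("");
  double SelectForm = (A < NaN) ? A : NaN;   // compare is false -> NaN
  double MinNumForm = std::fmin(A, NaN);     // minNum rule      -> 1.0
  assert(std::isnan(SelectForm));
  assert(MinNumForm == 1.0);
  return 0;
}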
if (SDValue ExtLoad = CombineExtLoad(N)) @@ -10340,7 +10256,10 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { return SDValue(); LoadSDNode *LN0 = cast(N0); - if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt)) + // Reducing the width of a volatile load is illegal. For atomics, we may be + // able to reduce the width provided we never widen again. (see D66309) + if (!LN0->isSimple() || + !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt)) return SDValue(); auto AdjustBigEndianShift = [&](unsigned ShAmt) { @@ -10369,11 +10288,11 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue Load; if (ExtType == ISD::NON_EXTLOAD) - Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, + Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); else - Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr, + Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT, NewAlign, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); @@ -10392,7 +10311,6 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // no larger than the source) then the useful bits of the result are // zero; we can't simply return the shortened shift, because the result // of that operation is undefined. - SDLoc DL(N0); if (ShLeftAmt >= VT.getSizeInBits()) Result = DAG.getConstant(0, DL, VT); else @@ -10513,7 +10431,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && EVT == cast(N0)->getMemoryVT() && - ((!LegalOperations && !cast(N0)->isVolatile() && + ((!LegalOperations && cast(N0)->isSimple() && N0.hasOneUse()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { LoadSDNode *LN0 = cast(N0); @@ -10530,7 +10448,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse() && EVT == cast(N0)->getMemoryVT() && - ((!LegalOperations && !cast(N0)->isVolatile()) || + ((!LegalOperations && cast(N0)->isSimple()) && TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { LoadSDNode *LN0 = cast(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, @@ -10757,7 +10675,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // after truncation. if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { LoadSDNode *LN0 = cast(N0); - if (!LN0->isVolatile() && + if (LN0->isSimple() && LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) { SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), @@ -11051,7 +10969,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // memory accesses. We don't care if the original type was legal or not // as we assume software couldn't rely on the number of accesses of an // illegal type. - ((!LegalOperations && !cast(N0)->isVolatile()) || + ((!LegalOperations && cast(N0)->isSimple()) || TLI.isOperationLegal(ISD::LOAD, VT))) { LoadSDNode *LN0 = cast(N0); @@ -11237,15 +11155,10 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { for (int i = 0; i != MaskScale; ++i) NewMask.push_back(M < 0 ? 
-1 : M * MaskScale + i); - bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); - if (!LegalMask) { - std::swap(SV0, SV1); - ShuffleVectorSDNode::commuteMask(NewMask); - LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); - } - - if (LegalMask) - return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask); + SDValue LegalShuffle = + TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG); + if (LegalShuffle) + return LegalShuffle; } return SDValue(); @@ -11998,7 +11911,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math) ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true); if (N1C && N1C->isZero()) - if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros()) + if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) return N0; if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -12006,17 +11919,17 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize) == 2) - return DAG.getNode(ISD::FSUB, DL, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), Flags); + TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize) == 2) + return DAG.getNode( + ISD::FSUB, DL, VT, N0, + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - isNegatibleForFree(N0, LegalOperations, TLI, &Options, ForCodeSize) == 2) - return DAG.getNode(ISD::FSUB, DL, VT, N1, - GetNegatedExpression(N0, DAG, LegalOperations, - ForCodeSize), Flags); + TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize) == 2) + return DAG.getNode( + ISD::FSUB, DL, VT, N1, + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), Flags); auto isFMulNegTwo = [](SDValue FMul) { if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL) @@ -12056,7 +11969,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // If 'unsafe math' or reassoc and nsz, fold lots of things. 
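// [Illustrative aside, not part of the patch] Two facts the fadd combines
// above rely on, checked on ordinary doubles. fneg is an exact sign-bit flip
// and IEEE-754 defines a - b as a + (-b), so fadd(A, fneg(B)) and fsub(A, B)
// always agree. Adding -0.0 is always an identity, whereas adding +0.0 is not
// (it loses the sign of -0.0), which is why that direction needs nsz.
#include <cassert>
#include <cmath>

int main() {
  for (double A : {-2.5, 0.0, 3.25})
    for (double B : {-1.5, 0.0, 7.0})
      assert(A + (-B) == A - B);           // fadd (fneg B) == fsub B

  assert(std::signbit(-0.0 + (-0.0)));     // -0.0 + -0.0 keeps the sign
  assert(!std::signbit(-0.0 + 0.0));       // ...but -0.0 + +0.0 is +0.0
  return 0;
}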
// TODO: break out portions of the transformations below for which Unsafe is // considered and which do not require both nsz and reassoc - if ((Options.UnsafeFPMath || + if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) || (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && AllowNewConst) { // fadd (fadd x, c1), c2 -> fadd x, c1 + c2 @@ -12175,7 +12088,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // (fsub A, 0) -> A if (N1CFP && N1CFP->isZero()) { - if (!N1CFP->isNegative() || Options.UnsafeFPMath || + if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) { return N0; } @@ -12195,16 +12108,16 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (N0CFP && N0CFP->isZero()) { if (N0CFP->isNegative() || (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { - if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize)) - return GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize)) + return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); } } - if ((Options.UnsafeFPMath || - (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) - && N1.getOpcode() == ISD::FADD) { + if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) || + (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && + N1.getOpcode() == ISD::FADD) { // X - (X + Y) -> -Y if (N0 == N1->getOperand(0)) return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags); @@ -12214,10 +12127,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // fold (fsub A, (fneg B)) -> (fadd A, B) - if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize)) - return DAG.getNode(ISD::FADD, DL, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), Flags); + if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize)) + return DAG.getNode( + ISD::FADD, DL, VT, N0, + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); // FSUB -> FMA combines: if (SDValue Fused = visitFSUBForFMACombine(N)) { @@ -12228,6 +12141,21 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { return SDValue(); } +/// Return true if both inputs are at least as cheap in negated form and at +/// least one input is strictly cheaper in negated form. +bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) { + if (char LHSNeg = + TLI.isNegatibleForFree(X, DAG, LegalOperations, ForCodeSize)) + if (char RHSNeg = + TLI.isNegatibleForFree(Y, DAG, LegalOperations, ForCodeSize)) + // Both negated operands are at least as cheap as their counterparts. + // Check to see if at least one is cheaper negated. 
+ if (LHSNeg == 2 || RHSNeg == 2) + return true; + + return false; +} + SDValue DAGCombiner::visitFMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -12254,10 +12182,6 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags); - // fold (fmul A, 1.0) -> A - if (N1CFP && N1CFP->isExactlyValue(1.0)) - return N0; - if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -12302,21 +12226,13 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, DL, VT, N0); - // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options, - ForCodeSize)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options, - ForCodeSize)) { - // Both can be negated for free, check to see if at least one is cheaper - // negated. - if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FMUL, DL, VT, - GetNegatedExpression(N0, DAG, LegalOperations, - ForCodeSize), - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), - Flags); - } + // -N0 * -N1 --> N0 * N1 + if (isCheaperToUseNegatedFPOps(N0, N1)) { + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags); } // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X)) @@ -12395,6 +12311,15 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2); } + // (-N0 * -N1) + N2 --> (N0 * N1) + N2 + if (isCheaperToUseNegatedFPOps(N0, N1)) { + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags); + } + if (UnsafeFPMath) { if (N0CFP && N0CFP->isZero()) return N2; @@ -12602,9 +12527,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction. if (N1.getOpcode() == ISD::FSQRT) { - if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); - } } else if (N1.getOpcode() == ISD::FP_EXTEND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0), @@ -12645,28 +12569,16 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { } // Fold into a reciprocal estimate and multiply instead of a real divide. - if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) { - AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); - } + if (SDValue RV = BuildDivEstimate(N0, N1, Flags)) + return RV; } // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options, - ForCodeSize)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options, - ForCodeSize)) { - // Both can be negated for free, check to see if at least one is cheaper - // negated. 
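// [Illustrative aside, not part of the patch] FP negation only flips the sign
// bit, so (-X) * (-Y) == X * Y holds exactly; the combine above simply drops
// both negations when each is free. The fdiv path instead multiplies by a
// refined reciprocal estimate; the generic Newton-Raphson refinement step
// below is one possible scheme, shown for intuition only, not the exact
// sequence the target hooks emit.
#include <cassert>
#include <cmath>

static double refineRecip(double D, double R0) {
  // One Newton-Raphson step for f(r) = 1/r - D:  r1 = r0 * (2 - D * r0).
  return R0 * (2.0 - D * R0);
}

int main() {
  for (double X : {-3.5, 0.25, 7.0})
    for (double Y : {-2.0, 1.5})
      assert((-X) * (-Y) == X * Y);

  double D = 3.0, R = 0.3;                 // rough estimate of 1/3
  for (int i = 0; i < 4; ++i)
    R = refineRecip(D, R);                 // error roughly squares each step
  assert(std::fabs(R - 1.0 / 3.0) < 1e-12);
  return 0;
}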
- if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FDIV, SDLoc(N), VT, - GetNegatedExpression(N0, DAG, LegalOperations, - ForCodeSize), - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), - Flags); - } - } + if (isCheaperToUseNegatedFPOps(N0, N1)) + return DAG.getNode( + ISD::FDIV, SDLoc(N), VT, + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); return SDValue(); } @@ -13112,22 +13024,6 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { - SDValue N0 = N->getOperand(0); - EVT VT = N->getValueType(0); - EVT EVT = cast(N->getOperand(1))->getVT(); - ConstantFPSDNode *N0CFP = dyn_cast(N0); - - // fold (fp_round_inreg c1fp) -> c1fp - if (N0CFP && isTypeLegal(EVT)) { - SDLoc DL(N); - SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT); - return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round); - } - - return SDValue(); -} - SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -13236,9 +13132,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); - if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), - &DAG.getTarget().Options, ForCodeSize)) - return GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize)) + return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading // constant pool values. @@ -14004,11 +13899,12 @@ bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) { } SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { - if (OptLevel == CodeGenOpt::None || LD->isVolatile()) + if (OptLevel == CodeGenOpt::None || !LD->isSimple()) return SDValue(); SDValue Chain = LD->getOperand(0); StoreSDNode *ST = dyn_cast(Chain.getNode()); - if (!ST || ST->isVolatile()) + // TODO: Relax this restriction for unordered atomics (see D66309) + if (!ST || !ST->isSimple()) return SDValue(); EVT LDType = LD->getValueType(0); @@ -14107,7 +14003,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // If load is not volatile and there are no uses of the loaded value (and // the updated indexed value in case of indexed loads), change uses of the // chain value into uses of the chain input (i.e. delete the dead load). - if (!LD->isVolatile()) { + // TODO: Allow this for unordered atomics (see D66309) + if (LD->isSimple()) { if (N->getValueType(1) == MVT::Other) { // Unindexed loads. if (!N->hasAnyUseOfValue(0)) { @@ -14241,7 +14138,7 @@ struct LoadedSlice { /// Helper structure used to compute the cost of a slice. struct Cost { /// Are we optimizing for code size. - bool ForCodeSize; + bool ForCodeSize = false; /// Various cost. unsigned Loads = 0; @@ -14250,10 +14147,10 @@ struct LoadedSlice { unsigned ZExts = 0; unsigned Shift = 0; - Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {} + explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {} /// Get the cost of one isolated slice. 
- Cost(const LoadedSlice &LS, bool ForCodeSize = false) + Cost(const LoadedSlice &LS, bool ForCodeSize) : ForCodeSize(ForCodeSize), Loads(1) { EVT TruncType = LS.Inst->getValueType(0); EVT LoadedType = LS.getLoadedType(); @@ -14678,7 +14575,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { return false; LoadSDNode *LD = cast(N); - if (LD->isVolatile() || !ISD::isNormalLoad(LD) || + if (!LD->isSimple() || !ISD::isNormalLoad(LD) || !LD->getValueType(0).isInteger()) return false; @@ -14829,13 +14726,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { else if (Chain->getOpcode() == ISD::TokenFactor && SDValue(LD, 1).hasOneUse()) { // LD has only 1 chain use so they are no indirect dependencies. - bool isOk = false; - for (const SDValue &ChainOp : Chain->op_values()) - if (ChainOp.getNode() == LD) { - isOk = true; - break; - } - if (!isOk) + if (!LD->isOperandOf(Chain.getNode())) return Result; } else return Result; // Fail. @@ -14848,7 +14739,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { /// Check to see if IVal is something that provides a value as specified by /// MaskInfo. If so, replace the specified store with a narrower store of /// truncated IVal. -static SDNode * +static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC) { @@ -14860,14 +14751,19 @@ ShrinkLoadReplaceStoreWithStore(const std::pair &MaskInfo, // that uses this. If not, this is not a replacement. APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(), ByteShift*8, (ByteShift+NumBytes)*8); - if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr; + if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue(); // Check that it is legal on the target to do this. It is legal if the new // VT we're shrinking to (i8/i16/i32) is legal or we're still before type - // legalization. - MVT VT = MVT::getIntegerVT(NumBytes*8); + // legalization (and the target doesn't explicitly think this is a bad idea). + MVT VT = MVT::getIntegerVT(NumBytes * 8); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!DC->isTypeLegal(VT)) - return nullptr; + return SDValue(); + if (St->getMemOperand() && + !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, + *St->getMemOperand())) + return SDValue(); // Okay, we can do this! Replace the 'St' store with a store of IVal that is // shifted by ByteShift and truncated down to NumBytes. @@ -14901,8 +14797,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair &MaskInfo, ++OpsNarrowed; return DAG .getStore(St->getChain(), SDLoc(St), IVal, Ptr, - St->getPointerInfo().getWithOffset(StOffset), NewAlign) - .getNode(); + St->getPointerInfo().getWithOffset(StOffset), NewAlign); } /// Look for sequence of load / op / store where op is one of 'or', 'xor', and @@ -14911,7 +14806,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair &MaskInfo, /// or code size. SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { StoreSDNode *ST = cast(N); - if (ST->isVolatile()) + if (!ST->isSimple()) return SDValue(); SDValue Chain = ST->getChain(); @@ -14933,16 +14828,16 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { std::pair MaskedLoad; MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain); if (MaskedLoad.first) - if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, Value.getOperand(1), ST,this)) - return SDValue(NewST, 0); + return NewST; // Or is commutative, so try swapping X and Y. 
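// [Illustrative aside, not part of the patch] The narrowing done by
// ShrinkLoadReplaceStoreWithStore() above: when a wide store writes back
// "(load p & mask) | val" and every set bit of val lies inside the bytes
// cleared by the mask (the MaskedValueIsZero check), only those bytes can
// change, so a single narrow store of val suffices. A sketch for the low-byte
// case over an explicitly little-endian byte array (helper names hypothetical).
#include <cassert>
#include <cstdint>

static uint32_t load32le(const unsigned char *P) {
  return P[0] | (uint32_t)P[1] << 8 | (uint32_t)P[2] << 16 | (uint32_t)P[3] << 24;
}
static void store32le(unsigned char *P, uint32_t V) {
  P[0] = (unsigned char)V;         P[1] = (unsigned char)(V >> 8);
  P[2] = (unsigned char)(V >> 16); P[3] = (unsigned char)(V >> 24);
}

int main() {
  unsigned char MemWide[4]   = {0xDD, 0xCC, 0xBB, 0xAA};
  unsigned char MemNarrow[4] = {0xDD, 0xCC, 0xBB, 0xAA};
  uint32_t Val = 0x42;
  assert((Val & ~0xFFu) == 0);     // val only touches the masked-off low byte

  // Wide form: load, clear the low byte, OR in Val, store all 4 bytes back.
  store32le(MemWide, (load32le(MemWide) & ~0xFFu) | Val);

  // Narrow form: store only the one byte that can change (byte 0 for this
  // little-endian layout; the DAG code flips the offset for big-endian).
  MemNarrow[0] = (unsigned char)Val;

  for (int i = 0; i < 4; ++i)
    assert(MemWide[i] == MemNarrow[i]);
  return 0;
}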
MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain); if (MaskedLoad.first) - if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, Value.getOperand(0), ST,this)) - return SDValue(NewST, 0); + return NewST; } if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || @@ -15367,14 +15262,16 @@ void DAGCombiner::getStoreMergeCandidates( // Loads must only have one use. if (!Ld->hasNUsesOfValue(1, 0)) return; - // The memory operands must not be volatile/indexed. - if (Ld->isVolatile() || Ld->isIndexed()) + // The memory operands must not be volatile/indexed/atomic. + // TODO: May be able to relax for unordered atomics (see D66309) + if (!Ld->isSimple() || Ld->isIndexed()) return; } auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr, int64_t &Offset) -> bool { - // The memory operands must not be volatile/indexed. - if (Other->isVolatile() || Other->isIndexed()) + // The memory operands must not be volatile/indexed/atomic. + // TODO: May be able to relax for unordered atomics (see D66309) + if (!Other->isSimple() || Other->isIndexed()) return false; // Don't mix temporal stores with non-temporal stores. if (St->isNonTemporal() != Other->isNonTemporal()) @@ -15394,8 +15291,10 @@ void DAGCombiner::getStoreMergeCandidates( // Loads must only have one use. if (!OtherLd->hasNUsesOfValue(1, 0)) return false; - // The memory operands must not be volatile/indexed. - if (OtherLd->isVolatile() || OtherLd->isIndexed()) + // The memory operands must not be volatile/indexed/atomic. + // TODO: May be able to relax for unordered atomics (see D66309) + if (!OtherLd->isSimple() || + OtherLd->isIndexed()) return false; // Don't mix temporal loads with non-temporal loads. if (cast(Val)->isNonTemporal() != OtherLd->isNonTemporal()) @@ -15425,6 +15324,18 @@ void DAGCombiner::getStoreMergeCandidates( return (BasePtr.equalBaseIndex(Ptr, DAG, Offset)); }; + // Check if the pair of StoreNode and the RootNode already bail out many + // times which is over the limit in dependence check. + auto OverLimitInDependenceCheck = [&](SDNode *StoreNode, + SDNode *RootNode) -> bool { + auto RootCount = StoreRootCountMap.find(StoreNode); + if (RootCount != StoreRootCountMap.end() && + RootCount->second.first == RootNode && + RootCount->second.second > StoreMergeDependenceLimit) + return true; + return false; + }; + // We looking for a root node which is an ancestor to all mergable // stores. We search up through a load, to our root and then down // through all children. For instance we will find Store{1,2,3} if @@ -15454,7 +15365,8 @@ void DAGCombiner::getStoreMergeCandidates( if (StoreSDNode *OtherST = dyn_cast(*I2)) { BaseIndexOffset Ptr; int64_t PtrDiff; - if (CandidateMatch(OtherST, Ptr, PtrDiff)) + if (CandidateMatch(OtherST, Ptr, PtrDiff) && + !OverLimitInDependenceCheck(OtherST, RootNode)) StoreNodes.push_back(MemOpLink(OtherST, PtrDiff)); } } else @@ -15464,7 +15376,8 @@ void DAGCombiner::getStoreMergeCandidates( if (StoreSDNode *OtherST = dyn_cast(*I)) { BaseIndexOffset Ptr; int64_t PtrDiff; - if (CandidateMatch(OtherST, Ptr, PtrDiff)) + if (CandidateMatch(OtherST, Ptr, PtrDiff) && + !OverLimitInDependenceCheck(OtherST, RootNode)) StoreNodes.push_back(MemOpLink(OtherST, PtrDiff)); } } @@ -15522,13 +15435,24 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies( // Search through DAG. We can stop early if we find a store node. 
for (unsigned i = 0; i < NumStores; ++i) if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist, - Max)) + Max)) { + // If the searching bail out, record the StoreNode and RootNode in the + // StoreRootCountMap. If we have seen the pair many times over a limit, + // we won't add the StoreNode into StoreNodes set again. + if (Visited.size() >= Max) { + auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode]; + if (RootCount.first == RootNode) + RootCount.second++; + else + RootCount = {RootNode, 1}; + } return false; + } return true; } bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { - if (OptLevel == CodeGenOpt::None) + if (OptLevel == CodeGenOpt::None || !EnableStoreMerging) return false; EVT MemVT = St->getMemoryVT(); @@ -15588,7 +15512,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { bool RV = false; while (StoreNodes.size() > 1) { - unsigned StartIdx = 0; + size_t StartIdx = 0; while ((StartIdx + 1 < StoreNodes.size()) && StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes != StoreNodes[StartIdx + 1].OffsetFromBase) @@ -16113,7 +16037,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { case MVT::ppcf128: return SDValue(); case MVT::f32: - if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || + if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { ; Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). @@ -16125,7 +16049,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { return SDValue(); case MVT::f64: if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && - !ST->isVolatile()) || + ST->isSimple()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { ; Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). @@ -16134,7 +16058,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { Ptr, ST->getMemOperand()); } - if (!ST->isVolatile() && + if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { // Many FP stores are not made apparent until after legalize, e.g. for // argument passing. Since this is so common, custom legalize the @@ -16181,7 +16105,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // memory accesses. We don't care if the original type was legal or not // as we assume software couldn't rely on the number of accesses of an // illegal type. - if (((!LegalOperations && !ST->isVolatile()) || + // TODO: May be able to relax for unordered atomics (see D66309) + if (((!LegalOperations && ST->isSimple()) || TLI.isOperationLegal(ISD::STORE, SVT)) && TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT, DAG, *ST->getMemOperand())) { @@ -16242,9 +16167,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // See if we can simplify the input to this truncstore with knowledge that // only the low bits are being used. For example: // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" - SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits); AddToWorklist(Value.getNode()); - if (Shorter) + if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits)) return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(), ST->getMemOperand()); @@ -16263,9 +16187,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // If this is a load followed by a store to the same location, then the store // is dead/noop. 
+ // TODO: Can relax for unordered atomics (see D66309) if (LoadSDNode *Ld = dyn_cast(Value)) { if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() && - ST->isUnindexed() && !ST->isVolatile() && + ST->isUnindexed() && ST->isSimple() && // There can't be any side effects between the load and store, such as // a call or store. Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) { @@ -16274,9 +16199,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { } } + // TODO: Can relax for unordered atomics (see D66309) if (StoreSDNode *ST1 = dyn_cast(Chain)) { - if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() && - !ST1->isVolatile()) { + if (ST->isUnindexed() && ST->isSimple() && + ST1->isUnindexed() && ST1->isSimple()) { if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT()) { // If this is a store followed by a store with the same value to the @@ -16405,7 +16331,8 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) { break; case ISD::STORE: { StoreSDNode *ST = dyn_cast(Chain); - if (ST->isVolatile() || ST->isIndexed()) + // TODO: Can relax for unordered atomics (see D66309) + if (!ST->isSimple() || ST->isIndexed()) continue; const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG); // If we store purely within object bounds just before its lifetime ends, @@ -16456,6 +16383,11 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { if (OptLevel == CodeGenOpt::None) return SDValue(); + // Can't change the number of memory accesses for a volatile store or break + // atomicity for an atomic one. + if (!ST->isSimple()) + return SDValue(); + SDValue Val = ST->getValue(); SDLoc DL(ST); @@ -16531,12 +16463,52 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { } /// Convert a disguised subvector insertion into a shuffle: -/// insert_vector_elt V, (bitcast X from vector type), IdxC --> -/// bitcast(shuffle (bitcast V), (extended X), Mask) -/// Note: We do not use an insert_subvector node because that requires a legal -/// subvector type. SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { SDValue InsertVal = N->getOperand(1); + SDValue Vec = N->getOperand(0); + + // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex) + // --> (vector_shuffle X, Y) + if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() && + InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + isa(InsertVal.getOperand(1))) { + ShuffleVectorSDNode *SVN = cast(Vec.getNode()); + ArrayRef Mask = SVN->getMask(); + + SDValue X = Vec.getOperand(0); + SDValue Y = Vec.getOperand(1); + + // Vec's operand 0 is using indices from 0 to N-1 and + // operand 1 from N to 2N - 1, where N is the number of + // elements in the vectors. 
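// [Illustrative aside, not part of the patch] What the new
// combineInsertEltToShuffle() case above computes: inserting an element that
// was extracted from one of the shuffle's own sources is just a one-entry
// mask update (indices 0..N-1 select from X, N..2N-1 from Y). A 4-wide scalar
// model with hypothetical helpers:
#include <array>
#include <cassert>

using Vec4 = std::array<int, 4>;

static Vec4 shuffle(const Vec4 &X, const Vec4 &Y, const std::array<int, 4> &M) {
  Vec4 R{};
  for (int i = 0; i < 4; ++i)
    R[i] = M[i] < 4 ? X[M[i]] : Y[M[i] - 4];
  return R;
}

int main() {
  Vec4 X = {10, 11, 12, 13}, Y = {20, 21, 22, 23};
  std::array<int, 4> Mask = {0, 5, 2, 7};

  // insert_vector_elt (shuffle X, Y, Mask), (extract_vector_elt X, 3), 1 ...
  Vec4 InsertForm = shuffle(X, Y, Mask);
  InsertForm[1] = X[3];

  // ...equals shuffling with Mask[1] rewritten to point at X's element 3.
  std::array<int, 4> NewMask = Mask;
  NewMask[1] = 0 /*XOffset for X*/ + 3 /*extract index*/;
  assert(shuffle(X, Y, NewMask) == InsertForm);
  return 0;
}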
+ int XOffset = -1; + if (InsertVal.getOperand(0) == X) { + XOffset = 0; + } else if (InsertVal.getOperand(0) == Y) { + XOffset = X.getValueType().getVectorNumElements(); + } + + if (XOffset != -1) { + SmallVector NewMask(Mask.begin(), Mask.end()); + + auto *ExtrIndex = cast(InsertVal.getOperand(1)); + NewMask[InsIndex] = XOffset + ExtrIndex->getZExtValue(); + assert(NewMask[InsIndex] < + (int)(2 * Vec.getValueType().getVectorNumElements()) && + NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound"); + + SDValue LegalShuffle = + TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X, + Y, NewMask, DAG); + if (LegalShuffle) + return LegalShuffle; + } + } + + // insert_vector_elt V, (bitcast X from vector type), IdxC --> + // bitcast(shuffle (bitcast V), (extended X), Mask) + // Note: We do not use an insert_subvector node because that requires a + // legal subvector type. if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() || !InsertVal.getOperand(0).getValueType().isVector()) return SDValue(); @@ -16674,7 +16646,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) { - assert(!OriginalLoad->isVolatile()); + assert(OriginalLoad->isSimple()); EVT ResultVT = EVE->getValueType(0); EVT VecEltVT = InVecVT.getVectorElementType(); @@ -16747,12 +16719,12 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; SDValue To[] = { Load, Chain }; DAG.ReplaceAllUsesOfValuesWith(From, To, 2); + // Make sure to revisit this node to clean it up; it will usually be dead. + AddToWorklist(EVE); // Since we're explicitly calling ReplaceAllUses, add the new node to the // worklist explicitly as well. - AddToWorklist(Load.getNode()); AddUsersToWorklist(Load.getNode()); // Add users too - // Make sure to revisit this node to clean it up; it will usually be dead. - AddToWorklist(EVE); + AddToWorklist(Load.getNode()); ++OpsNarrowed; return SDValue(EVE, 0); } @@ -16982,7 +16954,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { ISD::isNormalLoad(VecOp.getNode()) && !Index->hasPredecessor(VecOp.getNode())) { auto *VecLoad = dyn_cast(VecOp); - if (VecLoad && !VecLoad->isVolatile()) + if (VecLoad && VecLoad->isSimple()) return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad); } @@ -17041,7 +17013,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Make sure we found a non-volatile load and the extractelement is // the only use. - if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) + if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple()) return SDValue(); // If Idx was -1 above, Elt is going to be -1, so just return undef. @@ -17344,17 +17316,16 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) { // the shuffle mask with -1. } - // Turn this into a shuffle with zero if that's legal. - EVT VecVT = Extract.getOperand(0).getValueType(); - if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT)) - return SDValue(); - // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... 
--> // bitcast (shuffle V, ZeroVec, VectorMask) SDLoc DL(BV); + EVT VecVT = Extract.getOperand(0).getValueType(); SDValue ZeroVec = DAG.getConstant(0, DL, VecVT); - SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec, - ShufMask); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0), + ZeroVec, ShufMask, DAG); + if (!Shuf) + return SDValue(); return DAG.getBitcast(VT, Shuf); } @@ -17656,6 +17627,13 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } } + // A splat of a single element is a SPLAT_VECTOR if supported on the target. + if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand) + if (SDValue V = cast(N)->getSplatValue()) { + assert(!V.isUndef() && "Splat of undef should have been handled earlier"); + return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V); + } + // Check if we can express BUILD VECTOR via subvector extract. if (!LegalTypes && (N->getNumOperands() > 1)) { SDValue Op0 = N->getOperand(0); @@ -17829,11 +17807,9 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { } } - if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT)) - return SDValue(); - - return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0), - DAG.getBitcast(VT, SV1), Mask); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0), + DAG.getBitcast(VT, SV1), Mask, DAG); } SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { @@ -17853,6 +17829,15 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { SDValue In = N->getOperand(0); assert(In.getValueType().isVector() && "Must concat vectors"); + // If the input is a concat_vectors, just make a larger concat by padding + // with smaller undefs. + if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) { + unsigned NumOps = N->getNumOperands() * In.getNumOperands(); + SmallVector Ops(In->op_begin(), In->op_end()); + Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType())); + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); + } + SDValue Scalar = peekThroughOneUseBitcasts(In); // concat_vectors(scalar_to_vector(scalar), undef) -> @@ -18002,6 +17987,23 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return SDValue(); } +// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find +// if the subvector can be sourced for free. 
+static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) { + if (V.getOpcode() == ISD::INSERT_SUBVECTOR && + V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) { + return V.getOperand(1); + } + auto *IndexC = dyn_cast(Index); + if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS && + V.getOperand(0).getValueType() == SubVT && + (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) { + uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements(); + return V.getOperand(SubIdx); + } + return SDValue(); +} + static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -18010,39 +18012,29 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1) return SDValue(); + EVT VecVT = BinOp.getValueType(); SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1); - SDValue Index = Extract->getOperand(1); - EVT VT = Extract->getValueType(0); + if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType()) + return SDValue(); - // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find - // if the source subvector is the same type as the one being extracted. - auto GetSubVector = [VT, Index](SDValue V) -> SDValue { - if (V.getOpcode() == ISD::INSERT_SUBVECTOR && - V.getOperand(1).getValueType() == VT && V.getOperand(2) == Index) { - return V.getOperand(1); - } - auto *IndexC = dyn_cast(Index); - if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS && - V.getOperand(0).getValueType() == VT && - (IndexC->getZExtValue() % VT.getVectorNumElements()) == 0) { - uint64_t SubIdx = IndexC->getZExtValue() / VT.getVectorNumElements(); - return V.getOperand(SubIdx); - } + SDValue Index = Extract->getOperand(1); + EVT SubVT = Extract->getValueType(0); + if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT)) return SDValue(); - }; - SDValue Sub0 = GetSubVector(Bop0); - SDValue Sub1 = GetSubVector(Bop1); + + SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT); + SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT); // TODO: We could handle the case where only 1 operand is being inserted by // creating an extract of the other operand, but that requires checking // number of uses and/or costs. - if (!Sub0 || !Sub1 || !TLI.isOperationLegalOrCustom(BinOpcode, VT)) + if (!Sub0 || !Sub1) return SDValue(); // We are inserting both operands of the wide binop only to extract back // to the narrow vector size. Eliminate all of the insert/extract: // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y - return DAG.getNode(BinOpcode, SDLoc(Extract), VT, Sub0, Sub1, + return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1, BinOp->getFlags()); } @@ -18174,7 +18166,8 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { auto *Ld = dyn_cast(Extract->getOperand(0)); auto *ExtIdx = dyn_cast(Extract->getOperand(1)); - if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx) + if (!Ld || Ld->getExtensionType() || !Ld->isSimple() || + !ExtIdx) return SDValue(); // Allow targets to opt-out. @@ -18878,7 +18871,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // build_vector. 
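// A minimal standalone sketch of the "free subvector" idea used above, on
// plain C++ containers rather than SelectionDAG nodes. The names below are
// invented for illustration and are not LLVM APIs: if a wide value is a
// concatenation of equally sized chunks, extracting a chunk at an aligned
// offset is just selecting one of the original operands.
#include <cstddef>
#include <vector>

static const std::vector<int> *
getChunkSrc(const std::vector<std::vector<int>> &Chunks, std::size_t Index,
            std::size_t NumSubElts) {
  // Only aligned offsets can be sourced "for free".
  if (NumSubElts == 0 || Index % NumSubElts != 0)
    return nullptr;
  std::size_t SubIdx = Index / NumSubElts;
  return SubIdx < Chunks.size() ? &Chunks[SubIdx] : nullptr;
}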
   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
     int SplatIndex = SVN->getSplatIndex();
-    if (TLI.isExtractVecEltCheap(VT, SplatIndex) &&
+    if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
         TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
       // splat (vector_bo L, R), Index -->
       // splat (scalar_bo (extelt L, Index), (extelt R, Index))
@@ -19153,22 +19146,13 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
       SV1 = DAG.getUNDEF(VT);

     // Avoid introducing shuffles with illegal mask.
-    if (!TLI.isShuffleMaskLegal(Mask, VT)) {
-      ShuffleVectorSDNode::commuteMask(Mask);
-
-      if (!TLI.isShuffleMaskLegal(Mask, VT))
-        return SDValue();
-
-      // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
-      // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
-      // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
-      std::swap(SV0, SV1);
-    }
-
     // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
     // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
     // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
-    return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
+    // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
+    // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
+    // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
+    return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
   }

   if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
@@ -19191,35 +19175,35 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
       SmallVector NewMask(InVecT.getVectorNumElements(), -1);
       int Elt = C0->getZExtValue();
       NewMask[0] = Elt;
-      SDValue Val;
       // If we have an implicit truncate do truncate here as long as it's legal.
       // if it's not legal, this should
       if (VT.getScalarType() != InVal.getValueType() &&
           InVal.getValueType().isScalarInteger() &&
           isTypeLegal(VT.getScalarType())) {
-        Val =
+        SDValue Val =
             DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
         return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
       }
       if (VT.getScalarType() == InVecT.getScalarType() &&
-          VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
-          TLI.isShuffleMaskLegal(NewMask, VT)) {
-        Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
-                                   DAG.getUNDEF(InVecT), NewMask);
-        // If the initial vector is the correct size this shuffle is a
-        // valid result.
-        if (VT == InVecT)
-          return Val;
-        // If not we must truncate the vector.
-        if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
-          MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
-          SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
-          EVT SubVT =
-              EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
-                               VT.getVectorNumElements());
-          Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
-                            ZeroIdx);
-          return Val;
+          VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
+        SDValue LegalShuffle =
+            TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
+                                        DAG.getUNDEF(InVecT), NewMask, DAG);
+        if (LegalShuffle) {
+          // If the initial vector is the correct size this shuffle is a
+          // valid result.
+          if (VT == InVecT)
+            return LegalShuffle;
+          // If not we must truncate the vector.
+ if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) { + MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy); + EVT SubVT = + EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(), + VT.getVectorNumElements()); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, + LegalShuffle, ZeroIdx); + } } } } @@ -19627,6 +19611,39 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { } } + // Make sure all but the first op are undef or constant. + auto ConcatWithConstantOrUndef = [](SDValue Concat) { + return Concat.getOpcode() == ISD::CONCAT_VECTORS && + std::all_of(std::next(Concat->op_begin()), Concat->op_end(), + [](const SDValue &Op) { + return Op.isUndef() || + ISD::isBuildVectorOfConstantSDNodes(Op.getNode()); + }); + }; + + // The following pattern is likely to emerge with vector reduction ops. Moving + // the binary operation ahead of the concat may allow using a narrower vector + // instruction that has better performance than the wide version of the op: + // VBinOp (concat X, undef/constant), (concat Y, undef/constant) --> + // concat (VBinOp X, Y), VecC + if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) && + (LHS.hasOneUse() || RHS.hasOneUse())) { + EVT NarrowVT = LHS.getOperand(0).getValueType(); + if (NarrowVT == RHS.getOperand(0).getValueType() && + TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) { + SDLoc DL(N); + unsigned NumOperands = LHS.getNumOperands(); + SmallVector ConcatOps; + for (unsigned i = 0; i != NumOperands; ++i) { + // This constant fold for operands 1 and up. + ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i), + RHS.getOperand(i))); + } + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); + } + } + if (SDValue V = scalarizeBinOpOfSplats(N, DAG)) return V; @@ -19723,7 +19740,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, // Token chains must be identical. if (LHS.getOperand(0) != RHS.getOperand(0) || // Do not let this transformation reduce the number of volatile loads. - LLD->isVolatile() || RLD->isVolatile() || + // Be conservative for atomics for the moment + // TODO: This does appear to be legal for unordered atomics (see D66309) + !LLD->isSimple() || !RLD->isSimple() || // FIXME: If either is a pre/post inc/dec load, // we'd need to split out the address adjustment. LLD->isIndexed() || RLD->isIndexed() || @@ -19928,7 +19947,7 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset( const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC) { - if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint())) + if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType())) return SDValue(); // If we are before legalize types, we want the other legalization to happen @@ -20016,8 +20035,13 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, // when the condition can be materialized as an all-ones register. Any // single bit-test can be materialized as an all-ones register with // shift-left and shift-right-arith. + // TODO: The operation legality checks could be loosened to include "custom", + // but that may cause regressions for targets that do not have shift + // instructions. 
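// A minimal standalone sketch of the concat narrowing shown above, on plain
// data. The function and parameter names are invented for illustration, not
// LLVM APIs: when both operands are "real half + constant/undef padding", the
// binary op only needs to run at the narrow width, and the padded lanes fold
// to a constant.
#include <cstddef>
#include <vector>

static std::vector<int> addNarrowThenConcat(const std::vector<int> &LoX,
                                            const std::vector<int> &LoY,
                                            int PadX, int PadY,
                                            std::size_t PadLanes) {
  std::vector<int> Result;
  for (std::size_t i = 0; i < LoX.size(); ++i)
    Result.push_back(LoX[i] + LoY[i]);          // narrow "VBinOp X, Y"
  Result.insert(Result.end(), PadLanes, PadX + PadY); // padding constant-folds
  return Result;
}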
if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && - N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) { + N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2) && + TLI.isOperationLegal(ISD::SHL, VT) && + TLI.isOperationLegal(ISD::SRA, VT)) { SDValue AndLHS = N0->getOperand(0); auto *ConstAndRHS = dyn_cast(N0->getOperand(1)); if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { @@ -20209,7 +20233,10 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { /// => /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form /// does not require additional intermediate precision] -SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) { +/// For the last iteration, put numerator N into it to gain more precision: +/// Result = N X_i + X_i (N - N A X_i) +SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op, + SDNodeFlags Flags) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -20230,25 +20257,39 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) { if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) { AddToWorklist(Est.getNode()); + SDLoc DL(Op); if (Iterations) { - SDLoc DL(Op); SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); - // Newton iterations: Est = Est + Est (1 - Arg * Est) + // Newton iterations: Est = Est + Est (N - Arg * Est) + // If this is the last iteration, also multiply by the numerator. for (int i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags); + SDValue MulEst = Est; + + if (i == Iterations - 1) { + MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags); + AddToWorklist(MulEst.getNode()); + } + + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags); + NewEst = DAG.getNode(ISD::FSUB, DL, VT, + (i == Iterations - 1 ? N : FPOne), NewEst, Flags); AddToWorklist(NewEst.getNode()); NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); AddToWorklist(NewEst.getNode()); - Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags); + Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags); AddToWorklist(Est.getNode()); } + } else { + // If no iterations are available, multiply with N. + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags); + AddToWorklist(Est.getNode()); } + return Est; } @@ -20271,31 +20312,19 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); - AddToWorklist(HalfArg.getNode()); - HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); - AddToWorklist(HalfArg.getNode()); // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); - AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); - AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); - AddToWorklist(NewEst.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); - AddToWorklist(Est.getNode()); } // If non-reciprocal square root is requested, multiply the result by Arg. 
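// A minimal scalar sketch of the refinement BuildDivEstimate performs above,
// written for a plain float; the function name and parameters are invented
// for illustration. Ordinary Newton-Raphson steps refine est ~= 1/a, and on
// the last step the numerator n is folded in, so the result is
// n*est + est*(n - a*(n*est)) instead of a separate multiply by n.
static float divEstimate(float n, float a, float est, int iterations) {
  if (iterations == 0)
    return est * n;                         // no refinement: just scale by n
  for (int i = 0; i < iterations; ++i) {
    if (i != iterations - 1) {
      est = est + est * (1.0f - a * est);   // Est = Est + Est*(1 - A*Est)
    } else {
      float mulEst = n * est;               // fold the numerator into the last step
      est = mulEst + est * (n - a * mulEst);
    }
  }
  return est;
}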
- if (!Reciprocal) { + if (!Reciprocal) Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); - AddToWorklist(Est.getNode()); - } return Est; } @@ -20321,13 +20350,8 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, // E = (E * -0.5) * ((A * E) * E + -3.0) for (unsigned i = 0; i < Iterations; ++i) { SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags); - AddToWorklist(AE.getNode()); - SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags); - AddToWorklist(AEE.getNode()); - SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags); - AddToWorklist(RHS.getNode()); // When calculating a square root at the last iteration build: // S = ((A * E) * -0.5) * ((A * E) * E + -3.0) @@ -20340,10 +20364,8 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, // SQRT: LHS = (A * E) * -0.5 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags); } - AddToWorklist(LHS.getNode()); Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags); - AddToWorklist(Est.getNode()); } return Est; @@ -20400,16 +20422,11 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op); SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT); Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est); - AddToWorklist(Fabs.getNode()); - AddToWorklist(IsDenorm.getNode()); - AddToWorklist(Est.getNode()); } else { // X == 0.0 ? 0.0 : Est SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est); - AddToWorklist(IsZero.getNode()); - AddToWorklist(Est.getNode()); } } } @@ -20432,6 +20449,7 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { struct MemUseCharacteristics { bool IsVolatile; + bool IsAtomic; SDValue BasePtr; int64_t Offset; Optional NumBytes; @@ -20447,18 +20465,20 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { : (LSN->getAddressingMode() == ISD::PRE_DEC) ? -1 * C->getSExtValue() : 0; - return {LSN->isVolatile(), LSN->getBasePtr(), Offset /*base offset*/, + return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(), + Offset /*base offset*/, Optional(LSN->getMemoryVT().getStoreSize()), LSN->getMemOperand()}; } if (const auto *LN = cast(N)) - return {false /*isVolatile*/, LN->getOperand(1), + return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1), (LN->hasOffset()) ? LN->getOffset() : 0, (LN->hasOffset()) ? Optional(LN->getSize()) : Optional(), (MachineMemOperand *)nullptr}; // Default. - return {false /*isvolatile*/, SDValue(), (int64_t)0 /*offset*/, + return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(), + (int64_t)0 /*offset*/, Optional() /*size*/, (MachineMemOperand *)nullptr}; }; @@ -20474,6 +20494,11 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { if (MUC0.IsVolatile && MUC1.IsVolatile) return true; + // Be conservative about atomics for the moment + // TODO: This is way overconservative for unordered atomics (see D66309) + if (MUC0.IsAtomic && MUC1.IsAtomic) + return true; + if (MUC0.MMO && MUC1.MMO) { if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) || (MUC1.MMO->isInvariant() && MUC0.MMO->isStore())) @@ -20555,7 +20580,8 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallPtrSet Visited; // Visited node set. // Get alias information for node. 
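// A minimal standalone sketch of the conservative disambiguation rule being
// introduced above; the type and function names are invented for
// illustration. Two memory operations are assumed to alias if both are
// volatile or (for now) both are atomic, and a "simple" access, as used
// throughout this patch, is one that is neither.
struct MemUse {
  bool IsVolatile;
  bool IsAtomic;
};

static bool conservativelyAlias(const MemUse &A, const MemUse &B) {
  if (A.IsVolatile && B.IsVolatile)
    return true;  // never reorder two volatile accesses
  if (A.IsAtomic && B.IsAtomic)
    return true;  // the patch's TODO: unordered atomics could be relaxed
  return false;   // fall through to the usual base/offset/size checks
}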
- const bool IsLoad = isa(N) && !cast(N)->isVolatile(); + // TODO: relax aliasing for unordered atomics (see D66309) + const bool IsLoad = isa(N) && cast(N)->isSimple(); // Starting off. Chains.push_back(OriginalChain); @@ -20571,8 +20597,9 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, case ISD::LOAD: case ISD::STORE: { // Get alias information for C. + // TODO: Relax aliasing for unordered atomics (see D66309) bool IsOpLoad = isa(C.getNode()) && - !cast(C.getNode())->isVolatile(); + cast(C.getNode())->isSimple(); if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) { // Look further up the chain. C = C.getOperand(0); @@ -20727,7 +20754,8 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { // If the chain has more than one use, then we can't reorder the mem ops. if (!SDValue(Chain, 0)->hasOneUse()) break; - if (Chain->isVolatile() || Chain->isIndexed()) + // TODO: Relax for unordered atomics (see D66309) + if (!Chain->isSimple() || Chain->isIndexed()) break; // Find the base pointer and offset for this memory node. @@ -20795,11 +20823,11 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps); CombineTo(St, TF); - AddToWorklist(STChain); - // Add TF operands worklist in reverse order. - for (auto I = TF->getNumOperands(); I;) - AddToWorklist(TF->getOperand(--I).getNode()); + // Add TF and its operands to the worklist. AddToWorklist(TF.getNode()); + for (const SDValue &Op : TF->ops()) + AddToWorklist(Op.getNode()); + AddToWorklist(STChain); return true; } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 22c23ba877e8..6d7260d7aee5 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -174,7 +174,7 @@ static unsigned findSinkableLocalRegDef(MachineInstr &MI) { if (RegDef) return 0; RegDef = MO.getReg(); - } else if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + } else if (Register::isVirtualRegister(MO.getReg())) { // This is another use of a vreg. Don't try to sink it. return 0; } @@ -1213,14 +1213,13 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { if (!FrameAlign) FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL); Flags.setByValSize(FrameSize); - Flags.setByValAlign(FrameAlign); + Flags.setByValAlign(Align(FrameAlign)); } if (Arg.IsNest) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); - unsigned OriginalAlignment = DL.getABITypeAlignment(Arg.Ty); - Flags.setOrigAlign(OriginalAlignment); + Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty))); CLI.OutVals.push_back(Arg.Val); CLI.OutFlags.push_back(Flags); @@ -1237,8 +1236,8 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs); // Set labels for heapallocsite call. 
- if (CLI.CS && CLI.CS->getInstruction()->getMetadata("heapallocsite")) { - MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite"); + if (CLI.CS && CLI.CS->getInstruction()->hasMetadata("heapallocsite")) { + const MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite"); MF->addCodeViewHeapAllocSite(CLI.Call, MD); } @@ -1303,6 +1302,7 @@ bool FastISel::selectCall(const User *I) { ExtraInfo |= InlineAsm::Extra_HasSideEffects; if (IA->isAlignStack()) ExtraInfo |= InlineAsm::Extra_IsAlignStack; + ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::INLINEASM)) @@ -1388,9 +1388,11 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { "Expected inlined-at fields to agree"); // A dbg.declare describes the address of a source variable, so lower it // into an indirect DBG_VALUE. + auto *Expr = DI->getExpression(); + Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, - *Op, DI->getVariable(), DI->getExpression()); + TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ false, + *Op, DI->getVariable(), Expr); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. @@ -1414,19 +1416,19 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { if (CI->getBitWidth() > 64) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addCImm(CI) - .addImm(0U) + .addReg(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addImm(CI->getZExtValue()) - .addImm(0U) + .addReg(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); } else if (const auto *CF = dyn_cast(V)) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addFPImm(CF) - .addImm(0U) + .addReg(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); } else if (unsigned Reg = lookUpRegForValue(V)) { @@ -1453,24 +1455,12 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel()); return true; } - case Intrinsic::objectsize: { - ConstantInt *CI = cast(II->getArgOperand(1)); - unsigned long long Res = CI->isZero() ? -1ULL : 0; - Constant *ResCI = ConstantInt::get(II->getType(), Res); - unsigned ResultReg = getRegForValue(ResCI); - if (!ResultReg) - return false; - updateValueMap(II, ResultReg); - return true; - } - case Intrinsic::is_constant: { - Constant *ResCI = ConstantInt::get(II->getType(), 0); - unsigned ResultReg = getRegForValue(ResCI); - if (!ResultReg) - return false; - updateValueMap(II, ResultReg); - return true; - } + case Intrinsic::objectsize: + llvm_unreachable("llvm.objectsize.* should have been lowered already"); + + case Intrinsic::is_constant: + llvm_unreachable("llvm.is.constant.* should have been lowered already"); + case Intrinsic::launder_invariant_group: case Intrinsic::strip_invariant_group: case Intrinsic::expect: { @@ -1677,11 +1667,11 @@ bool FastISel::selectInstruction(const Instruction *I) { /// (fall-through) successor, and update the CFG. 
void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, const DebugLoc &DbgLoc) { - if (FuncInfo.MBB->getBasicBlock()->size() > 1 && + if (FuncInfo.MBB->getBasicBlock()->sizeWithoutDebug() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { - // For more accurate line information if this is the only instruction - // in the block then emit it, otherwise we have the unconditional - // fall-through case, which needs no instructions. + // For more accurate line information if this is the only non-debug + // instruction in the block then emit it, otherwise we have the + // unconditional fall-through case, which needs no instructions. } else { // The unconditional branch case. TII.insertBranch(*FuncInfo.MBB, MSucc, nullptr, @@ -2028,7 +2018,7 @@ unsigned FastISel::createResultReg(const TargetRegisterClass *RC) { unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op, unsigned OpNum) { - if (TargetRegisterInfo::isVirtualRegister(Op)) { + if (Register::isVirtualRegister(Op)) { const TargetRegisterClass *RegClass = TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF); if (!MRI.constrainRegClass(Op, RegClass)) { @@ -2236,7 +2226,7 @@ unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode, unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx) { unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); - assert(TargetRegisterInfo::isVirtualRegister(Op0) && + assert(Register::isVirtualRegister(Op0) && "Cannot yet extract from physregs"); const TargetRegisterClass *RC = MRI.getRegClass(Op0); MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx)); @@ -2417,10 +2407,9 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const { } else return nullptr; - bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal) != nullptr; - bool IsInvariant = I->getMetadata(LLVMContext::MD_invariant_load) != nullptr; - bool IsDereferenceable = - I->getMetadata(LLVMContext::MD_dereferenceable) != nullptr; + bool IsNonTemporal = I->hasMetadata(LLVMContext::MD_nontemporal); + bool IsInvariant = I->hasMetadata(LLVMContext::MD_invariant_load); + bool IsDereferenceable = I->hasMetadata(LLVMContext::MD_dereferenceable); const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range); AAMDNodes AAInfo; diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 8b1759246b76..cf6711adad48 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -424,7 +425,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { unsigned BitWidth = IntVT.getSizeInBits(); unsigned DestReg = ValueMap[PN]; - if (!TargetRegisterInfo::isVirtualRegister(DestReg)) + if (!Register::isVirtualRegister(DestReg)) return; LiveOutRegInfo.grow(DestReg); LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg]; @@ -445,7 +446,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { assert(ValueMap.count(V) && "V should have been placed in ValueMap when its" "CopyToReg node was created."); unsigned SrcReg = ValueMap[V]; - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if 
(!Register::isVirtualRegister(SrcReg)) { DestLOI.IsValid = false; return; } @@ -480,7 +481,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { assert(ValueMap.count(V) && "V should have been placed in ValueMap when " "its CopyToReg node was created."); unsigned SrcReg = ValueMap[V]; - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if (!Register::isVirtualRegister(SrcReg)) { DestLOI.IsValid = false; return; } diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 9bc07d35dfc5..c5095995ec2e 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -71,7 +71,7 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses, if (isa(Node->getOperand(I - 1))) continue; if (RegisterSDNode *RN = dyn_cast(Node->getOperand(I - 1))) - if (TargetRegisterInfo::isPhysicalRegister(RN->getReg())) + if (Register::isPhysicalRegister(RN->getReg())) continue; NumImpUses = N - I; break; @@ -86,7 +86,7 @@ void InstrEmitter:: EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, unsigned SrcReg, DenseMap &VRBaseMap) { unsigned VRBase = 0; - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if (Register::isVirtualRegister(SrcReg)) { // Just use the input register directly! SDValue Op(Node, ResNo); if (IsClone) @@ -114,7 +114,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == ResNo) { unsigned DestReg = cast(User->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + if (Register::isVirtualRegister(DestReg)) { VRBase = DestReg; Match = false; } else if (DestReg != SrcReg) @@ -139,7 +139,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, UseRC = RC; else if (RC) { const TargetRegisterClass *ComRC = - TRI->getCommonSubClass(UseRC, RC, VT.SimpleTy); + TRI->getCommonSubClass(UseRC, RC); // If multiple uses expect disjoint register classes, we emit // copies in AddRegisterOperand. if (ComRC) @@ -219,7 +219,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, if (II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. VRBase = cast(Node->getOperand(i-NumResults))->getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(VRBase)); + assert(Register::isPhysicalRegister(VRBase)); MIB.addReg(VRBase, RegState::Define); } @@ -229,7 +229,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == i) { unsigned Reg = cast(User->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { const TargetRegisterClass *RegRC = MRI->getRegClass(Reg); if (RegRC == RC) { VRBase = Reg; @@ -272,7 +272,7 @@ unsigned InstrEmitter::getVR(SDValue Op, // does not include operand register class info. 
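// An illustrative sketch of the distinction the new Register helpers test
// above, assuming the encoding of that era: 0 means "no register", small
// positive values are physical registers, and values with the top bit set are
// virtual registers. The toy functions below are stand-ins, not LLVM APIs.
#include <cstdint>

static bool toyIsVirtual(uint32_t Reg) { return (Reg & 0x80000000u) != 0; }
static bool toyIsPhysical(uint32_t Reg) { return Reg != 0 && !toyIsVirtual(Reg); }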
const TargetRegisterClass *RC = TLI->getRegClassFor( Op.getSimpleValueType(), Op.getNode()->isDivergent()); - unsigned VReg = MRI->createVirtualRegister(RC); + Register VReg = MRI->createVirtualRegister(RC); BuildMI(*MBB, InsertPos, Op.getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), VReg); return VReg; @@ -319,7 +319,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, if (!ConstrainedRC) { OpRC = TRI->getAllocatableClass(OpRC); assert(OpRC && "Constraints cannot be fulfilled for allocation"); - unsigned NewVReg = MRI->createVirtualRegister(OpRC); + Register NewVReg = MRI->createVirtualRegister(OpRC); BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); VReg = NewVReg; @@ -385,9 +385,8 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, (IIRC && TRI->isDivergentRegClass(IIRC))) : nullptr; - if (OpRC && IIRC && OpRC != IIRC && - TargetRegisterInfo::isVirtualRegister(VReg)) { - unsigned NewVReg = MRI->createVirtualRegister(IIRC); + if (OpRC && IIRC && OpRC != IIRC && Register::isVirtualRegister(VReg)) { + Register NewVReg = MRI->createVirtualRegister(IIRC); BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); VReg = NewVReg; @@ -465,7 +464,7 @@ unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx, // register instead. RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT, isDivergent), SubIdx); assert(RC && "No legal register class for VT supports that SubIdx"); - unsigned NewReg = MRI->createVirtualRegister(RC); + Register NewReg = MRI->createVirtualRegister(RC); BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg) .addReg(VReg); return NewReg; @@ -485,7 +484,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node) { unsigned DestReg = cast(User->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + if (Register::isVirtualRegister(DestReg)) { VRBase = DestReg; break; } @@ -503,7 +502,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, unsigned Reg; MachineInstr *DefMI; RegisterSDNode *R = dyn_cast(Node->getOperand(0)); - if (R && TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + if (R && Register::isPhysicalRegister(R->getReg())) { Reg = R->getReg(); DefMI = nullptr; } else { @@ -529,7 +528,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // Reg may not support a SubIdx sub-register, and we may need to // constrain its register class or issue a COPY to a compatible register // class. 
- if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) Reg = ConstrainForSubReg(Reg, SubIdx, Node->getOperand(0).getSimpleValueType(), Node->isDivergent(), Node->getDebugLoc()); @@ -541,7 +540,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, MachineInstrBuilder CopyMI = BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), VRBase); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) CopyMI.addReg(Reg, 0, SubIdx); else CopyMI.addReg(TRI->getSubReg(Reg, SubIdx)); @@ -614,7 +613,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, unsigned DstRCIdx = cast(Node->getOperand(1))->getZExtValue(); const TargetRegisterClass *DstRC = TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx)); - unsigned NewVReg = MRI->createVirtualRegister(DstRC); + Register NewVReg = MRI->createVirtualRegister(DstRC); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); @@ -631,7 +630,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, bool IsClone, bool IsCloned) { unsigned DstRCIdx = cast(Node->getOperand(0))->getZExtValue(); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); - unsigned NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); + Register NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg); unsigned NumOps = Node->getNumOperands(); @@ -649,7 +648,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, RegisterSDNode *R = dyn_cast(Node->getOperand(i-1)); // Skip physical registers as they don't have a vreg to get and we'll // insert copies for them in TwoAddressInstructionPass anyway. - if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + if (!R || !Register::isPhysicalRegister(R->getReg())) { unsigned SubIdx = cast(Op)->getZExtValue(); unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); @@ -678,7 +677,7 @@ MachineInstr * InstrEmitter::EmitDbgValue(SDDbgValue *SD, DenseMap &VRBaseMap) { MDNode *Var = SD->getVariable(); - MDNode *Expr = SD->getExpression(); + const DIExpression *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); assert(cast(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); @@ -702,12 +701,11 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, // EmitTargetCodeForFrameDebugValue is responsible for allocation. auto FrameMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) .addFrameIndex(SD->getFrameIx()); + if (SD->isIndirect()) - // Push [fi + 0] onto the DIExpression stack. - FrameMI.addImm(0); - else - // Push fi onto the DIExpression stack. - FrameMI.addReg(0); + Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); + + FrameMI.addReg(0); return FrameMI.addMetadata(Var).addMetadata(Expr); } // Otherwise, we're going to create an instruction here. @@ -753,9 +751,9 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, // Indirect addressing is indicated by an Imm as the second parameter. 
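// A minimal sketch of the rewrite above: instead of flagging a DBG_VALUE as
// "indirect" out of band, the indirection is folded into the debug expression
// by appending a dereference operation. The toy below models an expression as
// a flat list of opcodes; the names and the opcode constant are stand-ins for
// illustration, not the DWARF/LLVM definitions.
#include <cstdint>
#include <vector>

static const uint64_t ToyOpDeref = 0x06;  // stand-in for dwarf::DW_OP_deref

static std::vector<uint64_t> appendDeref(std::vector<uint64_t> Expr) {
  Expr.push_back(ToyOpDeref);  // "load through the location" becomes explicit
  return Expr;
}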
if (SD->isIndirect()) - MIB.addImm(0U); - else - MIB.addReg(0U, RegState::Debug); + Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); + + MIB.addReg(0U, RegState::Debug); MIB.addMetadata(Var); MIB.addMetadata(Expr); @@ -928,12 +926,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // // Collect all the used physreg defs, and make sure that any unused physreg // defs are marked as dead. - SmallVector UsedRegs; + SmallVector UsedRegs; // Additional results must be physical register defs. if (HasPhysRegOuts) { for (unsigned i = NumDefs; i < NumResults; ++i) { - unsigned Reg = II.getImplicitDefs()[i - NumDefs]; + Register Reg = II.getImplicitDefs()[i - NumDefs]; if (!Node->hasAnyUseOfValue(i)) continue; // This implicitly defined physreg has a use. @@ -960,8 +958,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // direct RegisterSDNode operands. for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) if (RegisterSDNode *R = dyn_cast(F->getOperand(i))) { - unsigned Reg = R->getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = R->getReg(); + if (Reg.isPhysical()) UsedRegs.push_back(Reg); } } @@ -995,8 +993,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, case ISD::CopyToReg: { unsigned DestReg = cast(Node->getOperand(1))->getReg(); SDValue SrcVal = Node->getOperand(2); - if (TargetRegisterInfo::isVirtualRegister(DestReg) && - SrcVal.isMachineOpcode() && + if (Register::isVirtualRegister(DestReg) && SrcVal.isMachineOpcode() && SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { // Instead building a COPY to that vreg destination, build an // IMPLICIT_DEF instruction instead. @@ -1093,16 +1090,18 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // FIXME: Add dead flags for physical and virtual registers defined. // For now, mark physical register defs as implicit to help fast // regalloc. This makes inline asm look a lot like calls. - MIB.addReg(Reg, RegState::Define | - getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); + MIB.addReg(Reg, + RegState::Define | + getImplRegState(Register::isPhysicalRegister(Reg))); } break; case InlineAsm::Kind_RegDefEarlyClobber: case InlineAsm::Kind_Clobber: for (unsigned j = 0; j != NumVals; ++j, ++i) { unsigned Reg = cast(Node->getOperand(i))->getReg(); - MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber | - getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); + MIB.addReg(Reg, + RegState::Define | RegState::EarlyClobber | + getImplRegState(Register::isPhysicalRegister(Reg))); ECRegs.push_back(Reg); } break; @@ -1136,7 +1135,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // then remove the early-clobber flag. 
for (unsigned Reg : ECRegs) { if (MIB->readsRegister(Reg, TRI)) { - MachineOperand *MO = + MachineOperand *MO = MIB->findRegisterDefOperand(Reg, false, false, TRI); assert(MO && "No def operand for clobbered register?"); MO->setIsEarlyClobber(false); diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index bf817f00f83d..f9fdf525240f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -161,6 +162,7 @@ private: SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, const SDLoc &dl, SDValue ChainIn); SDValue ExpandBUILD_VECTOR(SDNode *Node); + SDValue ExpandSPLAT_VECTOR(SDNode *Node); SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); void ExpandDYNAMIC_STACKALLOC(SDNode *Node, SmallVectorImpl &Results); @@ -236,6 +238,16 @@ public: } ReplacedNode(Old); } + + void ReplaceNodeWithValue(SDValue Old, SDValue New) { + LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); + dbgs() << " with: "; New->dump(&DAG)); + + DAG.ReplaceAllUsesOfValueWith(Old, New); + if (UpdatedNodes) + UpdatedNodes->insert(New.getNode()); + ReplacedNode(Old.getNode()); + } }; } // end anonymous namespace @@ -493,8 +505,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // expand it. EVT MemVT = ST->getMemoryVT(); const DataLayout &DL = DAG.getDataLayout(); - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, - *ST->getMemOperand())) { + if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT, + *ST->getMemOperand())) { LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store\n"); SDValue Result = TLI.expandUnalignedStore(ST, DAG); ReplaceNode(SDValue(ST, 0), Result); @@ -608,8 +620,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { EVT MemVT = ST->getMemoryVT(); // If this is an unaligned store and the target doesn't support it, // expand it. - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, - *ST->getMemOperand())) { + if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT, + *ST->getMemOperand())) { SDValue Result = TLI.expandUnalignedStore(ST, DAG); ReplaceNode(SDValue(ST, 0), Result); } @@ -669,8 +681,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { const DataLayout &DL = DAG.getDataLayout(); // If this is an unaligned load and the target doesn't support it, // expand it. 
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, - *LD->getMemOperand())) { + if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT, + *LD->getMemOperand())) { std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG); } break; @@ -894,11 +906,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { if (SrcVT.getScalarType() == MVT::f16) { EVT ISrcVT = SrcVT.changeTypeToInteger(); EVT IDestVT = DestVT.changeTypeToInteger(); - EVT LoadVT = TLI.getRegisterType(IDestVT.getSimpleVT()); + EVT ILoadVT = TLI.getRegisterType(IDestVT.getSimpleVT()); - SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, LoadVT, - Chain, Ptr, ISrcVT, - LD->getMemOperand()); + SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, ILoadVT, Chain, + Ptr, ISrcVT, LD->getMemOperand()); Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result); Chain = Result.getValue(1); break; @@ -959,15 +970,13 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { #ifndef NDEBUG for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - assert((TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == - TargetLowering::TypeLegal || - TLI.isTypeLegal(Node->getValueType(i))) && + assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == + TargetLowering::TypeLegal && "Unexpected illegal type!"); for (const SDValue &Op : Node->op_values()) assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == TargetLowering::TypeLegal || - TLI.isTypeLegal(Op.getValueType()) || Op.getOpcode() == ISD::TargetConstant || Op.getOpcode() == ISD::Register) && "Unexpected illegal type!"); @@ -1004,7 +1013,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(0).getValueType()); break; - case ISD::FP_ROUND_INREG: case ISD::SIGN_EXTEND_INREG: { EVT InnerType = cast(Node->getOperand(1))->getVT(); Action = TLI.getOperationAction(Node->getOpcode(), InnerType); @@ -1097,38 +1105,15 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; } break; - case ISD::STRICT_FADD: - case ISD::STRICT_FSUB: - case ISD::STRICT_FMUL: - case ISD::STRICT_FDIV: - case ISD::STRICT_FREM: - case ISD::STRICT_FSQRT: - case ISD::STRICT_FMA: - case ISD::STRICT_FPOW: - case ISD::STRICT_FPOWI: - case ISD::STRICT_FSIN: - case ISD::STRICT_FCOS: - case ISD::STRICT_FEXP: - case ISD::STRICT_FEXP2: - case ISD::STRICT_FLOG: - case ISD::STRICT_FLOG10: - case ISD::STRICT_FLOG2: - case ISD::STRICT_FRINT: - case ISD::STRICT_FNEARBYINT: - case ISD::STRICT_FMAXNUM: - case ISD::STRICT_FMINNUM: - case ISD::STRICT_FCEIL: - case ISD::STRICT_FFLOOR: - case ISD::STRICT_FROUND: - case ISD::STRICT_FTRUNC: - case ISD::STRICT_FP_ROUND: - case ISD::STRICT_FP_EXTEND: - // These pseudo-ops get legalized as if they were their non-strict - // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT - // is also legal, but if ISD::FSQRT requires expansion then so does - // ISD::STRICT_FSQRT. + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: + // These pseudo-ops are the same as the other STRICT_ ops except + // they are registered with setOperationAction() using the input type + // instead of the output type. 
Action = TLI.getStrictFPOperationAction(Node->getOpcode(), - Node->getValueType(0)); + Node->getOperand(1).getValueType()); break; case ISD::SADDSAT: case ISD::UADDSAT: @@ -1139,7 +1124,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: { + case ISD::UMULFIX: + case ISD::UMULFIXSAT: { unsigned Scale = Node->getConstantOperandVal(2); Action = TLI.getFixedPointOperationAction(Node->getOpcode(), Node->getValueType(0), Scale); @@ -1650,7 +1636,6 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast(CC)->get(); NeedInvert = false; - bool NeedSwap = false; switch (TLI.getCondCodeAction(CCCode, OpVT)) { default: llvm_unreachable("Unknown condition code action!"); case TargetLowering::Legal: @@ -1664,6 +1649,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, return true; } // Swapping operands didn't work. Try inverting the condition. + bool NeedSwap = false; InvCC = getSetCCInverse(CCCode, OpVT.isInteger()); if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { // If inverting the condition is not enough, try swapping operands @@ -2021,6 +2007,14 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { return ExpandVectorBuildThroughStack(Node); } +SDValue SelectionDAGLegalize::ExpandSPLAT_VECTOR(SDNode *Node) { + SDLoc DL(Node); + EVT VT = Node->getValueType(0); + SDValue SplatVal = Node->getOperand(0); + + return DAG.getSplatBuildVector(VT, DL, SplatVal); +} + // Expand a node into a call to a libcall. If the result value // does not fit into a register, return the lo part and set the hi part to the // by-reg argument. If it does fit into a single register, return the result @@ -2074,12 +2068,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, std::pair CallInfo = TLI.LowerCallTo(CLI); if (!CallInfo.second.getNode()) { - LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump()); + LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump(&DAG)); // It's a tailcall, return the chain (which is the DAG root). return DAG.getRoot(); } - LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump()); + LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump(&DAG)); return CallInfo.first; } @@ -2167,6 +2161,9 @@ SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128) { + if (Node->isStrictFPOpcode()) + Node = DAG.mutateStrictFPToFP(Node); + RTLIB::Libcall LC; switch (Node->getOperand(0).getValueType().getSimpleVT().SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); @@ -2815,6 +2812,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::STRICT_FP_ROUND: + // This expansion does not honor the "strict" properties anyway, + // so prefer falling back to the non-strict operation if legal. + if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) + break; Tmp1 = EmitStackConvert(Node->getOperand(1), Node->getValueType(0), Node->getValueType(0), dl, Node->getOperand(0)); @@ -2829,6 +2832,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; case ISD::STRICT_FP_EXTEND: + // This expansion does not honor the "strict" properties anyway, + // so prefer falling back to the non-strict operation if legal. 
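// A minimal sketch of the splat round trip handled in this file: the new
// ExpandSPLAT_VECTOR rebuilds a splat as a build_vector of N copies of the
// scalar, while the earlier BUILD_VECTOR combine goes the other way when
// every lane holds the same value. Plain-container stand-ins, invented for
// illustration:
#include <cstddef>
#include <vector>

static std::vector<int> expandSplat(int SplatVal, std::size_t NumElts) {
  return std::vector<int>(NumElts, SplatVal);  // splat -> N identical lanes
}

static bool getSplatLane(const std::vector<int> &Lanes, int &SplatVal) {
  if (Lanes.empty())
    return false;
  for (int V : Lanes)
    if (V != Lanes.front())
      return false;                            // not a splat
  SplatVal = Lanes.front();
  return true;
}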
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) + break; Tmp1 = EmitStackConvert(Node->getOperand(1), Node->getOperand(1).getValueType(), Node->getValueType(0), dl, Node->getOperand(0)); @@ -2873,19 +2882,6 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; } - case ISD::FP_ROUND_INREG: { - // The only way we can lower this is to turn it into a TRUNCSTORE, - // EXTLOAD pair, targeting a temporary location (a stack slot). - - // NOTE: there is a choice here between constantly creating new stack - // slots and always reusing the same one. We currently always create - // new ones, as reuse may inhibit scheduling. - EVT ExtraVT = cast(Node->getOperand(1))->getVT(); - Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT, - Node->getValueType(0), dl); - Results.push_back(Tmp1); - break; - } case ISD::UINT_TO_FP: if (TLI.expandUINT_TO_FP(Node, Tmp1, DAG)) { Results.push_back(Tmp1); @@ -2901,33 +2897,26 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) Results.push_back(Tmp1); break; + case ISD::STRICT_FP_TO_SINT: + if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) { + ReplaceNode(Node, Tmp1.getNode()); + LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_SINT node\n"); + return true; + } + break; case ISD::FP_TO_UINT: - if (TLI.expandFP_TO_UINT(Node, Tmp1, DAG)) + if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG)) Results.push_back(Tmp1); break; - case ISD::LROUND: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32, - RTLIB::LROUND_F64, RTLIB::LROUND_F80, - RTLIB::LROUND_F128, - RTLIB::LROUND_PPCF128)); - break; - case ISD::LLROUND: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32, - RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, - RTLIB::LLROUND_F128, - RTLIB::LLROUND_PPCF128)); - break; - case ISD::LRINT: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32, - RTLIB::LRINT_F64, RTLIB::LRINT_F80, - RTLIB::LRINT_F128, - RTLIB::LRINT_PPCF128)); - break; - case ISD::LLRINT: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32, - RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, - RTLIB::LLRINT_F128, - RTLIB::LLRINT_PPCF128)); + case ISD::STRICT_FP_TO_UINT: + if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG)) { + // Relink the chain. + DAG.ReplaceAllUsesOfValueWith(SDValue(Node,1), Tmp2); + // Replace the new UINT result. + ReplaceNodeWithValue(SDValue(Node, 0), Tmp1); + LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_UINT node\n"); + return true; + } break; case ISD::VAARG: Results.push_back(DAG.expandVAArg(Node)); @@ -3348,6 +3337,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: + case ISD::UMULFIXSAT: Results.push_back(TLI.expandFixedPointMul(Node, DAG)); break; case ISD::ADDCARRY: @@ -3662,6 +3652,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::BUILD_VECTOR: Results.push_back(ExpandBUILD_VECTOR(Node)); break; + case ISD::SPLAT_VECTOR: + Results.push_back(ExpandSPLAT_VECTOR(Node)); + break; case ISD::SRA: case ISD::SRL: case ISD::SHL: { @@ -3715,6 +3708,33 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } + if (Results.empty() && Node->isStrictFPOpcode()) { + // FIXME: We were asked to expand a strict floating-point operation, + // but there is currently no expansion implemented that would preserve + // the "strict" properties. 
For now, we just fall back to the non-strict + // version if that is legal on the target. The actual mutation of the + // operation will happen in SelectionDAGISel::DoInstructionSelection. + switch (Node->getOpcode()) { + default: + if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) + return true; + break; + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: + // These are registered by the operand type instead of the value + // type. Reflect that here. + if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getOperand(1).getValueType()) + == TargetLowering::Legal) + return true; + break; + } + } + // Replace the original node with the legalized result. if (Results.empty()) { LLVM_DEBUG(dbgs() << "Cannot expand node\n"); @@ -3956,6 +3976,34 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::POW_F80, RTLIB::POW_F128, RTLIB::POW_PPCF128)); break; + case ISD::LROUND: + case ISD::STRICT_LROUND: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32, + RTLIB::LROUND_F64, RTLIB::LROUND_F80, + RTLIB::LROUND_F128, + RTLIB::LROUND_PPCF128)); + break; + case ISD::LLROUND: + case ISD::STRICT_LLROUND: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32, + RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, + RTLIB::LLROUND_F128, + RTLIB::LLROUND_PPCF128)); + break; + case ISD::LRINT: + case ISD::STRICT_LRINT: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32, + RTLIB::LRINT_F64, RTLIB::LRINT_F80, + RTLIB::LRINT_F128, + RTLIB::LRINT_PPCF128)); + break; + case ISD::LLRINT: + case ISD::STRICT_LLRINT: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32, + RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, + RTLIB::LLRINT_F128, + RTLIB::LLRINT_PPCF128)); + break; case ISD::FDIV: Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80, RTLIB::DIV_F128, diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index b4849b2881e6..72d052473f11 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -42,10 +42,10 @@ static RTLIB::Libcall GetFPLibCall(EVT VT, } //===----------------------------------------------------------------------===// -// Convert Float Results to Integer for Non-HW-supported Operations. +// Convert Float Results to Integer //===----------------------------------------------------------------------===// -bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { +void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { LLVM_DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); dbgs() << "\n"); SDValue R = SDValue(); @@ -58,26 +58,18 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to soften the result of this operator!"); - case ISD::Register: - case ISD::CopyFromReg: - case ISD::CopyToReg: - assert(isLegalInHWReg(N->getValueType(ResNo)) && - "Unsupported SoftenFloatRes opcode!"); - // Only when isLegalInHWReg, we can skip check of the operands. 
- R = SDValue(N, ResNo); - break; case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break; - case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N, ResNo); break; + case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break; case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; - case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N, ResNo); break; + case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N); break; case ISD::EXTRACT_VECTOR_ELT: R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break; - case ISD::FABS: R = SoftenFloatRes_FABS(N, ResNo); break; + case ISD::FABS: R = SoftenFloatRes_FABS(N); break; case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; case ISD::FADD: R = SoftenFloatRes_FADD(N); break; case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; - case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N, ResNo); break; + case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; @@ -89,7 +81,7 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMA: R = SoftenFloatRes_FMA(N); break; case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; - case ISD::FNEG: R = SoftenFloatRes_FNEG(N, ResNo); break; + case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break; @@ -102,30 +94,24 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; - case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break; + case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break; - case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break; - case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break; + case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; + case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break; } - if (R.getNode() && R.getNode() != N) { + // If R is null, the sub-method took care of registering the result. + if (R.getNode()) { + assert(R.getNode() != N); SetSoftenedFloat(SDValue(N, ResNo), R); - // Return true only if the node is changed, assuming that the operands - // are also converted when necessary. - return true; } - - // Otherwise, return false to tell caller to scan operands. - return false; } -SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) { - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { return BitConvertToInteger(N->getOperand(0)); } @@ -144,10 +130,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { BitConvertToInteger(N->getOperand(1))); } -SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, we can load better from the constant pool. 
- if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N) { ConstantFPSDNode *CN = cast(N); // In ppcf128, the high 64 bits are always first in memory regardless // of Endianness. LLVM's APFloat representation is not Endian sensitive, @@ -172,19 +155,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) { } SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, keep the extracted value in register. - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NewOp.getValueType().getVectorElementType(), NewOp, N->getOperand(1)); } -SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, FABS can be implemented as native bitwise operations. - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned Size = NVT.getSizeInBits(); @@ -200,57 +177,69 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMIN_F32, RTLIB::FMIN_F64, RTLIB::FMIN_F80, RTLIB::FMIN_F128, RTLIB::FMIN_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMAX_F32, RTLIB::FMAX_F64, RTLIB::FMAX_F80, RTLIB::FMAX_F128, RTLIB::FMAX_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::ADD_F32, RTLIB::ADD_F64, RTLIB::ADD_F80, RTLIB::ADD_F128, RTLIB::ADD_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { 
N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::CEIL_F32, RTLIB::CEIL_F64, RTLIB::CEIL_F80, RTLIB::CEIL_F128, RTLIB::CEIL_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, FCOPYSIGN can be implemented as native bitwise operations. - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(0)); SDValue RHS = BitConvertToInteger(N->getOperand(1)); SDLoc dl(N); @@ -301,98 +290,123 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) { SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::COS_F32, RTLIB::COS_F64, RTLIB::COS_F80, RTLIB::COS_F128, RTLIB::COS_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::EXP_F32, RTLIB::EXP_F64, RTLIB::EXP_F80, RTLIB::EXP_F128, RTLIB::EXP_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::EXP2_F32, RTLIB::EXP2_F64, RTLIB::EXP2_F80, RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { 
N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, RTLIB::FLOOR_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::LOG_F32, RTLIB::LOG_F64, RTLIB::LOG_F80, RTLIB::LOG_F128, RTLIB::LOG_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::LOG2_F32, RTLIB::LOG2_F64, RTLIB::LOG2_F80, RTLIB::LOG2_F128, RTLIB::LOG2_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::LOG10_F32, RTLIB::LOG10_F64, RTLIB::LOG10_F80, RTLIB::LOG10_F128, RTLIB::LOG10_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { @@ -400,48 +414,57 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)), GetSoftenedFloat(N->getOperand(2)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[3] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType(), + N->getOperand(2).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMA_F32, RTLIB::FMA_F64, RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::MUL_F32, RTLIB::MUL_F64, RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue 
DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::NEARBYINT_F32, RTLIB::NEARBYINT_F64, RTLIB::NEARBYINT_F80, RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, FNEG can be implemented as native bitwise operations. - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); - EVT FloatVT = N->getValueType(ResNo); + EVT FloatVT = N->getValueType(0); if (FloatVT == MVT::f32 || FloatVT == MVT::f64 || FloatVT == MVT::f128) { // Expand Y = FNEG(X) -> Y = X ^ sign mask APInt SignMask = APInt::getSignMask(NVT.getSizeInBits()); @@ -452,13 +475,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) { // Expand Y = FNEG(X) -> Y = SUB -0.0, X SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)), GetSoftenedFloat(N->getOperand(0)) }; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SUB_F32, RTLIB::SUB_F64, RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, false, dl).first; + NVT, Ops, CallOptions, dl).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { @@ -485,7 +509,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; } // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special @@ -493,15 +520,18 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32); SDValue Op = N->getOperand(0); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op, - false, SDLoc(N)).first; + CallOptions, SDLoc(N)).first; if (N->getValueType(0) == MVT::f32) return Res32; EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return TLI.makeLibCall(DAG, LC, NVT, Res32, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, NVT, Res32, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { @@ -515,20 +545,27 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { 
RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); - return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::POW_F32, RTLIB::POW_F64, RTLIB::POW_F80, RTLIB::POW_F128, RTLIB::POW_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { @@ -536,87 +573,111 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { "Unsupported power type!"); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::POWI_F32, RTLIB::POWI_F64, RTLIB::POWI_F80, RTLIB::POWI_F128, RTLIB::POWI_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::REM_F32, RTLIB::REM_F64, RTLIB::REM_F80, RTLIB::REM_F128, RTLIB::REM_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::RINT_F32, RTLIB::RINT_F64, RTLIB::RINT_F80, RTLIB::RINT_F128, RTLIB::RINT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, 
GetFPLibCall(N->getValueType(0), RTLIB::ROUND_F32, RTLIB::ROUND_F64, RTLIB::ROUND_F80, RTLIB::ROUND_F128, RTLIB::ROUND_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SIN_F32, RTLIB::SIN_F64, RTLIB::SIN_F80, RTLIB::SIN_F128, RTLIB::SIN_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SQRT_F32, RTLIB::SQRT_F64, RTLIB::SQRT_F80, RTLIB::SQRT_F128, RTLIB::SQRT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SUB_F32, RTLIB::SUB_F64, RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { @@ -625,17 +686,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, RTLIB::TRUNC_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) { - bool LegalInHWReg = isLegalInHWReg(N->getValueType(ResNo)); +SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { LoadSDNode *L = cast(N); EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); @@ -666,23 +729,17 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) { // use the new one. 
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); auto ExtendNode = DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL); - if (LegalInHWReg) - return ExtendNode; return BitConvertToInteger(ExtendNode); } -SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo) { - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(1)); SDValue RHS = GetSoftenedFloat(N->getOperand(2)); return DAG.getSelect(SDLoc(N), LHS.getValueType(), N->getOperand(0), LHS, RHS); } -SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo) { - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(2)); SDValue RHS = GetSoftenedFloat(N->getOperand(3)); return DAG.getNode(ISD::SELECT_CC, SDLoc(N), @@ -736,14 +793,18 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { // Sign/zero extend the argument if the libcall takes a larger type. SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(Signed); + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), - Op, Signed, dl).first; + Op, CallOptions, dl).first; } //===----------------------------------------------------------------------===// -// Convert Float Operand to Integer for Non-HW-supported Operations. +// Convert Float Operand to Integer //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { @@ -753,8 +814,6 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { switch (N->getOpcode()) { default: - if (CanSkipSoftenFloatOperand(N, OpNo)) - return false; #ifndef NDEBUG dbgs() << "SoftenFloatOperand Op #" << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"; @@ -762,11 +821,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { llvm_unreachable("Do not know how to soften this operator's operand!"); case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; - case ISD::CopyToReg: Res = SoftenFloatOp_COPY_TO_REG(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; - case ISD::FABS: Res = SoftenFloatOp_FABS(N); break; - case ISD::FCOPYSIGN: Res = SoftenFloatOp_FCOPYSIGN(N); break; - case ISD::FNEG: Res = SoftenFloatOp_FNEG(N); break; case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; @@ -776,19 +831,9 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::LLROUND: Res = SoftenFloatOp_LLROUND(N); break; case ISD::LRINT: Res = SoftenFloatOp_LRINT(N); break; case ISD::LLRINT: Res = SoftenFloatOp_LLRINT(N); break; - case ISD::SELECT: Res = SoftenFloatOp_SELECT(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; - case ISD::STORE: - Res = SoftenFloatOp_STORE(N, OpNo); - // Do not try to analyze or soften this node again if the value is - // or can be held in a register. In that case, Res.getNode() should - // be equal to N. 
- if (Res.getNode() == N && - isLegalInHWReg(N->getOperand(OpNo).getValueType())) - return false; - // Otherwise, we need to reanalyze and lower the new Res nodes. - break; + case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; } // If the result is null, the sub-method took care of registering results etc. @@ -800,60 +845,16 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { return true; assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && - "Invalid operand expansion"); + "Invalid operand promotion"); ReplaceValueWith(SDValue(N, 0), Res); return false; } -bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) { - if (!isLegalInHWReg(N->getOperand(OpNo).getValueType())) - return false; - - // When the operand type can be kept in registers there is nothing to do for - // the following opcodes. - switch (N->getOperand(OpNo).getOpcode()) { - case ISD::BITCAST: - case ISD::ConstantFP: - case ISD::CopyFromReg: - case ISD::CopyToReg: - case ISD::FABS: - case ISD::FCOPYSIGN: - case ISD::FNEG: - case ISD::Register: - case ISD::SELECT: - case ISD::SELECT_CC: - return true; - } - - switch (N->getOpcode()) { - case ISD::ConstantFP: // Leaf node. - case ISD::CopyFromReg: // Operand is a register that we know to be left - // unchanged by SoftenFloatResult(). - case ISD::Register: // Leaf node. - return true; - } - return false; -} - SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { - return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), - GetSoftenedFloat(N->getOperand(0))); -} - -SDValue DAGTypeLegalizer::SoftenFloatOp_COPY_TO_REG(SDNode *N) { - SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); - SDValue Op2 = GetSoftenedFloat(N->getOperand(2)); - - if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2)) - return SDValue(); - - if (N->getNumOperands() == 3) - return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), 0); + SDValue Op0 = GetSoftenedFloat(N->getOperand(0)); - return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2, - N->getOperand(3)), - 0); + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0); } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { @@ -868,7 +869,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall"); - return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first; } @@ -885,7 +889,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { @@ -895,7 +902,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, 
NewLHS, NewRHS, CCCode, SDLoc(N)); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), + N->getOperand(2), N->getOperand(3)); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -911,34 +919,6 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { 0); } -SDValue DAGTypeLegalizer::SoftenFloatOp_FABS(SDNode *N) { - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - - if (Op == N->getOperand(0)) - return SDValue(); - - return SDValue(DAG.UpdateNodeOperands(N, Op), 0); -} - -SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) { - SDValue Op0 = GetSoftenedFloat(N->getOperand(0)); - SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); - - if (Op0 == N->getOperand(0) && Op1 == N->getOperand(1)) - return SDValue(); - - return SDValue(DAG.UpdateNodeOperands(N, Op0, Op1), 0); -} - -SDValue DAGTypeLegalizer::SoftenFloatOp_FNEG(SDNode *N) { - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - - if (Op == N->getOperand(0)) - return SDValue(); - - return SDValue(DAG.UpdateNodeOperands(N, Op), 0); -} - SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { bool Signed = N->getOpcode() == ISD::FP_TO_SINT; EVT SVT = N->getOperand(0).getValueType(); @@ -962,23 +942,15 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, false, dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl).first; // Truncate the result if the libcall returns a larger type. return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res); } -SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT(SDNode *N) { - SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); - SDValue Op2 = GetSoftenedFloat(N->getOperand(2)); - - if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2)) - return SDValue(); - - return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), - 0); -} - SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast(N->getOperand(4))->get(); @@ -986,7 +958,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), + N->getOperand(0), N->getOperand(1)); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -1009,7 +982,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), + N->getOperand(0), N->getOperand(1)); // If softenSetCCOperands returned a scalar, use it. 
if (!NewRHS.getNode()) { @@ -1047,13 +1021,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LROUND_F32, RTLIB::LROUND_F64, RTLIB::LROUND_F80, RTLIB::LROUND_F128, RTLIB::LROUND_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) { @@ -1061,13 +1038,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LLROUND_F32, RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, RTLIB::LLROUND_F128, RTLIB::LLROUND_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) { @@ -1075,13 +1055,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LRINT_F32, RTLIB::LRINT_F64, RTLIB::LRINT_F80, RTLIB::LRINT_F128, RTLIB::LRINT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) { @@ -1089,13 +1072,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LLRINT_F32, RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, RTLIB::LLRINT_F128, RTLIB::LLRINT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } //===----------------------------------------------------------------------===// @@ -1267,13 +1253,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), - N->getValueType(0), Ops, false, + N->getValueType(0), Ops, CallOptions, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1341,13 +1328,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + TargetLowering::MakeLibCallOptions CallOptions; SDValue Call = 
TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMA_F32, RTLIB::FMA_F64, RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), - N->getValueType(0), Ops, false, + N->getValueType(0), Ops, CallOptions, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1355,13 +1343,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::MUL_F32, RTLIB::MUL_F64, RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), - N->getValueType(0), Ops, false, + N->getValueType(0), Ops, CallOptions, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1470,13 +1459,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SUB_F32, RTLIB::SUB_F64, RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - N->getValueType(0), Ops, false, + N->getValueType(0), Ops, CallOptions, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1555,7 +1545,9 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, } assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); - Hi = TLI.makeLibCall(DAG, LC, VT, Src, true, dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + Hi = TLI.makeLibCall(DAG, LC, VT, Src, CallOptions, dl).first; GetPairElements(Hi, Lo, Hi); } @@ -1732,7 +1724,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); - return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), false, dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), CallOptions, dl).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { @@ -1741,8 +1734,9 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0), - false, dl).first; + CallOptions, dl).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { @@ -1807,49 +1801,53 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) { EVT RVT = N->getValueType(0); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LROUND_F32, RTLIB::LROUND_F64, RTLIB::LROUND_F80, RTLIB::LROUND_F128, RTLIB::LROUND_PPCF128), - RVT, N->getOperand(0), false, SDLoc(N)).first; + RVT, N->getOperand(0), CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) { EVT RVT = N->getValueType(0); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, 
RTLIB::LLROUND_F32, RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, RTLIB::LLROUND_F128, RTLIB::LLROUND_PPCF128), - RVT, N->getOperand(0), false, SDLoc(N)).first; + RVT, N->getOperand(0), CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) { EVT RVT = N->getValueType(0); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LRINT_F32, RTLIB::LRINT_F64, RTLIB::LRINT_F80, RTLIB::LRINT_F128, RTLIB::LRINT_PPCF128), - RVT, N->getOperand(0), false, SDLoc(N)).first; + RVT, N->getOperand(0), CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) { EVT RVT = N->getValueType(0); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LLRINT_F32, RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, RTLIB::LLRINT_F128, RTLIB::LLRINT_PPCF128), - RVT, N->getOperand(0), false, SDLoc(N)).first; + RVT, N->getOperand(0), CallOptions, SDLoc(N)).first; } //===----------------------------------------------------------------------===// @@ -2002,6 +2000,12 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { dbgs() << "\n"); SDValue R = SDValue(); + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) { + LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n"); + return; + } + switch (N->getOpcode()) { // These opcodes cannot appear if promotion of FP16 is done in the backend // instead of Clang diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 15ac45c37c66..d5c1b539adbd 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -100,6 +100,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { Res = PromoteIntRes_BUILD_VECTOR(N); break; case ISD::SCALAR_TO_VECTOR: Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break; + case ISD::SPLAT_VECTOR: + Res = PromoteIntRes_SPLAT_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = PromoteIntRes_CONCAT_VECTORS(N); break; @@ -112,6 +114,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break; + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break; @@ -148,9 +152,12 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UADDSAT: case ISD::SSUBSAT: case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break; + case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: Res = PromoteIntRes_MULFIX(N); break; + case ISD::UMULFIX: + case ISD::UMULFIXSAT: Res = PromoteIntRes_MULFIX(N); break; + case ISD::ABS: Res = PromoteIntRes_ABS(N); break; case ISD::ATOMIC_LOAD: @@ -494,7 +501,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) NewOpc = ISD::FP_TO_SINT; - SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); + if (N->getOpcode() == ISD::STRICT_FP_TO_UINT && + !TLI.isOperationLegal(ISD::STRICT_FP_TO_UINT, NVT) && + TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT)) + NewOpc = ISD::STRICT_FP_TO_SINT; + + SDValue Res; + if (N->isStrictFPOpcode()) { + Res = 
DAG.getNode(NewOpc, dl, { NVT, MVT::Other }, + { N->getOperand(0), N->getOperand(1) }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + } else + Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); // Assert that the converted value fits in the original type. If it doesn't // (eg: because the value being converted is too big), then the result of the @@ -503,7 +523,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { // NOTE: fp-to-uint to fp-to-sint promotion guarantees zero extend. For example: // before legalization: fp-to-uint16, 65534. -> 0xfffe // after legalization: fp-to-sint32, 65534. -> 0x0000fffe - return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? + return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT || + N->getOpcode() == ISD::STRICT_FP_TO_UINT) ? ISD::AssertZext : ISD::AssertSext, dl, NVT, Res, DAG.getValueType(N->getValueType(0).getScalarType())); } @@ -590,7 +611,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { N->getIndex(), N->getScale() }; SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other), N->getMemoryVT(), dl, Ops, - N->getMemOperand()); + N->getMemOperand(), N->getIndexType()); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -623,48 +644,84 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) { - // For promoting iN -> iM, this can be expanded by - // 1. ANY_EXTEND iN to iM - // 2. SHL by M-N - // 3. [US][ADD|SUB]SAT - // 4. L/ASHR by M-N + // If the promoted type is legal, we can convert this to: + // 1. ANY_EXTEND iN to iM + // 2. SHL by M-N + // 3. [US][ADD|SUB]SAT + // 4. L/ASHR by M-N + // Else it is more efficient to convert this to a min and a max + // operation in the higher precision arithmetic. 
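// ---------------------------------------------------------------------------
// [Editor's illustration, not part of the patch.] A minimal standalone sketch
// of the min/max clamping identity described in the comment above: when the
// saturating operation is not legal at the promoted width, iN saturating
// add/sub can be done in the wider iM type with a plain add/sub followed by
// min/max against the iN limits. Plain C++ integers stand in for the promoted
// SDValues; the helper names below are illustrative only.
#include <algorithm>
#include <cstdint>

// i8 saddsat promoted to i32: add in the wide type, then clamp to [-128, 127]
// (SMIN with SatMax, then SMAX with SatMin, as in the expansion above).
static int8_t saddsat_i8_via_i32(int8_t A, int8_t B) {
  int32_t Wide = int32_t(A) + int32_t(B);               // SExt operands + ADD
  Wide = std::min(Wide, int32_t(INT8_MAX));             // clamp to SatMax
  Wide = std::max(Wide, int32_t(INT8_MIN));             // clamp to SatMin
  return int8_t(Wide);                                  // truncate back to i8
}

// i8 uaddsat promoted to i32: only the upper clamp (UMIN with SatMax) is
// needed, since the zero-extended sum cannot go below zero.
static uint8_t uaddsat_i8_via_i32(uint8_t A, uint8_t B) {
  uint32_t Wide = uint32_t(A) + uint32_t(B);            // ZExt operands + ADD
  return uint8_t(std::min(Wide, uint32_t(UINT8_MAX)));  // clamp to SatMax
}
// ---------------------------------------------------------------------------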
SDLoc dl(N); SDValue Op1 = N->getOperand(0); SDValue Op2 = N->getOperand(1); unsigned OldBits = Op1.getScalarValueSizeInBits(); unsigned Opcode = N->getOpcode(); - unsigned ShiftOp; - switch (Opcode) { - case ISD::SADDSAT: - case ISD::SSUBSAT: - ShiftOp = ISD::SRA; - break; - case ISD::UADDSAT: - case ISD::USUBSAT: - ShiftOp = ISD::SRL; - break; - default: - llvm_unreachable("Expected opcode to be signed or unsigned saturation " - "addition or subtraction"); - } - - SDValue Op1Promoted = GetPromotedInteger(Op1); - SDValue Op2Promoted = GetPromotedInteger(Op2); + SDValue Op1Promoted, Op2Promoted; + if (Opcode == ISD::UADDSAT || Opcode == ISD::USUBSAT) { + Op1Promoted = ZExtPromotedInteger(Op1); + Op2Promoted = ZExtPromotedInteger(Op2); + } else { + Op1Promoted = SExtPromotedInteger(Op1); + Op2Promoted = SExtPromotedInteger(Op2); + } EVT PromotedType = Op1Promoted.getValueType(); unsigned NewBits = PromotedType.getScalarSizeInBits(); - unsigned SHLAmount = NewBits - OldBits; - EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); - SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT); - Op1Promoted = - DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount); - Op2Promoted = - DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount); - SDValue Result = - DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted); - return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount); + if (TLI.isOperationLegalOrCustom(Opcode, PromotedType)) { + unsigned ShiftOp; + switch (Opcode) { + case ISD::SADDSAT: + case ISD::SSUBSAT: + ShiftOp = ISD::SRA; + break; + case ISD::UADDSAT: + case ISD::USUBSAT: + ShiftOp = ISD::SRL; + break; + default: + llvm_unreachable("Expected opcode to be signed or unsigned saturation " + "addition or subtraction"); + } + + unsigned SHLAmount = NewBits - OldBits; + EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); + SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT); + Op1Promoted = + DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount); + Op2Promoted = + DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount); + + SDValue Result = + DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted); + return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount); + } else { + if (Opcode == ISD::USUBSAT) { + SDValue Max = + DAG.getNode(ISD::UMAX, dl, PromotedType, Op1Promoted, Op2Promoted); + return DAG.getNode(ISD::SUB, dl, PromotedType, Max, Op2Promoted); + } + + if (Opcode == ISD::UADDSAT) { + APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits); + SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType); + SDValue Add = + DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted); + return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax); + } + + unsigned AddOp = Opcode == ISD::SADDSAT ? 
ISD::ADD : ISD::SUB; + APInt MinVal = APInt::getSignedMinValue(OldBits).sext(NewBits); + APInt MaxVal = APInt::getSignedMaxValue(OldBits).sext(NewBits); + SDValue SatMin = DAG.getConstant(MinVal, dl, PromotedType); + SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType); + SDValue Result = + DAG.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted); + Result = DAG.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax); + Result = DAG.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin); + return Result; + } } SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { @@ -673,6 +730,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { SDValue Op1Promoted, Op2Promoted; bool Signed = N->getOpcode() == ISD::SMULFIX || N->getOpcode() == ISD::SMULFIXSAT; + bool Saturating = + N->getOpcode() == ISD::SMULFIXSAT || N->getOpcode() == ISD::UMULFIXSAT; if (Signed) { Op1Promoted = SExtPromotedInteger(N->getOperand(0)); Op2Promoted = SExtPromotedInteger(N->getOperand(1)); @@ -685,7 +744,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { unsigned DiffSize = PromotedType.getScalarSizeInBits() - OldType.getScalarSizeInBits(); - bool Saturating = N->getOpcode() == ISD::SMULFIXSAT; if (Saturating) { // Promoting the operand and result values changes the saturation width, // which is extends the values that we clamp to on saturation. This could be @@ -1110,6 +1168,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break; case ISD::SCALAR_TO_VECTOR: Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break; + case ISD::SPLAT_VECTOR: + Res = PromoteIntOp_SPLAT_VECTOR(N); break; case ISD::VSELECT: case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break; case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break; @@ -1148,7 +1208,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: Res = PromoteIntOp_MULFIX(N); break; + case ISD::UMULFIX: + case ISD::UMULFIXSAT: Res = PromoteIntOp_MULFIX(N); break; case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break; @@ -1339,6 +1400,13 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) { GetPromotedInteger(N->getOperand(0))), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_SPLAT_VECTOR(SDNode *N) { + // Integer SPLAT_VECTOR operands are implicitly truncated, so just promote the + // operand in place. + return SDValue( + DAG.UpdateNodeOperands(N, GetPromotedInteger(N->getOperand(0))), 0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { assert(OpNo == 0 && "Only know how to promote the condition!"); SDValue Cond = N->getOperand(0); @@ -1454,8 +1522,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N, EVT DataVT = N->getValueType(0); NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); } else if (OpNo == 4) { - // Need to sign extend the index since the bits will likely be used. - NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + // The Index + if (N->isIndexSigned()) + // Need to sign extend the index since the bits will likely be used. 
+ NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + else + NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo)); } else NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); @@ -1470,8 +1542,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, EVT DataVT = N->getValue().getValueType(); NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); } else if (OpNo == 4) { - // Need to sign extend the index since the bits will likely be used. - NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + // The Index + if (N->isIndexSigned()) + // Need to sign extend the index since the bits will likely be used. + NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + else + NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo)); } else NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); @@ -1715,7 +1791,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: ExpandIntRes_MULFIX(N, Lo, Hi); break; + case ISD::UMULFIX: + case ISD::UMULFIXSAT: ExpandIntRes_MULFIX(N, Lo, Hi); break; case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: @@ -2473,7 +2550,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, true/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first, Lo, Hi); } @@ -2488,7 +2567,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, false/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first, Lo, Hi); } @@ -2514,7 +2594,9 @@ void DAGTypeLegalizer::ExpandIntRes_LLROUND(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT RetVT = N->getValueType(0); - SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first, Lo, Hi); } @@ -2540,7 +2622,9 @@ void DAGTypeLegalizer::ExpandIntRes_LLRINT(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT RetVT = N->getValueType(0); - SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first, Lo, Hi); } @@ -2743,7 +2827,9 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, } SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } @@ -2777,38 +2863,53 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); uint64_t Scale = N->getConstantOperandVal(2); - bool Saturating = N->getOpcode() == ISD::SMULFIXSAT; - EVT 
BoolVT = getSetCCResultType(VT); - SDValue Zero = DAG.getConstant(0, dl, VT); + bool Saturating = (N->getOpcode() == ISD::SMULFIXSAT || + N->getOpcode() == ISD::UMULFIXSAT); + bool Signed = (N->getOpcode() == ISD::SMULFIX || + N->getOpcode() == ISD::SMULFIXSAT); + + // Handle special case when scale is equal to zero. if (!Scale) { SDValue Result; if (!Saturating) { Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); } else { - Result = DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS); + EVT BoolVT = getSetCCResultType(VT); + unsigned MulOp = Signed ? ISD::SMULO : ISD::UMULO; + Result = DAG.getNode(MulOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS); SDValue Product = Result.getValue(0); SDValue Overflow = Result.getValue(1); - - APInt MinVal = APInt::getSignedMinValue(VTSize); - APInt MaxVal = APInt::getSignedMaxValue(VTSize); - SDValue SatMin = DAG.getConstant(MinVal, dl, VT); - SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); - SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT); - Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin); - Result = DAG.getSelect(dl, VT, Overflow, Result, Product); + if (Signed) { + APInt MinVal = APInt::getSignedMinValue(VTSize); + APInt MaxVal = APInt::getSignedMaxValue(VTSize); + SDValue SatMin = DAG.getConstant(MinVal, dl, VT); + SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT); + Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin); + Result = DAG.getSelect(dl, VT, Overflow, Result, Product); + } else { + // For unsigned multiplication, we only need to check the max since we + // can't really overflow towards zero. + APInt MaxVal = APInt::getMaxValue(VTSize); + SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); + Result = DAG.getSelect(dl, VT, Overflow, SatMax, Product); + } } SplitInteger(Result, Lo, Hi); return; } + // For SMULFIX[SAT] we only expect to find Scale Result; - bool Signed = (N->getOpcode() == ISD::SMULFIX || - N->getOpcode() == ISD::SMULFIXSAT); unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI; if (!TLI.expandMUL_LOHI(LoHiOp, VT, dl, LHS, RHS, Result, NVT, DAG, TargetLowering::MulExpansionKind::OnlyLegalOrCustom, @@ -2822,19 +2923,9 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, "the size of the current value type"); EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); - // Shift whole amount by scale. - SDValue ResultLL = Result[0]; - SDValue ResultLH = Result[1]; - SDValue ResultHL = Result[2]; - SDValue ResultHH = Result[3]; - - SDValue SatMax, SatMin; - SDValue NVTZero = DAG.getConstant(0, dl, NVT); - SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT); - EVT BoolNVT = getSetCCResultType(NVT); - - // After getting the multplication result in 4 parts, we need to perform a + // After getting the multiplication result in 4 parts, we need to perform a // shift right by the amount of the scale to get the result in that scale. + // // Let's say we multiply 2 64 bit numbers. The resulting value can be held in // 128 bits that are cut into 4 32-bit parts: // @@ -2846,123 +2937,135 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, // // |NVTSize-| // - // The resulting Lo and Hi will only need to be one of these 32-bit parts - // after shifting. + // The resulting Lo and Hi would normally be in LL and LH after the shift. 
But + // to avoid unneccessary shifting of all 4 parts, we can adjust the shift + // amount and get Lo and Hi using two funnel shifts. Or for the special case + // when Scale is a multiple of NVTSize we can just pick the result without + // shifting. + uint64_t Part0 = Scale / NVTSize; // Part holding lowest bit needed. + if (Scale % NVTSize) { + SDValue ShiftAmount = DAG.getConstant(Scale % NVTSize, dl, ShiftTy); + Lo = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 1], Result[Part0], + ShiftAmount); + Hi = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 2], Result[Part0 + 1], + ShiftAmount); + } else { + Lo = Result[Part0]; + Hi = Result[Part0 + 1]; + } + + // Unless saturation is requested we are done. The result is in . + if (!Saturating) + return; + + // Can not overflow when there is no integer part. + if (Scale == VTSize) + return; + + // To handle saturation we must check for overflow in the multiplication. + // + // Unsigned overflow happened if the upper (VTSize - Scale) bits (of Result) + // aren't all zeroes. + // + // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of Result) + // aren't all ones or all zeroes. + // + // We cannot overflow past HH when multiplying 2 ints of size VTSize, so the + // highest bit of HH determines saturation direction in the event of signed + // saturation. + + SDValue ResultHL = Result[2]; + SDValue ResultHH = Result[3]; + + SDValue SatMax, SatMin; + SDValue NVTZero = DAG.getConstant(0, dl, NVT); + SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT); + EVT BoolNVT = getSetCCResultType(NVT); + + if (!Signed) { + if (Scale < NVTSize) { + // Overflow happened if ((HH | (HL >> Scale)) != 0). + SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, + DAG.getConstant(Scale, dl, ShiftTy)); + SDValue Tmp = DAG.getNode(ISD::OR, dl, NVT, HLAdjusted, ResultHH); + SatMax = DAG.getSetCC(dl, BoolNVT, Tmp, NVTZero, ISD::SETNE); + } else if (Scale == NVTSize) { + // Overflow happened if (HH != 0). + SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETNE); + } else if (Scale < VTSize) { + // Overflow happened if ((HH >> (Scale - NVTSize)) != 0). + SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, + DAG.getConstant(Scale - NVTSize, dl, + ShiftTy)); + SatMax = DAG.getSetCC(dl, BoolNVT, HLAdjusted, NVTZero, ISD::SETNE); + } else + llvm_unreachable("Scale must be less or equal to VTSize for UMULFIXSAT" + "(and saturation can't happen with Scale==VTSize)."); + + Hi = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Hi); + Lo = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Lo); + return; + } + if (Scale < NVTSize) { - // If the scale is less than the size of the VT we expand to, the Hi and - // Lo of the result will be in the first 2 parts of the result after - // shifting right. This only requires shifting by the scale as far as the - // third part in the result (ResultHL). - SDValue SRLAmnt = DAG.getConstant(Scale, dl, ShiftTy); - SDValue SHLAmnt = DAG.getConstant(NVTSize - Scale, dl, ShiftTy); - Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLL, SRLAmnt); - Lo = DAG.getNode(ISD::OR, dl, NVT, Lo, - DAG.getNode(ISD::SHL, dl, NVT, ResultLH, SHLAmnt)); - Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt); - Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, - DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt)); - - // We cannot overflow past HH when multiplying 2 ints of size VTSize, so the - // highest bit of HH determines saturation direction in the event of - // saturation. 
// The number of overflow bits we can check are VTSize - Scale + 1 (we // include the sign bit). If these top bits are > 0, then we overflowed past // the max value. If these top bits are < -1, then we overflowed past the // min value. Otherwise, we did not overflow. - if (Saturating) { - unsigned OverflowBits = VTSize - Scale + 1; - assert(OverflowBits <= VTSize && OverflowBits > NVTSize && - "Extent of overflow bits must start within HL"); - SDValue HLHiMask = DAG.getConstant( - APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT); - SDValue HLLoMask = DAG.getConstant( - APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT); - - // HH > 0 or HH == 0 && HL > HLLoMask - SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT); - SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ); - SDValue HLPos = - DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT); - SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos, - DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLPos)); - - // HH < -1 or HH == -1 && HL < HLHiMask - SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT); - SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ); - SDValue HLNeg = - DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT); - SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg, - DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLNeg)); - } + unsigned OverflowBits = VTSize - Scale + 1; + assert(OverflowBits <= VTSize && OverflowBits > NVTSize && + "Extent of overflow bits must start within HL"); + SDValue HLHiMask = DAG.getConstant( + APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT); + SDValue HLLoMask = DAG.getConstant( + APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT); + // We overflow max if HH > 0 or (HH == 0 && HL > HLLoMask). + SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT); + SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ); + SDValue HLUGT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT); + SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0, + DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLUGT)); + // We overflow min if HH < -1 or (HH == -1 && HL < HLHiMask). + SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT); + SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ); + SDValue HLULT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT); + SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT, + DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLULT)); } else if (Scale == NVTSize) { - // If the scales are equal, Lo and Hi are ResultLH and Result HL, - // respectively. Avoid shifting to prevent undefined behavior. - Lo = ResultLH; - Hi = ResultHL; - - // We overflow max if HH > 0 or HH == 0 && HL sign bit is 1. - // We overflow min if HH < -1 or HH == -1 && HL sign bit is 0. 
- if (Saturating) { - SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT); - SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ); - SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT); - SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos, - DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLNeg)); - - SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT); - SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ); - SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE); - SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg, - DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLPos)); - } + // We overflow max if HH > 0 or (HH == 0 && HL sign bit is 1). + SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT); + SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ); + SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT); + SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0, + DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLNeg)); + // We overflow min if HH < -1 or (HH == -1 && HL sign bit is 0). + SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT); + SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ); + SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE); + SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT, + DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLPos)); } else if (Scale < VTSize) { - // If the scale is instead less than the old VT size, but greater than or - // equal to the expanded VT size, the first part of the result (ResultLL) is - // no longer a part of Lo because it would be scaled out anyway. Instead we - // can start shifting right from the fourth part (ResultHH) to the second - // part (ResultLH), and Result LH will be the new Lo. - SDValue SRLAmnt = DAG.getConstant(Scale - NVTSize, dl, ShiftTy); - SDValue SHLAmnt = DAG.getConstant(VTSize - Scale, dl, ShiftTy); - Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt); - Lo = DAG.getNode(ISD::OR, dl, NVT, Lo, - DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt)); - Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, SRLAmnt); - Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, - DAG.getNode(ISD::SHL, dl, NVT, ResultHH, SHLAmnt)); - // This is similar to the case when we saturate if Scale < NVTSize, but we - // only need to chech HH. - if (Saturating) { - unsigned OverflowBits = VTSize - Scale + 1; - SDValue HHHiMask = DAG.getConstant( - APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT); - SDValue HHLoMask = DAG.getConstant( - APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT); - - SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT); - SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT); - } - } else if (Scale == VTSize) { - assert( - !Signed && - "Only unsigned types can have a scale equal to the operand bit width"); - - Lo = ResultHL; - Hi = ResultHH; - } else { - llvm_unreachable("Expected the scale to be less than or equal to the width " - "of the operands"); - } + // only need to check HH. 
+ unsigned OverflowBits = VTSize - Scale + 1; + SDValue HHHiMask = DAG.getConstant( + APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT); + SDValue HHLoMask = DAG.getConstant( + APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT); + SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT); + SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT); + } else + llvm_unreachable("Illegal scale for signed fixed point mul."); - if (Saturating) { - APInt LHMax = APInt::getSignedMaxValue(NVTSize); - APInt LLMax = APInt::getAllOnesValue(NVTSize); - APInt LHMin = APInt::getSignedMinValue(NVTSize); - Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LHMax, dl, NVT), Hi); - Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(LHMin, dl, NVT), Hi); - Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LLMax, dl, NVT), Lo); - Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo); - } + // Saturate to signed maximum. + APInt MaxHi = APInt::getSignedMaxValue(NVTSize); + APInt MaxLo = APInt::getAllOnesValue(NVTSize); + Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxHi, dl, NVT), Hi); + Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxLo, dl, NVT), Lo); + // Saturate to signed minimum. + APInt MinHi = APInt::getSignedMinValue(NVTSize); + Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(MinHi, dl, NVT), Hi); + Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo); } void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, @@ -3030,7 +3133,9 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, LC = RTLIB::SDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, @@ -3129,7 +3234,9 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, isSigned, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(isSigned); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); return; } @@ -3217,7 +3324,9 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, LC = RTLIB::SREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, @@ -3373,7 +3482,8 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, LC = RTLIB::UDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, @@ -3399,7 +3509,8 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, LC = RTLIB::UREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + 
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, @@ -3759,7 +3870,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this SINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, Op, true, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -3924,7 +4037,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this UINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, Op, true, dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, dl).first; } SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { @@ -4033,6 +4148,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op); } +SDValue DAGTypeLegalizer::PromoteIntRes_SPLAT_VECTOR(SDNode *N) { + SDLoc dl(N); + + SDValue SplatVal = N->getOperand(0); + + assert(!SplatVal.getValueType().isVector() && "Input must be a scalar"); + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "Type must be promoted to a vector type"); + EVT NOutElemVT = NOutVT.getVectorElementType(); + + SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, SplatVal); + + return DAG.getNode(ISD::SPLAT_VECTOR, dl, NOutVT, Op); +} + SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { SDLoc dl(N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 14fd5be23ccb..b596c174a287 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -81,7 +81,6 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) { SDValue Res(&Node, i); - EVT VT = Res.getValueType(); bool Failed = false; // Don't create a value in map. auto ResId = (ValueToIdMap.count(Res)) ? ValueToIdMap[Res] : 0; @@ -135,17 +134,13 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { dbgs() << "Unprocessed value in a map!"; Failed = true; } - } else if (isTypeLegal(VT) || IgnoreNodeResults(&Node)) { + } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) { if (Mapped > 1) { dbgs() << "Value with legal type was transformed!"; Failed = true; } } else { - // If the value can be kept in HW registers, softening machinery can - // leave it unchanged and don't put it to any map. - if (Mapped == 0 && - !(getTypeAction(VT) == TargetLowering::TypeSoftenFloat && - isLegalInHWReg(VT))) { + if (Mapped == 0) { dbgs() << "Processed value not in any map!"; Failed = true; } else if (Mapped & (Mapped - 1)) { @@ -257,13 +252,9 @@ bool DAGTypeLegalizer::run() { Changed = true; goto NodeDone; case TargetLowering::TypeSoftenFloat: - Changed = SoftenFloatResult(N, i); - if (Changed) - goto NodeDone; - // If not changed, the result type should be legally in register. 
- assert(isLegalInHWReg(ResultVT) && - "Unchanged SoftenFloatResult should be legal in register!"); - goto ScanOperands; + SoftenFloatResult(N, i); + Changed = true; + goto NodeDone; case TargetLowering::TypeExpandFloat: ExpandFloatResult(N, i); Changed = true; @@ -439,15 +430,9 @@ NodeDone: bool Failed = false; // Check that all result types are legal. - // A value type is illegal if its TypeAction is not TypeLegal, - // and TLI.RegClassForVT does not have a register class for this type. - // For example, the x86_64 target has f128 that is not TypeLegal, - // to have softened operators, but it also has FR128 register class to - // pass and return f128 values. Hence a legalized node can have f128 type. if (!IgnoreNodeResults(&Node)) for (unsigned i = 0, NumVals = Node.getNumValues(); i < NumVals; ++i) - if (!isTypeLegal(Node.getValueType(i)) && - !TLI.isTypeLegal(Node.getValueType(i))) { + if (!isTypeLegal(Node.getValueType(i))) { dbgs() << "Result type " << i << " illegal: "; Node.dump(&DAG); Failed = true; @@ -456,8 +441,7 @@ NodeDone: // Check that all operand types are legal. for (unsigned i = 0, NumOps = Node.getNumOperands(); i < NumOps; ++i) if (!IgnoreNodeResults(Node.getOperand(i).getNode()) && - !isTypeLegal(Node.getOperand(i).getValueType()) && - !TLI.isTypeLegal(Node.getOperand(i).getValueType())) { + !isTypeLegal(Node.getOperand(i).getValueType())) { dbgs() << "Operand type " << i << " illegal: "; Node.getOperand(i).dump(&DAG); Failed = true; @@ -713,23 +697,13 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { - // f128 of x86_64 could be kept in SSE registers, - // but sometimes softened to i128. - assert((Result.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) || - Op.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && "Invalid type for softened float"); AnalyzeNewValue(Result); auto &OpIdEntry = SoftenedFloats[getTableId(Op)]; - // Allow repeated calls to save f128 type nodes - // or any node with type that transforms to itself. - // Many operations on these types are not softened. - assert(((OpIdEntry == 0) || - Op.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && - "Node is already converted to integer!"); + assert((OpIdEntry == 0) && "Node is already converted to integer!"); OpIdEntry = getTableId(Result); } @@ -1003,25 +977,27 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { /// Convert the node into a libcall with the same prototype. 
SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned) { + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(isSigned); unsigned NumOps = N->getNumOperands(); SDLoc dl(N); if (NumOps == 0) { - return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, CallOptions, dl).first; } else if (NumOps == 1) { SDValue Op = N->getOperand(0); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, CallOptions, dl).first; } else if (NumOps == 2) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, dl).first; } SmallVector Ops(NumOps); for (unsigned i = 0; i < NumOps; ++i) Ops[i] = N->getOperand(i); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first; + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, dl).first; } /// Expand a node into a call to a libcall. Similar to ExpandLibCall except that diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 1d489b1b3a33..4afbae69128a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -73,15 +73,6 @@ private: return VT.isSimple() && TLI.isTypeLegal(VT); } - /// Return true if this type can be passed in registers. - /// For example, x86_64's f128, should to be legally in registers - /// and only some operations converted to library calls or integer - /// bitwise operations. - bool isLegalInHWReg(EVT VT) const { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - return VT == NVT && isSimpleLegalType(VT); - } - EVT getSetCCResultType(EVT VT) const { return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } @@ -306,6 +297,7 @@ private: SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N); SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N); SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntRes_SPLAT_VECTOR(SDNode *N); SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N); SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N); SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N); @@ -363,6 +355,7 @@ private: SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntOp_SPLAT_VECTOR(SDNode *N); SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo); @@ -472,14 +465,11 @@ private: // Float to Integer Conversion Support: LegalizeFloatTypes.cpp //===--------------------------------------------------------------------===// - /// Given an operand Op of Float type, returns the integer if the Op is not - /// supported in target HW and converted to the integer. - /// The integer contains exactly the same bits as Op - only the type changed. - /// For example, if Op is an f32 which was softened to an i32, then this - /// method returns an i32, the bits of which coincide with those of Op. - /// If the Op can be efficiently supported in target HW or the operand must - /// stay in a register, the Op is not converted to an integer. - /// In that case, the given op is returned. 
+ /// GetSoftenedFloat - Given a processed operand Op which was converted to an + /// integer of the same size, this returns the integer. The integer contains + /// exactly the same bits as Op - only the type changed. For example, if Op + /// is an f32 which was softened to an i32, then this method returns an i32, + /// the bits of which coincide with those of Op SDValue GetSoftenedFloat(SDValue Op) { TableId Id = getTableId(Op); auto Iter = SoftenedFloats.find(Id); @@ -494,19 +484,19 @@ private: } void SetSoftenedFloat(SDValue Op, SDValue Result); - // Convert Float Results to Integer for Non-HW-supported Operations. - bool SoftenFloatResult(SDNode *N, unsigned ResNo); + // Convert Float Results to Integer. + void SoftenFloatResult(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_BITCAST(SDNode *N); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); - SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_ConstantFP(SDNode *N); SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_FABS(SDNode *N); SDValue SoftenFloatRes_FMINNUM(SDNode *N); SDValue SoftenFloatRes_FMAXNUM(SDNode *N); SDValue SoftenFloatRes_FADD(SDNode *N); SDValue SoftenFloatRes_FCEIL(SDNode *N); - SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); SDValue SoftenFloatRes_FCOS(SDNode *N); SDValue SoftenFloatRes_FDIV(SDNode *N); SDValue SoftenFloatRes_FEXP(SDNode *N); @@ -518,7 +508,7 @@ private: SDValue SoftenFloatRes_FMA(SDNode *N); SDValue SoftenFloatRes_FMUL(SDNode *N); SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); - SDValue SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_FNEG(SDNode *N); SDValue SoftenFloatRes_FP_EXTEND(SDNode *N); SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N); SDValue SoftenFloatRes_FP_ROUND(SDNode *N); @@ -531,27 +521,17 @@ private: SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); SDValue SoftenFloatRes_FTRUNC(SDNode *N); - SDValue SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_LOAD(SDNode *N); + SDValue SoftenFloatRes_SELECT(SDNode *N); + SDValue SoftenFloatRes_SELECT_CC(SDNode *N); SDValue SoftenFloatRes_UNDEF(SDNode *N); SDValue SoftenFloatRes_VAARG(SDNode *N); SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); - // Return true if we can skip softening the given operand or SDNode because - // either it was soften before by SoftenFloatResult and references to the - // operand were replaced by ReplaceValueWith or it's value type is legal in HW - // registers and the operand can be left unchanged. - bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo); - - // Convert Float Operand to Integer for Non-HW-supported Operations. + // Convert Float Operand to Integer. 
bool SoftenFloatOperand(SDNode *N, unsigned OpNo); SDValue SoftenFloatOp_BITCAST(SDNode *N); - SDValue SoftenFloatOp_COPY_TO_REG(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); - SDValue SoftenFloatOp_FABS(SDNode *N); - SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N); - SDValue SoftenFloatOp_FNEG(SDNode *N); SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N); @@ -559,7 +539,6 @@ private: SDValue SoftenFloatOp_LLROUND(SDNode *N); SDValue SoftenFloatOp_LRINT(SDNode *N); SDValue SoftenFloatOp_LLRINT(SDNode *N); - SDValue SoftenFloatOp_SELECT(SDNode *N); SDValue SoftenFloatOp_SELECT_CC(SDNode *N); SDValue SoftenFloatOp_SETCC(SDNode *N); SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); @@ -715,6 +694,7 @@ private: bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); SDValue ScalarizeVecOp_BITCAST(SDNode *N); SDValue ScalarizeVecOp_UnaryOp(SDNode *N); + SDValue ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N); SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecOp_VSELECT(SDNode *N); @@ -830,6 +810,7 @@ private: SDValue WidenVecRes_Ternary(SDNode *N); SDValue WidenVecRes_Binary(SDNode *N); SDValue WidenVecRes_BinaryCanTrap(SDNode *N); + SDValue WidenVecRes_BinaryWithExtraScalarOp(SDNode *N); SDValue WidenVecRes_StrictFP(SDNode *N); SDValue WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo); SDValue WidenVecRes_Convert(SDNode *N); @@ -933,6 +914,8 @@ private: void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVSETCC(const SDNode *N); + //===--------------------------------------------------------------------===// // Generic Expansion: LegalizeTypesGeneric.cpp //===--------------------------------------------------------------------===// diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 943f63f46c47..5562f400b6e1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -52,17 +52,11 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypePromoteFloat: llvm_unreachable("Bitcast of a promotion-needing float should never need" "expansion"); - case TargetLowering::TypeSoftenFloat: { - // Expand the floating point operand only if it was converted to integers. - // Otherwise, it is a legal type like f128 that can be saved in a register. - auto SoftenedOp = GetSoftenedFloat(InOp); - if (isLegalInHWReg(SoftenedOp.getValueType())) - break; - SplitInteger(SoftenedOp, Lo, Hi); + case TargetLowering::TypeSoftenFloat: + SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; - } case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: { auto &DL = DAG.getDataLayout(); @@ -509,23 +503,6 @@ void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo, GetSplitOp(Op, Lo, Hi); } -static std::pair SplitVSETCC(const SDNode *N, - SelectionDAG &DAG) { - SDLoc DL(N); - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - - // Split the inputs. 
- SDValue Lo, Hi, LL, LH, RL, RH; - std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); - std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); - - Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); - Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); - - return std::make_pair(Lo, Hi); -} - void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LL, LH, RL, RH, CL, CH; SDLoc dl(N); @@ -537,16 +514,25 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) { if (Cond.getValueType().isVector()) { if (SDValue Res = WidenVSELECTAndMask(N)) std::tie(CL, CH) = DAG.SplitVector(Res->getOperand(0), dl); - // It seems to improve code to generate two narrow SETCCs as opposed to - // splitting a wide result vector. - else if (Cond.getOpcode() == ISD::SETCC) - std::tie(CL, CH) = SplitVSETCC(Cond.getNode(), DAG); // Check if there are already splitted versions of the vector available and // use those instead of splitting the mask operand again. else if (getTypeAction(Cond.getValueType()) == TargetLowering::TypeSplitVector) GetSplitVector(Cond, CL, CH); - else + // It seems to improve code to generate two narrow SETCCs as opposed to + // splitting a wide result vector. + else if (Cond.getOpcode() == ISD::SETCC) { + // If the condition is a vXi1 vector, and the LHS of the setcc is a legal + // type and the setcc result type is the same vXi1, then leave the setcc + // alone. + EVT CondLHSVT = Cond.getOperand(0).getValueType(); + if (Cond.getValueType().getVectorElementType() == MVT::i1 && + isTypeLegal(CondLHSVT) && + getSetCCResultType(CondLHSVT) == Cond.getValueType()) + std::tie(CL, CH) = DAG.SplitVector(Cond, dl); + else + SplitVecRes_SETCC(Cond.getNode(), CL, CH); + } else std::tie(CL, CH) = DAG.SplitVector(Cond, dl); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 10b8b705869e..15c3a0b6cfad 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -38,6 +38,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" @@ -333,14 +334,27 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::STRICT_FP_ROUND: case ISD::STRICT_FP_EXTEND: - // These pseudo-ops get legalized as if they were their non-strict - // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT - // is also legal, but if ISD::FSQRT requires expansion then so does - // ISD::STRICT_FSQRT. - Action = TLI.getStrictFPOperationAction(Node->getOpcode(), - Node->getValueType(0)); + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + // If we're asked to expand a strict vector floating-point operation, + // by default we're going to simply unroll it. That is usually the + // best approach, except in the case where the resulting strict (scalar) + // operations would themselves use the fallback mutation to non-strict. + // In that specific case, just do the fallback on the vector op. 
+ if (Action == TargetLowering::Expand && + TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) { + EVT EltVT = Node->getValueType(0).getVectorElementType(); + if (TLI.getOperationAction(Node->getOpcode(), EltVT) + == TargetLowering::Expand && + TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT) + == TargetLowering::Legal) + Action = TargetLowering::Legal; + } break; case ISD::ADD: case ISD::SUB: @@ -439,16 +453,13 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { break; case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: { + case ISD::UMULFIX: + case ISD::UMULFIXSAT: { unsigned Scale = Node->getConstantOperandVal(2); Action = TLI.getFixedPointOperationAction(Node->getOpcode(), Node->getValueType(0), Scale); break; } - case ISD::FP_ROUND_INREG: - Action = TLI.getOperationAction(Node->getOpcode(), - cast(Node->getOperand(1))->getVT()); - break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::VECREDUCE_ADD: @@ -820,6 +831,13 @@ SDValue VectorLegalizer::Expand(SDValue Op) { case ISD::SMULFIX: case ISD::UMULFIX: return ExpandFixedPointMul(Op); + case ISD::SMULFIXSAT: + case ISD::UMULFIXSAT: + // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly + // why. Maybe it results in worse codegen compared to the unroll for some + // targets? This should probably be investigated. And if we still prefer to + // unroll an explanation could be helpful. + return DAG.UnrollVectorOp(Op.getNode()); case ISD::STRICT_FADD: case ISD::STRICT_FSUB: case ISD::STRICT_FMUL: @@ -844,6 +862,8 @@ SDValue VectorLegalizer::Expand(SDValue Op) { case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: return ExpandStrictFPOp(Op); case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: @@ -1168,9 +1188,13 @@ SDValue VectorLegalizer::ExpandABS(SDValue Op) { SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) { // Attempt to expand using TargetLowering. - SDValue Result; - if (TLI.expandFP_TO_UINT(Op.getNode(), Result, DAG)) + SDValue Result, Chain; + if (TLI.expandFP_TO_UINT(Op.getNode(), Result, Chain, DAG)) { + if (Op.getNode()->isStrictFPOpcode()) + // Relink the chain + DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Chain); return Result; + } // Otherwise go ahead and unroll. 
return DAG.UnrollVectorOp(Op.getNode()); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 7e4d52617977..3763e886cef2 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -52,7 +52,6 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::STRICT_FP_ROUND: R = ScalarizeVecRes_STRICT_FP_ROUND(N); break; case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; - case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break; case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast(N));break; @@ -171,6 +170,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::STRICT_FP_EXTEND: R = ScalarizeVecRes_StrictFPOp(N); break; @@ -185,6 +186,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: + case ISD::UMULFIXSAT: R = ScalarizeVecRes_MULFIX(N); break; } @@ -604,6 +606,10 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::UINT_TO_FP: Res = ScalarizeVecOp_UnaryOp(N); break; + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + Res = ScalarizeVecOp_UnaryOp_StrictFP(N); + break; case ISD::CONCAT_VECTORS: Res = ScalarizeVecOp_CONCAT_VECTORS(N); break; @@ -679,6 +685,23 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op); } +/// If the input is a vector that needs to be scalarized, it must be <1 x ty>. +/// Do the strict FP operation on the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N) { + assert(N->getValueType(0).getVectorNumElements() == 1 && + "Unexpected vector type!"); + SDValue Elt = GetScalarizedVector(N->getOperand(1)); + SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), + { N->getValueType(0).getScalarType(), MVT::Other }, + { N->getOperand(0), Elt }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + // Revectorize the result so the types line up with what the uses of this + // expression expect. + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); +} + /// The vectors to concatenate have length one - use a BUILD_VECTOR instead. 
SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { SmallVector Ops(N->getNumOperands()); @@ -828,7 +851,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break; - case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; @@ -883,7 +905,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_ROUND: case ISD::STRICT_FP_ROUND: case ISD::FP_TO_SINT: + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FRINT: case ISD::FROUND: case ISD::FSIN: @@ -977,6 +1001,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: + case ISD::UMULFIXSAT: SplitVecRes_MULFIX(N, Lo, Hi); break; } @@ -1560,10 +1585,14 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, // Split Mask operand SDValue MaskLo, MaskHi; - if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) - GetSplitVector(Mask, MaskLo, MaskHi); - else - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + if (Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + } EVT MemoryVT = MLD->getMemoryVT(); EVT LoMemVT, HiMemVT; @@ -1622,10 +1651,14 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, // Split Mask operand SDValue MaskLo, MaskHi; - if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) - GetSplitVector(Mask, MaskLo, MaskHi); - else - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + if (Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + } EVT MemoryVT = MGT->getMemoryVT(); EVT LoMemVT, HiMemVT; @@ -1651,11 +1684,11 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale}; Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, - MMO); + MMO, MGT->getIndexType()); SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale}; Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi, - MMO); + MMO, MGT->getIndexType()); // Build a factor node to remember that this load is independent of the // other one. 
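A standalone sketch of the idea behind the SplitVecRes_MLOAD/MGATHER changes above (not part of the patch, and deliberately independent of the SelectionDAG API): a wide masked gather is legalized by splitting its mask, index and pass-through vectors in half and issuing two half-width gathers whose results are concatenated. The helper names maskedGather and splitMaskedGather are hypothetical stand-ins for the Lo/Hi nodes the legalizer builds.

#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <vector>

// Reference semantics of a masked gather: for each lane I with Mask[I] set,
// load Base[Index[I]]; otherwise keep the pass-through value for that lane.
static std::vector<int32_t> maskedGather(const std::vector<int32_t> &Base,
                                         const std::vector<std::size_t> &Index,
                                         const std::vector<bool> &Mask,
                                         const std::vector<int32_t> &PassThru) {
  std::vector<int32_t> Res(PassThru);
  for (std::size_t I = 0; I < Index.size(); ++I)
    if (Mask[I])
      Res[I] = Base[Index[I]];
  return Res;
}

// Splitting: run the same operation on the low half and then the high half of
// the per-lane operands and concatenate the results, which is the shape of
// the Lo/Hi legalization performed in the hunks above.
static std::vector<int32_t> splitMaskedGather(const std::vector<int32_t> &Base,
                                              const std::vector<std::size_t> &Index,
                                              const std::vector<bool> &Mask,
                                              const std::vector<int32_t> &PassThru) {
  std::size_t Half = Index.size() / 2;
  std::vector<int32_t> Res;
  for (std::size_t Begin : {std::size_t(0), Half}) {
    std::size_t End = (Begin == 0) ? Half : Index.size();
    std::vector<std::size_t> Idx(Index.begin() + Begin, Index.begin() + End);
    std::vector<bool> M(Mask.begin() + Begin, Mask.begin() + End);
    std::vector<int32_t> PT(PassThru.begin() + Begin, PassThru.begin() + End);
    std::vector<int32_t> Part = maskedGather(Base, Idx, M, PT);
    Res.insert(Res.end(), Part.begin(), Part.end());
  }
  return Res;
}

When the mask comes straight from a SETCC, the hunks above now split it by re-legalizing the SETCC itself (SplitVecRes_SETCC) instead of splitting its already computed result; the overall Lo/Hi structure stays the same as in this sketch.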
@@ -1979,6 +2012,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::CTTZ: case ISD::CTLZ: case ISD::CTPOP: @@ -2293,7 +2328,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale}; SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, - OpsLo, MMO); + OpsLo, MMO, MGT->getIndexType()); MMO = DAG.getMachineFunction(). getMachineMemOperand(MGT->getPointerInfo(), @@ -2303,7 +2338,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale}; SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, - OpsHi, MMO); + OpsHi, MMO, MGT->getIndexType()); // Build a factor node to remember that this load is independent of the // other one. @@ -2340,12 +2375,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, else std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + // Split Mask operand SDValue MaskLo, MaskHi; - if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) - // Split Mask operand - GetSplitVector(Mask, MaskLo, MaskHi); - else - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + } SDValue Lo, Hi; MachineMemOperand *MMO = DAG.getMachineFunction(). @@ -2397,12 +2436,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, else std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + // Split Mask operand SDValue MaskLo, MaskHi; - if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) - // Split Mask operand - GetSplitVector(Mask, MaskLo, MaskHi); - else - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + } SDValue IndexHi, IndexLo; if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) @@ -2418,7 +2461,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale}; Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), - DL, OpsLo, MMO); + DL, OpsLo, MMO, N->getIndexType()); MMO = DAG.getMachineFunction(). getMachineMemOperand(N->getPointerInfo(), @@ -2430,7 +2473,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, // after another. 
SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale}; return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), - DL, OpsHi, MMO); + DL, OpsHi, MMO, N->getIndexType()); } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -2596,7 +2639,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2)); HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2)); SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes); - return PromoteTargetBoolean(Con, N->getValueType(0)); + + EVT OpVT = N->getOperand(0).getValueType(); + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); + return DAG.getNode(ExtendCode, DL, N->getValueType(0), Con); } @@ -2663,7 +2710,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; - case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; @@ -2719,6 +2765,15 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_BinaryCanTrap(N); break; + case ISD::SMULFIX: + case ISD::SMULFIXSAT: + case ISD::UMULFIX: + case ISD::UMULFIXSAT: + // These are binary operations, but with an extra operand that shouldn't + // be widened (the scale). + Res = WidenVecRes_BinaryWithExtraScalarOp(N); + break; + case ISD::STRICT_FADD: case ISD::STRICT_FSUB: case ISD::STRICT_FMUL: @@ -2790,6 +2845,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::STRICT_FP_EXTEND: case ISD::STRICT_FP_ROUND: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: Res = WidenVecRes_Convert_StrictFP(N); break; @@ -2866,6 +2923,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags()); } +SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) { + // Binary op widening, but with an extra operand that shouldn't be widened. + SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + SDValue InOp3 = N->getOperand(2); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3, + N->getFlags()); +} + // Given a vector of operations that have been broken up to widen, see // if we can collect them together into the next widest legal VT. This // implementation is trap-safe. @@ -3716,7 +3784,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { Scale }; SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), N->getMemoryVT(), dl, Ops, - N->getMemOperand()); + N->getMemOperand(), N->getIndexType()); // Legalize the chain result - switch anything that used the old chain to // use the new one. 
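Several hunks in this file, and in the integer-expansion code earlier in the patch, add ISD::UMULFIXSAT alongside the existing SMULFIX/SMULFIXSAT/UMULFIX handling. As a reminder of the semantics those nodes implement, here is a minimal standalone sketch (not LLVM code; the function name umulfixsat16 is made up) of an unsigned saturating fixed-point multiply on a 16-bit type: multiply in double width, shift right by the scale, then clamp to the unsigned maximum instead of wrapping.

#include <cstdint>
#include <limits>

static uint16_t umulfixsat16(uint16_t A, uint16_t B, unsigned Scale) {
  // Scale may be 0..16 for a 16-bit type. With Scale == 16 both operands are
  // pure fractions, so the product can never exceed the representable range,
  // matching the "saturation can't happen with Scale==VTSize" note above.
  uint32_t Prod = uint32_t(A) * uint32_t(B);      // full-width product
  uint32_t Shifted = Prod >> Scale;               // drop the fractional bits
  uint32_t Max = std::numeric_limits<uint16_t>::max();
  return uint16_t(Shifted > Max ? Max : Shifted); // saturate, don't wrap
}

// With Scale == 8 (8 integer bits, 8 fraction bits), 3.5 * 2.0 == 7.0:
//   umulfixsat16(0x0380, 0x0200, 8) == 0x0700
// whereas 200.0 * 200.0 overflows the 8.8 format and saturates to 0xFFFF.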
@@ -4094,7 +4162,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::FP_EXTEND: case ISD::STRICT_FP_EXTEND: case ISD::FP_TO_SINT: + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_UINT: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::TRUNCATE: @@ -4434,7 +4504,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MGATHER(SDNode *N, unsigned OpNo) { SDValue Ops[] = {MG->getChain(), DataOp, Mask, MG->getBasePtr(), Index, Scale}; SDValue Res = DAG.getMaskedGather(MG->getVTList(), MG->getMemoryVT(), dl, Ops, - MG->getMemOperand()); + MG->getMemOperand(), MG->getIndexType()); ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); ReplaceValueWith(SDValue(N, 0), Res.getValue(0)); return SDValue(); @@ -4472,7 +4542,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { Scale}; return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), SDLoc(N), Ops, - MSC->getMemOperand()); + MSC->getMemOperand(), MSC->getIndexType()); } SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { @@ -4504,7 +4574,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - return PromoteTargetBoolean(CC, VT); + EVT OpVT = N->getOperand(0).getValueType(); + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); + return DAG.getNode(ExtendCode, dl, VT, CC); } SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) { @@ -4706,7 +4779,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, int LdWidth = LdVT.getSizeInBits(); int WidthDiff = WidenWidth - LdWidth; - unsigned LdAlign = LD->isVolatile() ? 0 : Align; // Allow wider loads. + unsigned LdAlign = (!LD->isSimple()) ? 0 : Align; // Allow wider loads. // Find the vector type that can load from. EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 2cb850fa1a3d..7ee44c808fcb 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -498,7 +498,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, // Check for def of register or earlyclobber register. for (; NumVals; --NumVals, ++i) { unsigned Reg = cast(Node->getOperand(i))->getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } else diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 34b4c8502353..ff806bdb822c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1188,6 +1188,10 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { if (!Pred.isArtificial()) AddPredQueued(NewSU, Pred); + // Make sure the clone comes after the original. (InstrEmitter assumes + // this ordering.) + AddPredQueued(NewSU, SDep(SU, SDep::Artificial)); + // Only copy scheduled successors. Cut them from old node's successor // list and move them over. SmallVector, 4> DelDeps; @@ -1374,7 +1378,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl &LRegs) { // Check for def of register or earlyclobber register. 
for (; NumVals; --NumVals, ++i) { unsigned Reg = cast(Node->getOperand(i))->getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); } } else @@ -2358,7 +2362,7 @@ static bool hasOnlyLiveInOpers(const SUnit *SU) { PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { unsigned Reg = cast(PredSU->getNode()->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { RetVal = true; continue; } @@ -2379,7 +2383,7 @@ static bool hasOnlyLiveOutUses(const SUnit *SU) { if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { unsigned Reg = cast(SuccSU->getNode()->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { RetVal = true; continue; } @@ -2948,8 +2952,8 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // like other nodes from the perspective of scheduling heuristics. if (SDNode *N = SU.getNode()) if (N->getOpcode() == ISD::CopyToReg && - TargetRegisterInfo::isVirtualRegister - (cast(N->getOperand(1))->getReg())) + Register::isVirtualRegister( + cast(N->getOperand(1))->getReg())) continue; SDNode *PredFrameSetup = nullptr; @@ -2995,8 +2999,8 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // like other nodes from the perspective of scheduling heuristics. if (SDNode *N = SU.getNode()) if (N->getOpcode() == ISD::CopyFromReg && - TargetRegisterInfo::isVirtualRegister - (cast(N->getOperand(1))->getReg())) + Register::isVirtualRegister( + cast(N->getOperand(1))->getReg())) continue; // Perform checks on the successors of PredSU. diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 568c6191e512..d4c1fb36475e 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -115,7 +115,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, return; unsigned Reg = cast(User->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) return; unsigned ResNo = User->getOperand(2).getResNo(); @@ -528,7 +528,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// glued together nodes with a single SUnit. -void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { +void ScheduleDAGSDNodes::BuildSchedGraph(AAResults *AA) { // Cluster certain nodes which should be scheduled together. ClusterNodes(); // Populate the SUnits array. @@ -656,7 +656,7 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg && !BB->succ_empty()) { unsigned Reg = cast(Use->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) // This copy is a liveout value. It is likely coalesced, so reduce the // latency so not to penalize the def. // FIXME: need target specific adjustment here? @@ -808,7 +808,7 @@ EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap, } else { // Copy from physical register. 
assert(I->getReg() && "Unknown physical register!"); - unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC); + Register VRBase = MRI.createVirtualRegister(SU->CopyDstRC); bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); @@ -909,6 +909,12 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // Remember the source order of the inserted instruction. if (HasDbg) ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn); + + if (MDNode *MD = DAG->getHeapAllocSite(N)) { + if (NewInsn && NewInsn->isCall()) + MF.addCodeViewHeapAllocSite(NewInsn, MD); + } + GluedNodes.pop_back(); } auto NewInsn = @@ -917,6 +923,10 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { if (HasDbg) ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn); + if (MDNode *MD = DAG->getHeapAllocSite(SU->getNode())) { + if (NewInsn && NewInsn->isCall()) + MF.addCodeViewHeapAllocSite(NewInsn, MD); + } } // Insert all the dbg_values which have not already been inserted in source diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 5163b4fa4fd3..183ce4b0652d 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -26,6 +26,7 @@ namespace llvm { +class AAResults; class InstrItineraryData; /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs. @@ -93,7 +94,7 @@ class InstrItineraryData; /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. - void BuildSchedGraph(AliasAnalysis *AA); + void BuildSchedGraph(AAResults *AA); /// InitNumRegDefsLeft - Determine the # of regs defined by this node. /// diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index ab06b55b49fd..e7bac73678a7 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -63,14 +63,13 @@ private: /// HazardRec - The hazard recognizer to use. ScheduleHazardRecognizer *HazardRec; - /// AA - AliasAnalysis for making memory reference queries. - AliasAnalysis *AA; + /// AA - AAResults for making memory reference queries. 
+ AAResults *AA; public: - ScheduleDAGVLIW(MachineFunction &mf, - AliasAnalysis *aa, + ScheduleDAGVLIW(MachineFunction &mf, AAResults *aa, SchedulingPriorityQueue *availqueue) - : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { + : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { const TargetSubtargetInfo &STI = mf.getSubtarget(); HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 5852e693fa9f..52a71b91d93f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -859,9 +859,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { break; case ISD::TargetExternalSymbol: { ExternalSymbolSDNode *ESN = cast(N); - Erased = TargetExternalSymbols.erase( - std::pair(ESN->getSymbol(), - ESN->getTargetFlags())); + Erased = TargetExternalSymbols.erase(std::pair( + ESN->getSymbol(), ESN->getTargetFlags())); break; } case ISD::MCSymbol: { @@ -1084,6 +1083,7 @@ void SelectionDAG::clear() { ExternalSymbols.clear(); TargetExternalSymbols.clear(); MCSymbols.clear(); + SDCallSiteDbgInfo.clear(); std::fill(CondCodeNodes.begin(), CondCodeNodes.end(), static_cast(nullptr)); std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(), @@ -1353,7 +1353,7 @@ SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT, SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t Offset, bool isTargetGA, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((TargetFlags == 0 || isTargetGA) && "Cannot set target flags on target-independent globals"); @@ -1400,7 +1400,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { } SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent jump tables"); unsigned Opc = isTarget ? 
ISD::TargetJumpTable : ISD::JumpTable; @@ -1421,7 +1421,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, unsigned Alignment, int Offset, bool isTarget, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) @@ -1449,7 +1449,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, unsigned Alignment, int Offset, bool isTarget, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) @@ -1473,7 +1473,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, } SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, - unsigned char TargetFlags) { + unsigned TargetFlags) { FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None); ID.AddInteger(Index); @@ -1535,10 +1535,9 @@ SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) { } SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, - unsigned char TargetFlags) { + unsigned TargetFlags) { SDNode *&N = - TargetExternalSymbols[std::pair(Sym, - TargetFlags)]; + TargetExternalSymbols[std::pair(Sym, TargetFlags)]; if (N) return SDValue(N, 0); N = newSDNode(true, Sym, TargetFlags, VT); InsertNode(N); @@ -1802,9 +1801,8 @@ SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl, } SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, - int64_t Offset, - bool isTarget, - unsigned char TargetFlags) { + int64_t Offset, bool isTarget, + unsigned TargetFlags) { unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress; FoldingSetNodeID ID; @@ -1900,20 +1898,19 @@ SDValue SelectionDAG::expandVAArg(SDNode *Node) { EVT VT = Node->getValueType(0); SDValue Tmp1 = Node->getOperand(0); SDValue Tmp2 = Node->getOperand(1); - unsigned Align = Node->getConstantOperandVal(3); + const MaybeAlign MA(Node->getConstantOperandVal(3)); SDValue VAListLoad = getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, Tmp2, MachinePointerInfo(V)); SDValue VAList = VAListLoad; - if (Align > TLI.getMinStackArgumentAlignment()) { - assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); - + if (MA && *MA > TLI.getMinStackArgumentAlignment()) { VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, - getConstant(Align - 1, dl, VAList.getValueType())); + getConstant(MA->value() - 1, dl, VAList.getValueType())); - VAList = getNode(ISD::AND, dl, VAList.getValueType(), VAList, - getConstant(-(int64_t)Align, dl, VAList.getValueType())); + VAList = + getNode(ISD::AND, dl, VAList.getValueType(), VAList, + getConstant(-(int64_t)MA->value(), dl, VAList.getValueType())); } // Increment the pointer, VAList, to the next vaarg @@ -2154,12 +2151,9 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, } case ISD::OR: case ISD::XOR: - // If the LHS or RHS don't contribute bits to the or, drop them. 
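[Editor's aside, not part of the patch] The expandVAArg hunk above switches the raw alignment integer to MaybeAlign but keeps the same rounding trick: add align-1, then mask with -align to round the va_list pointer up to the next aligned boundary. A small standalone sketch of that arithmetic on plain integers (not SelectionDAG nodes), assuming a power-of-two alignment as MaybeAlign guarantees:

  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  // Round Addr up to the next multiple of Align: (Addr + Align - 1) & -Align.
  // For a power of two, ~(Align - 1) has the same bit pattern as -(int64_t)Align.
  static uint64_t alignUp(uint64_t Addr, uint64_t Align) {
    assert(Align && (Align & (Align - 1)) == 0 && "alignment must be a power of 2");
    return (Addr + Align - 1) & ~(Align - 1);
  }

  int main() {
    std::printf("%llu\n", (unsigned long long)alignUp(13, 8));  // 16
    std::printf("%llu\n", (unsigned long long)alignUp(16, 8));  // 16, already aligned
    return 0;
  }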
- if (MaskedValueIsZero(V.getOperand(0), DemandedBits)) - return V.getOperand(1); - if (MaskedValueIsZero(V.getOperand(1), DemandedBits)) - return V.getOperand(0); - break; + case ISD::SIGN_EXTEND_INREG: + return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts, + *this, 0); case ISD::SRL: // Only look at single-use SRLs. if (!V.getNode()->hasOneUse()) @@ -2203,15 +2197,6 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc); break; } - case ISD::SIGN_EXTEND_INREG: - EVT ExVT = cast(V.getOperand(1))->getVT(); - unsigned ExVTBits = ExVT.getScalarSizeInBits(); - - // If none of the extended bits are demanded, eliminate the sextinreg. - if (DemandedBits.getActiveBits() <= ExVTBits) - return V.getOperand(0); - - break; } return SDValue(); } @@ -2395,15 +2380,39 @@ SDValue SelectionDAG::getSplatValue(SDValue V) { /// If a SHL/SRA/SRL node has a constant or splat constant shift amount that /// is less than the element bit-width of the shift node, return it. static const APInt *getValidShiftAmountConstant(SDValue V) { + unsigned BitWidth = V.getScalarValueSizeInBits(); if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1))) { // Shifting more than the bitwidth is not valid. const APInt &ShAmt = SA->getAPIntValue(); - if (ShAmt.ult(V.getScalarValueSizeInBits())) + if (ShAmt.ult(BitWidth)) return &ShAmt; } return nullptr; } +/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less +/// than the element bit-width of the shift node, return the minimum value. +static const APInt *getValidMinimumShiftAmountConstant(SDValue V) { + unsigned BitWidth = V.getScalarValueSizeInBits(); + auto *BV = dyn_cast(V.getOperand(1)); + if (!BV) + return nullptr; + const APInt *MinShAmt = nullptr; + for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + auto *SA = dyn_cast(BV->getOperand(i)); + if (!SA) + return nullptr; + // Shifting more than the bitwidth is not valid. + const APInt &ShAmt = SA->getAPIntValue(); + if (ShAmt.uge(BitWidth)) + return nullptr; + if (MinShAmt && MinShAmt->ule(ShAmt)) + continue; + MinShAmt = &ShAmt; + } + return MinShAmt; +} + /// Determine which bits of Op are known to be either zero or one and return /// them in Known. For vectors, the known bits are those that are shared by /// every vector element. @@ -2437,7 +2446,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, return Known; } - if (Depth == 6) + if (Depth >= MaxRecursionDepth) return Known; // Limit search depth. KnownBits Known2; @@ -2582,14 +2591,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, SDValue Src = Op.getOperand(0); ConstantSDNode *SubIdx = dyn_cast(Op.getOperand(1)); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts); if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { // Offset the demanded elts by the subvector index. 
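[Editor's aside, not part of the patch] The new getValidMinimumShiftAmountConstant above walks a constant vector of per-lane shift amounts, gives up on any non-constant or out-of-range lane, and returns the smallest amount, so a caller such as the SRL known-bits case can mark that many high result bits as zero. A standalone sketch of the same scan over a plain array, with std::optional standing in for the "nothing is known" result:

  #include <cstdio>
  #include <optional>
  #include <vector>

  // Returns the minimum per-lane shift amount if every lane is a known constant
  // strictly smaller than BitWidth; otherwise returns nullopt.
  static std::optional<unsigned>
  minValidShiftAmount(const std::vector<std::optional<unsigned>> &Lanes,
                      unsigned BitWidth) {
    std::optional<unsigned> Min;
    for (const auto &L : Lanes) {
      if (!L || *L >= BitWidth)        // unknown lane or oversized shift: give up
        return std::nullopt;
      if (!Min || *L < *Min)
        Min = *L;
    }
    return Min;
  }

  int main() {
    // For a v4i32 logical right shift by <4, 7, 4, 12>, at least the top 4 bits
    // of every result lane are known zero.
    std::vector<std::optional<unsigned>> Amts = {4, 7, 4, 12};
    if (auto Min = minValidShiftAmount(Amts, 32))
      std::printf("known zero high bits: %u\n", *Min);   // prints 4
    return 0;
  }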
uint64_t Idx = SubIdx->getZExtValue(); - APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); - Known = computeKnownBits(Src, DemandedSrc, Depth + 1); - } else { - Known = computeKnownBits(Src, Depth + 1); + DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); } + Known = computeKnownBits(Src, DemandedSrc, Depth + 1); break; } case ISD::SCALAR_TO_VECTOR: { @@ -2800,25 +2808,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.One.lshrInPlace(Shift); // High bits are known zero. Known.Zero.setHighBits(Shift); - } else if (auto *BV = dyn_cast(Op.getOperand(1))) { - // If the shift amount is a vector of constants see if we can bound - // the number of upper zero bits. - unsigned ShiftAmountMin = BitWidth; - for (unsigned i = 0; i != BV->getNumOperands(); ++i) { - if (auto *C = dyn_cast(BV->getOperand(i))) { - const APInt &ShAmt = C->getAPIntValue(); - if (ShAmt.ult(BitWidth)) { - ShiftAmountMin = std::min(ShiftAmountMin, - ShAmt.getZExtValue()); - continue; - } - } - // Don't know anything. - ShiftAmountMin = 0; - break; - } - - Known.Zero.setHighBits(ShiftAmountMin); + } else if (const APInt *ShMinAmt = getValidMinimumShiftAmountConstant(Op)) { + // Minimum shift high bits are known zero. + Known.Zero.setHighBits(ShMinAmt->getZExtValue()); } break; case ISD::SRA: @@ -3105,12 +3097,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, // If the first operand is non-negative or has all low bits zero, then // the upper bits are all zero. - if (Known2.Zero[BitWidth-1] || ((Known2.Zero & LowBits) == LowBits)) + if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero)) Known.Zero |= ~LowBits; // If the first operand is negative and not all low bits are zero, then // the upper bits are all one. - if (Known2.One[BitWidth-1] && ((Known2.One & LowBits) != 0)) + if (Known2.isNegative() && LowBits.intersects(Known2.One)) Known.One |= ~LowBits; assert((Known.Zero & Known.One) == 0&&"Bits known to be one AND zero?"); } @@ -3427,7 +3419,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return Val.getNumSignBits(); } - if (Depth == 6) + if (Depth >= MaxRecursionDepth) return 1; // Limit search depth. if (!DemandedElts) @@ -3729,6 +3721,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); if (Tmp == 1) return 1; // Early out. return std::min(Tmp, Tmp2)-1; + case ISD::MUL: { + // The output of the Mul can be at most twice the valid bits in the inputs. + unsigned SignBitsOp0 = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + if (SignBitsOp0 == 1) + break; + unsigned SignBitsOp1 = ComputeNumSignBits(Op.getOperand(1), Depth + 1); + if (SignBitsOp1 == 1) + break; + unsigned OutValidBits = + (VTBits - SignBitsOp0 + 1) + (VTBits - SignBitsOp1 + 1); + return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1; + } case ISD::TRUNCATE: { // Check if the sign bits of source go down as far as the truncated value. unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits(); @@ -3817,13 +3821,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, SDValue Src = Op.getOperand(0); ConstantSDNode *SubIdx = dyn_cast(Op.getOperand(1)); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts); if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { // Offset the demanded elts by the subvector index. 
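[Editor's aside, not part of the patch] The new ISD::MUL case in ComputeNumSignBits above relies on the fact that a signed product needs at most the sum of its operands' "valid" (non-sign-duplicate) bits: with VTBits total bits and S known sign bits, an operand contributes VTBits - S + 1 valid bits, and whatever width is left over must be sign bits of the result. A standalone sketch of that arithmetic:

  #include <cstdio>

  // Conservative sign-bit count for a product, given the operands' sign-bit
  // counts at width VTBits (same arithmetic as the ISD::MUL case above).
  static unsigned signBitsOfMul(unsigned VTBits, unsigned SignBits0,
                                unsigned SignBits1) {
    if (SignBits0 == 1 || SignBits1 == 1)
      return 1;                                 // nothing useful known
    // An operand with S sign bits occupies at most VTBits - S + 1 valid bits,
    // and the product needs at most the sum of its operands' valid bits.
    unsigned OutValidBits = (VTBits - SignBits0 + 1) + (VTBits - SignBits1 + 1);
    return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1;
  }

  int main() {
    // Two i32 values that are really sign-extended i8s (25 sign bits each):
    // their product fits in 16 bits, so at least 17 sign bits survive.
    std::printf("%u\n", signBitsOfMul(32, 25, 25));  // prints 17
    return 0;
  }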
uint64_t Idx = SubIdx->getZExtValue(); - APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); - return ComputeNumSignBits(Src, DemandedSrc, Depth + 1); + DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); } - return ComputeNumSignBits(Src, Depth + 1); + return ComputeNumSignBits(Src, DemandedSrc, Depth + 1); } case ISD::CONCAT_VECTORS: { // Determine the minimum number of sign bits across all demanded @@ -3976,7 +3980,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs()) return true; - if (Depth == 6) + if (Depth >= MaxRecursionDepth) return false; // Limit search depth. // TODO: Handle vectors. @@ -4645,7 +4649,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return getUNDEF(VT); // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) && + if ((getTarget().Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) && OpOpcode == ISD::FSUB) return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1), Operand.getOperand(0), Flags); @@ -5156,22 +5160,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (N2C && N2C->isNullValue()) return N1; break; - case ISD::FP_ROUND_INREG: { - EVT EVT = cast(N2)->getVT(); - assert(VT == N1.getValueType() && "Not an inreg round!"); - assert(VT.isFloatingPoint() && EVT.isFloatingPoint() && - "Cannot FP_ROUND_INREG integer types"); - assert(EVT.isVector() == VT.isVector() && - "FP_ROUND_INREG type should be vector iff the operand " - "type is vector!"); - assert((!EVT.isVector() || - EVT.getVectorNumElements() == VT.getVectorNumElements()) && - "Vector element counts must match in FP_ROUND_INREG"); - assert(EVT.bitsLE(VT) && "Not rounding down!"); - (void)EVT; - if (cast(N2)->getVT() == VT) return N1; // Not actually rounding. - break; - } case ISD::FP_ROUND: assert(VT.isFloatingPoint() && N1.getValueType().isFloatingPoint() && @@ -5382,7 +5370,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, std::swap(N1, N2); } else { switch (Opcode) { - case ISD::FP_ROUND_INREG: case ISD::SIGN_EXTEND_INREG: case ISD::SUB: return getUNDEF(VT); // fold op(undef, arg2) -> undef @@ -5770,7 +5757,7 @@ static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - uint64_t Size, unsigned Align, + uint64_t Size, unsigned Alignment, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { @@ -5795,15 +5782,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; unsigned SrcAlign = DAG.InferPtrAlignment(Src); - if (Align > SrcAlign) - SrcAlign = Align; + if (Alignment > SrcAlign) + SrcAlign = Alignment; ConstantDataArraySlice Slice; bool CopyFromConstant = isMemSrcFromConstant(Src, Slice); bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); if (!TLI.findOptimalMemOpLowering( - MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), + MemOps, Limit, Size, (DstAlignCanChange ? 0 : Alignment), (isZeroConstant ? 
0 : SrcAlign), /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant, /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), @@ -5818,15 +5805,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, // realignment. const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) - while (NewAlign > Align && - DL.exceedsNaturalStackAlignment(NewAlign)) - NewAlign /= 2; + while (NewAlign > Alignment && + DL.exceedsNaturalStackAlignment(Align(NewAlign))) + NewAlign /= 2; - if (NewAlign > Align) { + if (NewAlign > Alignment) { // Give the stack frame object a larger alignment if needed. if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) MFI.setObjectAlignment(FI->getIndex(), NewAlign); - Align = NewAlign; + Alignment = NewAlign; } } @@ -5869,10 +5856,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, } Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice); if (Value.getNode()) { - Store = DAG.getStore(Chain, dl, Value, - DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), Align, - MMOFlags); + Store = DAG.getStore( + Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), + DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags); OutChains.push_back(Store); } } @@ -5900,7 +5886,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, Store = DAG.getTruncStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags); + DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags); OutStoreChains.push_back(Store); } SrcOff += VTSize; @@ -6567,7 +6553,7 @@ SDValue SelectionDAG::getMergeValues(ArrayRef Ops, const SDLoc &dl) { SDValue SelectionDAG::getMemIntrinsicNode( unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, - MachineMemOperand::Flags Flags, unsigned Size, const AAMDNodes &AAInfo) { + MachineMemOperand::Flags Flags, uint64_t Size, const AAMDNodes &AAInfo) { if (Align == 0) // Ensure that codegen never sees alignment 0 Align = getEVTAlignment(MemVT); @@ -6619,7 +6605,9 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, createOperands(N, Ops); } InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl, @@ -7022,14 +7010,15 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef Ops, - MachineMemOperand *MMO) { + MachineMemOperand *MMO, + ISD::MemIndexType IndexType) { assert(Ops.size() == 6 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); ID.AddInteger(VT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData( - dl.getIROrder(), VTs, VT, MMO)); + dl.getIROrder(), VTs, VT, MMO, IndexType)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -7038,7 +7027,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, } auto *N = newSDNode(dl.getIROrder(), dl.getDebugLoc(), - VTs, VT, MMO); + VTs, VT, MMO, IndexType); createOperands(N, Ops); assert(N->getPassThru().getValueType() == N->getValueType(0) && @@ -7062,14 +7051,15 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, 
const SDLoc &dl, SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef Ops, - MachineMemOperand *MMO) { + MachineMemOperand *MMO, + ISD::MemIndexType IndexType) { assert(Ops.size() == 6 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); ID.AddInteger(VT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData( - dl.getIROrder(), VTs, VT, MMO)); + dl.getIROrder(), VTs, VT, MMO, IndexType)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -7077,7 +7067,7 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, return SDValue(E, 0); } auto *N = newSDNode(dl.getIROrder(), dl.getDebugLoc(), - VTs, VT, MMO); + VTs, VT, MMO, IndexType); createOperands(N, Ops); assert(N->getMask().getValueType().getVectorNumElements() == @@ -7766,16 +7756,22 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; break; case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; break; case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; break; + case ISD::STRICT_LRINT: NewOpc = ISD::LRINT; break; + case ISD::STRICT_LLRINT: NewOpc = ISD::LLRINT; break; case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; break; case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break; case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break; case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break; case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; break; case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; break; + case ISD::STRICT_LROUND: NewOpc = ISD::LROUND; break; + case ISD::STRICT_LLROUND: NewOpc = ISD::LLROUND; break; case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; break; case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; break; case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break; case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; break; + case ISD::STRICT_FP_TO_SINT: NewOpc = ISD::FP_TO_SINT; break; + case ISD::STRICT_FP_TO_UINT: NewOpc = ISD::FP_TO_UINT; break; } assert(Node->getNumValues() == 2 && "Unexpected number of results!"); @@ -7925,6 +7921,7 @@ MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL, CSEMap.InsertNode(N, IP); InsertNode(N); + NewSDValueDbgMsg(SDValue(N, 0), "Creating new machine node: ", this); return N; } @@ -8619,7 +8616,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, // TokenFactor. SDValue OldChain = SDValue(OldLoad, 1); SDValue NewChain = SDValue(NewMemOp.getNode(), 1); - if (!OldLoad->hasAnyUseOfValue(1)) + if (OldChain == NewChain || !OldLoad->hasAnyUseOfValue(1)) return NewChain; SDValue TokenFactor = @@ -8812,7 +8809,7 @@ HandleSDNode::~HandleSDNode() { GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, const GlobalValue *GA, EVT VT, - int64_t o, unsigned char TF) + int64_t o, unsigned TF) : SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) { TheGlobal = GA; } @@ -8986,7 +8983,7 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, // Loads don't have side effects, look through them. 
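[Editor's aside, not part of the patch] The getMaskedGather/getMaskedScatter hunks above thread an ISD::MemIndexType through node creation so the node records how its index operand is interpreted (signed or unsigned, scaled or unscaled). As a reference point, a scalar model of what a signed, scaled gather computes per lane, written as plain loops rather than DAG nodes:

  #include <cstdint>
  #include <cstdio>

  // Scalar model of a masked gather with a SIGNED_SCALED index: each active
  // lane loads from Base + Index[i] * Scale; inactive lanes keep PassThru.
  static void maskedGather(int32_t *Dst, const int32_t *PassThru,
                           const uint8_t *Base, const int64_t *Index,
                           int64_t Scale, const bool *Mask, unsigned N) {
    for (unsigned i = 0; i != N; ++i) {
      if (Mask[i])
        Dst[i] = *reinterpret_cast<const int32_t *>(Base + Index[i] * Scale);
      else
        Dst[i] = PassThru[i];
    }
  }

  int main() {
    int32_t Data[8] = {10, 11, 12, 13, 14, 15, 16, 17};
    int64_t Index[4] = {7, 0, 3, 5};
    bool Mask[4] = {true, true, false, true};
    int32_t Pass[4] = {-1, -1, -1, -1}, Dst[4];
    maskedGather(Dst, Pass, reinterpret_cast<const uint8_t *>(Data), Index,
                 sizeof(int32_t), Mask, 4);
    for (int V : Dst) std::printf("%d ", V);       // 17 10 -1 15
    std::printf("\n");
    return 0;
  }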
if (LoadSDNode *Ld = dyn_cast(*this)) { - if (!Ld->isVolatile()) + if (Ld->isUnordered()) return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1); } return false; @@ -9005,21 +9002,51 @@ void SDNode::intersectFlagsWith(const SDNodeFlags Flags) { SDValue SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, - ArrayRef CandidateBinOps) { + ArrayRef CandidateBinOps, + bool AllowPartials) { // The pattern must end in an extract from index 0. if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT || !isNullConstant(Extract->getOperand(1))) return SDValue(); - SDValue Op = Extract->getOperand(0); - unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements()); - // Match against one of the candidate binary ops. + SDValue Op = Extract->getOperand(0); if (llvm::none_of(CandidateBinOps, [Op](ISD::NodeType BinOp) { return Op.getOpcode() == unsigned(BinOp); })) return SDValue(); + // Floating-point reductions may require relaxed constraints on the final step + // of the reduction because they may reorder intermediate operations. + unsigned CandidateBinOp = Op.getOpcode(); + if (Op.getValueType().isFloatingPoint()) { + SDNodeFlags Flags = Op->getFlags(); + switch (CandidateBinOp) { + case ISD::FADD: + if (!Flags.hasNoSignedZeros() || !Flags.hasAllowReassociation()) + return SDValue(); + break; + default: + llvm_unreachable("Unhandled FP opcode for binop reduction"); + } + } + + // Matching failed - attempt to see if we did enough stages that a partial + // reduction from a subvector is possible. + auto PartialReduction = [&](SDValue Op, unsigned NumSubElts) { + if (!AllowPartials || !Op) + return SDValue(); + EVT OpVT = Op.getValueType(); + EVT OpSVT = OpVT.getScalarType(); + EVT SubVT = EVT::getVectorVT(*getContext(), OpSVT, NumSubElts); + if (!TLI->isExtractSubvectorCheap(SubVT, OpVT, 0)) + return SDValue(); + BinOp = (ISD::NodeType)CandidateBinOp; + return getNode( + ISD::EXTRACT_SUBVECTOR, SDLoc(Op), SubVT, Op, + getConstant(0, SDLoc(Op), TLI->getVectorIdxTy(getDataLayout()))); + }; + // At each stage, we're looking for something that looks like: // %s = shufflevector <8 x i32> %op, <8 x i32> undef, // <8 x i32> // <2,3,u,u,u,u,u,u> // <1,u,u,u,u,u,u,u> - unsigned CandidateBinOp = Op.getOpcode(); + // While a partial reduction match would be: + // <2,3,u,u,u,u,u,u> + // <1,u,u,u,u,u,u,u> + unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements()); + SDValue PrevOp; for (unsigned i = 0; i < Stages; ++i) { + unsigned MaskEnd = (1 << i); + if (Op.getOpcode() != CandidateBinOp) - return SDValue(); + return PartialReduction(PrevOp, MaskEnd); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -9049,12 +9082,14 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, // The first operand of the shuffle should be the same as the other operand // of the binop. if (!Shuffle || Shuffle->getOperand(0) != Op) - return SDValue(); + return PartialReduction(PrevOp, MaskEnd); // Verify the shuffle has the expected (at this stage of the pyramid) mask. 
- for (int Index = 0, MaskEnd = 1 << i; Index < MaskEnd; ++Index) - if (Shuffle->getMaskElt(Index) != MaskEnd + Index) - return SDValue(); + for (int Index = 0; Index < (int)MaskEnd; ++Index) + if (Shuffle->getMaskElt(Index) != (int)(MaskEnd + Index)) + return PartialReduction(PrevOp, MaskEnd); + + PrevOp = Op; } BinOp = (ISD::NodeType)CandidateBinOp; @@ -9114,8 +9149,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { getShiftAmountOperand(Operands[0].getValueType(), Operands[1]))); break; - case ISD::SIGN_EXTEND_INREG: - case ISD::FP_ROUND_INREG: { + case ISD::SIGN_EXTEND_INREG: { EVT ExtVT = cast(Operands[1])->getVT().getVectorElementType(); Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], @@ -9187,6 +9221,9 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, int Dist) const { if (LD->isVolatile() || Base->isVolatile()) return false; + // TODO: probably too restrictive for atomics, revisit + if (!LD->isSimple()) + return false; if (LD->isIndexed() || Base->isIndexed()) return false; if (LD->getChain() != Base->getChain()) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index 9592bc30a4e1..3a53ab9717a4 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" #include using namespace llvm; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index e818dd27c05e..8c15563fcd23 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -833,7 +833,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, // If the source register was virtual and if we know something about it, // add an assert node. - if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) || + if (!Register::isVirtualRegister(Regs[Part + i]) || !RegisterVT.isInteger()) continue; @@ -948,8 +948,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); if (HasMatching) Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); - else if (!Regs.empty() && - TargetRegisterInfo::isVirtualRegister(Regs.front())) { + else if (!Regs.empty() && Register::isVirtualRegister(Regs.front())) { // Put the register class of the virtual registers in the flag word. That // way, later passes can recompute register class constraints for inline // assembly as well as normal instructions. @@ -1810,7 +1809,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // offsets to its parts don't wrap either. 
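[Editor's aside, not part of the patch] The matchBinOpReduction changes above track the previously matched stage (PrevOp) so that when the shuffle pyramid stops matching early, an EXTRACT_SUBVECTOR of the last matched input can still be returned as a partial reduction. The shape being matched is a log2 tree: each stage combines lane j with lane j + 2^i. A standalone sketch of that pyramid for a plain array add-reduction, with the partial case shown as reducing only the low subvector:

  #include <cstdio>
  #include <vector>

  // One pyramid stage: combine lane j with lane j + Half, halving the live
  // lanes (the same shape the matcher recognizes as shuffle+binop pairs).
  static std::vector<int> stage(const std::vector<int> &V, unsigned Half) {
    std::vector<int> R(V.begin(), V.begin() + Half);
    for (unsigned j = 0; j != Half; ++j)
      R[j] += V[j + Half];
    return R;
  }

  int main() {
    std::vector<int> V = {1, 2, 3, 4, 5, 6, 7, 8};

    // Full reduction: stages with offsets 4, 2, 1; lane 0 ends up as the sum.
    std::vector<int> A = stage(stage(stage(V, 4), 2), 1);
    std::printf("full: %d\n", A[0]);                     // 36

    // Partial case: if only the offset-2 and offset-1 stages are present, lane 0
    // is the reduction of just the low 4 input lanes - exactly the subvector the
    // new PartialReduction path extracts and hands back to the caller.
    std::vector<int> Low(V.begin(), V.begin() + 4);
    std::vector<int> B = stage(stage(Low, 2), 1);
    std::printf("partial: %d  (== 1+2+3+4)\n", B[0]);    // 10
    return 0;
  }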
SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]); - SDValue Val = RetOp.getValue(i); + SDValue Val = RetOp.getValue(RetOp.getResNo() + i); if (MemVTs[i] != ValueVTs[i]) Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]); Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val, @@ -2263,7 +2262,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { if (const BinaryOperator *BOp = dyn_cast(CondVal)) { Instruction::BinaryOps Opcode = BOp->getOpcode(); if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() && - !I.getMetadata(LLVMContext::MD_unpredictable) && + !I.hasMetadata(LLVMContext::MD_unpredictable) && (Opcode == Instruction::And || Opcode == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode, @@ -2600,9 +2599,11 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, void SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setDiscardResult(true); SDValue Chain = TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid, - None, false, getCurSDLoc(), false, false).second; + None, CallOptions, getCurSDLoc()).second; // On PS4, the "return address" must still be within the calling function, // even if it's at the very end, so emit an explicit TRAP here. // Passing 'true' for doesNotReturn above won't generate the trap for us. @@ -2618,24 +2619,18 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB) { SDLoc dl = getCurSDLoc(); - // Subtract the minimum value + // Subtract the minimum value. SDValue SwitchOp = getValue(B.SValue); EVT VT = SwitchOp.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp, - DAG.getConstant(B.First, dl, VT)); - - // Check range - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDValue RangeCmp = DAG.getSetCC( - dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT); + SDValue RangeSub = + DAG.getNode(ISD::SUB, dl, VT, SwitchOp, DAG.getConstant(B.First, dl, VT)); // Determine the type of the test operands. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); bool UsePtrType = false; - if (!TLI.isTypeLegal(VT)) + if (!TLI.isTypeLegal(VT)) { UsePtrType = true; - else { + } else { for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) { // Switch table case range are encoded into series of masks. @@ -2644,6 +2639,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, break; } } + SDValue Sub = RangeSub; if (UsePtrType) { VT = TLI.getPointerTy(DAG.getDataLayout()); Sub = DAG.getZExtOrTrunc(Sub, dl, VT); @@ -2655,20 +2651,29 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock* MBB = B.Cases[0].ThisBB; - addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); + if (!B.OmitRangeCheck) + addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); addSuccessorWithProb(SwitchBB, MBB, B.Prob); SwitchBB->normalizeSuccProbs(); - SDValue BrRange = DAG.getNode(ISD::BRCOND, dl, - MVT::Other, CopyTo, RangeCmp, - DAG.getBasicBlock(B.Default)); + SDValue Root = CopyTo; + if (!B.OmitRangeCheck) { + // Conditional branch to the default block. 
+ SDValue RangeCmp = DAG.getSetCC(dl, + TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + RangeSub.getValueType()), + RangeSub, DAG.getConstant(B.Range, dl, RangeSub.getValueType()), + ISD::SETUGT); + + Root = DAG.getNode(ISD::BRCOND, dl, MVT::Other, Root, RangeCmp, + DAG.getBasicBlock(B.Default)); + } // Avoid emitting unnecessary branches to the next block. if (MBB != NextBlock(SwitchBB)) - BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange, - DAG.getBasicBlock(MBB)); + Root = DAG.getNode(ISD::BR, dl, MVT::Other, Root, DAG.getBasicBlock(MBB)); - DAG.setRoot(BrRange); + DAG.setRoot(Root); } /// visitBitTestCase - this function produces one "bit test" @@ -3266,8 +3271,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { // We care about the legality of the operation after it has been type // legalized. - while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal && - VT != TLI.getTypeToTransformTo(Ctx, VT)) + while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal) VT = TLI.getTypeToTransformTo(Ctx, VT); // If the vselect is legal, assume we want to leave this as a vector setcc + @@ -3534,17 +3538,32 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); + Constant *MaskV = cast(I.getOperand(2)); SDLoc DL = getCurSDLoc(); - - SmallVector Mask; - ShuffleVectorInst::getShuffleMask(cast(I.getOperand(2)), Mask); - unsigned MaskNumElts = Mask.size(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); + if (MaskV->isNullValue() && VT.isScalableVector()) { + // Canonical splat form of first element of first input vector. + SDValue FirstElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + SrcVT.getScalarType(), Src1, + DAG.getConstant(0, DL, + TLI.getVectorIdxTy(DAG.getDataLayout()))); + setValue(&I, DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, FirstElt)); + return; + } + + // For now, we only handle splats for scalable vectors. + // The DAGCombiner will perform a BUILD_VECTOR -> SPLAT_VECTOR transformation + // for targets that support a SPLAT_VECTOR for non-scalable vector types. + assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle"); + + SmallVector Mask; + ShuffleVectorInst::getShuffleMask(MaskV, Mask); + unsigned MaskNumElts = Mask.size(); + if (SrcNumElts == MaskNumElts) { setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask)); return; @@ -3825,7 +3844,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // Normalize Vector GEP - all scalar operands should be converted to the // splat vector. unsigned VectorWidth = I.getType()->isVectorTy() ? - cast(I.getType())->getVectorNumElements() : 0; + I.getType()->getVectorNumElements() : 0; if (VectorWidth && !N.getValueType().isVector()) { LLVMContext &Context = *DAG.getContext(); @@ -3858,12 +3877,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // If this is a scalar constant or a splat vector of constants, // handle it quickly. 
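[Editor's aside, not part of the patch] The visitBitTestHeader hunk above builds the range subtraction once and emits the unsigned range check only when the cluster's OmitRangeCheck flag is clear (a later hunk in this patch sets the flag when the fallthrough block is unreachable). A scalar sketch of what a bit-test-lowered switch computes, with the optional range check made explicit; the case values and masks are made up for the example:

  #include <cstdint>
  #include <cstdio>

  // Scalar model of a bit-test lowered switch over cases {10, 12, 13, 17}:
  // subtract the smallest case value, optionally range-check against the span,
  // then test one precomputed mask per destination block.
  static const char *dispatch(uint64_t X, bool OmitRangeCheck) {
    const uint64_t First = 10, Range = 7;               // cases span [10, 17]
    const uint64_t MaskA = 0b0000'0001;                 // case 10        -> "A"
    const uint64_t MaskB = 0b1000'1100;                 // cases 12,13,17 -> "B"
    uint64_t Sub = X - First;
    if (!OmitRangeCheck && Sub > Range)                 // conditional branch to default
      return "default";
    uint64_t Bit = uint64_t(1) << Sub;
    if (Bit & MaskA) return "A";
    if (Bit & MaskB) return "B";
    return "default";
  }

  int main() {
    std::printf("%s %s %s\n", dispatch(13, false), dispatch(10, false),
                dispatch(42, false));                   // B A default
    return 0;
  }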
- const auto *CI = dyn_cast(Idx); - if (!CI && isa(Idx) && - cast(Idx)->getSplatValue()) - CI = cast(cast(Idx)->getSplatValue()); + const auto *C = dyn_cast(Idx); + if (C && isa(C->getType())) + C = C->getSplatValue(); - if (CI) { + if (const auto *CI = dyn_cast_or_null(C)) { if (CI->isZero()) continue; APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize); @@ -3872,7 +3890,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) : DAG.getConstant(Offs, dl, IdxTy); - // In an inbouds GEP with an offset that is nonnegative even when + // In an inbounds GEP with an offset that is nonnegative even when // interpreted as signed, assume there is no unsigned overflow. SDNodeFlags Flags; if (Offs.isNonNegative() && cast(I).isInBounds()) @@ -4002,8 +4020,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Type *Ty = I.getType(); bool isVolatile = I.isVolatile(); - bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; - bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr; + bool isNonTemporal = I.hasMetadata(LLVMContext::MD_nontemporal); + bool isInvariant = I.hasMetadata(LLVMContext::MD_invariant_load); bool isDereferenceable = isDereferenceablePointer(SV, I.getType(), DAG.getDataLayout()); unsigned Alignment = I.getAlignment(); @@ -4118,7 +4136,7 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { SDValue Src = getValue(SrcV); // Create a virtual register, then update the virtual register. - unsigned VReg = + Register VReg = SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand()); // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue // Chain can be getRoot or getControlRoot. @@ -4132,8 +4150,8 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { "call visitLoadFromSwiftError when backend supports swifterror"); assert(!I.isVolatile() && - I.getMetadata(LLVMContext::MD_nontemporal) == nullptr && - I.getMetadata(LLVMContext::MD_invariant_load) == nullptr && + !I.hasMetadata(LLVMContext::MD_nontemporal) && + !I.hasMetadata(LLVMContext::MD_invariant_load) && "Support volatile, non temporal, invariant for load_from_swift_error"); const Value *SV = I.getOperand(0); @@ -4209,7 +4227,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { auto MMOFlags = MachineMemOperand::MONone; if (I.isVolatile()) MMOFlags |= MachineMemOperand::MOVolatile; - if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr) + if (I.hasMetadata(LLVMContext::MD_nontemporal)) MMOFlags |= MachineMemOperand::MONonTemporal; MMOFlags |= TLI.getMMOFlags(I); @@ -4309,8 +4327,9 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, // are looking for. If first operand of the GEP is a splat vector - we // extract the splat value and use it as a uniform base. // In all other cases the function returns 'false'. -static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, - SDValue &Scale, SelectionDAGBuilder* SDB) { +static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index, + ISD::MemIndexType &IndexType, SDValue &Scale, + SelectionDAGBuilder *SDB) { SelectionDAG& DAG = SDB->DAG; LLVMContext &Context = *DAG.getContext(); @@ -4330,8 +4349,13 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, // Ensure all the other indices are 0. 
for (unsigned i = 1; i < FinalIndex; ++i) { - auto *C = dyn_cast(GEP->getOperand(i)); - if (!C || !C->isZero()) + auto *C = dyn_cast(GEP->getOperand(i)); + if (!C) + return false; + if (isa(C->getType())) + C = C->getSplatValue(); + auto *CI = dyn_cast_or_null(C); + if (!CI || !CI->isZero()) return false; } @@ -4346,6 +4370,7 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, SDB->getCurSDLoc(), TLI.getPointerTy(DL)); Base = SDB->getValue(Ptr); Index = SDB->getValue(IndexVal); + IndexType = ISD::SIGNED_SCALED; if (!Index.getValueType().isVector()) { unsigned GEPWidth = GEP->getType()->getVectorNumElements(); @@ -4373,9 +4398,11 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDValue Base; SDValue Index; + ISD::MemIndexType IndexType; SDValue Scale; const Value *BasePtr = Ptr; - bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this); + bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale, + this); const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; MachineMemOperand *MMO = DAG.getMachineFunction(). @@ -4385,11 +4412,12 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); + IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale }; SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, - Ops, MMO); + Ops, MMO, IndexType); DAG.setRoot(Scatter); setValue(&I, Scatter); } @@ -4476,9 +4504,11 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDValue Root = DAG.getRoot(); SDValue Base; SDValue Index; + ISD::MemIndexType IndexType; SDValue Scale; const Value *BasePtr = Ptr; - bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this); + bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale, + this); bool ConstantMemory = false; if (UniformBase && AA && AA->pointsToConstantMemory( @@ -4500,11 +4530,12 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); + IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale }; SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, - Ops, MMO); + Ops, MMO, IndexType); SDValue OutChain = Gather.getValue(1); if (!ConstantMemory) @@ -4628,7 +4659,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { auto Flags = MachineMemOperand::MOLoad; if (I.isVolatile()) Flags |= MachineMemOperand::MOVolatile; - if (I.getMetadata(LLVMContext::MD_invariant_load) != nullptr) + if (I.hasMetadata(LLVMContext::MD_invariant_load)) Flags |= MachineMemOperand::MOInvariant; if (isDereferenceablePointer(I.getPointerOperand(), I.getType(), DAG.getDataLayout())) @@ -4645,9 +4676,27 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { AAMDNodes(), nullptr, SSID, Order); InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); - SDValue L = - DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain, - getValue(I.getPointerOperand()), MMO); + + SDValue Ptr = getValue(I.getPointerOperand()); + + if (TLI.lowerAtomicLoadAsLoadSDNode(I)) { + // TODO: Once this is better exercised by tests, it should be merged 
with + // the normal path for loads to prevent future divergence. + SDValue L = DAG.getLoad(MemVT, dl, InChain, Ptr, MMO); + if (MemVT != VT) + L = DAG.getPtrExtOrTrunc(L, dl, VT); + + setValue(&I, L); + SDValue OutChain = L.getValue(1); + if (!I.isUnordered()) + DAG.setRoot(OutChain); + else + PendingLoads.push_back(OutChain); + return; + } + + SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain, + Ptr, MMO); SDValue OutChain = L.getValue(1); if (MemVT != VT) @@ -4686,9 +4735,17 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDValue Val = getValue(I.getValueOperand()); if (Val.getValueType() != MemVT) Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT); + SDValue Ptr = getValue(I.getPointerOperand()); + if (TLI.lowerAtomicStoreAsStoreSDNode(I)) { + // TODO: Once this is better exercised by tests, it should be merged with + // the normal path for stores to prevent future divergence. + SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO); + DAG.setRoot(S); + return; + } SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain, - getValue(I.getPointerOperand()), Val, MMO); + Ptr, Val, MMO); DAG.setRoot(OutChain); @@ -4731,8 +4788,22 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Add all operands of the call to the operand list. for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { - SDValue Op = getValue(I.getArgOperand(i)); - Ops.push_back(Op); + const Value *Arg = I.getArgOperand(i); + if (!I.paramHasAttr(i, Attribute::ImmArg)) { + Ops.push_back(getValue(Arg)); + continue; + } + + // Use TargetConstant instead of a regular constant for immarg. + EVT VT = TLI.getValueType(*DL, Arg->getType(), true); + if (const ConstantInt *CI = dyn_cast(Arg)) { + assert(CI->getBitWidth() <= 64 && + "large intrinsic immediates not handled"); + Ops.push_back(DAG.getTargetConstant(*CI, SDLoc(), VT)); + } else { + Ops.push_back( + DAG.getTargetConstantFP(*cast(Arg), SDLoc(), VT)); + } } SmallVector ValueVTs; @@ -4749,10 +4820,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // This is target intrinsic that touches memory AAMDNodes AAInfo; I.getAAMetadata(AAInfo); - Result = - DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, - MachinePointerInfo(Info.ptrVal, Info.offset), - Info.align, Info.flags, Info.size, AAInfo); + Result = DAG.getMemIntrinsicNode( + Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, + MachinePointerInfo(Info.ptrVal, Info.offset), + Info.align ? Info.align->value() : 0, Info.flags, Info.size, AAInfo); } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { @@ -4918,12 +4989,11 @@ static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, // Put the exponent in the right bit position for later addition to the // final result: // - // #define LOG2OFe 1.4426950f - // t0 = Op * LOG2OFe + // t0 = Op * log2(e) // TODO: What fast-math-flags should be set here? SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, - getF32Constant(DAG, 0x3fb8aa3b, dl)); + DAG.getConstantFP(numbers::log2ef, dl, MVT::f32)); return getLimitedPrecisionExp2(t0, dl, DAG); } @@ -4941,10 +5011,11 @@ static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); - // Scale the exponent by log(2) [0.69314718f]. + // Scale the exponent by log(2). 
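[Editor's aside, not part of the patch] The limited-precision exp/log expansion hunks here replace hard-coded hex float literals (0x3fb8aa3b, 0x3f317218) with the named constants log2(e) and ln(2); the identities in play are exp(x) = 2^(x * log2(e)) and log(2^k * m) = k * ln(2) + log(m). A quick numeric check of the first identity in plain C++:

  #include <cmath>
  #include <cstdio>

  int main() {
    // exp(x) == exp2(x * log2(e)); the patch swaps the float literal for the
    // named constant (numbers::log2ef) in the same formula.
    const float Log2e = 1.44269504088896340736f;   // log2(e)
    for (float X : {0.5f, 1.0f, 3.75f}) {
      float ViaExp2 = std::exp2(X * Log2e);
      std::printf("x=%.2f  exp=%.6f  exp2(x*log2e)=%.6f\n", X, std::exp(X), ViaExp2);
    }
    return 0;
  }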
SDValue Exp = GetExponent(DAG, Op1, TLI, dl); - SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, - getF32Constant(DAG, 0x3f317218, dl)); + SDValue LogOfExponent = + DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, + DAG.getConstantFP(numbers::ln2f, dl, MVT::f32)); // Get the significand and build it into a floating-point number with // exponent of 1. @@ -5311,19 +5382,32 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } -// getUnderlyingArgReg - Find underlying register used for a truncated or -// bitcasted argument. -static unsigned getUnderlyingArgReg(const SDValue &N) { +// getUnderlyingArgRegs - Find underlying registers used for a truncated, +// bitcasted, or split argument. Returns a list of +static void +getUnderlyingArgRegs(SmallVectorImpl> &Regs, + const SDValue &N) { switch (N.getOpcode()) { - case ISD::CopyFromReg: - return cast(N.getOperand(1))->getReg(); + case ISD::CopyFromReg: { + SDValue Op = N.getOperand(1); + Regs.emplace_back(cast(Op)->getReg(), + Op.getValueType().getSizeInBits()); + return; + } case ISD::BITCAST: case ISD::AssertZext: case ISD::AssertSext: case ISD::TRUNCATE: - return getUnderlyingArgReg(N.getOperand(0)); + getUnderlyingArgRegs(Regs, N.getOperand(0)); + return; + case ISD::BUILD_PAIR: + case ISD::BUILD_VECTOR: + case ISD::CONCAT_VECTORS: + for (SDValue Op : N->op_values()) + getUnderlyingArgRegs(Regs, Op); + return; default: - return 0; + return; } } @@ -5412,11 +5496,16 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (FI != std::numeric_limits::max()) Op = MachineOperand::CreateFI(FI); + SmallVector, 8> ArgRegsAndSizes; if (!Op && N.getNode()) { - unsigned Reg = getUnderlyingArgReg(N); - if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { + getUnderlyingArgRegs(ArgRegsAndSizes, N); + Register Reg; + if (ArgRegsAndSizes.size() == 1) + Reg = ArgRegsAndSizes.front().first; + + if (Reg && Reg.isVirtual()) { MachineRegisterInfo &RegInfo = MF.getRegInfo(); - unsigned PR = RegInfo.getLiveInPhysReg(Reg); + Register PR = RegInfo.getLiveInPhysReg(Reg); if (PR) Reg = PR; } @@ -5436,29 +5525,42 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } if (!Op) { + // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg + auto splitMultiRegDbgValue + = [&](ArrayRef> SplitRegs) { + unsigned Offset = 0; + for (auto RegAndSize : SplitRegs) { + auto FragmentExpr = DIExpression::createFragmentExpression( + Expr, Offset, RegAndSize.second); + if (!FragmentExpr) + continue; + assert(!IsDbgDeclare && "DbgDeclare operand is not in memory?"); + FuncInfo.ArgDbgValues.push_back( + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false, + RegAndSize.first, Variable, *FragmentExpr)); + Offset += RegAndSize.second; + } + }; + // Check if ValueMap has reg number. 
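[Editor's aside, not part of the patch] When an argument is split across several registers, the new splitMultiRegDbgValue lambda above walks the (register, size-in-bits) pairs and emits one DBG_VALUE per piece, each with a fragment expression at a running bit offset. A standalone sketch of that offset bookkeeping with plain structs; DIExpression and DBG_VALUE appear only in comments, and the register numbers are hypothetical:

  #include <cstdio>
  #include <utility>
  #include <vector>

  struct Fragment { unsigned Reg; unsigned OffsetInBits; unsigned SizeInBits; };

  // Mirror of the offset accumulation in splitMultiRegDbgValue: each register
  // piece describes SizeInBits bits of the variable starting at a running
  // offset (what DIExpression::createFragmentExpression encodes in the patch).
  static std::vector<Fragment>
  describeSplitValue(const std::vector<std::pair<unsigned, unsigned>> &RegsAndSizes) {
    std::vector<Fragment> Out;
    unsigned Offset = 0;
    for (auto &RS : RegsAndSizes) {
      Out.push_back({RS.first, Offset, RS.second});
      Offset += RS.second;
    }
    return Out;
  }

  int main() {
    // An i128 argument passed in two hypothetical 64-bit virtual registers.
    for (const Fragment &F : describeSplitValue({{1, 64}, {2, 64}}))
      std::printf("DBG_VALUE reg%u, fragment(offset=%u, size=%u)\n",
                  F.Reg, F.OffsetInBits, F.SizeInBits);
    return 0;
  }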
- DenseMap::iterator VMI = FuncInfo.ValueMap.find(V); + DenseMap::const_iterator + VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) { const auto &TLI = DAG.getTargetLoweringInfo(); RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second, V->getType(), getABIRegCopyCC(V)); if (RFV.occupiesMultipleRegs()) { - unsigned Offset = 0; - for (auto RegAndSize : RFV.getRegsAndSizes()) { - Op = MachineOperand::CreateReg(RegAndSize.first, false); - auto FragmentExpr = DIExpression::createFragmentExpression( - Expr, Offset, RegAndSize.second); - if (!FragmentExpr) - continue; - FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare, - Op->getReg(), Variable, *FragmentExpr)); - Offset += RegAndSize.second; - } + splitMultiRegDbgValue(RFV.getRegsAndSizes()); return true; } + Op = MachineOperand::CreateReg(VMI->second, false); IsIndirect = IsDbgDeclare; + } else if (ArgRegsAndSizes.size() > 1) { + // This was split due to the calling convention, and no virtual register + // mapping exists for the value. + splitMultiRegDbgValue(ArgRegsAndSizes); + return true; } } @@ -5468,8 +5570,10 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( assert(Variable->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); IsIndirect = (Op->isReg()) ? IsIndirect : true; + if (IsIndirect) + Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false, *Op, Variable, Expr)); return true; @@ -5554,11 +5658,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; case Intrinsic::sponentry: setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl, - TLI.getPointerTy(DAG.getDataLayout()))); + TLI.getFrameIndexTy(DAG.getDataLayout()))); return; case Intrinsic::frameaddress: setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, - TLI.getPointerTy(DAG.getDataLayout()), + TLI.getFrameIndexTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return; case Intrinsic::read_register: { @@ -5888,65 +5992,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::masked_compressstore: visitMaskedStore(I, true /* IsCompressing */); return; - case Intrinsic::x86_mmx_pslli_w: - case Intrinsic::x86_mmx_pslli_d: - case Intrinsic::x86_mmx_pslli_q: - case Intrinsic::x86_mmx_psrli_w: - case Intrinsic::x86_mmx_psrli_d: - case Intrinsic::x86_mmx_psrli_q: - case Intrinsic::x86_mmx_psrai_w: - case Intrinsic::x86_mmx_psrai_d: { - SDValue ShAmt = getValue(I.getArgOperand(1)); - if (isa(ShAmt)) { - visitTargetIntrinsic(I, Intrinsic); - return; - } - unsigned NewIntrinsic = 0; - EVT ShAmtVT = MVT::v2i32; - switch (Intrinsic) { - case Intrinsic::x86_mmx_pslli_w: - NewIntrinsic = Intrinsic::x86_mmx_psll_w; - break; - case Intrinsic::x86_mmx_pslli_d: - NewIntrinsic = Intrinsic::x86_mmx_psll_d; - break; - case Intrinsic::x86_mmx_pslli_q: - NewIntrinsic = Intrinsic::x86_mmx_psll_q; - break; - case Intrinsic::x86_mmx_psrli_w: - NewIntrinsic = Intrinsic::x86_mmx_psrl_w; - break; - case Intrinsic::x86_mmx_psrli_d: - NewIntrinsic = Intrinsic::x86_mmx_psrl_d; - break; - case Intrinsic::x86_mmx_psrli_q: - NewIntrinsic = Intrinsic::x86_mmx_psrl_q; - break; - case Intrinsic::x86_mmx_psrai_w: - NewIntrinsic = Intrinsic::x86_mmx_psra_w; - break; - case Intrinsic::x86_mmx_psrai_d: - NewIntrinsic = Intrinsic::x86_mmx_psra_d; - break; - default: llvm_unreachable("Impossible intrinsic"); // 
Can't reach here. - } - - // The vector shift intrinsics with scalars uses 32b shift amounts but - // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits - // to be zero. - // We must do this early because v2i32 is not a legal type. - SDValue ShOps[2]; - ShOps[0] = ShAmt; - ShOps[1] = DAG.getConstant(0, sdl, MVT::i32); - ShAmt = DAG.getBuildVector(ShAmtVT, sdl, ShOps); - EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); - ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); - Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, - DAG.getConstant(NewIntrinsic, sdl, MVT::i32), - getValue(I.getArgOperand(0)), ShAmt); - setValue(&I, Res); - return; - } case Intrinsic::powi: setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG)); @@ -6063,6 +6108,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: case Intrinsic::experimental_constrained_fptrunc: case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: @@ -6075,12 +6122,16 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::experimental_constrained_log: case Intrinsic::experimental_constrained_log10: case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_lrint: + case Intrinsic::experimental_constrained_llrint: case Intrinsic::experimental_constrained_rint: case Intrinsic::experimental_constrained_nearbyint: case Intrinsic::experimental_constrained_maxnum: case Intrinsic::experimental_constrained_minnum: case Intrinsic::experimental_constrained_ceil: case Intrinsic::experimental_constrained_floor: + case Intrinsic::experimental_constrained_lround: + case Intrinsic::experimental_constrained_llround: case Intrinsic::experimental_constrained_round: case Intrinsic::experimental_constrained_trunc: visitConstrainedFPIntrinsic(cast(I)); @@ -6272,6 +6323,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Op3)); return; } + case Intrinsic::umul_fix_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + setValue(&I, DAG.getNode(ISD::UMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2, + Op3)); + return; + } case Intrinsic::stacksave: { SDValue Op = getRoot(); Res = DAG.getNode( @@ -6347,29 +6406,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DAG.setRoot(Res); return; } - case Intrinsic::objectsize: { - // If we don't know by now, we're never going to know. - ConstantInt *CI = dyn_cast(I.getArgOperand(1)); - - assert(CI && "Non-constant type in __builtin_object_size?"); - - SDValue Arg = getValue(I.getCalledValue()); - EVT Ty = Arg.getValueType(); - - if (CI->isZero()) - Res = DAG.getConstant(-1ULL, sdl, Ty); - else - Res = DAG.getConstant(0, sdl, Ty); - - setValue(&I, Res); - return; - } + case Intrinsic::objectsize: + llvm_unreachable("llvm.objectsize.* should have been lowered already"); case Intrinsic::is_constant: - // If this wasn't constant-folded away by now, then it's not a - // constant. 
- setValue(&I, DAG.getConstant(0, sdl, MVT::i1)); - return; + llvm_unreachable("llvm.is.constant.* should have been lowered already"); case Intrinsic::annotation: case Intrinsic::ptr_annotation: @@ -6818,6 +6859,17 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, Val); return; } + case Intrinsic::ptrmask: { + SDValue Ptr = getValue(I.getOperand(0)); + SDValue Const = getValue(I.getOperand(1)); + + EVT DestVT = + EVT(DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); + + setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), DestVT, Ptr, + DAG.getZExtOrTrunc(Const, getCurSDLoc(), DestVT))); + return; + } } } @@ -6845,6 +6897,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( case Intrinsic::experimental_constrained_fma: Opcode = ISD::STRICT_FMA; break; + case Intrinsic::experimental_constrained_fptosi: + Opcode = ISD::STRICT_FP_TO_SINT; + break; + case Intrinsic::experimental_constrained_fptoui: + Opcode = ISD::STRICT_FP_TO_UINT; + break; case Intrinsic::experimental_constrained_fptrunc: Opcode = ISD::STRICT_FP_ROUND; break; @@ -6881,6 +6939,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( case Intrinsic::experimental_constrained_log2: Opcode = ISD::STRICT_FLOG2; break; + case Intrinsic::experimental_constrained_lrint: + Opcode = ISD::STRICT_LRINT; + break; + case Intrinsic::experimental_constrained_llrint: + Opcode = ISD::STRICT_LLRINT; + break; case Intrinsic::experimental_constrained_rint: Opcode = ISD::STRICT_FRINT; break; @@ -6899,6 +6963,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( case Intrinsic::experimental_constrained_floor: Opcode = ISD::STRICT_FFLOOR; break; + case Intrinsic::experimental_constrained_lround: + Opcode = ISD::STRICT_LROUND; + break; + case Intrinsic::experimental_constrained_llround: + Opcode = ISD::STRICT_LLROUND; + break; case Intrinsic::experimental_constrained_round: Opcode = ISD::STRICT_FROUND; break; @@ -7102,7 +7172,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, if (SwiftErrorVal && TLI.supportSwiftError()) { // Get the last element of InVals. SDValue Src = CLI.InVals.back(); - unsigned VReg = SwiftError.getOrCreateVRegDefAt( + Register VReg = SwiftError.getOrCreateVRegDefAt( CS.getInstruction(), FuncInfo.MBB, SwiftErrorVal); SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src); DAG.setRoot(CopyNode); @@ -8021,6 +8091,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Compute the constraint code and ConstraintType to use. TLI.ComputeConstraintToUse(T, SDValue()); + if (T.ConstraintType == TargetLowering::C_Immediate && + OpInfo.CallOperand && !isa(OpInfo.CallOperand)) + // We've delayed emitting a diagnostic like the "n" constraint because + // inlining could cause an integer showing up. 
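[Editor's aside, not part of the patch] The llvm.ptrmask hunk above lowers the intrinsic to a plain AND of the pointer with the mask, after resizing the mask to pointer width. The same operation in plain C++ looks like the sketch below; rounding a pointer down to a 64-byte block is a typical use, and the buffer/constants are made up for the example:

  #include <cstdint>
  #include <cstdio>

  // Equivalent of the new lowering: bitwise-AND the pointer value with a mask
  // of pointer width (the DAG node zero-extends/truncates the mask first).
  template <typename T> static T *ptrmask(T *P, uintptr_t Mask) {
    return reinterpret_cast<T *>(reinterpret_cast<uintptr_t>(P) & Mask);
  }

  int main() {
    alignas(64) static char Buf[256];
    char *Unaligned = Buf + 100;
    // Clear the low 6 bits: round down to the enclosing 64-byte block.
    char *Block = ptrmask(Unaligned, ~uintptr_t(63));
    std::printf("delta into block: %td\n", Unaligned - Block);  // 100 % 64 == 36
    return 0;
  }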
+ return emitInlineAsmError( + CS, "constraint '" + Twine(T.ConstraintCode) + "' expects an " + "integer constant expression"); + ExtraInfo.update(T); } @@ -8105,7 +8183,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { switch (OpInfo.Type) { case InlineAsm::isOutput: if (OpInfo.ConstraintType == TargetLowering::C_Memory || - (OpInfo.ConstraintType == TargetLowering::C_Other && + ((OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) && OpInfo.isIndirect)) { unsigned ConstraintID = TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); @@ -8119,13 +8198,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { MVT::i32)); AsmNodeOperands.push_back(OpInfo.CallOperand); break; - } else if ((OpInfo.ConstraintType == TargetLowering::C_Other && + } else if (((OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) && !OpInfo.isIndirect) || OpInfo.ConstraintType == TargetLowering::C_Register || OpInfo.ConstraintType == TargetLowering::C_RegisterClass) { // Otherwise, this outputs to a register (directly for C_Register / - // C_RegisterClass, and a target-defined fashion for C_Other). Find a - // register that we can use. + // C_RegisterClass, and a target-defined fashion for + // C_Immediate/C_Other). Find a register that we can use. if (OpInfo.AssignedRegs.Regs.empty()) { emitInlineAsmError( CS, "couldn't allocate output register for constraint '" + @@ -8205,15 +8285,24 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Treat indirect 'X' constraint as memory. - if (OpInfo.ConstraintType == TargetLowering::C_Other && + if ((OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) && OpInfo.isIndirect) OpInfo.ConstraintType = TargetLowering::C_Memory; - if (OpInfo.ConstraintType == TargetLowering::C_Other) { + if (OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) { std::vector Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); if (Ops.empty()) { + if (OpInfo.ConstraintType == TargetLowering::C_Immediate) + if (isa(InOperandVal)) { + emitInlineAsmError(CS, "value out of range for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + return; + } + emitInlineAsmError(CS, "invalid operand for inline asm constraint '" + Twine(OpInfo.ConstraintCode) + "'"); return; @@ -8250,7 +8339,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || - OpInfo.ConstraintType == TargetLowering::C_Register) && + OpInfo.ConstraintType == TargetLowering::C_Register || + OpInfo.ConstraintType == TargetLowering::C_Immediate) && "Unknown constraint type!"); // TODO: Support this. @@ -8356,6 +8446,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { Val = OpInfo.AssignedRegs.getCopyFromRegs( DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction()); break; + case TargetLowering::C_Immediate: case TargetLowering::C_Other: Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(), OpInfo, DAG); @@ -9018,7 +9109,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // Certain targets (such as MIPS), may have a different ABI alignment // for a type depending on the context. Give the target a chance to // specify the alignment it wants. 
- unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL); + const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL)); if (Args[i].Ty->isPointerTy()) { Flags.setPointer(); @@ -9073,7 +9164,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { FrameAlign = Args[i].Alignment; else FrameAlign = getByValTypeAlignment(ElementTy, DL); - Flags.setByValAlign(FrameAlign); + Flags.setByValAlign(Align(FrameAlign)); } if (Args[i].IsNest) Flags.setNest(); @@ -9129,7 +9220,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) { - MyFlags.Flags.setOrigAlign(1); + MyFlags.Flags.setOrigAlign(Align::None()); if (j == NumParts - 1) MyFlags.Flags.setSplitEnd(); } @@ -9259,7 +9350,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { assert((Op.getOpcode() != ISD::CopyFromReg || cast(Op.getOperand(1))->getReg() != Reg) && "Copy from a reg to the same reg!"); - assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); + assert(!Register::isPhysicalRegister(Reg) && "Is a physreg"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // If this is an InlineAsm we have to match the registers required, not the @@ -9516,8 +9607,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Certain targets (such as MIPS), may have a different ABI alignment // for a type depending on the context. Give the target a chance to // specify the alignment it wants. - unsigned OriginalAlignment = - TLI->getABIAlignmentForCallingConv(ArgTy, DL); + const Align OriginalAlignment( + TLI->getABIAlignmentForCallingConv(ArgTy, DL)); if (Arg.getType()->isPointerTy()) { Flags.setPointer(); @@ -9577,7 +9668,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { FrameAlign = Arg.getParamAlignment(); else FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL); - Flags.setByValAlign(FrameAlign); + Flags.setByValAlign(Align(FrameAlign)); } if (Arg.hasAttribute(Attribute::Nest)) Flags.setNest(); @@ -9586,6 +9677,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Flags.setOrigAlign(OriginalAlignment); if (ArgCopyElisionCandidates.count(&Arg)) Flags.setCopyElisionCandidate(); + if (Arg.hasAttribute(Attribute::Returned)) + Flags.setReturned(); MVT RegisterVT = TLI->getRegisterTypeForCallingConv( *CurDAG->getContext(), F.getCallingConv(), VT); @@ -9598,7 +9691,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 else if (i > 0) { - MyFlags.Flags.setOrigAlign(1); + MyFlags.Flags.setOrigAlign(Align::None()); if (i == NumRegs - 1) MyFlags.Flags.setSplitEnd(); } @@ -9650,7 +9743,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); - unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT)); + Register SRetReg = + RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT)); FuncInfo->DemoteRegister = SRetReg; NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue); @@ -9748,10 +9842,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) { FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); } + // Analyses past this point are naive and don't expect an assertion. + if (Res.getOpcode() == ISD::AssertZext) + Res = Res.getOperand(0); + // Update the SwiftErrorVRegDefMap. 
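Editor's note: throughout this patch, TargetRegisterInfo::isVirtualRegister / isPhysicalRegister / virtReg2Index calls migrate to the Register class. A rough standalone sketch of the underlying convention (a tag bit in the register number distinguishes virtual from physical); the exact encoding is an assumption for illustration, not taken from this diff.

#include <cassert>

// Illustrative only: the top bit marks a virtual register, small positive
// numbers are physical registers, and zero means "no register".
struct RegisterSketch {
  static constexpr unsigned VirtualFlag = 1u << 31;

  static bool isVirtual(unsigned R) { return (R & VirtualFlag) != 0; }
  static bool isPhysical(unsigned R) { return R != 0 && !isVirtual(R); }
  static unsigned virtRegToIndex(unsigned R) {
    assert(isVirtual(R) && "not a virtual register");
    return R & ~VirtualFlag;
  }
};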
if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) { unsigned Reg = cast(Res.getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(), Reg); } @@ -9763,7 +9861,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // FIXME: This isn't very clean... it would be nice to make this more // general. unsigned Reg = cast(Res.getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { FuncInfo->ValueMap[&Arg] = Reg; continue; } @@ -10087,8 +10185,6 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, break; } case CC_BitTests: { - // FIXME: If Fallthrough is unreachable, skip the range check. - // FIXME: Optimize away range check based on pivot comparisons. BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex]; @@ -10109,6 +10205,11 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, BTB->DefaultProb -= DefaultProb / 2; } + if (FallthroughUnreachable) { + // Skip the range check if the fallthrough block is unreachable. + BTB->OmitRangeCheck = true; + } + // If we're in the right place, emit the bit test header right now. if (CurMBB == SwitchMBB) { visitBitTestHeader(*BTB, SwitchMBB); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 0072e33f23b7..bfcf30b430b6 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -426,7 +426,7 @@ public: SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, SwiftErrorValueTracking &swifterror, CodeGenOpt::Level ol) : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag), - SL(make_unique(this, funcinfo)), FuncInfo(funcinfo), + SL(std::make_unique(this, funcinfo)), FuncInfo(funcinfo), SwiftError(swifterror) {} void init(GCFunctionInfo *gfi, AliasAnalysis *AA, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index da3049881d31..bc10f7621239 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -280,6 +280,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; + case ISD::SPLAT_VECTOR: return "splat_vector"; case ISD::CARRY_FALSE: return "carry_false"; case ISD::ADDC: return "addc"; case ISD::ADDE: return "adde"; @@ -305,6 +306,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SMULFIX: return "smulfix"; case ISD::SMULFIXSAT: return "smulfixsat"; case ISD::UMULFIX: return "umulfix"; + case ISD::UMULFIXSAT: return "umulfixsat"; // Conversion operators. 
case ISD::SIGN_EXTEND: return "sign_extend"; @@ -318,22 +320,27 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FP_ROUND: return "fp_round"; case ISD::STRICT_FP_ROUND: return "strict_fp_round"; case ISD::FLT_ROUNDS_: return "flt_rounds"; - case ISD::FP_ROUND_INREG: return "fp_round_inreg"; case ISD::FP_EXTEND: return "fp_extend"; case ISD::STRICT_FP_EXTEND: return "strict_fp_extend"; case ISD::SINT_TO_FP: return "sint_to_fp"; case ISD::UINT_TO_FP: return "uint_to_fp"; case ISD::FP_TO_SINT: return "fp_to_sint"; + case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; + case ISD::STRICT_FP_TO_UINT: return "strict_fp_to_uint"; case ISD::BITCAST: return "bitcast"; case ISD::ADDRSPACECAST: return "addrspacecast"; case ISD::FP16_TO_FP: return "fp16_to_fp"; case ISD::FP_TO_FP16: return "fp_to_fp16"; case ISD::LROUND: return "lround"; + case ISD::STRICT_LROUND: return "strict_lround"; case ISD::LLROUND: return "llround"; + case ISD::STRICT_LLROUND: return "strict_llround"; case ISD::LRINT: return "lrint"; + case ISD::STRICT_LRINT: return "strict_lrint"; case ISD::LLRINT: return "llrint"; + case ISD::STRICT_LLRINT: return "strict_llrint"; // Control flow instructions case ISD::BR: return "br"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index bdf9f2c166e1..1f07a241a824 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -27,6 +27,7 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -434,9 +435,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TII = MF->getSubtarget().getInstrInfo(); TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); - LibInfo = &getAnalysis().getTLI(); + LibInfo = &getAnalysis().getTLI(Fn); GFI = Fn.hasGC() ? &getAnalysis().getFunctionInfo(Fn) : nullptr; - ORE = make_unique(&Fn); + ORE = std::make_unique(&Fn); auto *DTWP = getAnalysisIfAvailable(); DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; auto *LIWP = getAnalysisIfAvailable(); @@ -524,8 +525,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { To = J->second; } // Make sure the new register has a sufficiently constrained register class. - if (TargetRegisterInfo::isVirtualRegister(From) && - TargetRegisterInfo::isVirtualRegister(To)) + if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To)) MRI.constrainRegClass(To, MRI.getRegClass(From)); // Replace it. @@ -572,7 +572,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { bool hasFI = MI->getOperand(0).isFI(); Register Reg = hasFI ? 
TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) EntryMBB->insert(EntryMBB->begin(), MI); else { MachineInstr *Def = RegInfo->getVRegDef(Reg); @@ -582,7 +582,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { Def->getParent()->insert(std::next(InsertPos), MI); } else LLVM_DEBUG(dbgs() << "Dropping debug info for dead vreg" - << TargetRegisterInfo::virtReg2Index(Reg) << "\n"); + << Register::virtReg2Index(Reg) << "\n"); } // If Reg is live-in then update debug info to track its copy in a vreg. @@ -671,8 +671,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { To = J->second; } // Make sure the new register has a sufficiently constrained register class. - if (TargetRegisterInfo::isVirtualRegister(From) && - TargetRegisterInfo::isVirtualRegister(To)) + if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To)) MRI.constrainRegClass(To, MRI.getRegClass(From)); // Replace it. @@ -760,7 +759,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { continue; unsigned DestReg = cast(N->getOperand(1))->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DestReg)) + if (!Register::isVirtualRegister(DestReg)) continue; // Ignore non-integer values. @@ -1652,9 +1651,8 @@ static bool MIIsInTerminatorSequence(const MachineInstr &MI) { // Make sure that the copy dest is not a vreg when the copy source is a // physical register. - if (!OPI2->isReg() || - (!TargetRegisterInfo::isPhysicalRegister(OPI->getReg()) && - TargetRegisterInfo::isPhysicalRegister(OPI2->getReg()))) + if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) && + Register::isPhysicalRegister(OPI2->getReg()))) return false; return true; @@ -2234,9 +2232,9 @@ void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { SDLoc dl(Op); MDNodeSDNode *MD = dyn_cast(Op->getOperand(1)); const MDString *RegStr = dyn_cast(MD->getMD()->getOperand(0)); - unsigned Reg = + Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0), - *CurDAG); + CurDAG->getMachineFunction()); SDValue New = CurDAG->getCopyFromReg( Op->getOperand(0), dl, Reg, Op->getValueType(0)); New->setNodeId(-1); @@ -2248,9 +2246,9 @@ void SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) { SDLoc dl(Op); MDNodeSDNode *MD = dyn_cast(Op->getOperand(1)); const MDString *RegStr = dyn_cast(MD->getMD()->getOperand(0)); - unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(), + Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Op->getOperand(2).getValueType(), - *CurDAG); + CurDAG->getMachineFunction()); SDValue New = CurDAG->getCopyToReg( Op->getOperand(0), dl, Reg, Op->getOperand(2)); New->setNodeId(-1); @@ -3323,10 +3321,13 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, continue; } - case OPC_EmitCopyToReg: { + case OPC_EmitCopyToReg: + case OPC_EmitCopyToReg2: { unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg"); unsigned DestPhysReg = MatcherTable[MatcherIndex++]; + if (Opcode == OPC_EmitCopyToReg2) + DestPhysReg |= MatcherTable[MatcherIndex++] << 8; if (!InputChain.getNode()) InputChain = CurDAG->getEntryNode(); diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 395e9a8a4fc5..fad98b6f50dc 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -378,7 +378,6 @@ 
spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, // We use TargetFrameIndex so that isel will not select it into LEA Loc = Builder.DAG.getTargetFrameIndex(Index, Builder.getFrameIndexTy()); -#ifndef NDEBUG // Right now we always allocate spill slots that are of the same // size as the value we're about to spill (the size of spillee can // vary since we spill vectors of pointers too). At some point we @@ -387,12 +386,18 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo(); assert((MFI.getObjectSize(Index) * 8) == Incoming.getValueSizeInBits() && "Bad spill: stack slot does not match!"); -#endif + // Note: Using the alignment of the spill slot (rather than the abi or + // preferred alignment) is required for correctness when dealing with spill + // slots with preferred alignments larger than frame alignment.. auto &MF = Builder.DAG.getMachineFunction(); auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index); + auto *StoreMMO = + MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, + MFI.getObjectSize(Index), + MFI.getObjectAlignment(Index)); Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc, - PtrInfo); + StoreMMO); MMO = getMachineMemOperand(MF, *cast(Loc)); @@ -1011,20 +1016,27 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { return; } - SDValue SpillSlot = - DAG.getTargetFrameIndex(*DerivedPtrLocation, getFrameIndexTy()); + unsigned Index = *DerivedPtrLocation; + SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy()); // Note: We know all of these reloads are independent, but don't bother to // exploit that chain wise. DAGCombine will happily do so as needed, so // doing it here would be a small compile time win at most. SDValue Chain = getRoot(); - SDValue SpillLoad = - DAG.getLoad(DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), - Relocate.getType()), - getCurSDLoc(), Chain, SpillSlot, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), - *DerivedPtrLocation)); + auto &MF = DAG.getMachineFunction(); + auto &MFI = MF.getFrameInfo(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index); + auto *LoadMMO = + MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, + MFI.getObjectSize(Index), + MFI.getObjectAlignment(Index)); + + auto LoadVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + Relocate.getType()); + + SDValue SpillLoad = DAG.getLoad(LoadVT, getCurSDLoc(), Chain, + SpillSlot, LoadMMO); DAG.setRoot(SpillLoad.getValue(1)); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index b260cd91d468..9ab1324533f1 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/TargetLowering.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -37,7 +36,7 @@ using namespace llvm; /// NOTE: The TargetMachine owns TLOF. 
TargetLowering::TargetLowering(const TargetMachine &tm) - : TargetLoweringBase(tm) {} + : TargetLoweringBase(tm) {} const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { return nullptr; @@ -80,7 +79,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, const CCValAssign &ArgLoc = ArgLocs[I]; if (!ArgLoc.isRegLoc()) continue; - unsigned Reg = ArgLoc.getLocReg(); + Register Reg = ArgLoc.getLocReg(); // Only look at callee saved registers. if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg)) continue; @@ -121,19 +120,25 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call, /// result of type RetVT. std::pair TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, - ArrayRef Ops, bool isSigned, - const SDLoc &dl, bool doesNotReturn, - bool isReturnValueUsed, - bool isPostTypeLegalization) const { + ArrayRef Ops, + MakeLibCallOptions CallOptions, + const SDLoc &dl) const { TargetLowering::ArgListTy Args; Args.reserve(Ops.size()); TargetLowering::ArgListEntry Entry; - for (SDValue Op : Ops) { - Entry.Node = Op; + for (unsigned i = 0; i < Ops.size(); ++i) { + SDValue NewOp = Ops[i]; + Entry.Node = NewOp; Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); - Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); - Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); + Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(), + CallOptions.IsSExt); + Entry.IsZExt = !Entry.IsSExt; + + if (CallOptions.IsSoften && + !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) { + Entry.IsSExt = Entry.IsZExt = false; + } Args.push_back(Entry); } @@ -144,15 +149,22 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::CallLoweringInfo CLI(DAG); - bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned); + bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt); + bool zeroExtend = !signExtend; + + if (CallOptions.IsSoften && + !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) { + signExtend = zeroExtend = false; + } + CLI.setDebugLoc(dl) .setChain(DAG.getEntryNode()) .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) - .setNoReturn(doesNotReturn) - .setDiscardResult(!isReturnValueUsed) - .setIsPostTypeLegalization(isPostTypeLegalization) + .setNoReturn(CallOptions.DoesNotReturn) + .setDiscardResult(!CallOptions.IsReturnValueUsed) + .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization) .setSExtResult(signExtend) - .setZExtResult(!signExtend); + .setZExtResult(zeroExtend); return LowerCallTo(CLI); } @@ -263,7 +275,8 @@ TargetLowering::findOptimalMemOpLowering(std::vector &MemOps, void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - const SDLoc &dl) const { + const SDLoc &dl, const SDValue OldLHS, + const SDValue OldRHS) const { assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128) && "Unsupported setcc type!"); @@ -365,8 +378,11 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, // Use the target specific return value for comparions lib calls. 
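Editor's note: this hunk folds makeLibCall's string of boolean parameters into a MakeLibCallOptions argument. A generic sketch of that refactoring pattern (named, chainable setters instead of positional bools); the member names below are invented for illustration and are not the patch's exact fields.

// Before: easy to pass bools in the wrong order at the call site.
//   makeCall(DAG, LC, VT, Ops, /*signed*/false, DL, /*noreturn*/false, true);
// After: each option is named where it is set, and defaults live in one place.
struct CallOptionsSketch {
  bool IsSigned = false;
  bool DoesNotReturn = false;
  bool IsReturnValueUsed = true;
  bool IsPostTypeLegalization = false;

  CallOptionsSketch &setIsSigned(bool V) { IsSigned = V; return *this; }
  CallOptionsSketch &setNoReturn(bool V) { DoesNotReturn = V; return *this; }
  CallOptionsSketch &setDiscardResult(bool V) {
    IsReturnValueUsed = !V;
    return *this;
  }
};

// Usage sketch:
//   CallOptionsSketch Opts;
//   Opts.setIsSigned(true).setNoReturn(false);
//   makeCall(DAG, LC, VT, Ops, Opts, DL);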
EVT RetVT = getCmpLibcallReturnType(); SDValue Ops[2] = {NewLHS, NewRHS}; - NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, false /*sign irrelevant*/, - dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { OldLHS.getValueType(), + OldRHS.getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true); + NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl).first; NewRHS = DAG.getConstant(0, dl, RetVT); CCCode = getCmpLibcallCC(LC1); @@ -378,8 +394,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, ISD::SETCC, dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), NewLHS, NewRHS, DAG.getCondCode(CCCode)); - NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, false/*sign irrelevant*/, - dl).first; + NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl).first; NewLHS = DAG.getNode( ISD::SETCC, dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), @@ -564,6 +579,170 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, AssumeSingleUse); } +// TODO: Can we merge SelectionDAG::GetDemandedBits into this? +// TODO: Under what circumstances can we create nodes? Constant folding? +SDValue TargetLowering::SimplifyMultipleUseDemandedBits( + SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, + SelectionDAG &DAG, unsigned Depth) const { + // Limit search depth. + if (Depth >= SelectionDAG::MaxRecursionDepth) + return SDValue(); + + // Ignore UNDEFs. + if (Op.isUndef()) + return SDValue(); + + // Not demanding any bits/elts from Op. + if (DemandedBits == 0 || DemandedElts == 0) + return DAG.getUNDEF(Op.getValueType()); + + unsigned NumElts = DemandedElts.getBitWidth(); + KnownBits LHSKnown, RHSKnown; + switch (Op.getOpcode()) { + case ISD::BITCAST: { + SDValue Src = peekThroughBitcasts(Op.getOperand(0)); + EVT SrcVT = Src.getValueType(); + EVT DstVT = Op.getValueType(); + unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits(); + unsigned NumDstEltBits = DstVT.getScalarSizeInBits(); + + if (NumSrcEltBits == NumDstEltBits) + if (SDValue V = SimplifyMultipleUseDemandedBits( + Src, DemandedBits, DemandedElts, DAG, Depth + 1)) + return DAG.getBitcast(DstVT, V); + + // TODO - bigendian once we have test coverage. + if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 && + DAG.getDataLayout().isLittleEndian()) { + unsigned Scale = NumDstEltBits / NumSrcEltBits; + unsigned NumSrcElts = SrcVT.getVectorNumElements(); + APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + for (unsigned i = 0; i != Scale; ++i) { + unsigned Offset = i * NumSrcEltBits; + APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); + if (!Sub.isNullValue()) { + DemandedSrcBits |= Sub; + for (unsigned j = 0; j != NumElts; ++j) + if (DemandedElts[j]) + DemandedSrcElts.setBit((j * Scale) + i); + } + } + + if (SDValue V = SimplifyMultipleUseDemandedBits( + Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1)) + return DAG.getBitcast(DstVT, V); + } + + // TODO - bigendian once we have test coverage. + if ((NumSrcEltBits % NumDstEltBits) == 0 && + DAG.getDataLayout().isLittleEndian()) { + unsigned Scale = NumSrcEltBits / NumDstEltBits; + unsigned NumSrcElts = SrcVT.isVector() ? 
SrcVT.getVectorNumElements() : 1; + APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) { + unsigned Offset = (i % Scale) * NumDstEltBits; + DemandedSrcBits.insertBits(DemandedBits, Offset); + DemandedSrcElts.setBit(i / Scale); + } + + if (SDValue V = SimplifyMultipleUseDemandedBits( + Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1)) + return DAG.getBitcast(DstVT, V); + } + + break; + } + case ISD::AND: { + LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + + // If all of the demanded bits are known 1 on one side, return the other. + // These bits cannot contribute to the result of the 'and' in this + // context. + if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One)) + return Op.getOperand(0); + if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One)) + return Op.getOperand(1); + break; + } + case ISD::OR: { + LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + + // If all of the demanded bits are known zero on one side, return the + // other. These bits cannot contribute to the result of the 'or' in this + // context. + if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero)) + return Op.getOperand(0); + if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero)) + return Op.getOperand(1); + break; + } + case ISD::XOR: { + LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + + // If all of the demanded bits are known zero on one side, return the + // other. + if (DemandedBits.isSubsetOf(RHSKnown.Zero)) + return Op.getOperand(0); + if (DemandedBits.isSubsetOf(LHSKnown.Zero)) + return Op.getOperand(1); + break; + } + case ISD::SIGN_EXTEND_INREG: { + // If none of the extended bits are demanded, eliminate the sextinreg. + EVT ExVT = cast(Op.getOperand(1))->getVT(); + if (DemandedBits.getActiveBits() <= ExVT.getScalarSizeInBits()) + return Op.getOperand(0); + break; + } + case ISD::INSERT_VECTOR_ELT: { + // If we don't demand the inserted element, return the base vector. + SDValue Vec = Op.getOperand(0); + auto *CIdx = dyn_cast(Op.getOperand(2)); + EVT VecVT = Vec.getValueType(); + if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) && + !DemandedElts[CIdx->getZExtValue()]) + return Vec; + break; + } + case ISD::VECTOR_SHUFFLE: { + ArrayRef ShuffleMask = cast(Op)->getMask(); + + // If all the demanded elts are from one operand and are inline, + // then we can use the operand directly. + bool AllUndef = true, IdentityLHS = true, IdentityRHS = true; + for (unsigned i = 0; i != NumElts; ++i) { + int M = ShuffleMask[i]; + if (M < 0 || !DemandedElts[i]) + continue; + AllUndef = false; + IdentityLHS &= (M == (int)i); + IdentityRHS &= ((M - NumElts) == i); + } + + if (AllUndef) + return DAG.getUNDEF(Op.getValueType()); + if (IdentityLHS) + return Op.getOperand(0); + if (IdentityRHS) + return Op.getOperand(1); + break; + } + default: + if (Op.getOpcode() >= ISD::BUILTIN_OP_END) + if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode( + Op, DemandedBits, DemandedElts, DAG, Depth)) + return V; + break; + } + return SDValue(); +} + /// Look at Op. 
At this point, we know that only the OriginalDemandedBits of the /// result of Op are ever used downstream. If we can use this information to /// simplify Op, create a new simplified DAG node and return true, returning the @@ -619,12 +798,15 @@ bool TargetLowering::SimplifyDemandedBits( } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) { // Not demanding any bits/elts from Op. return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); - } else if (Depth == 6) { // Limit search depth. + } else if (Depth >= SelectionDAG::MaxRecursionDepth) { + // Limit search depth. return false; } KnownBits Known2, KnownOut; switch (Op.getOpcode()) { + case ISD::TargetConstant: + llvm_unreachable("Can't simplify this node"); case ISD::SCALAR_TO_VECTOR: { if (!DemandedElts[0]) return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); @@ -728,6 +910,21 @@ bool TargetLowering::SimplifyDemandedBits( } break; } + case ISD::EXTRACT_SUBVECTOR: { + // If index isn't constant, assume we need all the source vector elements. + SDValue Src = Op.getOperand(0); + ConstantSDNode *SubIdx = dyn_cast(Op.getOperand(1)); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + APInt SrcElts = APInt::getAllOnesValue(NumSrcElts); + if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { + // Offset the demanded elts by the subvector index. + uint64_t Idx = SubIdx->getZExtValue(); + SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + } + if (SimplifyDemandedBits(Src, DemandedBits, SrcElts, Known, TLO, Depth + 1)) + return true; + break; + } case ISD::CONCAT_VECTORS: { Known.Zero.setAllBits(); Known.One.setAllBits(); @@ -773,22 +970,37 @@ bool TargetLowering::SimplifyDemandedBits( } if (!!DemandedLHS || !!DemandedRHS) { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + Known.Zero.setAllBits(); Known.One.setAllBits(); if (!!DemandedLHS) { - if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits, DemandedLHS, - Known2, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO, + Depth + 1)) return true; Known.One &= Known2.One; Known.Zero &= Known2.Zero; } if (!!DemandedRHS) { - if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedRHS, - Known2, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO, + Depth + 1)) return true; Known.One &= Known2.One; Known.Zero &= Known2.Zero; } + + // Attempt to avoid multi-use ops if we don't need anything from them. + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask); + return TLO.CombineTo(Op, NewOp); + } } break; } @@ -834,6 +1046,20 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? 
DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + // If all of the demanded bits are known one on one side, return the other. // These bits cannot contribute to the result of the 'and'. if (DemandedBits.isSubsetOf(Known2.Zero | Known.One)) @@ -869,6 +1095,20 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. if (DemandedBits.isSubsetOf(Known2.One | Known.Zero)) @@ -901,6 +1141,20 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. if (DemandedBits.isSubsetOf(Known.Zero)) @@ -1034,7 +1288,7 @@ bool TargetLowering::SimplifyDemandedBits( // out) are never demanded. // TODO - support non-uniform vector amounts. if (Op0.getOpcode() == ISD::SRL) { - if ((DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) { + if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) { if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) { if (SA2->getAPIntValue().ult(BitWidth)) { @@ -1141,7 +1395,8 @@ bool TargetLowering::SimplifyDemandedBits( if (Op0.getOpcode() == ISD::SHL) { if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) { - if ((DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) { + if (!DemandedBits.intersects( + APInt::getHighBitsSet(BitWidth, ShAmt))) { if (SA2->getAPIntValue().ult(BitWidth)) { unsigned C1 = SA2->getZExtValue(); unsigned Opc = ISD::SRL; @@ -1479,6 +1734,11 @@ bool TargetLowering::SimplifyDemandedBits( return true; Known = Known.trunc(BitWidth); + // Attempt to avoid multi-use ops if we don't need anything from them. 
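Editor's note: a small arithmetic sanity check of the known-bits reasoning used by SimplifyMultipleUseDemandedBits above: if every bit demanded from (X | Y) is already known to be zero in Y, the OR contributes nothing and X can be used directly, and symmetrically (X & Y) can be replaced by X where Y is known-one. A standalone demonstration with concrete masks, assuming nothing beyond ordinary unsigned arithmetic.

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Demanded = 0x000000FFu; // only the low byte is used downstream
  for (uint32_t X = 0; X < (1u << 16); X += 97) {
    // Y is known to be zero in every demanded bit (its low 16 bits are clear).
    uint32_t Y = (X * 0x01010000u) & 0xFFFF0000u;
    assert((Y & Demanded) == 0);
    // Therefore (X | Y) and X agree on every demanded bit.
    assert(((X | Y) & Demanded) == (X & Demanded));
    // Likewise, where Y is known-one in all demanded bits, (X & Y) matches X.
    uint32_t KnownOneLow = Y | Demanded;
    assert(((X & KnownOneLow) & Demanded) == (X & Demanded));
  }
  return 0;
}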
+ if (SDValue NewSrc = SimplifyMultipleUseDemandedBits( + Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1)) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc)); + // If the input is only used by this truncate, see if we can shrink it based // on the known demanded bits. if (Src.getNode()->hasOneUse()) { @@ -1595,9 +1855,7 @@ bool TargetLowering::SimplifyDemandedBits( // Bitcast from a vector using SimplifyDemanded Bits/VectorElts. // Demand the elt/bit if any of the original elts/bits are demanded. // TODO - bigendian once we have test coverage. - // TODO - bool vectors once SimplifyDemandedVectorElts has SETCC support. - if (SrcVT.isVector() && NumSrcEltBits > 1 && - (BitWidth % NumSrcEltBits) == 0 && + if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 && TLO.DAG.getDataLayout().isLittleEndian()) { unsigned Scale = BitWidth / NumSrcEltBits; unsigned NumSrcElts = SrcVT.getVectorNumElements(); @@ -1663,6 +1921,7 @@ bool TargetLowering::SimplifyDemandedBits( // Add, Sub, and Mul don't demand any bits in positions beyond that // of the highest bit demanded of them. SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1); + SDNodeFlags Flags = Op.getNode()->getFlags(); unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros(); APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ); if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO, @@ -1671,7 +1930,6 @@ bool TargetLowering::SimplifyDemandedBits( Depth + 1) || // See if the operation should be performed at a smaller bit width. ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) { - SDNodeFlags Flags = Op.getNode()->getFlags(); if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) { // Disable the nsw and nuw flags. We can no longer guarantee that we // won't wrap after simplification. @@ -1684,6 +1942,23 @@ bool TargetLowering::SimplifyDemandedBits( return true; } + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Flags.setNoSignedWrap(false); + Flags.setNoUnsignedWrap(false); + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = + TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags); + return TLO.CombineTo(Op, NewOp); + } + } + // If we have a constant operand, we may be able to turn it into -1 if we // do not demand the high bits. This can make the constant smaller to // encode, allow more general folding, or match specialized instruction @@ -1694,10 +1969,8 @@ bool TargetLowering::SimplifyDemandedBits( if (C && !C->isAllOnesValue() && !C->isOne() && (C->getAPIntValue() | HighMask).isAllOnesValue()) { SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT); - // We can't guarantee that the new math op doesn't wrap, so explicitly - // clear those flags to prevent folding with a potential existing node - // that has those flags set. - SDNodeFlags Flags; + // Disable the nsw and nuw flags. We can no longer guarantee that we + // won't wrap after simplification. Flags.setNoSignedWrap(false); Flags.setNoUnsignedWrap(false); SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags); @@ -1837,7 +2110,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( } // Limit search depth. 
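Editor's note: the ADD/SUB/MUL handling earlier in this hunk relies on the fact that bit k of a sum, difference, or product never depends on operand bits above k, which is why only a LoMask of each operand is demanded. A quick standalone check of that property modulo 2^8; the junk constants are arbitrary.

#include <cassert>
#include <cstdint>

int main() {
  // Changing operand bits above bit 7 never changes the low 8 bits of
  // a + b, a - b, or a * b.
  for (uint32_t A = 0; A < 256; ++A)
    for (uint32_t B = 0; B < 256; ++B) {
      uint32_t JunkA = A | 0xABCD0000u; // same low byte, different high bits
      uint32_t JunkB = B | 0x12340000u;
      assert(((A + B) & 0xFF) == ((JunkA + JunkB) & 0xFF));
      assert(((A - B) & 0xFF) == ((JunkA - JunkB) & 0xFF));
      assert(((A * B) & 0xFF) == ((JunkA * JunkB) & 0xFF));
    }
  return 0;
}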
- if (Depth >= 6) + if (Depth >= SelectionDAG::MaxRecursionDepth) return false; SDLoc DL(Op); @@ -2001,6 +2274,15 @@ bool TargetLowering::SimplifyDemandedVectorElts( return true; APInt BaseElts = DemandedElts; BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx); + + // If none of the base operand elements are demanded, replace it with undef. + if (!BaseElts && !Base.isUndef()) + return TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, + TLO.DAG.getUNDEF(VT), + Op.getOperand(1), + Op.getOperand(2))); + if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO, Depth + 1)) return true; @@ -2134,11 +2416,13 @@ bool TargetLowering::SimplifyDemandedVectorElts( // Update legal shuffle masks based on demanded elements if it won't reduce // to Identity which can cause premature removal of the shuffle mask. - if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps && - isShuffleMaskLegal(NewMask, VT)) - return TLO.CombineTo(Op, - TLO.DAG.getVectorShuffle(VT, DL, Op.getOperand(0), - Op.getOperand(1), NewMask)); + if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) { + SDValue LegalShuffle = + buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1), + NewMask, TLO.DAG); + if (LegalShuffle) + return TLO.CombineTo(Op, LegalShuffle); + } // Propagate undef/zero elements from LHS/RHS. for (unsigned i = 0; i != NumElts; ++i) { @@ -2304,6 +2588,13 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Known.resetAll(); } +void TargetLowering::computeKnownBitsForTargetInstr( + GISelKnownBits &Analysis, Register R, KnownBits &Known, + const APInt &DemandedElts, const MachineRegisterInfo &MRI, + unsigned Depth) const { + Known.resetAll(); +} + void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, @@ -2357,6 +2648,36 @@ bool TargetLowering::SimplifyDemandedBitsForTargetNode( return false; } +SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode( + SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, + SelectionDAG &DAG, unsigned Depth) const { + assert( + (Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op" + " is a target node!"); + return SDValue(); +} + +SDValue +TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, + SDValue N1, MutableArrayRef Mask, + SelectionDAG &DAG) const { + bool LegalMask = isShuffleMaskLegal(Mask, VT); + if (!LegalMask) { + std::swap(N0, N1); + ShuffleVectorSDNode::commuteMask(Mask); + LegalMask = isShuffleMaskLegal(Mask, VT); + } + + if (!LegalMask) + return SDValue(); + + return DAG.getVectorShuffle(VT, DL, N0, N1, Mask); +} + const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const { return nullptr; } @@ -2610,6 +2931,77 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck( return T2; } +// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <> Y) & C) ==/!= 0 +SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift( + EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond, + DAGCombinerInfo &DCI, const SDLoc &DL) const { + assert(isConstOrConstSplat(N1C) && + isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() && + "Should be a comparison with 0."); + assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + "Valid only for [in]equality comparisons."); + 
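Editor's note: the transform implemented by this new helper rests on the identity that testing X against a shifted constant mask equals shifting X the opposite way and testing against the unshifted constant: (X & (C << Y)) == 0 iff ((X >> Y) & C) == 0 for logical shifts, and the mirror image for a right-shifted constant. A brute-force standalone check over 8-bit values.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X < 256; ++X)
    for (uint32_t C = 0; C < 256; ++C)
      for (uint32_t Y = 0; Y < 8; ++Y) {
        // Work in 8 bits to mirror a fixed-width integer type.
        bool ShlForm = (X & ((C << Y) & 0xFF)) == 0;
        bool SrlForm = (((X >> Y) & C) & 0xFF) == 0;
        assert(ShlForm == SrlForm);

        bool SrlConstForm = (X & (C >> Y)) == 0;
        bool ShlValForm = (((X << Y) & 0xFF) & C) == 0;
        assert(SrlConstForm == ShlValForm);
      }
  return 0;
}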
+ unsigned NewShiftOpcode; + SDValue X, C, Y; + + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Look for '(C l>>/<< Y)'. + auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) { + // The shift should be one-use. + if (!V.hasOneUse()) + return false; + unsigned OldShiftOpcode = V.getOpcode(); + switch (OldShiftOpcode) { + case ISD::SHL: + NewShiftOpcode = ISD::SRL; + break; + case ISD::SRL: + NewShiftOpcode = ISD::SHL; + break; + default: + return false; // must be a logical shift. + } + // We should be shifting a constant. + // FIXME: best to use isConstantOrConstantVector(). + C = V.getOperand(0); + ConstantSDNode *CC = + isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true); + if (!CC) + return false; + Y = V.getOperand(1); + + ConstantSDNode *XC = + isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true); + return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG); + }; + + // LHS of comparison should be an one-use 'and'. + if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) + return SDValue(); + + X = N0.getOperand(0); + SDValue Mask = N0.getOperand(1); + + // 'and' is commutative! + if (!Match(Mask)) { + std::swap(X, Mask); + if (!Match(Mask)) + return SDValue(); + } + + EVT VT = X.getValueType(); + + // Produce: + // ((X 'OppositeShiftOpcode' Y) & C) Cond 0 + SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y); + SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C); + SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond); + return T2; +} + /// Try to fold an equality comparison with a {add/sub/xor} binary operation as /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to /// handle the commuted versions of these patterns. @@ -2726,9 +3118,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // (ctpop x) u< 2 -> (x & x-1) == 0 // (ctpop x) u> 1 -> (x & x-1) != 0 if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){ - SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp, - DAG.getConstant(1, dl, CTVT)); - SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub); + SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); + SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne); + SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE; return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC); } @@ -2852,7 +3244,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, LoadSDNode *Lod = cast(N0.getOperand(0)); APInt bestMask; unsigned bestWidth = 0, bestOffset = 0; - if (!Lod->isVolatile() && Lod->isUnindexed()) { + if (Lod->isSimple() && Lod->isUnindexed()) { unsigned origWidth = N0.getValueSizeInBits(); unsigned maskWidth = origWidth; // We can narrow (e.g.) 16-bit extending loads on 32-bit target to @@ -3178,6 +3570,14 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } + if (Cond == ISD::SETEQ || Cond == ISD::SETNE) { + // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <> Y) & C) ==/!= 0 + if (C1.isNullValue()) + if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift( + VT, N0, N1, Cond, DCI, dl)) + return CC; + } + // If we have "setcc X, C0", check to see if we can shrink the immediate // by changing cc. // TODO: Support this for vectors after legalize ops. 
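Editor's note: for the (ctpop x) u< 2 rewrite above, the underlying identity is that x has at most one bit set exactly when x & (x - 1) is zero (the subtraction clears the lowest set bit); the hunk merely expresses the subtraction as an add of the all-ones constant. A standalone check:

#include <cassert>
#include <cstdint>

static unsigned popcount32(uint32_t X) {
  unsigned N = 0;
  for (; X; X &= X - 1) // clear the lowest set bit each iteration
    ++N;
  return N;
}

int main() {
  for (uint32_t X = 0; X <= 0xFFFF; ++X) {
    bool AtMostOneBit = popcount32(X) < 2;          // (ctpop x) u< 2
    bool MaskForm = (X & (X + 0xFFFFFFFFu)) == 0;   // x & (x + (-1)) == x & (x - 1)
    assert(AtMostOneBit == MaskForm);
  }
  return 0;
}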
@@ -3203,33 +3603,35 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Back to non-vector simplifications. // TODO: Can we do these for vector splats? if (auto *N1C = dyn_cast(N1.getNode())) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const APInt &C1 = N1C->getAPIntValue(); + EVT ShValTy = N0.getValueType(); // Fold bit comparisons when we can. if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && - (VT == N0.getValueType() || - (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) && + (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) && N0.getOpcode() == ISD::AND) { auto &DL = DAG.getDataLayout(); if (auto *AndRHS = dyn_cast(N0.getOperand(1))) { - EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, - !DCI.isBeforeLegalize()); + EVT ShiftTy = getShiftAmountTy(ShValTy, DL, !DCI.isBeforeLegalize()); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. - if (AndRHS->getAPIntValue().isPowerOf2()) { + unsigned ShCt = AndRHS->getAPIntValue().logBase2(); + if (AndRHS->getAPIntValue().isPowerOf2() && + ShCt <= TLI.getShiftAmountThreshold(ShValTy)) { return DAG.getNode(ISD::TRUNCATE, dl, VT, - DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, - DAG.getConstant(AndRHS->getAPIntValue().logBase2(), dl, - ShiftTy))); + DAG.getNode(ISD::SRL, dl, ShValTy, N0, + DAG.getConstant(ShCt, dl, ShiftTy))); } } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) { // (X & 8) == 8 --> (X & 8) >> 3 // Perform the xform if C1 is a single bit. - if (C1.isPowerOf2()) { + unsigned ShCt = C1.logBase2(); + if (C1.isPowerOf2() && + ShCt <= TLI.getShiftAmountThreshold(ShValTy)) { return DAG.getNode(ISD::TRUNCATE, dl, VT, - DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, - DAG.getConstant(C1.logBase2(), dl, - ShiftTy))); + DAG.getNode(ISD::SRL, dl, ShValTy, N0, + DAG.getConstant(ShCt, dl, ShiftTy))); } } } @@ -3452,15 +3854,21 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } // Fold remainder of division by a constant. - if (N0.getOpcode() == ISD::UREM && N0.hasOneUse() && - (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) && + N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); // When division is cheap or optimizing for minimum size, // fall through to DIVREM creation by skipping this fold. - if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) - if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl)) - return Folded; + if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) { + if (N0.getOpcode() == ISD::UREM) { + if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl)) + return Folded; + } else if (N0.getOpcode() == ISD::SREM) { + if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl)) + return Folded; + } + } } // Fold away ALL boolean setcc's. 
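Editor's note: the urem-by-constant equality fold dispatched just above (and generalized to non-uniform vectors further down) follows Hacker's Delight 10-17: for D = D0 * 2^K with D0 odd, N % D == 0 exactly when rotr(N * P, K) <= Q, where P is the multiplicative inverse of D0 modulo 2^W and Q = floor((2^W - 1) / D). A standalone check for W = 32 and D = 6, so K = 1, D0 = 3, P = 0xAAAAAAAB, Q = 0x2AAAAAAA; the divisor and loop bound are illustrative.

#include <cassert>
#include <cstdint>

static uint32_t rotr32(uint32_t V, unsigned K) {
  return K == 0 ? V : (V >> K) | (V << (32 - K));
}

int main() {
  const uint32_t D = 6;               // D0 = 3, K = 1
  const unsigned K = 1;
  const uint32_t P = 0xAAAAAAABu;     // inverse of 3 modulo 2^32: 3 * P wraps to 1
  const uint32_t Q = 0xFFFFFFFFu / D; // floor((2^32 - 1) / 6) == 0x2AAAAAAA

  assert(3u * P == 1u);

  for (uint32_t N = 0; N <= 3000000u; ++N) {
    bool RemIsZero = (N % D) == 0;            // (seteq (urem N, 6), 0)
    bool FoldedForm = rotr32(N * P, K) <= Q;  // (setule (rotr (mul N, P), K), Q)
    assert(RemIsZero == FoldedForm);
  }
  return 0;
}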
@@ -3567,15 +3975,17 @@ TargetLowering::getConstraintType(StringRef Constraint) const { if (S == 1) { switch (Constraint[0]) { default: break; - case 'r': return C_RegisterClass; + case 'r': + return C_RegisterClass; case 'm': // memory case 'o': // offsetable case 'V': // not offsetable return C_Memory; - case 'i': // Simple Integer or Relocatable Constant case 'n': // Simple Integer case 'E': // Floating Point Constant case 'F': // Floating Point Constant + return C_Immediate; + case 'i': // Simple Integer or Relocatable Constant case 's': // Relocatable Constant case 'p': // Address. case 'X': // Allow ANY value. @@ -3950,6 +4360,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL, /// Return an integer indicating how general CT is. static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { switch (CT) { + case TargetLowering::C_Immediate: case TargetLowering::C_Other: case TargetLowering::C_Unknown: return 0; @@ -4069,11 +4480,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[i]); - // If this is an 'other' constraint, see if the operand is valid for it. - // For example, on X86 we might have an 'rI' constraint. If the operand - // is an integer in the range [0..31] we want to use I (saving a load - // of a register), otherwise we must use 'r'. - if (CType == TargetLowering::C_Other && Op.getNode()) { + // If this is an 'other' or 'immediate' constraint, see if the operand is + // valid for it. For example, on X86 we might have an 'rI' constraint. If + // the operand is an integer in the range [0..31] we want to use I (saving a + // load of a register), otherwise we must use 'r'. + if ((CType == TargetLowering::C_Other || + CType == TargetLowering::C_Immediate) && Op.getNode()) { assert(OpInfo.Codes[i].size() == 1 && "Unhandled multi-letter 'other' constraint"); std::vector ResultOps; @@ -4455,6 +4867,34 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, return DAG.getSelect(dl, VT, IsOne, N0, Q); } +/// If all values in Values that *don't* match the predicate are same 'splat' +/// value, then replace all values with that splat value. +/// Else, if AlternativeReplacement was provided, then replace all values that +/// do match predicate with AlternativeReplacement value. +static void +turnVectorIntoSplatVector(MutableArrayRef Values, + std::function Predicate, + SDValue AlternativeReplacement = SDValue()) { + SDValue Replacement; + // Is there a value for which the Predicate does *NOT* match? What is it? + auto SplatValue = llvm::find_if_not(Values, Predicate); + if (SplatValue != Values.end()) { + // Does Values consist only of SplatValue's and values matching Predicate? + if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) { + return Value == *SplatValue || Predicate(Value); + })) // Then we shall replace values matching predicate with SplatValue. + Replacement = *SplatValue; + } + if (!Replacement) { + // Oops, we did not find the "baseline" splat value. + if (!AlternativeReplacement) + return; // Nothing to do. + // Let's replace with provided value then. 
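Editor's note: a plain-STL analogue of the splat-normalization helper being introduced in this hunk, to make the find_if_not / all_of / replace_if flow concrete. It operates on ints instead of SDValues and is purely illustrative.

#include <algorithm>
#include <cassert>
#include <vector>

// If every element that does NOT satisfy Pred is the same "splat" value,
// overwrite the Pred-matching elements with that splat value; otherwise, if a
// fallback was supplied, overwrite them with the fallback.
static void splatDontCares(std::vector<int> &Values, bool (*Pred)(int),
                           int Fallback, bool HasFallback) {
  int Replacement = 0;
  bool HaveSplat = false;
  auto It = std::find_if_not(Values.begin(), Values.end(), Pred);
  if (It != Values.end() &&
      std::all_of(Values.begin(), Values.end(),
                  [&](int V) { return V == *It || Pred(V); })) {
    Replacement = *It;
    HaveSplat = true;
  }
  if (!HaveSplat) {
    if (!HasFallback)
      return; // no consistent splat value and nothing to fall back on
    Replacement = Fallback;
  }
  std::replace_if(Values.begin(), Values.end(), Pred, Replacement);
}

int main() {
  std::vector<int> V = {7, 0, 7, 0}; // 0 marks a "don't care" lane
  splatDontCares(V, [](int X) { return X == 0; }, /*Fallback=*/-1,
                 /*HasFallback=*/false);
  assert((V == std::vector<int>{7, 7, 7, 7})); // don't-care lanes splatted to 7
  return 0;
}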
+ Replacement = AlternativeReplacement; + } + std::replace_if(Values.begin(), Values.end(), Predicate, Replacement); +} + /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE /// where the divisor is constant and the comparison target is zero, /// return a DAG expression that will generate the same comparison result @@ -4482,77 +4922,409 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, DAGCombinerInfo &DCI, const SDLoc &DL, SmallVectorImpl &Created) const { // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q) - // - D must be constant with D = D0 * 2^K where D0 is odd and D0 != 1 + // - D must be constant, with D = D0 * 2^K where D0 is odd // - P is the multiplicative inverse of D0 modulo 2^W - // - Q = floor((2^W - 1) / D0) + // - Q = floor(((2^W) - 1) / D) // where W is the width of the common type of N and D. assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Only applicable for (in)equality comparisons."); + SelectionDAG &DAG = DCI.DAG; + EVT VT = REMNode.getValueType(); + EVT SVT = VT.getScalarType(); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + EVT ShSVT = ShVT.getScalarType(); // If MUL is unavailable, we cannot proceed in any case. if (!isOperationLegalOrCustom(ISD::MUL, VT)) return SDValue(); - // TODO: Add non-uniform constant support. - ConstantSDNode *Divisor = isConstOrConstSplat(REMNode->getOperand(1)); + // TODO: Could support comparing with non-zero too. ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode); - if (!Divisor || !CompTarget || Divisor->isNullValue() || - !CompTarget->isNullValue()) + if (!CompTarget || !CompTarget->isNullValue()) return SDValue(); - const APInt &D = Divisor->getAPIntValue(); + bool HadOneDivisor = false; + bool AllDivisorsAreOnes = true; + bool HadEvenDivisor = false; + bool AllDivisorsArePowerOfTwo = true; + SmallVector PAmts, KAmts, QAmts; + + auto BuildUREMPattern = [&](ConstantSDNode *C) { + // Division by 0 is UB. Leave it to be constant-folded elsewhere. + if (C->isNullValue()) + return false; + + const APInt &D = C->getAPIntValue(); + // If all divisors are ones, we will prefer to avoid the fold. + HadOneDivisor |= D.isOneValue(); + AllDivisorsAreOnes &= D.isOneValue(); + + // Decompose D into D0 * 2^K + unsigned K = D.countTrailingZeros(); + assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate."); + APInt D0 = D.lshr(K); + + // D is even if it has trailing zeros. + HadEvenDivisor |= (K != 0); + // D is a power-of-two if D0 is one. + // If all divisors are power-of-two, we will prefer to avoid the fold. + AllDivisorsArePowerOfTwo &= D0.isOneValue(); + + // P = inv(D0, 2^W) + // 2^W requires W + 1 bits, so we have to extend and then truncate. + unsigned W = D.getBitWidth(); + APInt P = D0.zext(W + 1) + .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) + .trunc(W); + assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable + assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); + + // Q = floor((2^W - 1) / D) + APInt Q = APInt::getAllOnesValue(W).udiv(D); + + assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && + "We are expecting that K is always less than all-ones for ShSVT"); + + // If the divisor is 1 the result can be constant-folded. + if (D.isOneValue()) { + // Set P and K amount to a bogus values so we can try to splat them. 
+ P = 0; + K = -1; + assert(Q.isAllOnesValue() && + "Expecting all-ones comparison for one divisor"); + } + + PAmts.push_back(DAG.getConstant(P, DL, SVT)); + KAmts.push_back( + DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT)); + QAmts.push_back(DAG.getConstant(Q, DL, SVT)); + return true; + }; + + SDValue N = REMNode.getOperand(0); + SDValue D = REMNode.getOperand(1); - // Decompose D into D0 * 2^K - unsigned K = D.countTrailingZeros(); - bool DivisorIsEven = (K != 0); - APInt D0 = D.lshr(K); + // Collect the values from each element. + if (!ISD::matchUnaryPredicate(D, BuildUREMPattern)) + return SDValue(); - // The fold is invalid when D0 == 1. - // This is reachable because visitSetCC happens before visitREM. - if (D0.isOneValue()) + // If this is a urem by a one, avoid the fold since it can be constant-folded. + if (AllDivisorsAreOnes) return SDValue(); - // P = inv(D0, 2^W) - // 2^W requires W + 1 bits, so we have to extend and then truncate. - unsigned W = D.getBitWidth(); - APInt P = D0.zext(W + 1) - .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) - .trunc(W); - assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable - assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); + // If this is a urem by a powers-of-two, avoid the fold since it can be + // best implemented as a bit test. + if (AllDivisorsArePowerOfTwo) + return SDValue(); - // Q = floor((2^W - 1) / D) - APInt Q = APInt::getAllOnesValue(W).udiv(D); + SDValue PVal, KVal, QVal; + if (VT.isVector()) { + if (HadOneDivisor) { + // Try to turn PAmts into a splat, since we don't care about the values + // that are currently '0'. If we can't, just keep '0'`s. + turnVectorIntoSplatVector(PAmts, isNullConstant); + // Try to turn KAmts into a splat, since we don't care about the values + // that are currently '-1'. If we can't, change them to '0'`s. + turnVectorIntoSplatVector(KAmts, isAllOnesConstant, + DAG.getConstant(0, DL, ShSVT)); + } - SelectionDAG &DAG = DCI.DAG; + PVal = DAG.getBuildVector(VT, DL, PAmts); + KVal = DAG.getBuildVector(ShVT, DL, KAmts); + QVal = DAG.getBuildVector(VT, DL, QAmts); + } else { + PVal = PAmts[0]; + KVal = KAmts[0]; + QVal = QAmts[0]; + } - SDValue PVal = DAG.getConstant(P, DL, VT); - SDValue QVal = DAG.getConstant(Q, DL, VT); // (mul N, P) - SDValue Op1 = DAG.getNode(ISD::MUL, DL, VT, REMNode->getOperand(0), PVal); - Created.push_back(Op1.getNode()); + SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal); + Created.push_back(Op0.getNode()); - // Rotate right only if D was even. - if (DivisorIsEven) { + // Rotate right only if any divisor was even. We avoid rotates for all-odd + // divisors as a performance improvement, since rotating by 0 is a no-op. + if (HadEvenDivisor) { // We need ROTR to do this. if (!isOperationLegalOrCustom(ISD::ROTR, VT)) return SDValue(); - SDValue ShAmt = - DAG.getConstant(K, DL, getShiftAmountTy(VT, DAG.getDataLayout())); SDNodeFlags Flags; Flags.setExact(true); // UREM: (rotr (mul N, P), K) - Op1 = DAG.getNode(ISD::ROTR, DL, VT, Op1, ShAmt, Flags); - Created.push_back(Op1.getNode()); + Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags); + Created.push_back(Op0.getNode()); } // UREM: (setule/setugt (rotr (mul N, P), K), Q) - return DAG.getSetCC(DL, SETCCVT, Op1, QVal, + return DAG.getSetCC(DL, SETCCVT, Op0, QVal, ((Cond == ISD::SETEQ) ? 
ISD::SETULE : ISD::SETUGT)); } +/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE +/// where the divisor is constant and the comparison target is zero, +/// return a DAG expression that will generate the same comparison result +/// using only multiplications, additions and shifts/rotations. +/// Ref: "Hacker's Delight" 10-17. +SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode, + SDValue CompTargetNode, + ISD::CondCode Cond, + DAGCombinerInfo &DCI, + const SDLoc &DL) const { + SmallVector Built; + if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond, + DCI, DL, Built)) { + assert(Built.size() <= 7 && "Max size prediction failed."); + for (SDNode *N : Built) + DCI.AddToWorklist(N); + return Folded; + } + + return SDValue(); +} + +SDValue +TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, + SDValue CompTargetNode, ISD::CondCode Cond, + DAGCombinerInfo &DCI, const SDLoc &DL, + SmallVectorImpl &Created) const { + // Fold: + // (seteq/ne (srem N, D), 0) + // To: + // (setule/ugt (rotr (add (mul N, P), A), K), Q) + // + // - D must be constant, with D = D0 * 2^K where D0 is odd + // - P is the multiplicative inverse of D0 modulo 2^W + // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k))) + // - Q = floor((2 * A) / (2^K)) + // where W is the width of the common type of N and D. + assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + "Only applicable for (in)equality comparisons."); + + SelectionDAG &DAG = DCI.DAG; + + EVT VT = REMNode.getValueType(); + EVT SVT = VT.getScalarType(); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + EVT ShSVT = ShVT.getScalarType(); + + // If MUL is unavailable, we cannot proceed in any case. + if (!isOperationLegalOrCustom(ISD::MUL, VT)) + return SDValue(); + + // TODO: Could support comparing with non-zero too. + ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode); + if (!CompTarget || !CompTarget->isNullValue()) + return SDValue(); + + bool HadIntMinDivisor = false; + bool HadOneDivisor = false; + bool AllDivisorsAreOnes = true; + bool HadEvenDivisor = false; + bool NeedToApplyOffset = false; + bool AllDivisorsArePowerOfTwo = true; + SmallVector PAmts, AAmts, KAmts, QAmts; + + auto BuildSREMPattern = [&](ConstantSDNode *C) { + // Division by 0 is UB. Leave it to be constant-folded elsewhere. + if (C->isNullValue()) + return false; + + // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine. + + // WARNING: this fold is only valid for positive divisors! + APInt D = C->getAPIntValue(); + if (D.isNegative()) + D.negate(); // `rem %X, -C` is equivalent to `rem %X, C` + + HadIntMinDivisor |= D.isMinSignedValue(); + + // If all divisors are ones, we will prefer to avoid the fold. + HadOneDivisor |= D.isOneValue(); + AllDivisorsAreOnes &= D.isOneValue(); + + // Decompose D into D0 * 2^K + unsigned K = D.countTrailingZeros(); + assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate."); + APInt D0 = D.lshr(K); + + if (!D.isMinSignedValue()) { + // D is even if it has trailing zeros; unless it's INT_MIN, in which case + // we don't care about this lane in this fold, we'll special-handle it. + HadEvenDivisor |= (K != 0); + } + + // D is a power-of-two if D0 is one. This includes INT_MIN. + // If all divisors are power-of-two, we will prefer to avoid the fold. + AllDivisorsArePowerOfTwo &= D0.isOneValue(); + + // P = inv(D0, 2^W) + // 2^W requires W + 1 bits, so we have to extend and then truncate. 
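Editor's note: the inverse P computed at this step can also be obtained without the W+1-bit extend-and-truncate, using Newton-Raphson on the 2-adic inverse (each iteration doubles the number of correct low bits). A standalone sketch for odd 32-bit divisors, offered only to illustrate why inv(D0, 2^W) exists and is cheap to compute; it is not the routine used by APInt.

#include <cassert>
#include <cstdint>

// Multiplicative inverse of an odd D0 modulo 2^32.
static uint32_t inverseMod2_32(uint32_t D0) {
  assert((D0 & 1u) && "only odd values are invertible modulo a power of two");
  uint32_t X = D0;            // correct to 3 bits: D0 * D0 == 1 (mod 8)
  for (int I = 0; I < 5; ++I) // correct bits: 3 -> 6 -> 12 -> 24 -> 48
    X *= 2u - D0 * X;
  return X;
}

int main() {
  for (uint32_t D0 = 1; D0 < 100000; D0 += 2)
    assert(D0 * inverseMod2_32(D0) == 1u);
  assert(inverseMod2_32(3u) == 0xAAAAAAABu); // e.g. the inverse of 3
  return 0;
}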
+ unsigned W = D.getBitWidth(); + APInt P = D0.zext(W + 1) + .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) + .trunc(W); + assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable + assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); + + // A = floor((2^(W - 1) - 1) / D0) & -2^K + APInt A = APInt::getSignedMaxValue(W).udiv(D0); + A.clearLowBits(K); + + if (!D.isMinSignedValue()) { + // If divisor INT_MIN, then we don't care about this lane in this fold, + // we'll special-handle it. + NeedToApplyOffset |= A != 0; + } + + // Q = floor((2 * A) / (2^K)) + APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K)); + + assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) && + "We are expecting that A is always less than all-ones for SVT"); + assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && + "We are expecting that K is always less than all-ones for ShSVT"); + + // If the divisor is 1 the result can be constant-folded. Likewise, we + // don't care about INT_MIN lanes, those can be set to undef if appropriate. + if (D.isOneValue()) { + // Set P, A and K to a bogus values so we can try to splat them. + P = 0; + A = -1; + K = -1; + + // x ?% 1 == 0 <--> true <--> x u<= -1 + Q = -1; + } + + PAmts.push_back(DAG.getConstant(P, DL, SVT)); + AAmts.push_back(DAG.getConstant(A, DL, SVT)); + KAmts.push_back( + DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT)); + QAmts.push_back(DAG.getConstant(Q, DL, SVT)); + return true; + }; + + SDValue N = REMNode.getOperand(0); + SDValue D = REMNode.getOperand(1); + + // Collect the values from each element. + if (!ISD::matchUnaryPredicate(D, BuildSREMPattern)) + return SDValue(); + + // If this is a srem by a one, avoid the fold since it can be constant-folded. + if (AllDivisorsAreOnes) + return SDValue(); + + // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold + // since it can be best implemented as a bit test. + if (AllDivisorsArePowerOfTwo) + return SDValue(); + + SDValue PVal, AVal, KVal, QVal; + if (VT.isVector()) { + if (HadOneDivisor) { + // Try to turn PAmts into a splat, since we don't care about the values + // that are currently '0'. If we can't, just keep '0'`s. + turnVectorIntoSplatVector(PAmts, isNullConstant); + // Try to turn AAmts into a splat, since we don't care about the + // values that are currently '-1'. If we can't, change them to '0'`s. + turnVectorIntoSplatVector(AAmts, isAllOnesConstant, + DAG.getConstant(0, DL, SVT)); + // Try to turn KAmts into a splat, since we don't care about the values + // that are currently '-1'. If we can't, change them to '0'`s. + turnVectorIntoSplatVector(KAmts, isAllOnesConstant, + DAG.getConstant(0, DL, ShSVT)); + } + + PVal = DAG.getBuildVector(VT, DL, PAmts); + AVal = DAG.getBuildVector(VT, DL, AAmts); + KVal = DAG.getBuildVector(ShVT, DL, KAmts); + QVal = DAG.getBuildVector(VT, DL, QAmts); + } else { + PVal = PAmts[0]; + AVal = AAmts[0]; + KVal = KAmts[0]; + QVal = QAmts[0]; + } + + // (mul N, P) + SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal); + Created.push_back(Op0.getNode()); + + if (NeedToApplyOffset) { + // We need ADD to do this. + if (!isOperationLegalOrCustom(ISD::ADD, VT)) + return SDValue(); + + // (add (mul N, P), A) + Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal); + Created.push_back(Op0.getNode()); + } + + // Rotate right only if any divisor was even. We avoid rotates for all-odd + // divisors as a performance improvement, since rotating by 0 is a no-op. 
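// ---------------------------------------------------------------------------
// Illustrative aside (editorial sketch, not part of the LLVM patch): for a
// positive divisor D = D0 * 2^K (D0 odd, D != INT_MIN) the SREM fold above
// relies on
//   (N s% D == 0)  <=>  (rotr(N * P + A, K) u<= Q)
// with P = inv(D0, 2^W), A = floor((2^(W-1) - 1) / D0) & -2^K and
// Q = floor(2 * A / 2^K), all in W-bit arithmetic, plus the separate identity
//   (N s% INT_MIN == 0)  <=>  ((N & INT_MAX) == 0)
// used for the INT_MIN lanes that the VSELECT blend further below fixes up.
// The standalone check below uses W = 8 and the sample divisor D = 6; the
// constants (P = 171, A = 42, K = 1, Q = 42) are hand-derived for this
// example, not taken from the patch.
#include <cassert>
#include <cstdint>

static uint8_t rotr8(uint8_t V, unsigned K) {
  return K ? (uint8_t)((V >> K) | (V << (8 - K))) : V;
}

int main() {
  const int D = 6;        // D0 = 3, K = 1
  const uint8_t P = 171;  // inv(3, 2^8)
  const uint8_t A = 42;   // floor(127 / 3) & ~1
  const unsigned K = 1;
  const uint8_t Q = 42;   // floor(2 * A / 2^K)
  for (int N = -128; N <= 127; ++N) {
    bool Rem = (N % D) == 0;
    uint8_t MulAdd = (uint8_t)((uint8_t)N * P + A);
    bool Fold = rotr8(MulAdd, K) <= Q;
    assert(Rem == Fold && "SREM-by-constant equality identity");
    // INT_MIN divisors bypass the rotate-based fold entirely:
    assert(((N % -128) == 0) == ((N & 127) == 0));
  }
  return 0;
}
// ---------------------------------------------------------------------------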
+ if (HadEvenDivisor) { + // We need ROTR to do this. + if (!isOperationLegalOrCustom(ISD::ROTR, VT)) + return SDValue(); + SDNodeFlags Flags; + Flags.setExact(true); + // SREM: (rotr (add (mul N, P), A), K) + Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags); + Created.push_back(Op0.getNode()); + } + + // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q) + SDValue Fold = + DAG.getSetCC(DL, SETCCVT, Op0, QVal, + ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT)); + + // If we didn't have lanes with INT_MIN divisor, then we're done. + if (!HadIntMinDivisor) + return Fold; + + // That fold is only valid for positive divisors. Which effectively means, + // it is invalid for INT_MIN divisors. So if we have such a lane, + // we must fix-up results for said lanes. + assert(VT.isVector() && "Can/should only get here for vectors."); + + if (!isOperationLegalOrCustom(ISD::SETEQ, VT) || + !isOperationLegalOrCustom(ISD::AND, VT) || + !isOperationLegalOrCustom(Cond, VT) || + !isOperationLegalOrCustom(ISD::VSELECT, VT)) + return SDValue(); + + Created.push_back(Fold.getNode()); + + SDValue IntMin = DAG.getConstant( + APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT); + SDValue IntMax = DAG.getConstant( + APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT); + SDValue Zero = + DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT); + + // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded. + SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ); + Created.push_back(DivisorIsIntMin.getNode()); + + // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0 + SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax); + Created.push_back(Masked.getNode()); + SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond); + Created.push_back(MaskedIsZero.getNode()); + + // To produce final result we need to blend 2 vectors: 'SetCC' and + // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick + // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is + // constant-folded, select can get lowered to a shuffle with constant mask. + SDValue Blended = + DAG.getNode(ISD::VSELECT, DL, VT, DivisorIsIntMin, MaskedIsZero, Fold); + + return Blended; +} + bool TargetLowering:: verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { if (!isa(Op.getOperand(0))) { @@ -4564,6 +5336,246 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { return false; } +char TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, bool ForCodeSize, + unsigned Depth) const { + // fneg is removable even if it has multiple uses. + if (Op.getOpcode() == ISD::FNEG) + return 2; + + // Don't allow anything with multiple uses unless we know it is free. + EVT VT = Op.getValueType(); + const SDNodeFlags Flags = Op->getFlags(); + const TargetOptions &Options = DAG.getTarget().Options; + if (!Op.hasOneUse() && !(Op.getOpcode() == ISD::FP_EXTEND && + isFPExtFree(VT, Op.getOperand(0).getValueType()))) + return 0; + + // Don't recurse exponentially. + if (Depth > SelectionDAG::MaxRecursionDepth) + return 0; + + switch (Op.getOpcode()) { + case ISD::ConstantFP: { + if (!LegalOperations) + return 1; + + // Don't invert constant FP values after legalization unless the target says + // the negated constant is legal. 
+ return isOperationLegal(ISD::ConstantFP, VT) || + isFPImmLegal(neg(cast(Op)->getValueAPF()), VT, + ForCodeSize); + } + case ISD::BUILD_VECTOR: { + // Only permit BUILD_VECTOR of constants. + if (llvm::any_of(Op->op_values(), [&](SDValue N) { + return !N.isUndef() && !isa(N); + })) + return 0; + if (!LegalOperations) + return 1; + if (isOperationLegal(ISD::ConstantFP, VT) && + isOperationLegal(ISD::BUILD_VECTOR, VT)) + return 1; + return llvm::all_of(Op->op_values(), [&](SDValue N) { + return N.isUndef() || + isFPImmLegal(neg(cast(N)->getValueAPF()), VT, + ForCodeSize); + }); + } + case ISD::FADD: + if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + return 0; + + // After operation legalization, it might not be legal to create new FSUBs. + if (LegalOperations && !isOperationLegalOrCustom(ISD::FSUB, VT)) + return 0; + + // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) + if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1)) + return V; + // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) + return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + case ISD::FSUB: + // We can't turn -(A-B) into B-A when we honor signed zeros. + if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + return 0; + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return 1; + + case ISD::FMUL: + case ISD::FDIV: + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) + if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1)) + return V; + + // Ignore X * 2.0 because that is expected to be canonicalized to X + X. + if (auto *C = isConstOrConstSplatFP(Op.getOperand(1))) + if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL) + return 0; + + return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + + case ISD::FMA: + case ISD::FMAD: { + if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + return 0; + + // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) + // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) + char V2 = isNegatibleForFree(Op.getOperand(2), DAG, LegalOperations, + ForCodeSize, Depth + 1); + if (!V2) + return 0; + + // One of Op0/Op1 must be cheaply negatible, then select the cheapest. + char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1); + char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + char V01 = std::max(V0, V1); + return V01 ? std::max(V01, V2) : 0; + } + + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FSIN: + return isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1); + } + + return 0; +} + +SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, + bool ForCodeSize, + unsigned Depth) const { + // fneg is removable even if it has multiple uses. 
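// ---------------------------------------------------------------------------
// Illustrative aside (editorial sketch, not part of the LLVM patch):
// isNegatibleForFree above and getNegatedExpression, whose body starts just
// below, push an fneg into an expression using algebraic rewrites such as
//   -(a + b) -> (-a) - b,   -(a - b) -> b - a,
//   -(a * b) -> (-a) * b  or  a * (-b),
//   -fma(x, y, z) -> fma(-x, y, -z).
// For finite inputs these are exact in IEEE arithmetic; the add/sub forms can
// flip the sign of a zero result, which is why the code guards them with the
// no-signed-zeros flag. A minimal standalone sanity check with doubles
// (sample values chosen arbitrarily for the sketch):
#include <cassert>
#include <cmath>

int main() {
  const double a = 1.25, b = -3.5, x = 0.75, y = 2.0, z = -0.125;
  assert(-(a + b) == (-a) - b);
  assert(-(a - b) == b - a);
  assert(-(a * b) == (-a) * b && -(a * b) == a * (-b));
  assert(-std::fma(x, y, z) == std::fma(-x, y, -z));
  // Signed-zero caveat: -(0.0 + -0.0) is -0.0 while (-0.0) - (-0.0) is +0.0,
  // hence the NSZ requirement on the fadd/fsub rewrites.
  assert(std::signbit(-(0.0 + -0.0)) != std::signbit((-0.0) - (-0.0)));
  return 0;
}
// ---------------------------------------------------------------------------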
+ if (Op.getOpcode() == ISD::FNEG) + return Op.getOperand(0); + + assert(Depth <= SelectionDAG::MaxRecursionDepth && + "getNegatedExpression doesn't match isNegatibleForFree"); + const SDNodeFlags Flags = Op->getFlags(); + + switch (Op.getOpcode()) { + case ISD::ConstantFP: { + APFloat V = cast(Op)->getValueAPF(); + V.changeSign(); + return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); + } + case ISD::BUILD_VECTOR: { + SmallVector Ops; + for (SDValue C : Op->op_values()) { + if (C.isUndef()) { + Ops.push_back(C); + continue; + } + APFloat V = cast(C)->getValueAPF(); + V.changeSign(); + Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType())); + } + return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops); + } + case ISD::FADD: + assert((DAG.getTarget().Options.NoSignedZerosFPMath || + Flags.hasNoSignedZeros()) && + "Expected NSZ fp-flag"); + + // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) + if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, + Depth + 1)) + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(1), Flags); + // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(1), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(0), Flags); + case ISD::FSUB: + // fold (fneg (fsub 0, B)) -> B + if (ConstantFPSDNode *N0CFP = + isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true)) + if (N0CFP->isZero()) + return Op.getOperand(1); + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(0), Flags); + + case ISD::FMUL: + case ISD::FDIV: + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) + if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, + Depth + 1)) + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(1), Flags); + + // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) + return DAG.getNode( + Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0), + getNegatedExpression(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1), + Flags); + + case ISD::FMA: + case ISD::FMAD: { + assert((DAG.getTarget().Options.NoSignedZerosFPMath || + Flags.hasNoSignedZeros()) && + "Expected NSZ fp-flag"); + + SDValue Neg2 = getNegatedExpression(Op.getOperand(2), DAG, LegalOperations, + ForCodeSize, Depth + 1); + + char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1); + char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + if (V0 >= V1) { + // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) + SDValue Neg0 = getNegatedExpression( + Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1); + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Neg0, + Op.getOperand(1), Neg2, Flags); + } + + // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) + SDValue Neg1 = getNegatedExpression(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + Op.getOperand(0), Neg1, Neg2, Flags); + } + + case ISD::FP_EXTEND: + case ISD::FSIN: + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + 
getNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1)); + case ISD::FP_ROUND: + return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(1)); + } + + llvm_unreachable("Unknown code"); +} + //===----------------------------------------------------------------------===// // Legalization Utilities //===----------------------------------------------------------------------===// @@ -4862,7 +5874,8 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result, bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const { - SDValue Src = Node->getOperand(0); + unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0; + SDValue Src = Node->getOperand(OpNo); EVT SrcVT = Src.getValueType(); EVT DstVT = Node->getValueType(0); SDLoc dl(SDValue(Node, 0)); @@ -4871,6 +5884,13 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, if (SrcVT != MVT::f32 || DstVT != MVT::i64) return false; + if (Node->isStrictFPOpcode()) + // When a NaN is converted to an integer a trap is allowed. We can't + // use this expansion here because it would eliminate that trap. Other + // traps are also allowed and cannot be eliminated. See + // IEEE 754-2008 sec 5.8. + return false; + // Expand f32 -> i64 conversion // This algorithm comes from compiler-rt's implementation of fixsfdi: // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c @@ -4924,9 +5944,11 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, } bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, + SDValue &Chain, SelectionDAG &DAG) const { SDLoc dl(SDValue(Node, 0)); - SDValue Src = Node->getOperand(0); + unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0; + SDValue Src = Node->getOperand(OpNo); EVT SrcVT = Src.getValueType(); EVT DstVT = Node->getValueType(0); @@ -4934,7 +5956,9 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); // Only expand vector types if we have the appropriate vector bit operations. - if (DstVT.isVector() && (!isOperationLegalOrCustom(ISD::FP_TO_SINT, DstVT) || + unsigned SIntOpcode = Node->isStrictFPOpcode() ? 
ISD::STRICT_FP_TO_SINT : + ISD::FP_TO_SINT; + if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) || !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT))) return false; @@ -4946,14 +5970,21 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits()); if (APFloat::opOverflow & APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) { - Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src); + if (Node->isStrictFPOpcode()) { + Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, + { Node->getOperand(0), Src }); + Chain = Result.getValue(1); + } else + Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src); return true; } SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT); SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT); - bool Strict = shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false); + bool Strict = Node->isStrictFPOpcode() || + shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false); + if (Strict) { // Expand based on maximum range of FP_TO_SINT, if the value exceeds the // signmask then offset (the result of which should be fully representable). @@ -4963,12 +5994,23 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, // Result = fp_to_sint(Val) ^ Ofs // TODO: Should any fast-math-flags be set for the FSUB? - SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, - DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst)); + SDValue SrcBiased; + if (Node->isStrictFPOpcode()) + SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, + { Node->getOperand(0), Src, Cst }); + else + SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst); + SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased); SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), DAG.getConstant(SignMask, dl, DstVT)); - Result = DAG.getNode(ISD::XOR, dl, DstVT, - DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val), Ofs); + SDValue SInt; + if (Node->isStrictFPOpcode()) { + SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, + { SrcBiased.getValue(1), Val }); + Chain = SInt.getValue(1); + } else + SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val); + Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs); } else { // Expand based on maximum range of FP_TO_SINT: // True = fp_to_sint(Src) @@ -5918,7 +6960,8 @@ SDValue TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { assert((Node->getOpcode() == ISD::SMULFIX || Node->getOpcode() == ISD::UMULFIX || - Node->getOpcode() == ISD::SMULFIXSAT) && + Node->getOpcode() == ISD::SMULFIXSAT || + Node->getOpcode() == ISD::UMULFIXSAT) && "Expected a fixed point multiplication opcode"); SDLoc dl(Node); @@ -5926,15 +6969,19 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { SDValue RHS = Node->getOperand(1); EVT VT = LHS.getValueType(); unsigned Scale = Node->getConstantOperandVal(2); - bool Saturating = Node->getOpcode() == ISD::SMULFIXSAT; + bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT || + Node->getOpcode() == ISD::UMULFIXSAT); + bool Signed = (Node->getOpcode() == ISD::SMULFIX || + Node->getOpcode() == ISD::SMULFIXSAT); EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); unsigned VTSize = VT.getScalarSizeInBits(); if (!Scale) { // [us]mul.fix(a, b, 0) -> mul(a, b) - if (!Saturating && isOperationLegalOrCustom(ISD::MUL, VT)) { - return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); - } else if (Saturating && 
isOperationLegalOrCustom(ISD::SMULO, VT)) { + if (!Saturating) { + if (isOperationLegalOrCustom(ISD::MUL, VT)) + return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); + } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) { SDValue Result = DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS); SDValue Product = Result.getValue(0); @@ -5948,11 +6995,18 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT); Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin); return DAG.getSelect(dl, VT, Overflow, Result, Product); + } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) { + SDValue Result = + DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS); + SDValue Product = Result.getValue(0); + SDValue Overflow = Result.getValue(1); + + APInt MaxVal = APInt::getMaxValue(VTSize); + SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); + return DAG.getSelect(dl, VT, Overflow, SatMax, Product); } } - bool Signed = - Node->getOpcode() == ISD::SMULFIX || Node->getOpcode() == ISD::SMULFIXSAT; assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) && "Expected scale to be less than the number of bits if signed or at " "most the number of bits if unsigned."); @@ -5978,7 +7032,8 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { if (Scale == VTSize) // Result is just the top half since we'd be shifting by the width of the - // operand. + // operand. Overflow impossible so this works for both UMULFIX and + // UMULFIXSAT. return Hi; // The result will need to be shifted right by the scale since both operands @@ -5990,20 +7045,55 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { if (!Saturating) return Result; - unsigned OverflowBits = VTSize - Scale + 1; // +1 for the sign - SDValue HiMask = - DAG.getConstant(APInt::getHighBitsSet(VTSize, OverflowBits), dl, VT); - SDValue LoMask = DAG.getConstant( - APInt::getLowBitsSet(VTSize, VTSize - OverflowBits), dl, VT); - APInt MaxVal = APInt::getSignedMaxValue(VTSize); - APInt MinVal = APInt::getSignedMinValue(VTSize); - - Result = DAG.getSelectCC(dl, Hi, LoMask, - DAG.getConstant(MaxVal, dl, VT), Result, - ISD::SETGT); - return DAG.getSelectCC(dl, Hi, HiMask, - DAG.getConstant(MinVal, dl, VT), Result, - ISD::SETLT); + if (!Signed) { + // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the + // widened multiplication) aren't all zeroes. + + // Saturate to max if ((Hi >> Scale) != 0), + // which is the same as if (Hi > ((1 << Scale) - 1)) + APInt MaxVal = APInt::getMaxValue(VTSize); + SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale), + dl, VT); + Result = DAG.getSelectCC(dl, Hi, LowMask, + DAG.getConstant(MaxVal, dl, VT), Result, + ISD::SETUGT); + + return Result; + } + + // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the + // widened multiplication) aren't all ones or all zeroes. + + SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT); + SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT); + + if (Scale == 0) { + SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo, + DAG.getConstant(VTSize - 1, dl, ShiftTy)); + SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE); + // Saturated to SatMin if wide product is negative, and SatMax if wide + // product is positive ... 
+ SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax, + ISD::SETLT); + // ... but only if we overflowed. + return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result); + } + + // We handled Scale==0 above so all the bits to examine is in Hi. + + // Saturate to max if ((Hi >> (Scale - 1)) > 0), + // which is the same as if (Hi > (1 << (Scale - 1)) - 1) + SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1), + dl, VT); + Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT); + // Saturate to min if (Hi >> (Scale - 1)) < -1), + // which is the same as if (HI < (-1 << (Scale - 1)) + SDValue HighMask = + DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1), + dl, VT); + Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT); + return Result; } void TargetLowering::expandUADDSUBO( @@ -6060,24 +7150,19 @@ void TargetLowering::expandSADDSUBO( SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType()); - // LHSSign -> LHS >= 0 - // RHSSign -> RHS >= 0 - // SumSign -> Result >= 0 - // - // Add: - // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) - // Sub: - // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) - SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); - SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); - SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, - IsAdd ? ISD::SETEQ : ISD::SETNE); - - SDValue SumSign = DAG.getSetCC(dl, OType, Result, Zero, ISD::SETGE); - SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); - - SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); - Overflow = DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType); + // For an addition, the result should be less than one of the operands (LHS) + // if and only if the other operand (RHS) is negative, otherwise there will + // be overflow. + // For a subtraction, the result should be less than one of the operands + // (LHS) if and only if the other operand (RHS) is (non-zero) positive, + // otherwise there will be overflow. + SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT); + SDValue ConditionRHS = + DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT); + + Overflow = DAG.getBoolExtOrTrunc( + DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl, + ResultType, ResultType); } bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result, @@ -6176,20 +7261,19 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result, // being a legal type for the architecture and thus has to be split to // two arguments. SDValue Ret; + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(isSigned); + CallOptions.setIsPostTypeLegalization(true); if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) { // Halves of WideVT are packed into registers in different order // depending on platform endianness. This is usually handled by // the C calling convention, but we can't defer to it in // the legalizer. 
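// ---------------------------------------------------------------------------
// Illustrative aside (editorial sketch, not part of the LLVM patch): the
// UMULFIXSAT expansion above forms the 2W-bit product, shifts it right by the
// scale, and saturates to the unsigned maximum when the upper (W - Scale)
// bits of the widened product are not all zero, i.e. when
// Hi > ((1 << Scale) - 1). A standalone model of that check for W = 8,
// compared exhaustively against a widened reference (not LLVM code):
#include <cassert>
#include <cstdint>

// Reference: widen, shift, clamp.
static uint8_t umulfixsat_ref(uint8_t A, uint8_t B, unsigned Scale) {
  unsigned Wide = (unsigned)A * B;
  unsigned Shifted = Wide >> Scale;
  return Shifted > 0xFF ? 0xFF : (uint8_t)Shifted;
}

// Shape of the expansion: split the wide product into Hi/Lo halves, build the
// shifted result from both halves, and test Hi against the low-bits mask.
static uint8_t umulfixsat_expanded(uint8_t A, uint8_t B, unsigned Scale) {
  unsigned Wide = (unsigned)A * B;
  uint8_t Hi = (uint8_t)(Wide >> 8), Lo = (uint8_t)Wide;
  uint8_t Result = (uint8_t)((Lo >> Scale) | (Hi << (8 - Scale)));
  return Hi > ((1u << Scale) - 1) ? 0xFF : Result;
}

int main() {
  // Scale == 0 and Scale == W are special-cased in the patch, so skip them.
  for (unsigned Scale = 1; Scale < 8; ++Scale)
    for (unsigned A = 0; A < 256; ++A)
      for (unsigned B = 0; B < 256; ++B)
        assert(umulfixsat_ref(A, B, Scale) == umulfixsat_expanded(A, B, Scale));
  return 0;
}
// ---------------------------------------------------------------------------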
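// ---------------------------------------------------------------------------
// Illustrative aside (editorial sketch, not part of the LLVM patch): the
// rewritten expandSADDSUBO above detects signed overflow with one comparison
// pair instead of three sign tests:
//   add: overflow  <=>  (Result < LHS) xor (RHS < 0)
//   sub: overflow  <=>  (Result < LHS) xor (RHS > 0)
// A standalone exhaustive check for 8-bit values against a widened reference
// (not LLVM code):
#include <cassert>
#include <cstdint>

int main() {
  for (int L = -128; L <= 127; ++L)
    for (int R = -128; R <= 127; ++R) {
      // Wrapped 8-bit results, computed via unsigned arithmetic.
      int8_t AddRes = (int8_t)(uint8_t)((uint8_t)L + (uint8_t)R);
      int8_t SubRes = (int8_t)(uint8_t)((uint8_t)L - (uint8_t)R);
      // Widened reference for the overflow bit.
      bool AddOvf = (L + R) < -128 || (L + R) > 127;
      bool SubOvf = (L - R) < -128 || (L - R) > 127;
      assert(AddOvf == ((AddRes < L) != (R < 0)));
      assert(SubOvf == ((SubRes < L) != (R > 0)));
    }
  return 0;
}
// ---------------------------------------------------------------------------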
SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; - Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl, - /* doesNotReturn */ false, /* isReturnValueUsed */ true, - /* isPostTypeLegalization */ true).first; + Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first; } else { SDValue Args[] = { HiLHS, LHS, HiRHS, RHS }; - Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl, - /* doesNotReturn */ false, /* isReturnValueUsed */ true, - /* isPostTypeLegalization */ true).first; + Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first; } assert(Ret.getOpcode() == ISD::MERGE_VALUES && "Ret value is a collection of constituent nodes holding result."); diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp index 2db0ea570598..412a00095b9b 100644 --- a/lib/CodeGen/ShrinkWrap.cpp +++ b/lib/CodeGen/ShrinkWrap.cpp @@ -278,11 +278,10 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, // Ignore instructions like DBG_VALUE which don't read/def the register. if (!MO.isDef() && !MO.readsReg()) continue; - unsigned PhysReg = MO.getReg(); + Register PhysReg = MO.getReg(); if (!PhysReg) continue; - assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && - "Unallocated register?!"); + assert(Register::isPhysicalRegister(PhysReg) && "Unallocated register?!"); // The stack pointer is not normally described as a callee-saved register // in calling convention definitions, so we need to watch for it // separately. An SP mentioned by a call instruction, we can ignore, diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 23e5ce0acae8..db520d4e6403 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -477,7 +477,10 @@ bool SjLjEHPrepare::runOnFunction(Function &F) { UnregisterFn = M.getOrInsertFunction( "_Unwind_SjLj_Unregister", Type::getVoidTy(M.getContext()), PointerType::getUnqual(FunctionContextTy)); - FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress); + FrameAddrFn = Intrinsic::getDeclaration( + &M, Intrinsic::frameaddress, + {Type::getInt8PtrTy(M.getContext(), + M.getDataLayout().getAllocaAddrSpace())}); StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave); StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); BuiltinSetupDispatchFn = diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 5c944fe3f6b3..0c1f1220c421 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "SplitKit.h" -#include "LiveRangeCalc.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/None.h" @@ -22,6 +21,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveRangeCalc.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -437,7 +437,7 @@ void SplitEditor::addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original) { assert(DefMI != nullptr); LaneBitmask LM; for (const MachineOperand &DefOp : DefMI->defs()) { - unsigned R = DefOp.getReg(); + Register R = DefOp.getReg(); if (R != LI.reg) continue; if (unsigned SR = DefOp.getSubReg()) @@ -1373,7 +1373,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { assert(LI.hasSubRanges()); LiveRangeCalc SubLRC; - unsigned Reg = EP.MO.getReg(), Sub = EP.MO.getSubReg(); + Register Reg = EP.MO.getReg(), Sub = 
EP.MO.getSubReg(); LaneBitmask LM = Sub != 0 ? TRI.getSubRegIndexLaneMask(Sub) : MRI.getMaxLaneMaskForVReg(Reg); for (LiveInterval::SubRange &S : LI.subranges()) { diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index 86ad3811e3ad..78f0bbd24db5 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -14,7 +14,6 @@ #ifndef LLVM_LIB_CODEGEN_SPLITKIT_H #define LLVM_LIB_CODEGEN_SPLITKIT_H -#include "LiveRangeCalc.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" @@ -25,6 +24,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveRangeCalc.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SlotIndexes.h" diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp index ae9401b89700..383c91259ffc 100644 --- a/lib/CodeGen/StackMaps.cpp +++ b/lib/CodeGen/StackMaps.cpp @@ -113,7 +113,7 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, unsigned Size = DL.getPointerSizeInBits(); assert((Size % 8) == 0 && "Need pointer size in bytes."); Size /= 8; - unsigned Reg = (++MOI)->getReg(); + Register Reg = (++MOI)->getReg(); int64_t Imm = (++MOI)->getImm(); Locs.emplace_back(StackMaps::Location::Direct, Size, getDwarfRegNum(Reg, TRI), Imm); @@ -122,7 +122,7 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, case StackMaps::IndirectMemRefOp: { int64_t Size = (++MOI)->getImm(); assert(Size > 0 && "Need a valid size for indirect memory locations."); - unsigned Reg = (++MOI)->getReg(); + Register Reg = (++MOI)->getReg(); int64_t Imm = (++MOI)->getImm(); Locs.emplace_back(StackMaps::Location::Indirect, Size, getDwarfRegNum(Reg, TRI), Imm); @@ -148,14 +148,14 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, if (MOI->isImplicit()) return ++MOI; - assert(TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) && + assert(Register::isPhysicalRegister(MOI->getReg()) && "Virtreg operands should have been rewritten before now."); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(MOI->getReg()); assert(!MOI->getSubReg() && "Physical subreg still around."); unsigned Offset = 0; unsigned DwarfRegNum = getDwarfRegNum(MOI->getReg(), TRI); - unsigned LLVMRegNum = TRI->getLLVMRegNum(DwarfRegNum, false); + unsigned LLVMRegNum = *TRI->getLLVMRegNum(DwarfRegNum, false); unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNum, MOI->getReg()); if (SubRegIdx) Offset = TRI->getSubRegIdxOffset(SubRegIdx); diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 809960c7fdf9..5683d1db473c 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -17,7 +17,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/Passes.h" @@ -157,6 +156,68 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge, return NeedsProtector; } +bool StackProtector::HasAddressTaken(const Instruction *AI) { + for (const User *U : AI->users()) { + const auto *I = cast(U); + switch (I->getOpcode()) { + case Instruction::Store: + if (AI == cast(I)->getValueOperand()) + return true; + break; + case Instruction::AtomicCmpXchg: + // cmpxchg conceptually includes both a load and store from the same + // 
location. So, like store, the value being stored is what matters. + if (AI == cast(I)->getNewValOperand()) + return true; + break; + case Instruction::PtrToInt: + if (AI == cast(I)->getOperand(0)) + return true; + break; + case Instruction::Call: { + // Ignore intrinsics that do not become real instructions. + // TODO: Narrow this to intrinsics that have store-like effects. + const auto *CI = cast(I); + if (!isa(CI) && !CI->isLifetimeStartOrEnd()) + return true; + break; + } + case Instruction::Invoke: + return true; + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::Select: + case Instruction::AddrSpaceCast: + if (HasAddressTaken(I)) + return true; + break; + case Instruction::PHI: { + // Keep track of what PHI nodes we have already visited to ensure + // they are only visited once. + const auto *PN = cast(I); + if (VisitedPHIs.insert(PN).second) + if (HasAddressTaken(PN)) + return true; + break; + } + case Instruction::Load: + case Instruction::AtomicRMW: + case Instruction::Ret: + // These instructions take an address operand, but have load-like or + // other innocuous behavior that should not trigger a stack protector. + // atomicrmw conceptually has both load and store semantics, but the + // value being stored must be integer; so if a pointer is being stored, + // we'll catch it in the PtrToInt case above. + break; + default: + // Conservatively return true for any instruction that takes an address + // operand, but is not handled above. + return true; + } + } + return false; +} + /// Search for the first call to the llvm.stackprotector intrinsic and return it /// if present. static const CallInst *findStackProtectorIntrinsic(Function &F) { @@ -264,9 +325,7 @@ bool StackProtector::RequiresStackProtector() { continue; } - if (Strong && PointerMayBeCaptured(AI, - /* ReturnCaptures */ false, - /* StoreCaptures */ true)) { + if (Strong && HasAddressTaken(AI)) { ++NumAddrTaken; Layout.insert(std::make_pair(AI, MachineFrameInfo::SSPLK_AddrOf)); ORE.emit([&]() { diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 99b533e10b87..9c8143c55dc2 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -221,7 +221,7 @@ void StackSlotColoring::InitializeSlots() { for (auto *I : Intervals) { LiveInterval &li = I->second; LLVM_DEBUG(li.dump()); - int FI = TargetRegisterInfo::stackSlot2Index(li.reg); + int FI = Register::stackSlot2Index(li.reg); if (MFI->isDeadObjectIndex(FI)) continue; @@ -268,7 +268,7 @@ StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const { int StackSlotColoring::ColorSlot(LiveInterval *li) { int Color = -1; bool Share = false; - int FI = TargetRegisterInfo::stackSlot2Index(li->reg); + int FI = Register::stackSlot2Index(li->reg); uint8_t StackID = MFI->getStackID(FI); if (!DisableSharing) { @@ -330,7 +330,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { bool Changed = false; for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; - int SS = TargetRegisterInfo::stackSlot2Index(li->reg); + int SS = Register::stackSlot2Index(li->reg); int NewSS = ColorSlot(li); assert(NewSS >= 0 && "Stack coloring failed?"); SlotMapping[SS] = NewSS; @@ -343,7 +343,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "\nSpill slots after coloring:\n"); for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; - int SS = 
TargetRegisterInfo::stackSlot2Index(li->reg); + int SS = Register::stackSlot2Index(li->reg); li->weight = SlotWeights[SS]; } // Sort them by new weight. diff --git a/lib/CodeGen/SwiftErrorValueTracking.cpp b/lib/CodeGen/SwiftErrorValueTracking.cpp index 96821cadb1b6..c72a04276a4f 100644 --- a/lib/CodeGen/SwiftErrorValueTracking.cpp +++ b/lib/CodeGen/SwiftErrorValueTracking.cpp @@ -13,9 +13,10 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SwiftErrorValueTracking.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/Value.h" diff --git a/lib/CodeGen/TailDuplicator.cpp b/lib/CodeGen/TailDuplicator.cpp index a0590a8a6cc6..03c68a37e459 100644 --- a/lib/CodeGen/TailDuplicator.cpp +++ b/lib/CodeGen/TailDuplicator.cpp @@ -235,8 +235,8 @@ bool TailDuplicator::tailDuplicateAndUpdate( MachineInstr *Copy = Copies[i]; if (!Copy->isCopy()) continue; - unsigned Dst = Copy->getOperand(0).getReg(); - unsigned Src = Copy->getOperand(1).getReg(); + Register Dst = Copy->getOperand(0).getReg(); + Register Src = Copy->getOperand(1).getReg(); if (MRI->hasOneNonDBGUse(Src) && MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) { // Copy is the only use. Do trivial copy propagation here. @@ -312,7 +312,7 @@ static void getRegsUsedByPHIs(const MachineBasicBlock &BB, if (!MI.isPHI()) break; for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { - unsigned SrcReg = MI.getOperand(i).getReg(); + Register SrcReg = MI.getOperand(i).getReg(); UsedByPhi->insert(SrcReg); } } @@ -340,17 +340,17 @@ void TailDuplicator::processPHI( DenseMap &LocalVRMap, SmallVectorImpl> &Copies, const DenseSet &RegsUsedByPhi, bool Remove) { - unsigned DefReg = MI->getOperand(0).getReg(); + Register DefReg = MI->getOperand(0).getReg(); unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); assert(SrcOpIdx && "Unable to find matching PHI source?"); - unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg(); + Register SrcReg = MI->getOperand(SrcOpIdx).getReg(); unsigned SrcSubReg = MI->getOperand(SrcOpIdx).getSubReg(); const TargetRegisterClass *RC = MRI->getRegClass(DefReg); LocalVRMap.insert(std::make_pair(DefReg, RegSubRegPair(SrcReg, SrcSubReg))); // Insert a copy from source to the end of the block. The def register is the // available value liveout of the block. 
- unsigned NewDef = MRI->createVirtualRegister(RC); + Register NewDef = MRI->createVirtualRegister(RC); Copies.push_back(std::make_pair(NewDef, RegSubRegPair(SrcReg, SrcSubReg))); if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg)) addSSAUpdateEntry(DefReg, NewDef, PredBB); @@ -384,12 +384,12 @@ void TailDuplicator::duplicateInstruction( MachineOperand &MO = NewMI.getOperand(i); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; if (MO.isDef()) { const TargetRegisterClass *RC = MRI->getRegClass(Reg); - unsigned NewReg = MRI->createVirtualRegister(RC); + Register NewReg = MRI->createVirtualRegister(RC); MO.setReg(NewReg); LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0))); if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg)) @@ -433,7 +433,7 @@ void TailDuplicator::duplicateInstruction( auto *NewRC = MI->getRegClassConstraint(i, TII, TRI); if (NewRC == nullptr) NewRC = OrigRC; - unsigned NewReg = MRI->createVirtualRegister(NewRC); + Register NewReg = MRI->createVirtualRegister(NewRC); BuildMI(*PredBB, NewMI, NewMI.getDebugLoc(), TII->get(TargetOpcode::COPY), NewReg) .addReg(VI->second.Reg, 0, VI->second.SubReg); @@ -477,7 +477,7 @@ void TailDuplicator::updateSuccessorsPHIs( assert(Idx != 0); MachineOperand &MO0 = MI.getOperand(Idx); - unsigned Reg = MO0.getReg(); + Register Reg = MO0.getReg(); if (isDead) { // Folded into the previous BB. // There could be duplicate phi source entries. FIXME: Should sdisel diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp index 9c4483cb240d..9eeacc2584cb 100644 --- a/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCRegisterInfo.h" @@ -71,7 +72,9 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF, // When interprocedural register allocation is enabled caller saved registers // are preferred over callee saved registers. - if (MF.getTarget().Options.EnableIPRA && isSafeForNoCSROpt(MF.getFunction())) + if (MF.getTarget().Options.EnableIPRA && + isSafeForNoCSROpt(MF.getFunction()) && + isProfitableForNoCSROpt(MF.getFunction())) return; // Get the callee saved register list... @@ -118,6 +121,18 @@ unsigned TargetFrameLowering::getStackAlignmentSkew( return 0; } +bool TargetFrameLowering::isSafeForNoCSROpt(const Function &F) { + if (!F.hasLocalLinkage() || F.hasAddressTaken() || + !F.hasFnAttribute(Attribute::NoRecurse)) + return false; + // Function should not be optimized as tail call. 
+ for (const User *U : F.users()) + if (auto CS = ImmutableCallSite(U)) + if (CS.isTailCall()) + return false; + return true; +} + int TargetFrameLowering::getInitialCFAOffset(const MachineFunction &MF) const { llvm_unreachable("getInitialCFAOffset() not implemented!"); } @@ -125,4 +140,4 @@ int TargetFrameLowering::getInitialCFAOffset(const MachineFunction &MF) const { unsigned TargetFrameLowering::getInitialCFARegister(const MachineFunction &MF) const { llvm_unreachable("getInitialCFARegister() not implemented!"); -} \ No newline at end of file +} diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index 868617ffe14d..6cae3b869501 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" @@ -142,7 +143,7 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, while (Tail != MBB->end()) { auto MI = Tail++; if (MI->isCall()) - MBB->getParent()->updateCallSiteInfo(&*MI); + MBB->getParent()->eraseCallSiteInfo(&*MI); MBB->erase(MI); } @@ -183,10 +184,10 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool Reg2IsInternal = MI.getOperand(Idx2).isInternalRead(); // Avoid calling isRenamable for virtual registers since we assert that // renamable property is only queried/set for physical registers. - bool Reg1IsRenamable = TargetRegisterInfo::isPhysicalRegister(Reg1) + bool Reg1IsRenamable = Register::isPhysicalRegister(Reg1) ? MI.getOperand(Idx1).isRenamable() : false; - bool Reg2IsRenamable = TargetRegisterInfo::isPhysicalRegister(Reg2) + bool Reg2IsRenamable = Register::isPhysicalRegister(Reg2) ? MI.getOperand(Idx2).isRenamable() : false; // If destination is tied to either of the commuted source register, then @@ -228,9 +229,9 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI, CommutedMI->getOperand(Idx1).setIsInternalRead(Reg2IsInternal); // Avoid calling setIsRenamable for virtual registers since we assert that // renamable property is only queried/set for physical registers. 
- if (TargetRegisterInfo::isPhysicalRegister(Reg1)) + if (Register::isPhysicalRegister(Reg1)) CommutedMI->getOperand(Idx2).setIsRenamable(Reg1IsRenamable); - if (TargetRegisterInfo::isPhysicalRegister(Reg2)) + if (Register::isPhysicalRegister(Reg2)) CommutedMI->getOperand(Idx1).setIsRenamable(Reg2IsRenamable); return CommutedMI; } @@ -281,7 +282,7 @@ bool TargetInstrInfo::fixCommutedOpIndices(unsigned &ResultIdx1, return true; } -bool TargetInstrInfo::findCommutedOpIndices(MachineInstr &MI, +bool TargetInstrInfo::findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { assert(!MI.isBundle() && @@ -393,7 +394,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, if (BitOffset < 0 || BitOffset % 8) return false; - Size = BitSize /= 8; + Size = BitSize / 8; Offset = (unsigned)BitOffset / 8; assert(TRI->getSpillSize(*RC) >= (Offset + Size) && "bad subregister range"); @@ -442,16 +443,15 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI, if (FoldOp.getSubReg() || LiveOp.getSubReg()) return nullptr; - unsigned FoldReg = FoldOp.getReg(); - unsigned LiveReg = LiveOp.getReg(); + Register FoldReg = FoldOp.getReg(); + Register LiveReg = LiveOp.getReg(); - assert(TargetRegisterInfo::isVirtualRegister(FoldReg) && - "Cannot fold physregs"); + assert(Register::isVirtualRegister(FoldReg) && "Cannot fold physregs"); const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); const TargetRegisterClass *RC = MRI.getRegClass(FoldReg); - if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg())) + if (Register::isPhysicalRegister(LiveOp.getReg())) return RC->contains(LiveOp.getReg()) ? RC : nullptr; if (RC->hasSubClassEq(MRI.getRegClass(LiveReg))) @@ -674,9 +674,9 @@ bool TargetInstrInfo::hasReassociableOperands( // reassociate. MachineInstr *MI1 = nullptr; MachineInstr *MI2 = nullptr; - if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg())) + if (Op1.isReg() && Register::isVirtualRegister(Op1.getReg())) MI1 = MRI.getUniqueVRegDef(Op1.getReg()); - if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg())) + if (Op2.isReg() && Register::isVirtualRegister(Op2.getReg())) MI2 = MRI.getUniqueVRegDef(Op2.getReg()); // And they need to be in the trace (otherwise, they won't have a depth). 
@@ -805,27 +805,27 @@ void TargetInstrInfo::reassociateOps( MachineOperand &OpY = Root.getOperand(OpIdx[Row][3]); MachineOperand &OpC = Root.getOperand(0); - unsigned RegA = OpA.getReg(); - unsigned RegB = OpB.getReg(); - unsigned RegX = OpX.getReg(); - unsigned RegY = OpY.getReg(); - unsigned RegC = OpC.getReg(); + Register RegA = OpA.getReg(); + Register RegB = OpB.getReg(); + Register RegX = OpX.getReg(); + Register RegY = OpY.getReg(); + Register RegC = OpC.getReg(); - if (TargetRegisterInfo::isVirtualRegister(RegA)) + if (Register::isVirtualRegister(RegA)) MRI.constrainRegClass(RegA, RC); - if (TargetRegisterInfo::isVirtualRegister(RegB)) + if (Register::isVirtualRegister(RegB)) MRI.constrainRegClass(RegB, RC); - if (TargetRegisterInfo::isVirtualRegister(RegX)) + if (Register::isVirtualRegister(RegX)) MRI.constrainRegClass(RegX, RC); - if (TargetRegisterInfo::isVirtualRegister(RegY)) + if (Register::isVirtualRegister(RegY)) MRI.constrainRegClass(RegY, RC); - if (TargetRegisterInfo::isVirtualRegister(RegC)) + if (Register::isVirtualRegister(RegC)) MRI.constrainRegClass(RegC, RC); // Create a new virtual register for the result of (X op Y) instead of // recycling RegB because the MachineCombiner's computation of the critical // path requires a new register definition rather than an existing one. - unsigned NewVR = MRI.createVirtualRegister(RC); + Register NewVR = MRI.createVirtualRegister(RC); InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); unsigned Opcode = Root.getOpcode(); @@ -880,21 +880,21 @@ void TargetInstrInfo::genAlternativeCodeSequence( } bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( - const MachineInstr &MI, AliasAnalysis *AA) const { + const MachineInstr &MI, AAResults *AA) const { const MachineFunction &MF = *MI.getMF(); const MachineRegisterInfo &MRI = MF.getRegInfo(); // Remat clients assume operand 0 is the defined register. if (!MI.getNumOperands() || !MI.getOperand(0).isReg()) return false; - unsigned DefReg = MI.getOperand(0).getReg(); + Register DefReg = MI.getOperand(0).getReg(); // A sub-register definition can only be rematerialized if the instruction // doesn't read the other parts of the register. Otherwise it is really a // read-modify-write operation on the full virtual register which cannot be // moved safely. - if (TargetRegisterInfo::isVirtualRegister(DefReg) && - MI.getOperand(0).getSubReg() && MI.readsVirtualRegister(DefReg)) + if (Register::isVirtualRegister(DefReg) && MI.getOperand(0).getSubReg() && + MI.readsVirtualRegister(DefReg)) return false; // A load from a fixed stack slot can be rematerialized. This may be @@ -924,12 +924,12 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; // Check for a well-behaved physical register. - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { if (MO.isUse()) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. 
Alternatively, if it's allocatable, @@ -1120,6 +1120,24 @@ bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel, return (DefCycle != -1 && DefCycle <= 1); } +Optional +TargetInstrInfo::describeLoadedValue(const MachineInstr &MI) const { + const MachineFunction *MF = MI.getMF(); + const MachineOperand *Op = nullptr; + DIExpression *Expr = DIExpression::get(MF->getFunction().getContext(), {});; + const MachineOperand *SrcRegOp, *DestRegOp; + + if (isCopyInstr(MI, SrcRegOp, DestRegOp)) { + Op = SrcRegOp; + return ParamLoadedValue(*Op, Expr); + } else if (MI.isMoveImmediate()) { + Op = &MI.getOperand(1); + return ParamLoadedValue(*Op, Expr); + } + + return None; +} + /// Both DefMI and UseMI must be valid. By default, call directly to the /// itinerary. This may be overriden by the target. int TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, @@ -1227,3 +1245,5 @@ bool TargetInstrInfo::getInsertSubregInputs( InsertedReg.SubIdx = (unsigned)MOSubIdx.getImm(); return true; } + +TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() {} diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 9b28c1a6c450..9b23012f47e3 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -167,6 +167,7 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) { setLibcallName(RTLIB::BZERO, "__bzero"); break; case Triple::aarch64: + case Triple::aarch64_32: setLibcallName(RTLIB::BZERO, "bzero"); break; default: @@ -197,6 +198,11 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) { setLibcallName(RTLIB::SINCOS_PPCF128, "sincosl"); } + if (TT.isPS4CPU()) { + setLibcallName(RTLIB::SINCOS_F32, "sincosf"); + setLibcallName(RTLIB::SINCOS_F64, "sincos"); + } + if (TT.isOSOpenBSD()) { setLibcallName(RTLIB::STACKPROTECTOR_CHECK_FAIL, nullptr); } @@ -578,13 +584,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { BooleanFloatContents = UndefinedBooleanContent; BooleanVectorContents = UndefinedBooleanContent; SchedPreferenceInfo = Sched::ILP; - JumpBufSize = 0; - JumpBufAlignment = 0; - MinFunctionAlignment = 0; - PrefFunctionAlignment = 0; - PrefLoopAlignment = 0; GatherAllAliasesMaxDepth = 18; - MinStackArgumentAlignment = 1; // TODO: the default will be switched to 0 in the next commit, along // with the Target-specific changes necessary. MaxAtomicSizeInBitsSupported = 1024; @@ -653,6 +653,7 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::SMULFIX, VT, Expand); setOperationAction(ISD::SMULFIXSAT, VT, Expand); setOperationAction(ISD::UMULFIX, VT, Expand); + setOperationAction(ISD::UMULFIXSAT, VT, Expand); // Overflow operations default to expand setOperationAction(ISD::SADDO, VT, Expand); @@ -689,6 +690,7 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand); setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand); setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand); + setOperationAction(ISD::SPLAT_VECTOR, VT, Expand); } // Constrained floating-point operations default to expand. 
@@ -708,16 +710,22 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::STRICT_FLOG, VT, Expand); setOperationAction(ISD::STRICT_FLOG10, VT, Expand); setOperationAction(ISD::STRICT_FLOG2, VT, Expand); + setOperationAction(ISD::STRICT_LRINT, VT, Expand); + setOperationAction(ISD::STRICT_LLRINT, VT, Expand); setOperationAction(ISD::STRICT_FRINT, VT, Expand); setOperationAction(ISD::STRICT_FNEARBYINT, VT, Expand); setOperationAction(ISD::STRICT_FCEIL, VT, Expand); setOperationAction(ISD::STRICT_FFLOOR, VT, Expand); + setOperationAction(ISD::STRICT_LROUND, VT, Expand); + setOperationAction(ISD::STRICT_LLROUND, VT, Expand); setOperationAction(ISD::STRICT_FROUND, VT, Expand); setOperationAction(ISD::STRICT_FTRUNC, VT, Expand); setOperationAction(ISD::STRICT_FMAXNUM, VT, Expand); setOperationAction(ISD::STRICT_FMINNUM, VT, Expand); setOperationAction(ISD::STRICT_FP_ROUND, VT, Expand); setOperationAction(ISD::STRICT_FP_EXTEND, VT, Expand); + setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Expand); + setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Expand); // For most targets @llvm.get.dynamic.area.offset just returns 0. setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand); @@ -824,7 +832,8 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT); assert((LA == TypeLegal || LA == TypeSoftenFloat || - ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger) && + (NVT.isVector() || + ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger)) && "Promote may not follow Expand or Promote"); if (LA == TypeSplitVector) @@ -1257,17 +1266,23 @@ void TargetLoweringBase::computeRegisterProperties( MVT EltVT = VT.getVectorElementType(); unsigned NElts = VT.getVectorNumElements(); bool IsLegalWiderType = false; + bool IsScalable = VT.isScalableVector(); LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT); switch (PreferredAction) { - case TypePromoteInteger: + case TypePromoteInteger: { + MVT::SimpleValueType EndVT = IsScalable ? + MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE : + MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE; // Try to promote the elements of integer vectors. If no legal // promotion was found, fall through to the widen-vector method. - for (unsigned nVT = i + 1; nVT <= MVT::LAST_INTEGER_VECTOR_VALUETYPE; ++nVT) { + for (unsigned nVT = i + 1; + (MVT::SimpleValueType)nVT <= EndVT; ++nVT) { MVT SVT = (MVT::SimpleValueType) nVT; // Promote vectors of integers to vectors with the same number // of elements, with a wider element type. if (SVT.getScalarSizeInBits() > EltVT.getSizeInBits() && - SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)) { + SVT.getVectorNumElements() == NElts && + SVT.isScalableVector() == IsScalable && isTypeLegal(SVT)) { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; NumRegistersForVT[i] = 1; @@ -1279,23 +1294,37 @@ void TargetLoweringBase::computeRegisterProperties( if (IsLegalWiderType) break; LLVM_FALLTHROUGH; + } case TypeWidenVector: - // Try to widen the vector. - for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - MVT SVT = (MVT::SimpleValueType) nVT; - if (SVT.getVectorElementType() == EltVT - && SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) { - TransformToType[i] = SVT; - RegisterTypeForVT[i] = SVT; - NumRegistersForVT[i] = 1; + if (isPowerOf2_32(NElts)) { + // Try to widen the vector. 
+ for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + MVT SVT = (MVT::SimpleValueType) nVT; + if (SVT.getVectorElementType() == EltVT + && SVT.getVectorNumElements() > NElts + && SVT.isScalableVector() == IsScalable && isTypeLegal(SVT)) { + TransformToType[i] = SVT; + RegisterTypeForVT[i] = SVT; + NumRegistersForVT[i] = 1; + ValueTypeActions.setTypeAction(VT, TypeWidenVector); + IsLegalWiderType = true; + break; + } + } + if (IsLegalWiderType) + break; + } else { + // Only widen to the next power of 2 to keep consistency with EVT. + MVT NVT = VT.getPow2VectorType(); + if (isTypeLegal(NVT)) { + TransformToType[i] = NVT; ValueTypeActions.setTypeAction(VT, TypeWidenVector); - IsLegalWiderType = true; + RegisterTypeForVT[i] = NVT; + NumRegistersForVT[i] = 1; break; } } - if (IsLegalWiderType) - break; LLVM_FALLTHROUGH; case TypeSplitVector: @@ -1488,12 +1517,9 @@ unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty, return DL.getABITypeAlignment(Ty); } -bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, - const DataLayout &DL, EVT VT, - unsigned AddrSpace, - unsigned Alignment, - MachineMemOperand::Flags Flags, - bool *Fast) const { +bool TargetLoweringBase::allowsMemoryAccessForAlignment( + LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, + unsigned Alignment, MachineMemOperand::Flags Flags, bool *Fast) const { // Check if the specified alignment is sufficient based on the data layout. // TODO: While using the data layout works in practice, a better solution // would be to implement this check directly (make this a virtual function). @@ -1511,6 +1537,21 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, Fast); } +bool TargetLoweringBase::allowsMemoryAccessForAlignment( + LLVMContext &Context, const DataLayout &DL, EVT VT, + const MachineMemOperand &MMO, bool *Fast) const { + return allowsMemoryAccessForAlignment(Context, DL, VT, MMO.getAddrSpace(), + MMO.getAlignment(), MMO.getFlags(), + Fast); +} + +bool TargetLoweringBase::allowsMemoryAccess( + LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, + unsigned Alignment, MachineMemOperand::Flags Flags, bool *Fast) const { + return allowsMemoryAccessForAlignment(Context, DL, VT, AddrSpace, Alignment, + Flags, Fast); +} + bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, const MachineMemOperand &MMO, diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 4c8f75b237aa..4978f4b9500b 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -43,6 +43,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSectionWasm.h" +#include "llvm/MC/MCSectionXCOFF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" @@ -154,6 +155,7 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, break; case Triple::aarch64: case Triple::aarch64_be: + case Triple::aarch64_32: // The small model guarantees static code/data size < 4GB, but not where it // will be in memory. Most of these could end up >2GB away so even a signed // pc-relative 32-bit address is insufficient, theoretically. 
@@ -375,7 +377,7 @@ void TargetLoweringObjectFileELF::emitPersonalityValue( ELF::SHT_PROGBITS, Flags, 0); unsigned Size = DL.getPointerSize(); Streamer.SwitchSection(Sec); - Streamer.EmitValueToAlignment(DL.getPointerABIAlignment(0)); + Streamer.EmitValueToAlignment(DL.getPointerABIAlignment(0).value()); Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); const MCExpr *E = MCConstantExpr::create(Size, getContext()); Streamer.emitELFSize(Label, E); @@ -524,8 +526,8 @@ static const MCSymbolELF *getAssociatedSymbol(const GlobalObject *GO, if (!VM) report_fatal_error("MD_associated operand is not ValueAsMetadata"); - GlobalObject *OtherGO = dyn_cast(VM->getValue()); - return OtherGO ? dyn_cast(TM.getSymbol(OtherGO)) : nullptr; + auto *OtherGV = dyn_cast(VM->getValue()); + return OtherGV ? dyn_cast(TM.getSymbol(OtherGV)) : nullptr; } static unsigned getEntrySizeForKind(SectionKind Kind) { @@ -566,6 +568,8 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( SectionName = Attrs.getAttribute("bss-section").getValueAsString(); } else if (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly()) { SectionName = Attrs.getAttribute("rodata-section").getValueAsString(); + } else if (Attrs.hasAttribute("relro-section") && Kind.isReadOnlyWithRel()) { + SectionName = Attrs.getAttribute("relro-section").getValueAsString(); } else if (Attrs.hasAttribute("data-section") && Kind.isData()) { SectionName = Attrs.getAttribute("data-section").getValueAsString(); } @@ -1107,8 +1111,8 @@ MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol( } const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel( - const MCSymbol *Sym, const MCValue &MV, int64_t Offset, - MachineModuleInfo *MMI, MCStreamer &Streamer) const { + const GlobalValue *GV, const MCSymbol *Sym, const MCValue &MV, + int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const { // Although MachO 32-bit targets do not explicitly have a GOTPCREL relocation // as 64-bit do, we replace the GOT equivalent by accessing the final symbol // through a non_lazy_ptr stub instead. One advantage is that it allows the @@ -1165,12 +1169,10 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel( MCSymbol *Stub = Ctx.getOrCreateSymbol(Name); MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(Stub); - if (!StubSym.getPointer()) { - bool IsIndirectLocal = Sym->isDefined() && !Sym->isExternal(); - // With the assumption that IsIndirectLocal == GV->hasLocalLinkage(). + + if (!StubSym.getPointer()) StubSym = MachineModuleInfoImpl::StubValueTy(const_cast(Sym), - !IsIndirectLocal); - } + !GV->hasLocalLinkage()); const MCExpr *BSymExpr = MCSymbolRefExpr::create(BaseSym, MCSymbolRefExpr::VK_None, Ctx); @@ -1519,7 +1521,8 @@ static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx, // internally, so we use ".CRT$XCA00001" for them. SmallString<24> Name; raw_svector_ostream OS(Name); - OS << ".CRT$XC" << (Priority < 200 ? 'A' : 'T') << format("%05u", Priority); + OS << ".CRT$X" << (IsCtor ? "C" : "T") << + (Priority < 200 ? 
'A' : 'T') << format("%05u", Priority); MCSectionCOFF *Sec = Ctx.getCOFFSection( Name, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, SectionKind::getReadOnly()); @@ -1595,7 +1598,8 @@ const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference( static std::string APIntToHexString(const APInt &AI) { unsigned Width = (AI.getBitWidth() / 8) * 2; - std::string HexString = utohexstr(AI.getLimitedValue(), /*LowerCase=*/true); + std::string HexString = AI.toString(16, /*Signed=*/false); + transform(HexString.begin(), HexString.end(), HexString.begin(), tolower); unsigned Size = HexString.size(); assert(Width >= Size && "hex string is too large!"); HexString.insert(HexString.begin(), Width - Size, '0'); @@ -1819,3 +1823,82 @@ MCSection *TargetLoweringObjectFileWasm::getStaticDtorSection( llvm_unreachable("@llvm.global_dtors should have been lowered already"); return nullptr; } + +//===----------------------------------------------------------------------===// +// XCOFF +//===----------------------------------------------------------------------===// +MCSection *TargetLoweringObjectFileXCOFF::getExplicitSectionGlobal( + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { + report_fatal_error("XCOFF explicit sections not yet implemented."); +} + +MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal( + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { + assert(!TM.getFunctionSections() && !TM.getDataSections() && + "XCOFF unique sections not yet implemented."); + + // Common symbols go into a csect with matching name which will get mapped + // into the .bss section. + if (Kind.isBSSLocal() || Kind.isCommon()) { + SmallString<128> Name; + getNameWithPrefix(Name, GO, TM); + XCOFF::StorageClass SC = + TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO); + return getContext().getXCOFFSection( + Name, Kind.isBSSLocal() ? 
XCOFF::XMC_BS : XCOFF::XMC_RW, XCOFF::XTY_CM, + SC, Kind, /* BeginSymbolName */ nullptr); + } + + if (Kind.isText()) + return TextSection; + + if (Kind.isData()) + return DataSection; + + report_fatal_error("XCOFF other section types not yet implemented."); +} + +bool TargetLoweringObjectFileXCOFF::shouldPutJumpTableInFunctionSection( + bool UsesLabelDifference, const Function &F) const { + report_fatal_error("TLOF XCOFF not yet implemented."); +} + +void TargetLoweringObjectFileXCOFF::Initialize(MCContext &Ctx, + const TargetMachine &TgtM) { + TargetLoweringObjectFile::Initialize(Ctx, TgtM); + TTypeEncoding = 0; + PersonalityEncoding = 0; + LSDAEncoding = 0; +} + +MCSection *TargetLoweringObjectFileXCOFF::getStaticCtorSection( + unsigned Priority, const MCSymbol *KeySym) const { + report_fatal_error("XCOFF ctor section not yet implemented."); +} + +MCSection *TargetLoweringObjectFileXCOFF::getStaticDtorSection( + unsigned Priority, const MCSymbol *KeySym) const { + report_fatal_error("XCOFF dtor section not yet implemented."); +} + +const MCExpr *TargetLoweringObjectFileXCOFF::lowerRelativeReference( + const GlobalValue *LHS, const GlobalValue *RHS, + const TargetMachine &TM) const { + report_fatal_error("XCOFF not yet implemented."); +} + +XCOFF::StorageClass TargetLoweringObjectFileXCOFF::getStorageClassForGlobal( + const GlobalObject *GO) { + switch (GO->getLinkage()) { + case GlobalValue::InternalLinkage: + return XCOFF::C_HIDEXT; + case GlobalValue::ExternalLinkage: + case GlobalValue::CommonLinkage: + return XCOFF::C_EXT; + case GlobalValue::ExternalWeakLinkage: + return XCOFF::C_WEAKEXT; + default: + report_fatal_error( + "Unhandled linkage when mapping linkage to StorageClass."); + } +} diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp index 36df02692f86..f1f4f65adf7c 100644 --- a/lib/CodeGen/TargetPassConfig.cpp +++ b/lib/CodeGen/TargetPassConfig.cpp @@ -49,9 +49,10 @@ using namespace llvm; -cl::opt EnableIPRA("enable-ipra", cl::init(false), cl::Hidden, - cl::desc("Enable interprocedural register allocation " - "to reduce load/store at procedure calls.")); +static cl::opt + EnableIPRA("enable-ipra", cl::init(false), cl::Hidden, + cl::desc("Enable interprocedural register allocation " + "to reduce load/store at procedure calls.")); static cl::opt DisablePostRASched("disable-post-ra", cl::Hidden, cl::desc("Disable Post Regalloc Scheduler")); static cl::opt DisableBranchFold("disable-branch-fold", cl::Hidden, @@ -152,8 +153,10 @@ static cl::opt EnableGlobalISelAbort( // substitutePass(&PostRASchedulerID, &PostMachineSchedulerID). // Targets can return true in targetSchedulesPostRAScheduling() and // insert a PostRA scheduling pass wherever it wants. -cl::opt MISchedPostRA("misched-postra", cl::Hidden, - cl::desc("Run MachineScheduler post regalloc (independent of preRA sched)")); +static cl::opt MISchedPostRA( + "misched-postra", cl::Hidden, + cl::desc( + "Run MachineScheduler post regalloc (independent of preRA sched)")); // Experimental option to run live interval analysis early. static cl::opt EarlyLiveIntervals("early-live-intervals", cl::Hidden, @@ -175,10 +178,10 @@ static cl::opt UseCFLAA( /// Option names for limiting the codegen pipeline. /// Those are used in error reporting and we didn't want /// to duplicate their names all over the place. 
-const char *StartAfterOptName = "start-after"; -const char *StartBeforeOptName = "start-before"; -const char *StopAfterOptName = "stop-after"; -const char *StopBeforeOptName = "stop-before"; +static const char *StartAfterOptName = "start-after"; +static const char *StartBeforeOptName = "start-before"; +static const char *StopAfterOptName = "stop-after"; +static const char *StopBeforeOptName = "stop-before"; static cl::opt StartAfterOpt(StringRef(StartAfterOptName), @@ -654,6 +657,7 @@ void TargetPassConfig::addIRPasses() { // TODO: add a pass insertion point here addPass(createGCLoweringPass()); addPass(createShadowStackGCLoweringPass()); + addPass(createLowerConstantIntrinsicsPass()); // Make sure that no unreachable blocks are instruction selected. addPass(createUnreachableBlockEliminationPass()); @@ -1231,5 +1235,5 @@ bool TargetPassConfig::isGISelCSEEnabled() const { } std::unique_ptr TargetPassConfig::getCSEConfig() const { - return make_unique(); + return std::make_unique(); } diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index f1b2ecf3243b..e5592c31098a 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -86,22 +86,21 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet, namespace llvm { -Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI, +Printable printReg(Register Reg, const TargetRegisterInfo *TRI, unsigned SubIdx, const MachineRegisterInfo *MRI) { return Printable([Reg, TRI, SubIdx, MRI](raw_ostream &OS) { if (!Reg) OS << "$noreg"; - else if (TargetRegisterInfo::isStackSlot(Reg)) - OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); - else if (TargetRegisterInfo::isVirtualRegister(Reg)) { + else if (Register::isStackSlot(Reg)) + OS << "SS#" << Register::stackSlot2Index(Reg); + else if (Register::isVirtualRegister(Reg)) { StringRef Name = MRI ? MRI->getVRegName(Reg) : ""; if (Name != "") { OS << '%' << Name; } else { - OS << '%' << TargetRegisterInfo::virtReg2Index(Reg); + OS << '%' << Register::virtReg2Index(Reg); } - } - else if (!TRI) + } else if (!TRI) OS << '$' << "physreg" << Reg; else if (Reg < TRI->getNumRegs()) { OS << '$'; @@ -143,8 +142,8 @@ Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { Printable printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { return Printable([Unit, TRI](raw_ostream &OS) { - if (TRI && TRI->isVirtualRegister(Unit)) { - OS << '%' << TargetRegisterInfo::virtReg2Index(Unit); + if (Register::isVirtualRegister(Unit)) { + OS << '%' << Register::virtReg2Index(Unit); } else { OS << printRegUnit(Unit, TRI); } @@ -189,7 +188,8 @@ TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const { /// the right type that contains this physreg. const TargetRegisterClass * TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, MVT VT) const { - assert(isPhysicalRegister(reg) && "reg must be a physical register"); + assert(Register::isPhysicalRegister(reg) && + "reg must be a physical register"); // Pick the most sub register class of the right type that contains // this physreg. 
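[Editorial sketch, not part of the patch] The printReg change above classifies a register purely from its numeric value through Register's static predicates (stack slot, virtual, physical). As a rough mental model only, assuming a tag-bit encoding that this patch does not itself define, such a classification could look like:

// Hypothetical sketch; the flag value and names are illustrative assumptions.
namespace sketch {
constexpr unsigned VirtualRegFlag = 1u << 31;             // assumed tag bit
inline bool isVirtualReg(unsigned Reg) { return (Reg & VirtualRegFlag) != 0; }
inline unsigned virtRegIndex(unsigned Reg) { return Reg & ~VirtualRegFlag; }
} // namespace sketch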
@@ -238,24 +238,16 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, static inline const TargetRegisterClass *firstCommonClass(const uint32_t *A, const uint32_t *B, - const TargetRegisterInfo *TRI, - const MVT::SimpleValueType SVT = - MVT::SimpleValueType::Any) { - const MVT VT(SVT); + const TargetRegisterInfo *TRI) { for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32) - if (unsigned Common = *A++ & *B++) { - const TargetRegisterClass *RC = - TRI->getRegClass(I + countTrailingZeros(Common)); - if (SVT == MVT::SimpleValueType::Any || TRI->isTypeLegalForClass(*RC, VT)) - return RC; - } + if (unsigned Common = *A++ & *B++) + return TRI->getRegClass(I + countTrailingZeros(Common)); return nullptr; } const TargetRegisterClass * TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, - const TargetRegisterClass *B, - const MVT::SimpleValueType SVT) const { + const TargetRegisterClass *B) const { // First take care of the trivial cases. if (A == B) return A; @@ -264,7 +256,7 @@ TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, // Register classes are ordered topologically, so the largest common // sub-class it the common sub-class with the smallest ID. - return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this, SVT); + return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this); } const TargetRegisterClass * @@ -409,7 +401,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, // Target-independent hints are either a physical or a virtual register. unsigned Phys = Reg; - if (VRM && isVirtualRegister(Phys)) + if (VRM && Register::isVirtualRegister(Phys)) Phys = VRM->getPhys(Phys); // Don't add the same reg twice (Hints_MRI may contain multiple virtual @@ -417,7 +409,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, if (!HintedRegs.insert(Phys).second) continue; // Check that Phys is a valid hint in VirtReg's register class. - if (!isPhysicalRegister(Phys)) + if (!Register::isPhysicalRegister(Phys)) continue; if (MRI.isReserved(Phys)) continue; @@ -433,6 +425,20 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, return false; } +bool TargetRegisterInfo::isCalleeSavedPhysReg( + unsigned PhysReg, const MachineFunction &MF) const { + if (PhysReg == 0) + return false; + const uint32_t *callerPreservedRegs = + getCallPreservedMask(MF, MF.getFunction().getCallingConv()); + if (callerPreservedRegs) { + assert(Register::isPhysicalRegister(PhysReg) && + "Expected physical register"); + return (callerPreservedRegs[PhysReg / 32] >> PhysReg % 32) & 1; + } + return false; +} + bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const { return !MF.getFunction().hasFnAttribute("no-realign-stack"); } @@ -466,7 +472,7 @@ bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0, unsigned TargetRegisterInfo::getRegSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI) const { const TargetRegisterClass *RC{}; - if (isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { // The size is not directly available for physical registers. // Instead, we need to access a register class that contains Reg and // get the size of that register class. 
@@ -501,7 +507,7 @@ TargetRegisterInfo::lookThruCopyLike(unsigned SrcReg, CopySrcReg = MI->getOperand(2).getReg(); } - if (!isVirtualRegister(CopySrcReg)) + if (!Register::isVirtualRegister(CopySrcReg)) return CopySrcReg; SrcReg = CopySrcReg; diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp index 195279719ad4..ce59452fd1b8 100644 --- a/lib/CodeGen/TargetSchedule.cpp +++ b/lib/CodeGen/TargetSchedule.cpp @@ -300,7 +300,7 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, // TODO: The following hack exists because predication passes do not // correctly append imp-use operands, and readsReg() strangely returns false // for predicated defs. - unsigned Reg = DefMI->getOperand(DefOperIdx).getReg(); + Register Reg = DefMI->getOperand(DefOperIdx).getReg(); const MachineFunction &MF = *DefMI->getMF(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(*DepMI)) diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 43d876646967..ea971809d4e4 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -230,7 +230,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (!MOReg) continue; if (MO.isUse() && MOReg != SavedReg) @@ -299,7 +299,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, MachineOperand &MO = OtherMI.getOperand(i); if (!MO.isReg()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (!MOReg) continue; if (DefReg == MOReg) @@ -418,8 +418,8 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, } else return false; - IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); - IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); + IsSrcPhys = Register::isPhysicalRegister(SrcReg); + IsDstPhys = Register::isPhysicalRegister(DstReg); return true; } @@ -427,8 +427,7 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, /// given instruction, is killed by the given instruction. static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS) { - if (LIS && TargetRegisterInfo::isVirtualRegister(Reg) && - !LIS->isNotInMIMap(*MI)) { + if (LIS && Register::isVirtualRegister(Reg) && !LIS->isNotInMIMap(*MI)) { // FIXME: Sometimes tryInstructionTransform() will add instructions and // test whether they can be folded before keeping them. In this case it // sets a kill before recursively calling tryInstructionTransform() again. @@ -475,12 +474,12 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, MachineInstr *DefMI = &MI; while (true) { // All uses of physical registers are likely to be kills. 
- if (TargetRegisterInfo::isPhysicalRegister(Reg) && + if (Register::isPhysicalRegister(Reg) && (allowFalsePositives || MRI->hasOneUse(Reg))) return true; if (!isPlainlyKilled(DefMI, Reg, LIS)) return false; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return true; MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg); // If there are multiple defs, we can't do a simple analysis, so just @@ -536,7 +535,7 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, } IsDstPhys = false; if (isTwoAddrUse(UseMI, Reg, DstReg)) { - IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); + IsDstPhys = Register::isPhysicalRegister(DstReg); return &UseMI; } return nullptr; @@ -546,13 +545,13 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, /// to. static unsigned getMappedReg(unsigned Reg, DenseMap &RegMap) { - while (TargetRegisterInfo::isVirtualRegister(Reg)) { + while (Register::isVirtualRegister(Reg)) { DenseMap::iterator SI = RegMap.find(Reg); if (SI == RegMap.end()) return 0; Reg = SI->second; } - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return Reg; return 0; } @@ -683,7 +682,7 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI, unsigned RegBIdx, unsigned RegCIdx, unsigned Dist) { - unsigned RegC = MI->getOperand(RegCIdx).getReg(); + Register RegC = MI->getOperand(RegCIdx).getReg(); LLVM_DEBUG(dbgs() << "2addr: COMMUTING : " << *MI); MachineInstr *NewMI = TII->commuteInstruction(*MI, false, RegBIdx, RegCIdx); @@ -700,7 +699,7 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI, // Update source register map. unsigned FromRegC = getMappedReg(RegC, SrcRegMap); if (FromRegC) { - unsigned RegA = MI->getOperand(DstIdx).getReg(); + Register RegA = MI->getOperand(DstIdx).getReg(); SrcRegMap[RegA] = FromRegC; } @@ -911,7 +910,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (!MOReg) continue; if (MO.isDef()) @@ -955,7 +954,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, for (const MachineOperand &MO : OtherMI.operands()) { if (!MO.isReg()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (!MOReg) continue; if (MO.isDef()) { @@ -1093,7 +1092,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, for (const MachineOperand &MO : KillMI->operands()) { if (!MO.isReg()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (MO.isUse()) { if (!MOReg) continue; @@ -1105,7 +1104,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, Uses.insert(MOReg); if (isKill && MOReg != Reg) Kills.insert(MOReg); - } else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) { + } else if (Register::isPhysicalRegister(MOReg)) { Defs.insert(MOReg); if (!MO.isDead()) LiveDefs.insert(MOReg); @@ -1130,7 +1129,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, for (const MachineOperand &MO : OtherMI.operands()) { if (!MO.isReg()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (!MOReg) continue; if (MO.isUse()) { @@ -1154,8 +1153,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, unsigned MOReg = OtherDefs[i]; if (Uses.count(MOReg)) return false; - if (TargetRegisterInfo::isPhysicalRegister(MOReg) && - LiveDefs.count(MOReg)) + if (Register::isPhysicalRegister(MOReg) && 
LiveDefs.count(MOReg)) return false; // Physical register def is seen. Defs.erase(MOReg); @@ -1208,8 +1206,8 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, return false; bool MadeChange = false; - unsigned DstOpReg = MI->getOperand(DstOpIdx).getReg(); - unsigned BaseOpReg = MI->getOperand(BaseOpIdx).getReg(); + Register DstOpReg = MI->getOperand(DstOpIdx).getReg(); + Register BaseOpReg = MI->getOperand(BaseOpIdx).getReg(); unsigned OpsNum = MI->getDesc().getNumOperands(); unsigned OtherOpIdx = MI->getDesc().getNumDefs(); for (; OtherOpIdx < OpsNum; OtherOpIdx++) { @@ -1221,7 +1219,7 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, !TII->findCommutedOpIndices(*MI, BaseOpIdx, OtherOpIdx)) continue; - unsigned OtherOpReg = MI->getOperand(OtherOpIdx).getReg(); + Register OtherOpReg = MI->getOperand(OtherOpIdx).getReg(); bool AggressiveCommute = false; // If OtherOp dies but BaseOp does not, swap the OtherOp and BaseOp @@ -1276,14 +1274,14 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, return false; MachineInstr &MI = *mi; - unsigned regA = MI.getOperand(DstIdx).getReg(); - unsigned regB = MI.getOperand(SrcIdx).getReg(); + Register regA = MI.getOperand(DstIdx).getReg(); + Register regB = MI.getOperand(SrcIdx).getReg(); - assert(TargetRegisterInfo::isVirtualRegister(regB) && + assert(Register::isVirtualRegister(regB) && "cannot make instruction into two-address form"); bool regBKilled = isKilled(MI, regB, MRI, TII, LIS, true); - if (TargetRegisterInfo::isVirtualRegister(regA)) + if (Register::isVirtualRegister(regA)) scanUses(regA); bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist); @@ -1363,7 +1361,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, const TargetRegisterClass *RC = TRI->getAllocatableClass( TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF)); - unsigned Reg = MRI->createVirtualRegister(RC); + Register Reg = MRI->createVirtualRegister(RC); SmallVector NewMIs; if (!TII->unfoldMemoryOperand(*MF, MI, Reg, /*UnfoldLoad=*/true, @@ -1399,8 +1397,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, if (LV) { for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); - if (MO.isReg() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) { if (MO.isUse()) { if (MO.isKill()) { if (NewMIs[0]->killsRegister(MO.getReg())) @@ -1474,8 +1471,8 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { AnyOps = true; MachineOperand &SrcMO = MI->getOperand(SrcIdx); MachineOperand &DstMO = MI->getOperand(DstIdx); - unsigned SrcReg = SrcMO.getReg(); - unsigned DstReg = DstMO.getReg(); + Register SrcReg = SrcMO.getReg(); + Register DstReg = DstMO.getReg(); // Tied constraint already satisfied? if (SrcReg == DstReg) continue; @@ -1485,7 +1482,7 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { // Deal with undef uses immediately - simply rewrite the src operand. if (SrcMO.isUndef() && !DstMO.getSubReg()) { // Constrain the DstReg register class if required. 
- if (TargetRegisterInfo::isVirtualRegister(DstReg)) + if (Register::isVirtualRegister(DstReg)) if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx, TRI, *MF)) MRI->constrainRegClass(DstReg, RC); @@ -1522,7 +1519,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, unsigned DstIdx = TiedPairs[tpi].second; const MachineOperand &DstMO = MI->getOperand(DstIdx); - unsigned RegA = DstMO.getReg(); + Register RegA = DstMO.getReg(); // Grab RegB from the instruction because it may have changed if the // instruction was commuted. @@ -1538,7 +1535,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, } LastCopiedReg = RegA; - assert(TargetRegisterInfo::isVirtualRegister(RegB) && + assert(Register::isVirtualRegister(RegB) && "cannot make instruction into two-address form"); #ifndef NDEBUG @@ -1559,14 +1556,13 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, MIB.addReg(RegB, 0, SubRegB); const TargetRegisterClass *RC = MRI->getRegClass(RegB); if (SubRegB) { - if (TargetRegisterInfo::isVirtualRegister(RegA)) { + if (Register::isVirtualRegister(RegA)) { assert(TRI->getMatchingSuperRegClass(RC, MRI->getRegClass(RegA), SubRegB) && "tied subregister must be a truncation"); // The superreg class will not be used to constrain the subreg class. RC = nullptr; - } - else { + } else { assert(TRI->getMatchingSuperReg(RegA, SubRegB, MRI->getRegClass(RegB)) && "tied subregister must be a truncation"); } @@ -1581,7 +1577,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, if (LIS) { LastCopyIdx = LIS->InsertMachineInstrInMaps(*PrevMI).getRegSlot(); - if (TargetRegisterInfo::isVirtualRegister(RegA)) { + if (Register::isVirtualRegister(RegA)) { LiveInterval &LI = LIS->getInterval(RegA); VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator()); SlotIndex endIdx = @@ -1601,8 +1597,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, } // Make sure regA is a legal regclass for the SrcIdx operand. 
- if (TargetRegisterInfo::isVirtualRegister(RegA) && - TargetRegisterInfo::isVirtualRegister(RegB)) + if (Register::isVirtualRegister(RegA) && Register::isVirtualRegister(RegB)) MRI->constrainRegClass(RegA, RC); MO.setReg(RegA); // The getMatchingSuper asserts guarantee that the register class projected @@ -1744,8 +1739,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { if (TiedPairs.size() == 1) { unsigned SrcIdx = TiedPairs[0].first; unsigned DstIdx = TiedPairs[0].second; - unsigned SrcReg = mi->getOperand(SrcIdx).getReg(); - unsigned DstReg = mi->getOperand(DstIdx).getReg(); + Register SrcReg = mi->getOperand(SrcIdx).getReg(); + Register DstReg = mi->getOperand(DstIdx).getReg(); if (SrcReg != DstReg && tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist, false)) { // The tied operands have been eliminated or shifted further down @@ -1803,9 +1798,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { void TwoAddressInstructionPass:: eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; - unsigned DstReg = MI.getOperand(0).getReg(); - if (MI.getOperand(0).getSubReg() || - TargetRegisterInfo::isPhysicalRegister(DstReg) || + Register DstReg = MI.getOperand(0).getReg(); + if (MI.getOperand(0).getSubReg() || Register::isPhysicalRegister(DstReg) || !(MI.getNumOperands() & 1)) { LLVM_DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI); llvm_unreachable(nullptr); @@ -1821,7 +1815,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { bool DefEmitted = false; for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) { MachineOperand &UseMO = MI.getOperand(i); - unsigned SrcReg = UseMO.getReg(); + Register SrcReg = UseMO.getReg(); unsigned SubIdx = MI.getOperand(i+1).getImm(); // Nothing needs to be inserted for undef operands. if (UseMO.isUndef()) @@ -1855,7 +1849,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { DefEmitted = true; // Update LiveVariables' kill info. - if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg)) + if (LV && isKill && !Register::isPhysicalRegister(SrcReg)) LV->replaceKillInstruction(SrcReg, MI, *CopyMI); LLVM_DEBUG(dbgs() << "Inserted: " << *CopyMI); diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index 177bab32bccc..3289eff71336 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -103,7 +103,8 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { df_iterator_default_set Reachable; bool ModifiedPHI = false; - MMI = getAnalysisIfAvailable(); + auto *MMIWP = getAnalysisIfAvailable(); + MMI = MMIWP ? &MMIWP->getMMI() : nullptr; MachineDominatorTree *MDT = getAnalysisIfAvailable(); MachineLoopInfo *MLI = getAnalysisIfAvailable(); @@ -146,8 +147,14 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { } // Actually remove the blocks now. - for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) + for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) { + // Remove any call site information for calls in the block. + for (auto &I : DeadBlocks[i]->instrs()) + if (I.isCall(MachineInstr::IgnoreBundle)) + DeadBlocks[i]->getParent()->eraseCallSiteInfo(&I); + DeadBlocks[i]->eraseFromParent(); + } // Cleanup PHI nodes. 
for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { @@ -167,8 +174,8 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { if (phi->getNumOperands() == 3) { const MachineOperand &Input = phi->getOperand(1); const MachineOperand &Output = phi->getOperand(0); - unsigned InputReg = Input.getReg(); - unsigned OutputReg = Output.getReg(); + Register InputReg = Input.getReg(); + Register OutputReg = Output.getReg(); assert(Output.getSubReg() == 0 && "Cannot have output subregister"); ModifiedPHI = true; diff --git a/lib/CodeGen/ValueTypes.cpp b/lib/CodeGen/ValueTypes.cpp index a911cdcbec9d..73b862d51c0f 100644 --- a/lib/CodeGen/ValueTypes.cpp +++ b/lib/CodeGen/ValueTypes.cpp @@ -115,8 +115,8 @@ std::string EVT::getEVTString() const { switch (V.SimpleTy) { default: if (isVector()) - return "v" + utostr(getVectorNumElements()) + - getVectorElementType().getEVTString(); + return (isScalableVector() ? "nxv" : "v") + utostr(getVectorNumElements()) + + getVectorElementType().getEVTString(); if (isInteger()) return "i" + utostr(getSizeInBits()); llvm_unreachable("Invalid EVT!"); @@ -144,6 +144,7 @@ std::string EVT::getEVTString() const { case MVT::v32i1: return "v32i1"; case MVT::v64i1: return "v64i1"; case MVT::v128i1: return "v128i1"; + case MVT::v256i1: return "v256i1"; case MVT::v512i1: return "v512i1"; case MVT::v1024i1: return "v1024i1"; case MVT::v1i8: return "v1i8"; @@ -157,6 +158,7 @@ std::string EVT::getEVTString() const { case MVT::v256i8: return "v256i8"; case MVT::v1i16: return "v1i16"; case MVT::v2i16: return "v2i16"; + case MVT::v3i16: return "v3i16"; case MVT::v4i16: return "v4i16"; case MVT::v8i16: return "v8i16"; case MVT::v16i16: return "v16i16"; @@ -187,8 +189,11 @@ std::string EVT::getEVTString() const { case MVT::v1f32: return "v1f32"; case MVT::v2f32: return "v2f32"; case MVT::v2f16: return "v2f16"; + case MVT::v3f16: return "v3f16"; case MVT::v4f16: return "v4f16"; case MVT::v8f16: return "v8f16"; + case MVT::v16f16: return "v16f16"; + case MVT::v32f16: return "v32f16"; case MVT::v3f32: return "v3f32"; case MVT::v4f32: return "v4f32"; case MVT::v5f32: return "v5f32"; @@ -205,6 +210,48 @@ std::string EVT::getEVTString() const { case MVT::v2f64: return "v2f64"; case MVT::v4f64: return "v4f64"; case MVT::v8f64: return "v8f64"; + case MVT::nxv1i1: return "nxv1i1"; + case MVT::nxv2i1: return "nxv2i1"; + case MVT::nxv4i1: return "nxv4i1"; + case MVT::nxv8i1: return "nxv8i1"; + case MVT::nxv16i1: return "nxv16i1"; + case MVT::nxv32i1: return "nxv32i1"; + case MVT::nxv1i8: return "nxv1i8"; + case MVT::nxv2i8: return "nxv2i8"; + case MVT::nxv4i8: return "nxv4i8"; + case MVT::nxv8i8: return "nxv8i8"; + case MVT::nxv16i8: return "nxv16i8"; + case MVT::nxv32i8: return "nxv32i8"; + case MVT::nxv1i16: return "nxv1i16"; + case MVT::nxv2i16: return "nxv2i16"; + case MVT::nxv4i16: return "nxv4i16"; + case MVT::nxv8i16: return "nxv8i16"; + case MVT::nxv16i16:return "nxv16i16"; + case MVT::nxv32i16:return "nxv32i16"; + case MVT::nxv1i32: return "nxv1i32"; + case MVT::nxv2i32: return "nxv2i32"; + case MVT::nxv4i32: return "nxv4i32"; + case MVT::nxv8i32: return "nxv8i32"; + case MVT::nxv16i32:return "nxv16i32"; + case MVT::nxv32i32:return "nxv32i32"; + case MVT::nxv1i64: return "nxv1i64"; + case MVT::nxv2i64: return "nxv2i64"; + case MVT::nxv4i64: return "nxv4i64"; + case MVT::nxv8i64: return "nxv8i64"; + case MVT::nxv16i64:return "nxv16i64"; + case MVT::nxv32i64:return "nxv32i64"; + case MVT::nxv2f16: return "nxv2f16"; + case MVT::nxv4f16: return 
"nxv4f16"; + case MVT::nxv8f16: return "nxv8f16"; + case MVT::nxv1f32: return "nxv1f32"; + case MVT::nxv2f32: return "nxv2f32"; + case MVT::nxv4f32: return "nxv4f32"; + case MVT::nxv8f32: return "nxv8f32"; + case MVT::nxv16f32:return "nxv16f32"; + case MVT::nxv1f64: return "nxv1f64"; + case MVT::nxv2f64: return "nxv2f64"; + case MVT::nxv4f64: return "nxv4f64"; + case MVT::nxv8f64: return "nxv8f64"; case MVT::Metadata:return "Metadata"; case MVT::Untyped: return "Untyped"; case MVT::exnref : return "exnref"; @@ -241,6 +288,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v32i1: return VectorType::get(Type::getInt1Ty(Context), 32); case MVT::v64i1: return VectorType::get(Type::getInt1Ty(Context), 64); case MVT::v128i1: return VectorType::get(Type::getInt1Ty(Context), 128); + case MVT::v256i1: return VectorType::get(Type::getInt1Ty(Context), 256); case MVT::v512i1: return VectorType::get(Type::getInt1Ty(Context), 512); case MVT::v1024i1: return VectorType::get(Type::getInt1Ty(Context), 1024); case MVT::v1i8: return VectorType::get(Type::getInt8Ty(Context), 1); @@ -254,6 +302,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v256i8: return VectorType::get(Type::getInt8Ty(Context), 256); case MVT::v1i16: return VectorType::get(Type::getInt16Ty(Context), 1); case MVT::v2i16: return VectorType::get(Type::getInt16Ty(Context), 2); + case MVT::v3i16: return VectorType::get(Type::getInt16Ty(Context), 3); case MVT::v4i16: return VectorType::get(Type::getInt16Ty(Context), 4); case MVT::v8i16: return VectorType::get(Type::getInt16Ty(Context), 8); case MVT::v16i16: return VectorType::get(Type::getInt16Ty(Context), 16); @@ -282,8 +331,11 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v32i64: return VectorType::get(Type::getInt64Ty(Context), 32); case MVT::v1i128: return VectorType::get(Type::getInt128Ty(Context), 1); case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2); + case MVT::v3f16: return VectorType::get(Type::getHalfTy(Context), 3); case MVT::v4f16: return VectorType::get(Type::getHalfTy(Context), 4); case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8); + case MVT::v16f16: return VectorType::get(Type::getHalfTy(Context), 16); + case MVT::v32f16: return VectorType::get(Type::getHalfTy(Context), 32); case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1); case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2); case MVT::v3f32: return VectorType::get(Type::getFloatTy(Context), 3); @@ -302,8 +354,92 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2); case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4); case MVT::v8f64: return VectorType::get(Type::getDoubleTy(Context), 8); + case MVT::nxv1i1: + return VectorType::get(Type::getInt1Ty(Context), 1, /*Scalable=*/ true); + case MVT::nxv2i1: + return VectorType::get(Type::getInt1Ty(Context), 2, /*Scalable=*/ true); + case MVT::nxv4i1: + return VectorType::get(Type::getInt1Ty(Context), 4, /*Scalable=*/ true); + case MVT::nxv8i1: + return VectorType::get(Type::getInt1Ty(Context), 8, /*Scalable=*/ true); + case MVT::nxv16i1: + return VectorType::get(Type::getInt1Ty(Context), 16, /*Scalable=*/ true); + case MVT::nxv32i1: + return VectorType::get(Type::getInt1Ty(Context), 32, /*Scalable=*/ true); + case MVT::nxv1i8: + return VectorType::get(Type::getInt8Ty(Context), 1, /*Scalable=*/ true); + case MVT::nxv2i8: + return 
VectorType::get(Type::getInt8Ty(Context), 2, /*Scalable=*/ true); + case MVT::nxv4i8: + return VectorType::get(Type::getInt8Ty(Context), 4, /*Scalable=*/ true); + case MVT::nxv8i8: + return VectorType::get(Type::getInt8Ty(Context), 8, /*Scalable=*/ true); + case MVT::nxv16i8: + return VectorType::get(Type::getInt8Ty(Context), 16, /*Scalable=*/ true); + case MVT::nxv32i8: + return VectorType::get(Type::getInt8Ty(Context), 32, /*Scalable=*/ true); + case MVT::nxv1i16: + return VectorType::get(Type::getInt16Ty(Context), 1, /*Scalable=*/ true); + case MVT::nxv2i16: + return VectorType::get(Type::getInt16Ty(Context), 2, /*Scalable=*/ true); + case MVT::nxv4i16: + return VectorType::get(Type::getInt16Ty(Context), 4, /*Scalable=*/ true); + case MVT::nxv8i16: + return VectorType::get(Type::getInt16Ty(Context), 8, /*Scalable=*/ true); + case MVT::nxv16i16: + return VectorType::get(Type::getInt16Ty(Context), 16, /*Scalable=*/ true); + case MVT::nxv32i16: + return VectorType::get(Type::getInt16Ty(Context), 32, /*Scalable=*/ true); + case MVT::nxv1i32: + return VectorType::get(Type::getInt32Ty(Context), 1, /*Scalable=*/ true); + case MVT::nxv2i32: + return VectorType::get(Type::getInt32Ty(Context), 2, /*Scalable=*/ true); + case MVT::nxv4i32: + return VectorType::get(Type::getInt32Ty(Context), 4, /*Scalable=*/ true); + case MVT::nxv8i32: + return VectorType::get(Type::getInt32Ty(Context), 8, /*Scalable=*/ true); + case MVT::nxv16i32: + return VectorType::get(Type::getInt32Ty(Context), 16,/*Scalable=*/ true); + case MVT::nxv32i32: + return VectorType::get(Type::getInt32Ty(Context), 32,/*Scalable=*/ true); + case MVT::nxv1i64: + return VectorType::get(Type::getInt64Ty(Context), 1, /*Scalable=*/ true); + case MVT::nxv2i64: + return VectorType::get(Type::getInt64Ty(Context), 2, /*Scalable=*/ true); + case MVT::nxv4i64: + return VectorType::get(Type::getInt64Ty(Context), 4, /*Scalable=*/ true); + case MVT::nxv8i64: + return VectorType::get(Type::getInt64Ty(Context), 8, /*Scalable=*/ true); + case MVT::nxv16i64: + return VectorType::get(Type::getInt64Ty(Context), 16, /*Scalable=*/ true); + case MVT::nxv32i64: + return VectorType::get(Type::getInt64Ty(Context), 32, /*Scalable=*/ true); + case MVT::nxv2f16: + return VectorType::get(Type::getHalfTy(Context), 2, /*Scalable=*/ true); + case MVT::nxv4f16: + return VectorType::get(Type::getHalfTy(Context), 4, /*Scalable=*/ true); + case MVT::nxv8f16: + return VectorType::get(Type::getHalfTy(Context), 8, /*Scalable=*/ true); + case MVT::nxv1f32: + return VectorType::get(Type::getFloatTy(Context), 1, /*Scalable=*/ true); + case MVT::nxv2f32: + return VectorType::get(Type::getFloatTy(Context), 2, /*Scalable=*/ true); + case MVT::nxv4f32: + return VectorType::get(Type::getFloatTy(Context), 4, /*Scalable=*/ true); + case MVT::nxv8f32: + return VectorType::get(Type::getFloatTy(Context), 8, /*Scalable=*/ true); + case MVT::nxv16f32: + return VectorType::get(Type::getFloatTy(Context), 16, /*Scalable=*/ true); + case MVT::nxv1f64: + return VectorType::get(Type::getDoubleTy(Context), 1, /*Scalable=*/ true); + case MVT::nxv2f64: + return VectorType::get(Type::getDoubleTy(Context), 2, /*Scalable=*/ true); + case MVT::nxv4f64: + return VectorType::get(Type::getDoubleTy(Context), 4, /*Scalable=*/ true); + case MVT::nxv8f64: + return VectorType::get(Type::getDoubleTy(Context), 8, /*Scalable=*/ true); case MVT::Metadata: return Type::getMetadataTy(Context); - } + } } /// Return the value type corresponding to the specified type. 
This returns all @@ -329,7 +465,8 @@ MVT MVT::getVT(Type *Ty, bool HandleUnknown){ case Type::VectorTyID: { VectorType *VTy = cast(Ty); return getVectorVT( - getVT(VTy->getElementType(), false), VTy->getNumElements()); + getVT(VTy->getElementType(), /*HandleUnknown=*/ false), + VTy->getElementCount()); } } } @@ -345,8 +482,9 @@ EVT EVT::getEVT(Type *Ty, bool HandleUnknown){ return getIntegerVT(Ty->getContext(), cast(Ty)->getBitWidth()); case Type::VectorTyID: { VectorType *VTy = cast(Ty); - return getVectorVT(Ty->getContext(), getEVT(VTy->getElementType(), false), - VTy->getNumElements()); + return getVectorVT(Ty->getContext(), + getEVT(VTy->getElementType(), /*HandleUnknown=*/ false), + VTy->getElementCount()); } } } diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 4a06704a8876..5312e2eea96b 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -80,15 +80,14 @@ void VirtRegMap::grow() { Virt2SplitMap.resize(NumRegs); } -void VirtRegMap::assignVirt2Phys(unsigned virtReg, MCPhysReg physReg) { - assert(TargetRegisterInfo::isVirtualRegister(virtReg) && - TargetRegisterInfo::isPhysicalRegister(physReg)); - assert(Virt2PhysMap[virtReg] == NO_PHYS_REG && +void VirtRegMap::assignVirt2Phys(Register virtReg, MCPhysReg physReg) { + assert(virtReg.isVirtual() && Register::isPhysicalRegister(physReg)); + assert(Virt2PhysMap[virtReg.id()] == NO_PHYS_REG && "attempt to assign physical register to already mapped " "virtual register"); assert(!getRegInfo().isReserved(physReg) && "Attempt to map virtReg to a reserved physReg"); - Virt2PhysMap[virtReg] = physReg; + Virt2PhysMap[virtReg.id()] = physReg; } unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) { @@ -99,46 +98,46 @@ unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) { return SS; } -bool VirtRegMap::hasPreferredPhys(unsigned VirtReg) { - unsigned Hint = MRI->getSimpleHint(VirtReg); - if (!Hint) +bool VirtRegMap::hasPreferredPhys(Register VirtReg) { + Register Hint = MRI->getSimpleHint(VirtReg); + if (!Hint.isValid()) return false; - if (TargetRegisterInfo::isVirtualRegister(Hint)) + if (Hint.isVirtual()) Hint = getPhys(Hint); return getPhys(VirtReg) == Hint; } -bool VirtRegMap::hasKnownPreference(unsigned VirtReg) { +bool VirtRegMap::hasKnownPreference(Register VirtReg) { std::pair Hint = MRI->getRegAllocationHint(VirtReg); - if (TargetRegisterInfo::isPhysicalRegister(Hint.second)) + if (Register::isPhysicalRegister(Hint.second)) return true; - if (TargetRegisterInfo::isVirtualRegister(Hint.second)) + if (Register::isVirtualRegister(Hint.second)) return hasPhys(Hint.second); return false; } -int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT && +int VirtRegMap::assignVirt2StackSlot(Register virtReg) { + assert(virtReg.isVirtual()); + assert(Virt2StackSlotMap[virtReg.id()] == NO_STACK_SLOT && "attempt to assign stack slot to already spilled register"); const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg); - return Virt2StackSlotMap[virtReg] = createSpillSlot(RC); + return Virt2StackSlotMap[virtReg.id()] = createSpillSlot(RC); } -void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT && +void VirtRegMap::assignVirt2StackSlot(Register virtReg, int SS) { + assert(virtReg.isVirtual()); + 
assert(Virt2StackSlotMap[virtReg.id()] == NO_STACK_SLOT && "attempt to assign stack slot to already spilled register"); assert((SS >= 0 || (SS >= MF->getFrameInfo().getObjectIndexBegin())) && "illegal fixed frame index"); - Virt2StackSlotMap[virtReg] = SS; + Virt2StackSlotMap[virtReg.id()] = SS; } void VirtRegMap::print(raw_ostream &OS, const Module*) const { OS << "********** REGISTER MAP **********\n"; for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) { OS << '[' << printReg(Reg, TRI) << " -> " << printReg(Virt2PhysMap[Reg], TRI) << "] " @@ -147,7 +146,7 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const { } for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) { OS << '[' << printReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg] << "] " << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n"; @@ -185,10 +184,10 @@ class VirtRegRewriter : public MachineFunctionPass { void rewrite(); void addMBBLiveIns(); bool readsUndefSubreg(const MachineOperand &MO) const; - void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const; + void addLiveInsForSubRanges(const LiveInterval &LI, Register PhysReg) const; void handleIdentityCopy(MachineInstr &MI) const; void expandCopyBundle(MachineInstr &MI) const; - bool subRegLiveThrough(const MachineInstr &MI, unsigned SuperPhysReg) const; + bool subRegLiveThrough(const MachineInstr &MI, Register SuperPhysReg) const; public: static char ID; @@ -265,7 +264,7 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) { } void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI, - unsigned PhysReg) const { + Register PhysReg) const { assert(!LI.empty()); assert(LI.hasSubRanges()); @@ -312,7 +311,7 @@ void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI, // assignments. void VirtRegRewriter::addMBBLiveIns() { for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) { - unsigned VirtReg = TargetRegisterInfo::index2VirtReg(Idx); + Register VirtReg = Register::index2VirtReg(Idx); if (MRI->reg_nodbg_empty(VirtReg)) continue; LiveInterval &LI = LIS->getInterval(VirtReg); @@ -320,7 +319,7 @@ void VirtRegRewriter::addMBBLiveIns() { continue; // This is a virtual register that is live across basic blocks. Its // assigned PhysReg must be marked as live-in to those blocks. - unsigned PhysReg = VRM->getPhys(VirtReg); + Register PhysReg = VRM->getPhys(VirtReg); assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Unmapped virtual register."); if (LI.hasSubRanges()) { @@ -353,7 +352,7 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const { if (MO.isUndef()) return true; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); const LiveInterval &LI = LIS->getInterval(Reg); const MachineInstr &MI = *MO.getParent(); SlotIndex BaseIndex = LIS->getInstructionIndex(MI); @@ -469,7 +468,7 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const { /// \pre \p MI defines a subregister of a virtual register that /// has been assigned to \p SuperPhysReg. 
bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI, - unsigned SuperPhysReg) const { + Register SuperPhysReg) const { SlotIndex MIIndex = LIS->getInstructionIndex(MI); SlotIndex BeforeMIUses = MIIndex.getBaseIndex(); SlotIndex AfterMIDefs = MIIndex.getBoundaryIndex(); @@ -493,9 +492,9 @@ bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI, void VirtRegRewriter::rewrite() { bool NoSubRegLiveness = !MRI->subRegLivenessEnabled(); - SmallVector SuperDeads; - SmallVector SuperDefs; - SmallVector SuperKills; + SmallVector SuperDeads; + SmallVector SuperDefs; + SmallVector SuperKills; for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); MBBI != MBBE; ++MBBI) { @@ -513,10 +512,10 @@ void VirtRegRewriter::rewrite() { if (MO.isRegMask()) MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (!MO.isReg() || !MO.getReg().isVirtual()) continue; - unsigned VirtReg = MO.getReg(); - unsigned PhysReg = VRM->getPhys(VirtReg); + Register VirtReg = MO.getReg(); + Register PhysReg = VRM->getPhys(VirtReg); assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Instruction uses unmapped VirtReg"); assert(!MRI->isReserved(PhysReg) && "Reserved register assignment"); @@ -562,7 +561,7 @@ void VirtRegRewriter::rewrite() { // PhysReg operands cannot have subregister indexes. PhysReg = TRI->getSubReg(PhysReg, SubReg); - assert(PhysReg && "Invalid SubReg for physical register"); + assert(PhysReg.isValid() && "Invalid SubReg for physical register"); MO.setSubReg(0); } // Rewrite. Note we could have used MachineOperand::substPhysReg(), but diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp index 19c59e9542b4..119c3fd1ec7f 100644 --- a/lib/CodeGen/XRayInstrumentation.cpp +++ b/lib/CodeGen/XRayInstrumentation.cpp @@ -111,7 +111,7 @@ void XRayInstrumentation::replaceRetWithPatchableRet( MIB.add(MO); Terminators.push_back(&T); if (T.isCall()) - MF.updateCallSiteInfo(&T); + MF.eraseCallSiteInfo(&T); } } } diff --git a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp index ec4773d571c8..dd6f75f97a4a 100644 --- a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp +++ b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp @@ -209,14 +209,6 @@ struct VisitHelper { } } - VisitHelper(TypeVisitorCallbackPipeline &Callbacks, VisitorDataSource Source) - : Visitor((Source == VDS_BytesPresent) ? 
Pipeline : Callbacks) { - if (Source == VDS_BytesPresent) { - Pipeline = Callbacks; - Pipeline.addCallbackToPipelineFront(Deserializer); - } - } - TypeDeserializer Deserializer; TypeVisitorCallbackPipeline Pipeline; CVTypeVisitor Visitor; @@ -230,13 +222,6 @@ Error llvm::codeview::visitTypeRecord(CVType &Record, TypeIndex Index, return V.Visitor.visitTypeRecord(Record, Index); } -Error llvm::codeview::visitTypeRecord(CVType &Record, TypeIndex Index, - TypeVisitorCallbackPipeline &Callbacks, - VisitorDataSource Source) { - VisitHelper V(Callbacks, Source); - return V.Visitor.visitTypeRecord(Record, Index); -} - Error llvm::codeview::visitTypeRecord(CVType &Record, TypeVisitorCallbacks &Callbacks, VisitorDataSource Source) { diff --git a/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp b/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp index 2f49474115a1..36a384baa13d 100644 --- a/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp +++ b/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp @@ -20,7 +20,6 @@ Error CodeViewRecordIO::beginRecord(Optional MaxLength) { Limit.MaxLength = MaxLength; Limit.BeginOffset = getCurrentOffset(); Limits.push_back(Limit); - resetStreamedLen(); return Error::success(); } @@ -50,6 +49,7 @@ Error CodeViewRecordIO::endRecord() { Streamer->EmitBytes(BytesSR); --PaddingBytes; } + resetStreamedLen(); } return Error::success(); } @@ -126,7 +126,11 @@ Error CodeViewRecordIO::mapByteVectorTail(std::vector &Bytes, Error CodeViewRecordIO::mapInteger(TypeIndex &TypeInd, const Twine &Comment) { if (isStreaming()) { - emitComment(Comment); + std::string TypeNameStr = Streamer->getTypeName(TypeInd); + if (!TypeNameStr.empty()) + emitComment(Comment + ": " + TypeNameStr); + else + emitComment(Comment); Streamer->EmitIntValue(TypeInd.getIndex(), sizeof(TypeInd.getIndex())); incrStreamedLen(sizeof(TypeInd.getIndex())); } else if (isWriting()) { diff --git a/lib/DebugInfo/CodeView/EnumTables.cpp b/lib/DebugInfo/CodeView/EnumTables.cpp index 54e68ae4ea9f..82f6713a88f5 100644 --- a/lib/DebugInfo/CodeView/EnumTables.cpp +++ b/lib/DebugInfo/CodeView/EnumTables.cpp @@ -300,6 +300,128 @@ static const EnumEntry CV_ENUM_ENT(COFF, IMAGE_SCN_MEM_READ), CV_ENUM_ENT(COFF, IMAGE_SCN_MEM_WRITE)}; +static const EnumEntry ClassOptionNames[] = { + CV_ENUM_CLASS_ENT(ClassOptions, Packed), + CV_ENUM_CLASS_ENT(ClassOptions, HasConstructorOrDestructor), + CV_ENUM_CLASS_ENT(ClassOptions, HasOverloadedOperator), + CV_ENUM_CLASS_ENT(ClassOptions, Nested), + CV_ENUM_CLASS_ENT(ClassOptions, ContainsNestedClass), + CV_ENUM_CLASS_ENT(ClassOptions, HasOverloadedAssignmentOperator), + CV_ENUM_CLASS_ENT(ClassOptions, HasConversionOperator), + CV_ENUM_CLASS_ENT(ClassOptions, ForwardReference), + CV_ENUM_CLASS_ENT(ClassOptions, Scoped), + CV_ENUM_CLASS_ENT(ClassOptions, HasUniqueName), + CV_ENUM_CLASS_ENT(ClassOptions, Sealed), + CV_ENUM_CLASS_ENT(ClassOptions, Intrinsic), +}; + +static const EnumEntry MemberAccessNames[] = { + CV_ENUM_CLASS_ENT(MemberAccess, None), + CV_ENUM_CLASS_ENT(MemberAccess, Private), + CV_ENUM_CLASS_ENT(MemberAccess, Protected), + CV_ENUM_CLASS_ENT(MemberAccess, Public), +}; + +static const EnumEntry MethodOptionNames[] = { + CV_ENUM_CLASS_ENT(MethodOptions, Pseudo), + CV_ENUM_CLASS_ENT(MethodOptions, NoInherit), + CV_ENUM_CLASS_ENT(MethodOptions, NoConstruct), + CV_ENUM_CLASS_ENT(MethodOptions, CompilerGenerated), + CV_ENUM_CLASS_ENT(MethodOptions, Sealed), +}; + +static const EnumEntry MemberKindNames[] = { + CV_ENUM_CLASS_ENT(MethodKind, Vanilla), + CV_ENUM_CLASS_ENT(MethodKind, Virtual), + 
CV_ENUM_CLASS_ENT(MethodKind, Static), + CV_ENUM_CLASS_ENT(MethodKind, Friend), + CV_ENUM_CLASS_ENT(MethodKind, IntroducingVirtual), + CV_ENUM_CLASS_ENT(MethodKind, PureVirtual), + CV_ENUM_CLASS_ENT(MethodKind, PureIntroducingVirtual), +}; + +static const EnumEntry PtrKindNames[] = { + CV_ENUM_CLASS_ENT(PointerKind, Near16), + CV_ENUM_CLASS_ENT(PointerKind, Far16), + CV_ENUM_CLASS_ENT(PointerKind, Huge16), + CV_ENUM_CLASS_ENT(PointerKind, BasedOnSegment), + CV_ENUM_CLASS_ENT(PointerKind, BasedOnValue), + CV_ENUM_CLASS_ENT(PointerKind, BasedOnSegmentValue), + CV_ENUM_CLASS_ENT(PointerKind, BasedOnAddress), + CV_ENUM_CLASS_ENT(PointerKind, BasedOnSegmentAddress), + CV_ENUM_CLASS_ENT(PointerKind, BasedOnType), + CV_ENUM_CLASS_ENT(PointerKind, BasedOnSelf), + CV_ENUM_CLASS_ENT(PointerKind, Near32), + CV_ENUM_CLASS_ENT(PointerKind, Far32), + CV_ENUM_CLASS_ENT(PointerKind, Near64), +}; + +static const EnumEntry PtrModeNames[] = { + CV_ENUM_CLASS_ENT(PointerMode, Pointer), + CV_ENUM_CLASS_ENT(PointerMode, LValueReference), + CV_ENUM_CLASS_ENT(PointerMode, PointerToDataMember), + CV_ENUM_CLASS_ENT(PointerMode, PointerToMemberFunction), + CV_ENUM_CLASS_ENT(PointerMode, RValueReference), +}; + +static const EnumEntry PtrMemberRepNames[] = { + CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, Unknown), + CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, SingleInheritanceData), + CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, MultipleInheritanceData), + CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, VirtualInheritanceData), + CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, GeneralData), + CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, SingleInheritanceFunction), + CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, + MultipleInheritanceFunction), + CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, + VirtualInheritanceFunction), + CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, GeneralFunction), +}; + +static const EnumEntry TypeModifierNames[] = { + CV_ENUM_CLASS_ENT(ModifierOptions, Const), + CV_ENUM_CLASS_ENT(ModifierOptions, Volatile), + CV_ENUM_CLASS_ENT(ModifierOptions, Unaligned), +}; + +static const EnumEntry CallingConventions[] = { + CV_ENUM_CLASS_ENT(CallingConvention, NearC), + CV_ENUM_CLASS_ENT(CallingConvention, FarC), + CV_ENUM_CLASS_ENT(CallingConvention, NearPascal), + CV_ENUM_CLASS_ENT(CallingConvention, FarPascal), + CV_ENUM_CLASS_ENT(CallingConvention, NearFast), + CV_ENUM_CLASS_ENT(CallingConvention, FarFast), + CV_ENUM_CLASS_ENT(CallingConvention, NearStdCall), + CV_ENUM_CLASS_ENT(CallingConvention, FarStdCall), + CV_ENUM_CLASS_ENT(CallingConvention, NearSysCall), + CV_ENUM_CLASS_ENT(CallingConvention, FarSysCall), + CV_ENUM_CLASS_ENT(CallingConvention, ThisCall), + CV_ENUM_CLASS_ENT(CallingConvention, MipsCall), + CV_ENUM_CLASS_ENT(CallingConvention, Generic), + CV_ENUM_CLASS_ENT(CallingConvention, AlphaCall), + CV_ENUM_CLASS_ENT(CallingConvention, PpcCall), + CV_ENUM_CLASS_ENT(CallingConvention, SHCall), + CV_ENUM_CLASS_ENT(CallingConvention, ArmCall), + CV_ENUM_CLASS_ENT(CallingConvention, AM33Call), + CV_ENUM_CLASS_ENT(CallingConvention, TriCall), + CV_ENUM_CLASS_ENT(CallingConvention, SH5Call), + CV_ENUM_CLASS_ENT(CallingConvention, M32RCall), + CV_ENUM_CLASS_ENT(CallingConvention, ClrCall), + CV_ENUM_CLASS_ENT(CallingConvention, Inline), + CV_ENUM_CLASS_ENT(CallingConvention, NearVector), +}; + +static const EnumEntry FunctionOptionEnum[] = { + CV_ENUM_CLASS_ENT(FunctionOptions, CxxReturnUdt), + CV_ENUM_CLASS_ENT(FunctionOptions, Constructor), + 
CV_ENUM_CLASS_ENT(FunctionOptions, ConstructorWithVirtualBases), +}; + +static const EnumEntry LabelTypeEnum[] = { + CV_ENUM_CLASS_ENT(LabelType, Near), + CV_ENUM_CLASS_ENT(LabelType, Far), +}; + namespace llvm { namespace codeview { @@ -379,5 +501,49 @@ getImageSectionCharacteristicNames() { return makeArrayRef(ImageSectionCharacteristicNames); } +ArrayRef> getClassOptionNames() { + return makeArrayRef(ClassOptionNames); +} + +ArrayRef> getMemberAccessNames() { + return makeArrayRef(MemberAccessNames); +} + +ArrayRef> getMethodOptionNames() { + return makeArrayRef(MethodOptionNames); +} + +ArrayRef> getMemberKindNames() { + return makeArrayRef(MemberKindNames); +} + +ArrayRef> getPtrKindNames() { + return makeArrayRef(PtrKindNames); +} + +ArrayRef> getPtrModeNames() { + return makeArrayRef(PtrModeNames); +} + +ArrayRef> getPtrMemberRepNames() { + return makeArrayRef(PtrMemberRepNames); +} + +ArrayRef> getTypeModifierNames() { + return makeArrayRef(TypeModifierNames); +} + +ArrayRef> getCallingConventions() { + return makeArrayRef(CallingConventions); +} + +ArrayRef> getFunctionOptionEnum() { + return makeArrayRef(FunctionOptionEnum); +} + +ArrayRef> getLabelTypeEnum() { + return makeArrayRef(LabelTypeEnum); +} + } // end namespace codeview } // end namespace llvm diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp index 27cb7e35234b..45b63983beb4 100644 --- a/lib/DebugInfo/CodeView/SymbolDumper.cpp +++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp @@ -315,7 +315,7 @@ Error CVSymbolDumperImpl::visitKnownRecord( Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, DefRangeFramePointerRelSym &DefRangeFramePointerRel) { - W.printNumber("Offset", DefRangeFramePointerRel.Offset); + W.printNumber("Offset", DefRangeFramePointerRel.Hdr.Offset); printLocalVariableAddrRange(DefRangeFramePointerRel.Range, DefRangeFramePointerRel.getRelocationOffset()); printLocalVariableAddrGap(DefRangeFramePointerRel.Gaps); diff --git a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp index 70889839ef48..3b627930e271 100644 --- a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp +++ b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp @@ -229,7 +229,7 @@ Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, DataSym &Data) { Error SymbolRecordMapping::visitKnownRecord( CVSymbol &CVR, DefRangeFramePointerRelSym &DefRangeFramePointerRel) { - error(IO.mapInteger(DefRangeFramePointerRel.Offset)); + error(IO.mapObject(DefRangeFramePointerRel.Hdr.Offset)); error(mapLocalVariableAddrRange(IO, DefRangeFramePointerRel.Range)); error(IO.mapVectorTail(DefRangeFramePointerRel.Gaps, MapGap())); diff --git a/lib/DebugInfo/CodeView/TypeRecordMapping.cpp b/lib/DebugInfo/CodeView/TypeRecordMapping.cpp index 47928c2eef64..1aded589e565 100644 --- a/lib/DebugInfo/CodeView/TypeRecordMapping.cpp +++ b/lib/DebugInfo/CodeView/TypeRecordMapping.cpp @@ -7,24 +7,125 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/CodeView/TypeRecordMapping.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/DebugInfo/CodeView/EnumTables.h" using namespace llvm; using namespace llvm::codeview; +namespace { + #define error(X) \ if (auto EC = X) \ return EC; -namespace { +static const EnumEntry LeafTypeNames[] = { +#define CV_TYPE(enum, val) {#enum, enum}, +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" +}; + +static StringRef getLeafTypeName(TypeLeafKind LT) { + switch (LT) { +#define TYPE_RECORD(ename, 
value, name) \ + case ename: \ + return #name; +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" + default: + break; + } + return "UnknownLeaf"; +} + +template +static bool compEnumNames(const EnumEntry &lhs, const EnumEntry &rhs) { + return lhs.Name < rhs.Name; +} + +template +static std::string getFlagNames(CodeViewRecordIO &IO, T Value, + ArrayRef> Flags) { + if (!IO.isStreaming()) + return std::string(""); + typedef EnumEntry FlagEntry; + typedef SmallVector FlagVector; + FlagVector SetFlags; + for (const auto &Flag : Flags) { + if (Flag.Value == 0) + continue; + if ((Value & Flag.Value) == Flag.Value) { + SetFlags.push_back(Flag); + } + } + + llvm::sort(SetFlags, &compEnumNames); + + std::string FlagLabel; + bool FirstOcc = true; + for (const auto &Flag : SetFlags) { + if (FirstOcc) + FirstOcc = false; + else + FlagLabel += (" | "); + + FlagLabel += (Flag.Name.str() + " (0x" + utohexstr(Flag.Value) + ")"); + } + + if (!FlagLabel.empty()) { + std::string LabelWithBraces(" ( "); + LabelWithBraces += FlagLabel + " )"; + return LabelWithBraces; + } else + return FlagLabel; +} + +template +static StringRef getEnumName(CodeViewRecordIO &IO, T Value, + ArrayRef> EnumValues) { + if (!IO.isStreaming()) + return ""; + StringRef Name; + for (const auto &EnumItem : EnumValues) { + if (EnumItem.Value == Value) { + Name = EnumItem.Name; + break; + } + } + + return Name; +} + +static std::string getMemberAttributes(CodeViewRecordIO &IO, + MemberAccess Access, MethodKind Kind, + MethodOptions Options) { + if (!IO.isStreaming()) + return ""; + std::string AccessSpecifier = + getEnumName(IO, uint8_t(Access), makeArrayRef(getMemberAccessNames())); + std::string MemberAttrs(AccessSpecifier); + if (Kind != MethodKind::Vanilla) { + std::string MethodKind = + getEnumName(IO, unsigned(Kind), makeArrayRef(getMemberKindNames())); + MemberAttrs += ", " + MethodKind; + } + if (Options != MethodOptions::None) { + std::string MethodOptions = getFlagNames( + IO, unsigned(Options), makeArrayRef(getMethodOptionNames())); + MemberAttrs += ", " + MethodOptions; + } + return MemberAttrs; +} + struct MapOneMethodRecord { explicit MapOneMethodRecord(bool IsFromOverloadList) : IsFromOverloadList(IsFromOverloadList) {} Error operator()(CodeViewRecordIO &IO, OneMethodRecord &Method) const { - error(IO.mapInteger(Method.Attrs.Attrs, "AccessSpecifier")); + std::string Attrs = getMemberAttributes( + IO, Method.getAccess(), Method.getMethodKind(), Method.getOptions()); + error(IO.mapInteger(Method.Attrs.Attrs, "Attrs: " + Attrs)); if (IsFromOverloadList) { uint16_t Padding = 0; - error(IO.mapInteger(Padding, "Padding")); + error(IO.mapInteger(Padding)); } error(IO.mapInteger(Method.Type, "Type")); if (Method.isIntroducingVirtual()) { @@ -41,7 +142,7 @@ struct MapOneMethodRecord { private: bool IsFromOverloadList; }; -} +} // namespace static Error mapNameAndUniqueName(CodeViewRecordIO &IO, StringRef &Name, StringRef &UniqueName, bool HasUniqueName) { @@ -96,10 +197,22 @@ Error TypeRecordMapping::visitTypeBegin(CVType &CVR) { MaxLen = MaxRecordLength - sizeof(RecordPrefix); error(IO.beginRecord(MaxLen)); TypeKind = CVR.kind(); + + if (IO.isStreaming()) { + auto RecordKind = CVR.kind(); + uint16_t RecordLen = CVR.length() - 2; + std::string RecordKindName = + getEnumName(IO, unsigned(RecordKind), makeArrayRef(LeafTypeNames)); + error(IO.mapInteger(RecordLen, "Record length")); + error(IO.mapEnum(RecordKind, "Record kind: " + RecordKindName)); + } return Error::success(); } Error TypeRecordMapping::visitTypeBegin(CVType &CVR, 
TypeIndex Index) { + if (IO.isStreaming()) + IO.emitRawComment(" " + getLeafTypeName(CVR.kind()) + " (0x" + + utohexstr(Index.getIndex()) + ")"); return visitTypeBegin(CVR); } @@ -121,11 +234,21 @@ Error TypeRecordMapping::visitMemberBegin(CVMemberRecord &Record) { // followed by the subrecord, followed by a continuation, and that entire // sequence spaws `MaxRecordLength` bytes. So the record's length is // calculated as follows. + constexpr uint32_t ContinuationLength = 8; error(IO.beginRecord(MaxRecordLength - sizeof(RecordPrefix) - ContinuationLength)); MemberKind = Record.Kind; + if (IO.isStreaming()) { + std::string MemberKindName = getLeafTypeName(Record.Kind); + MemberKindName += + " ( " + + (getEnumName(IO, unsigned(Record.Kind), makeArrayRef(LeafTypeNames))) + .str() + + " )"; + error(IO.mapEnum(Record.Kind, "Member kind: " + MemberKindName)); + } return Error::success(); } @@ -144,16 +267,24 @@ Error TypeRecordMapping::visitMemberEnd(CVMemberRecord &Record) { } Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ModifierRecord &Record) { + std::string ModifierNames = + getFlagNames(IO, static_cast(Record.Modifiers), + makeArrayRef(getTypeModifierNames())); error(IO.mapInteger(Record.ModifiedType, "ModifiedType")); - error(IO.mapEnum(Record.Modifiers, "Modifiers")); + error(IO.mapEnum(Record.Modifiers, "Modifiers" + ModifierNames)); return Error::success(); } Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ProcedureRecord &Record) { + std::string CallingConvName = getEnumName( + IO, uint8_t(Record.CallConv), makeArrayRef(getCallingConventions())); + std::string FuncOptionNames = + getFlagNames(IO, static_cast(Record.Options), + makeArrayRef(getFunctionOptionEnum())); error(IO.mapInteger(Record.ReturnType, "ReturnType")); - error(IO.mapEnum(Record.CallConv, "CallingConvention")); - error(IO.mapEnum(Record.Options, "FunctionOptions")); + error(IO.mapEnum(Record.CallConv, "CallingConvention: " + CallingConvName)); + error(IO.mapEnum(Record.Options, "FunctionOptions" + FuncOptionNames)); error(IO.mapInteger(Record.ParameterCount, "NumParameters")); error(IO.mapInteger(Record.ArgumentList, "ArgListType")); @@ -162,11 +293,16 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, Error TypeRecordMapping::visitKnownRecord(CVType &CVR, MemberFunctionRecord &Record) { + std::string CallingConvName = getEnumName( + IO, uint8_t(Record.CallConv), makeArrayRef(getCallingConventions())); + std::string FuncOptionNames = + getFlagNames(IO, static_cast(Record.Options), + makeArrayRef(getFunctionOptionEnum())); error(IO.mapInteger(Record.ReturnType, "ReturnType")); error(IO.mapInteger(Record.ClassType, "ClassType")); error(IO.mapInteger(Record.ThisType, "ThisType")); - error(IO.mapEnum(Record.CallConv, "CallingConvention")); - error(IO.mapEnum(Record.Options, "FunctionOptions")); + error(IO.mapEnum(Record.CallConv, "CallingConvention: " + CallingConvName)); + error(IO.mapEnum(Record.Options, "FunctionOptions" + FuncOptionNames)); error(IO.mapInteger(Record.ParameterCount, "NumParameters")); error(IO.mapInteger(Record.ArgumentList, "ArgListType")); error(IO.mapInteger(Record.ThisPointerAdjustment, "ThisAdjustment")); @@ -197,8 +333,40 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, } Error TypeRecordMapping::visitKnownRecord(CVType &CVR, PointerRecord &Record) { + + SmallString<128> Attr("Attrs: "); + + if (IO.isStreaming()) { + std::string PtrType = getEnumName(IO, unsigned(Record.getPointerKind()), + makeArrayRef(getPtrKindNames())); + Attr += "[ Type: " + PtrType; + + 
std::string PtrMode = getEnumName(IO, unsigned(Record.getMode()), + makeArrayRef(getPtrModeNames())); + Attr += ", Mode: " + PtrMode; + + auto PtrSizeOf = Record.getSize(); + Attr += ", SizeOf: " + itostr(PtrSizeOf); + + if (Record.isFlat()) + Attr += ", isFlat"; + if (Record.isConst()) + Attr += ", isConst"; + if (Record.isVolatile()) + Attr += ", isVolatile"; + if (Record.isUnaligned()) + Attr += ", isUnaligned"; + if (Record.isRestrict()) + Attr += ", isRestricted"; + if (Record.isLValueReferenceThisPtr()) + Attr += ", isThisPtr&"; + if (Record.isRValueReferenceThisPtr()) + Attr += ", isThisPtr&&"; + Attr += " ]"; + } + error(IO.mapInteger(Record.ReferentType, "PointeeType")); - error(IO.mapInteger(Record.Attrs, "Attributes")); + error(IO.mapInteger(Record.Attrs, Attr)); if (Record.isPointerToMember()) { if (IO.isReading()) @@ -206,7 +374,10 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, PointerRecord &Record) { MemberPointerInfo &M = *Record.MemberInfo; error(IO.mapInteger(M.ContainingType, "ClassType")); - error(IO.mapEnum(M.Representation, "Representation")); + std::string PtrMemberGetRepresentation = getEnumName( + IO, uint16_t(M.Representation), makeArrayRef(getPtrMemberRepNames())); + error(IO.mapEnum(M.Representation, + "Representation: " + PtrMemberGetRepresentation)); } return Error::success(); @@ -226,8 +397,11 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ClassRecord &Record) { (CVR.kind() == TypeLeafKind::LF_CLASS) || (CVR.kind() == TypeLeafKind::LF_INTERFACE)); + std::string PropertiesNames = + getFlagNames(IO, static_cast(Record.Options), + makeArrayRef(getClassOptionNames())); error(IO.mapInteger(Record.MemberCount, "MemberCount")); - error(IO.mapEnum(Record.Options, "Properties")); + error(IO.mapEnum(Record.Options, "Properties" + PropertiesNames)); error(IO.mapInteger(Record.FieldList, "FieldList")); error(IO.mapInteger(Record.DerivationList, "DerivedFrom")); error(IO.mapInteger(Record.VTableShape, "VShape")); @@ -239,8 +413,11 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ClassRecord &Record) { } Error TypeRecordMapping::visitKnownRecord(CVType &CVR, UnionRecord &Record) { + std::string PropertiesNames = + getFlagNames(IO, static_cast(Record.Options), + makeArrayRef(getClassOptionNames())); error(IO.mapInteger(Record.MemberCount, "MemberCount")); - error(IO.mapEnum(Record.Options, "Properties")); + error(IO.mapEnum(Record.Options, "Properties" + PropertiesNames)); error(IO.mapInteger(Record.FieldList, "FieldList")); error(IO.mapEncodedInteger(Record.Size, "SizeOf")); error(mapNameAndUniqueName(IO, Record.Name, Record.UniqueName, @@ -250,8 +427,11 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, UnionRecord &Record) { } Error TypeRecordMapping::visitKnownRecord(CVType &CVR, EnumRecord &Record) { + std::string PropertiesNames = + getFlagNames(IO, static_cast(Record.Options), + makeArrayRef(getClassOptionNames())); error(IO.mapInteger(Record.MemberCount, "NumEnumerators")); - error(IO.mapEnum(Record.Options, "Properties")); + error(IO.mapEnum(Record.Options, "Properties" + PropertiesNames)); error(IO.mapInteger(Record.UnderlyingType, "UnderlyingType")); error(IO.mapInteger(Record.FieldList, "FieldListType")); error(mapNameAndUniqueName(IO, Record.Name, Record.UniqueName, @@ -383,7 +563,11 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, Error TypeRecordMapping::visitKnownRecord(CVType &CVR, FieldListRecord &Record) { - error(IO.mapByteVectorTail(Record.Data)); + if (IO.isStreaming()) { + if (auto EC = 
codeview::visitMemberRecordStream(Record.Data, *this)) + return EC; + } else + error(IO.mapByteVectorTail(Record.Data)); return Error::success(); } @@ -397,13 +581,17 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, } Error TypeRecordMapping::visitKnownRecord(CVType &CVR, LabelRecord &Record) { - error(IO.mapEnum(Record.Mode, "Mode")); + std::string ModeName = + getEnumName(IO, uint16_t(Record.Mode), makeArrayRef(getLabelTypeEnum())); + error(IO.mapEnum(Record.Mode, "Mode: " + ModeName)); return Error::success(); } Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR, BaseClassRecord &Record) { - error(IO.mapInteger(Record.Attrs.Attrs, "AccessSpecifier")); + std::string Attrs = getMemberAttributes( + IO, Record.getAccess(), MethodKind::Vanilla, MethodOptions::None); + error(IO.mapInteger(Record.Attrs.Attrs, "Attrs: " + Attrs)); error(IO.mapInteger(Record.Type, "BaseType")); error(IO.mapEncodedInteger(Record.Offset, "BaseOffset")); @@ -412,7 +600,9 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR, Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR, EnumeratorRecord &Record) { - error(IO.mapInteger(Record.Attrs.Attrs)); + std::string Attrs = getMemberAttributes( + IO, Record.getAccess(), MethodKind::Vanilla, MethodOptions::None); + error(IO.mapInteger(Record.Attrs.Attrs, "Attrs: " + Attrs)); // FIXME: Handle full APInt such as __int128. error(IO.mapEncodedInteger(Record.Value, "EnumValue")); @@ -423,7 +613,9 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR, Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR, DataMemberRecord &Record) { - error(IO.mapInteger(Record.Attrs.Attrs, "AccessSpecifier")); + std::string Attrs = getMemberAttributes( + IO, Record.getAccess(), MethodKind::Vanilla, MethodOptions::None); + error(IO.mapInteger(Record.Attrs.Attrs, "Attrs: " + Attrs)); error(IO.mapInteger(Record.Type, "Type")); error(IO.mapEncodedInteger(Record.FieldOffset, "FieldOffset")); error(IO.mapStringZ(Record.Name, "Name")); @@ -460,7 +652,9 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR, Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR, StaticDataMemberRecord &Record) { - error(IO.mapInteger(Record.Attrs.Attrs, "AccessSpecifier")); + std::string Attrs = getMemberAttributes( + IO, Record.getAccess(), MethodKind::Vanilla, MethodOptions::None); + error(IO.mapInteger(Record.Attrs.Attrs, "Attrs: " + Attrs)); error(IO.mapInteger(Record.Type, "Type")); error(IO.mapStringZ(Record.Name, "Name")); @@ -470,7 +664,9 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR, Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR, VirtualBaseClassRecord &Record) { - error(IO.mapInteger(Record.Attrs.Attrs, "AccessSpecifier")); + std::string Attrs = getMemberAttributes( + IO, Record.getAccess(), MethodKind::Vanilla, MethodOptions::None); + error(IO.mapInteger(Record.Attrs.Attrs, "Attrs: " + Attrs)); error(IO.mapInteger(Record.BaseType, "BaseType")); error(IO.mapInteger(Record.VBPtrType, "VBPtrType")); error(IO.mapEncodedInteger(Record.VBPtrOffset, "VBPtrOffset")); diff --git a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp index f4dd79937608..abbea3a868c8 100644 --- a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp +++ b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp @@ -38,9 +38,9 @@ DWARFAbbreviationDeclaration::DWARFAbbreviationDeclaration() { bool DWARFAbbreviationDeclaration::extract(DataExtractor Data, - uint32_t* 
OffsetPtr) { + uint64_t* OffsetPtr) { clear(); - const uint32_t Offset = *OffsetPtr; + const uint64_t Offset = *OffsetPtr; Code = Data.getULEB128(OffsetPtr); if (Code == 0) { return false; @@ -148,7 +148,7 @@ DWARFAbbreviationDeclaration::findAttributeIndex(dwarf::Attribute Attr) const { } Optional DWARFAbbreviationDeclaration::getAttributeValue( - const uint32_t DIEOffset, const dwarf::Attribute Attr, + const uint64_t DIEOffset, const dwarf::Attribute Attr, const DWARFUnit &U) const { Optional MatchAttrIndex = findAttributeIndex(Attr); if (!MatchAttrIndex) @@ -158,7 +158,7 @@ Optional DWARFAbbreviationDeclaration::getAttributeValue( // Add the byte size of ULEB that for the abbrev Code so we can start // skipping the attribute data. - uint32_t Offset = DIEOffset + CodeByteSize; + uint64_t Offset = DIEOffset + CodeByteSize; uint32_t AttrIndex = 0; for (const auto &Spec : AttributeSpecs) { if (*MatchAttrIndex == AttrIndex) { diff --git a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp index 0721efb40f6a..875f5e9989a0 100644 --- a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp +++ b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp @@ -42,7 +42,7 @@ static Atom formatAtom(unsigned Atom) { return {Atom}; } DWARFAcceleratorTable::~DWARFAcceleratorTable() = default; Error AppleAcceleratorTable::extract() { - uint32_t Offset = 0; + uint64_t Offset = 0; // Check that we can at least read the header. if (!AccelSection.isValidOffset(offsetof(Header, HeaderDataLength) + 4)) @@ -111,15 +111,15 @@ bool AppleAcceleratorTable::validateForms() { return true; } -std::pair -AppleAcceleratorTable::readAtoms(uint32_t &HashDataOffset) { - uint32_t DieOffset = dwarf::DW_INVALID_OFFSET; +std::pair +AppleAcceleratorTable::readAtoms(uint64_t *HashDataOffset) { + uint64_t DieOffset = dwarf::DW_INVALID_OFFSET; dwarf::Tag DieTag = dwarf::DW_TAG_null; dwarf::FormParams FormParams = {Hdr.Version, 0, dwarf::DwarfFormat::DWARF32}; for (auto Atom : getAtomsDesc()) { DWARFFormValue FormValue(Atom.second); - FormValue.extractValue(AccelSection, &HashDataOffset, FormParams); + FormValue.extractValue(AccelSection, HashDataOffset, FormParams); switch (Atom.first) { case dwarf::DW_ATOM_die_offset: DieOffset = *FormValue.getAsUnsignedConstant(); @@ -163,19 +163,19 @@ Optional AppleAcceleratorTable::HeaderData::extractOffset( bool AppleAcceleratorTable::dumpName(ScopedPrinter &W, SmallVectorImpl &AtomForms, - uint32_t *DataOffset) const { + uint64_t *DataOffset) const { dwarf::FormParams FormParams = {Hdr.Version, 0, dwarf::DwarfFormat::DWARF32}; - uint32_t NameOffset = *DataOffset; + uint64_t NameOffset = *DataOffset; if (!AccelSection.isValidOffsetForDataOfSize(*DataOffset, 4)) { W.printString("Incorrectly terminated list."); return false; } - unsigned StringOffset = AccelSection.getRelocatedValue(4, DataOffset); + uint64_t StringOffset = AccelSection.getRelocatedValue(4, DataOffset); if (!StringOffset) return false; // End of list DictScope NameScope(W, ("Name@0x" + Twine::utohexstr(NameOffset)).str()); - W.startLine() << format("String: 0x%08x", StringOffset); + W.startLine() << format("String: 0x%08" PRIx64, StringOffset); W.getOStream() << " \"" << StringSection.getCStr(&StringOffset) << "\"\n"; unsigned NumData = AccelSection.getU32(DataOffset); @@ -223,9 +223,9 @@ LLVM_DUMP_METHOD void AppleAcceleratorTable::dump(raw_ostream &OS) const { } // Now go through the actual tables and dump them. 
- uint32_t Offset = sizeof(Hdr) + Hdr.HeaderDataLength; - unsigned HashesBase = Offset + Hdr.BucketCount * 4; - unsigned OffsetsBase = HashesBase + Hdr.HashCount * 4; + uint64_t Offset = sizeof(Hdr) + Hdr.HeaderDataLength; + uint64_t HashesBase = Offset + Hdr.BucketCount * 4; + uint64_t OffsetsBase = HashesBase + Hdr.HashCount * 4; for (unsigned Bucket = 0; Bucket < Hdr.BucketCount; ++Bucket) { unsigned Index = AccelSection.getU32(&Offset); @@ -237,14 +237,14 @@ LLVM_DUMP_METHOD void AppleAcceleratorTable::dump(raw_ostream &OS) const { } for (unsigned HashIdx = Index; HashIdx < Hdr.HashCount; ++HashIdx) { - unsigned HashOffset = HashesBase + HashIdx*4; - unsigned OffsetsOffset = OffsetsBase + HashIdx*4; + uint64_t HashOffset = HashesBase + HashIdx*4; + uint64_t OffsetsOffset = OffsetsBase + HashIdx*4; uint32_t Hash = AccelSection.getU32(&HashOffset); if (Hash % Hdr.BucketCount != Bucket) break; - unsigned DataOffset = AccelSection.getU32(&OffsetsOffset); + uint64_t DataOffset = AccelSection.getU32(&OffsetsOffset); ListScope HashScope(W, ("Hash 0x" + Twine::utohexstr(Hash)).str()); if (!AccelSection.isValidOffset(DataOffset)) { W.printString("Invalid section offset"); @@ -265,7 +265,7 @@ AppleAcceleratorTable::Entry::Entry( } void AppleAcceleratorTable::Entry::extract( - const AppleAcceleratorTable &AccelTable, uint32_t *Offset) { + const AppleAcceleratorTable &AccelTable, uint64_t *Offset) { dwarf::FormParams FormParams = {AccelTable.Hdr.Version, 0, dwarf::DwarfFormat::DWARF32}; @@ -302,7 +302,7 @@ Optional AppleAcceleratorTable::Entry::getTag() const { } AppleAcceleratorTable::ValueIterator::ValueIterator( - const AppleAcceleratorTable &AccelTable, unsigned Offset) + const AppleAcceleratorTable &AccelTable, uint64_t Offset) : AccelTable(&AccelTable), Current(AccelTable.HdrData), DataOffset(Offset) { if (!AccelTable.AccelSection.isValidOffsetForDataOfSize(DataOffset, 4)) return; @@ -333,25 +333,25 @@ AppleAcceleratorTable::equal_range(StringRef Key) const { // Find the bucket. unsigned HashValue = djbHash(Key); unsigned Bucket = HashValue % Hdr.BucketCount; - unsigned BucketBase = sizeof(Hdr) + Hdr.HeaderDataLength; - unsigned HashesBase = BucketBase + Hdr.BucketCount * 4; - unsigned OffsetsBase = HashesBase + Hdr.HashCount * 4; + uint64_t BucketBase = sizeof(Hdr) + Hdr.HeaderDataLength; + uint64_t HashesBase = BucketBase + Hdr.BucketCount * 4; + uint64_t OffsetsBase = HashesBase + Hdr.HashCount * 4; - unsigned BucketOffset = BucketBase + Bucket * 4; + uint64_t BucketOffset = BucketBase + Bucket * 4; unsigned Index = AccelSection.getU32(&BucketOffset); // Search through all hashes in the bucket. for (unsigned HashIdx = Index; HashIdx < Hdr.HashCount; ++HashIdx) { - unsigned HashOffset = HashesBase + HashIdx * 4; - unsigned OffsetsOffset = OffsetsBase + HashIdx * 4; + uint64_t HashOffset = HashesBase + HashIdx * 4; + uint64_t OffsetsOffset = OffsetsBase + HashIdx * 4; uint32_t Hash = AccelSection.getU32(&HashOffset); if (Hash % Hdr.BucketCount != Bucket) // We are already in the next bucket. 
break; - unsigned DataOffset = AccelSection.getU32(&OffsetsOffset); - unsigned StringOffset = AccelSection.getRelocatedValue(4, &DataOffset); + uint64_t DataOffset = AccelSection.getU32(&OffsetsOffset); + uint64_t StringOffset = AccelSection.getRelocatedValue(4, &DataOffset); if (!StringOffset) break; @@ -377,7 +377,7 @@ void DWARFDebugNames::Header::dump(ScopedPrinter &W) const { } Error DWARFDebugNames::Header::extract(const DWARFDataExtractor &AS, - uint32_t *Offset) { + uint64_t *Offset) { // Check that we can read the fixed-size part. if (!AS.isValidOffset(*Offset + sizeof(HeaderPOD) - 1)) return createStringError(errc::illegal_byte_sequence, @@ -437,7 +437,7 @@ DWARFDebugNames::Abbrev DWARFDebugNames::AbbrevMapInfo::getTombstoneKey() { } Expected -DWARFDebugNames::NameIndex::extractAttributeEncoding(uint32_t *Offset) { +DWARFDebugNames::NameIndex::extractAttributeEncoding(uint64_t *Offset) { if (*Offset >= EntriesBase) { return createStringError(errc::illegal_byte_sequence, "Incorrectly terminated abbreviation table."); @@ -449,7 +449,7 @@ DWARFDebugNames::NameIndex::extractAttributeEncoding(uint32_t *Offset) { } Expected> -DWARFDebugNames::NameIndex::extractAttributeEncodings(uint32_t *Offset) { +DWARFDebugNames::NameIndex::extractAttributeEncodings(uint64_t *Offset) { std::vector Result; for (;;) { auto AttrEncOr = extractAttributeEncoding(Offset); @@ -463,7 +463,7 @@ DWARFDebugNames::NameIndex::extractAttributeEncodings(uint32_t *Offset) { } Expected -DWARFDebugNames::NameIndex::extractAbbrev(uint32_t *Offset) { +DWARFDebugNames::NameIndex::extractAbbrev(uint64_t *Offset) { if (*Offset >= EntriesBase) { return createStringError(errc::illegal_byte_sequence, "Incorrectly terminated abbreviation table."); @@ -482,7 +482,7 @@ DWARFDebugNames::NameIndex::extractAbbrev(uint32_t *Offset) { Error DWARFDebugNames::NameIndex::extract() { const DWARFDataExtractor &AS = Section.AccelSection; - uint32_t Offset = Base; + uint64_t Offset = Base; if (Error E = Hdr.extract(AS, &Offset)) return E; @@ -577,27 +577,27 @@ std::error_code DWARFDebugNames::SentinelError::convertToErrorCode() const { return inconvertibleErrorCode(); } -uint32_t DWARFDebugNames::NameIndex::getCUOffset(uint32_t CU) const { +uint64_t DWARFDebugNames::NameIndex::getCUOffset(uint32_t CU) const { assert(CU < Hdr.CompUnitCount); - uint32_t Offset = CUsBase + 4 * CU; + uint64_t Offset = CUsBase + 4 * CU; return Section.AccelSection.getRelocatedValue(4, &Offset); } -uint32_t DWARFDebugNames::NameIndex::getLocalTUOffset(uint32_t TU) const { +uint64_t DWARFDebugNames::NameIndex::getLocalTUOffset(uint32_t TU) const { assert(TU < Hdr.LocalTypeUnitCount); - uint32_t Offset = CUsBase + 4 * (Hdr.CompUnitCount + TU); + uint64_t Offset = CUsBase + 4 * (Hdr.CompUnitCount + TU); return Section.AccelSection.getRelocatedValue(4, &Offset); } uint64_t DWARFDebugNames::NameIndex::getForeignTUSignature(uint32_t TU) const { assert(TU < Hdr.ForeignTypeUnitCount); - uint32_t Offset = + uint64_t Offset = CUsBase + 4 * (Hdr.CompUnitCount + Hdr.LocalTypeUnitCount) + 8 * TU; return Section.AccelSection.getU64(&Offset); } Expected -DWARFDebugNames::NameIndex::getEntry(uint32_t *Offset) const { +DWARFDebugNames::NameIndex::getEntry(uint64_t *Offset) const { const DWARFDataExtractor &AS = Section.AccelSection; if (!AS.isValidOffset(*Offset)) return createStringError(errc::illegal_byte_sequence, @@ -625,12 +625,12 @@ DWARFDebugNames::NameIndex::getEntry(uint32_t *Offset) const { DWARFDebugNames::NameTableEntry 
DWARFDebugNames::NameIndex::getNameTableEntry(uint32_t Index) const { assert(0 < Index && Index <= Hdr.NameCount); - uint32_t StringOffsetOffset = StringOffsetsBase + 4 * (Index - 1); - uint32_t EntryOffsetOffset = EntryOffsetsBase + 4 * (Index - 1); + uint64_t StringOffsetOffset = StringOffsetsBase + 4 * (Index - 1); + uint64_t EntryOffsetOffset = EntryOffsetsBase + 4 * (Index - 1); const DWARFDataExtractor &AS = Section.AccelSection; - uint32_t StringOffset = AS.getRelocatedValue(4, &StringOffsetOffset); - uint32_t EntryOffset = AS.getU32(&EntryOffsetOffset); + uint64_t StringOffset = AS.getRelocatedValue(4, &StringOffsetOffset); + uint64_t EntryOffset = AS.getU32(&EntryOffsetOffset); EntryOffset += EntriesBase; return {Section.StringSection, Index, StringOffset, EntryOffset}; } @@ -638,13 +638,13 @@ DWARFDebugNames::NameIndex::getNameTableEntry(uint32_t Index) const { uint32_t DWARFDebugNames::NameIndex::getBucketArrayEntry(uint32_t Bucket) const { assert(Bucket < Hdr.BucketCount); - uint32_t BucketOffset = BucketsBase + 4 * Bucket; + uint64_t BucketOffset = BucketsBase + 4 * Bucket; return Section.AccelSection.getU32(&BucketOffset); } uint32_t DWARFDebugNames::NameIndex::getHashArrayEntry(uint32_t Index) const { assert(0 < Index && Index <= Hdr.NameCount); - uint32_t HashOffset = HashesBase + 4 * (Index - 1); + uint64_t HashOffset = HashesBase + 4 * (Index - 1); return Section.AccelSection.getU32(&HashOffset); } @@ -653,8 +653,8 @@ uint32_t DWARFDebugNames::NameIndex::getHashArrayEntry(uint32_t Index) const { // it's not possible to recover this entry list (but the other lists may still // parse OK). bool DWARFDebugNames::NameIndex::dumpEntry(ScopedPrinter &W, - uint32_t *Offset) const { - uint32_t EntryId = *Offset; + uint64_t *Offset) const { + uint64_t EntryId = *Offset; auto EntryOr = getEntry(Offset); if (!EntryOr) { handleAllErrors(EntryOr.takeError(), [](const SentinelError &) {}, @@ -674,10 +674,10 @@ void DWARFDebugNames::NameIndex::dumpName(ScopedPrinter &W, if (Hash) W.printHex("Hash", *Hash); - W.startLine() << format("String: 0x%08x", NTE.getStringOffset()); + W.startLine() << format("String: 0x%08" PRIx64, NTE.getStringOffset()); W.getOStream() << " \"" << NTE.getString() << "\"\n"; - uint32_t EntryOffset = NTE.getEntryOffset(); + uint64_t EntryOffset = NTE.getEntryOffset(); while (dumpEntry(W, &EntryOffset)) /*empty*/; } @@ -685,7 +685,7 @@ void DWARFDebugNames::NameIndex::dumpName(ScopedPrinter &W, void DWARFDebugNames::NameIndex::dumpCUs(ScopedPrinter &W) const { ListScope CUScope(W, "Compilation Unit offsets"); for (uint32_t CU = 0; CU < Hdr.CompUnitCount; ++CU) - W.startLine() << format("CU[%u]: 0x%08x\n", CU, getCUOffset(CU)); + W.startLine() << format("CU[%u]: 0x%08" PRIx64 "\n", CU, getCUOffset(CU)); } void DWARFDebugNames::NameIndex::dumpLocalTUs(ScopedPrinter &W) const { @@ -694,7 +694,8 @@ void DWARFDebugNames::NameIndex::dumpLocalTUs(ScopedPrinter &W) const { ListScope TUScope(W, "Local Type Unit offsets"); for (uint32_t TU = 0; TU < Hdr.LocalTypeUnitCount; ++TU) - W.startLine() << format("LocalTU[%u]: 0x%08x\n", TU, getLocalTUOffset(TU)); + W.startLine() << format("LocalTU[%u]: 0x%08" PRIx64 "\n", TU, + getLocalTUOffset(TU)); } void DWARFDebugNames::NameIndex::dumpForeignTUs(ScopedPrinter &W) const { @@ -756,7 +757,7 @@ LLVM_DUMP_METHOD void DWARFDebugNames::NameIndex::dump(ScopedPrinter &W) const { } Error DWARFDebugNames::extract() { - uint32_t Offset = 0; + uint64_t Offset = 0; while (AccelSection.isValidOffset(Offset)) { NameIndex Next(*this, Offset); if 
(Error E = Next.extract()) @@ -778,7 +779,7 @@ LLVM_DUMP_METHOD void DWARFDebugNames::dump(raw_ostream &OS) const { NI.dump(W); } -Optional +Optional DWARFDebugNames::ValueIterator::findEntryOffsetInCurrentIndex() { const Header &Hdr = CurrentIndex->Hdr; if (Hdr.BucketCount == 0) { @@ -822,7 +823,7 @@ bool DWARFDebugNames::ValueIterator::getEntryAtCurrentOffset() { } bool DWARFDebugNames::ValueIterator::findInCurrentIndex() { - Optional Offset = findEntryOffsetInCurrentIndex(); + Optional Offset = findEntryOffsetInCurrentIndex(); if (!Offset) return false; DataOffset = *Offset; @@ -877,7 +878,7 @@ DWARFDebugNames::equal_range(StringRef Key) const { } const DWARFDebugNames::NameIndex * -DWARFDebugNames::getCUNameIndex(uint32_t CUOffset) { +DWARFDebugNames::getCUNameIndex(uint64_t CUOffset) { if (CUToNameIndex.size() == 0 && NameIndices.size() > 0) { for (const auto &NI : *this) { for (uint32_t CU = 0; CU < NI.getCUCount(); ++CU) diff --git a/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp b/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp index 74cce42466dd..f59e49268288 100644 --- a/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp +++ b/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp @@ -15,16 +15,18 @@ using namespace llvm; void DWARFCompileUnit::dump(raw_ostream &OS, DIDumpOptions DumpOpts) { - OS << format("0x%08x", getOffset()) << ": Compile Unit:" - << " length = " << format("0x%08x", getLength()) + OS << format("0x%08" PRIx64, getOffset()) << ": Compile Unit:" + << " length = " << format("0x%08" PRIx64, getLength()) << " version = " << format("0x%04x", getVersion()); if (getVersion() >= 5) OS << " unit_type = " << dwarf::UnitTypeString(getUnitType()); - OS << " abbr_offset = " << format("0x%04x", getAbbreviations()->getOffset()) + OS << " abbr_offset = " + << format("0x%04" PRIx64, getAbbreviations()->getOffset()) << " addr_size = " << format("0x%02x", getAddressByteSize()); if (getVersion() >= 5 && getUnitType() != dwarf::DW_UT_compile) OS << " DWO_id = " << format("0x%016" PRIx64, *getDWOId()); - OS << " (next unit at " << format("0x%08x", getNextUnitOffset()) << ")\n"; + OS << " (next unit at " << format("0x%08" PRIx64, getNextUnitOffset()) + << ")\n"; if (DWARFDie CUDie = getUnitDIE(false)) CUDie.dump(OS, 0, DumpOpts); diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp index 5ede9bf59619..c06d85d50609 100644 --- a/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -138,7 +138,7 @@ static void dumpDWARFv5StringOffsetsSection( DWARFDataExtractor StrOffsetExt(Obj, StringOffsetsSection, LittleEndian, 0); DataExtractor StrData(StringSection, LittleEndian, 0); uint64_t SectionSize = StringOffsetsSection.Data.size(); - uint32_t Offset = 0; + uint64_t Offset = 0; for (auto &Contribution : Contributions) { // Report an ill-formed contribution. if (!Contribution) { @@ -166,10 +166,10 @@ static void dumpDWARFv5StringOffsetsSection( } // Report a gap in the table. if (Offset < ContributionHeader) { - OS << format("0x%8.8x: Gap, length = ", Offset); + OS << format("0x%8.8" PRIx64 ": Gap, length = ", Offset); OS << (ContributionHeader - Offset) << "\n"; } - OS << format("0x%8.8x: ", (uint32_t)ContributionHeader); + OS << format("0x%8.8" PRIx64 ": ", ContributionHeader); // In DWARF v5 the contribution size in the descriptor does not equal // the originally encoded length (it does not contain the length of the // version field and the padding, a total of 4 bytes). 
Add them back in @@ -181,26 +181,19 @@ static void dumpDWARFv5StringOffsetsSection( Offset = Contribution->Base; unsigned EntrySize = Contribution->getDwarfOffsetByteSize(); while (Offset - Contribution->Base < Contribution->Size) { - OS << format("0x%8.8x: ", Offset); - // FIXME: We can only extract strings if the offset fits in 32 bits. + OS << format("0x%8.8" PRIx64 ": ", Offset); uint64_t StringOffset = StrOffsetExt.getRelocatedValue(EntrySize, &Offset); - // Extract the string if we can and display it. Otherwise just report - // the offset. - if (StringOffset <= std::numeric_limits::max()) { - uint32_t StringOffset32 = (uint32_t)StringOffset; - OS << format("%8.8x ", StringOffset32); - const char *S = StrData.getCStr(&StringOffset32); - if (S) - OS << format("\"%s\"", S); - } else - OS << format("%16.16" PRIx64 " ", StringOffset); + OS << format("%8.8" PRIx64 " ", StringOffset); + const char *S = StrData.getCStr(&StringOffset); + if (S) + OS << format("\"%s\"", S); OS << "\n"; } } // Report a gap at the end of the table. if (Offset < SectionSize) { - OS << format("0x%8.8x: Gap, length = ", Offset); + OS << format("0x%8.8" PRIx64 ": Gap, length = ", Offset); OS << (SectionSize - Offset) << "\n"; } } @@ -225,7 +218,7 @@ static void dumpStringOffsetsSection(raw_ostream &OS, StringRef SectionName, StringSection, Units, LittleEndian); else { DataExtractor strOffsetExt(StringOffsetsSection.Data, LittleEndian, 0); - uint32_t offset = 0; + uint64_t offset = 0; uint64_t size = StringOffsetsSection.Data.size(); // Ensure that size is a multiple of the size of an entry. if (size & ((uint64_t)(sizeof(uint32_t) - 1))) { @@ -235,9 +228,9 @@ static void dumpStringOffsetsSection(raw_ostream &OS, StringRef SectionName, } DataExtractor StrData(StringSection, LittleEndian, 0); while (offset < size) { - OS << format("0x%8.8x: ", offset); - uint32_t StringOffset = strOffsetExt.getU32(&offset); - OS << format("%8.8x ", StringOffset); + OS << format("0x%8.8" PRIx64 ": ", offset); + uint64_t StringOffset = strOffsetExt.getU32(&offset); + OS << format("%8.8" PRIx64 " ", StringOffset); const char *S = StrData.getCStr(&StringOffset); if (S) OS << format("\"%s\"", S); @@ -250,10 +243,10 @@ static void dumpStringOffsetsSection(raw_ostream &OS, StringRef SectionName, static void dumpAddrSection(raw_ostream &OS, DWARFDataExtractor &AddrData, DIDumpOptions DumpOpts, uint16_t Version, uint8_t AddrSize) { - uint32_t Offset = 0; + uint64_t Offset = 0; while (AddrData.isValidOffset(Offset)) { DWARFDebugAddrTable AddrTable; - uint32_t TableOffset = Offset; + uint64_t TableOffset = Offset; if (Error Err = AddrTable.extract(AddrData, &Offset, Version, AddrSize, DWARFContext::dumpWarning)) { WithColor::error() << toString(std::move(Err)) << '\n'; @@ -261,8 +254,7 @@ static void dumpAddrSection(raw_ostream &OS, DWARFDataExtractor &AddrData, // could be read. If it couldn't, stop reading the section. 
if (!AddrTable.hasValidLength()) break; - uint64_t Length = AddrTable.getLength(); - Offset = TableOffset + Length; + Offset = TableOffset + AddrTable.getLength(); } else { AddrTable.dump(OS, DumpOpts); } @@ -275,10 +267,10 @@ static void dumpRnglistsSection( llvm::function_ref(uint32_t)> LookupPooledAddress, DIDumpOptions DumpOpts) { - uint32_t Offset = 0; + uint64_t Offset = 0; while (rnglistData.isValidOffset(Offset)) { llvm::DWARFDebugRnglistTable Rnglists; - uint32_t TableOffset = Offset; + uint64_t TableOffset = Offset; if (Error Err = Rnglists.extract(rnglistData, &Offset)) { WithColor::error() << toString(std::move(Err)) << '\n'; uint64_t Length = Rnglists.length(); @@ -297,21 +289,25 @@ static void dumpLoclistsSection(raw_ostream &OS, DIDumpOptions DumpOpts, DWARFDataExtractor Data, const MCRegisterInfo *MRI, Optional DumpOffset) { - uint32_t Offset = 0; - DWARFDebugLoclists Loclists; + uint64_t Offset = 0; - DWARFListTableHeader Header(".debug_loclists", "locations"); - if (Error E = Header.extract(Data, &Offset)) { - WithColor::error() << toString(std::move(E)) << '\n'; - return; - } + while (Data.isValidOffset(Offset)) { + DWARFListTableHeader Header(".debug_loclists", "locations"); + if (Error E = Header.extract(Data, &Offset)) { + WithColor::error() << toString(std::move(E)) << '\n'; + return; + } - Header.dump(OS, DumpOpts); - DataExtractor LocData(Data.getData().drop_front(Offset), - Data.isLittleEndian(), Header.getAddrSize()); + Header.dump(OS, DumpOpts); + DataExtractor LocData(Data.getData(), + Data.isLittleEndian(), Header.getAddrSize()); - Loclists.parse(LocData, Header.getVersion()); - Loclists.dump(OS, 0, MRI, DumpOffset); + DWARFDebugLoclists Loclists; + uint64_t EndOffset = Header.length() + Header.getHeaderOffset(); + Loclists.parse(LocData, Offset, EndOffset, Header.getVersion()); + Loclists.dump(OS, 0, MRI, DumpOpts, DumpOffset); + Offset = EndOffset; + } } void DWARFContext::dump( @@ -386,7 +382,7 @@ void DWARFContext::dump( if (const auto *Off = shouldDump(Explicit, ".debug_loc", DIDT_ID_DebugLoc, DObj->getLocSection().Data)) { - getDebugLoc()->dump(OS, getRegisterInfo(), *Off); + getDebugLoc()->dump(OS, getRegisterInfo(), DumpOpts, *Off); } if (const auto *Off = shouldDump(Explicit, ".debug_loclists", DIDT_ID_DebugLoclists, @@ -398,15 +394,15 @@ void DWARFContext::dump( if (const auto *Off = shouldDump(ExplicitDWO, ".debug_loc.dwo", DIDT_ID_DebugLoc, DObj->getLocDWOSection().Data)) { - getDebugLocDWO()->dump(OS, 0, getRegisterInfo(), *Off); + getDebugLocDWO()->dump(OS, 0, getRegisterInfo(), DumpOpts, *Off); } if (const auto *Off = shouldDump(Explicit, ".debug_frame", DIDT_ID_DebugFrame, - DObj->getDebugFrameSection())) + DObj->getFrameSection().Data)) getDebugFrame()->dump(OS, getRegisterInfo(), *Off); if (const auto *Off = shouldDump(Explicit, ".eh_frame", DIDT_ID_DebugFrame, - DObj->getEHFrameSection())) + DObj->getEHFrameSection().Data)) getEHFrame()->dump(OS, getRegisterInfo(), *Off); if (DumpType & DIDT_DebugMacro) { @@ -417,9 +413,9 @@ void DWARFContext::dump( } if (shouldDump(Explicit, ".debug_aranges", DIDT_ID_DebugAranges, - DObj->getARangeSection())) { - uint32_t offset = 0; - DataExtractor arangesData(DObj->getARangeSection(), isLittleEndian(), 0); + DObj->getArangesSection())) { + uint64_t offset = 0; + DataExtractor arangesData(DObj->getArangesSection(), isLittleEndian(), 0); DWARFDebugArangeSet set; while (set.extract(arangesData, &offset)) set.dump(OS); @@ -433,7 +429,8 @@ void DWARFContext::dump( Parser.skip(dumpWarning); continue; } - OS << 
"debug_line[" << format("0x%8.8x", Parser.getOffset()) << "]\n"; + OS << "debug_line[" << format("0x%8.8" PRIx64, Parser.getOffset()) + << "]\n"; if (DumpOpts.Verbose) { Parser.parseNext(dumpWarning, dumpWarning, &OS); } else { @@ -474,32 +471,32 @@ void DWARFContext::dump( } if (shouldDump(Explicit, ".debug_str", DIDT_ID_DebugStr, - DObj->getStringSection())) { - DataExtractor strData(DObj->getStringSection(), isLittleEndian(), 0); - uint32_t offset = 0; - uint32_t strOffset = 0; + DObj->getStrSection())) { + DataExtractor strData(DObj->getStrSection(), isLittleEndian(), 0); + uint64_t offset = 0; + uint64_t strOffset = 0; while (const char *s = strData.getCStr(&offset)) { - OS << format("0x%8.8x: \"%s\"\n", strOffset, s); + OS << format("0x%8.8" PRIx64 ": \"%s\"\n", strOffset, s); strOffset = offset; } } if (shouldDump(ExplicitDWO, ".debug_str.dwo", DIDT_ID_DebugStr, - DObj->getStringDWOSection())) { - DataExtractor strDWOData(DObj->getStringDWOSection(), isLittleEndian(), 0); - uint32_t offset = 0; - uint32_t strDWOOffset = 0; + DObj->getStrDWOSection())) { + DataExtractor strDWOData(DObj->getStrDWOSection(), isLittleEndian(), 0); + uint64_t offset = 0; + uint64_t strDWOOffset = 0; while (const char *s = strDWOData.getCStr(&offset)) { - OS << format("0x%8.8x: \"%s\"\n", strDWOOffset, s); + OS << format("0x%8.8" PRIx64 ": \"%s\"\n", strDWOOffset, s); strDWOOffset = offset; } } if (shouldDump(Explicit, ".debug_line_str", DIDT_ID_DebugLineStr, - DObj->getLineStringSection())) { - DataExtractor strData(DObj->getLineStringSection(), isLittleEndian(), 0); - uint32_t offset = 0; - uint32_t strOffset = 0; + DObj->getLineStrSection())) { + DataExtractor strData(DObj->getLineStrSection(), isLittleEndian(), 0); + uint64_t offset = 0; + uint64_t strOffset = 0; while (const char *s = strData.getCStr(&offset)) { - OS << format("0x%8.8x: \"", strOffset); + OS << format("0x%8.8" PRIx64 ": \"", strOffset); OS.write_escaped(s); OS << "\"\n"; strOffset = offset; @@ -514,11 +511,11 @@ void DWARFContext::dump( } if (shouldDump(Explicit, ".debug_ranges", DIDT_ID_DebugRanges, - DObj->getRangeSection().Data)) { + DObj->getRangesSection().Data)) { uint8_t savedAddressByteSize = getCUAddrSize(); - DWARFDataExtractor rangesData(*DObj, DObj->getRangeSection(), + DWARFDataExtractor rangesData(*DObj, DObj->getRangesSection(), isLittleEndian(), savedAddressByteSize); - uint32_t offset = 0; + uint64_t offset = 0; DWARFDebugRangeList rangeList; while (rangesData.isValidOffset(offset)) { if (Error E = rangeList.extract(rangesData, &offset)) { @@ -552,38 +549,38 @@ void DWARFContext::dump( } if (shouldDump(Explicit, ".debug_pubnames", DIDT_ID_DebugPubnames, - DObj->getPubNamesSection().Data)) - DWARFDebugPubTable(*DObj, DObj->getPubNamesSection(), isLittleEndian(), false) + DObj->getPubnamesSection().Data)) + DWARFDebugPubTable(*DObj, DObj->getPubnamesSection(), isLittleEndian(), false) .dump(OS); if (shouldDump(Explicit, ".debug_pubtypes", DIDT_ID_DebugPubtypes, - DObj->getPubTypesSection().Data)) - DWARFDebugPubTable(*DObj, DObj->getPubTypesSection(), isLittleEndian(), false) + DObj->getPubtypesSection().Data)) + DWARFDebugPubTable(*DObj, DObj->getPubtypesSection(), isLittleEndian(), false) .dump(OS); if (shouldDump(Explicit, ".debug_gnu_pubnames", DIDT_ID_DebugGnuPubnames, - DObj->getGnuPubNamesSection().Data)) - DWARFDebugPubTable(*DObj, DObj->getGnuPubNamesSection(), isLittleEndian(), + DObj->getGnuPubnamesSection().Data)) + DWARFDebugPubTable(*DObj, DObj->getGnuPubnamesSection(), isLittleEndian(), true /* GnuStyle 
*/) .dump(OS); if (shouldDump(Explicit, ".debug_gnu_pubtypes", DIDT_ID_DebugGnuPubtypes, - DObj->getGnuPubTypesSection().Data)) - DWARFDebugPubTable(*DObj, DObj->getGnuPubTypesSection(), isLittleEndian(), + DObj->getGnuPubtypesSection().Data)) + DWARFDebugPubTable(*DObj, DObj->getGnuPubtypesSection(), isLittleEndian(), true /* GnuStyle */) .dump(OS); if (shouldDump(Explicit, ".debug_str_offsets", DIDT_ID_DebugStrOffsets, - DObj->getStringOffsetSection().Data)) + DObj->getStrOffsetsSection().Data)) dumpStringOffsetsSection(OS, "debug_str_offsets", *DObj, - DObj->getStringOffsetSection(), - DObj->getStringSection(), normal_units(), + DObj->getStrOffsetsSection(), + DObj->getStrSection(), normal_units(), isLittleEndian(), getMaxVersion()); if (shouldDump(ExplicitDWO, ".debug_str_offsets.dwo", DIDT_ID_DebugStrOffsets, - DObj->getStringOffsetDWOSection().Data)) + DObj->getStrOffsetsDWOSection().Data)) dumpStringOffsetsSection(OS, "debug_str_offsets.dwo", *DObj, - DObj->getStringOffsetDWOSection(), - DObj->getStringDWOSection(), dwo_units(), + DObj->getStrOffsetsDWOSection(), + DObj->getStrDWOSection(), dwo_units(), isLittleEndian(), getMaxDWOVersion()); if (shouldDump(Explicit, ".gdb_index", DIDT_ID_GdbIndex, @@ -607,7 +604,7 @@ void DWARFContext::dump( DObj->getAppleObjCSection().Data)) getAppleObjC().dump(OS); if (shouldDump(Explicit, ".debug_names", DIDT_ID_DebugNames, - DObj->getDebugNamesSection().Data)) + DObj->getNamesSection().Data)) getDebugNames().dump(OS); } @@ -641,7 +638,7 @@ DWARFCompileUnit *DWARFContext::getDWOCompileUnitForHash(uint64_t Hash) { return nullptr; } -DWARFDie DWARFContext::getDIEForOffset(uint32_t Offset) { +DWARFDie DWARFContext::getDIEForOffset(uint64_t Offset) { parseNormalUnits(); if (auto *CU = NormalUnits.getUnitForOffset(Offset)) return CU->getDIEForOffset(Offset); @@ -667,7 +664,7 @@ const DWARFUnitIndex &DWARFContext::getCUIndex() { DataExtractor CUIndexData(DObj->getCUIndexSection(), isLittleEndian(), 0); - CUIndex = llvm::make_unique(DW_SECT_INFO); + CUIndex = std::make_unique(DW_SECT_INFO); CUIndex->parse(CUIndexData); return *CUIndex; } @@ -678,7 +675,7 @@ const DWARFUnitIndex &DWARFContext::getTUIndex() { DataExtractor TUIndexData(DObj->getTUIndexSection(), isLittleEndian(), 0); - TUIndex = llvm::make_unique(DW_SECT_TYPES); + TUIndex = std::make_unique(DW_SECT_TYPES); TUIndex->parse(TUIndexData); return *TUIndex; } @@ -688,7 +685,7 @@ DWARFGdbIndex &DWARFContext::getGdbIndex() { return *GdbIndex; DataExtractor GdbIndexData(DObj->getGdbIndexSection(), true /*LE*/, 0); - GdbIndex = llvm::make_unique(); + GdbIndex = std::make_unique(); GdbIndex->parse(GdbIndexData); return *GdbIndex; } @@ -740,7 +737,7 @@ const DWARFDebugLoclists *DWARFContext::getDebugLocDWO() { // Use version 4. DWO does not support the DWARF v5 .debug_loclists yet and // that means we are parsing the new style .debug_loc (pre-standatized version // of the .debug_loclists). - LocDWO->parse(LocData, 4 /* Version */); + LocDWO->parse(LocData, 0, LocData.getData().size(), 4 /* Version */); return LocDWO.get(); } @@ -766,7 +763,7 @@ const DWARFDebugFrame *DWARFContext::getDebugFrame() { // provides this information). 
This problem is fixed in DWARFv4 // See this dwarf-discuss discussion for more details: // http://lists.dwarfstd.org/htdig.cgi/dwarf-discuss-dwarfstd.org/2011-December/001173.html - DWARFDataExtractor debugFrameData(DObj->getDebugFrameSection(), + DWARFDataExtractor debugFrameData(*DObj, DObj->getFrameSection(), isLittleEndian(), DObj->getAddressSize()); DebugFrame.reset(new DWARFDebugFrame(getArch(), false /* IsEH */)); DebugFrame->parse(debugFrameData); @@ -777,8 +774,8 @@ const DWARFDebugFrame *DWARFContext::getEHFrame() { if (EHFrame) return EHFrame.get(); - DWARFDataExtractor debugFrameData(DObj->getEHFrameSection(), isLittleEndian(), - DObj->getAddressSize()); + DWARFDataExtractor debugFrameData(*DObj, DObj->getEHFrameSection(), + isLittleEndian(), DObj->getAddressSize()); DebugFrame.reset(new DWARFDebugFrame(getArch(), true /* IsEH */)); DebugFrame->parse(debugFrameData); return DebugFrame.get(); @@ -809,29 +806,29 @@ static T &getAccelTable(std::unique_ptr &Cache, const DWARFObject &Obj, } const DWARFDebugNames &DWARFContext::getDebugNames() { - return getAccelTable(Names, *DObj, DObj->getDebugNamesSection(), - DObj->getStringSection(), isLittleEndian()); + return getAccelTable(Names, *DObj, DObj->getNamesSection(), + DObj->getStrSection(), isLittleEndian()); } const AppleAcceleratorTable &DWARFContext::getAppleNames() { return getAccelTable(AppleNames, *DObj, DObj->getAppleNamesSection(), - DObj->getStringSection(), isLittleEndian()); + DObj->getStrSection(), isLittleEndian()); } const AppleAcceleratorTable &DWARFContext::getAppleTypes() { return getAccelTable(AppleTypes, *DObj, DObj->getAppleTypesSection(), - DObj->getStringSection(), isLittleEndian()); + DObj->getStrSection(), isLittleEndian()); } const AppleAcceleratorTable &DWARFContext::getAppleNamespaces() { return getAccelTable(AppleNamespaces, *DObj, DObj->getAppleNamespacesSection(), - DObj->getStringSection(), isLittleEndian()); + DObj->getStrSection(), isLittleEndian()); } const AppleAcceleratorTable &DWARFContext::getAppleObjC() { return getAccelTable(AppleObjC, *DObj, DObj->getAppleObjCSection(), - DObj->getStringSection(), isLittleEndian()); + DObj->getStrSection(), isLittleEndian()); } const DWARFDebugLine::LineTable * @@ -858,7 +855,7 @@ Expected DWARFContext::getLineTableForUnit( if (!Offset) return nullptr; // No line table for this compile unit. - uint32_t stmtOffset = *Offset + U->getLineTableOffset(); + uint64_t stmtOffset = *Offset + U->getLineTableOffset(); // See if the line table is cached. if (const DWARFLineTable *lt = Line->getLineTable(stmtOffset)) return lt; @@ -898,7 +895,7 @@ void DWARFContext::parseDWOUnits(bool Lazy) { }); } -DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t Offset) { +DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint64_t Offset) { parseNormalUnits(); return dyn_cast_or_null( NormalUnits.getUnitForOffset(Offset)); @@ -906,7 +903,7 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t Offset) { DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) { // First, get the offset of the compile unit. - uint32_t CUOffset = getDebugAranges()->findAddress(Address); + uint64_t CUOffset = getDebugAranges()->findAddress(Address); // Retrieve the compile unit. 
return getCompileUnitForOffset(CUOffset); } @@ -1118,8 +1115,8 @@ DILineInfoTable DWARFContext::getLineInfoForAddressRange( if (!CU) return Lines; - std::string FunctionName = ""; uint32_t StartLine = 0; + std::string FunctionName(DILineInfo::BadString); getFunctionNameAndStartLineForAddress(CU, Address.Address, Spec.FNKind, FunctionName, StartLine); @@ -1379,46 +1376,50 @@ class DWARFObjInMemory final : public DWARFObject { InfoSectionMap TypesDWOSections; DWARFSectionMap LocSection; - DWARFSectionMap LocListsSection; + DWARFSectionMap LoclistsSection; DWARFSectionMap LineSection; - DWARFSectionMap RangeSection; + DWARFSectionMap RangesSection; DWARFSectionMap RnglistsSection; - DWARFSectionMap StringOffsetSection; + DWARFSectionMap StrOffsetsSection; DWARFSectionMap LineDWOSection; + DWARFSectionMap FrameSection; + DWARFSectionMap EHFrameSection; DWARFSectionMap LocDWOSection; - DWARFSectionMap StringOffsetDWOSection; - DWARFSectionMap RangeDWOSection; + DWARFSectionMap StrOffsetsDWOSection; + DWARFSectionMap RangesDWOSection; DWARFSectionMap RnglistsDWOSection; DWARFSectionMap AddrSection; DWARFSectionMap AppleNamesSection; DWARFSectionMap AppleTypesSection; DWARFSectionMap AppleNamespacesSection; DWARFSectionMap AppleObjCSection; - DWARFSectionMap DebugNamesSection; - DWARFSectionMap PubNamesSection; - DWARFSectionMap PubTypesSection; - DWARFSectionMap GnuPubNamesSection; - DWARFSectionMap GnuPubTypesSection; + DWARFSectionMap NamesSection; + DWARFSectionMap PubnamesSection; + DWARFSectionMap PubtypesSection; + DWARFSectionMap GnuPubnamesSection; + DWARFSectionMap GnuPubtypesSection; DWARFSectionMap *mapNameToDWARFSection(StringRef Name) { return StringSwitch(Name) .Case("debug_loc", &LocSection) - .Case("debug_loclists", &LocListsSection) + .Case("debug_loclists", &LoclistsSection) .Case("debug_line", &LineSection) - .Case("debug_str_offsets", &StringOffsetSection) - .Case("debug_ranges", &RangeSection) + .Case("debug_frame", &FrameSection) + .Case("eh_frame", &EHFrameSection) + .Case("debug_str_offsets", &StrOffsetsSection) + .Case("debug_ranges", &RangesSection) .Case("debug_rnglists", &RnglistsSection) .Case("debug_loc.dwo", &LocDWOSection) .Case("debug_line.dwo", &LineDWOSection) - .Case("debug_names", &DebugNamesSection) + .Case("debug_names", &NamesSection) .Case("debug_rnglists.dwo", &RnglistsDWOSection) - .Case("debug_str_offsets.dwo", &StringOffsetDWOSection) + .Case("debug_str_offsets.dwo", &StrOffsetsDWOSection) .Case("debug_addr", &AddrSection) .Case("apple_names", &AppleNamesSection) - .Case("debug_pubnames", &PubNamesSection) - .Case("debug_pubtypes", &PubTypesSection) - .Case("debug_gnu_pubnames", &GnuPubNamesSection) - .Case("debug_gnu_pubtypes", &GnuPubTypesSection) + .Case("debug_pubnames", &PubnamesSection) + .Case("debug_pubtypes", &PubtypesSection) + .Case("debug_gnu_pubnames", &GnuPubnamesSection) + .Case("debug_gnu_pubtypes", &GnuPubtypesSection) .Case("apple_types", &AppleTypesSection) .Case("apple_namespaces", &AppleNamespacesSection) .Case("apple_namespac", &AppleNamespacesSection) @@ -1427,17 +1428,15 @@ class DWARFObjInMemory final : public DWARFObject { } StringRef AbbrevSection; - StringRef ARangeSection; - StringRef DebugFrameSection; - StringRef EHFrameSection; - StringRef StringSection; + StringRef ArangesSection; + StringRef StrSection; StringRef MacinfoSection; StringRef AbbrevDWOSection; - StringRef StringDWOSection; + StringRef StrDWOSection; StringRef CUIndexSection; StringRef GdbIndexSection; StringRef TUIndexSection; - StringRef 
LineStringSection; + StringRef LineStrSection; // A deque holding section data whose iterators are not invalidated when // new decompressed sections are inserted at the end. @@ -1448,17 +1447,15 @@ class DWARFObjInMemory final : public DWARFObject { return &Sec->Data; return StringSwitch(Name) .Case("debug_abbrev", &AbbrevSection) - .Case("debug_aranges", &ARangeSection) - .Case("debug_frame", &DebugFrameSection) - .Case("eh_frame", &EHFrameSection) - .Case("debug_str", &StringSection) + .Case("debug_aranges", &ArangesSection) + .Case("debug_str", &StrSection) .Case("debug_macinfo", &MacinfoSection) .Case("debug_abbrev.dwo", &AbbrevDWOSection) - .Case("debug_str.dwo", &StringDWOSection) + .Case("debug_str.dwo", &StrDWOSection) .Case("debug_cu_index", &CUIndexSection) .Case("debug_tu_index", &TUIndexSection) .Case("gdb_index", &GdbIndexSection) - .Case("debug_line_str", &LineStringSection) + .Case("debug_line_str", &LineStrSection) // Any more debug info sections go here. .Default(nullptr); } @@ -1513,7 +1510,11 @@ public: StringMap SectionAmountMap; for (const SectionRef &Section : Obj.sections()) { StringRef Name; - Section.getName(Name); + if (auto NameOrErr = Section.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + ++SectionAmountMap[Name]; SectionNames.push_back({ Name, true }); @@ -1526,10 +1527,19 @@ public: continue; StringRef Data; - section_iterator RelocatedSection = Section.getRelocatedSection(); + Expected SecOrErr = Section.getRelocatedSection(); + if (!SecOrErr) { + ErrorPolicy EP = HandleError(createError( + "failed to get relocated section: ", SecOrErr.takeError())); + if (EP == ErrorPolicy::Halt) + return; + continue; + } + // Try to obtain an already relocated version of this section. // Else use the unrelocated section from the object file. We'll have to // apply relocations ourselves later. + section_iterator RelocatedSection = *SecOrErr; if (!L || !L->getLoadedSectionContents(*RelocatedSection, Data)) { Expected E = Section.getContents(); if (E) @@ -1560,7 +1570,7 @@ public: *SectionData = Data; if (Name == "debug_ranges") { // FIXME: Use the other dwo range section when we emit it. - RangeDWOSection.Data = Data; + RangesDWOSection.Data = Data; } } else if (Name == "debug_info") { // Find debug_info and debug_types data by section rather than name as @@ -1578,12 +1588,15 @@ public: continue; StringRef RelSecName; - StringRef RelSecData; - RelocatedSection->getName(RelSecName); + if (auto NameOrErr = RelocatedSection->getName()) + RelSecName = *NameOrErr; + else + consumeError(NameOrErr.takeError()); // If the section we're relocating was relocated already by the JIT, // then we used the relocated version above, so we do not need to process // relocations for it now. 
+ StringRef RelSecData; if (L && L->getLoadedSectionContents(*RelocatedSection, RelSecData)) continue; @@ -1710,12 +1723,12 @@ public: const DWARFSection &getLocDWOSection() const override { return LocDWOSection; } - StringRef getStringDWOSection() const override { return StringDWOSection; } - const DWARFSection &getStringOffsetDWOSection() const override { - return StringOffsetDWOSection; + StringRef getStrDWOSection() const override { return StrDWOSection; } + const DWARFSection &getStrOffsetsDWOSection() const override { + return StrOffsetsDWOSection; } - const DWARFSection &getRangeDWOSection() const override { - return RangeDWOSection; + const DWARFSection &getRangesDWOSection() const override { + return RangesDWOSection; } const DWARFSection &getRnglistsDWOSection() const override { return RnglistsDWOSection; @@ -1726,10 +1739,10 @@ public: StringRef getTUIndexSection() const override { return TUIndexSection; } // DWARF v5 - const DWARFSection &getStringOffsetSection() const override { - return StringOffsetSection; + const DWARFSection &getStrOffsetsSection() const override { + return StrOffsetsSection; } - StringRef getLineStringSection() const override { return LineStringSection; } + StringRef getLineStrSection() const override { return LineStrSection; } // Sections for DWARF5 split dwarf proposal. void forEachInfoDWOSections( @@ -1745,24 +1758,28 @@ public: StringRef getAbbrevSection() const override { return AbbrevSection; } const DWARFSection &getLocSection() const override { return LocSection; } - const DWARFSection &getLoclistsSection() const override { return LocListsSection; } - StringRef getARangeSection() const override { return ARangeSection; } - StringRef getDebugFrameSection() const override { return DebugFrameSection; } - StringRef getEHFrameSection() const override { return EHFrameSection; } + const DWARFSection &getLoclistsSection() const override { return LoclistsSection; } + StringRef getArangesSection() const override { return ArangesSection; } + const DWARFSection &getFrameSection() const override { + return FrameSection; + } + const DWARFSection &getEHFrameSection() const override { + return EHFrameSection; + } const DWARFSection &getLineSection() const override { return LineSection; } - StringRef getStringSection() const override { return StringSection; } - const DWARFSection &getRangeSection() const override { return RangeSection; } + StringRef getStrSection() const override { return StrSection; } + const DWARFSection &getRangesSection() const override { return RangesSection; } const DWARFSection &getRnglistsSection() const override { return RnglistsSection; } StringRef getMacinfoSection() const override { return MacinfoSection; } - const DWARFSection &getPubNamesSection() const override { return PubNamesSection; } - const DWARFSection &getPubTypesSection() const override { return PubTypesSection; } - const DWARFSection &getGnuPubNamesSection() const override { - return GnuPubNamesSection; + const DWARFSection &getPubnamesSection() const override { return PubnamesSection; } + const DWARFSection &getPubtypesSection() const override { return PubtypesSection; } + const DWARFSection &getGnuPubnamesSection() const override { + return GnuPubnamesSection; } - const DWARFSection &getGnuPubTypesSection() const override { - return GnuPubTypesSection; + const DWARFSection &getGnuPubtypesSection() const override { + return GnuPubtypesSection; } const DWARFSection &getAppleNamesSection() const override { return AppleNamesSection; @@ -1776,8 +1793,8 @@ public: const 
DWARFSection &getAppleObjCSection() const override { return AppleObjCSection; } - const DWARFSection &getDebugNamesSection() const override { - return DebugNamesSection; + const DWARFSection &getNamesSection() const override { + return NamesSection; } StringRef getFileName() const override { return FileName; } @@ -1799,16 +1816,16 @@ std::unique_ptr DWARFContext::create(const object::ObjectFile &Obj, const LoadedObjectInfo *L, function_ref HandleError, std::string DWPName) { - auto DObj = llvm::make_unique(Obj, L, HandleError); - return llvm::make_unique(std::move(DObj), std::move(DWPName)); + auto DObj = std::make_unique(Obj, L, HandleError); + return std::make_unique(std::move(DObj), std::move(DWPName)); } std::unique_ptr DWARFContext::create(const StringMap> &Sections, uint8_t AddrSize, bool isLittleEndian) { auto DObj = - llvm::make_unique(Sections, AddrSize, isLittleEndian); - return llvm::make_unique(std::move(DObj), ""); + std::make_unique(Sections, AddrSize, isLittleEndian); + return std::make_unique(std::move(DObj), ""); } Error DWARFContext::loadRegisterInfo(const object::ObjectFile &Obj) { diff --git a/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp b/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp index b9adf8cb1d99..53e676bc7031 100644 --- a/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp +++ b/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp @@ -12,14 +12,15 @@ using namespace llvm; -uint64_t DWARFDataExtractor::getRelocatedValue(uint32_t Size, uint32_t *Off, - uint64_t *SecNdx) const { +uint64_t DWARFDataExtractor::getRelocatedValue(uint32_t Size, uint64_t *Off, + uint64_t *SecNdx, + Error *Err) const { if (SecNdx) *SecNdx = object::SectionedAddress::UndefSection; if (!Section) - return getUnsigned(Off, Size); + return getUnsigned(Off, Size, Err); Optional E = Obj->find(*Section, *Off); - uint64_t A = getUnsigned(Off, Size); + uint64_t A = getUnsigned(Off, Size, Err); if (!E) return A; if (SecNdx) @@ -31,13 +32,13 @@ uint64_t DWARFDataExtractor::getRelocatedValue(uint32_t Size, uint32_t *Off, } Optional -DWARFDataExtractor::getEncodedPointer(uint32_t *Offset, uint8_t Encoding, +DWARFDataExtractor::getEncodedPointer(uint64_t *Offset, uint8_t Encoding, uint64_t PCRelOffset) const { if (Encoding == dwarf::DW_EH_PE_omit) return None; uint64_t Result = 0; - uint32_t OldOffset = *Offset; + uint64_t OldOffset = *Offset; // First get value switch (Encoding & 0x0F) { case dwarf::DW_EH_PE_absptr: diff --git a/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp b/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp index 31b324e5eb27..4afac2f99503 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp @@ -26,9 +26,9 @@ void DWARFAbbreviationDeclarationSet::clear() { } bool DWARFAbbreviationDeclarationSet::extract(DataExtractor Data, - uint32_t *OffsetPtr) { + uint64_t *OffsetPtr) { clear(); - const uint32_t BeginOffset = *OffsetPtr; + const uint64_t BeginOffset = *OffsetPtr; Offset = BeginOffset; DWARFAbbreviationDeclaration AbbrDecl; uint32_t PrevAbbrCode = 0; @@ -82,12 +82,12 @@ void DWARFDebugAbbrev::extract(DataExtractor Data) { void DWARFDebugAbbrev::parse() const { if (!Data) return; - uint32_t Offset = 0; + uint64_t Offset = 0; auto I = AbbrDeclSets.begin(); while (Data->isValidOffset(Offset)) { while (I != AbbrDeclSets.end() && I->first < Offset) ++I; - uint32_t CUAbbrOffset = Offset; + uint64_t CUAbbrOffset = Offset; DWARFAbbreviationDeclarationSet AbbrDecls; if (!AbbrDecls.extract(*Data, &Offset)) break; @@ -124,7 +124,7 @@ 
DWARFDebugAbbrev::getAbbreviationDeclarationSet(uint64_t CUAbbrOffset) const { } if (Data && CUAbbrOffset < Data->getData().size()) { - uint32_t Offset = CUAbbrOffset; + uint64_t Offset = CUAbbrOffset; DWARFAbbreviationDeclarationSet AbbrDecls; if (!AbbrDecls.extract(*Data, &Offset)) return nullptr; diff --git a/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp b/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp index 58626539bba4..f71543799e28 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp @@ -19,7 +19,7 @@ void DWARFDebugAddrTable::clear() { } Error DWARFDebugAddrTable::extract(DWARFDataExtractor Data, - uint32_t *OffsetPtr, + uint64_t *OffsetPtr, uint16_t Version, uint8_t AddrSize, std::function WarnCallback) { @@ -30,7 +30,7 @@ Error DWARFDebugAddrTable::extract(DWARFDataExtractor Data, return createStringError(errc::invalid_argument, "section is not large enough to contain a " ".debug_addr table length at offset 0x%" - PRIx32, *OffsetPtr); + PRIx64, *OffsetPtr); uint16_t UnitVersion; if (Version == 0) { WarnCallback(createStringError(errc::invalid_argument, @@ -44,28 +44,28 @@ Error DWARFDebugAddrTable::extract(DWARFDataExtractor Data, Format = dwarf::DwarfFormat::DWARF32; if (UnitVersion >= 5) { HeaderData.Length = Data.getU32(OffsetPtr); - if (HeaderData.Length == 0xffffffffu) { + if (HeaderData.Length == dwarf::DW_LENGTH_DWARF64) { invalidateLength(); return createStringError(errc::not_supported, - "DWARF64 is not supported in .debug_addr at offset 0x%" PRIx32, + "DWARF64 is not supported in .debug_addr at offset 0x%" PRIx64, HeaderOffset); } if (HeaderData.Length + sizeof(uint32_t) < sizeof(Header)) { uint32_t TmpLength = getLength(); invalidateLength(); return createStringError(errc::invalid_argument, - ".debug_addr table at offset 0x%" PRIx32 + ".debug_addr table at offset 0x%" PRIx64 " has too small length (0x%" PRIx32 ") to contain a complete header", HeaderOffset, TmpLength); } - uint32_t End = HeaderOffset + getLength(); + uint64_t End = HeaderOffset + getLength(); if (!Data.isValidOffsetForDataOfSize(HeaderOffset, End - HeaderOffset)) { uint32_t TmpLength = getLength(); invalidateLength(); return createStringError(errc::invalid_argument, "section is not large enough to contain a .debug_addr table " - "of length 0x%" PRIx32 " at offset 0x%" PRIx32, + "of length 0x%" PRIx32 " at offset 0x%" PRIx64, TmpLength, HeaderOffset); } @@ -88,7 +88,7 @@ Error DWARFDebugAddrTable::extract(DWARFDataExtractor Data, // and consists only of a series of addresses. if (HeaderData.Version > 5) { return createStringError(errc::not_supported, "version %" PRIu16 - " of .debug_addr section at offset 0x%" PRIx32 " is not supported", + " of .debug_addr section at offset 0x%" PRIx64 " is not supported", HeaderData.Version, HeaderOffset); } // FIXME: For now we just treat version mismatch as an error, @@ -97,19 +97,19 @@ Error DWARFDebugAddrTable::extract(DWARFDataExtractor Data, // attribute in the info table. 
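// ---[ Illustrative sketch, not part of the upstream patch ]------------------
// With section offsets widened to uint64_t, the diagnostics above switch from
// PRIx32 to PRIx64. A minimal error helper in the same style; the table name
// and message are assumed examples, not the exact upstream wording.
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include <cinttypes>
#include <cstdint>

static llvm::Error makeOffsetError(uint64_t HeaderOffset) {
  return llvm::createStringError(
      llvm::errc::invalid_argument,
      ".debug_addr table at offset 0x%" PRIx64 " could not be parsed",
      HeaderOffset);
}
// ---[ end sketch ]------------------------------------------------------------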
if (HeaderData.Version != UnitVersion) return createStringError(errc::invalid_argument, - ".debug_addr table at offset 0x%" PRIx32 + ".debug_addr table at offset 0x%" PRIx64 " has version %" PRIu16 " which is different from the version suggested" " by the DWARF unit header: %" PRIu16, HeaderOffset, HeaderData.Version, UnitVersion); if (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8) return createStringError(errc::not_supported, - ".debug_addr table at offset 0x%" PRIx32 + ".debug_addr table at offset 0x%" PRIx64 " has unsupported address size %" PRIu8, HeaderOffset, HeaderData.AddrSize); if (HeaderData.AddrSize != AddrSize && AddrSize != 0) return createStringError(errc::invalid_argument, - ".debug_addr table at offset 0x%" PRIx32 + ".debug_addr table at offset 0x%" PRIx64 " has address size %" PRIu8 " which is different from CU address size %" PRIu8, HeaderOffset, HeaderData.AddrSize, AddrSize); @@ -117,13 +117,13 @@ Error DWARFDebugAddrTable::extract(DWARFDataExtractor Data, // TODO: add support for non-zero segment selector size. if (HeaderData.SegSize != 0) return createStringError(errc::not_supported, - ".debug_addr table at offset 0x%" PRIx32 + ".debug_addr table at offset 0x%" PRIx64 " has unsupported segment selector size %" PRIu8, HeaderOffset, HeaderData.SegSize); if (DataSize % HeaderData.AddrSize != 0) { invalidateLength(); return createStringError(errc::invalid_argument, - ".debug_addr table at offset 0x%" PRIx32 + ".debug_addr table at offset 0x%" PRIx64 " contains data of size %" PRIu32 " which is not a multiple of addr size %" PRIu8, HeaderOffset, DataSize, HeaderData.AddrSize); @@ -162,7 +162,7 @@ Expected DWARFDebugAddrTable::getAddrEntry(uint32_t Index) const { return Addrs[Index]; return createStringError(errc::invalid_argument, "Index %" PRIu32 " is out of range of the " - ".debug_addr table at offset 0x%" PRIx32, + ".debug_addr table at offset 0x%" PRIx64, Index, HeaderOffset); } diff --git a/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp b/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp index 6551b61accb8..200b2d52a02b 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp @@ -24,13 +24,13 @@ void DWARFDebugArangeSet::Descriptor::dump(raw_ostream &OS, } void DWARFDebugArangeSet::clear() { - Offset = -1U; + Offset = -1ULL; std::memset(&HeaderData, 0, sizeof(Header)); ArangeDescriptors.clear(); } bool -DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) { +DWARFDebugArangeSet::extract(DataExtractor data, uint64_t *offset_ptr) { if (data.isValidOffset(*offset_ptr)) { ArangeDescriptors.clear(); Offset = *offset_ptr; diff --git a/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp index 6460c9feeab8..ca6043109cdb 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp @@ -23,11 +23,11 @@ using namespace llvm; void DWARFDebugAranges::extract(DataExtractor DebugArangesData) { if (!DebugArangesData.isValidOffset(0)) return; - uint32_t Offset = 0; + uint64_t Offset = 0; DWARFDebugArangeSet Set; while (Set.extract(DebugArangesData, &Offset)) { - uint32_t CUOffset = Set.getCompileUnitDIEOffset(); + uint64_t CUOffset = Set.getCompileUnitDIEOffset(); for (const auto &Desc : Set.descriptors()) { uint64_t LowPC = Desc.Address; uint64_t HighPC = Desc.getEndAddress(); @@ -43,7 +43,7 @@ void DWARFDebugAranges::generate(DWARFContext *CTX) { return; // Extract aranges from .debug_aranges section. 
- DataExtractor ArangesData(CTX->getDWARFObj().getARangeSection(), + DataExtractor ArangesData(CTX->getDWARFObj().getArangesSection(), CTX->isLittleEndian(), 0); extract(ArangesData); @@ -51,7 +51,7 @@ void DWARFDebugAranges::generate(DWARFContext *CTX) { // it may describe only a small subset of compilation units, so we need to // manually build aranges for the rest of them. for (const auto &CU : CTX->compile_units()) { - uint32_t CUOffset = CU->getOffset(); + uint64_t CUOffset = CU->getOffset(); if (ParsedCUOffsets.insert(CUOffset).second) { Expected CURanges = CU->collectAddressRanges(); if (!CURanges) @@ -71,7 +71,7 @@ void DWARFDebugAranges::clear() { ParsedCUOffsets.clear(); } -void DWARFDebugAranges::appendRange(uint32_t CUOffset, uint64_t LowPC, +void DWARFDebugAranges::appendRange(uint64_t CUOffset, uint64_t LowPC, uint64_t HighPC) { if (LowPC >= HighPC) return; @@ -80,7 +80,7 @@ void DWARFDebugAranges::appendRange(uint32_t CUOffset, uint64_t LowPC, } void DWARFDebugAranges::construct() { - std::multiset ValidCUs; // Maintain the set of CUs describing + std::multiset ValidCUs; // Maintain the set of CUs describing // a current address range. llvm::sort(Endpoints); uint64_t PrevAddress = -1ULL; diff --git a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index b3f23366f2a2..81b00f65741b 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -34,10 +34,10 @@ using namespace dwarf; const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0; const uint8_t DWARF_CFI_PRIMARY_OPERAND_MASK = 0x3f; -Error CFIProgram::parse(DataExtractor Data, uint32_t *Offset, - uint32_t EndOffset) { +Error CFIProgram::parse(DWARFDataExtractor Data, uint64_t *Offset, + uint64_t EndOffset) { while (*Offset < EndOffset) { - uint8_t Opcode = Data.getU8(Offset); + uint8_t Opcode = Data.getRelocatedValue(1, Offset); // Some instructions have a primary opcode encoded in the top bits. uint8_t Primary = Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK; @@ -74,19 +74,19 @@ Error CFIProgram::parse(DataExtractor Data, uint32_t *Offset, break; case DW_CFA_set_loc: // Operands: Address - addInstruction(Opcode, Data.getAddress(Offset)); + addInstruction(Opcode, Data.getRelocatedAddress(Offset)); break; case DW_CFA_advance_loc1: // Operands: 1-byte delta - addInstruction(Opcode, Data.getU8(Offset)); + addInstruction(Opcode, Data.getRelocatedValue(1, Offset)); break; case DW_CFA_advance_loc2: // Operands: 2-byte delta - addInstruction(Opcode, Data.getU16(Offset)); + addInstruction(Opcode, Data.getRelocatedValue(2, Offset)); break; case DW_CFA_advance_loc4: // Operands: 4-byte delta - addInstruction(Opcode, Data.getU32(Offset)); + addInstruction(Opcode, Data.getRelocatedValue(4, Offset)); break; case DW_CFA_restore_extended: case DW_CFA_undefined: @@ -331,7 +331,7 @@ DWARFDebugFrame::DWARFDebugFrame(Triple::ArchType Arch, DWARFDebugFrame::~DWARFDebugFrame() = default; static void LLVM_ATTRIBUTE_UNUSED dumpDataAux(DataExtractor Data, - uint32_t Offset, int Length) { + uint64_t Offset, int Length) { errs() << "DUMP: "; for (int i = 0; i < Length; ++i) { uint8_t c = Data.getU8(&Offset); @@ -344,7 +344,7 @@ static void LLVM_ATTRIBUTE_UNUSED dumpDataAux(DataExtractor Data, // noreturn attribute usage in lambdas. Once the support for those // compilers are phased out, we can remove this and return back to // a ReportError lambda: [StartOffset](const char *ErrorMsg). 
-static void LLVM_ATTRIBUTE_NORETURN ReportError(uint32_t StartOffset, +static void LLVM_ATTRIBUTE_NORETURN ReportError(uint64_t StartOffset, const char *ErrorMsg) { std::string Str; raw_string_ostream OS(Str); @@ -354,32 +354,30 @@ static void LLVM_ATTRIBUTE_NORETURN ReportError(uint32_t StartOffset, } void DWARFDebugFrame::parse(DWARFDataExtractor Data) { - uint32_t Offset = 0; - DenseMap CIEs; + uint64_t Offset = 0; + DenseMap CIEs; while (Data.isValidOffset(Offset)) { - uint32_t StartOffset = Offset; + uint64_t StartOffset = Offset; bool IsDWARF64 = false; - uint64_t Length = Data.getU32(&Offset); + uint64_t Length = Data.getRelocatedValue(4, &Offset); uint64_t Id; - if (Length == UINT32_MAX) { + if (Length == dwarf::DW_LENGTH_DWARF64) { // DWARF-64 is distinguished by the first 32 bits of the initial length // field being 0xffffffff. Then, the next 64 bits are the actual entry // length. IsDWARF64 = true; - Length = Data.getU64(&Offset); + Length = Data.getRelocatedValue(8, &Offset); } // At this point, Offset points to the next field after Length. // Length is the structure size excluding itself. Compute an offset one // past the end of the structure (needed to know how many instructions to // read). - // TODO: For honest DWARF64 support, DataExtractor will have to treat - // offset_ptr as uint64_t* - uint32_t StartStructureOffset = Offset; - uint32_t EndStructureOffset = Offset + static_cast(Length); + uint64_t StartStructureOffset = Offset; + uint64_t EndStructureOffset = Offset + Length; // The Id field's size depends on the DWARF format Id = Data.getUnsigned(&Offset, (IsDWARF64 && !IsEH) ? 8 : 4); @@ -407,22 +405,23 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) { Optional PersonalityEncoding; if (IsEH) { Optional AugmentationLength; - uint32_t StartAugmentationOffset; - uint32_t EndAugmentationOffset; + uint64_t StartAugmentationOffset; + uint64_t EndAugmentationOffset; // Walk the augmentation string to get all the augmentation data. for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) { switch (AugmentationString[i]) { default: - ReportError(StartOffset, - "Unknown augmentation character in entry at %lx"); + ReportError( + StartOffset, + "Unknown augmentation character in entry at %" PRIx64); case 'L': LSDAPointerEncoding = Data.getU8(&Offset); break; case 'P': { if (Personality) ReportError(StartOffset, - "Duplicate personality in entry at %lx"); + "Duplicate personality in entry at %" PRIx64); PersonalityEncoding = Data.getU8(&Offset); Personality = Data.getEncodedPointer( &Offset, *PersonalityEncoding, @@ -438,13 +437,12 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) { case 'z': if (i) ReportError(StartOffset, - "'z' must be the first character at %lx"); + "'z' must be the first character at %" PRIx64); // Parse the augmentation length first. We only parse it if // the string contains a 'z'. 
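// ---[ Illustrative sketch, not part of the upstream patch ]------------------
// The frame parser above now reads the initial length through the relocating
// extractor and tests it against dwarf::DW_LENGTH_DWARF64 rather than a bare
// 0xffffffff. A minimal initial-length reader in that style; `Data` and
// `Offset` are assumed inputs.
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include <cstdint>

static uint64_t readInitialLength(const llvm::DWARFDataExtractor &Data,
                                  uint64_t *Offset, bool &IsDWARF64) {
  uint64_t Length = Data.getRelocatedValue(4, Offset);
  IsDWARF64 = (Length == llvm::dwarf::DW_LENGTH_DWARF64);
  if (IsDWARF64)
    // The 32-bit 0xffffffff escape marks DWARF64; the real length follows.
    Length = Data.getRelocatedValue(8, Offset);
  return Length;
}
// ---[ end sketch ]------------------------------------------------------------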
AugmentationLength = Data.getULEB128(&Offset); StartAugmentationOffset = Offset; - EndAugmentationOffset = Offset + - static_cast(*AugmentationLength); + EndAugmentationOffset = Offset + *AugmentationLength; break; case 'B': // B-Key is used for signing functions associated with this @@ -455,14 +453,15 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) { if (AugmentationLength.hasValue()) { if (Offset != EndAugmentationOffset) - ReportError(StartOffset, "Parsing augmentation data at %lx failed"); + ReportError(StartOffset, + "Parsing augmentation data at %" PRIx64 " failed"); AugmentationData = Data.getData().slice(StartAugmentationOffset, EndAugmentationOffset); } } - auto Cie = llvm::make_unique( + auto Cie = std::make_unique( StartOffset, Length, Version, AugmentationString, AddressSize, SegmentDescriptorSize, CodeAlignmentFactor, DataAlignmentFactor, ReturnAddressRegister, AugmentationData, FDEPointerEncoding, @@ -480,8 +479,8 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) { if (IsEH) { // The address size is encoded in the CIE we reference. if (!Cie) - ReportError(StartOffset, - "Parsing FDE data at %lx failed due to missing CIE"); + ReportError(StartOffset, "Parsing FDE data at %" PRIx64 + " failed due to missing CIE"); if (auto Val = Data.getEncodedPointer( &Offset, Cie->getFDEPointerEncoding(), @@ -498,8 +497,7 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) { // Parse the augmentation length and data for this FDE. uint64_t AugmentationLength = Data.getULEB128(&Offset); - uint32_t EndAugmentationOffset = - Offset + static_cast(AugmentationLength); + uint64_t EndAugmentationOffset = Offset + AugmentationLength; // Decode the LSDA if the CIE augmentation string said we should. if (Cie->getLSDAPointerEncoding() != DW_EH_PE_omit) { @@ -509,11 +507,12 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) { } if (Offset != EndAugmentationOffset) - ReportError(StartOffset, "Parsing augmentation data at %lx failed"); + ReportError(StartOffset, + "Parsing augmentation data at %" PRIx64 " failed"); } } else { - InitialLocation = Data.getAddress(&Offset); - AddressRange = Data.getAddress(&Offset); + InitialLocation = Data.getRelocatedAddress(&Offset); + AddressRange = Data.getRelocatedAddress(&Offset); } Entries.emplace_back(new FDE(StartOffset, Length, CIEPointer, @@ -527,7 +526,8 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) { } if (Offset != EndStructureOffset) - ReportError(StartOffset, "Parsing entry instructions at %lx failed"); + ReportError(StartOffset, + "Parsing entry instructions at %" PRIx64 " failed"); } } diff --git a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp index d8a755e90df4..87eab34d58ee 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp @@ -19,15 +19,15 @@ using namespace llvm; using namespace dwarf; bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, - uint32_t *OffsetPtr) { + uint64_t *OffsetPtr) { DWARFDataExtractor DebugInfoData = U.getDebugInfoExtractor(); - const uint32_t UEndOffset = U.getNextUnitOffset(); + const uint64_t UEndOffset = U.getNextUnitOffset(); return extractFast(U, OffsetPtr, DebugInfoData, UEndOffset, 0); } -bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint32_t *OffsetPtr, +bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint64_t *OffsetPtr, const DWARFDataExtractor &DebugInfoData, - uint32_t UEndOffset, uint32_t D) { + uint64_t UEndOffset, uint32_t D) { Offset = *OffsetPtr; Depth = D; 
if (Offset >= UEndOffset || !DebugInfoData.isValidOffset(Offset)) diff --git a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index a1cb1e8582ed..dbee28ff5ab1 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -16,7 +16,6 @@ #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Format.h" -#include "llvm/Support/Path.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" #include @@ -156,7 +155,7 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS, // Parse v2-v4 directory and file tables. static void parseV2DirFileTables(const DWARFDataExtractor &DebugLineData, - uint32_t *OffsetPtr, uint64_t EndPrologueOffset, + uint64_t *OffsetPtr, uint64_t EndPrologueOffset, DWARFDebugLine::ContentTypeTracker &ContentTypes, std::vector &IncludeDirectories, std::vector &FileNames) { @@ -187,18 +186,24 @@ parseV2DirFileTables(const DWARFDataExtractor &DebugLineData, } // Parse v5 directory/file entry content descriptions. -// Returns the descriptors, or an empty vector if we did not find a path or -// ran off the end of the prologue. -static ContentDescriptors -parseV5EntryFormat(const DWARFDataExtractor &DebugLineData, uint32_t - *OffsetPtr, uint64_t EndPrologueOffset, DWARFDebugLine::ContentTypeTracker - *ContentTypes) { +// Returns the descriptors, or an error if we did not find a path or ran off +// the end of the prologue. +static llvm::Expected +parseV5EntryFormat(const DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr, + uint64_t EndPrologueOffset, + DWARFDebugLine::ContentTypeTracker *ContentTypes) { ContentDescriptors Descriptors; int FormatCount = DebugLineData.getU8(OffsetPtr); bool HasPath = false; for (int I = 0; I != FormatCount; ++I) { if (*OffsetPtr >= EndPrologueOffset) - return ContentDescriptors(); + return createStringError( + errc::invalid_argument, + "failed to parse entry content descriptions at offset " + "0x%8.8" PRIx64 + " because offset extends beyond the prologue end at offset " + "0x%8.8" PRIx64, + *OffsetPtr, EndPrologueOffset); ContentDescriptor Descriptor; Descriptor.Type = dwarf::LineNumberEntryFormat(DebugLineData.getULEB128(OffsetPtr)); @@ -209,60 +214,82 @@ parseV5EntryFormat(const DWARFDataExtractor &DebugLineData, uint32_t ContentTypes->trackContentType(Descriptor.Type); Descriptors.push_back(Descriptor); } - return HasPath ? Descriptors : ContentDescriptors(); + + if (!HasPath) + return createStringError(errc::invalid_argument, + "failed to parse entry content descriptions" + " because no path was found"); + return Descriptors; } -static bool +static Error parseV5DirFileTables(const DWARFDataExtractor &DebugLineData, - uint32_t *OffsetPtr, uint64_t EndPrologueOffset, + uint64_t *OffsetPtr, uint64_t EndPrologueOffset, const dwarf::FormParams &FormParams, const DWARFContext &Ctx, const DWARFUnit *U, DWARFDebugLine::ContentTypeTracker &ContentTypes, std::vector &IncludeDirectories, std::vector &FileNames) { // Get the directory entry description. - ContentDescriptors DirDescriptors = + llvm::Expected DirDescriptors = parseV5EntryFormat(DebugLineData, OffsetPtr, EndPrologueOffset, nullptr); - if (DirDescriptors.empty()) - return false; + if (!DirDescriptors) + return DirDescriptors.takeError(); // Get the directory entries, according to the format described above. 
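// ---[ Illustrative sketch, not part of the upstream patch ]------------------
// parseV5EntryFormat() now returns llvm::Expected<ContentDescriptors> instead
// of signalling failure with an empty vector: callers test the Expected,
// forward failures with takeError(), and dereference it on success. The same
// shape with assumed parser and element types:
#include "llvm/Support/Error.h"
#include <cstdint>
#include <vector>

static llvm::Expected<std::vector<uint64_t>> parseEntries() {
  // Assumed stand-in for a real parser such as parseV5EntryFormat().
  return std::vector<uint64_t>{1, 2, 3};
}

static llvm::Error parseTables() {
  llvm::Expected<std::vector<uint64_t>> Entries = parseEntries();
  if (!Entries)
    return Entries.takeError();   // propagate the cause instead of dropping it
  for (uint64_t E : *Entries)
    (void)E;                      // consume the parsed values
  return llvm::Error::success();
}
// ---[ end sketch ]------------------------------------------------------------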
int DirEntryCount = DebugLineData.getU8(OffsetPtr); for (int I = 0; I != DirEntryCount; ++I) { if (*OffsetPtr >= EndPrologueOffset) - return false; - for (auto Descriptor : DirDescriptors) { + return createStringError( + errc::invalid_argument, + "failed to parse directory entry at offset " + "0x%8.8" PRIx64 + " because offset extends beyond the prologue end at offset " + "0x%8.8" PRIx64, + *OffsetPtr, EndPrologueOffset); + for (auto Descriptor : *DirDescriptors) { DWARFFormValue Value(Descriptor.Form); switch (Descriptor.Type) { case DW_LNCT_path: if (!Value.extractValue(DebugLineData, OffsetPtr, FormParams, &Ctx, U)) - return false; + return createStringError(errc::invalid_argument, + "failed to parse directory entry because " + "extracting the form value failed."); IncludeDirectories.push_back(Value); break; default: if (!Value.skipValue(DebugLineData, OffsetPtr, FormParams)) - return false; + return createStringError(errc::invalid_argument, + "failed to parse directory entry because " + "skipping the form value failed."); } } } // Get the file entry description. - ContentDescriptors FileDescriptors = - parseV5EntryFormat(DebugLineData, OffsetPtr, EndPrologueOffset, - &ContentTypes); - if (FileDescriptors.empty()) - return false; + llvm::Expected FileDescriptors = parseV5EntryFormat( + DebugLineData, OffsetPtr, EndPrologueOffset, &ContentTypes); + if (!FileDescriptors) + return FileDescriptors.takeError(); // Get the file entries, according to the format described above. int FileEntryCount = DebugLineData.getU8(OffsetPtr); for (int I = 0; I != FileEntryCount; ++I) { if (*OffsetPtr >= EndPrologueOffset) - return false; + return createStringError( + errc::invalid_argument, + "failed to parse file entry at offset " + "0x%8.8" PRIx64 + " because offset extends beyond the prologue end at offset " + "0x%8.8" PRIx64, + *OffsetPtr, EndPrologueOffset); DWARFDebugLine::FileNameEntry FileEntry; - for (auto Descriptor : FileDescriptors) { + for (auto Descriptor : *FileDescriptors) { DWARFFormValue Value(Descriptor.Form); if (!Value.extractValue(DebugLineData, OffsetPtr, FormParams, &Ctx, U)) - return false; + return createStringError(errc::invalid_argument, + "failed to parse file entry because " + "extracting the form value failed."); switch (Descriptor.Type) { case DW_LNCT_path: FileEntry.Name = Value; @@ -280,7 +307,10 @@ parseV5DirFileTables(const DWARFDataExtractor &DebugLineData, FileEntry.Length = Value.getAsUnsignedConstant().getValue(); break; case DW_LNCT_MD5: - assert(Value.getAsBlock().getValue().size() == 16); + if (!Value.getAsBlock() || Value.getAsBlock().getValue().size() != 16) + return createStringError( + errc::invalid_argument, + "failed to parse file entry because the MD5 hash is invalid"); std::uninitialized_copy_n(Value.getAsBlock().getValue().begin(), 16, FileEntry.Checksum.Bytes.begin()); break; @@ -290,21 +320,21 @@ parseV5DirFileTables(const DWARFDataExtractor &DebugLineData, } FileNames.push_back(FileEntry); } - return true; + return Error::success(); } Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData, - uint32_t *OffsetPtr, + uint64_t *OffsetPtr, const DWARFContext &Ctx, const DWARFUnit *U) { const uint64_t PrologueOffset = *OffsetPtr; clear(); TotalLength = DebugLineData.getRelocatedValue(4, OffsetPtr); - if (TotalLength == UINT32_MAX) { + if (TotalLength == dwarf::DW_LENGTH_DWARF64) { FormParams.Format = dwarf::DWARF64; TotalLength = DebugLineData.getU64(OffsetPtr); - } else if (TotalLength >= 0xfffffff0) { + } else if (TotalLength >= 
dwarf::DW_LENGTH_lo_reserved) { return createStringError(errc::invalid_argument, "parsing line table prologue at offset 0x%8.8" PRIx64 " unsupported reserved unit length found of value 0x%8.8" PRIx64, @@ -343,14 +373,17 @@ Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData, } if (getVersion() >= 5) { - if (!parseV5DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset, - FormParams, Ctx, U, ContentTypes, - IncludeDirectories, FileNames)) { - return createStringError(errc::invalid_argument, - "parsing line table prologue at 0x%8.8" PRIx64 - " found an invalid directory or file table description at" - " 0x%8.8" PRIx64, - PrologueOffset, (uint64_t)*OffsetPtr); + if (Error e = parseV5DirFileTables( + DebugLineData, OffsetPtr, EndPrologueOffset, FormParams, Ctx, U, + ContentTypes, IncludeDirectories, FileNames)) { + return joinErrors( + createStringError( + errc::invalid_argument, + "parsing line table prologue at 0x%8.8" PRIx64 + " found an invalid directory or file table description at" + " 0x%8.8" PRIx64, + PrologueOffset, *OffsetPtr), + std::move(e)); } } else parseV2DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset, @@ -361,7 +394,7 @@ Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData, "parsing line table prologue at 0x%8.8" PRIx64 " should have ended at 0x%8.8" PRIx64 " but it ended at 0x%8.8" PRIx64, - PrologueOffset, EndPrologueOffset, (uint64_t)*OffsetPtr); + PrologueOffset, EndPrologueOffset, *OffsetPtr); return Error::success(); } @@ -468,7 +501,7 @@ void DWARFDebugLine::ParsingState::appendRowToMatrix() { } const DWARFDebugLine::LineTable * -DWARFDebugLine::getLineTable(uint32_t Offset) const { +DWARFDebugLine::getLineTable(uint64_t Offset) const { LineTableConstIter Pos = LineTableMap.find(Offset); if (Pos != LineTableMap.end()) return &Pos->second; @@ -476,10 +509,10 @@ DWARFDebugLine::getLineTable(uint32_t Offset) const { } Expected DWARFDebugLine::getOrParseLineTable( - DWARFDataExtractor &DebugLineData, uint32_t Offset, const DWARFContext &Ctx, + DWARFDataExtractor &DebugLineData, uint64_t Offset, const DWARFContext &Ctx, const DWARFUnit *U, std::function RecoverableErrorCallback) { if (!DebugLineData.isValidOffset(Offset)) - return createStringError(errc::invalid_argument, "offset 0x%8.8" PRIx32 + return createStringError(errc::invalid_argument, "offset 0x%8.8" PRIx64 " is not a valid debug line section offset", Offset); @@ -496,10 +529,10 @@ Expected DWARFDebugLine::getOrParseLineTable( } Error DWARFDebugLine::LineTable::parse( - DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr, + DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr, const DWARFContext &Ctx, const DWARFUnit *U, std::function RecoverableErrorCallback, raw_ostream *OS) { - const uint32_t DebugLineOffset = *OffsetPtr; + const uint64_t DebugLineOffset = *OffsetPtr; clear(); @@ -515,7 +548,7 @@ Error DWARFDebugLine::LineTable::parse( if (PrologueErr) return PrologueErr; - const uint32_t EndOffset = + const uint64_t EndOffset = DebugLineOffset + Prologue.TotalLength + Prologue.sizeofTotalLength(); // See if we should tell the data extractor the address size. 
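// ---[ Illustrative sketch, not part of the upstream patch ]------------------
// The prologue parser above now keeps the inner directory/file-table failure
// attached to its own positional message via joinErrors() instead of
// replacing it. Minimal shape of that pattern; the message and offset are
// assumed examples.
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include <cinttypes>
#include <cstdint>

static llvm::Error addPrologueContext(llvm::Error Inner,
                                      uint64_t PrologueOffset) {
  return llvm::joinErrors(
      llvm::createStringError(llvm::errc::invalid_argument,
                              "parsing line table prologue at 0x%8.8" PRIx64
                              " failed",
                              PrologueOffset),
      std::move(Inner));          // keep the underlying cause in the chain
}
// ---[ end sketch ]------------------------------------------------------------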
@@ -529,7 +562,7 @@ Error DWARFDebugLine::LineTable::parse( while (*OffsetPtr < EndOffset) { if (OS) - *OS << format("0x%08.08" PRIx32 ": ", *OffsetPtr); + *OS << format("0x%08.08" PRIx64 ": ", *OffsetPtr); uint8_t Opcode = DebugLineData.getU8(OffsetPtr); @@ -540,7 +573,7 @@ Error DWARFDebugLine::LineTable::parse( // Extended Opcodes always start with a zero opcode followed by // a uleb128 length so you can skip ones you don't know about uint64_t Len = DebugLineData.getULEB128(OffsetPtr); - uint32_t ExtOffset = *OffsetPtr; + uint64_t ExtOffset = *OffsetPtr; // Tolerate zero-length; assume length is correct and soldier on. if (Len == 0) { @@ -585,7 +618,7 @@ Error DWARFDebugLine::LineTable::parse( DebugLineData.setAddressSize(Len - 1); else if (DebugLineData.getAddressSize() != Len - 1) { return createStringError(errc::invalid_argument, - "mismatching address size at offset 0x%8.8" PRIx32 + "mismatching address size at offset 0x%8.8" PRIx64 " expected 0x%2.2" PRIx8 " found 0x%2.2" PRIx64, ExtOffset, DebugLineData.getAddressSize(), Len - 1); @@ -652,8 +685,8 @@ Error DWARFDebugLine::LineTable::parse( // Otherwise we have an unparseable line-number program. if (*OffsetPtr - ExtOffset != Len) return createStringError(errc::illegal_byte_sequence, - "unexpected line op length at offset 0x%8.8" PRIx32 - " expected 0x%2.2" PRIx64 " found 0x%2.2" PRIx32, + "unexpected line op length at offset 0x%8.8" PRIx64 + " expected 0x%2.2" PRIx64 " found 0x%2.2" PRIx64, ExtOffset, Len, *OffsetPtr - ExtOffset); } else if (Opcode < Prologue.OpcodeBase) { if (OS) @@ -1007,10 +1040,9 @@ static bool isPathAbsoluteOnWindowsOrPosix(const Twine &Path) { sys::path::is_absolute(Path, sys::path::Style::windows); } -bool DWARFDebugLine::Prologue::getFileNameByIndex(uint64_t FileIndex, - StringRef CompDir, - FileLineInfoKind Kind, - std::string &Result) const { +bool DWARFDebugLine::Prologue::getFileNameByIndex( + uint64_t FileIndex, StringRef CompDir, FileLineInfoKind Kind, + std::string &Result, sys::path::Style Style) const { if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex)) return false; const FileNameEntry &Entry = getFileNameEntry(FileIndex); @@ -1036,11 +1068,11 @@ bool DWARFDebugLine::Prologue::getFileNameByIndex(uint64_t FileIndex, // We know that FileName is not absolute, the only way to have an // absolute path at this point would be if IncludeDir is absolute. if (!CompDir.empty() && !isPathAbsoluteOnWindowsOrPosix(IncludeDir)) - sys::path::append(FilePath, CompDir); + sys::path::append(FilePath, Style, CompDir); } // sys::path::append skips empty strings. 
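// ---[ Illustrative sketch, not part of the upstream patch ]------------------
// getFileNameByIndex() gains a sys::path::Style parameter, and the appends
// above pass it through so paths from, say, a Windows binary are joined with
// the right separators on any host. A simplified assumed example (the real
// code also checks whether the components are already absolute):
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Path.h"
#include <string>

static std::string joinSourcePath(llvm::StringRef CompDir,
                                  llvm::StringRef IncludeDir,
                                  llvm::StringRef FileName,
                                  llvm::sys::path::Style Style) {
  llvm::SmallString<128> FilePath = CompDir;
  // sys::path::append skips empty components, matching the code above.
  llvm::sys::path::append(FilePath, Style, IncludeDir, FileName);
  return FilePath.str().str();
}
// e.g. joinSourcePath("C:\\src", "inc", "a.h", llvm::sys::path::Style::windows)
// ---[ end sketch ]------------------------------------------------------------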
- sys::path::append(FilePath, IncludeDir, FileName); + sys::path::append(FilePath, Style, IncludeDir, FileName); Result = FilePath.str(); return true; } @@ -1092,7 +1124,8 @@ DWARFDebugLine::SectionParser::SectionParser(DWARFDataExtractor &Data, } bool DWARFDebugLine::Prologue::totalLengthIsValid() const { - return TotalLength == 0xffffffff || TotalLength < 0xfffffff0; + return TotalLength == dwarf::DW_LENGTH_DWARF64 || + TotalLength < dwarf::DW_LENGTH_lo_reserved; } DWARFDebugLine::LineTable DWARFDebugLine::SectionParser::parseNext( @@ -1101,7 +1134,7 @@ DWARFDebugLine::LineTable DWARFDebugLine::SectionParser::parseNext( assert(DebugLineData.isValidOffset(Offset) && "parsing should have terminated"); DWARFUnit *U = prepareToParse(Offset); - uint32_t OldOffset = Offset; + uint64_t OldOffset = Offset; LineTable LT; if (Error Err = LT.parse(DebugLineData, &Offset, Context, U, RecoverableErrorCallback, OS)) @@ -1115,14 +1148,14 @@ void DWARFDebugLine::SectionParser::skip( assert(DebugLineData.isValidOffset(Offset) && "parsing should have terminated"); DWARFUnit *U = prepareToParse(Offset); - uint32_t OldOffset = Offset; + uint64_t OldOffset = Offset; LineTable LT; if (Error Err = LT.Prologue.parse(DebugLineData, &Offset, Context, U)) ErrorCallback(std::move(Err)); moveToNextTable(OldOffset, LT.Prologue); } -DWARFUnit *DWARFDebugLine::SectionParser::prepareToParse(uint32_t Offset) { +DWARFUnit *DWARFDebugLine::SectionParser::prepareToParse(uint64_t Offset) { DWARFUnit *U = nullptr; auto It = LineToUnit.find(Offset); if (It != LineToUnit.end()) @@ -1131,7 +1164,7 @@ DWARFUnit *DWARFDebugLine::SectionParser::prepareToParse(uint32_t Offset) { return U; } -void DWARFDebugLine::SectionParser::moveToNextTable(uint32_t OldOffset, +void DWARFDebugLine::SectionParser::moveToNextTable(uint64_t OldOffset, const Prologue &P) { // If the length field is not valid, we don't know where the next table is, so // cannot continue to parse. Mark the parser as done, and leave the Offset diff --git a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp index 6d8f4bee77c4..4f7b01130a47 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp @@ -28,19 +28,18 @@ using namespace llvm; // expression that LLVM doesn't produce. Guessing the wrong version means we // won't be able to pretty print expressions in DWARF2 binaries produced by // non-LLVM tools. 
-static void dumpExpression(raw_ostream &OS, ArrayRef Data, +static void dumpExpression(raw_ostream &OS, ArrayRef Data, bool IsLittleEndian, unsigned AddressSize, const MCRegisterInfo *MRI, DWARFUnit *U) { - DWARFDataExtractor Extractor(StringRef(Data.data(), Data.size()), - IsLittleEndian, AddressSize); + DWARFDataExtractor Extractor(toStringRef(Data), IsLittleEndian, AddressSize); DWARFExpression(Extractor, dwarf::DWARF_VERSION, AddressSize).print(OS, MRI, U); } -void DWARFDebugLoc::LocationList::dump(raw_ostream &OS, bool IsLittleEndian, +void DWARFDebugLoc::LocationList::dump(raw_ostream &OS, uint64_t BaseAddress, + bool IsLittleEndian, unsigned AddressSize, - const MCRegisterInfo *MRI, - DWARFUnit *U, - uint64_t BaseAddress, + const MCRegisterInfo *MRI, DWARFUnit *U, + DIDumpOptions DumpOpts, unsigned Indent) const { for (const Entry &E : Entries) { OS << '\n'; @@ -64,12 +63,12 @@ DWARFDebugLoc::getLocationListAtOffset(uint64_t Offset) const { return nullptr; } -void DWARFDebugLoc::dump(raw_ostream &OS, const MCRegisterInfo *MRI, +void DWARFDebugLoc::dump(raw_ostream &OS, const MCRegisterInfo *MRI, DIDumpOptions DumpOpts, Optional Offset) const { auto DumpLocationList = [&](const LocationList &L) { - OS << format("0x%8.8x: ", L.Offset); - L.dump(OS, IsLittleEndian, AddressSize, MRI, nullptr, 0, 12); - OS << "\n\n"; + OS << format("0x%8.8" PRIx64 ": ", L.Offset); + L.dump(OS, 0, IsLittleEndian, AddressSize, MRI, nullptr, DumpOpts, 12); + OS << "\n"; }; if (Offset) { @@ -80,50 +79,47 @@ void DWARFDebugLoc::dump(raw_ostream &OS, const MCRegisterInfo *MRI, for (const LocationList &L : Locations) { DumpLocationList(L); + if (&L != &Locations.back()) + OS << '\n'; } } -Optional -DWARFDebugLoc::parseOneLocationList(DWARFDataExtractor Data, unsigned *Offset) { +Expected +DWARFDebugLoc::parseOneLocationList(const DWARFDataExtractor &Data, + uint64_t *Offset) { LocationList LL; LL.Offset = *Offset; + AddressSize = Data.getAddressSize(); + DataExtractor::Cursor C(*Offset); // 2.6.2 Location Lists // A location list entry consists of: while (true) { Entry E; - if (!Data.isValidOffsetForDataOfSize(*Offset, 2 * Data.getAddressSize())) { - WithColor::error() << "location list overflows the debug_loc section.\n"; - return None; - } // 1. A beginning address offset. ... - E.Begin = Data.getRelocatedAddress(Offset); + E.Begin = Data.getRelocatedAddress(C); // 2. An ending address offset. ... - E.End = Data.getRelocatedAddress(Offset); + E.End = Data.getRelocatedAddress(C); + + if (Error Err = C.takeError()) + return std::move(Err); // The end of any given location list is marked by an end of list entry, // which consists of a 0 for the beginning address offset and a 0 for the // ending address offset. - if (E.Begin == 0 && E.End == 0) + if (E.Begin == 0 && E.End == 0) { + *Offset = C.tell(); return LL; - - if (!Data.isValidOffsetForDataOfSize(*Offset, 2)) { - WithColor::error() << "location list overflows the debug_loc section.\n"; - return None; } - unsigned Bytes = Data.getU16(Offset); - if (!Data.isValidOffsetForDataOfSize(*Offset, Bytes)) { - WithColor::error() << "location list overflows the debug_loc section.\n"; - return None; + if (E.Begin != (AddressSize == 4 ? -1U : -1ULL)) { + unsigned Bytes = Data.getU16(C); + // A single location description describing the location of the object... + Data.getU8(C, E.Loc, Bytes); } - // A single location description describing the location of the object... 
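// ---[ Illustrative sketch, not part of the upstream patch ]------------------
// The location-list parsers above move to DataExtractor::Cursor: reads are
// chained without per-read bounds checks, the accumulated error is taken once
// via Cursor::takeError(), and the offset is committed with tell() only on
// success. A minimal cursor-based reader over assumed length-prefixed data.
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Error.h"
#include <cstdint>
#include <vector>

static llvm::Expected<std::vector<uint64_t>>
readULEBList(const llvm::DataExtractor &Data, uint64_t *Offset) {
  std::vector<uint64_t> Values;
  llvm::DataExtractor::Cursor C(*Offset);
  uint16_t Count = Data.getU16(C);       // reads become no-ops once C has erred
  for (uint16_t I = 0; I != Count; ++I)
    Values.push_back(Data.getULEB128(C));
  if (llvm::Error Err = C.takeError())   // one check covers the whole run
    return std::move(Err);
  *Offset = C.tell();                    // commit the new offset on success
  return Values;
}
// ---[ end sketch ]------------------------------------------------------------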
- StringRef str = Data.getData().substr(*Offset, Bytes); - *Offset += Bytes; - E.Loc.reserve(str.size()); - llvm::copy(str, std::back_inserter(E.Loc)); + LL.Entries.push_back(std::move(E)); } } @@ -132,81 +128,89 @@ void DWARFDebugLoc::parse(const DWARFDataExtractor &data) { IsLittleEndian = data.isLittleEndian(); AddressSize = data.getAddressSize(); - uint32_t Offset = 0; - while (data.isValidOffset(Offset + data.getAddressSize() - 1)) { + uint64_t Offset = 0; + while (Offset < data.getData().size()) { if (auto LL = parseOneLocationList(data, &Offset)) Locations.push_back(std::move(*LL)); - else + else { + logAllUnhandledErrors(LL.takeError(), WithColor::error()); break; + } } - if (data.isValidOffset(Offset)) - WithColor::error() << "failed to consume entire .debug_loc section\n"; } -Optional -DWARFDebugLoclists::parseOneLocationList(DataExtractor Data, unsigned *Offset, - unsigned Version) { +Expected +DWARFDebugLoclists::parseOneLocationList(const DataExtractor &Data, + uint64_t *Offset, unsigned Version) { LocationList LL; LL.Offset = *Offset; + DataExtractor::Cursor C(*Offset); // dwarf::DW_LLE_end_of_list_entry is 0 and indicates the end of the list. - while (auto Kind = - static_cast(Data.getU8(Offset))) { - + while (auto Kind = Data.getU8(C)) { Entry E; E.Kind = Kind; + E.Offset = C.tell() - 1; switch (Kind) { + case dwarf::DW_LLE_base_addressx: + E.Value0 = Data.getULEB128(C); + break; case dwarf::DW_LLE_startx_length: - E.Value0 = Data.getULEB128(Offset); + E.Value0 = Data.getULEB128(C); // Pre-DWARF 5 has different interpretation of the length field. We have // to support both pre- and standartized styles for the compatibility. if (Version < 5) - E.Value1 = Data.getU32(Offset); + E.Value1 = Data.getU32(C); else - E.Value1 = Data.getULEB128(Offset); + E.Value1 = Data.getULEB128(C); break; case dwarf::DW_LLE_start_length: - E.Value0 = Data.getAddress(Offset); - E.Value1 = Data.getULEB128(Offset); + E.Value0 = Data.getAddress(C); + E.Value1 = Data.getULEB128(C); break; case dwarf::DW_LLE_offset_pair: - E.Value0 = Data.getULEB128(Offset); - E.Value1 = Data.getULEB128(Offset); + E.Value0 = Data.getULEB128(C); + E.Value1 = Data.getULEB128(C); break; case dwarf::DW_LLE_base_address: - E.Value0 = Data.getAddress(Offset); + E.Value0 = Data.getAddress(C); break; default: - WithColor::error() << "dumping support for LLE of kind " << (int)Kind - << " not implemented\n"; - return None; + cantFail(C.takeError()); + return createStringError(errc::illegal_byte_sequence, + "LLE of kind %x not supported", (int)Kind); } - if (Kind != dwarf::DW_LLE_base_address) { - unsigned Bytes = - Version >= 5 ? Data.getULEB128(Offset) : Data.getU16(Offset); + if (Kind != dwarf::DW_LLE_base_address && + Kind != dwarf::DW_LLE_base_addressx) { + unsigned Bytes = Version >= 5 ? Data.getULEB128(C) : Data.getU16(C); // A single location description describing the location of the object... 
- StringRef str = Data.getData().substr(*Offset, Bytes); - *Offset += Bytes; - E.Loc.resize(str.size()); - llvm::copy(str, E.Loc.begin()); + Data.getU8(C, E.Loc, Bytes); } LL.Entries.push_back(std::move(E)); } + if (Error Err = C.takeError()) + return std::move(Err); + Entry E; + E.Kind = dwarf::DW_LLE_end_of_list; + E.Offset = C.tell() - 1; + LL.Entries.push_back(E); + *Offset = C.tell(); return LL; } -void DWARFDebugLoclists::parse(DataExtractor data, unsigned Version) { +void DWARFDebugLoclists::parse(DataExtractor data, uint64_t Offset, uint64_t EndOffset, uint16_t Version) { IsLittleEndian = data.isLittleEndian(); AddressSize = data.getAddressSize(); - uint32_t Offset = 0; - while (data.isValidOffset(Offset)) { + while (Offset < EndOffset) { if (auto LL = parseOneLocationList(data, &Offset, Version)) Locations.push_back(std::move(*LL)); - else + else { + logAllUnhandledErrors(LL.takeError(), WithColor::error()); return; + } } } @@ -219,51 +223,106 @@ DWARFDebugLoclists::getLocationListAtOffset(uint64_t Offset) const { return nullptr; } -void DWARFDebugLoclists::LocationList::dump(raw_ostream &OS, uint64_t BaseAddr, - bool IsLittleEndian, - unsigned AddressSize, - const MCRegisterInfo *MRI, - DWARFUnit *U, - unsigned Indent) const { - for (const Entry &E : Entries) { - switch (E.Kind) { +void DWARFDebugLoclists::Entry::dump(raw_ostream &OS, uint64_t &BaseAddr, + bool IsLittleEndian, unsigned AddressSize, + const MCRegisterInfo *MRI, DWARFUnit *U, + DIDumpOptions DumpOpts, unsigned Indent, + size_t MaxEncodingStringLength) const { + if (DumpOpts.Verbose) { + OS << "\n"; + OS.indent(Indent); + auto EncodingString = dwarf::LocListEncodingString(Kind); + // Unsupported encodings should have been reported during parsing. + assert(!EncodingString.empty() && "Unknown loclist entry encoding"); + OS << format("%s%*c", EncodingString.data(), + MaxEncodingStringLength - EncodingString.size() + 1, '('); + switch (Kind) { case dwarf::DW_LLE_startx_length: - OS << '\n'; - OS.indent(Indent); - OS << "Addr idx " << E.Value0 << " (w/ length " << E.Value1 << "): "; - break; case dwarf::DW_LLE_start_length: - OS << '\n'; - OS.indent(Indent); - OS << format("[0x%*.*" PRIx64 ", 0x%*.*" PRIx64 "): ", AddressSize * 2, - AddressSize * 2, E.Value0, AddressSize * 2, AddressSize * 2, - E.Value0 + E.Value1); - break; case dwarf::DW_LLE_offset_pair: - OS << '\n'; - OS.indent(Indent); - OS << format("[0x%*.*" PRIx64 ", 0x%*.*" PRIx64 "): ", AddressSize * 2, - AddressSize * 2, BaseAddr + E.Value0, AddressSize * 2, - AddressSize * 2, BaseAddr + E.Value1); + OS << format("0x%*.*" PRIx64 ", 0x%*.*" PRIx64, AddressSize * 2, + AddressSize * 2, Value0, AddressSize * 2, AddressSize * 2, + Value1); break; + case dwarf::DW_LLE_base_addressx: case dwarf::DW_LLE_base_address: - BaseAddr = E.Value0; + OS << format("0x%*.*" PRIx64, AddressSize * 2, AddressSize * 2, + Value0); + break; + case dwarf::DW_LLE_end_of_list: break; - default: - llvm_unreachable("unreachable locations list kind"); } - - dumpExpression(OS, E.Loc, IsLittleEndian, AddressSize, MRI, U); + OS << ')'; } + auto PrintPrefix = [&] { + OS << "\n"; + OS.indent(Indent); + if (DumpOpts.Verbose) + OS << format("%*s", MaxEncodingStringLength, (const char *)"=> "); + }; + switch (Kind) { + case dwarf::DW_LLE_startx_length: + PrintPrefix(); + OS << "Addr idx " << Value0 << " (w/ length " << Value1 << "): "; + break; + case dwarf::DW_LLE_start_length: + PrintPrefix(); + DWARFAddressRange(Value0, Value0 + Value1) + .dump(OS, AddressSize, DumpOpts); + OS << ": "; + break; 
+ case dwarf::DW_LLE_offset_pair: + PrintPrefix(); + DWARFAddressRange(BaseAddr + Value0, BaseAddr + Value1) + .dump(OS, AddressSize, DumpOpts); + OS << ": "; + break; + case dwarf::DW_LLE_base_addressx: + if (!DumpOpts.Verbose) + return; + break; + case dwarf::DW_LLE_end_of_list: + if (!DumpOpts.Verbose) + return; + break; + case dwarf::DW_LLE_base_address: + BaseAddr = Value0; + if (!DumpOpts.Verbose) + return; + break; + default: + llvm_unreachable("unreachable locations list kind"); + } + + dumpExpression(OS, Loc, IsLittleEndian, AddressSize, MRI, U); +} +void DWARFDebugLoclists::LocationList::dump(raw_ostream &OS, uint64_t BaseAddr, + bool IsLittleEndian, + unsigned AddressSize, + const MCRegisterInfo *MRI, + DWARFUnit *U, + DIDumpOptions DumpOpts, + unsigned Indent) const { + size_t MaxEncodingStringLength = 0; + if (DumpOpts.Verbose) + for (const auto &Entry : Entries) + MaxEncodingStringLength = + std::max(MaxEncodingStringLength, + dwarf::LocListEncodingString(Entry.Kind).size()); + + for (const Entry &E : Entries) + E.dump(OS, BaseAddr, IsLittleEndian, AddressSize, MRI, U, DumpOpts, Indent, + MaxEncodingStringLength); } void DWARFDebugLoclists::dump(raw_ostream &OS, uint64_t BaseAddr, - const MCRegisterInfo *MRI, + const MCRegisterInfo *MRI, DIDumpOptions DumpOpts, Optional Offset) const { auto DumpLocationList = [&](const LocationList &L) { - OS << format("0x%8.8x: ", L.Offset); - L.dump(OS, BaseAddr, IsLittleEndian, AddressSize, MRI, nullptr, /*Indent=*/12); - OS << "\n\n"; + OS << format("0x%8.8" PRIx64 ": ", L.Offset); + L.dump(OS, BaseAddr, IsLittleEndian, AddressSize, MRI, nullptr, DumpOpts, + /*Indent=*/12); + OS << "\n"; }; if (Offset) { @@ -274,5 +333,7 @@ void DWARFDebugLoclists::dump(raw_ostream &OS, uint64_t BaseAddr, for (const LocationList &L : Locations) { DumpLocationList(L); + if (&L != &Locations.back()) + OS << '\n'; } } diff --git a/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp b/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp index 3317a778cc70..9a0e770aed3d 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp @@ -53,7 +53,7 @@ void DWARFDebugMacro::dump(raw_ostream &OS) const { } void DWARFDebugMacro::parse(DataExtractor data) { - uint32_t Offset = 0; + uint64_t Offset = 0; while (data.isValidOffset(Offset)) { // A macro list entry consists of: Entry E; diff --git a/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp b/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp index 963ec64f5e91..ab71b239cb67 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp @@ -23,7 +23,7 @@ DWARFDebugPubTable::DWARFDebugPubTable(const DWARFObject &Obj, bool LittleEndian, bool GnuStyle) : GnuStyle(GnuStyle) { DWARFDataExtractor PubNames(Obj, Sec, LittleEndian, 0); - uint32_t Offset = 0; + uint64_t Offset = 0; while (PubNames.isValidOffset(Offset)) { Sets.push_back({}); Set &SetData = Sets.back(); @@ -49,13 +49,13 @@ void DWARFDebugPubTable::dump(raw_ostream &OS) const { for (const Set &S : Sets) { OS << "length = " << format("0x%08x", S.Length); OS << " version = " << format("0x%04x", S.Version); - OS << " unit_offset = " << format("0x%08x", S.Offset); + OS << " unit_offset = " << format("0x%08" PRIx64, S.Offset); OS << " unit_size = " << format("0x%08x", S.Size) << '\n'; OS << (GnuStyle ? 
"Offset Linkage Kind Name\n" : "Offset Name\n"); for (const Entry &E : S.Entries) { - OS << format("0x%8.8x ", E.SecOffset); + OS << format("0x%8.8" PRIx64 " ", E.SecOffset); if (GnuStyle) { StringRef EntryLinkage = GDBIndexEntryLinkageString(E.Descriptor.Linkage); diff --git a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp index d8df81a0aa0b..1a1857d8cd79 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp @@ -17,17 +17,17 @@ using namespace llvm; void DWARFDebugRangeList::clear() { - Offset = -1U; + Offset = -1ULL; AddressSize = 0; Entries.clear(); } Error DWARFDebugRangeList::extract(const DWARFDataExtractor &data, - uint32_t *offset_ptr) { + uint64_t *offset_ptr) { clear(); if (!data.isValidOffset(*offset_ptr)) return createStringError(errc::invalid_argument, - "invalid range list offset 0x%" PRIx32, *offset_ptr); + "invalid range list offset 0x%" PRIx64, *offset_ptr); AddressSize = data.getAddressSize(); if (AddressSize != 4 && AddressSize != 8) @@ -38,7 +38,7 @@ Error DWARFDebugRangeList::extract(const DWARFDataExtractor &data, RangeListEntry Entry; Entry.SectionIndex = -1ULL; - uint32_t prev_offset = *offset_ptr; + uint64_t prev_offset = *offset_ptr; Entry.StartAddress = data.getRelocatedAddress(offset_ptr); Entry.EndAddress = data.getRelocatedAddress(offset_ptr, &Entry.SectionIndex); @@ -47,7 +47,7 @@ Error DWARFDebugRangeList::extract(const DWARFDataExtractor &data, if (*offset_ptr != prev_offset + 2 * AddressSize) { clear(); return createStringError(errc::invalid_argument, - "invalid range list entry at offset 0x%" PRIx32, + "invalid range list entry at offset 0x%" PRIx64, prev_offset); } if (Entry.isEndOfListEntry()) @@ -59,12 +59,12 @@ Error DWARFDebugRangeList::extract(const DWARFDataExtractor &data, void DWARFDebugRangeList::dump(raw_ostream &OS) const { for (const RangeListEntry &RLE : Entries) { - const char *format_str = (AddressSize == 4 - ? "%08x %08" PRIx64 " %08" PRIx64 "\n" - : "%08x %016" PRIx64 " %016" PRIx64 "\n"); + const char *format_str = + (AddressSize == 4 ? "%08" PRIx64 " %08" PRIx64 " %08" PRIx64 "\n" + : "%08" PRIx64 " %016" PRIx64 " %016" PRIx64 "\n"); OS << format(format_str, Offset, RLE.StartAddress, RLE.EndAddress); } - OS << format("%08x \n", Offset); + OS << format("%08" PRIx64 " \n", Offset); } DWARFAddressRangesVector DWARFDebugRangeList::getAbsoluteRanges( diff --git a/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp b/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp index 5ac3326f6681..f6785b89e86d 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp @@ -16,8 +16,8 @@ using namespace llvm; -Error RangeListEntry::extract(DWARFDataExtractor Data, uint32_t End, - uint32_t *OffsetPtr) { +Error RangeListEntry::extract(DWARFDataExtractor Data, uint64_t End, + uint64_t *OffsetPtr) { Offset = *OffsetPtr; SectionIndex = -1ULL; // The caller should guarantee that we have at least 1 byte available, so @@ -32,41 +32,41 @@ Error RangeListEntry::extract(DWARFDataExtractor Data, uint32_t End, break; // TODO: Support other encodings. 
case dwarf::DW_RLE_base_addressx: { - uint32_t PreviousOffset = *OffsetPtr - 1; + uint64_t PreviousOffset = *OffsetPtr - 1; Value0 = Data.getULEB128(OffsetPtr); if (End < *OffsetPtr) return createStringError( errc::invalid_argument, "read past end of table when reading " - "DW_RLE_base_addressx encoding at offset 0x%" PRIx32, + "DW_RLE_base_addressx encoding at offset 0x%" PRIx64, PreviousOffset); break; } case dwarf::DW_RLE_startx_endx: return createStringError(errc::not_supported, "unsupported rnglists encoding DW_RLE_startx_endx at " - "offset 0x%" PRIx32, + "offset 0x%" PRIx64, *OffsetPtr - 1); case dwarf::DW_RLE_startx_length: { - uint32_t PreviousOffset = *OffsetPtr - 1; + uint64_t PreviousOffset = *OffsetPtr - 1; Value0 = Data.getULEB128(OffsetPtr); Value1 = Data.getULEB128(OffsetPtr); if (End < *OffsetPtr) return createStringError( errc::invalid_argument, "read past end of table when reading " - "DW_RLE_startx_length encoding at offset 0x%" PRIx32, + "DW_RLE_startx_length encoding at offset 0x%" PRIx64, PreviousOffset); break; } case dwarf::DW_RLE_offset_pair: { - uint32_t PreviousOffset = *OffsetPtr - 1; + uint64_t PreviousOffset = *OffsetPtr - 1; Value0 = Data.getULEB128(OffsetPtr); Value1 = Data.getULEB128(OffsetPtr); if (End < *OffsetPtr) return createStringError(errc::invalid_argument, "read past end of table when reading " - "DW_RLE_offset_pair encoding at offset 0x%" PRIx32, + "DW_RLE_offset_pair encoding at offset 0x%" PRIx64, PreviousOffset); break; } @@ -74,7 +74,7 @@ Error RangeListEntry::extract(DWARFDataExtractor Data, uint32_t End, if ((End - *OffsetPtr) < Data.getAddressSize()) return createStringError(errc::invalid_argument, "insufficient space remaining in table for " - "DW_RLE_base_address encoding at offset 0x%" PRIx32, + "DW_RLE_base_address encoding at offset 0x%" PRIx64, *OffsetPtr - 1); Value0 = Data.getRelocatedAddress(OffsetPtr, &SectionIndex); break; @@ -84,27 +84,27 @@ Error RangeListEntry::extract(DWARFDataExtractor Data, uint32_t End, return createStringError(errc::invalid_argument, "insufficient space remaining in table for " "DW_RLE_start_end encoding " - "at offset 0x%" PRIx32, + "at offset 0x%" PRIx64, *OffsetPtr - 1); Value0 = Data.getRelocatedAddress(OffsetPtr, &SectionIndex); Value1 = Data.getRelocatedAddress(OffsetPtr); break; } case dwarf::DW_RLE_start_length: { - uint32_t PreviousOffset = *OffsetPtr - 1; + uint64_t PreviousOffset = *OffsetPtr - 1; Value0 = Data.getRelocatedAddress(OffsetPtr, &SectionIndex); Value1 = Data.getULEB128(OffsetPtr); if (End < *OffsetPtr) return createStringError(errc::invalid_argument, "read past end of table when reading " - "DW_RLE_start_length encoding at offset 0x%" PRIx32, + "DW_RLE_start_length encoding at offset 0x%" PRIx64, PreviousOffset); break; } default: return createStringError(errc::not_supported, "unknown rnglists encoding 0x%" PRIx32 - " at offset 0x%" PRIx32, + " at offset 0x%" PRIx64, uint32_t(Encoding), *OffsetPtr - 1); } @@ -187,7 +187,7 @@ void RangeListEntry::dump( if (DumpOpts.Verbose) { // Print the section offset in verbose mode. - OS << format("0x%8.8" PRIx32 ":", Offset); + OS << format("0x%8.8" PRIx64 ":", Offset); auto EncodingString = dwarf::RangeListEncodingString(EntryKind); // Unsupported encodings should have been reported during parsing. 
assert(!EncodingString.empty() && "Unknown range entry encoding"); diff --git a/lib/DebugInfo/DWARF/DWARFDie.cpp b/lib/DebugInfo/DWARF/DWARFDie.cpp index d638dc4239f4..cec194e8b6b3 100644 --- a/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -21,6 +21,7 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Format.h" +#include "llvm/Support/FormatAdapters.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/WithColor.h" @@ -91,21 +92,29 @@ static void dumpLocation(raw_ostream &OS, DWARFFormValue &FormValue, } FormValue.dump(OS, DumpOpts); + const auto &DumpLL = [&](auto ExpectedLL) { + if (ExpectedLL) { + uint64_t BaseAddr = 0; + if (Optional BA = U->getBaseAddress()) + BaseAddr = BA->Address; + auto LLDumpOpts = DumpOpts; + LLDumpOpts.Verbose = false; + ExpectedLL->dump(OS, BaseAddr, Ctx.isLittleEndian(), Obj.getAddressSize(), + MRI, U, LLDumpOpts, Indent); + } else { + OS << '\n'; + OS.indent(Indent); + OS << formatv("error extracting location list: {0}", + fmt_consume(ExpectedLL.takeError())); + } + }; if (FormValue.isFormClass(DWARFFormValue::FC_SectionOffset)) { - uint32_t Offset = *FormValue.getAsSectionOffset(); + uint64_t Offset = *FormValue.getAsSectionOffset(); if (!U->isDWOUnit() && !U->getLocSection()->Data.empty()) { DWARFDebugLoc DebugLoc; DWARFDataExtractor Data(Obj, *U->getLocSection(), Ctx.isLittleEndian(), Obj.getAddressSize()); - auto LL = DebugLoc.parseOneLocationList(Data, &Offset); - if (LL) { - uint64_t BaseAddr = 0; - if (Optional BA = U->getBaseAddress()) - BaseAddr = BA->Address; - LL->dump(OS, Ctx.isLittleEndian(), Obj.getAddressSize(), MRI, U, - BaseAddr, Indent); - } else - OS << "error extracting location list."; + DumpLL(DebugLoc.parseOneLocationList(Data, &Offset)); return; } @@ -121,18 +130,8 @@ static void dumpLocation(raw_ostream &OS, DWARFFormValue &FormValue, // Modern locations list (.debug_loclists) are used starting from v5. // Ideally we should take the version from the .debug_loclists section // header, but using CU's version for simplicity. - auto LL = DWARFDebugLoclists::parseOneLocationList( - Data, &Offset, UseLocLists ? U->getVersion() : 4); - - uint64_t BaseAddr = 0; - if (Optional BA = U->getBaseAddress()) - BaseAddr = BA->Address; - - if (LL) - LL->dump(OS, BaseAddr, Ctx.isLittleEndian(), Obj.getAddressSize(), MRI, - U, Indent); - else - OS << "error extracting location list."; + DumpLL(DWARFDebugLoclists::parseOneLocationList( + Data, &Offset, UseLocLists ? 
U->getVersion() : 4)); } } } @@ -264,7 +263,7 @@ static void dumpTypeName(raw_ostream &OS, const DWARFDie &D) { } static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, - uint32_t *OffsetPtr, dwarf::Attribute Attr, + uint64_t *OffsetPtr, dwarf::Attribute Attr, dwarf::Form Form, unsigned Indent, DIDumpOptions DumpOpts) { if (!Die.isValid()) @@ -568,8 +567,8 @@ void DWARFDie::dump(raw_ostream &OS, unsigned Indent, if (!isValid()) return; DWARFDataExtractor debug_info_data = U->getDebugInfoExtractor(); - const uint32_t Offset = getOffset(); - uint32_t offset = Offset; + const uint64_t Offset = getOffset(); + uint64_t offset = Offset; if (DumpOpts.ShowParents) { DIDumpOptions ParentDumpOpts = DumpOpts; ParentDumpOpts.ShowParents = false; @@ -581,7 +580,7 @@ void DWARFDie::dump(raw_ostream &OS, unsigned Indent, uint32_t abbrCode = debug_info_data.getULEB128(&offset); if (DumpOpts.ShowAddresses) WithColor(OS, HighlightColor::Address).get() - << format("\n0x%8.8x: ", Offset); + << format("\n0x%8.8" PRIx64 ": ", Offset); if (abbrCode) { auto AbbrevDecl = getAbbreviationDeclarationPtr(); @@ -685,7 +684,7 @@ void DWARFDie::attribute_iterator::updateForIndex( AttrValue.Attr = AbbrDecl.getAttrByIndex(Index); // Add the previous byte size of any previous attribute value. AttrValue.Offset += AttrValue.ByteSize; - uint32_t ParseOffset = AttrValue.Offset; + uint64_t ParseOffset = AttrValue.Offset; auto U = Die.getDwarfUnit(); assert(U && "Die must have valid DWARF unit"); AttrValue.Value = DWARFFormValue::createFromUnit( @@ -733,6 +732,7 @@ bool DWARFAttribute::mayHaveLocationDescription(dwarf::Attribute Attr) { case DW_AT_call_data_value: // Extensions. case DW_AT_GNU_call_site_value: + case DW_AT_GNU_call_site_target: return true; default: return false; diff --git a/lib/DebugInfo/DWARF/DWARFExpression.cpp b/lib/DebugInfo/DWARF/DWARFExpression.cpp index 470d4b5364b4..5009b1b7b412 100644 --- a/lib/DebugInfo/DWARF/DWARFExpression.cpp +++ b/lib/DebugInfo/DWARF/DWARFExpression.cpp @@ -119,7 +119,7 @@ static uint8_t getRefAddrSize(uint8_t AddrSize, uint16_t Version) { } bool DWARFExpression::Operation::extract(DataExtractor Data, uint16_t Version, - uint8_t AddressSize, uint32_t Offset) { + uint8_t AddressSize, uint64_t Offset) { Opcode = Data.getU8(&Offset); Desc = getOpDesc(Opcode); @@ -218,9 +218,8 @@ static bool prettyPrintRegisterOp(raw_ostream &OS, uint8_t Opcode, else DwarfRegNum = Opcode - DW_OP_reg0; - int LLVMRegNum = MRI->getLLVMRegNum(DwarfRegNum, isEH); - if (LLVMRegNum >= 0) { - if (const char *RegName = MRI->getName(LLVMRegNum)) { + if (Optional LLVMRegNum = MRI->getLLVMRegNum(DwarfRegNum, isEH)) { + if (const char *RegName = MRI->getName(*LLVMRegNum)) { if ((Opcode >= DW_OP_breg0 && Opcode <= DW_OP_breg31) || Opcode == DW_OP_bregx) OS << format(" %s%+" PRId64, RegName, Operands[OpNum]); @@ -263,7 +262,7 @@ bool DWARFExpression::Operation::print(raw_ostream &OS, if (Size == Operation::BaseTypeRef && U) { auto Die = U->getDIEForOffset(U->getOffset() + Operands[Operand]); if (Die && Die.getTag() == dwarf::DW_TAG_base_type) { - OS << format(" (0x%08x)", U->getOffset() + Operands[Operand]); + OS << format(" (0x%08" PRIx64 ")", U->getOffset() + Operands[Operand]); if (auto Name = Die.find(dwarf::DW_AT_name)) OS << " \"" << Name->getAsCString() << "\""; } else { @@ -271,7 +270,7 @@ bool DWARFExpression::Operation::print(raw_ostream &OS, Operands[Operand]); } } else if (Size == Operation::SizeBlock) { - uint32_t Offset = Operands[Operand]; + uint64_t Offset = Operands[Operand]; for (unsigned 
i = 0; i < Operands[Operand - 1]; ++i) OS << format(" 0x%02x", Expr->Data.getU8(&Offset)); } else { @@ -290,7 +289,7 @@ void DWARFExpression::print(raw_ostream &OS, const MCRegisterInfo *RegInfo, uint32_t EntryValExprSize = 0; for (auto &Op : *this) { if (!Op.print(OS, this, RegInfo, U, IsEH)) { - uint32_t FailOffset = Op.getEndOffset(); + uint64_t FailOffset = Op.getEndOffset(); while (FailOffset < Data.getData().size()) OS << format(" %02x", Data.getU8(&FailOffset)); return; diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp index 290d35511cdb..26090638b34c 100644 --- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -98,7 +98,7 @@ DWARFFormValue DWARFFormValue::createFromBlockValue(dwarf::Form F, } DWARFFormValue DWARFFormValue::createFromUnit(dwarf::Form F, const DWARFUnit *U, - uint32_t *OffsetPtr) { + uint64_t *OffsetPtr) { DWARFFormValue FormValue(F); FormValue.extractValue(U->getDebugInfoExtractor(), OffsetPtr, U->getFormParams(), U); @@ -106,7 +106,7 @@ DWARFFormValue DWARFFormValue::createFromUnit(dwarf::Form F, const DWARFUnit *U, } bool DWARFFormValue::skipValue(dwarf::Form Form, DataExtractor DebugInfoData, - uint32_t *OffsetPtr, + uint64_t *OffsetPtr, const dwarf::FormParams Params) { bool Indirect = false; do { @@ -234,7 +234,7 @@ bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const { } bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data, - uint32_t *OffsetPtr, dwarf::FormParams FP, + uint64_t *OffsetPtr, dwarf::FormParams FP, const DWARFContext *Ctx, const DWARFUnit *CU) { if (!Ctx && CU) @@ -590,7 +590,7 @@ Optional DWARFFormValue::getAsCString() const { // FIXME: Add support for DW_FORM_GNU_strp_alt if (Form == DW_FORM_GNU_strp_alt || C == nullptr) return None; - uint32_t Offset = Value.uval; + uint64_t Offset = Value.uval; if (Form == DW_FORM_line_strp) { // .debug_line_str is tracked in the Context. if (const char *Str = C->getLineStringExtractor().getCStr(&Offset)) @@ -624,6 +624,7 @@ Optional DWARFFormValue::getAsAddress() const { return SA->Address; return None; } + Optional DWARFFormValue::getAsSectionedAddress() const { if (!isFormClass(FC_Address)) diff --git a/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp b/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp index f5f975578082..252b58e5a591 100644 --- a/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp +++ b/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp @@ -112,7 +112,7 @@ void DWARFGdbIndex::dump(raw_ostream &OS) { } bool DWARFGdbIndex::parseImpl(DataExtractor Data) { - uint32_t Offset = 0; + uint64_t Offset = 0; // Only version 7 is supported at this moment. Version = Data.getU32(&Offset); diff --git a/lib/DebugInfo/DWARF/DWARFListTable.cpp b/lib/DebugInfo/DWARF/DWARFListTable.cpp index e38e706227da..269ea9f79a6e 100644 --- a/lib/DebugInfo/DWARF/DWARFListTable.cpp +++ b/lib/DebugInfo/DWARF/DWARFListTable.cpp @@ -16,33 +16,42 @@ using namespace llvm; Error DWARFListTableHeader::extract(DWARFDataExtractor Data, - uint32_t *OffsetPtr) { + uint64_t *OffsetPtr) { HeaderOffset = *OffsetPtr; // Read and verify the length field. if (!Data.isValidOffsetForDataOfSize(*OffsetPtr, sizeof(uint32_t))) return createStringError(errc::invalid_argument, "section is not large enough to contain a " - "%s table length at offset 0x%" PRIx32, + "%s table length at offset 0x%" PRIx64, SectionName.data(), *OffsetPtr); - // TODO: Add support for DWARF64. 
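// [Illustrative sketch, not part of the vendored patch.] Most of the
// signatures changed above follow the same cursor idiom: the reader takes a
// uint64_t *OffsetPtr, reads at that position, and advances it. That is why
// the patch widens the cursor type itself (DWARFExpression, DWARFFormValue,
// DWARFGdbIndex, ...) instead of casting at call sites. TinyExtractor below is
// a made-up stand-in that assumes host byte order and does no bounds checking.
#include <cstdint>
#include <cstring>
#include <vector>

struct TinyExtractor {
  std::vector<uint8_t> Bytes;

  uint32_t getU32(uint64_t *OffsetPtr) const {
    uint32_t V = 0;
    std::memcpy(&V, Bytes.data() + *OffsetPtr, sizeof(V));
    *OffsetPtr += sizeof(V); // the cursor moves past what was just read
    return V;
  }
};

// Usage: one cursor is threaded through successive reads.
//   uint64_t Offset = 0;
//   uint32_t A = Ex.getU32(&Offset); // Offset is now 4
//   uint32_t B = Ex.getU32(&Offset); // Offset is now 8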
- HeaderData.Length = Data.getRelocatedValue(4, OffsetPtr); - if (HeaderData.Length == 0xffffffffu) - return createStringError(errc::not_supported, - "DWARF64 is not supported in %s at offset 0x%" PRIx32, - SectionName.data(), HeaderOffset); Format = dwarf::DwarfFormat::DWARF32; - if (HeaderData.Length + sizeof(uint32_t) < sizeof(Header)) + uint8_t OffsetByteSize = 4; + HeaderData.Length = Data.getRelocatedValue(4, OffsetPtr); + if (HeaderData.Length == dwarf::DW_LENGTH_DWARF64) { + Format = dwarf::DwarfFormat::DWARF64; + OffsetByteSize = 8; + HeaderData.Length = Data.getU64(OffsetPtr); + } else if (HeaderData.Length >= dwarf::DW_LENGTH_lo_reserved) { + return createStringError(errc::invalid_argument, + "%s table at offset 0x%" PRIx64 + " has unsupported reserved unit length of value 0x%8.8" PRIx64, + SectionName.data(), HeaderOffset, HeaderData.Length); + } + uint64_t FullLength = + HeaderData.Length + dwarf::getUnitLengthFieldByteSize(Format); + assert(FullLength == length()); + if (FullLength < getHeaderSize(Format)) return createStringError(errc::invalid_argument, - "%s table at offset 0x%" PRIx32 - " has too small length (0x%" PRIx32 + "%s table at offset 0x%" PRIx64 + " has too small length (0x%" PRIx64 ") to contain a complete header", - SectionName.data(), HeaderOffset, length()); - uint32_t End = HeaderOffset + length(); - if (!Data.isValidOffsetForDataOfSize(HeaderOffset, End - HeaderOffset)) + SectionName.data(), HeaderOffset, FullLength); + uint64_t End = HeaderOffset + FullLength; + if (!Data.isValidOffsetForDataOfSize(HeaderOffset, FullLength)) return createStringError(errc::invalid_argument, "section is not large enough to contain a %s table " - "of length 0x%" PRIx32 " at offset 0x%" PRIx32, - SectionName.data(), length(), HeaderOffset); + "of length 0x%" PRIx64 " at offset 0x%" PRIx64, + SectionName.data(), FullLength, HeaderOffset); HeaderData.Version = Data.getU16(OffsetPtr); HeaderData.AddrSize = Data.getU8(OffsetPtr); @@ -53,35 +62,35 @@ Error DWARFListTableHeader::extract(DWARFDataExtractor Data, if (HeaderData.Version != 5) return createStringError(errc::invalid_argument, "unrecognised %s table version %" PRIu16 - " in table at offset 0x%" PRIx32, + " in table at offset 0x%" PRIx64, SectionName.data(), HeaderData.Version, HeaderOffset); if (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8) return createStringError(errc::not_supported, - "%s table at offset 0x%" PRIx32 + "%s table at offset 0x%" PRIx64 " has unsupported address size %" PRIu8, SectionName.data(), HeaderOffset, HeaderData.AddrSize); if (HeaderData.SegSize != 0) return createStringError(errc::not_supported, - "%s table at offset 0x%" PRIx32 + "%s table at offset 0x%" PRIx64 " has unsupported segment selector size %" PRIu8, SectionName.data(), HeaderOffset, HeaderData.SegSize); - if (End < HeaderOffset + sizeof(HeaderData) + - HeaderData.OffsetEntryCount * sizeof(uint32_t)) + if (End < HeaderOffset + getHeaderSize(Format) + + HeaderData.OffsetEntryCount * OffsetByteSize) return createStringError(errc::invalid_argument, - "%s table at offset 0x%" PRIx32 " has more offset entries (%" PRIu32 + "%s table at offset 0x%" PRIx64 " has more offset entries (%" PRIu32 ") than there is space for", SectionName.data(), HeaderOffset, HeaderData.OffsetEntryCount); Data.setAddressSize(HeaderData.AddrSize); for (uint32_t I = 0; I < HeaderData.OffsetEntryCount; ++I) - Offsets.push_back(Data.getRelocatedValue(4, OffsetPtr)); + Offsets.push_back(Data.getRelocatedValue(OffsetByteSize, OffsetPtr)); return Error::success(); 
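// [Illustrative sketch, not part of the vendored patch.] The new code above is
// an instance of the DWARF "initial length" convention: a 4-byte length where
// 0xffffffff (DW_LENGTH_DWARF64) announces that an 8-byte DWARF64 length
// follows, and 0xfffffff0 through 0xfffffffe (DW_LENGTH_lo_reserved and up)
// are reserved. The Extractor parameter is assumed to provide
// DataExtractor-style getU32/getU64(uint64_t *) readers.
#include <cstdint>
#include <optional>

enum class DwarfFormat { DWARF32, DWARF64 };

struct InitialLength {
  DwarfFormat Format;
  uint64_t Length;
};

template <typename Extractor>
std::optional<InitialLength> readInitialLength(Extractor &Data,
                                               uint64_t *OffsetPtr) {
  uint64_t Len = Data.getU32(OffsetPtr);
  if (Len == 0xffffffffULL) // DW_LENGTH_DWARF64 escape value
    return InitialLength{DwarfFormat::DWARF64, Data.getU64(OffsetPtr)};
  if (Len >= 0xfffffff0ULL) // reserved range: reject
    return std::nullopt;
  return InitialLength{DwarfFormat::DWARF32, Len};
}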
} void DWARFListTableHeader::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const { if (DumpOpts.Verbose) - OS << format("0x%8.8" PRIx32 ": ", HeaderOffset); + OS << format("0x%8.8" PRIx64 ": ", HeaderOffset); OS << format( - "%s list header: length = 0x%8.8" PRIx32 ", version = 0x%4.4" PRIx16 ", " + "%s list header: length = 0x%8.8" PRIx64 ", version = 0x%4.4" PRIx16 ", " "addr_size = 0x%2.2" PRIx8 ", seg_size = 0x%2.2" PRIx8 ", offset_entry_count = " "0x%8.8" PRIx32 "\n", @@ -91,18 +100,17 @@ void DWARFListTableHeader::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const { if (HeaderData.OffsetEntryCount > 0) { OS << "offsets: ["; for (const auto &Off : Offsets) { - OS << format("\n0x%8.8" PRIx32, Off); + OS << format("\n0x%8.8" PRIx64, Off); if (DumpOpts.Verbose) - OS << format(" => 0x%8.8" PRIx32, - Off + HeaderOffset + sizeof(HeaderData)); + OS << format(" => 0x%8.8" PRIx64, + Off + HeaderOffset + getHeaderSize(Format)); } OS << "\n]\n"; } } -uint32_t DWARFListTableHeader::length() const { +uint64_t DWARFListTableHeader::length() const { if (HeaderData.Length == 0) return 0; - // TODO: DWARF64 support. - return HeaderData.Length + sizeof(uint32_t); + return HeaderData.Length + dwarf::getUnitLengthFieldByteSize(Format); } diff --git a/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp b/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp index 844920ba5b11..bb81090ba25c 100644 --- a/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp +++ b/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp @@ -24,21 +24,23 @@ void DWARFTypeUnit::dump(raw_ostream &OS, DIDumpOptions DumpOpts) { if (DumpOpts.SummarizeTypes) { OS << "name = '" << Name << "'" << " type_signature = " << format("0x%016" PRIx64, getTypeHash()) - << " length = " << format("0x%08x", getLength()) << '\n'; + << " length = " << format("0x%08" PRIx64, getLength()) << '\n'; return; } - OS << format("0x%08x", getOffset()) << ": Type Unit:" - << " length = " << format("0x%08x", getLength()) + OS << format("0x%08" PRIx64, getOffset()) << ": Type Unit:" + << " length = " << format("0x%08" PRIx64, getLength()) << " version = " << format("0x%04x", getVersion()); if (getVersion() >= 5) OS << " unit_type = " << dwarf::UnitTypeString(getUnitType()); - OS << " abbr_offset = " << format("0x%04x", getAbbreviations()->getOffset()) + OS << " abbr_offset = " + << format("0x%04" PRIx64, getAbbreviations()->getOffset()) << " addr_size = " << format("0x%02x", getAddressByteSize()) << " name = '" << Name << "'" << " type_signature = " << format("0x%016" PRIx64, getTypeHash()) - << " type_offset = " << format("0x%04x", getTypeOffset()) - << " (next unit at " << format("0x%08x", getNextUnitOffset()) << ")\n"; + << " type_offset = " << format("0x%04" PRIx64, getTypeOffset()) + << " (next unit at " << format("0x%08" PRIx64, getNextUnitOffset()) + << ")\n"; if (DWARFDie TU = getUnitDIE(false)) TU.dump(OS, 0, DumpOpts); diff --git a/lib/DebugInfo/DWARF/DWARFUnit.cpp b/lib/DebugInfo/DWARF/DWARFUnit.cpp index b74acf60c747..a56402a707ad 100644 --- a/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -37,9 +37,9 @@ void DWARFUnitVector::addUnitsForSection(DWARFContext &C, const DWARFSection &Section, DWARFSectionKind SectionKind) { const DWARFObject &D = C.getDWARFObj(); - addUnitsImpl(C, D, Section, C.getDebugAbbrev(), &D.getRangeSection(), - &D.getLocSection(), D.getStringSection(), - D.getStringOffsetSection(), &D.getAddrSection(), + addUnitsImpl(C, D, Section, C.getDebugAbbrev(), &D.getRangesSection(), + &D.getLocSection(), D.getStrSection(), + D.getStrOffsetsSection(), 
&D.getAddrSection(), D.getLineSection(), D.isLittleEndian(), false, false, SectionKind); } @@ -49,9 +49,9 @@ void DWARFUnitVector::addUnitsForDWOSection(DWARFContext &C, DWARFSectionKind SectionKind, bool Lazy) { const DWARFObject &D = C.getDWARFObj(); - addUnitsImpl(C, D, DWOSection, C.getDebugAbbrevDWO(), &D.getRangeDWOSection(), - &D.getLocDWOSection(), D.getStringDWOSection(), - D.getStringOffsetDWOSection(), &D.getAddrSection(), + addUnitsImpl(C, D, DWOSection, C.getDebugAbbrevDWO(), &D.getRangesDWOSection(), + &D.getLocDWOSection(), D.getStrDWOSection(), + D.getStrOffsetsDWOSection(), &D.getAddrSection(), D.getLineDWOSection(), C.isLittleEndian(), true, Lazy, SectionKind); } @@ -66,7 +66,7 @@ void DWARFUnitVector::addUnitsImpl( // Lazy initialization of Parser, now that we have all section info. if (!Parser) { Parser = [=, &Context, &Obj, &Section, &SOS, - &LS](uint32_t Offset, DWARFSectionKind SectionKind, + &LS](uint64_t Offset, DWARFSectionKind SectionKind, const DWARFSection *CurSection, const DWARFUnitIndex::Entry *IndexEntry) -> std::unique_ptr { @@ -83,11 +83,11 @@ void DWARFUnitVector::addUnitsImpl( return nullptr; std::unique_ptr U; if (Header.isTypeUnit()) - U = llvm::make_unique(Context, InfoSection, Header, DA, + U = std::make_unique(Context, InfoSection, Header, DA, RS, LocSection, SS, SOS, AOS, LS, LE, IsDWO, *this); else - U = llvm::make_unique(Context, InfoSection, Header, + U = std::make_unique(Context, InfoSection, Header, DA, RS, LocSection, SS, SOS, AOS, LS, LE, IsDWO, *this); return U; @@ -101,7 +101,7 @@ void DWARFUnitVector::addUnitsImpl( // within a section, although not necessarily within the object file, // even if we do lazy parsing. auto I = this->begin(); - uint32_t Offset = 0; + uint64_t Offset = 0; while (Data.isValidOffset(Offset)) { if (I != this->end() && (&(*I)->getInfoSection() != &Section || (*I)->getOffset() == Offset)) { @@ -126,11 +126,11 @@ DWARFUnit *DWARFUnitVector::addUnit(std::unique_ptr Unit) { return this->insert(I, std::move(Unit))->get(); } -DWARFUnit *DWARFUnitVector::getUnitForOffset(uint32_t Offset) const { +DWARFUnit *DWARFUnitVector::getUnitForOffset(uint64_t Offset) const { auto end = begin() + getNumInfoUnits(); auto *CU = std::upper_bound(begin(), end, Offset, - [](uint32_t LHS, const std::unique_ptr &RHS) { + [](uint64_t LHS, const std::unique_ptr &RHS) { return LHS < RHS->getNextUnitOffset(); }); if (CU != end && (*CU)->getOffset() <= Offset) @@ -149,7 +149,7 @@ DWARFUnitVector::getUnitForIndexEntry(const DWARFUnitIndex::Entry &E) { auto *CU = std::upper_bound(begin(), end, CUOff->Offset, - [](uint32_t LHS, const std::unique_ptr &RHS) { + [](uint64_t LHS, const std::unique_ptr &RHS) { return LHS < RHS->getNextUnitOffset(); }); if (CU != end && (*CU)->getOffset() <= Offset) @@ -209,7 +209,7 @@ DWARFUnit::getAddrOffsetSectionItem(uint32_t Index) const { if (I != R.end() && std::next(I) == R.end()) return (*I)->getAddrOffsetSectionItem(Index); } - uint32_t Offset = AddrOffsetSectionBase + Index * getAddressByteSize(); + uint64_t Offset = AddrOffsetSectionBase + Index * getAddressByteSize(); if (AddrOffsetSection->Data.size() < Offset + getAddressByteSize()) return None; DWARFDataExtractor DA(Context.getDWARFObj(), *AddrOffsetSection, @@ -223,7 +223,7 @@ Optional DWARFUnit::getStringOffsetSectionItem(uint32_t Index) const { if (!StringOffsetsTableContribution) return None; unsigned ItemSize = getDwarfStringOffsetsByteSize(); - uint32_t Offset = getStringOffsetsBase() + Index * ItemSize; + uint64_t Offset = getStringOffsetsBase() 
+ Index * ItemSize; if (StringOffsetSection.Data.size() < Offset + ItemSize) return None; DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection, @@ -233,7 +233,7 @@ Optional DWARFUnit::getStringOffsetSectionItem(uint32_t Index) const { bool DWARFUnitHeader::extract(DWARFContext &Context, const DWARFDataExtractor &debug_info, - uint32_t *offset_ptr, + uint64_t *offset_ptr, DWARFSectionKind SectionKind, const DWARFUnitIndex *Index, const DWARFUnitIndex::Entry *Entry) { @@ -243,11 +243,9 @@ bool DWARFUnitHeader::extract(DWARFContext &Context, IndexEntry = Index->getFromOffset(*offset_ptr); Length = debug_info.getRelocatedValue(4, offset_ptr); FormParams.Format = DWARF32; - unsigned SizeOfLength = 4; - if (Length == 0xffffffff) { + if (Length == dwarf::DW_LENGTH_DWARF64) { Length = debug_info.getU64(offset_ptr); FormParams.Format = DWARF64; - SizeOfLength = 8; } FormParams.Version = debug_info.getU16(offset_ptr); if (FormParams.Version >= 5) { @@ -277,7 +275,8 @@ bool DWARFUnitHeader::extract(DWARFContext &Context, } if (isTypeUnit()) { TypeHash = debug_info.getU64(offset_ptr); - TypeOffset = debug_info.getU32(offset_ptr); + TypeOffset = + debug_info.getUnsigned(offset_ptr, FormParams.getDwarfOffsetByteSize()); } else if (UnitType == DW_UT_split_compile || UnitType == DW_UT_skeleton) DWOId = debug_info.getU64(offset_ptr); @@ -290,7 +289,8 @@ bool DWARFUnitHeader::extract(DWARFContext &Context, bool TypeOffsetOK = !isTypeUnit() ? true - : TypeOffset >= Size && TypeOffset < getLength() + SizeOfLength; + : TypeOffset >= Size && + TypeOffset < getLength() + getUnitLengthFieldByteSize(); bool LengthOK = debug_info.isValidOffset(getNextUnitOffset() - 1); bool VersionOK = DWARFContext::isSupportedVersion(getVersion()); bool AddrSizeOK = getAddressByteSize() == 4 || getAddressByteSize() == 8; @@ -306,16 +306,18 @@ bool DWARFUnitHeader::extract(DWARFContext &Context, // Parse the rangelist table header, including the optional array of offsets // following it (DWARF v5 and later). static Expected -parseRngListTableHeader(DWARFDataExtractor &DA, uint32_t Offset) { - // TODO: Support DWARF64 +parseRngListTableHeader(DWARFDataExtractor &DA, uint64_t Offset, + DwarfFormat Format) { // We are expected to be called with Offset 0 or pointing just past the table - // header, which is 12 bytes long for DWARF32. + // header. Correct Offset in the latter case so that it points to the start + // of the header. if (Offset > 0) { - if (Offset < 12U) + uint64_t HeaderSize = DWARFListTableHeader::getHeaderSize(Format); + if (Offset < HeaderSize) return createStringError(errc::invalid_argument, "Did not detect a valid" - " range list table with base = 0x%" PRIu32, + " range list table with base = 0x%" PRIx64 "\n", Offset); - Offset -= 12U; + Offset -= HeaderSize; } llvm::DWARFDebugRnglistTable Table; if (Error E = Table.extractHeaderAndOffsets(DA, &Offset)) @@ -323,13 +325,13 @@ parseRngListTableHeader(DWARFDataExtractor &DA, uint32_t Offset) { return Table; } -Error DWARFUnit::extractRangeList(uint32_t RangeListOffset, +Error DWARFUnit::extractRangeList(uint64_t RangeListOffset, DWARFDebugRangeList &RangeList) const { // Require that compile unit is extracted. 
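// [Illustrative sketch, not part of the vendored patch.] The rewind in
// parseRngListTableHeader() above works because DW_AT_rnglists_base points
// just past the .debug_rnglists table header, whose size now depends on the
// unit length format. Recomputing it from the field layout:
#include <cstdint>

enum class DwarfFormat { DWARF32, DWARF64 };

constexpr uint64_t rnglistHeaderSize(DwarfFormat F) {
  // unit_length (4, or 4-byte escape + 8) + version (2) + address_size (1)
  // + segment_selector_size (1) + offset_entry_count (4)
  return (F == DwarfFormat::DWARF64 ? 12 : 4) + 2 + 1 + 1 + 4;
}

static_assert(rnglistHeaderSize(DwarfFormat::DWARF32) == 12, "");
static_assert(rnglistHeaderSize(DwarfFormat::DWARF64) == 20, "");

// A base of 0 means "start of the section"; any other base is rewound by
// rnglistHeaderSize(Format) before the header is re-read.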
assert(!DieArray.empty()); DWARFDataExtractor RangesData(Context.getDWARFObj(), *RangeSection, isLittleEndian, getAddressByteSize()); - uint32_t ActualRangeListOffset = RangeSectionBase + RangeListOffset; + uint64_t ActualRangeListOffset = RangeSectionBase + RangeListOffset; return RangeList.extract(RangesData, &ActualRangeListOffset); } @@ -354,8 +356,8 @@ void DWARFUnit::extractDIEsToVector( // Set the offset to that of the first DIE and calculate the start of the // next compilation unit header. - uint32_t DIEOffset = getOffset() + getHeaderSize(); - uint32_t NextCUOffset = getNextUnitOffset(); + uint64_t DIEOffset = getOffset() + getHeaderSize(); + uint64_t NextCUOffset = getNextUnitOffset(); DWARFDebugInfoEntry DIE; DWARFDataExtractor DebugInfoData = getDebugInfoExtractor(); uint32_t Depth = 0; @@ -396,90 +398,98 @@ void DWARFUnit::extractDIEsToVector( // unit header). if (DIEOffset > NextCUOffset) WithColor::warning() << format("DWARF compile unit extends beyond its " - "bounds cu 0x%8.8x at 0x%8.8x\n", + "bounds cu 0x%8.8" PRIx64 " " + "at 0x%8.8" PRIx64 "\n", getOffset(), DIEOffset); } -size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) { +void DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) { + if (Error e = tryExtractDIEsIfNeeded(CUDieOnly)) + WithColor::error() << toString(std::move(e)); +} + +Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) { if ((CUDieOnly && !DieArray.empty()) || DieArray.size() > 1) - return 0; // Already parsed. + return Error::success(); // Already parsed. bool HasCUDie = !DieArray.empty(); extractDIEsToVector(!HasCUDie, !CUDieOnly, DieArray); if (DieArray.empty()) - return 0; + return Error::success(); // If CU DIE was just parsed, copy several attribute values from it. - if (!HasCUDie) { - DWARFDie UnitDie = getUnitDIE(); - if (Optional DWOId = toUnsigned(UnitDie.find(DW_AT_GNU_dwo_id))) - Header.setDWOId(*DWOId); - if (!IsDWO) { - assert(AddrOffsetSectionBase == 0); - assert(RangeSectionBase == 0); - AddrOffsetSectionBase = toSectionOffset(UnitDie.find(DW_AT_addr_base), 0); - if (!AddrOffsetSectionBase) - AddrOffsetSectionBase = - toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base), 0); - RangeSectionBase = toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0); - } - - // In general, in DWARF v5 and beyond we derive the start of the unit's - // contribution to the string offsets table from the unit DIE's - // DW_AT_str_offsets_base attribute. Split DWARF units do not use this - // attribute, so we assume that there is a contribution to the string - // offsets table starting at offset 0 of the debug_str_offsets.dwo section. - // In both cases we need to determine the format of the contribution, - // which may differ from the unit's format. - DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection, - isLittleEndian, 0); - if (IsDWO || getVersion() >= 5) { - auto StringOffsetOrError = - IsDWO ? 
determineStringOffsetsTableContributionDWO(DA) - : determineStringOffsetsTableContribution(DA); - if (!StringOffsetOrError) { - WithColor::error() << "invalid contribution to string offsets table in section .debug_str_offsets[.dwo]: " - << toString(StringOffsetOrError.takeError()) << '\n'; - } else { - StringOffsetsTableContribution = *StringOffsetOrError; - } - } + if (HasCUDie) + return Error::success(); + + DWARFDie UnitDie(this, &DieArray[0]); + if (Optional DWOId = toUnsigned(UnitDie.find(DW_AT_GNU_dwo_id))) + Header.setDWOId(*DWOId); + if (!IsDWO) { + assert(AddrOffsetSectionBase == 0); + assert(RangeSectionBase == 0); + AddrOffsetSectionBase = toSectionOffset(UnitDie.find(DW_AT_addr_base), 0); + if (!AddrOffsetSectionBase) + AddrOffsetSectionBase = + toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base), 0); + RangeSectionBase = toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0); + } - // DWARF v5 uses the .debug_rnglists and .debug_rnglists.dwo sections to - // describe address ranges. - if (getVersion() >= 5) { - if (IsDWO) - setRangesSection(&Context.getDWARFObj().getRnglistsDWOSection(), 0); - else - setRangesSection(&Context.getDWARFObj().getRnglistsSection(), - toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0)); - if (RangeSection->Data.size()) { - // Parse the range list table header. Individual range lists are - // extracted lazily. - DWARFDataExtractor RangesDA(Context.getDWARFObj(), *RangeSection, - isLittleEndian, 0); - if (auto TableOrError = - parseRngListTableHeader(RangesDA, RangeSectionBase)) - RngListTable = TableOrError.get(); - else - WithColor::error() << "parsing a range list table: " - << toString(TableOrError.takeError()) - << '\n'; - - // In a split dwarf unit, there is no DW_AT_rnglists_base attribute. - // Adjust RangeSectionBase to point past the table header. - if (IsDWO && RngListTable) - RangeSectionBase = RngListTable->getHeaderSize(); - } - } + // In general, in DWARF v5 and beyond we derive the start of the unit's + // contribution to the string offsets table from the unit DIE's + // DW_AT_str_offsets_base attribute. Split DWARF units do not use this + // attribute, so we assume that there is a contribution to the string + // offsets table starting at offset 0 of the debug_str_offsets.dwo section. + // In both cases we need to determine the format of the contribution, + // which may differ from the unit's format. + DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection, + isLittleEndian, 0); + if (IsDWO || getVersion() >= 5) { + auto StringOffsetOrError = + IsDWO ? determineStringOffsetsTableContributionDWO(DA) + : determineStringOffsetsTableContribution(DA); + if (!StringOffsetOrError) + return createStringError(errc::invalid_argument, + "invalid reference to or invalid content in " + ".debug_str_offsets[.dwo]: " + + toString(StringOffsetOrError.takeError())); + + StringOffsetsTableContribution = *StringOffsetOrError; + } - // Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for - // skeleton CU DIE, so that DWARF users not aware of it are not broken. + // DWARF v5 uses the .debug_rnglists and .debug_rnglists.dwo sections to + // describe address ranges. + if (getVersion() >= 5) { + if (IsDWO) + setRangesSection(&Context.getDWARFObj().getRnglistsDWOSection(), 0); + else + setRangesSection(&Context.getDWARFObj().getRnglistsSection(), + toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0)); + if (RangeSection->Data.size()) { + // Parse the range list table header. Individual range lists are + // extracted lazily. 
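// [Illustrative sketch, not part of the vendored patch.] The refactor above
// splits DIE extraction into an llvm::Error-returning worker plus a thin
// reporting wrapper, so new callers can propagate the failure instead of
// having it printed for them. The same shape on a made-up type (Widget and
// tryLoad are illustrative names only):
#include "llvm/Support/Error.h"
#include "llvm/Support/WithColor.h"

namespace {
struct Widget {
  bool Corrupt = false;

  llvm::Error tryLoad() { // new-style: the caller decides how to handle failure
    if (Corrupt)
      return llvm::createStringError(std::errc::invalid_argument,
                                     "widget data is corrupt");
    return llvm::Error::success();
  }

  void load() { // legacy entry point, kept as a reporting wrapper
    if (llvm::Error E = tryLoad())
      llvm::WithColor::error() << llvm::toString(std::move(E)) << '\n';
  }
};
} // namespace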
+ DWARFDataExtractor RangesDA(Context.getDWARFObj(), *RangeSection, + isLittleEndian, 0); + auto TableOrError = parseRngListTableHeader(RangesDA, RangeSectionBase, + Header.getFormat()); + if (!TableOrError) + return createStringError(errc::invalid_argument, + "parsing a range list table: " + + toString(TableOrError.takeError())); + + RngListTable = TableOrError.get(); + + // In a split dwarf unit, there is no DW_AT_rnglists_base attribute. + // Adjust RangeSectionBase to point past the table header. + if (IsDWO && RngListTable) + RangeSectionBase = RngListTable->getHeaderSize(); } + } - return DieArray.size(); + // Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for + // skeleton CU DIE, so that DWARF users not aware of it are not broken. + return Error::success(); } bool DWARFUnit::parseDWO() { @@ -517,7 +527,8 @@ bool DWARFUnit::parseDWO() { DWO->setRangesSection(&Context.getDWARFObj().getRnglistsDWOSection(), 0); DWARFDataExtractor RangesDA(Context.getDWARFObj(), *RangeSection, isLittleEndian, 0); - if (auto TableOrError = parseRngListTableHeader(RangesDA, RangeSectionBase)) + if (auto TableOrError = parseRngListTableHeader(RangesDA, RangeSectionBase, + Header.getFormat())) DWO->RngListTable = TableOrError.get(); else WithColor::error() << "parsing a range list table: " @@ -541,7 +552,7 @@ void DWARFUnit::clearDIEs(bool KeepCUDie) { } Expected -DWARFUnit::findRnglistFromOffset(uint32_t Offset) { +DWARFUnit::findRnglistFromOffset(uint64_t Offset) { if (getVersion() <= 4) { DWARFDebugRangeList RangeList; if (Error E = extractRangeList(Offset, RangeList)) @@ -569,9 +580,9 @@ DWARFUnit::findRnglistFromIndex(uint32_t Index) { if (RngListTable) return createStringError(errc::invalid_argument, "invalid range list table index %d", Index); - else - return createStringError(errc::invalid_argument, - "missing or invalid range list table"); + + return createStringError(errc::invalid_argument, + "missing or invalid range list table"); } Expected DWARFUnit::collectAddressRanges() { @@ -780,11 +791,11 @@ StrOffsetsContributionDescriptor::validateContributionSize( // Look for a DWARF64-formatted contribution to the string offsets table // starting at a given offset and record it in a descriptor. static Expected -parseDWARF64StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) { +parseDWARF64StringOffsetsTableHeader(DWARFDataExtractor &DA, uint64_t Offset) { if (!DA.isValidOffsetForDataOfSize(Offset, 16)) return createStringError(errc::invalid_argument, "section offset exceeds section size"); - if (DA.getU32(&Offset) != 0xffffffff) + if (DA.getU32(&Offset) != dwarf::DW_LENGTH_DWARF64) return createStringError(errc::invalid_argument, "32 bit contribution referenced from a 64 bit unit"); uint64_t Size = DA.getU64(&Offset); @@ -798,12 +809,12 @@ parseDWARF64StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) { // Look for a DWARF32-formatted contribution to the string offsets table // starting at a given offset and record it in a descriptor. 
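// [Illustrative sketch, not part of the vendored patch.] The two parsers above
// and below expect the .debug_str_offsets contribution headers laid out as
// follows; DW_AT_str_offsets_base points at the first offset entry, i.e. just
// past the header, which is why the callers rewind by 16 or 8 bytes.
//
//   DWARF32 header (8 bytes)      DWARF64 header (16 bytes)
//     uint32_t length;              uint32_t marker;   // 0xffffffff
//     uint16_t version;  // 5       uint64_t length;
//     uint16_t padding;             uint16_t version;  // 5
//                                   uint16_t padding;
#include <cstdint>

constexpr uint64_t strOffsetsHeaderSize(bool IsDWARF64) {
  return IsDWARF64 ? 4 + 8 + 2 + 2 : 4 + 2 + 2;
}

static_assert(strOffsetsHeaderSize(false) == 8, "");
static_assert(strOffsetsHeaderSize(true) == 16, "");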
static Expected -parseDWARF32StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) { +parseDWARF32StringOffsetsTableHeader(DWARFDataExtractor &DA, uint64_t Offset) { if (!DA.isValidOffsetForDataOfSize(Offset, 8)) return createStringError(errc::invalid_argument, "section offset exceeds section size"); uint32_t ContributionSize = DA.getU32(&Offset); - if (ContributionSize >= 0xfffffff0) + if (ContributionSize >= dwarf::DW_LENGTH_lo_reserved) return createStringError(errc::invalid_argument, "invalid length"); uint8_t Version = DA.getU16(&Offset); @@ -823,7 +834,7 @@ parseDWARFStringOffsetsTableHeader(DWARFDataExtractor &DA, case dwarf::DwarfFormat::DWARF64: { if (Offset < 16) return createStringError(errc::invalid_argument, "insufficient space for 64 bit header prefix"); - auto DescOrError = parseDWARF64StringOffsetsTableHeader(DA, (uint32_t)Offset - 16); + auto DescOrError = parseDWARF64StringOffsetsTableHeader(DA, Offset - 16); if (!DescOrError) return DescOrError.takeError(); Desc = *DescOrError; @@ -832,7 +843,7 @@ parseDWARFStringOffsetsTableHeader(DWARFDataExtractor &DA, case dwarf::DwarfFormat::DWARF32: { if (Offset < 8) return createStringError(errc::invalid_argument, "insufficient space for 32 bit header prefix"); - auto DescOrError = parseDWARF32StringOffsetsTableHeader(DA, (uint32_t)Offset - 8); + auto DescOrError = parseDWARF32StringOffsetsTableHeader(DA, Offset - 8); if (!DescOrError) return DescOrError.takeError(); Desc = *DescOrError; diff --git a/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp index 047c63461ccf..f29c1e6cc5c7 100644 --- a/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp +++ b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp @@ -18,7 +18,7 @@ using namespace llvm; bool DWARFUnitIndex::Header::parse(DataExtractor IndexData, - uint32_t *OffsetPtr) { + uint64_t *OffsetPtr) { if (!IndexData.isValidOffsetForDataOfSize(*OffsetPtr, 16)) return false; Version = IndexData.getU32(OffsetPtr); @@ -45,7 +45,7 @@ bool DWARFUnitIndex::parse(DataExtractor IndexData) { } bool DWARFUnitIndex::parseImpl(DataExtractor IndexData) { - uint32_t Offset = 0; + uint64_t Offset = 0; if (!Header.parse(IndexData, &Offset)) return false; @@ -54,10 +54,10 @@ bool DWARFUnitIndex::parseImpl(DataExtractor IndexData) { (2 * Header.NumUnits + 1) * 4 * Header.NumColumns)) return false; - Rows = llvm::make_unique(Header.NumBuckets); + Rows = std::make_unique(Header.NumBuckets); auto Contribs = - llvm::make_unique(Header.NumUnits); - ColumnKinds = llvm::make_unique(Header.NumColumns); + std::make_unique(Header.NumUnits); + ColumnKinds = std::make_unique(Header.NumColumns); // Read Hash Table of Signatures for (unsigned i = 0; i != Header.NumBuckets; ++i) @@ -70,7 +70,7 @@ bool DWARFUnitIndex::parseImpl(DataExtractor IndexData) { continue; Rows[i].Index = this; Rows[i].Contributions = - llvm::make_unique(Header.NumColumns); + std::make_unique(Header.NumColumns); Contribs[Index - 1] = Rows[i].Contributions.get(); } diff --git a/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/lib/DebugInfo/DWARF/DWARFVerifier.cpp index c2b3189514a8..bf499b6ee092 100644 --- a/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -34,11 +34,11 @@ DWARFVerifier::DieRangeInfo::insert(const DWARFAddressRange &R) { if (Pos != End) { if (Pos->intersects(R)) - return Pos; + return std::move(Pos); if (Pos != Begin) { auto Iter = Pos - 1; if (Iter->intersects(R)) - return Iter; + return std::move(Iter); } } @@ -98,7 +98,7 @@ bool DWARFVerifier::DieRangeInfo::intersects(const 
DieRangeInfo &RHS) const { } bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData, - uint32_t *Offset, unsigned UnitIndex, + uint64_t *Offset, unsigned UnitIndex, uint8_t &UnitType, bool &isUnitDWARF64) { uint64_t AbbrOffset, Length; uint8_t AddrSize = 0; @@ -111,9 +111,9 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData, bool ValidType = true; bool ValidAbbrevOffset = true; - uint32_t OffsetStart = *Offset; + uint64_t OffsetStart = *Offset; Length = DebugInfoData.getU32(Offset); - if (Length == UINT32_MAX) { + if (Length == dwarf::DW_LENGTH_DWARF64) { Length = DebugInfoData.getU64(Offset); isUnitDWARF64 = true; } @@ -139,7 +139,7 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData, if (!ValidLength || !ValidVersion || !ValidAddrSize || !ValidAbbrevOffset || !ValidType) { Success = false; - error() << format("Units[%d] - start offset: 0x%08x \n", UnitIndex, + error() << format("Units[%d] - start offset: 0x%08" PRIx64 " \n", UnitIndex, OffsetStart); if (!ValidLength) note() << "The length for this unit is too " @@ -203,7 +203,7 @@ unsigned DWARFVerifier::verifyUnitContents(DWARFUnit &Unit) { } unsigned DWARFVerifier::verifyDebugInfoCallSite(const DWARFDie &Die) { - if (Die.getTag() != DW_TAG_call_site) + if (Die.getTag() != DW_TAG_call_site && Die.getTag() != DW_TAG_GNU_call_site) return 0; DWARFDie Curr = Die.getParent(); @@ -223,7 +223,9 @@ unsigned DWARFVerifier::verifyDebugInfoCallSite(const DWARFDie &Die) { Optional CallAttr = Curr.find({DW_AT_call_all_calls, DW_AT_call_all_source_calls, - DW_AT_call_all_tail_calls}); + DW_AT_call_all_tail_calls, DW_AT_GNU_all_call_sites, + DW_AT_GNU_all_source_call_sites, + DW_AT_GNU_all_tail_call_sites}); if (!CallAttr) { error() << "Subprogram with call site entry has no DW_AT_call attribute:"; Curr.dump(OS); @@ -273,7 +275,7 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S, const DWARFObject &DObj = DCtx.getDWARFObj(); DWARFDataExtractor DebugInfoData(DObj, S, DCtx.isLittleEndian(), 0); unsigned NumDebugInfoErrors = 0; - uint32_t OffsetStart = 0, Offset = 0, UnitIdx = 0; + uint64_t OffsetStart = 0, Offset = 0, UnitIdx = 0; uint8_t UnitType = 0; bool isUnitDWARF64 = false; bool isHeaderChainValid = true; @@ -294,10 +296,10 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S, switch (UnitType) { case dwarf::DW_UT_type: case dwarf::DW_UT_split_type: { - Unit = TypeUnitVector.addUnit(llvm::make_unique( - DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangeSection(), - &DObj.getLocSection(), DObj.getStringSection(), - DObj.getStringOffsetSection(), &DObj.getAppleObjCSection(), + Unit = TypeUnitVector.addUnit(std::make_unique( + DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangesSection(), + &DObj.getLocSection(), DObj.getStrSection(), + DObj.getStrOffsetsSection(), &DObj.getAppleObjCSection(), DObj.getLineSection(), DCtx.isLittleEndian(), false, TypeUnitVector)); break; @@ -308,10 +310,10 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S, case dwarf::DW_UT_partial: // UnitType = 0 means that we are verifying a compile unit in DWARF v4. 
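// [Illustrative sketch, not part of the vendored patch.] The call-site check
// above boils down to: a DW_TAG_call_site / DW_TAG_GNU_call_site DIE must have
// an ancestor subprogram that advertises call-site information through one of
// the DW_AT_call_all_* or DW_AT_GNU_all_*_call_sites attributes. A simplified
// rendering of that walk (hasCallSiteCapableParent is an illustrative name):
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"

static bool hasCallSiteCapableParent(llvm::DWARFDie Die) {
  using namespace llvm::dwarf;
  for (llvm::DWARFDie Cur = Die.getParent(); Cur.isValid();
       Cur = Cur.getParent()) {
    if (Cur.getTag() != DW_TAG_subprogram)
      continue; // keep walking toward the unit DIE
    return Cur.find({DW_AT_call_all_calls, DW_AT_call_all_source_calls,
                     DW_AT_call_all_tail_calls, DW_AT_GNU_all_call_sites,
                     DW_AT_GNU_all_source_call_sites,
                     DW_AT_GNU_all_tail_call_sites})
        .hasValue();
  }
  return false; // no enclosing subprogram at all
}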
case 0: { - Unit = CompileUnitVector.addUnit(llvm::make_unique( - DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangeSection(), - &DObj.getLocSection(), DObj.getStringSection(), - DObj.getStringOffsetSection(), &DObj.getAppleObjCSection(), + Unit = CompileUnitVector.addUnit(std::make_unique( + DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangesSection(), + &DObj.getLocSection(), DObj.getStrSection(), + DObj.getStrOffsetsSection(), &DObj.getAppleObjCSection(), DObj.getLineSection(), DCtx.isLittleEndian(), false, CompileUnitVector)); break; @@ -449,7 +451,7 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, case DW_AT_ranges: // Make sure the offset in the DW_AT_ranges attribute is valid. if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { - if (*SectionOffset >= DObj.getRangeSection().Data.size()) + if (*SectionOffset >= DObj.getRangesSection().Data.size()) ReportError("DW_AT_ranges offset is beyond .debug_ranges bounds:"); break; } @@ -466,9 +468,9 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, ReportError("DIE has invalid DW_AT_stmt_list encoding:"); break; case DW_AT_location: { - auto VerifyLocationExpr = [&](StringRef D) { + auto VerifyLocationExpr = [&](ArrayRef D) { DWARFUnit *U = Die.getDwarfUnit(); - DataExtractor Data(D, DCtx.isLittleEndian(), 0); + DataExtractor Data(toStringRef(D), DCtx.isLittleEndian(), 0); DWARFExpression Expression(Data, U->getVersion(), U->getAddressByteSize()); bool Error = llvm::any_of(Expression, [](DWARFExpression::Operation &Op) { @@ -479,13 +481,13 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, }; if (Optional> Expr = AttrValue.Value.getAsBlock()) { // Verify inlined location. - VerifyLocationExpr(llvm::toStringRef(*Expr)); + VerifyLocationExpr(*Expr); } else if (auto LocOffset = AttrValue.Value.getAsSectionOffset()) { // Verify location list. if (auto DebugLoc = DCtx.getDebugLoc()) if (auto LocList = DebugLoc->getLocationListAtOffset(*LocOffset)) for (const auto &Entry : LocList->Entries) - VerifyLocationExpr({Entry.Loc.data(), Entry.Loc.size()}); + VerifyLocationExpr(Entry.Loc); } break; } @@ -500,6 +502,9 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, break; if (DieTag == DW_TAG_variable && RefTag == DW_TAG_member) break; + // This might be reference to a function declaration. + if (DieTag == DW_TAG_GNU_call_site && RefTag == DW_TAG_subprogram) + break; ReportError("DIE with tag " + TagString(DieTag) + " has " + AttributeString(Attr) + " that points to DIE with " @@ -545,7 +550,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, error() << FormEncodingString(Form) << " CU offset " << format("0x%08" PRIx64, CUOffset) << " is invalid (must be less than CU size of " - << format("0x%08" PRIx32, CUSize) << "):\n"; + << format("0x%08" PRIx64, CUSize) << "):\n"; Die.dump(OS, 0, DumpOpts); dump(Die) << '\n'; } else { @@ -578,7 +583,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, case DW_FORM_strp: { auto SecOffset = AttrValue.Value.getAsSectionOffset(); assert(SecOffset); // DW_FORM_strp is a section offset. - if (SecOffset && *SecOffset >= DObj.getStringSection().size()) { + if (SecOffset && *SecOffset >= DObj.getStrSection().size()) { ++NumErrors; error() << "DW_FORM_strp offset beyond .debug_str bounds:\n"; dump(Die) << '\n'; @@ -605,7 +610,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, // Use a 64-bit type to calculate the offset to guard against overflow. 
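// [Illustrative sketch, not part of the vendored patch.] The wraparound that
// the comment above guards against: formed in 32 bits, a large base plus
// index * size silently wraps and can then sneak past a later bounds check.
#include <cstdint>

int main() {
  uint32_t Base = 0xfffffff0u;
  uint32_t Index = 8, ItemSize = 4;
  uint32_t Narrow = Base + Index * ItemSize;                   // wraps to 0x10
  uint64_t Wide = (uint64_t)Base + (uint64_t)Index * ItemSize; // 0x100000010
  return Narrow < Wide ? 0 : 1; // the narrow result is (incorrectly) tiny
}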
uint64_t Offset = (uint64_t)DieCU->getStringOffsetsBase() + Index * ItemSize; - if (DObj.getStringOffsetSection().Data.size() < Offset + ItemSize) { + if (DObj.getStrOffsetsSection().Data.size() < Offset + ItemSize) { ++NumErrors; error() << FormEncodingString(Form) << " uses index " << format("%" PRIu64, Index) << ", which is too large:\n"; @@ -614,7 +619,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, } // Check that the string offset is valid. uint64_t StringOffset = *DieCU->getStringOffsetSectionItem(Index); - if (StringOffset >= DObj.getStringSection().size()) { + if (StringOffset >= DObj.getStrSection().size()) { ++NumErrors; error() << FormEncodingString(Form) << " uses index " << format("%" PRIu64, Index) @@ -635,7 +640,7 @@ unsigned DWARFVerifier::verifyDebugInfoReferences() { // getting the DIE by offset and emitting an error OS << "Verifying .debug_info references...\n"; unsigned NumErrors = 0; - for (const std::pair> &Pair : + for (const std::pair> &Pair : ReferenceToDIEOffsets) { if (DCtx.getDIEForOffset(Pair.first)) continue; @@ -659,12 +664,12 @@ void DWARFVerifier::verifyDebugLineStmtOffsets() { auto StmtSectionOffset = toSectionOffset(Die.find(DW_AT_stmt_list)); if (!StmtSectionOffset) continue; - const uint32_t LineTableOffset = *StmtSectionOffset; + const uint64_t LineTableOffset = *StmtSectionOffset; auto LineTable = DCtx.getLineTableForUnit(CU.get()); if (LineTableOffset < DCtx.getDWARFObj().getLineSection().Data.size()) { if (!LineTable) { ++NumDebugLineErrors; - error() << ".debug_line[" << format("0x%08" PRIx32, LineTableOffset) + error() << ".debug_line[" << format("0x%08" PRIx64, LineTableOffset) << "] was not able to be parsed for CU:\n"; dump(Die) << '\n'; continue; @@ -680,8 +685,8 @@ void DWARFVerifier::verifyDebugLineStmtOffsets() { if (Iter != StmtListToDie.end()) { ++NumDebugLineErrors; error() << "two compile unit DIEs, " - << format("0x%08" PRIx32, Iter->second.getOffset()) << " and " - << format("0x%08" PRIx32, Die.getOffset()) + << format("0x%08" PRIx64, Iter->second.getOffset()) << " and " + << format("0x%08" PRIx64, Die.getOffset()) << ", have the same DW_AT_stmt_list section offset:\n"; dump(Iter->second); dump(Die) << '\n'; @@ -826,10 +831,10 @@ unsigned DWARFVerifier::verifyAppleAccelTable(const DWARFSection *AccelSection, uint32_t NumBuckets = AccelTable.getNumBuckets(); uint32_t NumHashes = AccelTable.getNumHashes(); - uint32_t BucketsOffset = + uint64_t BucketsOffset = AccelTable.getSizeHdr() + AccelTable.getHeaderDataLength(); - uint32_t HashesBase = BucketsOffset + NumBuckets * 4; - uint32_t OffsetsBase = HashesBase + NumHashes * 4; + uint64_t HashesBase = BucketsOffset + NumBuckets * 4; + uint64_t OffsetsBase = HashesBase + NumHashes * 4; for (uint32_t BucketIdx = 0; BucketIdx < NumBuckets; ++BucketIdx) { uint32_t HashIdx = AccelSectionData.getU32(&BucketsOffset); if (HashIdx >= NumHashes && HashIdx != UINT32_MAX) { @@ -849,28 +854,29 @@ unsigned DWARFVerifier::verifyAppleAccelTable(const DWARFSection *AccelSection, } for (uint32_t HashIdx = 0; HashIdx < NumHashes; ++HashIdx) { - uint32_t HashOffset = HashesBase + 4 * HashIdx; - uint32_t DataOffset = OffsetsBase + 4 * HashIdx; + uint64_t HashOffset = HashesBase + 4 * HashIdx; + uint64_t DataOffset = OffsetsBase + 4 * HashIdx; uint32_t Hash = AccelSectionData.getU32(&HashOffset); - uint32_t HashDataOffset = AccelSectionData.getU32(&DataOffset); + uint64_t HashDataOffset = AccelSectionData.getU32(&DataOffset); if 
(!AccelSectionData.isValidOffsetForDataOfSize(HashDataOffset, sizeof(uint64_t))) { - error() << format("Hash[%d] has invalid HashData offset: 0x%08x.\n", + error() << format("Hash[%d] has invalid HashData offset: " + "0x%08" PRIx64 ".\n", HashIdx, HashDataOffset); ++NumErrors; } - uint32_t StrpOffset; - uint32_t StringOffset; + uint64_t StrpOffset; + uint64_t StringOffset; uint32_t StringCount = 0; - unsigned Offset; + uint64_t Offset; unsigned Tag; while ((StrpOffset = AccelSectionData.getU32(&HashDataOffset)) != 0) { const uint32_t NumHashDataObjects = AccelSectionData.getU32(&HashDataOffset); for (uint32_t HashDataIdx = 0; HashDataIdx < NumHashDataObjects; ++HashDataIdx) { - std::tie(Offset, Tag) = AccelTable.readAtoms(HashDataOffset); + std::tie(Offset, Tag) = AccelTable.readAtoms(&HashDataOffset); auto Die = DCtx.getDIEForOffset(Offset); if (!Die) { const uint32_t BucketIdx = @@ -882,8 +888,8 @@ unsigned DWARFVerifier::verifyAppleAccelTable(const DWARFSection *AccelSection, error() << format( "%s Bucket[%d] Hash[%d] = 0x%08x " - "Str[%u] = 0x%08x " - "DIE[%d] = 0x%08x is not a valid DIE offset for \"%s\".\n", + "Str[%u] = 0x%08" PRIx64 " DIE[%d] = 0x%08" PRIx64 " " + "is not a valid DIE offset for \"%s\".\n", SectionName, BucketIdx, HashIdx, Hash, StringCount, StrpOffset, HashDataIdx, Offset, Name); @@ -908,8 +914,8 @@ unsigned DWARFVerifier::verifyDebugNamesCULists(const DWARFDebugNames &AccelTable) { // A map from CU offset to the (first) Name Index offset which claims to index // this CU. - DenseMap CUMap; - const uint32_t NotIndexed = std::numeric_limits::max(); + DenseMap CUMap; + const uint64_t NotIndexed = std::numeric_limits::max(); CUMap.reserve(DCtx.getNumCompileUnits()); for (const auto &CU : DCtx.compile_units()) @@ -924,7 +930,7 @@ DWARFVerifier::verifyDebugNamesCULists(const DWARFDebugNames &AccelTable) { continue; } for (uint32_t CU = 0, End = NI.getCUCount(); CU < End; ++CU) { - uint32_t Offset = NI.getCUOffset(CU); + uint64_t Offset = NI.getCUOffset(CU); auto Iter = CUMap.find(Offset); if (Iter == CUMap.end()) { @@ -1205,8 +1211,8 @@ unsigned DWARFVerifier::verifyNameIndexEntries( unsigned NumErrors = 0; unsigned NumEntries = 0; - uint32_t EntryID = NTE.getEntryOffset(); - uint32_t NextEntryID = EntryID; + uint64_t EntryID = NTE.getEntryOffset(); + uint64_t NextEntryID = EntryID; Expected EntryOr = NI.getEntry(&NextEntryID); for (; EntryOr; ++NumEntries, EntryID = NextEntryID, EntryOr = NI.getEntry(&NextEntryID)) { @@ -1218,7 +1224,7 @@ unsigned DWARFVerifier::verifyNameIndexEntries( ++NumErrors; continue; } - uint32_t CUOffset = NI.getCUOffset(CUIndex); + uint64_t CUOffset = NI.getCUOffset(CUIndex); uint64_t DIEOffset = CUOffset + *EntryOr->getDIEUnitOffset(); DWARFDie DIE = DCtx.getDIEForOffset(DIEOffset); if (!DIE) { @@ -1276,9 +1282,9 @@ static bool isVariableIndexable(const DWARFDie &Die, DWARFContext &DCtx) { if (!Location) return false; - auto ContainsInterestingOperators = [&](StringRef D) { + auto ContainsInterestingOperators = [&](ArrayRef D) { DWARFUnit *U = Die.getDwarfUnit(); - DataExtractor Data(D, DCtx.isLittleEndian(), U->getAddressByteSize()); + DataExtractor Data(toStringRef(D), DCtx.isLittleEndian(), U->getAddressByteSize()); DWARFExpression Expression(Data, U->getVersion(), U->getAddressByteSize()); return any_of(Expression, [](DWARFExpression::Operation &Op) { return !Op.isError() && (Op.getCode() == DW_OP_addr || @@ -1289,7 +1295,7 @@ static bool isVariableIndexable(const DWARFDie &Die, DWARFContext &DCtx) { if (Optional> Expr = 
Location->getAsBlock()) { // Inlined location. - if (ContainsInterestingOperators(toStringRef(*Expr))) + if (ContainsInterestingOperators(*Expr)) return true; } else if (Optional Offset = Location->getAsSectionOffset()) { // Location list. @@ -1297,7 +1303,7 @@ static bool isVariableIndexable(const DWARFDie &Die, DWARFContext &DCtx) { if (const DWARFDebugLoc::LocationList *LocList = DebugLoc->getLocationListAtOffset(*Offset)) { if (any_of(LocList->Entries, [&](const DWARFDebugLoc::Entry &E) { - return ContainsInterestingOperators({E.Loc.data(), E.Loc.size()}); + return ContainsInterestingOperators(E.Loc); })) return true; } @@ -1455,7 +1461,7 @@ unsigned DWARFVerifier::verifyDebugNames(const DWARFSection &AccelSection, bool DWARFVerifier::handleAccelTables() { const DWARFObject &D = DCtx.getDWARFObj(); - DataExtractor StrData(D.getStringSection(), DCtx.isLittleEndian(), 0); + DataExtractor StrData(D.getStrSection(), DCtx.isLittleEndian(), 0); unsigned NumErrors = 0; if (!D.getAppleNamesSection().Data.empty()) NumErrors += verifyAppleAccelTable(&D.getAppleNamesSection(), &StrData, @@ -1470,8 +1476,8 @@ bool DWARFVerifier::handleAccelTables() { NumErrors += verifyAppleAccelTable(&D.getAppleObjCSection(), &StrData, ".apple_objc"); - if (!D.getDebugNamesSection().Data.empty()) - NumErrors += verifyDebugNames(D.getDebugNamesSection(), StrData); + if (!D.getNamesSection().Data.empty()) + NumErrors += verifyDebugNames(D.getNamesSection(), StrData); return NumErrors == 0; } diff --git a/lib/DebugInfo/GSYM/FileWriter.cpp b/lib/DebugInfo/GSYM/FileWriter.cpp new file mode 100644 index 000000000000..4b30dcb60a7b --- /dev/null +++ b/lib/DebugInfo/GSYM/FileWriter.cpp @@ -0,0 +1,78 @@ +//===- FileWriter.cpp -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; +using namespace gsym; + +FileWriter::~FileWriter() { OS.flush(); } + +void FileWriter::writeSLEB(int64_t S) { + uint8_t Bytes[32]; + auto Length = encodeSLEB128(S, Bytes); + assert(Length < sizeof(Bytes)); + OS.write(reinterpret_cast(Bytes), Length); +} + +void FileWriter::writeULEB(uint64_t U) { + uint8_t Bytes[32]; + auto Length = encodeULEB128(U, Bytes); + assert(Length < sizeof(Bytes)); + OS.write(reinterpret_cast(Bytes), Length); +} + +void FileWriter::writeU8(uint8_t U) { + OS.write(reinterpret_cast(&U), sizeof(U)); +} + +void FileWriter::writeU16(uint16_t U) { + const uint16_t Swapped = support::endian::byte_swap(U, ByteOrder); + OS.write(reinterpret_cast(&Swapped), sizeof(Swapped)); +} + +void FileWriter::writeU32(uint32_t U) { + const uint32_t Swapped = support::endian::byte_swap(U, ByteOrder); + OS.write(reinterpret_cast(&Swapped), sizeof(Swapped)); +} + +void FileWriter::writeU64(uint64_t U) { + const uint64_t Swapped = support::endian::byte_swap(U, ByteOrder); + OS.write(reinterpret_cast(&Swapped), sizeof(Swapped)); +} + +void FileWriter::fixup32(uint32_t U, uint64_t Offset) { + const uint32_t Swapped = support::endian::byte_swap(U, ByteOrder); + OS.pwrite(reinterpret_cast(&Swapped), sizeof(Swapped), + Offset); +} + +void FileWriter::writeData(llvm::ArrayRef Data) { + OS.write(reinterpret_cast(Data.data()), Data.size()); +} + +void FileWriter::writeNullTerminated(llvm::StringRef Str) { + OS << Str << '\0'; +} + +uint64_t FileWriter::tell() { + return OS.tell(); +} + +void FileWriter::alignTo(size_t Align) { + off_t Offset = OS.tell(); + off_t AlignedOffset = (Offset + Align - 1) / Align * Align; + if (AlignedOffset == Offset) + return; + off_t PadCount = AlignedOffset - Offset; + OS.write_zeros(PadCount); +} diff --git a/lib/DebugInfo/GSYM/FunctionInfo.cpp b/lib/DebugInfo/GSYM/FunctionInfo.cpp index 55c36a55b4be..ad022fec9e32 100644 --- a/lib/DebugInfo/GSYM/FunctionInfo.cpp +++ b/lib/DebugInfo/GSYM/FunctionInfo.cpp @@ -1,22 +1,147 @@ -//===- FunctionInfo.cpp -----------------------------------------*- C++ -*-===// +//===- FunctionInfo.cpp ---------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/Support/DataExtractor.h" using namespace llvm; using namespace gsym; +/// FunctionInfo information type that is used to encode the optional data +/// that is associated with a FunctionInfo object. 
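// [Illustrative sketch, not part of the vendored patch.] FileWriter::alignTo()
// above rounds the stream position up with the usual integer trick and then
// pads with zero bytes; the arithmetic works for any positive alignment (GSYM
// uses 4-byte alignment throughout).
#include <cstdint>

constexpr uint64_t roundUp(uint64_t Offset, uint64_t Align) {
  return (Offset + Align - 1) / Align * Align;
}

static_assert(roundUp(0, 4) == 0, "");
static_assert(roundUp(1, 4) == 4, "");
static_assert(roundUp(4, 4) == 4, "");
static_assert(roundUp(7, 4) == 8, "");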
+enum InfoType : uint32_t { + EndOfList = 0u, + LineTableInfo = 1u, + InlineInfo = 2u +}; + raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const FunctionInfo &FI) { OS << '[' << HEX64(FI.Range.Start) << '-' << HEX64(FI.Range.End) << "): " - << "Name=" << HEX32(FI.Name) << '\n'; - for (const auto &Line : FI.Lines) - OS << Line << '\n'; - OS << FI.Inline; + << "Name=" << HEX32(FI.Name) << '\n' << FI.OptLineTable << FI.Inline; return OS; } + +llvm::Expected FunctionInfo::decode(DataExtractor &Data, + uint64_t BaseAddr) { + FunctionInfo FI; + FI.Range.Start = BaseAddr; + uint64_t Offset = 0; + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing FunctionInfo Size", Offset); + FI.Range.End = FI.Range.Start + Data.getU32(&Offset); + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing FunctionInfo Name", Offset); + FI.Name = Data.getU32(&Offset); + if (FI.Name == 0) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": invalid FunctionInfo Name value 0x%8.8x", + Offset - 4, FI.Name); + bool Done = false; + while (!Done) { + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing FunctionInfo InfoType value", Offset); + const uint32_t IT = Data.getU32(&Offset); + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing FunctionInfo InfoType length", Offset); + const uint32_t InfoLength = Data.getU32(&Offset); + if (!Data.isValidOffsetForDataOfSize(Offset, InfoLength)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing FunctionInfo data for InfoType %u", + Offset, IT); + DataExtractor InfoData(Data.getData().substr(Offset, InfoLength), + Data.isLittleEndian(), + Data.getAddressSize()); + switch (IT) { + case InfoType::EndOfList: + Done = true; + break; + + case InfoType::LineTableInfo: + if (Expected LT = LineTable::decode(InfoData, BaseAddr)) + FI.OptLineTable = std::move(LT.get()); + else + return LT.takeError(); + break; + + case InfoType::InlineInfo: + if (Expected II = InlineInfo::decode(InfoData, BaseAddr)) + FI.Inline = std::move(II.get()); + else + return II.takeError(); + break; + + default: + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": unsupported InfoType %u", + Offset-8, IT); + } + Offset += InfoLength; + } + return std::move(FI); +} + +llvm::Expected FunctionInfo::encode(FileWriter &O) const { + if (!isValid()) + return createStringError(std::errc::invalid_argument, + "attempted to encode invalid FunctionInfo object"); + // Align FunctionInfo data to a 4 byte alignment. + O.alignTo(4); + const uint64_t FuncInfoOffset = O.tell(); + // Write the size in bytes of this function as a uint32_t. This can be zero + // if we just have a symbol from a symbol table and that symbol has no size. + O.writeU32(size()); + // Write the name of this function as a uint32_t string table offset. + O.writeU32(Name); + + if (OptLineTable.hasValue()) { + O.writeU32(InfoType::LineTableInfo); + // Write a uint32_t length as zero for now, we will fix this up after + // writing the LineTable out with the number of bytes that were written. 
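// [Illustrative sketch, not part of the vendored patch.] At this point in
// FunctionInfo::encode() each optional piece of data is framed as
// [u32 InfoType][u32 InfoLength][payload bytes]; the payload size is not known
// up front, so a zero length is written first and patched afterwards with
// FileWriter::fixup32(). writeChunk() and its payload callback are
// illustrative names; the real code also rejects payloads over UINT32_MAX.
#include "llvm/DebugInfo/GSYM/FileWriter.h"
#include <cstdint>
#include <functional>

static void
writeChunk(llvm::gsym::FileWriter &O, uint32_t Type,
           const std::function<void(llvm::gsym::FileWriter &)> &Payload) {
  O.writeU32(Type);           // InfoType
  O.writeU32(0);              // InfoLength placeholder
  const uint64_t Start = O.tell();
  Payload(O);                 // stream the chunk body
  const uint64_t Length = O.tell() - Start;
  O.fixup32(static_cast<uint32_t>(Length), Start - 4); // patch the placeholder
}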
+ O.writeU32(0); + const auto StartOffset = O.tell(); + llvm::Error err = OptLineTable->encode(O, Range.Start); + if (err) + return std::move(err); + const off_t Length = O.tell() - StartOffset; + if (Length > UINT32_MAX) + return createStringError(std::errc::invalid_argument, + "LineTable length is greater than UINT32_MAX"); + // Fixup the size of the LineTable data with the correct size. + O.fixup32(static_cast(Length), StartOffset - 4); + } + + // Write out the inline function info if we have any and if it is valid. + if (Inline.hasValue()) { + O.writeU32(InfoType::InlineInfo); + // Write a uint32_t length as zero for now, we will fix this up after + // writing the LineTable out with the number of bytes that were written. + O.writeU32(0); + const auto StartOffset = O.tell(); + llvm::Error err = Inline->encode(O, Range.Start); + if (err) + return std::move(err); + const off_t Length = O.tell() - StartOffset; + if (Length > UINT32_MAX) + return createStringError(std::errc::invalid_argument, + "InlineInfo length is greater than UINT32_MAX"); + // Fixup the size of the InlineInfo data with the correct size. + O.fixup32(static_cast(Length), StartOffset - 4); + } + + // Terminate the data chunks with and end of list with zero size + O.writeU32(InfoType::EndOfList); + O.writeU32(0); + return FuncInfoOffset; +} diff --git a/lib/DebugInfo/GSYM/GsymCreator.cpp b/lib/DebugInfo/GSYM/GsymCreator.cpp new file mode 100644 index 000000000000..f371426f2010 --- /dev/null +++ b/lib/DebugInfo/GSYM/GsymCreator.cpp @@ -0,0 +1,275 @@ +//===- GsymCreator.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/DebugInfo/GSYM/Header.h" +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include +#include +#include + +using namespace llvm; +using namespace gsym; + + +GsymCreator::GsymCreator() : StrTab(StringTableBuilder::ELF) { + insertFile(StringRef()); +} + +uint32_t GsymCreator::insertFile(StringRef Path, + llvm::sys::path::Style Style) { + llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style); + llvm::StringRef filename = llvm::sys::path::filename(Path, Style); + FileEntry FE(insertString(directory), insertString(filename)); + + std::lock_guard Guard(Mutex); + const auto NextIndex = Files.size(); + // Find FE in hash map and insert if not present. 
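// [Illustrative sketch, not part of the vendored patch.] The insert-or-reuse
// idiom at this point in GsymCreator::insertFile(): try to insert
// (entry -> next index); if the entry already exists, map insert() leaves the
// table alone and returns the existing slot, so duplicates collapse to one
// index. The same shape on plain standard containers:
#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

static uint32_t internString(std::unordered_map<std::string, uint32_t> &Index,
                             std::vector<std::string> &Table,
                             const std::string &S) {
  auto R = Index.insert({S, static_cast<uint32_t>(Table.size())});
  if (R.second)           // newly inserted: append to the side table
    Table.push_back(S);
  return R.first->second; // index of the existing or freshly added entry
}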
+ auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex)); + if (R.second) + Files.emplace_back(FE); + return R.first->second; +} + +llvm::Error GsymCreator::save(StringRef Path, + llvm::support::endianness ByteOrder) const { + std::error_code EC; + raw_fd_ostream OutStrm(Path, EC); + if (EC) + return llvm::errorCodeToError(EC); + FileWriter O(OutStrm, ByteOrder); + return encode(O); +} + +llvm::Error GsymCreator::encode(FileWriter &O) const { + std::lock_guard Guard(Mutex); + if (Funcs.empty()) + return createStringError(std::errc::invalid_argument, + "no functions to encode"); + if (!Finalized) + return createStringError(std::errc::invalid_argument, + "GsymCreator wasn't finalized prior to encoding"); + + if (Funcs.size() > UINT32_MAX) + return createStringError(std::errc::invalid_argument, + "too many FunctionInfos"); + const uint64_t MinAddr = Funcs.front().startAddress(); + const uint64_t MaxAddr = Funcs.back().startAddress(); + const uint64_t AddrDelta = MaxAddr - MinAddr; + Header Hdr; + Hdr.Magic = GSYM_MAGIC; + Hdr.Version = GSYM_VERSION; + Hdr.AddrOffSize = 0; + Hdr.UUIDSize = static_cast(UUID.size()); + Hdr.BaseAddress = MinAddr; + Hdr.NumAddresses = static_cast(Funcs.size()); + Hdr.StrtabOffset = 0; // We will fix this up later. + Hdr.StrtabOffset = 0; // We will fix this up later. + memset(Hdr.UUID, 0, sizeof(Hdr.UUID)); + if (UUID.size() > sizeof(Hdr.UUID)) + return createStringError(std::errc::invalid_argument, + "invalid UUID size %u", (uint32_t)UUID.size()); + // Set the address offset size correctly in the GSYM header. + if (AddrDelta <= UINT8_MAX) + Hdr.AddrOffSize = 1; + else if (AddrDelta <= UINT16_MAX) + Hdr.AddrOffSize = 2; + else if (AddrDelta <= UINT32_MAX) + Hdr.AddrOffSize = 4; + else + Hdr.AddrOffSize = 8; + // Copy the UUID value if we have one. + if (UUID.size() > 0) + memcpy(Hdr.UUID, UUID.data(), UUID.size()); + // Write out the header. + llvm::Error Err = Hdr.encode(O); + if (Err) + return Err; + + // Write out the address offsets. + O.alignTo(Hdr.AddrOffSize); + for (const auto &FuncInfo : Funcs) { + uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress; + switch(Hdr.AddrOffSize) { + case 1: O.writeU8(static_cast(AddrOffset)); break; + case 2: O.writeU16(static_cast(AddrOffset)); break; + case 4: O.writeU32(static_cast(AddrOffset)); break; + case 8: O.writeU64(AddrOffset); break; + } + } + + // Write out all zeros for the AddrInfoOffsets. + O.alignTo(4); + const off_t AddrInfoOffsetsOffset = O.tell(); + for (size_t i = 0, n = Funcs.size(); i < n; ++i) + O.writeU32(0); + + // Write out the file table + O.alignTo(4); + assert(!Files.empty()); + assert(Files[0].Dir == 0); + assert(Files[0].Base == 0); + size_t NumFiles = Files.size(); + if (NumFiles > UINT32_MAX) + return createStringError(std::errc::invalid_argument, + "too many files"); + O.writeU32(static_cast(NumFiles)); + for (auto File: Files) { + O.writeU32(File.Dir); + O.writeU32(File.Base); + } + + // Write out the sting table. + const off_t StrtabOffset = O.tell(); + StrTab.write(O.get_stream()); + const off_t StrtabSize = O.tell() - StrtabOffset; + std::vector AddrInfoOffsets; + + // Write out the address infos for each function info. 
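+  // The layout written so far is: Header, the address offset table (one entry
+  // of AddrOffSize bytes per function, relative to Hdr.BaseAddress), the
+  // zero-filled AddrInfoOffsets table, the file table, and the string table.
+  // Each FunctionInfo is now encoded in order, and the offset it returns is
+  // remembered so the matching AddrInfoOffsets entry can be fixed up below.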
+ for (const auto &FuncInfo : Funcs) { + if (Expected OffsetOrErr = FuncInfo.encode(O)) + AddrInfoOffsets.push_back(OffsetOrErr.get()); + else + return OffsetOrErr.takeError(); + } + // Fixup the string table offset and size in the header + O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset)); + O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize)); + + // Fixup all address info offsets + uint64_t Offset = 0; + for (auto AddrInfoOffset: AddrInfoOffsets) { + O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset); + Offset += 4; + } + return ErrorSuccess(); +} + +llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) { + std::lock_guard Guard(Mutex); + if (Finalized) + return createStringError(std::errc::invalid_argument, + "already finalized"); + Finalized = true; + + // Sort function infos so we can emit sorted functions. + llvm::sort(Funcs.begin(), Funcs.end()); + + // Don't let the string table indexes change by finalizing in order. + StrTab.finalizeInOrder(); + + // Remove duplicates function infos that have both entries from debug info + // (DWARF or Breakpad) and entries from the SymbolTable. + // + // Also handle overlapping function. Usually there shouldn't be any, but they + // can and do happen in some rare cases. + // + // (a) (b) (c) + // ^ ^ ^ ^ + // |X |Y |X ^ |X + // | | | |Y | ^ + // | | | v v |Y + // v v v v + // + // In (a) and (b), Y is ignored and X will be reported for the full range. + // In (c), both functions will be included in the result and lookups for an + // address in the intersection will return Y because of binary search. + // + // Note that in case of (b), we cannot include Y in the result because then + // we wouldn't find any function for range (end of Y, end of X) + // with binary search + auto NumBefore = Funcs.size(); + auto Curr = Funcs.begin(); + auto Prev = Funcs.end(); + while (Curr != Funcs.end()) { + // Can't check for overlaps or same address ranges if we don't have a + // previous entry + if (Prev != Funcs.end()) { + if (Prev->Range.intersects(Curr->Range)) { + // Overlapping address ranges. + if (Prev->Range == Curr->Range) { + // Same address range. Check if one is from debug info and the other + // is from a symbol table. If so, then keep the one with debug info. + // Our sorting guarantees that entries with matching address ranges + // that have debug info are last in the sort. + if (*Prev == *Curr) { + // FunctionInfo entries match exactly (range, lines, inlines) + OS << "warning: duplicate function info entries, removing " + "duplicate:\n" + << *Curr << '\n'; + Curr = Funcs.erase(Prev); + } else { + if (!Prev->hasRichInfo() && Curr->hasRichInfo()) { + // Same address range, one with no debug info (symbol) and the + // next with debug info. Keep the latter. + Curr = Funcs.erase(Prev); + } else { + OS << "warning: same address range contains different debug " + << "info. 
Removing:\n" + << *Prev << "\nIn favor of this one:\n" + << *Curr << "\n"; + Curr = Funcs.erase(Prev); + } + } + } else { + // print warnings about overlaps + OS << "warning: function ranges overlap:\n" + << *Prev << "\n" + << *Curr << "\n"; + } + } else if (Prev->Range.size() == 0 && + Curr->Range.contains(Prev->Range.Start)) { + OS << "warning: removing symbol:\n" + << *Prev << "\nKeeping:\n" + << *Curr << "\n"; + Curr = Funcs.erase(Prev); + } + } + if (Curr == Funcs.end()) + break; + Prev = Curr++; + } + + OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with " + << Funcs.size() << " total\n"; + return Error::success(); +} + +uint32_t GsymCreator::insertString(StringRef S) { + std::lock_guard Guard(Mutex); + if (S.empty()) + return 0; + return StrTab.add(S); +} + +void GsymCreator::addFunctionInfo(FunctionInfo &&FI) { + std::lock_guard Guard(Mutex); + Funcs.emplace_back(FI); +} + +void GsymCreator::forEachFunctionInfo( + std::function const &Callback) { + std::lock_guard Guard(Mutex); + for (auto &FI : Funcs) { + if (!Callback(FI)) + break; + } +} + +void GsymCreator::forEachFunctionInfo( + std::function const &Callback) const { + std::lock_guard Guard(Mutex); + for (const auto &FI : Funcs) { + if (!Callback(FI)) + break; + } +} diff --git a/lib/DebugInfo/GSYM/GsymReader.cpp b/lib/DebugInfo/GSYM/GsymReader.cpp new file mode 100644 index 000000000000..1b448cf80b70 --- /dev/null +++ b/lib/DebugInfo/GSYM/GsymReader.cpp @@ -0,0 +1,265 @@ +//===- GsymReader.cpp -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/GsymReader.h" + +#include +#include +#include +#include + +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace gsym; + +GsymReader::GsymReader(std::unique_ptr Buffer) : + MemBuffer(std::move(Buffer)), + Endian(support::endian::system_endianness()) {} + + GsymReader::GsymReader(GsymReader &&RHS) = default; + +GsymReader::~GsymReader() = default; + +llvm::Expected GsymReader::openFile(StringRef Filename) { + // Open the input file and return an appropriate error if needed. + ErrorOr> BuffOrErr = + MemoryBuffer::getFileOrSTDIN(Filename); + auto Err = BuffOrErr.getError(); + if (Err) + return llvm::errorCodeToError(Err); + return create(BuffOrErr.get()); +} + +llvm::Expected GsymReader::copyBuffer(StringRef Bytes) { + auto MemBuffer = MemoryBuffer::getMemBufferCopy(Bytes, "GSYM bytes"); + return create(MemBuffer); +} + +llvm::Expected +GsymReader::create(std::unique_ptr &MemBuffer) { + if (!MemBuffer.get()) + return createStringError(std::errc::invalid_argument, + "invalid memory buffer"); + GsymReader GR(std::move(MemBuffer)); + llvm::Error Err = GR.parse(); + if (Err) + return std::move(Err); + return std::move(GR); +} + +llvm::Error +GsymReader::parse() { + BinaryStreamReader FileData(MemBuffer->getBuffer(), + support::endian::system_endianness()); + // Check for the magic bytes. This file format is designed to be mmap'ed + // into a process and accessed as read only. 
This is done for performance + // and efficiency for symbolicating and parsing GSYM data. + if (FileData.readObject(Hdr)) + return createStringError(std::errc::invalid_argument, + "not enough data for a GSYM header"); + + const auto HostByteOrder = support::endian::system_endianness(); + switch (Hdr->Magic) { + case GSYM_MAGIC: + Endian = HostByteOrder; + break; + case GSYM_CIGAM: + // This is a GSYM file, but not native endianness. + Endian = sys::IsBigEndianHost ? support::little : support::big; + Swap.reset(new SwappedData); + break; + default: + return createStringError(std::errc::invalid_argument, + "not a GSYM file"); + } + + bool DataIsLittleEndian = HostByteOrder != support::little; + // Read a correctly byte swapped header if we need to. + if (Swap) { + DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4); + if (auto ExpectedHdr = Header::decode(Data)) + Swap->Hdr = ExpectedHdr.get(); + else + return ExpectedHdr.takeError(); + Hdr = &Swap->Hdr; + } + + // Detect errors in the header and report any that are found. If we make it + // past this without errors, we know we have a good magic value, a supported + // version number, verified address offset size and a valid UUID size. + if (Error Err = Hdr->checkForError()) + return Err; + + if (!Swap) { + // This is the native endianness case that is most common and optimized for + // efficient lookups. Here we just grab pointers to the native data and + // use ArrayRef objects to allow efficient read only access. + + // Read the address offsets. + if (FileData.padToAlignment(Hdr->AddrOffSize) || + FileData.readArray(AddrOffsets, + Hdr->NumAddresses * Hdr->AddrOffSize)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + + // Read the address info offsets. + if (FileData.padToAlignment(4) || + FileData.readArray(AddrInfoOffsets, Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address info offsets table"); + + // Read the file table. + uint32_t NumFiles = 0; + if (FileData.readInteger(NumFiles) || FileData.readArray(Files, NumFiles)) + return createStringError(std::errc::invalid_argument, + "failed to read file table"); + + // Get the string table. + FileData.setOffset(Hdr->StrtabOffset); + if (FileData.readFixedString(StrTab.Data, Hdr->StrtabSize)) + return createStringError(std::errc::invalid_argument, + "failed to read string table"); +} else { + // This is the non native endianness case that is not common and not + // optimized for lookups. Here we decode the important tables into local + // storage and then set the ArrayRef objects to point to these swapped + // copies of the read only data so lookups can be as efficient as possible. + DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4); + + // Read the address offsets. 
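+    // In this byte-swapped path each table is decoded with the DataExtractor
+    // into vectors owned by Swap, and the ArrayRef members are then pointed
+    // at those copies, so code after parse() can use the same views in both
+    // the native and non-native endian cases.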
+ uint64_t Offset = alignTo(sizeof(Header), Hdr->AddrOffSize); + Swap->AddrOffsets.resize(Hdr->NumAddresses * Hdr->AddrOffSize); + switch (Hdr->AddrOffSize) { + case 1: + if (!Data.getU8(&Offset, Swap->AddrOffsets.data(), Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + break; + case 2: + if (!Data.getU16(&Offset, + reinterpret_cast(Swap->AddrOffsets.data()), + Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + break; + case 4: + if (!Data.getU32(&Offset, + reinterpret_cast(Swap->AddrOffsets.data()), + Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + break; + case 8: + if (!Data.getU64(&Offset, + reinterpret_cast(Swap->AddrOffsets.data()), + Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + } + AddrOffsets = ArrayRef(Swap->AddrOffsets); + + // Read the address info offsets. + Offset = alignTo(Offset, 4); + Swap->AddrInfoOffsets.resize(Hdr->NumAddresses); + if (Data.getU32(&Offset, Swap->AddrInfoOffsets.data(), Hdr->NumAddresses)) + AddrInfoOffsets = ArrayRef(Swap->AddrInfoOffsets); + else + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + // Read the file table. + const uint32_t NumFiles = Data.getU32(&Offset); + if (NumFiles > 0) { + Swap->Files.resize(NumFiles); + if (Data.getU32(&Offset, &Swap->Files[0].Dir, NumFiles*2)) + Files = ArrayRef(Swap->Files); + else + return createStringError(std::errc::invalid_argument, + "failed to read file table"); + } + // Get the string table. + StrTab.Data = MemBuffer->getBuffer().substr(Hdr->StrtabOffset, + Hdr->StrtabSize); + if (StrTab.Data.empty()) + return createStringError(std::errc::invalid_argument, + "failed to read string table"); + } + return Error::success(); + +} + +const Header &GsymReader::getHeader() const { + // The only way to get a GsymReader is from GsymReader::openFile(...) or + // GsymReader::copyBuffer() and the header must be valid and initialized to + // a valid pointer value, so the assert below should not trigger. 
+ assert(Hdr); + return *Hdr; +} + +Optional GsymReader::getAddress(size_t Index) const { + switch (Hdr->AddrOffSize) { + case 1: return addressForIndex(Index); + case 2: return addressForIndex(Index); + case 4: return addressForIndex(Index); + case 8: return addressForIndex(Index); + } + return llvm::None; +} + +Optional GsymReader::getAddressInfoOffset(size_t Index) const { + const auto NumAddrInfoOffsets = AddrInfoOffsets.size(); + if (Index < NumAddrInfoOffsets) + return AddrInfoOffsets[Index]; + return llvm::None; +} + +Expected +GsymReader::getAddressIndex(const uint64_t Addr) const { + if (Addr < Hdr->BaseAddress) + return createStringError(std::errc::invalid_argument, + "address 0x%" PRIx64 " not in GSYM", Addr); + const uint64_t AddrOffset = Addr - Hdr->BaseAddress; + switch (Hdr->AddrOffSize) { + case 1: return getAddressOffsetIndex(AddrOffset); + case 2: return getAddressOffsetIndex(AddrOffset); + case 4: return getAddressOffsetIndex(AddrOffset); + case 8: return getAddressOffsetIndex(AddrOffset); + default: break; + } + return createStringError(std::errc::invalid_argument, + "unsupported address offset size %u", + Hdr->AddrOffSize); +} + +llvm::Expected GsymReader::getFunctionInfo(uint64_t Addr) const { + Expected AddressIndex = getAddressIndex(Addr); + if (!AddressIndex) + return AddressIndex.takeError(); + // Address info offsets size should have been checked in parse(). + assert(*AddressIndex < AddrInfoOffsets.size()); + auto AddrInfoOffset = AddrInfoOffsets[*AddressIndex]; + DataExtractor Data(MemBuffer->getBuffer().substr(AddrInfoOffset), Endian, 4); + if (Optional OptAddr = getAddress(*AddressIndex)) { + auto ExpectedFI = FunctionInfo::decode(Data, *OptAddr); + if (ExpectedFI) { + if (ExpectedFI->Range.contains(Addr) || ExpectedFI->Range.size() == 0) + return ExpectedFI; + return createStringError(std::errc::invalid_argument, + "address 0x%" PRIx64 " not in GSYM", Addr); + } + } + return createStringError(std::errc::invalid_argument, + "failed to extract address[%" PRIu64 "]", + *AddressIndex); +} diff --git a/lib/DebugInfo/GSYM/Header.cpp b/lib/DebugInfo/GSYM/Header.cpp new file mode 100644 index 000000000000..0b3fb9c49894 --- /dev/null +++ b/lib/DebugInfo/GSYM/Header.cpp @@ -0,0 +1,109 @@ +//===- Header.cpp -----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/Header.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +#define HEX8(v) llvm::format_hex(v, 4) +#define HEX16(v) llvm::format_hex(v, 6) +#define HEX32(v) llvm::format_hex(v, 10) +#define HEX64(v) llvm::format_hex(v, 18) + +using namespace llvm; +using namespace gsym; + +raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const Header &H) { + OS << "Header:\n"; + OS << " Magic = " << HEX32(H.Magic) << "\n"; + OS << " Version = " << HEX16(H.Version) << '\n'; + OS << " AddrOffSize = " << HEX8(H.AddrOffSize) << '\n'; + OS << " UUIDSize = " << HEX8(H.UUIDSize) << '\n'; + OS << " BaseAddress = " << HEX64(H.BaseAddress) << '\n'; + OS << " NumAddresses = " << HEX32(H.NumAddresses) << '\n'; + OS << " StrtabOffset = " << HEX32(H.StrtabOffset) << '\n'; + OS << " StrtabSize = " << HEX32(H.StrtabSize) << '\n'; + OS << " UUID = "; + for (uint8_t I = 0; I < H.UUIDSize; ++I) + OS << format_hex_no_prefix(H.UUID[I], 2); + OS << '\n'; + return OS; +} + +/// Check the header and detect any errors. +llvm::Error Header::checkForError() const { + if (Magic != GSYM_MAGIC) + return createStringError(std::errc::invalid_argument, + "invalid GSYM magic 0x%8.8x", Magic); + if (Version != GSYM_VERSION) + return createStringError(std::errc::invalid_argument, + "unsupported GSYM version %u", Version); + switch (AddrOffSize) { + case 1: break; + case 2: break; + case 4: break; + case 8: break; + default: + return createStringError(std::errc::invalid_argument, + "invalid address offset size %u", + AddrOffSize); + } + if (UUIDSize > GSYM_MAX_UUID_SIZE) + return createStringError(std::errc::invalid_argument, + "invalid UUID size %u", UUIDSize); + return Error::success(); +} + +llvm::Expected
Header::decode(DataExtractor &Data) { + uint64_t Offset = 0; + // The header is stored as a single blob of data that has a fixed byte size. + if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(Header))) + return createStringError(std::errc::invalid_argument, + "not enough data for a gsym::Header"); + Header H; + H.Magic = Data.getU32(&Offset); + H.Version = Data.getU16(&Offset); + H.AddrOffSize = Data.getU8(&Offset); + H.UUIDSize = Data.getU8(&Offset); + H.BaseAddress = Data.getU64(&Offset); + H.NumAddresses = Data.getU32(&Offset); + H.StrtabOffset = Data.getU32(&Offset); + H.StrtabSize = Data.getU32(&Offset); + Data.getU8(&Offset, H.UUID, GSYM_MAX_UUID_SIZE); + if (llvm::Error Err = H.checkForError()) + return std::move(Err); + return H; +} + +llvm::Error Header::encode(FileWriter &O) const { + // Users must verify the Header is valid prior to calling this funtion. + if (llvm::Error Err = checkForError()) + return Err; + O.writeU32(Magic); + O.writeU16(Version); + O.writeU8(AddrOffSize); + O.writeU8(UUIDSize); + O.writeU64(BaseAddress); + O.writeU32(NumAddresses); + O.writeU32(StrtabOffset); + O.writeU32(StrtabSize); + O.writeData(llvm::ArrayRef(UUID)); + return Error::success(); +} + +bool llvm::gsym::operator==(const Header &LHS, const Header &RHS) { + return LHS.Magic == RHS.Magic && LHS.Version == RHS.Version && + LHS.AddrOffSize == RHS.AddrOffSize && LHS.UUIDSize == RHS.UUIDSize && + LHS.BaseAddress == RHS.BaseAddress && + LHS.NumAddresses == RHS.NumAddresses && + LHS.StrtabOffset == RHS.StrtabOffset && + LHS.StrtabSize == RHS.StrtabSize && + memcmp(LHS.UUID, RHS.UUID, LHS.UUIDSize) == 0; +} diff --git a/lib/DebugInfo/GSYM/InlineInfo.cpp b/lib/DebugInfo/GSYM/InlineInfo.cpp index 781c1755241d..32ed2c709575 100644 --- a/lib/DebugInfo/GSYM/InlineInfo.cpp +++ b/lib/DebugInfo/GSYM/InlineInfo.cpp @@ -8,7 +8,9 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/GSYM/FileEntry.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" #include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/Support/DataExtractor.h" #include #include @@ -57,3 +59,101 @@ llvm::Optional InlineInfo::getInlineStack(uint64_t Addr return Result; return llvm::None; } + +/// Decode an InlineInfo in Data at the specified offset. +/// +/// A local helper function to decode InlineInfo objects. This function is +/// called recursively when parsing child InlineInfo objects. +/// +/// \param Data The data extractor to decode from. +/// \param Offset The offset within \a Data to decode from. +/// \param BaseAddr The base address to use when decoding address ranges. +/// \returns An InlineInfo or an error describing the issue that was +/// encountered during decoding. 
+static llvm::Expected decode(DataExtractor &Data, uint64_t &Offset, + uint64_t BaseAddr) { + InlineInfo Inline; + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing InlineInfo address ranges data", Offset); + Inline.Ranges.decode(Data, BaseAddr, Offset); + if (Inline.Ranges.empty()) + return Inline; + if (!Data.isValidOffsetForDataOfSize(Offset, 1)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing InlineInfo uint8_t indicating children", + Offset); + bool HasChildren = Data.getU8(&Offset) != 0; + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing InlineInfo uint32_t for name", Offset); + Inline.Name = Data.getU32(&Offset); + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing ULEB128 for InlineInfo call file", Offset); + Inline.CallFile = (uint32_t)Data.getULEB128(&Offset); + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing ULEB128 for InlineInfo call line", Offset); + Inline.CallLine = (uint32_t)Data.getULEB128(&Offset); + if (HasChildren) { + // Child address ranges are encoded relative to the first address in the + // parent InlineInfo object. + const auto ChildBaseAddr = Inline.Ranges[0].Start; + while (true) { + llvm::Expected Child = decode(Data, Offset, ChildBaseAddr); + if (!Child) + return Child.takeError(); + // InlineInfo with empty Ranges termintes a child sibling chain. + if (Child.get().Ranges.empty()) + break; + Inline.Children.emplace_back(std::move(*Child)); + } + } + return Inline; +} + +llvm::Expected InlineInfo::decode(DataExtractor &Data, + uint64_t BaseAddr) { + uint64_t Offset = 0; + return ::decode(Data, Offset, BaseAddr); +} + +llvm::Error InlineInfo::encode(FileWriter &O, uint64_t BaseAddr) const { + // Users must verify the InlineInfo is valid prior to calling this funtion. + // We don't want to emit any InlineInfo objects if they are not valid since + // it will waste space in the GSYM file. + if (!isValid()) + return createStringError(std::errc::invalid_argument, + "attempted to encode invalid InlineInfo object"); + Ranges.encode(O, BaseAddr); + bool HasChildren = !Children.empty(); + O.writeU8(HasChildren); + O.writeU32(Name); + O.writeULEB(CallFile); + O.writeULEB(CallLine); + if (HasChildren) { + // Child address ranges are encoded as relative to the first + // address in the Ranges for this object. This keeps the offsets + // small and allows for efficient encoding using ULEB offsets. + const uint64_t ChildBaseAddr = Ranges[0].Start; + for (const auto &Child : Children) { + // Make sure all child address ranges are contained in the parent address + // ranges. + for (const auto &ChildRange: Child.Ranges) { + if (!Ranges.contains(ChildRange)) + return createStringError(std::errc::invalid_argument, + "child range not contained in parent"); + } + llvm::Error Err = Child.encode(O, ChildBaseAddr); + if (Err) + return Err; + } + + // Terminate child sibling chain by emitting a zero. This zero will cause + // the decodeAll() function above to return false and stop the decoding + // of child InlineInfo objects that are siblings. 
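+    // On the decode side the static decode() helper reads this zero as an
+    // empty AddressRanges list and returns an InlineInfo with no ranges,
+    // which is what ends the sibling loop in the parent's decode.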
+ O.writeULEB(0); + } + return Error::success(); +} diff --git a/lib/DebugInfo/GSYM/LineTable.cpp b/lib/DebugInfo/GSYM/LineTable.cpp new file mode 100644 index 000000000000..824c0041be9f --- /dev/null +++ b/lib/DebugInfo/GSYM/LineTable.cpp @@ -0,0 +1,287 @@ +//===- LineTable.cpp --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/Support/DataExtractor.h" + +using namespace llvm; +using namespace gsym; + +enum LineTableOpCode { + EndSequence = 0x00, ///< End of the line table. + SetFile = 0x01, ///< Set LineTableRow.file_idx, don't push a row. + AdvancePC = 0x02, ///< Increment LineTableRow.address, and push a row. + AdvanceLine = 0x03, ///< Set LineTableRow.file_line, don't push a row. + FirstSpecial = 0x04, ///< All special opcodes push a row. +}; + +struct DeltaInfo { + int64_t Delta; + uint32_t Count; + DeltaInfo(int64_t D, uint32_t C) : Delta(D), Count(C) {} +}; + +inline bool operator<(const DeltaInfo &LHS, int64_t Delta) { + return LHS.Delta < Delta; +} + +static bool encodeSpecial(int64_t MinLineDelta, int64_t MaxLineDelta, + int64_t LineDelta, uint64_t AddrDelta, + uint8_t &SpecialOp) { + if (LineDelta < MinLineDelta) + return false; + if (LineDelta > MaxLineDelta) + return false; + int64_t LineRange = MaxLineDelta - MinLineDelta + 1; + int64_t AdjustedOp = ((LineDelta - MinLineDelta) + AddrDelta * LineRange); + int64_t Op = AdjustedOp + FirstSpecial; + if (Op < 0) + return false; + if (Op > 255) + return false; + SpecialOp = (uint8_t)Op; + return true; +} + +typedef std::function LineEntryCallback; + +static llvm::Error parse(DataExtractor &Data, uint64_t BaseAddr, + LineEntryCallback const &Callback) { + uint64_t Offset = 0; + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing LineTable MinDelta", Offset); + int64_t MinDelta = Data.getSLEB128(&Offset); + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing LineTable MaxDelta", Offset); + int64_t MaxDelta = Data.getSLEB128(&Offset); + int64_t LineRange = MaxDelta - MinDelta + 1; + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing LineTable FirstLine", Offset); + const uint32_t FirstLine = (uint32_t)Data.getULEB128(&Offset); + LineEntry Row(BaseAddr, 1, FirstLine); + bool Done = false; + while (!Done) { + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": EOF found before EndSequence", Offset); + uint8_t Op = Data.getU8(&Offset); + switch (Op) { + case EndSequence: + Done = true; + break; + case SetFile: + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": EOF found before SetFile value", + Offset); + Row.File = (uint32_t)Data.getULEB128(&Offset); + break; + case AdvancePC: + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": EOF found before AdvancePC value", + Offset); + Row.Addr += Data.getULEB128(&Offset); + // If the function callback returns false, we stop parsing. 
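+      // Note that AdvancePC (like the special opcodes handled below) pushes a
+      // row by invoking the callback, while SetFile and AdvanceLine only
+      // update the pending row state.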
+ if (Callback(Row) == false) + return Error::success(); + break; + case AdvanceLine: + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": EOF found before AdvanceLine value", + Offset); + Row.Line += Data.getSLEB128(&Offset); + break; + default: { + // A byte that contains both address and line increment. + uint8_t AdjustedOp = Op - FirstSpecial; + int64_t LineDelta = MinDelta + (AdjustedOp % LineRange); + uint64_t AddrDelta = (AdjustedOp / LineRange); + Row.Line += LineDelta; + Row.Addr += AddrDelta; + // If the function callback returns false, we stop parsing. + if (Callback(Row) == false) + return Error::success(); + break; + } + } + } + return Error::success(); +} + +llvm::Error LineTable::encode(FileWriter &Out, uint64_t BaseAddr) const { + // Users must verify the LineTable is valid prior to calling this funtion. + // We don't want to emit any LineTable objects if they are not valid since + // it will waste space in the GSYM file. + if (!isValid()) + return createStringError(std::errc::invalid_argument, + "attempted to encode invalid LineTable object"); + + int64_t MinLineDelta = INT64_MAX; + int64_t MaxLineDelta = INT64_MIN; + std::vector DeltaInfos; + if (Lines.size() == 1) { + MinLineDelta = 0; + MaxLineDelta = 0; + } else { + int64_t PrevLine = 1; + bool First = true; + for (const auto &line_entry : Lines) { + if (First) + First = false; + else { + int64_t LineDelta = (int64_t)line_entry.Line - PrevLine; + auto End = DeltaInfos.end(); + auto Pos = std::lower_bound(DeltaInfos.begin(), End, LineDelta); + if (Pos != End && Pos->Delta == LineDelta) + ++Pos->Count; + else + DeltaInfos.insert(Pos, DeltaInfo(LineDelta, 1)); + if (LineDelta < MinLineDelta) + MinLineDelta = LineDelta; + if (LineDelta > MaxLineDelta) + MaxLineDelta = LineDelta; + } + PrevLine = (int64_t)line_entry.Line; + } + assert(MinLineDelta <= MaxLineDelta); + } + // Set the min and max line delta intelligently based on the counts of + // the line deltas. if our range is too large. + const int64_t MaxLineRange = 14; + if (MaxLineDelta - MinLineDelta > MaxLineRange) { + uint32_t BestIndex = 0; + uint32_t BestEndIndex = 0; + uint32_t BestCount = 0; + const size_t NumDeltaInfos = DeltaInfos.size(); + for (uint32_t I = 0; I < NumDeltaInfos; ++I) { + const int64_t FirstDelta = DeltaInfos[I].Delta; + uint32_t CurrCount = 0; + uint32_t J; + for (J = I; J < NumDeltaInfos; ++J) { + auto LineRange = DeltaInfos[J].Delta - FirstDelta; + if (LineRange > MaxLineRange) + break; + CurrCount += DeltaInfos[J].Count; + } + if (CurrCount > BestCount) { + BestIndex = I; + BestEndIndex = J - 1; + BestCount = CurrCount; + } + } + MinLineDelta = DeltaInfos[BestIndex].Delta; + MaxLineDelta = DeltaInfos[BestEndIndex].Delta; + } + if (MinLineDelta == MaxLineDelta && MinLineDelta > 0 && + MinLineDelta < MaxLineRange) + MinLineDelta = 0; + assert(MinLineDelta <= MaxLineDelta); + + // Initialize the line entry state as a starting point. All line entries + // will be deltas from this. + LineEntry Prev(BaseAddr, 1, Lines.front().Line); + + // Write out the min and max line delta as signed LEB128. + Out.writeSLEB(MinLineDelta); + Out.writeSLEB(MaxLineDelta); + // Write out the starting line number as a unsigned LEB128. 
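+  // The encoded table therefore starts with SLEB128(MinLineDelta),
+  // SLEB128(MaxLineDelta) and ULEB128(first line number). Each row after that
+  // is either a single special opcode byte, when
+  //   Op = (LineDelta - MinLineDelta) + AddrDelta * LineRange + FirstSpecial
+  // fits in a byte (see encodeSpecial above), or an explicit AdvanceLine /
+  // AdvancePC pair. For example, with MinLineDelta = -1 and MaxLineDelta = 12
+  // (so LineRange = 14), a row that advances 4 address units and 1 line is
+  // the single byte (1 + 1) + 4 * 14 + 4 = 62.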
+ Out.writeULEB(Prev.Line); + + for (const auto &Curr : Lines) { + if (Curr.Addr < BaseAddr) + return createStringError(std::errc::invalid_argument, + "LineEntry has address 0x%" PRIx64 " which is " + "less than the function start address 0x%" + PRIx64, Curr.Addr, BaseAddr); + if (Curr.Addr < Prev.Addr) + return createStringError(std::errc::invalid_argument, + "LineEntry in LineTable not in ascending order"); + const uint64_t AddrDelta = Curr.Addr - Prev.Addr; + int64_t LineDelta = 0; + if (Curr.Line > Prev.Line) + LineDelta = Curr.Line - Prev.Line; + else if (Prev.Line > Curr.Line) + LineDelta = -((int32_t)(Prev.Line - Curr.Line)); + + // Set the file if it doesn't match the current one. + if (Curr.File != Prev.File) { + Out.writeU8(SetFile); + Out.writeULEB(Curr.File); + } + + uint8_t SpecialOp; + if (encodeSpecial(MinLineDelta, MaxLineDelta, LineDelta, AddrDelta, + SpecialOp)) { + // Advance the PC and line and push a row. + Out.writeU8(SpecialOp); + } else { + // We can't encode the address delta and line delta into + // a single special opcode, we must do them separately. + + // Advance the line. + if (LineDelta != 0) { + Out.writeU8(AdvanceLine); + Out.writeSLEB(LineDelta); + } + + // Advance the PC and push a row. + Out.writeU8(AdvancePC); + Out.writeULEB(AddrDelta); + } + Prev = Curr; + } + Out.writeU8(EndSequence); + return Error::success(); +} + +// Parse all line table entries into the "LineTable" vector. We can +// cache the results of this if needed, or we can call LineTable::lookup() +// below. +llvm::Expected LineTable::decode(DataExtractor &Data, + uint64_t BaseAddr) { + LineTable LT; + llvm::Error Err = parse(Data, BaseAddr, [&](const LineEntry &Row) -> bool { + LT.Lines.push_back(Row); + return true; // Keep parsing by returning true. + }); + if (Err) + return std::move(Err); + return LT; +} +// Parse the line table on the fly and find the row we are looking for. +// We will need to determine if we need to cache the line table by calling +// LineTable::parseAllEntries(...) or just call this function each time. +// There is a CPU vs memory tradeoff we will need to determine. +LineEntry LineTable::lookup(DataExtractor &Data, uint64_t BaseAddr, uint64_t Addr) { + LineEntry Result; + llvm::Error Err = parse(Data, BaseAddr, + [Addr, &Result](const LineEntry &Row) -> bool { + if (Addr < Row.Addr) + return false; // Stop parsing, result contains the line table row! + Result = Row; + if (Addr == Row.Addr) { + // Stop parsing, this is the row we are looking for since the address + // matches. + return false; + } + return true; // Keep parsing till we find the right row. 
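+        // Note: Result is only updated for rows with Row.Addr <= Addr; once
+        // Addr < Row.Addr we stop, so Result holds the last row at or below
+        // Addr, which is the row that describes it.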
+ }); + return Result; +} + +raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const LineTable <) { + for (const auto &LineEntry : LT) + OS << LineEntry << '\n'; + return OS; +} diff --git a/lib/DebugInfo/GSYM/Range.cpp b/lib/DebugInfo/GSYM/Range.cpp index ca61984dacbd..19ab700fdd57 100644 --- a/lib/DebugInfo/GSYM/Range.cpp +++ b/lib/DebugInfo/GSYM/Range.cpp @@ -8,6 +8,8 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/GSYM/Range.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/Support/DataExtractor.h" #include #include @@ -40,6 +42,17 @@ bool AddressRanges::contains(uint64_t Addr) const { return It != Ranges.begin() && Addr < It[-1].End; } +bool AddressRanges::contains(AddressRange Range) const { + if (Range.size() == 0) + return false; + auto It = std::partition_point( + Ranges.begin(), Ranges.end(), + [=](const AddressRange &R) { return R.Start <= Range.Start; }); + if (It == Ranges.begin()) + return false; + return Range.End <= It[-1].End; +} + raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const AddressRange &R) { return OS << '[' << HEX64(R.Start) << " - " << HEX64(R.End) << ")"; } @@ -53,3 +66,37 @@ raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const AddressRanges &AR) { } return OS; } + +void AddressRange::encode(FileWriter &O, uint64_t BaseAddr) const { + assert(Start >= BaseAddr); + O.writeULEB(Start - BaseAddr); + O.writeULEB(size()); +} + +void AddressRange::decode(DataExtractor &Data, uint64_t BaseAddr, + uint64_t &Offset) { + const uint64_t AddrOffset = Data.getULEB128(&Offset); + const uint64_t Size = Data.getULEB128(&Offset); + const uint64_t StartAddr = BaseAddr + AddrOffset; + Start = StartAddr; + End = StartAddr + Size; +} + +void AddressRanges::encode(FileWriter &O, uint64_t BaseAddr) const { + O.writeULEB(Ranges.size()); + if (Ranges.empty()) + return; + for (auto Range : Ranges) + Range.encode(O, BaseAddr); +} + +void AddressRanges::decode(DataExtractor &Data, uint64_t BaseAddr, + uint64_t &Offset) { + clear(); + uint64_t NumRanges = Data.getULEB128(&Offset); + if (NumRanges == 0) + return; + Ranges.resize(NumRanges); + for (auto &Range : Ranges) + Range.decode(Data, BaseAddr, Offset); +} diff --git a/lib/DebugInfo/MSF/MappedBlockStream.cpp b/lib/DebugInfo/MSF/MappedBlockStream.cpp index df925771f0d9..5dc9c86b34fd 100644 --- a/lib/DebugInfo/MSF/MappedBlockStream.cpp +++ b/lib/DebugInfo/MSF/MappedBlockStream.cpp @@ -52,7 +52,7 @@ MappedBlockStream::MappedBlockStream(uint32_t BlockSize, std::unique_ptr MappedBlockStream::createStream( uint32_t BlockSize, const MSFStreamLayout &Layout, BinaryStreamRef MsfData, BumpPtrAllocator &Allocator) { - return llvm::make_unique>( + return std::make_unique>( BlockSize, Layout, MsfData, Allocator); } @@ -63,7 +63,7 @@ std::unique_ptr MappedBlockStream::createIndexedStream( MSFStreamLayout SL; SL.Blocks = Layout.StreamMap[StreamIndex]; SL.Length = Layout.StreamSizes[StreamIndex]; - return llvm::make_unique>( + return std::make_unique>( Layout.SB->BlockSize, SL, MsfData, Allocator); } @@ -318,7 +318,7 @@ WritableMappedBlockStream::createStream(uint32_t BlockSize, const MSFStreamLayout &Layout, WritableBinaryStreamRef MsfData, BumpPtrAllocator &Allocator) { - return llvm::make_unique>( + return std::make_unique>( BlockSize, Layout, MsfData, Allocator); } diff --git a/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp b/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp index a8ae076e1d6c..c2552f55703c 100644 --- a/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp +++ 
b/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp @@ -405,7 +405,7 @@ DIARawSymbol::findChildren(PDB_SymType Type) const { return nullptr; } - return llvm::make_unique(Session, DiaEnumerator); + return std::make_unique(Session, DiaEnumerator); } std::unique_ptr @@ -423,7 +423,7 @@ DIARawSymbol::findChildren(PDB_SymType Type, StringRef Name, Symbol->findChildrenEx(EnumVal, Name16Str, CompareFlags, &DiaEnumerator)) return nullptr; - return llvm::make_unique(Session, DiaEnumerator); + return std::make_unique(Session, DiaEnumerator); } std::unique_ptr @@ -443,7 +443,7 @@ DIARawSymbol::findChildrenByAddr(PDB_SymType Type, StringRef Name, Section, Offset, &DiaEnumerator)) return nullptr; - return llvm::make_unique(Session, DiaEnumerator); + return std::make_unique(Session, DiaEnumerator); } std::unique_ptr @@ -462,7 +462,7 @@ DIARawSymbol::findChildrenByVA(PDB_SymType Type, StringRef Name, &DiaEnumerator)) return nullptr; - return llvm::make_unique(Session, DiaEnumerator); + return std::make_unique(Session, DiaEnumerator); } std::unique_ptr @@ -480,7 +480,7 @@ DIARawSymbol::findChildrenByRVA(PDB_SymType Type, StringRef Name, &DiaEnumerator)) return nullptr; - return llvm::make_unique(Session, DiaEnumerator); + return std::make_unique(Session, DiaEnumerator); } std::unique_ptr @@ -489,7 +489,7 @@ DIARawSymbol::findInlineFramesByAddr(uint32_t Section, uint32_t Offset) const { if (S_OK != Symbol->findInlineFramesByAddr(Section, Offset, &DiaEnumerator)) return nullptr; - return llvm::make_unique(Session, DiaEnumerator); + return std::make_unique(Session, DiaEnumerator); } std::unique_ptr @@ -498,7 +498,7 @@ DIARawSymbol::findInlineFramesByRVA(uint32_t RVA) const { if (S_OK != Symbol->findInlineFramesByRVA(RVA, &DiaEnumerator)) return nullptr; - return llvm::make_unique(Session, DiaEnumerator); + return std::make_unique(Session, DiaEnumerator); } std::unique_ptr @@ -507,7 +507,7 @@ DIARawSymbol::findInlineFramesByVA(uint64_t VA) const { if (S_OK != Symbol->findInlineFramesByVA(VA, &DiaEnumerator)) return nullptr; - return llvm::make_unique(Session, DiaEnumerator); + return std::make_unique(Session, DiaEnumerator); } std::unique_ptr DIARawSymbol::findInlineeLines() const { @@ -515,7 +515,7 @@ std::unique_ptr DIARawSymbol::findInlineeLines() const { if (S_OK != Symbol->findInlineeLines(&DiaEnumerator)) return nullptr; - return llvm::make_unique(DiaEnumerator); + return std::make_unique(DiaEnumerator); } std::unique_ptr @@ -526,7 +526,7 @@ DIARawSymbol::findInlineeLinesByAddr(uint32_t Section, uint32_t Offset, Symbol->findInlineeLinesByAddr(Section, Offset, Length, &DiaEnumerator)) return nullptr; - return llvm::make_unique(DiaEnumerator); + return std::make_unique(DiaEnumerator); } std::unique_ptr @@ -535,7 +535,7 @@ DIARawSymbol::findInlineeLinesByRVA(uint32_t RVA, uint32_t Length) const { if (S_OK != Symbol->findInlineeLinesByRVA(RVA, Length, &DiaEnumerator)) return nullptr; - return llvm::make_unique(DiaEnumerator); + return std::make_unique(DiaEnumerator); } std::unique_ptr @@ -544,7 +544,7 @@ DIARawSymbol::findInlineeLinesByVA(uint64_t VA, uint32_t Length) const { if (S_OK != Symbol->findInlineeLinesByVA(VA, Length, &DiaEnumerator)) return nullptr; - return llvm::make_unique(DiaEnumerator); + return std::make_unique(DiaEnumerator); } void DIARawSymbol::getDataBytes(llvm::SmallVector &bytes) const { @@ -776,7 +776,7 @@ std::unique_ptr DIARawSymbol::getSrcLineOnTypeDefn() const { if (FAILED(Symbol->getSrcLineOnTypeDefn(&LineNumber)) || !LineNumber) return nullptr; - return llvm::make_unique(LineNumber); + 
return std::make_unique(LineNumber); } uint32_t DIARawSymbol::getStride() const { @@ -871,7 +871,7 @@ DIARawSymbol::getVirtualBaseTableType() const { if (FAILED(Symbol->get_virtualBaseTableType(&TableType)) || !TableType) return nullptr; - auto RawVT = llvm::make_unique(Session, TableType); + auto RawVT = std::make_unique(Session, TableType); auto Pointer = PDBSymbol::createAs(Session, std::move(RawVT)); return unique_dyn_cast(Pointer->getPointeeType()); diff --git a/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp b/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp index e2d928f2c4b2..4f0e078e6712 100644 --- a/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp +++ b/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp @@ -23,7 +23,7 @@ std::unique_ptr DIASectionContrib::getCompiland() const { if (FAILED(Section->get_compiland(&Symbol))) return nullptr; - auto RawSymbol = llvm::make_unique(Session, Symbol); + auto RawSymbol = std::make_unique(Session, Symbol); return PDBSymbol::createAs(Session, std::move(RawSymbol)); } diff --git a/lib/DebugInfo/PDB/DIA/DIASession.cpp b/lib/DebugInfo/PDB/DIA/DIASession.cpp index 4e0b8587c613..64ffa776bbd6 100644 --- a/lib/DebugInfo/PDB/DIA/DIASession.cpp +++ b/lib/DebugInfo/PDB/DIA/DIASession.cpp @@ -73,15 +73,7 @@ static Error LoadDIA(CComPtr &DiaDataSource) { #if !defined(_MSC_VER) return llvm::make_error(pdb_error_code::dia_failed_loading); #else - const wchar_t *msdia_dll = nullptr; -#if _MSC_VER >= 1900 && _MSC_VER < 2000 - msdia_dll = L"msdia140.dll"; // VS2015 -#elif _MSC_VER >= 1800 - msdia_dll = L"msdia120.dll"; // VS2013 -#else -#error "Unknown Visual Studio version." -#endif - + const wchar_t *msdia_dll = L"msdia140.dll"; HRESULT HR; if (FAILED(HR = NoRegCoCreate(msdia_dll, CLSID_DiaSource, IID_IDiaDataSource, reinterpret_cast(&DiaDataSource)))) @@ -158,7 +150,7 @@ std::unique_ptr DIASession::getGlobalScope() { if (S_OK != Session->get_globalScope(&GlobalScope)) return nullptr; - auto RawSymbol = llvm::make_unique(*this, GlobalScope); + auto RawSymbol = std::make_unique(*this, GlobalScope); auto PdbSymbol(PDBSymbol::create(*this, std::move(RawSymbol))); std::unique_ptr ExeSymbol( static_cast(PdbSymbol.release())); @@ -193,7 +185,7 @@ DIASession::getSymbolById(SymIndexId SymbolId) const { if (S_OK != Session->symbolById(SymbolId, &LocatedSymbol)) return nullptr; - auto RawSymbol = llvm::make_unique(*this, LocatedSymbol); + auto RawSymbol = std::make_unique(*this, LocatedSymbol); return PDBSymbol::create(*this, std::move(RawSymbol)); } @@ -210,7 +202,7 @@ DIASession::findSymbolByAddress(uint64_t Address, PDB_SymType Type) const { if (S_OK != Session->findSymbolByRVA(RVA, EnumVal, &Symbol)) return nullptr; } - auto RawSymbol = llvm::make_unique(*this, Symbol); + auto RawSymbol = std::make_unique(*this, Symbol); return PDBSymbol::create(*this, std::move(RawSymbol)); } @@ -222,7 +214,7 @@ std::unique_ptr DIASession::findSymbolByRVA(uint32_t RVA, if (S_OK != Session->findSymbolByRVA(RVA, EnumVal, &Symbol)) return nullptr; - auto RawSymbol = llvm::make_unique(*this, Symbol); + auto RawSymbol = std::make_unique(*this, Symbol); return PDBSymbol::create(*this, std::move(RawSymbol)); } @@ -235,7 +227,7 @@ DIASession::findSymbolBySectOffset(uint32_t Sect, uint32_t Offset, if (S_OK != Session->findSymbolByAddr(Sect, Offset, EnumVal, &Symbol)) return nullptr; - auto RawSymbol = llvm::make_unique(*this, Symbol); + auto RawSymbol = std::make_unique(*this, Symbol); return PDBSymbol::create(*this, std::move(RawSymbol)); } @@ -251,7 +243,7 @@ DIASession::findLineNumbers(const 
PDBSymbolCompiland &Compiland, RawFile.getDiaFile(), &LineNumbers)) return nullptr; - return llvm::make_unique(LineNumbers); + return std::make_unique(LineNumbers); } std::unique_ptr @@ -265,7 +257,7 @@ DIASession::findLineNumbersByAddress(uint64_t Address, uint32_t Length) const { if (S_OK != Session->findLinesByRVA(RVA, Length, &LineNumbers)) return nullptr; } - return llvm::make_unique(LineNumbers); + return std::make_unique(LineNumbers); } std::unique_ptr @@ -274,7 +266,7 @@ DIASession::findLineNumbersByRVA(uint32_t RVA, uint32_t Length) const { if (S_OK != Session->findLinesByRVA(RVA, Length, &LineNumbers)) return nullptr; - return llvm::make_unique(LineNumbers); + return std::make_unique(LineNumbers); } std::unique_ptr @@ -284,7 +276,7 @@ DIASession::findLineNumbersBySectOffset(uint32_t Section, uint32_t Offset, if (S_OK != Session->findLinesByAddr(Section, Offset, Length, &LineNumbers)) return nullptr; - return llvm::make_unique(LineNumbers); + return std::make_unique(LineNumbers); } std::unique_ptr @@ -306,7 +298,7 @@ DIASession::findSourceFiles(const PDBSymbolCompiland *Compiland, if (S_OK != Session->findFile(DiaCompiland, Utf16Pattern.m_str, Flags, &SourceFiles)) return nullptr; - return llvm::make_unique(*this, SourceFiles); + return std::make_unique(*this, SourceFiles); } std::unique_ptr @@ -342,7 +334,7 @@ std::unique_ptr DIASession::getAllSourceFiles() const { if (S_OK != Session->findFile(nullptr, nullptr, nsNone, &Files)) return nullptr; - return llvm::make_unique(*this, Files); + return std::make_unique(*this, Files); } std::unique_ptr DIASession::getSourceFilesForCompiland( @@ -355,7 +347,7 @@ std::unique_ptr DIASession::getSourceFilesForCompiland( Session->findFile(RawSymbol.getDiaSymbol(), nullptr, nsNone, &Files)) return nullptr; - return llvm::make_unique(*this, Files); + return std::make_unique(*this, Files); } std::unique_ptr @@ -364,7 +356,7 @@ DIASession::getSourceFileById(uint32_t FileId) const { if (S_OK != Session->findFileById(FileId, &LocatedFile)) return nullptr; - return llvm::make_unique(*this, LocatedFile); + return std::make_unique(*this, LocatedFile); } std::unique_ptr DIASession::getDebugStreams() const { @@ -372,7 +364,7 @@ std::unique_ptr DIASession::getDebugStreams() const { if (S_OK != Session->getEnumDebugStreams(&DiaEnumerator)) return nullptr; - return llvm::make_unique(DiaEnumerator); + return std::make_unique(DiaEnumerator); } std::unique_ptr DIASession::getEnumTables() const { @@ -380,7 +372,7 @@ std::unique_ptr DIASession::getEnumTables() const { if (S_OK != Session->getEnumTables(&DiaEnumerator)) return nullptr; - return llvm::make_unique(DiaEnumerator); + return std::make_unique(DiaEnumerator); } template static CComPtr getTableEnumerator(IDiaSession &Session) { @@ -407,7 +399,7 @@ DIASession::getInjectedSources() const { if (!Files) return nullptr; - return llvm::make_unique(Files); + return std::make_unique(Files); } std::unique_ptr @@ -417,7 +409,7 @@ DIASession::getSectionContribs() const { if (!Sections) return nullptr; - return llvm::make_unique(*this, Sections); + return std::make_unique(*this, Sections); } std::unique_ptr @@ -427,5 +419,5 @@ DIASession::getFrameData() const { if (!FD) return nullptr; - return llvm::make_unique(FD); + return std::make_unique(FD); } diff --git a/lib/DebugInfo/PDB/GenericError.cpp b/lib/DebugInfo/PDB/GenericError.cpp index 70dc094c42ec..0e4cba3174b2 100644 --- a/lib/DebugInfo/PDB/GenericError.cpp +++ b/lib/DebugInfo/PDB/GenericError.cpp @@ -34,8 +34,8 @@ public: return "The PDB file path is an 
invalid UTF8 sequence."; case pdb_error_code::signature_out_of_date: return "The signature does not match; the file(s) might be out of date."; - case pdb_error_code::external_cmdline_ref: - return "The path to this file must be provided on the command-line."; + case pdb_error_code::no_matching_pch: + return "No matching precompiled header could be located."; } llvm_unreachable("Unrecognized generic_error_code"); } diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp index 20b6c6142547..419734771ccd 100644 --- a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp @@ -180,12 +180,12 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter, void DbiModuleDescriptorBuilder::addDebugSubsection( std::shared_ptr Subsection) { assert(Subsection); - C13Builders.push_back(llvm::make_unique( + C13Builders.push_back(std::make_unique( std::move(Subsection), CodeViewContainer::Pdb)); } void DbiModuleDescriptorBuilder::addDebugSubsection( const DebugSubsectionRecord &SubsectionContents) { - C13Builders.push_back(llvm::make_unique( + C13Builders.push_back(std::make_unique( SubsectionContents, CodeViewContainer::Pdb)); } diff --git a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp index b7ade0072ee5..0e00c2f7ff98 100644 --- a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp @@ -114,7 +114,7 @@ Expected DbiStreamBuilder::addModuleInfo(StringRef ModuleName) { uint32_t Index = ModiList.size(); ModiList.push_back( - llvm::make_unique(ModuleName, Index, Msf)); + std::make_unique(ModuleName, Index, Msf)); return *ModiList.back(); } diff --git a/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp index 8ed5b8b44c59..432f1e9b24d3 100644 --- a/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp @@ -183,8 +183,8 @@ void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) { } GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf) - : Msf(Msf), PSH(llvm::make_unique()), - GSH(llvm::make_unique()) {} + : Msf(Msf), PSH(std::make_unique()), + GSH(std::make_unique()) {} GSIStreamBuilder::~GSIStreamBuilder() {} diff --git a/lib/DebugInfo/PDB/Native/Hash.cpp b/lib/DebugInfo/PDB/Native/Hash.cpp index b5c139ecbec0..7fb6b4bd5d31 100644 --- a/lib/DebugInfo/PDB/Native/Hash.cpp +++ b/lib/DebugInfo/PDB/Native/Hash.cpp @@ -8,8 +8,8 @@ #include "llvm/DebugInfo/PDB/Native/Hash.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/CRC.h" #include "llvm/Support/Endian.h" -#include "llvm/Support/JamCRC.h" #include using namespace llvm; @@ -79,7 +79,6 @@ uint32_t pdb::hashStringV2(StringRef Str) { // Corresponds to `SigForPbCb` in langapi/shared/crc32.h. 
uint32_t pdb::hashBufferV8(ArrayRef Buf) { JamCRC JC(/*Init=*/0U); - JC.update(makeArrayRef(reinterpret_cast(Buf.data()), - Buf.size())); + JC.update(Buf); return JC.getCRC(); } diff --git a/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp b/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp index f17ff5bb01f2..2f6a5bc3d574 100644 --- a/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp +++ b/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp @@ -46,30 +46,31 @@ public: uint64_t getCodeByteSize() const override { return Entry.FileSize; } std::string getFileName() const override { - auto Name = Strings.getStringForID(Entry.FileNI); - assert(Name && "InjectedSourceStream should have rejected this"); - return *Name; + StringRef Ret = cantFail(Strings.getStringForID(Entry.FileNI), + "InjectedSourceStream should have rejected this"); + return Ret; } std::string getObjectFileName() const override { - auto ObjName = Strings.getStringForID(Entry.ObjNI); - assert(ObjName && "InjectedSourceStream should have rejected this"); - return *ObjName; + StringRef Ret = cantFail(Strings.getStringForID(Entry.ObjNI), + "InjectedSourceStream should have rejected this"); + return Ret; } std::string getVirtualFileName() const override { - auto VName = Strings.getStringForID(Entry.VFileNI); - assert(VName && "InjectedSourceStream should have rejected this"); - return *VName; + StringRef Ret = cantFail(Strings.getStringForID(Entry.VFileNI), + "InjectedSourceStream should have rejected this"); + return Ret; } uint32_t getCompression() const override { return Entry.Compression; } std::string getCode() const override { // Get name of stream storing the data. - auto VName = Strings.getStringForID(Entry.VFileNI); - assert(VName && "InjectedSourceStream should have rejected this"); - std::string StreamName = ("/src/files/" + *VName).str(); + StringRef VName = + cantFail(Strings.getStringForID(Entry.VFileNI), + "InjectedSourceStream should have rejected this"); + std::string StreamName = ("/src/files/" + VName).str(); // Find stream with that name and read its data. 
// FIXME: Consider validating (or even loading) all this in @@ -104,14 +105,14 @@ std::unique_ptr NativeEnumInjectedSources::getChildAtIndex(uint32_t N) const { if (N >= getChildCount()) return nullptr; - return make_unique(std::next(Stream.begin(), N)->second, + return std::make_unique(std::next(Stream.begin(), N)->second, File, Strings); } std::unique_ptr NativeEnumInjectedSources::getNext() { if (Cur == Stream.end()) return nullptr; - return make_unique((Cur++)->second, File, Strings); + return std::make_unique((Cur++)->second, File, Strings); } void NativeEnumInjectedSources::reset() { Cur = Stream.begin(); } diff --git a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp index 8e43cf24495a..2ad552470b61 100644 --- a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp +++ b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp @@ -30,68 +30,68 @@ void NativeRawSymbol::dump(raw_ostream &OS, int Indent, std::unique_ptr NativeRawSymbol::findChildren(PDB_SymType Type) const { - return llvm::make_unique>(); + return std::make_unique>(); } std::unique_ptr NativeRawSymbol::findChildren(PDB_SymType Type, StringRef Name, PDB_NameSearchFlags Flags) const { - return llvm::make_unique>(); + return std::make_unique>(); } std::unique_ptr NativeRawSymbol::findChildrenByAddr(PDB_SymType Type, StringRef Name, PDB_NameSearchFlags Flags, uint32_t Section, uint32_t Offset) const { - return llvm::make_unique>(); + return std::make_unique>(); } std::unique_ptr NativeRawSymbol::findChildrenByVA(PDB_SymType Type, StringRef Name, PDB_NameSearchFlags Flags, uint64_t VA) const { - return llvm::make_unique>(); + return std::make_unique>(); } std::unique_ptr NativeRawSymbol::findChildrenByRVA(PDB_SymType Type, StringRef Name, PDB_NameSearchFlags Flags, uint32_t RVA) const { - return llvm::make_unique>(); + return std::make_unique>(); } std::unique_ptr NativeRawSymbol::findInlineFramesByAddr(uint32_t Section, uint32_t Offset) const { - return llvm::make_unique>(); + return std::make_unique>(); } std::unique_ptr NativeRawSymbol::findInlineFramesByRVA(uint32_t RVA) const { - return llvm::make_unique>(); + return std::make_unique>(); } std::unique_ptr NativeRawSymbol::findInlineFramesByVA(uint64_t VA) const { - return llvm::make_unique>(); + return std::make_unique>(); } std::unique_ptr NativeRawSymbol::findInlineeLines() const { - return llvm::make_unique>(); + return std::make_unique>(); } std::unique_ptr NativeRawSymbol::findInlineeLinesByAddr(uint32_t Section, uint32_t Offset, uint32_t Length) const { - return llvm::make_unique>(); + return std::make_unique>(); } std::unique_ptr NativeRawSymbol::findInlineeLinesByRVA(uint32_t RVA, uint32_t Length) const { - return llvm::make_unique>(); + return std::make_unique>(); } std::unique_ptr NativeRawSymbol::findInlineeLinesByVA(uint64_t VA, uint32_t Length) const { - return llvm::make_unique>(); + return std::make_unique>(); } void NativeRawSymbol::getDataBytes(SmallVector &bytes) const { diff --git a/lib/DebugInfo/PDB/Native/NativeSession.cpp b/lib/DebugInfo/PDB/Native/NativeSession.cpp index 8a49cb1c5963..b45a5881dcb5 100644 --- a/lib/DebugInfo/PDB/Native/NativeSession.cpp +++ b/lib/DebugInfo/PDB/Native/NativeSession.cpp @@ -59,18 +59,18 @@ NativeSession::~NativeSession() = default; Error NativeSession::createFromPdb(std::unique_ptr Buffer, std::unique_ptr &Session) { StringRef Path = Buffer->getBufferIdentifier(); - auto Stream = llvm::make_unique( + auto Stream = std::make_unique( std::move(Buffer), llvm::support::little); - auto Allocator = 
llvm::make_unique(); - auto File = llvm::make_unique(Path, std::move(Stream), *Allocator); + auto Allocator = std::make_unique(); + auto File = std::make_unique(Path, std::move(Stream), *Allocator); if (auto EC = File->parseFileHeaders()) return EC; if (auto EC = File->parseStreamData()) return EC; Session = - llvm::make_unique(std::move(File), std::move(Allocator)); + std::make_unique(std::move(File), std::move(Allocator)); return Error::success(); } @@ -202,7 +202,7 @@ NativeSession::getInjectedSources() const { consumeError(Strings.takeError()); return nullptr; } - return make_unique(*Pdb, *ISS, *Strings); + return std::make_unique(*Pdb, *ISS, *Strings); } std::unique_ptr diff --git a/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp b/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp index 9f5e86281a23..26ccb7daece0 100644 --- a/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp +++ b/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp @@ -163,14 +163,14 @@ void NativeTypeEnum::dump(raw_ostream &OS, int Indent, std::unique_ptr NativeTypeEnum::findChildren(PDB_SymType Type) const { if (Type != PDB_SymType::Data) - return llvm::make_unique>(); + return std::make_unique>(); const NativeTypeEnum *ClassParent = nullptr; if (!Modifiers) ClassParent = this; else ClassParent = UnmodifiedType; - return llvm::make_unique(Session, *ClassParent); + return std::make_unique(Session, *ClassParent); } PDB_SymType NativeTypeEnum::getSymTag() const { return PDB_SymType::Enum; } diff --git a/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp b/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp index 405303469c18..f98a4c3043eb 100644 --- a/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp +++ b/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp @@ -65,7 +65,7 @@ private: std::unique_ptr wrap(std::unique_ptr S) const { if (!S) return nullptr; - auto NTFA = llvm::make_unique(Session, std::move(S)); + auto NTFA = std::make_unique(Session, std::move(S)); return PDBSymbol::create(Session, std::move(NTFA)); } NativeSession &Session; @@ -133,9 +133,9 @@ void NativeTypeFunctionSig::dump(raw_ostream &OS, int Indent, std::unique_ptr NativeTypeFunctionSig::findChildren(PDB_SymType Type) const { if (Type != PDB_SymType::FunctionArg) - return llvm::make_unique>(); + return std::make_unique>(); - auto NET = llvm::make_unique(Session, + auto NET = std::make_unique(Session, /* copy */ ArgList.ArgIndices); return std::unique_ptr( new NativeEnumFunctionArgs(Session, std::move(NET))); diff --git a/lib/DebugInfo/PDB/Native/PDBFile.cpp b/lib/DebugInfo/PDB/Native/PDBFile.cpp index 983031dfcb78..9ac226b89139 100644 --- a/lib/DebugInfo/PDB/Native/PDBFile.cpp +++ b/lib/DebugInfo/PDB/Native/PDBFile.cpp @@ -264,7 +264,7 @@ Expected PDBFile::getPDBGlobalsStream() { safelyCreateIndexedStream(DbiS->getGlobalSymbolStreamIndex()); if (!GlobalS) return GlobalS.takeError(); - auto TempGlobals = llvm::make_unique(std::move(*GlobalS)); + auto TempGlobals = std::make_unique(std::move(*GlobalS)); if (auto EC = TempGlobals->reload()) return std::move(EC); Globals = std::move(TempGlobals); @@ -277,7 +277,7 @@ Expected PDBFile::getPDBInfoStream() { auto InfoS = safelyCreateIndexedStream(StreamPDB); if (!InfoS) return InfoS.takeError(); - auto TempInfo = llvm::make_unique(std::move(*InfoS)); + auto TempInfo = std::make_unique(std::move(*InfoS)); if (auto EC = TempInfo->reload()) return std::move(EC); Info = std::move(TempInfo); @@ -290,7 +290,7 @@ Expected PDBFile::getPDBDbiStream() { auto DbiS = safelyCreateIndexedStream(StreamDBI); if (!DbiS) return DbiS.takeError(); - 
auto TempDbi = llvm::make_unique(std::move(*DbiS)); + auto TempDbi = std::make_unique(std::move(*DbiS)); if (auto EC = TempDbi->reload(this)) return std::move(EC); Dbi = std::move(TempDbi); @@ -303,7 +303,7 @@ Expected PDBFile::getPDBTpiStream() { auto TpiS = safelyCreateIndexedStream(StreamTPI); if (!TpiS) return TpiS.takeError(); - auto TempTpi = llvm::make_unique(*this, std::move(*TpiS)); + auto TempTpi = std::make_unique(*this, std::move(*TpiS)); if (auto EC = TempTpi->reload()) return std::move(EC); Tpi = std::move(TempTpi); @@ -319,7 +319,7 @@ Expected PDBFile::getPDBIpiStream() { auto IpiS = safelyCreateIndexedStream(StreamIPI); if (!IpiS) return IpiS.takeError(); - auto TempIpi = llvm::make_unique(*this, std::move(*IpiS)); + auto TempIpi = std::make_unique(*this, std::move(*IpiS)); if (auto EC = TempIpi->reload()) return std::move(EC); Ipi = std::move(TempIpi); @@ -337,7 +337,7 @@ Expected PDBFile::getPDBPublicsStream() { safelyCreateIndexedStream(DbiS->getPublicSymbolStreamIndex()); if (!PublicS) return PublicS.takeError(); - auto TempPublics = llvm::make_unique(std::move(*PublicS)); + auto TempPublics = std::make_unique(std::move(*PublicS)); if (auto EC = TempPublics->reload()) return std::move(EC); Publics = std::move(TempPublics); @@ -356,7 +356,7 @@ Expected PDBFile::getPDBSymbolStream() { if (!SymbolS) return SymbolS.takeError(); - auto TempSymbols = llvm::make_unique(std::move(*SymbolS)); + auto TempSymbols = std::make_unique(std::move(*SymbolS)); if (auto EC = TempSymbols->reload()) return std::move(EC); Symbols = std::move(TempSymbols); @@ -370,7 +370,7 @@ Expected PDBFile::getStringTable() { if (!NS) return NS.takeError(); - auto N = llvm::make_unique(); + auto N = std::make_unique(); BinaryStreamReader Reader(**NS); if (auto EC = N->reload(Reader)) return std::move(EC); @@ -391,7 +391,7 @@ Expected PDBFile::getInjectedSourceStream() { if (!Strings) return Strings.takeError(); - auto IJ = llvm::make_unique(std::move(*IJS)); + auto IJ = std::make_unique(std::move(*IJS)); if (auto EC = IJ->reload(*Strings)) return std::move(EC); InjectedSources = std::move(IJ); diff --git a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp index 8f5a048ea4b5..aa3288724390 100644 --- a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp @@ -22,7 +22,7 @@ #include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h" #include "llvm/Support/BinaryStream.h" #include "llvm/Support/BinaryStreamWriter.h" -#include "llvm/Support/JamCRC.h" +#include "llvm/Support/CRC.h" #include "llvm/Support/Path.h" #include "llvm/Support/xxhash.h" @@ -42,7 +42,7 @@ Error PDBFileBuilder::initialize(uint32_t BlockSize) { auto ExpectedMsf = MSFBuilder::create(Allocator, BlockSize); if (!ExpectedMsf) return ExpectedMsf.takeError(); - Msf = llvm::make_unique(std::move(*ExpectedMsf)); + Msf = std::make_unique(std::move(*ExpectedMsf)); return Error::success(); } @@ -50,25 +50,25 @@ MSFBuilder &PDBFileBuilder::getMsfBuilder() { return *Msf; } InfoStreamBuilder &PDBFileBuilder::getInfoBuilder() { if (!Info) - Info = llvm::make_unique(*Msf, NamedStreams); + Info = std::make_unique(*Msf, NamedStreams); return *Info; } DbiStreamBuilder &PDBFileBuilder::getDbiBuilder() { if (!Dbi) - Dbi = llvm::make_unique(*Msf); + Dbi = std::make_unique(*Msf); return *Dbi; } TpiStreamBuilder &PDBFileBuilder::getTpiBuilder() { if (!Tpi) - Tpi = llvm::make_unique(*Msf, StreamTPI); + Tpi = std::make_unique(*Msf, StreamTPI); return *Tpi; } TpiStreamBuilder 
&PDBFileBuilder::getIpiBuilder() { if (!Ipi) - Ipi = llvm::make_unique(*Msf, StreamIPI); + Ipi = std::make_unique(*Msf, StreamIPI); return *Ipi; } @@ -78,7 +78,7 @@ PDBStringTableBuilder &PDBFileBuilder::getStringTableBuilder() { GSIStreamBuilder &PDBFileBuilder::getGsiBuilder() { if (!Gsi) - Gsi = llvm::make_unique(*Msf); + Gsi = std::make_unique(*Msf); return *Gsi; } @@ -174,8 +174,7 @@ Error PDBFileBuilder::finalizeMsfLayout() { if (!InjectedSources.empty()) { for (const auto &IS : InjectedSources) { JamCRC CRC(0); - CRC.update(makeArrayRef(IS.Content->getBufferStart(), - IS.Content->getBufferSize())); + CRC.update(arrayRefFromStringRef(IS.Content->getBuffer())); SrcHeaderBlockEntry Entry; ::memset(&Entry, 0, sizeof(SrcHeaderBlockEntry)); diff --git a/lib/DebugInfo/PDB/Native/TpiHashing.cpp b/lib/DebugInfo/PDB/Native/TpiHashing.cpp index b21b82bf76fd..b71b2b158144 100644 --- a/lib/DebugInfo/PDB/Native/TpiHashing.cpp +++ b/lib/DebugInfo/PDB/Native/TpiHashing.cpp @@ -10,7 +10,7 @@ #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/DebugInfo/PDB/Native/Hash.h" -#include "llvm/Support/JamCRC.h" +#include "llvm/Support/CRC.h" using namespace llvm; using namespace llvm::codeview; @@ -124,8 +124,6 @@ Expected llvm::pdb::hashTypeRecord(const CVType &Rec) { // Run CRC32 over the bytes. This corresponds to `hashBufv8`. JamCRC JC(/*Init=*/0U); - ArrayRef Bytes(reinterpret_cast(Rec.data().data()), - Rec.data().size()); - JC.update(Bytes); + JC.update(Rec.data()); return JC.getCRC(); } diff --git a/lib/DebugInfo/PDB/Native/TpiStream.cpp b/lib/DebugInfo/PDB/Native/TpiStream.cpp index 8ee7f897b8bb..ac19db03fab2 100644 --- a/lib/DebugInfo/PDB/Native/TpiStream.cpp +++ b/lib/DebugInfo/PDB/Native/TpiStream.cpp @@ -112,7 +112,7 @@ Error TpiStream::reload() { HashStream = std::move(*HS); } - Types = llvm::make_unique( + Types = std::make_unique( TypeRecords, getNumTypeRecords(), getTypeIndexOffsets()); return Error::success(); } diff --git a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp index 6b308453c2de..4f10f8524a9b 100644 --- a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp @@ -135,7 +135,7 @@ Error TpiStreamBuilder::finalizeMsfLayout() { reinterpret_cast(HashBuffer.data()), calculateHashBufferSize()); HashValueStream = - llvm::make_unique(Bytes, llvm::support::little); + std::make_unique(Bytes, llvm::support::little); } return Error::success(); } diff --git a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp index 7c3ba981fd6b..cb0329bc0ed7 100644 --- a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp @@ -79,7 +79,7 @@ private: std::unique_ptr> PDBSymbolFunc::getArguments() const { - return llvm::make_unique(Session, *this); + return std::make_unique(Session, *this); } void PDBSymbolFunc::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp index 292320a6fe6d..1373615522eb 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp @@ -63,7 +63,7 @@ private: std::unique_ptr PDBSymbolTypeFunctionSig::getArguments() const { - return llvm::make_unique(Session, *this); + return std::make_unique(Session, *this); } void PDBSymbolTypeFunctionSig::dump(PDBSymDumper &Dumper) const { diff --git a/lib/DebugInfo/PDB/UDTLayout.cpp b/lib/DebugInfo/PDB/UDTLayout.cpp index 
acb1599480b0..a8e1d0a619ca 100644 --- a/lib/DebugInfo/PDB/UDTLayout.cpp +++ b/lib/DebugInfo/PDB/UDTLayout.cpp @@ -71,7 +71,7 @@ DataMemberLayoutItem::DataMemberLayoutItem( DataMember(std::move(Member)) { auto Type = DataMember->getType(); if (auto UDT = unique_dyn_cast(Type)) { - UdtLayout = llvm::make_unique(std::move(UDT)); + UdtLayout = std::make_unique(std::move(UDT)); UsedBytes = UdtLayout->usedBytes(); } } @@ -84,7 +84,7 @@ VBPtrLayoutItem::VBPtrLayoutItem(const UDTLayoutBase &Parent, } const PDBSymbolData &DataMemberLayoutItem::getDataMember() { - return *dyn_cast(Symbol); + return *cast(Symbol); } bool DataMemberLayoutItem::hasUDTLayout() const { return UdtLayout != nullptr; } @@ -205,7 +205,7 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) { for (auto &Base : Bases) { uint32_t Offset = Base->getOffset(); // Non-virtual bases never get elided. - auto BL = llvm::make_unique(*this, Offset, false, + auto BL = std::make_unique(*this, Offset, false, std::move(Base)); AllBases.push_back(BL.get()); @@ -216,7 +216,7 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) { assert(VTables.size() <= 1); if (!VTables.empty()) { auto VTLayout = - llvm::make_unique(*this, std::move(VTables[0])); + std::make_unique(*this, std::move(VTables[0])); VTable = VTLayout.get(); @@ -224,7 +224,7 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) { } for (auto &Data : Members) { - auto DM = llvm::make_unique(*this, std::move(Data)); + auto DM = std::make_unique(*this, std::move(Data)); addChildToLayout(std::move(DM)); } @@ -236,7 +236,7 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) { int VBPO = VB->getVirtualBasePointerOffset(); if (!hasVBPtrAtOffset(VBPO)) { if (auto VBP = VB->getRawSymbol().getVirtualBaseTableType()) { - auto VBPL = llvm::make_unique(*this, std::move(VBP), + auto VBPL = std::make_unique(*this, std::move(VBP), VBPO, VBP->getLength()); VBPtr = VBPL.get(); addChildToLayout(std::move(VBPL)); @@ -250,7 +250,7 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) { uint32_t Offset = UsedBytes.find_last() + 1; bool Elide = (Parent != nullptr); auto BL = - llvm::make_unique(*this, Offset, Elide, std::move(VB)); + std::make_unique(*this, Offset, Elide, std::move(VB)); AllBases.push_back(BL.get()); // Only lay this virtual base out directly inside of *this* class if this diff --git a/lib/DebugInfo/Symbolize/DIPrinter.cpp b/lib/DebugInfo/Symbolize/DIPrinter.cpp index b2bfef251485..b1a80cbc4580 100644 --- a/lib/DebugInfo/Symbolize/DIPrinter.cpp +++ b/lib/DebugInfo/Symbolize/DIPrinter.cpp @@ -30,11 +30,6 @@ namespace llvm { namespace symbolize { -// By default, DILineInfo contains "" for function/filename it -// cannot fetch. We replace it to "??" to make our output closer to addr2line. -static const char kDILineInfoBadString[] = ""; -static const char kBadString[] = "??"; - // Prints source code around in the FileName the Line. void DIPrinter::printContext(const std::string &FileName, int64_t Line) { if (PrintSourceContext <= 0) @@ -68,16 +63,16 @@ void DIPrinter::printContext(const std::string &FileName, int64_t Line) { void DIPrinter::print(const DILineInfo &Info, bool Inlined) { if (PrintFunctionNames) { std::string FunctionName = Info.FunctionName; - if (FunctionName == kDILineInfoBadString) - FunctionName = kBadString; + if (FunctionName == DILineInfo::BadString) + FunctionName = DILineInfo::Addr2LineBadString; StringRef Delimiter = PrintPretty ? " at " : "\n"; StringRef Prefix = (PrintPretty && Inlined) ? 
" (inlined by) " : ""; OS << Prefix << FunctionName << Delimiter; } std::string Filename = Info.FileName; - if (Filename == kDILineInfoBadString) - Filename = kBadString; + if (Filename == DILineInfo::BadString) + Filename = DILineInfo::Addr2LineBadString; else if (Basenames) Filename = llvm::sys::path::filename(Filename); if (!Verbose) { @@ -115,8 +110,8 @@ DIPrinter &DIPrinter::operator<<(const DIInliningInfo &Info) { DIPrinter &DIPrinter::operator<<(const DIGlobal &Global) { std::string Name = Global.Name; - if (Name == kDILineInfoBadString) - Name = kBadString; + if (Name == DILineInfo::BadString) + Name = DILineInfo::Addr2LineBadString; OS << Name << "\n"; OS << Global.Start << " " << Global.Size << "\n"; return *this; diff --git a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp index 2765bf44d504..b4d49d9ff958 100644 --- a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +++ b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp @@ -43,20 +43,22 @@ getDILineInfoSpecifier(FunctionNameKind FNKind) { ErrorOr> SymbolizableObjectFile::create(const object::ObjectFile *Obj, - std::unique_ptr DICtx) { + std::unique_ptr DICtx, + bool UntagAddresses) { assert(DICtx); std::unique_ptr res( - new SymbolizableObjectFile(Obj, std::move(DICtx))); + new SymbolizableObjectFile(Obj, std::move(DICtx), UntagAddresses)); std::unique_ptr OpdExtractor; uint64_t OpdAddress = 0; // Find the .opd (function descriptor) section if any, for big-endian // PowerPC64 ELF. if (Obj->getArch() == Triple::ppc64) { for (section_iterator Section : Obj->sections()) { - StringRef Name; - if (auto EC = Section->getName(Name)) - return EC; - if (Name == ".opd") { + Expected NameOrErr = Section->getName(); + if (!NameOrErr) + return errorToErrorCode(NameOrErr.takeError()); + + if (*NameOrErr == ".opd") { Expected E = Section->getContents(); if (!E) return errorToErrorCode(E.takeError()); @@ -103,8 +105,10 @@ SymbolizableObjectFile::create(const object::ObjectFile *Obj, } SymbolizableObjectFile::SymbolizableObjectFile(const ObjectFile *Obj, - std::unique_ptr DICtx) - : Module(Obj), DebugInfoContext(std::move(DICtx)) {} + std::unique_ptr DICtx, + bool UntagAddresses) + : Module(Obj), DebugInfoContext(std::move(DICtx)), + UntagAddresses(UntagAddresses) {} namespace { @@ -172,6 +176,12 @@ std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol, if (!SymbolAddressOrErr) return errorToErrorCode(SymbolAddressOrErr.takeError()); uint64_t SymbolAddress = *SymbolAddressOrErr; + if (UntagAddresses) { + // For kernel addresses, bits 56-63 need to be set, so we sign extend bit 55 + // into bits 56-63 instead of masking them out. + SymbolAddress &= (1ull << 56) - 1; + SymbolAddress = (int64_t(SymbolAddress) << 8) >> 8; + } if (OpdExtractor) { // For big-endian PowerPC64 ELF, symbols in the .opd section refer to // function descriptors. The first word of the descriptor is a pointer to @@ -179,10 +189,8 @@ std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol, // For the purposes of symbolization, pretend the symbol's address is that // of the function's code, not the descriptor. 
uint64_t OpdOffset = SymbolAddress - OpdAddress; - uint32_t OpdOffset32 = OpdOffset; - if (OpdOffset == OpdOffset32 && - OpdExtractor->isValidOffsetForAddress(OpdOffset32)) - SymbolAddress = OpdExtractor->getAddress(&OpdOffset32); + if (OpdExtractor->isValidOffsetForAddress(OpdOffset)) + SymbolAddress = OpdExtractor->getAddress(&OpdOffset); } Expected SymbolNameOrErr = Symbol.getName(); if (!SymbolNameOrErr) diff --git a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h index 9cab94178c1b..b5b9793a44d9 100644 --- a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h +++ b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h @@ -31,7 +31,8 @@ namespace symbolize { class SymbolizableObjectFile : public SymbolizableModule { public: static ErrorOr> - create(const object::ObjectFile *Obj, std::unique_ptr DICtx); + create(const object::ObjectFile *Obj, std::unique_ptr DICtx, + bool UntagAddresses); DILineInfo symbolizeCode(object::SectionedAddress ModuleOffset, FunctionNameKind FNKind, @@ -70,6 +71,7 @@ private: const object::ObjectFile *Module; std::unique_ptr DebugInfoContext; + bool UntagAddresses; struct SymbolDesc { uint64_t Addr; @@ -85,7 +87,8 @@ private: std::vector> Objects; SymbolizableObjectFile(const object::ObjectFile *Obj, - std::unique_ptr DICtx); + std::unique_ptr DICtx, + bool UntagAddresses); }; } // end namespace symbolize diff --git a/lib/DebugInfo/Symbolize/Symbolize.cpp b/lib/DebugInfo/Symbolize/Symbolize.cpp index 6a619f8f2f37..be79d9e637c1 100644 --- a/lib/DebugInfo/Symbolize/Symbolize.cpp +++ b/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -35,19 +35,6 @@ #include #include -#if defined(_MSC_VER) -#include - -// This must be included after windows.h. -#include -#pragma comment(lib, "dbghelp.lib") - -// Windows.h conflicts with our COFF header definitions. -#ifdef IMAGE_FILE_MACHINE_I386 -#undef IMAGE_FILE_MACHINE_I386 -#endif -#endif - namespace llvm { namespace symbolize { @@ -205,7 +192,7 @@ bool checkFileCRC(StringRef Path, uint32_t CRCHash) { MemoryBuffer::getFileOrSTDIN(Path); if (!MB) return false; - return CRCHash == llvm::crc32(0, MB.get()->getBuffer()); + return CRCHash == llvm::crc32(arrayRefFromStringRef(MB.get()->getBuffer())); } bool findDebugBinary(const std::string &OrigPath, @@ -259,7 +246,11 @@ bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName, return false; for (const SectionRef &Section : Obj->sections()) { StringRef Name; - Section.getName(Name); + if (Expected NameOrErr = Section.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + Name = Name.substr(Name.find_first_not_of("._")); if (Name == "gnu_debuglink") { Expected ContentsOrErr = Section.getContents(); @@ -268,7 +259,7 @@ bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName, return false; } DataExtractor DE(*ContentsOrErr, Obj->isLittleEndian(), 0); - uint32_t Offset = 0; + uint64_t Offset = 0; if (const char *DebugNameStr = DE.getCStr(&Offset)) { // 4-byte align the offset. 
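// [Editor's sketch -- not part of the imported diff.] The UntagAddresses hunk
// added to addSymbol() above strips an AArch64 top-byte memory tag before the
// symbol address is used. Kernel addresses keep bits 56-63 set, so bit 55 is
// sign-extended back into the top byte rather than simply masked off. A
// standalone equivalent (using an explicit mask instead of the shift pair):
#include <cassert>
#include <cstdint>

static uint64_t untagAddress(uint64_t SymbolAddress) {
  SymbolAddress &= (1ull << 56) - 1;  // drop the tag byte (bits 56-63)
  if (SymbolAddress & (1ull << 55))   // kernel half: propagate bit 55 upward
    SymbolAddress |= 0xffull << 56;
  return SymbolAddress;
}

int main() {
  // Tagged user-space address: the tag 0x2a is simply removed.
  assert(untagAddress(0x2a00007fffff1234ull) == 0x0000007fffff1234ull);
  // Tagged kernel address: bits 56-63 are restored from bit 55.
  assert(untagAddress(0x2aff800000001234ull) == 0xffff800000001234ull);
  return 0;
}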
Offset = (Offset + 3) & ~0x3; @@ -397,7 +388,7 @@ LLVMSymbolizer::getOrCreateObject(const std::string &Path, return I->second.get(); Expected> ObjOrErr = - UB->getObjectForArch(ArchName); + UB->getMachOObjectForArch(ArchName); if (!ObjOrErr) { ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName), std::unique_ptr()); @@ -418,8 +409,8 @@ Expected LLVMSymbolizer::createModuleInfo(const ObjectFile *Obj, std::unique_ptr Context, StringRef ModuleName) { - auto InfoOrErr = - SymbolizableObjectFile::create(Obj, std::move(Context)); + auto InfoOrErr = SymbolizableObjectFile::create(Obj, std::move(Context), + Opts.UntagAddresses); std::unique_ptr SymMod; if (InfoOrErr) SymMod = std::move(*InfoOrErr); @@ -530,21 +521,20 @@ LLVMSymbolizer::DemangleName(const std::string &Name, return Result; } -#if defined(_MSC_VER) if (!Name.empty() && Name.front() == '?') { // Only do MSVC C++ demangling on symbols starting with '?'. - char DemangledName[1024] = {0}; - DWORD result = ::UnDecorateSymbolName( - Name.c_str(), DemangledName, 1023, - UNDNAME_NO_ACCESS_SPECIFIERS | // Strip public, private, protected - UNDNAME_NO_ALLOCATION_LANGUAGE | // Strip __thiscall, __stdcall, etc - UNDNAME_NO_THROW_SIGNATURES | // Strip throw() specifications - UNDNAME_NO_MEMBER_TYPE | // Strip virtual, static, etc specifiers - UNDNAME_NO_MS_KEYWORDS | // Strip all MS extension keywords - UNDNAME_NO_FUNCTION_RETURNS); // Strip function return types - return (result == 0) ? Name : std::string(DemangledName); + int status = 0; + char *DemangledName = microsoftDemangle( + Name.c_str(), nullptr, nullptr, &status, + MSDemangleFlags(MSDF_NoAccessSpecifier | MSDF_NoCallingConvention | + MSDF_NoMemberType | MSDF_NoReturnType)); + if (status != 0) + return Name; + std::string Result = DemangledName; + free(DemangledName); + return Result; } -#endif + if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) return std::string(demanglePE32ExternCFunc(Name)); return Name; diff --git a/lib/Demangle/ItaniumDemangle.cpp b/lib/Demangle/ItaniumDemangle.cpp index 5c99c70e3cc6..760d28b3ab9d 100644 --- a/lib/Demangle/ItaniumDemangle.cpp +++ b/lib/Demangle/ItaniumDemangle.cpp @@ -174,6 +174,16 @@ struct DumpVisitor { return printStr("SpecialSubKind::iostream"); } } + void print(TemplateParamKind TPK) { + switch (TPK) { + case TemplateParamKind::Type: + return printStr("TemplateParamKind::Type"); + case TemplateParamKind::NonType: + return printStr("TemplateParamKind::NonType"); + case TemplateParamKind::Template: + return printStr("TemplateParamKind::Template"); + } + } void newLine() { printStr("\n"); diff --git a/lib/Demangle/MicrosoftDemangle.cpp b/lib/Demangle/MicrosoftDemangle.cpp index bf7d77638f34..c681d6e25b87 100644 --- a/lib/Demangle/MicrosoftDemangle.cpp +++ b/lib/Demangle/MicrosoftDemangle.cpp @@ -783,8 +783,26 @@ SymbolNode *Demangler::demangleMD5Name(StringView &MangledName) { return S; } +SymbolNode *Demangler::demangleTypeinfoName(StringView &MangledName) { + assert(MangledName.startsWith('.')); + MangledName.consumeFront('.'); + + TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result); + if (Error || !MangledName.empty()) { + Error = true; + return nullptr; + } + return synthesizeVariable(Arena, T, "`RTTI Type Descriptor Name'"); +} + // Parser entry point. SymbolNode *Demangler::parse(StringView &MangledName) { + // Typeinfo names are strings stored in RTTI data. They're not symbol names. + // It's still useful to demangle them. They're the only demangled entity + // that doesn't start with a "?" 
but a ".". + if (MangledName.startsWith('.')) + return demangleTypeinfoName(MangledName); + if (MangledName.startsWith("??@")) return demangleMD5Name(MangledName); @@ -2161,7 +2179,7 @@ NodeArrayNode *Demangler::demangleFunctionParameterList(StringView &MangledName, NodeArrayNode * Demangler::demangleTemplateParameterList(StringView &MangledName) { - NodeList *Head; + NodeList *Head = nullptr; NodeList **Current = &Head; size_t Count = 0; @@ -2328,12 +2346,22 @@ char *llvm::microsoftDemangle(const char *MangledName, char *Buf, size_t *N, if (Flags & MSDF_DumpBackrefs) D.dumpBackReferences(); + OutputFlags OF = OF_Default; + if (Flags & MSDF_NoCallingConvention) + OF = OutputFlags(OF | OF_NoCallingConvention); + if (Flags & MSDF_NoAccessSpecifier) + OF = OutputFlags(OF | OF_NoAccessSpecifier); + if (Flags & MSDF_NoReturnType) + OF = OutputFlags(OF | OF_NoReturnType); + if (Flags & MSDF_NoMemberType) + OF = OutputFlags(OF | OF_NoMemberType); + if (D.Error) InternalStatus = demangle_invalid_mangled_name; else if (!initializeOutputStream(Buf, N, S, 1024)) InternalStatus = demangle_memory_alloc_failure; else { - AST->output(S, OF_Default); + AST->output(S, OF); S += '\0'; if (N != nullptr) *N = S.getCurrentPosition(); diff --git a/lib/Demangle/MicrosoftDemangleNodes.cpp b/lib/Demangle/MicrosoftDemangleNodes.cpp index 63ca475ec1fe..9cee975231a2 100644 --- a/lib/Demangle/MicrosoftDemangleNodes.cpp +++ b/lib/Demangle/MicrosoftDemangleNodes.cpp @@ -120,8 +120,6 @@ std::string Node::toString(OutputFlags Flags) const { return {OS.getBuffer()}; } -void TypeNode::outputQuals(bool SpaceBefore, bool SpaceAfter) const {} - void PrimitiveTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const { switch (PrimKind) { OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Void, "void"); @@ -380,24 +378,28 @@ void LiteralOperatorIdentifierNode::output(OutputStream &OS, void FunctionSignatureNode::outputPre(OutputStream &OS, OutputFlags Flags) const { - if (FunctionClass & FC_Public) - OS << "public: "; - if (FunctionClass & FC_Protected) - OS << "protected: "; - if (FunctionClass & FC_Private) - OS << "private: "; - - if (!(FunctionClass & FC_Global)) { - if (FunctionClass & FC_Static) - OS << "static "; + if (!(Flags & OF_NoAccessSpecifier)) { + if (FunctionClass & FC_Public) + OS << "public: "; + if (FunctionClass & FC_Protected) + OS << "protected: "; + if (FunctionClass & FC_Private) + OS << "private: "; } - if (FunctionClass & FC_Virtual) - OS << "virtual "; - if (FunctionClass & FC_ExternC) - OS << "extern \"C\" "; + if (!(Flags & OF_NoMemberType)) { + if (!(FunctionClass & FC_Global)) { + if (FunctionClass & FC_Static) + OS << "static "; + } + if (FunctionClass & FC_Virtual) + OS << "virtual "; + + if (FunctionClass & FC_ExternC) + OS << "extern \"C\" "; + } - if (ReturnType) { + if (!(Flags & OF_NoReturnType) && ReturnType) { ReturnType->outputPre(OS, Flags); OS << " "; } @@ -440,7 +442,7 @@ void FunctionSignatureNode::outputPost(OutputStream &OS, else if (RefQualifier == FunctionRefQualifier::RValueReference) OS << " &&"; - if (ReturnType) + if (!(Flags & OF_NoReturnType) && ReturnType) ReturnType->outputPost(OS, Flags); } @@ -582,19 +584,26 @@ void FunctionSymbolNode::output(OutputStream &OS, OutputFlags Flags) const { } void VariableSymbolNode::output(OutputStream &OS, OutputFlags Flags) const { + const char *AccessSpec = nullptr; + bool IsStatic = true; switch (SC) { case StorageClass::PrivateStatic: - OS << "private: static "; + AccessSpec = "private"; break; case StorageClass::PublicStatic: - OS << 
"public: static "; + AccessSpec = "public"; break; case StorageClass::ProtectedStatic: - OS << "protected: static "; + AccessSpec = "protected"; break; default: + IsStatic = false; break; } + if (!(Flags & OF_NoAccessSpecifier) && AccessSpec) + OS << AccessSpec << ": "; + if (!(Flags & OF_NoMemberType) && IsStatic) + OS << "static "; if (Type) { Type->outputPre(OS, Flags); diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 1c6c0406d048..ee7a7cb60bc9 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -32,12 +32,12 @@ #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Host.h" -#include "llvm/Support/MutexGuard.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include #include +#include using namespace llvm; #define DEBUG_TYPE "jit" @@ -191,7 +191,7 @@ uint64_t ExecutionEngineState::RemoveMapping(StringRef Name) { std::string ExecutionEngine::getMangledName(const GlobalValue *GV) { assert(GV->hasName() && "Global must have name."); - MutexGuard locked(lock); + std::lock_guard locked(lock); SmallString<128> FullName; const DataLayout &DL = @@ -204,12 +204,12 @@ std::string ExecutionEngine::getMangledName(const GlobalValue *GV) { } void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); addGlobalMapping(getMangledName(GV), (uint64_t) Addr); } void ExecutionEngine::addGlobalMapping(StringRef Name, uint64_t Addr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); assert(!Name.empty() && "Empty GlobalMapping symbol name!"); @@ -228,14 +228,14 @@ void ExecutionEngine::addGlobalMapping(StringRef Name, uint64_t Addr) { } void ExecutionEngine::clearAllGlobalMappings() { - MutexGuard locked(lock); + std::lock_guard locked(lock); EEState.getGlobalAddressMap().clear(); EEState.getGlobalAddressReverseMap().clear(); } void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); for (GlobalObject &GO : M->global_objects()) EEState.RemoveMapping(getMangledName(&GO)); @@ -243,12 +243,12 @@ void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) { uint64_t ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); return updateGlobalMapping(getMangledName(GV), (uint64_t) Addr); } uint64_t ExecutionEngine::updateGlobalMapping(StringRef Name, uint64_t Addr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); ExecutionEngineState::GlobalAddressMapTy &Map = EEState.getGlobalAddressMap(); @@ -275,7 +275,7 @@ uint64_t ExecutionEngine::updateGlobalMapping(StringRef Name, uint64_t Addr) { } uint64_t ExecutionEngine::getAddressToGlobalIfAvailable(StringRef S) { - MutexGuard locked(lock); + std::lock_guard locked(lock); uint64_t Address = 0; ExecutionEngineState::GlobalAddressMapTy::iterator I = EEState.getGlobalAddressMap().find(S); @@ -286,19 +286,19 @@ uint64_t ExecutionEngine::getAddressToGlobalIfAvailable(StringRef S) { void *ExecutionEngine::getPointerToGlobalIfAvailable(StringRef S) { - MutexGuard locked(lock); + std::lock_guard locked(lock); if (void* Address = (void *) getAddressToGlobalIfAvailable(S)) return Address; return nullptr; } void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) { - MutexGuard locked(lock); + 
std::lock_guard locked(lock); return getPointerToGlobalIfAvailable(getMangledName(GV)); } const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); // If we haven't computed the reverse mapping yet, do so first. if (EEState.getGlobalAddressReverseMap().empty()) { @@ -340,14 +340,14 @@ void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE, Values.clear(); // Free the old contents. Values.reserve(InputArgv.size()); unsigned PtrSize = EE->getDataLayout().getPointerSize(); - Array = make_unique((InputArgv.size()+1)*PtrSize); + Array = std::make_unique((InputArgv.size()+1)*PtrSize); LLVM_DEBUG(dbgs() << "JIT: ARGV = " << (void *)Array.get() << "\n"); Type *SBytePtr = Type::getInt8PtrTy(C); for (unsigned i = 0; i != InputArgv.size(); ++i) { unsigned Size = InputArgv[i].size()+1; - auto Dest = make_unique(Size); + auto Dest = std::make_unique(Size); LLVM_DEBUG(dbgs() << "JIT: ARGV[" << i << "] = " << (void *)Dest.get() << "\n"); @@ -575,7 +575,7 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) { if (Function *F = const_cast(dyn_cast(GV))) return getPointerToFunction(F); - MutexGuard locked(lock); + std::lock_guard locked(lock); if (void* P = getPointerToGlobalIfAvailable(GV)) return P; @@ -626,7 +626,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { break; case Type::VectorTyID: // if the whole vector is 'undef' just reserve memory for the value. - auto* VTy = dyn_cast(C->getType()); + auto* VTy = cast(C->getType()); Type *ElemTy = VTy->getElementType(); unsigned int elemNum = VTy->getNumElements(); Result.AggregateVal.resize(elemNum); @@ -925,7 +925,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { elemNum = CDV->getNumElements(); ElemTy = CDV->getElementType(); } else if (CV || CAZ) { - VectorType* VTy = dyn_cast(C->getType()); + auto* VTy = cast(C->getType()); elemNum = VTy->getNumElements(); ElemTy = VTy->getElementType(); } else { diff --git a/lib/ExecutionEngine/GDBRegistrationListener.cpp b/lib/ExecutionEngine/GDBRegistrationListener.cpp index 08d20156a590..7ed025fbb481 100644 --- a/lib/ExecutionEngine/GDBRegistrationListener.cpp +++ b/lib/ExecutionEngine/GDBRegistrationListener.cpp @@ -14,7 +14,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" -#include "llvm/Support/MutexGuard.h" +#include using namespace llvm; using namespace llvm::object; @@ -135,7 +135,7 @@ void NotifyDebugger(jit_code_entry* JITCodeEntry) { GDBJITRegistrationListener::~GDBJITRegistrationListener() { // Free all registered object files. 
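// [Editor's sketch -- not part of the imported diff.] These hunks retire the
// hand-rolled MutexGuard / unique_lock wrappers from Support in favour of the
// standard <mutex> guards: each "MutexGuard locked(lock)" becomes a
// std::lock_guard over the same sys::Mutex, and the interpreter's guard becomes
// std::unique_lock. A standalone analogue of the pattern, with a plain
// std::recursive_mutex standing in for llvm::sys::Mutex:
#include <cstdint>
#include <mutex>
#include <string>
#include <unordered_map>

class AddressMap {
  std::recursive_mutex Lock;
  std::unordered_map<std::string, uint64_t> Map;

public:
  void add(const std::string &Name, uint64_t Addr) {
    std::lock_guard<std::recursive_mutex> Locked(Lock); // was: MutexGuard locked(lock);
    Map[Name] = Addr;
  }

  uint64_t lookup(const std::string &Name) {
    std::lock_guard<std::recursive_mutex> Locked(Lock);
    auto I = Map.find(Name);
    return I == Map.end() ? 0 : I->second;
  }
};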
- llvm::MutexGuard locked(*JITDebugLock); + std::lock_guard locked(*JITDebugLock); for (RegisteredObjectBufferMap::iterator I = ObjectBufferMap.begin(), E = ObjectBufferMap.end(); I != E; ++I) { @@ -159,7 +159,7 @@ void GDBJITRegistrationListener::notifyObjectLoaded( const char *Buffer = DebugObj.getBinary()->getMemoryBufferRef().getBufferStart(); size_t Size = DebugObj.getBinary()->getMemoryBufferRef().getBufferSize(); - llvm::MutexGuard locked(*JITDebugLock); + std::lock_guard locked(*JITDebugLock); assert(ObjectBufferMap.find(K) == ObjectBufferMap.end() && "Second attempt to perform debug registration."); jit_code_entry* JITCodeEntry = new jit_code_entry(); @@ -178,7 +178,7 @@ void GDBJITRegistrationListener::notifyObjectLoaded( } void GDBJITRegistrationListener::notifyFreeingObject(ObjectKey K) { - llvm::MutexGuard locked(*JITDebugLock); + std::lock_guard locked(*JITDebugLock); RegisteredObjectBufferMap::iterator I = ObjectBufferMap.find(K); if (I != ObjectBufferMap.end()) { diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index c3a2ccc582c9..71b7f893d712 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -32,7 +32,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" -#include "llvm/Support/UniqueLock.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -41,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -258,7 +258,7 @@ GenericValue Interpreter::callExternalFunction(Function *F, ArrayRef ArgVals) { TheInterpreter = this; - unique_lock Guard(*FunctionsLock); + std::unique_lock Guard(*FunctionsLock); // Do a lookup to see if the function is in our cache... this should just be a // deferred annotation! diff --git a/lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h b/lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h index 1271ad962b38..b47a798c7603 100644 --- a/lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h +++ b/lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h @@ -20,24 +20,23 @@ namespace jitlink { template class BasicGOTAndStubsBuilder { public: - BasicGOTAndStubsBuilder(AtomGraph &G) : G(G) {} + BasicGOTAndStubsBuilder(LinkGraph &G) : G(G) {} void run() { - // We're going to be adding new atoms, but we don't want to iterate over - // the newly added ones, so just copy the existing atoms out. - std::vector DAs(G.defined_atoms().begin(), - G.defined_atoms().end()); + // We're going to be adding new blocks, but we don't want to iterate over + // the newly added ones, so just copy the existing blocks out. 
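// [Editor's sketch -- not part of the imported diff.] BasicGOTAndStubsBuilder
// keeps its CRTP shape through the Atom -> Block/Symbol rename: the base class
// walks the graph and forwards target-specific work through
// impl() = static_cast<BuilderImpl &>(*this), caching one GOT entry / stub per
// target name. Standalone analogue of that pattern (all names invented):
#include <iostream>
#include <map>
#include <string>

template <typename DerivedT> class PerNameEntryBuilder {
public:
  // Return the cached entry for Name, creating it on first use through the
  // derived class's createEntry() hook.
  const std::string &getOrCreate(const std::string &Name) {
    auto I = Entries.find(Name);
    if (I == Entries.end())
      I = Entries.emplace(Name, impl().createEntry(Name)).first;
    return I->second;
  }

private:
  DerivedT &impl() { return static_cast<DerivedT &>(*this); }
  std::map<std::string, std::string> Entries;
};

class GOTBuilder : public PerNameEntryBuilder<GOTBuilder> {
public:
  std::string createEntry(const std::string &Target) {
    return "got." + Target; // the real builder adds a GOT block to the graph
  }
};

int main() {
  GOTBuilder B;
  std::cout << B.getOrCreate("printf") << '\n'; // creates "got.printf"
  std::cout << B.getOrCreate("printf") << '\n'; // second call hits the cache
  return 0;
}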
+ std::vector Blocks(G.blocks().begin(), G.blocks().end()); - for (auto *DA : DAs) - for (auto &E : DA->edges()) + for (auto *B : Blocks) + for (auto &E : B->edges()) if (impl().isGOTEdge(E)) - impl().fixGOTEdge(E, getGOTEntryAtom(E.getTarget())); + impl().fixGOTEdge(E, getGOTEntrySymbol(E.getTarget())); else if (impl().isExternalBranchEdge(E)) - impl().fixExternalBranchEdge(E, getStubAtom(E.getTarget())); + impl().fixExternalBranchEdge(E, getStubSymbol(E.getTarget())); } protected: - Atom &getGOTEntryAtom(Atom &Target) { + Symbol &getGOTEntrySymbol(Symbol &Target) { assert(Target.hasName() && "GOT edge cannot point to anonymous target"); auto GOTEntryI = GOTEntries.find(Target.getName()); @@ -49,31 +48,31 @@ protected: GOTEntries.insert(std::make_pair(Target.getName(), &GOTEntry)).first; } - assert(GOTEntryI != GOTEntries.end() && "Could not get GOT entry atom"); + assert(GOTEntryI != GOTEntries.end() && "Could not get GOT entry symbol"); return *GOTEntryI->second; } - Atom &getStubAtom(Atom &Target) { + Symbol &getStubSymbol(Symbol &Target) { assert(Target.hasName() && "External branch edge can not point to an anonymous target"); auto StubI = Stubs.find(Target.getName()); if (StubI == Stubs.end()) { - auto &StubAtom = impl().createStub(Target); - StubI = Stubs.insert(std::make_pair(Target.getName(), &StubAtom)).first; + auto &StubSymbol = impl().createStub(Target); + StubI = Stubs.insert(std::make_pair(Target.getName(), &StubSymbol)).first; } - assert(StubI != Stubs.end() && "Count not get stub atom"); + assert(StubI != Stubs.end() && "Count not get stub symbol"); return *StubI->second; } - AtomGraph &G; + LinkGraph &G; private: BuilderImpl &impl() { return static_cast(*this); } - DenseMap GOTEntries; - DenseMap Stubs; + DenseMap GOTEntries; + DenseMap Stubs; }; } // end namespace jitlink diff --git a/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp b/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp index 25f0e9040ffe..f80b0e7f8909 100644 --- a/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp +++ b/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp @@ -17,18 +17,14 @@ namespace llvm { namespace jitlink { -EHFrameParser::EHFrameParser(AtomGraph &G, Section &EHFrameSection, - StringRef EHFrameContent, - JITTargetAddress EHFrameAddress, - Edge::Kind FDEToCIERelocKind, - Edge::Kind FDEToTargetRelocKind) - : G(G), EHFrameSection(EHFrameSection), EHFrameContent(EHFrameContent), - EHFrameAddress(EHFrameAddress), - EHFrameReader(EHFrameContent, G.getEndianness()), - FDEToCIERelocKind(FDEToCIERelocKind), - FDEToTargetRelocKind(FDEToTargetRelocKind) {} - -Error EHFrameParser::atomize() { +EHFrameBinaryParser::EHFrameBinaryParser(JITTargetAddress EHFrameAddress, + StringRef EHFrameContent, + unsigned PointerSize, + support::endianness Endianness) + : EHFrameAddress(EHFrameAddress), EHFrameContent(EHFrameContent), + PointerSize(PointerSize), EHFrameReader(EHFrameContent, Endianness) {} + +Error EHFrameBinaryParser::addToGraph() { while (!EHFrameReader.empty()) { size_t RecordOffset = EHFrameReader.getOffset(); @@ -38,44 +34,39 @@ Error EHFrameParser::atomize() { << " (offset " << RecordOffset << ")\n"; }); - size_t CIELength = 0; - uint32_t CIELengthField; - if (auto Err = EHFrameReader.readInteger(CIELengthField)) + size_t RecordLength = 0; + uint32_t RecordLengthField; + if (auto Err = EHFrameReader.readInteger(RecordLengthField)) return Err; - // Process CIE length/extended-length fields to build the atom. + // Process CIE/FDE length/extended-length fields to build the blocks. 
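// [Editor's sketch -- not part of the imported diff.] The RecordLength handling
// above follows the .eh_frame length encoding: a 32-bit length of 0 terminates
// the section, 0xffffffff announces a 64-bit extended length, and the stored
// value excludes the length field itself, so the whole record occupies
// length + 4 bytes (or extended length + 12). Standalone decoder, assuming a
// little-endian buffer (the real code reads through BinaryStreamReader with the
// graph's endianness):
#include <cstdint>
#include <cstring>
#include <optional>

// Returns the total record size including the length field(s), 0 for the
// end-of-section marker, or std::nullopt if the buffer is too short.
static std::optional<uint64_t> ehFrameRecordSize(const uint8_t *Buf,
                                                 size_t Avail) {
  if (Avail < 4)
    return std::nullopt;
  uint32_t Length;
  std::memcpy(&Length, Buf, 4);
  if (Length == 0)
    return 0;                    // end of the eh-frame section
  if (Length != 0xffffffff)
    return uint64_t(Length) + 4; // regular record
  if (Avail < 12)
    return std::nullopt;
  uint64_t Extended;
  std::memcpy(&Extended, Buf + 4, 8);
  return Extended + 12;          // extended-length record
}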
// // The value of these fields describe the length of the *rest* of the CIE // (not including data up to the end of the field itself) so we have to - // bump CIELength to include the data up to the end of the field: 4 bytes + // bump RecordLength to include the data up to the end of the field: 4 bytes // for Length, or 12 bytes (4 bytes + 8 bytes) for ExtendedLength. - if (CIELengthField == 0) // Length 0 means end of __eh_frame section. + if (RecordLengthField == 0) // Length 0 means end of __eh_frame section. break; // If the regular length field's value is 0xffffffff, use extended length. - if (CIELengthField == 0xffffffff) { - uint64_t CIEExtendedLengthField; - if (auto Err = EHFrameReader.readInteger(CIEExtendedLengthField)) + if (RecordLengthField == 0xffffffff) { + uint64_t ExtendedLengthField; + if (auto Err = EHFrameReader.readInteger(ExtendedLengthField)) return Err; - if (CIEExtendedLengthField > EHFrameReader.bytesRemaining()) + if (ExtendedLengthField > EHFrameReader.bytesRemaining()) return make_error("CIE record extends past the end of " "the __eh_frame section"); - if (CIEExtendedLengthField + 12 > std::numeric_limits::max()) + if (ExtendedLengthField + 12 > std::numeric_limits::max()) return make_error("CIE record too large to process"); - CIELength = CIEExtendedLengthField + 12; + RecordLength = ExtendedLengthField + 12; } else { - if (CIELengthField > EHFrameReader.bytesRemaining()) + if (RecordLengthField > EHFrameReader.bytesRemaining()) return make_error("CIE record extends past the end of " "the __eh_frame section"); - CIELength = CIELengthField + 4; + RecordLength = RecordLengthField + 4; } - LLVM_DEBUG(dbgs() << " length: " << CIELength << "\n"); - - // Add an atom for this record. - CurRecordAtom = &G.addAnonymousAtom( - EHFrameSection, EHFrameAddress + RecordOffset, G.getPointerSize()); - CurRecordAtom->setContent(EHFrameContent.substr(RecordOffset, CIELength)); + LLVM_DEBUG(dbgs() << " length: " << RecordLength << "\n"); // Read the CIE Pointer. size_t CIEPointerAddress = EHFrameAddress + EHFrameReader.getOffset(); @@ -85,21 +76,24 @@ Error EHFrameParser::atomize() { // Based on the CIE pointer value, parse this as a CIE or FDE record. 
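// [Editor's sketch -- not part of the imported diff.] The word following the
// length is the "CIE pointer": zero marks the record as a CIE, anything else
// marks it as an FDE whose parent CIE sits CIEPointer bytes *before* that
// field -- hence the CIEInfos lookup at CIEPointerAddress - CIEPointer below.
#include <cstdint>

struct EHRecordKind {
  bool IsCIE;
  uint64_t ParentCIEAddress; // meaningful only when IsCIE is false
};

static EHRecordKind classifyEHRecord(uint64_t CIEPointerFieldAddress,
                                     uint32_t CIEPointer) {
  if (CIEPointer == 0)
    return {true, 0};
  return {false, CIEPointerFieldAddress - CIEPointer};
}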
if (CIEPointer == 0) { - if (auto Err = processCIE()) + if (auto Err = processCIE(RecordOffset, RecordLength)) return Err; } else { - if (auto Err = processFDE(CIEPointerAddress, CIEPointer)) + if (auto Err = processFDE(RecordOffset, RecordLength, CIEPointerAddress, + CIEPointer)) return Err; } - EHFrameReader.setOffset(RecordOffset + CIELength); + EHFrameReader.setOffset(RecordOffset + RecordLength); } return Error::success(); } -Expected -EHFrameParser::parseAugmentationString() { +void EHFrameBinaryParser::anchor() {} + +Expected +EHFrameBinaryParser::parseAugmentationString() { AugmentationInfo AugInfo; uint8_t NextChar; uint8_t *NextField = &AugInfo.Fields[0]; @@ -139,14 +133,14 @@ EHFrameParser::parseAugmentationString() { return std::move(AugInfo); } -Expected EHFrameParser::readAbsolutePointer() { +Expected EHFrameBinaryParser::readAbsolutePointer() { static_assert(sizeof(JITTargetAddress) == sizeof(uint64_t), "Result must be able to hold a uint64_t"); JITTargetAddress Addr; - if (G.getPointerSize() == 8) { + if (PointerSize == 8) { if (auto Err = EHFrameReader.readInteger(Addr)) return std::move(Err); - } else if (G.getPointerSize() == 4) { + } else if (PointerSize == 4) { uint32_t Addr32; if (auto Err = EHFrameReader.readInteger(Addr32)) return std::move(Err); @@ -156,14 +150,19 @@ Expected EHFrameParser::readAbsolutePointer() { return Addr; } -Error EHFrameParser::processCIE() { +Error EHFrameBinaryParser::processCIE(size_t RecordOffset, + size_t RecordLength) { // Use the dwarf namespace for convenient access to pointer encoding // constants. using namespace dwarf; LLVM_DEBUG(dbgs() << " Record is CIE\n"); - CIEInformation CIEInfo(*CurRecordAtom); + auto &CIESymbol = + createCIERecord(EHFrameAddress + RecordOffset, + EHFrameContent.substr(RecordOffset, RecordLength)); + + CIEInformation CIEInfo(CIESymbol); uint8_t Version = 0; if (auto Err = EHFrameReader.readInteger(Version)) @@ -179,7 +178,7 @@ Error EHFrameParser::processCIE() { // Skip the EH Data field if present. if (AugInfo->EHDataFieldPresent) - if (auto Err = EHFrameReader.skip(G.getPointerSize())) + if (auto Err = EHFrameReader.skip(PointerSize)) return Err; // Read and sanity check the code alignment factor. 
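// [Editor's sketch -- not part of the imported diff.] The code alignment factor
// read here (and the augmentation data size later) is ULEB128-encoded, which is
// what EHFrameReader.readULEB128() decodes: seven value bits per byte, least
// significant group first, high bit set on every byte except the last. (The
// data alignment factor uses the signed SLEB128 variant.) Standalone decoder:
#include <cstddef>
#include <cstdint>

// Decodes a ULEB128 value starting at Buf[*Offset] and advances *Offset past
// it. Returns false if the buffer ends before the terminating byte.
static bool decodeULEB128(const uint8_t *Buf, size_t Size, size_t *Offset,
                          uint64_t *Value) {
  uint64_t Result = 0;
  unsigned Shift = 0;
  while (*Offset < Size && Shift < 64) {
    uint8_t Byte = Buf[(*Offset)++];
    Result |= uint64_t(Byte & 0x7f) << Shift;
    if ((Byte & 0x80) == 0) {
      *Value = Result;
      return true;
    }
    Shift += 7;
  }
  return false; // truncated (or implausibly long) encoding
}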
@@ -226,7 +225,7 @@ Error EHFrameParser::processCIE() { return make_error( "Unsupported LSDA pointer encoding " + formatv("{0:x2}", LSDAPointerEncoding) + " in CIE at " + - formatv("{0:x16}", CurRecordAtom->getAddress())); + formatv("{0:x16}", CIESymbol.getAddress())); break; } case 'P': { @@ -239,7 +238,7 @@ Error EHFrameParser::processCIE() { "Unspported personality pointer " "encoding " + formatv("{0:x2}", PersonalityPointerEncoding) + " in CIE at " + - formatv("{0:x16}", CurRecordAtom->getAddress())); + formatv("{0:x16}", CIESymbol.getAddress())); uint32_t PersonalityPointerAddress; if (auto Err = EHFrameReader.readInteger(PersonalityPointerAddress)) return Err; @@ -254,7 +253,7 @@ Error EHFrameParser::processCIE() { "Unsupported FDE address pointer " "encoding " + formatv("{0:x2}", FDEPointerEncoding) + " in CIE at " + - formatv("{0:x16}", CurRecordAtom->getAddress())); + formatv("{0:x16}", CIESymbol.getAddress())); break; } default: @@ -267,15 +266,16 @@ Error EHFrameParser::processCIE() { return make_error("Read past the end of the augmentation " "data while parsing fields"); - assert(!CIEInfos.count(CurRecordAtom->getAddress()) && + assert(!CIEInfos.count(CIESymbol.getAddress()) && "Multiple CIEs recorded at the same address?"); - CIEInfos[CurRecordAtom->getAddress()] = std::move(CIEInfo); + CIEInfos[CIESymbol.getAddress()] = std::move(CIEInfo); return Error::success(); } -Error EHFrameParser::processFDE(JITTargetAddress CIEPointerAddress, - uint32_t CIEPointer) { +Error EHFrameBinaryParser::processFDE(size_t RecordOffset, size_t RecordLength, + JITTargetAddress CIEPointerAddress, + uint32_t CIEPointer) { LLVM_DEBUG(dbgs() << " Record is FDE\n"); LLVM_DEBUG({ @@ -286,16 +286,11 @@ Error EHFrameParser::processFDE(JITTargetAddress CIEPointerAddress, auto CIEInfoItr = CIEInfos.find(CIEPointerAddress - CIEPointer); if (CIEInfoItr == CIEInfos.end()) return make_error( - "FDE at " + formatv("{0:x16}", CurRecordAtom->getAddress()) + + "FDE at " + formatv("{0:x16}", EHFrameAddress + RecordOffset) + " points to non-existant CIE at " + formatv("{0:x16}", CIEPointerAddress - CIEPointer)); auto &CIEInfo = CIEInfoItr->second; - // The CIEPointer looks good. Add a relocation. - CurRecordAtom->addEdge(FDEToCIERelocKind, - CIEPointerAddress - CurRecordAtom->getAddress(), - *CIEInfo.CIEAtom, 0); - // Read and sanity check the PC-start pointer and size. JITTargetAddress PCBeginAddress = EHFrameAddress + EHFrameReader.getOffset(); @@ -305,83 +300,68 @@ Error EHFrameParser::processFDE(JITTargetAddress CIEPointerAddress, JITTargetAddress PCBegin = PCBeginAddress + *PCBeginDelta; LLVM_DEBUG({ - dbgs() << " PC begin: " << format("0x%016" PRIx64, PCBegin) << "\n"; + dbgs() << " PC begin: " << format("0x%016" PRIx64, PCBegin) << "\n"; }); - auto *TargetAtom = G.getAtomByAddress(PCBegin); + auto *TargetSymbol = getSymbolAtAddress(PCBegin); - if (!TargetAtom) + if (!TargetSymbol) return make_error("FDE PC-begin " + formatv("{0:x16}", PCBegin) + - " does not point at atom"); + " does not point at symbol"); - if (TargetAtom->getAddress() != PCBegin) + if (TargetSymbol->getAddress() != PCBegin) return make_error( "FDE PC-begin " + formatv("{0:x16}", PCBegin) + - " does not point to start of atom at " + - formatv("{0:x16}", TargetAtom->getAddress())); - - LLVM_DEBUG(dbgs() << " FDE target: " << *TargetAtom << "\n"); + " does not point to start of symbol at " + + formatv("{0:x16}", TargetSymbol->getAddress())); - // The PC-start pointer and size look good. Add relocations. 
- CurRecordAtom->addEdge(FDEToTargetRelocKind, - PCBeginAddress - CurRecordAtom->getAddress(), - *TargetAtom, 0); - - // Add a keep-alive relocation from the function to the FDE to ensure it is - // not dead stripped. - TargetAtom->addEdge(Edge::KeepAlive, 0, *CurRecordAtom, 0); + LLVM_DEBUG(dbgs() << " FDE target: " << *TargetSymbol << "\n"); // Skip over the PC range size field. - if (auto Err = EHFrameReader.skip(G.getPointerSize())) + if (auto Err = EHFrameReader.skip(PointerSize)) return Err; + Symbol *LSDASymbol = nullptr; + JITTargetAddress LSDAAddress = 0; if (CIEInfo.FDEsHaveLSDAField) { uint64_t AugmentationDataSize; if (auto Err = EHFrameReader.readULEB128(AugmentationDataSize)) return Err; - if (AugmentationDataSize != G.getPointerSize()) + if (AugmentationDataSize != PointerSize) return make_error( "Unexpected FDE augmentation data size (expected " + - Twine(G.getPointerSize()) + ", got " + Twine(AugmentationDataSize) + - ") for FDE at " + formatv("{0:x16}", CurRecordAtom->getAddress())); - JITTargetAddress LSDAAddress = EHFrameAddress + EHFrameReader.getOffset(); + Twine(PointerSize) + ", got " + Twine(AugmentationDataSize) + + ") for FDE at " + formatv("{0:x16}", EHFrameAddress + RecordOffset)); + LSDAAddress = EHFrameAddress + EHFrameReader.getOffset(); auto LSDADelta = readAbsolutePointer(); if (!LSDADelta) return LSDADelta.takeError(); JITTargetAddress LSDA = LSDAAddress + *LSDADelta; - auto *LSDAAtom = G.getAtomByAddress(LSDA); + LSDASymbol = getSymbolAtAddress(LSDA); - if (!LSDAAtom) + if (!LSDASymbol) return make_error("FDE LSDA " + formatv("{0:x16}", LSDA) + - " does not point at atom"); + " does not point at symbol"); - if (LSDAAtom->getAddress() != LSDA) + if (LSDASymbol->getAddress() != LSDA) return make_error( "FDE LSDA " + formatv("{0:x16}", LSDA) + - " does not point to start of atom at " + - formatv("{0:x16}", LSDAAtom->getAddress())); - - LLVM_DEBUG(dbgs() << " FDE LSDA: " << *LSDAAtom << "\n"); + " does not point to start of symbol at " + + formatv("{0:x16}", LSDASymbol->getAddress())); - // LSDA looks good. Add relocations. - CurRecordAtom->addEdge(FDEToTargetRelocKind, - LSDAAddress - CurRecordAtom->getAddress(), *LSDAAtom, - 0); + LLVM_DEBUG(dbgs() << " FDE LSDA: " << *LSDASymbol << "\n"); } - return Error::success(); -} + JITTargetAddress RecordAddress = EHFrameAddress + RecordOffset; + auto FDESymbol = createFDERecord( + RecordAddress, EHFrameContent.substr(RecordOffset, RecordLength), + *CIEInfo.CIESymbol, CIEPointerAddress - RecordAddress, *TargetSymbol, + PCBeginAddress - RecordAddress, LSDASymbol, LSDAAddress - RecordAddress); -Error addEHFrame(AtomGraph &G, Section &EHFrameSection, - StringRef EHFrameContent, JITTargetAddress EHFrameAddress, - Edge::Kind FDEToCIERelocKind, - Edge::Kind FDEToTargetRelocKind) { - return EHFrameParser(G, EHFrameSection, EHFrameContent, EHFrameAddress, - FDEToCIERelocKind, FDEToTargetRelocKind) - .atomize(); + return FDESymbol.takeError(); } // Determine whether we can register EH tables. @@ -451,11 +431,13 @@ static Error deregisterFrameWrapper(const void *P) { template Error walkAppleEHFrameSection(const char *const SectionStart, + size_t SectionSize, HandleFDEFn HandleFDE) { const char *CurCFIRecord = SectionStart; + const char *End = SectionStart + SectionSize; uint64_t Size = *reinterpret_cast(CurCFIRecord); - while (Size != 0) { + while (CurCFIRecord != End && Size != 0) { const char *OffsetField = CurCFIRecord + (Size == 0xffffffff ? 
12 : 4); if (Size == 0xffffffff) Size = *reinterpret_cast(CurCFIRecord + 4) + 12; @@ -484,10 +466,12 @@ Error walkAppleEHFrameSection(const char *const SectionStart, #endif // __APPLE__ -Error registerEHFrameSection(const void *EHFrameSectionAddr) { +Error registerEHFrameSection(const void *EHFrameSectionAddr, + size_t EHFrameSectionSize) { #ifdef __APPLE__ // On Darwin __register_frame has to be called for each FDE entry. return walkAppleEHFrameSection(static_cast(EHFrameSectionAddr), + EHFrameSectionSize, registerFrameWrapper); #else // On Linux __register_frame takes a single argument: @@ -499,9 +483,11 @@ Error registerEHFrameSection(const void *EHFrameSectionAddr) { #endif } -Error deregisterEHFrameSection(const void *EHFrameSectionAddr) { +Error deregisterEHFrameSection(const void *EHFrameSectionAddr, + size_t EHFrameSectionSize) { #ifdef __APPLE__ return walkAppleEHFrameSection(static_cast(EHFrameSectionAddr), + EHFrameSectionSize, deregisterFrameWrapper); #else return deregisterFrameWrapper(EHFrameSectionAddr); @@ -517,23 +503,31 @@ InProcessEHFrameRegistrar &InProcessEHFrameRegistrar::getInstance() { InProcessEHFrameRegistrar::InProcessEHFrameRegistrar() {} -AtomGraphPassFunction +LinkGraphPassFunction createEHFrameRecorderPass(const Triple &TT, - StoreFrameAddressFunction StoreFrameAddress) { + StoreFrameRangeFunction StoreRangeAddress) { const char *EHFrameSectionName = nullptr; if (TT.getObjectFormat() == Triple::MachO) EHFrameSectionName = "__eh_frame"; else EHFrameSectionName = ".eh_frame"; - auto RecordEHFrame = [EHFrameSectionName, - StoreFrameAddress](AtomGraph &G) -> Error { - // Search for a non-empty eh-frame and record the address of the first atom - // in it. + auto RecordEHFrame = + [EHFrameSectionName, + StoreFrameRange = std::move(StoreRangeAddress)](LinkGraph &G) -> Error { + // Search for a non-empty eh-frame and record the address of the first + // symbol in it. JITTargetAddress Addr = 0; - if (auto *S = G.findSectionByName(EHFrameSectionName)) - Addr = S->getRange().getStart(); - StoreFrameAddress(Addr); + size_t Size = 0; + if (auto *S = G.findSectionByName(EHFrameSectionName)) { + auto R = SectionRange(*S); + Addr = R.getStart(); + Size = R.getSize(); + } + if (Addr == 0 && Size != 0) + return make_error("__eh_frame section can not have zero " + "address with non-zero size"); + StoreFrameRange(Addr, Size); return Error::success(); }; diff --git a/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h b/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h index d679edef7ea6..6f9f68ad8382 100644 --- a/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h +++ b/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h @@ -21,18 +21,31 @@ namespace llvm { namespace jitlink { -/// A generic parser for eh-frame sections. +/// A generic binary parser for eh-frame sections. /// -/// Adds atoms representing CIE and FDE entries, using the given FDE-to-CIE and -/// FDEToTarget relocation kinds. -class EHFrameParser { +/// Adds blocks and symbols representing CIE and FDE entries to a JITLink graph. +/// +/// This parser assumes that the user has already verified that the EH-frame's +/// address range does not overlap any other section/symbol, so that generated +/// CIE/FDE records do not overlap other sections/symbols. 
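// [Editor's sketch -- not part of the imported diff.] The reworked
// EHFrameBinaryParser keeps the generic CIE/FDE walking logic and leaves graph
// construction to pure-virtual hooks (getSymbolAtAddress, createCIERecord,
// createFDERecord) that each object-format backend overrides. A standalone
// analogue of that split, with invented names:
#include <cstdint>
#include <iostream>

class FrameWalkerBase {
public:
  virtual ~FrameWalkerBase() = default;

  // Format-agnostic driver: the real parser decodes records here; this toy
  // version just feeds two fixed addresses to the hooks.
  void walk() {
    onCIE(0x1000);
    onFDE(0x1020, /*ParentCIE=*/0x1000);
  }

private:
  virtual void onCIE(uint64_t Address) = 0;
  virtual void onFDE(uint64_t Address, uint64_t ParentCIE) = 0;
};

class MachOFrameWalker final : public FrameWalkerBase {
  void onCIE(uint64_t Address) override {
    std::cout << "CIE record at 0x" << std::hex << Address << '\n';
  }
  void onFDE(uint64_t Address, uint64_t ParentCIE) override {
    std::cout << "FDE record at 0x" << std::hex << Address << " -> CIE 0x"
              << ParentCIE << '\n';
  }
};

int main() { MachOFrameWalker().walk(); return 0; }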
+class EHFrameBinaryParser { public: - EHFrameParser(AtomGraph &G, Section &EHFrameSection, StringRef EHFrameContent, - JITTargetAddress EHFrameAddress, Edge::Kind FDEToCIERelocKind, - Edge::Kind FDEToTargetRelocKind); - Error atomize(); + EHFrameBinaryParser(JITTargetAddress EHFrameAddress, StringRef EHFrameContent, + unsigned PointerSize, support::endianness Endianness); + virtual ~EHFrameBinaryParser() {} + + Error addToGraph(); private: + virtual void anchor(); + virtual Symbol *getSymbolAtAddress(JITTargetAddress Addr) = 0; + virtual Symbol &createCIERecord(JITTargetAddress RecordAddr, + StringRef RecordContent) = 0; + virtual Expected + createFDERecord(JITTargetAddress RecordAddr, StringRef RecordContent, + Symbol &CIE, size_t CIEOffset, Symbol &Func, + size_t FuncOffset, Symbol *LSDA, size_t LSDAOffset) = 0; + struct AugmentationInfo { bool AugmentationDataPresent = false; bool EHDataFieldPresent = false; @@ -41,31 +54,24 @@ private: Expected parseAugmentationString(); Expected readAbsolutePointer(); - Error processCIE(); - Error processFDE(JITTargetAddress CIEPointerAddress, uint32_t CIEPointer); + Error processCIE(size_t RecordOffset, size_t RecordLength); + Error processFDE(size_t RecordOffset, size_t RecordLength, + JITTargetAddress CIEPointerOffset, uint32_t CIEPointer); struct CIEInformation { CIEInformation() = default; - CIEInformation(DefinedAtom &CIEAtom) : CIEAtom(&CIEAtom) {} - DefinedAtom *CIEAtom = nullptr; + CIEInformation(Symbol &CIESymbol) : CIESymbol(&CIESymbol) {} + Symbol *CIESymbol = nullptr; bool FDEsHaveLSDAField = false; }; - AtomGraph &G; - Section &EHFrameSection; - StringRef EHFrameContent; JITTargetAddress EHFrameAddress; + StringRef EHFrameContent; + unsigned PointerSize; BinaryStreamReader EHFrameReader; - DefinedAtom *CurRecordAtom = nullptr; DenseMap CIEInfos; - Edge::Kind FDEToCIERelocKind; - Edge::Kind FDEToTargetRelocKind; }; -Error addEHFrame(AtomGraph &G, Section &EHFrameSection, - StringRef EHFrameContent, JITTargetAddress EHFrameAddress, - Edge::Kind FDEToCIERelocKind, Edge::Kind FDEToTargetRelocKind); - } // end namespace jitlink } // end namespace llvm diff --git a/lib/ExecutionEngine/JITLink/JITLink.cpp b/lib/ExecutionEngine/JITLink/JITLink.cpp index 9d0a7459dc09..1e19038951ac 100644 --- a/lib/ExecutionEngine/JITLink/JITLink.cpp +++ b/lib/ExecutionEngine/JITLink/JITLink.cpp @@ -56,95 +56,151 @@ std::error_code JITLinkError::convertToErrorCode() const { return std::error_code(GenericJITLinkError, *JITLinkerErrorCategory); } -const StringRef getGenericEdgeKindName(Edge::Kind K) { +const char *getGenericEdgeKindName(Edge::Kind K) { switch (K) { case Edge::Invalid: return "INVALID RELOCATION"; case Edge::KeepAlive: return "Keep-Alive"; - case Edge::LayoutNext: - return "Layout-Next"; default: llvm_unreachable("Unrecognized relocation kind"); } } -raw_ostream &operator<<(raw_ostream &OS, const Atom &A) { +const char *getLinkageName(Linkage L) { + switch (L) { + case Linkage::Strong: + return "strong"; + case Linkage::Weak: + return "weak"; + } + llvm_unreachable("Unrecognized llvm.jitlink.Linkage enum"); +} + +const char *getScopeName(Scope S) { + switch (S) { + case Scope::Default: + return "default"; + case Scope::Hidden: + return "hidden"; + case Scope::Local: + return "local"; + } + llvm_unreachable("Unrecognized llvm.jitlink.Scope enum"); +} + +raw_ostream &operator<<(raw_ostream &OS, const Block &B) { + return OS << formatv("{0:x16}", B.getAddress()) << " -- " + << formatv("{0:x16}", B.getAddress() + B.getSize()) << ": " + << 
(B.isZeroFill() ? "zero-fill" : "content") + << ", align = " << B.getAlignment() + << ", align-ofs = " << B.getAlignmentOffset() + << ", section = " << B.getSection().getName(); +} + +raw_ostream &operator<<(raw_ostream &OS, const Symbol &Sym) { OS << "<"; - if (A.getName().empty()) - OS << "anon@" << format("0x%016" PRIx64, A.getAddress()); + if (Sym.getName().empty()) + OS << "*anon*"; else - OS << A.getName(); - OS << " ["; - if (A.isDefined()) { - auto &DA = static_cast(A); - OS << " section=" << DA.getSection().getName(); - if (DA.isLive()) - OS << " live"; - if (DA.shouldDiscard()) - OS << " should-discard"; - } else - OS << " external"; - OS << " ]>"; + OS << Sym.getName(); + OS << ": flags = "; + switch (Sym.getLinkage()) { + case Linkage::Strong: + OS << 'S'; + break; + case Linkage::Weak: + OS << 'W'; + break; + } + switch (Sym.getScope()) { + case Scope::Default: + OS << 'D'; + break; + case Scope::Hidden: + OS << 'H'; + break; + case Scope::Local: + OS << 'L'; + break; + } + OS << (Sym.isLive() ? '+' : '-') + << ", size = " << formatv("{0:x8}", Sym.getSize()) + << ", addr = " << formatv("{0:x16}", Sym.getAddress()) << " (" + << formatv("{0:x16}", Sym.getAddressable().getAddress()) << " + " + << formatv("{0:x8}", Sym.getOffset()); + if (Sym.isDefined()) + OS << " " << Sym.getBlock().getSection().getName(); + OS << ")>"; return OS; } -void printEdge(raw_ostream &OS, const Atom &FixupAtom, const Edge &E, +void printEdge(raw_ostream &OS, const Block &B, const Edge &E, StringRef EdgeKindName) { - OS << "edge@" << formatv("{0:x16}", FixupAtom.getAddress() + E.getOffset()) - << ": " << FixupAtom << " + " << E.getOffset() << " -- " << EdgeKindName - << " -> " << E.getTarget() << " + " << E.getAddend(); + OS << "edge@" << formatv("{0:x16}", B.getAddress() + E.getOffset()) << ": " + << formatv("{0:x16}", B.getAddress()) << " + " << E.getOffset() << " -- " + << EdgeKindName << " -> " << E.getTarget() << " + " << E.getAddend(); } Section::~Section() { - for (auto *DA : DefinedAtoms) - DA->~DefinedAtom(); + for (auto *Sym : Symbols) + Sym->~Symbol(); } -void AtomGraph::dump(raw_ostream &OS, +LinkGraph::~LinkGraph() { + // Destroy blocks. + for (auto *B : Blocks) + B->~Block(); +} + +void LinkGraph::dump(raw_ostream &OS, std::function EdgeKindToName) { if (!EdgeKindToName) EdgeKindToName = [](Edge::Kind K) { return StringRef(); }; - OS << "Defined atoms:\n"; - for (auto *DA : defined_atoms()) { - OS << " " << format("0x%016" PRIx64, DA->getAddress()) << ": " << *DA + OS << "Symbols:\n"; + for (auto *Sym : defined_symbols()) { + OS << " " << format("0x%016" PRIx64, Sym->getAddress()) << ": " << *Sym << "\n"; - for (auto &E : DA->edges()) { - OS << " "; - StringRef EdgeName = (E.getKind() < Edge::FirstRelocation - ? getGenericEdgeKindName(E.getKind()) - : EdgeKindToName(E.getKind())); - - if (!EdgeName.empty()) - printEdge(OS, *DA, E, EdgeName); - else { - auto EdgeNumberString = std::to_string(E.getKind()); - printEdge(OS, *DA, E, EdgeNumberString); + if (Sym->isDefined()) { + for (auto &E : Sym->getBlock().edges()) { + OS << " "; + StringRef EdgeName = (E.getKind() < Edge::FirstRelocation + ? 
getGenericEdgeKindName(E.getKind()) + : EdgeKindToName(E.getKind())); + + if (!EdgeName.empty()) + printEdge(OS, Sym->getBlock(), E, EdgeName); + else { + auto EdgeNumberString = std::to_string(E.getKind()); + printEdge(OS, Sym->getBlock(), E, EdgeNumberString); + } + OS << "\n"; } - OS << "\n"; } } - OS << "Absolute atoms:\n"; - for (auto *A : absolute_atoms()) - OS << " " << format("0x%016" PRIx64, A->getAddress()) << ": " << *A + OS << "Absolute symbols:\n"; + for (auto *Sym : absolute_symbols()) + OS << " " << format("0x%016" PRIx64, Sym->getAddress()) << ": " << *Sym << "\n"; - OS << "External atoms:\n"; - for (auto *A : external_atoms()) - OS << " " << format("0x%016" PRIx64, A->getAddress()) << ": " << *A + OS << "External symbols:\n"; + for (auto *Sym : external_symbols()) + OS << " " << format("0x%016" PRIx64, Sym->getAddress()) << ": " << *Sym << "\n"; } +void JITLinkAsyncLookupContinuation::anchor() {} + JITLinkContext::~JITLinkContext() {} bool JITLinkContext::shouldAddDefaultTargetPasses(const Triple &TT) const { return true; } -AtomGraphPassFunction JITLinkContext::getMarkLivePass(const Triple &TT) const { - return AtomGraphPassFunction(); +LinkGraphPassFunction JITLinkContext::getMarkLivePass(const Triple &TT) const { + return LinkGraphPassFunction(); } Error JITLinkContext::modifyPassConfig(const Triple &TT, @@ -152,9 +208,9 @@ Error JITLinkContext::modifyPassConfig(const Triple &TT, return Error::success(); } -Error markAllAtomsLive(AtomGraph &G) { - for (auto *DA : G.defined_atoms()) - DA->setLive(true); +Error markAllSymbolsLive(LinkGraph &G) { + for (auto *Sym : G.defined_symbols()) + Sym->setLive(true); return Error::success(); } diff --git a/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp b/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp index 96e074da122b..d4270b5aa796 100644 --- a/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp +++ b/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "JITLinkGeneric.h" -#include "EHFrameSupportImpl.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/MemoryBuffer.h" @@ -25,7 +24,7 @@ JITLinkerBase::~JITLinkerBase() {} void JITLinkerBase::linkPhase1(std::unique_ptr Self) { - // Build the atom graph. + // Build the link graph. if (auto GraphOrErr = buildGraph(Ctx->getObjectBuffer())) G = std::move(*GraphOrErr); else @@ -33,33 +32,33 @@ void JITLinkerBase::linkPhase1(std::unique_ptr Self) { assert(G && "Graph should have been created by buildGraph above"); // Prune and optimize the graph. - if (auto Err = runPasses(Passes.PrePrunePasses, *G)) + if (auto Err = runPasses(Passes.PrePrunePasses)) return Ctx->notifyFailed(std::move(Err)); LLVM_DEBUG({ - dbgs() << "Atom graph \"" << G->getName() << "\" pre-pruning:\n"; + dbgs() << "Link graph \"" << G->getName() << "\" pre-pruning:\n"; dumpGraph(dbgs()); }); prune(*G); LLVM_DEBUG({ - dbgs() << "Atom graph \"" << G->getName() << "\" post-pruning:\n"; + dbgs() << "Link graph \"" << G->getName() << "\" post-pruning:\n"; dumpGraph(dbgs()); }); // Run post-pruning passes. - if (auto Err = runPasses(Passes.PostPrunePasses, *G)) + if (auto Err = runPasses(Passes.PostPrunePasses)) return Ctx->notifyFailed(std::move(Err)); - // Sort atoms into segments. - layOutAtoms(); + // Sort blocks into segments. + auto Layout = layOutBlocks(); // Allocate memory for segments. 
if (auto Err = allocateSegments(Layout)) return Ctx->notifyFailed(std::move(Err)); - // Notify client that the defined atoms have been assigned addresses. + // Notify client that the defined symbols have been assigned addresses. Ctx->notifyResolved(*G); auto ExternalSymbols = getExternalSymbolNames(); @@ -74,42 +73,42 @@ void JITLinkerBase::linkPhase1(std::unique_ptr Self) { // [Self=std::move(Self)](Expected Result) { // Self->linkPhase2(std::move(Self), std::move(Result)); // }); - // - // FIXME: Use move capture once we have c++14. auto *TmpCtx = Ctx.get(); - auto *UnownedSelf = Self.release(); - auto Phase2Continuation = - [UnownedSelf](Expected LookupResult) { - std::unique_ptr Self(UnownedSelf); - UnownedSelf->linkPhase2(std::move(Self), std::move(LookupResult)); - }; - TmpCtx->lookup(std::move(ExternalSymbols), std::move(Phase2Continuation)); + TmpCtx->lookup(std::move(ExternalSymbols), + createLookupContinuation( + [S = std::move(Self), L = std::move(Layout)]( + Expected LookupResult) mutable { + auto &TmpSelf = *S; + TmpSelf.linkPhase2(std::move(S), std::move(LookupResult), + std::move(L)); + })); } void JITLinkerBase::linkPhase2(std::unique_ptr Self, - Expected LR) { + Expected LR, + SegmentLayoutMap Layout) { // If the lookup failed, bail out. if (!LR) return deallocateAndBailOut(LR.takeError()); - // Assign addresses to external atoms. + // Assign addresses to external addressables. applyLookupResult(*LR); LLVM_DEBUG({ - dbgs() << "Atom graph \"" << G->getName() << "\" before copy-and-fixup:\n"; + dbgs() << "Link graph \"" << G->getName() << "\" before copy-and-fixup:\n"; dumpGraph(dbgs()); }); - // Copy atom content to working memory and fix up. - if (auto Err = copyAndFixUpAllAtoms(Layout, *Alloc)) + // Copy block content to working memory and fix up. + if (auto Err = copyAndFixUpBlocks(Layout, *Alloc)) return deallocateAndBailOut(std::move(Err)); LLVM_DEBUG({ - dbgs() << "Atom graph \"" << G->getName() << "\" after copy-and-fixup:\n"; + dbgs() << "Link graph \"" << G->getName() << "\" after copy-and-fixup:\n"; dumpGraph(dbgs()); }); - if (auto Err = runPasses(Passes.PostFixupPasses, *G)) + if (auto Err = runPasses(Passes.PostFixupPasses)) return deallocateAndBailOut(std::move(Err)); // FIXME: Use move capture once we have c++14. @@ -128,82 +127,38 @@ void JITLinkerBase::linkPhase3(std::unique_ptr Self, Error Err) { Ctx->notifyFinalized(std::move(Alloc)); } -Error JITLinkerBase::runPasses(AtomGraphPassList &Passes, AtomGraph &G) { +Error JITLinkerBase::runPasses(LinkGraphPassList &Passes) { for (auto &P : Passes) - if (auto Err = P(G)) + if (auto Err = P(*G)) return Err; return Error::success(); } -void JITLinkerBase::layOutAtoms() { - // Group sections by protections, and whether or not they're zero-fill. - for (auto &S : G->sections()) { +JITLinkerBase::SegmentLayoutMap JITLinkerBase::layOutBlocks() { - // Skip empty sections. - if (S.atoms_empty()) - continue; + SegmentLayoutMap Layout; - auto &SL = Layout[S.getProtectionFlags()]; - if (S.isZeroFill()) - SL.ZeroFillSections.push_back(SegmentLayout::SectionLayout(S)); + /// Partition blocks based on permissions and content vs. zero-fill. + for (auto *B : G->blocks()) { + auto &SegLists = Layout[B->getSection().getProtectionFlags()]; + if (!B->isZeroFill()) + SegLists.ContentBlocks.push_back(B); else - SL.ContentSections.push_back(SegmentLayout::SectionLayout(S)); + SegLists.ZeroFillBlocks.push_back(B); } - // Sort sections within the layout by ordinal. 
- { - auto CompareByOrdinal = [](const SegmentLayout::SectionLayout &LHS, - const SegmentLayout::SectionLayout &RHS) { - return LHS.S->getSectionOrdinal() < RHS.S->getSectionOrdinal(); + /// Sort blocks within each list. + for (auto &KV : Layout) { + + auto CompareBlocks = [](const Block *LHS, const Block *RHS) { + if (LHS->getSection().getOrdinal() != RHS->getSection().getOrdinal()) + return LHS->getSection().getOrdinal() < RHS->getSection().getOrdinal(); + return LHS->getOrdinal() < RHS->getOrdinal(); }; - for (auto &KV : Layout) { - auto &SL = KV.second; - std::sort(SL.ContentSections.begin(), SL.ContentSections.end(), - CompareByOrdinal); - std::sort(SL.ZeroFillSections.begin(), SL.ZeroFillSections.end(), - CompareByOrdinal); - } - } - // Add atoms to the sections. - for (auto &KV : Layout) { - auto &SL = KV.second; - for (auto *SIList : {&SL.ContentSections, &SL.ZeroFillSections}) { - for (auto &SI : *SIList) { - // First build the set of layout-heads (i.e. "heads" of layout-next - // chains) by copying the section atoms, then eliminating any that - // appear as layout-next targets. - DenseSet LayoutHeads; - for (auto *DA : SI.S->atoms()) - LayoutHeads.insert(DA); - - for (auto *DA : SI.S->atoms()) - if (DA->hasLayoutNext()) - LayoutHeads.erase(&DA->getLayoutNext()); - - // Next, sort the layout heads by address order. - std::vector OrderedLayoutHeads; - OrderedLayoutHeads.reserve(LayoutHeads.size()); - for (auto *DA : LayoutHeads) - OrderedLayoutHeads.push_back(DA); - - // Now sort the list of layout heads by address. - std::sort(OrderedLayoutHeads.begin(), OrderedLayoutHeads.end(), - [](const DefinedAtom *LHS, const DefinedAtom *RHS) { - return LHS->getAddress() < RHS->getAddress(); - }); - - // Now populate the SI.Atoms field by appending each of the chains. - for (auto *DA : OrderedLayoutHeads) { - SI.Atoms.push_back(DA); - while (DA->hasLayoutNext()) { - auto &Next = DA->getLayoutNext(); - SI.Atoms.push_back(&Next); - DA = &Next; - } - } - } - } + auto &SegLists = KV.second; + llvm::sort(SegLists.ContentBlocks, CompareBlocks); + llvm::sort(SegLists.ZeroFillBlocks, CompareBlocks); } LLVM_DEBUG({ @@ -213,18 +168,16 @@ void JITLinkerBase::layOutAtoms() { << static_cast(KV.first) << ":\n"; auto &SL = KV.second; for (auto &SIEntry : - {std::make_pair(&SL.ContentSections, "content sections"), - std::make_pair(&SL.ZeroFillSections, "zero-fill sections")}) { - auto &SIList = *SIEntry.first; + {std::make_pair(&SL.ContentBlocks, "content block"), + std::make_pair(&SL.ZeroFillBlocks, "zero-fill block")}) { dbgs() << " " << SIEntry.second << ":\n"; - for (auto &SI : SIList) { - dbgs() << " " << SI.S->getName() << ":\n"; - for (auto *DA : SI.Atoms) - dbgs() << " " << *DA << "\n"; - } + for (auto *B : *SIEntry.first) + dbgs() << " " << *B << "\n"; } } }); + + return Layout; } Error JITLinkerBase::allocateSegments(const SegmentLayoutMap &Layout) { @@ -234,74 +187,36 @@ Error JITLinkerBase::allocateSegments(const SegmentLayoutMap &Layout) { JITLinkMemoryManager::SegmentsRequestMap Segments; for (auto &KV : Layout) { auto &Prot = KV.first; - auto &SegLayout = KV.second; + auto &SegLists = KV.second; + + uint64_t SegAlign = 1; // Calculate segment content size. size_t SegContentSize = 0; - for (auto &SI : SegLayout.ContentSections) { - assert(!SI.S->atoms_empty() && "Sections in layout must not be empty"); - assert(!SI.Atoms.empty() && "Section layouts must not be empty"); - - // Bump to section alignment before processing atoms. 
- SegContentSize = alignTo(SegContentSize, SI.S->getAlignment()); - - for (auto *DA : SI.Atoms) { - SegContentSize = alignTo(SegContentSize, DA->getAlignment()); - SegContentSize += DA->getSize(); - } + for (auto *B : SegLists.ContentBlocks) { + SegAlign = std::max(SegAlign, B->getAlignment()); + SegContentSize = alignToBlock(SegContentSize, *B); + SegContentSize += B->getSize(); } - // Get segment content alignment. - unsigned SegContentAlign = 1; - if (!SegLayout.ContentSections.empty()) { - auto &FirstContentSection = SegLayout.ContentSections.front(); - SegContentAlign = - std::max(FirstContentSection.S->getAlignment(), - FirstContentSection.Atoms.front()->getAlignment()); - } + uint64_t SegZeroFillStart = SegContentSize; + uint64_t SegZeroFillEnd = SegZeroFillStart; - // Calculate segment zero-fill size. - uint64_t SegZeroFillSize = 0; - for (auto &SI : SegLayout.ZeroFillSections) { - assert(!SI.S->atoms_empty() && "Sections in layout must not be empty"); - assert(!SI.Atoms.empty() && "Section layouts must not be empty"); - - // Bump to section alignment before processing atoms. - SegZeroFillSize = alignTo(SegZeroFillSize, SI.S->getAlignment()); - - for (auto *DA : SI.Atoms) { - SegZeroFillSize = alignTo(SegZeroFillSize, DA->getAlignment()); - SegZeroFillSize += DA->getSize(); - } - } - - // Calculate segment zero-fill alignment. - uint32_t SegZeroFillAlign = 1; - - if (!SegLayout.ZeroFillSections.empty()) { - auto &FirstZeroFillSection = SegLayout.ZeroFillSections.front(); - SegZeroFillAlign = - std::max(FirstZeroFillSection.S->getAlignment(), - FirstZeroFillSection.Atoms.front()->getAlignment()); + for (auto *B : SegLists.ZeroFillBlocks) { + SegAlign = std::max(SegAlign, B->getAlignment()); + SegZeroFillEnd = alignToBlock(SegZeroFillEnd, *B); + SegZeroFillEnd += B->getSize(); } - if (SegContentSize == 0) - SegContentAlign = SegZeroFillAlign; - - if (SegContentAlign % SegZeroFillAlign != 0) - return make_error("First content atom alignment does not " - "accommodate first zero-fill atom " - "alignment"); - - Segments[Prot] = {SegContentSize, SegContentAlign, SegZeroFillSize, - SegZeroFillAlign}; + Segments[Prot] = {SegAlign, SegContentSize, + SegZeroFillEnd - SegZeroFillStart}; LLVM_DEBUG({ dbgs() << (&KV == &*Layout.begin() ? "" : "; ") - << static_cast(Prot) << ": " - << SegContentSize << " content bytes (alignment " - << SegContentAlign << ") + " << SegZeroFillSize - << " zero-fill bytes (alignment " << SegZeroFillAlign << ")"; + << static_cast(Prot) + << ": alignment = " << SegAlign + << ", content size = " << SegContentSize + << ", zero-fill size = " << (SegZeroFillEnd - SegZeroFillStart); }); } LLVM_DEBUG(dbgs() << " }\n"); @@ -320,22 +235,19 @@ Error JITLinkerBase::allocateSegments(const SegmentLayoutMap &Layout) { } }); - // Update atom target addresses. + // Update block target addresses. 
for (auto &KV : Layout) { auto &Prot = KV.first; auto &SL = KV.second; - JITTargetAddress AtomTargetAddr = + JITTargetAddress NextBlockAddr = Alloc->getTargetMemory(static_cast(Prot)); - for (auto *SIList : {&SL.ContentSections, &SL.ZeroFillSections}) - for (auto &SI : *SIList) { - AtomTargetAddr = alignTo(AtomTargetAddr, SI.S->getAlignment()); - for (auto *DA : SI.Atoms) { - AtomTargetAddr = alignTo(AtomTargetAddr, DA->getAlignment()); - DA->setAddress(AtomTargetAddr); - AtomTargetAddr += DA->getSize(); - } + for (auto *SIList : {&SL.ContentBlocks, &SL.ZeroFillBlocks}) + for (auto *B : *SIList) { + NextBlockAddr = alignToBlock(NextBlockAddr, *B); + B->setAddress(NextBlockAddr); + NextBlockAddr += B->getSize(); } } @@ -343,34 +255,35 @@ Error JITLinkerBase::allocateSegments(const SegmentLayoutMap &Layout) { } DenseSet JITLinkerBase::getExternalSymbolNames() const { - // Identify unresolved external atoms. + // Identify unresolved external symbols. DenseSet UnresolvedExternals; - for (auto *DA : G->external_atoms()) { - assert(DA->getAddress() == 0 && + for (auto *Sym : G->external_symbols()) { + assert(Sym->getAddress() == 0 && "External has already been assigned an address"); - assert(DA->getName() != StringRef() && DA->getName() != "" && + assert(Sym->getName() != StringRef() && Sym->getName() != "" && "Externals must be named"); - UnresolvedExternals.insert(DA->getName()); + UnresolvedExternals.insert(Sym->getName()); } return UnresolvedExternals; } void JITLinkerBase::applyLookupResult(AsyncLookupResult Result) { - for (auto &KV : Result) { - Atom &A = G->getAtomByName(KV.first); - assert(A.getAddress() == 0 && "Atom already resolved"); - A.setAddress(KV.second.getAddress()); + for (auto *Sym : G->external_symbols()) { + assert(Sym->getAddress() == 0 && "Symbol already resolved"); + assert(!Sym->isDefined() && "Symbol being resolved is already defined"); + assert(Result.count(Sym->getName()) && "Missing resolution for symbol"); + Sym->getAddressable().setAddress(Result[Sym->getName()].getAddress()); } LLVM_DEBUG({ dbgs() << "Externals after applying lookup result:\n"; - for (auto *A : G->external_atoms()) - dbgs() << " " << A->getName() << ": " - << formatv("{0:x16}", A->getAddress()) << "\n"; + for (auto *Sym : G->external_symbols()) + dbgs() << " " << Sym->getName() << ": " + << formatv("{0:x16}", Sym->getAddress()) << "\n"; }); - assert(llvm::all_of(G->external_atoms(), - [](Atom *A) { return A->getAddress() != 0; }) && - "All atoms should have been resolved by this point"); + assert(llvm::all_of(G->external_symbols(), + [](Symbol *Sym) { return Sym->getAddress() != 0; }) && + "All symbols should have been resolved by this point"); } void JITLinkerBase::deallocateAndBailOut(Error Err) { @@ -384,96 +297,60 @@ void JITLinkerBase::dumpGraph(raw_ostream &OS) { G->dump(dbgs(), [this](Edge::Kind K) { return getEdgeKindName(K); }); } -void prune(AtomGraph &G) { - std::vector Worklist; - DenseMap> EdgesToUpdate; - - // Build the initial worklist from all atoms initially live. - for (auto *DA : G.defined_atoms()) { - if (!DA->isLive() || DA->shouldDiscard()) - continue; - - for (auto &E : DA->edges()) { - if (!E.getTarget().isDefined()) - continue; +void prune(LinkGraph &G) { + std::vector Worklist; + DenseSet VisitedBlocks; - auto &EDT = static_cast(E.getTarget()); + // Build the initial worklist from all symbols initially live. 
+ for (auto *Sym : G.defined_symbols()) + if (Sym->isLive()) + Worklist.push_back(Sym); - if (EDT.shouldDiscard()) - EdgesToUpdate[&EDT].push_back(&E); - else if (E.isKeepAlive() && !EDT.isLive()) - Worklist.push_back(&EDT); - } - } - - // Propagate live flags to all atoms reachable from the initial live set. + // Propagate live flags to all symbols reachable from the initial live set. while (!Worklist.empty()) { - DefinedAtom &NextLive = *Worklist.back(); + auto *Sym = Worklist.back(); Worklist.pop_back(); - assert(!NextLive.shouldDiscard() && - "should-discard nodes should never make it into the worklist"); + auto &B = Sym->getBlock(); - // If this atom has already been marked as live, or is marked to be - // discarded, then skip it. - if (NextLive.isLive()) + // Skip addressables that we've visited before. + if (VisitedBlocks.count(&B)) continue; - // Otherwise set it as live and add any non-live atoms that it points to - // to the worklist. - NextLive.setLive(true); - - for (auto &E : NextLive.edges()) { - if (!E.getTarget().isDefined()) - continue; + VisitedBlocks.insert(&B); - auto &EDT = static_cast(E.getTarget()); - - if (EDT.shouldDiscard()) - EdgesToUpdate[&EDT].push_back(&E); - else if (E.isKeepAlive() && !EDT.isLive()) - Worklist.push_back(&EDT); + for (auto &E : Sym->getBlock().edges()) { + if (E.getTarget().isDefined() && !E.getTarget().isLive()) { + E.getTarget().setLive(true); + Worklist.push_back(&E.getTarget()); + } } } - // Collect atoms to remove, then remove them from the graph. - std::vector AtomsToRemove; - for (auto *DA : G.defined_atoms()) - if (DA->shouldDiscard() || !DA->isLive()) - AtomsToRemove.push_back(DA); - - LLVM_DEBUG(dbgs() << "Pruning atoms:\n"); - for (auto *DA : AtomsToRemove) { - LLVM_DEBUG(dbgs() << " " << *DA << "... "); - - // Check whether we need to replace this atom with an external atom. - // - // We replace if all of the following hold: - // (1) The atom is marked should-discard, - // (2) it has live edges (i.e. edges from live atoms) pointing to it. - // - // Otherwise we simply delete the atom. - - G.removeDefinedAtom(*DA); - - auto EdgesToUpdateItr = EdgesToUpdate.find(DA); - if (EdgesToUpdateItr != EdgesToUpdate.end()) { - auto &ExternalReplacement = G.addExternalAtom(DA->getName()); - for (auto *EdgeToUpdate : EdgesToUpdateItr->second) - EdgeToUpdate->setTarget(ExternalReplacement); - LLVM_DEBUG(dbgs() << "replaced with " << ExternalReplacement << "\n"); - } else - LLVM_DEBUG(dbgs() << "deleted\n"); + // Collect all the symbols to remove, then remove them. + { + LLVM_DEBUG(dbgs() << "Dead-stripping symbols:\n"); + std::vector SymbolsToRemove; + for (auto *Sym : G.defined_symbols()) + if (!Sym->isLive()) + SymbolsToRemove.push_back(Sym); + for (auto *Sym : SymbolsToRemove) { + LLVM_DEBUG(dbgs() << " " << *Sym << "...\n"); + G.removeDefinedSymbol(*Sym); + } } - // Finally, discard any absolute symbols that were marked should-discard. + // Delete any unused blocks. 
{ - std::vector AbsoluteAtomsToRemove; - for (auto *A : G.absolute_atoms()) - if (A->shouldDiscard() || A->isLive()) - AbsoluteAtomsToRemove.push_back(A); - for (auto *A : AbsoluteAtomsToRemove) - G.removeAbsoluteAtom(*A); + LLVM_DEBUG(dbgs() << "Dead-stripping blocks:\n"); + std::vector BlocksToRemove; + for (auto *B : G.blocks()) + if (!VisitedBlocks.count(B)) + BlocksToRemove.push_back(B); + for (auto *B : BlocksToRemove) { + LLVM_DEBUG(dbgs() << " " << *B << "...\n"); + G.removeBlock(*B); + } } } diff --git a/lib/ExecutionEngine/JITLink/JITLinkGeneric.h b/lib/ExecutionEngine/JITLink/JITLinkGeneric.h index e6fd6e38f7a6..07dee6cee200 100644 --- a/lib/ExecutionEngine/JITLink/JITLinkGeneric.h +++ b/lib/ExecutionEngine/JITLink/JITLinkGeneric.h @@ -41,39 +41,32 @@ public: protected: struct SegmentLayout { - using SectionAtomsList = std::vector; - struct SectionLayout { - SectionLayout(Section &S) : S(&S) {} + using BlocksList = std::vector; - Section *S; - SectionAtomsList Atoms; - }; - - using SectionLayoutList = std::vector; - - SectionLayoutList ContentSections; - SectionLayoutList ZeroFillSections; + BlocksList ContentBlocks; + BlocksList ZeroFillBlocks; }; using SegmentLayoutMap = DenseMap; // Phase 1: - // 1.1: Build atom graph + // 1.1: Build link graph // 1.2: Run pre-prune passes // 1.2: Prune graph // 1.3: Run post-prune passes - // 1.4: Sort atoms into segments + // 1.4: Sort blocks into segments // 1.5: Allocate segment memory // 1.6: Identify externals and make an async call to resolve function void linkPhase1(std::unique_ptr Self); // Phase 2: // 2.1: Apply resolution results - // 2.2: Fix up atom contents + // 2.2: Fix up block contents // 2.3: Call OnResolved callback // 2.3: Make an async call to transfer and finalize memory. void linkPhase2(std::unique_ptr Self, - Expected LookupResult); + Expected LookupResult, + SegmentLayoutMap Layout); // Phase 3: // 3.1: Call OnFinalized callback, handing off allocation. @@ -81,24 +74,37 @@ protected: // Build a graph from the given object buffer. // To be implemented by the client. - virtual Expected> + virtual Expected> buildGraph(MemoryBufferRef ObjBuffer) = 0; - // For debug dumping of the atom graph. + // For debug dumping of the link graph. virtual StringRef getEdgeKindName(Edge::Kind K) const = 0; + // Alight a JITTargetAddress to conform with block alignment requirements. + static JITTargetAddress alignToBlock(JITTargetAddress Addr, Block &B) { + uint64_t Delta = (B.getAlignmentOffset() - Addr) % B.getAlignment(); + return Addr + Delta; + } + + // Alight a pointer to conform with block alignment requirements. + static char *alignToBlock(char *P, Block &B) { + uint64_t PAddr = static_cast(reinterpret_cast(P)); + uint64_t Delta = (B.getAlignmentOffset() - PAddr) % B.getAlignment(); + return P + Delta; + } + private: // Run all passes in the given pass list, bailing out immediately if any pass // returns an error. - Error runPasses(AtomGraphPassList &Passes, AtomGraph &G); + Error runPasses(LinkGraphPassList &Passes); - // Copy atom contents and apply relocations. + // Copy block contents and apply relocations. // Implemented in JITLinker. 
virtual Error - copyAndFixUpAllAtoms(const SegmentLayoutMap &Layout, - JITLinkMemoryManager::Allocation &Alloc) const = 0; + copyAndFixUpBlocks(const SegmentLayoutMap &Layout, + JITLinkMemoryManager::Allocation &Alloc) const = 0; - void layOutAtoms(); + SegmentLayoutMap layOutBlocks(); Error allocateSegments(const SegmentLayoutMap &Layout); DenseSet getExternalSymbolNames() const; void applyLookupResult(AsyncLookupResult LR); @@ -108,8 +114,7 @@ private: std::unique_ptr Ctx; PassConfiguration Passes; - std::unique_ptr G; - SegmentLayoutMap Layout; + std::unique_ptr G; std::unique_ptr Alloc; }; @@ -121,7 +126,7 @@ public: /// Link should be called with the constructor arguments for LinkerImpl, which /// will be forwarded to the constructor. template static void link(ArgTs &&... Args) { - auto L = llvm::make_unique(std::forward(Args)...); + auto L = std::make_unique(std::forward(Args)...); // Ownership of the linker is passed into the linker's doLink function to // allow it to be passed on to async continuations. @@ -140,17 +145,17 @@ private: } Error - copyAndFixUpAllAtoms(const SegmentLayoutMap &Layout, - JITLinkMemoryManager::Allocation &Alloc) const override { - LLVM_DEBUG(dbgs() << "Copying and fixing up atoms:\n"); + copyAndFixUpBlocks(const SegmentLayoutMap &Layout, + JITLinkMemoryManager::Allocation &Alloc) const override { + LLVM_DEBUG(dbgs() << "Copying and fixing up blocks:\n"); for (auto &KV : Layout) { auto &Prot = KV.first; auto &SegLayout = KV.second; auto SegMem = Alloc.getWorkingMemory( static_cast(Prot)); - char *LastAtomEnd = SegMem.data(); - char *AtomDataPtr = LastAtomEnd; + char *LastBlockEnd = SegMem.data(); + char *BlockDataPtr = LastBlockEnd; LLVM_DEBUG({ dbgs() << " Processing segment " @@ -160,93 +165,79 @@ private: << " ]\n Processing content sections:\n"; }); - for (auto &SI : SegLayout.ContentSections) { - LLVM_DEBUG(dbgs() << " " << SI.S->getName() << ":\n"); + for (auto *B : SegLayout.ContentBlocks) { + LLVM_DEBUG(dbgs() << " " << *B << ":\n"); + + // Pad to alignment/alignment-offset. + BlockDataPtr = alignToBlock(BlockDataPtr, *B); - AtomDataPtr += alignmentAdjustment(AtomDataPtr, SI.S->getAlignment()); + LLVM_DEBUG({ + dbgs() << " Bumped block pointer to " + << (const void *)BlockDataPtr << " to meet block alignment " + << B->getAlignment() << " and alignment offset " + << B->getAlignmentOffset() << "\n"; + }); + // Zero pad up to alignment. LLVM_DEBUG({ - dbgs() << " Bumped atom pointer to " << (const void *)AtomDataPtr - << " to meet section alignment " - << " of " << SI.S->getAlignment() << "\n"; + if (LastBlockEnd != BlockDataPtr) + dbgs() << " Zero padding from " << (const void *)LastBlockEnd + << " to " << (const void *)BlockDataPtr << "\n"; }); - for (auto *DA : SI.Atoms) { - - // Align. - AtomDataPtr += alignmentAdjustment(AtomDataPtr, DA->getAlignment()); - LLVM_DEBUG({ - dbgs() << " Bumped atom pointer to " - << (const void *)AtomDataPtr << " to meet alignment of " - << DA->getAlignment() << "\n"; - }); - - // Zero pad up to alignment. - LLVM_DEBUG({ - if (LastAtomEnd != AtomDataPtr) - dbgs() << " Zero padding from " << (const void *)LastAtomEnd - << " to " << (const void *)AtomDataPtr << "\n"; - }); - while (LastAtomEnd != AtomDataPtr) - *LastAtomEnd++ = 0; - - // Copy initial atom content. 
- LLVM_DEBUG({ - dbgs() << " Copying atom " << *DA << " content, " - << DA->getContent().size() << " bytes, from " - << (const void *)DA->getContent().data() << " to " - << (const void *)AtomDataPtr << "\n"; - }); - memcpy(AtomDataPtr, DA->getContent().data(), DA->getContent().size()); - - // Copy atom data and apply fixups. - LLVM_DEBUG(dbgs() << " Applying fixups.\n"); - for (auto &E : DA->edges()) { - - // Skip non-relocation edges. - if (!E.isRelocation()) - continue; - - // Dispatch to LinkerImpl for fixup. - if (auto Err = impl().applyFixup(*DA, E, AtomDataPtr)) - return Err; - } - - // Point the atom's content to the fixed up buffer. - DA->setContent(StringRef(AtomDataPtr, DA->getContent().size())); - - // Update atom end pointer. - LastAtomEnd = AtomDataPtr + DA->getContent().size(); - AtomDataPtr = LastAtomEnd; + while (LastBlockEnd != BlockDataPtr) + *LastBlockEnd++ = 0; + + // Copy initial block content. + LLVM_DEBUG({ + dbgs() << " Copying block " << *B << " content, " + << B->getContent().size() << " bytes, from " + << (const void *)B->getContent().data() << " to " + << (const void *)BlockDataPtr << "\n"; + }); + memcpy(BlockDataPtr, B->getContent().data(), B->getContent().size()); + + // Copy Block data and apply fixups. + LLVM_DEBUG(dbgs() << " Applying fixups.\n"); + for (auto &E : B->edges()) { + + // Skip non-relocation edges. + if (!E.isRelocation()) + continue; + + // Dispatch to LinkerImpl for fixup. + if (auto Err = impl().applyFixup(*B, E, BlockDataPtr)) + return Err; } + + // Point the block's content to the fixed up buffer. + B->setContent(StringRef(BlockDataPtr, B->getContent().size())); + + // Update block end pointer. + LastBlockEnd = BlockDataPtr + B->getContent().size(); + BlockDataPtr = LastBlockEnd; } // Zero pad the rest of the segment. LLVM_DEBUG({ dbgs() << " Zero padding end of segment from " - << (const void *)LastAtomEnd << " to " + << (const void *)LastBlockEnd << " to " << (const void *)((char *)SegMem.data() + SegMem.size()) << "\n"; }); - while (LastAtomEnd != SegMem.data() + SegMem.size()) - *LastAtomEnd++ = 0; + while (LastBlockEnd != SegMem.data() + SegMem.size()) + *LastBlockEnd++ = 0; } return Error::success(); } }; -/// Dead strips and replaces discarded definitions with external atoms. +/// Removes dead symbols/blocks/addressables. /// -/// Finds the set of nodes reachable from any node initially marked live -/// (nodes marked should-discard are treated as not live, even if they are -/// reachable). All nodes not marked as live at the end of this process, -/// are deleted. Nodes that are live, but marked should-discard are replaced -/// with external atoms and all edges to them are re-written. -void prune(AtomGraph &G); - -Error addEHFrame(AtomGraph &G, Section &EHFrameSection, - StringRef EHFrameContent, JITTargetAddress EHFrameAddress, - Edge::Kind FDEToCIERelocKind, Edge::Kind FDEToTargetRelocKind); +/// Finds the set of symbols and addressables reachable from any symbol +/// initially marked live. All symbols/addressables not marked live at the end +/// of this process are removed. 
+void prune(LinkGraph &G); } // end namespace jitlink } // end namespace llvm diff --git a/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp index 267307cfde05..9e0d207e8bdb 100644 --- a/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp +++ b/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp @@ -38,9 +38,21 @@ InProcessMemoryManager::allocate(const SegmentsRequestMap &Request) { OnFinalize(applyProtections()); } Error deallocate() override { - for (auto &KV : SegBlocks) - if (auto EC = sys::Memory::releaseMappedMemory(KV.second)) - return errorCodeToError(EC); + if (SegBlocks.empty()) + return Error::success(); + void *SlabStart = SegBlocks.begin()->second.base(); + char *SlabEnd = (char *)SlabStart; + for (auto &KV : SegBlocks) { + SlabStart = std::min(SlabStart, KV.second.base()); + SlabEnd = std::max(SlabEnd, (char *)(KV.second.base()) + + KV.second.allocatedSize()); + } + size_t SlabSize = SlabEnd - (char *)SlabStart; + assert((SlabSize % sys::Process::getPageSizeEstimate()) == 0 && + "Slab size is not a multiple of page size"); + sys::MemoryBlock Slab(SlabStart, SlabSize); + if (auto EC = sys::Memory::releaseMappedMemory(Slab)) + return errorCodeToError(EC); return Error::success(); } @@ -61,37 +73,52 @@ InProcessMemoryManager::allocate(const SegmentsRequestMap &Request) { AllocationMap SegBlocks; }; + if (!isPowerOf2_64((uint64_t)sys::Process::getPageSizeEstimate())) + return make_error("Page size is not a power of 2", + inconvertibleErrorCode()); + AllocationMap Blocks; const sys::Memory::ProtectionFlags ReadWrite = static_cast(sys::Memory::MF_READ | sys::Memory::MF_WRITE); + // Compute the total number of pages to allocate. + size_t TotalSize = 0; for (auto &KV : Request) { - auto &Seg = KV.second; + const auto &Seg = KV.second; - if (Seg.getContentAlignment() > sys::Process::getPageSizeEstimate()) + if (Seg.getAlignment() > sys::Process::getPageSizeEstimate()) return make_error("Cannot request higher than page " "alignment", inconvertibleErrorCode()); - if (sys::Process::getPageSizeEstimate() % Seg.getContentAlignment() != 0) - return make_error("Page size is not a multiple of " - "alignment", - inconvertibleErrorCode()); + TotalSize = alignTo(TotalSize, sys::Process::getPageSizeEstimate()); + TotalSize += Seg.getContentSize(); + TotalSize += Seg.getZeroFillSize(); + } + + // Allocate one slab to cover all the segments. + std::error_code EC; + auto SlabRemaining = + sys::Memory::allocateMappedMemory(TotalSize, nullptr, ReadWrite, EC); + + if (EC) + return errorCodeToError(EC); + + // Allocate segment memory from the slab. + for (auto &KV : Request) { - uint64_t ZeroFillStart = - alignTo(Seg.getContentSize(), Seg.getZeroFillAlignment()); - uint64_t SegmentSize = ZeroFillStart + Seg.getZeroFillSize(); + const auto &Seg = KV.second; - std::error_code EC; - auto SegMem = - sys::Memory::allocateMappedMemory(SegmentSize, nullptr, ReadWrite, EC); + uint64_t SegmentSize = alignTo(Seg.getContentSize() + Seg.getZeroFillSize(), + sys::Process::getPageSizeEstimate()); - if (EC) - return errorCodeToError(EC); + sys::MemoryBlock SegMem(SlabRemaining.base(), SegmentSize); + SlabRemaining = sys::MemoryBlock((char *)SlabRemaining.base() + SegmentSize, + SegmentSize); // Zero out the zero-fill memory. - memset(static_cast(SegMem.base()) + ZeroFillStart, 0, + memset(static_cast(SegMem.base()) + Seg.getContentSize(), 0, Seg.getZeroFillSize()); // Record the block for this segment. 
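A minimal standalone sketch of the slab-size arithmetic relied on by the allocate() hunk above, assuming a 4096-byte page and two hypothetical segment requests (the real code queries sys::Process::getPageSizeEstimate() and takes sizes from the SegmentsRequestMap): each request is rounded up to a page boundary before the next one is added, one slab of TotalSize bytes is mapped, and segments are then carved off the front of the slab.

// Sketch only: mirrors the page-rounding arithmetic of the single-slab
// allocator above with plain integers instead of sys::Memory, so it can be
// checked in isolation. Page size and segment sizes are made-up examples.
#include <cstdint>
#include <cstdio>
#include <vector>

static uint64_t alignTo(uint64_t Value, uint64_t Align) {
  // Round Value up to the next multiple of Align.
  return (Value + Align - 1) / Align * Align;
}

int main() {
  const uint64_t PageSize = 4096; // assumed; the patch asks the OS instead

  struct SegmentRequest {
    const char *Name;
    uint64_t ContentSize;
    uint64_t ZeroFillSize;
  };
  // Hypothetical read/exec and read/write segment requests.
  const std::vector<SegmentRequest> Request = {
      {"RX", 0x1a40, 0}, {"RW", 0x200, 0x3000}};

  // Mirrors the TotalSize loop: bump to a page boundary, then add the
  // segment's content and zero-fill bytes.
  uint64_t TotalSize = 0;
  for (const auto &Seg : Request) {
    TotalSize = alignTo(TotalSize, PageSize);
    TotalSize += Seg.ContentSize;
    TotalSize += Seg.ZeroFillSize;
  }
  std::printf("slab request: %#llx bytes\n", (unsigned long long)TotalSize);

  // Mirrors the carve-out loop: each segment takes a page-aligned chunk off
  // the front of the slab; zero-fill memory starts right after the content.
  uint64_t Offset = 0;
  for (const auto &Seg : Request) {
    const uint64_t SegmentSize =
        alignTo(Seg.ContentSize + Seg.ZeroFillSize, PageSize);
    std::printf("%s: slab offset %#llx, size %#llx, zero-fill at +%#llx\n",
                Seg.Name, (unsigned long long)Offset,
                (unsigned long long)SegmentSize,
                (unsigned long long)Seg.ContentSize);
    Offset += SegmentSize;
  }
  return 0;
}

Allocating a single contiguous slab is also what lets the new deallocate() above release everything with one releaseMappedMemory call over the [min base, max end) range of the recorded segment blocks, instead of releasing each segment mapping separately.
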
diff --git a/lib/ExecutionEngine/JITLink/MachO.cpp b/lib/ExecutionEngine/JITLink/MachO.cpp index 15995b8ce98f..58bc0f56e155 100644 --- a/lib/ExecutionEngine/JITLink/MachO.cpp +++ b/lib/ExecutionEngine/JITLink/MachO.cpp @@ -14,6 +14,7 @@ #include "llvm/ExecutionEngine/JITLink/MachO.h" #include "llvm/BinaryFormat/MachO.h" +#include "llvm/ExecutionEngine/JITLink/MachO_arm64.h" #include "llvm/ExecutionEngine/JITLink/MachO_x86_64.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Format.h" @@ -64,6 +65,8 @@ void jitLink_MachO(std::unique_ptr Ctx) { }); switch (Header.cputype) { + case MachO::CPU_TYPE_ARM64: + return jitLink_MachO_arm64(std::move(Ctx)); case MachO::CPU_TYPE_X86_64: return jitLink_MachO_x86_64(std::move(Ctx)); } diff --git a/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.cpp b/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.cpp deleted file mode 100644 index 1501c7ad0bc5..000000000000 --- a/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.cpp +++ /dev/null @@ -1,411 +0,0 @@ -//=--------- MachOAtomGraphBuilder.cpp - MachO AtomGraph builder ----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Generic MachO AtomGraph buliding code. -// -//===----------------------------------------------------------------------===// - -#include "MachOAtomGraphBuilder.h" - -#define DEBUG_TYPE "jitlink" - -namespace llvm { -namespace jitlink { - -MachOAtomGraphBuilder::~MachOAtomGraphBuilder() {} - -Expected> MachOAtomGraphBuilder::buildGraph() { - if (auto Err = parseSections()) - return std::move(Err); - - if (auto Err = addAtoms()) - return std::move(Err); - - if (auto Err = addRelocations()) - return std::move(Err); - - return std::move(G); -} - -MachOAtomGraphBuilder::MachOAtomGraphBuilder(const object::MachOObjectFile &Obj) - : Obj(Obj), - G(llvm::make_unique(Obj.getFileName(), getPointerSize(Obj), - getEndianness(Obj))) {} - -void MachOAtomGraphBuilder::addCustomAtomizer(StringRef SectionName, - CustomAtomizeFunction Atomizer) { - assert(!CustomAtomizeFunctions.count(SectionName) && - "Custom atomizer for this section already exists"); - CustomAtomizeFunctions[SectionName] = std::move(Atomizer); -} - -bool MachOAtomGraphBuilder::areLayoutLocked(const Atom &A, const Atom &B) { - // If these atoms are the same then they're trivially "locked". - if (&A == &B) - return true; - - // If A and B are different, check whether either is undefined. (in which - // case they are not locked). - if (!A.isDefined() || !B.isDefined()) - return false; - - // A and B are different, but they're both defined atoms. We need to check - // whether they're part of the same alt_entry chain. - auto &DA = static_cast(A); - auto &DB = static_cast(B); - - auto AStartItr = AltEntryStarts.find(&DA); - if (AStartItr == AltEntryStarts.end()) // If A is not in a chain bail out. - return false; - - auto BStartItr = AltEntryStarts.find(&DB); - if (BStartItr == AltEntryStarts.end()) // If B is not in a chain bail out. - return false; - - // A and B are layout locked if they're in the same chain. - return AStartItr->second == BStartItr->second; -} - -unsigned -MachOAtomGraphBuilder::getPointerSize(const object::MachOObjectFile &Obj) { - return Obj.is64Bit() ? 
8 : 4; -} - -support::endianness -MachOAtomGraphBuilder::getEndianness(const object::MachOObjectFile &Obj) { - return Obj.isLittleEndian() ? support::little : support::big; -} - -MachOAtomGraphBuilder::MachOSection &MachOAtomGraphBuilder::getCommonSection() { - if (!CommonSymbolsSection) { - auto Prot = static_cast( - sys::Memory::MF_READ | sys::Memory::MF_WRITE); - auto &GenericSection = G->createSection("", 1, Prot, true); - CommonSymbolsSection = MachOSection(GenericSection); - } - return *CommonSymbolsSection; -} - -Error MachOAtomGraphBuilder::parseSections() { - for (auto &SecRef : Obj.sections()) { - assert((SecRef.getAlignment() <= std::numeric_limits::max()) && - "Section alignment does not fit in 32 bits"); - - StringRef Name; - if (auto EC = SecRef.getName(Name)) - return errorCodeToError(EC); - - unsigned SectionIndex = SecRef.getIndex() + 1; - - uint32_t Align = SecRef.getAlignment(); - if (!isPowerOf2_32(Align)) - return make_error("Section " + Name + - " has non-power-of-2 " - "alignment"); - - // FIXME: Get real section permissions - // How, exactly, on MachO? - sys::Memory::ProtectionFlags Prot; - if (SecRef.isText()) - Prot = static_cast(sys::Memory::MF_READ | - sys::Memory::MF_EXEC); - else - Prot = static_cast(sys::Memory::MF_READ | - sys::Memory::MF_WRITE); - - auto &GenericSection = G->createSection(Name, Align, Prot, SecRef.isBSS()); - - LLVM_DEBUG({ - dbgs() << "Adding section " << Name << ": " - << format("0x%016" PRIx64, SecRef.getAddress()) - << ", align: " << SecRef.getAlignment() << "\n"; - }); - - assert(!Sections.count(SectionIndex) && "Section index already in use"); - - auto &MachOSec = - Sections - .try_emplace(SectionIndex, GenericSection, SecRef.getAddress(), - SecRef.getAlignment()) - .first->second; - - if (!SecRef.isVirtual()) { - // If this section has content then record it. - Expected Content = SecRef.getContents(); - if (!Content) - return Content.takeError(); - if (Content->size() != SecRef.getSize()) - return make_error("Section content size does not match " - "declared size for " + - Name); - MachOSec.setContent(*Content); - } else { - // If this is a zero-fill section then just record the size. - MachOSec.setZeroFill(SecRef.getSize()); - } - - uint32_t SectionFlags = - Obj.is64Bit() ? Obj.getSection64(SecRef.getRawDataRefImpl()).flags - : Obj.getSection(SecRef.getRawDataRefImpl()).flags; - - MachOSec.setNoDeadStrip(SectionFlags & MachO::S_ATTR_NO_DEAD_STRIP); - } - - return Error::success(); -} - -// Adds atoms with identified start addresses (but not lengths) for all named -// atoms. -// Also, for every section that contains named atoms, but does not have an -// atom at offset zero of that section, constructs an anonymous atom covering -// that range. -Error MachOAtomGraphBuilder::addNonCustomAtoms() { - using AddrToAtomMap = std::map; - DenseMap SecToAtoms; - - DenseMap FirstOrdinal; - std::vector AltEntryAtoms; - - DenseSet ProcessedSymbols; // Used to check for duplicate defs. - - for (auto SymI = Obj.symbol_begin(), SymE = Obj.symbol_end(); SymI != SymE; - ++SymI) { - object::SymbolRef Sym(SymI->getRawDataRefImpl(), &Obj); - - auto Name = Sym.getName(); - if (!Name) - return Name.takeError(); - - // Bail out on duplicate definitions: There should never be more than one - // definition for a symbol in a given object file. 
- if (ProcessedSymbols.count(*Name)) - return make_error("Duplicate definition within object: " + - *Name); - else - ProcessedSymbols.insert(*Name); - - auto Addr = Sym.getAddress(); - if (!Addr) - return Addr.takeError(); - - auto SymType = Sym.getType(); - if (!SymType) - return SymType.takeError(); - - auto Flags = Sym.getFlags(); - - if (Flags & object::SymbolRef::SF_Undefined) { - LLVM_DEBUG(dbgs() << "Adding undef atom \"" << *Name << "\"\n"); - G->addExternalAtom(*Name); - continue; - } else if (Flags & object::SymbolRef::SF_Absolute) { - LLVM_DEBUG(dbgs() << "Adding absolute \"" << *Name << "\" addr: " - << format("0x%016" PRIx64, *Addr) << "\n"); - auto &A = G->addAbsoluteAtom(*Name, *Addr); - A.setGlobal(Flags & object::SymbolRef::SF_Global); - A.setExported(Flags & object::SymbolRef::SF_Exported); - A.setWeak(Flags & object::SymbolRef::SF_Weak); - continue; - } else if (Flags & object::SymbolRef::SF_Common) { - LLVM_DEBUG({ - dbgs() << "Adding common \"" << *Name - << "\" addr: " << format("0x%016" PRIx64, *Addr) << "\n"; - }); - auto &A = - G->addCommonAtom(getCommonSection().getGenericSection(), *Name, *Addr, - std::max(Sym.getAlignment(), 1U), - Obj.getCommonSymbolSize(Sym.getRawDataRefImpl())); - A.setGlobal(Flags & object::SymbolRef::SF_Global); - A.setExported(Flags & object::SymbolRef::SF_Exported); - continue; - } - - LLVM_DEBUG(dbgs() << "Adding defined atom \"" << *Name << "\"\n"); - - // This atom is neither undefined nor absolute, so it must be defined in - // this object. Get its section index. - auto SecItr = Sym.getSection(); - if (!SecItr) - return SecItr.takeError(); - - uint64_t SectionIndex = (*SecItr)->getIndex() + 1; - - LLVM_DEBUG(dbgs() << " to section index " << SectionIndex << "\n"); - - auto SecByIndexItr = Sections.find(SectionIndex); - if (SecByIndexItr == Sections.end()) - return make_error("Unrecognized section index in macho"); - - auto &Sec = SecByIndexItr->second; - - auto &DA = G->addDefinedAtom(Sec.getGenericSection(), *Name, *Addr, - std::max(Sym.getAlignment(), 1U)); - - DA.setGlobal(Flags & object::SymbolRef::SF_Global); - DA.setExported(Flags & object::SymbolRef::SF_Exported); - DA.setWeak(Flags & object::SymbolRef::SF_Weak); - - DA.setCallable(*SymType & object::SymbolRef::ST_Function); - - // Check NDesc flags. - { - uint16_t NDesc = 0; - if (Obj.is64Bit()) - NDesc = Obj.getSymbol64TableEntry(SymI->getRawDataRefImpl()).n_desc; - else - NDesc = Obj.getSymbolTableEntry(SymI->getRawDataRefImpl()).n_desc; - - // Record atom for alt-entry post-processing (where the layout-next - // constraints will be added). - if (NDesc & MachO::N_ALT_ENTRY) - AltEntryAtoms.push_back(&DA); - - // If this atom has a no-dead-strip attr attached then mark it live. - if (NDesc & MachO::N_NO_DEAD_STRIP) - DA.setLive(true); - } - - LLVM_DEBUG({ - dbgs() << " Added " << *Name - << " addr: " << format("0x%016" PRIx64, *Addr) - << ", align: " << DA.getAlignment() - << ", section: " << Sec.getGenericSection().getName() << "\n"; - }); - - auto &SecAtoms = SecToAtoms[&Sec]; - SecAtoms[DA.getAddress() - Sec.getAddress()] = &DA; - } - - // Add anonymous atoms. - for (auto &KV : Sections) { - auto &S = KV.second; - - // Skip empty sections. - if (S.empty()) - continue; - - // Skip sections with custom handling. - if (CustomAtomizeFunctions.count(S.getName())) - continue; - - auto SAI = SecToAtoms.find(&S); - - // If S is not in the SecToAtoms map then it contained no named atom. Add - // one anonymous atom to cover the whole section. 
- if (SAI == SecToAtoms.end()) { - SecToAtoms[&S][0] = &G->addAnonymousAtom( - S.getGenericSection(), S.getAddress(), S.getAlignment()); - continue; - } - - // Otherwise, check whether this section had an atom covering offset zero. - // If not, add one. - auto &SecAtoms = SAI->second; - if (!SecAtoms.count(0)) - SecAtoms[0] = &G->addAnonymousAtom(S.getGenericSection(), S.getAddress(), - S.getAlignment()); - } - - LLVM_DEBUG(dbgs() << "MachOGraphBuilder setting atom content\n"); - - // Set atom contents and any section-based flags. - for (auto &KV : SecToAtoms) { - auto &S = *KV.first; - auto &SecAtoms = KV.second; - - // Iterate the atoms in reverse order and set up their contents. - JITTargetAddress LastAtomAddr = S.getSize(); - for (auto I = SecAtoms.rbegin(), E = SecAtoms.rend(); I != E; ++I) { - auto Offset = I->first; - auto &A = *I->second; - LLVM_DEBUG({ - dbgs() << " " << A << " to [ " << S.getAddress() + Offset << " .. " - << S.getAddress() + LastAtomAddr << " ]\n"; - }); - - if (S.isZeroFill()) - A.setZeroFill(LastAtomAddr - Offset); - else - A.setContent(S.getContent().substr(Offset, LastAtomAddr - Offset)); - - // If the section has no-dead-strip set then mark the atom as live. - if (S.isNoDeadStrip()) - A.setLive(true); - - LastAtomAddr = Offset; - } - } - - LLVM_DEBUG(dbgs() << "Adding alt-entry starts\n"); - - // Sort alt-entry atoms by address in ascending order. - llvm::sort(AltEntryAtoms.begin(), AltEntryAtoms.end(), - [](const DefinedAtom *LHS, const DefinedAtom *RHS) { - return LHS->getAddress() < RHS->getAddress(); - }); - - // Process alt-entry atoms in address order to build the table of alt-entry - // atoms to alt-entry chain starts. - for (auto *DA : AltEntryAtoms) { - assert(!AltEntryStarts.count(DA) && "Duplicate entry in AltEntryStarts"); - - // DA is an alt-entry atom. Look for the predecessor atom that it is locked - // to, bailing out if we do not find one. - auto AltEntryPred = G->findAtomByAddress(DA->getAddress() - 1); - if (!AltEntryPred) - return AltEntryPred.takeError(); - - // Add a LayoutNext edge from the predecessor to this atom. - AltEntryPred->setLayoutNext(*DA); - - // Check to see whether the predecessor itself is an alt-entry atom. - auto AltEntryStartItr = AltEntryStarts.find(&*AltEntryPred); - if (AltEntryStartItr != AltEntryStarts.end()) { - // If the predecessor was an alt-entry atom then re-use its value. - LLVM_DEBUG({ - dbgs() << " " << *DA << " -> " << *AltEntryStartItr->second - << " (based on existing entry for " << *AltEntryPred << ")\n"; - }); - AltEntryStarts[DA] = AltEntryStartItr->second; - } else { - // If the predecessor does not have an entry then add an entry for this - // atom (i.e. the alt_entry atom) and a self-reference entry for the - /// predecessory atom that is the start of this chain. - LLVM_DEBUG({ - dbgs() << " " << *AltEntryPred << " -> " << *AltEntryPred << "\n" - << " " << *DA << " -> " << *AltEntryPred << "\n"; - }); - AltEntryStarts[&*AltEntryPred] = &*AltEntryPred; - AltEntryStarts[DA] = &*AltEntryPred; - } - } - - return Error::success(); -} - -Error MachOAtomGraphBuilder::addAtoms() { - // Add all named atoms. - if (auto Err = addNonCustomAtoms()) - return Err; - - // Process special sections. 
- for (auto &KV : Sections) { - auto &S = KV.second; - auto HI = CustomAtomizeFunctions.find(S.getGenericSection().getName()); - if (HI != CustomAtomizeFunctions.end()) { - auto &Atomize = HI->second; - if (auto Err = Atomize(S)) - return Err; - } - } - - return Error::success(); -} - -} // end namespace jitlink -} // end namespace llvm diff --git a/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.h b/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.h deleted file mode 100644 index 72d441b24d06..000000000000 --- a/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.h +++ /dev/null @@ -1,138 +0,0 @@ -//===----- MachOAtomGraphBuilder.h - MachO AtomGraph builder ----*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Generic MachO AtomGraph building code. -// -//===----------------------------------------------------------------------===// - -#ifndef LIB_EXECUTIONENGINE_JITLINK_MACHOATOMGRAPHBUILDER_H -#define LIB_EXECUTIONENGINE_JITLINK_MACHOATOMGRAPHBUILDER_H - -#include "llvm/ExecutionEngine/JITLink/JITLink.h" - -#include "JITLinkGeneric.h" - -#include "llvm/Object/MachO.h" - -namespace llvm { -namespace jitlink { - -class MachOAtomGraphBuilder { -public: - virtual ~MachOAtomGraphBuilder(); - Expected> buildGraph(); - -protected: - using OffsetToAtomMap = std::map; - - class MachOSection { - public: - MachOSection() = default; - - /// Create a MachO section with the given address and alignment. - MachOSection(Section &GenericSection, JITTargetAddress Address, - unsigned Alignment) - : Address(Address), GenericSection(&GenericSection), - Alignment(Alignment) {} - - /// Create a section without address, content or size (used for common - /// symbol sections). 
- MachOSection(Section &GenericSection) : GenericSection(&GenericSection) {} - - Section &getGenericSection() const { - assert(GenericSection && "Section is null"); - return *GenericSection; - } - - StringRef getName() const { - assert(GenericSection && "No generic section attached"); - return GenericSection->getName(); - } - - MachOSection &setContent(StringRef Content) { - assert(!ContentPtr && !Size && "Content/zeroFill already set"); - ContentPtr = Content.data(); - Size = Content.size(); - return *this; - } - - MachOSection &setZeroFill(uint64_t Size) { - assert(!ContentPtr && !this->Size && "Content/zeroFill already set"); - this->Size = Size; - return *this; - } - - bool isZeroFill() const { return !ContentPtr; } - - bool empty() const { return getSize() == 0; } - - size_t getSize() const { return Size; } - - StringRef getContent() const { - assert(ContentPtr && "getContent() called on zero-fill section"); - return {ContentPtr, static_cast(Size)}; - } - - JITTargetAddress getAddress() const { return Address; } - - unsigned getAlignment() const { return Alignment; } - - MachOSection &setNoDeadStrip(bool NoDeadStrip) { - this->NoDeadStrip = NoDeadStrip; - return *this; - } - - bool isNoDeadStrip() const { return NoDeadStrip; } - - private: - JITTargetAddress Address = 0; - Section *GenericSection = nullptr; - const char *ContentPtr = nullptr; - uint64_t Size = 0; - unsigned Alignment = 0; - bool NoDeadStrip = false; - }; - - using CustomAtomizeFunction = std::function; - - MachOAtomGraphBuilder(const object::MachOObjectFile &Obj); - - AtomGraph &getGraph() const { return *G; } - - const object::MachOObjectFile &getObject() const { return Obj; } - - void addCustomAtomizer(StringRef SectionName, CustomAtomizeFunction Atomizer); - - virtual Error addRelocations() = 0; - - /// Returns true if Atom A and Atom B are at a fixed offset from one another - /// (i.e. if they're part of the same alt-entry chain). - bool areLayoutLocked(const Atom &A, const Atom &B); - -private: - static unsigned getPointerSize(const object::MachOObjectFile &Obj); - static support::endianness getEndianness(const object::MachOObjectFile &Obj); - - MachOSection &getCommonSection(); - - Error parseSections(); - Error addNonCustomAtoms(); - Error addAtoms(); - - const object::MachOObjectFile &Obj; - std::unique_ptr G; - DenseMap AltEntryStarts; - DenseMap Sections; - StringMap CustomAtomizeFunctions; - Optional CommonSymbolsSection; -}; - -} // end namespace jitlink -} // end namespace llvm - -#endif // LIB_EXECUTIONENGINE_JITLINK_MACHOATOMGRAPHBUILDER_H diff --git a/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp b/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp new file mode 100644 index 000000000000..7366f53ebf36 --- /dev/null +++ b/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp @@ -0,0 +1,535 @@ +//=--------- MachOLinkGraphBuilder.cpp - MachO LinkGraph builder ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Generic MachO LinkGraph buliding code. 
+// +//===----------------------------------------------------------------------===// + +#include "MachOLinkGraphBuilder.h" + +#define DEBUG_TYPE "jitlink" + +static const char *CommonSectionName = "__common"; + +namespace llvm { +namespace jitlink { + +MachOLinkGraphBuilder::~MachOLinkGraphBuilder() {} + +Expected> MachOLinkGraphBuilder::buildGraph() { + + // Sanity check: we only operate on relocatable objects. + if (!Obj.isRelocatableObject()) + return make_error("Object is not a relocatable MachO"); + + if (auto Err = createNormalizedSections()) + return std::move(Err); + + if (auto Err = createNormalizedSymbols()) + return std::move(Err); + + if (auto Err = graphifyRegularSymbols()) + return std::move(Err); + + if (auto Err = graphifySectionsWithCustomParsers()) + return std::move(Err); + + if (auto Err = addRelocations()) + return std::move(Err); + + return std::move(G); +} + +MachOLinkGraphBuilder::MachOLinkGraphBuilder(const object::MachOObjectFile &Obj) + : Obj(Obj), + G(std::make_unique(Obj.getFileName(), getPointerSize(Obj), + getEndianness(Obj))) {} + +void MachOLinkGraphBuilder::addCustomSectionParser( + StringRef SectionName, SectionParserFunction Parser) { + assert(!CustomSectionParserFunctions.count(SectionName) && + "Custom parser for this section already exists"); + CustomSectionParserFunctions[SectionName] = std::move(Parser); +} + +Linkage MachOLinkGraphBuilder::getLinkage(uint16_t Desc) { + if ((Desc & MachO::N_WEAK_DEF) || (Desc & MachO::N_WEAK_REF)) + return Linkage::Weak; + return Linkage::Strong; +} + +Scope MachOLinkGraphBuilder::getScope(StringRef Name, uint8_t Type) { + if (Name.startswith("l")) + return Scope::Local; + if (Type & MachO::N_PEXT) + return Scope::Hidden; + if (Type & MachO::N_EXT) + return Scope::Default; + return Scope::Local; +} + +bool MachOLinkGraphBuilder::isAltEntry(const NormalizedSymbol &NSym) { + return NSym.Desc & MachO::N_ALT_ENTRY; +} + +unsigned +MachOLinkGraphBuilder::getPointerSize(const object::MachOObjectFile &Obj) { + return Obj.is64Bit() ? 8 : 4; +} + +support::endianness +MachOLinkGraphBuilder::getEndianness(const object::MachOObjectFile &Obj) { + return Obj.isLittleEndian() ? support::little : support::big; +} + +Section &MachOLinkGraphBuilder::getCommonSection() { + if (!CommonSection) { + auto Prot = static_cast( + sys::Memory::MF_READ | sys::Memory::MF_WRITE); + CommonSection = &G->createSection(CommonSectionName, Prot); + } + return *CommonSection; +} + +Error MachOLinkGraphBuilder::createNormalizedSections() { + // Build normalized sections. Verifies that section data is in-range (for + // sections with content) and that address ranges are non-overlapping. 
+ + LLVM_DEBUG(dbgs() << "Creating normalized sections...\n"); + + for (auto &SecRef : Obj.sections()) { + NormalizedSection NSec; + uint32_t DataOffset = 0; + + auto SecIndex = Obj.getSectionIndex(SecRef.getRawDataRefImpl()); + + auto Name = SecRef.getName(); + if (!Name) + return Name.takeError(); + + if (Obj.is64Bit()) { + const MachO::section_64 &Sec64 = + Obj.getSection64(SecRef.getRawDataRefImpl()); + + NSec.Address = Sec64.addr; + NSec.Size = Sec64.size; + NSec.Alignment = 1ULL << Sec64.align; + NSec.Flags = Sec64.flags; + DataOffset = Sec64.offset; + } else { + const MachO::section &Sec32 = Obj.getSection(SecRef.getRawDataRefImpl()); + NSec.Address = Sec32.addr; + NSec.Size = Sec32.size; + NSec.Alignment = 1ULL << Sec32.align; + NSec.Flags = Sec32.flags; + DataOffset = Sec32.offset; + } + + LLVM_DEBUG({ + dbgs() << " " << *Name << ": " << formatv("{0:x16}", NSec.Address) + << " -- " << formatv("{0:x16}", NSec.Address + NSec.Size) + << ", align: " << NSec.Alignment << ", index: " << SecIndex + << "\n"; + }); + + // Get the section data if any. + { + unsigned SectionType = NSec.Flags & MachO::SECTION_TYPE; + if (SectionType != MachO::S_ZEROFILL && + SectionType != MachO::S_GB_ZEROFILL) { + + if (DataOffset + NSec.Size > Obj.getData().size()) + return make_error( + "Section data extends past end of file"); + + NSec.Data = Obj.getData().data() + DataOffset; + } + } + + // Get prot flags. + // FIXME: Make sure this test is correct (it's probably missing cases + // as-is). + sys::Memory::ProtectionFlags Prot; + if (NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) + Prot = static_cast(sys::Memory::MF_READ | + sys::Memory::MF_EXEC); + else + Prot = static_cast(sys::Memory::MF_READ | + sys::Memory::MF_WRITE); + + NSec.GraphSection = &G->createSection(*Name, Prot); + IndexToSection.insert(std::make_pair(SecIndex, std::move(NSec))); + } + + std::vector Sections; + Sections.reserve(IndexToSection.size()); + for (auto &KV : IndexToSection) + Sections.push_back(&KV.second); + + // If we didn't end up creating any sections then bail out. The code below + // assumes that we have at least one section. 
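The non-overlap part of the verification follows immediately below: once the normalized sections are sorted by start address, only adjacent pairs can overlap. The same idea in a self-contained sketch (a hypothetical Range type rather than NormalizedSection):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

struct Range {
  uint64_t Address;
  uint64_t Size;
};

// After an ascending sort by start address, two ranges overlap exactly when
// the next one starts before the current one ends -- the adjacent-pair test
// used by createNormalizedSections().
bool anyOverlap(std::vector<Range> Rs) {
  std::sort(Rs.begin(), Rs.end(),
            [](const Range &L, const Range &R) { return L.Address < R.Address; });
  for (std::size_t I = 0; I + 1 < Rs.size(); ++I)
    if (Rs[I + 1].Address < Rs[I].Address + Rs[I].Size)
      return true;
  return false;
}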
+ if (Sections.empty()) + return Error::success(); + + llvm::sort(Sections, + [](const NormalizedSection *LHS, const NormalizedSection *RHS) { + assert(LHS && RHS && "Null section?"); + return LHS->Address < RHS->Address; + }); + + for (unsigned I = 0, E = Sections.size() - 1; I != E; ++I) { + auto &Cur = *Sections[I]; + auto &Next = *Sections[I + 1]; + if (Next.Address < Cur.Address + Cur.Size) + return make_error( + "Address range for section " + Cur.GraphSection->getName() + + formatv(" [ {0:x16} -- {1:x16} ] ", Cur.Address, + Cur.Address + Cur.Size) + + "overlaps " + + formatv(" [ {0:x16} -- {1:x16} ] ", Next.Address, + Next.Address + Next.Size)); + } + + return Error::success(); +} + +Error MachOLinkGraphBuilder::createNormalizedSymbols() { + LLVM_DEBUG(dbgs() << "Creating normalized symbols...\n"); + + for (auto &SymRef : Obj.symbols()) { + + unsigned SymbolIndex = Obj.getSymbolIndex(SymRef.getRawDataRefImpl()); + uint64_t Value; + uint32_t NStrX; + uint8_t Type; + uint8_t Sect; + uint16_t Desc; + + if (Obj.is64Bit()) { + const MachO::nlist_64 &NL64 = + Obj.getSymbol64TableEntry(SymRef.getRawDataRefImpl()); + Value = NL64.n_value; + NStrX = NL64.n_strx; + Type = NL64.n_type; + Sect = NL64.n_sect; + Desc = NL64.n_desc; + } else { + const MachO::nlist &NL32 = + Obj.getSymbolTableEntry(SymRef.getRawDataRefImpl()); + Value = NL32.n_value; + NStrX = NL32.n_strx; + Type = NL32.n_type; + Sect = NL32.n_sect; + Desc = NL32.n_desc; + } + + // Skip stabs. + // FIXME: Are there other symbols we should be skipping? + if (Type & MachO::N_STAB) + continue; + + Optional Name; + if (NStrX) { + if (auto NameOrErr = SymRef.getName()) + Name = *NameOrErr; + else + return NameOrErr.takeError(); + } + + LLVM_DEBUG({ + dbgs() << " "; + if (!Name) + dbgs() << ""; + else + dbgs() << *Name; + dbgs() << ": value = " << formatv("{0:x16}", Value) + << ", type = " << formatv("{0:x2}", Type) + << ", desc = " << formatv("{0:x4}", Desc) << ", sect = "; + if (Sect) + dbgs() << static_cast(Sect - 1); + else + dbgs() << "none"; + dbgs() << "\n"; + }); + + // If this symbol has a section, sanity check that the addresses line up. + NormalizedSection *NSec = nullptr; + if (Sect != 0) { + if (auto NSecOrErr = findSectionByIndex(Sect - 1)) + NSec = &*NSecOrErr; + else + return NSecOrErr.takeError(); + + if (Value < NSec->Address || Value > NSec->Address + NSec->Size) + return make_error("Symbol address does not fall within " + "section"); + } + + IndexToSymbol[SymbolIndex] = + &createNormalizedSymbol(*Name, Value, Type, Sect, Desc, + getLinkage(Type), getScope(*Name, Type)); + } + + return Error::success(); +} + +void MachOLinkGraphBuilder::addSectionStartSymAndBlock( + Section &GraphSec, uint64_t Address, const char *Data, uint64_t Size, + uint32_t Alignment, bool IsLive) { + Block &B = + Data ? G->createContentBlock(GraphSec, StringRef(Data, Size), Address, + Alignment, 0) + : G->createZeroFillBlock(GraphSec, Size, Address, Alignment, 0); + auto &Sym = G->addAnonymousSymbol(B, 0, Size, false, IsLive); + assert(!AddrToCanonicalSymbol.count(Sym.getAddress()) && + "Anonymous block start symbol clashes with existing symbol address"); + AddrToCanonicalSymbol[Sym.getAddress()] = &Sym; +} + +Error MachOLinkGraphBuilder::graphifyRegularSymbols() { + + LLVM_DEBUG(dbgs() << "Creating graph symbols...\n"); + + /// We only have 256 section indexes: Use a vector rather than a map. 
+ std::vector> SecIndexToSymbols; + SecIndexToSymbols.resize(256); + + // Create commons, externs, and absolutes, and partition all other symbols by + // section. + for (auto &KV : IndexToSymbol) { + auto &NSym = *KV.second; + + switch (NSym.Type & MachO::N_TYPE) { + case MachO::N_UNDF: + if (NSym.Value) { + if (!NSym.Name) + return make_error("Anonymous common symbol at index " + + Twine(KV.first)); + NSym.GraphSymbol = &G->addCommonSymbol( + *NSym.Name, NSym.S, getCommonSection(), NSym.Value, 0, + 1ull << MachO::GET_COMM_ALIGN(NSym.Desc), + NSym.Desc & MachO::N_NO_DEAD_STRIP); + } else { + if (!NSym.Name) + return make_error("Anonymous external symbol at " + "index " + + Twine(KV.first)); + NSym.GraphSymbol = &G->addExternalSymbol(*NSym.Name, 0); + } + break; + case MachO::N_ABS: + if (!NSym.Name) + return make_error("Anonymous absolute symbol at index " + + Twine(KV.first)); + NSym.GraphSymbol = &G->addAbsoluteSymbol( + *NSym.Name, NSym.Value, 0, Linkage::Strong, Scope::Default, + NSym.Desc & MachO::N_NO_DEAD_STRIP); + break; + case MachO::N_SECT: + SecIndexToSymbols[NSym.Sect - 1].push_back(&NSym); + break; + case MachO::N_PBUD: + return make_error( + "Unupported N_PBUD symbol " + + (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("")) + + " at index " + Twine(KV.first)); + case MachO::N_INDR: + return make_error( + "Unupported N_INDR symbol " + + (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("")) + + " at index " + Twine(KV.first)); + default: + return make_error( + "Unrecognized symbol type " + Twine(NSym.Type & MachO::N_TYPE) + + " for symbol " + + (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("")) + + " at index " + Twine(KV.first)); + } + } + + // Loop over sections performing regular graphification for those that + // don't have custom parsers. + for (auto &KV : IndexToSection) { + auto SecIndex = KV.first; + auto &NSec = KV.second; + + // Skip sections with custom parsers. + if (CustomSectionParserFunctions.count(NSec.GraphSection->getName())) { + LLVM_DEBUG({ + dbgs() << " Skipping section " << NSec.GraphSection->getName() + << " as it has a custom parser.\n"; + }); + continue; + } else + LLVM_DEBUG({ + dbgs() << " Processing section " << NSec.GraphSection->getName() + << "...\n"; + }); + + bool SectionIsNoDeadStrip = NSec.Flags & MachO::S_ATTR_NO_DEAD_STRIP; + bool SectionIsText = NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS; + + auto &SecNSymStack = SecIndexToSymbols[SecIndex]; + + // If this section is non-empty but there are no symbols covering it then + // create one block and anonymous symbol to cover the entire section. + if (SecNSymStack.empty()) { + if (NSec.Size > 0) { + LLVM_DEBUG({ + dbgs() << " Section non-empty, but contains no symbols. " + "Creating anonymous block to cover " + << formatv("{0:x16}", NSec.Address) << " -- " + << formatv("{0:x16}", NSec.Address + NSec.Size) << "\n"; + }); + addSectionStartSymAndBlock(*NSec.GraphSection, NSec.Address, NSec.Data, + NSec.Size, NSec.Alignment, + SectionIsNoDeadStrip); + } else + LLVM_DEBUG({ + dbgs() << " Section empty and contains no symbols. Skipping.\n"; + }); + continue; + } + + // Sort the symbol stack in by address, alt-entry status, scope, and name. + // We sort in reverse order so that symbols will be visited in the right + // order when we pop off the stack below. 
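The effect of the reverse sort below is easiest to see in isolation: sorting in descending address order means pop_back() later hands the symbols back in ascending address order. A minimal sketch with plain integers standing in for symbol addresses (the real comparator additionally breaks ties on alt-entry status, scope, and name):

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  std::vector<uint64_t> Stack = {0x10, 0x40, 0x20, 0x30};

  // Descending ("reverse") sort, as in the llvm::sort call that follows.
  std::sort(Stack.begin(), Stack.end(),
            [](uint64_t L, uint64_t R) { return L > R; });

  // Popping off the back now visits the addresses in ascending order:
  // 0x10, 0x20, 0x30, 0x40.
  while (!Stack.empty()) {
    std::cout << std::hex << Stack.back() << "\n";
    Stack.pop_back();
  }
}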
+ llvm::sort(SecNSymStack, [](const NormalizedSymbol *LHS, + const NormalizedSymbol *RHS) { + if (LHS->Value != RHS->Value) + return LHS->Value > RHS->Value; + if (isAltEntry(*LHS) != isAltEntry(*RHS)) + return isAltEntry(*RHS); + if (LHS->S != RHS->S) + return static_cast(LHS->S) < static_cast(RHS->S); + return LHS->Name < RHS->Name; + }); + + // The first symbol in a section can not be an alt-entry symbol. + if (!SecNSymStack.empty() && isAltEntry(*SecNSymStack.back())) + return make_error( + "First symbol in " + NSec.GraphSection->getName() + " is alt-entry"); + + // If the section is non-empty but there is no symbol covering the start + // address then add an anonymous one. + if (SecNSymStack.back()->Value != NSec.Address) { + auto AnonBlockSize = SecNSymStack.back()->Value - NSec.Address; + LLVM_DEBUG({ + dbgs() << " Section start not covered by symbol. " + << "Creating anonymous block to cover [ " + << formatv("{0:x16}", NSec.Address) << " -- " + << formatv("{0:x16}", NSec.Address + AnonBlockSize) << " ]\n"; + }); + addSectionStartSymAndBlock(*NSec.GraphSection, NSec.Address, NSec.Data, + AnonBlockSize, NSec.Alignment, + SectionIsNoDeadStrip); + } + + // Visit section symbols in order by popping off the reverse-sorted stack, + // building blocks for each alt-entry chain and creating symbols as we go. + while (!SecNSymStack.empty()) { + SmallVector BlockSyms; + + BlockSyms.push_back(SecNSymStack.back()); + SecNSymStack.pop_back(); + while (!SecNSymStack.empty() && + (isAltEntry(*SecNSymStack.back()) || + SecNSymStack.back()->Value == BlockSyms.back()->Value)) { + BlockSyms.push_back(SecNSymStack.back()); + SecNSymStack.pop_back(); + } + + // BlockNSyms now contains the block symbols in reverse canonical order. + JITTargetAddress BlockStart = BlockSyms.front()->Value; + JITTargetAddress BlockEnd = SecNSymStack.empty() + ? NSec.Address + NSec.Size + : SecNSymStack.back()->Value; + JITTargetAddress BlockOffset = BlockStart - NSec.Address; + JITTargetAddress BlockSize = BlockEnd - BlockStart; + + LLVM_DEBUG({ + dbgs() << " Creating block for " << formatv("{0:x16}", BlockStart) + << " -- " << formatv("{0:x16}", BlockEnd) << ": " + << NSec.GraphSection->getName() << " + " + << formatv("{0:x16}", BlockOffset) << " with " + << BlockSyms.size() << " symbol(s)...\n"; + }); + + Block &B = + NSec.Data + ? G->createContentBlock( + *NSec.GraphSection, + StringRef(NSec.Data + BlockOffset, BlockSize), BlockStart, + NSec.Alignment, BlockStart % NSec.Alignment) + : G->createZeroFillBlock(*NSec.GraphSection, BlockSize, + BlockStart, NSec.Alignment, + BlockStart % NSec.Alignment); + + Optional LastCanonicalAddr; + JITTargetAddress SymEnd = BlockEnd; + while (!BlockSyms.empty()) { + auto &NSym = *BlockSyms.back(); + BlockSyms.pop_back(); + + bool SymLive = + (NSym.Desc & MachO::N_NO_DEAD_STRIP) || SectionIsNoDeadStrip; + + LLVM_DEBUG({ + dbgs() << " " << formatv("{0:x16}", NSym.Value) << " -- " + << formatv("{0:x16}", SymEnd) << ": "; + if (!NSym.Name) + dbgs() << ""; + else + dbgs() << NSym.Name; + if (SymLive) + dbgs() << " [no-dead-strip]"; + if (LastCanonicalAddr == NSym.Value) + dbgs() << " [non-canonical]"; + dbgs() << "\n"; + }); + + auto &Sym = + NSym.Name + ? 
G->addDefinedSymbol(B, NSym.Value - BlockStart, *NSym.Name, + SymEnd - NSym.Value, NSym.L, NSym.S, + SectionIsText, SymLive) + : G->addAnonymousSymbol(B, NSym.Value - BlockStart, + SymEnd - NSym.Value, SectionIsText, + SymLive); + NSym.GraphSymbol = &Sym; + if (LastCanonicalAddr != Sym.getAddress()) { + if (LastCanonicalAddr) + SymEnd = *LastCanonicalAddr; + LastCanonicalAddr = Sym.getAddress(); + setCanonicalSymbol(Sym); + } + } + } + } + + return Error::success(); +} + +Error MachOLinkGraphBuilder::graphifySectionsWithCustomParsers() { + // Graphify special sections. + for (auto &KV : IndexToSection) { + auto &NSec = KV.second; + + auto HI = CustomSectionParserFunctions.find(NSec.GraphSection->getName()); + if (HI != CustomSectionParserFunctions.end()) { + auto &Parse = HI->second; + if (auto Err = Parse(NSec)) + return Err; + } + } + + return Error::success(); +} + +} // end namespace jitlink +} // end namespace llvm diff --git a/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h b/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h new file mode 100644 index 000000000000..e1123cd11048 --- /dev/null +++ b/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h @@ -0,0 +1,269 @@ +//===----- MachOLinkGraphBuilder.h - MachO LinkGraph builder ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Generic MachO LinkGraph building code. +// +//===----------------------------------------------------------------------===// + +#ifndef LIB_EXECUTIONENGINE_JITLINK_MACHOLINKGRAPHBUILDER_H +#define LIB_EXECUTIONENGINE_JITLINK_MACHOLINKGRAPHBUILDER_H + +#include "llvm/ExecutionEngine/JITLink/JITLink.h" + +#include "EHFrameSupportImpl.h" +#include "JITLinkGeneric.h" +#include "llvm/Object/MachO.h" + +#include + +namespace llvm { +namespace jitlink { + +class MachOLinkGraphBuilder { +public: + virtual ~MachOLinkGraphBuilder(); + Expected> buildGraph(); + +protected: + class MachOEHFrameBinaryParser : public EHFrameBinaryParser { + public: + MachOEHFrameBinaryParser(MachOLinkGraphBuilder &Builder, + JITTargetAddress EHFrameAddress, + StringRef EHFrameContent, Section &EHFrameSection, + uint64_t CIEAlignment, uint64_t FDEAlignment, + Edge::Kind FDEToCIERelocKind, + Edge::Kind FDEToTargetRelocKind) + : EHFrameBinaryParser(EHFrameAddress, EHFrameContent, + Builder.getGraph().getPointerSize(), + Builder.getGraph().getEndianness()), + Builder(Builder), EHFrameSection(EHFrameSection), + CIEAlignment(CIEAlignment), FDEAlignment(FDEAlignment), + FDEToCIERelocKind(FDEToCIERelocKind), + FDEToTargetRelocKind(FDEToTargetRelocKind) {} + + Symbol *getSymbolAtAddress(JITTargetAddress Address) override { + if (auto *Sym = Builder.getSymbolByAddress(Address)) + if (Sym->getAddress() == Address) + return Sym; + return nullptr; + } + + Symbol &createCIERecord(JITTargetAddress RecordAddr, + StringRef RecordContent) override { + auto &G = Builder.getGraph(); + auto &B = G.createContentBlock(EHFrameSection, RecordContent, RecordAddr, + CIEAlignment, 0); + auto &CIESymbol = + G.addAnonymousSymbol(B, 0, RecordContent.size(), false, false); + Builder.setCanonicalSymbol(CIESymbol); + return CIESymbol; + } + + Expected createFDERecord(JITTargetAddress RecordAddr, + StringRef RecordContent, Symbol &CIE, + size_t CIEOffset, Symbol &Func, + size_t FuncOffset, Symbol *LSDA, 
+ size_t LSDAOffset) override { + auto &G = Builder.getGraph(); + auto &B = G.createContentBlock(EHFrameSection, RecordContent, RecordAddr, + FDEAlignment, 0); + + // Add edges to CIE, Func, and (conditionally) LSDA. + B.addEdge(FDEToCIERelocKind, CIEOffset, CIE, 0); + B.addEdge(FDEToTargetRelocKind, FuncOffset, Func, 0); + + if (LSDA) + B.addEdge(FDEToTargetRelocKind, LSDAOffset, *LSDA, 0); + + auto &FDESymbol = + G.addAnonymousSymbol(B, 0, RecordContent.size(), false, false); + + // Add a keep-alive relocation from the function to the FDE to ensure it + // is not dead stripped. + Func.getBlock().addEdge(Edge::KeepAlive, 0, FDESymbol, 0); + + return FDESymbol; + } + + private: + MachOLinkGraphBuilder &Builder; + Section &EHFrameSection; + uint64_t CIEAlignment; + uint64_t FDEAlignment; + Edge::Kind FDEToCIERelocKind; + Edge::Kind FDEToTargetRelocKind; + }; + + struct NormalizedSymbol { + friend class MachOLinkGraphBuilder; + + private: + NormalizedSymbol(Optional Name, uint64_t Value, uint8_t Type, + uint8_t Sect, uint16_t Desc, Linkage L, Scope S) + : Name(Name), Value(Value), Type(Type), Sect(Sect), Desc(Desc), L(L), + S(S) { + assert((!Name || !Name->empty()) && "Name must be none or non-empty"); + } + + public: + NormalizedSymbol(const NormalizedSymbol &) = delete; + NormalizedSymbol &operator=(const NormalizedSymbol &) = delete; + NormalizedSymbol(NormalizedSymbol &&) = delete; + NormalizedSymbol &operator=(NormalizedSymbol &&) = delete; + + Optional Name; + uint64_t Value = 0; + uint8_t Type = 0; + uint8_t Sect = 0; + uint16_t Desc = 0; + Linkage L = Linkage::Strong; + Scope S = Scope::Default; + Symbol *GraphSymbol = nullptr; + }; + + class NormalizedSection { + friend class MachOLinkGraphBuilder; + + private: + NormalizedSection() = default; + + public: + Section *GraphSection = nullptr; + uint64_t Address = 0; + uint64_t Size = 0; + uint64_t Alignment = 0; + uint32_t Flags = 0; + const char *Data = nullptr; + }; + + using SectionParserFunction = std::function; + + MachOLinkGraphBuilder(const object::MachOObjectFile &Obj); + + LinkGraph &getGraph() const { return *G; } + + const object::MachOObjectFile &getObject() const { return Obj; } + + void addCustomSectionParser(StringRef SectionName, + SectionParserFunction Parse); + + virtual Error addRelocations() = 0; + + /// Create a symbol. + template + NormalizedSymbol &createNormalizedSymbol(ArgTs &&... Args) { + NormalizedSymbol *Sym = reinterpret_cast( + Allocator.Allocate()); + new (Sym) NormalizedSymbol(std::forward(Args)...); + return *Sym; + } + + /// Index is zero-based (MachO section indexes are usually one-based) and + /// assumed to be in-range. Client is responsible for checking. + NormalizedSection &getSectionByIndex(unsigned Index) { + auto I = IndexToSection.find(Index); + assert(I != IndexToSection.end() && "No section recorded at index"); + return I->second; + } + + /// Try to get the section at the given index. Will return an error if the + /// given index is out of range, or if no section has been added for the given + /// index. + Expected findSectionByIndex(unsigned Index) { + auto I = IndexToSection.find(Index); + if (I == IndexToSection.end()) + return make_error("No section recorded for index " + + formatv("{0:u}", Index)); + return I->second; + } + + /// Try to get the symbol at the given index. Will return an error if the + /// given index is out of range, or if no symbol has been added for the given + /// index. 
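The address-based lookup a few lines further down (getSymbolByAddress) uses the classic upper_bound-then-step-back idiom on an ordered map to find the greatest recorded address not exceeding the query. A standalone sketch with std::map and plain int values standing in for Symbol pointers:

#include <cstdint>
#include <iterator>
#include <map>

// Returns the mapped value for the greatest key <= Address, or nullptr if
// every recorded key is above Address -- the same shape as
// getSymbolByAddress() below.
const int *lookupByAddress(const std::map<uint64_t, int> &AddrMap,
                           uint64_t Address) {
  auto I = AddrMap.upper_bound(Address); // first key strictly greater
  if (I == AddrMap.begin())
    return nullptr;
  return &std::prev(I)->second;
}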
+ Expected findSymbolByIndex(uint64_t Index) { + if (Index >= IndexToSymbol.size()) + return make_error("Symbol index out of range"); + auto *Sym = IndexToSymbol[Index]; + if (!Sym) + return make_error("No symbol at index " + + formatv("{0:u}", Index)); + return *Sym; + } + + /// Returns the symbol with the highest address not greater than the search + /// address, or null if no such symbol exists. + Symbol *getSymbolByAddress(JITTargetAddress Address) { + auto I = AddrToCanonicalSymbol.upper_bound(Address); + if (I == AddrToCanonicalSymbol.begin()) + return nullptr; + return std::prev(I)->second; + } + + /// Returns the symbol with the highest address not greater than the search + /// address, or an error if no such symbol exists. + Expected findSymbolByAddress(JITTargetAddress Address) { + auto *Sym = getSymbolByAddress(Address); + if (Sym) + if (Address < Sym->getAddress() + Sym->getSize()) + return *Sym; + return make_error("No symbol covering address " + + formatv("{0:x16}", Address)); + } + + static Linkage getLinkage(uint16_t Desc); + static Scope getScope(StringRef Name, uint8_t Type); + static bool isAltEntry(const NormalizedSymbol &NSym); + +private: + static unsigned getPointerSize(const object::MachOObjectFile &Obj); + static support::endianness getEndianness(const object::MachOObjectFile &Obj); + + void setCanonicalSymbol(Symbol &Sym) { + auto *&CanonicalSymEntry = AddrToCanonicalSymbol[Sym.getAddress()]; + // There should be no symbol at this address, or, if there is, + // it should be a zero-sized symbol from an empty section (which + // we can safely override). + assert((!CanonicalSymEntry || CanonicalSymEntry->getSize() == 0) && + "Duplicate canonical symbol at address"); + CanonicalSymEntry = &Sym; + } + + Section &getCommonSection(); + void addSectionStartSymAndBlock(Section &GraphSec, uint64_t Address, + const char *Data, uint64_t Size, + uint32_t Alignment, bool IsLive); + + Error createNormalizedSections(); + Error createNormalizedSymbols(); + + /// Create graph blocks and symbols for externals, absolutes, commons and + /// all defined symbols in sections without custom parsers. + Error graphifyRegularSymbols(); + + /// Create graph blocks and symbols for all sections. + Error graphifySectionsWithCustomParsers(); + + // Put the BumpPtrAllocator first so that we don't free any of the underlying + // memory until the Symbol/Addressable destructors have been run. + BumpPtrAllocator Allocator; + + const object::MachOObjectFile &Obj; + std::unique_ptr G; + + DenseMap IndexToSection; + Section *CommonSection = nullptr; + + DenseMap IndexToSymbol; + std::map AddrToCanonicalSymbol; + StringMap CustomSectionParserFunctions; +}; + +} // end namespace jitlink +} // end namespace llvm + +#endif // LIB_EXECUTIONENGINE_JITLINK_MACHOLINKGRAPHBUILDER_H diff --git a/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/lib/ExecutionEngine/JITLink/MachO_arm64.cpp new file mode 100644 index 000000000000..945343bff89d --- /dev/null +++ b/lib/ExecutionEngine/JITLink/MachO_arm64.cpp @@ -0,0 +1,736 @@ +//===---- MachO_arm64.cpp - JIT linker implementation for MachO/arm64 -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// MachO/arm64 jit-link implementation. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/JITLink/MachO_arm64.h" + +#include "BasicGOTAndStubsBuilder.h" +#include "MachOLinkGraphBuilder.h" + +#define DEBUG_TYPE "jitlink" + +using namespace llvm; +using namespace llvm::jitlink; +using namespace llvm::jitlink::MachO_arm64_Edges; + +namespace { + +class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { +public: + MachOLinkGraphBuilder_arm64(const object::MachOObjectFile &Obj) + : MachOLinkGraphBuilder(Obj), + NumSymbols(Obj.getSymtabLoadCommand().nsyms) { + addCustomSectionParser( + "__eh_frame", [this](NormalizedSection &EHFrameSection) { + if (!EHFrameSection.Data) + return make_error( + "__eh_frame section is marked zero-fill"); + return MachOEHFrameBinaryParser( + *this, EHFrameSection.Address, + StringRef(EHFrameSection.Data, EHFrameSection.Size), + *EHFrameSection.GraphSection, 8, 4, NegDelta32, Delta64) + .addToGraph(); + }); + } + +private: + static Expected + getRelocationKind(const MachO::relocation_info &RI) { + switch (RI.r_type) { + case MachO::ARM64_RELOC_UNSIGNED: + if (!RI.r_pcrel) { + if (RI.r_length == 3) + return RI.r_extern ? Pointer64 : Pointer64Anon; + else if (RI.r_length == 2) + return Pointer32; + } + break; + case MachO::ARM64_RELOC_SUBTRACTOR: + // SUBTRACTOR must be non-pc-rel, extern, with length 2 or 3. + // Initially represent SUBTRACTOR relocations with 'Delta'. + // They may be turned into NegDelta by parsePairRelocation. + if (!RI.r_pcrel && RI.r_extern) { + if (RI.r_length == 2) + return Delta32; + else if (RI.r_length == 3) + return Delta64; + } + break; + case MachO::ARM64_RELOC_BRANCH26: + if (RI.r_pcrel && RI.r_extern && RI.r_length == 2) + return Branch26; + break; + case MachO::ARM64_RELOC_PAGE21: + if (RI.r_pcrel && RI.r_extern && RI.r_length == 2) + return Page21; + break; + case MachO::ARM64_RELOC_PAGEOFF12: + if (!RI.r_pcrel && RI.r_extern && RI.r_length == 2) + return PageOffset12; + break; + case MachO::ARM64_RELOC_GOT_LOAD_PAGE21: + if (RI.r_pcrel && RI.r_extern && RI.r_length == 2) + return GOTPage21; + break; + case MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12: + if (!RI.r_pcrel && RI.r_extern && RI.r_length == 2) + return GOTPageOffset12; + break; + case MachO::ARM64_RELOC_POINTER_TO_GOT: + if (RI.r_pcrel && RI.r_extern && RI.r_length == 2) + return PointerToGOT; + break; + case MachO::ARM64_RELOC_ADDEND: + if (!RI.r_pcrel && !RI.r_extern && RI.r_length == 2) + return PairedAddend; + break; + } + + return make_error( + "Unsupported arm64 relocation: address=" + + formatv("{0:x8}", RI.r_address) + + ", symbolnum=" + formatv("{0:x6}", RI.r_symbolnum) + + ", kind=" + formatv("{0:x1}", RI.r_type) + + ", pc_rel=" + (RI.r_pcrel ? "true" : "false") + + ", extern=" + (RI.r_extern ? "true" : "false") + + ", length=" + formatv("{0:d}", RI.r_length)); + } + + MachO::relocation_info + getRelocationInfo(const object::relocation_iterator RelItr) { + MachO::any_relocation_info ARI = + getObject().getRelocation(RelItr->getRawDataRefImpl()); + MachO::relocation_info RI; + memcpy(&RI, &ARI, sizeof(MachO::relocation_info)); + return RI; + } + + using PairRelocInfo = + std::tuple; + + // Parses paired SUBTRACTOR/UNSIGNED relocations and, on success, + // returns the edge kind and addend to be used. 
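At fixup time the Delta and NegDelta edges produced from a SUBTRACTOR/UNSIGNED pair differ only in the sign of the (target - fixup) term; the rest of the original A - B expression is carried in the addend. A standalone sketch of the resolution arithmetic (illustrative names, not the JITLink API):

#include <cstdint>

// Delta32 / Delta64: the value written at the fixup is Target - Fixup + Addend.
int64_t resolveDelta(uint64_t TargetAddr, uint64_t FixupAddr, int64_t Addend) {
  return (int64_t)TargetAddr - (int64_t)FixupAddr + Addend;
}

// NegDelta32 / NegDelta64: same magnitude, opposite orientation.
int64_t resolveNegDelta(uint64_t TargetAddr, uint64_t FixupAddr, int64_t Addend) {
  return (int64_t)FixupAddr - (int64_t)TargetAddr + Addend;
}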
+ Expected + parsePairRelocation(Block &BlockToFix, Edge::Kind SubtractorKind, + const MachO::relocation_info &SubRI, + JITTargetAddress FixupAddress, const char *FixupContent, + object::relocation_iterator &UnsignedRelItr, + object::relocation_iterator &RelEnd) { + using namespace support; + + assert(((SubtractorKind == Delta32 && SubRI.r_length == 2) || + (SubtractorKind == Delta64 && SubRI.r_length == 3)) && + "Subtractor kind should match length"); + assert(SubRI.r_extern && "SUBTRACTOR reloc symbol should be extern"); + assert(!SubRI.r_pcrel && "SUBTRACTOR reloc should not be PCRel"); + + if (UnsignedRelItr == RelEnd) + return make_error("arm64 SUBTRACTOR without paired " + "UNSIGNED relocation"); + + auto UnsignedRI = getRelocationInfo(UnsignedRelItr); + + if (SubRI.r_address != UnsignedRI.r_address) + return make_error("arm64 SUBTRACTOR and paired UNSIGNED " + "point to different addresses"); + + if (SubRI.r_length != UnsignedRI.r_length) + return make_error("length of arm64 SUBTRACTOR and paired " + "UNSIGNED reloc must match"); + + Symbol *FromSymbol; + if (auto FromSymbolOrErr = findSymbolByIndex(SubRI.r_symbolnum)) + FromSymbol = FromSymbolOrErr->GraphSymbol; + else + return FromSymbolOrErr.takeError(); + + // Read the current fixup value. + uint64_t FixupValue = 0; + if (SubRI.r_length == 3) + FixupValue = *(const little64_t *)FixupContent; + else + FixupValue = *(const little32_t *)FixupContent; + + // Find 'ToSymbol' using symbol number or address, depending on whether the + // paired UNSIGNED relocation is extern. + Symbol *ToSymbol = nullptr; + if (UnsignedRI.r_extern) { + // Find target symbol by symbol index. + if (auto ToSymbolOrErr = findSymbolByIndex(UnsignedRI.r_symbolnum)) + ToSymbol = ToSymbolOrErr->GraphSymbol; + else + return ToSymbolOrErr.takeError(); + } else { + if (auto ToSymbolOrErr = findSymbolByAddress(FixupValue)) + ToSymbol = &*ToSymbolOrErr; + else + return ToSymbolOrErr.takeError(); + FixupValue -= ToSymbol->getAddress(); + } + + MachOARM64RelocationKind DeltaKind; + Symbol *TargetSymbol; + uint64_t Addend; + if (&BlockToFix == &FromSymbol->getAddressable()) { + TargetSymbol = ToSymbol; + DeltaKind = (SubRI.r_length == 3) ? Delta64 : Delta32; + Addend = FixupValue + (FixupAddress - FromSymbol->getAddress()); + // FIXME: handle extern 'from'. + } else if (&BlockToFix == &ToSymbol->getAddressable()) { + TargetSymbol = &*FromSymbol; + DeltaKind = (SubRI.r_length == 3) ? NegDelta64 : NegDelta32; + Addend = FixupValue - (FixupAddress - ToSymbol->getAddress()); + } else { + // BlockToFix was neither FromSymbol nor ToSymbol. + return make_error("SUBTRACTOR relocation must fix up " + "either 'A' or 'B' (or a symbol in one " + "of their alt-entry groups)"); + } + + return PairRelocInfo(DeltaKind, TargetSymbol, Addend); + } + + Error addRelocations() override { + using namespace support; + auto &Obj = getObject(); + + for (auto &S : Obj.sections()) { + + JITTargetAddress SectionAddress = S.getAddress(); + + for (auto RelItr = S.relocation_begin(), RelEnd = S.relocation_end(); + RelItr != RelEnd; ++RelItr) { + + MachO::relocation_info RI = getRelocationInfo(RelItr); + + // Sanity check the relocation kind. + auto Kind = getRelocationKind(RI); + if (!Kind) + return Kind.takeError(); + + // Find the address of the value to fix up. 
+ JITTargetAddress FixupAddress = SectionAddress + (uint32_t)RI.r_address; + + LLVM_DEBUG({ + dbgs() << "Processing " << getMachOARM64RelocationKindName(*Kind) + << " relocation at " << format("0x%016" PRIx64, FixupAddress) + << "\n"; + }); + + // Find the block that the fixup points to. + Block *BlockToFix = nullptr; + { + auto SymbolToFixOrErr = findSymbolByAddress(FixupAddress); + if (!SymbolToFixOrErr) + return SymbolToFixOrErr.takeError(); + BlockToFix = &SymbolToFixOrErr->getBlock(); + } + + if (FixupAddress + static_cast(1ULL << RI.r_length) > + BlockToFix->getAddress() + BlockToFix->getContent().size()) + return make_error( + "Relocation content extends past end of fixup block"); + + // Get a pointer to the fixup content. + const char *FixupContent = BlockToFix->getContent().data() + + (FixupAddress - BlockToFix->getAddress()); + + // The target symbol and addend will be populated by the switch below. + Symbol *TargetSymbol = nullptr; + uint64_t Addend = 0; + + if (*Kind == PairedAddend) { + // If this is an Addend relocation then process it and move to the + // paired reloc. + + Addend = RI.r_symbolnum; + + if (RelItr == RelEnd) + return make_error("Unpaired Addend reloc at " + + formatv("{0:x16}", FixupAddress)); + ++RelItr; + RI = getRelocationInfo(RelItr); + + Kind = getRelocationKind(RI); + if (!Kind) + return Kind.takeError(); + + if (*Kind != Branch26 && *Kind != Page21 && *Kind != PageOffset12) + return make_error( + "Invalid relocation pair: Addend + " + + getMachOARM64RelocationKindName(*Kind)); + else + LLVM_DEBUG({ + dbgs() << " pair is " << getMachOARM64RelocationKindName(*Kind) + << "`\n"; + }); + + // Find the address of the value to fix up. + JITTargetAddress PairedFixupAddress = + SectionAddress + (uint32_t)RI.r_address; + if (PairedFixupAddress != FixupAddress) + return make_error("Paired relocation points at " + "different target"); + } + + switch (*Kind) { + case Branch26: { + if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) + TargetSymbol = TargetSymbolOrErr->GraphSymbol; + else + return TargetSymbolOrErr.takeError(); + uint32_t Instr = *(const ulittle32_t *)FixupContent; + if ((Instr & 0x7fffffff) != 0x14000000) + return make_error("BRANCH26 target is not a B or BL " + "instruction with a zero addend"); + break; + } + case Pointer32: + if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) + TargetSymbol = TargetSymbolOrErr->GraphSymbol; + else + return TargetSymbolOrErr.takeError(); + Addend = *(const ulittle32_t *)FixupContent; + break; + case Pointer64: + if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) + TargetSymbol = TargetSymbolOrErr->GraphSymbol; + else + return TargetSymbolOrErr.takeError(); + Addend = *(const ulittle64_t *)FixupContent; + break; + case Pointer64Anon: { + JITTargetAddress TargetAddress = *(const ulittle64_t *)FixupContent; + if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress)) + TargetSymbol = &*TargetSymbolOrErr; + else + return TargetSymbolOrErr.takeError(); + Addend = TargetAddress - TargetSymbol->getAddress(); + break; + } + case Page21: + case GOTPage21: { + if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) + TargetSymbol = TargetSymbolOrErr->GraphSymbol; + else + return TargetSymbolOrErr.takeError(); + uint32_t Instr = *(const ulittle32_t *)FixupContent; + if ((Instr & 0xffffffe0) != 0x90000000) + return make_error("PAGE21/GOTPAGE21 target is not an " + "ADRP instruction with a zero " + "addend"); + break; + } + case PageOffset12: { + if (auto TargetSymbolOrErr = 
findSymbolByIndex(RI.r_symbolnum)) + TargetSymbol = TargetSymbolOrErr->GraphSymbol; + else + return TargetSymbolOrErr.takeError(); + break; + } + case GOTPageOffset12: { + if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) + TargetSymbol = TargetSymbolOrErr->GraphSymbol; + else + return TargetSymbolOrErr.takeError(); + uint32_t Instr = *(const ulittle32_t *)FixupContent; + if ((Instr & 0xfffffc00) != 0xf9400000) + return make_error("GOTPAGEOFF12 target is not an LDR " + "immediate instruction with a zero " + "addend"); + break; + } + case PointerToGOT: + if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) + TargetSymbol = TargetSymbolOrErr->GraphSymbol; + else + return TargetSymbolOrErr.takeError(); + break; + case Delta32: + case Delta64: { + // We use Delta32/Delta64 to represent SUBTRACTOR relocations. + // parsePairRelocation handles the paired reloc, and returns the + // edge kind to be used (either Delta32/Delta64, or + // NegDelta32/NegDelta64, depending on the direction of the + // subtraction) along with the addend. + auto PairInfo = + parsePairRelocation(*BlockToFix, *Kind, RI, FixupAddress, + FixupContent, ++RelItr, RelEnd); + if (!PairInfo) + return PairInfo.takeError(); + std::tie(*Kind, TargetSymbol, Addend) = *PairInfo; + assert(TargetSymbol && "No target symbol from parsePairRelocation?"); + break; + } + default: + llvm_unreachable("Special relocation kind should not appear in " + "mach-o file"); + } + + LLVM_DEBUG({ + Edge GE(*Kind, FixupAddress - BlockToFix->getAddress(), *TargetSymbol, + Addend); + printEdge(dbgs(), *BlockToFix, GE, + getMachOARM64RelocationKindName(*Kind)); + dbgs() << "\n"; + }); + BlockToFix->addEdge(*Kind, FixupAddress - BlockToFix->getAddress(), + *TargetSymbol, Addend); + } + } + return Error::success(); + } + + unsigned NumSymbols = 0; +}; + +class MachO_arm64_GOTAndStubsBuilder + : public BasicGOTAndStubsBuilder { +public: + MachO_arm64_GOTAndStubsBuilder(LinkGraph &G) + : BasicGOTAndStubsBuilder(G) {} + + bool isGOTEdge(Edge &E) const { + return E.getKind() == GOTPage21 || E.getKind() == GOTPageOffset12 || + E.getKind() == PointerToGOT; + } + + Symbol &createGOTEntry(Symbol &Target) { + auto &GOTEntryBlock = G.createContentBlock( + getGOTSection(), getGOTEntryBlockContent(), 0, 8, 0); + GOTEntryBlock.addEdge(Pointer64, 0, Target, 0); + return G.addAnonymousSymbol(GOTEntryBlock, 0, 8, false, false); + } + + void fixGOTEdge(Edge &E, Symbol &GOTEntry) { + if (E.getKind() == GOTPage21 || E.getKind() == GOTPageOffset12) { + // Update the target, but leave the edge addend as-is. + E.setTarget(GOTEntry); + } else if (E.getKind() == PointerToGOT) { + E.setTarget(GOTEntry); + E.setKind(Delta32); + } else + llvm_unreachable("Not a GOT edge?"); + } + + bool isExternalBranchEdge(Edge &E) { + return E.getKind() == Branch26 && !E.getTarget().isDefined(); + } + + Symbol &createStub(Symbol &Target) { + auto &StubContentBlock = + G.createContentBlock(getStubsSection(), getStubBlockContent(), 0, 1, 0); + // Re-use GOT entries for stub targets. 
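getGOTEntrySymbol(Target), called on the next line, comes from BasicGOTAndStubsBuilder; the reuse it provides is essentially get-or-create memoisation keyed on the target symbol, along the lines of this sketch (hypothetical types, standard library only):

#include <string>
#include <unordered_map>

struct Entry { std::string Target; };

// Get-or-create cache: every stub and every GOT-relative edge that refers to
// the same target ends up sharing a single GOT entry.
class GOTEntryCache {
  std::unordered_map<std::string, Entry> Entries;

public:
  Entry &getOrCreate(const std::string &Target) {
    auto It = Entries.find(Target);
    if (It != Entries.end())
      return It->second;                          // reuse the existing entry
    return Entries.emplace(Target, Entry{Target}).first->second;
  }
};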
+ auto &GOTEntrySymbol = getGOTEntrySymbol(Target); + StubContentBlock.addEdge(LDRLiteral19, 0, GOTEntrySymbol, 0); + return G.addAnonymousSymbol(StubContentBlock, 0, 8, true, false); + } + + void fixExternalBranchEdge(Edge &E, Symbol &Stub) { + assert(E.getKind() == Branch26 && "Not a Branch32 edge?"); + assert(E.getAddend() == 0 && "Branch32 edge has non-zero addend?"); + E.setTarget(Stub); + } + +private: + Section &getGOTSection() { + if (!GOTSection) + GOTSection = &G.createSection("$__GOT", sys::Memory::MF_READ); + return *GOTSection; + } + + Section &getStubsSection() { + if (!StubsSection) { + auto StubsProt = static_cast( + sys::Memory::MF_READ | sys::Memory::MF_EXEC); + StubsSection = &G.createSection("$__STUBS", StubsProt); + } + return *StubsSection; + } + + StringRef getGOTEntryBlockContent() { + return StringRef(reinterpret_cast(NullGOTEntryContent), + sizeof(NullGOTEntryContent)); + } + + StringRef getStubBlockContent() { + return StringRef(reinterpret_cast(StubContent), + sizeof(StubContent)); + } + + static const uint8_t NullGOTEntryContent[8]; + static const uint8_t StubContent[8]; + Section *GOTSection = nullptr; + Section *StubsSection = nullptr; +}; + +const uint8_t MachO_arm64_GOTAndStubsBuilder::NullGOTEntryContent[8] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; +const uint8_t MachO_arm64_GOTAndStubsBuilder::StubContent[8] = { + 0x10, 0x00, 0x00, 0x58, // LDR x16, + 0x00, 0x02, 0x1f, 0xd6 // BR x16 +}; + +} // namespace + +namespace llvm { +namespace jitlink { + +class MachOJITLinker_arm64 : public JITLinker { + friend class JITLinker; + +public: + MachOJITLinker_arm64(std::unique_ptr Ctx, + PassConfiguration PassConfig) + : JITLinker(std::move(Ctx), std::move(PassConfig)) {} + +private: + StringRef getEdgeKindName(Edge::Kind R) const override { + return getMachOARM64RelocationKindName(R); + } + + Expected> + buildGraph(MemoryBufferRef ObjBuffer) override { + auto MachOObj = object::ObjectFile::createMachOObjectFile(ObjBuffer); + if (!MachOObj) + return MachOObj.takeError(); + return MachOLinkGraphBuilder_arm64(**MachOObj).buildGraph(); + } + + static Error targetOutOfRangeError(const Block &B, const Edge &E) { + std::string ErrMsg; + { + raw_string_ostream ErrStream(ErrMsg); + ErrStream << "Relocation target out of range: "; + printEdge(ErrStream, B, E, getMachOARM64RelocationKindName(E.getKind())); + ErrStream << "\n"; + } + return make_error(std::move(ErrMsg)); + } + + static unsigned getPageOffset12Shift(uint32_t Instr) { + constexpr uint32_t LDRLiteralMask = 0x3ffffc00; + + // Check for a GPR LDR immediate with a zero embedded literal. + // If found, the top two bits contain the shift. + if ((Instr & LDRLiteralMask) == 0x39400000) + return Instr >> 30; + + // Check for a Neon LDR immediate of size 64-bit or less with a zero + // embedded literal. If found, the top two bits contain the shift. + if ((Instr & LDRLiteralMask) == 0x3d400000) + return Instr >> 30; + + // Check for a Neon LDR immediate of size 128-bit with a zero embedded + // literal. 
+ constexpr uint32_t SizeBitsMask = 0xc0000000; + if ((Instr & (LDRLiteralMask | SizeBitsMask)) == 0x3dc00000) + return 4; + + return 0; + } + + Error applyFixup(Block &B, const Edge &E, char *BlockWorkingMem) const { + using namespace support; + + char *FixupPtr = BlockWorkingMem + E.getOffset(); + JITTargetAddress FixupAddress = B.getAddress() + E.getOffset(); + + switch (E.getKind()) { + case Branch26: { + assert((FixupAddress & 0x3) == 0 && "Branch-inst is not 32-bit aligned"); + + int64_t Value = E.getTarget().getAddress() - FixupAddress + E.getAddend(); + + if (static_cast(Value) & 0x3) + return make_error("Branch26 target is not 32-bit " + "aligned"); + + if (Value < -(1 << 27) || Value > ((1 << 27) - 1)) + return targetOutOfRangeError(B, E); + + uint32_t RawInstr = *(little32_t *)FixupPtr; + assert((RawInstr & 0x7fffffff) == 0x14000000 && + "RawInstr isn't a B or BR immediate instruction"); + uint32_t Imm = (static_cast(Value) & ((1 << 28) - 1)) >> 2; + uint32_t FixedInstr = RawInstr | Imm; + *(little32_t *)FixupPtr = FixedInstr; + break; + } + case Pointer32: { + uint64_t Value = E.getTarget().getAddress() + E.getAddend(); + if (Value > std::numeric_limits::max()) + return targetOutOfRangeError(B, E); + *(ulittle32_t *)FixupPtr = Value; + break; + } + case Pointer64: { + uint64_t Value = E.getTarget().getAddress() + E.getAddend(); + *(ulittle64_t *)FixupPtr = Value; + break; + } + case Page21: + case GOTPage21: { + assert(E.getAddend() == 0 && "PAGE21/GOTPAGE21 with non-zero addend"); + uint64_t TargetPage = + E.getTarget().getAddress() & ~static_cast(4096 - 1); + uint64_t PCPage = B.getAddress() & ~static_cast(4096 - 1); + + int64_t PageDelta = TargetPage - PCPage; + if (PageDelta < -(1 << 30) || PageDelta > ((1 << 30) - 1)) + return targetOutOfRangeError(B, E); + + uint32_t RawInstr = *(ulittle32_t *)FixupPtr; + assert((RawInstr & 0xffffffe0) == 0x90000000 && + "RawInstr isn't an ADRP instruction"); + uint32_t ImmLo = (static_cast(PageDelta) >> 12) & 0x3; + uint32_t ImmHi = (static_cast(PageDelta) >> 14) & 0x7ffff; + uint32_t FixedInstr = RawInstr | (ImmLo << 29) | (ImmHi << 5); + *(ulittle32_t *)FixupPtr = FixedInstr; + break; + } + case PageOffset12: { + assert(E.getAddend() == 0 && "PAGEOFF12 with non-zero addend"); + uint64_t TargetOffset = E.getTarget().getAddress() & 0xfff; + + uint32_t RawInstr = *(ulittle32_t *)FixupPtr; + unsigned ImmShift = getPageOffset12Shift(RawInstr); + + if (TargetOffset & ((1 << ImmShift) - 1)) + return make_error("PAGEOFF12 target is not aligned"); + + uint32_t EncodedImm = (TargetOffset >> ImmShift) << 10; + uint32_t FixedInstr = RawInstr | EncodedImm; + *(ulittle32_t *)FixupPtr = FixedInstr; + break; + } + case GOTPageOffset12: { + assert(E.getAddend() == 0 && "GOTPAGEOF12 with non-zero addend"); + + uint32_t RawInstr = *(ulittle32_t *)FixupPtr; + assert((RawInstr & 0xfffffc00) == 0xf9400000 && + "RawInstr isn't a 64-bit LDR immediate"); + + uint32_t TargetOffset = E.getTarget().getAddress() & 0xfff; + assert((TargetOffset & 0x7) == 0 && "GOT entry is not 8-byte aligned"); + uint32_t EncodedImm = (TargetOffset >> 3) << 10; + uint32_t FixedInstr = RawInstr | EncodedImm; + *(ulittle32_t *)FixupPtr = FixedInstr; + break; + } + case LDRLiteral19: { + assert((FixupAddress & 0x3) == 0 && "LDR is not 32-bit aligned"); + assert(E.getAddend() == 0 && "LDRLiteral19 with non-zero addend"); + uint32_t RawInstr = *(ulittle32_t *)FixupPtr; + assert(RawInstr == 0x58000010 && "RawInstr isn't a 64-bit LDR literal"); + int64_t Delta = E.getTarget().getAddress() 
- FixupAddress; + if (Delta & 0x3) + return make_error("LDR literal target is not 32-bit " + "aligned"); + if (Delta < -(1 << 20) || Delta > ((1 << 20) - 1)) + return targetOutOfRangeError(B, E); + + uint32_t EncodedImm = (static_cast(Delta) >> 2) << 5; + uint32_t FixedInstr = RawInstr | EncodedImm; + *(ulittle32_t *)FixupPtr = FixedInstr; + break; + } + case Delta32: + case Delta64: + case NegDelta32: + case NegDelta64: { + int64_t Value; + if (E.getKind() == Delta32 || E.getKind() == Delta64) + Value = E.getTarget().getAddress() - FixupAddress + E.getAddend(); + else + Value = FixupAddress - E.getTarget().getAddress() + E.getAddend(); + + if (E.getKind() == Delta32 || E.getKind() == NegDelta32) { + if (Value < std::numeric_limits::min() || + Value > std::numeric_limits::max()) + return targetOutOfRangeError(B, E); + *(little32_t *)FixupPtr = Value; + } else + *(little64_t *)FixupPtr = Value; + break; + } + default: + llvm_unreachable("Unrecognized edge kind"); + } + + return Error::success(); + } + + uint64_t NullValue = 0; +}; + +void jitLink_MachO_arm64(std::unique_ptr Ctx) { + PassConfiguration Config; + Triple TT("arm64-apple-ios"); + + if (Ctx->shouldAddDefaultTargetPasses(TT)) { + // Add a mark-live pass. + if (auto MarkLive = Ctx->getMarkLivePass(TT)) + Config.PrePrunePasses.push_back(std::move(MarkLive)); + else + Config.PrePrunePasses.push_back(markAllSymbolsLive); + + // Add an in-place GOT/Stubs pass. + Config.PostPrunePasses.push_back([](LinkGraph &G) -> Error { + MachO_arm64_GOTAndStubsBuilder(G).run(); + return Error::success(); + }); + } + + if (auto Err = Ctx->modifyPassConfig(TT, Config)) + return Ctx->notifyFailed(std::move(Err)); + + // Construct a JITLinker and run the link function. + MachOJITLinker_arm64::link(std::move(Ctx), std::move(Config)); +} + +StringRef getMachOARM64RelocationKindName(Edge::Kind R) { + switch (R) { + case Branch26: + return "Branch26"; + case Pointer64: + return "Pointer64"; + case Pointer64Anon: + return "Pointer64Anon"; + case Page21: + return "Page21"; + case PageOffset12: + return "PageOffset12"; + case GOTPage21: + return "GOTPage21"; + case GOTPageOffset12: + return "GOTPageOffset12"; + case PointerToGOT: + return "PointerToGOT"; + case PairedAddend: + return "PairedAddend"; + case LDRLiteral19: + return "LDRLiteral19"; + case Delta32: + return "Delta32"; + case Delta64: + return "Delta64"; + case NegDelta32: + return "NegDelta32"; + case NegDelta64: + return "NegDelta64"; + default: + return getGenericEdgeKindName(static_cast(R)); + } +} + +} // end namespace jitlink +} // end namespace llvm diff --git a/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp index 4010678c6d33..d83787ffd598 100644 --- a/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp +++ b/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp @@ -13,7 +13,7 @@ #include "llvm/ExecutionEngine/JITLink/MachO_x86_64.h" #include "BasicGOTAndStubsBuilder.h" -#include "MachOAtomGraphBuilder.h" +#include "MachOLinkGraphBuilder.h" #define DEBUG_TYPE "jitlink" @@ -23,16 +23,21 @@ using namespace llvm::jitlink::MachO_x86_64_Edges; namespace { -class MachOAtomGraphBuilder_x86_64 : public MachOAtomGraphBuilder { +class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { public: - MachOAtomGraphBuilder_x86_64(const object::MachOObjectFile &Obj) - : MachOAtomGraphBuilder(Obj), - NumSymbols(Obj.getSymtabLoadCommand().nsyms) { - addCustomAtomizer("__eh_frame", [this](MachOSection &EHFrameSection) { - return addEHFrame(getGraph(), 
EHFrameSection.getGenericSection(), - EHFrameSection.getContent(), - EHFrameSection.getAddress(), NegDelta32, Delta64); - }); + MachOLinkGraphBuilder_x86_64(const object::MachOObjectFile &Obj) + : MachOLinkGraphBuilder(Obj) { + addCustomSectionParser( + "__eh_frame", [this](NormalizedSection &EHFrameSection) { + if (!EHFrameSection.Data) + return make_error( + "__eh_frame section is marked zero-fill"); + return MachOEHFrameBinaryParser( + *this, EHFrameSection.Address, + StringRef(EHFrameSection.Data, EHFrameSection.Size), + *EHFrameSection.GraphSection, 8, 4, NegDelta32, Delta64) + .addToGraph(); + }); } private: @@ -40,8 +45,12 @@ private: getRelocationKind(const MachO::relocation_info &RI) { switch (RI.r_type) { case MachO::X86_64_RELOC_UNSIGNED: - if (!RI.r_pcrel && RI.r_length == 3) - return RI.r_extern ? Pointer64 : Pointer64Anon; + if (!RI.r_pcrel) { + if (RI.r_length == 3) + return RI.r_extern ? Pointer64 : Pointer64Anon; + else if (RI.r_extern && RI.r_length == 2) + return Pointer32; + } break; case MachO::X86_64_RELOC_SIGNED: if (RI.r_pcrel && RI.r_length == 2) @@ -94,21 +103,10 @@ private: ", symbolnum=" + formatv("{0:x6}", RI.r_symbolnum) + ", kind=" + formatv("{0:x1}", RI.r_type) + ", pc_rel=" + (RI.r_pcrel ? "true" : "false") + - ", extern= " + (RI.r_extern ? "true" : "false") + + ", extern=" + (RI.r_extern ? "true" : "false") + ", length=" + formatv("{0:d}", RI.r_length)); } - Expected findAtomBySymbolIndex(const MachO::relocation_info &RI) { - auto &Obj = getObject(); - if (RI.r_symbolnum >= NumSymbols) - return make_error("Symbol index out of range"); - auto SymI = Obj.getSymbolByIndex(RI.r_symbolnum); - auto Name = SymI->getName(); - if (!Name) - return Name.takeError(); - return getGraph().getAtomByName(*Name); - } - MachO::relocation_info getRelocationInfo(const object::relocation_iterator RelItr) { MachO::any_relocation_info ARI = @@ -118,12 +116,12 @@ private: return RI; } - using PairRelocInfo = std::tuple; + using PairRelocInfo = std::tuple; // Parses paired SUBTRACTOR/UNSIGNED relocations and, on success, // returns the edge kind and addend to be used. Expected - parsePairRelocation(DefinedAtom &AtomToFix, Edge::Kind SubtractorKind, + parsePairRelocation(Block &BlockToFix, Edge::Kind SubtractorKind, const MachO::relocation_info &SubRI, JITTargetAddress FixupAddress, const char *FixupContent, object::relocation_iterator &UnsignedRelItr, @@ -150,9 +148,11 @@ private: return make_error("length of x86_64 SUBTRACTOR and paired " "UNSIGNED reloc must match"); - auto FromAtom = findAtomBySymbolIndex(SubRI); - if (!FromAtom) - return FromAtom.takeError(); + Symbol *FromSymbol; + if (auto FromSymbolOrErr = findSymbolByIndex(SubRI.r_symbolnum)) + FromSymbol = FromSymbolOrErr->GraphSymbol; + else + return FromSymbolOrErr.takeError(); // Read the current fixup value. uint64_t FixupValue = 0; @@ -161,54 +161,60 @@ private: else FixupValue = *(const little32_t *)FixupContent; - // Find 'ToAtom' using symbol number or address, depending on whether the + // Find 'ToSymbol' using symbol number or address, depending on whether the // paired UNSIGNED relocation is extern. - Atom *ToAtom = nullptr; + Symbol *ToSymbol = nullptr; if (UnsignedRI.r_extern) { - // Find target atom by symbol index. - if (auto ToAtomOrErr = findAtomBySymbolIndex(UnsignedRI)) - ToAtom = &*ToAtomOrErr; + // Find target symbol by symbol index. 
+ if (auto ToSymbolOrErr = findSymbolByIndex(UnsignedRI.r_symbolnum)) + ToSymbol = ToSymbolOrErr->GraphSymbol; else - return ToAtomOrErr.takeError(); + return ToSymbolOrErr.takeError(); } else { - if (auto ToAtomOrErr = getGraph().findAtomByAddress(FixupValue)) - ToAtom = &*ToAtomOrErr; + if (auto ToSymbolOrErr = findSymbolByAddress(FixupValue)) + ToSymbol = &*ToSymbolOrErr; else - return ToAtomOrErr.takeError(); - FixupValue -= ToAtom->getAddress(); + return ToSymbolOrErr.takeError(); + FixupValue -= ToSymbol->getAddress(); } MachOX86RelocationKind DeltaKind; - Atom *TargetAtom; + Symbol *TargetSymbol; uint64_t Addend; - if (areLayoutLocked(AtomToFix, *FromAtom)) { - TargetAtom = ToAtom; + if (&BlockToFix == &FromSymbol->getAddressable()) { + TargetSymbol = ToSymbol; DeltaKind = (SubRI.r_length == 3) ? Delta64 : Delta32; - Addend = FixupValue + (FixupAddress - FromAtom->getAddress()); + Addend = FixupValue + (FixupAddress - FromSymbol->getAddress()); // FIXME: handle extern 'from'. - } else if (areLayoutLocked(AtomToFix, *ToAtom)) { - TargetAtom = &*FromAtom; + } else if (&BlockToFix == &ToSymbol->getAddressable()) { + TargetSymbol = FromSymbol; DeltaKind = (SubRI.r_length == 3) ? NegDelta64 : NegDelta32; - Addend = FixupValue - (FixupAddress - ToAtom->getAddress()); + Addend = FixupValue - (FixupAddress - ToSymbol->getAddress()); } else { - // AtomToFix was neither FromAtom nor ToAtom. + // BlockToFix was neither FromSymbol nor ToSymbol. return make_error("SUBTRACTOR relocation must fix up " - "either 'A' or 'B' (or an atom in one " - "of their alt-entry groups)"); + "either 'A' or 'B' (or a symbol in one " + "of their alt-entry chains)"); } - return PairRelocInfo(DeltaKind, TargetAtom, Addend); + return PairRelocInfo(DeltaKind, TargetSymbol, Addend); } Error addRelocations() override { using namespace support; - auto &G = getGraph(); auto &Obj = getObject(); for (auto &S : Obj.sections()) { JITTargetAddress SectionAddress = S.getAddress(); + if (S.isVirtual()) { + if (S.relocation_begin() != S.relocation_end()) + return make_error("Virtual section contains " + "relocations"); + continue; + } + for (auto RelItr = S.relocation_begin(), RelEnd = S.relocation_end(); RelItr != RelEnd; ++RelItr) { @@ -227,26 +233,26 @@ private: << format("0x%016" PRIx64, FixupAddress) << "\n"; }); - // Find the atom that the fixup points to. - DefinedAtom *AtomToFix = nullptr; + // Find the block that the fixup points to. + Block *BlockToFix = nullptr; { - auto AtomToFixOrErr = G.findAtomByAddress(FixupAddress); - if (!AtomToFixOrErr) - return AtomToFixOrErr.takeError(); - AtomToFix = &*AtomToFixOrErr; + auto SymbolToFixOrErr = findSymbolByAddress(FixupAddress); + if (!SymbolToFixOrErr) + return SymbolToFixOrErr.takeError(); + BlockToFix = &SymbolToFixOrErr->getBlock(); } if (FixupAddress + static_cast(1ULL << RI.r_length) > - AtomToFix->getAddress() + AtomToFix->getContent().size()) + BlockToFix->getAddress() + BlockToFix->getContent().size()) return make_error( - "Relocation content extends past end of fixup atom"); + "Relocation extends past end of fixup block"); // Get a pointer to the fixup content. - const char *FixupContent = AtomToFix->getContent().data() + - (FixupAddress - AtomToFix->getAddress()); + const char *FixupContent = BlockToFix->getContent().data() + + (FixupAddress - BlockToFix->getAddress()); - // The target atom and addend will be populated by the switch below. - Atom *TargetAtom = nullptr; + // The target symbol and addend will be populated by the switch below. 
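For the *Anon kinds handled in the switch below, the value stored at the fixup is an address rather than a symbol index; the target symbol is found by address and the leftover distance becomes the edge addend. A sketch of that split for the PCRel32Anon case (plain integers, not the JITLink types):

#include <cstdint>

// PCRel32Anon: the stored 32-bit value is relative to the end of the 4-byte
// fixup, so the absolute target address is recovered first...
uint64_t anonTargetAddress(uint64_t FixupAddress, int32_t StoredValue) {
  return FixupAddress + 4 + (int64_t)StoredValue;
}

// ...and the covering symbol's address is then peeled off, leaving the addend.
int64_t anonAddend(uint64_t TargetAddress, uint64_t CoveringSymbolAddress) {
  return (int64_t)(TargetAddress - CoveringSymbolAddress);
}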
+ Symbol *TargetSymbol = nullptr; uint64_t Addend = 0; switch (*Kind) { @@ -254,46 +260,53 @@ private: case PCRel32: case PCRel32GOTLoad: case PCRel32GOT: - if (auto TargetAtomOrErr = findAtomBySymbolIndex(RI)) - TargetAtom = &*TargetAtomOrErr; + if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) + TargetSymbol = TargetSymbolOrErr->GraphSymbol; + else + return TargetSymbolOrErr.takeError(); + Addend = *(const ulittle32_t *)FixupContent; + break; + case Pointer32: + if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) + TargetSymbol = TargetSymbolOrErr->GraphSymbol; else - return TargetAtomOrErr.takeError(); + return TargetSymbolOrErr.takeError(); Addend = *(const ulittle32_t *)FixupContent; break; case Pointer64: - if (auto TargetAtomOrErr = findAtomBySymbolIndex(RI)) - TargetAtom = &*TargetAtomOrErr; + if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) + TargetSymbol = TargetSymbolOrErr->GraphSymbol; else - return TargetAtomOrErr.takeError(); + return TargetSymbolOrErr.takeError(); Addend = *(const ulittle64_t *)FixupContent; break; case Pointer64Anon: { JITTargetAddress TargetAddress = *(const ulittle64_t *)FixupContent; - if (auto TargetAtomOrErr = G.findAtomByAddress(TargetAddress)) - TargetAtom = &*TargetAtomOrErr; + if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress)) + TargetSymbol = &*TargetSymbolOrErr; else - return TargetAtomOrErr.takeError(); - Addend = TargetAddress - TargetAtom->getAddress(); + return TargetSymbolOrErr.takeError(); + Addend = TargetAddress - TargetSymbol->getAddress(); break; } case PCRel32Minus1: case PCRel32Minus2: case PCRel32Minus4: - if (auto TargetAtomOrErr = findAtomBySymbolIndex(RI)) - TargetAtom = &*TargetAtomOrErr; + if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) + TargetSymbol = TargetSymbolOrErr->GraphSymbol; else - return TargetAtomOrErr.takeError(); + return TargetSymbolOrErr.takeError(); Addend = *(const ulittle32_t *)FixupContent + (1 << (*Kind - PCRel32Minus1)); break; case PCRel32Anon: { JITTargetAddress TargetAddress = FixupAddress + 4 + *(const ulittle32_t *)FixupContent; - if (auto TargetAtomOrErr = G.findAtomByAddress(TargetAddress)) - TargetAtom = &*TargetAtomOrErr; + if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress)) + TargetSymbol = &*TargetSymbolOrErr; else - return TargetAtomOrErr.takeError(); - Addend = TargetAddress - TargetAtom->getAddress(); + return TargetSymbolOrErr.takeError(); + Addend = TargetAddress - TargetSymbol->getAddress(); break; } case PCRel32Minus1Anon: @@ -303,11 +316,11 @@ private: static_cast(1ULL << (*Kind - PCRel32Minus1Anon)); JITTargetAddress TargetAddress = FixupAddress + 4 + Delta + *(const ulittle32_t *)FixupContent; - if (auto TargetAtomOrErr = G.findAtomByAddress(TargetAddress)) - TargetAtom = &*TargetAtomOrErr; + if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress)) + TargetSymbol = &*TargetSymbolOrErr; else - return TargetAtomOrErr.takeError(); - Addend = TargetAddress - TargetAtom->getAddress(); + return TargetSymbolOrErr.takeError(); + Addend = TargetAddress - TargetSymbol->getAddress(); break; } case Delta32: @@ -318,12 +331,12 @@ private: // NegDelta32/NegDelta64, depending on the direction of the // subtraction) along with the addend. 
auto PairInfo = - parsePairRelocation(*AtomToFix, *Kind, RI, FixupAddress, + parsePairRelocation(*BlockToFix, *Kind, RI, FixupAddress, FixupContent, ++RelItr, RelEnd); if (!PairInfo) return PairInfo.takeError(); - std::tie(*Kind, TargetAtom, Addend) = *PairInfo; - assert(TargetAtom && "No target atom from parsePairRelocation?"); + std::tie(*Kind, TargetSymbol, Addend) = *PairInfo; + assert(TargetSymbol && "No target symbol from parsePairRelocation?"); break; } default: @@ -332,41 +345,38 @@ private: } LLVM_DEBUG({ - Edge GE(*Kind, FixupAddress - AtomToFix->getAddress(), *TargetAtom, + Edge GE(*Kind, FixupAddress - BlockToFix->getAddress(), *TargetSymbol, Addend); - printEdge(dbgs(), *AtomToFix, GE, + printEdge(dbgs(), *BlockToFix, GE, getMachOX86RelocationKindName(*Kind)); dbgs() << "\n"; }); - AtomToFix->addEdge(*Kind, FixupAddress - AtomToFix->getAddress(), - *TargetAtom, Addend); + BlockToFix->addEdge(*Kind, FixupAddress - BlockToFix->getAddress(), + *TargetSymbol, Addend); } } return Error::success(); } - - unsigned NumSymbols = 0; }; class MachO_x86_64_GOTAndStubsBuilder : public BasicGOTAndStubsBuilder { public: - MachO_x86_64_GOTAndStubsBuilder(AtomGraph &G) + MachO_x86_64_GOTAndStubsBuilder(LinkGraph &G) : BasicGOTAndStubsBuilder(G) {} bool isGOTEdge(Edge &E) const { return E.getKind() == PCRel32GOT || E.getKind() == PCRel32GOTLoad; } - DefinedAtom &createGOTEntry(Atom &Target) { - auto &GOTEntryAtom = G.addAnonymousAtom(getGOTSection(), 0x0, 8); - GOTEntryAtom.setContent( - StringRef(reinterpret_cast(NullGOTEntryContent), 8)); - GOTEntryAtom.addEdge(Pointer64, 0, Target, 0); - return GOTEntryAtom; + Symbol &createGOTEntry(Symbol &Target) { + auto &GOTEntryBlock = G.createContentBlock( + getGOTSection(), getGOTEntryBlockContent(), 0, 8, 0); + GOTEntryBlock.addEdge(Pointer64, 0, Target, 0); + return G.addAnonymousSymbol(GOTEntryBlock, 0, 8, false, false); } - void fixGOTEdge(Edge &E, Atom &GOTEntry) { + void fixGOTEdge(Edge &E, Symbol &GOTEntry) { assert((E.getKind() == PCRel32GOT || E.getKind() == PCRel32GOTLoad) && "Not a GOT edge?"); E.setKind(PCRel32); @@ -378,19 +388,16 @@ public: return E.getKind() == Branch32 && !E.getTarget().isDefined(); } - DefinedAtom &createStub(Atom &Target) { - auto &StubAtom = G.addAnonymousAtom(getStubsSection(), 0x0, 2); - StubAtom.setContent( - StringRef(reinterpret_cast(StubContent), 6)); - + Symbol &createStub(Symbol &Target) { + auto &StubContentBlock = + G.createContentBlock(getStubsSection(), getStubBlockContent(), 0, 1, 0); // Re-use GOT entries for stub targets. 
- auto &GOTEntryAtom = getGOTEntryAtom(Target); - StubAtom.addEdge(PCRel32, 2, GOTEntryAtom, 0); - - return StubAtom; + auto &GOTEntrySymbol = getGOTEntrySymbol(Target); + StubContentBlock.addEdge(PCRel32, 2, GOTEntrySymbol, 0); + return G.addAnonymousSymbol(StubContentBlock, 0, 6, true, false); } - void fixExternalBranchEdge(Edge &E, Atom &Stub) { + void fixExternalBranchEdge(Edge &E, Symbol &Stub) { assert(E.getKind() == Branch32 && "Not a Branch32 edge?"); assert(E.getAddend() == 0 && "Branch32 edge has non-zero addend?"); E.setTarget(Stub); @@ -399,7 +406,7 @@ public: private: Section &getGOTSection() { if (!GOTSection) - GOTSection = &G.createSection("$__GOT", 8, sys::Memory::MF_READ, false); + GOTSection = &G.createSection("$__GOT", sys::Memory::MF_READ); return *GOTSection; } @@ -407,11 +414,21 @@ private: if (!StubsSection) { auto StubsProt = static_cast( sys::Memory::MF_READ | sys::Memory::MF_EXEC); - StubsSection = &G.createSection("$__STUBS", 8, StubsProt, false); + StubsSection = &G.createSection("$__STUBS", StubsProt); } return *StubsSection; } + StringRef getGOTEntryBlockContent() { + return StringRef(reinterpret_cast(NullGOTEntryContent), + sizeof(NullGOTEntryContent)); + } + + StringRef getStubBlockContent() { + return StringRef(reinterpret_cast(StubContent), + sizeof(StubContent)); + } + static const uint8_t NullGOTEntryContent[8]; static const uint8_t StubContent[6]; Section *GOTSection = nullptr; @@ -440,30 +457,31 @@ private: return getMachOX86RelocationKindName(R); } - Expected> + Expected> buildGraph(MemoryBufferRef ObjBuffer) override { auto MachOObj = object::ObjectFile::createMachOObjectFile(ObjBuffer); if (!MachOObj) return MachOObj.takeError(); - return MachOAtomGraphBuilder_x86_64(**MachOObj).buildGraph(); + return MachOLinkGraphBuilder_x86_64(**MachOObj).buildGraph(); } - static Error targetOutOfRangeError(const Atom &A, const Edge &E) { + static Error targetOutOfRangeError(const Block &B, const Edge &E) { std::string ErrMsg; { raw_string_ostream ErrStream(ErrMsg); ErrStream << "Relocation target out of range: "; - printEdge(ErrStream, A, E, getMachOX86RelocationKindName(E.getKind())); + printEdge(ErrStream, B, E, getMachOX86RelocationKindName(E.getKind())); ErrStream << "\n"; } return make_error(std::move(ErrMsg)); } - Error applyFixup(DefinedAtom &A, const Edge &E, char *AtomWorkingMem) const { + Error applyFixup(Block &B, const Edge &E, char *BlockWorkingMem) const { + using namespace support; - char *FixupPtr = AtomWorkingMem + E.getOffset(); - JITTargetAddress FixupAddress = A.getAddress() + E.getOffset(); + char *FixupPtr = BlockWorkingMem + E.getOffset(); + JITTargetAddress FixupAddress = B.getAddress() + E.getOffset(); switch (E.getKind()) { case Branch32: @@ -473,7 +491,7 @@ private: E.getTarget().getAddress() - (FixupAddress + 4) + E.getAddend(); if (Value < std::numeric_limits::min() || Value > std::numeric_limits::max()) - return targetOutOfRangeError(A, E); + return targetOutOfRangeError(B, E); *(little32_t *)FixupPtr = Value; break; } @@ -491,7 +509,7 @@ private: E.getTarget().getAddress() - (FixupAddress + Delta) + E.getAddend(); if (Value < std::numeric_limits::min() || Value > std::numeric_limits::max()) - return targetOutOfRangeError(A, E); + return targetOutOfRangeError(B, E); *(little32_t *)FixupPtr = Value; break; } @@ -503,7 +521,7 @@ private: E.getTarget().getAddress() - (FixupAddress + Delta) + E.getAddend(); if (Value < std::numeric_limits::min() || Value > std::numeric_limits::max()) - return targetOutOfRangeError(A, E); + return 
targetOutOfRangeError(B, E); *(little32_t *)FixupPtr = Value; break; } @@ -520,12 +538,19 @@ private: if (E.getKind() == Delta32 || E.getKind() == NegDelta32) { if (Value < std::numeric_limits::min() || Value > std::numeric_limits::max()) - return targetOutOfRangeError(A, E); + return targetOutOfRangeError(B, E); *(little32_t *)FixupPtr = Value; } else *(little64_t *)FixupPtr = Value; break; } + case Pointer32: { + uint64_t Value = E.getTarget().getAddress() + E.getAddend(); + if (Value > std::numeric_limits::max()) + return targetOutOfRangeError(B, E); + *(ulittle32_t *)FixupPtr = Value; + break; + } default: llvm_unreachable("Unrecognized edge kind"); } @@ -545,10 +570,10 @@ void jitLink_MachO_x86_64(std::unique_ptr Ctx) { if (auto MarkLive = Ctx->getMarkLivePass(TT)) Config.PrePrunePasses.push_back(std::move(MarkLive)); else - Config.PrePrunePasses.push_back(markAllAtomsLive); + Config.PrePrunePasses.push_back(markAllSymbolsLive); // Add an in-place GOT/Stubs pass. - Config.PostPrunePasses.push_back([](AtomGraph &G) -> Error { + Config.PostPrunePasses.push_back([](LinkGraph &G) -> Error { MachO_x86_64_GOTAndStubsBuilder(G).run(); return Error::success(); }); @@ -565,6 +590,8 @@ StringRef getMachOX86RelocationKindName(Edge::Kind R) { switch (R) { case Branch32: return "Branch32"; + case Pointer32: + return "Pointer32"; case Pointer64: return "Pointer64"; case Pointer64Anon: diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 08815b7a80ae..94741f5f01d5 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -23,7 +23,7 @@ #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/MutexGuard.h" +#include using namespace llvm; @@ -88,7 +88,7 @@ MCJIT::MCJIT(std::unique_ptr M, std::unique_ptr TM, } MCJIT::~MCJIT() { - MutexGuard locked(lock); + std::lock_guard locked(lock); Dyld.deregisterEHFrames(); @@ -100,7 +100,7 @@ MCJIT::~MCJIT() { } void MCJIT::addModule(std::unique_ptr M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); if (M->getDataLayout().isDefault()) M->setDataLayout(getDataLayout()); @@ -109,7 +109,7 @@ void MCJIT::addModule(std::unique_ptr M) { } bool MCJIT::removeModule(Module *M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); return OwnedModules.removeModule(M); } @@ -136,14 +136,14 @@ void MCJIT::addArchive(object::OwningBinary A) { } void MCJIT::setObjectCache(ObjectCache* NewCache) { - MutexGuard locked(lock); + std::lock_guard locked(lock); ObjCache = NewCache; } std::unique_ptr MCJIT::emitObject(Module *M) { assert(M && "Can not emit a null module"); - MutexGuard locked(lock); + std::lock_guard locked(lock); // Materialize all globals in the module if they have not been // materialized already. @@ -185,7 +185,7 @@ std::unique_ptr MCJIT::emitObject(Module *M) { void MCJIT::generateCodeForModule(Module *M) { // Get a thread lock to make sure we aren't trying to load multiple times - MutexGuard locked(lock); + std::lock_guard locked(lock); // This must be a module which has already been added to this MCJIT instance. assert(OwnedModules.ownsModule(M) && @@ -234,7 +234,7 @@ void MCJIT::generateCodeForModule(Module *M) { } void MCJIT::finalizeLoadedModules() { - MutexGuard locked(lock); + std::lock_guard locked(lock); // Resolve any outstanding relocations. Dyld.resolveRelocations(); @@ -250,7 +250,7 @@ void MCJIT::finalizeLoadedModules() { // FIXME: Rename this. 
void MCJIT::finalizeObject() { - MutexGuard locked(lock); + std::lock_guard locked(lock); // Generate code for module is going to move objects out of the 'added' list, // so we need to copy that out before using it: @@ -265,7 +265,7 @@ void MCJIT::finalizeObject() { } void MCJIT::finalizeModule(Module *M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); // This must be a module which has already been added to this MCJIT instance. assert(OwnedModules.ownsModule(M) && "MCJIT::finalizeModule: Unknown module."); @@ -292,7 +292,7 @@ Module *MCJIT::findModuleForSymbol(const std::string &Name, if (DemangledName[0] == getDataLayout().getGlobalPrefix()) DemangledName = DemangledName.substr(1); - MutexGuard locked(lock); + std::lock_guard locked(lock); // If it hasn't already been generated, see if it's in one of our modules. for (ModulePtrSet::iterator I = OwnedModules.begin_added(), @@ -332,7 +332,7 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name, JITSymbol MCJIT::findSymbol(const std::string &Name, bool CheckFunctionsOnly) { - MutexGuard locked(lock); + std::lock_guard locked(lock); // First, check to see if we already have this symbol. if (auto Sym = findExistingSymbol(Name)) @@ -388,7 +388,7 @@ JITSymbol MCJIT::findSymbol(const std::string &Name, } uint64_t MCJIT::getGlobalValueAddress(const std::string &Name) { - MutexGuard locked(lock); + std::lock_guard locked(lock); uint64_t Result = getSymbolAddress(Name, false); if (Result != 0) finalizeLoadedModules(); @@ -396,7 +396,7 @@ uint64_t MCJIT::getGlobalValueAddress(const std::string &Name) { } uint64_t MCJIT::getFunctionAddress(const std::string &Name) { - MutexGuard locked(lock); + std::lock_guard locked(lock); uint64_t Result = getSymbolAddress(Name, true); if (Result != 0) finalizeLoadedModules(); @@ -405,7 +405,7 @@ uint64_t MCJIT::getFunctionAddress(const std::string &Name) { // Deprecated. Use getFunctionAddress instead. 
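The MutexGuard changes in MCJIT.cpp above, and in the OProfile files further down, all follow one mechanical pattern: llvm/Support/MutexGuard.h is dropped, <mutex> is included instead, and each llvm::MutexGuard is replaced by a std::lock_guard over the same pre-existing sys::Mutex member. A minimal sketch of that pattern, using a hypothetical class rather than code from this patch:

    #include "llvm/Support/Mutex.h"
    #include <mutex>

    class Counter {
      llvm::sys::Mutex lock;   // the same kind of mutex member MCJIT already has
      unsigned Value = 0;

    public:
      unsigned next() {
        // Previously: llvm::MutexGuard locked(lock);
        std::lock_guard<llvm::sys::Mutex> locked(lock);
        return ++Value;
      }
    };
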
void *MCJIT::getPointerToFunction(Function *F) { - MutexGuard locked(lock); + std::lock_guard locked(lock); Mangler Mang; SmallString<128> Name; @@ -632,14 +632,14 @@ void *MCJIT::getPointerToNamedFunction(StringRef Name, bool AbortOnFailure) { void MCJIT::RegisterJITEventListener(JITEventListener *L) { if (!L) return; - MutexGuard locked(lock); + std::lock_guard locked(lock); EventListeners.push_back(L); } void MCJIT::UnregisterJITEventListener(JITEventListener *L) { if (!L) return; - MutexGuard locked(lock); + std::lock_guard locked(lock); auto I = find(reverse(EventListeners), L); if (I != EventListeners.rend()) { std::swap(*I, EventListeners.back()); @@ -651,7 +651,7 @@ void MCJIT::notifyObjectLoaded(const object::ObjectFile &Obj, const RuntimeDyld::LoadedObjectInfo &L) { uint64_t Key = static_cast(reinterpret_cast(Obj.getData().data())); - MutexGuard locked(lock); + std::lock_guard locked(lock); MemMgr->notifyObjectLoaded(this, Obj); for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { EventListeners[I]->notifyObjectLoaded(Key, Obj, L); @@ -661,7 +661,7 @@ void MCJIT::notifyObjectLoaded(const object::ObjectFile &Obj, void MCJIT::notifyFreeingObject(const object::ObjectFile &Obj) { uint64_t Key = static_cast(reinterpret_cast(Obj.getData().data())); - MutexGuard locked(lock); + std::lock_guard locked(lock); for (JITEventListener *L : EventListeners) L->notifyFreeingObject(Key); } diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp index 2ad9d24555f3..bb5d96051da9 100644 --- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp +++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp @@ -177,7 +177,7 @@ void OProfileJITEventListener::notifyFreeingObject(ObjectKey Key) { namespace llvm { JITEventListener *JITEventListener::createOProfileJITEventListener() { - return new OProfileJITEventListener(llvm::make_unique()); + return new OProfileJITEventListener(std::make_unique()); } } // namespace llvm diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp index 1a2667736926..b78d2531382d 100644 --- a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp +++ b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp @@ -17,11 +17,11 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/Mutex.h" -#include "llvm/Support/MutexGuard.h" #include "llvm/Support/raw_ostream.h" #include #include #include +#include #include #include #include @@ -54,7 +54,7 @@ bool OProfileWrapper::initialize() { using namespace llvm; using namespace llvm::sys; - MutexGuard Guard(OProfileInitializationMutex); + std::lock_guard Guard(OProfileInitializationMutex); if (Initialized) return OpenAgentFunc != 0; diff --git a/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp b/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp index 99bf53bc3afa..75ddbc30445d 100644 --- a/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp +++ b/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp @@ -54,11 +54,12 @@ static ThreadSafeModule extractSubModule(ThreadSafeModule &TSM, llvm_unreachable("Unsupported global type"); }; - auto NewTSMod = cloneToNewContext(TSM, ShouldExtract, DeleteExtractedDefs); - auto &M = *NewTSMod.getModule(); - M.setModuleIdentifier((M.getModuleIdentifier() + Suffix).str()); + auto NewTSM = cloneToNewContext(TSM, ShouldExtract, DeleteExtractedDefs); + NewTSM.withModuleDo([&](Module &M) { + 
M.setModuleIdentifier((M.getModuleIdentifier() + Suffix).str()); + }); - return NewTSMod; + return NewTSM; } namespace llvm { @@ -117,39 +118,44 @@ void CompileOnDemandLayer::setPartitionFunction(PartitionFunction Partition) { this->Partition = std::move(Partition); } +void CompileOnDemandLayer::setImplMap(ImplSymbolMap *Imp) { + this->AliaseeImpls = Imp; +} void CompileOnDemandLayer::emit(MaterializationResponsibility R, ThreadSafeModule TSM) { - assert(TSM.getModule() && "Null module"); + assert(TSM && "Null module"); auto &ES = getExecutionSession(); - auto &M = *TSM.getModule(); - - // First, do some cleanup on the module: - cleanUpModule(M); - // Now sort the callables and non-callables, build re-exports and lodge the + // Sort the callables and non-callables, build re-exports and lodge the // actual module with the implementation dylib. auto &PDR = getPerDylibResources(R.getTargetJITDylib()); - MangleAndInterner Mangle(ES, M.getDataLayout()); SymbolAliasMap NonCallables; SymbolAliasMap Callables; - for (auto &GV : M.global_values()) { - if (GV.isDeclaration() || GV.hasLocalLinkage() || GV.hasAppendingLinkage()) - continue; - - auto Name = Mangle(GV.getName()); - auto Flags = JITSymbolFlags::fromGlobalValue(GV); - if (Flags.isCallable()) - Callables[Name] = SymbolAliasMapEntry(Name, Flags); - else - NonCallables[Name] = SymbolAliasMapEntry(Name, Flags); - } + TSM.withModuleDo([&](Module &M) { + // First, do some cleanup on the module: + cleanUpModule(M); + + MangleAndInterner Mangle(ES, M.getDataLayout()); + for (auto &GV : M.global_values()) { + if (GV.isDeclaration() || GV.hasLocalLinkage() || + GV.hasAppendingLinkage()) + continue; + + auto Name = Mangle(GV.getName()); + auto Flags = JITSymbolFlags::fromGlobalValue(GV); + if (Flags.isCallable()) + Callables[Name] = SymbolAliasMapEntry(Name, Flags); + else + NonCallables[Name] = SymbolAliasMapEntry(Name, Flags); + } + }); // Create a partitioning materialization unit and lodge it with the // implementation dylib. if (auto Err = PDR.getImplDylib().define( - llvm::make_unique( + std::make_unique( ES, std::move(TSM), R.getVModuleKey(), *this))) { ES.reportError(std::move(Err)); R.failMaterialization(); @@ -158,7 +164,7 @@ void CompileOnDemandLayer::emit(MaterializationResponsibility R, R.replace(reexports(PDR.getImplDylib(), std::move(NonCallables), true)); R.replace(lazyReexports(LCTMgr, PDR.getISManager(), PDR.getImplDylib(), - std::move(Callables))); + std::move(Callables), AliaseeImpls)); } CompileOnDemandLayer::PerDylibResources & @@ -239,14 +245,16 @@ void CompileOnDemandLayer::emitPartition( // memory manager instance to the linking layer. auto &ES = getExecutionSession(); - GlobalValueSet RequestedGVs; for (auto &Name : R.getRequestedSymbols()) { assert(Defs.count(Name) && "No definition for symbol"); RequestedGVs.insert(Defs[Name]); } - auto GVsToExtract = Partition(RequestedGVs); + /// Perform partitioning with the context lock held, since the partition + /// function is allowed to access the globals to compute the partition. + auto GVsToExtract = + TSM.withModuleDo([&](Module &M) { return Partition(RequestedGVs); }); // Take a 'None' partition to mean the whole module (as opposed to an empty // partition, which means "materialize nothing"). Emit the whole module @@ -259,43 +267,52 @@ void CompileOnDemandLayer::emitPartition( // If the partition is empty, return the whole module to the symbol table. 
if (GVsToExtract->empty()) { - R.replace(llvm::make_unique( + R.replace(std::make_unique( std::move(TSM), R.getSymbols(), std::move(Defs), *this)); return; } // Ok -- we actually need to partition the symbols. Promote the symbol - // linkages/names. - // FIXME: We apply this once per partitioning. It's safe, but overkill. - { - auto PromotedGlobals = PromoteSymbols(*TSM.getModule()); - if (!PromotedGlobals.empty()) { - MangleAndInterner Mangle(ES, TSM.getModule()->getDataLayout()); - SymbolFlagsMap SymbolFlags; - for (auto &GV : PromotedGlobals) - SymbolFlags[Mangle(GV->getName())] = - JITSymbolFlags::fromGlobalValue(*GV); - if (auto Err = R.defineMaterializing(SymbolFlags)) { - ES.reportError(std::move(Err)); - R.failMaterialization(); - return; - } - } + // linkages/names, expand the partition to include any required symbols + // (i.e. symbols that can't be separated from our partition), and + // then extract the partition. + // + // FIXME: We apply this promotion once per partitioning. It's safe, but + // overkill. + + auto ExtractedTSM = + TSM.withModuleDo([&](Module &M) -> Expected { + auto PromotedGlobals = PromoteSymbols(M); + if (!PromotedGlobals.empty()) { + MangleAndInterner Mangle(ES, M.getDataLayout()); + SymbolFlagsMap SymbolFlags; + for (auto &GV : PromotedGlobals) + SymbolFlags[Mangle(GV->getName())] = + JITSymbolFlags::fromGlobalValue(*GV); + if (auto Err = R.defineMaterializing(SymbolFlags)) + return std::move(Err); + } + + expandPartition(*GVsToExtract); + + // Extract the requested partiton (plus any necessary aliases) and + // put the rest back into the impl dylib. + auto ShouldExtract = [&](const GlobalValue &GV) -> bool { + return GVsToExtract->count(&GV); + }; + + return extractSubModule(TSM, ".submodule", ShouldExtract); + }); + + if (!ExtractedTSM) { + ES.reportError(ExtractedTSM.takeError()); + R.failMaterialization(); + return; } - expandPartition(*GVsToExtract); - - // Extract the requested partiton (plus any necessary aliases) and - // put the rest back into the impl dylib. 
- auto ShouldExtract = [&](const GlobalValue &GV) -> bool { - return GVsToExtract->count(&GV); - }; - - auto ExtractedTSM = extractSubModule(TSM, ".submodule", ShouldExtract); - R.replace(llvm::make_unique( + R.replace(std::make_unique( ES, std::move(TSM), R.getVModuleKey(), *this)); - - BaseLayer.emit(std::move(R), std::move(ExtractedTSM)); + BaseLayer.emit(std::move(R), std::move(*ExtractedTSM)); } } // end namespace orc diff --git a/lib/ExecutionEngine/Orc/CompileUtils.cpp b/lib/ExecutionEngine/Orc/CompileUtils.cpp index d46b6fcf9a5f..f8251627a4ef 100644 --- a/lib/ExecutionEngine/Orc/CompileUtils.cpp +++ b/lib/ExecutionEngine/Orc/CompileUtils.cpp @@ -42,7 +42,7 @@ SimpleCompiler::CompileResult SimpleCompiler::operator()(Module &M) { PM.run(M); } - auto ObjBuffer = llvm::make_unique( + auto ObjBuffer = std::make_unique( std::move(ObjBufferSV), ""); diff --git a/lib/ExecutionEngine/Orc/Core.cpp b/lib/ExecutionEngine/Orc/Core.cpp index dac37e030e0c..5c7d888c2d6e 100644 --- a/lib/ExecutionEngine/Orc/Core.cpp +++ b/lib/ExecutionEngine/Orc/Core.cpp @@ -151,6 +151,8 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolNameSet &Symbols) { } raw_ostream &operator<<(raw_ostream &OS, const JITSymbolFlags &Flags) { + if (Flags.hasError()) + OS << "[*ERROR*]"; if (Flags.isCallable()) OS << "[Callable]"; else @@ -224,7 +226,7 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolAliasMap &Aliases) { for (auto &KV : Aliases) OS << " " << *KV.first << ": " << KV.second.Aliasee << " " << KV.second.AliasFlags; - OS << " }\n"; + OS << " }"; return OS; } @@ -238,15 +240,18 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolState &S) { return OS << "Materializing"; case SymbolState::Resolved: return OS << "Resolved"; + case SymbolState::Emitted: + return OS << "Emitted"; case SymbolState::Ready: return OS << "Ready"; } llvm_unreachable("Invalid state"); } -FailedToMaterialize::FailedToMaterialize(SymbolNameSet Symbols) +FailedToMaterialize::FailedToMaterialize( + std::shared_ptr Symbols) : Symbols(std::move(Symbols)) { - assert(!this->Symbols.empty() && "Can not fail to resolve an empty set"); + assert(!this->Symbols->empty() && "Can not fail to resolve an empty set"); } std::error_code FailedToMaterialize::convertToErrorCode() const { @@ -254,7 +259,7 @@ std::error_code FailedToMaterialize::convertToErrorCode() const { } void FailedToMaterialize::log(raw_ostream &OS) const { - OS << "Failed to materialize symbols: " << Symbols; + OS << "Failed to materialize symbols: " << *Symbols; } SymbolsNotFound::SymbolsNotFound(SymbolNameSet Symbols) @@ -367,35 +372,35 @@ SymbolNameSet MaterializationResponsibility::getRequestedSymbols() const { return JD.getRequestedSymbols(SymbolFlags); } -void MaterializationResponsibility::notifyResolved(const SymbolMap &Symbols) { +Error MaterializationResponsibility::notifyResolved(const SymbolMap &Symbols) { LLVM_DEBUG({ dbgs() << "In " << JD.getName() << " resolving " << Symbols << "\n"; }); #ifndef NDEBUG for (auto &KV : Symbols) { + auto WeakFlags = JITSymbolFlags::Weak | JITSymbolFlags::Common; auto I = SymbolFlags.find(KV.first); assert(I != SymbolFlags.end() && "Resolving symbol outside this responsibility set"); - if (I->second.isWeak()) - assert(I->second == (KV.second.getFlags() | JITSymbolFlags::Weak) && - "Resolving symbol with incorrect flags"); - else - assert(I->second == KV.second.getFlags() && - "Resolving symbol with incorrect flags"); + assert((KV.second.getFlags() & ~WeakFlags) == (I->second & ~WeakFlags) && + "Resolving symbol with incorrect 
flags"); } #endif - JD.resolve(Symbols); + return JD.resolve(Symbols); } -void MaterializationResponsibility::notifyEmitted() { +Error MaterializationResponsibility::notifyEmitted() { LLVM_DEBUG({ dbgs() << "In " << JD.getName() << " emitting " << SymbolFlags << "\n"; }); - JD.emit(SymbolFlags); + if (auto Err = JD.emit(SymbolFlags)) + return Err; + SymbolFlags.clear(); + return Error::success(); } Error MaterializationResponsibility::defineMaterializing( @@ -417,12 +422,13 @@ void MaterializationResponsibility::failMaterialization() { << SymbolFlags << "\n"; }); - SymbolNameSet FailedSymbols; - for (auto &KV : SymbolFlags) - FailedSymbols.insert(KV.first); + JITDylib::FailedSymbolsWorklist Worklist; - JD.notifyFailed(FailedSymbols); + for (auto &KV : SymbolFlags) + Worklist.push_back(std::make_pair(&JD, KV.first)); SymbolFlags.clear(); + + JD.notifyFailed(std::move(Worklist)); } void MaterializationResponsibility::replace( @@ -485,8 +491,9 @@ StringRef AbsoluteSymbolsMaterializationUnit::getName() const { void AbsoluteSymbolsMaterializationUnit::materialize( MaterializationResponsibility R) { - R.notifyResolved(Symbols); - R.notifyEmitted(); + // No dependencies, so these calls can't fail. + cantFail(R.notifyResolved(Symbols)); + cantFail(R.notifyEmitted()); } void AbsoluteSymbolsMaterializationUnit::discard(const JITDylib &JD, @@ -625,6 +632,7 @@ void ReExportsMaterializationUnit::materialize( }; auto OnComplete = [QueryInfo](Expected Result) { + auto &ES = QueryInfo->R.getTargetJITDylib().getExecutionSession(); if (Result) { SymbolMap ResolutionMap; for (auto &KV : QueryInfo->Aliases) { @@ -633,10 +641,17 @@ void ReExportsMaterializationUnit::materialize( ResolutionMap[KV.first] = JITEvaluatedSymbol( (*Result)[KV.second.Aliasee].getAddress(), KV.second.AliasFlags); } - QueryInfo->R.notifyResolved(ResolutionMap); - QueryInfo->R.notifyEmitted(); + if (auto Err = QueryInfo->R.notifyResolved(ResolutionMap)) { + ES.reportError(std::move(Err)); + QueryInfo->R.failMaterialization(); + return; + } + if (auto Err = QueryInfo->R.notifyEmitted()) { + ES.reportError(std::move(Err)); + QueryInfo->R.failMaterialization(); + return; + } } else { - auto &ES = QueryInfo->R.getTargetJITDylib().getExecutionSession(); ES.reportError(Result.takeError()); QueryInfo->R.failMaterialization(); } @@ -694,7 +709,7 @@ ReexportsGenerator::ReexportsGenerator(JITDylib &SourceJD, Allow(std::move(Allow)) {} Expected -ReexportsGenerator::operator()(JITDylib &JD, const SymbolNameSet &Names) { +ReexportsGenerator::tryToGenerate(JITDylib &JD, const SymbolNameSet &Names) { orc::SymbolNameSet Added; orc::SymbolAliasMap AliasMap; @@ -716,6 +731,19 @@ ReexportsGenerator::operator()(JITDylib &JD, const SymbolNameSet &Names) { return Added; } +JITDylib::DefinitionGenerator::~DefinitionGenerator() {} + +void JITDylib::removeGenerator(DefinitionGenerator &G) { + ES.runSessionLocked([&]() { + auto I = std::find_if(DefGenerators.begin(), DefGenerators.end(), + [&](const std::unique_ptr &H) { + return H.get() == &G; + }); + assert(I != DefGenerators.end() && "Generator not found"); + DefGenerators.erase(I); + }); +} + Error JITDylib::defineMaterializing(const SymbolFlagsMap &SymbolFlags) { return ES.runSessionLocked([&]() -> Error { std::vector AddedSyms; @@ -823,26 +851,52 @@ void JITDylib::addDependencies(const SymbolStringPtr &Name, assert(Symbols[Name].isInMaterializationPhase() && "Can not add dependencies for a symbol that is not materializing"); + // If Name is already in an error state then just bail out. 
+ if (Symbols[Name].getFlags().hasError()) + return; + auto &MI = MaterializingInfos[Name]; - assert(!MI.IsEmitted && "Can not add dependencies to an emitted symbol"); + assert(Symbols[Name].getState() != SymbolState::Emitted && + "Can not add dependencies to an emitted symbol"); + bool DependsOnSymbolInErrorState = false; + + // Register dependencies, record whether any depenendency is in the error + // state. for (auto &KV : Dependencies) { assert(KV.first && "Null JITDylib in dependency?"); auto &OtherJITDylib = *KV.first; auto &DepsOnOtherJITDylib = MI.UnemittedDependencies[&OtherJITDylib]; for (auto &OtherSymbol : KV.second) { + + // Check the sym entry for the dependency. + auto OtherSymI = OtherJITDylib.Symbols.find(OtherSymbol); + #ifndef NDEBUG - // Assert that this symbol exists and has not been emitted already. - auto SymI = OtherJITDylib.Symbols.find(OtherSymbol); - assert(SymI != OtherJITDylib.Symbols.end() && - (SymI->second.getState() != SymbolState::Ready && - "Dependency on emitted symbol")); + // Assert that this symbol exists and has not reached the ready state + // already. + assert(OtherSymI != OtherJITDylib.Symbols.end() && + (OtherSymI->second.getState() != SymbolState::Ready && + "Dependency on emitted/ready symbol")); #endif + auto &OtherSymEntry = OtherSymI->second; + + // If the dependency is in an error state then note this and continue, + // we will move this symbol to the error state below. + if (OtherSymEntry.getFlags().hasError()) { + DependsOnSymbolInErrorState = true; + continue; + } + + // If the dependency was not in the error state then add it to + // our list of dependencies. + assert(OtherJITDylib.MaterializingInfos.count(OtherSymbol) && + "No MaterializingInfo for dependency"); auto &OtherMI = OtherJITDylib.MaterializingInfos[OtherSymbol]; - if (OtherMI.IsEmitted) + if (OtherSymEntry.getState() == SymbolState::Emitted) transferEmittedNodeDependencies(MI, Name, OtherMI); else if (&OtherJITDylib != this || OtherSymbol != Name) { OtherMI.Dependants[this].insert(Name); @@ -853,63 +907,142 @@ void JITDylib::addDependencies(const SymbolStringPtr &Name, if (DepsOnOtherJITDylib.empty()) MI.UnemittedDependencies.erase(&OtherJITDylib); } + + // If this symbol dependended on any symbols in the error state then move + // this symbol to the error state too. + if (DependsOnSymbolInErrorState) + Symbols[Name].setFlags(Symbols[Name].getFlags() | JITSymbolFlags::HasError); } -void JITDylib::resolve(const SymbolMap &Resolved) { - auto CompletedQueries = ES.runSessionLocked([&, this]() { - AsynchronousSymbolQuerySet CompletedQueries; +Error JITDylib::resolve(const SymbolMap &Resolved) { + SymbolNameSet SymbolsInErrorState; + AsynchronousSymbolQuerySet CompletedQueries; + + ES.runSessionLocked([&, this]() { + struct WorklistEntry { + SymbolTable::iterator SymI; + JITEvaluatedSymbol ResolvedSym; + }; + + std::vector Worklist; + Worklist.reserve(Resolved.size()); + + // Build worklist and check for any symbols in the error state. 
for (const auto &KV : Resolved) { - auto &Name = KV.first; - auto Sym = KV.second; - auto I = Symbols.find(Name); + assert(!KV.second.getFlags().hasError() && + "Resolution result can not have error flag set"); - assert(I != Symbols.end() && "Symbol not found"); - assert(!I->second.hasMaterializerAttached() && + auto SymI = Symbols.find(KV.first); + + assert(SymI != Symbols.end() && "Symbol not found"); + assert(!SymI->second.hasMaterializerAttached() && "Resolving symbol with materializer attached?"); - assert(I->second.getState() == SymbolState::Materializing && + assert(SymI->second.getState() == SymbolState::Materializing && "Symbol should be materializing"); - assert(I->second.getAddress() == 0 && "Symbol has already been resolved"); + assert(SymI->second.getAddress() == 0 && + "Symbol has already been resolved"); + + if (SymI->second.getFlags().hasError()) + SymbolsInErrorState.insert(KV.first); + else { + auto Flags = KV.second.getFlags(); + Flags &= ~(JITSymbolFlags::Weak | JITSymbolFlags::Common); + assert(Flags == (SymI->second.getFlags() & + ~(JITSymbolFlags::Weak | JITSymbolFlags::Common)) && + "Resolved flags should match the declared flags"); + + Worklist.push_back( + {SymI, JITEvaluatedSymbol(KV.second.getAddress(), Flags)}); + } + } + + // If any symbols were in the error state then bail out. + if (!SymbolsInErrorState.empty()) + return; + + while (!Worklist.empty()) { + auto SymI = Worklist.back().SymI; + auto ResolvedSym = Worklist.back().ResolvedSym; + Worklist.pop_back(); - assert((Sym.getFlags() & ~JITSymbolFlags::Weak) == - (I->second.getFlags() & ~JITSymbolFlags::Weak) && - "Resolved flags should match the declared flags"); + auto &Name = SymI->first; - // Once resolved, symbols can never be weak. - JITSymbolFlags ResolvedFlags = Sym.getFlags(); - ResolvedFlags &= ~JITSymbolFlags::Weak; - I->second.setAddress(Sym.getAddress()); - I->second.setFlags(ResolvedFlags); - I->second.setState(SymbolState::Resolved); + // Resolved symbols can not be weak: discard the weak flag. + JITSymbolFlags ResolvedFlags = ResolvedSym.getFlags(); + SymI->second.setAddress(ResolvedSym.getAddress()); + SymI->second.setFlags(ResolvedFlags); + SymI->second.setState(SymbolState::Resolved); auto &MI = MaterializingInfos[Name]; for (auto &Q : MI.takeQueriesMeeting(SymbolState::Resolved)) { - Q->notifySymbolMetRequiredState(Name, Sym); + Q->notifySymbolMetRequiredState(Name, ResolvedSym); + Q->removeQueryDependence(*this, Name); if (Q->isComplete()) CompletedQueries.insert(std::move(Q)); } } - - return CompletedQueries; }); + assert((SymbolsInErrorState.empty() || CompletedQueries.empty()) && + "Can't fail symbols and completed queries at the same time"); + + // If we failed any symbols then return an error. + if (!SymbolsInErrorState.empty()) { + auto FailedSymbolsDepMap = std::make_shared(); + (*FailedSymbolsDepMap)[this] = std::move(SymbolsInErrorState); + return make_error(std::move(FailedSymbolsDepMap)); + } + + // Otherwise notify all the completed queries. 
for (auto &Q : CompletedQueries) { assert(Q->isComplete() && "Q not completed"); Q->handleComplete(); } + + return Error::success(); } -void JITDylib::emit(const SymbolFlagsMap &Emitted) { - auto CompletedQueries = ES.runSessionLocked([&, this]() { - AsynchronousSymbolQuerySet CompletedQueries; +Error JITDylib::emit(const SymbolFlagsMap &Emitted) { + AsynchronousSymbolQuerySet CompletedQueries; + SymbolNameSet SymbolsInErrorState; + ES.runSessionLocked([&, this]() { + std::vector Worklist; + + // Scan to build worklist, record any symbols in the erorr state. for (const auto &KV : Emitted) { - const auto &Name = KV.first; + auto &Name = KV.first; + + auto SymI = Symbols.find(Name); + assert(SymI != Symbols.end() && "No symbol table entry for Name"); + + if (SymI->second.getFlags().hasError()) + SymbolsInErrorState.insert(Name); + else + Worklist.push_back(SymI); + } + + // If any symbols were in the error state then bail out. + if (!SymbolsInErrorState.empty()) + return; + + // Otherwise update dependencies and move to the emitted state. + while (!Worklist.empty()) { + auto SymI = Worklist.back(); + Worklist.pop_back(); + + auto &Name = SymI->first; + auto &SymEntry = SymI->second; + + // Move symbol to the emitted state. + assert(SymEntry.getState() == SymbolState::Resolved && + "Emitting from state other than Resolved"); + SymEntry.setState(SymbolState::Emitted); auto MII = MaterializingInfos.find(Name); assert(MII != MaterializingInfos.end() && "Missing MaterializingInfo entry"); - auto &MI = MII->second; // For each dependant, transfer this node's emitted dependencies to @@ -926,8 +1059,12 @@ void JITDylib::emit(const SymbolFlagsMap &Emitted) { auto &DependantMI = DependantMII->second; // Remove the dependant's dependency on this node. + assert(DependantMI.UnemittedDependencies.count(this) && + "Dependant does not have an unemitted dependencies record for " + "this JITDylib"); assert(DependantMI.UnemittedDependencies[this].count(Name) && "Dependant does not count this symbol as a dependency?"); + DependantMI.UnemittedDependencies[this].erase(Name); if (DependantMI.UnemittedDependencies[this].empty()) DependantMI.UnemittedDependencies.erase(this); @@ -936,20 +1073,22 @@ void JITDylib::emit(const SymbolFlagsMap &Emitted) { DependantJD.transferEmittedNodeDependencies(DependantMI, DependantName, MI); + auto DependantSymI = DependantJD.Symbols.find(DependantName); + assert(DependantSymI != DependantJD.Symbols.end() && + "Dependant has no entry in the Symbols table"); + auto &DependantSymEntry = DependantSymI->second; + // If the dependant is emitted and this node was the last of its // unemitted dependencies then the dependant node is now ready, so // notify any pending queries on the dependant node. - if (DependantMI.IsEmitted && + if (DependantSymEntry.getState() == SymbolState::Emitted && DependantMI.UnemittedDependencies.empty()) { assert(DependantMI.Dependants.empty() && "Dependants should be empty by now"); // Since this dependant is now ready, we erase its MaterializingInfo // and update its materializing state. 
- auto DependantSymI = DependantJD.Symbols.find(DependantName); - assert(DependantSymI != DependantJD.Symbols.end() && - "Dependant has no entry in the Symbols table"); - DependantSymI->second.setState(SymbolState::Ready); + DependantSymEntry.setState(SymbolState::Ready); for (auto &Q : DependantMI.takeQueriesMeeting(SymbolState::Ready)) { Q->notifySymbolMetRequiredState( @@ -963,12 +1102,9 @@ void JITDylib::emit(const SymbolFlagsMap &Emitted) { } } } - MI.Dependants.clear(); - MI.IsEmitted = true; + MI.Dependants.clear(); if (MI.UnemittedDependencies.empty()) { - auto SymI = Symbols.find(Name); - assert(SymI != Symbols.end() && "Symbol has no entry in Symbols table"); SymI->second.setState(SymbolState::Ready); for (auto &Q : MI.takeQueriesMeeting(SymbolState::Ready)) { Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol()); @@ -979,80 +1115,138 @@ void JITDylib::emit(const SymbolFlagsMap &Emitted) { MaterializingInfos.erase(MII); } } - - return CompletedQueries; }); + assert((SymbolsInErrorState.empty() || CompletedQueries.empty()) && + "Can't fail symbols and completed queries at the same time"); + + // If we failed any symbols then return an error. + if (!SymbolsInErrorState.empty()) { + auto FailedSymbolsDepMap = std::make_shared(); + (*FailedSymbolsDepMap)[this] = std::move(SymbolsInErrorState); + return make_error(std::move(FailedSymbolsDepMap)); + } + + // Otherwise notify all the completed queries. for (auto &Q : CompletedQueries) { assert(Q->isComplete() && "Q is not complete"); Q->handleComplete(); } + + return Error::success(); } -void JITDylib::notifyFailed(const SymbolNameSet &FailedSymbols) { +void JITDylib::notifyFailed(FailedSymbolsWorklist Worklist) { + AsynchronousSymbolQuerySet FailedQueries; + auto FailedSymbolsMap = std::make_shared(); - // FIXME: This should fail any transitively dependant symbols too. + // Failing no symbols is a no-op. + if (Worklist.empty()) + return; - auto FailedQueriesToNotify = ES.runSessionLocked([&, this]() { - AsynchronousSymbolQuerySet FailedQueries; - std::vector MIIsToRemove; + auto &ES = Worklist.front().first->getExecutionSession(); - for (auto &Name : FailedSymbols) { - auto I = Symbols.find(Name); - assert(I != Symbols.end() && "Symbol not present in this JITDylib"); - Symbols.erase(I); + ES.runSessionLocked([&]() { + while (!Worklist.empty()) { + assert(Worklist.back().first && "Failed JITDylib can not be null"); + auto &JD = *Worklist.back().first; + auto Name = std::move(Worklist.back().second); + Worklist.pop_back(); - auto MII = MaterializingInfos.find(Name); + (*FailedSymbolsMap)[&JD].insert(Name); + + assert(JD.Symbols.count(Name) && "No symbol table entry for Name"); + auto &Sym = JD.Symbols[Name]; - // If we have not created a MaterializingInfo for this symbol yet then - // there is nobody to notify. - if (MII == MaterializingInfos.end()) + // Move the symbol into the error state. + // Note that this may be redundant: The symbol might already have been + // moved to this state in response to the failure of a dependence. + Sym.setFlags(Sym.getFlags() | JITSymbolFlags::HasError); + + // FIXME: Come up with a sane mapping of state to + // presence-of-MaterializingInfo so that we can assert presence / absence + // here, rather than testing it. + auto MII = JD.MaterializingInfos.find(Name); + + if (MII == JD.MaterializingInfos.end()) continue; - // Remove this symbol from the dependants list of any dependencies. 
- for (auto &KV : MII->second.UnemittedDependencies) { - auto *DependencyJD = KV.first; - auto &Dependencies = KV.second; - for (auto &DependencyName : Dependencies) { - auto DependencyMII = - DependencyJD->MaterializingInfos.find(DependencyName); - assert(DependencyMII != DependencyJD->MaterializingInfos.end() && - "Unemitted dependency must have a MaterializingInfo entry"); - assert(DependencyMII->second.Dependants.count(this) && - "Dependency's dependants list does not contain this JITDylib"); - assert(DependencyMII->second.Dependants[this].count(Name) && - "Dependency's dependants list does not contain dependant"); - DependencyMII->second.Dependants[this].erase(Name); + auto &MI = MII->second; + + // Move all dependants to the error state and disconnect from them. + for (auto &KV : MI.Dependants) { + auto &DependantJD = *KV.first; + for (auto &DependantName : KV.second) { + assert(DependantJD.Symbols.count(DependantName) && + "No symbol table entry for DependantName"); + auto &DependantSym = DependantJD.Symbols[DependantName]; + DependantSym.setFlags(DependantSym.getFlags() | + JITSymbolFlags::HasError); + + assert(DependantJD.MaterializingInfos.count(DependantName) && + "No MaterializingInfo for dependant"); + auto &DependantMI = DependantJD.MaterializingInfos[DependantName]; + + auto UnemittedDepI = DependantMI.UnemittedDependencies.find(&JD); + assert(UnemittedDepI != DependantMI.UnemittedDependencies.end() && + "No UnemittedDependencies entry for this JITDylib"); + assert(UnemittedDepI->second.count(Name) && + "No UnemittedDependencies entry for this symbol"); + UnemittedDepI->second.erase(Name); + if (UnemittedDepI->second.empty()) + DependantMI.UnemittedDependencies.erase(UnemittedDepI); + + // If this symbol is already in the emitted state then we need to + // take responsibility for failing its queries, so add it to the + // worklist. + if (DependantSym.getState() == SymbolState::Emitted) { + assert(DependantMI.Dependants.empty() && + "Emitted symbol should not have dependants"); + Worklist.push_back(std::make_pair(&DependantJD, DependantName)); + } } } + MI.Dependants.clear(); - // Copy all the queries to the FailedQueries list, then abandon them. - // This has to be a copy, and the copy has to come before the abandon - // operation: Each Q.detach() call will reach back into this - // PendingQueries list to remove Q. - for (auto &Q : MII->second.pendingQueries()) - FailedQueries.insert(Q); - - MIIsToRemove.push_back(std::move(MII)); - } - - // Detach failed queries. - for (auto &Q : FailedQueries) - Q->detach(); + // Disconnect from all unemitted depenencies. + for (auto &KV : MI.UnemittedDependencies) { + auto &UnemittedDepJD = *KV.first; + for (auto &UnemittedDepName : KV.second) { + auto UnemittedDepMII = + UnemittedDepJD.MaterializingInfos.find(UnemittedDepName); + assert(UnemittedDepMII != UnemittedDepJD.MaterializingInfos.end() && + "Missing MII for unemitted dependency"); + assert(UnemittedDepMII->second.Dependants.count(&JD) && + "JD not listed as a dependant of unemitted dependency"); + assert(UnemittedDepMII->second.Dependants[&JD].count(Name) && + "Name is not listed as a dependant of unemitted dependency"); + UnemittedDepMII->second.Dependants[&JD].erase(Name); + if (UnemittedDepMII->second.Dependants[&JD].empty()) + UnemittedDepMII->second.Dependants.erase(&JD); + } + } + MI.UnemittedDependencies.clear(); - // Remove the MaterializingInfos. 
- for (auto &MII : MIIsToRemove) { - assert(!MII->second.hasQueriesPending() && - "Queries remain after symbol was failed"); + // Collect queries to be failed for this MII. + for (auto &Q : MII->second.pendingQueries()) { + // Add the query to the list to be failed and detach it. + FailedQueries.insert(Q); + Q->detach(); + } - MaterializingInfos.erase(MII); + assert(MI.Dependants.empty() && + "Can not delete MaterializingInfo with dependants still attached"); + assert(MI.UnemittedDependencies.empty() && + "Can not delete MaterializingInfo with unemitted dependencies " + "still attached"); + assert(!MI.hasQueriesPending() && + "Can not delete MaterializingInfo with queries pending"); + JD.MaterializingInfos.erase(MII); } - - return FailedQueries; }); - for (auto &Q : FailedQueriesToNotify) - Q->handleFailed(make_error(FailedSymbols)); + for (auto &Q : FailedQueries) + Q->handleFailed(make_error(FailedSymbolsMap)); } void JITDylib::setSearchOrder(JITDylibSearchList NewSearchOrder, @@ -1159,10 +1353,18 @@ Expected JITDylib::lookupFlags(const SymbolNameSet &Names) { if (!Unresolved) return Unresolved.takeError(); - if (DefGenerator && !Unresolved->empty()) { - auto NewDefs = DefGenerator(*this, *Unresolved); + /// Run any definition generators. + for (auto &DG : DefGenerators) { + + // Bail out early if we've resolved everything. + if (Unresolved->empty()) + break; + + // Run this generator. + auto NewDefs = DG->tryToGenerate(*this, *Unresolved); if (!NewDefs) return NewDefs.takeError(); + if (!NewDefs->empty()) { auto Unresolved2 = lookupFlagsImpl(Result, *NewDefs); if (!Unresolved2) @@ -1171,7 +1373,10 @@ Expected JITDylib::lookupFlags(const SymbolNameSet &Names) { assert(Unresolved2->empty() && "All fallback defs should have been found by lookupFlagsImpl"); } - }; + + for (auto &Name : *NewDefs) + Unresolved->erase(Name); + } return Result; }); } @@ -1197,15 +1402,34 @@ Error JITDylib::lodgeQuery(std::shared_ptr &Q, MaterializationUnitList &MUs) { assert(Q && "Query can not be null"); - lodgeQueryImpl(Q, Unresolved, MatchNonExported, MUs); - if (DefGenerator && !Unresolved.empty()) { - auto NewDefs = DefGenerator(*this, Unresolved); + if (auto Err = lodgeQueryImpl(Q, Unresolved, MatchNonExported, MUs)) + return Err; + + // Run any definition generators. + for (auto &DG : DefGenerators) { + + // Bail out early if we have resolved everything. + if (Unresolved.empty()) + break; + + // Run the generator. + auto NewDefs = DG->tryToGenerate(*this, Unresolved); + + // If the generator returns an error then bail out. if (!NewDefs) return NewDefs.takeError(); + + // If the generator was able to generate new definitions for any of the + // unresolved symbols then lodge the query against them. if (!NewDefs->empty()) { for (auto &D : *NewDefs) Unresolved.erase(D); - lodgeQueryImpl(Q, *NewDefs, MatchNonExported, MUs); + + // Lodge query. This can not fail as any new definitions were added + // by the generator under the session locked. Since they can't have + // started materializing yet the can not have failed. + cantFail(lodgeQueryImpl(Q, *NewDefs, MatchNonExported, MUs)); + assert(NewDefs->empty() && "All fallback defs should have been found by lookupImpl"); } @@ -1214,7 +1438,7 @@ Error JITDylib::lodgeQuery(std::shared_ptr &Q, return Error::success(); } -void JITDylib::lodgeQueryImpl( +Error JITDylib::lodgeQueryImpl( std::shared_ptr &Q, SymbolNameSet &Unresolved, bool MatchNonExported, std::vector> &MUs) { @@ -1235,6 +1459,14 @@ void JITDylib::lodgeQueryImpl( // Unresolved set. 
ToRemove.push_back(Name); + // If we matched against this symbol but it is in the error state then + // bail out and treat it as a failure to materialize. + if (SymI->second.getFlags().hasError()) { + auto FailedSymbolsMap = std::make_shared(); + (*FailedSymbolsMap)[this] = {Name}; + return make_error(std::move(FailedSymbolsMap)); + } + // If this symbol already meets the required state for then notify the // query and continue. if (SymI->second.getState() >= Q->getRequiredState()) { @@ -1277,6 +1509,8 @@ void JITDylib::lodgeQueryImpl( // Remove any symbols that we found. for (auto &Name : ToRemove) Unresolved.erase(Name); + + return Error::success(); } Expected @@ -1292,9 +1526,16 @@ JITDylib::legacyLookup(std::shared_ptr Q, SymbolNameSet Unresolved = std::move(Names); auto Err = ES.runSessionLocked([&, this]() -> Error { QueryComplete = lookupImpl(Q, MUs, Unresolved); - if (DefGenerator && !Unresolved.empty()) { + + // Run any definition generators. + for (auto &DG : DefGenerators) { + + // Bail out early if we have resolved everything. + if (Unresolved.empty()) + break; + assert(!QueryComplete && "query complete but unresolved symbols remain?"); - auto NewDefs = DefGenerator(*this, Unresolved); + auto NewDefs = DG->tryToGenerate(*this, Unresolved); if (!NewDefs) return NewDefs.takeError(); if (!NewDefs->empty()) { @@ -1432,8 +1673,6 @@ void JITDylib::dump(raw_ostream &OS) { OS << " MaterializingInfos entries:\n"; for (auto &KV : MaterializingInfos) { OS << " \"" << *KV.first << "\":\n" - << " IsEmitted = " << (KV.second.IsEmitted ? "true" : "false") - << "\n" << " " << KV.second.pendingQueries().size() << " pending queries: { "; for (const auto &Q : KV.second.pendingQueries()) @@ -1486,13 +1725,6 @@ JITDylib::MaterializingInfo::takeQueriesMeeting(SymbolState RequiredState) { return Result; } -JITDylib::AsynchronousSymbolQueryList -JITDylib::MaterializingInfo::takeAllQueries() { - AsynchronousSymbolQueryList Result; - std::swap(Result, PendingQueries); - return Result; -} - JITDylib::JITDylib(ExecutionSession &ES, std::string Name) : ES(ES), JITDylibName(std::move(Name)) { SearchOrder.push_back({this, true}); diff --git a/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/lib/ExecutionEngine/Orc/ExecutionUtils.cpp index f7fc5f8f1797..4a886ac0597c 100644 --- a/lib/ExecutionEngine/Orc/ExecutionUtils.cpp +++ b/lib/ExecutionEngine/Orc/ExecutionUtils.cpp @@ -8,6 +8,7 @@ #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" +#include "llvm/ExecutionEngine/Orc/Layer.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" @@ -67,7 +68,7 @@ CtorDtorIterator::Element CtorDtorIterator::operator*() const { } } - ConstantInt *Priority = dyn_cast(CS->getOperand(0)); + auto *Priority = cast(CS->getOperand(0)); Value *Data = CS->getNumOperands() == 3 ? 
CS->getOperand(2) : nullptr; if (Data && !isa(Data)) Data = nullptr; @@ -87,7 +88,7 @@ iterator_range getDestructors(const Module &M) { } void CtorDtorRunner::add(iterator_range CtorDtors) { - if (empty(CtorDtors)) + if (CtorDtors.empty()) return; MangleAndInterner Mangle( @@ -178,20 +179,20 @@ DynamicLibrarySearchGenerator::DynamicLibrarySearchGenerator( : Dylib(std::move(Dylib)), Allow(std::move(Allow)), GlobalPrefix(GlobalPrefix) {} -Expected +Expected> DynamicLibrarySearchGenerator::Load(const char *FileName, char GlobalPrefix, SymbolPredicate Allow) { std::string ErrMsg; auto Lib = sys::DynamicLibrary::getPermanentLibrary(FileName, &ErrMsg); if (!Lib.isValid()) return make_error(std::move(ErrMsg), inconvertibleErrorCode()); - return DynamicLibrarySearchGenerator(std::move(Lib), GlobalPrefix, - std::move(Allow)); + return std::make_unique( + std::move(Lib), GlobalPrefix, std::move(Allow)); } Expected -DynamicLibrarySearchGenerator::operator()(JITDylib &JD, - const SymbolNameSet &Names) { +DynamicLibrarySearchGenerator::tryToGenerate(JITDylib &JD, + const SymbolNameSet &Names) { orc::SymbolNameSet Added; orc::SymbolMap NewSymbols; @@ -226,5 +227,82 @@ DynamicLibrarySearchGenerator::operator()(JITDylib &JD, return Added; } +Expected> +StaticLibraryDefinitionGenerator::Load(ObjectLayer &L, const char *FileName) { + auto ArchiveBuffer = errorOrToExpected(MemoryBuffer::getFile(FileName)); + + if (!ArchiveBuffer) + return ArchiveBuffer.takeError(); + + return Create(L, std::move(*ArchiveBuffer)); +} + +Expected> +StaticLibraryDefinitionGenerator::Create( + ObjectLayer &L, std::unique_ptr ArchiveBuffer) { + Error Err = Error::success(); + + std::unique_ptr ADG( + new StaticLibraryDefinitionGenerator(L, std::move(ArchiveBuffer), Err)); + + if (Err) + return std::move(Err); + + return std::move(ADG); +} + +Expected +StaticLibraryDefinitionGenerator::tryToGenerate(JITDylib &JD, + const SymbolNameSet &Names) { + + DenseSet> ChildBufferInfos; + SymbolNameSet NewDefs; + + for (const auto &Name : Names) { + auto Child = Archive.findSym(*Name); + if (!Child) + return Child.takeError(); + if (*Child == None) + continue; + auto ChildBuffer = (*Child)->getMemoryBufferRef(); + if (!ChildBuffer) + return ChildBuffer.takeError(); + ChildBufferInfos.insert( + {ChildBuffer->getBuffer(), ChildBuffer->getBufferIdentifier()}); + NewDefs.insert(Name); + } + + for (auto ChildBufferInfo : ChildBufferInfos) { + MemoryBufferRef ChildBufferRef(ChildBufferInfo.first, + ChildBufferInfo.second); + + if (auto Err = + L.add(JD, MemoryBuffer::getMemBuffer(ChildBufferRef), VModuleKey())) + return std::move(Err); + + --UnrealizedObjects; + } + + return NewDefs; +} + +StaticLibraryDefinitionGenerator::StaticLibraryDefinitionGenerator( + ObjectLayer &L, std::unique_ptr ArchiveBuffer, Error &Err) + : L(L), ArchiveBuffer(std::move(ArchiveBuffer)), + Archive(*this->ArchiveBuffer, Err) { + + if (Err) + return; + + Error Err2 = Error::success(); + for (auto _ : Archive.children(Err2)) { + (void)_; + ++UnrealizedObjects; + } + + // No need to check this: We will leave it to the caller. + Err = std::move(Err2); +} + } // End namespace orc. } // End namespace llvm. 
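Both generators in ExecutionUtils.cpp above are now constructed through static Load factories that return Expected<std::unique_ptr<...>> and are handed to a JITDylib rather than invoked directly. A rough usage sketch, assuming the templated JITDylib::addGenerator declared in the headers is the counterpart of the removeGenerator added in Core.cpp above; JD, ObjLayer and the library paths are placeholders, not part of this patch:

    #include "llvm/ExecutionEngine/Orc/Core.h"
    #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
    #include "llvm/ExecutionEngine/Orc/Layer.h"
    using namespace llvm;

    static Error installGenerators(orc::JITDylib &JD, orc::ObjectLayer &ObjLayer) {
      // Resolve otherwise-unknown symbols from a shared library.
      auto DLSG = orc::DynamicLibrarySearchGenerator::Load("/path/to/libfoo.so",
                                                           /*GlobalPrefix=*/'\0');
      if (!DLSG)
        return DLSG.takeError();
      JD.addGenerator(std::move(*DLSG));

      // Pull archive members defining requested symbols into the object layer.
      auto SLDG =
          orc::StaticLibraryDefinitionGenerator::Load(ObjLayer, "/path/to/libbar.a");
      if (!SLDG)
        return SLDG.takeError();
      JD.addGenerator(std::move(*SLDG));

      return Error::success();
    }
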
diff --git a/lib/ExecutionEngine/Orc/IRCompileLayer.cpp b/lib/ExecutionEngine/Orc/IRCompileLayer.cpp index 81dfc02f55b2..d311f34179c7 100644 --- a/lib/ExecutionEngine/Orc/IRCompileLayer.cpp +++ b/lib/ExecutionEngine/Orc/IRCompileLayer.cpp @@ -22,9 +22,9 @@ void IRCompileLayer::setNotifyCompiled(NotifyCompiledFunction NotifyCompiled) { void IRCompileLayer::emit(MaterializationResponsibility R, ThreadSafeModule TSM) { - assert(TSM.getModule() && "Module must not be null"); + assert(TSM && "Module must not be null"); - if (auto Obj = Compile(*TSM.getModule())) { + if (auto Obj = TSM.withModuleDo(Compile)) { { std::lock_guard Lock(IRLayerMutex); if (NotifyCompiled) diff --git a/lib/ExecutionEngine/Orc/IRTransformLayer.cpp b/lib/ExecutionEngine/Orc/IRTransformLayer.cpp index e3519284613e..845ecc71eb87 100644 --- a/lib/ExecutionEngine/Orc/IRTransformLayer.cpp +++ b/lib/ExecutionEngine/Orc/IRTransformLayer.cpp @@ -19,7 +19,7 @@ IRTransformLayer::IRTransformLayer(ExecutionSession &ES, void IRTransformLayer::emit(MaterializationResponsibility R, ThreadSafeModule TSM) { - assert(TSM.getModule() && "Module must not be null"); + assert(TSM && "Module must not be null"); if (auto TransformedTSM = Transform(std::move(TSM), R)) BaseLayer.emit(std::move(R), std::move(*TransformedTSM)); diff --git a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index cc3656fe5dc5..0295db7633dd 100644 --- a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -37,8 +37,9 @@ private: void materialize(MaterializationResponsibility R) override { SymbolMap Result; Result[Name] = JITEvaluatedSymbol(Compile(), JITSymbolFlags::Exported); - R.notifyResolved(Result); - R.notifyEmitted(); + // No dependencies, so these calls cannot fail. 
+ cantFail(R.notifyResolved(Result)); + cantFail(R.notifyEmitted()); } void discard(const JITDylib &JD, const SymbolStringPtr &Name) override { @@ -66,7 +67,7 @@ JITCompileCallbackManager::getCompileCallback(CompileFunction Compile) { std::lock_guard Lock(CCMgrMutex); AddrToSymbol[*TrampolineAddr] = CallbackName; cantFail(CallbacksJD.define( - llvm::make_unique( + std::make_unique( std::move(CallbackName), std::move(Compile), ES.allocateVModule()))); return *TrampolineAddr; @@ -119,7 +120,8 @@ createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES, return make_error( std::string("No callback manager available for ") + T.str(), inconvertibleErrorCode()); - case Triple::aarch64: { + case Triple::aarch64: + case Triple::aarch64_32: { typedef orc::LocalJITCompileCallbackManager CCMgrT; return CCMgrT::Create(ES, ErrorHandlerAddress); } @@ -162,50 +164,51 @@ createLocalIndirectStubsManagerBuilder(const Triple &T) { switch (T.getArch()) { default: return [](){ - return llvm::make_unique< + return std::make_unique< orc::LocalIndirectStubsManager>(); }; case Triple::aarch64: + case Triple::aarch64_32: return [](){ - return llvm::make_unique< + return std::make_unique< orc::LocalIndirectStubsManager>(); }; case Triple::x86: return [](){ - return llvm::make_unique< + return std::make_unique< orc::LocalIndirectStubsManager>(); }; case Triple::mips: return [](){ - return llvm::make_unique< + return std::make_unique< orc::LocalIndirectStubsManager>(); }; case Triple::mipsel: return [](){ - return llvm::make_unique< + return std::make_unique< orc::LocalIndirectStubsManager>(); }; case Triple::mips64: case Triple::mips64el: return [](){ - return llvm::make_unique< + return std::make_unique< orc::LocalIndirectStubsManager>(); }; case Triple::x86_64: if (T.getOS() == Triple::OSType::Win32) { return [](){ - return llvm::make_unique< + return std::make_unique< orc::LocalIndirectStubsManager>(); }; } else { return [](){ - return llvm::make_unique< + return std::make_unique< orc::LocalIndirectStubsManager>(); }; } diff --git a/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp b/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp index df23547a9de3..1d3e6db913e2 100644 --- a/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp +++ b/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp @@ -8,6 +8,7 @@ #include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" +#include "llvm/Support/Host.h" #include "llvm/Support/TargetRegistry.h" namespace llvm { @@ -22,7 +23,21 @@ JITTargetMachineBuilder::JITTargetMachineBuilder(Triple TT) Expected JITTargetMachineBuilder::detectHost() { // FIXME: getProcessTriple is bogus. It returns the host LLVM was compiled on, // rather than a valid triple for the current process. - return JITTargetMachineBuilder(Triple(sys::getProcessTriple())); + JITTargetMachineBuilder TMBuilder((Triple(sys::getProcessTriple()))); + + // Retrieve host CPU name and sub-target features and add them to builder. + // Relocation model, code model and codegen opt level are kept to default + // values. 
+ llvm::SubtargetFeatures SubtargetFeatures; + llvm::StringMap FeatureMap; + llvm::sys::getHostCPUFeatures(FeatureMap); + for (auto &Feature : FeatureMap) + SubtargetFeatures.AddFeature(Feature.first(), Feature.second); + + TMBuilder.setCPU(llvm::sys::getHostCPUName()); + TMBuilder.addFeatures(SubtargetFeatures.getFeatures()); + + return TMBuilder; } Expected> diff --git a/lib/ExecutionEngine/Orc/LLJIT.cpp b/lib/ExecutionEngine/Orc/LLJIT.cpp index b120691faf07..a80f78afe80f 100644 --- a/lib/ExecutionEngine/Orc/LLJIT.cpp +++ b/lib/ExecutionEngine/Orc/LLJIT.cpp @@ -41,7 +41,8 @@ Error LLJIT::defineAbsolute(StringRef Name, JITEvaluatedSymbol Sym) { Error LLJIT::addIRModule(JITDylib &JD, ThreadSafeModule TSM) { assert(TSM && "Can not add null module"); - if (auto Err = applyDataLayout(*TSM.getModule())) + if (auto Err = + TSM.withModuleDo([&](Module &M) { return applyDataLayout(M); })) return Err; return CompileLayer->add(JD, std::move(TSM), ES->allocateVModule()); @@ -63,12 +64,21 @@ LLJIT::createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES) { // If the config state provided an ObjectLinkingLayer factory then use it. if (S.CreateObjectLinkingLayer) - return S.CreateObjectLinkingLayer(ES); + return S.CreateObjectLinkingLayer(ES, S.JTMB->getTargetTriple()); // Otherwise default to creating an RTDyldObjectLinkingLayer that constructs // a new SectionMemoryManager for each object. - auto GetMemMgr = []() { return llvm::make_unique(); }; - return llvm::make_unique(ES, std::move(GetMemMgr)); + auto GetMemMgr = []() { return std::make_unique(); }; + auto ObjLinkingLayer = + std::make_unique(ES, std::move(GetMemMgr)); + + if (S.JTMB->getTargetTriple().isOSBinFormatCOFF()) + ObjLinkingLayer->setOverrideObjectFlagsWithResponsibilityFlags(true); + + // FIXME: Explicit conversion to std::unique_ptr added to silence + // errors from some GCC / libstdc++ bots. Remove this conversion (i.e. + // just return ObjLinkingLayer) once those bots are upgraded. + return std::unique_ptr(std::move(ObjLinkingLayer)); } Expected @@ -92,7 +102,7 @@ LLJIT::createCompileFunction(LLJITBuilderState &S, } LLJIT::LLJIT(LLJITBuilderState &S, Error &Err) - : ES(S.ES ? std::move(S.ES) : llvm::make_unique()), + : ES(S.ES ? std::move(S.ES) : std::make_unique()), Main(this->ES->getMainJITDylib()), DL(""), CtorRunner(Main), DtorRunner(Main) { @@ -113,13 +123,13 @@ LLJIT::LLJIT(LLJITBuilderState &S, Error &Err) Err = CompileFunction.takeError(); return; } - CompileLayer = llvm::make_unique( + CompileLayer = std::make_unique( *ES, *ObjLinkingLayer, std::move(*CompileFunction)); } if (S.NumCompileThreads > 0) { CompileLayer->setCloneToNewContextOnEmit(true); - CompileThreads = llvm::make_unique(S.NumCompileThreads); + CompileThreads = std::make_unique(S.NumCompileThreads); ES->setDispatchMaterialization( [this](JITDylib &JD, std::unique_ptr MU) { // FIXME: Switch to move capture once we have c++14. 
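The recurring ThreadSafeModule change throughout the Orc layers in this patch (CompileOnDemandLayer, IRCompileLayer, LLJIT, Layer) is that the module is no longer reached through a raw getModule() pointer with a separately obtained context lock: it is only touched inside TSM.withModuleDo(...), which runs the callable under the context lock and forwards its return value (an Error, an Expected, or a plain value, as the diffs show). A small sketch of the pattern; renameModule is a made-up helper, not code from this patch:

    #include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/Error.h"
    using namespace llvm;

    static Error renameModule(orc::ThreadSafeModule &TSM, StringRef NewName) {
      if (!TSM)
        return make_error<StringError>("null ThreadSafeModule",
                                       inconvertibleErrorCode());
      // The lambda runs with the underlying ThreadSafeContext lock held.
      return TSM.withModuleDo([&](Module &M) -> Error {
        M.setModuleIdentifier(NewName);
        return Error::success();
      });
    }
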
@@ -166,10 +176,14 @@ Error LLLazyJITBuilderState::prepareForConstruction() { Error LLLazyJIT::addLazyIRModule(JITDylib &JD, ThreadSafeModule TSM) { assert(TSM && "Can not add null module"); - if (auto Err = applyDataLayout(*TSM.getModule())) - return Err; + if (auto Err = TSM.withModuleDo([&](Module &M) -> Error { + if (auto Err = applyDataLayout(M)) + return Err; - recordCtorDtors(*TSM.getModule()); + recordCtorDtors(M); + return Error::success(); + })) + return Err; return CODLayer->add(JD, std::move(TSM), ES->allocateVModule()); } @@ -212,10 +226,10 @@ LLLazyJIT::LLLazyJIT(LLLazyJITBuilderState &S, Error &Err) : LLJIT(S, Err) { } // Create the transform layer. - TransformLayer = llvm::make_unique(*ES, *CompileLayer); + TransformLayer = std::make_unique(*ES, *CompileLayer); // Create the COD layer. - CODLayer = llvm::make_unique( + CODLayer = std::make_unique( *ES, *TransformLayer, *LCTMgr, std::move(ISMBuilder)); if (S.NumCompileThreads > 0) diff --git a/lib/ExecutionEngine/Orc/Layer.cpp b/lib/ExecutionEngine/Orc/Layer.cpp index 3ed2dabf4545..580e2682ec8c 100644 --- a/lib/ExecutionEngine/Orc/Layer.cpp +++ b/lib/ExecutionEngine/Orc/Layer.cpp @@ -19,7 +19,7 @@ IRLayer::IRLayer(ExecutionSession &ES) : ES(ES) {} IRLayer::~IRLayer() {} Error IRLayer::add(JITDylib &JD, ThreadSafeModule TSM, VModuleKey K) { - return JD.define(llvm::make_unique( + return JD.define(std::make_unique( *this, std::move(K), std::move(TSM))); } @@ -29,15 +29,17 @@ IRMaterializationUnit::IRMaterializationUnit(ExecutionSession &ES, assert(this->TSM && "Module must not be null"); - MangleAndInterner Mangle(ES, this->TSM.getModule()->getDataLayout()); - for (auto &G : this->TSM.getModule()->global_values()) { - if (G.hasName() && !G.isDeclaration() && !G.hasLocalLinkage() && - !G.hasAvailableExternallyLinkage() && !G.hasAppendingLinkage()) { - auto MangledName = Mangle(G.getName()); - SymbolFlags[MangledName] = JITSymbolFlags::fromGlobalValue(G); - SymbolToDefinition[MangledName] = &G; + MangleAndInterner Mangle(ES, this->TSM.getModuleUnlocked()->getDataLayout()); + this->TSM.withModuleDo([&](Module &M) { + for (auto &G : M.global_values()) { + if (G.hasName() && !G.isDeclaration() && !G.hasLocalLinkage() && + !G.hasAvailableExternallyLinkage() && !G.hasAppendingLinkage()) { + auto MangledName = Mangle(G.getName()); + SymbolFlags[MangledName] = JITSymbolFlags::fromGlobalValue(G); + SymbolToDefinition[MangledName] = &G; + } } - } + }); } IRMaterializationUnit::IRMaterializationUnit( @@ -47,8 +49,9 @@ IRMaterializationUnit::IRMaterializationUnit( TSM(std::move(TSM)), SymbolToDefinition(std::move(SymbolToDefinition)) {} StringRef IRMaterializationUnit::getName() const { - if (TSM.getModule()) - return TSM.getModule()->getModuleIdentifier(); + if (TSM) + return TSM.withModuleDo( + [](const Module &M) -> StringRef { return M.getModuleIdentifier(); }); return ""; } @@ -90,7 +93,6 @@ void BasicIRLayerMaterializationUnit::materialize( auto &N = R.getTargetJITDylib().getName(); #endif // NDEBUG - auto Lock = TSM.getContextLock(); LLVM_DEBUG(ES.runSessionLocked( [&]() { dbgs() << "Emitting, for " << N << ", " << *this << "\n"; });); L.emit(std::move(R), std::move(TSM)); diff --git a/lib/ExecutionEngine/Orc/LazyReexports.cpp b/lib/ExecutionEngine/Orc/LazyReexports.cpp index fc8205845654..93aabd817d60 100644 --- a/lib/ExecutionEngine/Orc/LazyReexports.cpp +++ b/lib/ExecutionEngine/Orc/LazyReexports.cpp @@ -50,7 +50,6 @@ LazyCallThroughManager::callThroughToSymbol(JITTargetAddress TrampolineAddr) { SourceJD = I->second.first; 
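The addLazyIRModule and IRMaterializationUnit hunks above replace direct getModule() access with ThreadSafeModule::withModuleDo, which runs the callback under the module's context lock. A minimal sketch of the idiom, assuming nothing beyond the ORC headers:

#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include <memory>
#include <string>

using namespace llvm;

// Build a ThreadSafeModule from a fresh context and an empty module.
static orc::ThreadSafeModule makeEmptyModule() {
  auto Ctx = std::make_unique<LLVMContext>();
  auto M = std::make_unique<Module>("empty", *Ctx);
  return orc::ThreadSafeModule(std::move(M), std::move(Ctx));
}

// Access the module only through withModuleDo; the lambda runs with the
// underlying LLVMContext locked, so no explicit getContextLock() is needed.
static std::string identify(orc::ThreadSafeModule &TSM) {
  return TSM.withModuleDo(
      [](Module &M) -> std::string { return M.getModuleIdentifier(); });
}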
SymbolName = I->second.second; } - auto LookupResult = ES.lookup(JITDylibSearchList({{SourceJD, true}}), SymbolName); @@ -91,6 +90,7 @@ createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES, inconvertibleErrorCode()); case Triple::aarch64: + case Triple::aarch64_32: return LocalLazyCallThroughManager::Create(ES, ErrorHandlerAddr); @@ -121,7 +121,8 @@ createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES, LazyReexportsMaterializationUnit::LazyReexportsMaterializationUnit( LazyCallThroughManager &LCTManager, IndirectStubsManager &ISManager, - JITDylib &SourceJD, SymbolAliasMap CallableAliases, VModuleKey K) + JITDylib &SourceJD, SymbolAliasMap CallableAliases, ImplSymbolMap *SrcJDLoc, + VModuleKey K) : MaterializationUnit(extractFlags(CallableAliases), std::move(K)), LCTManager(LCTManager), ISManager(ISManager), SourceJD(SourceJD), CallableAliases(std::move(CallableAliases)), @@ -129,7 +130,8 @@ LazyReexportsMaterializationUnit::LazyReexportsMaterializationUnit( [&ISManager](JITDylib &JD, const SymbolStringPtr &SymbolName, JITTargetAddress ResolvedAddr) { return ISManager.updatePointer(*SymbolName, ResolvedAddr); - })) {} + })), + AliaseeTable(SrcJDLoc) {} StringRef LazyReexportsMaterializationUnit::getName() const { return ""; @@ -149,7 +151,7 @@ void LazyReexportsMaterializationUnit::materialize( if (!CallableAliases.empty()) R.replace(lazyReexports(LCTManager, ISManager, SourceJD, - std::move(CallableAliases))); + std::move(CallableAliases), AliaseeTable)); IndirectStubsManager::StubInitsMap StubInits; for (auto &Alias : RequestedAliases) { @@ -168,6 +170,9 @@ void LazyReexportsMaterializationUnit::materialize( std::make_pair(*CallThroughTrampoline, Alias.second.AliasFlags); } + if (AliaseeTable != nullptr && !RequestedAliases.empty()) + AliaseeTable->trackImpls(RequestedAliases, &SourceJD); + if (auto Err = ISManager.createStubs(StubInits)) { SourceJD.getExecutionSession().reportError(std::move(Err)); R.failMaterialization(); @@ -178,8 +183,9 @@ void LazyReexportsMaterializationUnit::materialize( for (auto &Alias : RequestedAliases) Stubs[Alias.first] = ISManager.findStub(*Alias.first, false); - R.notifyResolved(Stubs); - R.notifyEmitted(); + // No registered dependencies, so these calls cannot fail. 
+ cantFail(R.notifyResolved(Stubs)); + cantFail(R.notifyEmitted()); } void LazyReexportsMaterializationUnit::discard(const JITDylib &JD, diff --git a/lib/ExecutionEngine/Orc/Legacy.cpp b/lib/ExecutionEngine/Orc/Legacy.cpp index ce6368b57a89..9f9a6730b2c3 100644 --- a/lib/ExecutionEngine/Orc/Legacy.cpp +++ b/lib/ExecutionEngine/Orc/Legacy.cpp @@ -23,7 +23,8 @@ void JITSymbolResolverAdapter::lookup(const LookupSet &Symbols, for (auto &S : Symbols) InternedSymbols.insert(ES.intern(S)); - auto OnResolvedWithUnwrap = [OnResolved](Expected InternedResult) { + auto OnResolvedWithUnwrap = [OnResolved = std::move(OnResolved)]( + Expected InternedResult) mutable { if (!InternedResult) { OnResolved(InternedResult.takeError()); return; @@ -36,7 +37,7 @@ void JITSymbolResolverAdapter::lookup(const LookupSet &Symbols, }; auto Q = std::make_shared( - InternedSymbols, SymbolState::Resolved, OnResolvedWithUnwrap); + InternedSymbols, SymbolState::Resolved, std::move(OnResolvedWithUnwrap)); auto Unresolved = R.lookup(Q, InternedSymbols); if (Unresolved.empty()) { diff --git a/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index def0b300eca1..874decb2ade0 100644 --- a/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -29,6 +29,13 @@ public: std::unique_ptr ObjBuffer) : Layer(Layer), MR(std::move(MR)), ObjBuffer(std::move(ObjBuffer)) {} + ~ObjectLinkingLayerJITLinkContext() { + // If there is an object buffer return function then use it to + // return ownership of the buffer. + if (Layer.ReturnObjectBuffer) + Layer.ReturnObjectBuffer(std::move(ObjBuffer)); + } + JITLinkMemoryManager &getMemoryManager() override { return Layer.MemMgr; } MemoryBufferRef getObjectBuffer() const override { @@ -41,7 +48,7 @@ public: } void lookup(const DenseSet &Symbols, - JITLinkAsyncLookupContinuation LookupContinuation) override { + std::unique_ptr LC) override { JITDylibSearchList SearchOrder; MR.getTargetJITDylib().withSearchOrderDo( @@ -54,18 +61,16 @@ public: InternedSymbols.insert(ES.intern(S)); // OnResolve -- De-intern the symbols and pass the result to the linker. - // FIXME: Capture LookupContinuation by move once we have c++14. 
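Several hunks above reflect notifyResolved() and notifyEmitted() now returning Error: call sites either propagate the error and fail materialization, or wrap the call in cantFail() where, as the comment notes, no registered dependencies can make it fail. A small sketch of both patterns with a stand-in function (mayFail is hypothetical, not an ORC API):

#include "llvm/Support/Error.h"

using namespace llvm;

// Stand-in for an API that was changed to return Error.
static Error mayFail(bool Fail) {
  if (Fail)
    return createStringError(inconvertibleErrorCode(), "materialization failed");
  return Error::success();
}

// Pattern 1: propagate the error to the caller.
static Error propagate() {
  if (Error Err = mayFail(true))
    return Err;
  return Error::success();
}

// Pattern 2: assert success; cantFail aborts if an error is actually returned.
static void assumeSuccess() { cantFail(mayFail(false)); }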
- auto SharedLookupContinuation = - std::make_shared( - std::move(LookupContinuation)); - auto OnResolve = [SharedLookupContinuation](Expected Result) { + auto OnResolve = [this, LookupContinuation = std::move(LC)]( + Expected Result) mutable { + auto Main = Layer.getExecutionSession().intern("_main"); if (!Result) - (*SharedLookupContinuation)(Result.takeError()); + LookupContinuation->run(Result.takeError()); else { AsyncLookupResult LR; for (auto &KV : *Result) LR[*KV.first] = KV.second; - (*SharedLookupContinuation)(std::move(LR)); + LookupContinuation->run(std::move(LR)); } }; @@ -75,29 +80,25 @@ public: }); } - void notifyResolved(AtomGraph &G) override { + void notifyResolved(LinkGraph &G) override { auto &ES = Layer.getExecutionSession(); SymbolFlagsMap ExtraSymbolsToClaim; bool AutoClaim = Layer.AutoClaimObjectSymbols; SymbolMap InternedResult; - for (auto *DA : G.defined_atoms()) - if (DA->hasName() && DA->isGlobal()) { - auto InternedName = ES.intern(DA->getName()); + for (auto *Sym : G.defined_symbols()) + if (Sym->hasName() && Sym->getScope() != Scope::Local) { + auto InternedName = ES.intern(Sym->getName()); JITSymbolFlags Flags; - if (DA->isExported()) - Flags |= JITSymbolFlags::Exported; - if (DA->isWeak()) - Flags |= JITSymbolFlags::Weak; - if (DA->isCallable()) + if (Sym->isCallable()) Flags |= JITSymbolFlags::Callable; - if (DA->isCommon()) - Flags |= JITSymbolFlags::Common; + if (Sym->getScope() == Scope::Default) + Flags |= JITSymbolFlags::Exported; InternedResult[InternedName] = - JITEvaluatedSymbol(DA->getAddress(), Flags); + JITEvaluatedSymbol(Sym->getAddress(), Flags); if (AutoClaim && !MR.getSymbols().count(InternedName)) { assert(!ExtraSymbolsToClaim.count(InternedName) && "Duplicate symbol to claim?"); @@ -105,17 +106,17 @@ public: } } - for (auto *A : G.absolute_atoms()) - if (A->hasName()) { - auto InternedName = ES.intern(A->getName()); + for (auto *Sym : G.absolute_symbols()) + if (Sym->hasName()) { + auto InternedName = ES.intern(Sym->getName()); JITSymbolFlags Flags; Flags |= JITSymbolFlags::Absolute; - if (A->isWeak()) - Flags |= JITSymbolFlags::Weak; - if (A->isCallable()) + if (Sym->isCallable()) Flags |= JITSymbolFlags::Callable; + if (Sym->getLinkage() == Linkage::Weak) + Flags |= JITSymbolFlags::Weak; InternedResult[InternedName] = - JITEvaluatedSymbol(A->getAddress(), Flags); + JITEvaluatedSymbol(Sym->getAddress(), Flags); if (AutoClaim && !MR.getSymbols().count(InternedName)) { assert(!ExtraSymbolsToClaim.count(InternedName) && "Duplicate symbol to claim?"); @@ -126,35 +127,38 @@ public: if (!ExtraSymbolsToClaim.empty()) if (auto Err = MR.defineMaterializing(ExtraSymbolsToClaim)) return notifyFailed(std::move(Err)); - - MR.notifyResolved(InternedResult); - + if (auto Err = MR.notifyResolved(InternedResult)) { + Layer.getExecutionSession().reportError(std::move(Err)); + MR.failMaterialization(); + return; + } Layer.notifyLoaded(MR); } void notifyFinalized( std::unique_ptr A) override { - if (auto Err = Layer.notifyEmitted(MR, std::move(A))) { Layer.getExecutionSession().reportError(std::move(Err)); MR.failMaterialization(); - return; } - MR.notifyEmitted(); + if (auto Err = MR.notifyEmitted()) { + Layer.getExecutionSession().reportError(std::move(Err)); + MR.failMaterialization(); + } } - AtomGraphPassFunction getMarkLivePass(const Triple &TT) const override { - return [this](AtomGraph &G) { return markResponsibilitySymbolsLive(G); }; + LinkGraphPassFunction getMarkLivePass(const Triple &TT) const override { + return [this](LinkGraph &G) { return 
markResponsibilitySymbolsLive(G); }; } Error modifyPassConfig(const Triple &TT, PassConfiguration &Config) override { // Add passes to mark duplicate defs as should-discard, and to walk the - // atom graph to build the symbol dependence graph. + // link graph to build the symbol dependence graph. Config.PrePrunePasses.push_back( - [this](AtomGraph &G) { return markSymbolsToDiscard(G); }); + [this](LinkGraph &G) { return externalizeWeakAndCommonSymbols(G); }); Config.PostPrunePasses.push_back( - [this](AtomGraph &G) { return computeNamedSymbolDependencies(G); }); + [this](LinkGraph &G) { return computeNamedSymbolDependencies(G); }); Layer.modifyPassConfig(MR, TT, Config); @@ -162,65 +166,59 @@ public: } private: - using AnonAtomNamedDependenciesMap = - DenseMap; + using AnonToNamedDependenciesMap = DenseMap; - Error markSymbolsToDiscard(AtomGraph &G) { + Error externalizeWeakAndCommonSymbols(LinkGraph &G) { auto &ES = Layer.getExecutionSession(); - for (auto *DA : G.defined_atoms()) - if (DA->isWeak() && DA->hasName()) { - auto S = ES.intern(DA->getName()); - auto I = MR.getSymbols().find(S); - if (I == MR.getSymbols().end()) - DA->setShouldDiscard(true); + for (auto *Sym : G.defined_symbols()) + if (Sym->hasName() && Sym->getLinkage() == Linkage::Weak) { + if (!MR.getSymbols().count(ES.intern(Sym->getName()))) + G.makeExternal(*Sym); } - for (auto *A : G.absolute_atoms()) - if (A->isWeak() && A->hasName()) { - auto S = ES.intern(A->getName()); - auto I = MR.getSymbols().find(S); - if (I == MR.getSymbols().end()) - A->setShouldDiscard(true); + for (auto *Sym : G.absolute_symbols()) + if (Sym->hasName() && Sym->getLinkage() == Linkage::Weak) { + if (!MR.getSymbols().count(ES.intern(Sym->getName()))) + G.makeExternal(*Sym); } return Error::success(); } - Error markResponsibilitySymbolsLive(AtomGraph &G) const { + Error markResponsibilitySymbolsLive(LinkGraph &G) const { auto &ES = Layer.getExecutionSession(); - for (auto *DA : G.defined_atoms()) - if (DA->hasName() && - MR.getSymbols().count(ES.intern(DA->getName()))) - DA->setLive(true); + for (auto *Sym : G.defined_symbols()) + if (Sym->hasName() && MR.getSymbols().count(ES.intern(Sym->getName()))) + Sym->setLive(true); return Error::success(); } - Error computeNamedSymbolDependencies(AtomGraph &G) { + Error computeNamedSymbolDependencies(LinkGraph &G) { auto &ES = MR.getTargetJITDylib().getExecutionSession(); auto AnonDeps = computeAnonDeps(G); - for (auto *DA : G.defined_atoms()) { + for (auto *Sym : G.defined_symbols()) { // Skip anonymous and non-global atoms: we do not need dependencies for // these. 
- if (!DA->hasName() || !DA->isGlobal()) + if (Sym->getScope() == Scope::Local) continue; - auto DAName = ES.intern(DA->getName()); - SymbolNameSet &DADeps = NamedSymbolDeps[DAName]; + auto SymName = ES.intern(Sym->getName()); + SymbolNameSet &SymDeps = NamedSymbolDeps[SymName]; - for (auto &E : DA->edges()) { - auto &TA = E.getTarget(); + for (auto &E : Sym->getBlock().edges()) { + auto &TargetSym = E.getTarget(); - if (TA.hasName()) - DADeps.insert(ES.intern(TA.getName())); + if (TargetSym.getScope() != Scope::Local) + SymDeps.insert(ES.intern(TargetSym.getName())); else { - assert(TA.isDefined() && "Anonymous atoms must be defined"); - auto &DTA = static_cast(TA); - auto I = AnonDeps.find(&DTA); + assert(TargetSym.isDefined() && + "Anonymous/local symbols must be defined"); + auto I = AnonDeps.find(&TargetSym); if (I != AnonDeps.end()) for (auto &S : I->second) - DADeps.insert(S); + SymDeps.insert(S); } } } @@ -228,58 +226,59 @@ private: return Error::success(); } - AnonAtomNamedDependenciesMap computeAnonDeps(AtomGraph &G) { + AnonToNamedDependenciesMap computeAnonDeps(LinkGraph &G) { auto &ES = MR.getTargetJITDylib().getExecutionSession(); - AnonAtomNamedDependenciesMap DepMap; + AnonToNamedDependenciesMap DepMap; - // For all anonymous atoms: + // For all anonymous symbols: // (1) Add their named dependencies. // (2) Add them to the worklist for further iteration if they have any - // depend on any other anonymous atoms. + // depend on any other anonymous symbols. struct WorklistEntry { - WorklistEntry(DefinedAtom *DA, DenseSet DAAnonDeps) - : DA(DA), DAAnonDeps(std::move(DAAnonDeps)) {} + WorklistEntry(Symbol *Sym, DenseSet SymAnonDeps) + : Sym(Sym), SymAnonDeps(std::move(SymAnonDeps)) {} - DefinedAtom *DA = nullptr; - DenseSet DAAnonDeps; + Symbol *Sym = nullptr; + DenseSet SymAnonDeps; }; std::vector Worklist; - for (auto *DA : G.defined_atoms()) - if (!DA->hasName()) { - auto &DANamedDeps = DepMap[DA]; - DenseSet DAAnonDeps; - - for (auto &E : DA->edges()) { - auto &TA = E.getTarget(); - if (TA.hasName()) - DANamedDeps.insert(ES.intern(TA.getName())); + for (auto *Sym : G.defined_symbols()) + if (!Sym->hasName()) { + auto &SymNamedDeps = DepMap[Sym]; + DenseSet SymAnonDeps; + + for (auto &E : Sym->getBlock().edges()) { + auto &TargetSym = E.getTarget(); + if (TargetSym.hasName()) + SymNamedDeps.insert(ES.intern(TargetSym.getName())); else { - assert(TA.isDefined() && "Anonymous atoms must be defined"); - DAAnonDeps.insert(static_cast(&TA)); + assert(TargetSym.isDefined() && + "Anonymous symbols must be defined"); + SymAnonDeps.insert(&TargetSym); } } - if (!DAAnonDeps.empty()) - Worklist.push_back(WorklistEntry(DA, std::move(DAAnonDeps))); + if (!SymAnonDeps.empty()) + Worklist.push_back(WorklistEntry(Sym, std::move(SymAnonDeps))); } - // Loop over all anonymous atoms with anonymous dependencies, propagating + // Loop over all anonymous symbols with anonymous dependencies, propagating // their respective *named* dependencies. Iterate until we hit a stable // state. 
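The do/while loop that follows implements the fixed-point propagation described in the comment above: anonymous symbols inherit the named dependencies of the anonymous symbols they reference, iterated until nothing changes. A standalone sketch of the same idea on plain integers and strings (no JITLink types):

#include <map>
#include <set>
#include <string>

int main() {
  using Node = int;
  // Node 1 has a named dependency; 2 depends on 1, 3 depends on 2.
  std::map<Node, std::set<std::string>> NamedDeps = {{1, {"foo"}}, {2, {}}, {3, {}}};
  std::map<Node, std::set<Node>> AnonDeps = {{2, {1}}, {3, {2}}};

  bool Changed;
  do {
    Changed = false;
    for (auto &KV : AnonDeps)
      for (Node Target : KV.second)
        for (const std::string &Name : NamedDeps[Target])
          Changed |= NamedDeps[KV.first].insert(Name).second;
  } while (Changed);

  // After convergence, node 3 transitively carries the named dependency "foo".
  return NamedDeps[3].count("foo") ? 0 : 1;
}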
bool Changed; do { Changed = false; for (auto &WLEntry : Worklist) { - auto *DA = WLEntry.DA; - auto &DANamedDeps = DepMap[DA]; - auto &DAAnonDeps = WLEntry.DAAnonDeps; + auto *Sym = WLEntry.Sym; + auto &SymNamedDeps = DepMap[Sym]; + auto &SymAnonDeps = WLEntry.SymAnonDeps; - for (auto *TA : DAAnonDeps) { - auto I = DepMap.find(TA); + for (auto *TargetSym : SymAnonDeps) { + auto I = DepMap.find(TargetSym); if (I != DepMap.end()) for (const auto &S : I->second) - Changed |= DANamedDeps.insert(S).second; + Changed |= SymNamedDeps.insert(S).second; } } } while (Changed); @@ -330,7 +329,7 @@ ObjectLinkingLayer::~ObjectLinkingLayer() { void ObjectLinkingLayer::emit(MaterializationResponsibility R, std::unique_ptr O) { assert(O && "Object must not be null"); - jitLink(llvm::make_unique( + jitLink(std::make_unique( *this, std::move(R), std::move(O))); } @@ -410,7 +409,7 @@ Error ObjectLinkingLayer::removeAllModules() { } EHFrameRegistrationPlugin::EHFrameRegistrationPlugin( - jitlink::EHFrameRegistrar &Registrar) + EHFrameRegistrar &Registrar) : Registrar(Registrar) {} void EHFrameRegistrationPlugin::modifyPassConfig( @@ -419,61 +418,66 @@ void EHFrameRegistrationPlugin::modifyPassConfig( assert(!InProcessLinks.count(&MR) && "Link for MR already being tracked?"); PassConfig.PostFixupPasses.push_back( - createEHFrameRecorderPass(TT, [this, &MR](JITTargetAddress Addr) { + createEHFrameRecorderPass(TT, [this, &MR](JITTargetAddress Addr, + size_t Size) { if (Addr) - InProcessLinks[&MR] = Addr; + InProcessLinks[&MR] = { Addr, Size }; })); } Error EHFrameRegistrationPlugin::notifyEmitted( MaterializationResponsibility &MR) { - auto EHFrameAddrItr = InProcessLinks.find(&MR); - if (EHFrameAddrItr == InProcessLinks.end()) + auto EHFrameRangeItr = InProcessLinks.find(&MR); + if (EHFrameRangeItr == InProcessLinks.end()) return Error::success(); - auto EHFrameAddr = EHFrameAddrItr->second; - assert(EHFrameAddr && "eh-frame addr to register can not be null"); + auto EHFrameRange = EHFrameRangeItr->second; + assert(EHFrameRange.Addr && + "eh-frame addr to register can not be null"); - InProcessLinks.erase(EHFrameAddrItr); + InProcessLinks.erase(EHFrameRangeItr); if (auto Key = MR.getVModuleKey()) - TrackedEHFrameAddrs[Key] = EHFrameAddr; + TrackedEHFrameRanges[Key] = EHFrameRange; else - UntrackedEHFrameAddrs.push_back(EHFrameAddr); + UntrackedEHFrameRanges.push_back(EHFrameRange); - return Registrar.registerEHFrames(EHFrameAddr); + return Registrar.registerEHFrames(EHFrameRange.Addr, EHFrameRange.Size); } Error EHFrameRegistrationPlugin::notifyRemovingModule(VModuleKey K) { - auto EHFrameAddrItr = TrackedEHFrameAddrs.find(K); - if (EHFrameAddrItr == TrackedEHFrameAddrs.end()) + auto EHFrameRangeItr = TrackedEHFrameRanges.find(K); + if (EHFrameRangeItr == TrackedEHFrameRanges.end()) return Error::success(); - auto EHFrameAddr = EHFrameAddrItr->second; - assert(EHFrameAddr && "Tracked eh-frame addr must not be null"); + auto EHFrameRange = EHFrameRangeItr->second; + assert(EHFrameRange.Addr && "Tracked eh-frame range must not be null"); - TrackedEHFrameAddrs.erase(EHFrameAddrItr); + TrackedEHFrameRanges.erase(EHFrameRangeItr); - return Registrar.deregisterEHFrames(EHFrameAddr); + return Registrar.deregisterEHFrames(EHFrameRange.Addr, EHFrameRange.Size); } Error EHFrameRegistrationPlugin::notifyRemovingAllModules() { - std::vector EHFrameAddrs = std::move(UntrackedEHFrameAddrs); - EHFrameAddrs.reserve(EHFrameAddrs.size() + TrackedEHFrameAddrs.size()); + std::vector EHFrameRanges = + 
std::move(UntrackedEHFrameRanges); + EHFrameRanges.reserve(EHFrameRanges.size() + TrackedEHFrameRanges.size()); - for (auto &KV : TrackedEHFrameAddrs) - EHFrameAddrs.push_back(KV.second); + for (auto &KV : TrackedEHFrameRanges) + EHFrameRanges.push_back(KV.second); - TrackedEHFrameAddrs.clear(); + TrackedEHFrameRanges.clear(); Error Err = Error::success(); - while (!EHFrameAddrs.empty()) { - auto EHFrameAddr = EHFrameAddrs.back(); - assert(EHFrameAddr && "Untracked eh-frame addr must not be null"); - EHFrameAddrs.pop_back(); - Err = joinErrors(std::move(Err), Registrar.deregisterEHFrames(EHFrameAddr)); + while (!EHFrameRanges.empty()) { + auto EHFrameRange = EHFrameRanges.back(); + assert(EHFrameRange.Addr && "Untracked eh-frame range must not be null"); + EHFrameRanges.pop_back(); + Err = joinErrors(std::move(Err), + Registrar.deregisterEHFrames(EHFrameRange.Addr, + EHFrameRange.Size)); } return Err; diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h index 98129e1690d2..e0af3df9d010 100644 --- a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h +++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h @@ -97,7 +97,7 @@ public: template std::unique_ptr> createGenericLayer(LayerT &Layer) { - return llvm::make_unique>(Layer); + return std::make_unique>(Layer); } } // end namespace detail @@ -316,7 +316,8 @@ public: if (auto Err = CtorRunner.runViaLayer(*this)) return std::move(Err); - IRStaticDestructorRunners.emplace_back(std::move(DtorNames), K); + IRStaticDestructorRunners.emplace_back(AcknowledgeORCv1Deprecation, + std::move(DtorNames), K); return K; } @@ -326,7 +327,7 @@ public: LLVMOrcSymbolResolverFn ExternalResolver, void *ExternalResolverCtx) { return addIRModule(CompileLayer, std::move(M), - llvm::make_unique(), + std::make_unique(), std::move(ExternalResolver), ExternalResolverCtx); } @@ -340,7 +341,7 @@ public: inconvertibleErrorCode()); return addIRModule(*CODLayer, std::move(M), - llvm::make_unique(), + std::make_unique(), std::move(ExternalResolver), ExternalResolverCtx); } @@ -468,7 +469,7 @@ private: if (!CCMgr) return nullptr; - return llvm::make_unique( + return std::make_unique( AcknowledgeORCv1Deprecation, ES, CompileLayer, [&Resolvers](orc::VModuleKey K) { auto ResolverI = Resolvers.find(K); diff --git a/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp index b22ecd5f80a1..939cd539d1fb 100644 --- a/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp +++ b/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp @@ -27,9 +27,9 @@ public: // Build an OnResolve callback to unwrap the interned strings and pass them // to the OnResolved callback. - // FIXME: Switch to move capture of OnResolved once we have c++14. auto OnResolvedWithUnwrap = - [OnResolved](Expected InternedResult) { + [OnResolved = std::move(OnResolved)]( + Expected InternedResult) mutable { if (!InternedResult) { OnResolved(InternedResult.takeError()); return; @@ -50,7 +50,7 @@ public: MR.getTargetJITDylib().withSearchOrderDo( [&](const JITDylibSearchList &JDs) { SearchOrder = JDs; }); ES.lookup(SearchOrder, InternedSymbols, SymbolState::Resolved, - OnResolvedWithUnwrap, RegisterDependencies); + std::move(OnResolvedWithUnwrap), RegisterDependencies); } Expected getResponsibilitySet(const LookupSet &Symbols) { @@ -133,8 +133,6 @@ void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R, JITDylibSearchOrderResolver Resolver(*SharedR); - // FIXME: Switch to move-capture for the 'O' buffer once we have c++14. 
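The FIXME removed above, and the release()/re-wrap workaround deleted just below, are resolved with C++14 init-captures: the buffer is moved straight into the continuation, which makes the lambda move-only and is why these callbacks are typed as llvm::unique_function rather than std::function. A minimal sketch of the idiom (makeContinuation is illustrative, not patch code):

#include "llvm/ADT/FunctionExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>
#include <utility>

using namespace llvm;

// Move a MemoryBuffer into a callback; the result can be moved but not copied.
static unique_function<void()>
makeContinuation(std::unique_ptr<MemoryBuffer> Buf) {
  return [Buf = std::move(Buf)]() mutable {
    outs() << "object buffer holds " << Buf->getBufferSize() << " bytes\n";
  };
}

// Usage sketch:
//   auto C = makeContinuation(MemoryBuffer::getMemBuffer("example"));
//   C();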
- MemoryBuffer *UnownedObjBuffer = O.release(); jitLinkForORC( **Obj, std::move(O), *MemMgr, Resolver, ProcessAllSections, [this, K, SharedR, &Obj, InternalSymbols]( @@ -143,9 +141,8 @@ void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R, return onObjLoad(K, *SharedR, **Obj, std::move(LoadedObjInfo), ResolvedSymbols, *InternalSymbols); }, - [this, K, SharedR, UnownedObjBuffer](Error Err) { - std::unique_ptr ObjBuffer(UnownedObjBuffer); - onObjEmit(K, std::move(ObjBuffer), *SharedR, std::move(Err)); + [this, K, SharedR, O = std::move(O)](Error Err) mutable { + onObjEmit(K, std::move(O), *SharedR, std::move(Err)); }); } @@ -184,7 +181,10 @@ Error RTDyldObjectLinkingLayer::onObjLoad( if (auto Err = R.defineMaterializing(ExtraSymbolsToClaim)) return Err; - R.notifyResolved(Symbols); + if (auto Err = R.notifyResolved(Symbols)) { + R.failMaterialization(); + return Err; + } if (NotifyLoaded) NotifyLoaded(K, Obj, *LoadedObjInfo); @@ -201,7 +201,11 @@ void RTDyldObjectLinkingLayer::onObjEmit( return; } - R.notifyEmitted(); + if (auto Err = R.notifyEmitted()) { + getExecutionSession().reportError(std::move(Err)); + R.failMaterialization(); + return; + } if (NotifyEmitted) NotifyEmitted(K, std::move(ObjBuffer)); diff --git a/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp b/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp new file mode 100644 index 000000000000..f22acf50419d --- /dev/null +++ b/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp @@ -0,0 +1,307 @@ +//===-- SpeculateAnalyses.cpp --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/SpeculateAnalyses.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/Support/ErrorHandling.h" + +#include + +namespace { +using namespace llvm; +SmallVector findBBwithCalls(const Function &F, + bool IndirectCall = false) { + SmallVector BBs; + + auto findCallInst = [&IndirectCall](const Instruction &I) { + if (auto Call = dyn_cast(&I)) + return Call->isIndirectCall() ? IndirectCall : true; + else + return false; + }; + for (auto &BB : F) + if (findCallInst(*BB.getTerminator()) || + llvm::any_of(BB.instructionsWithoutDebug(), findCallInst)) + BBs.emplace_back(&BB); + + return BBs; +} +} // namespace + +// Implementations of Queries shouldn't need to lock the resources +// such as LLVMContext, each argument (function) has a non-shared LLVMContext +// Plus, if Queries contain states necessary locking scheme should be provided. 
+namespace llvm { +namespace orc { + +// Collect direct calls only +void SpeculateQuery::findCalles(const BasicBlock *BB, + DenseSet &CallesNames) { + assert(BB != nullptr && "Traversing Null BB to find calls?"); + + auto getCalledFunction = [&CallesNames](const CallBase *Call) { + auto CalledValue = Call->getCalledOperand()->stripPointerCasts(); + if (auto DirectCall = dyn_cast(CalledValue)) + CallesNames.insert(DirectCall->getName()); + }; + for (auto &I : BB->instructionsWithoutDebug()) + if (auto CI = dyn_cast(&I)) + getCalledFunction(CI); + + if (auto II = dyn_cast(BB->getTerminator())) + getCalledFunction(II); +} + +bool SpeculateQuery::isStraightLine(const Function &F) { + return llvm::all_of(F.getBasicBlockList(), [](const BasicBlock &BB) { + return BB.getSingleSuccessor() != nullptr; + }); +} + +// BlockFreqQuery Implementations + +size_t BlockFreqQuery::numBBToGet(size_t numBB) { + // small CFG + if (numBB < 4) + return numBB; + // mid-size CFG + else if (numBB < 20) + return (numBB / 2); + else + return (numBB / 2) + (numBB / 4); +} + +BlockFreqQuery::ResultTy BlockFreqQuery::operator()(Function &F) { + DenseMap> CallerAndCalles; + DenseSet Calles; + SmallVector, 8> BBFreqs; + + PassBuilder PB; + FunctionAnalysisManager FAM; + PB.registerFunctionAnalyses(FAM); + + auto IBBs = findBBwithCalls(F); + + if (IBBs.empty()) + return None; + + auto &BFI = FAM.getResult(F); + + for (const auto I : IBBs) + BBFreqs.push_back({I, BFI.getBlockFreq(I).getFrequency()}); + + assert(IBBs.size() == BBFreqs.size() && "BB Count Mismatch"); + + llvm::sort(BBFreqs.begin(), BBFreqs.end(), + [](decltype(BBFreqs)::const_reference BBF, + decltype(BBFreqs)::const_reference BBS) { + return BBF.second > BBS.second ? true : false; + }); + + // ignoring number of direct calls in a BB + auto Topk = numBBToGet(BBFreqs.size()); + + for (size_t i = 0; i < Topk; i++) + findCalles(BBFreqs[i].first, Calles); + + assert(!Calles.empty() && "Running Analysis on Function with no calls?"); + + CallerAndCalles.insert({F.getName(), std::move(Calles)}); + + return CallerAndCalles; +} + +// SequenceBBQuery Implementation +std::size_t SequenceBBQuery::getHottestBlocks(std::size_t TotalBlocks) { + if (TotalBlocks == 1) + return TotalBlocks; + return TotalBlocks / 2; +} + +// FIXME : find good implementation. +SequenceBBQuery::BlockListTy +SequenceBBQuery::rearrangeBB(const Function &F, const BlockListTy &BBList) { + BlockListTy RearrangedBBSet; + + for (auto &Block : F.getBasicBlockList()) + if (llvm::is_contained(BBList, &Block)) + RearrangedBBSet.push_back(&Block); + + assert(RearrangedBBSet.size() == BBList.size() && + "BasicBlock missing while rearranging?"); + return RearrangedBBSet; +} + +void SequenceBBQuery::traverseToEntryBlock(const BasicBlock *AtBB, + const BlockListTy &CallerBlocks, + const BackEdgesInfoTy &BackEdgesInfo, + const BranchProbabilityInfo *BPI, + VisitedBlocksInfoTy &VisitedBlocks) { + auto Itr = VisitedBlocks.find(AtBB); + if (Itr != VisitedBlocks.end()) { // already visited. + if (!Itr->second.Upward) + return; + Itr->second.Upward = false; + } else { + // Create hint for newly discoverd blocks. 
+ WalkDirection BlockHint; + BlockHint.Upward = false; + // FIXME: Expensive Check + if (llvm::is_contained(CallerBlocks, AtBB)) + BlockHint.CallerBlock = true; + VisitedBlocks.insert(std::make_pair(AtBB, BlockHint)); + } + + const_pred_iterator PIt = pred_begin(AtBB), EIt = pred_end(AtBB); + // Move this check to top, when we have code setup to launch speculative + // compiles for function in entry BB, this triggers the speculative compiles + // before running the program. + if (PIt == EIt) // No Preds. + return; + + DenseSet PredSkipNodes; + + // Since we are checking for predecessor's backedges, this Block + // occurs in second position. + for (auto &I : BackEdgesInfo) + if (I.second == AtBB) + PredSkipNodes.insert(I.first); + + // Skip predecessors which source of back-edges. + for (; PIt != EIt; ++PIt) + // checking EdgeHotness is cheaper + if (BPI->isEdgeHot(*PIt, AtBB) && !PredSkipNodes.count(*PIt)) + traverseToEntryBlock(*PIt, CallerBlocks, BackEdgesInfo, BPI, + VisitedBlocks); +} + +void SequenceBBQuery::traverseToExitBlock(const BasicBlock *AtBB, + const BlockListTy &CallerBlocks, + const BackEdgesInfoTy &BackEdgesInfo, + const BranchProbabilityInfo *BPI, + VisitedBlocksInfoTy &VisitedBlocks) { + auto Itr = VisitedBlocks.find(AtBB); + if (Itr != VisitedBlocks.end()) { // already visited. + if (!Itr->second.Downward) + return; + Itr->second.Downward = false; + } else { + // Create hint for newly discoverd blocks. + WalkDirection BlockHint; + BlockHint.Downward = false; + // FIXME: Expensive Check + if (llvm::is_contained(CallerBlocks, AtBB)) + BlockHint.CallerBlock = true; + VisitedBlocks.insert(std::make_pair(AtBB, BlockHint)); + } + + succ_const_iterator PIt = succ_begin(AtBB), EIt = succ_end(AtBB); + if (PIt == EIt) // No succs. + return; + + // If there are hot edges, then compute SuccSkipNodes. + DenseSet SuccSkipNodes; + + // Since we are checking for successor's backedges, this Block + // occurs in first position. + for (auto &I : BackEdgesInfo) + if (I.first == AtBB) + SuccSkipNodes.insert(I.second); + + for (; PIt != EIt; ++PIt) + if (BPI->isEdgeHot(AtBB, *PIt) && !SuccSkipNodes.count(*PIt)) + traverseToExitBlock(*PIt, CallerBlocks, BackEdgesInfo, BPI, + VisitedBlocks); +} + +// Get Block frequencies for blocks and take most frquently executed block, +// walk towards the entry block from those blocks and discover the basic blocks +// with call. +SequenceBBQuery::BlockListTy +SequenceBBQuery::queryCFG(Function &F, const BlockListTy &CallerBlocks) { + + BlockFreqInfoTy BBFreqs; + VisitedBlocksInfoTy VisitedBlocks; + BackEdgesInfoTy BackEdgesInfo; + + PassBuilder PB; + FunctionAnalysisManager FAM; + PB.registerFunctionAnalyses(FAM); + + auto &BFI = FAM.getResult(F); + + llvm::FindFunctionBackedges(F, BackEdgesInfo); + + for (const auto I : CallerBlocks) + BBFreqs.push_back({I, BFI.getBlockFreq(I).getFrequency()}); + + llvm::sort(BBFreqs, [](decltype(BBFreqs)::const_reference Bbf, + decltype(BBFreqs)::const_reference Bbs) { + return Bbf.second > Bbs.second; + }); + + ArrayRef> HotBlocksRef(BBFreqs); + HotBlocksRef = + HotBlocksRef.drop_back(BBFreqs.size() - getHottestBlocks(BBFreqs.size())); + + BranchProbabilityInfo *BPI = + FAM.getCachedResult(F); + + // visit NHotBlocks, + // traverse upwards to entry + // traverse downwards to end. 
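BlockFreqQuery and SequenceBBQuery::queryCFG above obtain BlockFrequencyInfo by registering the stock function analyses with a standalone FunctionAnalysisManager. A minimal sketch of that setup, reduced to finding the hottest block of a function (hottestBlockFreq is illustrative only):

#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include <algorithm>
#include <cstdint>

using namespace llvm;

static uint64_t hottestBlockFreq(Function &F) {
  // Register the default function analyses, as the queries above do.
  PassBuilder PB;
  FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM);

  // Ask for block frequencies and scan for the maximum.
  auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
  uint64_t Max = 0;
  for (BasicBlock &BB : F)
    Max = std::max(Max, BFI.getBlockFreq(&BB).getFrequency());
  return Max;
}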
+ + for (auto I : HotBlocksRef) { + traverseToEntryBlock(I.first, CallerBlocks, BackEdgesInfo, BPI, + VisitedBlocks); + traverseToExitBlock(I.first, CallerBlocks, BackEdgesInfo, BPI, + VisitedBlocks); + } + + BlockListTy MinCallerBlocks; + for (auto &I : VisitedBlocks) + if (I.second.CallerBlock) + MinCallerBlocks.push_back(std::move(I.first)); + + return rearrangeBB(F, MinCallerBlocks); +} + +SpeculateQuery::ResultTy SequenceBBQuery::operator()(Function &F) { + // reduce the number of lists! + DenseMap> CallerAndCalles; + DenseSet Calles; + BlockListTy SequencedBlocks; + BlockListTy CallerBlocks; + + CallerBlocks = findBBwithCalls(F); + if (CallerBlocks.empty()) + return None; + + if (isStraightLine(F)) + SequencedBlocks = rearrangeBB(F, CallerBlocks); + else + SequencedBlocks = queryCFG(F, CallerBlocks); + + for (auto BB : SequencedBlocks) + findCalles(BB, Calles); + + CallerAndCalles.insert({F.getName(), std::move(Calles)}); + return CallerAndCalles; +} + +} // namespace orc +} // namespace llvm diff --git a/lib/ExecutionEngine/Orc/Speculation.cpp b/lib/ExecutionEngine/Orc/Speculation.cpp new file mode 100644 index 000000000000..f29201c147a1 --- /dev/null +++ b/lib/ExecutionEngine/Orc/Speculation.cpp @@ -0,0 +1,146 @@ +//===---------- speculation.cpp - Utilities for Speculation ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/Speculation.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/Debug.h" + +#include + +namespace llvm { + +namespace orc { + +// ImplSymbolMap methods +void ImplSymbolMap::trackImpls(SymbolAliasMap ImplMaps, JITDylib *SrcJD) { + assert(SrcJD && "Tracking on Null Source .impl dylib"); + std::lock_guard Lockit(ConcurrentAccess); + for (auto &I : ImplMaps) { + auto It = Maps.insert({I.first, {I.second.Aliasee, SrcJD}}); + // check rationale when independent dylibs have same symbol name? + assert(It.second && "ImplSymbols are already tracked for this Symbol?"); + (void)(It); + } +} + +// Trigger Speculative Compiles. +void Speculator::speculateForEntryPoint(Speculator *Ptr, uint64_t StubId) { + assert(Ptr && " Null Address Received in orc_speculate_for "); + Ptr->speculateFor(StubId); +} + +Error Speculator::addSpeculationRuntime(JITDylib &JD, + MangleAndInterner &Mangle) { + JITEvaluatedSymbol ThisPtr(pointerToJITTargetAddress(this), + JITSymbolFlags::Exported); + JITEvaluatedSymbol SpeculateForEntryPtr( + pointerToJITTargetAddress(&speculateForEntryPoint), + JITSymbolFlags::Exported); + return JD.define(absoluteSymbols({ + {Mangle("__orc_speculator"), ThisPtr}, // Data Symbol + {Mangle("__orc_speculate_for"), SpeculateForEntryPtr} // Callable Symbol + })); +} + +// If two modules, share the same LLVMContext, different threads must +// not access them concurrently without locking the associated LLVMContext +// this implementation follows this contract. 
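addSpeculationRuntime above publishes the speculator object and its entry point to JITed code via absoluteSymbols. A sketch of the same mechanism for an arbitrary host function; hook, exposeHook, and the "__my_hook" name are placeholders, not part of the patch:

#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/Orc/Core.h"

using namespace llvm;
using namespace llvm::orc;

static void hook() {}

// Define the host function's address as an absolute, exported symbol in JD so
// JITed code can call it by (mangled) name.
static Error exposeHook(JITDylib &JD, MangleAndInterner &Mangle) {
  JITEvaluatedSymbol HookSym(pointerToJITTargetAddress(&hook),
                             JITSymbolFlags::Exported);
  return JD.define(absoluteSymbols({{Mangle("__my_hook"), HookSym}}));
}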
+void IRSpeculationLayer::emit(MaterializationResponsibility R, + ThreadSafeModule TSM) { + + assert(TSM && "Speculation Layer received Null Module ?"); + assert(TSM.getContext().getContext() != nullptr && + "Module with null LLVMContext?"); + + // Instrumentation of runtime calls, lock the Module + TSM.withModuleDo([this, &R](Module &M) { + auto &MContext = M.getContext(); + auto SpeculatorVTy = StructType::create(MContext, "Class.Speculator"); + auto RuntimeCallTy = FunctionType::get( + Type::getVoidTy(MContext), + {SpeculatorVTy->getPointerTo(), Type::getInt64Ty(MContext)}, false); + auto RuntimeCall = + Function::Create(RuntimeCallTy, Function::LinkageTypes::ExternalLinkage, + "__orc_speculate_for", &M); + auto SpeclAddr = new GlobalVariable( + M, SpeculatorVTy, false, GlobalValue::LinkageTypes::ExternalLinkage, + nullptr, "__orc_speculator"); + + IRBuilder<> Mutator(MContext); + + // QueryAnalysis allowed to transform the IR source, one such example is + // Simplify CFG helps the static branch prediction heuristics! + for (auto &Fn : M.getFunctionList()) { + if (!Fn.isDeclaration()) { + + auto IRNames = QueryAnalysis(Fn); + // Instrument and register if Query has result + if (IRNames.hasValue()) { + + // Emit globals for each function. + auto LoadValueTy = Type::getInt8Ty(MContext); + auto SpeculatorGuard = new GlobalVariable( + M, LoadValueTy, false, GlobalValue::LinkageTypes::InternalLinkage, + ConstantInt::get(LoadValueTy, 0), + "__orc_speculate.guard.for." + Fn.getName()); + SpeculatorGuard->setAlignment(Align::None()); + SpeculatorGuard->setUnnamedAddr(GlobalValue::UnnamedAddr::Local); + + BasicBlock &ProgramEntry = Fn.getEntryBlock(); + // Create BasicBlocks before the program's entry basicblock + BasicBlock *SpeculateBlock = BasicBlock::Create( + MContext, "__orc_speculate.block", &Fn, &ProgramEntry); + BasicBlock *SpeculateDecisionBlock = BasicBlock::Create( + MContext, "__orc_speculate.decision.block", &Fn, SpeculateBlock); + + assert(SpeculateDecisionBlock == &Fn.getEntryBlock() && + "SpeculateDecisionBlock not updated?"); + Mutator.SetInsertPoint(SpeculateDecisionBlock); + + auto LoadGuard = + Mutator.CreateLoad(LoadValueTy, SpeculatorGuard, "guard.value"); + // if just loaded value equal to 0,return true. 
+ auto CanSpeculate = + Mutator.CreateICmpEQ(LoadGuard, ConstantInt::get(LoadValueTy, 0), + "compare.to.speculate"); + Mutator.CreateCondBr(CanSpeculate, SpeculateBlock, &ProgramEntry); + + Mutator.SetInsertPoint(SpeculateBlock); + auto ImplAddrToUint = + Mutator.CreatePtrToInt(&Fn, Type::getInt64Ty(MContext)); + Mutator.CreateCall(RuntimeCallTy, RuntimeCall, + {SpeclAddr, ImplAddrToUint}); + Mutator.CreateStore(ConstantInt::get(LoadValueTy, 1), + SpeculatorGuard); + Mutator.CreateBr(&ProgramEntry); + + assert(Mutator.GetInsertBlock()->getParent() == &Fn && + "IR builder association mismatch?"); + S.registerSymbols(internToJITSymbols(IRNames.getValue()), + &R.getTargetJITDylib()); + } + } + } + }); + + assert(!TSM.withModuleDo([](const Module &M) { return verifyModule(M); }) && + "Speculation Instrumentation breaks IR?"); + + NextLayer.emit(std::move(R), std::move(TSM)); +} + +} // namespace orc +} // namespace llvm diff --git a/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp b/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp index 4cb7376758a7..1f4e6f132115 100644 --- a/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp +++ b/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp @@ -23,41 +23,41 @@ ThreadSafeModule cloneToNewContext(ThreadSafeModule &TSM, if (!ShouldCloneDef) ShouldCloneDef = [](const GlobalValue &) { return true; }; - auto Lock = TSM.getContextLock(); + return TSM.withModuleDo([&](Module &M) { + SmallVector ClonedModuleBuffer; - SmallVector ClonedModuleBuffer; + { + std::set ClonedDefsInSrc; + ValueToValueMapTy VMap; + auto Tmp = CloneModule(M, VMap, [&](const GlobalValue *GV) { + if (ShouldCloneDef(*GV)) { + ClonedDefsInSrc.insert(const_cast(GV)); + return true; + } + return false; + }); - { - std::set ClonedDefsInSrc; - ValueToValueMapTy VMap; - auto Tmp = CloneModule(*TSM.getModule(), VMap, [&](const GlobalValue *GV) { - if (ShouldCloneDef(*GV)) { - ClonedDefsInSrc.insert(const_cast(GV)); - return true; - } - return false; - }); + if (UpdateClonedDefSource) + for (auto *GV : ClonedDefsInSrc) + UpdateClonedDefSource(*GV); - if (UpdateClonedDefSource) - for (auto *GV : ClonedDefsInSrc) - UpdateClonedDefSource(*GV); + BitcodeWriter BCWriter(ClonedModuleBuffer); - BitcodeWriter BCWriter(ClonedModuleBuffer); + BCWriter.writeModule(*Tmp); + BCWriter.writeSymtab(); + BCWriter.writeStrtab(); + } - BCWriter.writeModule(*Tmp); - BCWriter.writeSymtab(); - BCWriter.writeStrtab(); - } + MemoryBufferRef ClonedModuleBufferRef( + StringRef(ClonedModuleBuffer.data(), ClonedModuleBuffer.size()), + "cloned module buffer"); + ThreadSafeContext NewTSCtx(std::make_unique()); - MemoryBufferRef ClonedModuleBufferRef( - StringRef(ClonedModuleBuffer.data(), ClonedModuleBuffer.size()), - "cloned module buffer"); - ThreadSafeContext NewTSCtx(llvm::make_unique()); - - auto ClonedModule = - cantFail(parseBitcodeFile(ClonedModuleBufferRef, *NewTSCtx.getContext())); - ClonedModule->setModuleIdentifier(TSM.getModule()->getName()); - return ThreadSafeModule(std::move(ClonedModule), std::move(NewTSCtx)); + auto ClonedModule = cantFail( + parseBitcodeFile(ClonedModuleBufferRef, *NewTSCtx.getContext())); + ClonedModule->setModuleIdentifier(M.getName()); + return ThreadSafeModule(std::move(ClonedModule), std::move(NewTSCtx)); + }); } } // end namespace orc diff --git a/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp index 5606421a3cb0..184388dc4d7a 100644 --- a/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp +++ 
b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp @@ -26,11 +26,11 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Mutex.h" -#include "llvm/Support/MutexGuard.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/Threading.h" #include "llvm/Support/raw_ostream.h" +#include #include // mmap() #include // getpid() @@ -203,7 +203,7 @@ PerfJITEventListener::PerfJITEventListener() : Pid(::getpid()) { return; } - Dumpstream = make_unique(DumpFd, true); + Dumpstream = std::make_unique(DumpFd, true); LLVMPerfJitHeader Header = {0}; if (!FillMachine(Header)) @@ -420,7 +420,7 @@ void PerfJITEventListener::NotifyCode(Expected &Symbol, rec.Tid = get_threadid(); // avoid interspersing output - MutexGuard Guard(Mutex); + std::lock_guard Guard(Mutex); rec.CodeIndex = CodeGeneration++; // under lock! @@ -462,7 +462,7 @@ void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr, // * char name[n] : source file name in ASCII, including null termination // avoid interspersing output - MutexGuard Guard(Mutex); + std::lock_guard Guard(Mutex); Dumpstream->write(reinterpret_cast(&rec), sizeof(rec)); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index e26e6ce45db4..2df71a5e5e74 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -17,10 +17,11 @@ #include "RuntimeDyldMachO.h" #include "llvm/Object/COFF.h" #include "llvm/Object/ELFObjectFile.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/MSVCErrorWorkarounds.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/MutexGuard.h" +#include #include @@ -120,7 +121,7 @@ static void dumpSectionMemory(const SectionEntry &S, StringRef State) { // Resolve the relocations for all symbols we currently know about. void RuntimeDyldImpl::resolveRelocations() { - MutexGuard locked(lock); + std::lock_guard locked(lock); // Print out the sections prior to relocation. 
LLVM_DEBUG(for (int i = 0, e = Sections.size(); i != e; ++i) @@ -156,7 +157,7 @@ void RuntimeDyldImpl::resolveLocalRelocations() { void RuntimeDyldImpl::mapSectionAddress(const void *LocalAddress, uint64_t TargetAddress) { - MutexGuard locked(lock); + std::lock_guard locked(lock); for (unsigned i = 0, e = Sections.size(); i != e; ++i) { if (Sections[i].getAddress() == LocalAddress) { reassignSectionAddress(i, TargetAddress); @@ -177,7 +178,7 @@ static Error getOffset(const SymbolRef &Sym, SectionRef Sec, Expected RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { - MutexGuard locked(lock); + std::lock_guard locked(lock); // Save information about our target Arch = (Triple::ArchType)Obj.getArch(); @@ -347,8 +348,12 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { for (section_iterator SI = Obj.section_begin(), SE = Obj.section_end(); SI != SE; ++SI) { StubMap Stubs; - section_iterator RelocatedSection = SI->getRelocatedSection(); + Expected RelSecOrErr = SI->getRelocatedSection(); + if (!RelSecOrErr) + return RelSecOrErr.takeError(); + + section_iterator RelocatedSection = *RelSecOrErr; if (RelocatedSection == SE) continue; @@ -535,9 +540,10 @@ Error RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj, bool IsCode = Section.isText(); bool IsReadOnly = isReadOnlyData(Section); - StringRef Name; - if (auto EC = Section.getName(Name)) - return errorCodeToError(EC); + Expected NameOrErr = Section.getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = *NameOrErr; uint64_t StubBufSize = computeSectionStubBufSize(Obj, Section); @@ -646,7 +652,12 @@ unsigned RuntimeDyldImpl::computeSectionStubBufSize(const ObjectFile &Obj, unsigned StubBufSize = 0; for (section_iterator SI = Obj.section_begin(), SE = Obj.section_end(); SI != SE; ++SI) { - section_iterator RelSecI = SI->getRelocatedSection(); + + Expected RelSecOrErr = SI->getRelocatedSection(); + if (!RelSecOrErr) + report_fatal_error(toString(RelSecOrErr.takeError())); + + section_iterator RelSecI = *RelSecOrErr; if (!(RelSecI == Section)) continue; @@ -727,16 +738,17 @@ Error RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj, // Assign the address of each symbol for (auto &Sym : SymbolsToAllocate) { - uint32_t Align = Sym.getAlignment(); + uint32_t Alignment = Sym.getAlignment(); uint64_t Size = Sym.getCommonSize(); StringRef Name; if (auto NameOrErr = Sym.getName()) Name = *NameOrErr; else return NameOrErr.takeError(); - if (Align) { + if (Alignment) { // This symbol has an alignment requirement. - uint64_t AlignOffset = OffsetToAlignment((uint64_t)Addr, Align); + uint64_t AlignOffset = + offsetToAlignment((uint64_t)Addr, Align(Alignment)); Addr += AlignOffset; Offset += AlignOffset; } @@ -777,9 +789,10 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj, // anyway, so we should guarantee that the alignment is always at least 1. 
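The emitCommonSymbols hunk above switches from the old OffsetToAlignment helper to llvm::offsetToAlignment with the new Align type. A tiny self-contained sketch of what that call computes:

#include "llvm/Support/Alignment.h"
#include <cassert>
#include <cstdint>

int main() {
  // Padding needed to move 0x1003 up to the next 16-byte boundary (0x1010).
  uint64_t Addr = 0x1003;
  uint64_t Pad = llvm::offsetToAlignment(Addr, llvm::Align(16));
  assert(Pad == 13 && (Addr + Pad) % 16 == 0);
  (void)Pad;
  return 0;
}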
Alignment = std::max(1u, Alignment); - StringRef Name; - if (auto EC = Section.getName(Name)) - return errorCodeToError(EC); + Expected NameOrErr = Section.getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = *NameOrErr; StubBufSize = computeSectionStubBufSize(Obj, Section); @@ -917,7 +930,8 @@ void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE, uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr, unsigned AbiVariant) { - if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be) { + if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be || + Arch == Triple::aarch64_32) { // This stub has to be able to access the full address space, // since symbol lookup won't necessarily find a handy, in-range, // PLT stub for functions which could be anywhere. @@ -1175,17 +1189,15 @@ Error RuntimeDyldImpl::resolveExternalSymbols() { } void RuntimeDyldImpl::finalizeAsync( - std::unique_ptr This, std::function OnEmitted, + std::unique_ptr This, + unique_function OnEmitted, std::unique_ptr UnderlyingBuffer) { - // FIXME: Move-capture OnRelocsApplied and UnderlyingBuffer once we have - // c++14. - auto SharedUnderlyingBuffer = - std::shared_ptr(std::move(UnderlyingBuffer)); auto SharedThis = std::shared_ptr(std::move(This)); auto PostResolveContinuation = - [SharedThis, OnEmitted, SharedUnderlyingBuffer]( - Expected Result) { + [SharedThis, OnEmitted = std::move(OnEmitted), + UnderlyingBuffer = std::move(UnderlyingBuffer)]( + Expected Result) mutable { if (!Result) { OnEmitted(Result.takeError()); return; @@ -1219,7 +1231,7 @@ void RuntimeDyldImpl::finalizeAsync( } if (!Symbols.empty()) { - SharedThis->Resolver.lookup(Symbols, PostResolveContinuation); + SharedThis->Resolver.lookup(Symbols, std::move(PostResolveContinuation)); } else PostResolveContinuation(std::map()); } @@ -1395,11 +1407,11 @@ void jitLinkForORC(object::ObjectFile &Obj, std::unique_ptr UnderlyingBuffer, RuntimeDyld::MemoryManager &MemMgr, JITSymbolResolver &Resolver, bool ProcessAllSections, - std::function LoadedObj, std::map)> OnLoaded, - std::function OnEmitted) { + unique_function OnEmitted) { RuntimeDyld RTDyld(MemMgr, Resolver); RTDyld.setProcessAllSections(ProcessAllSections); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp index d4e3b0ba7670..27a7690db34f 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp @@ -50,18 +50,18 @@ llvm::RuntimeDyldCOFF::create(Triple::ArchType Arch, switch (Arch) { default: llvm_unreachable("Unsupported target for RuntimeDyldCOFF."); case Triple::x86: - return make_unique(MemMgr, Resolver); + return std::make_unique(MemMgr, Resolver); case Triple::thumb: - return make_unique(MemMgr, Resolver); + return std::make_unique(MemMgr, Resolver); case Triple::x86_64: - return make_unique(MemMgr, Resolver); + return std::make_unique(MemMgr, Resolver); } } std::unique_ptr RuntimeDyldCOFF::loadObject(const object::ObjectFile &O) { if (auto ObjSectionToIDOrErr = loadObjectImpl(O)) { - return llvm::make_unique(*this, *ObjSectionToIDOrErr); + return std::make_unique(*this, *ObjSectionToIDOrErr); } else { HasError = true; raw_string_ostream ErrStream(ErrorStr); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp index ec31ea4e573c..b9c5a12e08d8 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp +++ 
b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp @@ -851,7 +851,7 @@ RuntimeDyldChecker::RuntimeDyldChecker( GetGOTInfoFunction GetGOTInfo, support::endianness Endianness, MCDisassembler *Disassembler, MCInstPrinter *InstPrinter, raw_ostream &ErrStream) - : Impl(::llvm::make_unique( + : Impl(::std::make_unique( std::move(IsSymbolValid), std::move(GetSymbolInfo), std::move(GetSectionInfo), std::move(GetStubInfo), std::move(GetGOTInfo), Endianness, Disassembler, InstPrinter, diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 60041a45e2b8..440ab4174a56 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -160,9 +160,13 @@ createRTDyldELFObject(MemoryBufferRef Buffer, const ObjectFile &SourceObject, // Iterate over all sections in the object. auto SI = SourceObject.section_begin(); for (const auto &Sec : Obj->sections()) { - StringRef SectionName; - Sec.getName(SectionName); - if (SectionName != "") { + Expected NameOrErr = Sec.getName(); + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); + continue; + } + + if (*NameOrErr != "") { DataRefImpl ShdrRef = Sec.getRawDataRefImpl(); Elf_Shdr *shdr = const_cast( reinterpret_cast(ShdrRef.p)); @@ -238,19 +242,19 @@ llvm::RuntimeDyldELF::create(Triple::ArchType Arch, JITSymbolResolver &Resolver) { switch (Arch) { default: - return make_unique(MemMgr, Resolver); + return std::make_unique(MemMgr, Resolver); case Triple::mips: case Triple::mipsel: case Triple::mips64: case Triple::mips64el: - return make_unique(MemMgr, Resolver); + return std::make_unique(MemMgr, Resolver); } } std::unique_ptr RuntimeDyldELF::loadObject(const object::ObjectFile &O) { if (auto ObjSectionToIDOrErr = loadObjectImpl(O)) - return llvm::make_unique(*this, *ObjSectionToIDOrErr); + return std::make_unique(*this, *ObjSectionToIDOrErr); else { HasError = true; raw_string_ostream ErrStream(ErrorStr); @@ -567,10 +571,11 @@ Error RuntimeDyldELF::findPPC64TOCSection(const ELFObjectFileBase &Obj, // The TOC consists of sections .got, .toc, .tocbss, .plt in that // order. The TOC starts where the first of these sections starts. 
- for (auto &Section: Obj.sections()) { - StringRef SectionName; - if (auto EC = Section.getName(SectionName)) - return errorCodeToError(EC); + for (auto &Section : Obj.sections()) { + Expected NameOrErr = Section.getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef SectionName = *NameOrErr; if (SectionName == ".got" || SectionName == ".toc" @@ -601,13 +606,19 @@ Error RuntimeDyldELF::findOPDEntrySection(const ELFObjectFileBase &Obj, // .opd entries for (section_iterator si = Obj.section_begin(), se = Obj.section_end(); si != se; ++si) { - section_iterator RelSecI = si->getRelocatedSection(); + + Expected RelSecOrErr = si->getRelocatedSection(); + if (!RelSecOrErr) + report_fatal_error(toString(RelSecOrErr.takeError())); + + section_iterator RelSecI = *RelSecOrErr; if (RelSecI == Obj.section_end()) continue; - StringRef RelSectionName; - if (auto EC = RelSecI->getName(RelSectionName)) - return errorCodeToError(EC); + Expected NameOrErr = RelSecI->getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef RelSectionName = *NameOrErr; if (RelSectionName != ".opd") continue; @@ -1865,7 +1876,12 @@ Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, for (section_iterator SI = Obj.section_begin(), SE = Obj.section_end(); SI != SE; ++SI) { if (SI->relocation_begin() != SI->relocation_end()) { - section_iterator RelocatedSection = SI->getRelocatedSection(); + Expected RelSecOrErr = SI->getRelocatedSection(); + if (!RelSecOrErr) + return make_error( + toString(RelSecOrErr.takeError())); + + section_iterator RelocatedSection = *RelSecOrErr; ObjSectionToIDMap::iterator i = SectionMap.find(*RelocatedSection); assert (i != SectionMap.end()); SectionToGOTMap[i->second] = GOTSectionID; @@ -1879,8 +1895,14 @@ Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, ObjSectionToIDMap::iterator i, e; for (i = SectionMap.begin(), e = SectionMap.end(); i != e; ++i) { const SectionRef &Section = i->first; + StringRef Name; - Section.getName(Name); + Expected NameOrErr = Section.getName(); + if (NameOrErr) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + if (Name == ".eh_frame") { UnregisteredEHFrameSections.push_back(i->second); break; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index 68b3468fbc9d..cec7b92b8c48 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -549,7 +549,7 @@ public: void resolveLocalRelocations(); static void finalizeAsync(std::unique_ptr This, - std::function OnEmitted, + unique_function OnEmitted, std::unique_ptr UnderlyingBuffer); void reassignSectionAddress(unsigned SectionID, uint64_t Addr); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index 202c3ca1c507..9ca76602ea18 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -233,7 +233,10 @@ RuntimeDyldMachOCRTPBase::finalizeLoad(const ObjectFile &Obj, for (const auto &Section : Obj.sections()) { StringRef Name; - Section.getName(Name); + if (Expected NameOrErr = Section.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); // Force emission of the __text, __eh_frame, and __gcc_except_tab sections // if they're present. 
Otherwise call down to the impl to handle other @@ -351,20 +354,22 @@ RuntimeDyldMachO::create(Triple::ArchType Arch, llvm_unreachable("Unsupported target for RuntimeDyldMachO."); break; case Triple::arm: - return make_unique(MemMgr, Resolver); + return std::make_unique(MemMgr, Resolver); case Triple::aarch64: - return make_unique(MemMgr, Resolver); + return std::make_unique(MemMgr, Resolver); + case Triple::aarch64_32: + return std::make_unique(MemMgr, Resolver); case Triple::x86: - return make_unique(MemMgr, Resolver); + return std::make_unique(MemMgr, Resolver); case Triple::x86_64: - return make_unique(MemMgr, Resolver); + return std::make_unique(MemMgr, Resolver); } } std::unique_ptr RuntimeDyldMachO::loadObject(const object::ObjectFile &O) { if (auto ObjSectionToIDOrErr = loadObjectImpl(O)) - return llvm::make_unique(*this, + return std::make_unique(*this, *ObjSectionToIDOrErr); else { HasError = true; diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h index d2d74534cf90..dc4af08583de 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h @@ -284,14 +284,14 @@ public: // Look for and record the EH frame section IDs. for (const auto &SectionPair : SectionMap) { const object::SectionRef &Section = SectionPair.first; - StringRef Name; - if (auto EC = Section.getName(Name)) - return errorCodeToError(EC); + Expected NameOrErr = Section.getName(); + if (!NameOrErr) + return NameOrErr.takeError(); // Note unwind info is stored in .pdata but often points to .xdata // with an IMAGE_REL_AMD64_ADDR32NB relocation. Using a memory manager // that keeps sections ordered in relation to __ImageBase is necessary. 
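The recurring make_unique edits above are part of the move to C++14: the llvm::make_unique polyfill is dropped in favor of std::make_unique. A tiny stand-alone sketch (Widget is a placeholder type, not from the patch):

#include <memory>

struct Widget {
  Widget(int A, int B) : Sum(A + B) {}
  int Sum;
};

int main() {
  // Same call shape as before, only the namespace changes:
  //   llvm::make_unique<Widget>(1, 2)  ->  std::make_unique<Widget>(1, 2)
  std::unique_ptr<Widget> W = std::make_unique<Widget>(1, 2);
  return W->Sum == 3 ? 0 : 1;
}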
- if (Name == ".pdata") + if ((*NameOrErr) == ".pdata") UnregisteredEHFrameSections.push_back(SectionPair.second); } return Error::success(); diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h index 3bec8b979f7d..a76958a9e2c2 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h @@ -289,7 +289,10 @@ public: Error finalizeSection(const ObjectFile &Obj, unsigned SectionID, const SectionRef &Section) { StringRef Name; - Section.getName(Name); + if (Expected NameOrErr = Section.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); if (Name == "__nl_symbol_ptr") return populateIndirectSymbolPointersSection(cast(Obj), diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h index f0de27ba14bb..523deb29b723 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h @@ -128,7 +128,10 @@ public: Error finalizeSection(const ObjectFile &Obj, unsigned SectionID, const SectionRef &Section) { StringRef Name; - Section.getName(Name); + if (Expected NameOrErr = Section.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); if (Name == "__jump_table") return populateJumpTable(cast(Obj), Section, SectionID); diff --git a/lib/FuzzMutate/FuzzerCLI.cpp b/lib/FuzzMutate/FuzzerCLI.cpp index 63d31c035390..f2368ea7f26b 100644 --- a/lib/FuzzMutate/FuzzerCLI.cpp +++ b/lib/FuzzMutate/FuzzerCLI.cpp @@ -171,7 +171,7 @@ std::unique_ptr llvm::parseModule( if (Size <= 1) // We get bogus data given an empty corpus - just create a new module. 
- return llvm::make_unique("M", Context); + return std::make_unique("M", Context); auto Buffer = MemoryBuffer::getMemBuffer( StringRef(reinterpret_cast(Data), Size), "Fuzzer input", diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index eb5760daecb3..b0c26e0ecaf5 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -352,6 +352,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { case CallingConv::PreserveAll: Out << "preserve_allcc"; break; case CallingConv::CXX_FAST_TLS: Out << "cxx_fast_tlscc"; break; case CallingConv::GHC: Out << "ghccc"; break; + case CallingConv::Tail: Out << "tailcc"; break; case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break; case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break; case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break; @@ -835,7 +836,7 @@ SlotTracker *ModuleSlotTracker::getMachine() { ShouldCreateStorage = false; MachineStorage = - llvm::make_unique(M, ShouldInitializeAllMetadata); + std::make_unique(M, ShouldInitializeAllMetadata); Machine = MachineStorage.get(); return Machine; } @@ -2312,7 +2313,7 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD, if (const MDNode *N = dyn_cast(MD)) { std::unique_ptr MachineStorage; if (!Machine) { - MachineStorage = make_unique(Context); + MachineStorage = std::make_unique(Context); Machine = MachineStorage.get(); } int Slot = Machine->getMetadataSlot(N); @@ -2950,7 +2951,7 @@ void AssemblyWriter::printFunctionSummary(const FunctionSummary *FS) { FunctionSummary::FFlags FFlags = FS->fflags(); if (FFlags.ReadNone | FFlags.ReadOnly | FFlags.NoRecurse | - FFlags.ReturnDoesNotAlias) { + FFlags.ReturnDoesNotAlias | FFlags.NoInline) { Out << ", funcFlags: ("; Out << "readNone: " << FFlags.ReadNone; Out << ", readOnly: " << FFlags.ReadOnly; @@ -3553,6 +3554,10 @@ void AssemblyWriter::printArgument(const Argument *Arg, AttributeSet Attrs) { if (Arg->hasName()) { Out << ' '; PrintLLVMName(Out, Arg); + } else { + int Slot = Machine.getLocalSlot(Arg); + assert(Slot != -1 && "expect argument in function here"); + Out << " %" << Slot; } } diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index f989fa3b910e..15e488bbb13b 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -159,7 +159,7 @@ public: }; class TypeAttributeImpl : public EnumAttributeImpl { - virtual void anchor(); + void anchor() override; Type *Ty; @@ -208,8 +208,8 @@ public: Attribute getAttribute(Attribute::AttrKind Kind) const; Attribute getAttribute(StringRef Kind) const; - unsigned getAlignment() const; - unsigned getStackAlignment() const; + MaybeAlign getAlignment() const; + MaybeAlign getStackAlignment() const; uint64_t getDereferenceableBytes() const; uint64_t getDereferenceableOrNullBytes() const; std::pair> getAllocSizeArgs() const; diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index bb90bcd7dd74..cc370e628e9a 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -142,17 +142,14 @@ Attribute Attribute::get(LLVMContext &Context, Attribute::AttrKind Kind, return Attribute(PA); } -Attribute Attribute::getWithAlignment(LLVMContext &Context, uint64_t Align) { - assert(isPowerOf2_32(Align) && "Alignment must be a power of two."); - assert(Align <= 0x40000000 && "Alignment too large."); - return get(Context, Alignment, Align); +Attribute Attribute::getWithAlignment(LLVMContext &Context, Align A) { + assert(A <= 0x40000000 && "Alignment too large."); + return get(Context, Alignment, A.value()); } -Attribute 
Attribute::getWithStackAlignment(LLVMContext &Context, - uint64_t Align) { - assert(isPowerOf2_32(Align) && "Alignment must be a power of two."); - assert(Align <= 0x100 && "Alignment too large."); - return get(Context, StackAlignment, Align); +Attribute Attribute::getWithStackAlignment(LLVMContext &Context, Align A) { + assert(A <= 0x100 && "Alignment too large."); + return get(Context, StackAlignment, A.value()); } Attribute Attribute::getWithDereferenceableBytes(LLVMContext &Context, @@ -244,16 +241,16 @@ bool Attribute::hasAttribute(StringRef Kind) const { return pImpl && pImpl->hasAttribute(Kind); } -unsigned Attribute::getAlignment() const { +MaybeAlign Attribute::getAlignment() const { assert(hasAttribute(Attribute::Alignment) && "Trying to get alignment from non-alignment attribute!"); - return pImpl->getValueAsInt(); + return MaybeAlign(pImpl->getValueAsInt()); } -unsigned Attribute::getStackAlignment() const { +MaybeAlign Attribute::getStackAlignment() const { assert(hasAttribute(Attribute::StackAlignment) && "Trying to get alignment from non-alignment attribute!"); - return pImpl->getValueAsInt(); + return MaybeAlign(pImpl->getValueAsInt()); } uint64_t Attribute::getDereferenceableBytes() const { @@ -670,12 +667,12 @@ Attribute AttributeSet::getAttribute(StringRef Kind) const { return SetNode ? SetNode->getAttribute(Kind) : Attribute(); } -unsigned AttributeSet::getAlignment() const { - return SetNode ? SetNode->getAlignment() : 0; +MaybeAlign AttributeSet::getAlignment() const { + return SetNode ? SetNode->getAlignment() : None; } -unsigned AttributeSet::getStackAlignment() const { - return SetNode ? SetNode->getStackAlignment() : 0; +MaybeAlign AttributeSet::getStackAlignment() const { + return SetNode ? SetNode->getStackAlignment() : None; } uint64_t AttributeSet::getDereferenceableBytes() const { @@ -782,10 +779,12 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C, const AttrBuilder &B) { Attr = Attribute::getWithByValType(C, B.getByValType()); break; case Attribute::Alignment: - Attr = Attribute::getWithAlignment(C, B.getAlignment()); + assert(B.getAlignment() && "Alignment must be set"); + Attr = Attribute::getWithAlignment(C, *B.getAlignment()); break; case Attribute::StackAlignment: - Attr = Attribute::getWithStackAlignment(C, B.getStackAlignment()); + assert(B.getStackAlignment() && "StackAlignment must be set"); + Attr = Attribute::getWithStackAlignment(C, *B.getStackAlignment()); break; case Attribute::Dereferenceable: Attr = Attribute::getWithDereferenceableBytes( @@ -836,18 +835,18 @@ Attribute AttributeSetNode::getAttribute(StringRef Kind) const { return {}; } -unsigned AttributeSetNode::getAlignment() const { +MaybeAlign AttributeSetNode::getAlignment() const { for (const auto I : *this) if (I.hasAttribute(Attribute::Alignment)) return I.getAlignment(); - return 0; + return None; } -unsigned AttributeSetNode::getStackAlignment() const { +MaybeAlign AttributeSetNode::getStackAlignment() const { for (const auto I : *this) if (I.hasAttribute(Attribute::StackAlignment)) return I.getStackAlignment(); - return 0; + return None; } Type *AttributeSetNode::getByValType() const { @@ -1164,8 +1163,8 @@ AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index, #ifndef NDEBUG // FIXME it is not obvious how this should work for alignment. For now, say // we can't change a known alignment. 
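The Attributes.cpp hunks above switch alignment attributes over to llvm::Align for construction and llvm::MaybeAlign for queries. A short usage sketch under the new signatures (illustrative, not part of the patch):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  // Construction now takes a validated Align, so the old power-of-two assert
  // on a raw integer disappears from the callee.
  Attribute A = Attribute::getWithAlignment(Ctx, Align(16));
  // Queries hand back MaybeAlign; an absent attribute would be llvm::None.
  MaybeAlign MA = A.getAlignment();
  return (MA && MA->value() == 16) ? 0 : 1;
}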
- unsigned OldAlign = getAttributes(Index).getAlignment(); - unsigned NewAlign = B.getAlignment(); + const MaybeAlign OldAlign = getAttributes(Index).getAlignment(); + const MaybeAlign NewAlign = B.getAlignment(); assert((!OldAlign || !NewAlign || OldAlign == NewAlign) && "Attempt to change alignment!"); #endif @@ -1349,11 +1348,11 @@ Attribute AttributeList::getAttribute(unsigned Index, StringRef Kind) const { return getAttributes(Index).getAttribute(Kind); } -unsigned AttributeList::getRetAlignment() const { +MaybeAlign AttributeList::getRetAlignment() const { return getAttributes(ReturnIndex).getAlignment(); } -unsigned AttributeList::getParamAlignment(unsigned ArgNo) const { +MaybeAlign AttributeList::getParamAlignment(unsigned ArgNo) const { return getAttributes(ArgNo + FirstArgIndex).getAlignment(); } @@ -1361,8 +1360,7 @@ Type *AttributeList::getParamByValType(unsigned Index) const { return getAttributes(Index+FirstArgIndex).getByValType(); } - -unsigned AttributeList::getStackAlignment(unsigned Index) const { +MaybeAlign AttributeList::getStackAlignment(unsigned Index) const { return getAttributes(Index).getStackAlignment(); } @@ -1438,7 +1436,9 @@ AttrBuilder::AttrBuilder(AttributeSet AS) { void AttrBuilder::clear() { Attrs.reset(); TargetDepAttrs.clear(); - Alignment = StackAlignment = DerefBytes = DerefOrNullBytes = 0; + Alignment.reset(); + StackAlignment.reset(); + DerefBytes = DerefOrNullBytes = 0; AllocSizeArgs = 0; ByValType = nullptr; } @@ -1486,9 +1486,9 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) { Attrs[Val] = false; if (Val == Attribute::Alignment) - Alignment = 0; + Alignment.reset(); else if (Val == Attribute::StackAlignment) - StackAlignment = 0; + StackAlignment.reset(); else if (Val == Attribute::ByVal) ByValType = nullptr; else if (Val == Attribute::Dereferenceable) @@ -1517,23 +1517,23 @@ std::pair> AttrBuilder::getAllocSizeArgs() const { return unpackAllocSizeArgs(AllocSizeArgs); } -AttrBuilder &AttrBuilder::addAlignmentAttr(unsigned Align) { - if (Align == 0) return *this; +AttrBuilder &AttrBuilder::addAlignmentAttr(MaybeAlign Align) { + if (!Align) + return *this; - assert(isPowerOf2_32(Align) && "Alignment must be a power of two."); - assert(Align <= 0x40000000 && "Alignment too large."); + assert(*Align <= 0x40000000 && "Alignment too large."); Attrs[Attribute::Alignment] = true; Alignment = Align; return *this; } -AttrBuilder &AttrBuilder::addStackAlignmentAttr(unsigned Align) { +AttrBuilder &AttrBuilder::addStackAlignmentAttr(MaybeAlign Align) { // Default alignment, allow the target to define how to align it. - if (Align == 0) return *this; + if (!Align) + return *this; - assert(isPowerOf2_32(Align) && "Alignment must be a power of two."); - assert(Align <= 0x100 && "Alignment too large."); + assert(*Align <= 0x100 && "Alignment too large."); Attrs[Attribute::StackAlignment] = true; StackAlignment = Align; @@ -1610,10 +1610,10 @@ AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) { AttrBuilder &AttrBuilder::remove(const AttrBuilder &B) { // FIXME: What if both have alignments, but they don't match?! 
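Likewise, AttrBuilder now carries MaybeAlign internally, and addAlignmentAttr treats an unset value as a no-op. A minimal sketch of that behaviour (illustrative only):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  AttrBuilder B;
  B.addAlignmentAttr(MaybeAlign());    // unset alignment: silently ignored
  B.addAlignmentAttr(MaybeAlign(32));  // records align 32
  AttributeSet AS = AttributeSet::get(Ctx, B);
  return (AS.getAlignment() && *AS.getAlignment() == Align(32)) ? 0 : 1;
}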
if (B.Alignment) - Alignment = 0; + Alignment.reset(); if (B.StackAlignment) - StackAlignment = 0; + StackAlignment.reset(); if (B.DerefBytes) DerefBytes = 0; diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index a2d820352825..79f580d0e14d 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -490,12 +490,6 @@ static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name, static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { assert(F && "Illegal to upgrade a non-existent Function."); - // Upgrade intrinsics "clang.arc.use" which doesn't start with "llvm.". - if (F->getName() == "clang.arc.use") { - NewFn = nullptr; - return true; - } - // Quickly eliminate it, if it's not a candidate. StringRef Name = F->getName(); if (Name.size() <= 8 || !Name.startswith("llvm.")) @@ -528,7 +522,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { F->arg_begin()->getType()); return true; } - Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$"); + static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$"); if (vldRegex.match(Name)) { auto fArgs = F->getFunctionType()->params(); SmallVector Tys(fArgs.begin(), fArgs.end()); @@ -539,7 +533,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { "llvm." + Name + ".p0i8", F->getParent()); return true; } - Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$"); + static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$"); if (vstRegex.match(Name)) { static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2, @@ -604,7 +598,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { } case 'e': { SmallVector Groups; - Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+"); + static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+"); if (R.match(Name, &Groups)) { Intrinsic::ID ID = Intrinsic::not_intrinsic; if (Groups[1] == "fadd") @@ -789,6 +783,19 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { } break; + case 'p': + if (Name == "prefetch") { + // Handle address space overloading. + Type *Tys[] = {F->arg_begin()->getType()}; + if (F->getName() != Intrinsic::getName(Intrinsic::prefetch, Tys)) { + rename(F); + NewFn = + Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys); + return true; + } + } + break; + case 's': if (Name == "stackprotectorcheck") { NewFn = nullptr; @@ -1648,14 +1655,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { // Get the Function's name. StringRef Name = F->getName(); - // clang.arc.use is an old name for llvm.arc.clang.arc.use. It is dropped - // from upgrader because the optimizer now only recognizes intrinsics for - // ARC runtime calls. - if (Name == "clang.arc.use") { - CI->eraseFromParent(); - return; - } - assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'"); Name = Name.substr(5); @@ -3831,7 +3830,9 @@ bool llvm::UpgradeDebugInfo(Module &M) { return Modified; } -bool llvm::UpgradeRetainReleaseMarker(Module &M) { +/// This checks for objc retain release marker which should be upgraded. It +/// returns true if module is modified. 
+static bool UpgradeRetainReleaseMarker(Module &M) { bool Changed = false; const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker"; NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey); @@ -3855,6 +3856,106 @@ bool llvm::UpgradeRetainReleaseMarker(Module &M) { return Changed; } +void llvm::UpgradeARCRuntime(Module &M) { + // This lambda converts normal function calls to ARC runtime functions to + // intrinsic calls. + auto UpgradeToIntrinsic = [&](const char *OldFunc, + llvm::Intrinsic::ID IntrinsicFunc) { + Function *Fn = M.getFunction(OldFunc); + + if (!Fn) + return; + + Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc); + + for (auto I = Fn->user_begin(), E = Fn->user_end(); I != E;) { + CallInst *CI = dyn_cast(*I++); + if (!CI || CI->getCalledFunction() != Fn) + continue; + + IRBuilder<> Builder(CI->getParent(), CI->getIterator()); + FunctionType *NewFuncTy = NewFn->getFunctionType(); + SmallVector Args; + + for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) { + Value *Arg = CI->getArgOperand(I); + // Bitcast argument to the parameter type of the new function if it's + // not a variadic argument. + if (I < NewFuncTy->getNumParams()) + Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I)); + Args.push_back(Arg); + } + + // Create a call instruction that calls the new function. + CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args); + NewCall->setTailCallKind(cast(CI)->getTailCallKind()); + NewCall->setName(CI->getName()); + + // Bitcast the return value back to the type of the old call. + Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType()); + + if (!CI->use_empty()) + CI->replaceAllUsesWith(NewRetVal); + CI->eraseFromParent(); + } + + if (Fn->use_empty()) + Fn->eraseFromParent(); + }; + + // Unconditionally convert a call to "clang.arc.use" to a call to + // "llvm.objc.clang.arc.use". + UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use); + + // Upgrade the retain release marker. If there is no need to upgrade + // the marker, that means either the module is already new enough to contain + // new intrinsics or it is not ARC. There is no need to upgrade runtime call. 
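The UpgradeToIntrinsic lambda above follows a reusable shape: walk the old declaration's users, bitcast arguments to the new callee's parameter types, emit the replacement call, and replace all uses of the old one. A self-contained sketch of that shape (rewriteCallsTo is a made-up name; this is not the patch's code verbatim):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static void rewriteCallsTo(Function *OldFn, Function *NewFn) {
  for (auto UI = OldFn->user_begin(), UE = OldFn->user_end(); UI != UE;) {
    auto *CI = dyn_cast<CallInst>(*UI++); // advance before mutating the use list
    if (!CI || CI->getCalledFunction() != OldFn)
      continue;

    IRBuilder<> Builder(CI);
    FunctionType *NewTy = NewFn->getFunctionType();
    SmallVector<Value *, 4> Args;
    for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
      Value *Arg = CI->getArgOperand(I);
      if (I < NewTy->getNumParams())      // leave trailing varargs untouched
        Arg = Builder.CreateBitCast(Arg, NewTy->getParamType(I));
      Args.push_back(Arg);
    }
    CallInst *NewCall = Builder.CreateCall(NewTy, NewFn, Args);
    NewCall->setTailCallKind(CI->getTailCallKind());
    // Cast the result back so existing users keep their expected type.
    Value *Ret = Builder.CreateBitCast(NewCall, CI->getType());
    if (!CI->use_empty())
      CI->replaceAllUsesWith(Ret);
    CI->eraseFromParent();
  }
}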
+ if (!UpgradeRetainReleaseMarker(M)) + return; + + std::pair RuntimeFuncs[] = { + {"objc_autorelease", llvm::Intrinsic::objc_autorelease}, + {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop}, + {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush}, + {"objc_autoreleaseReturnValue", + llvm::Intrinsic::objc_autoreleaseReturnValue}, + {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak}, + {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak}, + {"objc_initWeak", llvm::Intrinsic::objc_initWeak}, + {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak}, + {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained}, + {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak}, + {"objc_release", llvm::Intrinsic::objc_release}, + {"objc_retain", llvm::Intrinsic::objc_retain}, + {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease}, + {"objc_retainAutoreleaseReturnValue", + llvm::Intrinsic::objc_retainAutoreleaseReturnValue}, + {"objc_retainAutoreleasedReturnValue", + llvm::Intrinsic::objc_retainAutoreleasedReturnValue}, + {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock}, + {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong}, + {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak}, + {"objc_unsafeClaimAutoreleasedReturnValue", + llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue}, + {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject}, + {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject}, + {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer}, + {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease}, + {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter}, + {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit}, + {"objc_arc_annotation_topdown_bbstart", + llvm::Intrinsic::objc_arc_annotation_topdown_bbstart}, + {"objc_arc_annotation_topdown_bbend", + llvm::Intrinsic::objc_arc_annotation_topdown_bbend}, + {"objc_arc_annotation_bottomup_bbstart", + llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart}, + {"objc_arc_annotation_bottomup_bbend", + llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}}; + + for (auto &I : RuntimeFuncs) + UpgradeToIntrinsic(I.first, I.second); +} + bool llvm::UpgradeModuleFlags(Module &M) { NamedMDNode *ModFlags = M.getModuleFlagsMetadata(); if (!ModFlags) @@ -4012,3 +4113,23 @@ MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) { return MDTuple::get(T->getContext(), Ops); } + +std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { + std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64"; + + // If X86, and the datalayout matches the expected format, add pointer size + // address spaces to the datalayout. 
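The new UpgradeDataLayoutString entry point (its body continues just below) splices the p270/p271/p272 address-space pointer sizes into existing x86 datalayouts. A hedged usage sketch, assuming the declaration lands in llvm/IR/AutoUpgrade.h as part of this import:

#include "llvm/IR/AutoUpgrade.h"
#include <cassert>
#include <string>

int main() {
  std::string Old = "e-m:e-i64:64-f80:128-n8:16:32:64-S128";
  std::string New =
      llvm::UpgradeDataLayoutString(Old, "x86_64-unknown-linux-gnu");
  // The address-space pointer sizes are inserted after the mangling prefix:
  //   e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128
  assert(New.find("-p270:32:32-p271:32:32-p272:64:64") != std::string::npos);
  (void)New;
  return 0;
}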
+ Triple::ArchType Arch = Triple(TT).getArch(); + if ((Arch != llvm::Triple::x86 && Arch != llvm::Triple::x86_64) || + DL.contains(AddrSpaces)) + return DL; + + SmallVector Groups; + Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)"); + if (!R.match(DL, &Groups)) + return DL; + + SmallString<1024> Buf; + std::string Res = (Groups[1] + AddrSpaces + Groups[3]).toStringRef(Buf).str(); + return Res; +} diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp index 34410712645d..bdee6990f932 100644 --- a/lib/IR/BasicBlock.cpp +++ b/lib/IR/BasicBlock.cpp @@ -107,6 +107,13 @@ BasicBlock::instructionsWithoutDebug() { return make_filter_range(*this, Fn); } +filter_iterator>::difference_type +BasicBlock::sizeWithoutDebug() const { + return std::distance(instructionsWithoutDebug().begin(), + instructionsWithoutDebug().end()); +} + void BasicBlock::removeFromParent() { getParent()->getBasicBlockList().remove(getIterator()); } diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp index 835fbb3443b8..71fa795ec294 100644 --- a/lib/IR/ConstantFold.cpp +++ b/lib/IR/ConstantFold.cpp @@ -746,7 +746,7 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond, ConstantInt::get(Ty, i)); Constant *V2Element = ConstantExpr::getExtractElement(V2, ConstantInt::get(Ty, i)); - Constant *Cond = dyn_cast(CondV->getOperand(i)); + auto *Cond = cast(CondV->getOperand(i)); if (V1Element == V2Element) { V = V1Element; } else if (isa(Cond)) { @@ -787,12 +787,9 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond, Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val, Constant *Idx) { - if (isa(Val)) // ee(undef, x) -> undef - return UndefValue::get(Val->getType()->getVectorElementType()); - if (Val->isNullValue()) // ee(zero, x) -> zero - return Constant::getNullValue(Val->getType()->getVectorElementType()); - // ee({w,x,y,z}, undef) -> undef - if (isa(Idx)) + // extractelt undef, C -> undef + // extractelt C, undef -> undef + if (isa(Val) || isa(Idx)) return UndefValue::get(Val->getType()->getVectorElementType()); if (ConstantInt *CIdx = dyn_cast(Idx)) { @@ -1125,7 +1122,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, isa(CE1->getOperand(0))) { GlobalValue *GV = cast(CE1->getOperand(0)); - unsigned GVAlign; + MaybeAlign GVAlign; if (Module *TheModule = GV->getParent()) { GVAlign = GV->getPointerAlignment(TheModule->getDataLayout()); @@ -1139,19 +1136,19 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, // increased code size (see https://reviews.llvm.org/D55115) // FIXME: This code should be deleted once existing targets have // appropriate defaults - if (GVAlign == 0U && isa(GV)) - GVAlign = 4U; + if (!GVAlign && isa(GV)) + GVAlign = Align(4); } else if (isa(GV)) { // Without a datalayout we have to assume the worst case: that the // function pointer isn't aligned at all. - GVAlign = 0U; + GVAlign = llvm::None; } else { - GVAlign = GV->getAlignment(); + GVAlign = MaybeAlign(GV->getAlignment()); } - if (GVAlign > 1) { + if (GVAlign && *GVAlign > 1) { unsigned DstWidth = CI2->getType()->getBitWidth(); - unsigned SrcWidth = std::min(DstWidth, Log2_32(GVAlign)); + unsigned SrcWidth = std::min(DstWidth, Log2(*GVAlign)); APInt BitsNotSet(APInt::getLowBitsSet(DstWidth, SrcWidth)); // If checking bits we know are clear, return zero. 
diff --git a/lib/IR/ConstantRange.cpp b/lib/IR/ConstantRange.cpp index 920fdc01a14f..642bf0f39342 100644 --- a/lib/IR/ConstantRange.cpp +++ b/lib/IR/ConstantRange.cpp @@ -269,6 +269,27 @@ ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp, return makeExactMulNSWRegion(Other.getSignedMin()) .intersectWith(makeExactMulNSWRegion(Other.getSignedMax())); + + case Instruction::Shl: { + // For given range of shift amounts, if we ignore all illegal shift amounts + // (that always produce poison), what shift amount range is left? + ConstantRange ShAmt = Other.intersectWith( + ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, (BitWidth - 1) + 1))); + if (ShAmt.isEmptySet()) { + // If the entire range of shift amounts is already poison-producing, + // then we can freely add more poison-producing flags ontop of that. + return getFull(BitWidth); + } + // There are some legal shift amounts, we can compute conservatively-correct + // range of no-wrap inputs. Note that by now we have clamped the ShAmtUMax + // to be at most bitwidth-1, which results in most conservative range. + APInt ShAmtUMax = ShAmt.getUnsignedMax(); + if (Unsigned) + return getNonEmpty(APInt::getNullValue(BitWidth), + APInt::getMaxValue(BitWidth).lshr(ShAmtUMax) + 1); + return getNonEmpty(APInt::getSignedMinValue(BitWidth).ashr(ShAmtUMax), + APInt::getSignedMaxValue(BitWidth).ashr(ShAmtUMax) + 1); + } } } @@ -815,14 +836,55 @@ ConstantRange::add(const ConstantRange &Other) const { return X; } -ConstantRange ConstantRange::addWithNoSignedWrap(const APInt &Other) const { - // Calculate the subset of this range such that "X + Other" is - // guaranteed not to wrap (overflow) for all X in this subset. - auto NSWRange = ConstantRange::makeExactNoWrapRegion( - BinaryOperator::Add, Other, OverflowingBinaryOperator::NoSignedWrap); - auto NSWConstrainedRange = intersectWith(NSWRange); +ConstantRange ConstantRange::addWithNoWrap(const ConstantRange &Other, + unsigned NoWrapKind, + PreferredRangeType RangeType) const { + // Calculate the range for "X + Y" which is guaranteed not to wrap(overflow). 
+ // (X is from this, and Y is from Other) + if (isEmptySet() || Other.isEmptySet()) + return getEmpty(); + if (isFullSet() && Other.isFullSet()) + return getFull(); + + using OBO = OverflowingBinaryOperator; + ConstantRange Result = add(Other); + + auto addWithNoUnsignedWrap = [this](const ConstantRange &Other) { + APInt LMin = getUnsignedMin(), LMax = getUnsignedMax(); + APInt RMin = Other.getUnsignedMin(), RMax = Other.getUnsignedMax(); + bool Overflow; + APInt NewMin = LMin.uadd_ov(RMin, Overflow); + if (Overflow) + return getEmpty(); + APInt NewMax = LMax.uadd_sat(RMax); + return getNonEmpty(std::move(NewMin), std::move(NewMax) + 1); + }; + + auto addWithNoSignedWrap = [this](const ConstantRange &Other) { + APInt LMin = getSignedMin(), LMax = getSignedMax(); + APInt RMin = Other.getSignedMin(), RMax = Other.getSignedMax(); + if (LMin.isNonNegative()) { + bool Overflow; + APInt Temp = LMin.sadd_ov(RMin, Overflow); + if (Overflow) + return getEmpty(); + } + if (LMax.isNegative()) { + bool Overflow; + APInt Temp = LMax.sadd_ov(RMax, Overflow); + if (Overflow) + return getEmpty(); + } + APInt NewMin = LMin.sadd_sat(RMin); + APInt NewMax = LMax.sadd_sat(RMax); + return getNonEmpty(std::move(NewMin), std::move(NewMax) + 1); + }; - return NSWConstrainedRange.add(ConstantRange(Other)); + if (NoWrapKind & OBO::NoSignedWrap) + Result = Result.intersectWith(addWithNoSignedWrap(Other), RangeType); + if (NoWrapKind & OBO::NoUnsignedWrap) + Result = Result.intersectWith(addWithNoUnsignedWrap(Other), RangeType); + return Result; } ConstantRange diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index ff551da29ae6..f792f01efc1a 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" @@ -250,6 +251,20 @@ bool Constant::isNaN() const { return true; } +bool Constant::isElementWiseEqual(Value *Y) const { + // Are they fully identical? + if (this == Y) + return true; + // They may still be identical element-wise (if they have `undef`s). + auto *Cy = dyn_cast(Y); + if (!Cy) + return false; + return PatternMatch::match(ConstantExpr::getICmp(ICmpInst::Predicate::ICMP_EQ, + const_cast(this), + Cy), + PatternMatch::m_One()); +} + bool Constant::containsUndefElement() const { if (!getType()->isVectorTy()) return false; @@ -502,22 +517,32 @@ bool Constant::needsRelocation() const { if (const BlockAddress *BA = dyn_cast(this)) return BA->getFunction()->needsRelocation(); - // While raw uses of blockaddress need to be relocated, differences between - // two of them don't when they are for labels in the same function. This is a - // common idiom when creating a table for the indirect goto extension, so we - // handle it efficiently here. 
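For the ConstantRange hunk above: addWithNoSignedWrap(APInt) is generalized into addWithNoWrap, which takes another range plus the overflowing-binary-operator no-wrap flags. A small usage sketch under the new signature (values chosen purely for illustration):

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Operator.h"
using namespace llvm;

int main() {
  ConstantRange X(APInt(8, 100), APInt(8, 121)); // i8 values [100, 120]
  ConstantRange Y(APInt(8, 10), APInt(8, 21));   // i8 values [10, 20]
  ConstantRange R = X.addWithNoWrap(Y, OverflowingBinaryOperator::NoSignedWrap,
                                    ConstantRange::Smallest);
  // A plain add would reach 140 and wrap the signed i8 range; the nsw version
  // clamps the upper bound at the signed maximum instead.
  return R.getSignedMax() == APInt(8, 127) ? 0 : 1;
}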
- if (const ConstantExpr *CE = dyn_cast(this)) + if (const ConstantExpr *CE = dyn_cast(this)) { if (CE->getOpcode() == Instruction::Sub) { ConstantExpr *LHS = dyn_cast(CE->getOperand(0)); ConstantExpr *RHS = dyn_cast(CE->getOperand(1)); if (LHS && RHS && LHS->getOpcode() == Instruction::PtrToInt && - RHS->getOpcode() == Instruction::PtrToInt && - isa(LHS->getOperand(0)) && - isa(RHS->getOperand(0)) && - cast(LHS->getOperand(0))->getFunction() == - cast(RHS->getOperand(0))->getFunction()) - return false; + RHS->getOpcode() == Instruction::PtrToInt) { + Constant *LHSOp0 = LHS->getOperand(0); + Constant *RHSOp0 = RHS->getOperand(0); + + // While raw uses of blockaddress need to be relocated, differences + // between two of them don't when they are for labels in the same + // function. This is a common idiom when creating a table for the + // indirect goto extension, so we handle it efficiently here. + if (isa(LHSOp0) && isa(RHSOp0) && + cast(LHSOp0)->getFunction() == + cast(RHSOp0)->getFunction()) + return false; + + // Relative pointers do not need to be dynamically relocated. + if (auto *LHSGV = dyn_cast(LHSOp0->stripPointerCasts())) + if (auto *RHSGV = dyn_cast(RHSOp0->stripPointerCasts())) + if (LHSGV->isDSOLocal() && RHSGV->isDSOLocal()) + return false; + } } + } bool Result = false; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) @@ -563,13 +588,10 @@ void Constant::removeDeadConstantUsers() const { } // If the constant was dead, then the iterator is invalidated. - if (LastNonDeadUser == E) { + if (LastNonDeadUser == E) I = user_begin(); - if (I == E) break; - } else { - I = LastNonDeadUser; - ++I; - } + else + I = std::next(LastNonDeadUser); } } diff --git a/lib/IR/ConstantsContext.h b/lib/IR/ConstantsContext.h index 7614dab9f15d..1ec9087551f8 100644 --- a/lib/IR/ConstantsContext.h +++ b/lib/IR/ConstantsContext.h @@ -480,14 +480,16 @@ struct ConstantExprKeyType { : Opcode(CE->getOpcode()), SubclassOptionalData(CE->getRawSubclassOptionalData()), SubclassData(CE->isCompare() ? CE->getPredicate() : 0), Ops(Operands), - Indexes(CE->hasIndices() ? CE->getIndices() : ArrayRef()) {} + Indexes(CE->hasIndices() ? CE->getIndices() : ArrayRef()), + ExplicitTy(nullptr) {} ConstantExprKeyType(const ConstantExpr *CE, SmallVectorImpl &Storage) : Opcode(CE->getOpcode()), SubclassOptionalData(CE->getRawSubclassOptionalData()), SubclassData(CE->isCompare() ? CE->getPredicate() : 0), - Indexes(CE->hasIndices() ? CE->getIndices() : ArrayRef()) { + Indexes(CE->hasIndices() ? CE->getIndices() : ArrayRef()), + ExplicitTy(nullptr) { assert(Storage.empty() && "Expected empty storage"); for (unsigned I = 0, E = CE->getNumOperands(); I != E; ++I) Storage.push_back(CE->getOperand(I)); @@ -676,9 +678,9 @@ public: /// Hash once, and reuse it for the lookup and the insertion if needed. LookupKeyHashed Lookup(MapInfo::getHashValue(Key), Key); - auto I = Map.find_as(Lookup); - if (I != Map.end()) - return *I; + auto ItMap = Map.find_as(Lookup); + if (ItMap != Map.end()) + return *ItMap; // Update to the new value. Optimize for the case when we have a single // operand that we're changing, but handle bulk updates efficiently. 
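The Constant::isElementWiseEqual helper added in the Constants.cpp hunk above treats constants that differ only in undef lanes as equal. A hedged sketch of the intended behaviour (illustrative; the comparison is folded through ConstantExpr::getICmp exactly as shown above):

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Type *I32 = Type::getInt32Ty(Ctx);
  Constant *One = ConstantInt::get(I32, 1);
  Constant *Undef = UndefValue::get(I32);
  Constant *A = ConstantVector::get({One, One});
  Constant *B = ConstantVector::get({One, Undef});
  // <1, 1> vs <1, undef>: identical in every non-undef lane.
  return A->isElementWiseEqual(B) ? 0 : 1;
}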
diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index 310935b5213a..a5f46b16e600 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -140,7 +140,16 @@ unsigned LLVMGetLastEnumAttributeKind(void) { LLVMAttributeRef LLVMCreateEnumAttribute(LLVMContextRef C, unsigned KindID, uint64_t Val) { - return wrap(Attribute::get(*unwrap(C), (Attribute::AttrKind)KindID, Val)); + auto &Ctx = *unwrap(C); + auto AttrKind = (Attribute::AttrKind)KindID; + + if (AttrKind == Attribute::AttrKind::ByVal) { + // After r362128, byval attributes need to have a type attribute. Provide a + // NULL one until a proper API is added for this. + return wrap(Attribute::getWithByValType(Ctx, NULL)); + } else { + return wrap(Attribute::get(Ctx, AttrKind, Val)); + } } unsigned LLVMGetEnumAttributeKind(LLVMAttributeRef A) { @@ -386,7 +395,7 @@ void LLVMDumpModule(LLVMModuleRef M) { LLVMBool LLVMPrintModuleToFile(LLVMModuleRef M, const char *Filename, char **ErrorMessage) { std::error_code EC; - raw_fd_ostream dest(Filename, EC, sys::fs::F_Text); + raw_fd_ostream dest(Filename, EC, sys::fs::OF_Text); if (EC) { *ErrorMessage = strdup(EC.message().c_str()); return true; @@ -1999,13 +2008,13 @@ unsigned LLVMGetAlignment(LLVMValueRef V) { void LLVMSetAlignment(LLVMValueRef V, unsigned Bytes) { Value *P = unwrap(V); if (GlobalObject *GV = dyn_cast(P)) - GV->setAlignment(Bytes); + GV->setAlignment(MaybeAlign(Bytes)); else if (AllocaInst *AI = dyn_cast(P)) - AI->setAlignment(Bytes); + AI->setAlignment(MaybeAlign(Bytes)); else if (LoadInst *LI = dyn_cast(P)) - LI->setAlignment(Bytes); + LI->setAlignment(MaybeAlign(Bytes)); else if (StoreInst *SI = dyn_cast(P)) - SI->setAlignment(Bytes); + SI->setAlignment(MaybeAlign(Bytes)); else llvm_unreachable( "only GlobalValue, AllocaInst, LoadInst and StoreInst have alignment"); @@ -2480,7 +2489,7 @@ LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) { void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) { Argument *A = unwrap(Arg); - A->addAttr(Attribute::getWithAlignment(A->getContext(), align)); + A->addAttr(Attribute::getWithAlignment(A->getContext(), Align(align))); } /*--.. 
Operations on ifuncs ................................................--*/ @@ -2779,7 +2788,8 @@ void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) { void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index, unsigned align) { auto *Call = unwrap(Instr); - Attribute AlignAttr = Attribute::getWithAlignment(Call->getContext(), align); + Attribute AlignAttr = + Attribute::getWithAlignment(Call->getContext(), Align(align)); Call->addAttribute(index, AlignAttr); } @@ -3518,6 +3528,47 @@ static LLVMAtomicOrdering mapToLLVMOrdering(AtomicOrdering Ordering) { llvm_unreachable("Invalid AtomicOrdering value!"); } +static AtomicRMWInst::BinOp mapFromLLVMRMWBinOp(LLVMAtomicRMWBinOp BinOp) { + switch (BinOp) { + case LLVMAtomicRMWBinOpXchg: return AtomicRMWInst::Xchg; + case LLVMAtomicRMWBinOpAdd: return AtomicRMWInst::Add; + case LLVMAtomicRMWBinOpSub: return AtomicRMWInst::Sub; + case LLVMAtomicRMWBinOpAnd: return AtomicRMWInst::And; + case LLVMAtomicRMWBinOpNand: return AtomicRMWInst::Nand; + case LLVMAtomicRMWBinOpOr: return AtomicRMWInst::Or; + case LLVMAtomicRMWBinOpXor: return AtomicRMWInst::Xor; + case LLVMAtomicRMWBinOpMax: return AtomicRMWInst::Max; + case LLVMAtomicRMWBinOpMin: return AtomicRMWInst::Min; + case LLVMAtomicRMWBinOpUMax: return AtomicRMWInst::UMax; + case LLVMAtomicRMWBinOpUMin: return AtomicRMWInst::UMin; + case LLVMAtomicRMWBinOpFAdd: return AtomicRMWInst::FAdd; + case LLVMAtomicRMWBinOpFSub: return AtomicRMWInst::FSub; + } + + llvm_unreachable("Invalid LLVMAtomicRMWBinOp value!"); +} + +static LLVMAtomicRMWBinOp mapToLLVMRMWBinOp(AtomicRMWInst::BinOp BinOp) { + switch (BinOp) { + case AtomicRMWInst::Xchg: return LLVMAtomicRMWBinOpXchg; + case AtomicRMWInst::Add: return LLVMAtomicRMWBinOpAdd; + case AtomicRMWInst::Sub: return LLVMAtomicRMWBinOpSub; + case AtomicRMWInst::And: return LLVMAtomicRMWBinOpAnd; + case AtomicRMWInst::Nand: return LLVMAtomicRMWBinOpNand; + case AtomicRMWInst::Or: return LLVMAtomicRMWBinOpOr; + case AtomicRMWInst::Xor: return LLVMAtomicRMWBinOpXor; + case AtomicRMWInst::Max: return LLVMAtomicRMWBinOpMax; + case AtomicRMWInst::Min: return LLVMAtomicRMWBinOpMin; + case AtomicRMWInst::UMax: return LLVMAtomicRMWBinOpUMax; + case AtomicRMWInst::UMin: return LLVMAtomicRMWBinOpUMin; + case AtomicRMWInst::FAdd: return LLVMAtomicRMWBinOpFAdd; + case AtomicRMWInst::FSub: return LLVMAtomicRMWBinOpFSub; + default: break; + } + + llvm_unreachable("Invalid AtomicRMWBinOp value!"); +} + // TODO: Should this and other atomic instructions support building with // "syncscope"? 
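The two bin-op mapping helpers above back a set of new C-API accessors added further down in this file (LLVMGetAtomicRMWBinOp/LLVMSetAtomicRMWBinOp, plus the cmpxchg weak flag). A brief usage sketch from the C API, callable from C++; the builder and operands are assumed to already exist:

#include "llvm-c/Core.h"

static void demo(LLVMBuilderRef B, LLVMValueRef Ptr, LLVMValueRef Val) {
  LLVMValueRef RMW =
      LLVMBuildAtomicRMW(B, LLVMAtomicRMWBinOpAdd, Ptr, Val,
                         LLVMAtomicOrderingSequentiallyConsistent,
                         /*singleThread=*/0);
  // The operation of an existing atomicrmw can now be inspected and rewritten.
  if (LLVMGetAtomicRMWBinOp(RMW) == LLVMAtomicRMWBinOpAdd)
    LLVMSetAtomicRMWBinOp(RMW, LLVMAtomicRMWBinOpSub);
}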
LLVMValueRef LLVMBuildFence(LLVMBuilderRef B, LLVMAtomicOrdering Ordering, @@ -3593,14 +3644,30 @@ LLVMBool LLVMGetVolatile(LLVMValueRef MemAccessInst) { Value *P = unwrap(MemAccessInst); if (LoadInst *LI = dyn_cast(P)) return LI->isVolatile(); - return cast(P)->isVolatile(); + if (StoreInst *SI = dyn_cast(P)) + return SI->isVolatile(); + if (AtomicRMWInst *AI = dyn_cast(P)) + return AI->isVolatile(); + return cast(P)->isVolatile(); } void LLVMSetVolatile(LLVMValueRef MemAccessInst, LLVMBool isVolatile) { Value *P = unwrap(MemAccessInst); if (LoadInst *LI = dyn_cast(P)) return LI->setVolatile(isVolatile); - return cast(P)->setVolatile(isVolatile); + if (StoreInst *SI = dyn_cast(P)) + return SI->setVolatile(isVolatile); + if (AtomicRMWInst *AI = dyn_cast(P)) + return AI->setVolatile(isVolatile); + return cast(P)->setVolatile(isVolatile); +} + +LLVMBool LLVMGetWeak(LLVMValueRef CmpXchgInst) { + return unwrap(CmpXchgInst)->isWeak(); +} + +void LLVMSetWeak(LLVMValueRef CmpXchgInst, LLVMBool isWeak) { + return unwrap(CmpXchgInst)->setWeak(isWeak); } LLVMAtomicOrdering LLVMGetOrdering(LLVMValueRef MemAccessInst) { @@ -3608,8 +3675,10 @@ LLVMAtomicOrdering LLVMGetOrdering(LLVMValueRef MemAccessInst) { AtomicOrdering O; if (LoadInst *LI = dyn_cast(P)) O = LI->getOrdering(); + else if (StoreInst *SI = dyn_cast(P)) + O = SI->getOrdering(); else - O = cast(P)->getOrdering(); + O = cast(P)->getOrdering(); return mapToLLVMOrdering(O); } @@ -3622,6 +3691,14 @@ void LLVMSetOrdering(LLVMValueRef MemAccessInst, LLVMAtomicOrdering Ordering) { return cast(P)->setOrdering(O); } +LLVMAtomicRMWBinOp LLVMGetAtomicRMWBinOp(LLVMValueRef Inst) { + return mapToLLVMRMWBinOp(unwrap(Inst)->getOperation()); +} + +void LLVMSetAtomicRMWBinOp(LLVMValueRef Inst, LLVMAtomicRMWBinOp BinOp) { + unwrap(Inst)->setOperation(mapFromLLVMRMWBinOp(BinOp)); +} + /*--.. Casts ...............................................................--*/ LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef B, LLVMValueRef Val, @@ -3840,20 +3917,7 @@ LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op, LLVMValueRef PTR, LLVMValueRef Val, LLVMAtomicOrdering ordering, LLVMBool singleThread) { - AtomicRMWInst::BinOp intop; - switch (op) { - case LLVMAtomicRMWBinOpXchg: intop = AtomicRMWInst::Xchg; break; - case LLVMAtomicRMWBinOpAdd: intop = AtomicRMWInst::Add; break; - case LLVMAtomicRMWBinOpSub: intop = AtomicRMWInst::Sub; break; - case LLVMAtomicRMWBinOpAnd: intop = AtomicRMWInst::And; break; - case LLVMAtomicRMWBinOpNand: intop = AtomicRMWInst::Nand; break; - case LLVMAtomicRMWBinOpOr: intop = AtomicRMWInst::Or; break; - case LLVMAtomicRMWBinOpXor: intop = AtomicRMWInst::Xor; break; - case LLVMAtomicRMWBinOpMax: intop = AtomicRMWInst::Max; break; - case LLVMAtomicRMWBinOpMin: intop = AtomicRMWInst::Min; break; - case LLVMAtomicRMWBinOpUMax: intop = AtomicRMWInst::UMax; break; - case LLVMAtomicRMWBinOpUMin: intop = AtomicRMWInst::UMin; break; - } + AtomicRMWInst::BinOp intop = mapFromLLVMRMWBinOp(op); return wrap(unwrap(B)->CreateAtomicRMW(intop, unwrap(PTR), unwrap(Val), mapFromLLVMOrdering(ordering), singleThread ? 
SyncScope::SingleThread : SyncScope::System)); diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index 2493c6cbe532..5d5671227430 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -25,7 +25,7 @@ using namespace llvm; using namespace llvm::dwarf; -cl::opt +static cl::opt UseDbgAddr("use-dbg-addr", llvm::cl::desc("Use llvm.dbg.addr for all local variables"), cl::init(false), cl::Hidden); diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp index 6e0ebbd4a730..5fe7a2e94b6a 100644 --- a/lib/IR/DataLayout.cpp +++ b/lib/IR/DataLayout.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/TypeSize.h" #include #include #include @@ -44,7 +45,6 @@ using namespace llvm; StructLayout::StructLayout(StructType *ST, const DataLayout &DL) { assert(!ST->isOpaque() && "Cannot get layout of opaque structs"); - StructAlignment = 0; StructSize = 0; IsPadded = false; NumElements = ST->getNumElements(); @@ -52,10 +52,10 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL) { // Loop over each of the elements, placing them in memory. for (unsigned i = 0, e = NumElements; i != e; ++i) { Type *Ty = ST->getElementType(i); - unsigned TyAlign = ST->isPacked() ? 1 : DL.getABITypeAlignment(Ty); + const Align TyAlign(ST->isPacked() ? 1 : DL.getABITypeAlignment(Ty)); // Add padding if necessary to align the data element properly. - if ((StructSize & (TyAlign-1)) != 0) { + if (!isAligned(TyAlign, StructSize)) { IsPadded = true; StructSize = alignTo(StructSize, TyAlign); } @@ -67,12 +67,9 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL) { StructSize += DL.getTypeAllocSize(Ty); // Consume space for this data item } - // Empty structures have alignment of 1 byte. - if (StructAlignment == 0) StructAlignment = 1; - // Add padding to the end of the struct so that it could be put in an array // and all array elements would be aligned correctly. 
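The StructLayout hunks above replace the manual mask arithmetic with the Alignment.h helpers. A stand-alone sketch of the two calls involved (placeField is a made-up name, not from the patch):

#include "llvm/Support/Alignment.h"
#include <cstdint>

// Mirrors the padding step: test with isAligned instead of "Size & (Align-1)",
// then round up with the Align-typed alignTo overload.
static uint64_t placeField(uint64_t StructSize, llvm::Align FieldAlign,
                           uint64_t FieldSize) {
  if (!llvm::isAligned(FieldAlign, StructSize))
    StructSize = llvm::alignTo(StructSize, FieldAlign);
  return StructSize + FieldSize;
}

// For example, placeField(5, llvm::Align(4), 4) yields 12: offset 5 is first
// padded up to 8.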
- if ((StructSize & (StructAlignment-1)) != 0) { + if (!isAligned(StructAlignment, StructSize)) { IsPadded = true; StructSize = alignTo(StructSize, StructAlignment); } @@ -102,9 +99,8 @@ unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const { // LayoutAlignElem, LayoutAlign support //===----------------------------------------------------------------------===// -LayoutAlignElem -LayoutAlignElem::get(AlignTypeEnum align_type, unsigned abi_align, - unsigned pref_align, uint32_t bit_width) { +LayoutAlignElem LayoutAlignElem::get(AlignTypeEnum align_type, Align abi_align, + Align pref_align, uint32_t bit_width) { assert(abi_align <= pref_align && "Preferred alignment worse than ABI!"); LayoutAlignElem retval; retval.AlignType = align_type; @@ -126,10 +122,9 @@ LayoutAlignElem::operator==(const LayoutAlignElem &rhs) const { // PointerAlignElem, PointerAlign support //===----------------------------------------------------------------------===// -PointerAlignElem -PointerAlignElem::get(uint32_t AddressSpace, unsigned ABIAlign, - unsigned PrefAlign, uint32_t TypeByteWidth, - uint32_t IndexWidth) { +PointerAlignElem PointerAlignElem::get(uint32_t AddressSpace, Align ABIAlign, + Align PrefAlign, uint32_t TypeByteWidth, + uint32_t IndexWidth) { assert(ABIAlign <= PrefAlign && "Preferred alignment worse than ABI!"); PointerAlignElem retval; retval.AddressSpace = AddressSpace; @@ -162,18 +157,18 @@ const char *DataLayout::getManglingComponent(const Triple &T) { } static const LayoutAlignElem DefaultAlignments[] = { - { INTEGER_ALIGN, 1, 1, 1 }, // i1 - { INTEGER_ALIGN, 8, 1, 1 }, // i8 - { INTEGER_ALIGN, 16, 2, 2 }, // i16 - { INTEGER_ALIGN, 32, 4, 4 }, // i32 - { INTEGER_ALIGN, 64, 4, 8 }, // i64 - { FLOAT_ALIGN, 16, 2, 2 }, // half - { FLOAT_ALIGN, 32, 4, 4 }, // float - { FLOAT_ALIGN, 64, 8, 8 }, // double - { FLOAT_ALIGN, 128, 16, 16 }, // ppcf128, quad, ... - { VECTOR_ALIGN, 64, 8, 8 }, // v2i32, v1i64, ... - { VECTOR_ALIGN, 128, 16, 16 }, // v16i8, v8i16, v4i32, ... - { AGGREGATE_ALIGN, 0, 0, 8 } // struct + {INTEGER_ALIGN, 1, Align(1), Align(1)}, // i1 + {INTEGER_ALIGN, 8, Align(1), Align(1)}, // i8 + {INTEGER_ALIGN, 16, Align(2), Align(2)}, // i16 + {INTEGER_ALIGN, 32, Align(4), Align(4)}, // i32 + {INTEGER_ALIGN, 64, Align(4), Align(8)}, // i64 + {FLOAT_ALIGN, 16, Align(2), Align(2)}, // half + {FLOAT_ALIGN, 32, Align(4), Align(4)}, // float + {FLOAT_ALIGN, 64, Align(8), Align(8)}, // double + {FLOAT_ALIGN, 128, Align(16), Align(16)}, // ppcf128, quad, ... + {VECTOR_ALIGN, 64, Align(8), Align(8)}, // v2i32, v1i64, ... + {VECTOR_ALIGN, 128, Align(16), Align(16)}, // v16i8, v8i16, v4i32, ... 
+ {AGGREGATE_ALIGN, 0, Align(1), Align(8)} // struct }; void DataLayout::reset(StringRef Desc) { @@ -182,9 +177,9 @@ void DataLayout::reset(StringRef Desc) { LayoutMap = nullptr; BigEndian = false; AllocaAddrSpace = 0; - StackNaturalAlign = 0; + StackNaturalAlign.reset(); ProgramAddrSpace = 0; - FunctionPtrAlign = 0; + FunctionPtrAlign.reset(); TheFunctionPtrAlignType = FunctionPtrAlignType::Independent; ManglingMode = MM_None; NonIntegralAddressSpaces.clear(); @@ -194,7 +189,7 @@ void DataLayout::reset(StringRef Desc) { setAlignment((AlignTypeEnum)E.AlignType, E.ABIAlign, E.PrefAlign, E.TypeBitWidth); } - setPointerAlignment(0, 8, 8, 8, 8); + setPointerAlignment(0, Align(8), Align(8), 8, 8); parseSpecifier(Desc); } @@ -320,8 +315,9 @@ void DataLayout::parseSpecifier(StringRef Desc) { report_fatal_error("Invalid index size of 0 bytes"); } } - setPointerAlignment(AddrSpace, PointerABIAlign, PointerPrefAlign, - PointerMemSize, IndexSize); + setPointerAlignment(AddrSpace, assumeAligned(PointerABIAlign), + assumeAligned(PointerPrefAlign), PointerMemSize, + IndexSize); break; } case 'i': @@ -349,11 +345,16 @@ void DataLayout::parseSpecifier(StringRef Desc) { report_fatal_error( "Missing alignment specification in datalayout string"); Split = split(Rest, ':'); - unsigned ABIAlign = inBytes(getInt(Tok)); + const unsigned ABIAlign = inBytes(getInt(Tok)); if (AlignType != AGGREGATE_ALIGN && !ABIAlign) report_fatal_error( "ABI alignment specification must be >0 for non-aggregate types"); + if (!isUInt<16>(ABIAlign)) + report_fatal_error("Invalid ABI alignment, must be a 16bit integer"); + if (ABIAlign != 0 && !isPowerOf2_64(ABIAlign)) + report_fatal_error("Invalid ABI alignment, must be a power of 2"); + // Preferred alignment. unsigned PrefAlign = ABIAlign; if (!Rest.empty()) { @@ -361,7 +362,14 @@ void DataLayout::parseSpecifier(StringRef Desc) { PrefAlign = inBytes(getInt(Tok)); } - setAlignment(AlignType, ABIAlign, PrefAlign, Size); + if (!isUInt<16>(PrefAlign)) + report_fatal_error( + "Invalid preferred alignment, must be a 16bit integer"); + if (PrefAlign != 0 && !isPowerOf2_64(PrefAlign)) + report_fatal_error("Invalid preferred alignment, must be a power of 2"); + + setAlignment(AlignType, assumeAligned(ABIAlign), assumeAligned(PrefAlign), + Size); break; } @@ -378,7 +386,10 @@ void DataLayout::parseSpecifier(StringRef Desc) { } break; case 'S': { // Stack natural alignment. - StackNaturalAlign = inBytes(getInt(Tok)); + uint64_t Alignment = inBytes(getInt(Tok)); + if (Alignment != 0 && !llvm::isPowerOf2_64(Alignment)) + report_fatal_error("Alignment is neither 0 nor a power of 2"); + StackNaturalAlign = MaybeAlign(Alignment); break; } case 'F': { @@ -394,7 +405,10 @@ void DataLayout::parseSpecifier(StringRef Desc) { "datalayout string"); } Tok = Tok.substr(1); - FunctionPtrAlign = inBytes(getInt(Tok)); + uint64_t Alignment = inBytes(getInt(Tok)); + if (Alignment != 0 && !llvm::isPowerOf2_64(Alignment)) + report_fatal_error("Alignment is neither 0 nor a power of 2"); + FunctionPtrAlign = MaybeAlign(Alignment); break; } case 'P': { // Function address space. 
@@ -468,20 +482,15 @@ DataLayout::findAlignmentLowerBound(AlignTypeEnum AlignType, }); } -void -DataLayout::setAlignment(AlignTypeEnum align_type, unsigned abi_align, - unsigned pref_align, uint32_t bit_width) { +void DataLayout::setAlignment(AlignTypeEnum align_type, Align abi_align, + Align pref_align, uint32_t bit_width) { + // AlignmentsTy::ABIAlign and AlignmentsTy::PrefAlign were once stored as + // uint16_t, it is unclear if there are requirements for alignment to be less + // than 2^16 other than storage. In the meantime we leave the restriction as + // an assert. See D67400 for context. + assert(Log2(abi_align) < 16 && Log2(pref_align) < 16 && "Alignment too big"); if (!isUInt<24>(bit_width)) report_fatal_error("Invalid bit width, must be a 24bit integer"); - if (!isUInt<16>(abi_align)) - report_fatal_error("Invalid ABI alignment, must be a 16bit integer"); - if (!isUInt<16>(pref_align)) - report_fatal_error("Invalid preferred alignment, must be a 16bit integer"); - if (abi_align != 0 && !isPowerOf2_64(abi_align)) - report_fatal_error("Invalid ABI alignment, must be a power of 2"); - if (pref_align != 0 && !isPowerOf2_64(pref_align)) - report_fatal_error("Invalid preferred alignment, must be a power of 2"); - if (pref_align < abi_align) report_fatal_error( "Preferred alignment cannot be less than the ABI alignment"); @@ -507,8 +516,8 @@ DataLayout::findPointerLowerBound(uint32_t AddressSpace) { }); } -void DataLayout::setPointerAlignment(uint32_t AddrSpace, unsigned ABIAlign, - unsigned PrefAlign, uint32_t TypeByteWidth, +void DataLayout::setPointerAlignment(uint32_t AddrSpace, Align ABIAlign, + Align PrefAlign, uint32_t TypeByteWidth, uint32_t IndexWidth) { if (PrefAlign < ABIAlign) report_fatal_error( @@ -528,9 +537,8 @@ void DataLayout::setPointerAlignment(uint32_t AddrSpace, unsigned ABIAlign, /// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or /// preferred if ABIInfo = false) the layout wants for the specified datatype. -unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType, - uint32_t BitWidth, bool ABIInfo, - Type *Ty) const { +Align DataLayout::getAlignmentInfo(AlignTypeEnum AlignType, uint32_t BitWidth, + bool ABIInfo, Type *Ty) const { AlignmentsTy::const_iterator I = findAlignmentLowerBound(AlignType, BitWidth); // See if we found an exact match. Of if we are looking for an integer type, // but don't have an exact match take the next largest integer. This is where @@ -549,10 +557,11 @@ unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType, } else if (AlignType == VECTOR_ALIGN) { // By default, use natural alignment for vector types. This is consistent // with what clang and llvm-gcc do. - unsigned Align = getTypeAllocSize(cast(Ty)->getElementType()); - Align *= cast(Ty)->getNumElements(); - Align = PowerOf2Ceil(Align); - return Align; + unsigned Alignment = + getTypeAllocSize(cast(Ty)->getElementType()); + Alignment *= cast(Ty)->getNumElements(); + Alignment = PowerOf2Ceil(Alignment); + return Align(Alignment); } // If we still couldn't find a reasonable default alignment, fall back @@ -561,9 +570,9 @@ unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType, // approximation of reality, and if the user wanted something less // less conservative, they should have specified it explicitly in the data // layout. 
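Although getAlignmentInfo and friends now traffic in Align internally (above and below), getABITypeAlignment and getPrefTypeAlignment still return plain unsigned byte values, unwrapped from the Align-based internals. A minimal sketch of that caller-facing behaviour (layout string chosen for illustration):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  DataLayout DL("e-m:e-i64:64-f80:128-n8:16:32:64-S128");
  Type *I64 = Type::getInt64Ty(Ctx);
  // "i64:64" gives i64 an ABI alignment of 64 bits, i.e. 8 bytes.
  return DL.getABITypeAlignment(I64) == 8 ? 0 : 1;
}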
- unsigned Align = getTypeStoreSize(Ty); - Align = PowerOf2Ceil(Align); - return Align; + unsigned Alignment = getTypeStoreSize(Ty); + Alignment = PowerOf2Ceil(Alignment); + return Align(Alignment); } namespace { @@ -624,7 +633,7 @@ const StructLayout *DataLayout::getStructLayout(StructType *Ty) const { return L; } -unsigned DataLayout::getPointerABIAlignment(unsigned AS) const { +Align DataLayout::getPointerABIAlignment(unsigned AS) const { PointersTy::const_iterator I = findPointerLowerBound(AS); if (I == Pointers.end() || I->AddressSpace != AS) { I = findPointerLowerBound(0); @@ -633,7 +642,7 @@ unsigned DataLayout::getPointerABIAlignment(unsigned AS) const { return I->ABIAlign; } -unsigned DataLayout::getPointerPrefAlignment(unsigned AS) const { +Align DataLayout::getPointerPrefAlignment(unsigned AS) const { PointersTy::const_iterator I = findPointerLowerBound(AS); if (I == Pointers.end() || I->AddressSpace != AS) { I = findPointerLowerBound(0); @@ -690,21 +699,18 @@ unsigned DataLayout::getIndexTypeSizeInBits(Type *Ty) const { Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref == false) for the requested type \a Ty. */ -unsigned DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const { +Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const { AlignTypeEnum AlignType; assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!"); switch (Ty->getTypeID()) { // Early escape for the non-numeric types. case Type::LabelTyID: - return (abi_or_pref - ? getPointerABIAlignment(0) - : getPointerPrefAlignment(0)); + return abi_or_pref ? getPointerABIAlignment(0) : getPointerPrefAlignment(0); case Type::PointerTyID: { unsigned AS = cast(Ty)->getAddressSpace(); - return (abi_or_pref - ? getPointerABIAlignment(AS) - : getPointerPrefAlignment(AS)); + return abi_or_pref ? getPointerABIAlignment(AS) + : getPointerPrefAlignment(AS); } case Type::ArrayTyID: return getAlignment(cast(Ty)->getElementType(), abi_or_pref); @@ -712,11 +718,11 @@ unsigned DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const { case Type::StructTyID: { // Packed structure types always have an ABI alignment of one. if (cast(Ty)->isPacked() && abi_or_pref) - return 1; + return Align::None(); // Get the layout annotation... which is lazily created on demand. const StructLayout *Layout = getStructLayout(cast(Ty)); - unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty); + const Align Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty); return std::max(Align, Layout->getAlignment()); } case Type::IntegerTyID: @@ -740,27 +746,24 @@ unsigned DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const { llvm_unreachable("Bad type for getAlignment!!!"); } - return getAlignmentInfo(AlignType, getTypeSizeInBits(Ty), abi_or_pref, Ty); + // If we're dealing with a scalable vector, we just need the known minimum + // size for determining alignment. If not, we'll get the exact size. + return getAlignmentInfo(AlignType, getTypeSizeInBits(Ty).getKnownMinSize(), + abi_or_pref, Ty); } unsigned DataLayout::getABITypeAlignment(Type *Ty) const { - return getAlignment(Ty, true); + return getAlignment(Ty, true).value(); } /// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for /// an integer type of the specified bitwidth. 
-unsigned DataLayout::getABIIntegerTypeAlignment(unsigned BitWidth) const { +Align DataLayout::getABIIntegerTypeAlignment(unsigned BitWidth) const { return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, nullptr); } unsigned DataLayout::getPrefTypeAlignment(Type *Ty) const { - return getAlignment(Ty, false); -} - -unsigned DataLayout::getPreferredTypeAlignmentShift(Type *Ty) const { - unsigned Align = getPrefTypeAlignment(Ty); - assert(!(Align & (Align-1)) && "Alignment is not a power of two!"); - return Log2_32(Align); + return getAlignment(Ty, false).value(); } IntegerType *DataLayout::getIntPtrType(LLVMContext &C, diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index ce47ef207434..1bbe6b85d260 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -279,7 +279,7 @@ bool DebugInfoFinder::addScope(DIScope *Scope) { } static MDNode *stripDebugLocFromLoopID(MDNode *N) { - assert(!empty(N->operands()) && "Missing self reference?"); + assert(!N->operands().empty() && "Missing self reference?"); // if there is no debug location, we do not have to rewrite this MDNode. if (std::none_of(N->op_begin() + 1, N->op_end(), [](const MDOperand &Op) { @@ -929,6 +929,26 @@ const char *LLVMDIFileGetSource(LLVMMetadataRef File, unsigned *Len) { return ""; } +LLVMMetadataRef LLVMDIBuilderCreateMacro(LLVMDIBuilderRef Builder, + LLVMMetadataRef ParentMacroFile, + unsigned Line, + LLVMDWARFMacinfoRecordType RecordType, + const char *Name, size_t NameLen, + const char *Value, size_t ValueLen) { + return wrap( + unwrap(Builder)->createMacro(unwrapDI(ParentMacroFile), Line, + static_cast(RecordType), + {Name, NameLen}, {Value, ValueLen})); +} + +LLVMMetadataRef +LLVMDIBuilderCreateTempMacroFile(LLVMDIBuilderRef Builder, + LLVMMetadataRef ParentMacroFile, unsigned Line, + LLVMMetadataRef File) { + return wrap(unwrap(Builder)->createTempMacroFile( + unwrapDI(ParentMacroFile), Line, unwrapDI(File))); +} + LLVMMetadataRef LLVMDIBuilderCreateEnumerator(LLVMDIBuilderRef Builder, const char *Name, size_t NameLen, int64_t Value, diff --git a/lib/IR/DebugInfoMetadata.cpp b/lib/IR/DebugInfoMetadata.cpp index 900df27d1d33..94ec3abfa7a2 100644 --- a/lib/IR/DebugInfoMetadata.cpp +++ b/lib/IR/DebugInfoMetadata.cpp @@ -828,15 +828,23 @@ DIExpression *DIExpression::getImpl(LLVMContext &Context, } unsigned DIExpression::ExprOperand::getSize() const { - switch (getOp()) { + uint64_t Op = getOp(); + + if (Op >= dwarf::DW_OP_breg0 && Op <= dwarf::DW_OP_breg31) + return 2; + + switch (Op) { case dwarf::DW_OP_LLVM_convert: case dwarf::DW_OP_LLVM_fragment: + case dwarf::DW_OP_bregx: return 3; case dwarf::DW_OP_constu: + case dwarf::DW_OP_consts: case dwarf::DW_OP_deref_size: case dwarf::DW_OP_plus_uconst: case dwarf::DW_OP_LLVM_tag_offset: - case dwarf::DW_OP_entry_value: + case dwarf::DW_OP_LLVM_entry_value: + case dwarf::DW_OP_regx: return 2; default: return 1; @@ -849,8 +857,13 @@ bool DIExpression::isValid() const { if (I->get() + I->getSize() > E->get()) return false; + uint64_t Op = I->getOp(); + if ((Op >= dwarf::DW_OP_reg0 && Op <= dwarf::DW_OP_reg31) || + (Op >= dwarf::DW_OP_breg0 && Op <= dwarf::DW_OP_breg31)) + return true; + // Check that the operand is valid. 
- switch (I->getOp()) { + switch (Op) { default: return false; case dwarf::DW_OP_LLVM_fragment: @@ -877,10 +890,12 @@ bool DIExpression::isValid() const { return false; break; } - case dwarf::DW_OP_entry_value: { - // An entry value operator must appear at the begin and the size - // of following expression should be 1, because we support only - // entry values of a simple register location. + case dwarf::DW_OP_LLVM_entry_value: { + // An entry value operator must appear at the beginning and the number of + // operations it cover can currently only be 1, because we support only + // entry values of a simple register location. One reason for this is that + // we currently can't calculate the size of the resulting DWARF block for + // other expressions. return I->get() == expr_op_begin()->get() && I->getArg(0) == 1 && getNumElements() == 2; } @@ -905,6 +920,8 @@ bool DIExpression::isValid() const { case dwarf::DW_OP_lit0: case dwarf::DW_OP_not: case dwarf::DW_OP_dup: + case dwarf::DW_OP_regx: + case dwarf::DW_OP_bregx: break; } } @@ -1035,7 +1052,7 @@ DIExpression *DIExpression::prependOpcodes(const DIExpression *Expr, assert(Expr && "Can't prepend ops to this expression"); if (EntryValue) { - Ops.push_back(dwarf::DW_OP_entry_value); + Ops.push_back(dwarf::DW_OP_LLVM_entry_value); // Add size info needed for entry value expression. // Add plus one for target register operand. Ops.push_back(Expr->getNumElements() + 1); @@ -1146,6 +1163,7 @@ Optional DIExpression::createFragmentExpression( Op.appendToVector(Ops); } } + assert(Expr && "Unknown DIExpression"); Ops.push_back(dwarf::DW_OP_LLVM_fragment); Ops.push_back(OffsetInBits); Ops.push_back(SizeInBits); diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp index 4a8e3cca3493..99d5aec3f043 100644 --- a/lib/IR/DiagnosticInfo.cpp +++ b/lib/IR/DiagnosticInfo.cpp @@ -370,5 +370,16 @@ std::string DiagnosticInfoOptimizationBase::getMsg() const { return OS.str(); } +DiagnosticInfoMisExpect::DiagnosticInfoMisExpect(const Instruction *Inst, + Twine &Msg) + : DiagnosticInfoWithLocationBase(DK_MisExpect, DS_Warning, + *Inst->getParent()->getParent(), + Inst->getDebugLoc()), + Msg(Msg) {} + +void DiagnosticInfoMisExpect::print(DiagnosticPrinter &DP) const { + DP << getLocationStr() << ": " << getMsg(); +} + void OptimizationRemarkAnalysisFPCommute::anchor() {} void OptimizationRemarkAnalysisAliasing::anchor() {} diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index dc28d22548dd..3f70d2c904e5 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -251,7 +251,7 @@ Function::Function(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, // We only need a symbol table for a function if the context keeps value names if (!getContext().shouldDiscardValueNames()) - SymTab = make_unique(); + SymTab = std::make_unique(); // If the function has arguments, mark them as lazily built. if (Ty->getNumParams()) @@ -293,7 +293,8 @@ void Function::BuildLazyArguments() const { // Clear the lazy arguments bit. unsigned SDC = getSubclassDataFromValue(); - const_cast(this)->setValueSubclassData(SDC &= ~(1<<0)); + SDC &= ~(1 << 0); + const_cast(this)->setValueSubclassData(SDC); assert(!hasLazyArguments()); } @@ -611,9 +612,11 @@ static std::string getMangledTypeStr(Type* Ty) { Result += "vararg"; // Ensure nested function types are distinguishable. 
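The DW_OP_LLVM_entry_value handling above only accepts expressions where the operator comes first and covers a single operation. A small sketch of the accepted shape (Ctx is a placeholder LLVMContext; the helper name is invented for illustration):

    #include <cassert>
    #include "llvm/BinaryFormat/Dwarf.h"
    #include "llvm/IR/DebugInfoMetadata.h"
    using namespace llvm;

    DIExpression *makeEntryValueExpr(LLVMContext &Ctx) {
      // {DW_OP_LLVM_entry_value, 1}: "the value this register had on entry",
      // covering exactly one following operation (the register location itself).
      DIExpression *E =
          DIExpression::get(Ctx, {dwarf::DW_OP_LLVM_entry_value, 1});
      assert(E->isValid() && E->getNumElements() == 2);
      return E;
    }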
Result += "f"; - } else if (isa(Ty)) { - Result += "v" + utostr(Ty->getVectorNumElements()) + - getMangledTypeStr(Ty->getVectorElementType()); + } else if (VectorType* VTy = dyn_cast(Ty)) { + if (VTy->isScalable()) + Result += "nx"; + Result += "v" + utostr(VTy->getVectorNumElements()) + + getMangledTypeStr(VTy->getVectorElementType()); } else if (Ty) { switch (Ty->getTypeID()) { default: llvm_unreachable("Unhandled type"); @@ -700,7 +703,11 @@ enum IIT_Info { IIT_STRUCT7 = 39, IIT_STRUCT8 = 40, IIT_F128 = 41, - IIT_VEC_ELEMENT = 42 + IIT_VEC_ELEMENT = 42, + IIT_SCALABLE_VEC = 43, + IIT_SUBDIVIDE2_ARG = 44, + IIT_SUBDIVIDE4_ARG = 45, + IIT_VEC_OF_BITCASTS_TO_INT = 46 }; static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, @@ -865,12 +872,36 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, DecodeIITType(NextElt, Infos, OutputTable); return; } + case IIT_SUBDIVIDE2_ARG: { + unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Subdivide2Argument, + ArgInfo)); + return; + } + case IIT_SUBDIVIDE4_ARG: { + unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Subdivide4Argument, + ArgInfo)); + return; + } case IIT_VEC_ELEMENT: { unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); OutputTable.push_back(IITDescriptor::get(IITDescriptor::VecElementArgument, ArgInfo)); return; } + case IIT_SCALABLE_VEC: { + OutputTable.push_back(IITDescriptor::get(IITDescriptor::ScalableVecArgument, + 0)); + DecodeIITType(NextElt, Infos, OutputTable); + return; + } + case IIT_VEC_OF_BITCASTS_TO_INT: { + unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); + OutputTable.push_back(IITDescriptor::get(IITDescriptor::VecOfBitcastsToInt, + ArgInfo)); + return; + } } llvm_unreachable("unhandled"); } @@ -961,6 +992,14 @@ static Type *DecodeFixedType(ArrayRef &Infos, assert(ITy->getBitWidth() % 2 == 0); return IntegerType::get(Context, ITy->getBitWidth() / 2); } + case IITDescriptor::Subdivide2Argument: + case IITDescriptor::Subdivide4Argument: { + Type *Ty = Tys[D.getArgumentNumber()]; + VectorType *VTy = dyn_cast(Ty); + assert(VTy && "Expected an argument of Vector Type"); + int SubDivs = D.Kind == IITDescriptor::Subdivide2Argument ? 
1 : 2; + return VectorType::getSubdividedVectorType(VTy, SubDivs); + } case IITDescriptor::HalfVecArgument: return VectorType::getHalfElementsVectorType(cast( Tys[D.getArgumentNumber()])); @@ -968,7 +1007,7 @@ static Type *DecodeFixedType(ArrayRef &Infos, Type *EltTy = DecodeFixedType(Infos, Tys, Context); Type *Ty = Tys[D.getArgumentNumber()]; if (auto *VTy = dyn_cast(Ty)) - return VectorType::get(EltTy, VTy->getNumElements()); + return VectorType::get(EltTy, VTy->getElementCount()); return EltTy; } case IITDescriptor::PtrToArgument: { @@ -989,9 +1028,20 @@ static Type *DecodeFixedType(ArrayRef &Infos, return VTy->getElementType(); llvm_unreachable("Expected an argument of Vector Type"); } + case IITDescriptor::VecOfBitcastsToInt: { + Type *Ty = Tys[D.getArgumentNumber()]; + VectorType *VTy = dyn_cast(Ty); + assert(VTy && "Expected an argument of Vector Type"); + return VectorType::getInteger(VTy); + } case IITDescriptor::VecOfAnyPtrsToElt: // Return the overloaded type (which determines the pointers address space) return Tys[D.getOverloadArgNumber()]; + case IITDescriptor::ScalableVecArgument: { + Type *Ty = DecodeFixedType(Infos, Tys, Context); + return VectorType::get(Ty->getVectorElementType(), + { Ty->getVectorNumElements(), true }); + } } llvm_unreachable("unhandled"); } @@ -1174,8 +1224,9 @@ static bool matchIntrinsicType( } case IITDescriptor::HalfVecArgument: // If this is a forward reference, defer the check for later. - return D.getArgumentNumber() >= ArgTys.size() || - !isa(ArgTys[D.getArgumentNumber()]) || + if (D.getArgumentNumber() >= ArgTys.size()) + return IsDeferredCheck || DeferCheck(Ty); + return !isa(ArgTys[D.getArgumentNumber()]) || VectorType::getHalfElementsVectorType( cast(ArgTys[D.getArgumentNumber()])) != Ty; case IITDescriptor::SameVecWidthArgument: { @@ -1191,8 +1242,8 @@ static bool matchIntrinsicType( return true; Type *EltTy = Ty; if (ThisArgType) { - if (ReferenceType->getVectorNumElements() != - ThisArgType->getVectorNumElements()) + if (ReferenceType->getElementCount() != + ThisArgType->getElementCount()) return true; EltTy = ThisArgType->getVectorElementType(); } @@ -1255,6 +1306,36 @@ static bool matchIntrinsicType( auto *ReferenceType = dyn_cast(ArgTys[D.getArgumentNumber()]); return !ReferenceType || Ty != ReferenceType->getElementType(); } + case IITDescriptor::Subdivide2Argument: + case IITDescriptor::Subdivide4Argument: { + // If this is a forward reference, defer the check for later. + if (D.getArgumentNumber() >= ArgTys.size()) + return IsDeferredCheck || DeferCheck(Ty); + + Type *NewTy = ArgTys[D.getArgumentNumber()]; + if (auto *VTy = dyn_cast(NewTy)) { + int SubDivs = D.Kind == IITDescriptor::Subdivide2Argument ? 
1 : 2; + NewTy = VectorType::getSubdividedVectorType(VTy, SubDivs); + return Ty != NewTy; + } + return true; + } + case IITDescriptor::ScalableVecArgument: { + VectorType *VTy = dyn_cast(Ty); + if (!VTy || !VTy->isScalable()) + return true; + return matchIntrinsicType(VTy, Infos, ArgTys, DeferredChecks, + IsDeferredCheck); + } + case IITDescriptor::VecOfBitcastsToInt: { + if (D.getArgumentNumber() >= ArgTys.size()) + return IsDeferredCheck || DeferCheck(Ty); + auto *ReferenceType = dyn_cast(ArgTys[D.getArgumentNumber()]); + auto *ThisArgVecTy = dyn_cast(Ty); + if (!ThisArgVecTy || !ReferenceType) + return true; + return ThisArgVecTy != VectorType::getInteger(ReferenceType); + } } llvm_unreachable("unhandled"); } diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp index e2bfc0420bc5..46a9696b2944 100644 --- a/lib/IR/Globals.cpp +++ b/lib/IR/Globals.cpp @@ -114,18 +114,22 @@ unsigned GlobalValue::getAddressSpace() const { } void GlobalObject::setAlignment(unsigned Align) { - assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!"); - assert(Align <= MaximumAlignment && + setAlignment(MaybeAlign(Align)); +} + +void GlobalObject::setAlignment(MaybeAlign Align) { + assert((!Align || Align <= MaximumAlignment) && "Alignment is greater than MaximumAlignment!"); - unsigned AlignmentData = Log2_32(Align) + 1; + unsigned AlignmentData = encode(Align); unsigned OldData = getGlobalValueSubClassData(); setGlobalValueSubClassData((OldData & ~AlignmentMask) | AlignmentData); - assert(getAlignment() == Align && "Alignment representation error!"); + assert(MaybeAlign(getAlignment()) == Align && + "Alignment representation error!"); } void GlobalObject::copyAttributesFrom(const GlobalObject *Src) { GlobalValue::copyAttributesFrom(Src); - setAlignment(Src->getAlignment()); + setAlignment(MaybeAlign(Src->getAlignment())); setSection(Src->getSection()); } @@ -427,6 +431,43 @@ GlobalIndirectSymbol::GlobalIndirectSymbol(Type *Ty, ValueTy VTy, Op<0>() = Symbol; } +static const GlobalObject * +findBaseObject(const Constant *C, DenseSet &Aliases) { + if (auto *GO = dyn_cast(C)) + return GO; + if (auto *GA = dyn_cast(C)) + if (Aliases.insert(GA).second) + return findBaseObject(GA->getOperand(0), Aliases); + if (auto *CE = dyn_cast(C)) { + switch (CE->getOpcode()) { + case Instruction::Add: { + auto *LHS = findBaseObject(CE->getOperand(0), Aliases); + auto *RHS = findBaseObject(CE->getOperand(1), Aliases); + if (LHS && RHS) + return nullptr; + return LHS ? 
LHS : RHS; + } + case Instruction::Sub: { + if (findBaseObject(CE->getOperand(1), Aliases)) + return nullptr; + return findBaseObject(CE->getOperand(0), Aliases); + } + case Instruction::IntToPtr: + case Instruction::PtrToInt: + case Instruction::BitCast: + case Instruction::GetElementPtr: + return findBaseObject(CE->getOperand(0), Aliases); + default: + break; + } + } + return nullptr; +} + +const GlobalObject *GlobalIndirectSymbol::getBaseObject() const { + DenseSet Aliases; + return findBaseObject(getOperand(0), Aliases); +} //===----------------------------------------------------------------------===// // GlobalAlias Implementation diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp index 0c6461c9078f..b782012e9731 100644 --- a/lib/IR/IRBuilder.cpp +++ b/lib/IR/IRBuilder.cpp @@ -49,7 +49,7 @@ GlobalVariable *IRBuilderBase::CreateGlobalString(StringRef Str, nullptr, GlobalVariable::NotThreadLocal, AddressSpace); GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); - GV->setAlignment(1); + GV->setAlignment(Align::None()); return GV; } @@ -289,8 +289,10 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemMove( CallInst *CI = createCallHelper(TheFn, Ops, this); // Set the alignment of the pointer args. - CI->addParamAttr(0, Attribute::getWithAlignment(CI->getContext(), DstAlign)); - CI->addParamAttr(1, Attribute::getWithAlignment(CI->getContext(), SrcAlign)); + CI->addParamAttr( + 0, Attribute::getWithAlignment(CI->getContext(), Align(DstAlign))); + CI->addParamAttr( + 1, Attribute::getWithAlignment(CI->getContext(), Align(SrcAlign))); // Set the TBAA info if present. if (TBAATag) diff --git a/lib/IR/IRPrintingPasses.cpp b/lib/IR/IRPrintingPasses.cpp index 35b06135a828..953cf9410162 100644 --- a/lib/IR/IRPrintingPasses.cpp +++ b/lib/IR/IRPrintingPasses.cpp @@ -26,14 +26,22 @@ PrintModulePass::PrintModulePass(raw_ostream &OS, const std::string &Banner, ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {} PreservedAnalyses PrintModulePass::run(Module &M, ModuleAnalysisManager &) { - if (!Banner.empty()) - OS << Banner << "\n"; - if (llvm::isFunctionInPrintList("*")) + if (llvm::isFunctionInPrintList("*")) { + if (!Banner.empty()) + OS << Banner << "\n"; M.print(OS, nullptr, ShouldPreserveUseListOrder); + } else { - for(const auto &F : M.functions()) - if (llvm::isFunctionInPrintList(F.getName())) + bool BannerPrinted = false; + for(const auto &F : M.functions()) { + if (llvm::isFunctionInPrintList(F.getName())) { + if (!BannerPrinted && !Banner.empty()) { + OS << Banner << "\n"; + BannerPrinted = true; + } F.print(OS); + } + } } return PreservedAnalyses::all(); } diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp index 99da7caaccf0..fd732f9eda8b 100644 --- a/lib/IR/InlineAsm.cpp +++ b/lib/IR/InlineAsm.cpp @@ -181,6 +181,16 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str, // FIXME: For now assuming these are 2-character constraints. pCodes->push_back(StringRef(I+1, 2)); I += 3; + } else if (*I == '@') { + // Multi-letter constraint + ++I; + unsigned char C = static_cast(*I); + assert(isdigit(C) && "Expected a digit!"); + int N = C - '0'; + assert(N > 0 && "Found a zero letter constraint!"); + ++I; + pCodes->push_back(StringRef(I, N)); + I += N; } else { // Single letter constraint. 
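The '@' branch added to the constraint parser above introduces an encoding of the form '@', one digit N, then exactly N characters. A sketch of what the parser is expected to produce (the constraint string "@3foo" is purely illustrative, not a real target constraint):

    #include "llvm/IR/InlineAsm.h"
    using namespace llvm;

    bool parsesMultiLetterCode() {
      InlineAsm::ConstraintInfoVector CV =
          InlineAsm::ParseConstraints("=r,@3foo");
      // The second operand should carry the three-character code "foo".
      return CV.size() == 2 && CV[1].Codes.size() == 1 &&
             CV[1].Codes[0] == "foo";
    }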
pCodes->push_back(StringRef(I, 1)); diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp index ba5629d1662b..b157c7bb34bf 100644 --- a/lib/IR/Instruction.cpp +++ b/lib/IR/Instruction.cpp @@ -524,7 +524,7 @@ bool Instruction::mayReadFromMemory() const { case Instruction::Call: case Instruction::Invoke: case Instruction::CallBr: - return !cast(this)->doesNotAccessMemory(); + return !cast(this)->doesNotReadMemory(); case Instruction::Store: return !cast(this)->isUnordered(); } diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index 2e7cad103c12..245c7628b08e 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -38,6 +38,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/TypeSize.h" #include #include #include @@ -45,12 +46,6 @@ using namespace llvm; -static cl::opt SwitchInstProfUpdateWrapperStrict( - "switch-inst-prof-update-wrapper-strict", cl::Hidden, - cl::desc("Assert that prof branch_weights metadata is valid when creating " - "an instance of SwitchInstProfUpdateWrapper"), - cl::init(false)); - //===----------------------------------------------------------------------===// // AllocaInst Class //===----------------------------------------------------------------------===// @@ -822,6 +817,17 @@ void CallBrInst::init(FunctionType *FTy, Value *Fn, BasicBlock *Fallthrough, setName(NameStr); } +void CallBrInst::updateArgBlockAddresses(unsigned i, BasicBlock *B) { + assert(getNumIndirectDests() > i && "IndirectDest # out of range for callbr"); + if (BasicBlock *OldBB = getIndirectDest(i)) { + BlockAddress *Old = BlockAddress::get(OldBB); + BlockAddress *New = BlockAddress::get(B); + for (unsigned ArgNo = 0, e = getNumArgOperands(); ArgNo != e; ++ArgNo) + if (dyn_cast(getArgOperand(ArgNo)) == Old) + setArgOperand(ArgNo, New); + } +} + CallBrInst::CallBrInst(const CallBrInst &CBI) : CallBase(CBI.Attrs, CBI.FTy, CBI.getType(), Instruction::CallBr, OperandTraits::op_end(this) - CBI.getNumOperands(), @@ -1223,7 +1229,7 @@ AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize, : UnaryInstruction(PointerType::get(Ty, AddrSpace), Alloca, getAISize(Ty->getContext(), ArraySize), InsertBefore), AllocatedType(Ty) { - setAlignment(Align); + setAlignment(MaybeAlign(Align)); assert(!Ty->isVoidTy() && "Cannot allocate void!"); setName(Name); } @@ -1234,18 +1240,21 @@ AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize, : UnaryInstruction(PointerType::get(Ty, AddrSpace), Alloca, getAISize(Ty->getContext(), ArraySize), InsertAtEnd), AllocatedType(Ty) { - setAlignment(Align); + setAlignment(MaybeAlign(Align)); assert(!Ty->isVoidTy() && "Cannot allocate void!"); setName(Name); } -void AllocaInst::setAlignment(unsigned Align) { - assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!"); - assert(Align <= MaximumAlignment && +void AllocaInst::setAlignment(MaybeAlign Align) { + assert((!Align || *Align <= MaximumAlignment) && "Alignment is greater than MaximumAlignment!"); setInstructionSubclassData((getSubclassDataFromInstruction() & ~31) | - (Log2_32(Align) + 1)); - assert(getAlignment() == Align && "Alignment representation error!"); + encode(Align)); + if (Align) + assert(getAlignment() == Align->value() && + "Alignment representation error!"); + else + assert(getAlignment() == 0 && "Alignment representation error!"); } bool AllocaInst::isArrayAllocation() const { @@ -1287,36 +1296,36 @@ LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine 
&Name, LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, Instruction *InsertBef) - : LoadInst(Ty, Ptr, Name, isVolatile, /*Align=*/0, InsertBef) {} + : LoadInst(Ty, Ptr, Name, isVolatile, /*Align=*/None, InsertBef) {} LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, BasicBlock *InsertAE) - : LoadInst(Ty, Ptr, Name, isVolatile, /*Align=*/0, InsertAE) {} + : LoadInst(Ty, Ptr, Name, isVolatile, /*Align=*/None, InsertAE) {} LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, - unsigned Align, Instruction *InsertBef) + MaybeAlign Align, Instruction *InsertBef) : LoadInst(Ty, Ptr, Name, isVolatile, Align, AtomicOrdering::NotAtomic, SyncScope::System, InsertBef) {} LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, - unsigned Align, BasicBlock *InsertAE) + MaybeAlign Align, BasicBlock *InsertAE) : LoadInst(Ty, Ptr, Name, isVolatile, Align, AtomicOrdering::NotAtomic, SyncScope::System, InsertAE) {} LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, - unsigned Align, AtomicOrdering Order, - SyncScope::ID SSID, Instruction *InsertBef) + MaybeAlign Align, AtomicOrdering Order, SyncScope::ID SSID, + Instruction *InsertBef) : UnaryInstruction(Ty, Load, Ptr, InsertBef) { assert(Ty == cast(Ptr->getType())->getElementType()); setVolatile(isVolatile); - setAlignment(Align); + setAlignment(MaybeAlign(Align)); setAtomic(Order, SSID); AssertOK(); setName(Name); } LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, - unsigned Align, AtomicOrdering Order, SyncScope::ID SSID, + MaybeAlign Align, AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAE) : UnaryInstruction(Ty, Load, Ptr, InsertAE) { assert(Ty == cast(Ptr->getType())->getElementType()); @@ -1327,13 +1336,16 @@ LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, setName(Name); } -void LoadInst::setAlignment(unsigned Align) { - assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!"); - assert(Align <= MaximumAlignment && +void LoadInst::setAlignment(MaybeAlign Align) { + assert((!Align || *Align <= MaximumAlignment) && "Alignment is greater than MaximumAlignment!"); setInstructionSubclassData((getSubclassDataFromInstruction() & ~(31 << 1)) | - ((Log2_32(Align)+1)<<1)); - assert(getAlignment() == Align && "Alignment representation error!"); + (encode(Align) << 1)); + if (Align) + assert(getAlignment() == Align->value() && + "Alignment representation error!"); + else + assert(getAlignment() == 0 && "Alignment representation error!"); } //===----------------------------------------------------------------------===// @@ -1359,30 +1371,28 @@ StoreInst::StoreInst(Value *val, Value *addr, BasicBlock *InsertAtEnd) StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, Instruction *InsertBefore) - : StoreInst(val, addr, isVolatile, /*Align=*/0, InsertBefore) {} + : StoreInst(val, addr, isVolatile, /*Align=*/None, InsertBefore) {} StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, BasicBlock *InsertAtEnd) - : StoreInst(val, addr, isVolatile, /*Align=*/0, InsertAtEnd) {} + : StoreInst(val, addr, isVolatile, /*Align=*/None, InsertAtEnd) {} -StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align, +StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, MaybeAlign Align, Instruction *InsertBefore) : StoreInst(val, addr, isVolatile, Align, AtomicOrdering::NotAtomic, SyncScope::System, InsertBefore) {} 
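A minimal sketch of driving the MaybeAlign-based LoadInst/StoreInst constructors above (Ty, Ptr, Val and InsertPt are placeholders for values a caller already has):

    #include "llvm/IR/Instructions.h"
    #include "llvm/Support/Alignment.h"
    using namespace llvm;

    Instruction *emitAlignedPair(Type *Ty, Value *Ptr, Value *Val,
                                 Instruction *InsertPt) {
      // Explicit 16-byte alignment on the load.
      auto *LI = new LoadInst(Ty, Ptr, "ld", /*isVolatile=*/false,
                              MaybeAlign(16), InsertPt);
      // Unknown alignment on the store: pass None rather than the old magic 0.
      new StoreInst(Val, Ptr, /*isVolatile=*/false, /*Align=*/None, InsertPt);
      return LI;
    }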
-StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align, +StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, MaybeAlign Align, BasicBlock *InsertAtEnd) : StoreInst(val, addr, isVolatile, Align, AtomicOrdering::NotAtomic, SyncScope::System, InsertAtEnd) {} -StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, - unsigned Align, AtomicOrdering Order, - SyncScope::ID SSID, +StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, MaybeAlign Align, + AtomicOrdering Order, SyncScope::ID SSID, Instruction *InsertBefore) - : Instruction(Type::getVoidTy(val->getContext()), Store, - OperandTraits::op_begin(this), - OperandTraits::operands(this), - InsertBefore) { + : Instruction(Type::getVoidTy(val->getContext()), Store, + OperandTraits::op_begin(this), + OperandTraits::operands(this), InsertBefore) { Op<0>() = val; Op<1>() = addr; setVolatile(isVolatile); @@ -1391,14 +1401,12 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, AssertOK(); } -StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, - unsigned Align, AtomicOrdering Order, - SyncScope::ID SSID, +StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, MaybeAlign Align, + AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd) - : Instruction(Type::getVoidTy(val->getContext()), Store, - OperandTraits::op_begin(this), - OperandTraits::operands(this), - InsertAtEnd) { + : Instruction(Type::getVoidTy(val->getContext()), Store, + OperandTraits::op_begin(this), + OperandTraits::operands(this), InsertAtEnd) { Op<0>() = val; Op<1>() = addr; setVolatile(isVolatile); @@ -1407,13 +1415,16 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, AssertOK(); } -void StoreInst::setAlignment(unsigned Align) { - assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!"); - assert(Align <= MaximumAlignment && +void StoreInst::setAlignment(MaybeAlign Align) { + assert((!Align || *Align <= MaximumAlignment) && "Alignment is greater than MaximumAlignment!"); setInstructionSubclassData((getSubclassDataFromInstruction() & ~(31 << 1)) | - ((Log2_32(Align)+1) << 1)); - assert(getAlignment() == Align && "Alignment representation error!"); + (encode(Align) << 1)); + if (Align) + assert(getAlignment() == Align->value() && + "Alignment representation error!"); + else + assert(getAlignment() == 0 && "Alignment representation error!"); } //===----------------------------------------------------------------------===// @@ -1778,7 +1789,7 @@ ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask, const Twine &Name, Instruction *InsertBefore) : Instruction(VectorType::get(cast(V1->getType())->getElementType(), - cast(Mask->getType())->getNumElements()), + cast(Mask->getType())->getElementCount()), ShuffleVector, OperandTraits::op_begin(this), OperandTraits::operands(this), @@ -1795,7 +1806,7 @@ ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask, const Twine &Name, BasicBlock *InsertAtEnd) : Instruction(VectorType::get(cast(V1->getType())->getElementType(), - cast(Mask->getType())->getNumElements()), + cast(Mask->getType())->getElementCount()), ShuffleVector, OperandTraits::op_begin(this), OperandTraits::operands(this), @@ -2968,8 +2979,8 @@ bool CastInst::isCastable(Type *SrcTy, Type *DestTy) { } // Get the bit sizes, we'll need these - unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr - unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr + TypeSize SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 
for ptr + TypeSize DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr // Run through the possibilities ... if (DestTy->isIntegerTy()) { // Casting to integral @@ -3016,7 +3027,7 @@ bool CastInst::isBitCastable(Type *SrcTy, Type *DestTy) { if (VectorType *SrcVecTy = dyn_cast(SrcTy)) { if (VectorType *DestVecTy = dyn_cast(DestTy)) { - if (SrcVecTy->getNumElements() == DestVecTy->getNumElements()) { + if (SrcVecTy->getElementCount() == DestVecTy->getElementCount()) { // An element by element cast. Valid if casting the elements is valid. SrcTy = SrcVecTy->getElementType(); DestTy = DestVecTy->getElementType(); @@ -3030,12 +3041,12 @@ bool CastInst::isBitCastable(Type *SrcTy, Type *DestTy) { } } - unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr - unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr + TypeSize SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr + TypeSize DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr // Could still have vectors of pointers if the number of elements doesn't // match - if (SrcBits == 0 || DestBits == 0) + if (SrcBits.getKnownMinSize() == 0 || DestBits.getKnownMinSize() == 0) return false; if (SrcBits != DestBits) @@ -3886,7 +3897,7 @@ SwitchInstProfUpdateWrapper::getProfBranchWeightsMD(const SwitchInst &SI) { } MDNode *SwitchInstProfUpdateWrapper::buildProfBranchWeightsMD() { - assert(State == Changed && "called only if metadata has changed"); + assert(Changed && "called only if metadata has changed"); if (!Weights) return nullptr; @@ -3905,17 +3916,12 @@ MDNode *SwitchInstProfUpdateWrapper::buildProfBranchWeightsMD() { void SwitchInstProfUpdateWrapper::init() { MDNode *ProfileData = getProfBranchWeightsMD(SI); - if (!ProfileData) { - State = Initialized; + if (!ProfileData) return; - } if (ProfileData->getNumOperands() != SI.getNumSuccessors() + 1) { - State = Invalid; - if (SwitchInstProfUpdateWrapperStrict) - llvm_unreachable("number of prof branch_weights metadata operands does " - "not correspond to number of succesors"); - return; + llvm_unreachable("number of prof branch_weights metadata operands does " + "not correspond to number of succesors"); } SmallVector Weights; @@ -3924,7 +3930,6 @@ void SwitchInstProfUpdateWrapper::init() { uint32_t CW = C->getValue().getZExtValue(); Weights.push_back(CW); } - State = Initialized; this->Weights = std::move(Weights); } @@ -3933,7 +3938,7 @@ SwitchInstProfUpdateWrapper::removeCase(SwitchInst::CaseIt I) { if (Weights) { assert(SI.getNumSuccessors() == Weights->size() && "num of prof branch_weights must accord with num of successors"); - State = Changed; + Changed = true; // Copy the last case to the place of the removed one and shrink. // This is tightly coupled with the way SwitchInst::removeCase() removes // the cases in SwitchInst::removeCase(CaseIt). @@ -3948,15 +3953,12 @@ void SwitchInstProfUpdateWrapper::addCase( SwitchInstProfUpdateWrapper::CaseWeightOpt W) { SI.addCase(OnVal, Dest); - if (State == Invalid) - return; - if (!Weights && W && *W) { - State = Changed; + Changed = true; Weights = SmallVector(SI.getNumSuccessors(), 0); Weights.getValue()[SI.getNumSuccessors() - 1] = *W; } else if (Weights) { - State = Changed; + Changed = true; Weights.getValue().push_back(W ? *W : 0); } if (Weights) @@ -3967,11 +3969,9 @@ void SwitchInstProfUpdateWrapper::addCase( SymbolTableList::iterator SwitchInstProfUpdateWrapper::eraseFromParent() { // Instruction is erased. Mark as unchanged to not touch it in the destructor. 
- if (State != Invalid) { - State = Initialized; - if (Weights) - Weights->resize(0); - } + Changed = false; + if (Weights) + Weights->resize(0); return SI.eraseFromParent(); } @@ -3984,7 +3984,7 @@ SwitchInstProfUpdateWrapper::getSuccessorWeight(unsigned idx) { void SwitchInstProfUpdateWrapper::setSuccessorWeight( unsigned idx, SwitchInstProfUpdateWrapper::CaseWeightOpt W) { - if (!W || State == Invalid) + if (!W) return; if (!Weights && *W) @@ -3993,7 +3993,7 @@ void SwitchInstProfUpdateWrapper::setSuccessorWeight( if (Weights) { auto &OldW = Weights.getValue()[idx]; if (*W != OldW) { - State = Changed; + Changed = true; OldW = *W; } } @@ -4136,13 +4136,14 @@ AllocaInst *AllocaInst::cloneImpl() const { LoadInst *LoadInst::cloneImpl() const { return new LoadInst(getType(), getOperand(0), Twine(), isVolatile(), - getAlignment(), getOrdering(), getSyncScopeID()); + MaybeAlign(getAlignment()), getOrdering(), + getSyncScopeID()); } StoreInst *StoreInst::cloneImpl() const { return new StoreInst(getOperand(0), getOperand(1), isVolatile(), - getAlignment(), getOrdering(), getSyncScopeID()); - + MaybeAlign(getAlignment()), getOrdering(), + getSyncScopeID()); } AtomicCmpXchgInst *AtomicCmpXchgInst::cloneImpl() const { diff --git a/lib/IR/IntrinsicInst.cpp b/lib/IR/IntrinsicInst.cpp index 7a042326f67f..26ed46a9cd91 100644 --- a/lib/IR/IntrinsicInst.cpp +++ b/lib/IR/IntrinsicInst.cpp @@ -67,13 +67,12 @@ int llvm::Intrinsic::lookupLLVMIntrinsicByName(ArrayRef NameTable, // size 1. During the search, we can skip the prefix that we already know is // identical. By using strncmp we consider names with differing suffixes to // be part of the equal range. - size_t CmpStart = 0; size_t CmpEnd = 4; // Skip the "llvm" component. const char *const *Low = NameTable.begin(); const char *const *High = NameTable.end(); const char *const *LastLow = Low; while (CmpEnd < Name.size() && High - Low > 0) { - CmpStart = CmpEnd; + size_t CmpStart = CmpEnd; CmpEnd = Name.find('.', CmpStart + 1); CmpEnd = CmpEnd == StringRef::npos ? 
Name.size() : CmpEnd; auto Cmp = [CmpStart, CmpEnd](const char *LHS, const char *RHS) { @@ -107,7 +106,7 @@ Optional ConstrainedFPIntrinsic::getRoundingMode() const { unsigned NumOperands = getNumArgOperands(); Metadata *MD = - dyn_cast(getArgOperand(NumOperands - 2))->getMetadata(); + cast(getArgOperand(NumOperands - 2))->getMetadata(); if (!MD || !isa(MD)) return None; return StrToRoundingMode(cast(MD)->getString()); @@ -143,7 +142,7 @@ ConstrainedFPIntrinsic::RoundingModeToStr(RoundingMode UseRounding) { RoundingStr = "round.upward"; break; case ConstrainedFPIntrinsic::rmTowardZero: - RoundingStr = "round.tozero"; + RoundingStr = "round.towardzero"; break; } return RoundingStr; @@ -153,7 +152,7 @@ Optional ConstrainedFPIntrinsic::getExceptionBehavior() const { unsigned NumOperands = getNumArgOperands(); Metadata *MD = - dyn_cast(getArgOperand(NumOperands - 1))->getMetadata(); + cast(getArgOperand(NumOperands - 1))->getMetadata(); if (!MD || !isa(MD)) return None; return StrToExceptionBehavior(cast(MD)->getString()); @@ -189,6 +188,8 @@ bool ConstrainedFPIntrinsic::isUnaryOp() const { switch (getIntrinsicID()) { default: return false; + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: case Intrinsic::experimental_constrained_fptrunc: case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: @@ -199,10 +200,14 @@ bool ConstrainedFPIntrinsic::isUnaryOp() const { case Intrinsic::experimental_constrained_log: case Intrinsic::experimental_constrained_log10: case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_lrint: + case Intrinsic::experimental_constrained_llrint: case Intrinsic::experimental_constrained_rint: case Intrinsic::experimental_constrained_nearbyint: case Intrinsic::experimental_constrained_ceil: case Intrinsic::experimental_constrained_floor: + case Intrinsic::experimental_constrained_lround: + case Intrinsic::experimental_constrained_llround: case Intrinsic::experimental_constrained_round: case Intrinsic::experimental_constrained_trunc: return true; diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp index e1cdf6b539db..5e8772186a2a 100644 --- a/lib/IR/LLVMContext.cpp +++ b/lib/IR/LLVMContext.cpp @@ -36,34 +36,9 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) { // Create the fixed metadata kinds. This is done in the same order as the // MD_* enum values so that they correspond. 
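A small sketch of the corrected rounding-mode string above ("round.towardzero", previously "round.tozero"), assuming StrToRoundingMode/RoundingModeToStr are the static helpers declared in IntrinsicInst.h:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/IR/IntrinsicInst.h"
    using namespace llvm;

    bool towardZeroStringRoundTrips() {
      auto Str = ConstrainedFPIntrinsic::RoundingModeToStr(
          ConstrainedFPIntrinsic::rmTowardZero);
      auto RM = ConstrainedFPIntrinsic::StrToRoundingMode("round.towardzero");
      return Str == StringRef("round.towardzero") &&
             RM == ConstrainedFPIntrinsic::rmTowardZero;
    }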
std::pair MDKinds[] = { - {MD_dbg, "dbg"}, - {MD_tbaa, "tbaa"}, - {MD_prof, "prof"}, - {MD_fpmath, "fpmath"}, - {MD_range, "range"}, - {MD_tbaa_struct, "tbaa.struct"}, - {MD_invariant_load, "invariant.load"}, - {MD_alias_scope, "alias.scope"}, - {MD_noalias, "noalias"}, - {MD_nontemporal, "nontemporal"}, - {MD_mem_parallel_loop_access, "llvm.mem.parallel_loop_access"}, - {MD_nonnull, "nonnull"}, - {MD_dereferenceable, "dereferenceable"}, - {MD_dereferenceable_or_null, "dereferenceable_or_null"}, - {MD_make_implicit, "make.implicit"}, - {MD_unpredictable, "unpredictable"}, - {MD_invariant_group, "invariant.group"}, - {MD_align, "align"}, - {MD_loop, "llvm.loop"}, - {MD_type, "type"}, - {MD_section_prefix, "section_prefix"}, - {MD_absolute_symbol, "absolute_symbol"}, - {MD_associated, "associated"}, - {MD_callees, "callees"}, - {MD_irr_loop, "irr_loop"}, - {MD_access_group, "llvm.access.group"}, - {MD_callback, "callback"}, - {MD_preserve_access_index, "llvm.preserve.access.index"}, +#define LLVM_FIXED_MD_KIND(EnumID, Name, Value) {EnumID, Name}, +#include "llvm/IR/FixedMetadataKinds.def" +#undef LLVM_FIXED_MD_KIND }; for (auto &MDKind : MDKinds) { diff --git a/lib/IR/LLVMContextImpl.cpp b/lib/IR/LLVMContextImpl.cpp index c6ab2c6f213a..5f9782714170 100644 --- a/lib/IR/LLVMContextImpl.cpp +++ b/lib/IR/LLVMContextImpl.cpp @@ -21,7 +21,7 @@ using namespace llvm; LLVMContextImpl::LLVMContextImpl(LLVMContext &C) - : DiagHandler(llvm::make_unique()), + : DiagHandler(std::make_unique()), VoidTy(C, Type::VoidTyID), LabelTy(C, Type::LabelTyID), HalfTy(C, Type::HalfTyID), diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp index c575d6e782b9..3a03c493100b 100644 --- a/lib/IR/LegacyPassManager.cpp +++ b/lib/IR/LegacyPassManager.cpp @@ -1680,7 +1680,6 @@ bool FPPassManager::runOnFunction(Function &F) { bool FPPassManager::runOnModule(Module &M) { bool Changed = false; - llvm::TimeTraceScope TimeScope("OptModule", M.getName()); for (Function &F : M) Changed |= runOnFunction(F); @@ -1999,10 +1998,28 @@ void FunctionPass::assignPassManager(PMStack &PMS, FPP->add(this); } +void BasicBlockPass::preparePassManager(PMStack &PMS) { + // Find BBPassManager + while (!PMS.empty() && + PMS.top()->getPassManagerType() > PMT_BasicBlockPassManager) + PMS.pop(); + + // If this pass is destroying high level information that is used + // by other passes that are managed by BBPM then do not insert + // this pass in current BBPM. Use new BBPassManager. + if (PMS.top()->getPassManagerType() == PMT_BasicBlockPassManager && + !PMS.top()->preserveHigherLevelAnalysis(this)) + PMS.pop(); +} + /// Find appropriate Basic Pass Manager or Call Graph Pass Manager /// in the PM Stack and add self into that manager. void BasicBlockPass::assignPassManager(PMStack &PMS, PassManagerType PreferredType) { + while (!PMS.empty() && + PMS.top()->getPassManagerType() > PMT_BasicBlockPassManager) + PMS.pop(); + BBPassManager *BBP; // Basic Pass Manager is a leaf pass manager. 
It does not handle @@ -2018,6 +2035,7 @@ void BasicBlockPass::assignPassManager(PMStack &PMS, // [1] Create new Basic Block Manager BBP = new BBPassManager(); + BBP->populateInheritedAnalysis(PMS); // [2] Set up new manager's top level manager // Basic Block Pass Manager does not live by itself diff --git a/lib/IR/MDBuilder.cpp b/lib/IR/MDBuilder.cpp index 14bcb3a29b07..7bdb85ace522 100644 --- a/lib/IR/MDBuilder.cpp +++ b/lib/IR/MDBuilder.cpp @@ -309,3 +309,15 @@ MDNode *MDBuilder::createIrrLoopHeaderWeight(uint64_t Weight) { }; return MDNode::get(Context, Vals); } + +MDNode *MDBuilder::createMisExpect(uint64_t Index, uint64_t LikleyWeight, + uint64_t UnlikleyWeight) { + auto *IntType = Type::getInt64Ty(Context); + Metadata *Vals[] = { + createString("misexpect"), + createConstant(ConstantInt::get(IntType, Index)), + createConstant(ConstantInt::get(IntType, LikleyWeight)), + createConstant(ConstantInt::get(IntType, UnlikleyWeight)), + }; + return MDNode::get(Context, Vals); +} diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp index 748a2238e642..62c2aa86f3b0 100644 --- a/lib/IR/Metadata.cpp +++ b/lib/IR/Metadata.cpp @@ -1497,6 +1497,24 @@ void GlobalObject::addTypeMetadata(unsigned Offset, Metadata *TypeID) { TypeID})); } +void GlobalObject::addVCallVisibilityMetadata(VCallVisibility Visibility) { + addMetadata(LLVMContext::MD_vcall_visibility, + *MDNode::get(getContext(), + {ConstantAsMetadata::get(ConstantInt::get( + Type::getInt64Ty(getContext()), Visibility))})); +} + +GlobalObject::VCallVisibility GlobalObject::getVCallVisibility() const { + if (MDNode *MD = getMetadata(LLVMContext::MD_vcall_visibility)) { + uint64_t Val = cast( + cast(MD->getOperand(0))->getValue()) + ->getZExtValue(); + assert(Val <= 2 && "unknown vcall visibility!"); + return (VCallVisibility)Val; + } + return VCallVisibility::VCallVisibilityPublic; +} + void Function::setSubprogram(DISubprogram *SP) { setMetadata(LLVMContext::MD_dbg, SP); } diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp index dbf4035ac7c1..25efd009194f 100644 --- a/lib/IR/Module.cpp +++ b/lib/IR/Module.cpp @@ -604,7 +604,7 @@ GlobalVariable *llvm::collectUsedGlobalVariables( const ConstantArray *Init = cast(GV->getInitializer()); for (Value *Op : Init->operands()) { - GlobalValue *G = cast(Op->stripPointerCastsNoFollowAliases()); + GlobalValue *G = cast(Op->stripPointerCasts()); Set.insert(G); } return GV; diff --git a/lib/IR/RemarkStreamer.cpp b/lib/IR/RemarkStreamer.cpp index 5b4c7e72b479..0fcc06b961f3 100644 --- a/lib/IR/RemarkStreamer.cpp +++ b/lib/IR/RemarkStreamer.cpp @@ -15,15 +15,17 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/Remarks/BitstreamRemarkSerializer.h" #include "llvm/Remarks/RemarkFormat.h" +#include "llvm/Remarks/RemarkSerializer.h" using namespace llvm; -RemarkStreamer::RemarkStreamer(StringRef Filename, - std::unique_ptr Serializer) - : Filename(Filename), PassFilter(), Serializer(std::move(Serializer)) { - assert(!Filename.empty() && "This needs to be a real filename."); -} +RemarkStreamer::RemarkStreamer( + std::unique_ptr RemarkSerializer, + Optional FilenameIn) + : PassFilter(), RemarkSerializer(std::move(RemarkSerializer)), + Filename(FilenameIn ? Optional(FilenameIn->str()) : None) {} Error RemarkStreamer::setFilter(StringRef Filter) { Regex R = Regex(Filter); @@ -99,24 +101,13 @@ void RemarkStreamer::emit(const DiagnosticInfoOptimizationBase &Diag) { // First, convert the diagnostic to a remark. 
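For the new !vcall_visibility helpers above, a hedged sketch (VCallVisibilityLinkageUnit is assumed to be one of the GlobalObject::VCallVisibility enumerators alongside the VCallVisibilityPublic default shown in the hunk; GV is a placeholder vtable global):

    #include "llvm/IR/GlobalObject.h"
    using namespace llvm;

    void restrictVCallsToLinkageUnit(GlobalObject &GV) {
      // With no metadata attached, getVCallVisibility() reports Public.
      if (GV.getVCallVisibility() == GlobalObject::VCallVisibilityPublic)
        GV.addVCallVisibilityMetadata(GlobalObject::VCallVisibilityLinkageUnit);
    }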
remarks::Remark R = toRemark(Diag); // Then, emit the remark through the serializer. - Serializer->emit(R); + RemarkSerializer->emit(R); } char RemarkSetupFileError::ID = 0; char RemarkSetupPatternError::ID = 0; char RemarkSetupFormatError::ID = 0; -static std::unique_ptr -formatToSerializer(remarks::Format RemarksFormat, raw_ostream &OS) { - switch (RemarksFormat) { - default: - llvm_unreachable("Unknown remark serializer format."); - return nullptr; - case remarks::Format::YAML: - return llvm::make_unique(OS); - }; -} - Expected> llvm::setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename, StringRef RemarksPasses, StringRef RemarksFormat, @@ -131,24 +122,63 @@ llvm::setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename, if (RemarksFilename.empty()) return nullptr; + Expected Format = remarks::parseFormat(RemarksFormat); + if (Error E = Format.takeError()) + return make_error(std::move(E)); + std::error_code EC; + auto Flags = *Format == remarks::Format::YAML ? sys::fs::OF_Text + : sys::fs::OF_None; auto RemarksFile = - llvm::make_unique(RemarksFilename, EC, sys::fs::F_None); + std::make_unique(RemarksFilename, EC, Flags); // We don't use llvm::FileError here because some diagnostics want the file // name separately. if (EC) return make_error(errorCodeToError(EC)); + Expected> RemarkSerializer = + remarks::createRemarkSerializer( + *Format, remarks::SerializerMode::Separate, RemarksFile->os()); + if (Error E = RemarkSerializer.takeError()) + return make_error(std::move(E)); + + Context.setRemarkStreamer(std::make_unique( + std::move(*RemarkSerializer), RemarksFilename)); + + if (!RemarksPasses.empty()) + if (Error E = Context.getRemarkStreamer()->setFilter(RemarksPasses)) + return make_error(std::move(E)); + + return std::move(RemarksFile); +} + +Error llvm::setupOptimizationRemarks(LLVMContext &Context, raw_ostream &OS, + StringRef RemarksPasses, + StringRef RemarksFormat, + bool RemarksWithHotness, + unsigned RemarksHotnessThreshold) { + if (RemarksWithHotness) + Context.setDiagnosticsHotnessRequested(true); + + if (RemarksHotnessThreshold) + Context.setDiagnosticsHotnessThreshold(RemarksHotnessThreshold); + Expected Format = remarks::parseFormat(RemarksFormat); if (Error E = Format.takeError()) return make_error(std::move(E)); - Context.setRemarkStreamer(llvm::make_unique( - RemarksFilename, formatToSerializer(*Format, RemarksFile->os()))); + Expected> RemarkSerializer = + remarks::createRemarkSerializer(*Format, + remarks::SerializerMode::Separate, OS); + if (Error E = RemarkSerializer.takeError()) + return make_error(std::move(E)); + + Context.setRemarkStreamer( + std::make_unique(std::move(*RemarkSerializer))); if (!RemarksPasses.empty()) if (Error E = Context.getRemarkStreamer()->setFilter(RemarksPasses)) return make_error(std::move(E)); - return std::move(RemarksFile); + return Error::success(); } diff --git a/lib/IR/SafepointIRVerifier.cpp b/lib/IR/SafepointIRVerifier.cpp index 7f3dea5e6a6d..c90347ec48fd 100644 --- a/lib/IR/SafepointIRVerifier.cpp +++ b/lib/IR/SafepointIRVerifier.cpp @@ -102,11 +102,11 @@ public: } bool isDeadEdge(const Use *U) const { - assert(dyn_cast(U->getUser())->isTerminator() && + assert(cast(U->getUser())->isTerminator() && "edge must be operand of terminator"); assert(cast_or_null(U->get()) && "edge must refer to basic block"); - assert(!isDeadBlock(dyn_cast(U->getUser())->getParent()) && + assert(!isDeadBlock(cast(U->getUser())->getParent()) && "isDeadEdge() must be applied to edge from live block"); return 
DeadEdges.count(U); } diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp index 8ece7f223dd2..3eab5042b542 100644 --- a/lib/IR/Type.cpp +++ b/lib/IR/Type.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TypeSize.h" #include #include @@ -111,18 +112,22 @@ bool Type::isEmptyTy() const { return false; } -unsigned Type::getPrimitiveSizeInBits() const { +TypeSize Type::getPrimitiveSizeInBits() const { switch (getTypeID()) { - case Type::HalfTyID: return 16; - case Type::FloatTyID: return 32; - case Type::DoubleTyID: return 64; - case Type::X86_FP80TyID: return 80; - case Type::FP128TyID: return 128; - case Type::PPC_FP128TyID: return 128; - case Type::X86_MMXTyID: return 64; - case Type::IntegerTyID: return cast(this)->getBitWidth(); - case Type::VectorTyID: return cast(this)->getBitWidth(); - default: return 0; + case Type::HalfTyID: return TypeSize::Fixed(16); + case Type::FloatTyID: return TypeSize::Fixed(32); + case Type::DoubleTyID: return TypeSize::Fixed(64); + case Type::X86_FP80TyID: return TypeSize::Fixed(80); + case Type::FP128TyID: return TypeSize::Fixed(128); + case Type::PPC_FP128TyID: return TypeSize::Fixed(128); + case Type::X86_MMXTyID: return TypeSize::Fixed(64); + case Type::IntegerTyID: + return TypeSize::Fixed(cast(this)->getBitWidth()); + case Type::VectorTyID: { + const VectorType *VTy = cast(this); + return TypeSize(VTy->getBitWidth(), VTy->isScalable()); + } + default: return TypeSize::Fixed(0); } } diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp index b7f77dc3043e..3c8a5b536695 100644 --- a/lib/IR/Value.cpp +++ b/lib/IR/Value.cpp @@ -444,15 +444,11 @@ void Value::replaceUsesOutsideBlock(Value *New, BasicBlock *BB) { "replaceUses of value with new value of different type!"); assert(BB && "Basic block that may contain a use of 'New' must be defined\n"); - use_iterator UI = use_begin(), E = use_end(); - for (; UI != E;) { - Use &U = *UI; - ++UI; - auto *Usr = dyn_cast(U.getUser()); - if (Usr && Usr->getParent() == BB) - continue; - U.set(New); - } + replaceUsesWithIf(New, [BB](Use &U) { + auto *I = dyn_cast(U.getUser()); + // Don't replace if it's an instruction in the BB basic block. 
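Since getPrimitiveSizeInBits() above now returns a TypeSize rather than an unsigned, callers that may see scalable vectors compare the known minimum size instead of an exact bit count. A minimal sketch (fitsInMinBits is invented for illustration):

    #include "llvm/IR/Type.h"
    #include "llvm/Support/TypeSize.h"
    using namespace llvm;

    bool fitsInMinBits(Type *Ty, uint64_t Bits) {
      TypeSize TS = Ty->getPrimitiveSizeInBits();
      // <4 x i32> yields a fixed 128; <vscale x 4 x i32> yields a minimum of
      // 128 with the scalable flag set, so only the minimum is compared here.
      return TS.getKnownMinSize() <= Bits;
    }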
+ return !I || I->getParent() != BB; + }); } namespace { @@ -460,8 +456,8 @@ namespace { enum PointerStripKind { PSK_ZeroIndices, PSK_ZeroIndicesAndAliases, - PSK_ZeroIndicesAndAliasesSameRepresentation, - PSK_ZeroIndicesAndAliasesAndInvariantGroups, + PSK_ZeroIndicesSameRepresentation, + PSK_ZeroIndicesAndInvariantGroups, PSK_InBoundsConstantIndices, PSK_InBounds }; @@ -479,10 +475,10 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) { do { if (auto *GEP = dyn_cast(V)) { switch (StripKind) { - case PSK_ZeroIndicesAndAliases: - case PSK_ZeroIndicesAndAliasesSameRepresentation: - case PSK_ZeroIndicesAndAliasesAndInvariantGroups: case PSK_ZeroIndices: + case PSK_ZeroIndicesAndAliases: + case PSK_ZeroIndicesSameRepresentation: + case PSK_ZeroIndicesAndInvariantGroups: if (!GEP->hasAllZeroIndices()) return V; break; @@ -498,15 +494,13 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) { V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast) { V = cast(V)->getOperand(0); - } else if (StripKind != PSK_ZeroIndicesAndAliasesSameRepresentation && + } else if (StripKind != PSK_ZeroIndicesSameRepresentation && Operator::getOpcode(V) == Instruction::AddrSpaceCast) { // TODO: If we know an address space cast will not change the // representation we could look through it here as well. V = cast(V)->getOperand(0); - } else if (auto *GA = dyn_cast(V)) { - if (StripKind == PSK_ZeroIndices || GA->isInterposable()) - return V; - V = GA->getAliasee(); + } else if (StripKind == PSK_ZeroIndicesAndAliases && isa(V)) { + V = cast(V)->getAliasee(); } else { if (const auto *Call = dyn_cast(V)) { if (const Value *RV = Call->getReturnedArgOperand()) { @@ -516,7 +510,7 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) { // The result of launder.invariant.group must alias it's argument, // but it can't be marked with returned attribute, that's why it needs // special case. 
- if (StripKind == PSK_ZeroIndicesAndAliasesAndInvariantGroups && + if (StripKind == PSK_ZeroIndicesAndInvariantGroups && (Call->getIntrinsicID() == Intrinsic::launder_invariant_group || Call->getIntrinsicID() == Intrinsic::strip_invariant_group)) { V = Call->getArgOperand(0); @@ -533,16 +527,15 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) { } // end anonymous namespace const Value *Value::stripPointerCasts() const { - return stripPointerCastsAndOffsets(this); + return stripPointerCastsAndOffsets(this); } -const Value *Value::stripPointerCastsSameRepresentation() const { - return stripPointerCastsAndOffsets< - PSK_ZeroIndicesAndAliasesSameRepresentation>(this); +const Value *Value::stripPointerCastsAndAliases() const { + return stripPointerCastsAndOffsets(this); } -const Value *Value::stripPointerCastsNoFollowAliases() const { - return stripPointerCastsAndOffsets(this); +const Value *Value::stripPointerCastsSameRepresentation() const { + return stripPointerCastsAndOffsets(this); } const Value *Value::stripInBoundsConstantOffsets() const { @@ -550,8 +543,7 @@ const Value *Value::stripInBoundsConstantOffsets() const { } const Value *Value::stripPointerCastsAndInvariantGroups() const { - return stripPointerCastsAndOffsets( - this); + return stripPointerCastsAndOffsets(this); } const Value * @@ -650,6 +642,19 @@ uint64_t Value::getPointerDereferenceableBytes(const DataLayout &DL, } CanBeNull = true; } + } else if (auto *IP = dyn_cast(this)) { + if (MDNode *MD = IP->getMetadata(LLVMContext::MD_dereferenceable)) { + ConstantInt *CI = mdconst::extract(MD->getOperand(0)); + DerefBytes = CI->getLimitedValue(); + } + if (DerefBytes == 0) { + if (MDNode *MD = + IP->getMetadata(LLVMContext::MD_dereferenceable_or_null)) { + ConstantInt *CI = mdconst::extract(MD->getOperand(0)); + DerefBytes = CI->getLimitedValue(); + } + CanBeNull = true; + } } else if (auto *AI = dyn_cast(this)) { if (!AI->isArrayAllocation()) { DerefBytes = DL.getTypeStoreSize(AI->getAllocatedType()); @@ -666,21 +671,21 @@ uint64_t Value::getPointerDereferenceableBytes(const DataLayout &DL, return DerefBytes; } -unsigned Value::getPointerAlignment(const DataLayout &DL) const { +MaybeAlign Value::getPointerAlignment(const DataLayout &DL) const { assert(getType()->isPointerTy() && "must be pointer"); - - unsigned Align = 0; if (auto *GO = dyn_cast(this)) { if (isa(GO)) { + const MaybeAlign FunctionPtrAlign = DL.getFunctionPtrAlign(); switch (DL.getFunctionPtrAlignType()) { case DataLayout::FunctionPtrAlignType::Independent: - return DL.getFunctionPtrAlign(); + return FunctionPtrAlign; case DataLayout::FunctionPtrAlignType::MultipleOfFunctionAlign: - return std::max(DL.getFunctionPtrAlign(), GO->getAlignment()); + return std::max(FunctionPtrAlign, MaybeAlign(GO->getAlignment())); } + llvm_unreachable("Unhandled FunctionPtrAlignType"); } - Align = GO->getAlignment(); - if (Align == 0) { + const MaybeAlign Alignment(GO->getAlignment()); + if (!Alignment) { if (auto *GVar = dyn_cast(GO)) { Type *ObjectType = GVar->getValueType(); if (ObjectType->isSized()) { @@ -688,37 +693,43 @@ unsigned Value::getPointerAlignment(const DataLayout &DL) const { // it the preferred alignment. Otherwise, we have to assume that it // may only have the minimum ABI alignment. 
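A short sketch of the renamed stripping helpers above: stripPointerCasts() no longer follows aliases, and the old alias-following behaviour now lives in stripPointerCastsAndAliases() (V is a placeholder pointer value):

    #include "llvm/IR/Value.h"
    using namespace llvm;

    const Value *baseNoAliases(const Value *V) {
      // Looks through bitcasts, all-zero GEPs and address space casts only.
      return V->stripPointerCasts();
    }
    const Value *baseThroughAliases(const Value *V) {
      // Additionally looks through GlobalAlias definitions.
      return V->stripPointerCastsAndAliases();
    }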
if (GVar->isStrongDefinitionForLinker()) - Align = DL.getPreferredAlignment(GVar); + return MaybeAlign(DL.getPreferredAlignment(GVar)); else - Align = DL.getABITypeAlignment(ObjectType); + return Align(DL.getABITypeAlignment(ObjectType)); } } } + return Alignment; } else if (const Argument *A = dyn_cast(this)) { - Align = A->getParamAlignment(); - - if (!Align && A->hasStructRetAttr()) { + const MaybeAlign Alignment(A->getParamAlignment()); + if (!Alignment && A->hasStructRetAttr()) { // An sret parameter has at least the ABI alignment of the return type. Type *EltTy = cast(A->getType())->getElementType(); if (EltTy->isSized()) - Align = DL.getABITypeAlignment(EltTy); + return Align(DL.getABITypeAlignment(EltTy)); } + return Alignment; } else if (const AllocaInst *AI = dyn_cast(this)) { - Align = AI->getAlignment(); - if (Align == 0) { + const MaybeAlign Alignment(AI->getAlignment()); + if (!Alignment) { Type *AllocatedType = AI->getAllocatedType(); if (AllocatedType->isSized()) - Align = DL.getPrefTypeAlignment(AllocatedType); + return MaybeAlign(DL.getPrefTypeAlignment(AllocatedType)); } - } else if (const auto *Call = dyn_cast(this)) - Align = Call->getAttributes().getRetAlignment(); - else if (const LoadInst *LI = dyn_cast(this)) + return Alignment; + } else if (const auto *Call = dyn_cast(this)) { + const MaybeAlign Alignment(Call->getRetAlignment()); + if (!Alignment && Call->getCalledFunction()) + return MaybeAlign( + Call->getCalledFunction()->getAttributes().getRetAlignment()); + return Alignment; + } else if (const LoadInst *LI = dyn_cast(this)) { if (MDNode *MD = LI->getMetadata(LLVMContext::MD_align)) { ConstantInt *CI = mdconst::extract(MD->getOperand(0)); - Align = CI->getLimitedValue(); + return MaybeAlign(CI->getLimitedValue()); } - - return Align; + } + return llvm::None; } const Value *Value::DoPHITranslation(const BasicBlock *CurBB, diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 9346c8bda75d..b17fc433ed74 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -119,6 +119,7 @@ struct VerifierSupport { raw_ostream *OS; const Module &M; ModuleSlotTracker MST; + Triple TT; const DataLayout &DL; LLVMContext &Context; @@ -130,7 +131,8 @@ struct VerifierSupport { bool TreatBrokenDebugInfoAsError = true; explicit VerifierSupport(raw_ostream *OS, const Module &M) - : OS(OS), M(M), MST(&M), DL(M.getDataLayout()), Context(M.getContext()) {} + : OS(OS), M(M), MST(&M), TT(M.getTargetTriple()), DL(M.getDataLayout()), + Context(M.getContext()) {} private: void Write(const Module *M) { @@ -416,6 +418,7 @@ private: void visitBasicBlock(BasicBlock &BB); void visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty); void visitDereferenceableMetadata(Instruction &I, MDNode *MD); + void visitProfMetadata(Instruction &I, MDNode *MD); template bool isValidMetadataArray(const MDTuple &N); #define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N); @@ -515,6 +518,7 @@ private: DIExpression::FragmentInfo Fragment, ValueOrMetadata *Desc); void verifyFnArgs(const DbgVariableIntrinsic &I); + void verifyNotEntryValue(const DbgVariableIntrinsic &I); /// Module-level debug info verification... 
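Because getPointerAlignment() above now returns MaybeAlign, an unknown alignment is reported as an empty optional rather than 0. A minimal handling sketch (knownAlignOrOne is invented for illustration):

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Value.h"
    #include "llvm/Support/Alignment.h"
    using namespace llvm;

    uint64_t knownAlignOrOne(const Value *Ptr, const DataLayout &DL) {
      if (MaybeAlign A = Ptr->getPointerAlignment(DL))
        return A->value();   // a known power-of-two alignment
      return 1;              // unknown: assume only byte alignment
    }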
void verifyCompileUnits(); @@ -670,7 +674,7 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) { Assert(InitArray, "wrong initalizer for intrinsic global variable", Init); for (Value *Op : InitArray->operands()) { - Value *V = Op->stripPointerCastsNoFollowAliases(); + Value *V = Op->stripPointerCasts(); Assert(isa(V) || isa(V) || isa(V), "invalid llvm.used member", V); @@ -979,6 +983,9 @@ void Verifier::visitDICompositeType(const DICompositeType &N) { N.getRawVTableHolder()); AssertDI(!hasConflictingReferenceFlags(N.getFlags()), "invalid reference flags", &N); + unsigned DIBlockByRefStruct = 1 << 4; + AssertDI((N.getFlags() & DIBlockByRefStruct) == 0, + "DIBlockByRefStruct on DICompositeType is no longer supported", &N); if (N.isVector()) { const DINodeArray Elements = N.getElements(); @@ -1306,11 +1313,12 @@ void Verifier::visitDIImportedEntity(const DIImportedEntity &N) { } void Verifier::visitComdat(const Comdat &C) { - // The Module is invalid if the GlobalValue has private linkage. Entities - // with private linkage don't have entries in the symbol table. - if (const GlobalValue *GV = M.getNamedValue(C.getName())) - Assert(!GV->hasPrivateLinkage(), "comdat global value has private linkage", - GV); + // In COFF the Module is invalid if the GlobalValue has private linkage. + // Entities with private linkage don't have entries in the symbol table. + if (TT.isOSBinFormatCOFF()) + if (const GlobalValue *GV = M.getNamedValue(C.getName())) + Assert(!GV->hasPrivateLinkage(), + "comdat global value has private linkage", GV); } void Verifier::visitModuleIdents(const Module &M) { @@ -2497,6 +2505,15 @@ void Verifier::visitCallBrInst(CallBrInst &CBI) { Assert(CBI.getOperand(i) != CBI.getOperand(j), "Duplicate callbr destination!", &CBI); } + { + SmallPtrSet ArgBBs; + for (Value *V : CBI.args()) + if (auto *BA = dyn_cast(V)) + ArgBBs.insert(BA->getBasicBlock()); + for (BasicBlock *BB : CBI.getIndirectDests()) + Assert(ArgBBs.find(BB) != ArgBBs.end(), + "Indirect label missing from arglist.", &CBI); + } visitTerminator(CBI); } @@ -2715,8 +2732,8 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) { &I); if (SrcTy->isVectorTy()) { - VectorType *VSrc = dyn_cast(SrcTy); - VectorType *VDest = dyn_cast(DestTy); + VectorType *VSrc = cast(SrcTy); + VectorType *VDest = cast(DestTy); Assert(VSrc->getNumElements() == VDest->getNumElements(), "PtrToInt Vector width mismatch", &I); } @@ -2740,8 +2757,8 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) { Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(), "IntToPtr type mismatch", &I); if (SrcTy->isVectorTy()) { - VectorType *VSrc = dyn_cast(SrcTy); - VectorType *VDest = dyn_cast(DestTy); + VectorType *VSrc = cast(SrcTy); + VectorType *VDest = cast(DestTy); Assert(VSrc->getNumElements() == VDest->getNumElements(), "IntToPtr Vector width mismatch", &I); } @@ -3983,9 +4000,9 @@ void Verifier::verifyDominatesUse(Instruction &I, unsigned i) { void Verifier::visitDereferenceableMetadata(Instruction& I, MDNode* MD) { Assert(I.getType()->isPointerTy(), "dereferenceable, dereferenceable_or_null " "apply only to pointer types", &I); - Assert(isa(I), + Assert((isa(I) || isa(I)), "dereferenceable, dereferenceable_or_null apply only to load" - " instructions, use attributes for calls or invokes", &I); + " and inttoptr instructions, use attributes for calls or invokes", &I); Assert(MD->getNumOperands() == 1, "dereferenceable, dereferenceable_or_null " "take one operand!", &I); ConstantInt *CI = mdconst::dyn_extract(MD->getOperand(0)); @@ -3993,6 +4010,45 @@ 
void Verifier::visitDereferenceableMetadata(Instruction& I, MDNode* MD) { "dereferenceable_or_null metadata value must be an i64!", &I); } +void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) { + Assert(MD->getNumOperands() >= 2, + "!prof annotations should have no less than 2 operands", MD); + + // Check first operand. + Assert(MD->getOperand(0) != nullptr, "first operand should not be null", MD); + Assert(isa(MD->getOperand(0)), + "expected string with name of the !prof annotation", MD); + MDString *MDS = cast(MD->getOperand(0)); + StringRef ProfName = MDS->getString(); + + // Check consistency of !prof branch_weights metadata. + if (ProfName.equals("branch_weights")) { + unsigned ExpectedNumOperands = 0; + if (BranchInst *BI = dyn_cast(&I)) + ExpectedNumOperands = BI->getNumSuccessors(); + else if (SwitchInst *SI = dyn_cast(&I)) + ExpectedNumOperands = SI->getNumSuccessors(); + else if (isa(&I) || isa(&I)) + ExpectedNumOperands = 1; + else if (IndirectBrInst *IBI = dyn_cast(&I)) + ExpectedNumOperands = IBI->getNumDestinations(); + else if (isa(&I)) + ExpectedNumOperands = 2; + else + CheckFailed("!prof branch_weights are not allowed for this instruction", + MD); + + Assert(MD->getNumOperands() == 1 + ExpectedNumOperands, + "Wrong number of operands", MD); + for (unsigned i = 1; i < MD->getNumOperands(); ++i) { + auto &MDO = MD->getOperand(i); + Assert(MDO, "second operand should not be null", MD); + Assert(mdconst::dyn_extract(MDO), + "!prof branch_weights operand is not a const int"); + } + } +} + /// verifyInstruction - Verify that an instruction is well formed. /// void Verifier::visitInstruction(Instruction &I) { @@ -4150,13 +4206,18 @@ void Verifier::visitInstruction(Instruction &I) { "alignment is larger that implementation defined limit", &I); } + if (MDNode *MD = I.getMetadata(LLVMContext::MD_prof)) + visitProfMetadata(I, MD); + if (MDNode *N = I.getDebugLoc().getAsMDNode()) { AssertDI(isa(N), "invalid !dbg metadata attachment", &I, N); visitMDNode(*N); } - if (auto *DII = dyn_cast(&I)) + if (auto *DII = dyn_cast(&I)) { verifyFragmentExpression(*DII); + verifyNotEntryValue(*DII); + } InstsInThisBlock.insert(&I); } @@ -4236,6 +4297,8 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: case Intrinsic::experimental_constrained_fptrunc: case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: @@ -4248,12 +4311,16 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { case Intrinsic::experimental_constrained_log: case Intrinsic::experimental_constrained_log10: case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_lrint: + case Intrinsic::experimental_constrained_llrint: case Intrinsic::experimental_constrained_rint: case Intrinsic::experimental_constrained_nearbyint: case Intrinsic::experimental_constrained_maxnum: case Intrinsic::experimental_constrained_minnum: case Intrinsic::experimental_constrained_ceil: case Intrinsic::experimental_constrained_floor: + case Intrinsic::experimental_constrained_lround: + case Intrinsic::experimental_constrained_llround: case Intrinsic::experimental_constrained_round: case Intrinsic::experimental_constrained_trunc: visitConstrainedFPIntrinsic(cast(Call)); @@ -4623,7 +4690,8 @@ void
Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { } case Intrinsic::smul_fix: case Intrinsic::smul_fix_sat: - case Intrinsic::umul_fix: { + case Intrinsic::umul_fix: + case Intrinsic::umul_fix_sat: { Value *Op1 = Call.getArgOperand(0); Value *Op2 = Call.getArgOperand(1); Assert(Op1->getType()->isIntOrIntVectorTy(), @@ -4705,6 +4773,31 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) { HasRoundingMD = true; break; + case Intrinsic::experimental_constrained_lrint: + case Intrinsic::experimental_constrained_llrint: { + Assert((NumOperands == 3), "invalid arguments for constrained FP intrinsic", + &FPI); + Type *ValTy = FPI.getArgOperand(0)->getType(); + Type *ResultTy = FPI.getType(); + Assert(!ValTy->isVectorTy() && !ResultTy->isVectorTy(), + "Intrinsic does not support vectors", &FPI); + HasExceptionMD = true; + HasRoundingMD = true; + } + break; + + case Intrinsic::experimental_constrained_lround: + case Intrinsic::experimental_constrained_llround: { + Assert((NumOperands == 2), "invalid arguments for constrained FP intrinsic", + &FPI); + Type *ValTy = FPI.getArgOperand(0)->getType(); + Type *ResultTy = FPI.getType(); + Assert(!ValTy->isVectorTy() && !ResultTy->isVectorTy(), + "Intrinsic does not support vectors", &FPI); + HasExceptionMD = true; + break; + } + case Intrinsic::experimental_constrained_fma: Assert((NumOperands == 5), "invalid arguments for constrained FP intrinsic", &FPI); @@ -4727,6 +4820,33 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) { HasRoundingMD = true; break; + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: { + Assert((NumOperands == 2), + "invalid arguments for constrained FP intrinsic", &FPI); + HasExceptionMD = true; + + Value *Operand = FPI.getArgOperand(0); + uint64_t NumSrcElem = 0; + Assert(Operand->getType()->isFPOrFPVectorTy(), + "Intrinsic first argument must be floating point", &FPI); + if (auto *OperandT = dyn_cast(Operand->getType())) { + NumSrcElem = OperandT->getNumElements(); + } + + Operand = &FPI; + Assert((NumSrcElem > 0) == Operand->getType()->isVectorTy(), + "Intrinsic first argument and result disagree on vector use", &FPI); + Assert(Operand->getType()->isIntOrIntVectorTy(), + "Intrinsic result must be an integer", &FPI); + if (auto *OperandT = dyn_cast(Operand->getType())) { + Assert(NumSrcElem == OperandT->getNumElements(), + "Intrinsic first argument and result vector lengths must be equal", + &FPI); + } + } + break; + case Intrinsic::experimental_constrained_fptrunc: case Intrinsic::experimental_constrained_fpext: { if (FPI.getIntrinsicID() == Intrinsic::experimental_constrained_fptrunc) { @@ -4826,11 +4946,6 @@ void Verifier::visitDbgIntrinsic(StringRef Kind, DbgVariableIntrinsic &DII) { // This check is redundant with one in visitLocalVariable(). AssertDI(isType(Var->getRawType()), "invalid type ref", Var, Var->getRawType()); - if (auto *Type = dyn_cast_or_null(Var->getRawType())) - if (Type->isBlockByrefStruct()) - AssertDI(DII.getExpression() && DII.getExpression()->getNumElements(), - "BlockByRef variable without complex expression", Var, &DII); - verifyFnArgs(DII); } @@ -4935,6 +5050,16 @@ void Verifier::verifyFnArgs(const DbgVariableIntrinsic &I) { Prev, Var); } +void Verifier::verifyNotEntryValue(const DbgVariableIntrinsic &I) { + DIExpression *E = dyn_cast_or_null(I.getRawExpression()); + + // We don't know whether this intrinsic verified correctly. 
+ if (!E || !E->isValid()) + return; + + AssertDI(!E->isEntryValue(), "Entry values are only allowed in MIR", &I); +} + void Verifier::verifyCompileUnits() { // When more than one Module is imported into the same context, such as during // an LTO build before linking the modules, ODR type uniquing may cause types @@ -5021,7 +5146,7 @@ struct VerifierLegacyPass : public FunctionPass { } bool doInitialization(Module &M) override { - V = llvm::make_unique( + V = std::make_unique( &dbgs(), /*ShouldTreatBrokenDebugInfoAsError=*/false, M); return false; } diff --git a/lib/LTO/Caching.cpp b/lib/LTO/Caching.cpp index 000ab91dba7c..12dcd182de2d 100644 --- a/lib/LTO/Caching.cpp +++ b/lib/LTO/Caching.cpp @@ -142,8 +142,8 @@ Expected lto::localCache(StringRef CacheDirectoryPath, } // This CacheStream will move the temporary file into the cache when done. - return llvm::make_unique( - llvm::make_unique(Temp->FD, /* ShouldClose */ false), + return std::make_unique( + std::make_unique(Temp->FD, /* ShouldClose */ false), AddBuffer, std::move(*Temp), EntryPath.str(), Task); }; }; diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp index 64506890956a..1e345e7dd89e 100644 --- a/lib/LTO/LTO.cpp +++ b/lib/LTO/LTO.cpp @@ -44,6 +44,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" #include "llvm/Transforms/Utils/SplitModule.h" @@ -383,7 +384,9 @@ static bool isWeakObjectWithRWAccess(GlobalValueSummary *GVS) { static void thinLTOInternalizeAndPromoteGUID( GlobalValueSummaryList &GVSummaryList, GlobalValue::GUID GUID, - function_ref isExported) { + function_ref isExported, + function_ref + isPrevailing) { for (auto &S : GVSummaryList) { if (isExported(S->modulePath(), GUID)) { if (GlobalValue::isLocalLinkage(S->linkage())) @@ -392,6 +395,8 @@ static void thinLTOInternalizeAndPromoteGUID( // Ignore local and appending linkage values since the linker // doesn't resolve them. !GlobalValue::isLocalLinkage(S->linkage()) && + (!GlobalValue::isInterposableLinkage(S->linkage()) || + isPrevailing(GUID, S.get())) && S->linkage() != GlobalValue::AppendingLinkage && // We can't internalize available_externally globals because this // can break function pointer equality. @@ -410,9 +415,12 @@ static void thinLTOInternalizeAndPromoteGUID( // as external and non-exported values as internal. void llvm::thinLTOInternalizeAndPromoteInIndex( ModuleSummaryIndex &Index, - function_ref isExported) { + function_ref isExported, + function_ref + isPrevailing) { for (auto &I : Index) - thinLTOInternalizeAndPromoteGUID(I.second.SummaryList, I.first, isExported); + thinLTOInternalizeAndPromoteGUID(I.second.SummaryList, I.first, isExported, + isPrevailing); } // Requires a destructor for std::vector. 
@@ -459,8 +467,8 @@ BitcodeModule &InputFile::getSingleBitcodeModule() { LTO::RegularLTOState::RegularLTOState(unsigned ParallelCodeGenParallelismLevel, Config &Conf) : ParallelCodeGenParallelismLevel(ParallelCodeGenParallelismLevel), - Ctx(Conf), CombinedModule(llvm::make_unique("ld-temp.o", Ctx)), - Mover(llvm::make_unique(*CombinedModule)) {} + Ctx(Conf), CombinedModule(std::make_unique("ld-temp.o", Ctx)), + Mover(std::make_unique(*CombinedModule)) {} LTO::ThinLTOState::ThinLTOState(ThinBackend Backend) : Backend(Backend), CombinedIndex(/*HaveGVs*/ false) { @@ -754,7 +762,8 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef Syms, // For now they aren't reported correctly by ModuleSymbolTable. auto &CommonRes = RegularLTO.Commons[Sym.getIRName()]; CommonRes.Size = std::max(CommonRes.Size, Sym.getCommonSize()); - CommonRes.Align = std::max(CommonRes.Align, Sym.getCommonAlignment()); + CommonRes.Align = + std::max(CommonRes.Align, MaybeAlign(Sym.getCommonAlignment())); CommonRes.Prevailing |= Res.Prevailing; } @@ -899,8 +908,7 @@ Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) { GlobalValue::dropLLVMManglingEscape(Res.second.IRName)); if (Res.second.VisibleOutsideSummary && Res.second.Prevailing) - GUIDPreservedSymbols.insert(GlobalValue::getGUID( - GlobalValue::dropLLVMManglingEscape(Res.second.IRName))); + GUIDPreservedSymbols.insert(GUID); GUIDPrevailingResolutions[GUID] = Res.second.Prevailing ? PrevailingType::Yes : PrevailingType::No; @@ -996,6 +1004,8 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { GV->setLinkage(GlobalValue::InternalLinkage); } + RegularLTO.CombinedModule->addModuleFlag(Module::Error, "LTOPostLink", 1); + if (Conf.PostInternalizeModuleHook && !Conf.PostInternalizeModuleHook(0, *RegularLTO.CombinedModule)) return Error::success(); @@ -1004,6 +1014,16 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { std::move(RegularLTO.CombinedModule), ThinLTO.CombinedIndex); } +static const char *libcallRoutineNames[] = { +#define HANDLE_LIBCALL(code, name) name, +#include "llvm/IR/RuntimeLibcalls.def" +#undef HANDLE_LIBCALL +}; + +ArrayRef LTO::getRuntimeLibcallSymbols() { + return makeArrayRef(libcallRoutineNames); +} + /// This class defines the interface to the ThinLTO backend. 
class lto::ThinBackendProc { protected: @@ -1141,7 +1161,7 @@ ThinBackend lto::createInProcessThinBackend(unsigned ParallelismLevel) { return [=](Config &Conf, ModuleSummaryIndex &CombinedIndex, const StringMap &ModuleToDefinedGVSummaries, AddStreamFn AddStream, NativeObjectCache Cache) { - return llvm::make_unique( + return std::make_unique( Conf, CombinedIndex, ParallelismLevel, ModuleToDefinedGVSummaries, AddStream, Cache); }; @@ -1204,7 +1224,7 @@ public: std::error_code EC; raw_fd_ostream OS(NewModulePath + ".thinlto.bc", EC, - sys::fs::OpenFlags::F_None); + sys::fs::OpenFlags::OF_None); if (EC) return errorCodeToError(EC); WriteIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex); @@ -1231,7 +1251,7 @@ ThinBackend lto::createWriteIndexesThinBackend( return [=](Config &Conf, ModuleSummaryIndex &CombinedIndex, const StringMap &ModuleToDefinedGVSummaries, AddStreamFn AddStream, NativeObjectCache Cache) { - return llvm::make_unique( + return std::make_unique( Conf, CombinedIndex, ModuleToDefinedGVSummaries, OldPrefix, NewPrefix, ShouldEmitImportsFiles, LinkedObjectsFile, OnWrite); }; @@ -1274,6 +1294,15 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, if (DumpThinCGSCCs) ThinLTO.CombinedIndex.dumpSCCs(outs()); + std::set ExportedGUIDs; + + // Perform index-based WPD. This will return immediately if there are + // no index entries in the typeIdMetadata map (e.g. if we are instead + // performing IR-based WPD in hybrid regular/thin LTO mode). + std::map> LocalWPDTargetsMap; + runWholeProgramDevirtOnIndex(ThinLTO.CombinedIndex, ExportedGUIDs, + LocalWPDTargetsMap); + if (Conf.OptLevel > 0) ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, ImportLists, ExportLists); @@ -1282,7 +1311,6 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, // at -O0 because summary-based DCE is implemented using internalization, and // we must apply DCE consistently with the full LTO module in order to avoid // undefined references during the final link. - std::set ExportedGUIDs; for (auto &Res : GlobalResolutions) { // If the symbol does not have external references or it is not prevailing, // then not need to mark it as exported from a ThinLTO partition. @@ -1308,12 +1336,19 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, ExportList->second.count(GUID)) || ExportedGUIDs.count(GUID); }; - thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported); + + // Update local devirtualized targets that were exported by cross-module + // importing or by other devirtualizations marked in the ExportedGUIDs set. 
+ updateIndexWPDForExports(ThinLTO.CombinedIndex, isExported, + LocalWPDTargetsMap); auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) { return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath(); }; + thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported, + isPrevailing); + auto recordNewLinkage = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID, GlobalValue::LinkageTypes NewLinkage) { @@ -1368,7 +1403,7 @@ lto::setupStatsFile(StringRef StatsFilename) { llvm::EnableStatistics(false); std::error_code EC; auto StatsFile = - llvm::make_unique(StatsFilename, EC, sys::fs::F_None); + std::make_unique(StatsFilename, EC, sys::fs::OF_None); if (EC) return errorCodeToError(EC); diff --git a/lib/LTO/LTOBackend.cpp b/lib/LTO/LTOBackend.cpp index 7456e7175163..2761f8367b0d 100644 --- a/lib/LTO/LTOBackend.cpp +++ b/lib/LTO/LTOBackend.cpp @@ -28,6 +28,7 @@ #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/ModuleSymbolTable.h" #include "llvm/Passes/PassBuilder.h" +#include "llvm/Passes/StandardInstrumentations.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" @@ -57,8 +58,8 @@ Error Config::addSaveTemps(std::string OutputFileName, ShouldDiscardValueNames = false; std::error_code EC; - ResolutionFile = llvm::make_unique( - OutputFileName + "resolution.txt", EC, sys::fs::OpenFlags::F_Text); + ResolutionFile = std::make_unique( + OutputFileName + "resolution.txt", EC, sys::fs::OpenFlags::OF_Text); if (EC) return errorCodeToError(EC); @@ -83,7 +84,7 @@ Error Config::addSaveTemps(std::string OutputFileName, PathPrefix = M.getModuleIdentifier() + "."; std::string Path = PathPrefix + PathSuffix + ".bc"; std::error_code EC; - raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None); + raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); // Because -save-temps is a debugging feature, we report the error // directly and exit. if (EC) @@ -103,7 +104,7 @@ Error Config::addSaveTemps(std::string OutputFileName, CombinedIndexHook = [=](const ModuleSummaryIndex &Index) { std::string Path = OutputFileName + "index.bc"; std::error_code EC; - raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None); + raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); // Because -save-temps is a debugging feature, we report the error // directly and exit. if (EC) @@ -111,7 +112,7 @@ Error Config::addSaveTemps(std::string OutputFileName, WriteIndexToFile(Index, OS); Path = OutputFileName + "index.dot"; - raw_fd_ostream OSDot(Path, EC, sys::fs::OpenFlags::F_None); + raw_fd_ostream OSDot(Path, EC, sys::fs::OpenFlags::OF_None); if (EC) reportOpenError(Path, EC.message()); Index.exportToDot(OSDot); @@ -165,7 +166,10 @@ static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM, PGOOptions::IRUse, PGOOptions::CSIRUse); } - PassBuilder PB(TM, PipelineTuningOptions(), PGOOpt); + PassInstrumentationCallbacks PIC; + StandardInstrumentations SI; + SI.registerCallbacks(PIC); + PassBuilder PB(TM, PipelineTuningOptions(),PGOOpt, &PIC); AAManager AA; // Parse a custom AA pipeline if asked to. 
@@ -329,7 +333,7 @@ void codegen(Config &Conf, TargetMachine *TM, AddStreamFn AddStream, if (!DwoFile.empty()) { std::error_code EC; - DwoOut = llvm::make_unique(DwoFile, EC, sys::fs::F_None); + DwoOut = std::make_unique(DwoFile, EC, sys::fs::OF_None); if (EC) report_fatal_error("Failed to open " + DwoFile + ": " + EC.message()); } diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 6bb3bfaefc9c..882192892867 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -151,7 +151,7 @@ void LTOCodeGenerator::initializeLTOPasses() { void LTOCodeGenerator::setAsmUndefinedRefs(LTOModule *Mod) { const std::vector &undefs = Mod->getAsmUndefinedRefs(); for (int i = 0, e = undefs.size(); i != e; ++i) - AsmUndefinedRefs[undefs[i]] = 1; + AsmUndefinedRefs.insert(undefs[i]); } bool LTOCodeGenerator::addModule(LTOModule *Mod) { @@ -174,7 +174,7 @@ void LTOCodeGenerator::setModule(std::unique_ptr Mod) { AsmUndefinedRefs.clear(); MergedModule = Mod->takeModule(); - TheLinker = make_unique(*MergedModule); + TheLinker = std::make_unique(*MergedModule); setAsmUndefinedRefs(&*Mod); // We've just changed the input, so let's make sure we verify it. @@ -229,7 +229,7 @@ bool LTOCodeGenerator::writeMergedModules(StringRef Path) { // create output file std::error_code EC; - ToolOutputFile Out(Path, EC, sys::fs::F_None); + ToolOutputFile Out(Path, EC, sys::fs::OF_None); if (EC) { std::string ErrMsg = "could not open bitcode file for writing: "; ErrMsg += Path.str() + ": " + EC.message(); @@ -365,7 +365,8 @@ bool LTOCodeGenerator::determineTarget() { MCpu = "core2"; else if (Triple.getArch() == llvm::Triple::x86) MCpu = "yonah"; - else if (Triple.getArch() == llvm::Triple::aarch64) + else if (Triple.getArch() == llvm::Triple::aarch64 || + Triple.getArch() == llvm::Triple::aarch64_32) MCpu = "cyclone"; } @@ -462,6 +463,8 @@ void LTOCodeGenerator::applyScopeRestrictions() { internalizeModule(*MergedModule, mustPreserveGV); + MergedModule->addModuleFlag(Module::Error, "LTOPostLink", 1); + ScopeRestrictionsDone = true; } @@ -690,7 +693,7 @@ LTOCodeGenerator::setDiagnosticHandler(lto_diagnostic_handler_t DiagHandler, return Context.setDiagnosticHandler(nullptr); // Register the LTOCodeGenerator stub in the LLVMContext to forward the // diagnostic to the external DiagHandler. 
- Context.setDiagnosticHandler(llvm::make_unique(this), + Context.setDiagnosticHandler(std::make_unique(this), true); } diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index 7ffe7bf84ba8..587b332e7064 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -220,7 +220,8 @@ LTOModule::makeLTOModule(MemoryBufferRef Buffer, const TargetOptions &options, CPU = "core2"; else if (Triple.getArch() == llvm::Triple::x86) CPU = "yonah"; - else if (Triple.getArch() == llvm::Triple::aarch64) + else if (Triple.getArch() == llvm::Triple::aarch64 || + Triple.getArch() == llvm::Triple::aarch64_32) CPU = "cyclone"; } diff --git a/lib/LTO/SummaryBasedOptimizations.cpp b/lib/LTO/SummaryBasedOptimizations.cpp index e919fd530fb0..6db495de003b 100644 --- a/lib/LTO/SummaryBasedOptimizations.cpp +++ b/lib/LTO/SummaryBasedOptimizations.cpp @@ -18,7 +18,7 @@ using namespace llvm; -cl::opt ThinLTOSynthesizeEntryCounts( +static cl::opt ThinLTOSynthesizeEntryCounts( "thinlto-synthesize-entry-counts", cl::init(false), cl::Hidden, cl::desc("Synthesize entry counts based on the summary")); diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp index 1c52218836ca..d151de17896f 100644 --- a/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/lib/LTO/ThinLTOCodeGenerator.cpp @@ -39,6 +39,7 @@ #include "llvm/Support/CachePruning.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" +#include "llvm/Support/FileUtilities.h" #include "llvm/Support/Path.h" #include "llvm/Support/SHA1.h" #include "llvm/Support/SmallVectorMemoryBuffer.h" @@ -52,6 +53,7 @@ #include "llvm/Transforms/IPO/FunctionImport.h" #include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" @@ -89,7 +91,7 @@ static void saveTempBitcode(const Module &TheModule, StringRef TempDir, // User asked to save temps, let dump the bitcode file after import. std::string SaveTempPath = (TempDir + llvm::Twine(count) + Suffix).str(); std::error_code EC; - raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None); + raw_fd_ostream OS(SaveTempPath, EC, sys::fs::OF_None); if (EC) report_fatal_error(Twine("Failed to open ") + SaveTempPath + " to save optimized bitcode\n"); @@ -224,7 +226,8 @@ crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index, } static void optimizeModule(Module &TheModule, TargetMachine &TM, - unsigned OptLevel, bool Freestanding) { + unsigned OptLevel, bool Freestanding, + ModuleSummaryIndex *Index) { // Populate the PassManager PassManagerBuilder PMB; PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple()); @@ -238,6 +241,7 @@ static void optimizeModule(Module &TheModule, TargetMachine &TM, // Already did this in verifyLoadedModule(). PMB.VerifyInput = false; PMB.VerifyOutput = false; + PMB.ImportSummary = Index; legacy::PassManager PM; @@ -295,7 +299,7 @@ std::unique_ptr codegenModule(Module &TheModule, // Run codegen now. resulting binary is in OutputBuffer. PM.run(TheModule); } - return make_unique(std::move(OutputBuffer)); + return std::make_unique(std::move(OutputBuffer)); } /// Manage caching for a single Module. 
@@ -368,23 +372,26 @@ public: // Write to a temporary to avoid race condition SmallString<128> TempFilename; SmallString<128> CachePath(EntryPath); - int TempFD; llvm::sys::path::remove_filename(CachePath); sys::path::append(TempFilename, CachePath, "Thin-%%%%%%.tmp.o"); - std::error_code EC = - sys::fs::createUniqueFile(TempFilename, TempFD, TempFilename); - if (EC) { - errs() << "Error: " << EC.message() << "\n"; - report_fatal_error("ThinLTO: Can't get a temporary file"); - } - { - raw_fd_ostream OS(TempFD, /* ShouldClose */ true); - OS << OutputBuffer.getBuffer(); + + if (auto Err = handleErrors( + llvm::writeFileAtomically(TempFilename, EntryPath, + OutputBuffer.getBuffer()), + [](const llvm::AtomicFileWriteError &E) { + std::string ErrorMsgBuffer; + llvm::raw_string_ostream S(ErrorMsgBuffer); + E.log(S); + + if (E.Error == + llvm::atomic_write_error::failed_to_create_uniq_file) { + errs() << "Error: " << ErrorMsgBuffer << "\n"; + report_fatal_error("ThinLTO: Can't get a temporary file"); + } + })) { + // FIXME + consumeError(std::move(Err)); } - // Rename temp file to final destination; rename is atomic - EC = sys::fs::rename(TempFilename, EntryPath); - if (EC) - sys::fs::remove(TempFilename); } }; @@ -429,7 +436,7 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index, saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc"); } - optimizeModule(TheModule, TM, OptLevel, Freestanding); + optimizeModule(TheModule, TM, OptLevel, Freestanding, &Index); saveTempBitcode(TheModule, SaveTempsDir, count, ".4.opt.bc"); @@ -442,7 +449,7 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index, auto Index = buildModuleSummaryIndex(TheModule, nullptr, &PSI); WriteBitcodeToFile(TheModule, OS, true, &Index); } - return make_unique(std::move(OutputBuffer)); + return std::make_unique(std::move(OutputBuffer)); } return codegenModule(TheModule, TM); @@ -457,10 +464,9 @@ static void resolvePrevailingInIndex( ModuleSummaryIndex &Index, StringMap> &ResolvedODR, - const DenseSet &GUIDPreservedSymbols) { - - DenseMap PrevailingCopy; - computePrevailingCopies(Index, PrevailingCopy); + const DenseSet &GUIDPreservedSymbols, + const DenseMap + &PrevailingCopy) { auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) { const auto &Prevailing = PrevailingCopy.find(GUID); @@ -490,7 +496,8 @@ static void initTMBuilder(TargetMachineBuilder &TMBuilder, TMBuilder.MCpu = "core2"; else if (TheTriple.getArch() == llvm::Triple::x86) TMBuilder.MCpu = "yonah"; - else if (TheTriple.getArch() == llvm::Triple::aarch64) + else if (TheTriple.getArch() == llvm::Triple::aarch64 || + TheTriple.getArch() == llvm::Triple::aarch64_32) TMBuilder.MCpu = "cyclone"; } TMBuilder.TheTriple = std::move(TheTriple); @@ -557,7 +564,7 @@ std::unique_ptr TargetMachineBuilder::create() const { */ std::unique_ptr ThinLTOCodeGenerator::linkCombinedIndex() { std::unique_ptr CombinedIndex = - llvm::make_unique(/*HaveGVs=*/false); + std::make_unique(/*HaveGVs=*/false); uint64_t NextModuleId = 0; for (auto &Mod : Modules) { auto &M = Mod->getSingleBitcodeModule(); @@ -573,19 +580,36 @@ std::unique_ptr ThinLTOCodeGenerator::linkCombinedIndex() { return CombinedIndex; } -static void internalizeAndPromoteInIndex( - const StringMap &ExportLists, - const DenseSet &GUIDPreservedSymbols, - ModuleSummaryIndex &Index) { - auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) { +struct IsExported { + const StringMap &ExportLists; + const DenseSet &GUIDPreservedSymbols; + + 
IsExported(const StringMap &ExportLists, + const DenseSet &GUIDPreservedSymbols) + : ExportLists(ExportLists), GUIDPreservedSymbols(GUIDPreservedSymbols) {} + + bool operator()(StringRef ModuleIdentifier, GlobalValue::GUID GUID) const { const auto &ExportList = ExportLists.find(ModuleIdentifier); return (ExportList != ExportLists.end() && ExportList->second.count(GUID)) || GUIDPreservedSymbols.count(GUID); - }; + } +}; - thinLTOInternalizeAndPromoteInIndex(Index, isExported); -} +struct IsPrevailing { + const DenseMap &PrevailingCopy; + IsPrevailing(const DenseMap + &PrevailingCopy) + : PrevailingCopy(PrevailingCopy) {} + + bool operator()(GlobalValue::GUID GUID, const GlobalValueSummary *S) const { + const auto &Prevailing = PrevailingCopy.find(GUID); + // Not in map means that there was only one copy, which must be prevailing. + if (Prevailing == PrevailingCopy.end()) + return true; + return Prevailing->second == S; + }; +}; static void computeDeadSymbolsInIndex( ModuleSummaryIndex &Index, @@ -629,16 +653,22 @@ void ThinLTOCodeGenerator::promote(Module &TheModule, ModuleSummaryIndex &Index, ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, ExportLists); + DenseMap PrevailingCopy; + computePrevailingCopies(Index, PrevailingCopy); + // Resolve prevailing symbols StringMap> ResolvedODR; - resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols); + resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols, + PrevailingCopy); thinLTOResolvePrevailingInModule( TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]); // Promote the exported values in the index, so that they are promoted // in the module. - internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols, Index); + thinLTOInternalizeAndPromoteInIndex( + Index, IsExported(ExportLists, GUIDPreservedSymbols), + IsPrevailing(PrevailingCopy)); promoteModule(TheModule, Index); } @@ -785,13 +815,19 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule, if (ExportList.empty() && GUIDPreservedSymbols.empty()) return; + DenseMap PrevailingCopy; + computePrevailingCopies(Index, PrevailingCopy); + // Resolve prevailing symbols StringMap> ResolvedODR; - resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols); + resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols, + PrevailingCopy); // Promote the exported values in the index, so that they are promoted // in the module. - internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols, Index); + thinLTOInternalizeAndPromoteInIndex( + Index, IsExported(ExportLists, GUIDPreservedSymbols), + IsPrevailing(PrevailingCopy)); promoteModule(TheModule, Index); @@ -810,7 +846,8 @@ void ThinLTOCodeGenerator::optimize(Module &TheModule) { initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple())); // Optimize now - optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding); + optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding, + nullptr); } /// Write out the generated object file, either from CacheEntryPath or from @@ -845,7 +882,7 @@ ThinLTOCodeGenerator::writeGeneratedObject(int count, StringRef CacheEntryPath, } // No cache entry, just write out the buffer. 
std::error_code Err; - raw_fd_ostream OS(OutputPath, Err, sys::fs::F_None); + raw_fd_ostream OS(OutputPath, Err, sys::fs::OF_None); if (Err) report_fatal_error("Can't open output '" + OutputPath + "'\n"); OS << OutputBuffer.getBuffer(); @@ -900,7 +937,7 @@ void ThinLTOCodeGenerator::run() { if (!SaveTempsDir.empty()) { auto SaveTempPath = SaveTempsDir + "index.bc"; std::error_code EC; - raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None); + raw_fd_ostream OS(SaveTempPath, EC, sys::fs::OF_None); if (EC) report_fatal_error(Twine("Failed to open ") + SaveTempPath + " to save optimized bitcode\n"); @@ -931,6 +968,15 @@ void ThinLTOCodeGenerator::run() { // Synthesize entry counts for functions in the combined index. computeSyntheticCounts(*Index); + // Perform index-based WPD. This will return immediately if there are + // no index entries in the typeIdMetadata map (e.g. if we are instead + // performing IR-based WPD in hybrid regular/thin LTO mode). + std::map> LocalWPDTargetsMap; + std::set ExportedGUIDs; + runWholeProgramDevirtOnIndex(*Index, ExportedGUIDs, LocalWPDTargetsMap); + for (auto GUID : ExportedGUIDs) + GUIDPreservedSymbols.insert(GUID); + // Collect the import/export lists for all modules from the call-graph in the // combined index. StringMap ImportLists(ModuleCount); @@ -944,14 +990,23 @@ void ThinLTOCodeGenerator::run() { // on the index, and nuke this map. StringMap> ResolvedODR; + DenseMap PrevailingCopy; + computePrevailingCopies(*Index, PrevailingCopy); + // Resolve prevailing symbols, this has to be computed early because it // impacts the caching. - resolvePrevailingInIndex(*Index, ResolvedODR, GUIDPreservedSymbols); + resolvePrevailingInIndex(*Index, ResolvedODR, GUIDPreservedSymbols, + PrevailingCopy); // Use global summary-based analysis to identify symbols that can be // internalized (because they aren't exported or preserved as per callback). // Changes are made in the index, consumed in the ThinLTO backends. - internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols, *Index); + updateIndexWPDForExports(*Index, + IsExported(ExportLists, GUIDPreservedSymbols), + LocalWPDTargetsMap); + thinLTOInternalizeAndPromoteInIndex( + *Index, IsExported(ExportLists, GUIDPreservedSymbols), + IsPrevailing(PrevailingCopy)); // Make sure that every module has an entry in the ExportLists, ImportList, // GVSummary and ResolvedODR maps to enable threaded access to these maps diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp index 37515d93ed50..6784d81595e5 100644 --- a/lib/Linker/IRMover.cpp +++ b/lib/Linker/IRMover.cpp @@ -398,7 +398,7 @@ class IRLinker { /// due to the use of Value handles which the Linker doesn't actually need, /// but this allows us to reuse the ValueMapper code. ValueToValueMapTy ValueMap; - ValueToValueMapTy AliasValueMap; + ValueToValueMapTy IndirectSymbolValueMap; DenseSet ValuesToLink; std::vector Worklist; @@ -437,7 +437,7 @@ class IRLinker { /// Entry point for mapping values and alternate context for mapping aliases. ValueMapper Mapper; - unsigned AliasMCID; + unsigned IndirectSymbolMCID; /// Handles cloning of a global values from the source module into /// the destination module, including setting the attributes and visibility. @@ -480,13 +480,15 @@ class IRLinker { /// /// Note this code may call the client-provided \p AddLazyFor. 
bool shouldLink(GlobalValue *DGV, GlobalValue &SGV); - Expected linkGlobalValueProto(GlobalValue *GV, bool ForAlias); + Expected linkGlobalValueProto(GlobalValue *GV, + bool ForIndirectSymbol); Error linkModuleFlagsMetadata(); void linkGlobalVariable(GlobalVariable &Dst, GlobalVariable &Src); Error linkFunctionBody(Function &Dst, Function &Src); - void linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src); + void linkIndirectSymbolBody(GlobalIndirectSymbol &Dst, + GlobalIndirectSymbol &Src); Error linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src); /// Replace all types in the source AttributeList with the @@ -497,7 +499,7 @@ class IRLinker { /// into the destination module. GlobalVariable *copyGlobalVariableProto(const GlobalVariable *SGVar); Function *copyFunctionProto(const Function *SF); - GlobalValue *copyGlobalAliasProto(const GlobalAlias *SGA); + GlobalValue *copyGlobalIndirectSymbolProto(const GlobalIndirectSymbol *SGIS); /// Perform "replace all uses with" operations. These work items need to be /// performed as part of materialization, but we postpone them to happen after @@ -524,8 +526,8 @@ public: SharedMDs(SharedMDs), IsPerformingImport(IsPerformingImport), Mapper(ValueMap, RF_MoveDistinctMDs | RF_IgnoreMissingLocals, &TypeMap, &GValMaterializer), - AliasMCID(Mapper.registerAlternateMappingContext(AliasValueMap, - &LValMaterializer)) { + IndirectSymbolMCID(Mapper.registerAlternateMappingContext( + IndirectSymbolValueMap, &LValMaterializer)) { ValueMap.getMDMap() = std::move(SharedMDs); for (GlobalValue *GV : ValuesToLink) maybeAdd(GV); @@ -535,7 +537,7 @@ public: ~IRLinker() { SharedMDs = std::move(*ValueMap.getMDMap()); } Error run(); - Value *materialize(Value *V, bool ForAlias); + Value *materialize(Value *V, bool ForIndirectSymbol); }; } @@ -568,12 +570,12 @@ Value *LocalValueMaterializer::materialize(Value *SGV) { return TheIRLinker.materialize(SGV, true); } -Value *IRLinker::materialize(Value *V, bool ForAlias) { +Value *IRLinker::materialize(Value *V, bool ForIndirectSymbol) { auto *SGV = dyn_cast(V); if (!SGV) return nullptr; - Expected NewProto = linkGlobalValueProto(SGV, ForAlias); + Expected NewProto = linkGlobalValueProto(SGV, ForIndirectSymbol); if (!NewProto) { setError(NewProto.takeError()); return nullptr; @@ -593,23 +595,23 @@ Value *IRLinker::materialize(Value *V, bool ForAlias) { if (V->hasInitializer() || V->hasAppendingLinkage()) return New; } else { - auto *A = cast(New); - if (A->getAliasee()) + auto *IS = cast(New); + if (IS->getIndirectSymbol()) return New; } - // When linking a global for an alias, it will always be linked. However we - // need to check if it was not already scheduled to satisfy a reference from a - // regular global value initializer. We know if it has been schedule if the - // "New" GlobalValue that is mapped here for the alias is the same as the one - // already mapped. If there is an entry in the ValueMap but the value is - // different, it means that the value already had a definition in the - // destination module (linkonce for instance), but we need a new definition - // for the alias ("New" will be different. - if (ForAlias && ValueMap.lookup(SGV) == New) + // When linking a global for an indirect symbol, it will always be linked. + // However we need to check if it was not already scheduled to satisfy a + // reference from a regular global value initializer. We know if it has been + // schedule if the "New" GlobalValue that is mapped here for the indirect + // symbol is the same as the one already mapped. 
If there is an entry in the + // ValueMap but the value is different, it means that the value already had a + // definition in the destination module (linkonce for instance), but we need a + // new definition for the indirect symbol ("New" will be different. + if (ForIndirectSymbol && ValueMap.lookup(SGV) == New) return New; - if (ForAlias || shouldLink(New, *SGV)) + if (ForIndirectSymbol || shouldLink(New, *SGV)) setError(linkGlobalValueBody(*New, *SGV)); return New; @@ -627,7 +629,7 @@ GlobalVariable *IRLinker::copyGlobalVariableProto(const GlobalVariable *SGVar) { /*init*/ nullptr, SGVar->getName(), /*insertbefore*/ nullptr, SGVar->getThreadLocalMode(), SGVar->getType()->getAddressSpace()); - NewDGV->setAlignment(SGVar->getAlignment()); + NewDGV->setAlignment(MaybeAlign(SGVar->getAlignment())); NewDGV->copyAttributesFrom(SGVar); return NewDGV; } @@ -660,16 +662,24 @@ Function *IRLinker::copyFunctionProto(const Function *SF) { return F; } -/// Set up prototypes for any aliases that come over from the source module. -GlobalValue *IRLinker::copyGlobalAliasProto(const GlobalAlias *SGA) { +/// Set up prototypes for any indirect symbols that come over from the source +/// module. +GlobalValue * +IRLinker::copyGlobalIndirectSymbolProto(const GlobalIndirectSymbol *SGIS) { // If there is no linkage to be performed or we're linking from the source, // bring over SGA. - auto *Ty = TypeMap.get(SGA->getValueType()); - auto *GA = - GlobalAlias::create(Ty, SGA->getType()->getPointerAddressSpace(), - GlobalValue::ExternalLinkage, SGA->getName(), &DstM); - GA->copyAttributesFrom(SGA); - return GA; + auto *Ty = TypeMap.get(SGIS->getValueType()); + GlobalIndirectSymbol *GIS; + if (isa(SGIS)) + GIS = GlobalAlias::create(Ty, SGIS->getType()->getPointerAddressSpace(), + GlobalValue::ExternalLinkage, SGIS->getName(), + &DstM); + else + GIS = GlobalIFunc::create(Ty, SGIS->getType()->getPointerAddressSpace(), + GlobalValue::ExternalLinkage, SGIS->getName(), + nullptr, &DstM); + GIS->copyAttributesFrom(SGIS); + return GIS; } GlobalValue *IRLinker::copyGlobalValueProto(const GlobalValue *SGV, @@ -681,7 +691,7 @@ GlobalValue *IRLinker::copyGlobalValueProto(const GlobalValue *SGV, NewGV = copyFunctionProto(SF); } else { if (ForDefinition) - NewGV = copyGlobalAliasProto(cast(SGV)); + NewGV = copyGlobalIndirectSymbolProto(cast(SGV)); else if (SGV->getValueType()->isFunctionTy()) NewGV = Function::Create(cast(TypeMap.get(SGV->getValueType())), @@ -748,8 +758,18 @@ void IRLinker::computeTypeMapping() { } for (GlobalValue &SGV : *SrcM) - if (GlobalValue *DGV = getLinkedToGlobal(&SGV)) + if (GlobalValue *DGV = getLinkedToGlobal(&SGV)) { + if (DGV->getType() == SGV.getType()) { + // If the types of DGV and SGV are the same, it means that DGV is from + // the source module and got added to DstM from a shared metadata. We + // shouldn't map this type to itself in case the type's components get + // remapped to a new type from DstM (for instance, during the loop over + // SrcM->getIdentifiedStructTypes() below). 
+ continue; + } + TypeMap.addTypeMapping(DGV->getType(), SGV.getType()); + } for (GlobalValue &SGV : SrcM->aliases()) if (GlobalValue *DGV = getLinkedToGlobal(&SGV)) @@ -940,7 +960,7 @@ bool IRLinker::shouldLink(GlobalValue *DGV, GlobalValue &SGV) { } Expected IRLinker::linkGlobalValueProto(GlobalValue *SGV, - bool ForAlias) { + bool ForIndirectSymbol) { GlobalValue *DGV = getLinkedToGlobal(SGV); bool ShouldLink = shouldLink(DGV, *SGV); @@ -951,12 +971,12 @@ Expected IRLinker::linkGlobalValueProto(GlobalValue *SGV, if (I != ValueMap.end()) return cast(I->second); - I = AliasValueMap.find(SGV); - if (I != AliasValueMap.end()) + I = IndirectSymbolValueMap.find(SGV); + if (I != IndirectSymbolValueMap.end()) return cast(I->second); } - if (!ShouldLink && ForAlias) + if (!ShouldLink && ForIndirectSymbol) DGV = nullptr; // Handle the ultra special appending linkage case first. @@ -975,8 +995,8 @@ Expected IRLinker::linkGlobalValueProto(GlobalValue *SGV, if (DoneLinkingBodies) return nullptr; - NewGV = copyGlobalValueProto(SGV, ShouldLink || ForAlias); - if (ShouldLink || !ForAlias) + NewGV = copyGlobalValueProto(SGV, ShouldLink || ForIndirectSymbol); + if (ShouldLink || !ForIndirectSymbol) forceRenaming(NewGV, SGV->getName()); } @@ -987,7 +1007,7 @@ Expected IRLinker::linkGlobalValueProto(GlobalValue *SGV, if (auto Remangled = Intrinsic::remangleIntrinsicFunction(F)) NewGV = Remangled.getValue(); - if (ShouldLink || ForAlias) { + if (ShouldLink || ForIndirectSymbol) { if (const Comdat *SC = SGV->getComdat()) { if (auto *GO = dyn_cast(NewGV)) { Comdat *DC = DstM.getOrInsertComdat(SC->getName()); @@ -997,7 +1017,7 @@ Expected IRLinker::linkGlobalValueProto(GlobalValue *SGV, } } - if (!ShouldLink && ForAlias) + if (!ShouldLink && ForIndirectSymbol) NewGV->setLinkage(GlobalValue::InternalLinkage); Constant *C = NewGV; @@ -1060,8 +1080,10 @@ Error IRLinker::linkFunctionBody(Function &Dst, Function &Src) { return Error::success(); } -void IRLinker::linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src) { - Mapper.scheduleMapGlobalAliasee(Dst, *Src.getAliasee(), AliasMCID); +void IRLinker::linkIndirectSymbolBody(GlobalIndirectSymbol &Dst, + GlobalIndirectSymbol &Src) { + Mapper.scheduleMapGlobalIndirectSymbol(Dst, *Src.getIndirectSymbol(), + IndirectSymbolMCID); } Error IRLinker::linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src) { @@ -1071,7 +1093,7 @@ Error IRLinker::linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src) { linkGlobalVariable(cast(Dst), *GVar); return Error::success(); } - linkAliasBody(cast(Dst), cast(Src)); + linkIndirectSymbolBody(cast(Dst), cast(Src)); return Error::success(); } @@ -1411,7 +1433,7 @@ Error IRLinker::run() { // Already mapped. 
if (ValueMap.find(GV) != ValueMap.end() || - AliasValueMap.find(GV) != AliasValueMap.end()) + IndirectSymbolValueMap.find(GV) != IndirectSymbolValueMap.end()) continue; assert(!GV->isDeclaration()); diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index a18f4cc25bcc..35d6290e901b 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -351,7 +351,8 @@ bool ModuleLinker::linkIfNeeded(GlobalValue &GV) { SGVar->setConstant(false); } if (DGVar->hasCommonLinkage() && SGVar->hasCommonLinkage()) { - unsigned Align = std::max(DGVar->getAlignment(), SGVar->getAlignment()); + MaybeAlign Align( + std::max(DGVar->getAlignment(), SGVar->getAlignment())); SGVar->setAlignment(Align); DGVar->setAlignment(Align); } diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 2c68723a12f8..6f160e491cea 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -36,6 +36,7 @@ #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/StringTableBuilder.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compression.h" @@ -336,7 +337,7 @@ public: } // end anonymous namespace void ELFWriter::align(unsigned Alignment) { - uint64_t Padding = OffsetToAlignment(W.OS.tell(), Alignment); + uint64_t Padding = offsetToAlignment(W.OS.tell(), Align(Alignment)); W.OS.write_zeros(Padding); } @@ -511,6 +512,19 @@ static uint8_t mergeTypeForSet(uint8_t origType, uint8_t newType) { return Type; } +static bool isIFunc(const MCSymbolELF *Symbol) { + while (Symbol->getType() != ELF::STT_GNU_IFUNC) { + const MCSymbolRefExpr *Value; + if (!Symbol->isVariable() || + !(Value = dyn_cast(Symbol->getVariableValue())) || + Value->getKind() != MCSymbolRefExpr::VK_None || + mergeTypeForSet(Symbol->getType(), ELF::STT_GNU_IFUNC) != ELF::STT_GNU_IFUNC) + return false; + Symbol = &cast(Value->getSymbol()); + } + return true; +} + void ELFWriter::writeSymbol(SymbolTableWriter &Writer, uint32_t StringIndex, ELFSymbolData &MSD, const MCAsmLayout &Layout) { const auto &Symbol = cast(*MSD.Symbol); @@ -524,6 +538,8 @@ void ELFWriter::writeSymbol(SymbolTableWriter &Writer, uint32_t StringIndex, // Binding and Type share the same byte as upper and lower nibbles uint8_t Binding = Symbol.getBinding(); uint8_t Type = Symbol.getType(); + if (isIFunc(&Symbol)) + Type = ELF::STT_GNU_IFUNC; if (Base) { Type = mergeTypeForSet(Type, Base->getType()); } @@ -622,7 +638,7 @@ void ELFWriter::computeSymbolTable( unsigned EntrySize = is64Bit() ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32; MCSectionELF *SymtabSection = Ctx.getELFSection(".symtab", ELF::SHT_SYMTAB, 0, EntrySize, ""); - SymtabSection->setAlignment(is64Bit() ? 8 : 4); + SymtabSection->setAlignment(is64Bit() ? Align(8) : Align(4)); SymbolTableIndex = addToSectionTable(SymtabSection); align(SymtabSection->getAlignment()); @@ -720,7 +736,7 @@ void ELFWriter::computeSymbolTable( MCSectionELF *SymtabShndxSection = Ctx.getELFSection(".symtab_shndx", ELF::SHT_SYMTAB_SHNDX, 0, 4, ""); SymtabShndxSectionIndex = addToSectionTable(SymtabShndxSection); - SymtabShndxSection->setAlignment(4); + SymtabShndxSection->setAlignment(Align(4)); } ArrayRef FileNames = Asm.getFileNames(); @@ -808,7 +824,7 @@ MCSectionELF *ELFWriter::createRelocationSection(MCContext &Ctx, MCSectionELF *RelaSection = Ctx.createELFRelSection( RelaSectionName, hasRelocationAddend() ? 
ELF::SHT_RELA : ELF::SHT_REL, Flags, EntrySize, Sec.getGroup(), &Sec); - RelaSection->setAlignment(is64Bit() ? 8 : 4); + RelaSection->setAlignment(is64Bit() ? Align(8) : Align(4)); return RelaSection; } @@ -895,7 +911,7 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec, Section.setFlags(Section.getFlags() | ELF::SHF_COMPRESSED); // Alignment field should reflect the requirements of // the compressed section header. - Section.setAlignment(is64Bit() ? 8 : 4); + Section.setAlignment(is64Bit() ? Align(8) : Align(4)); } else { // Add "z" prefix to section name. This is zlib-gnu style. MC.renameELFSection(&Section, (".z" + SectionName.drop_front(1)).str()); @@ -1119,7 +1135,7 @@ uint64_t ELFWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) { if (!GroupIdx) { MCSectionELF *Group = Ctx.createELFGroupSection(SignatureSymbol); GroupIdx = addToSectionTable(Group); - Group->setAlignment(4); + Group->setAlignment(Align(4)); Groups.push_back(Group); } std::vector &Members = @@ -1437,22 +1453,7 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm, MCContext &Ctx = Asm.getContext(); if (const MCSymbolRefExpr *RefB = Target.getSymB()) { - // Let A, B and C being the components of Target and R be the location of - // the fixup. If the fixup is not pcrel, we want to compute (A - B + C). - // If it is pcrel, we want to compute (A - B + C - R). - - // In general, ELF has no relocations for -B. It can only represent (A + C) - // or (A + C - R). If B = R + K and the relocation is not pcrel, we can - // replace B to implement it: (A - R - K + C) - if (IsPCRel) { - Ctx.reportError( - Fixup.getLoc(), - "No relocation available to represent this relative expression"); - return; - } - const auto &SymB = cast(RefB->getSymbol()); - if (SymB.isUndefined()) { Ctx.reportError(Fixup.getLoc(), Twine("symbol '") + SymB.getName() + @@ -1468,10 +1469,9 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm, return; } - uint64_t SymBOffset = Layout.getSymbolOffset(SymB); - uint64_t K = SymBOffset - FixupOffset; + assert(!IsPCRel && "should have been folded"); IsPCRel = true; - C -= K; + C += FixupOffset - Layout.getSymbolOffset(SymB); } // We either rejected the fixup or folded B into C at this point. @@ -1489,38 +1489,35 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm, } } - unsigned Type = TargetObjectWriter->getRelocType(Ctx, Target, Fixup, IsPCRel); - uint64_t OriginalC = C; - bool RelocateWithSymbol = shouldRelocateWithSymbol(Asm, RefA, SymA, C, Type); - if (!RelocateWithSymbol && SymA && !SymA->isUndefined()) - C += Layout.getSymbolOffset(*SymA); - - uint64_t Addend = 0; - if (hasRelocationAddend()) { - Addend = C; - C = 0; - } - - FixedValue = C; - const MCSectionELF *SecA = (SymA && SymA->isInSection()) ? cast(&SymA->getSection()) : nullptr; if (!checkRelocation(Ctx, Fixup.getLoc(), &FixupSection, SecA)) return; + unsigned Type = TargetObjectWriter->getRelocType(Ctx, Target, Fixup, IsPCRel); + bool RelocateWithSymbol = shouldRelocateWithSymbol(Asm, RefA, SymA, C, Type); + uint64_t Addend = 0; + + FixedValue = !RelocateWithSymbol && SymA && !SymA->isUndefined() + ? C + Layout.getSymbolOffset(*SymA) + : C; + if (hasRelocationAddend()) { + Addend = FixedValue; + FixedValue = 0; + } + if (!RelocateWithSymbol) { const auto *SectionSymbol = SecA ? 
cast(SecA->getBeginSymbol()) : nullptr; if (SectionSymbol) SectionSymbol->setUsedInReloc(); - ELFRelocationEntry Rec(FixupOffset, SectionSymbol, Type, Addend, SymA, - OriginalC); + ELFRelocationEntry Rec(FixupOffset, SectionSymbol, Type, Addend, SymA, C); Relocations[&FixupSection].push_back(Rec); return; } - const auto *RenamedSymA = SymA; + const MCSymbolELF *RenamedSymA = SymA; if (SymA) { if (const MCSymbolELF *R = Renames.lookup(SymA)) RenamedSymA = R; @@ -1530,8 +1527,7 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm, else RenamedSymA->setUsedInReloc(); } - ELFRelocationEntry Rec(FixupOffset, RenamedSymA, Type, Addend, SymA, - OriginalC); + ELFRelocationEntry Rec(FixupOffset, RenamedSymA, Type, Addend, SymA, C); Relocations[&FixupSection].push_back(Rec); } @@ -1551,7 +1547,7 @@ bool ELFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( std::unique_ptr llvm::createELFObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS, bool IsLittleEndian) { - return llvm::make_unique(std::move(MOTW), OS, + return std::make_unique(std::move(MOTW), OS, IsLittleEndian); } @@ -1559,6 +1555,6 @@ std::unique_ptr llvm::createELFDwoObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS, raw_pwrite_stream &DwoOS, bool IsLittleEndian) { - return llvm::make_unique(std::move(MOTW), OS, DwoOS, + return std::make_unique(std::move(MOTW), OS, DwoOS, IsLittleEndian); } diff --git a/lib/MC/MCAsmBackend.cpp b/lib/MC/MCAsmBackend.cpp index 9b1102cbe7d1..b800e9caee22 100644 --- a/lib/MC/MCAsmBackend.cpp +++ b/lib/MC/MCAsmBackend.cpp @@ -73,6 +73,7 @@ const MCFixupKindInfo &MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"FK_Data_2", 0, 16, 0}, {"FK_Data_4", 0, 32, 0}, {"FK_Data_8", 0, 64, 0}, + {"FK_Data_6b", 0, 6, 0}, {"FK_PCRel_1", 0, 8, MCFixupKindInfo::FKF_IsPCRel}, {"FK_PCRel_2", 0, 16, MCFixupKindInfo::FKF_IsPCRel}, {"FK_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, @@ -93,10 +94,12 @@ const MCFixupKindInfo &MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"FK_Data_Add_2", 0, 16, 0}, {"FK_Data_Add_4", 0, 32, 0}, {"FK_Data_Add_8", 0, 64, 0}, + {"FK_Data_Add_6b", 0, 6, 0}, {"FK_Data_Sub_1", 0, 8, 0}, {"FK_Data_Sub_2", 0, 16, 0}, {"FK_Data_Sub_4", 0, 32, 0}, - {"FK_Data_Sub_8", 0, 64, 0}}; + {"FK_Data_Sub_8", 0, 64, 0}, + {"FK_Data_Sub_6b", 0, 6, 0}}; assert((size_t)Kind <= array_lengthof(Builtins) && "Unknown fixup kind"); return Builtins[Kind]; diff --git a/lib/MC/MCAsmInfoXCOFF.cpp b/lib/MC/MCAsmInfoXCOFF.cpp index 74c21f0c9e6d..65fe8848e20f 100644 --- a/lib/MC/MCAsmInfoXCOFF.cpp +++ b/lib/MC/MCAsmInfoXCOFF.cpp @@ -15,4 +15,21 @@ void MCAsmInfoXCOFF::anchor() {} MCAsmInfoXCOFF::MCAsmInfoXCOFF() { IsLittleEndian = false; HasDotTypeDotSizeDirective = false; + COMMDirectiveAlignmentIsInBytes = false; + LCOMMDirectiveAlignmentType = LCOMM::Log2Alignment; + UseDotAlignForAlignment = true; + AsciiDirective = nullptr; // not supported + AscizDirective = nullptr; // not supported + NeedsFunctionDescriptors = true; + HasDotLGloblDirective = true; + Data64bitsDirective = "\t.llong\t"; + SupportsQuotedNames = false; +} + +bool MCAsmInfoXCOFF::isValidUnquotedName(StringRef Name) const { + // FIXME: Remove this function when we stop using "TOC[TC0]" as a symbol name. 
+ if (Name.equals("TOC[TC0]")) + return true; + + return MCAsmInfo::isValidUnquotedName(Name); } diff --git a/lib/MC/MCAsmMacro.cpp b/lib/MC/MCAsmMacro.cpp index ba4fb7d4f387..186a68b02a29 100644 --- a/lib/MC/MCAsmMacro.cpp +++ b/lib/MC/MCAsmMacro.cpp @@ -11,6 +11,7 @@ using namespace llvm; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MCAsmMacroParameter::dump(raw_ostream &OS) const { OS << "\"" << Name << "\""; if (Required) @@ -39,3 +40,4 @@ void MCAsmMacro::dump(raw_ostream &OS) const { } OS << " (BEGIN BODY)" << Body << "(END BODY)\n"; } +#endif diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 7a2b0b8a1220..2d9c2cb21255 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" @@ -23,6 +24,7 @@ #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCRegister.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" @@ -66,7 +68,7 @@ public: std::unique_ptr asmbackend, bool showInst) : MCStreamer(Context), OSOwner(std::move(os)), OS(*OSOwner), MAI(Context.getAsmInfo()), InstPrinter(printer), - Assembler(llvm::make_unique( + Assembler(std::make_unique( Context, std::move(asmbackend), std::move(emitter), (asmbackend) ? asmbackend->createObjectWriter(NullStream) : nullptr)), @@ -162,6 +164,8 @@ public: void EmitCOFFSectionIndex(MCSymbol const *Symbol) override; void EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override; void EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) override; + void EmitXCOFFLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlign) override; void emitELFSize(MCSymbol *Symbol, const MCExpr *Value) override; void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) override; @@ -254,9 +258,26 @@ public: unsigned SourceLineNum, const MCSymbol *FnStartSym, const MCSymbol *FnEndSym) override; + + void PrintCVDefRangePrefix( + ArrayRef> Ranges); + + void EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeRegisterRelHeader DRHdr) override; + + void EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeSubfieldRegisterHeader DRHdr) override; + + void EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeRegisterHeader DRHdr) override; + void EmitCVDefRangeDirective( ArrayRef> Ranges, - StringRef FixedSizePortion) override; + codeview::DefRangeFramePointerRelHeader DRHdr) override; + void EmitCVStringTableDirective() override; void EmitCVFileChecksumsDirective() override; void EmitCVFileChecksumOffsetDirective(unsigned FileNo) override; @@ -291,13 +312,13 @@ public: void EmitWinCFIFuncletOrFuncEnd(SMLoc Loc) override; void EmitWinCFIStartChained(SMLoc Loc) override; void EmitWinCFIEndChained(SMLoc Loc) override; - void EmitWinCFIPushReg(unsigned Register, SMLoc Loc) override; - void EmitWinCFISetFrame(unsigned Register, unsigned Offset, + void EmitWinCFIPushReg(MCRegister Register, SMLoc Loc) override; + void EmitWinCFISetFrame(MCRegister Register, unsigned Offset, SMLoc Loc) override; void EmitWinCFIAllocStack(unsigned Size, SMLoc Loc) override; - void EmitWinCFISaveReg(unsigned Register, unsigned Offset, + void EmitWinCFISaveReg(MCRegister Register, unsigned 
Offset, SMLoc Loc) override; - void EmitWinCFISaveXMM(unsigned Register, unsigned Offset, + void EmitWinCFISaveXMM(MCRegister Register, unsigned Offset, SMLoc Loc) override; void EmitWinCFIPushFrame(bool Code, SMLoc Loc) override; void EmitWinCFIEndProlog(SMLoc Loc) override; @@ -630,6 +651,7 @@ bool MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, case MCSA_Global: // .globl/.global OS << MAI->getGlobalDirective(); break; + case MCSA_LGlobal: OS << "\t.lglobl\t"; break; case MCSA_Hidden: OS << "\t.hidden\t"; break; case MCSA_IndirectSymbol: OS << "\t.indirect_symbol\t"; break; case MCSA_Internal: OS << "\t.internal\t"; break; @@ -740,6 +762,24 @@ void MCAsmStreamer::EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) { EmitEOL(); } +// We need an XCOFF-specific version of this directive as the AIX syntax +// requires a QualName argument identifying the csect name and storage mapping +// class to appear before the alignment if we are specifying it. +void MCAsmStreamer::EmitXCOFFLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + assert(MAI->getLCOMMDirectiveAlignmentType() == LCOMM::Log2Alignment && + "We only support writing log base-2 alignment format with XCOFF."); + assert(isPowerOf2_32(ByteAlignment) && "Alignment must be a power of 2."); + + OS << "\t.lcomm\t"; + Symbol->print(OS, MAI); + OS << ',' << Size; + OS << ',' << Symbol->getName(); + OS << ',' << Log2_32(ByteAlignment); + + EmitEOL(); +} + void MCAsmStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) { assert(MAI->hasDotTypeDotSizeDirective()); OS << "\t.size\t"; @@ -1082,6 +1122,16 @@ void MCAsmStreamer::emitFill(const MCExpr &NumValues, int64_t Size, void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, unsigned ValueSize, unsigned MaxBytesToEmit) { + if (MAI->useDotAlignForAlignment()) { + if (!isPowerOf2_32(ByteAlignment)) + report_fatal_error("Only power-of-two alignments are supported " + "with .align."); + OS << "\t.align\t"; + OS << Log2_32(ByteAlignment); + EmitEOL(); + return; + } + // Some assemblers don't support non-power of two alignments, so we always // emit alignments as a power of two if possible. 
if (isPowerOf2_32(ByteAlignment)) { @@ -1376,9 +1426,8 @@ void MCAsmStreamer::EmitCVInlineLinetableDirective(unsigned PrimaryFunctionId, PrimaryFunctionId, SourceFileId, SourceLineNum, FnStartSym, FnEndSym); } -void MCAsmStreamer::EmitCVDefRangeDirective( - ArrayRef> Ranges, - StringRef FixedSizePortion) { +void MCAsmStreamer::PrintCVDefRangePrefix( + ArrayRef> Ranges) { OS << "\t.cv_def_range\t"; for (std::pair Range : Ranges) { OS << ' '; @@ -1386,10 +1435,43 @@ void MCAsmStreamer::EmitCVDefRangeDirective( OS << ' '; Range.second->print(OS, MAI); } - OS << ", "; - PrintQuotedString(FixedSizePortion, OS); +} + +void MCAsmStreamer::EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeRegisterRelHeader DRHdr) { + PrintCVDefRangePrefix(Ranges); + OS << ", reg_rel, "; + OS << DRHdr.Register << ", " << DRHdr.Flags << ", " + << DRHdr.BasePointerOffset; + EmitEOL(); +} + +void MCAsmStreamer::EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeSubfieldRegisterHeader DRHdr) { + PrintCVDefRangePrefix(Ranges); + OS << ", subfield_reg, "; + OS << DRHdr.Register << ", " << DRHdr.OffsetInParent; + EmitEOL(); +} + +void MCAsmStreamer::EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeRegisterHeader DRHdr) { + PrintCVDefRangePrefix(Ranges); + OS << ", reg, "; + OS << DRHdr.Register; + EmitEOL(); +} + +void MCAsmStreamer::EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeFramePointerRelHeader DRHdr) { + PrintCVDefRangePrefix(Ranges); + OS << ", frame_ptr_rel, "; + OS << DRHdr.Offset; EmitEOL(); - this->MCStreamer::EmitCVDefRangeDirective(Ranges, FixedSizePortion); } void MCAsmStreamer::EmitCVStringTableDirective() { @@ -1453,9 +1535,8 @@ void MCAsmStreamer::EmitRegisterName(int64_t Register) { // just ones that map to LLVM register numbers and have known names. // Fall back to using the original number directly if no name is known. const MCRegisterInfo *MRI = getContext().getRegisterInfo(); - int LLVMRegister = MRI->getLLVMRegNumFromEH(Register); - if (LLVMRegister != -1) { - InstPrinter->printRegName(OS, LLVMRegister); + if (Optional LLVMRegister = MRI->getLLVMRegNum(Register, true)) { + InstPrinter->printRegName(OS, *LLVMRegister); return; } } @@ -1668,6 +1749,12 @@ void MCAsmStreamer::EmitWinEHHandlerData(SMLoc Loc) { // We only do this so the section switch that terminates the handler // data block is visible. WinEH::FrameInfo *CurFrame = getCurrentWinFrameInfo(); + + // Do nothing if no frame is open. MCStreamer should've already reported an + // error. 
+ if (!CurFrame) + return; + MCSection *TextSec = &CurFrame->Function->getSection(); MCSection *XData = getAssociatedXDataSection(TextSec); SwitchSectionNoChange(XData); @@ -1676,18 +1763,21 @@ void MCAsmStreamer::EmitWinEHHandlerData(SMLoc Loc) { EmitEOL(); } -void MCAsmStreamer::EmitWinCFIPushReg(unsigned Register, SMLoc Loc) { +void MCAsmStreamer::EmitWinCFIPushReg(MCRegister Register, SMLoc Loc) { MCStreamer::EmitWinCFIPushReg(Register, Loc); - OS << "\t.seh_pushreg " << Register; + OS << "\t.seh_pushreg "; + InstPrinter->printRegName(OS, Register); EmitEOL(); } -void MCAsmStreamer::EmitWinCFISetFrame(unsigned Register, unsigned Offset, +void MCAsmStreamer::EmitWinCFISetFrame(MCRegister Register, unsigned Offset, SMLoc Loc) { MCStreamer::EmitWinCFISetFrame(Register, Offset, Loc); - OS << "\t.seh_setframe " << Register << ", " << Offset; + OS << "\t.seh_setframe "; + InstPrinter->printRegName(OS, Register); + OS << ", " << Offset; EmitEOL(); } @@ -1698,19 +1788,23 @@ void MCAsmStreamer::EmitWinCFIAllocStack(unsigned Size, SMLoc Loc) { EmitEOL(); } -void MCAsmStreamer::EmitWinCFISaveReg(unsigned Register, unsigned Offset, +void MCAsmStreamer::EmitWinCFISaveReg(MCRegister Register, unsigned Offset, SMLoc Loc) { MCStreamer::EmitWinCFISaveReg(Register, Offset, Loc); - OS << "\t.seh_savereg " << Register << ", " << Offset; + OS << "\t.seh_savereg "; + InstPrinter->printRegName(OS, Register); + OS << ", " << Offset; EmitEOL(); } -void MCAsmStreamer::EmitWinCFISaveXMM(unsigned Register, unsigned Offset, +void MCAsmStreamer::EmitWinCFISaveXMM(MCRegister Register, unsigned Offset, SMLoc Loc) { MCStreamer::EmitWinCFISaveXMM(Register, Offset, Loc); - OS << "\t.seh_savexmm " << Register << ", " << Offset; + OS << "\t.seh_savexmm "; + InstPrinter->printRegName(OS, Register); + OS << ", " << Offset; EmitEOL(); } diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index c4f4d4c2870e..cf42fe85b8e5 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -30,6 +30,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -321,7 +322,7 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, case MCFragment::FT_Align: { const MCAlignFragment &AF = cast(F); unsigned Offset = Layout.getFragmentOffset(&AF); - unsigned Size = OffsetToAlignment(Offset, AF.getAlignment()); + unsigned Size = offsetToAlignment(Offset, Align(AF.getAlignment())); // Insert extra Nops for code alignment if the target define // shouldInsertExtraNopBytesForCodeAlign target hook. 
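As a rough illustration of the alignment handling in the two hunks above — MC moving onto the Align type, and the useDotAlignForAlignment path printing the alignment as a log2 operand of ".align" — here is a minimal standalone sketch of the same arithmetic. It uses no LLVM headers; the helper names are illustrative, not LLVM's.

#include <cassert>
#include <cstdint>

// Padding needed to advance Offset to the next multiple of Alignment
// (the quantity offsetToAlignment computes); Alignment must be a power of two.
uint64_t paddingTo(uint64_t Offset, uint64_t Alignment) {
  assert(Alignment && (Alignment & (Alignment - 1)) == 0);
  return (-Offset) & (Alignment - 1);
}

// Log2 of a power-of-two alignment, i.e. the operand printed by the
// ".align <n>" form selected when useDotAlignForAlignment() is true.
unsigned log2Alignment(uint64_t Alignment) {
  unsigned N = 0;
  while (Alignment > 1) {
    Alignment >>= 1;
    ++N;
  }
  return N;
}

For example, paddingTo(10, 8) is 6, and a 16-byte alignment request is printed as ".align 4".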
@@ -840,6 +841,10 @@ void MCAssembler::layout(MCAsmLayout &Layout) { getBackend().shouldInsertFixupForCodeAlign(*this, Layout, *AF); } continue; + } else if (auto *FragWithFixups = + dyn_cast(&Frag)) { + Fixups = FragWithFixups->getFixups(); + Contents = FragWithFixups->getContents(); } else llvm_unreachable("Unknown fragment with fixups!"); for (const MCFixup &Fixup : Fixups) { @@ -969,13 +974,9 @@ bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout, MCContext &Context = Layout.getAssembler().getContext(); uint64_t OldSize = DF.getContents().size(); int64_t AddrDelta; - bool Abs; - if (getBackend().requiresDiffExpressionRelocations()) - Abs = DF.getAddrDelta().evaluateAsAbsolute(AddrDelta, Layout); - else { - Abs = DF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, Layout); - assert(Abs && "We created a line delta with an invalid expression"); - } + bool Abs = DF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, Layout); + assert(Abs && "We created a line delta with an invalid expression"); + (void)Abs; int64_t LineDelta; LineDelta = DF.getLineDelta(); SmallVectorImpl &Data = DF.getContents(); @@ -983,7 +984,7 @@ bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout, raw_svector_ostream OSE(Data); DF.getFixups().clear(); - if (Abs) { + if (!getBackend().requiresDiffExpressionRelocations()) { MCDwarfLineAddr::Encode(Context, getDWARFLinetableParams(), LineDelta, AddrDelta, OSE); } else { @@ -1017,10 +1018,25 @@ bool MCAssembler::relaxDwarfCallFrameFragment(MCAsmLayout &Layout, bool Abs = DF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, Layout); assert(Abs && "We created call frame with an invalid expression"); (void) Abs; - SmallString<8> &Data = DF.getContents(); + SmallVectorImpl &Data = DF.getContents(); Data.clear(); raw_svector_ostream OSE(Data); - MCDwarfFrameEmitter::EncodeAdvanceLoc(Context, AddrDelta, OSE); + DF.getFixups().clear(); + + if (getBackend().requiresDiffExpressionRelocations()) { + uint32_t Offset; + uint32_t Size; + MCDwarfFrameEmitter::EncodeAdvanceLoc(Context, AddrDelta, OSE, &Offset, + &Size); + if (Size) { + DF.getFixups().push_back(MCFixup::create( + Offset, &DF.getAddrDelta(), + MCFixup::getKindForSizeInBits(Size /*In bits.*/, false /*isPCRel*/))); + } + } else { + MCDwarfFrameEmitter::EncodeAdvanceLoc(Context, AddrDelta, OSE); + } + return OldSize != Data.size(); } diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index 0dc2e2d37caf..a69ee19e1a1a 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -58,11 +58,12 @@ AsSecureLogFileName("as-secure-log-file-name", MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri, const MCObjectFileInfo *mofi, const SourceMgr *mgr, - bool DoAutoReset) + MCTargetOptions const *TargetOpts, bool DoAutoReset) : SrcMgr(mgr), InlineSrcMgr(nullptr), MAI(mai), MRI(mri), MOFI(mofi), Symbols(Allocator), UsedNames(Allocator), + InlineAsmUsedLabelNames(Allocator), CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0), - AutoReset(DoAutoReset) { + AutoReset(DoAutoReset), TargetOptions(TargetOpts) { SecureLogFile = AsSecureLogFileName; if (SrcMgr && SrcMgr->getNumBuffers()) @@ -90,6 +91,7 @@ void MCContext::reset() { XCOFFAllocator.DestroyAll(); MCSubtargetAllocator.DestroyAll(); + InlineAsmUsedLabelNames.clear(); UsedNames.clear(); Symbols.clear(); Allocator.Reset(); @@ -272,6 +274,10 @@ void MCContext::setSymbolValue(MCStreamer &Streamer, Streamer.EmitAssignment(Symbol, MCConstantExpr::create(Val, *this)); } +void MCContext::registerInlineAsmLabel(MCSymbol *Sym) { + 
InlineAsmUsedLabelNames[Sym->getName()] = Sym; +} + //===----------------------------------------------------------------------===// // Section Management //===----------------------------------------------------------------------===// @@ -531,6 +537,8 @@ MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind Kind, MCSectionXCOFF *MCContext::getXCOFFSection(StringRef Section, XCOFF::StorageMappingClass SMC, + XCOFF::SymbolType Type, + XCOFF::StorageClass SC, SectionKind Kind, const char *BeginSymName) { // Do the lookup. If we have a hit, return it. @@ -548,7 +556,7 @@ MCSectionXCOFF *MCContext::getXCOFFSection(StringRef Section, Begin = createTempSymbol(BeginSymName, false); MCSectionXCOFF *Result = new (XCOFFAllocator.Allocate()) - MCSectionXCOFF(CachedName, SMC, Kind, Begin); + MCSectionXCOFF(CachedName, SMC, Type, SC, Kind, Begin); Entry.second = Result; auto *F = new MCDataFragment(); @@ -690,6 +698,21 @@ void MCContext::reportError(SMLoc Loc, const Twine &Msg) { report_fatal_error(Msg, false); } +void MCContext::reportWarning(SMLoc Loc, const Twine &Msg) { + if (TargetOptions && TargetOptions->MCNoWarn) + return; + if (TargetOptions && TargetOptions->MCFatalWarnings) + reportError(Loc, Msg); + else { + // If we have a source manager use it. Otherwise, try using the inline + // source manager. + if (SrcMgr) + SrcMgr->PrintMessage(Loc, SourceMgr::DK_Warning, Msg); + else if (InlineSrcMgr) + InlineSrcMgr->PrintMessage(Loc, SourceMgr::DK_Warning, Msg); + } +} + void MCContext::reportFatalError(SMLoc Loc, const Twine &Msg) { reportError(Loc, Msg); diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index aae6fdf90931..bcc7c45afc01 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -544,8 +544,8 @@ Expected MCDwarfLineTable::tryGetFile(StringRef &Directory, FileNumber); } -bool isRootFile(const MCDwarfFile &RootFile, StringRef &Directory, - StringRef &FileName, Optional Checksum) { +static bool isRootFile(const MCDwarfFile &RootFile, StringRef &Directory, + StringRef &FileName, Optional Checksum) { if (RootFile.Name.empty() || RootFile.Name != FileName.data()) return false; return RootFile.Checksum == Checksum; @@ -1897,26 +1897,54 @@ void MCDwarfFrameEmitter::EmitAdvanceLoc(MCObjectStreamer &Streamer, } void MCDwarfFrameEmitter::EncodeAdvanceLoc(MCContext &Context, - uint64_t AddrDelta, - raw_ostream &OS) { + uint64_t AddrDelta, raw_ostream &OS, + uint32_t *Offset, uint32_t *Size) { // Scale the address delta by the minimum instruction length. AddrDelta = ScaleAddrDelta(Context, AddrDelta); + bool WithFixups = false; + if (Offset && Size) + WithFixups = true; + support::endianness E = Context.getAsmInfo()->isLittleEndian() ? 
support::little : support::big; if (AddrDelta == 0) { + if (WithFixups) { + *Offset = 0; + *Size = 0; + } } else if (isUIntN(6, AddrDelta)) { uint8_t Opcode = dwarf::DW_CFA_advance_loc | AddrDelta; - OS << Opcode; + if (WithFixups) { + *Offset = OS.tell(); + *Size = 6; + OS << uint8_t(dwarf::DW_CFA_advance_loc); + } else + OS << Opcode; } else if (isUInt<8>(AddrDelta)) { OS << uint8_t(dwarf::DW_CFA_advance_loc1); - OS << uint8_t(AddrDelta); + if (WithFixups) { + *Offset = OS.tell(); + *Size = 8; + OS.write_zeros(1); + } else + OS << uint8_t(AddrDelta); } else if (isUInt<16>(AddrDelta)) { OS << uint8_t(dwarf::DW_CFA_advance_loc2); - support::endian::write(OS, AddrDelta, E); + if (WithFixups) { + *Offset = OS.tell(); + *Size = 16; + OS.write_zeros(2); + } else + support::endian::write(OS, AddrDelta, E); } else { assert(isUInt<32>(AddrDelta)); OS << uint8_t(dwarf::DW_CFA_advance_loc4); - support::endian::write(OS, AddrDelta, E); + if (WithFixups) { + *Offset = OS.tell(); + *Size = 32; + OS.write_zeros(4); + } else + support::endian::write(OS, AddrDelta, E); } } diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index 245dd063004f..fa2133078bfe 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -139,7 +139,7 @@ static void setSectionAlignmentForBundling(const MCAssembler &Assembler, MCSection *Section) { if (Section && Assembler.isBundlingEnabled() && Section->hasInstructions() && Section->getAlignment() < Assembler.getBundleAlignSize()) - Section->setAlignment(Assembler.getBundleAlignSize()); + Section->setAlignment(Align(Assembler.getBundleAlignSize())); } void MCELFStreamer::ChangeSection(MCSection *Section, @@ -277,6 +277,9 @@ bool MCELFStreamer::EmitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) { case MCSA_AltEntry: llvm_unreachable("ELF doesn't support the .alt_entry attribute"); + + case MCSA_LGlobal: + llvm_unreachable("ELF doesn't support the .lglobl attribute"); } return true; @@ -306,7 +309,7 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *S, uint64_t Size, // Update the maximum alignment of the section if necessary. 
if (ByteAlignment > Section.getAlignment()) - Section.setAlignment(ByteAlignment); + Section.setAlignment(Align(ByteAlignment)); SwitchSection(P.first, P.second); } else { diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index ab53ed42778e..813c00f6f3bb 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -259,6 +259,8 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_PPC_TOC_LO: return "toc@l"; case VK_PPC_TOC_HI: return "toc@h"; case VK_PPC_TOC_HA: return "toc@ha"; + case VK_PPC_U: return "u"; + case VK_PPC_L: return "l"; case VK_PPC_DTPMOD: return "dtpmod"; case VK_PPC_TPREL_LO: return "tprel@l"; case VK_PPC_TPREL_HI: return "tprel@h"; @@ -373,6 +375,8 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) { .Case("toc@l", VK_PPC_TOC_LO) .Case("toc@h", VK_PPC_TOC_HI) .Case("toc@ha", VK_PPC_TOC_HA) + .Case("u", VK_PPC_U) + .Case("l", VK_PPC_L) .Case("tls", VK_PPC_TLS) .Case("dtpmod", VK_PPC_DTPMOD) .Case("tprel@l", VK_PPC_TPREL_LO) @@ -453,26 +457,28 @@ void MCTargetExpr::anchor() {} /* *** */ bool MCExpr::evaluateAsAbsolute(int64_t &Res) const { - return evaluateAsAbsolute(Res, nullptr, nullptr, nullptr); + return evaluateAsAbsolute(Res, nullptr, nullptr, nullptr, false); } bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAsmLayout &Layout) const { - return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, nullptr); + return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, nullptr, false); } bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAsmLayout &Layout, const SectionAddrMap &Addrs) const { - return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, &Addrs); + // Setting InSet causes us to absolutize differences across sections and that + // is what the MachO writer uses Addrs for. + return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, &Addrs, true); } bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const { - return evaluateAsAbsolute(Res, &Asm, nullptr, nullptr); + return evaluateAsAbsolute(Res, &Asm, nullptr, nullptr, false); } bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm) const { - return evaluateAsAbsolute(Res, Asm, nullptr, nullptr); + return evaluateAsAbsolute(Res, Asm, nullptr, nullptr, false); } bool MCExpr::evaluateKnownAbsolute(int64_t &Res, @@ -481,15 +487,6 @@ bool MCExpr::evaluateKnownAbsolute(int64_t &Res, true); } -bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm, - const MCAsmLayout *Layout, - const SectionAddrMap *Addrs) const { - // FIXME: The use if InSet = Addrs is a hack. Setting InSet causes us - // absolutize differences across sections and that is what the MachO writer - // uses Addrs for. - return evaluateAsAbsolute(Res, Asm, Layout, Addrs, Addrs); -} - bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm, const MCAsmLayout *Layout, const SectionAddrMap *Addrs, bool InSet) const { @@ -577,6 +574,24 @@ static void AttemptToFoldSymbolOffsetDifference( A = B = nullptr; } +static bool canFold(const MCAssembler *Asm, const MCSymbolRefExpr *A, + const MCSymbolRefExpr *B, bool InSet) { + if (InSet) + return true; + + if (!Asm->getBackend().requiresDiffExpressionRelocations()) + return true; + + const MCSymbol &CheckSym = A ? A->getSymbol() : B->getSymbol(); + if (!CheckSym.isInSection()) + return true; + + if (!CheckSym.getSection().hasInstructions()) + return true; + + return false; +} + /// Evaluate the result of an add between (conceptually) two MCValues. 
/// /// This routine conceptually attempts to construct an MCValue: @@ -617,8 +632,7 @@ EvaluateSymbolicAdd(const MCAssembler *Asm, const MCAsmLayout *Layout, // the backend requires this to be emitted as individual relocations, unless // the InSet flag is set to get the current difference anyway (used for // example to calculate symbol sizes). - if (Asm && - (InSet || !Asm->getBackend().requiresDiffExpressionRelocations())) { + if (Asm && canFold(Asm, LHS_A, LHS_B, InSet)) { // First, fold out any differences which are fully resolved. By // reassociating terms in // Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst). diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp index 159f4070fe9f..c5c06f323e68 100644 --- a/lib/MC/MCInstPrinter.cpp +++ b/lib/MC/MCInstPrinter.cpp @@ -64,12 +64,6 @@ StringRef MCInstPrinter::markup(StringRef s) const { else return ""; } -StringRef MCInstPrinter::markup(StringRef a, StringRef b) const { - if (getUseMarkup()) - return a; - else - return b; -} // For asm-style hex (e.g. 0ffh) the first digit always has to be a number. static bool needsLeadingZero(uint64_t Value) @@ -89,24 +83,25 @@ format_object MCInstPrinter::formatDec(int64_t Value) const { } format_object MCInstPrinter::formatHex(int64_t Value) const { - switch(PrintHexStyle) { + switch (PrintHexStyle) { case HexStyle::C: - if (Value < 0) + if (Value < 0) { + if (Value == std::numeric_limits::min()) + return format("-0x8000000000000000", Value); return format("-0x%" PRIx64, -Value); - else - return format("0x%" PRIx64, Value); + } + return format("0x%" PRIx64, Value); case HexStyle::Asm: if (Value < 0) { - if (needsLeadingZero((uint64_t)(-Value))) + if (Value == std::numeric_limits::min()) + return format("-8000000000000000h", Value); + if (needsLeadingZero(-(uint64_t)(Value))) return format("-0%" PRIx64 "h", -Value); - else - return format("-%" PRIx64 "h", -Value); - } else { - if (needsLeadingZero((uint64_t)(Value))) - return format("0%" PRIx64 "h", Value); - else - return format("%" PRIx64 "h", Value); + return format("-%" PRIx64 "h", -Value); } + if (needsLeadingZero((uint64_t)(Value))) + return format("0%" PRIx64 "h", Value); + return format("%" PRIx64 "h", Value); } llvm_unreachable("unsupported print style"); } diff --git a/lib/MC/MCInstrAnalysis.cpp b/lib/MC/MCInstrAnalysis.cpp index eca87f940bf5..54741fdd686d 100644 --- a/lib/MC/MCInstrAnalysis.cpp +++ b/lib/MC/MCInstrAnalysis.cpp @@ -33,3 +33,9 @@ bool MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr, Target = Addr+Size+Imm; return true; } + +Optional +MCInstrAnalysis::evaluateMemoryOperandAddress(const MCInst &Inst, uint64_t Addr, + uint64_t Size) const { + return None; +} diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 613f255a4ea4..8e558a36b7a1 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -330,6 +330,7 @@ bool MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Sym, case MCSA_Protected: case MCSA_Weak: case MCSA_Local: + case MCSA_LGlobal: return false; case MCSA_Global: diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 9f555abe1404..70c0409ece7a 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -28,7 +28,7 @@ static bool useCompactUnwind(const Triple &T) { return false; // aarch64 always has it. - if (T.getArch() == Triple::aarch64) + if (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32) return true; // armv7k always has it. 
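Among the hunks above, MCInstPrinter::formatHex gains a special case for the most negative 64-bit value, since negating it overflows. A rough standalone illustration of the same guard for the C-style branch follows; it is plain C++ with a hypothetical function name, not the LLVM formatter itself.

#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <limits>
#include <string>

// Print a signed value in C-style hex. INT64_MIN cannot be negated safely,
// so it is handled with a fixed string, mirroring the check added above.
std::string toCHex(int64_t Value) {
  if (Value == std::numeric_limits<int64_t>::min())
    return "-0x8000000000000000";
  char Buf[32];
  if (Value < 0)
    std::snprintf(Buf, sizeof(Buf), "-0x%" PRIx64, (uint64_t)-Value);
  else
    std::snprintf(Buf, sizeof(Buf), "0x%" PRIx64, (uint64_t)Value);
  return Buf;
}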
@@ -57,7 +57,8 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) { MachO::S_ATTR_STRIP_STATIC_SYMS | MachO::S_ATTR_LIVE_SUPPORT, SectionKind::getReadOnly()); - if (T.isOSDarwin() && T.getArch() == Triple::aarch64) + if (T.isOSDarwin() && + (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32)) SupportsCompactUnwindWithoutEHFrame = true; if (T.isWatchABI()) @@ -193,7 +194,7 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) { if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86) CompactUnwindDwarfEHFrameOnly = 0x04000000; // UNWIND_X86_64_MODE_DWARF - else if (T.getArch() == Triple::aarch64) + else if (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32) CompactUnwindDwarfEHFrameOnly = 0x03000000; // UNWIND_ARM64_MODE_DWARF else if (T.getArch() == Triple::arm || T.getArch() == Triple::thumb) CompactUnwindDwarfEHFrameOnly = 0x04000000; // UNWIND_ARM_MODE_DWARF @@ -768,7 +769,12 @@ void MCObjectFileInfo::initXCOFFMCObjectFileInfo(const Triple &T) { // the ABI or object file format. For example, the XL compiler uses an unnamed // csect for program code. TextSection = Ctx->getXCOFFSection( - ".text", XCOFF::StorageMappingClass::XMC_PR, SectionKind::getText()); + ".text", XCOFF::StorageMappingClass::XMC_PR, XCOFF::XTY_SD, + XCOFF::C_HIDEXT, SectionKind::getText()); + + DataSection = Ctx->getXCOFFSection( + ".data", XCOFF::StorageMappingClass::XMC_RW, XCOFF::XTY_SD, + XCOFF::C_HIDEXT, SectionKind::getData()); } void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple, bool PIC, diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index 1587d8498666..83f6ab8fe332 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -27,7 +27,7 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context, std::unique_ptr OW, std::unique_ptr Emitter) : MCStreamer(Context), - Assembler(llvm::make_unique( + Assembler(std::make_unique( Context, std::move(TAB), std::move(Emitter), std::move(OW))), EmitEHFrame(true), EmitDebugFrame(false) {} @@ -539,7 +539,7 @@ void MCObjectStreamer::EmitValueToAlignment(unsigned ByteAlignment, // Update the maximum alignment on the current section if necessary. MCSection *CurSec = getCurrentSectionOnly(); if (ByteAlignment > CurSec->getAlignment()) - CurSec->setAlignment(ByteAlignment); + CurSec->setAlignment(Align(ByteAlignment)); } void MCObjectStreamer::EmitCodeAlignment(unsigned ByteAlignment, diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 084f6a7a2e14..b59ac08ad6cc 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeView.h" #include "llvm/MC/MCContext.h" @@ -524,6 +525,19 @@ private: /// directives parsed by this class. StringMap DirectiveKindMap; + // Codeview def_range type parsing. + enum CVDefRangeType { + CVDR_DEFRANGE = 0, // Placeholder + CVDR_DEFRANGE_REGISTER, + CVDR_DEFRANGE_FRAMEPOINTER_REL, + CVDR_DEFRANGE_SUBFIELD_REGISTER, + CVDR_DEFRANGE_REGISTER_REL + }; + + /// Maps Codeview def_range types --> CVDefRangeType enum, for + /// Codeview def_range types parsed by this class. 
+ StringMap CVDefRangeTypeMap; + // ".ascii", ".asciz", ".string" bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated); bool parseDirectiveReloc(SMLoc DirectiveLoc); // ".reloc" @@ -671,6 +685,7 @@ private: bool parseDirectiveAddrsigSym(); void initializeDirectiveKindMap(); + void initializeCVDefRangeTypeMap(); }; } // end anonymous namespace @@ -714,12 +729,14 @@ AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, PlatformParser.reset(createWasmAsmParser()); break; case MCObjectFileInfo::IsXCOFF: - // TODO: Need to implement createXCOFFAsmParser for XCOFF format. + report_fatal_error( + "Need to implement createXCOFFAsmParser for XCOFF format."); break; } PlatformParser->Initialize(*this); initializeDirectiveKindMap(); + initializeCVDefRangeTypeMap(); NumOfMacroInstantiations = 0; } @@ -1142,7 +1159,9 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { } } - MCSymbol *Sym = getContext().getOrCreateSymbol(SymbolName); + MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName); + if (!Sym) + Sym = getContext().getOrCreateSymbol(SymbolName); // If this is an absolute variable reference, substitute it now to preserve // semantics in the face of reassignment. @@ -1737,6 +1756,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, StringMap::const_iterator DirKindIt = DirectiveKindMap.find(IDVal); DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end()) + ? DK_NO_DIRECTIVE : DirKindIt->getValue(); switch (DirKind) { @@ -2895,11 +2915,27 @@ bool AsmParser::parseEscapedString(std::string &Data) { } // Recognize escaped characters. Note that this escape semantics currently - // loosely follows Darwin 'as'. Notably, it doesn't support hex escapes. + // loosely follows Darwin 'as'. ++i; if (i == e) return TokError("unexpected backslash at end of string"); + // Recognize hex sequences similarly to GNU 'as'. + if (Str[i] == 'x' || Str[i] == 'X') { + size_t length = Str.size(); + if (i + 1 >= length || !isHexDigit(Str[i + 1])) + return TokError("invalid hexadecimal escape sequence"); + + // Consume hex characters. GNU 'as' reads all hexadecimal characters and + // then truncates to the lower 16 bits. Seems reasonable. + unsigned Value = 0; + while (i + 1 < length && isHexDigit(Str[i + 1])) + Value = Value * 16 + hexDigitValue(Str[++i]); + + Data += (unsigned char)(Value & 0xFF); + continue; + } + // Recognize octal sequences. if ((unsigned)(Str[i] - '0') <= 7) { // Consume up to three octal characters. 
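The parseEscapedString hunk above adds GNU-as-style "\x" escapes: every hex digit after the "\x" is consumed and only the low byte of the accumulated value is kept. A self-contained sketch of that scanning loop is below; helper names are hypothetical and, unlike the real parser, malformed escapes are passed through instead of producing a diagnostic.

#include <cctype>
#include <string>

// Illustrative only: digit value of a hex character.
static int hexValue(char C) {
  if (C >= '0' && C <= '9') return C - '0';
  if (C >= 'a' && C <= 'f') return C - 'a' + 10;
  return C - 'A' + 10;
}

// Expand \x.. escapes the way the new code does: read all hex digits,
// then keep only the low 8 bits of the accumulated value.
std::string expandHexEscapes(const std::string &Str) {
  std::string Out;
  for (size_t i = 0; i < Str.size(); ++i) {
    bool IsHexEscape = Str[i] == '\\' && i + 2 < Str.size() &&
                       (Str[i + 1] == 'x' || Str[i + 1] == 'X') &&
                       std::isxdigit((unsigned char)Str[i + 2]);
    if (!IsHexEscape) {
      Out += Str[i];
      continue;
    }
    ++i; // now at 'x'
    unsigned Value = 0;
    while (i + 1 < Str.size() && std::isxdigit((unsigned char)Str[i + 1]))
      Value = Value * 16 + hexValue(Str[++i]);
    Out += (char)(Value & 0xFF);
  }
  return Out;
}

With input \x41\x6c the function produces "Al", and a longer run such as \x1234 keeps only 0x34.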
@@ -3825,6 +3861,13 @@ bool AsmParser::parseDirectiveCVInlineLinetable() { return false; } +void AsmParser::initializeCVDefRangeTypeMap() { + CVDefRangeTypeMap["reg"] = CVDR_DEFRANGE_REGISTER; + CVDefRangeTypeMap["frame_ptr_rel"] = CVDR_DEFRANGE_FRAMEPOINTER_REL; + CVDefRangeTypeMap["subfield_reg"] = CVDR_DEFRANGE_SUBFIELD_REGISTER; + CVDefRangeTypeMap["reg_rel"] = CVDR_DEFRANGE_REGISTER_REL; +} + /// parseDirectiveCVDefRange /// ::= .cv_def_range RangeStart RangeEnd (GapStart GapEnd)*, bytes* bool AsmParser::parseDirectiveCVDefRange() { @@ -3846,13 +3889,92 @@ bool AsmParser::parseDirectiveCVDefRange() { Ranges.push_back({GapStartSym, GapEndSym}); } - std::string FixedSizePortion; - if (parseToken(AsmToken::Comma, "unexpected token in directive") || - parseEscapedString(FixedSizePortion)) - return true; - - getStreamer().EmitCVDefRangeDirective(Ranges, FixedSizePortion); - return false; + StringRef CVDefRangeTypeStr; + if (parseToken( + AsmToken::Comma, + "expected comma before def_range type in .cv_def_range directive") || + parseIdentifier(CVDefRangeTypeStr)) + return Error(Loc, "expected def_range type in directive"); + + StringMap::const_iterator CVTypeIt = + CVDefRangeTypeMap.find(CVDefRangeTypeStr); + CVDefRangeType CVDRType = (CVTypeIt == CVDefRangeTypeMap.end()) + ? CVDR_DEFRANGE + : CVTypeIt->getValue(); + switch (CVDRType) { + case CVDR_DEFRANGE_REGISTER: { + int64_t DRRegister; + if (parseToken(AsmToken::Comma, "expected comma before register number in " + ".cv_def_range directive") || + parseAbsoluteExpression(DRRegister)) + return Error(Loc, "expected register number"); + + codeview::DefRangeRegisterHeader DRHdr; + DRHdr.Register = DRRegister; + DRHdr.MayHaveNoName = 0; + getStreamer().EmitCVDefRangeDirective(Ranges, DRHdr); + break; + } + case CVDR_DEFRANGE_FRAMEPOINTER_REL: { + int64_t DROffset; + if (parseToken(AsmToken::Comma, + "expected comma before offset in .cv_def_range directive") || + parseAbsoluteExpression(DROffset)) + return Error(Loc, "expected offset value"); + + codeview::DefRangeFramePointerRelHeader DRHdr; + DRHdr.Offset = DROffset; + getStreamer().EmitCVDefRangeDirective(Ranges, DRHdr); + break; + } + case CVDR_DEFRANGE_SUBFIELD_REGISTER: { + int64_t DRRegister; + int64_t DROffsetInParent; + if (parseToken(AsmToken::Comma, "expected comma before register number in " + ".cv_def_range directive") || + parseAbsoluteExpression(DRRegister)) + return Error(Loc, "expected register number"); + if (parseToken(AsmToken::Comma, + "expected comma before offset in .cv_def_range directive") || + parseAbsoluteExpression(DROffsetInParent)) + return Error(Loc, "expected offset value"); + + codeview::DefRangeSubfieldRegisterHeader DRHdr; + DRHdr.Register = DRRegister; + DRHdr.MayHaveNoName = 0; + DRHdr.OffsetInParent = DROffsetInParent; + getStreamer().EmitCVDefRangeDirective(Ranges, DRHdr); + break; + } + case CVDR_DEFRANGE_REGISTER_REL: { + int64_t DRRegister; + int64_t DRFlags; + int64_t DRBasePointerOffset; + if (parseToken(AsmToken::Comma, "expected comma before register number in " + ".cv_def_range directive") || + parseAbsoluteExpression(DRRegister)) + return Error(Loc, "expected register value"); + if (parseToken( + AsmToken::Comma, + "expected comma before flag value in .cv_def_range directive") || + parseAbsoluteExpression(DRFlags)) + return Error(Loc, "expected flag value"); + if (parseToken(AsmToken::Comma, "expected comma before base pointer offset " + "in .cv_def_range directive") || + parseAbsoluteExpression(DRBasePointerOffset)) + return Error(Loc, "expected 
base pointer offset value"); + + codeview::DefRangeRegisterRelHeader DRHdr; + DRHdr.Register = DRRegister; + DRHdr.Flags = DRFlags; + DRHdr.BasePointerOffset = DRBasePointerOffset; + getStreamer().EmitCVDefRangeDirective(Ranges, DRHdr); + break; + } + default: + return Error(Loc, "unexpected def_range type in .cv_def_range directive"); + } + return true; } /// parseDirectiveCVString diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp index 1217ea99e465..06f8310ae061 100644 --- a/lib/MC/MCParser/COFFAsmParser.cpp +++ b/lib/MC/MCParser/COFFAsmParser.cpp @@ -69,6 +69,7 @@ class COFFAsmParser : public MCAsmParserExtension { addDirectiveHandler<&COFFAsmParser::ParseDirectiveSecIdx>(".secidx"); addDirectiveHandler<&COFFAsmParser::ParseDirectiveLinkOnce>(".linkonce"); addDirectiveHandler<&COFFAsmParser::ParseDirectiveRVA>(".rva"); + addDirectiveHandler<&COFFAsmParser::ParseDirectiveSymbolAttribute>(".weak"); // Win64 EH directives. addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveStartProc>( @@ -83,21 +84,10 @@ class COFFAsmParser : public MCAsmParserExtension { ".seh_handler"); addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveHandlerData>( ".seh_handlerdata"); - addDirectiveHandler<&COFFAsmParser::ParseSEHDirectivePushReg>( - ".seh_pushreg"); - addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSetFrame>( - ".seh_setframe"); addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveAllocStack>( ".seh_stackalloc"); - addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSaveReg>( - ".seh_savereg"); - addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSaveXMM>( - ".seh_savexmm"); - addDirectiveHandler<&COFFAsmParser::ParseSEHDirectivePushFrame>( - ".seh_pushframe"); addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndProlog>( ".seh_endprologue"); - addDirectiveHandler<&COFFAsmParser::ParseDirectiveSymbolAttribute>(".weak"); } bool ParseSectionDirectiveText(StringRef, SMLoc) { @@ -143,12 +133,7 @@ class COFFAsmParser : public MCAsmParserExtension { bool ParseSEHDirectiveEndChained(StringRef, SMLoc); bool ParseSEHDirectiveHandler(StringRef, SMLoc); bool ParseSEHDirectiveHandlerData(StringRef, SMLoc); - bool ParseSEHDirectivePushReg(StringRef, SMLoc); - bool ParseSEHDirectiveSetFrame(StringRef, SMLoc); bool ParseSEHDirectiveAllocStack(StringRef, SMLoc); - bool ParseSEHDirectiveSaveReg(StringRef, SMLoc); - bool ParseSEHDirectiveSaveXMM(StringRef, SMLoc); - bool ParseSEHDirectivePushFrame(StringRef, SMLoc); bool ParseSEHDirectiveEndProlog(StringRef, SMLoc); bool ParseAtUnwindOrAtExcept(bool &unwind, bool &except); @@ -682,39 +667,6 @@ bool COFFAsmParser::ParseSEHDirectiveHandlerData(StringRef, SMLoc Loc) { return false; } -bool COFFAsmParser::ParseSEHDirectivePushReg(StringRef, SMLoc Loc) { - unsigned Reg = 0; - if (ParseSEHRegisterNumber(Reg)) - return true; - - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in directive"); - - Lex(); - getStreamer().EmitWinCFIPushReg(Reg, Loc); - return false; -} - -bool COFFAsmParser::ParseSEHDirectiveSetFrame(StringRef, SMLoc Loc) { - unsigned Reg = 0; - int64_t Off; - if (ParseSEHRegisterNumber(Reg)) - return true; - if (getLexer().isNot(AsmToken::Comma)) - return TokError("you must specify a stack pointer offset"); - - Lex(); - if (getParser().parseAbsoluteExpression(Off)) - return true; - - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in directive"); - - Lex(); - getStreamer().EmitWinCFISetFrame(Reg, Off, Loc); - return false; -} - bool 
COFFAsmParser::ParseSEHDirectiveAllocStack(StringRef, SMLoc Loc) { int64_t Size; if (getParser().parseAbsoluteExpression(Size)) @@ -728,71 +680,6 @@ bool COFFAsmParser::ParseSEHDirectiveAllocStack(StringRef, SMLoc Loc) { return false; } -bool COFFAsmParser::ParseSEHDirectiveSaveReg(StringRef, SMLoc Loc) { - unsigned Reg = 0; - int64_t Off; - if (ParseSEHRegisterNumber(Reg)) - return true; - if (getLexer().isNot(AsmToken::Comma)) - return TokError("you must specify an offset on the stack"); - - Lex(); - if (getParser().parseAbsoluteExpression(Off)) - return true; - - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in directive"); - - Lex(); - // FIXME: Err on %xmm* registers - getStreamer().EmitWinCFISaveReg(Reg, Off, Loc); - return false; -} - -// FIXME: This method is inherently x86-specific. It should really be in the -// x86 backend. -bool COFFAsmParser::ParseSEHDirectiveSaveXMM(StringRef, SMLoc Loc) { - unsigned Reg = 0; - int64_t Off; - if (ParseSEHRegisterNumber(Reg)) - return true; - if (getLexer().isNot(AsmToken::Comma)) - return TokError("you must specify an offset on the stack"); - - Lex(); - if (getParser().parseAbsoluteExpression(Off)) - return true; - - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in directive"); - - Lex(); - // FIXME: Err on non-%xmm* registers - getStreamer().EmitWinCFISaveXMM(Reg, Off, Loc); - return false; -} - -bool COFFAsmParser::ParseSEHDirectivePushFrame(StringRef, SMLoc Loc) { - bool Code = false; - StringRef CodeID; - if (getLexer().is(AsmToken::At)) { - SMLoc startLoc = getLexer().getLoc(); - Lex(); - if (!getParser().parseIdentifier(CodeID)) { - if (CodeID != "code") - return Error(startLoc, "expected @code"); - Code = true; - } - } - - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in directive"); - - Lex(); - getStreamer().EmitWinCFIPushFrame(Code, Loc); - return false; -} - bool COFFAsmParser::ParseSEHDirectiveEndProlog(StringRef, SMLoc Loc) { Lex(); getStreamer().EmitWinCFIEndProlog(Loc); @@ -816,46 +703,6 @@ bool COFFAsmParser::ParseAtUnwindOrAtExcept(bool &unwind, bool &except) { return false; } -bool COFFAsmParser::ParseSEHRegisterNumber(unsigned &RegNo) { - SMLoc startLoc = getLexer().getLoc(); - if (getLexer().is(AsmToken::Percent)) { - const MCRegisterInfo *MRI = getContext().getRegisterInfo(); - SMLoc endLoc; - unsigned LLVMRegNo; - if (getParser().getTargetParser().ParseRegister(LLVMRegNo,startLoc,endLoc)) - return true; - -#if 0 - // FIXME: TargetAsmInfo::getCalleeSavedRegs() commits a serious layering - // violation so this validation code is disabled. - - // Check that this is a non-volatile register. 
- const unsigned *NVRegs = TAI.getCalleeSavedRegs(); - unsigned i; - for (i = 0; NVRegs[i] != 0; ++i) - if (NVRegs[i] == LLVMRegNo) - break; - if (NVRegs[i] == 0) - return Error(startLoc, "expected non-volatile register"); -#endif - - int SEHRegNo = MRI->getSEHRegNum(LLVMRegNo); - if (SEHRegNo < 0) - return Error(startLoc,"register can't be represented in SEH unwind info"); - RegNo = SEHRegNo; - } - else { - int64_t n; - if (getParser().parseAbsoluteExpression(n)) - return true; - if (n > 15) - return Error(startLoc, "register number is too high"); - RegNo = n; - } - - return false; -} - namespace llvm { MCAsmParserExtension *createCOFFAsmParser() { diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp index 1160934dc62c..bd66e5f39c0d 100644 --- a/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/lib/MC/MCParser/DarwinAsmParser.cpp @@ -778,8 +778,8 @@ bool DarwinAsmParser::parseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) { raw_fd_ostream *OS = getContext().getSecureLog(); if (!OS) { std::error_code EC; - auto NewOS = llvm::make_unique( - StringRef(SecureLogFile), EC, sys::fs::F_Append | sys::fs::F_Text); + auto NewOS = std::make_unique( + StringRef(SecureLogFile), EC, sys::fs::OF_Append | sys::fs::OF_Text); if (EC) return Error(IDLoc, Twine("can't open secure log file: ") + SecureLogFile + " (" + EC.message() + ")"); diff --git a/lib/MC/MCParser/WasmAsmParser.cpp b/lib/MC/MCParser/WasmAsmParser.cpp index 28d4459fecd4..0c242aed706d 100644 --- a/lib/MC/MCParser/WasmAsmParser.cpp +++ b/lib/MC/MCParser/WasmAsmParser.cpp @@ -123,6 +123,7 @@ public: // See use of .init_array in WasmObjectWriter and // TargetLoweringObjectFileWasm .StartsWith(".init_array", SectionKind::getData()) + .StartsWith(".debug_", SectionKind::getMetadata()) .Default(Optional()); if (!Kind.hasValue()) return Parser->Error(Lexer->getLoc(), "unknown section kind: " + Name); diff --git a/lib/MC/MCRegisterInfo.cpp b/lib/MC/MCRegisterInfo.cpp index 4273b876b7bb..d491c0eb7e06 100644 --- a/lib/MC/MCRegisterInfo.cpp +++ b/lib/MC/MCRegisterInfo.cpp @@ -20,15 +20,16 @@ using namespace llvm; -unsigned MCRegisterInfo::getMatchingSuperReg(unsigned Reg, unsigned SubIdx, - const MCRegisterClass *RC) const { +MCRegister +MCRegisterInfo::getMatchingSuperReg(MCRegister Reg, unsigned SubIdx, + const MCRegisterClass *RC) const { for (MCSuperRegIterator Supers(Reg, this); Supers.isValid(); ++Supers) if (RC->contains(*Supers) && Reg == getSubReg(*Supers, SubIdx)) return *Supers; return 0; } -unsigned MCRegisterInfo::getSubReg(unsigned Reg, unsigned Idx) const { +MCRegister MCRegisterInfo::getSubReg(MCRegister Reg, unsigned Idx) const { assert(Idx && Idx < getNumSubRegIndices() && "This is not a subregister index"); // Get a pointer to the corresponding SubRegIndices list. This list has the @@ -40,7 +41,8 @@ unsigned MCRegisterInfo::getSubReg(unsigned Reg, unsigned Idx) const { return 0; } -unsigned MCRegisterInfo::getSubRegIndex(unsigned Reg, unsigned SubReg) const { +unsigned MCRegisterInfo::getSubRegIndex(MCRegister Reg, + MCRegister SubReg) const { assert(SubReg && SubReg < getNumRegs() && "This is not a register"); // Get a pointer to the corresponding SubRegIndices list. This list has the // name of each sub-register in the same order as MCSubRegIterator. 
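The MCRegisterInfo hunks that follow replace the old -1 sentinel of getLLVMRegNum with an Optional result from a sorted-table lookup. A small sketch of that lookup pattern, using std::optional in place of llvm::Optional and illustrative container types, is shown here.

#include <algorithm>
#include <optional>
#include <utility>
#include <vector>

// Table sorted by DWARF register number; a miss returns nullopt instead of
// -1, so callers cannot confuse the sentinel with a real register number.
using RegPair = std::pair<unsigned, unsigned>; // {DwarfReg, LLVMReg}

std::optional<unsigned> lookupLLVMReg(const std::vector<RegPair> &Map,
                                      unsigned DwarfReg) {
  auto It = std::lower_bound(Map.begin(), Map.end(), RegPair{DwarfReg, 0});
  if (It != Map.end() && It->first == DwarfReg)
    return It->second;
  return std::nullopt;
}

A caller then writes "if (auto R = lookupLLVMReg(Map, Reg)) use(*R);", which is the shape the EmitRegisterName change earlier in this patch adopts.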
@@ -63,7 +65,7 @@ unsigned MCRegisterInfo::getSubRegIdxOffset(unsigned Idx) const { return SubRegIdxRanges[Idx].Offset; } -int MCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { +int MCRegisterInfo::getDwarfRegNum(MCRegister RegNum, bool isEH) const { const DwarfLLVMRegPair *M = isEH ? EHL2DwarfRegs : L2DwarfRegs; unsigned Size = isEH ? EHL2DwarfRegsSize : L2DwarfRegsSize; @@ -76,29 +78,18 @@ int MCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { return I->ToReg; } -int MCRegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const { +Optional MCRegisterInfo::getLLVMRegNum(unsigned RegNum, + bool isEH) const { const DwarfLLVMRegPair *M = isEH ? EHDwarf2LRegs : Dwarf2LRegs; unsigned Size = isEH ? EHDwarf2LRegsSize : Dwarf2LRegsSize; if (!M) - return -1; - DwarfLLVMRegPair Key = { RegNum, 0 }; - const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key); - assert(I != M+Size && I->FromReg == RegNum && "Invalid RegNum"); - return I->ToReg; -} - -int MCRegisterInfo::getLLVMRegNumFromEH(unsigned RegNum) const { - const DwarfLLVMRegPair *M = EHDwarf2LRegs; - unsigned Size = EHDwarf2LRegsSize; - - if (!M) - return -1; + return None; DwarfLLVMRegPair Key = { RegNum, 0 }; const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key); - if (I == M+Size || I->FromReg != RegNum) - return -1; - return I->ToReg; + if (I != M + Size && I->FromReg == RegNum) + return I->ToReg; + return None; } int MCRegisterInfo::getDwarfRegNumFromDwarfEHRegNum(unsigned RegNum) const { @@ -110,22 +101,21 @@ int MCRegisterInfo::getDwarfRegNumFromDwarfEHRegNum(unsigned RegNum) const { // a corresponding LLVM register number at all. So if we can't map the // EH register number to an LLVM register number, assume it's just a // valid DWARF register number as is. - int LRegNum = getLLVMRegNumFromEH(RegNum); - if (LRegNum != -1) - return getDwarfRegNum(LRegNum, false); + if (Optional LRegNum = getLLVMRegNum(RegNum, true)) + return getDwarfRegNum(*LRegNum, false); return RegNum; } -int MCRegisterInfo::getSEHRegNum(unsigned RegNum) const { - const DenseMap::const_iterator I = L2SEHRegs.find(RegNum); +int MCRegisterInfo::getSEHRegNum(MCRegister RegNum) const { + const DenseMap::const_iterator I = L2SEHRegs.find(RegNum); if (I == L2SEHRegs.end()) return (int)RegNum; return I->second; } -int MCRegisterInfo::getCodeViewRegNum(unsigned RegNum) const { +int MCRegisterInfo::getCodeViewRegNum(MCRegister RegNum) const { if (L2CVRegs.empty()) report_fatal_error("target does not implement codeview register mapping"); - const DenseMap::const_iterator I = L2CVRegs.find(RegNum); + const DenseMap::const_iterator I = L2CVRegs.find(RegNum); if (I == L2CVRegs.end()) report_fatal_error("unknown codeview register " + (RegNum < getNumRegs() ? 
getName(RegNum) diff --git a/lib/MC/MCSectionXCOFF.cpp b/lib/MC/MCSectionXCOFF.cpp index d1a637345024..d52959f15f92 100644 --- a/lib/MC/MCSectionXCOFF.cpp +++ b/lib/MC/MCSectionXCOFF.cpp @@ -15,19 +15,65 @@ using namespace llvm; MCSectionXCOFF::~MCSectionXCOFF() = default; +static StringRef getMappingClassString(XCOFF::StorageMappingClass SMC) { + switch (SMC) { + case XCOFF::XMC_DS: + return "DS"; + case XCOFF::XMC_RW: + return "RW"; + case XCOFF::XMC_PR: + return "PR"; + default: + report_fatal_error("Unhandled storage-mapping class."); + } +} + void MCSectionXCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, raw_ostream &OS, const MCExpr *Subsection) const { if (getKind().isText()) { + if (getMappingClass() != XCOFF::XMC_PR) + report_fatal_error("Unhandled storage-mapping class for .text csect"); + OS << "\t.csect " << getSectionName() << "[" - << "PR" + << getMappingClassString(getMappingClass()) << "]" << '\n'; return; } + if (getKind().isData()) { + switch (getMappingClass()) { + case XCOFF::XMC_RW: + case XCOFF::XMC_DS: + OS << "\t.csect " << getSectionName() << "[" + << getMappingClassString(getMappingClass()) << "]" << '\n'; + break; + case XCOFF::XMC_TC0: + OS << "\t.toc\n"; + break; + default: + report_fatal_error( + "Unhandled storage-mapping class for .data csect."); + } + return; + } + + if (getKind().isBSSLocal() || getKind().isCommon()) { + assert((getMappingClass() == XCOFF::XMC_RW || + getMappingClass() == XCOFF::XMC_BS) && + "Generated a storage-mapping class for a common/bss csect we don't " + "understand how to switch to."); + assert(getCSectType() == XCOFF::XTY_CM && + "wrong csect type for .bss csect"); + // Don't have to print a directive for switching to section for commons. + // '.comm' and '.lcomm' directives for the variable will create the needed + // csect. + return; + } + report_fatal_error("Printing for this SectionKind is unimplemented."); } bool MCSectionXCOFF::UseCodeAlign() const { return getKind().isText(); } -bool MCSectionXCOFF::isVirtualSection() const { return !getKind().isCommon(); } +bool MCSectionXCOFF::isVirtualSection() const { return XCOFF::XTY_CM == Type; } diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index decbb96817e3..b8278cb11079 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/COFF.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeView.h" @@ -21,6 +22,8 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCRegister.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSymbol.h" @@ -327,10 +330,56 @@ void MCStreamer::EmitCVInlineLinetableDirective(unsigned PrimaryFunctionId, const MCSymbol *FnStartSym, const MCSymbol *FnEndSym) {} +/// Only call this on endian-specific types like ulittle16_t and little32_t, or +/// structs composed of them. 
+template +static void copyBytesForDefRange(SmallString<20> &BytePrefix, + codeview::SymbolKind SymKind, + const T &DefRangeHeader) { + BytePrefix.resize(2 + sizeof(T)); + codeview::ulittle16_t SymKindLE = codeview::ulittle16_t(SymKind); + memcpy(&BytePrefix[0], &SymKindLE, 2); + memcpy(&BytePrefix[2], &DefRangeHeader, sizeof(T)); +} + void MCStreamer::EmitCVDefRangeDirective( ArrayRef> Ranges, StringRef FixedSizePortion) {} +void MCStreamer::EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeRegisterRelHeader DRHdr) { + SmallString<20> BytePrefix; + copyBytesForDefRange(BytePrefix, codeview::S_DEFRANGE_REGISTER_REL, DRHdr); + EmitCVDefRangeDirective(Ranges, BytePrefix); +} + +void MCStreamer::EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeSubfieldRegisterHeader DRHdr) { + SmallString<20> BytePrefix; + copyBytesForDefRange(BytePrefix, codeview::S_DEFRANGE_SUBFIELD_REGISTER, + DRHdr); + EmitCVDefRangeDirective(Ranges, BytePrefix); +} + +void MCStreamer::EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeRegisterHeader DRHdr) { + SmallString<20> BytePrefix; + copyBytesForDefRange(BytePrefix, codeview::S_DEFRANGE_REGISTER, DRHdr); + EmitCVDefRangeDirective(Ranges, BytePrefix); +} + +void MCStreamer::EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeFramePointerRelHeader DRHdr) { + SmallString<20> BytePrefix; + copyBytesForDefRange(BytePrefix, codeview::S_DEFRANGE_FRAMEPOINTER_REL, + DRHdr); + EmitCVDefRangeDirective(Ranges, BytePrefix); +} + void MCStreamer::EmitEHSymAttributes(const MCSymbol *Symbol, MCSymbol *EHSymbol) { } @@ -631,7 +680,7 @@ void MCStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) { MCSymbol *StartProc = EmitCFILabel(); WinFrameInfos.emplace_back( - llvm::make_unique(Symbol, StartProc)); + std::make_unique(Symbol, StartProc)); CurrentWinFrameInfo = WinFrameInfos.back().get(); CurrentWinFrameInfo->TextSection = getCurrentSectionOnly(); } @@ -665,7 +714,7 @@ void MCStreamer::EmitWinCFIStartChained(SMLoc Loc) { MCSymbol *StartProc = EmitCFILabel(); - WinFrameInfos.emplace_back(llvm::make_unique( + WinFrameInfos.emplace_back(std::make_unique( CurFrame->Function, StartProc, CurFrame)); CurrentWinFrameInfo = WinFrameInfos.back().get(); CurrentWinFrameInfo->TextSection = getCurrentSectionOnly(); @@ -763,18 +812,23 @@ MCSection *MCStreamer::getAssociatedXDataSection(const MCSection *TextSec) { void MCStreamer::EmitSyntaxDirective() {} -void MCStreamer::EmitWinCFIPushReg(unsigned Register, SMLoc Loc) { +static unsigned encodeSEHRegNum(MCContext &Ctx, MCRegister Reg) { + return Ctx.getRegisterInfo()->getSEHRegNum(Reg); +} + +void MCStreamer::EmitWinCFIPushReg(MCRegister Register, SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) return; MCSymbol *Label = EmitCFILabel(); - WinEH::Instruction Inst = Win64EH::Instruction::PushNonVol(Label, Register); + WinEH::Instruction Inst = Win64EH::Instruction::PushNonVol( + Label, encodeSEHRegNum(Context, Register)); CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWinCFISetFrame(unsigned Register, unsigned Offset, +void MCStreamer::EmitWinCFISetFrame(MCRegister Register, unsigned Offset, SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) @@ -790,8 +844,8 @@ void MCStreamer::EmitWinCFISetFrame(unsigned Register, unsigned Offset, MCSymbol *Label = EmitCFILabel(); - WinEH::Instruction Inst = - Win64EH::Instruction::SetFPReg(Label, Register, Offset); + WinEH::Instruction Inst = 
Win64EH::Instruction::SetFPReg( + Label, encodeSEHRegNum(getContext(), Register), Offset); CurFrame->LastFrameInst = CurFrame->Instructions.size(); CurFrame->Instructions.push_back(Inst); } @@ -813,7 +867,7 @@ void MCStreamer::EmitWinCFIAllocStack(unsigned Size, SMLoc Loc) { CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWinCFISaveReg(unsigned Register, unsigned Offset, +void MCStreamer::EmitWinCFISaveReg(MCRegister Register, unsigned Offset, SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) @@ -825,12 +879,12 @@ void MCStreamer::EmitWinCFISaveReg(unsigned Register, unsigned Offset, MCSymbol *Label = EmitCFILabel(); - WinEH::Instruction Inst = - Win64EH::Instruction::SaveNonVol(Label, Register, Offset); + WinEH::Instruction Inst = Win64EH::Instruction::SaveNonVol( + Label, encodeSEHRegNum(Context, Register), Offset); CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWinCFISaveXMM(unsigned Register, unsigned Offset, +void MCStreamer::EmitWinCFISaveXMM(MCRegister Register, unsigned Offset, SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) @@ -840,8 +894,8 @@ void MCStreamer::EmitWinCFISaveXMM(unsigned Register, unsigned Offset, MCSymbol *Label = EmitCFILabel(); - WinEH::Instruction Inst = - Win64EH::Instruction::SaveXMM(Label, Register, Offset); + WinEH::Instruction Inst = Win64EH::Instruction::SaveXMM( + Label, encodeSEHRegNum(Context, Register), Offset); CurFrame->Instructions.push_back(Inst); } @@ -1009,6 +1063,10 @@ void MCStreamer::EmitCOFFSymbolStorageClass(int StorageClass) { void MCStreamer::EmitCOFFSymbolType(int Type) { llvm_unreachable("this directive only supported on COFF targets"); } +void MCStreamer::EmitXCOFFLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlign) { + llvm_unreachable("this directive only supported on XCOFF targets"); +} void MCStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) {} void MCStreamer::emitELFSymverDirective(StringRef AliasName, const MCSymbol *Aliasee) {} diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp index 5fd48d9e1010..c8678df02bfd 100644 --- a/lib/MC/MCSubtargetInfo.cpp +++ b/lib/MC/MCSubtargetInfo.cpp @@ -315,3 +315,28 @@ void MCSubtargetInfo::initInstrItins(InstrItineraryData &InstrItins) const { InstrItins = InstrItineraryData(getSchedModel(), Stages, OperandCycles, ForwardingPaths); } + +Optional MCSubtargetInfo::getCacheSize(unsigned Level) const { + return Optional(); +} + +Optional +MCSubtargetInfo::getCacheAssociativity(unsigned Level) const { + return Optional(); +} + +Optional MCSubtargetInfo::getCacheLineSize(unsigned Level) const { + return Optional(); +} + +unsigned MCSubtargetInfo::getPrefetchDistance() const { + return 0; +} + +unsigned MCSubtargetInfo::getMaxPrefetchIterationsAhead() const { + return UINT_MAX; +} + +unsigned MCSubtargetInfo::getMinPrefetchStride() const { + return 1; +} diff --git a/lib/MC/MCWasmObjectTargetWriter.cpp b/lib/MC/MCWasmObjectTargetWriter.cpp index e46257823e34..1ccb3a58d5c1 100644 --- a/lib/MC/MCWasmObjectTargetWriter.cpp +++ b/lib/MC/MCWasmObjectTargetWriter.cpp @@ -10,8 +10,9 @@ using namespace llvm; -MCWasmObjectTargetWriter::MCWasmObjectTargetWriter(bool Is64Bit) - : Is64Bit(Is64Bit) {} +MCWasmObjectTargetWriter::MCWasmObjectTargetWriter(bool Is64Bit, + bool IsEmscripten) + : Is64Bit(Is64Bit), IsEmscripten(IsEmscripten) {} // Pin the vtable to this object file MCWasmObjectTargetWriter::~MCWasmObjectTargetWriter() = default; diff --git 
a/lib/MC/MCWasmStreamer.cpp b/lib/MC/MCWasmStreamer.cpp index 86fa72197855..e7e96ecbb3a0 100644 --- a/lib/MC/MCWasmStreamer.cpp +++ b/lib/MC/MCWasmStreamer.cpp @@ -122,7 +122,7 @@ bool MCWasmStreamer::EmitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) { break; case MCSA_NoDeadStrip: - Symbol->setExported(); + Symbol->setNoStrip(); break; default: diff --git a/lib/MC/MCWinCOFFStreamer.cpp b/lib/MC/MCWinCOFFStreamer.cpp index 04d5f100a2ff..c5a21312140b 100644 --- a/lib/MC/MCWinCOFFStreamer.cpp +++ b/lib/MC/MCWinCOFFStreamer.cpp @@ -88,7 +88,19 @@ void MCWinCOFFStreamer::EmitLabel(MCSymbol *S, SMLoc Loc) { } void MCWinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { - llvm_unreachable("not implemented"); + // Let the target do whatever target specific stuff it needs to do. + getAssembler().getBackend().handleAssemblerFlag(Flag); + + switch (Flag) { + // None of these require COFF specific handling. + case MCAF_SyntaxUnified: + case MCAF_Code16: + case MCAF_Code32: + case MCAF_Code64: + break; + case MCAF_SubsectionsViaSymbols: + llvm_unreachable("COFF doesn't support .subsections_via_symbols"); + } } void MCWinCOFFStreamer::EmitThumbFunc(MCSymbol *Func) { @@ -180,7 +192,7 @@ void MCWinCOFFStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) { MCSection *SXData = getContext().getObjectFileInfo()->getSXDataSection(); getAssembler().registerSection(*SXData); if (SXData->getAlignment() < 4) - SXData->setAlignment(4); + SXData->setAlignment(Align(4)); new MCSymbolIdFragment(Symbol, SXData); @@ -197,7 +209,7 @@ void MCWinCOFFStreamer::EmitCOFFSymbolIndex(MCSymbol const *Symbol) { MCSection *Sec = getCurrentSectionOnly(); getAssembler().registerSection(*Sec); if (Sec->getAlignment() < 4) - Sec->setAlignment(4); + Sec->setAlignment(Align(4)); new MCSymbolIdFragment(Symbol, getCurrentSectionOnly()); diff --git a/lib/MC/MCXCOFFStreamer.cpp b/lib/MC/MCXCOFFStreamer.cpp index 071de024a3fa..50937d6adc0c 100644 --- a/lib/MC/MCXCOFFStreamer.cpp +++ b/lib/MC/MCXCOFFStreamer.cpp @@ -10,10 +10,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCXCOFFStreamer.h" +#include "llvm/BinaryFormat/XCOFF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSymbolXCOFF.h" +#include "llvm/MC/MCXCOFFStreamer.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -25,14 +27,38 @@ MCXCOFFStreamer::MCXCOFFStreamer(MCContext &Context, : MCObjectStreamer(Context, std::move(MAB), std::move(OW), std::move(Emitter)) {} -bool MCXCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, +bool MCXCOFFStreamer::EmitSymbolAttribute(MCSymbol *Sym, MCSymbolAttr Attribute) { - report_fatal_error("Symbol attributes not implemented for XCOFF."); + auto *Symbol = cast(Sym); + getAssembler().registerSymbol(*Symbol); + + switch (Attribute) { + case MCSA_Global: + Symbol->setStorageClass(XCOFF::C_EXT); + Symbol->setExternal(true); + break; + default: + report_fatal_error("Not implemented yet."); + } + return true; } void MCXCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { - report_fatal_error("Emiting common symbols not implemented for XCOFF."); + getAssembler().registerSymbol(*Symbol); + Symbol->setExternal(cast(Symbol)->getStorageClass() != + XCOFF::C_HIDEXT); + Symbol->setCommon(Size, ByteAlignment); + + // Need to add this symbol to the current Fragment which will belong to the + // containing CSECT. 
+ auto *F = dyn_cast_or_null(getCurrentFragment()); + assert(F && "Expected a valid section with a fragment set."); + Symbol->setFragment(F); + + // Emit the alignment and storage for the variable to the section. + EmitValueToAlignment(ByteAlignment); + EmitZeros(Size); } void MCXCOFFStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol, @@ -42,8 +68,18 @@ void MCXCOFFStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol, } void MCXCOFFStreamer::EmitInstToData(const MCInst &Inst, - const MCSubtargetInfo &) { - report_fatal_error("Instruction emission not implemented for XCOFF."); + const MCSubtargetInfo &STI) { + MCAssembler &Assembler = getAssembler(); + SmallVector Fixups; + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + Assembler.getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI); + + // TODO: Handle Fixups later + + MCDataFragment *DF = getOrCreateDataFragment(&STI); + DF->setHasInstructions(STI); + DF->getContents().append(Code.begin(), Code.end()); } MCStreamer *llvm::createXCOFFStreamer(MCContext &Context, @@ -57,3 +93,9 @@ MCStreamer *llvm::createXCOFFStreamer(MCContext &Context, S->getAssembler().setRelaxAll(true); return S; } + +void MCXCOFFStreamer::EmitXCOFFLocalCommonSymbol(MCSymbol *Symbol, + uint64_t Size, + unsigned ByteAlignment) { + EmitCommonSymbol(Symbol, Size, ByteAlignment); +} diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index f0ceb86b25af..9f6af981aca1 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -25,6 +25,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolMachO.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -126,7 +127,7 @@ uint64_t MachObjectWriter::getPaddingSize(const MCSection *Sec, const MCSection &NextSec = *Layout.getSectionOrder()[Next]; if (NextSec.isVirtualSection()) return 0; - return OffsetToAlignment(EndAddr, NextSec.getAlignment()); + return offsetToAlignment(EndAddr, Align(NextSec.getAlignment())); } void MachObjectWriter::writeHeader(MachO::HeaderFileType Type, @@ -444,7 +445,8 @@ void MachObjectWriter::writeLinkerOptionsLoadCommand( } // Pad to a multiple of the pointer size. - W.OS.write_zeros(OffsetToAlignment(BytesWritten, is64Bit() ? 8 : 4)); + W.OS.write_zeros( + offsetToAlignment(BytesWritten, is64Bit() ? Align(8) : Align(4))); assert(W.OS.tell() - Start == Size); } @@ -832,7 +834,8 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm, // The section data is padded to 4 bytes. // // FIXME: Is this machine dependent? - unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4); + unsigned SectionDataPadding = + offsetToAlignment(SectionDataFileSize, Align(4)); SectionDataFileSize += SectionDataPadding; // Write the prolog, starting with the header and load command... @@ -997,7 +1000,8 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm, #endif Asm.getLOHContainer().emit(*this, Layout); // Pad to a multiple of the pointer size. - W.OS.write_zeros(OffsetToAlignment(LOHRawSize, is64Bit() ? 8 : 4)); + W.OS.write_zeros( + offsetToAlignment(LOHRawSize, is64Bit() ? 
Align(8) : Align(4))); assert(W.OS.tell() - Start == LOHSize); } @@ -1043,6 +1047,6 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm, std::unique_ptr llvm::createMachObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS, bool IsLittleEndian) { - return llvm::make_unique(std::move(MOTW), OS, + return std::make_unique(std::move(MOTW), OS, IsLittleEndian); } diff --git a/lib/MC/StringTableBuilder.cpp b/lib/MC/StringTableBuilder.cpp index cb3db8e2268c..c9c88ec58432 100644 --- a/lib/MC/StringTableBuilder.cpp +++ b/lib/MC/StringTableBuilder.cpp @@ -38,6 +38,7 @@ void StringTableBuilder::initSize() { // Start the table with a NUL byte. Size = 1; break; + case XCOFF: case WinCOFF: // Make room to write the table size later. Size = 4; @@ -67,9 +68,12 @@ void StringTableBuilder::write(uint8_t *Buf) const { if (!Data.empty()) memcpy(Buf + P.second, Data.data(), Data.size()); } - if (K != WinCOFF) - return; - support::endian::write32le(Buf, Size); + // The COFF formats store the size of the string table in the first 4 bytes. + // For Windows, the format is little-endian; for AIX, it is big-endian. + if (K == WinCOFF) + support::endian::write32le(Buf, Size); + else if (K == XCOFF) + support::endian::write32be(Buf, Size); } // Returns the character at Pos from end of a string. diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp index 098343cd0107..c1ff3cc2480c 100644 --- a/lib/MC/WasmObjectWriter.cpp +++ b/lib/MC/WasmObjectWriter.cpp @@ -258,6 +258,7 @@ class WasmObjectWriter : public MCObjectWriter { // TargetObjectWriter wrappers. bool is64Bit() const { return TargetObjectWriter->is64Bit(); } + bool isEmscripten() const { return TargetObjectWriter->isEmscripten(); } void startSection(SectionBookkeeping &Section, unsigned SectionId); void startCustomSection(SectionBookkeeping &Section, StringRef Name); @@ -426,9 +427,10 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) { - MCAsmBackend &Backend = Asm.getBackend(); - bool IsPCRel = Backend.getFixupKindInfo(Fixup.getKind()).Flags & - MCFixupKindInfo::FKF_IsPCRel; + // The WebAssembly backend should never generate FKF_IsPCRel fixups + assert(!(Asm.getBackend().getFixupKindInfo(Fixup.getKind()).Flags & + MCFixupKindInfo::FKF_IsPCRel)); + const auto &FixupSection = cast(*Fragment->getParent()); uint64_t C = Target.getConstant(); uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); @@ -439,51 +441,22 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, return; if (const MCSymbolRefExpr *RefB = Target.getSymB()) { - assert(RefB->getKind() == MCSymbolRefExpr::VK_None && - "Should not have constructed this"); - - // Let A, B and C being the components of Target and R be the location of - // the fixup. If the fixup is not pcrel, we want to compute (A - B + C). - // If it is pcrel, we want to compute (A - B + C - R). - - // In general, Wasm has no relocations for -B. It can only represent (A + C) - // or (A + C - R). If B = R + K and the relocation is not pcrel, we can - // replace B to implement it: (A - R - K + C) - if (IsPCRel) { - Ctx.reportError( - Fixup.getLoc(), - "No relocation available to represent this relative expression"); - return; - } - + // To get here the A - B expression must have failed evaluateAsRelocatable. + // This means either A or B must be undefined and in WebAssembly we can't + // support either of those cases. 
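// A rough model of why the error path below fires for A - B expressions that
// survive to relocation time: the difference is a link-time constant only when
// both symbols are defined in the same section (in which case it was already
// folded by relocatable evaluation); anything else would need a relocation for
// "-B", which the Wasm format does not have. Types and values here are
// invented for illustration, not the MC evaluation logic.
#include <cstdint>
#include <optional>

struct Sym {
  bool Defined;
  int Section;     // arbitrary section id
  uint64_t Offset; // offset within that section
};

static std::optional<int64_t> foldDifference(const Sym &A, const Sym &B) {
  if (!A.Defined || !B.Defined || A.Section != B.Section)
    return std::nullopt; // must be rejected, as in recordRelocation
  return static_cast<int64_t>(A.Offset) - static_cast<int64_t>(B.Offset);
}

int main() {
  Sym A{true, 1, 0x40}, B{true, 1, 0x10}, Undef{false, 0, 0};
  return (foldDifference(A, B).value_or(-1) == 0x30 && // foldable
          !foldDifference(A, Undef))                   // reported as an error
             ? 0
             : 1;
}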
const auto &SymB = cast(RefB->getSymbol()); - - if (SymB.isUndefined()) { - Ctx.reportError(Fixup.getLoc(), - Twine("symbol '") + SymB.getName() + - "' can not be undefined in a subtraction expression"); - return; - } - - assert(!SymB.isAbsolute() && "Should have been folded"); - const MCSection &SecB = SymB.getSection(); - if (&SecB != &FixupSection) { - Ctx.reportError(Fixup.getLoc(), - "Cannot represent a difference across sections"); - return; - } - - uint64_t SymBOffset = Layout.getSymbolOffset(SymB); - uint64_t K = SymBOffset - FixupOffset; - IsPCRel = true; - C -= K; + Ctx.reportError( + Fixup.getLoc(), + Twine("symbol '") + SymB.getName() + + "': unsupported subtraction expression used in relocation."); + return; } // We either rejected the fixup or folded B into C at this point. const MCSymbolRefExpr *RefA = Target.getSymA(); - const auto *SymA = RefA ? cast(&RefA->getSymbol()) : nullptr; + const auto *SymA = cast(&RefA->getSymbol()); - if (SymA && SymA->isVariable()) { + if (SymA->isVariable()) { const MCExpr *Expr = SymA->getVariableValue(); const auto *Inner = cast(Expr); if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF) @@ -496,8 +469,6 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, FixedValue = 0; unsigned Type = TargetObjectWriter->getRelocType(Target, Fixup); - assert(!IsPCRel); - assert(SymA); // Absolute offset within a section or a function. // Currently only supported for for metadata sections. @@ -1296,12 +1267,12 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm, // Separate out the producers and target features sections if (Name == "producers") { - ProducersSection = llvm::make_unique(Name, &Section); + ProducersSection = std::make_unique(Name, &Section); continue; } if (Name == "target_features") { TargetFeaturesSection = - llvm::make_unique(Name, &Section); + std::make_unique(Name, &Section); continue; } @@ -1379,7 +1350,9 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm, report_fatal_error(".size expression must be evaluatable"); auto &DataSection = static_cast(WS.getSection()); - assert(DataSection.isWasmData()); + if (!DataSection.isWasmData()) + report_fatal_error("data symbols must live in a data section: " + + WS.getName()); // For each data symbol, export it in the symtab as a reference to the // corresponding Wasm data segment. 
@@ -1473,8 +1446,12 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm, Flags |= wasm::WASM_SYMBOL_BINDING_LOCAL; if (WS.isUndefined()) Flags |= wasm::WASM_SYMBOL_UNDEFINED; - if (WS.isExported()) - Flags |= wasm::WASM_SYMBOL_EXPORTED; + if (WS.isNoStrip()) { + Flags |= wasm::WASM_SYMBOL_NO_STRIP; + if (isEmscripten()) { + Flags |= wasm::WASM_SYMBOL_EXPORTED; + } + } if (WS.getName() != WS.getImportName()) Flags |= wasm::WASM_SYMBOL_EXPLICIT_NAME; @@ -1618,5 +1595,5 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm, std::unique_ptr llvm::createWasmObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS) { - return llvm::make_unique(std::move(MOTW), OS); + return std::make_unique(std::move(MOTW), OS); } diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index 0e6c05bc726d..749ed8badfaa 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -31,10 +31,10 @@ #include "llvm/MC/MCValue.h" #include "llvm/MC/MCWinCOFFObjectWriter.h" #include "llvm/MC/StringTableBuilder.h" +#include "llvm/Support/CRC.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/JamCRC.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -239,7 +239,7 @@ WinCOFFObjectWriter::WinCOFFObjectWriter( } COFFSymbol *WinCOFFObjectWriter::createSymbol(StringRef Name) { - Symbols.push_back(make_unique(Name)); + Symbols.push_back(std::make_unique(Name)); return Symbols.back().get(); } @@ -251,7 +251,7 @@ COFFSymbol *WinCOFFObjectWriter::GetOrCreateCOFFSymbol(const MCSymbol *Symbol) { } COFFSection *WinCOFFObjectWriter::createSection(StringRef Name) { - Sections.emplace_back(make_unique(Name)); + Sections.emplace_back(std::make_unique(Name)); return Sections.back().get(); } @@ -605,7 +605,7 @@ uint32_t WinCOFFObjectWriter::writeSectionContents(MCAssembler &Asm, // Calculate our CRC with an initial value of '0', this is not how // JamCRC is specified but it aligns with the expected output. JamCRC JC(/*Init=*/0); - JC.update(Buf); + JC.update(makeArrayRef(reinterpret_cast(Buf.data()), Buf.size())); return JC.getCRC(); } @@ -1098,5 +1098,5 @@ void MCWinCOFFObjectTargetWriter::anchor() {} std::unique_ptr llvm::createWinCOFFObjectWriter( std::unique_ptr MOTW, raw_pwrite_stream &OS) { - return llvm::make_unique(std::move(MOTW), OS); + return std::make_unique(std::move(MOTW), OS); } diff --git a/lib/MC/XCOFFObjectWriter.cpp b/lib/MC/XCOFFObjectWriter.cpp index 9b9a7b6c118c..353c21068735 100644 --- a/lib/MC/XCOFFObjectWriter.cpp +++ b/lib/MC/XCOFFObjectWriter.cpp @@ -10,18 +10,135 @@ // //===----------------------------------------------------------------------===// +#include "llvm/BinaryFormat/XCOFF.h" +#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSectionXCOFF.h" +#include "llvm/MC/MCSymbolXCOFF.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCXCOFFObjectWriter.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MathExtras.h" + +#include using namespace llvm; +// An XCOFF object file has a limited set of predefined sections. The most +// important ones for us (right now) are: +// .text --> contains program code and read-only data. +// .data --> contains initialized data, function descriptors, and the TOC. +// .bss --> contains uninitialized data. 
+// Each of these sections is composed of 'Control Sections'. A Control Section +// is more commonly referred to as a csect. A csect is an indivisible unit of +// code or data, and acts as a container for symbols. A csect is mapped +// into a section based on its storage-mapping class, with the exception of +// XMC_RW which gets mapped to either .data or .bss based on whether it's +// explicitly initialized or not. +// +// We don't represent the sections in the MC layer as there is nothing +// interesting about them at at that level: they carry information that is +// only relevant to the ObjectWriter, so we materialize them in this class. namespace { +constexpr unsigned DefaultSectionAlign = 4; + +// Packs the csect's alignment and type into a byte. +uint8_t getEncodedType(const MCSectionXCOFF *); + +// Wrapper around an MCSymbolXCOFF. +struct Symbol { + const MCSymbolXCOFF *const MCSym; + uint32_t SymbolTableIndex; + + XCOFF::StorageClass getStorageClass() const { + return MCSym->getStorageClass(); + } + StringRef getName() const { return MCSym->getName(); } + Symbol(const MCSymbolXCOFF *MCSym) : MCSym(MCSym), SymbolTableIndex(-1) {} +}; + +// Wrapper for an MCSectionXCOFF. +struct ControlSection { + const MCSectionXCOFF *const MCCsect; + uint32_t SymbolTableIndex; + uint32_t Address; + uint32_t Size; + + SmallVector Syms; + StringRef getName() const { return MCCsect->getSectionName(); } + ControlSection(const MCSectionXCOFF *MCSec) + : MCCsect(MCSec), SymbolTableIndex(-1), Address(-1), Size(0) {} +}; + +// Represents the data related to a section excluding the csects that make up +// the raw data of the section. The csects are stored separately as not all +// sections contain csects, and some sections contain csects which are better +// stored separately, e.g. the .data section containing read-write, descriptor, +// TOCBase and TOC-entry csects. +struct Section { + char Name[XCOFF::NameSize]; + // The physical/virtual address of the section. For an object file + // these values are equivalent. + uint32_t Address; + uint32_t Size; + uint32_t FileOffsetToData; + uint32_t FileOffsetToRelocations; + uint32_t RelocationCount; + int32_t Flags; + + int16_t Index; + + // Virtual sections do not need storage allocated in the object file. + const bool IsVirtual; + + void reset() { + Address = 0; + Size = 0; + FileOffsetToData = 0; + FileOffsetToRelocations = 0; + RelocationCount = 0; + Index = -1; + } + + Section(const char *N, XCOFF::SectionTypeFlags Flags, bool IsVirtual) + : Address(0), Size(0), FileOffsetToData(0), FileOffsetToRelocations(0), + RelocationCount(0), Flags(Flags), Index(-1), IsVirtual(IsVirtual) { + strncpy(Name, N, XCOFF::NameSize); + } +}; + class XCOFFObjectWriter : public MCObjectWriter { + // Type to be used for a container representing a set of csects with + // (approximately) the same storage mapping class. For example all the csects + // with a storage mapping class of `xmc_pr` will get placed into the same + // container. + using CsectGroup = std::deque; + support::endian::Writer W; std::unique_ptr TargetObjectWriter; + StringTableBuilder Strings; + + // The non-empty sections, in the order they will appear in the section header + // table. + std::vector
Sections; + + // The Predefined sections. + Section Text; + Section BSS; + + // CsectGroups. These store the csects which make up different parts of + // the sections. Should have one for each set of csects that get mapped into + // the same section and get handled in a 'similar' way. + CsectGroup ProgramCodeCsects; + CsectGroup BSSCsects; + + uint32_t SymbolTableEntryCount = 0; + uint32_t SymbolTableOffset = 0; + + virtual void reset() override; void executePostLayoutBinding(MCAssembler &, const MCAsmLayout &) override; @@ -30,6 +147,40 @@ class XCOFFObjectWriter : public MCObjectWriter { uint64_t writeObject(MCAssembler &, const MCAsmLayout &) override; + static bool nameShouldBeInStringTable(const StringRef &); + void writeSymbolName(const StringRef &); + void writeSymbolTableEntryForCsectMemberLabel(const Symbol &, + const ControlSection &, int16_t, + uint64_t); + void writeSymbolTableEntryForControlSection(const ControlSection &, int16_t, + XCOFF::StorageClass); + void writeFileHeader(); + void writeSectionHeaderTable(); + void writeSections(const MCAssembler &Asm, const MCAsmLayout &Layout); + void writeSymbolTable(const MCAsmLayout &Layout); + + // Called after all the csects and symbols have been processed by + // `executePostLayoutBinding`, this function handles building up the majority + // of the structures in the object file representation. Namely: + // *) Calculates physical/virtual addresses, raw-pointer offsets, and section + // sizes. + // *) Assigns symbol table indices. + // *) Builds up the section header table by adding any non-empty sections to + // `Sections`. + void assignAddressesAndIndices(const MCAsmLayout &); + + bool + needsAuxiliaryHeader() const { /* TODO aux header support not implemented. */ + return false; + } + + // Returns the size of the auxiliary header to be written to the object file. + size_t auxiliaryHeaderSize() const { + assert(!needsAuxiliaryHeader() && + "Auxiliary header support not implemented."); + return 0; + } + public: XCOFFObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS); @@ -37,11 +188,100 @@ public: XCOFFObjectWriter::XCOFFObjectWriter( std::unique_ptr MOTW, raw_pwrite_stream &OS) - : W(OS, support::big), TargetObjectWriter(std::move(MOTW)) {} + : W(OS, support::big), TargetObjectWriter(std::move(MOTW)), + Strings(StringTableBuilder::XCOFF), + Text(".text", XCOFF::STYP_TEXT, /* IsVirtual */ false), + BSS(".bss", XCOFF::STYP_BSS, /* IsVirtual */ true) {} + +void XCOFFObjectWriter::reset() { + // Reset any sections we have written to, and empty the section header table. + for (auto *Sec : Sections) + Sec->reset(); + Sections.clear(); + + // Clear any csects we have stored. + ProgramCodeCsects.clear(); + BSSCsects.clear(); + + // Reset the symbol table and string table. + SymbolTableEntryCount = 0; + SymbolTableOffset = 0; + Strings.clear(); + + MCObjectWriter::reset(); +} + +void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm, + const MCAsmLayout &Layout) { + if (TargetObjectWriter->is64Bit()) + report_fatal_error("64-bit XCOFF object files are not supported yet."); + + // Maps the MC Section representation to its corresponding ControlSection + // wrapper. Needed for finding the ControlSection to insert an MCSymbol into + // from its containing MCSectionXCOFF. 
+ DenseMap WrapperMap; + + for (const auto &S : Asm) { + const auto *MCSec = cast(&S); + assert(WrapperMap.find(MCSec) == WrapperMap.end() && + "Cannot add a csect twice."); -void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &, - const MCAsmLayout &) { - // TODO Implement once we have sections and symbols to handle. + // If the name does not fit in the storage provided in the symbol table + // entry, add it to the string table. + if (nameShouldBeInStringTable(MCSec->getSectionName())) + Strings.add(MCSec->getSectionName()); + + switch (MCSec->getMappingClass()) { + case XCOFF::XMC_PR: + assert(XCOFF::XTY_SD == MCSec->getCSectType() && + "Only an initialized csect can contain program code."); + ProgramCodeCsects.emplace_back(MCSec); + WrapperMap[MCSec] = &ProgramCodeCsects.back(); + break; + case XCOFF::XMC_RW: + if (XCOFF::XTY_CM == MCSec->getCSectType()) { + BSSCsects.emplace_back(MCSec); + WrapperMap[MCSec] = &BSSCsects.back(); + break; + } + report_fatal_error("Unhandled mapping of read-write csect to section."); + case XCOFF::XMC_TC0: + // TODO FIXME Handle emiting the TOC base. + break; + case XCOFF::XMC_BS: + assert(XCOFF::XTY_CM == MCSec->getCSectType() && + "Mapping invalid csect. CSECT with bss storage class must be " + "common type."); + BSSCsects.emplace_back(MCSec); + WrapperMap[MCSec] = &BSSCsects.back(); + break; + default: + report_fatal_error("Unhandled mapping of csect to section."); + } + } + + for (const MCSymbol &S : Asm.symbols()) { + // Nothing to do for temporary symbols. + if (S.isTemporary()) + continue; + const MCSymbolXCOFF *XSym = cast(&S); + + // Map the symbol into its containing csect. + const MCSectionXCOFF *ContainingCsect = XSym->getContainingCsect(); + assert(WrapperMap.find(ContainingCsect) != WrapperMap.end() && + "Expected containing csect to exist in map"); + + // Lookup the containing csect and add the symbol to it. + WrapperMap[ContainingCsect]->Syms.emplace_back(XSym); + + // If the name does not fit in the storage provided in the symbol table + // entry, add it to the string table. + if (nameShouldBeInStringTable(XSym->getName())) + Strings.add(XSym->getName()); + } + + Strings.finalize(); + assignAddressesAndIndices(Layout); } void XCOFFObjectWriter::recordRelocation(MCAssembler &, const MCAsmLayout &, @@ -50,7 +290,29 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &, const MCAsmLayout &, report_fatal_error("XCOFF relocations not supported."); } -uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &) { +void XCOFFObjectWriter::writeSections(const MCAssembler &Asm, + const MCAsmLayout &Layout) { + // Write the program code control sections one at a time. + uint32_t CurrentAddressLocation = Text.Address; + for (const auto &Csect : ProgramCodeCsects) { + if (uint32_t PaddingSize = Csect.Address - CurrentAddressLocation) + W.OS.write_zeros(PaddingSize); + Asm.writeSectionData(W.OS, Csect.MCCsect, Layout); + CurrentAddressLocation = Csect.Address + Csect.Size; + } + + if (Text.Index != -1) { + // The size of the tail padding in a section is the end virtual address of + // the current section minus the the end virtual address of the last csect + // in that section. 
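// A small worked model of the layout math this writer uses in writeSections()
// and assignAddressesAndIndices(): each csect starts at the next address that
// satisfies its alignment, gaps between csects are written as zeros, and the
// section's tail padding is (Section.Address + Section.Size) minus the end
// address of its last csect. Values are purely illustrative.
#include <cassert>
#include <cstdint>
#include <vector>

static uint32_t alignTo(uint32_t Value, uint32_t Align) {
  return (Value + Align - 1) & ~(Align - 1); // Align must be a power of two
}

int main() {
  struct Csect { uint32_t Align, Size, Address; };
  std::vector<Csect> Csects = {{4, 10, 0}, {8, 3, 0}};

  uint32_t Address = 0;
  for (Csect &C : Csects) {
    C.Address = alignTo(Address, C.Align);
    Address = C.Address + C.Size;
  }
  // The section size is padded to the default 4-byte section alignment.
  uint32_t SectionAddress = Csects.front().Address; // 0
  uint32_t SectionEnd = alignTo(Address, 4);        // alignTo(19, 4) == 20
  // Gap before the second csect: 16 - 10 = 6 zero bytes; tail padding: 1 byte.
  uint32_t TailPadding = SectionEnd - Address;
  assert(Csects[1].Address == 16 && SectionEnd == 20);
  assert(TailPadding == 1 && SectionAddress == 0);
  return 0;
}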
+ if (uint32_t PaddingSize = + Text.Address + Text.Size - CurrentAddressLocation) + W.OS.write_zeros(PaddingSize); + } +} + +uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm, + const MCAsmLayout &Layout) { // We always emit a timestamp of 0 for reproducibility, so ensure incremental // linking is not enabled, in case, like with Windows COFF, such a timestamp // is incompatible with incremental linking of XCOFF. @@ -62,27 +324,274 @@ uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &) { uint64_t StartOffset = W.OS.tell(); - // TODO FIXME Assign section numbers/finalize sections. + writeFileHeader(); + writeSectionHeaderTable(); + writeSections(Asm, Layout); + // TODO writeRelocations(); - // TODO FIXME Finalize symbols. + writeSymbolTable(Layout); + // Write the string table. + Strings.write(W.OS); + + return W.OS.tell() - StartOffset; +} +bool XCOFFObjectWriter::nameShouldBeInStringTable(const StringRef &SymbolName) { + return SymbolName.size() > XCOFF::NameSize; +} + +void XCOFFObjectWriter::writeSymbolName(const StringRef &SymbolName) { + if (nameShouldBeInStringTable(SymbolName)) { + W.write(0); + W.write(Strings.getOffset(SymbolName)); + } else { + char Name[XCOFF::NameSize]; + std::strncpy(Name, SymbolName.data(), XCOFF::NameSize); + ArrayRef NameRef(Name, XCOFF::NameSize); + W.write(NameRef); + } +} + +void XCOFFObjectWriter::writeSymbolTableEntryForCsectMemberLabel( + const Symbol &SymbolRef, const ControlSection &CSectionRef, + int16_t SectionIndex, uint64_t SymbolOffset) { + // Name or Zeros and string table offset + writeSymbolName(SymbolRef.getName()); + assert(SymbolOffset <= UINT32_MAX - CSectionRef.Address && + "Symbol address overflows."); + W.write(CSectionRef.Address + SymbolOffset); + W.write(SectionIndex); + // Basic/Derived type. See the description of the n_type field for symbol + // table entries for a detailed description. Since we don't yet support + // visibility, and all other bits are either optionally set or reserved, this + // is always zero. + // TODO FIXME How to assert a symbol's visibilty is default? + // TODO Set the function indicator (bit 10, 0x0020) for functions + // when debugging is enabled. + W.write(0); + W.write(SymbolRef.getStorageClass()); + // Always 1 aux entry for now. + W.write(1); + + // Now output the auxiliary entry. + W.write(CSectionRef.SymbolTableIndex); + // Parameter typecheck hash. Not supported. + W.write(0); + // Typecheck section number. Not supported. + W.write(0); + // Symbol type: Label + W.write(XCOFF::XTY_LD); + // Storage mapping class. + W.write(CSectionRef.MCCsect->getMappingClass()); + // Reserved (x_stab). + W.write(0); + // Reserved (x_snstab). + W.write(0); +} + +void XCOFFObjectWriter::writeSymbolTableEntryForControlSection( + const ControlSection &CSectionRef, int16_t SectionIndex, + XCOFF::StorageClass StorageClass) { + // n_name, n_zeros, n_offset + writeSymbolName(CSectionRef.getName()); + // n_value + W.write(CSectionRef.Address); + // n_scnum + W.write(SectionIndex); + // Basic/Derived type. See the description of the n_type field for symbol + // table entries for a detailed description. Since we don't yet support + // visibility, and all other bits are either optionally set or reserved, this + // is always zero. + // TODO FIXME How to assert a symbol's visibilty is default? + // TODO Set the function indicator (bit 10, 0x0020) for functions + // when debugging is enabled. + W.write(0); + // n_sclass + W.write(StorageClass); + // Always 1 aux entry for now. 
+ W.write(1); + + // Now output the auxiliary entry. + W.write(CSectionRef.Size); + // Parameter typecheck hash. Not supported. + W.write(0); + // Typecheck section number. Not supported. + W.write(0); + // Symbol type. + W.write(getEncodedType(CSectionRef.MCCsect)); + // Storage mapping class. + W.write(CSectionRef.MCCsect->getMappingClass()); + // Reserved (x_stab). + W.write(0); + // Reserved (x_snstab). + W.write(0); +} + +void XCOFFObjectWriter::writeFileHeader() { // Magic. W.write(0x01df); // Number of sections. - W.write(0); + W.write(Sections.size()); // Timestamp field. For reproducible output we write a 0, which represents no // timestamp. W.write(0); // Byte Offset to the start of the symbol table. - W.write(0); + W.write(SymbolTableOffset); // Number of entries in the symbol table. - W.write(0); + W.write(SymbolTableEntryCount); // Size of the optional header. W.write(0); // Flags. W.write(0); +} - return W.OS.tell() - StartOffset; +void XCOFFObjectWriter::writeSectionHeaderTable() { + for (const auto *Sec : Sections) { + // Write Name. + ArrayRef NameRef(Sec->Name, XCOFF::NameSize); + W.write(NameRef); + + // Write the Physical Address and Virtual Address. In an object file these + // are the same. + W.write(Sec->Address); + W.write(Sec->Address); + + W.write(Sec->Size); + W.write(Sec->FileOffsetToData); + + // Relocation pointer and Lineno pointer. Not supported yet. + W.write(0); + W.write(0); + + // Relocation and line-number counts. Not supported yet. + W.write(0); + W.write(0); + + W.write(Sec->Flags); + } +} + +void XCOFFObjectWriter::writeSymbolTable(const MCAsmLayout &Layout) { + // Print out symbol table for the program code. + for (const auto &Csect : ProgramCodeCsects) { + // Write out the control section first and then each symbol in it. + writeSymbolTableEntryForControlSection(Csect, Text.Index, + Csect.MCCsect->getStorageClass()); + for (const auto &Sym : Csect.Syms) + writeSymbolTableEntryForCsectMemberLabel( + Sym, Csect, Text.Index, Layout.getSymbolOffset(*Sym.MCSym)); + } + + // The BSS Section is special in that the csects must contain a single symbol, + // and the contained symbol cannot be represented in the symbol table as a + // label definition. + for (auto &Csect : BSSCsects) { + assert(Csect.Syms.size() == 1 && + "Uninitialized csect cannot contain more then 1 symbol."); + Symbol &Sym = Csect.Syms.back(); + writeSymbolTableEntryForControlSection(Csect, BSS.Index, + Sym.getStorageClass()); + } +} + +void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) { + // The address corrresponds to the address of sections and symbols in the + // object file. We place the shared address 0 immediately after the + // section header table. + uint32_t Address = 0; + // Section indices are 1-based in XCOFF. + int16_t SectionIndex = 1; + // The first symbol table entry is for the file name. We are not emitting it + // yet, so start at index 0. + uint32_t SymbolTableIndex = 0; + + // Text section comes first. + if (!ProgramCodeCsects.empty()) { + Sections.push_back(&Text); + Text.Index = SectionIndex++; + for (auto &Csect : ProgramCodeCsects) { + const MCSectionXCOFF *MCSec = Csect.MCCsect; + Csect.Address = alignTo(Address, MCSec->getAlignment()); + Csect.Size = Layout.getSectionAddressSize(MCSec); + Address = Csect.Address + Csect.Size; + Csect.SymbolTableIndex = SymbolTableIndex; + // 1 main and 1 auxiliary symbol table entry for the csect. 
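// The index bookkeeping here advances by two for every csect and for every
// label it contains, because each XCOFF symbol is written as one main entry
// plus one auxiliary (csect) entry. A tiny model of the counting, with
// made-up csect and label counts:
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  // Each element is the number of labels contained in one csect.
  std::vector<unsigned> LabelsPerCsect = {2, 0, 1};

  uint32_t SymbolTableIndex = 0; // no C_FILE entry is emitted yet, so start at 0
  for (unsigned Labels : LabelsPerCsect) {
    SymbolTableIndex += 2;          // csect: main + auxiliary entry
    SymbolTableIndex += 2 * Labels; // each label: main + auxiliary entry
  }
  // 3 csects and 3 labels -> (3 + 3) * 2 = 12 symbol table entries in total.
  assert(SymbolTableIndex == 12);
  return 0;
}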
+ SymbolTableIndex += 2; + for (auto &Sym : Csect.Syms) { + Sym.SymbolTableIndex = SymbolTableIndex; + // 1 main and 1 auxiliary symbol table entry for each contained symbol + SymbolTableIndex += 2; + } + } + Address = alignTo(Address, DefaultSectionAlign); + + // The first csect of a section can be aligned by adjusting the virtual + // address of its containing section instead of writing zeroes into the + // object file. + Text.Address = ProgramCodeCsects.front().Address; + + Text.Size = Address - Text.Address; + } + + // Data section Second. TODO + + // BSS Section third. + if (!BSSCsects.empty()) { + Sections.push_back(&BSS); + BSS.Index = SectionIndex++; + for (auto &Csect : BSSCsects) { + const MCSectionXCOFF *MCSec = Csect.MCCsect; + Csect.Address = alignTo(Address, MCSec->getAlignment()); + Csect.Size = Layout.getSectionAddressSize(MCSec); + Address = Csect.Address + Csect.Size; + Csect.SymbolTableIndex = SymbolTableIndex; + // 1 main and 1 auxiliary symbol table entry for the csect. + SymbolTableIndex += 2; + + assert(Csect.Syms.size() == 1 && + "csect in the BSS can only contain a single symbol."); + Csect.Syms[0].SymbolTableIndex = Csect.SymbolTableIndex; + } + // Pad out Address to the default alignment. This is to match how the system + // assembler handles the .bss section. Its size is always a multiple of 4. + Address = alignTo(Address, DefaultSectionAlign); + + BSS.Address = BSSCsects.front().Address; + BSS.Size = Address - BSS.Address; + } + + SymbolTableEntryCount = SymbolTableIndex; + + // Calculate the RawPointer value for each section. + uint64_t RawPointer = sizeof(XCOFF::FileHeader32) + auxiliaryHeaderSize() + + Sections.size() * sizeof(XCOFF::SectionHeader32); + for (auto *Sec : Sections) { + if (!Sec->IsVirtual) { + Sec->FileOffsetToData = RawPointer; + RawPointer += Sec->Size; + } + } + + // TODO Add in Relocation storage to the RawPointer Calculation. + // TODO What to align the SymbolTable to? + // TODO Error check that the number of symbol table entries fits in 32-bits + // signed ... + if (SymbolTableEntryCount) + SymbolTableOffset = RawPointer; +} + +// Takes the log base 2 of the alignment and shifts the result into the 5 most +// significant bits of a byte, then or's in the csect type into the least +// significant 3 bits. +uint8_t getEncodedType(const MCSectionXCOFF *Sec) { + unsigned Align = Sec->getAlignment(); + assert(isPowerOf2_32(Align) && "Alignment must be a power of 2."); + unsigned Log2Align = Log2_32(Align); + // Result is a number in the range [0, 31] which fits in the 5 least + // significant bits. Shift this value into the 5 most significant bits, and + // bitwise-or in the csect type. + uint8_t EncodedAlign = Log2Align << 3; + return EncodedAlign | Sec->getCSectType(); } } // end anonymous namespace @@ -90,5 +599,5 @@ uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &) { std::unique_ptr llvm::createXCOFFObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS) { - return llvm::make_unique(std::move(MOTW), OS); + return std::make_unique(std::move(MOTW), OS); } diff --git a/lib/MCA/CodeEmitter.cpp b/lib/MCA/CodeEmitter.cpp new file mode 100644 index 000000000000..294107219cb0 --- /dev/null +++ b/lib/MCA/CodeEmitter.cpp @@ -0,0 +1,37 @@ +//===--------------------- CodeEmitter.cpp ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the CodeEmitter API. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/CodeEmitter.h" + +namespace llvm { +namespace mca { + +CodeEmitter::EncodingInfo +CodeEmitter::getOrCreateEncodingInfo(unsigned MCID) { + EncodingInfo &EI = Encodings[MCID]; + if (EI.second) + return EI; + + SmallVector Fixups; + const MCInst &Inst = Sequence[MCID]; + MCInst Relaxed(Sequence[MCID]); + if (MAB.mayNeedRelaxation(Inst, STI)) + MAB.relaxInstruction(Inst, STI, Relaxed); + + EI.first = Code.size(); + MCE.encodeInstruction(Relaxed, VecOS, Fixups, STI); + EI.second = Code.size() - EI.first; + return EI; +} + +} // namespace mca +} // namespace llvm diff --git a/lib/MCA/Context.cpp b/lib/MCA/Context.cpp index f0e8dfab8680..0160e1f9f787 100644 --- a/lib/MCA/Context.cpp +++ b/lib/MCA/Context.cpp @@ -28,24 +28,23 @@ namespace llvm { namespace mca { std::unique_ptr -Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB, - SourceMgr &SrcMgr) { +Context::createDefaultPipeline(const PipelineOptions &Opts, SourceMgr &SrcMgr) { const MCSchedModel &SM = STI.getSchedModel(); // Create the hardware units defining the backend. - auto RCU = llvm::make_unique(SM); - auto PRF = llvm::make_unique(SM, MRI, Opts.RegisterFileSize); - auto LSU = llvm::make_unique(SM, Opts.LoadQueueSize, + auto RCU = std::make_unique(SM); + auto PRF = std::make_unique(SM, MRI, Opts.RegisterFileSize); + auto LSU = std::make_unique(SM, Opts.LoadQueueSize, Opts.StoreQueueSize, Opts.AssumeNoAlias); - auto HWS = llvm::make_unique(SM, *LSU); + auto HWS = std::make_unique(SM, *LSU); // Create the pipeline stages. - auto Fetch = llvm::make_unique(SrcMgr); - auto Dispatch = llvm::make_unique(STI, MRI, Opts.DispatchWidth, + auto Fetch = std::make_unique(SrcMgr); + auto Dispatch = std::make_unique(STI, MRI, Opts.DispatchWidth, *RCU, *PRF); auto Execute = - llvm::make_unique(*HWS, Opts.EnableBottleneckAnalysis); - auto Retire = llvm::make_unique(*RCU, *PRF); + std::make_unique(*HWS, Opts.EnableBottleneckAnalysis); + auto Retire = std::make_unique(*RCU, *PRF, *LSU); // Pass the ownership of all the hardware units to this Context. addHardwareUnit(std::move(RCU)); @@ -54,10 +53,10 @@ Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB, addHardwareUnit(std::move(HWS)); // Build the pipeline. 
- auto StagePipeline = llvm::make_unique(); + auto StagePipeline = std::make_unique(); StagePipeline->appendStage(std::move(Fetch)); if (Opts.MicroOpQueueSize) - StagePipeline->appendStage(llvm::make_unique( + StagePipeline->appendStage(std::make_unique( Opts.MicroOpQueueSize, Opts.DecodersThroughput)); StagePipeline->appendStage(std::move(Dispatch)); StagePipeline->appendStage(std::move(Execute)); diff --git a/lib/MCA/HardwareUnits/LSUnit.cpp b/lib/MCA/HardwareUnits/LSUnit.cpp index ac1a6a36547b..0ee084c7ce1a 100644 --- a/lib/MCA/HardwareUnits/LSUnit.cpp +++ b/lib/MCA/HardwareUnits/LSUnit.cpp @@ -29,12 +29,12 @@ LSUnitBase::LSUnitBase(const MCSchedModel &SM, unsigned LQ, unsigned SQ, const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); if (!LQSize && EPI.LoadQueueID) { const MCProcResourceDesc &LdQDesc = *SM.getProcResource(EPI.LoadQueueID); - LQSize = LdQDesc.BufferSize; + LQSize = std::max(0, LdQDesc.BufferSize); } if (!SQSize && EPI.StoreQueueID) { const MCProcResourceDesc &StQDesc = *SM.getProcResource(EPI.StoreQueueID); - SQSize = StQDesc.BufferSize; + SQSize = std::max(0, StQDesc.BufferSize); } } } @@ -72,9 +72,9 @@ unsigned LSUnit::dispatch(const InstRef &IR) { assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!"); if (Desc.MayLoad) - assignLQSlot(); + acquireLQSlot(); if (Desc.MayStore) - assignSQSlot(); + acquireSQSlot(); if (Desc.MayStore) { // Always create a new group for store operations. @@ -160,26 +160,28 @@ LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const { } void LSUnitBase::onInstructionExecuted(const InstRef &IR) { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - bool IsALoad = Desc.MayLoad; - bool IsAStore = Desc.MayStore; - assert((IsALoad || IsAStore) && "Expected a memory operation!"); - unsigned GroupID = IR.getInstruction()->getLSUTokenID(); auto It = Groups.find(GroupID); + assert(It != Groups.end() && "Instruction not dispatched to the LS unit"); It->second->onInstructionExecuted(); - if (It->second->isExecuted()) { + if (It->second->isExecuted()) Groups.erase(It); - } +} + +void LSUnitBase::onInstructionRetired(const InstRef &IR) { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + bool IsALoad = Desc.MayLoad; + bool IsAStore = Desc.MayStore; + assert((IsALoad || IsAStore) && "Expected a memory operation!"); if (IsALoad) { - UsedLQEntries--; + releaseLQSlot(); LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex() << " has been removed from the load queue.\n"); } if (IsAStore) { - UsedSQEntries--; + releaseSQSlot(); LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex() << " has been removed from the store queue.\n"); } diff --git a/lib/MCA/HardwareUnits/RegisterFile.cpp b/lib/MCA/HardwareUnits/RegisterFile.cpp index 86a888ea8cae..7ea5506f11d6 100644 --- a/lib/MCA/HardwareUnits/RegisterFile.cpp +++ b/lib/MCA/HardwareUnits/RegisterFile.cpp @@ -147,7 +147,7 @@ void RegisterFile::freePhysRegs(const RegisterRenamingInfo &Entry, void RegisterFile::addRegisterWrite(WriteRef Write, MutableArrayRef UsedPhysRegs) { WriteState &WS = *Write.getWriteState(); - unsigned RegID = WS.getRegisterID(); + MCPhysReg RegID = WS.getRegisterID(); assert(RegID && "Adding an invalid register definition?"); LLVM_DEBUG({ @@ -194,7 +194,7 @@ void RegisterFile::addRegisterWrite(WriteRef Write, } // Update zero registers. - unsigned ZeroRegisterID = + MCPhysReg ZeroRegisterID = WS.clearsSuperRegisters() ? 
RegID : WS.getRegisterID(); if (IsWriteZero) { ZeroRegisters.setBit(ZeroRegisterID); @@ -247,7 +247,7 @@ void RegisterFile::removeRegisterWrite( if (WS.isEliminated()) return; - unsigned RegID = WS.getRegisterID(); + MCPhysReg RegID = WS.getRegisterID(); assert(RegID != 0 && "Invalidating an already invalid register?"); assert(WS.getCyclesLeft() != UNKNOWN_CYCLES && @@ -255,7 +255,7 @@ void RegisterFile::removeRegisterWrite( assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!"); bool ShouldFreePhysRegs = !WS.isWriteZero(); - unsigned RenameAs = RegisterMappings[RegID].second.RenameAs; + MCPhysReg RenameAs = RegisterMappings[RegID].second.RenameAs; if (RenameAs && RenameAs != RegID) { RegID = RenameAs; @@ -355,7 +355,7 @@ bool RegisterFile::tryEliminateMove(WriteState &WS, ReadState &RS) { void RegisterFile::collectWrites(const ReadState &RS, SmallVectorImpl &Writes) const { - unsigned RegID = RS.getRegisterID(); + MCPhysReg RegID = RS.getRegisterID(); assert(RegID && RegID < RegisterMappings.size()); LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register " << MRI.getName(RegID) << '\n'); @@ -397,7 +397,7 @@ void RegisterFile::collectWrites(const ReadState &RS, void RegisterFile::addRegisterRead(ReadState &RS, const MCSubtargetInfo &STI) const { - unsigned RegID = RS.getRegisterID(); + MCPhysReg RegID = RS.getRegisterID(); const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; RS.setPRF(RRI.IndexPlusCost.first); if (RS.isIndependentFromDef()) @@ -424,11 +424,11 @@ void RegisterFile::addRegisterRead(ReadState &RS, } } -unsigned RegisterFile::isAvailable(ArrayRef Regs) const { +unsigned RegisterFile::isAvailable(ArrayRef Regs) const { SmallVector NumPhysRegs(getNumRegisterFiles()); // Find how many new mappings must be created for each register file. - for (const unsigned RegID : Regs) { + for (const MCPhysReg RegID : Regs) { const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; const IndexPlusCostPairTy &Entry = RRI.IndexPlusCost; if (Entry.first) diff --git a/lib/MCA/HardwareUnits/ResourceManager.cpp b/lib/MCA/HardwareUnits/ResourceManager.cpp index 06f2476353d6..088aea3e23c6 100644 --- a/lib/MCA/HardwareUnits/ResourceManager.cpp +++ b/lib/MCA/HardwareUnits/ResourceManager.cpp @@ -104,7 +104,7 @@ void ResourceState::dump() const { static std::unique_ptr getStrategyFor(const ResourceState &RS) { if (RS.isAResourceGroup() || RS.getNumUnits() > 1) - return llvm::make_unique(RS.getReadyMask()); + return std::make_unique(RS.getReadyMask()); return std::unique_ptr(nullptr); } @@ -114,7 +114,8 @@ ResourceManager::ResourceManager(const MCSchedModel &SM) Resource2Groups(SM.getNumProcResourceKinds() - 1, 0), ProcResID2Mask(SM.getNumProcResourceKinds(), 0), ResIndex2ProcResID(SM.getNumProcResourceKinds() - 1, 0), - ProcResUnitMask(0), ReservedResourceGroups(0) { + ProcResUnitMask(0), ReservedResourceGroups(0), + AvailableBuffers(~0ULL), ReservedBuffers(0) { computeProcResourceMasks(SM, ProcResID2Mask); // initialize vector ResIndex2ProcResID. 
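// The hunks that follow replace per-buffer vectors with 64-bit masks:
// AvailableBuffers and ReservedBuffers are bitsets indexed by resource-state
// index, availability becomes a couple of AND tests, and individual buffers
// are visited by repeatedly peeling off the lowest set bit (Mask & -Mask).
// A stand-alone model of that idiom, with invented mask values:
#include <cassert>
#include <cstdint>

enum class Event { Available, Unavailable, Reserved };

static Event canBeDispatched(uint64_t Consumed, uint64_t Available,
                             uint64_t Reserved) {
  if (Consumed & Reserved)
    return Event::Reserved;
  if (Consumed & ~Available)
    return Event::Unavailable;
  return Event::Available;
}

int main() {
  uint64_t Consumed = 0b1010, Available = ~0ULL, Reserved = 0;
  assert(canBeDispatched(Consumed, Available, Reserved) == Event::Available);

  // Visit each consumed buffer, lowest bit first: 0b0010, then 0b1000.
  unsigned Visited = 0;
  for (uint64_t M = Consumed; M; M ^= (M & -M))
    ++Visited;
  assert(Visited == 2);
  return 0;
}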
@@ -127,7 +128,7 @@ ResourceManager::ResourceManager(const MCSchedModel &SM) uint64_t Mask = ProcResID2Mask[I]; unsigned Index = getResourceStateIndex(Mask); Resources[Index] = - llvm::make_unique(*SM.getProcResource(I), I, Mask); + std::make_unique(*SM.getProcResource(I), I, Mask); Strategies[Index] = getStrategyFor(*Resources[Index]); } @@ -241,33 +242,41 @@ void ResourceManager::release(const ResourceRef &RR) { } ResourceStateEvent -ResourceManager::canBeDispatched(ArrayRef Buffers) const { - ResourceStateEvent Result = ResourceStateEvent::RS_BUFFER_AVAILABLE; - for (uint64_t Buffer : Buffers) { - ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; - Result = RS.isBufferAvailable(); - if (Result != ResourceStateEvent::RS_BUFFER_AVAILABLE) - break; - } - return Result; +ResourceManager::canBeDispatched(uint64_t ConsumedBuffers) const { + if (ConsumedBuffers & ReservedBuffers) + return ResourceStateEvent::RS_RESERVED; + if (ConsumedBuffers & (~AvailableBuffers)) + return ResourceStateEvent::RS_BUFFER_UNAVAILABLE; + return ResourceStateEvent::RS_BUFFER_AVAILABLE; } -void ResourceManager::reserveBuffers(ArrayRef Buffers) { - for (const uint64_t Buffer : Buffers) { - ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; +void ResourceManager::reserveBuffers(uint64_t ConsumedBuffers) { + while (ConsumedBuffers) { + uint64_t CurrentBuffer = ConsumedBuffers & (-ConsumedBuffers); + ResourceState &RS = *Resources[getResourceStateIndex(CurrentBuffer)]; + ConsumedBuffers ^= CurrentBuffer; assert(RS.isBufferAvailable() == ResourceStateEvent::RS_BUFFER_AVAILABLE); - RS.reserveBuffer(); - + if (!RS.reserveBuffer()) + AvailableBuffers ^= CurrentBuffer; if (RS.isADispatchHazard()) { - assert(!RS.isReserved()); - RS.setReserved(); + // Reserve this buffer now, and release it once pipeline resources + // consumed by the instruction become available again. + // We do this to simulate an in-order dispatch/issue of instructions. + ReservedBuffers ^= CurrentBuffer; } } } -void ResourceManager::releaseBuffers(ArrayRef Buffers) { - for (const uint64_t R : Buffers) - Resources[getResourceStateIndex(R)]->releaseBuffer(); +void ResourceManager::releaseBuffers(uint64_t ConsumedBuffers) { + AvailableBuffers |= ConsumedBuffers; + while (ConsumedBuffers) { + uint64_t CurrentBuffer = ConsumedBuffers & (-ConsumedBuffers); + ResourceState &RS = *Resources[getResourceStateIndex(CurrentBuffer)]; + ConsumedBuffers ^= CurrentBuffer; + RS.releaseBuffer(); + // Do not unreserve dispatch hazard resource buffers. Wait until all + // pipeline resources have been freed too. + } } uint64_t ResourceManager::checkAvailability(const InstrDesc &Desc) const { @@ -322,7 +331,6 @@ void ResourceManager::cycleEvent(SmallVectorImpl &ResourcesFreed) { if (countPopulation(RR.first) == 1) release(RR); - releaseResource(RR.first); ResourcesFreed.push_back(RR); } @@ -336,7 +344,7 @@ void ResourceManager::reserveResource(uint64_t ResourceID) { const unsigned Index = getResourceStateIndex(ResourceID); ResourceState &Resource = *Resources[Index]; assert(Resource.isAResourceGroup() && !Resource.isReserved() && - "Unexpected resource found!"); + "Unexpected resource state found!"); Resource.setReserved(); ReservedResourceGroups ^= 1ULL << Index; } @@ -347,6 +355,9 @@ void ResourceManager::releaseResource(uint64_t ResourceID) { Resource.clearReserved(); if (Resource.isAResourceGroup()) ReservedResourceGroups ^= 1ULL << Index; + // Now it is safe to release dispatch/issue resources. 
+ if (Resource.isADispatchHazard()) + ReservedBuffers ^= 1ULL << Index; } } // namespace mca diff --git a/lib/MCA/HardwareUnits/RetireControlUnit.cpp b/lib/MCA/HardwareUnits/RetireControlUnit.cpp index 068c5062ccdf..de519d7fd94a 100644 --- a/lib/MCA/HardwareUnits/RetireControlUnit.cpp +++ b/lib/MCA/HardwareUnits/RetireControlUnit.cpp @@ -21,65 +21,78 @@ namespace mca { RetireControlUnit::RetireControlUnit(const MCSchedModel &SM) : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), - AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0) { + NumROBEntries(SM.MicroOpBufferSize), + AvailableEntries(SM.MicroOpBufferSize), MaxRetirePerCycle(0) { // Check if the scheduling model provides extra information about the machine // processor. If so, then use that information to set the reorder buffer size // and the maximum number of instructions retired per cycle. if (SM.hasExtraProcessorInfo()) { const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); if (EPI.ReorderBufferSize) - AvailableSlots = EPI.ReorderBufferSize; + AvailableEntries = EPI.ReorderBufferSize; MaxRetirePerCycle = EPI.MaxRetirePerCycle; } - - assert(AvailableSlots && "Invalid reorder buffer size!"); - Queue.resize(AvailableSlots); + NumROBEntries = AvailableEntries; + assert(NumROBEntries && "Invalid reorder buffer size!"); + Queue.resize(2 * NumROBEntries); } // Reserves a number of slots, and returns a new token. -unsigned RetireControlUnit::reserveSlot(const InstRef &IR, - unsigned NumMicroOps) { - assert(isAvailable(NumMicroOps) && "Reorder Buffer unavailable!"); - unsigned NormalizedQuantity = - std::min(NumMicroOps, static_cast(Queue.size())); - // Zero latency instructions may have zero uOps. Artificially bump this - // value to 1. Although zero latency instructions don't consume scheduler - // resources, they still consume one slot in the retire queue. 
- NormalizedQuantity = std::max(NormalizedQuantity, 1U); +unsigned RetireControlUnit::dispatch(const InstRef &IR) { + const Instruction &Inst = *IR.getInstruction(); + unsigned Entries = normalizeQuantity(Inst.getNumMicroOps()); + assert((AvailableEntries >= Entries) && "Reorder Buffer unavailable!"); + unsigned TokenID = NextAvailableSlotIdx; - Queue[NextAvailableSlotIdx] = {IR, NormalizedQuantity, false}; - NextAvailableSlotIdx += NormalizedQuantity; + Queue[NextAvailableSlotIdx] = {IR, Entries, false}; + NextAvailableSlotIdx += std::max(1U, Entries); NextAvailableSlotIdx %= Queue.size(); - AvailableSlots -= NormalizedQuantity; + + AvailableEntries -= Entries; return TokenID; } -const RetireControlUnit::RUToken &RetireControlUnit::peekCurrentToken() const { - return Queue[CurrentInstructionSlotIdx]; +const RetireControlUnit::RUToken &RetireControlUnit::getCurrentToken() const { + const RetireControlUnit::RUToken &Current = Queue[CurrentInstructionSlotIdx]; +#ifndef NDEBUG + const Instruction *Inst = Current.IR.getInstruction(); + assert(Inst && "Invalid RUToken in the RCU queue."); +#endif + return Current; +} + +unsigned RetireControlUnit::computeNextSlotIdx() const { + const RetireControlUnit::RUToken &Current = getCurrentToken(); + unsigned NextSlotIdx = CurrentInstructionSlotIdx + std::max(1U, Current.NumSlots); + return NextSlotIdx % Queue.size(); +} + +const RetireControlUnit::RUToken &RetireControlUnit::peekNextToken() const { + return Queue[computeNextSlotIdx()]; } void RetireControlUnit::consumeCurrentToken() { RetireControlUnit::RUToken &Current = Queue[CurrentInstructionSlotIdx]; - assert(Current.NumSlots && "Reserved zero slots?"); - assert(Current.IR && "Invalid RUToken in the RCU queue."); Current.IR.getInstruction()->retire(); // Update the slot index to be the next item in the circular queue. - CurrentInstructionSlotIdx += Current.NumSlots; + CurrentInstructionSlotIdx += std::max(1U, Current.NumSlots); CurrentInstructionSlotIdx %= Queue.size(); - AvailableSlots += Current.NumSlots; + AvailableEntries += Current.NumSlots; + Current = { InstRef(), 0U, false }; } void RetireControlUnit::onInstructionExecuted(unsigned TokenID) { assert(Queue.size() > TokenID); - assert(Queue[TokenID].Executed == false && Queue[TokenID].IR); + assert(Queue[TokenID].IR.getInstruction() && "Instruction was not dispatched!"); + assert(Queue[TokenID].Executed == false && "Instruction already executed!"); Queue[TokenID].Executed = true; } #ifndef NDEBUG void RetireControlUnit::dump() const { - dbgs() << "Retire Unit: { Total Slots=" << Queue.size() - << ", Available Slots=" << AvailableSlots << " }\n"; + dbgs() << "Retire Unit: { Total ROB Entries =" << NumROBEntries + << ", Available ROB entries=" << AvailableEntries << " }\n"; } #endif diff --git a/lib/MCA/HardwareUnits/Scheduler.cpp b/lib/MCA/HardwareUnits/Scheduler.cpp index 0f0f2ffb8325..8730336c6669 100644 --- a/lib/MCA/HardwareUnits/Scheduler.cpp +++ b/lib/MCA/HardwareUnits/Scheduler.cpp @@ -21,7 +21,7 @@ namespace mca { void Scheduler::initializeStrategy(std::unique_ptr S) { // Ensure we have a valid (non-null) strategy object. - Strategy = S ? std::move(S) : llvm::make_unique(); + Strategy = S ? std::move(S) : std::make_unique(); } // Anchor the vtable of SchedulerStrategy and DefaultSchedulerStrategy. 
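// A compact model of the circular reorder-buffer bookkeeping introduced in
// RetireControlUnit above: the token queue holds 2 * NumROBEntries slots, a
// dispatched instruction charges its normalized micro-op count against
// AvailableEntries, and the slot index advances by at least one and wraps
// modulo the queue size. The normalization as min(uops, NumROBEntries) is an
// assumption here, and all numbers are invented for illustration.
#include <algorithm>
#include <cassert>

int main() {
  const unsigned NumROBEntries = 4;
  const unsigned QueueSize = 2 * NumROBEntries;
  unsigned NextAvailableSlotIdx = 0;
  unsigned AvailableEntries = NumROBEntries;

  auto dispatch = [&](unsigned MicroOps) {
    unsigned Entries = std::min(MicroOps, NumROBEntries); // normalized quantity
    assert(AvailableEntries >= Entries && "reorder buffer unavailable");
    unsigned TokenID = NextAvailableSlotIdx;
    NextAvailableSlotIdx =
        (NextAvailableSlotIdx + std::max(1U, Entries)) % QueueSize;
    AvailableEntries -= Entries;
    return TokenID;
  };

  unsigned T0 = dispatch(3); // occupies slots 0..2
  unsigned T1 = dispatch(0); // a zero-uop instruction still takes a slot index
  assert(T0 == 0 && T1 == 3 && NextAvailableSlotIdx == 4 &&
         AvailableEntries == 1);
  return 0;
}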
@@ -38,9 +38,8 @@ void Scheduler::dump() const { #endif Scheduler::Status Scheduler::isAvailable(const InstRef &IR) { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - - ResourceStateEvent RSE = Resources->canBeDispatched(Desc.Buffers); + ResourceStateEvent RSE = + Resources->canBeDispatched(IR.getInstruction()->getUsedBuffers()); HadTokenStall = RSE != RS_BUFFER_AVAILABLE; switch (RSE) { @@ -106,7 +105,7 @@ void Scheduler::issueInstruction( bool HasDependentUsers = Inst.hasDependentUsers(); HasDependentUsers |= Inst.isMemOp() && LSU.hasDependentUsers(IR); - Resources->releaseBuffers(Inst.getDesc().Buffers); + Resources->releaseBuffers(Inst.getUsedBuffers()); issueInstructionImpl(IR, UsedResources); // Instructions that have been issued during this cycle might have unblocked // other dependent instructions. Dependent instructions may be issued during @@ -300,8 +299,7 @@ bool Scheduler::mustIssueImmediately(const InstRef &IR) const { bool Scheduler::dispatch(InstRef &IR) { Instruction &IS = *IR.getInstruction(); - const InstrDesc &Desc = IS.getDesc(); - Resources->reserveBuffers(Desc.Buffers); + Resources->reserveBuffers(IS.getUsedBuffers()); // If necessary, reserve queue entries in the load-store unit (LSU). if (IS.isMemOp()) diff --git a/lib/MCA/InstrBuilder.cpp b/lib/MCA/InstrBuilder.cpp index 829920366c90..bd28c733535c 100644 --- a/lib/MCA/InstrBuilder.cpp +++ b/lib/MCA/InstrBuilder.cpp @@ -80,7 +80,7 @@ static void initializeUsedResources(InstrDesc &ID, if (PR.BufferSize < 0) { AllInOrderResources = false; } else { - Buffers.setBit(PRE->ProcResourceIdx); + Buffers.setBit(getResourceStateIndex(Mask)); AnyDispatchHazards |= (PR.BufferSize == 0); AllInOrderResources &= (PR.BufferSize <= 1); } @@ -139,9 +139,6 @@ static void initializeUsedResources(InstrDesc &ID, } } - ID.UsedProcResUnits = UsedResourceUnits; - ID.UsedProcResGroups = UsedResourceGroups; - // A SchedWrite may specify a number of cycles in which a resource group // is reserved. For example (on target x86; cpu Haswell): // @@ -177,20 +174,13 @@ static void initializeUsedResources(InstrDesc &ID, uint64_t Mask = ProcResourceMasks[I]; if (Mask != SR.first && ((Mask & SR.first) == SR.first)) - Buffers.setBit(I); + Buffers.setBit(getResourceStateIndex(Mask)); } } - // Now set the buffers. 
- if (unsigned NumBuffers = Buffers.countPopulation()) { - ID.Buffers.resize(NumBuffers); - for (unsigned I = 0, E = NumProcResources; I < E && NumBuffers; ++I) { - if (Buffers[I]) { - --NumBuffers; - ID.Buffers[NumBuffers] = ProcResourceMasks[I]; - } - } - } + ID.UsedBuffers = Buffers.getZExtValue(); + ID.UsedProcResUnits = UsedResourceUnits; + ID.UsedProcResGroups = UsedResourceGroups; LLVM_DEBUG({ for (const std::pair &R : ID.Resources) @@ -198,8 +188,12 @@ static void initializeUsedResources(InstrDesc &ID, << "Reserved=" << R.second.isReserved() << ", " << "#Units=" << R.second.NumUnits << ", " << "cy=" << R.second.size() << '\n'; - for (const uint64_t R : ID.Buffers) - dbgs() << "\t\tBuffer Mask=" << format_hex(R, 16) << '\n'; + uint64_t BufferIDs = ID.UsedBuffers; + while (BufferIDs) { + uint64_t Current = BufferIDs & (-BufferIDs); + dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n'; + BufferIDs ^= Current; + } dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n'; dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16) << '\n'; @@ -464,9 +458,8 @@ void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI, // FIXME: If an instruction opcode is marked as 'mayLoad', and it has no // "unmodeledSideEffects", then this logic optimistically assumes that any - // extra register operands in the variadic sequence are not register + // extra register operand in the variadic sequence is not a register // definition. - bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() && !MCDesc.hasUnmodeledSideEffects(); for (unsigned I = 0, OpIndex = MCDesc.getNumOperands(); @@ -493,7 +486,7 @@ Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID, return ErrorSuccess(); bool UsesMemory = ID.MayLoad || ID.MayStore; - bool UsesBuffers = !ID.Buffers.empty(); + bool UsesBuffers = ID.UsedBuffers; bool UsesResources = !ID.Resources.empty(); if (!UsesMemory && !UsesBuffers && !UsesResources) return ErrorSuccess(); @@ -550,7 +543,7 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI) { LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n'); // Create a new empty descriptor. - std::unique_ptr ID = llvm::make_unique(); + std::unique_ptr ID = std::make_unique(); ID->NumMicroOps = SCDesc.NumMicroOps; ID->SchedClassID = SchedClassID; @@ -619,7 +612,7 @@ InstrBuilder::createInstruction(const MCInst &MCI) { if (!DescOrErr) return DescOrErr.takeError(); const InstrDesc &D = *DescOrErr; - std::unique_ptr NewIS = llvm::make_unique(D); + std::unique_ptr NewIS = std::make_unique(D); // Check if this is a dependency breaking instruction. APInt Mask; @@ -636,8 +629,8 @@ InstrBuilder::createInstruction(const MCInst &MCI) { } // Initialize Reads first. + MCPhysReg RegID = 0; for (const ReadDescriptor &RD : D.Reads) { - int RegID = -1; if (!RD.isImplicitRead()) { // explicit read. const MCOperand &Op = MCI.getOperand(RD.OpIndex); @@ -655,7 +648,6 @@ InstrBuilder::createInstruction(const MCInst &MCI) { continue; // Okay, this is a register operand. Create a ReadState for it. - assert(RegID > 0 && "Invalid register ID found!"); NewIS->getUses().emplace_back(RD, RegID); ReadState &RS = NewIS->getUses().back(); @@ -696,8 +688,8 @@ InstrBuilder::createInstruction(const MCInst &MCI) { // Initialize writes. unsigned WriteIndex = 0; for (const WriteDescriptor &WD : D.Writes) { - unsigned RegID = WD.isImplicitWrite() ? WD.RegisterID - : MCI.getOperand(WD.OpIndex).getReg(); + RegID = WD.isImplicitWrite() ? 
WD.RegisterID + : MCI.getOperand(WD.OpIndex).getReg(); // Check if this is a optional definition that references NoReg. if (WD.IsOptionalDef && !RegID) { ++WriteIndex; diff --git a/lib/MCA/Instruction.cpp b/lib/MCA/Instruction.cpp index 001842bca318..e5f2c4fd1eec 100644 --- a/lib/MCA/Instruction.cpp +++ b/lib/MCA/Instruction.cpp @@ -18,7 +18,7 @@ namespace llvm { namespace mca { -void WriteState::writeStartEvent(unsigned IID, unsigned RegID, +void WriteState::writeStartEvent(unsigned IID, MCPhysReg RegID, unsigned Cycles) { CRD.IID = IID; CRD.RegID = RegID; @@ -27,7 +27,7 @@ void WriteState::writeStartEvent(unsigned IID, unsigned RegID, DependentWrite = nullptr; } -void ReadState::writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles) { +void ReadState::writeStartEvent(unsigned IID, MCPhysReg RegID, unsigned Cycles) { assert(DependentWrites); assert(CyclesLeft == UNKNOWN_CYCLES); diff --git a/lib/MCA/Stages/DispatchStage.cpp b/lib/MCA/Stages/DispatchStage.cpp index 7334a268e9a6..3a3d82259160 100644 --- a/lib/MCA/Stages/DispatchStage.cpp +++ b/lib/MCA/Stages/DispatchStage.cpp @@ -44,7 +44,7 @@ void DispatchStage::notifyInstructionDispatched(const InstRef &IR, } bool DispatchStage::checkPRF(const InstRef &IR) const { - SmallVector RegDefs; + SmallVector RegDefs; for (const WriteState &RegDef : IR.getInstruction()->getDefs()) RegDefs.emplace_back(RegDef.getRegisterID()); @@ -60,7 +60,7 @@ bool DispatchStage::checkPRF(const InstRef &IR) const { } bool DispatchStage::checkRCU(const InstRef &IR) const { - const unsigned NumMicroOps = IR.getInstruction()->getDesc().NumMicroOps; + const unsigned NumMicroOps = IR.getInstruction()->getNumMicroOps(); if (RCU.isAvailable(NumMicroOps)) return true; notifyEvent( @@ -79,7 +79,7 @@ Error DispatchStage::dispatch(InstRef IR) { assert(!CarryOver && "Cannot dispatch another instruction!"); Instruction &IS = *IR.getInstruction(); const InstrDesc &Desc = IS.getDesc(); - const unsigned NumMicroOps = Desc.NumMicroOps; + const unsigned NumMicroOps = IS.getNumMicroOps(); if (NumMicroOps > DispatchWidth) { assert(AvailableEntries == DispatchWidth); AvailableEntries = 0; @@ -123,9 +123,10 @@ Error DispatchStage::dispatch(InstRef IR) { for (WriteState &WS : IS.getDefs()) PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), &WS), RegisterFiles); - // Reserve slots in the RCU, and notify the instruction that it has been - // dispatched to the schedulers for execution. - IS.dispatch(RCU.reserveSlot(IR, NumMicroOps)); + // Reserve entries in the reorder buffer. + unsigned RCUTokenID = RCU.dispatch(IR); + // Notify the instruction that it has been dispatched. + IS.dispatch(RCUTokenID); // Notify listeners of the "instruction dispatched" event, // and move IR to the next stage. 
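// A small model of the micro-op accounting in DispatchStage::dispatch() above
// and isAvailable() in the next hunk: an instruction needs
// min(NumMicroOps, DispatchWidth) dispatch slots in the current cycle, and an
// instruction wider than the dispatch width consumes the whole cycle, with the
// remaining micro-ops carried over (the CarryOver handling itself is not shown
// here). Numbers are invented, this is not the MCA implementation.
#include <algorithm>
#include <cassert>

int main() {
  const unsigned DispatchWidth = 4;
  unsigned AvailableEntries = DispatchWidth; // refilled at every cycle start

  auto required = [&](unsigned NumMicroOps) {
    return std::min(NumMicroOps, DispatchWidth);
  };

  // A 2-uop instruction fits: it consumes 2 of the 4 slots.
  assert(required(2) <= AvailableEntries);
  AvailableEntries -= required(2); // 2 slots left this cycle

  // A 6-uop instruction is wider than the dispatch width: it can only start on
  // a fresh cycle, empties it, and 6 - 4 = 2 micro-ops carry over.
  unsigned NumMicroOps = 6;
  AvailableEntries = DispatchWidth; // new cycle
  assert(required(NumMicroOps) <= AvailableEntries);
  unsigned CarryOver = NumMicroOps - DispatchWidth;
  AvailableEntries = 0; // the cycle is fully consumed
  assert(CarryOver == 2 && AvailableEntries == 0);
  return 0;
}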
@@ -155,8 +156,10 @@ Error DispatchStage::cycleStart() { } bool DispatchStage::isAvailable(const InstRef &IR) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - unsigned Required = std::min(Desc.NumMicroOps, DispatchWidth); + const Instruction &Inst = *IR.getInstruction(); + unsigned NumMicroOps = Inst.getNumMicroOps(); + const InstrDesc &Desc = Inst.getDesc(); + unsigned Required = std::min(NumMicroOps, DispatchWidth); if (Required > AvailableEntries) return false; diff --git a/lib/MCA/Stages/EntryStage.cpp b/lib/MCA/Stages/EntryStage.cpp index d2f5613a0fb6..66135790a4cd 100644 --- a/lib/MCA/Stages/EntryStage.cpp +++ b/lib/MCA/Stages/EntryStage.cpp @@ -33,7 +33,7 @@ void EntryStage::getNextInstruction() { if (!SM.hasNext()) return; SourceRef SR = SM.peekNext(); - std::unique_ptr Inst = llvm::make_unique(SR.second); + std::unique_ptr Inst = std::make_unique(SR.second); CurrentInstruction = InstRef(SR.first, Inst.get()); Instructions.emplace_back(std::move(Inst)); SM.updateNext(); diff --git a/lib/MCA/Stages/ExecuteStage.cpp b/lib/MCA/Stages/ExecuteStage.cpp index a2b361fcd1bf..2284ed7f2816 100644 --- a/lib/MCA/Stages/ExecuteStage.cpp +++ b/lib/MCA/Stages/ExecuteStage.cpp @@ -56,12 +56,13 @@ Error ExecuteStage::issueInstruction(InstRef &IR) { SmallVector Ready; HWS.issueInstruction(IR, Used, Pending, Ready); - NumIssuedOpcodes += IR.getInstruction()->getDesc().NumMicroOps; + Instruction &IS = *IR.getInstruction(); + NumIssuedOpcodes += IS.getNumMicroOps(); notifyReservedOrReleasedBuffers(IR, /* Reserved */ false); notifyInstructionIssued(IR, Used); - if (IR.getInstruction()->isExecuted()) { + if (IS.isExecuted()) { notifyInstructionExecuted(IR); // FIXME: add a buffer of executed instructions. if (Error S = moveToTheNextStage(IR)) @@ -199,7 +200,8 @@ Error ExecuteStage::execute(InstRef &IR) { // units have been consumed. 
bool IsReadyInstruction = HWS.dispatch(IR); const Instruction &Inst = *IR.getInstruction(); - NumDispatchedOpcodes += Inst.getDesc().NumMicroOps; + unsigned NumMicroOps = Inst.getNumMicroOps(); + NumDispatchedOpcodes += NumMicroOps; notifyReservedOrReleasedBuffers(IR, /* Reserved */ true); if (!IsReadyInstruction) { @@ -269,13 +271,17 @@ void ExecuteStage::notifyInstructionIssued( void ExecuteStage::notifyReservedOrReleasedBuffers(const InstRef &IR, bool Reserved) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - if (Desc.Buffers.empty()) + uint64_t UsedBuffers = IR.getInstruction()->getDesc().UsedBuffers; + if (!UsedBuffers) return; - SmallVector BufferIDs(Desc.Buffers.begin(), Desc.Buffers.end()); - std::transform(Desc.Buffers.begin(), Desc.Buffers.end(), BufferIDs.begin(), - [&](uint64_t Op) { return HWS.getResourceID(Op); }); + SmallVector BufferIDs(countPopulation(UsedBuffers), 0); + for (unsigned I = 0, E = BufferIDs.size(); I < E; ++I) { + uint64_t CurrentBufferMask = UsedBuffers & (-UsedBuffers); + BufferIDs[I] = HWS.getResourceID(CurrentBufferMask); + UsedBuffers ^= CurrentBufferMask; + } + if (Reserved) { for (HWEventListener *Listener : getListeners()) Listener->onReservedBuffers(IR, BufferIDs); diff --git a/lib/MCA/Stages/RetireStage.cpp b/lib/MCA/Stages/RetireStage.cpp index e1789dd7fa2a..f792af748bce 100644 --- a/lib/MCA/Stages/RetireStage.cpp +++ b/lib/MCA/Stages/RetireStage.cpp @@ -31,11 +31,11 @@ llvm::Error RetireStage::cycleStart() { while (!RCU.isEmpty()) { if (MaxRetirePerCycle != 0 && NumRetired == MaxRetirePerCycle) break; - const RetireControlUnit::RUToken &Current = RCU.peekCurrentToken(); + const RetireControlUnit::RUToken &Current = RCU.getCurrentToken(); if (!Current.Executed) break; - RCU.consumeCurrentToken(); notifyInstructionRetired(Current.IR); + RCU.consumeCurrentToken(); NumRetired++; } @@ -52,6 +52,10 @@ void RetireStage::notifyInstructionRetired(const InstRef &IR) const { llvm::SmallVector FreedRegs(PRF.getNumRegisterFiles()); const Instruction &Inst = *IR.getInstruction(); + // Release the load/store queue entries. 
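The UsedBuffers loop in the ExecuteStage hunk above peels one buffer mask per iteration with the lowest-set-bit idiom. A self-contained sketch of that walk on plain uint64_t values (no llvm-mca types; __builtin_popcountll assumed as the popcount, matching GCC/Clang):

#include <cstdint>
#include <vector>

static std::vector<uint64_t> expandBufferMask(uint64_t UsedBuffers) {
  std::vector<uint64_t> Masks;
  Masks.reserve(__builtin_popcountll(UsedBuffers)); // one entry per set bit
  while (UsedBuffers) {
    uint64_t Lowest = UsedBuffers & (~UsedBuffers + 1); // x & -x: lowest set bit
    Masks.push_back(Lowest);
    UsedBuffers ^= Lowest; // clear the bit just extracted
  }
  return Masks;
}
// expandBufferMask(0b10110) yields {0b10, 0b100, 0b10000}.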
+ if (Inst.isMemOp()) + LSU.onInstructionRetired(IR); + for (const WriteState &WS : Inst.getDefs()) PRF.removeRegisterWrite(WS, FreedRegs); notifyEvent(HWInstructionRetiredEvent(IR, FreedRegs)); diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp index 49e66f46ab3f..148c011d9cd4 100644 --- a/lib/Object/Archive.cpp +++ b/lib/Object/Archive.cpp @@ -223,8 +223,8 @@ Expected ArchiveMemberHeader::getName(uint64_t Size) const { return Name.drop_back(1); } -Expected ArchiveMemberHeader::getSize() const { - uint32_t Ret; +Expected ArchiveMemberHeader::getSize() const { + uint64_t Ret; if (StringRef(ArMemHdr->Size, sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Ret)) { std::string Buf; @@ -550,7 +550,7 @@ Archive::Archive(MemoryBufferRef Source, Error &Err) } else if (Buffer.startswith(Magic)) { IsThin = false; } else { - Err = make_error("File too small to be an archive", + Err = make_error("file too small to be an archive", object_error::invalid_file_type); return; } diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp index 228f6b40c5ec..5234b0e18233 100644 --- a/lib/Object/ArchiveWriter.cpp +++ b/lib/Object/ArchiveWriter.cpp @@ -16,8 +16,10 @@ #include "llvm/BinaryFormat/Magic.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Object/Archive.h" +#include "llvm/Object/Error.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" @@ -147,7 +149,7 @@ static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val) { static void printRestOfMemberHeader( raw_ostream &Out, const sys::TimePoint &ModTime, - unsigned UID, unsigned GID, unsigned Perms, unsigned Size) { + unsigned UID, unsigned GID, unsigned Perms, uint64_t Size) { printWithSpacePadding(Out, sys::toTimeT(ModTime), 12); // The format has only 6 chars for uid and gid. Truncate if the provided @@ -164,7 +166,7 @@ static void printGNUSmallMemberHeader(raw_ostream &Out, StringRef Name, const sys::TimePoint &ModTime, unsigned UID, unsigned GID, unsigned Perms, - unsigned Size) { + uint64_t Size) { printWithSpacePadding(Out, Twine(Name) + "/", 16); printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size); } @@ -172,11 +174,10 @@ printGNUSmallMemberHeader(raw_ostream &Out, StringRef Name, static void printBSDMemberHeader(raw_ostream &Out, uint64_t Pos, StringRef Name, const sys::TimePoint &ModTime, - unsigned UID, unsigned GID, unsigned Perms, - unsigned Size) { + unsigned UID, unsigned GID, unsigned Perms, uint64_t Size) { uint64_t PosAfterHeader = Pos + 60 + Name.size(); // Pad so that even 64 bit object files are aligned. 
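The printBSDMemberHeader hunk that follows, like the string-table and symbol-table hunks after it, switches from OffsetToAlignment(Value, N) to offsetToAlignment(Value, Align(N)); in both spellings the result is the number of padding bytes needed to reach the next N-byte boundary. A self-contained sketch of that arithmetic without the llvm::Align type, shown only to make the padding rule explicit:

#include <cassert>
#include <cstdint>

static uint64_t paddingTo(uint64_t Offset, uint64_t Alignment) {
  assert(Alignment != 0 && (Alignment & (Alignment - 1)) == 0 &&
         "alignment must be a power of two");
  return (0 - Offset) & (Alignment - 1); // 0 when Offset is already aligned
}
// paddingTo(61, 8) == 3: a 61-byte header is padded out to 64 bytes.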
- unsigned Pad = OffsetToAlignment(PosAfterHeader, 8); + unsigned Pad = offsetToAlignment(PosAfterHeader, Align(8)); unsigned NameWithPadding = Name.size() + Pad; printWithSpacePadding(Out, Twine("#1/") + Twine(NameWithPadding), 16); printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, @@ -208,7 +209,7 @@ static void printMemberHeader(raw_ostream &Out, uint64_t Pos, raw_ostream &StringTable, StringMap &MemberNames, object::Archive::Kind Kind, bool Thin, const NewArchiveMember &M, - sys::TimePoint ModTime, unsigned Size) { + sys::TimePoint ModTime, uint64_t Size) { if (isBSDLike(Kind)) return printBSDMemberHeader(Out, Pos, M.MemberName, ModTime, M.UID, M.GID, M.Perms, Size); @@ -243,7 +244,7 @@ struct MemberData { static MemberData computeStringTable(StringRef Names) { unsigned Size = Names.size(); - unsigned Pad = OffsetToAlignment(Size, 2); + unsigned Pad = offsetToAlignment(Size, Align(2)); std::string Header; raw_string_ostream Out(Header); printWithSpacePadding(Out, "//", 48); @@ -307,8 +308,8 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, // least 4-byte aligned for 32-bit content. Opt for the larger encoding // uniformly. // We do this for all bsd formats because it simplifies aligning members. - unsigned Alignment = isBSDLike(Kind) ? 8 : 2; - unsigned Pad = OffsetToAlignment(Size, Alignment); + const Align Alignment(isBSDLike(Kind) ? 8 : 2); + unsigned Pad = offsetToAlignment(Size, Alignment); Size += Pad; if (isBSDLike(Kind)) { @@ -464,8 +465,9 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, // uniformly. This matches the behaviour with cctools and ensures that ld64 // is happy with archives that we generate. unsigned MemberPadding = - isDarwin(Kind) ? OffsetToAlignment(Data.size(), 8) : 0; - unsigned TailPadding = OffsetToAlignment(Data.size() + MemberPadding, 2); + isDarwin(Kind) ? 
offsetToAlignment(Data.size(), Align(8)) : 0; + unsigned TailPadding = + offsetToAlignment(Data.size() + MemberPadding, Align(2)); StringRef Padding = StringRef(PaddingData, MemberPadding + TailPadding); sys::TimePoint ModTime; @@ -474,8 +476,17 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, ModTime = sys::toTimePoint(FilenameCount[M.MemberName]++); else ModTime = M.ModTime; + + uint64_t Size = Buf.getBufferSize() + MemberPadding; + if (Size > object::Archive::MaxMemberSize) { + std::string StringMsg = + "File " + M.MemberName.str() + " exceeds size limit"; + return make_error( + std::move(StringMsg), object::object_error::parse_failed); + } + printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, M, - ModTime, Buf.getBufferSize() + MemberPadding); + ModTime, Size); Out.flush(); Expected> Symbols = diff --git a/lib/Object/Binary.cpp b/lib/Object/Binary.cpp index a953c1d8cb80..944d2bc1bca7 100644 --- a/lib/Object/Binary.cpp +++ b/lib/Object/Binary.cpp @@ -18,6 +18,7 @@ #include "llvm/Object/MachOUniversal.h" #include "llvm/Object/Minidump.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Object/TapiUniversal.h" #include "llvm/Object/WindowsResource.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" @@ -86,6 +87,8 @@ Expected> object::createBinary(MemoryBufferRef Buffer, return errorCodeToError(object_error::invalid_file_type); case file_magic::minidump: return MinidumpFile::create(Buffer); + case file_magic::tapi_file: + return TapiUniversal::create(Buffer); } llvm_unreachable("Unexpected Binary File Type"); } diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index 854664e679df..2c0f6dc2b1e9 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -936,29 +936,6 @@ iterator_range COFFObjectFile::base_relocs() const { return make_range(base_reloc_begin(), base_reloc_end()); } -std::error_code -COFFObjectFile::getCOFFHeader(const coff_file_header *&Res) const { - Res = COFFHeader; - return std::error_code(); -} - -std::error_code -COFFObjectFile::getCOFFBigObjHeader(const coff_bigobj_file_header *&Res) const { - Res = COFFBigObjHeader; - return std::error_code(); -} - -std::error_code COFFObjectFile::getPE32Header(const pe32_header *&Res) const { - Res = PE32Header; - return std::error_code(); -} - -std::error_code -COFFObjectFile::getPE32PlusHeader(const pe32plus_header *&Res) const { - Res = PE32PlusHeader; - return std::error_code(); -} - std::error_code COFFObjectFile::getDataDirectory(uint32_t Index, const data_directory *&Res) const { @@ -994,11 +971,12 @@ std::error_code COFFObjectFile::getSection(int32_t Index, std::error_code COFFObjectFile::getSection(StringRef SectionName, const coff_section *&Result) const { Result = nullptr; - StringRef SecName; for (const SectionRef &Section : sections()) { - if (std::error_code E = Section.getName(SecName)) - return E; - if (SecName == SectionName) { + auto NameOrErr = Section.getName(); + if (!NameOrErr) + return errorToErrorCode(NameOrErr.takeError()); + + if (*NameOrErr == SectionName) { Result = getCOFFSection(Section); return std::error_code(); } @@ -1684,9 +1662,12 @@ std::error_code BaseRelocRef::getRVA(uint32_t &Result) const { return std::error_code(); } -#define RETURN_IF_ERROR(E) \ - if (E) \ - return E; +#define RETURN_IF_ERROR(Expr) \ + do { \ + Error E = (Expr); \ + if (E) \ + return std::move(E); \ + } while (0) Expected> ResourceSectionRef::getDirStringAtOffset(uint32_t Offset) { @@ -1715,11 +1696,168 @@ 
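The reworked RETURN_IF_ERROR above wraps its body in do { ... } while (0) so the macro expands to a single statement (safe after an unbraced if), and returns std::move(E) because llvm::Error is move-only. A minimal illustration with a hypothetical helper, not taken from this file:

#include "llvm/Support/Error.h"

#define RETURN_IF_ERROR_SKETCH(Expr)                                           \
  do {                                                                         \
    llvm::Error E = (Expr);                                                    \
    if (E)                                                                     \
      return std::move(E);                                                     \
  } while (0)

static llvm::Error mightFail(bool Fail) { // hypothetical helper
  if (Fail)
    return llvm::createStringError(llvm::inconvertibleErrorCode(), "boom");
  return llvm::Error::success();
}

static llvm::Error caller(bool Fail) {
  if (Fail)
    RETURN_IF_ERROR_SKETCH(mightFail(Fail)); // expands to one statement
  return llvm::Error::success();
}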
ResourceSectionRef::getTableAtOffset(uint32_t Offset) { return *Table; } +Expected +ResourceSectionRef::getTableEntryAtOffset(uint32_t Offset) { + const coff_resource_dir_entry *Entry = nullptr; + + BinaryStreamReader Reader(BBS); + Reader.setOffset(Offset); + RETURN_IF_ERROR(Reader.readObject(Entry)); + assert(Entry != nullptr); + return *Entry; +} + +Expected +ResourceSectionRef::getDataEntryAtOffset(uint32_t Offset) { + const coff_resource_data_entry *Entry = nullptr; + + BinaryStreamReader Reader(BBS); + Reader.setOffset(Offset); + RETURN_IF_ERROR(Reader.readObject(Entry)); + assert(Entry != nullptr); + return *Entry; +} + Expected ResourceSectionRef::getEntrySubDir(const coff_resource_dir_entry &Entry) { + assert(Entry.Offset.isSubDir()); return getTableAtOffset(Entry.Offset.value()); } +Expected +ResourceSectionRef::getEntryData(const coff_resource_dir_entry &Entry) { + assert(!Entry.Offset.isSubDir()); + return getDataEntryAtOffset(Entry.Offset.value()); +} + Expected ResourceSectionRef::getBaseTable() { return getTableAtOffset(0); } + +Expected +ResourceSectionRef::getTableEntry(const coff_resource_dir_table &Table, + uint32_t Index) { + if (Index >= (uint32_t)(Table.NumberOfNameEntries + Table.NumberOfIDEntries)) + return createStringError(object_error::parse_failed, "index out of range"); + const uint8_t *TablePtr = reinterpret_cast(&Table); + ptrdiff_t TableOffset = TablePtr - BBS.data().data(); + return getTableEntryAtOffset(TableOffset + sizeof(Table) + + Index * sizeof(coff_resource_dir_entry)); +} + +Error ResourceSectionRef::load(const COFFObjectFile *O) { + for (const SectionRef &S : O->sections()) { + Expected Name = S.getName(); + if (!Name) + return Name.takeError(); + + if (*Name == ".rsrc" || *Name == ".rsrc$01") + return load(O, S); + } + return createStringError(object_error::parse_failed, + "no resource section found"); +} + +Error ResourceSectionRef::load(const COFFObjectFile *O, const SectionRef &S) { + Obj = O; + Section = S; + Expected Contents = Section.getContents(); + if (!Contents) + return Contents.takeError(); + BBS = BinaryByteStream(*Contents, support::little); + const coff_section *COFFSect = Obj->getCOFFSection(Section); + ArrayRef OrigRelocs = Obj->getRelocations(COFFSect); + Relocs.reserve(OrigRelocs.size()); + for (const coff_relocation &R : OrigRelocs) + Relocs.push_back(&R); + std::sort(Relocs.begin(), Relocs.end(), + [](const coff_relocation *A, const coff_relocation *B) { + return A->VirtualAddress < B->VirtualAddress; + }); + return Error::success(); +} + +Expected +ResourceSectionRef::getContents(const coff_resource_data_entry &Entry) { + if (!Obj) + return createStringError(object_error::parse_failed, "no object provided"); + + // Find a potential relocation at the DataRVA field (first member of + // the coff_resource_data_entry struct). + const uint8_t *EntryPtr = reinterpret_cast(&Entry); + ptrdiff_t EntryOffset = EntryPtr - BBS.data().data(); + coff_relocation RelocTarget{ulittle32_t(EntryOffset), ulittle32_t(0), + ulittle16_t(0)}; + auto RelocsForOffset = + std::equal_range(Relocs.begin(), Relocs.end(), &RelocTarget, + [](const coff_relocation *A, const coff_relocation *B) { + return A->VirtualAddress < B->VirtualAddress; + }); + + if (RelocsForOffset.first != RelocsForOffset.second) { + // We found a relocation with the right offset. Check that it does have + // the expected type. 
+ const coff_relocation &R = **RelocsForOffset.first; + uint16_t RVAReloc; + switch (Obj->getMachine()) { + case COFF::IMAGE_FILE_MACHINE_I386: + RVAReloc = COFF::IMAGE_REL_I386_DIR32NB; + break; + case COFF::IMAGE_FILE_MACHINE_AMD64: + RVAReloc = COFF::IMAGE_REL_AMD64_ADDR32NB; + break; + case COFF::IMAGE_FILE_MACHINE_ARMNT: + RVAReloc = COFF::IMAGE_REL_ARM_ADDR32NB; + break; + case COFF::IMAGE_FILE_MACHINE_ARM64: + RVAReloc = COFF::IMAGE_REL_ARM64_ADDR32NB; + break; + default: + return createStringError(object_error::parse_failed, + "unsupported architecture"); + } + if (R.Type != RVAReloc) + return createStringError(object_error::parse_failed, + "unexpected relocation type"); + // Get the relocation's symbol + Expected Sym = Obj->getSymbol(R.SymbolTableIndex); + if (!Sym) + return Sym.takeError(); + const coff_section *Section = nullptr; + // And the symbol's section + if (std::error_code EC = Obj->getSection(Sym->getSectionNumber(), Section)) + return errorCodeToError(EC); + // Add the initial value of DataRVA to the symbol's offset to find the + // data it points at. + uint64_t Offset = Entry.DataRVA + Sym->getValue(); + ArrayRef Contents; + if (Error E = Obj->getSectionContents(Section, Contents)) + return std::move(E); + if (Offset + Entry.DataSize > Contents.size()) + return createStringError(object_error::parse_failed, + "data outside of section"); + // Return a reference to the data inside the section. + return StringRef(reinterpret_cast(Contents.data()) + Offset, + Entry.DataSize); + } else { + // Relocatable objects need a relocation for the DataRVA field. + if (Obj->isRelocatableObject()) + return createStringError(object_error::parse_failed, + "no relocation found for DataRVA"); + + // Locate the section that contains the address that DataRVA points at. + uint64_t VA = Entry.DataRVA + Obj->getImageBase(); + for (const SectionRef &S : Obj->sections()) { + if (VA >= S.getAddress() && + VA + Entry.DataSize <= S.getAddress() + S.getSize()) { + uint64_t Offset = VA - S.getAddress(); + Expected Contents = S.getContents(); + if (!Contents) + return Contents.takeError(); + return Contents->slice(Offset, Offset + Entry.DataSize); + } + } + return createStringError(object_error::parse_failed, + "address not found in image"); + } +} diff --git a/lib/Object/Decompressor.cpp b/lib/Object/Decompressor.cpp index ec15e6f69ada..11efd857d1a1 100644 --- a/lib/Object/Decompressor.cpp +++ b/lib/Object/Decompressor.cpp @@ -56,7 +56,7 @@ Error Decompressor::consumeCompressedZLibHeader(bool Is64Bit, return createError("corrupted compressed section header"); DataExtractor Extractor(SectionData, IsLittleEndian, 0); - uint32_t Offset = 0; + uint64_t Offset = 0; if (Extractor.getUnsigned(&Offset, Is64Bit ? 
sizeof(Elf64_Word) : sizeof(Elf32_Word)) != ELFCOMPRESS_ZLIB) @@ -77,10 +77,15 @@ bool Decompressor::isGnuStyle(StringRef Name) { } bool Decompressor::isCompressed(const object::SectionRef &Section) { - StringRef Name; - if (Section.getName(Name)) - return false; - return Section.isCompressed() || isGnuStyle(Name); + if (Section.isCompressed()) + return true; + + Expected SecNameOrErr = Section.getName(); + if (SecNameOrErr) + return isGnuStyle(*SecNameOrErr); + + consumeError(SecNameOrErr.takeError()); + return false; } bool Decompressor::isCompressedELFSection(uint64_t Flags, StringRef Name) { diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp index 8660b1a64bdd..d491288579df 100644 --- a/lib/Object/ELF.cpp +++ b/lib/Object/ELF.cpp @@ -255,6 +255,8 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) { STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_ADDRSIG); STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_DEPENDENT_LIBRARIES); STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_SYMPART); + STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_PART_EHDR); + STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_PART_PHDR); STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES); STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH); STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verdef); diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp index c7b715793048..bf6ffd6c37b9 100644 --- a/lib/Object/ELFObjectFile.cpp +++ b/lib/Object/ELFObjectFile.cpp @@ -43,7 +43,16 @@ const EnumEntry llvm::object::ElfSymbolTypes[NumElfSymbolTypes] = { {"File", "FILE", ELF::STT_FILE}, {"Common", "COMMON", ELF::STT_COMMON}, {"TLS", "TLS", ELF::STT_TLS}, - {"GNU_IFunc", "IFUNC", ELF::STT_GNU_IFUNC}}; + {"Unknown", ": 7", 7}, + {"Unknown", ": 8", 8}, + {"Unknown", ": 9", 9}, + {"GNU_IFunc", "IFUNC", ELF::STT_GNU_IFUNC}, + {"OS Specific", ": 11", 11}, + {"OS Specific", ": 12", 12}, + {"Proc Specific", ": 13", 13}, + {"Proc Specific", ": 14", 14}, + {"Proc Specific", ": 15", 15} +}; ELFObjectFileBase::ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source) : ObjectFile(Type, Source) {} @@ -54,7 +63,7 @@ createPtr(MemoryBufferRef Object) { auto Ret = ELFObjectFile::create(Object); if (Error E = Ret.takeError()) return std::move(E); - return make_unique>(std::move(*Ret)); + return std::make_unique>(std::move(*Ret)); } Expected> @@ -194,7 +203,7 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const { default: break; case ARMBuildAttrs::Not_Allowed: - Features.AddFeature("vfp2d16sp", false); + Features.AddFeature("vfp2sp", false); Features.AddFeature("vfp3d16sp", false); Features.AddFeature("vfp4d16sp", false); break; @@ -347,6 +356,21 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const { case ARMBuildAttrs::v7E_M: Triple += "v7em"; break; + case ARMBuildAttrs::v8_A: + Triple += "v8a"; + break; + case ARMBuildAttrs::v8_R: + Triple += "v8r"; + break; + case ARMBuildAttrs::v8_M_Base: + Triple += "v8m.base"; + break; + case ARMBuildAttrs::v8_M_Main: + Triple += "v8m.main"; + break; + case ARMBuildAttrs::v8_1_M_Main: + Triple += "v8.1m.main"; + break; } } if (!isLittleEndian()) @@ -383,9 +407,13 @@ ELFObjectFileBase::getPltAddresses() const { return {}; Optional Plt = None, RelaPlt = None, GotPlt = None; for (const SectionRef &Section : sections()) { - StringRef Name; - if (Section.getName(Name)) + Expected NameOrErr = Section.getName(); + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); continue; + } + StringRef Name = *NameOrErr; + if (Name == ".plt") Plt = Section; else if (Name == ".rela.plt" || Name == ".rel.plt") diff --git 
a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 5aec844003c0..c0c873f97354 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -57,12 +57,6 @@ namespace { } // end anonymous namespace -static const std::array validArchs = { - "i386", "x86_64", "x86_64h", "armv4t", "arm", "armv5e", - "armv6", "armv6m", "armv7", "armv7em", "armv7k", "armv7m", - "armv7s", "arm64", "arm64_32", "ppc", "ppc64", -}; - static Error malformedError(const Twine &Msg) { return make_error("truncated or malformed object (" + Msg + ")", @@ -1951,6 +1945,11 @@ uint64_t MachOObjectFile::getSectionSize(DataRefImpl Sec) const { return SectSize; } +ArrayRef MachOObjectFile::getSectionContents(uint32_t Offset, + uint64_t Size) const { + return arrayRefFromStringRef(getData().substr(Offset, Size)); +} + Expected> MachOObjectFile::getSectionContents(DataRefImpl Sec) const { uint32_t Offset; @@ -1966,7 +1965,7 @@ MachOObjectFile::getSectionContents(DataRefImpl Sec) const { Size = Sect.size; } - return arrayRefFromStringRef(getData().substr(Offset, Size)); + return getSectionContents(Offset, Size); } uint64_t MachOObjectFile::getSectionAlignment(DataRefImpl Sec) const { @@ -1992,13 +1991,12 @@ Expected MachOObjectFile::getSection(unsigned SectionIndex) const { } Expected MachOObjectFile::getSection(StringRef SectionName) const { - StringRef SecName; for (const SectionRef &Section : sections()) { - if (std::error_code E = Section.getName(SecName)) - return errorCodeToError(E); - if (SecName == SectionName) { + auto NameOrErr = Section.getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + if (*NameOrErr == SectionName) return Section; - } } return errorCodeToError(object_error::parse_failed); } @@ -2724,11 +2722,19 @@ Triple MachOObjectFile::getHostArch() { } bool MachOObjectFile::isValidArch(StringRef ArchFlag) { - return std::find(validArchs.cbegin(), validArchs.cend(), ArchFlag) != - validArchs.cend(); + auto validArchs = getValidArchs(); + return llvm::find(validArchs, ArchFlag) != validArchs.end(); } -ArrayRef MachOObjectFile::getValidArchs() { return validArchs; } +ArrayRef MachOObjectFile::getValidArchs() { + static const std::array validArchs = {{ + "i386", "x86_64", "x86_64h", "armv4t", "arm", "armv5e", + "armv6", "armv6m", "armv7", "armv7em", "armv7k", "armv7m", + "armv7s", "arm64", "arm64_32", "ppc", "ppc64", + }}; + + return validArchs; +} Triple::ArchType MachOObjectFile::getArch() const { return getArch(getCPUType(*this)); @@ -3427,7 +3433,7 @@ iterator_range MachOObjectFile::rebaseTable(Error &Err, MachOObjectFile *O, ArrayRef Opcodes, bool is64) { if (O->BindRebaseSectionTable == nullptr) - O->BindRebaseSectionTable = llvm::make_unique(O); + O->BindRebaseSectionTable = std::make_unique(O); MachORebaseEntry Start(&Err, O, Opcodes, is64); Start.moveToFirst(); @@ -3993,7 +3999,11 @@ BindRebaseSegInfo::BindRebaseSegInfo(const object::MachOObjectFile *Obj) { uint64_t CurSegAddress; for (const SectionRef &Section : Obj->sections()) { SectionInfo Info; - Section.getName(Info.SectionName); + Expected NameOrErr = Section.getName(); + if (!NameOrErr) + consumeError(NameOrErr.takeError()); + else + Info.SectionName = *NameOrErr; Info.Address = Section.getAddress(); Info.Size = Section.getSize(); Info.SegmentName = @@ -4094,7 +4104,7 @@ MachOObjectFile::bindTable(Error &Err, MachOObjectFile *O, ArrayRef Opcodes, bool is64, MachOBindEntry::Kind BKind) { if (O->BindRebaseSectionTable == nullptr) - O->BindRebaseSectionTable = llvm::make_unique(O); + 
O->BindRebaseSectionTable = std::make_unique(O); MachOBindEntry Start(&Err, O, Opcodes, is64, BKind); Start.moveToFirst(); @@ -4610,7 +4620,7 @@ void MachOObjectFile::ReadULEB128s(uint64_t Index, SmallVectorImpl &Out) const { DataExtractor extractor(ObjectFile::getData(), true, 0); - uint32_t offset = Index; + uint64_t offset = Index; uint64_t data = 0; while (uint64_t delta = extractor.getULEB128(&offset)) { data += delta; diff --git a/lib/Object/MachOUniversal.cpp b/lib/Object/MachOUniversal.cpp index b3f0993412c6..a178ecde949e 100644 --- a/lib/Object/MachOUniversal.cpp +++ b/lib/Object/MachOUniversal.cpp @@ -155,15 +155,16 @@ MachOUniversalBinary::MachOUniversalBinary(MemoryBufferRef Source, Error &Err) ") extends past the end of the file"); return; } -#define MAXSECTALIGN 15 /* 2**15 or 0x8000 */ - if (A.getAlign() > MAXSECTALIGN) { - Err = malformedError("align (2^" + Twine(A.getAlign()) + ") too large " - "for cputype (" + Twine(A.getCPUType()) + ") cpusubtype (" + - Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + - ") (maximum 2^" + Twine(MAXSECTALIGN) + ")"); + + if (A.getAlign() > MaxSectionAlignment) { + Err = malformedError("align (2^" + Twine(A.getAlign()) + + ") too large for cputype (" + Twine(A.getCPUType()) + + ") cpusubtype (" + + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") (maximum 2^" + Twine(MaxSectionAlignment) + ")"); return; } - if(A.getOffset() % (1 << A.getAlign()) != 0){ + if(A.getOffset() % (1ull << A.getAlign()) != 0){ Err = malformedError("offset: " + Twine(A.getOffset()) + " for cputype (" + Twine(A.getCPUType()) + ") cpusubtype (" + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + @@ -209,19 +210,34 @@ MachOUniversalBinary::MachOUniversalBinary(MemoryBufferRef Source, Error &Err) Err = Error::success(); } -Expected> +Expected MachOUniversalBinary::getObjectForArch(StringRef ArchName) const { if (Triple(ArchName).getArch() == Triple::ArchType::UnknownArch) return make_error("Unknown architecture " "named: " + ArchName, object_error::arch_not_found); - - for (auto &Obj : objects()) + for (const auto &Obj : objects()) if (Obj.getArchFlagName() == ArchName) - return Obj.getAsObjectFile(); + return Obj; return make_error("fat file does not " "contain " + ArchName, object_error::arch_not_found); } + +Expected> +MachOUniversalBinary::getMachOObjectForArch(StringRef ArchName) const { + Expected O = getObjectForArch(ArchName); + if (!O) + return O.takeError(); + return O->getAsObjectFile(); +} + +Expected> +MachOUniversalBinary::getArchiveForArch(StringRef ArchName) const { + Expected O = getObjectForArch(ArchName); + if (!O) + return O.takeError(); + return O->getAsArchive(); +} diff --git a/lib/Object/Minidump.cpp b/lib/Object/Minidump.cpp index 7b5b21558699..3e932fe7be28 100644 --- a/lib/Object/Minidump.cpp +++ b/lib/Object/Minidump.cpp @@ -53,13 +53,30 @@ Expected MinidumpFile::getString(size_t Offset) const { return Result; } +Expected> +MinidumpFile::getMemoryInfoList() const { + Optional> Stream = getRawStream(StreamType::MemoryInfoList); + if (!Stream) + return createError("No such stream"); + auto ExpectedHeader = + getDataSliceAs(*Stream, 0, 1); + if (!ExpectedHeader) + return ExpectedHeader.takeError(); + const minidump::MemoryInfoListHeader &H = ExpectedHeader.get()[0]; + Expected> Data = + getDataSlice(*Stream, H.SizeOfHeader, H.SizeOfEntry * H.NumberOfEntries); + if (!Data) + return Data.takeError(); + return make_range(MemoryInfoIterator(*Data, H.SizeOfEntry), + MemoryInfoIterator({}, H.SizeOfEntry)); +} + template -Expected> 
MinidumpFile::getListStream(StreamType Stream) const { - auto OptionalStream = getRawStream(Stream); - if (!OptionalStream) +Expected> MinidumpFile::getListStream(StreamType Type) const { + Optional> Stream = getRawStream(Type); + if (!Stream) return createError("No such stream"); - auto ExpectedSize = - getDataSliceAs(*OptionalStream, 0, 1); + auto ExpectedSize = getDataSliceAs(*Stream, 0, 1); if (!ExpectedSize) return ExpectedSize.takeError(); @@ -69,10 +86,10 @@ Expected> MinidumpFile::getListStream(StreamType Stream) const { // Some producers insert additional padding bytes to align the list to an // 8-byte boundary. Check for that by comparing the list size with the overall // stream size. - if (ListOffset + sizeof(T) * ListSize < OptionalStream->size()) + if (ListOffset + sizeof(T) * ListSize < Stream->size()) ListOffset = 8; - return getDataSliceAs(*OptionalStream, ListOffset, ListSize); + return getDataSliceAs(*Stream, ListOffset, ListSize); } template Expected> MinidumpFile::getListStream(StreamType) const; @@ -109,13 +126,14 @@ MinidumpFile::create(MemoryBufferRef Source) { return ExpectedStreams.takeError(); DenseMap StreamMap; - for (const auto &Stream : llvm::enumerate(*ExpectedStreams)) { - StreamType Type = Stream.value().Type; - const LocationDescriptor &Loc = Stream.value().Location; + for (const auto &StreamDescriptor : llvm::enumerate(*ExpectedStreams)) { + StreamType Type = StreamDescriptor.value().Type; + const LocationDescriptor &Loc = StreamDescriptor.value().Location; - auto ExpectedStream = getDataSlice(Data, Loc.RVA, Loc.DataSize); - if (!ExpectedStream) - return ExpectedStream.takeError(); + Expected> Stream = + getDataSlice(Data, Loc.RVA, Loc.DataSize); + if (!Stream) + return Stream.takeError(); if (Type == StreamType::Unused && Loc.DataSize == 0) { // Ignore dummy streams. This is technically ill-formed, but a number of @@ -128,7 +146,7 @@ MinidumpFile::create(MemoryBufferRef Source) { return createError("Cannot handle one of the minidump streams"); // Update the directory map, checking for duplicate stream types. 
- if (!StreamMap.try_emplace(Type, Stream.index()).second) + if (!StreamMap.try_emplace(Type, StreamDescriptor.index()).second) return createError("Duplicate stream type"); } diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp index d84798cc6dd0..b486e9f5c9a8 100644 --- a/lib/Object/Object.cpp +++ b/lib/Object/Object.cpp @@ -138,7 +138,7 @@ LLVMBinaryRef LLVMMachOUniversalBinaryCopyObjectForArch(LLVMBinaryRef BR, char **ErrorMessage) { auto universal = cast(unwrap(BR)); Expected> ObjOrErr( - universal->getObjectForArch({Arch, ArchLen})); + universal->getMachOObjectForArch({Arch, ArchLen})); if (!ObjOrErr) { *ErrorMessage = strdup(toString(ObjOrErr.takeError()).c_str()); return nullptr; @@ -251,10 +251,10 @@ void LLVMMoveToNextSymbol(LLVMSymbolIteratorRef SI) { // SectionRef accessors const char *LLVMGetSectionName(LLVMSectionIteratorRef SI) { - StringRef ret; - if (std::error_code ec = (*unwrap(SI))->getName(ret)) - report_fatal_error(ec.message()); - return ret.data(); + auto NameOrErr = (*unwrap(SI))->getName(); + if (!NameOrErr) + report_fatal_error(NameOrErr.takeError()); + return NameOrErr->data(); } uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI) { diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp index 101f5dcc0821..e0e63a5a7d76 100644 --- a/lib/Object/ObjectFile.cpp +++ b/lib/Object/ObjectFile.cpp @@ -67,8 +67,10 @@ Error ObjectFile::printSymbolName(raw_ostream &OS, DataRefImpl Symb) const { uint32_t ObjectFile::getSymbolAlignment(DataRefImpl DRI) const { return 0; } bool ObjectFile::isSectionBitcode(DataRefImpl Sec) const { - if (Expected NameOrErr = getSectionName(Sec)) + Expected NameOrErr = getSectionName(Sec); + if (NameOrErr) return *NameOrErr == ".llvmbc"; + consumeError(NameOrErr.takeError()); return false; } @@ -82,7 +84,8 @@ bool ObjectFile::isBerkeleyData(DataRefImpl Sec) const { return isSectionData(Sec); } -section_iterator ObjectFile::getRelocatedSection(DataRefImpl Sec) const { +Expected +ObjectFile::getRelocatedSection(DataRefImpl Sec) const { return section_iterator(SectionRef(Sec, this)); } @@ -103,7 +106,7 @@ Triple ObjectFile::makeTriple() const { TheTriple.setObjectFormat(Triple::MachO); if (isCOFF()) { - const auto COFFObj = dyn_cast(this); + const auto COFFObj = cast(this); if (COFFObj->getArch() == Triple::thumb) TheTriple.setTriple("thumbv7-windows"); } @@ -127,6 +130,8 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type) { case file_magic::pdb: case file_magic::minidump: return errorCodeToError(object_error::invalid_file_type); + case file_magic::tapi_file: + return errorCodeToError(object_error::invalid_file_type); case file_magic::elf: case file_magic::elf_relocatable: case file_magic::elf_executable: diff --git a/lib/Object/RelocationResolver.cpp b/lib/Object/RelocationResolver.cpp index 0a243f32e12c..ca89f5671b8a 100644 --- a/lib/Object/RelocationResolver.cpp +++ b/lib/Object/RelocationResolver.cpp @@ -30,6 +30,7 @@ static bool supportsX86_64(uint64_t Type) { case ELF::R_X86_64_DTPOFF32: case ELF::R_X86_64_DTPOFF64: case ELF::R_X86_64_PC32: + case ELF::R_X86_64_PC64: case ELF::R_X86_64_32: case ELF::R_X86_64_32S: return true; @@ -47,6 +48,7 @@ static uint64_t resolveX86_64(RelocationRef R, uint64_t S, uint64_t A) { case ELF::R_X86_64_DTPOFF64: return S + getELFAddend(R); case ELF::R_X86_64_PC32: + case ELF::R_X86_64_PC64: return S + getELFAddend(R) - R.getOffset(); case ELF::R_X86_64_32: case ELF::R_X86_64_32S: @@ -90,9 +92,9 @@ static bool supportsBPF(uint64_t Type) { static uint64_t 
resolveBPF(RelocationRef R, uint64_t S, uint64_t A) { switch (R.getType()) { case ELF::R_BPF_64_32: - return S & 0xFFFFFFFF; + return (S + A) & 0xFFFFFFFF; case ELF::R_BPF_64_64: - return S; + return S + A; default: llvm_unreachable("Invalid relocation type"); } @@ -335,6 +337,8 @@ static bool supportsRISCV(uint64_t Type) { case ELF::R_RISCV_NONE: case ELF::R_RISCV_32: case ELF::R_RISCV_64: + case ELF::R_RISCV_SET6: + case ELF::R_RISCV_SUB6: case ELF::R_RISCV_ADD8: case ELF::R_RISCV_SUB8: case ELF::R_RISCV_ADD16: @@ -358,6 +362,10 @@ static uint64_t resolveRISCV(RelocationRef R, uint64_t S, uint64_t A) { return (S + RA) & 0xFFFFFFFF; case ELF::R_RISCV_64: return S + RA; + case ELF::R_RISCV_SET6: + return (A + (S + RA)) & 0xFF; + case ELF::R_RISCV_SUB6: + return (A - (S + RA)) & 0xFF; case ELF::R_RISCV_ADD8: return (A + (S + RA)) & 0xFF; case ELF::R_RISCV_SUB8: @@ -420,6 +428,47 @@ static uint64_t resolveCOFFX86_64(RelocationRef R, uint64_t S, uint64_t A) { } } +static bool supportsCOFFARM(uint64_t Type) { + switch (Type) { + case COFF::IMAGE_REL_ARM_SECREL: + case COFF::IMAGE_REL_ARM_ADDR32: + return true; + default: + return false; + } +} + +static uint64_t resolveCOFFARM(RelocationRef R, uint64_t S, uint64_t A) { + switch (R.getType()) { + case COFF::IMAGE_REL_ARM_SECREL: + case COFF::IMAGE_REL_ARM_ADDR32: + return (S + A) & 0xFFFFFFFF; + default: + llvm_unreachable("Invalid relocation type"); + } +} + +static bool supportsCOFFARM64(uint64_t Type) { + switch (Type) { + case COFF::IMAGE_REL_ARM64_SECREL: + case COFF::IMAGE_REL_ARM64_ADDR64: + return true; + default: + return false; + } +} + +static uint64_t resolveCOFFARM64(RelocationRef R, uint64_t S, uint64_t A) { + switch (R.getType()) { + case COFF::IMAGE_REL_ARM64_SECREL: + return (S + A) & 0xFFFFFFFF; + case COFF::IMAGE_REL_ARM64_ADDR64: + return S + A; + default: + llvm_unreachable("Invalid relocation type"); + } +} + static bool supportsMachOX86_64(uint64_t Type) { return Type == MachO::X86_64_RELOC_UNSIGNED; } @@ -472,9 +521,19 @@ static uint64_t resolveWasm32(RelocationRef R, uint64_t S, uint64_t A) { std::pair getRelocationResolver(const ObjectFile &Obj) { if (Obj.isCOFF()) { - if (Obj.getBytesInAddress() == 8) + switch (Obj.getArch()) { + case Triple::x86_64: return {supportsCOFFX86_64, resolveCOFFX86_64}; - return {supportsCOFFX86, resolveCOFFX86}; + case Triple::x86: + return {supportsCOFFX86, resolveCOFFX86}; + case Triple::arm: + case Triple::thumb: + return {supportsCOFFARM, resolveCOFFARM}; + case Triple::aarch64: + return {supportsCOFFARM64, resolveCOFFARM64}; + default: + return {nullptr, nullptr}; + } } else if (Obj.isELF()) { if (Obj.getBytesInAddress() == 8) { switch (Obj.getArch()) { diff --git a/lib/Object/SymbolicFile.cpp b/lib/Object/SymbolicFile.cpp index 2b152b7d8da3..3db4ad9ed14b 100644 --- a/lib/Object/SymbolicFile.cpp +++ b/lib/Object/SymbolicFile.cpp @@ -53,6 +53,7 @@ SymbolicFile::createSymbolicFile(MemoryBufferRef Object, file_magic Type, case file_magic::windows_resource: case file_magic::pdb: case file_magic::minidump: + case file_magic::tapi_file: return errorCodeToError(object_error::invalid_file_type); case file_magic::elf: case file_magic::elf_executable: diff --git a/lib/Object/TapiFile.cpp b/lib/Object/TapiFile.cpp new file mode 100644 index 000000000000..c409bd8e5995 --- /dev/null +++ b/lib/Object/TapiFile.cpp @@ -0,0 +1,104 @@ +//===- TapiFile.cpp -------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the Text-based Dynamcic Library Stub format. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/TapiFile.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace MachO; +using namespace object; + +static constexpr StringLiteral ObjC1ClassNamePrefix = ".objc_class_name_"; +static constexpr StringLiteral ObjC2ClassNamePrefix = "_OBJC_CLASS_$_"; +static constexpr StringLiteral ObjC2MetaClassNamePrefix = "_OBJC_METACLASS_$_"; +static constexpr StringLiteral ObjC2EHTypePrefix = "_OBJC_EHTYPE_$_"; +static constexpr StringLiteral ObjC2IVarPrefix = "_OBJC_IVAR_$_"; + +static uint32_t getFlags(const Symbol *Sym) { + uint32_t Flags = BasicSymbolRef::SF_Global; + if (Sym->isUndefined()) + Flags |= BasicSymbolRef::SF_Undefined; + else + Flags |= BasicSymbolRef::SF_Exported; + + if (Sym->isWeakDefined() || Sym->isWeakReferenced()) + Flags |= BasicSymbolRef::SF_Weak; + + return Flags; +} + +TapiFile::TapiFile(MemoryBufferRef Source, const InterfaceFile &interface, + Architecture Arch) + : SymbolicFile(ID_TapiFile, Source) { + for (const auto *Symbol : interface.symbols()) { + if (!Symbol->getArchitectures().has(Arch)) + continue; + + switch (Symbol->getKind()) { + case SymbolKind::GlobalSymbol: + Symbols.emplace_back(StringRef(), Symbol->getName(), getFlags(Symbol)); + break; + case SymbolKind::ObjectiveCClass: + if (interface.getPlatforms().count(PlatformKind::macOS) && + Arch == AK_i386) { + Symbols.emplace_back(ObjC1ClassNamePrefix, Symbol->getName(), + getFlags(Symbol)); + } else { + Symbols.emplace_back(ObjC2ClassNamePrefix, Symbol->getName(), + getFlags(Symbol)); + Symbols.emplace_back(ObjC2MetaClassNamePrefix, Symbol->getName(), + getFlags(Symbol)); + } + break; + case SymbolKind::ObjectiveCClassEHType: + Symbols.emplace_back(ObjC2EHTypePrefix, Symbol->getName(), + getFlags(Symbol)); + break; + case SymbolKind::ObjectiveCInstanceVariable: + Symbols.emplace_back(ObjC2IVarPrefix, Symbol->getName(), + getFlags(Symbol)); + break; + } + } +} + +TapiFile::~TapiFile() = default; + +void TapiFile::moveSymbolNext(DataRefImpl &DRI) const { + const auto *Sym = reinterpret_cast(DRI.p); + DRI.p = reinterpret_cast(++Sym); +} + +Error TapiFile::printSymbolName(raw_ostream &OS, DataRefImpl DRI) const { + const auto *Sym = reinterpret_cast(DRI.p); + OS << Sym->Prefix << Sym->Name; + return Error::success(); +} + +uint32_t TapiFile::getSymbolFlags(DataRefImpl DRI) const { + const auto *Sym = reinterpret_cast(DRI.p); + return Sym->Flags; +} + +basic_symbol_iterator TapiFile::symbol_begin() const { + DataRefImpl DRI; + DRI.p = reinterpret_cast(&*Symbols.begin()); + return BasicSymbolRef{DRI, this}; +} + +basic_symbol_iterator TapiFile::symbol_end() const { + DataRefImpl DRI; + DRI.p = reinterpret_cast(&*Symbols.end()); + return BasicSymbolRef{DRI, this}; +} diff --git a/lib/Object/TapiUniversal.cpp b/lib/Object/TapiUniversal.cpp new file mode 100644 index 000000000000..b3273e345a61 --- /dev/null +++ b/lib/Object/TapiUniversal.cpp @@ -0,0 +1,54 @@ +//===- TapiUniversal.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the Text-based Dynamic Library Stub format. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/TapiUniversal.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/TextAPI/MachO/TextAPIReader.h" + +using namespace llvm; +using namespace MachO; +using namespace object; + +TapiUniversal::TapiUniversal(MemoryBufferRef Source, Error &Err) + : Binary(ID_TapiUniversal, Source) { + auto Result = TextAPIReader::get(Source); + ErrorAsOutParameter ErrAsOuParam(&Err); + if (!Result) { + Err = Result.takeError(); + return; + } + ParsedFile = std::move(Result.get()); + + auto Archs = ParsedFile->getArchitectures(); + for (auto Arch : Archs) + Architectures.emplace_back(Arch); +} + +TapiUniversal::~TapiUniversal() = default; + +Expected> +TapiUniversal::ObjectForArch::getAsObjectFile() const { + return std::unique_ptr(new TapiFile(Parent->getMemoryBufferRef(), + *Parent->ParsedFile.get(), + Parent->Architectures[Index])); +} + +Expected> +TapiUniversal::create(MemoryBufferRef Source) { + Error Err = Error::success(); + std::unique_ptr Ret(new TapiUniversal(Source, Err)); + if (Err) + return std::move(Err); + return std::move(Ret); +} diff --git a/lib/Object/WasmObjectFile.cpp b/lib/Object/WasmObjectFile.cpp index 82aa1830dced..014b403556df 100644 --- a/lib/Object/WasmObjectFile.cpp +++ b/lib/Object/WasmObjectFile.cpp @@ -56,7 +56,7 @@ LLVM_DUMP_METHOD void WasmSymbol::dump() const { print(dbgs()); } Expected> ObjectFile::createWasmObjectFile(MemoryBufferRef Buffer) { Error Err = Error::success(); - auto ObjectFile = llvm::make_unique(Buffer, Err); + auto ObjectFile = std::make_unique(Buffer, Err); if (Err) return std::move(Err); @@ -781,7 +781,7 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) { break; case wasm::R_WASM_GLOBAL_INDEX_LEB: // R_WASM_GLOBAL_INDEX_LEB are can be used against function and data - // symbols to refer to thier GOT enties. + // symbols to refer to their GOT entries. 
if (!isValidGlobalSymbol(Reloc.Index) && !isValidDataSymbol(Reloc.Index) && !isValidFunctionSymbol(Reloc.Index)) @@ -881,12 +881,9 @@ Error WasmObjectFile::parseTypeSection(ReadContext &Ctx) { Sig.Params.push_back(wasm::ValType(ParamType)); } uint32_t ReturnCount = readVaruint32(Ctx); - if (ReturnCount) { - if (ReturnCount != 1) { - return make_error( - "Multiple return types not supported", object_error::parse_failed); - } - Sig.Returns.push_back(wasm::ValType(readUint8(Ctx))); + while (ReturnCount--) { + uint32_t ReturnType = readUint8(Ctx); + Sig.Returns.push_back(wasm::ValType(ReturnType)); } Signatures.push_back(std::move(Sig)); } diff --git a/lib/Object/WindowsResource.cpp b/lib/Object/WindowsResource.cpp index d76e1231684c..10717718b201 100644 --- a/lib/Object/WindowsResource.cpp +++ b/lib/Object/WindowsResource.cpp @@ -30,15 +30,24 @@ namespace object { if (auto EC = X) \ return EC; +#define UNWRAP_REF_OR_RETURN(Name, Expr) \ + auto Name##OrErr = Expr; \ + if (!Name##OrErr) \ + return Name##OrErr.takeError(); \ + const auto &Name = *Name##OrErr; + +#define UNWRAP_OR_RETURN(Name, Expr) \ + auto Name##OrErr = Expr; \ + if (!Name##OrErr) \ + return Name##OrErr.takeError(); \ + auto Name = *Name##OrErr; + const uint32_t MIN_HEADER_SIZE = 7 * sizeof(uint32_t) + 2 * sizeof(uint16_t); // COFF files seem to be inconsistent with alignment between sections, just use // 8-byte because it makes everyone happy. const uint32_t SECTION_ALIGNMENT = sizeof(uint64_t); -uint32_t WindowsResourceParser::TreeNode::StringCount = 0; -uint32_t WindowsResourceParser::TreeNode::DataCount = 0; - WindowsResource::WindowsResource(MemoryBufferRef Source) : Binary(Binary::ID_WinRes, Source) { size_t LeadingSize = WIN_RES_MAGIC_SIZE + WIN_RES_NULL_ENTRY_SIZE; @@ -128,7 +137,8 @@ Error ResourceEntryRef::loadNext() { return Error::success(); } -WindowsResourceParser::WindowsResourceParser() : Root(false) {} +WindowsResourceParser::WindowsResourceParser(bool MinGW) + : Root(false), MinGW(MinGW) {} void printResourceTypeName(uint16_t TypeID, raw_ostream &OS) { switch (TypeID) { @@ -200,6 +210,122 @@ static std::string makeDuplicateResourceError( return OS.str(); } +static void printStringOrID(const WindowsResourceParser::StringOrID &S, + raw_string_ostream &OS, bool IsType, bool IsID) { + if (S.IsString) { + std::string UTF8; + if (!convertUTF16LEToUTF8String(S.String, UTF8)) + UTF8 = "(failed conversion from UTF16)"; + OS << '\"' << UTF8 << '\"'; + } else if (IsType) + printResourceTypeName(S.ID, OS); + else if (IsID) + OS << "ID " << S.ID; + else + OS << S.ID; +} + +static std::string makeDuplicateResourceError( + const std::vector &Context, + StringRef File1, StringRef File2) { + std::string Ret; + raw_string_ostream OS(Ret); + + OS << "duplicate resource:"; + + if (Context.size() >= 1) { + OS << " type "; + printStringOrID(Context[0], OS, /* IsType */ true, /* IsID */ true); + } + + if (Context.size() >= 2) { + OS << "/name "; + printStringOrID(Context[1], OS, /* IsType */ false, /* IsID */ true); + } + + if (Context.size() >= 3) { + OS << "/language "; + printStringOrID(Context[2], OS, /* IsType */ false, /* IsID */ false); + } + OS << ", in " << File1 << " and in " << File2; + + return OS.str(); +} + +// MinGW specific. Remove default manifests (with language zero) if there are +// other manifests present, and report an error if there are more than one +// manifest with a non-zero language code. +// GCC has the concept of a default manifest resource object, which gets +// linked in implicitly if present. 
This default manifest has got language +// id zero, and should be dropped silently if there's another manifest present. +// If the user resources surprisignly had a manifest with language id zero, +// we should also ignore the duplicate default manifest. +void WindowsResourceParser::cleanUpManifests( + std::vector &Duplicates) { + auto TypeIt = Root.IDChildren.find(/* RT_MANIFEST */ 24); + if (TypeIt == Root.IDChildren.end()) + return; + + TreeNode *TypeNode = TypeIt->second.get(); + auto NameIt = + TypeNode->IDChildren.find(/* CREATEPROCESS_MANIFEST_RESOURCE_ID */ 1); + if (NameIt == TypeNode->IDChildren.end()) + return; + + TreeNode *NameNode = NameIt->second.get(); + if (NameNode->IDChildren.size() <= 1) + return; // None or one manifest present, all good. + + // If we have more than one manifest, drop the language zero one if present, + // and check again. + auto LangZeroIt = NameNode->IDChildren.find(0); + if (LangZeroIt != NameNode->IDChildren.end() && + LangZeroIt->second->IsDataNode) { + uint32_t RemovedIndex = LangZeroIt->second->DataIndex; + NameNode->IDChildren.erase(LangZeroIt); + Data.erase(Data.begin() + RemovedIndex); + Root.shiftDataIndexDown(RemovedIndex); + + // If we're now down to one manifest, all is good. + if (NameNode->IDChildren.size() <= 1) + return; + } + + // More than one non-language-zero manifest + auto FirstIt = NameNode->IDChildren.begin(); + uint32_t FirstLang = FirstIt->first; + TreeNode *FirstNode = FirstIt->second.get(); + auto LastIt = NameNode->IDChildren.rbegin(); + uint32_t LastLang = LastIt->first; + TreeNode *LastNode = LastIt->second.get(); + Duplicates.push_back( + ("duplicate non-default manifests with languages " + Twine(FirstLang) + + " in " + InputFilenames[FirstNode->Origin] + " and " + Twine(LastLang) + + " in " + InputFilenames[LastNode->Origin]) + .str()); +} + +// Ignore duplicates of manifests with language zero (the default manifest), +// in case the user has provided a manifest with that language id. See +// the function comment above for context. Only returns true if MinGW is set +// to true. 
+bool WindowsResourceParser::shouldIgnoreDuplicate( + const ResourceEntryRef &Entry) const { + return MinGW && !Entry.checkTypeString() && + Entry.getTypeID() == /* RT_MANIFEST */ 24 && + !Entry.checkNameString() && + Entry.getNameID() == /* CREATEPROCESS_MANIFEST_RESOURCE_ID */ 1 && + Entry.getLanguage() == 0; +} + +bool WindowsResourceParser::shouldIgnoreDuplicate( + const std::vector &Context) const { + return MinGW && Context.size() == 3 && !Context[0].IsString && + Context[0].ID == /* RT_MANIFEST */ 24 && !Context[1].IsString && + Context[1].ID == /* CREATEPROCESS_MANIFEST_RESOURCE_ID */ 1 && + !Context[2].IsString && Context[2].ID == 0; +} + Error WindowsResourceParser::parse(WindowsResource *WR, std::vector &Duplicates) { auto EntryOrErr = WR->getHeadEntry(); @@ -219,112 +345,176 @@ Error WindowsResourceParser::parse(WindowsResource *WR, } ResourceEntryRef Entry = EntryOrErr.get(); + uint32_t Origin = InputFilenames.size(); + InputFilenames.push_back(WR->getFileName()); bool End = false; while (!End) { - Data.push_back(Entry.getData()); - bool IsNewTypeString = false; - bool IsNewNameString = false; - - TreeNode* Node; - bool IsNewNode = Root.addEntry(Entry, InputFilenames.size(), - IsNewTypeString, IsNewNameString, Node); - InputFilenames.push_back(WR->getFileName()); + TreeNode *Node; + bool IsNewNode = Root.addEntry(Entry, Origin, Data, StringTable, Node); if (!IsNewNode) { - Duplicates.push_back(makeDuplicateResourceError( - Entry, InputFilenames[Node->Origin], WR->getFileName())); + if (!shouldIgnoreDuplicate(Entry)) + Duplicates.push_back(makeDuplicateResourceError( + Entry, InputFilenames[Node->Origin], WR->getFileName())); } - if (IsNewTypeString) - StringTable.push_back(Entry.getTypeString()); - - if (IsNewNameString) - StringTable.push_back(Entry.getNameString()); - RETURN_IF_ERROR(Entry.moveNext(End)); } return Error::success(); } +Error WindowsResourceParser::parse(ResourceSectionRef &RSR, StringRef Filename, + std::vector &Duplicates) { + UNWRAP_REF_OR_RETURN(BaseTable, RSR.getBaseTable()); + uint32_t Origin = InputFilenames.size(); + InputFilenames.push_back(Filename); + std::vector Context; + return addChildren(Root, RSR, BaseTable, Origin, Context, Duplicates); +} + void WindowsResourceParser::printTree(raw_ostream &OS) const { ScopedPrinter Writer(OS); Root.print(Writer, "Resource Tree"); } -bool WindowsResourceParser::TreeNode::addEntry(const ResourceEntryRef &Entry, - uint32_t Origin, - bool &IsNewTypeString, - bool &IsNewNameString, - TreeNode *&Result) { - TreeNode &TypeNode = addTypeNode(Entry, IsNewTypeString); - TreeNode &NameNode = TypeNode.addNameNode(Entry, IsNewNameString); - return NameNode.addLanguageNode(Entry, Origin, Result); +bool WindowsResourceParser::TreeNode::addEntry( + const ResourceEntryRef &Entry, uint32_t Origin, + std::vector> &Data, + std::vector> &StringTable, TreeNode *&Result) { + TreeNode &TypeNode = addTypeNode(Entry, StringTable); + TreeNode &NameNode = TypeNode.addNameNode(Entry, StringTable); + return NameNode.addLanguageNode(Entry, Origin, Data, Result); } -WindowsResourceParser::TreeNode::TreeNode(bool IsStringNode) { - if (IsStringNode) - StringIndex = StringCount++; +Error WindowsResourceParser::addChildren(TreeNode &Node, + ResourceSectionRef &RSR, + const coff_resource_dir_table &Table, + uint32_t Origin, + std::vector &Context, + std::vector &Duplicates) { + + for (int i = 0; i < Table.NumberOfNameEntries + Table.NumberOfIDEntries; + i++) { + UNWRAP_REF_OR_RETURN(Entry, RSR.getTableEntry(Table, i)); + TreeNode *Child; + + 
if (Entry.Offset.isSubDir()) { + + // Create a new subdirectory and recurse + if (i < Table.NumberOfNameEntries) { + UNWRAP_OR_RETURN(NameString, RSR.getEntryNameString(Entry)); + Child = &Node.addNameChild(NameString, StringTable); + Context.push_back(StringOrID(NameString)); + } else { + Child = &Node.addIDChild(Entry.Identifier.ID); + Context.push_back(StringOrID(Entry.Identifier.ID)); + } + + UNWRAP_REF_OR_RETURN(NextTable, RSR.getEntrySubDir(Entry)); + Error E = + addChildren(*Child, RSR, NextTable, Origin, Context, Duplicates); + if (E) + return E; + Context.pop_back(); + + } else { + + // Data leaves are supposed to have a numeric ID as identifier (language). + if (Table.NumberOfNameEntries > 0) + return createStringError(object_error::parse_failed, + "unexpected string key for data object"); + + // Try adding a data leaf + UNWRAP_REF_OR_RETURN(DataEntry, RSR.getEntryData(Entry)); + TreeNode *Child; + Context.push_back(StringOrID(Entry.Identifier.ID)); + bool Added = Node.addDataChild(Entry.Identifier.ID, Table.MajorVersion, + Table.MinorVersion, Table.Characteristics, + Origin, Data.size(), Child); + if (Added) { + UNWRAP_OR_RETURN(Contents, RSR.getContents(DataEntry)); + Data.push_back(ArrayRef( + reinterpret_cast(Contents.data()), + Contents.size())); + } else { + if (!shouldIgnoreDuplicate(Context)) + Duplicates.push_back(makeDuplicateResourceError( + Context, InputFilenames[Child->Origin], InputFilenames.back())); + } + Context.pop_back(); + + } + } + return Error::success(); } +WindowsResourceParser::TreeNode::TreeNode(uint32_t StringIndex) + : StringIndex(StringIndex) {} + WindowsResourceParser::TreeNode::TreeNode(uint16_t MajorVersion, uint16_t MinorVersion, uint32_t Characteristics, - uint32_t Origin) - : IsDataNode(true), MajorVersion(MajorVersion), MinorVersion(MinorVersion), - Characteristics(Characteristics), Origin(Origin) { - DataIndex = DataCount++; -} + uint32_t Origin, uint32_t DataIndex) + : IsDataNode(true), DataIndex(DataIndex), MajorVersion(MajorVersion), + MinorVersion(MinorVersion), Characteristics(Characteristics), + Origin(Origin) {} std::unique_ptr -WindowsResourceParser::TreeNode::createStringNode() { - return std::unique_ptr(new TreeNode(true)); +WindowsResourceParser::TreeNode::createStringNode(uint32_t Index) { + return std::unique_ptr(new TreeNode(Index)); } std::unique_ptr WindowsResourceParser::TreeNode::createIDNode() { - return std::unique_ptr(new TreeNode(false)); + return std::unique_ptr(new TreeNode(0)); } std::unique_ptr WindowsResourceParser::TreeNode::createDataNode(uint16_t MajorVersion, uint16_t MinorVersion, uint32_t Characteristics, - uint32_t Origin) { - return std::unique_ptr( - new TreeNode(MajorVersion, MinorVersion, Characteristics, Origin)); + uint32_t Origin, + uint32_t DataIndex) { + return std::unique_ptr(new TreeNode( + MajorVersion, MinorVersion, Characteristics, Origin, DataIndex)); } -WindowsResourceParser::TreeNode & -WindowsResourceParser::TreeNode::addTypeNode(const ResourceEntryRef &Entry, - bool &IsNewTypeString) { +WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addTypeNode( + const ResourceEntryRef &Entry, + std::vector> &StringTable) { if (Entry.checkTypeString()) - return addNameChild(Entry.getTypeString(), IsNewTypeString); + return addNameChild(Entry.getTypeString(), StringTable); else return addIDChild(Entry.getTypeID()); } -WindowsResourceParser::TreeNode & -WindowsResourceParser::TreeNode::addNameNode(const ResourceEntryRef &Entry, - bool &IsNewNameString) { +WindowsResourceParser::TreeNode 
&WindowsResourceParser::TreeNode::addNameNode( + const ResourceEntryRef &Entry, + std::vector> &StringTable) { if (Entry.checkNameString()) - return addNameChild(Entry.getNameString(), IsNewNameString); + return addNameChild(Entry.getNameString(), StringTable); else return addIDChild(Entry.getNameID()); } bool WindowsResourceParser::TreeNode::addLanguageNode( - const ResourceEntryRef &Entry, uint32_t Origin, TreeNode *&Result) { - return addDataChild(Entry.getLanguage(), Entry.getMajorVersion(), - Entry.getMinorVersion(), Entry.getCharacteristics(), - Origin, Result); + const ResourceEntryRef &Entry, uint32_t Origin, + std::vector> &Data, TreeNode *&Result) { + bool Added = addDataChild(Entry.getLanguage(), Entry.getMajorVersion(), + Entry.getMinorVersion(), Entry.getCharacteristics(), + Origin, Data.size(), Result); + if (Added) + Data.push_back(Entry.getData()); + return Added; } bool WindowsResourceParser::TreeNode::addDataChild( uint32_t ID, uint16_t MajorVersion, uint16_t MinorVersion, - uint32_t Characteristics, uint32_t Origin, TreeNode *&Result) { - auto NewChild = - createDataNode(MajorVersion, MinorVersion, Characteristics, Origin); + uint32_t Characteristics, uint32_t Origin, uint32_t DataIndex, + TreeNode *&Result) { + auto NewChild = createDataNode(MajorVersion, MinorVersion, Characteristics, + Origin, DataIndex); auto ElementInserted = IDChildren.emplace(ID, std::move(NewChild)); Result = ElementInserted.first->second.get(); return ElementInserted.second; @@ -342,16 +532,15 @@ WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addIDChild( return *(Child->second); } -WindowsResourceParser::TreeNode & -WindowsResourceParser::TreeNode::addNameChild(ArrayRef NameRef, - bool &IsNewString) { +WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addNameChild( + ArrayRef NameRef, std::vector> &StringTable) { std::string NameString; convertUTF16LEToUTF8String(NameRef, NameString); auto Child = StringChildren.find(NameString); if (Child == StringChildren.end()) { - auto NewChild = createStringNode(); - IsNewString = true; + auto NewChild = createStringNode(StringTable.size()); + StringTable.push_back(NameRef); WindowsResourceParser::TreeNode &Node = *NewChild; StringChildren.emplace(NameString, std::move(NewChild)); return Node; @@ -396,6 +585,19 @@ uint32_t WindowsResourceParser::TreeNode::getTreeSize() const { return Size; } +// Shift DataIndex of all data children with an Index greater or equal to the +// given one, to fill a gap from removing an entry from the Data vector. +void WindowsResourceParser::TreeNode::shiftDataIndexDown(uint32_t Index) { + if (IsDataNode && DataIndex >= Index) { + DataIndex--; + } else { + for (auto &Child : IDChildren) + Child.second->shiftDataIndexDown(Index); + for (auto &Child : StringChildren) + Child.second->shiftDataIndexDown(Index); + } +} + class WindowsResourceCOFFWriter { public: WindowsResourceCOFFWriter(COFF::MachineTypes MachineType, @@ -515,6 +717,14 @@ WindowsResourceCOFFWriter::write(uint32_t TimeDateStamp) { return std::move(OutputBuffer); } +// According to COFF specification, if the Src has a size equal to Dest, +// it's okay to *not* copy the trailing zero. +static void coffnamecpy(char (&Dest)[COFF::NameSize], StringRef Src) { + assert(Src.size() <= COFF::NameSize && + "Src is not larger than COFF::NameSize"); + strncpy(Dest, Src.data(), (size_t)COFF::NameSize); +} + void WindowsResourceCOFFWriter::writeCOFFHeader(uint32_t TimeDateStamp) { // Write the COFF header. 
auto *Header = reinterpret_cast(BufferStart); @@ -534,7 +744,7 @@ void WindowsResourceCOFFWriter::writeFirstSectionHeader() { CurrentOffset += sizeof(coff_file_header); auto *SectionOneHeader = reinterpret_cast(BufferStart + CurrentOffset); - strncpy(SectionOneHeader->Name, ".rsrc$01", (size_t)COFF::NameSize); + coffnamecpy(SectionOneHeader->Name, ".rsrc$01"); SectionOneHeader->VirtualSize = 0; SectionOneHeader->VirtualAddress = 0; SectionOneHeader->SizeOfRawData = SectionOneSize; @@ -552,7 +762,7 @@ void WindowsResourceCOFFWriter::writeSecondSectionHeader() { CurrentOffset += sizeof(coff_section); auto *SectionTwoHeader = reinterpret_cast(BufferStart + CurrentOffset); - strncpy(SectionTwoHeader->Name, ".rsrc$02", (size_t)COFF::NameSize); + coffnamecpy(SectionTwoHeader->Name, ".rsrc$02"); SectionTwoHeader->VirtualSize = 0; SectionTwoHeader->VirtualAddress = 0; SectionTwoHeader->SizeOfRawData = SectionTwoSize; @@ -590,7 +800,7 @@ void WindowsResourceCOFFWriter::writeSymbolTable() { // Now write the symbol table. // First, the feat symbol. auto *Symbol = reinterpret_cast(BufferStart + CurrentOffset); - strncpy(Symbol->Name.ShortName, "@feat.00", (size_t)COFF::NameSize); + coffnamecpy(Symbol->Name.ShortName, "@feat.00"); Symbol->Value = 0x11; Symbol->SectionNumber = 0xffff; Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; @@ -600,7 +810,7 @@ void WindowsResourceCOFFWriter::writeSymbolTable() { // Now write the .rsrc1 symbol + aux. Symbol = reinterpret_cast(BufferStart + CurrentOffset); - strncpy(Symbol->Name.ShortName, ".rsrc$01", (size_t)COFF::NameSize); + coffnamecpy(Symbol->Name.ShortName, ".rsrc$01"); Symbol->Value = 0; Symbol->SectionNumber = 1; Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; @@ -619,7 +829,7 @@ void WindowsResourceCOFFWriter::writeSymbolTable() { // Now write the .rsrc2 symbol + aux. Symbol = reinterpret_cast(BufferStart + CurrentOffset); - strncpy(Symbol->Name.ShortName, ".rsrc$02", (size_t)COFF::NameSize); + coffnamecpy(Symbol->Name.ShortName, ".rsrc$02"); Symbol->Value = 0; Symbol->SectionNumber = 2; Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; @@ -640,7 +850,7 @@ void WindowsResourceCOFFWriter::writeSymbolTable() { for (unsigned i = 0; i < Data.size(); i++) { auto RelocationName = formatv("$R{0:X-6}", i & 0xffffff).sstr(); Symbol = reinterpret_cast(BufferStart + CurrentOffset); - memcpy(Symbol->Name.ShortName, RelocationName.data(), (size_t) COFF::NameSize); + coffnamecpy(Symbol->Name.ShortName, RelocationName); Symbol->Value = DataOffsets[i]; Symbol->SectionNumber = 2; Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; diff --git a/lib/Object/XCOFFObjectFile.cpp b/lib/Object/XCOFFObjectFile.cpp index 602b7357986a..98782c2701c1 100644 --- a/lib/Object/XCOFFObjectFile.cpp +++ b/lib/Object/XCOFFObjectFile.cpp @@ -11,17 +11,14 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/XCOFFObjectFile.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/Support/BinaryStreamReader.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" #include #include namespace llvm { namespace object { +enum { FUNCTION_SYM = 0x20, SYM_TYPE_MASK = 0x07, RELOC_OVERFLOW = 65535 }; + // Checks that [Ptr, Ptr + Size) bytes fall inside the memory buffer // 'M'. Returns a pointer to the underlying object on success. 
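As a side note on the containment check described in the comment above: it reduces to pointer arithmetic with an overflow guard, comparing the requested size against the space remaining in the buffer rather than computing an end pointer that could wrap. A rough standalone sketch with simplified error handling (the real getObject returns an llvm::Expected; the helper name here is hypothetical):

#include <cstddef>
#include <cstdint>

// Returns true when [Ptr, Ptr + Size) lies entirely inside the buffer
// [BufStart, BufStart + BufSize).
static bool rangeInsideBuffer(uintptr_t BufStart, size_t BufSize,
                              uintptr_t Ptr, size_t Size) {
  if (Ptr < BufStart)
    return false;
  uintptr_t OffsetInBuf = Ptr - BufStart;
  if (OffsetInBuf > BufSize)
    return false;
  // Compare against the remaining space instead of Ptr + Size, so an
  // oversized Size cannot overflow and wrap around.
  return Size <= BufSize - OffsetInBuf;
}

int main() {
  unsigned char Buf[64] = {};
  uintptr_t Base = reinterpret_cast<uintptr_t>(Buf);
  bool Ok = rangeInsideBuffer(Base, sizeof(Buf), Base + 16, 32) && // fits
            !rangeInsideBuffer(Base, sizeof(Buf), Base + 60, 8);   // spills over
  return Ok ? 0 : 1;
}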
template @@ -42,10 +39,25 @@ template static const T *viewAs(uintptr_t in) { return reinterpret_cast(in); } -static StringRef generateStringRef(const char *Name, uint64_t Size) { - auto NulCharPtr = static_cast(memchr(Name, '\0', Size)); +static StringRef generateXCOFFFixedNameStringRef(const char *Name) { + auto NulCharPtr = + static_cast(memchr(Name, '\0', XCOFF::NameSize)); return NulCharPtr ? StringRef(Name, NulCharPtr - Name) - : StringRef(Name, Size); + : StringRef(Name, XCOFF::NameSize); +} + +bool XCOFFRelocation32::isRelocationSigned() const { + return Info & XR_SIGN_INDICATOR_MASK; +} + +bool XCOFFRelocation32::isFixupIndicated() const { + return Info & XR_FIXUP_INDICATOR_MASK; +} + +uint8_t XCOFFRelocation32::getRelocatedLength() const { + // The relocation encodes the bit length being relocated minus 1. Add back + // the 1 to get the actual length being relocated. + return (Info & XR_BIASED_LENGTH_MASK) + 1; } void XCOFFObjectFile::checkSectionAddress(uintptr_t Addr, @@ -83,6 +95,9 @@ XCOFFObjectFile::toSection64(DataRefImpl Ref) const { const XCOFFSymbolEntry *XCOFFObjectFile::toSymbolEntry(DataRefImpl Ref) const { assert(!is64Bit() && "Symbol table support not implemented for 64-bit."); assert(Ref.p != 0 && "Symbol table pointer can not be nullptr!"); +#ifndef NDEBUG + checkSymbolEntryPointer(Ref.p); +#endif auto SymEntPtr = viewAs(Ref.p); return SymEntPtr; } @@ -112,23 +127,19 @@ XCOFFObjectFile::sectionHeaderTable64() const { void XCOFFObjectFile::moveSymbolNext(DataRefImpl &Symb) const { const XCOFFSymbolEntry *SymEntPtr = toSymbolEntry(Symb); SymEntPtr += SymEntPtr->NumberOfAuxEntries + 1; +#ifndef NDEBUG + // This function is used by basic_symbol_iterator, which allows to + // point to the end-of-symbol-table address. + if (reinterpret_cast(SymEntPtr) != getEndOfSymbolTableAddress()) + checkSymbolEntryPointer(reinterpret_cast(SymEntPtr)); +#endif Symb.p = reinterpret_cast(SymEntPtr); } -Expected XCOFFObjectFile::getSymbolName(DataRefImpl Symb) const { - const XCOFFSymbolEntry *SymEntPtr = toSymbolEntry(Symb); - - if (SymEntPtr->NameInStrTbl.Magic != XCOFFSymbolEntry::NAME_IN_STR_TBL_MAGIC) - return generateStringRef(SymEntPtr->SymbolName, XCOFF::SymbolNameSize); - - // A storage class value with the high-order bit on indicates that the name is - // a symbolic debugger stabstring. - if (SymEntPtr->StorageClass & 0x80) - return StringRef("Unimplemented Debug Name"); - - uint32_t Offset = SymEntPtr->NameInStrTbl.Offset; - // The byte offset is relative to the start of the string table - // or .debug section. A byte offset value of 0 is a null or zero-length symbol +Expected +XCOFFObjectFile::getStringTableEntry(uint32_t Offset) const { + // The byte offset is relative to the start of the string table. + // A byte offset value of 0 is a null or zero-length symbol // name. A byte offset in the range 1 to 3 (inclusive) points into the length // field; as a soft-error recovery mechanism, we treat such cases as having an // offset of 0. 
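To make the offset rules in the preceding comment concrete: the XCOFF string table starts with a 4-byte length field, so genuine string data can only begin at offset 4 or later, offset 0 means an empty name, and offsets 1 through 3 fall inside the length field. A small illustrative resolver, simplified relative to the real getStringTableEntry (which works on the parsed in-memory table and reports a parse error for an out-of-range offset):

#include <cstdint>
#include <cstring>
#include <string>

// Resolve a name offset against an XCOFF string table whose first four
// bytes hold the table length. Offsets 0..3 are treated here as "no name".
static std::string resolveName(const char *StrTab, uint32_t StrTabSize,
                               uint32_t Offset) {
  if (Offset < 4 || Offset >= StrTabSize)
    return std::string();
  // Entries are NUL-terminated; clamp to the table size just in case.
  const char *P = StrTab + Offset;
  const void *Nul = std::memchr(P, '\0', StrTabSize - Offset);
  size_t Len = Nul ? static_cast<const char *>(Nul) - P : StrTabSize - Offset;
  return std::string(P, Len);
}

int main() {
  // 4-byte length field (not consulted by this sketch), then "foo\0".
  const char Table[] = {0, 0, 0, 8, 'f', 'o', 'o', '\0'};
  bool Ok = resolveName(Table, sizeof(Table), 4) == "foo" &&
            resolveName(Table, sizeof(Table), 2).empty();
  return Ok ? 0 : 1;
}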
@@ -138,10 +149,32 @@ Expected XCOFFObjectFile::getSymbolName(DataRefImpl Symb) const { if (StringTable.Data != nullptr && StringTable.Size > Offset) return (StringTable.Data + Offset); - return make_error("Symbol Name parse failed", + return make_error("Bad offset for string table entry", object_error::parse_failed); } +Expected +XCOFFObjectFile::getCFileName(const XCOFFFileAuxEnt *CFileEntPtr) const { + if (CFileEntPtr->NameInStrTbl.Magic != + XCOFFSymbolEntry::NAME_IN_STR_TBL_MAGIC) + return generateXCOFFFixedNameStringRef(CFileEntPtr->Name); + return getStringTableEntry(CFileEntPtr->NameInStrTbl.Offset); +} + +Expected XCOFFObjectFile::getSymbolName(DataRefImpl Symb) const { + const XCOFFSymbolEntry *SymEntPtr = toSymbolEntry(Symb); + + // A storage class value with the high-order bit on indicates that the name is + // a symbolic debugger stabstring. + if (SymEntPtr->StorageClass & 0x80) + return StringRef("Unimplemented Debug Name"); + + if (SymEntPtr->NameInStrTbl.Magic != XCOFFSymbolEntry::NAME_IN_STR_TBL_MAGIC) + return generateXCOFFFixedNameStringRef(SymEntPtr->SymbolName); + + return getStringTableEntry(SymEntPtr->NameInStrTbl.Offset); +} + Expected XCOFFObjectFile::getSymbolAddress(DataRefImpl Symb) const { uint64_t Result = 0; llvm_unreachable("Not yet implemented!"); @@ -149,6 +182,7 @@ Expected XCOFFObjectFile::getSymbolAddress(DataRefImpl Symb) const { } uint64_t XCOFFObjectFile::getSymbolValueImpl(DataRefImpl Symb) const { + assert(!is64Bit() && "Symbol table support not implemented for 64-bit."); return toSymbolEntry(Symb)->Value; } @@ -185,7 +219,7 @@ void XCOFFObjectFile::moveSectionNext(DataRefImpl &Sec) const { } Expected XCOFFObjectFile::getSectionName(DataRefImpl Sec) const { - return generateStringRef(getSectionNameInternal(Sec), XCOFF::SectionNameSize); + return generateXCOFFFixedNameStringRef(getSectionNameInternal(Sec)); } uint64_t XCOFFObjectFile::getSectionAddress(DataRefImpl Sec) const { @@ -393,8 +427,8 @@ XCOFFObjectFile::getSymbolSectionName(const XCOFFSymbolEntry *SymEntPtr) const { default: Expected SecRef = getSectionByNum(SectionNum); if (SecRef) - return generateStringRef(getSectionNameInternal(SecRef.get()), - XCOFF::SectionNameSize); + return generateXCOFFFixedNameStringRef( + getSectionNameInternal(SecRef.get())); return SecRef.takeError(); } } @@ -442,6 +476,48 @@ uint32_t XCOFFObjectFile::getNumberOfSymbolTableEntries64() const { return fileHeader64()->NumberOfSymTableEntries; } +uintptr_t XCOFFObjectFile::getEndOfSymbolTableAddress() const { + uint32_t NumberOfSymTableEntries = + is64Bit() ? 
getNumberOfSymbolTableEntries64() + : getLogicalNumberOfSymbolTableEntries32(); + return getWithOffset(reinterpret_cast(SymbolTblPtr), + XCOFF::SymbolTableEntrySize * NumberOfSymTableEntries); +} + +void XCOFFObjectFile::checkSymbolEntryPointer(uintptr_t SymbolEntPtr) const { + if (SymbolEntPtr < reinterpret_cast(SymbolTblPtr)) + report_fatal_error("Symbol table entry is outside of symbol table."); + + if (SymbolEntPtr >= getEndOfSymbolTableAddress()) + report_fatal_error("Symbol table entry is outside of symbol table."); + + ptrdiff_t Offset = reinterpret_cast(SymbolEntPtr) - + reinterpret_cast(SymbolTblPtr); + + if (Offset % XCOFF::SymbolTableEntrySize != 0) + report_fatal_error( + "Symbol table entry position is not valid inside of symbol table."); +} + +uint32_t XCOFFObjectFile::getSymbolIndex(uintptr_t SymbolEntPtr) const { + return (reinterpret_cast(SymbolEntPtr) - + reinterpret_cast(SymbolTblPtr)) / + XCOFF::SymbolTableEntrySize; +} + +Expected +XCOFFObjectFile::getSymbolNameByIndex(uint32_t Index) const { + if (is64Bit()) + report_fatal_error("64-bit symbol table support not implemented yet."); + + if (Index >= getLogicalNumberOfSymbolTableEntries32()) + return errorCodeToError(object_error::invalid_symbol_index); + + DataRefImpl SymDRI; + SymDRI.p = reinterpret_cast(getPointerToSymbolTable() + Index); + return getSymbolName(SymDRI); +} + uint16_t XCOFFObjectFile::getFlags() const { return is64Bit() ? fileHeader64()->Flags : fileHeader32()->Flags; } @@ -477,6 +553,46 @@ ArrayRef XCOFFObjectFile::sections32() const { TablePtr + getNumberOfSections()); } +// In an XCOFF32 file, when the field value is 65535, then an STYP_OVRFLO +// section header contains the actual count of relocation entries in the s_paddr +// field. STYP_OVRFLO headers contain the section index of their corresponding +// sections as their raw "NumberOfRelocations" field value. +Expected XCOFFObjectFile::getLogicalNumberOfRelocationEntries( + const XCOFFSectionHeader32 &Sec) const { + + uint16_t SectionIndex = &Sec - sectionHeaderTable32() + 1; + + if (Sec.NumberOfRelocations < RELOC_OVERFLOW) + return Sec.NumberOfRelocations; + for (const auto &Sec : sections32()) { + if (Sec.Flags == XCOFF::STYP_OVRFLO && + Sec.NumberOfRelocations == SectionIndex) + return Sec.PhysicalAddress; + } + return errorCodeToError(object_error::parse_failed); +} + +Expected> +XCOFFObjectFile::relocations(const XCOFFSectionHeader32 &Sec) const { + uintptr_t RelocAddr = getWithOffset(reinterpret_cast(FileHeader), + Sec.FileOffsetToRelocationInfo); + auto NumRelocEntriesOrErr = getLogicalNumberOfRelocationEntries(Sec); + if (Error E = NumRelocEntriesOrErr.takeError()) + return std::move(E); + + uint32_t NumRelocEntries = NumRelocEntriesOrErr.get(); + + auto RelocationOrErr = + getObject(Data, reinterpret_cast(RelocAddr), + NumRelocEntries * sizeof(XCOFFRelocation32)); + if (Error E = RelocationOrErr.takeError()) + return std::move(E); + + const XCOFFRelocation32 *StartReloc = RelocationOrErr.get(); + + return ArrayRef(StartReloc, StartReloc + NumRelocEntries); +} + Expected XCOFFObjectFile::parseStringTable(const XCOFFObjectFile *Obj, uint64_t Offset) { // If there is a string table, then the buffer must contain at least 4 bytes @@ -507,7 +623,7 @@ XCOFFObjectFile::parseStringTable(const XCOFFObjectFile *Obj, uint64_t Offset) { Expected> XCOFFObjectFile::create(unsigned Type, MemoryBufferRef MBR) { - // Can't use make_unique because of the private constructor. + // Can't use std::make_unique because of the private constructor. 
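The comment above touches a general C++ point that is easy to trip over: std::make_unique has to invoke the constructor itself, so a private constructor is out of reach even when make_unique is called from inside a member function, hence the reset(new ...) pattern. A minimal illustration with a made-up class, not taken from the patch:

#include <memory>

class Widget {
  explicit Widget(int V) : Value(V) {} // private constructor
  int Value;

public:
  static std::unique_ptr<Widget> create(int V) {
    // std::make_unique<Widget>(V) does not compile: make_unique itself has
    // to call the private constructor, and it is neither a member nor a
    // friend. Calling new directly inside a member function is fine.
    return std::unique_ptr<Widget>(new Widget(V));
  }
  int value() const { return Value; }
};

int main() { return Widget::create(7)->value() == 7 ? 0 : 1; }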
std::unique_ptr Obj; Obj.reset(new XCOFFObjectFile(Type, MBR)); @@ -573,11 +689,77 @@ ObjectFile::createXCOFFObjectFile(MemoryBufferRef MemBufRef, } StringRef XCOFFSectionHeader32::getName() const { - return generateStringRef(Name, XCOFF::SectionNameSize); + return generateXCOFFFixedNameStringRef(Name); } StringRef XCOFFSectionHeader64::getName() const { - return generateStringRef(Name, XCOFF::SectionNameSize); + return generateXCOFFFixedNameStringRef(Name); +} + +XCOFF::StorageClass XCOFFSymbolRef::getStorageClass() const { + return OwningObjectPtr->toSymbolEntry(SymEntDataRef)->StorageClass; +} + +uint8_t XCOFFSymbolRef::getNumberOfAuxEntries() const { + return OwningObjectPtr->toSymbolEntry(SymEntDataRef)->NumberOfAuxEntries; +} + +const XCOFFCsectAuxEnt32 *XCOFFSymbolRef::getXCOFFCsectAuxEnt32() const { + assert(!OwningObjectPtr->is64Bit() && + "32-bit interface called on 64-bit object file."); + assert(hasCsectAuxEnt() && "No Csect Auxiliary Entry is found."); + + // In XCOFF32, the csect auxilliary entry is always the last auxiliary + // entry for the symbol. + uintptr_t AuxAddr = getWithOffset( + SymEntDataRef.p, XCOFF::SymbolTableEntrySize * getNumberOfAuxEntries()); + +#ifndef NDEBUG + OwningObjectPtr->checkSymbolEntryPointer(AuxAddr); +#endif + + return reinterpret_cast(AuxAddr); +} + +uint16_t XCOFFSymbolRef::getType() const { + return OwningObjectPtr->toSymbolEntry(SymEntDataRef)->SymbolType; +} + +int16_t XCOFFSymbolRef::getSectionNumber() const { + return OwningObjectPtr->toSymbolEntry(SymEntDataRef)->SectionNumber; +} + +bool XCOFFSymbolRef::hasCsectAuxEnt() const { + XCOFF::StorageClass SC = getStorageClass(); + return (SC == XCOFF::C_EXT || SC == XCOFF::C_WEAKEXT || + SC == XCOFF::C_HIDEXT); +} + +bool XCOFFSymbolRef::isFunction() const { + if (OwningObjectPtr->is64Bit()) + report_fatal_error("64-bit support is unimplemented yet."); + + if (getType() & FUNCTION_SYM) + return true; + + if (!hasCsectAuxEnt()) + return false; + + const XCOFFCsectAuxEnt32 *CsectAuxEnt = getXCOFFCsectAuxEnt32(); + + // A function definition should be a label definition. + if ((CsectAuxEnt->SymbolAlignmentAndType & SYM_TYPE_MASK) != XCOFF::XTY_LD) + return false; + + if (CsectAuxEnt->StorageMappingClass != XCOFF::XMC_PR) + return false; + + int16_t SectNum = getSectionNumber(); + Expected SI = OwningObjectPtr->getSectionByNum(SectNum); + if (!SI) + return false; + + return (OwningObjectPtr->getSectionFlags(SI.get()) & XCOFF::STYP_TEXT); } } // namespace object diff --git a/lib/ObjectYAML/COFFEmitter.cpp b/lib/ObjectYAML/COFFEmitter.cpp new file mode 100644 index 000000000000..efcdc51e1670 --- /dev/null +++ b/lib/ObjectYAML/COFFEmitter.cpp @@ -0,0 +1,622 @@ +//===- yaml2coff - Convert YAML to a COFF object file ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// The COFF component of yaml2obj. 
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" +#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h" +#include "llvm/Object/COFF.h" +#include "llvm/ObjectYAML/ObjectYAML.h" +#include "llvm/ObjectYAML/yaml2obj.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; + +namespace { + +/// This parses a yaml stream that represents a COFF object file. +/// See docs/yaml2obj for the yaml scheema. +struct COFFParser { + COFFParser(COFFYAML::Object &Obj, yaml::ErrorHandler EH) + : Obj(Obj), SectionTableStart(0), SectionTableSize(0), ErrHandler(EH) { + // A COFF string table always starts with a 4 byte size field. Offsets into + // it include this size, so allocate it now. + StringTable.append(4, char(0)); + } + + bool useBigObj() const { + return static_cast(Obj.Sections.size()) > + COFF::MaxNumberOfSections16; + } + + bool isPE() const { return Obj.OptionalHeader.hasValue(); } + bool is64Bit() const { + return Obj.Header.Machine == COFF::IMAGE_FILE_MACHINE_AMD64 || + Obj.Header.Machine == COFF::IMAGE_FILE_MACHINE_ARM64; + } + + uint32_t getFileAlignment() const { + return Obj.OptionalHeader->Header.FileAlignment; + } + + unsigned getHeaderSize() const { + return useBigObj() ? COFF::Header32Size : COFF::Header16Size; + } + + unsigned getSymbolSize() const { + return useBigObj() ? COFF::Symbol32Size : COFF::Symbol16Size; + } + + bool parseSections() { + for (std::vector::iterator i = Obj.Sections.begin(), + e = Obj.Sections.end(); + i != e; ++i) { + COFFYAML::Section &Sec = *i; + + // If the name is less than 8 bytes, store it in place, otherwise + // store it in the string table. + StringRef Name = Sec.Name; + + if (Name.size() <= COFF::NameSize) { + std::copy(Name.begin(), Name.end(), Sec.Header.Name); + } else { + // Add string to the string table and format the index for output. + unsigned Index = getStringIndex(Name); + std::string str = utostr(Index); + if (str.size() > 7) { + ErrHandler("string table got too large"); + return false; + } + Sec.Header.Name[0] = '/'; + std::copy(str.begin(), str.end(), Sec.Header.Name + 1); + } + + if (Sec.Alignment) { + if (Sec.Alignment > 8192) { + ErrHandler("section alignment is too large"); + return false; + } + if (!isPowerOf2_32(Sec.Alignment)) { + ErrHandler("section alignment is not a power of 2"); + return false; + } + Sec.Header.Characteristics |= (Log2_32(Sec.Alignment) + 1) << 20; + } + } + return true; + } + + bool parseSymbols() { + for (std::vector::iterator i = Obj.Symbols.begin(), + e = Obj.Symbols.end(); + i != e; ++i) { + COFFYAML::Symbol &Sym = *i; + + // If the name is less than 8 bytes, store it in place, otherwise + // store it in the string table. + StringRef Name = Sym.Name; + if (Name.size() <= COFF::NameSize) { + std::copy(Name.begin(), Name.end(), Sym.Header.Name); + } else { + // Add string to the string table and format the index for output. 
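For reference, the symbol-name handling here follows the standard COFF rule: a name longer than 8 bytes is not stored inline; the first four bytes of the name field stay zero and the last four hold a little-endian offset into the string table (section headers use a different convention, '/' followed by a decimal offset, as handled in parseSections above). A tiny standalone sketch of the symbol variant, with a hypothetical helper name:

#include <cstdint>
#include <cstring>

// Encode a long symbol name reference into the fixed 8-byte COFF name
// field: bytes 0..3 are zero, bytes 4..7 hold the string-table offset in
// little-endian byte order.
static void encodeLongSymbolName(unsigned char (&Name)[8], uint32_t Offset) {
  std::memset(Name, 0, 4);
  Name[4] = static_cast<unsigned char>(Offset);
  Name[5] = static_cast<unsigned char>(Offset >> 8);
  Name[6] = static_cast<unsigned char>(Offset >> 16);
  Name[7] = static_cast<unsigned char>(Offset >> 24);
}

int main() {
  unsigned char Name[8];
  encodeLongSymbolName(Name, 0x10); // name lives at string-table offset 16
  return Name[0] == 0 && Name[4] == 0x10 && Name[7] == 0 ? 0 : 1;
}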
+ unsigned Index = getStringIndex(Name); + *reinterpret_cast(Sym.Header.Name + 4) = + Index; + } + + Sym.Header.Type = Sym.SimpleType; + Sym.Header.Type |= Sym.ComplexType << COFF::SCT_COMPLEX_TYPE_SHIFT; + } + return true; + } + + bool parse() { + if (!parseSections()) + return false; + if (!parseSymbols()) + return false; + return true; + } + + unsigned getStringIndex(StringRef Str) { + StringMap::iterator i = StringTableMap.find(Str); + if (i == StringTableMap.end()) { + unsigned Index = StringTable.size(); + StringTable.append(Str.begin(), Str.end()); + StringTable.push_back(0); + StringTableMap[Str] = Index; + return Index; + } + return i->second; + } + + COFFYAML::Object &Obj; + + codeview::StringsAndChecksums StringsAndChecksums; + BumpPtrAllocator Allocator; + StringMap StringTableMap; + std::string StringTable; + uint32_t SectionTableStart; + uint32_t SectionTableSize; + + yaml::ErrorHandler ErrHandler; +}; + +enum { DOSStubSize = 128 }; + +} // end anonymous namespace + +// Take a CP and assign addresses and sizes to everything. Returns false if the +// layout is not valid to do. +static bool layoutOptionalHeader(COFFParser &CP) { + if (!CP.isPE()) + return true; + unsigned PEHeaderSize = CP.is64Bit() ? sizeof(object::pe32plus_header) + : sizeof(object::pe32_header); + CP.Obj.Header.SizeOfOptionalHeader = + PEHeaderSize + + sizeof(object::data_directory) * (COFF::NUM_DATA_DIRECTORIES + 1); + return true; +} + +static yaml::BinaryRef +toDebugS(ArrayRef Subsections, + const codeview::StringsAndChecksums &SC, BumpPtrAllocator &Allocator) { + using namespace codeview; + ExitOnError Err("Error occurred writing .debug$S section"); + auto CVSS = + Err(CodeViewYAML::toCodeViewSubsectionList(Allocator, Subsections, SC)); + + std::vector Builders; + uint32_t Size = sizeof(uint32_t); + for (auto &SS : CVSS) { + DebugSubsectionRecordBuilder B(SS, CodeViewContainer::ObjectFile); + Size += B.calculateSerializedLength(); + Builders.push_back(std::move(B)); + } + uint8_t *Buffer = Allocator.Allocate(Size); + MutableArrayRef Output(Buffer, Size); + BinaryStreamWriter Writer(Output, support::little); + + Err(Writer.writeInteger(COFF::DEBUG_SECTION_MAGIC)); + for (const auto &B : Builders) { + Err(B.commit(Writer)); + } + return {Output}; +} + +// Take a CP and assign addresses and sizes to everything. Returns false if the +// layout is not valid to do. +static bool layoutCOFF(COFFParser &CP) { + // The section table starts immediately after the header, including the + // optional header. + CP.SectionTableStart = + CP.getHeaderSize() + CP.Obj.Header.SizeOfOptionalHeader; + if (CP.isPE()) + CP.SectionTableStart += DOSStubSize + sizeof(COFF::PEMagic); + CP.SectionTableSize = COFF::SectionSize * CP.Obj.Sections.size(); + + uint32_t CurrentSectionDataOffset = + CP.SectionTableStart + CP.SectionTableSize; + + for (COFFYAML::Section &S : CP.Obj.Sections) { + // We support specifying exactly one of SectionData or Subsections. So if + // there is already some SectionData, then we don't need to do any of this. + if (S.Name == ".debug$S" && S.SectionData.binary_size() == 0) { + CodeViewYAML::initializeStringsAndChecksums(S.DebugS, + CP.StringsAndChecksums); + if (CP.StringsAndChecksums.hasChecksums() && + CP.StringsAndChecksums.hasStrings()) + break; + } + } + + // Assign each section data address consecutively. 
+ for (COFFYAML::Section &S : CP.Obj.Sections) { + if (S.Name == ".debug$S") { + if (S.SectionData.binary_size() == 0) { + assert(CP.StringsAndChecksums.hasStrings() && + "Object file does not have debug string table!"); + + S.SectionData = + toDebugS(S.DebugS, CP.StringsAndChecksums, CP.Allocator); + } + } else if (S.Name == ".debug$T") { + if (S.SectionData.binary_size() == 0) + S.SectionData = CodeViewYAML::toDebugT(S.DebugT, CP.Allocator, S.Name); + } else if (S.Name == ".debug$P") { + if (S.SectionData.binary_size() == 0) + S.SectionData = CodeViewYAML::toDebugT(S.DebugP, CP.Allocator, S.Name); + } else if (S.Name == ".debug$H") { + if (S.DebugH.hasValue() && S.SectionData.binary_size() == 0) + S.SectionData = CodeViewYAML::toDebugH(*S.DebugH, CP.Allocator); + } + + if (S.SectionData.binary_size() > 0) { + CurrentSectionDataOffset = alignTo(CurrentSectionDataOffset, + CP.isPE() ? CP.getFileAlignment() : 4); + S.Header.SizeOfRawData = S.SectionData.binary_size(); + if (CP.isPE()) + S.Header.SizeOfRawData = + alignTo(S.Header.SizeOfRawData, CP.getFileAlignment()); + S.Header.PointerToRawData = CurrentSectionDataOffset; + CurrentSectionDataOffset += S.Header.SizeOfRawData; + if (!S.Relocations.empty()) { + S.Header.PointerToRelocations = CurrentSectionDataOffset; + S.Header.NumberOfRelocations = S.Relocations.size(); + CurrentSectionDataOffset += + S.Header.NumberOfRelocations * COFF::RelocationSize; + } + } else { + // Leave SizeOfRawData unaltered. For .bss sections in object files, it + // carries the section size. + S.Header.PointerToRawData = 0; + } + } + + uint32_t SymbolTableStart = CurrentSectionDataOffset; + + // Calculate number of symbols. + uint32_t NumberOfSymbols = 0; + for (std::vector::iterator i = CP.Obj.Symbols.begin(), + e = CP.Obj.Symbols.end(); + i != e; ++i) { + uint32_t NumberOfAuxSymbols = 0; + if (i->FunctionDefinition) + NumberOfAuxSymbols += 1; + if (i->bfAndefSymbol) + NumberOfAuxSymbols += 1; + if (i->WeakExternal) + NumberOfAuxSymbols += 1; + if (!i->File.empty()) + NumberOfAuxSymbols += + (i->File.size() + CP.getSymbolSize() - 1) / CP.getSymbolSize(); + if (i->SectionDefinition) + NumberOfAuxSymbols += 1; + if (i->CLRToken) + NumberOfAuxSymbols += 1; + i->Header.NumberOfAuxSymbols = NumberOfAuxSymbols; + NumberOfSymbols += 1 + NumberOfAuxSymbols; + } + + // Store all the allocated start addresses in the header. 
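One detail of the symbol counting above worth spelling out: a .file auxiliary name is spread across as many auxiliary records as needed, so the count is a ceiling division of the name length by the symbol record size (18 bytes for the classic COFF symbol record, 20 bytes in big-object mode). A quick sketch of that arithmetic, illustration only:

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Number of auxiliary records needed to hold a .file name of Len bytes
// when each symbol record is RecordSize bytes (ceiling division).
static uint32_t fileAuxRecordCount(std::size_t Len, std::size_t RecordSize) {
  return static_cast<uint32_t>((Len + RecordSize - 1) / RecordSize);
}

int main() {
  std::printf("%u\n", fileAuxRecordCount(5, 18));  // "a.cpp" -> 1 record
  std::printf("%u\n", fileAuxRecordCount(19, 18)); // 19 bytes -> 2 records
  return 0;
}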
+ CP.Obj.Header.NumberOfSections = CP.Obj.Sections.size(); + CP.Obj.Header.NumberOfSymbols = NumberOfSymbols; + if (NumberOfSymbols > 0 || CP.StringTable.size() > 4) + CP.Obj.Header.PointerToSymbolTable = SymbolTableStart; + else + CP.Obj.Header.PointerToSymbolTable = 0; + + *reinterpret_cast(&CP.StringTable[0]) = + CP.StringTable.size(); + + return true; +} + +template struct binary_le_impl { + value_type Value; + binary_le_impl(value_type V) : Value(V) {} +}; + +template +raw_ostream &operator<<(raw_ostream &OS, + const binary_le_impl &BLE) { + char Buffer[sizeof(BLE.Value)]; + support::endian::write( + Buffer, BLE.Value); + OS.write(Buffer, sizeof(BLE.Value)); + return OS; +} + +template +binary_le_impl binary_le(value_type V) { + return binary_le_impl(V); +} + +template struct zeros_impl {}; + +template +raw_ostream &operator<<(raw_ostream &OS, const zeros_impl &) { + char Buffer[NumBytes]; + memset(Buffer, 0, sizeof(Buffer)); + OS.write(Buffer, sizeof(Buffer)); + return OS; +} + +template zeros_impl zeros(const T &) { + return zeros_impl(); +} + +template +static uint32_t initializeOptionalHeader(COFFParser &CP, uint16_t Magic, + T Header) { + memset(Header, 0, sizeof(*Header)); + Header->Magic = Magic; + Header->SectionAlignment = CP.Obj.OptionalHeader->Header.SectionAlignment; + Header->FileAlignment = CP.Obj.OptionalHeader->Header.FileAlignment; + uint32_t SizeOfCode = 0, SizeOfInitializedData = 0, + SizeOfUninitializedData = 0; + uint32_t SizeOfHeaders = alignTo(CP.SectionTableStart + CP.SectionTableSize, + Header->FileAlignment); + uint32_t SizeOfImage = alignTo(SizeOfHeaders, Header->SectionAlignment); + uint32_t BaseOfData = 0; + for (const COFFYAML::Section &S : CP.Obj.Sections) { + if (S.Header.Characteristics & COFF::IMAGE_SCN_CNT_CODE) + SizeOfCode += S.Header.SizeOfRawData; + if (S.Header.Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA) + SizeOfInitializedData += S.Header.SizeOfRawData; + if (S.Header.Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) + SizeOfUninitializedData += S.Header.SizeOfRawData; + if (S.Name.equals(".text")) + Header->BaseOfCode = S.Header.VirtualAddress; // RVA + else if (S.Name.equals(".data")) + BaseOfData = S.Header.VirtualAddress; // RVA + if (S.Header.VirtualAddress) + SizeOfImage += alignTo(S.Header.VirtualSize, Header->SectionAlignment); + } + Header->SizeOfCode = SizeOfCode; + Header->SizeOfInitializedData = SizeOfInitializedData; + Header->SizeOfUninitializedData = SizeOfUninitializedData; + Header->AddressOfEntryPoint = + CP.Obj.OptionalHeader->Header.AddressOfEntryPoint; // RVA + Header->ImageBase = CP.Obj.OptionalHeader->Header.ImageBase; + Header->MajorOperatingSystemVersion = + CP.Obj.OptionalHeader->Header.MajorOperatingSystemVersion; + Header->MinorOperatingSystemVersion = + CP.Obj.OptionalHeader->Header.MinorOperatingSystemVersion; + Header->MajorImageVersion = CP.Obj.OptionalHeader->Header.MajorImageVersion; + Header->MinorImageVersion = CP.Obj.OptionalHeader->Header.MinorImageVersion; + Header->MajorSubsystemVersion = + CP.Obj.OptionalHeader->Header.MajorSubsystemVersion; + Header->MinorSubsystemVersion = + CP.Obj.OptionalHeader->Header.MinorSubsystemVersion; + Header->SizeOfImage = SizeOfImage; + Header->SizeOfHeaders = SizeOfHeaders; + Header->Subsystem = CP.Obj.OptionalHeader->Header.Subsystem; + Header->DLLCharacteristics = CP.Obj.OptionalHeader->Header.DLLCharacteristics; + Header->SizeOfStackReserve = CP.Obj.OptionalHeader->Header.SizeOfStackReserve; + Header->SizeOfStackCommit = 
CP.Obj.OptionalHeader->Header.SizeOfStackCommit; + Header->SizeOfHeapReserve = CP.Obj.OptionalHeader->Header.SizeOfHeapReserve; + Header->SizeOfHeapCommit = CP.Obj.OptionalHeader->Header.SizeOfHeapCommit; + Header->NumberOfRvaAndSize = COFF::NUM_DATA_DIRECTORIES + 1; + return BaseOfData; +} + +static bool writeCOFF(COFFParser &CP, raw_ostream &OS) { + if (CP.isPE()) { + // PE files start with a DOS stub. + object::dos_header DH; + memset(&DH, 0, sizeof(DH)); + + // DOS EXEs start with "MZ" magic. + DH.Magic[0] = 'M'; + DH.Magic[1] = 'Z'; + // Initializing the AddressOfRelocationTable is strictly optional but + // mollifies certain tools which expect it to have a value greater than + // 0x40. + DH.AddressOfRelocationTable = sizeof(DH); + // This is the address of the PE signature. + DH.AddressOfNewExeHeader = DOSStubSize; + + // Write out our DOS stub. + OS.write(reinterpret_cast(&DH), sizeof(DH)); + // Write padding until we reach the position of where our PE signature + // should live. + OS.write_zeros(DOSStubSize - sizeof(DH)); + // Write out the PE signature. + OS.write(COFF::PEMagic, sizeof(COFF::PEMagic)); + } + if (CP.useBigObj()) { + OS << binary_le(static_cast(COFF::IMAGE_FILE_MACHINE_UNKNOWN)) + << binary_le(static_cast(0xffff)) + << binary_le( + static_cast(COFF::BigObjHeader::MinBigObjectVersion)) + << binary_le(CP.Obj.Header.Machine) + << binary_le(CP.Obj.Header.TimeDateStamp); + OS.write(COFF::BigObjMagic, sizeof(COFF::BigObjMagic)); + OS << zeros(uint32_t(0)) << zeros(uint32_t(0)) << zeros(uint32_t(0)) + << zeros(uint32_t(0)) << binary_le(CP.Obj.Header.NumberOfSections) + << binary_le(CP.Obj.Header.PointerToSymbolTable) + << binary_le(CP.Obj.Header.NumberOfSymbols); + } else { + OS << binary_le(CP.Obj.Header.Machine) + << binary_le(static_cast(CP.Obj.Header.NumberOfSections)) + << binary_le(CP.Obj.Header.TimeDateStamp) + << binary_le(CP.Obj.Header.PointerToSymbolTable) + << binary_le(CP.Obj.Header.NumberOfSymbols) + << binary_le(CP.Obj.Header.SizeOfOptionalHeader) + << binary_le(CP.Obj.Header.Characteristics); + } + if (CP.isPE()) { + if (CP.is64Bit()) { + object::pe32plus_header PEH; + initializeOptionalHeader(CP, COFF::PE32Header::PE32_PLUS, &PEH); + OS.write(reinterpret_cast(&PEH), sizeof(PEH)); + } else { + object::pe32_header PEH; + uint32_t BaseOfData = + initializeOptionalHeader(CP, COFF::PE32Header::PE32, &PEH); + PEH.BaseOfData = BaseOfData; + OS.write(reinterpret_cast(&PEH), sizeof(PEH)); + } + for (const Optional &DD : + CP.Obj.OptionalHeader->DataDirectories) { + if (!DD.hasValue()) { + OS << zeros(uint32_t(0)); + OS << zeros(uint32_t(0)); + } else { + OS << binary_le(DD->RelativeVirtualAddress); + OS << binary_le(DD->Size); + } + } + OS << zeros(uint32_t(0)); + OS << zeros(uint32_t(0)); + } + + assert(OS.tell() == CP.SectionTableStart); + // Output section table. 
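The binary_le and zeros stream helpers that writeCOFF relies on (defined a little earlier) are thin wrappers that serialize integer fields in little-endian byte order regardless of the host's native byte order. A minimal standalone equivalent without the llvm::support machinery, using hypothetical names:

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <sstream>
#include <type_traits>

// Write an unsigned integer to a byte stream in little-endian order,
// independent of the host's endianness.
template <typename T> static void writeLE(std::ostream &OS, T Value) {
  static_assert(std::is_unsigned<T>::value, "expects an unsigned type");
  for (std::size_t I = 0; I < sizeof(T); ++I)
    OS.put(static_cast<char>((Value >> (8 * I)) & 0xff));
}

int main() {
  std::ostringstream OS;
  writeLE<uint16_t>(OS, 0x8664); // e.g. an IMAGE_FILE_MACHINE_AMD64 field
  writeLE<uint32_t>(OS, 0);      // a zeroed 32-bit field
  std::cout << OS.str().size() << " bytes\n"; // 6 bytes
  return 0;
}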
+ for (std::vector::iterator i = CP.Obj.Sections.begin(), + e = CP.Obj.Sections.end(); + i != e; ++i) { + OS.write(i->Header.Name, COFF::NameSize); + OS << binary_le(i->Header.VirtualSize) + << binary_le(i->Header.VirtualAddress) + << binary_le(i->Header.SizeOfRawData) + << binary_le(i->Header.PointerToRawData) + << binary_le(i->Header.PointerToRelocations) + << binary_le(i->Header.PointerToLineNumbers) + << binary_le(i->Header.NumberOfRelocations) + << binary_le(i->Header.NumberOfLineNumbers) + << binary_le(i->Header.Characteristics); + } + assert(OS.tell() == CP.SectionTableStart + CP.SectionTableSize); + + unsigned CurSymbol = 0; + StringMap SymbolTableIndexMap; + for (std::vector::iterator I = CP.Obj.Symbols.begin(), + E = CP.Obj.Symbols.end(); + I != E; ++I) { + SymbolTableIndexMap[I->Name] = CurSymbol; + CurSymbol += 1 + I->Header.NumberOfAuxSymbols; + } + + // Output section data. + for (const COFFYAML::Section &S : CP.Obj.Sections) { + if (S.Header.SizeOfRawData == 0 || S.Header.PointerToRawData == 0) + continue; + assert(S.Header.PointerToRawData >= OS.tell()); + OS.write_zeros(S.Header.PointerToRawData - OS.tell()); + S.SectionData.writeAsBinary(OS); + assert(S.Header.SizeOfRawData >= S.SectionData.binary_size()); + OS.write_zeros(S.Header.SizeOfRawData - S.SectionData.binary_size()); + for (const COFFYAML::Relocation &R : S.Relocations) { + uint32_t SymbolTableIndex; + if (R.SymbolTableIndex) { + if (!R.SymbolName.empty()) + WithColor::error() + << "Both SymbolName and SymbolTableIndex specified\n"; + SymbolTableIndex = *R.SymbolTableIndex; + } else { + SymbolTableIndex = SymbolTableIndexMap[R.SymbolName]; + } + OS << binary_le(R.VirtualAddress) << binary_le(SymbolTableIndex) + << binary_le(R.Type); + } + } + + // Output symbol table. + + for (std::vector::const_iterator i = CP.Obj.Symbols.begin(), + e = CP.Obj.Symbols.end(); + i != e; ++i) { + OS.write(i->Header.Name, COFF::NameSize); + OS << binary_le(i->Header.Value); + if (CP.useBigObj()) + OS << binary_le(i->Header.SectionNumber); + else + OS << binary_le(static_cast(i->Header.SectionNumber)); + OS << binary_le(i->Header.Type) << binary_le(i->Header.StorageClass) + << binary_le(i->Header.NumberOfAuxSymbols); + + if (i->FunctionDefinition) { + OS << binary_le(i->FunctionDefinition->TagIndex) + << binary_le(i->FunctionDefinition->TotalSize) + << binary_le(i->FunctionDefinition->PointerToLinenumber) + << binary_le(i->FunctionDefinition->PointerToNextFunction) + << zeros(i->FunctionDefinition->unused); + OS.write_zeros(CP.getSymbolSize() - COFF::Symbol16Size); + } + if (i->bfAndefSymbol) { + OS << zeros(i->bfAndefSymbol->unused1) + << binary_le(i->bfAndefSymbol->Linenumber) + << zeros(i->bfAndefSymbol->unused2) + << binary_le(i->bfAndefSymbol->PointerToNextFunction) + << zeros(i->bfAndefSymbol->unused3); + OS.write_zeros(CP.getSymbolSize() - COFF::Symbol16Size); + } + if (i->WeakExternal) { + OS << binary_le(i->WeakExternal->TagIndex) + << binary_le(i->WeakExternal->Characteristics) + << zeros(i->WeakExternal->unused); + OS.write_zeros(CP.getSymbolSize() - COFF::Symbol16Size); + } + if (!i->File.empty()) { + unsigned SymbolSize = CP.getSymbolSize(); + uint32_t NumberOfAuxRecords = + (i->File.size() + SymbolSize - 1) / SymbolSize; + uint32_t NumberOfAuxBytes = NumberOfAuxRecords * SymbolSize; + uint32_t NumZeros = NumberOfAuxBytes - i->File.size(); + OS.write(i->File.data(), i->File.size()); + OS.write_zeros(NumZeros); + } + if (i->SectionDefinition) { + OS << binary_le(i->SectionDefinition->Length) + << 
binary_le(i->SectionDefinition->NumberOfRelocations) + << binary_le(i->SectionDefinition->NumberOfLinenumbers) + << binary_le(i->SectionDefinition->CheckSum) + << binary_le(static_cast(i->SectionDefinition->Number)) + << binary_le(i->SectionDefinition->Selection) + << zeros(i->SectionDefinition->unused) + << binary_le(static_cast(i->SectionDefinition->Number >> 16)); + OS.write_zeros(CP.getSymbolSize() - COFF::Symbol16Size); + } + if (i->CLRToken) { + OS << binary_le(i->CLRToken->AuxType) << zeros(i->CLRToken->unused1) + << binary_le(i->CLRToken->SymbolTableIndex) + << zeros(i->CLRToken->unused2); + OS.write_zeros(CP.getSymbolSize() - COFF::Symbol16Size); + } + } + + // Output string table. + if (CP.Obj.Header.PointerToSymbolTable) + OS.write(&CP.StringTable[0], CP.StringTable.size()); + return true; +} + +namespace llvm { +namespace yaml { + +bool yaml2coff(llvm::COFFYAML::Object &Doc, raw_ostream &Out, + ErrorHandler ErrHandler) { + COFFParser CP(Doc, ErrHandler); + if (!CP.parse()) { + ErrHandler("failed to parse YAML file"); + return false; + } + + if (!layoutOptionalHeader(CP)) { + ErrHandler("failed to layout optional header for COFF file"); + return false; + } + + if (!layoutCOFF(CP)) { + ErrHandler("failed to layout COFF file"); + return false; + } + if (!writeCOFF(CP, Out)) { + ErrHandler("failed to write COFF file"); + return false; + } + return true; +} + +} // namespace yaml +} // namespace llvm diff --git a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp index 227107c051dd..95409fdc3300 100644 --- a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp +++ b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp @@ -391,7 +391,7 @@ template <> void SymbolRecordImpl::map(IO &IO) { } template <> void SymbolRecordImpl::map(IO &IO) { - IO.mapRequired("Offset", Symbol.Offset); + IO.mapRequired("Offset", Symbol.Hdr.Offset); IO.mapRequired("Range", Symbol.Range); IO.mapRequired("Gaps", Symbol.Gaps); } diff --git a/lib/ObjectYAML/ELFEmitter.cpp b/lib/ObjectYAML/ELFEmitter.cpp new file mode 100644 index 000000000000..e0faed256f6b --- /dev/null +++ b/lib/ObjectYAML/ELFEmitter.cpp @@ -0,0 +1,1152 @@ +//===- yaml2elf - Convert YAML to a ELF object file -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// The ELF component of yaml2obj. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/ObjectYAML/ELFYAML.h" +#include "llvm/ObjectYAML/yaml2obj.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +// This class is used to build up a contiguous binary blob while keeping +// track of an offset in the output (which notionally begins at +// `InitialOffset`). +namespace { +class ContiguousBlobAccumulator { + const uint64_t InitialOffset; + SmallVector Buf; + raw_svector_ostream OS; + + /// \returns The new offset. 
+ uint64_t padToAlignment(unsigned Align) { + if (Align == 0) + Align = 1; + uint64_t CurrentOffset = InitialOffset + OS.tell(); + uint64_t AlignedOffset = alignTo(CurrentOffset, Align); + OS.write_zeros(AlignedOffset - CurrentOffset); + return AlignedOffset; // == CurrentOffset; + } + +public: + ContiguousBlobAccumulator(uint64_t InitialOffset_) + : InitialOffset(InitialOffset_), Buf(), OS(Buf) {} + template + raw_ostream &getOSAndAlignedOffset(Integer &Offset, unsigned Align) { + Offset = padToAlignment(Align); + return OS; + } + void writeBlobToStream(raw_ostream &Out) { Out << OS.str(); } +}; + +// Used to keep track of section and symbol names, so that in the YAML file +// sections and symbols can be referenced by name instead of by index. +class NameToIdxMap { + StringMap Map; + +public: + /// \Returns false if name is already present in the map. + bool addName(StringRef Name, unsigned Ndx) { + return Map.insert({Name, Ndx}).second; + } + /// \Returns false if name is not present in the map. + bool lookup(StringRef Name, unsigned &Idx) const { + auto I = Map.find(Name); + if (I == Map.end()) + return false; + Idx = I->getValue(); + return true; + } + /// Asserts if name is not present in the map. + unsigned get(StringRef Name) const { + unsigned Idx; + if (lookup(Name, Idx)) + return Idx; + assert(false && "Expected section not found in index"); + return 0; + } + unsigned size() const { return Map.size(); } +}; + +/// "Single point of truth" for the ELF file construction. +/// TODO: This class still has a ways to go before it is truly a "single +/// point of truth". +template class ELFState { + typedef typename ELFT::Ehdr Elf_Ehdr; + typedef typename ELFT::Phdr Elf_Phdr; + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::Rel Elf_Rel; + typedef typename ELFT::Rela Elf_Rela; + typedef typename ELFT::Relr Elf_Relr; + typedef typename ELFT::Dyn Elf_Dyn; + + enum class SymtabType { Static, Dynamic }; + + /// The future ".strtab" section. + StringTableBuilder DotStrtab{StringTableBuilder::ELF}; + + /// The future ".shstrtab" section. + StringTableBuilder DotShStrtab{StringTableBuilder::ELF}; + + /// The future ".dynstr" section. 
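The padToAlignment helper above is the usual round-up-to-a-multiple computation followed by a zero fill, applied to the notional file offset (InitialOffset plus whatever has already been streamed). A bare-bones sketch of just the arithmetic, with no LLVM types:

#include <cassert>
#include <cstdint>

// Round Offset up to the next multiple of Align; an Align of 0 is treated
// as 1 (no alignment requirement), matching padToAlignment above and
// similar in spirit to llvm::alignTo.
static uint64_t alignUp(uint64_t Offset, uint64_t Align) {
  if (Align == 0)
    Align = 1;
  return (Offset + Align - 1) / Align * Align;
}

int main() {
  assert(alignUp(0x41, 16) == 0x50);
  assert(alignUp(0x40, 16) == 0x40); // already aligned: unchanged
  assert(alignUp(7, 0) == 7);        // Align 0 behaves like 1
  return 0;
}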
+ StringTableBuilder DotDynstr{StringTableBuilder::ELF}; + + NameToIdxMap SN2I; + NameToIdxMap SymN2I; + NameToIdxMap DynSymN2I; + ELFYAML::Object &Doc; + + bool HasError = false; + yaml::ErrorHandler ErrHandler; + void reportError(const Twine &Msg); + + std::vector toELFSymbols(ArrayRef Symbols, + const StringTableBuilder &Strtab); + unsigned toSectionIndex(StringRef S, StringRef LocSec, StringRef LocSym = ""); + unsigned toSymbolIndex(StringRef S, StringRef LocSec, bool IsDynamic); + + void buildSectionIndex(); + void buildSymbolIndexes(); + void initProgramHeaders(std::vector &PHeaders); + bool initImplicitHeader(ContiguousBlobAccumulator &CBA, Elf_Shdr &Header, + StringRef SecName, ELFYAML::Section *YAMLSec); + void initSectionHeaders(std::vector &SHeaders, + ContiguousBlobAccumulator &CBA); + void initSymtabSectionHeader(Elf_Shdr &SHeader, SymtabType STType, + ContiguousBlobAccumulator &CBA, + ELFYAML::Section *YAMLSec); + void initStrtabSectionHeader(Elf_Shdr &SHeader, StringRef Name, + StringTableBuilder &STB, + ContiguousBlobAccumulator &CBA, + ELFYAML::Section *YAMLSec); + void setProgramHeaderLayout(std::vector &PHeaders, + std::vector &SHeaders); + void finalizeStrings(); + void writeELFHeader(ContiguousBlobAccumulator &CBA, raw_ostream &OS); + void writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::RawContentSection &Section, + ContiguousBlobAccumulator &CBA); + void writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::RelocationSection &Section, + ContiguousBlobAccumulator &CBA); + void writeSectionContent(Elf_Shdr &SHeader, const ELFYAML::Group &Group, + ContiguousBlobAccumulator &CBA); + void writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::SymtabShndxSection &Shndx, + ContiguousBlobAccumulator &CBA); + void writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::SymverSection &Section, + ContiguousBlobAccumulator &CBA); + void writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::VerneedSection &Section, + ContiguousBlobAccumulator &CBA); + void writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::VerdefSection &Section, + ContiguousBlobAccumulator &CBA); + void writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::MipsABIFlags &Section, + ContiguousBlobAccumulator &CBA); + void writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::DynamicSection &Section, + ContiguousBlobAccumulator &CBA); + void writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::StackSizesSection &Section, + ContiguousBlobAccumulator &CBA); + void writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::HashSection &Section, + ContiguousBlobAccumulator &CBA); + void writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::AddrsigSection &Section, + ContiguousBlobAccumulator &CBA); + + ELFState(ELFYAML::Object &D, yaml::ErrorHandler EH); + +public: + static bool writeELF(raw_ostream &OS, ELFYAML::Object &Doc, + yaml::ErrorHandler EH); +}; +} // end anonymous namespace + +template static size_t arrayDataSize(ArrayRef A) { + return A.size() * sizeof(T); +} + +template static void writeArrayData(raw_ostream &OS, ArrayRef A) { + OS.write((const char *)A.data(), arrayDataSize(A)); +} + +template static void zero(T &Obj) { memset(&Obj, 0, sizeof(Obj)); } + +template +ELFState::ELFState(ELFYAML::Object &D, yaml::ErrorHandler EH) + : Doc(D), ErrHandler(EH) { + StringSet<> DocSections; + for (std::unique_ptr &D : Doc.Sections) { + if (!D->Name.empty()) + DocSections.insert(D->Name); + + // Some sections wants to link to .symtab by default. 
+ // That means we want to create the symbol table for them. + if (D->Type == llvm::ELF::SHT_REL || D->Type == llvm::ELF::SHT_RELA) + if (!Doc.Symbols && D->Link.empty()) + Doc.Symbols.emplace(); + } + + // Insert SHT_NULL section implicitly when it is not defined in YAML. + if (Doc.Sections.empty() || Doc.Sections.front()->Type != ELF::SHT_NULL) + Doc.Sections.insert( + Doc.Sections.begin(), + std::make_unique( + ELFYAML::Section::SectionKind::RawContent, /*IsImplicit=*/true)); + + std::vector ImplicitSections; + if (Doc.Symbols) + ImplicitSections.push_back(".symtab"); + ImplicitSections.insert(ImplicitSections.end(), {".strtab", ".shstrtab"}); + + if (!Doc.DynamicSymbols.empty()) + ImplicitSections.insert(ImplicitSections.end(), {".dynsym", ".dynstr"}); + + // Insert placeholders for implicit sections that are not + // defined explicitly in YAML. + for (StringRef SecName : ImplicitSections) { + if (DocSections.count(SecName)) + continue; + + std::unique_ptr Sec = std::make_unique( + ELFYAML::Section::SectionKind::RawContent, true /*IsImplicit*/); + Sec->Name = SecName; + Doc.Sections.push_back(std::move(Sec)); + } +} + +template +void ELFState::writeELFHeader(ContiguousBlobAccumulator &CBA, raw_ostream &OS) { + using namespace llvm::ELF; + + Elf_Ehdr Header; + zero(Header); + Header.e_ident[EI_MAG0] = 0x7f; + Header.e_ident[EI_MAG1] = 'E'; + Header.e_ident[EI_MAG2] = 'L'; + Header.e_ident[EI_MAG3] = 'F'; + Header.e_ident[EI_CLASS] = ELFT::Is64Bits ? ELFCLASS64 : ELFCLASS32; + Header.e_ident[EI_DATA] = Doc.Header.Data; + Header.e_ident[EI_VERSION] = EV_CURRENT; + Header.e_ident[EI_OSABI] = Doc.Header.OSABI; + Header.e_ident[EI_ABIVERSION] = Doc.Header.ABIVersion; + Header.e_type = Doc.Header.Type; + Header.e_machine = Doc.Header.Machine; + Header.e_version = EV_CURRENT; + Header.e_entry = Doc.Header.Entry; + Header.e_phoff = Doc.ProgramHeaders.size() ? sizeof(Header) : 0; + Header.e_flags = Doc.Header.Flags; + Header.e_ehsize = sizeof(Elf_Ehdr); + Header.e_phentsize = Doc.ProgramHeaders.size() ? sizeof(Elf_Phdr) : 0; + Header.e_phnum = Doc.ProgramHeaders.size(); + + Header.e_shentsize = + Doc.Header.SHEntSize ? (uint16_t)*Doc.Header.SHEntSize : sizeof(Elf_Shdr); + // Immediately following the ELF header and program headers. + // Align the start of the section header and write the ELF header. + uint64_t SHOff; + CBA.getOSAndAlignedOffset(SHOff, sizeof(typename ELFT::uint)); + Header.e_shoff = + Doc.Header.SHOff ? typename ELFT::uint(*Doc.Header.SHOff) : SHOff; + Header.e_shnum = + Doc.Header.SHNum ? (uint16_t)*Doc.Header.SHNum : Doc.Sections.size(); + Header.e_shstrndx = Doc.Header.SHStrNdx ? 
(uint16_t)*Doc.Header.SHStrNdx + : SN2I.get(".shstrtab"); + + OS.write((const char *)&Header, sizeof(Header)); +} + +template +void ELFState::initProgramHeaders(std::vector &PHeaders) { + for (const auto &YamlPhdr : Doc.ProgramHeaders) { + Elf_Phdr Phdr; + Phdr.p_type = YamlPhdr.Type; + Phdr.p_flags = YamlPhdr.Flags; + Phdr.p_vaddr = YamlPhdr.VAddr; + Phdr.p_paddr = YamlPhdr.PAddr; + PHeaders.push_back(Phdr); + } +} + +template +unsigned ELFState::toSectionIndex(StringRef S, StringRef LocSec, + StringRef LocSym) { + unsigned Index; + if (SN2I.lookup(S, Index) || to_integer(S, Index)) + return Index; + + assert(LocSec.empty() || LocSym.empty()); + if (!LocSym.empty()) + reportError("unknown section referenced: '" + S + "' by YAML symbol '" + + LocSym + "'"); + else + reportError("unknown section referenced: '" + S + "' by YAML section '" + + LocSec + "'"); + return 0; +} + +template +unsigned ELFState::toSymbolIndex(StringRef S, StringRef LocSec, + bool IsDynamic) { + const NameToIdxMap &SymMap = IsDynamic ? DynSymN2I : SymN2I; + unsigned Index; + // Here we try to look up S in the symbol table. If it is not there, + // treat its value as a symbol index. + if (!SymMap.lookup(S, Index) && !to_integer(S, Index)) { + reportError("unknown symbol referenced: '" + S + "' by YAML section '" + + LocSec + "'"); + return 0; + } + return Index; +} + +template +bool ELFState::initImplicitHeader(ContiguousBlobAccumulator &CBA, + Elf_Shdr &Header, StringRef SecName, + ELFYAML::Section *YAMLSec) { + // Check if the header was already initialized. + if (Header.sh_offset) + return false; + + if (SecName == ".symtab") + initSymtabSectionHeader(Header, SymtabType::Static, CBA, YAMLSec); + else if (SecName == ".strtab") + initStrtabSectionHeader(Header, SecName, DotStrtab, CBA, YAMLSec); + else if (SecName == ".shstrtab") + initStrtabSectionHeader(Header, SecName, DotShStrtab, CBA, YAMLSec); + else if (SecName == ".dynsym") + initSymtabSectionHeader(Header, SymtabType::Dynamic, CBA, YAMLSec); + else if (SecName == ".dynstr") + initStrtabSectionHeader(Header, SecName, DotDynstr, CBA, YAMLSec); + else + return false; + + // Override the fields if requested. + if (YAMLSec) { + if (YAMLSec->ShName) + Header.sh_name = *YAMLSec->ShName; + if (YAMLSec->ShOffset) + Header.sh_offset = *YAMLSec->ShOffset; + if (YAMLSec->ShSize) + Header.sh_size = *YAMLSec->ShSize; + } + + return true; +} + +StringRef llvm::ELFYAML::dropUniqueSuffix(StringRef S) { + size_t SuffixPos = S.rfind(" ["); + if (SuffixPos == StringRef::npos) + return S; + return S.substr(0, SuffixPos); +} + +template +void ELFState::initSectionHeaders(std::vector &SHeaders, + ContiguousBlobAccumulator &CBA) { + // Ensure SHN_UNDEF entry is present. An all-zero section header is a + // valid SHN_UNDEF entry since SHT_NULL == 0. + SHeaders.resize(Doc.Sections.size()); + + for (size_t I = 0; I < Doc.Sections.size(); ++I) { + ELFYAML::Section *Sec = Doc.Sections[I].get(); + if (I == 0 && Sec->IsImplicit) + continue; + + // We have a few sections like string or symbol tables that are usually + // added implicitly to the end. However, if they are explicitly specified + // in the YAML, we need to write them here. This ensures the file offset + // remains correct. + Elf_Shdr &SHeader = SHeaders[I]; + if (initImplicitHeader(CBA, SHeader, Sec->Name, + Sec->IsImplicit ? nullptr : Sec)) + continue; + + assert(Sec && "It can't be null unless it is an implicit section. 
But all " + "implicit sections should already have been handled above."); + + SHeader.sh_name = + DotShStrtab.getOffset(ELFYAML::dropUniqueSuffix(Sec->Name)); + SHeader.sh_type = Sec->Type; + if (Sec->Flags) + SHeader.sh_flags = *Sec->Flags; + SHeader.sh_addr = Sec->Address; + SHeader.sh_addralign = Sec->AddressAlign; + + if (!Sec->Link.empty()) + SHeader.sh_link = toSectionIndex(Sec->Link, Sec->Name); + + if (I == 0) { + if (auto RawSec = dyn_cast(Sec)) { + // We do not write any content for special SHN_UNDEF section. + if (RawSec->Size) + SHeader.sh_size = *RawSec->Size; + if (RawSec->Info) + SHeader.sh_info = *RawSec->Info; + } + if (Sec->EntSize) + SHeader.sh_entsize = *Sec->EntSize; + } else if (auto S = dyn_cast(Sec)) { + writeSectionContent(SHeader, *S, CBA); + } else if (auto S = dyn_cast(Sec)) { + writeSectionContent(SHeader, *S, CBA); + } else if (auto S = dyn_cast(Sec)) { + writeSectionContent(SHeader, *S, CBA); + } else if (auto S = dyn_cast(Sec)) { + writeSectionContent(SHeader, *S, CBA); + } else if (auto S = dyn_cast(Sec)) { + writeSectionContent(SHeader, *S, CBA); + } else if (auto S = dyn_cast(Sec)) { + SHeader.sh_entsize = 0; + SHeader.sh_size = S->Size; + // SHT_NOBITS section does not have content + // so just to setup the section offset. + CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + } else if (auto S = dyn_cast(Sec)) { + writeSectionContent(SHeader, *S, CBA); + } else if (auto S = dyn_cast(Sec)) { + writeSectionContent(SHeader, *S, CBA); + } else if (auto S = dyn_cast(Sec)) { + writeSectionContent(SHeader, *S, CBA); + } else if (auto S = dyn_cast(Sec)) { + writeSectionContent(SHeader, *S, CBA); + } else if (auto S = dyn_cast(Sec)) { + writeSectionContent(SHeader, *S, CBA); + } else if (auto S = dyn_cast(Sec)) { + writeSectionContent(SHeader, *S, CBA); + } else if (auto S = dyn_cast(Sec)) { + writeSectionContent(SHeader, *S, CBA); + } else { + llvm_unreachable("Unknown section type"); + } + + // Override the fields if requested. + if (Sec) { + if (Sec->ShName) + SHeader.sh_name = *Sec->ShName; + if (Sec->ShOffset) + SHeader.sh_offset = *Sec->ShOffset; + if (Sec->ShSize) + SHeader.sh_size = *Sec->ShSize; + } + } +} + +static size_t findFirstNonGlobal(ArrayRef Symbols) { + for (size_t I = 0; I < Symbols.size(); ++I) + if (Symbols[I].Binding.value != ELF::STB_LOCAL) + return I; + return Symbols.size(); +} + +static uint64_t writeContent(raw_ostream &OS, + const Optional &Content, + const Optional &Size) { + size_t ContentSize = 0; + if (Content) { + Content->writeAsBinary(OS); + ContentSize = Content->binary_size(); + } + + if (!Size) + return ContentSize; + + OS.write_zeros(*Size - ContentSize); + return *Size; +} + +template +std::vector +ELFState::toELFSymbols(ArrayRef Symbols, + const StringTableBuilder &Strtab) { + std::vector Ret; + Ret.resize(Symbols.size() + 1); + + size_t I = 0; + for (const auto &Sym : Symbols) { + Elf_Sym &Symbol = Ret[++I]; + + // If NameIndex, which contains the name offset, is explicitly specified, we + // use it. This is useful for preparing broken objects. Otherwise, we add + // the specified Name to the string table builder to get its offset. 
+ if (Sym.NameIndex) + Symbol.st_name = *Sym.NameIndex; + else if (!Sym.Name.empty()) + Symbol.st_name = Strtab.getOffset(ELFYAML::dropUniqueSuffix(Sym.Name)); + + Symbol.setBindingAndType(Sym.Binding, Sym.Type); + if (!Sym.Section.empty()) + Symbol.st_shndx = toSectionIndex(Sym.Section, "", Sym.Name); + else if (Sym.Index) + Symbol.st_shndx = *Sym.Index; + + Symbol.st_value = Sym.Value; + Symbol.st_other = Sym.Other ? *Sym.Other : 0; + Symbol.st_size = Sym.Size; + } + + return Ret; +} + +template +void ELFState::initSymtabSectionHeader(Elf_Shdr &SHeader, + SymtabType STType, + ContiguousBlobAccumulator &CBA, + ELFYAML::Section *YAMLSec) { + + bool IsStatic = STType == SymtabType::Static; + ArrayRef Symbols; + if (IsStatic && Doc.Symbols) + Symbols = *Doc.Symbols; + else if (!IsStatic) + Symbols = Doc.DynamicSymbols; + + ELFYAML::RawContentSection *RawSec = + dyn_cast_or_null(YAMLSec); + if (RawSec && !Symbols.empty() && (RawSec->Content || RawSec->Size)) { + if (RawSec->Content) + reportError("cannot specify both `Content` and " + + (IsStatic ? Twine("`Symbols`") : Twine("`DynamicSymbols`")) + + " for symbol table section '" + RawSec->Name + "'"); + if (RawSec->Size) + reportError("cannot specify both `Size` and " + + (IsStatic ? Twine("`Symbols`") : Twine("`DynamicSymbols`")) + + " for symbol table section '" + RawSec->Name + "'"); + return; + } + + zero(SHeader); + SHeader.sh_name = DotShStrtab.getOffset(IsStatic ? ".symtab" : ".dynsym"); + + if (YAMLSec) + SHeader.sh_type = YAMLSec->Type; + else + SHeader.sh_type = IsStatic ? ELF::SHT_SYMTAB : ELF::SHT_DYNSYM; + + if (RawSec && !RawSec->Link.empty()) { + // If the Link field is explicitly defined in the document, + // we should use it. + SHeader.sh_link = toSectionIndex(RawSec->Link, RawSec->Name); + } else { + // When we describe the .dynsym section in the document explicitly, it is + // allowed to omit the "DynamicSymbols" tag. In this case .dynstr is not + // added implicitly and we should be able to leave the Link zeroed if + // .dynstr is not defined. + unsigned Link = 0; + if (IsStatic) + Link = SN2I.get(".strtab"); + else + SN2I.lookup(".dynstr", Link); + SHeader.sh_link = Link; + } + + if (YAMLSec && YAMLSec->Flags) + SHeader.sh_flags = *YAMLSec->Flags; + else if (!IsStatic) + SHeader.sh_flags = ELF::SHF_ALLOC; + + // If the symbol table section is explicitly described in the YAML + // then we should set the fields requested. + SHeader.sh_info = (RawSec && RawSec->Info) ? (unsigned)(*RawSec->Info) + : findFirstNonGlobal(Symbols) + 1; + SHeader.sh_entsize = (YAMLSec && YAMLSec->EntSize) + ? (uint64_t)(*YAMLSec->EntSize) + : sizeof(Elf_Sym); + SHeader.sh_addralign = YAMLSec ? (uint64_t)YAMLSec->AddressAlign : 8; + SHeader.sh_addr = YAMLSec ? (uint64_t)YAMLSec->Address : 0; + + auto &OS = CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + if (RawSec && (RawSec->Content || RawSec->Size)) { + assert(Symbols.empty()); + SHeader.sh_size = writeContent(OS, RawSec->Content, RawSec->Size); + return; + } + + std::vector Syms = + toELFSymbols(Symbols, IsStatic ? DotStrtab : DotDynstr); + writeArrayData(OS, makeArrayRef(Syms)); + SHeader.sh_size = arrayDataSize(makeArrayRef(Syms)); +} + +template +void ELFState::initStrtabSectionHeader(Elf_Shdr &SHeader, StringRef Name, + StringTableBuilder &STB, + ContiguousBlobAccumulator &CBA, + ELFYAML::Section *YAMLSec) { + zero(SHeader); + SHeader.sh_name = DotShStrtab.getOffset(Name); + SHeader.sh_type = YAMLSec ? YAMLSec->Type : ELF::SHT_STRTAB; + SHeader.sh_addralign = YAMLSec ? 
(uint64_t)YAMLSec->AddressAlign : 1; + + ELFYAML::RawContentSection *RawSec = + dyn_cast_or_null(YAMLSec); + + auto &OS = CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + if (RawSec && (RawSec->Content || RawSec->Size)) { + SHeader.sh_size = writeContent(OS, RawSec->Content, RawSec->Size); + } else { + STB.write(OS); + SHeader.sh_size = STB.getSize(); + } + + if (YAMLSec && YAMLSec->EntSize) + SHeader.sh_entsize = *YAMLSec->EntSize; + + if (RawSec && RawSec->Info) + SHeader.sh_info = *RawSec->Info; + + if (YAMLSec && YAMLSec->Flags) + SHeader.sh_flags = *YAMLSec->Flags; + else if (Name == ".dynstr") + SHeader.sh_flags = ELF::SHF_ALLOC; + + // If the section is explicitly described in the YAML + // then we want to use its section address. + if (YAMLSec) + SHeader.sh_addr = YAMLSec->Address; +} + +template void ELFState::reportError(const Twine &Msg) { + ErrHandler(Msg); + HasError = true; +} + +template +void ELFState::setProgramHeaderLayout(std::vector &PHeaders, + std::vector &SHeaders) { + uint32_t PhdrIdx = 0; + for (auto &YamlPhdr : Doc.ProgramHeaders) { + Elf_Phdr &PHeader = PHeaders[PhdrIdx++]; + + std::vector Sections; + for (const ELFYAML::SectionName &SecName : YamlPhdr.Sections) { + unsigned Index; + if (!SN2I.lookup(SecName.Section, Index)) { + reportError("unknown section referenced: '" + SecName.Section + + "' by program header"); + continue; + } + Sections.push_back(&SHeaders[Index]); + } + + if (YamlPhdr.Offset) { + PHeader.p_offset = *YamlPhdr.Offset; + } else { + if (YamlPhdr.Sections.size()) + PHeader.p_offset = UINT32_MAX; + else + PHeader.p_offset = 0; + + // Find the minimum offset for the program header. + for (Elf_Shdr *SHeader : Sections) + PHeader.p_offset = std::min(PHeader.p_offset, SHeader->sh_offset); + } + + // Find the maximum offset of the end of a section in order to set p_filesz + // and p_memsz. When setting p_filesz, trailing SHT_NOBITS sections are not + // counted. + uint64_t FileOffset = PHeader.p_offset, MemOffset = PHeader.p_offset; + for (Elf_Shdr *SHeader : Sections) { + uint64_t End = SHeader->sh_offset + SHeader->sh_size; + MemOffset = std::max(MemOffset, End); + + if (SHeader->sh_type != llvm::ELF::SHT_NOBITS) + FileOffset = std::max(FileOffset, End); + } + + // Set the file size and the memory size if not set explicitly. + PHeader.p_filesz = YamlPhdr.FileSize ? uint64_t(*YamlPhdr.FileSize) + : FileOffset - PHeader.p_offset; + PHeader.p_memsz = YamlPhdr.MemSize ? uint64_t(*YamlPhdr.MemSize) + : MemOffset - PHeader.p_offset; + + if (YamlPhdr.Align) { + PHeader.p_align = *YamlPhdr.Align; + } else { + // Set the alignment of the segment to be the maximum alignment of the + // sections so that by default the segment has a valid and sensible + // alignment. 
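Tying together the segment fields computed above and just below: p_filesz and p_memsz are taken from the furthest section end inside the segment, with SHT_NOBITS sections such as .bss contributing only to the memory size, and the default p_align is the largest section alignment. A simplified sketch with made-up numbers, not part of the patch:

#include <algorithm>
#include <cassert>
#include <cstdint>

// A stripped-down section record: file offset, size, alignment, and
// whether it occupies file space (SHT_NOBITS sections such as .bss do not).
struct Sec {
  uint64_t Offset, Size, Addralign;
  bool Nobits;
};

int main() {
  const uint64_t SegOffset = 0x1000;
  const Sec Sections[] = {
      {0x1000, 0x200, 8, false}, // .data: ends at 0x1200 in the file
      {0x1200, 0x300, 16, true}, // .bss:  occupies memory only
  };

  uint64_t FileEnd = SegOffset, MemEnd = SegOffset, MaxAlign = 1;
  for (const Sec &S : Sections) {
    uint64_t End = S.Offset + S.Size;
    MemEnd = std::max(MemEnd, End);
    if (!S.Nobits) // NOBITS sections do not extend the file size
      FileEnd = std::max(FileEnd, End);
    MaxAlign = std::max(MaxAlign, S.Addralign);
  }

  assert(FileEnd - SegOffset == 0x200); // p_filesz
  assert(MemEnd - SegOffset == 0x500);  // p_memsz
  assert(MaxAlign == 16);               // default p_align
  return 0;
}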
+ PHeader.p_align = 1; + for (Elf_Shdr *SHeader : Sections) + PHeader.p_align = std::max(PHeader.p_align, SHeader->sh_addralign); + } + } +} + +template +void ELFState::writeSectionContent( + Elf_Shdr &SHeader, const ELFYAML::RawContentSection &Section, + ContiguousBlobAccumulator &CBA) { + raw_ostream &OS = + CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + SHeader.sh_size = writeContent(OS, Section.Content, Section.Size); + + if (Section.EntSize) + SHeader.sh_entsize = *Section.EntSize; + else if (Section.Type == llvm::ELF::SHT_RELR) + SHeader.sh_entsize = sizeof(Elf_Relr); + else + SHeader.sh_entsize = 0; + + if (Section.Info) + SHeader.sh_info = *Section.Info; +} + +static bool isMips64EL(const ELFYAML::Object &Doc) { + return Doc.Header.Machine == ELFYAML::ELF_EM(llvm::ELF::EM_MIPS) && + Doc.Header.Class == ELFYAML::ELF_ELFCLASS(ELF::ELFCLASS64) && + Doc.Header.Data == ELFYAML::ELF_ELFDATA(ELF::ELFDATA2LSB); +} + +template +void ELFState::writeSectionContent( + Elf_Shdr &SHeader, const ELFYAML::RelocationSection &Section, + ContiguousBlobAccumulator &CBA) { + assert((Section.Type == llvm::ELF::SHT_REL || + Section.Type == llvm::ELF::SHT_RELA) && + "Section type is not SHT_REL nor SHT_RELA"); + + bool IsRela = Section.Type == llvm::ELF::SHT_RELA; + SHeader.sh_entsize = IsRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel); + SHeader.sh_size = SHeader.sh_entsize * Section.Relocations.size(); + + // For relocation section set link to .symtab by default. + if (Section.Link.empty()) + SHeader.sh_link = SN2I.get(".symtab"); + + if (!Section.RelocatableSec.empty()) + SHeader.sh_info = toSectionIndex(Section.RelocatableSec, Section.Name); + + auto &OS = CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + for (const auto &Rel : Section.Relocations) { + unsigned SymIdx = Rel.Symbol ? toSymbolIndex(*Rel.Symbol, Section.Name, + Section.Link == ".dynsym") + : 0; + if (IsRela) { + Elf_Rela REntry; + zero(REntry); + REntry.r_offset = Rel.Offset; + REntry.r_addend = Rel.Addend; + REntry.setSymbolAndType(SymIdx, Rel.Type, isMips64EL(Doc)); + OS.write((const char *)&REntry, sizeof(REntry)); + } else { + Elf_Rel REntry; + zero(REntry); + REntry.r_offset = Rel.Offset; + REntry.setSymbolAndType(SymIdx, Rel.Type, isMips64EL(Doc)); + OS.write((const char *)&REntry, sizeof(REntry)); + } + } +} + +template +void ELFState::writeSectionContent( + Elf_Shdr &SHeader, const ELFYAML::SymtabShndxSection &Shndx, + ContiguousBlobAccumulator &CBA) { + raw_ostream &OS = + CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + + for (uint32_t E : Shndx.Entries) + support::endian::write(OS, E, ELFT::TargetEndianness); + + SHeader.sh_entsize = Shndx.EntSize ? 
(uint64_t)*Shndx.EntSize : 4; + SHeader.sh_size = Shndx.Entries.size() * SHeader.sh_entsize; +} + +template +void ELFState::writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::Group &Section, + ContiguousBlobAccumulator &CBA) { + assert(Section.Type == llvm::ELF::SHT_GROUP && + "Section type is not SHT_GROUP"); + + SHeader.sh_entsize = 4; + SHeader.sh_size = SHeader.sh_entsize * Section.Members.size(); + SHeader.sh_info = + toSymbolIndex(Section.Signature, Section.Name, /*IsDynamic=*/false); + + raw_ostream &OS = + CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + + for (const ELFYAML::SectionOrType &Member : Section.Members) { + unsigned int SectionIndex = 0; + if (Member.sectionNameOrType == "GRP_COMDAT") + SectionIndex = llvm::ELF::GRP_COMDAT; + else + SectionIndex = toSectionIndex(Member.sectionNameOrType, Section.Name); + support::endian::write(OS, SectionIndex, ELFT::TargetEndianness); + } +} + +template +void ELFState::writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::SymverSection &Section, + ContiguousBlobAccumulator &CBA) { + raw_ostream &OS = + CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + for (uint16_t Version : Section.Entries) + support::endian::write(OS, Version, ELFT::TargetEndianness); + + SHeader.sh_entsize = Section.EntSize ? (uint64_t)*Section.EntSize : 2; + SHeader.sh_size = Section.Entries.size() * SHeader.sh_entsize; +} + +template +void ELFState::writeSectionContent( + Elf_Shdr &SHeader, const ELFYAML::StackSizesSection &Section, + ContiguousBlobAccumulator &CBA) { + using uintX_t = typename ELFT::uint; + raw_ostream &OS = + CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + + if (Section.Content || Section.Size) { + SHeader.sh_size = writeContent(OS, Section.Content, Section.Size); + return; + } + + for (const ELFYAML::StackSizeEntry &E : *Section.Entries) { + support::endian::write(OS, E.Address, ELFT::TargetEndianness); + SHeader.sh_size += sizeof(uintX_t) + encodeULEB128(E.Size, OS); + } +} + +template +void ELFState::writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::HashSection &Section, + ContiguousBlobAccumulator &CBA) { + raw_ostream &OS = + CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + + unsigned Link = 0; + if (Section.Link.empty() && SN2I.lookup(".dynsym", Link)) + SHeader.sh_link = Link; + + if (Section.Content || Section.Size) { + SHeader.sh_size = writeContent(OS, Section.Content, Section.Size); + return; + } + + support::endian::write(OS, Section.Bucket->size(), + ELFT::TargetEndianness); + support::endian::write(OS, Section.Chain->size(), + ELFT::TargetEndianness); + for (uint32_t Val : *Section.Bucket) + support::endian::write(OS, Val, ELFT::TargetEndianness); + for (uint32_t Val : *Section.Chain) + support::endian::write(OS, Val, ELFT::TargetEndianness); + + SHeader.sh_size = (2 + Section.Bucket->size() + Section.Chain->size()) * 4; +} + +template +void ELFState::writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::VerdefSection &Section, + ContiguousBlobAccumulator &CBA) { + typedef typename ELFT::Verdef Elf_Verdef; + typedef typename ELFT::Verdaux Elf_Verdaux; + raw_ostream &OS = + CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + + uint64_t AuxCnt = 0; + for (size_t I = 0; I < Section.Entries.size(); ++I) { + const ELFYAML::VerdefEntry &E = Section.Entries[I]; + + Elf_Verdef VerDef; + VerDef.vd_version = E.Version; + VerDef.vd_flags = E.Flags; + VerDef.vd_ndx = E.VersionNdx; + VerDef.vd_hash = E.Hash; + VerDef.vd_aux = 
sizeof(Elf_Verdef); + VerDef.vd_cnt = E.VerNames.size(); + if (I == Section.Entries.size() - 1) + VerDef.vd_next = 0; + else + VerDef.vd_next = + sizeof(Elf_Verdef) + E.VerNames.size() * sizeof(Elf_Verdaux); + OS.write((const char *)&VerDef, sizeof(Elf_Verdef)); + + for (size_t J = 0; J < E.VerNames.size(); ++J, ++AuxCnt) { + Elf_Verdaux VernAux; + VernAux.vda_name = DotDynstr.getOffset(E.VerNames[J]); + if (J == E.VerNames.size() - 1) + VernAux.vda_next = 0; + else + VernAux.vda_next = sizeof(Elf_Verdaux); + OS.write((const char *)&VernAux, sizeof(Elf_Verdaux)); + } + } + + SHeader.sh_size = Section.Entries.size() * sizeof(Elf_Verdef) + + AuxCnt * sizeof(Elf_Verdaux); + SHeader.sh_info = Section.Info; +} + +template +void ELFState::writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::VerneedSection &Section, + ContiguousBlobAccumulator &CBA) { + typedef typename ELFT::Verneed Elf_Verneed; + typedef typename ELFT::Vernaux Elf_Vernaux; + + auto &OS = CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + + uint64_t AuxCnt = 0; + for (size_t I = 0; I < Section.VerneedV.size(); ++I) { + const ELFYAML::VerneedEntry &VE = Section.VerneedV[I]; + + Elf_Verneed VerNeed; + VerNeed.vn_version = VE.Version; + VerNeed.vn_file = DotDynstr.getOffset(VE.File); + if (I == Section.VerneedV.size() - 1) + VerNeed.vn_next = 0; + else + VerNeed.vn_next = + sizeof(Elf_Verneed) + VE.AuxV.size() * sizeof(Elf_Vernaux); + VerNeed.vn_cnt = VE.AuxV.size(); + VerNeed.vn_aux = sizeof(Elf_Verneed); + OS.write((const char *)&VerNeed, sizeof(Elf_Verneed)); + + for (size_t J = 0; J < VE.AuxV.size(); ++J, ++AuxCnt) { + const ELFYAML::VernauxEntry &VAuxE = VE.AuxV[J]; + + Elf_Vernaux VernAux; + VernAux.vna_hash = VAuxE.Hash; + VernAux.vna_flags = VAuxE.Flags; + VernAux.vna_other = VAuxE.Other; + VernAux.vna_name = DotDynstr.getOffset(VAuxE.Name); + if (J == VE.AuxV.size() - 1) + VernAux.vna_next = 0; + else + VernAux.vna_next = sizeof(Elf_Vernaux); + OS.write((const char *)&VernAux, sizeof(Elf_Vernaux)); + } + } + + SHeader.sh_size = Section.VerneedV.size() * sizeof(Elf_Verneed) + + AuxCnt * sizeof(Elf_Vernaux); + SHeader.sh_info = Section.Info; +} + +template +void ELFState::writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::MipsABIFlags &Section, + ContiguousBlobAccumulator &CBA) { + assert(Section.Type == llvm::ELF::SHT_MIPS_ABIFLAGS && + "Section type is not SHT_MIPS_ABIFLAGS"); + + object::Elf_Mips_ABIFlags Flags; + zero(Flags); + SHeader.sh_entsize = sizeof(Flags); + SHeader.sh_size = SHeader.sh_entsize; + + auto &OS = CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + Flags.version = Section.Version; + Flags.isa_level = Section.ISALevel; + Flags.isa_rev = Section.ISARevision; + Flags.gpr_size = Section.GPRSize; + Flags.cpr1_size = Section.CPR1Size; + Flags.cpr2_size = Section.CPR2Size; + Flags.fp_abi = Section.FpABI; + Flags.isa_ext = Section.ISAExtension; + Flags.ases = Section.ASEs; + Flags.flags1 = Section.Flags1; + Flags.flags2 = Section.Flags2; + OS.write((const char *)&Flags, sizeof(Flags)); +} + +template +void ELFState::writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::DynamicSection &Section, + ContiguousBlobAccumulator &CBA) { + typedef typename ELFT::uint uintX_t; + + assert(Section.Type == llvm::ELF::SHT_DYNAMIC && + "Section type is not SHT_DYNAMIC"); + + if (!Section.Entries.empty() && Section.Content) + reportError("cannot specify both raw content and explicit entries " + "for dynamic section '" + + Section.Name + "'"); + + if (Section.Content) + 
SHeader.sh_size = Section.Content->binary_size(); + else + SHeader.sh_size = 2 * sizeof(uintX_t) * Section.Entries.size(); + if (Section.EntSize) + SHeader.sh_entsize = *Section.EntSize; + else + SHeader.sh_entsize = sizeof(Elf_Dyn); + + raw_ostream &OS = + CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + for (const ELFYAML::DynamicEntry &DE : Section.Entries) { + support::endian::write(OS, DE.Tag, ELFT::TargetEndianness); + support::endian::write(OS, DE.Val, ELFT::TargetEndianness); + } + if (Section.Content) + Section.Content->writeAsBinary(OS); +} + +template +void ELFState::writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::AddrsigSection &Section, + ContiguousBlobAccumulator &CBA) { + raw_ostream &OS = + CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + + unsigned Link = 0; + if (Section.Link.empty() && SN2I.lookup(".symtab", Link)) + SHeader.sh_link = Link; + + if (Section.Content || Section.Size) { + SHeader.sh_size = writeContent(OS, Section.Content, Section.Size); + return; + } + + for (const ELFYAML::AddrsigSymbol &Sym : *Section.Symbols) { + uint64_t Val = + Sym.Name ? toSymbolIndex(*Sym.Name, Section.Name, /*IsDynamic=*/false) + : (uint32_t)*Sym.Index; + SHeader.sh_size += encodeULEB128(Val, OS); + } +} + +template void ELFState::buildSectionIndex() { + for (unsigned I = 0, E = Doc.Sections.size(); I != E; ++I) { + StringRef Name = Doc.Sections[I]->Name; + if (Name.empty()) + continue; + + DotShStrtab.add(ELFYAML::dropUniqueSuffix(Name)); + if (!SN2I.addName(Name, I)) + reportError("repeated section name: '" + Name + + "' at YAML section number " + Twine(I)); + } + + DotShStrtab.finalize(); +} + +template void ELFState::buildSymbolIndexes() { + auto Build = [this](ArrayRef V, NameToIdxMap &Map) { + for (size_t I = 0, S = V.size(); I < S; ++I) { + const ELFYAML::Symbol &Sym = V[I]; + if (!Sym.Name.empty() && !Map.addName(Sym.Name, I + 1)) + reportError("repeated symbol name: '" + Sym.Name + "'"); + } + }; + + if (Doc.Symbols) + Build(*Doc.Symbols, SymN2I); + Build(Doc.DynamicSymbols, DynSymN2I); +} + +template void ELFState::finalizeStrings() { + // Add the regular symbol names to .strtab section. + if (Doc.Symbols) + for (const ELFYAML::Symbol &Sym : *Doc.Symbols) + DotStrtab.add(ELFYAML::dropUniqueSuffix(Sym.Name)); + DotStrtab.finalize(); + + // Add the dynamic symbol names to .dynstr section. + for (const ELFYAML::Symbol &Sym : Doc.DynamicSymbols) + DotDynstr.add(ELFYAML::dropUniqueSuffix(Sym.Name)); + + // SHT_GNU_verdef and SHT_GNU_verneed sections might also + // add strings to .dynstr section. + for (const std::unique_ptr &Sec : Doc.Sections) { + if (auto VerNeed = dyn_cast(Sec.get())) { + for (const ELFYAML::VerneedEntry &VE : VerNeed->VerneedV) { + DotDynstr.add(VE.File); + for (const ELFYAML::VernauxEntry &Aux : VE.AuxV) + DotDynstr.add(Aux.Name); + } + } else if (auto VerDef = dyn_cast(Sec.get())) { + for (const ELFYAML::VerdefEntry &E : VerDef->Entries) + for (StringRef Name : E.VerNames) + DotDynstr.add(Name); + } + } + + DotDynstr.finalize(); +} + +template +bool ELFState::writeELF(raw_ostream &OS, ELFYAML::Object &Doc, + yaml::ErrorHandler EH) { + ELFState State(Doc, EH); + + // Finalize .strtab and .dynstr sections. We do that early because want to + // finalize the string table builders before writing the content of the + // sections that might want to use them. 
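+ // (StringTableBuilder only hands out valid offsets after finalize(), so the
+ // symbol and string table writers below can call getOffset() safely.)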
+ State.finalizeStrings(); + + State.buildSectionIndex(); + State.buildSymbolIndexes(); + + std::vector PHeaders; + State.initProgramHeaders(PHeaders); + + // XXX: This offset is tightly coupled with the order that we write + // things to `OS`. + const size_t SectionContentBeginOffset = + sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * Doc.ProgramHeaders.size(); + ContiguousBlobAccumulator CBA(SectionContentBeginOffset); + + std::vector SHeaders; + State.initSectionHeaders(SHeaders, CBA); + + // Now we can decide segment offsets + State.setProgramHeaderLayout(PHeaders, SHeaders); + + if (State.HasError) + return false; + + State.writeELFHeader(CBA, OS); + writeArrayData(OS, makeArrayRef(PHeaders)); + CBA.writeBlobToStream(OS); + writeArrayData(OS, makeArrayRef(SHeaders)); + return true; +} + +namespace llvm { +namespace yaml { + +bool yaml2elf(llvm::ELFYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH) { + bool IsLE = Doc.Header.Data == ELFYAML::ELF_ELFDATA(ELF::ELFDATA2LSB); + bool Is64Bit = Doc.Header.Class == ELFYAML::ELF_ELFCLASS(ELF::ELFCLASS64); + if (Is64Bit) { + if (IsLE) + return ELFState::writeELF(Out, Doc, EH); + return ELFState::writeELF(Out, Doc, EH); + } + if (IsLE) + return ELFState::writeELF(Out, Doc, EH); + return ELFState::writeELF(Out, Doc, EH); +} + +} // namespace yaml +} // namespace llvm diff --git a/lib/ObjectYAML/ELFYAML.cpp b/lib/ObjectYAML/ELFYAML.cpp index 7497154c757d..29585abe6e80 100644 --- a/lib/ObjectYAML/ELFYAML.cpp +++ b/lib/ObjectYAML/ELFYAML.cpp @@ -11,12 +11,14 @@ //===----------------------------------------------------------------------===// #include "llvm/ObjectYAML/ELFYAML.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MipsABIFlags.h" #include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/WithColor.h" #include #include @@ -50,6 +52,8 @@ void ScalarEnumerationTraits::enumeration( ECase(PT_PHDR); ECase(PT_TLS); ECase(PT_GNU_EH_FRAME); + ECase(PT_GNU_STACK); + ECase(PT_GNU_RELRO); #undef ECase IO.enumFallback(Value); } @@ -217,6 +221,7 @@ void ScalarEnumerationTraits::enumeration( ECase(EM_LANAI); ECase(EM_BPF); #undef ECase + IO.enumFallback(Value); } void ScalarEnumerationTraits::enumeration( @@ -459,6 +464,9 @@ void ScalarEnumerationTraits::enumeration( ECase(SHT_LLVM_CALL_GRAPH_PROFILE); ECase(SHT_LLVM_ADDRSIG); ECase(SHT_LLVM_DEPENDENT_LIBRARIES); + ECase(SHT_LLVM_SYMPART); + ECase(SHT_LLVM_PART_EHDR); + ECase(SHT_LLVM_PART_PHDR); ECase(SHT_GNU_ATTRIBUTES); ECase(SHT_GNU_HASH); ECase(SHT_GNU_verdef); @@ -563,7 +571,7 @@ void ScalarEnumerationTraits::enumeration( ECase(SHN_HEXAGON_SCOMMON_4); ECase(SHN_HEXAGON_SCOMMON_8); #undef ECase - IO.enumFallback(Value); + IO.enumFallback(Value); } void ScalarEnumerationTraits::enumeration( @@ -592,34 +600,6 @@ void ScalarEnumerationTraits::enumeration( IO.enumFallback(Value); } -void ScalarEnumerationTraits::enumeration( - IO &IO, ELFYAML::ELF_STV &Value) { -#define ECase(X) IO.enumCase(Value, #X, ELF::X) - ECase(STV_DEFAULT); - ECase(STV_INTERNAL); - ECase(STV_HIDDEN); - ECase(STV_PROTECTED); -#undef ECase -} - -void ScalarBitSetTraits::bitset(IO &IO, - ELFYAML::ELF_STO &Value) { - const auto *Object = static_cast(IO.getContext()); - assert(Object && "The IO context is not initialized"); -#define BCase(X) IO.bitSetCase(Value, #X, ELF::X) - switch (Object->Header.Machine) { - case ELF::EM_MIPS: - BCase(STO_MIPS_OPTIONAL); - BCase(STO_MIPS_PLT); - 
BCase(STO_MIPS_PIC); - BCase(STO_MIPS_MICROMIPS); - break; - default: - break; // Nothing to do - } -#undef BCase -#undef BCaseMask -} void ScalarEnumerationTraits::enumeration( IO &IO, ELFYAML::ELF_RSS &Value) { @@ -671,8 +651,12 @@ void ScalarEnumerationTraits::enumeration( case ELF::EM_BPF: #include "llvm/BinaryFormat/ELFRelocs/BPF.def" break; + case ELF::EM_PPC64: +#include "llvm/BinaryFormat/ELFRelocs/PowerPC64.def" + break; default: - llvm_unreachable("Unsupported architecture"); + // Nothing to do. + break; } #undef ELF_RELOC IO.enumFallback(Value); @@ -845,7 +829,7 @@ void MappingTraits::mapping(IO &IO, IO.mapOptional("Entry", FileHdr.Entry, Hex64(0)); IO.mapOptional("SHEntSize", FileHdr.SHEntSize); - IO.mapOptional("SHOffset", FileHdr.SHOffset); + IO.mapOptional("SHOff", FileHdr.SHOff); IO.mapOptional("SHNum", FileHdr.SHNum); IO.mapOptional("SHStrNdx", FileHdr.SHStrNdx); } @@ -863,18 +847,111 @@ void MappingTraits::mapping( IO.mapOptional("Offset", Phdr.Offset); } +LLVM_YAML_STRONG_TYPEDEF(StringRef, StOtherPiece) + +template <> struct ScalarTraits { + static void output(const StOtherPiece &Val, void *, raw_ostream &Out) { + Out << Val; + } + static StringRef input(StringRef Scalar, void *, StOtherPiece &Val) { + Val = Scalar; + return {}; + } + static QuotingType mustQuote(StringRef) { return QuotingType::None; } +}; +template <> struct SequenceElementTraits { + static const bool flow = true; +}; + namespace { struct NormalizedOther { - NormalizedOther(IO &) - : Visibility(ELFYAML::ELF_STV(0)), Other(ELFYAML::ELF_STO(0)) {} - NormalizedOther(IO &, uint8_t Original) - : Visibility(Original & 0x3), Other(Original & ~0x3) {} + NormalizedOther(IO &IO) : YamlIO(IO) {} + NormalizedOther(IO &IO, Optional Original) : YamlIO(IO) { + assert(Original && "This constructor is only used for outputting YAML and " + "assumes a non-empty Original"); + std::vector Ret; + const auto *Object = static_cast(YamlIO.getContext()); + for (std::pair &P : + getFlags(Object->Header.Machine).takeVector()) { + uint8_t FlagValue = P.second; + if ((*Original & FlagValue) != FlagValue) + continue; + *Original &= ~FlagValue; + Ret.push_back({P.first}); + } + + if (*Original != 0) { + UnknownFlagsHolder = std::to_string(*Original); + Ret.push_back({UnknownFlagsHolder}); + } + + if (!Ret.empty()) + Other = std::move(Ret); + } + + uint8_t toValue(StringRef Name) { + const auto *Object = static_cast(YamlIO.getContext()); + MapVector Flags = getFlags(Object->Header.Machine); - uint8_t denormalize(IO &) { return Visibility | Other; } + auto It = Flags.find(Name); + if (It != Flags.end()) + return It->second; + + uint8_t Val; + if (to_integer(Name, Val)) + return Val; + + YamlIO.setError("an unknown value is used for symbol's 'Other' field: " + + Name); + return 0; + } - ELFYAML::ELF_STV Visibility; - ELFYAML::ELF_STO Other; + Optional denormalize(IO &) { + if (!Other) + return None; + uint8_t Ret = 0; + for (StOtherPiece &Val : *Other) + Ret |= toValue(Val); + return Ret; + } + + // st_other field is used to encode symbol visibility and platform-dependent + // flags and values. This method returns a name to value map that is used for + // parsing and encoding this field. + MapVector getFlags(unsigned EMachine) { + MapVector Map; + // STV_* values are just enumeration values. 
We add them in a reversed order + // because when we convert the st_other to named constants when printing + // YAML we want to use a maximum number of bits on each step: + // when we have st_other == 3, we want to print it as STV_PROTECTED (3), but + // not as STV_HIDDEN (2) + STV_INTERNAL (1). + Map["STV_PROTECTED"] = ELF::STV_PROTECTED; + Map["STV_HIDDEN"] = ELF::STV_HIDDEN; + Map["STV_INTERNAL"] = ELF::STV_INTERNAL; + // STV_DEFAULT is used to represent the default visibility and has a value + // 0. We want to be able to read it from YAML documents, but there is no + // reason to print it. + if (!YamlIO.outputting()) + Map["STV_DEFAULT"] = ELF::STV_DEFAULT; + + // MIPS is not consistent. All of the STO_MIPS_* values are bit flags, + // except STO_MIPS_MIPS16 which overlaps them. It should be checked and + // consumed first when we print the output, because we do not want to print + // any other flags that have the same bits instead. + if (EMachine == ELF::EM_MIPS) { + Map["STO_MIPS_MIPS16"] = ELF::STO_MIPS_MIPS16; + Map["STO_MIPS_MICROMIPS"] = ELF::STO_MIPS_MICROMIPS; + Map["STO_MIPS_PIC"] = ELF::STO_MIPS_PIC; + Map["STO_MIPS_PLT"] = ELF::STO_MIPS_PLT; + Map["STO_MIPS_OPTIONAL"] = ELF::STO_MIPS_OPTIONAL; + } + return Map; + } + + IO &YamlIO; + Optional> Other; + std::string UnknownFlagsHolder; }; } // end anonymous namespace @@ -888,17 +965,21 @@ void MappingTraits::mapping(IO &IO, ELFYAML::Symbol &Symbol) { IO.mapOptional("Binding", Symbol.Binding, ELFYAML::ELF_STB(0)); IO.mapOptional("Value", Symbol.Value, Hex64(0)); IO.mapOptional("Size", Symbol.Size, Hex64(0)); - MappingNormalization Keys(IO, Symbol.Other); - IO.mapOptional("Visibility", Keys->Visibility, ELFYAML::ELF_STV(0)); - IO.mapOptional("Other", Keys->Other, ELFYAML::ELF_STO(0)); + + // Symbol's Other field is a bit special. It is usually a field that + // represents st_other and holds the symbol visibility. However, on some + // platforms, it can contain bit fields and regular values, or even sometimes a + // crazy mix of them (see comments for NormalizedOther). Because of this, we + // need special handling. + MappingNormalization> Keys(IO, + Symbol.Other); + IO.mapOptional("Other", Keys->Other); } StringRef MappingTraits::validate(IO &IO, ELFYAML::Symbol &Symbol) { if (Symbol.Index && Symbol.Section.data()) return "Index and Section cannot both be specified for Symbol"; - if (Symbol.Index && *Symbol.Index == ELFYAML::ELF_SHN(ELF::SHN_XINDEX)) - return "Large indexes are not supported"; if (Symbol.NameIndex && !Symbol.Name.empty()) return "Name and NameIndex cannot both be specified for Symbol"; return StringRef(); @@ -914,10 +995,11 @@ static void commonSectionMapping(IO &IO, ELFYAML::Section &Section) { IO.mapOptional("EntSize", Section.EntSize); // obj2yaml does not dump these fields. They are expected to be empty when we - // are producing YAML, because yaml2obj sets appropriate values for sh_offset - // and sh_size automatically when they are not explicitly defined. + // are producing YAML, because yaml2obj sets appropriate values for them + // automatically when they are not explicitly defined. 
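As a usage sketch for the st_other handling introduced above and for the yaml2elf() entry point added earlier in this patch, the standalone driver below parses a minimal document straight into ELFYAML::Object and hands it to the writer. This bypasses the yaml2obj front end, and it assumes that yaml::ErrorHandler accepts a callable taking const Twine &; the keys and flag names (FileHeader, Symbols, Other, STV_PROTECTED, STO_MIPS_MICROMIPS) are the ones mapped in this file, and the MIPS flag is only accepted because Machine is EM_MIPS.

#include "llvm/ADT/Twine.h"
#include "llvm/ObjectYAML/ELFYAML.h"
#include "llvm/ObjectYAML/yaml2obj.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// A minimal relocatable MIPS object with one symbol whose st_other field is
// given as a list of named flags (parsed by NormalizedOther::denormalize).
static const char DocText[] = R"(--- !ELF
FileHeader:
  Class:   ELFCLASS64
  Data:    ELFDATA2LSB
  Type:    ET_REL
  Machine: EM_MIPS
Symbols:
  - Name:  foo
    Other: [ STV_PROTECTED, STO_MIPS_MICROMIPS ]
)";

int main() {
  ELFYAML::Object Doc;
  yaml::Input YIn(DocText);
  YIn >> Doc;                      // Parsed via MappingTraits<ELFYAML::Object>.
  if (YIn.error())
    return 1;

  // yaml2elf() dispatches on Class/Data to the matching ELFState instantiation.
  bool OK = yaml::yaml2elf(Doc, outs(), [](const Twine &Msg) {
    errs() << "yaml2elf: " << Msg << "\n";
  });
  return OK ? 0 : 1;
}

On output, NormalizedOther performs the reverse mapping: named constants are emitted for the bits they cover, and any leftover bits are printed as a plain integer.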
assert(!IO.outputting() || (!Section.ShOffset.hasValue() && !Section.ShSize.hasValue())); + IO.mapOptional("ShName", Section.ShName); IO.mapOptional("ShOffset", Section.ShOffset); IO.mapOptional("ShSize", Section.ShSize); } @@ -935,6 +1017,21 @@ static void sectionMapping(IO &IO, ELFYAML::RawContentSection &Section) { IO.mapOptional("Info", Section.Info); } +static void sectionMapping(IO &IO, ELFYAML::StackSizesSection &Section) { + commonSectionMapping(IO, Section); + IO.mapOptional("Content", Section.Content); + IO.mapOptional("Size", Section.Size); + IO.mapOptional("Entries", Section.Entries); +} + +static void sectionMapping(IO &IO, ELFYAML::HashSection &Section) { + commonSectionMapping(IO, Section); + IO.mapOptional("Content", Section.Content); + IO.mapOptional("Bucket", Section.Bucket); + IO.mapOptional("Chain", Section.Chain); + IO.mapOptional("Size", Section.Size); +} + static void sectionMapping(IO &IO, ELFYAML::NoBitsSection &Section) { commonSectionMapping(IO, Section); IO.mapOptional("Size", Section.Size, Hex64(0)); @@ -969,6 +1066,18 @@ static void groupSectionMapping(IO &IO, ELFYAML::Group &Group) { IO.mapRequired("Members", Group.Members); } +static void sectionMapping(IO &IO, ELFYAML::SymtabShndxSection &Section) { + commonSectionMapping(IO, Section); + IO.mapRequired("Entries", Section.Entries); +} + +static void sectionMapping(IO &IO, ELFYAML::AddrsigSection &Section) { + commonSectionMapping(IO, Section); + IO.mapOptional("Content", Section.Content); + IO.mapOptional("Size", Section.Size); + IO.mapOptional("Symbols", Section.Symbols); +} + void MappingTraits::mapping( IO &IO, ELFYAML::SectionOrType §ionOrType) { IO.mapRequired("SectionOrType", sectionOrType.sectionNameOrType); @@ -1029,6 +1138,11 @@ void MappingTraits>::mapping( Section.reset(new ELFYAML::NoBitsSection()); sectionMapping(IO, *cast(Section.get())); break; + case ELF::SHT_HASH: + if (!IO.outputting()) + Section.reset(new ELFYAML::HashSection()); + sectionMapping(IO, *cast(Section.get())); + break; case ELF::SHT_MIPS_ABIFLAGS: if (!IO.outputting()) Section.reset(new ELFYAML::MipsABIFlags()); @@ -1049,21 +1163,113 @@ void MappingTraits>::mapping( Section.reset(new ELFYAML::VerneedSection()); sectionMapping(IO, *cast(Section.get())); break; - default: + case ELF::SHT_SYMTAB_SHNDX: if (!IO.outputting()) - Section.reset(new ELFYAML::RawContentSection()); - sectionMapping(IO, *cast(Section.get())); + Section.reset(new ELFYAML::SymtabShndxSection()); + sectionMapping(IO, *cast(Section.get())); + break; + case ELF::SHT_LLVM_ADDRSIG: + if (!IO.outputting()) + Section.reset(new ELFYAML::AddrsigSection()); + sectionMapping(IO, *cast(Section.get())); + break; + default: + if (!IO.outputting()) { + StringRef Name; + IO.mapOptional("Name", Name, StringRef()); + Name = ELFYAML::dropUniqueSuffix(Name); + + if (ELFYAML::StackSizesSection::nameMatches(Name)) + Section = std::make_unique(); + else + Section = std::make_unique(); + } + + if (auto S = dyn_cast(Section.get())) + sectionMapping(IO, *S); + else + sectionMapping(IO, *cast(Section.get())); } } StringRef MappingTraits>::validate( IO &io, std::unique_ptr &Section) { - const auto *RawSection = dyn_cast(Section.get()); - if (!RawSection) + if (const auto *RawSection = + dyn_cast(Section.get())) { + if (RawSection->Size && RawSection->Content && + (uint64_t)(*RawSection->Size) < RawSection->Content->binary_size()) + return "Section size must be greater than or equal to the content size"; return {}; - if (RawSection->Size && RawSection->Content && - 
(uint64_t)(*RawSection->Size) < RawSection->Content->binary_size()) - return "Section size must be greater than or equal to the content size"; + } + + if (const auto *SS = dyn_cast(Section.get())) { + if (!SS->Entries && !SS->Content && !SS->Size) + return ".stack_sizes: one of Content, Entries and Size must be specified"; + + if (SS->Size && SS->Content && + (uint64_t)(*SS->Size) < SS->Content->binary_size()) + return ".stack_sizes: Size must be greater than or equal to the content " + "size"; + + // We accept Content, Size or both together when there are no Entries. + if (!SS->Entries) + return {}; + + if (SS->Size) + return ".stack_sizes: Size and Entries cannot be used together"; + if (SS->Content) + return ".stack_sizes: Content and Entries cannot be used together"; + return {}; + } + + if (const auto *HS = dyn_cast(Section.get())) { + if (!HS->Content && !HS->Bucket && !HS->Chain && !HS->Size) + return "one of \"Content\", \"Size\", \"Bucket\" or \"Chain\" must be " + "specified"; + + if (HS->Content || HS->Size) { + if (HS->Size && HS->Content && + (uint64_t)*HS->Size < HS->Content->binary_size()) + return "\"Size\" must be greater than or equal to the content " + "size"; + + if (HS->Bucket) + return "\"Bucket\" cannot be used with \"Content\" or \"Size\""; + if (HS->Chain) + return "\"Chain\" cannot be used with \"Content\" or \"Size\""; + return {}; + } + + if ((HS->Bucket && !HS->Chain) || (!HS->Bucket && HS->Chain)) + return "\"Bucket\" and \"Chain\" must be used together"; + return {}; + } + + if (const auto *Sec = dyn_cast(Section.get())) { + if (!Sec->Symbols && !Sec->Content && !Sec->Size) + return "one of \"Content\", \"Size\" or \"Symbols\" must be specified"; + + if (Sec->Content || Sec->Size) { + if (Sec->Size && Sec->Content && + (uint64_t)*Sec->Size < Sec->Content->binary_size()) + return "\"Size\" must be greater than or equal to the content " + "size"; + + if (Sec->Symbols) + return "\"Symbols\" cannot be used with \"Content\" or \"Size\""; + return {}; + } + + if (!Sec->Symbols) + return {}; + + for (const ELFYAML::AddrsigSymbol &AS : *Sec->Symbols) + if (AS.Index && AS.Name) + return "\"Index\" and \"Name\" cannot be used together when defining a " + "symbol"; + return {}; + } + return {}; } @@ -1092,6 +1298,13 @@ struct NormalizedMips64RelType { } // end anonymous namespace +void MappingTraits::mapping( + IO &IO, ELFYAML::StackSizeEntry &E) { + assert(IO.getContext() && "The IO context is not initialized"); + IO.mapOptional("Address", E.Address, Hex64(0)); + IO.mapRequired("Size", E.Size); +} + void MappingTraits::mapping(IO &IO, ELFYAML::DynamicEntry &Rel) { assert(IO.getContext() && "The IO context is not initialized"); @@ -1164,6 +1377,12 @@ void MappingTraits::mapping(IO &IO, ELFYAML::Object &Object) { IO.setContext(nullptr); } +void MappingTraits::mapping(IO &IO, ELFYAML::AddrsigSymbol &Sym) { + assert(IO.getContext() && "The IO context is not initialized"); + IO.mapOptional("Name", Sym.Name); + IO.mapOptional("Index", Sym.Index); +} + LLVM_YAML_STRONG_TYPEDEF(uint8_t, MIPS_AFL_REG) LLVM_YAML_STRONG_TYPEDEF(uint8_t, MIPS_ABI_FP) LLVM_YAML_STRONG_TYPEDEF(uint32_t, MIPS_AFL_EXT) diff --git a/lib/ObjectYAML/MachOEmitter.cpp b/lib/ObjectYAML/MachOEmitter.cpp new file mode 100644 index 000000000000..b56f811ce67d --- /dev/null +++ b/lib/ObjectYAML/MachOEmitter.cpp @@ -0,0 +1,580 @@ +//===- yaml2macho - Convert YAML to a Mach object file --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// The Mach component of yaml2obj. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/ObjectYAML/DWARFEmitter.h" +#include "llvm/ObjectYAML/ObjectYAML.h" +#include "llvm/ObjectYAML/yaml2obj.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" + +#include "llvm/Support/Format.h" + +using namespace llvm; + +namespace { + +class MachOWriter { +public: + MachOWriter(MachOYAML::Object &Obj) : Obj(Obj), is64Bit(true), fileStart(0) { + is64Bit = Obj.Header.magic == MachO::MH_MAGIC_64 || + Obj.Header.magic == MachO::MH_CIGAM_64; + memset(reinterpret_cast(&Header), 0, sizeof(MachO::mach_header_64)); + } + + void writeMachO(raw_ostream &OS); + +private: + void writeHeader(raw_ostream &OS); + void writeLoadCommands(raw_ostream &OS); + void writeSectionData(raw_ostream &OS); + void writeLinkEditData(raw_ostream &OS); + + void writeBindOpcodes(raw_ostream &OS, + std::vector &BindOpcodes); + // LinkEdit writers + void writeRebaseOpcodes(raw_ostream &OS); + void writeBasicBindOpcodes(raw_ostream &OS); + void writeWeakBindOpcodes(raw_ostream &OS); + void writeLazyBindOpcodes(raw_ostream &OS); + void writeNameList(raw_ostream &OS); + void writeStringTable(raw_ostream &OS); + void writeExportTrie(raw_ostream &OS); + + void dumpExportEntry(raw_ostream &OS, MachOYAML::ExportEntry &Entry); + void ZeroToOffset(raw_ostream &OS, size_t offset); + + MachOYAML::Object &Obj; + bool is64Bit; + uint64_t fileStart; + + MachO::mach_header_64 Header; +}; + +void MachOWriter::writeMachO(raw_ostream &OS) { + fileStart = OS.tell(); + writeHeader(OS); + writeLoadCommands(OS); + writeSectionData(OS); +} + +void MachOWriter::writeHeader(raw_ostream &OS) { + Header.magic = Obj.Header.magic; + Header.cputype = Obj.Header.cputype; + Header.cpusubtype = Obj.Header.cpusubtype; + Header.filetype = Obj.Header.filetype; + Header.ncmds = Obj.Header.ncmds; + Header.sizeofcmds = Obj.Header.sizeofcmds; + Header.flags = Obj.Header.flags; + Header.reserved = Obj.Header.reserved; + + if (Obj.IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(Header); + + auto header_size = + is64Bit ? 
sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); + OS.write((const char *)&Header, header_size); +} + +template +SectionType constructSection(MachOYAML::Section Sec) { + SectionType TempSec; + memcpy(reinterpret_cast(&TempSec.sectname[0]), &Sec.sectname[0], 16); + memcpy(reinterpret_cast(&TempSec.segname[0]), &Sec.segname[0], 16); + TempSec.addr = Sec.addr; + TempSec.size = Sec.size; + TempSec.offset = Sec.offset; + TempSec.align = Sec.align; + TempSec.reloff = Sec.reloff; + TempSec.nreloc = Sec.nreloc; + TempSec.flags = Sec.flags; + TempSec.reserved1 = Sec.reserved1; + TempSec.reserved2 = Sec.reserved2; + return TempSec; +} + +template +size_t writeLoadCommandData(MachOYAML::LoadCommand &LC, raw_ostream &OS, + bool IsLittleEndian) { + return 0; +} + +template <> +size_t writeLoadCommandData(MachOYAML::LoadCommand &LC, + raw_ostream &OS, + bool IsLittleEndian) { + size_t BytesWritten = 0; + for (const auto &Sec : LC.Sections) { + auto TempSec = constructSection(Sec); + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(TempSec); + OS.write(reinterpret_cast(&(TempSec)), + sizeof(MachO::section)); + BytesWritten += sizeof(MachO::section); + } + return BytesWritten; +} + +template <> +size_t writeLoadCommandData( + MachOYAML::LoadCommand &LC, raw_ostream &OS, bool IsLittleEndian) { + size_t BytesWritten = 0; + for (const auto &Sec : LC.Sections) { + auto TempSec = constructSection(Sec); + TempSec.reserved3 = Sec.reserved3; + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(TempSec); + OS.write(reinterpret_cast(&(TempSec)), + sizeof(MachO::section_64)); + BytesWritten += sizeof(MachO::section_64); + } + return BytesWritten; +} + +size_t writePayloadString(MachOYAML::LoadCommand &LC, raw_ostream &OS) { + size_t BytesWritten = 0; + if (!LC.PayloadString.empty()) { + OS.write(LC.PayloadString.c_str(), LC.PayloadString.length()); + BytesWritten = LC.PayloadString.length(); + } + return BytesWritten; +} + +template <> +size_t writeLoadCommandData(MachOYAML::LoadCommand &LC, + raw_ostream &OS, + bool IsLittleEndian) { + return writePayloadString(LC, OS); +} + +template <> +size_t writeLoadCommandData(MachOYAML::LoadCommand &LC, + raw_ostream &OS, + bool IsLittleEndian) { + return writePayloadString(LC, OS); +} + +template <> +size_t writeLoadCommandData(MachOYAML::LoadCommand &LC, + raw_ostream &OS, + bool IsLittleEndian) { + return writePayloadString(LC, OS); +} + +template <> +size_t writeLoadCommandData( + MachOYAML::LoadCommand &LC, raw_ostream &OS, bool IsLittleEndian) { + size_t BytesWritten = 0; + for (const auto &T : LC.Tools) { + struct MachO::build_tool_version tool = T; + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(tool); + OS.write(reinterpret_cast(&tool), + sizeof(MachO::build_tool_version)); + BytesWritten += sizeof(MachO::build_tool_version); + } + return BytesWritten; +} + +void ZeroFillBytes(raw_ostream &OS, size_t Size) { + std::vector FillData; + FillData.insert(FillData.begin(), Size, 0); + OS.write(reinterpret_cast(FillData.data()), Size); +} + +void Fill(raw_ostream &OS, size_t Size, uint32_t Data) { + std::vector FillData; + FillData.insert(FillData.begin(), (Size / 4) + 1, Data); + OS.write(reinterpret_cast(FillData.data()), Size); +} + +void MachOWriter::ZeroToOffset(raw_ostream &OS, size_t Offset) { + auto currOffset = OS.tell() - fileStart; + if (currOffset < Offset) + ZeroFillBytes(OS, Offset - currOffset); +} + +void MachOWriter::writeLoadCommands(raw_ostream &OS) { + for (auto &LC : Obj.LoadCommands) { 
+ size_t BytesWritten = 0; + llvm::MachO::macho_load_command Data = LC.Data; + +#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ + case MachO::LCName: \ + if (Obj.IsLittleEndian != sys::IsLittleEndianHost) \ + MachO::swapStruct(Data.LCStruct##_data); \ + OS.write(reinterpret_cast(&(Data.LCStruct##_data)), \ + sizeof(MachO::LCStruct)); \ + BytesWritten = sizeof(MachO::LCStruct); \ + BytesWritten += \ + writeLoadCommandData(LC, OS, Obj.IsLittleEndian); \ + break; + + switch (LC.Data.load_command_data.cmd) { + default: + if (Obj.IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(Data.load_command_data); + OS.write(reinterpret_cast(&(Data.load_command_data)), + sizeof(MachO::load_command)); + BytesWritten = sizeof(MachO::load_command); + BytesWritten += + writeLoadCommandData(LC, OS, Obj.IsLittleEndian); + break; +#include "llvm/BinaryFormat/MachO.def" + } + + if (LC.PayloadBytes.size() > 0) { + OS.write(reinterpret_cast(LC.PayloadBytes.data()), + LC.PayloadBytes.size()); + BytesWritten += LC.PayloadBytes.size(); + } + + if (LC.ZeroPadBytes > 0) { + ZeroFillBytes(OS, LC.ZeroPadBytes); + BytesWritten += LC.ZeroPadBytes; + } + + // Fill remaining bytes with 0. This will only get hit in partially + // specified test cases. + auto BytesRemaining = LC.Data.load_command_data.cmdsize - BytesWritten; + if (BytesRemaining > 0) { + ZeroFillBytes(OS, BytesRemaining); + } + } +} + +void MachOWriter::writeSectionData(raw_ostream &OS) { + bool FoundLinkEditSeg = false; + for (auto &LC : Obj.LoadCommands) { + switch (LC.Data.load_command_data.cmd) { + case MachO::LC_SEGMENT: + case MachO::LC_SEGMENT_64: + uint64_t segOff = is64Bit ? LC.Data.segment_command_64_data.fileoff + : LC.Data.segment_command_data.fileoff; + if (0 == + strncmp(&LC.Data.segment_command_data.segname[0], "__LINKEDIT", 16)) { + FoundLinkEditSeg = true; + writeLinkEditData(OS); + } + for (auto &Sec : LC.Sections) { + ZeroToOffset(OS, Sec.offset); + // Zero Fill any data between the end of the last thing we wrote and the + // start of this section. + assert((OS.tell() - fileStart <= Sec.offset || + Sec.offset == (uint32_t)0) && + "Wrote too much data somewhere, section offsets don't line up."); + if (0 == strncmp(&Sec.segname[0], "__DWARF", 16)) { + if (0 == strncmp(&Sec.sectname[0], "__debug_str", 16)) { + DWARFYAML::EmitDebugStr(OS, Obj.DWARF); + } else if (0 == strncmp(&Sec.sectname[0], "__debug_abbrev", 16)) { + DWARFYAML::EmitDebugAbbrev(OS, Obj.DWARF); + } else if (0 == strncmp(&Sec.sectname[0], "__debug_aranges", 16)) { + DWARFYAML::EmitDebugAranges(OS, Obj.DWARF); + } else if (0 == strncmp(&Sec.sectname[0], "__debug_pubnames", 16)) { + DWARFYAML::EmitPubSection(OS, Obj.DWARF.PubNames, + Obj.IsLittleEndian); + } else if (0 == strncmp(&Sec.sectname[0], "__debug_pubtypes", 16)) { + DWARFYAML::EmitPubSection(OS, Obj.DWARF.PubTypes, + Obj.IsLittleEndian); + } else if (0 == strncmp(&Sec.sectname[0], "__debug_info", 16)) { + DWARFYAML::EmitDebugInfo(OS, Obj.DWARF); + } else if (0 == strncmp(&Sec.sectname[0], "__debug_line", 16)) { + DWARFYAML::EmitDebugLine(OS, Obj.DWARF); + } + + continue; + } + + // Skip if it's a virtual section. + if (MachO::isVirtualSection(Sec.flags & MachO::SECTION_TYPE)) + continue; + + if (Sec.content) { + yaml::BinaryRef Content = *Sec.content; + Content.writeAsBinary(OS); + ZeroFillBytes(OS, Sec.size - Content.binary_size()); + } else { + // Fill section data with 0xDEADBEEF. + Fill(OS, Sec.size, 0xDEADBEEFu); + } + } + uint64_t segSize = is64Bit ? 
LC.Data.segment_command_64_data.filesize + : LC.Data.segment_command_data.filesize; + ZeroToOffset(OS, segOff + segSize); + break; + } + } + // Old PPC Object Files didn't have __LINKEDIT segments, the data was just + // stuck at the end of the file. + if (!FoundLinkEditSeg) + writeLinkEditData(OS); +} + +void MachOWriter::writeBindOpcodes( + raw_ostream &OS, std::vector &BindOpcodes) { + + for (auto Opcode : BindOpcodes) { + uint8_t OpByte = Opcode.Opcode | Opcode.Imm; + OS.write(reinterpret_cast(&OpByte), 1); + for (auto Data : Opcode.ULEBExtraData) { + encodeULEB128(Data, OS); + } + for (auto Data : Opcode.SLEBExtraData) { + encodeSLEB128(Data, OS); + } + if (!Opcode.Symbol.empty()) { + OS.write(Opcode.Symbol.data(), Opcode.Symbol.size()); + OS.write('\0'); + } + } +} + +void MachOWriter::dumpExportEntry(raw_ostream &OS, + MachOYAML::ExportEntry &Entry) { + encodeSLEB128(Entry.TerminalSize, OS); + if (Entry.TerminalSize > 0) { + encodeSLEB128(Entry.Flags, OS); + if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) { + encodeSLEB128(Entry.Other, OS); + OS << Entry.ImportName; + OS.write('\0'); + } else { + encodeSLEB128(Entry.Address, OS); + if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) + encodeSLEB128(Entry.Other, OS); + } + } + OS.write(static_cast(Entry.Children.size())); + for (auto EE : Entry.Children) { + OS << EE.Name; + OS.write('\0'); + encodeSLEB128(EE.NodeOffset, OS); + } + for (auto EE : Entry.Children) + dumpExportEntry(OS, EE); +} + +void MachOWriter::writeExportTrie(raw_ostream &OS) { + dumpExportEntry(OS, Obj.LinkEdit.ExportTrie); +} + +template +void writeNListEntry(MachOYAML::NListEntry &NLE, raw_ostream &OS, + bool IsLittleEndian) { + NListType ListEntry; + ListEntry.n_strx = NLE.n_strx; + ListEntry.n_type = NLE.n_type; + ListEntry.n_sect = NLE.n_sect; + ListEntry.n_desc = NLE.n_desc; + ListEntry.n_value = NLE.n_value; + + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(ListEntry); + OS.write(reinterpret_cast(&ListEntry), sizeof(NListType)); +} + +void MachOWriter::writeLinkEditData(raw_ostream &OS) { + typedef void (MachOWriter::*writeHandler)(raw_ostream &); + typedef std::pair writeOperation; + std::vector WriteQueue; + + MachO::dyld_info_command *DyldInfoOnlyCmd = 0; + MachO::symtab_command *SymtabCmd = 0; + for (auto &LC : Obj.LoadCommands) { + switch (LC.Data.load_command_data.cmd) { + case MachO::LC_SYMTAB: + SymtabCmd = &LC.Data.symtab_command_data; + WriteQueue.push_back( + std::make_pair(SymtabCmd->symoff, &MachOWriter::writeNameList)); + WriteQueue.push_back( + std::make_pair(SymtabCmd->stroff, &MachOWriter::writeStringTable)); + break; + case MachO::LC_DYLD_INFO_ONLY: + DyldInfoOnlyCmd = &LC.Data.dyld_info_command_data; + WriteQueue.push_back(std::make_pair(DyldInfoOnlyCmd->rebase_off, + &MachOWriter::writeRebaseOpcodes)); + WriteQueue.push_back(std::make_pair(DyldInfoOnlyCmd->bind_off, + &MachOWriter::writeBasicBindOpcodes)); + WriteQueue.push_back(std::make_pair(DyldInfoOnlyCmd->weak_bind_off, + &MachOWriter::writeWeakBindOpcodes)); + WriteQueue.push_back(std::make_pair(DyldInfoOnlyCmd->lazy_bind_off, + &MachOWriter::writeLazyBindOpcodes)); + WriteQueue.push_back(std::make_pair(DyldInfoOnlyCmd->export_off, + &MachOWriter::writeExportTrie)); + break; + } + } + + llvm::sort(WriteQueue, [](const writeOperation &a, const writeOperation &b) { + return a.first < b.first; + }); + + for (auto writeOp : WriteQueue) { + ZeroToOffset(OS, writeOp.first); + (this->*writeOp.second)(OS); + } +} + +void 
MachOWriter::writeRebaseOpcodes(raw_ostream &OS) { + MachOYAML::LinkEditData &LinkEdit = Obj.LinkEdit; + + for (auto Opcode : LinkEdit.RebaseOpcodes) { + uint8_t OpByte = Opcode.Opcode | Opcode.Imm; + OS.write(reinterpret_cast(&OpByte), 1); + for (auto Data : Opcode.ExtraData) + encodeULEB128(Data, OS); + } +} + +void MachOWriter::writeBasicBindOpcodes(raw_ostream &OS) { + writeBindOpcodes(OS, Obj.LinkEdit.BindOpcodes); +} + +void MachOWriter::writeWeakBindOpcodes(raw_ostream &OS) { + writeBindOpcodes(OS, Obj.LinkEdit.WeakBindOpcodes); +} + +void MachOWriter::writeLazyBindOpcodes(raw_ostream &OS) { + writeBindOpcodes(OS, Obj.LinkEdit.LazyBindOpcodes); +} + +void MachOWriter::writeNameList(raw_ostream &OS) { + for (auto NLE : Obj.LinkEdit.NameList) { + if (is64Bit) + writeNListEntry(NLE, OS, Obj.IsLittleEndian); + else + writeNListEntry(NLE, OS, Obj.IsLittleEndian); + } +} + +void MachOWriter::writeStringTable(raw_ostream &OS) { + for (auto Str : Obj.LinkEdit.StringTable) { + OS.write(Str.data(), Str.size()); + OS.write('\0'); + } +} + +class UniversalWriter { +public: + UniversalWriter(yaml::YamlObjectFile &ObjectFile) + : ObjectFile(ObjectFile), fileStart(0) {} + + void writeMachO(raw_ostream &OS); + +private: + void writeFatHeader(raw_ostream &OS); + void writeFatArchs(raw_ostream &OS); + + void ZeroToOffset(raw_ostream &OS, size_t offset); + + yaml::YamlObjectFile &ObjectFile; + uint64_t fileStart; +}; + +void UniversalWriter::writeMachO(raw_ostream &OS) { + fileStart = OS.tell(); + if (ObjectFile.MachO) { + MachOWriter Writer(*ObjectFile.MachO); + Writer.writeMachO(OS); + return; + } + + writeFatHeader(OS); + writeFatArchs(OS); + + auto &FatFile = *ObjectFile.FatMachO; + assert(FatFile.FatArchs.size() == FatFile.Slices.size()); + for (size_t i = 0; i < FatFile.Slices.size(); i++) { + ZeroToOffset(OS, FatFile.FatArchs[i].offset); + MachOWriter Writer(FatFile.Slices[i]); + Writer.writeMachO(OS); + + auto SliceEnd = FatFile.FatArchs[i].offset + FatFile.FatArchs[i].size; + ZeroToOffset(OS, SliceEnd); + } +} + +void UniversalWriter::writeFatHeader(raw_ostream &OS) { + auto &FatFile = *ObjectFile.FatMachO; + MachO::fat_header header; + header.magic = FatFile.Header.magic; + header.nfat_arch = FatFile.Header.nfat_arch; + if (sys::IsLittleEndianHost) + swapStruct(header); + OS.write(reinterpret_cast(&header), sizeof(MachO::fat_header)); +} + +template +FatArchType constructFatArch(MachOYAML::FatArch &Arch) { + FatArchType FatArch; + FatArch.cputype = Arch.cputype; + FatArch.cpusubtype = Arch.cpusubtype; + FatArch.offset = Arch.offset; + FatArch.size = Arch.size; + FatArch.align = Arch.align; + return FatArch; +} + +template +void writeFatArch(MachOYAML::FatArch &LC, raw_ostream &OS) {} + +template <> +void writeFatArch(MachOYAML::FatArch &Arch, raw_ostream &OS) { + auto FatArch = constructFatArch(Arch); + if (sys::IsLittleEndianHost) + swapStruct(FatArch); + OS.write(reinterpret_cast(&FatArch), sizeof(MachO::fat_arch)); +} + +template <> +void writeFatArch(MachOYAML::FatArch &Arch, + raw_ostream &OS) { + auto FatArch = constructFatArch(Arch); + FatArch.reserved = Arch.reserved; + if (sys::IsLittleEndianHost) + swapStruct(FatArch); + OS.write(reinterpret_cast(&FatArch), + sizeof(MachO::fat_arch_64)); +} + +void UniversalWriter::writeFatArchs(raw_ostream &OS) { + auto &FatFile = *ObjectFile.FatMachO; + bool is64Bit = FatFile.Header.magic == MachO::FAT_MAGIC_64; + for (auto Arch : FatFile.FatArchs) { + if (is64Bit) + writeFatArch(Arch, OS); + else + writeFatArch(Arch, OS); + } +} + +void 
UniversalWriter::ZeroToOffset(raw_ostream &OS, size_t Offset) { + auto currOffset = OS.tell() - fileStart; + if (currOffset < Offset) + ZeroFillBytes(OS, Offset - currOffset); +} + +} // end anonymous namespace + +namespace llvm { +namespace yaml { + +bool yaml2macho(YamlObjectFile &Doc, raw_ostream &Out, ErrorHandler /*EH*/) { + UniversalWriter Writer(Doc); + Writer.writeMachO(Out); + return true; +} + +} // namespace yaml +} // namespace llvm diff --git a/lib/ObjectYAML/MachOYAML.cpp b/lib/ObjectYAML/MachOYAML.cpp index d12f12cf4435..0f7cd1e1495c 100644 --- a/lib/ObjectYAML/MachOYAML.cpp +++ b/lib/ObjectYAML/MachOYAML.cpp @@ -287,6 +287,15 @@ void MappingTraits::mapping(IO &IO, IO.mapRequired("reserved1", Section.reserved1); IO.mapRequired("reserved2", Section.reserved2); IO.mapOptional("reserved3", Section.reserved3); + IO.mapOptional("content", Section.content); +} + +StringRef +MappingTraits::validate(IO &IO, + MachOYAML::Section &Section) { + if (Section.content && Section.size < Section.content->binary_size()) + return "Section size must be greater than or equal to the content size"; + return {}; } void MappingTraits::mapping( diff --git a/lib/ObjectYAML/MinidumpEmitter.cpp b/lib/ObjectYAML/MinidumpEmitter.cpp new file mode 100644 index 000000000000..bbfd2cd8cbab --- /dev/null +++ b/lib/ObjectYAML/MinidumpEmitter.cpp @@ -0,0 +1,247 @@ +//===- yaml2minidump.cpp - Convert a YAML file to a minidump file ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ObjectYAML/MinidumpYAML.h" +#include "llvm/ObjectYAML/yaml2obj.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace llvm::minidump; +using namespace llvm::MinidumpYAML; + +namespace { +/// A helper class to manage the placement of various structures into the final +/// minidump binary. Space for objects can be allocated via various allocate*** +/// methods, while the final minidump file is written by calling the writeTo +/// method. The plain versions of allocation functions take a reference to the +/// data which is to be written (and hence the data must be available until +/// writeTo is called), while the "New" versions allocate the data in an +/// allocator-managed buffer, which is available until the allocator object is +/// destroyed. For both kinds of functions, it is possible to modify the +/// data for which the space has been "allocated" until the final writeTo call. +/// This is useful for "linking" the allocated structures via their offsets. 
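+/// For instance, yaml2minidump() below allocates space for the header first,
+/// then assigns the header's StreamDirectoryRVA from a later allocateArray()
+/// call; the header bytes emitted by writeTo() pick up that assignment.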
+class BlobAllocator { +public: + size_t tell() const { return NextOffset; } + + size_t allocateCallback(size_t Size, + std::function Callback) { + size_t Offset = NextOffset; + NextOffset += Size; + Callbacks.push_back(std::move(Callback)); + return Offset; + } + + size_t allocateBytes(ArrayRef Data) { + return allocateCallback( + Data.size(), [Data](raw_ostream &OS) { OS << toStringRef(Data); }); + } + + size_t allocateBytes(yaml::BinaryRef Data) { + return allocateCallback(Data.binary_size(), [Data](raw_ostream &OS) { + Data.writeAsBinary(OS); + }); + } + + template size_t allocateArray(ArrayRef Data) { + return allocateBytes({reinterpret_cast(Data.data()), + sizeof(T) * Data.size()}); + } + + template + std::pair> + allocateNewArray(const iterator_range &Range); + + template size_t allocateObject(const T &Data) { + return allocateArray(makeArrayRef(Data)); + } + + template + std::pair allocateNewObject(Types &&... Args) { + T *Object = new (Temporaries.Allocate()) T(std::forward(Args)...); + return {allocateObject(*Object), Object}; + } + + size_t allocateString(StringRef Str); + + void writeTo(raw_ostream &OS) const; + +private: + size_t NextOffset = 0; + + BumpPtrAllocator Temporaries; + std::vector> Callbacks; +}; +} // namespace + +template +std::pair> +BlobAllocator::allocateNewArray(const iterator_range &Range) { + size_t Num = std::distance(Range.begin(), Range.end()); + MutableArrayRef Array(Temporaries.Allocate(Num), Num); + std::uninitialized_copy(Range.begin(), Range.end(), Array.begin()); + return {allocateArray(Array), Array}; +} + +size_t BlobAllocator::allocateString(StringRef Str) { + SmallVector WStr; + bool OK = convertUTF8ToUTF16String(Str, WStr); + assert(OK && "Invalid UTF8 in Str?"); + (void)OK; + + // The utf16 string is null-terminated, but the terminator is not counted in + // the string size. + WStr.push_back(0); + size_t Result = + allocateNewObject(2 * (WStr.size() - 1)).first; + allocateNewArray(make_range(WStr.begin(), WStr.end())); + return Result; +} + +void BlobAllocator::writeTo(raw_ostream &OS) const { + size_t BeginOffset = OS.tell(); + for (const auto &Callback : Callbacks) + Callback(OS); + assert(OS.tell() == BeginOffset + NextOffset && + "Callbacks wrote an unexpected number of bytes."); + (void)BeginOffset; +} + +static LocationDescriptor layout(BlobAllocator &File, yaml::BinaryRef Data) { + return {support::ulittle32_t(Data.binary_size()), + support::ulittle32_t(File.allocateBytes(Data))}; +} + +static size_t layout(BlobAllocator &File, MinidumpYAML::ExceptionStream &S) { + File.allocateObject(S.MDExceptionStream); + + size_t DataEnd = File.tell(); + + // Lay out the thread context data, (which is not a part of the stream). + // TODO: This usually (always?) matches the thread context of the + // corresponding thread, and may overlap memory regions as well. We could + // add a level of indirection to the MinidumpYAML format (like an array of + // Blobs that the LocationDescriptors index into) to be able to distinguish + // the cases where location descriptions overlap vs happen to reference + // identical data. 
+ S.MDExceptionStream.ThreadContext = layout(File, S.ThreadContext); + + return DataEnd; +} + +static void layout(BlobAllocator &File, MemoryListStream::entry_type &Range) { + Range.Entry.Memory = layout(File, Range.Content); +} + +static void layout(BlobAllocator &File, ModuleListStream::entry_type &M) { + M.Entry.ModuleNameRVA = File.allocateString(M.Name); + + M.Entry.CvRecord = layout(File, M.CvRecord); + M.Entry.MiscRecord = layout(File, M.MiscRecord); +} + +static void layout(BlobAllocator &File, ThreadListStream::entry_type &T) { + T.Entry.Stack.Memory = layout(File, T.Stack); + T.Entry.Context = layout(File, T.Context); +} + +template +static size_t layout(BlobAllocator &File, + MinidumpYAML::detail::ListStream &S) { + + File.allocateNewObject(S.Entries.size()); + for (auto &E : S.Entries) + File.allocateObject(E.Entry); + + size_t DataEnd = File.tell(); + + // Lay out the auxiliary data, (which is not a part of the stream). + DataEnd = File.tell(); + for (auto &E : S.Entries) + layout(File, E); + + return DataEnd; +} + +static Directory layout(BlobAllocator &File, Stream &S) { + Directory Result; + Result.Type = S.Type; + Result.Location.RVA = File.tell(); + Optional DataEnd; + switch (S.Kind) { + case Stream::StreamKind::Exception: + DataEnd = layout(File, cast(S)); + break; + case Stream::StreamKind::MemoryInfoList: { + MemoryInfoListStream &InfoList = cast(S); + File.allocateNewObject( + sizeof(minidump::MemoryInfoListHeader), sizeof(minidump::MemoryInfo), + InfoList.Infos.size()); + File.allocateArray(makeArrayRef(InfoList.Infos)); + break; + } + case Stream::StreamKind::MemoryList: + DataEnd = layout(File, cast(S)); + break; + case Stream::StreamKind::ModuleList: + DataEnd = layout(File, cast(S)); + break; + case Stream::StreamKind::RawContent: { + RawContentStream &Raw = cast(S); + File.allocateCallback(Raw.Size, [&Raw](raw_ostream &OS) { + Raw.Content.writeAsBinary(OS); + assert(Raw.Content.binary_size() <= Raw.Size); + OS << std::string(Raw.Size - Raw.Content.binary_size(), '\0'); + }); + break; + } + case Stream::StreamKind::SystemInfo: { + SystemInfoStream &SystemInfo = cast(S); + File.allocateObject(SystemInfo.Info); + // The CSD string is not a part of the stream. + DataEnd = File.tell(); + SystemInfo.Info.CSDVersionRVA = File.allocateString(SystemInfo.CSDVersion); + break; + } + case Stream::StreamKind::TextContent: + File.allocateArray(arrayRefFromStringRef(cast(S).Text)); + break; + case Stream::StreamKind::ThreadList: + DataEnd = layout(File, cast(S)); + break; + } + // If DataEnd is not set, we assume everything we generated is a part of the + // stream. 
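+ // (When DataEnd is set, anything laid out after it is excluded: in the
+ // SystemInfo case above, the CSD version string still lands in the file but
+ // is not counted in the directory entry's DataSize.)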
+ Result.Location.DataSize = + DataEnd.getValueOr(File.tell()) - Result.Location.RVA; + return Result; +} + +namespace llvm { +namespace yaml { + +bool yaml2minidump(MinidumpYAML::Object &Obj, raw_ostream &Out, + ErrorHandler /*EH*/) { + BlobAllocator File; + File.allocateObject(Obj.Header); + + std::vector StreamDirectory(Obj.Streams.size()); + Obj.Header.StreamDirectoryRVA = + File.allocateArray(makeArrayRef(StreamDirectory)); + Obj.Header.NumberOfStreams = StreamDirectory.size(); + + for (auto &Stream : enumerate(Obj.Streams)) + StreamDirectory[Stream.index()] = layout(File, *Stream.value()); + + File.writeTo(Out); + return true; +} + +} // namespace yaml +} // namespace llvm diff --git a/lib/ObjectYAML/MinidumpYAML.cpp b/lib/ObjectYAML/MinidumpYAML.cpp index f5f2acd0cc4b..21b2a4d78629 100644 --- a/lib/ObjectYAML/MinidumpYAML.cpp +++ b/lib/ObjectYAML/MinidumpYAML.cpp @@ -8,110 +8,11 @@ #include "llvm/ObjectYAML/MinidumpYAML.h" #include "llvm/Support/Allocator.h" -#include "llvm/Support/ConvertUTF.h" using namespace llvm; using namespace llvm::MinidumpYAML; using namespace llvm::minidump; -namespace { -/// A helper class to manage the placement of various structures into the final -/// minidump binary. Space for objects can be allocated via various allocate*** -/// methods, while the final minidump file is written by calling the writeTo -/// method. The plain versions of allocation functions take a reference to the -/// data which is to be written (and hence the data must be available until -/// writeTo is called), while the "New" versions allocate the data in an -/// allocator-managed buffer, which is available until the allocator object is -/// destroyed. For both kinds of functions, it is possible to modify the -/// data for which the space has been "allocated" until the final writeTo call. -/// This is useful for "linking" the allocated structures via their offsets. -class BlobAllocator { -public: - size_t tell() const { return NextOffset; } - - size_t allocateCallback(size_t Size, - std::function Callback) { - size_t Offset = NextOffset; - NextOffset += Size; - Callbacks.push_back(std::move(Callback)); - return Offset; - } - - size_t allocateBytes(ArrayRef Data) { - return allocateCallback( - Data.size(), [Data](raw_ostream &OS) { OS << toStringRef(Data); }); - } - - size_t allocateBytes(yaml::BinaryRef Data) { - return allocateCallback(Data.binary_size(), [Data](raw_ostream &OS) { - Data.writeAsBinary(OS); - }); - } - - template size_t allocateArray(ArrayRef Data) { - return allocateBytes({reinterpret_cast(Data.data()), - sizeof(T) * Data.size()}); - } - - template - std::pair> - allocateNewArray(const iterator_range &Range); - - template size_t allocateObject(const T &Data) { - return allocateArray(makeArrayRef(Data)); - } - - template - std::pair allocateNewObject(Types &&... 
Args) { - T *Object = new (Temporaries.Allocate()) T(std::forward(Args)...); - return {allocateObject(*Object), Object}; - } - - size_t allocateString(StringRef Str); - - void writeTo(raw_ostream &OS) const; - -private: - size_t NextOffset = 0; - - BumpPtrAllocator Temporaries; - std::vector> Callbacks; -}; -} // namespace - -template -std::pair> -BlobAllocator::allocateNewArray(const iterator_range &Range) { - size_t Num = std::distance(Range.begin(), Range.end()); - MutableArrayRef Array(Temporaries.Allocate(Num), Num); - std::uninitialized_copy(Range.begin(), Range.end(), Array.begin()); - return {allocateArray(Array), Array}; -} - -size_t BlobAllocator::allocateString(StringRef Str) { - SmallVector WStr; - bool OK = convertUTF8ToUTF16String(Str, WStr); - assert(OK && "Invalid UTF8 in Str?"); - (void)OK; - - // The utf16 string is null-terminated, but the terminator is not counted in - // the string size. - WStr.push_back(0); - size_t Result = - allocateNewObject(2 * (WStr.size() - 1)).first; - allocateNewArray(make_range(WStr.begin(), WStr.end())); - return Result; -} - -void BlobAllocator::writeTo(raw_ostream &OS) const { - size_t BeginOffset = OS.tell(); - for (const auto &Callback : Callbacks) - Callback(OS); - assert(OS.tell() == BeginOffset + NextOffset && - "Callbacks wrote an unexpected number of bytes."); - (void)BeginOffset; -} - /// Perform an optional yaml-mapping of an endian-aware type EndianType. The /// only purpose of this function is to avoid casting the Default value to the /// endian type; @@ -168,6 +69,10 @@ Stream::~Stream() = default; Stream::StreamKind Stream::getKind(StreamType Type) { switch (Type) { + case StreamType::Exception: + return StreamKind::Exception; + case StreamType::MemoryInfoList: + return StreamKind::MemoryInfoList; case StreamType::MemoryList: return StreamKind::MemoryList; case StreamType::ModuleList: @@ -192,22 +97,45 @@ Stream::StreamKind Stream::getKind(StreamType Type) { std::unique_ptr Stream::create(StreamType Type) { StreamKind Kind = getKind(Type); switch (Kind) { + case StreamKind::Exception: + return std::make_unique(); + case StreamKind::MemoryInfoList: + return std::make_unique(); case StreamKind::MemoryList: - return llvm::make_unique(); + return std::make_unique(); case StreamKind::ModuleList: - return llvm::make_unique(); + return std::make_unique(); case StreamKind::RawContent: - return llvm::make_unique(Type); + return std::make_unique(Type); case StreamKind::SystemInfo: - return llvm::make_unique(); + return std::make_unique(); case StreamKind::TextContent: - return llvm::make_unique(Type); + return std::make_unique(Type); case StreamKind::ThreadList: - return llvm::make_unique(); + return std::make_unique(); } llvm_unreachable("Unhandled stream kind!"); } +void yaml::ScalarBitSetTraits::bitset( + IO &IO, MemoryProtection &Protect) { +#define HANDLE_MDMP_PROTECT(CODE, NAME, NATIVENAME) \ + IO.bitSetCase(Protect, #NATIVENAME, MemoryProtection::NAME); +#include "llvm/BinaryFormat/MinidumpConstants.def" +} + +void yaml::ScalarBitSetTraits::bitset(IO &IO, MemoryState &State) { +#define HANDLE_MDMP_MEMSTATE(CODE, NAME, NATIVENAME) \ + IO.bitSetCase(State, #NATIVENAME, MemoryState::NAME); +#include "llvm/BinaryFormat/MinidumpConstants.def" +} + +void yaml::ScalarBitSetTraits::bitset(IO &IO, MemoryType &Type) { +#define HANDLE_MDMP_MEMTYPE(CODE, NAME, NATIVENAME) \ + IO.bitSetCase(Type, #NATIVENAME, MemoryType::NAME); +#include "llvm/BinaryFormat/MinidumpConstants.def" +} + void yaml::ScalarEnumerationTraits::enumeration( IO &IO, 
ProcessorArchitecture &Arch) { #define HANDLE_MDMP_ARCH(CODE, NAME) \ @@ -314,6 +242,20 @@ void yaml::MappingTraits::mapping(IO &IO, mapOptionalHex(IO, "AMD Extended Features", Info.AMDExtendedFeatures, 0); } +void yaml::MappingTraits::mapping(IO &IO, MemoryInfo &Info) { + mapRequiredHex(IO, "Base Address", Info.BaseAddress); + mapOptionalHex(IO, "Allocation Base", Info.AllocationBase, Info.BaseAddress); + mapRequiredAs(IO, "Allocation Protect", + Info.AllocationProtect); + mapOptionalHex(IO, "Reserved0", Info.Reserved0, 0); + mapRequiredHex(IO, "Region Size", Info.RegionSize); + mapRequiredAs(IO, "State", Info.State); + mapOptionalAs(IO, "Protect", Info.Protect, + Info.AllocationProtect); + mapRequiredAs(IO, "Type", Info.Type); + mapOptionalHex(IO, "Reserved1", Info.Reserved1, 0); +} + void yaml::MappingTraits::mapping(IO &IO, VSFixedFileInfo &Info) { mapOptionalHex(IO, "Signature", Info.Signature, 0); @@ -336,8 +278,7 @@ void yaml::MappingTraits::mapping( mapRequiredHex(IO, "Base of Image", M.Entry.BaseOfImage); mapRequiredHex(IO, "Size of Image", M.Entry.SizeOfImage); mapOptionalHex(IO, "Checksum", M.Entry.Checksum, 0); - IO.mapOptional("Time Date Stamp", M.Entry.TimeDateStamp, - support::ulittle32_t(0)); + mapOptional(IO, "Time Date Stamp", M.Entry.TimeDateStamp, 0); IO.mapRequired("Module Name", M.Name); IO.mapOptional("Version Info", M.Entry.VersionInfo, VSFixedFileInfo()); IO.mapRequired("CodeView Record", M.CvRecord); @@ -363,6 +304,10 @@ void yaml::MappingTraits::mapping( IO, Range.Entry, Range.Content); } +static void streamMapping(yaml::IO &IO, MemoryInfoListStream &Stream) { + IO.mapRequired("Memory Ranges", Stream.Infos); +} + static void streamMapping(yaml::IO &IO, MemoryListStream &Stream) { IO.mapRequired("Memory Ranges", Stream.Entries); } @@ -425,6 +370,32 @@ static void streamMapping(yaml::IO &IO, ThreadListStream &Stream) { IO.mapRequired("Threads", Stream.Entries); } +static void streamMapping(yaml::IO &IO, MinidumpYAML::ExceptionStream &Stream) { + mapRequiredHex(IO, "Thread ID", Stream.MDExceptionStream.ThreadId); + IO.mapRequired("Exception Record", Stream.MDExceptionStream.ExceptionRecord); + IO.mapRequired("Thread Context", Stream.ThreadContext); +} + +void yaml::MappingTraits::mapping( + yaml::IO &IO, minidump::Exception &Exception) { + mapRequiredHex(IO, "Exception Code", Exception.ExceptionCode); + mapOptionalHex(IO, "Exception Flags", Exception.ExceptionFlags, 0); + mapOptionalHex(IO, "Exception Record", Exception.ExceptionRecord, 0); + mapOptionalHex(IO, "Exception Address", Exception.ExceptionAddress, 0); + mapOptional(IO, "Number of Parameters", Exception.NumberParameters, 0); + + for (size_t Index = 0; Index < Exception.MaxParameters; ++Index) { + SmallString<16> Name("Parameter "); + Twine(Index).toVector(Name); + support::ulittle64_t &Field = Exception.ExceptionInformation[Index]; + + if (Index < Exception.NumberParameters) + mapRequiredHex(IO, Name.c_str(), Field); + else + mapOptionalHex(IO, Name.c_str(), Field, 0); + } +} + void yaml::MappingTraits>::mapping( yaml::IO &IO, std::unique_ptr &S) { StreamType Type; @@ -435,6 +406,12 @@ void yaml::MappingTraits>::mapping( if (!IO.outputting()) S = MinidumpYAML::Stream::create(Type); switch (S->Kind) { + case MinidumpYAML::Stream::StreamKind::Exception: + streamMapping(IO, llvm::cast(*S)); + break; + case MinidumpYAML::Stream::StreamKind::MemoryInfoList: + streamMapping(IO, llvm::cast(*S)); + break; case MinidumpYAML::Stream::StreamKind::MemoryList: streamMapping(IO, llvm::cast(*S)); break; @@ -461,6 +438,8 
@@ StringRef yaml::MappingTraits>::validate( switch (S->Kind) { case MinidumpYAML::Stream::StreamKind::RawContent: return streamValidate(cast(*S)); + case MinidumpYAML::Stream::StreamKind::Exception: + case MinidumpYAML::Stream::StreamKind::MemoryInfoList: case MinidumpYAML::Stream::StreamKind::MemoryList: case MinidumpYAML::Stream::StreamKind::ModuleList: case MinidumpYAML::Stream::StreamKind::SystemInfo: @@ -479,118 +458,28 @@ void yaml::MappingTraits::mapping(IO &IO, Object &O) { IO.mapRequired("Streams", O.Streams); } -static LocationDescriptor layout(BlobAllocator &File, yaml::BinaryRef Data) { - return {support::ulittle32_t(Data.binary_size()), - support::ulittle32_t(File.allocateBytes(Data))}; -} - -static void layout(BlobAllocator &File, MemoryListStream::entry_type &Range) { - Range.Entry.Memory = layout(File, Range.Content); -} - -static void layout(BlobAllocator &File, ModuleListStream::entry_type &M) { - M.Entry.ModuleNameRVA = File.allocateString(M.Name); - - M.Entry.CvRecord = layout(File, M.CvRecord); - M.Entry.MiscRecord = layout(File, M.MiscRecord); -} - -static void layout(BlobAllocator &File, ThreadListStream::entry_type &T) { - T.Entry.Stack.Memory = layout(File, T.Stack); - T.Entry.Context = layout(File, T.Context); -} - -template -static size_t layout(BlobAllocator &File, - MinidumpYAML::detail::ListStream &S) { - - File.allocateNewObject(S.Entries.size()); - for (auto &E : S.Entries) - File.allocateObject(E.Entry); - - size_t DataEnd = File.tell(); - - // Lay out the auxiliary data, (which is not a part of the stream). - DataEnd = File.tell(); - for (auto &E : S.Entries) - layout(File, E); - - return DataEnd; -} - -static Directory layout(BlobAllocator &File, Stream &S) { - Directory Result; - Result.Type = S.Type; - Result.Location.RVA = File.tell(); - Optional DataEnd; - switch (S.Kind) { - case Stream::StreamKind::MemoryList: - DataEnd = layout(File, cast(S)); - break; - case Stream::StreamKind::ModuleList: - DataEnd = layout(File, cast(S)); - break; - case Stream::StreamKind::RawContent: { - RawContentStream &Raw = cast(S); - File.allocateCallback(Raw.Size, [&Raw](raw_ostream &OS) { - Raw.Content.writeAsBinary(OS); - assert(Raw.Content.binary_size() <= Raw.Size); - OS << std::string(Raw.Size - Raw.Content.binary_size(), '\0'); - }); - break; - } - case Stream::StreamKind::SystemInfo: { - SystemInfoStream &SystemInfo = cast(S); - File.allocateObject(SystemInfo.Info); - // The CSD string is not a part of the stream. - DataEnd = File.tell(); - SystemInfo.Info.CSDVersionRVA = File.allocateString(SystemInfo.CSDVersion); - break; - } - case Stream::StreamKind::TextContent: - File.allocateArray(arrayRefFromStringRef(cast(S).Text)); - break; - case Stream::StreamKind::ThreadList: - DataEnd = layout(File, cast(S)); - break; - } - // If DataEnd is not set, we assume everything we generated is a part of the - // stream. 
- Result.Location.DataSize = - DataEnd.getValueOr(File.tell()) - Result.Location.RVA; - return Result; -} - -void MinidumpYAML::writeAsBinary(Object &Obj, raw_ostream &OS) { - BlobAllocator File; - File.allocateObject(Obj.Header); - - std::vector StreamDirectory(Obj.Streams.size()); - Obj.Header.StreamDirectoryRVA = - File.allocateArray(makeArrayRef(StreamDirectory)); - Obj.Header.NumberOfStreams = StreamDirectory.size(); - - for (auto &Stream : enumerate(Obj.Streams)) - StreamDirectory[Stream.index()] = layout(File, *Stream.value()); - - File.writeTo(OS); -} - -Error MinidumpYAML::writeAsBinary(StringRef Yaml, raw_ostream &OS) { - yaml::Input Input(Yaml); - Object Obj; - Input >> Obj; - if (std::error_code EC = Input.error()) - return errorCodeToError(EC); - - writeAsBinary(Obj, OS); - return Error::success(); -} - Expected> Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) { StreamKind Kind = getKind(StreamDesc.Type); switch (Kind) { + case StreamKind::Exception: { + Expected ExpectedExceptionStream = + File.getExceptionStream(); + if (!ExpectedExceptionStream) + return ExpectedExceptionStream.takeError(); + Expected> ExpectedThreadContext = + File.getRawData(ExpectedExceptionStream->ThreadContext); + if (!ExpectedThreadContext) + return ExpectedThreadContext.takeError(); + return std::make_unique(*ExpectedExceptionStream, + *ExpectedThreadContext); + } + case StreamKind::MemoryInfoList: { + if (auto ExpectedList = File.getMemoryInfoList()) + return std::make_unique(*ExpectedList); + else + return ExpectedList.takeError(); + } case StreamKind::MemoryList: { auto ExpectedList = File.getMemoryList(); if (!ExpectedList) @@ -602,7 +491,7 @@ Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) { return ExpectedContent.takeError(); Ranges.push_back({MD, *ExpectedContent}); } - return llvm::make_unique(std::move(Ranges)); + return std::make_unique(std::move(Ranges)); } case StreamKind::ModuleList: { auto ExpectedList = File.getModuleList(); @@ -622,10 +511,10 @@ Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) { Modules.push_back( {M, std::move(*ExpectedName), *ExpectedCv, *ExpectedMisc}); } - return llvm::make_unique(std::move(Modules)); + return std::make_unique(std::move(Modules)); } case StreamKind::RawContent: - return llvm::make_unique(StreamDesc.Type, + return std::make_unique(StreamDesc.Type, File.getRawStream(StreamDesc)); case StreamKind::SystemInfo: { auto ExpectedInfo = File.getSystemInfo(); @@ -634,11 +523,11 @@ Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) { auto ExpectedCSDVersion = File.getString(ExpectedInfo->CSDVersionRVA); if (!ExpectedCSDVersion) return ExpectedInfo.takeError(); - return llvm::make_unique(*ExpectedInfo, + return std::make_unique(*ExpectedInfo, std::move(*ExpectedCSDVersion)); } case StreamKind::TextContent: - return llvm::make_unique( + return std::make_unique( StreamDesc.Type, toStringRef(File.getRawStream(StreamDesc))); case StreamKind::ThreadList: { auto ExpectedList = File.getThreadList(); @@ -654,7 +543,7 @@ Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) { return ExpectedContext.takeError(); Threads.push_back({T, *ExpectedStack, *ExpectedContext}); } - return llvm::make_unique(std::move(Threads)); + return std::make_unique(std::move(Threads)); } } llvm_unreachable("Unhandled stream kind!"); diff --git a/lib/ObjectYAML/WasmEmitter.cpp b/lib/ObjectYAML/WasmEmitter.cpp new file mode 100644 index 
000000000000..debc040587a8 --- /dev/null +++ b/lib/ObjectYAML/WasmEmitter.cpp @@ -0,0 +1,633 @@ +//===- yaml2wasm - Convert YAML to a Wasm object file --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// The Wasm component of yaml2obj. +/// +//===----------------------------------------------------------------------===// +// + +#include "llvm/Object/Wasm.h" +#include "llvm/ObjectYAML/ObjectYAML.h" +#include "llvm/ObjectYAML/yaml2obj.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/LEB128.h" + +using namespace llvm; + +namespace { +/// This parses a yaml stream that represents a Wasm object file. +/// See docs/yaml2obj for the yaml scheema. +class WasmWriter { +public: + WasmWriter(WasmYAML::Object &Obj, yaml::ErrorHandler EH) + : Obj(Obj), ErrHandler(EH) {} + bool writeWasm(raw_ostream &OS); + +private: + void writeRelocSection(raw_ostream &OS, WasmYAML::Section &Sec, + uint32_t SectionIndex); + + void writeInitExpr(raw_ostream &OS, const wasm::WasmInitExpr &InitExpr); + + void writeSectionContent(raw_ostream &OS, WasmYAML::CustomSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::TypeSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::ImportSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::FunctionSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::TableSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::MemorySection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::GlobalSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::EventSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::ExportSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::StartSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::ElemSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::CodeSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::DataSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::DataCountSection &Section); + + // Custom section types + void writeSectionContent(raw_ostream &OS, WasmYAML::DylinkSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::NameSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::LinkingSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::ProducersSection &Section); + void writeSectionContent(raw_ostream &OS, + WasmYAML::TargetFeaturesSection &Section); + WasmYAML::Object &Obj; + uint32_t NumImportedFunctions = 0; + uint32_t NumImportedGlobals = 0; + uint32_t NumImportedEvents = 0; + + bool HasError = false; + yaml::ErrorHandler ErrHandler; + void reportError(const Twine &Msg); +}; + +class SubSectionWriter { + raw_ostream &OS; + std::string OutString; + raw_string_ostream StringStream; + +public: + SubSectionWriter(raw_ostream &OS) : OS(OS), StringStream(OutString) {} + + void done() { + StringStream.flush(); + encodeULEB128(OutString.size(), OS); + OS << OutString; + OutString.clear(); + } + + raw_ostream &getStream() { return StringStream; } +}; + +} // end anonymous namespace + +static int writeUint64(raw_ostream &OS, uint64_t Value) { + char Data[sizeof(Value)]; + 
support::endian::write64le(Data, Value); + OS.write(Data, sizeof(Data)); + return 0; +} + +static int writeUint32(raw_ostream &OS, uint32_t Value) { + char Data[sizeof(Value)]; + support::endian::write32le(Data, Value); + OS.write(Data, sizeof(Data)); + return 0; +} + +static int writeUint8(raw_ostream &OS, uint8_t Value) { + char Data[sizeof(Value)]; + memcpy(Data, &Value, sizeof(Data)); + OS.write(Data, sizeof(Data)); + return 0; +} + +static int writeStringRef(const StringRef &Str, raw_ostream &OS) { + encodeULEB128(Str.size(), OS); + OS << Str; + return 0; +} + +static int writeLimits(const WasmYAML::Limits &Lim, raw_ostream &OS) { + writeUint8(OS, Lim.Flags); + encodeULEB128(Lim.Initial, OS); + if (Lim.Flags & wasm::WASM_LIMITS_FLAG_HAS_MAX) + encodeULEB128(Lim.Maximum, OS); + return 0; +} + +void WasmWriter::reportError(const Twine &Msg) { + ErrHandler(Msg); + HasError = true; +} + +void WasmWriter::writeInitExpr(raw_ostream &OS, + const wasm::WasmInitExpr &InitExpr) { + writeUint8(OS, InitExpr.Opcode); + switch (InitExpr.Opcode) { + case wasm::WASM_OPCODE_I32_CONST: + encodeSLEB128(InitExpr.Value.Int32, OS); + break; + case wasm::WASM_OPCODE_I64_CONST: + encodeSLEB128(InitExpr.Value.Int64, OS); + break; + case wasm::WASM_OPCODE_F32_CONST: + writeUint32(OS, InitExpr.Value.Float32); + break; + case wasm::WASM_OPCODE_F64_CONST: + writeUint64(OS, InitExpr.Value.Float64); + break; + case wasm::WASM_OPCODE_GLOBAL_GET: + encodeULEB128(InitExpr.Value.Global, OS); + break; + default: + reportError("unknown opcode in init_expr: " + Twine(InitExpr.Opcode)); + return; + } + writeUint8(OS, wasm::WASM_OPCODE_END); +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::DylinkSection &Section) { + writeStringRef(Section.Name, OS); + encodeULEB128(Section.MemorySize, OS); + encodeULEB128(Section.MemoryAlignment, OS); + encodeULEB128(Section.TableSize, OS); + encodeULEB128(Section.TableAlignment, OS); + encodeULEB128(Section.Needed.size(), OS); + for (StringRef Needed : Section.Needed) + writeStringRef(Needed, OS); +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::LinkingSection &Section) { + writeStringRef(Section.Name, OS); + encodeULEB128(Section.Version, OS); + + SubSectionWriter SubSection(OS); + + // SYMBOL_TABLE subsection + if (Section.SymbolTable.size()) { + writeUint8(OS, wasm::WASM_SYMBOL_TABLE); + + encodeULEB128(Section.SymbolTable.size(), SubSection.getStream()); +#ifndef NDEBUG + uint32_t SymbolIndex = 0; +#endif + for (const WasmYAML::SymbolInfo &Info : Section.SymbolTable) { + assert(Info.Index == SymbolIndex++); + writeUint8(SubSection.getStream(), Info.Kind); + encodeULEB128(Info.Flags, SubSection.getStream()); + switch (Info.Kind) { + case wasm::WASM_SYMBOL_TYPE_FUNCTION: + case wasm::WASM_SYMBOL_TYPE_GLOBAL: + case wasm::WASM_SYMBOL_TYPE_EVENT: + encodeULEB128(Info.ElementIndex, SubSection.getStream()); + if ((Info.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0 || + (Info.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0) + writeStringRef(Info.Name, SubSection.getStream()); + break; + case wasm::WASM_SYMBOL_TYPE_DATA: + writeStringRef(Info.Name, SubSection.getStream()); + if ((Info.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0) { + encodeULEB128(Info.DataRef.Segment, SubSection.getStream()); + encodeULEB128(Info.DataRef.Offset, SubSection.getStream()); + encodeULEB128(Info.DataRef.Size, SubSection.getStream()); + } + break; + case wasm::WASM_SYMBOL_TYPE_SECTION: + encodeULEB128(Info.ElementIndex, SubSection.getStream()); + break; + default: + 
llvm_unreachable("unexpected kind"); + } + } + + SubSection.done(); + } + + // SEGMENT_NAMES subsection + if (Section.SegmentInfos.size()) { + writeUint8(OS, wasm::WASM_SEGMENT_INFO); + encodeULEB128(Section.SegmentInfos.size(), SubSection.getStream()); + for (const WasmYAML::SegmentInfo &SegmentInfo : Section.SegmentInfos) { + writeStringRef(SegmentInfo.Name, SubSection.getStream()); + encodeULEB128(SegmentInfo.Alignment, SubSection.getStream()); + encodeULEB128(SegmentInfo.Flags, SubSection.getStream()); + } + SubSection.done(); + } + + // INIT_FUNCS subsection + if (Section.InitFunctions.size()) { + writeUint8(OS, wasm::WASM_INIT_FUNCS); + encodeULEB128(Section.InitFunctions.size(), SubSection.getStream()); + for (const WasmYAML::InitFunction &Func : Section.InitFunctions) { + encodeULEB128(Func.Priority, SubSection.getStream()); + encodeULEB128(Func.Symbol, SubSection.getStream()); + } + SubSection.done(); + } + + // COMDAT_INFO subsection + if (Section.Comdats.size()) { + writeUint8(OS, wasm::WASM_COMDAT_INFO); + encodeULEB128(Section.Comdats.size(), SubSection.getStream()); + for (const auto &C : Section.Comdats) { + writeStringRef(C.Name, SubSection.getStream()); + encodeULEB128(0, SubSection.getStream()); // flags for future use + encodeULEB128(C.Entries.size(), SubSection.getStream()); + for (const WasmYAML::ComdatEntry &Entry : C.Entries) { + writeUint8(SubSection.getStream(), Entry.Kind); + encodeULEB128(Entry.Index, SubSection.getStream()); + } + } + SubSection.done(); + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::NameSection &Section) { + writeStringRef(Section.Name, OS); + if (Section.FunctionNames.size()) { + writeUint8(OS, wasm::WASM_NAMES_FUNCTION); + + SubSectionWriter SubSection(OS); + + encodeULEB128(Section.FunctionNames.size(), SubSection.getStream()); + for (const WasmYAML::NameEntry &NameEntry : Section.FunctionNames) { + encodeULEB128(NameEntry.Index, SubSection.getStream()); + writeStringRef(NameEntry.Name, SubSection.getStream()); + } + + SubSection.done(); + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::ProducersSection &Section) { + writeStringRef(Section.Name, OS); + int Fields = int(!Section.Languages.empty()) + int(!Section.Tools.empty()) + + int(!Section.SDKs.empty()); + if (Fields == 0) + return; + encodeULEB128(Fields, OS); + for (auto &Field : {std::make_pair(StringRef("language"), &Section.Languages), + std::make_pair(StringRef("processed-by"), &Section.Tools), + std::make_pair(StringRef("sdk"), &Section.SDKs)}) { + if (Field.second->empty()) + continue; + writeStringRef(Field.first, OS); + encodeULEB128(Field.second->size(), OS); + for (auto &Entry : *Field.second) { + writeStringRef(Entry.Name, OS); + writeStringRef(Entry.Version, OS); + } + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::TargetFeaturesSection &Section) { + writeStringRef(Section.Name, OS); + encodeULEB128(Section.Features.size(), OS); + for (auto &E : Section.Features) { + writeUint8(OS, E.Prefix); + writeStringRef(E.Name, OS); + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::CustomSection &Section) { + if (auto S = dyn_cast(&Section)) { + writeSectionContent(OS, *S); + } else if (auto S = dyn_cast(&Section)) { + writeSectionContent(OS, *S); + } else if (auto S = dyn_cast(&Section)) { + writeSectionContent(OS, *S); + } else if (auto S = dyn_cast(&Section)) { + writeSectionContent(OS, *S); + } else if (auto S = dyn_cast(&Section)) { + writeSectionContent(OS, *S); + } else { + 
writeStringRef(Section.Name, OS); + Section.Payload.writeAsBinary(OS); + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::TypeSection &Section) { + encodeULEB128(Section.Signatures.size(), OS); + uint32_t ExpectedIndex = 0; + for (const WasmYAML::Signature &Sig : Section.Signatures) { + if (Sig.Index != ExpectedIndex) { + reportError("unexpected type index: " + Twine(Sig.Index)); + return; + } + ++ExpectedIndex; + writeUint8(OS, Sig.Form); + encodeULEB128(Sig.ParamTypes.size(), OS); + for (auto ParamType : Sig.ParamTypes) + writeUint8(OS, ParamType); + encodeULEB128(Sig.ReturnTypes.size(), OS); + for (auto ReturnType : Sig.ReturnTypes) + writeUint8(OS, ReturnType); + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::ImportSection &Section) { + encodeULEB128(Section.Imports.size(), OS); + for (const WasmYAML::Import &Import : Section.Imports) { + writeStringRef(Import.Module, OS); + writeStringRef(Import.Field, OS); + writeUint8(OS, Import.Kind); + switch (Import.Kind) { + case wasm::WASM_EXTERNAL_FUNCTION: + encodeULEB128(Import.SigIndex, OS); + NumImportedFunctions++; + break; + case wasm::WASM_EXTERNAL_GLOBAL: + writeUint8(OS, Import.GlobalImport.Type); + writeUint8(OS, Import.GlobalImport.Mutable); + NumImportedGlobals++; + break; + case wasm::WASM_EXTERNAL_EVENT: + writeUint32(OS, Import.EventImport.Attribute); + writeUint32(OS, Import.EventImport.SigIndex); + NumImportedGlobals++; + break; + case wasm::WASM_EXTERNAL_MEMORY: + writeLimits(Import.Memory, OS); + break; + case wasm::WASM_EXTERNAL_TABLE: + writeUint8(OS, Import.TableImport.ElemType); + writeLimits(Import.TableImport.TableLimits, OS); + break; + default: + reportError("unknown import type: " +Twine(Import.Kind)); + return; + } + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::FunctionSection &Section) { + encodeULEB128(Section.FunctionTypes.size(), OS); + for (uint32_t FuncType : Section.FunctionTypes) + encodeULEB128(FuncType, OS); +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::ExportSection &Section) { + encodeULEB128(Section.Exports.size(), OS); + for (const WasmYAML::Export &Export : Section.Exports) { + writeStringRef(Export.Name, OS); + writeUint8(OS, Export.Kind); + encodeULEB128(Export.Index, OS); + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::StartSection &Section) { + encodeULEB128(Section.StartFunction, OS); +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::TableSection &Section) { + encodeULEB128(Section.Tables.size(), OS); + for (auto &Table : Section.Tables) { + writeUint8(OS, Table.ElemType); + writeLimits(Table.TableLimits, OS); + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::MemorySection &Section) { + encodeULEB128(Section.Memories.size(), OS); + for (const WasmYAML::Limits &Mem : Section.Memories) + writeLimits(Mem, OS); +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::GlobalSection &Section) { + encodeULEB128(Section.Globals.size(), OS); + uint32_t ExpectedIndex = NumImportedGlobals; + for (auto &Global : Section.Globals) { + if (Global.Index != ExpectedIndex) { + reportError("unexpected global index: " + Twine(Global.Index)); + return; + } + ++ExpectedIndex; + writeUint8(OS, Global.Type); + writeUint8(OS, Global.Mutable); + writeInitExpr(OS, Global.InitExpr); + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::EventSection &Section) { + encodeULEB128(Section.Events.size(), OS); + 
uint32_t ExpectedIndex = NumImportedEvents; + for (auto &Event : Section.Events) { + if (Event.Index != ExpectedIndex) { + reportError("unexpected event index: " + Twine(Event.Index)); + return; + } + ++ExpectedIndex; + encodeULEB128(Event.Attribute, OS); + encodeULEB128(Event.SigIndex, OS); + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::ElemSection &Section) { + encodeULEB128(Section.Segments.size(), OS); + for (auto &Segment : Section.Segments) { + encodeULEB128(Segment.TableIndex, OS); + writeInitExpr(OS, Segment.Offset); + + encodeULEB128(Segment.Functions.size(), OS); + for (auto &Function : Segment.Functions) + encodeULEB128(Function, OS); + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::CodeSection &Section) { + encodeULEB128(Section.Functions.size(), OS); + uint32_t ExpectedIndex = NumImportedFunctions; + for (auto &Func : Section.Functions) { + std::string OutString; + raw_string_ostream StringStream(OutString); + if (Func.Index != ExpectedIndex) { + reportError("unexpected function index: " + Twine(Func.Index)); + return; + } + ++ExpectedIndex; + + encodeULEB128(Func.Locals.size(), StringStream); + for (auto &LocalDecl : Func.Locals) { + encodeULEB128(LocalDecl.Count, StringStream); + writeUint8(StringStream, LocalDecl.Type); + } + + Func.Body.writeAsBinary(StringStream); + + // Write the section size followed by the content + StringStream.flush(); + encodeULEB128(OutString.size(), OS); + OS << OutString; + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::DataSection &Section) { + encodeULEB128(Section.Segments.size(), OS); + for (auto &Segment : Section.Segments) { + encodeULEB128(Segment.InitFlags, OS); + if (Segment.InitFlags & wasm::WASM_SEGMENT_HAS_MEMINDEX) + encodeULEB128(Segment.MemoryIndex, OS); + if ((Segment.InitFlags & wasm::WASM_SEGMENT_IS_PASSIVE) == 0) + writeInitExpr(OS, Segment.Offset); + encodeULEB128(Segment.Content.binary_size(), OS); + Segment.Content.writeAsBinary(OS); + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::DataCountSection &Section) { + encodeULEB128(Section.Count, OS); +} + +void WasmWriter::writeRelocSection(raw_ostream &OS, WasmYAML::Section &Sec, + uint32_t SectionIndex) { + switch (Sec.Type) { + case wasm::WASM_SEC_CODE: + writeStringRef("reloc.CODE", OS); + break; + case wasm::WASM_SEC_DATA: + writeStringRef("reloc.DATA", OS); + break; + case wasm::WASM_SEC_CUSTOM: { + auto *CustomSection = cast(&Sec); + writeStringRef(("reloc." 
+ CustomSection->Name).str(), OS); + break; + } + default: + llvm_unreachable("not yet implemented"); + } + + encodeULEB128(SectionIndex, OS); + encodeULEB128(Sec.Relocations.size(), OS); + + for (auto Reloc : Sec.Relocations) { + writeUint8(OS, Reloc.Type); + encodeULEB128(Reloc.Offset, OS); + encodeULEB128(Reloc.Index, OS); + switch (Reloc.Type) { + case wasm::R_WASM_MEMORY_ADDR_LEB: + case wasm::R_WASM_MEMORY_ADDR_SLEB: + case wasm::R_WASM_MEMORY_ADDR_I32: + case wasm::R_WASM_FUNCTION_OFFSET_I32: + case wasm::R_WASM_SECTION_OFFSET_I32: + encodeULEB128(Reloc.Addend, OS); + } + } +} + +bool WasmWriter::writeWasm(raw_ostream &OS) { + // Write headers + OS.write(wasm::WasmMagic, sizeof(wasm::WasmMagic)); + writeUint32(OS, Obj.Header.Version); + + // Write each section + llvm::object::WasmSectionOrderChecker Checker; + for (const std::unique_ptr &Sec : Obj.Sections) { + StringRef SecName = ""; + if (auto S = dyn_cast(Sec.get())) + SecName = S->Name; + if (!Checker.isValidSectionOrder(Sec->Type, SecName)) { + reportError("out of order section type: " + Twine(Sec->Type)); + return false; + } + encodeULEB128(Sec->Type, OS); + std::string OutString; + raw_string_ostream StringStream(OutString); + if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else + reportError("unknown section type: " + Twine(Sec->Type)); + + if (HasError) + return false; + + StringStream.flush(); + + // Write the section size followed by the content + encodeULEB128(OutString.size(), OS); + OS << OutString; + } + + // write reloc sections for any section that have relocations + uint32_t SectionIndex = 0; + for (const std::unique_ptr &Sec : Obj.Sections) { + if (Sec->Relocations.empty()) { + SectionIndex++; + continue; + } + + writeUint8(OS, wasm::WASM_SEC_CUSTOM); + std::string OutString; + raw_string_ostream StringStream(OutString); + writeRelocSection(StringStream, *Sec, SectionIndex++); + StringStream.flush(); + + encodeULEB128(OutString.size(), OS); + OS << OutString; + } + + return true; +} + +namespace llvm { +namespace yaml { + +bool yaml2wasm(WasmYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH) { + WasmWriter Writer(Doc, EH); + return Writer.writeWasm(Out); +} + +} // namespace yaml +} // namespace llvm diff --git a/lib/ObjectYAML/WasmYAML.cpp b/lib/ObjectYAML/WasmYAML.cpp index 88491d955c49..232d5122004a 100644 --- a/lib/ObjectYAML/WasmYAML.cpp +++ b/lib/ObjectYAML/WasmYAML.cpp @@ -295,8 +295,8 @@ void ScalarEnumerationTraits::enumeration( 
void MappingTraits::mapping( IO &IO, WasmYAML::Signature &Signature) { IO.mapRequired("Index", Signature.Index); - IO.mapRequired("ReturnType", Signature.ReturnType); IO.mapRequired("ParamTypes", Signature.ParamTypes); + IO.mapRequired("ReturnTypes", Signature.ReturnTypes); } void MappingTraits::mapping(IO &IO, WasmYAML::Table &Table) { @@ -535,6 +535,7 @@ void ScalarBitSetTraits::bitset( BCaseMask(UNDEFINED, UNDEFINED); BCaseMask(EXPORTED, EXPORTED); BCaseMask(EXPLICIT_NAME, EXPLICIT_NAME); + BCaseMask(NO_STRIP, NO_STRIP); #undef BCaseMask } @@ -559,7 +560,6 @@ void ScalarEnumerationTraits::enumeration( ECase(V128); ECase(FUNCREF); ECase(FUNC); - ECase(NORESULT); #undef ECase } diff --git a/lib/ObjectYAML/yaml2obj.cpp b/lib/ObjectYAML/yaml2obj.cpp new file mode 100644 index 000000000000..c18fa5cfdb5e --- /dev/null +++ b/lib/ObjectYAML/yaml2obj.cpp @@ -0,0 +1,77 @@ +//===-- yaml2obj.cpp ------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ObjectYAML/yaml2obj.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/ObjectYAML/ObjectYAML.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/YAMLTraits.h" + +namespace llvm { +namespace yaml { + +bool convertYAML(yaml::Input &YIn, raw_ostream &Out, ErrorHandler ErrHandler, + unsigned DocNum) { + unsigned CurDocNum = 0; + do { + if (++CurDocNum != DocNum) + continue; + + yaml::YamlObjectFile Doc; + YIn >> Doc; + if (std::error_code EC = YIn.error()) { + ErrHandler("failed to parse YAML input: " + EC.message()); + return false; + } + + if (Doc.Elf) + return yaml2elf(*Doc.Elf, Out, ErrHandler); + if (Doc.Coff) + return yaml2coff(*Doc.Coff, Out, ErrHandler); + if (Doc.MachO || Doc.FatMachO) + return yaml2macho(Doc, Out, ErrHandler); + if (Doc.Minidump) + return yaml2minidump(*Doc.Minidump, Out, ErrHandler); + if (Doc.Wasm) + return yaml2wasm(*Doc.Wasm, Out, ErrHandler); + + ErrHandler("unknown document type"); + return false; + + } while (YIn.nextDocument()); + + ErrHandler("cannot find the " + Twine(DocNum) + + getOrdinalSuffix(DocNum).data() + " document"); + return false; +} + +std::unique_ptr +yaml2ObjectFile(SmallVectorImpl &Storage, StringRef Yaml, + ErrorHandler ErrHandler) { + Storage.clear(); + raw_svector_ostream OS(Storage); + + yaml::Input YIn(Yaml); + if (!convertYAML(YIn, OS, ErrHandler)) + return {}; + + Expected> ObjOrErr = + object::ObjectFile::createObjectFile( + MemoryBufferRef(OS.str(), "YamlObject")); + if (ObjOrErr) + return std::move(*ObjOrErr); + + ErrHandler(toString(ObjOrErr.takeError())); + return {}; +} + +} // namespace yaml +} // namespace llvm diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp index f37c142da69b..09e921502eb6 100644 --- a/lib/Option/ArgList.cpp +++ b/lib/Option/ArgList.cpp @@ -241,7 +241,7 @@ void DerivedArgList::AddSynthesizedArg(Arg *A) { Arg *DerivedArgList::MakeFlagArg(const Arg *BaseArg, const Option Opt) const { SynthesizedArgs.push_back( - make_unique(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), + std::make_unique(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), BaseArgs.MakeIndex(Opt.getName()), BaseArg)); return SynthesizedArgs.back().get(); } @@ -250,7 
+250,7 @@ Arg *DerivedArgList::MakePositionalArg(const Arg *BaseArg, const Option Opt, StringRef Value) const { unsigned Index = BaseArgs.MakeIndex(Value); SynthesizedArgs.push_back( - make_unique(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), + std::make_unique(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), Index, BaseArgs.getArgString(Index), BaseArg)); return SynthesizedArgs.back().get(); } @@ -259,7 +259,7 @@ Arg *DerivedArgList::MakeSeparateArg(const Arg *BaseArg, const Option Opt, StringRef Value) const { unsigned Index = BaseArgs.MakeIndex(Opt.getName(), Value); SynthesizedArgs.push_back( - make_unique(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), + std::make_unique(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), Index, BaseArgs.getArgString(Index + 1), BaseArg)); return SynthesizedArgs.back().get(); } @@ -267,7 +267,7 @@ Arg *DerivedArgList::MakeSeparateArg(const Arg *BaseArg, const Option Opt, Arg *DerivedArgList::MakeJoinedArg(const Arg *BaseArg, const Option Opt, StringRef Value) const { unsigned Index = BaseArgs.MakeIndex((Opt.getName() + Value).str()); - SynthesizedArgs.push_back(make_unique( + SynthesizedArgs.push_back(std::make_unique( Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), Index, BaseArgs.getArgString(Index) + Opt.getName().size(), BaseArg)); return SynthesizedArgs.back().get(); diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index e2b2a2b25268..1aaccb510f8c 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -27,6 +27,7 @@ #include "llvm/Analysis/CFLSteensAliasAnalysis.h" #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/DDG.h" #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/DominanceFrontier.h" @@ -35,6 +36,7 @@ #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopCacheAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemorySSA.h" @@ -51,6 +53,7 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/PreISelIntrinsicLowering.h" #include "llvm/CodeGen/UnreachableBlockElim.h" #include "llvm/IR/Dominators.h" @@ -101,6 +104,7 @@ #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" #include "llvm/Transforms/Instrumentation/PoisonChecking.h" +#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" #include "llvm/Transforms/Scalar/ADCE.h" #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" @@ -138,13 +142,14 @@ #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h" #include "llvm/Transforms/Scalar/LoopUnrollPass.h" #include "llvm/Transforms/Scalar/LowerAtomic.h" +#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" #include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h" #include "llvm/Transforms/Scalar/LowerWidenableCondition.h" #include "llvm/Transforms/Scalar/MakeGuardsExplicit.h" #include "llvm/Transforms/Scalar/MemCpyOptimizer.h" -#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" #include "llvm/Transforms/Scalar/MergeICmps.h" 
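Stepping back to the new lib/ObjectYAML/yaml2obj.cpp above: convertYAML and yaml2ObjectFile are the entry points the yaml2obj tool and unit tests build on. A minimal usage sketch, not part of the imported sources; the wrapper name makeTestObject and the error-printing lambda are assumptions, while the yaml2ObjectFile call follows the signature introduced in that file (Storage must outlive the returned object, which refers into it).

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/ObjectYAML/yaml2obj.h"
#include "llvm/Support/raw_ostream.h"

// Build an in-memory ObjectFile from a YAML description of an object file.
static std::unique_ptr<llvm::object::ObjectFile>
makeTestObject(llvm::SmallVectorImpl<char> &Storage, llvm::StringRef Yaml) {
  auto PrintError = [](const llvm::Twine &Msg) {
    llvm::errs() << "yaml2obj: " << Msg << "\n";
  };
  // Parses the YAML, emits the binary into Storage, and re-reads it as an
  // ObjectFile; returns nullptr (after calling PrintError) on failure.
  return llvm::yaml::yaml2ObjectFile(Storage, Yaml, PrintError);
}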
+#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" #include "llvm/Transforms/Scalar/NaryReassociate.h" #include "llvm/Transforms/Scalar/NewGVN.h" #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" @@ -206,7 +211,7 @@ static cl::opt EnableSyntheticCounts( cl::desc("Run synthetic function entry count generation " "pass")); -static Regex DefaultAliasRegex( +static const Regex DefaultAliasRegex( "^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$"); // This option is used in simplifying testing SampleFDO optimizations for @@ -466,8 +471,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, if ((Phase != ThinLTOPhase::PreLink || !PGOOpt || PGOOpt->Action != PGOOptions::SampleUse) && PTO.LoopUnrolling) - LPM2.addPass( - LoopFullUnrollPass(Level, false, PTO.ForgetAllSCEVInLoopUnroll)); + LPM2.addPass(LoopFullUnrollPass(Level, /*OnlyWhenForced=*/false, + PTO.ForgetAllSCEVInLoopUnroll)); for (auto &C : LoopOptimizerEndEPCallbacks) C(LPM2, Level); @@ -475,10 +480,15 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // We provide the opt remark emitter pass for LICM to use. We only need to do // this once as it is immutable. FPM.addPass(RequireAnalysisPass()); - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), DebugLogging)); + FPM.addPass(createFunctionToLoopPassAdaptor( + std::move(LPM1), EnableMSSALoopDependency, DebugLogging)); FPM.addPass(SimplifyCFGPass()); FPM.addPass(InstCombinePass()); - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), DebugLogging)); + // The loop passes in LPM2 (IndVarSimplifyPass, LoopIdiomRecognizePass, + // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA. + // *All* loop passes must preserve it, in order to be able to use it. + FPM.addPass(createFunctionToLoopPassAdaptor( + std::move(LPM2), /*UseMemorySSA=*/false, DebugLogging)); // Eliminate redundancies. if (Level != O1) { @@ -515,7 +525,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(DSEPass()); FPM.addPass(createFunctionToLoopPassAdaptor( LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), - DebugLogging)); + EnableMSSALoopDependency, DebugLogging)); for (auto &C : ScalarOptimizerLateEPCallbacks) C(FPM, Level); @@ -540,6 +550,7 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, bool RunProfileGen, bool IsCS, std::string ProfileFile, std::string ProfileRemappingFile) { + assert(Level != O0 && "Not expecting O0 here!"); // Generally running simplification passes and the inliner with an high // threshold results in smaller executables, but there may be cases where // the size grows, so let's be conservative here and skip this simplification @@ -570,34 +581,63 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPipeline))); + + // Delete anything that is now dead to make sure that we don't instrument + // dead code. Instrumentation can end up keeping dead code around and + // dramatically increase code size. + MPM.addPass(GlobalDCEPass()); } - // Delete anything that is now dead to make sure that we don't instrument - // dead code. Instrumentation can end up keeping dead code around and - // dramatically increase code size. 
- MPM.addPass(GlobalDCEPass()); + if (!RunProfileGen) { + assert(!ProfileFile.empty() && "Profile use expecting a profile file!"); + MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS)); + // Cache ProfileSummaryAnalysis once to avoid the potential need to insert + // RequireAnalysisPass for PSI before subsequent non-module passes. + MPM.addPass(RequireAnalysisPass()); + return; + } - if (RunProfileGen) { - MPM.addPass(PGOInstrumentationGen(IsCS)); + // Perform PGO instrumentation. + MPM.addPass(PGOInstrumentationGen(IsCS)); - FunctionPassManager FPM; - FPM.addPass( - createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging)); - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - - // Add the profile lowering pass. - InstrProfOptions Options; - if (!ProfileFile.empty()) - Options.InstrProfileOutput = ProfileFile; - Options.DoCounterPromotion = true; - Options.UseBFIInPromotion = IsCS; - MPM.addPass(InstrProfiling(Options, IsCS)); - } else if (!ProfileFile.empty()) { + FunctionPassManager FPM; + FPM.addPass(createFunctionToLoopPassAdaptor( + LoopRotatePass(), EnableMSSALoopDependency, DebugLogging)); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + + // Add the profile lowering pass. + InstrProfOptions Options; + if (!ProfileFile.empty()) + Options.InstrProfileOutput = ProfileFile; + // Do counter promotion at Level greater than O0. + Options.DoCounterPromotion = true; + Options.UseBFIInPromotion = IsCS; + MPM.addPass(InstrProfiling(Options, IsCS)); +} + +void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM, + bool DebugLogging, bool RunProfileGen, + bool IsCS, std::string ProfileFile, + std::string ProfileRemappingFile) { + if (!RunProfileGen) { + assert(!ProfileFile.empty() && "Profile use expecting a profile file!"); MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS)); // Cache ProfileSummaryAnalysis once to avoid the potential need to insert // RequireAnalysisPass for PSI before subsequent non-module passes. MPM.addPass(RequireAnalysisPass()); + return; } + + // Perform PGO instrumentation. + MPM.addPass(PGOInstrumentationGen(IsCS)); + // Add the profile lowering pass. + InstrProfOptions Options; + if (!ProfileFile.empty()) + Options.InstrProfileOutput = ProfileFile; + // Do not do counter promotion at O0. + Options.DoCounterPromotion = false; + Options.UseBFIInPromotion = IsCS; + MPM.addPass(InstrProfiling(Options, IsCS)); } static InlineParams @@ -852,6 +892,8 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline( FunctionPassManager OptimizePM(DebugLogging); OptimizePM.addPass(Float2IntPass()); + OptimizePM.addPass(LowerConstantIntrinsicsPass()); + // FIXME: We need to run some loop optimizations to re-rotate loops after // simplify-cfg and others undo their rotation. @@ -863,8 +905,8 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline( C(OptimizePM, Level); // First rotate loops that may have been un-rotated by prior passes. - OptimizePM.addPass( - createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging)); + OptimizePM.addPass(createFunctionToLoopPassAdaptor( + LoopRotatePass(), EnableMSSALoopDependency, DebugLogging)); // Distribute loops to allow partial vectorization. I.e. isolate dependences // into separate loop that would otherwise inhibit vectorization. This is @@ -911,19 +953,19 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline( // combiner for cleanup here so that the unrolling and LICM can be pipelined // across the loop nests. 
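Several call sites in this file now pass an extra boolean to createFunctionToLoopPassAdaptor selecting MemorySSA-backed loop execution, and as the new comment notes, every pass run under such an adaptor must preserve MemorySSA. A minimal sketch of that pattern, assuming the LICMPass default constructor and the three-argument adaptor used in this hunk:

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
using namespace llvm;

// Run LICM under a MemorySSA-enabled loop adaptor; the middle argument is
// the UseMemorySSA flag threaded through PassBuilder in this patch.
static void addLICMWithMSSA(FunctionPassManager &FPM, bool DebugLogging) {
  LoopPassManager LPM(DebugLogging);
  LPM.addPass(LICMPass()); // LICM preserves MemorySSA, so this is legal here.
  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM),
                                              /*UseMemorySSA=*/true,
                                              DebugLogging));
}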
// We do UnrollAndJam in a separate LPM to ensure it happens before unroll - if (EnableUnrollAndJam) { + if (EnableUnrollAndJam && PTO.LoopUnrolling) { OptimizePM.addPass( createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level))); } - if (PTO.LoopUnrolling) - OptimizePM.addPass(LoopUnrollPass( - LoopUnrollOptions(Level, false, PTO.ForgetAllSCEVInLoopUnroll))); + OptimizePM.addPass(LoopUnrollPass( + LoopUnrollOptions(Level, /*OnlyWhenForced=*/!PTO.LoopUnrolling, + PTO.ForgetAllSCEVInLoopUnroll))); OptimizePM.addPass(WarnMissedTransformationsPass()); OptimizePM.addPass(InstCombinePass()); OptimizePM.addPass(RequireAnalysisPass()); OptimizePM.addPass(createFunctionToLoopPassAdaptor( LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), - DebugLogging)); + EnableMSSALoopDependency, DebugLogging)); // Now that we've vectorized and unrolled loops, we may have more refined // alignment information, try to re-derive it here. @@ -1422,12 +1464,23 @@ Expected parseLoopUnrollOptions(StringRef Params) { UnrollOpts.setOptLevel(OptLevel); continue; } + if (ParamName.consume_front("full-unroll-max=")) { + int Count; + if (ParamName.getAsInteger(0, Count)) + return make_error( + formatv("invalid LoopUnrollPass parameter '{0}' ", ParamName).str(), + inconvertibleErrorCode()); + UnrollOpts.setFullUnrollMaxCount(Count); + continue; + } bool Enable = !ParamName.consume_front("no-"); if (ParamName == "partial") { UnrollOpts.setPartial(Enable); } else if (ParamName == "peeling") { UnrollOpts.setPeeling(Enable); + } else if (ParamName == "profile-peeling") { + UnrollOpts.setProfileBasedPeeling(Enable); } else if (ParamName == "runtime") { UnrollOpts.setRuntime(Enable); } else if (ParamName == "upperbound") { @@ -1542,6 +1595,26 @@ Expected parseLoopUnswitchOptions(StringRef Params) { } return Result; } + +Expected parseMergedLoadStoreMotionOptions(StringRef Params) { + bool Result = false; + while (!Params.empty()) { + StringRef ParamName; + std::tie(ParamName, Params) = Params.split(';'); + + bool Enable = !ParamName.consume_front("no-"); + if (ParamName == "split-footer-bb") { + Result = Enable; + } else { + return make_error( + formatv("invalid MergedLoadStoreMotion pass parameter '{0}' ", + ParamName) + .str(), + inconvertibleErrorCode()); + } + } + return Result; +} } // namespace /// Tests whether a pass name starts with a valid prefix for a default pipeline @@ -1629,7 +1702,7 @@ static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) { // Explicitly handle pass manager names. if (Name == "function") return true; - if (Name == "loop") + if (Name == "loop" || Name == "loop-mssa") return true; // Explicitly handle custom-parsed pass names. @@ -1653,7 +1726,7 @@ static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) { template static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks) { // Explicitly handle pass manager names. - if (Name == "loop") + if (Name == "loop" || Name == "loop-mssa") return true; // Explicitly handle custom-parsed pass names. @@ -1800,9 +1873,19 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM, .Case("O3", O3) .Case("Os", Os) .Case("Oz", Oz); - if (L == O0) - // At O0 we do nothing at all! + if (L == O0) { + // Add instrumentation PGO passes -- at O0 we can still do PGO. 
+ if (PGOOpt && Matches[1] != "thinlto" && + (PGOOpt->Action == PGOOptions::IRInstr || + PGOOpt->Action == PGOOptions::IRUse)) + addPGOInstrPassesForO0( + MPM, DebugLogging, + /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr), + /* IsCS */ false, PGOOpt->ProfileFile, + PGOOpt->ProfileRemappingFile); + // Do nothing else at all! return Error::success(); + } if (Matches[1] == "default") { MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging)); @@ -1947,14 +2030,15 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM, FPM.addPass(std::move(NestedFPM)); return Error::success(); } - if (Name == "loop") { + if (Name == "loop" || Name == "loop-mssa") { LoopPassManager LPM(DebugLogging); if (auto Err = parseLoopPassPipeline(LPM, InnerPipeline, VerifyEachPass, DebugLogging)) return Err; // Add the nested pass manager with the appropriate adaptor. - FPM.addPass( - createFunctionToLoopPassAdaptor(std::move(LPM), DebugLogging)); + bool UseMemorySSA = (Name == "loop-mssa"); + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), UseMemorySSA, + DebugLogging)); return Error::success(); } if (auto Count = parseRepeatPassName(Name)) { diff --git a/lib/Passes/PassRegistry.def b/lib/Passes/PassRegistry.def index 347f75870eb3..1fa274d172b1 100644 --- a/lib/Passes/PassRegistry.def +++ b/lib/Passes/PassRegistry.def @@ -24,7 +24,6 @@ MODULE_ANALYSIS("module-summary", ModuleSummaryIndexAnalysis()) MODULE_ANALYSIS("no-op-module", NoOpModuleAnalysis()) MODULE_ANALYSIS("profile-summary", ProfileSummaryAnalysis()) MODULE_ANALYSIS("stack-safety", StackSafetyGlobalAnalysis()) -MODULE_ANALYSIS("targetlibinfo", TargetLibraryAnalysis()) MODULE_ANALYSIS("verify", VerifierAnalysis()) MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) MODULE_ANALYSIS("asan-globals-md", ASanGlobalsMetadataAnalysis()) @@ -87,7 +86,10 @@ MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation()) MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass(nullptr, nullptr)) MODULE_PASS("verify", VerifierPass()) MODULE_PASS("asan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/false, false, true, false)) +MODULE_PASS("msan-module", MemorySanitizerPass({})) +MODULE_PASS("tsan-module", ThreadSanitizerPass()) MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, false, true, false)) +MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass()) MODULE_PASS("poison-checking", PoisonCheckingPass()) #undef MODULE_PASS @@ -185,6 +187,7 @@ FUNCTION_PASS("libcalls-shrinkwrap", LibCallsShrinkWrapPass()) FUNCTION_PASS("loweratomic", LowerAtomicPass()) FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass()) FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass()) +FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass()) FUNCTION_PASS("lower-widenable-condition", LowerWidenableConditionPass()) FUNCTION_PASS("guard-widening", GuardWideningPass()) FUNCTION_PASS("gvn", GVN()) @@ -195,7 +198,6 @@ FUNCTION_PASS("lowerinvoke", LowerInvokePass()) FUNCTION_PASS("mem2reg", PromotePass()) FUNCTION_PASS("memcpyopt", MemCpyOptPass()) FUNCTION_PASS("mergeicmps", MergeICmpsPass()) -FUNCTION_PASS("mldst-motion", MergedLoadStoreMotionPass()) FUNCTION_PASS("nary-reassociate", NaryReassociatePass()) FUNCTION_PASS("newgvn", NewGVNPass()) FUNCTION_PASS("jump-threading", JumpThreadingPass()) @@ -270,6 +272,11 @@ FUNCTION_PASS_WITH_PARAMS("loop-vectorize", return LoopVectorizePass(Opts); }, parseLoopVectorizeOptions) 
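The registry entries in this file ("loop-mssa" as a loop-manager name, the parameterized "mldst-motion" added below, and the new unroll parameters) are reachable through the textual pipeline parser. A rough, test-style sketch, not part of the patch: the pipeline string is illustrative, and it assumes "licm" is registered as a loop pass elsewhere in PassRegistry.def.

#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Parse a textual pipeline that exercises the MemorySSA-enabled loop manager
// and the parameterized merged-load-store-motion pass.
static bool buildExamplePipeline(ModulePassManager &MPM) {
  PassBuilder PB;
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  if (Error Err = PB.parsePassPipeline(
          MPM, "function(loop-mssa(licm),mldst-motion<split-footer-bb>)")) {
    logAllUnhandledErrors(std::move(Err), errs());
    return false;
  }
  return true;
}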
+FUNCTION_PASS_WITH_PARAMS("mldst-motion", + [](MergedLoadStoreMotionOptions Opts) { + return MergedLoadStoreMotionPass(Opts); + }, + parseMergedLoadStoreMotionOptions) #undef FUNCTION_PASS_WITH_PARAMS #ifndef LOOP_ANALYSIS @@ -277,6 +284,7 @@ FUNCTION_PASS_WITH_PARAMS("loop-vectorize", #endif LOOP_ANALYSIS("no-op-loop", NoOpLoopAnalysis()) LOOP_ANALYSIS("access-info", LoopAccessAnalysis()) +LOOP_ANALYSIS("ddg", DDGAnalysis()) LOOP_ANALYSIS("ivusers", IVUsersAnalysis()) LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) #undef LOOP_ANALYSIS @@ -299,7 +307,9 @@ LOOP_PASS("irce", IRCEPass()) LOOP_PASS("unroll-and-jam", LoopUnrollAndJamPass()) LOOP_PASS("unroll-full", LoopFullUnrollPass()) LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs())) +LOOP_PASS("print", DDGAnalysisPrinterPass(dbgs())) LOOP_PASS("print", IVUsersPrinterPass(dbgs())) +LOOP_PASS("print", LoopCachePrinterPass(dbgs())) LOOP_PASS("loop-predication", LoopPredicationPass()) LOOP_PASS("guard-widening", GuardWideningPass()) #undef LOOP_PASS diff --git a/lib/ProfileData/Coverage/CoverageMapping.cpp b/lib/ProfileData/Coverage/CoverageMapping.cpp index afd6618e7cb3..8d5e56e26c0f 100644 --- a/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -194,6 +194,15 @@ void FunctionRecordIterator::skipOtherFiles() { *this = FunctionRecordIterator(); } +ArrayRef CoverageMapping::getImpreciseRecordIndicesForFilename( + StringRef Filename) const { + size_t FilenameHash = hash_value(Filename); + auto RecordIt = FilenameHash2RecordIndices.find(FilenameHash); + if (RecordIt == FilenameHash2RecordIndices.end()) + return {}; + return RecordIt->second; +} + Error CoverageMapping::loadFunctionRecord( const CoverageMappingRecord &Record, IndexedInstrProfReader &ProfileReader) { @@ -249,6 +258,20 @@ Error CoverageMapping::loadFunctionRecord( return Error::success(); Functions.push_back(std::move(Function)); + + // Performance optimization: keep track of the indices of the function records + // which correspond to each filename. This can be used to substantially speed + // up queries for coverage info in a file. + unsigned RecordIndex = Functions.size() - 1; + for (StringRef Filename : Record.Filenames) { + auto &RecordIndices = FilenameHash2RecordIndices[hash_value(Filename)]; + // Note that there may be duplicates in the filename set for a function + // record, because of e.g. macro expansions in the function in which both + // the macro and the function are defined in the same file. + if (RecordIndices.empty() || RecordIndices.back() != RecordIndex) + RecordIndices.push_back(RecordIndex); + } + return Error::success(); } @@ -270,6 +293,16 @@ Expected> CoverageMapping::load( return std::move(Coverage); } +// If E is a no_data_found error, returns success. Otherwise returns E. 
+static Error handleMaybeNoDataFoundError(Error E) { + return handleErrors( + std::move(E), [](const CoverageMapError &CME) { + if (CME.get() == coveragemap_error::no_data_found) + return static_cast(Error::success()); + return make_error(CME.get()); + }); +} + Expected> CoverageMapping::load(ArrayRef ObjectFilenames, StringRef ProfileFilename, ArrayRef Arches) { @@ -289,12 +322,21 @@ CoverageMapping::load(ArrayRef ObjectFilenames, CovMappingBufOrErr.get()->getMemBufferRef(); auto CoverageReadersOrErr = BinaryCoverageReader::create(CovMappingBufRef, Arch, Buffers); - if (Error E = CoverageReadersOrErr.takeError()) - return std::move(E); + if (Error E = CoverageReadersOrErr.takeError()) { + E = handleMaybeNoDataFoundError(std::move(E)); + if (E) + return std::move(E); + // E == success (originally a no_data_found error). + continue; + } for (auto &Reader : CoverageReadersOrErr.get()) Readers.push_back(std::move(Reader)); Buffers.push_back(std::move(CovMappingBufOrErr.get())); } + // If no readers were created, either no objects were provided or none of them + // had coverage data. Return an error in the latter case. + if (Readers.empty() && !ObjectFilenames.empty()) + return make_error(coveragemap_error::no_data_found); return load(Readers, *ProfileReader); } @@ -607,7 +649,12 @@ CoverageData CoverageMapping::getCoverageForFile(StringRef Filename) const { CoverageData FileCoverage(Filename); std::vector Regions; - for (const auto &Function : Functions) { + // Look up the function records in the given file. Due to hash collisions on + // the filename, we may get back some records that are not in the file. + ArrayRef RecordIndices = + getImpreciseRecordIndicesForFilename(Filename); + for (unsigned RecordIndex : RecordIndices) { + const FunctionRecord &Function = Functions[RecordIndex]; auto MainFileID = findMainViewFileID(Filename, Function); auto FileIDs = gatherFileIDs(Filename, Function); for (const auto &CR : Function.CountedRegions) @@ -627,7 +674,12 @@ CoverageData CoverageMapping::getCoverageForFile(StringRef Filename) const { std::vector CoverageMapping::getInstantiationGroups(StringRef Filename) const { FunctionInstantiationSetCollector InstantiationSetCollector; - for (const auto &Function : Functions) { + // Look up the function records in the given file. Due to hash collisions on + // the filename, we may get back some records that are not in the file. + ArrayRef RecordIndices = + getImpreciseRecordIndicesForFilename(Filename); + for (unsigned RecordIndex : RecordIndices) { + const FunctionRecord &Function = Functions[RecordIndex]; auto MainFileID = findMainViewFileID(Filename, Function); if (!MainFileID) continue; diff --git a/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/lib/ProfileData/Coverage/CoverageMappingReader.cpp index e193e10f91d9..679ff3525eeb 100644 --- a/lib/ProfileData/Coverage/CoverageMappingReader.cpp +++ b/lib/ProfileData/Coverage/CoverageMappingReader.cpp @@ -506,7 +506,7 @@ public: return make_error(coveragemap_error::malformed); // Each coverage map has an alignment of 8, so we need to adjust alignment // before reading the next map. 
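getImpreciseRecordIndicesForFilename and the indexing added in loadFunctionRecord above bucket function-record indices under a hash of each filename, so getCoverageForFile and getInstantiationGroups only walk records that may touch the file. Collisions make the lookup deliberately imprecise, and duplicate filenames within one record are indexed only once. A standalone sketch of the same idea, with std::hash standing in for llvm::hash_value and all names illustrative:

    #include <functional>
    #include <iostream>
    #include <string>
    #include <unordered_map>
    #include <vector>

    struct FunctionRecord {
      std::string Name;
      std::vector<std::string> Filenames; // may repeat, e.g. macro expansions
    };

    class CoverageIndex {
      std::vector<FunctionRecord> Functions;
      // Filename hash -> indices of records touching a file with that hash.
      std::unordered_map<size_t, std::vector<unsigned>> HashToRecordIndices;

    public:
      void add(FunctionRecord FR) {
        Functions.push_back(std::move(FR));
        unsigned RecordIndex = Functions.size() - 1;
        for (const std::string &Filename : Functions.back().Filenames) {
          auto &Indices =
              HashToRecordIndices[std::hash<std::string>{}(Filename)];
          // Avoid pushing the same record twice when a filename repeats.
          if (Indices.empty() || Indices.back() != RecordIndex)
            Indices.push_back(RecordIndex);
        }
      }

      // "Imprecise" lookup: hash collisions may return extra records, so
      // callers still check each record against the filename they care about.
      std::vector<const FunctionRecord *>
      recordsForFile(const std::string &File) const {
        std::vector<const FunctionRecord *> Result;
        auto It = HashToRecordIndices.find(std::hash<std::string>{}(File));
        if (It == HashToRecordIndices.end())
          return Result;
        for (unsigned Idx : It->second)
          for (const std::string &F : Functions[Idx].Filenames)
            if (F == File) {
              Result.push_back(&Functions[Idx]);
              break;
            }
        return Result;
      }
    };

    int main() {
      CoverageIndex Index;
      Index.add({"foo", {"a.cpp", "a.cpp"}}); // duplicate filename, indexed once
      Index.add({"bar", {"b.cpp"}});
      std::cout << "records in a.cpp: " << Index.recordsForFile("a.cpp").size()
                << "\n";
    }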
- Buf += alignmentAdjustment(Buf, 8); + Buf += offsetToAlignedAddr(Buf, Align(8)); auto CFR = reinterpret_cast(FunBuf); while ((const char *)CFR < FunEnd) { @@ -539,7 +539,7 @@ Expected> CovMapFuncRecordReader::get( switch (Version) { case CovMapVersion::Version1: - return llvm::make_unique>(P, R, F); case CovMapVersion::Version2: case CovMapVersion::Version3: @@ -547,10 +547,10 @@ Expected> CovMapFuncRecordReader::get( if (Error E = P.create(P.getNameData())) return std::move(E); if (Version == CovMapVersion::Version2) - return llvm::make_unique>(P, R, F); else - return llvm::make_unique>(P, R, F); } llvm_unreachable("Unsupported version"); @@ -648,7 +648,7 @@ loadTestingFormat(StringRef Data) { // Skip the padding bytes because coverage map data has an alignment of 8. if (CoverageMapping.empty()) return make_error(coveragemap_error::truncated); - size_t Pad = alignmentAdjustment(CoverageMapping.data(), 8); + size_t Pad = offsetToAlignedAddr(CoverageMapping.data(), Align(8)); if (CoverageMapping.size() < Pad) return make_error(coveragemap_error::malformed); CoverageMapping = CoverageMapping.substr(Pad); @@ -666,11 +666,11 @@ static Expected lookupSection(ObjectFile &OF, StringRef Name) { }; Name = stripSuffix(Name); - StringRef FoundName; for (const auto &Section : OF.sections()) { - if (auto EC = Section.getName(FoundName)) - return errorCodeToError(EC); - if (stripSuffix(FoundName) == Name) + Expected NameOrErr = Section.getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + if (stripSuffix(*NameOrErr) == Name) return Section; } return make_error(coveragemap_error::no_data_found); @@ -682,7 +682,7 @@ loadBinaryFormat(std::unique_ptr Bin, StringRef Arch) { if (auto *Universal = dyn_cast(Bin.get())) { // If we have a universal binary, try to look up the object for the // appropriate architecture. 
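The switch from alignmentAdjustment(Buf, 8) to offsetToAlignedAddr(Buf, Align(8)) above is the same computation under a newer API: how many padding bytes separate an address from the next 8-byte boundary. A tiny standalone sketch of that arithmetic (paddingTo is an illustrative name, not an LLVM helper):

    #include <cassert>
    #include <cstdint>
    #include <iostream>

    // Number of bytes to add to Addr so it becomes a multiple of Align.
    // Align must be a power of two.
    static uint64_t paddingTo(uint64_t Addr, uint64_t Align) {
      assert(Align && (Align & (Align - 1)) == 0 &&
             "alignment must be a power of two");
      return (-Addr) & (Align - 1);
    }

    int main() {
      // Coverage maps are 8-byte aligned, so after reading a map that ends at
      // offset 21 we must skip 3 bytes before reading the next one.
      std::cout << paddingTo(21, 8) << "\n"; // prints 3
      std::cout << paddingTo(24, 8) << "\n"; // prints 0
    }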
- auto ObjectFileOrErr = Universal->getObjectForArch(Arch); + auto ObjectFileOrErr = Universal->getMachOObjectForArch(Arch); if (!ObjectFileOrErr) return ObjectFileOrErr.takeError(); OF = std::move(ObjectFileOrErr.get()); diff --git a/lib/ProfileData/Coverage/CoverageMappingWriter.cpp b/lib/ProfileData/Coverage/CoverageMappingWriter.cpp index 432b20f217ca..d75854a60d1e 100644 --- a/lib/ProfileData/Coverage/CoverageMappingWriter.cpp +++ b/lib/ProfileData/Coverage/CoverageMappingWriter.cpp @@ -24,6 +24,16 @@ using namespace llvm; using namespace coverage; +CoverageFilenamesSectionWriter::CoverageFilenamesSectionWriter( + ArrayRef Filenames) + : Filenames(Filenames) { +#ifndef NDEBUG + StringSet<> NameSet; + for (StringRef Name : Filenames) + assert(NameSet.insert(Name).second && "Duplicate filename"); +#endif +} + void CoverageFilenamesSectionWriter::write(raw_ostream &OS) { encodeULEB128(Filenames.size(), OS); for (const auto &Filename : Filenames) { diff --git a/lib/ProfileData/GCOV.cpp b/lib/ProfileData/GCOV.cpp index fa4e433d7aa6..00e6294c57a6 100644 --- a/lib/ProfileData/GCOV.cpp +++ b/lib/ProfileData/GCOV.cpp @@ -40,7 +40,7 @@ bool GCOVFile::readGCNO(GCOVBuffer &Buffer) { while (true) { if (!Buffer.readFunctionTag()) break; - auto GFun = make_unique(*this); + auto GFun = std::make_unique(*this); if (!GFun->readGCNO(Buffer, Version)) return false; Functions.push_back(std::move(GFun)); @@ -164,7 +164,7 @@ bool GCOVFunction::readGCNO(GCOVBuffer &Buff, GCOV::GCOVVersion Version) { for (uint32_t i = 0, e = BlockCount; i != e; ++i) { if (!Buff.readInt(Dummy)) return false; // Block flags; - Blocks.push_back(make_unique(*this, i)); + Blocks.push_back(std::make_unique(*this, i)); } // read edges. @@ -185,7 +185,7 @@ bool GCOVFunction::readGCNO(GCOVBuffer &Buff, GCOV::GCOVVersion Version) { uint32_t Dst; if (!Buff.readInt(Dst)) return false; - Edges.push_back(make_unique(*Blocks[BlockNo], *Blocks[Dst])); + Edges.push_back(std::make_unique(*Blocks[BlockNo], *Blocks[Dst])); GCOVEdge *Edge = Edges.back().get(); Blocks[BlockNo]->addDstEdge(Edge); Blocks[Dst]->addSrcEdge(Edge); @@ -702,14 +702,14 @@ std::string FileInfo::getCoveragePath(StringRef Filename, std::unique_ptr FileInfo::openCoveragePath(StringRef CoveragePath) { if (Options.NoOutput) - return llvm::make_unique(); + return std::make_unique(); std::error_code EC; auto OS = - llvm::make_unique(CoveragePath, EC, sys::fs::F_Text); + std::make_unique(CoveragePath, EC, sys::fs::OF_Text); if (EC) { errs() << EC.message() << "\n"; - return llvm::make_unique(); + return std::make_unique(); } return std::move(OS); } diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp index 510fd9887d9a..57d4fbc59f83 100644 --- a/lib/ProfileData/InstrProf.cpp +++ b/lib/ProfileData/InstrProf.cpp @@ -478,7 +478,7 @@ Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) { return Error::success(); } -void InstrProfRecord::accumuateCounts(CountSumOrPercent &Sum) const { +void InstrProfRecord::accumulateCounts(CountSumOrPercent &Sum) const { uint64_t FuncSum = 0; Sum.NumEntries += Counts.size(); for (size_t F = 0, E = Counts.size(); F < E; ++F) @@ -552,7 +552,7 @@ void InstrProfRecord::overlap(InstrProfRecord &Other, OverlapStats &Overlap, uint64_t ValueCutoff) { // FuncLevel CountSum for other should already computed and nonzero. 
assert(FuncLevelOverlap.Test.CountSum >= 1.0f); - accumuateCounts(FuncLevelOverlap.Base); + accumulateCounts(FuncLevelOverlap.Base); bool Mismatch = (Counts.size() != Other.Counts.size()); // Check if the value profiles mismatch. @@ -1078,12 +1078,10 @@ bool isIRPGOFlagSet(const Module *M) { if (!IRInstrVar->hasInitializer()) return false; - const Constant *InitVal = IRInstrVar->getInitializer(); + auto *InitVal = dyn_cast_or_null(IRInstrVar->getInitializer()); if (!InitVal) return false; - - return (dyn_cast(InitVal)->getZExtValue() & - VARIANT_MASK_IR_PROF) != 0; + return (InitVal->getZExtValue() & VARIANT_MASK_IR_PROF) != 0; } // Check if we can safely rename this Comdat function. @@ -1166,9 +1164,9 @@ void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput) { } } -Error OverlapStats::accumuateCounts(const std::string &BaseFilename, - const std::string &TestFilename, - bool IsCS) { +Error OverlapStats::accumulateCounts(const std::string &BaseFilename, + const std::string &TestFilename, + bool IsCS) { auto getProfileSum = [IsCS](const std::string &Filename, CountSumOrPercent &Sum) -> Error { auto ReaderOrErr = InstrProfReader::create(Filename); @@ -1176,7 +1174,7 @@ Error OverlapStats::accumuateCounts(const std::string &BaseFilename, return E; } auto Reader = std::move(ReaderOrErr.get()); - Reader->accumuateCounts(Sum, IsCS); + Reader->accumulateCounts(Sum, IsCS); return Error::success(); }; auto Ret = getProfileSum(BaseFilename, Base); diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp index fec1c152991c..23d078a3ddee 100644 --- a/lib/ProfileData/InstrProfReader.cpp +++ b/lib/ProfileData/InstrProfReader.cpp @@ -119,7 +119,7 @@ IndexedInstrProfReader::create(std::unique_ptr Buffer, // Create the reader. if (!IndexedInstrProfReader::hasFormat(*Buffer)) return make_error(instrprof_error::bad_magic); - auto Result = llvm::make_unique( + auto Result = std::make_unique( std::move(Buffer), std::move(RemappingBuffer)); // Initialize the reader and return the result. @@ -385,7 +385,7 @@ Error RawInstrProfReader::readHeader( NamesStart = Start + NamesOffset; ValueDataStart = reinterpret_cast(Start + ValueDataOffset); - std::unique_ptr NewSymtab = make_unique(); + std::unique_ptr NewSymtab = std::make_unique(); if (Error E = createSymtab(*NewSymtab.get())) return E; @@ -413,13 +413,19 @@ Error RawInstrProfReader::readRawCounts( if (NumCounters == 0) return error(instrprof_error::malformed); - auto RawCounts = makeArrayRef(getCounter(CounterPtr), NumCounters); auto *NamesStartAsCounter = reinterpret_cast(NamesStart); + ptrdiff_t MaxNumCounters = NamesStartAsCounter - CountersStart; - // Check bounds. - if (RawCounts.data() < CountersStart || - RawCounts.data() + RawCounts.size() > NamesStartAsCounter) + // Check bounds. Note that the counter pointer embedded in the data record + // may itself be corrupt. + if (NumCounters > MaxNumCounters) return error(instrprof_error::malformed); + ptrdiff_t CounterOffset = getCounterOffset(CounterPtr); + if (CounterOffset < 0 || CounterOffset > MaxNumCounters || + (CounterOffset + NumCounters) > MaxNumCounters) + return error(instrprof_error::malformed); + + auto RawCounts = makeArrayRef(getCounter(CounterOffset), NumCounters); if (ShouldSwapBytes) { Record.Counts.clear(); @@ -767,7 +773,7 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, UseCS ? this->CS_Summary : this->Summary; // initialize InstrProfSummary using the SummaryData from disk. 
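The rewritten readRawCounts above validates the counter offset derived from the (possibly corrupt) on-disk record against the section bounds before building the ArrayRef, rather than checking raw pointers afterwards. A standalone sketch of that validate-offset-and-length-first pattern, with illustrative names and std::optional standing in for the error return:

    #include <cstdint>
    #include <iostream>
    #include <optional>
    #include <vector>

    // A slice of a counter section: [Begin, Begin + Size).
    struct CounterSection {
      const uint64_t *Begin;
      uint64_t Size; // number of counters in the section
    };

    // Return the requested counters only if both the offset and the length fit
    // inside the section; otherwise report malformed data by returning nullopt.
    static std::optional<std::vector<uint64_t>>
    readCounters(const CounterSection &Sec, int64_t CounterOffset,
                 uint64_t NumCounters) {
      if (NumCounters == 0)
        return std::nullopt; // a record must own at least one counter
      if (NumCounters > Sec.Size)
        return std::nullopt;
      if (CounterOffset < 0 || static_cast<uint64_t>(CounterOffset) > Sec.Size ||
          static_cast<uint64_t>(CounterOffset) + NumCounters > Sec.Size)
        return std::nullopt;
      return std::vector<uint64_t>(Sec.Begin + CounterOffset,
                                   Sec.Begin + CounterOffset + NumCounters);
    }

    int main() {
      std::vector<uint64_t> Raw = {1, 2, 3, 4, 5, 6, 7, 8};
      CounterSection Sec{Raw.data(), Raw.size()};
      auto Good = readCounters(Sec, 2, 3);          // counters 3, 4, 5
      auto Bad = readCounters(Sec, 6, 4);           // would run past the section
      std::cout << (Good ? "ok\n" : "malformed\n"); // ok
      std::cout << (Bad ? "ok\n" : "malformed\n");  // malformed
    }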
- Summary = llvm::make_unique( + Summary = std::make_unique( UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr, DetailedSummary, SummaryData->get(Summary::TotalBlockCount), SummaryData->get(Summary::MaxBlockCount), @@ -777,13 +783,13 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, SummaryData->get(Summary::TotalNumFunctions)); return Cur + SummarySize; } else { - // For older version of profile data, we need to compute on the fly: - using namespace IndexedInstrProf; - + // The older versions do not support a profile summary. This just computes + // an empty summary, which will not result in accurate hot/cold detection. + // We would need to call addRecord for all NamedInstrProfRecords to get the + // correct summary. However, this version is old (prior to early 2016) and + // has not been supporting an accurate summary for several years. InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); - // FIXME: This only computes an empty summary. Need to call addRecord for - // all NamedInstrProfRecords to get the correct summary. - this->Summary = Builder.getSummary(); + Summary = Builder.getSummary(); return Cur; } } @@ -827,18 +833,18 @@ Error IndexedInstrProfReader::readHeader() { // The rest of the file is an on disk hash table. auto IndexPtr = - llvm::make_unique>( + std::make_unique>( Start + HashOffset, Cur, Start, HashType, FormatVersion); // Load the remapping table now if requested. if (RemappingBuffer) { - Remapper = llvm::make_unique< + Remapper = std::make_unique< InstrProfReaderItaniumRemapper>( std::move(RemappingBuffer), *IndexPtr); if (Error E = Remapper->populateRemappings()) return E; } else { - Remapper = llvm::make_unique(*IndexPtr); + Remapper = std::make_unique(*IndexPtr); } Index = std::move(IndexPtr); @@ -849,7 +855,7 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() { if (Symtab.get()) return *Symtab.get(); - std::unique_ptr NewSymtab = make_unique(); + std::unique_ptr NewSymtab = std::make_unique(); if (Error E = Index->populateSymtab(*NewSymtab.get())) { consumeError(error(InstrProfError::take(std::move(E)))); } @@ -901,7 +907,7 @@ Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { return success(); } -void InstrProfReader::accumuateCounts(CountSumOrPercent &Sum, bool IsCS) { +void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) { uint64_t NumFuncs = 0; for (const auto &Func : *this) { if (isIRLevelProfile()) { @@ -909,7 +915,7 @@ void InstrProfReader::accumuateCounts(CountSumOrPercent &Sum, bool IsCS) { if (FuncIsCS != IsCS) continue; } - Func.accumuateCounts(Sum); + Func.accumulateCounts(Sum); ++NumFuncs; } Sum.NumEntries = NumFuncs; diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp index 4ca2defd26da..ccb270e0b719 100644 --- a/lib/ProfileData/InstrProfWriter.cpp +++ b/lib/ProfileData/InstrProfWriter.cpp @@ -193,7 +193,7 @@ void InstrProfWriter::overlapRecord(NamedInstrProfRecord &&Other, const OverlapFuncFilters &FuncFilter) { auto Name = Other.Name; auto Hash = Other.Hash; - Other.accumuateCounts(FuncLevelOverlap.Test); + Other.accumulateCounts(FuncLevelOverlap.Test); if (FunctionData.find(Name) == FunctionData.end()) { Overlap.addOneUnique(FuncLevelOverlap.Test); return; diff --git a/lib/ProfileData/ProfileSummaryBuilder.cpp b/lib/ProfileData/ProfileSummaryBuilder.cpp index 4d5b00935742..3299b5f92069 100644 --- a/lib/ProfileData/ProfileSummaryBuilder.cpp +++ b/lib/ProfileData/ProfileSummaryBuilder.cpp @@ -93,14 
+93,14 @@ void ProfileSummaryBuilder::computeDetailedSummary() { std::unique_ptr SampleProfileSummaryBuilder::getSummary() { computeDetailedSummary(); - return llvm::make_unique( + return std::make_unique( ProfileSummary::PSK_Sample, DetailedSummary, TotalCount, MaxCount, 0, MaxFunctionCount, NumCounts, NumFunctions); } std::unique_ptr InstrProfSummaryBuilder::getSummary() { computeDetailedSummary(); - return llvm::make_unique( + return std::make_unique( ProfileSummary::PSK_Instr, DetailedSummary, TotalCount, MaxCount, MaxInternalBlockCount, MaxFunctionCount, NumCounts, NumFunctions); } diff --git a/lib/ProfileData/SampleProf.cpp b/lib/ProfileData/SampleProf.cpp index e17865cd15a4..003e8d4d4296 100644 --- a/lib/ProfileData/SampleProf.cpp +++ b/lib/ProfileData/SampleProf.cpp @@ -16,7 +16,9 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" #include @@ -28,8 +30,6 @@ using namespace sampleprof; namespace llvm { namespace sampleprof { SampleProfileFormat FunctionSamples::Format; -DenseMap FunctionSamples::GUIDToFuncNameMap; -Module *FunctionSamples::CurrentModule; } // namespace sampleprof } // namespace llvm @@ -68,6 +68,12 @@ class SampleProfErrorCategoryType : public std::error_category { return "Counter overflow"; case sampleprof_error::ostream_seek_unsupported: return "Ostream does not support seek"; + case sampleprof_error::compress_failed: + return "Compress failure"; + case sampleprof_error::uncompress_failed: + return "Uncompress failure"; + case sampleprof_error::zlib_unavailable: + return "Zlib is unavailable"; } llvm_unreachable("A value of sampleprof_error has no message."); } @@ -102,8 +108,8 @@ void SampleRecord::print(raw_ostream &OS, unsigned Indent) const { OS << NumSamples; if (hasCalls()) { OS << ", calls:"; - for (const auto &I : getCallTargets()) - OS << " " << I.first() << ":" << I.second; + for (const auto &I : getSortedCallTargets()) + OS << " " << I.first << ":" << I.second; } OS << "\n"; } @@ -149,6 +155,7 @@ void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const { FS.second.print(OS, Indent + 4); } } + OS.indent(Indent); OS << "}\n"; } else { OS << "No inlined callsites in this function\n"; @@ -190,3 +197,44 @@ FunctionSamples::findFunctionSamples(const DILocation *DIL) const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void FunctionSamples::dump() const { print(dbgs(), 0); } #endif + +std::error_code ProfileSymbolList::read(const uint8_t *Data, + uint64_t ListSize) { + const char *ListStart = reinterpret_cast(Data); + uint64_t Size = 0; + while (Size < ListSize) { + StringRef Str(ListStart + Size); + add(Str); + Size += Str.size() + 1; + } + if (Size != ListSize) + return sampleprof_error::malformed; + return sampleprof_error::success; +} + +std::error_code ProfileSymbolList::write(raw_ostream &OS) { + // Sort the symbols before output. If doing compression. + // It will make the compression much more effective. 
+ std::vector SortedList; + SortedList.insert(SortedList.begin(), Syms.begin(), Syms.end()); + llvm::sort(SortedList); + + std::string OutputString; + for (auto &Sym : SortedList) { + OutputString.append(Sym.str()); + OutputString.append(1, '\0'); + } + + OS << OutputString; + return sampleprof_error::success; +} + +void ProfileSymbolList::dump(raw_ostream &OS) const { + OS << "======== Dump profile symbol list ========\n"; + std::vector SortedList; + SortedList.insert(SortedList.begin(), Syms.begin(), Syms.end()); + llvm::sort(SortedList); + + for (auto &Sym : SortedList) + OS << Sym << "\n"; +} diff --git a/lib/ProfileData/SampleProfReader.cpp b/lib/ProfileData/SampleProfReader.cpp index 192b6c711562..001aafce7bfd 100644 --- a/lib/ProfileData/SampleProfReader.cpp +++ b/lib/ProfileData/SampleProfReader.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/SampleProf.h" +#include "llvm/Support/Compression.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/LineIterator.h" @@ -190,7 +191,7 @@ static bool ParseLine(const StringRef &Input, bool &IsCallsite, uint32_t &Depth, /// the expected format. /// /// \returns true if the file was loaded successfully, false otherwise. -std::error_code SampleProfileReaderText::read() { +std::error_code SampleProfileReaderText::readImpl() { line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#'); sampleprof_error Result = sampleprof_error::success; @@ -345,7 +346,7 @@ inline ErrorOr SampleProfileReaderBinary::readStringIndex(T &Table) { return *Idx; } -ErrorOr SampleProfileReaderRawBinary::readStringFromTable() { +ErrorOr SampleProfileReaderBinary::readStringFromTable() { auto Idx = readStringIndex(NameTable); if (std::error_code EC = Idx.getError()) return EC; @@ -438,7 +439,9 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) { return sampleprof_error::success; } -std::error_code SampleProfileReaderBinary::readFuncProfile() { +std::error_code +SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) { + Data = Start; auto NumHeadSamples = readNumber(); if (std::error_code EC = NumHeadSamples.getError()) return EC; @@ -458,25 +461,210 @@ std::error_code SampleProfileReaderBinary::readFuncProfile() { return sampleprof_error::success; } -std::error_code SampleProfileReaderBinary::read() { +std::error_code SampleProfileReaderBinary::readImpl() { while (!at_eof()) { - if (std::error_code EC = readFuncProfile()) + if (std::error_code EC = readFuncProfile(Data)) + return EC; + } + + return sampleprof_error::success; +} + +std::error_code +SampleProfileReaderExtBinary::readOneSection(const uint8_t *Start, + uint64_t Size, SecType Type) { + Data = Start; + End = Start + Size; + switch (Type) { + case SecProfSummary: + if (std::error_code EC = readSummary()) + return EC; + break; + case SecNameTable: + if (std::error_code EC = readNameTable()) + return EC; + break; + case SecLBRProfile: + if (std::error_code EC = readFuncProfiles()) + return EC; + break; + case SecProfileSymbolList: + if (std::error_code EC = readProfileSymbolList()) return EC; + break; + case SecFuncOffsetTable: + if (std::error_code EC = readFuncOffsetTable()) + return EC; + break; + default: + break; } + return sampleprof_error::success; +} + +void SampleProfileReaderExtBinary::collectFuncsFrom(const Module &M) { + UseAllFuncs = false; + FuncsToUse.clear(); + for (auto &F : M) + FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F)); +} + 
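ProfileSymbolList::write and read above serialize the symbol list as sorted, NUL-terminated strings; sorting first groups similar names so a later compression pass works better, and the reader walks the blob string by string and checks that the sizes add up exactly. A standalone round-trip sketch of that encoding (writeSymbols/readSymbols are illustrative names):

    #include <algorithm>
    #include <iostream>
    #include <set>
    #include <string>
    #include <vector>

    // Serialize a symbol set as sorted, '\0'-terminated strings. Sorting groups
    // similar names together, which helps a subsequent compression pass.
    static std::string writeSymbols(const std::set<std::string> &Syms) {
      std::vector<std::string> Sorted(Syms.begin(), Syms.end());
      std::sort(Sorted.begin(), Sorted.end());
      std::string Out;
      for (const std::string &S : Sorted) {
        Out += S;
        Out.push_back('\0');
      }
      return Out;
    }

    // Walk the blob string by string; the total size must match exactly,
    // otherwise the list is malformed.
    static bool readSymbols(const std::string &Blob, std::set<std::string> &Syms) {
      size_t Pos = 0;
      while (Pos < Blob.size()) {
        std::string S(Blob.c_str() + Pos); // reads up to the next '\0'
        Syms.insert(S);
        Pos += S.size() + 1;
      }
      return Pos == Blob.size();
    }

    int main() {
      std::string Blob = writeSymbols({"_Zfoo", "_Zbar", "main"});
      std::set<std::string> RoundTrip;
      std::cout << (readSymbols(Blob, RoundTrip) ? "ok" : "malformed")
                << ", symbols: " << RoundTrip.size() << "\n";
    }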
+std::error_code SampleProfileReaderExtBinary::readFuncOffsetTable() { + auto Size = readNumber(); + if (std::error_code EC = Size.getError()) + return EC; + + FuncOffsetTable.reserve(*Size); + for (uint32_t I = 0; I < *Size; ++I) { + auto FName(readStringFromTable()); + if (std::error_code EC = FName.getError()) + return EC; + auto Offset = readNumber(); + if (std::error_code EC = Offset.getError()) + return EC; + + FuncOffsetTable[*FName] = *Offset; + } return sampleprof_error::success; } -std::error_code SampleProfileReaderCompactBinary::read() { - for (auto Name : FuncsToUse) { - auto GUID = std::to_string(MD5Hash(Name)); - auto iter = FuncOffsetTable.find(StringRef(GUID)); - if (iter == FuncOffsetTable.end()) +std::error_code SampleProfileReaderExtBinary::readFuncProfiles() { + const uint8_t *Start = Data; + if (UseAllFuncs) { + while (Data < End) { + if (std::error_code EC = readFuncProfile(Data)) + return EC; + } + assert(Data == End && "More data is read than expected"); + return sampleprof_error::success; + } + + if (Remapper) { + for (auto Name : FuncsToUse) { + Remapper->insert(Name); + } + } + + for (auto NameOffset : FuncOffsetTable) { + auto FuncName = NameOffset.first; + if (!FuncsToUse.count(FuncName) && + (!Remapper || !Remapper->exist(FuncName))) continue; + const uint8_t *FuncProfileAddr = Start + NameOffset.second; + assert(FuncProfileAddr < End && "out of LBRProfile section"); + if (std::error_code EC = readFuncProfile(FuncProfileAddr)) + return EC; + } + + Data = End; + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderExtBinary::readProfileSymbolList() { + if (!ProfSymList) + ProfSymList = std::make_unique(); + + if (std::error_code EC = ProfSymList->read(Data, End - Data)) + return EC; + + Data = End; + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderExtBinaryBase::decompressSection( + const uint8_t *SecStart, const uint64_t SecSize, + const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) { + Data = SecStart; + End = SecStart + SecSize; + auto DecompressSize = readNumber(); + if (std::error_code EC = DecompressSize.getError()) + return EC; + DecompressBufSize = *DecompressSize; + + auto CompressSize = readNumber(); + if (std::error_code EC = CompressSize.getError()) + return EC; + + if (!llvm::zlib::isAvailable()) + return sampleprof_error::zlib_unavailable; + + StringRef CompressedStrings(reinterpret_cast(Data), + *CompressSize); + char *Buffer = Allocator.Allocate(DecompressBufSize); + size_t UCSize = DecompressBufSize; + llvm::Error E = + zlib::uncompress(CompressedStrings, Buffer, UCSize); + if (E) + return sampleprof_error::uncompress_failed; + DecompressBuf = reinterpret_cast(Buffer); + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderExtBinaryBase::readImpl() { + const uint8_t *BufStart = + reinterpret_cast(Buffer->getBufferStart()); + + for (auto &Entry : SecHdrTable) { + // Skip empty section. + if (!Entry.Size) + continue; + + const uint8_t *SecStart = BufStart + Entry.Offset; + uint64_t SecSize = Entry.Size; + + // If the section is compressed, decompress it into a buffer + // DecompressBuf before reading the actual data. The pointee of + // 'Data' will be changed to buffer hold by DecompressBuf + // temporarily when reading the actual data. 
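readFuncOffsetTable and readFuncProfiles above record a per-function offset into the LBRProfile section and then, once collectFuncsFrom has narrowed the set of interesting functions, jump straight to those profiles instead of decoding the whole section. A standalone sketch of that offset-table pattern; indices stand in for byte offsets and all names are illustrative:

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    struct Profile {
      std::string Func;
      uint64_t Samples;
    };

    // "Section" of profiles plus an offset table mapping name -> position.
    // In the real format the offsets are byte offsets into a serialized
    // section; vector indices keep this sketch simple.
    struct ProfileSection {
      std::vector<Profile> Data;
      std::map<std::string, size_t> FuncOffsetTable;

      void add(Profile P) {
        FuncOffsetTable[P.Func] = Data.size();
        Data.push_back(std::move(P));
      }

      // Load only the profiles whose functions appear in FuncsToUse, jumping
      // directly to each one through the offset table.
      std::vector<Profile> loadFor(const std::set<std::string> &FuncsToUse) const {
        std::vector<Profile> Loaded;
        for (const auto &Entry : FuncOffsetTable)
          if (FuncsToUse.count(Entry.first))
            Loaded.push_back(Data[Entry.second]);
        return Loaded;
      }
    };

    int main() {
      ProfileSection Section;
      Section.add({"foo", 100});
      Section.add({"bar", 20});
      Section.add({"baz", 7});
      // Pretend the module being compiled only defines foo and baz.
      auto Loaded = Section.loadFor({"foo", "baz"});
      std::cout << "loaded " << Loaded.size() << " of " << Section.Data.size()
                << " profiles\n"; // loaded 2 of 3 profiles
    }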
+ bool isCompressed = hasSecFlag(Entry, SecFlagCompress); + if (isCompressed) { + const uint8_t *DecompressBuf; + uint64_t DecompressBufSize; + if (std::error_code EC = decompressSection( + SecStart, SecSize, DecompressBuf, DecompressBufSize)) + return EC; + SecStart = DecompressBuf; + SecSize = DecompressBufSize; + } + + if (std::error_code EC = readOneSection(SecStart, SecSize, Entry.Type)) + return EC; + if (Data != SecStart + SecSize) + return sampleprof_error::malformed; + + // Change the pointee of 'Data' from DecompressBuf to original Buffer. + if (isCompressed) { + Data = BufStart + Entry.Offset; + End = BufStart + Buffer->getBufferSize(); + } + } + + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderCompactBinary::readImpl() { + std::vector OffsetsToUse; + if (UseAllFuncs) { + for (auto FuncEntry : FuncOffsetTable) { + OffsetsToUse.push_back(FuncEntry.second); + } + } + else { + for (auto Name : FuncsToUse) { + auto GUID = std::to_string(MD5Hash(Name)); + auto iter = FuncOffsetTable.find(StringRef(GUID)); + if (iter == FuncOffsetTable.end()) + continue; + OffsetsToUse.push_back(iter->second); + } + } + + for (auto Offset : OffsetsToUse) { const uint8_t *SavedData = Data; - Data = reinterpret_cast(Buffer->getBufferStart()) + - iter->second; - if (std::error_code EC = readFuncProfile()) + if (std::error_code EC = readFuncProfile( + reinterpret_cast(Buffer->getBufferStart()) + + Offset)) return EC; Data = SavedData; } @@ -489,6 +677,12 @@ std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) { return sampleprof_error::bad_magic; } +std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) { + if (Magic == SPMagic(SPF_Ext_Binary)) + return sampleprof_error::success; + return sampleprof_error::bad_magic; +} + std::error_code SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) { if (Magic == SPMagic(SPF_Compact_Binary)) @@ -496,7 +690,7 @@ SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) { return sampleprof_error::bad_magic; } -std::error_code SampleProfileReaderRawBinary::readNameTable() { +std::error_code SampleProfileReaderBinary::readNameTable() { auto Size = readNumber(); if (std::error_code EC = Size.getError()) return EC; @@ -525,10 +719,98 @@ std::error_code SampleProfileReaderCompactBinary::readNameTable() { return sampleprof_error::success; } -std::error_code SampleProfileReaderBinary::readHeader() { - Data = reinterpret_cast(Buffer->getBufferStart()); - End = Data + Buffer->getBufferSize(); +std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTableEntry() { + SecHdrTableEntry Entry; + auto Type = readUnencodedNumber(); + if (std::error_code EC = Type.getError()) + return EC; + Entry.Type = static_cast(*Type); + auto Flags = readUnencodedNumber(); + if (std::error_code EC = Flags.getError()) + return EC; + Entry.Flags = *Flags; + + auto Offset = readUnencodedNumber(); + if (std::error_code EC = Offset.getError()) + return EC; + Entry.Offset = *Offset; + + auto Size = readUnencodedNumber(); + if (std::error_code EC = Size.getError()) + return EC; + Entry.Size = *Size; + + SecHdrTable.push_back(std::move(Entry)); + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() { + auto EntryNum = readUnencodedNumber(); + if (std::error_code EC = EntryNum.getError()) + return EC; + + for (uint32_t i = 0; i < (*EntryNum); i++) + if (std::error_code EC = readSecHdrTableEntry()) + return EC; + + return sampleprof_error::success; +} + 
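decompressSection and readImpl above frame each compressed section as a decompressed size, a compressed size, and the zlib payload, inflate it into a side buffer, and then restore Data/End to the original buffer afterwards. Below is a standalone sketch of that framing against the plain zlib C API (compress/uncompress, link with -lz), assuming zlib is available; it stores the two sizes as fixed-width integers rather than LEB128 to stay short, so it is not the on-disk format itself.

    #include <cstdint>
    #include <cstring>
    #include <iostream>
    #include <string>
    #include <vector>
    #include <zlib.h>

    // Frame: [uncompressed size][compressed size][compressed payload].
    static std::vector<unsigned char> writeSection(const std::string &Payload) {
      uLongf CompSize = compressBound(Payload.size());
      std::vector<unsigned char> Comp(CompSize);
      if (compress(Comp.data(), &CompSize,
                   reinterpret_cast<const Bytef *>(Payload.data()),
                   Payload.size()) != Z_OK)
        return {};
      uint64_t Sizes[2] = {Payload.size(), CompSize};
      std::vector<unsigned char> Out(sizeof(Sizes));
      std::memcpy(Out.data(), Sizes, sizeof(Sizes));
      Out.insert(Out.end(), Comp.begin(), Comp.begin() + CompSize);
      return Out;
    }

    // Read the two sizes, then inflate the payload into a separate buffer,
    // mirroring how the reader temporarily points Data/End at DecompressBuf.
    static bool readSection(const std::vector<unsigned char> &Section,
                            std::string &Payload) {
      if (Section.size() < 2 * sizeof(uint64_t))
        return false;
      uint64_t Sizes[2];
      std::memcpy(Sizes, Section.data(), sizeof(Sizes));
      if (Section.size() < 2 * sizeof(uint64_t) + Sizes[1])
        return false;
      Payload.resize(Sizes[0]);
      uLongf DecompSize = Sizes[0];
      if (uncompress(reinterpret_cast<Bytef *>(&Payload[0]), &DecompSize,
                     Section.data() + 2 * sizeof(uint64_t), Sizes[1]) != Z_OK)
        return false;
      return DecompSize == Sizes[0];
    }

    int main() {
      std::string Original(1000, 'p'); // highly compressible payload
      auto Section = writeSection(Original);
      std::string RoundTrip;
      std::cout << (readSection(Section, RoundTrip) && RoundTrip == Original
                        ? "round trip ok"
                        : "error")
                << ", section bytes: " << Section.size() << "\n";
    }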
+std::error_code SampleProfileReaderExtBinaryBase::readHeader() { + const uint8_t *BufStart = + reinterpret_cast(Buffer->getBufferStart()); + Data = BufStart; + End = BufStart + Buffer->getBufferSize(); + + if (std::error_code EC = readMagicIdent()) + return EC; + + if (std::error_code EC = readSecHdrTable()) + return EC; + + return sampleprof_error::success; +} + +uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) { + for (auto &Entry : SecHdrTable) { + if (Entry.Type == Type) + return Entry.Size; + } + return 0; +} + +uint64_t SampleProfileReaderExtBinaryBase::getFileSize() { + // Sections in SecHdrTable is not necessarily in the same order as + // sections in the profile because section like FuncOffsetTable needs + // to be written after section LBRProfile but needs to be read before + // section LBRProfile, so we cannot simply use the last entry in + // SecHdrTable to calculate the file size. + uint64_t FileSize = 0; + for (auto &Entry : SecHdrTable) { + FileSize = std::max(Entry.Offset + Entry.Size, FileSize); + } + return FileSize; +} + +bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) { + uint64_t TotalSecsSize = 0; + for (auto &Entry : SecHdrTable) { + OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset + << ", Size: " << Entry.Size << "\n"; + TotalSecsSize += getSectionSize(Entry.Type); + } + uint64_t HeaderSize = SecHdrTable.front().Offset; + assert(HeaderSize + TotalSecsSize == getFileSize() && + "Size of 'header + sections' doesn't match the total size of profile"); + + OS << "Header Size: " << HeaderSize << "\n"; + OS << "Total Sections Size: " << TotalSecsSize << "\n"; + OS << "File Size: " << getFileSize() << "\n"; + return true; +} + +std::error_code SampleProfileReaderBinary::readMagicIdent() { // Read and check the magic identifier. 
auto Magic = readNumber(); if (std::error_code EC = Magic.getError()) @@ -543,6 +825,16 @@ std::error_code SampleProfileReaderBinary::readHeader() { else if (*Version != SPVersion()) return sampleprof_error::unsupported_version; + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderBinary::readHeader() { + Data = reinterpret_cast(Buffer->getBufferStart()); + End = Data + Buffer->getBufferSize(); + + if (std::error_code EC = readMagicIdent()) + return EC; + if (std::error_code EC = readSummary()) return EC; @@ -590,12 +882,11 @@ std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() { return sampleprof_error::success; } -void SampleProfileReaderCompactBinary::collectFuncsToUse(const Module &M) { +void SampleProfileReaderCompactBinary::collectFuncsFrom(const Module &M) { + UseAllFuncs = false; FuncsToUse.clear(); - for (auto &F : M) { - StringRef CanonName = FunctionSamples::getCanonicalFnName(F); - FuncsToUse.insert(CanonName); - } + for (auto &F : M) + FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F)); } std::error_code SampleProfileReaderBinary::readSummaryEntry( @@ -647,7 +938,7 @@ std::error_code SampleProfileReaderBinary::readSummary() { if (EC != sampleprof_error::success) return EC; } - Summary = llvm::make_unique( + Summary = std::make_unique( ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0, *MaxFunctionCount, *NumBlocks, *NumFunctions); @@ -661,6 +952,13 @@ bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) { return Magic == SPMagic(); } +bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) { + const uint8_t *Data = + reinterpret_cast(Buffer.getBufferStart()); + uint64_t Magic = decodeULEB128(Data); + return Magic == SPMagic(SPF_Ext_Binary); +} + bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) { const uint8_t *Data = reinterpret_cast(Buffer.getBufferStart()); @@ -894,7 +1192,7 @@ std::error_code SampleProfileReaderGCC::readOneFunctionProfile( /// /// This format is generated by the Linux Perf conversion tool at /// https://github.com/google/autofdo. -std::error_code SampleProfileReaderGCC::read() { +std::error_code SampleProfileReaderGCC::readImpl() { // Read the string table. if (std::error_code EC = readNameTable()) return EC; @@ -911,38 +1209,31 @@ bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) { return Magic == "adcg*704"; } -std::error_code SampleProfileReaderItaniumRemapper::read() { - // If the underlying data is in compact format, we can't remap it because +void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) { + // If the reader is in compact format, we can't remap it because // we don't know what the original function names were. 
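readMagicIdent above, like the rest of these binary profile formats, stores the magic and version as ULEB128-encoded integers. A standalone sketch of ULEB128 encoding and decoding follows; the real code uses llvm/Support/LEB128.h, and the two functions below are local reimplementations for illustration only:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Encode Value as ULEB128: 7 data bits per byte, high bit set on all but
    // the last byte.
    static void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80; // more bytes follow
        Out.push_back(Byte);
      } while (Value != 0);
    }

    // Decode a ULEB128 value starting at Pos; advances Pos past the bytes read.
    static uint64_t decodeULEB128(const std::vector<uint8_t> &In, size_t &Pos) {
      uint64_t Value = 0;
      unsigned Shift = 0;
      while (Pos < In.size()) {
        uint8_t Byte = In[Pos++];
        Value |= uint64_t(Byte & 0x7f) << Shift;
        if ((Byte & 0x80) == 0)
          break;
        Shift += 7;
      }
      return Value;
    }

    int main() {
      // Round-trip a magic-like constant and a small version number.
      std::vector<uint8_t> Buf;
      encodeULEB128(0x1122334455667788ULL, Buf);
      encodeULEB128(103, Buf);
      size_t Pos = 0;
      uint64_t Magic = decodeULEB128(Buf, Pos);
      uint64_t Version = decodeULEB128(Buf, Pos);
      std::cout << std::hex << Magic << " " << std::dec << Version << "\n";
    }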
- if (getFormat() == SPF_Compact_Binary) { + if (Reader.getFormat() == SPF_Compact_Binary) { Ctx.diagnose(DiagnosticInfoSampleProfile( - Buffer->getBufferIdentifier(), + Reader.getBuffer()->getBufferIdentifier(), "Profile data remapping cannot be applied to profile data " "in compact format (original mangled names are not available).", DS_Warning)); - return sampleprof_error::success; - } - - if (Error E = Remappings.read(*Buffer)) { - handleAllErrors( - std::move(E), [&](const SymbolRemappingParseError &ParseError) { - reportError(ParseError.getLineNum(), ParseError.getMessage()); - }); - return sampleprof_error::malformed; + return; } - for (auto &Sample : getProfiles()) - if (auto Key = Remappings.insert(Sample.first())) + assert(Remappings && "should be initialized while creating remapper"); + for (auto &Sample : Reader.getProfiles()) + if (auto Key = Remappings->insert(Sample.first())) SampleMap.insert({Key, &Sample.second}); - return sampleprof_error::success; + RemappingApplied = true; } FunctionSamples * SampleProfileReaderItaniumRemapper::getSamplesFor(StringRef Fname) { - if (auto Key = Remappings.lookup(Fname)) + if (auto Key = Remappings->lookup(Fname)) return SampleMap.lookup(Key); - return SampleProfileReader::getSamplesFor(Fname); + return nullptr; } /// Prepare a memory buffer for the contents of \p Filename. @@ -968,13 +1259,16 @@ setupMemoryBuffer(const Twine &Filename) { /// /// \param C The LLVM context to use to emit diagnostics. /// +/// \param RemapFilename The file used for profile remapping. +/// /// \returns an error code indicating the status of the created reader. ErrorOr> -SampleProfileReader::create(const Twine &Filename, LLVMContext &C) { +SampleProfileReader::create(const std::string Filename, LLVMContext &C, + const std::string RemapFilename) { auto BufferOrError = setupMemoryBuffer(Filename); if (std::error_code EC = BufferOrError.getError()) return EC; - return create(BufferOrError.get(), C); + return create(BufferOrError.get(), C, RemapFilename); } /// Create a sample profile remapper from the given input, to remap the @@ -982,20 +1276,48 @@ SampleProfileReader::create(const Twine &Filename, LLVMContext &C) { /// /// \param Filename The file to open. /// -/// \param C The LLVM context to use to emit diagnostics. +/// \param Reader The profile reader the remapper is going to be applied to. /// -/// \param Underlying The underlying profile data reader to remap. +/// \param C The LLVM context to use to emit diagnostics. /// /// \returns an error code indicating the status of the created reader. -ErrorOr> -SampleProfileReaderItaniumRemapper::create( - const Twine &Filename, LLVMContext &C, - std::unique_ptr Underlying) { +ErrorOr> +SampleProfileReaderItaniumRemapper::create(const std::string Filename, + SampleProfileReader &Reader, + LLVMContext &C) { auto BufferOrError = setupMemoryBuffer(Filename); if (std::error_code EC = BufferOrError.getError()) return EC; - return llvm::make_unique( - std::move(BufferOrError.get()), C, std::move(Underlying)); + return create(BufferOrError.get(), Reader, C); +} + +/// Create a sample profile remapper from the given input, to remap the +/// function names in the given profile data. +/// +/// \param B The memory buffer to create the reader from (assumes ownership). +/// +/// \param C The LLVM context to use to emit diagnostics. +/// +/// \param Reader The profile reader the remapper is going to be applied to. +/// +/// \returns an error code indicating the status of the created reader. 
+ErrorOr> +SampleProfileReaderItaniumRemapper::create(std::unique_ptr &B, + SampleProfileReader &Reader, + LLVMContext &C) { + auto Remappings = std::make_unique(); + if (Error E = Remappings->read(*B.get())) { + handleAllErrors( + std::move(E), [&](const SymbolRemappingParseError &ParseError) { + C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(), + ParseError.getLineNum(), + ParseError.getMessage())); + }); + return sampleprof_error::malformed; + } + + return std::make_unique( + std::move(B), std::move(Remappings), Reader); } /// Create a sample profile reader based on the format of the input data. @@ -1004,12 +1326,17 @@ SampleProfileReaderItaniumRemapper::create( /// /// \param C The LLVM context to use to emit diagnostics. /// +/// \param RemapFilename The file used for profile remapping. +/// /// \returns an error code indicating the status of the created reader. ErrorOr> -SampleProfileReader::create(std::unique_ptr &B, LLVMContext &C) { +SampleProfileReader::create(std::unique_ptr &B, LLVMContext &C, + const std::string RemapFilename) { std::unique_ptr Reader; if (SampleProfileReaderRawBinary::hasFormat(*B)) Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C)); + else if (SampleProfileReaderExtBinary::hasFormat(*B)) + Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C)); else if (SampleProfileReaderCompactBinary::hasFormat(*B)) Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C)); else if (SampleProfileReaderGCC::hasFormat(*B)) @@ -1019,9 +1346,21 @@ SampleProfileReader::create(std::unique_ptr &B, LLVMContext &C) { else return sampleprof_error::unrecognized_format; + if (!RemapFilename.empty()) { + auto ReaderOrErr = + SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C); + if (std::error_code EC = ReaderOrErr.getError()) { + std::string Msg = "Could not create remapper: " + EC.message(); + C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg)); + return EC; + } + Reader->Remapper = std::move(ReaderOrErr.get()); + } + FunctionSamples::Format = Reader->getFormat(); - if (std::error_code EC = Reader->readHeader()) + if (std::error_code EC = Reader->readHeader()) { return EC; + } return std::move(Reader); } diff --git a/lib/ProfileData/SampleProfWriter.cpp b/lib/ProfileData/SampleProfWriter.cpp index 8b876e0aa5d9..8d09af31f94b 100644 --- a/lib/ProfileData/SampleProfWriter.cpp +++ b/lib/ProfileData/SampleProfWriter.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/SampleProf.h" +#include "llvm/Support/Compression.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/ErrorOr.h" @@ -39,11 +40,8 @@ using namespace llvm; using namespace sampleprof; -std::error_code -SampleProfileWriter::write(const StringMap &ProfileMap) { - if (std::error_code EC = writeHeader(ProfileMap)) - return EC; - +std::error_code SampleProfileWriter::writeFuncProfiles( + const StringMap &ProfileMap) { // Sort the ProfileMap by total samples. 
typedef std::pair NameFunctionSamples; std::vector V; @@ -58,12 +56,161 @@ SampleProfileWriter::write(const StringMap &ProfileMap) { }); for (const auto &I : V) { - if (std::error_code EC = write(*I.second)) + if (std::error_code EC = writeSample(*I.second)) return EC; } return sampleprof_error::success; } +std::error_code +SampleProfileWriter::write(const StringMap &ProfileMap) { + if (std::error_code EC = writeHeader(ProfileMap)) + return EC; + + if (std::error_code EC = writeFuncProfiles(ProfileMap)) + return EC; + + return sampleprof_error::success; +} + +SecHdrTableEntry & +SampleProfileWriterExtBinaryBase::getEntryInLayout(SecType Type) { + auto SecIt = std::find_if( + SectionHdrLayout.begin(), SectionHdrLayout.end(), + [=](const auto &Entry) -> bool { return Entry.Type == Type; }); + return *SecIt; +} + +/// Return the current position and prepare to use it as the start +/// position of a section. +uint64_t SampleProfileWriterExtBinaryBase::markSectionStart(SecType Type) { + uint64_t SectionStart = OutputStream->tell(); + auto &Entry = getEntryInLayout(Type); + // Use LocalBuf as a temporary output for writting data. + if (hasSecFlag(Entry, SecFlagCompress)) + LocalBufStream.swap(OutputStream); + return SectionStart; +} + +std::error_code SampleProfileWriterExtBinaryBase::compressAndOutput() { + if (!llvm::zlib::isAvailable()) + return sampleprof_error::zlib_unavailable; + std::string &UncompressedStrings = + static_cast(LocalBufStream.get())->str(); + if (UncompressedStrings.size() == 0) + return sampleprof_error::success; + auto &OS = *OutputStream; + SmallString<128> CompressedStrings; + llvm::Error E = zlib::compress(UncompressedStrings, CompressedStrings, + zlib::BestSizeCompression); + if (E) + return sampleprof_error::compress_failed; + encodeULEB128(UncompressedStrings.size(), OS); + encodeULEB128(CompressedStrings.size(), OS); + OS << CompressedStrings.str(); + UncompressedStrings.clear(); + return sampleprof_error::success; +} + +/// Add a new section into section header table. +std::error_code +SampleProfileWriterExtBinaryBase::addNewSection(SecType Type, + uint64_t SectionStart) { + auto Entry = getEntryInLayout(Type); + if (hasSecFlag(Entry, SecFlagCompress)) { + LocalBufStream.swap(OutputStream); + if (std::error_code EC = compressAndOutput()) + return EC; + } + SecHdrTable.push_back({Type, Entry.Flags, SectionStart - FileStart, + OutputStream->tell() - SectionStart}); + return sampleprof_error::success; +} + +std::error_code SampleProfileWriterExtBinaryBase::write( + const StringMap &ProfileMap) { + if (std::error_code EC = writeHeader(ProfileMap)) + return EC; + + std::string LocalBuf; + LocalBufStream = std::make_unique(LocalBuf); + if (std::error_code EC = writeSections(ProfileMap)) + return EC; + + if (std::error_code EC = writeSecHdrTable()) + return EC; + + return sampleprof_error::success; +} + +std::error_code +SampleProfileWriterExtBinary::writeSample(const FunctionSamples &S) { + uint64_t Offset = OutputStream->tell(); + StringRef Name = S.getName(); + FuncOffsetTable[Name] = Offset - SecLBRProfileStart; + encodeULEB128(S.getHeadSamples(), *OutputStream); + return writeBody(S); +} + +std::error_code SampleProfileWriterExtBinary::writeFuncOffsetTable() { + auto &OS = *OutputStream; + + // Write out the table size. + encodeULEB128(FuncOffsetTable.size(), OS); + + // Write out FuncOffsetTable. 
+ for (auto entry : FuncOffsetTable) { + writeNameIdx(entry.first); + encodeULEB128(entry.second, OS); + } + return sampleprof_error::success; +} + +std::error_code SampleProfileWriterExtBinary::writeSections( + const StringMap &ProfileMap) { + uint64_t SectionStart = markSectionStart(SecProfSummary); + computeSummary(ProfileMap); + if (auto EC = writeSummary()) + return EC; + if (std::error_code EC = addNewSection(SecProfSummary, SectionStart)) + return EC; + + // Generate the name table for all the functions referenced in the profile. + SectionStart = markSectionStart(SecNameTable); + for (const auto &I : ProfileMap) { + addName(I.first()); + addNames(I.second); + } + writeNameTable(); + if (std::error_code EC = addNewSection(SecNameTable, SectionStart)) + return EC; + + SectionStart = markSectionStart(SecLBRProfile); + SecLBRProfileStart = OutputStream->tell(); + if (std::error_code EC = writeFuncProfiles(ProfileMap)) + return EC; + if (std::error_code EC = addNewSection(SecLBRProfile, SectionStart)) + return EC; + + if (ProfSymList && ProfSymList->toCompress()) + setToCompressSection(SecProfileSymbolList); + + SectionStart = markSectionStart(SecProfileSymbolList); + if (ProfSymList && ProfSymList->size() > 0) + if (std::error_code EC = ProfSymList->write(*OutputStream)) + return EC; + if (std::error_code EC = addNewSection(SecProfileSymbolList, SectionStart)) + return EC; + + SectionStart = markSectionStart(SecFuncOffsetTable); + if (std::error_code EC = writeFuncOffsetTable()) + return EC; + if (std::error_code EC = addNewSection(SecFuncOffsetTable, SectionStart)) + return EC; + + return sampleprof_error::success; +} + std::error_code SampleProfileWriterCompactBinary::write( const StringMap &ProfileMap) { if (std::error_code EC = SampleProfileWriter::write(ProfileMap)) @@ -81,7 +228,7 @@ std::error_code SampleProfileWriterCompactBinary::write( /// /// The format used here is more structured and deliberate because /// it needs to be parsed by the SampleProfileReaderText class. -std::error_code SampleProfileWriterText::write(const FunctionSamples &S) { +std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) { auto &OS = *OutputStream; OS << S.getName() << ":" << S.getTotalSamples(); if (Indent == 0) @@ -100,8 +247,8 @@ std::error_code SampleProfileWriterText::write(const FunctionSamples &S) { OS << Sample.getSamples(); - for (const auto &J : Sample.getCallTargets()) - OS << " " << J.first() << ":" << J.second; + for (const auto &J : Sample.getSortedCallTargets()) + OS << " " << J.first << ":" << J.second; OS << "\n"; } @@ -117,7 +264,7 @@ std::error_code SampleProfileWriterText::write(const FunctionSamples &S) { OS << Loc.LineOffset << ": "; else OS << Loc.LineOffset << "." << Loc.Discriminator << ": "; - if (std::error_code EC = write(CalleeSamples)) + if (std::error_code EC = writeSample(CalleeSamples)) return EC; } Indent -= 1; @@ -163,7 +310,7 @@ void SampleProfileWriterBinary::stablizeNameTable(std::set &V) { NameTable[N] = i++; } -std::error_code SampleProfileWriterRawBinary::writeNameTable() { +std::error_code SampleProfileWriterBinary::writeNameTable() { auto &OS = *OutputStream; std::set V; stablizeNameTable(V); @@ -214,25 +361,18 @@ std::error_code SampleProfileWriterCompactBinary::writeNameTable() { return sampleprof_error::success; } -std::error_code SampleProfileWriterRawBinary::writeMagicIdent() { - auto &OS = *OutputStream; - // Write file magic identifier. 
- encodeULEB128(SPMagic(), OS); - encodeULEB128(SPVersion(), OS); - return sampleprof_error::success; -} - -std::error_code SampleProfileWriterCompactBinary::writeMagicIdent() { +std::error_code +SampleProfileWriterBinary::writeMagicIdent(SampleProfileFormat Format) { auto &OS = *OutputStream; // Write file magic identifier. - encodeULEB128(SPMagic(SPF_Compact_Binary), OS); + encodeULEB128(SPMagic(Format), OS); encodeULEB128(SPVersion(), OS); return sampleprof_error::success; } std::error_code SampleProfileWriterBinary::writeHeader( const StringMap &ProfileMap) { - writeMagicIdent(); + writeMagicIdent(Format); computeSummary(ProfileMap); if (auto EC = writeSummary()) @@ -248,6 +388,82 @@ std::error_code SampleProfileWriterBinary::writeHeader( return sampleprof_error::success; } +void SampleProfileWriterExtBinaryBase::setToCompressAllSections() { + for (auto &Entry : SectionHdrLayout) + addSecFlags(Entry, SecFlagCompress); +} + +void SampleProfileWriterExtBinaryBase::setToCompressSection(SecType Type) { + addSectionFlags(Type, SecFlagCompress); +} + +void SampleProfileWriterExtBinaryBase::addSectionFlags(SecType Type, + SecFlags Flags) { + for (auto &Entry : SectionHdrLayout) { + if (Entry.Type == Type) + addSecFlags(Entry, Flags); + } +} + +void SampleProfileWriterExtBinaryBase::allocSecHdrTable() { + support::endian::Writer Writer(*OutputStream, support::little); + + Writer.write(static_cast(SectionHdrLayout.size())); + SecHdrTableOffset = OutputStream->tell(); + for (uint32_t i = 0; i < SectionHdrLayout.size(); i++) { + Writer.write(static_cast(-1)); + Writer.write(static_cast(-1)); + Writer.write(static_cast(-1)); + Writer.write(static_cast(-1)); + } +} + +std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() { + auto &OFS = static_cast(*OutputStream); + uint64_t Saved = OutputStream->tell(); + + // Set OutputStream to the location saved in SecHdrTableOffset. + if (OFS.seek(SecHdrTableOffset) == (uint64_t)-1) + return sampleprof_error::ostream_seek_unsupported; + support::endian::Writer Writer(*OutputStream, support::little); + + DenseMap IndexMap; + for (uint32_t i = 0; i < SecHdrTable.size(); i++) { + IndexMap.insert({static_cast(SecHdrTable[i].Type), i}); + } + + // Write the section header table in the order specified in + // SectionHdrLayout. That is the sections order Reader will see. + // Note that the sections order in which Reader expects to read + // may be different from the order in which Writer is able to + // write, so we need to adjust the order in SecHdrTable to be + // consistent with SectionHdrLayout when we write SecHdrTable + // to the memory. + for (uint32_t i = 0; i < SectionHdrLayout.size(); i++) { + uint32_t idx = IndexMap[static_cast(SectionHdrLayout[i].Type)]; + Writer.write(static_cast(SecHdrTable[idx].Type)); + Writer.write(static_cast(SecHdrTable[idx].Flags)); + Writer.write(static_cast(SecHdrTable[idx].Offset)); + Writer.write(static_cast(SecHdrTable[idx].Size)); + } + + // Reset OutputStream. 
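allocSecHdrTable and writeSecHdrTable above reserve fixed-size header slots up front, write each section while recording its type, offset, and size, and finally seek back to patch the reserved slots in the layout order the reader expects. A standalone sketch of that reserve-then-patch pattern on an in-memory buffer, with illustrative names and without the flags and reader/writer ordering details:

    #include <cstdint>
    #include <cstring>
    #include <iostream>
    #include <string>
    #include <vector>

    struct SecHdrEntry {
      uint64_t Type, Offset, Size;
    };

    class SectionWriter {
      std::vector<uint8_t> Buf;
      size_t HdrOffset = 0;
      size_t NumSlots = 0;
      std::vector<SecHdrEntry> Entries;

      void writeU64(size_t At, uint64_t V) {
        std::memcpy(&Buf[At], &V, sizeof(V));
      }

    public:
      // Reserve NumSections fixed-size header slots; their contents are not
      // known until every section has been written.
      void allocHeader(size_t NumSections) {
        HdrOffset = Buf.size();
        NumSlots = NumSections;
        Buf.resize(Buf.size() + NumSections * 3 * sizeof(uint64_t), 0xff);
      }

      // Append a section body and remember where it landed.
      void writeSection(uint64_t Type, const std::string &Body) {
        Entries.push_back({Type, Buf.size(), Body.size()});
        Buf.insert(Buf.end(), Body.begin(), Body.end());
      }

      // "Seek back" and patch the reserved slots now that offsets and sizes
      // are known.
      void patchHeader() {
        for (size_t I = 0; I < Entries.size() && I < NumSlots; ++I) {
          size_t Slot = HdrOffset + I * 3 * sizeof(uint64_t);
          writeU64(Slot, Entries[I].Type);
          writeU64(Slot + 8, Entries[I].Offset);
          writeU64(Slot + 16, Entries[I].Size);
        }
      }

      const std::vector<uint8_t> &buffer() const { return Buf; }
    };

    int main() {
      SectionWriter W;
      W.allocHeader(2);
      W.writeSection(/*Type=*/1, "name table bytes");
      W.writeSection(/*Type=*/2, "profile bytes");
      W.patchHeader();
      std::cout << "total bytes: " << W.buffer().size() << "\n";
    }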
+ if (OFS.seek(Saved) == (uint64_t)-1) + return sampleprof_error::ostream_seek_unsupported; + + return sampleprof_error::success; +} + +std::error_code SampleProfileWriterExtBinaryBase::writeHeader( + const StringMap &ProfileMap) { + auto &OS = *OutputStream; + FileStart = OS.tell(); + writeMagicIdent(Format); + + allocSecHdrTable(); + return sampleprof_error::success; +} + std::error_code SampleProfileWriterCompactBinary::writeHeader( const StringMap &ProfileMap) { support::endian::Writer Writer(*OutputStream, support::little); @@ -294,8 +510,8 @@ std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) { encodeULEB128(Loc.Discriminator, OS); encodeULEB128(Sample.getSamples(), OS); encodeULEB128(Sample.getCallTargets().size(), OS); - for (const auto &J : Sample.getCallTargets()) { - StringRef Callee = J.first(); + for (const auto &J : Sample.getSortedCallTargets()) { + StringRef Callee = J.first; uint64_t CalleeSamples = J.second; if (std::error_code EC = writeNameIdx(Callee)) return EC; @@ -324,13 +540,14 @@ std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) { /// Write samples of a top-level function to a binary file. /// /// \returns true if the samples were written successfully, false otherwise. -std::error_code SampleProfileWriterBinary::write(const FunctionSamples &S) { +std::error_code +SampleProfileWriterBinary::writeSample(const FunctionSamples &S) { encodeULEB128(S.getHeadSamples(), *OutputStream); return writeBody(S); } std::error_code -SampleProfileWriterCompactBinary::write(const FunctionSamples &S) { +SampleProfileWriterCompactBinary::writeSample(const FunctionSamples &S) { uint64_t Offset = OutputStream->tell(); StringRef Name = S.getName(); FuncOffsetTable[Name] = Offset; @@ -349,10 +566,11 @@ ErrorOr> SampleProfileWriter::create(StringRef Filename, SampleProfileFormat Format) { std::error_code EC; std::unique_ptr OS; - if (Format == SPF_Binary || Format == SPF_Compact_Binary) - OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::F_None)); + if (Format == SPF_Binary || Format == SPF_Ext_Binary || + Format == SPF_Compact_Binary) + OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::OF_None)); else - OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::F_Text)); + OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::OF_Text)); if (EC) return EC; @@ -374,6 +592,8 @@ SampleProfileWriter::create(std::unique_ptr &OS, if (Format == SPF_Binary) Writer.reset(new SampleProfileWriterRawBinary(OS)); + else if (Format == SPF_Ext_Binary) + Writer.reset(new SampleProfileWriterExtBinary(OS)); else if (Format == SPF_Compact_Binary) Writer.reset(new SampleProfileWriterCompactBinary(OS)); else if (Format == SPF_Text) @@ -386,6 +606,7 @@ SampleProfileWriter::create(std::unique_ptr &OS, if (EC) return EC; + Writer->Format = Format; return std::move(Writer); } diff --git a/lib/Remarks/BitstreamRemarkParser.cpp b/lib/Remarks/BitstreamRemarkParser.cpp new file mode 100644 index 000000000000..99a82e1ee3af --- /dev/null +++ b/lib/Remarks/BitstreamRemarkParser.cpp @@ -0,0 +1,597 @@ +//===- BitstreamRemarkParser.cpp ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides utility methods used by clients that want to use the +// parser for remark diagnostics in LLVM. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Remarks/BitstreamRemarkParser.h" +#include "BitstreamRemarkParser.h" +#include "llvm/Remarks/BitstreamRemarkContainer.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" + +using namespace llvm; +using namespace llvm::remarks; + +static Error unknownRecord(const char *BlockName, unsigned RecordID) { + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing %s: unknown record entry (%lu).", BlockName, + RecordID); +} + +static Error malformedRecord(const char *BlockName, const char *RecordName) { + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing %s: malformed record entry (%s).", BlockName, + RecordName); +} + +BitstreamMetaParserHelper::BitstreamMetaParserHelper( + BitstreamCursor &Stream, BitstreamBlockInfo &BlockInfo) + : Stream(Stream), BlockInfo(BlockInfo) {} + +/// Parse a record and fill in the fields in the parser. +static Error parseRecord(BitstreamMetaParserHelper &Parser, unsigned Code) { + BitstreamCursor &Stream = Parser.Stream; + // Note: 2 is used here because it's the max number of fields we have per + // record. + SmallVector Record; + StringRef Blob; + Expected RecordID = Stream.readRecord(Code, Record, &Blob); + if (!RecordID) + return RecordID.takeError(); + + switch (*RecordID) { + case RECORD_META_CONTAINER_INFO: { + if (Record.size() != 2) + return malformedRecord("BLOCK_META", "RECORD_META_CONTAINER_INFO"); + Parser.ContainerVersion = Record[0]; + Parser.ContainerType = Record[1]; + break; + } + case RECORD_META_REMARK_VERSION: { + if (Record.size() != 1) + return malformedRecord("BLOCK_META", "RECORD_META_REMARK_VERSION"); + Parser.RemarkVersion = Record[0]; + break; + } + case RECORD_META_STRTAB: { + if (Record.size() != 0) + return malformedRecord("BLOCK_META", "RECORD_META_STRTAB"); + Parser.StrTabBuf = Blob; + break; + } + case RECORD_META_EXTERNAL_FILE: { + if (Record.size() != 0) + return malformedRecord("BLOCK_META", "RECORD_META_EXTERNAL_FILE"); + Parser.ExternalFilePath = Blob; + break; + } + default: + return unknownRecord("BLOCK_META", *RecordID); + } + return Error::success(); +} + +BitstreamRemarkParserHelper::BitstreamRemarkParserHelper( + BitstreamCursor &Stream) + : Stream(Stream) {} + +/// Parse a record and fill in the fields in the parser. +static Error parseRecord(BitstreamRemarkParserHelper &Parser, unsigned Code) { + BitstreamCursor &Stream = Parser.Stream; + // Note: 5 is used here because it's the max number of fields we have per + // record. 
+ SmallVector Record; + StringRef Blob; + Expected RecordID = Stream.readRecord(Code, Record, &Blob); + if (!RecordID) + return RecordID.takeError(); + + switch (*RecordID) { + case RECORD_REMARK_HEADER: { + if (Record.size() != 4) + return malformedRecord("BLOCK_REMARK", "RECORD_REMARK_HEADER"); + Parser.Type = Record[0]; + Parser.RemarkNameIdx = Record[1]; + Parser.PassNameIdx = Record[2]; + Parser.FunctionNameIdx = Record[3]; + break; + } + case RECORD_REMARK_DEBUG_LOC: { + if (Record.size() != 3) + return malformedRecord("BLOCK_REMARK", "RECORD_REMARK_DEBUG_LOC"); + Parser.SourceFileNameIdx = Record[0]; + Parser.SourceLine = Record[1]; + Parser.SourceColumn = Record[2]; + break; + } + case RECORD_REMARK_HOTNESS: { + if (Record.size() != 1) + return malformedRecord("BLOCK_REMARK", "RECORD_REMARK_HOTNESS"); + Parser.Hotness = Record[0]; + break; + } + case RECORD_REMARK_ARG_WITH_DEBUGLOC: { + if (Record.size() != 5) + return malformedRecord("BLOCK_REMARK", "RECORD_REMARK_ARG_WITH_DEBUGLOC"); + // Create a temporary argument. Use that as a valid memory location for this + // argument entry. + Parser.TmpArgs.emplace_back(); + Parser.TmpArgs.back().KeyIdx = Record[0]; + Parser.TmpArgs.back().ValueIdx = Record[1]; + Parser.TmpArgs.back().SourceFileNameIdx = Record[2]; + Parser.TmpArgs.back().SourceLine = Record[3]; + Parser.TmpArgs.back().SourceColumn = Record[4]; + Parser.Args = + ArrayRef(Parser.TmpArgs); + break; + } + case RECORD_REMARK_ARG_WITHOUT_DEBUGLOC: { + if (Record.size() != 2) + return malformedRecord("BLOCK_REMARK", + "RECORD_REMARK_ARG_WITHOUT_DEBUGLOC"); + // Create a temporary argument. Use that as a valid memory location for this + // argument entry. + Parser.TmpArgs.emplace_back(); + Parser.TmpArgs.back().KeyIdx = Record[0]; + Parser.TmpArgs.back().ValueIdx = Record[1]; + Parser.Args = + ArrayRef(Parser.TmpArgs); + break; + } + default: + return unknownRecord("BLOCK_REMARK", *RecordID); + } + return Error::success(); +} + +template +static Error parseBlock(T &ParserHelper, unsigned BlockID, + const char *BlockName) { + BitstreamCursor &Stream = ParserHelper.Stream; + Expected Next = Stream.advance(); + if (!Next) + return Next.takeError(); + if (Next->Kind != BitstreamEntry::SubBlock || Next->ID != BlockID) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing %s: expecting [ENTER_SUBBLOCK, %s, ...].", + BlockName, BlockName); + if (Stream.EnterSubBlock(BlockID)) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while entering %s.", BlockName); + + // Stop when there is nothing to read anymore or when we encounter an + // END_BLOCK. + while (!Stream.AtEndOfStream()) { + Expected Next = Stream.advance(); + if (!Next) + return Next.takeError(); + switch (Next->Kind) { + case BitstreamEntry::EndBlock: + return Error::success(); + case BitstreamEntry::Error: + case BitstreamEntry::SubBlock: + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing %s: expecting records.", BlockName); + case BitstreamEntry::Record: + if (Error E = parseRecord(ParserHelper, Next->ID)) + return E; + continue; + } + } + // If we're here, it means we didn't get an END_BLOCK yet, but we're at the + // end of the stream. In this case, error. 
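parseRecord above accepts a record only if its ID is known and its field count matches what that ID requires, reporting unknownRecord or malformedRecord otherwise. A standalone sketch of that validation style over plain integer records; the record IDs and field counts below are illustrative, not the real remark container layout:

    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <vector>

    enum RecordID : unsigned {
      RECORD_HEADER = 1,    // expects 4 fields
      RECORD_DEBUG_LOC = 2, // expects 3 fields
      RECORD_HOTNESS = 3,   // expects 1 field
    };

    struct ParsedRemark {
      uint64_t Type = 0, NameIdx = 0, PassIdx = 0, FuncIdx = 0;
      uint64_t File = 0, Line = 0, Column = 0;
      uint64_t Hotness = 0;
    };

    // Returns an empty string on success, otherwise a diagnostic in the spirit
    // of unknownRecord()/malformedRecord() above.
    static std::string parseRecord(unsigned ID,
                                   const std::vector<uint64_t> &Fields,
                                   ParsedRemark &R) {
      auto Malformed = [](const char *Name) {
        return std::string("malformed record entry (") + Name + ")";
      };
      switch (ID) {
      case RECORD_HEADER:
        if (Fields.size() != 4)
          return Malformed("RECORD_HEADER");
        R.Type = Fields[0]; R.NameIdx = Fields[1];
        R.PassIdx = Fields[2]; R.FuncIdx = Fields[3];
        return "";
      case RECORD_DEBUG_LOC:
        if (Fields.size() != 3)
          return Malformed("RECORD_DEBUG_LOC");
        R.File = Fields[0]; R.Line = Fields[1]; R.Column = Fields[2];
        return "";
      case RECORD_HOTNESS:
        if (Fields.size() != 1)
          return Malformed("RECORD_HOTNESS");
        R.Hotness = Fields[0];
        return "";
      default:
        return "unknown record entry (" + std::to_string(ID) + ")";
      }
    }

    int main() {
      ParsedRemark R;
      std::cout << parseRecord(RECORD_HEADER, {0, 1, 2, 3}, R).empty() << "\n"; // 1
      std::cout << parseRecord(RECORD_HOTNESS, {7, 8}, R) << "\n"; // malformed ...
      std::cout << parseRecord(99, {1}, R) << "\n";                // unknown ...
    }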
+  return createStringError(
+      std::make_error_code(std::errc::illegal_byte_sequence),
+      "Error while parsing %s: unterminated block.", BlockName);
+}
+
+Error BitstreamMetaParserHelper::parse() {
+  return parseBlock(*this, META_BLOCK_ID, "META_BLOCK");
+}
+
+Error BitstreamRemarkParserHelper::parse() {
+  return parseBlock(*this, REMARK_BLOCK_ID, "REMARK_BLOCK");
+}
+
+BitstreamParserHelper::BitstreamParserHelper(StringRef Buffer)
+    : Stream(Buffer) {}
+
+Expected<std::array<char, 4>> BitstreamParserHelper::parseMagic() {
+  std::array<char, 4> Result;
+  for (unsigned i = 0; i < 4; ++i)
+    if (Expected<unsigned> R = Stream.Read(8))
+      Result[i] = *R;
+    else
+      return R.takeError();
+  return Result;
+}
+
+Error BitstreamParserHelper::parseBlockInfoBlock() {
+  Expected<BitstreamEntry> Next = Stream.advance();
+  if (!Next)
+    return Next.takeError();
+  if (Next->Kind != BitstreamEntry::SubBlock ||
+      Next->ID != llvm::bitc::BLOCKINFO_BLOCK_ID)
+    return createStringError(
+        std::make_error_code(std::errc::illegal_byte_sequence),
+        "Error while parsing BLOCKINFO_BLOCK: expecting [ENTER_SUBBLOCK, "
+        "BLOCKINFO_BLOCK, ...].");
+
+  Expected<Optional<BitstreamBlockInfo>> MaybeBlockInfo =
+      Stream.ReadBlockInfoBlock();
+  if (!MaybeBlockInfo)
+    return MaybeBlockInfo.takeError();
+
+  if (!*MaybeBlockInfo)
+    return createStringError(
+        std::make_error_code(std::errc::illegal_byte_sequence),
+        "Error while parsing BLOCKINFO_BLOCK.");
+
+  BlockInfo = **MaybeBlockInfo;
+
+  Stream.setBlockInfo(&BlockInfo);
+  return Error::success();
+}
+
+static Expected<bool> isBlock(BitstreamCursor &Stream, unsigned BlockID) {
+  bool Result = false;
+  uint64_t PreviousBitNo = Stream.GetCurrentBitNo();
+  Expected<BitstreamEntry> Next = Stream.advance();
+  if (!Next)
+    return Next.takeError();
+  switch (Next->Kind) {
+  case BitstreamEntry::SubBlock:
+    // Check for the block id.
+    Result = Next->ID == BlockID;
+    break;
+  case BitstreamEntry::Error:
+    return createStringError(
+        std::make_error_code(std::errc::illegal_byte_sequence),
+        "Unexpected error while parsing bitstream.");
+  default:
+    Result = false;
+    break;
+  }
+  if (Error E = Stream.JumpToBit(PreviousBitNo))
+    return std::move(E);
+  return Result;
+}
+
+Expected<bool> BitstreamParserHelper::isMetaBlock() {
+  return isBlock(Stream, META_BLOCK_ID);
+}
+
+Expected<bool> BitstreamParserHelper::isRemarkBlock() {
+  return isBlock(Stream, REMARK_BLOCK_ID);
+}
+
+static Error validateMagicNumber(StringRef Magic) {
+  if (Magic != remarks::ContainerMagic)
+    return createStringError(std::make_error_code(std::errc::invalid_argument),
+                             "Unknown magic number: expecting %s, got %.4s.",
+                             remarks::ContainerMagic.data(), Magic.data());
+  return Error::success();
+}
+
+static Error advanceToMetaBlock(BitstreamParserHelper &Helper) {
+  Expected<std::array<char, 4>> Magic = Helper.parseMagic();
+  if (!Magic)
+    return Magic.takeError();
+  if (Error E = validateMagicNumber(StringRef(Magic->data(), Magic->size())))
+    return E;
+  if (Error E = Helper.parseBlockInfoBlock())
+    return E;
+  Expected<bool> isMetaBlock = Helper.isMetaBlock();
+  if (!isMetaBlock)
+    return isMetaBlock.takeError();
+  if (!*isMetaBlock)
+    return createStringError(
+        std::make_error_code(std::errc::illegal_byte_sequence),
+        "Expecting META_BLOCK after the BLOCKINFO_BLOCK.");
+  return Error::success();
+}
+
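The public entry point added later in this patch, createRemarkParserFromMeta() in lib/Remarks/RemarkParser.cpp, forwards to the factory defined just below. For orientation, a minimal consumer might look like the following sketch. It is illustrative only and not part of the patch: dumpRemarkNames is a hypothetical helper, and the end-of-stream handling assumes the EndOfFileError sentinel that next() reports once the container is exhausted.

#include "llvm/Remarks/Remark.h"
#include "llvm/Remarks/RemarkParser.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::remarks;

// Print the name of every remark contained in Buffer, which is expected to
// hold a bitstream remark container. Any error other than end-of-file is
// propagated to the caller.
static Error dumpRemarkNames(StringRef Buffer, raw_ostream &OS) {
  Expected<std::unique_ptr<RemarkParser>> MaybeParser =
      createRemarkParserFromMeta(Format::Bitstream, Buffer,
                                 /*StrTab=*/None,
                                 /*ExternalFilePrependPath=*/None);
  if (!MaybeParser)
    return MaybeParser.takeError();
  while (true) {
    Expected<std::unique_ptr<Remark>> MaybeRemark = (*MaybeParser)->next();
    if (!MaybeRemark) {
      Error E = MaybeRemark.takeError();
      if (E.isA<EndOfFileError>()) { // The parser signals a clean end of stream.
        consumeError(std::move(E));
        return Error::success();
      }
      return E;
    }
    OS << (*MaybeRemark)->RemarkName << "\n";
  }
}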
+Expected<std::unique_ptr<BitstreamRemarkParser>>
+remarks::createBitstreamParserFromMeta(
+    StringRef Buf, Optional<ParsedStringTable> StrTab,
+    Optional<StringRef> ExternalFilePrependPath) {
+  BitstreamParserHelper Helper(Buf);
+  Expected<std::array<char, 4>> Magic = Helper.parseMagic();
+  if (!Magic)
+    return Magic.takeError();
+
+  if (Error E = validateMagicNumber(StringRef(Magic->data(), Magic->size())))
+    return std::move(E);
+
+  auto Parser =
+      StrTab ? std::make_unique<BitstreamRemarkParser>(Buf, std::move(*StrTab))
+             : std::make_unique<BitstreamRemarkParser>(Buf);
+
+  if (ExternalFilePrependPath)
+    Parser->ExternalFilePrependPath = *ExternalFilePrependPath;
+
+  return std::move(Parser);
+}
+
+Expected<std::unique_ptr<Remark>> BitstreamRemarkParser::next() {
+  if (ParserHelper.atEndOfStream())
+    return make_error<EndOfFileError>();
+
+  if (!ReadyToParseRemarks) {
+    if (Error E = parseMeta())
+      return std::move(E);
+    ReadyToParseRemarks = true;
+  }
+
+  return parseRemark();
+}
+
+Error BitstreamRemarkParser::parseMeta() {
+  // Advance to the meta block.
+  if (Error E = advanceToMetaBlock(ParserHelper))
+    return E;
+
+  BitstreamMetaParserHelper MetaHelper(ParserHelper.Stream,
+                                       ParserHelper.BlockInfo);
+  if (Error E = MetaHelper.parse())
+    return E;
+
+  if (Error E = processCommonMeta(MetaHelper))
+    return E;
+
+  switch (ContainerType) {
+  case BitstreamRemarkContainerType::Standalone:
+    return processStandaloneMeta(MetaHelper);
+  case BitstreamRemarkContainerType::SeparateRemarksFile:
+    return processSeparateRemarksFileMeta(MetaHelper);
+  case BitstreamRemarkContainerType::SeparateRemarksMeta:
+    return processSeparateRemarksMetaMeta(MetaHelper);
+  }
+  llvm_unreachable("Unknown BitstreamRemarkContainerType enum");
+}
+
+Error BitstreamRemarkParser::processCommonMeta(
+    BitstreamMetaParserHelper &MetaHelper) {
+  if (Optional<uint64_t> Version = MetaHelper.ContainerVersion)
+    ContainerVersion = *Version;
+  else
+    return createStringError(
+        std::make_error_code(std::errc::illegal_byte_sequence),
+        "Error while parsing BLOCK_META: missing container version.");
+
+  if (Optional<uint64_t> Type = MetaHelper.ContainerType) {
+    // Always >= BitstreamRemarkContainerType::First since it's unsigned.
+    if (*Type > static_cast<uint64_t>(BitstreamRemarkContainerType::Last))
+      return createStringError(
+          std::make_error_code(std::errc::illegal_byte_sequence),
+          "Error while parsing BLOCK_META: invalid container type.");
+
+    ContainerType = static_cast<BitstreamRemarkContainerType>(*Type);
+  } else
+    return createStringError(
+        std::make_error_code(std::errc::illegal_byte_sequence),
+        "Error while parsing BLOCK_META: missing container type.");
+
+  return Error::success();
+}
+
+static Error processStrTab(BitstreamRemarkParser &P,
+                           Optional<StringRef> StrTabBuf) {
+  if (!StrTabBuf)
+    return createStringError(
+        std::make_error_code(std::errc::illegal_byte_sequence),
+        "Error while parsing BLOCK_META: missing string table.");
+  // Parse and assign the string table.
+  P.StrTab.emplace(*StrTabBuf);
+  return Error::success();
+}
+
+static Error processRemarkVersion(BitstreamRemarkParser &P,
+                                  Optional<uint64_t> RemarkVersion) {
+  if (!RemarkVersion)
+    return createStringError(
+        std::make_error_code(std::errc::illegal_byte_sequence),
+        "Error while parsing BLOCK_META: missing remark version.");
+  P.RemarkVersion = *RemarkVersion;
+  return Error::success();
+}
+
+Error BitstreamRemarkParser::processExternalFilePath(
+    Optional<StringRef> ExternalFilePath) {
+  if (!ExternalFilePath)
+    return createStringError(
+        std::make_error_code(std::errc::illegal_byte_sequence),
+        "Error while parsing BLOCK_META: missing external file path.");
+
+  SmallString<80> FullPath(ExternalFilePrependPath);
+  sys::path::append(FullPath, *ExternalFilePath);
+
+  // External file: open the external file, parse it, check if its metadata
+  // matches the one from the separate metadata, then replace the current parser
+  // with the one parsing the remarks.
+  ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+      MemoryBuffer::getFile(FullPath);
+  if (std::error_code EC = BufferOrErr.getError())
+    return createFileError(FullPath, EC);
+  TmpRemarkBuffer = std::move(*BufferOrErr);
+
+  // Create a separate parser used for parsing the separate file.
+  ParserHelper = BitstreamParserHelper(TmpRemarkBuffer->getBuffer());
+  // Advance and check until we can parse the meta block.
+  if (Error E = advanceToMetaBlock(ParserHelper))
+    return E;
+  // Parse the meta from the separate file.
+  // Note: here we overwrite the BlockInfo with the one from the file. This will
+  // be used to parse the rest of the file.
+  BitstreamMetaParserHelper SeparateMetaHelper(ParserHelper.Stream,
+                                               ParserHelper.BlockInfo);
+  if (Error E = SeparateMetaHelper.parse())
+    return E;
+
+  uint64_t PreviousContainerVersion = ContainerVersion;
+  if (Error E = processCommonMeta(SeparateMetaHelper))
+    return E;
+
+  if (ContainerType != BitstreamRemarkContainerType::SeparateRemarksFile)
+    return createStringError(
+        std::make_error_code(std::errc::illegal_byte_sequence),
+        "Error while parsing external file's BLOCK_META: wrong container "
+        "type.");
+
+  if (PreviousContainerVersion != ContainerVersion)
+    return createStringError(
+        std::make_error_code(std::errc::illegal_byte_sequence),
+        "Error while parsing external file's BLOCK_META: mismatching versions: "
+        "original meta: %lu, external file meta: %lu.",
+        PreviousContainerVersion, ContainerVersion);
+
+  // Process the meta from the separate file.
+  return processSeparateRemarksFileMeta(SeparateMetaHelper);
+}
+
+Error BitstreamRemarkParser::processStandaloneMeta(
+    BitstreamMetaParserHelper &Helper) {
+  if (Error E = processStrTab(*this, Helper.StrTabBuf))
+    return E;
+  return processRemarkVersion(*this, Helper.RemarkVersion);
+}
+
+Error BitstreamRemarkParser::processSeparateRemarksFileMeta(
+    BitstreamMetaParserHelper &Helper) {
+  return processRemarkVersion(*this, Helper.RemarkVersion);
+}
+
+Error BitstreamRemarkParser::processSeparateRemarksMetaMeta(
+    BitstreamMetaParserHelper &Helper) {
+  if (Error E = processStrTab(*this, Helper.StrTabBuf))
+    return E;
+  return processExternalFilePath(Helper.ExternalFilePath);
+}
+
+Expected<std::unique_ptr<Remark>> BitstreamRemarkParser::parseRemark() {
+  BitstreamRemarkParserHelper RemarkHelper(ParserHelper.Stream);
+  if (Error E = RemarkHelper.parse())
+    return std::move(E);
+
+  return processRemark(RemarkHelper);
+}
+
+Expected<std::unique_ptr<Remark>>
+BitstreamRemarkParser::processRemark(BitstreamRemarkParserHelper &Helper) {
+  std::unique_ptr<Remark> Result = std::make_unique<Remark>();
+  Remark &R = *Result;
+
+  if (StrTab == None)
+    return createStringError(
+        std::make_error_code(std::errc::invalid_argument),
+        "Error while parsing BLOCK_REMARK: missing string table.");
+
+  if (!Helper.Type)
+    return createStringError(
+        std::make_error_code(std::errc::illegal_byte_sequence),
+        "Error while parsing BLOCK_REMARK: missing remark type.");
+
+  // Always >= Type::First since it's unsigned.
+  if (*Helper.Type > static_cast<uint64_t>(Type::Last))
+    return createStringError(
+        std::make_error_code(std::errc::illegal_byte_sequence),
+        "Error while parsing BLOCK_REMARK: unknown remark type.");
+
+  R.RemarkType = static_cast<Type>(*Helper.Type);
+
+  if (!Helper.RemarkNameIdx)
+    return createStringError(
+        std::make_error_code(std::errc::illegal_byte_sequence),
+        "Error while parsing BLOCK_REMARK: missing remark name.");
+
+  if (Expected<StringRef> RemarkName = (*StrTab)[*Helper.RemarkNameIdx])
+    R.RemarkName = *RemarkName;
+  else
+    return RemarkName.takeError();
+
+  if (!Helper.PassNameIdx)
+    return createStringError(
+        std::make_error_code(std::errc::illegal_byte_sequence),
+        "Error while parsing BLOCK_REMARK: missing remark pass.");
+
+  if (Expected<StringRef> PassName = (*StrTab)[*Helper.PassNameIdx])
+    R.PassName = *PassName;
+  else
+    return PassName.takeError();
+
+  if (!Helper.FunctionNameIdx)
+    return createStringError(
+        std::make_error_code(std::errc::illegal_byte_sequence),
+        "Error while parsing BLOCK_REMARK: missing remark function name.");
+  if (Expected<StringRef> FunctionName = (*StrTab)[*Helper.FunctionNameIdx])
+    R.FunctionName = *FunctionName;
+  else
+    return FunctionName.takeError();
+
+  if (Helper.SourceFileNameIdx && Helper.SourceLine && Helper.SourceColumn) {
+    Expected<StringRef> SourceFileName = (*StrTab)[*Helper.SourceFileNameIdx];
+    if (!SourceFileName)
+      return SourceFileName.takeError();
+    R.Loc.emplace();
+    R.Loc->SourceFilePath = *SourceFileName;
+    R.Loc->SourceLine = *Helper.SourceLine;
+    R.Loc->SourceColumn = *Helper.SourceColumn;
+  }
+
+  if (Helper.Hotness)
+    R.Hotness = *Helper.Hotness;
+
+  if (!Helper.Args)
+    return std::move(Result);
+
+  for (const BitstreamRemarkParserHelper::Argument &Arg : *Helper.Args) {
+    if (!Arg.KeyIdx)
+      return createStringError(
+          std::make_error_code(std::errc::illegal_byte_sequence),
+          "Error while parsing BLOCK_REMARK: missing key in remark argument.");
+    if (!Arg.ValueIdx)
+      return createStringError(
+          std::make_error_code(std::errc::illegal_byte_sequence),
+          "Error while parsing BLOCK_REMARK: missing value in remark "
+          "argument.");
+
+    // We have at least a key and a value, create an entry.
+    R.Args.emplace_back();
+
+    if (Expected<StringRef> Key = (*StrTab)[*Arg.KeyIdx])
+      R.Args.back().Key = *Key;
+    else
+      return Key.takeError();
+
+    if (Expected<StringRef> Value = (*StrTab)[*Arg.ValueIdx])
+      R.Args.back().Val = *Value;
+    else
+      return Value.takeError();
+
+    if (Arg.SourceFileNameIdx && Arg.SourceLine && Arg.SourceColumn) {
+      if (Expected<StringRef> SourceFileName =
+              (*StrTab)[*Arg.SourceFileNameIdx]) {
+        R.Args.back().Loc.emplace();
+        R.Args.back().Loc->SourceFilePath = *SourceFileName;
+        R.Args.back().Loc->SourceLine = *Arg.SourceLine;
+        R.Args.back().Loc->SourceColumn = *Arg.SourceColumn;
+      } else
+        return SourceFileName.takeError();
+    }
+  }
+
+  return std::move(Result);
+}
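processRemark() above never reads strings directly from the record: every name is an integer index that gets resolved through the parsed string table. The sketch below shows that indirection in isolation. It is illustrative only and not part of the patch: stringTableIndirection is a hypothetical helper, the blob is made-up test data, and cantFail() is used only because the indices are known to be in bounds here.

#include "llvm/Remarks/RemarkParser.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::remarks;

// Remark records store integer indices; the parser resolves them through a
// ParsedStringTable built over a blob of '\0'-terminated strings.
static void stringTableIndirection() {
  // Three entries, referred to by indices 0, 1 and 2 in remark records.
  StringRef Blob("inline\0NotInlined\0foo\0", 22);
  ParsedStringTable StrTab(Blob);
  StringRef PassName = cantFail(StrTab[0]);     // "inline"
  StringRef RemarkName = cantFail(StrTab[1]);   // "NotInlined"
  StringRef FunctionName = cantFail(StrTab[2]); // "foo"
  outs() << PassName << ":" << RemarkName << ":" << FunctionName << "\n";
}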
diff --git a/lib/Remarks/BitstreamRemarkParser.h b/lib/Remarks/BitstreamRemarkParser.h
new file mode 100644
index 000000000000..7c9cc2f1e7db
--- /dev/null
+++ b/lib/Remarks/BitstreamRemarkParser.h
@@ -0,0 +1,83 @@
+//===-- BitstreamRemarkParser.h - Parser for Bitstream remarks --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the Bitstream remark parser.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_REMARKS_BITSTREAM_REMARK_PARSER_H
+#define LLVM_LIB_REMARKS_BITSTREAM_REMARK_PARSER_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Remarks/BitstreamRemarkParser.h"
+#include "llvm/Remarks/RemarkFormat.h"
+#include "llvm/Remarks/RemarkParser.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
+#include <memory>
+
+namespace llvm {
+namespace remarks {
+/// Parses and holds the state of the latest parsed remark.
+struct BitstreamRemarkParser : public RemarkParser {
+  /// The buffer to parse.
+  BitstreamParserHelper ParserHelper;
+  /// The string table used for parsing strings.
+  Optional<ParsedStringTable> StrTab;
+  /// Temporary remark buffer used when the remarks are stored separately.
+  std::unique_ptr<MemoryBuffer> TmpRemarkBuffer;
+  /// The common metadata used to decide how to parse the buffer.
+  /// This is filled when parsing the metadata block.
+  uint64_t ContainerVersion;
+  uint64_t RemarkVersion;
+  BitstreamRemarkContainerType ContainerType;
+  /// Whether the parser is ready to parse remarks.
+  bool ReadyToParseRemarks = false;
+
+  /// Create a parser that expects to find a string table embedded in the
+  /// stream.
+  BitstreamRemarkParser(StringRef Buf)
+      : RemarkParser(Format::Bitstream), ParserHelper(Buf) {}
+
+  /// Create a parser that uses a pre-parsed string table.
+  BitstreamRemarkParser(StringRef Buf, ParsedStringTable StrTab)
+      : RemarkParser(Format::Bitstream), ParserHelper(Buf),
+        StrTab(std::move(StrTab)) {}
+
+  Expected<std::unique_ptr<Remark>> next() override;
+
+  static bool classof(const RemarkParser *P) {
+    return P->ParserFormat == Format::Bitstream;
+  }
+
+  /// Parse and process the metadata of the buffer.
+  Error parseMeta();
+
+  /// Parse a Bitstream remark.
+  Expected<std::unique_ptr<Remark>> parseRemark();
+
+private:
+  /// Helper functions.
+  Error processCommonMeta(BitstreamMetaParserHelper &Helper);
+  Error processStandaloneMeta(BitstreamMetaParserHelper &Helper);
+  Error processSeparateRemarksFileMeta(BitstreamMetaParserHelper &Helper);
+  Error processSeparateRemarksMetaMeta(BitstreamMetaParserHelper &Helper);
+  Expected<std::unique_ptr<Remark>>
+  processRemark(BitstreamRemarkParserHelper &Helper);
+  Error processExternalFilePath(Optional<StringRef> ExternalFilePath);
+};
+
+Expected<std::unique_ptr<BitstreamRemarkParser>> createBitstreamParserFromMeta(
+    StringRef Buf, Optional<ParsedStringTable> StrTab = None,
+    Optional<StringRef> ExternalFilePrependPath = None);
+
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_LIB_REMARKS_BITSTREAM_REMARK_PARSER_H */
diff --git a/lib/Remarks/BitstreamRemarkSerializer.cpp b/lib/Remarks/BitstreamRemarkSerializer.cpp
new file mode 100644
index 000000000000..d02782c7954d
--- /dev/null
+++ b/lib/Remarks/BitstreamRemarkSerializer.cpp
@@ -0,0 +1,386 @@
+//===- BitstreamRemarkSerializer.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the LLVM bitstream remark serializer
+// using LLVM's bitstream writer.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Remarks/BitstreamRemarkSerializer.h" + +using namespace llvm; +using namespace llvm::remarks; + +BitstreamRemarkSerializerHelper::BitstreamRemarkSerializerHelper( + BitstreamRemarkContainerType ContainerType) + : Encoded(), R(), Bitstream(Encoded), ContainerType(ContainerType) {} + +static void push(SmallVectorImpl &R, StringRef Str) { + for (const char C : Str) + R.push_back(C); +} + +static void setRecordName(unsigned RecordID, BitstreamWriter &Bitstream, + SmallVectorImpl &R, StringRef Str) { + R.clear(); + R.push_back(RecordID); + push(R, Str); + Bitstream.EmitRecord(bitc::BLOCKINFO_CODE_SETRECORDNAME, R); +} + +static void initBlock(unsigned BlockID, BitstreamWriter &Bitstream, + SmallVectorImpl &R, StringRef Str) { + R.clear(); + R.push_back(BlockID); + Bitstream.EmitRecord(bitc::BLOCKINFO_CODE_SETBID, R); + + R.clear(); + push(R, Str); + Bitstream.EmitRecord(bitc::BLOCKINFO_CODE_BLOCKNAME, R); +} + +void BitstreamRemarkSerializerHelper::setupMetaBlockInfo() { + // Setup the metadata block. + initBlock(META_BLOCK_ID, Bitstream, R, MetaBlockName); + + // The container information. + setRecordName(RECORD_META_CONTAINER_INFO, Bitstream, R, + MetaContainerInfoName); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(RECORD_META_CONTAINER_INFO)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Version. + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 2)); // Type. + RecordMetaContainerInfoAbbrevID = + Bitstream.EmitBlockInfoAbbrev(META_BLOCK_ID, Abbrev); +} + +void BitstreamRemarkSerializerHelper::setupMetaRemarkVersion() { + setRecordName(RECORD_META_REMARK_VERSION, Bitstream, R, + MetaRemarkVersionName); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(RECORD_META_REMARK_VERSION)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Version. + RecordMetaRemarkVersionAbbrevID = + Bitstream.EmitBlockInfoAbbrev(META_BLOCK_ID, Abbrev); +} + +void BitstreamRemarkSerializerHelper::emitMetaRemarkVersion( + uint64_t RemarkVersion) { + // The remark version is emitted only if we emit remarks. + R.clear(); + R.push_back(RECORD_META_REMARK_VERSION); + R.push_back(RemarkVersion); + Bitstream.EmitRecordWithAbbrev(RecordMetaRemarkVersionAbbrevID, R); +} + +void BitstreamRemarkSerializerHelper::setupMetaStrTab() { + setRecordName(RECORD_META_STRTAB, Bitstream, R, MetaStrTabName); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(RECORD_META_STRTAB)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Raw table. + RecordMetaStrTabAbbrevID = + Bitstream.EmitBlockInfoAbbrev(META_BLOCK_ID, Abbrev); +} + +void BitstreamRemarkSerializerHelper::emitMetaStrTab( + const StringTable &StrTab) { + // The string table is not emitted if we emit remarks separately. + R.clear(); + R.push_back(RECORD_META_STRTAB); + + // Serialize to a blob. + std::string Buf; + raw_string_ostream OS(Buf); + StrTab.serialize(OS); + StringRef Blob = OS.str(); + Bitstream.EmitRecordWithBlob(RecordMetaStrTabAbbrevID, R, Blob); +} + +void BitstreamRemarkSerializerHelper::setupMetaExternalFile() { + setRecordName(RECORD_META_EXTERNAL_FILE, Bitstream, R, MetaExternalFileName); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(RECORD_META_EXTERNAL_FILE)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Filename. 
+ RecordMetaExternalFileAbbrevID = + Bitstream.EmitBlockInfoAbbrev(META_BLOCK_ID, Abbrev); +} + +void BitstreamRemarkSerializerHelper::emitMetaExternalFile(StringRef Filename) { + // The external file is emitted only if we emit the separate metadata. + R.clear(); + R.push_back(RECORD_META_EXTERNAL_FILE); + Bitstream.EmitRecordWithBlob(RecordMetaExternalFileAbbrevID, R, Filename); +} + +void BitstreamRemarkSerializerHelper::setupRemarkBlockInfo() { + // Setup the remark block. + initBlock(REMARK_BLOCK_ID, Bitstream, R, RemarkBlockName); + + // The header of a remark. + { + setRecordName(RECORD_REMARK_HEADER, Bitstream, R, RemarkHeaderName); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(RECORD_REMARK_HEADER)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // Type + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Remark Name + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Pass name + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Function name + RecordRemarkHeaderAbbrevID = + Bitstream.EmitBlockInfoAbbrev(REMARK_BLOCK_ID, Abbrev); + } + + // The location of a remark. + { + setRecordName(RECORD_REMARK_DEBUG_LOC, Bitstream, R, RemarkDebugLocName); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(RECORD_REMARK_DEBUG_LOC)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 7)); // File + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Line + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Column + RecordRemarkDebugLocAbbrevID = + Bitstream.EmitBlockInfoAbbrev(REMARK_BLOCK_ID, Abbrev); + } + + // The hotness of a remark. + { + setRecordName(RECORD_REMARK_HOTNESS, Bitstream, R, RemarkHotnessName); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(RECORD_REMARK_HOTNESS)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Hotness + RecordRemarkHotnessAbbrevID = + Bitstream.EmitBlockInfoAbbrev(REMARK_BLOCK_ID, Abbrev); + } + + // An argument entry with a debug location attached. + { + setRecordName(RECORD_REMARK_ARG_WITH_DEBUGLOC, Bitstream, R, + RemarkArgWithDebugLocName); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(RECORD_REMARK_ARG_WITH_DEBUGLOC)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 7)); // Key + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 7)); // Value + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 7)); // File + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Line + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Column + RecordRemarkArgWithDebugLocAbbrevID = + Bitstream.EmitBlockInfoAbbrev(REMARK_BLOCK_ID, Abbrev); + } + + // An argument entry with no debug location attached. + { + setRecordName(RECORD_REMARK_ARG_WITHOUT_DEBUGLOC, Bitstream, R, + RemarkArgWithoutDebugLocName); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(RECORD_REMARK_ARG_WITHOUT_DEBUGLOC)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 7)); // Key + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 7)); // Value + RecordRemarkArgWithoutDebugLocAbbrevID = + Bitstream.EmitBlockInfoAbbrev(REMARK_BLOCK_ID, Abbrev); + } +} + +void BitstreamRemarkSerializerHelper::setupBlockInfo() { + // Emit magic number. + for (const char C : ContainerMagic) + Bitstream.Emit(static_cast(C), 8); + + Bitstream.EnterBlockInfoBlock(); + + // Setup the main metadata. Depending on the container type, we'll setup the + // required records next. 
+ setupMetaBlockInfo(); + + switch (ContainerType) { + case BitstreamRemarkContainerType::SeparateRemarksMeta: + // Needs a string table that the separate remark file is using. + setupMetaStrTab(); + // Needs to know where the external remarks file is. + setupMetaExternalFile(); + break; + case BitstreamRemarkContainerType::SeparateRemarksFile: + // Contains remarks: emit the version. + setupMetaRemarkVersion(); + // Contains remarks: emit the remark abbrevs. + setupRemarkBlockInfo(); + break; + case BitstreamRemarkContainerType::Standalone: + // Contains remarks: emit the version. + setupMetaRemarkVersion(); + // Needs a string table. + setupMetaStrTab(); + // Contains remarks: emit the remark abbrevs. + setupRemarkBlockInfo(); + break; + } + + Bitstream.ExitBlock(); +} + +void BitstreamRemarkSerializerHelper::emitMetaBlock( + uint64_t ContainerVersion, Optional RemarkVersion, + Optional StrTab, Optional Filename) { + // Emit the meta block + Bitstream.EnterSubblock(META_BLOCK_ID, 3); + + // The container version and type. + R.clear(); + R.push_back(RECORD_META_CONTAINER_INFO); + R.push_back(ContainerVersion); + R.push_back(static_cast(ContainerType)); + Bitstream.EmitRecordWithAbbrev(RecordMetaContainerInfoAbbrevID, R); + + switch (ContainerType) { + case BitstreamRemarkContainerType::SeparateRemarksMeta: + assert(StrTab != None && *StrTab != nullptr); + emitMetaStrTab(**StrTab); + assert(Filename != None); + emitMetaExternalFile(*Filename); + break; + case BitstreamRemarkContainerType::SeparateRemarksFile: + assert(RemarkVersion != None); + emitMetaRemarkVersion(*RemarkVersion); + break; + case BitstreamRemarkContainerType::Standalone: + assert(RemarkVersion != None); + emitMetaRemarkVersion(*RemarkVersion); + assert(StrTab != None && *StrTab != nullptr); + emitMetaStrTab(**StrTab); + break; + } + + Bitstream.ExitBlock(); +} + +void BitstreamRemarkSerializerHelper::emitRemarkBlock(const Remark &Remark, + StringTable &StrTab) { + Bitstream.EnterSubblock(REMARK_BLOCK_ID, 4); + + R.clear(); + R.push_back(RECORD_REMARK_HEADER); + R.push_back(static_cast(Remark.RemarkType)); + R.push_back(StrTab.add(Remark.RemarkName).first); + R.push_back(StrTab.add(Remark.PassName).first); + R.push_back(StrTab.add(Remark.FunctionName).first); + Bitstream.EmitRecordWithAbbrev(RecordRemarkHeaderAbbrevID, R); + + if (const Optional &Loc = Remark.Loc) { + R.clear(); + R.push_back(RECORD_REMARK_DEBUG_LOC); + R.push_back(StrTab.add(Loc->SourceFilePath).first); + R.push_back(Loc->SourceLine); + R.push_back(Loc->SourceColumn); + Bitstream.EmitRecordWithAbbrev(RecordRemarkDebugLocAbbrevID, R); + } + + if (Optional Hotness = Remark.Hotness) { + R.clear(); + R.push_back(RECORD_REMARK_HOTNESS); + R.push_back(*Hotness); + Bitstream.EmitRecordWithAbbrev(RecordRemarkHotnessAbbrevID, R); + } + + for (const Argument &Arg : Remark.Args) { + R.clear(); + unsigned Key = StrTab.add(Arg.Key).first; + unsigned Val = StrTab.add(Arg.Val).first; + bool HasDebugLoc = Arg.Loc != None; + R.push_back(HasDebugLoc ? RECORD_REMARK_ARG_WITH_DEBUGLOC + : RECORD_REMARK_ARG_WITHOUT_DEBUGLOC); + R.push_back(Key); + R.push_back(Val); + if (HasDebugLoc) { + R.push_back(StrTab.add(Arg.Loc->SourceFilePath).first); + R.push_back(Arg.Loc->SourceLine); + R.push_back(Arg.Loc->SourceColumn); + } + Bitstream.EmitRecordWithAbbrev(HasDebugLoc + ? 
RecordRemarkArgWithDebugLocAbbrevID + : RecordRemarkArgWithoutDebugLocAbbrevID, + R); + } + Bitstream.ExitBlock(); +} + +void BitstreamRemarkSerializerHelper::flushToStream(raw_ostream &OS) { + OS.write(Encoded.data(), Encoded.size()); + Encoded.clear(); +} + +StringRef BitstreamRemarkSerializerHelper::getBuffer() { + return StringRef(Encoded.data(), Encoded.size()); +} + +BitstreamRemarkSerializer::BitstreamRemarkSerializer(raw_ostream &OS, + SerializerMode Mode) + : RemarkSerializer(Format::Bitstream, OS, Mode), + Helper(BitstreamRemarkContainerType::SeparateRemarksFile) { + assert(Mode == SerializerMode::Separate && + "For SerializerMode::Standalone, a pre-filled string table needs to " + "be provided."); + // We always use a string table with bitstream. + StrTab.emplace(); +} + +BitstreamRemarkSerializer::BitstreamRemarkSerializer(raw_ostream &OS, + SerializerMode Mode, + StringTable StrTabIn) + : RemarkSerializer(Format::Bitstream, OS, Mode), + Helper(Mode == SerializerMode::Separate + ? BitstreamRemarkContainerType::SeparateRemarksFile + : BitstreamRemarkContainerType::Standalone) { + StrTab = std::move(StrTabIn); +} + +void BitstreamRemarkSerializer::emit(const Remark &Remark) { + if (!DidSetUp) { + // Emit the metadata that is embedded in the remark file. + // If we're in standalone mode, serialize the string table as well. + bool IsStandalone = + Helper.ContainerType == BitstreamRemarkContainerType::Standalone; + BitstreamMetaSerializer MetaSerializer( + OS, Helper, + IsStandalone ? &*StrTab : Optional(None)); + MetaSerializer.emit(); + DidSetUp = true; + } + + assert(DidSetUp && + "The Block info block and the meta block were not emitted yet."); + Helper.emitRemarkBlock(Remark, *StrTab); + + Helper.flushToStream(OS); +} + +std::unique_ptr BitstreamRemarkSerializer::metaSerializer( + raw_ostream &OS, Optional ExternalFilename) { + assert(Helper.ContainerType != + BitstreamRemarkContainerType::SeparateRemarksMeta); + bool IsStandalone = + Helper.ContainerType == BitstreamRemarkContainerType::Standalone; + return std::make_unique( + OS, + IsStandalone ? 
BitstreamRemarkContainerType::Standalone + : BitstreamRemarkContainerType::SeparateRemarksMeta, + &*StrTab, ExternalFilename); +} + +void BitstreamMetaSerializer::emit() { + Helper->setupBlockInfo(); + Helper->emitMetaBlock(CurrentContainerVersion, CurrentRemarkVersion, StrTab, + ExternalFilename); + Helper->flushToStream(OS); +} diff --git a/lib/Remarks/RemarkFormat.cpp b/lib/Remarks/RemarkFormat.cpp index bcd0f753ff64..f2d0331ec6a8 100644 --- a/lib/Remarks/RemarkFormat.cpp +++ b/lib/Remarks/RemarkFormat.cpp @@ -19,11 +19,13 @@ using namespace llvm::remarks; Expected llvm::remarks::parseFormat(StringRef FormatStr) { auto Result = StringSwitch(FormatStr) .Cases("", "yaml", Format::YAML) + .Case("yaml-strtab", Format::YAMLStrTab) + .Case("bitstream", Format::Bitstream) .Default(Format::Unknown); if (Result == Format::Unknown) return createStringError(std::make_error_code(std::errc::invalid_argument), - "Unknown remark serializer format: '%s'", + "Unknown remark format: '%s'", FormatStr.data()); return Result; diff --git a/lib/Remarks/RemarkParser.cpp b/lib/Remarks/RemarkParser.cpp index f67464073bd1..c5c3d0badd3e 100644 --- a/lib/Remarks/RemarkParser.cpp +++ b/lib/Remarks/RemarkParser.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Remarks/RemarkParser.h" +#include "BitstreamRemarkParser.h" #include "YAMLRemarkParser.h" #include "llvm-c/Remarks.h" #include "llvm/ADT/STLExtras.h" @@ -47,32 +48,81 @@ Expected ParsedStringTable::operator[](size_t Index) const { return StringRef(Buffer.data() + Offset, NextOffset - Offset - 1); } -Expected> +Expected> +llvm::remarks::createRemarkParser(Format ParserFormat, StringRef Buf) { + switch (ParserFormat) { + case Format::YAML: + return std::make_unique(Buf); + case Format::YAMLStrTab: + return createStringError( + std::make_error_code(std::errc::invalid_argument), + "The YAML with string table format requires a parsed string table."); + case Format::Bitstream: + return std::make_unique(Buf); + case Format::Unknown: + return createStringError(std::make_error_code(std::errc::invalid_argument), + "Unknown remark parser format."); + } + llvm_unreachable("unhandled ParseFormat"); +} + +Expected> llvm::remarks::createRemarkParser(Format ParserFormat, StringRef Buf, - Optional StrTab) { + ParsedStringTable StrTab) { + switch (ParserFormat) { + case Format::YAML: + return createStringError(std::make_error_code(std::errc::invalid_argument), + "The YAML format can't be used with a string " + "table. Use yaml-strtab instead."); + case Format::YAMLStrTab: + return std::make_unique(Buf, std::move(StrTab)); + case Format::Bitstream: + return std::make_unique(Buf, std::move(StrTab)); + case Format::Unknown: + return createStringError(std::make_error_code(std::errc::invalid_argument), + "Unknown remark parser format."); + } + llvm_unreachable("unhandled ParseFormat"); +} + +Expected> +llvm::remarks::createRemarkParserFromMeta( + Format ParserFormat, StringRef Buf, Optional StrTab, + Optional ExternalFilePrependPath) { switch (ParserFormat) { + // Depending on the metadata, the format can be either yaml or yaml-strtab, + // regardless of the input argument. 
case Format::YAML: - return llvm::make_unique(Buf, StrTab); + case Format::YAMLStrTab: + return createYAMLParserFromMeta(Buf, std::move(StrTab), + std::move(ExternalFilePrependPath)); + case Format::Bitstream: + return createBitstreamParserFromMeta(Buf, std::move(StrTab), + std::move(ExternalFilePrependPath)); case Format::Unknown: return createStringError(std::make_error_code(std::errc::invalid_argument), "Unknown remark parser format."); } - llvm_unreachable("unknown format"); + llvm_unreachable("unhandled ParseFormat"); } +namespace { // Wrapper that holds the state needed to interact with the C API. struct CParser { - std::unique_ptr TheParser; + std::unique_ptr TheParser; Optional Err; CParser(Format ParserFormat, StringRef Buf, - Optional StrTab = None) - : TheParser(cantFail(createRemarkParser(ParserFormat, Buf, StrTab))) {} + Optional StrTab = None) + : TheParser(cantFail( + StrTab ? createRemarkParser(ParserFormat, Buf, std::move(*StrTab)) + : createRemarkParser(ParserFormat, Buf))) {} void handleError(Error E) { Err.emplace(toString(std::move(E))); } bool hasError() const { return Err.hasValue(); } const char *getMessage() const { return Err ? Err->c_str() : nullptr; }; }; +} // namespace // Create wrappers for C Binding types (see CBindingWrapping.h). DEFINE_SIMPLE_CONVERSION_FUNCTIONS(CParser, LLVMRemarkParserRef) @@ -83,10 +133,16 @@ extern "C" LLVMRemarkParserRef LLVMRemarkParserCreateYAML(const void *Buf, StringRef(static_cast(Buf), Size))); } +extern "C" LLVMRemarkParserRef LLVMRemarkParserCreateBitstream(const void *Buf, + uint64_t Size) { + return wrap(new CParser(Format::Bitstream, + StringRef(static_cast(Buf), Size))); +} + extern "C" LLVMRemarkEntryRef LLVMRemarkParserGetNext(LLVMRemarkParserRef Parser) { CParser &TheCParser = *unwrap(Parser); - remarks::Parser &TheParser = *TheCParser.TheParser; + remarks::RemarkParser &TheParser = *TheCParser.TheParser; Expected> MaybeRemark = TheParser.next(); if (Error E = MaybeRemark.takeError()) { diff --git a/lib/Remarks/RemarkSerializer.cpp b/lib/Remarks/RemarkSerializer.cpp new file mode 100644 index 000000000000..ab19c84bbadb --- /dev/null +++ b/lib/Remarks/RemarkSerializer.cpp @@ -0,0 +1,54 @@ +//===- RemarkSerializer.cpp -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides tools for serializing remarks. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Remarks/RemarkSerializer.h" +#include "llvm/Remarks/BitstreamRemarkSerializer.h" +#include "llvm/Remarks/YAMLRemarkSerializer.h" + +using namespace llvm; +using namespace llvm::remarks; + +Expected> +remarks::createRemarkSerializer(Format RemarksFormat, SerializerMode Mode, + raw_ostream &OS) { + switch (RemarksFormat) { + case Format::Unknown: + return createStringError(std::errc::invalid_argument, + "Unknown remark serializer format."); + case Format::YAML: + return std::make_unique(OS, Mode); + case Format::YAMLStrTab: + return std::make_unique(OS, Mode); + case Format::Bitstream: + return std::make_unique(OS, Mode); + } + llvm_unreachable("Unknown remarks::Format enum"); +} + +Expected> +remarks::createRemarkSerializer(Format RemarksFormat, SerializerMode Mode, + raw_ostream &OS, remarks::StringTable StrTab) { + switch (RemarksFormat) { + case Format::Unknown: + return createStringError(std::errc::invalid_argument, + "Unknown remark serializer format."); + case Format::YAML: + return std::make_unique(OS, Mode, std::move(StrTab)); + case Format::YAMLStrTab: + return std::make_unique(OS, Mode, + std::move(StrTab)); + case Format::Bitstream: + return std::make_unique(OS, Mode, + std::move(StrTab)); + } + llvm_unreachable("Unknown remarks::Format enum"); +} diff --git a/lib/Remarks/RemarkStringTable.cpp b/lib/Remarks/RemarkStringTable.cpp index 984aa5b33b48..51156465be51 100644 --- a/lib/Remarks/RemarkStringTable.cpp +++ b/lib/Remarks/RemarkStringTable.cpp @@ -11,6 +11,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Remarks/RemarkStringTable.h" +#include "llvm/Remarks/Remark.h" +#include "llvm/Remarks/RemarkParser.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/Error.h" #include @@ -18,6 +20,14 @@ using namespace llvm; using namespace llvm::remarks; +StringTable::StringTable(const ParsedStringTable &Other) : StrTab() { + for (unsigned i = 0, e = Other.size(); i < e; ++i) + if (Expected MaybeStr = Other[i]) + add(*MaybeStr); + else + llvm_unreachable("Unexpected error while building remarks string table."); +} + std::pair StringTable::add(StringRef Str) { size_t NextID = StrTab.size(); auto KV = StrTab.insert({Str, NextID}); @@ -28,10 +38,22 @@ std::pair StringTable::add(StringRef Str) { return {KV.first->second, KV.first->first()}; } +void StringTable::internalize(Remark &R) { + auto Impl = [&](StringRef &S) { S = add(S).second; }; + Impl(R.PassName); + Impl(R.RemarkName); + Impl(R.FunctionName); + if (R.Loc) + Impl(R.Loc->SourceFilePath); + for (Argument &Arg : R.Args) { + Impl(Arg.Key); + Impl(Arg.Val); + if (Arg.Loc) + Impl(Arg.Loc->SourceFilePath); + } +} + void StringTable::serialize(raw_ostream &OS) const { - // Emit the number of strings. - uint64_t StrTabSize = SerializedSize; - support::endian::write(OS, StrTabSize, support::little); // Emit the sequence of strings. 
for (StringRef Str : serialize()) { OS << Str; diff --git a/lib/Remarks/YAMLRemarkParser.cpp b/lib/Remarks/YAMLRemarkParser.cpp index ed78b7ba5d95..dd834d85676e 100644 --- a/lib/Remarks/YAMLRemarkParser.cpp +++ b/lib/Remarks/YAMLRemarkParser.cpp @@ -14,6 +14,8 @@ #include "YAMLRemarkParser.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Remarks/RemarkParser.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Path.h" using namespace llvm; using namespace llvm::remarks; @@ -54,9 +56,123 @@ static SourceMgr setupSM(std::string &LastErrorMessage) { return SM; } +// Parse the magic number. This function returns true if this represents remark +// metadata, false otherwise. +static Expected parseMagic(StringRef &Buf) { + if (!Buf.consume_front(remarks::Magic)) + return false; + + if (Buf.size() < 1 || !Buf.consume_front(StringRef("\0", 1))) + return createStringError(std::errc::illegal_byte_sequence, + "Expecting \\0 after magic number."); + return true; +} + +static Expected parseVersion(StringRef &Buf) { + if (Buf.size() < sizeof(uint64_t)) + return createStringError(std::errc::illegal_byte_sequence, + "Expecting version number."); + + uint64_t Version = + support::endian::read( + Buf.data()); + if (Version != remarks::CurrentRemarkVersion) + return createStringError(std::errc::illegal_byte_sequence, + "Mismatching remark version. Got %" PRId64 + ", expected %" PRId64 ".", + Version, remarks::CurrentRemarkVersion); + Buf = Buf.drop_front(sizeof(uint64_t)); + return Version; +} + +static Expected parseStrTabSize(StringRef &Buf) { + if (Buf.size() < sizeof(uint64_t)) + return createStringError(std::errc::illegal_byte_sequence, + "Expecting string table size."); + uint64_t StrTabSize = + support::endian::read( + Buf.data()); + Buf = Buf.drop_front(sizeof(uint64_t)); + return StrTabSize; +} + +static Expected parseStrTab(StringRef &Buf, + uint64_t StrTabSize) { + if (Buf.size() < StrTabSize) + return createStringError(std::errc::illegal_byte_sequence, + "Expecting string table."); + + // Attach the string table to the parser. + ParsedStringTable Result(StringRef(Buf.data(), StrTabSize)); + Buf = Buf.drop_front(StrTabSize); + return Expected(std::move(Result)); +} + +Expected> +remarks::createYAMLParserFromMeta(StringRef Buf, + Optional StrTab, + Optional ExternalFilePrependPath) { + // We now have a magic number. The metadata has to be correct. + Expected isMeta = parseMagic(Buf); + if (!isMeta) + return isMeta.takeError(); + // If it's not recognized as metadata, roll back. + std::unique_ptr SeparateBuf; + if (*isMeta) { + Expected Version = parseVersion(Buf); + if (!Version) + return Version.takeError(); + + Expected StrTabSize = parseStrTabSize(Buf); + if (!StrTabSize) + return StrTabSize.takeError(); + + // If the size of string table is not 0, try to build one. + if (*StrTabSize != 0) { + if (StrTab) + return createStringError(std::errc::illegal_byte_sequence, + "String table already provided."); + Expected MaybeStrTab = parseStrTab(Buf, *StrTabSize); + if (!MaybeStrTab) + return MaybeStrTab.takeError(); + StrTab = std::move(*MaybeStrTab); + } + // If it starts with "---", there is no external file. + if (!Buf.startswith("---")) { + // At this point, we expect Buf to contain the external file path. + StringRef ExternalFilePath = Buf; + SmallString<80> FullPath; + if (ExternalFilePrependPath) + FullPath = *ExternalFilePrependPath; + sys::path::append(FullPath, ExternalFilePath); + + // Try to open the file and start parsing from there. 
+ ErrorOr> BufferOrErr = + MemoryBuffer::getFile(FullPath); + if (std::error_code EC = BufferOrErr.getError()) + return createFileError(FullPath, EC); + + // Keep the buffer alive. + SeparateBuf = std::move(*BufferOrErr); + Buf = SeparateBuf->getBuffer(); + } + } + + std::unique_ptr Result = + StrTab + ? std::make_unique(Buf, std::move(*StrTab)) + : std::make_unique(Buf); + if (SeparateBuf) + Result->SeparateBuf = std::move(SeparateBuf); + return std::move(Result); +} + +YAMLRemarkParser::YAMLRemarkParser(StringRef Buf) + : YAMLRemarkParser(Buf, None) {} + YAMLRemarkParser::YAMLRemarkParser(StringRef Buf, - Optional StrTab) - : Parser{Format::YAML}, StrTab(StrTab), LastErrorMessage(), + Optional StrTab) + : RemarkParser{Format::YAML}, StrTab(std::move(StrTab)), LastErrorMessage(), SM(setupSM(LastErrorMessage)), Stream(Buf, SM), YAMLIt(Stream.begin()) {} Error YAMLRemarkParser::error(StringRef Message, yaml::Node &Node) { @@ -86,7 +202,7 @@ YAMLRemarkParser::parseRemark(yaml::Document &RemarkEntry) { if (!Root) return error("document root is not of mapping type.", *YAMLRoot); - std::unique_ptr Result = llvm::make_unique(); + std::unique_ptr Result = std::make_unique(); Remark &TheRemark = *Result; // First, the type. It needs special handling since is not part of the @@ -179,22 +295,7 @@ Expected YAMLRemarkParser::parseStr(yaml::KeyValueNode &Node) { auto *Value = dyn_cast(Node.getValue()); if (!Value) return error("expected a value of scalar type.", Node); - StringRef Result; - if (!StrTab) { - Result = Value->getRawValue(); - } else { - // If we have a string table, parse it as an unsigned. - unsigned StrID = 0; - if (Expected MaybeStrID = parseUnsigned(Node)) - StrID = *MaybeStrID; - else - return MaybeStrID.takeError(); - - if (Expected Str = (**StrTab)[StrID]) - Result = *Str; - else - return Str.takeError(); - } + StringRef Result = Value->getRawValue(); if (Result.front() == '\'') Result = Result.drop_front(); @@ -325,3 +426,29 @@ Expected> YAMLRemarkParser::next() { return std::move(*MaybeResult); } + +Expected YAMLStrTabRemarkParser::parseStr(yaml::KeyValueNode &Node) { + auto *Value = dyn_cast(Node.getValue()); + if (!Value) + return error("expected a value of scalar type.", Node); + StringRef Result; + // If we have a string table, parse it as an unsigned. + unsigned StrID = 0; + if (Expected MaybeStrID = parseUnsigned(Node)) + StrID = *MaybeStrID; + else + return MaybeStrID.takeError(); + + if (Expected Str = (*StrTab)[StrID]) + Result = *Str; + else + return Str.takeError(); + + if (Result.front() == '\'') + Result = Result.drop_front(); + + if (Result.back() == '\'') + Result = Result.drop_back(); + + return Result; +} diff --git a/lib/Remarks/YAMLRemarkParser.h b/lib/Remarks/YAMLRemarkParser.h index cea76e63e75c..03707433bc03 100644 --- a/lib/Remarks/YAMLRemarkParser.h +++ b/lib/Remarks/YAMLRemarkParser.h @@ -18,6 +18,7 @@ #include "llvm/Remarks/Remark.h" #include "llvm/Remarks/RemarkParser.h" #include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/YAMLParser.h" #include "llvm/Support/YAMLTraits.h" @@ -46,9 +47,9 @@ private: }; /// Regular YAML to Remark parser. -struct YAMLRemarkParser : public Parser { +struct YAMLRemarkParser : public RemarkParser { /// The string table used for parsing strings. - Optional StrTab; + Optional StrTab; /// Last error message that can come from the YAML parser diagnostics. /// We need this for catching errors in the constructor. 
std::string LastErrorMessage; @@ -58,17 +59,20 @@ struct YAMLRemarkParser : public Parser { yaml::Stream Stream; /// Iterator in the YAML stream. yaml::document_iterator YAMLIt; + /// If we parse remark metadata in separate mode, we need to open a new file + /// and parse that. + std::unique_ptr SeparateBuf; - YAMLRemarkParser(StringRef Buf, - Optional StrTab = None); + YAMLRemarkParser(StringRef Buf); Expected> next() override; - static bool classof(const Parser *P) { + static bool classof(const RemarkParser *P) { return P->ParserFormat == Format::YAML; } -private: +protected: + YAMLRemarkParser(StringRef Buf, Optional StrTab); /// Create a YAMLParseError error from an existing error generated by the YAML /// parser. /// If there is no error, this returns Success. @@ -82,7 +86,7 @@ private: /// Parse one key to a string. Expected parseKey(yaml::KeyValueNode &Node); /// Parse one value to a string. - Expected parseStr(yaml::KeyValueNode &Node); + virtual Expected parseStr(yaml::KeyValueNode &Node); /// Parse one value to an unsigned. Expected parseUnsigned(yaml::KeyValueNode &Node); /// Parse a debug location. @@ -90,6 +94,26 @@ private: /// Parse an argument. Expected parseArg(yaml::Node &Node); }; + +/// YAML with a string table to Remark parser. +struct YAMLStrTabRemarkParser : public YAMLRemarkParser { + YAMLStrTabRemarkParser(StringRef Buf, ParsedStringTable StrTab) + : YAMLRemarkParser(Buf, std::move(StrTab)) {} + + static bool classof(const RemarkParser *P) { + return P->ParserFormat == Format::YAMLStrTab; + } + +protected: + /// Parse one value to a string. + Expected parseStr(yaml::KeyValueNode &Node) override; +}; + +Expected> +createYAMLParserFromMeta(StringRef Buf, + Optional StrTab = None, + Optional ExternalFilePrependPath = None); + } // end namespace remarks } // end namespace llvm diff --git a/lib/Remarks/YAMLRemarkSerializer.cpp b/lib/Remarks/YAMLRemarkSerializer.cpp index d64ae8e12ab0..3a42fe0678eb 100644 --- a/lib/Remarks/YAMLRemarkSerializer.cpp +++ b/lib/Remarks/YAMLRemarkSerializer.cpp @@ -11,16 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Remarks/RemarkSerializer.h" +#include "llvm/Remarks/YAMLRemarkSerializer.h" #include "llvm/Support/CommandLine.h" using namespace llvm; using namespace llvm::remarks; -cl::opt RemarksYAMLStringTable( - "remarks-yaml-string-table", cl::init(false), cl::Hidden, - cl::desc("Enable the usage of a string table with YAML remarks.")); - // Use the same keys whether we use a string table or not (respectively, T is an // unsigned or a StringRef). 
template @@ -60,11 +56,14 @@ template <> struct MappingTraits { else llvm_unreachable("Unknown remark type"); - if (Optional &StrTab = - reinterpret_cast(io.getContext())->StrTab) { - unsigned PassID = StrTab->add(Remark->PassName).first; - unsigned NameID = StrTab->add(Remark->RemarkName).first; - unsigned FunctionID = StrTab->add(Remark->FunctionName).first; + if (auto *Serializer = dyn_cast( + reinterpret_cast(io.getContext()))) { + assert(Serializer->StrTab.hasValue() && + "YAMLStrTabSerializer with no StrTab."); + StringTable &StrTab = *Serializer->StrTab; + unsigned PassID = StrTab.add(Remark->PassName).first; + unsigned NameID = StrTab.add(Remark->RemarkName).first; + unsigned FunctionID = StrTab.add(Remark->FunctionName).first; mapRemarkHeader(io, PassID, NameID, Remark->Loc, FunctionID, Remark->Hotness, Remark->Args); } else { @@ -82,9 +81,12 @@ template <> struct MappingTraits { unsigned Line = RL.SourceLine; unsigned Col = RL.SourceColumn; - if (Optional &StrTab = - reinterpret_cast(io.getContext())->StrTab) { - unsigned FileID = StrTab->add(File).first; + if (auto *Serializer = dyn_cast( + reinterpret_cast(io.getContext()))) { + assert(Serializer->StrTab.hasValue() && + "YAMLStrTabSerializer with no StrTab."); + StringTable &StrTab = *Serializer->StrTab; + unsigned FileID = StrTab.add(File).first; io.mapRequired("File", FileID); } else { io.mapRequired("File", File); @@ -101,7 +103,7 @@ template <> struct MappingTraits { /// newlines in strings. struct StringBlockVal { StringRef Value; - StringBlockVal(const std::string &Value) : Value(Value) {} + StringBlockVal(StringRef R) : Value(R) {} }; template <> struct BlockScalarTraits { @@ -134,9 +136,12 @@ template <> struct MappingTraits { static void mapping(IO &io, Argument &A) { assert(io.outputting() && "input not yet implemented"); - if (Optional &StrTab = - reinterpret_cast(io.getContext())->StrTab) { - auto ValueID = StrTab->add(A.Val).first; + if (auto *Serializer = dyn_cast( + reinterpret_cast(io.getContext()))) { + assert(Serializer->StrTab.hasValue() && + "YAMLStrTabSerializer with no StrTab."); + StringTable &StrTab = *Serializer->StrTab; + auto ValueID = StrTab.add(A.Val).first; io.mapRequired(A.Key.data(), ValueID); } else if (StringRef(A.Val).count('\n') > 1) { StringBlockVal S(A.Val); @@ -153,15 +158,100 @@ template <> struct MappingTraits { LLVM_YAML_IS_SEQUENCE_VECTOR(Argument) -YAMLSerializer::YAMLSerializer(raw_ostream &OS, UseStringTable UseStringTable) - : Serializer(OS), YAMLOutput(OS, reinterpret_cast(this)) { - if (UseStringTable == remarks::UseStringTable::Yes || RemarksYAMLStringTable) - StrTab.emplace(); +YAMLRemarkSerializer::YAMLRemarkSerializer(raw_ostream &OS, SerializerMode Mode, + Optional StrTabIn) + : YAMLRemarkSerializer(Format::YAML, OS, Mode, std::move(StrTabIn)) {} + +YAMLRemarkSerializer::YAMLRemarkSerializer(Format SerializerFormat, + raw_ostream &OS, SerializerMode Mode, + Optional StrTabIn) + : RemarkSerializer(SerializerFormat, OS, Mode), + YAMLOutput(OS, reinterpret_cast(this)) { + StrTab = std::move(StrTabIn); } -void YAMLSerializer::emit(const Remark &Remark) { +void YAMLRemarkSerializer::emit(const Remark &Remark) { // Again, YAMLTraits expect a non-const object for inputting, but we're not // using that here. 
auto R = const_cast(&Remark); YAMLOutput << R; } + +std::unique_ptr +YAMLRemarkSerializer::metaSerializer(raw_ostream &OS, + Optional ExternalFilename) { + return std::make_unique(OS, ExternalFilename); +} + +void YAMLStrTabRemarkSerializer::emit(const Remark &Remark) { + // In standalone mode, for the serializer with a string table, emit the + // metadata first and set DidEmitMeta to avoid emitting it again. + if (Mode == SerializerMode::Standalone && !DidEmitMeta) { + std::unique_ptr MetaSerializer = + metaSerializer(OS, /*ExternalFilename=*/None); + MetaSerializer->emit(); + DidEmitMeta = true; + } + + // Then do the usual remark emission. + YAMLRemarkSerializer::emit(Remark); +} + +std::unique_ptr YAMLStrTabRemarkSerializer::metaSerializer( + raw_ostream &OS, Optional ExternalFilename) { + assert(StrTab); + return std::make_unique(OS, ExternalFilename, + *StrTab); +} + +static void emitMagic(raw_ostream &OS) { + // Emit the magic number. + OS << remarks::Magic; + // Explicitly emit a '\0'. + OS.write('\0'); +} + +static void emitVersion(raw_ostream &OS) { + // Emit the version number: little-endian uint64_t. + std::array Version; + support::endian::write64le(Version.data(), remarks::CurrentRemarkVersion); + OS.write(Version.data(), Version.size()); +} + +static void emitStrTab(raw_ostream &OS, Optional StrTab) { + // Emit the string table in the section. + uint64_t StrTabSize = StrTab ? (*StrTab)->SerializedSize : 0; + // Emit the total size of the string table (the size itself excluded): + // little-endian uint64_t. + // Note: even if no string table is used, emit 0. + std::array StrTabSizeBuf; + support::endian::write64le(StrTabSizeBuf.data(), StrTabSize); + OS.write(StrTabSizeBuf.data(), StrTabSizeBuf.size()); + if (StrTab) + (*StrTab)->serialize(OS); +} + +static void emitExternalFile(raw_ostream &OS, StringRef Filename) { + // Emit the null-terminated absolute path to the remark file. + SmallString<128> FilenameBuf = Filename; + sys::fs::make_absolute(FilenameBuf); + assert(!FilenameBuf.empty() && "The filename can't be empty."); + OS.write(FilenameBuf.data(), FilenameBuf.size()); + OS.write('\0'); +} + +void YAMLMetaSerializer::emit() { + emitMagic(OS); + emitVersion(OS); + emitStrTab(OS, None); + if (ExternalFilename) + emitExternalFile(OS, *ExternalFilename); +} + +void YAMLStrTabMetaSerializer::emit() { + emitMagic(OS); + emitVersion(OS); + emitStrTab(OS, &StrTab); + if (ExternalFilename) + emitExternalFile(OS, *ExternalFilename); +} diff --git a/lib/Support/AArch64TargetParser.cpp b/lib/Support/AArch64TargetParser.cpp index df4caa1f07fd..6f1d6d50eee2 100644 --- a/lib/Support/AArch64TargetParser.cpp +++ b/lib/Support/AArch64TargetParser.cpp @@ -96,8 +96,8 @@ bool AArch64::getExtensionFeatures(unsigned Extensions, Features.push_back("+sve2-sm4"); if (Extensions & AEK_SVE2SHA3) Features.push_back("+sve2-sha3"); - if (Extensions & AEK_BITPERM) - Features.push_back("+bitperm"); + if (Extensions & AEK_SVE2BITPERM) + Features.push_back("+sve2-bitperm"); if (Extensions & AEK_RCPC) Features.push_back("+rcpc"); diff --git a/lib/Support/ABIBreak.cpp b/lib/Support/ABIBreak.cpp new file mode 100644 index 000000000000..247b635e02b8 --- /dev/null +++ b/lib/Support/ABIBreak.cpp @@ -0,0 +1,24 @@ +//===----- lib/Support/ABIBreak.cpp - EnableABIBreakingChecks -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Config/abi-breaking.h" + +#ifndef _MSC_VER +namespace llvm { + +// One of these two variables will be referenced by a symbol defined in +// llvm-config.h. We provide a link-time (or load time for DSO) failure when +// there is a mismatch in the build configuration of the API client and LLVM. +#if LLVM_ENABLE_ABI_BREAKING_CHECKS +int EnableABIBreakingChecks; +#else +int DisableABIBreakingChecks; +#endif + +} // end namespace llvm +#endif diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 43173311cd80..758fe8b4f866 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -401,6 +401,33 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { } } +void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) { + uint64_t maskBits = maskTrailingOnes(numBits); + subBits &= maskBits; + if (isSingleWord()) { + U.VAL &= ~(maskBits << bitPosition); + U.VAL |= subBits << bitPosition; + return; + } + + unsigned loBit = whichBit(bitPosition); + unsigned loWord = whichWord(bitPosition); + unsigned hiWord = whichWord(bitPosition + numBits - 1); + if (loWord == hiWord) { + U.pVal[loWord] &= ~(maskBits << loBit); + U.pVal[loWord] |= subBits << loBit; + return; + } + + static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected"); + unsigned wordBits = 8 * sizeof(WordType); + U.pVal[loWord] &= ~(maskBits << loBit); + U.pVal[loWord] |= subBits << loBit; + + U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit)); + U.pVal[hiWord] |= subBits >> (wordBits - loBit); +} + APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { assert(numBits > 0 && "Can't extract zero bits"); assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && @@ -438,6 +465,31 @@ APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { return Result.clearUnusedBits(); } +uint64_t APInt::extractBitsAsZExtValue(unsigned numBits, + unsigned bitPosition) const { + assert(numBits > 0 && "Can't extract zero bits"); + assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && + "Illegal bit extraction"); + assert(numBits <= 64 && "Illegal bit extraction"); + + uint64_t maskBits = maskTrailingOnes(numBits); + if (isSingleWord()) + return (U.VAL >> bitPosition) & maskBits; + + unsigned loBit = whichBit(bitPosition); + unsigned loWord = whichWord(bitPosition); + unsigned hiWord = whichWord(bitPosition + numBits - 1); + if (loWord == hiWord) + return (U.pVal[loWord] >> loBit) & maskBits; + + static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected"); + unsigned wordBits = 8 * sizeof(WordType); + uint64_t retBits = U.pVal[loWord] >> loBit; + retBits |= U.pVal[hiWord] << (wordBits - loBit); + retBits &= maskBits; + return retBits; +} + unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { assert(!str.empty() && "Invalid string length"); assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || diff --git a/lib/Support/ARMTargetParser.cpp b/lib/Support/ARMTargetParser.cpp index be948cfc95d4..ce5daa7fe58c 100644 --- a/lib/Support/ARMTargetParser.cpp +++ b/lib/Support/ARMTargetParser.cpp @@ -176,10 +176,8 @@ bool ARM::getFPUFeatures(unsigned FPUKind, std::vector &Features) { // exist). 
{"+fpregs", "-fpregs", FPUVersion::VFPV2, FPURestriction::SP_D16}, - {"+vfp2", "-vfp2", FPUVersion::VFPV2, FPURestriction::None}, - {"+vfp2d16", "-vfp2d16", FPUVersion::VFPV2, FPURestriction::D16}, - {"+vfp2d16sp", "-vfp2d16sp", FPUVersion::VFPV2, FPURestriction::SP_D16}, - {"+vfp2sp", "-vfp2sp", FPUVersion::VFPV2, FPURestriction::None}, + {"+vfp2", "-vfp2", FPUVersion::VFPV2, FPURestriction::D16}, + {"+vfp2sp", "-vfp2sp", FPUVersion::VFPV2, FPURestriction::SP_D16}, {"+vfp3", "-vfp3", FPUVersion::VFPV3, FPURestriction::None}, {"+vfp3d16", "-vfp3d16", FPUVersion::VFPV3, FPURestriction::D16}, {"+vfp3d16sp", "-vfp3d16sp", FPUVersion::VFPV3, FPURestriction::SP_D16}, @@ -195,7 +193,7 @@ bool ARM::getFPUFeatures(unsigned FPUKind, std::vector &Features) { {"+fp-armv8sp", "-fp-armv8sp", FPUVersion::VFPV5, FPURestriction::None}, {"+fullfp16", "-fullfp16", FPUVersion::VFPV5_FULLFP16, FPURestriction::SP_D16}, {"+fp64", "-fp64", FPUVersion::VFPV2, FPURestriction::D16}, - {"+d32", "-d32", FPUVersion::VFPV2, FPURestriction::None}, + {"+d32", "-d32", FPUVersion::VFPV3, FPURestriction::None}, }; for (const auto &Info: FPUFeatureInfoList) { diff --git a/lib/Support/CRC.cpp b/lib/Support/CRC.cpp index fd98f3a24003..7c008d3b599d 100644 --- a/lib/Support/CRC.cpp +++ b/lib/Support/CRC.cpp @@ -6,63 +6,94 @@ // //===----------------------------------------------------------------------===// // -// This file implements llvm::crc32 function. +// This file contains implementations of CRC functions. +// +// The implementation technique is the one mentioned in: +// D. V. Sarwate. 1988. Computation of cyclic redundancy checks via table +// look-up. Commun. ACM 31, 8 (August 1988) +// +// See also Ross N. Williams "A Painless Guide to CRC Error Detection +// Algorithms" (https://zlib.net/crc_v3.txt) or Hacker's Delight (2nd ed.) +// Chapter 14 (Figure 14-7 in particular) for how the algorithm works. // //===----------------------------------------------------------------------===// #include "llvm/Support/CRC.h" + +#include "llvm/ADT/ArrayRef.h" #include "llvm/Config/config.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/Threading.h" -#include using namespace llvm; #if LLVM_ENABLE_ZLIB == 0 || !HAVE_ZLIB_H -using CRC32Table = std::array; - -static void initCRC32Table(CRC32Table *Tbl) { - auto Shuffle = [](uint32_t V) { - return (V & 1) ? 
(V >> 1) ^ 0xEDB88320U : V >> 1; - }; - - for (size_t I = 0; I < Tbl->size(); ++I) { - uint32_t V = Shuffle(I); - V = Shuffle(V); - V = Shuffle(V); - V = Shuffle(V); - V = Shuffle(V); - V = Shuffle(V); - V = Shuffle(V); - (*Tbl)[I] = Shuffle(V); - } -} -uint32_t llvm::crc32(uint32_t CRC, StringRef S) { - static llvm::once_flag InitFlag; - static CRC32Table Tbl; - llvm::call_once(InitFlag, initCRC32Table, &Tbl); +static const uint32_t CRCTable[256] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, + 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2, + 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c, + 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, + 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106, + 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, + 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, + 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, + 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, + 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84, + 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e, + 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, + 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28, + 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, + 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, + 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, + 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, + 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 
0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d}; - const uint8_t *P = reinterpret_cast(S.data()); - size_t Len = S.size(); +uint32_t llvm::crc32(uint32_t CRC, ArrayRef Data) { CRC ^= 0xFFFFFFFFU; - for (; Len >= 8; Len -= 8) { - CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8); - CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8); - CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8); - CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8); - CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8); - CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8); - CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8); - CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8); + for (uint8_t Byte : Data) { + int TableIdx = (CRC ^ Byte) & 0xff; + CRC = CRCTable[TableIdx] ^ (CRC >> 8); } - while (Len--) - CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8); return CRC ^ 0xFFFFFFFFU; } + #else + #include -uint32_t llvm::crc32(uint32_t CRC, StringRef S) { - return ::crc32(CRC, (const Bytef *)S.data(), S.size()); +uint32_t llvm::crc32(uint32_t CRC, ArrayRef Data) { + return ::crc32(CRC, (const Bytef *)Data.data(), Data.size()); } + #endif + +uint32_t llvm::crc32(ArrayRef Data) { return crc32(0, Data); } + +void JamCRC::update(ArrayRef Data) { + CRC ^= 0xFFFFFFFFU; // Undo CRC-32 Init. + CRC = crc32(CRC, Data); + CRC ^= 0xFFFFFFFFU; // Undo CRC-32 XorOut. +} diff --git a/lib/Support/CachePruning.cpp b/lib/Support/CachePruning.cpp index 9813eec0e433..7a2f6c53435a 100644 --- a/lib/Support/CachePruning.cpp +++ b/lib/Support/CachePruning.cpp @@ -45,7 +45,7 @@ struct FileInfo { /// interval option. static void writeTimestampFile(StringRef TimestampFile) { std::error_code EC; - raw_fd_ostream Out(TimestampFile.str(), EC, sys::fs::F_None); + raw_fd_ostream Out(TimestampFile.str(), EC, sys::fs::OF_None); } static Expected parseDuration(StringRef Duration) { diff --git a/lib/Support/CodeGenCoverage.cpp b/lib/Support/CodeGenCoverage.cpp index f39eb7533b43..2db4193ce382 100644 --- a/lib/Support/CodeGenCoverage.cpp +++ b/lib/Support/CodeGenCoverage.cpp @@ -101,9 +101,9 @@ bool CodeGenCoverage::emit(StringRef CoveragePrefix, std::string CoverageFilename = (CoveragePrefix + Pid).str(); std::error_code EC; - sys::fs::OpenFlags OpenFlags = sys::fs::F_Append; + sys::fs::OpenFlags OpenFlags = sys::fs::OF_Append; std::unique_ptr CoverageFile = - llvm::make_unique(CoverageFilename, EC, OpenFlags); + std::make_unique(CoverageFilename, EC, OpenFlags); if (EC) return false; diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 25510fa58ff5..620f7ffd4c9f 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -692,7 +692,7 @@ static inline bool ProvideOption(Option *Handler, StringRef ArgName, return false; } -static bool ProvidePositionalOption(Option *Handler, StringRef Arg, int i) { +bool llvm::cl::ProvidePositionalOption(Option *Handler, StringRef Arg, int i) { int Dummy = i; return ProvideOption(Handler, Handler->ArgStr, Arg, 0, nullptr, Dummy); } diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp index c2459256f8fe..9d13fce9cc52 100644 --- a/lib/Support/CrashRecoveryContext.cpp +++ b/lib/Support/CrashRecoveryContext.cpp @@ -10,8 +10,8 @@ #include "llvm/Config/llvm-config.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Mutex.h" #include "llvm/Support/ThreadLocal.h" +#include #include using namespace llvm; @@ -71,7 +71,7 @@ public: } -static ManagedStatic gCrashRecoveryContextMutex; +static ManagedStatic gCrashRecoveryContextMutex; static bool 
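The rewritten CRC.cpp above replaces the lazily built table with a precomputed Sarwate lookup table and exposes a byte-oriented llvm::crc32 plus the stateful JamCRC (CRC-32 without the final xor). A short usage sketch; the one-argument crc32 overload and JamCRC::update come from this patch, while JamCRC's default constructor and getCRC() accessor are assumed from the accompanying CRC.h header:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/CRC.h"
#include <cstdint>

uint32_t checksumExamples() {
  const uint8_t Bytes[] = {'h', 'e', 'l', 'l', 'o'};
  llvm::ArrayRef<uint8_t> Data(Bytes);

  // One-shot CRC-32; init and final xor are handled inside.
  uint32_t Whole = llvm::crc32(Data);

  // Incremental CRC-32 of the same bytes via the seeded overload; because the
  // init/xor-out are undone and redone on each call, chaining matches the
  // one-shot result, as with zlib's crc32().
  uint32_t Incremental = llvm::crc32(0, Data.take_front(2));
  Incremental = llvm::crc32(Incremental, Data.drop_front(2));

  // JamCRC: same table, but no final xor applied to the running value.
  llvm::JamCRC JC;        // default-initialized state (assumed from CRC.h)
  JC.update(Data);
  return Whole ^ Incremental ^ JC.getCRC();
}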
gCrashRecoveryEnabled = false; static ManagedStatic> @@ -116,7 +116,7 @@ CrashRecoveryContext *CrashRecoveryContext::GetCurrent() { } void CrashRecoveryContext::Enable() { - sys::ScopedLock L(*gCrashRecoveryContextMutex); + std::lock_guard L(*gCrashRecoveryContextMutex); // FIXME: Shouldn't this be a refcount or something? if (gCrashRecoveryEnabled) return; @@ -125,7 +125,7 @@ void CrashRecoveryContext::Enable() { } void CrashRecoveryContext::Disable() { - sys::ScopedLock L(*gCrashRecoveryContextMutex); + std::lock_guard L(*gCrashRecoveryContextMutex); if (!gCrashRecoveryEnabled) return; gCrashRecoveryEnabled = false; diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp index 673bbb4d06f4..a98297cdb35f 100644 --- a/lib/Support/DataExtractor.cpp +++ b/lib/Support/DataExtractor.cpp @@ -7,111 +7,137 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Host.h" -#include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/LEB128.h" +#include "llvm/Support/SwapByteOrder.h" + using namespace llvm; +static void unexpectedEndReached(Error *E) { + if (E) + *E = createStringError(errc::illegal_byte_sequence, + "unexpected end of data"); +} + +static bool isError(Error *E) { return E && *E; } + template -static T getU(uint32_t *offset_ptr, const DataExtractor *de, - bool isLittleEndian, const char *Data) { +static T getU(uint64_t *offset_ptr, const DataExtractor *de, + bool isLittleEndian, const char *Data, llvm::Error *Err) { + ErrorAsOutParameter ErrAsOut(Err); T val = 0; - uint32_t offset = *offset_ptr; - if (de->isValidOffsetForDataOfSize(offset, sizeof(val))) { - std::memcpy(&val, &Data[offset], sizeof(val)); - if (sys::IsLittleEndianHost != isLittleEndian) - sys::swapByteOrder(val); - - // Advance the offset - *offset_ptr += sizeof(val); + if (isError(Err)) + return val; + + uint64_t offset = *offset_ptr; + if (!de->isValidOffsetForDataOfSize(offset, sizeof(T))) { + unexpectedEndReached(Err); + return val; } + std::memcpy(&val, &Data[offset], sizeof(val)); + if (sys::IsLittleEndianHost != isLittleEndian) + sys::swapByteOrder(val); + + // Advance the offset + *offset_ptr += sizeof(val); return val; } template -static T *getUs(uint32_t *offset_ptr, T *dst, uint32_t count, - const DataExtractor *de, bool isLittleEndian, const char *Data){ - uint32_t offset = *offset_ptr; - - if (count > 0 && de->isValidOffsetForDataOfSize(offset, sizeof(*dst)*count)) { - for (T *value_ptr = dst, *end = dst + count; value_ptr != end; - ++value_ptr, offset += sizeof(*dst)) - *value_ptr = getU(offset_ptr, de, isLittleEndian, Data); - // Advance the offset - *offset_ptr = offset; - // Return a non-NULL pointer to the converted data as an indicator of - // success - return dst; +static T *getUs(uint64_t *offset_ptr, T *dst, uint32_t count, + const DataExtractor *de, bool isLittleEndian, const char *Data, + llvm::Error *Err) { + ErrorAsOutParameter ErrAsOut(Err); + if (isError(Err)) + return nullptr; + + uint64_t offset = *offset_ptr; + + if (!de->isValidOffsetForDataOfSize(offset, sizeof(*dst) * count)) { + unexpectedEndReached(Err); + return nullptr; } - return nullptr; + for (T *value_ptr = dst, *end = dst + count; value_ptr != end; + ++value_ptr, offset += sizeof(*dst)) + *value_ptr = getU(offset_ptr, de, isLittleEndian, Data, Err); + // Advance the offset + *offset_ptr = offset; + // Return a non-NULL pointer to the converted 
data as an indicator of + // success + return dst; } -uint8_t DataExtractor::getU8(uint32_t *offset_ptr) const { - return getU(offset_ptr, this, IsLittleEndian, Data.data()); +uint8_t DataExtractor::getU8(uint64_t *offset_ptr, llvm::Error *Err) const { + return getU(offset_ptr, this, IsLittleEndian, Data.data(), Err); } uint8_t * -DataExtractor::getU8(uint32_t *offset_ptr, uint8_t *dst, uint32_t count) const { +DataExtractor::getU8(uint64_t *offset_ptr, uint8_t *dst, uint32_t count) const { return getUs(offset_ptr, dst, count, this, IsLittleEndian, - Data.data()); + Data.data(), nullptr); } +uint8_t *DataExtractor::getU8(Cursor &C, uint8_t *Dst, uint32_t Count) const { + return getUs(&C.Offset, Dst, Count, this, IsLittleEndian, + Data.data(), &C.Err); +} -uint16_t DataExtractor::getU16(uint32_t *offset_ptr) const { - return getU(offset_ptr, this, IsLittleEndian, Data.data()); +uint16_t DataExtractor::getU16(uint64_t *offset_ptr, llvm::Error *Err) const { + return getU(offset_ptr, this, IsLittleEndian, Data.data(), Err); } -uint16_t *DataExtractor::getU16(uint32_t *offset_ptr, uint16_t *dst, +uint16_t *DataExtractor::getU16(uint64_t *offset_ptr, uint16_t *dst, uint32_t count) const { return getUs(offset_ptr, dst, count, this, IsLittleEndian, - Data.data()); + Data.data(), nullptr); } -uint32_t DataExtractor::getU24(uint32_t *offset_ptr) const { +uint32_t DataExtractor::getU24(uint64_t *offset_ptr) const { uint24_t ExtractedVal = - getU(offset_ptr, this, IsLittleEndian, Data.data()); + getU(offset_ptr, this, IsLittleEndian, Data.data(), nullptr); // The 3 bytes are in the correct byte order for the host. return ExtractedVal.getAsUint32(sys::IsLittleEndianHost); } -uint32_t DataExtractor::getU32(uint32_t *offset_ptr) const { - return getU(offset_ptr, this, IsLittleEndian, Data.data()); +uint32_t DataExtractor::getU32(uint64_t *offset_ptr, llvm::Error *Err) const { + return getU(offset_ptr, this, IsLittleEndian, Data.data(), Err); } -uint32_t *DataExtractor::getU32(uint32_t *offset_ptr, uint32_t *dst, +uint32_t *DataExtractor::getU32(uint64_t *offset_ptr, uint32_t *dst, uint32_t count) const { return getUs(offset_ptr, dst, count, this, IsLittleEndian, - Data.data()); + Data.data(), nullptr); } -uint64_t DataExtractor::getU64(uint32_t *offset_ptr) const { - return getU(offset_ptr, this, IsLittleEndian, Data.data()); +uint64_t DataExtractor::getU64(uint64_t *offset_ptr, llvm::Error *Err) const { + return getU(offset_ptr, this, IsLittleEndian, Data.data(), Err); } -uint64_t *DataExtractor::getU64(uint32_t *offset_ptr, uint64_t *dst, +uint64_t *DataExtractor::getU64(uint64_t *offset_ptr, uint64_t *dst, uint32_t count) const { return getUs(offset_ptr, dst, count, this, IsLittleEndian, - Data.data()); + Data.data(), nullptr); } -uint64_t -DataExtractor::getUnsigned(uint32_t *offset_ptr, uint32_t byte_size) const { +uint64_t DataExtractor::getUnsigned(uint64_t *offset_ptr, uint32_t byte_size, + llvm::Error *Err) const { switch (byte_size) { case 1: - return getU8(offset_ptr); + return getU8(offset_ptr, Err); case 2: - return getU16(offset_ptr); + return getU16(offset_ptr, Err); case 4: - return getU32(offset_ptr); + return getU32(offset_ptr, Err); case 8: - return getU64(offset_ptr); + return getU64(offset_ptr, Err); } llvm_unreachable("getUnsigned unhandled case!"); } int64_t -DataExtractor::getSigned(uint32_t *offset_ptr, uint32_t byte_size) const { +DataExtractor::getSigned(uint64_t *offset_ptr, uint32_t byte_size) const { switch (byte_size) { case 1: return (int8_t)getU8(offset_ptr); @@ -125,8 
+151,8 @@ DataExtractor::getSigned(uint32_t *offset_ptr, uint32_t byte_size) const { llvm_unreachable("getSigned unhandled case!"); } -const char *DataExtractor::getCStr(uint32_t *offset_ptr) const { - uint32_t offset = *offset_ptr; +const char *DataExtractor::getCStr(uint64_t *offset_ptr) const { + uint64_t offset = *offset_ptr; StringRef::size_type pos = Data.find('\0', offset); if (pos != StringRef::npos) { *offset_ptr = pos + 1; @@ -135,31 +161,38 @@ const char *DataExtractor::getCStr(uint32_t *offset_ptr) const { return nullptr; } -StringRef DataExtractor::getCStrRef(uint32_t *OffsetPtr) const { - uint32_t Start = *OffsetPtr; +StringRef DataExtractor::getCStrRef(uint64_t *offset_ptr) const { + uint64_t Start = *offset_ptr; StringRef::size_type Pos = Data.find('\0', Start); if (Pos != StringRef::npos) { - *OffsetPtr = Pos + 1; + *offset_ptr = Pos + 1; return StringRef(Data.data() + Start, Pos - Start); } return StringRef(); } -uint64_t DataExtractor::getULEB128(uint32_t *offset_ptr) const { +uint64_t DataExtractor::getULEB128(uint64_t *offset_ptr, + llvm::Error *Err) const { assert(*offset_ptr <= Data.size()); + ErrorAsOutParameter ErrAsOut(Err); + if (isError(Err)) + return 0; const char *error; unsigned bytes_read; uint64_t result = decodeULEB128( reinterpret_cast(Data.data() + *offset_ptr), &bytes_read, reinterpret_cast(Data.data() + Data.size()), &error); - if (error) + if (error) { + if (Err) + *Err = createStringError(errc::illegal_byte_sequence, error); return 0; + } *offset_ptr += bytes_read; return result; } -int64_t DataExtractor::getSLEB128(uint32_t *offset_ptr) const { +int64_t DataExtractor::getSLEB128(uint64_t *offset_ptr) const { assert(*offset_ptr <= Data.size()); const char *error; @@ -172,3 +205,14 @@ int64_t DataExtractor::getSLEB128(uint32_t *offset_ptr) const { *offset_ptr += bytes_read; return result; } + +void DataExtractor::skip(Cursor &C, uint64_t Length) const { + ErrorAsOutParameter ErrAsOut(&C.Err); + if (isError(&C.Err)) + return; + + if (isValidOffsetForDataOfSize(C.Offset, Length)) + C.Offset += Length; + else + unexpectedEndReached(&C.Err); +} diff --git a/lib/Support/Error.cpp b/lib/Support/Error.cpp index 72bc08af2ddb..9ea08c37478e 100644 --- a/lib/Support/Error.cpp +++ b/lib/Support/Error.cpp @@ -87,7 +87,7 @@ std::error_code FileError::convertToErrorCode() const { Error errorCodeToError(std::error_code EC) { if (!EC) return Error::success(); - return Error(llvm::make_unique(ECError(EC))); + return Error(std::make_unique(ECError(EC))); } std::error_code errorToErrorCode(Error Err) { @@ -167,18 +167,3 @@ void LLVMDisposeErrorMessage(char *ErrMsg) { delete[] ErrMsg; } LLVMErrorTypeId LLVMGetStringErrorTypeId() { return reinterpret_cast(&StringError::ID); } - -#ifndef _MSC_VER -namespace llvm { - -// One of these two variables will be referenced by a symbol defined in -// llvm-config.h. We provide a link-time (or load time for DSO) failure when -// there is a mismatch in the build configuration of the API client and LLVM. 
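The DataExtractor changes above widen offsets to uint64_t and add Error out-parameter overloads (and a Cursor convenience wrapper, defined in the public header, that bundles an offset with an Error). Several reads can share one Error slot; once it is set, later reads become no-ops returning 0, so a caller checks once at the end. A minimal sketch using only the overloads visible in this hunk:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Error.h"

// Parse a tiny hypothetical header: u16 version, u64 length, ULEB128 count.
llvm::Error readHeader(llvm::StringRef Bytes) {
  llvm::DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
  uint64_t Offset = 0;
  llvm::Error Err = llvm::Error::success();

  uint16_t Version = DE.getU16(&Offset, &Err);
  uint64_t Length = DE.getU64(&Offset, &Err);
  uint64_t Count = DE.getULEB128(&Offset, &Err);
  (void)Version; (void)Length; (void)Count;

  return Err; // Holds the first failure, if any read ran off the end.
}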
-#if LLVM_ENABLE_ABI_BREAKING_CHECKS -int EnableABIBreakingChecks; -#else -int DisableABIBreakingChecks; -#endif - -} // end namespace llvm -#endif diff --git a/lib/Support/FileCheck.cpp b/lib/Support/FileCheck.cpp index e0f17787bdf8..841e406a7b69 100644 --- a/lib/Support/FileCheck.cpp +++ b/lib/Support/FileCheck.cpp @@ -14,31 +14,22 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/FileCheck.h" +#include "FileCheckImpl.h" #include "llvm/ADT/StringSet.h" +#include "llvm/ADT/Twine.h" #include "llvm/Support/FormatVariadic.h" #include #include -#include #include #include using namespace llvm; -void FileCheckNumericVariable::setValue(uint64_t NewValue) { - assert(!Value && "Overwriting numeric variable's value is not allowed"); - Value = NewValue; -} - -void FileCheckNumericVariable::clearValue() { - if (!Value) - return; - Value = None; -} - Expected FileCheckNumericVariableUse::eval() const { Optional Value = NumericVariable->getValue(); if (Value) return *Value; + return make_error(Name); } @@ -109,7 +100,7 @@ FileCheckPattern::parseVariable(StringRef &Str, const SourceMgr &SM) { // StringRef holding all characters considered as horizontal whitespaces by // FileCheck input canonicalization. -StringRef SpaceChars = " \t"; +constexpr StringLiteral SpaceChars = " \t"; // Parsing helper function that strips the first character in S and returns it. static char popFront(StringRef &S) { @@ -159,7 +150,9 @@ FileCheckPattern::parseNumericVariableDefinition( Expected> FileCheckPattern::parseNumericVariableUse(StringRef Name, bool IsPseudo, - const SourceMgr &SM) const { + Optional LineNumber, + FileCheckPatternContext *Context, + const SourceMgr &SM) { if (IsPseudo && !Name.equals("@LINE")) return FileCheckErrorDiagnostic::get( SM, Name, "invalid pseudo numeric variable '" + Name + "'"); @@ -185,21 +178,25 @@ FileCheckPattern::parseNumericVariableUse(StringRef Name, bool IsPseudo, if (DefLineNumber && LineNumber && *DefLineNumber == *LineNumber) return FileCheckErrorDiagnostic::get( SM, Name, - "numeric variable '" + Name + "' defined on the same line as used"); + "numeric variable '" + Name + + "' defined earlier in the same CHECK directive"); - return llvm::make_unique(Name, NumericVariable); + return std::make_unique(Name, NumericVariable); } Expected> FileCheckPattern::parseNumericOperand(StringRef &Expr, AllowedOperand AO, - const SourceMgr &SM) const { + Optional LineNumber, + FileCheckPatternContext *Context, + const SourceMgr &SM) { if (AO == AllowedOperand::LineVar || AO == AllowedOperand::Any) { // Try to parse as a numeric variable use. Expected ParseVarResult = parseVariable(Expr, SM); if (ParseVarResult) return parseNumericVariableUse(ParseVarResult->Name, - ParseVarResult->IsPseudo, SM); + ParseVarResult->IsPseudo, LineNumber, + Context, SM); if (AO == AllowedOperand::LineVar) return ParseVarResult.takeError(); // Ignore the error and retry parsing as a literal. @@ -209,7 +206,7 @@ FileCheckPattern::parseNumericOperand(StringRef &Expr, AllowedOperand AO, // Otherwise, parse it as a literal. 
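Stepping back briefly: the block removed from Error.cpp above is the same one that the new ABIBreak.cpp provides, so the ABI-configuration check no longer depends on Error.cpp being linked in. The library defines exactly one of the two symbols, and a header seen by every client emits a reference to whichever symbol matches the client's configuration, turning a mismatch into an undefined-symbol error at link or load time. A minimal sketch of that pattern with hypothetical names (not the actual contents of abi-breaking.h):

// Library side: exactly one symbol is defined, depending on the library's own
// build configuration.
#ifdef EXAMPLE_ABI_CHECKS
int ExampleEnableChecks;
#else
int ExampleDisableChecks;
#endif

// Client side, in a header every user includes: reference whichever symbol
// matches the client's configuration. On a mismatch the reference has no
// definition and linking fails immediately. Weak linkage keeps the anchor
// from colliding across the client's translation units.
#ifdef EXAMPLE_ABI_CHECKS
extern int ExampleEnableChecks;
__attribute__((weak)) int *ExampleAbiAnchor = &ExampleEnableChecks;
#else
extern int ExampleDisableChecks;
__attribute__((weak)) int *ExampleAbiAnchor = &ExampleDisableChecks;
#endif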
uint64_t LiteralValue; if (!Expr.consumeInteger(/*Radix=*/10, LiteralValue)) - return llvm::make_unique(LiteralValue); + return std::make_unique(LiteralValue); return FileCheckErrorDiagnostic::get(SM, Expr, "invalid operand format '" + Expr + "'"); @@ -223,10 +220,10 @@ static uint64_t sub(uint64_t LeftOp, uint64_t RightOp) { return LeftOp - RightOp; } -Expected> -FileCheckPattern::parseBinop(StringRef &Expr, - std::unique_ptr LeftOp, - bool IsLegacyLineExpr, const SourceMgr &SM) const { +Expected> FileCheckPattern::parseBinop( + StringRef &Expr, std::unique_ptr LeftOp, + bool IsLegacyLineExpr, Optional LineNumber, + FileCheckPatternContext *Context, const SourceMgr &SM) { Expr = Expr.ltrim(SpaceChars); if (Expr.empty()) return std::move(LeftOp); @@ -257,12 +254,12 @@ FileCheckPattern::parseBinop(StringRef &Expr, AllowedOperand AO = IsLegacyLineExpr ? AllowedOperand::Literal : AllowedOperand::Any; Expected> RightOpResult = - parseNumericOperand(Expr, AO, SM); + parseNumericOperand(Expr, AO, LineNumber, Context, SM); if (!RightOpResult) return RightOpResult; Expr = Expr.ltrim(SpaceChars); - return llvm::make_unique(EvalBinop, std::move(LeftOp), + return std::make_unique(EvalBinop, std::move(LeftOp), std::move(*RightOpResult)); } @@ -270,56 +267,60 @@ Expected> FileCheckPattern::parseNumericSubstitutionBlock( StringRef Expr, Optional &DefinedNumericVariable, - bool IsLegacyLineExpr, const SourceMgr &SM) const { - // Parse the numeric variable definition. + bool IsLegacyLineExpr, Optional LineNumber, + FileCheckPatternContext *Context, const SourceMgr &SM) { + std::unique_ptr ExpressionAST = nullptr; + StringRef DefExpr = StringRef(); DefinedNumericVariable = None; + // Save variable definition expression if any. size_t DefEnd = Expr.find(':'); if (DefEnd != StringRef::npos) { - StringRef DefExpr = Expr.substr(0, DefEnd); - StringRef UseExpr = Expr.substr(DefEnd + 1); + DefExpr = Expr.substr(0, DefEnd); + Expr = Expr.substr(DefEnd + 1); + } - UseExpr = UseExpr.ltrim(SpaceChars); - if (!UseExpr.empty()) - return FileCheckErrorDiagnostic::get( - SM, UseExpr, - "unexpected string after variable definition: '" + UseExpr + "'"); + // Parse the expression itself. + Expr = Expr.ltrim(SpaceChars); + if (!Expr.empty()) { + // The first operand in a legacy @LINE expression is always the @LINE + // pseudo variable. + AllowedOperand AO = + IsLegacyLineExpr ? AllowedOperand::LineVar : AllowedOperand::Any; + Expected> ParseResult = + parseNumericOperand(Expr, AO, LineNumber, Context, SM); + while (ParseResult && !Expr.empty()) { + ParseResult = parseBinop(Expr, std::move(*ParseResult), IsLegacyLineExpr, + LineNumber, Context, SM); + // Legacy @LINE expressions only allow 2 operands. + if (ParseResult && IsLegacyLineExpr && !Expr.empty()) + return FileCheckErrorDiagnostic::get( + SM, Expr, + "unexpected characters at end of expression '" + Expr + "'"); + } + if (!ParseResult) + return ParseResult; + ExpressionAST = std::move(*ParseResult); + } + // Parse the numeric variable definition. + if (DefEnd != StringRef::npos) { DefExpr = DefExpr.ltrim(SpaceChars); Expected ParseResult = parseNumericVariableDefinition(DefExpr, Context, LineNumber, SM); + if (!ParseResult) return ParseResult.takeError(); DefinedNumericVariable = *ParseResult; - - return nullptr; } - // Parse the expression itself. - Expr = Expr.ltrim(SpaceChars); - // The first operand in a legacy @LINE expression is always the @LINE pseudo - // variable. - AllowedOperand AO = - IsLegacyLineExpr ? 
AllowedOperand::LineVar : AllowedOperand::Any; - Expected> ParseResult = - parseNumericOperand(Expr, AO, SM); - while (ParseResult && !Expr.empty()) { - ParseResult = - parseBinop(Expr, std::move(*ParseResult), IsLegacyLineExpr, SM); - // Legacy @LINE expressions only allow 2 operands. - if (ParseResult && IsLegacyLineExpr && !Expr.empty()) - return FileCheckErrorDiagnostic::get( - SM, Expr, - "unexpected characters at end of expression '" + Expr + "'"); - } - if (!ParseResult) - return ParseResult; - return std::move(*ParseResult); + return std::move(ExpressionAST); } bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, const FileCheckRequest &Req) { bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot; + IgnoreCase = Req.IgnoreCase; PatternLoc = SMLoc::getFromPointer(PatternStr.data()); @@ -396,14 +397,15 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix, continue; } - // String and numeric substitution blocks. String substitution blocks come + // String and numeric substitution blocks. Pattern substitution blocks come // in two forms: [[foo:.*]] and [[foo]]. The former matches .* (or some // other regex) and assigns it to the string variable 'foo'. The latter - // substitutes foo's value. Numeric substitution blocks work the same way - // as string ones, but start with a '#' sign after the double brackets. - // Both string and numeric variable names must satisfy the regular - // expression "[a-zA-Z_][0-9a-zA-Z_]*" to be valid, as this helps catch - // some common errors. + // substitutes foo's value. Numeric substitution blocks recognize the same + // form as string ones, but start with a '#' sign after the double + // brackets. They also accept a combined form which sets a numeric variable + // to the evaluation of an expression. Both string and numeric variable + // names must satisfy the regular expression "[a-zA-Z_][0-9a-zA-Z_]*" to be + // valid, as this helps catch some common errors. if (PatternStr.startswith("[[")) { StringRef UnparsedPatternStr = PatternStr.substr(2); // Find the closing bracket pair ending the match. End is going to be an @@ -424,6 +426,7 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix, PatternStr = UnparsedPatternStr.substr(End + 2); bool IsDefinition = false; + bool SubstNeeded = false; // Whether the substitution block is a legacy use of @LINE with string // substitution block syntax. 
bool IsLegacyLineExpr = false; @@ -454,6 +457,7 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix, bool IsPseudo = ParseVarResult->IsPseudo; IsDefinition = (VarEndIdx != StringRef::npos); + SubstNeeded = !IsDefinition; if (IsDefinition) { if ((IsPseudo || !MatchStr.consume_front(":"))) { SM.PrintMessage(SMLoc::getFromPointer(Name.data()), @@ -488,22 +492,61 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix, if (IsNumBlock) { Expected> ParseResult = parseNumericSubstitutionBlock(MatchStr, DefinedNumericVariable, - IsLegacyLineExpr, SM); + IsLegacyLineExpr, LineNumber, Context, + SM); if (!ParseResult) { logAllUnhandledErrors(ParseResult.takeError(), errs()); return true; } ExpressionAST = std::move(*ParseResult); + SubstNeeded = ExpressionAST != nullptr; if (DefinedNumericVariable) { IsDefinition = true; DefName = (*DefinedNumericVariable)->getName(); - MatchRegexp = StringRef("[0-9]+"); - } else + } + if (SubstNeeded) SubstStr = MatchStr; + else + MatchRegexp = "[0-9]+"; } + // Handle variable definition: [[:(...)]] and [[#(...):(...)]]. + if (IsDefinition) { + RegExStr += '('; + ++SubstInsertIdx; + + if (IsNumBlock) { + FileCheckNumericVariableMatch NumericVariableDefinition = { + *DefinedNumericVariable, CurParen}; + NumericVariableDefs[DefName] = NumericVariableDefinition; + // This store is done here rather than in match() to allow + // parseNumericVariableUse() to get the pointer to the class instance + // of the right variable definition corresponding to a given numeric + // variable use. + Context->GlobalNumericVariableTable[DefName] = + *DefinedNumericVariable; + } else { + VariableDefs[DefName] = CurParen; + // Mark string variable as defined to detect collisions between + // string and numeric variables in parseNumericVariableUse() and + // defineCmdlineVariables() when the latter is created later than the + // former. We cannot reuse GlobalVariableTable for this by populating + // it with an empty string since we would then lose the ability to + // detect the use of an undefined variable in match(). + Context->DefinedVariableTable[DefName] = true; + } + + ++CurParen; + } + + if (!MatchRegexp.empty() && AddRegExToRegEx(MatchRegexp, CurParen, SM)) + return true; + + if (IsDefinition) + RegExStr += ')'; + // Handle substitutions: [[foo]] and [[#]]. - if (!IsDefinition) { + if (SubstNeeded) { // Handle substitution of string variables that were defined earlier on // the same line by emitting a backreference. Expressions do not // support substituting a numeric variable defined on the same line. @@ -526,37 +569,7 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix, : Context->makeStringSubstitution(SubstStr, SubstInsertIdx); Substitutions.push_back(Substitution); } - continue; - } - - // Handle variable definitions: [[:(...)]] and - // [[#(...):(...)]]. - if (IsNumBlock) { - FileCheckNumericVariableMatch NumericVariableDefinition = { - *DefinedNumericVariable, CurParen}; - NumericVariableDefs[DefName] = NumericVariableDefinition; - // This store is done here rather than in match() to allow - // parseNumericVariableUse() to get the pointer to the class instance - // of the right variable definition corresponding to a given numeric - // variable use. 
- Context->GlobalNumericVariableTable[DefName] = *DefinedNumericVariable; - } else { - VariableDefs[DefName] = CurParen; - // Mark the string variable as defined to detect collisions between - // string and numeric variables in parseNumericVariableUse() and - // DefineCmdlineVariables() when the latter is created later than the - // former. We cannot reuse GlobalVariableTable for this by populating - // it with an empty string since we would then lose the ability to - // detect the use of an undefined variable in match(). - Context->DefinedVariableTable[DefName] = true; } - RegExStr += '('; - ++CurParen; - - if (AddRegExToRegEx(MatchRegexp, CurParen, SM)) - return true; - - RegExStr += ')'; } // Handle fixed string matches. @@ -607,7 +620,8 @@ Expected FileCheckPattern::match(StringRef Buffer, size_t &MatchLen, // If this is a fixed string pattern, just match it now. if (!FixedStr.empty()) { MatchLen = FixedStr.size(); - size_t Pos = Buffer.find(FixedStr); + size_t Pos = IgnoreCase ? Buffer.find_lower(FixedStr) + : Buffer.find(FixedStr); if (Pos == StringRef::npos) return make_error(); return Pos; @@ -631,10 +645,8 @@ Expected FileCheckPattern::match(StringRef Buffer, size_t &MatchLen, for (const auto &Substitution : Substitutions) { // Substitute and check for failure (e.g. use of undefined variable). Expected Value = Substitution->getResult(); - if (!Value) { - Context->LineVariable->clearValue(); + if (!Value) return Value.takeError(); - } // Plop it into the regex at the adjusted offset. TmpStr.insert(TmpStr.begin() + Substitution->getIndex() + InsertOffset, @@ -644,11 +656,13 @@ Expected FileCheckPattern::match(StringRef Buffer, size_t &MatchLen, // Match the newly constructed regex. RegExToMatch = TmpStr; - Context->LineVariable->clearValue(); } SmallVector MatchInfo; - if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) + unsigned int Flags = Regex::Newline; + if (IgnoreCase) + Flags |= Regex::IgnoreCase; + if (!Regex(RegExToMatch, Flags).match(Buffer, &MatchInfo)) return make_error(); // Successful regex match. @@ -824,7 +838,7 @@ template FileCheckNumericVariable * FileCheckPatternContext::makeNumericVariable(Types... 
args) { NumericVariables.push_back( - llvm::make_unique(args...)); + std::make_unique(args...)); return NumericVariables.back().get(); } @@ -832,14 +846,14 @@ FileCheckSubstitution * FileCheckPatternContext::makeStringSubstitution(StringRef VarName, size_t InsertIdx) { Substitutions.push_back( - llvm::make_unique(this, VarName, InsertIdx)); + std::make_unique(this, VarName, InsertIdx)); return Substitutions.back().get(); } FileCheckSubstitution *FileCheckPatternContext::makeNumericSubstitution( StringRef ExpressionStr, std::unique_ptr ExpressionAST, size_t InsertIdx) { - Substitutions.push_back(llvm::make_unique( + Substitutions.push_back(std::make_unique( this, ExpressionStr, std::move(ExpressionAST), InsertIdx)); return Substitutions.back().get(); } @@ -1108,16 +1122,22 @@ void FileCheckPatternContext::createLineVariable() { GlobalNumericVariableTable[LineName] = LineVariable; } -bool FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, - std::vector &CheckStrings) { +FileCheck::FileCheck(FileCheckRequest Req) + : Req(Req), PatternContext(std::make_unique()), + CheckStrings(std::make_unique>()) {} + +FileCheck::~FileCheck() = default; + +bool FileCheck::readCheckFile(SourceMgr &SM, StringRef Buffer, + Regex &PrefixRE) { Error DefineError = - PatternContext.defineCmdlineVariables(Req.GlobalDefines, SM); + PatternContext->defineCmdlineVariables(Req.GlobalDefines, SM); if (DefineError) { logAllUnhandledErrors(std::move(DefineError), errs()); return true; } - PatternContext.createLineVariable(); + PatternContext->createLineVariable(); std::vector ImplicitNegativeChecks; for (const auto &PatternString : Req.ImplicitCheckNot) { @@ -1133,7 +1153,7 @@ bool FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); ImplicitNegativeChecks.push_back( - FileCheckPattern(Check::CheckNot, &PatternContext)); + FileCheckPattern(Check::CheckNot, PatternContext.get())); ImplicitNegativeChecks.back().parsePattern(PatternInBuffer, "IMPLICIT-CHECK", SM, Req); } @@ -1196,7 +1216,7 @@ bool FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); // Parse the pattern. - FileCheckPattern P(CheckTy, &PatternContext, LineNumber); + FileCheckPattern P(CheckTy, PatternContext.get(), LineNumber); if (P.parsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, Req)) return true; @@ -1214,7 +1234,7 @@ bool FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them. if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame || CheckTy == Check::CheckEmpty) && - CheckStrings.empty()) { + CheckStrings->empty()) { StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME"; @@ -1232,21 +1252,21 @@ bool FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, } // Okay, add the string we captured to the output vector and move on. - CheckStrings.emplace_back(P, UsedPrefix, PatternLoc); - std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); + CheckStrings->emplace_back(P, UsedPrefix, PatternLoc); + std::swap(DagNotMatches, CheckStrings->back().DagNotStrings); DagNotMatches = ImplicitNegativeChecks; } // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first // prefix as a filler for the error message. 
if (!DagNotMatches.empty()) { - CheckStrings.emplace_back( - FileCheckPattern(Check::CheckEOF, &PatternContext, LineNumber + 1), + CheckStrings->emplace_back( + FileCheckPattern(Check::CheckEOF, PatternContext.get(), LineNumber + 1), *Req.CheckPrefixes.begin(), SMLoc::getFromPointer(Buffer.data())); - std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); + std::swap(DagNotMatches, CheckStrings->back().DagNotStrings); } - if (CheckStrings.empty()) { + if (CheckStrings->empty()) { errs() << "error: no check strings found with prefix" << (Req.CheckPrefixes.size() > 1 ? "es " : " "); auto I = Req.CheckPrefixes.begin(); @@ -1704,7 +1724,7 @@ FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, // A check prefix must contain only alphanumeric, hyphens and underscores. static bool ValidateCheckPrefix(StringRef CheckPrefix) { - Regex Validator("^[a-zA-Z0-9_-]*$"); + static const Regex Validator("^[a-zA-Z0-9_-]*$"); return Validator.match(CheckPrefix); } @@ -1759,11 +1779,32 @@ Error FileCheckPatternContext::defineCmdlineVariables( unsigned I = 0; Error Errs = Error::success(); std::string CmdlineDefsDiag; - StringRef Prefix1 = "Global define #"; - StringRef Prefix2 = ": "; - for (StringRef CmdlineDef : CmdlineDefines) - CmdlineDefsDiag += - (Prefix1 + Twine(++I) + Prefix2 + CmdlineDef + "\n").str(); + SmallVector, 4> CmdlineDefsIndices; + for (StringRef CmdlineDef : CmdlineDefines) { + std::string DefPrefix = ("Global define #" + Twine(++I) + ": ").str(); + size_t EqIdx = CmdlineDef.find('='); + if (EqIdx == StringRef::npos) { + CmdlineDefsIndices.push_back(std::make_pair(CmdlineDefsDiag.size(), 0)); + continue; + } + // Numeric variable definition. + if (CmdlineDef[0] == '#') { + // Append a copy of the command-line definition adapted to use the same + // format as in the input file to be able to reuse + // parseNumericSubstitutionBlock. + CmdlineDefsDiag += (DefPrefix + CmdlineDef + " (parsed as: [[").str(); + std::string SubstitutionStr = CmdlineDef; + SubstitutionStr[EqIdx] = ':'; + CmdlineDefsIndices.push_back( + std::make_pair(CmdlineDefsDiag.size(), SubstitutionStr.size())); + CmdlineDefsDiag += (SubstitutionStr + Twine("]])\n")).str(); + } else { + CmdlineDefsDiag += DefPrefix; + CmdlineDefsIndices.push_back( + std::make_pair(CmdlineDefsDiag.size(), CmdlineDef.size())); + CmdlineDefsDiag += (CmdlineDef + "\n").str(); + } + } // Create a buffer with fake command line content in order to display // parsing diagnostic with location information and point to the @@ -1773,14 +1814,10 @@ Error FileCheckPatternContext::defineCmdlineVariables( StringRef CmdlineDefsDiagRef = CmdLineDefsDiagBuffer->getBuffer(); SM.AddNewSourceBuffer(std::move(CmdLineDefsDiagBuffer), SMLoc()); - SmallVector CmdlineDefsDiagVec; - CmdlineDefsDiagRef.split(CmdlineDefsDiagVec, '\n', -1 /*MaxSplit*/, - false /*KeepEmpty*/); - for (StringRef CmdlineDefDiag : CmdlineDefsDiagVec) { - unsigned DefStart = CmdlineDefDiag.find(Prefix2) + Prefix2.size(); - StringRef CmdlineDef = CmdlineDefDiag.substr(DefStart); - size_t EqIdx = CmdlineDef.find('='); - if (EqIdx == StringRef::npos) { + for (std::pair CmdlineDefIndices : CmdlineDefsIndices) { + StringRef CmdlineDef = CmdlineDefsDiagRef.substr(CmdlineDefIndices.first, + CmdlineDefIndices.second); + if (CmdlineDef.empty()) { Errs = joinErrors( std::move(Errs), FileCheckErrorDiagnostic::get( @@ -1790,31 +1827,35 @@ Error FileCheckPatternContext::defineCmdlineVariables( // Numeric variable definition. 
if (CmdlineDef[0] == '#') { - StringRef CmdlineName = CmdlineDef.substr(1, EqIdx - 1); - Expected ParseResult = - FileCheckPattern::parseNumericVariableDefinition(CmdlineName, this, - None, SM); - if (!ParseResult) { - Errs = joinErrors(std::move(Errs), ParseResult.takeError()); + // Now parse the definition both to check that the syntax is correct and + // to create the necessary class instance. + StringRef CmdlineDefExpr = CmdlineDef.substr(1); + Optional DefinedNumericVariable; + Expected> ExpressionASTResult = + FileCheckPattern::parseNumericSubstitutionBlock( + CmdlineDefExpr, DefinedNumericVariable, false, None, this, SM); + if (!ExpressionASTResult) { + Errs = joinErrors(std::move(Errs), ExpressionASTResult.takeError()); continue; } - - StringRef CmdlineVal = CmdlineDef.substr(EqIdx + 1); - uint64_t Val; - if (CmdlineVal.getAsInteger(10, Val)) { - Errs = joinErrors(std::move(Errs), - FileCheckErrorDiagnostic::get( - SM, CmdlineVal, - "invalid value in numeric variable definition '" + - CmdlineVal + "'")); + std::unique_ptr ExpressionAST = + std::move(*ExpressionASTResult); + // Now evaluate the expression whose value this variable should be set + // to, since the expression of a command-line variable definition should + // only use variables defined earlier on the command-line. If not, this + // is an error and we report it. + Expected Value = ExpressionAST->eval(); + if (!Value) { + Errs = joinErrors(std::move(Errs), Value.takeError()); continue; } - FileCheckNumericVariable *DefinedNumericVariable = *ParseResult; - DefinedNumericVariable->setValue(Val); + + assert(DefinedNumericVariable && "No variable defined"); + (*DefinedNumericVariable)->setValue(*Value); // Record this variable definition. - GlobalNumericVariableTable[DefinedNumericVariable->getName()] = - DefinedNumericVariable; + GlobalNumericVariableTable[(*DefinedNumericVariable)->getName()] = + *DefinedNumericVariable; } else { // String variable definition. std::pair CmdlineNameVal = CmdlineDef.split('='); @@ -1851,7 +1892,7 @@ Error FileCheckPatternContext::defineCmdlineVariables( } GlobalVariableTable.insert(CmdlineNameVal); // Mark the string variable as defined to detect collisions between - // string and numeric variables in DefineCmdlineVariables when the latter + // string and numeric variables in defineCmdlineVariables when the latter // is created later than the former. We cannot reuse GlobalVariableTable // for this by populating it with an empty string since we would then // lose the ability to detect the use of an undefined variable in @@ -1887,18 +1928,17 @@ void FileCheckPatternContext::clearLocalVars() { GlobalNumericVariableTable.erase(Var); } -bool FileCheck::CheckInput(SourceMgr &SM, StringRef Buffer, - ArrayRef CheckStrings, +bool FileCheck::checkInput(SourceMgr &SM, StringRef Buffer, std::vector *Diags) { bool ChecksFailed = false; - unsigned i = 0, j = 0, e = CheckStrings.size(); + unsigned i = 0, j = 0, e = CheckStrings->size(); while (true) { StringRef CheckRegion; if (j == e) { CheckRegion = Buffer; } else { - const FileCheckString &CheckLabelStr = CheckStrings[j]; + const FileCheckString &CheckLabelStr = (*CheckStrings)[j]; if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { ++j; continue; @@ -1921,10 +1961,10 @@ bool FileCheck::CheckInput(SourceMgr &SM, StringRef Buffer, // CHECK-LABEL and it would clear variables defined on the command-line // before they get used. 
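The FileCheck driver surface reworked above now owns its pattern context and check strings, so a client constructs a FileCheck from a FileCheckRequest and calls readCheckFile followed by checkInput. A sketch of that flow under stated assumptions: the request is left default-constructed for brevity, how the prefix regex is built is outside this sketch, and the optional diagnostics parameter of checkInput is passed explicitly as nullptr.

#include "llvm/Support/FileCheck.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/SourceMgr.h"

// Returns true on failure, mirroring the error conventions visible above
// (readCheckFile returns true on a parse error; checkInput returns true when
// all checks pass).
bool runFileCheck(llvm::SourceMgr &SM, llvm::StringRef CheckFileText,
                  llvm::StringRef InputText, llvm::Regex &PrefixRE) {
  llvm::FileCheckRequest Req;   // normally populated from command-line options
  llvm::FileCheck FC(Req);
  if (FC.readCheckFile(SM, CheckFileText, PrefixRE))
    return true;                // malformed check file, already diagnosed
  return !FC.checkInput(SM, InputText, /*Diags=*/nullptr);
}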
if (i != 0 && Req.EnableVarScope) - PatternContext.clearLocalVars(); + PatternContext->clearLocalVars(); for (; i != j; ++i) { - const FileCheckString &CheckStr = CheckStrings[i]; + const FileCheckString &CheckStr = (*CheckStrings)[i]; // Check each string within the scanned region, including a second check // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) diff --git a/lib/Support/FileCheckImpl.h b/lib/Support/FileCheckImpl.h new file mode 100644 index 000000000000..06ce8301cec4 --- /dev/null +++ b/lib/Support/FileCheckImpl.h @@ -0,0 +1,624 @@ +//===-- FileCheckImpl.h - Private FileCheck Interface ------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the private interfaces of FileCheck. Its purpose is to +// allow unit testing of FileCheck and to separate the interface from the +// implementation. It is only meant to be used by FileCheck. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_SUPPORT_FILECHECKIMPL_H +#define LLVM_LIB_SUPPORT_FILECHECKIMPL_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/SourceMgr.h" +#include +#include +#include + +namespace llvm { + +//===----------------------------------------------------------------------===// +// Numeric substitution handling code. +//===----------------------------------------------------------------------===// + +/// Base class representing the AST of a given expression. +class FileCheckExpressionAST { +public: + virtual ~FileCheckExpressionAST() = default; + + /// Evaluates and \returns the value of the expression represented by this + /// AST or an error if evaluation fails. + virtual Expected eval() const = 0; +}; + +/// Class representing an unsigned literal in the AST of an expression. +class FileCheckExpressionLiteral : public FileCheckExpressionAST { +private: + /// Actual value of the literal. + uint64_t Value; + +public: + /// Constructs a literal with the specified value. + FileCheckExpressionLiteral(uint64_t Val) : Value(Val) {} + + /// \returns the literal's value. + Expected eval() const { return Value; } +}; + +/// Class to represent an undefined variable error, which quotes that +/// variable's name when printed. +class FileCheckUndefVarError : public ErrorInfo { +private: + StringRef VarName; + +public: + static char ID; + + FileCheckUndefVarError(StringRef VarName) : VarName(VarName) {} + + StringRef getVarName() const { return VarName; } + + std::error_code convertToErrorCode() const override { + return inconvertibleErrorCode(); + } + + /// Print name of variable associated with this error. + void log(raw_ostream &OS) const override { + OS << "\""; + OS.write_escaped(VarName) << "\""; + } +}; + +/// Class representing a numeric variable and its associated current value. +class FileCheckNumericVariable { +private: + /// Name of the numeric variable. + StringRef Name; + + /// Value of numeric variable, if defined, or None otherwise. + Optional Value; + + /// Line number where this variable is defined, or None if defined before + /// input is parsed. Used to determine whether a variable is defined on the + /// same line as a given use. 
+ Optional DefLineNumber; + +public: + /// Constructor for a variable \p Name defined at line \p DefLineNumber or + /// defined before input is parsed if \p DefLineNumber is None. + explicit FileCheckNumericVariable(StringRef Name, + Optional DefLineNumber = None) + : Name(Name), DefLineNumber(DefLineNumber) {} + + /// \returns name of this numeric variable. + StringRef getName() const { return Name; } + + /// \returns this variable's value. + Optional getValue() const { return Value; } + + /// Sets value of this numeric variable to \p NewValue. + void setValue(uint64_t NewValue) { Value = NewValue; } + + /// Clears value of this numeric variable, regardless of whether it is + /// currently defined or not. + void clearValue() { Value = None; } + + /// \returns the line number where this variable is defined, if any, or None + /// if defined before input is parsed. + Optional getDefLineNumber() { return DefLineNumber; } +}; + +/// Class representing the use of a numeric variable in the AST of an +/// expression. +class FileCheckNumericVariableUse : public FileCheckExpressionAST { +private: + /// Name of the numeric variable. + StringRef Name; + + /// Pointer to the class instance for the variable this use is about. + FileCheckNumericVariable *NumericVariable; + +public: + FileCheckNumericVariableUse(StringRef Name, + FileCheckNumericVariable *NumericVariable) + : Name(Name), NumericVariable(NumericVariable) {} + + /// \returns the value of the variable referenced by this instance. + Expected eval() const; +}; + +/// Type of functions evaluating a given binary operation. +using binop_eval_t = uint64_t (*)(uint64_t, uint64_t); + +/// Class representing a single binary operation in the AST of an expression. +class FileCheckASTBinop : public FileCheckExpressionAST { +private: + /// Left operand. + std::unique_ptr LeftOperand; + + /// Right operand. + std::unique_ptr RightOperand; + + /// Pointer to function that can evaluate this binary operation. + binop_eval_t EvalBinop; + +public: + FileCheckASTBinop(binop_eval_t EvalBinop, + std::unique_ptr LeftOp, + std::unique_ptr RightOp) + : EvalBinop(EvalBinop) { + LeftOperand = std::move(LeftOp); + RightOperand = std::move(RightOp); + } + + /// Evaluates the value of the binary operation represented by this AST, + /// using EvalBinop on the result of recursively evaluating the operands. + /// \returns the expression value or an error if an undefined numeric + /// variable is used in one of the operands. + Expected eval() const; +}; + +class FileCheckPatternContext; + +/// Class representing a substitution to perform in the RegExStr string. +class FileCheckSubstitution { +protected: + /// Pointer to a class instance holding, among other things, the table with + /// the values of live string variables at the start of any given CHECK line. + /// Used for substituting string variables with the text they were defined + /// as. Expressions are linked to the numeric variables they use at + /// parse time and directly access the value of the numeric variable to + /// evaluate their value. + FileCheckPatternContext *Context; + + /// The string that needs to be substituted for something else. For a + /// string variable this is its name, otherwise this is the whole expression. + StringRef FromStr; + + // Index in RegExStr of where to do the substitution. 
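The expression AST classes declared above compose in the obvious way: leaves are literals or numeric-variable uses, and FileCheckASTBinop applies a function pointer to its recursively evaluated operands. A small sketch building "N + 3" and evaluating it; the add() helper mirrors the static evaluator in FileCheck.cpp, and the lost template arguments are assumed to be FileCheckExpressionAST and uint64_t as in the upstream header:

#include "FileCheckImpl.h"
#include "llvm/Support/Error.h"
#include <memory>

static uint64_t add(uint64_t L, uint64_t R) { return L + R; }

llvm::Expected<uint64_t> evalNPlus3() {
  llvm::FileCheckNumericVariable N("N"); // defined before input is parsed
  N.setValue(39);

  auto Use = std::make_unique<llvm::FileCheckNumericVariableUse>("N", &N);
  auto Three = std::make_unique<llvm::FileCheckExpressionLiteral>(3);
  llvm::FileCheckASTBinop Sum(add, std::move(Use), std::move(Three));
  return Sum.eval(); // 42, or an undefined-variable error had N been unset
}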
+ size_t InsertIdx; + +public: + FileCheckSubstitution(FileCheckPatternContext *Context, StringRef VarName, + size_t InsertIdx) + : Context(Context), FromStr(VarName), InsertIdx(InsertIdx) {} + + virtual ~FileCheckSubstitution() = default; + + /// \returns the string to be substituted for something else. + StringRef getFromString() const { return FromStr; } + + /// \returns the index where the substitution is to be performed in RegExStr. + size_t getIndex() const { return InsertIdx; } + + /// \returns a string containing the result of the substitution represented + /// by this class instance or an error if substitution failed. + virtual Expected getResult() const = 0; +}; + +class FileCheckStringSubstitution : public FileCheckSubstitution { +public: + FileCheckStringSubstitution(FileCheckPatternContext *Context, + StringRef VarName, size_t InsertIdx) + : FileCheckSubstitution(Context, VarName, InsertIdx) {} + + /// \returns the text that the string variable in this substitution matched + /// when defined, or an error if the variable is undefined. + Expected getResult() const override; +}; + +class FileCheckNumericSubstitution : public FileCheckSubstitution { +private: + /// Pointer to the class representing the expression whose value is to be + /// substituted. + std::unique_ptr ExpressionAST; + +public: + FileCheckNumericSubstitution(FileCheckPatternContext *Context, StringRef Expr, + std::unique_ptr ExprAST, + size_t InsertIdx) + : FileCheckSubstitution(Context, Expr, InsertIdx) { + ExpressionAST = std::move(ExprAST); + } + + /// \returns a string containing the result of evaluating the expression in + /// this substitution, or an error if evaluation failed. + Expected getResult() const override; +}; + +//===----------------------------------------------------------------------===// +// Pattern handling code. +//===----------------------------------------------------------------------===// + +struct FileCheckDiag; + +/// Class holding the FileCheckPattern global state, shared by all patterns: +/// tables holding values of variables and whether they are defined or not at +/// any given time in the matching process. +class FileCheckPatternContext { + friend class FileCheckPattern; + +private: + /// When matching a given pattern, this holds the value of all the string + /// variables defined in previous patterns. In a pattern, only the last + /// definition for a given variable is recorded in this table. + /// Back-references are used for uses after any the other definition. + StringMap GlobalVariableTable; + + /// Map of all string variables defined so far. Used at parse time to detect + /// a name conflict between a numeric variable and a string variable when + /// the former is defined on a later line than the latter. + StringMap DefinedVariableTable; + + /// When matching a given pattern, this holds the pointers to the classes + /// representing the numeric variables defined in previous patterns. When + /// matching a pattern all definitions for that pattern are recorded in the + /// NumericVariableDefs table in the FileCheckPattern instance of that + /// pattern. + StringMap GlobalNumericVariableTable; + + /// Pointer to the class instance representing the @LINE pseudo variable for + /// easily updating its value. + FileCheckNumericVariable *LineVariable = nullptr; + + /// Vector holding pointers to all parsed numeric variables. Used to + /// automatically free them once they are guaranteed to no longer be used. 
+ std::vector> NumericVariables; + + /// Vector holding pointers to all substitutions. Used to automatically free + /// them once they are guaranteed to no longer be used. + std::vector> Substitutions; + +public: + /// \returns the value of string variable \p VarName or an error if no such + /// variable has been defined. + Expected getPatternVarValue(StringRef VarName); + + /// Defines string and numeric variables from definitions given on the + /// command line, passed as a vector of [#]VAR=VAL strings in + /// \p CmdlineDefines. \returns an error list containing diagnostics against + /// \p SM for all definition parsing failures, if any, or Success otherwise. + Error defineCmdlineVariables(std::vector &CmdlineDefines, + SourceMgr &SM); + + /// Create @LINE pseudo variable. Value is set when pattern are being + /// matched. + void createLineVariable(); + + /// Undefines local variables (variables whose name does not start with a '$' + /// sign), i.e. removes them from GlobalVariableTable and from + /// GlobalNumericVariableTable and also clears the value of numeric + /// variables. + void clearLocalVars(); + +private: + /// Makes a new numeric variable and registers it for destruction when the + /// context is destroyed. + template + FileCheckNumericVariable *makeNumericVariable(Types... args); + + /// Makes a new string substitution and registers it for destruction when the + /// context is destroyed. + FileCheckSubstitution *makeStringSubstitution(StringRef VarName, + size_t InsertIdx); + + /// Makes a new numeric substitution and registers it for destruction when + /// the context is destroyed. + FileCheckSubstitution * + makeNumericSubstitution(StringRef ExpressionStr, + std::unique_ptr ExpressionAST, + size_t InsertIdx); +}; + +/// Class to represent an error holding a diagnostic with location information +/// used when printing it. +class FileCheckErrorDiagnostic : public ErrorInfo { +private: + SMDiagnostic Diagnostic; + +public: + static char ID; + + FileCheckErrorDiagnostic(SMDiagnostic &&Diag) : Diagnostic(Diag) {} + + std::error_code convertToErrorCode() const override { + return inconvertibleErrorCode(); + } + + /// Print diagnostic associated with this error when printing the error. + void log(raw_ostream &OS) const override { Diagnostic.print(nullptr, OS); } + + static Error get(const SourceMgr &SM, SMLoc Loc, const Twine &ErrMsg) { + return make_error( + SM.GetMessage(Loc, SourceMgr::DK_Error, ErrMsg)); + } + + static Error get(const SourceMgr &SM, StringRef Buffer, const Twine &ErrMsg) { + return get(SM, SMLoc::getFromPointer(Buffer.data()), ErrMsg); + } +}; + +class FileCheckNotFoundError : public ErrorInfo { +public: + static char ID; + + std::error_code convertToErrorCode() const override { + return inconvertibleErrorCode(); + } + + /// Print diagnostic associated with this error when printing the error. + void log(raw_ostream &OS) const override { + OS << "String not found in input"; + } +}; + +class FileCheckPattern { + SMLoc PatternLoc; + + /// A fixed string to match as the pattern or empty if this pattern requires + /// a regex match. + StringRef FixedStr; + + /// A regex string to match as the pattern or empty if this pattern requires + /// a fixed string to match. + std::string RegExStr; + + /// Entries in this vector represent a substitution of a string variable or + /// an expression in the RegExStr regex at match time. 
For example, in the + /// case of a CHECK directive with the pattern "foo[[bar]]baz[[#N+1]]", + /// RegExStr will contain "foobaz" and we'll get two entries in this vector + /// that tells us to insert the value of string variable "bar" at offset 3 + /// and the value of expression "N+1" at offset 6. + std::vector Substitutions; + + /// Maps names of string variables defined in a pattern to the number of + /// their parenthesis group in RegExStr capturing their last definition. + /// + /// E.g. for the pattern "foo[[bar:.*]]baz([[bar]][[QUUX]][[bar:.*]])", + /// RegExStr will be "foo(.*)baz(\1(.*))" where is + /// the value captured for QUUX on the earlier line where it was defined, and + /// VariableDefs will map "bar" to the third parenthesis group which captures + /// the second definition of "bar". + /// + /// Note: uses std::map rather than StringMap to be able to get the key when + /// iterating over values. + std::map VariableDefs; + + /// Structure representing the definition of a numeric variable in a pattern. + /// It holds the pointer to the class representing the numeric variable whose + /// value is being defined and the number of the parenthesis group in + /// RegExStr to capture that value. + struct FileCheckNumericVariableMatch { + /// Pointer to class representing the numeric variable whose value is being + /// defined. + FileCheckNumericVariable *DefinedNumericVariable; + + /// Number of the parenthesis group in RegExStr that captures the value of + /// this numeric variable definition. + unsigned CaptureParenGroup; + }; + + /// Holds the number of the parenthesis group in RegExStr and pointer to the + /// corresponding FileCheckNumericVariable class instance of all numeric + /// variable definitions. Used to set the matched value of all those + /// variables. + StringMap NumericVariableDefs; + + /// Pointer to a class instance holding the global state shared by all + /// patterns: + /// - separate tables with the values of live string and numeric variables + /// respectively at the start of any given CHECK line; + /// - table holding whether a string variable has been defined at any given + /// point during the parsing phase. + FileCheckPatternContext *Context; + + Check::FileCheckType CheckTy; + + /// Line number for this CHECK pattern or None if it is an implicit pattern. + /// Used to determine whether a variable definition is made on an earlier + /// line to the one with this CHECK. + Optional LineNumber; + + /// Ignore case while matching if set to true. + bool IgnoreCase = false; + +public: + FileCheckPattern(Check::FileCheckType Ty, FileCheckPatternContext *Context, + Optional Line = None) + : Context(Context), CheckTy(Ty), LineNumber(Line) {} + + /// \returns the location in source code. + SMLoc getLoc() const { return PatternLoc; } + + /// \returns the pointer to the global state for all patterns in this + /// FileCheck instance. + FileCheckPatternContext *getContext() const { return Context; } + + /// \returns whether \p C is a valid first character for a variable name. + static bool isValidVarNameStart(char C); + + /// Parsing information about a variable. + struct VariableProperties { + StringRef Name; + bool IsPseudo; + }; + + /// Parses the string at the start of \p Str for a variable name. \returns + /// a VariableProperties structure holding the variable name and whether it + /// is the name of a pseudo variable, or an error holding a diagnostic + /// against \p SM if parsing fail. 
If parsing was successful, also strips + /// \p Str from the variable name. + static Expected parseVariable(StringRef &Str, + const SourceMgr &SM); + /// Parses \p Expr for a numeric substitution block at line \p LineNumber, + /// or before input is parsed if \p LineNumber is None. Parameter + /// \p IsLegacyLineExpr indicates whether \p Expr should be a legacy @LINE + /// expression and \p Context points to the class instance holding the live + /// string and numeric variables. \returns a pointer to the class instance + /// representing the AST of the expression whose value must be substitued, or + /// an error holding a diagnostic against \p SM if parsing fails. If + /// substitution was successful, sets \p DefinedNumericVariable to point to + /// the class representing the numeric variable defined in this numeric + /// substitution block, or None if this block does not define any variable. + static Expected> + parseNumericSubstitutionBlock( + StringRef Expr, + Optional &DefinedNumericVariable, + bool IsLegacyLineExpr, Optional LineNumber, + FileCheckPatternContext *Context, const SourceMgr &SM); + /// Parses the pattern in \p PatternStr and initializes this FileCheckPattern + /// instance accordingly. + /// + /// \p Prefix provides which prefix is being matched, \p Req describes the + /// global options that influence the parsing such as whitespace + /// canonicalization, \p SM provides the SourceMgr used for error reports. + /// \returns true in case of an error, false otherwise. + bool parsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, + const FileCheckRequest &Req); + /// Matches the pattern string against the input buffer \p Buffer + /// + /// \returns the position that is matched or an error indicating why matching + /// failed. If there is a match, updates \p MatchLen with the size of the + /// matched string. + /// + /// The GlobalVariableTable StringMap in the FileCheckPatternContext class + /// instance provides the current values of FileCheck string variables and + /// is updated if this match defines new values. Likewise, the + /// GlobalNumericVariableTable StringMap in the same class provides the + /// current values of FileCheck numeric variables and is updated if this + /// match defines new numeric values. + Expected match(StringRef Buffer, size_t &MatchLen, + const SourceMgr &SM) const; + /// Prints the value of successful substitutions or the name of the undefined + /// string or numeric variables preventing a successful substitution. + void printSubstitutions(const SourceMgr &SM, StringRef Buffer, + SMRange MatchRange = None) const; + void printFuzzyMatch(const SourceMgr &SM, StringRef Buffer, + std::vector *Diags) const; + + bool hasVariable() const { + return !(Substitutions.empty() && VariableDefs.empty()); + } + + Check::FileCheckType getCheckTy() const { return CheckTy; } + + int getCount() const { return CheckTy.getCount(); } + +private: + bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); + void AddBackrefToRegEx(unsigned BackrefNum); + /// Computes an arbitrary estimate for the quality of matching this pattern + /// at the start of \p Buffer; a distance of zero should correspond to a + /// perfect match. + unsigned computeMatchDistance(StringRef Buffer) const; + /// Finds the closing sequence of a regex variable usage or definition. + /// + /// \p Str has to point in the beginning of the definition (right after the + /// opening sequence). \p SM holds the SourceMgr used for error repporting. 
+ /// \returns the offset of the closing sequence within Str, or npos if it + /// was not found. + size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); + + /// Parses \p Expr for the name of a numeric variable to be defined at line + /// \p LineNumber, or before input is parsed if \p LineNumber is None. + /// \returns a pointer to the class instance representing that variable, + /// creating it if needed, or an error holding a diagnostic against \p SM + /// should defining such a variable be invalid. + static Expected parseNumericVariableDefinition( + StringRef &Expr, FileCheckPatternContext *Context, + Optional LineNumber, const SourceMgr &SM); + /// Parses \p Name as a (pseudo if \p IsPseudo is true) numeric variable use + /// at line \p LineNumber, or before input is parsed if \p LineNumber is + /// None. Parameter \p Context points to the class instance holding the live + /// string and numeric variables. \returns the pointer to the class instance + /// representing that variable if successful, or an error holding a + /// diagnostic against \p SM otherwise. + static Expected> + parseNumericVariableUse(StringRef Name, bool IsPseudo, + Optional LineNumber, + FileCheckPatternContext *Context, + const SourceMgr &SM); + enum class AllowedOperand { LineVar, Literal, Any }; + /// Parses \p Expr for use of a numeric operand at line \p LineNumber, or + /// before input is parsed if \p LineNumber is None. Accepts both literal + /// values and numeric variables, depending on the value of \p AO. Parameter + /// \p Context points to the class instance holding the live string and + /// numeric variables. \returns the class representing that operand in the + /// AST of the expression or an error holding a diagnostic against \p SM + /// otherwise. + static Expected> + parseNumericOperand(StringRef &Expr, AllowedOperand AO, + Optional LineNumber, + FileCheckPatternContext *Context, const SourceMgr &SM); + /// Parses \p Expr for a binary operation at line \p LineNumber, or before + /// input is parsed if \p LineNumber is None. The left operand of this binary + /// operation is given in \p LeftOp and \p IsLegacyLineExpr indicates whether + /// we are parsing a legacy @LINE expression. Parameter \p Context points to + /// the class instance holding the live string and numeric variables. + /// \returns the class representing the binary operation in the AST of the + /// expression, or an error holding a diagnostic against \p SM otherwise. + static Expected> + parseBinop(StringRef &Expr, std::unique_ptr LeftOp, + bool IsLegacyLineExpr, Optional LineNumber, + FileCheckPatternContext *Context, const SourceMgr &SM); +}; + +//===----------------------------------------------------------------------===// +// Check Strings. +//===----------------------------------------------------------------------===// + +/// A check that we found in the input file. +struct FileCheckString { + /// The pattern to match. + FileCheckPattern Pat; + + /// Which prefix name this check matched. + StringRef Prefix; + + /// The location in the match file that the check string was specified. + SMLoc Loc; + + /// All of the strings that are disallowed from occurring between this match + /// string and the previous one (or start of file). + std::vector DagNotStrings; + + FileCheckString(const FileCheckPattern &P, StringRef S, SMLoc L) + : Pat(P), Prefix(S), Loc(L) {} + + /// Matches check string and its "not strings" and/or "dag strings". 
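A minimal driver sketch for the parse and match entry points declared above, assuming the pattern text lives in a buffer already registered with the SourceMgr so diagnostics have somewhere to point; the helper name and the choice of CHECK prefix are illustrative only, not part of this patch:

  #include "llvm/Support/Error.h"
  #include "llvm/Support/FileCheck.h"
  #include "llvm/Support/SourceMgr.h"
  using namespace llvm;

  // Parse a single CHECK pattern and try to match it against InputText.
  static bool matchOnce(SourceMgr &SM, StringRef PatternText, StringRef InputText) {
    FileCheckRequest Req;                  // default global options
    FileCheckPatternContext Ctx;
    Ctx.createLineVariable();              // makes @LINE usable in expressions
    FileCheckPattern Pat(Check::CheckPlain, &Ctx, /*Line=*/1);
    if (Pat.parsePattern(PatternText, "CHECK", SM, Req))
      return false;                        // parse error, already reported via SM
    size_t MatchLen = 0;
    Expected<size_t> Pos = Pat.match(InputText, MatchLen, SM);
    if (!Pos) {
      consumeError(Pos.takeError());       // no match or undefined variable
      return false;
    }
    return true;                           // matched at offset *Pos, length MatchLen
  }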
+ size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, + size_t &MatchLen, FileCheckRequest &Req, + std::vector *Diags) const; + + /// Verifies that there is a single line in the given \p Buffer. Errors are + /// reported against \p SM. + bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; + /// Verifies that there is no newline in the given \p Buffer. Errors are + /// reported against \p SM. + bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; + /// Verifies that none of the strings in \p NotStrings are found in the given + /// \p Buffer. Errors are reported against \p SM and diagnostics recorded in + /// \p Diags according to the verbosity level set in \p Req. + bool CheckNot(const SourceMgr &SM, StringRef Buffer, + const std::vector &NotStrings, + const FileCheckRequest &Req, + std::vector *Diags) const; + /// Matches "dag strings" and their mixed "not strings". + size_t CheckDag(const SourceMgr &SM, StringRef Buffer, + std::vector &NotStrings, + const FileCheckRequest &Req, + std::vector *Diags) const; +}; + +} // namespace llvm + +#endif diff --git a/lib/Support/FileCollector.cpp b/lib/Support/FileCollector.cpp new file mode 100644 index 000000000000..47fca6413722 --- /dev/null +++ b/lib/Support/FileCollector.cpp @@ -0,0 +1,268 @@ +//===-- FileCollector.cpp ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/FileCollector.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" + +using namespace llvm; + +static bool isCaseSensitivePath(StringRef Path) { + SmallString<256> TmpDest = Path, UpperDest, RealDest; + + // Remove component traversals, links, etc. + if (!sys::fs::real_path(Path, TmpDest)) + return true; // Current default value in vfs.yaml + Path = TmpDest; + + // Change path to all upper case and ask for its real path, if the latter + // exists and is equal to path, it's not case sensitive. Default to case + // sensitive in the absence of real_path, since this is the YAMLVFSWriter + // default. + UpperDest = Path.upper(); + if (sys::fs::real_path(UpperDest, RealDest) && Path.equals(RealDest)) + return false; + return true; +} + +FileCollector::FileCollector(std::string Root, std::string OverlayRoot) + : Root(std::move(Root)), OverlayRoot(std::move(OverlayRoot)) { + sys::fs::create_directories(this->Root, true); +} + +bool FileCollector::getRealPath(StringRef SrcPath, + SmallVectorImpl &Result) { + SmallString<256> RealPath; + StringRef FileName = sys::path::filename(SrcPath); + std::string Directory = sys::path::parent_path(SrcPath).str(); + auto DirWithSymlink = SymlinkMap.find(Directory); + + // Use real_path to fix any symbolic link component present in a path. + // Computing the real path is expensive, cache the search through the parent + // path Directory. 
+ if (DirWithSymlink == SymlinkMap.end()) { + auto EC = sys::fs::real_path(Directory, RealPath); + if (EC) + return false; + SymlinkMap[Directory] = RealPath.str(); + } else { + RealPath = DirWithSymlink->second; + } + + sys::path::append(RealPath, FileName); + Result.swap(RealPath); + return true; +} + +void FileCollector::addFile(const Twine &file) { + std::lock_guard lock(Mutex); + std::string FileStr = file.str(); + if (markAsSeen(FileStr)) + addFileImpl(FileStr); +} + +void FileCollector::addFileImpl(StringRef SrcPath) { + // We need an absolute src path to append to the root. + SmallString<256> AbsoluteSrc = SrcPath; + sys::fs::make_absolute(AbsoluteSrc); + + // Canonicalize src to a native path to avoid mixed separator styles. + sys::path::native(AbsoluteSrc); + + // Remove redundant leading "./" pieces and consecutive separators. + AbsoluteSrc = sys::path::remove_leading_dotslash(AbsoluteSrc); + + // Canonicalize the source path by removing "..", "." components. + SmallString<256> VirtualPath = AbsoluteSrc; + sys::path::remove_dots(VirtualPath, /*remove_dot_dot=*/true); + + // If a ".." component is present after a symlink component, remove_dots may + // lead to the wrong real destination path. Let the source be canonicalized + // like that but make sure we always use the real path for the destination. + SmallString<256> CopyFrom; + if (!getRealPath(AbsoluteSrc, CopyFrom)) + CopyFrom = VirtualPath; + + SmallString<256> DstPath = StringRef(Root); + sys::path::append(DstPath, sys::path::relative_path(CopyFrom)); + + // Always map a canonical src path to its real path into the YAML, by doing + // this we map different virtual src paths to the same entry in the VFS + // overlay, which is a way to emulate symlink inside the VFS; this is also + // needed for correctness, not doing that can lead to module redefinition + // errors. + addFileToMapping(VirtualPath, DstPath); +} + +/// Set the access and modification time for the given file from the given +/// status object. +static std::error_code +copyAccessAndModificationTime(StringRef Filename, + const sys::fs::file_status &Stat) { + int FD; + + if (auto EC = + sys::fs::openFileForWrite(Filename, FD, sys::fs::CD_OpenExisting)) + return EC; + + if (auto EC = sys::fs::setLastAccessAndModificationTime( + FD, Stat.getLastAccessedTime(), Stat.getLastModificationTime())) + return EC; + + if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD)) + return EC; + + return {}; +} + +std::error_code FileCollector::copyFiles(bool StopOnError) { + for (auto &entry : VFSWriter.getMappings()) { + // Create directory tree. + if (std::error_code EC = + sys::fs::create_directories(sys::path::parent_path(entry.RPath), + /*IgnoreExisting=*/true)) { + if (StopOnError) + return EC; + } + + // Get the status of the original file/directory. + sys::fs::file_status Stat; + if (std::error_code EC = sys::fs::status(entry.VPath, Stat)) { + if (StopOnError) + return EC; + continue; + } + + if (Stat.type() == sys::fs::file_type::directory_file) { + // Construct a directory when it's just a directory entry. + if (std::error_code EC = + sys::fs::create_directories(entry.RPath, + /*IgnoreExisting=*/true)) { + if (StopOnError) + return EC; + } + continue; + } + + // Copy file over. + if (std::error_code EC = sys::fs::copy_file(entry.VPath, entry.RPath)) { + if (StopOnError) + return EC; + } + + // Copy over permissions. 
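Once permissions and timestamps have been copied, a client has everything it needs; end to end, the collector is typically used as sketched here, together with writeMapping() and createCollectorVFS() defined further down in this file. The paths and the tolerant StopOnError choice are invented for illustration:

  #include "llvm/Support/FileCollector.h"
  #include "llvm/Support/VirtualFileSystem.h"
  using namespace llvm;

  void collectReproducer() {
    auto Collector = std::make_shared<FileCollector>(
        /*Root=*/"/tmp/repro/root", /*OverlayRoot=*/"/tmp/repro");

    // Wrap the real file system so every file the tool touches is recorded.
    IntrusiveRefCntPtr<vfs::FileSystem> FS =
        FileCollector::createCollectorVFS(vfs::getRealFileSystem(), Collector);

    // ... run the tool against FS; any status/open/dir access records the path ...
    (void)FS->status("/etc/hosts");

    // Snapshot the collected files and emit the VFS overlay describing them.
    (void)Collector->copyFiles(/*StopOnError=*/false);
    (void)Collector->writeMapping("/tmp/repro/vfs.yaml");
  }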
+ if (auto perms = sys::fs::getPermissions(entry.VPath)) { + if (std::error_code EC = sys::fs::setPermissions(entry.RPath, *perms)) { + if (StopOnError) + return EC; + } + } + + // Copy over modification time. + copyAccessAndModificationTime(entry.RPath, Stat); + } + return {}; +} + +std::error_code FileCollector::writeMapping(StringRef mapping_file) { + std::lock_guard lock(Mutex); + + VFSWriter.setOverlayDir(OverlayRoot); + VFSWriter.setCaseSensitivity(isCaseSensitivePath(OverlayRoot)); + VFSWriter.setUseExternalNames(false); + + std::error_code EC; + raw_fd_ostream os(mapping_file, EC, sys::fs::OF_Text); + if (EC) + return EC; + + VFSWriter.write(os); + + return {}; +} + +namespace { + +class FileCollectorFileSystem : public vfs::FileSystem { +public: + explicit FileCollectorFileSystem(IntrusiveRefCntPtr FS, + std::shared_ptr Collector) + : FS(std::move(FS)), Collector(std::move(Collector)) {} + + llvm::ErrorOr status(const Twine &Path) override { + auto Result = FS->status(Path); + if (Result && Result->exists()) + Collector->addFile(Path); + return Result; + } + + llvm::ErrorOr> + openFileForRead(const Twine &Path) override { + auto Result = FS->openFileForRead(Path); + if (Result && *Result) + Collector->addFile(Path); + return Result; + } + + llvm::vfs::directory_iterator dir_begin(const llvm::Twine &Dir, + std::error_code &EC) override { + auto It = FS->dir_begin(Dir, EC); + if (EC) + return It; + // Collect everything that's listed in case the user needs it. + Collector->addFile(Dir); + for (; !EC && It != llvm::vfs::directory_iterator(); It.increment(EC)) { + if (It->type() == sys::fs::file_type::regular_file || + It->type() == sys::fs::file_type::directory_file || + It->type() == sys::fs::file_type::symlink_file) { + Collector->addFile(It->path()); + } + } + if (EC) + return It; + // Return a new iterator. + return FS->dir_begin(Dir, EC); + } + + std::error_code getRealPath(const Twine &Path, + SmallVectorImpl &Output) const override { + auto EC = FS->getRealPath(Path, Output); + if (!EC) { + Collector->addFile(Path); + if (Output.size() > 0) + Collector->addFile(Output); + } + return EC; + } + + std::error_code isLocal(const Twine &Path, bool &Result) override { + return FS->isLocal(Path, Result); + } + + llvm::ErrorOr getCurrentWorkingDirectory() const override { + return FS->getCurrentWorkingDirectory(); + } + + std::error_code setCurrentWorkingDirectory(const llvm::Twine &Path) override { + return FS->setCurrentWorkingDirectory(Path); + } + +private: + IntrusiveRefCntPtr FS; + std::shared_ptr Collector; +}; + +} // end anonymous namespace + +IntrusiveRefCntPtr +FileCollector::createCollectorVFS(IntrusiveRefCntPtr BaseFS, + std::shared_ptr Collector) { + return new FileCollectorFileSystem(std::move(BaseFS), std::move(Collector)); +} diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp index 3d6b569f2993..024dd3e57a40 100644 --- a/lib/Support/FileOutputBuffer.cpp +++ b/lib/Support/FileOutputBuffer.cpp @@ -121,7 +121,7 @@ createInMemoryBuffer(StringRef Path, size_t Size, unsigned Mode) { Size, nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC); if (EC) return errorCodeToError(EC); - return llvm::make_unique(Path, MB, Size, Mode); + return std::make_unique(Path, MB, Size, Mode); } static Expected> @@ -146,7 +146,7 @@ createOnDiskBuffer(StringRef Path, size_t Size, unsigned Mode) { // Mmap it. 
std::error_code EC; - auto MappedFile = llvm::make_unique( + auto MappedFile = std::make_unique( fs::convertFDToNativeFile(File.FD), fs::mapped_file_region::readwrite, Size, 0, EC); @@ -157,7 +157,7 @@ createOnDiskBuffer(StringRef Path, size_t Size, unsigned Mode) { return createInMemoryBuffer(Path, Size, Mode); } - return llvm::make_unique(Path, std::move(File), + return std::make_unique(Path, std::move(File), std::move(MappedFile)); } diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp index 62eb7bfda195..d11fbb54dc0d 100644 --- a/lib/Support/FileUtilities.cpp +++ b/lib/Support/FileUtilities.cpp @@ -12,9 +12,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/FileUtilities.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Support/Error.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -264,3 +267,66 @@ int llvm::DiffFilesWithTolerance(StringRef NameA, return CompareFailed; } + +void llvm::AtomicFileWriteError::log(raw_ostream &OS) const { + OS << "atomic_write_error: "; + switch (Error) { + case atomic_write_error::failed_to_create_uniq_file: + OS << "failed_to_create_uniq_file"; + return; + case atomic_write_error::output_stream_error: + OS << "output_stream_error"; + return; + case atomic_write_error::failed_to_rename_temp_file: + OS << "failed_to_rename_temp_file"; + return; + } + llvm_unreachable("unknown atomic_write_error value in " + "failed_to_rename_temp_file::log()"); +} + +llvm::Error llvm::writeFileAtomically(StringRef TempPathModel, + StringRef FinalPath, StringRef Buffer) { + return writeFileAtomically(TempPathModel, FinalPath, + [&Buffer](llvm::raw_ostream &OS) { + OS.write(Buffer.data(), Buffer.size()); + return llvm::Error::success(); + }); +} + +llvm::Error llvm::writeFileAtomically( + StringRef TempPathModel, StringRef FinalPath, + std::function Writer) { + SmallString<128> GeneratedUniqPath; + int TempFD; + if (sys::fs::createUniqueFile(TempPathModel.str(), TempFD, + GeneratedUniqPath)) { + return llvm::make_error( + atomic_write_error::failed_to_create_uniq_file); + } + llvm::FileRemover RemoveTmpFileOnFail(GeneratedUniqPath); + + raw_fd_ostream OS(TempFD, /*shouldClose=*/true); + if (llvm::Error Err = Writer(OS)) { + return Err; + } + + OS.close(); + if (OS.has_error()) { + OS.clear_error(); + return llvm::make_error( + atomic_write_error::output_stream_error); + } + + if (const std::error_code Error = + sys::fs::rename(/*from=*/GeneratedUniqPath.c_str(), + /*to=*/FinalPath.str().c_str())) { + return llvm::make_error( + atomic_write_error::failed_to_rename_temp_file); + } + + RemoveTmpFileOnFail.releaseFile(); + return Error::success(); +} + +char llvm::AtomicFileWriteError::ID; diff --git a/lib/Support/GlobPattern.cpp b/lib/Support/GlobPattern.cpp index 6011be86d77f..8dae6941ec77 100644 --- a/lib/Support/GlobPattern.cpp +++ b/lib/Support/GlobPattern.cpp @@ -19,7 +19,7 @@ using namespace llvm; static bool hasWildcard(StringRef S) { - return S.find_first_of("?*[") != StringRef::npos; + return S.find_first_of("?*[\\") != StringRef::npos; } // Expands character ranges and returns a bitmap. @@ -60,8 +60,9 @@ static Expected expand(StringRef S, StringRef Original) { } // This is a scanner for the glob pattern. -// A glob pattern token is one of "*", "?", "[]", "[^]" -// (which is a negative form of "[]"), or a non-meta character. 
+// A glob pattern token is one of "*", "?", "\", "[]", "[^]" +// (which is a negative form of "[]"), "[!]" (which is +// equivalent to "[^]"), or a non-meta character. // This function returns the first token in S. static Expected scan(StringRef &S, StringRef Original) { switch (S[0]) { @@ -74,14 +75,16 @@ static Expected scan(StringRef &S, StringRef Original) { S = S.substr(1); return BitVector(256, true); case '[': { - size_t End = S.find(']', 1); + // ']' is allowed as the first character of a character class. '[]' is + // invalid. So, just skip the first character. + size_t End = S.find(']', 2); if (End == StringRef::npos) return make_error("invalid glob pattern: " + Original, errc::invalid_argument); StringRef Chars = S.substr(1, End - 1); S = S.substr(End + 1); - if (Chars.startswith("^")) { + if (Chars.startswith("^") || Chars.startswith("!")) { Expected BV = expand(Chars.substr(1), Original); if (!BV) return BV.takeError(); @@ -89,6 +92,11 @@ static Expected scan(StringRef &S, StringRef Original) { } return expand(Chars, Original); } + case '\\': + // Eat this character and fall through below to treat it like a non-meta + // character. + S = S.substr(1); + LLVM_FALLTHROUGH; default: BitVector BV(256, false); BV[(uint8_t)S[0]] = true; @@ -107,8 +115,9 @@ Expected GlobPattern::create(StringRef S) { return Pat; } - // S is something like "foo*". We can use startswith(). - if (S.endswith("*") && !hasWildcard(S.drop_back())) { + // S is something like "foo*", and the "* is not escaped. We can use + // startswith(). + if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) { Pat.Prefix = S.drop_back(); return Pat; } diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index d491912bdc0c..2a473a1994c2 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -316,7 +316,7 @@ StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) { unsigned int Id; if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) { if (Id >= 8561 && HaveVectorSupport) - return "arch13"; + return "z15"; if (Id >= 3906 && HaveVectorSupport) return "z14"; if (Id >= 2964 && HaveVectorSupport) @@ -680,7 +680,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, // Skylake Xeon: case 0x55: *Type = X86::INTEL_COREI7; - if (Features3 & (1 << (X86::FEATURE_AVX512BF16 - 64))) + if (Features2 & (1 << (X86::FEATURE_AVX512BF16 - 32))) *Subtype = X86::INTEL_COREI7_COOPERLAKE; // "cooperlake" else if (Features2 & (1 << (X86::FEATURE_AVX512VNNI - 32))) *Subtype = X86::INTEL_COREI7_CASCADELAKE; // "cascadelake" @@ -746,6 +746,13 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, break; default: // Unknown family 6 CPU, try to guess. 
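The Features, Features2 and Features3 words above pack the X86::FEATURE_* enumeration in 32-bit chunks, which is why the Cooperlake check now consults Features2 with a 32-bit offset for AVX512BF16. A small hypothetical helper makes the indexing explicit:

  // Feature enums 0-31 live in Features, 32-63 in Features2, 64 and up in Features3.
  static bool testFeature(unsigned Features, unsigned Features2,
                          unsigned Features3, unsigned FeatureIdx) {
    if (FeatureIdx < 32)
      return (Features & (1u << FeatureIdx)) != 0;
    if (FeatureIdx < 64)
      return (Features2 & (1u << (FeatureIdx - 32))) != 0;
    return (Features3 & (1u << (FeatureIdx - 64))) != 0;
  }
  // e.g. testFeature(F, F2, F3, X86::FEATURE_AVX512BF16) now reads the second word.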
+ // TODO detect tigerlake host + if (Features3 & (1 << (X86::FEATURE_AVX512VP2INTERSECT - 64))) { + *Type = X86::INTEL_COREI7; + *Subtype = X86::INTEL_COREI7_TIGERLAKE; + break; + } + if (Features & (1 << X86::FEATURE_AVX512VBMI2)) { *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; @@ -758,7 +765,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, break; } - if (Features3 & (1 << (X86::FEATURE_AVX512BF16 - 64))) { + if (Features2 & (1 << (X86::FEATURE_AVX512BF16 - 32))) { *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_COOPERLAKE; break; @@ -1034,7 +1041,7 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, setFeature(X86::FEATURE_BMI); if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) setFeature(X86::FEATURE_AVX2); - if (HasLeaf7 && ((EBX >> 9) & 1)) + if (HasLeaf7 && ((EBX >> 8) & 1)) setFeature(X86::FEATURE_BMI2); if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX512F); @@ -1078,6 +1085,13 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, setFeature(X86::FEATURE_AVX5124VNNIW); if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX5124FMAPS); + if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) + setFeature(X86::FEATURE_AVX512VP2INTERSECT); + + bool HasLeaf7Subleaf1 = + MaxLeaf >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); + if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) + setFeature(X86::FEATURE_AVX512BF16); unsigned MaxExtLevel; getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); @@ -1369,7 +1383,6 @@ bool sys::getHostCPUFeatures(StringMap &Features) { Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1); Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1); Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1); - Features["mpx"] = HasLeaf7 && ((EBX >> 14) & 1); // AVX512 is only supported if the OS supports the context save for it. Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save; @@ -1499,6 +1512,17 @@ bool sys::getHostCPUFeatures(StringMap &Features) { return true; } +#elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64)) +bool sys::getHostCPUFeatures(StringMap &Features) { + if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) + Features["neon"] = true; + if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) + Features["crc"] = true; + if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) + Features["crypto"] = true; + + return true; +} #else bool sys::getHostCPUFeatures(StringMap &Features) { return false; } #endif diff --git a/lib/Support/JSON.cpp b/lib/Support/JSON.cpp index 95e5ed654277..16b1d11efd08 100644 --- a/lib/Support/JSON.cpp +++ b/lib/Support/JSON.cpp @@ -502,7 +502,7 @@ bool Parser::parseError(const char *Msg) { } } Err.emplace( - llvm::make_unique(Msg, Line, P - StartOfLine, P - Start)); + std::make_unique(Msg, Line, P - StartOfLine, P - Start)); return false; } } // namespace diff --git a/lib/Support/JamCRC.cpp b/lib/Support/JamCRC.cpp deleted file mode 100644 index e043a3c33c28..000000000000 --- a/lib/Support/JamCRC.cpp +++ /dev/null @@ -1,96 +0,0 @@ -//===-- JamCRC.cpp - Cyclic Redundancy Check --------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains an implementation of JamCRC. -// -//===----------------------------------------------------------------------===// -// -// The implementation technique is the one mentioned in: -// D. V. Sarwate. 1988. Computation of cyclic redundancy checks via table -// look-up. Commun. ACM 31, 8 (August 1988) -// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/JamCRC.h" -#include "llvm/ADT/ArrayRef.h" - -using namespace llvm; - -static const uint32_t CRCTable[256] = { - 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, - 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, - 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, - 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, - 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, - 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, - 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, - 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, - 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, - 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, - 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, - 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, - 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, - 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, - 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, - 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, - 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, - 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, - 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, - 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, - 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, - 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, - 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, - 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, - 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, - 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, - 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, - 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, - 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, - 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, - 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, - 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, - 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, - 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, - 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, - 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, - 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, - 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, - 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, - 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, - 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, - 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, - 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, - 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, - 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, - 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, - 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, - 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, - 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, - 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, - 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, - 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, - 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, - 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, - 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, - 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, - 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, - 
0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, - 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, - 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, - 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, - 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, - 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, - 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d -}; - -void JamCRC::update(ArrayRef Data) { - for (char Byte : Data) { - int TableIdx = (CRC ^ Byte) & 0xff; - CRC = CRCTable[TableIdx] ^ (CRC >> 8); - } -} diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp index 28ceb1a70e42..053493f72fb5 100644 --- a/lib/Support/ManagedStatic.cpp +++ b/lib/Support/ManagedStatic.cpp @@ -12,21 +12,20 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Config/config.h" -#include "llvm/Support/Mutex.h" -#include "llvm/Support/MutexGuard.h" #include "llvm/Support/Threading.h" #include +#include using namespace llvm; static const ManagedStaticBase *StaticList = nullptr; -static sys::Mutex *ManagedStaticMutex = nullptr; +static std::recursive_mutex *ManagedStaticMutex = nullptr; static llvm::once_flag mutex_init_flag; static void initializeMutex() { - ManagedStaticMutex = new sys::Mutex(); + ManagedStaticMutex = new std::recursive_mutex(); } -static sys::Mutex* getManagedStaticMutex() { +static std::recursive_mutex *getManagedStaticMutex() { llvm::call_once(mutex_init_flag, initializeMutex); return ManagedStaticMutex; } @@ -35,7 +34,7 @@ void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(), void (*Deleter)(void*)) const { assert(Creator); if (llvm_is_multithreaded()) { - MutexGuard Lock(*getManagedStaticMutex()); + std::lock_guard Lock(*getManagedStaticMutex()); if (!Ptr.load(std::memory_order_relaxed)) { void *Tmp = Creator(); @@ -77,7 +76,7 @@ void ManagedStaticBase::destroy() const { /// llvm_shutdown - Deallocate and destroy all ManagedStatic variables. void llvm::llvm_shutdown() { - MutexGuard Lock(*getManagedStaticMutex()); + std::lock_guard Lock(*getManagedStaticMutex()); while (StaticList) StaticList->destroy(); diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index d0e5bb154c1a..e4027ca7bbfd 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -211,15 +211,17 @@ static ErrorOr> getMemoryBufferForStream(sys::fs::file_t FD, const Twine &BufferName) { const ssize_t ChunkSize = 4096*4; SmallString Buffer; - size_t ReadBytes; // Read into Buffer until we hit EOF. - do { + for (;;) { Buffer.reserve(Buffer.size() + ChunkSize); - if (auto EC = sys::fs::readNativeFile( - FD, makeMutableArrayRef(Buffer.end(), ChunkSize), &ReadBytes)) - return EC; - Buffer.set_size(Buffer.size() + ReadBytes); - } while (ReadBytes != 0); + Expected ReadBytes = sys::fs::readNativeFile( + FD, makeMutableArrayRef(Buffer.end(), ChunkSize)); + if (!ReadBytes) + return errorToErrorCode(ReadBytes.takeError()); + if (*ReadBytes == 0) + break; + Buffer.set_size(Buffer.size() + *ReadBytes); + } return getMemBufferCopyImpl(Buffer, BufferName); } @@ -458,7 +460,20 @@ getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize, return make_error_code(errc::not_enough_memory); } - sys::fs::readNativeFileSlice(FD, Buf->getBuffer(), Offset); + // Read until EOF, zero-initialize the rest. 
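The loop that follows applies the new Expected<size_t> readNativeFile contract (bytes read on success, zero at end of file) to a fixed-size destination; the same contract also supports a free-standing drain helper, sketched here with an invented name over an already-open native file handle:

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Support/Error.h"
  #include "llvm/Support/FileSystem.h"
  using namespace llvm;

  // Append everything remaining on FD to Out, 16 KiB at a time.
  static Error drainFile(sys::fs::file_t FD, SmallVectorImpl<char> &Out) {
    const size_t ChunkSize = 16384;
    for (;;) {
      Out.reserve(Out.size() + ChunkSize);
      Expected<size_t> NRead =
          sys::fs::readNativeFile(FD, makeMutableArrayRef(Out.end(), ChunkSize));
      if (!NRead)
        return NRead.takeError();
      if (*NRead == 0)
        return Error::success(); // end of file
      Out.set_size(Out.size() + *NRead);
    }
  }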
+ MutableArrayRef ToRead = Buf->getBuffer(); + while (!ToRead.empty()) { + Expected ReadBytes = + sys::fs::readNativeFileSlice(FD, ToRead, Offset); + if (!ReadBytes) + return errorToErrorCode(ReadBytes.takeError()); + if (*ReadBytes == 0) { + std::memset(ToRead.data(), 0, ToRead.size()); + break; + } + ToRead = ToRead.drop_front(*ReadBytes); + Offset += *ReadBytes; + } return std::move(Buf); } diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp deleted file mode 100644 index 69b7b8126ab1..000000000000 --- a/lib/Support/Mutex.cpp +++ /dev/null @@ -1,123 +0,0 @@ -//===- Mutex.cpp - Mutual Exclusion Lock ------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the llvm::sys::Mutex class. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/Mutex.h" -#include "llvm/Config/config.h" -#include "llvm/Support/ErrorHandling.h" - -//===----------------------------------------------------------------------===// -//=== WARNING: Implementation here must contain only TRULY operating system -//=== independent code. -//===----------------------------------------------------------------------===// - -#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0 -// Define all methods as no-ops if threading is explicitly disabled -namespace llvm { -using namespace sys; -MutexImpl::MutexImpl( bool recursive) { } -MutexImpl::~MutexImpl() { } -bool MutexImpl::acquire() { return true; } -bool MutexImpl::release() { return true; } -bool MutexImpl::tryacquire() { return true; } -} -#else - -#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_MUTEX_LOCK) - -#include -#include -#include - -namespace llvm { -using namespace sys; - -// Construct a Mutex using pthread calls -MutexImpl::MutexImpl( bool recursive) - : data_(nullptr) -{ - // Declare the pthread_mutex data structures - pthread_mutex_t* mutex = - static_cast(safe_malloc(sizeof(pthread_mutex_t))); - - pthread_mutexattr_t attr; - - // Initialize the mutex attributes - int errorcode = pthread_mutexattr_init(&attr); - assert(errorcode == 0); (void)errorcode; - - // Initialize the mutex as a recursive mutex, if requested, or normal - // otherwise. - int kind = ( recursive ? 
PTHREAD_MUTEX_RECURSIVE : PTHREAD_MUTEX_NORMAL ); - errorcode = pthread_mutexattr_settype(&attr, kind); - assert(errorcode == 0); - - // Initialize the mutex - errorcode = pthread_mutex_init(mutex, &attr); - assert(errorcode == 0); - - // Destroy the attributes - errorcode = pthread_mutexattr_destroy(&attr); - assert(errorcode == 0); - - // Assign the data member - data_ = mutex; -} - -// Destruct a Mutex -MutexImpl::~MutexImpl() -{ - pthread_mutex_t* mutex = static_cast(data_); - assert(mutex != nullptr); - pthread_mutex_destroy(mutex); - free(mutex); -} - -bool -MutexImpl::acquire() -{ - pthread_mutex_t* mutex = static_cast(data_); - assert(mutex != nullptr); - - int errorcode = pthread_mutex_lock(mutex); - return errorcode == 0; -} - -bool -MutexImpl::release() -{ - pthread_mutex_t* mutex = static_cast(data_); - assert(mutex != nullptr); - - int errorcode = pthread_mutex_unlock(mutex); - return errorcode == 0; -} - -bool -MutexImpl::tryacquire() -{ - pthread_mutex_t* mutex = static_cast(data_); - assert(mutex != nullptr); - - int errorcode = pthread_mutex_trylock(mutex); - return errorcode == 0; -} - -} - -#elif defined(LLVM_ON_UNIX) -#include "Unix/Mutex.inc" -#elif defined( _WIN32) -#include "Windows/Mutex.inc" -#else -#warning Neither LLVM_ON_UNIX nor _WIN32 was set in Support/Mutex.cpp -#endif -#endif diff --git a/lib/Support/Parallel.cpp b/lib/Support/Parallel.cpp index 621bccbf2a4c..355c64b7d079 100644 --- a/lib/Support/Parallel.cpp +++ b/lib/Support/Parallel.cpp @@ -32,34 +32,6 @@ public: static Executor *getDefaultExecutor(); }; -#if defined(_MSC_VER) -/// An Executor that runs tasks via ConcRT. -class ConcRTExecutor : public Executor { - struct Taskish { - Taskish(std::function Task) : Task(Task) {} - - std::function Task; - - static void run(void *P) { - Taskish *Self = static_cast(P); - Self->Task(); - concurrency::Free(Self); - } - }; - -public: - virtual void add(std::function F) { - Concurrency::CurrentScheduler::ScheduleTask( - Taskish::run, new (concurrency::Alloc(sizeof(Taskish))) Taskish(F)); - } -}; - -Executor *Executor::getDefaultExecutor() { - static ConcRTExecutor exec; - return &exec; -} - -#else /// An implementation of an Executor that runs closures on a thread pool /// in filo order. class ThreadPoolExecutor : public Executor { @@ -117,8 +89,7 @@ Executor *Executor::getDefaultExecutor() { static ThreadPoolExecutor exec; return &exec; } -#endif -} +} // namespace static std::atomic TaskGroupInstances; diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index c49260125dba..14def83802da 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -855,11 +855,11 @@ void make_absolute(const Twine ¤t_directory, StringRef p(path.data(), path.size()); bool rootDirectory = path::has_root_directory(p); - bool rootName = - (real_style(Style::native) != Style::windows) || path::has_root_name(p); + bool rootName = path::has_root_name(p); // Already absolute. - if (rootName && rootDirectory) + if ((rootName || real_style(Style::native) != Style::windows) && + rootDirectory) return; // All of the following conditions will need the current directory. 
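The tightened check in make_absolute relies on the fact that a Windows-style path can have a root directory without a root name, in which case it is still relative to the current drive; a quick illustration with explicit path styles:

  #include "llvm/Support/Path.h"
  using namespace llvm;

  void rootNameExamples() {
    using sys::path::Style;
    bool A = sys::path::has_root_name("C:\\tmp\\x", Style::windows);      // true
    bool B = sys::path::has_root_directory("C:\\tmp\\x", Style::windows); // true  -> already absolute
    bool C = sys::path::has_root_name("\\tmp\\x", Style::windows);        // false -> still needs a drive
    bool D = sys::path::has_root_directory("\\tmp\\x", Style::windows);   // true
    bool E = sys::path::has_root_directory("/tmp/x", Style::posix);       // true  -> absolute on POSIX
    (void)A; (void)B; (void)C; (void)D; (void)E;
  }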
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp index aec00baec0e3..bfb238cc8539 100644 --- a/lib/Support/PrettyStackTrace.cpp +++ b/lib/Support/PrettyStackTrace.cpp @@ -121,31 +121,63 @@ extern "C" const char *__crashreporter_info__ asm(".desc ___crashreporter_info__, 0x10"); #endif -/// CrashHandler - This callback is run if a fatal signal is delivered to the -/// process, it prints the pretty stack trace. +static void setCrashLogMessage(const char *msg) LLVM_ATTRIBUTE_UNUSED; +static void setCrashLogMessage(const char *msg) { +#ifdef HAVE_CRASHREPORTERCLIENT_H + (void)CRSetCrashLogMessage(msg); +#elif HAVE_CRASHREPORTER_INFO + __crashreporter_info__ = msg; +#endif + // Don't reorder subsequent operations: whatever comes after might crash and + // we want the system crash handling to see the message we just set. + std::atomic_signal_fence(std::memory_order_seq_cst); +} + +#ifdef __APPLE__ +using CrashHandlerString = SmallString<2048>; +using CrashHandlerStringStorage = + std::aligned_storage::type; +static CrashHandlerStringStorage crashHandlerStringStorage; +#endif + +/// This callback is run if a fatal signal is delivered to the process, it +/// prints the pretty stack trace. static void CrashHandler(void *) { #ifndef __APPLE__ // On non-apple systems, just emit the crash stack trace to stderr. PrintCurStackTrace(errs()); #else - // Otherwise, emit to a smallvector of chars, send *that* to stderr, but also - // put it into __crashreporter_info__. - SmallString<2048> TmpStr; + // Emit the crash stack trace to a SmallString, put it where the system crash + // handling will find it, and also send it to stderr. + // + // The SmallString is fairly large in the hope that we don't allocate (we're + // handling a fatal signal, something is already pretty wrong, allocation + // might not work). Further, we don't use a magic static in case that's also + // borked. We leak any allocation that does occur because the program is about + // to die anyways. This is technically racy if we were handling two fatal + // signals, however if we're in that situation a race is the least of our + // worries. + auto &crashHandlerString = + *new (&crashHandlerStringStorage) CrashHandlerString; + + // If we crash while trying to print the stack trace, we still want the system + // crash handling to have some partial information. That'll work out as long + // as the SmallString doesn't allocate. If it does allocate then the system + // crash handling will see some garbage because the inline buffer now contains + // a pointer. + setCrashLogMessage(crashHandlerString.c_str()); + { - raw_svector_ostream Stream(TmpStr); + raw_svector_ostream Stream(crashHandlerString); PrintCurStackTrace(Stream); } - if (!TmpStr.empty()) { -#ifdef HAVE_CRASHREPORTERCLIENT_H - // Cast to void to avoid warning. 
- (void)CRSetCrashLogMessage(TmpStr.c_str()); -#elif HAVE_CRASHREPORTER_INFO - __crashreporter_info__ = strdup(TmpStr.c_str()); -#endif - errs() << TmpStr.str(); - } - + if (!crashHandlerString.empty()) { + setCrashLogMessage(crashHandlerString.c_str()); + errs() << crashHandlerString.str(); + } else + setCrashLogMessage("No crash information."); #endif } diff --git a/lib/Support/RWMutex.cpp b/lib/Support/RWMutex.cpp index 7ce856b716c6..5accf73e5f94 100644 --- a/lib/Support/RWMutex.cpp +++ b/lib/Support/RWMutex.cpp @@ -14,24 +14,20 @@ #include "llvm/Support/RWMutex.h" #include "llvm/Config/config.h" -//===----------------------------------------------------------------------===// -//=== WARNING: Implementation here must contain only TRULY operating system -//=== independent code. -//===----------------------------------------------------------------------===// +#if defined(LLVM_USE_RW_MUTEX_IMPL) +using namespace llvm; +using namespace sys; #if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0 // Define all methods as no-ops if threading is explicitly disabled -using namespace llvm; -using namespace sys; - RWMutexImpl::RWMutexImpl() = default; RWMutexImpl::~RWMutexImpl() = default; -bool RWMutexImpl::reader_acquire() { return true; } -bool RWMutexImpl::reader_release() { return true; } -bool RWMutexImpl::writer_acquire() { return true; } -bool RWMutexImpl::writer_release() { return true; } +bool RWMutexImpl::lock_shared() { return true; } +bool RWMutexImpl::unlock_shared() { return true; } +bool RWMutexImpl::lock() { return true; } +bool RWMutexImpl::unlock() { return true; } #else @@ -41,9 +37,6 @@ bool RWMutexImpl::writer_release() { return true; } #include #include -using namespace llvm; -using namespace sys; - // Construct a RWMutex using pthread calls RWMutexImpl::RWMutexImpl() { @@ -75,7 +68,7 @@ RWMutexImpl::~RWMutexImpl() } bool -RWMutexImpl::reader_acquire() +RWMutexImpl::lock_shared() { pthread_rwlock_t* rwlock = static_cast(data_); assert(rwlock != nullptr); @@ -85,7 +78,7 @@ RWMutexImpl::reader_acquire() } bool -RWMutexImpl::reader_release() +RWMutexImpl::unlock_shared() { pthread_rwlock_t* rwlock = static_cast(data_); assert(rwlock != nullptr); @@ -95,7 +88,7 @@ RWMutexImpl::reader_release() } bool -RWMutexImpl::writer_acquire() +RWMutexImpl::lock() { pthread_rwlock_t* rwlock = static_cast(data_); assert(rwlock != nullptr); @@ -105,7 +98,7 @@ RWMutexImpl::writer_acquire() } bool -RWMutexImpl::writer_release() +RWMutexImpl::unlock() { pthread_rwlock_t* rwlock = static_cast(data_); assert(rwlock != nullptr); @@ -114,11 +107,30 @@ RWMutexImpl::writer_release() return errorcode == 0; } -#elif defined(LLVM_ON_UNIX) -#include "Unix/RWMutex.inc" -#elif defined( _WIN32) -#include "Windows/RWMutex.inc" #else -#warning Neither LLVM_ON_UNIX nor _WIN32 was set in Support/Mutex.cpp + +RWMutexImpl::RWMutexImpl() : data_(new MutexImpl(false)) { } + +RWMutexImpl::~RWMutexImpl() { + delete static_cast(data_); +} + +bool RWMutexImpl::lock_shared() { + return static_cast(data_)->acquire(); +} + +bool RWMutexImpl::unlock_shared() { + return static_cast(data_)->release(); +} + +bool RWMutexImpl::lock() { + return static_cast(data_)->acquire(); +} + +bool RWMutexImpl::unlock() { + return static_cast(data_)->release(); +} + +#endif #endif #endif diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp index 4c1b07038024..8da345d4f140 100644 --- a/lib/Support/Regex.cpp +++ b/lib/Support/Regex.cpp @@ -52,14 +52,24 @@ Regex::~Regex() { } } -bool Regex::isValid(std::string &Error) const { - if 
(!error) - return true; +namespace { +/// Utility to convert a regex error code into a human-readable string. +void RegexErrorToString(int error, struct llvm_regex *preg, + std::string &Error) { size_t len = llvm_regerror(error, preg, nullptr, 0); Error.resize(len - 1); llvm_regerror(error, preg, &Error[0], len); +} + +} // namespace + +bool Regex::isValid(std::string &Error) const { + if (!error) + return true; + + RegexErrorToString(error, preg, Error); return false; } @@ -69,8 +79,14 @@ unsigned Regex::getNumMatches() const { return preg->re_nsub; } -bool Regex::match(StringRef String, SmallVectorImpl *Matches){ - if (error) +bool Regex::match(StringRef String, SmallVectorImpl *Matches, + std::string *Error) const { + // Reset error, if given. + if (Error && !Error->empty()) + *Error = ""; + + // Check if the regex itself didn't successfully compile. + if (Error ? !isValid(*Error) : !isValid()) return false; unsigned nmatch = Matches ? preg->re_nsub+1 : 0; @@ -83,11 +99,13 @@ bool Regex::match(StringRef String, SmallVectorImpl *Matches){ int rc = llvm_regexec(preg, String.data(), nmatch, pm.data(), REG_STARTEND); + // Failure to match is not an error, it's just a normal return value. + // Any other error code is considered abnormal, and is logged in the Error. if (rc == REG_NOMATCH) return false; if (rc != 0) { - // regexec can fail due to invalid pattern or running out of memory. - error = rc; + if (Error) + RegexErrorToString(error, preg, *Error); return false; } @@ -112,14 +130,11 @@ bool Regex::match(StringRef String, SmallVectorImpl *Matches){ } std::string Regex::sub(StringRef Repl, StringRef String, - std::string *Error) { + std::string *Error) const { SmallVector Matches; - // Reset error, if given. - if (Error && !Error->empty()) *Error = ""; - // Return the input if there was no match. - if (!match(String, &Matches)) + if (!match(String, &Matches, Error)) return String; // Otherwise splice in the replacement string, starting with the prefix before diff --git a/lib/Support/Signposts.cpp b/lib/Support/Signposts.cpp index d456f41d2fa6..aa159e1da2ae 100644 --- a/lib/Support/Signposts.cpp +++ b/lib/Support/Signposts.cpp @@ -78,6 +78,8 @@ public: #if LLVM_SUPPORT_XCODE_SIGNPOSTS #define HAVE_ANY_SIGNPOST_IMPL 1 +#else +#define HAVE_ANY_SIGNPOST_IMPL 0 #endif SignpostEmitter::SignpostEmitter() { diff --git a/lib/Support/SpecialCaseList.cpp b/lib/Support/SpecialCaseList.cpp index 96e09f9552bb..9bd1f18a4ee7 100644 --- a/lib/Support/SpecialCaseList.cpp +++ b/lib/Support/SpecialCaseList.cpp @@ -53,7 +53,7 @@ bool SpecialCaseList::Matcher::insert(std::string Regexp, return false; RegExes.emplace_back( - std::make_pair(make_unique(std::move(CheckRE)), LineNumber)); + std::make_pair(std::make_unique(std::move(CheckRE)), LineNumber)); return true; } @@ -175,7 +175,7 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, // Create this section if it has not been seen before. if (SectionsMap.find(Section) == SectionsMap.end()) { - std::unique_ptr M = make_unique(); + std::unique_ptr M = std::make_unique(); std::string REError; if (!M->insert(Section, LineNo, REError)) { Error = (Twine("malformed section ") + Section + ": '" + REError).str(); diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp index e4f0535d21aa..8b4177c7fba6 100644 --- a/lib/Support/Statistic.cpp +++ b/lib/Support/Statistic.cpp @@ -57,7 +57,7 @@ namespace { /// This class is also used to look up statistic values from applications that /// use LLVM. 
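The Regex::match() overload introduced above (lib/Support/Regex.cpp) now threads an optional std::string through to report compile and regexec failures to the caller; a short usage sketch with an invented helper:

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/StringRef.h"
  #include "llvm/Support/Regex.h"
  #include <string>
  using namespace llvm;

  static bool parseVersion(StringRef S, StringRef &Major, StringRef &Minor) {
    Regex Re("^([0-9]+)\\.([0-9]+)$");
    std::string Err;
    SmallVector<StringRef, 3> Groups;
    if (!Re.match(S, &Groups, &Err))
      return false;          // Err is non-empty only for abnormal failures
    Major = Groups[1];       // Groups[0] is the whole match
    Minor = Groups[2];
    return true;
  }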
class StatisticInfo { - std::vector Stats; + std::vector Stats; friend void llvm::PrintStatistics(); friend void llvm::PrintStatistics(raw_ostream &OS); @@ -66,14 +66,12 @@ class StatisticInfo { /// Sort statistics by debugtype,name,description. void sort(); public: - using const_iterator = std::vector::const_iterator; + using const_iterator = std::vector::const_iterator; StatisticInfo(); ~StatisticInfo(); - void addStatistic(Statistic *S) { - Stats.push_back(S); - } + void addStatistic(TrackingStatistic *S) { Stats.push_back(S); } const_iterator begin() const { return Stats.begin(); } const_iterator end() const { return Stats.end(); } @@ -90,7 +88,7 @@ static ManagedStatic > StatLock; /// RegisterStatistic - The first time a statistic is bumped, this method is /// called. -void Statistic::RegisterStatistic() { +void TrackingStatistic::RegisterStatistic() { // If stats are enabled, inform StatInfo that this statistic should be // printed. // llvm_shutdown calls destructors while holding the ManagedStatic mutex. @@ -135,15 +133,16 @@ bool llvm::AreStatisticsEnabled() { } void StatisticInfo::sort() { - llvm::stable_sort(Stats, [](const Statistic *LHS, const Statistic *RHS) { - if (int Cmp = std::strcmp(LHS->getDebugType(), RHS->getDebugType())) - return Cmp < 0; + llvm::stable_sort( + Stats, [](const TrackingStatistic *LHS, const TrackingStatistic *RHS) { + if (int Cmp = std::strcmp(LHS->getDebugType(), RHS->getDebugType())) + return Cmp < 0; - if (int Cmp = std::strcmp(LHS->getName(), RHS->getName())) - return Cmp < 0; + if (int Cmp = std::strcmp(LHS->getName(), RHS->getName())) + return Cmp < 0; - return std::strcmp(LHS->getDesc(), RHS->getDesc()) < 0; - }); + return std::strcmp(LHS->getDesc(), RHS->getDesc()) < 0; + }); } void StatisticInfo::reset() { @@ -207,7 +206,7 @@ void llvm::PrintStatisticsJSON(raw_ostream &OS) { // Print all of the statistics. 
OS << "{\n"; const char *delim = ""; - for (const Statistic *Stat : Stats.Stats) { + for (const TrackingStatistic *Stat : Stats.Stats) { OS << delim; assert(yaml::needsQuotes(Stat->getDebugType()) == yaml::QuotingType::None && "Statistic group/type name is simple."); diff --git a/lib/Support/StringExtras.cpp b/lib/Support/StringExtras.cpp index bf28b2be5657..af8dd463e125 100644 --- a/lib/Support/StringExtras.cpp +++ b/lib/Support/StringExtras.cpp @@ -60,7 +60,9 @@ void llvm::SplitString(StringRef Source, void llvm::printEscapedString(StringRef Name, raw_ostream &Out) { for (unsigned i = 0, e = Name.size(); i != e; ++i) { unsigned char C = Name[i]; - if (isPrint(C) && C != '\\' && C != '"') + if (C == '\\') + Out << '\\' << C; + else if (isPrint(C) && C != '"') Out << C; else Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F); diff --git a/lib/Support/TimeProfiler.cpp b/lib/Support/TimeProfiler.cpp index bc2340815645..ca9119e30b65 100644 --- a/lib/Support/TimeProfiler.cpp +++ b/lib/Support/TimeProfiler.cpp @@ -24,29 +24,38 @@ using namespace std::chrono; namespace llvm { -static cl::opt TimeTraceGranularity( - "time-trace-granularity", - cl::desc( - "Minimum time granularity (in microseconds) traced by time profiler"), - cl::init(500)); - TimeTraceProfiler *TimeTraceProfilerInstance = nullptr; typedef duration DurationType; +typedef time_point TimePointType; typedef std::pair CountAndDurationType; typedef std::pair NameAndCountAndDurationType; struct Entry { - time_point Start; - DurationType Duration; + TimePointType Start; + TimePointType End; std::string Name; std::string Detail; - Entry(time_point &&S, DurationType &&D, std::string &&N, - std::string &&Dt) - : Start(std::move(S)), Duration(std::move(D)), Name(std::move(N)), + Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt) + : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), Detail(std::move(Dt)){}; + + // Calculate timings for FlameGraph. Cast time points to microsecond precision + // rather than casting duration. This avoid truncation issues causing inner + // scopes overruning outer scopes. + steady_clock::rep getFlameGraphStartUs(TimePointType StartTime) const { + return (time_point_cast(Start) - + time_point_cast(StartTime)) + .count(); + } + + steady_clock::rep getFlameGraphDurUs() const { + return (time_point_cast(End) - + time_point_cast(Start)) + .count(); + } }; struct TimeTraceProfiler { @@ -55,17 +64,27 @@ struct TimeTraceProfiler { } void begin(std::string Name, llvm::function_ref Detail) { - Stack.emplace_back(steady_clock::now(), DurationType{}, std::move(Name), + Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name), Detail()); } void end() { assert(!Stack.empty() && "Must call begin() first"); auto &E = Stack.back(); - E.Duration = steady_clock::now() - E.Start; + E.End = steady_clock::now(); + + // Check that end times monotonically increase. + assert((Entries.empty() || + (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >= + Entries.back().getFlameGraphStartUs(StartTime) + + Entries.back().getFlameGraphDurUs())) && + "TimeProfiler scope ended earlier than previous scope"); - // Only include sections longer than TimeTraceGranularity msec. - if (duration_cast(E.Duration).count() > TimeTraceGranularity) + // Calculate duration at full precision for overall counts. + DurationType Duration = E.End - E.Start; + + // Only include sections longer or equal to TimeTraceGranularity msec. 
+ if (duration_cast(Duration).count() >= TimeTraceGranularity) Entries.emplace_back(E); // Track total time taken by each "name", but only the topmost levels of @@ -78,7 +97,7 @@ struct TimeTraceProfiler { }) == Stack.rend()) { auto &CountAndTotal = CountAndTotalPerName[E.Name]; CountAndTotal.first++; - CountAndTotal.second += E.Duration; + CountAndTotal.second += Duration; } Stack.pop_back(); @@ -94,8 +113,8 @@ struct TimeTraceProfiler { // Emit all events for the main flame graph. for (const auto &E : Entries) { - auto StartUs = duration_cast(E.Start - StartTime).count(); - auto DurUs = duration_cast(E.Duration).count(); + auto StartUs = E.getFlameGraphStartUs(StartTime); + auto DurUs = E.getFlameGraphDurUs(); J.object([&]{ J.attribute("pid", 1); @@ -160,13 +179,17 @@ struct TimeTraceProfiler { SmallVector Stack; SmallVector Entries; StringMap CountAndTotalPerName; - time_point StartTime; + TimePointType StartTime; + + // Minimum time granularity (in microseconds) + unsigned TimeTraceGranularity; }; -void timeTraceProfilerInitialize() { +void timeTraceProfilerInitialize(unsigned TimeTraceGranularity) { assert(TimeTraceProfilerInstance == nullptr && "Profiler should not be initialized"); TimeTraceProfilerInstance = new TimeTraceProfiler(); + TimeTraceProfilerInstance->TimeTraceGranularity = TimeTraceGranularity; } void timeTraceProfilerCleanup() { diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp index 2a7ff1eaaf63..10c9b8e0b329 100644 --- a/lib/Support/Timer.cpp +++ b/lib/Support/Timer.cpp @@ -58,23 +58,23 @@ namespace { std::unique_ptr llvm::CreateInfoOutputFile() { const std::string &OutputFilename = getLibSupportInfoOutputFilename(); if (OutputFilename.empty()) - return llvm::make_unique(2, false); // stderr. + return std::make_unique(2, false); // stderr. if (OutputFilename == "-") - return llvm::make_unique(1, false); // stdout. + return std::make_unique(1, false); // stdout. // Append mode is used because the info output file is opened and closed // each time -stats or -time-passes wants to print output to it. To // compensate for this, the test-suite Makefiles have code to delete the // info output file before running commands which write to it. std::error_code EC; - auto Result = llvm::make_unique( - OutputFilename, EC, sys::fs::F_Append | sys::fs::F_Text); + auto Result = std::make_unique( + OutputFilename, EC, sys::fs::OF_Append | sys::fs::OF_Text); if (!EC) return Result; errs() << "Error opening info-output-file '" << OutputFilename << " for appending!\n"; - return llvm::make_unique(2, false); // stderr. + return std::make_unique(2, false); // stderr. 
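[Editor's note] The TimeProfiler changes above switch Entry from storing a duration to storing Start/End time points and truncate the points to microseconds when emitting the flame graph, so a nested scope can never appear to outlive its parent after rounding. A minimal sketch of that arithmetic in plain std::chrono; the Scope struct and helper names are illustrative stand-ins, not the real Entry API:

#include <cassert>
#include <chrono>

using namespace std::chrono;
using TimePoint = time_point<steady_clock, nanoseconds>;

struct Scope {
  TimePoint Start, End;

  // Truncate the time points to whole microseconds, then subtract, as
  // getFlameGraphStartUs/getFlameGraphDurUs do above.
  long long startUs(TimePoint Origin) const {
    return (time_point_cast<microseconds>(Start) -
            time_point_cast<microseconds>(Origin)).count();
  }
  long long durUs() const {
    return (time_point_cast<microseconds>(End) -
            time_point_cast<microseconds>(Start)).count();
  }
};

int main() {
  const TimePoint Origin = time_point_cast<nanoseconds>(steady_clock::now());
  Scope Outer{Origin, Origin + microseconds(1200)};
  Scope Inner{Origin + nanoseconds(300700), Origin + microseconds(1200)};

  // Rounding is applied point-wise, so start + duration is computed from the
  // same truncated end point for both scopes and the inner one cannot overrun
  // the outer one; this is the invariant the new assert in end() checks.
  assert(Inner.startUs(Origin) + Inner.durUs() <=
         Outer.startUs(Origin) + Outer.durUs());
  return 0;
}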
} namespace { diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc index a0927da50e48..05f8e32896fa 100644 --- a/lib/Support/Unix/Memory.inc +++ b/lib/Support/Unix/Memory.inc @@ -176,7 +176,7 @@ Memory::releaseMappedMemory(MemoryBlock &M) { std::error_code Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) { - static const size_t PageSize = Process::getPageSizeEstimate(); + static const Align PageSize = Align(Process::getPageSizeEstimate()); if (M.Address == nullptr || M.AllocatedSize == 0) return std::error_code(); @@ -184,8 +184,8 @@ Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) { return std::error_code(EINVAL, std::generic_category()); int Protect = getPosixProtectionFlags(Flags); - uintptr_t Start = alignAddr((uint8_t *)M.Address - PageSize + 1, PageSize); - uintptr_t End = alignAddr((uint8_t *)M.Address + M.AllocatedSize, PageSize); + uintptr_t Start = alignAddr((const uint8_t *)M.Address - PageSize.value() + 1, PageSize); + uintptr_t End = alignAddr((const uint8_t *)M.Address + M.AllocatedSize, PageSize); bool InvalidateCache = (Flags & MF_EXEC); diff --git a/lib/Support/Unix/Mutex.inc b/lib/Support/Unix/Mutex.inc deleted file mode 100644 index 2c982b38d6ff..000000000000 --- a/lib/Support/Unix/Mutex.inc +++ /dev/null @@ -1,42 +0,0 @@ -//===- llvm/Support/Unix/Mutex.inc - Unix Mutex Implementation ---*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the Unix specific (non-pthread) Mutex class. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -//=== WARNING: Implementation here must contain only generic UNIX code that -//=== is guaranteed to work on *all* UNIX variants. -//===----------------------------------------------------------------------===// - -namespace llvm -{ -using namespace sys; - -MutexImpl::MutexImpl( bool recursive) -{ -} - -MutexImpl::~MutexImpl() -{ -} - -bool -MutexImpl::release() -{ - return true; -} - -bool -MutexImpl::tryacquire( void ) -{ - return true; -} - -} diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index e80880c6b3cb..a617eca3566a 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -186,12 +186,12 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) { #elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ defined(__minix) || defined(__DragonFly__) || \ defined(__FreeBSD_kernel__) || defined(_AIX) - StringRef curproc("/proc/curproc/file"); + const char *curproc = "/proc/curproc/file"; char exe_path[PATH_MAX]; // /proc is not mounted by default under FreeBSD, but gives more accurate // information than argv[0] when it is. if (sys::fs::exists(curproc)) { - ssize_t len = readlink(curproc.str().c_str(), exe_path, sizeof(exe_path)); + ssize_t len = readlink(curproc, exe_path, sizeof(exe_path)); if (len > 0) { // Null terminate the string for realpath. readlink never null // terminates its output. 
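[Editor's note] The protectMappedMemory hunk above widens the block to the smallest page-aligned [Start, End) range, since mprotect only accepts page-aligned addresses; rounding (Addr - PageSize + 1) up is how it rounds the start down. A small arithmetic check of that identity; alignUp is a stand-in for llvm::alignAddr and assumes a power-of-two page size:

#include <cassert>
#include <cstdint>

static uintptr_t alignUp(uintptr_t Addr, uintptr_t PageSize) {
  return (Addr + PageSize - 1) & ~(PageSize - 1); // power-of-two alignment
}

static void pageAlignedRange(uintptr_t Addr, uintptr_t Size, uintptr_t PageSize,
                             uintptr_t &Start, uintptr_t &End) {
  // Rounding (Addr - PageSize + 1) up lands on the page containing Addr,
  // i.e. it rounds Addr down to its page boundary.
  Start = alignUp(Addr - PageSize + 1, PageSize);
  End = alignUp(Addr + Size, PageSize);
}

int main() {
  uintptr_t Start = 0, End = 0;
  pageAlignedRange(0x1234, 0x100, 0x1000, Start, End);
  assert(Start == 0x1000 && End == 0x2000); // covers [0x1234, 0x1334)
  return 0;
}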
@@ -205,10 +205,10 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) { return exe_path; #elif defined(__linux__) || defined(__CYGWIN__) char exe_path[MAXPATHLEN]; - StringRef aPath("/proc/self/exe"); + const char *aPath = "/proc/self/exe"; if (sys::fs::exists(aPath)) { // /proc is not always mounted under Linux (chroot for example). - ssize_t len = readlink(aPath.str().c_str(), exe_path, sizeof(exe_path)); + ssize_t len = readlink(aPath, exe_path, sizeof(exe_path)); if (len < 0) return ""; @@ -443,7 +443,7 @@ static bool is_local_impl(struct STATVFS &Vfs) { std::unique_ptr Buf; int Tries = 3; while (Tries--) { - Buf = llvm::make_unique(BufSize); + Buf = std::make_unique(BufSize); Ret = mntctl(MCTL_QUERY, BufSize, Buf.get()); if (Ret != 0) break; @@ -833,7 +833,10 @@ std::error_code detail::directory_iterator_destruct(detail::DirIterState &it) { static file_type direntType(dirent* Entry) { // Most platforms provide the file type in the dirent: Linux/BSD/Mac. // The DTTOIF macro lets us reuse our status -> type conversion. -#if defined(_DIRENT_HAVE_D_TYPE) && defined(DTTOIF) + // Note that while glibc provides a macro to see if this is supported, + // _DIRENT_HAVE_D_TYPE, it's not defined on BSD/Mac, so we test for the + // d_type-to-mode_t conversion macro instead. +#if defined(DTTOIF) return typeForMode(DTTOIF(Entry->d_type)); #else // Other platforms such as Solaris require a stat() to get the type. @@ -884,9 +887,9 @@ static int nativeOpenFlags(CreationDisposition Disp, OpenFlags Flags, else if (Access == (FA_Read | FA_Write)) Result |= O_RDWR; - // This is for compatibility with old code that assumed F_Append implied + // This is for compatibility with old code that assumed OF_Append implied // would open an existing file. See Windows/Path.inc for a longer comment. - if (Flags & F_Append) + if (Flags & OF_Append) Disp = CD_OpenAlways; if (Disp == CD_CreateNew) { @@ -901,7 +904,7 @@ static int nativeOpenFlags(CreationDisposition Disp, OpenFlags Flags, // Nothing special, just don't add O_CREAT and we get these semantics. } - if (Flags & F_Append) + if (Flags & OF_Append) Result |= O_APPEND; #ifdef O_CLOEXEC @@ -996,44 +999,28 @@ file_t getStdinHandle() { return 0; } file_t getStdoutHandle() { return 1; } file_t getStderrHandle() { return 2; } -std::error_code readNativeFile(file_t FD, MutableArrayRef Buf, - size_t *BytesRead) { - *BytesRead = sys::RetryAfterSignal(-1, ::read, FD, Buf.data(), Buf.size()); - if (ssize_t(*BytesRead) == -1) - return std::error_code(errno, std::generic_category()); - return std::error_code(); +Expected readNativeFile(file_t FD, MutableArrayRef Buf) { + ssize_t NumRead = + sys::RetryAfterSignal(-1, ::read, FD, Buf.data(), Buf.size()); + if (ssize_t(NumRead) == -1) + return errorCodeToError(std::error_code(errno, std::generic_category())); + return NumRead; } -std::error_code readNativeFileSlice(file_t FD, MutableArrayRef Buf, - size_t Offset) { - char *BufPtr = Buf.data(); - size_t BytesLeft = Buf.size(); - -#ifndef HAVE_PREAD - // If we don't have pread, seek to Offset. 
- if (lseek(FD, Offset, SEEK_SET) == -1) - return std::error_code(errno, std::generic_category()); -#endif - - while (BytesLeft) { +Expected readNativeFileSlice(file_t FD, MutableArrayRef Buf, + uint64_t Offset) { #ifdef HAVE_PREAD - ssize_t NumRead = sys::RetryAfterSignal(-1, ::pread, FD, BufPtr, BytesLeft, - Buf.size() - BytesLeft + Offset); + ssize_t NumRead = + sys::RetryAfterSignal(-1, ::pread, FD, Buf.data(), Buf.size(), Offset); #else - ssize_t NumRead = sys::RetryAfterSignal(-1, ::read, FD, BufPtr, BytesLeft); + if (lseek(FD, Offset, SEEK_SET) == -1) + return errorCodeToError(std::error_code(errno, std::generic_category())); + ssize_t NumRead = + sys::RetryAfterSignal(-1, ::read, FD, Buf.data(), Buf.size()); #endif - if (NumRead == -1) { - // Error while reading. - return std::error_code(errno, std::generic_category()); - } - if (NumRead == 0) { - memset(BufPtr, 0, BytesLeft); // zero-initialize rest of the buffer. - break; - } - BytesLeft -= NumRead; - BufPtr += NumRead; - } - return std::error_code(); + if (NumRead == -1) + return errorCodeToError(std::error_code(errno, std::generic_category())); + return NumRead; } std::error_code closeFile(file_t &F) { @@ -1200,7 +1187,7 @@ namespace fs { /// implementation. std::error_code copy_file(const Twine &From, const Twine &To) { uint32_t Flag = COPYFILE_DATA; -#if __has_builtin(__builtin_available) +#if __has_builtin(__builtin_available) && defined(COPYFILE_CLONE) if (__builtin_available(macos 10.12, *)) { bool IsSymlink; if (std::error_code Error = is_symlink_file(From, IsSymlink)) diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc index 4115ee396582..dfe81d7e2833 100644 --- a/lib/Support/Unix/Process.inc +++ b/lib/Support/Unix/Process.inc @@ -15,8 +15,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Config/config.h" #include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Mutex.h" -#include "llvm/Support/MutexGuard.h" +#include #if HAVE_FCNTL_H #include #endif @@ -327,13 +326,13 @@ extern "C" int tigetnum(char *capname); #endif #ifdef HAVE_TERMINFO -static ManagedStatic TermColorMutex; +static ManagedStatic TermColorMutex; #endif static bool terminalHasColors(int fd) { #ifdef HAVE_TERMINFO // First, acquire a global lock because these C routines are thread hostile. - MutexGuard G(*TermColorMutex); + std::lock_guard G(*TermColorMutex); int errret = 0; if (setupterm(nullptr, fd, &errret) != 0) diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc index c4123a64046f..520685a0e987 100644 --- a/lib/Support/Unix/Program.inc +++ b/lib/Support/Unix/Program.inc @@ -136,7 +136,7 @@ static bool RedirectIO_PS(const std::string *Path, int FD, std::string *ErrMsg, if (int Err = posix_spawn_file_actions_addopen( FileActions, FD, File, FD == 0 ? 
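[Editor's note] The new readNativeFileSlice above reads at an explicit offset with pread, which never moves the descriptor's file position, and retries when the call is interrupted by a signal (that is what RetryAfterSignal provides). A bare-bones POSIX sketch; readSliceAt is an invented helper, not an LLVM API, and short reads are left to the caller:

#include <cerrno>
#include <cstddef>
#include <cstdint>
#include <unistd.h>

static ssize_t readSliceAt(int FD, void *Buf, size_t Size, uint64_t Offset) {
  ssize_t NumRead;
  do {
    // pread reads from Offset without disturbing the shared file position.
    NumRead = ::pread(FD, Buf, Size, static_cast<off_t>(Offset));
  } while (NumRead == -1 && errno == EINTR); // retry interrupted reads
  return NumRead; // -1 with errno set on a real error; may be a short read
}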
O_RDONLY : O_WRONLY | O_CREAT, 0666)) - return MakeErrMsg(ErrMsg, "Cannot dup2", Err); + return MakeErrMsg(ErrMsg, "Cannot posix_spawn_file_actions_addopen", Err); return false; } #endif @@ -444,7 +444,7 @@ std::error_code llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents, WindowsEncodingMethod Encoding /*unused*/) { std::error_code EC; - llvm::raw_fd_ostream OS(FileName, EC, llvm::sys::fs::OpenFlags::F_Text); + llvm::raw_fd_ostream OS(FileName, EC, llvm::sys::fs::OpenFlags::OF_Text); if (EC) return EC; diff --git a/lib/Support/Unix/RWMutex.inc b/lib/Support/Unix/RWMutex.inc deleted file mode 100644 index 8b47dfa0f85c..000000000000 --- a/lib/Support/Unix/RWMutex.inc +++ /dev/null @@ -1,50 +0,0 @@ -//= llvm/Support/Unix/RWMutex.inc - Unix Reader/Writer Mutual Exclusion Lock =// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the Unix specific (non-pthread) RWMutex class. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -//=== WARNING: Implementation here must contain only generic UNIX code that -//=== is guaranteed to work on *all* UNIX variants. -//===----------------------------------------------------------------------===// - -#include "llvm/Support/Mutex.h" - -namespace llvm { - -using namespace sys; - -// This naive implementation treats readers the same as writers. This -// will therefore deadlock if a thread tries to acquire a read lock -// multiple times. - -RWMutexImpl::RWMutexImpl() : data_(new MutexImpl(false)) { } - -RWMutexImpl::~RWMutexImpl() { - delete static_cast(data_); -} - -bool RWMutexImpl::reader_acquire() { - return static_cast(data_)->acquire(); -} - -bool RWMutexImpl::reader_release() { - return static_cast(data_)->release(); -} - -bool RWMutexImpl::writer_acquire() { - return static_cast(data_)->acquire(); -} - -bool RWMutexImpl::writer_release() { - return static_cast(data_)->release(); -} - -} diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index 634c16aa36c7..5e0cde4a81ed 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -43,7 +43,6 @@ #include "llvm/Support/Mutex.h" #include "llvm/Support/Program.h" #include "llvm/Support/SaveAndRestore.h" -#include "llvm/Support/UniqueLock.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -83,12 +82,18 @@ using namespace llvm; static RETSIGTYPE SignalHandler(int Sig); // defined below. static RETSIGTYPE InfoSignalHandler(int Sig); // defined below. +static void DefaultPipeSignalFunction() { + exit(EX_IOERR); +} + using SignalHandlerFunctionType = void (*)(); /// The function to call if ctrl-c is pressed. static std::atomic InterruptFunction = ATOMIC_VAR_INIT(nullptr); static std::atomic InfoSignalFunction = ATOMIC_VAR_INIT(nullptr); +static std::atomic PipeSignalFunction = + ATOMIC_VAR_INIT(DefaultPipeSignalFunction); namespace { /// Signal-safe removal of files. @@ -364,7 +369,8 @@ static RETSIGTYPE SignalHandler(int Sig) { // Send a special return code that drivers can check for, from sysexits.h. 
if (Sig == SIGPIPE) - exit(EX_IOERR); + if (SignalHandlerFunctionType CurrentPipeFunction = PipeSignalFunction) + CurrentPipeFunction(); raise(Sig); // Execute the default handler. return; @@ -404,6 +410,11 @@ void llvm::sys::SetInfoSignalFunction(void (*Handler)()) { RegisterHandlers(); } +void llvm::sys::SetPipeSignalFunction(void (*Handler)()) { + PipeSignalFunction.exchange(Handler); + RegisterHandlers(); +} + // The public API bool llvm::sys::RemoveFileOnSignal(StringRef Filename, std::string* ErrMsg) { diff --git a/lib/Support/VirtualFileSystem.cpp b/lib/Support/VirtualFileSystem.cpp index 5d3480e97148..c390cb1b2227 100644 --- a/lib/Support/VirtualFileSystem.cpp +++ b/lib/Support/VirtualFileSystem.cpp @@ -176,9 +176,9 @@ class RealFile : public File { Status S; std::string RealName; - RealFile(file_t FD, StringRef NewName, StringRef NewRealPathName) - : FD(FD), S(NewName, {}, {}, {}, {}, {}, - llvm::sys::fs::file_type::status_error, {}), + RealFile(file_t RawFD, StringRef NewName, StringRef NewRealPathName) + : FD(RawFD), S(NewName, {}, {}, {}, {}, {}, + llvm::sys::fs::file_type::status_error, {}), RealName(NewRealPathName.str()) { assert(FD != kInvalidFile && "Invalid or inactive file descriptor"); } @@ -349,7 +349,7 @@ IntrusiveRefCntPtr vfs::getRealFileSystem() { } std::unique_ptr vfs::createPhysicalFileSystem() { - return llvm::make_unique(false); + return std::make_unique(false); } namespace { @@ -754,7 +754,7 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime, ResolvedUser, ResolvedGroup, 0, sys::fs::file_type::directory_file, NewDirectoryPerms); Dir = cast(Dir->addChild( - Name, llvm::make_unique(std::move(Stat)))); + Name, std::make_unique(std::move(Stat)))); continue; } @@ -989,6 +989,16 @@ std::error_code InMemoryFileSystem::isLocal(const Twine &Path, bool &Result) { // RedirectingFileSystem implementation //===-----------------------------------------------------------------------===/ +RedirectingFileSystem::RedirectingFileSystem(IntrusiveRefCntPtr FS) + : ExternalFS(std::move(FS)) { + if (ExternalFS) + if (auto ExternalWorkingDirectory = + ExternalFS->getCurrentWorkingDirectory()) { + WorkingDirectory = *ExternalWorkingDirectory; + ExternalFSValidWD = true; + } +} + // FIXME: reuse implementation common with OverlayFSDirIterImpl as these // iterators are conceptually similar. class llvm::vfs::VFSFromYamlDirIterImpl @@ -1035,12 +1045,27 @@ public: llvm::ErrorOr RedirectingFileSystem::getCurrentWorkingDirectory() const { - return ExternalFS->getCurrentWorkingDirectory(); + return WorkingDirectory; } std::error_code RedirectingFileSystem::setCurrentWorkingDirectory(const Twine &Path) { - return ExternalFS->setCurrentWorkingDirectory(Path); + // Don't change the working directory if the path doesn't exist. + if (!exists(Path)) + return errc::no_such_file_or_directory; + + // Always change the external FS but ignore its result. 
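[Editor's note] The Signals.inc change above replaces the hard-coded exit(EX_IOERR) on SIGPIPE with an overridable hook held in an atomic function pointer. A rough sketch of that shape only; defaultPipeAction, setPipeAction and onSignal are invented names, and 74 is EX_IOERR from <sysexits.h>:

#include <atomic>
#include <csignal>
#include <cstdlib>

using HandlerFn = void (*)();

// Default action: die with the sysexits.h I/O-error code, as before.
static void defaultPipeAction() { std::_Exit(74 /* EX_IOERR */); }

// Atomic so a replacement installed by the client is published safely to the
// asynchronous signal handler.
static std::atomic<HandlerFn> PipeAction{defaultPipeAction};

void setPipeAction(HandlerFn Fn) { PipeAction.exchange(Fn); }

// Registered with std::signal(SIGPIPE, onSignal); runs whatever action is
// currently installed instead of unconditionally exiting.
void onSignal(int Sig) {
  if (Sig == SIGPIPE)
    if (HandlerFn Fn = PipeAction.load())
      Fn();
}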
+ if (ExternalFS) { + auto EC = ExternalFS->setCurrentWorkingDirectory(Path); + ExternalFSValidWD = !static_cast(EC); + } + + SmallString<128> AbsolutePath; + Path.toVector(AbsolutePath); + if (std::error_code EC = makeAbsolute(AbsolutePath)) + return EC; + WorkingDirectory = AbsolutePath.str(); + return {}; } std::error_code RedirectingFileSystem::isLocal(const Twine &Path, @@ -1053,7 +1078,7 @@ directory_iterator RedirectingFileSystem::dir_begin(const Twine &Dir, ErrorOr E = lookupPath(Dir); if (!E) { EC = E.getError(); - if (IsFallthrough && EC == errc::no_such_file_or_directory) + if (shouldUseExternalFS() && EC == errc::no_such_file_or_directory) return ExternalFS->dir_begin(Dir, EC); return {}; } @@ -1071,7 +1096,7 @@ directory_iterator RedirectingFileSystem::dir_begin(const Twine &Dir, auto *D = cast(*E); return directory_iterator(std::make_shared( Dir, D->contents_begin(), D->contents_end(), - /*IterateExternalFS=*/IsFallthrough, *ExternalFS, EC)); + /*IterateExternalFS=*/shouldUseExternalFS(), *ExternalFS, EC)); } void RedirectingFileSystem::setExternalContentsPrefixDir(StringRef PrefixDir) { @@ -1082,20 +1107,19 @@ StringRef RedirectingFileSystem::getExternalContentsPrefixDir() const { return ExternalContentsPrefixDir; } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void RedirectingFileSystem::dump() const { +void RedirectingFileSystem::dump(raw_ostream &OS) const { for (const auto &Root : Roots) - dumpEntry(Root.get()); + dumpEntry(OS, Root.get()); } -LLVM_DUMP_METHOD void -RedirectingFileSystem::dumpEntry(RedirectingFileSystem::Entry *E, - int NumSpaces) const { +void RedirectingFileSystem::dumpEntry(raw_ostream &OS, + RedirectingFileSystem::Entry *E, + int NumSpaces) const { StringRef Name = E->getName(); for (int i = 0, e = NumSpaces; i < e; ++i) - dbgs() << " "; - dbgs() << "'" << Name.str().c_str() << "'" - << "\n"; + OS << " "; + OS << "'" << Name.str().c_str() << "'" + << "\n"; if (E->getKind() == RedirectingFileSystem::EK_Directory) { auto *DE = dyn_cast(E); @@ -1103,9 +1127,12 @@ RedirectingFileSystem::dumpEntry(RedirectingFileSystem::Entry *E, for (std::unique_ptr &SubEntry : llvm::make_range(DE->contents_begin(), DE->contents_end())) - dumpEntry(SubEntry.get(), NumSpaces + 2); + dumpEntry(OS, SubEntry.get(), NumSpaces + 2); } } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void RedirectingFileSystem::dump() const { dump(dbgs()); } #endif /// A helper class to hold the common YAML parsing state. @@ -1209,7 +1236,7 @@ class llvm::vfs::RedirectingFileSystemParser { // ... or create a new one std::unique_ptr E = - llvm::make_unique( + std::make_unique( Name, Status("", getNextVirtualUniqueID(), std::chrono::system_clock::now(), 0, 0, 0, file_type::directory_file, sys::fs::all_all)); @@ -1221,7 +1248,7 @@ class llvm::vfs::RedirectingFileSystemParser { } auto *DE = - dyn_cast(ParentEntry); + cast(ParentEntry); DE->addContent(std::move(E)); return DE->getLastContent(); } @@ -1232,9 +1259,7 @@ class llvm::vfs::RedirectingFileSystemParser { StringRef Name = SrcE->getName(); switch (SrcE->getKind()) { case RedirectingFileSystem::EK_Directory: { - auto *DE = - dyn_cast(SrcE); - assert(DE && "Must be a directory"); + auto *DE = cast(SrcE); // Empty directories could be present in the YAML as a way to // describe a file for a current directory after some of its subdir // is parsed. This only leads to redundant walks, ignore it. 
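[Editor's note] Several RedirectingFileSystem hunks here replace a bare IsFallthrough test with shouldUseExternalFS() before delegating to the external filesystem. The two-layer lookup shape, sketched with a made-up Layer interface rather than the real llvm::vfs API: consult the overlay first and fall through only when the path is simply not mapped, never when it fails for another reason:

#include <string>
#include <system_error>

struct Layer {
  virtual ~Layer() = default;
  virtual std::error_code statPath(const std::string &Path) = 0;
};

std::error_code statWithFallthrough(Layer &Overlay, Layer *External,
                                    bool AllowFallthrough,
                                    const std::string &Path) {
  std::error_code EC = Overlay.statPath(Path);
  if (EC == std::errc::no_such_file_or_directory && AllowFallthrough &&
      External)
    return External->statPath(Path); // not mapped: ask the real filesystem
  return EC; // mapped entry, or an error that must not be masked
}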
@@ -1246,13 +1271,12 @@ class llvm::vfs::RedirectingFileSystemParser { break; } case RedirectingFileSystem::EK_File: { - auto *FE = dyn_cast(SrcE); - assert(FE && "Must be a file"); assert(NewParentE && "Parent entry must exist"); - auto *DE = dyn_cast( - NewParentE); + auto *FE = cast(SrcE); + auto *DE = + cast(NewParentE); DE->addContent( - llvm::make_unique( + std::make_unique( Name, FE->getExternalContentsPath(), FE->getUseName())); break; } @@ -1423,12 +1447,12 @@ class llvm::vfs::RedirectingFileSystemParser { std::unique_ptr Result; switch (Kind) { case RedirectingFileSystem::EK_File: - Result = llvm::make_unique( + Result = std::make_unique( LastComponent, std::move(ExternalContentsPath), UseExternalName); break; case RedirectingFileSystem::EK_Directory: Result = - llvm::make_unique( + std::make_unique( LastComponent, std::move(EntryArrayContents), Status("", getNextVirtualUniqueID(), std::chrono::system_clock::now(), 0, 0, 0, @@ -1447,7 +1471,7 @@ class llvm::vfs::RedirectingFileSystemParser { std::vector> Entries; Entries.push_back(std::move(Result)); Result = - llvm::make_unique( + std::make_unique( *I, std::move(Entries), Status("", getNextVirtualUniqueID(), std::chrono::system_clock::now(), 0, 0, 0, @@ -1573,7 +1597,7 @@ RedirectingFileSystem::create(std::unique_ptr Buffer, RedirectingFileSystemParser P(Stream); std::unique_ptr FS( - new RedirectingFileSystem(std::move(ExternalFS))); + new RedirectingFileSystem(ExternalFS)); if (!YAMLFilePath.empty()) { // Use the YAML path from -ivfsoverlay to compute the dir to be prefixed @@ -1702,7 +1726,7 @@ ErrorOr RedirectingFileSystem::status(const Twine &Path, ErrorOr RedirectingFileSystem::status(const Twine &Path) { ErrorOr Result = lookupPath(Path); if (!Result) { - if (IsFallthrough && + if (shouldUseExternalFS() && Result.getError() == llvm::errc::no_such_file_or_directory) { return ExternalFS->status(Path); } @@ -1740,7 +1764,7 @@ ErrorOr> RedirectingFileSystem::openFileForRead(const Twine &Path) { ErrorOr E = lookupPath(Path); if (!E) { - if (IsFallthrough && + if (shouldUseExternalFS() && E.getError() == llvm::errc::no_such_file_or_directory) { return ExternalFS->openFileForRead(Path); } @@ -1763,7 +1787,7 @@ RedirectingFileSystem::openFileForRead(const Twine &Path) { Status S = getRedirectedFileStatus(Path, F->useExternalName(UseExternalNames), *ExternalStatus); return std::unique_ptr( - llvm::make_unique(std::move(*Result), S)); + std::make_unique(std::move(*Result), S)); } std::error_code @@ -1771,7 +1795,7 @@ RedirectingFileSystem::getRealPath(const Twine &Path, SmallVectorImpl &Output) const { ErrorOr Result = lookupPath(Path); if (!Result) { - if (IsFallthrough && + if (shouldUseExternalFS() && Result.getError() == llvm::errc::no_such_file_or_directory) { return ExternalFS->getRealPath(Path, Output); } @@ -1784,8 +1808,8 @@ RedirectingFileSystem::getRealPath(const Twine &Path, } // Even if there is a directory entry, fall back to ExternalFS if allowed, // because directories don't have a single external contents path. - return IsFallthrough ? ExternalFS->getRealPath(Path, Output) - : llvm::errc::invalid_argument; + return shouldUseExternalFS() ? 
ExternalFS->getRealPath(Path, Output) + : llvm::errc::invalid_argument; } IntrusiveRefCntPtr diff --git a/lib/Support/Windows/Mutex.inc b/lib/Support/Windows/Mutex.inc deleted file mode 100644 index b55b14febf2c..000000000000 --- a/lib/Support/Windows/Mutex.inc +++ /dev/null @@ -1,56 +0,0 @@ -//===- llvm/Support/Win32/Mutex.inc - Win32 Mutex Implementation -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the Win32 specific (non-pthread) Mutex class. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -//=== WARNING: Implementation here must contain only generic Win32 code that -//=== is guaranteed to work on *all* Win32 variants. -//===----------------------------------------------------------------------===// - -#include "WindowsSupport.h" -#include "llvm/Support/Mutex.h" - -namespace llvm { - -sys::MutexImpl::MutexImpl(bool /*recursive*/) -{ - data_ = new CRITICAL_SECTION; - InitializeCriticalSection((LPCRITICAL_SECTION)data_); -} - -sys::MutexImpl::~MutexImpl() -{ - DeleteCriticalSection((LPCRITICAL_SECTION)data_); - delete (LPCRITICAL_SECTION)data_; - data_ = 0; -} - -bool -sys::MutexImpl::acquire() -{ - EnterCriticalSection((LPCRITICAL_SECTION)data_); - return true; -} - -bool -sys::MutexImpl::release() -{ - LeaveCriticalSection((LPCRITICAL_SECTION)data_); - return true; -} - -bool -sys::MutexImpl::tryacquire() -{ - return TryEnterCriticalSection((LPCRITICAL_SECTION)data_); -} - -} diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc index 5704930aeecc..c3b13abef5de 100644 --- a/lib/Support/Windows/Path.inc +++ b/lib/Support/Windows/Path.inc @@ -371,13 +371,19 @@ static std::error_code realPathFromHandle(HANDLE H, if (std::error_code EC = realPathFromHandle(H, Buffer)) return EC; - const wchar_t *Data = Buffer.data(); + // Strip the \\?\ prefix. We don't want it ending up in output, and such + // paths don't get canonicalized by file APIs. + wchar_t *Data = Buffer.data(); DWORD CountChars = Buffer.size(); - if (CountChars >= 4) { - if (0 == ::memcmp(Data, L"\\\\?\\", 8)) { - CountChars -= 4; - Data += 4; - } + if (CountChars >= 8 && ::memcmp(Data, L"\\\\?\\UNC\\", 16) == 0) { + // Convert \\?\UNC\foo\bar to \\foo\bar + CountChars -= 6; + Data += 6; + Data[0] = '\\'; + } else if (CountChars >= 4 && ::memcmp(Data, L"\\\\?\\", 8) == 0) { + // Convert \\?\c:\foo to c:\foo + CountChars -= 4; + Data += 4; } // Convert the result from UTF-16 to UTF-8. @@ -1217,57 +1223,34 @@ file_t getStdinHandle() { return ::GetStdHandle(STD_INPUT_HANDLE); } file_t getStdoutHandle() { return ::GetStdHandle(STD_OUTPUT_HANDLE); } file_t getStderrHandle() { return ::GetStdHandle(STD_ERROR_HANDLE); } -std::error_code readNativeFileImpl(file_t FileHandle, char *BufPtr, size_t BytesToRead, - size_t *BytesRead, OVERLAPPED *Overlap) { +Expected readNativeFileImpl(file_t FileHandle, + MutableArrayRef Buf, + OVERLAPPED *Overlap) { // ReadFile can only read 2GB at a time. The caller should check the number of // bytes and read in a loop until termination. 
- DWORD BytesToRead32 = - std::min(size_t(std::numeric_limits::max()), BytesToRead); - DWORD BytesRead32 = 0; - bool Success = - ::ReadFile(FileHandle, BufPtr, BytesToRead32, &BytesRead32, Overlap); - *BytesRead = BytesRead32; - if (!Success) { - DWORD Err = ::GetLastError(); - // Pipe EOF is not an error. - if (Err == ERROR_BROKEN_PIPE) - return std::error_code(); - return mapWindowsError(Err); - } - return std::error_code(); -} - -std::error_code readNativeFile(file_t FileHandle, MutableArrayRef Buf, - size_t *BytesRead) { - return readNativeFileImpl(FileHandle, Buf.data(), Buf.size(), BytesRead, - /*Overlap=*/nullptr); -} - -std::error_code readNativeFileSlice(file_t FileHandle, - MutableArrayRef Buf, size_t Offset) { - char *BufPtr = Buf.data(); - size_t BytesLeft = Buf.size(); - - while (BytesLeft) { - uint64_t CurOff = Buf.size() - BytesLeft + Offset; - OVERLAPPED Overlapped = {}; - Overlapped.Offset = uint32_t(CurOff); - Overlapped.OffsetHigh = uint32_t(uint64_t(CurOff) >> 32); - - size_t BytesRead = 0; - if (auto EC = readNativeFileImpl(FileHandle, BufPtr, BytesLeft, &BytesRead, - &Overlapped)) - return EC; - - // Once we reach EOF, zero the remaining bytes in the buffer. - if (BytesRead == 0) { - memset(BufPtr, 0, BytesLeft); - break; - } - BytesLeft -= BytesRead; - BufPtr += BytesRead; - } - return std::error_code(); + DWORD BytesToRead = + std::min(size_t(std::numeric_limits::max()), Buf.size()); + DWORD BytesRead = 0; + if (::ReadFile(FileHandle, Buf.data(), BytesToRead, &BytesRead, Overlap)) + return BytesRead; + DWORD Err = ::GetLastError(); + // EOF is not an error. + if (Err == ERROR_BROKEN_PIPE || Err == ERROR_HANDLE_EOF) + return BytesRead; + return errorCodeToError(mapWindowsError(Err)); +} + +Expected readNativeFile(file_t FileHandle, MutableArrayRef Buf) { + return readNativeFileImpl(FileHandle, Buf, /*Overlap=*/nullptr); +} + +Expected readNativeFileSlice(file_t FileHandle, + MutableArrayRef Buf, + uint64_t Offset) { + OVERLAPPED Overlapped = {}; + Overlapped.Offset = uint32_t(Offset); + Overlapped.OffsetHigh = uint32_t(Offset >> 32); + return readNativeFileImpl(FileHandle, Buf, &Overlapped); } std::error_code closeFile(file_t &F) { diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc index 0f54e59ee55b..a23ed95fc390 100644 --- a/lib/Support/Windows/Program.inc +++ b/lib/Support/Windows/Program.inc @@ -470,7 +470,7 @@ std::error_code llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents, WindowsEncodingMethod Encoding) { std::error_code EC; - llvm::raw_fd_ostream OS(FileName, EC, llvm::sys::fs::F_Text); + llvm::raw_fd_ostream OS(FileName, EC, llvm::sys::fs::OF_Text); if (EC) return EC; diff --git a/lib/Support/Windows/RWMutex.inc b/lib/Support/Windows/RWMutex.inc deleted file mode 100644 index 8df9bc394160..000000000000 --- a/lib/Support/Windows/RWMutex.inc +++ /dev/null @@ -1,128 +0,0 @@ -//= llvm/Support/Win32/Mutex.inc - Win32 Reader/Writer Mutual Exclusion Lock =// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the Win32 specific (non-pthread) RWMutex class. 
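[Editor's note] The Windows readNativeFileSlice above passes the slice offset through OVERLAPPED, which stores the 64-bit position as two 32-bit fields. A small, platform-neutral check of that split; FakeOverlapped merely mimics the Offset/OffsetHigh pair so the round-trip can be verified without <windows.h>:

#include <cassert>
#include <cstdint>

struct FakeOverlapped {
  uint32_t Offset;     // low 32 bits of the file position
  uint32_t OffsetHigh; // high 32 bits of the file position
};

int main() {
  const uint64_t SliceOffset = 0x123456789ULL;

  FakeOverlapped Ov = {};
  Ov.Offset = static_cast<uint32_t>(SliceOffset);            // truncating cast
  Ov.OffsetHigh = static_cast<uint32_t>(SliceOffset >> 32);  // upper half

  const uint64_t Rebuilt =
      (static_cast<uint64_t>(Ov.OffsetHigh) << 32) | Ov.Offset;
  assert(Rebuilt == SliceOffset);
  return 0;
}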
-// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -//=== WARNING: Implementation here must contain only generic Win32 code that -//=== is guaranteed to work on *all* Win32 variants. -//===----------------------------------------------------------------------===// - -#include "WindowsSupport.h" - -namespace llvm { - -// Windows has slim read-writer lock support on Vista and higher, so we -// will attempt to load the APIs. If they exist, we will use them, and -// if not, we will fall back on critical sections. When we drop support -// for XP, we can stop lazy-loading these APIs and just use them directly. -#if defined(__MINGW32__) - // Taken from WinNT.h - typedef struct _RTL_SRWLOCK { - PVOID Ptr; - } RTL_SRWLOCK, *PRTL_SRWLOCK; - - // Taken from WinBase.h - typedef RTL_SRWLOCK SRWLOCK, *PSRWLOCK; -#endif - -static VOID (WINAPI *fpInitializeSRWLock)(PSRWLOCK lock) = NULL; -static VOID (WINAPI *fpAcquireSRWLockExclusive)(PSRWLOCK lock) = NULL; -static VOID (WINAPI *fpAcquireSRWLockShared)(PSRWLOCK lock) = NULL; -static VOID (WINAPI *fpReleaseSRWLockExclusive)(PSRWLOCK lock) = NULL; -static VOID (WINAPI *fpReleaseSRWLockShared)(PSRWLOCK lock) = NULL; - -static bool sHasSRW = false; - -static bool loadSRW() { - static bool sChecked = false; - if (!sChecked) { - sChecked = true; - - if (HMODULE hLib = ::GetModuleHandleW(L"Kernel32.dll")) { - fpInitializeSRWLock = - (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib, - "InitializeSRWLock"); - fpAcquireSRWLockExclusive = - (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib, - "AcquireSRWLockExclusive"); - fpAcquireSRWLockShared = - (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib, - "AcquireSRWLockShared"); - fpReleaseSRWLockExclusive = - (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib, - "ReleaseSRWLockExclusive"); - fpReleaseSRWLockShared = - (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib, - "ReleaseSRWLockShared"); - - if (fpInitializeSRWLock != NULL) { - sHasSRW = true; - } - } - } - return sHasSRW; -} - -sys::RWMutexImpl::RWMutexImpl() { - if (loadSRW()) { - data_ = safe_calloc(1, sizeof(SRWLOCK)); - fpInitializeSRWLock(static_cast(data_)); - } else { - data_ = safe_calloc(1, sizeof(CRITICAL_SECTION)); - InitializeCriticalSection(static_cast(data_)); - } -} - -sys::RWMutexImpl::~RWMutexImpl() { - if (!sHasSRW) - DeleteCriticalSection(static_cast(data_)); - // Nothing to do in the case of slim reader/writers except free the memory. 
- free(data_); -} - -bool sys::RWMutexImpl::reader_acquire() { - if (sHasSRW) { - fpAcquireSRWLockShared(static_cast(data_)); - } else { - EnterCriticalSection(static_cast(data_)); - } - return true; -} - -bool sys::RWMutexImpl::reader_release() { - if (sHasSRW) { - fpReleaseSRWLockShared(static_cast(data_)); - } else { - LeaveCriticalSection(static_cast(data_)); - } - return true; -} - -bool sys::RWMutexImpl::writer_acquire() { - if (sHasSRW) { - fpAcquireSRWLockExclusive(static_cast(data_)); - } else { - EnterCriticalSection(static_cast(data_)); - } - return true; -} - -bool sys::RWMutexImpl::writer_release() { - if (sHasSRW) { - fpReleaseSRWLockExclusive(static_cast(data_)); - } else { - LeaveCriticalSection(static_cast(data_)); - } - return true; -} - - -} diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc index 6a820ef22b1e..d962daf79348 100644 --- a/lib/Support/Windows/Signals.inc +++ b/lib/Support/Windows/Signals.inc @@ -560,6 +560,9 @@ void llvm::sys::SetInfoSignalFunction(void (*Handler)()) { // Unimplemented. } +void llvm::sys::SetPipeSignalFunction(void (*Handler)()) { + // Unimplemented. +} /// Add a function to be called when a signal is delivered to the process. The /// handler can have a cookie passed to it to identify what instance of the diff --git a/lib/Support/Windows/WindowsSupport.h b/lib/Support/Windows/WindowsSupport.h index fed9b2f462ef..2e2e97430b76 100644 --- a/lib/Support/Windows/WindowsSupport.h +++ b/lib/Support/Windows/WindowsSupport.h @@ -38,6 +38,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Config/config.h" // Get build system configuration settings +#include "llvm/Support/Allocator.h" #include "llvm/Support/Chrono.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/VersionTuple.h" diff --git a/lib/Support/Windows/explicit_symbols.inc b/lib/Support/Windows/explicit_symbols.inc index bbbf7ea6a777..0a4fda1d4e8c 100644 --- a/lib/Support/Windows/explicit_symbols.inc +++ b/lib/Support/Windows/explicit_symbols.inc @@ -90,12 +90,6 @@ INLINE_DEF_FLOAT_SYMBOL(tanf, 1) INLINE_DEF_FLOAT_SYMBOL(tanhf, 1) - // These were added in VS 2013. 
-#if (1800 <= _MSC_VER && _MSC_VER < 1900) - INLINE_DEF_FLOAT_SYMBOL(copysignf, 2) - INLINE_DEF_FLOAT_SYMBOL(fminf, 2) - INLINE_DEF_FLOAT_SYMBOL(fmaxf, 2) -#endif #undef INLINE_DEF_FLOAT_SYMBOL #endif diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp index 09eb36943de9..eba22fd14725 100644 --- a/lib/Support/YAMLTraits.cpp +++ b/lib/Support/YAMLTraits.cpp @@ -40,7 +40,7 @@ IO::IO(void *Context) : Ctxt(Context) {} IO::~IO() = default; -void *IO::getContext() { +void *IO::getContext() const { return Ctxt; } @@ -79,7 +79,7 @@ void Input::ScalarHNode::anchor() {} void Input::MapHNode::anchor() {} void Input::SequenceHNode::anchor() {} -bool Input::outputting() { +bool Input::outputting() const { return false; } @@ -377,12 +377,12 @@ std::unique_ptr Input::createHNodes(Node *N) { // Copy string to permanent storage KeyStr = StringStorage.str().copy(StringAllocator); } - return llvm::make_unique(N, KeyStr); + return std::make_unique(N, KeyStr); } else if (BlockScalarNode *BSN = dyn_cast(N)) { StringRef ValueCopy = BSN->getValue().copy(StringAllocator); - return llvm::make_unique(N, ValueCopy); + return std::make_unique(N, ValueCopy); } else if (SequenceNode *SQ = dyn_cast(N)) { - auto SQHNode = llvm::make_unique(N); + auto SQHNode = std::make_unique(N); for (Node &SN : *SQ) { auto Entry = createHNodes(&SN); if (EC) @@ -391,7 +391,7 @@ std::unique_ptr Input::createHNodes(Node *N) { } return std::move(SQHNode); } else if (MappingNode *Map = dyn_cast(N)) { - auto mapHNode = llvm::make_unique(N); + auto mapHNode = std::make_unique(N); for (KeyValueNode &KVN : *Map) { Node *KeyNode = KVN.getKey(); ScalarNode *Key = dyn_cast(KeyNode); @@ -416,7 +416,7 @@ std::unique_ptr Input::createHNodes(Node *N) { } return std::move(mapHNode); } else if (isa(N)) { - return llvm::make_unique(N); + return std::make_unique(N); } else { setError(N, "unknown node kind"); return nullptr; @@ -440,7 +440,7 @@ Output::Output(raw_ostream &yout, void *context, int WrapColumn) Output::~Output() = default; -bool Output::outputting() { +bool Output::outputting() const { return true; } diff --git a/lib/Support/Z3Solver.cpp b/lib/Support/Z3Solver.cpp index f1a6fdf87cf2..a83d0f441a4b 100644 --- a/lib/Support/Z3Solver.cpp +++ b/lib/Support/Z3Solver.cpp @@ -886,7 +886,7 @@ public: llvm::SMTSolverRef llvm::CreateZ3Solver() { #if LLVM_WITH_Z3 - return llvm::make_unique(); + return std::make_unique(); #else llvm::report_fatal_error("LLVM was not compiled with Z3 support, rebuild " "with -DLLVM_ENABLE_Z3_SOLVER=ON", diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 2baccaa0cbd7..b9989371f5ea 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -65,6 +65,17 @@ using namespace llvm; +const raw_ostream::Colors raw_ostream::BLACK; +const raw_ostream::Colors raw_ostream::RED; +const raw_ostream::Colors raw_ostream::GREEN; +const raw_ostream::Colors raw_ostream::YELLOW; +const raw_ostream::Colors raw_ostream::BLUE; +const raw_ostream::Colors raw_ostream::MAGENTA; +const raw_ostream::Colors raw_ostream::CYAN; +const raw_ostream::Colors raw_ostream::WHITE; +const raw_ostream::Colors raw_ostream::SAVEDCOLOR; +const raw_ostream::Colors raw_ostream::RESET; + raw_ostream::~raw_ostream() { // raw_ostream's subclasses should take care to flush the buffer // in their destructors. 
@@ -133,6 +144,14 @@ raw_ostream &raw_ostream::write_hex(unsigned long long N) { return *this; } +raw_ostream &raw_ostream::operator<<(Colors C) { + if (C == Colors::RESET) + resetColor(); + else + changeColor(C); + return *this; +} + raw_ostream &raw_ostream::write_uuid(const uuid_t UUID) { for (int Idx = 0; Idx < 16; ++Idx) { *this << format("%02" PRIX32, UUID[Idx]); @@ -784,11 +803,15 @@ size_t raw_fd_ostream::preferred_buffer_size() const { raw_ostream &raw_fd_ostream::changeColor(enum Colors colors, bool bold, bool bg) { + if (!ColorEnabled) + return *this; + if (sys::Process::ColorNeedsFlush()) flush(); const char *colorcode = - (colors == SAVEDCOLOR) ? sys::Process::OutputBold(bg) - : sys::Process::OutputColor(colors, bold, bg); + (colors == SAVEDCOLOR) + ? sys::Process::OutputBold(bg) + : sys::Process::OutputColor(static_cast(colors), bold, bg); if (colorcode) { size_t len = strlen(colorcode); write(colorcode, len); @@ -799,6 +822,9 @@ raw_ostream &raw_fd_ostream::changeColor(enum Colors colors, bool bold, } raw_ostream &raw_fd_ostream::resetColor() { + if (!ColorEnabled) + return *this; + if (sys::Process::ColorNeedsFlush()) flush(); const char *colorcode = sys::Process::ResetColor(); @@ -812,6 +838,9 @@ raw_ostream &raw_fd_ostream::resetColor() { } raw_ostream &raw_fd_ostream::reverseColor() { + if (!ColorEnabled) + return *this; + if (sys::Process::ColorNeedsFlush()) flush(); const char *colorcode = sys::Process::OutputReverse(); @@ -843,7 +872,7 @@ void raw_fd_ostream::anchor() {} raw_ostream &llvm::outs() { // Set buffer settings to model stdout behavior. std::error_code EC; - static raw_fd_ostream S("-", EC, sys::fs::F_None); + static raw_fd_ostream S("-", EC, sys::fs::OF_None); assert(!EC); return S; } diff --git a/lib/Support/regcomp.c b/lib/Support/regcomp.c index 12669ab75d1a..ee2a1d87a267 100644 --- a/lib/Support/regcomp.c +++ b/lib/Support/regcomp.c @@ -48,6 +48,7 @@ #include "regex2.h" #include "llvm/Config/config.h" +#include "llvm/Support/Compiler.h" /* character-class table */ static struct cclass { @@ -537,7 +538,7 @@ p_ere_exp(struct parse *p) break; case '{': /* okay as ordinary except if digit follows */ REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT); - /* FALLTHROUGH */ + LLVM_FALLTHROUGH; default: ordinary(p, c); break; @@ -733,7 +734,7 @@ p_simp_re(struct parse *p, break; case '*': REQUIRE(starordinary, REG_BADRPT); - /* FALLTHROUGH */ + LLVM_FALLTHROUGH; default: ordinary(p, (char)c); break; @@ -1635,7 +1636,7 @@ findmust(struct parse *p, struct re_guts *g) return; } } while (OP(s) != O_QUEST && OP(s) != O_CH); - /* fallthrough */ + LLVM_FALLTHROUGH; default: /* things that break a sequence */ if (newlen > g->mlen) { /* ends one */ start = newstart; diff --git a/lib/TableGen/Error.cpp b/lib/TableGen/Error.cpp index 7523b32ca0e5..54b063cb4f8d 100644 --- a/lib/TableGen/Error.cpp +++ b/lib/TableGen/Error.cpp @@ -39,6 +39,8 @@ static void PrintMessage(ArrayRef Loc, SourceMgr::DiagKind Kind, "instantiated from multiclass"); } +void PrintNote(const Twine &Msg) { WithColor::note() << Msg << "\n"; } + void PrintNote(ArrayRef NoteLoc, const Twine &Msg) { PrintMessage(NoteLoc, SourceMgr::DK_Note, Msg); } diff --git a/lib/TableGen/Main.cpp b/lib/TableGen/Main.cpp index bcd39584e450..48ded6c45a46 100644 --- a/lib/TableGen/Main.cpp +++ b/lib/TableGen/Main.cpp @@ -49,6 +49,9 @@ static cl::list MacroNames("D", cl::desc("Name of the macro to be defined"), cl::value_desc("macro name"), cl::Prefix); +static cl::opt +WriteIfChanged("write-if-changed", cl::desc("Only 
write output if it changed")); + static int reportError(const char *ProgName, Twine Msg) { errs() << ProgName << ": " << Msg; errs().flush(); @@ -64,7 +67,7 @@ static int createDependencyFile(const TGParser &Parser, const char *argv0) { return reportError(argv0, "the option -d must be used together with -o\n"); std::error_code EC; - ToolOutputFile DepOut(DependFilename, EC, sys::fs::F_Text); + ToolOutputFile DepOut(DependFilename, EC, sys::fs::OF_None); if (EC) return reportError(argv0, "error opening " + DependFilename + ":" + EC.message() + "\n"); @@ -114,15 +117,17 @@ int llvm::TableGenMain(char *argv0, TableGenMainFn *MainFn) { return Ret; } - // Only updates the real output file if there are any differences. - // This prevents recompilation of all the files depending on it if there - // aren't any. - if (auto ExistingOrErr = MemoryBuffer::getFile(OutputFilename)) - if (std::move(ExistingOrErr.get())->getBuffer() == Out.str()) - return 0; + if (WriteIfChanged) { + // Only updates the real output file if there are any differences. + // This prevents recompilation of all the files depending on it if there + // aren't any. + if (auto ExistingOrErr = MemoryBuffer::getFile(OutputFilename)) + if (std::move(ExistingOrErr.get())->getBuffer() == Out.str()) + return 0; + } std::error_code EC; - ToolOutputFile OutFile(OutputFilename, EC, sys::fs::F_Text); + ToolOutputFile OutFile(OutputFilename, EC, sys::fs::OF_None); if (EC) return reportError(argv0, "error opening " + OutputFilename + ":" + EC.message() + "\n"); diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index 27d1bdc7f4c3..835ef8c7141b 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -438,7 +438,7 @@ Init *BitsInit::resolveReferences(Resolver &R) const { CachedBitVarRef = CurBitVar->getBitVar(); CachedBitVarResolved = CachedBitVarRef->resolveReferences(R); } - + assert(CachedBitVarResolved && "Unresolved bitvar reference"); NewBit = CachedBitVarResolved->getBit(CurBitVar->getBitNum()); } else { // getBit(0) implicitly converts int and bits<1> values to bit. 
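[Editor's note] The -write-if-changed path above skips rewriting the TableGen output when the generated text is identical, so the file's timestamp stays put and targets that depend on it are not rebuilt. A rough equivalent with plain iostreams, assuming nothing about MemoryBuffer or ToolOutputFile; writeIfChanged is an invented helper:

#include <fstream>
#include <sstream>
#include <string>

// Returns false if the file already holds exactly NewContents (nothing to do).
static bool writeIfChanged(const std::string &Path,
                           const std::string &NewContents) {
  std::ifstream In(Path, std::ios::binary);
  if (In) {
    std::ostringstream Existing;
    Existing << In.rdbuf();
    if (Existing.str() == NewContents)
      return false; // unchanged: keep the old timestamp
  }
  std::ofstream Out(Path, std::ios::binary | std::ios::trunc);
  Out << NewContents;
  return true;
}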
@@ -1616,7 +1616,7 @@ void VarDefInit::Profile(FoldingSetNodeID &ID) const { DefInit *VarDefInit::instantiate() { if (!Def) { RecordKeeper &Records = Class->getRecords(); - auto NewRecOwner = make_unique(Records.getNewAnonymousName(), + auto NewRecOwner = std::make_unique(Records.getNewAnonymousName(), Class->getLoc(), Records, /*IsAnonymous=*/true); Record *NewRec = NewRecOwner.get(); @@ -1930,6 +1930,13 @@ void DagInit::Profile(FoldingSetNodeID &ID) const { ProfileDagInit(ID, Val, ValName, makeArrayRef(getTrailingObjects(), NumArgs), makeArrayRef(getTrailingObjects(), NumArgNames)); } +Record *DagInit::getOperatorAsDef(ArrayRef Loc) const { + if (DefInit *DefI = dyn_cast(Val)) + return DefI->getDef(); + PrintFatalError(Loc, "Expected record as operator"); + return nullptr; +} + Init *DagInit::resolveReferences(Resolver &R) const { SmallVector NewArgs; NewArgs.reserve(arg_size()); diff --git a/lib/TableGen/SetTheory.cpp b/lib/TableGen/SetTheory.cpp index a870e41d58f8..5a30ee98cce9 100644 --- a/lib/TableGen/SetTheory.cpp +++ b/lib/TableGen/SetTheory.cpp @@ -255,16 +255,16 @@ void SetTheory::Operator::anchor() {} void SetTheory::Expander::anchor() {} SetTheory::SetTheory() { - addOperator("add", llvm::make_unique()); - addOperator("sub", llvm::make_unique()); - addOperator("and", llvm::make_unique()); - addOperator("shl", llvm::make_unique()); - addOperator("trunc", llvm::make_unique()); - addOperator("rotl", llvm::make_unique(false)); - addOperator("rotr", llvm::make_unique(true)); - addOperator("decimate", llvm::make_unique()); - addOperator("interleave", llvm::make_unique()); - addOperator("sequence", llvm::make_unique()); + addOperator("add", std::make_unique()); + addOperator("sub", std::make_unique()); + addOperator("and", std::make_unique()); + addOperator("shl", std::make_unique()); + addOperator("trunc", std::make_unique()); + addOperator("rotl", std::make_unique(false)); + addOperator("rotr", std::make_unique(true)); + addOperator("decimate", std::make_unique()); + addOperator("interleave", std::make_unique()); + addOperator("sequence", std::make_unique()); } void SetTheory::addOperator(StringRef Name, std::unique_ptr Op) { @@ -276,7 +276,7 @@ void SetTheory::addExpander(StringRef ClassName, std::unique_ptr E) { } void SetTheory::addFieldExpander(StringRef ClassName, StringRef FieldName) { - addExpander(ClassName, llvm::make_unique(FieldName)); + addExpander(ClassName, std::make_unique(FieldName)); } void SetTheory::evaluate(Init *Expr, RecSet &Elts, ArrayRef Loc) { diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp index d28c62b3133d..da2286e41fe5 100644 --- a/lib/TableGen/TGLexer.cpp +++ b/lib/TableGen/TGLexer.cpp @@ -51,7 +51,7 @@ TGLexer::TGLexer(SourceMgr &SM, ArrayRef Macros) : SrcMgr(SM) { // Pretend that we enter the "top-level" include file. PrepIncludeStack.push_back( - make_unique>()); + std::make_unique>()); // Put all macros defined in the command line into the DefinedMacros set. 
std::for_each(Macros.begin(), Macros.end(), @@ -393,7 +393,7 @@ bool TGLexer::LexInclude() { CurPtr = CurBuf.begin(); PrepIncludeStack.push_back( - make_unique>()); + std::make_unique>()); return false; } diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index a9ace152d59e..c373e2899a5d 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -378,7 +378,7 @@ bool TGParser::resolve(const ForeachLoop &Loop, SubstStack &Substs, auto LI = dyn_cast(List); if (!LI) { if (!Final) { - Dest->emplace_back(make_unique(Loop.Loc, Loop.IterVar, + Dest->emplace_back(std::make_unique(Loop.Loc, Loop.IterVar, List)); return resolve(Loop.Entries, Substs, Final, &Dest->back().Loop->Entries, Loc); @@ -413,7 +413,7 @@ bool TGParser::resolve(const std::vector &Source, if (E.Loop) { Error = resolve(*E.Loop, Substs, Final, Dest); } else { - auto Rec = make_unique(*E.Rec); + auto Rec = std::make_unique(*E.Rec); if (Loc) Rec->appendLoc(*Loc); @@ -1147,9 +1147,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { if (!InitList.back()) return nullptr; // All BinOps require their arguments to be of compatible types. - TypedInit *TI = dyn_cast(InitList.back()); + RecTy *ListType = cast(InitList.back())->getType(); if (!ArgType) { - ArgType = TI->getType(); + ArgType = ListType; switch (Code) { case BinOpInit::LISTCONCAT: @@ -1198,11 +1198,11 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { default: llvm_unreachable("other ops have fixed argument types"); } } else { - RecTy *Resolved = resolveTypes(ArgType, TI->getType()); + RecTy *Resolved = resolveTypes(ArgType, ListType); if (!Resolved) { Error(InitLoc, Twine("expected value of type '") + - ArgType->getAsString() + "', got '" + - TI->getType()->getAsString() + "'"); + ArgType->getAsString() + "', got '" + + ListType->getAsString() + "'"); return nullptr; } if (Code != BinOpInit::ADD && Code != BinOpInit::AND && @@ -1330,7 +1330,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { std::unique_ptr ParseRecTmp; Record *ParseRec = CurRec; if (!ParseRec) { - ParseRecTmp = make_unique(".parse", ArrayRef{}, Records); + ParseRecTmp = std::make_unique(".parse", ArrayRef{}, Records); ParseRec = ParseRecTmp.get(); } @@ -1597,7 +1597,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { std::unique_ptr ParseRecTmp; Record *ParseRec = CurRec; if (!ParseRec) { - ParseRecTmp = make_unique(".parse", ArrayRef{}, Records); + ParseRecTmp = std::make_unique(".parse", ArrayRef{}, Records); ParseRec = ParseRecTmp.get(); } @@ -2702,10 +2702,10 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) { return true; if (isa(Name)) - CurRec = make_unique(Records.getNewAnonymousName(), DefLoc, Records, + CurRec = std::make_unique(Records.getNewAnonymousName(), DefLoc, Records, /*Anonymous=*/true); else - CurRec = make_unique(Name, DefLoc, Records); + CurRec = std::make_unique(Name, DefLoc, Records); if (ParseObjectBody(CurRec.get())) return true; @@ -2783,7 +2783,7 @@ bool TGParser::ParseForeach(MultiClass *CurMultiClass) { Lex.Lex(); // Eat the in // Create a loop object and remember it. - Loops.push_back(llvm::make_unique(Loc, IterName, ListValue)); + Loops.push_back(std::make_unique(Loc, IterName, ListValue)); if (Lex.getCode() != tgtok::l_brace) { // FOREACH Declaration IN Object @@ -2834,7 +2834,7 @@ bool TGParser::ParseClass() { } else { // If this is the first reference to this class, create and add it. 
auto NewRec = - llvm::make_unique(Lex.getCurStrVal(), Lex.getLoc(), Records, + std::make_unique(Lex.getCurStrVal(), Lex.getLoc(), Records, /*Class=*/true); CurRec = NewRec.get(); Records.addClass(std::move(NewRec)); @@ -2963,7 +2963,7 @@ bool TGParser::ParseMultiClass() { auto Result = MultiClasses.insert(std::make_pair(Name, - llvm::make_unique(Name, Lex.getLoc(),Records))); + std::make_unique(Name, Lex.getLoc(),Records))); if (!Result.second) return TokError("multiclass '" + Name + "' already defined"); diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h index 6965403a25ab..ac765ebcddc0 100644 --- a/lib/Target/AArch64/AArch64.h +++ b/lib/Target/AArch64/AArch64.h @@ -55,8 +55,9 @@ FunctionPass *createAArch64CollectLOHPass(); InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, AArch64Subtarget &, AArch64RegisterBankInfo &); -FunctionPass *createAArch64PreLegalizeCombiner(); -FunctionPass *createAArch64StackTaggingPass(); +FunctionPass *createAArch64PreLegalizeCombiner(bool IsOptNone); +FunctionPass *createAArch64StackTaggingPass(bool MergeInit); +FunctionPass *createAArch64StackTaggingPreRAPass(); void initializeAArch64A53Fix835769Pass(PassRegistry&); void initializeAArch64A57FPLoadBalancingPass(PassRegistry&); @@ -80,6 +81,7 @@ void initializeFalkorHWPFFixPass(PassRegistry&); void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry&); void initializeLDTLSCleanupPass(PassRegistry&); void initializeAArch64StackTaggingPass(PassRegistry&); +void initializeAArch64StackTaggingPreRAPass(PassRegistry&); } // end namespace llvm #endif diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index e39c6995e367..5b4c9e2149da 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -115,11 +115,12 @@ def FeatureSVE2SM4 : SubtargetFeature<"sve2-sm4", "HasSVE2SM4", "true", def FeatureSVE2SHA3 : SubtargetFeature<"sve2-sha3", "HasSVE2SHA3", "true", "Enable SHA3 SVE2 instructions", [FeatureSVE2, FeatureSHA3]>; -def FeatureSVE2BitPerm : SubtargetFeature<"bitperm", "HasSVE2BitPerm", "true", +def FeatureSVE2BitPerm : SubtargetFeature<"sve2-bitperm", "HasSVE2BitPerm", "true", "Enable bit permutation SVE2 instructions", [FeatureSVE2]>; def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", "Has zero-cycle register moves">; + def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true", "Has zero-cycle zeroing instructions for generic registers">; @@ -284,6 +285,10 @@ def FeatureSEL2 : SubtargetFeature< "sel2", "HasSEL2", "true", "Enable v8.4-A Secure Exception Level 2 extension">; +def FeaturePMU : SubtargetFeature< + "pmu", "HasPMU", "true", + "Enable v8.4-A PMU extension">; + def FeatureTLB_RMI : SubtargetFeature< "tlb-rmi", "HasTLB_RMI", "true", "Enable v8.4-A TLB Range and Maintenance Instructions">; @@ -345,6 +350,21 @@ def FeatureRandGen : SubtargetFeature<"rand", "HasRandGen", def FeatureMTE : SubtargetFeature<"mte", "HasMTE", "true", "Enable Memory Tagging Extension" >; +def FeatureTRBE : SubtargetFeature<"trbe", "HasTRBE", + "true", "Enable Trace Buffer Extension">; + +def FeatureETE : SubtargetFeature<"ete", "HasETE", + "true", "Enable Embedded Trace Extension", + [FeatureTRBE]>; + +def FeatureTME : SubtargetFeature<"tme", "HasTME", + "true", "Enable Transactional Memory Extension" >; + +def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals", + "AllowTaggedGlobals", + "true", "Use an instruction sequence for taking the address of a global " + 
"that allows a memory tag in the upper address bits">; + //===----------------------------------------------------------------------===// // Architectures. // @@ -354,7 +374,7 @@ def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", FeaturePAN, FeatureLOR, FeatureVH]>; def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", - "Support ARM v8.2a instructions", [HasV8_1aOps, FeaturePsUAO, + "Support ARM v8.2a instructions", [HasV8_1aOps, FeaturePsUAO, FeaturePAN_RWV, FeatureRAS, FeatureCCPP]>; def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true", @@ -364,7 +384,7 @@ def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true", def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true", "Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd, FeatureNV, FeatureRASv8_4, FeatureMPAM, FeatureDIT, - FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeatureTLB_RMI, + FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeaturePMU, FeatureTLB_RMI, FeatureFMI, FeatureRCPC_IMMO]>; def HasV8_5aOps : SubtargetFeature< @@ -390,6 +410,7 @@ include "AArch64Schedule.td" include "AArch64InstrInfo.td" include "AArch64SchedPredicates.td" include "AArch64SchedPredExynos.td" +include "AArch64Combine.td" def AArch64InstrInfo : InstrInfo; @@ -484,6 +505,19 @@ def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", FeaturePredictableSelectIsExpensive ]>; +def ProcA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65", + "Cortex-A65 ARM processors", [ + HasV8_2aOps, + FeatureCrypto, + FeatureDotProd, + FeatureFPARMv8, + FeatureFullFP16, + FeatureNEON, + FeatureRAS, + FeatureRCPC, + FeatureSSBS, + ]>; + def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", "Cortex-A72 ARM processors", [ FeatureCRC, @@ -641,6 +675,33 @@ def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor", FeatureSlowSTRQro ]>; +def ProcNeoverseE1 : SubtargetFeature<"neoversee1", "ARMProcFamily", + "NeoverseE1", + "Neoverse E1 ARM processors", [ + HasV8_2aOps, + FeatureCrypto, + FeatureDotProd, + FeatureFPARMv8, + FeatureFullFP16, + FeatureNEON, + FeatureRCPC, + FeatureSSBS, + ]>; + +def ProcNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", + "NeoverseN1", + "Neoverse N1 ARM processors", [ + HasV8_2aOps, + FeatureCrypto, + FeatureDotProd, + FeatureFPARMv8, + FeatureFullFP16, + FeatureNEON, + FeatureRCPC, + FeatureSPE, + FeatureSSBS, + ]>; + def ProcSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira", "Qualcomm Saphira processors", [ FeatureCrypto, @@ -732,19 +793,28 @@ def : ProcessorModel<"generic", NoSchedModel, [ FeatureFuseAES, FeatureNEON, FeaturePerfMon, - FeaturePostRAScheduler + FeaturePostRAScheduler, +// ETE and TRBE are future architecture extensions. We temporariliy enable them +// by default for users targeting generic AArch64, until it is decided in which +// armv8.x-a architecture revision they will end up. The extensions do not +// affect code generated by the compiler and can be used only by explicitly +// mentioning the new system register names in assembly. + FeatureETE ]>; -// FIXME: Cortex-A35 and Cortex-A55 are currently modeled as a Cortex-A53. 
def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>; def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>; def : ProcessorModel<"cortex-a55", CortexA53Model, [ProcA55]>; def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>; +def : ProcessorModel<"cortex-a65", CortexA53Model, [ProcA65]>; +def : ProcessorModel<"cortex-a65ae", CortexA53Model, [ProcA65]>; def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA72]>; def : ProcessorModel<"cortex-a73", CortexA57Model, [ProcA73]>; def : ProcessorModel<"cortex-a75", CortexA57Model, [ProcA75]>; def : ProcessorModel<"cortex-a76", CortexA57Model, [ProcA76]>; def : ProcessorModel<"cortex-a76ae", CortexA57Model, [ProcA76]>; +def : ProcessorModel<"neoverse-e1", CortexA53Model, [ProcNeoverseE1]>; +def : ProcessorModel<"neoverse-n1", CortexA57Model, [ProcNeoverseN1]>; def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>; def : ProcessorModel<"exynos-m1", ExynosM1Model, [ProcExynosM1]>; def : ProcessorModel<"exynos-m2", ExynosM1Model, [ProcExynosM2]>; diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp index 92c8c4955d50..13d389cec7a0 100644 --- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp +++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp @@ -552,7 +552,7 @@ bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C, std::vector ToErase; for (auto &U : I.operands()) { if (U.isReg() && U.isUse() && Substs.find(U.getReg()) != Substs.end()) { - unsigned OrigReg = U.getReg(); + Register OrigReg = U.getReg(); U.setReg(Substs[OrigReg]); if (U.isKill()) // Don't erase straight away, because there may be other operands @@ -611,12 +611,12 @@ void AArch64A57FPLoadBalancing::scanInstruction( // Create a new chain. Multiplies don't require forwarding so can go on any // unit. - unsigned DestReg = MI->getOperand(0).getReg(); + Register DestReg = MI->getOperand(0).getReg(); LLVM_DEBUG(dbgs() << "New chain started for register " << printReg(DestReg, TRI) << " at " << *MI); - auto G = llvm::make_unique(MI, Idx, getColor(DestReg)); + auto G = std::make_unique(MI, Idx, getColor(DestReg)); ActiveChains[DestReg] = G.get(); AllChains.push_back(std::move(G)); @@ -624,8 +624,8 @@ void AArch64A57FPLoadBalancing::scanInstruction( // It is beneficial to keep MLAs on the same functional unit as their // accumulator operand. 
- unsigned DestReg = MI->getOperand(0).getReg(); - unsigned AccumReg = MI->getOperand(3).getReg(); + Register DestReg = MI->getOperand(0).getReg(); + Register AccumReg = MI->getOperand(3).getReg(); maybeKillChain(MI->getOperand(1), Idx, ActiveChains); maybeKillChain(MI->getOperand(2), Idx, ActiveChains); @@ -661,7 +661,7 @@ void AArch64A57FPLoadBalancing::scanInstruction( LLVM_DEBUG(dbgs() << "Creating new chain for dest register " << printReg(DestReg, TRI) << "\n"); - auto G = llvm::make_unique(MI, Idx, getColor(DestReg)); + auto G = std::make_unique(MI, Idx, getColor(DestReg)); ActiveChains[DestReg] = G.get(); AllChains.push_back(std::move(G)); diff --git a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp index 89404463e1f0..981b366c14b1 100644 --- a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp +++ b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp @@ -105,14 +105,14 @@ static bool isGPR64(unsigned Reg, unsigned SubReg, const MachineRegisterInfo *MRI) { if (SubReg) return false; - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) return MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::GPR64RegClass); return AArch64::GPR64RegClass.contains(Reg); } static bool isFPR64(unsigned Reg, unsigned SubReg, const MachineRegisterInfo *MRI) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) return (MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::FPR64RegClass) && SubReg == 0) || (MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::FPR128RegClass) && @@ -201,8 +201,8 @@ bool AArch64AdvSIMDScalar::isProfitableToTransform( unsigned NumNewCopies = 3; unsigned NumRemovableCopies = 0; - unsigned OrigSrc0 = MI.getOperand(1).getReg(); - unsigned OrigSrc1 = MI.getOperand(2).getReg(); + Register OrigSrc0 = MI.getOperand(1).getReg(); + Register OrigSrc1 = MI.getOperand(2).getReg(); unsigned SubReg0; unsigned SubReg1; if (!MRI->def_empty(OrigSrc0)) { @@ -236,7 +236,7 @@ bool AArch64AdvSIMDScalar::isProfitableToTransform( // any of the uses is a transformable instruction, it's likely the tranforms // will chain, enabling us to save a copy there, too. This is an aggressive // heuristic that approximates the graph based cost analysis described above. - unsigned Dst = MI.getOperand(0).getReg(); + Register Dst = MI.getOperand(0).getReg(); bool AllUsesAreCopies = true; for (MachineRegisterInfo::use_instr_nodbg_iterator Use = MRI->use_instr_nodbg_begin(Dst), @@ -293,8 +293,8 @@ void AArch64AdvSIMDScalar::transformInstruction(MachineInstr &MI) { assert(OldOpc != NewOpc && "transform an instruction to itself?!"); // Check if we need a copy for the source registers. - unsigned OrigSrc0 = MI.getOperand(1).getReg(); - unsigned OrigSrc1 = MI.getOperand(2).getReg(); + Register OrigSrc0 = MI.getOperand(1).getReg(); + Register OrigSrc1 = MI.getOperand(2).getReg(); unsigned Src0 = 0, SubReg0; unsigned Src1 = 0, SubReg1; bool KillSrc0 = false, KillSrc1 = false; @@ -354,7 +354,7 @@ void AArch64AdvSIMDScalar::transformInstruction(MachineInstr &MI) { // Create a vreg for the destination. // FIXME: No need to do this if the ultimate user expects an FPR64. // Check for that and avoid the copy if possible. 
- unsigned Dst = MRI->createVirtualRegister(&AArch64::FPR64RegClass); + Register Dst = MRI->createVirtualRegister(&AArch64::FPR64RegClass); // For now, all of the new instructions have the same simple three-register // form, so no need to special case based on what instruction we're diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index 094fbd999523..7ea7915c2ca6 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -99,7 +99,8 @@ public: void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI); - std::map, MCSymbol *> HwasanMemaccessSymbols; + typedef std::tuple HwasanMemaccessTuple; + std::map HwasanMemaccessSymbols; void LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI); void EmitHwasanMemaccessSymbols(Module &M); @@ -150,7 +151,7 @@ private: void printOperand(const MachineInstr *MI, unsigned OpNum, raw_ostream &O); bool printAsmMRegister(const MachineOperand &MO, char Mode, raw_ostream &O); bool printAsmRegInClass(const MachineOperand &MO, - const TargetRegisterClass *RC, bool isVector, + const TargetRegisterClass *RC, unsigned AltName, raw_ostream &O); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, @@ -236,9 +237,12 @@ void AArch64AsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) } void AArch64AsmPrinter::LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI) { - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); + bool IsShort = + MI.getOpcode() == AArch64::HWASAN_CHECK_MEMACCESS_SHORTGRANULES; uint32_t AccessInfo = MI.getOperand(1).getImm(); - MCSymbol *&Sym = HwasanMemaccessSymbols[{Reg, AccessInfo}]; + MCSymbol *&Sym = + HwasanMemaccessSymbols[HwasanMemaccessTuple(Reg, IsShort, AccessInfo)]; if (!Sym) { // FIXME: Make this work on non-ELF. if (!TM.getTargetTriple().isOSBinFormatELF()) @@ -246,6 +250,8 @@ void AArch64AsmPrinter::LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI) { std::string SymName = "__hwasan_check_x" + utostr(Reg - AArch64::X0) + "_" + utostr(AccessInfo); + if (IsShort) + SymName += "_short"; Sym = OutContext.getOrCreateSymbol(SymName); } @@ -263,15 +269,22 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) { std::unique_ptr STI( TM.getTarget().createMCSubtargetInfo(TT.str(), "", "")); - MCSymbol *HwasanTagMismatchSym = + MCSymbol *HwasanTagMismatchV1Sym = OutContext.getOrCreateSymbol("__hwasan_tag_mismatch"); + MCSymbol *HwasanTagMismatchV2Sym = + OutContext.getOrCreateSymbol("__hwasan_tag_mismatch_v2"); - const MCSymbolRefExpr *HwasanTagMismatchRef = - MCSymbolRefExpr::create(HwasanTagMismatchSym, OutContext); + const MCSymbolRefExpr *HwasanTagMismatchV1Ref = + MCSymbolRefExpr::create(HwasanTagMismatchV1Sym, OutContext); + const MCSymbolRefExpr *HwasanTagMismatchV2Ref = + MCSymbolRefExpr::create(HwasanTagMismatchV2Sym, OutContext); for (auto &P : HwasanMemaccessSymbols) { - unsigned Reg = P.first.first; - uint32_t AccessInfo = P.first.second; + unsigned Reg = std::get<0>(P.first); + bool IsShort = std::get<1>(P.first); + uint32_t AccessInfo = std::get<2>(P.first); + const MCSymbolRefExpr *HwasanTagMismatchRef = + IsShort ? 
HwasanTagMismatchV2Ref : HwasanTagMismatchV1Ref; MCSymbol *Sym = P.second; OutStreamer->SwitchSection(OutContext.getELFSection( @@ -304,82 +317,86 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) { .addReg(Reg) .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSR, 56)), *STI); - MCSymbol *HandlePartialSym = OutContext.createTempSymbol(); + MCSymbol *HandleMismatchOrPartialSym = OutContext.createTempSymbol(); OutStreamer->EmitInstruction( MCInstBuilder(AArch64::Bcc) .addImm(AArch64CC::NE) - .addExpr(MCSymbolRefExpr::create(HandlePartialSym, OutContext)), + .addExpr(MCSymbolRefExpr::create(HandleMismatchOrPartialSym, + OutContext)), *STI); MCSymbol *ReturnSym = OutContext.createTempSymbol(); OutStreamer->EmitLabel(ReturnSym); OutStreamer->EmitInstruction( MCInstBuilder(AArch64::RET).addReg(AArch64::LR), *STI); + OutStreamer->EmitLabel(HandleMismatchOrPartialSym); - OutStreamer->EmitLabel(HandlePartialSym); - OutStreamer->EmitInstruction(MCInstBuilder(AArch64::SUBSWri) - .addReg(AArch64::WZR) - .addReg(AArch64::W16) - .addImm(15) - .addImm(0), - *STI); - MCSymbol *HandleMismatchSym = OutContext.createTempSymbol(); - OutStreamer->EmitInstruction( - MCInstBuilder(AArch64::Bcc) - .addImm(AArch64CC::HI) - .addExpr(MCSymbolRefExpr::create(HandleMismatchSym, OutContext)), - *STI); - - OutStreamer->EmitInstruction( - MCInstBuilder(AArch64::ANDXri) - .addReg(AArch64::X17) - .addReg(Reg) - .addImm(AArch64_AM::encodeLogicalImmediate(0xf, 64)), - *STI); - unsigned Size = 1 << (AccessInfo & 0xf); - if (Size != 1) - OutStreamer->EmitInstruction(MCInstBuilder(AArch64::ADDXri) - .addReg(AArch64::X17) - .addReg(AArch64::X17) - .addImm(Size - 1) + if (IsShort) { + OutStreamer->EmitInstruction(MCInstBuilder(AArch64::SUBSWri) + .addReg(AArch64::WZR) + .addReg(AArch64::W16) + .addImm(15) .addImm(0), *STI); - OutStreamer->EmitInstruction(MCInstBuilder(AArch64::SUBSWrs) - .addReg(AArch64::WZR) - .addReg(AArch64::W16) - .addReg(AArch64::W17) - .addImm(0), - *STI); - OutStreamer->EmitInstruction( - MCInstBuilder(AArch64::Bcc) - .addImm(AArch64CC::LS) - .addExpr(MCSymbolRefExpr::create(HandleMismatchSym, OutContext)), - *STI); - - OutStreamer->EmitInstruction( - MCInstBuilder(AArch64::ORRXri) - .addReg(AArch64::X16) - .addReg(Reg) - .addImm(AArch64_AM::encodeLogicalImmediate(0xf, 64)), - *STI); - OutStreamer->EmitInstruction(MCInstBuilder(AArch64::LDRBBui) - .addReg(AArch64::W16) - .addReg(AArch64::X16) - .addImm(0), - *STI); - OutStreamer->EmitInstruction( - MCInstBuilder(AArch64::SUBSXrs) - .addReg(AArch64::XZR) - .addReg(AArch64::X16) - .addReg(Reg) - .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSR, 56)), - *STI); - OutStreamer->EmitInstruction( - MCInstBuilder(AArch64::Bcc) - .addImm(AArch64CC::EQ) - .addExpr(MCSymbolRefExpr::create(ReturnSym, OutContext)), - *STI); + MCSymbol *HandleMismatchSym = OutContext.createTempSymbol(); + OutStreamer->EmitInstruction( + MCInstBuilder(AArch64::Bcc) + .addImm(AArch64CC::HI) + .addExpr(MCSymbolRefExpr::create(HandleMismatchSym, OutContext)), + *STI); + + OutStreamer->EmitInstruction( + MCInstBuilder(AArch64::ANDXri) + .addReg(AArch64::X17) + .addReg(Reg) + .addImm(AArch64_AM::encodeLogicalImmediate(0xf, 64)), + *STI); + unsigned Size = 1 << (AccessInfo & 0xf); + if (Size != 1) + OutStreamer->EmitInstruction(MCInstBuilder(AArch64::ADDXri) + .addReg(AArch64::X17) + .addReg(AArch64::X17) + .addImm(Size - 1) + .addImm(0), + *STI); + OutStreamer->EmitInstruction(MCInstBuilder(AArch64::SUBSWrs) + .addReg(AArch64::WZR) + .addReg(AArch64::W16) + .addReg(AArch64::W17) 
+ .addImm(0), + *STI); + OutStreamer->EmitInstruction( + MCInstBuilder(AArch64::Bcc) + .addImm(AArch64CC::LS) + .addExpr(MCSymbolRefExpr::create(HandleMismatchSym, OutContext)), + *STI); + + OutStreamer->EmitInstruction( + MCInstBuilder(AArch64::ORRXri) + .addReg(AArch64::X16) + .addReg(Reg) + .addImm(AArch64_AM::encodeLogicalImmediate(0xf, 64)), + *STI); + OutStreamer->EmitInstruction(MCInstBuilder(AArch64::LDRBBui) + .addReg(AArch64::W16) + .addReg(AArch64::X16) + .addImm(0), + *STI); + OutStreamer->EmitInstruction( + MCInstBuilder(AArch64::SUBSXrs) + .addReg(AArch64::XZR) + .addReg(AArch64::X16) + .addReg(Reg) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSR, 56)), + *STI); + OutStreamer->EmitInstruction( + MCInstBuilder(AArch64::Bcc) + .addImm(AArch64CC::EQ) + .addExpr(MCSymbolRefExpr::create(ReturnSym, OutContext)), + *STI); + + OutStreamer->EmitLabel(HandleMismatchSym); + } - OutStreamer->EmitLabel(HandleMismatchSym); OutStreamer->EmitInstruction(MCInstBuilder(AArch64::STPXpre) .addReg(AArch64::SP) .addReg(AArch64::X0) @@ -414,16 +431,16 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) { MCInstBuilder(AArch64::ADRP) .addReg(AArch64::X16) .addExpr(AArch64MCExpr::create( - HwasanTagMismatchRef, - AArch64MCExpr::VariantKind::VK_GOT_PAGE, OutContext)), + HwasanTagMismatchRef, AArch64MCExpr::VariantKind::VK_GOT_PAGE, + OutContext)), *STI); OutStreamer->EmitInstruction( MCInstBuilder(AArch64::LDRXui) .addReg(AArch64::X16) .addReg(AArch64::X16) .addExpr(AArch64MCExpr::create( - HwasanTagMismatchRef, - AArch64MCExpr::VariantKind::VK_GOT_LO12, OutContext)), + HwasanTagMismatchRef, AArch64MCExpr::VariantKind::VK_GOT_LO12, + OutContext)), *STI); OutStreamer->EmitInstruction( MCInstBuilder(AArch64::BR).addReg(AArch64::X16), *STI); @@ -485,15 +502,14 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, default: llvm_unreachable(""); case MachineOperand::MO_Register: { - unsigned Reg = MO.getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + Register Reg = MO.getReg(); + assert(Register::isPhysicalRegister(Reg)); assert(!MO.getSubReg() && "Subregs should be eliminated!"); O << AArch64InstPrinter::getRegisterName(Reg); break; } case MachineOperand::MO_Immediate: { - int64_t Imm = MO.getImm(); - O << '#' << Imm; + O << MO.getImm(); break; } case MachineOperand::MO_GlobalAddress: { @@ -510,7 +526,7 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, bool AArch64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode, raw_ostream &O) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); switch (Mode) { default: return true; // Unknown mode. @@ -531,14 +547,13 @@ bool AArch64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode, // printing. bool AArch64AsmPrinter::printAsmRegInClass(const MachineOperand &MO, const TargetRegisterClass *RC, - bool isVector, raw_ostream &O) { + unsigned AltName, raw_ostream &O) { assert(MO.isReg() && "Should only get here with a register!"); const TargetRegisterInfo *RI = STI->getRegisterInfo(); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg)); assert(RI->regsOverlap(RegToPrint, Reg)); - O << AArch64InstPrinter::getRegisterName( - RegToPrint, isVector ? 
AArch64::vreg : AArch64::NoRegAltName); + O << AArch64InstPrinter::getRegisterName(RegToPrint, AltName); return false; } @@ -574,6 +589,7 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, case 's': // Print S register. case 'd': // Print D register. case 'q': // Print Q register. + case 'z': // Print Z register. if (MO.isReg()) { const TargetRegisterClass *RC; switch (ExtraCode[0]) { @@ -592,10 +608,13 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, case 'q': RC = &AArch64::FPR128RegClass; break; + case 'z': + RC = &AArch64::ZPRRegClass; + break; default: return true; } - return printAsmRegInClass(MO, RC, false /* vector */, O); + return printAsmRegInClass(MO, RC, AArch64::NoRegAltName, O); } printOperand(MI, OpNum, O); return false; @@ -605,16 +624,26 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, // According to ARM, we should emit x and v registers unless we have a // modifier. if (MO.isReg()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // If this is a w or x register, print an x register. if (AArch64::GPR32allRegClass.contains(Reg) || AArch64::GPR64allRegClass.contains(Reg)) return printAsmMRegister(MO, 'x', O); + unsigned AltName = AArch64::NoRegAltName; + const TargetRegisterClass *RegClass; + if (AArch64::ZPRRegClass.contains(Reg)) { + RegClass = &AArch64::ZPRRegClass; + } else if (AArch64::PPRRegClass.contains(Reg)) { + RegClass = &AArch64::PPRRegClass; + } else { + RegClass = &AArch64::FPR128RegClass; + AltName = AArch64::vreg; + } + // If this is a b, h, s, d, or q register, print it as a v register. - return printAsmRegInClass(MO, &AArch64::FPR128RegClass, true /* vector */, - O); + return printAsmRegInClass(MO, RegClass, AltName, O); } printOperand(MI, OpNum, O); @@ -682,7 +711,7 @@ void AArch64AsmPrinter::EmitJumpTableInfo() { if (JTBBs.empty()) continue; unsigned Size = AFI->getJumpTableEntrySize(JTI); - EmitAlignment(Log2_32(Size)); + EmitAlignment(Align(Size)); OutStreamer->EmitLabel(GetJTISymbol(JTI)); for (auto *JTBB : JTBBs) @@ -725,12 +754,12 @@ void AArch64AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI, /// add xDest, xDest, xScratch, lsl #2 void AArch64AsmPrinter::LowerJumpTableDestSmall(llvm::MCStreamer &OutStreamer, const llvm::MachineInstr &MI) { - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned ScratchReg = MI.getOperand(1).getReg(); - unsigned ScratchRegW = + Register DestReg = MI.getOperand(0).getReg(); + Register ScratchReg = MI.getOperand(1).getReg(); + Register ScratchRegW = STI->getRegisterInfo()->getSubReg(ScratchReg, AArch64::sub_32); - unsigned TableReg = MI.getOperand(2).getReg(); - unsigned EntryReg = MI.getOperand(3).getReg(); + Register TableReg = MI.getOperand(2).getReg(); + Register EntryReg = MI.getOperand(3).getReg(); int JTIdx = MI.getOperand(4).getIndex(); bool IsByteEntry = MI.getOpcode() == AArch64::JumpTableDest8; @@ -800,7 +829,7 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, if (CallTarget) { assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget && "High 16 bits of call target should be zero."); - unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg(); + Register ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg(); EncodedBytes = 16; // Materialize the jump address: EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::MOVZXi) @@ -830,7 +859,7 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, } void 
AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) { - unsigned DestReg = MI.getOperand(0).getReg(); + Register DestReg = MI.getOperand(0).getReg(); if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround()) { // Convert H/S/D register to corresponding Q register if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31) @@ -894,32 +923,32 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { default: break; case AArch64::MOVMCSym: { - unsigned DestReg = MI->getOperand(0).getReg(); - const MachineOperand &MO_Sym = MI->getOperand(1); - MachineOperand Hi_MOSym(MO_Sym), Lo_MOSym(MO_Sym); - MCOperand Hi_MCSym, Lo_MCSym; - - Hi_MOSym.setTargetFlags(AArch64II::MO_G1 | AArch64II::MO_S); - Lo_MOSym.setTargetFlags(AArch64II::MO_G0 | AArch64II::MO_NC); - - MCInstLowering.lowerOperand(Hi_MOSym, Hi_MCSym); - MCInstLowering.lowerOperand(Lo_MOSym, Lo_MCSym); - - MCInst MovZ; - MovZ.setOpcode(AArch64::MOVZXi); - MovZ.addOperand(MCOperand::createReg(DestReg)); - MovZ.addOperand(Hi_MCSym); - MovZ.addOperand(MCOperand::createImm(16)); - EmitToStreamer(*OutStreamer, MovZ); - - MCInst MovK; - MovK.setOpcode(AArch64::MOVKXi); - MovK.addOperand(MCOperand::createReg(DestReg)); - MovK.addOperand(MCOperand::createReg(DestReg)); - MovK.addOperand(Lo_MCSym); - MovK.addOperand(MCOperand::createImm(0)); - EmitToStreamer(*OutStreamer, MovK); - return; + Register DestReg = MI->getOperand(0).getReg(); + const MachineOperand &MO_Sym = MI->getOperand(1); + MachineOperand Hi_MOSym(MO_Sym), Lo_MOSym(MO_Sym); + MCOperand Hi_MCSym, Lo_MCSym; + + Hi_MOSym.setTargetFlags(AArch64II::MO_G1 | AArch64II::MO_S); + Lo_MOSym.setTargetFlags(AArch64II::MO_G0 | AArch64II::MO_NC); + + MCInstLowering.lowerOperand(Hi_MOSym, Hi_MCSym); + MCInstLowering.lowerOperand(Lo_MOSym, Lo_MCSym); + + MCInst MovZ; + MovZ.setOpcode(AArch64::MOVZXi); + MovZ.addOperand(MCOperand::createReg(DestReg)); + MovZ.addOperand(Hi_MCSym); + MovZ.addOperand(MCOperand::createImm(16)); + EmitToStreamer(*OutStreamer, MovZ); + + MCInst MovK; + MovK.setOpcode(AArch64::MOVKXi); + MovK.addOperand(MCOperand::createReg(DestReg)); + MovK.addOperand(MCOperand::createReg(DestReg)); + MovK.addOperand(Lo_MCSym); + MovK.addOperand(MCOperand::createImm(0)); + EmitToStreamer(*OutStreamer, MovK); + return; } case AArch64::MOVIv2d_ns: // If the target has , lower this @@ -1084,6 +1113,7 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { return; case AArch64::HWASAN_CHECK_MEMACCESS: + case AArch64::HWASAN_CHECK_MEMACCESS_SHORTGRANULES: LowerHWASAN_CHECK_MEMACCESS(*MI); return; @@ -1193,4 +1223,6 @@ extern "C" void LLVMInitializeAArch64AsmPrinter() { RegisterAsmPrinter X(getTheAArch64leTarget()); RegisterAsmPrinter Y(getTheAArch64beTarget()); RegisterAsmPrinter Z(getTheARM64Target()); + RegisterAsmPrinter W(getTheARM64_32Target()); + RegisterAsmPrinter V(getTheAArch64_32Target()); } diff --git a/lib/Target/AArch64/AArch64CallLowering.cpp b/lib/Target/AArch64/AArch64CallLowering.cpp index 59757769c89a..ed93d02aa615 100644 --- a/lib/Target/AArch64/AArch64CallLowering.cpp +++ b/lib/Target/AArch64/AArch64CallLowering.cpp @@ -99,7 +99,7 @@ struct IncomingArgHandler : public CallLowering::ValueHandler { /// (it's an implicit-def of the BL). 
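
Aside, not part of the imported sources: the HWASAN changes in the AsmPrinter hunks above key the outlined check on (register, short-granules, access info) and route the short-granule flavour to __hwasan_tag_mismatch_v2. A small sketch of how the check symbol name and access size fall out of that lowering; the register number and access-info value in the comment are made up:

    // Illustrative only; mirrors the string and size arithmetic used by
    // LowerHWASAN_CHECK_MEMACCESS / EmitHwasanMemaccessSymbols above.
    #include <cstdint>
    #include <string>

    std::string hwasanCheckSymbol(unsigned XRegNo, uint32_t AccessInfo,
                                  bool ShortGranules) {
      std::string Name = "__hwasan_check_x" + std::to_string(XRegNo) + "_" +
                         std::to_string(AccessInfo);
      if (ShortGranules)
        Name += "_short"; // this variant ends up calling __hwasan_tag_mismatch_v2
      return Name;        // e.g. (3, 18, false) -> "__hwasan_check_x3_18"
    }

    unsigned hwasanAccessSize(uint32_t AccessInfo) {
      return 1u << (AccessInfo & 0xf); // low nibble encodes log2 of the access size
    }
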
virtual void markPhysRegUsed(unsigned PhysReg) = 0; - bool isArgumentHandler() const override { return true; } + bool isIncomingArgumentHandler() const override { return true; } uint64_t StackUsed; }; @@ -110,6 +110,7 @@ struct FormalArgHandler : public IncomingArgHandler { : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {} void markPhysRegUsed(unsigned PhysReg) override { + MIRBuilder.getMRI()->addLiveIn(PhysReg); MIRBuilder.getMBB().addLiveIn(PhysReg); } }; @@ -129,14 +130,29 @@ struct CallReturnHandler : public IncomingArgHandler { struct OutgoingArgHandler : public CallLowering::ValueHandler { OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstrBuilder MIB, CCAssignFn *AssignFn, - CCAssignFn *AssignFnVarArg) + CCAssignFn *AssignFnVarArg, bool IsTailCall = false, + int FPDiff = 0) : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB), - AssignFnVarArg(AssignFnVarArg), StackSize(0) {} + AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), FPDiff(FPDiff), + StackSize(0) {} + + bool isIncomingArgumentHandler() const override { return false; } Register getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { + MachineFunction &MF = MIRBuilder.getMF(); LLT p0 = LLT::pointer(0, 64); LLT s64 = LLT::scalar(64); + + if (IsTailCall) { + Offset += FPDiff; + int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true); + Register FIReg = MRI.createGenericVirtualRegister(p0); + MIRBuilder.buildFrameIndex(FIReg, FI); + MPO = MachinePointerInfo::getFixedStack(MF, FI); + return FIReg; + } + Register SPReg = MRI.createGenericVirtualRegister(p0); MIRBuilder.buildCopy(SPReg, Register(AArch64::SP)); @@ -146,7 +162,7 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler { Register AddrReg = MRI.createGenericVirtualRegister(p0); MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg); - MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset); + MPO = MachinePointerInfo::getStack(MF, Offset); return AddrReg; } @@ -173,12 +189,13 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler { bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, const CallLowering::ArgInfo &Info, + ISD::ArgFlagsTy Flags, CCState &State) override { bool Res; if (Info.IsFixed) - Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State); + Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State); else - Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State); + Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State); StackSize = State.getNextStackOffset(); return Res; @@ -186,10 +203,19 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler { MachineInstrBuilder MIB; CCAssignFn *AssignFnVarArg; + bool IsTailCall; + + /// For tail calls, the byte offset of the call's argument area from the + /// callee's. Unused elsewhere. + int FPDiff; uint64_t StackSize; }; } // namespace +static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) { + return CallConv == CallingConv::Fast && TailCallOpt; +} + void AArch64CallLowering::splitToValueTypes( const ArgInfo &OrigArg, SmallVectorImpl &SplitArgs, const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv) const { @@ -207,7 +233,7 @@ void AArch64CallLowering::splitToValueTypes( // No splitting to do, but we want to replace the original type (e.g. [1 x // double] -> double). 
SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx), - OrigArg.Flags, OrigArg.IsFixed); + OrigArg.Flags[0], OrigArg.IsFixed); return; } @@ -218,13 +244,13 @@ void AArch64CallLowering::splitToValueTypes( OrigArg.Ty, CallConv, false); for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) { Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx); - SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags, + SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags[0], OrigArg.IsFixed); if (NeedsRegBlock) - SplitArgs.back().Flags.setInConsecutiveRegs(); + SplitArgs.back().Flags[0].setInConsecutiveRegs(); } - SplitArgs.back().Flags.setInConsecutiveRegsLast(); + SplitArgs.back().Flags[0].setInConsecutiveRegsLast(); } bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, @@ -344,6 +370,49 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, return Success; } +/// Helper function to compute forwarded registers for musttail calls. Computes +/// the forwarded registers, sets MBB liveness, and emits COPY instructions that +/// can be used to save + restore registers later. +static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder, + CCAssignFn *AssignFn) { + MachineBasicBlock &MBB = MIRBuilder.getMBB(); + MachineFunction &MF = MIRBuilder.getMF(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + + if (!MFI.hasMustTailInVarArgFunc()) + return; + + AArch64FunctionInfo *FuncInfo = MF.getInfo(); + const Function &F = MF.getFunction(); + assert(F.isVarArg() && "Expected F to be vararg?"); + + // Compute the set of forwarded registers. The rest are scratch. + SmallVector ArgLocs; + CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs, + F.getContext()); + SmallVector RegParmTypes; + RegParmTypes.push_back(MVT::i64); + RegParmTypes.push_back(MVT::f128); + + // Later on, we can use this vector to restore the registers if necessary. + SmallVectorImpl &Forwards = + FuncInfo->getForwardedMustTailRegParms(); + CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, AssignFn); + + // Conservatively forward X8, since it might be used for an aggregate + // return. + if (!CCInfo.isAllocated(AArch64::X8)) { + unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass); + Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64)); + } + + // Add the forwards to the MachineBasicBlock and MachineFunction. + for (const auto &F : Forwards) { + MBB.addLiveIn(F.PReg); + MIRBuilder.buildCopy(Register(F.VReg), Register(F.PReg)); + } +} + bool AArch64CallLowering::lowerFormalArguments( MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef> VRegs) const { @@ -376,64 +445,530 @@ bool AArch64CallLowering::lowerFormalArguments( if (!handleAssignments(MIRBuilder, SplitArgs, Handler)) return false; + AArch64FunctionInfo *FuncInfo = MF.getInfo(); + uint64_t StackOffset = Handler.StackUsed; if (F.isVarArg()) { - if (!MF.getSubtarget().isTargetDarwin()) { - // FIXME: we need to reimplement saveVarArgsRegisters from + auto &Subtarget = MF.getSubtarget(); + if (!Subtarget.isTargetDarwin()) { + // FIXME: we need to reimplement saveVarArgsRegisters from // AArch64ISelLowering. return false; } - // We currently pass all varargs at 8-byte alignment. - uint64_t StackOffset = alignTo(Handler.StackUsed, 8); + // We currently pass all varargs at 8-byte alignment, or 4 in ILP32. + StackOffset = alignTo(Handler.StackUsed, Subtarget.isTargetILP32() ? 
4 : 8); auto &MFI = MIRBuilder.getMF().getFrameInfo(); - AArch64FunctionInfo *FuncInfo = MF.getInfo(); FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true)); } + if (doesCalleeRestoreStack(F.getCallingConv(), + MF.getTarget().Options.GuaranteedTailCallOpt)) { + // We have a non-standard ABI, so why not make full use of the stack that + // we're going to pop? It must be aligned to 16 B in any case. + StackOffset = alignTo(StackOffset, 16); + + // If we're expected to restore the stack (e.g. fastcc), then we'll be + // adding a multiple of 16. + FuncInfo->setArgumentStackToRestore(StackOffset); + + // Our own callers will guarantee that the space is free by giving an + // aligned value to CALLSEQ_START. + } + + // When we tail call, we need to check if the callee's arguments + // will fit on the caller's stack. So, whenever we lower formal arguments, + // we should keep track of this information, since we might lower a tail call + // in this function later. + FuncInfo->setBytesInStackArgArea(StackOffset); + auto &Subtarget = MF.getSubtarget(); if (Subtarget.hasCustomCallingConv()) Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF); + handleMustTailForwardedRegisters(MIRBuilder, AssignFn); + // Move back to the end of the basic block. MIRBuilder.setMBB(MBB); return true; } +/// Return true if the calling convention is one that we can guarantee TCO for. +static bool canGuaranteeTCO(CallingConv::ID CC) { + return CC == CallingConv::Fast; +} + +/// Return true if we might ever do TCO for calls with this calling convention. +static bool mayTailCallThisCC(CallingConv::ID CC) { + switch (CC) { + case CallingConv::C: + case CallingConv::PreserveMost: + case CallingConv::Swift: + return true; + default: + return canGuaranteeTCO(CC); + } +} + +/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for +/// CC. +static std::pair +getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) { + return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)}; +} + +bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay( + CallLoweringInfo &Info, MachineFunction &MF, + SmallVectorImpl &InArgs) const { + const Function &CallerF = MF.getFunction(); + CallingConv::ID CalleeCC = Info.CallConv; + CallingConv::ID CallerCC = CallerF.getCallingConv(); + + // If the calling conventions match, then everything must be the same. + if (CalleeCC == CallerCC) + return true; + + // Check if the caller and callee will handle arguments in the same way. + const AArch64TargetLowering &TLI = *getTLI(); + CCAssignFn *CalleeAssignFnFixed; + CCAssignFn *CalleeAssignFnVarArg; + std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) = + getAssignFnsForCC(CalleeCC, TLI); + + CCAssignFn *CallerAssignFnFixed; + CCAssignFn *CallerAssignFnVarArg; + std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) = + getAssignFnsForCC(CallerCC, TLI); + + if (!resultsCompatible(Info, MF, InArgs, *CalleeAssignFnFixed, + *CalleeAssignFnVarArg, *CallerAssignFnFixed, + *CallerAssignFnVarArg)) + return false; + + // Make sure that the caller and callee preserve all of the same registers. 
+ auto TRI = MF.getSubtarget().getRegisterInfo(); + const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); + const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); + if (MF.getSubtarget().hasCustomCallingConv()) { + TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved); + TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved); + } + + return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved); +} + +bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable( + CallLoweringInfo &Info, MachineFunction &MF, + SmallVectorImpl &OutArgs) const { + // If there are no outgoing arguments, then we are done. + if (OutArgs.empty()) + return true; + + const Function &CallerF = MF.getFunction(); + CallingConv::ID CalleeCC = Info.CallConv; + CallingConv::ID CallerCC = CallerF.getCallingConv(); + const AArch64TargetLowering &TLI = *getTLI(); + + CCAssignFn *AssignFnFixed; + CCAssignFn *AssignFnVarArg; + std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI); + + // We have outgoing arguments. Make sure that we can tail call with them. + SmallVector OutLocs; + CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext()); + + if (!analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg)) { + LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n"); + return false; + } + + // Make sure that they can fit on the caller's stack. + const AArch64FunctionInfo *FuncInfo = MF.getInfo(); + if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) { + LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n"); + return false; + } + + // Verify that the parameters in callee-saved registers match. + // TODO: Port this over to CallLowering as general code once swiftself is + // supported. + auto TRI = MF.getSubtarget().getRegisterInfo(); + const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + for (unsigned i = 0; i < OutLocs.size(); ++i) { + auto &ArgLoc = OutLocs[i]; + // If it's not a register, it's fine. + if (!ArgLoc.isRegLoc()) { + if (Info.IsVarArg) { + // Be conservative and disallow variadic memory operands to match SDAG's + // behaviour. + // FIXME: If the caller's calling convention is C, then we can + // potentially use its argument area. However, for cases like fastcc, + // we can't do anything. + LLVM_DEBUG( + dbgs() + << "... Cannot tail call vararg function with stack arguments\n"); + return false; + } + continue; + } + + Register Reg = ArgLoc.getLocReg(); + + // Only look at callee-saved registers. + if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg)) + continue; + + LLVM_DEBUG( + dbgs() + << "... Call has an argument passed in a callee-saved register.\n"); + + // Check if it was copied from. + ArgInfo &OutInfo = OutArgs[i]; + + if (OutInfo.Regs.size() > 1) { + LLVM_DEBUG( + dbgs() << "... Cannot handle arguments in multiple registers.\n"); + return false; + } + + // Check if we copy the register, walking through copies from virtual + // registers. Note that getDefIgnoringCopies does not ignore copies from + // physical registers. + MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI); + if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) { + LLVM_DEBUG( + dbgs() + << "... Parameter was not copied into a VReg, cannot tail call.\n"); + return false; + } + + // Got a copy. Verify that it's the same as the register we want. 
+ Register CopyRHS = RegDef->getOperand(1).getReg(); + if (CopyRHS != Reg) { + LLVM_DEBUG(dbgs() << "... Callee-saved register was not copied into " + "VReg, cannot tail call.\n"); + return false; + } + } + + return true; +} + +bool AArch64CallLowering::isEligibleForTailCallOptimization( + MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, + SmallVectorImpl &InArgs, + SmallVectorImpl &OutArgs) const { + + // Must pass all target-independent checks in order to tail call optimize. + if (!Info.IsTailCall) + return false; + + CallingConv::ID CalleeCC = Info.CallConv; + MachineFunction &MF = MIRBuilder.getMF(); + const Function &CallerF = MF.getFunction(); + + LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n"); + + if (Info.SwiftErrorVReg) { + // TODO: We should handle this. + // Note that this is also handled by the check for no outgoing arguments. + // Proactively disabling this though, because the swifterror handling in + // lowerCall inserts a COPY *after* the location of the call. + LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n"); + return false; + } + + if (!mayTailCallThisCC(CalleeCC)) { + LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n"); + return false; + } + + // Byval parameters hand the function a pointer directly into the stack area + // we want to reuse during a tail call. Working around this *is* possible (see + // X86). + // + // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try + // it? + // + // On Windows, "inreg" attributes signify non-aggregate indirect returns. + // In this case, it is necessary to save/restore X0 in the callee. Tail + // call opt interferes with this. So we disable tail call opt when the + // caller has an argument with "inreg" attribute. + // + // FIXME: Check whether the callee also has an "inreg" argument. + // + // When the caller has a swifterror argument, we don't want to tail call + // because would have to move into the swifterror register before the + // tail call. + if (any_of(CallerF.args(), [](const Argument &A) { + return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr(); + })) { + LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, " + "inreg, or swifterror arguments\n"); + return false; + } + + // Externally-defined functions with weak linkage should not be + // tail-called on AArch64 when the OS does not support dynamic + // pre-emption of symbols, as the AAELF spec requires normal calls + // to undefined weak functions to be replaced with a NOP or jump to the + // next instruction. The behaviour of branch instructions in this + // situation (as used for tail calls) is implementation-defined, so we + // cannot rely on the linker replacing the tail call with a return. + if (Info.Callee.isGlobal()) { + const GlobalValue *GV = Info.Callee.getGlobal(); + const Triple &TT = MF.getTarget().getTargetTriple(); + if (GV->hasExternalWeakLinkage() && + (!TT.isOSWindows() || TT.isOSBinFormatELF() || + TT.isOSBinFormatMachO())) { + LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function " + "with weak linkage for this OS.\n"); + return false; + } + } + + // If we have -tailcallopt, then we're done. + if (MF.getTarget().Options.GuaranteedTailCallOpt) + return canGuaranteeTCO(CalleeCC) && CalleeCC == CallerF.getCallingConv(); + + // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall). + // Try to find cases where we can do that. 
+ + // I want anyone implementing a new calling convention to think long and hard + // about this assert. + assert((!Info.IsVarArg || CalleeCC == CallingConv::C) && + "Unexpected variadic calling convention"); + + // Verify that the incoming and outgoing arguments from the callee are + // safe to tail call. + if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) { + LLVM_DEBUG( + dbgs() + << "... Caller and callee have incompatible calling conventions.\n"); + return false; + } + + if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs)) + return false; + + LLVM_DEBUG( + dbgs() << "... Call is eligible for tail call optimization.\n"); + return true; +} + +static unsigned getCallOpcode(const Function &CallerF, bool IsIndirect, + bool IsTailCall) { + if (!IsTailCall) + return IsIndirect ? AArch64::BLR : AArch64::BL; + + if (!IsIndirect) + return AArch64::TCRETURNdi; + + // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use + // x16 or x17. + if (CallerF.hasFnAttribute("branch-target-enforcement")) + return AArch64::TCRETURNriBTI; + + return AArch64::TCRETURNri; +} + +bool AArch64CallLowering::lowerTailCall( + MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, + SmallVectorImpl &OutArgs) const { + MachineFunction &MF = MIRBuilder.getMF(); + const Function &F = MF.getFunction(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const AArch64TargetLowering &TLI = *getTLI(); + AArch64FunctionInfo *FuncInfo = MF.getInfo(); + + // True when we're tail calling, but without -tailcallopt. + bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt; + + // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64 + // register class. Until we can do that, we should fall back here. + if (F.hasFnAttribute("branch-target-enforcement")) { + LLVM_DEBUG( + dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n"); + return false; + } + + // Find out which ABI gets to decide where things go. + CallingConv::ID CalleeCC = Info.CallConv; + CCAssignFn *AssignFnFixed; + CCAssignFn *AssignFnVarArg; + std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI); + + MachineInstrBuilder CallSeqStart; + if (!IsSibCall) + CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN); + + unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), true); + auto MIB = MIRBuilder.buildInstrNoInsert(Opc); + MIB.add(Info.Callee); + + // Byte offset for the tail call. When we are sibcalling, this will always + // be 0. + MIB.addImm(0); + + // Tell the call which registers are clobbered. + auto TRI = MF.getSubtarget().getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(MF, F.getCallingConv()); + if (MF.getSubtarget().hasCustomCallingConv()) + TRI->UpdateCustomCallPreservedMask(MF, &Mask); + MIB.addRegMask(Mask); + + if (TRI->isAnyArgRegReserved(MF)) + TRI->emitReservedArgRegCallError(MF); + + // FPDiff is the byte offset of the call's argument area from the callee's. + // Stores to callee stack arguments will be placed in FixedStackSlots offset + // by this amount for a tail call. In a sibling call it must be 0 because the + // caller will deallocate the entire stack and the callee still expects its + // arguments to begin at SP+0. + int FPDiff = 0; + + // This will be 0 for sibcalls, potentially nonzero for tail calls produced + // by -tailcallopt. For sibcalls, the memory operands for the call are + // already available in the caller's incoming argument space. 
+ unsigned NumBytes = 0; + if (!IsSibCall) { + // We aren't sibcalling, so we need to compute FPDiff. We need to do this + // before handling assignments, because FPDiff must be known for memory + // arguments. + unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea(); + SmallVector OutLocs; + CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext()); + analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg); + + // The callee will pop the argument stack as a tail call. Thus, we must + // keep it 16-byte aligned. + NumBytes = alignTo(OutInfo.getNextStackOffset(), 16); + + // FPDiff will be negative if this tail call requires more space than we + // would automatically have in our incoming argument space. Positive if we + // actually shrink the stack. + FPDiff = NumReusableBytes - NumBytes; + + // The stack pointer must be 16-byte aligned at all times it's used for a + // memory operation, which in practice means at *all* times and in + // particular across call boundaries. Therefore our own arguments started at + // a 16-byte aligned SP and the delta applied for the tail call should + // satisfy the same constraint. + assert(FPDiff % 16 == 0 && "unaligned stack on tail call"); + } + + const auto &Forwards = FuncInfo->getForwardedMustTailRegParms(); + + // Do the actual argument marshalling. + SmallVector PhysRegs; + OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed, + AssignFnVarArg, true, FPDiff); + if (!handleAssignments(MIRBuilder, OutArgs, Handler)) + return false; + + if (Info.IsVarArg && Info.IsMustTailCall) { + // Now we know what's being passed to the function. Add uses to the call for + // the forwarded registers that we *aren't* passing as parameters. This will + // preserve the copies we build earlier. + for (const auto &F : Forwards) { + Register ForwardedReg = F.PReg; + // If the register is already passed, or aliases a register which is + // already being passed, then skip it. + if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) { + if (!Use.isReg()) + return false; + return TRI->regsOverlap(Use.getReg(), ForwardedReg); + })) + continue; + + // We aren't passing it already, so we should add it to the call. + MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg)); + MIB.addReg(ForwardedReg, RegState::Implicit); + } + } + + // If we have -tailcallopt, we need to adjust the stack. We'll do the call + // sequence start and end here. + if (!IsSibCall) { + MIB->getOperand(1).setImm(FPDiff); + CallSeqStart.addImm(NumBytes).addImm(0); + // End the call sequence *before* emitting the call. Normally, we would + // tidy the frame up after the call. However, here, we've laid out the + // parameters so that when SP is reset, they will be in the correct + // location. + MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(NumBytes).addImm(0); + } + + // Now we can add the actual call instruction to the correct basic block. + MIRBuilder.insertInstr(MIB); + + // If Callee is a reg, since it is used by a target specific instruction, + // it must have a register class matching the constraint of that instruction. 
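
Aside, not part of the imported sources: the FPDiff bookkeeping above is the core of the -tailcallopt path. The callee's outgoing argument bytes, rounded up to 16, are compared against the stack argument area the caller already owns, and the difference is applied when SP is reset. A small worked sketch with hypothetical byte counts:

    // Illustrative only; mirrors the computation in lowerTailCall above.
    #include <cassert>

    int computeFPDiff(unsigned BytesInStackArgArea, unsigned NextStackOffset) {
      unsigned NumBytes = (NextStackOffset + 15u) & ~15u; // alignTo(.., 16)
      int FPDiff = int(BytesInStackArgArea) - int(NumBytes); // negative when the
                                                             // callee needs more
      assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
      return FPDiff;
    }
    // Example: the caller received 32 bytes of stack arguments and the callee
    // needs 44, so NumBytes = 48 and FPDiff = 32 - 48 = -16.
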
+ if (Info.Callee.isReg()) + MIB->getOperand(0).setReg(constrainOperandRegClass( + MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(), + *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Info.Callee, + 0)); + + MF.getFrameInfo().setHasTailCall(); + Info.LoweredTailCall = true; + return true; +} + bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, - CallingConv::ID CallConv, - const MachineOperand &Callee, - const ArgInfo &OrigRet, - ArrayRef OrigArgs, - Register SwiftErrorVReg) const { + CallLoweringInfo &Info) const { MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); MachineRegisterInfo &MRI = MF.getRegInfo(); auto &DL = F.getParent()->getDataLayout(); + const AArch64TargetLowering &TLI = *getTLI(); - SmallVector SplitArgs; - for (auto &OrigArg : OrigArgs) { - splitToValueTypes(OrigArg, SplitArgs, DL, MRI, CallConv); + SmallVector OutArgs; + for (auto &OrigArg : Info.OrigArgs) { + splitToValueTypes(OrigArg, OutArgs, DL, MRI, Info.CallConv); // AAPCS requires that we zero-extend i1 to 8 bits by the caller. if (OrigArg.Ty->isIntegerTy(1)) - SplitArgs.back().Flags.setZExt(); + OutArgs.back().Flags[0].setZExt(); + } + + SmallVector InArgs; + if (!Info.OrigRet.Ty->isVoidTy()) + splitToValueTypes(Info.OrigRet, InArgs, DL, MRI, F.getCallingConv()); + + // If we can lower as a tail call, do that instead. + bool CanTailCallOpt = + isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs); + + // We must emit a tail call if we have musttail. + if (Info.IsMustTailCall && !CanTailCallOpt) { + // There are types of incoming/outgoing arguments we can't handle yet, so + // it doesn't make sense to actually die here like in ISelLowering. Instead, + // fall back to SelectionDAG and let it try to handle this. + LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n"); + return false; } + if (CanTailCallOpt) + return lowerTailCall(MIRBuilder, Info, OutArgs); + // Find out which ABI gets to decide where things go. - const AArch64TargetLowering &TLI = *getTLI(); - CCAssignFn *AssignFnFixed = - TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/false); - CCAssignFn *AssignFnVarArg = - TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/true); + CCAssignFn *AssignFnFixed; + CCAssignFn *AssignFnVarArg; + std::tie(AssignFnFixed, AssignFnVarArg) = + getAssignFnsForCC(Info.CallConv, TLI); - auto CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN); + MachineInstrBuilder CallSeqStart; + CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN); // Create a temporarily-floating call instruction so we can add the implicit // uses of arg registers. - auto MIB = MIRBuilder.buildInstrNoInsert(Callee.isReg() ? AArch64::BLR - : AArch64::BL); - MIB.add(Callee); + unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), false); + + auto MIB = MIRBuilder.buildInstrNoInsert(Opc); + MIB.add(Info.Callee); // Tell the call which registers are clobbered. auto TRI = MF.getSubtarget().getRegisterInfo(); @@ -448,8 +983,8 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // Do the actual argument marshalling. SmallVector PhysRegs; OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed, - AssignFnVarArg); - if (!handleAssignments(MIRBuilder, SplitArgs, Handler)) + AssignFnVarArg, false); + if (!handleAssignments(MIRBuilder, OutArgs, Handler)) return false; // Now we can add the actual call instruction to the correct basic block. 
@@ -458,34 +993,37 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // If Callee is a reg, since it is used by a target specific // instruction, it must have a register class matching the // constraint of that instruction. - if (Callee.isReg()) + if (Info.Callee.isReg()) MIB->getOperand(0).setReg(constrainOperandRegClass( MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(), - *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Callee, 0)); + *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Info.Callee, + 0)); // Finally we can copy the returned value back into its virtual-register. In // symmetry with the arugments, the physical register must be an // implicit-define of the call instruction. - CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(F.getCallingConv()); - if (!OrigRet.Ty->isVoidTy()) { - SplitArgs.clear(); - - splitToValueTypes(OrigRet, SplitArgs, DL, MRI, F.getCallingConv()); - + if (!Info.OrigRet.Ty->isVoidTy()) { + CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(F.getCallingConv()); CallReturnHandler Handler(MIRBuilder, MRI, MIB, RetAssignFn); - if (!handleAssignments(MIRBuilder, SplitArgs, Handler)) + if (!handleAssignments(MIRBuilder, InArgs, Handler)) return false; } - if (SwiftErrorVReg) { + if (Info.SwiftErrorVReg) { MIB.addDef(AArch64::X21, RegState::Implicit); - MIRBuilder.buildCopy(SwiftErrorVReg, Register(AArch64::X21)); + MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21)); } + uint64_t CalleePopBytes = + doesCalleeRestoreStack(Info.CallConv, + MF.getTarget().Options.GuaranteedTailCallOpt) + ? alignTo(Handler.StackSize, 16) + : 0; + CallSeqStart.addImm(Handler.StackSize).addImm(0); MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP) .addImm(Handler.StackSize) - .addImm(0); + .addImm(CalleePopBytes); return true; } diff --git a/lib/Target/AArch64/AArch64CallLowering.h b/lib/Target/AArch64/AArch64CallLowering.h index 4f428f254537..b0c601c7062c 100644 --- a/lib/Target/AArch64/AArch64CallLowering.h +++ b/lib/Target/AArch64/AArch64CallLowering.h @@ -40,16 +40,15 @@ public: bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef> VRegs) const override; - bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, - const MachineOperand &Callee, const ArgInfo &OrigRet, - ArrayRef OrigArgs, - Register SwiftErrorVReg) const override; + bool lowerCall(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info) const override; - bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, - const MachineOperand &Callee, const ArgInfo &OrigRet, - ArrayRef OrigArgs) const override { - return lowerCall(MIRBuilder, CallConv, Callee, OrigRet, OrigArgs, 0); - } + /// Returns true if the call can be lowered as a tail call. 
+ bool + isEligibleForTailCallOptimization(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info, + SmallVectorImpl &InArgs, + SmallVectorImpl &OutArgs) const; bool supportSwiftError() const override { return true; } @@ -64,6 +63,18 @@ private: SmallVectorImpl &SplitArgs, const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv) const; + + bool lowerTailCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, + SmallVectorImpl &OutArgs) const; + + bool + doCallerAndCalleePassArgsTheSameWay(CallLoweringInfo &Info, + MachineFunction &MF, + SmallVectorImpl &InArgs) const; + + bool + areCalleeOutgoingArgsTailCallable(CallLoweringInfo &Info, MachineFunction &MF, + SmallVectorImpl &OutArgs) const; }; } // end namespace llvm diff --git a/lib/Target/AArch64/AArch64CallingConvention.cpp b/lib/Target/AArch64/AArch64CallingConvention.cpp index 02538a187611..a0695cef615f 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.cpp +++ b/lib/Target/AArch64/AArch64CallingConvention.cpp @@ -40,12 +40,14 @@ static bool finishStackBlock(SmallVectorImpl &PendingMembers, MVT LocVT, ISD::ArgFlagsTy &ArgFlags, CCState &State, unsigned SlotAlign) { unsigned Size = LocVT.getSizeInBits() / 8; - unsigned StackAlign = + const Align StackAlign = State.getMachineFunction().getDataLayout().getStackAlignment(); - unsigned Align = std::min(ArgFlags.getOrigAlign(), StackAlign); + const Align OrigAlign(ArgFlags.getOrigAlign()); + const Align Align = std::min(OrigAlign, StackAlign); for (auto &It : PendingMembers) { - It.convertToMem(State.AllocateStack(Size, std::max(Align, SlotAlign))); + It.convertToMem(State.AllocateStack( + Size, std::max((unsigned)Align.value(), SlotAlign))); State.addLoc(It); SlotAlign = 1; } @@ -79,10 +81,14 @@ static bool CC_AArch64_Custom_Stack_Block( static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { + const AArch64Subtarget &Subtarget = static_cast( + State.getMachineFunction().getSubtarget()); + bool IsDarwinILP32 = Subtarget.isTargetILP32() && Subtarget.isTargetMachO(); + // Try to allocate a contiguous block of registers, each of the correct // size to hold one member. ArrayRef RegList; - if (LocVT.SimpleTy == MVT::i64) + if (LocVT.SimpleTy == MVT::i64 || (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32)) RegList = XRegList; else if (LocVT.SimpleTy == MVT::f16) RegList = HRegList; @@ -107,8 +113,12 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT, if (!ArgFlags.isInConsecutiveRegsLast()) return true; - unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size()); - if (RegResult) { + // [N x i32] arguments get packed into x-registers on Darwin's arm64_32 + // because that's how the armv7k Clang front-end emits small structs. + unsigned EltsPerReg = (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32) ? 2 : 1; + unsigned RegResult = State.AllocateRegBlock( + RegList, alignTo(PendingMembers.size(), EltsPerReg) / EltsPerReg); + if (RegResult && EltsPerReg == 1) { for (auto &It : PendingMembers) { It.convertToReg(RegResult); State.addLoc(It); @@ -116,14 +126,26 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT, } PendingMembers.clear(); return true; + } else if (RegResult) { + assert(EltsPerReg == 2 && "unexpected ABI"); + bool UseHigh = false; + CCValAssign::LocInfo Info; + for (auto &It : PendingMembers) { + Info = UseHigh ? 
CCValAssign::AExtUpper : CCValAssign::ZExt; + State.addLoc(CCValAssign::getReg(It.getValNo(), MVT::i32, RegResult, + MVT::i64, Info)); + UseHigh = !UseHigh; + if (!UseHigh) + ++RegResult; + } + PendingMembers.clear(); + return true; } // Mark all regs in the class as unavailable for (auto Reg : RegList) State.AllocateReg(Reg); - const AArch64Subtarget &Subtarget = static_cast( - State.getMachineFunction().getSubtarget()); unsigned SlotAlign = Subtarget.isTargetDarwin() ? 1 : 8; return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign); diff --git a/lib/Target/AArch64/AArch64CallingConvention.h b/lib/Target/AArch64/AArch64CallingConvention.h index 13cc0c583fd2..5a55d090d7c8 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.h +++ b/lib/Target/AArch64/AArch64CallingConvention.h @@ -25,6 +25,9 @@ bool CC_AArch64_DarwinPCS_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State); +bool CC_AArch64_DarwinPCS_ILP32_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); bool CC_AArch64_Win64_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State); diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td index d969a9e1ab3a..bccbbd4591ed 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.td +++ b/lib/Target/AArch64/AArch64CallingConvention.td @@ -17,6 +17,10 @@ class CCIfAlign : class CCIfBigEndian : CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>; +class CCIfILP32 : + CCIf<"State.getMachineFunction().getDataLayout().getPointerSize() == 4", A>; + + //===----------------------------------------------------------------------===// // ARM AAPCS64 Calling Convention //===----------------------------------------------------------------------===// @@ -70,6 +74,18 @@ def CC_AArch64_AAPCS : CallingConv<[ CCIfConsecutiveRegs>, + CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16, + nxv1f32, nxv2f32, nxv4f32, nxv1f64, nxv2f64], + CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>, + CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16, + nxv1f32, nxv2f32, nxv4f32, nxv1f64, nxv2f64], + CCPassIndirect>, + + CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1], + CCAssignToReg<[P0, P1, P2, P3]>>, + CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1], + CCPassIndirect>, + // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, // up to eight each of GPR and FPR. 
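
Aside, not part of the imported sources: on Darwin's arm64_32 (ILP32), the CC_AArch64_Custom_Block change above packs [N x i32] block members two per x-register, the first member as the ZExt low half and the second as the AExtUpper high half, to match how the armv7k front-end emits small structs. A minimal sketch of the register-count arithmetic; the member count in the example is hypothetical:

    // Illustrative only; mirrors AllocateRegBlock(RegList,
    //   alignTo(PendingMembers.size(), EltsPerReg) / EltsPerReg) above.
    unsigned xRegsForI32Block(unsigned NumMembers) {
      const unsigned EltsPerReg = 2; // two i32 halves per 64-bit X register
      return (NumMembers + EltsPerReg - 1) / EltsPerReg;
    }
    // e.g. a [3 x i32] block needs xRegsForI32Block(3) == 2 registers: members
    // 0 and 1 share the first (low/ZExt, high/AExtUpper), member 2 takes the
    // low half of the next.
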
CCIfType<[i1, i8, i16], CCPromoteToType>, @@ -111,6 +127,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[ CCIfType<[v2f32], CCBitConvertToType>, CCIfType<[v2f64, v4f32], CCBitConvertToType>, + CCIfConsecutiveRegs>, CCIfSwiftError>>, // Big endian vectors must be passed as if they were 1-element vectors so that @@ -135,7 +152,14 @@ def RetCC_AArch64_AAPCS : CallingConv<[ CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], - CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>> + CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + + CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16, + nxv1f32, nxv2f32, nxv4f32, nxv1f64, nxv2f64], + CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>, + + CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1], + CCAssignToReg<[P0, P1, P2, P3]>> ]>; // Vararg functions on windows pass floats in integer registers @@ -202,6 +226,12 @@ def CC_AArch64_DarwinPCS : CallingConv<[ CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>, CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16", CCAssignToStack<2, 2>>, CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + + // Re-demote pointers to 32-bits so we don't end up storing 64-bit + // values and clobbering neighbouring stack locations. Not very pretty. + CCIfPtr>>, + CCIfPtr>>, + CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16], CCAssignToStack<8, 8>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], @@ -229,6 +259,29 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[ CCAssignToStack<16, 16>> ]>; +// In the ILP32 world, the minimum stack slot size is 4 bytes. Otherwise the +// same as the normal Darwin VarArgs handling. +let Entry = 1 in +def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[ + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, + + // Handle all scalar types as either i32 or f32. + CCIfType<[i8, i16], CCPromoteToType>, + CCIfType<[f16], CCPromoteToType>, + + // Everything is on the stack. + // i128 is split to two i64s, and its stack alignment is 16 bytes. + CCIfPtr>>, + CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + CCIfType<[i64], CCIfSplit>>, + CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16], + CCAssignToStack<8, 8>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], + CCAssignToStack<16, 16>> +]>; + + // The WebKit_JS calling convention only passes the first argument (the callee) // in register and the remaining arguments on stack. We allow 32bit stack slots, // so that WebKit can write partial values in the stack and define the other @@ -298,6 +351,12 @@ def CC_AArch64_GHC : CallingConv<[ CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>> ]>; +// The order of the callee-saves in this file is important, because the +// FrameLowering code will use this order to determine the layout the +// callee-save area in the stack frame. As can be observed below, Darwin +// requires the frame-record (LR, FP) to be at the top the callee-save area, +// whereas for other platforms they are at the bottom. + // FIXME: LR is only callee-saved in the sense that *we* preserve it and are // presumably a callee to someone. 
External functions may not do so, but this // is currently safe since BL has LR as an implicit-def and what happens after a @@ -306,7 +365,13 @@ def CC_AArch64_GHC : CallingConv<[ // It would be better to model its preservation semantics properly (create a // vreg on entry, use it in RET & tail call generation; make that vreg def if we // end up saving LR as part of a call frame). Watch this space... -def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, +def CSR_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, LR, FP, + D8, D9, D10, D11, + D12, D13, D14, D15)>; + +// Darwin puts the frame-record at the top of the callee-save area. +def CSR_Darwin_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, X23, X24, X25, X26, X27, X28, D8, D9, D10, D11, D12, D13, D14, D15)>; @@ -314,17 +379,24 @@ def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, // Win64 has unwinding codes for an (FP,LR) pair, save_fplr and save_fplr_x. // We put FP before LR, so that frame lowering logic generates (FP,LR) pairs, // and not (LR,FP) pairs. -def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add FP, LR, X19, X20, X21, X22, - X23, X24, X25, X26, X27, X28, +def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, FP, LR, D8, D9, D10, D11, D12, D13, D14, D15)>; // AArch64 PCS for vector functions (VPCS) // must (additionally) preserve full Q8-Q23 registers -def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, - X23, X24, X25, X26, X27, X28, +def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, LR, FP, (sequence "Q%u", 8, 23))>; +// Functions taking SVE arguments or returning an SVE type +// must (additionally) preserve full Z8-Z23 and predicate registers P4-P15 +def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, LR, FP, + (sequence "Z%u", 8, 23), + (sequence "P%u", 4, 15))>; + // Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since // 'this' and the pointer return value are both passed in X0 in these cases, // this can be partially modelled by treating X0 as a callee-saved register; @@ -336,7 +408,7 @@ def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>; def CSR_AArch64_AAPCS_SwiftError - : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>; + : CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X21)>; // The function used by Darwin to obtain the address of a thread-local variable // guarantees more than a normal AAPCS function. x16 and x17 are used on the @@ -352,7 +424,7 @@ def CSR_AArch64_TLS_Darwin // fast path calls a function that follows CSR_AArch64_TLS_Darwin, // CSR_AArch64_CXX_TLS_Darwin should be a subset of CSR_AArch64_TLS_Darwin. 
def CSR_AArch64_CXX_TLS_Darwin - : CalleeSavedRegs<(add CSR_AArch64_AAPCS, + : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, (sub (sequence "X%u", 1, 28), X15, X16, X17, X18), (sequence "D%u", 0, 31))>; diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp index 9f324b433209..35e6fef24363 100644 --- a/lib/Target/AArch64/AArch64CollectLOH.cpp +++ b/lib/Target/AArch64/AArch64CollectLOH.cpp @@ -103,6 +103,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -181,6 +182,7 @@ static bool canDefBePartOfLOH(const MachineInstr &MI) { case AArch64::ADDXri: return canAddBePartOfLOH(MI); case AArch64::LDRXui: + case AArch64::LDRWui: // Check immediate to see if the immediate is an address. switch (MI.getOperand(2).getType()) { default: @@ -312,7 +314,8 @@ static void handleUse(const MachineInstr &MI, const MachineOperand &MO, Info.Type = MCLOH_AdrpAdd; Info.IsCandidate = true; Info.MI0 = &MI; - } else if (MI.getOpcode() == AArch64::LDRXui && + } else if ((MI.getOpcode() == AArch64::LDRXui || + MI.getOpcode() == AArch64::LDRWui) && MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) { Info.Type = MCLOH_AdrpLdrGot; Info.IsCandidate = true; @@ -357,7 +360,9 @@ static bool handleMiddleInst(const MachineInstr &MI, LOHInfo &DefInfo, return true; } } else { - assert(MI.getOpcode() == AArch64::LDRXui && "Expect LDRXui"); + assert((MI.getOpcode() == AArch64::LDRXui || + MI.getOpcode() == AArch64::LDRWui) && + "Expect LDRXui or LDRWui"); assert((MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) && "Expected GOT relocation"); if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) { @@ -474,13 +479,23 @@ static void handleNormalInst(const MachineInstr &MI, LOHInfo *LOHInfos) { handleClobber(LOHInfos[Idx]); } // Handle uses. + + SmallSet UsesSeen; for (const MachineOperand &MO : MI.uses()) { if (!MO.isReg() || !MO.readsReg()) continue; int Idx = mapRegToGPRIndex(MO.getReg()); if (Idx < 0) continue; - handleUse(MI, MO, LOHInfos[Idx]); + + // Multiple uses of the same register within a single instruction don't + // count as MultiUser or block optimization. This is especially important on + // arm64_32, where any memory operation is likely to be an explicit use of + // xN and an implicit use of wN (the base address register). + if (!UsesSeen.count(Idx)) { + handleUse(MI, MO, LOHInfos[Idx]); + UsesSeen.insert(Idx); + } } } @@ -512,6 +527,7 @@ bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) { switch (Opcode) { case AArch64::ADDXri: case AArch64::LDRXui: + case AArch64::LDRWui: if (canDefBePartOfLOH(MI)) { const MachineOperand &Def = MI.getOperand(0); const MachineOperand &Op = MI.getOperand(1); diff --git a/lib/Target/AArch64/AArch64Combine.td b/lib/Target/AArch64/AArch64Combine.td new file mode 100644 index 000000000000..bb99f2516ecf --- /dev/null +++ b/lib/Target/AArch64/AArch64Combine.td @@ -0,0 +1,18 @@ +//=- AArch64.td - Define AArch64 Combine Rules ---------------*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +include "llvm/Target/GlobalISel/Combine.td" + +def AArch64PreLegalizerCombinerHelper: GICombinerHelper< + "AArch64GenPreLegalizerCombinerHelper", [all_combines, + elide_br_by_inverting_cond]> { + let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule"; +} diff --git a/lib/Target/AArch64/AArch64CondBrTuning.cpp b/lib/Target/AArch64/AArch64CondBrTuning.cpp index 453132e09669..25e23e4623de 100644 --- a/lib/Target/AArch64/AArch64CondBrTuning.cpp +++ b/lib/Target/AArch64/AArch64CondBrTuning.cpp @@ -78,7 +78,7 @@ void AArch64CondBrTuning::getAnalysisUsage(AnalysisUsage &AU) const { } MachineInstr *AArch64CondBrTuning::getOperandDef(const MachineOperand &MO) { - if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (!Register::isVirtualRegister(MO.getReg())) return nullptr; return MRI->getUniqueVRegDef(MO.getReg()); } @@ -98,7 +98,7 @@ MachineInstr *AArch64CondBrTuning::convertToFlagSetting(MachineInstr &MI, } bool Is64Bit; unsigned NewOpc = TII->convertToFlagSettingOpc(MI.getOpcode(), Is64Bit); - unsigned NewDestReg = MI.getOperand(0).getReg(); + Register NewDestReg = MI.getOperand(0).getReg(); if (MRI->hasOneNonDBGUse(MI.getOperand(0).getReg())) NewDestReg = Is64Bit ? AArch64::XZR : AArch64::WZR; diff --git a/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp index 2cfbcc592d6a..43ae9f8ec47f 100644 --- a/lib/Target/AArch64/AArch64ConditionalCompares.cpp +++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp @@ -220,7 +220,7 @@ bool SSACCmpConv::trivialTailPHIs() { // PHI operands come in (VReg, MBB) pairs. for (unsigned oi = 1, oe = I.getNumOperands(); oi != oe; oi += 2) { MachineBasicBlock *MBB = I.getOperand(oi + 1).getMBB(); - unsigned Reg = I.getOperand(oi).getReg(); + Register Reg = I.getOperand(oi).getReg(); if (MBB == Head) { assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands"); HeadReg = Reg; @@ -259,7 +259,7 @@ bool SSACCmpConv::isDeadDef(unsigned DstReg) { // Writes to the zero register are dead. if (DstReg == AArch64::WZR || DstReg == AArch64::XZR) return true; - if (!TargetRegisterInfo::isVirtualRegister(DstReg)) + if (!Register::isVirtualRegister(DstReg)) return false; // A virtual register def without any uses will be marked dead later, and // eventually replaced by the zero register. @@ -631,7 +631,7 @@ void SSACCmpConv::convert(SmallVectorImpl &RemovedBlocks) { } const MCInstrDesc &MCID = TII->get(Opc); // Create a dummy virtual register for the SUBS def. - unsigned DestReg = + Register DestReg = MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF)); // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz. BuildMI(*Head, Head->end(), TermDL, MCID) diff --git a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp index a43077cb88ec..bc3808df1dbc 100644 --- a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp +++ b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp @@ -145,8 +145,8 @@ void AArch64DeadRegisterDefinitions::processMachineBasicBlock( continue; // We should not have any relevant physreg defs that are replacable by // zero before register allocation. So we just check for dead vreg defs. 
- unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg) || + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg) || (!MO.isDead() && !MRI->use_nodbg_empty(Reg))) continue; assert(!MO.isImplicit() && "Unexpected implicit def!"); diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 210c10eb1842..082e17e44d04 100644 --- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -109,7 +109,7 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned BitSize) { MachineInstr &MI = *MBBI; - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); uint64_t Imm = MI.getOperand(1).getImm(); if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) { @@ -150,7 +150,7 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, } break; case AArch64::MOVKWi: case AArch64::MOVKXi: { - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode)) .addReg(DstReg, @@ -174,14 +174,14 @@ bool AArch64ExpandPseudo::expandCMP_SWAP( MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); const MachineOperand &Dest = MI.getOperand(0); - unsigned StatusReg = MI.getOperand(1).getReg(); + Register StatusReg = MI.getOperand(1).getReg(); bool StatusDead = MI.getOperand(1).isDead(); // Duplicating undef operands into 2 instructions does not guarantee the same // value on both; However undef should be replaced by xzr anyway. assert(!MI.getOperand(2).isUndef() && "cannot handle undef"); - unsigned AddrReg = MI.getOperand(2).getReg(); - unsigned DesiredReg = MI.getOperand(3).getReg(); - unsigned NewReg = MI.getOperand(4).getReg(); + Register AddrReg = MI.getOperand(2).getReg(); + Register DesiredReg = MI.getOperand(3).getReg(); + Register NewReg = MI.getOperand(4).getReg(); MachineFunction *MF = MBB.getParent(); auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); @@ -254,16 +254,16 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128( DebugLoc DL = MI.getDebugLoc(); MachineOperand &DestLo = MI.getOperand(0); MachineOperand &DestHi = MI.getOperand(1); - unsigned StatusReg = MI.getOperand(2).getReg(); + Register StatusReg = MI.getOperand(2).getReg(); bool StatusDead = MI.getOperand(2).isDead(); // Duplicating undef operands into 2 instructions does not guarantee the same // value on both; However undef should be replaced by xzr anyway. 
assert(!MI.getOperand(3).isUndef() && "cannot handle undef"); - unsigned AddrReg = MI.getOperand(3).getReg(); - unsigned DesiredLoReg = MI.getOperand(4).getReg(); - unsigned DesiredHiReg = MI.getOperand(5).getReg(); - unsigned NewLoReg = MI.getOperand(6).getReg(); - unsigned NewHiReg = MI.getOperand(7).getReg(); + Register AddrReg = MI.getOperand(3).getReg(); + Register DesiredLoReg = MI.getOperand(4).getReg(); + Register DesiredHiReg = MI.getOperand(5).getReg(); + Register NewLoReg = MI.getOperand(6).getReg(); + Register NewHiReg = MI.getOperand(7).getReg(); MachineFunction *MF = MBB.getParent(); auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); @@ -475,7 +475,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, case AArch64::LOADgot: { MachineFunction *MF = MBB.getParent(); - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); const MachineOperand &MO1 = MI.getOperand(1); unsigned Flags = MO1.getTargetFlags(); @@ -495,12 +495,26 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, } } else { // Small codemodel expand into ADRP + LDR. + MachineFunction &MF = *MI.getParent()->getParent(); + DebugLoc DL = MI.getDebugLoc(); MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg); - MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui)) - .add(MI.getOperand(0)) - .addReg(DstReg); + + MachineInstrBuilder MIB2; + if (MF.getSubtarget().isTargetILP32()) { + auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); + unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32); + unsigned DstFlags = MI.getOperand(0).getTargetFlags(); + MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui)) + .addDef(Reg32) + .addReg(DstReg, RegState::Kill) + .addReg(DstReg, DstFlags | RegState::Implicit); + } else { + unsigned DstReg = MI.getOperand(0).getReg(); + MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui)) + .add(MI.getOperand(0)) + .addUse(DstReg, RegState::Kill); + } if (MO1.isGlobal()) { MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE); @@ -534,11 +548,28 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, case AArch64::MOVaddrTLS: case AArch64::MOVaddrEXT: { // Expand into ADRP + ADD. - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg) .add(MI.getOperand(1)); + if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) { + // MO_TAGGED on the page indicates a tagged address. Set the tag now. + // We do so by creating a MOVK that sets bits 48-63 of the register to + // (global address + 0x100000000 - PC) >> 48. This assumes that we're in + // the small code model so we can assume a binary size of <= 4GB, which + // makes the untagged PC relative offset positive. The binary must also be + // loaded into address range [0, 2^48). Both of these properties need to + // be ensured at runtime when using tagged addresses. 
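// Illustrative sketch (not part of the patch): what a MOVK with a logical
// shift of 48 does to its destination, using a simplified arithmetic model
// rather than the real MC layer. The MOVKXi built just below splices the
// 16-bit tag ((global address + 0x100000000 - PC) >> 48) into bits 48-63 of
// DstReg while leaving the ADRP-derived bits 0-47 untouched.
#include <cstdint>
static uint64_t movkLsl48(uint64_t Reg, uint16_t Imm16) {
  // MOVK Xd, #Imm16, lsl #48: keep bits 0-47, replace bits 48-63.
  return (Reg & 0x0000FFFFFFFFFFFFULL) | (uint64_t(Imm16) << 48);
}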
+ auto Tag = MI.getOperand(1); + Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3); + Tag.setOffset(0x100000000); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg) + .addReg(DstReg) + .add(Tag) + .addImm(48); + } + MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri)) .add(MI.getOperand(0)) @@ -561,7 +592,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, return true; case AArch64::MOVbaseTLS: { - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); auto SysReg = AArch64SysReg::TPIDR_EL0; MachineFunction *MF = MBB.getParent(); if (MF->getTarget().getTargetTriple().isOSFuchsia() && @@ -642,11 +673,12 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, // instruction sequence. int BaseOffset = -AFI->getTaggedBasePointerOffset(); unsigned FrameReg; - int FrameRegOffset = TFI->resolveFrameOffsetReference( - MF, BaseOffset, false /*isFixed*/, FrameReg, /*PreferFP=*/false, + StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference( + MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg, + /*PreferFP=*/false, /*ForSimm=*/true); Register SrcReg = FrameReg; - if (FrameRegOffset != 0) { + if (FrameRegOffset) { // Use output register as temporary. SrcReg = MI.getOperand(0).getReg(); emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg, diff --git a/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp index 3b3182128c4c..b54fc2e51bac 100644 --- a/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp +++ b/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp @@ -642,7 +642,7 @@ static Optional getLoadInfo(const MachineInstr &MI) { } // Loads from the stack pointer don't get prefetched. - unsigned BaseReg = MI.getOperand(BaseRegIdx).getReg(); + Register BaseReg = MI.getOperand(BaseRegIdx).getReg(); if (BaseReg == AArch64::SP || BaseReg == AArch64::WSP) return None; diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 8dc2768b9597..277a3052f1e5 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -459,7 +459,7 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO()) return 0; - unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); + unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); EVT DestEVT = TLI.getValueType(DL, GV->getType(), true); if (!DestEVT.isSimple()) @@ -474,12 +474,32 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { ADRPReg) .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags); - ResultReg = createResultReg(&AArch64::GPR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui), + unsigned LdrOpc; + if (Subtarget->isTargetILP32()) { + ResultReg = createResultReg(&AArch64::GPR32RegClass); + LdrOpc = AArch64::LDRWui; + } else { + ResultReg = createResultReg(&AArch64::GPR64RegClass); + LdrOpc = AArch64::LDRXui; + } + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc), ResultReg) - .addReg(ADRPReg) - .addGlobalAddress(GV, 0, - AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags); + .addReg(ADRPReg) + .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | + AArch64II::MO_NC | OpFlags); + if (!Subtarget->isTargetILP32()) + return ResultReg; + + // LDRWui produces a 32-bit register, but pointers in-register are 64-bits + // so we must extend the result on 
ILP32. + unsigned Result64 = createResultReg(&AArch64::GPR64RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::SUBREG_TO_REG)) + .addDef(Result64) + .addImm(0) + .addReg(ResultReg, RegState::Kill) + .addImm(AArch64::sub_32); + return Result64; } else { // ADRP + ADDX BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), @@ -504,6 +524,15 @@ unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) { if (!CEVT.isSimple()) return 0; MVT VT = CEVT.getSimpleVT(); + // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that, + // 'null' pointers need to have a somewhat special treatment. + if (const auto *CPN = dyn_cast(C)) { + (void)CPN; + assert(CPN->getType()->getPointerAddressSpace() == 0 && + "Unexpected address space"); + assert(VT == MVT::i64 && "Expected 64-bit pointers"); + return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT); + } if (const auto *CI = dyn_cast(C)) return materializeInt(CI, VT); @@ -946,6 +975,9 @@ bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) { EVT evt = TLI.getValueType(DL, Ty, true); + if (Subtarget->isTargetILP32() && Ty->isPointerTy()) + return false; + // Only handle simple types. if (evt == MVT::Other || !evt.isSimple()) return false; @@ -988,6 +1020,9 @@ bool AArch64FastISel::isValueAvailable(const Value *V) const { } bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) { + if (Subtarget->isTargetILP32()) + return false; + unsigned ScaleFactor = getImplicitScaleFactor(VT); if (!ScaleFactor) return false; @@ -3165,6 +3200,11 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { if (IsTailCall) return false; + // FIXME: we could and should support this, but for now correctness at -O0 is + // more important. + if (Subtarget->isTargetILP32()) + return false; + CodeModel::Model CM = TM.getCodeModel(); // Only support the small-addressing and large code models. if (CM != CodeModel::Large && !Subtarget->useSmallAddressing()) @@ -3434,8 +3474,8 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { MFI.setFrameAddressIsTaken(true); const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); - unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); - unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); + Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr); // Recursively load frame address @@ -3796,6 +3836,11 @@ bool AArch64FastISel::selectRet(const Instruction *I) { if (!FuncInfo.CanLowerReturn) return false; + // FIXME: in principle it could. Mostly just a case of zero extending outgoing + // pointers. + if (Subtarget->isTargetILP32()) + return false; + if (F.isVarArg()) return false; @@ -3842,7 +3887,7 @@ bool AArch64FastISel::selectRet(const Instruction *I) { return false; unsigned SrcReg = Reg + VA.getValNo(); - unsigned DestReg = VA.getLocReg(); + Register DestReg = VA.getLocReg(); // Avoid a cross-class copy. This is very unlikely. if (!MRI.getRegClass(SrcReg)->contains(DestReg)) return false; @@ -3970,7 +4015,7 @@ unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) { if (DestVT == MVT::i64) { // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the // upper 32 bits. 
Emit a SUBREG_TO_REG to extend from Wd to Xd. - unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBREG_TO_REG), Reg64) .addImm(0) @@ -4123,7 +4168,7 @@ unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0, }; unsigned Opc = OpcTable[IsZExt][Is64Bit]; if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { - unsigned TmpReg = MRI.createVirtualRegister(RC); + Register TmpReg = MRI.createVirtualRegister(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBREG_TO_REG), TmpReg) .addImm(0) @@ -4244,7 +4289,7 @@ unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, }; unsigned Opc = OpcTable[IsZExt][Is64Bit]; if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { - unsigned TmpReg = MRI.createVirtualRegister(RC); + Register TmpReg = MRI.createVirtualRegister(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBREG_TO_REG), TmpReg) .addImm(0) @@ -4353,7 +4398,7 @@ unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, }; unsigned Opc = OpcTable[IsZExt][Is64Bit]; if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { - unsigned TmpReg = MRI.createVirtualRegister(RC); + Register TmpReg = MRI.createVirtualRegister(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBREG_TO_REG), TmpReg) .addImm(0) @@ -4412,7 +4457,7 @@ unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, if (DestVT == MVT::i8 || DestVT == MVT::i16) DestVT = MVT::i32; else if (DestVT == MVT::i64) { - unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBREG_TO_REG), Src64) .addImm(0) @@ -4495,7 +4540,7 @@ bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, const auto *LoadMI = MI; if (LoadMI->getOpcode() == TargetOpcode::COPY && LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) { - unsigned LoadReg = MI->getOperand(1).getReg(); + Register LoadReg = MI->getOperand(1).getReg(); LoadMI = MRI.getUniqueVRegDef(LoadReg); assert(LoadMI && "Expected valid instruction"); } diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index 8c6e5cbd5c13..68e1e6a30224 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -44,11 +44,19 @@ // | | // |-----------------------------------| // | | -// | prev_fp, prev_lr | +// | callee-saved gpr registers | <--. +// | | | On Darwin platforms these +// |- - - - - - - - - - - - - - - - - -| | callee saves are swapped, +// | | | (frame record first) +// | prev_fp, prev_lr | <--' // | (a.k.a. "frame record") | // |-----------------------------------| <- fp(=x29) // | | -// | other callee-saved registers | +// | callee-saved fp/simd/SVE regs | +// | | +// |-----------------------------------| +// | | +// | SVE stack objects | // | | // |-----------------------------------| // |.empty.space.to.make.part.below....| @@ -80,6 +88,20 @@ // * A frame pointer is definitely needed when there are local variables with // more-than-default alignment requirements. 
// +// For Darwin platforms the frame-record (fp, lr) is stored at the top of the +// callee-saved area, since the unwind encoding does not allow for encoding +// this dynamically and existing tools depend on this layout. For other +// platforms, the frame-record is stored at the bottom of the (gpr) callee-saved +// area to allow SVE stack objects (allocated directly below the callee-saves, +// if available) to be accessed directly from the framepointer. +// The SVE spill/fill instructions have VL-scaled addressing modes such +// as: +// ldr z8, [fp, #-7 mul vl] +// For SVE the size of the vector length (VL) is not known at compile-time, so +// '#-7 mul vl' is an offset that can only be evaluated at runtime. With this +// layout, we don't need to add an unscaled offset to the framepointer before +// accessing the SVE object in the frame. +// // In some cases when a base pointer is not strictly needed, it is generated // anyway when offsets from the frame pointer to access local variables become // so large that the offset can't be encoded in the immediate fields of loads @@ -94,6 +116,7 @@ #include "AArch64InstrInfo.h" #include "AArch64MachineFunctionInfo.h" #include "AArch64RegisterInfo.h" +#include "AArch64StackOffset.h" #include "AArch64Subtarget.h" #include "AArch64TargetMachine.h" #include "MCTargetDesc/AArch64AddressingModes.h" @@ -173,7 +196,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF) { if (!MO.isFI()) continue; - int Offset = 0; + StackOffset Offset; if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) == AArch64FrameOffsetCannotUpdate) return 0; @@ -183,6 +206,12 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF) { return DefaultSafeSPDisplacement; } +/// Returns the size of the entire SVE stackframe (calleesaves + spills). +static StackOffset getSVEStackSize(const MachineFunction &MF) { + const AArch64FunctionInfo *AFI = MF.getInfo(); + return {(int64_t)AFI->getStackSizeSVE(), MVT::nxv1i8}; +} + bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { if (!EnableRedZone) return false; @@ -195,7 +224,8 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { const AArch64FunctionInfo *AFI = MF.getInfo(); unsigned NumBytes = AFI->getLocalStackSize(); - return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128); + return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128 || + getSVEStackSize(MF)); } /// hasFP - Return true if the specified function should have a dedicated frame @@ -273,14 +303,15 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr( // Most call frames will be allocated at the start of a function so // this is OK, but it is a limitation that needs dealing with. assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large"); - emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII); + emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, {Amount, MVT::i8}, + TII); } } else if (CalleePopAmount != 0) { // If the calling convention demands that the callee pops arguments from the // stack, we want to add it back if we have a reserved call frame. 
assert(CalleePopAmount < 0xffffff && "call frame too large"); - emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount, - TII); + emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, + {-(int64_t)CalleePopAmount, MVT::i8}, TII); } return MBB.erase(I); } @@ -416,6 +447,9 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( const AArch64Subtarget &Subtarget = MF.getSubtarget(); const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + if (MF.getFunction().hasOptSize()) + return false; + if (AFI->getLocalStackSize() == 0) return false; @@ -436,6 +470,11 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( if (canUseRedZone(MF)) return false; + // When there is an SVE area on the stack, always allocate the + // callee-saves and spills/locals separately. + if (getSVEStackSize(MF)) + return false; + return true; } @@ -474,8 +513,8 @@ static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI, Imm = -Imm; LLVM_FALLTHROUGH; case AArch64::STPXpre: { - unsigned Reg0 = MBBI->getOperand(1).getReg(); - unsigned Reg1 = MBBI->getOperand(2).getReg(); + Register Reg0 = MBBI->getOperand(1).getReg(); + Register Reg1 = MBBI->getOperand(2).getReg(); if (Reg0 == AArch64::FP && Reg1 == AArch64::LR) MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X)) .addImm(Imm * 8) @@ -523,8 +562,8 @@ static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI, } case AArch64::STPXi: case AArch64::LDPXi: { - unsigned Reg0 = MBBI->getOperand(0).getReg(); - unsigned Reg1 = MBBI->getOperand(1).getReg(); + Register Reg0 = MBBI->getOperand(0).getReg(); + Register Reg1 = MBBI->getOperand(1).getReg(); if (Reg0 == AArch64::FP && Reg1 == AArch64::LR) MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR)) .addImm(Imm * 8) @@ -791,6 +830,10 @@ static bool needsWinCFI(const MachineFunction &MF) { F.needsUnwindTableEntry(); } +static bool isTargetDarwin(const MachineFunction &MF) { + return MF.getSubtarget().isTargetDarwin(); +} + void AArch64FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -846,6 +889,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // Ideally it should match SP value after prologue. AFI->setTaggedBasePointerOffset(MFI.getStackSize()); + const StackOffset &SVEStackSize = getSVEStackSize(MF); + // getStackSize() includes all the locals in its size calculation. We don't // include these locals when computing the stack size of a funclet, as they // are allocated in the parent's stack frame and accessed via the frame @@ -856,6 +901,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, : (int)MFI.getStackSize(); if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) { assert(!HasFP && "unexpected function without stack frame but with FP"); + assert(!SVEStackSize && + "unexpected function without stack frame but with SVE objects"); // All of the stack allocation is for locals. 
AFI->setLocalStackSize(NumBytes); if (!NumBytes) @@ -866,8 +913,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, AFI->setHasRedZone(true); ++NumRedZoneFunctions; } else { - emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, - MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); + emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, + {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup, + false, NeedsWinCFI, &HasWinCFI); if (!NeedsWinCFI) { // Label used to tie together the PROLOG_LABEL and the MachineMoves. MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); @@ -901,8 +949,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, AFI->setLocalStackSize(NumBytes - PrologueSaveSize); bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes); if (CombineSPBump) { - emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, - MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); + assert(!SVEStackSize && "Cannot combine SP bump with SVE"); + emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, + {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup, false, + NeedsWinCFI, &HasWinCFI); NumBytes = 0; } else if (PrologueSaveSize != 0) { MBBI = convertCalleeSaveRestoreToSPPrePostIncDec( @@ -948,9 +998,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, } if (HasFP) { - // Only set up FP if we actually need to. Frame pointer is fp = - // sp - fixedobject - 16. - int FPOffset = AFI->getCalleeSavedStackSize() - 16; + // Only set up FP if we actually need to. + int FPOffset = isTargetDarwin(MF) ? (AFI->getCalleeSavedStackSize() - 16) : 0; + if (CombineSPBump) FPOffset += AFI->getLocalStackSize(); @@ -958,8 +1008,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // mov fp,sp when FPOffset is zero. // Note: All stores of callee-saved registers are marked as "FrameSetup". // This code marks the instruction(s) that set the FP also. - emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII, - MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); + emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, + {FPOffset, MVT::i8}, TII, MachineInstr::FrameSetup, false, + NeedsWinCFI, &HasWinCFI); } if (windowsRequiresStackProbe(MF, NumBytes)) { @@ -1056,6 +1107,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, NumBytes = 0; } + emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -SVEStackSize, TII, + MachineInstr::FrameSetup); + // Allocate space for the rest of the frame. if (NumBytes) { const bool NeedsRealignment = RegInfo->needsStackRealignment(MF); @@ -1071,8 +1125,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have // the correct value here, as NumBytes also includes padding bytes, // which shouldn't be counted here. 
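// Illustrative sketch (not part of the patch): a minimal model of the
// StackOffset values that emitFrameOffset now takes, e.g. {-NumBytes, MVT::i8}
// just below or {StackSizeSVE, MVT::nxv1i8} for the SVE area. The real
// AArch64StackOffset class is richer; this only shows the idea of carrying the
// fixed byte part and the vector-length-scaled part separately, so an offset
// like "#16 + #-7 mul vl" stays a single value until it is materialized.
#include <cstdint>
struct StackOffsetSketch {
  int64_t Bytes;         // fixed part, known at compile time
  int64_t ScalableBytes; // part multiplied by the runtime SVE vector length
  StackOffsetSketch(int64_t B = 0, int64_t SB = 0)
      : Bytes(B), ScalableBytes(SB) {}
  // Mirrors sums such as SVEStackSize + StackOffset(ObjectOffset, MVT::nxv1i8).
  StackOffsetSketch operator+(const StackOffsetSketch &O) const {
    return StackOffsetSketch(Bytes + O.Bytes, ScalableBytes + O.ScalableBytes);
  }
  // Mirrors tests such as "if (SVEStackSize)" / "if (FrameRegOffset)" above.
  explicit operator bool() const { return Bytes != 0 || ScalableBytes != 0; }
};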
- emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII, - MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); + emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, + {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup, + false, NeedsWinCFI, &HasWinCFI); if (NeedsRealignment) { const unsigned Alignment = MFI.getMaxAlignment(); @@ -1130,8 +1185,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, if (needsFrameMoves) { const DataLayout &TD = MF.getDataLayout(); - const int StackGrowth = -TD.getPointerSize(0); - unsigned FramePtr = RegInfo->getFrameRegister(MF); + const int StackGrowth = isTargetDarwin(MF) + ? (2 * -TD.getPointerSize(0)) + : -AFI->getCalleeSavedStackSize(); + Register FramePtr = RegInfo->getFrameRegister(MF); // An example of the prologue: // // .globl __foo @@ -1202,7 +1259,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // Define the current CFA rule to use the provided FP. unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true); unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa( - nullptr, Reg, 2 * StackGrowth - FixedObject)); + nullptr, Reg, StackGrowth - FixedObject)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); @@ -1401,11 +1458,14 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameDestroy); } + const StackOffset &SVEStackSize = getSVEStackSize(MF); + // If there is a single SP update, insert it before the ret and we're done. if (CombineSPBump) { + assert(!SVEStackSize && "Cannot combine SP bump with SVE"); emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, - NumBytes + AfterCSRPopSize, TII, MachineInstr::FrameDestroy, - false, NeedsWinCFI, &HasWinCFI); + {NumBytes + (int64_t)AfterCSRPopSize, MVT::i8}, TII, + MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); if (NeedsWinCFI && HasWinCFI) BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd)) @@ -1416,6 +1476,12 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, NumBytes -= PrologueSaveSize; assert(NumBytes >= 0 && "Negative stack allocation size!?"); + // Deallocate the SVE area. + if (SVEStackSize) + if (!AFI->isStackRealigned()) + emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, SVEStackSize, + TII, MachineInstr::FrameDestroy); + if (!hasFP(MF)) { bool RedZone = canUseRedZone(MF); // If this was a redzone leaf function, we don't need to restore the @@ -1437,8 +1503,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI); emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, - StackRestoreBytes, TII, MachineInstr::FrameDestroy, false, - NeedsWinCFI, &HasWinCFI); + {StackRestoreBytes, MVT::i8}, TII, + MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); if (Done) { if (NeedsWinCFI) { HasWinCFI = true; @@ -1456,13 +1522,16 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // FIXME: Rather than doing the math here, we should instead just use // non-post-indexed loads for the restores if we aren't actually going to // be able to save any instructions. - if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) + if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) { + int64_t OffsetToFrameRecord = + isTargetDarwin(MF) ? 
(-(int64_t)AFI->getCalleeSavedStackSize() + 16) : 0; emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, - -AFI->getCalleeSavedStackSize() + 16, TII, - MachineInstr::FrameDestroy, false, NeedsWinCFI); - else if (NumBytes) - emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII, - MachineInstr::FrameDestroy, false, NeedsWinCFI); + {OffsetToFrameRecord, MVT::i8}, + TII, MachineInstr::FrameDestroy, false, NeedsWinCFI); + } else if (NumBytes) + emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, + {NumBytes, MVT::i8}, TII, MachineInstr::FrameDestroy, false, + NeedsWinCFI); // This must be placed after the callee-save restore code because that code // assumes the SP is at the same location as it was after the callee-save save @@ -1483,8 +1552,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, adaptForLdStOpt(MBB, FirstSPPopI, LastPopI); emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP, - AfterCSRPopSize, TII, MachineInstr::FrameDestroy, false, - NeedsWinCFI, &HasWinCFI); + {(int64_t)AfterCSRPopSize, MVT::i8}, TII, + MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); } if (NeedsWinCFI && HasWinCFI) BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd)) @@ -1501,10 +1570,11 @@ int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { return resolveFrameIndexReference( - MF, FI, FrameReg, - /*PreferFP=*/ - MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress), - /*ForSimm=*/false); + MF, FI, FrameReg, + /*PreferFP=*/ + MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress), + /*ForSimm=*/false) + .getBytes(); } int AArch64FrameLowering::getNonLocalFrameIndexReference( @@ -1512,18 +1582,19 @@ int AArch64FrameLowering::getNonLocalFrameIndexReference( return getSEHFrameIndexOffset(MF, FI); } -static int getFPOffset(const MachineFunction &MF, int ObjectOffset) { +static StackOffset getFPOffset(const MachineFunction &MF, int ObjectOffset) { const auto *AFI = MF.getInfo(); const auto &Subtarget = MF.getSubtarget(); bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; - return ObjectOffset + FixedObject + 16; + unsigned FPAdjust = isTargetDarwin(MF) ? 16 : AFI->getCalleeSavedStackSize(); + return {ObjectOffset + FixedObject + FPAdjust, MVT::i8}; } -static int getStackOffset(const MachineFunction &MF, int ObjectOffset) { +static StackOffset getStackOffset(const MachineFunction &MF, int ObjectOffset) { const auto &MFI = MF.getFrameInfo(); - return ObjectOffset + MFI.getStackSize(); + return {ObjectOffset + (int)MFI.getStackSize(), MVT::i8}; } int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF, @@ -1532,23 +1603,23 @@ int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF, MF.getSubtarget().getRegisterInfo()); int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI); return RegInfo->getLocalAddressRegister(MF) == AArch64::FP - ? getFPOffset(MF, ObjectOffset) - : getStackOffset(MF, ObjectOffset); + ? 
getFPOffset(MF, ObjectOffset).getBytes() + : getStackOffset(MF, ObjectOffset).getBytes(); } -int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, - int FI, unsigned &FrameReg, - bool PreferFP, - bool ForSimm) const { +StackOffset AArch64FrameLowering::resolveFrameIndexReference( + const MachineFunction &MF, int FI, unsigned &FrameReg, bool PreferFP, + bool ForSimm) const { const auto &MFI = MF.getFrameInfo(); int ObjectOffset = MFI.getObjectOffset(FI); bool isFixed = MFI.isFixedObjectIndex(FI); - return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, FrameReg, + bool isSVE = MFI.getStackID(FI) == TargetStackID::SVEVector; + return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg, PreferFP, ForSimm); } -int AArch64FrameLowering::resolveFrameOffsetReference( - const MachineFunction &MF, int ObjectOffset, bool isFixed, +StackOffset AArch64FrameLowering::resolveFrameOffsetReference( + const MachineFunction &MF, int ObjectOffset, bool isFixed, bool isSVE, unsigned &FrameReg, bool PreferFP, bool ForSimm) const { const auto &MFI = MF.getFrameInfo(); const auto *RegInfo = static_cast( @@ -1556,17 +1627,23 @@ int AArch64FrameLowering::resolveFrameOffsetReference( const auto *AFI = MF.getInfo(); const auto &Subtarget = MF.getSubtarget(); - int FPOffset = getFPOffset(MF, ObjectOffset); - int Offset = getStackOffset(MF, ObjectOffset); + int FPOffset = getFPOffset(MF, ObjectOffset).getBytes(); + int Offset = getStackOffset(MF, ObjectOffset).getBytes(); bool isCSR = !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize()); + const StackOffset &SVEStackSize = getSVEStackSize(MF); + // Use frame pointer to reference fixed objects. Use it for locals if // there are VLAs or a dynamically realigned SP (and thus the SP isn't // reliable as a base). Make sure useFPForScavengingIndex() does the // right thing for the emergency spill slot. bool UseFP = false; - if (AFI->hasStackFrame()) { + if (AFI->hasStackFrame() && !isSVE) { + // We shouldn't prefer using the FP when there is an SVE area + // in between the FP and the non-SVE locals/spills. + PreferFP &= !SVEStackSize; + // Note: Keeping the following as multiple 'if' statements rather than // merging to a single expression for readability. // @@ -1594,8 +1671,10 @@ int AArch64FrameLowering::resolveFrameOffsetReference( bool CanUseBP = RegInfo->hasBasePointer(MF); if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best. UseFP = PreferFP; - else if (!CanUseBP) // Can't use BP. Forced to use FP. + else if (!CanUseBP) { // Can't use BP. Forced to use FP. + assert(!SVEStackSize && "Expected BP to be available"); UseFP = true; + } // else we can use BP and FP, but the offset from FP won't fit. // That will make us scavenge registers which we can probably avoid by // using BP. If it won't fit for BP either, we'll scavenge anyway. @@ -1625,9 +1704,36 @@ int AArch64FrameLowering::resolveFrameOffsetReference( "In the presence of dynamic stack pointer realignment, " "non-argument/CSR objects cannot be accessed through the frame pointer"); + if (isSVE) { + int64_t OffsetToSVEArea = + MFI.getStackSize() - AFI->getCalleeSavedStackSize(); + StackOffset FPOffset = {ObjectOffset, MVT::nxv1i8}; + StackOffset SPOffset = SVEStackSize + + StackOffset(ObjectOffset, MVT::nxv1i8) + + StackOffset(OffsetToSVEArea, MVT::i8); + // Always use the FP for SVE spills if available and beneficial. 
+ if (hasFP(MF) && + (SPOffset.getBytes() || + FPOffset.getScalableBytes() < SPOffset.getScalableBytes() || + RegInfo->needsStackRealignment(MF))) { + FrameReg = RegInfo->getFrameRegister(MF); + return FPOffset; + } + + FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister() + : (unsigned)AArch64::SP; + return SPOffset; + } + + StackOffset ScalableOffset = {}; + if (UseFP && !(isFixed || isCSR)) + ScalableOffset = -SVEStackSize; + if (!UseFP && (isFixed || isCSR)) + ScalableOffset = SVEStackSize; + if (UseFP) { FrameReg = RegInfo->getFrameRegister(MF); - return FPOffset; + return StackOffset(FPOffset, MVT::i8) + ScalableOffset; } // Use the base pointer if we have one. @@ -1644,7 +1750,7 @@ int AArch64FrameLowering::resolveFrameOffsetReference( Offset -= AFI->getLocalStackSize(); } - return Offset; + return StackOffset(Offset, MVT::i8) + ScalableOffset; } static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) { @@ -1682,6 +1788,23 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, return true; } +/// Returns true if Reg1 and Reg2 cannot be paired using a ldp/stp instruction. +/// WindowsCFI requires that only consecutive registers can be paired. +/// LR and FP need to be allocated together when the frame needs to save +/// the frame-record. This means any other register pairing with LR is invalid. +static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2, + bool NeedsWinCFI, bool NeedsFrameRecord) { + if (NeedsWinCFI) + return invalidateWindowsRegisterPairing(Reg1, Reg2, true); + + // If we need to store the frame record, don't pair any register + // with LR other than FP. + if (NeedsFrameRecord) + return Reg2 == AArch64::LR; + + return false; +} + namespace { struct RegPairInfo { @@ -1701,7 +1824,7 @@ struct RegPairInfo { static void computeCalleeSaveRegisterPairs( MachineFunction &MF, const std::vector &CSI, const TargetRegisterInfo *TRI, SmallVectorImpl &RegPairs, - bool &NeedShadowCallStackProlog) { + bool &NeedShadowCallStackProlog, bool NeedsFrameRecord) { if (CSI.empty()) return; @@ -1743,7 +1866,8 @@ static void computeCalleeSaveRegisterPairs( switch (RPI.Type) { case RegPairInfo::GPR: if (AArch64::GPR64RegClass.contains(NextReg) && - !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI)) + !invalidateRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI, + NeedsFrameRecord)) RPI.Reg2 = NextReg; break; case RegPairInfo::FPR64: @@ -1777,6 +1901,10 @@ static void computeCalleeSaveRegisterPairs( (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) && "Out of order callee saved regs!"); + assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP || + RPI.Reg1 == AArch64::LR) && + "FrameRecord must be allocated together with LR"); + // MachO's compact unwind format relies on all registers being stored in // adjacent register pairs. 
assert((!produceCompactUnwindFrame(MF) || @@ -1825,7 +1953,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( bool NeedShadowCallStackProlog = false; computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, - NeedShadowCallStackProlog); + NeedShadowCallStackProlog, hasFP(MF)); const MachineRegisterInfo &MRI = MF.getRegInfo(); if (NeedShadowCallStackProlog) { @@ -1955,7 +2083,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( bool NeedShadowCallStackProlog = false; computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, - NeedShadowCallStackProlog); + NeedShadowCallStackProlog, hasFP(MF)); auto EmitMI = [&](const RegPairInfo &RPI) { unsigned Reg1 = RPI.Reg1; @@ -2113,19 +2241,26 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, SavedRegs.set(AArch64::LR); } - LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:"; + LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nSaved CSRs:"; for (unsigned Reg : SavedRegs.set_bits()) dbgs() << ' ' << printReg(Reg, RegInfo); dbgs() << "\n";); // If any callee-saved registers are used, the frame cannot be eliminated. - bool CanEliminateFrame = SavedRegs.count() == 0; + unsigned MaxAlign = getStackAlignment(); + int64_t SVEStackSize = + alignTo(determineSVEStackSize(MFI, MaxAlign), MaxAlign); + assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes"); + bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize; // The CSR spill slots have not been allocated yet, so estimateStackSize // won't include them. unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF); - bool BigStack = (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit; + + // Conservatively always assume BigStack when there are SVE spills. + bool BigStack = SVEStackSize || + (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit; if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) AFI->setHasStackFrame(true); @@ -2145,7 +2280,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // store the pair. if (produceCompactUnwindFrame(MF)) SavedRegs.set(UnspilledCSGPRPaired); - ExtraCSSpill = UnspilledCSGPRPaired; + ExtraCSSpill = UnspilledCSGPR; } // If we didn't find an extra callee-saved register to spill, create @@ -2181,14 +2316,42 @@ bool AArch64FrameLowering::enableStackSlotScavenging( return AFI->hasCalleeSaveStackFreeSpace(); } +int64_t AArch64FrameLowering::determineSVEStackSize(MachineFrameInfo &MFI, + unsigned &MaxAlign) const { + // Process all fixed stack objects. + int64_t Offset = 0; + for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) + if (MFI.getStackID(I) == TargetStackID::SVEVector) { + int64_t FixedOffset = -MFI.getObjectOffset(I); + if (FixedOffset > Offset) + Offset = FixedOffset; + } + + // Note: We don't take allocatable stack objects into + // account yet, because allocation for those is not yet + // implemented. 
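// Illustrative sketch (not part of the patch): the rounding the callers apply
// to the raw SVE offset returned below, via alignTo(SVEStackSize, MaxAlign).
// For the power-of-two alignments used here (at most 16 bytes) it is the
// usual round-up-to-a-multiple computation.
#include <cstdint>
static uint64_t alignToSketch(uint64_t Value, uint64_t Alignment) {
  // Assumes Alignment is a power of two, e.g. alignToSketch(20, 16) == 32.
  return (Value + Alignment - 1) & ~(Alignment - 1);
}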
+ return Offset; +} + void AArch64FrameLowering::processFunctionBeforeFrameFinalized( MachineFunction &MF, RegScavenger *RS) const { + MachineFrameInfo &MFI = MF.getFrameInfo(); + + assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown && + "Upwards growing stack unsupported"); + + unsigned MaxAlign = getStackAlignment(); + int64_t SVEStackSize = determineSVEStackSize(MFI, MaxAlign); + + AArch64FunctionInfo *AFI = MF.getInfo(); + AFI->setStackSizeSVE(alignTo(SVEStackSize, MaxAlign)); + assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes"); + // If this function isn't doing Win64-style C++ EH, we don't need to do // anything. if (!MF.hasEHFunclets()) return; const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - MachineFrameInfo &MFI = MF.getFrameInfo(); WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo(); MachineBasicBlock &MBB = MF.front(); diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h index 6dbd34b2189f..ac150e86c9eb 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.h +++ b/lib/Target/AArch64/AArch64FrameLowering.h @@ -13,6 +13,7 @@ #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H #define LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H +#include "AArch64StackOffset.h" #include "llvm/CodeGen/TargetFrameLowering.h" namespace llvm { @@ -20,7 +21,7 @@ namespace llvm { class AArch64FrameLowering : public TargetFrameLowering { public: explicit AArch64FrameLowering() - : TargetFrameLowering(StackGrowsDown, 16, 0, 16, + : TargetFrameLowering(StackGrowsDown, Align(16), 0, Align(16), true /*StackRealignable*/) {} void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, @@ -39,12 +40,13 @@ public: int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override; - int resolveFrameIndexReference(const MachineFunction &MF, int FI, - unsigned &FrameReg, bool PreferFP, - bool ForSimm) const; - int resolveFrameOffsetReference(const MachineFunction &MF, int ObjectOffset, - bool isFixed, unsigned &FrameReg, - bool PreferFP, bool ForSimm) const; + StackOffset resolveFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg, bool PreferFP, + bool ForSimm) const; + StackOffset resolveFrameOffsetReference(const MachineFunction &MF, + int ObjectOffset, bool isFixed, + bool isSVE, unsigned &FrameReg, + bool PreferFP, bool ForSimm) const; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, @@ -85,9 +87,21 @@ public: int FI) const override; int getSEHFrameIndexOffset(const MachineFunction &MF, int FI) const; + bool isSupportedStackID(TargetStackID::Value ID) const override { + switch (ID) { + default: + return false; + case TargetStackID::Default: + case TargetStackID::SVEVector: + case TargetStackID::NoAlloc: + return true; + } + } + private: bool shouldCombineCSRLocalStackBump(MachineFunction &MF, unsigned StackBumpBytes) const; + int64_t determineSVEStackSize(MachineFrameInfo &MF, unsigned &MaxAlign) const; }; } // End llvm namespace diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index cd7e927ac80c..1f08505f37e7 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -2053,7 +2053,7 @@ static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, } static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { - if (Depth >= 6) + if (Depth >= SelectionDAG::MaxRecursionDepth) 
return; // Initialize UsefulBits if (!Depth) { @@ -2913,49 +2913,6 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { return; break; - case ISD::EXTRACT_VECTOR_ELT: { - // Extracting lane zero is a special case where we can just use a plain - // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for - // the rest of the compiler, especially the register allocator and copyi - // propagation, to reason about, so is preferred when it's possible to - // use it. - ConstantSDNode *LaneNode = cast(Node->getOperand(1)); - // Bail and use the default Select() for non-zero lanes. - if (LaneNode->getZExtValue() != 0) - break; - // If the element type is not the same as the result type, likewise - // bail and use the default Select(), as there's more to do than just - // a cross-class COPY. This catches extracts of i8 and i16 elements - // since they will need an explicit zext. - if (VT != Node->getOperand(0).getValueType().getVectorElementType()) - break; - unsigned SubReg; - switch (Node->getOperand(0) - .getValueType() - .getVectorElementType() - .getSizeInBits()) { - default: - llvm_unreachable("Unexpected vector element type!"); - case 64: - SubReg = AArch64::dsub; - break; - case 32: - SubReg = AArch64::ssub; - break; - case 16: - SubReg = AArch64::hsub; - break; - case 8: - llvm_unreachable("unexpected zext-requiring extract element!"); - } - SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT, - Node->getOperand(0)); - LLVM_DEBUG(dbgs() << "ISEL: Custom selection!\n=> "); - LLVM_DEBUG(Extract->dumpr(CurDAG)); - LLVM_DEBUG(dbgs() << "\n"); - ReplaceNode(Node, Extract.getNode()); - return; - } case ISD::Constant: { // Materialize zero constants as copies from WZR/XZR. This allows // the coalescer to propagate these into other instructions. 
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 7becc99fb5c7..2746117e8ee5 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" @@ -161,6 +162,29 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, addQRTypeForNEON(MVT::v8f16); } + if (Subtarget->hasSVE()) { + // Add legal sve predicate types + addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass); + addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass); + addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass); + addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass); + + // Add legal sve data types + addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass); + addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass); + addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass); + addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass); + + addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass); + addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass); + addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass); + addRegisterClass(MVT::nxv1f32, &AArch64::ZPRRegClass); + addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass); + addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass); + addRegisterClass(MVT::nxv1f64, &AArch64::ZPRRegClass); + addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass); + } + // Compute derived properties from the register classes computeRegisterProperties(Subtarget->getRegisterInfo()); @@ -283,7 +307,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // AArch64 lacks both left-rotate and popcount instructions. setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::ROTL, MVT::i64, Expand); - for (MVT VT : MVT::vector_valuetypes()) { + for (MVT VT : MVT::fixedlen_vector_valuetypes()) { setOperationAction(ISD::ROTL, VT, Expand); setOperationAction(ISD::ROTR, VT, Expand); } @@ -297,7 +321,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::SDIVREM, MVT::i32, Expand); setOperationAction(ISD::SDIVREM, MVT::i64, Expand); - for (MVT VT : MVT::vector_valuetypes()) { + for (MVT VT : MVT::fixedlen_vector_valuetypes()) { setOperationAction(ISD::SDIVREM, VT, Expand); setOperationAction(ISD::UDIVREM, VT, Expand); } @@ -606,6 +630,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4; + MaxLoadsPerMemcmpOptSize = 4; + MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign() + ? MaxLoadsPerMemcmpOptSize : 8; + setStackPointerRegisterToSaveRestore(AArch64::SP); setSchedulingPreference(Sched::Hybrid); @@ -613,10 +641,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, EnableExtLdPromotion = true; // Set required alignment. - setMinFunctionAlignment(2); + setMinFunctionAlignment(Align(4)); // Set preferred alignments. - setPrefFunctionAlignment(STI.getPrefFunctionAlignment()); - setPrefLoopAlignment(STI.getPrefLoopAlignment()); + setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment())); + setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment())); // Only change the limit for entries in a jump table if specified by // the sub target, but not at the command line. 
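A note on the alignment hunk above: the setters now take explicit Align values while the subtarget reports log2 alignments. A minimal standalone sketch of that conversion, with a hypothetical helper name (not the LLVM API):

    #include <cstdint>

    // Convert a log2 alignment (as returned by getPrefLoopLogAlignment above)
    // into a byte alignment, mirroring Align(1ULL << Log2). The old
    // setMinFunctionAlignment(2) encoded log2 = 2, i.e. the 4 bytes now
    // written explicitly as Align(4).
    uint64_t alignFromLog2(unsigned Log2) { return 1ULL << Log2; }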
@@ -725,7 +753,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand); // Likewise, narrowing and extending vector loads/stores aren't handled // directly. - for (MVT VT : MVT::vector_valuetypes()) { + for (MVT VT : MVT::fixedlen_vector_valuetypes()) { setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) { @@ -741,7 +769,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::BSWAP, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); - for (MVT InnerVT : MVT::vector_valuetypes()) { + for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { setTruncStoreAction(VT, InnerVT, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); @@ -773,6 +801,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom); } + if (Subtarget->hasSVE()) { + for (MVT VT : MVT::integer_scalable_vector_valuetypes()) { + if (isTypeLegal(VT) && VT.getVectorElementType() != MVT::i1) + setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); + } + } + PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive(); } @@ -1025,6 +1060,14 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode( Known.One &= Known2.One; break; } + case AArch64ISD::LOADgot: + case AArch64ISD::ADDlow: { + if (!Subtarget->isTargetILP32()) + break; + // In ILP32 mode all valid pointers are in the low 4GB of the address-space. + Known.Zero = APInt::getHighBitsSet(64, 32); + break; + } case ISD::INTRINSIC_W_CHAIN: { ConstantSDNode *CN = cast(Op->getOperand(1)); Intrinsic::ID IntID = static_cast(CN->getZExtValue()); @@ -1100,6 +1143,32 @@ bool AArch64TargetLowering::allowsMisalignedMemoryAccesses( return true; } +// Same as above but handling LLTs instead. +bool AArch64TargetLowering::allowsMisalignedMemoryAccesses( + LLT Ty, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags, + bool *Fast) const { + if (Subtarget->requiresStrictAlign()) + return false; + + if (Fast) { + // Some CPUs are fine with unaligned stores except for 128-bit ones. + *Fast = !Subtarget->isMisaligned128StoreSlow() || + Ty.getSizeInBytes() != 16 || + // See comments in performSTORECombine() for more details about + // these conditions. + + // Code that uses clang vector extensions can mark that it + // wants unaligned accesses to be treated as fast by + // underspecifying alignment to be 1 or 2. + Align <= 2 || + + // Disregard v2i64. Memcpy lowering produces those and splitting + // them regresses performance on micro-benchmarks and olden/bh. 
+ Ty == LLT::vector(2, 64); + } + return true; +} + FastISel * AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const { @@ -1238,6 +1307,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::STZG: return "AArch64ISD::STZG"; case AArch64ISD::ST2G: return "AArch64ISD::ST2G"; case AArch64ISD::STZ2G: return "AArch64ISD::STZ2G"; + case AArch64ISD::SUNPKHI: return "AArch64ISD::SUNPKHI"; + case AArch64ISD::SUNPKLO: return "AArch64ISD::SUNPKLO"; + case AArch64ISD::UUNPKHI: return "AArch64ISD::UUNPKHI"; + case AArch64ISD::UUNPKLO: return "AArch64ISD::UUNPKLO"; } return nullptr; } @@ -1263,9 +1336,9 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI, DebugLoc DL = MI.getDebugLoc(); MachineFunction::iterator It = ++MBB->getIterator(); - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned IfTrueReg = MI.getOperand(1).getReg(); - unsigned IfFalseReg = MI.getOperand(2).getReg(); + Register DestReg = MI.getOperand(0).getReg(); + Register IfTrueReg = MI.getOperand(1).getReg(); + Register IfFalseReg = MI.getOperand(2).getReg(); unsigned CondCode = MI.getOperand(3).getImm(); bool NZCVKilled = MI.getOperand(4).isKill(); @@ -2140,7 +2213,8 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG, RTLIB::Libcall Call) const { SmallVector Ops(Op->op_begin(), Op->op_end()); - return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first; + MakeLibCallOptions CallOptions; + return makeLibCall(DAG, Call, MVT::f128, Ops, CallOptions, SDLoc(Op)).first; } // Returns true if the given Op is the overflow flag result of an overflow @@ -2349,7 +2423,8 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op, // precise. That doesn't take part in the LibCall so we can't directly use // LowerF128Call. 
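The LLT overload of allowsMisalignedMemoryAccesses above reports most unaligned accesses as fast. A standalone sketch of that predicate, with plain bools standing in for the Subtarget and LLT queries (assumed parameter names):

    // Unaligned accesses count as fast unless this is a 16-byte access on a
    // core where misaligned 128-bit stores are slow, with the two exceptions
    // called out in the comments above (a requested alignment of 1 or 2, and
    // the v2i64 case produced by memcpy lowering).
    bool isFastUnaligned(bool Misaligned128StoreSlow, unsigned SizeInBytes,
                         unsigned RequestedAlign, bool IsV2i64) {
      return !Misaligned128StoreSlow || SizeInBytes != 16 ||
             RequestedAlign <= 2 || IsV2i64;
    }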
SDValue SrcVal = Op.getOperand(0); - return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false, + MakeLibCallOptions CallOptions; + return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, CallOptions, SDLoc(Op)).first; } @@ -2419,7 +2494,8 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType()); SmallVector Ops(Op->op_begin(), Op->op_end()); - return makeLibCall(DAG, LC, Op.getValueType(), Ops, false, SDLoc(Op)).first; + MakeLibCallOptions CallOptions; + return makeLibCall(DAG, LC, Op.getValueType(), Ops, CallOptions, SDLoc(Op)).first; } static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { @@ -2773,6 +2849,19 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(ISD::UMIN, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::aarch64_sve_sunpkhi: + return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(), + Op.getOperand(1)); + case Intrinsic::aarch64_sve_sunpklo: + return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(), + Op.getOperand(1)); + case Intrinsic::aarch64_sve_uunpkhi: + return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(), + Op.getOperand(1)); + case Intrinsic::aarch64_sve_uunpklo: + return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(), + Op.getOperand(1)); + case Intrinsic::localaddress: { const auto &MF = DAG.getMachineFunction(); const auto *RegInfo = Subtarget->getRegisterInfo(); @@ -2937,6 +3026,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); + case ISD::SPLAT_VECTOR: + return LowerSPLAT_VECTOR(Op, DAG); case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); case ISD::SRA: @@ -3014,8 +3105,11 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC, return CC_AArch64_Win64_VarArg; if (!Subtarget->isTargetDarwin()) return CC_AArch64_AAPCS; - return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS; - case CallingConv::Win64: + if (!IsVarArg) + return CC_AArch64_DarwinPCS; + return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg + : CC_AArch64_DarwinPCS_VarArg; + case CallingConv::Win64: return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS; case CallingConv::AArch64_VectorCall: return CC_AArch64_AAPCS; @@ -3038,6 +3132,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments( // Assign locations to all of the incoming arguments. SmallVector ArgLocs; + DenseMap CopiedRegs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); @@ -3094,11 +3189,10 @@ SDValue AArch64TargetLowering::LowerFormalArguments( continue; } + SDValue ArgValue; if (VA.isRegLoc()) { // Arguments stored in registers. 
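The CCAssignFnForCall hunk above gives Darwin ILP32 varargs calls their own assignment function. A trivial sketch of that selection, using a hypothetical enum rather than the real CC_AArch64_* tables:

    enum class DarwinCC { DarwinPCS, DarwinPCS_VarArg, DarwinPCS_ILP32_VarArg };

    // Mirrors the new branch: fixed-argument calls keep DarwinPCS, varargs
    // calls pick the ILP32 variant when the target is ILP32.
    DarwinCC darwinCCForCall(bool IsVarArg, bool IsILP32) {
      if (!IsVarArg)
        return DarwinCC::DarwinPCS;
      return IsILP32 ? DarwinCC::DarwinPCS_ILP32_VarArg
                     : DarwinCC::DarwinPCS_VarArg;
    }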
EVT RegVT = VA.getLocVT(); - - SDValue ArgValue; const TargetRegisterClass *RC; if (RegVT == MVT::i32) @@ -3113,6 +3207,11 @@ SDValue AArch64TargetLowering::LowerFormalArguments( RC = &AArch64::FPR64RegClass; else if (RegVT == MVT::f128 || RegVT.is128BitVector()) RC = &AArch64::FPR128RegClass; + else if (RegVT.isScalableVector() && + RegVT.getVectorElementType() == MVT::i1) + RC = &AArch64::PPRRegClass; + else if (RegVT.isScalableVector()) + RC = &AArch64::ZPRRegClass; else llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); @@ -3128,20 +3227,23 @@ SDValue AArch64TargetLowering::LowerFormalArguments( llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; + case CCValAssign::Indirect: + assert(VA.getValVT().isScalableVector() && + "Only scalable vectors can be passed indirectly"); + llvm_unreachable("Spilling of SVE vectors not yet implemented"); case CCValAssign::BCvt: ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue); break; case CCValAssign::AExt: case CCValAssign::SExt: case CCValAssign::ZExt: - // SelectionDAGBuilder will insert appropriate AssertZExt & AssertSExt - // nodes after our lowering. - assert(RegVT == Ins[i].VT && "incorrect register location selected"); + break; + case CCValAssign::AExtUpper: + ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue, + DAG.getConstant(32, DL, RegVT)); + ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT()); break; } - - InVals.push_back(ArgValue); - } else { // VA.isRegLoc() assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem"); unsigned ArgOffset = VA.getLocMemOffset(); @@ -3156,7 +3258,6 @@ SDValue AArch64TargetLowering::LowerFormalArguments( // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); - SDValue ArgValue; // For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT) ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; @@ -3165,9 +3266,14 @@ SDValue AArch64TargetLowering::LowerFormalArguments( switch (VA.getLocInfo()) { default: break; + case CCValAssign::Trunc: case CCValAssign::BCvt: MemVT = VA.getLocVT(); break; + case CCValAssign::Indirect: + assert(VA.getValVT().isScalableVector() && + "Only scalable vectors can be passed indirectly"); + llvm_unreachable("Spilling of SVE vectors not yet implemented"); case CCValAssign::SExt: ExtType = ISD::SEXTLOAD; break; @@ -3184,8 +3290,11 @@ SDValue AArch64TargetLowering::LowerFormalArguments( MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), MemVT); - InVals.push_back(ArgValue); } + if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer()) + ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(), + ArgValue, DAG.getValueType(MVT::i32)); + InVals.push_back(ArgValue); } // varargs @@ -3202,8 +3311,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments( // This will point to the next argument passed via stack. unsigned StackOffset = CCInfo.getNextStackOffset(); - // We currently pass all varargs at 8-byte alignment. - StackOffset = ((StackOffset + 7) & ~7); + // We currently pass all varargs at 8-byte alignment, or 4 for ILP32 + StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 
4 : 8); FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true)); if (MFI.hasMustTailInVarArgFunc()) { @@ -3233,8 +3342,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments( assert(!FuncInfo->getSRetReturnReg()); MVT PtrTy = getPointerTy(DAG.getDataLayout()); - unsigned Reg = - MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); + Register Reg = + MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); FuncInfo->setSRetReturnReg(Reg); SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]); @@ -3366,6 +3475,7 @@ SDValue AArch64TargetLowering::LowerCallResult( : RetCC_AArch64_AAPCS; // Assign locations to each value returned by this call. SmallVector RVLocs; + DenseMap CopiedRegs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC); @@ -3383,10 +3493,16 @@ SDValue AArch64TargetLowering::LowerCallResult( continue; } - SDValue Val = - DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag); - Chain = Val.getValue(1); - InFlag = Val.getValue(2); + // Avoid copying a physreg twice since RegAllocFast is incompetent and only + // allows one use of a physreg per block. + SDValue Val = CopiedRegs.lookup(VA.getLocReg()); + if (!Val) { + Val = + DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag); + Chain = Val.getValue(1); + InFlag = Val.getValue(2); + CopiedRegs[VA.getLocReg()] = Val; + } switch (VA.getLocInfo()) { default: @@ -3396,6 +3512,15 @@ SDValue AArch64TargetLowering::LowerCallResult( case CCValAssign::BCvt: Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); break; + case CCValAssign::AExtUpper: + Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val, + DAG.getConstant(32, DL, VA.getLocVT())); + LLVM_FALLTHROUGH; + case CCValAssign::AExt: + LLVM_FALLTHROUGH; + case CCValAssign::ZExt: + Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT()); + break; } InVals.push_back(Val); @@ -3593,6 +3718,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, bool IsVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); + MachineFunction::CallSiteInfo CSInfo; bool IsThisReturn = false; AArch64FunctionInfo *FuncInfo = MF.getInfo(); @@ -3709,6 +3835,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, getPointerTy(DAG.getDataLayout())); SmallVector, 8> RegsToPass; + SmallSet RegsUsed; SmallVector MemOpChains; auto PtrVT = getPointerTy(DAG.getDataLayout()); @@ -3716,7 +3843,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, const auto &Forwards = FuncInfo->getForwardedMustTailRegParms(); for (const auto &F : Forwards) { SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT); - RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val)); + RegsToPass.emplace_back(F.PReg, Val); } } @@ -3747,12 +3874,25 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, } Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); break; + case CCValAssign::AExtUpper: + assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits"); + Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); + Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg, + DAG.getConstant(32, DL, VA.getLocVT())); + break; case CCValAssign::BCvt: - Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); + Arg = DAG.getBitcast(VA.getLocVT(), Arg); + break; + case CCValAssign::Trunc: + Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT()); break; case CCValAssign::FPExt: Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg); break; + case 
CCValAssign::Indirect: + assert(VA.getValVT().isScalableVector() && + "Only scalable vectors can be passed indirectly"); + llvm_unreachable("Spilling of SVE vectors not yet implemented"); } if (VA.isRegLoc()) { @@ -3764,7 +3904,33 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, "unexpected use of 'returned'"); IsThisReturn = true; } - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + if (RegsUsed.count(VA.getLocReg())) { + // If this register has already been used then we're trying to pack + // parts of an [N x i32] into an X-register. The extension type will + // take care of putting the two halves in the right place but we have to + // combine them. + SDValue &Bits = + std::find_if(RegsToPass.begin(), RegsToPass.end(), + [=](const std::pair &Elt) { + return Elt.first == VA.getLocReg(); + }) + ->second; + Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg); + // Call site info is used for function's parameter entry value + // tracking. For now we track only simple cases when parameter + // is transferred through whole register. + CSInfo.erase(std::remove_if(CSInfo.begin(), CSInfo.end(), + [&VA](MachineFunction::ArgRegPair ArgReg) { + return ArgReg.Reg == VA.getLocReg(); + }), + CSInfo.end()); + } else { + RegsToPass.emplace_back(VA.getLocReg(), Arg); + RegsUsed.insert(VA.getLocReg()); + const TargetOptions &Options = DAG.getTarget().Options; + if (Options.EnableDebugEntryValues) + CSInfo.emplace_back(VA.getLocReg(), i); + } } else { assert(VA.isMemLoc()); @@ -3899,6 +4065,20 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, Ops.push_back(DAG.getRegister(RegToPass.first, RegToPass.second.getValueType())); + // Check callee args/returns for SVE registers and set calling convention + // accordingly. + if (CallConv == CallingConv::C) { + bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){ + return Out.VT.isScalableVector(); + }); + bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){ + return In.VT.isScalableVector(); + }); + + if (CalleeInSVE || CalleeOutSVE) + CallConv = CallingConv::AArch64_SVE_VectorCall; + } + // Add a register mask operand representing the call-preserved registers. const uint32_t *Mask; const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); @@ -3930,12 +4110,15 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // actual call instruction. if (IsTailCall) { MF.getFrameInfo().setHasTailCall(); - return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops); + SDValue Ret = DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops); + DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo)); + return Ret; } // Returns a chain and a flag for retval copy to use. Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops); InFlag = Chain.getValue(1); + DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); uint64_t CalleePopBytes = DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0; @@ -3983,7 +4166,8 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, // Copy the result values into the output registers. 
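The AExtUpper handling above shifts the upper half of a split value into place and ORs it with the half already assigned to the same X register. A self-contained sketch of that bit packing:

    #include <cstdint>

    // Place Hi in bits [63:32] and Lo in bits [31:0], matching the SHL-by-32
    // followed by the OR performed in the lowering code above when packing
    // parts of an [N x i32] into one register.
    uint64_t packHalves(uint32_t Lo, uint32_t Hi) {
      return static_cast<uint64_t>(Lo) | (static_cast<uint64_t>(Hi) << 32);
    }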
SDValue Flag; - SmallVector RetOps(1, Chain); + SmallVector, 4> RetVals; + SmallSet RegsUsed; for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size(); ++i, ++realRVLocIdx) { CCValAssign &VA = RVLocs[i]; @@ -4005,11 +4189,38 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, case CCValAssign::BCvt: Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); break; + case CCValAssign::AExt: + case CCValAssign::ZExt: + Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT()); + break; + case CCValAssign::AExtUpper: + assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits"); + Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT()); + Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg, + DAG.getConstant(32, DL, VA.getLocVT())); + break; } - Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag); + if (RegsUsed.count(VA.getLocReg())) { + SDValue &Bits = + std::find_if(RetVals.begin(), RetVals.end(), + [=](const std::pair &Elt) { + return Elt.first == VA.getLocReg(); + }) + ->second; + Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg); + } else { + RetVals.emplace_back(VA.getLocReg(), Arg); + RegsUsed.insert(VA.getLocReg()); + } + } + + SmallVector RetOps(1, Chain); + for (auto &RetVal : RetVals) { + Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag); Flag = Chain.getValue(1); - RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + RetOps.push_back( + DAG.getRegister(RetVal.first, RetVal.second.getValueType())); } // Windows AArch64 ABIs require that for returning structs by value we copy @@ -4139,8 +4350,7 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { GlobalAddressSDNode *GN = cast(Op); const GlobalValue *GV = GN->getGlobal(); - unsigned char OpFlags = - Subtarget->ClassifyGlobalReference(GV, getTargetMachine()); + unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine()); if (OpFlags != AArch64II::MO_NO_FLAG) assert(cast(Op)->getOffset() == 0 && @@ -4204,6 +4414,7 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, SDLoc DL(Op); MVT PtrVT = getPointerTy(DAG.getDataLayout()); + MVT PtrMemVT = getPointerMemTy(DAG.getDataLayout()); const GlobalValue *GV = cast(Op)->getGlobal(); SDValue TLVPAddr = @@ -4214,13 +4425,15 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, // to obtain the address of the variable. SDValue Chain = DAG.getEntryNode(); SDValue FuncTLVGet = DAG.getLoad( - MVT::i64, DL, Chain, DescAddr, + PtrMemVT, DL, Chain, DescAddr, MachinePointerInfo::getGOT(DAG.getMachineFunction()), - /* Alignment = */ 8, - MachineMemOperand::MONonTemporal | MachineMemOperand::MOInvariant | - MachineMemOperand::MODereferenceable); + /* Alignment = */ PtrMemVT.getSizeInBits() / 8, + MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); Chain = FuncTLVGet.getValue(1); + // Extend loaded pointer if necessary (i.e. if ILP32) to DAG pointer. + FuncTLVGet = DAG.getZExtOrTrunc(FuncTLVGet, DL, PtrVT); + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); MFI.setAdjustsStack(true); @@ -4470,7 +4683,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { // value of a libcall against zero, which is just what the rest of LowerBR_CC // is expecting to deal with. 
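LowerReturn above now collects (register, value) pairs and ORs together any values assigned to a register that is already in use, emitting the CopyToReg nodes in a second loop. A standalone sketch of that bookkeeping, with plain integers standing in for SDValues:

    #include <algorithm>
    #include <cstdint>
    #include <set>
    #include <utility>
    #include <vector>

    // If Reg was seen before, fold the new bits into the existing entry (the
    // halves were already extended and shifted into disjoint bit positions);
    // otherwise record a fresh pair, as RetVals/RegsUsed do above.
    void addRetVal(std::vector<std::pair<unsigned, uint64_t>> &RetVals,
                   std::set<unsigned> &RegsUsed, unsigned Reg, uint64_t Bits) {
      if (RegsUsed.count(Reg)) {
        auto It = std::find_if(RetVals.begin(), RetVals.end(),
                               [Reg](const std::pair<unsigned, uint64_t> &E) {
                                 return E.first == Reg;
                               });
        It->second |= Bits;
      } else {
        RetVals.emplace_back(Reg, Bits);
        RegsUsed.insert(Reg);
      }
    }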
if (LHS.getValueType() == MVT::f128) { - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); + softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -4736,7 +4949,7 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // Handle f128 first, since one possible outcome is a normal integer // comparison which gets picked up by the next if statement. if (LHS.getValueType() == MVT::f128) { - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); + softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS); // If softenSetCCOperands returned a scalar, use it. if (!RHS.getNode()) { @@ -4798,7 +5011,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, // Handle f128 first, because it will result in a comparison of some RTLIB // call result against zero. if (LHS.getValueType() == MVT::f128) { - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); + softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -5096,6 +5309,7 @@ SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op, SDLoc DL(Op); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy(DAG.getDataLayout())); + FR = DAG.getZExtOrTrunc(FR, DL, getPointerMemTy(DAG.getDataLayout())); const Value *SV = cast(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), MachinePointerInfo(SV)); @@ -5202,15 +5416,15 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op, // AAPCS has three pointers and two ints (= 32 bytes), Darwin has single // pointer. SDLoc DL(Op); - unsigned VaListSize = - Subtarget->isTargetDarwin() || Subtarget->isTargetWindows() ? 8 : 32; + unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8; + unsigned VaListSize = (Subtarget->isTargetDarwin() || + Subtarget->isTargetWindows()) ? PtrSize : 32; const Value *DestSV = cast(Op.getOperand(3))->getValue(); const Value *SrcSV = cast(Op.getOperand(4))->getValue(); - return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), - Op.getOperand(2), - DAG.getConstant(VaListSize, DL, MVT::i32), - 8, false, false, false, MachinePointerInfo(DestSV), + return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2), + DAG.getConstant(VaListSize, DL, MVT::i32), PtrSize, + false, false, false, MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV)); } @@ -5224,12 +5438,15 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Addr = Op.getOperand(1); unsigned Align = Op.getConstantOperandVal(3); + unsigned MinSlotSize = Subtarget->isTargetILP32() ? 
4 : 8; auto PtrVT = getPointerTy(DAG.getDataLayout()); - - SDValue VAList = DAG.getLoad(PtrVT, DL, Chain, Addr, MachinePointerInfo(V)); + auto PtrMemVT = getPointerMemTy(DAG.getDataLayout()); + SDValue VAList = + DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V)); Chain = VAList.getValue(1); + VAList = DAG.getZExtOrTrunc(VAList, DL, PtrVT); - if (Align > 8) { + if (Align > MinSlotSize) { assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2"); VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(Align - 1, DL, PtrVT)); @@ -5238,14 +5455,14 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { } Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); - uint64_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy); + unsigned ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy); // Scalar integer and FP values smaller than 64 bits are implicitly extended // up to 64 bits. At the very least, we have to increase the striding of the // vaargs list to match this, and for FP values we need to introduce // FP_ROUND nodes as well. if (VT.isInteger() && !VT.isVector()) - ArgSize = 8; + ArgSize = std::max(ArgSize, MinSlotSize); bool NeedFPTrunc = false; if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) { ArgSize = 8; @@ -5255,6 +5472,8 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { // Increment the pointer, VAList, to the next vaarg SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(ArgSize, DL, PtrVT)); + VANext = DAG.getZExtOrTrunc(VANext, DL, PtrMemVT); + // Store the incremented VAList to the legalized pointer SDValue APStore = DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V)); @@ -5284,10 +5503,15 @@ SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op, SDLoc DL(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); SDValue FrameAddr = - DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT); + DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, MVT::i64); while (Depth--) FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr, MachinePointerInfo()); + + if (Subtarget->isTargetILP32()) + FrameAddr = DAG.getNode(ISD::AssertZext, DL, MVT::i64, FrameAddr, + DAG.getValueType(VT)); + return FrameAddr; } @@ -5306,9 +5530,9 @@ SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op, // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. -unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const { - unsigned Reg = MatchRegisterName(RegName); +Register AArch64TargetLowering:: +getRegisterByName(const char* RegName, EVT VT, const MachineFunction &MF) const { + Register Reg = MatchRegisterName(RegName); if (AArch64::X1 <= Reg && Reg <= AArch64::X28) { const MCRegisterInfo *MRI = Subtarget->getRegisterInfo(); unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false); @@ -5653,6 +5877,21 @@ const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const { return "r"; } +enum PredicateConstraint { + Upl, + Upa, + Invalid +}; + +static PredicateConstraint parsePredicateConstraint(StringRef Constraint) { + PredicateConstraint P = PredicateConstraint::Invalid; + if (Constraint == "Upa") + P = PredicateConstraint::Upa; + if (Constraint == "Upl") + P = PredicateConstraint::Upl; + return P; +} + /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. 
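In the LowerVAARG hunk above, an over-aligned va_arg slot bumps the list pointer up to the next multiple of the (power-of-two) alignment before the load; the visible ADD of Align - 1 is the first half of the usual add-then-mask rounding. A minimal sketch of that rounding, with a hypothetical helper name:

    #include <cstdint>

    // Round VAList up to a multiple of Align; Align must be a power of two,
    // as the assert in the code above requires.
    uint64_t alignVAList(uint64_t VAList, uint64_t Align) {
      return (VAList + Align - 1) & ~(Align - 1);
    }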
AArch64TargetLowering::ConstraintType @@ -5661,19 +5900,30 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const { switch (Constraint[0]) { default: break; - case 'z': - return C_Other; case 'x': case 'w': + case 'y': return C_RegisterClass; // An address with a single base register. Due to the way we // currently handle addresses it is the same as 'r'. case 'Q': return C_Memory; + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'Y': + case 'Z': + return C_Immediate; + case 'z': case 'S': // A symbolic address return C_Other; } - } + } else if (parsePredicateConstraint(Constraint) != + PredicateConstraint::Invalid) + return C_RegisterClass; return TargetLowering::getConstraintType(Constraint); } @@ -5697,12 +5947,17 @@ AArch64TargetLowering::getSingleConstraintMatchWeight( break; case 'x': case 'w': + case 'y': if (type->isFloatingPointTy() || type->isVectorTy()) weight = CW_Register; break; case 'z': weight = CW_Constant; break; + case 'U': + if (parsePredicateConstraint(constraint) != PredicateConstraint::Invalid) + weight = CW_Register; + break; } return weight; } @@ -5719,6 +5974,8 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( case 'w': if (!Subtarget->hasFPARMv8()) break; + if (VT.isScalableVector()) + return std::make_pair(0U, &AArch64::ZPRRegClass); if (VT.getSizeInBits() == 16) return std::make_pair(0U, &AArch64::FPR16RegClass); if (VT.getSizeInBits() == 32) @@ -5733,9 +5990,25 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( case 'x': if (!Subtarget->hasFPARMv8()) break; + if (VT.isScalableVector()) + return std::make_pair(0U, &AArch64::ZPR_4bRegClass); if (VT.getSizeInBits() == 128) return std::make_pair(0U, &AArch64::FPR128_loRegClass); break; + case 'y': + if (!Subtarget->hasFPARMv8()) + break; + if (VT.isScalableVector()) + return std::make_pair(0U, &AArch64::ZPR_3bRegClass); + break; + } + } else { + PredicateConstraint PC = parsePredicateConstraint(Constraint); + if (PC != PredicateConstraint::Invalid) { + assert(VT.isScalableVector()); + bool restricted = (PC == PredicateConstraint::Upl); + return restricted ? std::make_pair(0U, &AArch64::PPR_3bRegClass) + : std::make_pair(0U, &AArch64::PPRRegClass); } } if (StringRef("{cc}").equals_lower(Constraint)) @@ -6279,6 +6552,8 @@ static bool isREVMask(ArrayRef M, EVT VT, unsigned BlockSize) { static bool isZIPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { unsigned NumElts = VT.getVectorNumElements(); + if (NumElts % 2 != 0) + return false; WhichResult = (M[0] == 0 ? 0 : 1); unsigned Idx = WhichResult * NumElts / 2; for (unsigned i = 0; i != NumElts; i += 2) { @@ -6446,8 +6721,7 @@ static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) { if (!isConcatMask(Mask, VT, SplitV0)) return SDValue(); - EVT CastVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), - VT.getVectorNumElements() / 2); + EVT CastVT = VT.getHalfNumVectorElementsVT(*DAG.getContext()); if (SplitV0) { V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0, DAG.getConstant(0, DL, MVT::i64)); @@ -6790,6 +7064,41 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, return GenerateTBL(Op, ShuffleMask, DAG); } +SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + EVT VT = Op.getValueType(); + EVT ElemVT = VT.getScalarType(); + + SDValue SplatVal = Op.getOperand(0); + + // Extend input splat value where needed to fit into a GPR (32b or 64b only) + // FPRs don't have this restriction. 
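The isZIPMask early-out added above rejects odd element counts because the loop walks the shuffle mask two entries at a time. A self-contained sketch of the zip shuffle shape being matched (simplified: the in-tree check also accepts undef, i.e. negative, mask entries):

    #include <vector>

    // ZIP1 interleaves the low halves of the two sources and ZIP2 the high
    // halves, so the mask must look like { Idx, Idx+N, Idx+1, Idx+1+N, ... }
    // with Idx = 0 (ZIP1) or N/2 (ZIP2).
    bool isZipMask(const std::vector<int> &M, unsigned NumElts) {
      if (NumElts % 2 != 0 || M.size() != NumElts)
        return false;
      unsigned Which = (M[0] == 0) ? 0 : 1;
      unsigned Idx = Which * NumElts / 2;
      for (unsigned i = 0; i != NumElts; i += 2, ++Idx)
        if ((unsigned)M[i] != Idx || (unsigned)M[i + 1] != Idx + NumElts)
          return false;
      return true;
    }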
+ switch (ElemVT.getSimpleVT().SimpleTy) { + case MVT::i8: + case MVT::i16: + SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i32); + break; + case MVT::i64: + SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64); + break; + case MVT::i32: + // Fine as is + break; + // TODO: we can support splats of i1s and float types, but haven't added + // patterns yet. + case MVT::i1: + case MVT::f16: + case MVT::f32: + case MVT::f64: + default: + llvm_unreachable("Unsupported SPLAT_VECTOR input operand type"); + break; + } + + return DAG.getNode(AArch64ISD::DUP, dl, VT, SplatVal); +} + static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits, APInt &UndefBits) { EVT VT = BVN->getValueType(0); @@ -8063,7 +8372,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); Info.offset = 0; - Info.align = 0; + Info.align.reset(); // volatile loads with NEON intrinsics not supported Info.flags = MachineMemOperand::MOLoad; return true; @@ -8089,7 +8398,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); Info.offset = 0; - Info.align = 0; + Info.align.reset(); // volatile stores with NEON intrinsics not supported Info.flags = MachineMemOperand::MOStore; return true; @@ -8101,7 +8410,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::getVT(PtrTy->getElementType()); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); + Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType())); Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile; return true; } @@ -8112,7 +8421,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::getVT(PtrTy->getElementType()); Info.ptrVal = I.getArgOperand(1); Info.offset = 0; - Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); + Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType())); Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; return true; } @@ -8122,7 +8431,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::i128; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = 16; + Info.align = Align(16); Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile; return true; case Intrinsic::aarch64_stlxp: @@ -8131,7 +8440,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::i128; Info.ptrVal = I.getArgOperand(2); Info.offset = 0; - Info.align = 16; + Info.align = Align(16); Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; return true; default: @@ -8278,7 +8587,7 @@ bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const { // Get the shift amount based on the scaling factor: // log2(sizeof(IdxTy)) - log2(8). uint64_t ShiftAmt = - countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy)) - 3; + countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy).getFixedSize()) - 3; // Is the constant foldable in the shift of the addressing mode? // I.e., shift amount is between 1 and 4 inclusive. 
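The scaling-factor computation above derives the addressing-mode shift from the index type's store size, i.e. log2(bits) - 3. A standalone sketch, assuming a power-of-two size of at least 8 bits:

    #include <cstdint>

    // countTrailingZeros of a power of two is its log2, so a 64-bit index
    // type yields 6 - 3 = 3, the log2 of the element size in bytes.
    unsigned shiftForIndexType(uint64_t StoreSizeInBits) {
      unsigned Log2Bits = 0;
      while (StoreSizeInBits > 1) {
        StoreSizeInBits >>= 1;
        ++Log2Bits;
      }
      return Log2Bits - 3;
    }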
if (ShiftAmt == 0 || ShiftAmt > 4) @@ -8739,6 +9048,39 @@ EVT AArch64TargetLowering::getOptimalMemOpType( return MVT::Other; } +LLT AArch64TargetLowering::getOptimalMemOpLLT( + uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, + bool ZeroMemset, bool MemcpyStrSrc, + const AttributeList &FuncAttributes) const { + bool CanImplicitFloat = + !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat); + bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat; + bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat; + // Only use AdvSIMD to implement memset of 32-byte and above. It would have + // taken one instruction to materialize the v2i64 zero and one store (with + // restrictive addressing mode). Just do i64 stores. + bool IsSmallMemset = IsMemset && Size < 32; + auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) { + if (memOpAlign(SrcAlign, DstAlign, AlignCheck)) + return true; + bool Fast; + return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone, + &Fast) && + Fast; + }; + + if (CanUseNEON && IsMemset && !IsSmallMemset && + AlignmentIsAcceptable(MVT::v2i64, 16)) + return LLT::vector(2, 64); + if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16)) + return LLT::scalar(128); + if (Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8)) + return LLT::scalar(64); + if (Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4)) + return LLT::scalar(32); + return LLT(); +} + // 12-bit optionally shifted immediates are legal for adds. bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const { if (Immed == std::numeric_limits::min()) { @@ -10065,6 +10407,14 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) { Opcode = AArch64ISD::SQSHLU_I; IsRightShift = false; break; + case Intrinsic::aarch64_neon_sshl: + case Intrinsic::aarch64_neon_ushl: + // For positive shift amounts we can use SHL, as ushl/sshl perform a regular + // left shift for positive shift amounts. Below, we only replace the current + // node with VSHL, if this condition is met. + Opcode = AArch64ISD::VSHL; + IsRightShift = false; + break; } if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) { @@ -10151,6 +10501,8 @@ static SDValue performIntrinsicCombine(SDNode *N, case Intrinsic::aarch64_neon_sqshlu: case Intrinsic::aarch64_neon_srshl: case Intrinsic::aarch64_neon_urshl: + case Intrinsic::aarch64_neon_sshl: + case Intrinsic::aarch64_neon_ushl: return tryCombineShiftImm(IID, N, DAG); case Intrinsic::aarch64_crc32b: case Intrinsic::aarch64_crc32cb: @@ -10482,10 +10834,10 @@ static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, return ReplacedSplat; SDLoc DL(S); - unsigned NumElts = VT.getVectorNumElements() / 2; + // Split VT into two. - EVT HalfVT = - EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumElts); + EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext()); + unsigned NumElts = HalfVT.getVectorNumElements(); SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal, DAG.getConstant(0, DL, MVT::i64)); SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal, @@ -10567,7 +10919,7 @@ static SDValue performPostLD1Combine(SDNode *N, // are predecessors to each other or the Vector. 
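The post-increment combine above guards against creating cycles by walking predecessor edges with the visited set and worklist declared in the lines that follow. A generic, self-contained sketch of that pattern, with plain structs rather than SDNodes:

    #include <set>
    #include <vector>

    struct Node { std::vector<Node *> Preds; };

    // Starting from the seed nodes, follow predecessor edges and report
    // whether Target is reachable; the Visited set keeps the walk finite.
    bool reachesViaPreds(std::vector<Node *> Worklist, const Node *Target) {
      std::set<const Node *> Visited;
      while (!Worklist.empty()) {
        Node *N = Worklist.back();
        Worklist.pop_back();
        if (!Visited.insert(N).second)
          continue;
        if (N == Target)
          return true;
        for (Node *P : N->Preds)
          Worklist.push_back(P);
      }
      return false;
    }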
SmallPtrSet Visited; SmallVector Worklist; - Visited.insert(N); + Visited.insert(Addr.getNode()); Worklist.push_back(User); Worklist.push_back(LD); Worklist.push_back(Vector.getNode()); @@ -11983,6 +12335,27 @@ bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial( return Mask->getValue().isPowerOf2(); } +bool AArch64TargetLowering:: + shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, + unsigned OldShiftOpcode, unsigned NewShiftOpcode, + SelectionDAG &DAG) const { + // Does baseline recommend not to perform the fold by default? + if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG)) + return false; + // Else, if this is a vector shift, prefer 'shl'. + return X.getValueType().isScalarInteger() || NewShiftOpcode == ISD::SHL; +} + +bool AArch64TargetLowering::shouldExpandShift(SelectionDAG &DAG, + SDNode *N) const { + if (DAG.getMachineFunction().getFunction().hasMinSize() && + !Subtarget->isTargetWindows()) + return false; + return true; +} + void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { // Update IsSplitCSR in AArch64unctionInfo. AArch64FunctionInfo *AFI = Entry->getParent()->getInfo(); @@ -12009,7 +12382,7 @@ void AArch64TargetLowering::insertCopiesSplitCSR( else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); - unsigned NewVR = MRI->createVirtualRegister(RC); + Register NewVR = MRI->createVirtualRegister(RC); // Create copy from CSR to a virtual register. // FIXME: this currently does not emit CFI pseudo-instructions, it works // fine for CXX_FAST_TLS since the C++-style TLS access functions should be diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 4421c31f65c9..00fa96bc4e6d 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -191,6 +191,11 @@ enum NodeType : unsigned { FRECPE, FRECPS, FRSQRTE, FRSQRTS, + SUNPKHI, + SUNPKLO, + UUNPKHI, + UUNPKLO, + // NEON Load/Store with post-increment base updates LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE, LD3post, @@ -261,6 +266,14 @@ public: const SelectionDAG &DAG, unsigned Depth = 0) const override; + MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override { + // Returning i64 unconditionally here (i.e. even for ILP32) means that the + // *DAG* representation of pointers will always be 64-bits. They will be + // truncated and extended when transferred to memory, but the 64-bit DAG + // allows us to use AArch64's addressing modes much more easily. + return MVT::getIntegerVT(64); + } + bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const override; @@ -272,6 +285,10 @@ public: EVT VT, unsigned AddrSpace = 0, unsigned Align = 1, MachineMemOperand::Flags Flags = MachineMemOperand::MONone, bool *Fast = nullptr) const override; + /// LLT variant. + bool allowsMisalignedMemoryAccesses( + LLT Ty, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags, + bool *Fast = nullptr) const override; /// Provide custom lowering hooks for some operations. 
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; @@ -358,6 +375,10 @@ public: bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, const AttributeList &FuncAttributes) const override; + LLT getOptimalMemOpLLT(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, + const AttributeList &FuncAttributes) const override; + /// Return true if the addressing mode represented by AM is legal for this /// target, for a load/store of the specified type. bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, @@ -480,11 +501,12 @@ public: return VT.getSizeInBits() >= 64; // vector 'bic' } - bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override { - if (DAG.getMachineFunction().getFunction().hasMinSize()) - return false; - return true; - } + bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, + unsigned OldShiftOpcode, unsigned NewShiftOpcode, + SelectionDAG &DAG) const override; + + bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override; bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override { @@ -655,6 +677,7 @@ private: SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; @@ -690,8 +713,8 @@ private: unsigned combineRepeatedFPDivisors() const override; ConstraintType getConstraintType(StringRef Constraint) const override; - unsigned getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const override; + Register getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const override; /// Examine constraint string and operand type and determine a weight value. /// The operand object must already have been set up with the operand type. 
diff --git a/lib/Target/AArch64/AArch64InstrAtomics.td b/lib/Target/AArch64/AArch64InstrAtomics.td index e22cb44d81ae..459b53923625 100644 --- a/lib/Target/AArch64/AArch64InstrAtomics.td +++ b/lib/Target/AArch64/AArch64InstrAtomics.td @@ -204,19 +204,27 @@ def : Pat<(relaxed_store def ldxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i8; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }]; +} def ldxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i16; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }]; +} def ldxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i32; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }]; +} def ldxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i64; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }]; +} def : Pat<(ldxr_1 GPR64sp:$addr), (SUBREG_TO_REG (i64 0), (LDXRB GPR64sp:$addr), sub_32)>; @@ -237,19 +245,27 @@ def : Pat<(and (ldxr_4 GPR64sp:$addr), 0xffffffff), def ldaxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i8; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }]; +} def ldaxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i16; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }]; +} def ldaxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i32; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }]; +} def ldaxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i64; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }]; +} def : Pat<(ldaxr_1 GPR64sp:$addr), (SUBREG_TO_REG (i64 0), (LDAXRB GPR64sp:$addr), sub_32)>; @@ -271,22 +287,30 @@ def : Pat<(and (ldaxr_4 GPR64sp:$addr), 0xffffffff), def stxr_1 : PatFrag<(ops node:$val, node:$ptr), (int_aarch64_stxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i8; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }]; +} def stxr_2 : PatFrag<(ops node:$val, node:$ptr), (int_aarch64_stxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i16; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }]; +} def stxr_4 : PatFrag<(ops node:$val, node:$ptr), (int_aarch64_stxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i32; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }]; +} def stxr_8 : PatFrag<(ops node:$val, node:$ptr), (int_aarch64_stxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i64; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }]; +} def : Pat<(stxr_1 GPR64:$val, GPR64sp:$addr), @@ -317,22 +341,30 @@ def : Pat<(stxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr), def stlxr_1 : PatFrag<(ops node:$val, node:$ptr), (int_aarch64_stlxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i8; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }]; +} def stlxr_2 : PatFrag<(ops node:$val, node:$ptr), (int_aarch64_stlxr 
node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i16; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }]; +} def stlxr_4 : PatFrag<(ops node:$val, node:$ptr), (int_aarch64_stlxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i32; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }]; +} def stlxr_8 : PatFrag<(ops node:$val, node:$ptr), (int_aarch64_stlxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i64; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }]; +} def : Pat<(stlxr_1 GPR64:$val, GPR64sp:$addr), @@ -422,4 +454,3 @@ let Predicates = [HasLSE] in { defm : LDOPregister_patterns_mod<"LDADD", "atomic_load_sub", "SUB">; defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">; } - diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index d619137b55c5..f555e4123307 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -480,76 +480,40 @@ def BranchTarget14Operand : BranchTarget<14>; def BranchTarget26Operand : BranchTarget<26>; def PCRelLabel19Operand : PCRelLabel<19>; -def MovZSymbolG3AsmOperand : AsmOperandClass { - let Name = "MovZSymbolG3"; +def MovWSymbolG3AsmOperand : AsmOperandClass { + let Name = "MovWSymbolG3"; let RenderMethod = "addImmOperands"; } -def movz_symbol_g3 : Operand { - let ParserMatchClass = MovZSymbolG3AsmOperand; +def movw_symbol_g3 : Operand { + let ParserMatchClass = MovWSymbolG3AsmOperand; } -def MovZSymbolG2AsmOperand : AsmOperandClass { - let Name = "MovZSymbolG2"; +def MovWSymbolG2AsmOperand : AsmOperandClass { + let Name = "MovWSymbolG2"; let RenderMethod = "addImmOperands"; } -def movz_symbol_g2 : Operand { - let ParserMatchClass = MovZSymbolG2AsmOperand; +def movw_symbol_g2 : Operand { + let ParserMatchClass = MovWSymbolG2AsmOperand; } -def MovZSymbolG1AsmOperand : AsmOperandClass { - let Name = "MovZSymbolG1"; +def MovWSymbolG1AsmOperand : AsmOperandClass { + let Name = "MovWSymbolG1"; let RenderMethod = "addImmOperands"; } -def movz_symbol_g1 : Operand { - let ParserMatchClass = MovZSymbolG1AsmOperand; +def movw_symbol_g1 : Operand { + let ParserMatchClass = MovWSymbolG1AsmOperand; } -def MovZSymbolG0AsmOperand : AsmOperandClass { - let Name = "MovZSymbolG0"; +def MovWSymbolG0AsmOperand : AsmOperandClass { + let Name = "MovWSymbolG0"; let RenderMethod = "addImmOperands"; } -def movz_symbol_g0 : Operand { - let ParserMatchClass = MovZSymbolG0AsmOperand; -} - -def MovKSymbolG3AsmOperand : AsmOperandClass { - let Name = "MovKSymbolG3"; - let RenderMethod = "addImmOperands"; -} - -def movk_symbol_g3 : Operand { - let ParserMatchClass = MovKSymbolG3AsmOperand; -} - -def MovKSymbolG2AsmOperand : AsmOperandClass { - let Name = "MovKSymbolG2"; - let RenderMethod = "addImmOperands"; -} - -def movk_symbol_g2 : Operand { - let ParserMatchClass = MovKSymbolG2AsmOperand; -} - -def MovKSymbolG1AsmOperand : AsmOperandClass { - let Name = "MovKSymbolG1"; - let RenderMethod = "addImmOperands"; -} - -def movk_symbol_g1 : Operand { - let ParserMatchClass = MovKSymbolG1AsmOperand; -} - -def MovKSymbolG0AsmOperand : AsmOperandClass { - let Name = "MovKSymbolG0"; - let RenderMethod = "addImmOperands"; -} - -def movk_symbol_g0 : Operand { - let ParserMatchClass = MovKSymbolG0AsmOperand; +def movw_symbol_g0 : Operand { + let ParserMatchClass = MovWSymbolG0AsmOperand; } class fixedpoint_i32 @@ -673,6 +637,11 @@ def 
logical_imm64_XFORM : SDNodeXFormgetTargetConstant(enc, SDLoc(N), MVT::i32); }]>; +def gi_logical_imm32_XFORM : GICustomOperandRenderer<"renderLogicalImm32">, + GISDNodeXFormEquiv; +def gi_logical_imm64_XFORM : GICustomOperandRenderer<"renderLogicalImm64">, + GISDNodeXFormEquiv; + let DiagnosticType = "LogicalSecondSource" in { def LogicalImm32Operand : AsmOperandClass { let Name = "LogicalImm32"; @@ -714,12 +683,15 @@ def logical_imm64_not : Operand { let ParserMatchClass = LogicalImm64NotOperand; } -// imm0_65535 predicate - True if the immediate is in the range [0,65535]. -def imm0_65535 : Operand, ImmLeaf, PrintMethod = "printImmHex" in { +def i32_imm0_65535 : Operand, TImmLeaf { - let ParserMatchClass = AsmImmRange<0, 65535>; - let PrintMethod = "printImmHex"; +}]>; + +def i64_imm0_65535 : Operand, TImmLeaf; } // imm0_255 predicate - True if the immediate is in the range [0,255]. @@ -815,6 +787,14 @@ class arith_shifted_reg def arith_shifted_reg32 : arith_shifted_reg; def arith_shifted_reg64 : arith_shifted_reg; +def gi_arith_shifted_reg32 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + +def gi_arith_shifted_reg64 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + // An arithmetic shifter operand: // {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr, 11 = ror // {5-0} - imm6 @@ -837,6 +817,14 @@ class logical_shifted_reg def logical_shifted_reg32 : logical_shifted_reg; def logical_shifted_reg64 : logical_shifted_reg; +def gi_logical_shifted_reg32 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + +def gi_logical_shifted_reg64 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + // A logical vector shifter operand: // {7-6} - shift type: 00 = lsl // {5-0} - imm6: #0, #8, #16, or #24 @@ -918,6 +906,14 @@ class neg_addsub_shifted_imm def neg_addsub_shifted_imm32 : neg_addsub_shifted_imm; def neg_addsub_shifted_imm64 : neg_addsub_shifted_imm; +def gi_neg_addsub_shifted_imm32 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + +def gi_neg_addsub_shifted_imm64 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + // An extend operand: // {5-3} - extend type // {2-0} - imm3 @@ -948,6 +944,21 @@ class arith_extended_reg32to64 : Operand, let MIOperandInfo = (ops GPR32, arith_extend64); } +def arith_extended_reg32_i32 : arith_extended_reg32; +def gi_arith_extended_reg32_i32 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + +def arith_extended_reg32_i64 : arith_extended_reg32; +def gi_arith_extended_reg32_i64 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + +def arith_extended_reg32to64_i64 : arith_extended_reg32to64; +def gi_arith_extended_reg32to64_i64 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + // Floating-point immediate. 
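The shifted-register operand classes above document their encoding as bits {7-6} for the shift type and {5-0} for the 6-bit amount. A small C++ sketch of packing such an operand, for illustration only (this is not how TableGen emits the field):

    // Shift type: 00 = lsl, 01 = lsr, 10 = asr, 11 = ror; amount is 6 bits.
    unsigned encodeShiftedRegOperand(unsigned ShiftType, unsigned Amount) {
      return ((ShiftType & 0x3) << 6) | (Amount & 0x3f);
    }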
def fpimm16 : Operand, FPImmLeaf : AsmOperandClass { let RenderMethod = "addVectorIndexOperands"; } -class AsmVectorIndexOpnd - : Operand, ImmLeaf { +class AsmVectorIndexOpnd + : Operand, ImmLeaf { let ParserMatchClass = mc; let PrintMethod = "printVectorIndex"; } @@ -1012,11 +1023,17 @@ def VectorIndexHOperand : AsmVectorIndex<0, 7>; def VectorIndexSOperand : AsmVectorIndex<0, 3>; def VectorIndexDOperand : AsmVectorIndex<0, 1>; -def VectorIndex1 : AsmVectorIndexOpnd; -def VectorIndexB : AsmVectorIndexOpnd; -def VectorIndexH : AsmVectorIndexOpnd; -def VectorIndexS : AsmVectorIndexOpnd; -def VectorIndexD : AsmVectorIndexOpnd; +def VectorIndex1 : AsmVectorIndexOpnd; +def VectorIndexB : AsmVectorIndexOpnd; +def VectorIndexH : AsmVectorIndexOpnd; +def VectorIndexS : AsmVectorIndexOpnd; +def VectorIndexD : AsmVectorIndexOpnd; + +def VectorIndex132b : AsmVectorIndexOpnd; +def VectorIndexB32b : AsmVectorIndexOpnd; +def VectorIndexH32b : AsmVectorIndexOpnd; +def VectorIndexS32b : AsmVectorIndexOpnd; +def VectorIndexD32b : AsmVectorIndexOpnd; def SVEVectorIndexExtDupBOperand : AsmVectorIndex<0, 63, "SVE">; def SVEVectorIndexExtDupHOperand : AsmVectorIndex<0, 31, "SVE">; @@ -1025,15 +1042,15 @@ def SVEVectorIndexExtDupDOperand : AsmVectorIndex<0, 7, "SVE">; def SVEVectorIndexExtDupQOperand : AsmVectorIndex<0, 3, "SVE">; def sve_elm_idx_extdup_b - : AsmVectorIndexOpnd; + : AsmVectorIndexOpnd; def sve_elm_idx_extdup_h - : AsmVectorIndexOpnd; + : AsmVectorIndexOpnd; def sve_elm_idx_extdup_s - : AsmVectorIndexOpnd; + : AsmVectorIndexOpnd; def sve_elm_idx_extdup_d - : AsmVectorIndexOpnd; + : AsmVectorIndexOpnd; def sve_elm_idx_extdup_q - : AsmVectorIndexOpnd; + : AsmVectorIndexOpnd; // 8-bit immediate for AdvSIMD where 64-bit values of the form: // aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh @@ -1082,6 +1099,45 @@ class RtSystemI let Inst{4-0} = Rt; } +// System instructions for transactional memory extension +class TMBaseSystemI CRm, bits<3> op2, dag oops, dag iops, + string asm, string operands, list pattern> + : BaseSystemI, + Sched<[WriteSys]> { + let Inst{20-12} = 0b000110011; + let Inst{11-8} = CRm; + let Inst{7-5} = op2; + let DecoderMethod = ""; + + let mayLoad = 1; + let mayStore = 1; +} + +// System instructions for transactional memory - single input operand +class TMSystemI CRm, string asm, list pattern> + : TMBaseSystemI<0b1, CRm, 0b011, + (outs GPR64:$Rt), (ins), asm, "\t$Rt", pattern> { + bits<5> Rt; + let Inst{4-0} = Rt; +} + +// System instructions for transactional memory - no operand +class TMSystemINoOperand CRm, string asm, list pattern> + : TMBaseSystemI<0b0, CRm, 0b011, (outs), (ins), asm, "", pattern> { + let Inst{4-0} = 0b11111; +} + +// System instructions for exit from transactions +class TMSystemException op1, string asm, list pattern> + : I<(outs), (ins i64_imm0_65535:$imm), asm, "\t$imm", "", pattern>, + Sched<[WriteSys]> { + bits<16> imm; + let Inst{31-24} = 0b11010100; + let Inst{23-21} = op1; + let Inst{20-5} = imm; + let Inst{4-0} = 0b00000; +} + // Hint instructions that take both a CRm and a 3-bit immediate. 
// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot // model patterns with sufficiently fine granularity @@ -2180,11 +2236,11 @@ multiclass AddSub, mnemonic, OpNode> { + arith_extended_reg32_i32, mnemonic, OpNode> { let Inst{31} = 0; } def Xrx : BaseAddSubEReg, mnemonic, OpNode> { + arith_extended_reg32to64_i64, mnemonic, OpNode> { let Inst{31} = 1; } } @@ -2254,11 +2310,11 @@ multiclass AddSubS, mnemonic, OpNode> { + arith_extended_reg32_i32, mnemonic, OpNode> { let Inst{31} = 0; } def Xrx : BaseAddSubEReg, mnemonic, OpNode> { + arith_extended_reg32_i64, mnemonic, OpNode> { let Inst{31} = 1; } } @@ -2969,6 +3025,22 @@ def ro_Xindexed32 : ComplexPattern", []>; def ro_Xindexed64 : ComplexPattern", []>; def ro_Xindexed128 : ComplexPattern", []>; +def gi_ro_Xindexed8 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_ro_Xindexed16 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_ro_Xindexed32 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_ro_Xindexed64 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_ro_Xindexed128 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; + def ro_Windexed8 : ComplexPattern", []>; def ro_Windexed16 : ComplexPattern", []>; def ro_Windexed32 : ComplexPattern", []>; @@ -4086,7 +4158,7 @@ multiclass MemTagStore opc1, string insn> { let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in class ExceptionGeneration op1, bits<2> ll, string asm> - : I<(outs), (ins imm0_65535:$imm), asm, "\t$imm", "", []>, + : I<(outs), (ins i32_imm0_65535:$imm), asm, "\t$imm", "", []>, Sched<[WriteSys]> { bits<16> imm; let Inst{31-24} = 0b11010100; diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 215e96a82d0e..5c35e5bcdd30 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -32,6 +32,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/Casting.h" @@ -82,6 +83,10 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI); } + // Meta-instructions emit no code. + if (MI.isMetaInstruction()) + return 0; + // FIXME: We currently only handle pseudoinstructions that don't get expanded // before the assembly printer. unsigned NumBytes = 0; @@ -91,12 +96,6 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { // Anything not explicitly designated otherwise is a normal 4-byte insn. NumBytes = 4; break; - case TargetOpcode::DBG_VALUE: - case TargetOpcode::EH_LABEL: - case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - NumBytes = 0; - break; case TargetOpcode::STACKMAP: // The upper bound for a stackmap intrinsic is the full length of its shadow NumBytes = StackMapOpers(&MI).getNumPatchBytes(); @@ -416,7 +415,7 @@ unsigned AArch64InstrInfo::insertBranch( // Find the original register that VReg is copied from. 
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) { - while (TargetRegisterInfo::isVirtualRegister(VReg)) { + while (Register::isVirtualRegister(VReg)) { const MachineInstr *DefMI = MRI.getVRegDef(VReg); if (!DefMI->isFullCopy()) return VReg; @@ -431,7 +430,7 @@ static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) { static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewVReg = nullptr) { VReg = removeCopies(MRI, VReg); - if (!TargetRegisterInfo::isVirtualRegister(VReg)) + if (!Register::isVirtualRegister(VReg)) return 0; bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg)); @@ -574,7 +573,7 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB, CC = AArch64CC::NE; break; } - unsigned SrcReg = Cond[2].getReg(); + Register SrcReg = Cond[2].getReg(); if (Is64Bit) { // cmp reg, #0 is actually subs xzr, reg, #0. MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass); @@ -930,7 +929,7 @@ bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, } bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint( - const MachineInstr &MIa, const MachineInstr &MIb, AliasAnalysis *AA) const { + const MachineInstr &MIa, const MachineInstr &MIb) const { const TargetRegisterInfo *TRI = &getRegisterInfo(); const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; int64_t OffsetA = 0, OffsetB = 0; @@ -1071,8 +1070,8 @@ static bool UpdateOperandRegClass(MachineInstr &Instr) { assert(MO.isReg() && "Operand has register constraints without being a register!"); - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) { if (!OpRegCstraints->contains(Reg)) return false; } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) && @@ -1472,6 +1471,8 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return false; MachineBasicBlock &MBB = *MI.getParent(); + auto &Subtarget = MBB.getParent()->getSubtarget(); + auto TRI = Subtarget.getRegisterInfo(); DebugLoc DL = MI.getDebugLoc(); if (MI.getOpcode() == AArch64::CATCHRET) { @@ -1497,21 +1498,32 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); const GlobalValue *GV = cast((*MI.memoperands_begin())->getValue()); const TargetMachine &TM = MBB.getParent()->getTarget(); - unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM); + unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM); const unsigned char MO_NC = AArch64II::MO_NC; if ((OpFlags & AArch64II::MO_GOT) != 0) { BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg) .addGlobalAddress(GV, 0, OpFlags); - BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) - .addReg(Reg, RegState::Kill) - .addImm(0) - .addMemOperand(*MI.memoperands_begin()); + if (Subtarget.isTargetILP32()) { + unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32); + BuildMI(MBB, MI, DL, get(AArch64::LDRWui)) + .addDef(Reg32, RegState::Dead) + .addUse(Reg, RegState::Kill) + .addImm(0) + .addMemOperand(*MI.memoperands_begin()) + .addDef(Reg, RegState::Implicit); + } else { + BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) + .addReg(Reg, RegState::Kill) + .addImm(0) + .addMemOperand(*MI.memoperands_begin()); + } } else if (TM.getCodeModel() == CodeModel::Large) { + assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?"); BuildMI(MBB, MI, DL, 
get(AArch64::MOVZXi), Reg) .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC) .addImm(0); @@ -1538,10 +1550,20 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg) .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE); unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC; - BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) - .addReg(Reg, RegState::Kill) - .addGlobalAddress(GV, 0, LoFlags) - .addMemOperand(*MI.memoperands_begin()); + if (Subtarget.isTargetILP32()) { + unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32); + BuildMI(MBB, MI, DL, get(AArch64::LDRWui)) + .addDef(Reg32, RegState::Dead) + .addUse(Reg, RegState::Kill) + .addGlobalAddress(GV, 0, LoFlags) + .addMemOperand(*MI.memoperands_begin()) + .addDef(Reg, RegState::Implicit); + } else { + BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) + .addReg(Reg, RegState::Kill) + .addGlobalAddress(GV, 0, LoFlags) + .addMemOperand(*MI.memoperands_begin()); + } } MBB.erase(MI); @@ -1581,7 +1603,7 @@ bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) { break; case TargetOpcode::COPY: { // GPR32 copies will by lowered to ORRXrs - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); return (AArch64::GPR32RegClass.contains(DstReg) || AArch64::GPR64RegClass.contains(DstReg)); } @@ -1611,7 +1633,7 @@ bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) { break; case TargetOpcode::COPY: { // FPR64 copies will by lowered to ORR.16b - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); return (AArch64::FPR64RegClass.contains(DstReg) || AArch64::FPR128RegClass.contains(DstReg)); } @@ -1917,7 +1939,7 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const { // e.g., ldr x0, [x0] // This case will never occur with an FI base. if (MI.getOperand(1).isReg()) { - unsigned BaseReg = MI.getOperand(1).getReg(); + Register BaseReg = MI.getOperand(1).getReg(); const TargetRegisterInfo *TRI = &getRegisterInfo(); if (MI.modifiesRegister(BaseReg, TRI)) return false; @@ -1928,6 +1950,17 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const { if (isLdStPairSuppressed(MI)) return false; + // Do not pair any callee-save store/reload instructions in the + // prologue/epilogue if the CFI information encoded the operations as separate + // instructions, as that will cause the size of the actual prologue to mismatch + // with the prologue size recorded in the Windows CFI. + const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo(); + bool NeedsWinCFI = MAI->usesWindowsCFI() && + MI.getMF()->getFunction().needsUnwindTableEntry(); + if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) || + MI.getFlag(MachineInstr::FrameDestroy))) + return false; + // On some CPUs quad load/store pairs are slower than two single load/stores. 
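// Editorial sketch (not part of the patch): the new Windows-CFI guard in
// isCandidateToMergeOrPair() above, restated as a free-standing predicate with
// the reasoning spelled out. All calls are taken from the hunk itself; treat
// this as an illustration rather than the in-tree code.
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetMachine.h"

static bool mustNotPairForWinCFI(const llvm::MachineInstr &MI) {
  // When SEH unwind info is emitted, the unwind opcodes describe the prologue
  // and epilogue instruction by instruction. Merging two frame-setup stores,
  //   str x19, [sp, #16]
  //   str x20, [sp, #24]
  // into a single stp would shrink the actual prologue and desynchronize it
  // from the size recorded in the Windows CFI, so such instructions must not
  // be paired.
  const llvm::MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
  bool NeedsWinCFI = MAI->usesWindowsCFI() &&
                     MI.getMF()->getFunction().needsUnwindTableEntry();
  return NeedsWinCFI && (MI.getFlag(llvm::MachineInstr::FrameSetup) ||
                         MI.getFlag(llvm::MachineInstr::FrameDestroy));
}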
if (Subtarget.isPaired128Slow()) { switch (MI.getOpcode()) { @@ -2165,6 +2198,18 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale, MinOffset = -256; MaxOffset = 255; break; + case AArch64::LDR_PXI: + case AArch64::STR_PXI: + Scale = Width = 2; + MinOffset = -256; + MaxOffset = 255; + break; + case AArch64::LDR_ZXI: + case AArch64::STR_ZXI: + Scale = Width = 16; + MinOffset = -256; + MaxOffset = 255; + break; case AArch64::ST2GOffset: case AArch64::STZ2GOffset: Scale = 16; @@ -2350,7 +2395,7 @@ static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB, if (!SubIdx) return MIB.addReg(Reg, State); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); return MIB.addReg(Reg, State, SubIdx); } @@ -2474,6 +2519,27 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } + // Copy a Predicate register by ORRing with itself. + if (AArch64::PPRRegClass.contains(DestReg) && + AArch64::PPRRegClass.contains(SrcReg)) { + assert(Subtarget.hasSVE() && "Unexpected SVE register."); + BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg) + .addReg(SrcReg) // Pg + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + + // Copy a Z register by ORRing with itself. + if (AArch64::ZPRRegClass.contains(DestReg) && + AArch64::ZPRRegClass.contains(SrcReg)) { + assert(Subtarget.hasSVE() && "Unexpected SVE register."); + BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + if (AArch64::GPR64spRegClass.contains(DestReg) && (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) { if (DestReg == AArch64::SP || SrcReg == AArch64::SP) { @@ -2722,7 +2788,7 @@ static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineMemOperand *MMO) { unsigned SrcReg0 = SrcReg; unsigned SrcReg1 = SrcReg; - if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + if (Register::isPhysicalRegister(SrcReg)) { SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0); SubIdx0 = 0; SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1); @@ -2761,7 +2827,7 @@ void AArch64InstrInfo::storeRegToStackSlot( case 4: if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { Opc = AArch64::STRWui; - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) + if (Register::isVirtualRegister(SrcReg)) MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass); else assert(SrcReg != AArch64::WSP); @@ -2771,7 +2837,7 @@ void AArch64InstrInfo::storeRegToStackSlot( case 8: if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { Opc = AArch64::STRXui; - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) + if (Register::isVirtualRegister(SrcReg)) MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); else assert(SrcReg != AArch64::SP); @@ -2852,7 +2918,7 @@ static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, unsigned DestReg0 = DestReg; unsigned DestReg1 = DestReg; bool IsUndef = true; - if (TargetRegisterInfo::isPhysicalRegister(DestReg)) { + if (Register::isPhysicalRegister(DestReg)) { DestReg0 = TRI.getSubReg(DestReg, SubIdx0); SubIdx0 = 0; DestReg1 = TRI.getSubReg(DestReg, SubIdx1); @@ -2892,7 +2958,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( case 4: if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { Opc = AArch64::LDRWui; - if (TargetRegisterInfo::isVirtualRegister(DestReg)) + if (Register::isVirtualRegister(DestReg)) MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass); else 
assert(DestReg != AArch64::WSP); @@ -2902,7 +2968,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( case 8: if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { Opc = AArch64::LDRXui; - if (TargetRegisterInfo::isVirtualRegister(DestReg)) + if (Register::isVirtualRegister(DestReg)) MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass); else assert(DestReg != AArch64::SP); @@ -2972,21 +3038,39 @@ void AArch64InstrInfo::loadRegFromStackSlot( MI.addMemOperand(MMO); } -void llvm::emitFrameOffset(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, const DebugLoc &DL, - unsigned DestReg, unsigned SrcReg, int Offset, - const TargetInstrInfo *TII, - MachineInstr::MIFlag Flag, bool SetNZCV, - bool NeedsWinCFI, bool *HasWinCFI) { - if (DestReg == SrcReg && Offset == 0) - return; - - assert((DestReg != AArch64::SP || Offset % 16 == 0) && - "SP increment/decrement not 16-byte aligned"); - - bool isSub = Offset < 0; - if (isSub) - Offset = -Offset; +// Helper function to emit a frame offset adjustment from a given +// pointer (SrcReg), stored into DestReg. This function is explicit +// in that it requires the opcode. +static void emitFrameOffsetAdj(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned DestReg, + unsigned SrcReg, int64_t Offset, unsigned Opc, + const TargetInstrInfo *TII, + MachineInstr::MIFlag Flag, bool NeedsWinCFI, + bool *HasWinCFI) { + int Sign = 1; + unsigned MaxEncoding, ShiftSize; + switch (Opc) { + case AArch64::ADDXri: + case AArch64::ADDSXri: + case AArch64::SUBXri: + case AArch64::SUBSXri: + MaxEncoding = 0xfff; + ShiftSize = 12; + break; + case AArch64::ADDVL_XXI: + case AArch64::ADDPL_XXI: + MaxEncoding = 31; + ShiftSize = 0; + if (Offset < 0) { + MaxEncoding = 32; + Sign = -1; + Offset = -Offset; + } + break; + default: + llvm_unreachable("Unsupported opcode"); + } // FIXME: If the offset won't fit in 24-bits, compute the offset into a // scratch register. If DestReg is a virtual register, use it as the @@ -2999,65 +3083,94 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, // of code. // assert(Offset < (1 << 24) && "unimplemented reg plus immediate"); - unsigned Opc; - if (SetNZCV) - Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri; - else - Opc = isSub ? 
AArch64::SUBXri : AArch64::ADDXri; - const unsigned MaxEncoding = 0xfff; - const unsigned ShiftSize = 12; const unsigned MaxEncodableValue = MaxEncoding << ShiftSize; - while (((unsigned)Offset) >= (1 << ShiftSize)) { - unsigned ThisVal; - if (((unsigned)Offset) > MaxEncodableValue) { - ThisVal = MaxEncodableValue; - } else { - ThisVal = Offset & MaxEncodableValue; + do { + unsigned ThisVal = std::min(Offset, MaxEncodableValue); + unsigned LocalShiftSize = 0; + if (ThisVal > MaxEncoding) { + ThisVal = ThisVal >> ShiftSize; + LocalShiftSize = ShiftSize; } assert((ThisVal >> ShiftSize) <= MaxEncoding && "Encoding cannot handle value that big"); - BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) - .addReg(SrcReg) - .addImm(ThisVal >> ShiftSize) - .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize)) - .setMIFlag(Flag); - - if (NeedsWinCFI && SrcReg == AArch64::SP && DestReg == AArch64::SP) { + auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) + .addReg(SrcReg) + .addImm(Sign * (int)ThisVal); + if (ShiftSize) + MBI = MBI.addImm( + AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize)); + MBI = MBI.setMIFlag(Flag); + + if (NeedsWinCFI) { + assert(Sign == 1 && "SEH directives should always have a positive sign"); + int Imm = (int)(ThisVal << LocalShiftSize); + if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) || + (SrcReg == AArch64::FP && DestReg == AArch64::SP)) { + if (HasWinCFI) + *HasWinCFI = true; + if (Imm == 0) + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag); + else + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)) + .addImm(Imm) + .setMIFlag(Flag); + assert((Offset - Imm) == 0 && "Expected remaining offset to be zero to " + "emit a single SEH directive"); + } else if (DestReg == AArch64::SP) { + if (HasWinCFI) + *HasWinCFI = true; + assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc"); + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)) + .addImm(Imm) + .setMIFlag(Flag); + } if (HasWinCFI) *HasWinCFI = true; - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)) - .addImm(ThisVal) - .setMIFlag(Flag); } SrcReg = DestReg; - Offset -= ThisVal; - if (Offset == 0) - return; - } - BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) - .addReg(SrcReg) - .addImm(Offset) - .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) - .setMIFlag(Flag); + Offset -= ThisVal << LocalShiftSize; + } while (Offset); +} - if (NeedsWinCFI) { - if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) || - (SrcReg == AArch64::FP && DestReg == AArch64::SP)) { - if (HasWinCFI) - *HasWinCFI = true; - if (Offset == 0) - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)). - setMIFlag(Flag); - else - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)). - addImm(Offset).setMIFlag(Flag); - } else if (DestReg == AArch64::SP) { - if (HasWinCFI) - *HasWinCFI = true; - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)). - addImm(Offset).setMIFlag(Flag); +void llvm::emitFrameOffset(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, const DebugLoc &DL, + unsigned DestReg, unsigned SrcReg, + StackOffset Offset, const TargetInstrInfo *TII, + MachineInstr::MIFlag Flag, bool SetNZCV, + bool NeedsWinCFI, bool *HasWinCFI) { + int64_t Bytes, NumPredicateVectors, NumDataVectors; + Offset.getForFrameOffset(Bytes, NumPredicateVectors, NumDataVectors); + + // First emit non-scalable frame offsets, or a simple 'mov'. 
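// Editorial sketch (not part of the patch): the splitting performed by the
// emitFrameOffsetAdj() loop above for the ADDXri/SUBXri forms, reduced to a
// standalone function. Each emitted immediate is at most 12 bits, optionally
// shifted left by 12, so e.g. an offset of 0x12345 is materialized as
// ADD #0x12, LSL #12 followed by ADD #0x345.
#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

// Returns the (immediate, LSL shift) pairs the loop would emit.
static std::vector<std::pair<unsigned, unsigned>>
splitAddSubImmediate(uint64_t Offset) {
  const unsigned MaxEncoding = 0xfff, ShiftSize = 12;
  const uint64_t MaxEncodableValue = (uint64_t)MaxEncoding << ShiftSize;
  std::vector<std::pair<unsigned, unsigned>> Pieces;
  do {
    uint64_t ThisVal = std::min(Offset, MaxEncodableValue);
    unsigned LocalShiftSize = 0;
    if (ThisVal > MaxEncoding) {
      ThisVal >>= ShiftSize;   // keep only the top 12 bits of this chunk
      LocalShiftSize = ShiftSize;
    }
    Pieces.emplace_back((unsigned)ThisVal, LocalShiftSize);
    Offset -= ThisVal << LocalShiftSize;
  } while (Offset);
  return Pieces;
}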
+ if (Bytes || (!Offset && SrcReg != DestReg)) { + assert((DestReg != AArch64::SP || Bytes % 16 == 0) && + "SP increment/decrement not 16-byte aligned"); + unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri; + if (Bytes < 0) { + Bytes = -Bytes; + Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri; } + emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag, + NeedsWinCFI, HasWinCFI); + SrcReg = DestReg; + } + + assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) && + "SetNZCV not supported with SVE vectors"); + assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) && + "WinCFI not supported with SVE vectors"); + + if (NumDataVectors) { + emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors, + AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr); + SrcReg = DestReg; + } + + if (NumPredicateVectors) { + assert(DestReg != AArch64::SP && "Unaligned access to SP"); + emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors, + AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr); } } @@ -3079,15 +3192,13 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( // // if (MI.isFullCopy()) { - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); - if (SrcReg == AArch64::SP && - TargetRegisterInfo::isVirtualRegister(DstReg)) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + if (SrcReg == AArch64::SP && Register::isVirtualRegister(DstReg)) { MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass); return nullptr; } - if (DstReg == AArch64::SP && - TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if (DstReg == AArch64::SP && Register::isVirtualRegister(SrcReg)) { MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); return nullptr; } @@ -3127,14 +3238,13 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( MachineBasicBlock &MBB = *MI.getParent(); const MachineOperand &DstMO = MI.getOperand(0); const MachineOperand &SrcMO = MI.getOperand(1); - unsigned DstReg = DstMO.getReg(); - unsigned SrcReg = SrcMO.getReg(); + Register DstReg = DstMO.getReg(); + Register SrcReg = SrcMO.getReg(); // This is slightly expensive to compute for physical regs since // getMinimalPhysRegClass is slow. auto getRegClass = [&](unsigned Reg) { - return TargetRegisterInfo::isVirtualRegister(Reg) - ? MRI.getRegClass(Reg) - : TRI.getMinimalPhysRegClass(Reg); + return Register::isVirtualRegister(Reg) ? 
MRI.getRegClass(Reg) + : TRI.getMinimalPhysRegClass(Reg); }; if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) { @@ -3159,8 +3269,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( // // STRXui %xzr, %stack.0 // - if (IsSpill && DstMO.isUndef() && - TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + if (IsSpill && DstMO.isUndef() && Register::isPhysicalRegister(SrcReg)) { assert(SrcMO.getSubReg() == 0 && "Unexpected subreg on physical register"); const TargetRegisterClass *SpillRC; @@ -3243,10 +3352,23 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( return nullptr; } -int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, +static bool isSVEScaledImmInstruction(unsigned Opcode) { + switch (Opcode) { + case AArch64::LDR_ZXI: + case AArch64::STR_ZXI: + case AArch64::LDR_PXI: + case AArch64::STR_PXI: + return true; + default: + return false; + } +} + +int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, + StackOffset &SOffset, bool *OutUseUnscaledOp, unsigned *OutUnscaledOp, - int *EmittableOffset) { + int64_t *EmittableOffset) { // Set output values in case of early exit. if (EmittableOffset) *EmittableOffset = 0; @@ -3285,6 +3407,10 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal"); // Construct the complete offset. + bool IsMulVL = isSVEScaledImmInstruction(MI.getOpcode()); + int64_t Offset = + IsMulVL ? (SOffset.getScalableBytes()) : (SOffset.getBytes()); + const MachineOperand &ImmOpnd = MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode())); Offset += ImmOpnd.getImm() * Scale; @@ -3304,7 +3430,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, "Cannot have remainder when using unscaled op"); assert(MinOff < MaxOff && "Unexpected Min/Max offsets"); - int NewOffset = Offset / Scale; + int64_t NewOffset = Offset / Scale; if (MinOff <= NewOffset && NewOffset <= MaxOff) Offset = Remainder; else { @@ -3319,27 +3445,33 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, if (OutUnscaledOp && UnscaledOp) *OutUnscaledOp = *UnscaledOp; + if (IsMulVL) + SOffset = StackOffset(Offset, MVT::nxv1i8) + + StackOffset(SOffset.getBytes(), MVT::i8); + else + SOffset = StackOffset(Offset, MVT::i8) + + StackOffset(SOffset.getScalableBytes(), MVT::nxv1i8); return AArch64FrameOffsetCanUpdate | - (Offset == 0 ? AArch64FrameOffsetIsLegal : 0); + (SOffset ? 
0 : AArch64FrameOffsetIsLegal); } bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned FrameReg, int &Offset, + unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII) { unsigned Opcode = MI.getOpcode(); unsigned ImmIdx = FrameRegIdx + 1; if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) { - Offset += MI.getOperand(ImmIdx).getImm(); + Offset += StackOffset(MI.getOperand(ImmIdx).getImm(), MVT::i8); emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), MI.getOperand(0).getReg(), FrameReg, Offset, TII, MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri)); MI.eraseFromParent(); - Offset = 0; + Offset = StackOffset(); return true; } - int NewOffset; + int64_t NewOffset; unsigned UnscaledOp; bool UseUnscaledOp; int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, @@ -3352,7 +3484,7 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, MI.setDesc(TII->get(UnscaledOp)); MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset); - return Offset == 0; + return !Offset; } return false; @@ -3428,13 +3560,19 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) { switch (Inst.getOpcode()) { default: break; + case AArch64::FADDHrr: case AArch64::FADDSrr: case AArch64::FADDDrr: + case AArch64::FADDv4f16: + case AArch64::FADDv8f16: case AArch64::FADDv2f32: case AArch64::FADDv2f64: case AArch64::FADDv4f32: + case AArch64::FSUBHrr: case AArch64::FSUBSrr: case AArch64::FSUBDrr: + case AArch64::FSUBv4f16: + case AArch64::FSUBv8f16: case AArch64::FSUBv2f32: case AArch64::FSUBv2f64: case AArch64::FSUBv4f32: @@ -3459,7 +3597,7 @@ static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); MachineInstr *MI = nullptr; - if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) MI = MRI.getUniqueVRegDef(MO.getReg()); // And it needs to be in the trace (otherwise, it won't have a depth). 
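// Editorial sketch (not part of the patch): the shape of the refactor applied
// in the getMaddPatterns()/getFMAPatterns() hunks below. The repeated
// "if (canCombine...) { Patterns.push_back(...); Found = true; }" blocks are
// folded into one local lambda, so every opcode case shrinks to a single call.
// The types and the canCombine callback here are stand-ins, not LLVM APIs.
#include <vector>

enum class Pattern { MulAddOp1, MulAddOp2 };

static bool collectPatterns(bool (*canCombine)(int Opcode, int Operand),
                            std::vector<Pattern> &Patterns) {
  bool Found = false;
  auto setFound = [&](int Opcode, int Operand, Pattern P) {
    if (canCombine(Opcode, Operand)) {
      Patterns.push_back(P);
      Found = true;
    }
  };
  setFound(/*Opcode=*/1, /*Operand=*/1, Pattern::MulAddOp1);
  setFound(/*Opcode=*/1, /*Operand=*/2, Pattern::MulAddOp2);
  return Found;
}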
if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc) @@ -3544,86 +3682,48 @@ static bool getMaddPatterns(MachineInstr &Root, Opc = NewOpc; } + auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg, + MachineCombinerPattern Pattern) { + if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) { + Patterns.push_back(Pattern); + Found = true; + } + }; + + typedef MachineCombinerPattern MCP; + switch (Opc) { default: break; case AArch64::ADDWrr: assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && "ADDWrr does not have register operands"); - if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, - AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MULADDW_OP1); - Found = true; - } - if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr, - AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MULADDW_OP2); - Found = true; - } + setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1); + setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2); break; case AArch64::ADDXrr: - if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, - AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MULADDX_OP1); - Found = true; - } - if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr, - AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MULADDX_OP2); - Found = true; - } + setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1); + setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2); break; case AArch64::SUBWrr: - if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, - AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1); - Found = true; - } - if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr, - AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2); - Found = true; - } + setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1); + setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2); break; case AArch64::SUBXrr: - if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, - AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1); - Found = true; - } - if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr, - AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2); - Found = true; - } + setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1); + setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2); break; case AArch64::ADDWri: - if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, - AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1); - Found = true; - } + setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1); break; case AArch64::ADDXri: - if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, - AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1); - Found = true; - } + setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1); break; case AArch64::SUBWri: - if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, - AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1); - Found = true; - } + setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1); break; case AArch64::SUBXri: - if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, - AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1); - Found = true; - } + 
setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1); break; } return Found; @@ -3640,204 +3740,135 @@ static bool getFMAPatterns(MachineInstr &Root, MachineBasicBlock &MBB = *Root.getParent(); bool Found = false; + auto Match = [&](int Opcode, int Operand, + MachineCombinerPattern Pattern) -> bool { + if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) { + Patterns.push_back(Pattern); + return true; + } + return false; + }; + + typedef MachineCombinerPattern MCP; + switch (Root.getOpcode()) { default: assert(false && "Unsupported FP instruction in combiner\n"); break; + case AArch64::FADDHrr: + assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && + "FADDHrr does not have register operands"); + + Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1); + Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2); + break; case AArch64::FADDSrr: assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && - "FADDWrr does not have register operands"); - if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) { - Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv1i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) { - Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv1i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2); - Found = true; - } + "FADDSrr does not have register operands"); + + Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) || + Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1); + + Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) || + Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2); break; case AArch64::FADDDrr: - if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) { - Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv1i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) { - Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv1i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2); - Found = true; - } + Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) || + Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1); + + Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) || + Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2); + break; + case AArch64::FADDv4f16: + Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) || + Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1); + + Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) || + Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2); + break; + case AArch64::FADDv8f16: + Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) || + Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1); + + Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) || + Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2); break; case AArch64::FADDv2f32: - if 
(canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2f32)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2f32)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2); - Found = true; - } + Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) || + Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1); + + Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) || + Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2); break; case AArch64::FADDv2f64: - if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2f64)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2f64)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2); - Found = true; - } + Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) || + Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1); + + Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) || + Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2); break; case AArch64::FADDv4f32: - if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv4i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv4f32)) { - Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv4i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv4f32)) { - Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2); - Found = true; - } - break; + Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) || + Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1); + Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) || + Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2); + break; + case AArch64::FSUBHrr: + Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1); + Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2); + Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1); + break; case AArch64::FSUBSrr: - if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) { - Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) { - Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv1i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2); - Found = 
true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) { - Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1); - Found = true; - } + Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1); + + Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) || + Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2); + + Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1); break; case AArch64::FSUBDrr: - if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) { - Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) { - Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv1i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) { - Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1); - Found = true; - } + Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1); + + Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) || + Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2); + + Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1); + break; + case AArch64::FSUBv4f16: + Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) || + Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2); + + Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) || + Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1); + break; + case AArch64::FSUBv8f16: + Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) || + Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2); + + Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) || + Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1); break; case AArch64::FSUBv2f32: - if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2f32)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2f32)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1); - Found = true; - } + Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) || + Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2); + + Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) || + Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1); break; case AArch64::FSUBv2f64: - if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2f64)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2f64)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1); - 
Found = true; - } + Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) || + Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2); + + Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) || + Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1); break; case AArch64::FSUBv4f32: - if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv4i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv4f32)) { - Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv4i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv4f32)) { - Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1); - Found = true; - } + Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) || + Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2); + + Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) || + Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1); break; } return Found; @@ -3851,6 +3882,10 @@ bool AArch64InstrInfo::isThroughputPattern( switch (Pattern) { default: break; + case MachineCombinerPattern::FMULADDH_OP1: + case MachineCombinerPattern::FMULADDH_OP2: + case MachineCombinerPattern::FMULSUBH_OP1: + case MachineCombinerPattern::FMULSUBH_OP2: case MachineCombinerPattern::FMULADDS_OP1: case MachineCombinerPattern::FMULADDS_OP2: case MachineCombinerPattern::FMULSUBS_OP1: @@ -3859,12 +3894,21 @@ bool AArch64InstrInfo::isThroughputPattern( case MachineCombinerPattern::FMULADDD_OP2: case MachineCombinerPattern::FMULSUBD_OP1: case MachineCombinerPattern::FMULSUBD_OP2: + case MachineCombinerPattern::FNMULSUBH_OP1: case MachineCombinerPattern::FNMULSUBS_OP1: case MachineCombinerPattern::FNMULSUBD_OP1: + case MachineCombinerPattern::FMLAv4i16_indexed_OP1: + case MachineCombinerPattern::FMLAv4i16_indexed_OP2: + case MachineCombinerPattern::FMLAv8i16_indexed_OP1: + case MachineCombinerPattern::FMLAv8i16_indexed_OP2: case MachineCombinerPattern::FMLAv1i32_indexed_OP1: case MachineCombinerPattern::FMLAv1i32_indexed_OP2: case MachineCombinerPattern::FMLAv1i64_indexed_OP1: case MachineCombinerPattern::FMLAv1i64_indexed_OP2: + case MachineCombinerPattern::FMLAv4f16_OP2: + case MachineCombinerPattern::FMLAv4f16_OP1: + case MachineCombinerPattern::FMLAv8f16_OP1: + case MachineCombinerPattern::FMLAv8f16_OP2: case MachineCombinerPattern::FMLAv2f32_OP2: case MachineCombinerPattern::FMLAv2f32_OP1: case MachineCombinerPattern::FMLAv2f64_OP1: @@ -3877,10 +3921,18 @@ bool AArch64InstrInfo::isThroughputPattern( case MachineCombinerPattern::FMLAv4f32_OP2: case MachineCombinerPattern::FMLAv4i32_indexed_OP1: case MachineCombinerPattern::FMLAv4i32_indexed_OP2: + case MachineCombinerPattern::FMLSv4i16_indexed_OP1: + case MachineCombinerPattern::FMLSv4i16_indexed_OP2: + case MachineCombinerPattern::FMLSv8i16_indexed_OP1: + case MachineCombinerPattern::FMLSv8i16_indexed_OP2: case MachineCombinerPattern::FMLSv1i32_indexed_OP2: case MachineCombinerPattern::FMLSv1i64_indexed_OP2: case MachineCombinerPattern::FMLSv2i32_indexed_OP2: case MachineCombinerPattern::FMLSv2i64_indexed_OP2: + case MachineCombinerPattern::FMLSv4f16_OP1: + case MachineCombinerPattern::FMLSv4f16_OP2: + case MachineCombinerPattern::FMLSv8f16_OP1: + case 
MachineCombinerPattern::FMLSv8f16_OP2: case MachineCombinerPattern::FMLSv2f32_OP2: case MachineCombinerPattern::FMLSv2f64_OP2: case MachineCombinerPattern::FMLSv4i32_indexed_OP2: @@ -3933,15 +3985,15 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, SmallVectorImpl &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind = FMAInstKind::Default, - const unsigned *ReplacedAddend = nullptr) { + const Register *ReplacedAddend = nullptr) { assert(IdxMulOpd == 1 || IdxMulOpd == 2); unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1; MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg()); - unsigned ResultReg = Root.getOperand(0).getReg(); - unsigned SrcReg0 = MUL->getOperand(1).getReg(); + Register ResultReg = Root.getOperand(0).getReg(); + Register SrcReg0 = MUL->getOperand(1).getReg(); bool Src0IsKill = MUL->getOperand(1).isKill(); - unsigned SrcReg1 = MUL->getOperand(2).getReg(); + Register SrcReg1 = MUL->getOperand(2).getReg(); bool Src1IsKill = MUL->getOperand(2).isKill(); unsigned SrcReg2; @@ -3955,13 +4007,13 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, Src2IsKill = Root.getOperand(IdxOtherOpd).isKill(); } - if (TargetRegisterInfo::isVirtualRegister(ResultReg)) + if (Register::isVirtualRegister(ResultReg)) MRI.constrainRegClass(ResultReg, RC); - if (TargetRegisterInfo::isVirtualRegister(SrcReg0)) + if (Register::isVirtualRegister(SrcReg0)) MRI.constrainRegClass(SrcReg0, RC); - if (TargetRegisterInfo::isVirtualRegister(SrcReg1)) + if (Register::isVirtualRegister(SrcReg1)) MRI.constrainRegClass(SrcReg1, RC); - if (TargetRegisterInfo::isVirtualRegister(SrcReg2)) + if (Register::isVirtualRegister(SrcReg2)) MRI.constrainRegClass(SrcReg2, RC); MachineInstrBuilder MIB; @@ -4015,19 +4067,19 @@ static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, assert(IdxMulOpd == 1 || IdxMulOpd == 2); MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg()); - unsigned ResultReg = Root.getOperand(0).getReg(); - unsigned SrcReg0 = MUL->getOperand(1).getReg(); + Register ResultReg = Root.getOperand(0).getReg(); + Register SrcReg0 = MUL->getOperand(1).getReg(); bool Src0IsKill = MUL->getOperand(1).isKill(); - unsigned SrcReg1 = MUL->getOperand(2).getReg(); + Register SrcReg1 = MUL->getOperand(2).getReg(); bool Src1IsKill = MUL->getOperand(2).isKill(); - if (TargetRegisterInfo::isVirtualRegister(ResultReg)) + if (Register::isVirtualRegister(ResultReg)) MRI.constrainRegClass(ResultReg, RC); - if (TargetRegisterInfo::isVirtualRegister(SrcReg0)) + if (Register::isVirtualRegister(SrcReg0)) MRI.constrainRegClass(SrcReg0, RC); - if (TargetRegisterInfo::isVirtualRegister(SrcReg1)) + if (Register::isVirtualRegister(SrcReg1)) MRI.constrainRegClass(SrcReg1, RC); - if (TargetRegisterInfo::isVirtualRegister(VR)) + if (Register::isVirtualRegister(VR)) MRI.constrainRegClass(VR, RC); MachineInstrBuilder MIB = @@ -4116,7 +4168,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( Opc = AArch64::MADDXrrr; RC = &AArch64::GPR64RegClass; } - unsigned NewVR = MRI.createVirtualRegister(OrrRC); + Register NewVR = MRI.createVirtualRegister(OrrRC); uint64_t Imm = Root.getOperand(2).getImm(); if (Root.getOperand(3).isImm()) { @@ -4158,7 +4210,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( Opc = AArch64::MADDXrrr; RC = &AArch64::GPR64RegClass; } - unsigned NewVR = MRI.createVirtualRegister(SubRC); + Register NewVR = MRI.createVirtualRegister(SubRC); // SUB NewVR, 0, C MachineInstrBuilder 
MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR) @@ -4208,7 +4260,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( Opc = AArch64::MADDXrrr; RC = &AArch64::GPR64RegClass; } - unsigned NewVR = MRI.createVirtualRegister(OrrRC); + Register NewVR = MRI.createVirtualRegister(OrrRC); uint64_t Imm = Root.getOperand(2).getImm(); if (Root.getOperand(3).isImm()) { unsigned Val = Root.getOperand(3).getImm(); @@ -4228,34 +4280,35 @@ void AArch64InstrInfo::genAlternativeCodeSequence( break; } // Floating Point Support + case MachineCombinerPattern::FMULADDH_OP1: + Opc = AArch64::FMADDHrrr; + RC = &AArch64::FPR16RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; case MachineCombinerPattern::FMULADDS_OP1: + Opc = AArch64::FMADDSrrr; + RC = &AArch64::FPR32RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; case MachineCombinerPattern::FMULADDD_OP1: - // MUL I=A,B,0 - // ADD R,I,C - // ==> MADD R,A,B,C - // --- Create(MADD); - if (Pattern == MachineCombinerPattern::FMULADDS_OP1) { - Opc = AArch64::FMADDSrrr; - RC = &AArch64::FPR32RegClass; - } else { - Opc = AArch64::FMADDDrrr; - RC = &AArch64::FPR64RegClass; - } + Opc = AArch64::FMADDDrrr; + RC = &AArch64::FPR64RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; + + case MachineCombinerPattern::FMULADDH_OP2: + Opc = AArch64::FMADDHrrr; + RC = &AArch64::FPR16RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; case MachineCombinerPattern::FMULADDS_OP2: + Opc = AArch64::FMADDSrrr; + RC = &AArch64::FPR32RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; case MachineCombinerPattern::FMULADDD_OP2: - // FMUL I=A,B,0 - // FADD R,C,I - // ==> FMADD R,A,B,C - // --- Create(FMADD); - if (Pattern == MachineCombinerPattern::FMULADDS_OP2) { - Opc = AArch64::FMADDSrrr; - RC = &AArch64::FPR32RegClass; - } else { - Opc = AArch64::FMADDDrrr; - RC = &AArch64::FPR64RegClass; - } + Opc = AArch64::FMADDDrrr; + RC = &AArch64::FPR64RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; @@ -4285,6 +4338,31 @@ void AArch64InstrInfo::genAlternativeCodeSequence( FMAInstKind::Indexed); break; + case MachineCombinerPattern::FMLAv4i16_indexed_OP1: + RC = &AArch64::FPR64RegClass; + Opc = AArch64::FMLAv4i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Indexed); + break; + case MachineCombinerPattern::FMLAv4f16_OP1: + RC = &AArch64::FPR64RegClass; + Opc = AArch64::FMLAv4f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Accumulator); + break; + case MachineCombinerPattern::FMLAv4i16_indexed_OP2: + RC = &AArch64::FPR64RegClass; + Opc = AArch64::FMLAv4i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, + FMAInstKind::Indexed); + break; + case MachineCombinerPattern::FMLAv4f16_OP2: + RC = &AArch64::FPR64RegClass; + Opc = AArch64::FMLAv4f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, + FMAInstKind::Accumulator); + break; + case MachineCombinerPattern::FMLAv2i32_indexed_OP1: case MachineCombinerPattern::FMLAv2f32_OP1: RC = &AArch64::FPR64RegClass; @@ -4312,6 +4390,31 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; + case MachineCombinerPattern::FMLAv8i16_indexed_OP1: + RC = &AArch64::FPR128RegClass; + Opc = AArch64::FMLAv8i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + 
FMAInstKind::Indexed); + break; + case MachineCombinerPattern::FMLAv8f16_OP1: + RC = &AArch64::FPR128RegClass; + Opc = AArch64::FMLAv8f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Accumulator); + break; + case MachineCombinerPattern::FMLAv8i16_indexed_OP2: + RC = &AArch64::FPR128RegClass; + Opc = AArch64::FMLAv8i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, + FMAInstKind::Indexed); + break; + case MachineCombinerPattern::FMLAv8f16_OP2: + RC = &AArch64::FPR128RegClass; + Opc = AArch64::FMLAv8f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, + FMAInstKind::Accumulator); + break; + case MachineCombinerPattern::FMLAv2i64_indexed_OP1: case MachineCombinerPattern::FMLAv2f64_OP1: RC = &AArch64::FPR128RegClass; @@ -4367,56 +4470,53 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; + case MachineCombinerPattern::FMULSUBH_OP1: + Opc = AArch64::FNMSUBHrrr; + RC = &AArch64::FPR16RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; case MachineCombinerPattern::FMULSUBS_OP1: - case MachineCombinerPattern::FMULSUBD_OP1: { - // FMUL I=A,B,0 - // FSUB R,I,C - // ==> FNMSUB R,A,B,C // = -C + A*B - // --- Create(FNMSUB); - if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) { - Opc = AArch64::FNMSUBSrrr; - RC = &AArch64::FPR32RegClass; - } else { - Opc = AArch64::FNMSUBDrrr; - RC = &AArch64::FPR64RegClass; - } + Opc = AArch64::FNMSUBSrrr; + RC = &AArch64::FPR32RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::FMULSUBD_OP1: + Opc = AArch64::FNMSUBDrrr; + RC = &AArch64::FPR64RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - } + case MachineCombinerPattern::FNMULSUBH_OP1: + Opc = AArch64::FNMADDHrrr; + RC = &AArch64::FPR16RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; case MachineCombinerPattern::FNMULSUBS_OP1: - case MachineCombinerPattern::FNMULSUBD_OP1: { - // FNMUL I=A,B,0 - // FSUB R,I,C - // ==> FNMADD R,A,B,C // = -A*B - C - // --- Create(FNMADD); - if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) { - Opc = AArch64::FNMADDSrrr; - RC = &AArch64::FPR32RegClass; - } else { - Opc = AArch64::FNMADDDrrr; - RC = &AArch64::FPR64RegClass; - } + Opc = AArch64::FNMADDSrrr; + RC = &AArch64::FPR32RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::FNMULSUBD_OP1: + Opc = AArch64::FNMADDDrrr; + RC = &AArch64::FPR64RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - } + case MachineCombinerPattern::FMULSUBH_OP2: + Opc = AArch64::FMSUBHrrr; + RC = &AArch64::FPR16RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; case MachineCombinerPattern::FMULSUBS_OP2: - case MachineCombinerPattern::FMULSUBD_OP2: { - // FMUL I=A,B,0 - // FSUB R,C,I - // ==> FMSUB R,A,B,C (computes C - A*B) - // --- Create(FMSUB); - if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) { - Opc = AArch64::FMSUBSrrr; - RC = &AArch64::FPR32RegClass; - } else { - Opc = AArch64::FMSUBDrrr; - RC = &AArch64::FPR64RegClass; - } + Opc = AArch64::FMSUBSrrr; + RC = &AArch64::FPR32RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + case MachineCombinerPattern::FMULSUBD_OP2: + Opc = AArch64::FMSUBDrrr; + RC = &AArch64::FPR64RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 
break; - } case MachineCombinerPattern::FMLSv1i32_indexed_OP2: Opc = AArch64::FMLSv1i32_indexed; @@ -4432,6 +4532,39 @@ void AArch64InstrInfo::genAlternativeCodeSequence( FMAInstKind::Indexed); break; + case MachineCombinerPattern::FMLSv4f16_OP1: + case MachineCombinerPattern::FMLSv4i16_indexed_OP1: { + RC = &AArch64::FPR64RegClass; + Register NewVR = MRI.createVirtualRegister(RC); + MachineInstrBuilder MIB1 = + BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f16), NewVR) + .add(Root.getOperand(2)); + InsInstrs.push_back(MIB1); + InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); + if (Pattern == MachineCombinerPattern::FMLSv4f16_OP1) { + Opc = AArch64::FMLAv4f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Accumulator, &NewVR); + } else { + Opc = AArch64::FMLAv4i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Indexed, &NewVR); + } + break; + } + case MachineCombinerPattern::FMLSv4f16_OP2: + RC = &AArch64::FPR64RegClass; + Opc = AArch64::FMLSv4f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, + FMAInstKind::Accumulator); + break; + case MachineCombinerPattern::FMLSv4i16_indexed_OP2: + RC = &AArch64::FPR64RegClass; + Opc = AArch64::FMLSv4i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, + FMAInstKind::Indexed); + break; + case MachineCombinerPattern::FMLSv2f32_OP2: case MachineCombinerPattern::FMLSv2i32_indexed_OP2: RC = &AArch64::FPR64RegClass; @@ -4446,6 +4579,39 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; + case MachineCombinerPattern::FMLSv8f16_OP1: + case MachineCombinerPattern::FMLSv8i16_indexed_OP1: { + RC = &AArch64::FPR128RegClass; + Register NewVR = MRI.createVirtualRegister(RC); + MachineInstrBuilder MIB1 = + BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv8f16), NewVR) + .add(Root.getOperand(2)); + InsInstrs.push_back(MIB1); + InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); + if (Pattern == MachineCombinerPattern::FMLSv8f16_OP1) { + Opc = AArch64::FMLAv8f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Accumulator, &NewVR); + } else { + Opc = AArch64::FMLAv8i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Indexed, &NewVR); + } + break; + } + case MachineCombinerPattern::FMLSv8f16_OP2: + RC = &AArch64::FPR128RegClass; + Opc = AArch64::FMLSv8f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, + FMAInstKind::Accumulator); + break; + case MachineCombinerPattern::FMLSv8i16_indexed_OP2: + RC = &AArch64::FPR128RegClass; + Opc = AArch64::FMLSv8i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, + FMAInstKind::Indexed); + break; + case MachineCombinerPattern::FMLSv2f64_OP2: case MachineCombinerPattern::FMLSv2i64_indexed_OP2: RC = &AArch64::FPR128RegClass; @@ -4476,7 +4642,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( case MachineCombinerPattern::FMLSv2f32_OP1: case MachineCombinerPattern::FMLSv2i32_indexed_OP1: { RC = &AArch64::FPR64RegClass; - unsigned NewVR = MRI.createVirtualRegister(RC); + Register NewVR = MRI.createVirtualRegister(RC); MachineInstrBuilder MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR) .add(Root.getOperand(2)); @@ -4496,7 +4662,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( case MachineCombinerPattern::FMLSv4f32_OP1: case MachineCombinerPattern::FMLSv4i32_indexed_OP1: { RC = &AArch64::FPR128RegClass; - 
unsigned NewVR = MRI.createVirtualRegister(RC); + Register NewVR = MRI.createVirtualRegister(RC); MachineInstrBuilder MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR) .add(Root.getOperand(2)); @@ -4516,7 +4682,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( case MachineCombinerPattern::FMLSv2f64_OP1: case MachineCombinerPattern::FMLSv2i64_indexed_OP1: { RC = &AArch64::FPR128RegClass; - unsigned NewVR = MRI.createVirtualRegister(RC); + Register NewVR = MRI.createVirtualRegister(RC); MachineInstrBuilder MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR) .add(Root.getOperand(2)); @@ -4617,15 +4783,15 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const { MachineBasicBlock *MBB = MI.getParent(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); - unsigned VReg = MI.getOperand(0).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(VReg)) + Register VReg = MI.getOperand(0).getReg(); + if (!Register::isVirtualRegister(VReg)) return false; MachineInstr *DefMI = MRI->getVRegDef(VReg); // Look through COPY instructions to find definition. while (DefMI->isCopy()) { - unsigned CopyVReg = DefMI->getOperand(1).getReg(); + Register CopyVReg = DefMI->getOperand(1).getReg(); if (!MRI->hasOneNonDBGUse(CopyVReg)) return false; if (!MRI->hasOneDef(CopyVReg)) @@ -4653,8 +4819,8 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const { return false; MachineOperand &MO = DefMI->getOperand(1); - unsigned NewReg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(NewReg)) + Register NewReg = MO.getReg(); + if (!Register::isVirtualRegister(NewReg)) return false; assert(!MRI->def_empty(NewReg) && "Register must be defined."); @@ -4737,9 +4903,13 @@ AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { static const std::pair TargetFlags[] = { {MO_COFFSTUB, "aarch64-coffstub"}, - {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, - {MO_S, "aarch64-s"}, {MO_TLS, "aarch64-tls"}, - {MO_DLLIMPORT, "aarch64-dllimport"}}; + {MO_GOT, "aarch64-got"}, + {MO_NC, "aarch64-nc"}, + {MO_S, "aarch64-s"}, + {MO_TLS, "aarch64-tls"}, + {MO_DLLIMPORT, "aarch64-dllimport"}, + {MO_PREL, "aarch64-prel"}, + {MO_TAGGED, "aarch64-tagged"}}; return makeArrayRef(TargetFlags); } diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h index 7be4daba7dc4..1688045e4fb8 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -15,6 +15,7 @@ #include "AArch64.h" #include "AArch64RegisterInfo.h" +#include "AArch64StackOffset.h" #include "llvm/ADT/Optional.h" #include "llvm/CodeGen/MachineCombinerPattern.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -55,8 +56,7 @@ public: bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, - const MachineInstr &MIb, - AliasAnalysis *AA = nullptr) const override; + const MachineInstr &MIb) const override; unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override; @@ -299,7 +299,7 @@ private: /// if necessary, to be replaced by the scavenger at the end of PEI. 
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, - int Offset, const TargetInstrInfo *TII, + StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag = MachineInstr::NoFlags, bool SetNZCV = false, bool NeedsWinCFI = false, bool *HasWinCFI = nullptr); @@ -308,7 +308,7 @@ void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, /// FP. Return false if the offset could not be handled directly in MI, and /// return the left-over portion by reference. bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned FrameReg, int &Offset, + unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII); /// Use to report the frame offset status in isAArch64FrameOffsetLegal. @@ -332,10 +332,10 @@ enum AArch64FrameOffsetStatus { /// If set, @p EmittableOffset contains the amount that can be set in @p MI /// (possibly with @p OutUnscaledOp if OutUseUnscaledOp is true) and that /// is a legal offset. -int isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, +int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp = nullptr, unsigned *OutUnscaledOp = nullptr, - int *EmittableOffset = nullptr); + int64_t *EmittableOffset = nullptr); static inline bool isUncondBranchOpcode(int Opc) { return Opc == AArch64::B; } diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index eed53f36d574..1981bd5d3bf0 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -62,6 +62,9 @@ def HasAM : Predicate<"Subtarget->hasAM()">, def HasSEL2 : Predicate<"Subtarget->hasSEL2()">, AssemblerPredicate<"FeatureSEL2", "sel2">; +def HasPMU : Predicate<"Subtarget->hasPMU()">, + AssemblerPredicate<"FeaturePMU", "pmu">; + def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">, AssemblerPredicate<"FeatureTLB_RMI", "tlb-rmi">; @@ -116,7 +119,7 @@ def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">, def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">, AssemblerPredicate<"FeatureSVE2SHA3", "sve2-sha3">; def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">, - AssemblerPredicate<"FeatureSVE2BitPerm", "bitperm">; + AssemblerPredicate<"FeatureSVE2BitPerm", "sve2-bitperm">; def HasRCPC : Predicate<"Subtarget->hasRCPC()">, AssemblerPredicate<"FeatureRCPC", "rcpc">; def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">, @@ -133,6 +136,12 @@ def HasBTI : Predicate<"Subtarget->hasBTI()">, AssemblerPredicate<"FeatureBranchTargetId", "bti">; def HasMTE : Predicate<"Subtarget->hasMTE()">, AssemblerPredicate<"FeatureMTE", "mte">; +def HasTME : Predicate<"Subtarget->hasTME()">, + AssemblerPredicate<"FeatureTME", "tme">; +def HasETE : Predicate<"Subtarget->hasETE()">, + AssemblerPredicate<"FeatureETE", "ete">; +def HasTRBE : Predicate<"Subtarget->hasTRBE()">, + AssemblerPredicate<"FeatureTRBE", "trbe">; def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; def IsWindows : Predicate<"Subtarget->isTargetWindows()">; @@ -415,6 +424,14 @@ def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, S def AArch64st2g : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def SDT_AArch64unpk : SDTypeProfile<1, 1, [ + SDTCisInt<0>, SDTCisInt<1>, 
SDTCisOpSmallerThanOp<1, 0> +]>; +def AArch64sunpkhi : SDNode<"AArch64ISD::SUNPKHI", SDT_AArch64unpk>; +def AArch64sunpklo : SDNode<"AArch64ISD::SUNPKLO", SDT_AArch64unpk>; +def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>; +def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>; + //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -431,6 +448,13 @@ let RecomputePerFunction = 1 in { def UseBTI : Predicate<[{ MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>; def NotUseBTI : Predicate<[{ !MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>; + + // Toggles patterns which aren't beneficial in GlobalISel when we aren't + // optimizing. This allows us to selectively use patterns without impacting + // SelectionDAG's behaviour. + // FIXME: One day there will probably be a nicer way to check for this, but + // today is not that day. + def OptimizedGISelOrOtherSelector : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">; } include "AArch64InstrFormats.td" @@ -785,7 +809,11 @@ def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins), let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in { def HWASAN_CHECK_MEMACCESS : Pseudo< (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo), - [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 imm:$accessinfo))]>, + [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, + Sched<[]>; +def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo< + (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo), + [(int_hwasan_check_memaccess_shortgranules X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, Sched<[]>; } @@ -804,6 +832,23 @@ def : InstAlias<"sys $op1, $Cn, $Cm, $op2", (SYSxt imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, XZR)>; + +let Predicates = [HasTME] in { + +def TSTART : TMSystemI<0b0000, "tstart", + [(set GPR64:$Rt, (int_aarch64_tstart))]>; + +def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>; + +def TCANCEL : TMSystemException<0b011, "tcancel", + [(int_aarch64_tcancel i64_imm0_65535:$imm)]>; + +def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]> { + let mayLoad = 0; + let mayStore = 0; +} +} // HasTME + //===----------------------------------------------------------------------===// // Move immediate instructions. //===----------------------------------------------------------------------===// @@ -815,37 +860,37 @@ let PostEncoderMethod = "fixMOVZ" in defm MOVZ : MoveImmediate<0b10, "movz">; // First group of aliases covers an implicit "lsl #0". 
-def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, imm0_65535:$imm, 0), 0>; -def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, imm0_65535:$imm, 0), 0>; -def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, imm0_65535:$imm, 0)>; -def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, imm0_65535:$imm, 0)>; -def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, imm0_65535:$imm, 0)>; -def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, imm0_65535:$imm, 0)>; +def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, i32_imm0_65535:$imm, 0), 0>; +def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, i32_imm0_65535:$imm, 0), 0>; +def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, i32_imm0_65535:$imm, 0)>; +def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, i32_imm0_65535:$imm, 0)>; +def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, i32_imm0_65535:$imm, 0)>; +def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, i32_imm0_65535:$imm, 0)>; // Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax. -def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>; -def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>; -def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>; -def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>; +def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>; +def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>; +def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>; +def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>; -def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>; -def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>; -def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>; -def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>; +def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>; +def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>; +def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>; +def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>; -def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g3:$sym, 48), 0>; -def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g2:$sym, 32), 0>; -def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g1:$sym, 16), 0>; -def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g0:$sym, 0), 0>; +def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g3:$sym, 48), 0>; +def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g2:$sym, 32), 0>; +def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g1:$sym, 16), 0>; +def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g0:$sym, 0), 0>; -def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>; -def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>; +def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>; +def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>; -def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>; -def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>; +def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, 
movw_symbol_g1:$sym, 16)>; +def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>; -def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g1:$sym, 16), 0>; -def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g0:$sym, 0), 0>; +def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g1:$sym, 16), 0>; +def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g0:$sym, 0), 0>; // Final group of aliases covers true "mov $Rd, $imm" cases. multiclass movw_mov_alias, GISDNodeXFormEquiv; +let Predicates = [OptimizedGISelOrOtherSelector] in { +// The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless +// copies. def : Pat<(i64 i64imm_32bit:$src), (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>; +} // Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model). def bitcast_fpimm_to_i32 : SDNodeXForm; let AddedComplexity = 1 in { -def : Pat<(sub GPR32sp:$R2, arith_extended_reg32:$R3), - (SUBSWrx GPR32sp:$R2, arith_extended_reg32:$R3)>; -def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64:$R3), - (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64:$R3)>; +def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3), + (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>; +def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3), + (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>; } // Because of the immediate format for add/sub-imm instructions, the @@ -2165,8 +2214,8 @@ def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>; def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{ if (auto *G = dyn_cast(N)) { const DataLayout &DL = MF->getDataLayout(); - unsigned Align = G->getGlobal()->getPointerAlignment(DL); - return Align >= 4 && G->getOffset() % 4 == 0; + MaybeAlign Align = G->getGlobal()->getPointerAlignment(DL); + return Align && *Align >= 4 && G->getOffset() % 4 == 0; } if (auto *C = dyn_cast(N)) return C->getAlignment() >= 4 && C->getOffset() % 4 == 0; @@ -3281,20 +3330,37 @@ defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub", // N.b. FMSUB etc have the accumulator at the *end* of (outs), unlike // the NEON variant. + +// Here we handle first -(a + b*c) for FNMADD: + +let Predicates = [HasNEON, HasFullFP16] in +def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)), + (FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>; + def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)), (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)), (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -// We handled -(a + b*c) for FNMADD above, now it's time for "(-a) + (-b)*c" and -// "(-a) + b*(-c)". 
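// Compact summary of the three scalar fneg/fma blocks in this hunk (the f16
// patterns added here mirror the existing f32/f64 ones, differing only in the
// register class):
//   fma(-a,  b,  c) =  c - a*b  ==>  FMSUB
//   fma(-a,  b, -c) = -c - a*b  ==>  FNMADD
//   fma( a, -b, -c) = -c - a*b  ==>  FNMADD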
+// Now it's time for "(-a) + (-b)*c" + +let Predicates = [HasNEON, HasFullFP16] in +def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))), + (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>; + def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))), (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))), (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; +// And here "(-a) + b*(-c)" + +let Predicates = [HasNEON, HasFullFP16] in +def : Pat<(f16 (fma FPR16:$Rn, (fneg FPR16:$Rm), (fneg FPR16:$Ra))), + (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>; + def : Pat<(f32 (fma FPR32:$Rn, (fneg FPR32:$Rm), (fneg FPR32:$Ra))), (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; @@ -6939,5 +7005,124 @@ def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)), def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>; def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>; +// Extracting lane zero is a special case where we can just use a plain +// EXTRACT_SUBREG instruction, which will become FMOV. This is easier for the +// rest of the compiler, especially the register allocator and copy propagation, +// to reason about, so is preferred when it's possible to use it. +let AddedComplexity = 10 in { + def : Pat<(i64 (extractelt (v2i64 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, dsub)>; + def : Pat<(i32 (extractelt (v4i32 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, ssub)>; + def : Pat<(i32 (extractelt (v2i32 V64:$V), (i64 0))), (EXTRACT_SUBREG V64:$V, ssub)>; +} + +// dot_v4i8 +class mul_v4i8 : + PatFrag<(ops node:$Rn, node:$Rm, node:$offset), + (mul (ldop (add node:$Rn, node:$offset)), + (ldop (add node:$Rm, node:$offset)))>; +class mulz_v4i8 : + PatFrag<(ops node:$Rn, node:$Rm), + (mul (ldop node:$Rn), (ldop node:$Rm))>; + +def load_v4i8 : + OutPatFrag<(ops node:$R), + (INSERT_SUBREG + (v2i32 (IMPLICIT_DEF)), + (i32 (COPY_TO_REGCLASS (LDRWui node:$R, (i64 0)), FPR32)), + ssub)>; + +class dot_v4i8 : + Pat<(i32 (add (mul_v4i8 GPR64sp:$Rn, GPR64sp:$Rm, (i64 3)), + (add (mul_v4i8 GPR64sp:$Rn, GPR64sp:$Rm, (i64 2)), + (add (mul_v4i8 GPR64sp:$Rn, GPR64sp:$Rm, (i64 1)), + (mulz_v4i8 GPR64sp:$Rn, GPR64sp:$Rm))))), + (EXTRACT_SUBREG (i64 (DOT (DUPv2i32gpr WZR), + (load_v4i8 GPR64sp:$Rn), + (load_v4i8 GPR64sp:$Rm))), + sub_32)>, Requires<[HasDotProd]>; + +// dot_v8i8 +class ee_v8i8 : + PatFrag<(ops node:$V, node:$K), + (v4i16 (extract_subvector (v8i16 (extend node:$V)), node:$K))>; + +class mul_v8i8 : + PatFrag<(ops node:$M, node:$N, node:$K), + (mulop (v4i16 (ee_v8i8 node:$M, node:$K)), + (v4i16 (ee_v8i8 node:$N, node:$K)))>; + +class idot_v8i8 : + PatFrag<(ops node:$M, node:$N), + (i32 (extractelt + (v4i32 (AArch64uaddv + (add (mul_v8i8 node:$M, node:$N, (i64 0)), + (mul_v8i8 node:$M, node:$N, (i64 4))))), + (i64 0)))>; + +// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm +def VADDV_32 : OutPatFrag<(ops node:$R), (ADDPv2i32 node:$R, node:$R)>; + +class odot_v8i8 : + OutPatFrag<(ops node:$Vm, node:$Vn), + (EXTRACT_SUBREG + (VADDV_32 + (i64 (DOT (DUPv2i32gpr WZR), + (v8i8 node:$Vm), + (v8i8 node:$Vn)))), + sub_32)>; + +class dot_v8i8 : + Pat<(idot_v8i8 V64:$Vm, V64:$Vn), + (odot_v8i8 V64:$Vm, V64:$Vn)>, + Requires<[HasDotProd]>; + +// dot_v16i8 +class ee_v16i8 : + PatFrag<(ops node:$V, node:$K1, node:$K2), + (v4i16 (extract_subvector + (v8i16 (extend + (v8i8 (extract_subvector node:$V, node:$K1)))), node:$K2))>; + +class mul_v16i8 : + PatFrag<(ops node:$M, node:$N, 
node:$K1, node:$K2), + (v4i32 + (mulop (v4i16 (ee_v16i8 node:$M, node:$K1, node:$K2)), + (v4i16 (ee_v16i8 node:$N, node:$K1, node:$K2))))>; + +class idot_v16i8 : + PatFrag<(ops node:$M, node:$N), + (i32 (extractelt + (v4i32 (AArch64uaddv + (add + (add (mul_v16i8 node:$M, node:$N, (i64 0), (i64 0)), + (mul_v16i8 node:$M, node:$N, (i64 8), (i64 0))), + (add (mul_v16i8 node:$M, node:$N, (i64 0), (i64 4)), + (mul_v16i8 node:$M, node:$N, (i64 8), (i64 4)))))), + (i64 0)))>; + +class odot_v16i8 : + OutPatFrag<(ops node:$Vm, node:$Vn), + (i32 (ADDVv4i32v + (DOT (DUPv4i32gpr WZR), node:$Vm, node:$Vn)))>; + +class dot_v16i8 : + Pat<(idot_v16i8 V128:$Vm, V128:$Vn), + (odot_v16i8 V128:$Vm, V128:$Vn)>, + Requires<[HasDotProd]>; + +let AddedComplexity = 10 in { + def : dot_v4i8; + def : dot_v4i8; + def : dot_v8i8; + def : dot_v8i8; + def : dot_v16i8; + def : dot_v16i8; + + // FIXME: add patterns to generate vector by element dot product. + // FIXME: add SVE dot-product patterns. +} + include "AArch64InstrAtomics.td" include "AArch64SVEInstrInfo.td" diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp index 4e13fb8e2027..961f38cad1e4 100644 --- a/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -51,9 +51,19 @@ public: const AArch64Subtarget &STI, const AArch64RegisterBankInfo &RBI); - bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override; + bool select(MachineInstr &I) override; static const char *getName() { return DEBUG_TYPE; } + void setupMF(MachineFunction &MF, GISelKnownBits &KB, + CodeGenCoverage &CoverageInfo) override { + InstructionSelector::setupMF(MF, KB, CoverageInfo); + + // hasFnAttribute() is expensive to call on every BRCOND selection, so + // cache it here for each run of the selector. + ProduceNonFlagSettingCondBr = + !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening); + } + private: /// tblgen-erated 'select' implementation, used as the initial selector for /// the patterns that don't require complex C++. @@ -68,6 +78,10 @@ private: bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const; + /// Eliminate same-sized cross-bank copies into stores before selectImpl(). 
+ void contractCrossBankCopyIntoStore(MachineInstr &I, + MachineRegisterInfo &MRI) const; + bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const; bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF, @@ -101,8 +115,6 @@ private: bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const; - void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI, - SmallVectorImpl> &Idxs) const; bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const; @@ -116,6 +128,7 @@ private: bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const; + bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const; unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const; MachineInstr *emitLoadFromConstantPool(Constant *CPVal, @@ -128,6 +141,8 @@ private: MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, MachineIRBuilder &MIRBuilder) const; + MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const; MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; MachineInstr *emitTST(const Register &LHS, const Register &RHS, @@ -155,7 +170,9 @@ private: ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const; ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const; + ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const; ComplexRendererFns selectArithImmed(MachineOperand &Root) const; + ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const; ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root, unsigned Size) const; @@ -183,11 +200,48 @@ private: return selectAddrModeIndexed(Root, Width / 8); } + bool isWorthFoldingIntoExtendedReg(MachineInstr &MI, + const MachineRegisterInfo &MRI) const; + ComplexRendererFns + selectAddrModeShiftedExtendXReg(MachineOperand &Root, + unsigned SizeInBytes) const; + ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const; + ComplexRendererFns selectAddrModeXRO(MachineOperand &Root, + unsigned SizeInBytes) const; + template + ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const { + return selectAddrModeXRO(Root, Width / 8); + } + + ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const; + + ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const { + return selectShiftedRegister(Root); + } + + ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const { + // TODO: selectShiftedRegister should allow for rotates on logical shifts. + // For now, make them the same. The only difference between the two is that + // logical shifts are allowed to fold in rotates. Otherwise, these are + // functionally the same. + return selectShiftedRegister(Root); + } + + /// Instructions that accept extend modifiers like UXTW expect the register + /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a + /// subregister copy if necessary. Return either ExtReg, or the result of the + /// new copy. 
+ Register narrowExtendRegIfNeeded(Register ExtReg, + MachineIRBuilder &MIB) const; + ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const; + void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const; + void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I) const; + void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I) const; // Materialize a GlobalValue or BlockAddress using a movz+movk sequence. void materializeLargeCMVal(MachineInstr &I, const Value *V, - unsigned char OpFlags) const; + unsigned OpFlags) const; // Optimization methods. bool tryOptVectorShuffle(MachineInstr &I) const; @@ -197,12 +251,22 @@ private: MachineOperand &Predicate, MachineIRBuilder &MIRBuilder) const; + /// Return true if \p MI is a load or store of \p NumBytes bytes. + bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const; + + /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit + /// register zeroed out. In other words, the result of MI has been explicitly + /// zero extended. + bool isDef32(const MachineInstr &MI) const; + const AArch64TargetMachine &TM; const AArch64Subtarget &STI; const AArch64InstrInfo &TII; const AArch64RegisterInfo &TRI; const AArch64RegisterBankInfo &RBI; + bool ProduceNonFlagSettingCondBr = false; + #define GET_GLOBALISEL_PREDICATES_DECL #include "AArch64GenGlobalISel.inc" #undef GET_GLOBALISEL_PREDICATES_DECL @@ -312,7 +376,7 @@ static bool getSubRegForClass(const TargetRegisterClass *RC, SubReg = AArch64::hsub; break; case 32: - if (RC == &AArch64::GPR32RegClass) + if (RC != &AArch64::FPR32RegClass) SubReg = AArch64::sub_32; else SubReg = AArch64::ssub; @@ -357,7 +421,7 @@ static bool unsupportedBinOp(const MachineInstr &I, // so, this will need to be taught about that, and we'll need to get the // bank out of the minimal class for the register. // Either way, this needs to be documented (and possibly verified). - if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (!Register::isVirtualRegister(MO.getReg())) { LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n"); return true; } @@ -492,8 +556,8 @@ static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) { - const unsigned DstReg = I.getOperand(0).getReg(); - const unsigned SrcReg = I.getOperand(1).getReg(); + const Register DstReg = I.getOperand(0).getReg(); + const Register SrcReg = I.getOperand(1).getReg(); const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI); const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); @@ -502,7 +566,7 @@ static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank, (DstSize == SrcSize || // Copies are a mean to setup initial types, the number of // bits may not exactly match. - (TargetRegisterInfo::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || + (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || // Copies are a mean to copy bits around, as long as we are // on the same register class, that's fine. Otherwise, that // means we need some SUBREG_TO_REG or AND & co. 
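The size rule buried in the isValidCopy hunk above is easy to miss in the diff. Below is a minimal standalone sketch of that rule (an illustration only, not code from the patch; the helper name is made up, and the same-register-class escape hatch mentioned in the trailing comment is left out):

// Sketch of the copy-size rule enforced by isValidCopy: accept the copy when
// it is already known valid, when source and destination sizes match exactly,
// or when the source is a physical register at least as wide as the
// destination.
bool copySizeIsValid(bool KnownValid, bool SrcIsPhysReg,
                     unsigned DstSize, unsigned SrcSize) {
  if (KnownValid)
    return true;
  if (DstSize == SrcSize)
    return true;
  return SrcIsPhysReg && DstSize <= SrcSize;
}

For instance, copySizeIsValid(false, true, 32, 64) is true, corresponding to the "physical source wider than the destination" case accepted above.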
@@ -526,7 +590,7 @@ static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank, /// SubRegCopy (To class) = COPY CopyReg:SubReg /// Dst = COPY SubRegCopy static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI, - const RegisterBankInfo &RBI, unsigned SrcReg, + const RegisterBankInfo &RBI, Register SrcReg, const TargetRegisterClass *From, const TargetRegisterClass *To, unsigned SubReg) { @@ -539,7 +603,7 @@ static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI, // It's possible that the destination register won't be constrained. Make // sure that happens. - if (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg())) + if (!Register::isPhysicalRegister(I.getOperand(0).getReg())) RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI); return true; @@ -553,8 +617,8 @@ static std::pair getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) { - unsigned DstReg = I.getOperand(0).getReg(); - unsigned SrcReg = I.getOperand(1).getReg(); + Register DstReg = I.getOperand(0).getReg(); + Register SrcReg = I.getOperand(1).getReg(); const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI); @@ -579,8 +643,8 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) { - unsigned DstReg = I.getOperand(0).getReg(); - unsigned SrcReg = I.getOperand(1).getReg(); + Register DstReg = I.getOperand(0).getReg(); + Register SrcReg = I.getOperand(1).getReg(); const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); @@ -607,11 +671,10 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, // result. auto CheckCopy = [&]() { // If we have a bitcast or something, we can't have physical registers. - assert( - (I.isCopy() || - (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()) && - !TargetRegisterInfo::isPhysicalRegister(I.getOperand(1).getReg()))) && - "No phys reg on generic operator!"); + assert((I.isCopy() || + (!Register::isPhysicalRegister(I.getOperand(0).getReg()) && + !Register::isPhysicalRegister(I.getOperand(1).getReg()))) && + "No phys reg on generic operator!"); assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI)); (void)KnownValid; return true; @@ -626,38 +689,38 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, return false; } - // Is this a cross-bank copy? - if (DstRegBank.getID() != SrcRegBank.getID()) { - // If we're doing a cross-bank copy on different-sized registers, we need - // to do a bit more work. - unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC); - unsigned DstSize = TRI.getRegSizeInBits(*DstRC); - - if (SrcSize > DstSize) { - // We're doing a cross-bank copy into a smaller register. We need a - // subregister copy. First, get a register class that's on the same bank - // as the destination, but the same size as the source. - const TargetRegisterClass *SubregRC = - getMinClassForRegBank(DstRegBank, SrcSize, true); - assert(SubregRC && "Didn't get a register class for subreg?"); - - // Get the appropriate subregister for the destination. 
- unsigned SubReg = 0; - if (!getSubRegForClass(DstRC, TRI, SubReg)) { - LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n"); - return false; - } - - // Now, insert a subregister copy using the new register class. - selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg); - return CheckCopy(); + unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC); + unsigned DstSize = TRI.getRegSizeInBits(*DstRC); + + // If we're doing a cross-bank copy on different-sized registers, we need + // to do a bit more work. + if (SrcSize > DstSize) { + // We're doing a cross-bank copy into a smaller register. We need a + // subregister copy. First, get a register class that's on the same bank + // as the destination, but the same size as the source. + const TargetRegisterClass *SubregRC = + getMinClassForRegBank(DstRegBank, SrcSize, true); + assert(SubregRC && "Didn't get a register class for subreg?"); + + // Get the appropriate subregister for the destination. + unsigned SubReg = 0; + if (!getSubRegForClass(DstRC, TRI, SubReg)) { + LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n"); + return false; } - else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 && - SrcSize == 16) { + // Now, insert a subregister copy using the new register class. + selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg); + return CheckCopy(); + } + + // Is this a cross-bank copy? + if (DstRegBank.getID() != SrcRegBank.getID()) { + if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 && + SrcSize == 16) { // Special case for FPR16 to GPR32. // FIXME: This can probably be generalized like the above case. - unsigned PromoteReg = + Register PromoteReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass); BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG), PromoteReg) @@ -674,7 +737,7 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, // If the destination is a physical register, then there's nothing to // change, so we're done. 
- if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + if (Register::isPhysicalRegister(DstReg)) return CheckCopy(); } @@ -955,7 +1018,9 @@ bool AArch64InstructionSelector::selectVectorSHL( return false; unsigned Opc = 0; - if (Ty == LLT::vector(4, 32)) { + if (Ty == LLT::vector(2, 64)) { + Opc = AArch64::USHLv2i64; + } else if (Ty == LLT::vector(4, 32)) { Opc = AArch64::USHLv4i32; } else if (Ty == LLT::vector(2, 32)) { Opc = AArch64::USHLv2i32; @@ -989,7 +1054,11 @@ bool AArch64InstructionSelector::selectVectorASHR( unsigned Opc = 0; unsigned NegOpc = 0; const TargetRegisterClass *RC = nullptr; - if (Ty == LLT::vector(4, 32)) { + if (Ty == LLT::vector(2, 64)) { + Opc = AArch64::SSHLv2i64; + NegOpc = AArch64::NEGv2i64; + RC = &AArch64::FPR128RegClass; + } else if (Ty == LLT::vector(4, 32)) { Opc = AArch64::SSHLv4i32; NegOpc = AArch64::NEGv4i32; RC = &AArch64::FPR128RegClass; @@ -1044,7 +1113,7 @@ bool AArch64InstructionSelector::selectVaStartDarwin( } void AArch64InstructionSelector::materializeLargeCMVal( - MachineInstr &I, const Value *V, unsigned char OpFlags) const { + MachineInstr &I, const Value *V, unsigned OpFlags) const { MachineBasicBlock &MBB = *I.getParent(); MachineFunction &MF = *MBB.getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -1097,8 +1166,8 @@ void AArch64InstructionSelector::preISelLower(MachineInstr &I) const { // some reason we receive input GMIR that has an s64 shift amount that's not // a G_CONSTANT, insert a truncate so that we can still select the s32 // register-register variant. - unsigned SrcReg = I.getOperand(1).getReg(); - unsigned ShiftReg = I.getOperand(2).getReg(); + Register SrcReg = I.getOperand(1).getReg(); + Register ShiftReg = I.getOperand(2).getReg(); const LLT ShiftTy = MRI.getType(ShiftReg); const LLT SrcTy = MRI.getType(SrcReg); if (SrcTy.isVector()) @@ -1118,6 +1187,9 @@ void AArch64InstructionSelector::preISelLower(MachineInstr &I) const { } return; } + case TargetOpcode::G_STORE: + contractCrossBankCopyIntoStore(I, MRI); + return; default: return; } @@ -1158,6 +1230,48 @@ bool AArch64InstructionSelector::earlySelectSHL( return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); } +void AArch64InstructionSelector::contractCrossBankCopyIntoStore( + MachineInstr &I, MachineRegisterInfo &MRI) const { + assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE"); + // If we're storing a scalar, it doesn't matter what register bank that + // scalar is on. All that matters is the size. + // + // So, if we see something like this (with a 32-bit scalar as an example): + // + // %x:gpr(s32) = ... something ... + // %y:fpr(s32) = COPY %x:gpr(s32) + // G_STORE %y:fpr(s32) + // + // We can fix this up into something like this: + // + // G_STORE %x:gpr(s32) + // + // And then continue the selection process normally. + MachineInstr *Def = getDefIgnoringCopies(I.getOperand(0).getReg(), MRI); + if (!Def) + return; + Register DefDstReg = Def->getOperand(0).getReg(); + LLT DefDstTy = MRI.getType(DefDstReg); + Register StoreSrcReg = I.getOperand(0).getReg(); + LLT StoreSrcTy = MRI.getType(StoreSrcReg); + + // If we get something strange like a physical register, then we shouldn't + // go any further. + if (!DefDstTy.isValid()) + return; + + // Are the source and dst types the same size? + if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits()) + return; + + if (RBI.getRegBank(StoreSrcReg, MRI, TRI) == + RBI.getRegBank(DefDstReg, MRI, TRI)) + return; + + // We have a cross-bank copy, which is entering a store. Let's fold it. 
+ I.getOperand(0).setReg(DefDstReg); +} + bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -1169,13 +1283,37 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const { switch (I.getOpcode()) { case TargetOpcode::G_SHL: return earlySelectSHL(I, MRI); + case TargetOpcode::G_CONSTANT: { + bool IsZero = false; + if (I.getOperand(1).isCImm()) + IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0; + else if (I.getOperand(1).isImm()) + IsZero = I.getOperand(1).getImm() == 0; + + if (!IsZero) + return false; + + Register DefReg = I.getOperand(0).getReg(); + LLT Ty = MRI.getType(DefReg); + if (Ty != LLT::scalar(64) && Ty != LLT::scalar(32)) + return false; + + if (Ty == LLT::scalar(64)) { + I.getOperand(1).ChangeToRegister(AArch64::XZR, false); + RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI); + } else { + I.getOperand(1).ChangeToRegister(AArch64::WZR, false); + RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI); + } + I.setDesc(TII.get(TargetOpcode::COPY)); + return true; + } default: return false; } } -bool AArch64InstructionSelector::select(MachineInstr &I, - CodeGenCoverage &CoverageInfo) const { +bool AArch64InstructionSelector::select(MachineInstr &I) { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -1244,7 +1382,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I, if (earlySelect(I)) return true; - if (selectImpl(I, CoverageInfo)) + if (selectImpl(I, *CoverageInfo)) return true; LLT Ty = @@ -1439,14 +1577,43 @@ bool AArch64InstructionSelector::select(MachineInstr &I, return true; } case TargetOpcode::G_EXTRACT: { - LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); - LLT DstTy = MRI.getType(I.getOperand(0).getReg()); + Register DstReg = I.getOperand(0).getReg(); + Register SrcReg = I.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + LLT DstTy = MRI.getType(DstReg); (void)DstTy; unsigned SrcSize = SrcTy.getSizeInBits(); - // Larger extracts are vectors, same-size extracts should be something else - // by now (either split up or simplified to a COPY). - if (SrcTy.getSizeInBits() > 64 || Ty.getSizeInBits() > 32) - return false; + + if (SrcTy.getSizeInBits() > 64) { + // This should be an extract of an s128, which is like a vector extract. + if (SrcTy.getSizeInBits() != 128) + return false; + // Only support extracting 64 bits from an s128 at the moment. + if (DstTy.getSizeInBits() != 64) + return false; + + const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); + const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); + // Check we have the right regbank always. + assert(SrcRB.getID() == AArch64::FPRRegBankID && + DstRB.getID() == AArch64::FPRRegBankID && + "Wrong extract regbank!"); + (void)SrcRB; + + // Emit the same code as a vector extract. + // Offset must be a multiple of 64. + unsigned Offset = I.getOperand(2).getImm(); + if (Offset % 64 != 0) + return false; + unsigned LaneIdx = Offset / 64; + MachineIRBuilder MIB(I); + MachineInstr *Extract = emitExtractVectorElt( + DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB); + if (!Extract) + return false; + I.eraseFromParent(); + return true; + } I.setDesc(TII.get(SrcSize == 64 ? 
AArch64::UBFMXri : AArch64::UBFMWri)); MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() + @@ -1458,7 +1625,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I, return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } - Register DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); + DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) .addReg(DstReg, 0, AArch64::sub_32); @@ -1521,11 +1688,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I, case TargetOpcode::G_GLOBAL_VALUE: { auto GV = I.getOperand(1).getGlobal(); - if (GV->isThreadLocal()) { - // FIXME: we don't support TLS yet. - return false; - } - unsigned char OpFlags = STI.ClassifyGlobalReference(GV, TM); + if (GV->isThreadLocal()) + return selectTLSGlobalValue(I, MRI); + + unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM); if (OpFlags & AArch64II::MO_GOT) { I.setDesc(TII.get(AArch64::LOADgot)); I.getOperand(1).setTargetFlags(OpFlags); @@ -1562,8 +1728,15 @@ bool AArch64InstructionSelector::select(MachineInstr &I, } auto &MemOp = **I.memoperands_begin(); - if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) { - LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n"); + if (MemOp.isAtomic()) { + // For now we just support s8 acquire loads to be able to compile stack + // protector code. + if (MemOp.getOrdering() == AtomicOrdering::Acquire && + MemOp.getSize() == 1) { + I.setDesc(TII.get(AArch64::LDARB)); + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); + } + LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n"); return false; } unsigned MemSizeInBits = MemOp.getSize() * 8; @@ -1598,7 +1771,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I, const unsigned Size = MemSizeInBits / 8; const unsigned Scale = Log2_32(Size); if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) { - unsigned Ptr2Reg = PtrMI->getOperand(1).getReg(); + Register Ptr2Reg = PtrMI->getOperand(1).getReg(); I.getOperand(1).setReg(Ptr2Reg); PtrMI = MRI.getVRegDef(Ptr2Reg); Offset = Imm / Size; @@ -1688,8 +1861,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I, return selectVectorSHL(I, MRI); LLVM_FALLTHROUGH; case TargetOpcode::G_OR: - case TargetOpcode::G_LSHR: - case TargetOpcode::G_GEP: { + case TargetOpcode::G_LSHR: { // Reject the various things we don't support yet. if (unsupportedBinOp(I, RBI, MRI, TRI)) return false; @@ -1711,6 +1883,13 @@ bool AArch64InstructionSelector::select(MachineInstr &I, return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } + case TargetOpcode::G_GEP: { + MachineIRBuilder MIRBuilder(I); + emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), + MIRBuilder); + I.eraseFromParent(); + return true; + } case TargetOpcode::G_UADDO: { // TODO: Support other types. 
unsigned OpSize = Ty.getSizeInBits(); @@ -1816,6 +1995,16 @@ bool AArch64InstructionSelector::select(MachineInstr &I, constrainSelectedInstRegOperands(I, TII, TRI, RBI); return true; } + + if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) { + MachineIRBuilder MIB(I); + MachineInstr *Extract = emitExtractVectorElt( + DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB); + if (!Extract) + return false; + I.eraseFromParent(); + return true; + } } return false; @@ -1868,21 +2057,41 @@ bool AArch64InstructionSelector::select(MachineInstr &I, case TargetOpcode::G_ZEXT: case TargetOpcode::G_SEXT: { unsigned Opcode = I.getOpcode(); - const LLT DstTy = MRI.getType(I.getOperand(0).getReg()), - SrcTy = MRI.getType(I.getOperand(1).getReg()); - const bool isSigned = Opcode == TargetOpcode::G_SEXT; + const bool IsSigned = Opcode == TargetOpcode::G_SEXT; const Register DefReg = I.getOperand(0).getReg(); const Register SrcReg = I.getOperand(1).getReg(); - const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); + const LLT DstTy = MRI.getType(DefReg); + const LLT SrcTy = MRI.getType(SrcReg); + unsigned DstSize = DstTy.getSizeInBits(); + unsigned SrcSize = SrcTy.getSizeInBits(); - if (RB.getID() != AArch64::GPRRegBankID) { - LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode()) << " on bank: " << RB - << ", expected: GPR\n"); - return false; - } + assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() == + AArch64::GPRRegBankID && + "Unexpected ext regbank"); + MachineIRBuilder MIB(I); MachineInstr *ExtI; - if (DstTy == LLT::scalar(64)) { + if (DstTy.isVector()) + return false; // Should be handled by imported patterns. + + // First check if we're extending the result of a load which has a dest type + // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest + // GPR register on AArch64 and all loads which are smaller automatically + // zero-extend the upper bits. E.g. + // %v(s8) = G_LOAD %p, :: (load 1) + // %v2(s32) = G_ZEXT %v(s8) + if (!IsSigned) { + auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI); + if (LoadMI && + RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID) { + const MachineMemOperand *MemOp = *LoadMI->memoperands_begin(); + unsigned BytesLoaded = MemOp->getSize(); + if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded) + return selectCopy(I, TII, MRI, TRI, RBI); + } + } + + if (DstSize == 64) { // FIXME: Can we avoid manually doing this? if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) { LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode) @@ -1890,33 +2099,26 @@ bool AArch64InstructionSelector::select(MachineInstr &I, return false; } - const Register SrcXReg = - MRI.createVirtualRegister(&AArch64::GPR64RegClass); - BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG)) - .addDef(SrcXReg) - .addImm(0) - .addUse(SrcReg) - .addImm(AArch64::sub_32); - - const unsigned NewOpc = isSigned ? AArch64::SBFMXri : AArch64::UBFMXri; - ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc)) - .addDef(DefReg) - .addUse(SrcXReg) - .addImm(0) - .addImm(SrcTy.getSizeInBits() - 1); - } else if (DstTy.isScalar() && DstTy.getSizeInBits() <= 32) { - const unsigned NewOpc = isSigned ? 
AArch64::SBFMWri : AArch64::UBFMWri; - ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc)) - .addDef(DefReg) - .addUse(SrcReg) - .addImm(0) - .addImm(SrcTy.getSizeInBits() - 1); + auto SubregToReg = + MIB.buildInstr(AArch64::SUBREG_TO_REG, {&AArch64::GPR64RegClass}, {}) + .addImm(0) + .addUse(SrcReg) + .addImm(AArch64::sub_32); + + ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri, + {DefReg}, {SubregToReg}) + .addImm(0) + .addImm(SrcSize - 1); + } else if (DstSize <= 32) { + ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri, + {DefReg}, {SrcReg}) + .addImm(0) + .addImm(SrcSize - 1); } else { return false; } constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); - I.eraseFromParent(); return true; } @@ -2163,6 +2365,37 @@ bool AArch64InstructionSelector::selectJumpTable( return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); } +bool AArch64InstructionSelector::selectTLSGlobalValue( + MachineInstr &I, MachineRegisterInfo &MRI) const { + if (!STI.isTargetMachO()) + return false; + MachineFunction &MF = *I.getParent()->getParent(); + MF.getFrameInfo().setAdjustsStack(true); + + const GlobalValue &GV = *I.getOperand(1).getGlobal(); + MachineIRBuilder MIB(I); + + MIB.buildInstr(AArch64::LOADgot, {AArch64::X0}, {}) + .addGlobalAddress(&GV, 0, AArch64II::MO_TLS); + + auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass}, + {Register(AArch64::X0)}) + .addImm(0); + + // TLS calls preserve all registers except those that absolutely must be + // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be + // silly). + MIB.buildInstr(AArch64::BLR, {}, {Load}) + .addDef(AArch64::X0, RegState::Implicit) + .addRegMask(TRI.getTLSCallPreservedMask()); + + MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0)); + RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass, + MRI); + I.eraseFromParent(); + return true; +} + bool AArch64InstructionSelector::selectIntrinsicTrunc( MachineInstr &I, MachineRegisterInfo &MRI) const { const LLT SrcTy = MRI.getType(I.getOperand(0).getReg()); @@ -2478,16 +2711,40 @@ bool AArch64InstructionSelector::selectMergeValues( const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); const LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation"); + const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); - // At the moment we only support merging two s32s into an s64. if (I.getNumOperands() != 3) return false; - if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32) - return false; - const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); + + // Merging 2 s64s into an s128. 
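  // For example (illustrative GMIR -> MIR):
  //   %r:fpr(s128) = G_MERGE_VALUES %lo:fpr(s64), %hi:fpr(s64)
  // is selected as an IMPLICIT_DEF of the 128-bit register followed by two
  // 64-bit lane inserts, placing %lo in lane 0 and %hi in lane 1.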
+ if (DstTy == LLT::scalar(128)) { + if (SrcTy.getSizeInBits() != 64) + return false; + MachineIRBuilder MIB(I); + Register DstReg = I.getOperand(0).getReg(); + Register Src1Reg = I.getOperand(1).getReg(); + Register Src2Reg = I.getOperand(2).getReg(); + auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {}); + MachineInstr *InsMI = + emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB); + if (!InsMI) + return false; + MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(), + Src2Reg, /* LaneIdx */ 1, RB, MIB); + if (!Ins2MI) + return false; + constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI); + constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI); + I.eraseFromParent(); + return true; + } + if (RB.getID() != AArch64::GPRRegBankID) return false; + if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32) + return false; + auto *DstRC = &AArch64::GPR64RegClass; Register SubToRegDef = MRI.createVirtualRegister(DstRC); MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(), @@ -2695,7 +2952,8 @@ bool AArch64InstructionSelector::selectUnmergeValues( const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg()); const LLT WideTy = MRI.getType(SrcReg); (void)WideTy; - assert(WideTy.isVector() && "can only unmerge from vector types!"); + assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) && + "can only unmerge from vector or s128 types!"); assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() && "source register size too small!"); @@ -2802,29 +3060,6 @@ bool AArch64InstructionSelector::selectConcatVectors( return true; } -void AArch64InstructionSelector::collectShuffleMaskIndices( - MachineInstr &I, MachineRegisterInfo &MRI, - SmallVectorImpl> &Idxs) const { - MachineInstr *MaskDef = MRI.getVRegDef(I.getOperand(3).getReg()); - assert( - MaskDef->getOpcode() == TargetOpcode::G_BUILD_VECTOR && - "G_SHUFFLE_VECTOR should have a constant mask operand as G_BUILD_VECTOR"); - // Find the constant indices. - for (unsigned i = 1, e = MaskDef->getNumOperands(); i < e; ++i) { - // Look through copies. - MachineInstr *ScalarDef = - getDefIgnoringCopies(MaskDef->getOperand(i).getReg(), MRI); - assert(ScalarDef && "Could not find vreg def of shufflevec index op"); - if (ScalarDef->getOpcode() != TargetOpcode::G_CONSTANT) { - // This be an undef if not a constant. - assert(ScalarDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF); - Idxs.push_back(None); - } else { - Idxs.push_back(ScalarDef->getOperand(1).getCImm()->getSExtValue()); - } - } -} - unsigned AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const { @@ -2905,6 +3140,31 @@ getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) { return std::make_pair(Opc, SubregIdx); } +MachineInstr * +AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS, + MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const { + assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); + MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); + static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri}, + {AArch64::ADDWrr, AArch64::ADDWri}}; + bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32; + auto ImmFns = selectArithImmed(RHS); + unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()]; + auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS.getReg()}); + + // If we matched a valid constant immediate, add those operands. 
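  // (For example, a 64-bit G_GEP whose offset is a foldable arithmetic
  // immediate selects ADDXri above and has the immediate and shift rendered
  // here; otherwise the offset register is appended and ADDXrr is used. The
  // 32-bit case picks ADDWri/ADDWrr in the same way.)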
+ if (ImmFns) { + for (auto &RenderFn : *ImmFns) + RenderFn(AddMI); + } else { + AddMI.addUse(RHS.getReg()); + } + + constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI); + return &*AddMI; +} + MachineInstr * AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const { @@ -3151,7 +3411,7 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const { // Can't see past copies from physregs. if (Opc == TargetOpcode::COPY && - TargetRegisterInfo::isPhysicalRegister(CondDef->getOperand(1).getReg())) + Register::isPhysicalRegister(CondDef->getOperand(1).getReg())) return false; CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg()); @@ -3342,16 +3602,9 @@ bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const { return false; // The shuffle's second operand doesn't matter if the mask is all zero. - auto *ZeroVec = getOpcodeDef(G_BUILD_VECTOR, I.getOperand(3).getReg(), MRI); - if (!ZeroVec) + const Constant *Mask = I.getOperand(3).getShuffleMask(); + if (!isa(Mask)) return false; - int64_t Zero = 0; - if (!mi_match(ZeroVec->getOperand(1).getReg(), MRI, m_ICst(Zero)) || Zero) - return false; - for (unsigned i = 1, e = ZeroVec->getNumOperands() - 1; i < e; ++i) { - if (ZeroVec->getOperand(i).getReg() != ZeroVec->getOperand(1).getReg()) - return false; // This wasn't an all zeros vector. - } // We're done, now find out what kind of splat we need. LLT VecTy = MRI.getType(I.getOperand(0).getReg()); @@ -3399,19 +3652,14 @@ bool AArch64InstructionSelector::selectShuffleVector( const LLT Src1Ty = MRI.getType(Src1Reg); Register Src2Reg = I.getOperand(2).getReg(); const LLT Src2Ty = MRI.getType(Src2Reg); + const Constant *ShuffleMask = I.getOperand(3).getShuffleMask(); MachineBasicBlock &MBB = *I.getParent(); MachineFunction &MF = *MBB.getParent(); LLVMContext &Ctx = MF.getFunction().getContext(); - // G_SHUFFLE_VECTOR doesn't really have a strictly enforced constant mask - // operand, it comes in as a normal vector value which we have to analyze to - // find the mask indices. If the mask element is undef, then - // collectShuffleMaskIndices() will add a None entry for that index into - // the list. - SmallVector, 8> Mask; - collectShuffleMaskIndices(I, MRI, Mask); - assert(!Mask.empty() && "Expected to find mask indices"); + SmallVector Mask; + ShuffleVectorInst::getShuffleMask(ShuffleMask, Mask); // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if // it's originated from a <1 x T> type. Those should have been lowered into @@ -3424,10 +3672,10 @@ bool AArch64InstructionSelector::selectShuffleVector( unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8; SmallVector CstIdxs; - for (auto &MaybeVal : Mask) { + for (int Val : Mask) { // For now, any undef indexes we'll just assume to be 0. This should be // optimized in future, e.g. to select DUP etc. - int Val = MaybeVal.hasValue() ? *MaybeVal : 0; + Val = Val < 0 ? 0 : Val; for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) { unsigned Offset = Byte + Val * BytesPerElt; CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset)); @@ -3684,21 +3932,6 @@ static unsigned findIntrinsicID(MachineInstr &I) { return IntrinOp->getIntrinsicID(); } -/// Helper function to emit the correct opcode for a llvm.aarch64.stlxr -/// intrinsic. -static unsigned getStlxrOpcode(unsigned NumBytesToStore) { - switch (NumBytesToStore) { - // TODO: 1, 2, and 4 byte stores. 
- case 8: - return AArch64::STLXRX; - default: - LLVM_DEBUG(dbgs() << "Unexpected number of bytes to store! (" - << NumBytesToStore << ")\n"); - break; - } - return 0; -} - bool AArch64InstructionSelector::selectIntrinsicWithSideEffects( MachineInstr &I, MachineRegisterInfo &MRI) const { // Find the intrinsic ID. @@ -3719,32 +3952,6 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects( return false; MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000); break; - case Intrinsic::aarch64_stlxr: - Register StatReg = I.getOperand(0).getReg(); - assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 && - "Status register must be 32 bits!"); - Register SrcReg = I.getOperand(2).getReg(); - - if (RBI.getSizeInBits(SrcReg, MRI, TRI) != 64) { - LLVM_DEBUG(dbgs() << "Only support 64-bit sources right now.\n"); - return false; - } - - Register PtrReg = I.getOperand(3).getReg(); - assert(MRI.getType(PtrReg).isPointer() && "Expected pointer operand"); - - // Expect only one memory operand. - if (!I.hasOneMemOperand()) - return false; - - const MachineMemOperand *MemOp = *I.memoperands_begin(); - unsigned NumBytesToStore = MemOp->getSize(); - unsigned Opc = getStlxrOpcode(NumBytesToStore); - if (!Opc) - return false; - - auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg}); - constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI); } I.eraseFromParent(); @@ -3860,6 +4067,30 @@ AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const { return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; } +/// Helper to select an immediate value that can be represented as a 12-bit +/// value shifted left by either 0 or 12. If it is possible to do so, return +/// the immediate and shift value. If not, return None. +/// +/// Used by selectArithImmed and selectNegArithImmed. +InstructionSelector::ComplexRendererFns +AArch64InstructionSelector::select12BitValueWithLeftShift( + uint64_t Immed) const { + unsigned ShiftAmt; + if (Immed >> 12 == 0) { + ShiftAmt = 0; + } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { + ShiftAmt = 12; + Immed = Immed >> 12; + } else + return None; + + unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); }, + }}; +} + /// SelectArithImmed - Select an immediate value that can be represented as /// a 12-bit value shifted left by either 0 or 12. If so, return true with /// Val set to the 12-bit value and Shift set to the shifter operand. @@ -3871,24 +4102,231 @@ AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { // here because the ComplexPattern opcode list is only used in // root-level opcode matching. auto MaybeImmed = getImmedFromMO(Root); + if (MaybeImmed == None) + return None; + return select12BitValueWithLeftShift(*MaybeImmed); +} + +/// SelectNegArithImmed - As above, but negates the value before trying to +/// select it. +InstructionSelector::ComplexRendererFns +AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const { + // We need a register here, because we need to know if we have a 64 or 32 + // bit immediate. 
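select12BitValueWithLeftShift and selectArithImmed above encode the usual AArch64 rule that an arithmetic immediate is a 12-bit value optionally shifted left by 12. The same test as a self-contained sketch, using plain integers instead of MachineOperand and ComplexRendererFns:

    #include <cstdint>
    #include <optional>
    #include <utility>

    // An AArch64 arithmetic immediate is a 12-bit value, optionally LSL #12.
    static std::optional<std::pair<uint64_t, unsigned>>
    encodeArithImmed(uint64_t Immed) {
      if (Immed >> 12 == 0)
        return std::make_pair(Immed, 0u);            // fits as-is, LSL #0
      if ((Immed & 0xfff) == 0 && Immed >> 24 == 0)
        return std::make_pair(Immed >> 12, 12u);     // fits after LSL #12
      return std::nullopt;                           // not representable
    }

    int main() {
      auto A = encodeArithImmed(0xabc);     // (0xabc, 0)
      auto B = encodeArithImmed(0xabc000);  // (0xabc, 12)
      auto C = encodeArithImmed(0xabc123);  // not encodable
      return (A && B && !C) ? 0 : 1;
    }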
+ if (!Root.isReg()) + return None; + auto MaybeImmed = getImmedFromMO(Root); if (MaybeImmed == None) return None; uint64_t Immed = *MaybeImmed; - unsigned ShiftAmt; - if (Immed >> 12 == 0) { - ShiftAmt = 0; - } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { - ShiftAmt = 12; - Immed = Immed >> 12; - } else + // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" + // have the opposite effect on the C flag, so this pattern mustn't match under + // those circumstances. + if (Immed == 0) return None; - unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); - return {{ - [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); }, - [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); }, - }}; + // Check if we're dealing with a 32-bit type on the root or a 64-bit type on + // the root. + MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); + if (MRI.getType(Root.getReg()).getSizeInBits() == 32) + Immed = ~((uint32_t)Immed) + 1; + else + Immed = ~Immed + 1ULL; + + if (Immed & 0xFFFFFFFFFF000000ULL) + return None; + + Immed &= 0xFFFFFFULL; + return select12BitValueWithLeftShift(Immed); +} + +/// Return true if it is worth folding MI into an extended register. That is, +/// if it's safe to pull it into the addressing mode of a load or store as a +/// shift. +bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg( + MachineInstr &MI, const MachineRegisterInfo &MRI) const { + // Always fold if there is one use, or if we're optimizing for size. + Register DefReg = MI.getOperand(0).getReg(); + if (MRI.hasOneUse(DefReg) || + MI.getParent()->getParent()->getFunction().hasMinSize()) + return true; + + // It's better to avoid folding and recomputing shifts when we don't have a + // fastpath. + if (!STI.hasLSLFast()) + return false; + + // We have a fastpath, so folding a shift in and potentially computing it + // many times may be beneficial. Check if this is only used in memory ops. + // If it is, then we should fold. + return all_of(MRI.use_instructions(DefReg), + [](MachineInstr &Use) { return Use.mayLoadOrStore(); }); +} + +/// This is used for computing addresses like this: +/// +/// ldr x1, [x2, x3, lsl #3] +/// +/// Where x2 is the base register, and x3 is an offset register. The shift-left +/// is a constant value specific to this load instruction. That is, we'll never +/// see anything other than a 3 here (which corresponds to the size of the +/// element being loaded.) +InstructionSelector::ComplexRendererFns +AArch64InstructionSelector::selectAddrModeShiftedExtendXReg( + MachineOperand &Root, unsigned SizeInBytes) const { + if (!Root.isReg()) + return None; + MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); + + // Make sure that the memory op is a valid size. + int64_t LegalShiftVal = Log2_32(SizeInBytes); + if (LegalShiftVal == 0) + return None; + + // We want to find something like this: + // + // val = G_CONSTANT LegalShiftVal + // shift = G_SHL off_reg val + // ptr = G_GEP base_reg shift + // x = G_LOAD ptr + // + // And fold it into this addressing mode: + // + // ldr x, [base_reg, off_reg, lsl #LegalShiftVal] + + // Check if we can find the G_GEP. + MachineInstr *Gep = getOpcodeDef(TargetOpcode::G_GEP, Root.getReg(), MRI); + if (!Gep || !isWorthFoldingIntoExtendedReg(*Gep, MRI)) + return None; + + // Now, try to match an opcode which will match our specific offset. + // We want a G_SHL or a G_MUL. 
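selectNegArithImmed above retries that encoding after negating the constant at the operand's width, so that, for example, a compare against a negative constant can be selected as CMN of the corresponding positive value. A plain-C++ sketch of that path (encodeArithImmed repeats the check from the previous sketch):

    #include <cstdint>
    #include <optional>
    #include <utility>

    static std::optional<std::pair<uint64_t, unsigned>>
    encodeArithImmed(uint64_t Immed) {
      if (Immed >> 12 == 0)
        return std::make_pair(Immed, 0u);
      if ((Immed & 0xfff) == 0 && Immed >> 24 == 0)
        return std::make_pair(Immed >> 12, 12u);
      return std::nullopt;
    }

    // Negate at the operand's width, then check the negated value is itself
    // a valid arithmetic immediate.
    static std::optional<std::pair<uint64_t, unsigned>>
    encodeNegArithImmed(uint64_t Immed, bool Is32Bit) {
      if (Immed == 0)                       // cmp/cmn #0 differ on the C flag
        return std::nullopt;
      Immed = Is32Bit ? uint64_t(~uint32_t(Immed) + 1u) : ~Immed + 1ULL;
      if (Immed & 0xFFFFFFFFFF000000ULL)    // must fit in the low 24 bits
        return std::nullopt;
      return encodeArithImmed(Immed & 0xFFFFFFULL);
    }

    int main() {
      // -5 as a 32-bit constant negates back to 5, a legal immediate.
      return encodeNegArithImmed(0xFFFFFFFBu, /*Is32Bit=*/true) ? 0 : 1;
    }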
+ MachineInstr *OffsetInst = getDefIgnoringCopies(Gep->getOperand(2).getReg(), MRI); + if (!OffsetInst) + return None; + + unsigned OffsetOpc = OffsetInst->getOpcode(); + if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) + return None; + + if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI)) + return None; + + // Now, try to find the specific G_CONSTANT. Start by assuming that the + // register we will offset is the LHS, and the register containing the + // constant is the RHS. + Register OffsetReg = OffsetInst->getOperand(1).getReg(); + Register ConstantReg = OffsetInst->getOperand(2).getReg(); + auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI); + if (!ValAndVReg) { + // We didn't get a constant on the RHS. If the opcode is a shift, then + // we're done. + if (OffsetOpc == TargetOpcode::G_SHL) + return None; + + // If we have a G_MUL, we can use either register. Try looking at the RHS. + std::swap(OffsetReg, ConstantReg); + ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI); + if (!ValAndVReg) + return None; + } + + // The value must fit into 3 bits, and must be positive. Make sure that is + // true. + int64_t ImmVal = ValAndVReg->Value; + + // Since we're going to pull this into a shift, the constant value must be + // a power of 2. If we got a multiply, then we need to check this. + if (OffsetOpc == TargetOpcode::G_MUL) { + if (!isPowerOf2_32(ImmVal)) + return None; + + // Got a power of 2. So, the amount we'll shift is the log base-2 of that. + ImmVal = Log2_32(ImmVal); + } + + if ((ImmVal & 0x7) != ImmVal) + return None; + + // We are only allowed to shift by LegalShiftVal. This shift value is built + // into the instruction, so we can't just use whatever we want. + if (ImmVal != LegalShiftVal) + return None; + + // We can use the LHS of the GEP as the base, and the LHS of the shift as an + // offset. Signify that we are shifting by setting the shift flag to 1. + return {{[=](MachineInstrBuilder &MIB) { + MIB.addUse(Gep->getOperand(1).getReg()); + }, + [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); }, + [=](MachineInstrBuilder &MIB) { + // Need to add both immediates here to make sure that they are both + // added to the instruction. + MIB.addImm(0); + MIB.addImm(1); + }}}; +} + +/// This is used for computing addresses like this: +/// +/// ldr x1, [x2, x3] +/// +/// Where x2 is the base register, and x3 is an offset register. +/// +/// When possible (or profitable) to fold a G_GEP into the address calculation, +/// this will do so. Otherwise, it will return None. +InstructionSelector::ComplexRendererFns +AArch64InstructionSelector::selectAddrModeRegisterOffset( + MachineOperand &Root) const { + MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); + + // We need a GEP. + MachineInstr *Gep = MRI.getVRegDef(Root.getReg()); + if (!Gep || Gep->getOpcode() != TargetOpcode::G_GEP) + return None; + + // If this is used more than once, let's not bother folding. + // TODO: Check if they are memory ops. If they are, then we can still fold + // without having to recompute anything. + if (!MRI.hasOneUse(Gep->getOperand(0).getReg())) + return None; + + // Base is the GEP's LHS, offset is its RHS. + return {{[=](MachineInstrBuilder &MIB) { + MIB.addUse(Gep->getOperand(1).getReg()); + }, + [=](MachineInstrBuilder &MIB) { + MIB.addUse(Gep->getOperand(2).getReg()); + }, + [=](MachineInstrBuilder &MIB) { + // Need to add both immediates here to make sure that they are both + // added to the instruction. 
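The addressing-mode fold in selectAddrModeShiftedExtendXReg only accepts an offset scaled by exactly the access size, since the scale in "ldr x1, [x2, x3, lsl #N]" is fixed by the element width; a G_MUL by a power of two is treated as the equivalent shift. A standalone sketch of that acceptance test, with log2u standing in for llvm::Log2_32:

    #include <cstdint>

    // Integer log2 of a power of two (stand-in for llvm::Log2_32).
    static int log2u(uint32_t V) {
      int L = -1;
      while (V) { V >>= 1; ++L; }
      return L;
    }

    // The offset register may only be scaled by the access size; a multiply
    // must be by a power of two whose log2 equals that legal shift.
    static bool canFoldScaledOffset(unsigned SizeInBytes, bool IsMul,
                                    int64_t Amount) {
      int LegalShift = log2u(SizeInBytes);
      if (LegalShift <= 0)               // byte accesses take no shift here
        return false;
      if (IsMul) {
        if (Amount <= 0 || (Amount & (Amount - 1)) != 0)
          return false;                  // multiplier must be a power of two
        Amount = log2u(uint32_t(Amount));
      }
      return (Amount & 0x7) == Amount && Amount == LegalShift;
    }

    int main() {
      bool A = canFoldScaledOffset(8, /*IsMul=*/false, 3);  // shl #3, 8-byte load
      bool B = canFoldScaledOffset(8, /*IsMul=*/true, 8);   // mul by 8 == shl #3
      bool C = canFoldScaledOffset(8, /*IsMul=*/false, 2);  // wrong scale
      return (A && B && !C) ? 0 : 1;
    }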
+ MIB.addImm(0); + MIB.addImm(0); + }}}; +} + +/// This is intended to be equivalent to selectAddrModeXRO in +/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads. +InstructionSelector::ComplexRendererFns +AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root, + unsigned SizeInBytes) const { + MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); + + // If we have a constant offset, then we probably don't want to match a + // register offset. + if (isBaseWithConstantOffset(Root, MRI)) + return None; + + // Try to fold shifts into the addressing mode. + auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes); + if (AddrModeFns) + return AddrModeFns; + + // If that doesn't work, see if it's possible to fold in registers from + // a GEP. + return selectAddrModeRegisterOffset(Root); } /// Select a "register plus unscaled signed 9-bit immediate" address. This @@ -3994,6 +4432,205 @@ AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root, }}; } +/// Given a shift instruction, return the correct shift type for that +/// instruction. +static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) { + // TODO: Handle AArch64_AM::ROR + switch (MI.getOpcode()) { + default: + return AArch64_AM::InvalidShiftExtend; + case TargetOpcode::G_SHL: + return AArch64_AM::LSL; + case TargetOpcode::G_LSHR: + return AArch64_AM::LSR; + case TargetOpcode::G_ASHR: + return AArch64_AM::ASR; + } +} + +/// Select a "shifted register" operand. If the value is not shifted, set the +/// shift operand to a default value of "lsl 0". +/// +/// TODO: Allow shifted register to be rotated in logical instructions. +InstructionSelector::ComplexRendererFns +AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const { + if (!Root.isReg()) + return None; + MachineRegisterInfo &MRI = + Root.getParent()->getParent()->getParent()->getRegInfo(); + + // Check if the operand is defined by an instruction which corresponds to + // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc. + // + // TODO: Handle AArch64_AM::ROR for logical instructions. + MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg()); + if (!ShiftInst) + return None; + AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst); + if (ShType == AArch64_AM::InvalidShiftExtend) + return None; + if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI)) + return None; + + // Need an immediate on the RHS. + MachineOperand &ShiftRHS = ShiftInst->getOperand(2); + auto Immed = getImmedFromMO(ShiftRHS); + if (!Immed) + return None; + + // We have something that we can fold. Fold in the shift's LHS and RHS into + // the instruction. + MachineOperand &ShiftLHS = ShiftInst->getOperand(1); + Register ShiftReg = ShiftLHS.getReg(); + + unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits(); + unsigned Val = *Immed & (NumBits - 1); + unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val); + + return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}}; +} + +/// Get the correct ShiftExtendType for an extend instruction. +static AArch64_AM::ShiftExtendType +getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI) { + unsigned Opc = MI.getOpcode(); + + // Handle explicit extend instructions first. 
+ if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) { + unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + assert(Size != 64 && "Extend from 64 bits?"); + switch (Size) { + case 8: + return AArch64_AM::SXTB; + case 16: + return AArch64_AM::SXTH; + case 32: + return AArch64_AM::SXTW; + default: + return AArch64_AM::InvalidShiftExtend; + } + } + + if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) { + unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + assert(Size != 64 && "Extend from 64 bits?"); + switch (Size) { + case 8: + return AArch64_AM::UXTB; + case 16: + return AArch64_AM::UXTH; + case 32: + return AArch64_AM::UXTW; + default: + return AArch64_AM::InvalidShiftExtend; + } + } + + // Don't have an explicit extend. Try to handle a G_AND with a constant mask + // on the RHS. + if (Opc != TargetOpcode::G_AND) + return AArch64_AM::InvalidShiftExtend; + + Optional MaybeAndMask = getImmedFromMO(MI.getOperand(2)); + if (!MaybeAndMask) + return AArch64_AM::InvalidShiftExtend; + uint64_t AndMask = *MaybeAndMask; + switch (AndMask) { + default: + return AArch64_AM::InvalidShiftExtend; + case 0xFF: + return AArch64_AM::UXTB; + case 0xFFFF: + return AArch64_AM::UXTH; + case 0xFFFFFFFF: + return AArch64_AM::UXTW; + } +} + +Register AArch64InstructionSelector::narrowExtendRegIfNeeded( + Register ExtReg, MachineIRBuilder &MIB) const { + MachineRegisterInfo &MRI = *MIB.getMRI(); + if (MRI.getType(ExtReg).getSizeInBits() == 32) + return ExtReg; + + // Insert a copy to move ExtReg to GPR32. + Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); + auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg}); + + // Select the copy into a subregister copy. + selectCopy(*Copy, TII, MRI, TRI, RBI); + return Copy.getReg(0); +} + +/// Select an "extended register" operand. This operand folds in an extend +/// followed by an optional left shift. +InstructionSelector::ComplexRendererFns +AArch64InstructionSelector::selectArithExtendedRegister( + MachineOperand &Root) const { + if (!Root.isReg()) + return None; + MachineRegisterInfo &MRI = + Root.getParent()->getParent()->getParent()->getRegInfo(); + + uint64_t ShiftVal = 0; + Register ExtReg; + AArch64_AM::ShiftExtendType Ext; + MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI); + if (!RootDef) + return None; + + if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI)) + return None; + + // Check if we can fold a shift and an extend. + if (RootDef->getOpcode() == TargetOpcode::G_SHL) { + // Look for a constant on the RHS of the shift. + MachineOperand &RHS = RootDef->getOperand(2); + Optional MaybeShiftVal = getImmedFromMO(RHS); + if (!MaybeShiftVal) + return None; + ShiftVal = *MaybeShiftVal; + if (ShiftVal > 4) + return None; + // Look for a valid extend instruction on the LHS of the shift. + MachineOperand &LHS = RootDef->getOperand(1); + MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI); + if (!ExtDef) + return None; + Ext = getExtendTypeForInst(*ExtDef, MRI); + if (Ext == AArch64_AM::InvalidShiftExtend) + return None; + ExtReg = ExtDef->getOperand(1).getReg(); + } else { + // Didn't get a shift. Try just folding an extend. + Ext = getExtendTypeForInst(*RootDef, MRI); + if (Ext == AArch64_AM::InvalidShiftExtend) + return None; + ExtReg = RootDef->getOperand(1).getReg(); + + // If we have a 32 bit instruction which zeroes out the high half of a + // register, we get an implicit zero extend for free. Check if we have one. 
+ // FIXME: We actually emit the extend right now even though we don't have + // to. + if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) { + MachineInstr *ExtInst = MRI.getVRegDef(ExtReg); + if (ExtInst && isDef32(*ExtInst)) + return None; + } + } + + // We require a GPR32 here. Narrow the ExtReg if needed using a subregister + // copy. + MachineIRBuilder MIB(*RootDef); + ExtReg = narrowExtendRegIfNeeded(ExtReg, MIB); + + return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }, + [=](MachineInstrBuilder &MIB) { + MIB.addImm(getArithExtendImm(Ext, ShiftVal)); + }}}; +} + void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const { const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); @@ -4003,6 +4640,51 @@ void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB, MIB.addImm(CstVal.getValue()); } +void AArch64InstructionSelector::renderLogicalImm32( + MachineInstrBuilder &MIB, const MachineInstr &I) const { + assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); + uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue(); + uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32); + MIB.addImm(Enc); +} + +void AArch64InstructionSelector::renderLogicalImm64( + MachineInstrBuilder &MIB, const MachineInstr &I) const { + assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); + uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue(); + uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64); + MIB.addImm(Enc); +} + +bool AArch64InstructionSelector::isLoadStoreOfNumBytes( + const MachineInstr &MI, unsigned NumBytes) const { + if (!MI.mayLoadOrStore()) + return false; + assert(MI.hasOneMemOperand() && + "Expected load/store to have only one mem op!"); + return (*MI.memoperands_begin())->getSize() == NumBytes; +} + +bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const { + const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32) + return false; + + // Only return true if we know the operation will zero-out the high half of + // the 64-bit register. Truncates can be subregister copies, which don't + // zero out the high bits. Copies and other copy-like instructions can be + // fed by truncates, or could be lowered as subregister copies. 
+ switch (MI.getOpcode()) { + default: + return true; + case TargetOpcode::COPY: + case TargetOpcode::G_BITCAST: + case TargetOpcode::G_TRUNC: + case TargetOpcode::G_PHI: + return false; + } +} + namespace llvm { InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &TM, diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp index a985b330eafa..7a1901bd5b1e 100644 --- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -13,7 +13,9 @@ #include "AArch64LegalizerInfo.h" #include "AArch64Subtarget.h" +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" @@ -50,6 +52,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { const LLT v2s64 = LLT::vector(2, 64); const LLT v2p0 = LLT::vector(2, p0); + // FIXME: support subtargets which have neon/fp-armv8 disabled. + if (!ST.hasNEON() || !ST.hasFPARMv8()) { + computeTables(); + return; + } + getActionDefinitionsBuilder(G_IMPLICIT_DEF) .legalFor({p0, s1, s8, s16, s32, s64, v4s32, v2s64}) .clampScalar(0, s1, s64) @@ -74,7 +82,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { getActionDefinitionsBuilder(G_BSWAP) .legalFor({s32, s64, v4s32, v2s32, v2s64}) - .clampScalar(0, s16, s64) + .clampScalar(0, s32, s64) .widenScalarToNextPow2(0); getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) @@ -104,6 +112,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { getActionDefinitionsBuilder({G_SDIV, G_UDIV}) .legalFor({s32, s64}) + .libcallFor({s128}) .clampScalar(0, s32, s64) .widenScalarToNextPow2(0) .scalarize(0); @@ -115,8 +124,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 && AmtTy.getSizeInBits() == 32; }) - .legalFor( - {{s32, s32}, {s32, s64}, {s64, s64}, {v2s32, v2s32}, {v4s32, v4s32}}) + .legalFor({{s32, s32}, + {s32, s64}, + {s64, s64}, + {v2s32, v2s32}, + {v4s32, v4s32}, + {v2s64, v2s64}}) .clampScalar(1, s32, s64) .clampScalar(0, s32, s64) .minScalarSameAs(1, 0); @@ -191,14 +204,14 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { .legalIf([=](const LegalityQuery &Query) { const LLT &Ty0 = Query.Types[0]; const LLT &Ty1 = Query.Types[1]; - if (Ty1 != s32 && Ty1 != s64) + if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128) return false; if (Ty1 == p0) return true; return isPowerOf2_32(Ty0.getSizeInBits()) && (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8); }) - .clampScalar(1, s32, s64) + .clampScalar(1, s32, s128) .widenScalarToNextPow2(1) .maxScalarIf(typeInSet(1, {s32}), 0, s16) .maxScalarIf(typeInSet(1, {s64}), 0, s32) @@ -236,6 +249,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { {s32, p0, 32, 8}, {s64, p0, 64, 8}, {p0, p0, 64, 8}, + {s128, p0, 128, 8}, {v8s8, p0, 64, 8}, {v16s8, p0, 128, 8}, {v4s16, p0, 64, 8}, @@ -247,14 +261,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { .legalForTypesWithMemDesc({{s32, p0, 8, 8}, {s32, p0, 16, 8}}) .clampScalar(0, s8, s64) - .widenScalarToNextPow2(0) - // TODO: We could support sum-of-pow2's but the lowering code doesn't know - // how to do that yet. 
- .unsupportedIfMemSizeNotPow2() + .lowerIfMemSizeNotPow2() // Lower any any-extending loads left into G_ANYEXT and G_LOAD .lowerIf([=](const LegalityQuery &Query) { return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits; }) + .widenScalarToNextPow2(0) .clampMaxNumElements(0, s32, 2) .clampMaxNumElements(0, s64, 1) .customIf(IsPtrVecPred); @@ -262,9 +274,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { getActionDefinitionsBuilder(G_STORE) .legalForTypesWithMemDesc({{s8, p0, 8, 8}, {s16, p0, 16, 8}, + {s32, p0, 8, 8}, + {s32, p0, 16, 8}, {s32, p0, 32, 8}, {s64, p0, 64, 8}, {p0, p0, 64, 8}, + {s128, p0, 128, 8}, {v16s8, p0, 128, 8}, {v4s16, p0, 64, 8}, {v8s16, p0, 128, 8}, @@ -272,10 +287,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { {v4s32, p0, 128, 8}, {v2s64, p0, 128, 8}}) .clampScalar(0, s8, s64) - .widenScalarToNextPow2(0) - // TODO: We could support sum-of-pow2's but the lowering code doesn't know - // how to do that yet. - .unsupportedIfMemSizeNotPow2() + .lowerIfMemSizeNotPow2() .lowerIf([=](const LegalityQuery &Query) { return Query.Types[0].isScalar() && Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits; @@ -305,8 +317,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { {v8s16, v8s16}, {v8s8, v8s8}, {v16s8, v16s8}}) - .clampScalar(0, s32, s32) .clampScalar(1, s32, s64) + .clampScalar(0, s32, s32) .minScalarEltSameAsIf( [=](const LegalityQuery &Query) { const LLT &Ty = Query.Types[0]; @@ -330,33 +342,40 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { .widenScalarToNextPow2(1); // Extensions - getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT}) - .legalIf([=](const LegalityQuery &Query) { - unsigned DstSize = Query.Types[0].getSizeInBits(); - - // Make sure that we have something that will fit in a register, and - // make sure it's a power of 2. - if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize)) - return false; + auto ExtLegalFunc = [=](const LegalityQuery &Query) { + unsigned DstSize = Query.Types[0].getSizeInBits(); + + if (DstSize == 128 && !Query.Types[0].isVector()) + return false; // Extending to a scalar s128 needs narrowing. + + // Make sure that we have something that will fit in a register, and + // make sure it's a power of 2. + if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize)) + return false; - const LLT &SrcTy = Query.Types[1]; + const LLT &SrcTy = Query.Types[1]; - // Special case for s1. - if (SrcTy == s1) - return true; + // Special case for s1. + if (SrcTy == s1) + return true; - // Make sure we fit in a register otherwise. Don't bother checking that - // the source type is below 128 bits. We shouldn't be allowing anything - // through which is wider than the destination in the first place. - unsigned SrcSize = SrcTy.getSizeInBits(); - if (SrcSize < 8 || !isPowerOf2_32(SrcSize)) - return false; + // Make sure we fit in a register otherwise. Don't bother checking that + // the source type is below 128 bits. We shouldn't be allowing anything + // through which is wider than the destination in the first place. + unsigned SrcSize = SrcTy.getSizeInBits(); + if (SrcSize < 8 || !isPowerOf2_32(SrcSize)) + return false; - return true; - }); + return true; + }; + getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT}) + .legalIf(ExtLegalFunc) + .clampScalar(0, s64, s64); // Just for s128, others are handled above. 
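The extension rule was rewritten above as a named predicate so that a scalar s128 destination can be rejected and then narrowed by the trailing clampScalar(0, s64, s64). Restated as a self-contained predicate over plain bit widths:

    static bool isPow2(unsigned V) { return V && (V & (V - 1)) == 0; }

    // Which (destination, source) sizes G_ZEXT/G_SEXT/G_ANYEXT treat as
    // directly legal; scalar s128 destinations are left to be narrowed.
    static bool isExtLegal(unsigned DstBits, bool DstIsVector, unsigned SrcBits) {
      if (DstBits == 128 && !DstIsVector)
        return false;                              // scalar s128 needs narrowing
      if (DstBits < 8 || DstBits > 128 || !isPow2(DstBits))
        return false;                              // must fit a register, pow-2
      if (SrcBits == 1)
        return true;                               // s1 sources are special-cased
      return SrcBits >= 8 && isPow2(SrcBits);
    }

    int main() {
      bool A = isExtLegal(64, false, 32);   // s32 -> s64: legal
      bool B = isExtLegal(128, false, 64);  // s64 -> s128: not directly legal
      bool C = isExtLegal(32, false, 1);    // s1 -> s32: legal
      return (A && !B && C) ? 0 : 1;
    }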
getActionDefinitionsBuilder(G_TRUNC).alwaysLegal(); + getActionDefinitionsBuilder(G_SEXT_INREG).lower(); + // FP conversions getActionDefinitionsBuilder(G_FPTRUNC).legalFor( {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}}); @@ -591,6 +610,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { return Query.Types[0] == p0 && Query.Types[1] == s64; }); + getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower(); + computeTables(); verify(*ST.getInstrInfo()); } @@ -617,6 +638,24 @@ bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI, llvm_unreachable("expected switch to return"); } +bool AArch64LegalizerInfo::legalizeIntrinsic( + MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const { + switch (MI.getIntrinsicID()) { + case Intrinsic::memcpy: + case Intrinsic::memset: + case Intrinsic::memmove: + if (createMemLibcall(MIRBuilder, MRI, MI) == + LegalizerHelper::UnableToLegalize) + return false; + MI.eraseFromParent(); + return true; + default: + break; + } + return true; +} + bool AArch64LegalizerInfo::legalizeShlAshrLshr( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const { @@ -655,7 +694,7 @@ bool AArch64LegalizerInfo::legalizeLoadStore( // legalized. In order to allow further legalization of the inst, we create // a new instruction and erase the existing one. - unsigned ValReg = MI.getOperand(0).getReg(); + Register ValReg = MI.getOperand(0).getReg(); const LLT ValTy = MRI.getType(ValReg); if (!ValTy.isVector() || !ValTy.getElementType().isPointer() || @@ -672,7 +711,7 @@ bool AArch64LegalizerInfo::legalizeLoadStore( auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg}); MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO); } else { - unsigned NewReg = MRI.createGenericVirtualRegister(NewTy); + Register NewReg = MRI.createGenericVirtualRegister(NewTy); auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO); MIRBuilder.buildBitcast({ValReg}, {NewLoad}); } diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.h b/lib/Target/AArch64/AArch64LegalizerInfo.h index f3362a18620f..15161bab466c 100644 --- a/lib/Target/AArch64/AArch64LegalizerInfo.h +++ b/lib/Target/AArch64/AArch64LegalizerInfo.h @@ -31,6 +31,9 @@ public: MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const override; + bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const override; + private: bool legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const; diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 65b5f906e3f6..a0c4a25bb5b9 100644 --- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -201,8 +201,22 @@ static bool isNarrowStore(unsigned Opc) { } } +// These instruction set memory tag and either keep memory contents unchanged or +// set it to zero, ignoring the address part of the source register. +static bool isTagStore(const MachineInstr &MI) { + switch (MI.getOpcode()) { + default: + return false; + case AArch64::STGOffset: + case AArch64::STZGOffset: + case AArch64::ST2GOffset: + case AArch64::STZ2GOffset: + return true; + } +} + // Scaling factor for unscaled load or store. 
-static int getMemScale(MachineInstr &MI) { +static int getMemScale(const MachineInstr &MI) { switch (MI.getOpcode()) { default: llvm_unreachable("Opcode has unknown scale!"); @@ -255,6 +269,11 @@ static int getMemScale(MachineInstr &MI) { case AArch64::STURQi: case AArch64::LDPQi: case AArch64::STPQi: + case AArch64::STGOffset: + case AArch64::STZGOffset: + case AArch64::ST2GOffset: + case AArch64::STZ2GOffset: + case AArch64::STGPi: return 16; } } @@ -449,6 +468,16 @@ static unsigned getPreIndexedOpcode(unsigned Opc) { return AArch64::STPWpre; case AArch64::STPXi: return AArch64::STPXpre; + case AArch64::STGOffset: + return AArch64::STGPreIndex; + case AArch64::STZGOffset: + return AArch64::STZGPreIndex; + case AArch64::ST2GOffset: + return AArch64::ST2GPreIndex; + case AArch64::STZ2GOffset: + return AArch64::STZ2GPreIndex; + case AArch64::STGPi: + return AArch64::STGPpre; } } @@ -518,6 +547,16 @@ static unsigned getPostIndexedOpcode(unsigned Opc) { return AArch64::STPWpost; case AArch64::STPXi: return AArch64::STPXpost; + case AArch64::STGOffset: + return AArch64::STGPostIndex; + case AArch64::STZGOffset: + return AArch64::STZGPostIndex; + case AArch64::ST2GOffset: + return AArch64::ST2GPostIndex; + case AArch64::STZ2GOffset: + return AArch64::STZ2GPostIndex; + case AArch64::STGPi: + return AArch64::STGPpost; } } @@ -536,10 +575,30 @@ static bool isPairedLdSt(const MachineInstr &MI) { case AArch64::STPQi: case AArch64::STPWi: case AArch64::STPXi: + case AArch64::STGPi: return true; } } +// Returns the scale and offset range of pre/post indexed variants of MI. +static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale, + int &MinOffset, int &MaxOffset) { + bool IsPaired = isPairedLdSt(MI); + bool IsTagStore = isTagStore(MI); + // ST*G and all paired ldst have the same scale in pre/post-indexed variants + // as in the "unsigned offset" variant. + // All other pre/post indexed ldst instructions are unscaled. + Scale = (IsTagStore || IsPaired) ? getMemScale(MI) : 1; + + if (IsPaired) { + MinOffset = -64; + MaxOffset = 63; + } else { + MinOffset = -256; + MaxOffset = 255; + } +} + static const MachineOperand &getLdStRegOp(const MachineInstr &MI, unsigned PairedRegOp = 0) { assert(PairedRegOp < 2 && "Unexpected register operand idx."); @@ -618,6 +677,11 @@ static bool isMergeableLdStUpdate(MachineInstr &MI) { case AArch64::LDRWui: case AArch64::LDRHHui: case AArch64::LDRBBui: + case AArch64::STGOffset: + case AArch64::STZGOffset: + case AArch64::ST2GOffset: + case AArch64::STZ2GOffset: + case AArch64::STGPi: // Unscaled instructions. case AArch64::STURSi: case AArch64::STURDi: @@ -808,7 +872,7 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, // STRWui %w1, ... // USE kill %w1 ; need to clear kill flag when moving STRWui downwards // STRW %w0 - unsigned Reg = getLdStRegOp(*I).getReg(); + Register Reg = getLdStRegOp(*I).getReg(); for (MachineInstr &MI : make_range(std::next(I), Paired)) MI.clearRegisterKills(Reg, TRI); } @@ -837,9 +901,9 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, MachineOperand &DstMO = MIB->getOperand(SExtIdx); // Right now, DstMO has the extended register, since it comes from an // extended opcode. - unsigned DstRegX = DstMO.getReg(); + Register DstRegX = DstMO.getReg(); // Get the W variant of that register. - unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32); + Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32); // Update the result of LDP to use the W instead of the X variant. 
DstMO.setReg(DstRegW); LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs())); @@ -882,9 +946,9 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, int LoadSize = getMemScale(*LoadI); int StoreSize = getMemScale(*StoreI); - unsigned LdRt = getLdStRegOp(*LoadI).getReg(); + Register LdRt = getLdStRegOp(*LoadI).getReg(); const MachineOperand &StMO = getLdStRegOp(*StoreI); - unsigned StRt = getLdStRegOp(*StoreI).getReg(); + Register StRt = getLdStRegOp(*StoreI).getReg(); bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt); assert((IsStoreXReg || @@ -933,10 +997,10 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, ? getLdStOffsetOp(*StoreI).getImm() : getLdStOffsetOp(*StoreI).getImm() * StoreSize; int Width = LoadSize * 8; - unsigned DestReg = IsStoreXReg - ? TRI->getMatchingSuperReg(LdRt, AArch64::sub_32, - &AArch64::GPR64RegClass) - : LdRt; + unsigned DestReg = + IsStoreXReg ? Register(TRI->getMatchingSuperReg( + LdRt, AArch64::sub_32, &AArch64::GPR64RegClass)) + : LdRt; assert((UnscaledLdOffset >= UnscaledStOffset && (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) && @@ -1042,7 +1106,7 @@ bool AArch64LoadStoreOpt::findMatchingStore( MachineBasicBlock::iterator B = I->getParent()->begin(); MachineBasicBlock::iterator MBBI = I; MachineInstr &LoadMI = *I; - unsigned BaseReg = getLdStBaseOp(LoadMI).getReg(); + Register BaseReg = getLdStBaseOp(LoadMI).getReg(); // If the load is the first instruction in the block, there's obviously // not any matching store. @@ -1156,8 +1220,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, bool MayLoad = FirstMI.mayLoad(); bool IsUnscaled = TII->isUnscaledLdSt(FirstMI); - unsigned Reg = getLdStRegOp(FirstMI).getReg(); - unsigned BaseReg = getLdStBaseOp(FirstMI).getReg(); + Register Reg = getLdStRegOp(FirstMI).getReg(); + Register BaseReg = getLdStBaseOp(FirstMI).getReg(); int Offset = getLdStOffsetOp(FirstMI).getImm(); int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1; bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI); @@ -1188,7 +1252,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // check for +1/-1. Make sure to check the new instruction offset is // actually an immediate and not a symbolic reference destined for // a relocation. - unsigned MIBaseReg = getLdStBaseOp(MI).getReg(); + Register MIBaseReg = getLdStBaseOp(MI).getReg(); int MIOffset = getLdStOffsetOp(MI).getImm(); bool MIIsUnscaled = TII->isUnscaledLdSt(MI); if (IsUnscaled != MIIsUnscaled) { @@ -1328,18 +1392,19 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I, unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode()) : getPostIndexedOpcode(I->getOpcode()); MachineInstrBuilder MIB; + int Scale, MinOffset, MaxOffset; + getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset); if (!isPairedLdSt(*I)) { // Non-paired instruction. MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) .add(getLdStRegOp(*Update)) .add(getLdStRegOp(*I)) .add(getLdStBaseOp(*I)) - .addImm(Value) + .addImm(Value / Scale) .setMemRefs(I->memoperands()) .setMIFlags(I->mergeFlagsWith(*Update)); } else { // Paired instruction. 
- int Scale = getMemScale(*I); MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) .add(getLdStRegOp(*Update)) .add(getLdStRegOp(*I, 0)) @@ -1395,28 +1460,21 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI, MI.getOperand(1).getReg() != BaseReg) break; - bool IsPairedInsn = isPairedLdSt(MemMI); int UpdateOffset = MI.getOperand(2).getImm(); if (MI.getOpcode() == AArch64::SUBXri) UpdateOffset = -UpdateOffset; - // For non-paired load/store instructions, the immediate must fit in a - // signed 9-bit integer. - if (!IsPairedInsn && (UpdateOffset > 255 || UpdateOffset < -256)) + // The immediate must be a multiple of the scaling factor of the pre/post + // indexed instruction. + int Scale, MinOffset, MaxOffset; + getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset); + if (UpdateOffset % Scale != 0) break; - // For paired load/store instructions, the immediate must be a multiple of - // the scaling factor. The scaled offset must also fit into a signed 7-bit - // integer. - if (IsPairedInsn) { - int Scale = getMemScale(MemMI); - if (UpdateOffset % Scale != 0) - break; - - int ScaledOffset = UpdateOffset / Scale; - if (ScaledOffset > 63 || ScaledOffset < -64) - break; - } + // Scaled offset must fit in the instruction immediate. + int ScaledOffset = UpdateOffset / Scale; + if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset) + break; // If we have a non-zero Offset, we check that it matches the amount // we're adding to the register. @@ -1433,7 +1491,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( MachineInstr &MemMI = *I; MachineBasicBlock::iterator MBBI = I; - unsigned BaseReg = getLdStBaseOp(MemMI).getReg(); + Register BaseReg = getLdStBaseOp(MemMI).getReg(); int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI); // Scan forward looking for post-index opportunities. Updating instructions @@ -1442,13 +1500,19 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( if (MIUnscaledOffset != UnscaledOffset) return E; - // If the base register overlaps a destination register, we can't - // merge the update. - bool IsPairedInsn = isPairedLdSt(MemMI); - for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) { - unsigned DestReg = getLdStRegOp(MemMI, i).getReg(); - if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) - return E; + // If the base register overlaps a source/destination register, we can't + // merge the update. This does not apply to tag store instructions which + // ignore the address part of the source register. + // This does not apply to STGPi as well, which does not have unpredictable + // behavior in this case unlike normal stores, and always performs writeback + // after reading the source register value. + if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) { + bool IsPairedInsn = isPairedLdSt(MemMI); + for (unsigned i = 0, e = IsPairedInsn ? 
2 : 1; i != e; ++i) { + Register DestReg = getLdStRegOp(MemMI, i).getReg(); + if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) + return E; + } } // Track which register units have been modified and used between the first @@ -1487,7 +1551,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( MachineInstr &MemMI = *I; MachineBasicBlock::iterator MBBI = I; - unsigned BaseReg = getLdStBaseOp(MemMI).getReg(); + Register BaseReg = getLdStBaseOp(MemMI).getReg(); int Offset = getLdStOffsetOp(MemMI).getImm(); // If the load/store is the first instruction in the block, there's obviously @@ -1496,11 +1560,13 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( return E; // If the base register overlaps a destination register, we can't // merge the update. - bool IsPairedInsn = isPairedLdSt(MemMI); - for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) { - unsigned DestReg = getLdStRegOp(MemMI, i).getReg(); - if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) - return E; + if (!isTagStore(MemMI)) { + bool IsPairedInsn = isPairedLdSt(MemMI); + for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) { + Register DestReg = getLdStRegOp(MemMI, i).getReg(); + if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) + return E; + } } // Track which register units have been modified and used between the first @@ -1659,7 +1725,7 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate // however, is not, so adjust here. int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI); - // Look forward to try to find a post-index instruction. For example, + // Look forward to try to find a pre-index instruction. For example, // ldr x1, [x0, #64] // add x0, x0, #64 // merged into: diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp index e7d4a2789a28..afd5ae6bcbf2 100644 --- a/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -148,6 +148,8 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, RefFlags |= AArch64MCExpr::VK_TLSDESC; break; } + } else if (MO.getTargetFlags() & AArch64II::MO_PREL) { + RefFlags |= AArch64MCExpr::VK_PREL; } else { // No modifier means this is a generic reference, classified as absolute for // the cases where it matters (:abs_g0: etc). diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h index 0efeeb272ec1..0009fb7b5520 100644 --- a/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCLinkerOptimizationHint.h" #include @@ -95,6 +96,13 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { /// returned struct in a register. This field holds the virtual register into /// which the sret argument is passed. unsigned SRetReturnReg = 0; + /// SVE stack size (for predicates and data vectors) are maintained here + /// rather than in FrameInfo, as the placement and Stack IDs are target + /// specific. + uint64_t StackSizeSVE = 0; + + /// HasCalculatedStackSizeSVE indicates whether StackSizeSVE is valid. + bool HasCalculatedStackSizeSVE = false; /// Has a value when it is known whether or not the function uses a /// redzone, and no value otherwise. 
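getPrePostIndexedMemOpInfo above centralizes the writeback-immediate rule: paired and tag-store instructions keep a scaled immediate, everything else uses an unscaled signed 9-bit offset, and isMatchingUpdateInsn then rejects update amounts that do not divide by the scale or that fall outside the range. A compact sketch of the combined check:

    #include <cstdint>

    // MemScale is the access size in bytes (e.g. 8 for an X-register load).
    static bool canFoldWritebackOffset(int64_t UpdateOffset, int MemScale,
                                       bool IsPaired, bool IsTagStore) {
      int Scale = (IsPaired || IsTagStore) ? MemScale : 1;
      int MinOffset = IsPaired ? -64 : -256;
      int MaxOffset = IsPaired ? 63 : 255;
      if (UpdateOffset % Scale != 0)
        return false;                      // must be a multiple of the scale
      int64_t Scaled = UpdateOffset / Scale;
      return Scaled >= MinOffset && Scaled <= MaxOffset;
    }

    int main() {
      bool A = canFoldWritebackOffset(16, 8, /*Paired=*/true, false);   // ldp: ok
      bool B = canFoldWritebackOffset(600, 8, /*Paired=*/true, false);  // 75 > 63
      bool C = canFoldWritebackOffset(255, 8, /*Paired=*/false, false); // unscaled
      return (A && !B && C) ? 0 : 1;
    }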
@@ -131,6 +139,15 @@ public: ArgumentStackToRestore = bytes; } + bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; } + + void setStackSizeSVE(uint64_t S) { + HasCalculatedStackSizeSVE = true; + StackSizeSVE = S; + } + + uint64_t getStackSizeSVE() const { return StackSizeSVE; } + bool hasStackFrame() const { return HasStackFrame; } void setHasStackFrame(bool s) { HasStackFrame = s; } diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp index aff861aae6be..d503c39b1f90 100644 --- a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp +++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp @@ -162,11 +162,11 @@ bool A57ChainingConstraint::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd, LiveIntervals &LIs = G.getMetadata().LIS; - if (TRI->isPhysicalRegister(Rd) || TRI->isPhysicalRegister(Ra)) { - LLVM_DEBUG(dbgs() << "Rd is a physical reg:" << TRI->isPhysicalRegister(Rd) - << '\n'); - LLVM_DEBUG(dbgs() << "Ra is a physical reg:" << TRI->isPhysicalRegister(Ra) - << '\n'); + if (Register::isPhysicalRegister(Rd) || Register::isPhysicalRegister(Ra)) { + LLVM_DEBUG(dbgs() << "Rd is a physical reg:" + << Register::isPhysicalRegister(Rd) << '\n'); + LLVM_DEBUG(dbgs() << "Ra is a physical reg:" + << Register::isPhysicalRegister(Ra) << '\n'); return false; } @@ -359,8 +359,8 @@ void A57ChainingConstraint::apply(PBQPRAGraph &G) { case AArch64::FMADDDrrr: case AArch64::FNMSUBDrrr: case AArch64::FNMADDDrrr: { - unsigned Rd = MI.getOperand(0).getReg(); - unsigned Ra = MI.getOperand(3).getReg(); + Register Rd = MI.getOperand(0).getReg(); + Register Ra = MI.getOperand(3).getReg(); if (addIntraChainConstraint(G, Rd, Ra)) addInterChainConstraint(G, Rd, Ra); @@ -369,7 +369,7 @@ void A57ChainingConstraint::apply(PBQPRAGraph &G) { case AArch64::FMLAv2f32: case AArch64::FMLSv2f32: { - unsigned Rd = MI.getOperand(0).getReg(); + Register Rd = MI.getOperand(0).getReg(); addInterChainConstraint(G, Rd, Rd); break; } diff --git a/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp b/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp index 5f7245bfbd74..d30ea120bae4 100644 --- a/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp +++ b/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp @@ -15,7 +15,9 @@ #include "llvm/CodeGen/GlobalISel/Combiner.h" #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/Debug.h" @@ -25,12 +27,31 @@ using namespace llvm; using namespace MIPatternMatch; +#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS +#include "AArch64GenGICombiner.inc" +#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS + namespace { +#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H +#include "AArch64GenGICombiner.inc" +#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H + class AArch64PreLegalizerCombinerInfo : public CombinerInfo { + GISelKnownBits *KB; + MachineDominatorTree *MDT; + public: - AArch64PreLegalizerCombinerInfo() + AArch64GenPreLegalizerCombinerHelper Generated; + + AArch64PreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize, + GISelKnownBits *KB, MachineDominatorTree *MDT) : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, - /*LegalizerInfo*/ nullptr) {} + 
/*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize), + KB(KB), MDT(MDT) { + if (!Generated.parseCommandLineOption()) + report_fatal_error("Invalid rule identifier"); + } + virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, MachineIRBuilder &B) const override; }; @@ -38,24 +59,50 @@ public: bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, MachineInstr &MI, MachineIRBuilder &B) const { - CombinerHelper Helper(Observer, B); + CombinerHelper Helper(Observer, B, KB, MDT); switch (MI.getOpcode()) { - default: - return false; - case TargetOpcode::COPY: - return Helper.tryCombineCopy(MI); - case TargetOpcode::G_BR: - return Helper.tryCombineBr(MI); + case TargetOpcode::G_CONCAT_VECTORS: + return Helper.tryCombineConcatVectors(MI); + case TargetOpcode::G_SHUFFLE_VECTOR: + return Helper.tryCombineShuffleVector(MI); case TargetOpcode::G_LOAD: case TargetOpcode::G_SEXTLOAD: - case TargetOpcode::G_ZEXTLOAD: - return Helper.tryCombineExtendingLoads(MI); + case TargetOpcode::G_ZEXTLOAD: { + bool Changed = false; + Changed |= Helper.tryCombineExtendingLoads(MI); + Changed |= Helper.tryCombineIndexedLoadStore(MI); + return Changed; + } + case TargetOpcode::G_STORE: + return Helper.tryCombineIndexedLoadStore(MI); + case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: + switch (MI.getIntrinsicID()) { + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::memset: { + // If we're at -O0 set a maxlen of 32 to inline, otherwise let the other + // heuristics decide. + unsigned MaxLen = EnableOpt ? 0 : 32; + // Try to inline memcpy type calls if optimizations are enabled. + return (!EnableMinSize) ? Helper.tryCombineMemCpyFamily(MI, MaxLen) + : false; + } + default: + break; + } } + if (Generated.tryCombineAll(Observer, MI, B)) + return true; + return false; } +#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP +#include "AArch64GenGICombiner.inc" +#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP + // Pass boilerplate // ================ @@ -63,24 +110,33 @@ class AArch64PreLegalizerCombiner : public MachineFunctionPass { public: static char ID; - AArch64PreLegalizerCombiner(); + AArch64PreLegalizerCombiner(bool IsOptNone = false); StringRef getPassName() const override { return "AArch64PreLegalizerCombiner"; } bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override; +private: + bool IsOptNone; }; -} +} // end anonymous namespace void AArch64PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.setPreservesCFG(); getSelectionDAGFallbackAnalysisUsage(AU); + AU.addRequired(); + AU.addPreserved(); + if (!IsOptNone) { + AU.addRequired(); + AU.addPreserved(); + } MachineFunctionPass::getAnalysisUsage(AU); } -AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner() : MachineFunctionPass(ID) { +AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner(bool IsOptNone) + : MachineFunctionPass(ID), IsOptNone(IsOptNone) { initializeAArch64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry()); } @@ -89,7 +145,14 @@ bool AArch64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { MachineFunctionProperties::Property::FailedISel)) return false; auto *TPC = &getAnalysis(); - AArch64PreLegalizerCombinerInfo PCInfo; + const Function &F = MF.getFunction(); + bool EnableOpt = + MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F); + GISelKnownBits *KB = &getAnalysis().get(MF); + MachineDominatorTree *MDT = + IsOptNone ? 
nullptr : &getAnalysis(); + AArch64PreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), + F.hasMinSize(), KB, MDT); Combiner C(PCInfo, TPC); return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); } @@ -99,13 +162,14 @@ INITIALIZE_PASS_BEGIN(AArch64PreLegalizerCombiner, DEBUG_TYPE, "Combine AArch64 machine instrs before legalization", false, false) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE, "Combine AArch64 machine instrs before legalization", false, false) namespace llvm { -FunctionPass *createAArch64PreLegalizeCombiner() { - return new AArch64PreLegalizerCombiner(); +FunctionPass *createAArch64PreLegalizeCombiner(bool IsOptNone) { + return new AArch64PreLegalizerCombiner(IsOptNone); } } // end namespace llvm diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp index b52259cc9acd..8ec73aa3c040 100644 --- a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp @@ -563,12 +563,12 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { return getSameKindOfOperandsMapping(MI); } case TargetOpcode::COPY: { - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); // Check if one of the register is not a generic register. - if ((TargetRegisterInfo::isPhysicalRegister(DstReg) || + if ((Register::isPhysicalRegister(DstReg) || !MRI.getType(DstReg).isValid()) || - (TargetRegisterInfo::isPhysicalRegister(SrcReg) || + (Register::isPhysicalRegister(SrcReg) || !MRI.getType(SrcReg).isValid())) { const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI); const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI); @@ -635,6 +635,12 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // Some of the floating-point instructions have mixed GPR and FPR operands: // fine-tune the computed mapping. switch (Opc) { + case TargetOpcode::G_TRUNC: { + LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) + OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; + break; + } case TargetOpcode::G_SITOFP: case TargetOpcode::G_UITOFP: if (MRI.getType(MI.getOperand(0).getReg()).isVector()) @@ -687,7 +693,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case TargetOpcode::G_STORE: // Check if that store is fed by fp instructions. if (OpRegBankIdx[0] == PMI_FirstGPR) { - unsigned VReg = MI.getOperand(0).getReg(); + Register VReg = MI.getOperand(0).getReg(); if (!VReg) break; MachineInstr *DefMI = MRI.getVRegDef(VReg); @@ -702,11 +708,10 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; // If we're taking in vectors, we have no choice but to put everything on - // FPRs. + // FPRs, except for the condition. The condition must always be on a GPR. LLT SrcTy = MRI.getType(MI.getOperand(2).getReg()); if (SrcTy.isVector()) { - for (unsigned Idx = 0; Idx < 4; ++Idx) - OpRegBankIdx[Idx] = PMI_FirstFPR; + OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR}; break; } @@ -740,7 +745,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // This doesn't check the condition, since it's just whatever is in NZCV. // This isn't passed explicitly in a register to fcsel/csel. 
for (unsigned Idx = 2; Idx < 4; ++Idx) { - unsigned VReg = MI.getOperand(Idx).getReg(); + Register VReg = MI.getOperand(Idx).getReg(); MachineInstr *DefMI = MRI.getVRegDef(VReg); if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank || onlyDefinesFP(*DefMI, MRI, TRI)) @@ -750,8 +755,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // If we have more FP constraints than not, then move everything over to // FPR. if (NumFP >= 2) - for (unsigned Idx = 0; Idx < 4; ++Idx) - OpRegBankIdx[Idx] = PMI_FirstFPR; + OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR}; break; } @@ -764,7 +768,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands()-1).getReg()); // UNMERGE into scalars from a vector should always use FPR. // Likewise if any of the uses are FP instructions. - if (SrcTy.isVector() || + if (SrcTy.isVector() || SrcTy == LLT::scalar(128) || any_of(MRI.use_instructions(MI.getOperand(0).getReg()), [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) { // Set the register bank of every operand to FPR. @@ -795,12 +799,21 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // Index needs to be a GPR. OpRegBankIdx[3] = PMI_FirstGPR; break; + case TargetOpcode::G_EXTRACT: { + // For s128 sources we have to use fpr. + LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + if (SrcTy.getSizeInBits() == 128) { + OpRegBankIdx[0] = PMI_FirstFPR; + OpRegBankIdx[1] = PMI_FirstFPR; + } + break; + } case TargetOpcode::G_BUILD_VECTOR: // If the first source operand belongs to a FPR register bank, then make // sure that we preserve that. if (OpRegBankIdx[1] != PMI_FirstGPR) break; - unsigned VReg = MI.getOperand(1).getReg(); + Register VReg = MI.getOperand(1).getReg(); if (!VReg) break; diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp index 6d5a4e3d2f76..de176088595d 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -15,6 +15,7 @@ #include "AArch64FrameLowering.h" #include "AArch64InstrInfo.h" #include "AArch64MachineFunctionInfo.h" +#include "AArch64StackOffset.h" #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/BitVector.h" @@ -23,10 +24,10 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/IR/Function.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Function.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -63,8 +64,9 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_AArch64_AAPCS_SwiftError_SaveList; if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost) return CSR_AArch64_RT_MostRegs_SaveList; - else - return CSR_AArch64_AAPCS_SaveList; + if (MF->getSubtarget().isTargetDarwin()) + return CSR_Darwin_AArch64_AAPCS_SaveList; + return CSR_AArch64_AAPCS_SaveList; } const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy( @@ -120,6 +122,8 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF, : CSR_AArch64_CXX_TLS_Darwin_RegMask; if (CC == CallingConv::AArch64_VectorCall) return SCS ? 
CSR_AArch64_AAVPCS_SCS_RegMask : CSR_AArch64_AAVPCS_RegMask; + if (CC == CallingConv::AArch64_SVE_VectorCall) + return CSR_AArch64_SVE_AAPCS_RegMask; if (MF.getSubtarget().getTargetLowering() ->supportSwiftError() && MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError)) @@ -388,7 +392,7 @@ bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const { assert(Offset <= INT_MAX && "Offset too big to fit in int."); assert(MI && "Unable to get the legal offset for nil instruction."); - int SaveOffset = Offset; + StackOffset SaveOffset(Offset, MVT::i8); return isAArch64FrameOffsetLegal(*MI, SaveOffset) & AArch64FrameOffsetIsLegal; } @@ -418,7 +422,9 @@ void AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, int64_t Offset) const { - int Off = Offset; // ARM doesn't need the general 64-bit offsets + // ARM doesn't need the general 64-bit offsets + StackOffset Off(Offset, MVT::i8); + unsigned i = 0; while (!MI.getOperand(i).isFI()) { @@ -441,40 +447,69 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); const AArch64InstrInfo *TII = MF.getSubtarget().getInstrInfo(); const AArch64FrameLowering *TFI = getFrameLowering(MF); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); + bool Tagged = + MI.getOperand(FIOperandNum).getTargetFlags() & AArch64II::MO_TAGGED; unsigned FrameReg; - int Offset; // Special handling of dbg_value, stackmap and patchpoint instructions. if (MI.isDebugValue() || MI.getOpcode() == TargetOpcode::STACKMAP || MI.getOpcode() == TargetOpcode::PATCHPOINT) { - Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, - /*PreferFP=*/true, - /*ForSimm=*/false); - Offset += MI.getOperand(FIOperandNum + 1).getImm(); + StackOffset Offset = + TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, + /*PreferFP=*/true, + /*ForSimm=*/false); + Offset += StackOffset(MI.getOperand(FIOperandNum + 1).getImm(), MVT::i8); MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/); - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getBytes()); return; } if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) { MachineOperand &FI = MI.getOperand(FIOperandNum); - Offset = TFI->getNonLocalFrameIndexReference(MF, FrameIndex); + int Offset = TFI->getNonLocalFrameIndexReference(MF, FrameIndex); FI.ChangeToImmediate(Offset); return; } + StackOffset Offset; if (MI.getOpcode() == AArch64::TAGPstack) { // TAGPstack must use the virtual frame register in its 3rd operand. - const MachineFrameInfo &MFI = MF.getFrameInfo(); const AArch64FunctionInfo *AFI = MF.getInfo(); FrameReg = MI.getOperand(3).getReg(); - Offset = - MFI.getObjectOffset(FrameIndex) + AFI->getTaggedBasePointerOffset(); + Offset = {MFI.getObjectOffset(FrameIndex) + + AFI->getTaggedBasePointerOffset(), + MVT::i8}; + } else if (Tagged) { + StackOffset SPOffset = { + MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize(), MVT::i8}; + if (MFI.hasVarSizedObjects() || + isAArch64FrameOffsetLegal(MI, SPOffset, nullptr, nullptr, nullptr) != + (AArch64FrameOffsetCanUpdate | AArch64FrameOffsetIsLegal)) { + // Can't update to SP + offset in place. Precalculate the tagged pointer + // in a scratch register. 
+ Offset = TFI->resolveFrameIndexReference( + MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true); + Register ScratchReg = + MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); + emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, + TII); + BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AArch64::LDG), ScratchReg) + .addReg(ScratchReg) + .addReg(ScratchReg) + .addImm(0); + MI.getOperand(FIOperandNum) + .ChangeToRegister(ScratchReg, false, false, true); + return; + } + FrameReg = AArch64::SP; + Offset = {MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize(), + MVT::i8}; } else { Offset = TFI->resolveFrameIndexReference( MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true); @@ -490,7 +525,7 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // If we get here, the immediate doesn't fit into the instruction. We folded // as much as possible above. Handle the rest, providing a register that is // SP+LargeImm. - unsigned ScratchReg = + Register ScratchReg = MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII); MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true); diff --git a/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp b/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp index 854670079e40..28a7e680849b 100644 --- a/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp +++ b/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp @@ -426,16 +426,16 @@ bool AArch64SIMDInstrOpt::optimizeVectElement(MachineInstr &MI) { MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); // Get the operands of the current SIMD arithmetic instruction. - unsigned MulDest = MI.getOperand(0).getReg(); - unsigned SrcReg0 = MI.getOperand(1).getReg(); + Register MulDest = MI.getOperand(0).getReg(); + Register SrcReg0 = MI.getOperand(1).getReg(); unsigned Src0IsKill = getKillRegState(MI.getOperand(1).isKill()); - unsigned SrcReg1 = MI.getOperand(2).getReg(); + Register SrcReg1 = MI.getOperand(2).getReg(); unsigned Src1IsKill = getKillRegState(MI.getOperand(2).isKill()); unsigned DupDest; // Instructions of interest have either 4 or 5 operands. if (MI.getNumOperands() == 5) { - unsigned SrcReg2 = MI.getOperand(3).getReg(); + Register SrcReg2 = MI.getOperand(3).getReg(); unsigned Src2IsKill = getKillRegState(MI.getOperand(3).isKill()); unsigned LaneNumber = MI.getOperand(4).getImm(); // Create a new DUP instruction. 
Note that if an equivalent DUP instruction diff --git a/lib/Target/AArch64/AArch64SVEInstrInfo.td b/lib/Target/AArch64/AArch64SVEInstrInfo.td index 79ab42f4c080..b573eac76754 100644 --- a/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -82,11 +82,11 @@ let Predicates = [HasSVE] in { defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr">; defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr">; - defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot">; - defm UDOT_ZZZ : sve_intx_dot<0b1, "udot">; + defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot", int_aarch64_sve_sdot>; + defm UDOT_ZZZ : sve_intx_dot<0b1, "udot", int_aarch64_sve_udot>; - defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot">; - defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot">; + defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot", int_aarch64_sve_sdot_lane>; + defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot", int_aarch64_sve_udot_lane>; defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb">; defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb">; @@ -94,14 +94,14 @@ let Predicates = [HasSVE] in { defm UXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b011, "uxth">; defm SXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b100, "sxtw">; defm UXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b101, "uxtw">; - defm ABS_ZPmZ : sve_int_un_pred_arit_0< 0b110, "abs">; - defm NEG_ZPmZ : sve_int_un_pred_arit_0< 0b111, "neg">; - - defm CLS_ZPmZ : sve_int_un_pred_arit_1< 0b000, "cls">; - defm CLZ_ZPmZ : sve_int_un_pred_arit_1< 0b001, "clz">; - defm CNT_ZPmZ : sve_int_un_pred_arit_1< 0b010, "cnt">; - defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot">; - defm NOT_ZPmZ : sve_int_un_pred_arit_1< 0b110, "not">; + defm ABS_ZPmZ : sve_int_un_pred_arit_0< 0b110, "abs", int_aarch64_sve_abs>; + defm NEG_ZPmZ : sve_int_un_pred_arit_0< 0b111, "neg", int_aarch64_sve_neg>; + + defm CLS_ZPmZ : sve_int_un_pred_arit_1< 0b000, "cls", null_frag>; + defm CLZ_ZPmZ : sve_int_un_pred_arit_1< 0b001, "clz", null_frag>; + defm CNT_ZPmZ : sve_int_un_pred_arit_1< 0b010, "cnt", int_aarch64_sve_cnt>; + defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot", null_frag>; + defm NOT_ZPmZ : sve_int_un_pred_arit_1< 0b110, "not", null_frag>; defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs">; defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg">; @@ -138,12 +138,12 @@ let Predicates = [HasSVE] in { defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr">; defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv">; - defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd">; - defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub">; - defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul">; - defm FTSMUL_ZZZ : sve_fp_3op_u_zd<0b011, "ftsmul">; - defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps">; - defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts">; + defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd>; + defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", null_frag>; + defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", null_frag>; + defm FTSMUL_ZZZ : sve_fp_3op_u_zd<0b011, "ftsmul", null_frag>; + defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", null_frag>; + defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", null_frag>; defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel">; @@ -187,7 +187,7 @@ let Predicates = [HasSVE] in { defm FCPY_ZPmI : sve_int_dup_fpimm_pred<"fcpy">; // Splat scalar register (unpredicated, GPR or vector + element index) - defm DUP_ZR : sve_int_perm_dup_r<"dup">; + defm DUP_ZR : sve_int_perm_dup_r<"dup", AArch64dup>; defm DUP_ZZI : 
sve_int_perm_dup_i<"dup">; // Splat scalar register (predicated) @@ -211,13 +211,13 @@ let Predicates = [HasSVE] in { defm REV_PP : sve_int_perm_reverse_p<"rev">; defm REV_ZZ : sve_int_perm_reverse_z<"rev">; - defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo">; - defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi">; - defm UUNPKLO_ZZ : sve_int_perm_unpk<0b10, "uunpklo">; - defm UUNPKHI_ZZ : sve_int_perm_unpk<0b11, "uunpkhi">; + defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo", AArch64sunpklo>; + defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi", AArch64sunpkhi>; + defm UUNPKLO_ZZ : sve_int_perm_unpk<0b10, "uunpklo", AArch64uunpklo>; + defm UUNPKHI_ZZ : sve_int_perm_unpk<0b11, "uunpkhi", AArch64uunpkhi>; - def PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo">; - def PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi">; + defm PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo", int_aarch64_sve_punpklo>; + defm PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi", int_aarch64_sve_punpkhi>; defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">; defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">; @@ -1020,6 +1020,56 @@ let Predicates = [HasSVE] in { (FCMGT_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn", (FCMGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; + + def : Pat<(nxv16i8 (bitconvert (nxv8i16 ZPR:$src))), (nxv16i8 ZPR:$src)>; + def : Pat<(nxv16i8 (bitconvert (nxv4i32 ZPR:$src))), (nxv16i8 ZPR:$src)>; + def : Pat<(nxv16i8 (bitconvert (nxv2i64 ZPR:$src))), (nxv16i8 ZPR:$src)>; + def : Pat<(nxv16i8 (bitconvert (nxv8f16 ZPR:$src))), (nxv16i8 ZPR:$src)>; + def : Pat<(nxv16i8 (bitconvert (nxv4f32 ZPR:$src))), (nxv16i8 ZPR:$src)>; + def : Pat<(nxv16i8 (bitconvert (nxv2f64 ZPR:$src))), (nxv16i8 ZPR:$src)>; + + def : Pat<(nxv8i16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8i16 ZPR:$src)>; + def : Pat<(nxv8i16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8i16 ZPR:$src)>; + def : Pat<(nxv8i16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8i16 ZPR:$src)>; + def : Pat<(nxv8i16 (bitconvert (nxv8f16 ZPR:$src))), (nxv8i16 ZPR:$src)>; + def : Pat<(nxv8i16 (bitconvert (nxv4f32 ZPR:$src))), (nxv8i16 ZPR:$src)>; + def : Pat<(nxv8i16 (bitconvert (nxv2f64 ZPR:$src))), (nxv8i16 ZPR:$src)>; + + def : Pat<(nxv4i32 (bitconvert (nxv16i8 ZPR:$src))), (nxv4i32 ZPR:$src)>; + def : Pat<(nxv4i32 (bitconvert (nxv8i16 ZPR:$src))), (nxv4i32 ZPR:$src)>; + def : Pat<(nxv4i32 (bitconvert (nxv2i64 ZPR:$src))), (nxv4i32 ZPR:$src)>; + def : Pat<(nxv4i32 (bitconvert (nxv8f16 ZPR:$src))), (nxv4i32 ZPR:$src)>; + def : Pat<(nxv4i32 (bitconvert (nxv4f32 ZPR:$src))), (nxv4i32 ZPR:$src)>; + def : Pat<(nxv4i32 (bitconvert (nxv2f64 ZPR:$src))), (nxv4i32 ZPR:$src)>; + + def : Pat<(nxv2i64 (bitconvert (nxv16i8 ZPR:$src))), (nxv2i64 ZPR:$src)>; + def : Pat<(nxv2i64 (bitconvert (nxv8i16 ZPR:$src))), (nxv2i64 ZPR:$src)>; + def : Pat<(nxv2i64 (bitconvert (nxv4i32 ZPR:$src))), (nxv2i64 ZPR:$src)>; + def : Pat<(nxv2i64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2i64 ZPR:$src)>; + def : Pat<(nxv2i64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2i64 ZPR:$src)>; + def : Pat<(nxv2i64 (bitconvert (nxv2f64 ZPR:$src))), (nxv2i64 ZPR:$src)>; + + def : Pat<(nxv8f16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8f16 ZPR:$src)>; + def : Pat<(nxv8f16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8f16 ZPR:$src)>; + def : Pat<(nxv8f16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8f16 ZPR:$src)>; + def : Pat<(nxv8f16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8f16 ZPR:$src)>; + def : Pat<(nxv8f16 (bitconvert (nxv4f32 ZPR:$src))), 
(nxv8f16 ZPR:$src)>; + def : Pat<(nxv8f16 (bitconvert (nxv2f64 ZPR:$src))), (nxv8f16 ZPR:$src)>; + + def : Pat<(nxv4f32 (bitconvert (nxv16i8 ZPR:$src))), (nxv4f32 ZPR:$src)>; + def : Pat<(nxv4f32 (bitconvert (nxv8i16 ZPR:$src))), (nxv4f32 ZPR:$src)>; + def : Pat<(nxv4f32 (bitconvert (nxv4i32 ZPR:$src))), (nxv4f32 ZPR:$src)>; + def : Pat<(nxv4f32 (bitconvert (nxv2i64 ZPR:$src))), (nxv4f32 ZPR:$src)>; + def : Pat<(nxv4f32 (bitconvert (nxv8f16 ZPR:$src))), (nxv4f32 ZPR:$src)>; + def : Pat<(nxv4f32 (bitconvert (nxv2f64 ZPR:$src))), (nxv4f32 ZPR:$src)>; + + def : Pat<(nxv2f64 (bitconvert (nxv16i8 ZPR:$src))), (nxv2f64 ZPR:$src)>; + def : Pat<(nxv2f64 (bitconvert (nxv8i16 ZPR:$src))), (nxv2f64 ZPR:$src)>; + def : Pat<(nxv2f64 (bitconvert (nxv4i32 ZPR:$src))), (nxv2f64 ZPR:$src)>; + def : Pat<(nxv2f64 (bitconvert (nxv2i64 ZPR:$src))), (nxv2f64 ZPR:$src)>; + def : Pat<(nxv2f64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2f64 ZPR:$src)>; + def : Pat<(nxv2f64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2f64 ZPR:$src)>; + } let Predicates = [HasSVE2] in { @@ -1164,6 +1214,13 @@ let Predicates = [HasSVE2] in { defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr">; defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr">; + // SVE2 predicated shifts + defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">; + defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">; + defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">; + defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">; + defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">; + // SVE2 integer add/subtract long defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb">; defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt">; @@ -1199,14 +1256,14 @@ let Predicates = [HasSVE2] in { defm PMULLT_ZZZ : sve2_pmul_long<0b1, "pmullt">; // SVE2 bitwise shift and insert - defm SRI_ZZI : sve2_int_bin_cons_shift_imm_right<0b0, "sri">; - defm SLI_ZZI : sve2_int_bin_cons_shift_imm_left< 0b1, "sli">; + defm SRI_ZZI : sve2_int_bin_shift_imm_right<0b0, "sri">; + defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli">; // SVE2 bitwise shift right and accumulate - defm SSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b00, "ssra">; - defm USRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b01, "usra">; - defm SRSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b10, "srsra">; - defm URSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b11, "ursra">; + defm SSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b00, "ssra">; + defm USRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b01, "usra">; + defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra">; + defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra">; // SVE2 complex integer add defm CADD_ZZI : sve2_int_cadd<0b0, "cadd">; @@ -1228,41 +1285,47 @@ let Predicates = [HasSVE2] in { defm SBCLB_ZZZ : sve2_int_addsub_long_carry<0b10, "sbclb">; defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt">; - // SVE2 bitwise shift right narrow - defm SQSHRUNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0000, "sqshrunb">; - defm SQSHRUNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0001, "sqshrunt">; - defm SQRSHRUNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0010, "sqrshrunb">; - defm SQRSHRUNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0011, "sqrshrunt">; - defm SHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0100, "shrnb">; - defm SHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0101, "shrnt">; 
- defm RSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0110, "rshrnb">; - defm RSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0111, "rshrnt">; - defm SQSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1000, "sqshrnb">; - defm SQSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1001, "sqshrnt">; - defm SQRSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1010, "sqrshrnb">; - defm SQRSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1011, "sqrshrnt">; - defm UQSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1100, "uqshrnb">; - defm UQSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1101, "uqshrnt">; - defm UQRSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1110, "uqrshrnb">; - defm UQRSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1111, "uqrshrnt">; - - // SVE2 integer add/subtract narrow high part - defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high<0b000, "addhnb">; - defm ADDHNT_ZZZ : sve2_int_addsub_narrow_high<0b001, "addhnt">; - defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high<0b010, "raddhnb">; - defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high<0b011, "raddhnt">; - defm SUBHNB_ZZZ : sve2_int_addsub_narrow_high<0b100, "subhnb">; - defm SUBHNT_ZZZ : sve2_int_addsub_narrow_high<0b101, "subhnt">; - defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high<0b110, "rsubhnb">; - defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high<0b111, "rsubhnt">; - - // SVE2 saturating extract narrow - defm SQXTNB_ZZ : sve2_int_sat_extract_narrow<0b000, "sqxtnb">; - defm SQXTNT_ZZ : sve2_int_sat_extract_narrow<0b001, "sqxtnt">; - defm UQXTNB_ZZ : sve2_int_sat_extract_narrow<0b010, "uqxtnb">; - defm UQXTNT_ZZ : sve2_int_sat_extract_narrow<0b011, "uqxtnt">; - defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow<0b100, "sqxtunb">; - defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow<0b101, "sqxtunt">; + // SVE2 bitwise shift right narrow (bottom) + defm SQSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b000, "sqshrunb">; + defm SQRSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b001, "sqrshrunb">; + defm SHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b010, "shrnb">; + defm RSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b011, "rshrnb">; + defm SQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b100, "sqshrnb">; + defm SQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b101, "sqrshrnb">; + defm UQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b110, "uqshrnb">; + defm UQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b111, "uqrshrnb">; + + // SVE2 bitwise shift right narrow (top) + defm SQSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b000, "sqshrunt">; + defm SQRSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b001, "sqrshrunt">; + defm SHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b010, "shrnt">; + defm RSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b011, "rshrnt">; + defm SQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b100, "sqshrnt">; + defm SQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b101, "sqrshrnt">; + defm UQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b110, "uqshrnt">; + defm UQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b111, "uqrshrnt">; + + // SVE2 integer add/subtract narrow high part (bottom) + defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b00, "addhnb">; + defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b01, "raddhnb">; + defm SUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b10, "subhnb">; + defm RSUBHNB_ZZZ : 
sve2_int_addsub_narrow_high_bottom<0b11, "rsubhnb">; + + // SVE2 integer add/subtract narrow high part (top) + defm ADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b00, "addhnt">; + defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b01, "raddhnt">; + defm SUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b10, "subhnt">; + defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b11, "rsubhnt">; + + // SVE2 saturating extract narrow (bottom) + defm SQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b00, "sqxtnb">; + defm UQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b01, "uqxtnb">; + defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b10, "sqxtunb">; + + // SVE2 saturating extract narrow (top) + defm SQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b00, "sqxtnt">; + defm UQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b01, "uqxtnt">; + defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow_top<0b10, "sqxtunt">; // SVE2 character match defm MATCH_PPzZZ : sve2_char_match<0b0, "match">; @@ -1289,10 +1352,14 @@ let Predicates = [HasSVE2] in { // SVE2 histogram generation (vector) defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt">; + // SVE2 floating-point base 2 logarithm as integer + defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">; + // SVE2 floating-point convert precision defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtxnt">; defm FCVTNT_ZPmZ : sve2_fp_convert_down_narrow<"fcvtnt">; defm FCVTLT_ZPmZ : sve2_fp_convert_up_long<"fcvtlt">; + def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>; // SVE2 floating-point pairwise operations defm FADDP_ZPmZZ : sve2_fp_pairwise_pred<0b000, "faddp">; @@ -1321,58 +1388,45 @@ let Predicates = [HasSVE2] in { def BSL2N_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b101, "bsl2n">; def NBSL_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b111, "nbsl">; - // sve_int_rotate_imm + // SVE2 bitwise xor and rotate right by immediate defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar">; // SVE2 extract vector (immediate offset, constructive) def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">; - // SVE floating-point convert precision - def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>; - - // SVE floating-point convert to integer - defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">; - - // Non-temporal contiguous loads (vector + register) - defm LDNT1SB_ZZR_S : sve2_mem_cldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>; - defm LDNT1B_ZZR_S : sve2_mem_cldnt_vs<0b00001, "ldnt1b", Z_s, ZPR32>; - defm LDNT1SH_ZZR_S : sve2_mem_cldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>; - defm LDNT1H_ZZR_S : sve2_mem_cldnt_vs<0b00101, "ldnt1h", Z_s, ZPR32>; - defm LDNT1W_ZZR_S : sve2_mem_cldnt_vs<0b01001, "ldnt1w", Z_s, ZPR32>; - - defm LDNT1SB_ZZR_D : sve2_mem_cldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>; - defm LDNT1B_ZZR_D : sve2_mem_cldnt_vs<0b10010, "ldnt1b", Z_d, ZPR64>; - defm LDNT1SH_ZZR_D : sve2_mem_cldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>; - defm LDNT1H_ZZR_D : sve2_mem_cldnt_vs<0b10110, "ldnt1h", Z_d, ZPR64>; - defm LDNT1SW_ZZR_D : sve2_mem_cldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>; - defm LDNT1W_ZZR_D : sve2_mem_cldnt_vs<0b11010, "ldnt1w", Z_d, ZPR64>; - defm LDNT1D_ZZR_D : sve2_mem_cldnt_vs<0b11110, "ldnt1d", Z_d, ZPR64>; + // SVE2 non-temporal gather loads + defm LDNT1SB_ZZR_S : sve2_mem_gldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>; + defm LDNT1B_ZZR_S : sve2_mem_gldnt_vs<0b00001, "ldnt1b", Z_s, ZPR32>; + defm LDNT1SH_ZZR_S : sve2_mem_gldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>; + defm LDNT1H_ZZR_S : sve2_mem_gldnt_vs<0b00101, "ldnt1h", Z_s, ZPR32>; + defm LDNT1W_ZZR_S : 
sve2_mem_gldnt_vs<0b01001, "ldnt1w", Z_s, ZPR32>; + + defm LDNT1SB_ZZR_D : sve2_mem_gldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>; + defm LDNT1B_ZZR_D : sve2_mem_gldnt_vs<0b10010, "ldnt1b", Z_d, ZPR64>; + defm LDNT1SH_ZZR_D : sve2_mem_gldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>; + defm LDNT1H_ZZR_D : sve2_mem_gldnt_vs<0b10110, "ldnt1h", Z_d, ZPR64>; + defm LDNT1SW_ZZR_D : sve2_mem_gldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>; + defm LDNT1W_ZZR_D : sve2_mem_gldnt_vs<0b11010, "ldnt1w", Z_d, ZPR64>; + defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs<0b11110, "ldnt1d", Z_d, ZPR64>; // SVE2 vector splice (constructive) defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">; - // Predicated shifts - defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">; - defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">; - defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">; - defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">; - defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">; - - // Non-temporal contiguous stores (vector + register) - defm STNT1B_ZZR_S : sve2_mem_cstnt_vs<0b001, "stnt1b", Z_s, ZPR32>; - defm STNT1H_ZZR_S : sve2_mem_cstnt_vs<0b011, "stnt1h", Z_s, ZPR32>; - defm STNT1W_ZZR_S : sve2_mem_cstnt_vs<0b101, "stnt1w", Z_s, ZPR32>; + // SVE2 non-temporal scatter stores + defm STNT1B_ZZR_S : sve2_mem_sstnt_vs<0b001, "stnt1b", Z_s, ZPR32>; + defm STNT1H_ZZR_S : sve2_mem_sstnt_vs<0b011, "stnt1h", Z_s, ZPR32>; + defm STNT1W_ZZR_S : sve2_mem_sstnt_vs<0b101, "stnt1w", Z_s, ZPR32>; - defm STNT1B_ZZR_D : sve2_mem_cstnt_vs<0b000, "stnt1b", Z_d, ZPR64>; - defm STNT1H_ZZR_D : sve2_mem_cstnt_vs<0b010, "stnt1h", Z_d, ZPR64>; - defm STNT1W_ZZR_D : sve2_mem_cstnt_vs<0b100, "stnt1w", Z_d, ZPR64>; - defm STNT1D_ZZR_D : sve2_mem_cstnt_vs<0b110, "stnt1d", Z_d, ZPR64>; + defm STNT1B_ZZR_D : sve2_mem_sstnt_vs<0b000, "stnt1b", Z_d, ZPR64>; + defm STNT1H_ZZR_D : sve2_mem_sstnt_vs<0b010, "stnt1h", Z_d, ZPR64>; + defm STNT1W_ZZR_D : sve2_mem_sstnt_vs<0b100, "stnt1w", Z_d, ZPR64>; + defm STNT1D_ZZR_D : sve2_mem_sstnt_vs<0b110, "stnt1d", Z_d, ZPR64>; - // SVE table lookup (three sources) + // SVE2 table lookup (three sources) defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl">; defm TBX_ZZZ : sve2_int_perm_tbx<"tbx">; - // SVE integer compare scalar count and limit + // SVE2 integer compare scalar count and limit defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege">; defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt">; defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs">; @@ -1383,7 +1437,7 @@ let Predicates = [HasSVE2] in { defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs">; defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi">; - // SVE pointer conflict compare + // SVE2 pointer conflict compare defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr">; defm WHILERW_PXX : sve2_int_while_rr<0b1, "whilerw">; } diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp index 60dbace03ca6..ba61ed726e84 100644 --- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -32,7 +32,7 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset( const AArch64TargetLowering &TLI = *STI.getTargetLowering(); EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); - Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Type *IntPtrTy = Type::getInt8PtrTy(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; diff --git 
a/lib/Target/AArch64/AArch64SpeculationHardening.cpp b/lib/Target/AArch64/AArch64SpeculationHardening.cpp index 3087e6ce441d..7307961ddb5f 100644 --- a/lib/Target/AArch64/AArch64SpeculationHardening.cpp +++ b/lib/Target/AArch64/AArch64SpeculationHardening.cpp @@ -106,6 +106,7 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/TargetMachine.h" #include @@ -115,9 +116,9 @@ using namespace llvm; #define AARCH64_SPECULATION_HARDENING_NAME "AArch64 speculation hardening pass" -cl::opt HardenLoads("aarch64-slh-loads", cl::Hidden, - cl::desc("Sanitize loads from memory."), - cl::init(true)); +static cl::opt HardenLoads("aarch64-slh-loads", cl::Hidden, + cl::desc("Sanitize loads from memory."), + cl::init(true)); namespace { @@ -521,7 +522,7 @@ bool AArch64SpeculationHardening::slhLoads(MachineBasicBlock &MBB) { for (auto Use : MI.uses()) { if (!Use.isReg()) continue; - unsigned Reg = Use.getReg(); + Register Reg = Use.getReg(); // Some loads of floating point data have implicit defs/uses on a // super register of that floating point data. Some examples: // $s0 = LDRSui $sp, 22, implicit-def $q0 @@ -561,8 +562,8 @@ bool AArch64SpeculationHardening::expandSpeculationSafeValue( // miss-speculation isn't happening because we're already inserting barriers // to guarantee that. if (!UseControlFlowSpeculationBarrier && !UsesFullSpeculationBarrier) { - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); // Mark this register and all its aliasing registers as needing to be // value speculation hardened before its next use, by using a CSDB // barrier instruction. diff --git a/lib/Target/AArch64/AArch64StackOffset.h b/lib/Target/AArch64/AArch64StackOffset.h new file mode 100644 index 000000000000..13f12a6c9c30 --- /dev/null +++ b/lib/Target/AArch64/AArch64StackOffset.h @@ -0,0 +1,138 @@ +//==--AArch64StackOffset.h ---------------------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the StackOffset class, which is used to +// describe scalable and non-scalable offsets during frame lowering. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64STACKOFFSET_H +#define LLVM_LIB_TARGET_AARCH64_AARCH64STACKOFFSET_H + +#include "llvm/Support/MachineValueType.h" + +namespace llvm { + +/// StackOffset is a wrapper around scalable and non-scalable offsets and is +/// used in several functions such as 'isAArch64FrameOffsetLegal' and +/// 'emitFrameOffset()'. StackOffsets are described by MVTs, e.g. +// +/// StackOffset(1, MVT::nxv16i8) +// +/// would describe an offset as being the size of a single SVE vector. +/// +/// The class also implements simple arithmetic (addition/subtraction) on these +/// offsets, e.g. +// +/// StackOffset(1, MVT::nxv16i8) + StackOffset(1, MVT::i64) +// +/// describes an offset that spans the combined storage required for an SVE +/// vector and a 64bit GPR. 
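// A standalone, hypothetical sketch (not taken from this patch) of the
// bookkeeping the class below implements: fixed-size and scalable
// contributions are kept in separate byte counters, so the example from the
// comment above, StackOffset(1, MVT::nxv16i8) + StackOffset(1, MVT::i64),
// comes out as 16 scalable bytes plus 8 ordinary bytes. MiniOffset and the
// hard-coded type sizes are illustrative only.
#include <cassert>
#include <cstdint>

struct MiniOffset {
  int64_t Bytes;         // non-scalable part, in bytes
  int64_t ScalableBytes; // scaled by the runtime vector length at execution

  static MiniOffset fixed(int64_t N, int64_t TypeBytes) {
    return {N * TypeBytes, 0};
  }
  static MiniOffset scalable(int64_t N, int64_t TypeBytes) {
    return {0, N * TypeBytes};
  }
  MiniOffset operator+(const MiniOffset &O) const {
    return {Bytes + O.Bytes, ScalableBytes + O.ScalableBytes};
  }
};

int main() {
  // One SVE data vector (nxv16i8 is 16 scalable bytes) plus one 64-bit GPR
  // slot (8 fixed bytes).
  MiniOffset Off = MiniOffset::scalable(1, 16) + MiniOffset::fixed(1, 8);
  assert(Off.Bytes == 8 && Off.ScalableBytes == 16);
  return 0;
}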
+class StackOffset { + int64_t Bytes; + int64_t ScalableBytes; + + explicit operator int() const; + +public: + using Part = std::pair; + + StackOffset() : Bytes(0), ScalableBytes(0) {} + + StackOffset(int64_t Offset, MVT::SimpleValueType T) : StackOffset() { + assert(MVT(T).getSizeInBits() % 8 == 0 && + "Offset type is not a multiple of bytes"); + *this += Part(Offset, T); + } + + StackOffset(const StackOffset &Other) + : Bytes(Other.Bytes), ScalableBytes(Other.ScalableBytes) {} + + StackOffset &operator=(const StackOffset &) = default; + + StackOffset &operator+=(const StackOffset::Part &Other) { + int64_t OffsetInBytes = Other.first * (Other.second.getSizeInBits() / 8); + if (Other.second.isScalableVector()) + ScalableBytes += OffsetInBytes; + else + Bytes += OffsetInBytes; + return *this; + } + + StackOffset &operator+=(const StackOffset &Other) { + Bytes += Other.Bytes; + ScalableBytes += Other.ScalableBytes; + return *this; + } + + StackOffset operator+(const StackOffset &Other) const { + StackOffset Res(*this); + Res += Other; + return Res; + } + + StackOffset &operator-=(const StackOffset &Other) { + Bytes -= Other.Bytes; + ScalableBytes -= Other.ScalableBytes; + return *this; + } + + StackOffset operator-(const StackOffset &Other) const { + StackOffset Res(*this); + Res -= Other; + return Res; + } + + StackOffset operator-() const { + StackOffset Res = {}; + const StackOffset Other(*this); + Res -= Other; + return Res; + } + + /// Returns the scalable part of the offset in bytes. + int64_t getScalableBytes() const { return ScalableBytes; } + + /// Returns the non-scalable part of the offset in bytes. + int64_t getBytes() const { return Bytes; } + + /// Returns the offset in parts to which this frame offset can be + /// decomposed for the purpose of describing a frame offset. + /// For non-scalable offsets this is simply its byte size. + void getForFrameOffset(int64_t &NumBytes, int64_t &NumPredicateVectors, + int64_t &NumDataVectors) const { + assert(isValid() && "Invalid frame offset"); + + NumBytes = Bytes; + NumDataVectors = 0; + NumPredicateVectors = ScalableBytes / 2; + // This method is used to get the offsets to adjust the frame offset. + // If the function requires ADDPL to be used and needs more than two ADDPL + // instructions, part of the offset is folded into NumDataVectors so that it + // uses ADDVL for part of it, reducing the number of ADDPL instructions. + if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 || + NumPredicateVectors > 62) { + NumDataVectors = NumPredicateVectors / 8; + NumPredicateVectors -= NumDataVectors * 8; + } + } + + /// Returns whether the offset is known zero. + explicit operator bool() const { return Bytes || ScalableBytes; } + + bool isValid() const { + // The smallest scalable element supported by scaled SVE addressing + // modes are predicates, which are 2 scalable bytes in size. So the scalable + // byte offset must always be a multiple of 2. 
+ return ScalableBytes % 2 == 0; + } +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/AArch64/AArch64StackTagging.cpp b/lib/Target/AArch64/AArch64StackTagging.cpp index 6e99c48bf1d7..e6dbe01d3807 100644 --- a/lib/Target/AArch64/AArch64StackTagging.cpp +++ b/lib/Target/AArch64/AArch64StackTagging.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -55,9 +56,215 @@ using namespace llvm; #define DEBUG_TYPE "stack-tagging" -static constexpr unsigned kTagGranuleSize = 16; +static cl::opt ClMergeInit( + "stack-tagging-merge-init", cl::Hidden, cl::init(true), cl::ZeroOrMore, + cl::desc("merge stack variable initializers with tagging when possible")); + +static cl::opt ClScanLimit("stack-tagging-merge-init-scan-limit", + cl::init(40), cl::Hidden); + +static const Align kTagGranuleSize = Align(16); namespace { + +class InitializerBuilder { + uint64_t Size; + const DataLayout *DL; + Value *BasePtr; + Function *SetTagFn; + Function *SetTagZeroFn; + Function *StgpFn; + + // List of initializers sorted by start offset. + struct Range { + uint64_t Start, End; + Instruction *Inst; + }; + SmallVector Ranges; + // 8-aligned offset => 8-byte initializer + // Missing keys are zero initialized. + std::map Out; + +public: + InitializerBuilder(uint64_t Size, const DataLayout *DL, Value *BasePtr, + Function *SetTagFn, Function *SetTagZeroFn, + Function *StgpFn) + : Size(Size), DL(DL), BasePtr(BasePtr), SetTagFn(SetTagFn), + SetTagZeroFn(SetTagZeroFn), StgpFn(StgpFn) {} + + bool addRange(uint64_t Start, uint64_t End, Instruction *Inst) { + auto I = std::lower_bound( + Ranges.begin(), Ranges.end(), Start, + [](const Range &LHS, uint64_t RHS) { return LHS.End <= RHS; }); + if (I != Ranges.end() && End > I->Start) { + // Overlap - bail. + return false; + } + Ranges.insert(I, {Start, End, Inst}); + return true; + } + + bool addStore(uint64_t Offset, StoreInst *SI, const DataLayout *DL) { + int64_t StoreSize = DL->getTypeStoreSize(SI->getOperand(0)->getType()); + if (!addRange(Offset, Offset + StoreSize, SI)) + return false; + IRBuilder<> IRB(SI); + applyStore(IRB, Offset, Offset + StoreSize, SI->getOperand(0)); + return true; + } + + bool addMemSet(uint64_t Offset, MemSetInst *MSI) { + uint64_t StoreSize = cast(MSI->getLength())->getZExtValue(); + if (!addRange(Offset, Offset + StoreSize, MSI)) + return false; + IRBuilder<> IRB(MSI); + applyMemSet(IRB, Offset, Offset + StoreSize, + cast(MSI->getValue())); + return true; + } + + void applyMemSet(IRBuilder<> &IRB, int64_t Start, int64_t End, + ConstantInt *V) { + // Out[] does not distinguish between zero and undef, and we already know + // that this memset does not overlap with any other initializer. Nothing to + // do for memset(0). + if (V->isZero()) + return; + for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) { + uint64_t Cst = 0x0101010101010101UL; + int LowBits = Offset < Start ? (Start - Offset) * 8 : 0; + if (LowBits) + Cst = (Cst >> LowBits) << LowBits; + int HighBits = End - Offset < 8 ? 
(8 - (End - Offset)) * 8 : 0; + if (HighBits) + Cst = (Cst << HighBits) >> HighBits; + ConstantInt *C = + ConstantInt::get(IRB.getInt64Ty(), Cst * V->getZExtValue()); + + Value *&CurrentV = Out[Offset]; + if (!CurrentV) { + CurrentV = C; + } else { + CurrentV = IRB.CreateOr(CurrentV, C); + } + } + } + + // Take a 64-bit slice of the value starting at the given offset (in bytes). + // Offset can be negative. Pad with zeroes on both sides when necessary. + Value *sliceValue(IRBuilder<> &IRB, Value *V, int64_t Offset) { + if (Offset > 0) { + V = IRB.CreateLShr(V, Offset * 8); + V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty()); + } else if (Offset < 0) { + V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty()); + V = IRB.CreateShl(V, -Offset * 8); + } else { + V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty()); + } + return V; + } + + void applyStore(IRBuilder<> &IRB, int64_t Start, int64_t End, + Value *StoredValue) { + StoredValue = flatten(IRB, StoredValue); + for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) { + Value *V = sliceValue(IRB, StoredValue, Offset - Start); + Value *&CurrentV = Out[Offset]; + if (!CurrentV) { + CurrentV = V; + } else { + CurrentV = IRB.CreateOr(CurrentV, V); + } + } + } + + void generate(IRBuilder<> &IRB) { + LLVM_DEBUG(dbgs() << "Combined initializer\n"); + // No initializers => the entire allocation is undef. + if (Ranges.empty()) { + emitUndef(IRB, 0, Size); + return; + } + + // Look through 8-byte initializer list 16 bytes at a time; + // If one of the two 8-byte halfs is non-zero non-undef, emit STGP. + // Otherwise, emit zeroes up to next available item. + uint64_t LastOffset = 0; + for (uint64_t Offset = 0; Offset < Size; Offset += 16) { + auto I1 = Out.find(Offset); + auto I2 = Out.find(Offset + 8); + if (I1 == Out.end() && I2 == Out.end()) + continue; + + if (Offset > LastOffset) + emitZeroes(IRB, LastOffset, Offset - LastOffset); + + Value *Store1 = I1 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty()) + : I1->second; + Value *Store2 = I2 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty()) + : I2->second; + emitPair(IRB, Offset, Store1, Store2); + LastOffset = Offset + 16; + } + + // memset(0) does not update Out[], therefore the tail can be either undef + // or zero. 
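// A standalone, hypothetical sketch (not taken from this patch) of the
// pairing walk in generate() above: 8-byte initializer slots are visited 16
// bytes at a time, gaps since the last emitted pair are zero-tagged, each
// populated pair becomes one tagged-pair store (a missing half is stored as
// zero), and an uninitialized tail is zero-tagged as the code right below
// does. Action and plan() are made-up names.
#include <cassert>
#include <cstdint>
#include <map>
#include <string>
#include <vector>

struct Action {
  std::string Kind; // "undef", "zero" or "stgp"
  uint64_t Offset;
  uint64_t Size;
};

std::vector<Action> plan(const std::map<uint64_t, uint64_t> &Out,
                         uint64_t Size) {
  if (Out.empty()) // nothing was initialized: the whole allocation is undef
    return {{"undef", 0, Size}};
  std::vector<Action> Plan;
  uint64_t LastOffset = 0;
  for (uint64_t Offset = 0; Offset < Size; Offset += 16) {
    bool HaveLo = Out.count(Offset) != 0;
    bool HaveHi = Out.count(Offset + 8) != 0;
    if (!HaveLo && !HaveHi)
      continue; // leave the gap for a later zero fill
    if (Offset > LastOffset)
      Plan.push_back({"zero", LastOffset, Offset - LastOffset});
    Plan.push_back({"stgp", Offset, 16});
    LastOffset = Offset + 16;
  }
  if (LastOffset < Size) // tail was never written: zero it
    Plan.push_back({"zero", LastOffset, Size - LastOffset});
  return Plan;
}

int main() {
  // A 48-byte alloca whose only initializer is an 8-byte store at offset 16.
  auto P = plan({{16, 0x00000000deadbeefULL}}, 48);
  assert(P.size() == 3);
  assert(P[0].Kind == "zero" && P[0].Offset == 0 && P[0].Size == 16);
  assert(P[1].Kind == "stgp" && P[1].Offset == 16);
  assert(P[2].Kind == "zero" && P[2].Offset == 32 && P[2].Size == 16);
  return 0;
}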
+ if (LastOffset < Size) + emitZeroes(IRB, LastOffset, Size - LastOffset); + + for (const auto &R : Ranges) { + R.Inst->eraseFromParent(); + } + } + + void emitZeroes(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) { + LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + Size + << ") zero\n"); + Value *Ptr = BasePtr; + if (Offset) + Ptr = IRB.CreateConstGEP1_32(Ptr, Offset); + IRB.CreateCall(SetTagZeroFn, + {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)}); + } + + void emitUndef(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) { + LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + Size + << ") undef\n"); + Value *Ptr = BasePtr; + if (Offset) + Ptr = IRB.CreateConstGEP1_32(Ptr, Offset); + IRB.CreateCall(SetTagFn, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)}); + } + + void emitPair(IRBuilder<> &IRB, uint64_t Offset, Value *A, Value *B) { + LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + 16 << "):\n"); + LLVM_DEBUG(dbgs() << " " << *A << "\n " << *B << "\n"); + Value *Ptr = BasePtr; + if (Offset) + Ptr = IRB.CreateConstGEP1_32(Ptr, Offset); + IRB.CreateCall(StgpFn, {Ptr, A, B}); + } + + Value *flatten(IRBuilder<> &IRB, Value *V) { + if (V->getType()->isIntegerTy()) + return V; + // vector of pointers -> vector of ints + if (VectorType *VecTy = dyn_cast(V->getType())) { + LLVMContext &Ctx = IRB.getContext(); + Type *EltTy = VecTy->getElementType(); + if (EltTy->isPointerTy()) { + uint32_t EltSize = DL->getTypeSizeInBits(EltTy); + Type *NewTy = VectorType::get(IntegerType::get(Ctx, EltSize), + VecTy->getNumElements()); + V = IRB.CreatePointerCast(V, NewTy); + } + } + return IRB.CreateBitOrPointerCast( + V, IRB.getIntNTy(DL->getTypeStoreSize(V->getType()) * 8)); + } +}; + class AArch64StackTagging : public FunctionPass { struct AllocaInfo { AllocaInst *AI; @@ -67,10 +274,15 @@ class AArch64StackTagging : public FunctionPass { int Tag; // -1 for non-tagged allocations }; + bool MergeInit; + public: static char ID; // Pass ID, replacement for typeid - AArch64StackTagging() : FunctionPass(ID) { + AArch64StackTagging(bool MergeInit = true) + : FunctionPass(ID), + MergeInit(ClMergeInit.getNumOccurrences() > 0 ? 
ClMergeInit + : MergeInit) { initializeAArch64StackTaggingPass(*PassRegistry::getPassRegistry()); } @@ -81,6 +293,9 @@ public: uint64_t Size); void untagAlloca(AllocaInst *AI, Instruction *InsertBefore, uint64_t Size); + Instruction *collectInitializers(Instruction *StartInst, Value *StartPtr, + uint64_t Size, InitializerBuilder &IB); + Instruction * insertBaseTaggedPointer(const MapVector &Allocas, const DominatorTree *DT); @@ -92,9 +307,12 @@ private: Function *F; Function *SetTagFunc; const DataLayout *DL; + AAResults *AA; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + if (MergeInit) + AU.addRequired(); } }; @@ -107,8 +325,68 @@ INITIALIZE_PASS_BEGIN(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging", INITIALIZE_PASS_END(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging", false, false) -FunctionPass *llvm::createAArch64StackTaggingPass() { - return new AArch64StackTagging(); +FunctionPass *llvm::createAArch64StackTaggingPass(bool MergeInit) { + return new AArch64StackTagging(MergeInit); +} + +Instruction *AArch64StackTagging::collectInitializers(Instruction *StartInst, + Value *StartPtr, + uint64_t Size, + InitializerBuilder &IB) { + MemoryLocation AllocaLoc{StartPtr, Size}; + Instruction *LastInst = StartInst; + BasicBlock::iterator BI(StartInst); + + unsigned Count = 0; + for (; Count < ClScanLimit && !BI->isTerminator(); ++BI) { + if (!isa(*BI)) + ++Count; + + if (isNoModRef(AA->getModRefInfo(&*BI, AllocaLoc))) + continue; + + if (!isa(BI) && !isa(BI)) { + // If the instruction is readnone, ignore it, otherwise bail out. We + // don't even allow readonly here because we don't want something like: + // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). + if (BI->mayWriteToMemory() || BI->mayReadFromMemory()) + break; + continue; + } + + if (StoreInst *NextStore = dyn_cast(BI)) { + if (!NextStore->isSimple()) + break; + + // Check to see if this store is to a constant offset from the start ptr. + Optional Offset = + isPointerOffset(StartPtr, NextStore->getPointerOperand(), *DL); + if (!Offset) + break; + + if (!IB.addStore(*Offset, NextStore, DL)) + break; + LastInst = NextStore; + } else { + MemSetInst *MSI = cast(BI); + + if (MSI->isVolatile() || !isa(MSI->getLength())) + break; + + if (!isa(MSI->getValue())) + break; + + // Check to see if this store is to a constant offset from the start ptr. + Optional Offset = isPointerOffset(StartPtr, MSI->getDest(), *DL); + if (!Offset) + break; + + if (!IB.addMemSet(*Offset, MSI)) + break; + LastInst = MSI; + } + } + return LastInst; } bool AArch64StackTagging::isInterestingAlloca(const AllocaInst &AI) { @@ -127,8 +405,23 @@ bool AArch64StackTagging::isInterestingAlloca(const AllocaInst &AI) { void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore, Value *Ptr, uint64_t Size) { + auto SetTagZeroFunc = + Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag_zero); + auto StgpFunc = + Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_stgp); + + InitializerBuilder IB(Size, DL, Ptr, SetTagFunc, SetTagZeroFunc, StgpFunc); + bool LittleEndian = + Triple(AI->getModule()->getTargetTriple()).isLittleEndian(); + // Current implementation of initializer merging assumes little endianness. 
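// A standalone, hypothetical sketch (not taken from this patch) of why the
// merge guarded below is restricted to little-endian targets: the combined
// 64-bit slot is built by zero-extending each store and shifting it left by
// its byte offset times eight, which reproduces the bytes the stores would
// have written only when the low byte is stored first.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t Lo = 0x11223344, Hi = 0x55667788;
  // Slot value the merge would materialize for stores at offsets 0 and 4.
  uint64_t Slot = (uint64_t)Lo | ((uint64_t)Hi << 32);

  // The same two stores performed directly on the host.
  unsigned char Mem[8];
  std::memcpy(Mem + 0, &Lo, 4);
  std::memcpy(Mem + 4, &Hi, 4);
  uint64_t Direct;
  std::memcpy(&Direct, Mem, 8);

  uint16_t Probe = 1;
  unsigned char FirstByte;
  std::memcpy(&FirstByte, &Probe, 1);
  if (FirstByte == 1) // little-endian host: the two images agree
    assert(Direct == Slot);
  return 0;
}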
+ if (MergeInit && !F->hasOptNone() && LittleEndian) { + LLVM_DEBUG(dbgs() << "collecting initializers for " << *AI + << ", size = " << Size << "\n"); + InsertBefore = collectInitializers(InsertBefore, Ptr, Size, IB); + } + IRBuilder<> IRB(InsertBefore); - IRB.CreateCall(SetTagFunc, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)}); + IB.generate(IRB); } void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore, @@ -166,7 +459,8 @@ Instruction *AArch64StackTagging::insertBaseTaggedPointer( } void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) { - unsigned NewAlignment = std::max(Info.AI->getAlignment(), kTagGranuleSize); + const Align NewAlignment = + max(MaybeAlign(Info.AI->getAlignment()), kTagGranuleSize); Info.AI->setAlignment(NewAlignment); uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8; @@ -179,7 +473,7 @@ void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) { Info.AI->isArrayAllocation() ? ArrayType::get( Info.AI->getAllocatedType(), - dyn_cast(Info.AI->getArraySize())->getZExtValue()) + cast(Info.AI->getArraySize())->getZExtValue()) : Info.AI->getAllocatedType(); Type *PaddingType = ArrayType::get(Type::getInt8Ty(F->getContext()), AlignedSize - Size); @@ -187,7 +481,7 @@ void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) { auto *NewAI = new AllocaInst( TypeWithPadding, Info.AI->getType()->getAddressSpace(), nullptr, "", Info.AI); NewAI->takeName(Info.AI); - NewAI->setAlignment(Info.AI->getAlignment()); + NewAI->setAlignment(MaybeAlign(Info.AI->getAlignment())); NewAI->setUsedWithInAlloca(Info.AI->isUsedWithInAlloca()); NewAI->setSwiftError(Info.AI->isSwiftError()); NewAI->copyMetadata(*Info.AI); @@ -198,6 +492,24 @@ void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) { Info.AI = NewAI; } +// Helper function to check for post-dominance. 
+static bool postDominates(const PostDominatorTree *PDT, const IntrinsicInst *A, + const IntrinsicInst *B) { + const BasicBlock *ABB = A->getParent(); + const BasicBlock *BBB = B->getParent(); + + if (ABB != BBB) + return PDT->dominates(ABB, BBB); + + for (const Instruction &I : *ABB) { + if (&I == B) + return true; + if (&I == A) + return false; + } + llvm_unreachable("Corrupt instruction list"); +} + // FIXME: check for MTE extension bool AArch64StackTagging::runOnFunction(Function &Fn) { if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag)) @@ -205,6 +517,8 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) { F = &Fn; DL = &Fn.getParent()->getDataLayout(); + if (MergeInit) + AA = &getAnalysis().getAAResults(); MapVector Allocas; // need stable iteration order SmallVector RetVec; @@ -270,23 +584,31 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) { if (NumInterestingAllocas == 0) return true; + std::unique_ptr DeleteDT; + DominatorTree *DT = nullptr; + if (auto *P = getAnalysisIfAvailable()) + DT = &P->getDomTree(); + + if (DT == nullptr && (NumInterestingAllocas > 1 || + !F->hasFnAttribute(Attribute::OptimizeNone))) { + DeleteDT = std::make_unique(*F); + DT = DeleteDT.get(); + } + + std::unique_ptr DeletePDT; + PostDominatorTree *PDT = nullptr; + if (auto *P = getAnalysisIfAvailable()) + PDT = &P->getPostDomTree(); + + if (PDT == nullptr && !F->hasFnAttribute(Attribute::OptimizeNone)) { + DeletePDT = std::make_unique(*F); + PDT = DeletePDT.get(); + } + SetTagFunc = Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag); - // Compute DT only if the function has the attribute, there are more than 1 - // interesting allocas, and it is not available for free. - Instruction *Base; - if (NumInterestingAllocas > 1) { - auto *DTWP = getAnalysisIfAvailable(); - if (DTWP) { - Base = insertBaseTaggedPointer(Allocas, &DTWP->getDomTree()); - } else { - DominatorTree DT(*F); - Base = insertBaseTaggedPointer(Allocas, &DT); - } - } else { - Base = insertBaseTaggedPointer(Allocas, nullptr); - } + Instruction *Base = insertBaseTaggedPointer(Allocas, DT); for (auto &I : Allocas) { const AllocaInfo &Info = I.second; @@ -309,11 +631,37 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) { if (UnrecognizedLifetimes.empty() && Info.LifetimeStart.size() == 1 && Info.LifetimeEnd.size() == 1) { IntrinsicInst *Start = Info.LifetimeStart[0]; + IntrinsicInst *End = Info.LifetimeEnd[0]; uint64_t Size = dyn_cast(Start->getArgOperand(0))->getZExtValue(); Size = alignTo(Size, kTagGranuleSize); tagAlloca(AI, Start->getNextNode(), Start->getArgOperand(1), Size); - untagAlloca(AI, Info.LifetimeEnd[0], Size); + // We need to ensure that if we tag some object, we certainly untag it + // before the function exits. + if (PDT != nullptr && postDominates(PDT, End, Start)) { + untagAlloca(AI, End, Size); + } else { + SmallVector ReachableRetVec; + unsigned NumCoveredExits = 0; + for (auto &RI : RetVec) { + if (!isPotentiallyReachable(Start, RI, nullptr, DT)) + continue; + ReachableRetVec.push_back(RI); + if (DT != nullptr && DT->dominates(End, RI)) + ++NumCoveredExits; + } + // If there's a mix of covered and non-covered exits, just put the untag + // on exits, so we avoid the redundancy of untagging twice. + if (NumCoveredExits == ReachableRetVec.size()) { + untagAlloca(AI, End, Size); + } else { + for (auto &RI : ReachableRetVec) + untagAlloca(AI, RI, Size); + // We may have inserted untag outside of the lifetime interval. + // Remove the lifetime end call for this alloca. 
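// A standalone, hypothetical sketch (not taken from this patch) of the
// untag-placement rule implemented above: untag at the lifetime.end only
// when it is certain to execute after the tagging (it post-dominates the
// start, or it dominates every reachable return); otherwise untag on each
// reachable return and drop the lifetime.end, as the next statement does.
// The function name and its boolean inputs are illustrative only.
#include <cassert>
#include <string>
#include <vector>

// EndPostDominatesStart: every path from the tagging reaches lifetime.end.
// RetDominatedByEnd[i]: reachable return i is always preceded by lifetime.end.
std::string pickUntagPoints(bool EndPostDominatesStart,
                            const std::vector<bool> &RetDominatedByEnd) {
  if (EndPostDominatesStart)
    return "untag at lifetime.end";
  size_t Covered = 0;
  for (bool Dominated : RetDominatedByEnd)
    Covered += Dominated ? 1 : 0;
  if (Covered == RetDominatedByEnd.size())
    return "untag at lifetime.end";
  return "untag at each reachable return";
}

int main() {
  assert(pickUntagPoints(true, {}) == "untag at lifetime.end");
  // One return can be reached without passing lifetime.end: fall back to
  // untagging at the returns so the object is never left tagged on exit.
  assert(pickUntagPoints(false, {true, false}) ==
         "untag at each reachable return");
  return 0;
}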
+ End->eraseFromParent(); + } + } } else { uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8; Value *Ptr = IRB.CreatePointerCast(TagPCall, IRB.getInt8PtrTy()); diff --git a/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp b/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp new file mode 100644 index 000000000000..3cc556f74aea --- /dev/null +++ b/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp @@ -0,0 +1,209 @@ +//===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + +#include "AArch64.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64InstrInfo.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineTraceMetrics.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "aarch64-stack-tagging-pre-ra" + +enum UncheckedLdStMode { UncheckedNever, UncheckedSafe, UncheckedAlways }; + +cl::opt ClUncheckedLdSt( + "stack-tagging-unchecked-ld-st", cl::Hidden, + cl::init(UncheckedSafe), + cl::desc( + "Unconditionally apply unchecked-ld-st optimization (even for large " + "stack frames, or in the presence of variable sized allocas)."), + cl::values( + clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"), + clEnumValN( + UncheckedSafe, "safe", + "apply unchecked-ld-st when the target is definitely within range"), + clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st"))); + +namespace { + +class AArch64StackTaggingPreRA : public MachineFunctionPass { + MachineFunction *MF; + AArch64FunctionInfo *AFI; + MachineFrameInfo *MFI; + MachineRegisterInfo *MRI; + const AArch64RegisterInfo *TRI; + const AArch64InstrInfo *TII; + + SmallVector ReTags; + +public: + static char ID; + AArch64StackTaggingPreRA() : MachineFunctionPass(ID) { + initializeAArch64StackTaggingPreRAPass(*PassRegistry::getPassRegistry()); + } + + bool mayUseUncheckedLoadStore(); + void uncheckUsesOf(unsigned TaggedReg, int FI); + void uncheckLoadsAndStores(); + + bool runOnMachineFunction(MachineFunction &Func) override; + StringRef getPassName() const override { + return "AArch64 Stack Tagging PreRA"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; +} // end anonymous namespace + +char AArch64StackTaggingPreRA::ID = 0; + +INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra", + "AArch64 Stack Tagging PreRA Pass", false, false) +INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra", + "AArch64 Stack Tagging PreRA Pass", false, false) + +FunctionPass 
*llvm::createAArch64StackTaggingPreRAPass() { + return new AArch64StackTaggingPreRA(); +} + +static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) { + switch (Opcode) { + case AArch64::LDRWui: + case AArch64::LDRSHWui: + case AArch64::LDRXui: + case AArch64::LDRBui: + case AArch64::LDRBBui: + case AArch64::LDRHui: + case AArch64::LDRSui: + case AArch64::LDRDui: + case AArch64::LDRQui: + case AArch64::STRWui: + case AArch64::STRXui: + case AArch64::STRBui: + case AArch64::STRBBui: + case AArch64::STRHui: + case AArch64::STRSui: + case AArch64::STRDui: + case AArch64::STRQui: + return true; + default: + return false; + } +} + +bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() { + if (ClUncheckedLdSt == UncheckedNever) + return false; + else if (ClUncheckedLdSt == UncheckedAlways) + return true; + + // This estimate can be improved if we had harder guarantees about stack frame + // layout. With LocalStackAllocation we can estimate SP offset to any + // preallocated slot. AArch64FrameLowering::orderFrameObjects could put tagged + // objects ahead of non-tagged ones, but that's not always desirable. + // + // Underestimating SP offset here may require the use of LDG to materialize + // the tagged address of the stack slot, along with a scratch register + // allocation (post-regalloc!). + // + // For now we do the safe thing here and require that the entire stack frame + // is within range of the shortest of the unchecked instructions. + unsigned FrameSize = 0; + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) + FrameSize += MFI->getObjectSize(i); + bool EntireFrameReachableFromSP = FrameSize < 0xf00; + return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP; +} + +void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) { + for (auto UI = MRI->use_instr_begin(TaggedReg), E = MRI->use_instr_end(); + UI != E;) { + MachineInstr *UseI = &*(UI++); + if (isUncheckedLoadOrStoreOpcode(UseI->getOpcode())) { + // FI operand is always the one before the immediate offset. + unsigned OpIdx = TII->getLoadStoreImmIdx(UseI->getOpcode()) - 1; + if (UseI->getOperand(OpIdx).isReg() && + UseI->getOperand(OpIdx).getReg() == TaggedReg) { + UseI->getOperand(OpIdx).ChangeToFrameIndex(FI); + UseI->getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED); + } + } else if (UseI->isCopy() && + Register::isVirtualRegister(UseI->getOperand(0).getReg())) { + uncheckUsesOf(UseI->getOperand(0).getReg(), FI); + } + } +} + +void AArch64StackTaggingPreRA::uncheckLoadsAndStores() { + for (auto *I : ReTags) { + unsigned TaggedReg = I->getOperand(0).getReg(); + int FI = I->getOperand(1).getIndex(); + uncheckUsesOf(TaggedReg, FI); + } +} + +bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) { + MF = &Func; + MRI = &MF->getRegInfo(); + AFI = MF->getInfo(); + TII = static_cast(MF->getSubtarget().getInstrInfo()); + TRI = static_cast( + MF->getSubtarget().getRegisterInfo()); + MFI = &MF->getFrameInfo(); + ReTags.clear(); + + assert(MRI->isSSA()); + + LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n" + << "********** Function: " << MF->getName() << '\n'); + + SmallSetVector TaggedSlots; + for (auto &BB : *MF) { + for (auto &I : BB) { + if (I.getOpcode() == AArch64::TAGPstack) { + ReTags.push_back(&I); + int FI = I.getOperand(1).getIndex(); + TaggedSlots.insert(FI); + // There should be no offsets in TAGP yet. 
+ assert(I.getOperand(2).getImm() == 0); + } + } + } + + if (ReTags.empty()) + return false; + + if (mayUseUncheckedLoadStore()) + uncheckLoadsAndStores(); + + return true; +} diff --git a/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/lib/Target/AArch64/AArch64StorePairSuppress.cpp index 0e84a00df006..5deb601822b8 100644 --- a/lib/Target/AArch64/AArch64StorePairSuppress.cpp +++ b/lib/Target/AArch64/AArch64StorePairSuppress.cpp @@ -151,7 +151,7 @@ bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) { int64_t Offset; if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) && BaseOp->isReg()) { - unsigned BaseReg = BaseOp->getReg(); + Register BaseReg = BaseOp->getReg(); if (PrevBaseReg == BaseReg) { // If this block can take STPs, skip ahead to the next block. if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent())) diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index 3bc89b91c3f7..558bea368eff 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -71,19 +71,22 @@ void AArch64Subtarget::initializeProperties() { case CortexA35: break; case CortexA53: - PrefFunctionAlignment = 3; + PrefFunctionLogAlignment = 3; break; case CortexA55: break; case CortexA57: MaxInterleaveFactor = 4; - PrefFunctionAlignment = 4; + PrefFunctionLogAlignment = 4; + break; + case CortexA65: + PrefFunctionLogAlignment = 3; break; case CortexA72: case CortexA73: case CortexA75: case CortexA76: - PrefFunctionAlignment = 4; + PrefFunctionLogAlignment = 4; break; case Cyclone: CacheLineSize = 64; @@ -94,14 +97,14 @@ void AArch64Subtarget::initializeProperties() { case ExynosM1: MaxInterleaveFactor = 4; MaxJumpTableSize = 8; - PrefFunctionAlignment = 4; - PrefLoopAlignment = 3; + PrefFunctionLogAlignment = 4; + PrefLoopLogAlignment = 3; break; case ExynosM3: MaxInterleaveFactor = 4; MaxJumpTableSize = 20; - PrefFunctionAlignment = 5; - PrefLoopAlignment = 4; + PrefFunctionLogAlignment = 5; + PrefLoopLogAlignment = 4; break; case Falkor: MaxInterleaveFactor = 4; @@ -122,6 +125,12 @@ void AArch64Subtarget::initializeProperties() { // FIXME: remove this to enable 64-bit SLP if performance looks good. MinVectorRegisterBitWidth = 128; break; + case NeoverseE1: + PrefFunctionLogAlignment = 3; + break; + case NeoverseN1: + PrefFunctionLogAlignment = 4; + break; case Saphira: MaxInterleaveFactor = 4; // FIXME: remove this to enable 64-bit SLP if performance looks good. @@ -129,8 +138,8 @@ void AArch64Subtarget::initializeProperties() { break; case ThunderX2T99: CacheLineSize = 64; - PrefFunctionAlignment = 3; - PrefLoopAlignment = 2; + PrefFunctionLogAlignment = 3; + PrefLoopLogAlignment = 2; MaxInterleaveFactor = 4; PrefetchDistance = 128; MinPrefetchStride = 1024; @@ -143,15 +152,15 @@ void AArch64Subtarget::initializeProperties() { case ThunderXT81: case ThunderXT83: CacheLineSize = 128; - PrefFunctionAlignment = 3; - PrefLoopAlignment = 2; + PrefFunctionLogAlignment = 3; + PrefLoopLogAlignment = 2; // FIXME: remove this to enable 64-bit SLP if performance looks good. 
MinVectorRegisterBitWidth = 128; break; case TSV110: CacheLineSize = 64; - PrefFunctionAlignment = 4; - PrefLoopAlignment = 2; + PrefFunctionLogAlignment = 4; + PrefLoopLogAlignment = 2; break; } } @@ -187,7 +196,7 @@ const CallLowering *AArch64Subtarget::getCallLowering() const { return CallLoweringInfo.get(); } -const InstructionSelector *AArch64Subtarget::getInstructionSelector() const { +InstructionSelector *AArch64Subtarget::getInstructionSelector() const { return InstSelector.get(); } @@ -201,7 +210,7 @@ const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const { /// Find the target operand flags that describe how a global value should be /// referenced for the current subtarget. -unsigned char +unsigned AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // MachO large model always goes via a GOT, simply to get a single 8-byte @@ -224,10 +233,17 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, GV->hasExternalWeakLinkage()) return AArch64II::MO_GOT; + // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate + // that their nominal addresses are tagged and outside of the code model. In + // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the + // tag if necessary based on MO_TAGGED. + if (AllowTaggedGlobals && !isa(GV->getValueType())) + return AArch64II::MO_NC | AArch64II::MO_TAGGED; + return AArch64II::MO_NO_FLAG; } -unsigned char AArch64Subtarget::classifyGlobalFunctionReference( +unsigned AArch64Subtarget::classifyGlobalFunctionReference( const GlobalValue *GV, const TargetMachine &TM) const { // MachO large model always goes via a GOT, because we don't have the // relocations available to do anything else.. @@ -275,7 +291,7 @@ bool AArch64Subtarget::supportsAddressTopByteIgnored() const { std::unique_ptr AArch64Subtarget::getCustomPBQPConstraints() const { - return balanceFPOps() ? llvm::make_unique() : nullptr; + return balanceFPOps() ? std::make_unique() : nullptr; } void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const { diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index 0c84cfb8329a..f3212fae8e5e 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -42,6 +42,7 @@ public: CortexA53, CortexA55, CortexA57, + CortexA65, CortexA72, CortexA73, CortexA75, @@ -51,6 +52,8 @@ public: ExynosM3, Falkor, Kryo, + NeoverseE1, + NeoverseN1, Saphira, ThunderX2T99, ThunderX, @@ -113,6 +116,7 @@ protected: bool HasTRACEV8_4 = false; bool HasAM = false; bool HasSEL2 = false; + bool HasPMU = false; bool HasTLB_RMI = false; bool HasFMI = false; bool HasRCPC_IMMO = false; @@ -134,6 +138,7 @@ protected: bool HasBTI = false; bool HasRandGen = false; bool HasMTE = false; + bool HasTME = false; // Arm SVE2 extensions bool HasSVE2AES = false; @@ -141,6 +146,10 @@ protected: bool HasSVE2SHA3 = false; bool HasSVE2BitPerm = false; + // Future architecture extensions. + bool HasETE = false; + bool HasTRBE = false; + // HasZeroCycleRegMove - Has zero-cycle register mov instructions. 
bool HasZeroCycleRegMove = false; @@ -183,14 +192,15 @@ protected: bool UseEL1ForTP = false; bool UseEL2ForTP = false; bool UseEL3ForTP = false; + bool AllowTaggedGlobals = false; uint8_t MaxInterleaveFactor = 2; uint8_t VectorInsertExtractBaseCost = 3; uint16_t CacheLineSize = 0; uint16_t PrefetchDistance = 0; uint16_t MinPrefetchStride = 1; unsigned MaxPrefetchIterationsAhead = UINT_MAX; - unsigned PrefFunctionAlignment = 0; - unsigned PrefLoopAlignment = 0; + unsigned PrefFunctionLogAlignment = 0; + unsigned PrefLoopLogAlignment = 0; unsigned MaxJumpTableSize = 0; unsigned WideningBaseCost = 0; @@ -247,7 +257,7 @@ public: return &getInstrInfo()->getRegisterInfo(); } const CallLowering *getCallLowering() const override; - const InstructionSelector *getInstructionSelector() const override; + InstructionSelector *getInstructionSelector() const override; const LegalizerInfo *getLegalizerInfo() const override; const RegisterBankInfo *getRegBankInfo() const override; const Triple &getTargetTriple() const { return TargetTriple; } @@ -344,14 +354,16 @@ public: unsigned getVectorInsertExtractBaseCost() const { return VectorInsertExtractBaseCost; } - unsigned getCacheLineSize() const { return CacheLineSize; } - unsigned getPrefetchDistance() const { return PrefetchDistance; } - unsigned getMinPrefetchStride() const { return MinPrefetchStride; } - unsigned getMaxPrefetchIterationsAhead() const { + unsigned getCacheLineSize() const override { return CacheLineSize; } + unsigned getPrefetchDistance() const override { return PrefetchDistance; } + unsigned getMinPrefetchStride() const override { return MinPrefetchStride; } + unsigned getMaxPrefetchIterationsAhead() const override { return MaxPrefetchIterationsAhead; } - unsigned getPrefFunctionAlignment() const { return PrefFunctionAlignment; } - unsigned getPrefLoopAlignment() const { return PrefLoopAlignment; } + unsigned getPrefFunctionLogAlignment() const { + return PrefFunctionLogAlignment; + } + unsigned getPrefLoopLogAlignment() const { return PrefLoopLogAlignment; } unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; } @@ -380,6 +392,7 @@ public: bool hasBTI() const { return HasBTI; } bool hasRandGen() const { return HasRandGen; } bool hasMTE() const { return HasMTE; } + bool hasTME() const { return HasTME; } // Arm SVE2 extensions bool hasSVE2AES() const { return HasSVE2AES; } bool hasSVE2SM4() const { return HasSVE2SM4; } @@ -399,6 +412,8 @@ public: bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } + bool isTargetILP32() const { return TargetTriple.isArch32Bit(); } + bool useAA() const override { return UseAA; } bool hasVH() const { return HasVH; } @@ -421,10 +436,17 @@ public: bool hasTRACEV8_4() const { return HasTRACEV8_4; } bool hasAM() const { return HasAM; } bool hasSEL2() const { return HasSEL2; } + bool hasPMU() const { return HasPMU; } bool hasTLB_RMI() const { return HasTLB_RMI; } bool hasFMI() const { return HasFMI; } bool hasRCPC_IMMO() const { return HasRCPC_IMMO; } + bool addrSinkUsingGEPs() const override { + // Keeping GEPs inbounds is important for exploiting AArch64 + // addressing-modes in ILP32 mode. 
+ return useAA() || isTargetILP32(); + } + bool useSmallAddressing() const { switch (TLInfo.getTargetMachine().getCodeModel()) { case CodeModel::Kernel: @@ -443,11 +465,11 @@ public: /// ClassifyGlobalReference - Find the target operand flags that describe /// how a global value should be referenced for the current subtarget. - unsigned char ClassifyGlobalReference(const GlobalValue *GV, - const TargetMachine &TM) const; + unsigned ClassifyGlobalReference(const GlobalValue *GV, + const TargetMachine &TM) const; - unsigned char classifyGlobalFunctionReference(const GlobalValue *GV, - const TargetMachine &TM) const; + unsigned classifyGlobalFunctionReference(const GlobalValue *GV, + const TargetMachine &TM) const; void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override; diff --git a/lib/Target/AArch64/AArch64SystemOperands.td b/lib/Target/AArch64/AArch64SystemOperands.td index 536a6591478b..05249a4ea6a8 100644 --- a/lib/Target/AArch64/AArch64SystemOperands.td +++ b/lib/Target/AArch64/AArch64SystemOperands.td @@ -612,6 +612,7 @@ def : ROSysReg<"ISR_EL1", 0b11, 0b000, 0b1100, 0b0001, 0b000>; def : ROSysReg<"CNTPCT_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b001>; def : ROSysReg<"CNTVCT_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b010>; def : ROSysReg<"ID_MMFR4_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b110>; +def : ROSysReg<"ID_MMFR5_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b110>; // Trace registers // Op0 Op1 CRn CRm Op2 @@ -1321,6 +1322,12 @@ def : RWSysReg<"CNTHPS_CTL_EL2", 0b11, 0b100, 0b1110, 0b0101, 0b001>; def : RWSysReg<"SDER32_EL2", 0b11, 0b100, 0b0001, 0b0011, 0b001>; } // FeatureSEL2 +// v8.4a PMU registers +// Op0 Op1 CRn CRm Op2 +let Requires = [{ {AArch64::FeaturePMU} }] in { +def : RWSysReg<"PMMIR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b110>; +} // FeaturePMU + // v8.4a RAS registers // Op0 Op1 CRn CRm Op2 let Requires = [{ {AArch64::FeatureRASv8_4} }] in { @@ -1452,14 +1459,37 @@ let Requires = [{ {AArch64::FeatureMTE} }] in { def : RWSysReg<"TCO", 0b11, 0b011, 0b0100, 0b0010, 0b111>; def : RWSysReg<"GCR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b110>; def : RWSysReg<"RGSR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b101>; -def : RWSysReg<"TFSR_EL1", 0b11, 0b000, 0b0110, 0b0101, 0b000>; -def : RWSysReg<"TFSR_EL2", 0b11, 0b100, 0b0110, 0b0101, 0b000>; -def : RWSysReg<"TFSR_EL3", 0b11, 0b110, 0b0110, 0b0110, 0b000>; -def : RWSysReg<"TFSR_EL12", 0b11, 0b101, 0b0110, 0b0110, 0b000>; -def : RWSysReg<"TFSRE0_EL1", 0b11, 0b000, 0b0110, 0b0110, 0b001>; +def : RWSysReg<"TFSR_EL1", 0b11, 0b000, 0b0101, 0b0110, 0b000>; +def : RWSysReg<"TFSR_EL2", 0b11, 0b100, 0b0101, 0b0110, 0b000>; +def : RWSysReg<"TFSR_EL3", 0b11, 0b110, 0b0101, 0b0110, 0b000>; +def : RWSysReg<"TFSR_EL12", 0b11, 0b101, 0b0101, 0b0110, 0b000>; +def : RWSysReg<"TFSRE0_EL1", 0b11, 0b000, 0b0101, 0b0110, 0b001>; def : ROSysReg<"GMID_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b100>; } // HasMTE +// Embedded Trace Extension R/W System registers +let Requires = [{ {AArch64::FeatureETE} }] in { +// Name Op0 Op1 CRn CRm Op2 +def : RWSysReg<"TRCRSR", 0b10, 0b001, 0b0000, 0b1010, 0b000>; +// TRCEXTINSELR0 has the same encoding as ETM TRCEXTINSELR +def : RWSysReg<"TRCEXTINSELR0", 0b10, 0b001, 0b0000, 0b1000, 0b100>; +def : RWSysReg<"TRCEXTINSELR1", 0b10, 0b001, 0b0000, 0b1001, 0b100>; +def : RWSysReg<"TRCEXTINSELR2", 0b10, 0b001, 0b0000, 0b1010, 0b100>; +def : RWSysReg<"TRCEXTINSELR3", 0b10, 0b001, 0b0000, 0b1011, 0b100>; +} // FeatureETE + +// Trace Buffer Extension System registers +let Requires = [{ {AArch64::FeatureTRBE} }] in { +// Name 
Op0 Op1 CRn CRm Op2 +def : RWSysReg<"TRBLIMITR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b000>; +def : RWSysReg<"TRBPTR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b001>; +def : RWSysReg<"TRBBASER_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b010>; +def : RWSysReg<"TRBSR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b011>; +def : RWSysReg<"TRBMAR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b100>; +def : RWSysReg<"TRBTRG_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b110>; +def : ROSysReg<"TRBIDR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b111>; +} // FeatureTRBE + // Cyclone specific system registers // Op0 Op1 CRn CRm Op2 let Requires = [{ {AArch64::ProcCyclone} }] in diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index 865461480499..b3ed96e815be 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -157,6 +157,8 @@ extern "C" void LLVMInitializeAArch64Target() { RegisterTargetMachine X(getTheAArch64leTarget()); RegisterTargetMachine Y(getTheAArch64beTarget()); RegisterTargetMachine Z(getTheARM64Target()); + RegisterTargetMachine W(getTheARM64_32Target()); + RegisterTargetMachine V(getTheAArch64_32Target()); auto PR = PassRegistry::getPassRegistry(); initializeGlobalISel(*PR); initializeAArch64A53Fix835769Pass(*PR); @@ -180,6 +182,7 @@ extern "C" void LLVMInitializeAArch64Target() { initializeLDTLSCleanupPass(*PR); initializeAArch64SpeculationHardeningPass(*PR); initializeAArch64StackTaggingPass(*PR); + initializeAArch64StackTaggingPreRAPass(*PR); } //===----------------------------------------------------------------------===// @@ -187,11 +190,11 @@ extern "C" void LLVMInitializeAArch64Target() { //===----------------------------------------------------------------------===// static std::unique_ptr createTLOF(const Triple &TT) { if (TT.isOSBinFormatMachO()) - return llvm::make_unique(); + return std::make_unique(); if (TT.isOSBinFormatCOFF()) - return llvm::make_unique(); + return std::make_unique(); - return llvm::make_unique(); + return std::make_unique(); } // Helper function to build a DataLayout string @@ -200,8 +203,11 @@ static std::string computeDataLayout(const Triple &TT, bool LittleEndian) { if (Options.getABIName() == "ilp32") return "e-m:e-p:32:32-i8:8-i16:16-i64:64-S128"; - if (TT.isOSBinFormatMachO()) + if (TT.isOSBinFormatMachO()) { + if (TT.getArch() == Triple::aarch64_32) + return "e-m:o-p:32:32-i64:64-i128:128-n32:64-S128"; return "e-m:o-i64:64-i128:128-n32:64-S128"; + } if (TT.isOSBinFormatCOFF()) return "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128"; if (LittleEndian) @@ -277,8 +283,11 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT, this->Options.TrapUnreachable = true; } - // Enable GlobalISel at or below EnableGlobalISelAt0. - if (getOptLevel() <= EnableGlobalISelAtO) { + // Enable GlobalISel at or below EnableGlobalISelAt0, unless this is + // MachO/CodeModel::Large, which GlobalISel does not support. + if (getOptLevel() <= EnableGlobalISelAtO && + TT.getArch() != Triple::aarch64_32 && + !(getCodeModel() == CodeModel::Large && TT.isOSBinFormatMachO())) { setGlobalISel(true); setGlobalISelAbort(GlobalISelAbortMode::Disable); } @@ -310,7 +319,7 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const { // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. 
resetTargetOptions(F); - I = llvm::make_unique(TargetTriple, CPU, FS, *this, + I = std::make_unique(TargetTriple, CPU, FS, *this, isLittle); } return I.get(); @@ -448,7 +457,8 @@ void AArch64PassConfig::addIRPasses() { addPass(createLICMPass()); } - addPass(createAArch64StackTaggingPass()); + addPass(createAArch64StackTaggingPass(/* MergeInit = */ TM->getOptLevel() != + CodeGenOpt::None)); } // Pass Pipeline Configuration @@ -502,7 +512,8 @@ bool AArch64PassConfig::addIRTranslator() { } void AArch64PassConfig::addPreLegalizeMachineIR() { - addPass(createAArch64PreLegalizeCombiner()); + bool IsOptNone = getOptLevel() == CodeGenOpt::None; + addPass(createAArch64PreLegalizeCombiner(IsOptNone)); } bool AArch64PassConfig::addLegalizeMachineIR() { @@ -516,9 +527,7 @@ bool AArch64PassConfig::addRegBankSelect() { } void AArch64PassConfig::addPreGlobalInstructionSelect() { - // Workaround the deficiency of the fast register allocator. - if (TM->getOptLevel() == CodeGenOpt::None) - addPass(new Localizer()); + addPass(new Localizer()); } bool AArch64PassConfig::addGlobalInstructionSelect() { @@ -540,6 +549,8 @@ bool AArch64PassConfig::addILPOpts() { if (EnableStPairSuppress) addPass(createAArch64StorePairSuppressPass()); addPass(createAArch64SIMDInstrOptPass()); + if (TM->getOptLevel() != CodeGenOpt::None) + addPass(createAArch64StackTaggingPreRAPass()); return true; } diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp index 1c3d5d0743ad..54562094fcf5 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp +++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -59,8 +59,8 @@ MCSymbol *AArch64_MachoTargetObjectFile::getCFIPersonalitySymbol( } const MCExpr *AArch64_MachoTargetObjectFile::getIndirectSymViaGOTPCRel( - const MCSymbol *Sym, const MCValue &MV, int64_t Offset, - MachineModuleInfo *MMI, MCStreamer &Streamer) const { + const GlobalValue *GV, const MCSymbol *Sym, const MCValue &MV, + int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const { assert((Offset+MV.getConstant() == 0) && "Arch64 does not support GOT PC rel with extra offset"); // On ARM64 Darwin, we can reference symbols with foo@GOT-., which diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h index 7ead363d42fe..1cb4c028c80d 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.h +++ b/lib/Target/AArch64/AArch64TargetObjectFile.h @@ -35,7 +35,8 @@ public: const TargetMachine &TM, MachineModuleInfo *MMI) const override; - const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym, + const MCExpr *getIndirectSymViaGOTPCRel(const GlobalValue *GV, + const MCSymbol *Sym, const MCValue &MV, int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const override; diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index a4b78f2a7d6b..dc916a7b3407 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -618,6 +618,19 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); } +AArch64TTIImpl::TTI::MemCmpExpansionOptions +AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { + TTI::MemCmpExpansionOptions Options; + Options.AllowOverlappingLoads = !ST->requiresStrictAlign(); + Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize); + Options.NumLoadsPerBlock = Options.MaxNumLoads; + // 
TODO: Though vector loads usually perform well on AArch64, in some targets + // they may wake up the FP unit, which raises the power consumption. Perhaps + // they could be used with no holds barred (-O3). + Options.LoadSizes = {8, 4, 2, 1}; + return Options; +} + int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty, unsigned Alignment, unsigned AddressSpace, const Instruction *I) { @@ -879,22 +892,6 @@ bool AArch64TTIImpl::shouldConsiderAddressTypePromotion( return Considerable; } -unsigned AArch64TTIImpl::getCacheLineSize() { - return ST->getCacheLineSize(); -} - -unsigned AArch64TTIImpl::getPrefetchDistance() { - return ST->getPrefetchDistance(); -} - -unsigned AArch64TTIImpl::getMinPrefetchStride() { - return ST->getMinPrefetchStride(); -} - -unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() { - return ST->getMaxPrefetchIterationsAhead(); -} - bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const { assert(isa(Ty) && "Expected Ty to be a vector type"); diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h index 10c15a139b4c..32c59f41e1c3 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -85,7 +85,8 @@ public: bool enableInterleavedAccessVectorization() { return true; } - unsigned getNumberOfRegisters(bool Vector) { + unsigned getNumberOfRegisters(unsigned ClassID) const { + bool Vector = (ClassID == 1); if (Vector) { if (ST->hasNEON()) return 32; @@ -130,6 +131,9 @@ public: int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I = nullptr); + TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, + bool IsZeroCmp) const; + int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I = nullptr); @@ -153,14 +157,6 @@ public: shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader); - unsigned getCacheLineSize(); - - unsigned getPrefetchDistance(); - - unsigned getMinPrefetchStride(); - - unsigned getMaxPrefetchIterationsAhead(); - bool shouldExpandReduction(const IntrinsicInst *II) const { return false; } diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index f4c55d48d215..4fb409f020d9 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -935,48 +935,34 @@ public: return false; } - bool isMovZSymbolG3() const { - return isMovWSymbol(AArch64MCExpr::VK_ABS_G3); + bool isMovWSymbolG3() const { + return isMovWSymbol({AArch64MCExpr::VK_ABS_G3, AArch64MCExpr::VK_PREL_G3}); } - bool isMovZSymbolG2() const { - return isMovWSymbol({AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S, - AArch64MCExpr::VK_TPREL_G2, - AArch64MCExpr::VK_DTPREL_G2}); - } - - bool isMovZSymbolG1() const { - return isMovWSymbol({ - AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S, - AArch64MCExpr::VK_GOTTPREL_G1, AArch64MCExpr::VK_TPREL_G1, - AArch64MCExpr::VK_DTPREL_G1, - }); - } - - bool isMovZSymbolG0() const { - return isMovWSymbol({AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S, - AArch64MCExpr::VK_TPREL_G0, - AArch64MCExpr::VK_DTPREL_G0}); - } - - bool isMovKSymbolG3() const { - return isMovWSymbol(AArch64MCExpr::VK_ABS_G3); - } - - bool isMovKSymbolG2() const { - return isMovWSymbol(AArch64MCExpr::VK_ABS_G2_NC); + bool isMovWSymbolG2() const { + return 
isMovWSymbol( + {AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S, + AArch64MCExpr::VK_ABS_G2_NC, AArch64MCExpr::VK_PREL_G2, + AArch64MCExpr::VK_PREL_G2_NC, AArch64MCExpr::VK_TPREL_G2, + AArch64MCExpr::VK_DTPREL_G2}); } - bool isMovKSymbolG1() const { - return isMovWSymbol({AArch64MCExpr::VK_ABS_G1_NC, - AArch64MCExpr::VK_TPREL_G1_NC, - AArch64MCExpr::VK_DTPREL_G1_NC}); + bool isMovWSymbolG1() const { + return isMovWSymbol( + {AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S, + AArch64MCExpr::VK_ABS_G1_NC, AArch64MCExpr::VK_PREL_G1, + AArch64MCExpr::VK_PREL_G1_NC, AArch64MCExpr::VK_GOTTPREL_G1, + AArch64MCExpr::VK_TPREL_G1, AArch64MCExpr::VK_TPREL_G1_NC, + AArch64MCExpr::VK_DTPREL_G1, AArch64MCExpr::VK_DTPREL_G1_NC}); } - bool isMovKSymbolG0() const { + bool isMovWSymbolG0() const { return isMovWSymbol( - {AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC, - AArch64MCExpr::VK_TPREL_G0_NC, AArch64MCExpr::VK_DTPREL_G0_NC}); + {AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S, + AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_PREL_G0, + AArch64MCExpr::VK_PREL_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC, + AArch64MCExpr::VK_TPREL_G0, AArch64MCExpr::VK_TPREL_G0_NC, + AArch64MCExpr::VK_DTPREL_G0, AArch64MCExpr::VK_DTPREL_G0_NC}); } template @@ -1814,7 +1800,7 @@ public: static std::unique_ptr CreateToken(StringRef Str, bool IsSuffix, SMLoc S, MCContext &Ctx) { - auto Op = make_unique(k_Token, Ctx); + auto Op = std::make_unique(k_Token, Ctx); Op->Tok.Data = Str.data(); Op->Tok.Length = Str.size(); Op->Tok.IsSuffix = IsSuffix; @@ -1829,7 +1815,7 @@ public: AArch64_AM::ShiftExtendType ExtTy = AArch64_AM::LSL, unsigned ShiftAmount = 0, unsigned HasExplicitAmount = false) { - auto Op = make_unique(k_Register, Ctx); + auto Op = std::make_unique(k_Register, Ctx); Op->Reg.RegNum = RegNum; Op->Reg.Kind = Kind; Op->Reg.ElementWidth = 0; @@ -1861,7 +1847,7 @@ public: CreateVectorList(unsigned RegNum, unsigned Count, unsigned NumElements, unsigned ElementWidth, RegKind RegisterKind, SMLoc S, SMLoc E, MCContext &Ctx) { - auto Op = make_unique(k_VectorList, Ctx); + auto Op = std::make_unique(k_VectorList, Ctx); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; Op->VectorList.NumElements = NumElements; @@ -1874,7 +1860,7 @@ public: static std::unique_ptr CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, MCContext &Ctx) { - auto Op = make_unique(k_VectorIndex, Ctx); + auto Op = std::make_unique(k_VectorIndex, Ctx); Op->VectorIndex.Val = Idx; Op->StartLoc = S; Op->EndLoc = E; @@ -1883,7 +1869,7 @@ public: static std::unique_ptr CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, MCContext &Ctx) { - auto Op = make_unique(k_Immediate, Ctx); + auto Op = std::make_unique(k_Immediate, Ctx); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; @@ -1894,7 +1880,7 @@ public: unsigned ShiftAmount, SMLoc S, SMLoc E, MCContext &Ctx) { - auto Op = make_unique(k_ShiftedImm, Ctx); + auto Op = std::make_unique(k_ShiftedImm, Ctx); Op->ShiftedImm .Val = Val; Op->ShiftedImm.ShiftAmount = ShiftAmount; Op->StartLoc = S; @@ -1904,7 +1890,7 @@ public: static std::unique_ptr CreateCondCode(AArch64CC::CondCode Code, SMLoc S, SMLoc E, MCContext &Ctx) { - auto Op = make_unique(k_CondCode, Ctx); + auto Op = std::make_unique(k_CondCode, Ctx); Op->CondCode.Code = Code; Op->StartLoc = S; Op->EndLoc = E; @@ -1913,7 +1899,7 @@ public: static std::unique_ptr CreateFPImm(APFloat Val, bool IsExact, SMLoc S, MCContext &Ctx) { - auto Op = make_unique(k_FPImm, Ctx); + auto Op = std::make_unique(k_FPImm, Ctx); Op->FPImm.Val 
= Val.bitcastToAPInt().getSExtValue(); Op->FPImm.IsExact = IsExact; Op->StartLoc = S; @@ -1925,7 +1911,7 @@ public: StringRef Str, SMLoc S, MCContext &Ctx) { - auto Op = make_unique(k_Barrier, Ctx); + auto Op = std::make_unique(k_Barrier, Ctx); Op->Barrier.Val = Val; Op->Barrier.Data = Str.data(); Op->Barrier.Length = Str.size(); @@ -1939,7 +1925,7 @@ public: uint32_t MSRReg, uint32_t PStateField, MCContext &Ctx) { - auto Op = make_unique(k_SysReg, Ctx); + auto Op = std::make_unique(k_SysReg, Ctx); Op->SysReg.Data = Str.data(); Op->SysReg.Length = Str.size(); Op->SysReg.MRSReg = MRSReg; @@ -1952,7 +1938,7 @@ public: static std::unique_ptr CreateSysCR(unsigned Val, SMLoc S, SMLoc E, MCContext &Ctx) { - auto Op = make_unique(k_SysCR, Ctx); + auto Op = std::make_unique(k_SysCR, Ctx); Op->SysCRImm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; @@ -1963,7 +1949,7 @@ public: StringRef Str, SMLoc S, MCContext &Ctx) { - auto Op = make_unique(k_Prefetch, Ctx); + auto Op = std::make_unique(k_Prefetch, Ctx); Op->Prefetch.Val = Val; Op->Barrier.Data = Str.data(); Op->Barrier.Length = Str.size(); @@ -1976,7 +1962,7 @@ public: StringRef Str, SMLoc S, MCContext &Ctx) { - auto Op = make_unique(k_PSBHint, Ctx); + auto Op = std::make_unique(k_PSBHint, Ctx); Op->PSBHint.Val = Val; Op->PSBHint.Data = Str.data(); Op->PSBHint.Length = Str.size(); @@ -1989,7 +1975,7 @@ public: StringRef Str, SMLoc S, MCContext &Ctx) { - auto Op = make_unique(k_BTIHint, Ctx); + auto Op = std::make_unique(k_BTIHint, Ctx); Op->BTIHint.Val = Val << 1 | 32; Op->BTIHint.Data = Str.data(); Op->BTIHint.Length = Str.size(); @@ -2001,7 +1987,7 @@ public: static std::unique_ptr CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp, unsigned Val, bool HasExplicitAmount, SMLoc S, SMLoc E, MCContext &Ctx) { - auto Op = make_unique(k_ShiftExtend, Ctx); + auto Op = std::make_unique(k_ShiftExtend, Ctx); Op->ShiftExtend.Type = ShOp; Op->ShiftExtend.Amount = Val; Op->ShiftExtend.HasExplicitAmount = HasExplicitAmount; @@ -2840,7 +2826,7 @@ static const struct Extension { {"sve2-aes", {AArch64::FeatureSVE2AES}}, {"sve2-sm4", {AArch64::FeatureSVE2SM4}}, {"sve2-sha3", {AArch64::FeatureSVE2SHA3}}, - {"bitperm", {AArch64::FeatureSVE2BitPerm}}, + {"sve2-bitperm", {AArch64::FeatureSVE2BitPerm}}, // FIXME: Unsupported extensions {"pan", {}}, {"lor", {}}, @@ -3260,6 +3246,13 @@ bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) { .Case("abs_g0", AArch64MCExpr::VK_ABS_G0) .Case("abs_g0_s", AArch64MCExpr::VK_ABS_G0_S) .Case("abs_g0_nc", AArch64MCExpr::VK_ABS_G0_NC) + .Case("prel_g3", AArch64MCExpr::VK_PREL_G3) + .Case("prel_g2", AArch64MCExpr::VK_PREL_G2) + .Case("prel_g2_nc", AArch64MCExpr::VK_PREL_G2_NC) + .Case("prel_g1", AArch64MCExpr::VK_PREL_G1) + .Case("prel_g1_nc", AArch64MCExpr::VK_PREL_G1_NC) + .Case("prel_g0", AArch64MCExpr::VK_PREL_G0) + .Case("prel_g0_nc", AArch64MCExpr::VK_PREL_G0_NC) .Case("dtprel_g2", AArch64MCExpr::VK_DTPREL_G2) .Case("dtprel_g1", AArch64MCExpr::VK_DTPREL_G1) .Case("dtprel_g1_nc", AArch64MCExpr::VK_DTPREL_G1_NC) @@ -5283,7 +5276,7 @@ bool AArch64AsmParser::parseDirectiveInst(SMLoc Loc) { auto parseOp = [&]() -> bool { SMLoc L = getLoc(); - const MCExpr *Expr; + const MCExpr *Expr = nullptr; if (check(getParser().parseExpression(Expr), L, "expected expression")) return true; const MCConstantExpr *Value = dyn_cast_or_null(Expr); @@ -5542,43 +5535,43 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, switch (Kind) { default: return Match_InvalidOperand; - case MCK__35_0: + case 
MCK__HASH_0: ExpectedVal = 0; break; - case MCK__35_1: + case MCK__HASH_1: ExpectedVal = 1; break; - case MCK__35_12: + case MCK__HASH_12: ExpectedVal = 12; break; - case MCK__35_16: + case MCK__HASH_16: ExpectedVal = 16; break; - case MCK__35_2: + case MCK__HASH_2: ExpectedVal = 2; break; - case MCK__35_24: + case MCK__HASH_24: ExpectedVal = 24; break; - case MCK__35_3: + case MCK__HASH_3: ExpectedVal = 3; break; - case MCK__35_32: + case MCK__HASH_32: ExpectedVal = 32; break; - case MCK__35_4: + case MCK__HASH_4: ExpectedVal = 4; break; - case MCK__35_48: + case MCK__HASH_48: ExpectedVal = 48; break; - case MCK__35_6: + case MCK__HASH_6: ExpectedVal = 6; break; - case MCK__35_64: + case MCK__HASH_64: ExpectedVal = 64; break; - case MCK__35_8: + case MCK__HASH_8: ExpectedVal = 8; break; } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index 6418211a4f55..21ce5785ea5e 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -153,9 +153,8 @@ static unsigned AdrImmBits(unsigned Value) { static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target, uint64_t Value, MCContext &Ctx, const Triple &TheTriple, bool IsResolved) { - unsigned Kind = Fixup.getKind(); int64_t SignedValue = static_cast(Value); - switch (Kind) { + switch (Fixup.getTargetKind()) { default: llvm_unreachable("Unknown fixup kind!"); case AArch64::fixup_aarch64_pcrel_adr_imm21: @@ -574,7 +573,7 @@ public: case MCCFIInstruction::OpDefCfa: { // Defines a frame pointer. unsigned XReg = - getXRegFromWReg(MRI.getLLVMRegNum(Inst.getRegister(), true)); + getXRegFromWReg(*MRI.getLLVMRegNum(Inst.getRegister(), true)); // Other CFA registers than FP are not supported by compact unwind. // Fallback on DWARF. @@ -593,8 +592,8 @@ public: assert(FPPush.getOperation() == MCCFIInstruction::OpOffset && "Frame pointer not pushed!"); - unsigned LRReg = MRI.getLLVMRegNum(LRPush.getRegister(), true); - unsigned FPReg = MRI.getLLVMRegNum(FPPush.getRegister(), true); + unsigned LRReg = *MRI.getLLVMRegNum(LRPush.getRegister(), true); + unsigned FPReg = *MRI.getLLVMRegNum(FPPush.getRegister(), true); LRReg = getXRegFromWReg(LRReg); FPReg = getXRegFromWReg(FPReg); @@ -615,14 +614,14 @@ public: case MCCFIInstruction::OpOffset: { // Registers are saved in pairs. We expect there to be two consecutive // `.cfi_offset' instructions with the appropriate registers specified. - unsigned Reg1 = MRI.getLLVMRegNum(Inst.getRegister(), true); + unsigned Reg1 = *MRI.getLLVMRegNum(Inst.getRegister(), true); if (i + 1 == e) return CU::UNWIND_ARM64_MODE_DWARF; const MCCFIInstruction &Inst2 = Instrs[++i]; if (Inst2.getOperation() != MCCFIInstruction::OpOffset) return CU::UNWIND_ARM64_MODE_DWARF; - unsigned Reg2 = MRI.getLLVMRegNum(Inst2.getRegister(), true); + unsigned Reg2 = *MRI.getLLVMRegNum(Inst2.getRegister(), true); // N.B. The encodings must be in register number order, and the X // registers before the D registers. 
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index c871e2c62eac..0fd1ca187be7 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -57,7 +57,7 @@ AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI, bool IsILP32) static bool isNonILP32reloc(const MCFixup &Fixup, AArch64MCExpr::VariantKind RefKind, MCContext &Ctx) { - if ((unsigned)Fixup.getKind() != AArch64::fixup_aarch64_movw) + if (Fixup.getTargetKind() != AArch64::fixup_aarch64_movw) return false; switch (RefKind) { case AArch64MCExpr::VK_ABS_G3: @@ -120,7 +120,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, "Should only be expression-level modifiers here"); if (IsPCRel) { - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { case FK_Data_1: Ctx.reportError(Fixup.getLoc(), "1-byte data relocations not supported"); return ELF::R_AARCH64_NONE; @@ -184,7 +184,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, } else { if (IsILP32 && isNonILP32reloc(Fixup, RefKind, Ctx)) return ELF::R_AARCH64_NONE; - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { case FK_NONE: return ELF::R_AARCH64_NONE; case FK_Data_1: @@ -394,6 +394,20 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, return R_CLS(MOVW_SABS_G0); if (RefKind == AArch64MCExpr::VK_ABS_G0_NC) return R_CLS(MOVW_UABS_G0_NC); + if (RefKind == AArch64MCExpr::VK_PREL_G3) + return ELF::R_AARCH64_MOVW_PREL_G3; + if (RefKind == AArch64MCExpr::VK_PREL_G2) + return ELF::R_AARCH64_MOVW_PREL_G2; + if (RefKind == AArch64MCExpr::VK_PREL_G2_NC) + return ELF::R_AARCH64_MOVW_PREL_G2_NC; + if (RefKind == AArch64MCExpr::VK_PREL_G1) + return R_CLS(MOVW_PREL_G1); + if (RefKind == AArch64MCExpr::VK_PREL_G1_NC) + return ELF::R_AARCH64_MOVW_PREL_G1_NC; + if (RefKind == AArch64MCExpr::VK_PREL_G0) + return R_CLS(MOVW_PREL_G0); + if (RefKind == AArch64MCExpr::VK_PREL_G0_NC) + return R_CLS(MOVW_PREL_G0_NC); if (RefKind == AArch64MCExpr::VK_DTPREL_G2) return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2; if (RefKind == AArch64MCExpr::VK_DTPREL_G1) @@ -434,5 +448,5 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, std::unique_ptr llvm::createAArch64ELFObjectWriter(uint8_t OSABI, bool IsILP32) { - return llvm::make_unique(OSABI, IsILP32); + return std::make_unique(OSABI, IsILP32); } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp index d0a544273b8b..1a16468484ad 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp @@ -172,7 +172,8 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, int ImmS = MI->getOperand(4).getImm(); if ((Op2.getReg() == AArch64::WZR || Op2.getReg() == AArch64::XZR) && - (ImmR == 0 || ImmS < ImmR)) { + (ImmR == 0 || ImmS < ImmR) && + STI.getFeatureBits()[AArch64::HasV8_2aOps]) { // BFC takes precedence over its entire range, sligtly differently to BFI. int BitWidth = Opcode == AArch64::BFMXri ? 
64 : 32; int LSB = (BitWidth - ImmR) % BitWidth; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index ecff1ab0a8b3..5926a4f81616 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -30,7 +30,7 @@ static cl::opt AsmWriterVariant( cl::values(clEnumValN(Generic, "generic", "Emit generic NEON assembly"), clEnumValN(Apple, "apple", "Emit Apple-style NEON assembly"))); -AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() { +AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin(bool IsILP32) { // We prefer NEON instructions to be printed in the short, Apple-specific // form when targeting Darwin. AssemblerDialect = AsmWriterVariant == Default ? Apple : AsmWriterVariant; @@ -39,7 +39,8 @@ AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() { PrivateLabelPrefix = "L"; SeparatorString = "%%"; CommentString = ";"; - CodePointerSize = CalleeSaveStackSlotSize = 8; + CalleeSaveStackSlotSize = 8; + CodePointerSize = IsILP32 ? 4 : 8; AlignmentIsInBytes = false; UsesELFSectionDirectiveForBSS = true; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h index 36ae92afc8c1..7274ae79f74a 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h @@ -23,7 +23,7 @@ class Target; class Triple; struct AArch64MCAsmInfoDarwin : public MCAsmInfoDarwin { - explicit AArch64MCAsmInfoDarwin(); + explicit AArch64MCAsmInfoDarwin(bool IsILP32); const MCExpr * getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const override; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp index 0a529321edc8..548e399e05a3 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp @@ -42,6 +42,13 @@ StringRef AArch64MCExpr::getVariantKindName() const { case VK_ABS_G0: return ":abs_g0:"; case VK_ABS_G0_S: return ":abs_g0_s:"; case VK_ABS_G0_NC: return ":abs_g0_nc:"; + case VK_PREL_G3: return ":prel_g3:"; + case VK_PREL_G2: return ":prel_g2:"; + case VK_PREL_G2_NC: return ":prel_g2_nc:"; + case VK_PREL_G1: return ":prel_g1:"; + case VK_PREL_G1_NC: return ":prel_g1_nc:"; + case VK_PREL_G0: return ":prel_g0:"; + case VK_PREL_G0_NC: return ":prel_g0_nc:"; case VK_DTPREL_G2: return ":dtprel_g2:"; case VK_DTPREL_G1: return ":dtprel_g1:"; case VK_DTPREL_G1_NC: return ":dtprel_g1_nc:"; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h index ec9c95911628..a82ff2e91426 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h @@ -27,12 +27,13 @@ public: // symbol. E.g. direct, via the GOT, ... 
VK_ABS = 0x001, VK_SABS = 0x002, - VK_GOT = 0x003, - VK_DTPREL = 0x004, - VK_GOTTPREL = 0x005, - VK_TPREL = 0x006, - VK_TLSDESC = 0x007, - VK_SECREL = 0x008, + VK_PREL = 0x003, + VK_GOT = 0x004, + VK_DTPREL = 0x005, + VK_GOTTPREL = 0x006, + VK_TPREL = 0x007, + VK_TLSDESC = 0x008, + VK_SECREL = 0x009, VK_SymLocBits = 0x00f, // Variants specifying which part of the final address calculation is @@ -72,6 +73,13 @@ public: VK_ABS_G0_S = VK_SABS | VK_G0, VK_ABS_G0_NC = VK_ABS | VK_G0 | VK_NC, VK_LO12 = VK_ABS | VK_PAGEOFF | VK_NC, + VK_PREL_G3 = VK_PREL | VK_G3, + VK_PREL_G2 = VK_PREL | VK_G2, + VK_PREL_G2_NC = VK_PREL | VK_G2 | VK_NC, + VK_PREL_G1 = VK_PREL | VK_G1, + VK_PREL_G1_NC = VK_PREL | VK_G1 | VK_NC, + VK_PREL_G0 = VK_PREL | VK_G0, + VK_PREL_G0_NC = VK_PREL | VK_G0 | VK_NC, VK_GOT_LO12 = VK_GOT | VK_PAGEOFF | VK_NC, VK_GOT_PAGE = VK_GOT | VK_PAGE, VK_DTPREL_G2 = VK_DTPREL | VK_G2, diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index df12274d9470..1d583ec0087b 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -241,7 +241,7 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI, const Triple &TheTriple) { MCAsmInfo *MAI; if (TheTriple.isOSBinFormatMachO()) - MAI = new AArch64MCAsmInfoDarwin(); + MAI = new AArch64MCAsmInfoDarwin(TheTriple.getArch() == Triple::aarch64_32); else if (TheTriple.isWindowsMSVCEnvironment()) MAI = new AArch64MCAsmInfoMicrosoftCOFF(); else if (TheTriple.isOSBinFormatCOFF()) diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index b3ce5ef22eef..fc04d37eb362 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -54,7 +54,7 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo( RelocType = unsigned(MachO::ARM64_RELOC_UNSIGNED); Log2Size = ~0U; - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { default: return false; @@ -406,6 +406,6 @@ void AArch64MachObjectWriter::recordRelocation( std::unique_ptr llvm::createAArch64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype, bool IsILP32) { - return llvm::make_unique(CPUType, CPUSubtype, + return std::make_unique(CPUType, CPUSubtype, IsILP32); } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp index a45880a07427..aa50bd05cb71 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp @@ -120,7 +120,7 @@ bool AArch64WinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const { namespace llvm { std::unique_ptr createAArch64WinCOFFObjectWriter() { - return llvm::make_unique(); + return std::make_unique(); } } // end namespace llvm diff --git a/lib/Target/AArch64/SVEInstrFormats.td b/lib/Target/AArch64/SVEInstrFormats.td index 808e59467081..8ccf6aa675ba 100644 --- a/lib/Target/AArch64/SVEInstrFormats.td +++ b/lib/Target/AArch64/SVEInstrFormats.td @@ -279,6 +279,19 @@ let Predicates = [HasSVE] in { defm PTRUES : sve_int_ptrue<0b001, "ptrues">; } +//===----------------------------------------------------------------------===// +// SVE pattern match helpers. 
+//===----------------------------------------------------------------------===// + +class SVE_1_Op_Pat +: Pat<(vtd (op vt1:$Op1)), + (inst $Op1)>; + +class SVE_3_Op_Pat +: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)), + (inst $Op1, $Op2, $Op3)>; //===----------------------------------------------------------------------===// // SVE Predicate Misc Group @@ -403,12 +416,12 @@ multiclass sve_int_count_r_x64 opc, string asm> { } class sve_int_count_v sz8_64, bits<5> opc, string asm, - ZPRRegOp zprty> -: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, PPRAny:$Pg), - asm, "\t$Zdn, $Pg", + ZPRRegOp zprty, PPRRegOp pprty> +: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, pprty:$Pm), + asm, "\t$Zdn, $Pm", "", []>, Sched<[]> { - bits<4> Pg; + bits<4> Pm; bits<5> Zdn; let Inst{31-24} = 0b00100101; let Inst{23-22} = sz8_64; @@ -416,7 +429,7 @@ class sve_int_count_v sz8_64, bits<5> opc, string asm, let Inst{18-16} = opc{4-2}; let Inst{15-11} = 0b10000; let Inst{10-9} = opc{1-0}; - let Inst{8-5} = Pg; + let Inst{8-5} = Pm; let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; @@ -425,9 +438,16 @@ class sve_int_count_v sz8_64, bits<5> opc, string asm, } multiclass sve_int_count_v opc, string asm> { - def _H : sve_int_count_v<0b01, opc, asm, ZPR16>; - def _S : sve_int_count_v<0b10, opc, asm, ZPR32>; - def _D : sve_int_count_v<0b11, opc, asm, ZPR64>; + def _H : sve_int_count_v<0b01, opc, asm, ZPR16, PPR16>; + def _S : sve_int_count_v<0b10, opc, asm, ZPR32, PPR32>; + def _D : sve_int_count_v<0b11, opc, asm, ZPR64, PPR64>; + + def : InstAlias(NAME # "_H") ZPR16:$Zdn, PPRAny:$Pm), 0>; + def : InstAlias(NAME # "_S") ZPR32:$Zdn, PPRAny:$Pm), 0>; + def : InstAlias(NAME # "_D") ZPR64:$Zdn, PPRAny:$Pm), 0>; } class sve_int_pcount_pred sz8_64, bits<4> opc, string asm, @@ -609,11 +629,12 @@ multiclass sve_int_pred_pattern_b_x64 opc, string asm> { //===----------------------------------------------------------------------===// class sve_int_perm_dup_r sz8_64, string asm, ZPRRegOp zprty, - RegisterClass srcRegType> + ValueType vt, RegisterClass srcRegType, + SDPatternOperator op> : I<(outs zprty:$Zd), (ins srcRegType:$Rn), asm, "\t$Zd, $Rn", "", - []>, Sched<[]> { + [(set (vt zprty:$Zd), (op srcRegType:$Rn))]>, Sched<[]> { bits<5> Rn; bits<5> Zd; let Inst{31-24} = 0b00000101; @@ -623,11 +644,11 @@ class sve_int_perm_dup_r sz8_64, string asm, ZPRRegOp zprty, let Inst{4-0} = Zd; } -multiclass sve_int_perm_dup_r { - def _B : sve_int_perm_dup_r<0b00, asm, ZPR8, GPR32sp>; - def _H : sve_int_perm_dup_r<0b01, asm, ZPR16, GPR32sp>; - def _S : sve_int_perm_dup_r<0b10, asm, ZPR32, GPR32sp>; - def _D : sve_int_perm_dup_r<0b11, asm, ZPR64, GPR64sp>; +multiclass sve_int_perm_dup_r { + def _B : sve_int_perm_dup_r<0b00, asm, ZPR8, nxv16i8, GPR32sp, op>; + def _H : sve_int_perm_dup_r<0b01, asm, ZPR16, nxv8i16, GPR32sp, op>; + def _S : sve_int_perm_dup_r<0b10, asm, ZPR32, nxv4i32, GPR32sp, op>; + def _D : sve_int_perm_dup_r<0b11, asm, ZPR64, nxv2i64, GPR64sp, op>; def : InstAlias<"mov $Zd, $Rn", (!cast(NAME # _B) ZPR8:$Zd, GPR32sp:$Rn), 1>; @@ -744,7 +765,7 @@ multiclass sve2_int_perm_tbl { } class sve2_int_perm_tbx sz8_64, string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm), +: I<(outs zprty:$Zd), (ins zprty:$_Zd, zprty:$Zn, zprty:$Zm), asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { @@ -758,6 +779,8 @@ class sve2_int_perm_tbx sz8_64, string asm, ZPRRegOp zprty> let Inst{15-10} = 0b001011; let Inst{9-5} = Zn; let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; } multiclass sve2_int_perm_tbx { @@ -826,10 +849,14 @@ class 
sve_int_perm_unpk sz16_64, bits<2> opc, string asm, let Inst{4-0} = Zd; } -multiclass sve_int_perm_unpk opc, string asm> { +multiclass sve_int_perm_unpk opc, string asm, SDPatternOperator op> { def _H : sve_int_perm_unpk<0b01, opc, asm, ZPR16, ZPR8>; def _S : sve_int_perm_unpk<0b10, opc, asm, ZPR32, ZPR16>; def _D : sve_int_perm_unpk<0b11, opc, asm, ZPR64, ZPR32>; + + def : SVE_1_Op_Pat(NAME # _H)>; + def : SVE_1_Op_Pat(NAME # _S)>; + def : SVE_1_Op_Pat(NAME # _D)>; } class sve_int_perm_insrs sz8_64, string asm, ZPRRegOp zprty, @@ -1197,10 +1224,12 @@ multiclass sve_fp_ftmad { //===----------------------------------------------------------------------===// class sve_fp_3op_u_zd sz, bits<3> opc, string asm, - ZPRRegOp zprty> + ZPRRegOp zprty, + ValueType vt, ValueType vt2, SDPatternOperator op> : I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm), asm, "\t$Zd, $Zn, $Zm", - "", []>, Sched<[]> { + "", + [(set (vt zprty:$Zd), (op (vt zprty:$Zn), (vt2 zprty:$Zm)))]>, Sched<[]> { bits<5> Zd; bits<5> Zm; bits<5> Zn; @@ -1214,10 +1243,10 @@ class sve_fp_3op_u_zd sz, bits<3> opc, string asm, let Inst{4-0} = Zd; } -multiclass sve_fp_3op_u_zd opc, string asm> { - def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16>; - def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32>; - def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64>; +multiclass sve_fp_3op_u_zd opc, string asm, SDPatternOperator op> { + def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16, nxv8f16, nxv8f16, op>; + def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32, nxv4f32, nxv4f32, op>; + def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64, nxv2f64, nxv2f64, op>; } //===----------------------------------------------------------------------===// @@ -1489,7 +1518,7 @@ multiclass sve_fp_fcadd { class sve2_fp_convert_precision opc, string asm, ZPRRegOp zprty1, ZPRRegOp zprty2> -: I<(outs zprty1:$Zd), (ins PPR3bAny:$Pg, zprty2:$Zn), +: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, PPR3bAny:$Pg, zprty2:$Zn), asm, "\t$Zd, $Pg/m, $Zn", "", []>, Sched<[]> { @@ -1504,6 +1533,8 @@ class sve2_fp_convert_precision opc, string asm, let Inst{12-10} = Pg; let Inst{9-5} = Zn; let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; } multiclass sve2_fp_convert_down_narrow { @@ -1998,12 +2029,14 @@ class sve_intx_dot { +multiclass sve_intx_dot { def _S : sve_intx_dot<0b0, opc, asm, ZPR32, ZPR8>; def _D : sve_intx_dot<0b1, opc, asm, ZPR64, ZPR16>; + + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; } //===----------------------------------------------------------------------===// @@ -2028,22 +2061,27 @@ class sve_intx_dot_by_indexed_elem { - def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS> { +multiclass sve_intx_dot_by_indexed_elem { + def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b> { bits<2> iop; bits<3> Zm; let Inst{20-19} = iop; let Inst{18-16} = Zm; } - def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD> { + def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b> { bits<1> iop; bits<4> Zm; let Inst{20} = iop; let Inst{19-16} = Zm; } + + def : Pat<(nxv4i32 (op nxv4i32:$Op1, nxv16i8:$Op2, nxv16i8:$Op3, (i32 VectorIndexS32b:$idx))), + (!cast(NAME # _S) $Op1, $Op2, $Op3, VectorIndexS32b:$idx)>; + def : Pat<(nxv2i64 (op nxv2i64:$Op1, nxv8i16:$Op2, nxv8i16:$Op3, (i32 VectorIndexD32b:$idx))), + (!cast(NAME # _D) $Op1, $Op2, $Op3, VectorIndexD32b:$idx)>; } 
//===----------------------------------------------------------------------===// @@ -2399,21 +2437,40 @@ multiclass sve2_misc_bitwise opc, string asm> { def _D : sve2_misc<0b11, opc, asm, ZPR64, ZPR64>; } -multiclass sve2_bitwise_xor_interleaved { - let DestructiveInstType = Destructive, ElementSize = ElementSizeNone in { - def _B : sve2_misc<0b00, { 0b010, opc }, asm, ZPR8, ZPR8>; - def _H : sve2_misc<0b01, { 0b010, opc }, asm, ZPR16, ZPR16>; - def _S : sve2_misc<0b10, { 0b010, opc }, asm, ZPR32, ZPR32>; - def _D : sve2_misc<0b11, { 0b010, opc }, asm, ZPR64, ZPR64>; - } -} - multiclass sve2_misc_int_addsub_long_interleaved opc, string asm> { def _H : sve2_misc<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>; def _S : sve2_misc<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>; def _D : sve2_misc<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>; } +class sve2_bitwise_xor_interleaved sz, bits<1> opc, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2> +: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, zprty2:$Zm), + asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + bits<5> Zm; + let Inst{31-24} = 0b01000101; + let Inst{23-22} = sz; + let Inst{21} = 0b0; + let Inst{20-16} = Zm; + let Inst{15-11} = 0b10010; + let Inst{10} = opc; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve2_bitwise_xor_interleaved { + def _B : sve2_bitwise_xor_interleaved<0b00, opc, asm, ZPR8, ZPR8>; + def _H : sve2_bitwise_xor_interleaved<0b01, opc, asm, ZPR16, ZPR16>; + def _S : sve2_bitwise_xor_interleaved<0b10, opc, asm, ZPR32, ZPR32>; + def _D : sve2_bitwise_xor_interleaved<0b11, opc, asm, ZPR64, ZPR64>; +} + class sve2_bitwise_shift_left_long tsz8_64, bits<2> opc, string asm, ZPRRegOp zprty1, ZPRRegOp zprty2, Operand immtype> @@ -2451,9 +2508,9 @@ multiclass sve2_bitwise_shift_left_long opc, string asm> { // SVE2 Accumulate Group //===----------------------------------------------------------------------===// -class sve2_int_bin_cons_shift_imm tsz8_64, bit opc, string asm, - ZPRRegOp zprty, Operand immtype> -: I<(outs zprty:$Zd), (ins zprty:$Zn, immtype:$imm), +class sve2_int_bin_shift_imm tsz8_64, bit opc, string asm, + ZPRRegOp zprty, Operand immtype> +: I<(outs zprty:$Zd), (ins zprty:$_Zd, zprty:$Zn, immtype:$imm), asm, "\t$Zd, $Zn, $imm", "", []>, Sched<[]> { bits<5> Zd; @@ -2468,38 +2525,40 @@ class sve2_int_bin_cons_shift_imm tsz8_64, bit opc, string asm, let Inst{10} = opc; let Inst{9-5} = Zn; let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; } -multiclass sve2_int_bin_cons_shift_imm_left { - def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; - def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { +multiclass sve2_int_bin_shift_imm_left { + def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; + def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { let Inst{19} = imm{3}; } - def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { + def _S : sve2_int_bin_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { let Inst{20-19} = imm{4-3}; } - def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { + def _D : sve2_int_bin_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { let Inst{22} = imm{5}; let Inst{20-19} = imm{4-3}; } } -multiclass sve2_int_bin_cons_shift_imm_right { - def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, 
vecshiftR8>; - def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { +multiclass sve2_int_bin_shift_imm_right { + def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; + def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { let Inst{19} = imm{3}; } - def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { + def _S : sve2_int_bin_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { let Inst{20-19} = imm{4-3}; } - def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { + def _D : sve2_int_bin_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { let Inst{22} = imm{5}; let Inst{20-19} = imm{4-3}; } } -class sve2_int_bin_accum_cons_shift_imm tsz8_64, bits<2> opc, string asm, - ZPRRegOp zprty, Operand immtype> +class sve2_int_bin_accum_shift_imm tsz8_64, bits<2> opc, string asm, + ZPRRegOp zprty, Operand immtype> : I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, immtype:$imm), asm, "\t$Zda, $Zn, $imm", "", []>, Sched<[]> { @@ -2521,15 +2580,15 @@ class sve2_int_bin_accum_cons_shift_imm tsz8_64, bits<2> opc, string asm let ElementSize = ElementSizeNone; } -multiclass sve2_int_bin_accum_cons_shift_imm_right opc, string asm> { - def _B : sve2_int_bin_accum_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; - def _H : sve2_int_bin_accum_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { +multiclass sve2_int_bin_accum_shift_imm_right opc, string asm> { + def _B : sve2_int_bin_accum_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; + def _H : sve2_int_bin_accum_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { let Inst{19} = imm{3}; } - def _S : sve2_int_bin_accum_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { + def _S : sve2_int_bin_accum_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { let Inst{20-19} = imm{4-3}; } - def _D : sve2_int_bin_accum_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { + def _D : sve2_int_bin_accum_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { let Inst{22} = imm{5}; let Inst{20-19} = imm{4-3}; } @@ -2607,9 +2666,9 @@ multiclass sve2_int_addsub_long_carry opc, string asm> { // SVE2 Narrowing Group //===----------------------------------------------------------------------===// -class sve2_int_bin_cons_shift_imm_narrow tsz8_64, bits<4> opc, - string asm, ZPRRegOp zprty1, - ZPRRegOp zprty2, Operand immtype> +class sve2_int_bin_shift_imm_narrow_bottom tsz8_64, bits<3> opc, + string asm, ZPRRegOp zprty1, + ZPRRegOp zprty2, Operand immtype> : I<(outs zprty1:$Zd), (ins zprty2:$Zn, immtype:$imm), asm, "\t$Zd, $Zn, $imm", "", []>, Sched<[]> { @@ -2622,26 +2681,63 @@ class sve2_int_bin_cons_shift_imm_narrow tsz8_64, bits<4> opc, let Inst{20-19} = tsz8_64{1-0}; let Inst{18-16} = imm{2-0}; // imm3 let Inst{15-14} = 0b00; - let Inst{13-10} = opc; + let Inst{13-11} = opc; + let Inst{10} = 0b0; let Inst{9-5} = Zn; let Inst{4-0} = Zd; } -multiclass sve2_int_bin_cons_shift_imm_right_narrow opc, string asm> { - def _B : sve2_int_bin_cons_shift_imm_narrow<{0,0,1}, opc, asm, ZPR8, ZPR16, - vecshiftR8>; - def _H : sve2_int_bin_cons_shift_imm_narrow<{0,1,?}, opc, asm, ZPR16, ZPR32, - vecshiftR16> { +multiclass sve2_int_bin_shift_imm_right_narrow_bottom opc, string asm> { + def _B : sve2_int_bin_shift_imm_narrow_bottom<{0,0,1}, opc, asm, ZPR8, ZPR16, + vecshiftR8>; + def _H : sve2_int_bin_shift_imm_narrow_bottom<{0,1,?}, opc, asm, ZPR16, ZPR32, + vecshiftR16> { let Inst{19} = imm{3}; } - def _S : sve2_int_bin_cons_shift_imm_narrow<{1,?,?}, 
opc, asm, ZPR32, ZPR64, - vecshiftR32> { + def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64, + vecshiftR32> { let Inst{20-19} = imm{4-3}; } } -class sve2_int_addsub_narrow_high sz, bits<3> opc, string asm, - ZPRRegOp zprty1, ZPRRegOp zprty2> +class sve2_int_bin_shift_imm_narrow_top tsz8_64, bits<3> opc, + string asm, ZPRRegOp zprty1, + ZPRRegOp zprty2, Operand immtype> +: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, immtype:$imm), + asm, "\t$Zd, $Zn, $imm", + "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + bits<5> imm; + let Inst{31-23} = 0b010001010; + let Inst{22} = tsz8_64{2}; + let Inst{21} = 0b1; + let Inst{20-19} = tsz8_64{1-0}; + let Inst{18-16} = imm{2-0}; // imm3 + let Inst{15-14} = 0b00; + let Inst{13-11} = opc; + let Inst{10} = 0b1; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; +} + +multiclass sve2_int_bin_shift_imm_right_narrow_top opc, string asm> { + def _B : sve2_int_bin_shift_imm_narrow_top<{0,0,1}, opc, asm, ZPR8, ZPR16, + vecshiftR8>; + def _H : sve2_int_bin_shift_imm_narrow_top<{0,1,?}, opc, asm, ZPR16, ZPR32, + vecshiftR16> { + let Inst{19} = imm{3}; + } + def _S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64, + vecshiftR32> { + let Inst{20-19} = imm{4-3}; + } +} + +class sve2_int_addsub_narrow_high_bottom sz, bits<2> opc, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2> : I<(outs zprty1:$Zd), (ins zprty2:$Zn, zprty2:$Zm), asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { bits<5> Zd; @@ -2652,19 +2748,46 @@ class sve2_int_addsub_narrow_high sz, bits<3> opc, string asm, let Inst{21} = 0b1; let Inst{20-16} = Zm; let Inst{15-13} = 0b011; - let Inst{12-10} = opc; // S, R, T + let Inst{12-11} = opc; // S, R + let Inst{10} = 0b0; // Top let Inst{9-5} = Zn; let Inst{4-0} = Zd; } -multiclass sve2_int_addsub_narrow_high opc, string asm> { - def _B : sve2_int_addsub_narrow_high<0b01, opc, asm, ZPR8, ZPR16>; - def _H : sve2_int_addsub_narrow_high<0b10, opc, asm, ZPR16, ZPR32>; - def _S : sve2_int_addsub_narrow_high<0b11, opc, asm, ZPR32, ZPR64>; +multiclass sve2_int_addsub_narrow_high_bottom opc, string asm> { + def _B : sve2_int_addsub_narrow_high_bottom<0b01, opc, asm, ZPR8, ZPR16>; + def _H : sve2_int_addsub_narrow_high_bottom<0b10, opc, asm, ZPR16, ZPR32>; + def _S : sve2_int_addsub_narrow_high_bottom<0b11, opc, asm, ZPR32, ZPR64>; +} + +class sve2_int_addsub_narrow_high_top sz, bits<2> opc, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2> +: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, zprty2:$Zm), + asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + bits<5> Zm; + let Inst{31-24} = 0b01000101; + let Inst{23-22} = sz; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15-13} = 0b011; + let Inst{12-11} = opc; // S, R + let Inst{10} = 0b1; // Top + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; +} + +multiclass sve2_int_addsub_narrow_high_top opc, string asm> { + def _B : sve2_int_addsub_narrow_high_top<0b01, opc, asm, ZPR8, ZPR16>; + def _H : sve2_int_addsub_narrow_high_top<0b10, opc, asm, ZPR16, ZPR32>; + def _S : sve2_int_addsub_narrow_high_top<0b11, opc, asm, ZPR32, ZPR64>; } -class sve2_int_sat_extract_narrow tsz8_64, bits<3> opc, string asm, - ZPRRegOp zprty1, ZPRRegOp zprty2> +class sve2_int_sat_extract_narrow_bottom tsz8_64, bits<2> opc, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2> : I<(outs zprty1:$Zd), (ins zprty2:$Zn), asm, "\t$Zd, $Zn", "", []>, Sched<[]> { bits<5> Zd; @@ -2674,15 +2797,41 @@ class 
sve2_int_sat_extract_narrow tsz8_64, bits<3> opc, string asm, let Inst{21} = 0b1; let Inst{20-19} = tsz8_64{1-0}; let Inst{18-13} = 0b000010; - let Inst{12-10} = opc; + let Inst{12-11} = opc; + let Inst{10} = 0b0; let Inst{9-5} = Zn; let Inst{4-0} = Zd; } -multiclass sve2_int_sat_extract_narrow opc, string asm> { - def _B : sve2_int_sat_extract_narrow<0b001, opc, asm, ZPR8, ZPR16>; - def _H : sve2_int_sat_extract_narrow<0b010, opc, asm, ZPR16, ZPR32>; - def _S : sve2_int_sat_extract_narrow<0b100, opc, asm, ZPR32, ZPR64>; +multiclass sve2_int_sat_extract_narrow_bottom opc, string asm> { + def _B : sve2_int_sat_extract_narrow_bottom<0b001, opc, asm, ZPR8, ZPR16>; + def _H : sve2_int_sat_extract_narrow_bottom<0b010, opc, asm, ZPR16, ZPR32>; + def _S : sve2_int_sat_extract_narrow_bottom<0b100, opc, asm, ZPR32, ZPR64>; +} + +class sve2_int_sat_extract_narrow_top tsz8_64, bits<2> opc, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2> +: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn), + asm, "\t$Zd, $Zn", "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + let Inst{31-23} = 0b010001010; + let Inst{22} = tsz8_64{2}; + let Inst{21} = 0b1; + let Inst{20-19} = tsz8_64{1-0}; + let Inst{18-13} = 0b000010; + let Inst{12-11} = opc; + let Inst{10} = 0b1; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; +} + +multiclass sve2_int_sat_extract_narrow_top opc, string asm> { + def _B : sve2_int_sat_extract_narrow_top<0b001, opc, asm, ZPR8, ZPR16>; + def _H : sve2_int_sat_extract_narrow_top<0b010, opc, asm, ZPR16, ZPR32>; + def _S : sve2_int_sat_extract_narrow_top<0b100, opc, asm, ZPR32, ZPR64>; } //===----------------------------------------------------------------------===// @@ -2713,11 +2862,17 @@ class sve_int_un_pred_arit sz8_64, bits<4> opc, let ElementSize = zprty.ElementSize; } -multiclass sve_int_un_pred_arit_0 opc, string asm> { +multiclass sve_int_un_pred_arit_0 opc, string asm, + SDPatternOperator op> { def _B : sve_int_un_pred_arit<0b00, { opc, 0b0 }, asm, ZPR8>; def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>; def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>; def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>; + + def : SVE_3_Op_Pat(NAME # _B)>; + def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; } multiclass sve_int_un_pred_arit_0_h opc, string asm> { @@ -2735,11 +2890,21 @@ multiclass sve_int_un_pred_arit_0_d opc, string asm> { def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>; } -multiclass sve_int_un_pred_arit_1 opc, string asm> { +multiclass sve_int_un_pred_arit_1 opc, string asm, + SDPatternOperator op> { def _B : sve_int_un_pred_arit<0b00, { opc, 0b1 }, asm, ZPR8>; def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>; def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>; def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>; + + def : SVE_3_Op_Pat(NAME # _B)>; + def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; + + def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; } multiclass sve_int_un_pred_arit_1_fp opc, string asm> { @@ -3886,9 +4051,9 @@ multiclass sve_mem_cstnt_ss msz, string asm, RegisterOperand listty, (!cast(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>; } -class sve2_mem_cstnt_vs_base opc, dag iops, string asm, - RegisterOperand VecList> -: I<(outs VecList:$Zt), iops, +class sve2_mem_sstnt_vs_base opc, string asm, + 
RegisterOperand listty, ZPRRegOp zprty> +: I<(outs), (ins listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), asm, "\t$Zt, $Pg, [$Zn, $Rm]", "", []>, Sched<[]> { @@ -3908,17 +4073,14 @@ class sve2_mem_cstnt_vs_base opc, dag iops, string asm, let mayStore = 1; } -multiclass sve2_mem_cstnt_vs opc, string asm, +multiclass sve2_mem_sstnt_vs opc, string asm, RegisterOperand listty, ZPRRegOp zprty> { - def _REAL : sve2_mem_cstnt_vs_base; + def _REAL : sve2_mem_sstnt_vs_base; def : InstAlias(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>; def : InstAlias(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>; - def : InstAlias(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>; def : InstAlias(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>; } @@ -4147,6 +4309,14 @@ class sve_int_perm_punpk let Inst{3-0} = Pd; } +multiclass sve_int_perm_punpk { + def NAME : sve_int_perm_punpk; + + def : SVE_1_Op_Pat(NAME)>; + def : SVE_1_Op_Pat(NAME)>; + def : SVE_1_Op_Pat(NAME)>; +} + class sve_int_rdffr_pred : I<(outs PPR8:$Pd), (ins PPRAny:$Pg), asm, "\t$Pd, $Pg/z", @@ -5094,7 +5264,7 @@ multiclass sve_mem_p_fill { (!cast(NAME) PPRAny:$Pt, GPR64sp:$Rn, 0), 1>; } -class sve2_mem_cldnt_vs_base opc, dag iops, string asm, +class sve2_mem_gldnt_vs_base opc, dag iops, string asm, RegisterOperand VecList> : I<(outs VecList:$Zt), iops, asm, "\t$Zt, $Pg/z, [$Zn, $Rm]", @@ -5119,17 +5289,15 @@ class sve2_mem_cldnt_vs_base opc, dag iops, string asm, let mayLoad = 1; } -multiclass sve2_mem_cldnt_vs opc, string asm, +multiclass sve2_mem_gldnt_vs opc, string asm, RegisterOperand listty, ZPRRegOp zprty> { - def _REAL : sve2_mem_cldnt_vs_base; def : InstAlias(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>; def : InstAlias(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>; - def : InstAlias(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>; def : InstAlias(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>; } diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index 7bb075c36e79..c27fc7a112ec 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -125,7 +125,7 @@ namespace llvm { uint32_t AArch64SysReg::parseGenericRegister(StringRef Name) { // Try to parse an S____ register name - Regex GenericRegPattern("^S([0-3])_([0-7])_C([0-9]|1[0-5])_C([0-9]|1[0-5])_([0-7])$"); + static const Regex GenericRegPattern("^S([0-3])_([0-7])_C([0-9]|1[0-5])_C([0-9]|1[0-5])_([0-7])$"); std::string UpperName = Name.upper(); SmallVector Ops; diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h index e5e2fc2cb0df..7a4fcac09ec4 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -313,9 +313,9 @@ struct SysAlias { uint16_t Encoding; FeatureBitset FeaturesRequired; - SysAlias (const char *N, uint16_t E) : Name(N), Encoding(E) {}; - SysAlias (const char *N, uint16_t E, FeatureBitset F) : - Name(N), Encoding(E), FeaturesRequired(F) {}; + constexpr SysAlias(const char *N, uint16_t E) : Name(N), Encoding(E) {} + constexpr SysAlias(const char *N, uint16_t E, FeatureBitset F) + : Name(N), Encoding(E), FeaturesRequired(F) {} bool haveFeatures(FeatureBitset ActiveFeatures) const { return (FeaturesRequired & ActiveFeatures) == FeaturesRequired; @@ -326,9 +326,10 @@ struct SysAlias { struct SysAliasReg : SysAlias { bool NeedsReg; - SysAliasReg(const char 
*N, uint16_t E, bool R) : SysAlias(N, E), NeedsReg(R) {}; - SysAliasReg(const char *N, uint16_t E, bool R, FeatureBitset F) : SysAlias(N, E, F), - NeedsReg(R) {}; + constexpr SysAliasReg(const char *N, uint16_t E, bool R) + : SysAlias(N, E), NeedsReg(R) {} + constexpr SysAliasReg(const char *N, uint16_t E, bool R, FeatureBitset F) + : SysAlias(N, E, F), NeedsReg(R) {} }; namespace AArch64AT{ @@ -627,6 +628,18 @@ namespace AArch64II { /// MO_S - Indicates that the bits of the symbol operand represented by /// MO_G0 etc are signed. MO_S = 0x100, + + /// MO_PREL - Indicates that the bits of the symbol operand represented by + /// MO_G0 etc are PC relative. + MO_PREL = 0x200, + + /// MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag + /// in bits 56-63. + /// On a FrameIndex operand, indicates that the underlying memory is tagged + /// with an unknown tag value (MTE); this needs to be lowered either to an + /// SP-relative load or store instruction (which do not check tags), or to + /// an LDG instruction to obtain the tag value. + MO_TAGGED = 0x400, }; } // end namespace AArch64II diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h index 19a8bd901629..b64422ae5427 100644 --- a/lib/Target/AMDGPU/AMDGPU.h +++ b/lib/Target/AMDGPU/AMDGPU.h @@ -188,6 +188,10 @@ ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true); ModulePass *createR600OpenCLImageTypeLoweringPass(); FunctionPass *createAMDGPUAnnotateUniformValues(); +ModulePass *createAMDGPUPrintfRuntimeBinding(); +void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&); +extern char &AMDGPUPrintfRuntimeBindingID; + ModulePass* createAMDGPUUnifyMetadataPass(); void initializeAMDGPUUnifyMetadataPass(PassRegistry&); extern char &AMDGPUUnifyMetadataID; diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index baeba534012c..42b477e07b3b 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -10,6 +10,15 @@ include "llvm/TableGen/SearchableTable.td" include "llvm/Target/Target.td" include "AMDGPUFeatures.td" +def p0 : PtrValueType; +def p1 : PtrValueType; +def p2 : PtrValueType; +def p3 : PtrValueType; +def p4 : PtrValueType; +def p5 : PtrValueType; +def p6 : PtrValueType; + + class BoolToList { list ret = !if(Value, [1], []); } @@ -145,6 +154,12 @@ def FeatureLdsMisalignedBug : SubtargetFeature<"lds-misaligned-bug", "Some GFX10 bug with misaligned multi-dword LDS access in WGP mode" >; +def FeatureMFMAInlineLiteralBug : SubtargetFeature<"mfma-inline-literal-bug", + "HasMFMAInlineLiteralBug", + "true", + "MFMA cannot use inline literal as SrcC" +>; + def FeatureVcmpxPermlaneHazard : SubtargetFeature<"vcmpx-permlane-hazard", "HasVcmpxPermlaneHazard", "true", @@ -802,6 +817,7 @@ def FeatureISAVersion9_0_8 : FeatureSet< FeaturePkFmacF16Inst, FeatureAtomicFaddInsts, FeatureSRAMECC, + FeatureMFMAInlineLiteralBug, FeatureCodeObjectV3]>; def FeatureISAVersion9_0_9 : FeatureSet< diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp index 419ebb2240ad..e72b3f4fde63 100644 --- a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -173,6 +173,9 @@ static StringRef intrinsicToAttrName(Intrinsic::ID ID, case Intrinsic::amdgcn_implicitarg_ptr: return "amdgpu-implicitarg-ptr"; case Intrinsic::amdgcn_queue_ptr: + case Intrinsic::amdgcn_is_shared: + case Intrinsic::amdgcn_is_private: + // TODO: Does not require queue ptr on gfx9+ case Intrinsic::trap: 
case Intrinsic::debugtrap: IsQueuePtr = true; @@ -194,18 +197,12 @@ static bool handleAttr(Function &Parent, const Function &Callee, static void copyFeaturesToFunction(Function &Parent, const Function &Callee, bool &NeedQueuePtr) { // X ids unnecessarily propagated to kernels. - static const StringRef AttrNames[] = { - { "amdgpu-work-item-id-x" }, - { "amdgpu-work-item-id-y" }, - { "amdgpu-work-item-id-z" }, - { "amdgpu-work-group-id-x" }, - { "amdgpu-work-group-id-y" }, - { "amdgpu-work-group-id-z" }, - { "amdgpu-dispatch-ptr" }, - { "amdgpu-dispatch-id" }, - { "amdgpu-kernarg-segment-ptr" }, - { "amdgpu-implicitarg-ptr" } - }; + static constexpr StringLiteral AttrNames[] = { + "amdgpu-work-item-id-x", "amdgpu-work-item-id-y", + "amdgpu-work-item-id-z", "amdgpu-work-group-id-x", + "amdgpu-work-group-id-y", "amdgpu-work-group-id-z", + "amdgpu-dispatch-ptr", "amdgpu-dispatch-id", + "amdgpu-kernarg-segment-ptr", "amdgpu-implicitarg-ptr"}; if (handleAttr(Parent, Callee, "amdgpu-queue-ptr")) NeedQueuePtr = true; diff --git a/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h index 097730441ed8..f0e7ee910f95 100644 --- a/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h +++ b/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h @@ -48,8 +48,8 @@ public: return ArgDescriptor(Reg, Mask, false, true); } - static ArgDescriptor createStack(Register Reg, unsigned Mask = ~0u) { - return ArgDescriptor(Reg, Mask, true, true); + static ArgDescriptor createStack(unsigned Offset, unsigned Mask = ~0u) { + return ArgDescriptor(Offset, Mask, true, true); } static ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask) { diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 743ac64b8f10..f2d903c8e7b1 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -229,7 +229,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyEnd() { // alignment. Streamer.EmitValueToAlignment(64, 0, 1, 0); if (ReadOnlySection.getAlignment() < 64) - ReadOnlySection.setAlignment(64); + ReadOnlySection.setAlignment(Align(64)); const MCSubtargetInfo &STI = MF->getSubtarget(); @@ -273,7 +273,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { AsmPrinter::EmitFunctionEntryLabel(); } -void AMDGPUAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { +void AMDGPUAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) { if (DumpCodeInstEmitter && !isBlockOnlyReachableByFallthrough(&MBB)) { // Write a line for the basic block label if it is not only fallthrough. DisasmLines.push_back( @@ -342,6 +342,8 @@ bool AMDGPUAsmPrinter::doFinalization(Module &M) { // Print comments that apply to both callable functions and entry points. 
void AMDGPUAsmPrinter::emitCommonFunctionComments( uint32_t NumVGPR, + Optional NumAGPR, + uint32_t TotalNumVGPR, uint32_t NumSGPR, uint64_t ScratchSize, uint64_t CodeSize, @@ -349,6 +351,11 @@ void AMDGPUAsmPrinter::emitCommonFunctionComments( OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false); OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false); OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false); + if (NumAGPR) { + OutStreamer->emitRawComment(" NumAgprs: " + Twine(*NumAGPR), false); + OutStreamer->emitRawComment(" TotalNumVgprs: " + Twine(TotalNumVGPR), + false); + } OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false); OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()), false); @@ -417,7 +424,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // The starting address of all shader programs must be 256 bytes aligned. // Regular functions just need the basic required instruction alignment. - MF.setAlignment(MFI->isEntryFunction() ? 8 : 2); + MF.setAlignment(MFI->isEntryFunction() ? Align(256) : Align(4)); SetupMachineFunction(MF); @@ -474,6 +481,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { SIFunctionResourceInfo &Info = CallGraphResourceInfo[&MF.getFunction()]; emitCommonFunctionComments( Info.NumVGPR, + STM.hasMAIInsts() ? Info.NumAGPR : Optional(), + Info.getTotalNumVGPRs(STM), Info.getTotalNumSGPRs(MF.getSubtarget()), Info.PrivateSegmentSize, getFunctionCodeSize(MF), MFI); @@ -481,7 +490,11 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { } OutStreamer->emitRawComment(" Kernel info:", false); - emitCommonFunctionComments(CurrentProgramInfo.NumVGPR, + emitCommonFunctionComments(CurrentProgramInfo.NumArchVGPR, + STM.hasMAIInsts() + ? CurrentProgramInfo.NumAccVGPR + : Optional(), + CurrentProgramInfo.NumVGPR, CurrentProgramInfo.NumSGPR, CurrentProgramInfo.ScratchSize, getFunctionCodeSize(MF), MFI); @@ -506,6 +519,10 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { " NumVGPRsForWavesPerEU: " + Twine(CurrentProgramInfo.NumVGPRsForWavesPerEU), false); + OutStreamer->emitRawComment( + " Occupancy: " + + Twine(CurrentProgramInfo.Occupancy), false); + OutStreamer->emitRawComment( " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false); @@ -588,6 +605,11 @@ int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs( UsesVCC, UsesFlatScratch); } +int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumVGPRs( + const GCNSubtarget &ST) const { + return std::max(NumVGPR, NumAGPR); +} + AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( const MachineFunction &MF) const { SIFunctionResourceInfo Info; @@ -634,11 +656,18 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( HighestVGPRReg = Reg; break; } - MCPhysReg AReg = AMDGPU::AGPR0 + TRI.getHWRegIndex(Reg); - if (MRI.isPhysRegUsed(AReg)) { - HighestVGPRReg = AReg; - break; + } + + if (ST.hasMAIInsts()) { + MCPhysReg HighestAGPRReg = AMDGPU::NoRegister; + for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) { + if (MRI.isPhysRegUsed(Reg)) { + HighestAGPRReg = Reg; + break; + } } + Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister ? 
0 : + TRI.getHWRegIndex(HighestAGPRReg) + 1; } MCPhysReg HighestSGPRReg = AMDGPU::NoRegister; @@ -660,6 +689,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( } int32_t MaxVGPR = -1; + int32_t MaxAGPR = -1; int32_t MaxSGPR = -1; uint64_t CalleeFrameSize = 0; @@ -669,11 +699,12 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( for (const MachineOperand &MO : MI.operands()) { unsigned Width = 0; bool IsSGPR = false; + bool IsAGPR = false; if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); switch (Reg) { case AMDGPU::EXEC: case AMDGPU::EXEC_LO: @@ -744,6 +775,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Width = 1; } else if (AMDGPU::AGPR_32RegClass.contains(Reg)) { IsSGPR = false; + IsAGPR = true; Width = 1; } else if (AMDGPU::SReg_64RegClass.contains(Reg)) { assert(!AMDGPU::TTMP_64RegClass.contains(Reg) && @@ -755,6 +787,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Width = 2; } else if (AMDGPU::AReg_64RegClass.contains(Reg)) { IsSGPR = false; + IsAGPR = true; Width = 2; } else if (AMDGPU::VReg_96RegClass.contains(Reg)) { IsSGPR = false; @@ -771,6 +804,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Width = 4; } else if (AMDGPU::AReg_128RegClass.contains(Reg)) { IsSGPR = false; + IsAGPR = true; Width = 4; } else if (AMDGPU::SReg_256RegClass.contains(Reg)) { assert(!AMDGPU::TTMP_256RegClass.contains(Reg) && @@ -790,6 +824,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Width = 16; } else if (AMDGPU::AReg_512RegClass.contains(Reg)) { IsSGPR = false; + IsAGPR = true; Width = 16; } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) { IsSGPR = true; @@ -799,6 +834,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Width = 32; } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) { IsSGPR = false; + IsAGPR = true; Width = 32; } else { llvm_unreachable("Unknown register class"); @@ -807,6 +843,8 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( int MaxUsed = HWReg + Width - 1; if (IsSGPR) { MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR; + } else if (IsAGPR) { + MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR; } else { MaxVGPR = MaxUsed > MaxVGPR ? 
MaxUsed : MaxVGPR; } @@ -828,6 +866,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( 47 - IsaInfo::getNumExtraSGPRs(&ST, true, ST.hasFlatAddressSpace()); MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess); MaxVGPR = std::max(MaxVGPR, 23); + MaxAGPR = std::max(MaxAGPR, 23); CalleeFrameSize = std::max(CalleeFrameSize, UINT64_C(16384)); Info.UsesVCC = true; @@ -852,6 +891,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR); MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR); + MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR); CalleeFrameSize = std::max(I->second.PrivateSegmentSize, CalleeFrameSize); Info.UsesVCC |= I->second.UsesVCC; @@ -868,6 +908,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Info.NumExplicitSGPR = MaxSGPR + 1; Info.NumVGPR = MaxVGPR + 1; + Info.NumAGPR = MaxAGPR + 1; Info.PrivateSegmentSize += CalleeFrameSize; return Info; @@ -876,8 +917,11 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, const MachineFunction &MF) { SIFunctionResourceInfo Info = analyzeResourceUsage(MF); + const GCNSubtarget &STM = MF.getSubtarget(); - ProgInfo.NumVGPR = Info.NumVGPR; + ProgInfo.NumArchVGPR = Info.NumVGPR; + ProgInfo.NumAccVGPR = Info.NumAGPR; + ProgInfo.NumVGPR = Info.getTotalNumVGPRs(STM); ProgInfo.NumSGPR = Info.NumExplicitSGPR; ProgInfo.ScratchSize = Info.PrivateSegmentSize; ProgInfo.VCCUsed = Info.UsesVCC; @@ -890,7 +934,6 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, MF.getFunction().getContext().diagnose(DiagStackSize); } - const GCNSubtarget &STM = MF.getSubtarget(); const SIMachineFunctionInfo *MFI = MF.getInfo(); // TODO(scott.linder): The calculations related to SGPR/VGPR blocks are @@ -1057,6 +1100,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, // For AMDHSA, LDS_SIZE must be zero, as it is populated by the CP. S_00B84C_LDS_SIZE(STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks) | S_00B84C_EXCP_EN(0); + + ProgInfo.Occupancy = STM.computeOccupancy(MF, ProgInfo.LDSSize, + ProgInfo.NumSGPRsForWavesPerEU, + ProgInfo.NumVGPRsForWavesPerEU); } static unsigned getRsrcReg(CallingConv::ID CallConv) { @@ -1214,17 +1261,16 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, if (STM.isXNACKEnabled()) Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED; - unsigned MaxKernArgAlign; + Align MaxKernArgAlign; Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign); Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR; Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR; Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize; Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize; - // These alignment values are specified in powers of two, so alignment = - // 2^n. The minimum alignment is 2^4 = 16. - Out.kernarg_segment_alignment = std::max(4, - countTrailingZeros(MaxKernArgAlign)); + // kernarg_segment_alignment is specified as log of the alignment. + // The minimum alignment is 16. 
+ Out.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign)); } bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index cf77034329ef..c50c19a4609c 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -43,6 +43,7 @@ private: // Track the number of explicitly used VGPRs. Special registers reserved at // the end are tracked separately. int32_t NumVGPR = 0; + int32_t NumAGPR = 0; int32_t NumExplicitSGPR = 0; uint64_t PrivateSegmentSize = 0; bool UsesVCC = false; @@ -51,6 +52,7 @@ private: bool HasRecursion = false; int32_t getTotalNumSGPRs(const GCNSubtarget &ST) const; + int32_t getTotalNumVGPRs(const GCNSubtarget &ST) const; }; SIProgramInfo CurrentProgramInfo; @@ -77,6 +79,8 @@ private: void EmitPALMetadata(const MachineFunction &MF, const SIProgramInfo &KernelInfo); void emitCommonFunctionComments(uint32_t NumVGPR, + Optional NumAGPR, + uint32_t TotalNumVGPR, uint32_t NumSGPR, uint64_t ScratchSize, uint64_t CodeSize, @@ -125,7 +129,7 @@ public: void EmitFunctionEntryLabel() override; - void EmitBasicBlockStart(const MachineBasicBlock &MBB) const override; + void EmitBasicBlockStart(const MachineBasicBlock &MBB) override; void EmitGlobalVariable(const GlobalVariable *GV) override; @@ -140,8 +144,8 @@ public: const char *ExtraCode, raw_ostream &O) override; protected: - mutable std::vector DisasmLines, HexLines; - mutable size_t DisasmLineMaxLen; + std::vector DisasmLines, HexLines; + size_t DisasmLineMaxLen; }; } // end namespace llvm diff --git a/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp index 8a92e7d923fb..ba8343142c63 100644 --- a/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp +++ b/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp @@ -15,6 +15,7 @@ #include "AMDGPU.h" #include "AMDGPUSubtarget.h" +#include "SIDefines.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/IRBuilder.h" @@ -24,20 +25,10 @@ #define DEBUG_TYPE "amdgpu-atomic-optimizer" using namespace llvm; +using namespace llvm::AMDGPU; namespace { -enum DPP_CTRL { - DPP_ROW_SR1 = 0x111, - DPP_ROW_SR2 = 0x112, - DPP_ROW_SR3 = 0x113, - DPP_ROW_SR4 = 0x114, - DPP_ROW_SR8 = 0x118, - DPP_WF_SR1 = 0x138, - DPP_ROW_BCAST15 = 0x142, - DPP_ROW_BCAST31 = 0x143 -}; - struct ReplacementInfo { Instruction *I; AtomicRMWInst::BinOp Op; @@ -52,9 +43,12 @@ private: const LegacyDivergenceAnalysis *DA; const DataLayout *DL; DominatorTree *DT; - bool HasDPP; + const GCNSubtarget *ST; bool IsPixelShader; + Value *buildScan(IRBuilder<> &B, AtomicRMWInst::BinOp Op, Value *V, + Value *const Identity) const; + Value *buildShiftRight(IRBuilder<> &B, Value *V, Value *const Identity) const; void optimizeAtomic(Instruction &I, AtomicRMWInst::BinOp Op, unsigned ValIdx, bool ValDivergent) const; @@ -93,8 +87,7 @@ bool AMDGPUAtomicOptimizer::runOnFunction(Function &F) { DT = DTW ? 
&DTW->getDomTree() : nullptr; const TargetPassConfig &TPC = getAnalysis(); const TargetMachine &TM = TPC.getTM(); - const GCNSubtarget &ST = TM.getSubtarget(F); - HasDPP = ST.hasDPP(); + ST = &TM.getSubtarget(F); IsPixelShader = F.getCallingConv() == CallingConv::AMDGPU_PS; visit(F); @@ -142,17 +135,18 @@ void AMDGPUAtomicOptimizer::visitAtomicRMWInst(AtomicRMWInst &I) { // If the pointer operand is divergent, then each lane is doing an atomic // operation on a different address, and we cannot optimize that. - if (DA->isDivergent(I.getOperand(PtrIdx))) { + if (DA->isDivergentUse(&I.getOperandUse(PtrIdx))) { return; } - const bool ValDivergent = DA->isDivergent(I.getOperand(ValIdx)); + const bool ValDivergent = DA->isDivergentUse(&I.getOperandUse(ValIdx)); // If the value operand is divergent, each lane is contributing a different // value to the atomic calculation. We can only optimize divergent values if // we have DPP available on our subtarget, and the atomic operation is 32 // bits. - if (ValDivergent && (!HasDPP || (DL->getTypeSizeInBits(I.getType()) != 32))) { + if (ValDivergent && + (!ST->hasDPP() || DL->getTypeSizeInBits(I.getType()) != 32)) { return; } @@ -219,20 +213,21 @@ void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) { const unsigned ValIdx = 0; - const bool ValDivergent = DA->isDivergent(I.getOperand(ValIdx)); + const bool ValDivergent = DA->isDivergentUse(&I.getOperandUse(ValIdx)); // If the value operand is divergent, each lane is contributing a different // value to the atomic calculation. We can only optimize divergent values if // we have DPP available on our subtarget, and the atomic operation is 32 // bits. - if (ValDivergent && (!HasDPP || (DL->getTypeSizeInBits(I.getType()) != 32))) { + if (ValDivergent && + (!ST->hasDPP() || DL->getTypeSizeInBits(I.getType()) != 32)) { return; } // If any of the other arguments to the intrinsic are divergent, we can't // optimize the operation. for (unsigned Idx = 1; Idx < I.getNumOperands(); Idx++) { - if (DA->isDivergent(I.getOperand(Idx))) { + if (DA->isDivergentUse(&I.getOperandUse(Idx))) { return; } } @@ -282,6 +277,111 @@ static Value *buildNonAtomicBinOp(IRBuilder<> &B, AtomicRMWInst::BinOp Op, return B.CreateSelect(Cond, LHS, RHS); } +// Use the builder to create an inclusive scan of V across the wavefront, with +// all lanes active. +Value *AMDGPUAtomicOptimizer::buildScan(IRBuilder<> &B, AtomicRMWInst::BinOp Op, + Value *V, Value *const Identity) const { + Type *const Ty = V->getType(); + Module *M = B.GetInsertBlock()->getModule(); + Function *UpdateDPP = + Intrinsic::getDeclaration(M, Intrinsic::amdgcn_update_dpp, Ty); + Function *PermLaneX16 = + Intrinsic::getDeclaration(M, Intrinsic::amdgcn_permlanex16, {}); + Function *ReadLane = + Intrinsic::getDeclaration(M, Intrinsic::amdgcn_readlane, {}); + + for (unsigned Idx = 0; Idx < 4; Idx++) { + V = buildNonAtomicBinOp( + B, Op, V, + B.CreateCall(UpdateDPP, + {Identity, V, B.getInt32(DPP::ROW_SHR0 | 1 << Idx), + B.getInt32(0xf), B.getInt32(0xf), B.getFalse()})); + } + if (ST->hasDPPBroadcasts()) { + // GFX9 has DPP row broadcast operations. + V = buildNonAtomicBinOp( + B, Op, V, + B.CreateCall(UpdateDPP, + {Identity, V, B.getInt32(DPP::BCAST15), B.getInt32(0xa), + B.getInt32(0xf), B.getFalse()})); + V = buildNonAtomicBinOp( + B, Op, V, + B.CreateCall(UpdateDPP, + {Identity, V, B.getInt32(DPP::BCAST31), B.getInt32(0xc), + B.getInt32(0xf), B.getFalse()})); + } else { + // On GFX10 all DPP operations are confined to a single row. 
To get cross- + // row operations we have to use permlane or readlane. + + // Combine lane 15 into lanes 16..31 (and, for wave 64, lane 47 into lanes + // 48..63). + Value *const PermX = + B.CreateCall(PermLaneX16, {V, V, B.getInt32(-1), B.getInt32(-1), + B.getFalse(), B.getFalse()}); + V = buildNonAtomicBinOp( + B, Op, V, + B.CreateCall(UpdateDPP, + {Identity, PermX, B.getInt32(DPP::QUAD_PERM_ID), + B.getInt32(0xa), B.getInt32(0xf), B.getFalse()})); + if (!ST->isWave32()) { + // Combine lane 31 into lanes 32..63. + Value *const Lane31 = B.CreateCall(ReadLane, {V, B.getInt32(31)}); + V = buildNonAtomicBinOp( + B, Op, V, + B.CreateCall(UpdateDPP, + {Identity, Lane31, B.getInt32(DPP::QUAD_PERM_ID), + B.getInt32(0xc), B.getInt32(0xf), B.getFalse()})); + } + } + return V; +} + +// Use the builder to create a shift right of V across the wavefront, with all +// lanes active, to turn an inclusive scan into an exclusive scan. +Value *AMDGPUAtomicOptimizer::buildShiftRight(IRBuilder<> &B, Value *V, + Value *const Identity) const { + Type *const Ty = V->getType(); + Module *M = B.GetInsertBlock()->getModule(); + Function *UpdateDPP = + Intrinsic::getDeclaration(M, Intrinsic::amdgcn_update_dpp, Ty); + Function *ReadLane = + Intrinsic::getDeclaration(M, Intrinsic::amdgcn_readlane, {}); + Function *WriteLane = + Intrinsic::getDeclaration(M, Intrinsic::amdgcn_writelane, {}); + + if (ST->hasDPPWavefrontShifts()) { + // GFX9 has DPP wavefront shift operations. + V = B.CreateCall(UpdateDPP, + {Identity, V, B.getInt32(DPP::WAVE_SHR1), B.getInt32(0xf), + B.getInt32(0xf), B.getFalse()}); + } else { + // On GFX10 all DPP operations are confined to a single row. To get cross- + // row operations we have to use permlane or readlane. + Value *Old = V; + V = B.CreateCall(UpdateDPP, + {Identity, V, B.getInt32(DPP::ROW_SHR0 + 1), + B.getInt32(0xf), B.getInt32(0xf), B.getFalse()}); + + // Copy the old lane 15 to the new lane 16. + V = B.CreateCall(WriteLane, {B.CreateCall(ReadLane, {Old, B.getInt32(15)}), + B.getInt32(16), V}); + + if (!ST->isWave32()) { + // Copy the old lane 31 to the new lane 32. + V = B.CreateCall( + WriteLane, + {B.CreateCall(ReadLane, {Old, B.getInt32(31)}), B.getInt32(32), V}); + + // Copy the old lane 47 to the new lane 48. + V = B.CreateCall( + WriteLane, + {B.CreateCall(ReadLane, {Old, B.getInt32(47)}), B.getInt32(48), V}); + } + } + + return V; +} + static APInt getIdentityValueForAtomicOp(AtomicRMWInst::BinOp Op, unsigned BitWidth) { switch (Op) { @@ -345,23 +445,29 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, // We need to know how many lanes are active within the wavefront, and we do // this by doing a ballot of active lanes. + Type *const WaveTy = B.getIntNTy(ST->getWavefrontSize()); CallInst *const Ballot = B.CreateIntrinsic( - Intrinsic::amdgcn_icmp, {B.getInt64Ty(), B.getInt32Ty()}, + Intrinsic::amdgcn_icmp, {WaveTy, B.getInt32Ty()}, {B.getInt32(1), B.getInt32(0), B.getInt32(CmpInst::ICMP_NE)}); // We need to know how many lanes are active within the wavefront that are // below us. If we counted each lane linearly starting from 0, a lane is // below us only if its associated index was less than ours. We do this by // using the mbcnt intrinsic. 
- Value *const BitCast = B.CreateBitCast(Ballot, VecTy); - Value *const ExtractLo = B.CreateExtractElement(BitCast, B.getInt32(0)); - Value *const ExtractHi = B.CreateExtractElement(BitCast, B.getInt32(1)); - CallInst *const PartialMbcnt = B.CreateIntrinsic( - Intrinsic::amdgcn_mbcnt_lo, {}, {ExtractLo, B.getInt32(0)}); - Value *const Mbcnt = - B.CreateIntCast(B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {}, - {ExtractHi, PartialMbcnt}), - Ty, false); + Value *Mbcnt; + if (ST->isWave32()) { + Mbcnt = B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {}, + {Ballot, B.getInt32(0)}); + } else { + Value *const BitCast = B.CreateBitCast(Ballot, VecTy); + Value *const ExtractLo = B.CreateExtractElement(BitCast, B.getInt32(0)); + Value *const ExtractHi = B.CreateExtractElement(BitCast, B.getInt32(1)); + Mbcnt = B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {}, + {ExtractLo, B.getInt32(0)}); + Mbcnt = + B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {}, {ExtractHi, Mbcnt}); + } + Mbcnt = B.CreateIntCast(Mbcnt, Ty, false); Value *const Identity = B.getInt(getIdentityValueForAtomicOp(Op, TyBitWidth)); @@ -373,47 +479,25 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, if (ValDivergent) { // First we need to set all inactive invocations to the identity value, so // that they can correctly contribute to the final result. - CallInst *const SetInactive = - B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, Ty, {V, Identity}); - - CallInst *const FirstDPP = - B.CreateIntrinsic(Intrinsic::amdgcn_update_dpp, Ty, - {Identity, SetInactive, B.getInt32(DPP_WF_SR1), - B.getInt32(0xf), B.getInt32(0xf), B.getFalse()}); - ExclScan = FirstDPP; - - const unsigned Iters = 7; - const unsigned DPPCtrl[Iters] = { - DPP_ROW_SR1, DPP_ROW_SR2, DPP_ROW_SR3, DPP_ROW_SR4, - DPP_ROW_SR8, DPP_ROW_BCAST15, DPP_ROW_BCAST31}; - const unsigned RowMask[Iters] = {0xf, 0xf, 0xf, 0xf, 0xf, 0xa, 0xc}; - const unsigned BankMask[Iters] = {0xf, 0xf, 0xf, 0xe, 0xc, 0xf, 0xf}; - - // This loop performs an exclusive scan across the wavefront, with all lanes - // active (by using the WWM intrinsic). - for (unsigned Idx = 0; Idx < Iters; Idx++) { - Value *const UpdateValue = Idx < 3 ? FirstDPP : ExclScan; - CallInst *const DPP = B.CreateIntrinsic( - Intrinsic::amdgcn_update_dpp, Ty, - {Identity, UpdateValue, B.getInt32(DPPCtrl[Idx]), - B.getInt32(RowMask[Idx]), B.getInt32(BankMask[Idx]), B.getFalse()}); - - ExclScan = buildNonAtomicBinOp(B, Op, ExclScan, DPP); - } + NewV = B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, Ty, {V, Identity}); - NewV = buildNonAtomicBinOp(B, Op, SetInactive, ExclScan); + const AtomicRMWInst::BinOp ScanOp = + Op == AtomicRMWInst::Sub ? AtomicRMWInst::Add : Op; + NewV = buildScan(B, ScanOp, NewV, Identity); + ExclScan = buildShiftRight(B, NewV, Identity); // Read the value from the last lane, which has accumlated the values of // each active lane in the wavefront. This will be our new value which we // will provide to the atomic operation. 
+ Value *const LastLaneIdx = B.getInt32(ST->getWavefrontSize() - 1); if (TyBitWidth == 64) { Value *const ExtractLo = B.CreateTrunc(NewV, B.getInt32Ty()); Value *const ExtractHi = - B.CreateTrunc(B.CreateLShr(NewV, B.getInt64(32)), B.getInt32Ty()); + B.CreateTrunc(B.CreateLShr(NewV, 32), B.getInt32Ty()); CallInst *const ReadLaneLo = B.CreateIntrinsic( - Intrinsic::amdgcn_readlane, {}, {ExtractLo, B.getInt32(63)}); + Intrinsic::amdgcn_readlane, {}, {ExtractLo, LastLaneIdx}); CallInst *const ReadLaneHi = B.CreateIntrinsic( - Intrinsic::amdgcn_readlane, {}, {ExtractHi, B.getInt32(63)}); + Intrinsic::amdgcn_readlane, {}, {ExtractHi, LastLaneIdx}); Value *const PartialInsert = B.CreateInsertElement( UndefValue::get(VecTy), ReadLaneLo, B.getInt32(0)); Value *const Insert = @@ -421,7 +505,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, NewV = B.CreateBitCast(Insert, Ty); } else if (TyBitWidth == 32) { NewV = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {}, - {NewV, B.getInt32(63)}); + {NewV, LastLaneIdx}); } else { llvm_unreachable("Unhandled atomic bit width"); } @@ -493,77 +577,80 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, // original instruction. B.SetInsertPoint(&I); - // Create a PHI node to get our new atomic result into the exit block. - PHINode *const PHI = B.CreatePHI(Ty, 2); - PHI->addIncoming(UndefValue::get(Ty), EntryBB); - PHI->addIncoming(NewI, SingleLaneTerminator->getParent()); - - // We need to broadcast the value who was the lowest active lane (the first - // lane) to all other lanes in the wavefront. We use an intrinsic for this, - // but have to handle 64-bit broadcasts with two calls to this intrinsic. - Value *BroadcastI = nullptr; - - if (TyBitWidth == 64) { - Value *const ExtractLo = B.CreateTrunc(PHI, B.getInt32Ty()); - Value *const ExtractHi = - B.CreateTrunc(B.CreateLShr(PHI, B.getInt64(32)), B.getInt32Ty()); - CallInst *const ReadFirstLaneLo = - B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractLo); - CallInst *const ReadFirstLaneHi = - B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractHi); - Value *const PartialInsert = B.CreateInsertElement( - UndefValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0)); - Value *const Insert = - B.CreateInsertElement(PartialInsert, ReadFirstLaneHi, B.getInt32(1)); - BroadcastI = B.CreateBitCast(Insert, Ty); - } else if (TyBitWidth == 32) { - - BroadcastI = B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, PHI); - } else { - llvm_unreachable("Unhandled atomic bit width"); - } + const bool NeedResult = !I.use_empty(); + if (NeedResult) { + // Create a PHI node to get our new atomic result into the exit block. + PHINode *const PHI = B.CreatePHI(Ty, 2); + PHI->addIncoming(UndefValue::get(Ty), EntryBB); + PHI->addIncoming(NewI, SingleLaneTerminator->getParent()); - // Now that we have the result of our single atomic operation, we need to - // get our individual lane's slice into the result. We use the lane offset we - // previously calculated combined with the atomic result value we got from the - // first lane, to get our lane's index into the atomic result. 
- Value *LaneOffset = nullptr; - if (ValDivergent) { - LaneOffset = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, ExclScan); - } else { - switch (Op) { - default: - llvm_unreachable("Unhandled atomic op"); - case AtomicRMWInst::Add: - case AtomicRMWInst::Sub: - LaneOffset = B.CreateMul(V, Mbcnt); - break; - case AtomicRMWInst::And: - case AtomicRMWInst::Or: - case AtomicRMWInst::Max: - case AtomicRMWInst::Min: - case AtomicRMWInst::UMax: - case AtomicRMWInst::UMin: - LaneOffset = B.CreateSelect(Cond, Identity, V); - break; - case AtomicRMWInst::Xor: - LaneOffset = B.CreateMul(V, B.CreateAnd(Mbcnt, 1)); - break; + // We need to broadcast the value who was the lowest active lane (the first + // lane) to all other lanes in the wavefront. We use an intrinsic for this, + // but have to handle 64-bit broadcasts with two calls to this intrinsic. + Value *BroadcastI = nullptr; + + if (TyBitWidth == 64) { + Value *const ExtractLo = B.CreateTrunc(PHI, B.getInt32Ty()); + Value *const ExtractHi = + B.CreateTrunc(B.CreateLShr(PHI, 32), B.getInt32Ty()); + CallInst *const ReadFirstLaneLo = + B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractLo); + CallInst *const ReadFirstLaneHi = + B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractHi); + Value *const PartialInsert = B.CreateInsertElement( + UndefValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0)); + Value *const Insert = + B.CreateInsertElement(PartialInsert, ReadFirstLaneHi, B.getInt32(1)); + BroadcastI = B.CreateBitCast(Insert, Ty); + } else if (TyBitWidth == 32) { + + BroadcastI = B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, PHI); + } else { + llvm_unreachable("Unhandled atomic bit width"); } - } - Value *const Result = buildNonAtomicBinOp(B, Op, BroadcastI, LaneOffset); - if (IsPixelShader) { - // Need a final PHI to reconverge to above the helper lane branch mask. - B.SetInsertPoint(PixelExitBB->getFirstNonPHI()); + // Now that we have the result of our single atomic operation, we need to + // get our individual lane's slice into the result. We use the lane offset + // we previously calculated combined with the atomic result value we got + // from the first lane, to get our lane's index into the atomic result. + Value *LaneOffset = nullptr; + if (ValDivergent) { + LaneOffset = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, ExclScan); + } else { + switch (Op) { + default: + llvm_unreachable("Unhandled atomic op"); + case AtomicRMWInst::Add: + case AtomicRMWInst::Sub: + LaneOffset = B.CreateMul(V, Mbcnt); + break; + case AtomicRMWInst::And: + case AtomicRMWInst::Or: + case AtomicRMWInst::Max: + case AtomicRMWInst::Min: + case AtomicRMWInst::UMax: + case AtomicRMWInst::UMin: + LaneOffset = B.CreateSelect(Cond, Identity, V); + break; + case AtomicRMWInst::Xor: + LaneOffset = B.CreateMul(V, B.CreateAnd(Mbcnt, 1)); + break; + } + } + Value *const Result = buildNonAtomicBinOp(B, Op, BroadcastI, LaneOffset); - PHINode *const PHI = B.CreatePHI(Ty, 2); - PHI->addIncoming(UndefValue::get(Ty), PixelEntryBB); - PHI->addIncoming(Result, I.getParent()); - I.replaceAllUsesWith(PHI); - } else { - // Replace the original atomic instruction with the new one. - I.replaceAllUsesWith(Result); + if (IsPixelShader) { + // Need a final PHI to reconverge to above the helper lane branch mask. 
+ B.SetInsertPoint(PixelExitBB->getFirstNonPHI()); + + PHINode *const PHI = B.CreatePHI(Ty, 2); + PHI->addIncoming(UndefValue::get(Ty), PixelEntryBB); + PHI->addIncoming(Result, I.getParent()); + I.replaceAllUsesWith(PHI); + } else { + // Replace the original atomic instruction with the new one. + I.replaceAllUsesWith(Result); + } } // And delete the original. diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index b107c357196d..58c44acde1a7 100644 --- a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -30,13 +30,15 @@ using namespace llvm; namespace { -struct OutgoingArgHandler : public CallLowering::ValueHandler { - OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, - MachineInstrBuilder MIB, CCAssignFn *AssignFn) - : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {} +struct OutgoingValueHandler : public CallLowering::ValueHandler { + OutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI, + MachineInstrBuilder MIB, CCAssignFn *AssignFn) + : ValueHandler(B, MRI, AssignFn), MIB(MIB) {} MachineInstrBuilder MIB; + bool isIncomingArgumentHandler() const override { return false; } + Register getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { llvm_unreachable("not implemented"); @@ -49,15 +51,96 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler { void assignValueToReg(Register ValVReg, Register PhysReg, CCValAssign &VA) override { - MIB.addUse(PhysReg); - MIRBuilder.buildCopy(PhysReg, ValVReg); + Register ExtReg; + if (VA.getLocVT().getSizeInBits() < 32) { + // 16-bit types are reported as legal for 32-bit registers. We need to + // extend and do a 32-bit copy to avoid the verifier complaining about it. + ExtReg = MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0); + } else + ExtReg = extendRegister(ValVReg, VA); + + MIRBuilder.buildCopy(PhysReg, ExtReg); + MIB.addUse(PhysReg, RegState::Implicit); } bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, const CallLowering::ArgInfo &Info, + ISD::ArgFlagsTy Flags, CCState &State) override { - return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State); + return AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State); + } +}; + +struct IncomingArgHandler : public CallLowering::ValueHandler { + uint64_t StackUsed = 0; + + IncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI, + CCAssignFn *AssignFn) + : ValueHandler(B, MRI, AssignFn) {} + + Register getStackAddress(uint64_t Size, int64_t Offset, + MachinePointerInfo &MPO) override { + auto &MFI = MIRBuilder.getMF().getFrameInfo(); + int FI = MFI.CreateFixedObject(Size, Offset, true); + MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI); + Register AddrReg = MRI.createGenericVirtualRegister( + LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32)); + MIRBuilder.buildFrameIndex(AddrReg, FI); + StackUsed = std::max(StackUsed, Size + Offset); + return AddrReg; + } + + void assignValueToReg(Register ValVReg, Register PhysReg, + CCValAssign &VA) override { + markPhysRegUsed(PhysReg); + + if (VA.getLocVT().getSizeInBits() < 32) { + // 16-bit types are reported as legal for 32-bit registers. We need to do + // a 32-bit copy, and truncate to avoid the verifier complaining about it. 
+ auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg); + MIRBuilder.buildTrunc(ValVReg, Copy); + return; + } + + switch (VA.getLocInfo()) { + case CCValAssign::LocInfo::SExt: + case CCValAssign::LocInfo::ZExt: + case CCValAssign::LocInfo::AExt: { + auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg); + MIRBuilder.buildTrunc(ValVReg, Copy); + break; + } + default: + MIRBuilder.buildCopy(ValVReg, PhysReg); + break; + } + } + + void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size, + MachinePointerInfo &MPO, CCValAssign &VA) override { + // FIXME: Get alignment + auto MMO = MIRBuilder.getMF().getMachineMemOperand( + MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size, 1); + MIRBuilder.buildLoad(ValVReg, Addr, *MMO); + } + + /// How the physical register gets marked varies between formal + /// parameters (it's a basic-block live-in), and a call instruction + /// (it's an implicit-def of the BL). + virtual void markPhysRegUsed(unsigned PhysReg) = 0; + + // FIXME: What is the point of this being a callback? + bool isIncomingArgumentHandler() const override { return true; } +}; + +struct FormalArgHandler : public IncomingArgHandler { + FormalArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI, + CCAssignFn *AssignFn) + : IncomingArgHandler(B, MRI, AssignFn) {} + + void markPhysRegUsed(unsigned PhysReg) override { + MIRBuilder.getMBB().addLiveIn(PhysReg); } }; @@ -67,55 +150,198 @@ AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI) : CallLowering(&TLI) { } -bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, +void AMDGPUCallLowering::splitToValueTypes( + const ArgInfo &OrigArg, SmallVectorImpl &SplitArgs, + const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv, + SplitArgTy PerformArgSplit) const { + const SITargetLowering &TLI = *getTLI(); + LLVMContext &Ctx = OrigArg.Ty->getContext(); + + if (OrigArg.Ty->isVoidTy()) + return; + + SmallVector SplitVTs; + ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs); + + assert(OrigArg.Regs.size() == SplitVTs.size()); + + int SplitIdx = 0; + for (EVT VT : SplitVTs) { + unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT); + Type *Ty = VT.getTypeForEVT(Ctx); + + + + if (NumParts == 1) { + // No splitting to do, but we want to replace the original type (e.g. [1 x + // double] -> double). + SplitArgs.emplace_back(OrigArg.Regs[SplitIdx], Ty, + OrigArg.Flags, OrigArg.IsFixed); + + ++SplitIdx; + continue; + } + + LLT LLTy = getLLTForType(*Ty, DL); + + SmallVector SplitRegs; + + EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT); + Type *PartTy = PartVT.getTypeForEVT(Ctx); + LLT PartLLT = getLLTForType(*PartTy, DL); + + // FIXME: Should we be reporting all of the part registers for a single + // argument, and let handleAssignments take care of the repacking? + for (unsigned i = 0; i < NumParts; ++i) { + Register PartReg = MRI.createGenericVirtualRegister(PartLLT); + SplitRegs.push_back(PartReg); + SplitArgs.emplace_back(ArrayRef(PartReg), PartTy, OrigArg.Flags); + } + + PerformArgSplit(SplitRegs, LLTy, PartLLT, SplitIdx); + + ++SplitIdx; + } +} + +// Get the appropriate type to make \p OrigTy \p Factor times bigger. 
+static LLT getMultipleType(LLT OrigTy, int Factor) { + if (OrigTy.isVector()) { + return LLT::vector(OrigTy.getNumElements() * Factor, + OrigTy.getElementType()); + } + + return LLT::scalar(OrigTy.getSizeInBits() * Factor); +} + +// TODO: Move to generic code +static void unpackRegsToOrigType(MachineIRBuilder &B, + ArrayRef DstRegs, + Register SrcReg, + LLT SrcTy, + LLT PartTy) { + assert(DstRegs.size() > 1 && "Nothing to unpack"); + + MachineFunction &MF = B.getMF(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + const unsigned SrcSize = SrcTy.getSizeInBits(); + const unsigned PartSize = PartTy.getSizeInBits(); + + if (SrcTy.isVector() && !PartTy.isVector() && + PartSize > SrcTy.getElementType().getSizeInBits()) { + // Vector was scalarized, and the elements extended. + auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(), + SrcReg); + for (int i = 0, e = DstRegs.size(); i != e; ++i) + B.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i)); + return; + } + + if (SrcSize % PartSize == 0) { + B.buildUnmerge(DstRegs, SrcReg); + return; + } + + const int NumRoundedParts = (SrcSize + PartSize - 1) / PartSize; + + LLT BigTy = getMultipleType(PartTy, NumRoundedParts); + auto ImpDef = B.buildUndef(BigTy); + + Register BigReg = MRI.createGenericVirtualRegister(BigTy); + B.buildInsert(BigReg, ImpDef.getReg(0), SrcReg, 0).getReg(0); + + int64_t Offset = 0; + for (unsigned i = 0, e = DstRegs.size(); i != e; ++i, Offset += PartSize) + B.buildExtract(DstRegs[i], BigReg, Offset); +} + +/// Lower the return value for the already existing \p Ret. This assumes that +/// \p B's insertion point is correct. +bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &B, + const Value *Val, ArrayRef VRegs, + MachineInstrBuilder &Ret) const { + if (!Val) + return true; + + auto &MF = B.getMF(); + const auto &F = MF.getFunction(); + const DataLayout &DL = MF.getDataLayout(); + + CallingConv::ID CC = F.getCallingConv(); + const SITargetLowering &TLI = *getTLI(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + ArgInfo OrigRetInfo(VRegs, Val->getType()); + setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F); + SmallVector SplitRetInfos; + + splitToValueTypes( + OrigRetInfo, SplitRetInfos, DL, MRI, CC, + [&](ArrayRef Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) { + unpackRegsToOrigType(B, Regs, VRegs[VTSplitIdx], LLTy, PartLLT); + }); + + CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg()); + + OutgoingValueHandler RetHandler(B, MF.getRegInfo(), Ret, AssignFn); + return handleAssignments(B, SplitRetInfos, RetHandler); +} + +bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val, ArrayRef VRegs) const { - MachineFunction &MF = MIRBuilder.getMF(); + MachineFunction &MF = B.getMF(); MachineRegisterInfo &MRI = MF.getRegInfo(); SIMachineFunctionInfo *MFI = MF.getInfo(); MFI->setIfReturnsVoid(!Val); - if (!Val) { - MIRBuilder.buildInstr(AMDGPU::S_ENDPGM).addImm(0); + assert(!Val == VRegs.empty() && "Return value without a vreg"); + + CallingConv::ID CC = B.getMF().getFunction().getCallingConv(); + const bool IsShader = AMDGPU::isShader(CC); + const bool IsWaveEnd = (IsShader && MFI->returnsVoid()) || + AMDGPU::isKernel(CC); + if (IsWaveEnd) { + B.buildInstr(AMDGPU::S_ENDPGM) + .addImm(0); return true; } - Register VReg = VRegs[0]; - - const Function &F = MF.getFunction(); - auto &DL = F.getParent()->getDataLayout(); - if (!AMDGPU::isShader(F.getCallingConv())) - return false; + auto const &ST = B.getMF().getSubtarget(); + unsigned ReturnOpc = + IsShader ? 
AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::S_SETPC_B64_return; - const AMDGPUTargetLowering &TLI = *getTLI(); - SmallVector SplitVTs; - SmallVector Offsets; - ArgInfo OrigArg{VReg, Val->getType()}; - setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F); - ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0); - - SmallVector SplitArgs; - CCAssignFn *AssignFn = CCAssignFnForReturn(F.getCallingConv(), false); - for (unsigned i = 0, e = Offsets.size(); i != e; ++i) { - Type *SplitTy = SplitVTs[i].getTypeForEVT(F.getContext()); - SplitArgs.push_back({VRegs[i], SplitTy, OrigArg.Flags, OrigArg.IsFixed}); + auto Ret = B.buildInstrNoInsert(ReturnOpc); + Register ReturnAddrVReg; + if (ReturnOpc == AMDGPU::S_SETPC_B64_return) { + ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass); + Ret.addUse(ReturnAddrVReg); } - auto RetInstr = MIRBuilder.buildInstrNoInsert(AMDGPU::SI_RETURN_TO_EPILOG); - OutgoingArgHandler Handler(MIRBuilder, MRI, RetInstr, AssignFn); - if (!handleAssignments(MIRBuilder, SplitArgs, Handler)) + + if (!lowerReturnVal(B, Val, VRegs, Ret)) return false; - MIRBuilder.insertInstr(RetInstr); + if (ReturnOpc == AMDGPU::S_SETPC_B64_return) { + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF), + &AMDGPU::SGPR_64RegClass); + B.buildCopy(ReturnAddrVReg, LiveInReturn); + } + + // TODO: Handle CalleeSavedRegsViaCopy. + + B.insertInstr(Ret); return true; } -Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder, +Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &B, Type *ParamTy, uint64_t Offset) const { - MachineFunction &MF = MIRBuilder.getMF(); + MachineFunction &MF = B.getMF(); const SIMachineFunctionInfo *MFI = MF.getInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); const Function &F = MF.getFunction(); @@ -128,79 +354,37 @@ Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder, Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr); Register OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); - MIRBuilder.buildConstant(OffsetReg, Offset); + B.buildConstant(OffsetReg, Offset); - MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg); + B.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg); return DstReg; } -void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder, +void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B, Type *ParamTy, uint64_t Offset, unsigned Align, Register DstReg) const { - MachineFunction &MF = MIRBuilder.getMF(); + MachineFunction &MF = B.getMF(); const Function &F = MF.getFunction(); const DataLayout &DL = F.getParent()->getDataLayout(); PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); unsigned TypeSize = DL.getTypeStoreSize(ParamTy); - Register PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset); + Register PtrReg = lowerParameterPtr(B, ParamTy, Offset); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad | - MachineMemOperand::MONonTemporal | + MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant, TypeSize, Align); - MIRBuilder.buildLoad(DstReg, PtrReg, *MMO); -} - -static Register findFirstFreeSGPR(CCState &CCInfo) { - unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); - for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) { - if (!CCInfo.isAllocated(AMDGPU::SGPR0 + Reg)) { - return AMDGPU::SGPR0 + Reg; - } - } - llvm_unreachable("Cannot 
allocate sgpr"); -} - -static void allocateSpecialEntryInputVGPRs(CCState &CCInfo, - MachineFunction &MF, - const SIRegisterInfo &TRI, - SIMachineFunctionInfo &Info) { - const LLT S32 = LLT::scalar(32); - MachineRegisterInfo &MRI = MF.getRegInfo(); - - if (Info.hasWorkItemIDX()) { - Register Reg = AMDGPU::VGPR0; - MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32); - - CCInfo.AllocateReg(Reg); - Info.setWorkItemIDX(ArgDescriptor::createRegister(Reg)); - } - - if (Info.hasWorkItemIDY()) { - Register Reg = AMDGPU::VGPR1; - MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32); - - CCInfo.AllocateReg(Reg); - Info.setWorkItemIDY(ArgDescriptor::createRegister(Reg)); - } - - if (Info.hasWorkItemIDZ()) { - Register Reg = AMDGPU::VGPR2; - MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32); - - CCInfo.AllocateReg(Reg); - Info.setWorkItemIDZ(ArgDescriptor::createRegister(Reg)); - } + B.buildLoad(DstReg, PtrReg, *MMO); } // Allocate special inputs passed in user SGPRs. static void allocateHSAUserSGPRs(CCState &CCInfo, - MachineIRBuilder &MIRBuilder, + MachineIRBuilder &B, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) { @@ -229,8 +413,8 @@ static void allocateHSAUserSGPRs(CCState &CCInfo, const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64); Register VReg = MRI.createGenericVirtualRegister(P4); MRI.addLiveIn(InputPtrReg, VReg); - MIRBuilder.getMBB().addLiveIn(InputPtrReg); - MIRBuilder.buildCopy(VReg, InputPtrReg); + B.getMBB().addLiveIn(InputPtrReg); + B.buildCopy(VReg, InputPtrReg); CCInfo.AllocateReg(InputPtrReg); } @@ -250,74 +434,22 @@ static void allocateHSAUserSGPRs(CCState &CCInfo, // these from the dispatch pointer. } -static void allocateSystemSGPRs(CCState &CCInfo, - MachineFunction &MF, - SIMachineFunctionInfo &Info, - CallingConv::ID CallConv, - bool IsShader) { - const LLT S32 = LLT::scalar(32); - MachineRegisterInfo &MRI = MF.getRegInfo(); - - if (Info.hasWorkGroupIDX()) { - Register Reg = Info.addWorkGroupIDX(); - MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32); - CCInfo.AllocateReg(Reg); - } - - if (Info.hasWorkGroupIDY()) { - Register Reg = Info.addWorkGroupIDY(); - MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32); - CCInfo.AllocateReg(Reg); - } - - if (Info.hasWorkGroupIDZ()) { - unsigned Reg = Info.addWorkGroupIDZ(); - MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32); - CCInfo.AllocateReg(Reg); - } - - if (Info.hasWorkGroupInfo()) { - unsigned Reg = Info.addWorkGroupInfo(); - MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32); - CCInfo.AllocateReg(Reg); - } - - if (Info.hasPrivateSegmentWaveByteOffset()) { - // Scratch wave offset passed in system SGPR. - unsigned PrivateSegmentWaveByteOffsetReg; - - if (IsShader) { - PrivateSegmentWaveByteOffsetReg = - Info.getPrivateSegmentWaveByteOffsetSystemSGPR(); - - // This is true if the scratch wave byte offset doesn't have a fixed - // location. 
- if (PrivateSegmentWaveByteOffsetReg == AMDGPU::NoRegister) { - PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo); - Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg); - } - } else - PrivateSegmentWaveByteOffsetReg = Info.addPrivateSegmentWaveByteOffset(); - - MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass); - CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg); - } -} - bool AMDGPUCallLowering::lowerFormalArgumentsKernel( - MachineIRBuilder &MIRBuilder, const Function &F, + MachineIRBuilder &B, const Function &F, ArrayRef> VRegs) const { - MachineFunction &MF = MIRBuilder.getMF(); + MachineFunction &MF = B.getMF(); const GCNSubtarget *Subtarget = &MF.getSubtarget(); MachineRegisterInfo &MRI = MF.getRegInfo(); SIMachineFunctionInfo *Info = MF.getInfo(); - const SIRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const SIRegisterInfo *TRI = Subtarget->getRegisterInfo(); + const SITargetLowering &TLI = *getTLI(); + const DataLayout &DL = F.getParent()->getDataLayout(); SmallVector ArgLocs; CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext()); - allocateHSAUserSGPRs(CCInfo, MIRBuilder, MF, *TRI, *Info); + allocateHSAUserSGPRs(CCInfo, B, MF, *TRI, *Info); unsigned i = 0; const unsigned KernArgBaseAlign = 16; @@ -343,123 +475,242 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel( : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL)); unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset); ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy)); - lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, ArgReg); + lowerParameter(B, ArgTy, ArgOffset, Align, ArgReg); if (OrigArgRegs.size() > 1) - unpackRegs(OrigArgRegs, ArgReg, ArgTy, MIRBuilder); + unpackRegs(OrigArgRegs, ArgReg, ArgTy, B); ++i; } - allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info); - allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false); + TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info); + TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false); return true; } +// TODO: Move this to generic code +static void packSplitRegsToOrigType(MachineIRBuilder &B, + ArrayRef OrigRegs, + ArrayRef Regs, + LLT LLTy, + LLT PartLLT) { + if (!LLTy.isVector() && !PartLLT.isVector()) { + B.buildMerge(OrigRegs[0], Regs); + return; + } + + if (LLTy.isVector() && PartLLT.isVector()) { + assert(LLTy.getElementType() == PartLLT.getElementType()); + + int DstElts = LLTy.getNumElements(); + int PartElts = PartLLT.getNumElements(); + if (DstElts % PartElts == 0) + B.buildConcatVectors(OrigRegs[0], Regs); + else { + // Deal with v3s16 split into v2s16 + assert(PartElts == 2 && DstElts % 2 != 0); + int RoundedElts = PartElts * ((DstElts + PartElts - 1) / PartElts); + + LLT RoundedDestTy = LLT::vector(RoundedElts, PartLLT.getElementType()); + auto RoundedConcat = B.buildConcatVectors(RoundedDestTy, Regs); + B.buildExtract(OrigRegs[0], RoundedConcat, 0); + } + + return; + } + + assert(LLTy.isVector() && !PartLLT.isVector()); + + LLT DstEltTy = LLTy.getElementType(); + if (DstEltTy == PartLLT) { + // Vector was trivially scalarized. + B.buildBuildVector(OrigRegs[0], Regs); + } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) { + // Deal with vector with 64-bit elements decomposed to 32-bit + // registers. Need to create intermediate 64-bit elements. 
+ SmallVector EltMerges; + int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits(); + + assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0); + + for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) { + auto Merge = B.buildMerge(DstEltTy, + Regs.take_front(PartsPerElt)); + EltMerges.push_back(Merge.getReg(0)); + Regs = Regs.drop_front(PartsPerElt); + } + + B.buildBuildVector(OrigRegs[0], EltMerges); + } else { + // Vector was split, and elements promoted to a wider type. + LLT BVType = LLT::vector(LLTy.getNumElements(), PartLLT); + auto BV = B.buildBuildVector(BVType, Regs); + B.buildTrunc(OrigRegs[0], BV); + } +} + bool AMDGPUCallLowering::lowerFormalArguments( - MachineIRBuilder &MIRBuilder, const Function &F, + MachineIRBuilder &B, const Function &F, ArrayRef> VRegs) const { + CallingConv::ID CC = F.getCallingConv(); + // The infrastructure for normal calling convention lowering is essentially // useless for kernels. We want to avoid any kind of legalization or argument // splitting. - if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) - return lowerFormalArgumentsKernel(MIRBuilder, F, VRegs); + if (CC == CallingConv::AMDGPU_KERNEL) + return lowerFormalArgumentsKernel(B, F, VRegs); - // AMDGPU_GS and AMDGP_HS are not supported yet. - if (F.getCallingConv() == CallingConv::AMDGPU_GS || - F.getCallingConv() == CallingConv::AMDGPU_HS) - return false; + const bool IsShader = AMDGPU::isShader(CC); + const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC); - MachineFunction &MF = MIRBuilder.getMF(); + MachineFunction &MF = B.getMF(); + MachineBasicBlock &MBB = B.getMBB(); MachineRegisterInfo &MRI = MF.getRegInfo(); SIMachineFunctionInfo *Info = MF.getInfo(); - const SIRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const GCNSubtarget &Subtarget = MF.getSubtarget(); + const SIRegisterInfo *TRI = Subtarget.getRegisterInfo(); const DataLayout &DL = F.getParent()->getDataLayout(); - bool IsShader = AMDGPU::isShader(F.getCallingConv()); SmallVector ArgLocs; - CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext()); + CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext()); + + if (!IsEntryFunc) { + Register ReturnAddrReg = TRI->getReturnAddressReg(MF); + Register LiveInReturn = MF.addLiveIn(ReturnAddrReg, + &AMDGPU::SGPR_64RegClass); + MBB.addLiveIn(ReturnAddrReg); + B.buildCopy(LiveInReturn, ReturnAddrReg); + } if (Info->hasImplicitBufferPtr()) { - unsigned ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI); + Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI); MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass); CCInfo.AllocateReg(ImplicitBufferPtrReg); } - unsigned NumArgs = F.arg_size(); - Function::const_arg_iterator CurOrigArg = F.arg_begin(); - const AMDGPUTargetLowering &TLI = *getTLI(); + + SmallVector SplitArgs; + unsigned Idx = 0; unsigned PSInputNum = 0; - BitVector Skipped(NumArgs); - for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) { - EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType()); - - // We can only hanlde simple value types at the moment. 
- ISD::ArgFlagsTy Flags; - assert(VRegs[i].size() == 1 && "Can't lower into more than one register"); - ArgInfo OrigArg{VRegs[i][0], CurOrigArg->getType()}; - setArgFlags(OrigArg, i + 1, DL, F); - Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType())); - - if (F.getCallingConv() == CallingConv::AMDGPU_PS && - !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() && - PSInputNum <= 15) { - if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) { - Skipped.set(i); - ++PSInputNum; + + for (auto &Arg : F.args()) { + if (DL.getTypeStoreSize(Arg.getType()) == 0) + continue; + + const bool InReg = Arg.hasAttribute(Attribute::InReg); + + // SGPR arguments to functions not implemented. + if (!IsShader && InReg) + return false; + + if (Arg.hasAttribute(Attribute::SwiftSelf) || + Arg.hasAttribute(Attribute::SwiftError) || + Arg.hasAttribute(Attribute::Nest)) + return false; + + if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) { + const bool ArgUsed = !Arg.use_empty(); + bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum); + + if (!SkipArg) { + Info->markPSInputAllocated(PSInputNum); + if (ArgUsed) + Info->markPSInputEnabled(PSInputNum); + } + + ++PSInputNum; + + if (SkipArg) { + for (int I = 0, E = VRegs[Idx].size(); I != E; ++I) + B.buildUndef(VRegs[Idx][I]); + + ++Idx; continue; } + } - Info->markPSInputAllocated(PSInputNum); - if (!CurOrigArg->use_empty()) - Info->markPSInputEnabled(PSInputNum); + ArgInfo OrigArg(VRegs[Idx], Arg.getType()); + setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F); - ++PSInputNum; + splitToValueTypes( + OrigArg, SplitArgs, DL, MRI, CC, + // FIXME: We should probably be passing multiple registers to + // handleAssignments to do this + [&](ArrayRef Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) { + packSplitRegsToOrigType(B, VRegs[Idx][VTSplitIdx], Regs, + LLTy, PartLLT); + }); + + ++Idx; + } + + // At least one interpolation mode must be enabled or else the GPU will + // hang. + // + // Check PSInputAddr instead of PSInputEnable. The idea is that if the user + // set PSInputAddr, the user wants to enable some bits after the compilation + // based on run-time states. Since we can't know what the final PSInputEna + // will look like, so we shouldn't do anything here and the user should take + // responsibility for the correct programming. + // + // Otherwise, the following restrictions apply: + // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled. + // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be + // enabled too. + if (CC == CallingConv::AMDGPU_PS) { + if ((Info->getPSInputAddr() & 0x7F) == 0 || + ((Info->getPSInputAddr() & 0xF) == 0 && + Info->isPSInputAllocated(11))) { + CCInfo.AllocateReg(AMDGPU::VGPR0); + CCInfo.AllocateReg(AMDGPU::VGPR1); + Info->markPSInputAllocated(0); + Info->markPSInputEnabled(0); } - CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(), - /*IsVarArg=*/false); - - if (ValEVT.isVector()) { - EVT ElemVT = ValEVT.getVectorElementType(); - if (!ValEVT.isSimple()) - return false; - MVT ValVT = ElemVT.getSimpleVT(); - bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, - OrigArg.Flags, CCInfo); - if (!Res) - return false; - } else { - MVT ValVT = ValEVT.getSimpleVT(); - if (!ValEVT.isSimple()) - return false; - bool Res = - AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo); - - // Fail if we don't know how to handle this type. 
- if (Res) - return false; + if (Subtarget.isAmdPalOS()) { + // For isAmdPalOS, the user does not enable some bits after compilation + // based on run-time states; the register values being generated here are + // the final ones set in hardware. Therefore we need to apply the + // workaround to PSInputAddr and PSInputEnable together. (The case where + // a bit is set in PSInputAddr but not PSInputEnable is where the frontend + // set up an input arg for a particular interpolation mode, but nothing + // uses that input arg. Really we should have an earlier pass that removes + // such an arg.) + unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable(); + if ((PsInputBits & 0x7F) == 0 || + ((PsInputBits & 0xF) == 0 && + (PsInputBits >> 11 & 1))) + Info->markPSInputEnabled( + countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined)); } } - Function::const_arg_iterator Arg = F.arg_begin(); - - if (F.getCallingConv() == CallingConv::AMDGPU_VS || - F.getCallingConv() == CallingConv::AMDGPU_PS) { - for (unsigned i = 0, OrigArgIdx = 0; - OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) { - if (Skipped.test(OrigArgIdx)) - continue; - assert(VRegs[OrigArgIdx].size() == 1 && - "Can't lower into more than 1 reg"); - CCValAssign &VA = ArgLocs[i++]; - MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx][0]); - MIRBuilder.getMBB().addLiveIn(VA.getLocReg()); - MIRBuilder.buildCopy(VRegs[OrigArgIdx][0], VA.getLocReg()); - } + const SITargetLowering &TLI = *getTLI(); + CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg()); - allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), IsShader); - return true; + if (!MBB.empty()) + B.setInstr(*MBB.begin()); + + FormalArgHandler Handler(B, MRI, AssignFn); + if (!handleAssignments(CCInfo, ArgLocs, B, SplitArgs, Handler)) + return false; + + if (!IsEntryFunc) { + // Special inputs come after user arguments. + TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info); + } + + // Start adding system SGPRs. + if (IsEntryFunc) { + TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsShader); + } else { + CCInfo.AllocateReg(Info->getScratchRSrcReg()); + CCInfo.AllocateReg(Info->getScratchWaveOffsetReg()); + CCInfo.AllocateReg(Info->getFrameOffsetReg()); + TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info); } - return false; + // Move back to the end of the basic block. + B.setMBB(MBB); + + return true; } diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.h b/lib/Target/AMDGPU/AMDGPUCallLowering.h index 3599659cac6a..53a562586bc0 100644 --- a/lib/Target/AMDGPU/AMDGPUCallLowering.h +++ b/lib/Target/AMDGPU/AMDGPUCallLowering.h @@ -20,26 +20,37 @@ namespace llvm { class AMDGPUTargetLowering; +class MachineInstrBuilder; class AMDGPUCallLowering: public CallLowering { - Register lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy, + Register lowerParameterPtr(MachineIRBuilder &B, Type *ParamTy, uint64_t Offset) const; - void lowerParameter(MachineIRBuilder &MIRBuilder, Type *ParamTy, - uint64_t Offset, unsigned Align, - Register DstReg) const; + void lowerParameter(MachineIRBuilder &B, Type *ParamTy, uint64_t Offset, + unsigned Align, Register DstReg) const; - public: + /// A function of this type is used to perform value split action. 
+ using SplitArgTy = std::function, LLT, LLT, int)>; + + void splitToValueTypes(const ArgInfo &OrigArgInfo, + SmallVectorImpl &SplitArgs, + const DataLayout &DL, MachineRegisterInfo &MRI, + CallingConv::ID CallConv, + SplitArgTy SplitArg) const; + + bool lowerReturnVal(MachineIRBuilder &B, const Value *Val, + ArrayRef VRegs, MachineInstrBuilder &Ret) const; + +public: AMDGPUCallLowering(const AMDGPUTargetLowering &TLI); - bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, + bool lowerReturn(MachineIRBuilder &B, const Value *Val, ArrayRef VRegs) const override; - bool lowerFormalArgumentsKernel(MachineIRBuilder &MIRBuilder, - const Function &F, + bool lowerFormalArgumentsKernel(MachineIRBuilder &B, const Function &F, ArrayRef> VRegs) const; - bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, + bool lowerFormalArguments(MachineIRBuilder &B, const Function &F, ArrayRef> VRegs) const override; static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg); static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg); diff --git a/lib/Target/AMDGPU/AMDGPUCallingConv.td b/lib/Target/AMDGPU/AMDGPUCallingConv.td index 3688cd77542e..f8a54a61aac2 100644 --- a/lib/Target/AMDGPU/AMDGPUCallingConv.td +++ b/lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -24,22 +24,9 @@ def CC_SI : CallingConv<[ SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23, SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31, SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39, - SGPR40, SGPR41, SGPR42, SGPR43, SGPR44, SGPR45, SGPR46, SGPR47, - SGPR48, SGPR49, SGPR50, SGPR51, SGPR52, SGPR53, SGPR54, SGPR55, - SGPR56, SGPR57, SGPR58, SGPR59, SGPR60, SGPR61, SGPR62, SGPR63, - SGPR64, SGPR65, SGPR66, SGPR67, SGPR68, SGPR69, SGPR70, SGPR71, - SGPR72, SGPR73, SGPR74, SGPR75, SGPR76, SGPR77, SGPR78, SGPR79, - SGPR80, SGPR81, SGPR82, SGPR83, SGPR84, SGPR85, SGPR86, SGPR87, - SGPR88, SGPR89, SGPR90, SGPR91, SGPR92, SGPR93, SGPR94, SGPR95, - SGPR96, SGPR97, SGPR98, SGPR99, SGPR100, SGPR101, SGPR102, SGPR103, - SGPR104, SGPR105 + SGPR40, SGPR41, SGPR42, SGPR43 ]>>>, - // We have no way of referring to the generated register tuples - // here, so use a custom function. - CCIfInReg>>, - CCIfByVal>>, - // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs. CCIfNotInReg>, // 32*4 + 4 is the minimum for a fetch shader with 32 outputs. 
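// --- Illustrative sketch (not part of the imported sources) ---
// The SplitArgTy/splitToValueTypes interface declared in AMDGPUCallLowering.h
// above hands every split of an original IR value back through a
// caller-supplied callback, so the same splitting logic serves both
// directions: lowerReturnVal unpacks one wide register into parts, while
// lowerFormalArguments packs the parts back together. The standalone program
// below (all names hypothetical, 32-bit parts assumed) shows that callback
// pattern in miniature.
#include <cstdio>
#include <functional>
#include <vector>

using Part = unsigned; // stands in for a per-part virtual register

// Callback: receives the parts covering one original value, the bit widths
// needed to reassemble or unpack it, and the index of that value.
using SplitCallback =
    std::function<void(const std::vector<Part> &Parts, unsigned OrigBits,
                       unsigned PartBits, unsigned ValueIdx)>;

// Split a value of OrigBits into 32-bit parts and report them via CB.
static void splitToParts(unsigned OrigBits, unsigned ValueIdx,
                         const SplitCallback &CB) {
  const unsigned PartBits = 32;
  std::vector<Part> Parts;
  for (unsigned Covered = 0; Covered < OrigBits; Covered += PartBits)
    Parts.push_back(Covered / PartBits); // placeholder "register" per part
  CB(Parts, OrigBits, PartBits, ValueIdx);
}

int main() {
  // A 96-bit value splits into three 32-bit parts; the callback decides what
  // to do with them (here it only prints; a real caller packs or unpacks).
  splitToParts(96, 0,
               [](const std::vector<Part> &Parts, unsigned Orig,
                  unsigned PartBits, unsigned Idx) {
                 std::printf("value %u: %zu parts of %u bits (orig %u bits)\n",
                             Idx, Parts.size(), PartBits, Orig);
               });
  return 0;
}
// --- End of illustrative sketch ---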
@@ -138,7 +117,6 @@ def CC_AMDGPU_Func : CallingConv<[ VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>, - CCIfType<[i64, f64, v2i32, v2f32, v3i32, v3f32, v4i32, v4f32, v5i32, v5f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64, v4i16, v4f16], CCCustom<"allocateVGPRTuple">>, CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>, CCIfType<[i64, f64, v2i32, v2f32], CCAssignToStack<8, 4>>, CCIfType<[v3i32, v3f32], CCAssignToStack<12, 4>>, @@ -157,7 +135,6 @@ def RetCC_AMDGPU_Func : CallingConv<[ VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>, - CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64, v4i16, v4f16], CCCustom<"allocateVGPRTuple">> ]>; def CC_AMDGPU : CallingConv<[ diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index b750c6b5f6d2..1640a4a59ee2 100644 --- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -55,6 +55,12 @@ static cl::opt WidenLoads( cl::ReallyHidden, cl::init(true)); +static cl::opt UseMul24Intrin( + "amdgpu-codegenprepare-mul24", + cl::desc("Introduce mul24 intrinsics in AMDGPUCodeGenPrepare"), + cl::ReallyHidden, + cl::init(true)); + class AMDGPUCodeGenPrepare : public FunctionPass, public InstVisitor { const GCNSubtarget *ST = nullptr; @@ -509,7 +515,9 @@ bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const { } } - I.replaceAllUsesWith(insertValues(Builder, Ty, ResultVals)); + Value *NewVal = insertValues(Builder, Ty, ResultVals); + NewVal->takeName(&I); + I.replaceAllUsesWith(NewVal); I.eraseFromParent(); return true; @@ -879,7 +887,7 @@ bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) { DA->isUniform(&I) && promoteUniformOpToI32(I)) return true; - if (replaceMulWithMul24(I)) + if (UseMul24Intrin && replaceMulWithMul24(I)) return true; bool Changed = false; diff --git a/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp b/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp index e80797736363..61ce83b30e00 100644 --- a/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp @@ -13,9 +13,9 @@ #include "AMDGPUFrameLowering.h" using namespace llvm; -AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl, - int LAO, unsigned TransAl) - : TargetFrameLowering(D, StackAl, LAO, TransAl) { } +AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, Align StackAl, + int LAO, Align TransAl) + : TargetFrameLowering(D, StackAl, LAO, TransAl) {} AMDGPUFrameLowering::~AMDGPUFrameLowering() = default; diff --git a/lib/Target/AMDGPU/AMDGPUFrameLowering.h b/lib/Target/AMDGPU/AMDGPUFrameLowering.h index 48b64488303e..92e256cf2829 100644 --- a/lib/Target/AMDGPU/AMDGPUFrameLowering.h +++ b/lib/Target/AMDGPU/AMDGPUFrameLowering.h @@ -25,8 +25,8 @@ namespace llvm { /// See TargetFrameInfo for more comments. 
class AMDGPUFrameLowering : public TargetFrameLowering { public: - AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO, - unsigned TransAl = 1); + AMDGPUFrameLowering(StackDirection D, Align StackAl, int LAO, + Align TransAl = Align::None()); ~AMDGPUFrameLowering() override; /// \returns The number of 32-bit sub-registers that are used when storing diff --git a/lib/Target/AMDGPU/AMDGPUGISel.td b/lib/Target/AMDGPU/AMDGPUGISel.td index cad4c2ef404c..f2be1ca44d34 100644 --- a/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/lib/Target/AMDGPU/AMDGPUGISel.td @@ -12,10 +12,6 @@ include "AMDGPU.td" -def p0 : PtrValueType; -def p1 : PtrValueType; -def p4 : PtrValueType; - def sd_vsrc0 : ComplexPattern; def gi_vsrc0 : GIComplexOperandMatcher, @@ -38,6 +34,18 @@ def gi_vop3omods : GIComplexOperandMatcher, GIComplexPatternEquiv; +def gi_vop3omods0clamp0omod : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + +def gi_vop3opselmods0 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + +def gi_vop3opselmods : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + def gi_smrd_imm : GIComplexOperandMatcher, GIComplexPatternEquiv; @@ -50,12 +58,19 @@ def gi_smrd_sgpr : GIComplexOperandMatcher, GIComplexPatternEquiv; +// FIXME: Why are the atomic versions separated? def gi_flat_offset : GIComplexOperandMatcher, GIComplexPatternEquiv; def gi_flat_offset_signed : GIComplexOperandMatcher, GIComplexPatternEquiv; +def gi_flat_atomic : + GIComplexOperandMatcher, + GIComplexPatternEquiv; +def gi_flat_signed_atomic : + GIComplexOperandMatcher, + GIComplexPatternEquiv; def gi_mubuf_scratch_offset : GIComplexOperandMatcher, @@ -64,6 +79,44 @@ def gi_mubuf_scratch_offen : GIComplexOperandMatcher, GIComplexPatternEquiv; +def gi_ds_1addr_1offset : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + + +// Separate load nodes are defined to glue m0 initialization in +// SelectionDAG. The GISel selector can just insert m0 initialization +// directly before before selecting a glue-less load, so hide this +// distinction. + +def : GINodeEquiv { + let CheckMMOIsNonAtomic = 1; +} + +def : GINodeEquiv { + let CheckMMOIsNonAtomic = 1; +} + +def : GINodeEquiv { + bit CheckMMOIsAtomic = 1; +} + + + +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; + +def : GINodeEquiv; class GISelSop2Pat < SDPatternOperator node, @@ -143,20 +196,6 @@ multiclass GISelVop2IntrPat < def : GISelSop2Pat ; def : GISelVop2Pat ; -// FIXME: We can't re-use SelectionDAG patterns here because they match -// against a custom SDNode and we would need to create a generic machine -// instruction that is equivalent to the custom SDNode. This would also require -// us to custom legalize the intrinsic to the new generic machine instruction, -// but I can't get custom legalizing of intrinsic to work and I'm not sure if -// this is even supported yet. -def : GISelVop3Pat2ModsPat < - int_amdgcn_cvt_pkrtz, V_CVT_PKRTZ_F16_F32_e64, v2f16, f32>; - -defm : GISelVop2IntrPat ; -def : GISelVop3Pat2ModsPat ; -defm : GISelVop2IntrPat ; -def : GISelVop3Pat2ModsPat ; - // Since GlobalISel is more flexible then SelectionDAG, I think we can get // away with adding patterns for integer types and not legalizing all // loads and stores to vector types. 
This should help simplify the load/store @@ -164,3 +203,6 @@ def : GISelVop3Pat2ModsPat ; foreach Ty = [i64, p0, p1, p4] in { defm : SMRD_Pattern <"S_LOAD_DWORDX2", Ty>; } + +def gi_as_i32timm : GICustomOperandRenderer<"renderTruncImm32">, + GISDNodeXFormEquiv; diff --git a/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def b/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def index 0a1f48231b18..85d1ad349157 100644 --- a/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def +++ b/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def @@ -22,15 +22,17 @@ enum PartialMappingIdx { PM_SGPR128 = 9, PM_SGPR256 = 10, PM_SGPR512 = 11, - PM_VGPR1 = 12, - PM_VGPR16 = 16, - PM_VGPR32 = 17, - PM_VGPR64 = 18, - PM_VGPR128 = 19, - PM_VGPR256 = 20, - PM_VGPR512 = 21, - PM_SGPR96 = 22, - PM_VGPR96 = 23 + PM_SGPR1024 = 12, + PM_VGPR1 = 13, + PM_VGPR16 = 17, + PM_VGPR32 = 18, + PM_VGPR64 = 19, + PM_VGPR128 = 20, + PM_VGPR256 = 21, + PM_VGPR512 = 22, + PM_VGPR1024 = 23, + PM_SGPR96 = 24, + PM_VGPR96 = 25 }; const RegisterBankInfo::PartialMapping PartMappings[] { @@ -45,6 +47,7 @@ const RegisterBankInfo::PartialMapping PartMappings[] { {0, 128, SGPRRegBank}, {0, 256, SGPRRegBank}, {0, 512, SGPRRegBank}, + {0, 1024, SGPRRegBank}, {0, 1, VGPRRegBank}, // VGPR begin {0, 16, VGPRRegBank}, @@ -53,8 +56,9 @@ const RegisterBankInfo::PartialMapping PartMappings[] { {0, 128, VGPRRegBank}, {0, 256, VGPRRegBank}, {0, 512, VGPRRegBank}, + {0, 1024, VGPRRegBank}, {0, 96, SGPRRegBank}, - {0, 96, VGPRRegBank}, + {0, 96, VGPRRegBank} }; const RegisterBankInfo::ValueMapping ValMappings[] { @@ -65,41 +69,43 @@ const RegisterBankInfo::ValueMapping ValMappings[] { {&PartMappings[1], 1}, // SGPRs - {&PartMappings[2], 1}, + {&PartMappings[2], 1}, // 1 {nullptr, 0}, // Illegal power of 2 sizes {nullptr, 0}, {nullptr, 0}, - {&PartMappings[3], 1}, - {&PartMappings[4], 1}, - {&PartMappings[5], 1}, - {&PartMappings[6], 1}, - {&PartMappings[7], 1}, - {&PartMappings[8], 1}, - - // VGPRs - {&PartMappings[9], 1}, + {&PartMappings[3], 1}, // 16 + {&PartMappings[4], 1}, // 32 + {&PartMappings[5], 1}, // 64 + {&PartMappings[6], 1}, // 128 + {&PartMappings[7], 1}, // 256 + {&PartMappings[8], 1}, // 512 + {&PartMappings[9], 1}, // 1024 + + // VGPRs + {&PartMappings[10], 1}, // 1 {nullptr, 0}, {nullptr, 0}, {nullptr, 0}, - {&PartMappings[10], 1}, - {&PartMappings[11], 1}, - {&PartMappings[12], 1}, - {&PartMappings[13], 1}, - {&PartMappings[14], 1}, - {&PartMappings[15], 1}, - {&PartMappings[16], 1}, - {&PartMappings[17], 1} + {&PartMappings[11], 1}, // 16 + {&PartMappings[12], 1}, // 32 + {&PartMappings[13], 1}, // 64 + {&PartMappings[14], 1}, // 128 + {&PartMappings[15], 1}, // 256 + {&PartMappings[16], 1}, // 512 + {&PartMappings[17], 1}, // 1024 + {&PartMappings[18], 1}, + {&PartMappings[19], 1} }; const RegisterBankInfo::PartialMapping SGPROnly64BreakDown[] { - /*32-bit op*/ {0, 32, SGPRRegBank}, - /*2x32-bit op*/ {0, 32, SGPRRegBank}, - {32, 32, SGPRRegBank}, -/*<2x32-bit> op*/ {0, 64, SGPRRegBank}, - - /*32-bit op*/ {0, 32, VGPRRegBank}, - /*2x32-bit op*/ {0, 32, VGPRRegBank}, - {32, 32, VGPRRegBank}, + {0, 32, SGPRRegBank}, // 32-bit op + {0, 32, SGPRRegBank}, // 2x32-bit op + {32, 32, SGPRRegBank}, + {0, 64, SGPRRegBank}, // <2x32-bit> op + + {0, 32, VGPRRegBank}, // 32-bit op + {0, 32, VGPRRegBank}, // 2x32-bit op + {32, 32, VGPRRegBank}, }; @@ -116,7 +122,7 @@ const RegisterBankInfo::ValueMapping ValMappingsSGPR64OnlyVGPR32[] { enum ValueMappingIdx { SCCStartIdx = 0, SGPRStartIdx = 2, - VGPRStartIdx = 12 + VGPRStartIdx = 13 }; const 
RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID, diff --git a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index b31de0af5018..9f5bcd8ff5f0 100644 --- a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -218,12 +218,13 @@ MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF, assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL || F.getCallingConv() == CallingConv::SPIR_KERNEL); - unsigned MaxKernArgAlign; + Align MaxKernArgAlign; HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F, MaxKernArgAlign); HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize; HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize; - HSACodeProps.mKernargSegmentAlign = std::max(MaxKernArgAlign, 4u); + HSACodeProps.mKernargSegmentAlign = + std::max(MaxKernArgAlign, Align(4)).value(); HSACodeProps.mWavefrontSize = STM.getWavefrontSize(); HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR; HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR; @@ -883,7 +884,7 @@ MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF, auto Kern = HSAMetadataDoc->getMapNode(); - unsigned MaxKernArgAlign; + Align MaxKernArgAlign; Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode( STM.getKernArgSegmentSize(F, MaxKernArgAlign)); Kern[".group_segment_fixed_size"] = @@ -891,7 +892,7 @@ MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF, Kern[".private_segment_fixed_size"] = Kern.getDocument()->getNode(ProgramInfo.ScratchSize); Kern[".kernarg_segment_align"] = - Kern.getDocument()->getNode(std::max(uint32_t(4), MaxKernArgAlign)); + Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value()); Kern[".wavefront_size"] = Kern.getDocument()->getNode(STM.getWavefrontSize()); Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR); diff --git a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h index 2eecddbd7b01..80ac8ca67bcd 100644 --- a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h +++ b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h @@ -52,7 +52,7 @@ public: class MetadataStreamerV3 final : public MetadataStreamer { private: std::unique_ptr HSAMetadataDoc = - llvm::make_unique(); + std::make_unique(); void dump(StringRef HSAMetadataString) const; diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index ea730539f834..f330bd7ebcdd 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -172,8 +172,9 @@ private: MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const; - SDNode *glueCopyToM0LDSInit(SDNode *N) const; + SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const; SDNode *glueCopyToM0(SDNode *N, SDValue Val) const; + SDNode *glueCopyToM0LDSInit(SDNode *N) const; const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const; virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); @@ -186,10 +187,11 @@ private: bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &Offen, SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC, - SDValue &TFE, SDValue &DLC) const; + SDValue &TFE, SDValue &DLC, SDValue &SWZ) const; bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &GLC, - SDValue &SLC, SDValue &TFE, SDValue &DLC) const; + 
SDValue &SLC, SDValue &TFE, SDValue &DLC, + SDValue &SWZ) const; bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &SLC) const; @@ -202,21 +204,20 @@ private: bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset, SDValue &Offset, SDValue &GLC, SDValue &SLC, - SDValue &TFE, SDValue &DLC) const; + SDValue &TFE, SDValue &DLC, SDValue &SWZ) const; bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, SDValue &Offset, SDValue &SLC) const; bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, SDValue &Offset) const; + template + bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr, + SDValue &Offset, SDValue &SLC) const; bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr, SDValue &Offset, SDValue &SLC) const; bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr, SDValue &Offset, SDValue &SLC) const; - template - bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr, - SDValue &Offset, SDValue &SLC) const; - bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, bool &Imm) const; SDValue Expand32BitAddress(SDValue Addr) const; @@ -262,6 +263,8 @@ private: SDValue getHi16Elt(SDValue In) const; + SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const; + void SelectADD_SUB_I64(SDNode *N); void SelectAddcSubb(SDNode *N); void SelectUADDO_USUBO(SDNode *N); @@ -282,6 +285,7 @@ private: void SelectDSAppendConsume(SDNode *N, unsigned IntrID); void SelectDS_GWS(SDNode *N, unsigned IntrID); void SelectINTRINSIC_W_CHAIN(SDNode *N); + void SelectINTRINSIC_WO_CHAIN(SDNode *N); void SelectINTRINSIC_VOID(SDNode *N); protected: @@ -543,7 +547,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, if (!N->isMachineOpcode()) { if (N->getOpcode() == ISD::CopyToReg) { unsigned Reg = cast(N->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo(); return MRI.getRegClass(Reg); } @@ -582,19 +586,10 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, } } -SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const { - const SITargetLowering& Lowering = - *static_cast(getTargetLowering()); - - assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain"); - - SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), - Val); - - SDValue Glue = M0.getValue(1); - +SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain, + SDValue Glue) const { SmallVector Ops; - Ops.push_back(M0); // Replace the chain. + Ops.push_back(NewChain); // Replace the chain. 
for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) Ops.push_back(N->getOperand(i)); @@ -602,6 +597,16 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const { return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); } +SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const { + const SITargetLowering& Lowering = + *static_cast(getTargetLowering()); + + assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain"); + + SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val); + return glueCopyToOp(N, M0, M0.getValue(1)); +} + SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const { unsigned AS = cast(N)->getAddressSpace(); if (AS == AMDGPUAS::LOCAL_ADDRESS) { @@ -635,13 +640,13 @@ MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm, static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) { switch (NumVectorElts) { case 1: - return AMDGPU::SReg_32_XM0RegClassID; + return AMDGPU::SReg_32RegClassID; case 2: return AMDGPU::SReg_64RegClassID; case 3: return AMDGPU::SGPR_96RegClassID; case 4: - return AMDGPU::SReg_128RegClassID; + return AMDGPU::SGPR_128RegClassID; case 5: return AMDGPU::SGPR_160RegClassID; case 8: @@ -713,12 +718,17 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { return; // Already selected. } - if (isa(N) || + // isa almost works but is slightly too permissive for some DS + // intrinsics. + if (Opc == ISD::LOAD || Opc == ISD::STORE || isa(N) || (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC || Opc == ISD::ATOMIC_LOAD_FADD || Opc == AMDGPUISD::ATOMIC_LOAD_FMIN || - Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) + Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) { N = glueCopyToM0LDSInit(N); + SelectCode(N); + return; + } switch (Opc) { default: @@ -781,7 +791,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { SDValue RC, SubReg0, SubReg1; SDLoc DL(N); if (N->getValueType(0) == MVT::i128) { - RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32); + RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32); SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32); SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32); } else if (N->getValueType(0) == MVT::i64) { @@ -815,14 +825,6 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0))); return; } - case ISD::LOAD: - case ISD::STORE: - case ISD::ATOMIC_LOAD: - case ISD::ATOMIC_STORE: { - N = glueCopyToM0LDSInit(N); - break; - } - case AMDGPUISD::BFE_I32: case AMDGPUISD::BFE_U32: { // There is a scalar version available, but unlike the vector version which @@ -908,6 +910,10 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { SelectINTRINSIC_W_CHAIN(N); return; } + case ISD::INTRINSIC_WO_CHAIN: { + SelectINTRINSIC_WO_CHAIN(N); + return; + } case ISD::INTRINSIC_VOID: { SelectINTRINSIC_VOID(N); return; @@ -961,6 +967,14 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, return true; } +SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val, + const SDLoc &DL) const { + SDNode *Mov = CurDAG->getMachineNode( + AMDGPU::S_MOV_B32, DL, MVT::i32, + CurDAG->getTargetConstant(Val, DL, MVT::i32)); + return SDValue(Mov, 0); +} + // FIXME: Should only handle addcarry/subcarry void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { SDLoc DL(N); @@ -1308,7 +1322,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &Offset, SDValue &Offen, SDValue &Idxen, SDValue 
&Addr64, SDValue &GLC, SDValue &SLC, - SDValue &TFE, SDValue &DLC) const { + SDValue &TFE, SDValue &DLC, + SDValue &SWZ) const { // Subtarget prefers to use flat instruction if (Subtarget->useFlatForGlobal()) return false; @@ -1321,6 +1336,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SLC = CurDAG->getTargetConstant(0, DL, MVT::i1); TFE = CurDAG->getTargetConstant(0, DL, MVT::i1); DLC = CurDAG->getTargetConstant(0, DL, MVT::i1); + SWZ = CurDAG->getTargetConstant(0, DL, MVT::i1); Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1); Offen = CurDAG->getTargetConstant(0, DL, MVT::i1); @@ -1400,7 +1416,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &GLC, SDValue &SLC, SDValue &TFE, - SDValue &DLC) const { + SDValue &DLC, SDValue &SWZ) const { SDValue Ptr, Offen, Idxen, Addr64; // addr64 bit was removed for volcanic islands. @@ -1408,7 +1424,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, return false; if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, - GLC, SLC, TFE, DLC)) + GLC, SLC, TFE, DLC, SWZ)) return false; ConstantSDNode *C = cast(Addr64); @@ -1430,9 +1446,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &Offset, SDValue &SLC) const { SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1); - SDValue GLC, TFE, DLC; + SDValue GLC, TFE, DLC, SWZ; - return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC); + return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC, SWZ); } static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) { @@ -1557,13 +1573,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent, bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset, SDValue &Offset, SDValue &GLC, SDValue &SLC, - SDValue &TFE, SDValue &DLC) const { + SDValue &TFE, SDValue &DLC, + SDValue &SWZ) const { SDValue Ptr, VAddr, Offen, Idxen, Addr64; const SIInstrInfo *TII = static_cast(Subtarget->getInstrInfo()); if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, - GLC, SLC, TFE, DLC)) + GLC, SLC, TFE, DLC, SWZ)) return false; if (!cast(Offen)->getSExtValue() && @@ -1585,16 +1602,30 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, SDValue &Offset ) const { - SDValue GLC, SLC, TFE, DLC; + SDValue GLC, SLC, TFE, DLC, SWZ; - return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC); + return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ); } bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, SDValue &Offset, SDValue &SLC) const { - SDValue GLC, TFE, DLC; + SDValue GLC, TFE, DLC, SWZ; + + return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ); +} - return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC); +// Find a load or store from corresponding pattern root. +// Roots may be build_vector, bitconvert or their combinations. 
+static MemSDNode* findMemSDNode(SDNode *N) { + N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode(); + if (MemSDNode *MN = dyn_cast(N)) + return MN; + assert(isa(N)); + for (SDValue V : N->op_values()) + if (MemSDNode *MN = + dyn_cast(AMDGPUTargetLowering::stripBitcast(V))) + return MN; + llvm_unreachable("cannot find MemSDNode in the pattern!"); } template @@ -1603,8 +1634,95 @@ bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue &VAddr, SDValue &Offset, SDValue &SLC) const { - return static_cast(getTargetLowering())-> - SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC); + int64_t OffsetVal = 0; + + if (Subtarget->hasFlatInstOffsets() && + (!Subtarget->hasFlatSegmentOffsetBug() || + findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) && + CurDAG->isBaseWithConstantOffset(Addr)) { + SDValue N0 = Addr.getOperand(0); + SDValue N1 = Addr.getOperand(1); + uint64_t COffsetVal = cast(N1)->getSExtValue(); + + const SIInstrInfo *TII = Subtarget->getInstrInfo(); + unsigned AS = findMemSDNode(N)->getAddressSpace(); + if (TII->isLegalFLATOffset(COffsetVal, AS, IsSigned)) { + Addr = N0; + OffsetVal = COffsetVal; + } else { + // If the offset doesn't fit, put the low bits into the offset field and + // add the rest. + + SDLoc DL(N); + uint64_t ImmField; + const unsigned NumBits = TII->getNumFlatOffsetBits(AS, IsSigned); + if (IsSigned) { + ImmField = SignExtend64(COffsetVal, NumBits); + + // Don't use a negative offset field if the base offset is positive. + // Since the scheduler currently relies on the offset field, doing so + // could result in strange scheduling decisions. + + // TODO: Should we not do this in the opposite direction as well? + if (static_cast(COffsetVal) > 0) { + if (static_cast(ImmField) < 0) { + const uint64_t OffsetMask = maskTrailingOnes(NumBits - 1); + ImmField = COffsetVal & OffsetMask; + } + } + } else { + // TODO: Should we do this for a negative offset? + const uint64_t OffsetMask = maskTrailingOnes(NumBits); + ImmField = COffsetVal & OffsetMask; + } + + uint64_t RemainderOffset = COffsetVal - ImmField; + + assert(TII->isLegalFLATOffset(ImmField, AS, IsSigned)); + assert(RemainderOffset + ImmField == COffsetVal); + + OffsetVal = ImmField; + + // TODO: Should this try to use a scalar add pseudo if the base address is + // uniform and saddr is usable? 
+ SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); + SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); + + SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, + DL, MVT::i32, N0, Sub0); + SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, + DL, MVT::i32, N0, Sub1); + + SDValue AddOffsetLo + = getMaterializedScalarImm32(Lo_32(RemainderOffset), DL); + SDValue AddOffsetHi + = getMaterializedScalarImm32(Hi_32(RemainderOffset), DL); + + SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1); + SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); + + SDNode *Add = CurDAG->getMachineNode( + AMDGPU::V_ADD_I32_e64, DL, VTs, + {AddOffsetLo, SDValue(N0Lo, 0), Clamp}); + + SDNode *Addc = CurDAG->getMachineNode( + AMDGPU::V_ADDC_U32_e64, DL, VTs, + {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp}); + + SDValue RegSequenceArgs[] = { + CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32), + SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1 + }; + + Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL, + MVT::i64, RegSequenceArgs), 0); + } + } + + VAddr = Addr; + Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16); + SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1); + return true; } bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N, @@ -1616,10 +1734,10 @@ bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N, } bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N, - SDValue Addr, - SDValue &VAddr, - SDValue &Offset, - SDValue &SLC) const { + SDValue Addr, + SDValue &VAddr, + SDValue &Offset, + SDValue &SLC) const { return SelectFlatOffset(N, Addr, VAddr, Offset, SLC); } @@ -2158,10 +2276,12 @@ void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) { // offset field) % 64. Some versions of the programming guide omit the m0 // part, or claim it's from offset 0. if (ConstantSDNode *ConstOffset = dyn_cast(BaseOffset)) { - // If we have a constant offset, try to use the default value for m0 as a - // base to possibly avoid setting it up. - glueCopyToM0(N, CurDAG->getTargetConstant(-1, SL, MVT::i32)); - ImmOffset = ConstOffset->getZExtValue() + 1; + // If we have a constant offset, try to use the 0 in m0 as the base. + // TODO: Look into changing the default m0 initialization value. If the + // default -1 only set the low 16-bits, we could leave it as-is and add 1 to + // the immediate offset. + glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32)); + ImmOffset = ConstOffset->getZExtValue(); } else { if (CurDAG->isBaseWithConstantOffset(BaseOffset)) { ImmOffset = BaseOffset.getConstantOperandVal(1); @@ -2182,22 +2302,7 @@ void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) { glueCopyToM0(N, SDValue(M0Base, 0)); } - SDValue V0; SDValue Chain = N->getOperand(0); - SDValue Glue; - if (HasVSrc) { - SDValue VSrc0 = N->getOperand(2); - - // The manual doesn't mention this, but it seems only v0 works. - V0 = CurDAG->getRegister(AMDGPU::VGPR0, MVT::i32); - - SDValue CopyToV0 = CurDAG->getCopyToReg( - N->getOperand(0), SL, V0, VSrc0, - N->getOperand(N->getNumOperands() - 1)); - Chain = CopyToV0; - Glue = CopyToV0.getValue(1); - } - SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32); // TODO: Can this just be removed from the instruction? 
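// --- Illustrative sketch (not part of the imported sources) ---
// The SelectFlatOffset change above splits a constant address offset that
// does not fit the FLAT immediate field into an in-range immediate plus a
// remainder that is materialized and added to the base address
// (V_ADD_I32/V_ADDC_U32 + REG_SEQUENCE). The standalone arithmetic below
// (hypothetical helper names, signed case only) mirrors that split, including
// the rule of avoiding a negative immediate when the whole offset is positive.
#include <cassert>
#include <cstdint>
#include <cstdio>

// Sign-extend the low NumBits of V (same idea as llvm::SignExtend64).
static int64_t signExtendLow(uint64_t V, unsigned NumBits) {
  assert(NumBits > 0 && NumBits < 64);
  return int64_t(V << (64 - NumBits)) >> (64 - NumBits);
}

// Split Offset so that Imm fits a signed NumBits field and Imm + Rem == Offset.
static void splitFlatOffset(int64_t Offset, unsigned NumBits, int64_t &Imm,
                            int64_t &Rem) {
  Imm = signExtendLow(uint64_t(Offset), NumBits);
  // Don't use a negative immediate field when the full offset is positive.
  if (Offset > 0 && Imm < 0)
    Imm = Offset & int64_t((1ull << (NumBits - 1)) - 1);
  Rem = Offset - Imm;
  assert(Imm + Rem == Offset && "split must reassemble to the original offset");
}

int main() {
  int64_t Imm, Rem;
  splitFlatOffset(/*Offset=*/0x12345, /*NumBits=*/13, Imm, Rem);
  std::printf("imm=%lld remainder=%lld\n", (long long)Imm, (long long)Rem);
  return 0;
}
// --- End of illustrative sketch ---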
@@ -2206,14 +2311,11 @@ void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) { const unsigned Opc = gwsIntrinToOpcode(IntrID); SmallVector Ops; if (HasVSrc) - Ops.push_back(V0); + Ops.push_back(N->getOperand(2)); Ops.push_back(OffsetField); Ops.push_back(GDS); Ops.push_back(Chain); - if (HasVSrc) - Ops.push_back(Glue); - SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); CurDAG->setNodeMemRefs(cast(Selected), {MMO}); } @@ -2233,6 +2335,28 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) { SelectCode(N); } +void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) { + unsigned IntrID = cast(N->getOperand(0))->getZExtValue(); + unsigned Opcode; + switch (IntrID) { + case Intrinsic::amdgcn_wqm: + Opcode = AMDGPU::WQM; + break; + case Intrinsic::amdgcn_softwqm: + Opcode = AMDGPU::SOFT_WQM; + break; + case Intrinsic::amdgcn_wwm: + Opcode = AMDGPU::WWM; + break; + default: + SelectCode(N); + return; + } + + SDValue Src = N->getOperand(1); + CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src}); +} + void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) { unsigned IntrID = cast(N->getOperand(1))->getZExtValue(); switch (IntrID) { diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 39016ed37193..1115d8c23620 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -12,10 +12,6 @@ // //===----------------------------------------------------------------------===// -#define AMDGPU_LOG2E_F 1.44269504088896340735992468100189214f -#define AMDGPU_LN2_F 0.693147180559945309417232121458176568f -#define AMDGPU_LN10_F 2.30258509299404568401799145468436421f - #include "AMDGPUISelLowering.h" #include "AMDGPU.h" #include "AMDGPUCallLowering.h" @@ -37,82 +33,9 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/Support/KnownBits.h" +#include "llvm/Support/MathExtras.h" using namespace llvm; -static bool allocateCCRegs(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State, - const TargetRegisterClass *RC, - unsigned NumRegs) { - ArrayRef RegList = makeArrayRef(RC->begin(), NumRegs); - unsigned RegResult = State.AllocateReg(RegList); - if (RegResult == AMDGPU::NoRegister) - return false; - - State.addLoc(CCValAssign::getReg(ValNo, ValVT, RegResult, LocVT, LocInfo)); - return true; -} - -static bool allocateSGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State) { - switch (LocVT.SimpleTy) { - case MVT::i64: - case MVT::f64: - case MVT::v2i32: - case MVT::v2f32: - case MVT::v4i16: - case MVT::v4f16: { - // Up to SGPR0-SGPR105 - return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, - &AMDGPU::SGPR_64RegClass, 53); - } - default: - return false; - } -} - -// Allocate up to VGPR31. -// -// TODO: Since there are no VGPR alignent requirements would it be better to -// split into individual scalar registers? 
-static bool allocateVGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State) { - switch (LocVT.SimpleTy) { - case MVT::i64: - case MVT::f64: - case MVT::v2i32: - case MVT::v2f32: - case MVT::v4i16: - case MVT::v4f16: { - return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, - &AMDGPU::VReg_64RegClass, 31); - } - case MVT::v4i32: - case MVT::v4f32: - case MVT::v2i64: - case MVT::v2f64: { - return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, - &AMDGPU::VReg_128RegClass, 29); - } - case MVT::v8i32: - case MVT::v8f32: { - return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, - &AMDGPU::VReg_256RegClass, 25); - - } - case MVT::v16i32: - case MVT::v16f32: { - return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, - &AMDGPU::VReg_512RegClass, 17); - - } - default: - return false; - } -} - #include "AMDGPUGenCallingConv.inc" // Find a larger type to do a load / store of a vector with. @@ -208,7 +131,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand); } - for (MVT VT : MVT::integer_vector_valuetypes()) { + for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i8, Expand); @@ -218,6 +141,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i16, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v3i16, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v3i16, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v3i16, Expand); setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i16, Expand); @@ -225,8 +151,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand); @@ -286,8 +215,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setTruncStoreAction(MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand); + setTruncStoreAction(MVT::v3f32, MVT::v3f16, Expand); setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand); setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand); + setTruncStoreAction(MVT::v16f32, MVT::v16f16, Expand); + setTruncStoreAction(MVT::v32f32, MVT::v32f16, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); @@ -571,6 +503,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::FABS); setTargetDAGCombine(ISD::AssertZext); setTargetDAGCombine(ISD::AssertSext); + setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); } 
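// --- Illustrative sketch (not part of the imported sources) ---
// Registering ISD::INTRINSIC_WO_CHAIN as a combine target above lets the
// performIntrinsicWOChainCombine hook added later in this patch route
// amdgcn.mul.i24 / amdgcn.mul.u24 through simplifyI24, which demands only the
// low 24 bits of each operand. The standalone code below (hypothetical
// helpers, not LLVM code) shows why bits above bit 23 of the sources are
// irrelevant: the inputs are sign- or zero-extended from 24 bits before the
// multiply, and the instruction keeps 32 bits of the resulting product.
#include <cstdint>
#include <cstdio>

// Sign-extend the low 24 bits of V (written without shifts of negatives).
static int64_t sext24(int64_t V) {
  return (V & 0x7FFFFF) - (V & 0x800000);
}

// Full product of the sign-extended 24-bit inputs; mul_i24 keeps the low
// 32 bits of this product, mul_hi_i24 the high bits.
static int64_t mulI24(int64_t A, int64_t B) { return sext24(A) * sext24(B); }

// Unsigned variant: zero-extend the low 24 bits of each operand.
static uint64_t mulU24(uint64_t A, uint64_t B) {
  return (A & 0xFFFFFF) * (B & 0xFFFFFF);
}

int main() {
  // Garbage in the upper 8 bits of either operand does not change the result,
  // which is what allows the combine to narrow the operands to 24 bits.
  std::printf("%lld == %lld\n", (long long)mulI24(0x7F001234, 0x1234),
              (long long)mulI24(0x1234, 0x1234));
  std::printf("%llu == %llu\n", (unsigned long long)mulU24(0xFF000042, 7),
              (unsigned long long)mulU24(0x42, 7));
  return 0;
}
// --- End of illustrative sketch ---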
//===----------------------------------------------------------------------===// @@ -630,15 +563,26 @@ static bool hasSourceMods(const SDNode *N) { case ISD::FREM: case ISD::INLINEASM: case ISD::INLINEASM_BR: - case AMDGPUISD::INTERP_P1: - case AMDGPUISD::INTERP_P2: case AMDGPUISD::DIV_SCALE: + case ISD::INTRINSIC_W_CHAIN: // TODO: Should really be looking at the users of the bitcast. These are // problematic because bitcasts are used to legalize all stores to integer // types. case ISD::BITCAST: return false; + case ISD::INTRINSIC_WO_CHAIN: { + switch (cast(N->getOperand(0))->getZExtValue()) { + case Intrinsic::amdgcn_interp_p1: + case Intrinsic::amdgcn_interp_p2: + case Intrinsic::amdgcn_interp_mov: + case Intrinsic::amdgcn_interp_p1_f16: + case Intrinsic::amdgcn_interp_p2_f16: + return false; + default: + return true; + } + } default: return true; } @@ -745,8 +689,9 @@ bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, EVT CastTy, return false; bool Fast = false; - return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), CastTy, - MMO, &Fast) && Fast; + return allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), + CastTy, MMO, &Fast) && + Fast; } // SI+ has instructions for cttz / ctlz for 32-bit values. This is probably also @@ -782,9 +727,8 @@ bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode * N) const { break; case ISD::LOAD: { - const LoadSDNode * L = dyn_cast(N); - if (L->getMemOperand()->getAddrSpace() - == AMDGPUAS::CONSTANT_ADDRESS_32BIT) + if (cast(N)->getMemOperand()->getAddrSpace() == + AMDGPUAS::CONSTANT_ADDRESS_32BIT) return true; return false; } @@ -1199,9 +1143,9 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, case ISD::FROUND: return LowerFROUND(Op, DAG); case ISD::FFLOOR: return LowerFFLOOR(Op, DAG); case ISD::FLOG: - return LowerFLOG(Op, DAG, 1 / AMDGPU_LOG2E_F); + return LowerFLOG(Op, DAG, 1.0F / numbers::log2ef); case ISD::FLOG10: - return LowerFLOG(Op, DAG, AMDGPU_LN2_F / AMDGPU_LN10_F); + return LowerFLOG(Op, DAG, numbers::ln2f / numbers::ln10f); case ISD::FEXP: return lowerFEXP(Op, DAG); case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); @@ -1236,7 +1180,7 @@ void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N, } } -static bool hasDefinedInitializer(const GlobalValue *GV) { +bool AMDGPUTargetLowering::hasDefinedInitializer(const GlobalValue *GV) { const GlobalVariable *GVar = dyn_cast(GV); if (!GVar || !GVar->hasInitializer()) return false; @@ -2349,30 +2293,13 @@ SDValue AMDGPUTargetLowering::LowerFLOG(SDValue Op, SelectionDAG &DAG, return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand); } -// Return M_LOG2E of appropriate type -static SDValue getLog2EVal(SelectionDAG &DAG, const SDLoc &SL, EVT VT) { - switch (VT.getScalarType().getSimpleVT().SimpleTy) { - case MVT::f32: - return DAG.getConstantFP(1.44269504088896340735992468100189214f, SL, VT); - case MVT::f16: - return DAG.getConstantFP( - APFloat(APFloat::IEEEhalf(), "1.44269504088896340735992468100189214"), - SL, VT); - case MVT::f64: - return DAG.getConstantFP( - APFloat(APFloat::IEEEdouble(), "0x1.71547652b82fep+0"), SL, VT); - default: - llvm_unreachable("unsupported fp type"); - } -} - // exp2(M_LOG2E_F * f); SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc SL(Op); SDValue Src = Op.getOperand(0); - const SDValue K = getLog2EVal(DAG, SL, VT); + const SDValue K = DAG.getConstantFP(numbers::log2e, SL, VT); SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, 
Src, K, Op->getFlags()); return DAG.getNode(ISD::FEXP2, SL, VT, Mul, Op->getFlags()); } @@ -2836,8 +2763,16 @@ static bool isI24(SDValue Op, SelectionDAG &DAG) { static SDValue simplifyI24(SDNode *Node24, TargetLowering::DAGCombinerInfo &DCI) { SelectionDAG &DAG = DCI.DAG; - SDValue LHS = Node24->getOperand(0); - SDValue RHS = Node24->getOperand(1); + bool IsIntrin = Node24->getOpcode() == ISD::INTRINSIC_WO_CHAIN; + + SDValue LHS = IsIntrin ? Node24->getOperand(1) : Node24->getOperand(0); + SDValue RHS = IsIntrin ? Node24->getOperand(2) : Node24->getOperand(1); + unsigned NewOpcode = Node24->getOpcode(); + if (IsIntrin) { + unsigned IID = cast<ConstantSDNode>(Node24->getOperand(0))->getZExtValue(); + NewOpcode = IID == Intrinsic::amdgcn_mul_i24 ? + AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24; + } APInt Demanded = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 24); @@ -2847,7 +2782,7 @@ static SDValue simplifyI24(SDNode *Node24, SDValue DemandedLHS = DAG.GetDemandedBits(LHS, Demanded); SDValue DemandedRHS = DAG.GetDemandedBits(RHS, Demanded); if (DemandedLHS || DemandedRHS) - return DAG.getNode(Node24->getOpcode(), SDLoc(Node24), Node24->getVTList(), + return DAG.getNode(NewOpcode, SDLoc(Node24), Node24->getVTList(), DemandedLHS ? DemandedLHS : LHS, DemandedRHS ? DemandedRHS : RHS); @@ -2904,54 +2839,6 @@ bool AMDGPUTargetLowering::shouldCombineMemoryType(EVT VT) const { return true; } -// Find a load or store from corresponding pattern root. -// Roots may be build_vector, bitconvert or their combinations. -static MemSDNode* findMemSDNode(SDNode *N) { - N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode(); - if (MemSDNode *MN = dyn_cast<MemSDNode>(N)) - return MN; - assert(isa<BuildVectorSDNode>(N)); - for (SDValue V : N->op_values()) - if (MemSDNode *MN = - dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V))) - return MN; - llvm_unreachable("cannot find MemSDNode in the pattern!"); } - -bool AMDGPUTargetLowering::SelectFlatOffset(bool IsSigned, - SelectionDAG &DAG, - SDNode *N, - SDValue Addr, - SDValue &VAddr, - SDValue &Offset, - SDValue &SLC) const { - const GCNSubtarget &ST = - DAG.getMachineFunction().getSubtarget<GCNSubtarget>(); - int64_t OffsetVal = 0; - - if (ST.hasFlatInstOffsets() && - (!ST.hasFlatSegmentOffsetBug() || - findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) && - DAG.isBaseWithConstantOffset(Addr)) { - SDValue N0 = Addr.getOperand(0); - SDValue N1 = Addr.getOperand(1); - int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue(); - - const SIInstrInfo *TII = ST.getInstrInfo(); - if (TII->isLegalFLATOffset(COffsetVal, findMemSDNode(N)->getAddressSpace(), - IsSigned)) { - Addr = N0; - OffsetVal = COffsetVal; - } - } - - VAddr = Addr; - Offset = DAG.getTargetConstant(OffsetVal, SDLoc(), MVT::i16); - SLC = DAG.getTargetConstant(0, SDLoc(), MVT::i1); - - return true; -} - // Replace load of an illegal type with a store of a bitcast to a friendlier // type. SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N, @@ -3085,6 +2972,19 @@ SDValue AMDGPUTargetLowering::performAssertSZExtCombine(SDNode *N, return SDValue(); } + +SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine( + SDNode *N, DAGCombinerInfo &DCI) const { + unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + switch (IID) { + case Intrinsic::amdgcn_mul_i24: + case Intrinsic::amdgcn_mul_u24: + return simplifyI24(N, DCI); + default: + return SDValue(); + } +} + /// Split the 64-bit value \p LHS into two 32-bit components, and perform the /// binary operation \p Opc to it with the corresponding constant operands.
SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl( @@ -4173,6 +4073,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, case ISD::AssertZext: case ISD::AssertSext: return performAssertSZExtCombine(N, DCI); + case ISD::INTRINSIC_WO_CHAIN: + return performIntrinsicWOChainCombine(N, DCI); } return SDValue(); } @@ -4203,14 +4105,28 @@ SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, return DAG.getCopyFromReg(DAG.getEntryNode(), SL, VReg, VT); } +// This may be called multiple times, and nothing prevents creating multiple +// objects at the same offset. See if we already defined this object. +static int getOrCreateFixedStackObject(MachineFrameInfo &MFI, unsigned Size, + int64_t Offset) { + for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) { + if (MFI.getObjectOffset(I) == Offset) { + assert(MFI.getObjectSize(I) == Size); + return I; + } + } + + return MFI.CreateFixedObject(Size, Offset, true); +} + SDValue AMDGPUTargetLowering::loadStackInputValue(SelectionDAG &DAG, EVT VT, const SDLoc &SL, int64_t Offset) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); + int FI = getOrCreateFixedStackObject(MFI, VT.getStoreSize(), Offset); - int FI = MFI.CreateFixedObject(VT.getStoreSize(), Offset, true); auto SrcPtrInfo = MachinePointerInfo::getStack(MF, Offset); SDValue Ptr = DAG.getFrameIndex(FI, MVT::i32); @@ -4260,7 +4176,7 @@ uint32_t AMDGPUTargetLowering::getImplicitParameterOffset( const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(getTargetMachine(), MF.getFunction()); unsigned ExplicitArgOffset = ST.getExplicitKernelArgOffset(MF.getFunction()); - unsigned Alignment = ST.getAlignmentForImplicitArgPtr(); + const Align Alignment = ST.getAlignmentForImplicitArgPtr(); uint64_t ArgOffset = alignTo(MFI->getExplicitKernArgSize(), Alignment) + ExplicitArgOffset; switch (Param) { @@ -4295,6 +4211,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(FRACT) NODE_NAME_CASE(SETCC) NODE_NAME_CASE(SETREG) + NODE_NAME_CASE(DENORM_MODE) NODE_NAME_CASE(FMA_W_CHAIN) NODE_NAME_CASE(FMUL_W_CHAIN) NODE_NAME_CASE(CLAMP) @@ -4377,13 +4294,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(KILL) NODE_NAME_CASE(DUMMY_CHAIN) case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break; - NODE_NAME_CASE(INIT_EXEC) - NODE_NAME_CASE(INIT_EXEC_FROM_INPUT) - NODE_NAME_CASE(SENDMSG) - NODE_NAME_CASE(SENDMSGHALT) - NODE_NAME_CASE(INTERP_MOV) - NODE_NAME_CASE(INTERP_P1) - NODE_NAME_CASE(INTERP_P2) NODE_NAME_CASE(INTERP_P1LL_F16) NODE_NAME_CASE(INTERP_P1LV_F16) NODE_NAME_CASE(INTERP_P2_F16) @@ -4428,6 +4338,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(BUFFER_ATOMIC_AND) NODE_NAME_CASE(BUFFER_ATOMIC_OR) NODE_NAME_CASE(BUFFER_ATOMIC_XOR) + NODE_NAME_CASE(BUFFER_ATOMIC_INC) + NODE_NAME_CASE(BUFFER_ATOMIC_DEC) NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP) NODE_NAME_CASE(BUFFER_ATOMIC_FADD) NODE_NAME_CASE(BUFFER_ATOMIC_PK_FADD) @@ -4576,9 +4488,9 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( Known.One |= ((LHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I; Known.Zero |= ((LHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I; } else if (SelBits == 0x0c) { - Known.Zero |= 0xff << I; + Known.Zero |= 0xFFull << I; } else if (SelBits > 0x0c) { - Known.One |= 0xff << I; + Known.One |= 0xFFull << I; } Sel >>= 8; } diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h index 
fe7ad694943d..dea0d1d4343a 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -38,6 +38,7 @@ private: public: static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG); static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG); + static bool hasDefinedInitializer(const GlobalValue *GV); protected: SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; @@ -78,6 +79,7 @@ protected: SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue performIntrinsicWOChainCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL, unsigned Opc, SDValue LHS, @@ -324,10 +326,6 @@ public: } AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override; - - bool SelectFlatOffset(bool IsSigned, SelectionDAG &DAG, SDNode *N, - SDValue Addr, SDValue &VAddr, SDValue &Offset, - SDValue &SLC) const; }; namespace AMDGPUISD { @@ -369,6 +367,9 @@ enum NodeType : unsigned { // result bit per item in the wavefront. SETCC, SETREG, + + DENORM_MODE, + // FP ops with input and output chain. FMA_W_CHAIN, FMUL_W_CHAIN, @@ -475,13 +476,6 @@ enum NodeType : unsigned { BUILD_VERTICAL_VECTOR, /// Pointer to the start of the shader's constant data. CONST_DATA_PTR, - INIT_EXEC, - INIT_EXEC_FROM_INPUT, - SENDMSG, - SENDMSGHALT, - INTERP_MOV, - INTERP_P1, - INTERP_P2, INTERP_P1LL_F16, INTERP_P1LV_F16, INTERP_P2_F16, @@ -532,6 +526,8 @@ enum NodeType : unsigned { BUFFER_ATOMIC_AND, BUFFER_ATOMIC_OR, BUFFER_ATOMIC_XOR, + BUFFER_ATOMIC_INC, + BUFFER_ATOMIC_DEC, BUFFER_ATOMIC_CMPSWAP, BUFFER_ATOMIC_FADD, BUFFER_ATOMIC_PK_FADD, diff --git a/lib/Target/AMDGPU/AMDGPUInline.cpp b/lib/Target/AMDGPU/AMDGPUInline.cpp index f4df20b8f03e..a83ec23ec054 100644 --- a/lib/Target/AMDGPU/AMDGPUInline.cpp +++ b/lib/Target/AMDGPU/AMDGPUInline.cpp @@ -51,7 +51,7 @@ ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256), // Inliner constraint to achieve reasonable compilation time static cl::opt -MaxBB("amdgpu-inline-max-bb", cl::Hidden, cl::init(300), +MaxBB("amdgpu-inline-max-bb", cl::Hidden, cl::init(1100), cl::desc("Maximum BB number allowed in a function after inlining" " (compile time constraint)")); diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 4a8446955496..cf0ce5659951 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -110,39 +110,38 @@ def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>; // Force dependencies for vector trunc stores def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>; -def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>; -def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>; - +def AMDGPUcos_impl : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>; +def AMDGPUsin_impl : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>; // out = a - floor(a) -def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>; +def AMDGPUfract_impl : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>; // out = 1.0 / a -def AMDGPUrcp : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>; +def AMDGPUrcp_impl : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>; // out = 1.0 / sqrt(a) -def AMDGPUrsq : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>; +def AMDGPUrsq_impl : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>; // out = 1.0 / sqrt(a) -def 
AMDGPUrcp_legacy : SDNode<"AMDGPUISD::RCP_LEGACY", SDTFPUnaryOp>; -def AMDGPUrsq_legacy : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>; +def AMDGPUrsq_legacy_impl : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>; +def AMDGPUrcp_legacy_impl : SDNode<"AMDGPUISD::RCP_LEGACY", SDTFPUnaryOp>; def AMDGPUrcp_iflag : SDNode<"AMDGPUISD::RCP_IFLAG", SDTFPUnaryOp>; // out = 1.0 / sqrt(a) result clamped to +/- max_float. -def AMDGPUrsq_clamp : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>; +def AMDGPUrsq_clamp_impl : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>; -def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>; +def AMDGPUldexp_impl : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>; -def AMDGPUpkrtz_f16_f32 : SDNode<"AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp>; -def AMDGPUpknorm_i16_f32 : SDNode<"AMDGPUISD::CVT_PKNORM_I16_F32", AMDGPUFPPackOp>; -def AMDGPUpknorm_u16_f32 : SDNode<"AMDGPUISD::CVT_PKNORM_U16_F32", AMDGPUFPPackOp>; -def AMDGPUpk_i16_i32 : SDNode<"AMDGPUISD::CVT_PK_I16_I32", AMDGPUIntPackOp>; -def AMDGPUpk_u16_u32 : SDNode<"AMDGPUISD::CVT_PK_U16_U32", AMDGPUIntPackOp>; +def AMDGPUpkrtz_f16_f32_impl : SDNode<"AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp>; +def AMDGPUpknorm_i16_f32_impl : SDNode<"AMDGPUISD::CVT_PKNORM_I16_F32", AMDGPUFPPackOp>; +def AMDGPUpknorm_u16_f32_impl : SDNode<"AMDGPUISD::CVT_PKNORM_U16_F32", AMDGPUFPPackOp>; +def AMDGPUpk_i16_i32_impl : SDNode<"AMDGPUISD::CVT_PK_I16_I32", AMDGPUIntPackOp>; +def AMDGPUpk_u16_u32_impl : SDNode<"AMDGPUISD::CVT_PK_U16_U32", AMDGPUIntPackOp>; def AMDGPUfp_to_f16 : SDNode<"AMDGPUISD::FP_TO_FP16" , SDTFPToIntOp>; def AMDGPUfp16_zext : SDNode<"AMDGPUISD::FP16_ZEXT" , SDTFPToIntOp>; -def AMDGPUfp_class : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>; +def AMDGPUfp_class_impl : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>; // out = max(a, b) a and b are floats, where a nan comparison fails. 
// This is not commutative because this gives the second operand: @@ -285,7 +284,7 @@ def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>; def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>; def AMDGPUffbh_u32 : SDNode<"AMDGPUISD::FFBH_U32", SDTIntUnaryOp>; -def AMDGPUffbh_i32 : SDNode<"AMDGPUISD::FFBH_I32", SDTIntUnaryOp>; +def AMDGPUffbh_i32_impl : SDNode<"AMDGPUISD::FFBH_I32", SDTIntUnaryOp>; def AMDGPUffbl_b32 : SDNode<"AMDGPUISD::FFBL_B32", SDTIntUnaryOp>; @@ -320,7 +319,7 @@ def AMDGPUumed3 : SDNode<"AMDGPUISD::UMED3", AMDGPUDTIntTernaryOp, [] >; -def AMDGPUfmed3 : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>; +def AMDGPUfmed3_impl : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>; def AMDGPUfdot2 : SDNode<"AMDGPUISD::FDOT2", SDTypeProfile<1, 4, [SDTCisSameAs<0, 3>, SDTCisSameAs<1, 2>, @@ -330,35 +329,6 @@ def AMDGPUfdot2 : SDNode<"AMDGPUISD::FDOT2", def AMDGPUperm : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>; -def AMDGPUinit_exec : SDNode<"AMDGPUISD::INIT_EXEC", - SDTypeProfile<0, 1, [SDTCisInt<0>]>, - [SDNPHasChain, SDNPInGlue]>; - -def AMDGPUinit_exec_from_input : SDNode<"AMDGPUISD::INIT_EXEC_FROM_INPUT", - SDTypeProfile<0, 2, - [SDTCisInt<0>, SDTCisInt<1>]>, - [SDNPHasChain, SDNPInGlue]>; - -def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG", - SDTypeProfile<0, 1, [SDTCisInt<0>]>, - [SDNPHasChain, SDNPInGlue]>; - -def AMDGPUsendmsghalt : SDNode<"AMDGPUISD::SENDMSGHALT", - SDTypeProfile<0, 1, [SDTCisInt<0>]>, - [SDNPHasChain, SDNPInGlue]>; - -def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV", - SDTypeProfile<1, 3, [SDTCisFP<0>]>, - [SDNPInGlue]>; - -def AMDGPUinterp_p1 : SDNode<"AMDGPUISD::INTERP_P1", - SDTypeProfile<1, 3, [SDTCisFP<0>]>, - [SDNPInGlue, SDNPOutGlue]>; - -def AMDGPUinterp_p2 : SDNode<"AMDGPUISD::INTERP_P2", - SDTypeProfile<1, 4, [SDTCisFP<0>]>, - [SDNPInGlue]>; - def AMDGPUinterp_p1ll_f16 : SDNode<"AMDGPUISD::INTERP_P1LL_F16", SDTypeProfile<1, 7, [SDTCisFP<0>]>, [SDNPInGlue, SDNPOutGlue]>; @@ -425,3 +395,65 @@ def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone, def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic] >; + + +//===----------------------------------------------------------------------===// +// Intrinsic/Custom node compatability PatFrags +//===----------------------------------------------------------------------===// + +def AMDGPUrcp : PatFrags<(ops node:$src), [(int_amdgcn_rcp node:$src), + (AMDGPUrcp_impl node:$src)]>; +def AMDGPUrcp_legacy : PatFrags<(ops node:$src), [(int_amdgcn_rcp_legacy node:$src), + (AMDGPUrcp_legacy_impl node:$src)]>; + +def AMDGPUrsq_legacy : PatFrags<(ops node:$src), [(int_amdgcn_rsq_legacy node:$src), + (AMDGPUrsq_legacy_impl node:$src)]>; + +def AMDGPUrsq : PatFrags<(ops node:$src), [(int_amdgcn_rsq node:$src), + (AMDGPUrsq_impl node:$src)]>; + +def AMDGPUrsq_clamp : PatFrags<(ops node:$src), [(int_amdgcn_rsq_clamp node:$src), + (AMDGPUrsq_clamp_impl node:$src)]>; + +def AMDGPUsin : PatFrags<(ops node:$src), [(int_amdgcn_sin node:$src), + (AMDGPUsin_impl node:$src)]>; +def AMDGPUcos : PatFrags<(ops node:$src), [(int_amdgcn_cos node:$src), + (AMDGPUcos_impl node:$src)]>; +def AMDGPUfract : PatFrags<(ops node:$src), [(int_amdgcn_fract node:$src), + (AMDGPUfract_impl node:$src)]>; + +def AMDGPUldexp : PatFrags<(ops node:$src0, node:$src1), + [(int_amdgcn_ldexp node:$src0, node:$src1), + (AMDGPUldexp_impl node:$src0, node:$src1)]>; + +def AMDGPUfp_class : PatFrags<(ops node:$src0, node:$src1), + 
[(int_amdgcn_class node:$src0, node:$src1), + (AMDGPUfp_class_impl node:$src0, node:$src1)]>; + +def AMDGPUfmed3 : PatFrags<(ops node:$src0, node:$src1, node:$src2), + [(int_amdgcn_fmed3 node:$src0, node:$src1, node:$src2), + (AMDGPUfmed3_impl node:$src0, node:$src1, node:$src2)]>; + +def AMDGPUffbh_i32 : PatFrags<(ops node:$src), + [(int_amdgcn_sffbh node:$src), + (AMDGPUffbh_i32_impl node:$src)]>; + +def AMDGPUpkrtz_f16_f32 : PatFrags<(ops node:$src0, node:$src1), + [(int_amdgcn_cvt_pkrtz node:$src0, node:$src1), + (AMDGPUpkrtz_f16_f32_impl node:$src0, node:$src1)]>; + +def AMDGPUpknorm_i16_f32 : PatFrags<(ops node:$src0, node:$src1), + [(int_amdgcn_cvt_pknorm_i16 node:$src0, node:$src1), + (AMDGPUpknorm_i16_f32_impl node:$src0, node:$src1)]>; + +def AMDGPUpknorm_u16_f32 : PatFrags<(ops node:$src0, node:$src1), + [(int_amdgcn_cvt_pknorm_u16 node:$src0, node:$src1), + (AMDGPUpknorm_u16_f32_impl node:$src0, node:$src1)]>; + +def AMDGPUpk_i16_i32 : PatFrags<(ops node:$src0, node:$src1), + [(int_amdgcn_cvt_pk_i16 node:$src0, node:$src1), + (AMDGPUpk_i16_i32_impl node:$src0, node:$src1)]>; + +def AMDGPUpk_u16_u32 : PatFrags<(ops node:$src0, node:$src1), + [(int_amdgcn_cvt_pk_u16 node:$src0, node:$src1), + (AMDGPUpk_u16_u32_impl node:$src0, node:$src1)]>; diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 901a2eaa8829..3cfa9d57ec46 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -19,8 +19,10 @@ #include "AMDGPUTargetMachine.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -61,8 +63,14 @@ AMDGPUInstructionSelector::AMDGPUInstructionSelector( const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; } +void AMDGPUInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits &KB, + CodeGenCoverage &CoverageInfo) { + MRI = &MF.getRegInfo(); + InstructionSelector::setupMF(MF, KB, CoverageInfo); +} + static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return Reg == AMDGPU::SCC; auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); @@ -71,7 +79,9 @@ static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) { if (RC) { // FIXME: This is ambiguous for wave32. This could be SCC or VCC, but the // context of the register bank has been lost. - if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID) + // Has a hack getRegClassForSizeOnBank uses exactly SGPR_32RegClass, which + // won't ever beconstrained any further. 
+ if (RC != &AMDGPU::SGPR_32RegClass) return false; const LLT Ty = MRI.getType(Reg); return Ty.isValid() && Ty.getSizeInBits() == 1; @@ -83,7 +93,7 @@ static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) { bool AMDGPUInstructionSelector::isVCC(Register Reg, const MachineRegisterInfo &MRI) const { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return Reg == TRI.getVCC(); auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); @@ -102,8 +112,6 @@ bool AMDGPUInstructionSelector::isVCC(Register Reg, bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { const DebugLoc &DL = I.getDebugLoc(); MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); I.setDesc(TII.get(TargetOpcode::COPY)); const MachineOperand &Src = I.getOperand(1); @@ -111,33 +119,33 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { Register DstReg = Dst.getReg(); Register SrcReg = Src.getReg(); - if (isVCC(DstReg, MRI)) { + if (isVCC(DstReg, *MRI)) { if (SrcReg == AMDGPU::SCC) { const TargetRegisterClass *RC - = TRI.getConstrainedRegClassForOperand(Dst, MRI); + = TRI.getConstrainedRegClassForOperand(Dst, *MRI); if (!RC) return true; - return RBI.constrainGenericRegister(DstReg, *RC, MRI); + return RBI.constrainGenericRegister(DstReg, *RC, *MRI); } - if (!isVCC(SrcReg, MRI)) { + if (!isVCC(SrcReg, *MRI)) { // TODO: Should probably leave the copy and let copyPhysReg expand it. - if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI)) + if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI)) return false; BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg) .addImm(0) .addReg(SrcReg); - if (!MRI.getRegClassOrNull(SrcReg)) - MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI)); + if (!MRI->getRegClassOrNull(SrcReg)) + MRI->setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, *MRI)); I.eraseFromParent(); return true; } const TargetRegisterClass *RC = - TRI.getConstrainedRegClassForOperand(Dst, MRI); - if (RC && !RBI.constrainGenericRegister(DstReg, *RC, MRI)) + TRI.getConstrainedRegClassForOperand(Dst, *MRI); + if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI)) return false; // Don't constrain the source register to a class so the def instruction @@ -148,8 +156,8 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { // with size 1. An SReg_32 with size 1 is ambiguous with wave32. 
if (Src.isUndef()) { const TargetRegisterClass *SrcRC = - TRI.getConstrainedRegClassForOperand(Src, MRI); - if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI)) + TRI.getConstrainedRegClassForOperand(Src, *MRI); + if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI)) return false; } @@ -157,30 +165,26 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { } for (const MachineOperand &MO : I.operands()) { - if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + if (Register::isPhysicalRegister(MO.getReg())) continue; const TargetRegisterClass *RC = - TRI.getConstrainedRegClassForOperand(MO, MRI); + TRI.getConstrainedRegClassForOperand(MO, *MRI); if (!RC) continue; - RBI.constrainGenericRegister(MO.getReg(), *RC, MRI); + RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI); } return true; } bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const { - MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - const Register DefReg = I.getOperand(0).getReg(); - const LLT DefTy = MRI.getType(DefReg); + const LLT DefTy = MRI->getType(DefReg); // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy) const RegClassOrRegBank &RegClassOrBank = - MRI.getRegClassOrRegBank(DefReg); + MRI->getRegClassOrRegBank(DefReg); const TargetRegisterClass *DefRC = RegClassOrBank.dyn_cast(); @@ -196,7 +200,7 @@ bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const { return false; } - DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI); + DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, *MRI); if (!DefRC) { LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n"); return false; @@ -204,7 +208,7 @@ bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const { } I.setDesc(TII.get(TargetOpcode::PHI)); - return RBI.constrainGenericRegister(DefReg, *DefRC, MRI); + return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI); } MachineOperand @@ -214,13 +218,11 @@ AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO, MachineInstr *MI = MO.getParent(); MachineBasicBlock *BB = MO.getParent()->getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - Register DstReg = MRI.createVirtualRegister(&SubRC); + Register DstReg = MRI->createVirtualRegister(&SubRC); if (MO.isReg()) { unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg) .addReg(Reg, 0, ComposedSubIdx); @@ -244,10 +246,6 @@ AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO, } } -static int64_t getConstant(const MachineInstr *MI) { - return MI->getOperand(1).getCImm()->getSExtValue(); -} - static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) { switch (Opc) { case AMDGPU::G_AND: @@ -262,16 +260,13 @@ static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) { } bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const { - MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); MachineOperand &Dst = I.getOperand(0); MachineOperand &Src0 = I.getOperand(1); MachineOperand &Src1 = I.getOperand(2); Register DstReg = Dst.getReg(); - unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI); + unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI); - const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI); + const 
RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); if (DstRB->getID() == AMDGPU::VCCRegBankID) { const TargetRegisterClass *RC = TRI.getBoolRC(); unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), @@ -282,12 +277,12 @@ bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const { // The selector for G_ICMP relies on seeing the register bank for the result // is VCC. In wave32 if we constrain the registers to SReg_32 here, it will // be ambiguous whether it's a scalar or vector bool. - if (Src0.isUndef() && !MRI.getRegClassOrNull(Src0.getReg())) - MRI.setRegClass(Src0.getReg(), RC); - if (Src1.isUndef() && !MRI.getRegClassOrNull(Src1.getReg())) - MRI.setRegClass(Src1.getReg(), RC); + if (Src0.isUndef() && !MRI->getRegClassOrNull(Src0.getReg())) + MRI->setRegClass(Src0.getReg(), RC); + if (Src1.isUndef() && !MRI->getRegClassOrNull(Src1.getReg())) + MRI->setRegClass(Src1.getReg(), RC); - return RBI.constrainGenericRegister(DstReg, *RC, MRI); + return RBI.constrainGenericRegister(DstReg, *RC, *MRI); } // TODO: Should this allow an SCC bank result, and produce a copy from SCC for @@ -295,14 +290,7 @@ bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const { if (DstRB->getID() == AMDGPU::SGPRRegBankID) { unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32); I.setDesc(TII.get(InstOpc)); - - const TargetRegisterClass *RC - = TRI.getConstrainedRegClassForOperand(Dst, MRI); - if (!RC) - return false; - return RBI.constrainGenericRegister(DstReg, *RC, MRI) && - RBI.constrainGenericRegister(Src0.getReg(), *RC, MRI) && - RBI.constrainGenericRegister(Src1.getReg(), *RC, MRI); + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } return false; @@ -311,11 +299,10 @@ bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const { bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); Register DstReg = I.getOperand(0).getReg(); const DebugLoc &DL = I.getDebugLoc(); - unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI); - const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI); + unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI); + const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID; const bool Sub = I.getOpcode() == TargetOpcode::G_SUB; @@ -340,7 +327,7 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const { const unsigned Opc = Sub ? 
AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64; - Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass()); + Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass()); MachineInstr *Add = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg) .addDef(UnusedCarry, RegState::Dead) @@ -363,8 +350,8 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const { MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1)); MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1)); - Register DstLo = MRI.createVirtualRegister(&HalfRC); - Register DstHi = MRI.createVirtualRegister(&HalfRC); + Register DstLo = MRI->createVirtualRegister(&HalfRC); + Register DstHi = MRI->createVirtualRegister(&HalfRC); if (IsSALU) { BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo) @@ -375,14 +362,14 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const { .add(Hi2); } else { const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass(); - Register CarryReg = MRI.createVirtualRegister(CarryRC); + Register CarryReg = MRI->createVirtualRegister(CarryRC); BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo) .addDef(CarryReg) .add(Lo1) .add(Lo2) .addImm(0); MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi) - .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead) + .addDef(MRI->createVirtualRegister(CarryRC), RegState::Dead) .add(Hi1) .add(Hi2) .addReg(CarryReg, RegState::Kill) @@ -399,19 +386,61 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const { .addImm(AMDGPU::sub1); - if (!RBI.constrainGenericRegister(DstReg, RC, MRI)) + if (!RBI.constrainGenericRegister(DstReg, RC, *MRI)) return false; I.eraseFromParent(); return true; } -bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const { +bool AMDGPUInstructionSelector::selectG_UADDO_USUBO(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); - assert(I.getOperand(2).getImm() % 32 == 0); - unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32); + const DebugLoc &DL = I.getDebugLoc(); + Register Dst0Reg = I.getOperand(0).getReg(); + Register Dst1Reg = I.getOperand(1).getReg(); + const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO; + + if (!isSCC(Dst1Reg, MRI)) { + // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned + // carry out despite the _i32 name. These were renamed in VI to _U32. + // FIXME: We should probably rename the opcodes here. + unsigned NewOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64; + I.setDesc(TII.get(NewOpc)); + I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); + I.addOperand(*MF, MachineOperand::CreateImm(0)); + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); + } + + Register Src0Reg = I.getOperand(2).getReg(); + Register Src1Reg = I.getOperand(3).getReg(); + unsigned NewOpc = IsAdd ? 
AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32; + BuildMI(*BB, &I, DL, TII.get(NewOpc), Dst0Reg) + .add(I.getOperand(2)) + .add(I.getOperand(3)); + BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg) + .addReg(AMDGPU::SCC); + + if (!MRI.getRegClassOrNull(Dst1Reg)) + MRI.setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass); + + if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, MRI) || + !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, MRI) || + !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, MRI)) + return false; + + I.eraseFromParent(); + return true; +} + +bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const { + MachineBasicBlock *BB = I.getParent(); + unsigned Offset = I.getOperand(2).getImm(); + if (Offset % 32 != 0) + return false; + + unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32); const DebugLoc &DL = I.getDebugLoc(); MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), I.getOperand(0).getReg()) @@ -419,10 +448,10 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const { for (const MachineOperand &MO : Copy->operands()) { const TargetRegisterClass *RC = - TRI.getConstrainedRegClassForOperand(MO, MRI); + TRI.getConstrainedRegClassForOperand(MO, *MRI); if (!RC) continue; - RBI.constrainGenericRegister(MO.getReg(), *RC, MRI); + RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI); } I.eraseFromParent(); return true; @@ -430,21 +459,19 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const { bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const { MachineBasicBlock *BB = MI.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); Register DstReg = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + LLT DstTy = MRI->getType(DstReg); + LLT SrcTy = MRI->getType(MI.getOperand(1).getReg()); const unsigned SrcSize = SrcTy.getSizeInBits(); if (SrcSize < 32) return false; const DebugLoc &DL = MI.getDebugLoc(); - const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, TRI); + const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI); const unsigned DstSize = DstTy.getSizeInBits(); const TargetRegisterClass *DstRC = - TRI.getRegClassForSizeOnBank(DstSize, *DstBank, MRI); + TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI); if (!DstRC) return false; @@ -457,12 +484,12 @@ bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const { MIB.addImm(SubRegs[I]); const TargetRegisterClass *SrcRC - = TRI.getConstrainedRegClassForOperand(Src, MRI); - if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, MRI)) + = TRI.getConstrainedRegClassForOperand(Src, *MRI); + if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI)) return false; } - if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) + if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) return false; MI.eraseFromParent(); @@ -471,25 +498,23 @@ bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const { bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const { MachineBasicBlock *BB = MI.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); const int NumDst = MI.getNumOperands() - 1; MachineOperand &Src = MI.getOperand(NumDst); Register SrcReg = Src.getReg(); Register DstReg0 = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(DstReg0); - LLT SrcTy = 
MRI.getType(SrcReg); + LLT DstTy = MRI->getType(DstReg0); + LLT SrcTy = MRI->getType(SrcReg); const unsigned DstSize = DstTy.getSizeInBits(); const unsigned SrcSize = SrcTy.getSizeInBits(); const DebugLoc &DL = MI.getDebugLoc(); - const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI); + const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI); const TargetRegisterClass *SrcRC = - TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, MRI); - if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI)) + TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, *MRI); + if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI)) return false; const unsigned SrcFlags = getUndefRegState(Src.isUndef()); @@ -504,8 +529,8 @@ bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const { .addReg(SrcReg, SrcFlags, SubRegs[I]); const TargetRegisterClass *DstRC = - TRI.getConstrainedRegClassForOperand(Dst, MRI); - if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, MRI)) + TRI.getConstrainedRegClassForOperand(Dst, *MRI); + if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI)) return false; } @@ -518,16 +543,13 @@ bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const { } bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const { - MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); const MachineOperand &MO = I.getOperand(0); // FIXME: Interface for getConstrainedRegClassForOperand needs work. The // regbank check here is to know why getConstrainedRegClassForOperand failed. - const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI); - if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) || - (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) { + const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, *MRI); + if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) || + (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI))) { I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); return true; } @@ -537,44 +559,62 @@ bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const { bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32); - DebugLoc DL = I.getDebugLoc(); - MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG)) - .addDef(I.getOperand(0).getReg()) - .addReg(I.getOperand(1).getReg()) - .addReg(I.getOperand(2).getReg()) - .addImm(SubReg); - - for (const MachineOperand &MO : Ins->operands()) { - if (!MO.isReg()) - continue; - if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) - continue; - const TargetRegisterClass *RC = - TRI.getConstrainedRegClassForOperand(MO, MRI); - if (!RC) - continue; - RBI.constrainGenericRegister(MO.getReg(), *RC, MRI); - } + Register DstReg = I.getOperand(0).getReg(); + Register Src0Reg = I.getOperand(1).getReg(); + Register Src1Reg = I.getOperand(2).getReg(); + LLT Src1Ty = MRI->getType(Src1Reg); + + unsigned DstSize = MRI->getType(DstReg).getSizeInBits(); + unsigned InsSize = Src1Ty.getSizeInBits(); + + int64_t Offset = I.getOperand(3).getImm(); + if (Offset % 32 != 0) + return false; + + unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32); + if (SubReg == AMDGPU::NoSubRegister) + return 
false; + + const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI); + const TargetRegisterClass *DstRC = + TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI); + if (!DstRC) + return false; + + const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI); + const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI); + const TargetRegisterClass *Src0RC = + TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank, *MRI); + const TargetRegisterClass *Src1RC = + TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank, *MRI); + + // Deal with weird cases where the class only partially supports the subreg + // index. + Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg); + if (!Src0RC) + return false; + + if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) || + !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) || + !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI)) + return false; + + const DebugLoc &DL = I.getDebugLoc(); + BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg) + .addReg(Src0Reg) + .addReg(Src1Reg) + .addImm(SubReg); + I.eraseFromParent(); return true; } -bool AMDGPUInstructionSelector::selectG_INTRINSIC( - MachineInstr &I, CodeGenCoverage &CoverageInfo) const { - unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID(); +bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const { + unsigned IntrinsicID = I.getIntrinsicID(); switch (IntrinsicID) { - case Intrinsic::maxnum: - case Intrinsic::minnum: - case Intrinsic::amdgcn_cvt_pkrtz: - return selectImpl(I, CoverageInfo); case Intrinsic::amdgcn_if_break: { MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick // SelectionDAG uses for wave32 vs wave64. 
@@ -589,15 +629,13 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC( I.eraseFromParent(); - for (Register Reg : { DstReg, Src0Reg, Src1Reg }) { - if (!MRI.getRegClassOrNull(Reg)) - MRI.setRegClass(Reg, TRI.getWaveMaskRegClass()); - } + for (Register Reg : { DstReg, Src0Reg, Src1Reg }) + MRI->setRegClass(Reg, TRI.getWaveMaskRegClass()); return true; } default: - return selectImpl(I, CoverageInfo); + return selectImpl(I, *CoverageInfo); } } @@ -677,17 +715,15 @@ int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P, bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); const DebugLoc &DL = I.getDebugLoc(); - unsigned SrcReg = I.getOperand(2).getReg(); - unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI); + Register SrcReg = I.getOperand(2).getReg(); + unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI); auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate(); - unsigned CCReg = I.getOperand(0).getReg(); - if (isSCC(CCReg, MRI)) { + Register CCReg = I.getOperand(0).getReg(); + if (isSCC(CCReg, *MRI)) { int Opcode = getS_CMPOpcode(Pred, Size); if (Opcode == -1) return false; @@ -698,7 +734,7 @@ bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const { .addReg(AMDGPU::SCC); bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) && - RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI); + RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI); I.eraseFromParent(); return Ret; } @@ -712,7 +748,7 @@ bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const { .add(I.getOperand(2)) .add(I.getOperand(3)); RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(), - *TRI.getBoolRC(), MRI); + *TRI.getBoolRC(), *MRI); bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI); I.eraseFromParent(); return Ret; @@ -736,19 +772,273 @@ buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt, .addImm(Enabled); } +static bool isZero(Register Reg, MachineRegisterInfo &MRI) { + int64_t C; + if (mi_match(Reg, MRI, m_ICst(C)) && C == 0) + return true; + + // FIXME: matcher should ignore copies + return mi_match(Reg, MRI, m_Copy(m_ICst(C))) && C == 0; +} + +static unsigned extractGLC(unsigned AuxiliaryData) { + return AuxiliaryData & 1; +} + +static unsigned extractSLC(unsigned AuxiliaryData) { + return (AuxiliaryData >> 1) & 1; +} + +static unsigned extractDLC(unsigned AuxiliaryData) { + return (AuxiliaryData >> 2) & 1; +} + +static unsigned extractSWZ(unsigned AuxiliaryData) { + return (AuxiliaryData >> 3) & 1; +} + +// Returns Base register, constant offset, and offset def point. 
+static std::tuple +getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) { + MachineInstr *Def = getDefIgnoringCopies(Reg, MRI); + if (!Def) + return std::make_tuple(Reg, 0, nullptr); + + if (Def->getOpcode() == AMDGPU::G_CONSTANT) { + unsigned Offset; + const MachineOperand &Op = Def->getOperand(1); + if (Op.isImm()) + Offset = Op.getImm(); + else + Offset = Op.getCImm()->getZExtValue(); + + return std::make_tuple(Register(), Offset, Def); + } + + int64_t Offset; + if (Def->getOpcode() == AMDGPU::G_ADD) { + // TODO: Handle G_OR used for add case + if (mi_match(Def->getOperand(1).getReg(), MRI, m_ICst(Offset))) + return std::make_tuple(Def->getOperand(0).getReg(), Offset, Def); + + // FIXME: matcher should ignore copies + if (mi_match(Def->getOperand(1).getReg(), MRI, m_Copy(m_ICst(Offset)))) + return std::make_tuple(Def->getOperand(0).getReg(), Offset, Def); + } + + return std::make_tuple(Reg, 0, Def); +} + +static unsigned getBufferStoreOpcode(LLT Ty, + const unsigned MemSize, + const bool Offen) { + const int Size = Ty.getSizeInBits(); + switch (8 * MemSize) { + case 8: + return Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact : + AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact; + case 16: + return Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact : + AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact; + default: + unsigned Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact : + AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact; + if (Size > 32) + Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32); + return Opc; + } +} + +static unsigned getBufferStoreFormatOpcode(LLT Ty, + const unsigned MemSize, + const bool Offen) { + bool IsD16Packed = Ty.getScalarSizeInBits() == 16; + bool IsD16Unpacked = 8 * MemSize < Ty.getSizeInBits(); + int NumElts = Ty.isVector() ? Ty.getNumElements() : 1; + + if (IsD16Packed) { + switch (NumElts) { + case 1: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact; + case 2: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFSET_exact; + case 3: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFSET_exact; + case 4: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFSET_exact; + default: + return -1; + } + } + + if (IsD16Unpacked) { + switch (NumElts) { + case 1: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact; + case 2: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFSET_exact; + case 3: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFSET_exact; + case 4: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFSET_exact; + default: + return -1; + } + } + + switch (NumElts) { + case 1: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_X_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_X_OFFSET_exact; + case 2: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XY_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_XY_OFFSET_exact; + case 3: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFSET_exact; + case 4: + return Offen ? 
AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFSET_exact; + default: + return -1; + } + + llvm_unreachable("unhandled buffer store"); +} + +// TODO: Move this to combiner +// Returns base register, imm offset, total constant offset. +std::tuple +AMDGPUInstructionSelector::splitBufferOffsets(MachineIRBuilder &B, + Register OrigOffset) const { + const unsigned MaxImm = 4095; + Register BaseReg; + unsigned TotalConstOffset; + MachineInstr *OffsetDef; + + std::tie(BaseReg, TotalConstOffset, OffsetDef) + = getBaseWithConstantOffset(*MRI, OrigOffset); + + unsigned ImmOffset = TotalConstOffset; + + // If the immediate value is too big for the immoffset field, put the value + // and -4096 into the immoffset field so that the value that is copied/added + // for the voffset field is a multiple of 4096, and it stands more chance + // of being CSEd with the copy/add for another similar load/store.f + // However, do not do that rounding down to a multiple of 4096 if that is a + // negative number, as it appears to be illegal to have a negative offset + // in the vgpr, even if adding the immediate offset makes it positive. + unsigned Overflow = ImmOffset & ~MaxImm; + ImmOffset -= Overflow; + if ((int32_t)Overflow < 0) { + Overflow += ImmOffset; + ImmOffset = 0; + } + + if (Overflow != 0) { + // In case this is in a waterfall loop, insert offset code at the def point + // of the offset, not inside the loop. + MachineBasicBlock::iterator OldInsPt = B.getInsertPt(); + MachineBasicBlock &OldMBB = B.getMBB(); + B.setInstr(*OffsetDef); + + if (!BaseReg) { + BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + B.buildInstr(AMDGPU::V_MOV_B32_e32) + .addDef(BaseReg) + .addImm(Overflow); + } else { + Register OverflowVal = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + B.buildInstr(AMDGPU::V_MOV_B32_e32) + .addDef(OverflowVal) + .addImm(Overflow); + + Register NewBaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + TII.getAddNoCarry(B.getMBB(), B.getInsertPt(), B.getDebugLoc(), NewBaseReg) + .addReg(BaseReg) + .addReg(OverflowVal, RegState::Kill) + .addImm(0); + BaseReg = NewBaseReg; + } + + B.setInsertPt(OldMBB, OldInsPt); + } + + return std::make_tuple(BaseReg, ImmOffset, TotalConstOffset); +} + +bool AMDGPUInstructionSelector::selectStoreIntrinsic(MachineInstr &MI, + bool IsFormat) const { + MachineIRBuilder B(MI); + MachineFunction &MF = B.getMF(); + Register VData = MI.getOperand(1).getReg(); + LLT Ty = MRI->getType(VData); + + int Size = Ty.getSizeInBits(); + if (Size % 32 != 0) + return false; + + // FIXME: Verifier should enforce 1 MMO for these intrinsics. + MachineMemOperand *MMO = *MI.memoperands_begin(); + const int MemSize = MMO->getSize(); + + Register RSrc = MI.getOperand(2).getReg(); + Register VOffset = MI.getOperand(3).getReg(); + Register SOffset = MI.getOperand(4).getReg(); + unsigned AuxiliaryData = MI.getOperand(5).getImm(); + unsigned ImmOffset; + unsigned TotalOffset; + + std::tie(VOffset, ImmOffset, TotalOffset) = splitBufferOffsets(B, VOffset); + if (TotalOffset != 0) + MMO = MF.getMachineMemOperand(MMO, TotalOffset, MemSize); + + const bool Offen = !isZero(VOffset, *MRI); + + int Opc = IsFormat ? 
getBufferStoreFormatOpcode(Ty, MemSize, Offen) : + getBufferStoreOpcode(Ty, MemSize, Offen); + if (Opc == -1) + return false; + + MachineInstrBuilder MIB = B.buildInstr(Opc) + .addUse(VData); + + if (Offen) + MIB.addUse(VOffset); + + MIB.addUse(RSrc) + .addUse(SOffset) + .addImm(ImmOffset) + .addImm(extractGLC(AuxiliaryData)) + .addImm(extractSLC(AuxiliaryData)) + .addImm(0) // tfe: FIXME: Remove from inst + .addImm(extractDLC(AuxiliaryData)) + .addImm(extractSWZ(AuxiliaryData)) + .addMemOperand(MMO); + + MI.eraseFromParent(); + + return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); +} + bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( - MachineInstr &I, CodeGenCoverage &CoverageInfo) const { + MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - - unsigned IntrinsicID = I.getOperand(0).getIntrinsicID(); + unsigned IntrinsicID = I.getIntrinsicID(); switch (IntrinsicID) { case Intrinsic::amdgcn_exp: { - int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg())); - int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg())); - int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg())); - int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg())); + int64_t Tgt = I.getOperand(1).getImm(); + int64_t Enabled = I.getOperand(2).getImm(); + int64_t Done = I.getOperand(7).getImm(); + int64_t VM = I.getOperand(8).getImm(); MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(), I.getOperand(4).getReg(), @@ -761,13 +1051,13 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( } case Intrinsic::amdgcn_exp_compr: { const DebugLoc &DL = I.getDebugLoc(); - int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg())); - int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg())); - unsigned Reg0 = I.getOperand(3).getReg(); - unsigned Reg1 = I.getOperand(4).getReg(); - unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg())); - int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg())); + int64_t Tgt = I.getOperand(1).getImm(); + int64_t Enabled = I.getOperand(2).getImm(); + Register Reg0 = I.getOperand(3).getReg(); + Register Reg1 = I.getOperand(4).getReg(); + Register Undef = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + int64_t Done = I.getOperand(5).getImm(); + int64_t VM = I.getOperand(6).getImm(); BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef); MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM, @@ -786,27 +1076,29 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( Register Reg = I.getOperand(1).getReg(); I.eraseFromParent(); - if (!MRI.getRegClassOrNull(Reg)) - MRI.setRegClass(Reg, TRI.getWaveMaskRegClass()); + if (!MRI->getRegClassOrNull(Reg)) + MRI->setRegClass(Reg, TRI.getWaveMaskRegClass()); return true; } + case Intrinsic::amdgcn_raw_buffer_store: + return selectStoreIntrinsic(I, false); + case Intrinsic::amdgcn_raw_buffer_store_format: + return selectStoreIntrinsic(I, true); default: - return selectImpl(I, CoverageInfo); + return selectImpl(I, *CoverageInfo); } } bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); const DebugLoc &DL = I.getDebugLoc(); - unsigned DstReg = 
I.getOperand(0).getReg(); - unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI); + Register DstReg = I.getOperand(0).getReg(); + unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI); assert(Size <= 32 || Size == 64); const MachineOperand &CCOp = I.getOperand(1); - unsigned CCReg = CCOp.getReg(); - if (isSCC(CCReg, MRI)) { + Register CCReg = CCOp.getReg(); + if (isSCC(CCReg, *MRI)) { unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32; MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC) @@ -815,8 +1107,8 @@ bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const { // The generic constrainSelectedInstRegOperands doesn't work for the scc register // bank, because it does not cover the register class that we used to represent // for it. So we need to manually set the register class here. - if (!MRI.getRegClassOrNull(CCReg)) - MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI)); + if (!MRI->getRegClassOrNull(CCReg)) + MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI)); MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg) .add(I.getOperand(2)) .add(I.getOperand(3)); @@ -845,52 +1137,8 @@ bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const { } bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const { - MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - DebugLoc DL = I.getDebugLoc(); - unsigned PtrSize = RBI.getSizeInBits(I.getOperand(1).getReg(), MRI, TRI); - if (PtrSize != 64) { - LLVM_DEBUG(dbgs() << "Unhandled address space\n"); - return false; - } - - unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI); - unsigned Opcode; - - // FIXME: Remove this when integers > s32 naturally selected. - switch (StoreSize) { - default: - return false; - case 32: - Opcode = AMDGPU::FLAT_STORE_DWORD; - break; - case 64: - Opcode = AMDGPU::FLAT_STORE_DWORDX2; - break; - case 96: - Opcode = AMDGPU::FLAT_STORE_DWORDX3; - break; - case 128: - Opcode = AMDGPU::FLAT_STORE_DWORDX4; - break; - } - - MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode)) - .add(I.getOperand(1)) - .add(I.getOperand(0)) - .addImm(0) // offset - .addImm(0) // glc - .addImm(0) // slc - .addImm(0); // dlc - - - // Now that we selected an opcode, we need to constrain the register - // operands to use appropriate classes. 
- bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI); - - I.eraseFromParent(); - return Ret; + initM0(I); + return selectImpl(I, *CoverageInfo); } static int sizeToSubRegIndex(unsigned Size) { @@ -915,19 +1163,15 @@ static int sizeToSubRegIndex(unsigned Size) { } bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const { - MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - - unsigned DstReg = I.getOperand(0).getReg(); - unsigned SrcReg = I.getOperand(1).getReg(); - const LLT DstTy = MRI.getType(DstReg); - const LLT SrcTy = MRI.getType(SrcReg); + Register DstReg = I.getOperand(0).getReg(); + Register SrcReg = I.getOperand(1).getReg(); + const LLT DstTy = MRI->getType(DstReg); + const LLT SrcTy = MRI->getType(SrcReg); if (!DstTy.isScalar()) return false; - const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI); - const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI); + const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); + const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI); if (SrcRB != DstRB) return false; @@ -935,9 +1179,9 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const { unsigned SrcSize = SrcTy.getSizeInBits(); const TargetRegisterClass *SrcRC - = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI); + = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, *MRI); const TargetRegisterClass *DstRC - = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI); + = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, *MRI); if (SrcSize > 32) { int SubRegIdx = sizeToSubRegIndex(DstSize); @@ -953,8 +1197,8 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const { I.getOperand(1).setSubReg(SubRegIdx); } - if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || - !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { + if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) || + !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) { LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n"); return false; } @@ -974,20 +1218,18 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { bool Signed = I.getOpcode() == AMDGPU::G_SEXT; const DebugLoc &DL = I.getDebugLoc(); MachineBasicBlock &MBB = *I.getParent(); - MachineFunction &MF = *MBB.getParent(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - const unsigned DstReg = I.getOperand(0).getReg(); - const unsigned SrcReg = I.getOperand(1).getReg(); + const Register DstReg = I.getOperand(0).getReg(); + const Register SrcReg = I.getOperand(1).getReg(); - const LLT DstTy = MRI.getType(DstReg); - const LLT SrcTy = MRI.getType(SrcReg); + const LLT DstTy = MRI->getType(DstReg); + const LLT SrcTy = MRI->getType(SrcReg); const LLT S1 = LLT::scalar(1); const unsigned SrcSize = SrcTy.getSizeInBits(); const unsigned DstSize = DstTy.getSizeInBits(); if (!DstTy.isScalar()) return false; - const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI); + const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI); if (SrcBank->getID() == AMDGPU::SCCRegBankID) { if (SrcTy != S1 || DstSize > 64) // Invalid @@ -1000,7 +1242,7 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { // FIXME: Create an extra copy to avoid incorrectly constraining the result // of the scc producer. 
- unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + Register TmpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg) .addReg(SrcReg); BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC) @@ -1010,7 +1252,8 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { BuildMI(MBB, I, DL, TII.get(Opcode), DstReg) .addImm(0) .addImm(Signed ? -1 : 1); - return RBI.constrainGenericRegister(DstReg, *DstRC, MRI); + I.eraseFromParent(); + return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI); } if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) { @@ -1024,6 +1267,7 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { .addImm(0) // src1_modifiers .addImm(Signed ? -1 : 1) // src1 .addUse(SrcReg); + I.eraseFromParent(); return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); } @@ -1040,6 +1284,7 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg) .addImm(Mask) .addReg(SrcReg); + I.eraseFromParent(); return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); } @@ -1049,11 +1294,12 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { .addReg(SrcReg) .addImm(0) // Offset .addImm(SrcSize); // Width + I.eraseFromParent(); return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); } if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) { - if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI)) + if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI)) return false; if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) { @@ -1061,7 +1307,8 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16; BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg) .addReg(SrcReg); - return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI); + I.eraseFromParent(); + return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI); } const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64; @@ -1070,10 +1317,8 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16]= width. if (DstSize > 32 && SrcSize <= 32) { // We need a 64-bit register source, but the high bits don't matter. 
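// A minimal standalone sketch, not part of the patch, of how the scalar BFE
// source-1 immediate used just below is packed: bits [5:0] hold the offset and
// bits [22:16] hold the width, so a zero-offset extract of SrcSize bits is
// simply `SrcSize << 16`. The packScalarBFEImm helper name is hypothetical.
#include <cstdint>

static constexpr uint32_t packScalarBFEImm(uint32_t Offset, uint32_t Width) {
  return (Offset & 0x3f) | ((Width & 0x7f) << 16); // S1[5:0] = offset, S1[22:16] = width
}

static_assert(packScalarBFEImm(0, 8) == 0x00080000u, "s8 source extend");
static_assert(packScalarBFEImm(0, 16) == 0x00100000u, "s16 source extend");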
- unsigned ExtReg - = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - unsigned UndefReg - = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass); + Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg); BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg) .addReg(SrcReg) @@ -1085,7 +1330,8 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { .addReg(ExtReg) .addImm(SrcSize << 16); - return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI); + I.eraseFromParent(); + return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI); } unsigned Mask; @@ -1099,16 +1345,58 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { .addImm(SrcSize << 16); } - return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI); + I.eraseFromParent(); + return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI); } return false; } +static int64_t getFPTrueImmVal(unsigned Size, bool Signed) { + switch (Size) { + case 16: + return Signed ? 0xBC00 : 0x3C00; + case 32: + return Signed ? 0xbf800000 : 0x3f800000; + case 64: + return Signed ? 0xbff0000000000000 : 0x3ff0000000000000; + default: + llvm_unreachable("Invalid FP type size"); + } +} + +bool AMDGPUInstructionSelector::selectG_SITOFP_UITOFP(MachineInstr &I) const { + MachineBasicBlock *MBB = I.getParent(); + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + Register Src = I.getOperand(1).getReg(); + if (!isSCC(Src, MRI)) + return selectImpl(I, *CoverageInfo); + + bool Signed = I.getOpcode() == AMDGPU::G_SITOFP; + Register DstReg = I.getOperand(0).getReg(); + const LLT DstTy = MRI.getType(DstReg); + const unsigned DstSize = DstTy.getSizeInBits(); + const DebugLoc &DL = I.getDebugLoc(); + + BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC) + .addReg(Src); + + unsigned NewOpc = + DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32; + auto MIB = BuildMI(*MBB, I, DL, TII.get(NewOpc), DstReg) + .addImm(0) + .addImm(getFPTrueImmVal(DstSize, Signed)); + + if (!constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI)) + return false; + + I.eraseFromParent(); + return true; +} + bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); MachineOperand &ImmOp = I.getOperand(1); // The AMDGPU backend only supports Imm operands and not CImm or FPImm. 
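// A quick standalone check, not part of the patch, of the bit patterns returned
// by getFPTrueImmVal above: they are the IEEE-754 encodings of +/-1.0 in the
// destination width, so the S_CSELECT materializes 0.0 or +/-1.0 from a boolean.
#include <cassert>
#include <cstdint>
#include <cstring>

static uint32_t bitsOf(float F) {
  uint32_t B;
  std::memcpy(&B, &F, sizeof(B));
  return B;
}

static uint64_t bitsOf(double D) {
  uint64_t B;
  std::memcpy(&B, &D, sizeof(B));
  return B;
}

int main() {
  assert(bitsOf(1.0f) == 0x3f800000u);           // f32, unsigned (G_UITOFP) true value
  assert(bitsOf(-1.0f) == 0xbf800000u);          // f32, signed (G_SITOFP) true value
  assert(bitsOf(1.0) == 0x3ff0000000000000ull);  // f64, unsigned true value
  assert(bitsOf(-1.0) == 0xbff0000000000000ull); // f64, signed true value
  // 0x3C00 / 0xBC00 are likewise the f16 encodings of +1.0 / -1.0.
  return 0;
}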
@@ -1119,15 +1407,15 @@ bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const { ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue()); } - unsigned DstReg = I.getOperand(0).getReg(); + Register DstReg = I.getOperand(0).getReg(); unsigned Size; bool IsSgpr; - const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg()); + const RegisterBank *RB = MRI->getRegBankOrNull(I.getOperand(0).getReg()); if (RB) { IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID; - Size = MRI.getType(DstReg).getSizeInBits(); + Size = MRI->getType(DstReg).getSizeInBits(); } else { - const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg); + const TargetRegisterClass *RC = TRI.getRegClassForReg(*MRI, DstReg); IsSgpr = TRI.isSGPRClass(RC); Size = TRI.getRegSizeInBits(*RC); } @@ -1142,34 +1430,41 @@ bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const { return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } - DebugLoc DL = I.getDebugLoc(); - const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass : - &AMDGPU::VGPR_32RegClass; - unsigned LoReg = MRI.createVirtualRegister(RC); - unsigned HiReg = MRI.createVirtualRegister(RC); - const APInt &Imm = APInt(Size, I.getOperand(1).getImm()); - - BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg) - .addImm(Imm.trunc(32).getZExtValue()); + const DebugLoc &DL = I.getDebugLoc(); - BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg) - .addImm(Imm.ashr(32).getZExtValue()); + APInt Imm(Size, I.getOperand(1).getImm()); - const MachineInstr *RS = - BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) - .addReg(LoReg) - .addImm(AMDGPU::sub0) - .addReg(HiReg) - .addImm(AMDGPU::sub1); + MachineInstr *ResInst; + if (IsSgpr && TII.isInlineConstant(Imm)) { + ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg) + .addImm(I.getOperand(1).getImm()); + } else { + const TargetRegisterClass *RC = IsSgpr ? + &AMDGPU::SReg_32RegClass : &AMDGPU::VGPR_32RegClass; + Register LoReg = MRI->createVirtualRegister(RC); + Register HiReg = MRI->createVirtualRegister(RC); + + BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg) + .addImm(Imm.trunc(32).getZExtValue()); + + BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg) + .addImm(Imm.ashr(32).getZExtValue()); + + ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) + .addReg(LoReg) + .addImm(AMDGPU::sub0) + .addReg(HiReg) + .addImm(AMDGPU::sub1); + } // We can't call constrainSelectedInstRegOperands here, because it doesn't // work for target independent opcodes I.eraseFromParent(); const TargetRegisterClass *DstRC = - TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI); + TRI.getConstrainedRegClassForOperand(ResInst->getOperand(0), *MRI); if (!DstRC) return true; - return RBI.constrainGenericRegister(DstReg, *DstRC, MRI); + return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI); } static bool isConstant(const MachineInstr &MI) { @@ -1188,13 +1483,13 @@ void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load, GEPInfo GEPInfo(*PtrMI); - for (unsigned i = 1, e = 3; i < e; ++i) { + for (unsigned i = 1; i != 3; ++i) { const MachineOperand &GEPOp = PtrMI->getOperand(i); const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg()); assert(OpDef); - if (isConstant(*OpDef)) { - // FIXME: Is it possible to have multiple Imm parts? Maybe if we - // are lacking other optimizations. + if (i == 2 && isConstant(*OpDef)) { + // TODO: Could handle constant base + variable offset, but a combine + // probably should have commuted it. 
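// A standalone sketch, not part of the patch, of the 64-bit immediate split done
// above with APInt::trunc(32) and APInt::ashr(32): the low half feeds the sub0
// input of the REG_SEQUENCE and the high half feeds sub1.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Imm = 0x123456789ABCDEF0ull;
  const uint32_t Lo = static_cast<uint32_t>(Imm);       // -> S_MOV_B32 %lo (sub0)
  const uint32_t Hi = static_cast<uint32_t>(Imm >> 32); // -> S_MOV_B32 %hi (sub1)
  assert(Lo == 0x9ABCDEF0u);
  assert(Hi == 0x12345678u);
  assert(((static_cast<uint64_t>(Hi) << 32) | Lo) == Imm); // REG_SEQUENCE reassembles it
  return 0;
}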
assert(GEPInfo.Imm == 0); GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue(); continue; @@ -1240,16 +1535,26 @@ bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef AddrInfo) const { return false; } -bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const { - // TODO: Can/should we insert m0 initialization here for DS instructions and - // call the normal selector? - return false; +void AMDGPUInstructionSelector::initM0(MachineInstr &I) const { + MachineBasicBlock *BB = I.getParent(); + + const LLT PtrTy = MRI->getType(I.getOperand(1).getReg()); + unsigned AS = PtrTy.getAddressSpace(); + if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) && + STI.ldsRequiresM0Init()) { + // If DS instructions require M0 initializtion, insert it before selecting. + BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0) + .addImm(-1); + } +} + +bool AMDGPUInstructionSelector::selectG_LOAD_ATOMICRMW(MachineInstr &I) const { + initM0(I); + return selectImpl(I, *CoverageInfo); } bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); MachineOperand &CondOp = I.getOperand(0); Register CondReg = CondOp.getReg(); const DebugLoc &DL = I.getDebugLoc(); @@ -1263,11 +1568,12 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const { // GlobalISel, we should push that decision into RegBankSelect. Assume for now // RegBankSelect knows what it's doing if the branch condition is scc, even // though it currently does not. - if (isSCC(CondReg, MRI)) { + if (isSCC(CondReg, *MRI)) { CondPhysReg = AMDGPU::SCC; BrOpcode = AMDGPU::S_CBRANCH_SCC1; - ConstrainRC = &AMDGPU::SReg_32_XM0RegClass; - } else if (isVCC(CondReg, MRI)) { + // FIXME: Hack for isSCC tests + ConstrainRC = &AMDGPU::SGPR_32RegClass; + } else if (isVCC(CondReg, *MRI)) { // FIXME: Do we have to insert an and with exec here, like in SelectionDAG? // We sort of know that a VCC producer based on the register bank, that ands // inactive lanes with 0. What if there was a logical operation with vcc @@ -1279,8 +1585,8 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const { } else return false; - if (!MRI.getRegClassOrNull(CondReg)) - MRI.setRegClass(CondReg, ConstrainRC); + if (!MRI->getRegClassOrNull(CondReg)) + MRI->setRegClass(CondReg, ConstrainRC); BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg) .addReg(CondReg); @@ -1292,27 +1598,83 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const { } bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const { - MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - Register DstReg = I.getOperand(0).getReg(); - const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI); + const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID; I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32)); if (IsVGPR) I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); return RBI.constrainGenericRegister( - DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI); + DstReg, IsVGPR ? 
AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI); +} + +bool AMDGPUInstructionSelector::selectG_PTR_MASK(MachineInstr &I) const { + uint64_t Align = I.getOperand(2).getImm(); + const uint64_t Mask = ~((UINT64_C(1) << Align) - 1); + + MachineBasicBlock *BB = I.getParent(); + + Register DstReg = I.getOperand(0).getReg(); + Register SrcReg = I.getOperand(1).getReg(); + + const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); + const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI); + const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID; + unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32; + unsigned MovOpc = IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32; + const TargetRegisterClass &RegRC + = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass; + + LLT Ty = MRI->getType(DstReg); + + const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB, + *MRI); + const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB, + *MRI); + if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) || + !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI)) + return false; + + const DebugLoc &DL = I.getDebugLoc(); + Register ImmReg = MRI->createVirtualRegister(&RegRC); + BuildMI(*BB, &I, DL, TII.get(MovOpc), ImmReg) + .addImm(Mask); + + if (Ty.getSizeInBits() == 32) { + BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg) + .addReg(SrcReg) + .addReg(ImmReg); + I.eraseFromParent(); + return true; + } + + Register HiReg = MRI->createVirtualRegister(&RegRC); + Register LoReg = MRI->createVirtualRegister(&RegRC); + Register MaskLo = MRI->createVirtualRegister(&RegRC); + + BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg) + .addReg(SrcReg, 0, AMDGPU::sub0); + BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg) + .addReg(SrcReg, 0, AMDGPU::sub1); + + BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskLo) + .addReg(LoReg) + .addReg(ImmReg); + BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) + .addReg(MaskLo) + .addImm(AMDGPU::sub0) + .addReg(HiReg) + .addImm(AMDGPU::sub1); + I.eraseFromParent(); + return true; } -bool AMDGPUInstructionSelector::select(MachineInstr &I, - CodeGenCoverage &CoverageInfo) const { +bool AMDGPUInstructionSelector::select(MachineInstr &I) { if (I.isPHI()) return selectPHI(I); - if (!isPreISelGenericOpcode(I.getOpcode())) { + if (!I.isPreISelOpcode()) { if (I.isCopy()) return selectCOPY(I); return true; @@ -1324,16 +1686,18 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I, case TargetOpcode::G_XOR: if (selectG_AND_OR_XOR(I)) return true; - return selectImpl(I, CoverageInfo); + return selectImpl(I, *CoverageInfo); case TargetOpcode::G_ADD: case TargetOpcode::G_SUB: - if (selectG_ADD_SUB(I)) + if (selectImpl(I, *CoverageInfo)) return true; - LLVM_FALLTHROUGH; - default: - return selectImpl(I, CoverageInfo); + return selectG_ADD_SUB(I); + case TargetOpcode::G_UADDO: + case TargetOpcode::G_USUBO: + return selectG_UADDO_USUBO(I); case TargetOpcode::G_INTTOPTR: case TargetOpcode::G_BITCAST: + case TargetOpcode::G_PTRTOINT: return selectCOPY(I); case TargetOpcode::G_CONSTANT: case TargetOpcode::G_FCONSTANT: @@ -1353,32 +1717,40 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I, case TargetOpcode::G_INSERT: return selectG_INSERT(I); case TargetOpcode::G_INTRINSIC: - return selectG_INTRINSIC(I, CoverageInfo); + return selectG_INTRINSIC(I); case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: - return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo); + return selectG_INTRINSIC_W_SIDE_EFFECTS(I); case 
TargetOpcode::G_ICMP: if (selectG_ICMP(I)) return true; - return selectImpl(I, CoverageInfo); + return selectImpl(I, *CoverageInfo); case TargetOpcode::G_LOAD: - return selectImpl(I, CoverageInfo); + case TargetOpcode::G_ATOMIC_CMPXCHG: + case TargetOpcode::G_ATOMICRMW_XCHG: + case TargetOpcode::G_ATOMICRMW_ADD: + case TargetOpcode::G_ATOMICRMW_SUB: + case TargetOpcode::G_ATOMICRMW_AND: + case TargetOpcode::G_ATOMICRMW_OR: + case TargetOpcode::G_ATOMICRMW_XOR: + case TargetOpcode::G_ATOMICRMW_MIN: + case TargetOpcode::G_ATOMICRMW_MAX: + case TargetOpcode::G_ATOMICRMW_UMIN: + case TargetOpcode::G_ATOMICRMW_UMAX: + case TargetOpcode::G_ATOMICRMW_FADD: + return selectG_LOAD_ATOMICRMW(I); case TargetOpcode::G_SELECT: return selectG_SELECT(I); case TargetOpcode::G_STORE: - if (selectImpl(I, CoverageInfo)) - return true; return selectG_STORE(I); case TargetOpcode::G_TRUNC: return selectG_TRUNC(I); case TargetOpcode::G_SEXT: case TargetOpcode::G_ZEXT: case TargetOpcode::G_ANYEXT: - if (selectG_SZA_EXT(I)) { - I.eraseFromParent(); - return true; - } - - return false; + return selectG_SZA_EXT(I); + case TargetOpcode::G_SITOFP: + case TargetOpcode::G_UITOFP: + return selectG_SITOFP_UITOFP(I); case TargetOpcode::G_BRCOND: return selectG_BRCOND(I); case TargetOpcode::G_FRAME_INDEX: @@ -1388,6 +1760,10 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I, // is checking for G_CONSTANT I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE)); return true; + case TargetOpcode::G_PTR_MASK: + return selectG_PTR_MASK(I); + default: + return selectImpl(I, *CoverageInfo); } return false; } @@ -1402,14 +1778,14 @@ AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const { std::pair AMDGPUInstructionSelector::selectVOP3ModsImpl( - Register Src, const MachineRegisterInfo &MRI) const { + Register Src) const { unsigned Mods = 0; - MachineInstr *MI = MRI.getVRegDef(Src); + MachineInstr *MI = MRI->getVRegDef(Src); if (MI && MI->getOpcode() == AMDGPU::G_FNEG) { Src = MI->getOperand(1).getReg(); Mods |= SISrcMods::NEG; - MI = MRI.getVRegDef(Src); + MI = MRI->getVRegDef(Src); } if (MI && MI->getOpcode() == AMDGPU::G_FABS) { @@ -1432,12 +1808,23 @@ AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const { InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const { - MachineRegisterInfo &MRI - = Root.getParent()->getParent()->getParent()->getRegInfo(); + Register Src; + unsigned Mods; + std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg()); + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod + }}; +} +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectVOP3Mods0Clamp0OMod(MachineOperand &Root) const { Register Src; unsigned Mods; - std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI); + std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg()); return {{ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, @@ -1446,6 +1833,7 @@ AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const { [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod }}; } + InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const { return {{ @@ -1457,12 +1845,9 @@ AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const { InstructionSelector::ComplexRendererFns 
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const { - MachineRegisterInfo &MRI - = Root.getParent()->getParent()->getParent()->getRegInfo(); - Register Src; unsigned Mods; - std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI); + std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg()); return {{ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, @@ -1471,12 +1856,28 @@ AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const { } InstructionSelector::ComplexRendererFns -AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const { - MachineRegisterInfo &MRI = - Root.getParent()->getParent()->getParent()->getRegInfo(); +AMDGPUInstructionSelector::selectVOP3OpSelMods0(MachineOperand &Root) const { + // FIXME: Handle clamp and op_sel + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src_mods + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // clamp + }}; +} +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const { + // FIXME: Handle op_sel + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // src_mods + }}; +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const { SmallVector AddrInfo; - getAddrModeInfo(*Root.getParent(), MRI, AddrInfo); + getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo); if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1) return None; @@ -1496,11 +1897,8 @@ AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const { InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const { - MachineRegisterInfo &MRI = - Root.getParent()->getParent()->getParent()->getRegInfo(); - SmallVector AddrInfo; - getAddrModeInfo(*Root.getParent(), MRI, AddrInfo); + getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo); if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1) return None; @@ -1521,10 +1919,9 @@ InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const { MachineInstr *MI = Root.getParent(); MachineBasicBlock *MBB = MI->getParent(); - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); SmallVector AddrInfo; - getAddrModeInfo(*MI, MRI, AddrInfo); + getAddrModeInfo(*MI, *MRI, AddrInfo); // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits, // then we can select all ptr + 32-bit offsets not just immediate offsets. @@ -1540,7 +1937,7 @@ AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const { // failed trying to select this load into one of the _IMM variants since // the _IMM Patterns are considered before the _SGPR patterns. 
unsigned PtrReg = GEPInfo.SgprParts[0]; - unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register OffsetReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg) .addImm(GEPInfo.Imm); return {{ @@ -1553,8 +1950,6 @@ template InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const { MachineInstr *MI = Root.getParent(); - MachineBasicBlock *MBB = MI->getParent(); - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); InstructionSelector::ComplexRendererFns Default = {{ [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); }, @@ -1565,12 +1960,12 @@ AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const { if (!STI.hasFlatInstOffsets()) return Default; - const MachineInstr *OpDef = MRI.getVRegDef(Root.getReg()); + const MachineInstr *OpDef = MRI->getVRegDef(Root.getReg()); if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP) return Default; Optional Offset = - getConstantVRegVal(OpDef->getOperand(2).getReg(), MRI); + getConstantVRegVal(OpDef->getOperand(2).getReg(), *MRI); if (!Offset.hasValue()) return Default; @@ -1597,12 +1992,6 @@ AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const { return selectFlatOffsetImpl(Root); } -// FIXME: Implement -static bool signBitIsZero(const MachineOperand &Op, - const MachineRegisterInfo &MRI) { - return false; -} - static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) { auto PSV = PtrInfo.V.dyn_cast(); return PSV && PSV->isStack(); @@ -1613,12 +2002,11 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const { MachineInstr *MI = Root.getParent(); MachineBasicBlock *MBB = MI->getParent(); MachineFunction *MF = MBB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); const SIMachineFunctionInfo *Info = MF->getInfo(); int64_t Offset = 0; - if (mi_match(Root.getReg(), MRI, m_ICst(Offset))) { - Register HighBits = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + if (mi_match(Root.getReg(), *MRI, m_ICst(Offset))) { + Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); // TODO: Should this be inside the render function? The iterator seems to // move. @@ -1652,18 +2040,18 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const { // offsets. 
Optional FI; Register VAddr = Root.getReg(); - if (const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg())) { - if (isBaseWithConstantOffset(Root, MRI)) { + if (const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg())) { + if (isBaseWithConstantOffset(Root, *MRI)) { const MachineOperand &LHS = RootDef->getOperand(1); const MachineOperand &RHS = RootDef->getOperand(2); - const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg()); - const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg()); + const MachineInstr *LHSDef = MRI->getVRegDef(LHS.getReg()); + const MachineInstr *RHSDef = MRI->getVRegDef(RHS.getReg()); if (LHSDef && RHSDef) { int64_t PossibleOffset = RHSDef->getOperand(1).getCImm()->getSExtValue(); if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) && (!STI.privateMemoryResourceIsRangeChecked() || - signBitIsZero(LHS, MRI))) { + KnownBits->signBitIsZero(LHS.getReg()))) { if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX) FI = LHSDef->getOperand(1).getIndex(); else @@ -1700,15 +2088,30 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const { }}}; } +bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI, + const MachineOperand &Base, + int64_t Offset, + unsigned OffsetBits) const { + if ((OffsetBits == 16 && !isUInt<16>(Offset)) || + (OffsetBits == 8 && !isUInt<8>(Offset))) + return false; + + if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled()) + return true; + + // On Southern Islands instruction with a negative base value and an offset + // don't seem to work. + return KnownBits->signBitIsZero(Base.getReg()); +} + InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectMUBUFScratchOffset( MachineOperand &Root) const { MachineInstr *MI = Root.getParent(); MachineBasicBlock *MBB = MI->getParent(); - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); int64_t Offset = 0; - if (!mi_match(Root.getReg(), MRI, m_ICst(Offset)) || + if (!mi_match(Root.getReg(), *MRI, m_ICst(Offset)) || !SIInstrInfo::isLegalMUBUFImmOffset(Offset)) return {}; @@ -1728,3 +2131,54 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffset( [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset }}; } + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const { + const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg()); + if (!RootDef) { + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } + }}; + } + + int64_t ConstAddr = 0; + if (isBaseWithConstantOffset(Root, *MRI)) { + const MachineOperand &LHS = RootDef->getOperand(1); + const MachineOperand &RHS = RootDef->getOperand(2); + const MachineInstr *LHSDef = MRI->getVRegDef(LHS.getReg()); + const MachineInstr *RHSDef = MRI->getVRegDef(RHS.getReg()); + if (LHSDef && RHSDef) { + int64_t PossibleOffset = + RHSDef->getOperand(1).getCImm()->getSExtValue(); + if (isDSOffsetLegal(*MRI, LHS, PossibleOffset, 16)) { + // (add n0, c0) + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(LHS); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(PossibleOffset); } + }}; + } + } + } else if (RootDef->getOpcode() == AMDGPU::G_SUB) { + + + + } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) { + + + } + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } + }}; +} + +void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB, + const MachineInstr &MI) const { + const 
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); + Optional CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI); + assert(CstVal && "Expected constant value"); + MIB.addImm(CstVal.getValue()); +} diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 4f489ddfb23d..d3c83a6a872a 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -35,6 +35,7 @@ class AMDGPUInstrInfo; class AMDGPURegisterBankInfo; class GCNSubtarget; class MachineInstr; +class MachineIRBuilder; class MachineOperand; class MachineRegisterInfo; class SIInstrInfo; @@ -42,14 +43,20 @@ class SIMachineFunctionInfo; class SIRegisterInfo; class AMDGPUInstructionSelector : public InstructionSelector { +private: + MachineRegisterInfo *MRI; + public: AMDGPUInstructionSelector(const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI, const AMDGPUTargetMachine &TM); - bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override; + bool select(MachineInstr &I) override; static const char *getName(); + void setupMF(MachineFunction &MF, GISelKnownBits &KB, + CodeGenCoverage &CoverageInfo) override; + private: struct GEPInfo { const MachineInstr &GEP; @@ -72,32 +79,42 @@ private: bool selectPHI(MachineInstr &I) const; bool selectG_TRUNC(MachineInstr &I) const; bool selectG_SZA_EXT(MachineInstr &I) const; + bool selectG_SITOFP_UITOFP(MachineInstr &I) const; bool selectG_CONSTANT(MachineInstr &I) const; bool selectG_AND_OR_XOR(MachineInstr &I) const; bool selectG_ADD_SUB(MachineInstr &I) const; + bool selectG_UADDO_USUBO(MachineInstr &I) const; bool selectG_EXTRACT(MachineInstr &I) const; bool selectG_MERGE_VALUES(MachineInstr &I) const; bool selectG_UNMERGE_VALUES(MachineInstr &I) const; bool selectG_GEP(MachineInstr &I) const; bool selectG_IMPLICIT_DEF(MachineInstr &I) const; bool selectG_INSERT(MachineInstr &I) const; - bool selectG_INTRINSIC(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; - bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I, - CodeGenCoverage &CoverageInfo) const; + bool selectG_INTRINSIC(MachineInstr &I) const; + + std::tuple + splitBufferOffsets(MachineIRBuilder &B, Register OrigOffset) const; + + bool selectStoreIntrinsic(MachineInstr &MI, bool IsFormat) const; + + bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const; int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const; bool selectG_ICMP(MachineInstr &I) const; bool hasVgprParts(ArrayRef AddrInfo) const; void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI, SmallVectorImpl &AddrInfo) const; bool selectSMRD(MachineInstr &I, ArrayRef AddrInfo) const; - bool selectG_LOAD(MachineInstr &I) const; - bool selectG_SELECT(MachineInstr &I) const; + + void initM0(MachineInstr &I) const; + bool selectG_LOAD_ATOMICRMW(MachineInstr &I) const; bool selectG_STORE(MachineInstr &I) const; + bool selectG_SELECT(MachineInstr &I) const; bool selectG_BRCOND(MachineInstr &I) const; bool selectG_FRAME_INDEX(MachineInstr &I) const; + bool selectG_PTR_MASK(MachineInstr &I) const; std::pair - selectVOP3ModsImpl(Register Src, const MachineRegisterInfo &MRI) const; + selectVOP3ModsImpl(Register Src) const; InstructionSelector::ComplexRendererFns selectVCSRC(MachineOperand &Root) const; @@ -108,10 +125,17 @@ private: InstructionSelector::ComplexRendererFns selectVOP3Mods0(MachineOperand &Root) const; 
InstructionSelector::ComplexRendererFns + selectVOP3Mods0Clamp0OMod(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns selectVOP3OMods(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns selectVOP3Mods(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns + selectVOP3OpSelMods0(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns + selectVOP3OpSelMods(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns selectSmrdImm(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns @@ -133,6 +157,16 @@ private: InstructionSelector::ComplexRendererFns selectMUBUFScratchOffset(MachineOperand &Root) const; + bool isDSOffsetLegal(const MachineRegisterInfo &MRI, + const MachineOperand &Base, + int64_t Offset, unsigned OffsetBits) const; + + InstructionSelector::ComplexRendererFns + selectDS1Addr1Offset(MachineOperand &Root) const; + + void renderTruncImm32(MachineInstrBuilder &MIB, + const MachineInstr &MI) const; + const SIInstrInfo &TII; const SIRegisterInfo &TRI; const AMDGPURegisterBankInfo &RBI; diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td index 61bc415c839d..846e7f577a28 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -75,7 +75,7 @@ class ILFormat pattern> let isCodeGenOnly = 1; } -def TruePredicate : Predicate<"true">; +def TruePredicate : Predicate<"">; class PredicateControl { Predicate SubtargetPredicate = TruePredicate; @@ -220,80 +220,48 @@ def hi_f16_elt : PatLeaf< // PatLeafs for floating-point comparisons //===----------------------------------------------------------------------===// -def COND_OEQ : PatLeaf < - (cond), - [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}] ->; - -def COND_ONE : PatLeaf < - (cond), - [{return N->get() == ISD::SETONE || N->get() == ISD::SETNE;}] ->; - -def COND_OGT : PatLeaf < - (cond), - [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}] ->; - -def COND_OGE : PatLeaf < - (cond), - [{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}] ->; - -def COND_OLT : PatLeaf < - (cond), - [{return N->get() == ISD::SETOLT || N->get() == ISD::SETLT;}] ->; - -def COND_OLE : PatLeaf < - (cond), - [{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}] ->; - -def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>; -def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>; +def COND_OEQ : PatFrags<(ops), [(OtherVT SETOEQ), (OtherVT SETEQ)]>; +def COND_ONE : PatFrags<(ops), [(OtherVT SETONE), (OtherVT SETNE)]>; +def COND_OGT : PatFrags<(ops), [(OtherVT SETOGT), (OtherVT SETGT)]>; +def COND_OGE : PatFrags<(ops), [(OtherVT SETOGE), (OtherVT SETGE)]>; +def COND_OLT : PatFrags<(ops), [(OtherVT SETOLT), (OtherVT SETLT)]>; +def COND_OLE : PatFrags<(ops), [(OtherVT SETOLE), (OtherVT SETLE)]>; +def COND_O : PatFrags<(ops), [(OtherVT SETO)]>; +def COND_UO : PatFrags<(ops), [(OtherVT SETUO)]>; //===----------------------------------------------------------------------===// // PatLeafs for unsigned / unordered comparisons //===----------------------------------------------------------------------===// -def COND_UEQ : PatLeaf <(cond), [{return N->get() == ISD::SETUEQ;}]>; -def COND_UNE : PatLeaf <(cond), [{return N->get() == ISD::SETUNE;}]>; -def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>; -def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>; -def COND_ULT : PatLeaf <(cond), [{return N->get() 
== ISD::SETULT;}]>; -def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>; +def COND_UEQ : PatFrag<(ops), (OtherVT SETUEQ)>; +def COND_UNE : PatFrag<(ops), (OtherVT SETUNE)>; +def COND_UGT : PatFrag<(ops), (OtherVT SETUGT)>; +def COND_UGE : PatFrag<(ops), (OtherVT SETUGE)>; +def COND_ULT : PatFrag<(ops), (OtherVT SETULT)>; +def COND_ULE : PatFrag<(ops), (OtherVT SETULE)>; // XXX - For some reason R600 version is preferring to use unordered // for setne? -def COND_UNE_NE : PatLeaf < - (cond), - [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}] ->; +def COND_UNE_NE : PatFrags<(ops), [(OtherVT SETUNE), (OtherVT SETNE)]>; //===----------------------------------------------------------------------===// // PatLeafs for signed comparisons //===----------------------------------------------------------------------===// -def COND_SGT : PatLeaf <(cond), [{return N->get() == ISD::SETGT;}]>; -def COND_SGE : PatLeaf <(cond), [{return N->get() == ISD::SETGE;}]>; -def COND_SLT : PatLeaf <(cond), [{return N->get() == ISD::SETLT;}]>; -def COND_SLE : PatLeaf <(cond), [{return N->get() == ISD::SETLE;}]>; +def COND_SGT : PatFrag<(ops), (OtherVT SETGT)>; +def COND_SGE : PatFrag<(ops), (OtherVT SETGE)>; +def COND_SLT : PatFrag<(ops), (OtherVT SETLT)>; +def COND_SLE : PatFrag<(ops), (OtherVT SETLE)>; //===----------------------------------------------------------------------===// // PatLeafs for integer equality //===----------------------------------------------------------------------===// -def COND_EQ : PatLeaf < - (cond), - [{return N->get() == ISD::SETEQ || N->get() == ISD::SETUEQ;}] ->; - -def COND_NE : PatLeaf < - (cond), - [{return N->get() == ISD::SETNE || N->get() == ISD::SETUNE;}] ->; +def COND_EQ : PatFrags<(ops), [(OtherVT SETEQ), (OtherVT SETUEQ)]>; +def COND_NE : PatFrags<(ops), [(OtherVT SETNE), (OtherVT SETUNE)]>; +// FIXME: Should not need code predicate +//def COND_NULL : PatLeaf<(OtherVT null_frag)>; def COND_NULL : PatLeaf < (cond), [{(void)N; return false;}] @@ -335,17 +303,17 @@ def TEX_SHADOW_ARRAY : PatLeaf< // Load/Store Pattern Fragments //===----------------------------------------------------------------------===// +def atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue] +>; + class AddressSpaceList AS> { list AddrSpaces = AS; } -class Aligned8Bytes : PatFrag (N)->getAlignment() % 8 == 0; -}]>; - -class Aligned16Bytes : PatFrag (N)->getAlignment() >= 16; -}]>; +class Aligned { + int MinAlignment = Bytes; +} class LoadFrag : PatFrag<(ops node:$ptr), (op node:$ptr)>; @@ -502,6 +470,35 @@ defm atomic_store_#as : binary_atomic_op; } // End foreach AddrSpace +multiclass ret_noret_binary_atomic_op { + foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in { + let AddressSpaces = !cast("LoadAddress_"#as).AddrSpaces in { + defm "_"#as : binary_atomic_op; + + let PredicateCode = [{return (SDValue(N, 0).use_empty());}] in { + defm "_"#as#"_noret" : binary_atomic_op; + } + + let PredicateCode = [{return !(SDValue(N, 0).use_empty());}] in { + defm "_"#as#"_ret" : binary_atomic_op; + } + } + } +} + +defm atomic_swap : ret_noret_binary_atomic_op; +defm atomic_load_add : ret_noret_binary_atomic_op; +defm atomic_load_and : ret_noret_binary_atomic_op; +defm atomic_load_max : ret_noret_binary_atomic_op; +defm atomic_load_min : ret_noret_binary_atomic_op; +defm atomic_load_or : ret_noret_binary_atomic_op; +defm atomic_load_sub : ret_noret_binary_atomic_op; +defm 
atomic_load_umax : ret_noret_binary_atomic_op; +defm atomic_load_umin : ret_noret_binary_atomic_op; +defm atomic_load_xor : ret_noret_binary_atomic_op; +defm atomic_load_fadd : ret_noret_binary_atomic_op; + + def store_hi16_private : StoreHi16 , PrivateAddress; def truncstorei8_hi16_private : StoreHi16, PrivateAddress; @@ -513,21 +510,31 @@ def store_local_hi16 : StoreHi16 , LocalAddress; def truncstorei8_local_hi16 : StoreHi16, LocalAddress; def atomic_store_local : LocalStore ; -def load_align8_local : Aligned8Bytes < - (ops node:$ptr), (load_local node:$ptr) ->; -def load_align16_local : Aligned16Bytes < - (ops node:$ptr), (load_local node:$ptr) ->; +def load_align8_local : PatFrag <(ops node:$ptr), (load_local node:$ptr)> { + let IsLoad = 1; + let IsNonExtLoad = 1; + let MinAlignment = 8; +} -def store_align8_local : Aligned8Bytes < - (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr) ->; +def load_align16_local : PatFrag <(ops node:$ptr), (load_local node:$ptr)> { + let IsLoad = 1; + let IsNonExtLoad = 1; + let MinAlignment = 16; +} + +def store_align8_local: PatFrag<(ops node:$val, node:$ptr), + (store_local node:$val, node:$ptr)>, Aligned<8> { + let IsStore = 1; + let IsTruncStore = 0; +} + +def store_align16_local: PatFrag<(ops node:$val, node:$ptr), + (store_local node:$val, node:$ptr)>, Aligned<16> { + let IsStore = 1; + let IsTruncStore = 0; +} -def store_align16_local : Aligned16Bytes < - (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr) ->; def atomic_store_flat : FlatStore ; def truncstorei8_hi16_flat : StoreHi16, FlatStoreAddress; @@ -547,69 +554,26 @@ class region_binary_atomic_op : }]>; -def atomic_swap_local : local_binary_atomic_op; -def atomic_load_add_local : local_binary_atomic_op; -def atomic_load_sub_local : local_binary_atomic_op; -def atomic_load_and_local : local_binary_atomic_op; -def atomic_load_or_local : local_binary_atomic_op; -def atomic_load_xor_local : local_binary_atomic_op; -def atomic_load_nand_local : local_binary_atomic_op; -def atomic_load_min_local : local_binary_atomic_op; -def atomic_load_max_local : local_binary_atomic_op; -def atomic_load_umin_local : local_binary_atomic_op; -def atomic_load_umax_local : local_binary_atomic_op; - def mskor_global : PatFrag<(ops node:$val, node:$ptr), (AMDGPUstore_mskor node:$val, node:$ptr), [{ return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; }]>; -class AtomicCmpSwapLocal : PatFrag< - (ops node:$ptr, node:$cmp, node:$swap), - (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{ - AtomicSDNode *AN = cast(N); - return AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; -}]>; - -class AtomicCmpSwapRegion : PatFrag< - (ops node:$ptr, node:$cmp, node:$swap), - (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{ - AtomicSDNode *AN = cast(N); - return AN->getAddressSpace() == AMDGPUAS::REGION_ADDRESS; -}]>; +let AddressSpaces = StoreAddress_local.AddrSpaces in { +defm atomic_cmp_swap_local : ternary_atomic_op; +defm atomic_cmp_swap_local_m0 : ternary_atomic_op; +} -def atomic_cmp_swap_local : AtomicCmpSwapLocal ; +let AddressSpaces = StoreAddress_region.AddrSpaces in { +defm atomic_cmp_swap_region : ternary_atomic_op; +defm atomic_cmp_swap_region_m0 : ternary_atomic_op; +} class global_binary_atomic_op_frag : PatFrag< (ops node:$ptr, node:$value), (atomic_op node:$ptr, node:$value), [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>; -multiclass global_binary_atomic_op { - def "" : global_binary_atomic_op_frag; - - def _noret : PatFrag< - (ops node:$ptr, 
node:$value), - (atomic_op node:$ptr, node:$value), - [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>; - - def _ret : PatFrag< - (ops node:$ptr, node:$value), - (atomic_op node:$ptr, node:$value), - [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>; -} - -defm atomic_swap_global : global_binary_atomic_op; -defm atomic_add_global : global_binary_atomic_op; -defm atomic_and_global : global_binary_atomic_op; -defm atomic_max_global : global_binary_atomic_op; -defm atomic_min_global : global_binary_atomic_op; -defm atomic_or_global : global_binary_atomic_op; -defm atomic_sub_global : global_binary_atomic_op; -defm atomic_umax_global : global_binary_atomic_op; -defm atomic_umin_global : global_binary_atomic_op; -defm atomic_xor_global : global_binary_atomic_op; - // Legacy. def AMDGPUatomic_cmp_swap_global : PatFrag< (ops node:$ptr, node:$value), diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 670f6225fbf7..5aba35a19ced 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -11,6 +11,13 @@ /// \todo This should be generated by TableGen. //===----------------------------------------------------------------------===// +#if defined(_MSC_VER) || defined(__MINGW32__) +// According to Microsoft, one must set _USE_MATH_DEFINES in order to get M_PI +// from the Visual C++ cmath / math.h headers: +// https://docs.microsoft.com/en-us/cpp/c-runtime-library/math-constants?view=vs-2019 +#define _USE_MATH_DEFINES +#endif + #include "AMDGPU.h" #include "AMDGPULegalizerInfo.h" #include "AMDGPUTargetMachine.h" @@ -20,6 +27,7 @@ #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" @@ -32,7 +40,7 @@ using namespace LegalityPredicates; static LegalityPredicate isMultiple32(unsigned TypeIdx, - unsigned MaxSize = 512) { + unsigned MaxSize = 1024) { return [=](const LegalityQuery &Query) { const LLT Ty = Query.Types[TypeIdx]; const LLT EltTy = Ty.getScalarType(); @@ -40,12 +48,27 @@ static LegalityPredicate isMultiple32(unsigned TypeIdx, }; } +static LegalityPredicate sizeIs(unsigned TypeIdx, unsigned Size) { + return [=](const LegalityQuery &Query) { + return Query.Types[TypeIdx].getSizeInBits() == Size; + }; +} + static LegalityPredicate isSmallOddVector(unsigned TypeIdx) { return [=](const LegalityQuery &Query) { const LLT Ty = Query.Types[TypeIdx]; return Ty.isVector() && Ty.getNumElements() % 2 != 0 && - Ty.getElementType().getSizeInBits() < 32; + Ty.getElementType().getSizeInBits() < 32 && + Ty.getSizeInBits() % 32 != 0; + }; +} + +static LegalityPredicate isWideVec16(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[TypeIdx]; + const LLT EltTy = Ty.getScalarType(); + return EltTy.getSizeInBits() == 16 && Ty.getNumElements() > 2; }; } @@ -68,6 +91,31 @@ static LegalizeMutation fewerEltsToSize64Vector(unsigned TypeIdx) { }; } +// Increase the number of vector elements to reach the next multiple of 32-bit +// type. 
+static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[TypeIdx]; + + const LLT EltTy = Ty.getElementType(); + const int Size = Ty.getSizeInBits(); + const int EltSize = EltTy.getSizeInBits(); + const int NextMul32 = (Size + 31) / 32; + + assert(EltSize < 32); + + const int NewNumElts = (32 * NextMul32 + EltSize - 1) / EltSize; + return std::make_pair(TypeIdx, LLT::vector(NewNumElts, EltTy)); + }; +} + +static LegalityPredicate vectorSmallerThan(unsigned TypeIdx, unsigned Size) { + return [=](const LegalityQuery &Query) { + const LLT QueryTy = Query.Types[TypeIdx]; + return QueryTy.isVector() && QueryTy.getSizeInBits() < Size; + }; +} + static LegalityPredicate vectorWiderThan(unsigned TypeIdx, unsigned Size) { return [=](const LegalityQuery &Query) { const LLT QueryTy = Query.Types[TypeIdx]; @@ -82,7 +130,7 @@ static LegalityPredicate numElementsNotEven(unsigned TypeIdx) { }; } -// Any combination of 32 or 64-bit elements up to 512 bits, and multiples of +// Any combination of 32 or 64-bit elements up to 1024 bits, and multiples of // v2s16. static LegalityPredicate isRegisterType(unsigned TypeIdx) { return [=](const LegalityQuery &Query) { @@ -94,7 +142,21 @@ static LegalityPredicate isRegisterType(unsigned TypeIdx) { EltSize == 128 || EltSize == 256; } - return Ty.getSizeInBits() % 32 == 0 && Ty.getSizeInBits() <= 512; + return Ty.getSizeInBits() % 32 == 0 && Ty.getSizeInBits() <= 1024; + }; +} + +static LegalityPredicate elementTypeIs(unsigned TypeIdx, LLT Type) { + return [=](const LegalityQuery &Query) { + return Query.Types[TypeIdx].getElementType() == Type; + }; +} + +static LegalityPredicate isWideScalarTruncStore(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[TypeIdx]; + return !Ty.isVector() && Ty.getSizeInBits() > 32 && + Query.MMODescrs[0].SizeInBits < Ty.getSizeInBits(); }; } @@ -112,9 +174,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, const LLT S16 = LLT::scalar(16); const LLT S32 = LLT::scalar(32); const LLT S64 = LLT::scalar(64); + const LLT S96 = LLT::scalar(96); const LLT S128 = LLT::scalar(128); const LLT S256 = LLT::scalar(256); - const LLT S512 = LLT::scalar(512); + const LLT S1024 = LLT::scalar(1024); const LLT V2S16 = LLT::vector(2, 16); const LLT V4S16 = LLT::vector(4, 16); @@ -134,6 +197,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, const LLT V14S32 = LLT::vector(14, 32); const LLT V15S32 = LLT::vector(15, 32); const LLT V16S32 = LLT::vector(16, 32); + const LLT V32S32 = LLT::vector(32, 32); const LLT V2S64 = LLT::vector(2, 64); const LLT V3S64 = LLT::vector(3, 64); @@ -142,16 +206,19 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, const LLT V6S64 = LLT::vector(6, 64); const LLT V7S64 = LLT::vector(7, 64); const LLT V8S64 = LLT::vector(8, 64); + const LLT V16S64 = LLT::vector(16, 64); std::initializer_list AllS32Vectors = {V2S32, V3S32, V4S32, V5S32, V6S32, V7S32, V8S32, - V9S32, V10S32, V11S32, V12S32, V13S32, V14S32, V15S32, V16S32}; + V9S32, V10S32, V11S32, V12S32, V13S32, V14S32, V15S32, V16S32, V32S32}; std::initializer_list AllS64Vectors = - {V2S64, V3S64, V4S64, V5S64, V6S64, V7S64, V8S64}; + {V2S64, V3S64, V4S64, V5S64, V6S64, V7S64, V8S64, V16S64}; const LLT GlobalPtr = GetAddrSpacePtr(AMDGPUAS::GLOBAL_ADDRESS); const LLT ConstantPtr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS); + const LLT Constant32Ptr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS_32BIT); 
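// A quick standalone check, not part of the patch, of the arithmetic in the
// moreEltsToNext32Bit mutation defined earlier in this hunk: small-element
// vectors are padded out to the next 32-bit multiple, e.g. v3s16 -> v4s16.
static constexpr int newNumElts(int NumElts, int EltSize) {
  const int Size = NumElts * EltSize;
  const int NextMul32 = (Size + 31) / 32;          // total size rounded up to 32-bit units
  return (32 * NextMul32 + EltSize - 1) / EltSize; // elements needed to cover that size
}

static_assert(newNumElts(3, 16) == 4, "v3s16 (48 bits) -> v4s16 (64 bits)");
static_assert(newNumElts(5, 8) == 8, "v5s8 (40 bits) -> v8s8 (64 bits)");
static_assert(newNumElts(3, 8) == 4, "v3s8 (24 bits) -> v4s8 (32 bits)");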
const LLT LocalPtr = GetAddrSpacePtr(AMDGPUAS::LOCAL_ADDRESS); + const LLT RegionPtr = GetAddrSpacePtr(AMDGPUAS::REGION_ADDRESS); const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS::FLAT_ADDRESS); const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS::PRIVATE_ADDRESS); @@ -162,7 +229,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, }; const std::initializer_list AddrSpaces32 = { - LocalPtr, PrivatePtr + LocalPtr, PrivatePtr, Constant32Ptr, RegionPtr }; const std::initializer_list FPTypesBase = { @@ -216,37 +283,34 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16}) .clampScalar(0, S32, S64) .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) - .fewerElementsIf(vectorWiderThan(0, 32), fewerEltsToSize64Vector(0)) + .fewerElementsIf(vectorWiderThan(0, 64), fewerEltsToSize64Vector(0)) .widenScalarToNextPow2(0) .scalarize(0); - getActionDefinitionsBuilder({G_UADDO, G_SADDO, G_USUBO, G_SSUBO, + getActionDefinitionsBuilder({G_UADDO, G_USUBO, G_UADDE, G_SADDE, G_USUBE, G_SSUBE}) .legalFor({{S32, S1}}) - .clampScalar(0, S32, S32); + .clampScalar(0, S32, S32) + .scalarize(0); // TODO: Implement. + + getActionDefinitionsBuilder({G_SADDO, G_SSUBO}) + .lower(); getActionDefinitionsBuilder(G_BITCAST) - .legalForCartesianProduct({S32, V2S16}) - .legalForCartesianProduct({S64, V2S32, V4S16}) - .legalForCartesianProduct({V2S64, V4S32}) // Don't worry about the size constraint. - .legalIf(all(isPointer(0), isPointer(1))); + .legalIf(all(isRegisterType(0), isRegisterType(1))) + // FIXME: Testing hack + .legalForCartesianProduct({S16, LLT::vector(2, 8), }); - if (ST.has16BitInsts()) { - getActionDefinitionsBuilder(G_FCONSTANT) - .legalFor({S32, S64, S16}) - .clampScalar(0, S16, S64); - } else { - getActionDefinitionsBuilder(G_FCONSTANT) - .legalFor({S32, S64}) - .clampScalar(0, S32, S64); - } + getActionDefinitionsBuilder(G_FCONSTANT) + .legalFor({S32, S64, S16}) + .clampScalar(0, S16, S64); getActionDefinitionsBuilder(G_IMPLICIT_DEF) - .legalFor({S1, S32, S64, V2S32, V4S32, V2S16, V4S16, GlobalPtr, + .legalFor({S1, S32, S64, S16, V2S32, V4S32, V2S16, V4S16, GlobalPtr, ConstantPtr, LocalPtr, FlatPtr, PrivatePtr}) .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) - .clampScalarOrElt(0, S32, S512) + .clampScalarOrElt(0, S32, S1024) .legalIf(isMultiple32(0)) .widenScalarToNextPow2(0, 32) .clampMaxNumElements(0, S32, 16); @@ -256,23 +320,33 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // values may not be legal. We need to figure out how to distinguish // between these two scenarios. 
getActionDefinitionsBuilder(G_CONSTANT) - .legalFor({S1, S32, S64, GlobalPtr, + .legalFor({S1, S32, S64, S16, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr }) .clampScalar(0, S32, S64) .widenScalarToNextPow2(0) .legalIf(isPointer(0)); setAction({G_FRAME_INDEX, PrivatePtr}, Legal); + getActionDefinitionsBuilder(G_GLOBAL_VALUE) + .customFor({LocalPtr, GlobalPtr, ConstantPtr, Constant32Ptr}); + auto &FPOpActions = getActionDefinitionsBuilder( - { G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA, G_FCANONICALIZE}) + { G_FADD, G_FMUL, G_FMA, G_FCANONICALIZE}) .legalFor({S32, S64}); + auto &TrigActions = getActionDefinitionsBuilder({G_FSIN, G_FCOS}) + .customFor({S32, S64}); + auto &FDIVActions = getActionDefinitionsBuilder(G_FDIV) + .customFor({S32, S64}); if (ST.has16BitInsts()) { if (ST.hasVOP3PInsts()) FPOpActions.legalFor({S16, V2S16}); else FPOpActions.legalFor({S16}); + + TrigActions.customFor({S16}); + FDIVActions.customFor({S16}); } auto &MinNumMaxNum = getActionDefinitionsBuilder({ @@ -293,22 +367,37 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .scalarize(0); } - // TODO: Implement - getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower(); - if (ST.hasVOP3PInsts()) FPOpActions.clampMaxNumElements(0, S16, 2); + FPOpActions .scalarize(0) .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64); + TrigActions + .scalarize(0) + .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64); + + FDIVActions + .scalarize(0) + .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64); + + getActionDefinitionsBuilder({G_FNEG, G_FABS}) + .legalFor(FPTypesPK16) + .clampMaxNumElements(0, S16, 2) + .scalarize(0) + .clampScalar(0, S16, S64); + + // TODO: Implement + getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower(); + if (ST.has16BitInsts()) { - getActionDefinitionsBuilder(G_FSQRT) + getActionDefinitionsBuilder({G_FSQRT, G_FFLOOR}) .legalFor({S32, S64, S16}) .scalarize(0) .clampScalar(0, S16, S64); } else { - getActionDefinitionsBuilder(G_FSQRT) + getActionDefinitionsBuilder({G_FSQRT, G_FFLOOR}) .legalFor({S32, S64}) .scalarize(0) .clampScalar(0, S32, S64); @@ -334,23 +423,43 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .scalarize(0) .clampScalar(0, S32, S64); + // Whether this is legal depends on the floating point mode for the function. + auto &FMad = getActionDefinitionsBuilder(G_FMAD); + if (ST.hasMadF16()) + FMad.customFor({S32, S16}); + else + FMad.customFor({S32}); + FMad.scalarize(0) + .lower(); + getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT}) .legalFor({{S64, S32}, {S32, S16}, {S64, S16}, {S32, S1}, {S64, S1}, {S16, S1}, + {S96, S32}, // FIXME: Hack {S64, LLT::scalar(33)}, {S32, S8}, {S128, S32}, {S128, S64}, {S32, LLT::scalar(24)}}) .scalarize(0); - getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) - .legalFor({{S32, S32}, {S64, S32}}) + // TODO: Split s1->s64 during regbankselect for VALU. 
+ auto &IToFP = getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) + .legalFor({{S32, S32}, {S64, S32}, {S16, S32}, {S32, S1}, {S16, S1}, {S64, S1}}) .lowerFor({{S32, S64}}) - .customFor({{S64, S64}}) - .scalarize(0); + .customFor({{S64, S64}}); + if (ST.has16BitInsts()) + IToFP.legalFor({{S16, S16}}); + IToFP.clampScalar(1, S32, S64) + .scalarize(0); + + auto &FPToI = getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}) + .legalFor({{S32, S32}, {S32, S64}, {S32, S16}}); + if (ST.has16BitInsts()) + FPToI.legalFor({{S16, S16}}); + else + FPToI.minScalar(1, S32); - getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}) - .legalFor({{S32, S32}, {S32, S64}}) - .scalarize(0); + FPToI.minScalar(0, S32) + .scalarize(0); getActionDefinitionsBuilder(G_INTRINSIC_ROUND) .legalFor({S32, S64}) @@ -374,6 +483,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .legalForCartesianProduct(AddrSpaces32, {S32}) .scalarize(0); + getActionDefinitionsBuilder(G_PTR_MASK) + .scalarize(0) + .alwaysLegal(); + setAction({G_BLOCK_ADDR, CodePtr}, Legal); auto &CmpBuilder = @@ -415,7 +528,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .widenScalarToNextPow2(1, 32); // TODO: Expand for > s32 - getActionDefinitionsBuilder(G_BSWAP) + getActionDefinitionsBuilder({G_BSWAP, G_BITREVERSE}) .legalFor({S32}) .clampScalar(0, S32, S32) .scalarize(0); @@ -491,87 +604,239 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits())); }); - if (ST.hasFlatAddressSpace()) { - getActionDefinitionsBuilder(G_ADDRSPACE_CAST) - .scalarize(0) - .custom(); - } + getActionDefinitionsBuilder(G_ADDRSPACE_CAST) + .scalarize(0) + .custom(); // TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we // handle some operations by just promoting the register during // selection. There are also d16 loads on GFX9+ which preserve the high bits. - getActionDefinitionsBuilder({G_LOAD, G_STORE}) - .narrowScalarIf([](const LegalityQuery &Query) { - unsigned Size = Query.Types[0].getSizeInBits(); - unsigned MemSize = Query.MMODescrs[0].SizeInBits; - return (Size > 32 && MemSize < Size); - }, - [](const LegalityQuery &Query) { - return std::make_pair(0, LLT::scalar(32)); - }) - .fewerElementsIf([=](const LegalityQuery &Query) { - unsigned MemSize = Query.MMODescrs[0].SizeInBits; - return (MemSize == 96) && - Query.Types[0].isVector() && - !ST.hasDwordx3LoadStores(); - }, - [=](const LegalityQuery &Query) { - return std::make_pair(0, V2S32); - }) - .legalIf([=](const LegalityQuery &Query) { - const LLT &Ty0 = Query.Types[0]; - - unsigned Size = Ty0.getSizeInBits(); - unsigned MemSize = Query.MMODescrs[0].SizeInBits; - if (Size < 32 || (Size > 32 && MemSize < Size)) - return false; - - if (Ty0.isVector() && Size != MemSize) - return false; - - // TODO: Decompose private loads into 4-byte components. - // TODO: Illegal flat loads on SI - switch (MemSize) { - case 8: - case 16: - return Size == 32; - case 32: - case 64: - case 128: - return true; + auto maxSizeForAddrSpace = [this](unsigned AS) -> unsigned { + switch (AS) { + // FIXME: Private element size. + case AMDGPUAS::PRIVATE_ADDRESS: + return 32; + // FIXME: Check subtarget + case AMDGPUAS::LOCAL_ADDRESS: + return ST.useDS128() ? 128 : 64; + + // Treat constant and global as identical. SMRD loads are sometimes usable + // for global loads (ideally constant address space should be eliminated) + // depending on the context. 
Legality cannot be context dependent, but + // RegBankSelect can split the load as necessary depending on the pointer + // register bank/uniformity and if the memory is invariant or not written in + // a kernel. + case AMDGPUAS::CONSTANT_ADDRESS: + case AMDGPUAS::GLOBAL_ADDRESS: + return 512; + default: + return 128; + } + }; - case 96: - return ST.hasDwordx3LoadStores(); - - case 256: - case 512: - // TODO: Possibly support loads of i256 and i512 . This will require - // adding i256 and i512 types to MVT in order for to be able to use - // TableGen. - // TODO: Add support for other vector types, this will require - // defining more value mappings for the new types. - return Ty0.isVector() && (Ty0.getScalarType().getSizeInBits() == 32 || - Ty0.getScalarType().getSizeInBits() == 64); - - default: - return false; - } - }) - .clampScalar(0, S32, S64); + const auto needToSplitLoad = [=](const LegalityQuery &Query) -> bool { + const LLT DstTy = Query.Types[0]; + + // Split vector extloads. + unsigned MemSize = Query.MMODescrs[0].SizeInBits; + if (DstTy.isVector() && DstTy.getSizeInBits() > MemSize) + return true; + + const LLT PtrTy = Query.Types[1]; + unsigned AS = PtrTy.getAddressSpace(); + if (MemSize > maxSizeForAddrSpace(AS)) + return true; + + // Catch weird sized loads that don't evenly divide into the access sizes + // TODO: May be able to widen depending on alignment etc. + unsigned NumRegs = MemSize / 32; + if (NumRegs == 3 && !ST.hasDwordx3LoadStores()) + return true; + + unsigned Align = Query.MMODescrs[0].AlignInBits; + if (Align < MemSize) { + const SITargetLowering *TLI = ST.getTargetLowering(); + return !TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS, Align / 8); + } + + return false; + }; + unsigned GlobalAlign32 = ST.hasUnalignedBufferAccess() ? 0 : 32; + unsigned GlobalAlign16 = ST.hasUnalignedBufferAccess() ? 0 : 16; + unsigned GlobalAlign8 = ST.hasUnalignedBufferAccess() ? 0 : 8; + + // TODO: Refine based on subtargets which support unaligned access or 128-bit + // LDS + // TODO: Unsupported flat for SI. + + for (unsigned Op : {G_LOAD, G_STORE}) { + const bool IsStore = Op == G_STORE; + + auto &Actions = getActionDefinitionsBuilder(Op); + // Whitelist the common cases. 
+ // TODO: Pointer loads + // TODO: Wide constant loads + // TODO: Only CI+ has 3x loads + // TODO: Loads to s16 on gfx9 + Actions.legalForTypesWithMemDesc({{S32, GlobalPtr, 32, GlobalAlign32}, + {V2S32, GlobalPtr, 64, GlobalAlign32}, + {V3S32, GlobalPtr, 96, GlobalAlign32}, + {S96, GlobalPtr, 96, GlobalAlign32}, + {V4S32, GlobalPtr, 128, GlobalAlign32}, + {S128, GlobalPtr, 128, GlobalAlign32}, + {S64, GlobalPtr, 64, GlobalAlign32}, + {V2S64, GlobalPtr, 128, GlobalAlign32}, + {V2S16, GlobalPtr, 32, GlobalAlign32}, + {S32, GlobalPtr, 8, GlobalAlign8}, + {S32, GlobalPtr, 16, GlobalAlign16}, + + {S32, LocalPtr, 32, 32}, + {S64, LocalPtr, 64, 32}, + {V2S32, LocalPtr, 64, 32}, + {S32, LocalPtr, 8, 8}, + {S32, LocalPtr, 16, 16}, + {V2S16, LocalPtr, 32, 32}, + + {S32, PrivatePtr, 32, 32}, + {S32, PrivatePtr, 8, 8}, + {S32, PrivatePtr, 16, 16}, + {V2S16, PrivatePtr, 32, 32}, + + {S32, FlatPtr, 32, GlobalAlign32}, + {S32, FlatPtr, 16, GlobalAlign16}, + {S32, FlatPtr, 8, GlobalAlign8}, + {V2S16, FlatPtr, 32, GlobalAlign32}, + + {S32, ConstantPtr, 32, GlobalAlign32}, + {V2S32, ConstantPtr, 64, GlobalAlign32}, + {V3S32, ConstantPtr, 96, GlobalAlign32}, + {V4S32, ConstantPtr, 128, GlobalAlign32}, + {S64, ConstantPtr, 64, GlobalAlign32}, + {S128, ConstantPtr, 128, GlobalAlign32}, + {V2S32, ConstantPtr, 32, GlobalAlign32}}); + Actions + .customIf(typeIs(1, Constant32Ptr)) + .narrowScalarIf( + [=](const LegalityQuery &Query) -> bool { + return !Query.Types[0].isVector() && needToSplitLoad(Query); + }, + [=](const LegalityQuery &Query) -> std::pair { + const LLT DstTy = Query.Types[0]; + const LLT PtrTy = Query.Types[1]; + + const unsigned DstSize = DstTy.getSizeInBits(); + unsigned MemSize = Query.MMODescrs[0].SizeInBits; + + // Split extloads. + if (DstSize > MemSize) + return std::make_pair(0, LLT::scalar(MemSize)); + + if (DstSize > 32 && (DstSize % 32 != 0)) { + // FIXME: Need a way to specify non-extload of larger size if + // suitably aligned. + return std::make_pair(0, LLT::scalar(32 * (DstSize / 32))); + } + + unsigned MaxSize = maxSizeForAddrSpace(PtrTy.getAddressSpace()); + if (MemSize > MaxSize) + return std::make_pair(0, LLT::scalar(MaxSize)); + + unsigned Align = Query.MMODescrs[0].AlignInBits; + return std::make_pair(0, LLT::scalar(Align)); + }) + .fewerElementsIf( + [=](const LegalityQuery &Query) -> bool { + return Query.Types[0].isVector() && needToSplitLoad(Query); + }, + [=](const LegalityQuery &Query) -> std::pair { + const LLT DstTy = Query.Types[0]; + const LLT PtrTy = Query.Types[1]; + + LLT EltTy = DstTy.getElementType(); + unsigned MaxSize = maxSizeForAddrSpace(PtrTy.getAddressSpace()); + + // Split if it's too large for the address space. + if (Query.MMODescrs[0].SizeInBits > MaxSize) { + unsigned NumElts = DstTy.getNumElements(); + unsigned NumPieces = Query.MMODescrs[0].SizeInBits / MaxSize; + + // FIXME: Refine when odd breakdowns handled + // The scalars will need to be re-legalized. + if (NumPieces == 1 || NumPieces >= NumElts || + NumElts % NumPieces != 0) + return std::make_pair(0, EltTy); + + return std::make_pair(0, + LLT::vector(NumElts / NumPieces, EltTy)); + } + + // Need to split because of alignment. + unsigned Align = Query.MMODescrs[0].AlignInBits; + unsigned EltSize = EltTy.getSizeInBits(); + if (EltSize > Align && + (EltSize / Align < DstTy.getNumElements())) { + return std::make_pair(0, LLT::vector(EltSize / Align, EltTy)); + } + + // May need relegalization for the scalars. 
+ return std::make_pair(0, EltTy); + }) + .minScalar(0, S32); + + if (IsStore) + Actions.narrowScalarIf(isWideScalarTruncStore(0), changeTo(0, S32)); + + // TODO: Need a bitcast lower option? + Actions + .legalIf([=](const LegalityQuery &Query) { + const LLT Ty0 = Query.Types[0]; + unsigned Size = Ty0.getSizeInBits(); + unsigned MemSize = Query.MMODescrs[0].SizeInBits; + unsigned Align = Query.MMODescrs[0].AlignInBits; + + // No extending vector loads. + if (Size > MemSize && Ty0.isVector()) + return false; + + // FIXME: Widening store from alignment not valid. + if (MemSize < Size) + MemSize = std::max(MemSize, Align); + + switch (MemSize) { + case 8: + case 16: + return Size == 32; + case 32: + case 64: + case 128: + return true; + case 96: + return ST.hasDwordx3LoadStores(); + case 256: + case 512: + return true; + default: + return false; + } + }) + .widenScalarToNextPow2(0) + // TODO: v3s32->v4s32 with alignment + .moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0)); + } - // FIXME: Handle alignment requirements. auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD}) - .legalForTypesWithMemDesc({ - {S32, GlobalPtr, 8, 8}, - {S32, GlobalPtr, 16, 8}, - {S32, LocalPtr, 8, 8}, - {S32, LocalPtr, 16, 8}, - {S32, PrivatePtr, 8, 8}, - {S32, PrivatePtr, 16, 8}}); + .legalForTypesWithMemDesc({{S32, GlobalPtr, 8, 8}, + {S32, GlobalPtr, 16, 2 * 8}, + {S32, LocalPtr, 8, 8}, + {S32, LocalPtr, 16, 16}, + {S32, PrivatePtr, 8, 8}, + {S32, PrivatePtr, 16, 16}, + {S32, ConstantPtr, 8, 8}, + {S32, ConstantPtr, 16, 2 * 8}}); if (ST.hasFlatAddressSpace()) { - ExtLoads.legalForTypesWithMemDesc({{S32, FlatPtr, 8, 8}, - {S32, FlatPtr, 16, 8}}); + ExtLoads.legalForTypesWithMemDesc( + {{S32, FlatPtr, 8, 8}, {S32, FlatPtr, 16, 16}}); } ExtLoads.clampScalar(0, S32, S32) @@ -590,6 +855,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}}); } + getActionDefinitionsBuilder(G_ATOMICRMW_FADD) + .legalFor({{S32, LocalPtr}}); + + getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS) + .lower(); + // TODO: Pointer types, any 32-bit or 64-bit vector getActionDefinitionsBuilder(G_SELECT) .legalForCartesianProduct({S32, S64, S16, V2S32, V2S16, V4S16, @@ -643,7 +914,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, return (EltTy.getSizeInBits() == 16 || EltTy.getSizeInBits() % 32 == 0) && VecTy.getSizeInBits() % 32 == 0 && - VecTy.getSizeInBits() <= 512 && + VecTy.getSizeInBits() <= 1024 && IdxTy.getSizeInBits() == 32; }) .clampScalar(EltTypeIdx, S32, S64) @@ -663,6 +934,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // FIXME: Doesn't handle extract of illegal sizes. getActionDefinitionsBuilder(Op) + .lowerIf(all(typeIs(LitTyIdx, S16), sizeIs(BigTyIdx, 32))) + // FIXME: Multiples of 16 should not be legal. 
.legalIf([=](const LegalityQuery &Query) { const LLT BigTy = Query.Types[BigTyIdx]; const LLT LitTy = Query.Types[LitTyIdx]; @@ -686,18 +959,36 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, } - getActionDefinitionsBuilder(G_BUILD_VECTOR) - .legalForCartesianProduct(AllS32Vectors, {S32}) - .legalForCartesianProduct(AllS64Vectors, {S64}) - .clampNumElements(0, V16S32, V16S32) - .clampNumElements(0, V2S64, V8S64) - .minScalarSameAs(1, 0) - .legalIf(isRegisterType(0)) - .minScalarOrElt(0, S32); + auto &BuildVector = getActionDefinitionsBuilder(G_BUILD_VECTOR) + .legalForCartesianProduct(AllS32Vectors, {S32}) + .legalForCartesianProduct(AllS64Vectors, {S64}) + .clampNumElements(0, V16S32, V32S32) + .clampNumElements(0, V2S64, V16S64) + .fewerElementsIf(isWideVec16(0), changeTo(0, V2S16)); + + if (ST.hasScalarPackInsts()) + BuildVector.legalFor({V2S16, S32}); + + BuildVector + .minScalarSameAs(1, 0) + .legalIf(isRegisterType(0)) + .minScalarOrElt(0, S32); + + if (ST.hasScalarPackInsts()) { + getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC) + .legalFor({V2S16, S32}) + .lower(); + } else { + getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC) + .lower(); + } getActionDefinitionsBuilder(G_CONCAT_VECTORS) .legalIf(isRegisterType(0)); + // TODO: Don't fully scalarize v2s16 pieces + getActionDefinitionsBuilder(G_SHUFFLE_VECTOR).lower(); + // Merge/Unmerge for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) { unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1; @@ -715,14 +1006,17 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, return false; }; - getActionDefinitionsBuilder(Op) + auto &Builder = getActionDefinitionsBuilder(Op) .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16) // Clamp the little scalar to s8-s256 and make it a power of 2. It's not // worth considering the multiples of 64 since 2*192 and 2*384 are not // valid. .clampScalar(LitTyIdx, S16, S256) .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32) - + .moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx)) + .fewerElementsIf(all(typeIs(0, S16), vectorWiderThan(1, 32), + elementTypeIs(1, S16)), + changeTo(1, V2S16)) // Break up vectors with weird elements into scalars .fewerElementsIf( [=](const LegalityQuery &Query) { return notValidElt(Query, 0); }, @@ -730,25 +1024,37 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .fewerElementsIf( [=](const LegalityQuery &Query) { return notValidElt(Query, 1); }, scalarize(1)) - .clampScalar(BigTyIdx, S32, S512) - .widenScalarIf( + .clampScalar(BigTyIdx, S32, S1024) + .lowerFor({{S16, V2S16}}); + + if (Op == G_MERGE_VALUES) { + Builder.widenScalarIf( + // TODO: Use 16-bit shifts if legal for 8-bit values? [=](const LegalityQuery &Query) { - const LLT &Ty = Query.Types[BigTyIdx]; - return !isPowerOf2_32(Ty.getSizeInBits()) && - Ty.getSizeInBits() % 16 != 0; + const LLT Ty = Query.Types[LitTyIdx]; + return Ty.getSizeInBits() < 32; }, - [=](const LegalityQuery &Query) { - // Pick the next power of 2, or a multiple of 64 over 128. - // Whichever is smaller. 
- const LLT &Ty = Query.Types[BigTyIdx]; - unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1); - if (NewSizeInBits >= 256) { - unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1); - if (RoundedTo < NewSizeInBits) - NewSizeInBits = RoundedTo; - } - return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits)); - }) + changeTo(LitTyIdx, S32)); + } + + Builder.widenScalarIf( + [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[BigTyIdx]; + return !isPowerOf2_32(Ty.getSizeInBits()) && + Ty.getSizeInBits() % 16 != 0; + }, + [=](const LegalityQuery &Query) { + // Pick the next power of 2, or a multiple of 64 over 128. + // Whichever is smaller. + const LLT &Ty = Query.Types[BigTyIdx]; + unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1); + if (NewSizeInBits >= 256) { + unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1); + if (RoundedTo < NewSizeInBits) + NewSizeInBits = RoundedTo; + } + return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits)); + }) .legalIf([=](const LegalityQuery &Query) { const LLT &BigTy = Query.Types[BigTyIdx]; const LLT &LitTy = Query.Types[LitTyIdx]; @@ -760,43 +1066,56 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, return BigTy.getSizeInBits() % 16 == 0 && LitTy.getSizeInBits() % 16 == 0 && - BigTy.getSizeInBits() <= 512; + BigTy.getSizeInBits() <= 1024; }) // Any vectors left are the wrong size. Scalarize them. .scalarize(0) .scalarize(1); } + getActionDefinitionsBuilder(G_SEXT_INREG).lower(); + computeTables(); verify(*ST.getInstrInfo()); } bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder, + MachineIRBuilder &B, GISelChangeObserver &Observer) const { switch (MI.getOpcode()) { case TargetOpcode::G_ADDRSPACE_CAST: - return legalizeAddrSpaceCast(MI, MRI, MIRBuilder); + return legalizeAddrSpaceCast(MI, MRI, B); case TargetOpcode::G_FRINT: - return legalizeFrint(MI, MRI, MIRBuilder); + return legalizeFrint(MI, MRI, B); case TargetOpcode::G_FCEIL: - return legalizeFceil(MI, MRI, MIRBuilder); + return legalizeFceil(MI, MRI, B); case TargetOpcode::G_INTRINSIC_TRUNC: - return legalizeIntrinsicTrunc(MI, MRI, MIRBuilder); + return legalizeIntrinsicTrunc(MI, MRI, B); case TargetOpcode::G_SITOFP: - return legalizeITOFP(MI, MRI, MIRBuilder, true); + return legalizeITOFP(MI, MRI, B, true); case TargetOpcode::G_UITOFP: - return legalizeITOFP(MI, MRI, MIRBuilder, false); + return legalizeITOFP(MI, MRI, B, false); case TargetOpcode::G_FMINNUM: case TargetOpcode::G_FMAXNUM: case TargetOpcode::G_FMINNUM_IEEE: case TargetOpcode::G_FMAXNUM_IEEE: - return legalizeMinNumMaxNum(MI, MRI, MIRBuilder); + return legalizeMinNumMaxNum(MI, MRI, B); case TargetOpcode::G_EXTRACT_VECTOR_ELT: - return legalizeExtractVectorElt(MI, MRI, MIRBuilder); + return legalizeExtractVectorElt(MI, MRI, B); case TargetOpcode::G_INSERT_VECTOR_ELT: - return legalizeInsertVectorElt(MI, MRI, MIRBuilder); + return legalizeInsertVectorElt(MI, MRI, B); + case TargetOpcode::G_FSIN: + case TargetOpcode::G_FCOS: + return legalizeSinCos(MI, MRI, B); + case TargetOpcode::G_GLOBAL_VALUE: + return legalizeGlobalValue(MI, MRI, B); + case TargetOpcode::G_LOAD: + return legalizeLoad(MI, MRI, B, Observer); + case TargetOpcode::G_FMAD: + return legalizeFMad(MI, MRI, B); + case TargetOpcode::G_FDIV: + return legalizeFDIV(MI, MRI, B); default: return false; } @@ -807,11 +1126,13 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI, Register AMDGPULegalizerInfo::getSegmentAperture( unsigned 
AS, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const { - MachineFunction &MF = MIRBuilder.getMF(); + MachineIRBuilder &B) const { + MachineFunction &MF = B.getMF(); const GCNSubtarget &ST = MF.getSubtarget(); const LLT S32 = LLT::scalar(32); + assert(AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS); + if (ST.hasApertureRegs()) { // FIXME: Use inline constants (src_{shared, private}_base) instead of // getreg. @@ -829,13 +1150,13 @@ Register AMDGPULegalizerInfo::getSegmentAperture( Register ApertureReg = MRI.createGenericVirtualRegister(S32); Register GetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); - MIRBuilder.buildInstr(AMDGPU::S_GETREG_B32) + B.buildInstr(AMDGPU::S_GETREG_B32) .addDef(GetReg) .addImm(Encoding); MRI.setType(GetReg, S32); - auto ShiftAmt = MIRBuilder.buildConstant(S32, WidthM1 + 1); - MIRBuilder.buildInstr(TargetOpcode::G_SHL) + auto ShiftAmt = B.buildConstant(S32, WidthM1 + 1); + B.buildInstr(TargetOpcode::G_SHL) .addDef(ApertureReg) .addUse(GetReg) .addUse(ShiftAmt.getReg(0)); @@ -846,8 +1167,9 @@ Register AMDGPULegalizerInfo::getSegmentAperture( Register QueuePtr = MRI.createGenericVirtualRegister( LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64)); - // FIXME: Placeholder until we can track the input registers. - MIRBuilder.buildConstant(QueuePtr, 0xdeadbeef); + const SIMachineFunctionInfo *MFI = MF.getInfo(); + if (!loadInputValue(QueuePtr, B, &MFI->getArgInfo().QueuePtr)) + return Register(); // Offset into amd_queue_t for group_segment_aperture_base_hi / // private_segment_aperture_base_hi. @@ -870,18 +1192,19 @@ Register AMDGPULegalizerInfo::getSegmentAperture( Register LoadResult = MRI.createGenericVirtualRegister(S32); Register LoadAddr; - MIRBuilder.materializeGEP(LoadAddr, QueuePtr, LLT::scalar(64), StructOffset); - MIRBuilder.buildLoad(LoadResult, LoadAddr, *MMO); + B.materializeGEP(LoadAddr, QueuePtr, LLT::scalar(64), StructOffset); + B.buildLoad(LoadResult, LoadAddr, *MMO); return LoadResult; } bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const { - MachineFunction &MF = MIRBuilder.getMF(); + MachineIRBuilder &B) const { + MachineFunction &MF = B.getMF(); - MIRBuilder.setInstr(MI); + B.setInstr(MI); + const LLT S32 = LLT::scalar(32); Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); @@ -899,7 +1222,28 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( const GCNSubtarget &ST = MF.getSubtarget(); if (ST.getTargetLowering()->isNoopAddrSpaceCast(SrcAS, DestAS)) { - MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BITCAST)); + MI.setDesc(B.getTII().get(TargetOpcode::G_BITCAST)); + return true; + } + + if (DestAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) { + // Truncate. + B.buildExtract(Dst, Src, 0); + MI.eraseFromParent(); + return true; + } + + if (SrcAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) { + const SIMachineFunctionInfo *Info = MF.getInfo(); + uint32_t AddrHiVal = Info->get32BitAddressHighBits(); + + // FIXME: This is a bit ugly due to creating a merge of 2 pointers to + // another. Merge operands are required to be the same type, but creating an + // extra ptrtoint would be kind of pointless. 
+ auto HighAddr = B.buildConstant( + LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS_32BIT, 32), AddrHiVal); + B.buildMerge(Dst, {Src, HighAddr.getReg(0)}); + MI.eraseFromParent(); return true; } @@ -908,47 +1252,52 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( DestAS == AMDGPUAS::PRIVATE_ADDRESS); unsigned NullVal = TM.getNullPointerValue(DestAS); - auto SegmentNull = MIRBuilder.buildConstant(DstTy, NullVal); - auto FlatNull = MIRBuilder.buildConstant(SrcTy, 0); + auto SegmentNull = B.buildConstant(DstTy, NullVal); + auto FlatNull = B.buildConstant(SrcTy, 0); Register PtrLo32 = MRI.createGenericVirtualRegister(DstTy); // Extract low 32-bits of the pointer. - MIRBuilder.buildExtract(PtrLo32, Src, 0); + B.buildExtract(PtrLo32, Src, 0); Register CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1)); - MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, FlatNull.getReg(0)); - MIRBuilder.buildSelect(Dst, CmpRes, PtrLo32, SegmentNull.getReg(0)); + B.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, FlatNull.getReg(0)); + B.buildSelect(Dst, CmpRes, PtrLo32, SegmentNull.getReg(0)); MI.eraseFromParent(); return true; } - assert(SrcAS == AMDGPUAS::LOCAL_ADDRESS || - SrcAS == AMDGPUAS::PRIVATE_ADDRESS); + if (SrcAS != AMDGPUAS::LOCAL_ADDRESS && SrcAS != AMDGPUAS::PRIVATE_ADDRESS) + return false; + + if (!ST.hasFlatAddressSpace()) + return false; auto SegmentNull = - MIRBuilder.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS)); + B.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS)); auto FlatNull = - MIRBuilder.buildConstant(DstTy, TM.getNullPointerValue(DestAS)); + B.buildConstant(DstTy, TM.getNullPointerValue(DestAS)); - Register ApertureReg = getSegmentAperture(DestAS, MRI, MIRBuilder); + Register ApertureReg = getSegmentAperture(SrcAS, MRI, B); + if (!ApertureReg.isValid()) + return false; Register CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1)); - MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, SegmentNull.getReg(0)); + B.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, SegmentNull.getReg(0)); Register BuildPtr = MRI.createGenericVirtualRegister(DstTy); // Coerce the type of the low half of the result so we can use merge_values. - Register SrcAsInt = MRI.createGenericVirtualRegister(LLT::scalar(32)); - MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT) + Register SrcAsInt = MRI.createGenericVirtualRegister(S32); + B.buildInstr(TargetOpcode::G_PTRTOINT) .addDef(SrcAsInt) .addUse(Src); // TODO: Should we allow mismatched types but matching sizes in merges to // avoid the ptrtoint? 
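// Illustrative sketch (not from the upstream patch): the segment-to-flat path
// of legalizeAddrSpaceCast around this point, written as plain 64-bit
// arithmetic. The low half is the 32-bit segment pointer, the high half comes
// from the aperture register, and the segment null value maps to the flat null
// value. Hypothetical standalone helper, not the MIR-building code itself.
#include <cstdint>

static inline uint64_t segmentToFlat(uint32_t SegPtr, uint32_t ApertureHi,
                                     uint32_t SegmentNull, uint64_t FlatNull) {
  if (SegPtr == SegmentNull)                    // the G_ICMP + G_SELECT pair
    return FlatNull;
  return (uint64_t(ApertureHi) << 32) | SegPtr; // G_MERGE_VALUES of lo/hi
}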
- MIRBuilder.buildMerge(BuildPtr, {SrcAsInt, ApertureReg}); - MIRBuilder.buildSelect(Dst, CmpRes, BuildPtr, FlatNull.getReg(0)); + B.buildMerge(BuildPtr, {SrcAsInt, ApertureReg}); + B.buildSelect(Dst, CmpRes, BuildPtr, FlatNull.getReg(0)); MI.eraseFromParent(); return true; @@ -956,8 +1305,8 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( bool AMDGPULegalizerInfo::legalizeFrint( MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const { - MIRBuilder.setInstr(MI); + MachineIRBuilder &B) const { + B.setInstr(MI); Register Src = MI.getOperand(1).getReg(); LLT Ty = MRI.getType(Src); @@ -966,18 +1315,18 @@ bool AMDGPULegalizerInfo::legalizeFrint( APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52"); APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51"); - auto C1 = MIRBuilder.buildFConstant(Ty, C1Val); - auto CopySign = MIRBuilder.buildFCopysign(Ty, C1, Src); + auto C1 = B.buildFConstant(Ty, C1Val); + auto CopySign = B.buildFCopysign(Ty, C1, Src); // TODO: Should this propagate fast-math-flags? - auto Tmp1 = MIRBuilder.buildFAdd(Ty, Src, CopySign); - auto Tmp2 = MIRBuilder.buildFSub(Ty, Tmp1, CopySign); + auto Tmp1 = B.buildFAdd(Ty, Src, CopySign); + auto Tmp2 = B.buildFSub(Ty, Tmp1, CopySign); - auto C2 = MIRBuilder.buildFConstant(Ty, C2Val); - auto Fabs = MIRBuilder.buildFAbs(Ty, Src); + auto C2 = B.buildFConstant(Ty, C2Val); + auto Fabs = B.buildFAbs(Ty, Src); - auto Cond = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, LLT::scalar(1), Fabs, C2); - MIRBuilder.buildSelect(MI.getOperand(0).getReg(), Cond, Src, Tmp2); + auto Cond = B.buildFCmp(CmpInst::FCMP_OGT, LLT::scalar(1), Fabs, C2); + B.buildSelect(MI.getOperand(0).getReg(), Cond, Src, Tmp2); return true; } @@ -1124,7 +1473,7 @@ bool AMDGPULegalizerInfo::legalizeMinNumMaxNum( MachineIRBuilder HelperBuilder(MI); GISelObserverWrapper DummyObserver; LegalizerHelper Helper(MF, DummyObserver, HelperBuilder); - HelperBuilder.setMBB(*MI.getParent()); + HelperBuilder.setInstr(MI); return Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized; } @@ -1187,6 +1536,194 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt( return true; } +bool AMDGPULegalizerInfo::legalizeSinCos( + MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + B.setInstr(MI); + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(DstReg); + unsigned Flags = MI.getFlags(); + + Register TrigVal; + auto OneOver2Pi = B.buildFConstant(Ty, 0.5 / M_PI); + if (ST.hasTrigReducedRange()) { + auto MulVal = B.buildFMul(Ty, SrcReg, OneOver2Pi, Flags); + TrigVal = B.buildIntrinsic(Intrinsic::amdgcn_fract, {Ty}, false) + .addUse(MulVal.getReg(0)) + .setMIFlags(Flags).getReg(0); + } else + TrigVal = B.buildFMul(Ty, SrcReg, OneOver2Pi, Flags).getReg(0); + + Intrinsic::ID TrigIntrin = MI.getOpcode() == AMDGPU::G_FSIN ? 
+ Intrinsic::amdgcn_sin : Intrinsic::amdgcn_cos; + B.buildIntrinsic(TrigIntrin, makeArrayRef(DstReg), false) + .addUse(TrigVal) + .setMIFlags(Flags); + MI.eraseFromParent(); + return true; +} + +bool AMDGPULegalizerInfo::buildPCRelGlobalAddress( + Register DstReg, LLT PtrTy, + MachineIRBuilder &B, const GlobalValue *GV, + unsigned Offset, unsigned GAFlags) const { + // In order to support pc-relative addressing, SI_PC_ADD_REL_OFFSET is lowered + // to the following code sequence: + // + // For constant address space: + // s_getpc_b64 s[0:1] + // s_add_u32 s0, s0, $symbol + // s_addc_u32 s1, s1, 0 + // + // s_getpc_b64 returns the address of the s_add_u32 instruction and then + // a fixup or relocation is emitted to replace $symbol with a literal + // constant, which is a pc-relative offset from the encoding of the $symbol + // operand to the global variable. + // + // For global address space: + // s_getpc_b64 s[0:1] + // s_add_u32 s0, s0, $symbol@{gotpc}rel32@lo + // s_addc_u32 s1, s1, $symbol@{gotpc}rel32@hi + // + // s_getpc_b64 returns the address of the s_add_u32 instruction and then + // fixups or relocations are emitted to replace $symbol@*@lo and + // $symbol@*@hi with lower 32 bits and higher 32 bits of a literal constant, + // which is a 64-bit pc-relative offset from the encoding of the $symbol + // operand to the global variable. + // + // What we want here is an offset from the value returned by s_getpc + // (which is the address of the s_add_u32 instruction) to the global + // variable, but since the encoding of $symbol starts 4 bytes after the start + // of the s_add_u32 instruction, we end up with an offset that is 4 bytes too + // small. This requires us to add 4 to the global variable offset in order to + // compute the correct address. + + LLT ConstPtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64); + + Register PCReg = PtrTy.getSizeInBits() != 32 ? DstReg : + B.getMRI()->createGenericVirtualRegister(ConstPtrTy); + + MachineInstrBuilder MIB = B.buildInstr(AMDGPU::SI_PC_ADD_REL_OFFSET) + .addDef(PCReg); + + MIB.addGlobalAddress(GV, Offset + 4, GAFlags); + if (GAFlags == SIInstrInfo::MO_NONE) + MIB.addImm(0); + else + MIB.addGlobalAddress(GV, Offset + 4, GAFlags + 1); + + B.getMRI()->setRegClass(PCReg, &AMDGPU::SReg_64RegClass); + + if (PtrTy.getSizeInBits() == 32) + B.buildExtract(DstReg, PCReg, 0); + return true; + } + +bool AMDGPULegalizerInfo::legalizeGlobalValue( + MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + Register DstReg = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(DstReg); + unsigned AS = Ty.getAddressSpace(); + + const GlobalValue *GV = MI.getOperand(1).getGlobal(); + MachineFunction &MF = B.getMF(); + SIMachineFunctionInfo *MFI = MF.getInfo(); + B.setInstr(MI); + + if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) { + if (!MFI->isEntryFunction()) { + const Function &Fn = MF.getFunction(); + DiagnosticInfoUnsupported BadLDSDecl( + Fn, "local memory global used by non-kernel function", MI.getDebugLoc()); + Fn.getContext().diagnose(BadLDSDecl); + } + + // TODO: We could emit code to handle the initialization somewhere. 
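// Illustrative sketch (not from the upstream patch): why buildPCRelGlobalAddress
// above folds "+ 4" into the global-address operand. s_getpc_b64 yields the
// address of the following s_add_u32, but the $symbol literal is encoded 4
// bytes into that instruction and the fixup is resolved relative to the
// literal's own location. Plain arithmetic with hypothetical names:
#include <cstdint>

static inline uint64_t materializedAddress(uint64_t GlobalAddr,
                                           uint64_t GetPCResult,
                                           int64_t Offset) {
  const uint64_t LiteralAddr = GetPCResult + 4; // where $symbol is encoded
  // Fixup value written into the literal: the target (with the +4 addend)
  // minus the literal's own address.
  const int64_t Literal =
      int64_t(GlobalAddr + Offset + 4) - int64_t(LiteralAddr);
  // What the s_add_u32/s_addc_u32 pair computes: exactly GlobalAddr + Offset.
  return GetPCResult + uint64_t(Literal);
}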
+ if (!AMDGPUTargetLowering::hasDefinedInitializer(GV)) { + B.buildConstant(DstReg, MFI->allocateLDSGlobal(B.getDataLayout(), *GV)); + MI.eraseFromParent(); + return true; + } + + const Function &Fn = MF.getFunction(); + DiagnosticInfoUnsupported BadInit( + Fn, "unsupported initializer for address space", MI.getDebugLoc()); + Fn.getContext().diagnose(BadInit); + return true; + } + + const SITargetLowering *TLI = ST.getTargetLowering(); + + if (TLI->shouldEmitFixup(GV)) { + buildPCRelGlobalAddress(DstReg, Ty, B, GV, 0); + MI.eraseFromParent(); + return true; + } + + if (TLI->shouldEmitPCReloc(GV)) { + buildPCRelGlobalAddress(DstReg, Ty, B, GV, 0, SIInstrInfo::MO_REL32); + MI.eraseFromParent(); + return true; + } + + LLT PtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64); + Register GOTAddr = MRI.createGenericVirtualRegister(PtrTy); + + MachineMemOperand *GOTMMO = MF.getMachineMemOperand( + MachinePointerInfo::getGOT(MF), + MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | + MachineMemOperand::MOInvariant, + 8 /*Size*/, 8 /*Align*/); + + buildPCRelGlobalAddress(GOTAddr, PtrTy, B, GV, 0, SIInstrInfo::MO_GOTPCREL32); + + if (Ty.getSizeInBits() == 32) { + // Truncate if this is a 32-bit constant address. + auto Load = B.buildLoad(PtrTy, GOTAddr, *GOTMMO); + B.buildExtract(DstReg, Load, 0); + } else + B.buildLoad(DstReg, GOTAddr, *GOTMMO); + + MI.eraseFromParent(); + return true; +} + +bool AMDGPULegalizerInfo::legalizeLoad( + MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, GISelChangeObserver &Observer) const { + B.setInstr(MI); + LLT ConstPtr = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64); + auto Cast = B.buildAddrSpaceCast(ConstPtr, MI.getOperand(1).getReg()); + Observer.changingInstr(MI); + MI.getOperand(1).setReg(Cast.getReg(0)); + Observer.changedInstr(MI); + return true; +} + +bool AMDGPULegalizerInfo::legalizeFMad( + MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + assert(Ty.isScalar()); + + // TODO: Always legal with future ftz flag. + if (Ty == LLT::scalar(32) && !ST.hasFP32Denormals()) + return true; + if (Ty == LLT::scalar(16) && !ST.hasFP16Denormals()) + return true; + + MachineFunction &MF = B.getMF(); + + MachineIRBuilder HelperBuilder(MI); + GISelObserverWrapper DummyObserver; + LegalizerHelper Helper(MF, DummyObserver, HelperBuilder); + HelperBuilder.setMBB(*MI.getParent()); + return Helper.lowerFMad(MI) == LegalizerHelper::Legalized; +} + // Return the use branch instruction, otherwise null if the usage is invalid.
static MachineInstr *verifyCFIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI) { @@ -1212,10 +1749,9 @@ Register AMDGPULegalizerInfo::getLiveInRegister(MachineRegisterInfo &MRI, bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B, const ArgDescriptor *Arg) const { - if (!Arg->isRegister()) + if (!Arg->isRegister() || !Arg->getRegister().isValid()) return false; // TODO: Handle these - assert(Arg->getRegister() != 0); assert(Arg->getRegister().isPhysical()); MachineRegisterInfo &MRI = *B.getMRI(); @@ -1229,19 +1765,30 @@ bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B, const unsigned Mask = Arg->getMask(); const unsigned Shift = countTrailingZeros(Mask); - auto ShiftAmt = B.buildConstant(S32, Shift); - auto LShr = B.buildLShr(S32, LiveIn, ShiftAmt); - B.buildAnd(DstReg, LShr, B.buildConstant(S32, Mask >> Shift)); + Register AndMaskSrc = LiveIn; + + if (Shift != 0) { + auto ShiftAmt = B.buildConstant(S32, Shift); + AndMaskSrc = B.buildLShr(S32, LiveIn, ShiftAmt).getReg(0); + } + + B.buildAnd(DstReg, AndMaskSrc, B.buildConstant(S32, Mask >> Shift)); } else B.buildCopy(DstReg, LiveIn); // Insert the argument copy if it doens't already exist. // FIXME: It seems EmitLiveInCopies isn't called anywhere? if (!MRI.getVRegDef(LiveIn)) { + // FIXME: Should have scoped insert pt + MachineBasicBlock &OrigInsBB = B.getMBB(); + auto OrigInsPt = B.getInsertPt(); + MachineBasicBlock &EntryMBB = B.getMF().front(); EntryMBB.addLiveIn(Arg->getRegister()); B.setInsertPt(EntryMBB, EntryMBB.begin()); B.buildCopy(LiveIn, Arg->getRegister()); + + B.setInsertPt(OrigInsBB, OrigInsPt); } return true; @@ -1272,6 +1819,113 @@ bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin( return false; } +bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + B.setInstr(MI); + + if (legalizeFastUnsafeFDIV(MI, MRI, B)) + return true; + + return false; +} + +bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + + uint16_t Flags = MI.getFlags(); + + LLT ResTy = MRI.getType(Res); + LLT S32 = LLT::scalar(32); + LLT S64 = LLT::scalar(64); + + const MachineFunction &MF = B.getMF(); + bool Unsafe = + MF.getTarget().Options.UnsafeFPMath || MI.getFlag(MachineInstr::FmArcp); + + if (!MF.getTarget().Options.UnsafeFPMath && ResTy == S64) + return false; + + if (!Unsafe && ResTy == S32 && ST.hasFP32Denormals()) + return false; + + if (auto CLHS = getConstantFPVRegVal(LHS, MRI)) { + // 1 / x -> RCP(x) + if (CLHS->isExactlyValue(1.0)) { + B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res, false) + .addUse(RHS) + .setMIFlags(Flags); + + MI.eraseFromParent(); + return true; + } + + // -1 / x -> RCP( FNEG(x) ) + if (CLHS->isExactlyValue(-1.0)) { + auto FNeg = B.buildFNeg(ResTy, RHS, Flags); + B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res, false) + .addUse(FNeg.getReg(0)) + .setMIFlags(Flags); + + MI.eraseFromParent(); + return true; + } + } + + // x / y -> x * (1.0 / y) + if (Unsafe) { + auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy}, false) + .addUse(RHS) + .setMIFlags(Flags); + B.buildFMul(Res, LHS, RCP, Flags); + + MI.eraseFromParent(); + return true; + } + + return false; +} + +bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + 
B.setInstr(MI); + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(2).getReg(); + Register RHS = MI.getOperand(3).getReg(); + uint16_t Flags = MI.getFlags(); + + LLT S32 = LLT::scalar(32); + LLT S1 = LLT::scalar(1); + + auto Abs = B.buildFAbs(S32, RHS, Flags); + const APFloat C0Val(1.0f); + + auto C0 = B.buildConstant(S32, 0x6f800000); + auto C1 = B.buildConstant(S32, 0x2f800000); + auto C2 = B.buildConstant(S32, FloatToBits(1.0f)); + + auto CmpRes = B.buildFCmp(CmpInst::FCMP_OGT, S1, Abs, C0, Flags); + auto Sel = B.buildSelect(S32, CmpRes, C1, C2, Flags); + + auto Mul0 = B.buildFMul(S32, RHS, Sel, Flags); + + auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false) + .addUse(Mul0.getReg(0)) + .setMIFlags(Flags); + + auto Mul1 = B.buildFMul(S32, LHS, RCP, Flags); + + B.buildFMul(Res, Sel, Mul1, Flags); + + MI.eraseFromParent(); + return true; +} + bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { @@ -1306,11 +1960,79 @@ bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI, return true; } +bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B, + unsigned AddrSpace) const { + B.setInstr(MI); + Register ApertureReg = getSegmentAperture(AddrSpace, MRI, B); + auto Hi32 = B.buildExtract(LLT::scalar(32), MI.getOperand(2).getReg(), 32); + B.buildICmp(ICmpInst::ICMP_EQ, MI.getOperand(0), Hi32, ApertureReg); + MI.eraseFromParent(); + return true; +} + +/// Handle register layout difference for f16 images for some subtargets. +Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B, + MachineRegisterInfo &MRI, + Register Reg) const { + if (!ST.hasUnpackedD16VMem()) + return Reg; + + const LLT S16 = LLT::scalar(16); + const LLT S32 = LLT::scalar(32); + LLT StoreVT = MRI.getType(Reg); + assert(StoreVT.isVector() && StoreVT.getElementType() == S16); + + auto Unmerge = B.buildUnmerge(S16, Reg); + + SmallVector WideRegs; + for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I) + WideRegs.push_back(B.buildAnyExt(S32, Unmerge.getReg(I)).getReg(0)); + + int NumElts = StoreVT.getNumElements(); + + return B.buildBuildVector(LLT::vector(NumElts, S32), WideRegs).getReg(0); +} + +bool AMDGPULegalizerInfo::legalizeRawBufferStore(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B, + bool IsFormat) const { + // TODO: Reject f16 format on targets where unsupported. + Register VData = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(VData); + + B.setInstr(MI); + + const LLT S32 = LLT::scalar(32); + const LLT S16 = LLT::scalar(16); + + // Fixup illegal register types for i8 stores. + if (Ty == LLT::scalar(8) || Ty == S16) { + Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0); + MI.getOperand(1).setReg(AnyExt); + return true; + } + + if (Ty.isVector()) { + if (Ty.getElementType() == S16 && Ty.getNumElements() <= 4) { + if (IsFormat) + MI.getOperand(1).setReg(handleD16VData(B, MRI, VData)); + return true; + } + + return Ty.getElementType() == S32 && Ty.getNumElements() <= 4; + } + + return Ty == S32; +} + bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { // Replace the use G_BRCOND with the exec manipulate and branch pseudos. 
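// Illustrative sketch (not from the upstream patch): the scalar math behind
// legalizeFDIVFastIntrin above. 0x6f800000 and 0x2f800000 are the IEEE
// single-precision bit patterns of 2^96 and 2^-32; when |y| is very large the
// divisor is pre-scaled so its reciprocal stays representable, and the same
// scale factor is applied to the final product. Reference-only code; the real
// lowering uses the hardware reciprocal approximation via amdgcn_rcp.
#include <cmath>

static inline float fdivFastReference(float X, float Y) {
  const float Huge = std::ldexp(1.0f, 96);          // bit pattern 0x6f800000
  const float Scale =
      std::fabs(Y) > Huge ? std::ldexp(1.0f, -32)   // bit pattern 0x2f800000
                          : 1.0f;                   // FloatToBits(1.0f)
  const float Rcp = 1.0f / (Y * Scale);             // rcp(y * scale)
  return Scale * (X * Rcp);                         // == x / y up to rcp error
}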
- switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { + switch (MI.getIntrinsicID()) { case Intrinsic::amdgcn_if: { if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) { const SIRegisterInfo *TRI @@ -1386,6 +2108,22 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, case Intrinsic::amdgcn_dispatch_id: return legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::DISPATCH_ID); + case Intrinsic::amdgcn_fdiv_fast: + return legalizeFDIVFastIntrin(MI, MRI, B); + case Intrinsic::amdgcn_is_shared: + return legalizeIsAddrSpace(MI, MRI, B, AMDGPUAS::LOCAL_ADDRESS); + case Intrinsic::amdgcn_is_private: + return legalizeIsAddrSpace(MI, MRI, B, AMDGPUAS::PRIVATE_ADDRESS); + case Intrinsic::amdgcn_wavefrontsize: { + B.setInstr(MI); + B.buildConstant(MI.getOperand(0), ST.getWavefrontSize()); + MI.eraseFromParent(); + return true; + } + case Intrinsic::amdgcn_raw_buffer_store: + return legalizeRawBufferStore(MI, MRI, B, false); + case Intrinsic::amdgcn_raw_buffer_store_format: + return legalizeRawBufferStore(MI, MRI, B, true); default: return true; } diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index 3f1cc1d265dd..d0fba23a8686 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -16,6 +16,7 @@ #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "AMDGPUArgumentUsageInfo.h" +#include "SIInstrInfo.h" namespace llvm { @@ -32,29 +33,44 @@ public: const GCNTargetMachine &TM); bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder, + MachineIRBuilder &B, GISelChangeObserver &Observer) const override; Register getSegmentAperture(unsigned AddrSpace, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const; + MachineIRBuilder &B) const; bool legalizeAddrSpaceCast(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const; + MachineIRBuilder &B) const; bool legalizeFrint(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const; + MachineIRBuilder &B) const; bool legalizeFceil(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const; + MachineIRBuilder &B) const; bool legalizeIntrinsicTrunc(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const; + MachineIRBuilder &B) const; bool legalizeITOFP(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder, bool Signed) const; + MachineIRBuilder &B, bool Signed) const; bool legalizeMinNumMaxNum(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const; + MachineIRBuilder &B) const; bool legalizeExtractVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const; + MachineIRBuilder &B) const; bool legalizeInsertVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const; + MachineIRBuilder &B) const; + bool legalizeSinCos(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + + bool buildPCRelGlobalAddress( + Register DstReg, LLT PtrTy, MachineIRBuilder &B, const GlobalValue *GV, + unsigned Offset, unsigned GAFlags = SIInstrInfo::MO_NONE) const; + + bool legalizeGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + bool legalizeLoad(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, + GISelChangeObserver &Observer) const; + + bool legalizeFMad(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) 
const; Register getLiveInRegister(MachineRegisterInfo &MRI, Register Reg, LLT Ty) const; @@ -65,10 +81,24 @@ public: MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const; + bool legalizeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + bool legalizeFastUnsafeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + bool legalizeFDIVFastIntrin(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + bool legalizeImplicitArgPtr(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; + bool legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, unsigned AddrSpace) const; + + Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI, + Register Reg) const; + bool legalizeRawBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, bool IsFormat) const; bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const override; + MachineIRBuilder &B) const override; }; } // End llvm namespace. diff --git a/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/lib/Target/AMDGPU/AMDGPULibCalls.cpp index ce0a9db7c7f4..2c94e0046651 100644 --- a/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -48,18 +49,10 @@ static cl::list UseNative("amdgpu-use-native", cl::CommaSeparated, cl::ValueOptional, cl::Hidden); -#define MATH_PI 3.14159265358979323846264338327950288419716939937511 -#define MATH_E 2.71828182845904523536028747135266249775724709369996 -#define MATH_SQRT2 1.41421356237309504880168872420969807856967187537695 - -#define MATH_LOG2E 1.4426950408889634073599246810018921374266459541529859 -#define MATH_LOG10E 0.4342944819032518276511289189166050822943970058036665 -// Value of log2(10) -#define MATH_LOG2_10 3.3219280948873623478703194294893901758648313930245806 -// Value of 1 / log2(10) -#define MATH_RLOG2_10 0.3010299956639811952137388947244930267681898814621085 -// Value of 1 / M_LOG2E_F = 1 / log2(e) -#define MATH_RLOG2_E 0.6931471805599453094172321214581765680755001343602552 +#define MATH_PI numbers::pi +#define MATH_E numbers::e +#define MATH_SQRT2 numbers::sqrt2 +#define MATH_SQRT1_2 numbers::inv_sqrt2 namespace llvm { @@ -254,8 +247,8 @@ struct TableEntry { /* a list of {result, input} */ static const TableEntry tbl_acos[] = { - {MATH_PI/2.0, 0.0}, - {MATH_PI/2.0, -0.0}, + {MATH_PI / 2.0, 0.0}, + {MATH_PI / 2.0, -0.0}, {0.0, 1.0}, {MATH_PI, -1.0} }; @@ -271,8 +264,8 @@ static const TableEntry tbl_acospi[] = { static const TableEntry tbl_asin[] = { {0.0, 0.0}, {-0.0, -0.0}, - {MATH_PI/2.0, 1.0}, - {-MATH_PI/2.0, -1.0} + {MATH_PI / 2.0, 1.0}, + {-MATH_PI / 2.0, -1.0} }; static const TableEntry tbl_asinh[] = { {0.0, 0.0}, @@ -287,8 +280,8 @@ static const TableEntry tbl_asinpi[] = { static const TableEntry tbl_atan[] = { {0.0, 0.0}, {-0.0, -0.0}, - {MATH_PI/4.0, 1.0}, - {-MATH_PI/4.0, -1.0} + {MATH_PI / 4.0, 1.0}, + {-MATH_PI / 4.0, -1.0} }; static const TableEntry tbl_atanh[] = { {0.0, 0.0}, @@ -359,7 +352,7 @@ static const TableEntry tbl_log10[] = { }; static const TableEntry tbl_rsqrt[] = { {1.0, 1.0}, - {1.0/MATH_SQRT2, 2.0} + {MATH_SQRT1_2, 2.0} }; 
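// Illustrative sketch (not from the upstream patch): how a {result, input}
// table such as tbl_acos or tbl_rsqrt above can be consulted to fold a library
// call whose argument is a known constant. The helper below is a hypothetical
// stand-in for the lookup done inside AMDGPULibCalls; it ignores the
// signed-zero distinction that the real tables encode with separate 0.0 and
// -0.0 entries.
#include <cstddef>

struct FoldEntry { double Result, Input; };

static inline bool foldFromTable(const FoldEntry *Tbl, size_t N, double Arg,
                                 double &Out) {
  for (size_t I = 0; I != N; ++I) {
    if (Tbl[I].Input == Arg) { // e.g. acos(1.0) -> 0.0, rsqrt(2.0) -> 1/sqrt(2)
      Out = Tbl[I].Result;
      return true;
    }
  }
  return false;                // no table entry: leave the call in place
}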
static const TableEntry tbl_sin[] = { {0.0, 0.0}, @@ -868,7 +861,7 @@ static double log2(double V) { #if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L return ::log2(V); #else - return log(V) / 0.693147180559945309417; + return log(V) / numbers::ln2; #endif } } @@ -1430,8 +1423,8 @@ AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B, B.SetInsertPoint(&*ItNew); AllocaInst *Alloc = B.CreateAlloca(RetType, 0, std::string(prefix) + UI->getName()); - Alloc->setAlignment(UCallee->getParent()->getDataLayout() - .getTypeAllocSize(RetType)); + Alloc->setAlignment(MaybeAlign( + UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType))); return Alloc; } diff --git a/lib/Target/AMDGPU/AMDGPULibFunc.cpp b/lib/Target/AMDGPU/AMDGPULibFunc.cpp index a5bac25701a0..e1ae496d9cbc 100644 --- a/lib/Target/AMDGPU/AMDGPULibFunc.cpp +++ b/lib/Target/AMDGPU/AMDGPULibFunc.cpp @@ -55,7 +55,7 @@ enum EManglingParam { }; struct ManglingRule { - StringRef const Name; + const char *Name; unsigned char Lead[2]; unsigned char Param[5]; @@ -69,7 +69,7 @@ struct ManglingRule { // Information about library functions with unmangled names. class UnmangledFuncInfo { - StringRef const Name; + const char *Name; unsigned NumArgs; // Table for all lib functions with unmangled names. @@ -82,7 +82,7 @@ class UnmangledFuncInfo { public: using ID = AMDGPULibFunc::EFuncId; - UnmangledFuncInfo(StringRef _Name, unsigned _NumArgs) + constexpr UnmangledFuncInfo(const char *_Name, unsigned _NumArgs) : Name(_Name), NumArgs(_NumArgs) {} // Get index to Table by function name. static bool lookup(StringRef Name, ID &Id); @@ -133,8 +133,8 @@ unsigned ManglingRule::getNumArgs() const { // E_ANY - use prev lead type, E_CONSTPTR_ANY - make const pointer out of // prev lead type, etc. see ParamIterator::getNextParam() for details. 
-static const ManglingRule manglingRules[] = { -{ StringRef(), {0}, {0} }, +static constexpr ManglingRule manglingRules[] = { +{ "", {0}, {0} }, { "abs" , {1}, {E_ANY}}, { "abs_diff" , {1}, {E_ANY,E_COPY}}, { "acos" , {1}, {E_ANY}}, @@ -682,9 +682,9 @@ bool AMDGPULibFunc::parse(StringRef FuncName, AMDGPULibFunc &F) { } if (eatTerm(FuncName, "_Z")) - F.Impl = make_unique(); + F.Impl = std::make_unique(); else - F.Impl = make_unique(); + F.Impl = std::make_unique(); if (F.Impl->parseFuncName(FuncName)) return true; diff --git a/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp index 5dd5b3691e0a..e64542a395f0 100644 --- a/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp +++ b/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp @@ -72,10 +72,10 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) { BasicBlock &EntryBlock = *F.begin(); IRBuilder<> Builder(&*EntryBlock.begin()); - const unsigned KernArgBaseAlign = 16; // FIXME: Increase if necessary + const Align KernArgBaseAlign(16); // FIXME: Increase if necessary const uint64_t BaseOffset = ST.getExplicitKernelArgOffset(F); - unsigned MaxAlign; + Align MaxAlign; // FIXME: Alignment is broken broken with explicit arg offset.; const uint64_t TotalKernArgSize = ST.getKernArgSegmentSize(F, MaxAlign); if (TotalKernArgSize == 0) @@ -94,12 +94,12 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) { for (Argument &Arg : F.args()) { Type *ArgTy = Arg.getType(); - unsigned Align = DL.getABITypeAlignment(ArgTy); + unsigned ABITypeAlign = DL.getABITypeAlignment(ArgTy); unsigned Size = DL.getTypeSizeInBits(ArgTy); unsigned AllocSize = DL.getTypeAllocSize(ArgTy); - uint64_t EltOffset = alignTo(ExplicitArgOffset, Align) + BaseOffset; - ExplicitArgOffset = alignTo(ExplicitArgOffset, Align) + AllocSize; + uint64_t EltOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + BaseOffset; + ExplicitArgOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + AllocSize; if (Arg.use_empty()) continue; @@ -128,8 +128,8 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) { int64_t AlignDownOffset = alignDown(EltOffset, 4); int64_t OffsetDiff = EltOffset - AlignDownOffset; - unsigned AdjustedAlign = MinAlign(DoShiftOpt ? AlignDownOffset : EltOffset, - KernArgBaseAlign); + Align AdjustedAlign = commonAlignment( + KernArgBaseAlign, DoShiftOpt ? 
AlignDownOffset : EltOffset); Value *ArgPtr; Type *AdjustedArgTy; @@ -160,7 +160,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) { ArgPtr = Builder.CreateBitCast(ArgPtr, AdjustedArgTy->getPointerTo(AS), ArgPtr->getName() + ".cast"); LoadInst *Load = - Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign); + Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign.value()); Load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, {})); MDBuilder MDB(Ctx); @@ -220,8 +220,8 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) { } KernArgSegment->addAttribute( - AttributeList::ReturnIndex, - Attribute::getWithAlignment(Ctx, std::max(KernArgBaseAlign, MaxAlign))); + AttributeList::ReturnIndex, + Attribute::getWithAlignment(Ctx, std::max(KernArgBaseAlign, MaxAlign))); return true; } diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index ae4c32c258a7..3760aed87a43 100644 --- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -211,6 +211,10 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { lowerOperand(MO, MCOp); OutMI.addOperand(MCOp); } + + int FIIdx = AMDGPU::getNamedOperandIdx(MCOpcode, AMDGPU::OpName::fi); + if (FIIdx >= (int)OutMI.getNumOperands()) + OutMI.addOperand(MCOperand::createImm(0)); } bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO, diff --git a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp index 237490957058..ba72f71f4322 100644 --- a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp +++ b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp @@ -694,7 +694,7 @@ void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg, const MachineRegisterInfo *MRI, const TargetRegisterInfo *TRI, PHILinearize &PHIInfo) { - if (TRI->isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { LLVM_DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI) << "\n"); // If this is a source register to a PHI we are chaining, it @@ -734,7 +734,7 @@ void LinearizedRegion::storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg, const MachineRegisterInfo *MRI, const TargetRegisterInfo *TRI, PHILinearize &PHIInfo) { - if (TRI->isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { LLVM_DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI) << "\n"); for (auto &UI : MRI->use_operands(Reg)) { @@ -949,7 +949,7 @@ void LinearizedRegion::replaceRegister(unsigned Register, unsigned NewRegister, (IncludeLoopPHI && IsLoopPHI); if (ShouldReplace) { - if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { + if (Register::isPhysicalRegister(NewRegister)) { LLVM_DEBUG(dbgs() << "Trying to substitute physical register: " << printReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n"); @@ -1016,13 +1016,15 @@ bool LinearizedRegion::hasNoDef(unsigned Reg, MachineRegisterInfo *MRI) { // before are no longer register kills. void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) { const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); + (void)TRI; // It's used by LLVM_DEBUG. 
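// Illustrative sketch (not from the upstream patch): the offset rounding used
// for sub-dword kernel arguments in the AMDGPULowerKernelArguments hunk above.
// A narrow argument is loaded as the containing dword at alignDown(offset, 4)
// and the wanted bytes are then shifted out by the remainder. Hypothetical
// standalone helper for the arithmetic only.
#include <cstdint>

struct DwordSlot {
  uint64_t LoadOffset; // 4-byte aligned offset that is actually loaded
  uint64_t ShiftBytes; // position of the argument inside that dword
};

static inline DwordSlot roundToDword(uint64_t EltOffset) {
  const uint64_t AlignDown = EltOffset & ~uint64_t(3); // alignDown(EltOffset, 4)
  return {AlignDown, EltOffset - AlignDown};           // OffsetDiff in the pass
}

// For example, an i16 argument at byte offset 38 is read from offset 36 and
// shifted right by 2 bytes (16 bits) after the dword load.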
+ for (auto MBBI : MBBs) { MachineBasicBlock *MBB = MBBI; for (auto &II : *MBB) { for (auto &RI : II.uses()) { if (RI.isReg()) { - unsigned Reg = RI.getReg(); - if (TRI->isVirtualRegister(Reg)) { + Register Reg = RI.getReg(); + if (Register::isVirtualRegister(Reg)) { if (hasNoDef(Reg, MRI)) continue; if (!MRI->hasOneDef(Reg)) { @@ -1402,7 +1404,7 @@ void AMDGPUMachineCFGStructurizer::storePHILinearizationInfoDest( unsigned AMDGPUMachineCFGStructurizer::storePHILinearizationInfo( MachineInstr &PHI, SmallVector *RegionIndices) { unsigned DestReg = getPHIDestReg(PHI); - unsigned LinearizeDestReg = + Register LinearizeDestReg = MRI->createVirtualRegister(MRI->getRegClass(DestReg)); PHIInfo.addDest(LinearizeDestReg, PHI.getDebugLoc()); storePHILinearizationInfoDest(LinearizeDestReg, PHI, RegionIndices); @@ -1890,7 +1892,7 @@ void AMDGPUMachineCFGStructurizer::ensureCondIsNotKilled( if (!Cond[0].isReg()) return; - unsigned CondReg = Cond[0].getReg(); + Register CondReg = Cond[0].getReg(); for (auto UI = MRI->use_begin(CondReg), E = MRI->use_end(); UI != E; ++UI) { (*UI).setIsKill(false); } @@ -1929,8 +1931,8 @@ void AMDGPUMachineCFGStructurizer::rewriteCodeBBTerminator(MachineBasicBlock *Co BBSelectReg, TrueBB->getNumber()); } else { const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectReg); - unsigned TrueBBReg = MRI->createVirtualRegister(RegClass); - unsigned FalseBBReg = MRI->createVirtualRegister(RegClass); + Register TrueBBReg = MRI->createVirtualRegister(RegClass); + Register FalseBBReg = MRI->createVirtualRegister(RegClass); TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, TrueBBReg, TrueBB->getNumber()); TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, @@ -1996,7 +1998,7 @@ void AMDGPUMachineCFGStructurizer::insertChainedPHI(MachineBasicBlock *IfBB, InnerRegion->replaceRegisterOutsideRegion(SourceReg, DestReg, false, MRI); } const TargetRegisterClass *RegClass = MRI->getRegClass(DestReg); - unsigned NextDestReg = MRI->createVirtualRegister(RegClass); + Register NextDestReg = MRI->createVirtualRegister(RegClass); bool IsLastDef = PHIInfo.getNumSources(DestReg) == 1; LLVM_DEBUG(dbgs() << "Insert Chained PHI\n"); insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, DestReg, NextDestReg, @@ -2056,8 +2058,8 @@ void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB, // register, unless it is the outgoing BB select register. We have // already creaed phi nodes for these. const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); - unsigned PHIDestReg = MRI->createVirtualRegister(RegClass); - unsigned IfSourceReg = MRI->createVirtualRegister(RegClass); + Register PHIDestReg = MRI->createVirtualRegister(RegClass); + Register IfSourceReg = MRI->createVirtualRegister(RegClass); // Create initializer, this value is never used, but is needed // to satisfy SSA. 
LLVM_DEBUG(dbgs() << "Initializer for reg: " << printReg(Reg) << "\n"); @@ -2172,7 +2174,7 @@ void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegio MachineBasicBlock *PHIDefMBB = PHIDefInstr->getParent(); const TargetRegisterClass *RegClass = MRI->getRegClass(CurrentBackedgeReg); - unsigned NewBackedgeReg = MRI->createVirtualRegister(RegClass); + Register NewBackedgeReg = MRI->createVirtualRegister(RegClass); MachineInstrBuilder BackedgePHI = BuildMI(*PHIDefMBB, PHIDefMBB->instr_begin(), DL, TII->get(TargetOpcode::PHI), NewBackedgeReg); @@ -2230,7 +2232,7 @@ void AMDGPUMachineCFGStructurizer::replaceRegisterWith(unsigned Register, I != E;) { MachineOperand &O = *I; ++I; - if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { + if (Register::isPhysicalRegister(NewRegister)) { LLVM_DEBUG(dbgs() << "Trying to substitute physical register: " << printReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n"); @@ -2309,7 +2311,7 @@ MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion( } else { // Handle internal block. const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectRegIn); - unsigned CodeBBSelectReg = MRI->createVirtualRegister(RegClass); + Register CodeBBSelectReg = MRI->createVirtualRegister(RegClass); rewriteCodeBBTerminator(CodeBB, MergeBB, CodeBBSelectReg); bool IsRegionEntryBB = CurrentRegion->getEntry() == CodeBB; MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeBB, CodeBB, CodeBB, @@ -2446,7 +2448,7 @@ void AMDGPUMachineCFGStructurizer::splitLoopPHI(MachineInstr &PHI, } const TargetRegisterClass *RegClass = MRI->getRegClass(PHIDest); - unsigned NewDestReg = MRI->createVirtualRegister(RegClass); + Register NewDestReg = MRI->createVirtualRegister(RegClass); LRegion->replaceRegisterInsideRegion(PHIDest, NewDestReg, false, MRI); MachineInstrBuilder MIB = BuildMI(*EntrySucc, EntrySucc->instr_begin(), PHI.getDebugLoc(), @@ -2734,9 +2736,9 @@ bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) { } const DebugLoc &DL = NewSucc->findDebugLoc(NewSucc->getFirstNonPHI()); unsigned InReg = LRegion->getBBSelectRegIn(); - unsigned InnerSelectReg = + Register InnerSelectReg = MRI->createVirtualRegister(MRI->getRegClass(InReg)); - unsigned NewInReg = MRI->createVirtualRegister(MRI->getRegClass(InReg)); + Register NewInReg = MRI->createVirtualRegister(MRI->getRegClass(InReg)); TII->materializeImmediate(*(LRegion->getEntry()), LRegion->getEntry()->getFirstTerminator(), DL, NewInReg, Region->getEntry()->getNumber()); diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp index 0d3a1f1a769f..89ca702f577d 100644 --- a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp +++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp @@ -17,7 +17,6 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) : MachineFunctionInfo(), LocalMemoryObjects(), ExplicitKernArgSize(0), - MaxKernArgAlign(0), LDSSize(0), IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())), NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath), diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/lib/Target/AMDGPU/AMDGPUMachineFunction.h index 52987e2fa411..9818ab1ef148 100644 --- a/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -23,7 +23,7 @@ class AMDGPUMachineFunction : public MachineFunctionInfo { protected: uint64_t ExplicitKernArgSize; // Cache for this. - unsigned MaxKernArgAlign; // Cache for this. 
+ Align MaxKernArgAlign; // Cache for this. /// Number of bytes in the LDS that are being used. unsigned LDSSize; @@ -47,9 +47,7 @@ public: return ExplicitKernArgSize; } - unsigned getMaxKernArgAlign() const { - return MaxKernArgAlign; - } + unsigned getMaxKernArgAlign() const { return MaxKernArgAlign.value(); } unsigned getLDSSize() const { return LDSSize; diff --git a/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp new file mode 100644 index 000000000000..5250bf455d71 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp @@ -0,0 +1,592 @@ +//=== AMDGPUPrintfRuntimeBinding.cpp - OpenCL printf implementation -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// \file +// +// The pass bind printfs to a kernel arg pointer that will be bound to a buffer +// later by the runtime. +// +// This pass traverses the functions in the module and converts +// each call to printf to a sequence of operations that +// store the following into the printf buffer: +// - format string (passed as a module's metadata unique ID) +// - bitwise copies of printf arguments +// The backend passes will need to store metadata in the kernel +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +using namespace llvm; + +#define DEBUG_TYPE "printfToRuntime" +#define DWORD_ALIGN 4 + +namespace { +class LLVM_LIBRARY_VISIBILITY AMDGPUPrintfRuntimeBinding final + : public ModulePass { + +public: + static char ID; + + explicit AMDGPUPrintfRuntimeBinding(); + +private: + bool runOnModule(Module &M) override; + void getConversionSpecifiers(SmallVectorImpl &OpConvSpecifiers, + StringRef fmt, size_t num_ops) const; + + bool shouldPrintAsStr(char Specifier, Type *OpType) const; + bool + lowerPrintfForGpu(Module &M, + function_ref GetTLI); + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + } + + Value *simplify(Instruction *I, const TargetLibraryInfo *TLI) { + return SimplifyInstruction(I, {*TD, TLI, DT}); + } + + const DataLayout *TD; + const DominatorTree *DT; + SmallVector Printfs; +}; +} // namespace + +char AMDGPUPrintfRuntimeBinding::ID = 0; + +INITIALIZE_PASS_BEGIN(AMDGPUPrintfRuntimeBinding, + "amdgpu-printf-runtime-binding", "AMDGPU Printf lowering", + false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(AMDGPUPrintfRuntimeBinding, "amdgpu-printf-runtime-binding", + "AMDGPU Printf lowering", false, false) + +char &llvm::AMDGPUPrintfRuntimeBindingID = 
AMDGPUPrintfRuntimeBinding::ID; + +namespace llvm { +ModulePass *createAMDGPUPrintfRuntimeBinding() { + return new AMDGPUPrintfRuntimeBinding(); +} +} // namespace llvm + +AMDGPUPrintfRuntimeBinding::AMDGPUPrintfRuntimeBinding() + : ModulePass(ID), TD(nullptr), DT(nullptr) { + initializeAMDGPUPrintfRuntimeBindingPass(*PassRegistry::getPassRegistry()); +} + +void AMDGPUPrintfRuntimeBinding::getConversionSpecifiers( + SmallVectorImpl &OpConvSpecifiers, StringRef Fmt, + size_t NumOps) const { + // not all format characters are collected. + // At this time the format characters of interest + // are %p and %s, which use to know if we + // are either storing a literal string or a + // pointer to the printf buffer. + static const char ConvSpecifiers[] = "cdieEfgGaosuxXp"; + size_t CurFmtSpecifierIdx = 0; + size_t PrevFmtSpecifierIdx = 0; + + while ((CurFmtSpecifierIdx = Fmt.find_first_of( + ConvSpecifiers, CurFmtSpecifierIdx)) != StringRef::npos) { + bool ArgDump = false; + StringRef CurFmt = Fmt.substr(PrevFmtSpecifierIdx, + CurFmtSpecifierIdx - PrevFmtSpecifierIdx); + size_t pTag = CurFmt.find_last_of("%"); + if (pTag != StringRef::npos) { + ArgDump = true; + while (pTag && CurFmt[--pTag] == '%') { + ArgDump = !ArgDump; + } + } + + if (ArgDump) + OpConvSpecifiers.push_back(Fmt[CurFmtSpecifierIdx]); + + PrevFmtSpecifierIdx = ++CurFmtSpecifierIdx; + } +} + +bool AMDGPUPrintfRuntimeBinding::shouldPrintAsStr(char Specifier, + Type *OpType) const { + if (Specifier != 's') + return false; + const PointerType *PT = dyn_cast(OpType); + if (!PT || PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) + return false; + Type *ElemType = PT->getContainedType(0); + if (ElemType->getTypeID() != Type::IntegerTyID) + return false; + IntegerType *ElemIType = cast(ElemType); + return ElemIType->getBitWidth() == 8; +} + +bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu( + Module &M, function_ref GetTLI) { + LLVMContext &Ctx = M.getContext(); + IRBuilder<> Builder(Ctx); + Type *I32Ty = Type::getInt32Ty(Ctx); + unsigned UniqID = 0; + // NB: This is important for this string size to be divizable by 4 + const char NonLiteralStr[4] = "???"; + + for (auto CI : Printfs) { + unsigned NumOps = CI->getNumArgOperands(); + + SmallString<16> OpConvSpecifiers; + Value *Op = CI->getArgOperand(0); + + if (auto LI = dyn_cast(Op)) { + Op = LI->getPointerOperand(); + for (auto Use : Op->users()) { + if (auto SI = dyn_cast(Use)) { + Op = SI->getValueOperand(); + break; + } + } + } + + if (auto I = dyn_cast(Op)) { + Value *Op_simplified = simplify(I, &GetTLI(*I->getFunction())); + if (Op_simplified) + Op = Op_simplified; + } + + ConstantExpr *ConstExpr = dyn_cast(Op); + + if (ConstExpr) { + GlobalVariable *GVar = dyn_cast(ConstExpr->getOperand(0)); + + StringRef Str("unknown"); + if (GVar && GVar->hasInitializer()) { + auto Init = GVar->getInitializer(); + if (auto CA = dyn_cast(Init)) { + if (CA->isString()) + Str = CA->getAsCString(); + } else if (isa(Init)) { + Str = ""; + } + // + // we need this call to ascertain + // that we are printing a string + // or a pointer. 
It takes out the + // specifiers and fills up the first + // arg + getConversionSpecifiers(OpConvSpecifiers, Str, NumOps - 1); + } + // Add metadata for the string + std::string AStreamHolder; + raw_string_ostream Sizes(AStreamHolder); + int Sum = DWORD_ALIGN; + Sizes << CI->getNumArgOperands() - 1; + Sizes << ':'; + for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() && + ArgCount <= OpConvSpecifiers.size(); + ArgCount++) { + Value *Arg = CI->getArgOperand(ArgCount); + Type *ArgType = Arg->getType(); + unsigned ArgSize = TD->getTypeAllocSizeInBits(ArgType); + ArgSize = ArgSize / 8; + // + // ArgSize by design should be a multiple of DWORD_ALIGN, + // expand the arguments that do not follow this rule. + // + if (ArgSize % DWORD_ALIGN != 0) { + llvm::Type *ResType = llvm::Type::getInt32Ty(Ctx); + VectorType *LLVMVecType = llvm::dyn_cast(ArgType); + int NumElem = LLVMVecType ? LLVMVecType->getNumElements() : 1; + if (LLVMVecType && NumElem > 1) + ResType = llvm::VectorType::get(ResType, NumElem); + Builder.SetInsertPoint(CI); + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + if (OpConvSpecifiers[ArgCount - 1] == 'x' || + OpConvSpecifiers[ArgCount - 1] == 'X' || + OpConvSpecifiers[ArgCount - 1] == 'u' || + OpConvSpecifiers[ArgCount - 1] == 'o') + Arg = Builder.CreateZExt(Arg, ResType); + else + Arg = Builder.CreateSExt(Arg, ResType); + ArgType = Arg->getType(); + ArgSize = TD->getTypeAllocSizeInBits(ArgType); + ArgSize = ArgSize / 8; + CI->setOperand(ArgCount, Arg); + } + if (OpConvSpecifiers[ArgCount - 1] == 'f') { + ConstantFP *FpCons = dyn_cast(Arg); + if (FpCons) + ArgSize = 4; + else { + FPExtInst *FpExt = dyn_cast(Arg); + if (FpExt && FpExt->getType()->isDoubleTy() && + FpExt->getOperand(0)->getType()->isFloatTy()) + ArgSize = 4; + } + } + if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) { + if (ConstantExpr *ConstExpr = dyn_cast(Arg)) { + GlobalVariable *GV = + dyn_cast(ConstExpr->getOperand(0)); + if (GV && GV->hasInitializer()) { + Constant *Init = GV->getInitializer(); + ConstantDataArray *CA = dyn_cast(Init); + if (Init->isZeroValue() || CA->isString()) { + size_t SizeStr = Init->isZeroValue() + ? 1 + : (strlen(CA->getAsCString().data()) + 1); + size_t Rem = SizeStr % DWORD_ALIGN; + size_t NSizeStr = 0; + LLVM_DEBUG(dbgs() << "Printf string original size = " << SizeStr + << '\n'); + if (Rem) { + NSizeStr = SizeStr + (DWORD_ALIGN - Rem); + } else { + NSizeStr = SizeStr; + } + ArgSize = NSizeStr; + } + } else { + ArgSize = sizeof(NonLiteralStr); + } + } else { + ArgSize = sizeof(NonLiteralStr); + } + } + LLVM_DEBUG(dbgs() << "Printf ArgSize (in buffer) = " << ArgSize + << " for type: " << *ArgType << '\n'); + Sizes << ArgSize << ':'; + Sum += ArgSize; + } + LLVM_DEBUG(dbgs() << "Printf format string in source = " << Str.str() + << '\n'); + for (size_t I = 0; I < Str.size(); ++I) { + // Rest of the C escape sequences (e.g. 
\') are handled correctly + // by the MDParser + switch (Str[I]) { + case '\a': + Sizes << "\\a"; + break; + case '\b': + Sizes << "\\b"; + break; + case '\f': + Sizes << "\\f"; + break; + case '\n': + Sizes << "\\n"; + break; + case '\r': + Sizes << "\\r"; + break; + case '\v': + Sizes << "\\v"; + break; + case ':': + // ':' cannot be scanned by Flex, as it is defined as a delimiter + // Replace it with it's octal representation \72 + Sizes << "\\72"; + break; + default: + Sizes << Str[I]; + break; + } + } + + // Insert the printf_alloc call + Builder.SetInsertPoint(CI); + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + AttributeList Attr = AttributeList::get(Ctx, AttributeList::FunctionIndex, + Attribute::NoUnwind); + + Type *SizetTy = Type::getInt32Ty(Ctx); + + Type *Tys_alloc[1] = {SizetTy}; + Type *I8Ptr = PointerType::get(Type::getInt8Ty(Ctx), 1); + FunctionType *FTy_alloc = FunctionType::get(I8Ptr, Tys_alloc, false); + FunctionCallee PrintfAllocFn = + M.getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr); + + LLVM_DEBUG(dbgs() << "Printf metadata = " << Sizes.str() << '\n'); + std::string fmtstr = itostr(++UniqID) + ":" + Sizes.str().c_str(); + MDString *fmtStrArray = MDString::get(Ctx, fmtstr); + + // Instead of creating global variables, the + // printf format strings are extracted + // and passed as metadata. This avoids + // polluting llvm's symbol tables in this module. + // Metadata is going to be extracted + // by the backend passes and inserted + // into the OpenCL binary as appropriate. + StringRef amd("llvm.printf.fmts"); + NamedMDNode *metaD = M.getOrInsertNamedMetadata(amd); + MDNode *myMD = MDNode::get(Ctx, fmtStrArray); + metaD->addOperand(myMD); + Value *sumC = ConstantInt::get(SizetTy, Sum, false); + SmallVector alloc_args; + alloc_args.push_back(sumC); + CallInst *pcall = + CallInst::Create(PrintfAllocFn, alloc_args, "printf_alloc_fn", CI); + + // + // Insert code to split basicblock with a + // piece of hammock code. 
+ // basicblock splits after buffer overflow check + // + ConstantPointerNull *zeroIntPtr = + ConstantPointerNull::get(PointerType::get(Type::getInt8Ty(Ctx), 1)); + ICmpInst *cmp = + dyn_cast(Builder.CreateICmpNE(pcall, zeroIntPtr, "")); + if (!CI->use_empty()) { + Value *result = + Builder.CreateSExt(Builder.CreateNot(cmp), I32Ty, "printf_res"); + CI->replaceAllUsesWith(result); + } + SplitBlock(CI->getParent(), cmp); + Instruction *Brnch = + SplitBlockAndInsertIfThen(cmp, cmp->getNextNode(), false); + + Builder.SetInsertPoint(Brnch); + + // store unique printf id in the buffer + // + SmallVector ZeroIdxList; + ConstantInt *zeroInt = + ConstantInt::get(Ctx, APInt(32, StringRef("0"), 10)); + ZeroIdxList.push_back(zeroInt); + + GetElementPtrInst *BufferIdx = + dyn_cast(GetElementPtrInst::Create( + nullptr, pcall, ZeroIdxList, "PrintBuffID", Brnch)); + + Type *idPointer = PointerType::get(I32Ty, AMDGPUAS::GLOBAL_ADDRESS); + Value *id_gep_cast = + new BitCastInst(BufferIdx, idPointer, "PrintBuffIdCast", Brnch); + + StoreInst *stbuff = + new StoreInst(ConstantInt::get(I32Ty, UniqID), id_gep_cast); + stbuff->insertBefore(Brnch); // to Remove unused variable warning + + SmallVector FourthIdxList; + ConstantInt *fourInt = + ConstantInt::get(Ctx, APInt(32, StringRef("4"), 10)); + + FourthIdxList.push_back(fourInt); // 1st 4 bytes hold the printf_id + // the following GEP is the buffer pointer + BufferIdx = cast(GetElementPtrInst::Create( + nullptr, pcall, FourthIdxList, "PrintBuffGep", Brnch)); + + Type *Int32Ty = Type::getInt32Ty(Ctx); + Type *Int64Ty = Type::getInt64Ty(Ctx); + for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() && + ArgCount <= OpConvSpecifiers.size(); + ArgCount++) { + Value *Arg = CI->getArgOperand(ArgCount); + Type *ArgType = Arg->getType(); + SmallVector WhatToStore; + if (ArgType->isFPOrFPVectorTy() && + (ArgType->getTypeID() != Type::VectorTyID)) { + Type *IType = (ArgType->isFloatTy()) ? Int32Ty : Int64Ty; + if (OpConvSpecifiers[ArgCount - 1] == 'f') { + ConstantFP *fpCons = dyn_cast(Arg); + if (fpCons) { + APFloat Val(fpCons->getValueAPF()); + bool Lost = false; + Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, + &Lost); + Arg = ConstantFP::get(Ctx, Val); + IType = Int32Ty; + } else { + FPExtInst *FpExt = dyn_cast(Arg); + if (FpExt && FpExt->getType()->isDoubleTy() && + FpExt->getOperand(0)->getType()->isFloatTy()) { + Arg = FpExt->getOperand(0); + IType = Int32Ty; + } + } + } + Arg = new BitCastInst(Arg, IType, "PrintArgFP", Brnch); + WhatToStore.push_back(Arg); + } else if (ArgType->getTypeID() == Type::PointerTyID) { + if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) { + const char *S = NonLiteralStr; + if (ConstantExpr *ConstExpr = dyn_cast(Arg)) { + GlobalVariable *GV = + dyn_cast(ConstExpr->getOperand(0)); + if (GV && GV->hasInitializer()) { + Constant *Init = GV->getInitializer(); + ConstantDataArray *CA = dyn_cast(Init); + if (Init->isZeroValue() || CA->isString()) { + S = Init->isZeroValue() ? 
"" : CA->getAsCString().data(); + } + } + } + size_t SizeStr = strlen(S) + 1; + size_t Rem = SizeStr % DWORD_ALIGN; + size_t NSizeStr = 0; + if (Rem) { + NSizeStr = SizeStr + (DWORD_ALIGN - Rem); + } else { + NSizeStr = SizeStr; + } + if (S[0]) { + char *MyNewStr = new char[NSizeStr](); + strcpy(MyNewStr, S); + int NumInts = NSizeStr / 4; + int CharC = 0; + while (NumInts) { + int ANum = *(int *)(MyNewStr + CharC); + CharC += 4; + NumInts--; + Value *ANumV = ConstantInt::get(Int32Ty, ANum, false); + WhatToStore.push_back(ANumV); + } + delete[] MyNewStr; + } else { + // Empty string, give a hint to RT it is no NULL + Value *ANumV = ConstantInt::get(Int32Ty, 0xFFFFFF00, false); + WhatToStore.push_back(ANumV); + } + } else { + uint64_t Size = TD->getTypeAllocSizeInBits(ArgType); + assert((Size == 32 || Size == 64) && "unsupported size"); + Type *DstType = (Size == 32) ? Int32Ty : Int64Ty; + Arg = new PtrToIntInst(Arg, DstType, "PrintArgPtr", Brnch); + WhatToStore.push_back(Arg); + } + } else if (ArgType->getTypeID() == Type::VectorTyID) { + Type *IType = NULL; + uint32_t EleCount = cast(ArgType)->getNumElements(); + uint32_t EleSize = ArgType->getScalarSizeInBits(); + uint32_t TotalSize = EleCount * EleSize; + if (EleCount == 3) { + IntegerType *Int32Ty = Type::getInt32Ty(ArgType->getContext()); + Constant *Indices[4] = { + ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 1), + ConstantInt::get(Int32Ty, 2), ConstantInt::get(Int32Ty, 2)}; + Constant *Mask = ConstantVector::get(Indices); + ShuffleVectorInst *Shuffle = new ShuffleVectorInst(Arg, Arg, Mask); + Shuffle->insertBefore(Brnch); + Arg = Shuffle; + ArgType = Arg->getType(); + TotalSize += EleSize; + } + switch (EleSize) { + default: + EleCount = TotalSize / 64; + IType = dyn_cast(Type::getInt64Ty(ArgType->getContext())); + break; + case 8: + if (EleCount >= 8) { + EleCount = TotalSize / 64; + IType = dyn_cast(Type::getInt64Ty(ArgType->getContext())); + } else if (EleCount >= 3) { + EleCount = 1; + IType = dyn_cast(Type::getInt32Ty(ArgType->getContext())); + } else { + EleCount = 1; + IType = dyn_cast(Type::getInt16Ty(ArgType->getContext())); + } + break; + case 16: + if (EleCount >= 3) { + EleCount = TotalSize / 64; + IType = dyn_cast(Type::getInt64Ty(ArgType->getContext())); + } else { + EleCount = 1; + IType = dyn_cast(Type::getInt32Ty(ArgType->getContext())); + } + break; + } + if (EleCount > 1) { + IType = dyn_cast(VectorType::get(IType, EleCount)); + } + Arg = new BitCastInst(Arg, IType, "PrintArgVect", Brnch); + WhatToStore.push_back(Arg); + } else { + WhatToStore.push_back(Arg); + } + for (unsigned I = 0, E = WhatToStore.size(); I != E; ++I) { + Value *TheBtCast = WhatToStore[I]; + unsigned ArgSize = + TD->getTypeAllocSizeInBits(TheBtCast->getType()) / 8; + SmallVector BuffOffset; + BuffOffset.push_back(ConstantInt::get(I32Ty, ArgSize)); + + Type *ArgPointer = PointerType::get(TheBtCast->getType(), 1); + Value *CastedGEP = + new BitCastInst(BufferIdx, ArgPointer, "PrintBuffPtrCast", Brnch); + StoreInst *StBuff = new StoreInst(TheBtCast, CastedGEP, Brnch); + LLVM_DEBUG(dbgs() << "inserting store to printf buffer:\n" + << *StBuff << '\n'); + (void)StBuff; + if (I + 1 == E && ArgCount + 1 == CI->getNumArgOperands()) + break; + BufferIdx = dyn_cast(GetElementPtrInst::Create( + nullptr, BufferIdx, BuffOffset, "PrintBuffNextPtr", Brnch)); + LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:\n" + << *BufferIdx << '\n'); + } + } + } + } + + // erase the printf calls + for (auto CI : Printfs) + CI->eraseFromParent(); + + 
Printfs.clear(); + return true; +} + +bool AMDGPUPrintfRuntimeBinding::runOnModule(Module &M) { + Triple TT(M.getTargetTriple()); + if (TT.getArch() == Triple::r600) + return false; + + auto PrintfFunction = M.getFunction("printf"); + if (!PrintfFunction) + return false; + + for (auto &U : PrintfFunction->uses()) { + if (auto *CI = dyn_cast(U.getUser())) { + if (CI->isCallee(&U)) + Printfs.push_back(CI); + } + } + + if (Printfs.empty()) + return false; + + TD = &M.getDataLayout(); + auto DTWP = getAnalysisIfAvailable(); + DT = DTWP ? &DTWP->getDomTree() : nullptr; + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis().getTLI(F); + }; + + return lowerPrintfForGpu(M, GetTLI); +} diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index e4c9d6685d4a..3e9dcca114a3 100644 --- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -801,7 +801,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) { GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS); GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); - GV->setAlignment(I.getAlignment()); + GV->setAlignment(MaybeAlign(I.getAlignment())); Value *TCntY, *TCntZ; diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 815cbc5e26ee..4d78188b3dc3 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -17,9 +17,9 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -33,6 +33,7 @@ #include "AMDGPUGenRegisterBankInfo.def" using namespace llvm; +using namespace MIPatternMatch; namespace { @@ -84,9 +85,11 @@ public: }; } -AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI) +AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const GCNSubtarget &ST) : AMDGPUGenRegisterBankInfo(), - TRI(static_cast(&TRI)) { + Subtarget(ST), + TRI(Subtarget.getRegisterInfo()), + TII(Subtarget.getInstrInfo()) { // HACK: Until this is fully tablegen'd. static bool AlreadyInit = false; @@ -163,11 +166,10 @@ unsigned AMDGPURegisterBankInfo::getBreakDownCost( const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass( const TargetRegisterClass &RC) const { + if (&RC == &AMDGPU::SReg_1RegClass) + return AMDGPU::VCCRegBank; - if (TRI->isSGPRClass(&RC)) - return getRegBank(AMDGPU::SGPRRegBankID); - - return getRegBank(AMDGPU::VGPRRegBankID); + return TRI->isSGPRClass(&RC) ? AMDGPU::SGPRRegBank : AMDGPU::VGPRRegBank; } template @@ -192,7 +194,8 @@ AMDGPURegisterBankInfo::addMappingFromTable( Operands[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI); } - unsigned MappingID = 0; + // getInstrMapping's default mapping uses ID 1, so start at 2. 
+ unsigned MappingID = 2; for (const auto &Entry : Table) { for (unsigned I = 0; I < NumOps; ++I) { int OpIdx = RegSrcOpIdx[I]; @@ -210,7 +213,7 @@ AMDGPURegisterBankInfo::addMappingFromTable( RegisterBankInfo::InstructionMappings AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsic( const MachineInstr &MI, const MachineRegisterInfo &MRI) const { - switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { + switch (MI.getIntrinsicID()) { case Intrinsic::amdgcn_readlane: { static const OpRegBankEntry<3> Table[2] = { // Perfectly legal. @@ -251,7 +254,7 @@ RegisterBankInfo::InstructionMappings AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects( const MachineInstr &MI, const MachineRegisterInfo &MRI) const { - switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { + switch (MI.getIntrinsicID()) { case Intrinsic::amdgcn_buffer_load: { static const OpRegBankEntry<3> Table[4] = { // Perfectly legal. @@ -303,6 +306,7 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects( } case Intrinsic::amdgcn_s_sendmsg: case Intrinsic::amdgcn_s_sendmsghalt: { + // FIXME: Should have no register for immediate static const OpRegBankEntry<1> Table[2] = { // Perfectly legal. { { AMDGPU::SGPRRegBankID }, 1 }, @@ -319,12 +323,15 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects( } } -static bool isInstrUniform(const MachineInstr &MI) { +// FIXME: Returns uniform if there's no source value information. This is +// probably wrong. +static bool isInstrUniformNonExtLoadAlign4(const MachineInstr &MI) { if (!MI.hasOneMemOperand()) return false; const MachineMemOperand *MMO = *MI.memoperands_begin(); - return AMDGPUInstrInfo::isUniformMMO(MMO); + return MMO->getSize() >= 4 && MMO->getAlignment() >= 4 && + AMDGPUInstrInfo::isUniformMMO(MMO); } RegisterBankInfo::InstructionMappings @@ -337,6 +344,31 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings( InstructionMappings AltMappings; switch (MI.getOpcode()) { + case TargetOpcode::G_CONSTANT: { + unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); + if (Size == 1) { + static const OpRegBankEntry<1> Table[4] = { + { { AMDGPU::VGPRRegBankID }, 1 }, + { { AMDGPU::SGPRRegBankID }, 1 }, + { { AMDGPU::VCCRegBankID }, 1 }, + { { AMDGPU::SCCRegBankID }, 1 } + }; + + return addMappingFromTable<1>(MI, MRI, {{ 0 }}, Table); + } + + LLVM_FALLTHROUGH; + } + case TargetOpcode::G_FCONSTANT: + case TargetOpcode::G_FRAME_INDEX: + case TargetOpcode::G_GLOBAL_VALUE: { + static const OpRegBankEntry<1> Table[2] = { + { { AMDGPU::VGPRRegBankID }, 1 }, + { { AMDGPU::SGPRRegBankID }, 1 } + }; + + return addMappingFromTable<1>(MI, MRI, {{ 0 }}, Table); + } case TargetOpcode::G_AND: case TargetOpcode::G_OR: case TargetOpcode::G_XOR: { @@ -408,23 +440,29 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings( AltMappings.push_back(&VSMapping); break; } - case TargetOpcode::G_LOAD: { + case TargetOpcode::G_LOAD: + case TargetOpcode::G_ZEXTLOAD: + case TargetOpcode::G_SEXTLOAD: { unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); + LLT PtrTy = MRI.getType(MI.getOperand(1).getReg()); + unsigned PtrSize = PtrTy.getSizeInBits(); + unsigned AS = PtrTy.getAddressSpace(); LLT LoadTy = MRI.getType(MI.getOperand(0).getReg()); - // FIXME: Should we be hard coding the size for these mappings? 
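The new predicate above only reports a load as a scalar (SMRD) candidate when it is uniform, at least four bytes wide, and four-byte aligned; the callers below additionally rule out the LDS, region and private address spaces. A small sketch of that decision with hypothetical stand-ins for the memory-operand queries (illustrative only, not part of the patch):

// Sketch only: mirrors the conditions checked before offering an SGPR load mapping.
#include <cstdint>

enum class AddrSpace { Global, Local, Region, Private, Constant };  // hypothetical stand-in

struct MemAccess {          // hypothetical stand-in for the single MachineMemOperand
  uint64_t SizeInBytes;
  uint64_t AlignInBytes;
  bool IsUniform;           // corresponds to AMDGPUInstrInfo::isUniformMMO(MMO)
  AddrSpace AS;
};

static bool canUseScalarLoadMapping(const MemAccess &M) {
  if (M.AS == AddrSpace::Local || M.AS == AddrSpace::Region ||
      M.AS == AddrSpace::Private)
    return false;                                   // always VGPR-mapped spaces
  return M.SizeInBytes >= 4 && M.AlignInBytes >= 4 && M.IsUniform;
}

int main() {
  MemAccess A{8, 4, true, AddrSpace::Constant};
  MemAccess B{2, 2, true, AddrSpace::Constant};     // too small / under-aligned
  return canUseScalarLoadMapping(A) && !canUseScalarLoadMapping(B) ? 0 : 1;
}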
- if (isInstrUniform(MI)) { + if ((AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS && + AS != AMDGPUAS::PRIVATE_ADDRESS) && + isInstrUniformNonExtLoadAlign4(MI)) { const InstructionMapping &SSMapping = getInstructionMapping( 1, 1, getOperandsMapping( {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), - AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}), + AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize)}), 2); // Num Operands AltMappings.push_back(&SSMapping); } const InstructionMapping &VVMapping = getInstructionMapping( 2, 1, getOperandsMapping( - {AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy), - AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}), + {AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy), + AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}), 2); // Num Operands AltMappings.push_back(&VVMapping); @@ -620,57 +658,53 @@ static LLT getHalfSizedType(LLT Ty) { /// /// There is additional complexity to try for compare values to identify the /// unique values used. -void AMDGPURegisterBankInfo::executeInWaterfallLoop( - MachineInstr &MI, MachineRegisterInfo &MRI, - ArrayRef OpIndices) const { - MachineFunction *MF = MI.getParent()->getParent(); - const GCNSubtarget &ST = MF->getSubtarget(); - const SIInstrInfo *TII = ST.getInstrInfo(); - MachineBasicBlock::iterator I(MI); - - MachineBasicBlock &MBB = *MI.getParent(); - const DebugLoc &DL = MI.getDebugLoc(); - - // Use a set to avoid extra readfirstlanes in the case where multiple operands - // are the same register. - SmallSet SGPROperandRegs; - for (unsigned Op : OpIndices) { - assert(MI.getOperand(Op).isUse()); - Register Reg = MI.getOperand(Op).getReg(); - const RegisterBank *OpBank = getRegBank(Reg, MRI, *TRI); - if (OpBank->getID() == AMDGPU::VGPRRegBankID) - SGPROperandRegs.insert(Reg); - } - - // No operands need to be replaced, so no need to loop. - if (SGPROperandRegs.empty()) - return; - - MachineIRBuilder B(MI); +bool AMDGPURegisterBankInfo::executeInWaterfallLoop( + MachineIRBuilder &B, + iterator_range Range, + SmallSet &SGPROperandRegs, + MachineRegisterInfo &MRI) const { SmallVector ResultRegs; SmallVector InitResultRegs; SmallVector PhiRegs; - for (MachineOperand &Def : MI.defs()) { - LLT ResTy = MRI.getType(Def.getReg()); - const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI); - ResultRegs.push_back(Def.getReg()); - Register InitReg = B.buildUndef(ResTy).getReg(0); - Register PhiReg = MRI.createGenericVirtualRegister(ResTy); - InitResultRegs.push_back(InitReg); - PhiRegs.push_back(PhiReg); - MRI.setRegBank(PhiReg, *DefBank); - MRI.setRegBank(InitReg, *DefBank); + + MachineBasicBlock &MBB = B.getMBB(); + MachineFunction *MF = &B.getMF(); + + const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass(); + const unsigned WaveAndOpc = Subtarget.isWave32() ? + AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64; + const unsigned MovTermOpc = Subtarget.isWave32() ? + AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term; + const unsigned XorTermOpc = Subtarget.isWave32() ? + AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term; + const unsigned AndSaveExecOpc = Subtarget.isWave32() ? + AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64; + const unsigned ExecReg = Subtarget.isWave32() ? 
+ AMDGPU::EXEC_LO : AMDGPU::EXEC; + + for (MachineInstr &MI : Range) { + for (MachineOperand &Def : MI.defs()) { + LLT ResTy = MRI.getType(Def.getReg()); + const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI); + ResultRegs.push_back(Def.getReg()); + Register InitReg = B.buildUndef(ResTy).getReg(0); + Register PhiReg = MRI.createGenericVirtualRegister(ResTy); + InitResultRegs.push_back(InitReg); + PhiRegs.push_back(PhiReg); + MRI.setRegBank(PhiReg, *DefBank); + MRI.setRegBank(InitReg, *DefBank); + } } - Register SaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); - Register InitSaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); + Register SaveExecReg = MRI.createVirtualRegister(WaveRC); + Register InitSaveExecReg = MRI.createVirtualRegister(WaveRC); // Don't bother using generic instructions/registers for the exec mask. B.buildInstr(TargetOpcode::IMPLICIT_DEF) .addDef(InitSaveExecReg); - Register PhiExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - Register NewExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + Register PhiExec = MRI.createVirtualRegister(WaveRC); + Register NewExec = MRI.createVirtualRegister(WaveRC); // To insert the loop we need to split the block. Move everything before this // point to a new block, and insert a new empty block before this instruction. @@ -688,7 +722,7 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop( // Move the rest of the block into a new block. RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB); - RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end()); + RemainderBB->splice(RemainderBB->begin(), &MBB, Range.end(), MBB.end()); MBB.addSuccessor(LoopBB); RestoreExecBB->addSuccessor(RemainderBB); @@ -711,164 +745,173 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop( .addMBB(LoopBB); } - // Move the instruction into the loop. - LoopBB->splice(LoopBB->end(), &MBB, I); - I = std::prev(LoopBB->end()); + const DebugLoc &DL = B.getDL(); + + // Figure out the iterator range after splicing the instructions. + auto NewBegin = std::prev(LoopBB->end()); - B.setInstr(*I); + // Move the instruction into the loop. Note we moved everything after + // Range.end() already into a new block, so Range.end() is no longer valid. + LoopBB->splice(LoopBB->end(), &MBB, Range.begin(), MBB.end()); + + auto NewEnd = LoopBB->end(); + + MachineBasicBlock::iterator I = Range.begin(); + B.setInsertPt(*LoopBB, I); Register CondReg; - for (MachineOperand &Op : MI.uses()) { - if (!Op.isReg()) - continue; + for (MachineInstr &MI : make_range(NewBegin, NewEnd)) { + for (MachineOperand &Op : MI.uses()) { + if (!Op.isReg() || Op.isDef()) + continue; - assert(!Op.isDef()); - if (SGPROperandRegs.count(Op.getReg())) { - LLT OpTy = MRI.getType(Op.getReg()); - unsigned OpSize = OpTy.getSizeInBits(); - - // Can only do a readlane of 32-bit pieces. - if (OpSize == 32) { - // Avoid extra copies in the simple case of one 32-bit register. - Register CurrentLaneOpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); - MRI.setType(CurrentLaneOpReg, OpTy); - - constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI); - // Read the next variant <- also loop target. 
- BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), CurrentLaneOpReg) - .addReg(Op.getReg()); - - Register NewCondReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - bool First = CondReg == AMDGPU::NoRegister; - if (First) - CondReg = NewCondReg; - - // Compare the just read M0 value to all possible Idx values. - B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64) - .addDef(NewCondReg) - .addReg(CurrentLaneOpReg) - .addReg(Op.getReg()); - Op.setReg(CurrentLaneOpReg); - - if (!First) { - Register AndReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); - - // If there are multiple operands to consider, and the conditions. - B.buildInstr(AMDGPU::S_AND_B64) - .addDef(AndReg) - .addReg(NewCondReg) - .addReg(CondReg); - CondReg = AndReg; - } - } else { - LLT S32 = LLT::scalar(32); - SmallVector ReadlanePieces; + if (SGPROperandRegs.count(Op.getReg())) { + LLT OpTy = MRI.getType(Op.getReg()); + unsigned OpSize = OpTy.getSizeInBits(); - // The compares can be done as 64-bit, but the extract needs to be done - // in 32-bit pieces. + // Can only do a readlane of 32-bit pieces. + if (OpSize == 32) { + // Avoid extra copies in the simple case of one 32-bit register. + Register CurrentLaneOpReg + = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + MRI.setType(CurrentLaneOpReg, OpTy); - bool Is64 = OpSize % 64 == 0; + constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI); + // Read the next variant <- also loop target. + BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), + CurrentLaneOpReg) + .addReg(Op.getReg()); - LLT UnmergeTy = OpSize % 64 == 0 ? LLT::scalar(64) : LLT::scalar(32); - unsigned CmpOp = OpSize % 64 == 0 ? AMDGPU::V_CMP_EQ_U64_e64 - : AMDGPU::V_CMP_EQ_U32_e64; + Register NewCondReg = MRI.createVirtualRegister(WaveRC); + bool First = CondReg == AMDGPU::NoRegister; + if (First) + CondReg = NewCondReg; - // The compares can be done as 64-bit, but the extract needs to be done - // in 32-bit pieces. + // Compare the just read M0 value to all possible Idx values. + B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64) + .addDef(NewCondReg) + .addReg(CurrentLaneOpReg) + .addReg(Op.getReg()); + Op.setReg(CurrentLaneOpReg); - // Insert the unmerge before the loop. + if (!First) { + Register AndReg = MRI.createVirtualRegister(WaveRC); - B.setMBB(MBB); - auto Unmerge = B.buildUnmerge(UnmergeTy, Op.getReg()); - B.setInstr(*I); + // If there are multiple operands to consider, and the conditions. + B.buildInstr(WaveAndOpc) + .addDef(AndReg) + .addReg(NewCondReg) + .addReg(CondReg); + CondReg = AndReg; + } + } else { + LLT S32 = LLT::scalar(32); + SmallVector ReadlanePieces; - unsigned NumPieces = Unmerge->getNumOperands() - 1; - for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) { - unsigned UnmergePiece = Unmerge.getReg(PieceIdx); + // The compares can be done as 64-bit, but the extract needs to be done + // in 32-bit pieces. - Register CurrentLaneOpReg; - if (Is64) { - Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32); - Register CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32); + bool Is64 = OpSize % 64 == 0; - MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass); - MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass); - MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass); + LLT UnmergeTy = OpSize % 64 == 0 ? LLT::scalar(64) : LLT::scalar(32); + unsigned CmpOp = OpSize % 64 == 0 ? AMDGPU::V_CMP_EQ_U64_e64 + : AMDGPU::V_CMP_EQ_U32_e64; - // Read the next variant <- also loop target. 
- BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), - CurrentLaneOpRegLo) - .addReg(UnmergePiece, 0, AMDGPU::sub0); + // The compares can be done as 64-bit, but the extract needs to be done + // in 32-bit pieces. - // Read the next variant <- also loop target. - BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), - CurrentLaneOpRegHi) - .addReg(UnmergePiece, 0, AMDGPU::sub1); + // Insert the unmerge before the loop. - CurrentLaneOpReg = - B.buildMerge(LLT::scalar(64), - {CurrentLaneOpRegLo, CurrentLaneOpRegHi}) - .getReg(0); + B.setMBB(MBB); + auto Unmerge = B.buildUnmerge(UnmergeTy, Op.getReg()); + B.setInstr(*I); - MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass); + unsigned NumPieces = Unmerge->getNumOperands() - 1; + for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) { + Register UnmergePiece = Unmerge.getReg(PieceIdx); - if (OpTy.getScalarSizeInBits() == 64) { - // If we need to produce a 64-bit element vector, so use the - // merged pieces - ReadlanePieces.push_back(CurrentLaneOpReg); + Register CurrentLaneOpReg; + if (Is64) { + Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32); + Register CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32); + + MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass); + MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass); + MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass); + + // Read the next variant <- also loop target. + BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), + CurrentLaneOpRegLo) + .addReg(UnmergePiece, 0, AMDGPU::sub0); + + // Read the next variant <- also loop target. + BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), + CurrentLaneOpRegHi) + .addReg(UnmergePiece, 0, AMDGPU::sub1); + + CurrentLaneOpReg = + B.buildMerge(LLT::scalar(64), + {CurrentLaneOpRegLo, CurrentLaneOpRegHi}) + .getReg(0); + + MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass); + + if (OpTy.getScalarSizeInBits() == 64) { + // If we need to produce a 64-bit element vector, so use the + // merged pieces + ReadlanePieces.push_back(CurrentLaneOpReg); + } else { + // 32-bit element type. + ReadlanePieces.push_back(CurrentLaneOpRegLo); + ReadlanePieces.push_back(CurrentLaneOpRegHi); + } } else { - // 32-bit element type. - ReadlanePieces.push_back(CurrentLaneOpRegLo); - ReadlanePieces.push_back(CurrentLaneOpRegHi); + CurrentLaneOpReg = MRI.createGenericVirtualRegister(S32); + MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass); + MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass); + + // Read the next variant <- also loop target. + BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), + CurrentLaneOpReg) + .addReg(UnmergePiece); + ReadlanePieces.push_back(CurrentLaneOpReg); } - } else { - CurrentLaneOpReg = MRI.createGenericVirtualRegister(LLT::scalar(32)); - MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass); - MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass); - // Read the next variant <- also loop target. 
- BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), - CurrentLaneOpReg) - .addReg(UnmergePiece); - ReadlanePieces.push_back(CurrentLaneOpReg); - } + Register NewCondReg = MRI.createVirtualRegister(WaveRC); + bool First = CondReg == AMDGPU::NoRegister; + if (First) + CondReg = NewCondReg; - Register NewCondReg - = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); - bool First = CondReg == AMDGPU::NoRegister; - if (First) - CondReg = NewCondReg; + B.buildInstr(CmpOp) + .addDef(NewCondReg) + .addReg(CurrentLaneOpReg) + .addReg(UnmergePiece); - B.buildInstr(CmpOp) - .addDef(NewCondReg) - .addReg(CurrentLaneOpReg) - .addReg(UnmergePiece); + if (!First) { + Register AndReg = MRI.createVirtualRegister(WaveRC); - if (!First) { - Register AndReg - = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); + // If there are multiple operands to consider, and the conditions. + B.buildInstr(WaveAndOpc) + .addDef(AndReg) + .addReg(NewCondReg) + .addReg(CondReg); + CondReg = AndReg; + } + } - // If there are multiple operands to consider, and the conditions. - B.buildInstr(AMDGPU::S_AND_B64) - .addDef(AndReg) - .addReg(NewCondReg) - .addReg(CondReg); - CondReg = AndReg; + // FIXME: Build merge seems to switch to CONCAT_VECTORS but not + // BUILD_VECTOR + if (OpTy.isVector()) { + auto Merge = B.buildBuildVector(OpTy, ReadlanePieces); + Op.setReg(Merge.getReg(0)); + } else { + auto Merge = B.buildMerge(OpTy, ReadlanePieces); + Op.setReg(Merge.getReg(0)); } - } - // FIXME: Build merge seems to switch to CONCAT_VECTORS but not - // BUILD_VECTOR - if (OpTy.isVector()) { - auto Merge = B.buildBuildVector(OpTy, ReadlanePieces); - Op.setReg(Merge.getReg(0)); - } else { - auto Merge = B.buildMerge(OpTy, ReadlanePieces); - Op.setReg(Merge.getReg(0)); + MRI.setRegBank(Op.getReg(), getRegBank(AMDGPU::SGPRRegBankID)); } - - MRI.setRegBank(Op.getReg(), getRegBank(AMDGPU::SGPRRegBankID)); } } } @@ -876,16 +919,16 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop( B.setInsertPt(*LoopBB, LoopBB->end()); // Update EXEC, save the original EXEC value to VCC. - B.buildInstr(AMDGPU::S_AND_SAVEEXEC_B64) + B.buildInstr(AndSaveExecOpc) .addDef(NewExec) .addReg(CondReg, RegState::Kill); MRI.setSimpleHint(NewExec, CondReg); // Update EXEC, switch all done bits to 0 and all todo bits to 1. - B.buildInstr(AMDGPU::S_XOR_B64_term) - .addDef(AMDGPU::EXEC) - .addReg(AMDGPU::EXEC) + B.buildInstr(XorTermOpc) + .addDef(ExecReg) + .addReg(ExecReg) .addReg(NewExec); // XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use @@ -896,14 +939,60 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop( .addMBB(LoopBB); // Save the EXEC mask before the loop. - BuildMI(MBB, MBB.end(), DL, TII->get(AMDGPU::S_MOV_B64_term), SaveExecReg) - .addReg(AMDGPU::EXEC); + BuildMI(MBB, MBB.end(), DL, TII->get(MovTermOpc), SaveExecReg) + .addReg(ExecReg); // Restore the EXEC mask after the loop. B.setMBB(*RestoreExecBB); - B.buildInstr(AMDGPU::S_MOV_B64_term) - .addDef(AMDGPU::EXEC) + B.buildInstr(MovTermOpc) + .addDef(ExecReg) .addReg(SaveExecReg); + + // Restore the insert point before the original instruction. + B.setInsertPt(MBB, MBB.end()); + + return true; +} + +// Return any unique registers used by \p MI at \p OpIndices that need to be +// handled in a waterfall loop. Returns these registers in \p +// SGPROperandRegs. Returns true if there are any operansd to handle and a +// waterfall loop is necessary. 
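The loop built above is the usual waterfall pattern: read the first active lane's value with V_READFIRSTLANE, compare it against the VGPR across all lanes, restrict EXEC to the matching lanes with S_AND_SAVEEXEC, run the instruction, then XOR those lanes out of EXEC and branch back until none remain (the wave32/wave64 opcode variants are chosen from Subtarget.isWave32() above). A scalar sketch of that idea, using plain integers for per-lane values and a 64-bit mask for EXEC (illustration only, not a model of the generated MIR):

// Sketch only, not upstream code.
#include <cstdint>
#include <cstdio>
#include <vector>

static void waterfall(const std::vector<uint32_t> &LaneValues) {
  uint64_t Exec =
      LaneValues.size() >= 64 ? ~0ull : ((1ull << LaneValues.size()) - 1);
  while (Exec) {
    // V_READFIRSTLANE: take the value held by the lowest active lane.
    unsigned FirstLane = 0;
    while (!((Exec >> FirstLane) & 1))
      ++FirstLane;
    uint32_t Uniform = LaneValues[FirstLane];

    // V_CMP_EQ + S_AND_SAVEEXEC: this trip runs for every lane that holds the
    // same value, so each distinct value is processed exactly once.
    uint64_t Match = 0;
    for (unsigned L = 0; L < LaneValues.size(); ++L)
      if (((Exec >> L) & 1) && LaneValues[L] == Uniform)
        Match |= 1ull << L;

    std::printf("run with uniform value %u, lane mask 0x%llx\n", Uniform,
                (unsigned long long)Match);

    // S_XOR_term: drop the handled lanes from EXEC and loop while any remain.
    Exec &= ~Match;
  }
}

int main() { waterfall({7, 7, 3, 7, 3, 9}); }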
+bool AMDGPURegisterBankInfo::collectWaterfallOperands( + SmallSet &SGPROperandRegs, MachineInstr &MI, + MachineRegisterInfo &MRI, ArrayRef OpIndices) const { + for (unsigned Op : OpIndices) { + assert(MI.getOperand(Op).isUse()); + Register Reg = MI.getOperand(Op).getReg(); + const RegisterBank *OpBank = getRegBank(Reg, MRI, *TRI); + if (OpBank->getID() == AMDGPU::VGPRRegBankID) + SGPROperandRegs.insert(Reg); + } + + // No operands need to be replaced, so no need to loop. + return !SGPROperandRegs.empty(); +} + +bool AMDGPURegisterBankInfo::executeInWaterfallLoop( + MachineIRBuilder &B, MachineInstr &MI, MachineRegisterInfo &MRI, + ArrayRef OpIndices) const { + // Use a set to avoid extra readfirstlanes in the case where multiple operands + // are the same register. + SmallSet SGPROperandRegs; + + if (!collectWaterfallOperands(SGPROperandRegs, MI, MRI, OpIndices)) + return false; + + MachineBasicBlock::iterator I = MI.getIterator(); + return executeInWaterfallLoop(B, make_range(I, std::next(I)), + SGPROperandRegs, MRI); +} + +bool AMDGPURegisterBankInfo::executeInWaterfallLoop( + MachineInstr &MI, MachineRegisterInfo &MRI, + ArrayRef OpIndices) const { + MachineIRBuilder B(MI); + return executeInWaterfallLoop(B, MI, MRI, OpIndices); } // Legalize an operand that must be an SGPR by inserting a readfirstlane. @@ -960,8 +1049,13 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI, SmallVector SrcRegs(OpdMapper.getVRegs(1)); // If the pointer is an SGPR, we have nothing to do. - if (SrcRegs.empty()) - return false; + if (SrcRegs.empty()) { + Register PtrReg = MI.getOperand(1).getReg(); + const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI); + if (PtrBank == &AMDGPU::SGPRRegBank) + return false; + SrcRegs.push_back(PtrReg); + } assert(LoadSize % MaxNonSmrdLoadSize == 0); @@ -1013,6 +1107,33 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI, return true; } +bool AMDGPURegisterBankInfo::applyMappingImage( + MachineInstr &MI, const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, + MachineRegisterInfo &MRI, int RsrcIdx) const { + const int NumDefs = MI.getNumExplicitDefs(); + + // The reported argument index is relative to the IR intrinsic call arguments, + // so we need to shift by the number of defs and the intrinsic ID. + RsrcIdx += NumDefs + 1; + + // Insert copies to VGPR arguments. + applyDefaultMapping(OpdMapper); + + // Fixup any SGPR arguments. + SmallVector SGPRIndexes; + for (int I = NumDefs, NumOps = MI.getNumOperands(); I != NumOps; ++I) { + if (!MI.getOperand(I).isReg()) + continue; + + // If this intrinsic has a sampler, it immediately follows rsrc. + if (I == RsrcIdx || I == RsrcIdx + 1) + SGPRIndexes.push_back(I); + } + + executeInWaterfallLoop(MI, MRI, SGPRIndexes); + return true; +} + // For cases where only a single copy is inserted for matching register banks. // Replace the register in the instruction operand static void substituteSimpleCopyRegs( @@ -1024,6 +1145,184 @@ static void substituteSimpleCopyRegs( } } +/// Handle register layout difference for f16 images for some subtargets. 
+Register AMDGPURegisterBankInfo::handleD16VData(MachineIRBuilder &B, + MachineRegisterInfo &MRI, + Register Reg) const { + if (!Subtarget.hasUnpackedD16VMem()) + return Reg; + + const LLT S16 = LLT::scalar(16); + LLT StoreVT = MRI.getType(Reg); + if (!StoreVT.isVector() || StoreVT.getElementType() != S16) + return Reg; + + auto Unmerge = B.buildUnmerge(S16, Reg); + + + SmallVector WideRegs; + for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I) + WideRegs.push_back(Unmerge.getReg(I)); + + const LLT S32 = LLT::scalar(32); + int NumElts = StoreVT.getNumElements(); + + return B.buildMerge(LLT::vector(NumElts, S32), WideRegs).getReg(0); +} + +static std::pair +getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) { + int64_t Const; + if (mi_match(Reg, MRI, m_ICst(Const))) + return std::make_pair(Register(), Const); + + Register Base; + if (mi_match(Reg, MRI, m_GAdd(m_Reg(Base), m_ICst(Const)))) + return std::make_pair(Base, Const); + + // TODO: Handle G_OR used for add case + return std::make_pair(Reg, 0); +} + +std::pair +AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B, + Register OrigOffset) const { + const unsigned MaxImm = 4095; + Register BaseReg; + unsigned ImmOffset; + const LLT S32 = LLT::scalar(32); + + std::tie(BaseReg, ImmOffset) = getBaseWithConstantOffset(*B.getMRI(), + OrigOffset); + + unsigned C1 = 0; + if (ImmOffset != 0) { + // If the immediate value is too big for the immoffset field, put the value + // and -4096 into the immoffset field so that the value that is copied/added + // for the voffset field is a multiple of 4096, and it stands more chance + // of being CSEd with the copy/add for another similar load/store. + // However, do not do that rounding down to a multiple of 4096 if that is a + // negative number, as it appears to be illegal to have a negative offset + // in the vgpr, even if adding the immediate offset makes it positive. + unsigned Overflow = ImmOffset & ~MaxImm; + ImmOffset -= Overflow; + if ((int32_t)Overflow < 0) { + Overflow += ImmOffset; + ImmOffset = 0; + } + + C1 = ImmOffset; + if (Overflow != 0) { + if (!BaseReg) + BaseReg = B.buildConstant(S32, Overflow).getReg(0); + else { + auto OverflowVal = B.buildConstant(S32, Overflow); + BaseReg = B.buildAdd(S32, BaseReg, OverflowVal).getReg(0); + } + } + } + + if (!BaseReg) + BaseReg = B.buildConstant(S32, 0).getReg(0); + + return {BaseReg, C1}; +} + +static bool isZero(Register Reg, MachineRegisterInfo &MRI) { + int64_t C; + return mi_match(Reg, MRI, m_ICst(C)) && C == 0; +} + +static unsigned extractGLC(unsigned CachePolicy) { + return CachePolicy & 1; +} + +static unsigned extractSLC(unsigned CachePolicy) { + return (CachePolicy >> 1) & 1; +} + +static unsigned extractDLC(unsigned CachePolicy) { + return (CachePolicy >> 2) & 1; +} + +MachineInstr * +AMDGPURegisterBankInfo::selectStoreIntrinsic(MachineIRBuilder &B, + MachineInstr &MI) const { + MachineRegisterInfo &MRI = *B.getMRI(); + executeInWaterfallLoop(B, MI, MRI, {2, 4}); + + // FIXME: DAG lowering brokenly changes opcode based on FP vs. integer. + + Register VData = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(VData); + + int EltSize = Ty.getScalarSizeInBits(); + int Size = Ty.getSizeInBits(); + + // FIXME: Broken integer truncstore. + if (EltSize != 32) + report_fatal_error("unhandled intrinsic store"); + + // FIXME: Verifier should enforce 1 MMO for these intrinsics. 
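splitBufferOffsets above splits a constant buffer offset so that at most 4095 lands in the MUBUF immoffset field while the 4096-aligned remainder is materialized into the voffset register, except that a would-be-negative remainder is folded back so the register part never goes negative. The same arithmetic on plain integers, as a sketch (helper name is illustrative, not part of the patch):

// Sketch only: the constant-offset split performed by splitBufferOffsets.
#include <cassert>
#include <cstdint>
#include <utility>

static std::pair<unsigned, unsigned> splitConstOffset(unsigned ImmOffset) {
  const unsigned MaxImm = 4095;
  unsigned Overflow = ImmOffset & ~MaxImm;  // part that cannot go in immoffset
  ImmOffset -= Overflow;
  if ((int32_t)Overflow < 0) {              // never round a negative value down
    Overflow += ImmOffset;
    ImmOffset = 0;
  }
  return {Overflow, ImmOffset};             // {added to voffset, immoffset field}
}

int main() {
  assert(splitConstOffset(100) == std::make_pair(0u, 100u));
  assert(splitConstOffset(5000) == std::make_pair(4096u, 904u));
  assert(splitConstOffset(0x80000004u) == std::make_pair(0x80000004u, 0u));
  return 0;
}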
+ const int MemSize = (*MI.memoperands_begin())->getSize(); + + + Register RSrc = MI.getOperand(2).getReg(); + Register VOffset = MI.getOperand(3).getReg(); + Register SOffset = MI.getOperand(4).getReg(); + unsigned CachePolicy = MI.getOperand(5).getImm(); + + unsigned ImmOffset; + std::tie(VOffset, ImmOffset) = splitBufferOffsets(B, VOffset); + + const bool Offen = !isZero(VOffset, MRI); + + unsigned Opc = AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact; + switch (8 * MemSize) { + case 8: + Opc = Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact : + AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact; + break; + case 16: + Opc = Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact : + AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact; + break; + default: + Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact : + AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact; + if (Size > 32) + Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32); + break; + } + + + // Set the insertion point back to the instruction in case it was moved into a + // loop. + B.setInstr(MI); + + MachineInstrBuilder MIB = B.buildInstr(Opc) + .addUse(VData); + + if (Offen) + MIB.addUse(VOffset); + + MIB.addUse(RSrc) + .addUse(SOffset) + .addImm(ImmOffset) + .addImm(extractGLC(CachePolicy)) + .addImm(extractSLC(CachePolicy)) + .addImm(0) // tfe: FIXME: Remove from inst + .addImm(extractDLC(CachePolicy)) + .cloneMemRefs(MI); + + // FIXME: We need a way to report failure from applyMappingImpl. + // Insert constrain copies before inserting the loop. + if (!constrainSelectedInstRegOperands(*MIB, *TII, *TRI, *this)) + report_fatal_error("failed to constrain selected store intrinsic"); + + return MIB; +} + void AMDGPURegisterBankInfo::applyMappingImpl( const OperandsMapper &OpdMapper) const { MachineInstr &MI = OpdMapper.getMI(); @@ -1289,12 +1588,202 @@ void AMDGPURegisterBankInfo::applyMappingImpl( MI.eraseFromParent(); return; } - case AMDGPU::G_EXTRACT_VECTOR_ELT: - applyDefaultMapping(OpdMapper); - executeInWaterfallLoop(MI, MRI, { 2 }); + case AMDGPU::G_BUILD_VECTOR: + case AMDGPU::G_BUILD_VECTOR_TRUNC: { + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + if (DstTy != LLT::vector(2, 16)) + break; + + assert(MI.getNumOperands() == 3 && OpdMapper.getVRegs(0).empty()); + substituteSimpleCopyRegs(OpdMapper, 1); + substituteSimpleCopyRegs(OpdMapper, 2); + + const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI); + if (DstBank == &AMDGPU::SGPRRegBank) + break; // Can use S_PACK_* instructions. 
+ + MachineIRBuilder B(MI); + + Register Lo = MI.getOperand(1).getReg(); + Register Hi = MI.getOperand(2).getReg(); + const LLT S32 = LLT::scalar(32); + + const RegisterBank *BankLo = getRegBank(Lo, MRI, *TRI); + const RegisterBank *BankHi = getRegBank(Hi, MRI, *TRI); + + Register ZextLo; + Register ShiftHi; + + if (Opc == AMDGPU::G_BUILD_VECTOR) { + ZextLo = B.buildZExt(S32, Lo).getReg(0); + MRI.setRegBank(ZextLo, *BankLo); + + Register ZextHi = B.buildZExt(S32, Hi).getReg(0); + MRI.setRegBank(ZextHi, *BankHi); + + auto ShiftAmt = B.buildConstant(S32, 16); + MRI.setRegBank(ShiftAmt.getReg(0), *BankHi); + + ShiftHi = B.buildShl(S32, ZextHi, ShiftAmt).getReg(0); + MRI.setRegBank(ShiftHi, *BankHi); + } else { + Register MaskLo = B.buildConstant(S32, 0xffff).getReg(0); + MRI.setRegBank(MaskLo, *BankLo); + + auto ShiftAmt = B.buildConstant(S32, 16); + MRI.setRegBank(ShiftAmt.getReg(0), *BankHi); + + ShiftHi = B.buildShl(S32, Hi, ShiftAmt).getReg(0); + MRI.setRegBank(ShiftHi, *BankHi); + + ZextLo = B.buildAnd(S32, Lo, MaskLo).getReg(0); + MRI.setRegBank(ZextLo, *BankLo); + } + + auto Or = B.buildOr(S32, ZextLo, ShiftHi); + MRI.setRegBank(Or.getReg(0), *DstBank); + + B.buildBitcast(DstReg, Or); + MI.eraseFromParent(); + return; + } + case AMDGPU::G_EXTRACT_VECTOR_ELT: { + SmallVector DstRegs(OpdMapper.getVRegs(0)); + + assert(OpdMapper.getVRegs(1).empty() && OpdMapper.getVRegs(2).empty()); + + if (DstRegs.empty()) { + applyDefaultMapping(OpdMapper); + executeInWaterfallLoop(MI, MRI, { 2 }); + return; + } + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + Register IdxReg = MI.getOperand(2).getReg(); + LLT DstTy = MRI.getType(DstReg); + (void)DstTy; + + assert(DstTy.getSizeInBits() == 64); + + LLT SrcTy = MRI.getType(SrcReg); + const LLT S32 = LLT::scalar(32); + LLT Vec32 = LLT::vector(2 * SrcTy.getNumElements(), 32); + + MachineIRBuilder B(MI); + auto CastSrc = B.buildBitcast(Vec32, SrcReg); + auto One = B.buildConstant(S32, 1); + + // Split the vector index into 32-bit pieces. Prepare to move all of the + // new instructions into a waterfall loop if necessary. + // + // Don't put the bitcast or constant in the loop. + MachineInstrSpan Span(MachineBasicBlock::iterator(&MI), &B.getMBB()); + + // Compute 32-bit element indices, (2 * OrigIdx, 2 * OrigIdx + 1). + auto IdxLo = B.buildShl(S32, IdxReg, One); + auto IdxHi = B.buildAdd(S32, IdxLo, One); + B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo); + B.buildExtractVectorElement(DstRegs[1], CastSrc, IdxHi); + + const ValueMapping &DstMapping + = OpdMapper.getInstrMapping().getOperandMapping(0); + + // FIXME: Should be getting from mapping or not? + const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI); + MRI.setRegBank(DstReg, *DstMapping.BreakDown[0].RegBank); + MRI.setRegBank(CastSrc.getReg(0), *SrcBank); + MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank); + MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank); + MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank); + + SmallSet OpsToWaterfall; + if (!collectWaterfallOperands(OpsToWaterfall, MI, MRI, { 2 })) { + MI.eraseFromParent(); + return; + } + + // Remove the original instruction to avoid potentially confusing the + // waterfall loop logic. 
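For a VGPR-banked <2 x s16> G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC the code above cannot use S_PACK_*, so it widens the halves to 32 bits, shifts the high half into the upper half-word and ORs the pieces together before the final bitcast. A plain-integer sketch of that packing (helper names are illustrative, not part of the patch):

// Sketch only: the bit arithmetic of the VGPR lowering above.
#include <cassert>
#include <cstdint>

static uint32_t packV2I16(uint16_t Lo, uint16_t Hi) {
  // G_BUILD_VECTOR form: zero-extend both halves, shift Hi left by 16, then OR.
  return uint32_t(Lo) | (uint32_t(Hi) << 16);
}

static uint32_t packV2I16Trunc(uint32_t Lo, uint32_t Hi) {
  // G_BUILD_VECTOR_TRUNC form: mask the low half instead of zero-extending it;
  // the shift discards the high bits of Hi by itself.
  return (Lo & 0xffffu) | (Hi << 16);
}

int main() {
  assert(packV2I16(0x1234, 0xabcd) == 0xabcd1234u);
  assert(packV2I16Trunc(0xdead1234u, 0xffffabcdu) == 0xabcd1234u);
  return 0;
}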
+ B.setInstr(*Span.begin()); + MI.eraseFromParent(); + executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()), + OpsToWaterfall, MRI); + return; + } + case AMDGPU::G_INSERT_VECTOR_ELT: { + SmallVector InsRegs(OpdMapper.getVRegs(2)); + + assert(OpdMapper.getVRegs(0).empty()); + assert(OpdMapper.getVRegs(1).empty()); + assert(OpdMapper.getVRegs(3).empty()); + + if (InsRegs.empty()) { + applyDefaultMapping(OpdMapper); + executeInWaterfallLoop(MI, MRI, { 3 }); + return; + } + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + Register InsReg = MI.getOperand(2).getReg(); + Register IdxReg = MI.getOperand(3).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + LLT InsTy = MRI.getType(InsReg); + (void)InsTy; + + assert(InsTy.getSizeInBits() == 64); + + const LLT S32 = LLT::scalar(32); + LLT Vec32 = LLT::vector(2 * SrcTy.getNumElements(), 32); + + MachineIRBuilder B(MI); + auto CastSrc = B.buildBitcast(Vec32, SrcReg); + auto One = B.buildConstant(S32, 1); + + // Split the vector index into 32-bit pieces. Prepare to move all of the + // new instructions into a waterfall loop if necessary. + // + // Don't put the bitcast or constant in the loop. + MachineInstrSpan Span(MachineBasicBlock::iterator(&MI), &B.getMBB()); + + // Compute 32-bit element indices, (2 * OrigIdx, 2 * OrigIdx + 1). + auto IdxLo = B.buildShl(S32, IdxReg, One); + auto IdxHi = B.buildAdd(S32, IdxLo, One); + + auto InsLo = B.buildInsertVectorElement(Vec32, CastSrc, InsRegs[0], IdxLo); + auto InsHi = B.buildInsertVectorElement(Vec32, InsLo, InsRegs[1], IdxHi); + B.buildBitcast(DstReg, InsHi); + + const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI); + const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI); + const RegisterBank *InsSrcBank = getRegBank(InsReg, MRI, *TRI); + + MRI.setRegBank(InsReg, *InsSrcBank); + MRI.setRegBank(CastSrc.getReg(0), *SrcBank); + MRI.setRegBank(InsLo.getReg(0), *DstBank); + MRI.setRegBank(InsHi.getReg(0), *DstBank); + MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank); + MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank); + MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank); + + + SmallSet OpsToWaterfall; + if (!collectWaterfallOperands(OpsToWaterfall, MI, MRI, { 3 })) { + MI.eraseFromParent(); + return; + } + + B.setInstr(*Span.begin()); + MI.eraseFromParent(); + + executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()), + OpsToWaterfall, MRI); return; + } case AMDGPU::G_INTRINSIC: { - switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { + switch (MI.getIntrinsicID()) { case Intrinsic::amdgcn_s_buffer_load: { // FIXME: Move to G_INTRINSIC_W_SIDE_EFFECTS executeInWaterfallLoop(MI, MRI, { 2, 3 }); @@ -1303,8 +1792,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl( case Intrinsic::amdgcn_readlane: { substituteSimpleCopyRegs(OpdMapper, 2); - assert(empty(OpdMapper.getVRegs(0))); - assert(empty(OpdMapper.getVRegs(3))); + assert(OpdMapper.getVRegs(0).empty()); + assert(OpdMapper.getVRegs(3).empty()); // Make sure the index is an SGPR. It doesn't make sense to run this in a // waterfall loop, so assume it's a uniform value. 
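The G_EXTRACT_VECTOR_ELT and G_INSERT_VECTOR_ELT paths above handle 64-bit elements by bitcasting to a vector of twice as many 32-bit elements and addressing the pair (2*Idx, 2*Idx + 1). A plain-memory sketch of that index arithmetic (container and helper names are illustrative, not part of the patch; low halves stored first):

// Sketch only: index doubling for 64-bit vector elements.
#include <cassert>
#include <cstdint>
#include <vector>

static uint64_t extract64(const std::vector<uint32_t> &Vec32, unsigned Idx) {
  unsigned IdxLo = Idx << 1;      // 2 * Idx
  unsigned IdxHi = IdxLo + 1;     // 2 * Idx + 1
  return uint64_t(Vec32[IdxLo]) | (uint64_t(Vec32[IdxHi]) << 32);
}

static void insert64(std::vector<uint32_t> &Vec32, unsigned Idx, uint64_t Val) {
  unsigned IdxLo = Idx << 1;
  unsigned IdxHi = IdxLo + 1;
  Vec32[IdxLo] = uint32_t(Val);
  Vec32[IdxHi] = uint32_t(Val >> 32);
}

int main() {
  // A <2 x s64> vector viewed as <4 x s32>.
  std::vector<uint32_t> V{0x11111111u, 0x22222222u, 0x33333333u, 0x44444444u};
  assert(extract64(V, 1) == 0x4444444433333333ull);
  insert64(V, 0, 0xaaaaaaaabbbbbbbbull);
  assert(V[0] == 0xbbbbbbbbu && V[1] == 0xaaaaaaaau);
  return 0;
}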
@@ -1312,9 +1801,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl( return; } case Intrinsic::amdgcn_writelane: { - assert(empty(OpdMapper.getVRegs(0))); - assert(empty(OpdMapper.getVRegs(2))); - assert(empty(OpdMapper.getVRegs(3))); + assert(OpdMapper.getVRegs(0).empty()); + assert(OpdMapper.getVRegs(2).empty()); + assert(OpdMapper.getVRegs(3).empty()); substituteSimpleCopyRegs(OpdMapper, 4); // VGPR input val constrainOpWithReadfirstlane(MI, MRI, 2); // Source value @@ -1327,7 +1816,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl( break; } case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: { - switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { + auto IntrID = MI.getIntrinsicID(); + switch (IntrID) { case Intrinsic::amdgcn_buffer_load: { executeInWaterfallLoop(MI, MRI, { 2 }); return; @@ -1335,23 +1825,70 @@ void AMDGPURegisterBankInfo::applyMappingImpl( case Intrinsic::amdgcn_ds_ordered_add: case Intrinsic::amdgcn_ds_ordered_swap: { // This is only allowed to execute with 1 lane, so readfirstlane is safe. - assert(empty(OpdMapper.getVRegs(0))); + assert(OpdMapper.getVRegs(0).empty()); substituteSimpleCopyRegs(OpdMapper, 3); constrainOpWithReadfirstlane(MI, MRI, 2); // M0 return; } + case Intrinsic::amdgcn_ds_gws_init: + case Intrinsic::amdgcn_ds_gws_barrier: + case Intrinsic::amdgcn_ds_gws_sema_br: { + // Only the first lane is executes, so readfirstlane is safe. + substituteSimpleCopyRegs(OpdMapper, 1); + constrainOpWithReadfirstlane(MI, MRI, 2); // M0 + return; + } + case Intrinsic::amdgcn_ds_gws_sema_v: + case Intrinsic::amdgcn_ds_gws_sema_p: + case Intrinsic::amdgcn_ds_gws_sema_release_all: { + // Only the first lane is executes, so readfirstlane is safe. + constrainOpWithReadfirstlane(MI, MRI, 1); // M0 + return; + } case Intrinsic::amdgcn_s_sendmsg: case Intrinsic::amdgcn_s_sendmsghalt: { // FIXME: Should this use a waterfall loop? constrainOpWithReadfirstlane(MI, MRI, 2); // M0 return; } - default: + case Intrinsic::amdgcn_raw_buffer_load: + case Intrinsic::amdgcn_raw_buffer_load_format: + case Intrinsic::amdgcn_raw_tbuffer_load: + case Intrinsic::amdgcn_raw_buffer_store: + case Intrinsic::amdgcn_raw_buffer_store_format: + case Intrinsic::amdgcn_raw_tbuffer_store: { + applyDefaultMapping(OpdMapper); + executeInWaterfallLoop(MI, MRI, {2, 4}); + return; + } + case Intrinsic::amdgcn_struct_buffer_load: + case Intrinsic::amdgcn_struct_buffer_store: + case Intrinsic::amdgcn_struct_tbuffer_load: + case Intrinsic::amdgcn_struct_tbuffer_store: { + applyDefaultMapping(OpdMapper); + executeInWaterfallLoop(MI, MRI, {2, 5}); + return; + } + default: { + if (const AMDGPU::RsrcIntrinsic *RSrcIntrin = + AMDGPU::lookupRsrcIntrinsic(IntrID)) { + // Non-images can have complications from operands that allow both SGPR + // and VGPR. For now it's too complicated to figure out the final opcode + // to derive the register bank from the MCInstrDesc. 
+ if (RSrcIntrin->IsImage) { + applyMappingImage(MI, OpdMapper, MRI, RSrcIntrin->RsrcArg); + return; + } + } + break; } + } break; } - case AMDGPU::G_LOAD: { + case AMDGPU::G_LOAD: + case AMDGPU::G_ZEXTLOAD: + case AMDGPU::G_SEXTLOAD: { if (applyMappingWideLoad(MI, OpdMapper, MRI)) return; break; @@ -1451,26 +1988,72 @@ AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const { MI.getNumOperands()); } +const RegisterBankInfo::InstructionMapping & +AMDGPURegisterBankInfo::getImageMapping(const MachineRegisterInfo &MRI, + const MachineInstr &MI, + int RsrcIdx) const { + // The reported argument index is relative to the IR intrinsic call arguments, + // so we need to shift by the number of defs and the intrinsic ID. + RsrcIdx += MI.getNumExplicitDefs() + 1; + + const int NumOps = MI.getNumOperands(); + SmallVector OpdsMapping(NumOps); + + // TODO: Should packed/unpacked D16 difference be reported here as part of + // the value mapping? + for (int I = 0; I != NumOps; ++I) { + if (!MI.getOperand(I).isReg()) + continue; + + Register OpReg = MI.getOperand(I).getReg(); + unsigned Size = getSizeInBits(OpReg, MRI, *TRI); + + // FIXME: Probably need a new intrinsic register bank searchable table to + // handle arbitrary intrinsics easily. + // + // If this has a sampler, it immediately follows rsrc. + const bool MustBeSGPR = I == RsrcIdx || I == RsrcIdx + 1; + + if (MustBeSGPR) { + // If this must be an SGPR, so we must report whatever it is as legal. + unsigned NewBank = getRegBankID(OpReg, MRI, *TRI, AMDGPU::SGPRRegBankID); + OpdsMapping[I] = AMDGPU::getValueMapping(NewBank, Size); + } else { + // Some operands must be VGPR, and these are easy to copy to. + OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); + } + } + + return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), NumOps); +} + const RegisterBankInfo::InstructionMapping & AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const { const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); - SmallVector OpdsMapping(MI.getNumOperands()); + SmallVector OpdsMapping(2); unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); LLT LoadTy = MRI.getType(MI.getOperand(0).getReg()); - unsigned PtrSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI); + Register PtrReg = MI.getOperand(1).getReg(); + LLT PtrTy = MRI.getType(PtrReg); + unsigned AS = PtrTy.getAddressSpace(); + unsigned PtrSize = PtrTy.getSizeInBits(); const ValueMapping *ValMapping; const ValueMapping *PtrMapping; - if (isInstrUniform(MI)) { + const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI); + + if (PtrBank == &AMDGPU::SGPRRegBank && + (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS && + AS != AMDGPUAS::PRIVATE_ADDRESS) && + isInstrUniformNonExtLoadAlign4(MI)) { // We have a uniform instruction so we want to use an SMRD load ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize); } else { ValMapping = AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy); - // FIXME: What would happen if we used SGPRRegBankID here? PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize); } @@ -1494,6 +2077,31 @@ AMDGPURegisterBankInfo::getRegBankID(Register Reg, return Bank ? 
Bank->getID() : Default; } + +static unsigned regBankUnion(unsigned RB0, unsigned RB1) { + return (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID) ? + AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; +} + +const RegisterBankInfo::ValueMapping * +AMDGPURegisterBankInfo::getSGPROpMapping(Register Reg, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const { + // Lie and claim anything is legal, even though this needs to be an SGPR + // applyMapping will have to deal with it as a waterfall loop. + unsigned Bank = getRegBankID(Reg, MRI, TRI, AMDGPU::SGPRRegBankID); + unsigned Size = getSizeInBits(Reg, MRI, TRI); + return AMDGPU::getValueMapping(Bank, Size); +} + +const RegisterBankInfo::ValueMapping * +AMDGPURegisterBankInfo::getVGPROpMapping(Register Reg, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const { + unsigned Size = getSizeInBits(Reg, MRI, TRI); + return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); +} + /// /// This function must return a legal mapping, because /// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called @@ -1536,7 +2144,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { int ResultBank = -1; for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { - unsigned Reg = MI.getOperand(I).getReg(); + Register Reg = MI.getOperand(I).getReg(); const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI); // FIXME: Assuming VGPR for any undetermined inputs. @@ -1660,7 +2268,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { LLVM_FALLTHROUGH; } - case AMDGPU::G_GEP: case AMDGPU::G_ADD: case AMDGPU::G_SUB: @@ -1669,15 +2276,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_LSHR: case AMDGPU::G_ASHR: case AMDGPU::G_UADDO: - case AMDGPU::G_SADDO: case AMDGPU::G_USUBO: - case AMDGPU::G_SSUBO: case AMDGPU::G_UADDE: case AMDGPU::G_SADDE: case AMDGPU::G_USUBE: case AMDGPU::G_SSUBE: - case AMDGPU::G_UMULH: - case AMDGPU::G_SMULH: case AMDGPU::G_SMIN: case AMDGPU::G_SMAX: case AMDGPU::G_UMIN: @@ -1692,17 +2295,32 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_FPTOUI: case AMDGPU::G_FMUL: case AMDGPU::G_FMA: + case AMDGPU::G_FMAD: case AMDGPU::G_FSQRT: + case AMDGPU::G_FFLOOR: + case AMDGPU::G_FCEIL: + case AMDGPU::G_FRINT: case AMDGPU::G_SITOFP: case AMDGPU::G_UITOFP: case AMDGPU::G_FPTRUNC: case AMDGPU::G_FPEXT: case AMDGPU::G_FEXP2: case AMDGPU::G_FLOG2: + case AMDGPU::G_FMINNUM: + case AMDGPU::G_FMAXNUM: + case AMDGPU::G_FMINNUM_IEEE: + case AMDGPU::G_FMAXNUM_IEEE: case AMDGPU::G_FCANONICALIZE: case AMDGPU::G_INTRINSIC_TRUNC: case AMDGPU::G_INTRINSIC_ROUND: + case AMDGPU::G_AMDGPU_FFBH_U32: + return getDefaultMappingVOP(MI); + case AMDGPU::G_UMULH: + case AMDGPU::G_SMULH: { + if (Subtarget.hasScalarMulHiInsts() && isSALUMapping(MI)) + return getDefaultMappingSOP(MI); return getDefaultMappingVOP(MI); + } case AMDGPU::G_IMPLICIT_DEF: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); @@ -1710,12 +2328,19 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } case AMDGPU::G_FCONSTANT: case AMDGPU::G_CONSTANT: - case AMDGPU::G_FRAME_INDEX: + case AMDGPU::G_GLOBAL_VALUE: case AMDGPU::G_BLOCK_ADDR: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); break; } + case 
AMDGPU::G_FRAME_INDEX: { + // TODO: This should be the same as other constants, but eliminateFrameIndex + // currently assumes VALU uses. + unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); + break; + } case AMDGPU::G_INSERT: { unsigned BankID = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; @@ -1737,8 +2362,25 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[2] = nullptr; break; } - case AMDGPU::G_MERGE_VALUES: case AMDGPU::G_BUILD_VECTOR: + case AMDGPU::G_BUILD_VECTOR_TRUNC: { + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + if (DstTy == LLT::vector(2, 16)) { + unsigned DstSize = DstTy.getSizeInBits(); + unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + unsigned Src0BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI); + unsigned Src1BankID = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI); + unsigned DstBankID = regBankUnion(Src0BankID, Src1BankID); + + OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize); + OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize); + OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize); + break; + } + + LLVM_FALLTHROUGH; + } + case AMDGPU::G_MERGE_VALUES: case AMDGPU::G_CONCAT_VECTORS: { unsigned Bank = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; @@ -1760,6 +2402,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_CTTZ_ZERO_UNDEF: case AMDGPU::G_CTPOP: case AMDGPU::G_BSWAP: + case AMDGPU::G_BITREVERSE: case AMDGPU::G_FABS: case AMDGPU::G_FNEG: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); @@ -1848,7 +2491,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { Op3Bank == AMDGPU::SGPRRegBankID && (Size == 32 || (Size == 64 && (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) && - MF.getSubtarget().hasScalarCompareEq64())); + Subtarget.hasScalarCompareEq64())); unsigned Op0Bank = CanUseSCC ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID; @@ -1859,14 +2502,16 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; } case AMDGPU::G_EXTRACT_VECTOR_ELT: { - unsigned OutputBankID = isSALUMapping(MI) ? - AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; + // VGPR index can be used for waterfall when indexing a SGPR vector. + unsigned SrcBankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI); + unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI); + unsigned OutputBankID = regBankUnion(SrcBankID, IdxBank); - OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, SrcSize); - OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, SrcSize); + OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(OutputBankID, DstSize); + OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, SrcSize); // The index can be either if the source vector is VGPR. 
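The bank choices in these vector mappings follow the regBankUnion rule introduced earlier in this hunk: a result may stay in an SGPR only when every contributing operand is in an SGPR; any VGPR input forces a VGPR result. A tiny self-contained sketch of that rule (the Bank enum is a stand-in, not LLVM's RegisterBank class):

// Illustrative sketch only; simplified stand-in types, not LLVM's RegisterBank.
#include <cassert>

enum Bank { SGPR, VGPR };

// Same rule as the patch's regBankUnion: SGPR only if both inputs are SGPR.
static Bank regBankUnion(Bank RB0, Bank RB1) {
  return (RB0 == SGPR && RB1 == SGPR) ? SGPR : VGPR;
}

int main() {
  assert(regBankUnion(SGPR, SGPR) == SGPR);  // uniform inputs stay scalar
  assert(regBankUnion(SGPR, VGPR) == VGPR);  // any divergent input forces VGPR
  assert(regBankUnion(VGPR, VGPR) == VGPR);
  return 0;
}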
OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize); @@ -1879,15 +2524,18 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits(); - unsigned InsertEltBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI); - unsigned IdxBank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI); + unsigned SrcBankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI); + unsigned InsertEltBankID = getRegBankID(MI.getOperand(2).getReg(), + MRI, *TRI); + unsigned IdxBankID = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI); OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize); - OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize); - OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBank, InsertSize); + OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, VecSize); + OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(InsertEltBankID, + InsertSize); // The index can be either if the source vector is VGPR. - OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize); + OpdsMapping[3] = AMDGPU::getValueMapping(IdxBankID, IdxSize); break; } case AMDGPU::G_UNMERGE_VALUES: { @@ -1903,11 +2551,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; } case AMDGPU::G_INTRINSIC: { - switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { + switch (MI.getIntrinsicID()) { default: return getInvalidInstructionMapping(); - case Intrinsic::maxnum: - case Intrinsic::minnum: case Intrinsic::amdgcn_div_fmas: case Intrinsic::amdgcn_trig_preop: case Intrinsic::amdgcn_sin: @@ -1938,6 +2584,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_mbcnt_hi: case Intrinsic::amdgcn_ubfe: case Intrinsic::amdgcn_sbfe: + case Intrinsic::amdgcn_mul_u24: + case Intrinsic::amdgcn_mul_i24: case Intrinsic::amdgcn_lerp: case Intrinsic::amdgcn_sad_u8: case Intrinsic::amdgcn_msad_u8: @@ -1956,10 +2604,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_udot4: case Intrinsic::amdgcn_sdot8: case Intrinsic::amdgcn_udot8: - case Intrinsic::amdgcn_fdiv_fast: case Intrinsic::amdgcn_wwm: case Intrinsic::amdgcn_wqm: return getDefaultMappingVOP(MI); + case Intrinsic::amdgcn_ds_swizzle: case Intrinsic::amdgcn_ds_permute: case Intrinsic::amdgcn_ds_bpermute: case Intrinsic::amdgcn_update_dpp: @@ -2040,7 +2688,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } case Intrinsic::amdgcn_readlane: { // This must be an SGPR, but accept a VGPR. 
- unsigned IdxReg = MI.getOperand(3).getReg(); + Register IdxReg = MI.getOperand(3).getReg(); unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits(); unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID); OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize); @@ -2055,10 +2703,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } case Intrinsic::amdgcn_writelane: { unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - unsigned SrcReg = MI.getOperand(2).getReg(); + Register SrcReg = MI.getOperand(2).getReg(); unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits(); unsigned SrcBank = getRegBankID(SrcReg, MRI, *TRI, AMDGPU::SGPRRegBankID); - unsigned IdxReg = MI.getOperand(3).getReg(); + Register IdxReg = MI.getOperand(3).getReg(); unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits(); unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize); @@ -2081,9 +2729,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; } case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: { - switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { - default: - return getInvalidInstructionMapping(); + auto IntrID = MI.getIntrinsicID(); + switch (IntrID) { case Intrinsic::amdgcn_s_getreg: case Intrinsic::amdgcn_s_memtime: case Intrinsic::amdgcn_s_memrealtime: @@ -2123,18 +2770,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); break; case Intrinsic::amdgcn_exp: - OpdsMapping[0] = nullptr; // IntrinsicID - // FIXME: These are immediate values which can't be read from registers. - OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); - OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); // FIXME: Could we support packed types here? OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); - // FIXME: These are immediate values which can't be read from registers. 
- OpdsMapping[7] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); - OpdsMapping[8] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); break; case Intrinsic::amdgcn_buffer_load: { Register RSrc = MI.getOperand(2).getReg(); // SGPR @@ -2169,11 +2809,97 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32); break; } - case Intrinsic::amdgcn_end_cf: { + case Intrinsic::amdgcn_end_cf: + case Intrinsic::amdgcn_init_exec: { + unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI); + OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); + break; + } + case Intrinsic::amdgcn_else: { + unsigned WaveSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI); + OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1); + OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize); + OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize); + break; + } + case Intrinsic::amdgcn_kill: { + OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1); + break; + } + case Intrinsic::amdgcn_raw_buffer_load: + case Intrinsic::amdgcn_raw_tbuffer_load: { + // FIXME: Should make intrinsic ID the last operand of the instruction, + // then this would be the same as store + OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); + OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); + OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + break; + } + case Intrinsic::amdgcn_raw_buffer_store: + case Intrinsic::amdgcn_raw_buffer_store_format: + case Intrinsic::amdgcn_raw_tbuffer_store: { + OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI); + OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); + OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + break; + } + case Intrinsic::amdgcn_struct_buffer_load: + case Intrinsic::amdgcn_struct_tbuffer_load: { + OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); + OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); + OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI); + break; + } + case Intrinsic::amdgcn_struct_buffer_store: + case Intrinsic::amdgcn_struct_tbuffer_store: { + OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI); + OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); + OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI); + break; + } + case Intrinsic::amdgcn_init_exec_from_input: { unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI); OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); + OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); + break; + } + case Intrinsic::amdgcn_ds_gws_init: + case Intrinsic::amdgcn_ds_gws_barrier: + case Intrinsic::amdgcn_ds_gws_sema_br: { + OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); + + // This must be an SGPR, but 
accept a VGPR. + unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI, + AMDGPU::SGPRRegBankID); + OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32); break; } + case Intrinsic::amdgcn_ds_gws_sema_v: + case Intrinsic::amdgcn_ds_gws_sema_p: + case Intrinsic::amdgcn_ds_gws_sema_release_all: { + // This must be an SGPR, but accept a VGPR. + unsigned Bank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI, + AMDGPU::SGPRRegBankID); + OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32); + break; + } + default: + if (const AMDGPU::RsrcIntrinsic *RSrcIntrin = + AMDGPU::lookupRsrcIntrinsic(IntrID)) { + // Non-images can have complications from operands that allow both SGPR + // and VGPR. For now it's too complicated to figure out the final opcode + // to derive the register bank from the MCInstrDesc. + if (RSrcIntrin->IsImage) + return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg); + } + + return getInvalidInstructionMapping(); } break; } @@ -2216,6 +2942,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } case AMDGPU::G_LOAD: + case AMDGPU::G_ZEXTLOAD: + case AMDGPU::G_SEXTLOAD: return getInstrMappingForLoad(MI); case AMDGPU::G_ATOMICRMW_XCHG: @@ -2228,6 +2956,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_ATOMICRMW_MIN: case AMDGPU::G_ATOMICRMW_UMAX: case AMDGPU::G_ATOMICRMW_UMIN: + case AMDGPU::G_ATOMICRMW_FADD: case AMDGPU::G_ATOMIC_CMPXCHG: { return getDefaultMappingAllVGPR(MI); } @@ -2247,4 +2976,3 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { getOperandsMapping(OpdsMapping), MI.getNumOperands()); } - diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h index f3a96e2a6128..a14b74961118 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -13,6 +13,8 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" @@ -23,7 +25,9 @@ namespace llvm { class LLT; +class GCNSubtarget; class MachineIRBuilder; +class SIInstrInfo; class SIRegisterInfo; class TargetRegisterInfo; @@ -36,9 +40,27 @@ protected: #include "AMDGPUGenRegisterBank.inc" }; class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { + const GCNSubtarget &Subtarget; const SIRegisterInfo *TRI; - - void executeInWaterfallLoop(MachineInstr &MI, + const SIInstrInfo *TII; + + bool collectWaterfallOperands( + SmallSet &SGPROperandRegs, + MachineInstr &MI, + MachineRegisterInfo &MRI, + ArrayRef OpIndices) const; + + bool executeInWaterfallLoop( + MachineIRBuilder &B, + iterator_range Range, + SmallSet &SGPROperandRegs, + MachineRegisterInfo &MRI) const; + + bool executeInWaterfallLoop(MachineIRBuilder &B, + MachineInstr &MI, + MachineRegisterInfo &MRI, + ArrayRef OpIndices) const; + bool executeInWaterfallLoop(MachineInstr &MI, MachineRegisterInfo &MRI, ArrayRef OpIndices) const; @@ -47,6 +69,19 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { bool applyMappingWideLoad(MachineInstr &MI, const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, MachineRegisterInfo &MRI) const; + bool + applyMappingImage(MachineInstr &MI, + const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, + MachineRegisterInfo &MRI, int RSrcIdx) const; + + Register handleD16VData(MachineIRBuilder &B, 
MachineRegisterInfo &MRI, + Register Reg) const; + + std::pair + splitBufferOffsets(MachineIRBuilder &B, Register Offset) const; + + MachineInstr *selectStoreIntrinsic(MachineIRBuilder &B, + MachineInstr &MI) const; /// See RegisterBankInfo::applyMapping. void applyMappingImpl(const OperandsMapper &OpdMapper) const override; @@ -58,6 +93,16 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { const TargetRegisterInfo &TRI, unsigned Default = AMDGPU::VGPRRegBankID) const; + // Return a value mapping for an operand that is required to be an SGPR. + const ValueMapping *getSGPROpMapping(Register Reg, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const; + + // Return a value mapping for an operand that is required to be a VGPR. + const ValueMapping *getVGPROpMapping(Register Reg, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const; + /// Split 64-bit value \p Reg into two 32-bit halves and populate them into \p /// Regs. This appropriately sets the regbank of the new registers. void split64BitValueForMapping(MachineIRBuilder &B, @@ -90,8 +135,13 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const; const InstructionMapping &getDefaultMappingAllVGPR( const MachineInstr &MI) const; + + const InstructionMapping &getImageMapping(const MachineRegisterInfo &MRI, + const MachineInstr &MI, + int RsrcIdx) const; + public: - AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI); + AMDGPURegisterBankInfo(const GCNSubtarget &STI); unsigned copyCost(const RegisterBank &A, const RegisterBank &B, unsigned Size) const override; diff --git a/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/lib/Target/AMDGPU/AMDGPURegisterBanks.td index 9555694fb106..00f53b157577 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBanks.td +++ b/lib/Target/AMDGPU/AMDGPURegisterBanks.td @@ -7,14 +7,14 @@ //===----------------------------------------------------------------------===// def SGPRRegBank : RegisterBank<"SGPR", - [SReg_32, SReg_64, SReg_128, SReg_256, SReg_512] + [SReg_32, SReg_64, SReg_128, SReg_256, SReg_512, SReg_1024] >; def VGPRRegBank : RegisterBank<"VGPR", - [VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512] + [VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512, VReg_1024] >; def SCCRegBank : RegisterBank <"SCC", [SReg_32, SCC_CLASS]>; // It is helpful to distinguish conditions from ordinary SGPRs. -def VCCRegBank : RegisterBank <"VCC", [SReg_64]>; +def VCCRegBank : RegisterBank <"VCC", [SReg_1]>; diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp index 7cffdf1a4dcf..9806e6b0714f 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp @@ -26,19 +26,59 @@ AMDGPURegisterInfo::AMDGPURegisterInfo() : AMDGPUGenRegisterInfo(0) {} // they are not supported at this time. 
//===----------------------------------------------------------------------===// -unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) { - static const unsigned SubRegs[] = { - AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4, - AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, AMDGPU::sub8, AMDGPU::sub9, - AMDGPU::sub10, AMDGPU::sub11, AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, - AMDGPU::sub15, AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19, - AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23, AMDGPU::sub24, - AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27, AMDGPU::sub28, AMDGPU::sub29, - AMDGPU::sub30, AMDGPU::sub31 - }; - - assert(Channel < array_lengthof(SubRegs)); - return SubRegs[Channel]; +// Table of NumRegs sized pieces at every 32-bit offset. +static const uint16_t SubRegFromChannelTable[][32] = { + { AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, + AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, + AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, + AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, + AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19, + AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23, + AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27, + AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31 + }, + { + AMDGPU::sub0_sub1, AMDGPU::sub1_sub2, AMDGPU::sub2_sub3, AMDGPU::sub3_sub4, + AMDGPU::sub4_sub5, AMDGPU::sub5_sub6, AMDGPU::sub6_sub7, AMDGPU::sub7_sub8, + AMDGPU::sub8_sub9, AMDGPU::sub9_sub10, AMDGPU::sub10_sub11, AMDGPU::sub11_sub12, + AMDGPU::sub12_sub13, AMDGPU::sub13_sub14, AMDGPU::sub14_sub15, AMDGPU::sub15_sub16, + AMDGPU::sub16_sub17, AMDGPU::sub17_sub18, AMDGPU::sub18_sub19, AMDGPU::sub19_sub20, + AMDGPU::sub20_sub21, AMDGPU::sub21_sub22, AMDGPU::sub22_sub23, AMDGPU::sub23_sub24, + AMDGPU::sub24_sub25, AMDGPU::sub25_sub26, AMDGPU::sub26_sub27, AMDGPU::sub27_sub28, + AMDGPU::sub28_sub29, AMDGPU::sub29_sub30, AMDGPU::sub30_sub31, AMDGPU::NoSubRegister + }, + { + AMDGPU::sub0_sub1_sub2, AMDGPU::sub1_sub2_sub3, AMDGPU::sub2_sub3_sub4, AMDGPU::sub3_sub4_sub5, + AMDGPU::sub4_sub5_sub6, AMDGPU::sub5_sub6_sub7, AMDGPU::sub6_sub7_sub8, AMDGPU::sub7_sub8_sub9, + AMDGPU::sub8_sub9_sub10, AMDGPU::sub9_sub10_sub11, AMDGPU::sub10_sub11_sub12, AMDGPU::sub11_sub12_sub13, + AMDGPU::sub12_sub13_sub14, AMDGPU::sub13_sub14_sub15, AMDGPU::sub14_sub15_sub16, AMDGPU::sub15_sub16_sub17, + AMDGPU::sub16_sub17_sub18, AMDGPU::sub17_sub18_sub19, AMDGPU::sub18_sub19_sub20, AMDGPU::sub19_sub20_sub21, + AMDGPU::sub20_sub21_sub22, AMDGPU::sub21_sub22_sub23, AMDGPU::sub22_sub23_sub24, AMDGPU::sub23_sub24_sub25, + AMDGPU::sub24_sub25_sub26, AMDGPU::sub25_sub26_sub27, AMDGPU::sub26_sub27_sub28, AMDGPU::sub27_sub28_sub29, + AMDGPU::sub28_sub29_sub30, AMDGPU::sub29_sub30_sub31, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister + }, + { + AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2_sub3_sub4, AMDGPU::sub2_sub3_sub4_sub5, AMDGPU::sub3_sub4_sub5_sub6, + AMDGPU::sub4_sub5_sub6_sub7, AMDGPU::sub5_sub6_sub7_sub8, AMDGPU::sub6_sub7_sub8_sub9, AMDGPU::sub7_sub8_sub9_sub10, + AMDGPU::sub8_sub9_sub10_sub11, AMDGPU::sub9_sub10_sub11_sub12, AMDGPU::sub10_sub11_sub12_sub13, AMDGPU::sub11_sub12_sub13_sub14, + AMDGPU::sub12_sub13_sub14_sub15, AMDGPU::sub13_sub14_sub15_sub16, AMDGPU::sub14_sub15_sub16_sub17, AMDGPU::sub15_sub16_sub17_sub18, + AMDGPU::sub16_sub17_sub18_sub19, AMDGPU::sub17_sub18_sub19_sub20, AMDGPU::sub18_sub19_sub20_sub21, AMDGPU::sub19_sub20_sub21_sub22, + 
AMDGPU::sub20_sub21_sub22_sub23, AMDGPU::sub21_sub22_sub23_sub24, AMDGPU::sub22_sub23_sub24_sub25, AMDGPU::sub23_sub24_sub25_sub26, + AMDGPU::sub24_sub25_sub26_sub27, AMDGPU::sub25_sub26_sub27_sub28, AMDGPU::sub26_sub27_sub28_sub29, AMDGPU::sub27_sub28_sub29_sub30, + AMDGPU::sub28_sub29_sub30_sub31, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister + } +}; + +// FIXME: TableGen should generate something to make this manageable for all +// register classes. At a minimum we could use the opposite of +// composeSubRegIndices and go up from the base 32-bit subreg. +unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel, unsigned NumRegs) { + const unsigned NumRegIndex = NumRegs - 1; + + assert(NumRegIndex < array_lengthof(SubRegFromChannelTable) && + "Not implemented"); + assert(Channel < array_lengthof(SubRegFromChannelTable[0])); + return SubRegFromChannelTable[NumRegIndex][Channel]; } void AMDGPURegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const { diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/lib/Target/AMDGPU/AMDGPURegisterInfo.h index 3453a8c1b0b3..9e713ca804a1 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterInfo.h +++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.h @@ -28,7 +28,7 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo { /// \returns the sub reg enum value for the given \p Channel /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0) - static unsigned getSubRegFromChannel(unsigned Channel); + static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1); void reserveRegisterTuples(BitVector &, unsigned Reg) const; }; diff --git a/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/lib/Target/AMDGPU/AMDGPUSearchableTables.td index f8703c36127a..26b8b7840270 100644 --- a/lib/Target/AMDGPU/AMDGPUSearchableTables.td +++ b/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -81,6 +81,8 @@ def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; +def : SourceOfDivergence; +def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; @@ -92,6 +94,8 @@ def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; +def : SourceOfDivergence; +def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 1eb9b83456c5..3bb6dd4571c0 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -175,6 +175,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) : HasFminFmaxLegacy(true), EnablePromoteAlloca(false), HasTrigReducedRange(false), + MaxWavesPerEU(10), LocalMemorySize(0), WavefrontSize(0) { } @@ -261,6 +262,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, AddNoCarryInsts(false), HasUnpackedD16VMem(false), LDSMisalignedBug(false), + HasMFMAInlineLiteralBug(false), ScalarizeGlobal(false), @@ -278,9 +280,10 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)), TLInfo(TM, *this), FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) { + MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(this); CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering())); Legalizer.reset(new AMDGPULegalizerInfo(*this, TM)); - RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo())); + 
RegBankInfo.reset(new AMDGPURegisterBankInfo(*this)); InstSelector.reset(new AMDGPUInstructionSelector( *this, *static_cast(RegBankInfo.get()), TM)); } @@ -489,28 +492,28 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const { } uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F, - unsigned &MaxAlign) const { + Align &MaxAlign) const { assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL || F.getCallingConv() == CallingConv::SPIR_KERNEL); const DataLayout &DL = F.getParent()->getDataLayout(); uint64_t ExplicitArgBytes = 0; - MaxAlign = 1; + MaxAlign = Align::None(); for (const Argument &Arg : F.args()) { Type *ArgTy = Arg.getType(); - unsigned Align = DL.getABITypeAlignment(ArgTy); + const Align Alignment(DL.getABITypeAlignment(ArgTy)); uint64_t AllocSize = DL.getTypeAllocSize(ArgTy); - ExplicitArgBytes = alignTo(ExplicitArgBytes, Align) + AllocSize; - MaxAlign = std::max(MaxAlign, Align); + ExplicitArgBytes = alignTo(ExplicitArgBytes, Alignment) + AllocSize; + MaxAlign = std::max(MaxAlign, Alignment); } return ExplicitArgBytes; } unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F, - unsigned &MaxAlign) const { + Align &MaxAlign) const { uint64_t ExplicitArgBytes = getExplicitKernArgSize(F, MaxAlign); unsigned ExplicitOffset = getExplicitKernelArgOffset(F); @@ -518,7 +521,7 @@ unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F, uint64_t TotalSize = ExplicitOffset + ExplicitArgBytes; unsigned ImplicitBytes = getImplicitArgNumBytes(F); if (ImplicitBytes != 0) { - unsigned Alignment = getAlignmentForImplicitArgPtr(); + const Align Alignment = getAlignmentForImplicitArgPtr(); TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes; } @@ -566,7 +569,7 @@ bool GCNSubtarget::hasMadF16() const { unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const { if (getGeneration() >= AMDGPUSubtarget::GFX10) - return 10; + return getMaxWavesPerEU(); if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { if (SGPRs <= 80) @@ -591,25 +594,12 @@ unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const { } unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const { - if (VGPRs <= 24) - return 10; - if (VGPRs <= 28) - return 9; - if (VGPRs <= 32) - return 8; - if (VGPRs <= 36) - return 7; - if (VGPRs <= 40) - return 6; - if (VGPRs <= 48) - return 5; - if (VGPRs <= 64) - return 4; - if (VGPRs <= 84) - return 3; - if (VGPRs <= 128) - return 2; - return 1; + unsigned MaxWaves = getMaxWavesPerEU(); + unsigned Granule = getVGPRAllocGranule(); + if (VGPRs < Granule) + return MaxWaves; + unsigned RoundedRegs = ((VGPRs + Granule - 1) / Granule) * Granule; + return std::min(std::max(getTotalNumVGPRs() / RoundedRegs, 1u), MaxWaves); } unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const { @@ -629,6 +619,20 @@ unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const { return 2; // VCC. 
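The rewritten getOccupancyWithNumVGPRs above replaces the hard-coded ladder with arithmetic: round the VGPR count up to the allocation granule, divide the total VGPR budget by the rounded count, and clamp to [1, MaxWavesPerEU]. A standalone sketch with placeholder parameters (granule 4, 256 VGPRs, and 10 waves are example values for illustration, not tied to any particular subtarget):

// Illustrative sketch only; the parameter defaults are example values.
#include <algorithm>
#include <cstdio>

static unsigned occupancyWithNumVGPRs(unsigned VGPRs, unsigned Granule = 4,
                                      unsigned TotalVGPRs = 256,
                                      unsigned MaxWaves = 10) {
  if (VGPRs < Granule)
    return MaxWaves;
  // Round the request up to a whole allocation granule, see how many such
  // allocations fit in the register file, and clamp to [1, MaxWaves].
  unsigned RoundedRegs = ((VGPRs + Granule - 1) / Granule) * Granule;
  return std::min(std::max(TotalVGPRs / RoundedRegs, 1u), MaxWaves);
}

int main() {
  std::printf("%u %u %u\n", occupancyWithNumVGPRs(24),   // 10
              occupancyWithNumVGPRs(84),                 // 3
              occupancyWithNumVGPRs(200));               // 1
  return 0;
}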
} +unsigned GCNSubtarget::computeOccupancy(const MachineFunction &MF, + unsigned LDSSize, + unsigned NumSGPRs, + unsigned NumVGPRs) const { + unsigned Occupancy = + std::min(getMaxWavesPerEU(), + getOccupancyWithLocalMemSize(LDSSize, MF.getFunction())); + if (NumSGPRs) + Occupancy = std::min(Occupancy, getOccupancyWithNumSGPRs(NumSGPRs)); + if (NumVGPRs) + Occupancy = std::min(Occupancy, getOccupancyWithNumVGPRs(NumVGPRs)); + return Occupancy; +} + unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const { const Function &F = MF.getFunction(); const SIMachineFunctionInfo &MFI = *MF.getInfo(); @@ -878,8 +882,8 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation { void GCNSubtarget::getPostRAMutations( std::vector> &Mutations) const { - Mutations.push_back(llvm::make_unique(&InstrInfo)); - Mutations.push_back(llvm::make_unique(&InstrInfo)); + Mutations.push_back(std::make_unique(&InstrInfo)); + Mutations.push_back(std::make_unique(&InstrInfo)); } const AMDGPUSubtarget &AMDGPUSubtarget::get(const MachineFunction &MF) { diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 78c3b823946d..936feb00c62b 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -75,6 +75,7 @@ protected: bool HasFminFmaxLegacy; bool EnablePromoteAlloca; bool HasTrigReducedRange; + unsigned MaxWavesPerEU; int LocalMemorySize; unsigned WavefrontSize; @@ -195,8 +196,8 @@ public: return LocalMemorySize; } - unsigned getAlignmentForImplicitArgPtr() const { - return isAmdHsaOS() ? 8 : 4; + Align getAlignmentForImplicitArgPtr() const { + return isAmdHsaOS() ? Align(8) : Align(4); } /// Returns the offset in bytes from the start of the input buffer @@ -223,7 +224,9 @@ public: /// subtarget. virtual unsigned getMinWavesPerEU() const = 0; - unsigned getMaxWavesPerEU() const { return 10; } + /// \returns Maximum number of waves per execution unit supported by the + /// subtarget without any kind of limitation. + unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; } /// Creates value range metadata on an workitemid.* inrinsic call or load. bool makeLIDRangeMetadata(Instruction *I) const; @@ -235,16 +238,17 @@ public: return 16; return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0); } - uint64_t getExplicitKernArgSize(const Function &F, - unsigned &MaxAlign) const; - unsigned getKernArgSegmentSize(const Function &F, - unsigned &MaxAlign) const; + uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const; + unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const; virtual ~AMDGPUSubtarget() {} }; class GCNSubtarget : public AMDGPUGenSubtargetInfo, public AMDGPUSubtarget { + + using AMDGPUSubtarget::getMaxWavesPerEU; + public: enum TrapHandlerAbi { TrapHandlerAbiNone = 0, @@ -362,6 +366,7 @@ protected: bool CaymanISA; bool CFALUBug; bool LDSMisalignedBug; + bool HasMFMAInlineLiteralBug; bool HasVertexCache; short TexVTXClauseSize; bool ScalarizeGlobal; @@ -416,7 +421,7 @@ public: return CallLoweringInfo.get(); } - const InstructionSelector *getInstructionSelector() const override { + InstructionSelector *getInstructionSelector() const override { return InstSelector.get(); } @@ -544,6 +549,14 @@ public: return GFX9Insts; } + bool hasScalarPackInsts() const { + return GFX9Insts; + } + + bool hasScalarMulHiInsts() const { + return GFX9Insts; + } + TrapHandlerAbi getTrapHandlerAbi() const { return isAmdHsaOS() ? 
TrapHandlerAbiHsa : TrapHandlerAbiNone; } @@ -611,6 +624,11 @@ public: return getGeneration() >= AMDGPUSubtarget::GFX9; } + /// \returns If target supports S_DENORM_MODE. + bool hasDenormModeInst() const { + return getGeneration() >= AMDGPUSubtarget::GFX10; + } + bool useFlatForGlobal() const { return FlatForGlobal; } @@ -848,9 +866,7 @@ public: // on the pointer value itself may rely on the alignment / known low bits of // the pointer. Set this to something above the minimum to avoid needing // dynamic realignment in common cases. - unsigned getStackAlignment() const { - return 16; - } + Align getStackAlignment() const { return Align(16); } bool enableMachineScheduler() const override { return true; @@ -881,12 +897,6 @@ public: return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize); } - /// \returns Maximum number of waves per execution unit supported by the - /// subtarget without any kind of limitation. - unsigned getMaxWavesPerEU() const { - return AMDGPU::IsaInfo::getMaxWavesPerEU(); - } - /// \returns Number of waves per work group supported by the subtarget and /// limited by given \p FlatWorkGroupSize. unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const { @@ -944,6 +954,14 @@ public: return HasDPP; } + bool hasDPPBroadcasts() const { + return HasDPP && getGeneration() < GFX10; + } + + bool hasDPPWavefrontShifts() const { + return HasDPP && getGeneration() < GFX10; + } + bool hasDPP8() const { return HasDPP8; } @@ -974,6 +992,10 @@ public: return SGPRInitBug; } + bool hasMFMAInlineLiteralBug() const { + return HasMFMAInlineLiteralBug; + } + bool has12DWordStoreHazard() const { return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS; } @@ -1036,6 +1058,13 @@ public: /// VGPRs unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const; + /// Return occupancy for the given function. Used LDS and a number of + /// registers if provided. + /// Note, occupancy can be affected by the scratch allocation as well, but + /// we do not have enough information to compute it. + unsigned computeOccupancy(const MachineFunction &MF, unsigned LDSSize = 0, + unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const; + /// \returns true if the flat_scratch register should be initialized with the /// pointer to the wave's scratch memory rather than a size and offset. 
bool flatScratchIsPointer() const { @@ -1226,9 +1255,7 @@ public: return Gen; } - unsigned getStackAlignment() const { - return 4; - } + Align getStackAlignment() const { return Align(4); } R600Subtarget &initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS); diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 0ea8db04c298..e8cf77161a14 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -238,16 +238,17 @@ extern "C" void LLVMInitializeAMDGPUTarget() { initializeAMDGPUUseNativeCallsPass(*PR); initializeAMDGPUSimplifyLibCallsPass(*PR); initializeAMDGPUInlinerPass(*PR); + initializeAMDGPUPrintfRuntimeBindingPass(*PR); initializeGCNRegBankReassignPass(*PR); initializeGCNNSAReassignPass(*PR); } static std::unique_ptr createTLOF(const Triple &TT) { - return llvm::make_unique(); + return std::make_unique(); } static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) { - return new ScheduleDAGMILive(C, llvm::make_unique()); + return new ScheduleDAGMILive(C, std::make_unique()); } static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) { @@ -257,7 +258,7 @@ static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) { static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) { ScheduleDAGMILive *DAG = - new GCNScheduleDAGMILive(C, make_unique(C)); + new GCNScheduleDAGMILive(C, std::make_unique(C)); DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); DAG->addMutation(createAMDGPUMacroFusionDAGMutation()); @@ -412,6 +413,7 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) { PM.add(createAMDGPUExternalAAWrapperPass()); } PM.add(createAMDGPUUnifyMetadataPass()); + PM.add(createAMDGPUPrintfRuntimeBinding()); PM.add(createAMDGPUPropagateAttributesLatePass(this)); if (Internalize) { PM.add(createInternalizePass(mustPreserveGV)); @@ -482,7 +484,7 @@ const R600Subtarget *R600TargetMachine::getSubtargetImpl( // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. resetTargetOptions(F); - I = llvm::make_unique(TargetTriple, GPU, FS, *this); + I = std::make_unique(TargetTriple, GPU, FS, *this); } return I.get(); @@ -518,7 +520,7 @@ const GCNSubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. resetTargetOptions(F); - I = llvm::make_unique(TargetTriple, GPU, FS, *this); + I = std::make_unique(TargetTriple, GPU, FS, *this); } I->setScalarizeGlobalBehavior(ScalarizeGlobal); @@ -659,6 +661,8 @@ void AMDGPUPassConfig::addIRPasses() { disablePass(&FuncletLayoutID); disablePass(&PatchableFunctionID); + addPass(createAMDGPUPrintfRuntimeBinding()); + // This must occur before inlining, as the inliner will not look through // bitcast calls. addPass(createAMDGPUFixFunctionBitcastsPass()); @@ -681,12 +685,6 @@ void AMDGPUPassConfig::addIRPasses() { // without ever running any passes on the second. addPass(createBarrierNoopPass()); - if (TM.getTargetTriple().getArch() == Triple::amdgcn) { - // TODO: May want to move later or split into an early and late one. - - addPass(createAMDGPUCodeGenPreparePass()); - } - // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments. 
if (TM.getTargetTriple().getArch() == Triple::r600) addPass(createR600OpenCLImageTypeLoweringPass()); @@ -714,6 +712,11 @@ void AMDGPUPassConfig::addIRPasses() { } } + if (TM.getTargetTriple().getArch() == Triple::amdgcn) { + // TODO: May want to move later or split into an early and late one. + addPass(createAMDGPUCodeGenPreparePass()); + } + TargetPassConfig::addIRPasses(); // EarlyCSE is not always strong enough to clean up what LSR produces. For @@ -1046,7 +1049,7 @@ bool GCNTargetMachine::parseMachineFunctionInfo( return true; if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG && - !AMDGPU::SReg_128RegClass.contains(MFI->ScratchRSrcReg)) { + !AMDGPU::SGPR_128RegClass.contains(MFI->ScratchRSrcReg)) { return diagnoseRegisterClass(YamlMFI.ScratchRSrcReg); } @@ -1095,7 +1098,7 @@ bool GCNTargetMachine::parseMachineFunctionInfo( if (YamlMFI.ArgInfo && (parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer, - AMDGPU::SReg_128RegClass, + AMDGPU::SGPR_128RegClass, MFI->ArgInfo.PrivateSegmentBuffer, 4, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr, AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr, diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index aaed280a1270..616196ad5ba3 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -57,7 +57,7 @@ using namespace llvm; static cl::opt UnrollThresholdPrivate( "amdgpu-unroll-threshold-private", cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"), - cl::init(2500), cl::Hidden); + cl::init(2000), cl::Hidden); static cl::opt UnrollThresholdLocal( "amdgpu-unroll-threshold-local", @@ -590,6 +590,61 @@ bool GCNTTIImpl::isAlwaysUniform(const Value *V) const { return false; } +bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl &OpIndexes, + Intrinsic::ID IID) const { + switch (IID) { + case Intrinsic::amdgcn_atomic_inc: + case Intrinsic::amdgcn_atomic_dec: + case Intrinsic::amdgcn_ds_fadd: + case Intrinsic::amdgcn_ds_fmin: + case Intrinsic::amdgcn_ds_fmax: + case Intrinsic::amdgcn_is_shared: + case Intrinsic::amdgcn_is_private: + OpIndexes.push_back(0); + return true; + default: + return false; + } +} + +bool GCNTTIImpl::rewriteIntrinsicWithAddressSpace( + IntrinsicInst *II, Value *OldV, Value *NewV) const { + auto IntrID = II->getIntrinsicID(); + switch (IntrID) { + case Intrinsic::amdgcn_atomic_inc: + case Intrinsic::amdgcn_atomic_dec: + case Intrinsic::amdgcn_ds_fadd: + case Intrinsic::amdgcn_ds_fmin: + case Intrinsic::amdgcn_ds_fmax: { + const ConstantInt *IsVolatile = cast(II->getArgOperand(4)); + if (!IsVolatile->isZero()) + return false; + Module *M = II->getParent()->getParent()->getParent(); + Type *DestTy = II->getType(); + Type *SrcTy = NewV->getType(); + Function *NewDecl = + Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy}); + II->setArgOperand(0, NewV); + II->setCalledFunction(NewDecl); + return true; + } + case Intrinsic::amdgcn_is_shared: + case Intrinsic::amdgcn_is_private: { + unsigned TrueAS = IntrID == Intrinsic::amdgcn_is_shared ? + AMDGPUAS::LOCAL_ADDRESS : AMDGPUAS::PRIVATE_ADDRESS; + unsigned NewAS = NewV->getType()->getPointerAddressSpace(); + LLVMContext &Ctx = NewV->getType()->getContext(); + ConstantInt *NewVal = (TrueAS == NewAS) ? 
+ ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx); + II->replaceAllUsesWith(NewVal); + II->eraseFromParent(); + return true; + } + default: + return false; + } +} + unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { if (ST->hasVOP3PInsts()) { @@ -638,6 +693,39 @@ void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, CommonTTI.getUnrollingPreferences(L, SE, UP); } +unsigned GCNTTIImpl::getUserCost(const User *U, + ArrayRef Operands) { + // Estimate extractelement elimination + if (const ExtractElementInst *EE = dyn_cast(U)) { + ConstantInt *CI = dyn_cast(EE->getOperand(1)); + unsigned Idx = -1; + if (CI) + Idx = CI->getZExtValue(); + return getVectorInstrCost(EE->getOpcode(), EE->getOperand(0)->getType(), + Idx); + } + + // Estimate insertelement elimination + if (const InsertElementInst *IE = dyn_cast(U)) { + ConstantInt *CI = dyn_cast(IE->getOperand(2)); + unsigned Idx = -1; + if (CI) + Idx = CI->getZExtValue(); + return getVectorInstrCost(IE->getOpcode(), IE->getType(), Idx); + } + + // Estimate different intrinsics, e.g. llvm.fabs + if (const IntrinsicInst *II = dyn_cast(U)) { + SmallVector Args(II->arg_operands()); + FastMathFlags FMF; + if (auto *FPMO = dyn_cast(II)) + FMF = FPMO->getFastMathFlags(); + return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args, + FMF); + } + return BaseT::getUserCost(U, Operands); +} + unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const { return 4 * 128; // XXX - 4 channels. Should these count as vector instead? } diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index 6f1bf5a26f0d..67f7f9074f10 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -46,10 +46,18 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase { Triple TargetTriple; + const TargetSubtargetInfo *ST; + const TargetLoweringBase *TLI; + + const TargetSubtargetInfo *getST() const { return ST; } + const TargetLoweringBase *getTLI() const { return TLI; } + public: explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F) - : BaseT(TM, F.getParent()->getDataLayout()), - TargetTriple(TM->getTargetTriple()) {} + : BaseT(TM, F.getParent()->getDataLayout()), + TargetTriple(TM->getTargetTriple()), + ST(static_cast(TM->getSubtargetImpl(F))), + TLI(ST->getTargetLowering()) {} void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); @@ -183,6 +191,11 @@ public: return AMDGPUAS::FLAT_ADDRESS; } + bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, + Intrinsic::ID IID) const; + bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, + Value *OldV, Value *NewV) const; + unsigned getVectorSplitCost() { return 0; } unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, @@ -191,7 +204,7 @@ public: bool areInlineCompatible(const Function *Caller, const Function *Callee) const; - unsigned getInliningThresholdMultiplier() { return 7; } + unsigned getInliningThresholdMultiplier() { return 9; } int getInlinerVectorBonusPercent() { return 0; } @@ -201,6 +214,7 @@ public: int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm, bool IsUnsigned); + unsigned getUserCost(const User *U, ArrayRef Operands); }; class R600TTIImpl final : public BasicTTIImplBase { diff --git a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp index 12f2e9519c9e..101ecfc0c87c 100644 --- 
a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp +++ b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp @@ -1307,8 +1307,8 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, if (LandBlkHasOtherPred) { report_fatal_error("Extra register needed to handle CFG"); - unsigned CmpResReg = - HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); + Register CmpResReg = + HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); report_fatal_error("Extra compare instruction needed to handle CFG"); insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, CmpResReg, DebugLoc()); @@ -1316,8 +1316,8 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, // XXX: We are running this after RA, so creating virtual registers will // cause an assertion failure in the PostRA scheduling pass. - unsigned InitReg = - HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); + Register InitReg = + HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, InitReg, DebugLoc()); diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 6d678966c98e..9dd511fab57c 100644 --- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -143,6 +143,7 @@ public: ImmTyDLC, ImmTyGLC, ImmTySLC, + ImmTySWZ, ImmTyTFE, ImmTyD16, ImmTyClampSI, @@ -216,14 +217,15 @@ public: if (Kind == Token) return true; - if (Kind != Expression || !Expr) - return false; - // When parsing operands, we can't always tell if something was meant to be // a token, like 'gds', or an expression that references a global variable. // In this case, we assume the string is an expression, and if we need to // interpret is a token, then we treat the symbol name as the token. 
- return isa(Expr); + return isSymbolRefExpr(); + } + + bool isSymbolRefExpr() const { + return isExpr() && Expr && isa(Expr); } bool isImm() const override { @@ -274,8 +276,10 @@ public: isRegClass(AMDGPU::VReg_64RegClassID) || isRegClass(AMDGPU::VReg_96RegClassID) || isRegClass(AMDGPU::VReg_128RegClassID) || + isRegClass(AMDGPU::VReg_160RegClassID) || isRegClass(AMDGPU::VReg_256RegClassID) || - isRegClass(AMDGPU::VReg_512RegClassID); + isRegClass(AMDGPU::VReg_512RegClassID) || + isRegClass(AMDGPU::VReg_1024RegClassID); } bool isVReg32() const { @@ -286,6 +290,10 @@ public: return isOff() || isVReg32(); } + bool isNull() const { + return isRegKind() && getReg() == AMDGPU::SGPR_NULL; + } + bool isSDWAOperand(MVT type) const; bool isSDWAFP16Operand() const; bool isSDWAFP32Operand() const; @@ -325,6 +333,7 @@ public: bool isDLC() const { return isImmTy(ImmTyDLC); } bool isGLC() const { return isImmTy(ImmTyGLC); } bool isSLC() const { return isImmTy(ImmTySLC); } + bool isSWZ() const { return isImmTy(ImmTySWZ); } bool isTFE() const { return isImmTy(ImmTyTFE); } bool isD16() const { return isImmTy(ImmTyD16); } bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); } @@ -817,6 +826,7 @@ public: case ImmTyDLC: OS << "DLC"; break; case ImmTyGLC: OS << "GLC"; break; case ImmTySLC: OS << "SLC"; break; + case ImmTySWZ: OS << "SWZ"; break; case ImmTyTFE: OS << "TFE"; break; case ImmTyD16: OS << "D16"; break; case ImmTyFORMAT: OS << "FORMAT"; break; @@ -886,7 +896,7 @@ public: int64_t Val, SMLoc Loc, ImmTy Type = ImmTyNone, bool IsFPImm = false) { - auto Op = llvm::make_unique(Immediate, AsmParser); + auto Op = std::make_unique(Immediate, AsmParser); Op->Imm.Val = Val; Op->Imm.IsFPImm = IsFPImm; Op->Imm.Type = Type; @@ -899,7 +909,7 @@ public: static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, StringRef Str, SMLoc Loc, bool HasExplicitEncodingSize = true) { - auto Res = llvm::make_unique(Token, AsmParser); + auto Res = std::make_unique(Token, AsmParser); Res->Tok.Data = Str.data(); Res->Tok.Length = Str.size(); Res->StartLoc = Loc; @@ -910,7 +920,7 @@ public: static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, unsigned RegNo, SMLoc S, SMLoc E) { - auto Op = llvm::make_unique(Register, AsmParser); + auto Op = std::make_unique(Register, AsmParser); Op->Reg.RegNo = RegNo; Op->Reg.Mods = Modifiers(); Op->StartLoc = S; @@ -920,7 +930,7 @@ public: static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, const class MCExpr *Expr, SMLoc S) { - auto Op = llvm::make_unique(Expression, AsmParser); + auto Op = std::make_unique(Expression, AsmParser); Op->Expr = Expr; Op->StartLoc = S; Op->EndLoc = S; @@ -1051,11 +1061,23 @@ private: std::string &CollectString); bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, - RegisterKind RegKind, unsigned Reg1, - unsigned RegNum); + RegisterKind RegKind, unsigned Reg1); bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, - unsigned& RegNum, unsigned& RegWidth, - unsigned *DwordRegIndex); + unsigned& RegNum, unsigned& RegWidth); + unsigned ParseRegularReg(RegisterKind &RegKind, + unsigned &RegNum, + unsigned &RegWidth); + unsigned ParseSpecialReg(RegisterKind &RegKind, + unsigned &RegNum, + unsigned &RegWidth); + unsigned ParseRegList(RegisterKind &RegKind, + unsigned &RegNum, + unsigned &RegWidth); + bool ParseRegRange(unsigned& Num, unsigned& Width); + unsigned getRegularReg(RegisterKind RegKind, + unsigned RegNum, + unsigned RegWidth); + bool isRegister(); bool isRegister(const 
AsmToken &Token, const AsmToken &NextToken) const; Optional getGprCountSymbolName(RegisterKind RegKind); @@ -1306,6 +1328,7 @@ private: bool validateOpSel(const MCInst &Inst); bool validateVccOperand(unsigned Reg) const; bool validateVOP3Literal(const MCInst &Inst) const; + unsigned getConstantBusLimit(unsigned Opcode) const; bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; @@ -1321,6 +1344,7 @@ private: void peekTokens(MutableArrayRef Tokens); AsmToken::TokenKind getTokenKind() const; bool parseExpr(int64_t &Imm); + bool parseExpr(OperandVector &Operands); StringRef getTokenStr() const; AsmToken peekToken(); AsmToken getToken() const; @@ -1399,9 +1423,12 @@ public: void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); + void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); void cvtSDWA(MCInst &Inst, const OperandVector &Operands, - uint64_t BasicInstType, bool skipVcc = false); + uint64_t BasicInstType, + bool SkipDstVcc = false, + bool SkipSrcVcc = false); AMDGPUOperand::Ptr defaultBLGP() const; AMDGPUOperand::Ptr defaultCBSZ() const; @@ -1636,8 +1663,8 @@ bool AMDGPUOperand::isSDWAInt32Operand() const { } bool AMDGPUOperand::isBoolReg() const { - return AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ? - isSCSrcB64() : isSCSrcB32(); + return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || + (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); } uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const @@ -1849,6 +1876,8 @@ static bool isInlineValue(unsigned Reg) { case AMDGPU::SRC_EXECZ: case AMDGPU::SRC_SCC: return true; + case AMDGPU::SGPR_NULL: + return true; default: return false; } @@ -1870,8 +1899,10 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) { case 2: return AMDGPU::VReg_64RegClassID; case 3: return AMDGPU::VReg_96RegClassID; case 4: return AMDGPU::VReg_128RegClassID; + case 5: return AMDGPU::VReg_160RegClassID; case 8: return AMDGPU::VReg_256RegClassID; case 16: return AMDGPU::VReg_512RegClassID; + case 32: return AMDGPU::VReg_1024RegClassID; } } else if (Is == IS_TTMP) { switch (RegWidth) { @@ -1944,7 +1975,7 @@ static unsigned getSpecialRegForName(StringRef RegName) { .Case("tba_lo", AMDGPU::TBA_LO) .Case("tba_hi", AMDGPU::TBA_HI) .Case("null", AMDGPU::SGPR_NULL) - .Default(0); + .Default(AMDGPU::NoRegister); } bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, @@ -1959,8 +1990,7 @@ bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, } bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, - RegisterKind RegKind, unsigned Reg1, - unsigned RegNum) { + RegisterKind RegKind, unsigned Reg1) { switch (RegKind) { case IS_SPECIAL: if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { @@ -2008,14 +2038,37 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, } } -static const StringRef Registers[] = { - { "v" }, - { "s" }, - { "ttmp" }, - { "acc" }, - { "a" }, +struct RegInfo { + StringLiteral Name; + RegisterKind Kind; +}; + +static constexpr RegInfo RegularRegisters[] = { + {{"v"}, IS_VGPR}, + {{"s"}, IS_SGPR}, + {{"ttmp"}, IS_TTMP}, + 
{{"acc"}, IS_AGPR}, + {{"a"}, IS_AGPR}, }; +static bool isRegularReg(RegisterKind Kind) { + return Kind == IS_VGPR || + Kind == IS_SGPR || + Kind == IS_TTMP || + Kind == IS_AGPR; +} + +static const RegInfo* getRegularRegInfo(StringRef Str) { + for (const RegInfo &Reg : RegularRegisters) + if (Str.startswith(Reg.Name)) + return &Reg; + return nullptr; +} + +static bool getRegNum(StringRef Str, unsigned& Num) { + return !Str.getAsInteger(10, Num); +} + bool AMDGPUAsmParser::isRegister(const AsmToken &Token, const AsmToken &NextToken) const { @@ -2029,24 +2082,24 @@ AMDGPUAsmParser::isRegister(const AsmToken &Token, // A single register like s0 or a range of registers like s[0:1] - StringRef RegName = Token.getString(); - - for (StringRef Reg : Registers) { - if (RegName.startswith(Reg)) { - if (Reg.size() < RegName.size()) { - unsigned RegNum; - // A single register with an index: rXX - if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum)) - return true; - } else { - // A range of registers: r[XX:YY]. - if (NextToken.is(AsmToken::LBrac)) - return true; - } + StringRef Str = Token.getString(); + const RegInfo *Reg = getRegularRegInfo(Str); + if (Reg) { + StringRef RegName = Reg->Name; + StringRef RegSuffix = Str.substr(RegName.size()); + if (!RegSuffix.empty()) { + unsigned Num; + // A single register with an index: rXX + if (getRegNum(RegSuffix, Num)) + return true; + } else { + // A range of registers: r[XX:YY]. + if (NextToken.is(AsmToken::LBrac)) + return true; } } - return getSpecialRegForName(RegName); + return getSpecialRegForName(Str) != AMDGPU::NoRegister; } bool @@ -2055,137 +2108,161 @@ AMDGPUAsmParser::isRegister() return isRegister(getToken(), peekToken()); } -bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, - unsigned &RegNum, unsigned &RegWidth, - unsigned *DwordRegIndex) { - if (DwordRegIndex) { *DwordRegIndex = 0; } +unsigned +AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, + unsigned RegNum, + unsigned RegWidth) { + + assert(isRegularReg(RegKind)); + + unsigned AlignSize = 1; + if (RegKind == IS_SGPR || RegKind == IS_TTMP) { + // SGPR and TTMP registers must be aligned. + // Max required alignment is 4 dwords. + AlignSize = std::min(RegWidth, 4u); + } + + if (RegNum % AlignSize != 0) + return AMDGPU::NoRegister; + + unsigned RegIdx = RegNum / AlignSize; + int RCID = getRegClass(RegKind, RegWidth); + if (RCID == -1) + return AMDGPU::NoRegister; + const MCRegisterInfo *TRI = getContext().getRegisterInfo(); - if (getLexer().is(AsmToken::Identifier)) { - StringRef RegName = Parser.getTok().getString(); - if ((Reg = getSpecialRegForName(RegName))) { - Parser.Lex(); - RegKind = IS_SPECIAL; - } else { - unsigned RegNumIndex = 0; - if (RegName[0] == 'v') { - RegNumIndex = 1; - RegKind = IS_VGPR; - } else if (RegName[0] == 's') { - RegNumIndex = 1; - RegKind = IS_SGPR; - } else if (RegName[0] == 'a') { - RegNumIndex = RegName.startswith("acc") ? 3 : 1; - RegKind = IS_AGPR; - } else if (RegName.startswith("ttmp")) { - RegNumIndex = strlen("ttmp"); - RegKind = IS_TTMP; - } else { - return false; - } - if (RegName.size() > RegNumIndex) { - // Single 32-bit register: vXX. - if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum)) - return false; - Parser.Lex(); - RegWidth = 1; - } else { - // Range of registers: v[XX:YY]. ":YY" is optional. 
- Parser.Lex(); - int64_t RegLo, RegHi; - if (getLexer().isNot(AsmToken::LBrac)) - return false; - Parser.Lex(); + const MCRegisterClass RC = TRI->getRegClass(RCID); + if (RegIdx >= RC.getNumRegs()) + return AMDGPU::NoRegister; - if (getParser().parseAbsoluteExpression(RegLo)) - return false; + return RC.getRegister(RegIdx); +} - const bool isRBrace = getLexer().is(AsmToken::RBrac); - if (!isRBrace && getLexer().isNot(AsmToken::Colon)) - return false; - Parser.Lex(); +bool +AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { + int64_t RegLo, RegHi; + if (!trySkipToken(AsmToken::LBrac)) + return false; - if (isRBrace) { - RegHi = RegLo; - } else { - if (getParser().parseAbsoluteExpression(RegHi)) - return false; + if (!parseExpr(RegLo)) + return false; - if (getLexer().isNot(AsmToken::RBrac)) - return false; - Parser.Lex(); - } - RegNum = (unsigned) RegLo; - RegWidth = (RegHi - RegLo) + 1; - } - } - } else if (getLexer().is(AsmToken::LBrac)) { - // List of consecutive registers: [s0,s1,s2,s3] - Parser.Lex(); - if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr)) - return false; - if (RegWidth != 1) + if (trySkipToken(AsmToken::Colon)) { + if (!parseExpr(RegHi)) return false; - RegisterKind RegKind1; - unsigned Reg1, RegNum1, RegWidth1; - do { - if (getLexer().is(AsmToken::Comma)) { - Parser.Lex(); - } else if (getLexer().is(AsmToken::RBrac)) { - Parser.Lex(); - break; - } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) { - if (RegWidth1 != 1) { - return false; - } - if (RegKind1 != RegKind) { - return false; - } - if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) { - return false; - } - } else { - return false; - } - } while (true); } else { - return false; + RegHi = RegLo; } - switch (RegKind) { - case IS_SPECIAL: + + if (!trySkipToken(AsmToken::RBrac)) + return false; + + if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi) + return false; + + Num = static_cast(RegLo); + Width = (RegHi - RegLo) + 1; + return true; +} + +unsigned +AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, + unsigned &RegNum, + unsigned &RegWidth) { + assert(isToken(AsmToken::Identifier)); + unsigned Reg = getSpecialRegForName(getTokenStr()); + if (Reg) { RegNum = 0; RegWidth = 1; - break; - case IS_VGPR: - case IS_SGPR: - case IS_AGPR: - case IS_TTMP: - { - unsigned Size = 1; - if (RegKind == IS_SGPR || RegKind == IS_TTMP) { - // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords. - Size = std::min(RegWidth, 4u); - } - if (RegNum % Size != 0) - return false; - if (DwordRegIndex) { *DwordRegIndex = RegNum; } - RegNum = RegNum / Size; - int RCID = getRegClass(RegKind, RegWidth); - if (RCID == -1) - return false; - const MCRegisterClass RC = TRI->getRegClass(RCID); - if (RegNum >= RC.getNumRegs()) - return false; - Reg = RC.getRegister(RegNum); - break; + RegKind = IS_SPECIAL; + lex(); // skip register name + } + return Reg; +} + +unsigned +AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, + unsigned &RegNum, + unsigned &RegWidth) { + assert(isToken(AsmToken::Identifier)); + StringRef RegName = getTokenStr(); + + const RegInfo *RI = getRegularRegInfo(RegName); + if (!RI) + return AMDGPU::NoRegister; + lex(); // skip register name + + RegKind = RI->Kind; + StringRef RegSuffix = RegName.substr(RI->Name.size()); + if (!RegSuffix.empty()) { + // Single 32-bit register: vXX. + if (!getRegNum(RegSuffix, RegNum)) + return AMDGPU::NoRegister; + RegWidth = 1; + } else { + // Range of registers: v[XX:YY]. 
":YY" is optional. + if (!ParseRegRange(RegNum, RegWidth)) + return AMDGPU::NoRegister; } - default: - llvm_unreachable("unexpected register kind"); + return getRegularReg(RegKind, RegNum, RegWidth); +} + +unsigned +AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, + unsigned &RegNum, + unsigned &RegWidth) { + unsigned Reg = AMDGPU::NoRegister; + + if (!trySkipToken(AsmToken::LBrac)) + return AMDGPU::NoRegister; + + // List of consecutive registers, e.g.: [s0,s1,s2,s3] + + if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) + return AMDGPU::NoRegister; + if (RegWidth != 1) + return AMDGPU::NoRegister; + + for (; trySkipToken(AsmToken::Comma); ) { + RegisterKind NextRegKind; + unsigned NextReg, NextRegNum, NextRegWidth; + + if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth)) + return AMDGPU::NoRegister; + if (NextRegWidth != 1) + return AMDGPU::NoRegister; + if (NextRegKind != RegKind) + return AMDGPU::NoRegister; + if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg)) + return AMDGPU::NoRegister; } - if (!subtargetHasRegister(*TRI, Reg)) - return false; - return true; + if (!trySkipToken(AsmToken::RBrac)) + return AMDGPU::NoRegister; + + if (isRegularReg(RegKind)) + Reg = getRegularReg(RegKind, RegNum, RegWidth); + + return Reg; +} + +bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, + unsigned &Reg, + unsigned &RegNum, + unsigned &RegWidth) { + Reg = AMDGPU::NoRegister; + + if (isToken(AsmToken::Identifier)) { + Reg = ParseSpecialReg(RegKind, RegNum, RegWidth); + if (Reg == AMDGPU::NoRegister) + Reg = ParseRegularReg(RegKind, RegNum, RegWidth); + } else { + Reg = ParseRegList(RegKind, RegNum, RegWidth); + } + + const MCRegisterInfo *TRI = getContext().getRegisterInfo(); + return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg); } Optional @@ -2241,18 +2318,18 @@ std::unique_ptr AMDGPUAsmParser::parseRegister() { SMLoc StartLoc = Tok.getLoc(); SMLoc EndLoc = Tok.getEndLoc(); RegisterKind RegKind; - unsigned Reg, RegNum, RegWidth, DwordRegIndex; + unsigned Reg, RegNum, RegWidth; - if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { + if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { //FIXME: improve error messages (bug 41303). 
Error(StartLoc, "not a valid operand."); return nullptr; } if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { - if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) + if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) return nullptr; } else - KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); + KernelScope.usesRegister(RegKind, RegNum, RegWidth); return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); } @@ -2648,7 +2725,6 @@ unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { case AMDGPU::VCC_LO: case AMDGPU::VCC_HI: case AMDGPU::M0: - case AMDGPU::SGPR_NULL: return Reg; default: break; @@ -2697,13 +2773,38 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, } } +unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { + if (!isGFX10()) + return 1; + + switch (Opcode) { + // 64-bit shift instructions can use only one scalar value input + case AMDGPU::V_LSHLREV_B64: + case AMDGPU::V_LSHLREV_B64_gfx10: + case AMDGPU::V_LSHL_B64: + case AMDGPU::V_LSHRREV_B64: + case AMDGPU::V_LSHRREV_B64_gfx10: + case AMDGPU::V_LSHR_B64: + case AMDGPU::V_ASHRREV_I64: + case AMDGPU::V_ASHRREV_I64_gfx10: + case AMDGPU::V_ASHR_I64: + return 1; + default: + return 2; + } +} + bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { const MCOperand &MO = Inst.getOperand(OpIdx); if (MO.isImm()) { return !isInlineConstant(Inst, OpIdx); + } else if (MO.isReg()) { + auto Reg = MO.getReg(); + const MCRegisterInfo *TRI = getContext().getRegisterInfo(); + return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL; + } else { + return true; } - return !MO.isReg() || - isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); } bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { @@ -2782,10 +2883,7 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { } ConstantBusUseCount += NumLiterals; - if (isGFX10()) - return ConstantBusUseCount <= 2; - - return ConstantBusUseCount <= 1; + return ConstantBusUseCount <= getConstantBusLimit(Opcode); } bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { @@ -3212,6 +3310,7 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { const int OpIndices[] = { Src0Idx, Src1Idx }; + unsigned NumExprs = 0; unsigned NumLiterals = 0; uint32_t LiteralValue; @@ -3219,19 +3318,21 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { if (OpIdx == -1) break; const MCOperand &MO = Inst.getOperand(OpIdx); - if (MO.isImm() && - // Exclude special imm operands (like that used by s_set_gpr_idx_on) - AMDGPU::isSISrcOperand(Desc, OpIdx) && - !isInlineConstant(Inst, OpIdx)) { - uint32_t Value = static_cast(MO.getImm()); - if (NumLiterals == 0 || LiteralValue != Value) { - LiteralValue = Value; - ++NumLiterals; + // Exclude special imm operands (like that used by s_set_gpr_idx_on) + if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { + if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { + uint32_t Value = static_cast(MO.getImm()); + if (NumLiterals == 0 || LiteralValue != Value) { + LiteralValue = Value; + ++NumLiterals; + } + } else if (MO.isExpr()) { + ++NumExprs; } } } - return NumLiterals <= 1; + return NumLiterals + NumExprs <= 1; } bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { @@ -3267,6 +3368,7 @@ bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; + unsigned NumExprs = 0; unsigned NumLiterals = 0; uint32_t LiteralValue; @@ 
-3274,17 +3376,26 @@ bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { if (OpIdx == -1) break; const MCOperand &MO = Inst.getOperand(OpIdx); - if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx)) + if (!MO.isImm() && !MO.isExpr()) + continue; + if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) continue; - if (!isInlineConstant(Inst, OpIdx)) { + if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && + getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) + return false; + + if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { uint32_t Value = static_cast(MO.getImm()); if (NumLiterals == 0 || LiteralValue != Value) { LiteralValue = Value; ++NumLiterals; } + } else if (MO.isExpr()) { + ++NumExprs; } } + NumLiterals += NumExprs; return !NumLiterals || (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); @@ -3607,37 +3718,44 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, Val, ValRange); - UserSGPRCount += 4; + if (Val) + UserSGPRCount += 4; } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, ValRange); - UserSGPRCount += 2; + if (Val) + UserSGPRCount += 2; } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, ValRange); - UserSGPRCount += 2; + if (Val) + UserSGPRCount += 2; } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, Val, ValRange); - UserSGPRCount += 2; + if (Val) + UserSGPRCount += 2; } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, ValRange); - UserSGPRCount += 2; + if (Val) + UserSGPRCount += 2; } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, ValRange); - UserSGPRCount += 2; + if (Val) + UserSGPRCount += 2; } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, Val, ValRange); - UserSGPRCount += 1; + if (Val) + UserSGPRCount += 1; } else if (ID == ".amdhsa_wavefront_size32") { if (IVersion.Major < 10) return getParser().Error(IDRange.Start, "directive requires gfx10+", @@ -5224,6 +5342,23 @@ AMDGPUAsmParser::parseExpr(int64_t &Imm) { return !getParser().parseAbsoluteExpression(Imm); } +bool +AMDGPUAsmParser::parseExpr(OperandVector &Operands) { + SMLoc S = getLoc(); + + const MCExpr *Expr; + if (Parser.parseExpression(Expr)) + return false; + + int64_t IntVal; + if (Expr->evaluateAsAbsolute(IntVal)) { + Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); + } else { + Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); + } + return true; +} + bool AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { if (isToken(AsmToken::String)) { @@ -5605,25 +5740,29 @@ bool AMDGPUOperand::isGPRIdxMode() const { OperandMatchResultTy AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { - SMLoc S = Parser.getTok().getLoc(); - switch (getLexer().getKind()) { - default: return MatchOperand_ParseFail; - case AsmToken::Integer: { - int64_t Imm; - if (getParser().parseAbsoluteExpression(Imm)) - return 
MatchOperand_ParseFail; - Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S)); - return MatchOperand_Success; - } + // Make sure we are not parsing something + // that looks like a label or an expression but is not. + // This will improve error messages. + if (isRegister() || isModifier()) + return MatchOperand_NoMatch; - case AsmToken::Identifier: - Operands.push_back(AMDGPUOperand::CreateExpr(this, - MCSymbolRefExpr::create(getContext().getOrCreateSymbol( - Parser.getTok().getString()), getContext()), S)); - Parser.Lex(); - return MatchOperand_Success; + if (parseExpr(Operands)) { + + AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); + assert(Opr.isImm() || Opr.isExpr()); + SMLoc Loc = Opr.getStartLoc(); + + // Currently we do not support arbitrary expressions as branch targets. + // Only labels and absolute expressions are accepted. + if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { + Error(Loc, "expected an absolute expression or a label"); + } else if (Opr.isImm() && !Opr.isS16Imm()) { + Error(Loc, "expected a 16-bit signed jump offset"); + } } + + return MatchOperand_Success; // avoid excessive error messages } //===----------------------------------------------------------------------===// @@ -5908,6 +6047,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = { {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, + {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, @@ -5941,8 +6081,6 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = { }; OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { - unsigned size = Operands.size(); - assert(size > 0); OperandMatchResultTy res = parseOptionalOpr(Operands); @@ -5957,17 +6095,13 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operan // to make sure autogenerated parser of custom operands never hit hardcoded // mandatory operands. - if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { - - // We have parsed the first optional operand. - // Parse as many operands as necessary to skip all mandatory operands. 
+ for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { + if (res != MatchOperand_Success || + isToken(AsmToken::EndOfStatement)) + break; - for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { - if (res != MatchOperand_Success || - getLexer().is(AsmToken::EndOfStatement)) break; - if (getLexer().is(AsmToken::Comma)) Parser.Lex(); - res = parseOptionalOpr(Operands); - } + trySkipToken(AsmToken::Comma); + res = parseOptionalOpr(Operands); } return res; @@ -6682,7 +6816,11 @@ void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { } void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { - cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true); + cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); +} + +void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { + cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); } void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { @@ -6690,11 +6828,14 @@ void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { } void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, - uint64_t BasicInstType, bool skipVcc) { + uint64_t BasicInstType, + bool SkipDstVcc, + bool SkipSrcVcc) { using namespace llvm::AMDGPU::SDWA; OptionalImmIndexMap OptionalIdx; - bool skippedVcc = false; + bool SkipVcc = SkipDstVcc || SkipSrcVcc; + bool SkippedVcc = false; unsigned I = 1; const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); @@ -6704,19 +6845,21 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, for (unsigned E = Operands.size(); I != E; ++I) { AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); - if (skipVcc && !skippedVcc && Op.isReg() && + if (SkipVcc && !SkippedVcc && Op.isReg() && (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. // Skip VCC only if we didn't skip it on previous iteration. + // Note that src0 and src1 occupy 2 slots each because of modifiers. if (BasicInstType == SIInstrFlags::VOP2 && - (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) { - skippedVcc = true; + ((SkipDstVcc && Inst.getNumOperands() == 1) || + (SkipSrcVcc && Inst.getNumOperands() == 5))) { + SkippedVcc = true; continue; } else if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) { - skippedVcc = true; + SkippedVcc = true; continue; } } @@ -6728,7 +6871,7 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, } else { llvm_unreachable("Invalid operand type"); } - skippedVcc = false; + SkippedVcc = false; } if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && @@ -6849,6 +6992,14 @@ unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; case MCK_AttrChan: return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; + case MCK_SReg_64: + case MCK_SReg_64_XEXEC: + // Null is defined as a 32-bit register but + // it should also be enabled with 64-bit operands. + // The following code enables it for SReg_64 operands + // used as source and destination. Remaining source + // operands are handled in isInlinableImm. + return Operand.isNull() ? 
Match_Success : Match_InvalidOperand; default: return Match_InvalidOperand; } diff --git a/lib/Target/AMDGPU/BUFInstructions.td b/lib/Target/AMDGPU/BUFInstructions.td index 62a19d848af2..1b12550aed88 100644 --- a/lib/Target/AMDGPU/BUFInstructions.td +++ b/lib/Target/AMDGPU/BUFInstructions.td @@ -7,13 +7,13 @@ //===----------------------------------------------------------------------===// def MUBUFAddr32 : ComplexPattern; -def MUBUFAddr64 : ComplexPattern; +def MUBUFAddr64 : ComplexPattern; def MUBUFAddr64Atomic : ComplexPattern; def MUBUFScratchOffen : ComplexPattern; def MUBUFScratchOffset : ComplexPattern; -def MUBUFOffset : ComplexPattern; +def MUBUFOffset : ComplexPattern; def MUBUFOffsetNoGLC : ComplexPattern; def MUBUFOffsetAtomic : ComplexPattern; @@ -54,6 +54,17 @@ class MTBUFAddr64Table { // MTBUF classes //===----------------------------------------------------------------------===// +class MTBUFGetBaseOpcode { + string ret = !subst("FORMAT_XY", "FORMAT_X", + !subst("FORMAT_XYZ", "FORMAT_X", + !subst("FORMAT_XYZW", "FORMAT_X", Op))); +} + +class getMTBUFElements { + int ret = 1; +} + + class MTBUF_Pseudo pattern=[]> : InstSI, @@ -67,6 +78,9 @@ class MTBUF_Pseudo (NAME); + Instruction BaseOpcode = !cast(MTBUFGetBaseOpcode.ret); + let VM_CNT = 1; let EXP_CNT = 1; let MTBUF = 1; @@ -90,6 +104,7 @@ class MTBUF_Pseudo has_offset = 1; bits<1> has_slc = 1; bits<1> has_tfe = 1; + bits<4> elements = 0; } class MTBUF_Real : @@ -126,17 +141,17 @@ class getMTBUFInsDA vdataList, RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList)); dag InsNoData = !if(!empty(vaddrList), (ins SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc), + offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz), (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc) + offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz) ); dag InsData = !if(!empty(vaddrList), (ins vdataClass:$vdata, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc, - SLC:$slc, TFE:$tfe, DLC:$dlc), + SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz), (ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc, - SLC:$slc, TFE:$tfe, DLC:$dlc) + SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz) ); dag ret = !if(!empty(vdataList), InsNoData, InsData); } @@ -181,51 +196,54 @@ class MTBUF_SetupAddr { class MTBUF_Load_Pseudo pattern=[], // Workaround bug bz30254 int addrKindCopy = addrKind> : MTBUF_Pseudo.ret, - " $vdata, " # getMTBUFAsmOps.ret # "$glc$slc$tfe$dlc", + " $vdata, " # getMTBUFAsmOps.ret # "$glc$slc$tfe$dlc$swz", pattern>, MTBUF_SetupAddr { let PseudoInstr = opName # "_" # getAddrName.ret; let mayLoad = 1; let mayStore = 0; + let elements = elems; } multiclass MTBUF_Pseudo_Loads { - def _OFFSET : MTBUF_Load_Pseudo , + i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)))]>, MTBUFAddr64Table<0, NAME>; - def _ADDR64 : MTBUF_Load_Pseudo , + i8:$format, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)))]>, MTBUFAddr64Table<1, NAME>; - def _OFFEN : MTBUF_Load_Pseudo ; - def _IDXEN : MTBUF_Load_Pseudo ; - def _BOTHEN : MTBUF_Load_Pseudo ; + def _OFFEN : MTBUF_Load_Pseudo ; + def _IDXEN : MTBUF_Load_Pseudo ; + def _BOTHEN : MTBUF_Load_Pseudo ; let DisableWQM = 1 in { - def _OFFSET_exact : MTBUF_Load_Pseudo ; - def _OFFEN_exact : MTBUF_Load_Pseudo ; - def _IDXEN_exact : MTBUF_Load_Pseudo 
; - def _BOTHEN_exact : MTBUF_Load_Pseudo ; + def _OFFSET_exact : MTBUF_Load_Pseudo ; + def _OFFEN_exact : MTBUF_Load_Pseudo ; + def _IDXEN_exact : MTBUF_Load_Pseudo ; + def _BOTHEN_exact : MTBUF_Load_Pseudo ; } } class MTBUF_Store_Pseudo pattern=[], // Workaround bug bz30254 int addrKindCopy = addrKind, @@ -233,39 +251,40 @@ class MTBUF_Store_Pseudo .ret, - " $vdata, " # getMTBUFAsmOps.ret # "$glc$slc$tfe$dlc", + " $vdata, " # getMTBUFAsmOps.ret # "$glc$slc$tfe$dlc$swz", pattern>, MTBUF_SetupAddr { let PseudoInstr = opName # "_" # getAddrName.ret; let mayLoad = 0; let mayStore = 1; + let elements = elems; } multiclass MTBUF_Pseudo_Stores { - def _OFFSET : MTBUF_Store_Pseudo , + i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>, MTBUFAddr64Table<0, NAME>; - def _ADDR64 : MTBUF_Store_Pseudo , + i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>, MTBUFAddr64Table<1, NAME>; - def _OFFEN : MTBUF_Store_Pseudo ; - def _IDXEN : MTBUF_Store_Pseudo ; - def _BOTHEN : MTBUF_Store_Pseudo ; + def _OFFEN : MTBUF_Store_Pseudo ; + def _IDXEN : MTBUF_Store_Pseudo ; + def _BOTHEN : MTBUF_Store_Pseudo ; let DisableWQM = 1 in { - def _OFFSET_exact : MTBUF_Store_Pseudo ; - def _OFFEN_exact : MTBUF_Store_Pseudo ; - def _IDXEN_exact : MTBUF_Store_Pseudo ; - def _BOTHEN_exact : MTBUF_Store_Pseudo ; + def _OFFSET_exact : MTBUF_Store_Pseudo ; + def _OFFEN_exact : MTBUF_Store_Pseudo ; + def _IDXEN_exact : MTBUF_Store_Pseudo ; + def _BOTHEN_exact : MTBUF_Store_Pseudo ; } } @@ -320,7 +339,7 @@ class MUBUF_Pseudo has_offset = 1; bits<1> has_slc = 1; bits<1> has_tfe = 1; - bits<4> dwords = 0; + bits<4> elements = 0; } class MUBUF_Real : @@ -393,18 +412,30 @@ class getMUBUFInsDA vdataList, ); dag ret = !con( !if(!empty(vdataList), InsNoData, InsData), - !if(isLds, (ins DLC:$dlc), (ins TFE:$tfe, DLC:$dlc)) + !if(isLds, (ins DLC:$dlc, SWZ:$swz), (ins TFE:$tfe, DLC:$dlc,SWZ:$swz)) ); } -class getMUBUFDwords { - string regClassAsInt = !cast(regClass); +class getMUBUFElements { + // eq does not support ValueType for some reason. 
+ string vtAsStr = !cast(vt); + int ret = - !if(!eq(regClassAsInt, !cast(VGPR_32)), 1, - !if(!eq(regClassAsInt, !cast(VReg_64)), 2, - !if(!eq(regClassAsInt, !cast(VReg_96)), 3, - !if(!eq(regClassAsInt, !cast(VReg_128)), 4, - 0)))); + !if(!eq(vtAsStr, "f16"), 1, + !if(!eq(vtAsStr, "v2f16"), 2, + !if(!eq(vtAsStr, "v3f16"), 3, + !if(!eq(vtAsStr, "v4f16"), 4, + !if(!eq(vt.Size, 32), 1, + !if(!eq(vt.Size, 64), 2, + !if(!eq(vt.Size, 96), 3, + !if(!eq(vt.Size, 128), 4, 0) + ) + ) + ) + ) + ) + ) + ); } class getMUBUFIns vdataList=[], bit isLds = 0> { @@ -442,18 +473,18 @@ class MUBUF_SetupAddr { class MUBUF_Load_Pseudo pattern=[], // Workaround bug bz30254 int addrKindCopy = addrKind> : MUBUF_Pseudo.ret:$vdata), !con(getMUBUFIns.ret, - !if(HasTiedDest, (ins vdataClass:$vdata_in), (ins))), + !if(HasTiedDest, (ins getVregSrcForVT.ret:$vdata_in), (ins))), " $vdata, " # getMUBUFAsmOps.ret # "$glc$slc" # - !if(isLds, " lds", "$tfe") # "$dlc", + !if(isLds, " lds", "$tfe") # "$dlc" # "$swz", pattern>, MUBUF_SetupAddr { let PseudoInstr = opName # !if(isLds, "_lds", "") # @@ -467,19 +498,19 @@ class MUBUF_Load_Pseudo .ret; + let elements = getMUBUFElements.ret; } class MUBUF_Offset_Load_Pat : Pat < - (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))), - (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)) + (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))), + (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)) >; class MUBUF_Addr64_Load_Pat : Pat < - (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))), - (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)) + (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))), + (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)) >; multiclass MUBUF_Pseudo_Load_Pats { @@ -490,89 +521,87 @@ multiclass MUBUF_Pseudo_Load_Pats { - def _OFFSET : MUBUF_Load_Pseudo , + def _OFFSET : MUBUF_Load_Pseudo , MUBUFAddr64Table<0, NAME # !if(isLds, "_LDS", "")>; - def _ADDR64 : MUBUF_Load_Pseudo , + def _ADDR64 : MUBUF_Load_Pseudo , MUBUFAddr64Table<1, NAME # !if(isLds, "_LDS", "")>; - def _OFFEN : MUBUF_Load_Pseudo ; - def _IDXEN : MUBUF_Load_Pseudo ; - def _BOTHEN : MUBUF_Load_Pseudo ; + def _OFFEN : MUBUF_Load_Pseudo ; + def _IDXEN : MUBUF_Load_Pseudo ; + def _BOTHEN : MUBUF_Load_Pseudo ; let DisableWQM = 1 in { - def _OFFSET_exact : MUBUF_Load_Pseudo ; - def _OFFEN_exact : MUBUF_Load_Pseudo ; - def _IDXEN_exact : MUBUF_Load_Pseudo ; - def _BOTHEN_exact : MUBUF_Load_Pseudo ; + def _OFFSET_exact : MUBUF_Load_Pseudo ; + def _OFFEN_exact : MUBUF_Load_Pseudo ; + def _IDXEN_exact : MUBUF_Load_Pseudo ; + def _BOTHEN_exact : MUBUF_Load_Pseudo ; } } -multiclass MUBUF_Pseudo_Loads_Lds { - defm NAME : MUBUF_Pseudo_Loads; - defm _LDS : MUBUF_Pseudo_Loads; + defm NAME : MUBUF_Pseudo_Loads; + defm _LDS : MUBUF_Pseudo_Loads; } class MUBUF_Store_Pseudo pattern=[], // Workaround bug bz30254 - int addrKindCopy = addrKind, - RegisterClass vdataClassCopy = vdataClass> + int addrKindCopy = addrKind> : MUBUF_Pseudo.ret, - " $vdata, " # getMUBUFAsmOps.ret # "$glc$slc$tfe$dlc", + getMUBUFIns.ret]>.ret, + " $vdata, " # getMUBUFAsmOps.ret # 
"$glc$slc$tfe$dlc$swz", pattern>, MUBUF_SetupAddr { let PseudoInstr = opName # "_" # getAddrName.ret; let mayLoad = 0; let mayStore = 1; let maybeAtomic = 1; - let dwords = getMUBUFDwords.ret; + let elements = getMUBUFElements.ret; } -multiclass MUBUF_Pseudo_Stores { - def _OFFSET : MUBUF_Store_Pseudo , + i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>, MUBUFAddr64Table<0, NAME>; - def _ADDR64 : MUBUF_Store_Pseudo , + i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>, MUBUFAddr64Table<1, NAME>; - def _OFFEN : MUBUF_Store_Pseudo ; - def _IDXEN : MUBUF_Store_Pseudo ; - def _BOTHEN : MUBUF_Store_Pseudo ; + def _OFFEN : MUBUF_Store_Pseudo ; + def _IDXEN : MUBUF_Store_Pseudo ; + def _BOTHEN : MUBUF_Store_Pseudo ; let DisableWQM = 1 in { - def _OFFSET_exact : MUBUF_Store_Pseudo ; - def _OFFEN_exact : MUBUF_Store_Pseudo ; - def _IDXEN_exact : MUBUF_Store_Pseudo ; - def _BOTHEN_exact : MUBUF_Store_Pseudo ; + def _OFFSET_exact : MUBUF_Store_Pseudo ; + def _OFFEN_exact : MUBUF_Store_Pseudo ; + def _IDXEN_exact : MUBUF_Store_Pseudo ; + def _BOTHEN_exact : MUBUF_Store_Pseudo ; } } class MUBUF_Pseudo_Store_Lds : MUBUF_Pseudo { + (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc, SWZ:$swz), + " $srsrc, $soffset$offset lds$glc$slc$swz"> { let mayLoad = 0; let mayStore = 1; let maybeAtomic = 1; @@ -686,7 +715,7 @@ multiclass MUBUF_Pseudo_Atomics_NO_RTN .ret> { + bit isFP = isFloatType.ret> { let FPAtomic = isFP in def _OFFSET : MUBUF_AtomicNoRet_Pseudo , MUBUFAddr64Table <0, NAME>; @@ -710,7 +739,7 @@ multiclass MUBUF_Pseudo_Atomics_RTN .ret> { + bit isFP = isFloatType.ret> { let FPAtomic = isFP in def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo ; defm BUFFER_LOAD_FORMAT_XY : MUBUF_Pseudo_Loads < - "buffer_load_format_xy", VReg_64 + "buffer_load_format_xy", v2f32 >; defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Pseudo_Loads < - "buffer_load_format_xyz", VReg_96 + "buffer_load_format_xyz", v3f32 >; defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Pseudo_Loads < - "buffer_load_format_xyzw", VReg_128 + "buffer_load_format_xyzw", v4f32 >; defm BUFFER_STORE_FORMAT_X : MUBUF_Pseudo_Stores < - "buffer_store_format_x", VGPR_32 + "buffer_store_format_x", f32 >; defm BUFFER_STORE_FORMAT_XY : MUBUF_Pseudo_Stores < - "buffer_store_format_xy", VReg_64 + "buffer_store_format_xy", v2f32 >; defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Pseudo_Stores < - "buffer_store_format_xyz", VReg_96 + "buffer_store_format_xyz", v3f32 >; defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Pseudo_Stores < - "buffer_store_format_xyzw", VReg_128 + "buffer_store_format_xyzw", v4f32 >; let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in { defm BUFFER_LOAD_FORMAT_D16_X_gfx80 : MUBUF_Pseudo_Loads < - "buffer_load_format_d16_x", VGPR_32 + "buffer_load_format_d16_x", i32 >; defm BUFFER_LOAD_FORMAT_D16_XY_gfx80 : MUBUF_Pseudo_Loads < - "buffer_load_format_d16_xy", VReg_64 + "buffer_load_format_d16_xy", v2i32 >; defm BUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MUBUF_Pseudo_Loads < - "buffer_load_format_d16_xyz", VReg_96 + "buffer_load_format_d16_xyz", v3i32 >; defm BUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MUBUF_Pseudo_Loads < - "buffer_load_format_d16_xyzw", VReg_128 + "buffer_load_format_d16_xyzw", v4i32 >; defm BUFFER_STORE_FORMAT_D16_X_gfx80 : MUBUF_Pseudo_Stores < - "buffer_store_format_d16_x", VGPR_32 + "buffer_store_format_d16_x", i32 >; defm BUFFER_STORE_FORMAT_D16_XY_gfx80 : MUBUF_Pseudo_Stores < - "buffer_store_format_d16_xy", VReg_64 + "buffer_store_format_d16_xy", v2i32 >; defm BUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MUBUF_Pseudo_Stores < - 
"buffer_store_format_d16_xyz", VReg_96 + "buffer_store_format_d16_xyz", v3i32 >; defm BUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MUBUF_Pseudo_Stores < - "buffer_store_format_d16_xyzw", VReg_128 + "buffer_store_format_d16_xyzw", v4i32 >; } // End HasUnpackedD16VMem. let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in { defm BUFFER_LOAD_FORMAT_D16_X : MUBUF_Pseudo_Loads < - "buffer_load_format_d16_x", VGPR_32 + "buffer_load_format_d16_x", f16 >; defm BUFFER_LOAD_FORMAT_D16_XY : MUBUF_Pseudo_Loads < - "buffer_load_format_d16_xy", VGPR_32 + "buffer_load_format_d16_xy", v2f16 >; defm BUFFER_LOAD_FORMAT_D16_XYZ : MUBUF_Pseudo_Loads < - "buffer_load_format_d16_xyz", VReg_64 + "buffer_load_format_d16_xyz", v3f16 >; defm BUFFER_LOAD_FORMAT_D16_XYZW : MUBUF_Pseudo_Loads < - "buffer_load_format_d16_xyzw", VReg_64 + "buffer_load_format_d16_xyzw", v4f16 >; defm BUFFER_STORE_FORMAT_D16_X : MUBUF_Pseudo_Stores < - "buffer_store_format_d16_x", VGPR_32 + "buffer_store_format_d16_x", f16 >; defm BUFFER_STORE_FORMAT_D16_XY : MUBUF_Pseudo_Stores < - "buffer_store_format_d16_xy", VGPR_32 + "buffer_store_format_d16_xy", v2f16 >; defm BUFFER_STORE_FORMAT_D16_XYZ : MUBUF_Pseudo_Stores < - "buffer_store_format_d16_xyz", VReg_64 + "buffer_store_format_d16_xyz", v3f16 >; defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Pseudo_Stores < - "buffer_store_format_d16_xyzw", VReg_64 + "buffer_store_format_d16_xyzw", v4f16 >; } // End HasPackedD16VMem. defm BUFFER_LOAD_UBYTE : MUBUF_Pseudo_Loads_Lds < - "buffer_load_ubyte", VGPR_32, i32 + "buffer_load_ubyte", i32 >; defm BUFFER_LOAD_SBYTE : MUBUF_Pseudo_Loads_Lds < - "buffer_load_sbyte", VGPR_32, i32 + "buffer_load_sbyte", i32 >; defm BUFFER_LOAD_USHORT : MUBUF_Pseudo_Loads_Lds < - "buffer_load_ushort", VGPR_32, i32 + "buffer_load_ushort", i32 >; defm BUFFER_LOAD_SSHORT : MUBUF_Pseudo_Loads_Lds < - "buffer_load_sshort", VGPR_32, i32 + "buffer_load_sshort", i32 >; defm BUFFER_LOAD_DWORD : MUBUF_Pseudo_Loads_Lds < - "buffer_load_dword", VGPR_32, i32 + "buffer_load_dword", i32 >; defm BUFFER_LOAD_DWORDX2 : MUBUF_Pseudo_Loads < - "buffer_load_dwordx2", VReg_64, v2i32 + "buffer_load_dwordx2", v2i32 >; defm BUFFER_LOAD_DWORDX3 : MUBUF_Pseudo_Loads < - "buffer_load_dwordx3", VReg_96, v3i32 + "buffer_load_dwordx3", v3i32 >; defm BUFFER_LOAD_DWORDX4 : MUBUF_Pseudo_Loads < - "buffer_load_dwordx4", VReg_128, v4i32 + "buffer_load_dwordx4", v4i32 >; defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, extloadi8_global>; @@ -867,111 +896,111 @@ defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX4", v4i32, load_global>; // in at least GFX8+ chips. See Bug 37653. 
let SubtargetPredicate = isGFX8GFX9 in { defm BUFFER_LOAD_DWORDX2_LDS : MUBUF_Pseudo_Loads < - "buffer_load_dwordx2", VReg_64, v2i32, null_frag, 0, 1 + "buffer_load_dwordx2", v2i32, null_frag, 0, 1 >; defm BUFFER_LOAD_DWORDX3_LDS : MUBUF_Pseudo_Loads < - "buffer_load_dwordx3", VReg_96, untyped, null_frag, 0, 1 + "buffer_load_dwordx3", v3i32, null_frag, 0, 1 >; defm BUFFER_LOAD_DWORDX4_LDS : MUBUF_Pseudo_Loads < - "buffer_load_dwordx4", VReg_128, v4i32, null_frag, 0, 1 + "buffer_load_dwordx4", v4i32, null_frag, 0, 1 >; } defm BUFFER_STORE_BYTE : MUBUF_Pseudo_Stores < - "buffer_store_byte", VGPR_32, i32, truncstorei8_global + "buffer_store_byte", i32, truncstorei8_global >; defm BUFFER_STORE_SHORT : MUBUF_Pseudo_Stores < - "buffer_store_short", VGPR_32, i32, truncstorei16_global + "buffer_store_short", i32, truncstorei16_global >; defm BUFFER_STORE_DWORD : MUBUF_Pseudo_Stores < - "buffer_store_dword", VGPR_32, i32, store_global + "buffer_store_dword", i32, store_global >; defm BUFFER_STORE_DWORDX2 : MUBUF_Pseudo_Stores < - "buffer_store_dwordx2", VReg_64, v2i32, store_global + "buffer_store_dwordx2", v2i32, store_global >; defm BUFFER_STORE_DWORDX3 : MUBUF_Pseudo_Stores < - "buffer_store_dwordx3", VReg_96, v3i32, store_global + "buffer_store_dwordx3", v3i32, store_global >; defm BUFFER_STORE_DWORDX4 : MUBUF_Pseudo_Stores < - "buffer_store_dwordx4", VReg_128, v4i32, store_global + "buffer_store_dwordx4", v4i32, store_global >; defm BUFFER_ATOMIC_SWAP : MUBUF_Pseudo_Atomics < - "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global + "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global_32 >; defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Pseudo_Atomics < "buffer_atomic_cmpswap", VReg_64, v2i32, null_frag >; defm BUFFER_ATOMIC_ADD : MUBUF_Pseudo_Atomics < - "buffer_atomic_add", VGPR_32, i32, atomic_add_global + "buffer_atomic_add", VGPR_32, i32, atomic_load_add_global_32 >; defm BUFFER_ATOMIC_SUB : MUBUF_Pseudo_Atomics < - "buffer_atomic_sub", VGPR_32, i32, atomic_sub_global + "buffer_atomic_sub", VGPR_32, i32, atomic_load_sub_global_32 >; defm BUFFER_ATOMIC_SMIN : MUBUF_Pseudo_Atomics < - "buffer_atomic_smin", VGPR_32, i32, atomic_min_global + "buffer_atomic_smin", VGPR_32, i32, atomic_load_min_global_32 >; defm BUFFER_ATOMIC_UMIN : MUBUF_Pseudo_Atomics < - "buffer_atomic_umin", VGPR_32, i32, atomic_umin_global + "buffer_atomic_umin", VGPR_32, i32, atomic_load_umin_global_32 >; defm BUFFER_ATOMIC_SMAX : MUBUF_Pseudo_Atomics < - "buffer_atomic_smax", VGPR_32, i32, atomic_max_global + "buffer_atomic_smax", VGPR_32, i32, atomic_load_max_global_32 >; defm BUFFER_ATOMIC_UMAX : MUBUF_Pseudo_Atomics < - "buffer_atomic_umax", VGPR_32, i32, atomic_umax_global + "buffer_atomic_umax", VGPR_32, i32, atomic_load_umax_global_32 >; defm BUFFER_ATOMIC_AND : MUBUF_Pseudo_Atomics < - "buffer_atomic_and", VGPR_32, i32, atomic_and_global + "buffer_atomic_and", VGPR_32, i32, atomic_load_and_global_32 >; defm BUFFER_ATOMIC_OR : MUBUF_Pseudo_Atomics < - "buffer_atomic_or", VGPR_32, i32, atomic_or_global + "buffer_atomic_or", VGPR_32, i32, atomic_load_or_global_32 >; defm BUFFER_ATOMIC_XOR : MUBUF_Pseudo_Atomics < - "buffer_atomic_xor", VGPR_32, i32, atomic_xor_global + "buffer_atomic_xor", VGPR_32, i32, atomic_load_xor_global_32 >; defm BUFFER_ATOMIC_INC : MUBUF_Pseudo_Atomics < - "buffer_atomic_inc", VGPR_32, i32, atomic_inc_global + "buffer_atomic_inc", VGPR_32, i32, atomic_inc_global_32 >; defm BUFFER_ATOMIC_DEC : MUBUF_Pseudo_Atomics < - "buffer_atomic_dec", VGPR_32, i32, atomic_dec_global + "buffer_atomic_dec", VGPR_32, 
i32, atomic_dec_global_32 >; defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_swap_x2", VReg_64, i64, atomic_swap_global + "buffer_atomic_swap_x2", VReg_64, i64, atomic_swap_global_64 >; defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Pseudo_Atomics < "buffer_atomic_cmpswap_x2", VReg_128, v2i64, null_frag >; defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_add_x2", VReg_64, i64, atomic_add_global + "buffer_atomic_add_x2", VReg_64, i64, atomic_load_add_global_64 >; defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_sub_x2", VReg_64, i64, atomic_sub_global + "buffer_atomic_sub_x2", VReg_64, i64, atomic_load_sub_global_64 >; defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_smin_x2", VReg_64, i64, atomic_min_global + "buffer_atomic_smin_x2", VReg_64, i64, atomic_load_min_global_64 >; defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_umin_x2", VReg_64, i64, atomic_umin_global + "buffer_atomic_umin_x2", VReg_64, i64, atomic_load_umin_global_64 >; defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_smax_x2", VReg_64, i64, atomic_max_global + "buffer_atomic_smax_x2", VReg_64, i64, atomic_load_max_global_64 >; defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_umax_x2", VReg_64, i64, atomic_umax_global + "buffer_atomic_umax_x2", VReg_64, i64, atomic_load_umax_global_64 >; defm BUFFER_ATOMIC_AND_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_and_x2", VReg_64, i64, atomic_and_global + "buffer_atomic_and_x2", VReg_64, i64, atomic_load_and_global_64 >; defm BUFFER_ATOMIC_OR_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_or_x2", VReg_64, i64, atomic_or_global + "buffer_atomic_or_x2", VReg_64, i64, atomic_load_or_global_64 >; defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_xor_x2", VReg_64, i64, atomic_xor_global + "buffer_atomic_xor_x2", VReg_64, i64, atomic_load_xor_global_64 >; defm BUFFER_ATOMIC_INC_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_inc_x2", VReg_64, i64, atomic_inc_global + "buffer_atomic_inc_x2", VReg_64, i64, atomic_inc_global_64 >; defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_dec_x2", VReg_64, i64, atomic_dec_global + "buffer_atomic_dec_x2", VReg_64, i64, atomic_dec_global_64 >; let SubtargetPredicate = isGFX8GFX9 in { @@ -981,58 +1010,75 @@ def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds <"buffer_store_lds_dword">; let SubtargetPredicate = isGFX6 in { // isn't on CI & VI /* defm BUFFER_ATOMIC_RSUB : MUBUF_Pseudo_Atomics <"buffer_atomic_rsub">; -defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics <"buffer_atomic_fcmpswap">; -defm BUFFER_ATOMIC_FMIN : MUBUF_Pseudo_Atomics <"buffer_atomic_fmin">; -defm BUFFER_ATOMIC_FMAX : MUBUF_Pseudo_Atomics <"buffer_atomic_fmax">; defm BUFFER_ATOMIC_RSUB_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_rsub_x2">; -defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fcmpswap_x2">; -defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fmin_x2">; -defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fmax_x2">; */ def BUFFER_WBINVL1_SC : MUBUF_Invalidate <"buffer_wbinvl1_sc", int_amdgcn_buffer_wbinvl1_sc>; } +let SubtargetPredicate = isGFX6GFX7GFX10 in { + +defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics < + "buffer_atomic_fcmpswap", VReg_64, v2f32, null_frag +>; +defm BUFFER_ATOMIC_FMIN : MUBUF_Pseudo_Atomics < + "buffer_atomic_fmin", VGPR_32, f32, null_frag +>; +defm BUFFER_ATOMIC_FMAX : MUBUF_Pseudo_Atomics < + "buffer_atomic_fmax", VGPR_32, f32, 
null_frag +>; +defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics < + "buffer_atomic_fcmpswap_x2", VReg_128, v2f64, null_frag +>; +defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Pseudo_Atomics < + "buffer_atomic_fmin_x2", VReg_64, f64, null_frag +>; +defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Pseudo_Atomics < + "buffer_atomic_fmax_x2", VReg_64, f64, null_frag +>; + +} + let SubtargetPredicate = HasD16LoadStore in { defm BUFFER_LOAD_UBYTE_D16 : MUBUF_Pseudo_Loads < - "buffer_load_ubyte_d16", VGPR_32, i32, null_frag, 1 + "buffer_load_ubyte_d16", i32, null_frag, 1 >; defm BUFFER_LOAD_UBYTE_D16_HI : MUBUF_Pseudo_Loads < - "buffer_load_ubyte_d16_hi", VGPR_32, i32, null_frag, 1 + "buffer_load_ubyte_d16_hi", i32, null_frag, 1 >; defm BUFFER_LOAD_SBYTE_D16 : MUBUF_Pseudo_Loads < - "buffer_load_sbyte_d16", VGPR_32, i32, null_frag, 1 + "buffer_load_sbyte_d16", i32, null_frag, 1 >; defm BUFFER_LOAD_SBYTE_D16_HI : MUBUF_Pseudo_Loads < - "buffer_load_sbyte_d16_hi", VGPR_32, i32, null_frag, 1 + "buffer_load_sbyte_d16_hi", i32, null_frag, 1 >; defm BUFFER_LOAD_SHORT_D16 : MUBUF_Pseudo_Loads < - "buffer_load_short_d16", VGPR_32, i32, null_frag, 1 + "buffer_load_short_d16", i32, null_frag, 1 >; defm BUFFER_LOAD_SHORT_D16_HI : MUBUF_Pseudo_Loads < - "buffer_load_short_d16_hi", VGPR_32, i32, null_frag, 1 + "buffer_load_short_d16_hi", i32, null_frag, 1 >; defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Pseudo_Stores < - "buffer_store_byte_d16_hi", VGPR_32, i32 + "buffer_store_byte_d16_hi", i32 >; defm BUFFER_STORE_SHORT_D16_HI : MUBUF_Pseudo_Stores < - "buffer_store_short_d16_hi", VGPR_32, i32 + "buffer_store_short_d16_hi", i32 >; defm BUFFER_LOAD_FORMAT_D16_HI_X : MUBUF_Pseudo_Loads < - "buffer_load_format_d16_hi_x", VGPR_32 + "buffer_load_format_d16_hi_x", i32 >; defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Pseudo_Stores < - "buffer_store_format_d16_hi_x", VGPR_32 + "buffer_store_format_d16_hi_x", i32 >; } // End HasD16LoadStore @@ -1043,10 +1089,10 @@ def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1", let SubtargetPredicate = HasAtomicFaddInsts in { defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN < - "buffer_atomic_add_f32", VGPR_32, f32, atomic_add_global + "buffer_atomic_add_f32", VGPR_32, f32, atomic_fadd_global_noret >; defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN < - "buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_add_global + "buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_pk_fadd_global_noret >; } // End SubtargetPredicate = HasAtomicFaddInsts @@ -1055,35 +1101,35 @@ defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN < // MTBUF Instructions //===----------------------------------------------------------------------===// -defm TBUFFER_LOAD_FORMAT_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_x", VGPR_32>; -defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_xy", VReg_64>; -defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyz", VReg_96>; -defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyzw", VReg_128>; -defm TBUFFER_STORE_FORMAT_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_x", VGPR_32>; -defm TBUFFER_STORE_FORMAT_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_xy", VReg_64>; -defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_96>; -defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128>; +defm TBUFFER_LOAD_FORMAT_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_x", VGPR_32, 1>; +defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Pseudo_Loads 
<"tbuffer_load_format_xy", VReg_64, 2>; +defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyz", VReg_96, 3>; +defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyzw", VReg_128, 4>; +defm TBUFFER_STORE_FORMAT_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_x", VGPR_32, 1>; +defm TBUFFER_STORE_FORMAT_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_xy", VReg_64, 2>; +defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_96, 3>; +defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128, 4>; let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in { - defm TBUFFER_LOAD_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32>; - defm TBUFFER_LOAD_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VReg_64>; - defm TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_96>; - defm TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_128>; - defm TBUFFER_STORE_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32>; - defm TBUFFER_STORE_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VReg_64>; - defm TBUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_96>; - defm TBUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_128>; + defm TBUFFER_LOAD_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32, 1>; + defm TBUFFER_LOAD_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VReg_64, 2>; + defm TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_96, 3>; + defm TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_128, 4>; + defm TBUFFER_STORE_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32, 1>; + defm TBUFFER_STORE_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VReg_64, 2>; + defm TBUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_96, 3>; + defm TBUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_128, 4>; } // End HasUnpackedD16VMem. 
let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in { - defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32>; - defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VGPR_32>; - defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_64>; - defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_64>; - defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32>; - defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VGPR_32>; - defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_64>; - defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_64>; + defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32, 1>; + defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VGPR_32, 2>; + defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_64, 3>; + defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_64, 4>; + defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32, 1>; + defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VGPR_32, 2>; + defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_64, 3>; + defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_64, 4>; } // End HasPackedD16VMem. let SubtargetPredicate = isGFX7Plus in { @@ -1118,6 +1164,10 @@ def extract_dlc : SDNodeXFormgetTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8); }]>; +def extract_swz : SDNodeXFormgetTargetConstant((N->getZExtValue() >> 3) & 1, SDLoc(N), MVT::i8); +}]>; + //===----------------------------------------------------------------------===// // buffer_load/store_format patterns //===----------------------------------------------------------------------===// @@ -1125,33 +1175,37 @@ def extract_dlc : SDNodeXForm { def : GCNPat< - (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0)), + (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, + timm:$auxiliary, 0)), (!cast(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< - (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0)), + (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, + timm:$auxiliary, 0)), (!cast(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< - (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm)), + (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, + timm:$auxiliary, timm)), (!cast(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 
0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< - (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm)), + (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, + timm:$auxiliary, timm)), (!cast(opcode # _BOTHEN) (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), $rsrc, $soffset, (as_i16imm $offset), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; } @@ -1182,8 +1236,12 @@ let SubtargetPredicate = HasPackedD16VMem in { defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; +defm : MUBUF_LoadIntrinsicPat; +defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; +defm : MUBUF_LoadIntrinsicPat; +defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; @@ -1196,36 +1254,40 @@ defm : MUBUF_LoadIntrinsicPat; multiclass MUBUF_StoreIntrinsicPat { def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0), + (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, + timm:$auxiliary, 0), (!cast(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0), + (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, + timm:$auxiliary, 0), (!cast(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset, - (as_i16imm $offset), (extract_glc $cachepolicy), - (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (as_i16imm $offset), (extract_glc $auxiliary), + (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm), + (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, + timm:$auxiliary, timm), (!cast(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset, - (as_i16imm $offset), (extract_glc $cachepolicy), - (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (as_i16imm $offset), (extract_glc $auxiliary), + (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm), + (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, + timm:$auxiliary, timm), (!cast(opcode # _BOTHEN_exact) $vdata, (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), - $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy), - (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + $rsrc, $soffset, (as_i16imm $offset), (extract_glc $auxiliary), + (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; } @@ -1256,8 +1318,12 @@ let SubtargetPredicate = HasPackedD16VMem in { defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; +defm : MUBUF_StoreIntrinsicPat; +defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; +defm : 
MUBUF_StoreIntrinsicPat; +defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; @@ -1273,32 +1339,32 @@ multiclass BufferAtomicPatterns { def : GCNPat< (vt (name vt:$vdata_in, v4i32:$rsrc, 0, - 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0)), + 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0)), (!cast(opcode # _OFFSET_RTN) $vdata_in, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)) >; def : GCNPat< (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, - 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm)), + 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm)), (!cast(opcode # _IDXEN_RTN) $vdata_in, $vindex, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)) >; def : GCNPat< (vt (name vt:$vdata_in, v4i32:$rsrc, 0, - i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0)), + i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0)), (!cast(opcode # _OFFEN_RTN) $vdata_in, $voffset, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)) >; def : GCNPat< (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, - i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm)), + i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm)), (!cast(opcode # _BOTHEN_RTN) $vdata_in, (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), @@ -1316,6 +1382,8 @@ defm : BufferAtomicPatterns; defm : BufferAtomicPatterns; defm : BufferAtomicPatterns; defm : BufferAtomicPatterns; +defm : BufferAtomicPatterns; +defm : BufferAtomicPatterns; defm : BufferAtomicPatterns; defm : BufferAtomicPatterns; defm : BufferAtomicPatterns; @@ -1326,37 +1394,39 @@ defm : BufferAtomicPatterns; defm : BufferAtomicPatterns; defm : BufferAtomicPatterns; defm : BufferAtomicPatterns; +defm : BufferAtomicPatterns; +defm : BufferAtomicPatterns; multiclass BufferAtomicPatterns_NO_RTN { def : GCNPat< (name vt:$vdata_in, v4i32:$rsrc, 0, - 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0), + 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0), (!cast(opcode # _OFFSET) $vdata_in, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)) >; def : GCNPat< (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, - 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm), + 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm), (!cast(opcode # _IDXEN) $vdata_in, $vindex, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)) >; def : GCNPat< (name vt:$vdata_in, v4i32:$rsrc, 0, - i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0), + i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0), (!cast(opcode # _OFFEN) $vdata_in, $voffset, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)) >; def : GCNPat< (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, - i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm), + i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm), (!cast(opcode # _BOTHEN) $vdata_in, (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), @@ -1370,8 +1440,8 @@ defm : BufferAtomicPatterns_NO_RTN : GCNPat < (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, - i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))), - (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc) + i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))), + (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz) >; multiclass MUBUFLoad_Atomic_Pattern ; def : GCNPat < (vt 
(atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))), - (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0) + (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0, 0) >; } @@ -1454,8 +1524,8 @@ multiclass MUBUFLoad_Pattern ; } @@ -1478,12 +1548,12 @@ multiclass MUBUFScratchLoadPat ; def : GCNPat < (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))), - (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0) + (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, 0) >; } @@ -1493,12 +1563,12 @@ multiclass MUBUFScratchLoadPat_D16 { def : GCNPat < (ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset), vt:$in), - (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, $in) + (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, $in) >; def : GCNPat < (ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset), vt:$in), - (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, $in) + (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, $in) >; } @@ -1512,7 +1582,10 @@ defm : MUBUFScratchLoadPat ; defm : MUBUFScratchLoadPat ; defm : MUBUFScratchLoadPat ; + +foreach vt = Reg32Types.types in { defm : MUBUFScratchLoadPat ; +} defm : MUBUFScratchLoadPat ; defm : MUBUFScratchLoadPat ; defm : MUBUFScratchLoadPat ; @@ -1535,16 +1608,16 @@ defm : MUBUFScratchLoadPat_D16 { - // Store follows atomic op convention so address is forst + // Store follows atomic op convention so address is first def : GCNPat < (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc), vt:$val), - (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0) + (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0, 0) >; def : GCNPat < (atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val), - (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0) + (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0, 0) >; } let SubtargetPredicate = isGFX6GFX7 in { @@ -1558,8 +1631,8 @@ multiclass MUBUFStore_Pattern ; } @@ -1573,13 +1646,13 @@ multiclass MUBUFScratchStorePat ; def : GCNPat < (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset)), - (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0) + (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0) >; } @@ -1587,7 +1660,11 @@ defm : MUBUFScratchStorePat ; defm : MUBUFScratchStorePat ; defm : MUBUFScratchStorePat ; -defm : MUBUFScratchStorePat ; + +foreach vt = Reg32Types.types in { +defm : MUBUFScratchStorePat ; +} + defm : MUBUFScratchStorePat ; defm : MUBUFScratchStorePat ; defm : MUBUFScratchStorePat ; @@ -1613,37 +1690,41 @@ defm : MUBUFScratchStorePat { def : GCNPat< - (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, 0)), + (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, + timm:$format, timm:$auxiliary, 0)), (!cast(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< - (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, imm)), + (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, + timm:$format, timm:$auxiliary, timm)), (!cast(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm 
$offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< - (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, 0)), + (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, + timm:$format, timm:$auxiliary, 0)), (!cast(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< - (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, imm)), + (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, + timm:$format, timm:$auxiliary, timm)), (!cast(opcode # _BOTHEN) (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; } @@ -1671,37 +1752,41 @@ let SubtargetPredicate = HasPackedD16VMem in { multiclass MTBUF_StoreIntrinsicPat { def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, 0), + (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, + timm:$format, timm:$auxiliary, 0), (!cast(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, imm), + (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, + timm:$format, timm:$auxiliary, timm), (!cast(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, 0), + (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, + timm:$format, timm:$auxiliary, 0), (!cast(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, - imm:$offset, imm:$format, imm:$cachepolicy, imm), + timm:$offset, timm:$format, timm:$auxiliary, timm), (!cast(opcode # _BOTHEN_exact) $vdata, (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), 
(extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; } @@ -1957,10 +2042,9 @@ defm BUFFER_ATOMIC_OR : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03a>; defm BUFFER_ATOMIC_XOR : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03b>; defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03c>; defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03d>; -// FIXME-GFX6-GFX7-GFX10: Add following instructions: -//defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03e>; -//defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03f>; -//defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x040>; +defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03e>; +defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03f>; +defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x040>; defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x050>; defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x051>; defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x052>; @@ -1975,10 +2059,9 @@ defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05b>; defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05c>; defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05d>; // FIXME-GFX7: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on GFX7. -// FIXME-GFX6-GFX7-GFX10: Add following instructions: -//defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>; -//defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>; -//defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>; +defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>; +defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>; +defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>; defm BUFFER_WBINVL1_SC : MUBUF_Real_gfx6<0x070>; defm BUFFER_WBINVL1_VOL : MUBUF_Real_gfx7<0x070>; @@ -2353,7 +2436,7 @@ let SubtargetPredicate = HasPackedD16VMem in { def MUBUFInfoTable : GenericTable { let FilterClass = "MUBUF_Pseudo"; let CppTypeName = "MUBUFInfo"; - let Fields = ["Opcode", "BaseOpcode", "dwords", "has_vaddr", "has_srsrc", "has_soffset"]; + let Fields = ["Opcode", "BaseOpcode", "elements", "has_vaddr", "has_srsrc", "has_soffset"]; let PrimaryKey = ["Opcode"]; let PrimaryKeyName = "getMUBUFOpcodeHelper"; @@ -2364,7 +2447,26 @@ def getMUBUFInfoFromOpcode : SearchIndex { let Key = ["Opcode"]; } -def getMUBUFInfoFromBaseOpcodeAndDwords : SearchIndex { +def getMUBUFInfoFromBaseOpcodeAndElements : SearchIndex { let Table = MUBUFInfoTable; - let Key = ["BaseOpcode", "dwords"]; + let Key = ["BaseOpcode", "elements"]; +} + +def MTBUFInfoTable : GenericTable { + let FilterClass = "MTBUF_Pseudo"; + let CppTypeName = "MTBUFInfo"; + let Fields = ["Opcode", "BaseOpcode", "elements", "has_vaddr", "has_srsrc", "has_soffset"]; + + let PrimaryKey = ["Opcode"]; + let PrimaryKeyName = "getMTBUFOpcodeHelper"; +} + +def getMTBUFInfoFromOpcode : SearchIndex { + let Table = MTBUFInfoTable; + let Key = ["Opcode"]; +} + +def getMTBUFInfoFromBaseOpcodeAndElements : SearchIndex { + let Table = MTBUFInfoTable; + let Key = ["BaseOpcode", "elements"]; } diff --git a/lib/Target/AMDGPU/DSInstructions.td b/lib/Target/AMDGPU/DSInstructions.td index c52eaaa3fdc5..816ec14a0e98 100644 --- a/lib/Target/AMDGPU/DSInstructions.td +++ b/lib/Target/AMDGPU/DSInstructions.td @@ -81,6 +81,17 @@ class DS_Real : // DS Pseudo instructions +class 
DS_0A1D_NORET +: DS_Pseudo { + + let has_addr = 0; + let has_data1 = 0; + let has_vdst = 0; +} + class DS_1A1D_NORET : DS_Pseudo class DS_GWS_0D : DS_GWS; + (ins offset:$offset, gds:$gds), "$offset gds"> { + let hasSideEffects = 1; +} class DS_GWS_1D : DS_GWS { let has_gws_data0 = 1; + let hasSideEffects = 1; } class DS_VOID : DS_Pseudo; def DS_WRITE_B16_D16_HI : DS_1A1D_NORET<"ds_write_b16_d16_hi">; } +} // End has_m0_read = 0 + let SubtargetPredicate = HasDSAddTid in { -def DS_WRITE_ADDTID_B32 : DS_1A1D_NORET<"ds_write_addtid_b32">; +def DS_WRITE_ADDTID_B32 : DS_0A1D_NORET<"ds_write_addtid_b32">; } -} // End has_m0_read = 0 } // End mayLoad = 0 defm DS_MSKOR_B32 : DS_1A2D_NORET_mc<"ds_mskor_b32">; @@ -540,13 +555,14 @@ def DS_READ_I8_D16_HI : DS_1A_RET_Tied<"ds_read_i8_d16_hi">; def DS_READ_U16_D16 : DS_1A_RET_Tied<"ds_read_u16_d16">; def DS_READ_U16_D16_HI : DS_1A_RET_Tied<"ds_read_u16_d16_hi">; } +} // End has_m0_read = 0 let SubtargetPredicate = HasDSAddTid in { -def DS_READ_ADDTID_B32 : DS_1A_RET<"ds_read_addtid_b32">; -} -} // End has_m0_read = 0 +def DS_READ_ADDTID_B32 : DS_0A_RET<"ds_read_addtid_b32">; } +} // End mayStore = 0 + def DS_CONSUME : DS_0A_RET<"ds_consume">; def DS_APPEND : DS_0A_RET<"ds_append">; def DS_ORDERED_COUNT : DS_1A_RET_GDS<"ds_ordered_count">; @@ -600,13 +616,13 @@ def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">; //===----------------------------------------------------------------------===// def : GCNPat < - (int_amdgcn_ds_swizzle i32:$src, imm:$offset16), + (int_amdgcn_ds_swizzle i32:$src, timm:$offset16), (DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0)) >; class DSReadPat : GCNPat < - (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))), - (inst $ptr, (as_i16imm $offset), (i1 gds)) + (vt (frag (DS1Addr1Offset i32:$ptr, i16:$offset))), + (inst $ptr, offset:$offset, (i1 gds)) >; multiclass DSReadPat_mc { @@ -621,8 +637,8 @@ multiclass DSReadPat_mc { } class DSReadPat_D16 : GCNPat < - (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$in), - (inst $ptr, (as_i16imm $offset), (i1 0), $in) + (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$in), + (inst $ptr, offset:$offset, (i1 0), $in) >; defm : DSReadPat_mc ; @@ -636,13 +652,20 @@ defm : DSReadPat_mc ; defm : DSReadPat_mc ; defm : DSReadPat_mc ; defm : DSReadPat_mc ; -defm : DSReadPat_mc ; + +foreach vt = Reg32Types.types in { +defm : DSReadPat_mc ; +} + defm : DSReadPat_mc ; defm : DSReadPat_mc ; let AddedComplexity = 100 in { -defm : DSReadPat_mc ; +foreach vt = VReg_64.RegTypes in { +defm : DSReadPat_mc ; +} + defm : DSReadPat_mc ; } // End AddedComplexity = 100 @@ -664,8 +687,8 @@ def : DSReadPat_D16; } class DSWritePat : GCNPat < - (frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)), - (inst $ptr, $value, (as_i16imm $offset), (i1 gds)) + (frag vt:$value, (DS1Addr1Offset i32:$ptr, i16:$offset)), + (inst $ptr, getVregSrcForVT.ret:$value, offset:$offset, (i1 gds)) >; multiclass DSWritePat_mc { @@ -681,8 +704,8 @@ multiclass DSWritePat_mc { // Irritatingly, atomic_store reverses the order of operands from a // normal store. 
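// Aside on the extract_glc/extract_slc/extract_dlc/extract_swz transforms used by the
// buffer intrinsic patterns earlier in this patch: each one peels a single bit out of
// the combined "auxiliary" immediate. The shifts for dlc (bit 2) and the newly added
// swz (bit 3) are taken directly from the SDNodeXForm bodies above; treating glc and
// slc as bits 0 and 1 is an assumption made only for this illustration, not something
// this patch states. A minimal stand-alone sketch of that bit layout:
#include <cstdint>

constexpr uint32_t extractGLC(uint32_t Aux) { return Aux & 1; }        // assumed bit 0
constexpr uint32_t extractSLC(uint32_t Aux) { return (Aux >> 1) & 1; } // assumed bit 1
constexpr uint32_t extractDLC(uint32_t Aux) { return (Aux >> 2) & 1; } // bit 2, per extract_dlc
constexpr uint32_t extractSWZ(uint32_t Aux) { return (Aux >> 3) & 1; } // bit 3, per extract_swz

static_assert(extractSWZ(0x8) == 1 && extractDLC(0x8) == 0, "swz is the new bit-3 field");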
class DSAtomicWritePat : GCNPat < - (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value), - (inst $ptr, $value, (as_i16imm $offset), (i1 0)) + (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$value), + (inst $ptr, $value, offset:$offset, (i1 0)) >; multiclass DSAtomicWritePat_mc { @@ -699,9 +722,13 @@ defm : DSWritePat_mc ; defm : DSWritePat_mc ; defm : DSWritePat_mc ; defm : DSWritePat_mc ; -defm : DSWritePat_mc ; -defm : DSAtomicWritePat_mc ; -defm : DSAtomicWritePat_mc ; + +foreach vt = VGPR_32.RegTypes in { +defm : DSWritePat_mc ; +} + +defm : DSAtomicWritePat_mc ; +defm : DSAtomicWritePat_mc ; let OtherPredicates = [D16PreservesUnusedBits] in { def : DSWritePat ; @@ -736,46 +763,49 @@ def : DS64Bit4ByteAlignedWritePat; let AddedComplexity = 100 in { -defm : DSWritePat_mc ; +foreach vt = VReg_64.RegTypes in { +defm : DSWritePat_mc ; +} + defm : DSWritePat_mc ; } // End AddedComplexity = 100 class DSAtomicRetPat : GCNPat < - (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value), - (inst $ptr, $value, (as_i16imm $offset), (i1 gds)) + (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$value), + (inst $ptr, getVregSrcForVT.ret:$value, offset:$offset, (i1 gds)) >; multiclass DSAtomicRetPat_mc { let OtherPredicates = [LDSRequiresM0Init] in { - def : DSAtomicRetPat(frag#"_local_m0")>; + def : DSAtomicRetPat(frag#"_local_m0_"#vt.Size)>; } let OtherPredicates = [NotLDSRequiresM0Init] in { def : DSAtomicRetPat(!cast(inst)#"_gfx9"), vt, - !cast(frag#"_local")>; + !cast(frag#"_local_"#vt.Size)>; } - def : DSAtomicRetPat(frag#"_region_m0"), 1>; + def : DSAtomicRetPat(frag#"_region_m0_"#vt.Size), 1>; } class DSAtomicCmpXChg : GCNPat < - (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap), - (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 gds)) + (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$cmp, vt:$swap), + (inst $ptr, getVregSrcForVT.ret:$cmp, getVregSrcForVT.ret:$swap, offset:$offset, (i1 gds)) >; multiclass DSAtomicCmpXChg_mc { let OtherPredicates = [LDSRequiresM0Init] in { - def : DSAtomicCmpXChg(frag#"_local_m0")>; + def : DSAtomicCmpXChg(frag#"_local_m0_"#vt.Size)>; } let OtherPredicates = [NotLDSRequiresM0Init] in { def : DSAtomicCmpXChg(!cast(inst)#"_gfx9"), vt, - !cast(frag#"_local")>; + !cast(frag#"_local_"#vt.Size)>; } - def : DSAtomicCmpXChg(frag#"_region_m0"), 1>; + def : DSAtomicCmpXChg(frag#"_region_m0_"#vt.Size), 1>; } diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 4ec4be9bc485..ec2e2c4e8b71 100644 --- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -1095,6 +1095,7 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const { case 106: return createRegOperand(VCC); case 108: return createRegOperand(TBA); case 110: return createRegOperand(TMA); + case 125: return createRegOperand(SGPR_NULL); case 126: return createRegOperand(EXEC); case 235: return createRegOperand(SRC_SHARED_BASE); case 236: return createRegOperand(SRC_SHARED_LIMIT); @@ -1172,7 +1173,8 @@ MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const { int TTmpIdx = getTTmpIdx(Val); if (TTmpIdx >= 0) { - return createSRegOperand(getTtmpClassId(OPW64), TTmpIdx); + auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32); + return createSRegOperand(TTmpClsId, TTmpIdx); } else if (Val > SGPR_MAX) { return IsWave64 ? 
decodeSpecialReg64(Val) : decodeSpecialReg32(Val); diff --git a/lib/Target/AMDGPU/EvergreenInstructions.td b/lib/Target/AMDGPU/EvergreenInstructions.td index 0550092ce1d6..792e26d21f98 100644 --- a/lib/Target/AMDGPU/EvergreenInstructions.td +++ b/lib/Target/AMDGPU/EvergreenInstructions.td @@ -322,46 +322,46 @@ def : EGOrCaymanPat<(i32 (atomic_cmp_swap_global_noret i32:$ptr, i32:$cmp, i32:$ defm AtomicSwapPat : AtomicPat ; + atomic_swap_global_ret_32, + atomic_swap_global_noret_32>; defm AtomicAddPat : AtomicPat ; + atomic_load_add_global_ret_32, atomic_load_add_global_noret_32>; defm AtomicSubPat : AtomicPat ; + atomic_load_sub_global_ret_32, atomic_load_sub_global_noret_32>; defm AtomicMinPat : AtomicPat ; + atomic_load_min_global_ret_32, atomic_load_min_global_noret_32>; defm AtomicUMinPat : AtomicPat ; + atomic_load_umin_global_ret_32, atomic_load_umin_global_noret_32>; defm AtomicMaxPat : AtomicPat ; + atomic_load_max_global_ret_32, atomic_load_max_global_noret_32>; defm AtomicUMaxPat : AtomicPat ; + atomic_load_umax_global_ret_32, atomic_load_umax_global_noret_32>; defm AtomicAndPat : AtomicPat ; + atomic_load_and_global_ret_32, atomic_load_and_global_noret_32>; defm AtomicOrPat : AtomicPat ; + atomic_load_or_global_ret_32, atomic_load_or_global_noret_32>; defm AtomicXorPat : AtomicPat ; + atomic_load_xor_global_ret_32, atomic_load_xor_global_noret_32>; defm AtomicIncAddPat : AtomicIncDecPat ; + atomic_load_add_global_ret_32, + atomic_load_add_global_noret_32, 1>; defm AtomicIncSubPat : AtomicIncDecPat ; + atomic_load_sub_global_ret_32, + atomic_load_sub_global_noret_32, -1>; defm AtomicDecAddPat : AtomicIncDecPat ; + atomic_load_add_global_ret_32, + atomic_load_add_global_noret_32, -1>; defm AtomicDecSubPat : AtomicIncDecPat ; + atomic_load_sub_global_ret_32, + atomic_load_sub_global_noret_32, 1>; // Should be predicated on FeatureFP64 // def FMA_64 : R600_3OP < @@ -628,37 +628,37 @@ def LDS_SHORT_WRITE : R600_LDS_1A1D_NORET<0x13, "LDS_SHORT_WRITE", [(truncstorei16_local i32:$src1, i32:$src0)] >; def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD", - [(set i32:$dst, (atomic_load_add_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_add_local_32 i32:$src0, i32:$src1))] >; def LDS_SUB_RET : R600_LDS_1A1D_RET <0x21, "LDS_SUB", - [(set i32:$dst, (atomic_load_sub_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_sub_local_32 i32:$src0, i32:$src1))] >; def LDS_AND_RET : R600_LDS_1A1D_RET <0x29, "LDS_AND", - [(set i32:$dst, (atomic_load_and_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_and_local_32 i32:$src0, i32:$src1))] >; def LDS_OR_RET : R600_LDS_1A1D_RET <0x2a, "LDS_OR", - [(set i32:$dst, (atomic_load_or_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_or_local_32 i32:$src0, i32:$src1))] >; def LDS_XOR_RET : R600_LDS_1A1D_RET <0x2b, "LDS_XOR", - [(set i32:$dst, (atomic_load_xor_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_xor_local_32 i32:$src0, i32:$src1))] >; def LDS_MIN_INT_RET : R600_LDS_1A1D_RET <0x25, "LDS_MIN_INT", - [(set i32:$dst, (atomic_load_min_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_min_local_32 i32:$src0, i32:$src1))] >; def LDS_MAX_INT_RET : R600_LDS_1A1D_RET <0x26, "LDS_MAX_INT", - [(set i32:$dst, (atomic_load_max_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_max_local_32 i32:$src0, i32:$src1))] >; def LDS_MIN_UINT_RET : R600_LDS_1A1D_RET <0x27, "LDS_MIN_UINT", - [(set i32:$dst, (atomic_load_umin_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_umin_local_32 
i32:$src0, i32:$src1))] >; def LDS_MAX_UINT_RET : R600_LDS_1A1D_RET <0x28, "LDS_MAX_UINT", - [(set i32:$dst, (atomic_load_umax_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_umax_local_32 i32:$src0, i32:$src1))] >; def LDS_WRXCHG_RET : R600_LDS_1A1D_RET <0x2d, "LDS_WRXCHG", - [(set i32:$dst, (atomic_swap_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_swap_local_32 i32:$src0, i32:$src1))] >; def LDS_CMPST_RET : R600_LDS_1A2D_RET <0x30, "LDS_CMPST", - [(set i32:$dst, (atomic_cmp_swap_local i32:$src0, i32:$src1, i32:$src2))] + [(set i32:$dst, (atomic_cmp_swap_local_32 i32:$src0, i32:$src1, i32:$src2))] >; def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET", [(set (i32 R600_Reg32:$dst), (load_local R600_Reg32:$src0))] diff --git a/lib/Target/AMDGPU/FLATInstructions.td b/lib/Target/AMDGPU/FLATInstructions.td index 889f60dae920..80ee17eba141 100644 --- a/lib/Target/AMDGPU/FLATInstructions.td +++ b/lib/Target/AMDGPU/FLATInstructions.td @@ -270,7 +270,7 @@ multiclass FLAT_Atomic_Pseudo< SDPatternOperator atomic = null_frag, ValueType data_vt = vt, RegisterClass data_rc = vdst_rc, - bit isFP = getIsFP.ret> { + bit isFP = isFloatType.ret> { def "" : FLAT_AtomicNoRet_Pseudo .ret> { + bit isFP = isFloatType.ret> { def "" : FLAT_AtomicNoRet_Pseudo .ret> { + bit isFP = isFloatType.ret> { def _RTN : FLAT_AtomicRet_Pseudo ; defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap", - VGPR_32, i32, atomic_swap_global>; + VGPR_32, i32, atomic_swap_global_32>; defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2", - VReg_64, i64, atomic_swap_global>; + VReg_64, i64, atomic_swap_global_64>; defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add", - VGPR_32, i32, atomic_add_global>; + VGPR_32, i32, atomic_load_add_global_32>; defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub", - VGPR_32, i32, atomic_sub_global>; + VGPR_32, i32, atomic_load_sub_global_32>; defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin", - VGPR_32, i32, atomic_min_global>; + VGPR_32, i32, atomic_load_min_global_32>; defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin", - VGPR_32, i32, atomic_umin_global>; + VGPR_32, i32, atomic_load_umin_global_32>; defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax", - VGPR_32, i32, atomic_max_global>; + VGPR_32, i32, atomic_load_max_global_32>; defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax", - VGPR_32, i32, atomic_umax_global>; + VGPR_32, i32, atomic_load_umax_global_32>; defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and", - VGPR_32, i32, atomic_and_global>; + VGPR_32, i32, atomic_load_and_global_32>; defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or", - VGPR_32, i32, atomic_or_global>; + VGPR_32, i32, atomic_load_or_global_32>; defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor", - VGPR_32, i32, atomic_xor_global>; + VGPR_32, i32, atomic_load_xor_global_32>; defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <"global_atomic_inc", - VGPR_32, i32, atomic_inc_global>; + VGPR_32, i32, atomic_inc_global_32>; defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec", - VGPR_32, i32, atomic_dec_global>; + VGPR_32, i32, atomic_dec_global_32>; defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2", - VReg_64, i64, atomic_add_global>; + VReg_64, i64, atomic_load_add_global_64>; defm GLOBAL_ATOMIC_SUB_X2 : 
FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2", - VReg_64, i64, atomic_sub_global>; + VReg_64, i64, atomic_load_sub_global_64>; defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2", - VReg_64, i64, atomic_min_global>; + VReg_64, i64, atomic_load_min_global_64>; defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2", - VReg_64, i64, atomic_umin_global>; + VReg_64, i64, atomic_load_umin_global_64>; defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2", - VReg_64, i64, atomic_max_global>; + VReg_64, i64, atomic_load_max_global_64>; defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2", - VReg_64, i64, atomic_umax_global>; + VReg_64, i64, atomic_load_umax_global_64>; defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2", - VReg_64, i64, atomic_and_global>; + VReg_64, i64, atomic_load_and_global_64>; defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2", - VReg_64, i64, atomic_or_global>; + VReg_64, i64, atomic_load_or_global_64>; defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2", - VReg_64, i64, atomic_xor_global>; + VReg_64, i64, atomic_load_xor_global_64>; defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2", - VReg_64, i64, atomic_inc_global>; + VReg_64, i64, atomic_inc_global_64>; defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2", - VReg_64, i64, atomic_dec_global>; + VReg_64, i64, atomic_dec_global_64>; } // End is_flat_global = 1 } // End SubtargetPredicate = HasFlatGlobalInsts @@ -686,10 +686,10 @@ let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in { let SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1 in { defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN < - "global_atomic_add_f32", VGPR_32, f32, atomic_add_global + "global_atomic_add_f32", VGPR_32, f32, atomic_fadd_global_noret >; defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN < - "global_atomic_pk_add_f16", VGPR_32, v2f16, atomic_add_global + "global_atomic_pk_add_f16", VGPR_32, v2f16, atomic_pk_fadd_global_noret >; } // End SubtargetPredicate = HasAtomicFaddInsts @@ -777,8 +777,6 @@ def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; @@ -787,41 +785,50 @@ def : FlatLoadAtomicPat ; def : FlatStorePat ; def : FlatStorePat ; -def : FlatStorePat ; -def : FlatStorePat ; + +foreach vt = Reg32Types.types in { +def : FlatLoadPat ; +def : FlatStorePat ; +} + +foreach vt = VReg_64.RegTypes in { +def : FlatStorePat ; +def : FlatLoadPat ; +} + def : FlatStorePat ; def : FlatStorePat ; def : FlatStoreAtomicPat ; def : FlatStoreAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; def : FlatAtomicPat ; -def : FlatAtomicPat ; - -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : 
FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; +def : FlatAtomicPat ; + +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; def : FlatAtomicPat ; -def : FlatAtomicPat ; +def : FlatAtomicPat ; def : FlatStorePat ; def : FlatStorePat ; @@ -847,9 +854,6 @@ def : FlatLoadPat_D16 ; } // End OtherPredicates = [HasFlatAddressSpace] -def atomic_fadd_global : global_binary_atomic_op_frag; -def atomic_pk_fadd_global : global_binary_atomic_op_frag; - let OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 in { def : FlatLoadSignedPat ; @@ -863,8 +867,16 @@ def : FlatLoadSignedPat ; def : FlatLoadSignedPat ; def : FlatLoadSignedPat ; -def : FlatLoadSignedPat ; -def : FlatLoadSignedPat ; +foreach vt = Reg32Types.types in { +def : FlatLoadSignedPat ; +def : FlatStoreSignedPat ; +} + +foreach vt = VReg_64.RegTypes in { +def : FlatLoadSignedPat ; +def : FlatStoreSignedPat ; +} + def : FlatLoadSignedPat ; def : FlatLoadSignedPat ; @@ -875,8 +887,6 @@ def : FlatStoreSignedPat ; def : FlatStoreSignedPat ; def : FlatStoreSignedPat ; def : FlatStoreSignedPat ; -def : FlatStoreSignedPat ; -def : FlatStoreSignedPat ; def : FlatStoreSignedPat ; def : FlatStoreSignedPat ; @@ -902,36 +912,36 @@ def : FlatSignedLoadPat_D16 ; def : FlatStoreSignedAtomicPat ; def : FlatStoreSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; - -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; + +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; -def : FlatAtomicPatNoRtn ; -def : FlatAtomicPatNoRtn ; +def : FlatAtomicPatNoRtn ; +def : FlatAtomicPatNoRtn ; } // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 @@ -1174,7 +1184,7 @@ class FLAT_Real_gfx10 op, FLAT_Pseudo ps> : let AssemblerPredicate = isGFX10Plus; let DecoderNamespace = "GFX10"; - let Inst{11-0} = {offset{12}, offset{10-0}}; + let Inst{11-0} = offset{11-0}; let Inst{12} = !if(ps.has_dlc, dlc, ps.dlcValue); let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d); let Inst{55} = 0; diff --git 
a/lib/Target/AMDGPU/GCNDPPCombine.cpp b/lib/Target/AMDGPU/GCNDPPCombine.cpp index e1845e2e8e87..98678873e37c 100644 --- a/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -41,6 +41,7 @@ #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -155,8 +156,6 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, RegSubRegPair CombOldVGPR, bool CombBCZ) const { assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp); - assert(TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg() == - TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)->getReg()); auto OrigOp = OrigMI.getOpcode(); auto DPPOp = getDPPOp(OrigOp); @@ -178,7 +177,9 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, if (OldIdx != -1) { assert(OldIdx == NumOperands); assert(isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI)); - DPPInst.addReg(CombOldVGPR.Reg, 0, CombOldVGPR.SubReg); + auto *Def = getVRegSubRegDef(CombOldVGPR, *MRI); + DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef, + CombOldVGPR.SubReg); ++NumOperands; } else { // TODO: this discards MAC/FMA instructions for now, let's add it later @@ -195,6 +196,10 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))); DPPInst.addImm(Mod0->getImm()); ++NumOperands; + } else if (AMDGPU::getNamedOperandIdx(DPPOp, + AMDGPU::OpName::src0_modifiers) != -1) { + DPPInst.addImm(0); + ++NumOperands; } auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0); assert(Src0); @@ -214,6 +219,10 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))); DPPInst.addImm(Mod1->getImm()); ++NumOperands; + } else if (AMDGPU::getNamedOperandIdx(DPPOp, + AMDGPU::OpName::src1_modifiers) != -1) { + DPPInst.addImm(0); + ++NumOperands; } if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) { if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) { @@ -344,6 +353,10 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst); assert(DstOpnd && DstOpnd->isReg()); auto DPPMovReg = DstOpnd->getReg(); + if (DPPMovReg.isPhysical()) { + LLVM_DEBUG(dbgs() << " failed: dpp move writes physreg\n"); + return false; + } if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) { LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same" " for all uses\n"); @@ -362,7 +375,13 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { bool BoundCtrlZero = BCZOpnd->getImm(); auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old); + auto *SrcOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0); assert(OldOpnd && OldOpnd->isReg()); + assert(SrcOpnd && SrcOpnd->isReg()); + if (OldOpnd->getReg().isPhysical() || SrcOpnd->getReg().isPhysical()) { + LLVM_DEBUG(dbgs() << " failed: dpp move reads physreg\n"); + return false; + } auto * const OldOpndValue = getOldOpndValue(*OldOpnd); // OldOpndValue is either undef (IMPLICIT_DEF) or immediate or something else @@ -408,6 +427,7 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { dbgs() << ", bound_ctrl=" << CombBCZ << '\n'); SmallVector OrigMIs, DPPMIs; + DenseMap> RegSeqWithOpNos; auto CombOldVGPR = 
getRegSubRegPair(*OldOpnd); // try to reuse previous old reg if its undefined (IMPLICIT_DEF) if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef @@ -420,13 +440,49 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { OrigMIs.push_back(&MovMI); bool Rollback = true; + SmallVector Uses; + for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) { + Uses.push_back(&Use); + } + + while (!Uses.empty()) { + MachineOperand *Use = Uses.pop_back_val(); Rollback = true; - auto &OrigMI = *Use.getParent(); + auto &OrigMI = *Use->getParent(); LLVM_DEBUG(dbgs() << " try: " << OrigMI); auto OrigOp = OrigMI.getOpcode(); + if (OrigOp == AMDGPU::REG_SEQUENCE) { + Register FwdReg = OrigMI.getOperand(0).getReg(); + unsigned FwdSubReg = 0; + + if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI)) { + LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same" + " for all uses\n"); + break; + } + + unsigned OpNo, E = OrigMI.getNumOperands(); + for (OpNo = 1; OpNo < E; OpNo += 2) { + if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) { + FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm(); + break; + } + } + + if (!FwdSubReg) + break; + + for (auto &Op : MRI->use_nodbg_operands(FwdReg)) { + if (Op.getSubReg() == FwdSubReg) + Uses.push_back(&Op); + } + RegSeqWithOpNos[&OrigMI].push_back(OpNo); + continue; + } + if (TII->isVOP3(OrigOp)) { if (!TII->hasVALU32BitEncoding(OrigOp)) { LLVM_DEBUG(dbgs() << " failed: VOP3 hasn't e32 equivalent\n"); @@ -447,14 +503,14 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { } LLVM_DEBUG(dbgs() << " combining: " << OrigMI); - if (&Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) { + if (Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) { if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR, OldOpndValue, CombBCZ)) { DPPMIs.push_back(DPPInst); Rollback = false; } } else if (OrigMI.isCommutable() && - &Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) { + Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) { auto *BB = OrigMI.getParent(); auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI); BB->insert(OrigMI, NewMI); @@ -475,9 +531,22 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { OrigMIs.push_back(&OrigMI); } + Rollback |= !Uses.empty(); + for (auto *MI : *(Rollback? 
&DPPMIs : &OrigMIs)) MI->eraseFromParent(); + if (!Rollback) { + for (auto &S : RegSeqWithOpNos) { + if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) { + S.first->eraseFromParent(); + continue; + } + while (!S.second.empty()) + S.first->getOperand(S.second.pop_back_val()).setIsUndef(true); + } + } + return !Rollback; } @@ -498,6 +567,13 @@ bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) { if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) { Changed = true; ++NumDPPMovsCombined; + } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) { + auto Split = TII->expandMovDPP64(MI); + for (auto M : { Split.first, Split.second }) { + if (combineDPPMov(*M)) + ++NumDPPMovsCombined; + } + Changed = true; } } } diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 885239e2faed..9528aee4c50e 100644 --- a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -726,7 +726,7 @@ int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def, if (!TRI->isVGPR(MRI, Def.getReg())) return WaitStatesNeeded; - unsigned Reg = Def.getReg(); + Register Reg = Def.getReg(); auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) { int DataIdx = createsVALUHazard(*MI); return DataIdx >= 0 && @@ -792,7 +792,7 @@ int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) { if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg())) return 0; - unsigned LaneSelectReg = LaneSelectOp->getReg(); + Register LaneSelectReg = LaneSelectOp->getReg(); auto IsHazardFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; @@ -891,7 +891,7 @@ bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) { // Use V_MOB_B32 v?, v?. Register must be alive so use src0 of V_PERMLANE* // which is always a VGPR and available. 
auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0); - unsigned Reg = Src0->getReg(); + Register Reg = Src0->getReg(); bool IsUndef = Src0->isUndef(); BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32)) @@ -952,6 +952,7 @@ bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) { unsigned SDSTName; switch (MI->getOpcode()) { case AMDGPU::V_READLANE_B32: + case AMDGPU::V_READLANE_B32_gfx10: case AMDGPU::V_READFIRSTLANE_B32: SDSTName = AMDGPU::OpName::vdst; break; @@ -976,7 +977,7 @@ bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) { if (!SDST) return false; - const unsigned SDSTReg = SDST->getReg(); + const Register SDSTReg = SDST->getReg(); auto IsHazardFn = [SDSTReg, TRI] (MachineInstr *I) { return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI); }; @@ -1251,14 +1252,14 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) { const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7; const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15; const int MaxWaitStates = 18; - unsigned Reg = Op.getReg(); + Register Reg = Op.getReg(); unsigned HazardDefLatency = 0; auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &HazardDefLatency, this] (MachineInstr *MI) { if (!IsMFMAFn(MI)) return false; - unsigned DstReg = MI->getOperand(0).getReg(); + Register DstReg = MI->getOperand(0).getReg(); if (DstReg == Reg) return false; HazardDefLatency = std::max(HazardDefLatency, @@ -1304,7 +1305,7 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) { auto IsAccVgprWriteFn = [Reg, this] (MachineInstr *MI) { if (MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32) return false; - unsigned DstReg = MI->getOperand(0).getReg(); + Register DstReg = MI->getOperand(0).getReg(); return TRI.regsOverlap(Reg, DstReg); }; @@ -1330,14 +1331,14 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) { const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5; const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13; const int MaxWaitStates = 13; - unsigned DstReg = MI->getOperand(0).getReg(); + Register DstReg = MI->getOperand(0).getReg(); unsigned HazardDefLatency = 0; auto IsSrcCMFMAFn = [DstReg, &IsMFMAFn, &HazardDefLatency, this] (MachineInstr *MI) { if (!IsMFMAFn(MI)) return false; - unsigned Reg = TII.getNamedOperand(*MI, AMDGPU::OpName::src2)->getReg(); + Register Reg = TII.getNamedOperand(*MI, AMDGPU::OpName::src2)->getReg(); HazardDefLatency = std::max(HazardDefLatency, TSchedModel.computeInstrLatency(MI)); return TRI.regsOverlap(Reg, DstReg); @@ -1376,7 +1377,7 @@ int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) { if (!Op.isReg() || !TRI.isVGPR(MF.getRegInfo(), Op.getReg())) continue; - unsigned Reg = Op.getReg(); + Register Reg = Op.getReg(); const int AccVgprReadLdStWaitStates = 2; const int VALUWriteAccVgprReadLdStDepVALUWaitStates = 1; diff --git a/lib/Target/AMDGPU/GCNILPSched.cpp b/lib/Target/AMDGPU/GCNILPSched.cpp index 1eb617640c32..39072af7d871 100644 --- a/lib/Target/AMDGPU/GCNILPSched.cpp +++ b/lib/Target/AMDGPU/GCNILPSched.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/Debug.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp index 3525174223bd..90ab6a14ce20 100644 --- a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp +++ b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp @@ -237,7 +237,7 @@ public: 
GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C, StrategyKind S) - : BaseClass(C, llvm::make_unique()) + : BaseClass(C, std::make_unique()) , Context(C) , Strategy(S) , UPTracker(*LIS) { diff --git a/lib/Target/AMDGPU/GCNNSAReassign.cpp b/lib/Target/AMDGPU/GCNNSAReassign.cpp index 51c4c99cfb18..36a8f74150f5 100644 --- a/lib/Target/AMDGPU/GCNNSAReassign.cpp +++ b/lib/Target/AMDGPU/GCNNSAReassign.cpp @@ -173,11 +173,11 @@ GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const { bool NSA = false; for (unsigned I = 0; I < Info->VAddrDwords; ++I) { const MachineOperand &Op = MI.getOperand(VAddr0Idx + I); - unsigned Reg = Op.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg) || !VRM->isAssignedReg(Reg)) + Register Reg = Op.getReg(); + if (Register::isPhysicalRegister(Reg) || !VRM->isAssignedReg(Reg)) return NSA_Status::FIXED; - unsigned PhysReg = VRM->getPhys(Reg); + Register PhysReg = VRM->getPhys(Reg); if (!Fast) { if (!PhysReg) @@ -276,7 +276,7 @@ bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) { SlotIndex MinInd, MaxInd; for (unsigned I = 0; I < Info->VAddrDwords; ++I) { const MachineOperand &Op = MI->getOperand(VAddr0Idx + I); - unsigned Reg = Op.getReg(); + Register Reg = Op.getReg(); LiveInterval *LI = &LIS->getInterval(Reg); if (llvm::find(Intervals, LI) != Intervals.end()) { // Same register used, unable to make sequential diff --git a/lib/Target/AMDGPU/GCNRegBankReassign.cpp b/lib/Target/AMDGPU/GCNRegBankReassign.cpp index f0d47eaa4ed1..2927d4eb745a 100644 --- a/lib/Target/AMDGPU/GCNRegBankReassign.cpp +++ b/lib/Target/AMDGPU/GCNRegBankReassign.cpp @@ -230,7 +230,7 @@ private: public: Printable printReg(unsigned Reg, unsigned SubReg = 0) const { return Printable([Reg, SubReg, this](raw_ostream &OS) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { OS << llvm::printReg(Reg, TRI); return; } @@ -275,7 +275,7 @@ char GCNRegBankReassign::ID = 0; char &llvm::GCNRegBankReassignID = GCNRegBankReassign::ID; unsigned GCNRegBankReassign::getPhysRegBank(unsigned Reg) const { - assert (TargetRegisterInfo::isPhysicalRegister(Reg)); + assert(Register::isPhysicalRegister(Reg)); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); unsigned Size = TRI->getRegSizeInBits(*RC); @@ -293,7 +293,7 @@ unsigned GCNRegBankReassign::getPhysRegBank(unsigned Reg) const { unsigned GCNRegBankReassign::getRegBankMask(unsigned Reg, unsigned SubReg, int Bank) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { if (!VRM->isAssignedReg(Reg)) return 0; @@ -364,7 +364,7 @@ unsigned GCNRegBankReassign::analyzeInst(const MachineInstr& MI, if (!Op.isReg() || Op.isUndef()) continue; - unsigned R = Op.getReg(); + Register R = Op.getReg(); if (TRI->hasAGPRs(TRI->getRegClassForReg(*MRI, R))) continue; @@ -420,12 +420,12 @@ unsigned GCNRegBankReassign::getOperandGatherWeight(const MachineInstr& MI, } bool GCNRegBankReassign::isReassignable(unsigned Reg) const { - if (TargetRegisterInfo::isPhysicalRegister(Reg) || !VRM->isAssignedReg(Reg)) + if (Register::isPhysicalRegister(Reg) || !VRM->isAssignedReg(Reg)) return false; const MachineInstr *Def = MRI->getUniqueVRegDef(Reg); - unsigned PhysReg = VRM->getPhys(Reg); + Register PhysReg = VRM->getPhys(Reg); if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg) return false; @@ -654,7 +654,7 @@ unsigned GCNRegBankReassign::tryReassign(Candidate &C) { } std::sort(BankStalls.begin(), BankStalls.end()); - unsigned 
OrigReg = VRM->getPhys(C.Reg); + Register OrigReg = VRM->getPhys(C.Reg); LRM->unassign(LI); while (!BankStalls.empty()) { BankStall BS = BankStalls.pop_back_val(); diff --git a/lib/Target/AMDGPU/GCNRegPressure.cpp b/lib/Target/AMDGPU/GCNRegPressure.cpp index 39460fbd8a84..d593204cba05 100644 --- a/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -40,7 +40,7 @@ void llvm::printLivesAt(SlotIndex SI, << *LIS.getInstructionFromIndex(SI); unsigned Num = 0; for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - const unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + const unsigned Reg = Register::index2VirtReg(I); if (!LIS.hasInterval(Reg)) continue; const auto &LI = LIS.getInterval(Reg); @@ -84,7 +84,7 @@ bool llvm::isEqual(const GCNRPTracker::LiveRegSet &S1, unsigned GCNRegPressure::getRegKind(unsigned Reg, const MachineRegisterInfo &MRI) { - assert(TargetRegisterInfo::isVirtualRegister(Reg)); + assert(Register::isVirtualRegister(Reg)); const auto RC = MRI.getRegClass(Reg); auto STI = static_cast(MRI.getTargetRegisterInfo()); return STI->isSGPRClass(RC) ? @@ -183,7 +183,8 @@ bool GCNRegPressure::less(const GCNSubtarget &ST, #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void GCNRegPressure::print(raw_ostream &OS, const GCNSubtarget *ST) const { - OS << "VGPRs: " << getVGPRNum(); + OS << "VGPRs: " << Value[VGPR32] << ' '; + OS << "AGPRs: " << Value[AGPR32]; if (ST) OS << "(O" << ST->getOccupancyWithNumVGPRs(getVGPRNum()) << ')'; OS << ", SGPRs: " << getSGPRNum(); if (ST) OS << "(O" << ST->getOccupancyWithNumSGPRs(getSGPRNum()) << ')'; @@ -196,8 +197,7 @@ void GCNRegPressure::print(raw_ostream &OS, const GCNSubtarget *ST) const { static LaneBitmask getDefRegMask(const MachineOperand &MO, const MachineRegisterInfo &MRI) { - assert(MO.isDef() && MO.isReg() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())); + assert(MO.isDef() && MO.isReg() && Register::isVirtualRegister(MO.getReg())); // We don't rely on read-undef flag because in case of tentative schedule // tracking it isn't set correctly yet. 
This works correctly however since @@ -210,8 +210,7 @@ static LaneBitmask getDefRegMask(const MachineOperand &MO, static LaneBitmask getUsedRegMask(const MachineOperand &MO, const MachineRegisterInfo &MRI, const LiveIntervals &LIS) { - assert(MO.isUse() && MO.isReg() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())); + assert(MO.isUse() && MO.isReg() && Register::isVirtualRegister(MO.getReg())); if (auto SubReg = MO.getSubReg()) return MRI.getTargetRegisterInfo()->getSubRegIndexLaneMask(SubReg); @@ -232,7 +231,7 @@ collectVirtualRegUses(const MachineInstr &MI, const LiveIntervals &LIS, const MachineRegisterInfo &MRI) { SmallVector Res; for (const auto &MO : MI.operands()) { - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) continue; if (!MO.isUse() || !MO.readsReg()) continue; @@ -278,7 +277,7 @@ GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI, const MachineRegisterInfo &MRI) { GCNRPTracker::LiveRegSet LiveRegs; for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - auto Reg = TargetRegisterInfo::index2VirtReg(I); + auto Reg = Register::index2VirtReg(I); if (!LIS.hasInterval(Reg)) continue; auto LiveMask = getLiveLaneMask(Reg, SI, LIS, MRI); @@ -329,8 +328,7 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) { MaxPressure = max(AtMIPressure, MaxPressure); for (const auto &MO : MI.defs()) { - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()) || - MO.isDead()) + if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()) || MO.isDead()) continue; auto Reg = MO.getReg(); @@ -408,8 +406,8 @@ void GCNDownwardRPTracker::advanceToNext() { for (const auto &MO : LastTrackedMI->defs()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; auto &LiveMask = LiveRegs[Reg]; auto PrevMask = LiveMask; @@ -500,7 +498,7 @@ void GCNRPTracker::printLiveRegs(raw_ostream &OS, const LiveRegSet& LiveRegs, const MachineRegisterInfo &MRI) { const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + unsigned Reg = Register::index2VirtReg(I); auto It = LiveRegs.find(Reg); if (It != LiveRegs.end() && It->second.any()) OS << ' ' << printVRegOrUnit(Reg, TRI) << ':' diff --git a/lib/Target/AMDGPU/GCNRegPressure.h b/lib/Target/AMDGPU/GCNRegPressure.h index e4894418b943..5862cdb04166 100644 --- a/lib/Target/AMDGPU/GCNRegPressure.h +++ b/lib/Target/AMDGPU/GCNRegPressure.h @@ -214,7 +214,7 @@ getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) { DenseMap LiveRegMap; SmallVector LiveIdxs, SRLiveIdxs; for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - auto Reg = TargetRegisterInfo::index2VirtReg(I); + auto Reg = Register::index2VirtReg(I); if (!LIS.hasInterval(Reg)) continue; auto &LI = LIS.getInterval(Reg); diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 4ea990ae490e..973491a70d3c 100644 --- a/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -71,8 +71,8 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU // the tracker, so we need to pass those function a non-const copy. 
RegPressureTracker &TempTracker = const_cast(RPTracker); - std::vector Pressure; - std::vector MaxPressure; + Pressure.clear(); + MaxPressure.clear(); if (AtTop) TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure); @@ -103,10 +103,10 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU // the analysis to look through dependencies to find the path with the least // register pressure. - // We only need to update the RPDelata for instructions that increase - // register pressure. Instructions that decrease or keep reg pressure - // the same will be marked as RegExcess in tryCandidate() when they - // are compared with instructions that increase the register pressure. + // We only need to update the RPDelta for instructions that increase register + // pressure. Instructions that decrease or keep reg pressure the same will be + // marked as RegExcess in tryCandidate() when they are compared with + // instructions that increase the register pressure. if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) { Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet()); Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit); @@ -160,6 +160,7 @@ void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone, if (TryCand.ResDelta == SchedResourceDelta()) TryCand.initResourceDelta(Zone.DAG, SchedModel); Cand.setBest(TryCand); + LLVM_DEBUG(traceCandidate(Cand)); } } } @@ -195,6 +196,15 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) { assert(BotCand.Reason != NoCand && "failed to find the first candidate"); } else { LLVM_DEBUG(traceCandidate(BotCand)); +#ifndef NDEBUG + if (VerifyScheduling) { + SchedCandidate TCand; + TCand.reset(CandPolicy()); + pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand); + assert(TCand.SU == BotCand.SU && + "Last pick result should correspond to re-picking right now"); + } +#endif } // Check if the top Q has a better candidate. @@ -206,6 +216,15 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) { assert(TopCand.Reason != NoCand && "failed to find the first candidate"); } else { LLVM_DEBUG(traceCandidate(TopCand)); +#ifndef NDEBUG + if (VerifyScheduling) { + SchedCandidate TCand; + TCand.reset(CandPolicy()); + pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand); + assert(TCand.SU == TopCand.SU && + "Last pick result should correspond to re-picking right now"); + } +#endif } // Pick best from BotCand and TopCand. 
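The GCNSchedStrategy.cpp hunk above stops constructing fresh std::vector locals in initCandidate() and instead clears and reuses class members (declared in the GCNSchedStrategy.h hunk that follows), so the pressure scratch buffers are allocated once per strategy object rather than once per candidate. A minimal C++ sketch of that reuse pattern; the class name here is hypothetical and the element type is assumed to be unsigned:

    #include <vector>

    // Sketch only: keep scratch buffers as members and clear() them per
    // call. clear() preserves capacity, so repeated calls normally avoid
    // reallocation.
    class CandidateScratch {
      std::vector<unsigned> Pressure;    // assumed element type
      std::vector<unsigned> MaxPressure;

    public:
      void initCandidate() {
        Pressure.clear();     // capacity retained from earlier calls
        MaxPressure.clear();
        // ... the pressure tracker then fills both vectors in place ...
      }
    };
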
diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.h b/lib/Target/AMDGPU/GCNSchedStrategy.h index eaf3dee9ba5d..dd687a930c79 100644 --- a/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -40,6 +40,9 @@ class GCNMaxOccupancySchedStrategy final : public GenericScheduler { const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure); + std::vector Pressure; + std::vector MaxPressure; + unsigned SGPRExcessLimit; unsigned VGPRExcessLimit; unsigned SGPRCriticalLimit; diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index 57c0ba26cc3a..1f94ab799122 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -109,7 +109,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, MCContext *Ctx) { int64_t SignedValue = static_cast(Value); - switch (static_cast(Fixup.getKind())) { + switch (Fixup.getTargetKind()) { case AMDGPU::fixup_si_sopp_br: { int64_t BrImm = (SignedValue - 4) / 4; diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp index 6549a8d7d592..d352219a7a98 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -87,7 +87,7 @@ std::unique_ptr llvm::createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI, bool HasRelocationAddend, uint8_t ABIVersion) { - return llvm::make_unique(Is64Bit, OSABI, + return std::make_unique(Is64Bit, OSABI, HasRelocationAddend, ABIVersion); } diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index 01b53432cbb7..a9888e6ed924 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -196,6 +196,10 @@ void AMDGPUInstPrinter::printSLC(const MCInst *MI, unsigned OpNo, printNamedBit(MI, OpNo, O, "slc"); } +void AMDGPUInstPrinter::printSWZ(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { +} + void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { printNamedBit(MI, OpNo, O, "tfe"); @@ -292,35 +296,7 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O, } #endif - unsigned AltName = AMDGPU::Reg32; - - if (MRI.getRegClass(AMDGPU::VReg_64RegClassID).contains(RegNo) || - MRI.getRegClass(AMDGPU::SGPR_64RegClassID).contains(RegNo) || - MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(RegNo)) - AltName = AMDGPU::Reg64; - else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(RegNo) || - MRI.getRegClass(AMDGPU::SGPR_128RegClassID).contains(RegNo) || - MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(RegNo)) - AltName = AMDGPU::Reg128; - else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(RegNo) || - MRI.getRegClass(AMDGPU::SReg_96RegClassID).contains(RegNo)) - AltName = AMDGPU::Reg96; - else if (MRI.getRegClass(AMDGPU::VReg_160RegClassID).contains(RegNo) || - MRI.getRegClass(AMDGPU::SReg_160RegClassID).contains(RegNo)) - AltName = AMDGPU::Reg160; - else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(RegNo) || - MRI.getRegClass(AMDGPU::SGPR_256RegClassID).contains(RegNo)) - AltName = AMDGPU::Reg256; - else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(RegNo) || - MRI.getRegClass(AMDGPU::SGPR_512RegClassID).contains(RegNo) || - 
MRI.getRegClass(AMDGPU::AReg_512RegClassID).contains(RegNo)) - AltName = AMDGPU::Reg512; - else if (MRI.getRegClass(AMDGPU::VReg_1024RegClassID).contains(RegNo) || - MRI.getRegClass(AMDGPU::SReg_1024RegClassID).contains(RegNo) || - MRI.getRegClass(AMDGPU::AReg_1024RegClassID).contains(RegNo)) - AltName = AMDGPU::Reg1024; - - O << getRegisterName(RegNo, AltName); + O << getRegisterName(RegNo); } void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo, @@ -623,9 +599,11 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, case AMDGPU::V_ADD_CO_CI_U32_e32_gfx10: case AMDGPU::V_SUB_CO_CI_U32_e32_gfx10: case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: + case AMDGPU::V_CNDMASK_B32_dpp_gfx10: case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx10: case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx10: case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx10: + case AMDGPU::V_CNDMASK_B32_dpp8_gfx10: case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10: case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10: case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10: @@ -689,6 +667,7 @@ void AMDGPUInstPrinter::printOperandAndIntInputMods(const MCInst *MI, switch (MI->getOpcode()) { default: break; + case AMDGPU::V_CNDMASK_B32_sdwa_gfx10: case AMDGPU::V_ADD_CO_CI_U32_sdwa_gfx10: case AMDGPU::V_SUB_CO_CI_U32_sdwa_gfx10: case AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10: diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h index b544d1ef3605..66b70831ff9e 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -12,7 +12,6 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUINSTPRINTER_H #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUINSTPRINTER_H -#include "AMDGPUMCTargetDesc.h" #include "llvm/MC/MCInstPrinter.h" namespace llvm { @@ -26,8 +25,7 @@ public: //Autogenerated by tblgen void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI, raw_ostream &O); - static const char *getRegisterName(unsigned RegNo, - unsigned AltIdx = AMDGPU::NoRegAltName); + static const char *getRegisterName(unsigned RegNo); void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, const MCSubtargetInfo &STI) override; @@ -74,6 +72,8 @@ private: raw_ostream &O); void printSLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printSWZ(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); void printTFE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printDMask(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 8f11433476f4..c15da8075a34 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -250,7 +250,7 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata( bool AMDGPUTargetAsmStreamer::EmitCodeEnd() { const uint32_t Encoded_s_code_end = 0xbf9f0000; OS << "\t.p2alignl 6, " << Encoded_s_code_end << '\n'; - OS << "\t.fill 32, 4, " << Encoded_s_code_end << '\n'; + OS << "\t.fill 48, 4, " << Encoded_s_code_end << '\n'; return true; } @@ -602,7 +602,7 @@ bool AMDGPUTargetELFStreamer::EmitCodeEnd() { MCStreamer &OS = getStreamer(); OS.PushSection(); OS.EmitValueToAlignment(64, Encoded_s_code_end, 4); - for (unsigned I = 0; I < 32; ++I) + for (unsigned I = 0; I < 48; ++I) OS.EmitIntValue(Encoded_s_code_end, 4); OS.PopSection(); return true; 
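The AMDGPUTargetStreamer.cpp hunk above widens the trailing s_code_end padding from 32 to 48 dwords in both streamer paths. Assuming Encoded_s_code_end keeps the value 0xbf9f0000 shown in the hunk, the assembly streamer now emits the equivalent of:

    .p2alignl 6, 0xbf9f0000
    .fill 48, 4, 0xbf9f0000

that is, align to a 64-byte boundary padded with the s_code_end encoding, then append 48 more copies of the same 32-bit value; the ELF streamer mirrors this with EmitValueToAlignment(64, Encoded_s_code_end, 4) followed by 48 EmitIntValue(Encoded_s_code_end, 4) calls.
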
diff --git a/lib/Target/AMDGPU/MIMGInstructions.td b/lib/Target/AMDGPU/MIMGInstructions.td index 4735e6cb2446..f33ad950d5d9 100644 --- a/lib/Target/AMDGPU/MIMGInstructions.td +++ b/lib/Target/AMDGPU/MIMGInstructions.td @@ -26,7 +26,7 @@ def MIMGEncoding : GenericEnum { // Represent an ISA-level opcode, independent of the encoding and the // vdata/vaddr size. -class MIMGBaseOpcode { +class MIMGBaseOpcode : PredicateControl { MIMGBaseOpcode BaseOpcode = !cast(NAME); bit Store = 0; bit Atomic = 0; @@ -291,7 +291,7 @@ multiclass MIMG_NoSampler_Src_Helper op, string asm, multiclass MIMG_NoSampler op, string asm, bit has_d16, bit mip = 0, bit isResInfo = 0> { - def "" : MIMGBaseOpcode, PredicateControl { + def "" : MIMGBaseOpcode { let Coordinates = !if(isResInfo, 0, 1); let LodOrClampOrMip = mip; let HasD16 = has_d16; diff --git a/lib/Target/AMDGPU/R600AsmPrinter.cpp b/lib/Target/AMDGPU/R600AsmPrinter.cpp index 3fb18862fca8..b29cd75f75cf 100644 --- a/lib/Target/AMDGPU/R600AsmPrinter.cpp +++ b/lib/Target/AMDGPU/R600AsmPrinter.cpp @@ -104,7 +104,7 @@ bool R600AsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Functions needs to be cacheline (256B) aligned. - MF.ensureAlignment(8); + MF.ensureAlignment(Align(256)); SetupMachineFunction(MF); diff --git a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp index 8098b81d1ea2..e4160ac11c86 100644 --- a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -303,7 +303,7 @@ private: if (!MO.isReg()) continue; if (MO.isDef()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (R600::R600_Reg128RegClass.contains(Reg)) DstMI = Reg; else @@ -312,7 +312,7 @@ private: &R600::R600_Reg128RegClass); } if (MO.isUse()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (R600::R600_Reg128RegClass.contains(Reg)) SrcMI = Reg; else diff --git a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp index c6e8a060d8a0..fd75c41040e1 100644 --- a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp +++ b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp @@ -135,7 +135,7 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { const R600RegisterInfo &TRI = TII->getRegisterInfo(); - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK; for (unsigned Chan = 0; Chan < 4; ++Chan) { @@ -155,12 +155,12 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { unsigned Opcode = BMI->getOpcode(); // While not strictly necessary from hw point of view, we force // all src operands of a dot4 inst to belong to the same slot. 
- unsigned Src0 = BMI->getOperand( - TII->getOperandIdx(Opcode, R600::OpName::src0)) - .getReg(); - unsigned Src1 = BMI->getOperand( - TII->getOperandIdx(Opcode, R600::OpName::src1)) - .getReg(); + Register Src0 = + BMI->getOperand(TII->getOperandIdx(Opcode, R600::OpName::src0)) + .getReg(); + Register Src1 = + BMI->getOperand(TII->getOperandIdx(Opcode, R600::OpName::src1)) + .getReg(); (void) Src0; (void) Src1; if ((TRI.getEncodingValue(Src0) & 0xff) < 127 && @@ -205,10 +205,10 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { // T0_Z = CUBE T1_X, T1_Z // T0_W = CUBE T1_Y, T1_Z for (unsigned Chan = 0; Chan < 4; Chan++) { - unsigned DstReg = MI.getOperand( - TII->getOperandIdx(MI, R600::OpName::dst)).getReg(); - unsigned Src0 = MI.getOperand( - TII->getOperandIdx(MI, R600::OpName::src0)).getReg(); + Register DstReg = + MI.getOperand(TII->getOperandIdx(MI, R600::OpName::dst)).getReg(); + Register Src0 = + MI.getOperand(TII->getOperandIdx(MI, R600::OpName::src0)).getReg(); unsigned Src1 = 0; // Determine the correct source registers diff --git a/lib/Target/AMDGPU/R600FrameLowering.h b/lib/Target/AMDGPU/R600FrameLowering.h index 950e238f4979..283e4d1935ea 100644 --- a/lib/Target/AMDGPU/R600FrameLowering.h +++ b/lib/Target/AMDGPU/R600FrameLowering.h @@ -15,9 +15,9 @@ namespace llvm { class R600FrameLowering : public AMDGPUFrameLowering { public: - R600FrameLowering(StackDirection D, unsigned StackAl, int LAO, - unsigned TransAl = 1) : - AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {} + R600FrameLowering(StackDirection D, Align StackAl, int LAO, + Align TransAl = Align::None()) + : AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {} ~R600FrameLowering() override; void emitPrologue(MachineFunction &MF, diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index f80a53ba1dc6..659458b0b752 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -41,6 +41,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" +#include "llvm/Support/MathExtras.h" #include #include #include @@ -334,8 +335,8 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, } case R600::MASK_WRITE: { - unsigned maskedRegister = MI.getOperand(0).getReg(); - assert(TargetRegisterInfo::isVirtualRegister(maskedRegister)); + Register maskedRegister = MI.getOperand(0).getReg(); + assert(Register::isVirtualRegister(maskedRegister)); MachineInstr * defInstr = MRI.getVRegDef(maskedRegister); TII->addFlag(*defInstr, 0, MO_FLAG_MASK); break; @@ -782,7 +783,7 @@ SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const { return TrigVal; // On R600 hw, COS/SIN input must be between -Pi and Pi. 
return DAG.getNode(ISD::FMUL, DL, VT, TrigVal, - DAG.getConstantFP(3.14159265359, DL, MVT::f32)); + DAG.getConstantFP(numbers::pif, DL, MVT::f32)); } SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const { diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp index d9e839fe2035..04a5e93f6213 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -97,8 +97,8 @@ bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(), E = MBBI->operands_end(); I != E; ++I) { - if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) && - I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg())) + if (I->isReg() && !Register::isVirtualRegister(I->getReg()) && I->isUse() && + RI.isPhysRegLiveAcrossClauses(I->getReg())) return false; } return true; @@ -242,8 +242,7 @@ bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const { for (MachineInstr::const_mop_iterator I = MI.operands_begin(), E = MI.operands_end(); I != E; ++I) { - if (!I->isReg() || !I->isUse() || - TargetRegisterInfo::isVirtualRegister(I->getReg())) + if (!I->isReg() || !I->isUse() || Register::isVirtualRegister(I->getReg())) continue; if (R600::R600_LDS_SRC_REGRegClass.contains(I->getReg())) @@ -294,7 +293,7 @@ R600InstrInfo::getSrcs(MachineInstr &MI) const { for (unsigned j = 0; j < 8; j++) { MachineOperand &MO = MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][0])); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == R600::ALU_CONST) { MachineOperand &Sel = MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1])); @@ -317,7 +316,7 @@ R600InstrInfo::getSrcs(MachineInstr &MI) const { if (SrcIdx < 0) break; MachineOperand &MO = MI.getOperand(SrcIdx); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == R600::ALU_CONST) { MachineOperand &Sel = MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1])); @@ -348,7 +347,7 @@ R600InstrInfo::ExtractSrcs(MachineInstr &MI, unsigned i = 0; for (const auto &Src : getSrcs(MI)) { ++i; - unsigned Reg = Src.first->getReg(); + Register Reg = Src.first->getReg(); int Index = RI.getEncodingValue(Reg) & 0xff; if (Reg == R600::OQAP) { Result.push_back(std::make_pair(Index, 0U)); @@ -865,7 +864,7 @@ bool R600InstrInfo::isPredicated(const MachineInstr &MI) const { if (idx < 0) return false; - unsigned Reg = MI.getOperand(idx).getReg(); + Register Reg = MI.getOperand(idx).getReg(); switch (Reg) { default: return false; case R600::PRED_SEL_ONE: @@ -1038,7 +1037,7 @@ bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { unsigned RegIndex = MI.getOperand(RegOpIdx).getImm(); unsigned Channel = MI.getOperand(ChanOpIdx).getImm(); unsigned Address = calculateIndirectAddress(RegIndex, Channel); - unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg(); + Register OffsetReg = MI.getOperand(OffsetOpIdx).getReg(); if (OffsetReg == R600::INDIRECT_BASE_ADDR) { buildMovInstr(MBB, MI, MI.getOperand(DstOpIdx).getReg(), getIndirectAddrRegClass()->getRegister(Address)); @@ -1052,7 +1051,7 @@ bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { unsigned RegIndex = MI.getOperand(RegOpIdx).getImm(); unsigned Channel = MI.getOperand(ChanOpIdx).getImm(); unsigned Address = calculateIndirectAddress(RegIndex, Channel); - unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg(); + Register OffsetReg = MI.getOperand(OffsetOpIdx).getReg(); if 
(OffsetReg == R600::INDIRECT_BASE_ADDR) { buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address), MI.getOperand(ValOpIdx).getReg()); @@ -1193,8 +1192,7 @@ int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const { const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass(); for (std::pair LI : MRI.liveins()) { unsigned Reg = LI.first; - if (TargetRegisterInfo::isVirtualRegister(Reg) || - !IndirectRC->contains(Reg)) + if (Register::isVirtualRegister(Reg) || !IndirectRC->contains(Reg)) continue; unsigned RegIndex; diff --git a/lib/Target/AMDGPU/R600MachineScheduler.cpp b/lib/Target/AMDGPU/R600MachineScheduler.cpp index 34267a909b5e..7569a2629539 100644 --- a/lib/Target/AMDGPU/R600MachineScheduler.cpp +++ b/lib/Target/AMDGPU/R600MachineScheduler.cpp @@ -183,7 +183,7 @@ isPhysicalRegCopy(MachineInstr *MI) { if (MI->getOpcode() != R600::COPY) return false; - return !TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg()); + return !Register::isVirtualRegister(MI->getOperand(1).getReg()); } void R600SchedStrategy::releaseTopNode(SUnit *SU) { @@ -209,7 +209,7 @@ void R600SchedStrategy::releaseBottomNode(SUnit *SU) { bool R600SchedStrategy::regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const { - if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + if (!Register::isVirtualRegister(Reg)) { return RC->contains(Reg); } else { return MRI->getRegClass(Reg) == RC; @@ -270,7 +270,7 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const { } // Is the result already member of a X/Y/Z/W class ? - unsigned DestReg = MI->getOperand(0).getReg(); + Register DestReg = MI->getOperand(0).getReg(); if (regBelongsToClass(DestReg, &R600::R600_TReg32_XRegClass) || regBelongsToClass(DestReg, &R600::R600_AddrRegClass)) return AluT_X; @@ -357,7 +357,7 @@ void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) { if (DstIndex == -1) { return; } - unsigned DestReg = MI->getOperand(DstIndex).getReg(); + Register DestReg = MI->getOperand(DstIndex).getReg(); // PressureRegister crashes if an operand is def and used in the same inst // and we try to constraint its regclass for (MachineInstr::mop_iterator It = MI->operands_begin(), diff --git a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp index 9f1cb6582b5c..cec7f563f480 100644 --- a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp +++ b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp @@ -58,7 +58,7 @@ using namespace llvm; static bool isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) { assert(MRI.isSSA()); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return false; const MachineInstr *MI = MRI.getUniqueVRegDef(Reg); return MI && MI->isImplicitDef(); @@ -197,17 +197,17 @@ unsigned getReassignedChan( MachineInstr *R600VectorRegMerger::RebuildVector( RegSeqInfo *RSI, const RegSeqInfo *BaseRSI, const std::vector> &RemapChan) const { - unsigned Reg = RSI->Instr->getOperand(0).getReg(); + Register Reg = RSI->Instr->getOperand(0).getReg(); MachineBasicBlock::iterator Pos = RSI->Instr; MachineBasicBlock &MBB = *Pos->getParent(); DebugLoc DL = Pos->getDebugLoc(); - unsigned SrcVec = BaseRSI->Instr->getOperand(0).getReg(); + Register SrcVec = BaseRSI->Instr->getOperand(0).getReg(); DenseMap UpdatedRegToChan = BaseRSI->RegToChan; std::vector UpdatedUndef = BaseRSI->UndefReg; for (DenseMap::iterator It = RSI->RegToChan.begin(), E = RSI->RegToChan.end(); It != E; ++It) { - unsigned 
DstReg = MRI->createVirtualRegister(&R600::R600_Reg128RegClass); + Register DstReg = MRI->createVirtualRegister(&R600::R600_Reg128RegClass); unsigned SubReg = (*It).first; unsigned Swizzle = (*It).second; unsigned Chan = getReassignedChan(RemapChan, Swizzle); @@ -350,7 +350,7 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { MachineInstr &MI = *MII; if (MI.getOpcode() != R600::REG_SEQUENCE) { if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) { - unsigned Reg = MI.getOperand(1).getReg(); + Register Reg = MI.getOperand(1).getReg(); for (MachineRegisterInfo::def_instr_iterator It = MRI->def_instr_begin(Reg), E = MRI->def_instr_end(); It != E; ++It) { @@ -363,7 +363,7 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { RegSeqInfo RSI(*MRI, &MI); // All uses of MI are swizzeable ? - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); if (!areAllUsesSwizzeable(Reg)) continue; diff --git a/lib/Target/AMDGPU/R600Packetizer.cpp b/lib/Target/AMDGPU/R600Packetizer.cpp index df200baf11c1..176269f9b68c 100644 --- a/lib/Target/AMDGPU/R600Packetizer.cpp +++ b/lib/Target/AMDGPU/R600Packetizer.cpp @@ -90,7 +90,7 @@ private: if (DstIdx == -1) { continue; } - unsigned Dst = BI->getOperand(DstIdx).getReg(); + Register Dst = BI->getOperand(DstIdx).getReg(); if (isTrans || TII->isTransOnly(*BI)) { Result[Dst] = R600::PS; continue; @@ -136,7 +136,7 @@ private: int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Ops[i]); if (OperandIdx < 0) continue; - unsigned Src = MI.getOperand(OperandIdx).getReg(); + Register Src = MI.getOperand(OperandIdx).getReg(); const DenseMap::const_iterator It = PVs.find(Src); if (It != PVs.end()) MI.getOperand(OperandIdx).setReg(It->second); diff --git a/lib/Target/AMDGPU/R600RegisterInfo.cpp b/lib/Target/AMDGPU/R600RegisterInfo.cpp index 685df74490fe..ef12c1d24594 100644 --- a/lib/Target/AMDGPU/R600RegisterInfo.cpp +++ b/lib/Target/AMDGPU/R600RegisterInfo.cpp @@ -93,7 +93,7 @@ const RegClassWeight &R600RegisterInfo::getRegClassWeight( } bool R600RegisterInfo::isPhysRegLiveAcrossClauses(unsigned Reg) const { - assert(!TargetRegisterInfo::isVirtualRegister(Reg)); + assert(!Register::isVirtualRegister(Reg)); switch (Reg) { case R600::OQAP: diff --git a/lib/Target/AMDGPU/SIAddIMGInit.cpp b/lib/Target/AMDGPU/SIAddIMGInit.cpp index f8094e35816c..ee011286b8ff 100644 --- a/lib/Target/AMDGPU/SIAddIMGInit.cpp +++ b/lib/Target/AMDGPU/SIAddIMGInit.cpp @@ -129,7 +129,7 @@ bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) { continue; // Create a register for the intialization value. - unsigned PrevDst = + Register PrevDst = MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); unsigned NewDst = 0; // Final initialized value will be in here @@ -150,7 +150,7 @@ bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) { NewDst = MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); // Initialize dword - unsigned SubReg = + Register SubReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg) .addImm(0); diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h index a0e1ec6ac235..23ef56afc39c 100644 --- a/lib/Target/AMDGPU/SIDefines.h +++ b/lib/Target/AMDGPU/SIDefines.h @@ -99,7 +99,10 @@ enum : uint64_t { FPAtomic = UINT64_C(1) << 53, // Is a MFMA instruction. - IsMAI = UINT64_C(1) << 54 + IsMAI = UINT64_C(1) << 54, + + // Is a DOT instruction. + IsDOT = UINT64_C(1) << 55 }; // v_cmp_class_* etc. 
use a 10-bit mask for what operation is checked. @@ -444,6 +447,7 @@ namespace DPP { enum DppCtrl : unsigned { QUAD_PERM_FIRST = 0, + QUAD_PERM_ID = 0xE4, // identity permutation QUAD_PERM_LAST = 0xFF, DPP_UNUSED1 = 0x100, ROW_SHL0 = 0x100, diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 624953963cf4..65286751c12d 100644 --- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -113,10 +113,16 @@ class SIFixSGPRCopies : public MachineFunctionPass { public: static char ID; + MachineRegisterInfo *MRI; + const SIRegisterInfo *TRI; + const SIInstrInfo *TII; + SIFixSGPRCopies() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; + void processPHINode(MachineInstr &MI); + StringRef getPassName() const override { return "SI Fix SGPR copies"; } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -148,7 +154,7 @@ static bool hasVectorOperands(const MachineInstr &MI, const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { if (!MI.getOperand(i).isReg() || - !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg())) + !Register::isVirtualRegister(MI.getOperand(i).getReg())) continue; if (TRI->hasVectorRegisters(MRI.getRegClass(MI.getOperand(i).getReg()))) @@ -161,21 +167,19 @@ static std::pair getCopyRegClasses(const MachineInstr &Copy, const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI) { - unsigned DstReg = Copy.getOperand(0).getReg(); - unsigned SrcReg = Copy.getOperand(1).getReg(); + Register DstReg = Copy.getOperand(0).getReg(); + Register SrcReg = Copy.getOperand(1).getReg(); - const TargetRegisterClass *SrcRC = - TargetRegisterInfo::isVirtualRegister(SrcReg) ? - MRI.getRegClass(SrcReg) : - TRI.getPhysRegClass(SrcReg); + const TargetRegisterClass *SrcRC = Register::isVirtualRegister(SrcReg) + ? MRI.getRegClass(SrcReg) + : TRI.getPhysRegClass(SrcReg); // We don't really care about the subregister here. // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg()); - const TargetRegisterClass *DstRC = - TargetRegisterInfo::isVirtualRegister(DstReg) ? - MRI.getRegClass(DstReg) : - TRI.getPhysRegClass(DstReg); + const TargetRegisterClass *DstRC = Register::isVirtualRegister(DstReg) + ? MRI.getRegClass(DstReg) + : TRI.getPhysRegClass(DstReg); return std::make_pair(SrcRC, DstRC); } @@ -199,10 +203,10 @@ static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI, const SIInstrInfo *TII) { MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); auto &Src = MI.getOperand(1); - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = Src.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || - !TargetRegisterInfo::isVirtualRegister(DstReg)) + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = Src.getReg(); + if (!Register::isVirtualRegister(SrcReg) || + !Register::isVirtualRegister(DstReg)) return false; for (const auto &MO : MRI.reg_nodbg_operands(DstReg)) { @@ -238,7 +242,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, MachineRegisterInfo &MRI) { assert(MI.isRegSequence()); - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); if (!TRI->isSGPRClass(MRI.getRegClass(DstReg))) return false; @@ -250,7 +254,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, return false; // It is illegal to have vreg inputs to a physreg defining reg_sequence. 
- if (TargetRegisterInfo::isPhysicalRegister(CopyUse.getOperand(0).getReg())) + if (Register::isPhysicalRegister(CopyUse.getOperand(0).getReg())) return false; const TargetRegisterClass *SrcRC, *DstRC; @@ -281,7 +285,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, bool IsAGPR = TRI->hasAGPRs(DstRC); for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) { - unsigned SrcReg = MI.getOperand(I).getReg(); + Register SrcReg = MI.getOperand(I).getReg(); unsigned SrcSubReg = MI.getOperand(I).getSubReg(); const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); @@ -291,7 +295,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg); const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC); - unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC); + Register TmpReg = MRI.createVirtualRegister(NewSrcRC); BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), TmpReg) @@ -299,7 +303,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, if (IsAGPR) { const TargetRegisterClass *NewSrcRC = TRI->getEquivalentAGPRClass(SrcRC); - unsigned TmpAReg = MRI.createVirtualRegister(NewSrcRC); + Register TmpAReg = MRI.createVirtualRegister(NewSrcRC); unsigned Opc = NewSrcRC == &AMDGPU::AGPR_32RegClass ? AMDGPU::V_ACCVGPR_WRITE_B32 : AMDGPU::COPY; BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(Opc), @@ -315,52 +319,6 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, return true; } -static bool phiHasVGPROperands(const MachineInstr &PHI, - const MachineRegisterInfo &MRI, - const SIRegisterInfo *TRI, - const SIInstrInfo *TII) { - for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) { - unsigned Reg = PHI.getOperand(i).getReg(); - if (TRI->hasVGPRs(MRI.getRegClass(Reg))) - return true; - } - return false; -} - -static bool phiHasBreakDef(const MachineInstr &PHI, - const MachineRegisterInfo &MRI, - SmallSet &Visited) { - for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) { - unsigned Reg = PHI.getOperand(i).getReg(); - if (Visited.count(Reg)) - continue; - - Visited.insert(Reg); - - MachineInstr *DefInstr = MRI.getVRegDef(Reg); - switch (DefInstr->getOpcode()) { - default: - break; - case AMDGPU::SI_IF_BREAK: - return true; - case AMDGPU::PHI: - if (phiHasBreakDef(*DefInstr, MRI, Visited)) - return true; - } - } - return false; -} - -static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB, - const TargetRegisterInfo &TRI) { - for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(), - E = MBB.end(); I != E; ++I) { - if (I->modifiesRegister(AMDGPU::EXEC, &TRI)) - return true; - } - return false; -} - static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy, const MachineInstr *MoveImm, const SIInstrInfo *TII, @@ -422,12 +380,6 @@ bool searchPredecessors(const MachineBasicBlock *MBB, return false; } -static bool predsHasDivergentTerminator(MachineBasicBlock *MBB, - const TargetRegisterInfo *TRI) { - return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) { - return hasTerminatorThatModifiesExec(*MBB, *TRI); }); -} - // Checks if there is potential path From instruction To instruction. // If CutOff is specified and it sits in between of that path we ignore // a higher portion of the path and report it is not reachable. @@ -468,6 +420,7 @@ getFirstNonPrologue(MachineBasicBlock *MBB, const TargetInstrInfo *TII) { // executioon. 
static bool hoistAndMergeSGPRInits(unsigned Reg, const MachineRegisterInfo &MRI, + const TargetRegisterInfo *TRI, MachineDominatorTree &MDT, const TargetInstrInfo *TII) { // List of inits by immediate value. @@ -482,7 +435,7 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, for (auto &MI : MRI.def_instructions(Reg)) { MachineOperand *Imm = nullptr; - for (auto &MO: MI.operands()) { + for (auto &MO : MI.operands()) { if ((MO.isReg() && ((MO.isDef() && MO.getReg() != Reg) || !MO.isDef())) || (!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) { Imm = nullptr; @@ -587,8 +540,44 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, } } - for (auto MI : MergedInstrs) - MI->removeFromParent(); + // Remove initializations that were merged into another. + for (auto &Init : Inits) { + auto &Defs = Init.second; + auto I = Defs.begin(); + while (I != Defs.end()) { + if (MergedInstrs.count(*I)) { + (*I)->eraseFromParent(); + I = Defs.erase(I); + } else + ++I; + } + } + + // Try to schedule SGPR initializations as early as possible in the MBB. + for (auto &Init : Inits) { + auto &Defs = Init.second; + for (auto MI : Defs) { + auto MBB = MI->getParent(); + MachineInstr &BoundaryMI = *getFirstNonPrologue(MBB, TII); + MachineBasicBlock::reverse_iterator B(BoundaryMI); + // Check if B should actually be a boundary. If not set the previous + // instruction as the boundary instead. + if (!TII->isBasicBlockPrologue(*B)) + B++; + + auto R = std::next(MI->getReverseIterator()); + const unsigned Threshold = 50; + // Search until B or Threshold for a place to insert the initialization. + for (unsigned I = 0; R != B && I < Threshold; ++R, ++I) + if (R->readsRegister(Reg, TRI) || R->definesRegister(Reg, TRI) || + TII->isSchedulingBoundary(*R, MBB, *MBB->getParent())) + break; + + // Move to directly after R. + if (&*--R != MI) + MBB->splice(*R, MBB, MI); + } + } if (Changed) MRI.clearKillFlags(Reg); @@ -598,9 +587,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { const GCNSubtarget &ST = MF.getSubtarget(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - const SIRegisterInfo *TRI = ST.getRegisterInfo(); - const SIInstrInfo *TII = ST.getInstrInfo(); + MRI = &MF.getRegInfo(); + TRI = ST.getRegisterInfo(); + TII = ST.getInstrInfo(); MDT = &getAnalysis(); SmallVector Worklist; @@ -617,22 +606,39 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { continue; case AMDGPU::COPY: case AMDGPU::WQM: + case AMDGPU::SOFT_WQM: case AMDGPU::WWM: { - // If the destination register is a physical register there isn't really - // much we can do to fix this. - if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg())) - continue; + Register DstReg = MI.getOperand(0).getReg(); const TargetRegisterClass *SrcRC, *DstRC; - std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI); + std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, *MRI); + + if (!Register::isVirtualRegister(DstReg)) { + // If the destination register is a physical register there isn't + // really much we can do to fix this. + // Some special instructions use M0 as an input. Some even only use + // the first lane. Insert a readfirstlane and hope for the best. 
+ if (DstReg == AMDGPU::M0 && TRI->hasVectorRegisters(SrcRC)) { + Register TmpReg + = MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + + BuildMI(MBB, MI, MI.getDebugLoc(), + TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg) + .add(MI.getOperand(1)); + MI.getOperand(1).setReg(TmpReg); + } + + continue; + } + if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) { - unsigned SrcReg = MI.getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) { + Register SrcReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(SrcReg)) { TII->moveToVALU(MI, MDT); break; } - MachineInstr *DefMI = MRI.getVRegDef(SrcReg); + MachineInstr *DefMI = MRI->getVRegDef(SrcReg); unsigned SMovOp; int64_t Imm; // If we are just copying an immediate, we can replace the copy with @@ -651,70 +657,13 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { break; } case AMDGPU::PHI: { - unsigned Reg = MI.getOperand(0).getReg(); - if (!TRI->isSGPRClass(MRI.getRegClass(Reg))) - break; - - // We don't need to fix the PHI if the common dominator of the - // two incoming blocks terminates with a uniform branch. - bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII); - if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) { - MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB(); - MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB(); - - if (!predsHasDivergentTerminator(MBB0, TRI) && - !predsHasDivergentTerminator(MBB1, TRI)) { - LLVM_DEBUG(dbgs() - << "Not fixing PHI for uniform branch: " << MI << '\n'); - break; - } - } - - // If a PHI node defines an SGPR and any of its operands are VGPRs, - // then we need to move it to the VALU. - // - // Also, if a PHI node defines an SGPR and has all SGPR operands - // we must move it to the VALU, because the SGPR operands will - // all end up being assigned the same register, which means - // there is a potential for a conflict if different threads take - // different control flow paths. - // - // For Example: - // - // sgpr0 = def; - // ... - // sgpr1 = def; - // ... - // sgpr2 = PHI sgpr0, sgpr1 - // use sgpr2; - // - // Will Become: - // - // sgpr2 = def; - // ... - // sgpr2 = def; - // ... - // use sgpr2 - // - // The one exception to this rule is when one of the operands - // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK - // instruction. In this case, there we know the program will - // never enter the second block (the loop) without entering - // the first block (where the condition is computed), so there - // is no chance for values to be over-written. 
- - SmallSet Visited; - if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) { - LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI); - TII->moveToVALU(MI, MDT); - } - + processPHINode(MI); break; } case AMDGPU::REG_SEQUENCE: if (TRI->hasVectorRegisters(TII->getOpRegClass(MI, 0)) || !hasVectorOperands(MI, TRI)) { - foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI); + foldVGPRCopyIntoRegSequence(MI, TRI, TII, *MRI); continue; } @@ -724,9 +673,9 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { break; case AMDGPU::INSERT_SUBREG: { const TargetRegisterClass *DstRC, *Src0RC, *Src1RC; - DstRC = MRI.getRegClass(MI.getOperand(0).getReg()); - Src0RC = MRI.getRegClass(MI.getOperand(1).getReg()); - Src1RC = MRI.getRegClass(MI.getOperand(2).getReg()); + DstRC = MRI->getRegClass(MI.getOperand(0).getReg()); + Src0RC = MRI->getRegClass(MI.getOperand(1).getReg()); + Src1RC = MRI->getRegClass(MI.getOperand(2).getReg()); if (TRI->isSGPRClass(DstRC) && (TRI->hasVectorRegisters(Src0RC) || TRI->hasVectorRegisters(Src1RC))) { @@ -735,12 +684,159 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { } break; } + case AMDGPU::V_WRITELANE_B32: { + // Some architectures allow more than one constant bus access without + // SGPR restriction + if (ST.getConstantBusLimit(MI.getOpcode()) != 1) + break; + + // Writelane is special in that it can use SGPR and M0 (which would + // normally count as using the constant bus twice - but in this case it + // is allowed since the lane selector doesn't count as a use of the + // constant bus). However, it is still required to abide by the 1 SGPR + // rule. Apply a fix here as we might have multiple SGPRs after + // legalizing VGPRs to SGPRs + int Src0Idx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0); + int Src1Idx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1); + MachineOperand &Src0 = MI.getOperand(Src0Idx); + MachineOperand &Src1 = MI.getOperand(Src1Idx); + + // Check to see if the instruction violates the 1 SGPR rule + if ((Src0.isReg() && TRI->isSGPRReg(*MRI, Src0.getReg()) && + Src0.getReg() != AMDGPU::M0) && + (Src1.isReg() && TRI->isSGPRReg(*MRI, Src1.getReg()) && + Src1.getReg() != AMDGPU::M0)) { + + // Check for trivially easy constant prop into one of the operands + // If this is the case then perform the operation now to resolve SGPR + // issue. 
If we don't do that here we will always insert a mov to m0 + // that can't be resolved in later operand folding pass + bool Resolved = false; + for (MachineOperand *MO : {&Src0, &Src1}) { + if (Register::isVirtualRegister(MO->getReg())) { + MachineInstr *DefMI = MRI->getVRegDef(MO->getReg()); + if (DefMI && TII->isFoldableCopy(*DefMI)) { + const MachineOperand &Def = DefMI->getOperand(0); + if (Def.isReg() && + MO->getReg() == Def.getReg() && + MO->getSubReg() == Def.getSubReg()) { + const MachineOperand &Copied = DefMI->getOperand(1); + if (Copied.isImm() && + TII->isInlineConstant(APInt(64, Copied.getImm(), true))) { + MO->ChangeToImmediate(Copied.getImm()); + Resolved = true; + break; + } + } + } + } + } + + if (!Resolved) { + // Haven't managed to resolve by replacing an SGPR with an immediate + // Move src1 to be in M0 + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), + TII->get(AMDGPU::COPY), AMDGPU::M0) + .add(Src1); + Src1.ChangeToRegister(AMDGPU::M0, false); + } + } + break; + } } } } if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge) - hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT, TII); + hoistAndMergeSGPRInits(AMDGPU::M0, *MRI, TRI, *MDT, TII); return true; } + +void SIFixSGPRCopies::processPHINode(MachineInstr &MI) { + unsigned numVGPRUses = 0; + bool AllAGPRUses = true; + SetVector worklist; + SmallSet Visited; + worklist.insert(&MI); + Visited.insert(&MI); + while (!worklist.empty()) { + const MachineInstr *Instr = worklist.pop_back_val(); + unsigned Reg = Instr->getOperand(0).getReg(); + for (const auto &Use : MRI->use_operands(Reg)) { + const MachineInstr *UseMI = Use.getParent(); + AllAGPRUses &= (UseMI->isCopy() && + TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg())) || + TRI->isAGPR(*MRI, Use.getReg()); + if (UseMI->isCopy() || UseMI->isRegSequence()) { + if (UseMI->isCopy() && + UseMI->getOperand(0).getReg().isPhysical() && + !TRI->isSGPRReg(*MRI, UseMI->getOperand(0).getReg())) { + numVGPRUses++; + } + if (Visited.insert(UseMI).second) + worklist.insert(UseMI); + + continue; + } + + if (UseMI->isPHI()) { + const TargetRegisterClass *UseRC = MRI->getRegClass(Use.getReg()); + if (!TRI->isSGPRReg(*MRI, Use.getReg()) && + UseRC != &AMDGPU::VReg_1RegClass) + numVGPRUses++; + continue; + } + + const TargetRegisterClass *OpRC = + TII->getOpRegClass(*UseMI, UseMI->getOperandNo(&Use)); + if (!TRI->isSGPRClass(OpRC) && OpRC != &AMDGPU::VS_32RegClass && + OpRC != &AMDGPU::VS_64RegClass) { + numVGPRUses++; + } + } + } + + Register PHIRes = MI.getOperand(0).getReg(); + const TargetRegisterClass *RC0 = MRI->getRegClass(PHIRes); + if (AllAGPRUses && numVGPRUses && !TRI->hasAGPRs(RC0)) { + LLVM_DEBUG(dbgs() << "Moving PHI to AGPR: " << MI); + MRI->setRegClass(PHIRes, TRI->getEquivalentAGPRClass(RC0)); + } + + bool hasVGPRInput = false; + for (unsigned i = 1; i < MI.getNumOperands(); i += 2) { + unsigned InputReg = MI.getOperand(i).getReg(); + MachineInstr *Def = MRI->getVRegDef(InputReg); + if (TRI->isVectorRegister(*MRI, InputReg)) { + if (Def->isCopy()) { + unsigned SrcReg = Def->getOperand(1).getReg(); + const TargetRegisterClass *RC = + TRI->getRegClassForReg(*MRI, SrcReg); + if (TRI->isSGPRClass(RC)) + continue; + } + hasVGPRInput = true; + break; + } + else if (Def->isCopy() && + TRI->isVectorRegister(*MRI, Def->getOperand(1).getReg())) { + hasVGPRInput = true; + break; + } + } + + if ((!TRI->isVectorRegister(*MRI, PHIRes) && + RC0 != &AMDGPU::VReg_1RegClass) && + (hasVGPRInput || numVGPRUses > 1)) { + LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI); + 
TII->moveToVALU(MI); + } + else { + LLVM_DEBUG(dbgs() << "Legalizing PHI: " << MI); + TII->legalizeOperands(MI, MDT); + } + +} diff --git a/lib/Target/AMDGPU/SIFixupVectorISel.cpp b/lib/Target/AMDGPU/SIFixupVectorISel.cpp index 5b834c8de13a..a0119297b112 100644 --- a/lib/Target/AMDGPU/SIFixupVectorISel.cpp +++ b/lib/Target/AMDGPU/SIFixupVectorISel.cpp @@ -91,8 +91,7 @@ static bool findSRegBaseAndIndex(MachineOperand *Op, Worklist.push_back(Op); while (!Worklist.empty()) { MachineOperand *WOp = Worklist.pop_back_val(); - if (!WOp->isReg() || - !TargetRegisterInfo::isVirtualRegister(WOp->getReg())) + if (!WOp->isReg() || !Register::isVirtualRegister(WOp->getReg())) continue; MachineInstr *DefInst = MRI.getUniqueVRegDef(WOp->getReg()); switch (DefInst->getOpcode()) { diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp index 74d77d328019..4eac03168760 100644 --- a/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -142,16 +142,20 @@ static bool isInlineConstantIfFolded(const SIInstrInfo *TII, switch (Opc) { case AMDGPU::V_MAC_F32_e64: case AMDGPU::V_MAC_F16_e64: - case AMDGPU::V_FMAC_F32_e64: { + case AMDGPU::V_FMAC_F32_e64: + case AMDGPU::V_FMAC_F16_e64: { // Special case for mac. Since this is replaced with mad when folded into // src2, we need to check the legality for the final instruction. int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); if (static_cast(OpNo) == Src2Idx) { - bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64; - bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64; + bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64 || + Opc == AMDGPU::V_FMAC_F16_e64; + bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64 || + Opc == AMDGPU::V_FMAC_F32_e64; unsigned Opc = IsFMA ? - AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16); + (IsF32 ? AMDGPU::V_FMA_F32 : AMDGPU::V_FMA_F16_gfx9) : + (IsF32 ? 
AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16); const MCInstrDesc &MadDesc = TII->get(Opc); return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType); } @@ -235,9 +239,11 @@ static bool updateOperand(FoldCandidate &Fold, if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) { MachineBasicBlock *MBB = MI->getParent(); - auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI); - if (Liveness != MachineBasicBlock::LQR_Dead) + auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI, 16); + if (Liveness != MachineBasicBlock::LQR_Dead) { + LLVM_DEBUG(dbgs() << "Not shrinking " << MI << " due to vcc liveness\n"); return false; + } MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); int Op32 = Fold.getShrinkOpcode(); @@ -248,7 +254,7 @@ static bool updateOperand(FoldCandidate &Fold, bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg()); const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg()); - unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC); + Register NewReg0 = MRI.createVirtualRegister(Dst0RC); MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32); @@ -314,12 +320,15 @@ static bool tryAddToFoldList(SmallVectorImpl &FoldList, // Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2 unsigned Opc = MI->getOpcode(); if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 || - Opc == AMDGPU::V_FMAC_F32_e64) && + Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) && (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) { - bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64; - bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64; + bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64 || + Opc == AMDGPU::V_FMAC_F16_e64; + bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64 || + Opc == AMDGPU::V_FMAC_F32_e64; unsigned NewOpc = IsFMA ? - AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16); + (IsF32 ? AMDGPU::V_FMA_F32 : AMDGPU::V_FMA_F16_gfx9) : + (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16); // Check if changing this to a v_mad_{f16, f32} instruction will allow us // to fold the operand. @@ -435,7 +444,8 @@ static bool tryToFoldACImm(const SIInstrInfo *TII, OpTy > AMDGPU::OPERAND_REG_INLINE_AC_LAST) return false; - if (OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpTy)) { + if (OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpTy) && + TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) { UseMI->getOperand(UseOpIdx).ChangeToImmediate(OpToFold.getImm()); return true; } @@ -443,8 +453,8 @@ static bool tryToFoldACImm(const SIInstrInfo *TII, if (!OpToFold.isReg()) return false; - unsigned UseReg = OpToFold.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(UseReg)) + Register UseReg = OpToFold.getReg(); + if (!Register::isVirtualRegister(UseReg)) return false; if (llvm::find_if(FoldList, [UseMI](const FoldCandidate &FC) { @@ -481,6 +491,9 @@ static bool tryToFoldACImm(const SIInstrInfo *TII, return false; // Can only fold splat constants } + if (!TII->isOperandLegal(*UseMI, UseOpIdx, Op)) + return false; + FoldList.push_back(FoldCandidate(UseMI, UseOpIdx, Op)); return true; } @@ -518,7 +531,7 @@ void SIFoldOperands::foldOperand( // REG_SEQUENCE instructions, so we have to fold them into the // uses of REG_SEQUENCE. 
if (UseMI->isRegSequence()) { - unsigned RegSeqDstReg = UseMI->getOperand(0).getReg(); + Register RegSeqDstReg = UseMI->getOperand(0).getReg(); unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm(); MachineRegisterInfo::use_iterator Next; @@ -569,15 +582,18 @@ void SIFoldOperands::foldOperand( OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal(); if (FoldingImmLike && UseMI->isCopy()) { - unsigned DestReg = UseMI->getOperand(0).getReg(); - const TargetRegisterClass *DestRC - = TargetRegisterInfo::isVirtualRegister(DestReg) ? - MRI->getRegClass(DestReg) : - TRI->getPhysRegClass(DestReg); - - unsigned SrcReg = UseMI->getOperand(1).getReg(); - if (TargetRegisterInfo::isVirtualRegister(DestReg) && - TargetRegisterInfo::isVirtualRegister(SrcReg)) { + Register DestReg = UseMI->getOperand(0).getReg(); + + // Don't fold into a copy to a physical register. Doing so would interfere + // with the register coalescer's logic which would avoid redundant + // initalizations. + if (DestReg.isPhysical()) + return; + + const TargetRegisterClass *DestRC = MRI->getRegClass(DestReg); + + Register SrcReg = UseMI->getOperand(1).getReg(); + if (SrcReg.isVirtual()) { // XXX - This can be an assert? const TargetRegisterClass * SrcRC = MRI->getRegClass(SrcReg); if (TRI->isSGPRClass(SrcRC) && TRI->hasVectorRegisters(DestRC)) { MachineRegisterInfo::use_iterator NextUse; @@ -613,10 +629,17 @@ void SIFoldOperands::foldOperand( return; UseMI->setDesc(TII->get(MovOp)); + MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin(); + MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end(); + while (ImpOpI != ImpOpE) { + MachineInstr::mop_iterator Tmp = ImpOpI; + ImpOpI++; + UseMI->RemoveOperand(UseMI->getOperandNo(Tmp)); + } CopiesToReplace.push_back(UseMI); } else { if (UseMI->isCopy() && OpToFold.isReg() && - TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(0).getReg()) && + Register::isVirtualRegister(UseMI->getOperand(0).getReg()) && TRI->isVectorRegister(*MRI, UseMI->getOperand(0).getReg()) && TRI->isVectorRegister(*MRI, UseMI->getOperand(1).getReg()) && !UseMI->getOperand(1).getSubReg()) { @@ -677,6 +700,9 @@ void SIFoldOperands::foldOperand( // => // %sgpr1 = COPY %sgpr0 UseMI->setDesc(TII->get(AMDGPU::COPY)); + UseMI->getOperand(1).setReg(OpToFold.getReg()); + UseMI->getOperand(1).setSubReg(OpToFold.getSubReg()); + UseMI->getOperand(1).setIsKill(false); UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane) return; } @@ -708,7 +734,7 @@ void SIFoldOperands::foldOperand( // Split 64-bit constants into 32-bits for folding. if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) { - unsigned UseReg = UseOp.getReg(); + Register UseReg = UseOp.getReg(); const TargetRegisterClass *UseRC = MRI->getRegClass(UseReg); if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64) @@ -810,7 +836,7 @@ static MachineOperand *getImmOrMaterializedImm(MachineRegisterInfo &MRI, if (Op.isReg()) { // If this has a subregister, it obviously is a register source. 
if (Op.getSubReg() != AMDGPU::NoSubRegister || - !TargetRegisterInfo::isVirtualRegister(Op.getReg())) + !Register::isVirtualRegister(Op.getReg())) return &Op; MachineInstr *Def = MRI.getVRegDef(Op.getReg()); @@ -1073,6 +1099,13 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI, Copy->addImplicitDefUseOperands(*MF); for (FoldCandidate &Fold : FoldList) { + if (Fold.isReg() && Register::isVirtualRegister(Fold.OpToFold->getReg())) { + Register Reg = Fold.OpToFold->getReg(); + MachineInstr *DefMI = Fold.OpToFold->getParent(); + if (DefMI->readsRegister(AMDGPU::EXEC, TRI) && + execMayBeModifiedBeforeUse(*MRI, Reg, *DefMI, *Fold.UseMI)) + continue; + } if (updateOperand(Fold, *TII, *TRI, *ST)) { // Clear kill flags. if (Fold.isReg()) { @@ -1316,6 +1349,8 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock *MBB : depth_first(&MF)) { MachineBasicBlock::iterator I, Next; + + MachineOperand *CurrentKnownM0Val = nullptr; for (I = MBB->begin(); I != MBB->end(); I = Next) { Next = std::next(I); MachineInstr &MI = *I; @@ -1328,6 +1363,25 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) || !tryFoldOMod(MI)) tryFoldClamp(MI); + + // Saw an unknown clobber of m0, so we no longer know what it is. + if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI)) + CurrentKnownM0Val = nullptr; + continue; + } + + // Specially track simple redefs of m0 to the same value in a block, so we + // can erase the later ones. + if (MI.getOperand(0).getReg() == AMDGPU::M0) { + MachineOperand &NewM0Val = MI.getOperand(1); + if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) { + MI.eraseFromParent(); + continue; + } + + // We aren't tracking other physical registers + CurrentKnownM0Val = (NewM0Val.isReg() && NewM0Val.getReg().isPhysical()) ? + nullptr : &NewM0Val; continue; } @@ -1339,8 +1393,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { if (!FoldingImm && !OpToFold.isReg()) continue; - if (OpToFold.isReg() && - !TargetRegisterInfo::isVirtualRegister(OpToFold.getReg())) + if (OpToFold.isReg() && !Register::isVirtualRegister(OpToFold.getReg())) continue; // Prevent folding operands backwards in the function. For example, @@ -1350,8 +1403,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { // ... // %vgpr0 = V_MOV_B32_e32 1, implicit %exec MachineOperand &Dst = MI.getOperand(0); - if (Dst.isReg() && - !TargetRegisterInfo::isVirtualRegister(Dst.getReg())) + if (Dst.isReg() && !Register::isVirtualRegister(Dst.getReg())) continue; foldInstOperand(MI, OpToFold); diff --git a/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/lib/Target/AMDGPU/SIFormMemoryClauses.cpp index f3c9ad63a80a..26bae5734df7 100644 --- a/lib/Target/AMDGPU/SIFormMemoryClauses.cpp +++ b/lib/Target/AMDGPU/SIFormMemoryClauses.cpp @@ -120,7 +120,7 @@ static bool isValidClauseInst(const MachineInstr &MI, bool IsVMEMClause) { return false; // If this is a load instruction where the result has been coalesced with an operand, then we cannot clause it. 
for (const MachineOperand &ResMO : MI.defs()) { - unsigned ResReg = ResMO.getReg(); + Register ResReg = ResMO.getReg(); for (const MachineOperand &MO : MI.uses()) { if (!MO.isReg() || MO.isDef()) continue; @@ -144,7 +144,7 @@ static unsigned getMopState(const MachineOperand &MO) { S |= RegState::Kill; if (MO.isEarlyClobber()) S |= RegState::EarlyClobber; - if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && MO.isRenamable()) + if (Register::isPhysicalRegister(MO.getReg()) && MO.isRenamable()) S |= RegState::Renamable; return S; } @@ -152,7 +152,7 @@ static unsigned getMopState(const MachineOperand &MO) { template void SIFormMemoryClauses::forAllLanes(unsigned Reg, LaneBitmask LaneMask, Callable Func) const { - if (LaneMask.all() || TargetRegisterInfo::isPhysicalRegister(Reg) || + if (LaneMask.all() || Register::isPhysicalRegister(Reg) || LaneMask == MRI->getMaxLaneMaskForVReg(Reg)) { Func(0); return; @@ -216,7 +216,7 @@ bool SIFormMemoryClauses::canBundle(const MachineInstr &MI, if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // If it is tied we will need to write same register as we read. if (MO.isTied()) @@ -227,7 +227,7 @@ bool SIFormMemoryClauses::canBundle(const MachineInstr &MI, if (Conflict == Map.end()) continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return false; LaneBitmask Mask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); @@ -265,13 +265,13 @@ void SIFormMemoryClauses::collectRegUses(const MachineInstr &MI, for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - LaneBitmask Mask = TargetRegisterInfo::isVirtualRegister(Reg) ? - TRI->getSubRegIndexLaneMask(MO.getSubReg()) : - LaneBitmask::getAll(); + LaneBitmask Mask = Register::isVirtualRegister(Reg) + ? TRI->getSubRegIndexLaneMask(MO.getSubReg()) + : LaneBitmask::getAll(); RegUse &Map = MO.isDef() ? 
Defs : Uses; auto Loc = Map.find(Reg); @@ -389,7 +389,7 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) { for (auto &&R : Defs) { unsigned Reg = R.first; Uses.erase(Reg); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) continue; LIS->removeInterval(Reg); LIS->createAndComputeVirtRegInterval(Reg); @@ -397,7 +397,7 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) { for (auto &&R : Uses) { unsigned Reg = R.first; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) continue; LIS->removeInterval(Reg); LIS->createAndComputeVirtRegInterval(Reg); diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp index feab6bed2603..ed07ed100a19 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -112,6 +112,7 @@ static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB, .addImm(0) // slc .addImm(0) // tfe .addImm(0) // dlc + .addImm(0) // swz .addMemOperand(MMO); return; } @@ -132,6 +133,7 @@ static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB, .addImm(0) // slc .addImm(0) // tfe .addImm(0) // dlc + .addImm(0) // swz .addMemOperand(MMO); } @@ -157,6 +159,7 @@ static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB, .addImm(0) // slc .addImm(0) // tfe .addImm(0) // dlc + .addImm(0) // swz .addMemOperand(MMO); return; } @@ -177,6 +180,7 @@ static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB, .addImm(0) // slc .addImm(0) // tfe .addImm(0) // dlc + .addImm(0) // swz .addMemOperand(MMO); } @@ -202,15 +206,15 @@ void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST, DebugLoc DL; MachineBasicBlock::iterator I = MBB.begin(); - unsigned FlatScratchInitReg - = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT); + Register FlatScratchInitReg = + MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT); MachineRegisterInfo &MRI = MF.getRegInfo(); MRI.addLiveIn(FlatScratchInitReg); MBB.addLiveIn(FlatScratchInitReg); - unsigned FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0); - unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1); + Register FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0); + Register FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1); unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg(); @@ -424,8 +428,8 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF); // We need to insert initialization of the scratch resource descriptor. 
- unsigned PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg( - AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); + Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg( + AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister; if (ST.isAmdHsaOrMesa(F)) { @@ -539,9 +543,9 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST, if (ST.isAmdPalOS()) { // The pointer to the GIT is formed from the offset passed in and either // the amdgpu-git-ptr-high function attribute or the top part of the PC - unsigned RsrcLo = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); - unsigned RsrcHi = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); - unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); + Register RsrcLo = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); + Register RsrcHi = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); + Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); @@ -601,14 +605,14 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST, assert(!ST.isAmdHsaOrMesa(Fn)); const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); - unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); - unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); + Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); + Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); // Use relocations to get the pointer, and setup the other bits manually. uint64_t Rsrc23 = TII->getScratchRsrcWords23(); if (MFI->hasImplicitBufferPtr()) { - unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); + Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) { const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64); @@ -640,8 +644,8 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST, MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR()); } } else { - unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); - unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); + Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); + Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); BuildMI(MBB, I, DL, SMovB32, Rsrc0) .addExternalSymbol("SCRATCH_RSRC_DWORD0") @@ -669,6 +673,8 @@ bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const { case TargetStackID::NoAlloc: case TargetStackID::SGPRSpill: return true; + case TargetStackID::SVEVector: + return false; } llvm_unreachable("Invalid TargetStackID::Value"); } diff --git a/lib/Target/AMDGPU/SIFrameLowering.h b/lib/Target/AMDGPU/SIFrameLowering.h index c644f4726e2c..d9970fd6b4b8 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.h +++ b/lib/Target/AMDGPU/SIFrameLowering.h @@ -20,9 +20,9 @@ class GCNSubtarget; class SIFrameLowering final : public AMDGPUFrameLowering { public: - SIFrameLowering(StackDirection D, unsigned StackAl, int LAO, - unsigned TransAl = 1) : - AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {} + SIFrameLowering(StackDirection D, Align StackAl, int LAO, + Align TransAl = Align::None()) + : AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {} ~SIFrameLowering() override = default; void emitEntryFunctionPrologue(MachineFunction &MF, diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index db0782e2bf3e..56ebf9c06741 100644 --- 
a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -20,11 +20,11 @@ #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIDefines.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" @@ -35,6 +35,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/DAGCombine.h" @@ -44,6 +45,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" @@ -115,7 +117,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass); addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass); - addRegisterClass(MVT::i32, &AMDGPU::SReg_32_XM0RegClass); + addRegisterClass(MVT::i32, &AMDGPU::SReg_32RegClass); addRegisterClass(MVT::f32, &AMDGPU::VGPR_32RegClass); addRegisterClass(MVT::f64, &AMDGPU::VReg_64RegClass); @@ -125,10 +127,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, addRegisterClass(MVT::v3i32, &AMDGPU::SGPR_96RegClass); addRegisterClass(MVT::v3f32, &AMDGPU::VReg_96RegClass); - addRegisterClass(MVT::v2i64, &AMDGPU::SReg_128RegClass); - addRegisterClass(MVT::v2f64, &AMDGPU::SReg_128RegClass); + addRegisterClass(MVT::v2i64, &AMDGPU::SGPR_128RegClass); + addRegisterClass(MVT::v2f64, &AMDGPU::SGPR_128RegClass); - addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass); + addRegisterClass(MVT::v4i32, &AMDGPU::SGPR_128RegClass); addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); addRegisterClass(MVT::v5i32, &AMDGPU::SGPR_160RegClass); @@ -141,12 +143,12 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass); if (Subtarget->has16BitInsts()) { - addRegisterClass(MVT::i16, &AMDGPU::SReg_32_XM0RegClass); - addRegisterClass(MVT::f16, &AMDGPU::SReg_32_XM0RegClass); + addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass); + addRegisterClass(MVT::f16, &AMDGPU::SReg_32RegClass); // Unless there are also VOP3P operations, not operations are really legal. 
- addRegisterClass(MVT::v2i16, &AMDGPU::SReg_32_XM0RegClass); - addRegisterClass(MVT::v2f16, &AMDGPU::SReg_32_XM0RegClass); + addRegisterClass(MVT::v2i16, &AMDGPU::SReg_32RegClass); + addRegisterClass(MVT::v2f16, &AMDGPU::SReg_32RegClass); addRegisterClass(MVT::v4i16, &AMDGPU::SReg_64RegClass); addRegisterClass(MVT::v4f16, &AMDGPU::SReg_64RegClass); } @@ -178,6 +180,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::STORE, MVT::v32i32, Custom); setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand); + setTruncStoreAction(MVT::v3i32, MVT::v3i16, Expand); setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand); setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand); setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand); @@ -215,31 +218,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v3i16, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f16, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2i16, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom); - - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2f16, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4f16, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v8f16, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); - - setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::v2i16, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::v2f16, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::v4f16, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom); - setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::BR_CC, MVT::i1, Expand); setOperationAction(ISD::BR_CC, MVT::i32, Expand); @@ -653,6 +635,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::FADD, MVT::v4f16, Custom); setOperationAction(ISD::FMUL, MVT::v4f16, Custom); + setOperationAction(ISD::FMA, MVT::v4f16, Custom); setOperationAction(ISD::FMAXNUM, MVT::v2f16, Custom); setOperationAction(ISD::FMINNUM, MVT::v2f16, Custom); @@ -687,6 +670,33 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT, VT, Custom); } + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f16, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2i16, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom); + + 
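// Illustrative aside (not part of the patch): marking FMA on v4f16 as Custom
// above pairs with the splitTernaryVectorOp helper added further down in this
// patch, which lowers a 4-element half-precision FMA as two 2-element halves
// that are then concatenated. The toy sketch below shows that
// split-evaluate-concatenate shape on plain arrays (Half2/Half4/fma4 are
// assumptions made for the example, not SelectionDAG types); it sketches the
// idea, not the actual DAG lowering.

#include <array>
#include <cstdio>

using Half2 = std::array<float, 2>;  // stand-in for a v2f16 value
using Half4 = std::array<float, 4>;  // stand-in for a v4f16 value

static Half2 fma2(Half2 A, Half2 B, Half2 C) {
  return {A[0] * B[0] + C[0], A[1] * B[1] + C[1]};
}

// Split each 4-wide operand into a low and a high half, run the 2-wide
// operation on each half, then concatenate the results.
static Half4 fma4(const Half4 &A, const Half4 &B, const Half4 &C) {
  Half2 Lo = fma2({A[0], A[1]}, {B[0], B[1]}, {C[0], C[1]});
  Half2 Hi = fma2({A[2], A[3]}, {B[2], B[3]}, {C[2], C[3]});
  return {Lo[0], Lo[1], Hi[0], Hi[1]};
}

int main() {
  Half4 R = fma4({1, 2, 3, 4}, {2, 2, 2, 2}, {1, 1, 1, 1});
  std::printf("%g %g %g %g\n", R[0], R[1], R[2], R[3]); // prints 3 5 7 9
  return 0;
}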
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2f16, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2i16, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4f16, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4i16, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v8f16, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::f16, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); + + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::v2i16, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::v2f16, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::v4f16, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::v4i16, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::f16, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom); + setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::ADDCARRY); setTargetDAGCombine(ISD::SUB); @@ -768,19 +778,22 @@ bool SITargetLowering::isShuffleMaskLegal(ArrayRef, EVT) const { MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const { - // TODO: Consider splitting all arguments into 32-bit pieces. - if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) { + if (CC == CallingConv::AMDGPU_KERNEL) + return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); + + if (VT.isVector()) { EVT ScalarVT = VT.getScalarType(); unsigned Size = ScalarVT.getSizeInBits(); if (Size == 32) return ScalarVT.getSimpleVT(); - if (Size == 64) + if (Size > 32) return MVT::i32; if (Size == 16 && Subtarget->has16BitInsts()) return VT.isInteger() ? 
MVT::v2i16 : MVT::v2f16; - } + } else if (VT.getSizeInBits() > 32) + return MVT::i32; return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); } @@ -788,7 +801,10 @@ MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const { - if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) { + if (CC == CallingConv::AMDGPU_KERNEL) + return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); + + if (VT.isVector()) { unsigned NumElts = VT.getVectorNumElements(); EVT ScalarVT = VT.getScalarType(); unsigned Size = ScalarVT.getSizeInBits(); @@ -796,12 +812,13 @@ unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, if (Size == 32) return NumElts; - if (Size == 64) - return 2 * NumElts; + if (Size > 32) + return NumElts * ((Size + 31) / 32); if (Size == 16 && Subtarget->has16BitInsts()) - return (VT.getVectorNumElements() + 1) / 2; - } + return (NumElts + 1) / 2; + } else if (VT.getSizeInBits() > 32) + return (VT.getSizeInBits() + 31) / 32; return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); } @@ -821,10 +838,10 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv( return NumIntermediates; } - if (Size == 64) { + if (Size > 32) { RegisterVT = MVT::i32; IntermediateVT = RegisterVT; - NumIntermediates = 2 * NumElts; + NumIntermediates = NumElts * ((Size + 31) / 32); return NumIntermediates; } @@ -901,7 +918,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = MFI->getImagePSV( *MF.getSubtarget().getInstrInfo(), CI.getArgOperand(RsrcIntr->RsrcArg)); - Info.align = 0; + Info.align.reset(); } else { Info.ptrVal = MFI->getBufferPSV( *MF.getSubtarget().getInstrInfo(), @@ -947,7 +964,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(CI.getType()); Info.ptrVal = CI.getOperand(0); - Info.align = 0; + Info.align.reset(); Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; const ConstantInt *Vol = cast(CI.getOperand(4)); @@ -964,7 +981,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = MFI->getBufferPSV( *MF.getSubtarget().getInstrInfo(), CI.getArgOperand(1)); - Info.align = 0; + Info.align.reset(); Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; const ConstantInt *Vol = dyn_cast(CI.getOperand(4)); @@ -978,7 +995,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::getVT(CI.getOperand(0)->getType() ->getPointerElementType()); Info.ptrVal = CI.getOperand(0); - Info.align = 0; + Info.align.reset(); Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; return true; @@ -988,7 +1005,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(CI.getType()); Info.ptrVal = CI.getOperand(0); - Info.align = 0; + Info.align.reset(); Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; const ConstantInt *Vol = cast(CI.getOperand(1)); @@ -1012,7 +1029,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, // This is an abstract access, but we need to specify a type and size. 
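// Illustrative aside (not part of the patch): the calling-convention hunks
// above replace the old 64-bit-only special case with a general rule: any
// scalar wider than 32 bits is passed as ceil(bits/32) i32 pieces, a vector of
// such scalars uses NumElts * ceil(bits/32) registers, and pairs of 16-bit
// elements still share one register. The standalone sketch below reproduces
// only that arithmetic (numRegsFor is an assumption made for the example, not
// the SITargetLowering API).

#include <cassert>
#include <cstdio>

// Rough register count for NumElts elements of ScalarBits each
// (NumElts == 1 for a plain scalar), under the rules sketched above.
static unsigned numRegsFor(unsigned NumElts, unsigned ScalarBits,
                           bool Has16BitInsts = true) {
  if (ScalarBits > 32)
    return NumElts * ((ScalarBits + 31) / 32);  // ceil(bits / 32) per element
  if (ScalarBits == 16 && Has16BitInsts)
    return (NumElts + 1) / 2;                   // two halves packed per register
  return NumElts;                               // 32-bit (and other) elements
}

int main() {
  assert(numRegsFor(1, 64) == 2);  // i64   -> 2 x i32
  assert(numRegsFor(4, 64) == 8);  // v4i64 -> 8 x i32
  assert(numRegsFor(2, 96) == 6);  // 96-bit elements need 3 pieces each
  assert(numRegsFor(5, 16) == 3);  // v5f16 -> 3 packed registers
  std::printf("register-count sketch ok\n");
  return 0;
}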
Info.memVT = MVT::i32; Info.size = 4; - Info.align = 4; + Info.align = Align(4); Info.flags = MachineMemOperand::MOStore; if (IntrID == Intrinsic::amdgcn_ds_gws_barrier) @@ -1215,21 +1232,12 @@ bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT, return true; } -bool SITargetLowering::allowsMisalignedMemoryAccesses( - EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags, - bool *IsFast) const { +bool SITargetLowering::allowsMisalignedMemoryAccessesImpl( + unsigned Size, unsigned AddrSpace, unsigned Align, + MachineMemOperand::Flags Flags, bool *IsFast) const { if (IsFast) *IsFast = false; - // TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96, - // which isn't a simple VT. - // Until MVT is extended to handle this, simply check for the size and - // rely on the condition below: allow accesses if the size is a multiple of 4. - if (VT == MVT::Other || (VT != MVT::Other && VT.getSizeInBits() > 1024 && - VT.getStoreSize() > 16)) { - return false; - } - if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS || AddrSpace == AMDGPUAS::REGION_ADDRESS) { // ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte @@ -1268,7 +1276,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses( } // Smaller than dword value must be aligned. - if (VT.bitsLT(MVT::i32)) + if (Size < 32) return false; // 8.1.6 - For Dword or larger reads or writes, the two LSBs of the @@ -1277,7 +1285,26 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses( if (IsFast) *IsFast = true; - return VT.bitsGT(MVT::i32) && Align % 4 == 0; + return Size >= 32 && Align >= 4; +} + +bool SITargetLowering::allowsMisalignedMemoryAccesses( + EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags, + bool *IsFast) const { + if (IsFast) + *IsFast = false; + + // TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96, + // which isn't a simple VT. + // Until MVT is extended to handle this, simply check for the size and + // rely on the condition below: allow accesses if the size is a multiple of 4. + if (VT == MVT::Other || (VT != MVT::Other && VT.getSizeInBits() > 1024 && + VT.getStoreSize() > 16)) { + return false; + } + + return allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace, + Align, Flags, IsFast); } EVT SITargetLowering::getOptimalMemOpType( @@ -1336,9 +1363,9 @@ bool SITargetLowering::isMemOpUniform(const SDNode *N) const { TargetLoweringBase::LegalizeTypeAction SITargetLowering::getPreferredVectorAction(MVT VT) const { - if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16)) - return TypeSplitVector; - + int NumElts = VT.getVectorNumElements(); + if (NumElts != 1 && VT.getScalarType().bitsLE(MVT::i16)) + return VT.isPow2VectorType() ? TypeSplitVector : TypeWidenVector; return TargetLoweringBase::getPreferredVectorAction(VT); } @@ -1562,7 +1589,8 @@ static void processShaderInputArgs(SmallVectorImpl &Splits, // entire split argument. if (Arg->Flags.isSplit()) { while (!Arg->Flags.isSplitEnd()) { - assert(!Arg->VT.isVector() && + assert((!Arg->VT.isVector() || + Arg->VT.getScalarSizeInBits() == 16) && "unexpected vector split in ps argument type"); if (!SkipArg) Splits.push_back(*Arg); @@ -1589,29 +1617,32 @@ static void processShaderInputArgs(SmallVectorImpl &Splits, } // Allocate special inputs passed in VGPRs. 
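// Illustrative aside (not part of the patch): allowsMisalignedMemoryAccesses
// above is refactored into a size-based helper so the same check can be reused
// without an MVT; for the flat/global tail it boils down to "at least a dword
// and at least 4-byte aligned". The tiny sketch below shows just that final
// rule (fastMisalignedAccess is an assumption made for the example, not the
// real allowsMisalignedMemoryAccessesImpl, which also special-cases the LDS,
// scratch and constant address spaces).

#include <cassert>
#include <cstdio>

static bool fastMisalignedAccess(unsigned SizeInBits, unsigned AlignInBytes) {
  if (SizeInBits < 32)       // sub-dword accesses still need natural alignment
    return false;
  return AlignInBytes >= 4;  // dword-or-larger: low address bits are ignored
}

int main() {
  assert(fastMisalignedAccess(64, 4));    // dword pair, dword aligned: fast
  assert(!fastMisalignedAccess(16, 2));   // sub-dword: reject
  assert(!fastMisalignedAccess(128, 2));  // wide but under-aligned: reject
  std::printf("alignment-rule sketch ok\n");
  return 0;
}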
-static void allocateSpecialEntryInputVGPRs(CCState &CCInfo, - MachineFunction &MF, - const SIRegisterInfo &TRI, - SIMachineFunctionInfo &Info) { +void SITargetLowering::allocateSpecialEntryInputVGPRs(CCState &CCInfo, + MachineFunction &MF, + const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) const { + const LLT S32 = LLT::scalar(32); + MachineRegisterInfo &MRI = MF.getRegInfo(); + if (Info.hasWorkItemIDX()) { - unsigned Reg = AMDGPU::VGPR0; - MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass); + Register Reg = AMDGPU::VGPR0; + MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32); CCInfo.AllocateReg(Reg); Info.setWorkItemIDX(ArgDescriptor::createRegister(Reg)); } if (Info.hasWorkItemIDY()) { - unsigned Reg = AMDGPU::VGPR1; - MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass); + Register Reg = AMDGPU::VGPR1; + MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32); CCInfo.AllocateReg(Reg); Info.setWorkItemIDY(ArgDescriptor::createRegister(Reg)); } if (Info.hasWorkItemIDZ()) { - unsigned Reg = AMDGPU::VGPR2; - MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass); + Register Reg = AMDGPU::VGPR2; + MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32); CCInfo.AllocateReg(Reg); Info.setWorkItemIDZ(ArgDescriptor::createRegister(Reg)); @@ -1642,7 +1673,8 @@ static ArgDescriptor allocateVGPR32Input(CCState &CCInfo, unsigned Mask = ~0u, assert(Reg != AMDGPU::NoRegister); MachineFunction &MF = CCInfo.getMachineFunction(); - MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass); + Register LiveInVReg = MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass); + MF.getRegInfo().setType(LiveInVReg, LLT::scalar(32)); return ArgDescriptor::createRegister(Reg, Mask); } @@ -1671,10 +1703,10 @@ static ArgDescriptor allocateSGPR64Input(CCState &CCInfo) { return allocateSGPR32InputImpl(CCInfo, &AMDGPU::SGPR_64RegClass, 16); } -static void allocateSpecialInputVGPRs(CCState &CCInfo, - MachineFunction &MF, - const SIRegisterInfo &TRI, - SIMachineFunctionInfo &Info) { +void SITargetLowering::allocateSpecialInputVGPRs(CCState &CCInfo, + MachineFunction &MF, + const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) const { const unsigned Mask = 0x3ff; ArgDescriptor Arg; @@ -1692,10 +1724,11 @@ static void allocateSpecialInputVGPRs(CCState &CCInfo, Info.setWorkItemIDZ(allocateVGPR32Input(CCInfo, Mask << 20, Arg)); } -static void allocateSpecialInputSGPRs(CCState &CCInfo, - MachineFunction &MF, - const SIRegisterInfo &TRI, - SIMachineFunctionInfo &Info) { +void SITargetLowering::allocateSpecialInputSGPRs( + CCState &CCInfo, + MachineFunction &MF, + const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) const { auto &ArgInfo = Info.getArgInfo(); // TODO: Unify handling with private memory pointers. @@ -1728,10 +1761,10 @@ static void allocateSpecialInputSGPRs(CCState &CCInfo, } // Allocate special inputs passed in user SGPRs. 
-static void allocateHSAUserSGPRs(CCState &CCInfo, - MachineFunction &MF, - const SIRegisterInfo &TRI, - SIMachineFunctionInfo &Info) { +void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo, + MachineFunction &MF, + const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) const { if (Info.hasImplicitBufferPtr()) { unsigned ImplicitBufferPtrReg = Info.addImplicitBufferPtr(TRI); MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass); @@ -1758,9 +1791,12 @@ static void allocateHSAUserSGPRs(CCState &CCInfo, } if (Info.hasKernargSegmentPtr()) { - unsigned InputPtrReg = Info.addKernargSegmentPtr(TRI); - MF.addLiveIn(InputPtrReg, &AMDGPU::SGPR_64RegClass); + MachineRegisterInfo &MRI = MF.getRegInfo(); + Register InputPtrReg = Info.addKernargSegmentPtr(TRI); CCInfo.AllocateReg(InputPtrReg); + + Register VReg = MF.addLiveIn(InputPtrReg, &AMDGPU::SGPR_64RegClass); + MRI.setType(VReg, LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64)); } if (Info.hasDispatchID()) { @@ -1780,32 +1816,32 @@ static void allocateHSAUserSGPRs(CCState &CCInfo, } // Allocate special input registers that are initialized per-wave. -static void allocateSystemSGPRs(CCState &CCInfo, - MachineFunction &MF, - SIMachineFunctionInfo &Info, - CallingConv::ID CallConv, - bool IsShader) { +void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo, + MachineFunction &MF, + SIMachineFunctionInfo &Info, + CallingConv::ID CallConv, + bool IsShader) const { if (Info.hasWorkGroupIDX()) { unsigned Reg = Info.addWorkGroupIDX(); - MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass); + MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass); CCInfo.AllocateReg(Reg); } if (Info.hasWorkGroupIDY()) { unsigned Reg = Info.addWorkGroupIDY(); - MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass); + MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass); CCInfo.AllocateReg(Reg); } if (Info.hasWorkGroupIDZ()) { unsigned Reg = Info.addWorkGroupIDZ(); - MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass); + MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass); CCInfo.AllocateReg(Reg); } if (Info.hasWorkGroupInfo()) { unsigned Reg = Info.addWorkGroupInfo(); - MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass); + MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass); CCInfo.AllocateReg(Reg); } @@ -1860,7 +1896,7 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM, // resource. For the Code Object V2 ABI, this will be the first 4 user // SGPR inputs. We can reserve those and use them directly. - unsigned PrivateSegmentBufferReg = + Register PrivateSegmentBufferReg = Info.getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER); Info.setScratchRSrcReg(PrivateSegmentBufferReg); } else { @@ -1921,7 +1957,7 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM, // // FIXME: Should not do this if inline asm is reading/writing these // registers. - unsigned PreloadedSP = Info.getPreloadedReg( + Register PreloadedSP = Info.getPreloadedReg( AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); Info.setStackPtrOffsetReg(PreloadedSP); @@ -1971,7 +2007,7 @@ void SITargetLowering::insertCopiesSplitCSR( else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); - unsigned NewVR = MRI->createVirtualRegister(RC); + Register NewVR = MRI->createVirtualRegister(RC); // Create copy from CSR to a virtual register. 
Entry->addLiveIn(*I); BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) @@ -2134,7 +2170,7 @@ SDValue SITargetLowering::LowerFormalArguments( assert(VA.isRegLoc() && "Parameter must be in a register!"); - unsigned Reg = VA.getLocReg(); + Register Reg = VA.getLocReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); EVT ValVT = VA.getValVT(); @@ -2652,6 +2688,15 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, bool IsThisReturn = false; MachineFunction &MF = DAG.getMachineFunction(); + if (Callee.isUndef() || isNullConstant(Callee)) { + if (!CLI.IsTailCall) { + for (unsigned I = 0, E = CLI.Ins.size(); I != E; ++I) + InVals.push_back(DAG.getUNDEF(CLI.Ins[I].VT)); + } + + return Chain; + } + if (IsVarArg) { return lowerUnhandledCall(CLI, InVals, "unsupported call to variadic function "); @@ -2782,7 +2827,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, int32_t Offset = LocMemOffset; SDValue PtrOff = DAG.getConstant(Offset, DL, PtrVT); - unsigned Align = 0; + MaybeAlign Alignment; if (IsTailCall) { ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; @@ -2790,8 +2835,10 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, Flags.getByValSize() : VA.getValVT().getStoreSize(); // FIXME: We can have better than the minimum byval required alignment. - Align = Flags.isByVal() ? Flags.getByValAlign() : - MinAlign(Subtarget->getStackAlignment(), Offset); + Alignment = + Flags.isByVal() + ? MaybeAlign(Flags.getByValAlign()) + : commonAlignment(Subtarget->getStackAlignment(), Offset); Offset = Offset + FPDiff; int FI = MFI.CreateFixedObject(OpSize, Offset, true); @@ -2810,7 +2857,8 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, } else { DstAddr = PtrOff; DstInfo = MachinePointerInfo::getStack(MF, LocMemOffset); - Align = MinAlign(Subtarget->getStackAlignment(), LocMemOffset); + Alignment = + commonAlignment(Subtarget->getStackAlignment(), LocMemOffset); } if (Outs[i].Flags.isByVal()) { @@ -2825,7 +2873,8 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, MemOpChains.push_back(Cpy); } else { - SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, Align); + SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, + Alignment ? Alignment->value() : 0); MemOpChains.push_back(Store); } } @@ -2937,9 +2986,9 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, IsThisReturn ? OutVals[0] : SDValue()); } -unsigned SITargetLowering::getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const { - unsigned Reg = StringSwitch(RegName) +Register SITargetLowering::getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const { + Register Reg = StringSwitch(RegName) .Case("m0", AMDGPU::M0) .Case("exec", AMDGPU::EXEC) .Case("exec_lo", AMDGPU::EXEC_LO) @@ -2947,7 +2996,7 @@ unsigned SITargetLowering::getRegisterByName(const char* RegName, EVT VT, .Case("flat_scratch", AMDGPU::FLAT_SCR) .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) - .Default(AMDGPU::NoRegister); + .Default(Register()); if (Reg == AMDGPU::NoRegister) { report_fatal_error(Twine("invalid register name \"" @@ -3055,6 +3104,20 @@ splitBlockForLoop(MachineInstr &MI, MachineBasicBlock &MBB, bool InstInLoop) { return std::make_pair(LoopBB, RemainderBB); } +/// Insert \p MI into a BUNDLE with an S_WAITCNT 0 immediately following it. 
+void SITargetLowering::bundleInstWithWaitcnt(MachineInstr &MI) const { + MachineBasicBlock *MBB = MI.getParent(); + const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); + auto I = MI.getIterator(); + auto E = std::next(I); + + BuildMI(*MBB, E, MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT)) + .addImm(0); + + MIBundleBuilder Bundler(*MBB, I, E); + finalizeBundle(*MBB, Bundler.begin()); +} + MachineBasicBlock * SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI, MachineBasicBlock *BB) const { @@ -3066,12 +3129,13 @@ SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI, MachineBasicBlock *RemainderBB; const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); - MachineBasicBlock::iterator Prev = std::prev(MI.getIterator()); + // Apparently kill flags are only valid if the def is in the same block? + if (MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::data0)) + Src->setIsKill(false); std::tie(LoopBB, RemainderBB) = splitBlockForLoop(MI, *BB, true); MachineBasicBlock::iterator I = LoopBB->end(); - MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::data0); const unsigned EncodedReg = AMDGPU::Hwreg::encodeHwreg( AMDGPU::Hwreg::ID_TRAPSTS, AMDGPU::Hwreg::OFFSET_MEM_VIOL, 1); @@ -3081,23 +3145,9 @@ SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI, .addImm(0) .addImm(EncodedReg); - // This is a pain, but we're not allowed to have physical register live-ins - // yet. Insert a pair of copies if the VGPR0 hack is necessary. - if (Src && TargetRegisterInfo::isPhysicalRegister(Src->getReg())) { - unsigned Data0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - BuildMI(*BB, std::next(Prev), DL, TII->get(AMDGPU::COPY), Data0) - .add(*Src); + bundleInstWithWaitcnt(MI); - BuildMI(*LoopBB, LoopBB->begin(), DL, TII->get(AMDGPU::COPY), Src->getReg()) - .addReg(Data0); - - MRI.setSimpleHint(Data0, Src->getReg()); - } - - BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::S_WAITCNT)) - .addImm(0); - - unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); // Load and check TRAP_STS.MEM_VIOL BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::S_GETREG_B32), Reg) @@ -3138,10 +3188,10 @@ static MachineBasicBlock::iterator emitLoadM0FromVGPRLoop( MachineBasicBlock::iterator I = LoopBB.begin(); const TargetRegisterClass *BoolRC = TRI->getBoolRC(); - unsigned PhiExec = MRI.createVirtualRegister(BoolRC); - unsigned NewExec = MRI.createVirtualRegister(BoolRC); - unsigned CurrentIdxReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); - unsigned CondReg = MRI.createVirtualRegister(BoolRC); + Register PhiExec = MRI.createVirtualRegister(BoolRC); + Register NewExec = MRI.createVirtualRegister(BoolRC); + Register CurrentIdxReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + Register CondReg = MRI.createVirtualRegister(BoolRC); BuildMI(LoopBB, I, DL, TII->get(TargetOpcode::PHI), PhiReg) .addReg(InitReg) @@ -3240,9 +3290,9 @@ static MachineBasicBlock::iterator loadM0FromVGPR(const SIInstrInfo *TII, MachineBasicBlock::iterator I(&MI); const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID); - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SaveExec = MRI.createVirtualRegister(BoolXExecRC); - unsigned TmpExec = MRI.createVirtualRegister(BoolXExecRC); + Register DstReg = MI.getOperand(0).getReg(); + Register SaveExec = MRI.createVirtualRegister(BoolXExecRC); + Register TmpExec = MRI.createVirtualRegister(BoolXExecRC); unsigned Exec = ST.isWave32() ? 
AMDGPU::EXEC_LO : AMDGPU::EXEC; unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; @@ -3315,7 +3365,7 @@ static bool setM0ToIndexFromSGPR(const SIInstrInfo *TII, SetOn->getOperand(3).setIsUndef(); } else { - unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register Tmp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), Tmp) .add(*Idx) .addImm(Offset); @@ -3351,8 +3401,8 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI, MachineFunction *MF = MBB.getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned Dst = MI.getOperand(0).getReg(); - unsigned SrcReg = TII->getNamedOperand(MI, AMDGPU::OpName::src)->getReg(); + Register Dst = MI.getOperand(0).getReg(); + Register SrcReg = TII->getNamedOperand(MI, AMDGPU::OpName::src)->getReg(); int Offset = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm(); const TargetRegisterClass *VecRC = MRI.getRegClass(SrcReg); @@ -3390,8 +3440,8 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI, const DebugLoc &DL = MI.getDebugLoc(); MachineBasicBlock::iterator I(&MI); - unsigned PhiReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned InitReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register PhiReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register InitReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(MBB, I, DL, TII->get(TargetOpcode::IMPLICIT_DEF), InitReg); @@ -3442,7 +3492,7 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI, MachineFunction *MF = MBB.getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned Dst = MI.getOperand(0).getReg(); + Register Dst = MI.getOperand(0).getReg(); const MachineOperand *SrcVec = TII->getNamedOperand(MI, AMDGPU::OpName::src); const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx); const MachineOperand *Val = TII->getNamedOperand(MI, AMDGPU::OpName::val); @@ -3505,7 +3555,7 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI, const DebugLoc &DL = MI.getDebugLoc(); - unsigned PhiReg = MRI.createVirtualRegister(VecRC); + Register PhiReg = MRI.createVirtualRegister(VecRC); auto InsPt = loadM0FromVGPR(TII, MBB, MI, SrcVec->getReg(), PhiReg, Offset, UseGPRIdxMode, false); @@ -3564,22 +3614,22 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( MachineOperand &Src0 = MI.getOperand(1); MachineOperand &Src1 = MI.getOperand(2); - unsigned DestSub0 = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); - unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); MachineOperand Src0Sub0 = TII->buildExtractSubRegOrImm(MI, MRI, Src0, BoolRC, AMDGPU::sub0, - &AMDGPU::SReg_32_XM0RegClass); + &AMDGPU::SReg_32RegClass); MachineOperand Src0Sub1 = TII->buildExtractSubRegOrImm(MI, MRI, Src0, BoolRC, AMDGPU::sub1, - &AMDGPU::SReg_32_XM0RegClass); + &AMDGPU::SReg_32RegClass); MachineOperand Src1Sub0 = TII->buildExtractSubRegOrImm(MI, MRI, Src1, BoolRC, AMDGPU::sub0, - &AMDGPU::SReg_32_XM0RegClass); + &AMDGPU::SReg_32RegClass); MachineOperand Src1Sub1 = TII->buildExtractSubRegOrImm(MI, MRI, Src1, BoolRC, AMDGPU::sub1, - &AMDGPU::SReg_32_XM0RegClass); + &AMDGPU::SReg_32RegClass); bool IsAdd = (MI.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO); @@ -3632,8 +3682,8 @@ MachineBasicBlock 
*SITargetLowering::EmitInstrWithCustomInserter( // S_CMOV_B64 exec, -1 MachineInstr *FirstMI = &*BB->begin(); MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned InputReg = MI.getOperand(0).getReg(); - unsigned CountReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + Register InputReg = MI.getOperand(0).getReg(); + Register CountReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); bool Found = false; // Move the COPY of the input reg to the beginning, so that we can use it. @@ -3707,16 +3757,16 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( const GCNSubtarget &ST = MF->getSubtarget(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); - unsigned Dst = MI.getOperand(0).getReg(); - unsigned Src0 = MI.getOperand(1).getReg(); - unsigned Src1 = MI.getOperand(2).getReg(); + Register Dst = MI.getOperand(0).getReg(); + Register Src0 = MI.getOperand(1).getReg(); + Register Src1 = MI.getOperand(2).getReg(); const DebugLoc &DL = MI.getDebugLoc(); - unsigned SrcCond = MI.getOperand(3).getReg(); + Register SrcCond = MI.getOperand(3).getReg(); - unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register DstLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register DstHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); const auto *CondRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID); - unsigned SrcCondCopy = MRI.createVirtualRegister(CondRC); + Register SrcCondCopy = MRI.createVirtualRegister(CondRC); BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), SrcCondCopy) .addReg(SrcCond); @@ -3814,8 +3864,12 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( case AMDGPU::DS_GWS_SEMA_P: case AMDGPU::DS_GWS_SEMA_RELEASE_ALL: case AMDGPU::DS_GWS_BARRIER: - if (getSubtarget()->hasGWSAutoReplay()) + // A s_waitcnt 0 is required to be the instruction immediately following. 
+ if (getSubtarget()->hasGWSAutoReplay()) { + bundleInstWithWaitcnt(MI); return BB; + } + return emitGWSMemViolTestLoop(MI, BB); default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); @@ -3939,6 +3993,30 @@ SDValue SITargetLowering::splitBinaryVectorOp(SDValue Op, return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), VT, OpLo, OpHi); } +SDValue SITargetLowering::splitTernaryVectorOp(SDValue Op, + SelectionDAG &DAG) const { + unsigned Opc = Op.getOpcode(); + EVT VT = Op.getValueType(); + assert(VT == MVT::v4i16 || VT == MVT::v4f16); + + SDValue Lo0, Hi0; + std::tie(Lo0, Hi0) = DAG.SplitVectorOperand(Op.getNode(), 0); + SDValue Lo1, Hi1; + std::tie(Lo1, Hi1) = DAG.SplitVectorOperand(Op.getNode(), 1); + SDValue Lo2, Hi2; + std::tie(Lo2, Hi2) = DAG.SplitVectorOperand(Op.getNode(), 2); + + SDLoc SL(Op); + + SDValue OpLo = DAG.getNode(Opc, SL, Lo0.getValueType(), Lo0, Lo1, Lo2, + Op->getFlags()); + SDValue OpHi = DAG.getNode(Opc, SL, Hi0.getValueType(), Hi0, Hi1, Hi2, + Op->getFlags()); + + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), VT, OpLo, OpHi); +} + + SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); @@ -3991,6 +4069,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FMINNUM: case ISD::FMAXNUM: return lowerFMINNUM_FMAXNUM(Op, DAG); + case ISD::FMA: + return splitTernaryVectorOp(Op, DAG); case ISD::SHL: case ISD::SRA: case ISD::SRL: @@ -4070,6 +4150,41 @@ SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode, return DAG.getMergeValues({ Adjusted, Load.getValue(1) }, DL); } +SDValue SITargetLowering::lowerIntrinsicLoad(MemSDNode *M, bool IsFormat, + SelectionDAG &DAG, + ArrayRef Ops) const { + SDLoc DL(M); + EVT LoadVT = M->getValueType(0); + EVT EltType = LoadVT.getScalarType(); + EVT IntVT = LoadVT.changeTypeToInteger(); + + bool IsD16 = IsFormat && (EltType.getSizeInBits() == 16); + + unsigned Opc = + IsFormat ? 
AMDGPUISD::BUFFER_LOAD_FORMAT : AMDGPUISD::BUFFER_LOAD; + + if (IsD16) { + return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16, M, DAG, Ops); + } + + // Handle BUFFER_LOAD_BYTE/UBYTE/SHORT/USHORT overloaded intrinsics + if (!IsD16 && !LoadVT.isVector() && EltType.getSizeInBits() < 32) + return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops, M); + + if (isTypeLegal(LoadVT)) { + return getMemIntrinsicNode(Opc, DL, M->getVTList(), Ops, IntVT, + M->getMemOperand(), DAG); + } + + EVT CastVT = getEquivalentMemType(*DAG.getContext(), LoadVT); + SDVTList VTList = DAG.getVTList(CastVT, MVT::Other); + SDValue MemNode = getMemIntrinsicNode(Opc, DL, VTList, Ops, CastVT, + M->getMemOperand(), DAG); + return DAG.getMergeValues( + {DAG.getNode(ISD::BITCAST, DL, LoadVT, MemNode), MemNode.getValue(1)}, + DL); +} + static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI, SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); @@ -4196,8 +4311,14 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N, } case ISD::INTRINSIC_W_CHAIN: { if (SDValue Res = LowerINTRINSIC_W_CHAIN(SDValue(N, 0), DAG)) { - Results.push_back(Res); - Results.push_back(Res.getValue(1)); + if (Res.getOpcode() == ISD::MERGE_VALUES) { + // FIXME: Hacky + Results.push_back(Res.getOperand(0)); + Results.push_back(Res.getOperand(1)); + } else { + Results.push_back(Res); + Results.push_back(Res.getValue(1)); + } return; } @@ -4935,11 +5056,8 @@ buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV, // of the s_add_u32 instruction, we end up with an offset that is 4 bytes too // small. This requires us to add 4 to the global variable offset in order to // compute the correct address. - unsigned LoFlags = GAFlags; - if (LoFlags == SIInstrInfo::MO_NONE) - LoFlags = SIInstrInfo::MO_REL32; SDValue PtrLo = - DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4, LoFlags); + DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4, GAFlags); SDValue PtrHi; if (GAFlags == SIInstrInfo::MO_NONE) { PtrHi = DAG.getTargetConstant(0, DL, MVT::i32); @@ -5563,14 +5681,14 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, SDVTList VTList = DAG.getVTList({LoadVT, MVT::Glue}); unsigned CachePolicy = cast(GLC)->getZExtValue(); SDValue Ops[] = { - DAG.getEntryNode(), // Chain - Rsrc, // rsrc - DAG.getConstant(0, DL, MVT::i32), // vindex - {}, // voffset - {}, // soffset - {}, // offset - DAG.getConstant(CachePolicy, DL, MVT::i32), // cachepolicy - DAG.getConstant(0, DL, MVT::i1), // idxen + DAG.getEntryNode(), // Chain + Rsrc, // rsrc + DAG.getConstant(0, DL, MVT::i32), // vindex + {}, // voffset + {}, // soffset + {}, // offset + DAG.getTargetConstant(CachePolicy, DL, MVT::i32), // cachepolicy + DAG.getTargetConstant(0, DL, MVT::i1), // idxen }; // Use the alignment to ensure that the required offsets will fit into the @@ -5579,7 +5697,7 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, uint64_t InstOffset = cast(Ops[5])->getZExtValue(); for (unsigned i = 0; i < NumLoads; ++i) { - Ops[5] = DAG.getConstant(InstOffset + 16 * i, DL, MVT::i32); + Ops[5] = DAG.getTargetConstant(InstOffset + 16 * i, DL, MVT::i32); Loads.push_back(DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD, DL, VTList, Ops, LoadVT, MMO)); } @@ -5758,45 +5876,31 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, } case Intrinsic::amdgcn_fdiv_fast: return lowerFDIV_FAST(Op, DAG); - case Intrinsic::amdgcn_interp_mov: { - SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4)); - SDValue Glue = 
M0.getValue(1); - return DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32, Op.getOperand(1), - Op.getOperand(2), Op.getOperand(3), Glue); - } - case Intrinsic::amdgcn_interp_p1: { - SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4)); - SDValue Glue = M0.getValue(1); - return DAG.getNode(AMDGPUISD::INTERP_P1, DL, MVT::f32, Op.getOperand(1), - Op.getOperand(2), Op.getOperand(3), Glue); - } - case Intrinsic::amdgcn_interp_p2: { - SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5)); - SDValue Glue = SDValue(M0.getNode(), 1); - return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, Op.getOperand(1), - Op.getOperand(2), Op.getOperand(3), Op.getOperand(4), - Glue); - } case Intrinsic::amdgcn_interp_p1_f16: { - SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5)); - SDValue Glue = M0.getValue(1); + SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0, + Op.getOperand(5), SDValue()); if (getSubtarget()->getLDSBankCount() == 16) { // 16 bank LDS - SDValue S = DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32, - DAG.getConstant(2, DL, MVT::i32), // P0 - Op.getOperand(2), // Attrchan - Op.getOperand(3), // Attr - Glue); + + // FIXME: This implicitly will insert a second CopyToReg to M0. + SDValue S = DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, DL, MVT::f32, + DAG.getTargetConstant(Intrinsic::amdgcn_interp_mov, DL, MVT::i32), + DAG.getConstant(2, DL, MVT::i32), // P0 + Op.getOperand(2), // Attrchan + Op.getOperand(3), // Attr + Op.getOperand(5)); // m0 + SDValue Ops[] = { Op.getOperand(1), // Src0 Op.getOperand(2), // Attrchan Op.getOperand(3), // Attr - DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers + DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers S, // Src2 - holds two f16 values selected by high - DAG.getConstant(0, DL, MVT::i32), // $src2_modifiers + DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers Op.getOperand(4), // high - DAG.getConstant(0, DL, MVT::i1), // $clamp - DAG.getConstant(0, DL, MVT::i32) // $omod + DAG.getTargetConstant(0, DL, MVT::i1), // $clamp + DAG.getTargetConstant(0, DL, MVT::i32) // $omod }; return DAG.getNode(AMDGPUISD::INTERP_P1LV_F16, DL, MVT::f32, Ops); } else { @@ -5805,28 +5909,28 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(1), // Src0 Op.getOperand(2), // Attrchan Op.getOperand(3), // Attr - DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers + DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers Op.getOperand(4), // high - DAG.getConstant(0, DL, MVT::i1), // $clamp - DAG.getConstant(0, DL, MVT::i32), // $omod - Glue + DAG.getTargetConstant(0, DL, MVT::i1), // $clamp + DAG.getTargetConstant(0, DL, MVT::i32), // $omod + ToM0.getValue(1) }; return DAG.getNode(AMDGPUISD::INTERP_P1LL_F16, DL, MVT::f32, Ops); } } case Intrinsic::amdgcn_interp_p2_f16: { - SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(6)); - SDValue Glue = SDValue(M0.getNode(), 1); + SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0, + Op.getOperand(6), SDValue()); SDValue Ops[] = { Op.getOperand(2), // Src0 Op.getOperand(3), // Attrchan Op.getOperand(4), // Attr - DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers + DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers Op.getOperand(1), // Src2 - DAG.getConstant(0, DL, MVT::i32), // $src2_modifiers + DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers Op.getOperand(5), // high - DAG.getConstant(0, DL, MVT::i1), // $clamp - Glue + DAG.getTargetConstant(0, DL, MVT::i1), // $clamp + 
ToM0.getValue(1) }; return DAG.getNode(AMDGPUISD::INTERP_P2_F16, DL, MVT::f16, Ops); } @@ -5947,16 +6051,6 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(1), Op.getOperand(2)); return DAG.getNode(ISD::BITCAST, DL, VT, Node); } - case Intrinsic::amdgcn_wqm: { - SDValue Src = Op.getOperand(1); - return SDValue(DAG.getMachineNode(AMDGPU::WQM, DL, Src.getValueType(), Src), - 0); - } - case Intrinsic::amdgcn_wwm: { - SDValue Src = Op.getOperand(1); - return SDValue(DAG.getMachineNode(AMDGPU::WWM, DL, Src.getValueType(), Src), - 0); - } case Intrinsic::amdgcn_fmad_ftz: return DAG.getNode(AMDGPUISD::FMAD_FTZ, DL, VT, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); @@ -5977,6 +6071,19 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SIInstrInfo::MO_ABS32_LO); return {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, GA), 0}; } + case Intrinsic::amdgcn_is_shared: + case Intrinsic::amdgcn_is_private: { + SDLoc SL(Op); + unsigned AS = (IntrinsicID == Intrinsic::amdgcn_is_shared) ? + AMDGPUAS::LOCAL_ADDRESS : AMDGPUAS::PRIVATE_ADDRESS; + SDValue Aperture = getSegmentAperture(AS, SL, DAG); + SDValue SrcVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, + Op.getOperand(1)); + + SDValue SrcHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, SrcVec, + DAG.getConstant(1, SL, MVT::i32)); + return DAG.getSetCC(SL, MVT::i1, SrcHi, Aperture, ISD::SETEQ); + } default: if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrinsicID)) @@ -5986,6 +6093,30 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, } } +// This function computes an appropriate offset to pass to +// MachineMemOperand::setOffset() based on the offset inputs to +// an intrinsic. If any of the offsets are non-contstant or +// if VIndex is non-zero then this function returns 0. Otherwise, +// it returns the sum of VOffset, SOffset, and Offset. +static unsigned getBufferOffsetForMMO(SDValue VOffset, + SDValue SOffset, + SDValue Offset, + SDValue VIndex = SDValue()) { + + if (!isa(VOffset) || !isa(SOffset) || + !isa(Offset)) + return 0; + + if (VIndex) { + if (!isa(VIndex) || !cast(VIndex)->isNullValue()) + return 0; + } + + return cast(VOffset)->getSExtValue() + + cast(SOffset)->getSExtValue() + + cast(Offset)->getSExtValue(); +} + SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntrID = cast(Op.getOperand(1))->getZExtValue(); @@ -6128,17 +6259,22 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SDValue(), // voffset -- will be set by setBufferOffsets SDValue(), // soffset -- will be set by setBufferOffsets SDValue(), // offset -- will be set by setBufferOffsets - DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy - DAG.getConstant(IdxEn, DL, MVT::i1), // idxen + DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy + DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen }; - setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]); + unsigned Offset = setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]); + // We don't know the offset if vindex is non-zero, so clear it. + if (IdxEn) + Offset = 0; + unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ? 
AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT; EVT VT = Op.getValueType(); EVT IntVT = VT.changeTypeToInteger(); auto *M = cast(Op); + M->getMemOperand()->setOffset(Offset); EVT LoadVT = Op.getValueType(); if (LoadVT.getScalarType() == MVT::f16) @@ -6155,6 +6291,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, } case Intrinsic::amdgcn_raw_buffer_load: case Intrinsic::amdgcn_raw_buffer_load_format: { + const bool IsFormat = IntrID == Intrinsic::amdgcn_raw_buffer_load_format; + auto Offsets = splitBufferOffsets(Op.getOperand(3), DAG); SDValue Ops[] = { Op.getOperand(0), // Chain @@ -6163,32 +6301,18 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Offsets.first, // voffset Op.getOperand(4), // soffset Offsets.second, // offset - Op.getOperand(5), // cachepolicy - DAG.getConstant(0, DL, MVT::i1), // idxen + Op.getOperand(5), // cachepolicy, swizzled buffer + DAG.getTargetConstant(0, DL, MVT::i1), // idxen }; - unsigned Opc = (IntrID == Intrinsic::amdgcn_raw_buffer_load) ? - AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT; - - EVT VT = Op.getValueType(); - EVT IntVT = VT.changeTypeToInteger(); auto *M = cast(Op); - EVT LoadVT = Op.getValueType(); - - if (LoadVT.getScalarType() == MVT::f16) - return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16, - M, DAG, Ops); - - // Handle BUFFER_LOAD_BYTE/UBYTE/SHORT/USHORT overloaded intrinsics - if (LoadVT.getScalarType() == MVT::i8 || - LoadVT.getScalarType() == MVT::i16) - return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops, M); - - return getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, - M->getMemOperand(), DAG); + M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[3], Ops[4], Ops[5])); + return lowerIntrinsicLoad(M, IsFormat, DAG, Ops); } case Intrinsic::amdgcn_struct_buffer_load: case Intrinsic::amdgcn_struct_buffer_load_format: { + const bool IsFormat = IntrID == Intrinsic::amdgcn_struct_buffer_load_format; + auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG); SDValue Ops[] = { Op.getOperand(0), // Chain @@ -6197,29 +6321,14 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Offsets.first, // voffset Op.getOperand(5), // soffset Offsets.second, // offset - Op.getOperand(6), // cachepolicy - DAG.getConstant(1, DL, MVT::i1), // idxen + Op.getOperand(6), // cachepolicy, swizzled buffer + DAG.getTargetConstant(1, DL, MVT::i1), // idxen }; - unsigned Opc = (IntrID == Intrinsic::amdgcn_struct_buffer_load) ? 
- AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT; - - EVT VT = Op.getValueType(); - EVT IntVT = VT.changeTypeToInteger(); auto *M = cast(Op); - EVT LoadVT = Op.getValueType(); - - if (LoadVT.getScalarType() == MVT::f16) - return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16, - M, DAG, Ops); - - // Handle BUFFER_LOAD_BYTE/UBYTE/SHORT/USHORT overloaded intrinsics - if (LoadVT.getScalarType() == MVT::i8 || - LoadVT.getScalarType() == MVT::i16) - return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops, M); - - return getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, - M->getMemOperand(), DAG); + M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[3], Ops[4], Ops[5], + Ops[2])); + return lowerIntrinsicLoad(cast(Op), IsFormat, DAG, Ops); } case Intrinsic::amdgcn_tbuffer_load: { MemSDNode *M = cast(Op); @@ -6239,9 +6348,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(4), // voffset Op.getOperand(5), // soffset Op.getOperand(6), // offset - DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format - DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy - DAG.getConstant(IdxEn, DL, MVT::i1), // idxen + DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format + DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy + DAG.getTargetConstant(IdxEn, DL, MVT::i1) // idxen }; if (LoadVT.getScalarType() == MVT::f16) @@ -6264,8 +6373,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(4), // soffset Offsets.second, // offset Op.getOperand(5), // format - Op.getOperand(6), // cachepolicy - DAG.getConstant(0, DL, MVT::i1), // idxen + Op.getOperand(6), // cachepolicy, swizzled buffer + DAG.getTargetConstant(0, DL, MVT::i1), // idxen }; if (LoadVT.getScalarType() == MVT::f16) @@ -6288,8 +6397,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(5), // soffset Offsets.second, // offset Op.getOperand(6), // format - Op.getOperand(7), // cachepolicy - DAG.getConstant(1, DL, MVT::i1), // idxen + Op.getOperand(7), // cachepolicy, swizzled buffer + DAG.getTargetConstant(1, DL, MVT::i1), // idxen }; if (LoadVT.getScalarType() == MVT::f16) @@ -6321,13 +6430,17 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SDValue(), // voffset -- will be set by setBufferOffsets SDValue(), // soffset -- will be set by setBufferOffsets SDValue(), // offset -- will be set by setBufferOffsets - DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy - DAG.getConstant(IdxEn, DL, MVT::i1), // idxen + DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy + DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen }; - setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); + unsigned Offset = setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); + // We don't know the offset if vindex is non-zero, so clear it. 
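// Illustrative aside (not part of the patch): several hunks above start
// attaching a byte offset to the buffer intrinsics' MachineMemOperand via
// getBufferOffsetForMMO, which reports an offset only when voffset, soffset,
// offset (and vindex, if present) are all constants and vindex is zero;
// otherwise it conservatively returns 0. The standalone sketch below mirrors
// that rule with plain optionals (mmoOffset is an assumption made for the
// example, not the helper added by the patch).

#include <cassert>
#include <cstdio>
#include <optional>

using MaybeInt = std::optional<long>;  // nullopt models a non-constant operand

static long mmoOffset(MaybeInt VOffset, MaybeInt SOffset, MaybeInt Offset,
                      MaybeInt VIndex = 0) {
  if (!VOffset || !SOffset || !Offset)
    return 0;                        // some component is not a known constant
  if (!VIndex || *VIndex != 0)
    return 0;                        // indexed access: base offset is unknown
  return *VOffset + *SOffset + *Offset;
}

int main() {
  assert(mmoOffset(16, 4, 8) == 28);               // all constants, no index
  assert(mmoOffset(std::nullopt, 4, 8) == 0);      // non-constant voffset
  assert(mmoOffset(16, 4, 8, /*VIndex=*/2) == 0);  // non-zero index
  std::printf("mmo-offset sketch ok\n");
  return 0;
}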
+ if (IdxEn) + Offset = 0; EVT VT = Op.getValueType(); auto *M = cast(Op); + M->getMemOperand()->setOffset(Offset); unsigned Opcode = 0; switch (IntrID) { @@ -6377,7 +6490,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, case Intrinsic::amdgcn_raw_buffer_atomic_umax: case Intrinsic::amdgcn_raw_buffer_atomic_and: case Intrinsic::amdgcn_raw_buffer_atomic_or: - case Intrinsic::amdgcn_raw_buffer_atomic_xor: { + case Intrinsic::amdgcn_raw_buffer_atomic_xor: + case Intrinsic::amdgcn_raw_buffer_atomic_inc: + case Intrinsic::amdgcn_raw_buffer_atomic_dec: { auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG); SDValue Ops[] = { Op.getOperand(0), // Chain @@ -6388,11 +6503,12 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(5), // soffset Offsets.second, // offset Op.getOperand(6), // cachepolicy - DAG.getConstant(0, DL, MVT::i1), // idxen + DAG.getTargetConstant(0, DL, MVT::i1), // idxen }; EVT VT = Op.getValueType(); auto *M = cast(Op); + M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6])); unsigned Opcode = 0; switch (IntrID) { @@ -6426,6 +6542,12 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, case Intrinsic::amdgcn_raw_buffer_atomic_xor: Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR; break; + case Intrinsic::amdgcn_raw_buffer_atomic_inc: + Opcode = AMDGPUISD::BUFFER_ATOMIC_INC; + break; + case Intrinsic::amdgcn_raw_buffer_atomic_dec: + Opcode = AMDGPUISD::BUFFER_ATOMIC_DEC; + break; default: llvm_unreachable("unhandled atomic opcode"); } @@ -6442,7 +6564,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, case Intrinsic::amdgcn_struct_buffer_atomic_umax: case Intrinsic::amdgcn_struct_buffer_atomic_and: case Intrinsic::amdgcn_struct_buffer_atomic_or: - case Intrinsic::amdgcn_struct_buffer_atomic_xor: { + case Intrinsic::amdgcn_struct_buffer_atomic_xor: + case Intrinsic::amdgcn_struct_buffer_atomic_inc: + case Intrinsic::amdgcn_struct_buffer_atomic_dec: { auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG); SDValue Ops[] = { Op.getOperand(0), // Chain @@ -6453,11 +6577,13 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(6), // soffset Offsets.second, // offset Op.getOperand(7), // cachepolicy - DAG.getConstant(1, DL, MVT::i1), // idxen + DAG.getTargetConstant(1, DL, MVT::i1), // idxen }; EVT VT = Op.getValueType(); auto *M = cast(Op); + M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6], + Ops[3])); unsigned Opcode = 0; switch (IntrID) { @@ -6491,6 +6617,12 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, case Intrinsic::amdgcn_struct_buffer_atomic_xor: Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR; break; + case Intrinsic::amdgcn_struct_buffer_atomic_inc: + Opcode = AMDGPUISD::BUFFER_ATOMIC_INC; + break; + case Intrinsic::amdgcn_struct_buffer_atomic_dec: + Opcode = AMDGPUISD::BUFFER_ATOMIC_DEC; + break; default: llvm_unreachable("unhandled atomic opcode"); } @@ -6512,12 +6644,16 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SDValue(), // voffset -- will be set by setBufferOffsets SDValue(), // soffset -- will be set by setBufferOffsets SDValue(), // offset -- will be set by setBufferOffsets - DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy - DAG.getConstant(IdxEn, DL, MVT::i1), // idxen + DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy + DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen }; - setBufferOffsets(Op.getOperand(6), DAG, &Ops[5]); + unsigned Offset = setBufferOffsets(Op.getOperand(6), 
DAG, &Ops[5]); + // We don't know the offset if vindex is non-zero, so clear it. + if (IdxEn) + Offset = 0; EVT VT = Op.getValueType(); auto *M = cast(Op); + M->getMemOperand()->setOffset(Offset); return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL, Op->getVTList(), Ops, VT, M->getMemOperand()); @@ -6534,10 +6670,11 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(6), // soffset Offsets.second, // offset Op.getOperand(7), // cachepolicy - DAG.getConstant(0, DL, MVT::i1), // idxen + DAG.getTargetConstant(0, DL, MVT::i1), // idxen }; EVT VT = Op.getValueType(); auto *M = cast(Op); + M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[5], Ops[6], Ops[7])); return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL, Op->getVTList(), Ops, VT, M->getMemOperand()); @@ -6554,10 +6691,12 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(7), // soffset Offsets.second, // offset Op.getOperand(8), // cachepolicy - DAG.getConstant(1, DL, MVT::i1), // idxen + DAG.getTargetConstant(1, DL, MVT::i1), // idxen }; EVT VT = Op.getValueType(); auto *M = cast(Op); + M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[5], Ops[6], Ops[7], + Ops[4])); return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL, Op->getVTList(), Ops, VT, M->getMemOperand()); @@ -6686,23 +6825,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE; return DAG.getNode(Opc, DL, Op->getVTList(), Ops); } - case Intrinsic::amdgcn_s_sendmsg: - case Intrinsic::amdgcn_s_sendmsghalt: { - unsigned NodeOp = (IntrinsicID == Intrinsic::amdgcn_s_sendmsg) ? - AMDGPUISD::SENDMSG : AMDGPUISD::SENDMSGHALT; - Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3)); - SDValue Glue = Chain.getValue(1); - return DAG.getNode(NodeOp, DL, MVT::Other, Chain, - Op.getOperand(2), Glue); - } - case Intrinsic::amdgcn_init_exec: { - return DAG.getNode(AMDGPUISD::INIT_EXEC, DL, MVT::Other, Chain, - Op.getOperand(2)); - } - case Intrinsic::amdgcn_init_exec_from_input: { - return DAG.getNode(AMDGPUISD::INIT_EXEC_FROM_INPUT, DL, MVT::Other, Chain, - Op.getOperand(2), Op.getOperand(3)); - } case Intrinsic::amdgcn_s_barrier: { if (getTargetMachine().getOptLevel() > CodeGenOpt::None) { const GCNSubtarget &ST = MF.getSubtarget(); @@ -6733,9 +6855,9 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, Op.getOperand(5), // voffset Op.getOperand(6), // soffset Op.getOperand(7), // offset - DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format - DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy - DAG.getConstant(IdxEn, DL, MVT::i1), // idexen + DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format + DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy + DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idexen }; unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 : AMDGPUISD::TBUFFER_STORE_FORMAT; @@ -6759,8 +6881,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, Op.getOperand(6), // soffset Offsets.second, // offset Op.getOperand(7), // format - Op.getOperand(8), // cachepolicy - DAG.getConstant(1, DL, MVT::i1), // idexen + Op.getOperand(8), // cachepolicy, swizzled buffer + DAG.getTargetConstant(1, DL, MVT::i1), // idexen }; unsigned Opc = IsD16 ? 
AMDGPUISD::TBUFFER_STORE_FORMAT_D16 : AMDGPUISD::TBUFFER_STORE_FORMAT; @@ -6784,8 +6906,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, Op.getOperand(5), // soffset Offsets.second, // offset Op.getOperand(6), // format - Op.getOperand(7), // cachepolicy - DAG.getConstant(0, DL, MVT::i1), // idexen + Op.getOperand(7), // cachepolicy, swizzled buffer + DAG.getTargetConstant(0, DL, MVT::i1), // idexen }; unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 : AMDGPUISD::TBUFFER_STORE_FORMAT; @@ -6813,14 +6935,18 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, SDValue(), // voffset -- will be set by setBufferOffsets SDValue(), // soffset -- will be set by setBufferOffsets SDValue(), // offset -- will be set by setBufferOffsets - DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy - DAG.getConstant(IdxEn, DL, MVT::i1), // idxen + DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy + DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen }; - setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); + unsigned Offset = setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); + // We don't know the offset if vindex is non-zero, so clear it. + if (IdxEn) + Offset = 0; unsigned Opc = IntrinsicID == Intrinsic::amdgcn_buffer_store ? AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT; Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc; MemSDNode *M = cast(Op); + M->getMemOperand()->setOffset(Offset); // Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics EVT VDataType = VData.getValueType().getScalarType(); @@ -6833,10 +6959,22 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, case Intrinsic::amdgcn_raw_buffer_store: case Intrinsic::amdgcn_raw_buffer_store_format: { + const bool IsFormat = + IntrinsicID == Intrinsic::amdgcn_raw_buffer_store_format; + SDValue VData = Op.getOperand(2); - bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16); + EVT VDataVT = VData.getValueType(); + EVT EltType = VDataVT.getScalarType(); + bool IsD16 = IsFormat && (EltType.getSizeInBits() == 16); if (IsD16) VData = handleD16VData(VData, DAG); + + if (!isTypeLegal(VDataVT)) { + VData = + DAG.getNode(ISD::BITCAST, DL, + getEquivalentMemType(*DAG.getContext(), VDataVT), VData); + } + auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG); SDValue Ops[] = { Chain, @@ -6846,18 +6984,18 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, Offsets.first, // voffset Op.getOperand(5), // soffset Offsets.second, // offset - Op.getOperand(6), // cachepolicy - DAG.getConstant(0, DL, MVT::i1), // idxen + Op.getOperand(6), // cachepolicy, swizzled buffer + DAG.getTargetConstant(0, DL, MVT::i1), // idxen }; - unsigned Opc = IntrinsicID == Intrinsic::amdgcn_raw_buffer_store ? - AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT; + unsigned Opc = + IsFormat ? AMDGPUISD::BUFFER_STORE_FORMAT : AMDGPUISD::BUFFER_STORE; Opc = IsD16 ? 
AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc; MemSDNode *M = cast(Op); + M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6])); // Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics - EVT VDataType = VData.getValueType().getScalarType(); - if (VDataType == MVT::i8 || VDataType == MVT::i16) - return handleByteShortBufferStores(DAG, VDataType, DL, Ops, M); + if (!IsD16 && !VDataVT.isVector() && EltType.getSizeInBits() < 32) + return handleByteShortBufferStores(DAG, VDataVT, DL, Ops, M); return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, M->getMemoryVT(), M->getMemOperand()); @@ -6865,10 +7003,23 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, case Intrinsic::amdgcn_struct_buffer_store: case Intrinsic::amdgcn_struct_buffer_store_format: { + const bool IsFormat = + IntrinsicID == Intrinsic::amdgcn_struct_buffer_store_format; + SDValue VData = Op.getOperand(2); - bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16); + EVT VDataVT = VData.getValueType(); + EVT EltType = VDataVT.getScalarType(); + bool IsD16 = IsFormat && (EltType.getSizeInBits() == 16); + if (IsD16) VData = handleD16VData(VData, DAG); + + if (!isTypeLegal(VDataVT)) { + VData = + DAG.getNode(ISD::BITCAST, DL, + getEquivalentMemType(*DAG.getContext(), VDataVT), VData); + } + auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG); SDValue Ops[] = { Chain, @@ -6878,17 +7029,19 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, Offsets.first, // voffset Op.getOperand(6), // soffset Offsets.second, // offset - Op.getOperand(7), // cachepolicy - DAG.getConstant(1, DL, MVT::i1), // idxen + Op.getOperand(7), // cachepolicy, swizzled buffer + DAG.getTargetConstant(1, DL, MVT::i1), // idxen }; unsigned Opc = IntrinsicID == Intrinsic::amdgcn_struct_buffer_store ? AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT; Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc; MemSDNode *M = cast(Op); + M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6], + Ops[3])); // Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics EVT VDataType = VData.getValueType().getScalarType(); - if (VDataType == MVT::i8 || VDataType == MVT::i16) + if (!IsD16 && !VDataVT.isVector() && EltType.getSizeInBits() < 32) return handleByteShortBufferStores(DAG, VDataType, DL, Ops, M); return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, @@ -6908,13 +7061,17 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, SDValue(), // voffset -- will be set by setBufferOffsets SDValue(), // soffset -- will be set by setBufferOffsets SDValue(), // offset -- will be set by setBufferOffsets - DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy - DAG.getConstant(IdxEn, DL, MVT::i1), // idxen + DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy + DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen }; - setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); + unsigned Offset = setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); + // We don't know the offset if vindex is non-zero, so clear it. + if (IdxEn) + Offset = 0; EVT VT = Op.getOperand(2).getValueType(); auto *M = cast(Op); + M->getMemOperand()->setOffset(Offset); unsigned Opcode = VT.isVector() ? 
           AMDGPUISD::BUFFER_ATOMIC_PK_FADD : AMDGPUISD::BUFFER_ATOMIC_FADD;
@@ -6987,7 +7144,7 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
       Overflow += ImmOffset;
       ImmOffset = 0;
     }
-    C1 = cast<ConstantSDNode>(DAG.getConstant(ImmOffset, DL, MVT::i32));
+    C1 = cast<ConstantSDNode>(DAG.getTargetConstant(ImmOffset, DL, MVT::i32));
     if (Overflow) {
       auto OverflowVal = DAG.getConstant(Overflow, DL, MVT::i32);
       if (!N0)
@@ -7001,14 +7158,14 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
   if (!N0)
     N0 = DAG.getConstant(0, DL, MVT::i32);
   if (!C1)
-    C1 = cast<ConstantSDNode>(DAG.getConstant(0, DL, MVT::i32));
+    C1 = cast<ConstantSDNode>(DAG.getTargetConstant(0, DL, MVT::i32));
   return {N0, SDValue(C1, 0)};
 }
 
 // Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the
 // three offsets (voffset, soffset and instoffset) into the SDValue[3] array
 // pointed to by Offsets.
-void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
+unsigned SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
                                         SelectionDAG &DAG, SDValue *Offsets,
                                         unsigned Align) const {
   SDLoc DL(CombinedOffset);
@@ -7018,8 +7175,8 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
     if (AMDGPU::splitMUBUFOffset(Imm, SOffset, ImmOffset, Subtarget, Align)) {
       Offsets[0] = DAG.getConstant(0, DL, MVT::i32);
       Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
-      Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32);
-      return;
+      Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
+      return SOffset + ImmOffset;
     }
   }
   if (DAG.isBaseWithConstantOffset(CombinedOffset)) {
@@ -7031,13 +7188,14 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
                                 Subtarget, Align)) {
       Offsets[0] = N0;
       Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
-      Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32);
-      return;
+      Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
+      return 0;
     }
   }
   Offsets[0] = CombinedOffset;
   Offsets[1] = DAG.getConstant(0, DL, MVT::i32);
-  Offsets[2] = DAG.getConstant(0, DL, MVT::i32);
+  Offsets[2] = DAG.getTargetConstant(0, DL, MVT::i32);
+  return 0;
 }
 
 // Handle 8 bit and 16 bit buffer loads
@@ -7053,9 +7211,10 @@ SDValue SITargetLowering::handleByteShortBufferLoads(SelectionDAG &DAG,
   SDValue BufferLoad = DAG.getMemIntrinsicNode(Opc, DL, ResList, Ops, IntVT,
                                                M->getMemOperand());
-  SDValue BufferLoadTrunc = DAG.getNode(ISD::TRUNCATE, DL,
-                                        LoadVT.getScalarType(), BufferLoad);
-  return DAG.getMergeValues({BufferLoadTrunc, BufferLoad.getValue(1)}, DL);
+  SDValue LoadVal = DAG.getNode(ISD::TRUNCATE, DL, IntVT, BufferLoad);
+  LoadVal = DAG.getNode(ISD::BITCAST, DL, LoadVT, LoadVal);
+
+  return DAG.getMergeValues({LoadVal, BufferLoad.getValue(1)}, DL);
 }
 
 // Handle 8 bit and 16 bit buffer stores
@@ -7063,6 +7222,9 @@ SDValue SITargetLowering::handleByteShortBufferStores(SelectionDAG &DAG,
                                                       EVT VDataType, SDLoc DL,
                                                       SDValue Ops[],
                                                       MemSDNode *M) const {
+  if (VDataType == MVT::f16)
+    Ops[1] = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Ops[1]);
+
   SDValue BufferStoreExt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Ops[1]);
   Ops[1] = BufferStoreExt;
   unsigned Opc = (VDataType == MVT::i8) ?
AMDGPUISD::BUFFER_STORE_BYTE : @@ -7215,8 +7377,8 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType().getVectorElementType() == MVT::i32 && "Custom lowering for non-i32 vectors hasn't been implemented."); - if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, - *Load->getMemOperand())) { + if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), + MemVT, *Load->getMemOperand())) { SDValue Ops[2]; std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG); return DAG.getMergeValues(Ops, DL); @@ -7505,6 +7667,19 @@ SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul); } +// Returns immediate value for setting the F32 denorm mode when using the +// S_DENORM_MODE instruction. +static const SDValue getSPDenormModeValue(int SPDenormMode, SelectionDAG &DAG, + const SDLoc &SL, const GCNSubtarget *ST) { + assert(ST->hasDenormModeInst() && "Requires S_DENORM_MODE"); + int DPDenormModeDefault = ST->hasFP64Denormals() + ? FP_DENORM_FLUSH_NONE + : FP_DENORM_FLUSH_IN_FLUSH_OUT; + + int Mode = SPDenormMode | (DPDenormModeDefault << 2); + return DAG.getTargetConstant(Mode, SL, MVT::i32); +} + SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const { if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG)) return FastLowered; @@ -7531,16 +7706,26 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const { const unsigned Denorm32Reg = AMDGPU::Hwreg::ID_MODE | (4 << AMDGPU::Hwreg::OFFSET_SHIFT_) | (1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_); - const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i16); if (!Subtarget->hasFP32Denormals()) { SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE, - SL, MVT::i32); - SDValue EnableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, BindParamVTs, - DAG.getEntryNode(), - EnableDenormValue, BitField); + + SDValue EnableDenorm; + if (Subtarget->hasDenormModeInst()) { + const SDValue EnableDenormValue = + getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, SL, Subtarget); + + EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs, + DAG.getEntryNode(), EnableDenormValue); + } else { + const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE, + SL, MVT::i32); + EnableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, BindParamVTs, + DAG.getEntryNode(), EnableDenormValue, + BitField); + } + SDValue Ops[3] = { NegDivScale0, EnableDenorm.getValue(0), @@ -7562,19 +7747,29 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const { SDValue Fma2 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Mul, NumeratorScaled, Mul); - SDValue Fma3 = getFPTernOp(DAG, ISD::FMA,SL, MVT::f32, Fma2, Fma1, Mul, Fma2); + SDValue Fma3 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, Fma2, Fma1, Mul, Fma2); SDValue Fma4 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3, NumeratorScaled, Fma3); if (!Subtarget->hasFP32Denormals()) { - const SDValue DisableDenormValue = - DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32); - SDValue DisableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, MVT::Other, - Fma4.getValue(1), - DisableDenormValue, - BitField, - Fma4.getValue(2)); + + SDValue DisableDenorm; + if (Subtarget->hasDenormModeInst()) { + const SDValue DisableDenormValue = + getSPDenormModeValue(FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, SL, Subtarget); + + 
DisableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, MVT::Other, + Fma4.getValue(1), DisableDenormValue, + Fma4.getValue(2)); + } else { + const SDValue DisableDenormValue = + DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32); + + DisableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, MVT::Other, + Fma4.getValue(1), DisableDenormValue, + BitField, Fma4.getValue(2)); + } SDValue OutputChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, DisableDenorm, DAG.getRoot()); @@ -7684,8 +7879,8 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { assert(VT.isVector() && Store->getValue().getValueType().getScalarType() == MVT::i32); - if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, - *Store->getMemOperand())) { + if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), + VT, *Store->getMemOperand())) { return expandUnalignedStore(Store, DAG); } @@ -10065,7 +10260,7 @@ SDNode *SITargetLowering::legalizeTargetIndependentNode(SDNode *Node, // Insert a copy to a VReg_1 virtual register so LowerI1Copies doesn't have // to try understanding copies to physical registers. if (SrcVal.getValueType() == MVT::i1 && - TargetRegisterInfo::isPhysicalRegister(DestReg->getReg())) { + Register::isPhysicalRegister(DestReg->getReg())) { SDLoc SL(Node); MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); SDValue VReg = DAG.getRegister( @@ -10218,7 +10413,7 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, MachineOperand &Op = MI.getOperand(I); if ((OpInfo[I].RegClass != llvm::AMDGPU::AV_64RegClassID && OpInfo[I].RegClass != llvm::AMDGPU::AV_32RegClassID) || - !TargetRegisterInfo::isVirtualRegister(Op.getReg()) || + !Register::isVirtualRegister(Op.getReg()) || !TRI->isAGPR(MRI, Op.getReg())) continue; auto *Src = MRI.getUniqueVRegDef(Op.getReg()); @@ -10256,7 +10451,7 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, Node->use_begin()->isMachineOpcode() && Node->use_begin()->getMachineOpcode() == AMDGPU::EXTRACT_SUBREG && !Node->use_begin()->hasAnyUseOfValue(0))) { - unsigned Def = MI.getOperand(0).getReg(); + Register Def = MI.getOperand(0).getReg(); // Change this into a noret atomic. MI.setDesc(TII->get(NoRetAtomicOp)); @@ -10300,7 +10495,7 @@ MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG, // Combine the constants and the pointer. 
const SDValue Ops1[] = { - DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32), + DAG.getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32), Ptr, DAG.getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32), SubRegHi, @@ -10330,7 +10525,7 @@ MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG, const SDLoc &DL, SDValue DataHi = buildSMovImm32(DAG, DL, RsrcDword2And3 >> 32); const SDValue Ops[] = { - DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32), + DAG.getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32), PtrLo, DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32), PtrHi, @@ -10364,7 +10559,7 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return std::make_pair(0U, nullptr); case 32: case 16: - RC = &AMDGPU::SReg_32_XM0RegClass; + RC = &AMDGPU::SReg_32RegClass; break; case 64: RC = &AMDGPU::SGPR_64RegClass; @@ -10373,7 +10568,7 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, RC = &AMDGPU::SReg_96RegClass; break; case 128: - RC = &AMDGPU::SReg_128RegClass; + RC = &AMDGPU::SGPR_128RegClass; break; case 160: RC = &AMDGPU::SReg_160RegClass; @@ -10415,6 +10610,8 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, } break; case 'a': + if (!Subtarget->hasMAIInsts()) + break; switch (VT.getSizeInBits()) { default: return std::make_pair(0U, nullptr); @@ -10548,9 +10745,9 @@ void SITargetLowering::computeKnownBitsForFrameIndex(const SDValue Op, Known.Zero.setHighBits(getSubtarget()->getKnownHighZeroBitsForFrameIndex()); } -unsigned SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { - const unsigned PrefAlign = TargetLowering::getPrefLoopAlignment(ML); - const unsigned CacheLineAlign = 6; // log2(64) +Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { + const Align PrefAlign = TargetLowering::getPrefLoopAlignment(ML); + const Align CacheLineAlign = Align(64); // Pre-GFX10 target did not benefit from loop alignment if (!ML || DisableLoopAlignment || @@ -10578,7 +10775,7 @@ unsigned SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { // If inner loop block is aligned assume in average half of the alignment // size to be added as nops. if (MBB != Header) - LoopSize += (1 << MBB->getAlignment()) / 2; + LoopSize += MBB->getAlignment().value() / 2; for (const MachineInstr &MI : *MBB) { LoopSize += TII->getInstSizeInBytes(MI); @@ -10644,7 +10841,7 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N, const MachineRegisterInfo &MRI = MF->getRegInfo(); const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo(); unsigned Reg = R->getReg(); - if (TRI.isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return !TRI.isSGPRReg(MRI, Reg); if (MRI.isLiveIn(Reg)) { @@ -10683,12 +10880,6 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N, case ISD::INTRINSIC_W_CHAIN: return AMDGPU::isIntrinsicSourceOfDivergence( cast(N->getOperand(1))->getZExtValue()); - // In some cases intrinsics that are a source of divergence have been - // lowered to AMDGPUISD so we also need to check those too. 
-  case AMDGPUISD::INTERP_MOV:
-  case AMDGPUISD::INTERP_P1:
-  case AMDGPUISD::INTERP_P2:
-    return true;
   }
   return false;
 }
@@ -10748,3 +10939,110 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
 
   return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW);
 }
+
+const TargetRegisterClass *
+SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
+  const TargetRegisterClass *RC = TargetLoweringBase::getRegClassFor(VT, false);
+  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
+  if (RC == &AMDGPU::VReg_1RegClass && !isDivergent)
+    return Subtarget->getWavefrontSize() == 64 ? &AMDGPU::SReg_64RegClass
+                                               : &AMDGPU::SReg_32RegClass;
+  if (!TRI->isSGPRClass(RC) && !isDivergent)
+    return TRI->getEquivalentSGPRClass(RC);
+  else if (TRI->isSGPRClass(RC) && isDivergent)
+    return TRI->getEquivalentVGPRClass(RC);
+
+  return RC;
+}
+
+static bool hasCFUser(const Value *V, SmallPtrSet<const Value *, 16> &Visited) {
+  if (!Visited.insert(V).second)
+    return false;
+  bool Result = false;
+  for (auto U : V->users()) {
+    if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(U)) {
+      if (V == U->getOperand(1)) {
+        switch (Intrinsic->getIntrinsicID()) {
+        default:
+          Result = false;
+          break;
+        case Intrinsic::amdgcn_if_break:
+        case Intrinsic::amdgcn_if:
+        case Intrinsic::amdgcn_else:
+          Result = true;
+          break;
+        }
+      }
+      if (V == U->getOperand(0)) {
+        switch (Intrinsic->getIntrinsicID()) {
+        default:
+          Result = false;
+          break;
+        case Intrinsic::amdgcn_end_cf:
+        case Intrinsic::amdgcn_loop:
+          Result = true;
+          break;
+        }
+      }
+    } else {
+      Result = hasCFUser(U, Visited);
+    }
+    if (Result)
+      break;
+  }
+  return Result;
+}
+
+bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
+                                               const Value *V) const {
+  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
+    switch (Intrinsic->getIntrinsicID()) {
+    default:
+      return false;
+    case Intrinsic::amdgcn_if_break:
+      return true;
+    }
+  }
+  if (const ExtractValueInst *ExtValue = dyn_cast<ExtractValueInst>(V)) {
+    if (const IntrinsicInst *Intrinsic =
+            dyn_cast<IntrinsicInst>(ExtValue->getOperand(0))) {
+      switch (Intrinsic->getIntrinsicID()) {
+      default:
+        return false;
+      case Intrinsic::amdgcn_if:
+      case Intrinsic::amdgcn_else: {
+        ArrayRef<unsigned> Indices = ExtValue->getIndices();
+        if (Indices.size() == 1 && Indices[0] == 1) {
+          return true;
+        }
+      }
+      }
+    }
+  }
+  if (const CallInst *CI = dyn_cast<CallInst>(V)) {
+    if (isa<InlineAsm>(CI->getCalledValue())) {
+      const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
+      ImmutableCallSite CS(CI);
+      TargetLowering::AsmOperandInfoVector TargetConstraints = ParseConstraints(
+          MF.getDataLayout(), Subtarget->getRegisterInfo(), CS);
+      for (auto &TC : TargetConstraints) {
+        if (TC.Type == InlineAsm::isOutput) {
+          ComputeConstraintToUse(TC, SDValue());
+          unsigned AssignedReg;
+          const TargetRegisterClass *RC;
+          std::tie(AssignedReg, RC) = getRegForInlineAsmConstraint(
+              SIRI, TC.ConstraintCode, TC.ConstraintVT);
+          if (RC) {
+            MachineRegisterInfo &MRI = MF.getRegInfo();
+            if (AssignedReg != 0 && SIRI->isSGPRReg(MRI, AssignedReg))
+              return true;
+            else if (SIRI->isSGPRClass(RC))
+              return true;
+          }
+        }
+      }
+    }
+  }
+  SmallPtrSet<const Value *, 16> Visited;
+  return hasCFUser(V, Visited);
+}
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h
index 21a215e16ce7..f0102feb65c4 100644
--- a/lib/Target/AMDGPU/SIISelLowering.h
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@@ -94,6 +94,9 @@ private:
                              SelectionDAG &DAG, ArrayRef<SDValue> Ops,
                              bool IsIntrinsic = false) const;
 
+  SDValue lowerIntrinsicLoad(MemSDNode *M, bool IsFormat, SelectionDAG &DAG,
+                             ArrayRef<SDValue> Ops) const;
+
   //
Call DAG.getMemIntrinsicNode for a load, but first widen a dwordx3 type to // dwordx4 if on SI. SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, @@ -183,6 +186,7 @@ private: unsigned isCFIntrinsic(const SDNode *Intr) const; +public: /// \returns True if fixup needs to be emitted for given global value \p GV, /// false otherwise. bool shouldEmitFixup(const GlobalValue *GV) const; @@ -195,11 +199,14 @@ private: /// global value \p GV, false otherwise. bool shouldEmitPCReloc(const GlobalValue *GV) const; +private: // Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the // three offsets (voffset, soffset and instoffset) into the SDValue[3] array // pointed to by Offsets. - void setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG, - SDValue *Offsets, unsigned Align = 4) const; + /// \returns 0 If there is a non-constant offset or if the offset is 0. + /// Otherwise returns the constant offset. + unsigned setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG, + SDValue *Offsets, unsigned Align = 4) const; // Handle 8 bit and 16 bit buffer loads SDValue handleByteShortBufferLoads(SelectionDAG &DAG, EVT LoadVT, SDLoc DL, @@ -235,6 +242,11 @@ public: bool canMergeStoresTo(unsigned AS, EVT MemVT, const SelectionDAG &DAG) const override; + bool allowsMisalignedMemoryAccessesImpl( + unsigned Size, unsigned AS, unsigned Align, + MachineMemOperand::Flags Flags = MachineMemOperand::MONone, + bool *IsFast = nullptr) const; + bool allowsMisalignedMemoryAccesses( EVT VT, unsigned AS, unsigned Align, MachineMemOperand::Flags Flags = MachineMemOperand::MONone, @@ -309,12 +321,13 @@ public: SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const override; - unsigned getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const override; + Register getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const override; MachineBasicBlock *splitKillBlock(MachineInstr &MI, MachineBasicBlock *BB) const; + void bundleInstWithWaitcnt(MachineInstr &MI) const; MachineBasicBlock *emitGWSMemViolTestLoop(MachineInstr &MI, MachineBasicBlock *BB) const; @@ -330,6 +343,7 @@ public: bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; SDValue splitUnaryVectorOp(SDValue Op, SelectionDAG &DAG) const; SDValue splitBinaryVectorOp(SDValue Op, SelectionDAG &DAG) const; + SDValue splitTernaryVectorOp(SDValue Op, SelectionDAG &DAG) const; SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, @@ -374,7 +388,37 @@ public: unsigned Depth = 0) const override; AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override; - unsigned getPrefLoopAlignment(MachineLoop *ML) const override; + virtual const TargetRegisterClass * + getRegClassFor(MVT VT, bool isDivergent) const override; + virtual bool requiresUniformRegister(MachineFunction &MF, + const Value *V) const override; + Align getPrefLoopAlignment(MachineLoop *ML) const override; + + void allocateHSAUserSGPRs(CCState &CCInfo, + MachineFunction &MF, + const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) const; + + void allocateSystemSGPRs(CCState &CCInfo, + MachineFunction &MF, + SIMachineFunctionInfo &Info, + CallingConv::ID CallConv, + bool IsShader) const; + + void allocateSpecialEntryInputVGPRs(CCState &CCInfo, + MachineFunction &MF, + const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) const; + void allocateSpecialInputSGPRs( + CCState &CCInfo, + MachineFunction &MF, 
+ const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) const; + + void allocateSpecialInputVGPRs(CCState &CCInfo, + MachineFunction &MF, + const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) const; }; } // End namespace llvm diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index c89d5b71ec5c..dcb04e426584 100644 --- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1483,12 +1483,12 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { if (BI.Incoming) { if (!Brackets) - Brackets = llvm::make_unique(*BI.Incoming); + Brackets = std::make_unique(*BI.Incoming); else *Brackets = *BI.Incoming; } else { if (!Brackets) - Brackets = llvm::make_unique(ST); + Brackets = std::make_unique(ST); else Brackets->clear(); } @@ -1508,7 +1508,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { if (!MoveBracketsToSucc) { MoveBracketsToSucc = &SuccBI; } else { - SuccBI.Incoming = llvm::make_unique(*Brackets); + SuccBI.Incoming = std::make_unique(*Brackets); } } else if (SuccBI.Incoming->merge(*Brackets)) { SuccBI.Dirty = true; diff --git a/lib/Target/AMDGPU/SIInstrFormats.td b/lib/Target/AMDGPU/SIInstrFormats.td index 561a16c3e351..4dcbe92861f2 100644 --- a/lib/Target/AMDGPU/SIInstrFormats.td +++ b/lib/Target/AMDGPU/SIInstrFormats.td @@ -124,6 +124,9 @@ class InstSI DisableSIDecoder = 0; diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index ba8ed6993a56..d97e6a62971b 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -318,8 +318,25 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt, if (isMUBUF(LdSt) || isMTBUF(LdSt)) { const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset); - if (SOffset && SOffset->isReg()) - return false; + if (SOffset && SOffset->isReg()) { + // We can only handle this if it's a stack access, as any other resource + // would require reporting multiple base registers. + const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); + if (AddrReg && !AddrReg->isFI()) + return false; + + const MachineOperand *RSrc = getNamedOperand(LdSt, AMDGPU::OpName::srsrc); + const SIMachineFunctionInfo *MFI + = LdSt.getParent()->getParent()->getInfo(); + if (RSrc->getReg() != MFI->getScratchRSrcReg()) + return false; + + const MachineOperand *OffsetImm = + getNamedOperand(LdSt, AMDGPU::OpName::offset); + BaseOp = SOffset; + Offset = OffsetImm->getImm(); + return true; + } const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); if (!AddrReg) @@ -458,9 +475,9 @@ bool SIInstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1, const MachineRegisterInfo &MRI = FirstLdSt.getParent()->getParent()->getRegInfo(); - const unsigned Reg = FirstDst->getReg(); + const Register Reg = FirstDst->getReg(); - const TargetRegisterClass *DstRC = TargetRegisterInfo::isVirtualRegister(Reg) + const TargetRegisterClass *DstRC = Register::isVirtualRegister(Reg) ? 
MRI.getRegClass(Reg) : RI.getPhysRegClass(Reg); @@ -807,7 +824,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, "Not a VGPR32 reg"); if (Cond.size() == 1) { - unsigned SReg = MRI.createVirtualRegister(BoolXExecRC); + Register SReg = MRI.createVirtualRegister(BoolXExecRC); BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) .add(Cond[0]); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) @@ -820,7 +837,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, assert(Cond[0].isImm() && "Cond[0] is not an immediate"); switch (Cond[0].getImm()) { case SIInstrInfo::SCC_TRUE: { - unsigned SReg = MRI.createVirtualRegister(BoolXExecRC); + Register SReg = MRI.createVirtualRegister(BoolXExecRC); BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32 : AMDGPU::S_CSELECT_B64), SReg) .addImm(-1) @@ -834,7 +851,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, break; } case SIInstrInfo::SCC_FALSE: { - unsigned SReg = MRI.createVirtualRegister(BoolXExecRC); + Register SReg = MRI.createVirtualRegister(BoolXExecRC); BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32 : AMDGPU::S_CSELECT_B64), SReg) .addImm(0) @@ -850,7 +867,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, case SIInstrInfo::VCCNZ: { MachineOperand RegOp = Cond[1]; RegOp.setImplicit(false); - unsigned SReg = MRI.createVirtualRegister(BoolXExecRC); + Register SReg = MRI.createVirtualRegister(BoolXExecRC); BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) .add(RegOp); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) @@ -864,7 +881,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, case SIInstrInfo::VCCZ: { MachineOperand RegOp = Cond[1]; RegOp.setImplicit(false); - unsigned SReg = MRI.createVirtualRegister(BoolXExecRC); + Register SReg = MRI.createVirtualRegister(BoolXExecRC); BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) .add(RegOp); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) @@ -876,8 +893,8 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, break; } case SIInstrInfo::EXECNZ: { - unsigned SReg = MRI.createVirtualRegister(BoolXExecRC); - unsigned SReg2 = MRI.createVirtualRegister(RI.getBoolRC()); + Register SReg = MRI.createVirtualRegister(BoolXExecRC); + Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC()); BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64), SReg2) .addImm(0); @@ -894,8 +911,8 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, break; } case SIInstrInfo::EXECZ: { - unsigned SReg = MRI.createVirtualRegister(BoolXExecRC); - unsigned SReg2 = MRI.createVirtualRegister(RI.getBoolRC()); + Register SReg = MRI.createVirtualRegister(BoolXExecRC); + Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC()); BuildMI(MBB, I, DL, get(ST.isWave32() ? 
AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64), SReg2) .addImm(0); @@ -925,7 +942,7 @@ unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB, const DebugLoc &DL, unsigned SrcReg, int Value) const { MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - unsigned Reg = MRI.createVirtualRegister(RI.getBoolRC()); + Register Reg = MRI.createVirtualRegister(RI.getBoolRC()); BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg) .addImm(Value) .addReg(SrcReg); @@ -938,7 +955,7 @@ unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB, const DebugLoc &DL, unsigned SrcReg, int Value) const { MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - unsigned Reg = MRI.createVirtualRegister(RI.getBoolRC()); + Register Reg = MRI.createVirtualRegister(RI.getBoolRC()); BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg) .addImm(Value) .addReg(SrcReg); @@ -1052,12 +1069,12 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, // The SGPR spill/restore instructions only work on number sgprs, so we need // to make sure we are using the correct register class. - if (TargetRegisterInfo::isVirtualRegister(SrcReg) && SpillSize == 4) { + if (Register::isVirtualRegister(SrcReg) && SpillSize == 4) { MachineRegisterInfo &MRI = MF->getRegInfo(); MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass); } - MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc) + BuildMI(MBB, MI, DL, OpDesc) .addReg(SrcReg, getKillRegState(isKill)) // data .addFrameIndex(FrameIndex) // addr .addMemOperand(MMO) @@ -1068,11 +1085,6 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, // correctly handled. if (RI.spillSGPRToVGPR()) FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill); - if (ST.hasScalarStores()) { - // m0 is used for offset to scalar stores if used to spill. - Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead); - } - return; } @@ -1083,7 +1095,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, auto MIB = BuildMI(MBB, MI, DL, get(Opcode)); if (RI.hasAGPRs(RC)) { MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); MIB.addReg(Tmp, RegState::Define); } MIB.addReg(SrcReg, getKillRegState(isKill)) // data @@ -1182,24 +1194,18 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, // FIXME: Maybe this should not include a memoperand because it will be // lowered to non-memory instructions. const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize)); - if (TargetRegisterInfo::isVirtualRegister(DestReg) && SpillSize == 4) { + if (Register::isVirtualRegister(DestReg) && SpillSize == 4) { MachineRegisterInfo &MRI = MF->getRegInfo(); MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass); } if (RI.spillSGPRToVGPR()) FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill); - MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc, DestReg) + BuildMI(MBB, MI, DL, OpDesc, DestReg) .addFrameIndex(FrameIndex) // addr .addMemOperand(MMO) .addReg(MFI->getScratchRSrcReg(), RegState::Implicit) .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit); - - if (ST.hasScalarStores()) { - // m0 is used for offset to scalar stores if used to spill. 
- Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead); - } - return; } @@ -1208,7 +1214,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, auto MIB = BuildMI(MBB, MI, DL, get(Opcode), DestReg); if (RI.hasAGPRs(RC)) { MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); MIB.addReg(Tmp, RegState::Define); } MIB.addFrameIndex(FrameIndex) // vaddr @@ -1242,13 +1248,13 @@ unsigned SIInstrInfo::calculateLDSSpillAddress( if (!AMDGPU::isShader(MF->getFunction().getCallingConv()) && WorkGroupSize > WavefrontSize) { - unsigned TIDIGXReg - = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_X); - unsigned TIDIGYReg - = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Y); - unsigned TIDIGZReg - = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); - unsigned InputPtrReg = + Register TIDIGXReg = + MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_X); + Register TIDIGYReg = + MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Y); + Register TIDIGZReg = + MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); + Register InputPtrReg = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR); for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) { if (!Entry.isLiveIn(Reg)) @@ -1410,9 +1416,9 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { break; case AMDGPU::V_MOV_B64_PSEUDO: { - unsigned Dst = MI.getOperand(0).getReg(); - unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0); - unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1); + Register Dst = MI.getOperand(0).getReg(); + Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0); + Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1); const MachineOperand &SrcOp = MI.getOperand(1); // FIXME: Will this work for 64-bit floating point immediates? @@ -1437,6 +1443,10 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MI.eraseFromParent(); break; } + case AMDGPU::V_MOV_B64_DPP_PSEUDO: { + expandMovDPP64(MI); + break; + } case AMDGPU::V_SET_INACTIVE_B32: { unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64; unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; @@ -1469,7 +1479,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case AMDGPU::V_MOVRELD_B32_V8: case AMDGPU::V_MOVRELD_B32_V16: { const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32); - unsigned VecReg = MI.getOperand(0).getReg(); + Register VecReg = MI.getOperand(0).getReg(); bool IsUndef = MI.getOperand(1).isUndef(); unsigned SubReg = AMDGPU::sub0 + MI.getOperand(3).getImm(); assert(VecReg == MI.getOperand(1).getReg()); @@ -1492,9 +1502,9 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { } case AMDGPU::SI_PC_ADD_REL_OFFSET: { MachineFunction &MF = *MBB.getParent(); - unsigned Reg = MI.getOperand(0).getReg(); - unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0); - unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1); + Register Reg = MI.getOperand(0).getReg(); + Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0); + Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1); // Create a bundle so these instructions won't be re-ordered by the // post-RA scheduler. 
@@ -1531,7 +1541,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { break; } case TargetOpcode::BUNDLE: { - if (!MI.mayLoad()) + if (!MI.mayLoad() || MI.hasUnmodeledSideEffects()) return false; // If it is a load it must be a memory clause @@ -1550,6 +1560,64 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } +std::pair +SIInstrInfo::expandMovDPP64(MachineInstr &MI) const { + assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO); + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MBB.findDebugLoc(MI); + MachineFunction *MF = MBB.getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + Register Dst = MI.getOperand(0).getReg(); + unsigned Part = 0; + MachineInstr *Split[2]; + + + for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) { + auto MovDPP = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_dpp)); + if (Dst.isPhysical()) { + MovDPP.addDef(RI.getSubReg(Dst, Sub)); + } else { + assert(MRI.isSSA()); + auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + MovDPP.addDef(Tmp); + } + + for (unsigned I = 1; I <= 2; ++I) { // old and src operands. + const MachineOperand &SrcOp = MI.getOperand(I); + assert(!SrcOp.isFPImm()); + if (SrcOp.isImm()) { + APInt Imm(64, SrcOp.getImm()); + Imm.ashrInPlace(Part * 32); + MovDPP.addImm(Imm.getLoBits(32).getZExtValue()); + } else { + assert(SrcOp.isReg()); + Register Src = SrcOp.getReg(); + if (Src.isPhysical()) + MovDPP.addReg(RI.getSubReg(Src, Sub)); + else + MovDPP.addReg(Src, SrcOp.isUndef() ? RegState::Undef : 0, Sub); + } + } + + for (unsigned I = 3; I < MI.getNumExplicitOperands(); ++I) + MovDPP.addImm(MI.getOperand(I).getImm()); + + Split[Part] = MovDPP; + ++Part; + } + + if (Dst.isVirtual()) + BuildMI(MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), Dst) + .addReg(Split[0]->getOperand(0).getReg()) + .addImm(AMDGPU::sub0) + .addReg(Split[1]->getOperand(0).getReg()) + .addImm(AMDGPU::sub1); + + MI.eraseFromParent(); + return std::make_pair(Split[0], Split[1]); +} + bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, unsigned Src0OpName, @@ -1574,7 +1642,7 @@ bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI, static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp) { - unsigned Reg = RegOp.getReg(); + Register Reg = RegOp.getReg(); unsigned SubReg = RegOp.getSubReg(); bool IsKill = RegOp.isKill(); bool IsDead = RegOp.isDead(); @@ -1646,7 +1714,8 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, // This needs to be implemented because the source modifiers may be inserted // between the true commutable operands, and the base // TargetInstrInfo::commuteInstruction uses it. -bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0, +bool SIInstrInfo::findCommutedOpIndices(const MachineInstr &MI, + unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const { return findCommutedOpIndices(MI.getDesc(), SrcOpIdx0, SrcOpIdx1); } @@ -1710,7 +1779,7 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, // FIXME: Virtual register workaround for RegScavenger not working with empty // blocks. 
- unsigned PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); auto I = MBB.end(); @@ -2163,7 +2232,7 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB, SmallVector Regs; for (int Idx = 0; Idx != NElts; ++Idx) { - unsigned DstElt = MRI.createVirtualRegister(EltRC); + Register DstElt = MRI.createVirtualRegister(EltRC); Regs.push_back(DstElt); unsigned SubIdx = SubIndices[Idx]; @@ -2327,7 +2396,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, UseMI.RemoveOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp)); - unsigned Src1Reg = Src1->getReg(); + Register Src1Reg = Src1->getReg(); unsigned Src1SubReg = Src1->getSubReg(); Src0->setReg(Src1Reg); Src0->setSubReg(Src1SubReg); @@ -2367,12 +2436,12 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, MRI->hasOneUse(Src0->getReg())) { Src0->ChangeToImmediate(Def->getOperand(1).getImm()); Src0Inlined = true; - } else if ((RI.isPhysicalRegister(Src0->getReg()) && - (ST.getConstantBusLimit(Opc) <= 1 && - RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg())))) || - (RI.isVirtualRegister(Src0->getReg()) && - (ST.getConstantBusLimit(Opc) <= 1 && - RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))) + } else if ((Register::isPhysicalRegister(Src0->getReg()) && + (ST.getConstantBusLimit(Opc) <= 1 && + RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg())))) || + (Register::isVirtualRegister(Src0->getReg()) && + (ST.getConstantBusLimit(Opc) <= 1 && + RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))) return false; // VGPR is okay as Src0 - fallthrough } @@ -2385,10 +2454,10 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, MRI->hasOneUse(Src1->getReg()) && commuteInstruction(UseMI)) { Src0->ChangeToImmediate(Def->getOperand(1).getImm()); - } else if ((RI.isPhysicalRegister(Src1->getReg()) && - RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) || - (RI.isVirtualRegister(Src1->getReg()) && - RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))) + } else if ((Register::isPhysicalRegister(Src1->getReg()) && + RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) || + (Register::isVirtualRegister(Src1->getReg()) && + RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))) return false; // VGPR is okay as Src1 - fallthrough } @@ -2472,8 +2541,7 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa, } bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, - const MachineInstr &MIb, - AliasAnalysis *AA) const { + const MachineInstr &MIb) const { assert((MIa.mayLoad() || MIa.mayStore()) && "MIa must load from or modify a memory location"); assert((MIb.mayLoad() || MIb.mayStore()) && @@ -2664,6 +2732,7 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI, MI.modifiesRegister(AMDGPU::EXEC, &RI) || MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 || MI.getOpcode() == AMDGPU::S_SETREG_B32 || + MI.getOpcode() == AMDGPU::S_DENORM_MODE || changesVGPRIndexingMode(MI); } @@ -2865,8 +2934,16 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, if (OpInfo.RegClass < 0) return false; - if (MO.isImm() && isInlineConstant(MO, OpInfo)) + const MachineFunction *MF = MI.getParent()->getParent(); + const GCNSubtarget &ST = MF->getSubtarget(); + + if (MO.isImm() && isInlineConstant(MO, OpInfo)) { + if (isMAI(MI) && ST.hasMFMAInlineLiteralBug() && + OpNo ==(unsigned)AMDGPU::getNamedOperandIdx(MI.getOpcode(), + AMDGPU::OpName::src2)) + return false; 
return RI.opCanUseInlineConstant(OpInfo.OperandType); + } if (!RI.opCanUseLiteralConstant(OpInfo.OperandType)) return false; @@ -2874,8 +2951,6 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, if (!isVOP3(MI) || !AMDGPU::isSISrcOperand(InstDesc, OpNo)) return true; - const MachineFunction *MF = MI.getParent()->getParent(); - const GCNSubtarget &ST = MF->getSubtarget(); return ST.hasVOP3Literal(); } @@ -3036,7 +3111,7 @@ bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI, if (!MO.isUse()) return false; - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (Register::isVirtualRegister(MO.getReg())) return RI.isSGPRClass(MRI.getRegClass(MO.getReg())); // Null is free @@ -3093,7 +3168,8 @@ static bool shouldReadExec(const MachineInstr &MI) { return true; } - if (SIInstrInfo::isGenericOpcode(MI.getOpcode()) || + if (MI.isPreISelOpcode() || + SIInstrInfo::isGenericOpcode(MI.getOpcode()) || SIInstrInfo::isSALU(MI) || SIInstrInfo::isSMRD(MI)) return false; @@ -3104,7 +3180,7 @@ static bool shouldReadExec(const MachineInstr &MI) { static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg) { - if (TargetRegisterInfo::isPhysicalRegister(SubReg.getReg())) + if (Register::isPhysicalRegister(SubReg.getReg())) return TRI.isSubRegister(SuperVec.getReg(), SubReg.getReg()); return SubReg.getSubReg() != AMDGPU::NoSubRegister && @@ -3144,8 +3220,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, if (!Op.isReg()) continue; - unsigned Reg = Op.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg) && !RC->contains(Reg)) { + Register Reg = Op.getReg(); + if (!Register::isVirtualRegister(Reg) && !RC->contains(Reg)) { ErrInfo = "inlineasm operand has incorrect register class."; return false; } @@ -3209,9 +3285,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, continue; if (RegClass != -1) { - unsigned Reg = MI.getOperand(i).getReg(); - if (Reg == AMDGPU::NoRegister || - TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MI.getOperand(i).getReg(); + if (Reg == AMDGPU::NoRegister || Register::isVirtualRegister(Reg)) continue; const TargetRegisterClass *RC = RI.getRegClass(RegClass); @@ -3304,7 +3379,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, ErrInfo = "Dst register should be tied to implicit use of preserved register"; return false; - } else if (TargetRegisterInfo::isPhysicalRegister(TiedMO.getReg()) && + } else if (Register::isPhysicalRegister(TiedMO.getReg()) && Dst.getReg() != TiedMO.getReg()) { ErrInfo = "Dst register should use same physical register as preserved"; return false; @@ -3409,6 +3484,32 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, } } + // Special case for writelane - this can break the multiple constant bus rule, + // but still can't use more than one SGPR register + if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) { + unsigned SGPRCount = 0; + Register SGPRUsed = AMDGPU::NoRegister; + + for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx}) { + if (OpIdx == -1) + break; + + const MachineOperand &MO = MI.getOperand(OpIdx); + + if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) { + if (MO.isReg() && MO.getReg() != AMDGPU::M0) { + if (MO.getReg() != SGPRUsed) + ++SGPRCount; + SGPRUsed = MO.getReg(); + } + } + if (SGPRCount > ST.getConstantBusLimit(Opcode)) { + ErrInfo = "WRITELANE instruction violates constant bus restriction"; + return false; + } + } + } + // Verify misc. restrictions on specific instructions. 
if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 || Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) { @@ -3609,7 +3710,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 && ST.getGeneration() >= AMDGPUSubtarget::GFX10) { ErrInfo = "Invalid dpp_ctrl value: " - "broadcats are not supported on GFX10+"; + "broadcasts are not supported on GFX10+"; return false; } if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST && @@ -3631,6 +3732,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { case AMDGPU::PHI: return AMDGPU::PHI; case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG; case AMDGPU::WQM: return AMDGPU::WQM; + case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM; case AMDGPU::WWM: return AMDGPU::WWM; case AMDGPU::S_MOV_B32: { const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); @@ -3708,9 +3810,9 @@ const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, const MCInstrDesc &Desc = get(MI.getOpcode()); if (MI.isVariadic() || OpNo >= Desc.getNumOperands() || Desc.OpInfo[OpNo].RegClass == -1) { - unsigned Reg = MI.getOperand(OpNo).getReg(); + Register Reg = MI.getOperand(OpNo).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) return MRI.getRegClass(Reg); return RI.getPhysRegClass(Reg); } @@ -3741,7 +3843,7 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const { else VRC = &AMDGPU::VGPR_32RegClass; - unsigned Reg = MRI.createVirtualRegister(VRC); + Register Reg = MRI.createVirtualRegister(VRC); DebugLoc DL = MBB->findDebugLoc(I); BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).add(MO); MO.ChangeToRegister(Reg, false); @@ -3756,7 +3858,7 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, const { MachineBasicBlock *MBB = MI->getParent(); DebugLoc DL = MI->getDebugLoc(); - unsigned SubReg = MRI.createVirtualRegister(SubRC); + Register SubReg = MRI.createVirtualRegister(SubRC); if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) { BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) @@ -3768,7 +3870,7 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, // value so we don't need to worry about merging its subreg index with the // SubIdx passed to this function. The register coalescer should be able to // eliminate this extra copy. - unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC); + Register NewSuperReg = MRI.createVirtualRegister(SuperRC); BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg) .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg()); @@ -3814,11 +3916,10 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI, if (!MO.isReg()) return false; - unsigned Reg = MO.getReg(); - const TargetRegisterClass *RC = - TargetRegisterInfo::isVirtualRegister(Reg) ? - MRI.getRegClass(Reg) : - RI.getPhysRegClass(Reg); + Register Reg = MO.getReg(); + const TargetRegisterClass *RC = Register::isVirtualRegister(Reg) + ? 
MRI.getRegClass(Reg) + : RI.getPhysRegClass(Reg); const SIRegisterInfo *TRI = static_cast(MRI.getTargetRegisterInfo()); @@ -3935,13 +4036,13 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, if (Opc == AMDGPU::V_WRITELANE_B32) { const DebugLoc &DL = MI.getDebugLoc(); if (Src0.isReg() && RI.isVGPR(MRI, Src0.getReg())) { - unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) .add(Src0); Src0.ChangeToRegister(Reg, false); } if (Src1.isReg() && RI.isVGPR(MRI, Src1.getReg())) { - unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); const DebugLoc &DL = MI.getDebugLoc(); BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) .add(Src1); @@ -3967,7 +4068,7 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, // select is uniform. if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() && RI.isVGPR(MRI, Src1.getReg())) { - unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); const DebugLoc &DL = MI.getDebugLoc(); BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) .add(Src1); @@ -4003,7 +4104,7 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, MI.setDesc(get(CommutedOpc)); - unsigned Src0Reg = Src0.getReg(); + Register Src0Reg = Src0.getReg(); unsigned Src0SubReg = Src0.getSubReg(); bool Src0Kill = Src0.isKill(); @@ -4039,13 +4140,13 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineOperand &Src2 = MI.getOperand(VOP3Idx[2]); const DebugLoc &DL = MI.getDebugLoc(); if (Src1.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) { - unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) .add(Src1); Src1.ChangeToRegister(Reg, false); } if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) { - unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) .add(Src2); Src2.ChangeToRegister(Reg, false); @@ -4113,12 +4214,12 @@ unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI) const { const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg); const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC); - unsigned DstReg = MRI.createVirtualRegister(SRC); + Register DstReg = MRI.createVirtualRegister(SRC); unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32; if (RI.hasAGPRs(VRC)) { VRC = RI.getEquivalentVGPRClass(VRC); - unsigned NewSrcReg = MRI.createVirtualRegister(VRC); + Register NewSrcReg = MRI.createVirtualRegister(VRC); BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(TargetOpcode::COPY), NewSrcReg) .addReg(SrcReg); @@ -4134,7 +4235,7 @@ unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI, SmallVector SRegs; for (unsigned i = 0; i < SubRegs; ++i) { - unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + Register SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); BuildMI(*UseMI.getParent(), UseMI, 
UseMI.getDebugLoc(), get(AMDGPU::V_READFIRSTLANE_B32), SGPR) .addReg(SrcReg, 0, RI.getSubRegFromChannel(i)); @@ -4176,7 +4277,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const { - unsigned OpReg = Op.getReg(); + Register OpReg = Op.getReg(); unsigned OpSubReg = Op.getSubReg(); const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg( @@ -4186,7 +4287,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB, if (DstRC == OpRC) return; - unsigned DstReg = MRI.createVirtualRegister(DstRC); + Register DstReg = MRI.createVirtualRegister(DstRC); MachineInstr *Copy = BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op); @@ -4198,8 +4299,19 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB, return; // Try to eliminate the copy if it is copying an immediate value. - if (Def->isMoveImmediate()) + if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass) FoldImmediate(*Copy, *Def, OpReg, &MRI); + + bool ImpDef = Def->isImplicitDef(); + while (!ImpDef && Def && Def->isCopy()) { + if (Def->getOperand(1).getReg().isPhysical()) + break; + Def = MRI.getUniqueVRegDef(Def->getOperand(1).getReg()); + ImpDef = Def && Def->isImplicitDef(); + } + if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) && + !ImpDef) + Copy->addOperand(MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); } // Emit the actual waterfall loop, executing the wrapped instruction for each @@ -4223,18 +4335,18 @@ emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock::iterator I = LoopBB.begin(); - unsigned VRsrc = Rsrc.getReg(); + Register VRsrc = Rsrc.getReg(); unsigned VRsrcUndef = getUndefRegState(Rsrc.isUndef()); - unsigned SaveExec = MRI.createVirtualRegister(BoolXExecRC); - unsigned CondReg0 = MRI.createVirtualRegister(BoolXExecRC); - unsigned CondReg1 = MRI.createVirtualRegister(BoolXExecRC); - unsigned AndCond = MRI.createVirtualRegister(BoolXExecRC); - unsigned SRsrcSub0 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); - unsigned SRsrcSub1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); - unsigned SRsrcSub2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); - unsigned SRsrcSub3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); - unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); + Register SaveExec = MRI.createVirtualRegister(BoolXExecRC); + Register CondReg0 = MRI.createVirtualRegister(BoolXExecRC); + Register CondReg1 = MRI.createVirtualRegister(BoolXExecRC); + Register AndCond = MRI.createVirtualRegister(BoolXExecRC); + Register SRsrcSub0 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + Register SRsrcSub1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + Register SRsrcSub2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + Register SRsrcSub3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + Register SRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass); // Beginning of the loop, read the next Rsrc variant. BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub0) @@ -4302,7 +4414,7 @@ static void loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, unsigned MovExecOpc = ST.isWave32() ? 
AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID); - unsigned SaveExec = MRI.createVirtualRegister(BoolXExecRC); + Register SaveExec = MRI.createVirtualRegister(BoolXExecRC); // Save the EXEC mask BuildMI(MBB, I, DL, TII.get(MovExecOpc), SaveExec).addReg(Exec); @@ -4370,10 +4482,10 @@ extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc) { AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass); // Create an empty resource descriptor - unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); - unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); - unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); + Register Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + Register SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + Register SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass); uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat(); // Zero64 = 0 @@ -4430,7 +4542,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI, const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr; for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { if (!MI.getOperand(i).isReg() || - !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg())) + !Register::isVirtualRegister(MI.getOperand(i).getReg())) continue; const TargetRegisterClass *OpRC = MRI.getRegClass(MI.getOperand(i).getReg()); @@ -4447,8 +4559,16 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI, if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) { if (!VRC) { assert(SRC); - VRC = RI.hasAGPRs(getOpRegClass(MI, 0)) ? RI.getEquivalentAGPRClass(SRC) - : RI.getEquivalentVGPRClass(SRC); + if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) { + VRC = &AMDGPU::VReg_1RegClass; + } else + VRC = RI.hasAGPRs(getOpRegClass(MI, 0)) + ? RI.getEquivalentAGPRClass(SRC) + : RI.getEquivalentVGPRClass(SRC); + } else { + VRC = RI.hasAGPRs(getOpRegClass(MI, 0)) + ? RI.getEquivalentAGPRClass(VRC) + : RI.getEquivalentVGPRClass(VRC); } RC = VRC; } else { @@ -4458,7 +4578,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI, // Update all the operands so they have the same type. for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { MachineOperand &Op = MI.getOperand(I); - if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) + if (!Op.isReg() || !Register::isVirtualRegister(Op.getReg())) continue; // MI is a PHI instruction. @@ -4483,7 +4603,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI, // subregister index types e.g. 
sub0_sub1 + sub2 + sub3 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { MachineOperand &Op = MI.getOperand(I); - if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) + if (!Op.isReg() || !Register::isVirtualRegister(Op.getReg())) continue; const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg()); @@ -4502,8 +4622,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI, // Legalize INSERT_SUBREG // src0 must have the same register class as dst if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) { - unsigned Dst = MI.getOperand(0).getReg(); - unsigned Src0 = MI.getOperand(1).getReg(); + Register Dst = MI.getOperand(0).getReg(); + Register Src0 = MI.getOperand(1).getReg(); const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0); if (DstRC != Src0RC) { @@ -4577,13 +4697,13 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI, if (VAddr && AMDGPU::getIfAddr64Inst(MI.getOpcode()) != -1) { // This is already an ADDR64 instruction so we need to add the pointer // extracted from the resource descriptor to the current value of VAddr. - unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); + Register NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); const auto *BoolXExecRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID); - unsigned CondReg0 = MRI.createVirtualRegister(BoolXExecRC); - unsigned CondReg1 = MRI.createVirtualRegister(BoolXExecRC); + Register CondReg0 = MRI.createVirtualRegister(BoolXExecRC); + Register CondReg1 = MRI.createVirtualRegister(BoolXExecRC); unsigned RsrcPtr, NewSRsrc; std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc); @@ -4623,7 +4743,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI, unsigned RsrcPtr, NewSRsrc; std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc); - unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); + Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata); MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset); MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset); @@ -4661,6 +4781,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI, MIB.addImm(TFE->getImm()); } + MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::swz)); + MIB.cloneMemRefs(MI); Addr64 = MIB; } else { @@ -4933,8 +5055,8 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst, bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef(); unsigned NewDstReg = AMDGPU::NoRegister; if (HasDst) { - unsigned DstReg = Inst.getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + Register DstReg = Inst.getOperand(0).getReg(); + if (Register::isPhysicalRegister(DstReg)) continue; // Update the destination register class. 
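// Illustrative sketch, not part of the patch: emitLoadSRsrcFromVGPRLoop and
// loadSRsrcFromVGPR in the hunks above build the usual "waterfall" loop, i.e.
// read the value held by the first active lane with V_READFIRSTLANE, mask EXEC
// down to the lanes holding that same value, run the wrapped instruction once,
// and repeat until every lane has been served. A scalar C++ model of that idea
// only; waterfall(), runOnce and NumLanes are hypothetical names, not LLVM APIs.
#include <array>
#include <cstddef>
#include <cstdint>
#include <functional>

template <std::size_t NumLanes>
void waterfall(const std::array<uint32_t, NumLanes> &VGPRValue, uint64_t Exec,
               const std::function<void(uint32_t, uint64_t)> &runOnce) {
  // Only model lanes that exist (guard the shift-by-64 case).
  Exec &= (NumLanes == 64) ? ~uint64_t(0) : ((uint64_t(1) << NumLanes) - 1);
  while (Exec) {
    // v_readfirstlane_b32: value of the lowest still-active lane.
    unsigned FirstLane = 0;
    while (!((Exec >> FirstLane) & 1))
      ++FirstLane;
    uint32_t Uniform = VGPRValue[FirstLane];
    // v_cmp_eq / s_and_saveexec: lanes with the same value form this iteration.
    uint64_t SameValue = 0;
    for (unsigned L = 0; L < NumLanes; ++L)
      if (((Exec >> L) & 1) && VGPRValue[L] == Uniform)
        SameValue |= uint64_t(1) << L;
    runOnce(Uniform, SameValue); // execute the wrapped instruction once
    Exec &= ~SameValue;          // s_xor_b64 exec: retire the handled lanes
  }
}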
@@ -4943,7 +5065,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst, continue; if (Inst.isCopy() && - TargetRegisterInfo::isVirtualRegister(Inst.getOperand(1).getReg()) && + Register::isVirtualRegister(Inst.getOperand(1).getReg()) && NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) { // Instead of creating a copy where src and dst are the same register // class, we just replace all uses of dst with src. These kinds of @@ -4988,8 +5110,8 @@ bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst, MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - unsigned OldDstReg = Inst.getOperand(0).getReg(); - unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register OldDstReg = Inst.getOperand(0).getReg(); + Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned Opc = Inst.getOpcode(); assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32); @@ -5022,8 +5144,8 @@ void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist, MachineOperand &Dest = Inst.getOperand(0); MachineOperand &Src = Inst.getOperand(1); - unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned SubOp = ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_I32_e32; @@ -5052,7 +5174,7 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist, MachineOperand &Src1 = Inst.getOperand(2); if (ST.hasDLInsts()) { - unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL); legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL); @@ -5072,8 +5194,8 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist, bool Src1IsSGPR = Src1.isReg() && RI.isSGPRClass(MRI.getRegClass(Src1.getReg())); MachineInstr *Xor; - unsigned Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); - unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); // Build a pair of scalar instructions and add them to the work list. 
// The next iteration over the work list will lower these to the vector @@ -5117,8 +5239,8 @@ void SIInstrInfo::splitScalarNotBinop(SetVectorType &Worklist, MachineOperand &Src0 = Inst.getOperand(1); MachineOperand &Src1 = Inst.getOperand(2); - unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); - unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), Interm) .add(Src0) @@ -5146,8 +5268,8 @@ void SIInstrInfo::splitScalarBinOpN2(SetVectorType& Worklist, MachineOperand &Src0 = Inst.getOperand(1); MachineOperand &Src1 = Inst.getOperand(2); - unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); - unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Interm) .add(Src1); @@ -5189,16 +5311,16 @@ void SIInstrInfo::splitScalar64BitUnaryOp( const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); - unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC); + Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC); MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0); MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC); - unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC); + Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC); MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1); - unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC); + Register FullDestReg = MRI.createVirtualRegister(NewDestRC); BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) .addReg(DestSub0) .addImm(AMDGPU::sub0) @@ -5226,12 +5348,12 @@ void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist, MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const auto *CarryRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID); - unsigned FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); - unsigned DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); + Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned CarryReg = MRI.createVirtualRegister(CarryRC); - unsigned DeadCarryReg = MRI.createVirtualRegister(CarryRC); + Register CarryReg = MRI.createVirtualRegister(CarryRC); + Register DeadCarryReg = MRI.createVirtualRegister(CarryRC); MachineOperand &Dest = Inst.getOperand(0); MachineOperand &Src0 = Inst.getOperand(1); @@ -5327,17 +5449,17 @@ void SIInstrInfo::splitScalar64BitBinaryOp(SetVectorType &Worklist, const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); - unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC); + Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC); 
MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0) .add(SrcReg0Sub0) .add(SrcReg1Sub0); - unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC); + Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC); MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1) .add(SrcReg0Sub1) .add(SrcReg1Sub1); - unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC); + Register FullDestReg = MRI.createVirtualRegister(NewDestRC); BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) .addReg(DestSub0) .addImm(AMDGPU::sub0) @@ -5368,7 +5490,7 @@ void SIInstrInfo::splitScalar64BitXnor(SetVectorType &Worklist, const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); - unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); MachineOperand* Op0; MachineOperand* Op1; @@ -5384,7 +5506,7 @@ void SIInstrInfo::splitScalar64BitXnor(SetVectorType &Worklist, BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B64), Interm) .add(*Op0); - unsigned NewDest = MRI.createVirtualRegister(DestRC); + Register NewDest = MRI.createVirtualRegister(DestRC); MachineInstr &Xor = *BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B64), NewDest) .addReg(Interm) @@ -5411,8 +5533,8 @@ void SIInstrInfo::splitScalar64BitBCNT( MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass; - unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0); @@ -5451,9 +5573,9 @@ void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist, Offset == 0 && "Not implemented"); if (BitWidth < 32) { - unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); + Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo) .addReg(Inst.getOperand(1).getReg(), 0, AMDGPU::sub0) @@ -5476,8 +5598,8 @@ void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist, } MachineOperand &Src = Inst.getOperand(1); - unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); + Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg) .addImm(31) @@ -5506,6 +5628,7 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist( switch (UseMI.getOpcode()) { case AMDGPU::COPY: case AMDGPU::WQM: + case AMDGPU::SOFT_WQM: case AMDGPU::WWM: case AMDGPU::REG_SEQUENCE: case AMDGPU::PHI: @@ -5531,7 +5654,7 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist( void SIInstrInfo::movePackToVALU(SetVectorType &Worklist, MachineRegisterInfo &MRI, MachineInstr &Inst) const { - unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); MachineBasicBlock *MBB = 
Inst.getParent(); MachineOperand &Src0 = Inst.getOperand(1); MachineOperand &Src1 = Inst.getOperand(2); @@ -5539,8 +5662,8 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist, switch (Inst.getOpcode()) { case AMDGPU::S_PACK_LL_B32_B16: { - unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); // FIXME: Can do a lot better if we know the high bits of src0 or src1 are // 0. @@ -5558,7 +5681,7 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist, break; } case AMDGPU::S_PACK_LH_B32_B16: { - unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg) .addImm(0xffff); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_BFI_B32), ResultReg) @@ -5568,8 +5691,8 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist, break; } case AMDGPU::S_PACK_HH_B32_B16: { - unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg) .addImm(16) .add(Src0); @@ -5623,17 +5746,27 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass( case AMDGPU::REG_SEQUENCE: case AMDGPU::INSERT_SUBREG: case AMDGPU::WQM: + case AMDGPU::SOFT_WQM: case AMDGPU::WWM: { const TargetRegisterClass *SrcRC = getOpRegClass(Inst, 1); if (RI.hasAGPRs(SrcRC)) { if (RI.hasAGPRs(NewDstRC)) return nullptr; - NewDstRC = RI.getEquivalentAGPRClass(NewDstRC); + switch (Inst.getOpcode()) { + case AMDGPU::PHI: + case AMDGPU::REG_SEQUENCE: + case AMDGPU::INSERT_SUBREG: + NewDstRC = RI.getEquivalentAGPRClass(NewDstRC); + break; + default: + NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); + } + if (!NewDstRC) return nullptr; } else { - if (RI.hasVGPRs(NewDstRC)) + if (RI.hasVGPRs(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass) return nullptr; NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); @@ -5686,7 +5819,7 @@ unsigned SIInstrInfo::findUsedSGPR(const MachineInstr &MI, return MO.getReg(); // If this could be a VGPR or an SGPR, Check the dynamic register class. 
- unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); const TargetRegisterClass *RegRC = MRI.getRegClass(Reg); if (RI.isSGPRClass(RegRC)) UsedSGPRs[i] = Reg; @@ -5941,7 +6074,7 @@ void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry, MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo(); if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { - unsigned DstReg = MRI.createVirtualRegister(RI.getBoolRC()); + Register DstReg = MRI.createVirtualRegister(RI.getBoolRC()); MachineInstr *SIIF = BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg) .add(Branch->getOperand(0)) @@ -5968,8 +6101,8 @@ void SIInstrInfo::convertNonUniformLoopRegion( if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { - unsigned DstReg = MRI.createVirtualRegister(RI.getBoolRC()); - unsigned BackEdgeReg = MRI.createVirtualRegister(RI.getBoolRC()); + Register DstReg = MRI.createVirtualRegister(RI.getBoolRC()); + Register BackEdgeReg = MRI.createVirtualRegister(RI.getBoolRC()); MachineInstrBuilder HeaderPHIBuilder = BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg); for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(), @@ -5979,7 +6112,7 @@ void SIInstrInfo::convertNonUniformLoopRegion( HeaderPHIBuilder.addReg(BackEdgeReg); } else { MachineBasicBlock *PMBB = *PI; - unsigned ZeroReg = MRI.createVirtualRegister(RI.getBoolRC()); + Register ZeroReg = MRI.createVirtualRegister(RI.getBoolRC()); materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(), ZeroReg, 0); HeaderPHIBuilder.addReg(ZeroReg); @@ -6063,13 +6196,30 @@ SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB, return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - unsigned UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC()); + Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC()); MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC()); return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg) .addReg(UnusedCarry, RegState::Define | RegState::Dead); } +MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const DebugLoc &DL, + Register DestReg, + RegScavenger &RS) const { + if (ST.hasAddNoCarry()) + return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e32), DestReg); + + Register UnusedCarry = RS.scavengeRegister(RI.getBoolRC(), I, 0, false); + // TODO: Users need to deal with this. + if (!UnusedCarry.isValid()) + return MachineInstrBuilder(); + + return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg) + .addReg(UnusedCarry, RegState::Define | RegState::Dead); +} + bool SIInstrInfo::isKillTerminator(unsigned Opcode) { switch (Opcode) { case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: @@ -6115,7 +6265,21 @@ bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const { return false; const auto RCID = MI.getDesc().OpInfo[Idx].RegClass; - return RCID == AMDGPU::SReg_128RegClassID; + return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass); +} + +unsigned SIInstrInfo::getNumFlatOffsetBits(unsigned AddrSpace, + bool Signed) const { + if (!ST.hasFlatInstOffsets()) + return 0; + + if (ST.hasFlatSegmentOffsetBug() && AddrSpace == AMDGPUAS::FLAT_ADDRESS) + return 0; + + if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) + return Signed ? 12 : 11; + + return Signed ? 
13 : 12; } bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, @@ -6254,7 +6418,7 @@ static bool followSubRegDef(MachineInstr &MI, MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI) { assert(MRI.isSSA()); - if (!TargetRegisterInfo::isVirtualRegister(P.Reg)) + if (!Register::isVirtualRegister(P.Reg)) return nullptr; auto RSR = P; @@ -6265,8 +6429,7 @@ MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, case AMDGPU::COPY: case AMDGPU::V_MOV_B32_e32: { auto &Op1 = MI->getOperand(1); - if (Op1.isReg() && - TargetRegisterInfo::isVirtualRegister(Op1.getReg())) { + if (Op1.isReg() && Register::isVirtualRegister(Op1.getReg())) { if (Op1.isUndef()) return nullptr; RSR = getRegSubRegPair(Op1); @@ -6360,3 +6523,40 @@ bool llvm::execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, return true; } } + +MachineInstr *SIInstrInfo::createPHIDestinationCopy( + MachineBasicBlock &MBB, MachineBasicBlock::iterator LastPHIIt, + const DebugLoc &DL, Register Src, Register Dst) const { + auto Cur = MBB.begin(); + if (Cur != MBB.end()) + do { + if (!Cur->isPHI() && Cur->readsRegister(Dst)) + return BuildMI(MBB, Cur, DL, get(TargetOpcode::COPY), Dst).addReg(Src); + ++Cur; + } while (Cur != MBB.end() && Cur != LastPHIIt); + + return TargetInstrInfo::createPHIDestinationCopy(MBB, LastPHIIt, DL, Src, + Dst); +} + +MachineInstr *SIInstrInfo::createPHISourceCopy( + MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, + const DebugLoc &DL, Register Src, Register SrcSubReg, Register Dst) const { + if (InsPt != MBB.end() && + (InsPt->getOpcode() == AMDGPU::SI_IF || + InsPt->getOpcode() == AMDGPU::SI_ELSE || + InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) && + InsPt->definesRegister(Src)) { + InsPt++; + return BuildMI(MBB, InsPt, InsPt->getDebugLoc(), + get(ST.isWave32() ? AMDGPU::S_MOV_B32_term + : AMDGPU::S_MOV_B64_term), + Dst) + .addReg(Src, 0, SrcSubReg) + .addReg(AMDGPU::EXEC, RegState::Implicit); + } + return TargetInstrInfo::createPHISourceCopy(MBB, InsPt, DL, Src, SrcSubReg, + Dst); +} + +bool llvm::SIInstrInfo::isWave32() const { return ST.isWave32(); } diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h index 3ff35da0b963..be463442c888 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.h +++ b/lib/Target/AMDGPU/SIInstrInfo.h @@ -173,7 +173,7 @@ public: } bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA) const override; + AAResults *AA) const override; bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1, @@ -229,6 +229,14 @@ public: bool expandPostRAPseudo(MachineInstr &MI) const override; + // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp + // instructions. Returns a pair of generated instructions. + // Can split either post-RA with physical registers or pre-RA with + // virtual registers. In latter case IR needs to be in SSA form and + // and a REG_SEQUENCE is produced to define original register. + std::pair + expandMovDPP64(MachineInstr &MI) const; + // Returns an opcode that can be used to move a value to a \p DstRC // register. If there is no hardware instruction that can store to \p // DstRC, then AMDGPU::COPY is returned. 
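// Illustrative sketch, not part of the patch: the change from "unsigned Reg"
// to "Register Reg" that recurs throughout the hunks above swaps the static
// TargetRegisterInfo queries for the llvm::Register wrapper; the spellings
// used below (Register::isVirtualRegister, Reg.isValid(), Reg.isPhysical())
// all appear in the patch itself. classifyOperand is a hypothetical helper.
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

static const llvm::TargetRegisterClass *
classifyOperand(const llvm::MachineOperand &MO,
                const llvm::MachineRegisterInfo &MRI,
                const llvm::TargetRegisterInfo &TRI) {
  if (!MO.isReg())
    return nullptr;
  llvm::Register Reg = MO.getReg();            // was: unsigned Reg = MO.getReg();
  if (!Reg.isValid())
    return nullptr;                            // e.g. AMDGPU::NoRegister
  if (llvm::Register::isVirtualRegister(Reg))  // was: TargetRegisterInfo::isVirtualRegister(Reg)
    return MRI.getRegClass(Reg);
  return TRI.getMinimalPhysRegClass(Reg);      // physical: derive a class from TRI
}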
@@ -242,7 +250,7 @@ public: return commuteOpcode(MI.getOpcode()); } - bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, + bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override; bool findCommutedOpIndices(MCInstrDesc Desc, unsigned & SrcOpIdx0, @@ -303,8 +311,7 @@ public: bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, - const MachineInstr &MIb, - AliasAnalysis *AA = nullptr) const override; + const MachineInstr &MIb) const override; bool isFoldableCopy(const MachineInstr &MI) const; @@ -578,6 +585,14 @@ public: return get(Opcode).TSFlags & SIInstrFlags::IsMAI; } + static bool isDOT(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::IsDOT; + } + + bool isDOT(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::IsDOT; + } + static bool isScalarUnit(const MachineInstr &MI) { return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD); } @@ -954,6 +969,19 @@ public: bool isBasicBlockPrologue(const MachineInstr &MI) const override; + MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsPt, + const DebugLoc &DL, Register Src, + Register Dst) const override; + + MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsPt, + const DebugLoc &DL, Register Src, + Register SrcSubReg, + Register Dst) const override; + + bool isWave32() const; + /// Return a partially built integer add instruction without carry. /// Caller must add source operands. /// For pre-GFX9 it will generate unused carry destination operand. @@ -963,6 +991,12 @@ public: const DebugLoc &DL, unsigned DestReg) const; + MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const DebugLoc &DL, + Register DestReg, + RegScavenger &RS) const; + static bool isKillTerminator(unsigned Opcode); const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const; @@ -970,6 +1004,8 @@ public: return isUInt<12>(Imm); } + unsigned getNumFlatOffsetBits(unsigned AddrSpace, bool Signed) const; + /// Returns if \p Offset is legal for the subtarget as the offset to a FLAT /// encoded instruction. If \p Signed, this is for an instruction that /// interprets the offset as signed. 
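// Illustrative note, not part of the patch: getNumFlatOffsetBits() added above
// returns 13 (signed) / 12 (unsigned) bits before GFX10 and 12 / 11 bits on
// GFX10+, i.e. immediate FLAT offsets of [-4096, 4095] or [0, 4095] pre-GFX10
// and [-2048, 2047] or [0, 2047] on GFX10+, and 0 bits when FLAT offsets are
// unavailable or the flat-segment-offset bug applies to the address space.
// A small range check matching that reading; fitsFlatOffset is a hypothetical
// helper, not an LLVM API.
#include <cstdint>

static bool fitsFlatOffset(int64_t Offset, unsigned NumBits, bool Signed) {
  if (NumBits == 0)
    return Offset == 0; // no usable immediate offset field
  if (Signed)           // two's-complement range of a NumBits-wide field
    return Offset >= -(int64_t(1) << (NumBits - 1)) &&
           Offset < (int64_t(1) << (NumBits - 1));
  return Offset >= 0 && Offset < (int64_t(1) << NumBits);
}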
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index c382c816e0b4..1eecbf555613 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -84,7 +84,7 @@ def SDTtbuffer_load : SDTypeProfile<1, 8, SDTCisVT<4, i32>, // soffset(SGPR) SDTCisVT<5, i32>, // offset(imm) SDTCisVT<6, i32>, // format(imm) - SDTCisVT<7, i32>, // cachecontrol(imm) + SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm) SDTCisVT<8, i1> // idxen(imm) ]>; @@ -102,7 +102,7 @@ def SDTtbuffer_store : SDTypeProfile<0, 9, SDTCisVT<4, i32>, // soffset(SGPR) SDTCisVT<5, i32>, // offset(imm) SDTCisVT<6, i32>, // format(imm) - SDTCisVT<7, i32>, // cachecontrol(imm) + SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm) SDTCisVT<8, i1> // idxen(imm) ]>; @@ -119,7 +119,7 @@ def SDTBufferLoad : SDTypeProfile<1, 7, SDTCisVT<3, i32>, // voffset(VGPR) SDTCisVT<4, i32>, // soffset(SGPR) SDTCisVT<5, i32>, // offset(imm) - SDTCisVT<6, i32>, // cachepolicy(imm) + SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm) SDTCisVT<7, i1>]>; // idxen(imm) def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad, @@ -145,7 +145,7 @@ def SDTBufferStore : SDTypeProfile<0, 8, SDTCisVT<3, i32>, // voffset(VGPR) SDTCisVT<4, i32>, // soffset(SGPR) SDTCisVT<5, i32>, // offset(imm) - SDTCisVT<6, i32>, // cachepolicy(imm) + SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm) SDTCisVT<7, i1>]>; // idxen(imm) def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore, @@ -198,6 +198,8 @@ def SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">; def SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">; def SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">; def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">; +def SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">; +def SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">; def SIbuffer_atomic_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_FADD", f32>; def SIbuffer_atomic_pk_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_PK_FADD", v2f16>; @@ -264,6 +266,11 @@ def SIload_d16_hi_i8 : SDNode<"AMDGPUISD::LOAD_D16_HI_I8", [SDNPMayLoad, SDNPMemOperand, SDNPHasChain] >; +def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE", + SDTypeProfile<0 ,1, [SDTCisInt<0>]>, + [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue] +>; + //===----------------------------------------------------------------------===// // ValueType helpers //===----------------------------------------------------------------------===// @@ -277,7 +284,9 @@ class isFloatType { !if(!eq(SrcVT.Value, f64.Value), 1, !if(!eq(SrcVT.Value, v2f16.Value), 1, !if(!eq(SrcVT.Value, v4f16.Value), 1, - 0))))); + !if(!eq(SrcVT.Value, v2f32.Value), 1, + !if(!eq(SrcVT.Value, v2f64.Value), 1, + 0))))))); } class isIntType { @@ -300,14 +309,36 @@ class isPackedType { // PatFrags for global memory operations //===----------------------------------------------------------------------===// -defm atomic_inc_global : global_binary_atomic_op; -defm atomic_dec_global : global_binary_atomic_op; +foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in { +let AddressSpaces = !cast("LoadAddress_"#as).AddrSpaces in { + -def atomic_inc_local : local_binary_atomic_op; -def atomic_dec_local : local_binary_atomic_op; -def atomic_load_fadd_local : local_binary_atomic_op; -def atomic_load_fmin_local : local_binary_atomic_op; -def 
atomic_load_fmax_local : local_binary_atomic_op; +defm atomic_inc_#as : binary_atomic_op; +defm atomic_dec_#as : binary_atomic_op; +defm atomic_load_fmin_#as : binary_atomic_op; +defm atomic_load_fmax_#as : binary_atomic_op; + + +} // End let AddressSpaces = ... +} // End foreach AddrSpace + +def atomic_fadd_global_noret : PatFrag< + (ops node:$ptr, node:$value), + (SIglobal_atomic_fadd node:$ptr, node:$value)> { + // FIXME: Move this + let MemoryVT = f32; + let IsAtomic = 1; + let AddressSpaces = StoreAddress_global.AddrSpaces; +} + +def atomic_pk_fadd_global_noret : PatFrag< + (ops node:$ptr, node:$value), + (SIglobal_atomic_pk_fadd node:$ptr, node:$value)> { + // FIXME: Move this + let MemoryVT = v2f16; + let IsAtomic = 1; + let AddressSpaces = StoreAddress_global.AddrSpaces; +} //===----------------------------------------------------------------------===// // SDNodes PatFrags for loads/stores with a glue input. @@ -328,10 +359,12 @@ def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad, >; def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> { + let IsLoad = 1; let IsUnindexed = 1; } def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> { + let IsLoad = 1; let IsNonExtLoad = 1; } @@ -347,14 +380,15 @@ def atomic_load_64_glue : PatFrag<(ops node:$ptr), let MemoryVT = i64; } -def extload_glue : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> { +def extload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> { let IsLoad = 1; let IsAnyExtLoad = 1; } -def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr), [{ - return cast(N)->getExtensionType() == ISD::SEXTLOAD; -}]>; +def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> { + let IsLoad = 1; + let IsSignExtLoad = 1; +} def zextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> { let IsLoad = 1; @@ -391,25 +425,50 @@ def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> { let MemoryVT = i16; } -def load_glue_align8 : Aligned8Bytes < - (ops node:$ptr), (load_glue node:$ptr) ->; -def load_glue_align16 : Aligned16Bytes < - (ops node:$ptr), (load_glue node:$ptr) ->; +let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in { +def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> { + let IsNonExtLoad = 1; +} -def load_local_m0 : LoadFrag, LocalAddress; -def sextloadi8_local_m0 : LoadFrag, LocalAddress; -def sextloadi16_local_m0 : LoadFrag, LocalAddress; -def extloadi8_local_m0 : LoadFrag, LocalAddress; -def zextloadi8_local_m0 : LoadFrag, LocalAddress; -def extloadi16_local_m0 : LoadFrag, LocalAddress; -def zextloadi16_local_m0 : LoadFrag, LocalAddress; -def load_align8_local_m0 : LoadFrag , LocalAddress; -def load_align16_local_m0 : LoadFrag , LocalAddress; -def atomic_load_32_local_m0 : LoadFrag, LocalAddress; -def atomic_load_64_local_m0 : LoadFrag, LocalAddress; +let MemoryVT = i8 in { +def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>; +def sextloadi8_local_m0 : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>; +def zextloadi8_local_m0 : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>; +} + +let MemoryVT = i16 in { +def extloadi16_local_m0 : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>; +def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>; +def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>; +} + +def load_align8_local_m0 : PatFrag<(ops node:$ptr), + 
(load_local_m0 node:$ptr)> { + let IsLoad = 1; + let IsNonExtLoad = 1; + let MinAlignment = 8; +} +def load_align16_local_m0 : PatFrag<(ops node:$ptr), + (load_local_m0 node:$ptr)> { + let IsLoad = 1; + let IsNonExtLoad = 1; + let MinAlignment = 16; +} + +} // End IsLoad = 1 + +let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in { +def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr), + (atomic_load_32_glue node:$ptr)> { + let MemoryVT = i32; +} +def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr), + (atomic_load_64_glue node:$ptr)> { + let MemoryVT = i64; +} + +} // End let AddressSpaces = LoadAddress_local.AddrSpaces def AMDGPUst_glue : SDNode <"ISD::STORE", SDTStore, @@ -420,50 +479,88 @@ def AMDGPUatomic_st_glue : SDNode <"ISD::ATOMIC_STORE", SDTAtomicStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue] >; -def atomic_store_glue : PatFrag<(ops node:$ptr, node:$val), - (AMDGPUatomic_st_glue node:$ptr, node:$val)> { -} - def unindexedstore_glue : PatFrag<(ops node:$val, node:$ptr), - (AMDGPUst_glue node:$val, node:$ptr), [{ - return cast(N)->getAddressingMode() == ISD::UNINDEXED; -}]>; + (AMDGPUst_glue node:$val, node:$ptr)> { + let IsStore = 1; + let IsUnindexed = 1; +} def store_glue : PatFrag<(ops node:$val, node:$ptr), - (unindexedstore_glue node:$val, node:$ptr), [{ - return !cast(N)->isTruncatingStore(); -}]>; + (unindexedstore_glue node:$val, node:$ptr)> { + let IsStore = 1; + let IsTruncStore = 0; +} def truncstore_glue : PatFrag<(ops node:$val, node:$ptr), - (unindexedstore_glue node:$val, node:$ptr), [{ - return cast(N)->isTruncatingStore(); -}]>; + (unindexedstore_glue node:$val, node:$ptr)> { + let IsStore = 1; + let IsTruncStore = 1; +} def truncstorei8_glue : PatFrag<(ops node:$val, node:$ptr), - (truncstore_glue node:$val, node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i8; -}]>; + (truncstore_glue node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = i8; +} def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr), - (truncstore_glue node:$val, node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i16; -}]>; + (truncstore_glue node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = i16; +} -def store_glue_align8 : Aligned8Bytes < - (ops node:$value, node:$ptr), (store_glue node:$value, node:$ptr) ->; +let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in { +def store_local_m0 : PatFrag<(ops node:$val, node:$ptr), + (store_glue node:$val, node:$ptr)> { + let IsStore = 1; + let IsTruncStore = 0; +} -def store_glue_align16 : Aligned16Bytes < - (ops node:$value, node:$ptr), (store_glue node:$value, node:$ptr) ->; +def truncstorei8_local_m0 : PatFrag<(ops node:$val, node:$ptr), + (unindexedstore_glue node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = i8; +} + +def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr), + (unindexedstore_glue node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = i16; +} +} + +def store_align16_local_m0 : PatFrag < + (ops node:$value, node:$ptr), + (store_local_m0 node:$value, node:$ptr)> { + let IsStore = 1; + let IsTruncStore = 0; + let MinAlignment = 16; +} -def store_local_m0 : StoreFrag, LocalAddress; -def truncstorei8_local_m0 : StoreFrag, LocalAddress; -def truncstorei16_local_m0 : StoreFrag, LocalAddress; -def atomic_store_local_m0 : StoreFrag, LocalAddress; +def store_align8_local_m0 : PatFrag < + (ops node:$value, node:$ptr), + (store_local_m0 node:$value, node:$ptr)> { + let IsStore = 1; + let IsTruncStore = 0; + let MinAlignment = 8; +} + +let 
AddressSpaces = StoreAddress_local.AddrSpaces in { + +def atomic_store_local_32_m0 : PatFrag < + (ops node:$value, node:$ptr), + (AMDGPUatomic_st_glue node:$value, node:$ptr)> { + let IsAtomic = 1; + let MemoryVT = i32; +} +def atomic_store_local_64_m0 : PatFrag < + (ops node:$value, node:$ptr), + (AMDGPUatomic_st_glue node:$value, node:$ptr)> { + let IsAtomic = 1; + let MemoryVT = i64; +} +} // End let AddressSpaces = StoreAddress_local.AddrSpaces -def store_align8_local_m0 : StoreFrag, LocalAddress; -def store_align16_local_m0 : StoreFrag, LocalAddress; def si_setcc_uniform : PatFrag < (ops node:$lhs, node:$rhs, node:$cond), @@ -539,16 +636,27 @@ def lshl_rev : PatFrag < (shl $src0, $src1) >; +def add_ctpop : PatFrag < + (ops node:$src0, node:$src1), + (add (ctpop $src0), $src1) +>; + multiclass SIAtomicM0Glue2 { + SDTypeProfile tc = SDTAtomic2, + bit IsInt = 1> { def _glue : SDNode < !if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, tc, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue] >; - def _local_m0 : local_binary_atomic_op (NAME#"_glue")>; - def _region_m0 : region_binary_atomic_op (NAME#"_glue")>; + let AddressSpaces = StoreAddress_local.AddrSpaces in { + defm _local_m0 : binary_atomic_op (NAME#"_glue"), IsInt>; + } + + let AddressSpaces = StoreAddress_region.AddrSpaces in { + defm _region_m0 : binary_atomic_op (NAME#"_glue"), IsInt>; + } } defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">; @@ -563,17 +671,9 @@ defm atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">; defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">; defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">; defm atomic_swap : SIAtomicM0Glue2 <"SWAP">; -defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32>; -defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32>; -defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32>; - -def atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3, - [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue] ->; - -def atomic_cmp_swap_local_m0 : AtomicCmpSwapLocal; -def atomic_cmp_swap_region_m0 : AtomicCmpSwapRegion; - +defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>; +defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32, 0>; +defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32, 0>; def as_i1imm : SDNodeXFormgetTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1); @@ -591,6 +691,10 @@ def as_i32imm: SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32); }]>; +def as_i32timm: SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32); +}]>; + def as_i64imm: SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64); }]>; @@ -627,9 +731,13 @@ def SIMM16bit : ImmLeaf ; def UIMM16bit : ImmLeaf (Imm); }] + [{return isUInt<16>(Imm);}] >; +def i64imm_32bit : ImmLeaf(Imm); +}]>; + class InlineImm : PatLeaf <(vt imm), [{ return isInlineImmediate(N); }]>; @@ -763,6 +871,18 @@ def ExpTgtMatchClass : AsmOperandClass { let RenderMethod = "printExpTgt"; } +def SWaitMatchClass : AsmOperandClass { + let Name = "SWaitCnt"; + let RenderMethod = "addImmOperands"; + let ParserMethod = "parseSWaitCntOps"; +} + +def VReg32OrOffClass : AsmOperandClass { + let Name = "VReg32OrOff"; + let ParserMethod = "parseVReg32OrOff"; +} + +let OperandType = "OPERAND_IMMEDIATE" in { def SendMsgImm : Operand { let PrintMethod = "printSendMsg"; let ParserMatchClass = SendMsgMatchClass; @@ -778,22 +898,11 @@ def EndpgmImm 
: Operand { let ParserMatchClass = EndpgmMatchClass; } -def SWaitMatchClass : AsmOperandClass { - let Name = "SWaitCnt"; - let RenderMethod = "addImmOperands"; - let ParserMethod = "parseSWaitCntOps"; -} - -def VReg32OrOffClass : AsmOperandClass { - let Name = "VReg32OrOff"; - let ParserMethod = "parseVReg32OrOff"; -} - def WAIT_FLAG : Operand { let ParserMatchClass = SWaitMatchClass; let PrintMethod = "printWaitFlag"; - let OperandType = "OPERAND_IMMEDIATE"; } +} // End OperandType = "OPERAND_IMMEDIATE" include "SIInstrFormats.td" include "VIInstrFormats.td" @@ -929,6 +1038,7 @@ def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>; def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>; def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>; def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>; +def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>; def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>; def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>; def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>; @@ -1317,18 +1427,6 @@ class getVALUDstForVT { VOPDstS64orS32)))); // else VT == i1 } -// Returns true if VT is floating point. -class getIsFP { - bit ret = !if(!eq(VT.Value, f16.Value), 1, - !if(!eq(VT.Value, v2f16.Value), 1, - !if(!eq(VT.Value, v4f16.Value), 1, - !if(!eq(VT.Value, f32.Value), 1, - !if(!eq(VT.Value, v2f32.Value), 1, - !if(!eq(VT.Value, f64.Value), 1, - !if(!eq(VT.Value, v2f64.Value), 1, - 0))))))); -} - // Returns the register class to use for the destination of VOP[12C] // instructions with SDWA extension class getSDWADstForVT { @@ -1340,7 +1438,7 @@ class getSDWADstForVT { // Returns the register class to use for source 0 of VOP[12C] // instructions for the given VT. class getVOPSrc0ForVT { - bit isFP = getIsFP.ret; + bit isFP = isFloatType.ret; RegisterOperand ret = !if(isFP, @@ -1373,11 +1471,14 @@ class getVOPSrc0ForVT { // Returns the vreg register class to use for source operand given VT class getVregSrcForVT { RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128, - !if(!eq(VT.Size, 64), VReg_64, VGPR_32)); + !if(!eq(VT.Size, 96), VReg_96, + !if(!eq(VT.Size, 64), VReg_64, + !if(!eq(VT.Size, 48), VReg_64, + VGPR_32)))); } class getSDWASrcForVT { - bit isFP = getIsFP.ret; + bit isFP = isFloatType.ret; RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32); RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32); RegisterOperand ret = !if(isFP, retFlt, retInt); @@ -1386,7 +1487,7 @@ class getSDWASrcForVT { // Returns the register class to use for sources of VOP3 instructions for the // given VT. 
class getVOP3SrcForVT { - bit isFP = getIsFP.ret; + bit isFP = isFloatType.ret; RegisterOperand ret = !if(!eq(VT.Size, 128), VSrc_128, @@ -1433,7 +1534,7 @@ class isModifierType { // Return type of input modifiers operand for specified input operand class getSrcMod { - bit isFP = getIsFP.ret; + bit isFP = isFloatType.ret; bit isPacked = isPackedType.ret; Operand ret = !if(!eq(VT.Size, 64), !if(isFP, FP64InputMods, Int64InputMods), @@ -1452,7 +1553,7 @@ class getOpSelMod { // Return type of input modifiers operand specified input operand for DPP class getSrcModExt { - bit isFP = getIsFP.ret; + bit isFP = isFloatType.ret; Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods); } @@ -2038,6 +2139,7 @@ class VOPProfile _ArgVT, bit _EnableF32SrcMods = 0, field int NeedPatGen = PatGenMode.NoPattern; field bit IsMAI = 0; + field bit IsDOT = 0; field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods); field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods); diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 70f20bb69370..21984c6ad910 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -43,8 +43,8 @@ multiclass V_INTERP_P1_F32_m : VINTRP_m < (outs VINTRPDst:$vdst), (ins VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan), "v_interp_p1_f32$vdst, $vsrc, $attr$attrchan", - [(set f32:$vdst, (AMDGPUinterp_p1 f32:$vsrc, (i32 imm:$attrchan), - (i32 imm:$attr)))] + [(set f32:$vdst, (int_amdgcn_interp_p1 f32:$vsrc, + (i32 timm:$attrchan), (i32 timm:$attr), M0))] >; let OtherPredicates = [has32BankLDS] in { @@ -66,8 +66,8 @@ defm V_INTERP_P2_F32 : VINTRP_m < (outs VINTRPDst:$vdst), (ins VGPR_32:$src0, VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan), "v_interp_p2_f32$vdst, $vsrc, $attr$attrchan", - [(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$vsrc, (i32 imm:$attrchan), - (i32 imm:$attr)))]>; + [(set f32:$vdst, (int_amdgcn_interp_p2 f32:$src0, f32:$vsrc, + (i32 timm:$attrchan), (i32 timm:$attr), M0))]>; } // End DisableEncoding = "$src0", Constraints = "$src0 = $vdst" @@ -76,8 +76,8 @@ defm V_INTERP_MOV_F32 : VINTRP_m < (outs VINTRPDst:$vdst), (ins InterpSlot:$vsrc, Attr:$attr, AttrChan:$attrchan), "v_interp_mov_f32$vdst, $vsrc, $attr$attrchan", - [(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$vsrc), (i32 imm:$attrchan), - (i32 imm:$attr)))]>; + [(set f32:$vdst, (int_amdgcn_interp_mov (i32 imm:$vsrc), + (i32 timm:$attrchan), (i32 timm:$attr), M0))]>; } // End Uses = [M0, EXEC] @@ -92,6 +92,11 @@ def ATOMIC_FENCE : SPseudoInstSI< let maybeAtomic = 1; } +def VOP_I64_I64_DPP : VOPProfile <[i64, i64, untyped, untyped]> { + let HasExt = 1; + let HasExtDPP = 1; +} + let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in { // For use in patterns @@ -107,10 +112,19 @@ def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$vdst), def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst), (ins VSrc_b64:$src0)>; +// 64-bit vector move with dpp. Expanded post-RA. +def V_MOV_B64_DPP_PSEUDO : VOP_DPP_Pseudo <"v_mov_b64_dpp", VOP_I64_I64_DPP> { + let Size = 16; // Requires two 8-byte v_mov_b32_dpp to complete. +} + // Pseudoinstruction for @llvm.amdgcn.wqm. It is turned into a copy after the // WQM pass processes it. def WQM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>; +// Pseudoinstruction for @llvm.amdgcn.softwqm. Like @llvm.amdgcn.wqm it is +// turned into a copy by WQM pass, but does not seed WQM requirements. 
+def SOFT_WQM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>; + // Pseudoinstruction for @llvm.amdgcn.wwm. It is turned into a copy post-RA, so // that the @earlyclobber is respected. The @earlyclobber is to make sure that // the instruction that defines $src0 (which is run in WWM) doesn't @@ -345,13 +359,15 @@ def SI_INIT_M0 : SPseudoInstSI <(outs), (ins SSrc_b32:$src)> { } def SI_INIT_EXEC : SPseudoInstSI < - (outs), (ins i64imm:$src), []> { + (outs), (ins i64imm:$src), + [(int_amdgcn_init_exec (i64 timm:$src))]> { let Defs = [EXEC]; let usesCustomInserter = 1; let isAsCheapAsAMove = 1; let WaveSizePredicate = isWave64; } +// FIXME: Intrinsic should be mangled for wave size. def SI_INIT_EXEC_LO : SPseudoInstSI < (outs), (ins i32imm:$src), []> { let Defs = [EXEC_LO]; @@ -360,12 +376,20 @@ def SI_INIT_EXEC_LO : SPseudoInstSI < let WaveSizePredicate = isWave32; } +// FIXME: Wave32 version def SI_INIT_EXEC_FROM_INPUT : SPseudoInstSI < - (outs), (ins SSrc_b32:$input, i32imm:$shift), []> { + (outs), (ins SSrc_b32:$input, i32imm:$shift), + [(int_amdgcn_init_exec_from_input i32:$input, (i32 timm:$shift))]> { let Defs = [EXEC]; let usesCustomInserter = 1; } +def : GCNPat < + (int_amdgcn_init_exec timm:$src), + (SI_INIT_EXEC_LO (as_i32imm imm:$src))> { + let WaveSizePredicate = isWave32; +} + // Return for returning shaders to a shader variant epilog. def SI_RETURN_TO_EPILOG : SPseudoInstSI < (outs), (ins variable_ops), [(AMDGPUreturn_to_epilog)]> { @@ -604,25 +628,6 @@ def : GCNPat < (SI_PC_ADD_REL_OFFSET $ptr_lo, (i32 0)) >; -def : GCNPat < - (AMDGPUinit_exec i64:$src), - (SI_INIT_EXEC (as_i64imm $src)) -> { - let WaveSizePredicate = isWave64; -} - -def : GCNPat < - (AMDGPUinit_exec i64:$src), - (SI_INIT_EXEC_LO (as_i32imm $src)) -> { - let WaveSizePredicate = isWave32; -} - -def : GCNPat < - (AMDGPUinit_exec_from_input i32:$input, i32:$shift), - (SI_INIT_EXEC_FROM_INPUT (i32 $input), (as_i32imm $shift)) ->; - def : GCNPat< (AMDGPUtrap timm:$trapid), (S_TRAP $trapid) @@ -740,22 +745,22 @@ def : GCNPat < def : GCNPat < (i32 (fp_to_sint f16:$src)), - (V_CVT_I32_F32_e32 (V_CVT_F32_F16_e32 $src)) + (V_CVT_I32_F32_e32 (V_CVT_F32_F16_e32 VSrc_b32:$src)) >; def : GCNPat < (i32 (fp_to_uint f16:$src)), - (V_CVT_U32_F32_e32 (V_CVT_F32_F16_e32 $src)) + (V_CVT_U32_F32_e32 (V_CVT_F32_F16_e32 VSrc_b32:$src)) >; def : GCNPat < (f16 (sint_to_fp i32:$src)), - (V_CVT_F16_F32_e32 (V_CVT_F32_I32_e32 $src)) + (V_CVT_F16_F32_e32 (V_CVT_F32_I32_e32 VSrc_b32:$src)) >; def : GCNPat < (f16 (uint_to_fp i32:$src)), - (V_CVT_F16_F32_e32 (V_CVT_F32_U32_e32 $src)) + (V_CVT_F16_F32_e32 (V_CVT_F32_U32_e32 VSrc_b32:$src)) >; //===----------------------------------------------------------------------===// @@ -808,8 +813,14 @@ def : GCNPat < (V_BCNT_U32_B32_e64 $popcnt, $val) >; } + def : GCNPat < - (i16 (add (i16 (trunc (getDivergentFrag.ret i32:$popcnt))), i16:$val)), + (i32 (ctpop i32:$popcnt)), + (V_BCNT_U32_B32_e64 VSrc_b32:$popcnt, (i32 0)) +>; + +def : GCNPat < + (i16 (add (i16 (trunc (i32 (getDivergentFrag.ret i32:$popcnt)))), i16:$val)), (V_BCNT_U32_B32_e64 $popcnt, $val) >; @@ -1076,53 +1087,158 @@ def : GCNPat < /********** ================================ **********/ // Prevent expanding both fneg and fabs. +// TODO: Add IgnoredBySelectionDAG bit? 
+let AddedComplexity = 1 in { // Prefer SALU to VALU patterns for DAG def : GCNPat < - (fneg (fabs f32:$src)), - (S_OR_B32 $src, (S_MOV_B32(i32 0x80000000))) // Set sign bit + (fneg (fabs (f32 SReg_32:$src))), + (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80000000))) // Set sign bit >; -// FIXME: Should use S_OR_B32 def : GCNPat < - (fneg (fabs f64:$src)), - (REG_SEQUENCE VReg_64, - (i32 (EXTRACT_SUBREG f64:$src, sub0)), - sub0, - (V_OR_B32_e32 (i32 (EXTRACT_SUBREG f64:$src, sub1)), - (V_MOV_B32_e32 (i32 0x80000000))), // Set sign bit. - sub1) + (fabs (f32 SReg_32:$src)), + (S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x7fffffff))) +>; + +def : GCNPat < + (fneg (f32 SReg_32:$src)), + (S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80000000))) +>; + +def : GCNPat < + (fneg (f16 SReg_32:$src)), + (S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00008000))) +>; + +def : GCNPat < + (fneg (f16 VGPR_32:$src)), + (V_XOR_B32_e32 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) +>; + +def : GCNPat < + (fabs (f16 SReg_32:$src)), + (S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00007fff))) +>; + +def : GCNPat < + (fneg (fabs (f16 SReg_32:$src))), + (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00008000))) // Set sign bit +>; + +def : GCNPat < + (fneg (fabs (f16 VGPR_32:$src))), + (V_OR_B32_e32 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) // Set sign bit +>; + +def : GCNPat < + (fneg (v2f16 SReg_32:$src)), + (S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) +>; + +def : GCNPat < + (fabs (v2f16 SReg_32:$src)), + (S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x7fff7fff))) +>; + +// This is really (fneg (fabs v2f16:$src)) +// +// fabs is not reported as free because there is modifier for it in +// VOP3P instructions, so it is turned into the bit op. +def : GCNPat < + (fneg (v2f16 (bitconvert (and_oneuse (i32 SReg_32:$src), 0x7fff7fff)))), + (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit >; def : GCNPat < - (fabs f32:$src), - (S_AND_B32 $src, (S_MOV_B32 (i32 0x7fffffff))) + (fneg (v2f16 (fabs SReg_32:$src))), + (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit +>; + +// FIXME: The implicit-def of scc from S_[X]OR_B32 is mishandled + // def : GCNPat < +// (fneg (f64 SReg_64:$src)), +// (REG_SEQUENCE SReg_64, +// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)), +// sub0, +// (S_XOR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)), +// (i32 (S_MOV_B32 (i32 0x80000000)))), +// sub1) +// >; + +// def : GCNPat < +// (fneg (fabs (f64 SReg_64:$src))), +// (REG_SEQUENCE SReg_64, +// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)), +// sub0, +// (S_OR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)), +// (S_MOV_B32 (i32 0x80000000))), // Set sign bit. 
+// sub1) +// >; + +} // End let AddedComplexity = 1 + +def : GCNPat < + (fabs (f32 VGPR_32:$src)), + (V_AND_B32_e32 (S_MOV_B32 (i32 0x7fffffff)), VGPR_32:$src) +>; + +def : GCNPat < + (fneg (f32 VGPR_32:$src)), + (V_XOR_B32_e32 (S_MOV_B32 (i32 0x80000000)), VGPR_32:$src) +>; + +def : GCNPat < + (fabs (f16 VGPR_32:$src)), + (V_AND_B32_e32 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src) >; def : GCNPat < - (fneg f32:$src), - (V_XOR_B32_e32 $src, (V_MOV_B32_e32 (i32 0x80000000))) + (fneg (v2f16 VGPR_32:$src)), + (V_XOR_B32_e32 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src) >; def : GCNPat < - (fabs f64:$src), + (fabs (v2f16 VGPR_32:$src)), + (V_AND_B32_e32 (S_MOV_B32 (i32 0x7fff7fff)), VGPR_32:$src) +>; + +def : GCNPat < + (fneg (v2f16 (fabs VGPR_32:$src))), + (V_OR_B32_e32 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src) // Set sign bit +>; + +def : GCNPat < + (fabs (f64 VReg_64:$src)), (REG_SEQUENCE VReg_64, - (i32 (EXTRACT_SUBREG f64:$src, sub0)), + (i32 (EXTRACT_SUBREG VReg_64:$src, sub0)), sub0, - (V_AND_B32_e64 (i32 (EXTRACT_SUBREG f64:$src, sub1)), + (V_AND_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)), (V_MOV_B32_e32 (i32 0x7fffffff))), // Set sign bit. sub1) >; +// TODO: Use SGPR for constant def : GCNPat < - (fneg f64:$src), + (fneg (f64 VReg_64:$src)), (REG_SEQUENCE VReg_64, - (i32 (EXTRACT_SUBREG f64:$src, sub0)), + (i32 (EXTRACT_SUBREG VReg_64:$src, sub0)), sub0, - (V_XOR_B32_e32 (i32 (EXTRACT_SUBREG f64:$src, sub1)), + (V_XOR_B32_e32 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)), (i32 (V_MOV_B32_e32 (i32 0x80000000)))), sub1) >; +// TODO: Use SGPR for constant +def : GCNPat < + (fneg (fabs (f64 VReg_64:$src))), + (REG_SEQUENCE VReg_64, + (i32 (EXTRACT_SUBREG VReg_64:$src, sub0)), + sub0, + (V_OR_B32_e32 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)), + (V_MOV_B32_e32 (i32 0x80000000))), // Set sign bit. + sub1) +>; + def : GCNPat < (fcopysign f16:$src0, f16:$src1), (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1) @@ -1154,45 +1270,6 @@ def : GCNPat < (V_LSHRREV_B32_e64 (i32 16), (EXTRACT_SUBREG $src1, sub1))) >; -def : GCNPat < - (fneg f16:$src), - (S_XOR_B32 $src, (S_MOV_B32 (i32 0x00008000))) ->; - -def : GCNPat < - (fabs f16:$src), - (S_AND_B32 $src, (S_MOV_B32 (i32 0x00007fff))) ->; - -def : GCNPat < - (fneg (fabs f16:$src)), - (S_OR_B32 $src, (S_MOV_B32 (i32 0x00008000))) // Set sign bit ->; - -def : GCNPat < - (fneg v2f16:$src), - (S_XOR_B32 $src, (S_MOV_B32 (i32 0x80008000))) ->; - -def : GCNPat < - (fabs v2f16:$src), - (S_AND_B32 $src, (S_MOV_B32 (i32 0x7fff7fff))) ->; - -// This is really (fneg (fabs v2f16:$src)) -// -// fabs is not reported as free because there is modifier for it in -// VOP3P instructions, so it is turned into the bit op. 
-def : GCNPat < - (fneg (v2f16 (bitconvert (and_oneuse i32:$src, 0x7fff7fff)))), - (S_OR_B32 $src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit ->; - -def : GCNPat < - (fneg (v2f16 (fabs v2f16:$src))), - (S_OR_B32 $src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit ->; - /********** ================== **********/ /********** Immediate Patterns **********/ /********** ================== **********/ @@ -1544,7 +1621,7 @@ def : GCNPat < (V_CVT_F16_F32_e32 ( V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0), /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE), - $src)) + SSrc_i1:$src)) >; def : GCNPat < @@ -1552,35 +1629,35 @@ def : GCNPat < (V_CVT_F16_F32_e32 ( V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0), /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE), - $src)) + SSrc_i1:$src)) >; def : GCNPat < (f32 (sint_to_fp i1:$src)), (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0), /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE), - $src) + SSrc_i1:$src) >; def : GCNPat < (f32 (uint_to_fp i1:$src)), (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0), /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE), - $src) + SSrc_i1:$src) >; def : GCNPat < (f64 (sint_to_fp i1:$src)), (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0), /*src1mod*/(i32 0), /*src1*/(i32 -1), - $src)) + SSrc_i1:$src)) >; def : GCNPat < (f64 (uint_to_fp i1:$src)), (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0), /*src1mod*/(i32 0), /*src1*/(i32 1), - $src)) + SSrc_i1:$src)) >; //===----------------------------------------------------------------------===// @@ -1788,6 +1865,22 @@ def : GCNPat < (INSERT_SUBREG (IMPLICIT_DEF), $src0, sub0) >; +def : GCNPat < + (i64 (int_amdgcn_mov_dpp i64:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask, + timm:$bound_ctrl)), + (V_MOV_B64_DPP_PSEUDO $src, $src, (as_i32imm $dpp_ctrl), + (as_i32imm $row_mask), (as_i32imm $bank_mask), + (as_i1imm $bound_ctrl)) +>; + +def : GCNPat < + (i64 (int_amdgcn_update_dpp i64:$old, i64:$src, timm:$dpp_ctrl, timm:$row_mask, + timm:$bank_mask, timm:$bound_ctrl)), + (V_MOV_B64_DPP_PSEUDO $old, $src, (as_i32imm $dpp_ctrl), + (as_i32imm $row_mask), (as_i32imm $bank_mask), + (as_i1imm $bound_ctrl)) +>; + //===----------------------------------------------------------------------===// // Fract Patterns //===----------------------------------------------------------------------===// @@ -1915,3 +2008,13 @@ def : FP16Med3Pat; defm : Int16Med3Pat; defm : Int16Med3Pat; } // End Predicates = [isGFX9Plus] + +class AMDGPUGenericInstruction : GenericInstruction { + let Namespace = "AMDGPU"; +} + +def G_AMDGPU_FFBH_U32 : AMDGPUGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$src); + let hasSideEffects = 0; +} diff --git a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index ae8b967893a2..20db1c37f354 100644 --- a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -42,10 +42,7 @@ // // Future improvements: // -// - This currently relies on the scheduler to place loads and stores next to -// each other, and then only merges adjacent pairs of instructions. It would -// be good to be more flexible with interleaved instructions, and possibly run -// before scheduling. 
It currently missing stores of constants because loading +// - This is currently missing stores of constants because loading // the constant into the data register is placed between the stores, although // this is arguably a scheduling problem. // @@ -98,14 +95,9 @@ enum InstClassEnum { DS_READ, DS_WRITE, S_BUFFER_LOAD_IMM, - BUFFER_LOAD_OFFEN = AMDGPU::BUFFER_LOAD_DWORD_OFFEN, - BUFFER_LOAD_OFFSET = AMDGPU::BUFFER_LOAD_DWORD_OFFSET, - BUFFER_STORE_OFFEN = AMDGPU::BUFFER_STORE_DWORD_OFFEN, - BUFFER_STORE_OFFSET = AMDGPU::BUFFER_STORE_DWORD_OFFSET, - BUFFER_LOAD_OFFEN_exact = AMDGPU::BUFFER_LOAD_DWORD_OFFEN_exact, - BUFFER_LOAD_OFFSET_exact = AMDGPU::BUFFER_LOAD_DWORD_OFFSET_exact, - BUFFER_STORE_OFFEN_exact = AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact, - BUFFER_STORE_OFFSET_exact = AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact, + BUFFER_LOAD, + BUFFER_STORE, + MIMG, }; enum RegisterEnum { @@ -114,6 +106,7 @@ enum RegisterEnum { SOFFSET = 0x4, VADDR = 0x8, ADDR = 0x10, + SSAMP = 0x20, }; class SILoadStoreOptimizer : public MachineFunctionPass { @@ -126,6 +119,8 @@ class SILoadStoreOptimizer : public MachineFunctionPass { unsigned Width0; unsigned Width1; unsigned BaseOff; + unsigned DMask0; + unsigned DMask1; InstClassEnum InstClass; bool GLC0; bool GLC1; @@ -135,6 +130,60 @@ class SILoadStoreOptimizer : public MachineFunctionPass { bool DLC1; bool UseST64; SmallVector InstsToMove; + int AddrIdx[5]; + const MachineOperand *AddrReg[5]; + unsigned NumAddresses; + + bool hasSameBaseAddress(const MachineInstr &MI) { + for (unsigned i = 0; i < NumAddresses; i++) { + const MachineOperand &AddrRegNext = MI.getOperand(AddrIdx[i]); + + if (AddrReg[i]->isImm() || AddrRegNext.isImm()) { + if (AddrReg[i]->isImm() != AddrRegNext.isImm() || + AddrReg[i]->getImm() != AddrRegNext.getImm()) { + return false; + } + continue; + } + + // Check same base pointer. Be careful of subregisters, which can occur + // with vectors of pointers. + if (AddrReg[i]->getReg() != AddrRegNext.getReg() || + AddrReg[i]->getSubReg() != AddrRegNext.getSubReg()) { + return false; + } + } + return true; + } + + bool hasMergeableAddress(const MachineRegisterInfo &MRI) { + for (unsigned i = 0; i < NumAddresses; ++i) { + const MachineOperand *AddrOp = AddrReg[i]; + // Immediates are always OK. + if (AddrOp->isImm()) + continue; + + // Don't try to merge addresses that aren't either immediates or registers. + // TODO: Should be possible to merge FrameIndexes and maybe some other + // non-register operands. + if (!AddrOp->isReg()) + return false; + + // TODO: We should be able to merge physical reg addresses. + if (Register::isPhysicalRegister(AddrOp->getReg())) + return false; + + // If an address has only one use then there will be no other + // instructions with the same address, so we can't merge this one.
+ if (MRI.hasOneNonDBGUse(AddrOp->getReg())) + return false; + } + return true; + } + + void setMI(MachineBasicBlock::iterator MI, const SIInstrInfo &TII, + const GCNSubtarget &STM); + void setPaired(MachineBasicBlock::iterator MI, const SIInstrInfo &TII); }; struct BaseRegisters { @@ -160,14 +209,12 @@ private: AliasAnalysis *AA = nullptr; bool OptimizeAgain; + static bool dmasksCanBeCombined(const CombineInfo &CI, const SIInstrInfo &TII); static bool offsetsCanBeCombined(CombineInfo &CI); static bool widthsFit(const GCNSubtarget &STM, const CombineInfo &CI); static unsigned getNewOpcode(const CombineInfo &CI); static std::pair getSubRegIdxs(const CombineInfo &CI); const TargetRegisterClass *getTargetRegisterClass(const CombineInfo &CI); - unsigned getOpcodeWidth(const MachineInstr &MI); - InstClassEnum getInstClass(unsigned Opc); - unsigned getRegs(unsigned Opc); bool findMatchingInst(CombineInfo &CI); @@ -178,22 +225,27 @@ private: unsigned write2Opcode(unsigned EltSize) const; unsigned write2ST64Opcode(unsigned EltSize) const; MachineBasicBlock::iterator mergeWrite2Pair(CombineInfo &CI); + MachineBasicBlock::iterator mergeImagePair(CombineInfo &CI); MachineBasicBlock::iterator mergeSBufferLoadImmPair(CombineInfo &CI); MachineBasicBlock::iterator mergeBufferLoadPair(CombineInfo &CI); MachineBasicBlock::iterator mergeBufferStorePair(CombineInfo &CI); void updateBaseAndOffset(MachineInstr &I, unsigned NewBase, - int32_t NewOffset); - unsigned computeBase(MachineInstr &MI, const MemAddress &Addr); - MachineOperand createRegOrImm(int32_t Val, MachineInstr &MI); - Optional extractConstOffset(const MachineOperand &Op); - void processBaseWithConstOffset(const MachineOperand &Base, MemAddress &Addr); + int32_t NewOffset) const; + unsigned computeBase(MachineInstr &MI, const MemAddress &Addr) const; + MachineOperand createRegOrImm(int32_t Val, MachineInstr &MI) const; + Optional extractConstOffset(const MachineOperand &Op) const; + void processBaseWithConstOffset(const MachineOperand &Base, MemAddress &Addr) const; /// Promotes constant offset to the immediate by adjusting the base. It /// tries to use a base from the nearby instructions that allows it to have /// a 13bit constant offset which gets promoted to the immediate. 
bool promoteConstantOffsetToImm(MachineInstr &CI, MemInfoMap &Visited, - SmallPtrSet &Promoted); + SmallPtrSet &Promoted) const; + void addInstToMergeableList(const CombineInfo &CI, + std::list > &MergeableInsts) const; + bool collectMergeableInsts(MachineBasicBlock &MBB, + std::list > &MergeableInsts) const; public: static char ID; @@ -202,7 +254,11 @@ public: initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry()); } - bool optimizeBlock(MachineBasicBlock &MBB); + void removeCombinedInst(std::list &MergeList, + const MachineInstr &MI); + bool optimizeInstsWithSameBaseAddr(std::list &MergeList, + bool &OptimizeListAgain); + bool optimizeBlock(std::list > &MergeableInsts); bool runOnMachineFunction(MachineFunction &MF) override; @@ -216,6 +272,264 @@ public: } }; +static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) { + const unsigned Opc = MI.getOpcode(); + + if (TII.isMUBUF(Opc)) { + // FIXME: Handle d16 correctly + return AMDGPU::getMUBUFElements(Opc); + } + if (TII.isMIMG(MI)) { + uint64_t DMaskImm = + TII.getNamedOperand(MI, AMDGPU::OpName::dmask)->getImm(); + return countPopulation(DMaskImm); + } + + switch (Opc) { + case AMDGPU::S_BUFFER_LOAD_DWORD_IMM: + return 1; + case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM: + return 2; + case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: + return 4; + default: + return 0; + } +} + +/// Maps instruction opcode to enum InstClassEnum. +static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) { + switch (Opc) { + default: + if (TII.isMUBUF(Opc)) { + switch (AMDGPU::getMUBUFBaseOpcode(Opc)) { + default: + return UNKNOWN; + case AMDGPU::BUFFER_LOAD_DWORD_OFFEN: + case AMDGPU::BUFFER_LOAD_DWORD_OFFEN_exact: + case AMDGPU::BUFFER_LOAD_DWORD_OFFSET: + case AMDGPU::BUFFER_LOAD_DWORD_OFFSET_exact: + return BUFFER_LOAD; + case AMDGPU::BUFFER_STORE_DWORD_OFFEN: + case AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact: + case AMDGPU::BUFFER_STORE_DWORD_OFFSET: + case AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact: + return BUFFER_STORE; + } + } + if (TII.isMIMG(Opc)) { + // Ignore instructions encoded without vaddr. + if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) == -1) + return UNKNOWN; + // TODO: Support IMAGE_GET_RESINFO and IMAGE_GET_LOD. + if (TII.get(Opc).mayStore() || !TII.get(Opc).mayLoad() || TII.isGather4(Opc)) + return UNKNOWN; + return MIMG; + } + return UNKNOWN; + case AMDGPU::S_BUFFER_LOAD_DWORD_IMM: + case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM: + case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: + return S_BUFFER_LOAD_IMM; + case AMDGPU::DS_READ_B32: + case AMDGPU::DS_READ_B32_gfx9: + case AMDGPU::DS_READ_B64: + case AMDGPU::DS_READ_B64_gfx9: + return DS_READ; + case AMDGPU::DS_WRITE_B32: + case AMDGPU::DS_WRITE_B32_gfx9: + case AMDGPU::DS_WRITE_B64: + case AMDGPU::DS_WRITE_B64_gfx9: + return DS_WRITE; + } +} + +/// Determines instruction subclass from opcode. Only instructions +/// of the same subclass can be merged together. 
+static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) { + switch (Opc) { + default: + if (TII.isMUBUF(Opc)) + return AMDGPU::getMUBUFBaseOpcode(Opc); + if (TII.isMIMG(Opc)) { + const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); + assert(Info); + return Info->BaseOpcode; + } + return -1; + case AMDGPU::DS_READ_B32: + case AMDGPU::DS_READ_B32_gfx9: + case AMDGPU::DS_READ_B64: + case AMDGPU::DS_READ_B64_gfx9: + case AMDGPU::DS_WRITE_B32: + case AMDGPU::DS_WRITE_B32_gfx9: + case AMDGPU::DS_WRITE_B64: + case AMDGPU::DS_WRITE_B64_gfx9: + return Opc; + case AMDGPU::S_BUFFER_LOAD_DWORD_IMM: + case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM: + case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: + return AMDGPU::S_BUFFER_LOAD_DWORD_IMM; + } +} + +static unsigned getRegs(unsigned Opc, const SIInstrInfo &TII) { + if (TII.isMUBUF(Opc)) { + unsigned result = 0; + + if (AMDGPU::getMUBUFHasVAddr(Opc)) { + result |= VADDR; + } + + if (AMDGPU::getMUBUFHasSrsrc(Opc)) { + result |= SRSRC; + } + + if (AMDGPU::getMUBUFHasSoffset(Opc)) { + result |= SOFFSET; + } + + return result; + } + + if (TII.isMIMG(Opc)) { + unsigned result = VADDR | SRSRC; + const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); + if (Info && AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode)->Sampler) + result |= SSAMP; + return result; + } + + switch (Opc) { + default: + return 0; + case AMDGPU::S_BUFFER_LOAD_DWORD_IMM: + case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM: + case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: + return SBASE; + case AMDGPU::DS_READ_B32: + case AMDGPU::DS_READ_B64: + case AMDGPU::DS_READ_B32_gfx9: + case AMDGPU::DS_READ_B64_gfx9: + case AMDGPU::DS_WRITE_B32: + case AMDGPU::DS_WRITE_B64: + case AMDGPU::DS_WRITE_B32_gfx9: + case AMDGPU::DS_WRITE_B64_gfx9: + return ADDR; + } +} + + +void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI, + const SIInstrInfo &TII, + const GCNSubtarget &STM) { + I = MI; + unsigned Opc = MI->getOpcode(); + InstClass = getInstClass(Opc, TII); + + if (InstClass == UNKNOWN) + return; + + switch (InstClass) { + case DS_READ: + EltSize = + (Opc == AMDGPU::DS_READ_B64 || Opc == AMDGPU::DS_READ_B64_gfx9) ? 8 + : 4; + break; + case DS_WRITE: + EltSize = + (Opc == AMDGPU::DS_WRITE_B64 || Opc == AMDGPU::DS_WRITE_B64_gfx9) ? 
8 + : 4; + break; + case S_BUFFER_LOAD_IMM: + EltSize = AMDGPU::getSMRDEncodedOffset(STM, 4); + break; + default: + EltSize = 4; + break; + } + + if (InstClass == MIMG) { + DMask0 = TII.getNamedOperand(*I, AMDGPU::OpName::dmask)->getImm(); + } else { + int OffsetIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::offset); + Offset0 = I->getOperand(OffsetIdx).getImm(); + } + + Width0 = getOpcodeWidth(*I, TII); + + if ((InstClass == DS_READ) || (InstClass == DS_WRITE)) { + Offset0 &= 0xffff; + } else if (InstClass != MIMG) { + GLC0 = TII.getNamedOperand(*I, AMDGPU::OpName::glc)->getImm(); + if (InstClass != S_BUFFER_LOAD_IMM) { + SLC0 = TII.getNamedOperand(*I, AMDGPU::OpName::slc)->getImm(); + } + DLC0 = TII.getNamedOperand(*I, AMDGPU::OpName::dlc)->getImm(); + } + + unsigned AddrOpName[5] = {0}; + NumAddresses = 0; + const unsigned Regs = getRegs(I->getOpcode(), TII); + + if (Regs & ADDR) { + AddrOpName[NumAddresses++] = AMDGPU::OpName::addr; + } + + if (Regs & SBASE) { + AddrOpName[NumAddresses++] = AMDGPU::OpName::sbase; + } + + if (Regs & SRSRC) { + AddrOpName[NumAddresses++] = AMDGPU::OpName::srsrc; + } + + if (Regs & SOFFSET) { + AddrOpName[NumAddresses++] = AMDGPU::OpName::soffset; + } + + if (Regs & VADDR) { + AddrOpName[NumAddresses++] = AMDGPU::OpName::vaddr; + } + + if (Regs & SSAMP) { + AddrOpName[NumAddresses++] = AMDGPU::OpName::ssamp; + } + + for (unsigned i = 0; i < NumAddresses; i++) { + AddrIdx[i] = AMDGPU::getNamedOperandIdx(I->getOpcode(), AddrOpName[i]); + AddrReg[i] = &I->getOperand(AddrIdx[i]); + } + + InstsToMove.clear(); +} + +void SILoadStoreOptimizer::CombineInfo::setPaired(MachineBasicBlock::iterator MI, + const SIInstrInfo &TII) { + Paired = MI; + assert(InstClass == getInstClass(Paired->getOpcode(), TII)); + + if (InstClass == MIMG) { + DMask1 = TII.getNamedOperand(*Paired, AMDGPU::OpName::dmask)->getImm(); + } else { + int OffsetIdx = + AMDGPU::getNamedOperandIdx(I->getOpcode(), AMDGPU::OpName::offset); + Offset1 = Paired->getOperand(OffsetIdx).getImm(); + } + + Width1 = getOpcodeWidth(*Paired, TII); + if ((InstClass == DS_READ) || (InstClass == DS_WRITE)) { + Offset1 &= 0xffff; + } else if (InstClass != MIMG) { + GLC1 = TII.getNamedOperand(*Paired, AMDGPU::OpName::glc)->getImm(); + if (InstClass != S_BUFFER_LOAD_IMM) { + SLC1 = TII.getNamedOperand(*Paired, AMDGPU::OpName::slc)->getImm(); + } + DLC1 = TII.getNamedOperand(*Paired, AMDGPU::OpName::dlc)->getImm(); + } +} + + } // end anonymous namespace. INITIALIZE_PASS_BEGIN(SILoadStoreOptimizer, DEBUG_TYPE, @@ -249,8 +563,7 @@ static void addDefsUsesToList(const MachineInstr &MI, if (Op.isReg()) { if (Op.isDef()) RegDefs.insert(Op.getReg()); - else if (Op.readsReg() && - TargetRegisterInfo::isPhysicalRegister(Op.getReg())) + else if (Op.readsReg() && Register::isPhysicalRegister(Op.getReg())) PhysRegUses.insert(Op.getReg()); } } @@ -282,7 +595,7 @@ static bool addToListsIfDependent(MachineInstr &MI, DenseSet &RegDefs, if (Use.isReg() && ((Use.readsReg() && RegDefs.count(Use.getReg())) || (Use.isDef() && RegDefs.count(Use.getReg())) || - (Use.isDef() && TargetRegisterInfo::isPhysicalRegister(Use.getReg()) && + (Use.isDef() && Register::isPhysicalRegister(Use.getReg()) && PhysRegUses.count(Use.getReg())))) { Insts.push_back(&MI); addDefsUsesToList(MI, RegDefs, PhysRegUses); @@ -307,7 +620,59 @@ static bool canMoveInstsAcrossMemOp(MachineInstr &MemOp, return true; } +// This function assumes that \p A and \p B are identical except for +// size and offset, and they reference adjacent memory.
+static MachineMemOperand *combineKnownAdjacentMMOs(MachineFunction &MF, + const MachineMemOperand *A, + const MachineMemOperand *B) { + unsigned MinOffset = std::min(A->getOffset(), B->getOffset()); + unsigned Size = A->getSize() + B->getSize(); + // This function adds the offset parameter to the existing offset for A, + // so we pass 0 here as the offset and then manually set it to the correct + // value after the call. + MachineMemOperand *MMO = MF.getMachineMemOperand(A, 0, Size); + MMO->setOffset(MinOffset); + return MMO; +} + +bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI, const SIInstrInfo &TII) { + assert(CI.InstClass == MIMG); + + // Ignore instructions with tfe/lwe set. + const auto *TFEOp = TII.getNamedOperand(*CI.I, AMDGPU::OpName::tfe); + const auto *LWEOp = TII.getNamedOperand(*CI.I, AMDGPU::OpName::lwe); + + if ((TFEOp && TFEOp->getImm()) || (LWEOp && LWEOp->getImm())) + return false; + + // Check other optional immediate operands for equality. + unsigned OperandsToMatch[] = {AMDGPU::OpName::glc, AMDGPU::OpName::slc, + AMDGPU::OpName::d16, AMDGPU::OpName::unorm, + AMDGPU::OpName::da, AMDGPU::OpName::r128}; + + for (auto op : OperandsToMatch) { + int Idx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), op); + if (AMDGPU::getNamedOperandIdx(CI.Paired->getOpcode(), op) != Idx) + return false; + if (Idx != -1 && + CI.I->getOperand(Idx).getImm() != CI.Paired->getOperand(Idx).getImm()) + return false; + } + + // Check DMask for overlaps. + unsigned MaxMask = std::max(CI.DMask0, CI.DMask1); + unsigned MinMask = std::min(CI.DMask0, CI.DMask1); + + unsigned AllowedBitsForMin = llvm::countTrailingZeros(MaxMask); + if ((1u << AllowedBitsForMin) <= MinMask) + return false; + + return true; +} + bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) { + assert(CI.InstClass != MIMG); + // XXX - Would the same offset be OK? Is there any reason this would happen or // be useful? if (CI.Offset0 == CI.Offset1) @@ -384,164 +749,24 @@ bool SILoadStoreOptimizer::widthsFit(const GCNSubtarget &STM, } } -unsigned SILoadStoreOptimizer::getOpcodeWidth(const MachineInstr &MI) { - const unsigned Opc = MI.getOpcode(); - - if (TII->isMUBUF(MI)) { - return AMDGPU::getMUBUFDwords(Opc); - } - - switch (Opc) { - default: - return 0; - case AMDGPU::S_BUFFER_LOAD_DWORD_IMM: - return 1; - case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM: - return 2; - case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: - return 4; - } -} - -InstClassEnum SILoadStoreOptimizer::getInstClass(unsigned Opc) { - if (TII->isMUBUF(Opc)) { - const int baseOpcode = AMDGPU::getMUBUFBaseOpcode(Opc); - - // If we couldn't identify the opcode, bail out. 
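In dmasksCanBeCombined above, the final check only accepts MIMG pairs whose dmasks occupy disjoint, non-interleaved channel ranges: every bit of the smaller mask has to sit below the lowest set bit of the larger one. A standalone sketch of that test with a couple of worked values; dmasksMergeable and ctz32 are invented names, and ctz32 stands in for llvm::countTrailingZeros:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    static unsigned ctz32(uint32_t V) { // index of the lowest set bit, V != 0
      unsigned N = 0;
      while ((V & 1u) == 0) { V >>= 1; ++N; }
      return N;
    }

    // Mirrors the countTrailingZeros check: the smaller dmask must fit
    // entirely below the lowest set bit of the larger dmask.
    static bool dmasksMergeable(uint32_t DMask0, uint32_t DMask1) {
      uint32_t MaxMask = std::max(DMask0, DMask1);
      uint32_t MinMask = std::min(DMask0, DMask1);
      return (1u << ctz32(MaxMask)) > MinMask;
    }

    int main() {
      assert(dmasksMergeable(0x3, 0xC));  // xy + zw channels -> merged dmask 0xF
      assert(!dmasksMergeable(0x5, 0x2)); // interleaved channels -> rejected
    }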
- if (baseOpcode == -1) { - return UNKNOWN; - } - - switch (baseOpcode) { - default: - return UNKNOWN; - case AMDGPU::BUFFER_LOAD_DWORD_OFFEN: - return BUFFER_LOAD_OFFEN; - case AMDGPU::BUFFER_LOAD_DWORD_OFFSET: - return BUFFER_LOAD_OFFSET; - case AMDGPU::BUFFER_STORE_DWORD_OFFEN: - return BUFFER_STORE_OFFEN; - case AMDGPU::BUFFER_STORE_DWORD_OFFSET: - return BUFFER_STORE_OFFSET; - case AMDGPU::BUFFER_LOAD_DWORD_OFFEN_exact: - return BUFFER_LOAD_OFFEN_exact; - case AMDGPU::BUFFER_LOAD_DWORD_OFFSET_exact: - return BUFFER_LOAD_OFFSET_exact; - case AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact: - return BUFFER_STORE_OFFEN_exact; - case AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact: - return BUFFER_STORE_OFFSET_exact; - } - } - - switch (Opc) { - default: - return UNKNOWN; - case AMDGPU::S_BUFFER_LOAD_DWORD_IMM: - case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM: - case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: - return S_BUFFER_LOAD_IMM; - case AMDGPU::DS_READ_B32: - case AMDGPU::DS_READ_B64: - case AMDGPU::DS_READ_B32_gfx9: - case AMDGPU::DS_READ_B64_gfx9: - return DS_READ; - case AMDGPU::DS_WRITE_B32: - case AMDGPU::DS_WRITE_B64: - case AMDGPU::DS_WRITE_B32_gfx9: - case AMDGPU::DS_WRITE_B64_gfx9: - return DS_WRITE; - } -} - -unsigned SILoadStoreOptimizer::getRegs(unsigned Opc) { - if (TII->isMUBUF(Opc)) { - unsigned result = 0; - - if (AMDGPU::getMUBUFHasVAddr(Opc)) { - result |= VADDR; - } - - if (AMDGPU::getMUBUFHasSrsrc(Opc)) { - result |= SRSRC; - } - - if (AMDGPU::getMUBUFHasSoffset(Opc)) { - result |= SOFFSET; - } - - return result; - } - - switch (Opc) { - default: - return 0; - case AMDGPU::S_BUFFER_LOAD_DWORD_IMM: - case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM: - case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: - return SBASE; - case AMDGPU::DS_READ_B32: - case AMDGPU::DS_READ_B64: - case AMDGPU::DS_READ_B32_gfx9: - case AMDGPU::DS_READ_B64_gfx9: - case AMDGPU::DS_WRITE_B32: - case AMDGPU::DS_WRITE_B64: - case AMDGPU::DS_WRITE_B32_gfx9: - case AMDGPU::DS_WRITE_B64_gfx9: - return ADDR; - } -} - bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) { MachineBasicBlock *MBB = CI.I->getParent(); MachineBasicBlock::iterator E = MBB->end(); MachineBasicBlock::iterator MBBI = CI.I; const unsigned Opc = CI.I->getOpcode(); - const InstClassEnum InstClass = getInstClass(Opc); + const InstClassEnum InstClass = getInstClass(Opc, *TII); if (InstClass == UNKNOWN) { return false; } + const unsigned InstSubclass = getInstSubclass(Opc, *TII); - const unsigned Regs = getRegs(Opc); - - unsigned AddrOpName[5] = {0}; - int AddrIdx[5]; - const MachineOperand *AddrReg[5]; - unsigned NumAddresses = 0; - - if (Regs & ADDR) { - AddrOpName[NumAddresses++] = AMDGPU::OpName::addr; - } - - if (Regs & SBASE) { - AddrOpName[NumAddresses++] = AMDGPU::OpName::sbase; - } - - if (Regs & SRSRC) { - AddrOpName[NumAddresses++] = AMDGPU::OpName::srsrc; - } - - if (Regs & SOFFSET) { - AddrOpName[NumAddresses++] = AMDGPU::OpName::soffset; - } - - if (Regs & VADDR) { - AddrOpName[NumAddresses++] = AMDGPU::OpName::vaddr; - } - - for (unsigned i = 0; i < NumAddresses; i++) { - AddrIdx[i] = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AddrOpName[i]); - AddrReg[i] = &CI.I->getOperand(AddrIdx[i]); - - // We only ever merge operations with the same base address register, so - // don't bother scanning forward if there are no other uses. - if (AddrReg[i]->isReg() && - (TargetRegisterInfo::isPhysicalRegister(AddrReg[i]->getReg()) || - MRI->hasOneNonDBGUse(AddrReg[i]->getReg()))) - return false; - } + // Do not merge VMEM buffer instructions with "swizzled" bit set. 
+ int Swizzled = + AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::swz); + if (Swizzled != -1 && CI.I->getOperand(Swizzled).getImm()) + return false; ++MBBI; @@ -550,11 +775,10 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) { addDefsUsesToList(*CI.I, RegDefsToMove, PhysRegUsesToMove); for (; MBBI != E; ++MBBI) { - const bool IsDS = (InstClass == DS_READ) || (InstClass == DS_WRITE); - if ((getInstClass(MBBI->getOpcode()) != InstClass) || - (IsDS && (MBBI->getOpcode() != Opc))) { - // This is not a matching DS instruction, but we can keep looking as + if ((getInstClass(MBBI->getOpcode(), *TII) != InstClass) || + (getInstSubclass(MBBI->getOpcode(), *TII) != InstSubclass)) { + // This is not a matching instruction, but we can keep looking as // long as one of these conditions are met: // 1. It is safe to move I down past MBBI. // 2. It is safe to move MBBI down past the instruction that I will @@ -599,58 +823,23 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) { CI.InstsToMove)) continue; - bool Match = true; - for (unsigned i = 0; i < NumAddresses; i++) { - const MachineOperand &AddrRegNext = MBBI->getOperand(AddrIdx[i]); - - if (AddrReg[i]->isImm() || AddrRegNext.isImm()) { - if (AddrReg[i]->isImm() != AddrRegNext.isImm() || - AddrReg[i]->getImm() != AddrRegNext.getImm()) { - Match = false; - break; - } - continue; - } - - // Check same base pointer. Be careful of subregisters, which can occur - // with vectors of pointers. - if (AddrReg[i]->getReg() != AddrRegNext.getReg() || - AddrReg[i]->getSubReg() != AddrRegNext.getSubReg()) { - Match = false; - break; - } - } + bool Match = CI.hasSameBaseAddress(*MBBI); if (Match) { - int OffsetIdx = - AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::offset); - CI.Offset0 = CI.I->getOperand(OffsetIdx).getImm(); - CI.Width0 = getOpcodeWidth(*CI.I); - CI.Offset1 = MBBI->getOperand(OffsetIdx).getImm(); - CI.Width1 = getOpcodeWidth(*MBBI); - CI.Paired = MBBI; - - if ((CI.InstClass == DS_READ) || (CI.InstClass == DS_WRITE)) { - CI.Offset0 &= 0xffff; - CI.Offset1 &= 0xffff; - } else { - CI.GLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::glc)->getImm(); - CI.GLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::glc)->getImm(); - if (CI.InstClass != S_BUFFER_LOAD_IMM) { - CI.SLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::slc)->getImm(); - CI.SLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::slc)->getImm(); - } - CI.DLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::dlc)->getImm(); - CI.DLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::dlc)->getImm(); - } + CI.setPaired(MBBI, *TII); + + // Check both offsets (or masks for MIMG) can be combined and fit in the + // reduced range. + bool canBeCombined = + CI.InstClass == MIMG + ? dmasksCanBeCombined(CI, *TII) + : widthsFit(*STM, CI) && offsetsCanBeCombined(CI); - // Check both offsets fit in the reduced range. // We also need to go through the list of instructions that we plan to // move and make sure they are all safe to move down past the merged // instruction. - if (widthsFit(*STM, CI) && offsetsCanBeCombined(CI)) - if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA)) - return true; + if (canBeCombined && canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA)) + return true; } // We've found a load/store that we couldn't merge for some reason. @@ -711,15 +900,15 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI) { const TargetRegisterClass *SuperRC = (CI.EltSize == 4) ? 
&AMDGPU::VReg_64RegClass : &AMDGPU::VReg_128RegClass; - unsigned DestReg = MRI->createVirtualRegister(SuperRC); + Register DestReg = MRI->createVirtualRegister(SuperRC); DebugLoc DL = CI.I->getDebugLoc(); - unsigned BaseReg = AddrReg->getReg(); + Register BaseReg = AddrReg->getReg(); unsigned BaseSubReg = AddrReg->getSubReg(); unsigned BaseRegFlags = 0; if (CI.BaseOff) { - unsigned ImmReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass); + Register ImmReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg) .addImm(CI.BaseOff); @@ -755,12 +944,11 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI) { moveInstsAfter(Copy1, CI.InstsToMove); - MachineBasicBlock::iterator Next = std::next(CI.I); CI.I->eraseFromParent(); CI.Paired->eraseFromParent(); LLVM_DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n'); - return Next; + return Read2; } unsigned SILoadStoreOptimizer::write2Opcode(unsigned EltSize) const { @@ -809,11 +997,11 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI) { const MCInstrDesc &Write2Desc = TII->get(Opc); DebugLoc DL = CI.I->getDebugLoc(); - unsigned BaseReg = AddrReg->getReg(); + Register BaseReg = AddrReg->getReg(); unsigned BaseSubReg = AddrReg->getSubReg(); unsigned BaseRegFlags = 0; if (CI.BaseOff) { - unsigned ImmReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass); + Register ImmReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg) .addImm(CI.BaseOff); @@ -839,12 +1027,65 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI) { moveInstsAfter(Write2, CI.InstsToMove); - MachineBasicBlock::iterator Next = std::next(CI.I); CI.I->eraseFromParent(); CI.Paired->eraseFromParent(); LLVM_DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n'); - return Next; + return Write2; +} + +MachineBasicBlock::iterator +SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI) { + MachineBasicBlock *MBB = CI.I->getParent(); + DebugLoc DL = CI.I->getDebugLoc(); + const unsigned Opcode = getNewOpcode(CI); + + const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI); + + Register DestReg = MRI->createVirtualRegister(SuperRC); + unsigned MergedDMask = CI.DMask0 | CI.DMask1; + unsigned DMaskIdx = + AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::dmask); + + auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg); + for (unsigned I = 1, E = (*CI.I).getNumOperands(); I != E; ++I) { + if (I == DMaskIdx) + MIB.addImm(MergedDMask); + else + MIB.add((*CI.I).getOperand(I)); + } + + // It shouldn't be possible to get this far if the two instructions + // don't have a single memoperand, because MachineInstr::mayAlias() + // will return true if this is the case. + assert(CI.I->hasOneMemOperand() && CI.Paired->hasOneMemOperand()); + + const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); + const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin(); + + MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); + + std::pair SubRegIdx = getSubRegIdxs(CI); + const unsigned SubRegIdx0 = std::get<0>(SubRegIdx); + const unsigned SubRegIdx1 = std::get<1>(SubRegIdx); + + // Copy to the old destination registers. 
+ const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); + const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata); + const auto *Dest1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::vdata); + + BuildMI(*MBB, CI.Paired, DL, CopyDesc) + .add(*Dest0) // Copy to same destination including flags and sub reg. + .addReg(DestReg, 0, SubRegIdx0); + MachineInstr *Copy1 = BuildMI(*MBB, CI.Paired, DL, CopyDesc) + .add(*Dest1) + .addReg(DestReg, RegState::Kill, SubRegIdx1); + + moveInstsAfter(Copy1, CI.InstsToMove); + + CI.I->eraseFromParent(); + CI.Paired->eraseFromParent(); + return New; } MachineBasicBlock::iterator @@ -855,15 +1096,24 @@ SILoadStoreOptimizer::mergeSBufferLoadImmPair(CombineInfo &CI) { const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI); - unsigned DestReg = MRI->createVirtualRegister(SuperRC); + Register DestReg = MRI->createVirtualRegister(SuperRC); unsigned MergedOffset = std::min(CI.Offset0, CI.Offset1); - BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg) - .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase)) - .addImm(MergedOffset) // offset - .addImm(CI.GLC0) // glc - .addImm(CI.DLC0) // dlc - .cloneMergedMemRefs({&*CI.I, &*CI.Paired}); + // It shouldn't be possible to get this far if the two instructions + // don't have a single memoperand, because MachineInstr::mayAlias() + // will return true if this is the case. + assert(CI.I->hasOneMemOperand() && CI.Paired->hasOneMemOperand()); + + const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); + const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin(); + + MachineInstr *New = + BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg) + .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase)) + .addImm(MergedOffset) // offset + .addImm(CI.GLC0) // glc + .addImm(CI.DLC0) // dlc + .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); std::pair SubRegIdx = getSubRegIdxs(CI); const unsigned SubRegIdx0 = std::get<0>(SubRegIdx); @@ -883,10 +1133,9 @@ SILoadStoreOptimizer::mergeSBufferLoadImmPair(CombineInfo &CI) { moveInstsAfter(Copy1, CI.InstsToMove); - MachineBasicBlock::iterator Next = std::next(CI.I); CI.I->eraseFromParent(); CI.Paired->eraseFromParent(); - return Next; + return New; } MachineBasicBlock::iterator @@ -899,24 +1148,34 @@ SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI) { const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI); // Copy to the new source register. - unsigned DestReg = MRI->createVirtualRegister(SuperRC); + Register DestReg = MRI->createVirtualRegister(SuperRC); unsigned MergedOffset = std::min(CI.Offset0, CI.Offset1); auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg); - const unsigned Regs = getRegs(Opcode); + const unsigned Regs = getRegs(Opcode, *TII); if (Regs & VADDR) MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr)); - MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc)) - .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) - .addImm(MergedOffset) // offset - .addImm(CI.GLC0) // glc - .addImm(CI.SLC0) // slc - .addImm(0) // tfe - .addImm(CI.DLC0) // dlc - .cloneMergedMemRefs({&*CI.I, &*CI.Paired}); + // It shouldn't be possible to get this far if the two instructions + // don't have a single memoperand, because MachineInstr::mayAlias() + // will return true if this is the case. 
+ assert(CI.I->hasOneMemOperand() && CI.Paired->hasOneMemOperand()); + + const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); + const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin(); + + MachineInstr *New = + MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc)) + .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) + .addImm(MergedOffset) // offset + .addImm(CI.GLC0) // glc + .addImm(CI.SLC0) // slc + .addImm(0) // tfe + .addImm(CI.DLC0) // dlc + .addImm(0) // swz + .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); std::pair SubRegIdx = getSubRegIdxs(CI); const unsigned SubRegIdx0 = std::get<0>(SubRegIdx); @@ -936,10 +1195,9 @@ SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI) { moveInstsAfter(Copy1, CI.InstsToMove); - MachineBasicBlock::iterator Next = std::next(CI.I); CI.I->eraseFromParent(); CI.Paired->eraseFromParent(); - return Next; + return New; } unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI) { @@ -947,7 +1205,10 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI) { switch (CI.InstClass) { default: - return AMDGPU::getMUBUFOpcode(CI.InstClass, Width); + assert(CI.InstClass == BUFFER_LOAD || CI.InstClass == BUFFER_STORE); + // FIXME: Handle d16 correctly + return AMDGPU::getMUBUFOpcode(AMDGPU::getMUBUFBaseOpcode(CI.I->getOpcode()), + Width); case UNKNOWN: llvm_unreachable("Unknown instruction class"); case S_BUFFER_LOAD_IMM: @@ -959,76 +1220,47 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI) { case 4: return AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM; } + case MIMG: + assert("No overlaps" && (countPopulation(CI.DMask0 | CI.DMask1) == Width)); + return AMDGPU::getMaskedMIMGOp(CI.I->getOpcode(), Width); } } std::pair SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI) { - if (CI.Offset0 > CI.Offset1) { - switch (CI.Width0) { - default: - return std::make_pair(0, 0); - case 1: - switch (CI.Width1) { - default: - return std::make_pair(0, 0); - case 1: - return std::make_pair(AMDGPU::sub1, AMDGPU::sub0); - case 2: - return std::make_pair(AMDGPU::sub2, AMDGPU::sub0_sub1); - case 3: - return std::make_pair(AMDGPU::sub3, AMDGPU::sub0_sub1_sub2); - } - case 2: - switch (CI.Width1) { - default: - return std::make_pair(0, 0); - case 1: - return std::make_pair(AMDGPU::sub1_sub2, AMDGPU::sub0); - case 2: - return std::make_pair(AMDGPU::sub2_sub3, AMDGPU::sub0_sub1); - } - case 3: - switch (CI.Width1) { - default: - return std::make_pair(0, 0); - case 1: - return std::make_pair(AMDGPU::sub1_sub2_sub3, AMDGPU::sub0); - } - } + + if (CI.Width0 == 0 || CI.Width0 == 0 || CI.Width0 + CI.Width1 > 4) + return std::make_pair(0, 0); + + bool ReverseOrder; + if (CI.InstClass == MIMG) { + assert((countPopulation(CI.DMask0 | CI.DMask1) == CI.Width0 + CI.Width1) && + "No overlaps"); + ReverseOrder = CI.DMask0 > CI.DMask1; + } else + ReverseOrder = CI.Offset0 > CI.Offset1; + + static const unsigned Idxs[4][4] = { + {AMDGPU::sub0, AMDGPU::sub0_sub1, AMDGPU::sub0_sub1_sub2, AMDGPU::sub0_sub1_sub2_sub3}, + {AMDGPU::sub1, AMDGPU::sub1_sub2, AMDGPU::sub1_sub2_sub3, 0}, + {AMDGPU::sub2, AMDGPU::sub2_sub3, 0, 0}, + {AMDGPU::sub3, 0, 0, 0}, + }; + unsigned Idx0; + unsigned Idx1; + + assert(CI.Width0 >= 1 && CI.Width0 <= 3); + assert(CI.Width1 >= 1 && CI.Width1 <= 3); + + if (ReverseOrder) { + Idx1 = Idxs[0][CI.Width1 - 1]; + Idx0 = Idxs[CI.Width1][CI.Width0 - 1]; } else { - switch (CI.Width0) { - default: - return std::make_pair(0, 0); - case 1: - switch (CI.Width1) { - default: - return 
std::make_pair(0, 0); - case 1: - return std::make_pair(AMDGPU::sub0, AMDGPU::sub1); - case 2: - return std::make_pair(AMDGPU::sub0, AMDGPU::sub1_sub2); - case 3: - return std::make_pair(AMDGPU::sub0, AMDGPU::sub1_sub2_sub3); - } - case 2: - switch (CI.Width1) { - default: - return std::make_pair(0, 0); - case 1: - return std::make_pair(AMDGPU::sub0_sub1, AMDGPU::sub2); - case 2: - return std::make_pair(AMDGPU::sub0_sub1, AMDGPU::sub2_sub3); - } - case 3: - switch (CI.Width1) { - default: - return std::make_pair(0, 0); - case 1: - return std::make_pair(AMDGPU::sub0_sub1_sub2, AMDGPU::sub3); - } - } + Idx0 = Idxs[0][CI.Width0 - 1]; + Idx1 = Idxs[CI.Width0][CI.Width1 - 1]; } + + return std::make_pair(Idx0, Idx1); } const TargetRegisterClass * @@ -1040,7 +1272,7 @@ SILoadStoreOptimizer::getTargetRegisterClass(const CombineInfo &CI) { case 2: return &AMDGPU::SReg_64_XEXECRegClass; case 4: - return &AMDGPU::SReg_128RegClass; + return &AMDGPU::SGPR_128RegClass; case 8: return &AMDGPU::SReg_256RegClass; case 16: @@ -1073,7 +1305,7 @@ SILoadStoreOptimizer::mergeBufferStorePair(CombineInfo &CI) { // Copy to the new source register. const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI); - unsigned SrcReg = MRI->createVirtualRegister(SuperRC); + Register SrcReg = MRI->createVirtualRegister(SuperRC); const auto *Src0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata); const auto *Src1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::vdata); @@ -1087,35 +1319,45 @@ SILoadStoreOptimizer::mergeBufferStorePair(CombineInfo &CI) { auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode)) .addReg(SrcReg, RegState::Kill); - const unsigned Regs = getRegs(Opcode); + const unsigned Regs = getRegs(Opcode, *TII); if (Regs & VADDR) MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr)); - MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc)) - .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) - .addImm(std::min(CI.Offset0, CI.Offset1)) // offset - .addImm(CI.GLC0) // glc - .addImm(CI.SLC0) // slc - .addImm(0) // tfe - .addImm(CI.DLC0) // dlc - .cloneMergedMemRefs({&*CI.I, &*CI.Paired}); + + // It shouldn't be possible to get this far if the two instructions + // don't have a single memoperand, because MachineInstr::mayAlias() + // will return true if this is the case. 
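The Idxs table in getSubRegIdxs above picks one subregister index per merged access from the slice's starting dword and its width. A standalone worked example, with strings standing in for the AMDGPU::sub* enumerators and subRegIdxs as an invented name; widths are assumed to be 1-3 dwords, as in the asserts above:

    #include <cassert>
    #include <string>
    #include <utility>

    // Row = first dword of the slice, column = width - 1.
    static std::pair<std::string, std::string>
    subRegIdxs(unsigned Width0, unsigned Width1, bool ReverseOrder) {
      static const char *Idxs[4][4] = {
          {"sub0", "sub0_sub1", "sub0_sub1_sub2", "sub0_sub1_sub2_sub3"},
          {"sub1", "sub1_sub2", "sub1_sub2_sub3", ""},
          {"sub2", "sub2_sub3", "", ""},
          {"sub3", "", "", ""},
      };
      if (ReverseOrder) // the second access holds the low dwords
        return {Idxs[Width1][Width0 - 1], Idxs[0][Width1 - 1]};
      return {Idxs[0][Width0 - 1], Idxs[Width0][Width1 - 1]};
    }

    int main() {
      auto P = subRegIdxs(1, 2, false); // dword + dwordx2, in offset order
      assert(P.first == "sub0" && P.second == "sub1_sub2");
      auto Q = subRegIdxs(1, 2, true);  // same pair, offsets reversed
      assert(Q.first == "sub2" && Q.second == "sub0_sub1");
    }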
+ assert(CI.I->hasOneMemOperand() && CI.Paired->hasOneMemOperand()); + + const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); + const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin(); + + MachineInstr *New = + MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc)) + .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) + .addImm(std::min(CI.Offset0, CI.Offset1)) // offset + .addImm(CI.GLC0) // glc + .addImm(CI.SLC0) // slc + .addImm(0) // tfe + .addImm(CI.DLC0) // dlc + .addImm(0) // swz + .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); moveInstsAfter(MIB, CI.InstsToMove); - MachineBasicBlock::iterator Next = std::next(CI.I); CI.I->eraseFromParent(); CI.Paired->eraseFromParent(); - return Next; + return New; } MachineOperand -SILoadStoreOptimizer::createRegOrImm(int32_t Val, MachineInstr &MI) { +SILoadStoreOptimizer::createRegOrImm(int32_t Val, MachineInstr &MI) const { APInt V(32, Val, true); if (TII->isInlineConstant(V)) return MachineOperand::CreateImm(Val); - unsigned Reg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); + Register Reg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); MachineInstr *Mov = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), Reg) @@ -1127,7 +1369,7 @@ SILoadStoreOptimizer::createRegOrImm(int32_t Val, MachineInstr &MI) { // Compute base address using Addr and return the final register. unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI, - const MemAddress &Addr) { + const MemAddress &Addr) const { MachineBasicBlock *MBB = MI.getParent(); MachineBasicBlock::iterator MBBI = MI.getIterator(); DebugLoc DL = MI.getDebugLoc(); @@ -1146,11 +1388,11 @@ unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI, createRegOrImm(static_cast(Addr.Offset >> 32), MI); const auto *CarryRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID); - unsigned CarryReg = MRI->createVirtualRegister(CarryRC); - unsigned DeadCarryReg = MRI->createVirtualRegister(CarryRC); + Register CarryReg = MRI->createVirtualRegister(CarryRC); + Register DeadCarryReg = MRI->createVirtualRegister(CarryRC); - unsigned DestSub0 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned DestSub1 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register DestSub0 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register DestSub1 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); MachineInstr *LoHalf = BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADD_I32_e64), DestSub0) .addReg(CarryReg, RegState::Define) @@ -1170,7 +1412,7 @@ unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI, (void)HiHalf; LLVM_DEBUG(dbgs() << " "; HiHalf->dump();); - unsigned FullDestReg = MRI->createVirtualRegister(&AMDGPU::VReg_64RegClass); + Register FullDestReg = MRI->createVirtualRegister(&AMDGPU::VReg_64RegClass); MachineInstr *FullBase = BuildMI(*MBB, MBBI, DL, TII->get(TargetOpcode::REG_SEQUENCE), FullDestReg) .addReg(DestSub0) @@ -1186,13 +1428,13 @@ unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI, // Update base and offset with the NewBase and NewOffset in MI. 
void SILoadStoreOptimizer::updateBaseAndOffset(MachineInstr &MI, unsigned NewBase, - int32_t NewOffset) { + int32_t NewOffset) const { TII->getNamedOperand(MI, AMDGPU::OpName::vaddr)->setReg(NewBase); TII->getNamedOperand(MI, AMDGPU::OpName::offset)->setImm(NewOffset); } Optional -SILoadStoreOptimizer::extractConstOffset(const MachineOperand &Op) { +SILoadStoreOptimizer::extractConstOffset(const MachineOperand &Op) const { if (Op.isImm()) return Op.getImm(); @@ -1218,7 +1460,7 @@ SILoadStoreOptimizer::extractConstOffset(const MachineOperand &Op) { // %Base:vreg_64 = // REG_SEQUENCE %LO:vgpr_32, %subreg.sub0, %HI:vgpr_32, %subreg.sub1 void SILoadStoreOptimizer::processBaseWithConstOffset(const MachineOperand &Base, - MemAddress &Addr) { + MemAddress &Addr) const { if (!Base.isReg()) return; @@ -1273,15 +1515,16 @@ void SILoadStoreOptimizer::processBaseWithConstOffset(const MachineOperand &Base bool SILoadStoreOptimizer::promoteConstantOffsetToImm( MachineInstr &MI, MemInfoMap &Visited, - SmallPtrSet &AnchorList) { + SmallPtrSet &AnchorList) const { + + if (!(MI.mayLoad() ^ MI.mayStore())) + return false; // TODO: Support flat and scratch. - if (AMDGPU::getGlobalSaddrOp(MI.getOpcode()) < 0 || - TII->getNamedOperand(MI, AMDGPU::OpName::vdata) != NULL) + if (AMDGPU::getGlobalSaddrOp(MI.getOpcode()) < 0) return false; - // TODO: Support Store. - if (!MI.mayLoad()) + if (MI.mayLoad() && TII->getNamedOperand(MI, AMDGPU::OpName::vdata) != NULL) return false; if (AnchorList.count(&MI)) @@ -1418,100 +1661,166 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm( return false; } -// Scan through looking for adjacent LDS operations with constant offsets from -// the same base register. We rely on the scheduler to do the hard work of -// clustering nearby loads, and assume these are all adjacent. -bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) { - bool Modified = false; +void SILoadStoreOptimizer::addInstToMergeableList(const CombineInfo &CI, + std::list > &MergeableInsts) const { + for (std::list &AddrList : MergeableInsts) { + if (AddrList.front().hasSameBaseAddress(*CI.I) && + AddrList.front().InstClass == CI.InstClass) { + AddrList.emplace_back(CI); + return; + } + } + + // Base address not found, so add a new list. + MergeableInsts.emplace_back(1, CI); +} +bool SILoadStoreOptimizer::collectMergeableInsts(MachineBasicBlock &MBB, + std::list > &MergeableInsts) const { + bool Modified = false; // Contain the list MemInfoMap Visited; // Contains the list of instructions for which constant offsets are being // promoted to the IMM. SmallPtrSet AnchorList; - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) { - MachineInstr &MI = *I; - + // Sort potential mergeable instructions into lists. One list per base address. + for (MachineInstr &MI : MBB.instrs()) { + // We run this before checking if an address is mergeable, because it can produce + // better code even if the instructions aren't mergeable. if (promoteConstantOffsetToImm(MI, Visited, AnchorList)) Modified = true; + const InstClassEnum InstClass = getInstClass(MI.getOpcode(), *TII); + if (InstClass == UNKNOWN) + continue; + // Don't combine if volatile. 
- if (MI.hasOrderedMemoryRef()) { - ++I; + if (MI.hasOrderedMemoryRef()) + continue; + + CombineInfo CI; + CI.setMI(MI, *TII, *STM); + + if (!CI.hasMergeableAddress(*MRI)) + continue; + + addInstToMergeableList(CI, MergeableInsts); + } + return Modified; +} + +// Scan through looking for adjacent LDS operations with constant offsets from +// the same base register. We rely on the scheduler to do the hard work of +// clustering nearby loads, and assume these are all adjacent. +bool SILoadStoreOptimizer::optimizeBlock( + std::list > &MergeableInsts) { + bool Modified = false; + + for (std::list &MergeList : MergeableInsts) { + if (MergeList.size() < 2) + continue; + + bool OptimizeListAgain = false; + if (!optimizeInstsWithSameBaseAddr(MergeList, OptimizeListAgain)) { + // We weren't able to make any changes, so clear the list so we don't + // process the same instructions the next time we try to optimize this + // block. + MergeList.clear(); continue; } - const unsigned Opc = MI.getOpcode(); + // We made changes, but also determined that there were no more optimization + // opportunities, so we don't need to reprocess the list + if (!OptimizeListAgain) + MergeList.clear(); - CombineInfo CI; - CI.I = I; - CI.InstClass = getInstClass(Opc); + OptimizeAgain |= OptimizeListAgain; + Modified = true; + } + return Modified; +} + +void +SILoadStoreOptimizer::removeCombinedInst(std::list &MergeList, + const MachineInstr &MI) { + + for (auto CI = MergeList.begin(), E = MergeList.end(); CI != E; ++CI) { + if (&*CI->I == &MI) { + MergeList.erase(CI); + return; + } + } +} + +bool +SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr( + std::list &MergeList, + bool &OptimizeListAgain) { + bool Modified = false; + for (auto I = MergeList.begin(); I != MergeList.end(); ++I) { + CombineInfo &CI = *I; switch (CI.InstClass) { default: break; case DS_READ: - CI.EltSize = - (Opc == AMDGPU::DS_READ_B64 || Opc == AMDGPU::DS_READ_B64_gfx9) ? 8 - : 4; if (findMatchingInst(CI)) { Modified = true; - I = mergeRead2Pair(CI); - } else { - ++I; + removeCombinedInst(MergeList, *CI.Paired); + MachineBasicBlock::iterator NewMI = mergeRead2Pair(CI); + CI.setMI(NewMI, *TII, *STM); } - continue; + break; case DS_WRITE: - CI.EltSize = - (Opc == AMDGPU::DS_WRITE_B64 || Opc == AMDGPU::DS_WRITE_B64_gfx9) ? 
8 - : 4; if (findMatchingInst(CI)) { Modified = true; - I = mergeWrite2Pair(CI); - } else { - ++I; + removeCombinedInst(MergeList, *CI.Paired); + MachineBasicBlock::iterator NewMI = mergeWrite2Pair(CI); + CI.setMI(NewMI, *TII, *STM); } - continue; + break; case S_BUFFER_LOAD_IMM: - CI.EltSize = AMDGPU::getSMRDEncodedOffset(*STM, 4); if (findMatchingInst(CI)) { Modified = true; - I = mergeSBufferLoadImmPair(CI); - OptimizeAgain |= (CI.Width0 + CI.Width1) < 16; - } else { - ++I; + removeCombinedInst(MergeList, *CI.Paired); + MachineBasicBlock::iterator NewMI = mergeSBufferLoadImmPair(CI); + CI.setMI(NewMI, *TII, *STM); + OptimizeListAgain |= (CI.Width0 + CI.Width1) < 16; } - continue; - case BUFFER_LOAD_OFFEN: - case BUFFER_LOAD_OFFSET: - case BUFFER_LOAD_OFFEN_exact: - case BUFFER_LOAD_OFFSET_exact: - CI.EltSize = 4; + break; + case BUFFER_LOAD: if (findMatchingInst(CI)) { Modified = true; - I = mergeBufferLoadPair(CI); - OptimizeAgain |= (CI.Width0 + CI.Width1) < 4; - } else { - ++I; + removeCombinedInst(MergeList, *CI.Paired); + MachineBasicBlock::iterator NewMI = mergeBufferLoadPair(CI); + CI.setMI(NewMI, *TII, *STM); + OptimizeListAgain |= (CI.Width0 + CI.Width1) < 4; } - continue; - case BUFFER_STORE_OFFEN: - case BUFFER_STORE_OFFSET: - case BUFFER_STORE_OFFEN_exact: - case BUFFER_STORE_OFFSET_exact: - CI.EltSize = 4; + break; + case BUFFER_STORE: if (findMatchingInst(CI)) { Modified = true; - I = mergeBufferStorePair(CI); - OptimizeAgain |= (CI.Width0 + CI.Width1) < 4; - } else { - ++I; + removeCombinedInst(MergeList, *CI.Paired); + MachineBasicBlock::iterator NewMI = mergeBufferStorePair(CI); + CI.setMI(NewMI, *TII, *STM); + OptimizeListAgain |= (CI.Width0 + CI.Width1) < 4; } - continue; + break; + case MIMG: + if (findMatchingInst(CI)) { + Modified = true; + removeCombinedInst(MergeList, *CI.Paired); + MachineBasicBlock::iterator NewMI = mergeImagePair(CI); + CI.setMI(NewMI, *TII, *STM); + OptimizeListAgain |= (CI.Width0 + CI.Width1) < 4; + } + break; } - - ++I; + // Clear the InstsToMove after we have finished searching so we don't have + // stale values left over if we search for this CI again in another pass + // over the block. + CI.InstsToMove.clear(); } return Modified; @@ -1537,10 +1846,14 @@ bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) { bool Modified = false; + for (MachineBasicBlock &MBB : MF) { + std::list > MergeableInsts; + // First pass: Collect list of all instructions we know how to merge. 
+ Modified |= collectMergeableInsts(MBB, MergeableInsts); do { OptimizeAgain = false; - Modified |= optimizeBlock(MBB); + Modified |= optimizeBlock(MergeableInsts); } while (OptimizeAgain); } diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp index 78f409cd9555..6f9abd3a8d9b 100644 --- a/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -98,6 +98,8 @@ private: void emitLoop(MachineInstr &MI); void emitEndCf(MachineInstr &MI); + Register getSaveExec(MachineInstr* MI); + void findMaskOperands(MachineInstr &MI, unsigned OpNo, SmallVectorImpl &Src) const; @@ -144,7 +146,7 @@ char &llvm::SILowerControlFlowID = SILowerControlFlow::ID; static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI, const SIInstrInfo *TII) { - unsigned SaveExecReg = MI.getOperand(0).getReg(); + Register SaveExecReg = MI.getOperand(0).getReg(); auto U = MRI->use_instr_nodbg_begin(SaveExecReg); if (U == MRI->use_instr_nodbg_end() || @@ -175,17 +177,31 @@ static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI, return true; } +Register SILowerControlFlow::getSaveExec(MachineInstr *MI) { + MachineBasicBlock *MBB = MI->getParent(); + MachineOperand &SaveExec = MI->getOperand(0); + assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister); + + Register SaveExecReg = SaveExec.getReg(); + unsigned FalseTermOpc = + TII->isWave32() ? AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term; + MachineBasicBlock::iterator I = (MI); + MachineBasicBlock::iterator J = std::next(I); + if (J != MBB->end() && J->getOpcode() == FalseTermOpc && + J->getOperand(1).isReg() && J->getOperand(1).getReg() == SaveExecReg) { + SaveExecReg = J->getOperand(0).getReg(); + J->eraseFromParent(); + } + return SaveExecReg; +} + void SILowerControlFlow::emitIf(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); MachineBasicBlock::iterator I(&MI); - - MachineOperand &SaveExec = MI.getOperand(0); - MachineOperand &Cond = MI.getOperand(1); - assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister && - Cond.getSubReg() == AMDGPU::NoSubRegister); - - Register SaveExecReg = SaveExec.getReg(); + Register SaveExecReg = getSaveExec(&MI); + MachineOperand& Cond = MI.getOperand(1); + assert(Cond.getSubReg() == AMDGPU::NoSubRegister); MachineOperand &ImpDefSCC = MI.getOperand(4); assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef()); @@ -204,7 +220,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) { .addReg(Exec) .addReg(Exec, RegState::ImplicitDefine); - unsigned Tmp = MRI->createVirtualRegister(BoolRC); + Register Tmp = MRI->createVirtualRegister(BoolRC); MachineInstr *And = BuildMI(MBB, I, DL, TII->get(AndOpc), Tmp) @@ -266,8 +282,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); - Register DstReg = MI.getOperand(0).getReg(); - assert(MI.getOperand(0).getSubReg() == AMDGPU::NoSubRegister); + Register DstReg = getSaveExec(&MI); bool ExecModified = MI.getOperand(3).getImm() != 0; MachineBasicBlock::iterator Start = MBB.begin(); @@ -339,7 +354,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) { void SILowerControlFlow::emitIfBreak(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); - auto Dst = MI.getOperand(0).getReg(); + auto Dst = getSaveExec(&MI); // Skip ANDing with exec if the break condition is already masked by exec // because it is a V_CMP 
in the same basic block. (We know the break @@ -400,13 +415,17 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) { void SILowerControlFlow::emitEndCf(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + unsigned CFMask = MI.getOperand(0).getReg(); + MachineInstr *Def = MRI.getUniqueVRegDef(CFMask); const DebugLoc &DL = MI.getDebugLoc(); - MachineBasicBlock::iterator InsPt = MBB.begin(); - MachineInstr *NewMI = - BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec) - .addReg(Exec) - .add(MI.getOperand(0)); + MachineBasicBlock::iterator InsPt = + Def && Def->getParent() == &MBB ? std::next(MachineBasicBlock::iterator(Def)) + : MBB.begin(); + MachineInstr *NewMI = BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec) + .addReg(Exec) + .add(MI.getOperand(0)); if (LIS) LIS->ReplaceMachineInstrInMaps(MI, *NewMI); @@ -422,7 +441,7 @@ void SILowerControlFlow::emitEndCf(MachineInstr &MI) { void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo, SmallVectorImpl &Src) const { MachineOperand &Op = MI.getOperand(OpNo); - if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) { + if (!Op.isReg() || !Register::isVirtualRegister(Op.getReg())) { Src.push_back(Op); return; } @@ -442,8 +461,7 @@ void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo, for (const auto &SrcOp : Def->explicit_operands()) if (SrcOp.isReg() && SrcOp.isUse() && - (TargetRegisterInfo::isVirtualRegister(SrcOp.getReg()) || - SrcOp.getReg() == Exec)) + (Register::isVirtualRegister(SrcOp.getReg()) || SrcOp.getReg() == Exec)) Src.push_back(SrcOp); } @@ -466,7 +484,7 @@ void SILowerControlFlow::combineMasks(MachineInstr &MI) { else if (Ops[1].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1; else return; - unsigned Reg = MI.getOperand(OpToReplace).getReg(); + Register Reg = MI.getOperand(OpToReplace).getReg(); MI.RemoveOperand(OpToReplace); MI.addOperand(Ops[UniqueOpndIdx]); if (MRI->use_empty(Reg)) diff --git a/lib/Target/AMDGPU/SILowerI1Copies.cpp b/lib/Target/AMDGPU/SILowerI1Copies.cpp index 1c0f836f07e6..b45412536356 100644 --- a/lib/Target/AMDGPU/SILowerI1Copies.cpp +++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp @@ -96,7 +96,7 @@ private: getSaluInsertionAtEnd(MachineBasicBlock &MBB) const; bool isVreg1(unsigned Reg) const { - return TargetRegisterInfo::isVirtualRegister(Reg) && + return Register::isVirtualRegister(Reg) && MRI->getRegClass(Reg) == &AMDGPU::VReg_1RegClass; } @@ -489,6 +489,15 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &TheMF) { return true; } +#ifndef NDEBUG +static bool isVRegCompatibleReg(const SIRegisterInfo &TRI, + const MachineRegisterInfo &MRI, + Register Reg) { + unsigned Size = TRI.getRegSizeInBits(Reg, MRI); + return Size == 1 || Size == 32; +} +#endif + void SILowerI1Copies::lowerCopiesFromI1() { SmallVector DeadCopies; @@ -497,8 +506,8 @@ void SILowerI1Copies::lowerCopiesFromI1() { if (MI.getOpcode() != AMDGPU::COPY) continue; - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); if (!isVreg1(SrcReg)) continue; @@ -509,7 +518,7 @@ void SILowerI1Copies::lowerCopiesFromI1() { LLVM_DEBUG(dbgs() << "Lower copy from i1: " << MI); DebugLoc DL = MI.getDebugLoc(); - assert(TII->getRegisterInfo().getRegSizeInBits(DstReg, *MRI) == 32); + assert(isVRegCompatibleReg(TII->getRegisterInfo(), *MRI, DstReg)); assert(!MI.getOperand(0).getSubReg()); 
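// [Editorial sketch: not part of the vendored patch] The emitEndCf() hunk above now inserts
// the exec-restoring s_or immediately after the unique def of the control-flow mask when that
// def lives in the same block, and only falls back to MBB.begin() otherwise. Condensed to the
// calls that appear in the hunk:
  MachineInstr *Def = MRI.getUniqueVRegDef(CFMask);
  MachineBasicBlock::iterator InsPt =
      (Def && Def->getParent() == &MBB) ? std::next(MachineBasicBlock::iterator(Def))
                                        : MBB.begin();
  BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec)
      .addReg(Exec)
      .add(MI.getOperand(0)); // the saved mask operand being ORed back into exec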
ConstrainRegs.insert(SrcReg); @@ -544,7 +553,7 @@ void SILowerI1Copies::lowerPhis() { LF.initialize(MBB); for (MachineInstr &MI : MBB.phis()) { - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); if (!isVreg1(DstReg)) continue; @@ -556,7 +565,7 @@ void SILowerI1Copies::lowerPhis() { // Collect incoming values. for (unsigned i = 1; i < MI.getNumOperands(); i += 2) { assert(i + 1 < MI.getNumOperands()); - unsigned IncomingReg = MI.getOperand(i).getReg(); + Register IncomingReg = MI.getOperand(i).getReg(); MachineBasicBlock *IncomingMBB = MI.getOperand(i + 1).getMBB(); MachineInstr *IncomingDef = MRI->getUniqueVRegDef(IncomingReg); @@ -580,12 +589,12 @@ void SILowerI1Copies::lowerPhis() { // Phis in a loop that are observed outside the loop receive a simple but // conservatively correct treatment. - MachineBasicBlock *PostDomBound = &MBB; - for (MachineInstr &Use : MRI->use_instructions(DstReg)) { - PostDomBound = - PDT->findNearestCommonDominator(PostDomBound, Use.getParent()); - } + std::vector DomBlocks = {&MBB}; + for (MachineInstr &Use : MRI->use_instructions(DstReg)) + DomBlocks.push_back(Use.getParent()); + MachineBasicBlock *PostDomBound = + PDT->findNearestCommonDominator(DomBlocks); unsigned FoundLoopLevel = LF.findLoop(PostDomBound); SSAUpdater.Initialize(DstReg); @@ -669,7 +678,7 @@ void SILowerI1Copies::lowerCopiesToI1() { MI.getOpcode() != AMDGPU::COPY) continue; - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); if (!isVreg1(DstReg)) continue; @@ -686,10 +695,10 @@ void SILowerI1Copies::lowerCopiesToI1() { continue; DebugLoc DL = MI.getDebugLoc(); - unsigned SrcReg = MI.getOperand(1).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); assert(!MI.getOperand(1).getSubReg()); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || + if (!Register::isVirtualRegister(SrcReg) || (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) { assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32); unsigned TmpReg = createLaneMaskReg(*MF); @@ -702,12 +711,12 @@ void SILowerI1Copies::lowerCopiesToI1() { // Defs in a loop that are observed outside the loop must be transformed // into appropriate bit manipulation. 
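// [Editorial sketch: not part of the vendored patch] lowerPhis() above and lowerCopiesToI1()
// just below both drop the old loop that narrowed a single PostDomBound one use at a time, and
// instead collect every block containing a use and ask the post-dominator tree for a common
// bound in a single call. The shape of that pattern, using only names from these hunks:
  std::vector<MachineBasicBlock *> DomBlocks = {&MBB};
  for (MachineInstr &Use : MRI->use_instructions(DstReg))
    DomBlocks.push_back(Use.getParent());
  MachineBasicBlock *PostDomBound =
      PDT->findNearestCommonDominator(DomBlocks); // one query over all use blocks
  unsigned FoundLoopLevel = LF.findLoop(PostDomBound);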
- MachineBasicBlock *PostDomBound = &MBB; - for (MachineInstr &Use : MRI->use_instructions(DstReg)) { - PostDomBound = - PDT->findNearestCommonDominator(PostDomBound, Use.getParent()); - } + std::vector DomBlocks = {&MBB}; + for (MachineInstr &Use : MRI->use_instructions(DstReg)) + DomBlocks.push_back(Use.getParent()); + MachineBasicBlock *PostDomBound = + PDT->findNearestCommonDominator(DomBlocks); unsigned FoundLoopLevel = LF.findLoop(PostDomBound); if (FoundLoopLevel) { SSAUpdater.Initialize(DstReg); @@ -734,7 +743,7 @@ bool SILowerI1Copies::isConstantLaneMask(unsigned Reg, bool &Val) const { break; Reg = MI->getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return false; if (!isLaneMaskReg(Reg)) return false; diff --git a/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/lib/Target/AMDGPU/SILowerSGPRSpills.cpp index a82047473370..714d403a3e8f 100644 --- a/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -278,8 +278,8 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr); int FI = MI.getOperand(FIOp).getIndex(); - unsigned VReg = TII->getNamedOperand(MI, AMDGPU::OpName::vdata) - ->getReg(); + Register VReg = + TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI, TRI->isAGPR(MRI, VReg))) { TRI->eliminateFrameIndex(MI, 0, FIOp, nullptr); diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 46da974a2f45..7dd0f11c95de 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -53,8 +53,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F); WavesPerEU = ST.getWavesPerEU(F); - Occupancy = getMaxWavesPerEU(); - limitOccupancy(MF); + Occupancy = ST.computeOccupancy(MF, getLDSSize()); CallingConv::ID CC = F.getCallingConv(); if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) { @@ -190,7 +189,7 @@ unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer( const SIRegisterInfo &TRI) { ArgInfo.PrivateSegmentBuffer = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( - getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass)); + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass)); NumUserSGPRs += 4; return ArgInfo.PrivateSegmentBuffer.getRegister(); } @@ -487,6 +486,7 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo( NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()), MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()), + HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()), ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)), ScratchWaveOffsetReg(regToString(MFI.getScratchWaveOffsetReg(), TRI)), FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)), @@ -501,8 +501,9 @@ void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) { bool SIMachineFunctionInfo::initializeBaseYamlFields( const yaml::SIMachineFunctionInfo &YamlMFI) { ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize; - MaxKernArgAlign = YamlMFI.MaxKernArgAlign; + MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign); LDSSize = YamlMFI.LDSSize; + HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress; IsEntryFunction = YamlMFI.IsEntryFunction; NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath; MemoryBound = YamlMFI.MemoryBound; diff --git 
a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h index f19b20ceb5da..7d70c786b594 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -265,6 +265,7 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo { bool NoSignedZerosFPMath = false; bool MemoryBound = false; bool WaveLimiter = false; + uint32_t HighBitsOf32BitAddress = 0; StringValue ScratchRSrcReg = "$private_rsrc_reg"; StringValue ScratchWaveOffsetReg = "$scratch_wave_offset_reg"; @@ -302,6 +303,8 @@ template <> struct MappingTraits { StringValue("$sp_reg")); YamlIO.mapOptional("argumentInfo", MFI.ArgInfo); YamlIO.mapOptional("mode", MFI.Mode, SIMode()); + YamlIO.mapOptional("highBitsOf32BitAddress", + MFI.HighBitsOf32BitAddress, 0u); } }; @@ -670,7 +673,7 @@ public: return GITPtrHigh; } - unsigned get32BitAddressHighBits() const { + uint32_t get32BitAddressHighBits() const { return HighBitsOf32BitAddress; } @@ -873,7 +876,7 @@ public: assert(BufferRsrc); auto PSV = BufferPSVs.try_emplace( BufferRsrc, - llvm::make_unique(TII)); + std::make_unique(TII)); return PSV.first->second.get(); } @@ -882,14 +885,14 @@ public: assert(ImgRsrc); auto PSV = ImagePSVs.try_emplace( ImgRsrc, - llvm::make_unique(TII)); + std::make_unique(TII)); return PSV.first->second.get(); } const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) { if (!GWSResourcePSV) { GWSResourcePSV = - llvm::make_unique(TII); + std::make_unique(TII); } return GWSResourcePSV.get(); diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp index ebbdf80f9567..c072ba6b2d1c 100644 --- a/lib/Target/AMDGPU/SIMachineScheduler.cpp +++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -348,7 +348,7 @@ void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock, // Do not Track Physical Registers, because it messes up. for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) { - if (TargetRegisterInfo::isVirtualRegister(RegMaskPair.RegUnit)) + if (Register::isVirtualRegister(RegMaskPair.RegUnit)) LiveInRegs.insert(RegMaskPair.RegUnit); } LiveOutRegs.clear(); @@ -376,7 +376,7 @@ void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock, // The use of findDefBetween removes the case 4. for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) { unsigned Reg = RegMaskPair.RegUnit; - if (TargetRegisterInfo::isVirtualRegister(Reg) && + if (Register::isVirtualRegister(Reg) && isDefBetween(Reg, LIS->getInstructionIndex(*BeginBlock).getRegSlot(), LIS->getInstructionIndex(*EndBlock).getRegSlot(), MRI, LIS)) { @@ -1228,7 +1228,7 @@ void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVaria unsigned Color = CurrentColoring[SU->NodeNum]; if (RealID.find(Color) == RealID.end()) { int ID = CurrentBlocks.size(); - BlockPtrs.push_back(llvm::make_unique(DAG, this, ID)); + BlockPtrs.push_back(std::make_unique(DAG, this, ID)); CurrentBlocks.push_back(BlockPtrs.rbegin()->get()); RealID[Color] = ID; } @@ -1690,7 +1690,7 @@ SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() { void SIScheduleBlockScheduler::addLiveRegs(std::set &Regs) { for (unsigned Reg : Regs) { // For now only track virtual registers. - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) continue; // If not already in the live set, then add it. 
(void) LiveRegs.insert(Reg); @@ -1750,7 +1750,7 @@ SIScheduleBlockScheduler::checkRegUsageImpact(std::set &InRegs, for (unsigned Reg : InRegs) { // For now only track virtual registers. - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) continue; if (LiveRegsConsumers[Reg] > 1) continue; @@ -1762,7 +1762,7 @@ SIScheduleBlockScheduler::checkRegUsageImpact(std::set &InRegs, for (unsigned Reg : OutRegs) { // For now only track virtual registers. - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) continue; PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg); for (; PSetI.isValid(); ++PSetI) { @@ -1801,7 +1801,7 @@ SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant, // SIScheduleDAGMI // SIScheduleDAGMI::SIScheduleDAGMI(MachineSchedContext *C) : - ScheduleDAGMILive(C, llvm::make_unique(C)) { + ScheduleDAGMILive(C, std::make_unique(C)) { SITII = static_cast(TII); SITRI = static_cast(TRI); @@ -1913,7 +1913,7 @@ SIScheduleDAGMI::fillVgprSgprCost(_Iterator First, _Iterator End, for (_Iterator RegI = First; RegI != End; ++RegI) { unsigned Reg = *RegI; // For now only track virtual registers - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) continue; PSetIterator PSetI = MRI.getPressureSets(Reg); for (; PSetI.isValid(); ++PSetI) { diff --git a/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 4320e6c957a0..e914573306ae 100644 --- a/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -656,10 +656,10 @@ SICacheControl::SICacheControl(const GCNSubtarget &ST) { std::unique_ptr SICacheControl::create(const GCNSubtarget &ST) { GCNSubtarget::Generation Generation = ST.getGeneration(); if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS) - return make_unique(ST); + return std::make_unique(ST); if (Generation < AMDGPUSubtarget::GFX10) - return make_unique(ST); - return make_unique(ST, ST.isCuModeEnabled()); + return std::make_unique(ST); + return std::make_unique(ST, ST.isCuModeEnabled()); } bool SIGfx6CacheControl::enableLoadCacheBypass( diff --git a/lib/Target/AMDGPU/SIModeRegister.cpp b/lib/Target/AMDGPU/SIModeRegister.cpp index a5edd7b3554a..52989a280e80 100644 --- a/lib/Target/AMDGPU/SIModeRegister.cpp +++ b/lib/Target/AMDGPU/SIModeRegister.cpp @@ -226,7 +226,7 @@ void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI, // - on exit we have set the Require, Change, and initial Exit modes. 
void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB, const SIInstrInfo *TII) { - auto NewInfo = llvm::make_unique(); + auto NewInfo = std::make_unique(); MachineInstr *InsertionPoint = nullptr; // RequirePending is used to indicate whether we are collecting the initial // requirements for the block, and need to defer the first InsertionPoint to diff --git a/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp index 3227bff20513..cc9b46a75582 100644 --- a/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ b/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -322,7 +322,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { continue; } - unsigned CopyFromExec = CopyFromExecInst->getOperand(0).getReg(); + Register CopyFromExec = CopyFromExecInst->getOperand(0).getReg(); MachineInstr *SaveExecInst = nullptr; SmallVector OtherUseInsts; diff --git a/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp index 7e10316eab92..fdd30db6a7cb 100644 --- a/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ b/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -211,7 +211,7 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB, return AMDGPU::NoRegister; MachineOperand *AndCC = &And->getOperand(1); - unsigned CmpReg = AndCC->getReg(); + Register CmpReg = AndCC->getReg(); unsigned CmpSubReg = AndCC->getSubReg(); if (CmpReg == ExecReg) { AndCC = &And->getOperand(2); @@ -234,7 +234,7 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB, if (!Op1->isReg() || !Op2->isImm() || Op2->getImm() != 1) return AMDGPU::NoRegister; - unsigned SelReg = Op1->getReg(); + Register SelReg = Op1->getReg(); auto *Sel = TRI->findReachingDef(SelReg, Op1->getSubReg(), *Cmp, MRI, LIS); if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64) return AMDGPU::NoRegister; @@ -250,15 +250,16 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB, Op1->getImm() != 0 || Op2->getImm() != 1) return AMDGPU::NoRegister; - LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' - << *Cmp << '\t' << *And); + LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t' + << *And); - unsigned CCReg = CC->getReg(); + Register CCReg = CC->getReg(); LIS->RemoveMachineInstrFromMaps(*And); - MachineInstr *Andn2 = BuildMI(MBB, *And, And->getDebugLoc(), - TII->get(Andn2Opc), And->getOperand(0).getReg()) - .addReg(ExecReg) - .addReg(CCReg, 0, CC->getSubReg()); + MachineInstr *Andn2 = + BuildMI(MBB, *And, And->getDebugLoc(), TII->get(Andn2Opc), + And->getOperand(0).getReg()) + .addReg(ExecReg) + .addReg(CCReg, getUndefRegState(CC->isUndef()), CC->getSubReg()); And->eraseFromParent(); LIS->InsertMachineInstrInMaps(*Andn2); @@ -266,20 +267,19 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB, // Try to remove compare. Cmp value should not used in between of cmp // and s_and_b64 if VCC or just unused if any other register. - if ((TargetRegisterInfo::isVirtualRegister(CmpReg) && - MRI.use_nodbg_empty(CmpReg)) || + if ((Register::isVirtualRegister(CmpReg) && MRI.use_nodbg_empty(CmpReg)) || (CmpReg == CondReg && std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(), [&](const MachineInstr &MI) { - return MI.readsRegister(CondReg, TRI); }))) { + return MI.readsRegister(CondReg, TRI); + }))) { LLVM_DEBUG(dbgs() << "Erasing: " << *Cmp << '\n'); LIS->RemoveMachineInstrFromMaps(*Cmp); Cmp->eraseFromParent(); // Try to remove v_cndmask_b32. 
- if (TargetRegisterInfo::isVirtualRegister(SelReg) && - MRI.use_nodbg_empty(SelReg)) { + if (Register::isVirtualRegister(SelReg) && MRI.use_nodbg_empty(SelReg)) { LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n'); LIS->RemoveMachineInstrFromMaps(*Sel); @@ -413,7 +413,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) { if (!SaveExec || !SaveExec->isFullCopy()) continue; - unsigned SavedExec = SaveExec->getOperand(0).getReg(); + Register SavedExec = SaveExec->getOperand(0).getReg(); bool SafeToReplace = true; for (auto& U : MRI.use_nodbg_instructions(SavedExec)) { if (U.getParent() != SaveExec->getParent()) { @@ -434,7 +434,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) { if (Changed) { for (auto Reg : RecalcRegs) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { LIS->removeInterval(Reg); if (!MRI.reg_empty(Reg)) LIS->createAndComputeVirtRegInterval(Reg); diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index 2d71abc0612a..9b3b2436475c 100644 --- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -574,16 +574,16 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) { MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); - if (TRI->isPhysicalRegister(Src1->getReg()) || - TRI->isPhysicalRegister(Dst->getReg())) + if (Register::isPhysicalRegister(Src1->getReg()) || + Register::isPhysicalRegister(Dst->getReg())) break; if (Opcode == AMDGPU::V_LSHLREV_B32_e32 || Opcode == AMDGPU::V_LSHLREV_B32_e64) { - return make_unique( + return std::make_unique( Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD); } else { - return make_unique( + return std::make_unique( Src1, Dst, *Imm == 16 ? 
WORD_1 : BYTE_3, false, false, Opcode != AMDGPU::V_LSHRREV_B32_e32 && Opcode != AMDGPU::V_LSHRREV_B32_e64); @@ -613,15 +613,15 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) { MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); - if (TRI->isPhysicalRegister(Src1->getReg()) || - TRI->isPhysicalRegister(Dst->getReg())) + if (Register::isPhysicalRegister(Src1->getReg()) || + Register::isPhysicalRegister(Dst->getReg())) break; if (Opcode == AMDGPU::V_LSHLREV_B16_e32 || Opcode == AMDGPU::V_LSHLREV_B16_e64) { - return make_unique(Dst, Src1, BYTE_1, UNUSED_PAD); + return std::make_unique(Dst, Src1, BYTE_1, UNUSED_PAD); } else { - return make_unique( + return std::make_unique( Src1, Dst, BYTE_1, false, false, Opcode != AMDGPU::V_LSHRREV_B16_e32 && Opcode != AMDGPU::V_LSHRREV_B16_e64); @@ -677,11 +677,11 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) { MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); - if (TRI->isPhysicalRegister(Src0->getReg()) || - TRI->isPhysicalRegister(Dst->getReg())) + if (Register::isPhysicalRegister(Src0->getReg()) || + Register::isPhysicalRegister(Dst->getReg())) break; - return make_unique( + return std::make_unique( Src0, Dst, SrcSel, false, false, Opcode != AMDGPU::V_BFE_U32); } @@ -706,11 +706,11 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) { MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); - if (TRI->isPhysicalRegister(ValSrc->getReg()) || - TRI->isPhysicalRegister(Dst->getReg())) + if (Register::isPhysicalRegister(ValSrc->getReg()) || + Register::isPhysicalRegister(Dst->getReg())) break; - return make_unique( + return std::make_unique( ValSrc, Dst, *Imm == 0x0000ffff ? 
WORD_0 : BYTE_0); } @@ -840,7 +840,7 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) { MachineOperand *OrDst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); assert(OrDst && OrDst->isReg()); - return make_unique( + return std::make_unique( OrDst, OrSDWADef, OrOtherDef, DstSel); } @@ -1189,7 +1189,7 @@ void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI, continue; } - unsigned VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), VGPR); if (Op.isImm()) diff --git a/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp index f9bfe96f65cb..6cdd12d0e7bd 100644 --- a/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp +++ b/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp @@ -90,12 +90,12 @@ bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) { if (!MO.isReg()) return false; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!TRI->isVGPR(*MRI, Reg)) return false; - if (TRI->isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return false; if (VRM->hasPhys(Reg)) @@ -124,14 +124,14 @@ void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) { if (!MO.isReg()) continue; - const unsigned VirtReg = MO.getReg(); - if (TRI->isPhysicalRegister(VirtReg)) + const Register VirtReg = MO.getReg(); + if (Register::isPhysicalRegister(VirtReg)) continue; if (!VRM->hasPhys(VirtReg)) continue; - unsigned PhysReg = VRM->getPhys(VirtReg); + Register PhysReg = VRM->getPhys(VirtReg); const unsigned SubReg = MO.getSubReg(); if (SubReg != 0) { PhysReg = TRI->getSubReg(PhysReg, SubReg); @@ -149,7 +149,7 @@ void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) { for (unsigned Reg : RegsToRewrite) { LIS->removeInterval(Reg); - const unsigned PhysReg = VRM->getPhys(Reg); + const Register PhysReg = VRM->getPhys(Reg); assert(PhysReg != 0); MFI->ReserveWWMRegister(PhysReg); } diff --git a/lib/Target/AMDGPU/SIProgramInfo.h b/lib/Target/AMDGPU/SIProgramInfo.h index 168f05f8fdd6..7c039a54b57f 100644 --- a/lib/Target/AMDGPU/SIProgramInfo.h +++ b/lib/Target/AMDGPU/SIProgramInfo.h @@ -41,6 +41,8 @@ struct SIProgramInfo { uint64_t ComputePGMRSrc2 = 0; uint32_t NumVGPR = 0; + uint32_t NumArchVGPR = 0; + uint32_t NumAccVGPR = 0; uint32_t NumSGPR = 0; uint32_t LDSSize = 0; bool FlatUsed = false; @@ -51,6 +53,9 @@ struct SIProgramInfo { // Number of VGPRs that meets number of waves per execution unit request. uint32_t NumVGPRsForWavesPerEU = 0; + // Final occupancy. + uint32_t Occupancy = 0; + // Whether there is recursion, dynamic allocas, indirect calls or some other // reason there may be statically unknown stack usage. 
bool DynamicCallStack = false; diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index f152deb28004..f58bc3060c42 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -48,11 +48,6 @@ void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg, } } -static cl::opt EnableSpillSGPRToSMEM( - "amdgpu-spill-sgpr-to-smem", - cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"), - cl::init(false)); - static cl::opt EnableSpillSGPRToVGPR( "amdgpu-spill-sgpr-to-vgpr", cl::desc("Enable spilling VGPRs to SGPRs"), @@ -61,17 +56,12 @@ static cl::opt EnableSpillSGPRToVGPR( SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) : AMDGPURegisterInfo(), + ST(ST), SGPRPressureSets(getNumRegPressureSets()), VGPRPressureSets(getNumRegPressureSets()), AGPRPressureSets(getNumRegPressureSets()), - SpillSGPRToVGPR(false), - SpillSGPRToSMEM(false), + SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) { - if (EnableSpillSGPRToSMEM && ST.hasScalarStores()) - SpillSGPRToSMEM = true; - else if (EnableSpillSGPRToVGPR) - SpillSGPRToVGPR = true; - unsigned NumRegPressureSets = getNumRegPressureSets(); SGPRSetID = NumRegPressureSets; @@ -118,11 +108,9 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) : unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg( const MachineFunction &MF) const { - - const GCNSubtarget &ST = MF.getSubtarget(); unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4; unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx)); - return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass); + return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SGPR_128RegClass); } static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) { @@ -144,7 +132,6 @@ static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) { unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg( const MachineFunction &MF) const { - const GCNSubtarget &ST = MF.getSubtarget(); unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF)); return AMDGPU::SGPR_32RegClass.getRegister(Reg); } @@ -202,8 +189,6 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(AMDGPU::VCC_HI); } - const GCNSubtarget &ST = MF.getSubtarget(); - unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF); unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) { @@ -220,6 +205,14 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { reserveRegisterTuples(Reserved, Reg); } + // Reserve all the rest AGPRs if there are no instructions to use it. + if (!ST.hasMAIInsts()) { + for (unsigned i = 0; i < MaxNumVGPRs; ++i) { + unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i); + reserveRegisterTuples(Reserved, Reg); + } + } + const SIMachineFunctionInfo *MFI = MF.getInfo(); unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg(); @@ -293,32 +286,17 @@ bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const bool SIRegisterInfo::requiresFrameIndexScavenging( const MachineFunction &MF) const { - const MachineFrameInfo &MFI = MF.getFrameInfo(); - if (MFI.hasStackObjects()) - return true; - - // May need to deal with callee saved registers. - const SIMachineFunctionInfo *Info = MF.getInfo(); - return !Info->isEntryFunction(); + // Do not use frame virtual registers. 
They used to be used for SGPRs, but + // once we reach PrologEpilogInserter, we can no longer spill SGPRs. If the + // scavenger fails, we can increment/decrement the necessary SGPRs to avoid a + // spill. + return false; } bool SIRegisterInfo::requiresFrameIndexReplacementScavenging( const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); - if (!MFI.hasStackObjects()) - return false; - - // The scavenger is used for large frames which may require finding a free - // register for large offsets. - if (!isUInt<12>(MFI.getStackSize())) - return true; - - // If using scalar stores, for spills, m0 is needed for the scalar store - // offset (pre-GFX9). m0 is unallocatable, so we can't create a virtual - // register for it during frame index elimination, so the scavenger is - // directly needed. - return MF.getSubtarget().hasScalarStores() && - MF.getInfo()->hasSpilledSGPRs(); + return MFI.hasStackObjects(); } bool SIRegisterInfo::requiresVirtualBaseRegisters( @@ -372,8 +350,7 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, DL = Ins->getDebugLoc(); MachineFunction *MF = MBB->getParent(); - const GCNSubtarget &Subtarget = MF->getSubtarget(); - const SIInstrInfo *TII = Subtarget.getInstrInfo(); + const SIInstrInfo *TII = ST.getInstrInfo(); if (Offset == 0) { BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg) @@ -382,9 +359,9 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, } MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); - unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg) .addImm(Offset); @@ -399,11 +376,7 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, int64_t Offset) const { - - MachineBasicBlock *MBB = MI.getParent(); - MachineFunction *MF = MBB->getParent(); - const GCNSubtarget &Subtarget = MF->getSubtarget(); - const SIInstrInfo *TII = Subtarget.getInstrInfo(); + const SIInstrInfo *TII = ST.getInstrInfo(); #ifndef NDEBUG // FIXME: Is it possible to be storing a frame index to itself? 
@@ -419,12 +392,15 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, #endif MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr); +#ifndef NDEBUG + MachineBasicBlock *MBB = MI.getParent(); + MachineFunction *MF = MBB->getParent(); +#endif assert(FIOp && FIOp->isFI() && "frame index must be address operand"); assert(TII->isMUBUF(MI)); assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() == - MF->getInfo()->getFrameOffsetReg() && - "should only be seeing frame offset relative FrameIndex"); - + MF->getInfo()->getStackPtrOffsetReg() && + "should only be seeing stack pointer offset relative FrameIndex"); MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset); int64_t NewOffset = OffsetOp->getImm() + Offset; @@ -564,7 +540,8 @@ static int getOffsetMUBUFLoad(unsigned Opc) { } } -static MachineInstrBuilder spillVGPRtoAGPR(MachineBasicBlock::iterator MI, +static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, + MachineBasicBlock::iterator MI, int Index, unsigned Lane, unsigned ValueReg, @@ -572,7 +549,6 @@ static MachineInstrBuilder spillVGPRtoAGPR(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB = MI->getParent(); MachineFunction *MF = MI->getParent()->getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); - const GCNSubtarget &ST = MF->getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane); @@ -595,11 +571,12 @@ static MachineInstrBuilder spillVGPRtoAGPR(MachineBasicBlock::iterator MI, // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not // need to handle the case where an SGPR may need to be spilled while spilling. -static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII, +static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST, MachineFrameInfo &MFI, MachineBasicBlock::iterator MI, int Index, int64_t Offset) { + const SIInstrInfo *TII = ST.getInstrInfo(); MachineBasicBlock *MBB = MI->getParent(); const DebugLoc &DL = MI->getDebugLoc(); bool IsStore = MI->mayStore(); @@ -611,7 +588,7 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII, return false; const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata); - if (spillVGPRtoAGPR(MI, Index, 0, Reg->getReg(), false).getInstr()) + if (spillVGPRtoAGPR(ST, MI, Index, 0, Reg->getReg(), false).getInstr()) return true; MachineInstrBuilder NewMI = @@ -624,6 +601,7 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII, .addImm(0) // slc .addImm(0) // tfe .addImm(0) // dlc + .addImm(0) // swz .cloneMemRefs(*MI); const MachineOperand *VDataIn = TII->getNamedOperand(*MI, @@ -645,7 +623,6 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, RegScavenger *RS) const { MachineBasicBlock *MBB = MI->getParent(); MachineFunction *MF = MI->getParent()->getParent(); - const GCNSubtarget &ST = MF->getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); const MachineFrameInfo &MFI = MF->getFrameInfo(); @@ -707,8 +684,9 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, } for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) { - unsigned SubReg = NumSubRegs == 1 ? - ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i)); + Register SubReg = NumSubRegs == 1 + ? 
Register(ValueReg) + : getSubReg(ValueReg, getSubRegFromChannel(i)); unsigned SOffsetRegState = 0; unsigned SrcDstRegState = getDefRegState(!IsStore); @@ -718,7 +696,7 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, SrcDstRegState |= getKillRegState(IsKill); } - auto MIB = spillVGPRtoAGPR(MI, Index, i, SubReg, IsKill); + auto MIB = spillVGPRtoAGPR(ST, MI, Index, i, SubReg, IsKill); if (!MIB.getInstr()) { unsigned FinalReg = SubReg; @@ -743,6 +721,7 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, .addImm(0) // slc .addImm(0) // tfe .addImm(0) // dlc + .addImm(0) // swz .addMemOperand(NewMMO); if (!IsStore && TmpReg != AMDGPU::NoRegister) @@ -763,22 +742,6 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, } } -static std::pair getSpillEltSize(unsigned SuperRegSize, - bool Store) { - if (SuperRegSize % 16 == 0) { - return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR : - AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR }; - } - - if (SuperRegSize % 8 == 0) { - return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR : - AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR }; - } - - return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR : - AMDGPU::S_BUFFER_LOAD_DWORD_SGPR}; -} - bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index, RegScavenger *RS, @@ -794,98 +757,37 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, if (OnlyToVGPR && !SpillToVGPR) return false; - MachineRegisterInfo &MRI = MF->getRegInfo(); - const GCNSubtarget &ST = MF->getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); - unsigned SuperReg = MI->getOperand(0).getReg(); + Register SuperReg = MI->getOperand(0).getReg(); bool IsKill = MI->getOperand(0).isKill(); const DebugLoc &DL = MI->getDebugLoc(); MachineFrameInfo &FrameInfo = MF->getFrameInfo(); - bool SpillToSMEM = spillSGPRToSMEM(); - if (SpillToSMEM && OnlyToVGPR) - return false; - - Register FrameReg = getFrameRegister(*MF); - assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() && SuperReg != MFI->getFrameOffsetReg() && SuperReg != MFI->getScratchWaveOffsetReg())); assert(SuperReg != AMDGPU::M0 && "m0 should never spill"); - unsigned OffsetReg = AMDGPU::M0; unsigned M0CopyReg = AMDGPU::NoRegister; - if (SpillToSMEM) { - if (RS->isRegUsed(AMDGPU::M0)) { - M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg) - .addReg(AMDGPU::M0); - } - } - - unsigned ScalarStoreOp; unsigned EltSize = 4; const TargetRegisterClass *RC = getPhysRegClass(SuperReg); - if (SpillToSMEM && isSGPRClass(RC)) { - // XXX - if private_element_size is larger than 4 it might be useful to be - // able to spill wider vmem spills. - std::tie(EltSize, ScalarStoreOp) = - getSpillEltSize(getRegSizeInBits(*RC) / 8, true); - } ArrayRef SplitParts = getRegSplitParts(RC, EltSize); unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size(); + // Scavenged temporary VGPR to use. It must be scavenged once for any number + // of spilled subregs. + Register TmpVGPR; + // SubReg carries the "Kill" flag when SubReg == SuperReg. unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill); for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { - unsigned SubReg = NumSubRegs == 1 ? - SuperReg : getSubReg(SuperReg, SplitParts[i]); - - if (SpillToSMEM) { - int64_t FrOffset = FrameInfo.getObjectOffset(Index); - - // The allocated memory size is really the wavefront size * the frame - // index size. 
The widest register class is 64 bytes, so a 4-byte scratch - // allocation is enough to spill this in a single stack object. - // - // FIXME: Frame size/offsets are computed earlier than this, so the extra - // space is still unnecessarily allocated. - - unsigned Align = FrameInfo.getObjectAlignment(Index); - MachinePointerInfo PtrInfo - = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i); - MachineMemOperand *MMO - = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, - EltSize, MinAlign(Align, EltSize * i)); - - // SMEM instructions only support a single offset, so increment the wave - // offset. - - int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i); - if (Offset != 0) { - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg) - .addReg(FrameReg) - .addImm(Offset); - } else { - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg) - .addReg(FrameReg); - } - - BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp)) - .addReg(SubReg, getKillRegState(IsKill)) // sdata - .addReg(MFI->getScratchRSrcReg()) // sbase - .addReg(OffsetReg, RegState::Kill) // soff - .addImm(0) // glc - .addImm(0) // dlc - .addMemOperand(MMO); - - continue; - } + Register SubReg = + NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]); if (SpillToVGPR) { SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i]; @@ -915,15 +817,13 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, return false; // Spill SGPR to a frame index. - // TODO: Should VI try to spill to VGPR and then spill to SMEM? - unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - // TODO: Should VI try to spill to VGPR and then spill to SMEM? + if (!TmpVGPR.isValid()) + TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0); MachineInstrBuilder Mov - = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) + = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) .addReg(SubReg, SubKillState); - // There could be undef components of a spilled super register. // TODO: Can we detect this and skip the spill? 
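// [Editorial sketch: not part of the vendored patch] With the SGPR-to-SMEM path deleted, the
// memory fallback in spillSGPR() above (and restoreSGPR() below) no longer creates a fresh
// virtual VGPR per spilled dword; it lazily scavenges a single temporary VGPR and reuses it
// for every subregister, matching the "do not use frame virtual registers" note earlier in
// this file:
  Register TmpVGPR; // stays invalid until a memory spill is actually needed
  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    // ... try the SGPR-to-VGPR lane spill first ...
    if (!TmpVGPR.isValid())
      TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
        .addReg(SubReg, SubKillState);
    // ... then store TmpVGPR through SI_SPILL_V32_SAVE as in the hunk below ...
  }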
if (NumSubRegs > 1) { @@ -941,7 +841,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, EltSize, MinAlign(Align, EltSize * i)); BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE)) - .addReg(TmpReg, RegState::Kill) // src + .addReg(TmpVGPR, RegState::Kill) // src .addFrameIndex(Index) // vaddr .addReg(MFI->getScratchRSrcReg()) // srrsrc .addReg(MFI->getStackPtrOffsetReg()) // soffset @@ -965,7 +865,6 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, RegScavenger *RS, bool OnlyToVGPR) const { MachineFunction *MF = MI->getParent()->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); MachineBasicBlock *MBB = MI->getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); @@ -976,84 +875,27 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, return false; MachineFrameInfo &FrameInfo = MF->getFrameInfo(); - const GCNSubtarget &ST = MF->getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); const DebugLoc &DL = MI->getDebugLoc(); - unsigned SuperReg = MI->getOperand(0).getReg(); - bool SpillToSMEM = spillSGPRToSMEM(); - if (SpillToSMEM && OnlyToVGPR) - return false; + Register SuperReg = MI->getOperand(0).getReg(); assert(SuperReg != AMDGPU::M0 && "m0 should never spill"); - unsigned OffsetReg = AMDGPU::M0; unsigned M0CopyReg = AMDGPU::NoRegister; - if (SpillToSMEM) { - if (RS->isRegUsed(AMDGPU::M0)) { - M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg) - .addReg(AMDGPU::M0); - } - } - unsigned EltSize = 4; - unsigned ScalarLoadOp; - - Register FrameReg = getFrameRegister(*MF); const TargetRegisterClass *RC = getPhysRegClass(SuperReg); - if (SpillToSMEM && isSGPRClass(RC)) { - // XXX - if private_element_size is larger than 4 it might be useful to be - // able to spill wider vmem spills. - std::tie(EltSize, ScalarLoadOp) = - getSpillEltSize(getRegSizeInBits(*RC) / 8, false); - } ArrayRef SplitParts = getRegSplitParts(RC, EltSize); unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size(); - // SubReg carries the "Kill" flag when SubReg == SuperReg. - int64_t FrOffset = FrameInfo.getObjectOffset(Index); + Register TmpVGPR; for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { - unsigned SubReg = NumSubRegs == 1 ? - SuperReg : getSubReg(SuperReg, SplitParts[i]); - - if (SpillToSMEM) { - // FIXME: Size may be > 4 but extra bytes wasted. - unsigned Align = FrameInfo.getObjectAlignment(Index); - MachinePointerInfo PtrInfo - = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i); - MachineMemOperand *MMO - = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, - EltSize, MinAlign(Align, EltSize * i)); - - // Add i * 4 offset - int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i); - if (Offset != 0) { - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg) - .addReg(FrameReg) - .addImm(Offset); - } else { - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg) - .addReg(FrameReg); - } - - auto MIB = - BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg) - .addReg(MFI->getScratchRSrcReg()) // sbase - .addReg(OffsetReg, RegState::Kill) // soff - .addImm(0) // glc - .addImm(0) // dlc - .addMemOperand(MMO); - - if (NumSubRegs > 1 && i == 0) - MIB.addReg(SuperReg, RegState::ImplicitDefine); - - continue; - } + Register SubReg = + NumSubRegs == 1 ? 
SuperReg : getSubReg(SuperReg, SplitParts[i]); if (SpillToVGPR) { SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i]; @@ -1071,7 +913,8 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, // Restore SGPR from a stack slot. // FIXME: We should use S_LOAD_DWORD here for VI. - unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + if (!TmpVGPR.isValid()) + TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0); unsigned Align = FrameInfo.getObjectAlignment(Index); MachinePointerInfo PtrInfo @@ -1081,7 +924,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, MachineMemOperand::MOLoad, EltSize, MinAlign(Align, EltSize * i)); - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg) + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpVGPR) .addFrameIndex(Index) // vaddr .addReg(MFI->getScratchRSrcReg()) // srsrc .addReg(MFI->getStackPtrOffsetReg()) // soffset @@ -1090,7 +933,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, auto MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg) - .addReg(TmpReg, RegState::Kill); + .addReg(TmpVGPR, RegState::Kill); if (NumSubRegs > 1) MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine); @@ -1141,11 +984,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const { MachineFunction *MF = MI->getParent()->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); MachineBasicBlock *MBB = MI->getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); MachineFrameInfo &FrameInfo = MF->getFrameInfo(); - const GCNSubtarget &ST = MF->getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); @@ -1255,13 +1096,16 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, // In an entry function/kernel the offset is already the absolute // address relative to the frame register. - unsigned DiffReg - = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register TmpDiffReg = + RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false); + + // If there's no free SGPR, in-place modify the FP + Register DiffReg = TmpDiffReg.isValid() ? TmpDiffReg : FrameReg; bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32; Register ResultReg = IsCopy ? MI->getOperand(0).getReg() : - MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0); BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg) .addReg(FrameReg) @@ -1271,35 +1115,80 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, if (Offset == 0) { // XXX - This never happens because of emergency scavenging slot at 0? BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg) - .addImm(Log2_32(ST.getWavefrontSize())) + .addImm(ST.getWavefrontSizeLog2()) .addReg(DiffReg); } else { - unsigned ScaledReg - = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg) - .addImm(Log2_32(ST.getWavefrontSize())) - .addReg(DiffReg, RegState::Kill); - - // TODO: Fold if use instruction is another add of a constant. 
- if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) { - TII->getAddNoCarry(*MBB, MI, DL, ResultReg) - .addImm(Offset) - .addReg(ScaledReg, RegState::Kill) - .addImm(0); // clamp bit + if (auto MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) { + Register ScaledReg = + RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MIB, 0); + + BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), + ScaledReg) + .addImm(ST.getWavefrontSizeLog2()) + .addReg(DiffReg, RegState::Kill); + + const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32; + + // TODO: Fold if use instruction is another add of a constant. + if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) { + // FIXME: This can fail + MIB.addImm(Offset); + MIB.addReg(ScaledReg, RegState::Kill); + if (!IsVOP2) + MIB.addImm(0); // clamp bit + } else { + Register ConstOffsetReg = + RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MIB, 0, false); + + // This should always be able to use the unused carry out. + assert(ConstOffsetReg && "this scavenge should not be able to fail"); + + BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg) + .addImm(Offset); + MIB.addReg(ConstOffsetReg, RegState::Kill); + MIB.addReg(ScaledReg, RegState::Kill); + MIB.addImm(0); // clamp bit + } } else { - unsigned ConstOffsetReg - = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); - - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg) - .addImm(Offset); - TII->getAddNoCarry(*MBB, MI, DL, ResultReg) - .addReg(ConstOffsetReg, RegState::Kill) + // We have to produce a carry out, and there isn't a free SGPR + // pair for it. We can keep the whole computation on the SALU to + // avoid clobbering an additional register at the cost of an extra + // mov. + + // We may have 1 free scratch SGPR even though a carry out is + // unavailable. Only one additional mov is needed. + Register TmpScaledReg = + RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false); + Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : DiffReg; + + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg) + .addReg(DiffReg, RegState::Kill) + .addImm(ST.getWavefrontSizeLog2()); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), ScaledReg) .addReg(ScaledReg, RegState::Kill) - .addImm(0); // clamp bit + .addImm(Offset); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg) + .addReg(ScaledReg, RegState::Kill); + + // If there were truly no free SGPRs, we need to undo everything. + if (!TmpScaledReg.isValid()) { + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScaledReg) + .addReg(ScaledReg, RegState::Kill) + .addImm(Offset); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg) + .addReg(DiffReg, RegState::Kill) + .addImm(ST.getWavefrontSizeLog2()); + } } } + if (!TmpDiffReg.isValid()) { + // Restore the FP. + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), FrameReg) + .addReg(FrameReg) + .addReg(MFI->getScratchWaveOffsetReg()); + } + // Don't introduce an extra copy if we're just materializing in a mov.
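// [Editorial sketch: not part of the vendored patch] The eliminateFrameIndex() changes above
// avoid creating virtual registers altogether: temporaries are scavenged, and if no spare SGPR
// exists the frame pointer itself is used as scratch and repaired afterwards. Skeleton of that
// fallback, using only calls visible in these hunks:
  Register TmpDiffReg =
      RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
  // If there's no free SGPR, modify the FP in place.
  Register DiffReg = TmpDiffReg.isValid() ? TmpDiffReg : FrameReg;
  // ... compute the scaled, offset-adjusted address out of DiffReg ...
  if (!TmpDiffReg.isValid()) {
    // DiffReg aliased the FP, so undo the in-place subtraction to restore it.
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), FrameReg)
        .addReg(FrameReg)
        .addReg(MFI->getScratchWaveOffsetReg());
  }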
if (IsCopy) MI->eraseFromParent(); @@ -1325,7 +1214,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, int64_t NewOffset = OldImm + Offset; if (isUInt<12>(NewOffset) && - buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) { + buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) { MI->eraseFromParent(); return; } @@ -1337,7 +1226,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, int64_t Offset = FrameInfo.getObjectOffset(Index); FIOp.ChangeToImmediate(Offset); if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) { - unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0); BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) .addImm(Offset); FIOp.ChangeToRegister(TmpReg, false, false, true); @@ -1347,27 +1236,13 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, } StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const { - const TargetRegisterClass *RC = getMinimalPhysRegClass(Reg); - unsigned Size = getRegSizeInBits(*RC); - unsigned AltName = AMDGPU::NoRegAltName; - - switch (Size) { - case 32: AltName = AMDGPU::Reg32; break; - case 64: AltName = AMDGPU::Reg64; break; - case 96: AltName = AMDGPU::Reg96; break; - case 128: AltName = AMDGPU::Reg128; break; - case 160: AltName = AMDGPU::Reg160; break; - case 256: AltName = AMDGPU::Reg256; break; - case 512: AltName = AMDGPU::Reg512; break; - case 1024: AltName = AMDGPU::Reg1024; break; - } - return AMDGPUInstPrinter::getRegisterName(Reg, AltName); + return AMDGPUInstPrinter::getRegisterName(Reg); } // FIXME: This is very slow. It might be worth creating a map from physreg to // register class. const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const { - assert(!TargetRegisterInfo::isVirtualRegister(Reg)); + assert(!Register::isVirtualRegister(Reg)); static const TargetRegisterClass *const BaseClasses[] = { &AMDGPU::VGPR_32RegClass, @@ -1408,8 +1283,6 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const { // TargetRegisterClass to mark which classes are VGPRs to make this trivial. 
bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const { unsigned Size = getRegSizeInBits(*RC); - if (Size < 32) - return false; switch (Size) { case 32: return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr; @@ -1427,8 +1300,11 @@ bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const { return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr; case 1024: return getCommonSubClass(&AMDGPU::VReg_1024RegClass, RC) != nullptr; + case 1: + return getCommonSubClass(&AMDGPU::VReg_1RegClass, RC) != nullptr; default: - llvm_unreachable("Invalid register class size"); + assert(Size < 32 && "Invalid register class size"); + return false; } } @@ -1476,6 +1352,8 @@ const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass( return &AMDGPU::VReg_512RegClass; case 1024: return &AMDGPU::VReg_1024RegClass; + case 1: + return &AMDGPU::VReg_1RegClass; default: llvm_unreachable("Invalid register class size"); } @@ -1509,7 +1387,7 @@ const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass( case 96: return &AMDGPU::SReg_96RegClass; case 128: - return &AMDGPU::SReg_128RegClass; + return &AMDGPU::SGPR_128RegClass; case 160: return &AMDGPU::SReg_160RegClass; case 256: @@ -1539,7 +1417,7 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass( case 3: return &AMDGPU::SReg_96RegClass; case 4: - return &AMDGPU::SReg_128RegClass; + return &AMDGPU::SGPR_128RegClass; case 5: return &AMDGPU::SReg_160RegClass; case 8: @@ -1587,6 +1465,15 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass( } } +bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const { + if (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST && + OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST) + return !ST.hasMFMAInlineLiteralBug(); + + return OpType >= AMDGPU::OPERAND_SRC_FIRST && + OpType <= AMDGPU::OPERAND_SRC_LAST; +} + bool SIRegisterInfo::shouldRewriteCopySrc( const TargetRegisterClass *DefRC, unsigned DefSubReg, @@ -1802,7 +1689,7 @@ ArrayRef SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC const TargetRegisterClass* SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI, unsigned Reg) const { - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) return MRI.getRegClass(Reg); return getPhysRegClass(Reg); @@ -1845,8 +1732,6 @@ bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI, unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { - - const GCNSubtarget &ST = MF.getSubtarget(); const SIMachineFunctionInfo *MFI = MF.getInfo(); unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(), @@ -1900,18 +1785,22 @@ SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size, return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass : &AMDGPU::SReg_64_XEXECRegClass; case AMDGPU::SGPRRegBankID: - return &AMDGPU::SReg_32_XM0RegClass; + return &AMDGPU::SReg_32RegClass; case AMDGPU::SCCRegBankID: // This needs to return an allocatable class, so don't bother returning // the dummy SCC class. - return &AMDGPU::SReg_32_XM0RegClass; + // + // FIXME: This is a grotesque hack. We use SGPR_32 as an indication this + // was not an VCC bank value since we use the larger class SReg_32 for + // other values. These should all use SReg_32. + return &AMDGPU::SGPR_32RegClass; default: llvm_unreachable("unknown register bank"); } } case 32: return RB.getID() == AMDGPU::VGPRRegBankID ? 
&AMDGPU::VGPR_32RegClass : - &AMDGPU::SReg_32_XM0RegClass; + &AMDGPU::SReg_32RegClass; case 64: return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass : &AMDGPU::SReg_64_XEXECRegClass; @@ -1920,7 +1809,7 @@ SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size, &AMDGPU::SReg_96RegClass; case 128: return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass : - &AMDGPU::SReg_128RegClass; + &AMDGPU::SGPR_128RegClass; case 160: return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_160RegClass : &AMDGPU::SReg_160RegClass; @@ -1930,10 +1819,13 @@ SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size, case 512: return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass : &AMDGPU::SReg_512RegClass; + case 1024: + return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_1024RegClass : + &AMDGPU::SReg_1024RegClass; default: if (Size < 32) return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass : - &AMDGPU::SReg_32_XM0RegClass; + &AMDGPU::SReg_32RegClass; return nullptr; } } @@ -1941,9 +1833,12 @@ SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size, const TargetRegisterClass * SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO, const MachineRegisterInfo &MRI) const { - if (const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg())) + const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(MO.getReg()); + if (const RegisterBank *RB = RCOrRB.dyn_cast()) return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB, MRI); - return nullptr; + + const TargetRegisterClass *RC = RCOrRB.get(); + return getAllocatableClass(RC); } unsigned SIRegisterInfo::getVCC() const { @@ -1974,7 +1869,7 @@ MachineInstr *SIRegisterInfo::findReachingDef(unsigned Reg, unsigned SubReg, SlotIndex UseIdx = LIS->getInstructionIndex(Use); SlotIndex DefIdx; - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { if (!LIS->hasInterval(Reg)) return nullptr; LiveInterval &LI = LIS->getInterval(Reg); diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h index 34487c96e72e..ac3dea1a1a28 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/lib/Target/AMDGPU/SIRegisterInfo.h @@ -27,6 +27,7 @@ class SIMachineFunctionInfo; class SIRegisterInfo final : public AMDGPURegisterInfo { private: + const GCNSubtarget &ST; unsigned SGPRSetID; unsigned VGPRSetID; unsigned AGPRSetID; @@ -34,7 +35,6 @@ private: BitVector VGPRPressureSets; BitVector AGPRPressureSets; bool SpillSGPRToVGPR; - bool SpillSGPRToSMEM; bool isWave32; void classifyPressureSet(unsigned PSetID, unsigned Reg, @@ -46,10 +46,6 @@ public: return SpillSGPRToVGPR; } - bool spillSGPRToSMEM() const { - return SpillSGPRToSMEM; - } - /// Return the end register initially reserved for the scratch buffer in case /// spilling is needed. unsigned reservedPrivateSegmentBufferReg(const MachineFunction &MF) const; @@ -141,7 +137,7 @@ public: bool isSGPRReg(const MachineRegisterInfo &MRI, unsigned Reg) const { const TargetRegisterClass *RC; - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) RC = MRI.getRegClass(Reg); else RC = getPhysRegClass(Reg); @@ -193,10 +189,7 @@ public: /// \returns True if operands defined with this operand type can accept /// an inline constant. i.e. An integer value in the range (-16, 64) or /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f. 
- bool opCanUseInlineConstant(unsigned OpType) const { - return OpType >= AMDGPU::OPERAND_SRC_FIRST && - OpType <= AMDGPU::OPERAND_SRC_LAST; - } + bool opCanUseInlineConstant(unsigned OpType) const; unsigned findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, @@ -270,7 +263,7 @@ public: const MachineRegisterInfo &MRI) const override; const TargetRegisterClass *getBoolRC() const { - return isWave32 ? &AMDGPU::SReg_32_XM0RegClass + return isWave32 ? &AMDGPU::SReg_32RegClass : &AMDGPU::SReg_64RegClass; } diff --git a/lib/Target/AMDGPU/SIRegisterInfo.td b/lib/Target/AMDGPU/SIRegisterInfo.td index d5948a7862cc..82219cbdf3b2 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/lib/Target/AMDGPU/SIRegisterInfo.td @@ -37,50 +37,52 @@ class getSubRegs { !if(!eq(size, 16), ret16, ret32)))))); } -let Namespace = "AMDGPU" in { -defset list AllRegAltNameIndices = { - def Reg32 : RegAltNameIndex; - def Reg64 : RegAltNameIndex; - def Reg96 : RegAltNameIndex; - def Reg128 : RegAltNameIndex; - def Reg160 : RegAltNameIndex; - def Reg256 : RegAltNameIndex; - def Reg512 : RegAltNameIndex; - def Reg1024 : RegAltNameIndex; -} -} +// Generates list of sequential register tuple names. +// E.g. RegSeq<3,2,2,"s">.ret -> [ "s[0:1]", "s[2:3]" ] +class RegSeqNames { + int next = !add(start, stride); + int end_reg = !add(!add(start, size), -1); + list ret = + !if(!le(end_reg, last_reg), + !listconcat([prefix # "[" # start # ":" # end_reg # "]"], + RegSeqNames.ret), + []); +} + +// Generates list of dags for register tupless. +class RegSeqDags { + dag trunc_rc = (trunc RC, + !if(!and(!eq(stride, 1), !eq(start, 0)), + !add(!add(last_reg, 2), !mul(size, -1)), + !add(last_reg, 1))); + list ret = + !if(!lt(start, size), + !listconcat([(add (decimate (shl trunc_rc, start), stride))], + RegSeqDags.ret), + []); +} + +class SIRegisterTuples Indices, RegisterClass RC, + int last_reg, int stride, int size, string prefix> : + RegisterTuples.ret, + RegSeqNames.ret>; //===----------------------------------------------------------------------===// // Declarations that describe the SI registers //===----------------------------------------------------------------------===// -class SIReg regIdx = 0, string prefix = "", - int regNo = !cast(regIdx)> : - Register, +class SIReg regIdx = 0> : + Register, DwarfRegNum<[!cast(HWEncoding)]> { let Namespace = "AMDGPU"; - let RegAltNameIndices = AllRegAltNameIndices; // This is the not yet the complete register encoding. An additional // bit is set for VGPRs. 
let HWEncoding = regIdx; } -class SIRegisterWithSubRegs subregs> : - RegisterWithSubRegs { - let RegAltNameIndices = AllRegAltNameIndices; - let AltNames = [ n, n, n, n, n, n, n, n ]; -} - // Special Registers def VCC_LO : SIReg<"vcc_lo", 106>; def VCC_HI : SIReg<"vcc_hi", 107>; @@ -93,7 +95,7 @@ def SP_REG : SIReg<"sp", 0>; def SCRATCH_WAVE_OFFSET_REG : SIReg<"scratch_wave_offset", 0>; // VCC for 64-bit instructions -def VCC : SIRegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>, +def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>, DwarfRegAlias { let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1]; @@ -103,7 +105,7 @@ def VCC : SIRegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>, def EXEC_LO : SIReg<"exec_lo", 126>; def EXEC_HI : SIReg<"exec_hi", 127>; -def EXEC : SIRegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>, +def EXEC : RegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>, DwarfRegAlias { let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1]; @@ -134,7 +136,7 @@ def LDS_DIRECT : SIReg <"src_lds_direct", 254>; def XNACK_MASK_LO : SIReg<"xnack_mask_lo", 104>; def XNACK_MASK_HI : SIReg<"xnack_mask_hi", 105>; -def XNACK_MASK : SIRegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]>, +def XNACK_MASK : RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]>, DwarfRegAlias { let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1]; @@ -145,7 +147,7 @@ def XNACK_MASK : SIRegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_ def TBA_LO : SIReg<"tba_lo", 108>; def TBA_HI : SIReg<"tba_hi", 109>; -def TBA : SIRegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>, +def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>, DwarfRegAlias { let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1]; @@ -155,7 +157,7 @@ def TBA : SIRegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>, def TMA_LO : SIReg<"tma_lo", 110>; def TMA_HI : SIReg<"tma_hi", 111>; -def TMA : SIRegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>, +def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>, DwarfRegAlias { let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1]; @@ -175,7 +177,7 @@ multiclass FLAT_SCR_LOHI_m ci_e, bits<16> vi_e> { } class FlatReg encoding> : - SIRegisterWithSubRegs<"flat_scratch", [lo, hi]>, + RegisterWithSubRegs<"flat_scratch", [lo, hi]>, DwarfRegAlias { let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1]; @@ -191,19 +193,19 @@ def FLAT_SCR : FlatReg; // SGPR registers foreach Index = 0-105 in { - def SGPR#Index : SIReg <"SGPR"#Index, Index, "s">; + def SGPR#Index : SIReg <"s"#Index, Index>; } // VGPR registers foreach Index = 0-255 in { - def VGPR#Index : SIReg <"VGPR"#Index, Index, "v"> { + def VGPR#Index : SIReg <"v"#Index, Index> { let HWEncoding{8} = 1; } } // AccVGPR registers foreach Index = 0-255 in { - def AGPR#Index : SIReg <"AGPR"#Index, Index, "a"> { + def AGPR#Index : SIReg <"a"#Index, Index> { let HWEncoding{8} = 1; } } @@ -226,102 +228,32 @@ def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> { // SGPR 32-bit registers def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add (sequence "SGPR%u", 0, 105)), Reg32> { + (add (sequence "SGPR%u", 0, 105))> { // Give all SGPR classes higher priority than VGPR classes, because // we want to spill SGPRs to VGPRs. let AllocationPriority = 9; } // SGPR 64-bit registers -def SGPR_64Regs : RegisterTuples.ret, - [(add (decimate SGPR_32, 2)), - (add (decimate (shl SGPR_32, 1), 2))]>; +def SGPR_64Regs : SIRegisterTuples.ret, SGPR_32, 105, 2, 2, "s">; // SGPR 96-bit registers. 
No operations use these, but for symmetry with 96-bit VGPRs. -def SGPR_96Regs : RegisterTuples.ret, - [(add (decimate SGPR_32, 3)), - (add (decimate (shl SGPR_32, 1), 3)), - (add (decimate (shl SGPR_32, 2), 3))]>; +def SGPR_96Regs : SIRegisterTuples.ret, SGPR_32, 105, 3, 3, "s">; // SGPR 128-bit registers -def SGPR_128Regs : RegisterTuples.ret, - [(add (decimate SGPR_32, 4)), - (add (decimate (shl SGPR_32, 1), 4)), - (add (decimate (shl SGPR_32, 2), 4)), - (add (decimate (shl SGPR_32, 3), 4))]>; +def SGPR_128Regs : SIRegisterTuples.ret, SGPR_32, 105, 4, 4, "s">; // SGPR 160-bit registers. No operations use these, but for symmetry with 160-bit VGPRs. -def SGPR_160Regs : RegisterTuples.ret, - [(add (decimate SGPR_32, 4)), - (add (decimate (shl SGPR_32, 1), 4)), - (add (decimate (shl SGPR_32, 2), 4)), - (add (decimate (shl SGPR_32, 3), 4)), - (add (decimate (shl SGPR_32, 4), 4))]>; +def SGPR_160Regs : SIRegisterTuples.ret, SGPR_32, 105, 4, 5, "s">; // SGPR 256-bit registers -def SGPR_256Regs : RegisterTuples.ret, - [(add (decimate SGPR_32, 4)), - (add (decimate (shl SGPR_32, 1), 4)), - (add (decimate (shl SGPR_32, 2), 4)), - (add (decimate (shl SGPR_32, 3), 4)), - (add (decimate (shl SGPR_32, 4), 4)), - (add (decimate (shl SGPR_32, 5), 4)), - (add (decimate (shl SGPR_32, 6), 4)), - (add (decimate (shl SGPR_32, 7), 4))]>; +def SGPR_256Regs : SIRegisterTuples.ret, SGPR_32, 105, 4, 8, "s">; // SGPR 512-bit registers -def SGPR_512Regs : RegisterTuples.ret, - [(add (decimate SGPR_32, 4)), - (add (decimate (shl SGPR_32, 1), 4)), - (add (decimate (shl SGPR_32, 2), 4)), - (add (decimate (shl SGPR_32, 3), 4)), - (add (decimate (shl SGPR_32, 4), 4)), - (add (decimate (shl SGPR_32, 5), 4)), - (add (decimate (shl SGPR_32, 6), 4)), - (add (decimate (shl SGPR_32, 7), 4)), - (add (decimate (shl SGPR_32, 8), 4)), - (add (decimate (shl SGPR_32, 9), 4)), - (add (decimate (shl SGPR_32, 10), 4)), - (add (decimate (shl SGPR_32, 11), 4)), - (add (decimate (shl SGPR_32, 12), 4)), - (add (decimate (shl SGPR_32, 13), 4)), - (add (decimate (shl SGPR_32, 14), 4)), - (add (decimate (shl SGPR_32, 15), 4))]>; +def SGPR_512Regs : SIRegisterTuples.ret, SGPR_32, 105, 4, 16, "s">; // SGPR 1024-bit registers -def SGPR_1024Regs : RegisterTuples.ret, - [(add (decimate SGPR_32, 4)), - (add (decimate (shl SGPR_32, 1), 4)), - (add (decimate (shl SGPR_32, 2), 4)), - (add (decimate (shl SGPR_32, 3), 4)), - (add (decimate (shl SGPR_32, 4), 4)), - (add (decimate (shl SGPR_32, 5), 4)), - (add (decimate (shl SGPR_32, 6), 4)), - (add (decimate (shl SGPR_32, 7), 4)), - (add (decimate (shl SGPR_32, 8), 4)), - (add (decimate (shl SGPR_32, 9), 4)), - (add (decimate (shl SGPR_32, 10), 4)), - (add (decimate (shl SGPR_32, 11), 4)), - (add (decimate (shl SGPR_32, 12), 4)), - (add (decimate (shl SGPR_32, 13), 4)), - (add (decimate (shl SGPR_32, 14), 4)), - (add (decimate (shl SGPR_32, 15), 4)), - (add (decimate (shl SGPR_32, 16), 4)), - (add (decimate (shl SGPR_32, 17), 4)), - (add (decimate (shl SGPR_32, 18), 4)), - (add (decimate (shl SGPR_32, 19), 4)), - (add (decimate (shl SGPR_32, 20), 4)), - (add (decimate (shl SGPR_32, 21), 4)), - (add (decimate (shl SGPR_32, 22), 4)), - (add (decimate (shl SGPR_32, 23), 4)), - (add (decimate (shl SGPR_32, 24), 4)), - (add (decimate (shl SGPR_32, 25), 4)), - (add (decimate (shl SGPR_32, 26), 4)), - (add (decimate (shl SGPR_32, 27), 4)), - (add (decimate (shl SGPR_32, 28), 4)), - (add (decimate (shl SGPR_32, 29), 4)), - (add (decimate (shl SGPR_32, 30), 4)), - (add (decimate (shl SGPR_32, 31), 4))]>; +def 
SGPR_1024Regs : SIRegisterTuples.ret, SGPR_32, 105, 4, 32, "s">; // Trap handler TMP 32-bit registers def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32, @@ -330,51 +262,21 @@ def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32, } // Trap handler TMP 64-bit registers -def TTMP_64Regs : RegisterTuples.ret, - [(add (decimate TTMP_32, 2)), - (add (decimate (shl TTMP_32, 1), 2))]>; +def TTMP_64Regs : SIRegisterTuples.ret, TTMP_32, 15, 2, 2, "ttmp">; // Trap handler TMP 128-bit registers -def TTMP_128Regs : RegisterTuples.ret, - [(add (decimate TTMP_32, 4)), - (add (decimate (shl TTMP_32, 1), 4)), - (add (decimate (shl TTMP_32, 2), 4)), - (add (decimate (shl TTMP_32, 3), 4))]>; - -def TTMP_256Regs : RegisterTuples.ret, - [(add (decimate TTMP_32, 4)), - (add (decimate (shl TTMP_32, 1), 4)), - (add (decimate (shl TTMP_32, 2), 4)), - (add (decimate (shl TTMP_32, 3), 4)), - (add (decimate (shl TTMP_32, 4), 4)), - (add (decimate (shl TTMP_32, 5), 4)), - (add (decimate (shl TTMP_32, 6), 4)), - (add (decimate (shl TTMP_32, 7), 4))]>; - -def TTMP_512Regs : RegisterTuples.ret, - [(add (decimate TTMP_32, 4)), - (add (decimate (shl TTMP_32, 1), 4)), - (add (decimate (shl TTMP_32, 2), 4)), - (add (decimate (shl TTMP_32, 3), 4)), - (add (decimate (shl TTMP_32, 4), 4)), - (add (decimate (shl TTMP_32, 5), 4)), - (add (decimate (shl TTMP_32, 6), 4)), - (add (decimate (shl TTMP_32, 7), 4)), - (add (decimate (shl TTMP_32, 8), 4)), - (add (decimate (shl TTMP_32, 9), 4)), - (add (decimate (shl TTMP_32, 10), 4)), - (add (decimate (shl TTMP_32, 11), 4)), - (add (decimate (shl TTMP_32, 12), 4)), - (add (decimate (shl TTMP_32, 13), 4)), - (add (decimate (shl TTMP_32, 14), 4)), - (add (decimate (shl TTMP_32, 15), 4))]>; +def TTMP_128Regs : SIRegisterTuples.ret, TTMP_32, 15, 4, 4, "ttmp">; + +def TTMP_256Regs : SIRegisterTuples.ret, TTMP_32, 15, 4, 8, "ttmp">; + +def TTMP_512Regs : SIRegisterTuples.ret, TTMP_32, 15, 4, 16, "ttmp">; class TmpRegTuplesBase subRegs, list indices = getSubRegs.ret, int index1 = !add(index, !add(size, -1)), string name = "ttmp["#index#":"#index1#"]"> : - SIRegisterWithSubRegs { + RegisterWithSubRegs { let HWEncoding = subRegs[0].HWEncoding; let SubRegIndices = indices; } @@ -448,196 +350,80 @@ def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TT TTMP8_gfx9_gfx10, TTMP9_gfx9_gfx10, TTMP10_gfx9_gfx10, TTMP11_gfx9_gfx10, TTMP12_gfx9_gfx10, TTMP13_gfx9_gfx10, TTMP14_gfx9_gfx10, TTMP15_gfx9_gfx10]>; +class RegisterTypes reg_types> { + list types = reg_types; +} + +def Reg16Types : RegisterTypes<[i16, f16]>; +def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, p2, p3, p5, p6]>; + + // VGPR 32-bit registers // i16/f16 only on VI+ -def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add (sequence "VGPR%u", 0, 255)), Reg32> { +def VGPR_32 : RegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32, + (add (sequence "VGPR%u", 0, 255))> { let AllocationPriority = 1; let Size = 32; } // VGPR 64-bit registers -def VGPR_64 : RegisterTuples.ret, - [(add (trunc VGPR_32, 255)), - (add (shl VGPR_32, 1))]>; +def VGPR_64 : SIRegisterTuples.ret, VGPR_32, 255, 1, 2, "v">; // VGPR 96-bit registers -def VGPR_96 : RegisterTuples.ret, - [(add (trunc VGPR_32, 254)), - (add (shl VGPR_32, 1)), - (add (shl VGPR_32, 2))]>; +def VGPR_96 : SIRegisterTuples.ret, VGPR_32, 255, 1, 3, "v">; // VGPR 128-bit registers -def VGPR_128 : RegisterTuples.ret, - [(add (trunc VGPR_32, 253)), - (add (shl VGPR_32, 1)), - (add (shl 
VGPR_32, 2)), - (add (shl VGPR_32, 3))]>; +def VGPR_128 : SIRegisterTuples.ret, VGPR_32, 255, 1, 4, "v">; // VGPR 160-bit registers -def VGPR_160 : RegisterTuples.ret, - [(add (trunc VGPR_32, 252)), - (add (shl VGPR_32, 1)), - (add (shl VGPR_32, 2)), - (add (shl VGPR_32, 3)), - (add (shl VGPR_32, 4))]>; +def VGPR_160 : SIRegisterTuples.ret, VGPR_32, 255, 1, 5, "v">; // VGPR 256-bit registers -def VGPR_256 : RegisterTuples.ret, - [(add (trunc VGPR_32, 249)), - (add (shl VGPR_32, 1)), - (add (shl VGPR_32, 2)), - (add (shl VGPR_32, 3)), - (add (shl VGPR_32, 4)), - (add (shl VGPR_32, 5)), - (add (shl VGPR_32, 6)), - (add (shl VGPR_32, 7))]>; +def VGPR_256 : SIRegisterTuples.ret, VGPR_32, 255, 1, 8, "v">; // VGPR 512-bit registers -def VGPR_512 : RegisterTuples.ret, - [(add (trunc VGPR_32, 241)), - (add (shl VGPR_32, 1)), - (add (shl VGPR_32, 2)), - (add (shl VGPR_32, 3)), - (add (shl VGPR_32, 4)), - (add (shl VGPR_32, 5)), - (add (shl VGPR_32, 6)), - (add (shl VGPR_32, 7)), - (add (shl VGPR_32, 8)), - (add (shl VGPR_32, 9)), - (add (shl VGPR_32, 10)), - (add (shl VGPR_32, 11)), - (add (shl VGPR_32, 12)), - (add (shl VGPR_32, 13)), - (add (shl VGPR_32, 14)), - (add (shl VGPR_32, 15))]>; +def VGPR_512 : SIRegisterTuples.ret, VGPR_32, 255, 1, 16, "v">; // VGPR 1024-bit registers -def VGPR_1024 : RegisterTuples.ret, - [(add (trunc VGPR_32, 225)), - (add (shl VGPR_32, 1)), - (add (shl VGPR_32, 2)), - (add (shl VGPR_32, 3)), - (add (shl VGPR_32, 4)), - (add (shl VGPR_32, 5)), - (add (shl VGPR_32, 6)), - (add (shl VGPR_32, 7)), - (add (shl VGPR_32, 8)), - (add (shl VGPR_32, 9)), - (add (shl VGPR_32, 10)), - (add (shl VGPR_32, 11)), - (add (shl VGPR_32, 12)), - (add (shl VGPR_32, 13)), - (add (shl VGPR_32, 14)), - (add (shl VGPR_32, 15)), - (add (shl VGPR_32, 16)), - (add (shl VGPR_32, 17)), - (add (shl VGPR_32, 18)), - (add (shl VGPR_32, 19)), - (add (shl VGPR_32, 20)), - (add (shl VGPR_32, 21)), - (add (shl VGPR_32, 22)), - (add (shl VGPR_32, 23)), - (add (shl VGPR_32, 24)), - (add (shl VGPR_32, 25)), - (add (shl VGPR_32, 26)), - (add (shl VGPR_32, 27)), - (add (shl VGPR_32, 28)), - (add (shl VGPR_32, 29)), - (add (shl VGPR_32, 30)), - (add (shl VGPR_32, 31))]>; +def VGPR_1024 : SIRegisterTuples.ret, VGPR_32, 255, 1, 32, "v">; // AccVGPR 32-bit registers def AGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add (sequence "AGPR%u", 0, 255)), Reg32> { + (add (sequence "AGPR%u", 0, 255))> { let AllocationPriority = 1; let Size = 32; } // AGPR 64-bit registers -def AGPR_64 : RegisterTuples.ret, - [(add (trunc AGPR_32, 255)), - (add (shl AGPR_32, 1))]>; +def AGPR_64 : SIRegisterTuples.ret, AGPR_32, 255, 1, 2, "a">; // AGPR 128-bit registers -def AGPR_128 : RegisterTuples.ret, - [(add (trunc AGPR_32, 253)), - (add (shl AGPR_32, 1)), - (add (shl AGPR_32, 2)), - (add (shl AGPR_32, 3))]>; +def AGPR_128 : SIRegisterTuples.ret, AGPR_32, 255, 1, 4, "a">; // AGPR 512-bit registers -def AGPR_512 : RegisterTuples.ret, - [(add (trunc AGPR_32, 241)), - (add (shl AGPR_32, 1)), - (add (shl AGPR_32, 2)), - (add (shl AGPR_32, 3)), - (add (shl AGPR_32, 4)), - (add (shl AGPR_32, 5)), - (add (shl AGPR_32, 6)), - (add (shl AGPR_32, 7)), - (add (shl AGPR_32, 8)), - (add (shl AGPR_32, 9)), - (add (shl AGPR_32, 10)), - (add (shl AGPR_32, 11)), - (add (shl AGPR_32, 12)), - (add (shl AGPR_32, 13)), - (add (shl AGPR_32, 14)), - (add (shl AGPR_32, 15))]>; +def AGPR_512 : SIRegisterTuples.ret, AGPR_32, 255, 1, 16, "a">; // AGPR 1024-bit registers -def AGPR_1024 : RegisterTuples.ret, - [(add (trunc 
AGPR_32, 225)), - (add (shl AGPR_32, 1)), - (add (shl AGPR_32, 2)), - (add (shl AGPR_32, 3)), - (add (shl AGPR_32, 4)), - (add (shl AGPR_32, 5)), - (add (shl AGPR_32, 6)), - (add (shl AGPR_32, 7)), - (add (shl AGPR_32, 8)), - (add (shl AGPR_32, 9)), - (add (shl AGPR_32, 10)), - (add (shl AGPR_32, 11)), - (add (shl AGPR_32, 12)), - (add (shl AGPR_32, 13)), - (add (shl AGPR_32, 14)), - (add (shl AGPR_32, 15)), - (add (shl AGPR_32, 16)), - (add (shl AGPR_32, 17)), - (add (shl AGPR_32, 18)), - (add (shl AGPR_32, 19)), - (add (shl AGPR_32, 20)), - (add (shl AGPR_32, 21)), - (add (shl AGPR_32, 22)), - (add (shl AGPR_32, 23)), - (add (shl AGPR_32, 24)), - (add (shl AGPR_32, 25)), - (add (shl AGPR_32, 26)), - (add (shl AGPR_32, 27)), - (add (shl AGPR_32, 28)), - (add (shl AGPR_32, 29)), - (add (shl AGPR_32, 30)), - (add (shl AGPR_32, 31))]>; +def AGPR_1024 : SIRegisterTuples.ret, AGPR_32, 255, 1, 32, "a">; //===----------------------------------------------------------------------===// // Register classes used as source and destination //===----------------------------------------------------------------------===// def Pseudo_SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add FP_REG, SP_REG, SCRATCH_WAVE_OFFSET_REG), Reg32> { + (add FP_REG, SP_REG, SCRATCH_WAVE_OFFSET_REG)> { let isAllocatable = 0; let CopyCost = -1; } def Pseudo_SReg_128 : RegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32, - (add PRIVATE_RSRC_REG), Reg128> { + (add PRIVATE_RSRC_REG)> { let isAllocatable = 0; let CopyCost = -1; } def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add LDS_DIRECT), Reg32> { + (add LDS_DIRECT)> { let isAllocatable = 0; let CopyCost = -1; } @@ -648,41 +434,40 @@ def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f1 (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI, SGPR_NULL, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID, - SRC_VCCZ, SRC_EXECZ, SRC_SCC), Reg32> { + SRC_VCCZ, SRC_EXECZ, SRC_SCC)> { let AllocationPriority = 10; } def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, - (add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS), Reg32> { + (add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS)> { let AllocationPriority = 10; } def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, - (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI), Reg32> { + (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> { let AllocationPriority = 10; } // Register class for all scalar registers (SGPRs + Special Registers) def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, - (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI), Reg32> { + (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI)> { let AllocationPriority = 10; } def SRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, - (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI, LDS_DIRECT_CLASS), - Reg32> { + (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI, LDS_DIRECT_CLASS)> { let isAllocatable = 0; } def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32, - (add SGPR_64Regs), Reg64> { + (add SGPR_64Regs)> { let CopyCost = 1; let AllocationPriority = 11; } // CCR (call clobbered registers) SGPR 64-bit registers def CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32, - (add 
(trunc SGPR_64, 16)), Reg64> { + (add (trunc SGPR_64, 16))> { let CopyCost = SGPR_64.CopyCost; let AllocationPriority = SGPR_64.AllocationPriority; } @@ -693,13 +478,13 @@ def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32, } def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32, - (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA), Reg64> { + (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA)> { let CopyCost = 1; let AllocationPriority = 13; } def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32, - (add SReg_64_XEXEC, EXEC), Reg64> { + (add SReg_64_XEXEC, EXEC)> { let CopyCost = 1; let AllocationPriority = 13; } @@ -722,17 +507,17 @@ let CopyCost = 2 in { // There are no 3-component scalar instructions, but this is needed // for symmetry with VGPRs. def SGPR_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, - (add SGPR_96Regs), Reg96> { + (add SGPR_96Regs)> { let AllocationPriority = 14; } def SReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, - (add SGPR_96), Reg96> { + (add SGPR_96)> { let AllocationPriority = 14; } def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32, - (add SGPR_128Regs), Reg128> { + (add SGPR_128Regs)> { let AllocationPriority = 15; } @@ -742,8 +527,9 @@ def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32, } def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, - (add SGPR_128, TTMP_128), Reg128> { + (add SGPR_128, TTMP_128)> { let AllocationPriority = 15; + let isAllocatable = 0; } } // End CopyCost = 2 @@ -751,17 +537,16 @@ def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, // There are no 5-component scalar instructions, but this is needed // for symmetry with VGPRs. 
def SGPR_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32, - (add SGPR_160Regs), Reg160> { + (add SGPR_160Regs)> { let AllocationPriority = 16; } def SReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32, - (add SGPR_160), Reg160> { + (add SGPR_160)> { let AllocationPriority = 16; } -def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs), - Reg256> { +def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs)> { let AllocationPriority = 17; } @@ -770,14 +555,14 @@ def TTMP_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add TTMP_256Regs)> { } def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, - (add SGPR_256, TTMP_256), Reg256> { + (add SGPR_256, TTMP_256)> { // Requires 4 s_mov_b64 to copy let CopyCost = 4; let AllocationPriority = 17; } def SGPR_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, - (add SGPR_512Regs), Reg512> { + (add SGPR_512Regs)> { let AllocationPriority = 18; } @@ -787,31 +572,31 @@ def TTMP_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, } def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, - (add SGPR_512, TTMP_512), Reg512> { + (add SGPR_512, TTMP_512)> { // Requires 8 s_mov_b64 to copy let CopyCost = 8; let AllocationPriority = 18; } def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add VGPR_32, LDS_DIRECT_CLASS), Reg32> { + (add VGPR_32, LDS_DIRECT_CLASS)> { let isAllocatable = 0; } def SGPR_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, - (add SGPR_1024Regs), Reg1024> { + (add SGPR_1024Regs)> { let AllocationPriority = 19; } def SReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, - (add SGPR_1024), Reg1024> { + (add SGPR_1024)> { let CopyCost = 16; let AllocationPriority = 19; } // Register class for all vector registers (VGPRs + Interploation Registers) -def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32, - (add VGPR_64), Reg64> { +def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16, p0, p1, p4], 32, + (add VGPR_64)> { let Size = 64; // Requires 2 v_mov_b32 to copy @@ -819,7 +604,7 @@ def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32 let AllocationPriority = 2; } -def VReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add VGPR_96), Reg96> { +def VReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add VGPR_96)> { let Size = 96; // Requires 3 v_mov_b32 to copy @@ -828,7 +613,7 @@ def VReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add VGPR_96), Reg96> } def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, - (add VGPR_128), Reg128> { + (add VGPR_128)> { let Size = 128; // Requires 4 v_mov_b32 to copy @@ -837,7 +622,7 @@ def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, } def VReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32, - (add VGPR_160), Reg160> { + (add VGPR_160)> { let Size = 160; // Requires 5 v_mov_b32 to copy @@ -846,28 +631,28 @@ def VReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32, } def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, - (add VGPR_256), Reg256> { + (add VGPR_256)> { let Size = 256; let CopyCost = 8; let AllocationPriority = 6; } def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, - (add VGPR_512), Reg512> { + (add VGPR_512)> { let Size = 512; let CopyCost = 16; let AllocationPriority = 7; } def VReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, - (add VGPR_1024), Reg1024> { + (add VGPR_1024)> { let Size = 1024; let CopyCost = 32; let AllocationPriority 
= 8; } def AReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32, - (add AGPR_64), Reg64> { + (add AGPR_64)> { let Size = 64; let CopyCost = 5; @@ -875,7 +660,7 @@ def AReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32 } def AReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, - (add AGPR_128), Reg128> { + (add AGPR_128)> { let Size = 128; // Requires 4 v_accvgpr_write and 4 v_accvgpr_read to copy + burn 1 vgpr @@ -884,40 +669,39 @@ def AReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, } def AReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, - (add AGPR_512), Reg512> { + (add AGPR_512)> { let Size = 512; let CopyCost = 33; let AllocationPriority = 7; } def AReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, - (add AGPR_1024), Reg1024> { + (add AGPR_1024)> { let Size = 1024; let CopyCost = 65; let AllocationPriority = 8; } -def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32), Reg32> { - let Size = 32; +def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> { + let Size = 1; } def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add VGPR_32, SReg_32, LDS_DIRECT_CLASS), Reg32> { + (add VGPR_32, SReg_32, LDS_DIRECT_CLASS)> { let isAllocatable = 0; } -def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64), - Reg64> { +def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> { let isAllocatable = 0; } def AV_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add AGPR_32, VGPR_32), Reg32> { + (add AGPR_32, VGPR_32)> { let isAllocatable = 0; } def AV_64 : RegisterClass<"AMDGPU", [i64, f64, v4f16], 32, - (add AReg_64, VReg_64), Reg64> { + (add AReg_64, VReg_64)> { let isAllocatable = 0; } diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 7ee178149c7a..8afca2cdc325 100644 --- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -77,8 +77,8 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, // Try to fold Src0 MachineOperand &Src0 = MI.getOperand(Src0Idx); if (Src0.isReg()) { - unsigned Reg = Src0.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI.hasOneUse(Reg)) { + Register Reg = Src0.getReg(); + if (Register::isVirtualRegister(Reg) && MRI.hasOneUse(Reg)) { MachineInstr *Def = MRI.getUniqueVRegDef(Reg); if (Def && Def->isMoveImmediate()) { MachineOperand &MovSrc = Def->getOperand(1); @@ -360,8 +360,7 @@ static bool shrinkScalarLogicOp(const GCNSubtarget &ST, } if (NewImm != 0) { - if (TargetRegisterInfo::isVirtualRegister(Dest->getReg()) && - SrcReg->isReg()) { + if (Register::isVirtualRegister(Dest->getReg()) && SrcReg->isReg()) { MRI.setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg()); MRI.setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg()); return true; @@ -394,12 +393,11 @@ static bool instAccessReg(iterator_range &&R, if (!MO.isReg()) continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg) && - TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + if (Register::isPhysicalRegister(Reg) && + Register::isPhysicalRegister(MO.getReg())) { if (TRI.regsOverlap(Reg, MO.getReg())) return true; - } else if (MO.getReg() == Reg && - TargetRegisterInfo::isVirtualRegister(Reg)) { + } else if (MO.getReg() == Reg && Register::isVirtualRegister(Reg)) { LaneBitmask Overlap = TRI.getSubRegIndexLaneMask(SubReg) & TRI.getSubRegIndexLaneMask(MO.getSubReg()); if 
(Overlap.any()) @@ -425,7 +423,7 @@ static TargetInstrInfo::RegSubRegPair getSubRegForIndex(unsigned Reg, unsigned Sub, unsigned I, const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI) { if (TRI.getRegSizeInBits(Reg, MRI) != 32) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { Reg = TRI.getSubReg(Reg, TRI.getSubRegFromChannel(I)); } else { LaneBitmask LM = TRI.getSubRegIndexLaneMask(Sub); @@ -459,13 +457,13 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI, assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 || MovT.getOpcode() == AMDGPU::COPY); - unsigned T = MovT.getOperand(0).getReg(); + Register T = MovT.getOperand(0).getReg(); unsigned Tsub = MovT.getOperand(0).getSubReg(); MachineOperand &Xop = MovT.getOperand(1); if (!Xop.isReg()) return nullptr; - unsigned X = Xop.getReg(); + Register X = Xop.getReg(); unsigned Xsub = Xop.getSubReg(); unsigned Size = TII->getOpSize(MovT, 0) / 4; @@ -484,7 +482,7 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI, MovY.getOperand(1).getSubReg() != Tsub) continue; - unsigned Y = MovY.getOperand(0).getReg(); + Register Y = MovY.getOperand(0).getReg(); unsigned Ysub = MovY.getOperand(0).getSubReg(); if (!TRI.isVGPR(MRI, Y) || MovT.getParent() != MovY.getParent()) @@ -579,7 +577,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { // XXX - not exactly a check for post-regalloc run. MachineOperand &Src = MI.getOperand(1); if (Src.isImm() && - TargetRegisterInfo::isPhysicalRegister(MI.getOperand(0).getReg())) { + Register::isPhysicalRegister(MI.getOperand(0).getReg())) { int32_t ReverseImm; if (isReverseInlineImm(TII, Src, ReverseImm)) { MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32)); @@ -643,8 +641,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { // FIXME: This could work better if hints worked with subregisters. If // we have a vector add of a constant, we usually don't get the correct // allocation due to the subregister usage. - if (TargetRegisterInfo::isVirtualRegister(Dest->getReg()) && - Src0->isReg()) { + if (Register::isVirtualRegister(Dest->getReg()) && Src0->isReg()) { MRI.setRegAllocationHint(Dest->getReg(), 0, Src0->getReg()); MRI.setRegAllocationHint(Src0->getReg(), 0, Dest->getReg()); continue; @@ -672,8 +669,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { const MachineOperand &Dst = MI.getOperand(0); MachineOperand &Src = MI.getOperand(1); - if (Src.isImm() && - TargetRegisterInfo::isPhysicalRegister(Dst.getReg())) { + if (Src.isImm() && Register::isPhysicalRegister(Dst.getReg())) { int32_t ReverseImm; if (isKImmOperand(TII, Src)) MI.setDesc(TII->get(AMDGPU::S_MOVK_I32)); @@ -721,8 +717,8 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { int Op32 = AMDGPU::getVOPe32(MI.getOpcode()); if (TII->isVOPC(Op32)) { - unsigned DstReg = MI.getOperand(0).getReg(); - if (TargetRegisterInfo::isVirtualRegister(DstReg)) { + Register DstReg = MI.getOperand(0).getReg(); + if (Register::isVirtualRegister(DstReg)) { // VOPC instructions can only write to the VCC register. 
We can't // force them to use VCC here, because this is only one register and // cannot deal with sequences which would require multiple copies of @@ -745,8 +741,8 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { TII->getNamedOperand(MI, AMDGPU::OpName::src2); if (!Src2->isReg()) continue; - unsigned SReg = Src2->getReg(); - if (TargetRegisterInfo::isVirtualRegister(SReg)) { + Register SReg = Src2->getReg(); + if (Register::isVirtualRegister(SReg)) { MRI.setRegAllocationHint(SReg, 0, VCCReg); continue; } @@ -766,7 +762,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { bool Next = false; if (SDst->getReg() != VCCReg) { - if (TargetRegisterInfo::isVirtualRegister(SDst->getReg())) + if (Register::isVirtualRegister(SDst->getReg())) MRI.setRegAllocationHint(SDst->getReg(), 0, VCCReg); Next = true; } @@ -774,7 +770,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { // All of the instructions with carry outs also have an SGPR input in // src2. if (Src2 && Src2->getReg() != VCCReg) { - if (TargetRegisterInfo::isVirtualRegister(Src2->getReg())) + if (Register::isVirtualRegister(Src2->getReg())) MRI.setRegAllocationHint(Src2->getReg(), 0, VCCReg); Next = true; } diff --git a/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/lib/Target/AMDGPU/SIWholeQuadMode.cpp index 4e07efff55d8..cb4cf68d709a 100644 --- a/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -273,12 +273,12 @@ void SIWholeQuadMode::markInstructionUses(const MachineInstr &MI, char Flag, if (!Use.isReg() || !Use.isUse()) continue; - unsigned Reg = Use.getReg(); + Register Reg = Use.getReg(); // Handle physical registers that we need to track; this is mostly relevant // for VCC, which can appear as the (implicit) input of a uniform branch, // e.g. when a loop counter is stored in a VGPR. - if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + if (!Register::isVirtualRegister(Reg)) { if (Reg == AMDGPU::EXEC || Reg == AMDGPU::EXEC_LO) continue; @@ -312,6 +312,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, char GlobalFlags = 0; bool WQMOutputs = MF.getFunction().hasFnAttribute("amdgpu-ps-wqm-outputs"); SmallVector SetInactiveInstrs; + SmallVector SoftWQMInstrs; // We need to visit the basic blocks in reverse post-order so that we visit // defs before uses, in particular so that we don't accidentally mark an @@ -340,6 +341,10 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, // correct, so we need it to be in WQM. 
Flags = StateWQM; LowerToCopyInstrs.push_back(&MI); + } else if (Opcode == AMDGPU::SOFT_WQM) { + LowerToCopyInstrs.push_back(&MI); + SoftWQMInstrs.push_back(&MI); + continue; } else if (Opcode == AMDGPU::WWM) { // The WWM intrinsic doesn't make the same guarantee, and plus it needs // to be executed in WQM or Exact so that its copy doesn't clobber @@ -356,8 +361,8 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, if (Inactive.isUndef()) { LowerToCopyInstrs.push_back(&MI); } else { - unsigned Reg = Inactive.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + Register Reg = Inactive.getReg(); + if (Register::isVirtualRegister(Reg)) { for (MachineInstr &DefMI : MRI->def_instructions(Reg)) markInstruction(DefMI, StateWWM, Worklist); } @@ -385,9 +390,9 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); - if (!TRI->isVirtualRegister(Reg) && + if (!Register::isVirtualRegister(Reg) && TRI->hasVectorRegisters(TRI->getPhysRegClass(Reg))) { Flags = StateWQM; break; @@ -407,9 +412,12 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, // Mark sure that any SET_INACTIVE instructions are computed in WQM if WQM is // ever used anywhere in the function. This implements the corresponding // semantics of @llvm.amdgcn.set.inactive. + // Similarly for SOFT_WQM instructions, implementing @llvm.amdgcn.softwqm. if (GlobalFlags & StateWQM) { for (MachineInstr *MI : SetInactiveInstrs) markInstruction(*MI, StateWQM, Worklist); + for (MachineInstr *MI : SoftWQMInstrs) + markInstruction(*MI, StateWQM, Worklist); } return GlobalFlags; @@ -548,7 +556,7 @@ bool SIWholeQuadMode::requiresCorrectState(const MachineInstr &MI) const { MachineBasicBlock::iterator SIWholeQuadMode::saveSCC(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before) { - unsigned SaveReg = MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register SaveReg = MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); MachineInstr *Save = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), SaveReg) @@ -832,7 +840,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg, void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) { for (MachineInstr *MI : LiveMaskQueries) { const DebugLoc &DL = MI->getDebugLoc(); - unsigned Dest = MI->getOperand(0).getReg(); + Register Dest = MI->getOperand(0).getReg(); MachineInstr *Copy = BuildMI(*MI->getParent(), MI, DL, TII->get(AMDGPU::COPY), Dest) .addReg(LiveMaskReg); @@ -847,13 +855,12 @@ void SIWholeQuadMode::lowerCopyInstrs() { for (unsigned i = MI->getNumExplicitOperands() - 1; i > 1; i--) MI->RemoveOperand(i); - const unsigned Reg = MI->getOperand(0).getReg(); + const Register Reg = MI->getOperand(0).getReg(); if (TRI->isVGPR(*MRI, Reg)) { - const TargetRegisterClass *regClass = - TargetRegisterInfo::isVirtualRegister(Reg) - ? MRI->getRegClass(Reg) - : TRI->getPhysRegClass(Reg); + const TargetRegisterClass *regClass = Register::isVirtualRegister(Reg) + ? MRI->getRegClass(Reg) + : TRI->getPhysRegClass(Reg); const unsigned MovOp = TII->getMovOpcode(regClass); MI->setDesc(TII->get(MovOp)); @@ -885,7 +892,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) { unsigned Exec = ST->isWave32() ? 
AMDGPU::EXEC_LO : AMDGPU::EXEC; if (!(GlobalFlags & StateWQM)) { lowerLiveMaskQueries(Exec); - if (!(GlobalFlags & StateWWM)) + if (!(GlobalFlags & StateWWM) && LowerToCopyInstrs.empty()) return !LiveMaskQueries.empty(); } else { // Store a copy of the original live mask when required diff --git a/lib/Target/AMDGPU/SMInstructions.td b/lib/Target/AMDGPU/SMInstructions.td index 1b410b6b5912..1a74ebbf8165 100644 --- a/lib/Target/AMDGPU/SMInstructions.td +++ b/lib/Target/AMDGPU/SMInstructions.td @@ -793,9 +793,18 @@ multiclass SMLoad_Pattern { // selector to prefer those. let AddedComplexity = 100 in { -defm : SMRD_Pattern <"S_LOAD_DWORD", i32>; -defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>; -defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>; +foreach vt = Reg32Types.types in { +defm : SMRD_Pattern <"S_LOAD_DWORD", vt>; +} + +foreach vt = SReg_64.RegTypes in { +defm : SMRD_Pattern <"S_LOAD_DWORDX2", vt>; +} + +foreach vt = SReg_128.RegTypes in { +defm : SMRD_Pattern <"S_LOAD_DWORDX4", vt>; +} + defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>; defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>; diff --git a/lib/Target/AMDGPU/SOPInstructions.td b/lib/Target/AMDGPU/SOPInstructions.td index dfafdccc05a3..d31a49f428ee 100644 --- a/lib/Target/AMDGPU/SOPInstructions.td +++ b/lib/Target/AMDGPU/SOPInstructions.td @@ -181,7 +181,9 @@ def S_BCNT0_I32_B64 : SOP1_32_64 <"s_bcnt0_i32_b64">; def S_BCNT1_I32_B32 : SOP1_32 <"s_bcnt1_i32_b32", [(set i32:$sdst, (ctpop i32:$src0))] >; -def S_BCNT1_I32_B64 : SOP1_32_64 <"s_bcnt1_i32_b64">; +def S_BCNT1_I32_B64 : SOP1_32_64 <"s_bcnt1_i32_b64", + [(set i32:$sdst, (ctpop i64:$src0))] +>; } // End Defs = [SCC] def S_FF0_I32_B32 : SOP1_32 <"s_ff0_i32_b32">; @@ -417,16 +419,16 @@ def S_SUBB_U32 : SOP2_32 <"s_subb_u32", let isCommutable = 1 in { def S_MIN_I32 : SOP2_32 <"s_min_i32", - [(set i32:$sdst, (UniformBinFrag i32:$src0, i32:$src1))] + [(set i32:$sdst, (smin i32:$src0, i32:$src1))] >; def S_MIN_U32 : SOP2_32 <"s_min_u32", - [(set i32:$sdst, (UniformBinFrag i32:$src0, i32:$src1))] + [(set i32:$sdst, (umin i32:$src0, i32:$src1))] >; def S_MAX_I32 : SOP2_32 <"s_max_i32", - [(set i32:$sdst, (UniformBinFrag i32:$src0, i32:$src1))] + [(set i32:$sdst, (smax i32:$src0, i32:$src1))] >; def S_MAX_U32 : SOP2_32 <"s_max_u32", - [(set i32:$sdst, (UniformBinFrag i32:$src0, i32:$src1))] + [(set i32:$sdst, (umax i32:$src0, i32:$src1))] >; } // End isCommutable = 1 } // End Defs = [SCC] @@ -853,13 +855,13 @@ class SOPC_Base op, RegisterOperand rc0, RegisterOperand rc1, let Defs = [SCC]; } class SOPC_Helper op, RegisterOperand rc, ValueType vt, - string opName, PatLeaf cond> : SOPC_Base < + string opName, SDPatternOperator cond> : SOPC_Base < op, rc, rc, opName, [(set SCC, (si_setcc_uniform vt:$src0, vt:$src1, cond))] > { } class SOPC_CMP_32 op, string opName, - PatLeaf cond = COND_NULL, string revOp = opName> + SDPatternOperator cond = COND_NULL, string revOp = opName> : SOPC_Helper, Commutable_REV, SOPKInstTable<0, opName> { @@ -868,7 +870,7 @@ class SOPC_CMP_32 op, string opName, } class SOPC_CMP_64 op, string opName, - PatLeaf cond = COND_NULL, string revOp = opName> + SDPatternOperator cond = COND_NULL, string revOp = opName> : SOPC_Helper, Commutable_REV { let isCompare = 1; @@ -1076,8 +1078,6 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "s_barrier", [(int_amdgcn_s_barrier)]> { let SchedRW = [WriteBarrier]; let simm16 = 0; - let mayLoad = 1; - let mayStore = 1; let isConvergent = 1; } @@ -1090,7 +1090,7 @@ def S_WAKEUP : SOPP <0x00000003, (ins), "s_wakeup"> { let mayLoad = 1, mayStore 
= 1, hasSideEffects = 1 in def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16", - [(int_amdgcn_s_waitcnt UIMM16bit:$simm16)]>; + [(int_amdgcn_s_waitcnt timm:$simm16)]>; def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">; def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">; @@ -1099,7 +1099,7 @@ def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">; // maximum reported is 960 cycles, so 960 / 64 = 15 max, so is the // maximum really 15 on VI? def S_SLEEP : SOPP <0x0000000e, (ins i32imm:$simm16), - "s_sleep $simm16", [(int_amdgcn_s_sleep SIMM16bit:$simm16)]> { + "s_sleep $simm16", [(int_amdgcn_s_sleep timm:$simm16)]> { let hasSideEffects = 1; let mayLoad = 1; let mayStore = 1; @@ -1110,12 +1110,11 @@ def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">; let Uses = [EXEC, M0] in { // FIXME: Should this be mayLoad+mayStore? def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16", - [(AMDGPUsendmsg (i32 imm:$simm16))] ->; + [(int_amdgcn_s_sendmsg (i32 timm:$simm16), M0)]>; def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16", - [(AMDGPUsendmsghalt (i32 imm:$simm16))] ->; + [(int_amdgcn_s_sendmsghalt (i32 timm:$simm16), M0)]>; + } // End Uses = [EXEC, M0] def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16"> { @@ -1126,13 +1125,13 @@ def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> { let simm16 = 0; } def S_INCPERFLEVEL : SOPP <0x00000014, (ins i32imm:$simm16), "s_incperflevel $simm16", - [(int_amdgcn_s_incperflevel SIMM16bit:$simm16)]> { + [(int_amdgcn_s_incperflevel timm:$simm16)]> { let hasSideEffects = 1; let mayLoad = 1; let mayStore = 1; } def S_DECPERFLEVEL : SOPP <0x00000015, (ins i32imm:$simm16), "s_decperflevel $simm16", - [(int_amdgcn_s_decperflevel SIMM16bit:$simm16)]> { + [(int_amdgcn_s_decperflevel timm:$simm16)]> { let hasSideEffects = 1; let mayLoad = 1; let mayStore = 1; @@ -1169,7 +1168,10 @@ let SubtargetPredicate = isGFX10Plus in { def S_ROUND_MODE : SOPP<0x024, (ins s16imm:$simm16), "s_round_mode $simm16">; def S_DENORM_MODE : - SOPP<0x025, (ins s16imm:$simm16), "s_denorm_mode $simm16">; + SOPP<0x025, (ins i32imm:$simm16), "s_denorm_mode $simm16", + [(SIdenorm_mode (i32 timm:$simm16))]> { + let hasSideEffects = 1; + } def S_TTRACEDATA_IMM : SOPP<0x028, (ins s16imm:$simm16), "s_ttracedata_imm $simm16">; } // End SubtargetPredicate = isGFX10Plus @@ -1178,7 +1180,7 @@ let SubtargetPredicate = isGFX10Plus in { // S_GETREG_B32 Intrinsic Pattern. 
//===----------------------------------------------------------------------===// def : GCNPat < - (int_amdgcn_s_getreg imm:$simm16), + (int_amdgcn_s_getreg timm:$simm16), (S_GETREG_B32 (as_i16imm $simm16)) >; diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index e90f40e6abea..afb2fd987afd 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -131,29 +131,70 @@ int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) { struct MUBUFInfo { uint16_t Opcode; uint16_t BaseOpcode; - uint8_t dwords; + uint8_t elements; bool has_vaddr; bool has_srsrc; bool has_soffset; }; +struct MTBUFInfo { + uint16_t Opcode; + uint16_t BaseOpcode; + uint8_t elements; + bool has_vaddr; + bool has_srsrc; + bool has_soffset; +}; + +#define GET_MTBUFInfoTable_DECL +#define GET_MTBUFInfoTable_IMPL #define GET_MUBUFInfoTable_DECL #define GET_MUBUFInfoTable_IMPL #include "AMDGPUGenSearchableTables.inc" +int getMTBUFBaseOpcode(unsigned Opc) { + const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc); + return Info ? Info->BaseOpcode : -1; +} + +int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) { + const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements); + return Info ? Info->Opcode : -1; +} + +int getMTBUFElements(unsigned Opc) { + const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); + return Info ? Info->elements : 0; +} + +bool getMTBUFHasVAddr(unsigned Opc) { + const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); + return Info ? Info->has_vaddr : false; +} + +bool getMTBUFHasSrsrc(unsigned Opc) { + const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); + return Info ? Info->has_srsrc : false; +} + +bool getMTBUFHasSoffset(unsigned Opc) { + const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); + return Info ? Info->has_soffset : false; +} + int getMUBUFBaseOpcode(unsigned Opc) { const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc); return Info ? Info->BaseOpcode : -1; } -int getMUBUFOpcode(unsigned BaseOpc, unsigned Dwords) { - const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndDwords(BaseOpc, Dwords); +int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) { + const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements); return Info ? Info->Opcode : -1; } -int getMUBUFDwords(unsigned Opc) { +int getMUBUFElements(unsigned Opc) { const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); - return Info ? Info->dwords : 0; + return Info ? Info->elements : 0; } bool getMUBUFHasVAddr(unsigned Opc) { @@ -241,7 +282,7 @@ unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, } unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) { - return getMaxWavesPerEU() * getEUsPerCU(STI); + return getMaxWavesPerEU(STI) * getEUsPerCU(STI); } unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI, @@ -253,9 +294,11 @@ unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { return 1; } -unsigned getMaxWavesPerEU() { +unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) { // FIXME: Need to take scratch memory into account. 
- return 10; + if (!isGFX10(*STI)) + return 10; + return 20; } unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI, @@ -317,7 +360,7 @@ unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { if (Version.Major >= 10) return 0; - if (WavesPerEU >= getMaxWavesPerEU()) + if (WavesPerEU >= getMaxWavesPerEU(STI)) return 0; unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1); @@ -394,17 +437,19 @@ unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, } unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) { - return 256; + if (!isGFX10(*STI)) + return 256; + return STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1024 : 512; } unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) { - return getTotalNumVGPRs(STI); + return 256; } unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { assert(WavesPerEU != 0); - if (WavesPerEU >= getMaxWavesPerEU()) + if (WavesPerEU >= getMaxWavesPerEU(STI)) return 0; unsigned MinNumVGPRs = alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1), @@ -510,7 +555,7 @@ bool isReadOnlySegment(const GlobalValue *GV) { } bool shouldEmitConstantsToTextSection(const Triple &TT) { - return TT.getOS() != Triple::AMDHSA; + return TT.getOS() == Triple::AMDPAL; } int getIntegerAttribute(const Function &F, StringRef Name, int Default) { diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 209ef7eef749..f78dadd447ff 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -94,7 +94,7 @@ unsigned getMinWavesPerEU(const MCSubtargetInfo *STI); /// \returns Maximum number of waves per execution unit for given subtarget \p /// STI without any kind of limitation. -unsigned getMaxWavesPerEU(); +unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI); /// \returns Maximum number of waves per execution unit for given subtarget \p /// STI and limited by given \p FlatWorkGroupSize. 
@@ -263,14 +263,32 @@ struct MIMGInfo { LLVM_READONLY const MIMGInfo *getMIMGInfo(unsigned Opc); +LLVM_READONLY +int getMTBUFBaseOpcode(unsigned Opc); + +LLVM_READONLY +int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements); + +LLVM_READONLY +int getMTBUFElements(unsigned Opc); + +LLVM_READONLY +bool getMTBUFHasVAddr(unsigned Opc); + +LLVM_READONLY +bool getMTBUFHasSrsrc(unsigned Opc); + +LLVM_READONLY +bool getMTBUFHasSoffset(unsigned Opc); + LLVM_READONLY int getMUBUFBaseOpcode(unsigned Opc); LLVM_READONLY -int getMUBUFOpcode(unsigned BaseOpc, unsigned Dwords); +int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements); LLVM_READONLY -int getMUBUFDwords(unsigned Opc); +int getMUBUFElements(unsigned Opc); LLVM_READONLY bool getMUBUFHasVAddr(unsigned Opc); diff --git a/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp index db20d5ccf5f9..207e4232e829 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp +++ b/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp @@ -21,6 +21,8 @@ #include "SIDefines.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Module.h" #include "llvm/Support/AMDGPUMetadata.h" #include "llvm/Support/EndianStream.h" diff --git a/lib/Target/AMDGPU/VOP1Instructions.td b/lib/Target/AMDGPU/VOP1Instructions.td index 6bc416ed7d4b..f1cdc3097dc0 100644 --- a/lib/Target/AMDGPU/VOP1Instructions.td +++ b/lib/Target/AMDGPU/VOP1Instructions.td @@ -104,9 +104,21 @@ multiclass VOP1Inst { def _e32 : VOP1_Pseudo ; def _e64 : VOP3_Pseudo .ret>; - def _sdwa : VOP1_SDWA_Pseudo ; + + foreach _ = BoolToList.ret in + def _sdwa : VOP1_SDWA_Pseudo ; + foreach _ = BoolToList.ret in def _dpp : VOP1_DPP_Pseudo ; + + def : MnemonicAlias, LetDummies; + def : MnemonicAlias, LetDummies; + + foreach _ = BoolToList.ret in + def : MnemonicAlias, LetDummies; + + foreach _ = BoolToList.ret in + def : MnemonicAlias, LetDummies; } // Special profile for instructions which have clamp @@ -227,10 +239,10 @@ defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>; } // End SchedRW = [WriteQuarterRate32] defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>; -defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32>; -defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32>; +defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, bitreverse>; +defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32, AMDGPUffbh_u32>; defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32>; -defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32>; +defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32, AMDGPUffbh_i32>; let SchedRW = [WriteDoubleAdd] in { defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64, int_amdgcn_frexp_exp>; @@ -434,7 +446,7 @@ let SubtargetPredicate = isGFX10Plus in { // Target-specific instruction encodings. 
//===----------------------------------------------------------------------===// -class VOP1_DPP op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP16 = 0> : +class VOP1_DPP op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP16 = 0> : VOP_DPP { let hasSideEffects = ps.hasSideEffects; let Defs = ps.Defs; @@ -448,8 +460,9 @@ class VOP1_DPP op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP16 = let Inst{31-25} = 0x3f; } -class VOP1_DPP16 op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> : - VOP1_DPP { +class VOP1_DPP16 op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl> : + VOP1_DPP, + SIMCInstr { let AssemblerPredicate = !if(p.HasExt, HasDPP16, DisableInst); let SubtargetPredicate = HasDPP16; } @@ -492,6 +505,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { VOP3e_gfx10<{0, 1, 1, op{6-0}}, !cast(NAME#"_e64").Pfl>; } multiclass VOP1_Real_sdwa_gfx10 op> { + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtSDWA9>.ret in def _sdwa_gfx10 : VOP_SDWA10_Real(NAME#"_sdwa")>, VOP1_SDWA9Ae(NAME#"_sdwa").Pfl> { @@ -499,11 +513,13 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { } } multiclass VOP1_Real_dpp_gfx10 op> { - def _dpp_gfx10 : VOP1_DPP16(NAME#"_e32")> { + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtDPP>.ret in + def _dpp_gfx10 : VOP1_DPP16(NAME#"_dpp")> { let DecoderNamespace = "SDWA10"; } } multiclass VOP1_Real_dpp8_gfx10 op> { + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtDPP>.ret in def _dpp8_gfx10 : VOP1_DPP8(NAME#"_e32")> { let DecoderNamespace = "DPP8"; } @@ -704,10 +720,12 @@ multiclass VOP1_Real_e32e64_vi op> { multiclass VOP1_Real_vi op> { defm NAME : VOP1_Real_e32e64_vi ; + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtSDWA>.ret in def _sdwa_vi : VOP_SDWA_Real (NAME#"_sdwa")>, VOP1_SDWAe (NAME#"_sdwa").Pfl>; + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtSDWA9>.ret in def _sdwa_gfx9 : VOP_SDWA9_Real (NAME#"_sdwa")>, VOP1_SDWA9Ae (NAME#"_sdwa").Pfl>; @@ -831,25 +849,25 @@ def V_MOVRELD_B32_V4 : V_MOVRELD_B32_pseudo; def V_MOVRELD_B32_V8 : V_MOVRELD_B32_pseudo; def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo; -let OtherPredicates = [isGFX8GFX9] in { +let OtherPredicates = [isGFX8Plus] in { def : GCNPat < - (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask, - imm:$bound_ctrl)), + (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask, + timm:$bound_ctrl)), (V_MOV_B32_dpp $src, $src, (as_i32imm $dpp_ctrl), (as_i32imm $row_mask), (as_i32imm $bank_mask), (as_i1imm $bound_ctrl)) >; def : GCNPat < - (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask, - imm:$bank_mask, imm:$bound_ctrl)), + (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl, timm:$row_mask, + timm:$bank_mask, timm:$bound_ctrl)), (V_MOV_B32_dpp $old, $src, (as_i32imm $dpp_ctrl), (as_i32imm $row_mask), (as_i32imm $bank_mask), (as_i1imm $bound_ctrl)) >; -} // End OtherPredicates = [isGFX8GFX9] +} // End OtherPredicates = [isGFX8Plus] let OtherPredicates = [isGFX8Plus] in { def : GCNPat< @@ -885,6 +903,7 @@ multiclass VOP1_Real_gfx9 op> { defm NAME : VOP1_Real_e32e64_vi ; } + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtSDWA9>.ret in def _sdwa_gfx9 : VOP_SDWA9_Real (NAME#"_sdwa")>, VOP1_SDWA9Ae (NAME#"_sdwa").Pfl>; @@ -904,23 +923,7 @@ defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>; let OtherPredicates = [isGFX10Plus] in { def : GCNPat < - (i32 (int_amdgcn_mov_dpp8 i32:$src, imm:$dpp8)), + (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)), (V_MOV_B32_dpp8_gfx10 $src, $src, (as_i32imm 
$dpp8), (i32 DPP8Mode.FI_0)) >; - -def : GCNPat < - (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask, - imm:$bound_ctrl)), - (V_MOV_B32_dpp_gfx10 $src, $src, (as_i32imm $dpp_ctrl), - (as_i32imm $row_mask), (as_i32imm $bank_mask), - (as_i1imm $bound_ctrl), (i32 0)) ->; - -def : GCNPat < - (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask, - imm:$bank_mask, imm:$bound_ctrl)), - (V_MOV_B32_dpp_gfx10 $old, $src, (as_i32imm $dpp_ctrl), - (as_i32imm $row_mask), (as_i32imm $bank_mask), - (as_i1imm $bound_ctrl), (i32 0)) ->; } // End OtherPredicates = [isGFX10Plus] diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td index 1b30cd2ed516..1ab0fc1ab58d 100644 --- a/lib/Target/AMDGPU/VOP2Instructions.td +++ b/lib/Target/AMDGPU/VOP2Instructions.td @@ -147,7 +147,8 @@ multiclass VOP2Inst_sdwa { let renamedInGFX9 = GFX9Renamed in { - def _sdwa : VOP2_SDWA_Pseudo ; + foreach _ = BoolToList.ret in + def _sdwa : VOP2_SDWA_Pseudo ; } // End renamedInGFX9 = GFX9Renamed } @@ -179,9 +180,10 @@ multiclass VOP2bInst { - let AsmMatchConverter = "cvtSdwaVOP2b"; - } + foreach _ = BoolToList.ret in + def _sdwa : VOP2_SDWA_Pseudo { + let AsmMatchConverter = "cvtSdwaVOP2b"; + } foreach _ = BoolToList.ret in def _dpp : VOP2_DPP_Pseudo ; } @@ -220,9 +222,10 @@ multiclass VOP2eInst , Commutable_REV; - def _sdwa : VOP2_SDWA_Pseudo { - let AsmMatchConverter = "cvtSdwaVOP2b"; - } + foreach _ = BoolToList.ret in + def _sdwa : VOP2_SDWA_Pseudo { + let AsmMatchConverter = "cvtSdwaVOP2e"; + } foreach _ = BoolToList.ret in def _dpp : VOP2_DPP_Pseudo ; @@ -251,7 +254,9 @@ multiclass VOP2eInstAliases { class VOP_MADAK : VOPProfile <[vt, vt, vt, vt]> { field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm); - field dag Ins32 = (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm); + field dag Ins32 = !if(!eq(vt.Size, 32), + (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm), + (ins VCSrc_f16:$src0, VGPR_32:$src1, ImmOpType:$imm)); field bit HasExt = 0; // Hack to stop printing _e64 @@ -519,7 +524,7 @@ def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, } // End isConvergent = 1 defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_NO_EXT>; -defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_NO_EXT>; +defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_NO_EXT, add_ctpop>; defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_NO_EXT, int_amdgcn_mbcnt_lo>; defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_NO_EXT, int_amdgcn_mbcnt_hi>; defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT, AMDGPUldexp>; @@ -539,9 +544,9 @@ defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfma let SubtargetPredicate = isGFX6GFX7GFX10 in { let isCommutable = 1 in { defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>; -defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_I32_I32_I32>; -defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_I32_I32_I32>; -defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32>; +defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_I32_I32_I32, srl>; +defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_I32_I32_I32, sra>; +defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32, shl>; } // End isCommutable = 1 } // End SubtargetPredicate = isGFX6GFX7GFX10 @@ -606,9 +611,9 @@ def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">; defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>; } // End FPDPRounding = 1 -defm V_LSHLREV_B16 : 
VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16>; -defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16>; -defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16>; +defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16, lshl_rev>; +defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16, lshr_rev>; +defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16, ashr_rev>; let isCommutable = 1 in { let FPDPRounding = 1 in { @@ -618,16 +623,16 @@ defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, fmul>; def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">; } // End FPDPRounding = 1 -defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16>; -defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16>; +defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16, add>; +defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16, sub>; defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16, null_frag, "v_sub_u16">; -defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16>; +defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16, mul>; defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>; defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum_like>; -defm V_MAX_U16 : VOP2Inst <"v_max_u16", VOP_I16_I16_I16>; -defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16>; -defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16>; -defm V_MIN_I16 : VOP2Inst <"v_min_i16", VOP_I16_I16_I16>; +defm V_MAX_U16 : VOP2Inst <"v_max_u16", VOP_I16_I16_I16, umax>; +defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16, smax>; +defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16, umin>; +defm V_MIN_I16 : VOP2Inst <"v_min_i16", VOP_I16_I16_I16, smin>; let Constraints = "$vdst = $src2", DisableEncoding="$src2", isConvertibleToThreeAddress = 1 in { @@ -653,16 +658,17 @@ defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>; let Constraints = "$vdst = $src2", DisableEncoding="$src2", isConvertibleToThreeAddress = 1, - isCommutable = 1 in { + isCommutable = 1, + IsDOT = 1 in { let SubtargetPredicate = HasDot5Insts in - defm V_DOT2C_F32_F16 : VOP2Inst_e32<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>; + defm V_DOT2C_F32_F16 : VOP2Inst<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>; let SubtargetPredicate = HasDot6Insts in - defm V_DOT4C_I32_I8 : VOP2Inst_e32<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>; + defm V_DOT4C_I32_I8 : VOP2Inst<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>; let SubtargetPredicate = HasDot4Insts in - defm V_DOT2C_I32_I16 : VOP2Inst_e32<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>; + defm V_DOT2C_I32_I16 : VOP2Inst<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>; let SubtargetPredicate = HasDot3Insts in - defm V_DOT8C_I32_I4 : VOP2Inst_e32<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>; + defm V_DOT8C_I32_I4 : VOP2Inst<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>; } let AddedComplexity = 30 in { @@ -719,50 +725,17 @@ defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>; // Note: 16-bit instructions produce a 0 result in the high 16-bits // on GFX8 and GFX9 and preserve high 16 bits on GFX10+ -def ClearHI16 : OutPatFrag<(ops node:$op), - (V_AND_B32_e64 $op, (V_MOV_B32_e32 (i32 0xffff)))>; - -multiclass Arithmetic_i16_Pats { - -def : GCNPat< - (op i16:$src0, i16:$src1), - !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src0, $src1)), (inst $src0, $src1)) ->; - -def : GCNPat< - (i32 (zext (op i16:$src0, i16:$src1))), - !if(!eq(PreservesHI16,1), (ClearHI16 
(inst $src0, $src1)), (inst $src0, $src1)) ->; - -def : GCNPat< - (i64 (zext (op i16:$src0, i16:$src1))), - (REG_SEQUENCE VReg_64, - !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src0, $src1)), (inst $src0, $src1)), - sub0, - (V_MOV_B32_e32 (i32 0)), sub1) ->; -} - -multiclass Bits_OpsRev_i16_Pats { - -def : GCNPat< - (op i16:$src0, i16:$src1), - !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src1, $src0)), (inst $src1, $src0)) ->; +multiclass Arithmetic_i16_0Hi_Pats { def : GCNPat< (i32 (zext (op i16:$src0, i16:$src1))), - !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src1, $src0)), (inst $src1, $src0)) + (inst $src0, $src1) >; - def : GCNPat< (i64 (zext (op i16:$src0, i16:$src1))), (REG_SEQUENCE VReg_64, - !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src1, $src0)), (inst $src1, $src0)), - sub0, + (inst $src0, $src1), sub0, (V_MOV_B32_e32 (i32 0)), sub1) >; } @@ -774,53 +747,36 @@ class ZExt_i16_i1_Pat : GCNPat < $src) >; -let Predicates = [Has16BitInsts] in { - -let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in { -defm : Arithmetic_i16_Pats; -defm : Arithmetic_i16_Pats; -defm : Arithmetic_i16_Pats; -defm : Arithmetic_i16_Pats; -defm : Arithmetic_i16_Pats; -defm : Arithmetic_i16_Pats; -defm : Arithmetic_i16_Pats; -} - -let Predicates = [Has16BitInsts, isGFX10Plus] in { -defm : Arithmetic_i16_Pats; -defm : Arithmetic_i16_Pats; -defm : Arithmetic_i16_Pats; -defm : Arithmetic_i16_Pats; -defm : Arithmetic_i16_Pats; -defm : Arithmetic_i16_Pats; -defm : Arithmetic_i16_Pats; -} - +foreach vt = [i16, v2i16] in { def : GCNPat < - (and i16:$src0, i16:$src1), - (V_AND_B32_e64 $src0, $src1) + (and vt:$src0, vt:$src1), + (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) >; def : GCNPat < - (or i16:$src0, i16:$src1), - (V_OR_B32_e64 $src0, $src1) + (or vt:$src0, vt:$src1), + (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) >; def : GCNPat < - (xor i16:$src0, i16:$src1), - (V_XOR_B32_e64 $src0, $src1) + (xor vt:$src0, vt:$src1), + (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) >; - -let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in { -defm : Bits_OpsRev_i16_Pats; -defm : Bits_OpsRev_i16_Pats; -defm : Bits_OpsRev_i16_Pats; } -let Predicates = [Has16BitInsts, isGFX10Plus] in { -defm : Bits_OpsRev_i16_Pats; -defm : Bits_OpsRev_i16_Pats; -defm : Bits_OpsRev_i16_Pats; +let Predicates = [Has16BitInsts] in { + +let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in { +defm : Arithmetic_i16_0Hi_Pats; +defm : Arithmetic_i16_0Hi_Pats; +defm : Arithmetic_i16_0Hi_Pats; +defm : Arithmetic_i16_0Hi_Pats; +defm : Arithmetic_i16_0Hi_Pats; +defm : Arithmetic_i16_0Hi_Pats; +defm : Arithmetic_i16_0Hi_Pats; +defm : Arithmetic_i16_0Hi_Pats; +defm : Arithmetic_i16_0Hi_Pats; +defm : Arithmetic_i16_0Hi_Pats; } def : ZExt_i16_i1_Pat; @@ -847,7 +803,7 @@ def : GCNPat< // Target-specific instruction encodings. 
//===----------------------------------------------------------------------===// -class VOP2_DPP op, VOP2_Pseudo ps, +class VOP2_DPP op, VOP2_DPP_Pseudo ps, string opName = ps.OpName, VOPProfile p = ps.Pfl, bit IsDPP16 = 0> : VOP_DPP { @@ -865,13 +821,18 @@ class VOP2_DPP op, VOP2_Pseudo ps, let Inst{31} = 0x0; } -class VOP2_DPP16 op, VOP2_Pseudo ps, +class Base_VOP2_DPP16 op, VOP2_DPP_Pseudo ps, string opName = ps.OpName, VOPProfile p = ps.Pfl> : VOP2_DPP { let AssemblerPredicate = !if(p.HasExt, HasDPP16, DisableInst); let SubtargetPredicate = HasDPP16; } +class VOP2_DPP16 op, VOP2_DPP_Pseudo ps, + string opName = ps.OpName, VOPProfile p = ps.Pfl> : + Base_VOP2_DPP16, + SIMCInstr ; + class VOP2_DPP8 op, VOP2_Pseudo ps, string opName = ps.OpName, VOPProfile p = ps.Pfl> : VOP_DPP8 { @@ -924,6 +885,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, !cast(NAME#"_e64").Pfl>; } multiclass VOP2_Real_sdwa_gfx10 op> { + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtSDWA9>.ret in def _sdwa_gfx10 : VOP_SDWA10_Real(NAME#"_sdwa")>, VOP2_SDWA9Ae(NAME#"_sdwa").Pfl> { @@ -931,11 +893,13 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { } } multiclass VOP2_Real_dpp_gfx10 op> { - def _dpp_gfx10 : VOP2_DPP16(NAME#"_e32")> { + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtDPP>.ret in + def _dpp_gfx10 : VOP2_DPP16(NAME#"_dpp")> { let DecoderNamespace = "SDWA10"; } } multiclass VOP2_Real_dpp8_gfx10 op> { + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtDPP>.ret in def _dpp8_gfx10 : VOP2_DPP8(NAME#"_e32")> { let DecoderNamespace = "DPP8"; } @@ -964,6 +928,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { let DecoderNamespace = "SDWA10" in { multiclass VOP2_Real_sdwa_gfx10_with_name op, string opName, string asmName> { + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtSDWA9>.ret in def _sdwa_gfx10 : VOP_SDWA10_Real(opName#"_sdwa")>, VOP2_SDWA9Ae(opName#"_sdwa").Pfl> { @@ -973,13 +938,15 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { } multiclass VOP2_Real_dpp_gfx10_with_name op, string opName, string asmName> { - def _dpp_gfx10 : VOP2_DPP16(opName#"_e32")> { + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp_gfx10 : VOP2_DPP16(opName#"_dpp")> { VOP2_Pseudo ps = !cast(opName#"_e32"); let AsmString = asmName # ps.Pfl.AsmDPP16; } } multiclass VOP2_Real_dpp8_gfx10_with_name op, string opName, string asmName> { + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in def _dpp8_gfx10 : VOP2_DPP8(opName#"_e32")> { VOP2_Pseudo ps = !cast(opName#"_e32"); let AsmString = asmName # ps.Pfl.AsmDPP8; @@ -989,13 +956,15 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { } // End DecoderNamespace = "SDWA10" //===------------------------------ VOP2be ------------------------------===// - multiclass VOP2be_Real_gfx10 op, string opName, string asmName> { + multiclass VOP2be_Real_e32_gfx10 op, string opName, string asmName> { def _e32_gfx10 : VOP2_Real(opName#"_e32"), SIEncodingFamily.GFX10>, VOP2e(opName#"_e32").Pfl> { VOP2_Pseudo Ps = !cast(opName#"_e32"); let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands); } + } + multiclass VOP2be_Real_e64_gfx10 op, string opName, string asmName> { def _e64_gfx10 : VOP3_Real(opName#"_e64"), SIEncodingFamily.GFX10>, VOP3be_gfx10<{0, 1, 0, 0, op{5-0}}, @@ -1003,6 +972,9 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { VOP3_Pseudo Ps = !cast(opName#"_e64"); let AsmString = asmName # 
Ps.AsmOperands; } + } + multiclass VOP2be_Real_sdwa_gfx10 op, string opName, string asmName> { + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtSDWA9>.ret in def _sdwa_gfx10 : VOP_SDWA10_Real(opName#"_sdwa")>, VOP2_SDWA9Ae(opName#"_sdwa").Pfl> { @@ -1010,64 +982,76 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands); let DecoderNamespace = "SDWA10"; } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtSDWA9>.ret in + def _sdwa_w32_gfx10 : + Base_VOP_SDWA10_Real(opName#"_sdwa")>, + VOP2_SDWA9Ae(opName#"_sdwa").Pfl> { + VOP2_SDWA_Pseudo Ps = !cast(opName#"_sdwa"); + let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands); + let isAsmParserOnly = 1; + let DecoderNamespace = "SDWA10"; + let WaveSizePredicate = isWave32; + } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtSDWA9>.ret in + def _sdwa_w64_gfx10 : + Base_VOP_SDWA10_Real(opName#"_sdwa")>, + VOP2_SDWA9Ae(opName#"_sdwa").Pfl> { + VOP2_SDWA_Pseudo Ps = !cast(opName#"_sdwa"); + let AsmString = asmName # Ps.AsmOperands; + let isAsmParserOnly = 1; + let DecoderNamespace = "SDWA10"; + let WaveSizePredicate = isWave64; + } + } + multiclass VOP2be_Real_dpp_gfx10 op, string opName, string asmName> { + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in def _dpp_gfx10 : - VOP2_DPP16(opName#"_e32"), asmName> { + VOP2_DPP16(opName#"_dpp"), asmName> { string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; let AsmString = asmName # !subst(", vcc", "", AsmDPP); let DecoderNamespace = "SDWA10"; } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp_w32_gfx10 : + Base_VOP2_DPP16(opName#"_dpp"), asmName> { + string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; + let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP); + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave32; + } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp_w64_gfx10 : + Base_VOP2_DPP16(opName#"_dpp"), asmName> { + string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; + let AsmString = asmName # AsmDPP; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave64; + } + } + multiclass VOP2be_Real_dpp8_gfx10 op, string opName, string asmName> { + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in def _dpp8_gfx10 : VOP2_DPP8(opName#"_e32"), asmName> { string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; let AsmString = asmName # !subst(", vcc", "", AsmDPP8); let DecoderNamespace = "DPP8"; } - - let WaveSizePredicate = isWave32 in { - def _sdwa_w32_gfx10 : - Base_VOP_SDWA10_Real(opName#"_sdwa")>, - VOP2_SDWA9Ae(opName#"_sdwa").Pfl> { - VOP2_SDWA_Pseudo Ps = !cast(opName#"_sdwa"); - let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands); - let isAsmParserOnly = 1; - let DecoderNamespace = "SDWA10"; - } - def _dpp_w32_gfx10 : - VOP2_DPP16(opName#"_e32"), asmName> { - string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; - let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP); - let isAsmParserOnly = 1; - } - def _dpp8_w32_gfx10 : - VOP2_DPP8(opName#"_e32"), asmName> { - string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; - let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8); - let isAsmParserOnly = 1; - } - } // End WaveSizePredicate = isWave32 - - let WaveSizePredicate = isWave64 in { - def _sdwa_w64_gfx10 : - Base_VOP_SDWA10_Real(opName#"_sdwa")>, - VOP2_SDWA9Ae(opName#"_sdwa").Pfl> { - VOP2_SDWA_Pseudo Ps = !cast(opName#"_sdwa"); - let AsmString = asmName # Ps.AsmOperands; - let isAsmParserOnly = 1; - let 
DecoderNamespace = "SDWA10"; - } - def _dpp_w64_gfx10 : - VOP2_DPP16(opName#"_e32"), asmName> { - string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; - let AsmString = asmName # AsmDPP; - let isAsmParserOnly = 1; - } - def _dpp8_w64_gfx10 : - VOP2_DPP8(opName#"_e32"), asmName> { - string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; - let AsmString = asmName # AsmDPP8; - let isAsmParserOnly = 1; - } - } // End WaveSizePredicate = isWave64 + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp8_w32_gfx10 : + VOP2_DPP8(opName#"_e32"), asmName> { + string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; + let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8); + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave32; + } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp8_w64_gfx10 : + VOP2_DPP8(opName#"_e32"), asmName> { + string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; + let AsmString = asmName # AsmDPP8; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave64; + } } //===----------------------------- VOP3Only -----------------------------===// @@ -1088,8 +1072,19 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { } } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" -multiclass Base_VOP2_Real_gfx10 op> : - VOP2_Real_e32_gfx10, VOP2_Real_e64_gfx10; +multiclass VOP2be_Real_gfx10 op, string opName, string asmName> : + VOP2be_Real_e32_gfx10, + VOP2be_Real_e64_gfx10, + VOP2be_Real_sdwa_gfx10, + VOP2be_Real_dpp_gfx10, + VOP2be_Real_dpp8_gfx10; + +multiclass VOP2e_Real_gfx10 op, string opName, string asmName> : + VOP2_Real_e32_gfx10, + VOP2_Real_e64_gfx10, + VOP2be_Real_sdwa_gfx10, + VOP2be_Real_dpp_gfx10, + VOP2be_Real_dpp8_gfx10; multiclass VOP2_Real_gfx10 op> : VOP2_Real_e32_gfx10, VOP2_Real_e64_gfx10, @@ -1103,7 +1098,6 @@ multiclass VOP2_Real_gfx10_with_name op, string opName, VOP2_Real_dpp_gfx10_with_name, VOP2_Real_dpp8_gfx10_with_name; -defm V_CNDMASK_B32 : Base_VOP2_Real_gfx10<0x001>; defm V_XNOR_B32 : VOP2_Real_gfx10<0x01e>; defm V_FMAC_F32 : VOP2_Real_gfx10<0x02b>; defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10<0x02c>; @@ -1136,6 +1130,9 @@ defm V_SUB_CO_CI_U32 : defm V_SUBREV_CO_CI_U32 : VOP2be_Real_gfx10<0x02a, "V_SUBBREV_U32", "v_subrev_co_ci_u32">; +defm V_CNDMASK_B32 : + VOP2e_Real_gfx10<0x001, "V_CNDMASK_B32", "v_cndmask_b32">; + // VOP3 only. 
defm V_BFM_B32 : VOP3Only_Real_gfx10<0x363>; defm V_BCNT_U32_B32 : VOP3Only_Real_gfx10<0x364>; @@ -1322,12 +1319,14 @@ multiclass Base_VOP2_Real_e32e64_vi op> : } // End AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8" multiclass VOP2_SDWA_Real op> { + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtSDWA>.ret in def _sdwa_vi : VOP_SDWA_Real (NAME#"_sdwa")>, VOP2_SDWAe (NAME#"_sdwa").Pfl>; } multiclass VOP2_SDWA9_Real op> { + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtSDWA9>.ret in def _sdwa_gfx9 : VOP_SDWA9_Real (NAME#"_sdwa")>, VOP2_SDWA9Ae (NAME#"_sdwa").Pfl>; @@ -1350,12 +1349,13 @@ multiclass VOP2be_Real_e32e64_vi_only op, string OpName, string AsmName let AsmString = AsmName # ps.AsmOperands; let DecoderNamespace = "GFX8"; } - def _sdwa_vi : - VOP_SDWA_Real (OpName#"_sdwa")>, - VOP2_SDWAe (OpName#"_sdwa").Pfl> { - VOP2_SDWA_Pseudo ps = !cast(OpName#"_sdwa"); - let AsmString = AsmName # ps.AsmOperands; - } + foreach _ = BoolToList(OpName#"_e32").Pfl.HasExtSDWA>.ret in + def _sdwa_vi : + VOP_SDWA_Real (OpName#"_sdwa")>, + VOP2_SDWAe (OpName#"_sdwa").Pfl> { + VOP2_SDWA_Pseudo ps = !cast(OpName#"_sdwa"); + let AsmString = AsmName # ps.AsmOperands; + } foreach _ = BoolToList(OpName#"_e32").Pfl.HasExtDPP>.ret in def _dpp_vi : VOP_DPP_Real(OpName#"_dpp"), SIEncodingFamily.VI>, @@ -1383,12 +1383,13 @@ multiclass VOP2be_Real_e32e64_gfx9 op, string OpName, string AsmName> { let AsmString = AsmName # ps.AsmOperands; let DecoderNamespace = "GFX9"; } - def _sdwa_gfx9 : - VOP_SDWA9_Real (OpName#"_sdwa")>, - VOP2_SDWA9Ae (OpName#"_sdwa").Pfl> { - VOP2_SDWA_Pseudo ps = !cast(OpName#"_sdwa"); - let AsmString = AsmName # ps.AsmOperands; - } + foreach _ = BoolToList(OpName#"_e32").Pfl.HasExtSDWA9>.ret in + def _sdwa_gfx9 : + VOP_SDWA9_Real (OpName#"_sdwa")>, + VOP2_SDWA9Ae (OpName#"_sdwa").Pfl> { + VOP2_SDWA_Pseudo ps = !cast(OpName#"_sdwa"); + let AsmString = AsmName # ps.AsmOperands; + } foreach _ = BoolToList(OpName#"_e32").Pfl.HasExtDPP>.ret in def _dpp_gfx9 : VOP_DPP_Real(OpName#"_dpp"), SIEncodingFamily.GFX9>, @@ -1410,10 +1411,11 @@ multiclass VOP2_Real_e32e64_gfx9 op> { VOP3e_vi <{0, 1, 0, 0, op{5-0}}, !cast(NAME#"_e64").Pfl> { let DecoderNamespace = "GFX9"; } - def _sdwa_gfx9 : - VOP_SDWA9_Real (NAME#"_sdwa")>, - VOP2_SDWA9Ae (NAME#"_sdwa").Pfl> { - } + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtSDWA9>.ret in + def _sdwa_gfx9 : + VOP_SDWA9_Real (NAME#"_sdwa")>, + VOP2_SDWA9Ae (NAME#"_sdwa").Pfl> { + } foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtDPP>.ret in def _dpp_gfx9 : VOP_DPP_Real(NAME#"_dpp"), SIEncodingFamily.GFX9>, @@ -1554,7 +1556,7 @@ defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>; } // End SubtargetPredicate = HasDLInsts multiclass VOP2_Real_DOT_ACC_gfx9 op> : VOP2_Real_e32_vi { - def _dpp : VOP2_DPP(NAME#"_e32")>; + def _dpp_vi : VOP2_DPP(NAME#"_dpp")>; } multiclass VOP2_Real_DOT_ACC_gfx10 op> : diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td index 21dbef9240e1..605425972b1c 100644 --- a/lib/Target/AMDGPU/VOP3Instructions.td +++ b/lib/Target/AMDGPU/VOP3Instructions.td @@ -112,7 +112,7 @@ class getVOP3ClampPat { class getVOP3MAIPat { list ret = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, - imm:$cbsz, imm:$abid, imm:$blgp))]; + timm:$cbsz, timm:$abid, timm:$blgp))]; } class VOP3Inst : @@ -385,12 +385,12 @@ def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile>, shl>; -def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile>, srl>; -def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile>, 
sra>; +let SubtargetPredicate = isGFX6GFX7GFX10 in { +def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile, shl>; +def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile, srl>; +def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile, sra>; def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile>; -} // End SubtargetPredicate = isGFX6GFX7GFX10, Predicates = [isGFX6GFX7GFX10] +} // End SubtargetPredicate = isGFX6GFX7GFX10 let SubtargetPredicate = isGFX8Plus in { def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile, lshl_rev>; @@ -399,21 +399,6 @@ def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile, as } // End SubtargetPredicate = isGFX8Plus } // End SchedRW = [Write64Bit] -let Predicates = [isGFX8Plus] in { -def : GCNPat < - (getDivergentFrag.ret i64:$x, i32:$y), - (V_LSHLREV_B64 $y, $x) ->; -def : AMDGPUPat < - (getDivergentFrag.ret i64:$x, i32:$y), - (V_LSHRREV_B64 $y, $x) ->; -def : AMDGPUPat < - (getDivergentFrag.ret i64:$x, i32:$y), - (V_ASHRREV_I64 $y, $x) ->; -} - let SchedRW = [Write32Bit] in { let SubtargetPredicate = isGFX8Plus in { @@ -468,13 +453,13 @@ let FPDPRounding = 1 in { def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile, fmad>; let Uses = [M0, EXEC] in { def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>, - [(set f16:$vdst, (AMDGPUinterp_p2_f16 f32:$src0, (i32 imm:$attrchan), - (i32 imm:$attr), - (i32 imm:$src0_modifiers), + [(set f16:$vdst, (AMDGPUinterp_p2_f16 f32:$src0, (i32 timm:$attrchan), + (i32 timm:$attr), + (i32 timm:$src0_modifiers), (f32 VRegSrc_32:$src2), - (i32 imm:$src2_modifiers), - (i1 imm:$high), - (i1 imm:$clamp)))]>; + (i32 timm:$src2_modifiers), + (i1 timm:$high), + (i1 timm:$clamp)))]>; } // End Uses = [M0, EXEC] } // End FPDPRounding = 1 } // End renamedInGFX9 = 1 @@ -493,21 +478,21 @@ def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f1 let Uses = [M0, EXEC], FPDPRounding = 1 in { def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>, - [(set f32:$vdst, (AMDGPUinterp_p1ll_f16 f32:$src0, (i32 imm:$attrchan), - (i32 imm:$attr), - (i32 imm:$src0_modifiers), - (i1 imm:$high), - (i1 imm:$clamp), - (i32 imm:$omod)))]>; + [(set f32:$vdst, (AMDGPUinterp_p1ll_f16 f32:$src0, (i32 timm:$attrchan), + (i32 timm:$attr), + (i32 timm:$src0_modifiers), + (i1 timm:$high), + (i1 timm:$clamp), + (i32 timm:$omod)))]>; def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>, - [(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 imm:$attrchan), - (i32 imm:$attr), - (i32 imm:$src0_modifiers), + [(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 timm:$attrchan), + (i32 timm:$attr), + (i32 timm:$src0_modifiers), (f32 VRegSrc_32:$src2), - (i32 imm:$src2_modifiers), - (i1 imm:$high), - (i1 imm:$clamp), - (i32 imm:$omod)))]>; + (i32 timm:$src2_modifiers), + (i1 timm:$high), + (i1 timm:$clamp), + (i32 timm:$omod)))]>; } // End Uses = [M0, EXEC], FPDPRounding = 1 } // End SubtargetPredicate = Has16BitInsts, isCommutable = 1 @@ -657,11 +642,11 @@ let SubtargetPredicate = isGFX10Plus in { } // End $vdst = $vdst_in, DisableEncoding $vdst_in def : GCNPat< - (int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc), + (int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc), (V_PERMLANE16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in) >; def : GCNPat< - (int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, 
imm:$fi, imm:$bc), + (int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc), (V_PERMLANEX16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in) >; } // End SubtargetPredicate = isGFX10Plus diff --git a/lib/Target/AMDGPU/VOP3PInstructions.td b/lib/Target/AMDGPU/VOP3PInstructions.td index 55ee5f6577cf..0c13f39fec02 100644 --- a/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/lib/Target/AMDGPU/VOP3PInstructions.td @@ -261,6 +261,7 @@ class SDot2Pat : GCNPat < let SubtargetPredicate = !cast(Inst).SubtargetPredicate; } +let IsDOT = 1 in { let SubtargetPredicate = HasDot2Insts in { def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile>; @@ -277,6 +278,7 @@ def V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", VOP3_Profile>; } // End SubtargetPredicate = HasDot1Insts +} // End let IsDOT = 1 multiclass DotPats { diff --git a/lib/Target/AMDGPU/VOPCInstructions.td b/lib/Target/AMDGPU/VOPCInstructions.td index b3513e383d10..8ef0ec7b71f4 100644 --- a/lib/Target/AMDGPU/VOPCInstructions.td +++ b/lib/Target/AMDGPU/VOPCInstructions.td @@ -183,7 +183,7 @@ multiclass VOPCXInstAliases { } -class getVOPCPat64 : LetDummies { +class getVOPCPat64 : LetDummies { list ret = !if(P.HasModifiers, [(set i1:$sdst, (setcc (P.Src0VT @@ -202,7 +202,7 @@ class VCMPXNoSDstTable { multiclass VOPC_Pseudos { @@ -225,6 +225,7 @@ multiclass VOPC_Pseudos .ret in def _sdwa : VOPC_SDWA_Pseudo { let Defs = !if(DefExec, [VCC, EXEC], [VCC]); let SchedRW = P.Schedule; @@ -236,7 +237,7 @@ multiclass VOPC_Pseudos : VOPC_Pseudos { @@ -261,6 +262,7 @@ multiclass VOPCX_Pseudos .ret in def _nosdst_sdwa : VOPC_SDWA_Pseudo { let Defs = [EXEC]; let SchedRW = P_NoSDst.Schedule; @@ -285,22 +287,23 @@ def VOPC_I16_I16 : VOPC_NoSdst_Profile<[Write32Bit], i16>; def VOPC_I32_I32 : VOPC_NoSdst_Profile<[Write32Bit], i32>; def VOPC_I64_I64 : VOPC_NoSdst_Profile<[Write64Bit], i64>; -multiclass VOPC_F16 : +multiclass VOPC_F16 : VOPC_Pseudos ; -multiclass VOPC_F32 : +multiclass VOPC_F32 : VOPC_Pseudos ; -multiclass VOPC_F64 : +multiclass VOPC_F64 : VOPC_Pseudos ; -multiclass VOPC_I16 : +multiclass VOPC_I16 : VOPC_Pseudos ; -multiclass VOPC_I32 : +multiclass VOPC_I32 : VOPC_Pseudos ; -multiclass VOPC_I64 : +multiclass VOPC_I64 : VOPC_Pseudos ; multiclass VOPCX_F16 : @@ -669,6 +672,7 @@ multiclass VOPC_Class_Pseudos .ret in def _sdwa : VOPC_SDWA_Pseudo { let Defs = !if(DefExec, !if(DefVcc, [VCC, EXEC], [EXEC]), !if(DefVcc, [VCC], [])); @@ -698,6 +702,7 @@ multiclass VOPCX_Class_Pseudos .ret in def _nosdst_sdwa : VOPC_SDWA_Pseudo { let Defs = [EXEC]; let SchedRW = P_NoSDst.Schedule; @@ -737,8 +742,11 @@ defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <"v_cmp_class_f32">; defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <"v_cmpx_class_f32">; defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <"v_cmp_class_f64">; defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <"v_cmpx_class_f64">; + +let SubtargetPredicate = Has16BitInsts in { defm V_CMP_CLASS_F16 : VOPC_CLASS_F16 <"v_cmp_class_f16">; defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">; +} //===----------------------------------------------------------------------===// // V_ICMPIntrinsic Pattern. 
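The hunks above and below keep wrapping real-encoding defs in `foreach _ = BoolToList<...HasExtSDWA9>.ret in` (and the matching `HasExtDPP` form), so an SDWA/DPP real instruction is only instantiated when the pseudo's profile actually provides that extension. As a rough C++ analogue of that zero-or-one expansion (illustrative sketch only; the struct and helper below are invented for the example and are not LLVM API):

#include <string>
#include <vector>

// Hypothetical stand-in for a "real" instruction record; not an LLVM type.
struct RealInst { std::string Name; };

// BoolToList<b>.ret is an empty list when b is 0 and a one-element list when
// b is 1, so the TableGen foreach body expands zero or one times. The same
// conditional-definition effect, expressed directly in C++:
std::vector<RealInst> defineSdwaReal(const std::string &PseudoName,
                                     bool HasExtSDWA9) {
  std::vector<RealInst> Defs;
  if (HasExtSDWA9) // no SDWA9 extension in the profile -> no _sdwa_gfx10 real
    Defs.push_back({PseudoName + "_sdwa_gfx10"});
  return Defs;
}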
@@ -878,6 +886,7 @@ let AssemblerPredicate = isGFX10Plus in { } } // End DecoderNamespace = "GFX10" + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtSDWA9>.ret in def _sdwa_gfx10 : VOP_SDWA10_Real(NAME#"_sdwa")>, VOPC_SDWA9e(NAME#"_sdwa").Pfl>; @@ -903,6 +912,7 @@ let AssemblerPredicate = isGFX10Plus in { } } // End DecoderNamespace = "GFX10" + foreach _ = BoolToList(NAME#"_nosdst_e32").Pfl.HasExtSDWA9>.ret in def _sdwa_gfx10 : VOP_SDWA10_Real(NAME#"_nosdst_sdwa")>, VOPC_SDWA9e(NAME#"_nosdst_sdwa").Pfl> { @@ -1223,10 +1233,12 @@ multiclass VOPC_Real_vi op> { } } + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtSDWA>.ret in def _sdwa_vi : VOP_SDWA_Real (NAME#"_sdwa")>, VOPC_SDWAe (NAME#"_sdwa").Pfl>; + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtSDWA9>.ret in def _sdwa_gfx9 : VOP_SDWA9_Real (NAME#"_sdwa")>, VOPC_SDWA9e (NAME#"_sdwa").Pfl>; diff --git a/lib/Target/AMDGPU/VOPInstructions.td b/lib/Target/AMDGPU/VOPInstructions.td index 677095a354be..f208a1134a5a 100644 --- a/lib/Target/AMDGPU/VOPInstructions.td +++ b/lib/Target/AMDGPU/VOPInstructions.td @@ -14,6 +14,7 @@ class LetDummies { bit isReMaterializable; bit isAsCheapAsAMove; bit VOPAsmPrefer32Bit; + bit FPDPRounding; Predicate SubtargetPredicate; string Constraints; string DisableEncoding; @@ -41,9 +42,7 @@ class VOP_Pseudo pattern> : InstSI , VOP , - SIMCInstr , - MnemonicAlias { - + SIMCInstr { let isPseudo = 1; let isCodeGenOnly = 1; let UseNamedOperandTable = 1; @@ -148,6 +147,7 @@ class VOP3_Real : // copy relevant pseudo op flags let SubtargetPredicate = ps.SubtargetPredicate; + let OtherPredicates = ps.OtherPredicates; let AsmMatchConverter = ps.AsmMatchConverter; let AsmVariantName = ps.AsmVariantName; let Constraints = ps.Constraints; @@ -473,8 +473,7 @@ class VOP_SDWA9Be : VOP_SDWA9e

{ class VOP_SDWA_Pseudo pattern=[]> : InstSI , VOP , - SIMCInstr , - MnemonicAlias { + SIMCInstr { let isPseudo = 1; let isCodeGenOnly = 1; @@ -595,8 +594,7 @@ class VOP_DPPe : Enc64 { class VOP_DPP_Pseudo pattern=[]> : InstSI , VOP , - SIMCInstr , - MnemonicAlias { + SIMCInstr { let isPseudo = 1; let isCodeGenOnly = 1; diff --git a/lib/Target/ARC/ARCFrameLowering.h b/lib/Target/ARC/ARCFrameLowering.h index 41b559d16761..9242400fb28d 100644 --- a/lib/Target/ARC/ARCFrameLowering.h +++ b/lib/Target/ARC/ARCFrameLowering.h @@ -27,8 +27,8 @@ class ARCInstrInfo; class ARCFrameLowering : public TargetFrameLowering { public: ARCFrameLowering(const ARCSubtarget &st) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0), ST(st) { - } + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(4), 0), + ST(st) {} /// Insert Prologue into the function. void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; diff --git a/lib/Target/ARC/ARCISelLowering.cpp b/lib/Target/ARC/ARCISelLowering.cpp index 847d23f0abdb..751fd567bae8 100644 --- a/lib/Target/ARC/ARCISelLowering.cpp +++ b/lib/Target/ARC/ARCISelLowering.cpp @@ -716,7 +716,7 @@ SDValue ARCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); assert(cast(Op.getOperand(0))->getZExtValue() == 0 && "Only support lowering frame addr of current frame."); - unsigned FrameReg = ARI.getFrameRegister(MF); + Register FrameReg = ARI.getFrameRegister(MF); return DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); } diff --git a/lib/Target/ARC/ARCMachineFunctionInfo.h b/lib/Target/ARC/ARCMachineFunctionInfo.h index 31aa5b93246c..d4dcf9bf285c 100644 --- a/lib/Target/ARC/ARCMachineFunctionInfo.h +++ b/lib/Target/ARC/ARCMachineFunctionInfo.h @@ -34,8 +34,8 @@ public: explicit ARCFunctionInfo(MachineFunction &MF) : ReturnStackOffsetSet(false), VarArgsFrameIndex(0), ReturnStackOffset(-1U), MaxCallStackReq(0) { - // Functions are 4-byte (2**2) aligned. - MF.setAlignment(2); + // Functions are 4-byte aligned. 
+ MF.setAlignment(Align(4)); } ~ARCFunctionInfo() {} diff --git a/lib/Target/ARC/ARCOptAddrMode.cpp b/lib/Target/ARC/ARCOptAddrMode.cpp index c922b99c57b0..22a3b9111c8e 100644 --- a/lib/Target/ARC/ARCOptAddrMode.cpp +++ b/lib/Target/ARC/ARCOptAddrMode.cpp @@ -139,8 +139,7 @@ static bool dominatesAllUsesOf(const MachineInstr *MI, unsigned VReg, MachineDominatorTree *MDT, MachineRegisterInfo *MRI) { - assert(TargetRegisterInfo::isVirtualRegister(VReg) && - "Expected virtual register!"); + assert(Register::isVirtualRegister(VReg) && "Expected virtual register!"); for (auto it = MRI->use_nodbg_begin(VReg), end = MRI->use_nodbg_end(); it != end; ++it) { @@ -181,7 +180,7 @@ static bool isLoadStoreThatCanHandleDisplacement(const TargetInstrInfo *TII, bool ARCOptAddrMode::noUseOfAddBeforeLoadOrStore(const MachineInstr *Add, const MachineInstr *Ldst) { - unsigned R = Add->getOperand(0).getReg(); + Register R = Add->getOperand(0).getReg(); return dominatesAllUsesOf(Ldst, R, MDT, MRI); } @@ -205,9 +204,8 @@ MachineInstr *ARCOptAddrMode::tryToCombine(MachineInstr &Ldst) { return nullptr; } - unsigned B = Base.getReg(); - if (TargetRegisterInfo::isStackSlot(B) || - !TargetRegisterInfo::isVirtualRegister(B)) { + Register B = Base.getReg(); + if (Register::isStackSlot(B) || !Register::isVirtualRegister(B)) { LLVM_DEBUG(dbgs() << "[ABAW] Base is not VReg\n"); return nullptr; } @@ -285,7 +283,7 @@ ARCOptAddrMode::canJoinInstructions(MachineInstr *Ldst, MachineInstr *Add, return nullptr; } - unsigned BaseReg = Ldst->getOperand(BasePos).getReg(); + Register BaseReg = Ldst->getOperand(BasePos).getReg(); // prohibit this: // v1 = add v0, c @@ -294,7 +292,7 @@ ARCOptAddrMode::canJoinInstructions(MachineInstr *Ldst, MachineInstr *Add, // st v0, [v0, 0] // v1 = add v0, c if (Ldst->mayStore() && Ldst->getOperand(0).isReg()) { - unsigned StReg = Ldst->getOperand(0).getReg(); + Register StReg = Ldst->getOperand(0).getReg(); if (Add->getOperand(0).getReg() == StReg || BaseReg == StReg) { LLVM_DEBUG(dbgs() << "[canJoinInstructions] Store uses result of Add\n"); return nullptr; @@ -447,7 +445,7 @@ void ARCOptAddrMode::changeToAddrMode(MachineInstr &Ldst, unsigned NewOpcode, MachineOperand Src = MachineOperand::CreateImm(0xDEADBEEF); AII->getBaseAndOffsetPosition(Ldst, BasePos, OffPos); - unsigned BaseReg = Ldst.getOperand(BasePos).getReg(); + Register BaseReg = Ldst.getOperand(BasePos).getReg(); Ldst.RemoveOperand(OffPos); Ldst.RemoveOperand(BasePos); diff --git a/lib/Target/ARC/ARCRegisterInfo.cpp b/lib/Target/ARC/ARCRegisterInfo.cpp index 9c8340ac8f81..a7f89b385ffe 100644 --- a/lib/Target/ARC/ARCRegisterInfo.cpp +++ b/lib/Target/ARC/ARCRegisterInfo.cpp @@ -206,7 +206,7 @@ void ARCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, LLVM_DEBUG(dbgs() << "Offset : " << Offset << "\n" << "<--------->\n"); - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); assert(ARC::GPR32RegClass.contains(Reg) && "Unexpected register operand"); if (!TFI->hasFP(MF)) { diff --git a/lib/Target/ARC/ARCTargetMachine.cpp b/lib/Target/ARC/ARCTargetMachine.cpp index 9fb45d686c26..34700dc22c54 100644 --- a/lib/Target/ARC/ARCTargetMachine.cpp +++ b/lib/Target/ARC/ARCTargetMachine.cpp @@ -38,7 +38,7 @@ ARCTargetMachine::ARCTargetMachine(const Target &T, const Triple &TT, "f32:32:32-i64:32-f64:32-a:0:32-n32", TT, CPU, FS, Options, getRelocModel(RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), - TLOF(make_unique()), + TLOF(std::make_unique()), Subtarget(TT, CPU, FS, *this) { initAsmInfo(); } 
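The ARC hunks above (and the ARM ones that follow) carry two recurring mechanical substitutions: `unsigned Reg` becomes the typed `Register` wrapper with `Register::isVirtualRegister`, and log2-encoded alignments such as `setAlignment(2)` and `EmitAlignment(1)` become byte-based `Align` values, `Align(4)` and `Align(2)`. A minimal sketch of the log2-to-bytes relationship behind those alignment changes, using plain integers rather than the LLVM `Align` type (the helper name is invented for illustration):

#include <cassert>

// Hypothetical helper: byte alignment corresponding to a log2 encoding.
constexpr unsigned alignBytesFromLog2(unsigned Log2Value) {
  return 1u << Log2Value;
}

int main() {
  assert(alignBytesFromLog2(2) == 4); // setAlignment(2)  -> setAlignment(Align(4))
  assert(alignBytesFromLog2(1) == 2); // EmitAlignment(1) -> EmitAlignment(Align(2))
  return 0;
}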
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp index fb238bfc9cbc..30b9c8071ba2 100644 --- a/lib/Target/ARM/A15SDOptimizer.cpp +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -133,9 +133,9 @@ bool A15SDOptimizer::usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC) { if (!MO.isReg()) return false; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) return MRI->getRegClass(Reg)->hasSuperClassEq(TRC); else return TRC->contains(Reg); @@ -151,7 +151,7 @@ unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) { // Get the subreg type that is most likely to be coalesced // for an SPR register that will be used in VDUP32d pseudo. unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) { - if (!TRI->isVirtualRegister(SReg)) + if (!Register::isVirtualRegister(SReg)) return getDPRLaneFromSPR(SReg); MachineInstr *MI = MRI->getVRegDef(SReg); @@ -166,7 +166,7 @@ unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) { SReg = MI->getOperand(1).getReg(); } - if (TargetRegisterInfo::isVirtualRegister(SReg)) { + if (Register::isVirtualRegister(SReg)) { if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1; return ARM::ssub_0; } @@ -191,8 +191,8 @@ void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) { for (MachineOperand &MO : MI->operands()) { if ((!MO.isReg()) || (!MO.isUse())) continue; - unsigned Reg = MO.getReg(); - if (!TRI->isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; MachineOperand *Op = MI->findRegisterDefOperand(Reg); @@ -213,8 +213,8 @@ void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) { for (MachineOperand &MODef : Def->operands()) { if ((!MODef.isReg()) || (!MODef.isDef())) continue; - unsigned DefReg = MODef.getReg(); - if (!TRI->isVirtualRegister(DefReg)) { + Register DefReg = MODef.getReg(); + if (!Register::isVirtualRegister(DefReg)) { IsDead = false; break; } @@ -245,10 +245,10 @@ unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { } if (MI->isInsertSubreg()) { - unsigned DPRReg = MI->getOperand(1).getReg(); - unsigned SPRReg = MI->getOperand(2).getReg(); + Register DPRReg = MI->getOperand(1).getReg(); + Register SPRReg = MI->getOperand(2).getReg(); - if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) { + if (Register::isVirtualRegister(DPRReg) && Register::isVirtualRegister(SPRReg)) { MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg()); MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg()); @@ -267,7 +267,7 @@ unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { // Find the thing we're subreg copying out of - is it of the same // regclass as DPRMI? (i.e. a DPR or QPR). 
- unsigned FullReg = SPRMI->getOperand(1).getReg(); + Register FullReg = SPRMI->getOperand(1).getReg(); const TargetRegisterClass *TRC = MRI->getRegClass(MI->getOperand(1).getReg()); if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) { @@ -296,9 +296,9 @@ unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { if (!MI->getOperand(I).isReg()) continue; ++NumTotal; - unsigned OpReg = MI->getOperand(I).getReg(); + Register OpReg = MI->getOperand(I).getReg(); - if (!TRI->isVirtualRegister(OpReg)) + if (!Register::isVirtualRegister(OpReg)) break; MachineInstr *Def = MRI->getVRegDef(OpReg); @@ -342,7 +342,7 @@ bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) { MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) { if (!MI->isFullCopy()) return MI; - if (!TRI->isVirtualRegister(MI->getOperand(1).getReg())) + if (!Register::isVirtualRegister(MI->getOperand(1).getReg())) return nullptr; MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg()); if (!Def) @@ -369,8 +369,8 @@ void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI, Reached.insert(MI); if (MI->isPHI()) { for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) { - unsigned Reg = MI->getOperand(I).getReg(); - if (!TRI->isVirtualRegister(Reg)) { + Register Reg = MI->getOperand(I).getReg(); + if (!Register::isVirtualRegister(Reg)) { continue; } MachineInstr *NewMI = MRI->getVRegDef(Reg); @@ -379,7 +379,7 @@ void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI, Front.push_back(NewMI); } } else if (MI->isFullCopy()) { - if (!TRI->isVirtualRegister(MI->getOperand(1).getReg())) + if (!Register::isVirtualRegister(MI->getOperand(1).getReg())) continue; MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg()); if (!NewMI) @@ -418,8 +418,8 @@ unsigned A15SDOptimizer::createDupLane(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const DebugLoc &DL, unsigned Reg, unsigned Lane, bool QPR) { - unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass : - &ARM::DPRRegClass); + Register Out = + MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass : &ARM::DPRRegClass); BuildMI(MBB, InsertBefore, DL, TII->get(QPR ? 
ARM::VDUPLN32q : ARM::VDUPLN32d), Out) .addReg(Reg) @@ -434,7 +434,7 @@ unsigned A15SDOptimizer::createExtractSubreg( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const DebugLoc &DL, unsigned DReg, unsigned Lane, const TargetRegisterClass *TRC) { - unsigned Out = MRI->createVirtualRegister(TRC); + Register Out = MRI->createVirtualRegister(TRC); BuildMI(MBB, InsertBefore, DL, @@ -448,7 +448,7 @@ unsigned A15SDOptimizer::createExtractSubreg( unsigned A15SDOptimizer::createRegSequence( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const DebugLoc &DL, unsigned Reg1, unsigned Reg2) { - unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass); + Register Out = MRI->createVirtualRegister(&ARM::QPRRegClass); BuildMI(MBB, InsertBefore, DL, @@ -466,7 +466,7 @@ unsigned A15SDOptimizer::createVExt(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const DebugLoc &DL, unsigned Ssub0, unsigned Ssub1) { - unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); + Register Out = MRI->createVirtualRegister(&ARM::DPRRegClass); BuildMI(MBB, InsertBefore, DL, TII->get(ARM::VEXTd32), Out) .addReg(Ssub0) .addReg(Ssub1) @@ -478,7 +478,7 @@ unsigned A15SDOptimizer::createVExt(MachineBasicBlock &MBB, unsigned A15SDOptimizer::createInsertSubreg( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const DebugLoc &DL, unsigned DReg, unsigned Lane, unsigned ToInsert) { - unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass); + Register Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass); BuildMI(MBB, InsertBefore, DL, @@ -494,7 +494,7 @@ unsigned A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const DebugLoc &DL) { - unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); + Register Out = MRI->createVirtualRegister(&ARM::DPRRegClass); BuildMI(MBB, InsertBefore, DL, @@ -602,7 +602,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { // we can end up with multiple defs of this DPR. SmallVector DefSrcs; - if (!TRI->isVirtualRegister(*I)) + if (!Register::isVirtualRegister(*I)) continue; MachineInstr *Def = MRI->getVRegDef(*I); if (!Def) @@ -622,7 +622,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { // Collect all the uses of this MI's DPR def for updating later. 
SmallVector Uses; - unsigned DPRDefReg = MI->getOperand(0).getReg(); + Register DPRDefReg = MI->getOperand(0).getReg(); for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg), E = MRI->use_end(); I != E; ++I) Uses.push_back(&*I); diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index bf8ed6562fe7..2e6f756d522c 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -35,6 +35,7 @@ class MachineInstr; class MCInst; class PassRegistry; +Pass *createMVETailPredicationPass(); FunctionPass *createARMLowOverheadLoopsPass(); Pass *createARMParallelDSPPass(); FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, @@ -67,6 +68,7 @@ void initializeThumb2SizeReducePass(PassRegistry &); void initializeThumb2ITBlockPass(PassRegistry &); void initializeMVEVPTBlockPass(PassRegistry &); void initializeARMLowOverheadLoopsPass(PassRegistry &); +void initializeMVETailPredicationPass(PassRegistry &); } // end namespace llvm diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index b687db12eaf5..fed4cb2b9316 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -57,12 +57,15 @@ def FeatureD32 : SubtargetFeature<"d32", "HasD32", "true", "Extend FP to 32 double registers">; multiclass VFPver prev = [], - list otherimplies = []> { + list prev, + list otherimplies, + list vfp2prev = []> { def _D16_SP: SubtargetFeature< name#"d16sp", query#"D16SP", "true", description#" with only 16 d-registers and no double precision", - !foreach(v, prev, !cast(v # "_D16_SP")) # otherimplies>; + !foreach(v, prev, !cast(v # "_D16_SP")) # + !foreach(v, vfp2prev, !cast(v # "_SP")) # + otherimplies>; def _SP: SubtargetFeature< name#"sp", query#"SP", "true", description#" with no double precision", @@ -72,6 +75,7 @@ multiclass VFPver(v # "_D16")) # + vfp2prev # otherimplies # [FeatureFP64, !cast(NAME # "_D16_SP")]>; def "": SubtargetFeature< name, query, "true", description, @@ -80,11 +84,17 @@ multiclass VFPver(NAME # "_SP")]>; } -defm FeatureVFP2: VFPver<"vfp2", "HasVFPv2", "Enable VFP2 instructions", - [], [FeatureFPRegs]>; +def FeatureVFP2_SP : SubtargetFeature<"vfp2sp", "HasVFPv2SP", "true", + "Enable VFP2 instructions with " + "no double precision", + [FeatureFPRegs]>; + +def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true", + "Enable VFP2 instructions", + [FeatureFP64, FeatureVFP2_SP]>; defm FeatureVFP3: VFPver<"vfp3", "HasVFPv3", "Enable VFP3 instructions", - [FeatureVFP2]>; + [], [], [FeatureVFP2]>; def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", "Enable NEON instructions", @@ -98,7 +108,7 @@ defm FeatureVFP4: VFPver<"vfp4", "HasVFPv4", "Enable VFP4 instructions", [FeatureVFP3], [FeatureFP16]>; defm FeatureFPARMv8: VFPver<"fp-armv8", "HasFPARMv8", "Enable ARMv8 FP", - [FeatureVFP4]>; + [FeatureVFP4], []>; def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", "Enable full half-precision " @@ -302,9 +312,18 @@ def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding", def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", "Prefer 32-bit Thumb instrs">; -def FeaturePrefLoopAlign32 : SubtargetFeature<"loop-align", "PrefLoopAlignment","2", +def FeaturePrefLoopAlign32 : SubtargetFeature<"loop-align", "PrefLoopLogAlignment","2", "Prefer 32-bit alignment for loops">; +def FeatureMVEVectorCostFactor1 : SubtargetFeature<"mve1beat", "MVEVectorCostFactor", "1", + "Model MVE instructions as a 1 beat per tick architecture">; + +def FeatureMVEVectorCostFactor2 : SubtargetFeature<"mve2beat", "MVEVectorCostFactor", "2", + 
"Model MVE instructions as a 2 beats per tick architecture">; + +def FeatureMVEVectorCostFactor4 : SubtargetFeature<"mve4beat", "MVEVectorCostFactor", "4", + "Model MVE instructions as a 4 beats per tick architecture">; + /// Some instructions update CPSR partially, which can add false dependency for /// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is /// mapped to a separate physical register. Avoid partial CPSR update for these @@ -1156,6 +1175,13 @@ def : ProcNoItin<"cortex-a76ae", [ARMv82a, ProcA76, FeatureFullFP16, FeatureDotProd]>; +def : ProcNoItin<"neoverse-n1", [ARMv82a, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureDotProd]>; + def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, FeatureHasRetAddrStack, FeatureNEONForFP, diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index e29077266fcd..c8c91e53c44e 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -168,7 +168,7 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // relatively easy to exceed the thumb branch range within a TU. if (! ThumbIndirectPads.empty()) { OutStreamer->EmitAssemblerFlag(MCAF_Code16); - EmitAlignment(1); + EmitAlignment(Align(2)); for (std::pair &TIP : ThumbIndirectPads) { OutStreamer->EmitLabel(TIP.second); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tBX) @@ -203,8 +203,8 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, switch (MO.getType()) { default: llvm_unreachable(""); case MachineOperand::MO_Register: { - unsigned Reg = MO.getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + Register Reg = MO.getReg(); + assert(Register::isPhysicalRegister(Reg)); assert(!MO.getSubReg() && "Subregs should be eliminated!"); if(ARM::GPRPairRegClass.contains(Reg)) { const MachineFunction &MF = *MI->getParent()->getParent(); @@ -275,7 +275,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, return false; case 'y': // Print a VFP single precision register as indexed double. if (MI->getOperand(OpNum).isReg()) { - unsigned Reg = MI->getOperand(OpNum).getReg(); + Register Reg = MI->getOperand(OpNum).getReg(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); // Find the 'd' register that has this 's' register as a sub-register, // and determine the lane number. @@ -302,14 +302,14 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, if (!MI->getOperand(OpNum).isReg()) return true; const MachineOperand &MO = MI->getOperand(OpNum); - unsigned RegBegin = MO.getReg(); + Register RegBegin = MO.getReg(); // This takes advantage of the 2 operand-ness of ldm/stm and that we've // already got the operands in registers that are operands to the // inline asm statement. O << "{"; if (ARM::GPRPairRegClass.contains(RegBegin)) { const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - unsigned Reg0 = TRI->getSubReg(RegBegin, ARM::gsub_0); + Register Reg0 = TRI->getSubReg(RegBegin, ARM::gsub_0); O << ARMInstPrinter::getRegisterName(Reg0) << ", "; RegBegin = TRI->getSubReg(RegBegin, ARM::gsub_1); } @@ -378,8 +378,8 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, if (!MO.isReg()) return true; const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - unsigned Reg = TRI->getSubReg(MO.getReg(), FirstHalf ? - ARM::gsub_0 : ARM::gsub_1); + Register Reg = + TRI->getSubReg(MO.getReg(), FirstHalf ? 
ARM::gsub_0 : ARM::gsub_1); O << ARMInstPrinter::getRegisterName(Reg); return false; } @@ -391,7 +391,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, const MachineOperand &MO = MI->getOperand(RegOp); if (!MO.isReg()) return true; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); O << ARMInstPrinter::getRegisterName(Reg); return false; } @@ -400,12 +400,12 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, case 'f': { // The high doubleword register of a NEON quad register. if (!MI->getOperand(OpNum).isReg()) return true; - unsigned Reg = MI->getOperand(OpNum).getReg(); + Register Reg = MI->getOperand(OpNum).getReg(); if (!ARM::QPRRegClass.contains(Reg)) return true; const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - unsigned SubReg = TRI->getSubReg(Reg, ExtraCode[0] == 'e' ? - ARM::dsub_0 : ARM::dsub_1); + Register SubReg = + TRI->getSubReg(Reg, ExtraCode[0] == 'e' ? ARM::dsub_0 : ARM::dsub_1); O << ARMInstPrinter::getRegisterName(SubReg); return false; } @@ -419,7 +419,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, return true; const MachineFunction &MF = *MI->getParent()->getParent(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if(!ARM::GPRPairRegClass.contains(Reg)) return false; Reg = TRI->getSubReg(Reg, ARM::gsub_1); @@ -526,7 +526,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { if (!Stubs.empty()) { // Switch with ".non_lazy_symbol_pointer" directive. OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection()); - EmitAlignment(2); + EmitAlignment(Align(4)); for (auto &Stub : Stubs) emitNonLazySymbolPointer(*OutStreamer, Stub.first, Stub.second); @@ -539,7 +539,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { if (!Stubs.empty()) { // Switch with ".non_lazy_symbol_pointer" directive. OutStreamer->SwitchSection(TLOFMacho.getThreadLocalPointerSection()); - EmitAlignment(2); + EmitAlignment(Align(4)); for (auto &Stub : Stubs) emitNonLazySymbolPointer(*OutStreamer, Stub.first, Stub.second); @@ -940,7 +940,7 @@ void ARMAsmPrinter::EmitJumpTableAddrs(const MachineInstr *MI) { // Make sure the Thumb jump table is 4-byte aligned. This will be a nop for // ARM mode tables. - EmitAlignment(2); + EmitAlignment(Align(4)); // Emit a label for the jump table. MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI); @@ -986,7 +986,7 @@ void ARMAsmPrinter::EmitJumpTableInsts(const MachineInstr *MI) { // Make sure the Thumb jump table is 4-byte aligned. This will be a nop for // ARM mode tables. - EmitAlignment(2); + EmitAlignment(Align(4)); // Emit a label for the jump table. MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI); @@ -1015,7 +1015,7 @@ void ARMAsmPrinter::EmitJumpTableTBInst(const MachineInstr *MI, unsigned JTI = MO1.getIndex(); if (Subtarget->isThumb1Only()) - EmitAlignment(2); + EmitAlignment(Align(4)); MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI); OutStreamer->EmitLabel(JTISymbol); @@ -1058,7 +1058,7 @@ void ARMAsmPrinter::EmitJumpTableTBInst(const MachineInstr *MI, OutStreamer->EmitDataRegion(MCDR_DataRegionEnd); // Make sure the next instruction is 2-byte aligned. 
- EmitAlignment(1); + EmitAlignment(Align(2)); } void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { @@ -1072,7 +1072,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { MF.getSubtarget().getRegisterInfo(); const MachineRegisterInfo &MachineRegInfo = MF.getRegInfo(); - unsigned FramePtr = TargetRegInfo->getFrameRegister(MF); + Register FramePtr = TargetRegInfo->getFrameRegister(MF); unsigned Opc = MI->getOpcode(); unsigned SrcReg, DstReg; @@ -1136,7 +1136,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { } // Check for registers that are remapped (for a Thumb1 prologue that // saves high registers). - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (unsigned RemappedReg = AFI->EHPrologueRemappedRegs.lookup(Reg)) Reg = RemappedReg; RegList.push_back(Reg); @@ -1326,7 +1326,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // So here we generate a bl to a small jump pad that does bx rN. // The jump pads are emitted after the function body. - unsigned TReg = MI->getOperand(0).getReg(); + Register TReg = MI->getOperand(0).getReg(); MCSymbol *TRegSym = nullptr; for (std::pair &TIP : ThumbIndirectPads) { if (TIP.first == TReg) { @@ -1663,8 +1663,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { case ARM::tTBH_JT: { bool Is8Bit = MI->getOpcode() == ARM::tTBB_JT; - unsigned Base = MI->getOperand(0).getReg(); - unsigned Idx = MI->getOperand(1).getReg(); + Register Base = MI->getOperand(0).getReg(); + Register Idx = MI->getOperand(1).getReg(); assert(MI->getOperand(1).isKill() && "We need the index register as scratch!"); // Multiply up idx if necessary. @@ -1844,8 +1844,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // b LSJLJEH // movs r0, #1 // LSJLJEH: - unsigned SrcReg = MI->getOperand(0).getReg(); - unsigned ValReg = MI->getOperand(1).getReg(); + Register SrcReg = MI->getOperand(0).getReg(); + Register ValReg = MI->getOperand(1).getReg(); MCSymbol *Label = OutContext.createTempSymbol("SJLJEH", false, true); OutStreamer->AddComment("eh_setjmp begin"); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr) @@ -1910,8 +1910,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // mov r0, #0 // add pc, pc, #0 // mov r0, #1 - unsigned SrcReg = MI->getOperand(0).getReg(); - unsigned ValReg = MI->getOperand(1).getReg(); + Register SrcReg = MI->getOperand(0).getReg(); + Register ValReg = MI->getOperand(1).getReg(); OutStreamer->AddComment("eh_setjmp begin"); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::ADDri) @@ -1967,8 +1967,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // ldr $scratch, [$src, #4] // ldr r7, [$src] // bx $scratch - unsigned SrcReg = MI->getOperand(0).getReg(); - unsigned ScratchReg = MI->getOperand(1).getReg(); + Register SrcReg = MI->getOperand(0).getReg(); + Register ScratchReg = MI->getOperand(1).getReg(); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRi12) .addReg(ARM::SP) .addReg(SrcReg) @@ -2027,8 +2027,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // ldr $scratch, [$src, #4] // ldr r7, [$src] // bx $scratch - unsigned SrcReg = MI->getOperand(0).getReg(); - unsigned ScratchReg = MI->getOperand(1).getReg(); + Register SrcReg = MI->getOperand(0).getReg(); + Register ScratchReg = MI->getOperand(1).getReg(); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi) .addReg(ScratchReg) @@ -2095,7 +2095,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // ldr.w sp, [$src, #8] 
// ldr.w pc, [$src, #4] - unsigned SrcReg = MI->getOperand(0).getReg(); + Register SrcReg = MI->getOperand(0).getReg(); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2LDRi12) .addReg(ARM::R11) diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 222aa85856a2..684cd1def977 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -172,9 +172,9 @@ MachineInstr *ARMBaseInstrInfo::convertToThreeAddress( const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0); const MachineOperand &Base = MI.getOperand(2); const MachineOperand &Offset = MI.getOperand(NumOps - 3); - unsigned WBReg = WB.getReg(); - unsigned BaseReg = Base.getReg(); - unsigned OffReg = Offset.getReg(); + Register WBReg = WB.getReg(); + Register BaseReg = Base.getReg(); + Register OffReg = Offset.getReg(); unsigned OffImm = MI.getOperand(NumOps - 2).getImm(); ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm(); switch (AddrMode) { @@ -276,8 +276,8 @@ MachineInstr *ARMBaseInstrInfo::convertToThreeAddress( if (LV) { for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); - if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { - unsigned Reg = MO.getReg(); + if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) { + Register Reg = MO.getReg(); LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); if (MO.isDef()) { @@ -966,8 +966,8 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, SmallSet DstRegs; #endif for (unsigned i = 0; i != SubRegs; ++i) { - unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing); - unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing); + Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing); + Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing); assert(Dst && Src && "Bad sub-register"); #ifndef NDEBUG assert(!DstRegs.count(Src) && "destructive vector copy"); @@ -1019,7 +1019,7 @@ ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg, if (!SubIdx) return MIB.addReg(Reg, State); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); return MIB.addReg(Reg, State, SubIdx); } @@ -1133,7 +1133,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, case 24: if (ARM::DTripleRegClass.hasSubClassEq(RC)) { // Use aligned spills if the stack can be realigned. - if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + if (Align >= 16 && getRegisterInfo().canRealignStack(MF) && + Subtarget.hasNEON()) { BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo)) .addFrameIndex(FI) .addImm(16) @@ -1155,7 +1156,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, break; case 32: if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { - if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + if (Align >= 16 && getRegisterInfo().canRealignStack(MF) && + Subtarget.hasNEON()) { // FIXME: It's possible to only store part of the QQ register if the // spilled def has a sub-register index. 
BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo)) @@ -1337,7 +1339,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); } - if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + if (Register::isPhysicalRegister(DestReg)) MIB.addReg(DestReg, RegState::ImplicitDefine); } else llvm_unreachable("Unknown reg class!"); @@ -1368,7 +1370,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, break; case 24: if (ARM::DTripleRegClass.hasSubClassEq(RC)) { - if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + if (Align >= 16 && getRegisterInfo().canRealignStack(MF) && + Subtarget.hasNEON()) { BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg) .addFrameIndex(FI) .addImm(16) @@ -1382,7 +1385,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); - if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + if (Register::isPhysicalRegister(DestReg)) MIB.addReg(DestReg, RegState::ImplicitDefine); } } else @@ -1390,7 +1393,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, break; case 32: if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { - if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + if (Align >= 16 && getRegisterInfo().canRealignStack(MF) && + Subtarget.hasNEON()) { BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) .addFrameIndex(FI) .addImm(16) @@ -1405,7 +1409,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI); - if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + if (Register::isPhysicalRegister(DestReg)) MIB.addReg(DestReg, RegState::ImplicitDefine); } } else @@ -1425,7 +1429,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI); - if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + if (Register::isPhysicalRegister(DestReg)) MIB.addReg(DestReg, RegState::ImplicitDefine); } else llvm_unreachable("Unknown reg class!"); @@ -1583,8 +1587,8 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { // Look for a copy between even S-registers. That is where we keep floats // when using NEON v2f32 instructions for f32 arithmetic. 
- unsigned DstRegS = MI.getOperand(0).getReg(); - unsigned SrcRegS = MI.getOperand(1).getReg(); + Register DstRegS = MI.getOperand(0).getReg(); + Register SrcRegS = MI.getOperand(1).getReg(); if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS)) return false; @@ -1794,12 +1798,11 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0, if (MI0.getNumOperands() != MI1.getNumOperands()) return false; - unsigned Addr0 = MI0.getOperand(1).getReg(); - unsigned Addr1 = MI1.getOperand(1).getReg(); + Register Addr0 = MI0.getOperand(1).getReg(); + Register Addr1 = MI1.getOperand(1).getReg(); if (Addr0 != Addr1) { - if (!MRI || - !TargetRegisterInfo::isVirtualRegister(Addr0) || - !TargetRegisterInfo::isVirtualRegister(Addr1)) + if (!MRI || !Register::isVirtualRegister(Addr0) || + !Register::isVirtualRegister(Addr1)) return false; // This assumes SSA form. @@ -2076,6 +2079,38 @@ isProfitableToIfCvt(MachineBasicBlock &TBB, return PredCost <= UnpredCost; } +unsigned +ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF, + unsigned NumInsts) const { + // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions. + // ARM has a condition code field in every predicable instruction, using it + // doesn't change code size. + return Subtarget.isThumb2() ? divideCeil(NumInsts, 4) * 2 : 0; +} + +unsigned +ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const { + // If this branch is likely to be folded into the comparison to form a + // CB(N)Z, then removing it won't reduce code size at all, because that will + // just replace the CB(N)Z with a CMP. + if (MI.getOpcode() == ARM::t2Bcc && + findCMPToFoldIntoCBZ(&MI, &getRegisterInfo())) + return 0; + + unsigned Size = getInstSizeInBytes(MI); + + // For Thumb2, all branches are 32-bit instructions during the if conversion + // pass, but may be replaced with 16-bit instructions during size reduction. + // Since the branches considered by if conversion tend to be forward branches + // over small basic blocks, they are very likely to be in range for the + // narrow instructions, so we assume the final code size will be half what it + // currently is. + if (Subtarget.isThumb2()) + Size /= 2; + + return Size; +} + bool ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, MachineBasicBlock &FMBB) const { @@ -2141,7 +2176,7 @@ MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI, MachineInstr * ARMBaseInstrInfo::canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI, const TargetInstrInfo *TII) const { - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return nullptr; if (!MRI.hasOneNonDBGUse(Reg)) return nullptr; @@ -2163,7 +2198,7 @@ ARMBaseInstrInfo::canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI, // MI can't have any tied operands, that would conflict with predication. if (MO.isTied()) return nullptr; - if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + if (Register::isPhysicalRegister(MO.getReg())) return nullptr; if (MO.isDef() && !MO.isDead()) return nullptr; @@ -2211,7 +2246,7 @@ ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI, // Find new register class to use. MachineOperand FalseReg = MI.getOperand(Invert ? 
2 : 1); - unsigned DestReg = MI.getOperand(0).getReg(); + Register DestReg = MI.getOperand(0).getReg(); const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg()); if (!MRI.constrainRegClass(DestReg, PreviousClass)) return nullptr; @@ -2298,6 +2333,7 @@ static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { {ARM::tSUBSrr, ARM::tSUBrr}, {ARM::tSBCS, ARM::tSBC}, {ARM::tRSBS, ARM::tRSB}, + {ARM::tLSLSri, ARM::tLSLri}, {ARM::t2ADDSri, ARM::t2ADDri}, {ARM::t2ADDSrr, ARM::t2ADDrr}, @@ -2420,7 +2456,8 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineOperand &MO = MI->getOperand(i); RegList.push_back(MO); - if (MO.isReg() && TRI->getEncodingValue(MO.getReg()) < FirstRegEnc) + if (MO.isReg() && !MO.isImplicit() && + TRI->getEncodingValue(MO.getReg()) < FirstRegEnc) FirstRegEnc = TRI->getEncodingValue(MO.getReg()); } @@ -2430,7 +2467,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded; --CurRegEnc) { unsigned CurReg = RegClass->getRegister(CurRegEnc); - if (IsT1PushPop && CurReg > ARM::R7) + if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7)) continue; if (!IsPop) { // Pushing any register is completely harmless, mark the register involved @@ -3039,18 +3076,22 @@ bool ARMBaseInstrInfo::optimizeCompareInstr( break; case ARM::VSELEQD: case ARM::VSELEQS: + case ARM::VSELEQH: CC = ARMCC::EQ; break; case ARM::VSELGTD: case ARM::VSELGTS: + case ARM::VSELGTH: CC = ARMCC::GT; break; case ARM::VSELGED: case ARM::VSELGES: + case ARM::VSELGEH: CC = ARMCC::GE; break; - case ARM::VSELVSS: case ARM::VSELVSD: + case ARM::VSELVSS: + case ARM::VSELVSH: CC = ARMCC::VS; break; } @@ -3271,9 +3312,9 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, } unsigned OpIdx = Commute ? 2 : 1; - unsigned Reg1 = UseMI.getOperand(OpIdx).getReg(); + Register Reg1 = UseMI.getOperand(OpIdx).getReg(); bool isKill = UseMI.getOperand(OpIdx).isKill(); - unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg)); + Register NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg)); BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc), NewReg) .addReg(Reg1, getKillRegState(isKill)) @@ -3335,15 +3376,15 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, case ARM::LDRSB_POST: case ARM::LDRSH_POST: { - unsigned Rt = MI.getOperand(0).getReg(); - unsigned Rm = MI.getOperand(3).getReg(); + Register Rt = MI.getOperand(0).getReg(); + Register Rm = MI.getOperand(3).getReg(); return (Rt == Rm) ? 
4 : 3; } case ARM::LDR_PRE_REG: case ARM::LDRB_PRE_REG: { - unsigned Rt = MI.getOperand(0).getReg(); - unsigned Rm = MI.getOperand(3).getReg(); + Register Rt = MI.getOperand(0).getReg(); + Register Rm = MI.getOperand(3).getReg(); if (Rt == Rm) return 3; unsigned ShOpVal = MI.getOperand(4).getImm(); @@ -3372,8 +3413,8 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, case ARM::LDRH_PRE: case ARM::STRH_PRE: { - unsigned Rt = MI.getOperand(0).getReg(); - unsigned Rm = MI.getOperand(3).getReg(); + Register Rt = MI.getOperand(0).getReg(); + Register Rm = MI.getOperand(3).getReg(); if (!Rm) return 2; if (Rt == Rm) @@ -3384,8 +3425,8 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, case ARM::LDR_POST_REG: case ARM::LDRB_POST_REG: case ARM::LDRH_POST: { - unsigned Rt = MI.getOperand(0).getReg(); - unsigned Rm = MI.getOperand(3).getReg(); + Register Rt = MI.getOperand(0).getReg(); + Register Rm = MI.getOperand(3).getReg(); return (Rt == Rm) ? 3 : 2; } @@ -3404,10 +3445,10 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, case ARM::LDRSB_PRE: case ARM::LDRSH_PRE: { - unsigned Rm = MI.getOperand(3).getReg(); + Register Rm = MI.getOperand(3).getReg(); if (Rm == 0) return 3; - unsigned Rt = MI.getOperand(0).getReg(); + Register Rt = MI.getOperand(0).getReg(); if (Rt == Rm) return 4; unsigned ShOpVal = MI.getOperand(4).getImm(); @@ -3422,9 +3463,9 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, } case ARM::LDRD: { - unsigned Rt = MI.getOperand(0).getReg(); - unsigned Rn = MI.getOperand(2).getReg(); - unsigned Rm = MI.getOperand(3).getReg(); + Register Rt = MI.getOperand(0).getReg(); + Register Rn = MI.getOperand(2).getReg(); + Register Rm = MI.getOperand(3).getReg(); if (Rm) return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4 : 3; @@ -3432,7 +3473,7 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, } case ARM::STRD: { - unsigned Rm = MI.getOperand(3).getReg(); + Register Rm = MI.getOperand(3).getReg(); if (Rm) return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4 : 3; @@ -3448,9 +3489,9 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, return 4; case ARM::LDRD_PRE: { - unsigned Rt = MI.getOperand(0).getReg(); - unsigned Rn = MI.getOperand(3).getReg(); - unsigned Rm = MI.getOperand(4).getReg(); + Register Rt = MI.getOperand(0).getReg(); + Register Rn = MI.getOperand(3).getReg(); + Register Rm = MI.getOperand(4).getReg(); if (Rm) return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5 : 4; @@ -3458,13 +3499,13 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, } case ARM::t2LDRD_PRE: { - unsigned Rt = MI.getOperand(0).getReg(); - unsigned Rn = MI.getOperand(3).getReg(); + Register Rt = MI.getOperand(0).getReg(); + Register Rn = MI.getOperand(3).getReg(); return (Rt == Rn) ? 4 : 3; } case ARM::STRD_PRE: { - unsigned Rm = MI.getOperand(4).getReg(); + Register Rm = MI.getOperand(4).getReg(); if (Rm) return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5 : 4; @@ -3495,8 +3536,8 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, return 2; case ARM::t2LDRDi8: { - unsigned Rt = MI.getOperand(0).getReg(); - unsigned Rn = MI.getOperand(2).getReg(); + Register Rt = MI.getOperand(0).getReg(); + Register Rn = MI.getOperand(2).getReg(); return (Rt == Rn) ? 
3 : 2; } @@ -3745,7 +3786,7 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, } bool ARMBaseInstrInfo::isLDMBaseRegInList(const MachineInstr &MI) const { - unsigned BaseReg = MI.getOperand(0).getReg(); + Register BaseReg = MI.getOperand(0).getReg(); for (unsigned i = 1, sz = MI.getNumOperands(); i < sz; ++i) { const auto &Op = MI.getOperand(i); if (Op.isReg() && Op.getReg() == BaseReg) @@ -4219,7 +4260,7 @@ int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return -1; const MachineOperand &DefMO = DefMI.getOperand(DefIdx); - unsigned Reg = DefMO.getReg(); + Register Reg = DefMO.getReg(); const MachineInstr *ResolvedDefMI = &DefMI; unsigned DefAdj = 0; @@ -4328,10 +4369,10 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, } const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode()); - const MachineSDNode *DefMN = dyn_cast(DefNode); + auto *DefMN = cast(DefNode); unsigned DefAlign = !DefMN->memoperands_empty() ? (*DefMN->memoperands_begin())->getAlignment() : 0; - const MachineSDNode *UseMN = dyn_cast(UseNode); + auto *UseMN = cast(UseNode); unsigned UseAlign = !UseMN->memoperands_empty() ? (*UseMN->memoperands_begin())->getAlignment() : 0; int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, @@ -4708,7 +4749,7 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI, if (MI.getOperand(i).isImplicit() || !MI.getOperand(i).isReg()) continue; - unsigned Reg = MI.getOperand(i).getReg(); + Register Reg = MI.getOperand(i).getReg(); if (Reg < ARM::R0 || Reg > ARM::R7) { if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) && !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) { @@ -4731,7 +4772,7 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI, MachineBasicBlock &MBB = *MI->getParent(); DebugLoc DL = MI->getDebugLoc(); - unsigned Reg = MI->getOperand(0).getReg(); + Register Reg = MI->getOperand(0).getReg(); const GlobalValue *GV = cast((*MI->memoperands_begin())->getValue()); MachineInstrBuilder MIB; @@ -5104,7 +5145,7 @@ unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance( const MachineOperand &MO = MI.getOperand(OpNum); if (MO.readsReg()) return 0; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); int UseOp = -1; switch (MI.getOpcode()) { @@ -5134,7 +5175,7 @@ unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance( return 0; // We must be able to clobber the whole D-reg. - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { // Virtual register must be a def undef foo:ssub_0 operand. if (!MO.getSubReg() || MI.readsVirtualRegister(Reg)) return 0; @@ -5159,8 +5200,8 @@ void ARMBaseInstrInfo::breakPartialRegDependency( assert(TRI && "Need TRI instance"); const MachineOperand &MO = MI.getOperand(OpNum); - unsigned Reg = MO.getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + Register Reg = MO.getReg(); + assert(Register::isPhysicalRegister(Reg) && "Can't break virtual register dependencies."); unsigned DReg = Reg; @@ -5337,7 +5378,7 @@ MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br, // is not redefined between the cmp and the br. 
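The if-conversion size hooks added earlier in this file (extraSizeToPredicateInstructions / predictBranchSizeForIfCvt) model Thumb2's IT overhead: one 2-byte IT instruction predicates up to four instructions (plain ARM encodes the condition in every instruction, so predication costs nothing extra), and a 32-bit Thumb2 branch is assumed to shrink to 16 bits later unless it would fold into a CB(N)Z. A standalone sketch of that arithmetic; the function names here are illustrative, not the TargetInstrInfo hooks:

  #include <cassert>

  // Extra bytes needed to predicate NumInsts instructions.
  static unsigned extraBytesToPredicate(unsigned NumInsts, bool IsThumb2) {
    if (!IsThumb2)
      return 0;                      // ARM mode: condition field is free
    return (NumInsts + 3) / 4 * 2;   // divideCeil(NumInsts, 4) IT insts, 2 bytes each
  }

  // Predicted size of a conditional branch after later size reduction.
  static unsigned predictedBranchSize(unsigned EncodedSize, bool IsThumb2,
                                      bool FoldsIntoCBZ) {
    if (FoldsIntoCBZ)
      return 0;                      // removing it just turns a CB(N)Z into a CMP
    return IsThumb2 ? EncodedSize / 2 : EncodedSize;
  }

  int main() {
    assert(extraBytesToPredicate(5, /*IsThumb2=*/true) == 4);  // two IT blocks
    assert(extraBytesToPredicate(5, /*IsThumb2=*/false) == 0); // ARM mode
    assert(predictedBranchSize(4, true, false) == 2);
    return 0;
  }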
if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri) return nullptr; - unsigned Reg = CmpMI->getOperand(0).getReg(); + Register Reg = CmpMI->getOperand(0).getReg(); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg); if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0) @@ -5349,3 +5390,50 @@ MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br, return &*CmpMI; } + +unsigned llvm::ConstantMaterializationCost(unsigned Val, + const ARMSubtarget *Subtarget, + bool ForCodesize) { + if (Subtarget->isThumb()) { + if (Val <= 255) // MOV + return ForCodesize ? 2 : 1; + if (Subtarget->hasV6T2Ops() && (Val <= 0xffff || // MOV + ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW + ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN + return ForCodesize ? 4 : 1; + if (Val <= 510) // MOV + ADDi8 + return ForCodesize ? 4 : 2; + if (~Val <= 255) // MOV + MVN + return ForCodesize ? 4 : 2; + if (ARM_AM::isThumbImmShiftedVal(Val)) // MOV + LSL + return ForCodesize ? 4 : 2; + } else { + if (ARM_AM::getSOImmVal(Val) != -1) // MOV + return ForCodesize ? 4 : 1; + if (ARM_AM::getSOImmVal(~Val) != -1) // MVN + return ForCodesize ? 4 : 1; + if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW + return ForCodesize ? 4 : 1; + if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs + return ForCodesize ? 8 : 2; + } + if (Subtarget->useMovt()) // MOVW + MOVT + return ForCodesize ? 8 : 2; + return ForCodesize ? 8 : 3; // Literal pool load +} + +bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, + const ARMSubtarget *Subtarget, + bool ForCodesize) { + // Check with ForCodesize + unsigned Cost1 = ConstantMaterializationCost(Val1, Subtarget, ForCodesize); + unsigned Cost2 = ConstantMaterializationCost(Val2, Subtarget, ForCodesize); + if (Cost1 < Cost2) + return true; + if (Cost1 > Cost2) + return false; + + // If they are equal, try with !ForCodesize + return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) < + ConstantMaterializationCost(Val2, Subtarget, !ForCodesize); +} diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index c28983fcc15c..c232b6f0b45d 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -276,6 +276,10 @@ public: return NumCycles == 1; } + unsigned extraSizeToPredicateInstructions(const MachineFunction &MF, + unsigned NumInsts) const override; + unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const override; + bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, MachineBasicBlock &FMBB) const override; @@ -601,7 +605,8 @@ bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int &Offset, - const ARMBaseInstrInfo &TII); + const ARMBaseInstrInfo &TII, + const TargetRegisterInfo *TRI); /// Return true if Reg is defd between From and To bool registerDefinedBetween(unsigned Reg, MachineBasicBlock::iterator From, @@ -620,6 +625,20 @@ void addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond); void addPredicatedMveVpredROp(MachineInstrBuilder &MIB, unsigned Cond, unsigned Inactive); +/// Returns the number of instructions required to materialize the given +/// constant in a register, or 3 if a literal pool load is needed. +/// If ForCodesize is specified, an approximate cost in bytes is returned. 
+unsigned ConstantMaterializationCost(unsigned Val, + const ARMSubtarget *Subtarget, + bool ForCodesize = false); + +/// Returns true if Val1 has a lower Constant Materialization Cost than Val2. +/// Uses the cost from ConstantMaterializationCost, first with ForCodesize as +/// specified. If the scores are equal, return the comparison for !ForCodesize. +bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, + const ARMSubtarget *Subtarget, + bool ForCodesize = false); + } // end namespace llvm #endif // LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index dc99b37742da..1eaf871867e0 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -174,6 +174,12 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF, : CSR_AAPCS_ThisReturn_RegMask; } +ArrayRef ARMBaseRegisterInfo::getIntraCallClobberedRegs( + const MachineFunction *MF) const { + static const MCPhysReg IntraCallClobberedRegs[] = {ARM::R12}; + return ArrayRef(IntraCallClobberedRegs); +} + BitVector ARMBaseRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { const ARMSubtarget &STI = MF.getSubtarget(); @@ -185,7 +191,7 @@ getReservedRegs(const MachineFunction &MF) const { markSuperRegs(Reserved, ARM::PC); markSuperRegs(Reserved, ARM::FPSCR); markSuperRegs(Reserved, ARM::APSR_NZCV); - if (TFI->hasFP(MF)) + if (TFI->hasFP(MF) || STI.isTargetDarwin()) markSuperRegs(Reserved, getFramePointerReg(STI)); if (hasBasePointer(MF)) markSuperRegs(Reserved, BasePtr); @@ -217,7 +223,7 @@ isAsmClobberable(const MachineFunction &MF, unsigned PhysReg) const { const TargetRegisterClass * ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, - const MachineFunction &) const { + const MachineFunction &MF) const { const TargetRegisterClass *Super = RC; TargetRegisterClass::sc_iterator I = RC->getSuperClasses(); do { @@ -225,11 +231,13 @@ ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, case ARM::GPRRegClassID: case ARM::SPRRegClassID: case ARM::DPRRegClassID: + case ARM::GPRPairRegClassID: + return Super; case ARM::QPRRegClassID: case ARM::QQPRRegClassID: case ARM::QQQQPRRegClassID: - case ARM::GPRPairRegClassID: - return Super; + if (MF.getSubtarget().hasNEON()) + return Super; } Super = *I++; } while (Super); @@ -317,7 +325,7 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg, return false; unsigned PairedPhys = 0; - if (TargetRegisterInfo::isPhysicalRegister(Paired)) { + if (Register::isPhysicalRegister(Paired)) { PairedPhys = Paired; } else if (VRM && VRM->hasPhys(Paired)) { PairedPhys = getPairedGPR(VRM->getPhys(Paired), Odd, this); @@ -347,7 +355,7 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg, std::pair Hint = MRI->getRegAllocationHint(Reg); if ((Hint.first == (unsigned)ARMRI::RegPairOdd || Hint.first == (unsigned)ARMRI::RegPairEven) && - TargetRegisterInfo::isVirtualRegister(Hint.second)) { + Register::isVirtualRegister(Hint.second)) { // If 'Reg' is one of the even / odd register pair and it's now changed // (e.g. coalesced) into a different register. The other register of the // pair allocation hint must be updated to reflect the relationship @@ -357,7 +365,7 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg, // Make sure the pair has not already divorced. 
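ConstantMaterializationCost / HasLowerConstantMaterializationCost, added in the ARMBaseInstrInfo hunks above, score an immediate either in instructions or (with ForCodesize) in approximate bytes, and break ties using the other metric. A reduced standalone sketch covering only a few of the Thumb cases shown above; the real encodability checks live in ARM_AM and are omitted here:

  #include <cassert>

  // Simplified cost model: bytes when ForCodesize is true, otherwise an
  // instruction count. Everything not modelled falls back to a pool load.
  static unsigned thumbImmCostSketch(unsigned Val, bool ForCodesize) {
    if (Val <= 255)                  // single 16-bit MOV
      return ForCodesize ? 2 : 1;
    if (Val <= 510)                  // MOV + ADDS #imm8
      return ForCodesize ? 4 : 2;
    if (~Val <= 255)                 // MOV + MVN
      return ForCodesize ? 4 : 2;
    return ForCodesize ? 8 : 3;      // literal-pool load (worst case here)
  }

  // Mirrors the tie-breaking in HasLowerConstantMaterializationCost: compare
  // with the requested metric first, then with the other one.
  static bool cheaperToMaterialize(unsigned A, unsigned B, bool ForCodesize) {
    unsigned CA = thumbImmCostSketch(A, ForCodesize);
    unsigned CB = thumbImmCostSketch(B, ForCodesize);
    if (CA != CB)
      return CA < CB;
    return thumbImmCostSketch(A, !ForCodesize) < thumbImmCostSketch(B, !ForCodesize);
  }

  int main() {
    assert(thumbImmCostSketch(200, /*ForCodesize=*/true) == 2);
    assert(thumbImmCostSketch(400, /*ForCodesize=*/false) == 2);
    assert(cheaperToMaterialize(100, 400, /*ForCodesize=*/true));
    return 0;
  }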
if (Hint.second == Reg) { MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg); - if (TargetRegisterInfo::isVirtualRegister(NewReg)) + if (Register::isVirtualRegister(NewReg)) MRI->setRegAllocationHint(NewReg, Hint.first == (unsigned)ARMRI::RegPairOdd ? ARMRI::RegPairEven : ARMRI::RegPairOdd, OtherReg); @@ -663,7 +671,7 @@ void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, Done = rewriteARMFrameIndex(MI, i, BaseReg, Off, TII); else { assert(AFI->isThumb2Function()); - Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII); + Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII, this); } assert(Done && "Unable to resolve frame index!"); (void)Done; @@ -775,7 +783,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Done = rewriteARMFrameIndex(MI, FIOperandNum, FrameReg, Offset, TII); else { assert(AFI->isThumb2Function()); - Done = rewriteT2FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII); + Done = rewriteT2FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII, this); } if (Done) return; @@ -783,21 +791,32 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // If we get here, the immediate doesn't fit into the instruction. We folded // as much as possible above, handle the rest, providing a register that is // SP+LargeImm. - assert((Offset || - (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4 || - (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode6) && - "This code isn't needed if offset already handled!"); + assert( + (Offset || + (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4 || + (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode6 || + (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrModeT2_i7 || + (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrModeT2_i7s2 || + (MI.getDesc().TSFlags & ARMII::AddrModeMask) == + ARMII::AddrModeT2_i7s4) && + "This code isn't needed if offset already handled!"); unsigned ScratchReg = 0; int PIdx = MI.findFirstPredOperandIdx(); ARMCC::CondCodes Pred = (PIdx == -1) ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm(); Register PredReg = (PIdx == -1) ? Register() : MI.getOperand(PIdx+1).getReg(); - if (Offset == 0) + + const MCInstrDesc &MCID = MI.getDesc(); + const TargetRegisterClass *RegClass = + TII.getRegClass(MCID, FIOperandNum, this, *MI.getParent()->getParent()); + + if (Offset == 0 && + (Register::isVirtualRegister(FrameReg) || RegClass->contains(FrameReg))) // Must be addrmode4/6. 
MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, false); else { - ScratchReg = MF.getRegInfo().createVirtualRegister(&ARM::GPRRegClass); + ScratchReg = MF.getRegInfo().createVirtualRegister(RegClass); if (!AFI->isThumbFunction()) emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, Pred, PredReg, TII); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 7e2c72b4d712..477f3ad0a9a7 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -129,6 +129,9 @@ public: const uint32_t *getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID) const; + ArrayRef + getIntraCallClobberedRegs(const MachineFunction *MF) const override; + BitVector getReservedRegs(const MachineFunction &MF) const override; bool isAsmClobberable(const MachineFunction &MF, unsigned PhysReg) const override; @@ -176,8 +179,6 @@ public: Register getFrameRegister(const MachineFunction &MF) const override; unsigned getBaseRegister() const { return BasePtr; } - bool isLowRegister(unsigned Reg) const; - /// emitLoadConstPool - Emits a load from constpool to materialize the /// specified immediate. diff --git a/lib/Target/ARM/ARMBasicBlockInfo.cpp b/lib/Target/ARM/ARMBasicBlockInfo.cpp index 2de90e816b33..00a2231f59e3 100644 --- a/lib/Target/ARM/ARMBasicBlockInfo.cpp +++ b/lib/Target/ARM/ARMBasicBlockInfo.cpp @@ -6,14 +6,16 @@ // //===----------------------------------------------------------------------===// +#include "ARMBasicBlockInfo.h" #include "ARM.h" #include "ARMBaseInstrInfo.h" -#include "ARMBasicBlockInfo.h" #include "ARMMachineFunctionInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/Support/Debug.h" #include #define DEBUG_TYPE "arm-bb-utils" @@ -47,7 +49,7 @@ void ARMBasicBlockUtils::computeBlockSize(MachineBasicBlock *MBB) { BasicBlockInfo &BBI = BBInfo[MBB->getNumber()]; BBI.Size = 0; BBI.Unalign = 0; - BBI.PostAlign = 0; + BBI.PostAlign = Align::None(); for (MachineInstr &I : *MBB) { BBI.Size += TII->getInstSizeInBytes(I); @@ -62,8 +64,8 @@ void ARMBasicBlockUtils::computeBlockSize(MachineBasicBlock *MBB) { // tBR_JTr contains a .align 2 directive. if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) { - BBI.PostAlign = 2; - MBB->getParent()->ensureAlignment(2); + BBI.PostAlign = Align(4); + MBB->getParent()->ensureAlignment(Align(4)); } } @@ -126,9 +128,9 @@ void ARMBasicBlockUtils::adjustBBOffsetsAfter(MachineBasicBlock *BB) { for(unsigned i = BBNum + 1, e = MF.getNumBlockIDs(); i < e; ++i) { // Get the offset and known bits at the end of the layout predecessor. // Include the alignment of the current block. - unsigned LogAlign = MF.getBlockNumbered(i)->getAlignment(); - unsigned Offset = BBInfo[i - 1].postOffset(LogAlign); - unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign); + const Align Align = MF.getBlockNumbered(i)->getAlignment(); + const unsigned Offset = BBInfo[i - 1].postOffset(Align); + const unsigned KnownBits = BBInfo[i - 1].postKnownBits(Align); // This is where block i begins. Stop if the offset is already correct, // and we have updated 2 blocks. 
This is the maximum number of blocks diff --git a/lib/Target/ARM/ARMBasicBlockInfo.h b/lib/Target/ARM/ARMBasicBlockInfo.h index 400bba351cec..13df399ed995 100644 --- a/lib/Target/ARM/ARMBasicBlockInfo.h +++ b/lib/Target/ARM/ARMBasicBlockInfo.h @@ -21,17 +21,18 @@ namespace llvm { +struct BasicBlockInfo; using BBInfoVector = SmallVectorImpl; /// UnknownPadding - Return the worst case padding that could result from /// unknown offset bits. This does not include alignment padding caused by /// known offset bits. /// -/// @param LogAlign log2(alignment) +/// @param Alignment alignment /// @param KnownBits Number of known low offset bits. -inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) { - if (KnownBits < LogAlign) - return (1u << LogAlign) - (1u << KnownBits); +inline unsigned UnknownPadding(Align Alignment, unsigned KnownBits) { + if (KnownBits < Log2(Alignment)) + return Alignment.value() - (1ull << KnownBits); return 0; } @@ -65,10 +66,9 @@ struct BasicBlockInfo { /// multiple of 1 << Unalign. uint8_t Unalign = 0; - /// PostAlign - When non-zero, the block terminator contains a .align - /// directive, so the end of the block is aligned to 1 << PostAlign - /// bytes. - uint8_t PostAlign = 0; + /// PostAlign - When > 1, the block terminator contains a .align + /// directive, so the end of the block is aligned to PostAlign bytes. + Align PostAlign; BasicBlockInfo() = default; @@ -84,16 +84,16 @@ struct BasicBlockInfo { return Bits; } - /// Compute the offset immediately following this block. If LogAlign is + /// Compute the offset immediately following this block. If Align is /// specified, return the offset the successor block will get if it has /// this alignment. - unsigned postOffset(unsigned LogAlign = 0) const { + unsigned postOffset(Align Alignment = Align::None()) const { unsigned PO = Offset + Size; - unsigned LA = std::max(unsigned(PostAlign), LogAlign); - if (!LA) + const Align PA = std::max(PostAlign, Alignment); + if (PA == Align::None()) return PO; // Add alignment padding from the terminator. - return PO + UnknownPadding(LA, internalKnownBits()); + return PO + UnknownPadding(PA, internalKnownBits()); } /// Compute the number of known low bits of postOffset. If this block @@ -101,9 +101,8 @@ struct BasicBlockInfo { /// instruction alignment. An aligned terminator may increase the number /// of know bits. /// If LogAlign is given, also consider the alignment of the next block. 
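The ARMBasicBlockInfo changes above move from log2-encoded alignments to the Align type; the worst-case padding in front of a block is its byte alignment minus whatever the known low offset bits already guarantee. A small standalone version of that arithmetic, with plain integers standing in for llvm::Align:

  #include <cassert>
  #include <cstdint>

  // Worst-case padding before a block aligned to AlignBytes (a power of two)
  // when only the low KnownBits bits of the current offset are known to be
  // zero. Mirrors the UnknownPadding() computation above.
  static unsigned worstCasePadding(uint64_t AlignBytes, unsigned KnownBits) {
    unsigned LogAlign = 0;
    while ((1ull << LogAlign) < AlignBytes)
      ++LogAlign;                        // log2 of the alignment
    if (KnownBits < LogAlign)
      return AlignBytes - (1ull << KnownBits);
    return 0;                            // offset already known to be aligned
  }

  int main() {
    // 4-byte aligned block, one low bit known zero: up to 4 - 2 = 2 bytes pad.
    assert(worstCasePadding(4, 1) == 2);
    // Offset already known to be 4-byte aligned: no padding possible.
    assert(worstCasePadding(4, 2) == 0);
    return 0;
  }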
- unsigned postKnownBits(unsigned LogAlign = 0) const { - return std::max(std::max(unsigned(PostAlign), LogAlign), - internalKnownBits()); + unsigned postKnownBits(Align Align = Align::None()) const { + return std::max(Log2(std::max(PostAlign, Align)), internalKnownBits()); } }; diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp index 0cbe6e1871e4..d3b595ce8323 100644 --- a/lib/Target/ARM/ARMCallLowering.cpp +++ b/lib/Target/ARM/ARMCallLowering.cpp @@ -90,6 +90,8 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler { MachineInstrBuilder &MIB, CCAssignFn *AssignFn) : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {} + bool isIncomingArgumentHandler() const override { return false; } + Register getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) && @@ -169,8 +171,9 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler { bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, - const CallLowering::ArgInfo &Info, CCState &State) override { - if (AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State)) + const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags, + CCState &State) override { + if (AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State)) return true; StackSize = @@ -199,9 +202,8 @@ void ARMCallLowering::splitToValueTypes(const ArgInfo &OrigArg, if (SplitVTs.size() == 1) { // Even if there is no splitting to do, we still want to replace the // original type (e.g. pointer type -> integer). - auto Flags = OrigArg.Flags; - unsigned OriginalAlignment = DL.getABITypeAlignment(OrigArg.Ty); - Flags.setOrigAlign(OriginalAlignment); + auto Flags = OrigArg.Flags[0]; + Flags.setOrigAlign(Align(DL.getABITypeAlignment(OrigArg.Ty))); SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx), Flags, OrigArg.IsFixed); return; @@ -211,10 +213,9 @@ void ARMCallLowering::splitToValueTypes(const ArgInfo &OrigArg, for (unsigned i = 0, e = SplitVTs.size(); i != e; ++i) { EVT SplitVT = SplitVTs[i]; Type *SplitTy = SplitVT.getTypeForEVT(Ctx); - auto Flags = OrigArg.Flags; + auto Flags = OrigArg.Flags[0]; - unsigned OriginalAlignment = DL.getABITypeAlignment(SplitTy); - Flags.setOrigAlign(OriginalAlignment); + Flags.setOrigAlign(Align(DL.getABITypeAlignment(SplitTy))); bool NeedsConsecutiveRegisters = TLI.functionArgumentNeedsConsecutiveRegisters( @@ -286,7 +287,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler { CCAssignFn AssignFn) : ValueHandler(MIRBuilder, MRI, AssignFn) {} - bool isArgumentHandler() const override { return true; } + bool isIncomingArgumentHandler() const override { return true; } Register getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { @@ -298,7 +299,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler { int FI = MFI.CreateFixedObject(Size, Offset, true); MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI); - unsigned AddrReg = + Register AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(MPO.getAddrSpace(), 32)); MIRBuilder.buildFrameIndex(AddrReg, FI); @@ -405,6 +406,7 @@ struct FormalArgHandler : public IncomingValueHandler { : IncomingValueHandler(MIRBuilder, MRI, AssignFn) {} void markPhysRegUsed(unsigned PhysReg) override { + MIRBuilder.getMRI()->addLiveIn(PhysReg); MIRBuilder.getMBB().addLiveIn(PhysReg); } }; @@ -498,11 +500,7 @@ unsigned getCallOpcode(const ARMSubtarget &STI, bool isDirect) { } } // end 
anonymous namespace -bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, - CallingConv::ID CallConv, - const MachineOperand &Callee, - const ArgInfo &OrigRet, - ArrayRef OrigArgs) const { +bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const { MachineFunction &MF = MIRBuilder.getMF(); const auto &TLI = *getTLI(); const auto &DL = MF.getDataLayout(); @@ -520,7 +518,7 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // Create the call instruction so we can add the implicit uses of arg // registers, but don't insert it yet. - bool IsDirect = !Callee.isReg(); + bool IsDirect = !Info.Callee.isReg(); auto CallOpcode = getCallOpcode(STI, IsDirect); auto MIB = MIRBuilder.buildInstrNoInsert(CallOpcode); @@ -528,35 +526,35 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, if (IsThumb) MIB.add(predOps(ARMCC::AL)); - MIB.add(Callee); + MIB.add(Info.Callee); if (!IsDirect) { - auto CalleeReg = Callee.getReg(); - if (CalleeReg && !TRI->isPhysicalRegister(CalleeReg)) { + auto CalleeReg = Info.Callee.getReg(); + if (CalleeReg && !Register::isPhysicalRegister(CalleeReg)) { unsigned CalleeIdx = IsThumb ? 2 : 0; MIB->getOperand(CalleeIdx).setReg(constrainOperandRegClass( MF, *TRI, MRI, *STI.getInstrInfo(), *STI.getRegBankInfo(), - *MIB.getInstr(), MIB->getDesc(), Callee, CalleeIdx)); + *MIB.getInstr(), MIB->getDesc(), Info.Callee, CalleeIdx)); } } - MIB.addRegMask(TRI->getCallPreservedMask(MF, CallConv)); + MIB.addRegMask(TRI->getCallPreservedMask(MF, Info.CallConv)); bool IsVarArg = false; SmallVector ArgInfos; - for (auto Arg : OrigArgs) { + for (auto Arg : Info.OrigArgs) { if (!isSupportedType(DL, TLI, Arg.Ty)) return false; if (!Arg.IsFixed) IsVarArg = true; - if (Arg.Flags.isByVal()) + if (Arg.Flags[0].isByVal()) return false; splitToValueTypes(Arg, ArgInfos, MF); } - auto ArgAssignFn = TLI.CCAssignFnForCall(CallConv, IsVarArg); + auto ArgAssignFn = TLI.CCAssignFnForCall(Info.CallConv, IsVarArg); OutgoingValueHandler ArgHandler(MIRBuilder, MRI, MIB, ArgAssignFn); if (!handleAssignments(MIRBuilder, ArgInfos, ArgHandler)) return false; @@ -564,13 +562,13 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // Now we can add the actual call instruction to the correct basic block. 
MIRBuilder.insertInstr(MIB); - if (!OrigRet.Ty->isVoidTy()) { - if (!isSupportedType(DL, TLI, OrigRet.Ty)) + if (!Info.OrigRet.Ty->isVoidTy()) { + if (!isSupportedType(DL, TLI, Info.OrigRet.Ty)) return false; ArgInfos.clear(); - splitToValueTypes(OrigRet, ArgInfos, MF); - auto RetAssignFn = TLI.CCAssignFnForReturn(CallConv, IsVarArg); + splitToValueTypes(Info.OrigRet, ArgInfos, MF); + auto RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv, IsVarArg); CallReturnHandler RetHandler(MIRBuilder, MRI, MIB, RetAssignFn); if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler)) return false; diff --git a/lib/Target/ARM/ARMCallLowering.h b/lib/Target/ARM/ARMCallLowering.h index 794127b5ebc7..ddbc9feb90e2 100644 --- a/lib/Target/ARM/ARMCallLowering.h +++ b/lib/Target/ARM/ARMCallLowering.h @@ -38,9 +38,8 @@ public: bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef> VRegs) const override; - bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, - const MachineOperand &Callee, const ArgInfo &OrigRet, - ArrayRef OrigArgs) const override; + bool lowerCall(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info) const override; private: bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val, diff --git a/lib/Target/ARM/ARMCallingConv.cpp b/lib/Target/ARM/ARMCallingConv.cpp index 5ede7c67f7c2..92ebc542b423 100644 --- a/lib/Target/ARM/ARMCallingConv.cpp +++ b/lib/Target/ARM/ARMCallingConv.cpp @@ -193,7 +193,7 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT, // Try to allocate a contiguous block of registers, each of the correct // size to hold one member. auto &DL = State.getMachineFunction().getDataLayout(); - unsigned StackAlign = DL.getStackAlignment(); + unsigned StackAlign = DL.getStackAlignment().value(); unsigned Align = std::min(PendingMembers[0].getExtraInfo(), StackAlign); ArrayRef RegList; diff --git a/lib/Target/ARM/ARMCodeGenPrepare.cpp b/lib/Target/ARM/ARMCodeGenPrepare.cpp index 2fc5f4aaab50..1c2c8aef55bb 100644 --- a/lib/Target/ARM/ARMCodeGenPrepare.cpp +++ b/lib/Target/ARM/ARMCodeGenPrepare.cpp @@ -179,16 +179,12 @@ public: } static bool GenerateSignBits(Value *V) { - if (auto *Arg = dyn_cast(V)) - return Arg->hasSExtAttr(); - if (!isa(V)) return false; unsigned Opc = cast(V)->getOpcode(); return Opc == Instruction::AShr || Opc == Instruction::SDiv || - Opc == Instruction::SRem || Opc == Instruction::SExt || - Opc == Instruction::SIToFP; + Opc == Instruction::SRem || Opc == Instruction::SExt; } static bool EqualTypeSize(Value *V) { @@ -806,54 +802,48 @@ void IRPromoter::Mutate(Type *OrigTy, /// return value is zeroext. We don't allow opcodes that can introduce sign /// bits. bool ARMCodeGenPrepare::isSupportedValue(Value *V) { - if (auto *I = dyn_cast(V)) { - // Now that we allow small types than TypeSize, only allow icmp of - // TypeSize because they will require a trunc to be legalised. - // TODO: Allow icmp of smaller types, and calculate at the end - // whether the transform would be beneficial. 
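The ARMCallLowering change above collapses lowerCall's separate callee, return and argument parameters into a single CallLoweringInfo descriptor. A generic sketch of that parameter-object shape; the types and field names are purely illustrative, not GlobalISel's:

  #include <string>
  #include <vector>

  // One descriptor instead of several positional parameters: easy to extend
  // (e.g. with tail-call flags) without touching every target override.
  struct ArgSketch {
    std::string Name;
    unsigned SizeInBits = 0;
    bool IsFixed = true;        // false for varargs
  };

  struct CallDescriptorSketch {
    unsigned CallConv = 0;
    std::string Callee;         // direct symbol or a register name
    ArgSketch ReturnValue;
    std::vector<ArgSketch> Args;
  };

  static bool lowerCallSketch(const CallDescriptorSketch &Info) {
    for (const ArgSketch &A : Info.Args)
      if (A.SizeInBits > 64)
        return false;           // pretend wide by-value args are unsupported
    return !Info.Callee.empty();
  }

  int main() {
    CallDescriptorSketch Info;
    Info.Callee = "memcpy";
    Info.Args = {{"dst", 32}, {"src", 32}, {"n", 32}};
    return lowerCallSketch(Info) ? 0 : 1;
  }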
- if (isa(I->getOperand(0)->getType())) + if (auto *I = dyn_cast(V)) { + switch (I->getOpcode()) { + default: + return isa(I) && isSupportedType(I) && + !GenerateSignBits(I); + case Instruction::GetElementPtr: + case Instruction::Store: + case Instruction::Br: + case Instruction::Switch: return true; - return EqualTypeSize(I->getOperand(0)); - } - - if (GenerateSignBits(V)) { - LLVM_DEBUG(dbgs() << "ARM CGP: No, instruction can generate sign bits.\n"); - return false; - } - - // Memory instructions - if (isa(V) || isa(V)) - return true; - - // Branches and targets. - if( isa(V) || isa(V) || isa(V)) - return true; - - // Non-instruction values that we can handle. - if ((isa(V) && !isa(V)) || isa(V)) + case Instruction::PHI: + case Instruction::Select: + case Instruction::Ret: + case Instruction::Load: + case Instruction::Trunc: + case Instruction::BitCast: + return isSupportedType(I); + case Instruction::ZExt: + return isSupportedType(I->getOperand(0)); + case Instruction::ICmp: + // Now that we allow small types than TypeSize, only allow icmp of + // TypeSize because they will require a trunc to be legalised. + // TODO: Allow icmp of smaller types, and calculate at the end + // whether the transform would be beneficial. + if (isa(I->getOperand(0)->getType())) + return true; + return EqualTypeSize(I->getOperand(0)); + case Instruction::Call: { + // Special cases for calls as we need to check for zeroext + // TODO We should accept calls even if they don't have zeroext, as they + // can still be sinks. + auto *Call = cast(I); + return isSupportedType(Call) && + Call->hasRetAttr(Attribute::AttrKind::ZExt); + } + } + } else if (isa(V) && !isa(V)) { return isSupportedType(V); - - if (isa(V) || isa(V) || isa(V) || - isa(V)) + } else if (isa(V)) return isSupportedType(V); - if (auto *Cast = dyn_cast(V)) - return isSupportedType(Cast) || isSupportedType(Cast->getOperand(0)); - - // Special cases for calls as we need to check for zeroext - // TODO We should accept calls even if they don't have zeroext, as they can - // still be sinks. 
- if (auto *Call = dyn_cast(V)) - return isSupportedType(Call) && - Call->hasRetAttr(Attribute::AttrKind::ZExt); - - if (!isa(V)) - return false; - - if (!isSupportedType(V)) - return false; - - return true; + return isa(V); } /// Check that the type of V would be promoted and that the original type is diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 60e5d7bf6098..24ca25f73e96 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -26,8 +26,10 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -69,6 +71,7 @@ STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk"); STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed"); STATISTIC(NumJTMoved, "Number of jump table destination blocks moved"); STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted"); +STATISTIC(NumLEInserted, "Number of LE backwards branches inserted"); static cl::opt AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true), @@ -212,6 +215,7 @@ namespace { const ARMBaseInstrInfo *TII; const ARMSubtarget *STI; ARMFunctionInfo *AFI; + MachineDominatorTree *DT = nullptr; bool isThumb; bool isThumb1; bool isThumb2; @@ -224,6 +228,11 @@ namespace { bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( MachineFunctionProperties::Property::NoVRegs); @@ -238,7 +247,7 @@ namespace { void doInitialJumpTablePlacement(std::vector &CPEMIs); bool BBHasFallthrough(MachineBasicBlock *MBB); CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); - unsigned getCPELogAlign(const MachineInstr *CPEMI); + Align getCPEAlign(const MachineInstr *CPEMI); void scanFunctionJumpTables(); void initializeFunctionInfo(const std::vector &CPEMIs); MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI); @@ -327,8 +336,7 @@ LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() { const BasicBlockInfo &BBI = BBInfo[J]; dbgs() << format("%08x %bb.%u\t", BBI.Offset, J) << " kb=" << unsigned(BBI.KnownBits) - << " ua=" << unsigned(BBI.Unalign) - << " pa=" << unsigned(BBI.PostAlign) + << " ua=" << unsigned(BBI.Unalign) << " pa=" << Log2(BBI.PostAlign) << format(" size=%#x\n", BBInfo[J].Size); } }); @@ -349,6 +357,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { isPositionIndependentOrROPI = STI->getTargetLowering()->isPositionIndependent() || STI->isROPI(); AFI = MF->getInfo(); + DT = &getAnalysis(); isThumb = AFI->isThumbFunction(); isThumb1 = AFI->isThumb1OnlyFunction(); @@ -357,9 +366,6 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { HasFarJump = false; bool GenerateTBB = isThumb2 || (isThumb1 && SynthesizeThumb1TBB); - // This pass invalidates liveness information when it splits basic blocks. 
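The ARMCodeGenPrepare::isSupportedValue rewrite above replaces a chain of isa<> checks with one switch over the opcode, keeping the rule that anything which may generate sign bits (ashr, sdiv, srem, sext) is rejected. A compressed standalone sketch of that classification style, with the opcode set and rules heavily simplified:

  #include <cassert>

  enum class OpKind { Add, AShr, SDiv, Load, Store, Branch, ICmp, Call, Other };

  // Operations that can set high bits and so defeat zero-extension-based
  // promotion.
  static bool mayGenerateSignBitsSketch(OpKind K) {
    switch (K) {
    case OpKind::AShr:
    case OpKind::SDiv:
      return true;
    default:
      return false;
    }
  }

  static bool isSupportedValueSketch(OpKind K) {
    switch (K) {
    case OpKind::Store:
    case OpKind::Branch:
      return true;                      // sinks and control flow: always fine
    case OpKind::Load:
    case OpKind::ICmp:
    case OpKind::Call:
      return true;                      // fine here; real code also checks types
    default:
      return !mayGenerateSignBitsSketch(K);
    }
  }

  int main() {
    assert(isSupportedValueSketch(OpKind::Store));
    assert(!isSupportedValueSketch(OpKind::AShr));
    return 0;
  }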
- MF->getRegInfo().invalidateLiveness(); - // Renumber all of the machine basic blocks in the function, guaranteeing that // the numbers agree with the position of the block in the function. MF->RenumberBlocks(); @@ -398,7 +404,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { // Functions with jump tables need an alignment of 4 because they use the ADR // instruction, which aligns the PC to 4 bytes before adding an offset. if (!T2JumpTables.empty()) - MF->ensureAlignment(2); + MF->ensureAlignment(Align(4)); /// Remove dead constant pool entries. MadeChange |= removeUnusedCPEntries(); @@ -487,8 +493,9 @@ ARMConstantIslands::doInitialConstPlacement(std::vector &CPEMIs) MachineBasicBlock *BB = MF->CreateMachineBasicBlock(); MF->push_back(BB); - // MachineConstantPool measures alignment in bytes. We measure in log2(bytes). - unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment()); + // MachineConstantPool measures alignment in bytes. + const Align MaxAlign(MCP->getConstantPoolAlignment()); + const unsigned MaxLogAlign = Log2(MaxAlign); // Mark the basic block as required by the const-pool. BB->setAlignment(MaxAlign); @@ -501,7 +508,8 @@ ARMConstantIslands::doInitialConstPlacement(std::vector &CPEMIs) // alignment of all entries as long as BB is sufficiently aligned. Keep // track of the insertion point for each alignment. We are going to bucket // sort the entries as they are created. - SmallVector InsPoint(MaxAlign + 1, BB->end()); + SmallVector InsPoint(MaxLogAlign + 1, + BB->end()); // Add all of the constants from the constant pool to the end block, use an // identity mapping of CPI's to CPE's. @@ -526,7 +534,7 @@ ARMConstantIslands::doInitialConstPlacement(std::vector &CPEMIs) // Ensure that future entries with higher alignment get inserted before // CPEMI. This is bucket sort with iterators. - for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a) + for (unsigned a = LogAlign + 1; a <= MaxLogAlign; ++a) if (InsPoint[a] == InsAt) InsPoint[a] = CPEMI; @@ -640,29 +648,27 @@ ARMConstantIslands::findConstPoolEntry(unsigned CPI, return nullptr; } -/// getCPELogAlign - Returns the required alignment of the constant pool entry -/// represented by CPEMI. Alignment is measured in log2(bytes) units. -unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) { +/// getCPEAlign - Returns the required alignment of the constant pool entry +/// represented by CPEMI. +Align ARMConstantIslands::getCPEAlign(const MachineInstr *CPEMI) { switch (CPEMI->getOpcode()) { case ARM::CONSTPOOL_ENTRY: break; case ARM::JUMPTABLE_TBB: - return isThumb1 ? 2 : 0; + return isThumb1 ? Align(4) : Align(1); case ARM::JUMPTABLE_TBH: - return isThumb1 ? 2 : 1; + return isThumb1 ? Align(4) : Align(2); case ARM::JUMPTABLE_INSTS: - return 1; + return Align(2); case ARM::JUMPTABLE_ADDRS: - return 2; + return Align(4); default: llvm_unreachable("unknown constpool entry kind"); } unsigned CPI = getCombinedIndex(CPEMI); assert(CPI < MCP->getConstants().size() && "Invalid constant pool index."); - unsigned Align = MCP->getConstants()[CPI].getAlignment(); - assert(isPowerOf2_32(Align) && "Invalid CPE alignment"); - return Log2_32(Align); + return Align(MCP->getConstants()[CPI].getAlignment()); } /// scanFunctionJumpTables - Do a scan of the function, building up @@ -687,7 +693,7 @@ initializeFunctionInfo(const std::vector &CPEMIs) { BBInfoVector &BBInfo = BBUtils->getBBInfo(); // The known bits of the entry block offset are determined by the function // alignment. 
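doInitialConstPlacement above keeps one insertion iterator per log2(alignment) so that higher-aligned constant-pool entries always land in front of lower-aligned ones ("bucket sort with iterators"). A self-contained sketch of that placement scheme over a plain std::list:

  #include <cassert>
  #include <list>
  #include <vector>

  struct Entry {
    unsigned LogAlign;   // log2 of the required alignment
    int Value;
  };

  // Append entries so the final list is ordered by descending alignment, by
  // keeping one insertion point per alignment bucket.
  static std::list<Entry> placeByAlignment(const std::vector<Entry> &Input,
                                           unsigned MaxLogAlign) {
    std::list<Entry> Block;
    std::vector<std::list<Entry>::iterator> InsPoint(MaxLogAlign + 1, Block.end());
    for (const Entry &E : Input) {
      auto At = InsPoint[E.LogAlign];
      auto It = Block.insert(At, E);
      // Anything that later needs a higher alignment must go before this entry.
      for (unsigned A = E.LogAlign + 1; A <= MaxLogAlign; ++A)
        if (InsPoint[A] == At)
          InsPoint[A] = It;
    }
    return Block;
  }

  int main() {
    std::list<Entry> L = placeByAlignment({{0, 1}, {2, 2}, {1, 3}, {2, 4}}, 2);
    // 4-byte entries (log2 == 2) come first, then 2-byte, then 1-byte.
    unsigned Prev = ~0u;
    for (const Entry &E : L) {
      assert(E.LogAlign <= Prev);
      Prev = E.LogAlign;
    }
    return 0;
  }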
- BBInfo.front().KnownBits = MF->getAlignment(); + BBInfo.front().KnownBits = Log2(MF->getAlignment()); // Compute block offsets and known bits. BBUtils->adjustBBOffsetsAfter(&MF->front()); @@ -824,11 +830,6 @@ initializeFunctionInfo(const std::vector &CPEMIs) { Scale = 2; // +-(offset_8*2) NegOk = true; break; - - case ARM::tLDRHi: - Bits = 5; - Scale = 2; // +(offset_5*2) - break; } // Remember that this is a user of a CP entry. @@ -885,6 +886,13 @@ void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) { MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) { MachineBasicBlock *OrigBB = MI->getParent(); + // Collect liveness information at MI. + LivePhysRegs LRs(*MF->getSubtarget().getRegisterInfo()); + LRs.addLiveOuts(*OrigBB); + auto LivenessEnd = ++MachineBasicBlock::iterator(MI).getReverse(); + for (MachineInstr &LiveMI : make_range(OrigBB->rbegin(), LivenessEnd)) + LRs.stepBackward(LiveMI); + // Create a new MBB for the code after the OrigBB. MachineBasicBlock *NewBB = MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); @@ -913,6 +921,12 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) { // OrigBB branches to NewBB. OrigBB->addSuccessor(NewBB); + // Update live-in information in the new block. + MachineRegisterInfo &MRI = MF->getRegInfo(); + for (MCPhysReg L : LRs) + if (!MRI.isReserved(L)) + NewBB->addLiveIn(L); + // Update internal data structures to account for the newly inserted MBB. // This is almost the same as updateForInsertedWaterBlock, except that // the Water goes after OrigBB, not NewBB. @@ -1007,13 +1021,13 @@ bool ARMConstantIslands::isWaterInRange(unsigned UserOffset, MachineBasicBlock* Water, CPUser &U, unsigned &Growth) { BBInfoVector &BBInfo = BBUtils->getBBInfo(); - unsigned CPELogAlign = getCPELogAlign(U.CPEMI); - unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign); - unsigned NextBlockOffset, NextBlockAlignment; + const Align CPEAlign = getCPEAlign(U.CPEMI); + const unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPEAlign); + unsigned NextBlockOffset; + Align NextBlockAlignment; MachineFunction::const_iterator NextBlock = Water->getIterator(); if (++NextBlock == MF->end()) { NextBlockOffset = BBInfo[Water->getNumber()].postOffset(); - NextBlockAlignment = 0; } else { NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset; NextBlockAlignment = NextBlock->getAlignment(); @@ -1028,13 +1042,13 @@ bool ARMConstantIslands::isWaterInRange(unsigned UserOffset, Growth = CPEEnd - NextBlockOffset; // Compute the padding that would go at the end of the CPE to align the next // block. - Growth += OffsetToAlignment(CPEEnd, 1ULL << NextBlockAlignment); + Growth += offsetToAlignment(CPEEnd, NextBlockAlignment); // If the CPE is to be inserted before the instruction, that will raise // the offset of the instruction. Also account for unknown alignment padding // in blocks between CPE and the user. if (CPEOffset < UserOffset) - UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign); + UserOffset += Growth + UnknownPadding(MF->getAlignment(), Log2(CPEAlign)); } else // CPE fits in existing padding. Growth = 0; @@ -1200,8 +1214,8 @@ bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset, // inserting islands between BB0 and BB1 makes other accesses out of range. 
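splitBlockBeforeInstr above now preserves liveness instead of discarding it: starting from the original block's live-outs, it steps backwards over the instructions that will move into the new block, and whatever is still live at the split point becomes the new block's live-in set. A minimal sketch of that backwards walk, using sets of plain register ids (the real code uses LivePhysRegs and also skips reserved registers):

  #include <cassert>
  #include <set>
  #include <vector>

  struct InstrSketch {
    std::vector<unsigned> Defs; // registers written
    std::vector<unsigned> Uses; // registers read
  };

  static std::set<unsigned>
  liveInsAtSplit(const std::vector<InstrSketch> &Block, unsigned SplitIndex,
                 std::set<unsigned> LiveOuts) {
    // Walk backwards from the end of the block down to the split point, i.e.
    // over the instructions that move into the new block.
    for (size_t I = Block.size(); I > SplitIndex; --I) {
      const InstrSketch &MI = Block[I - 1];
      for (unsigned D : MI.Defs)
        LiveOuts.erase(D);   // a def kills liveness above the instruction
      for (unsigned U : MI.Uses)
        LiveOuts.insert(U);  // a use makes the register live above it
    }
    return LiveOuts;         // live at the split point == new block's live-ins
  }

  int main() {
    // r1 = ...; r2 = r1 + r3   -- split before the second instruction.
    std::vector<InstrSketch> Block = {{{1}, {}}, {{2}, {1, 3}}};
    std::set<unsigned> LiveIns = liveInsAtSplit(Block, 1, {2});
    assert(LiveIns == std::set<unsigned>({1, 3}));
    return 0;
  }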
MachineBasicBlock *UserBB = U.MI->getParent(); BBInfoVector &BBInfo = BBUtils->getBBInfo(); - unsigned MinNoSplitDisp = - BBInfo[UserBB->getNumber()].postOffset(getCPELogAlign(U.CPEMI)); + const Align CPEAlign = getCPEAlign(U.CPEMI); + unsigned MinNoSplitDisp = BBInfo[UserBB->getNumber()].postOffset(CPEAlign); if (CloserWater && MinNoSplitDisp > U.getMaxDisp() / 2) return false; for (water_iterator IP = std::prev(WaterList.end()), B = WaterList.begin();; @@ -1254,7 +1268,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, CPUser &U = CPUsers[CPUserIndex]; MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; - unsigned CPELogAlign = getCPELogAlign(CPEMI); + const Align CPEAlign = getCPEAlign(CPEMI); MachineBasicBlock *UserMBB = UserMI->getParent(); BBInfoVector &BBInfo = BBUtils->getBBInfo(); const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()]; @@ -1267,7 +1281,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, // Size of branch to insert. unsigned Delta = isThumb1 ? 2 : 4; // Compute the offset where the CPE will begin. - unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + Delta; + unsigned CPEOffset = UserBBI.postOffset(CPEAlign) + Delta; if (isOffsetInRange(UserOffset, CPEOffset, U)) { LLVM_DEBUG(dbgs() << "Split at end of " << printMBBReference(*UserMBB) @@ -1308,11 +1322,11 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, // Try to split the block so it's fully aligned. Compute the latest split // point where we can add a 4-byte branch instruction, and then align to - // LogAlign which is the largest possible alignment in the function. - unsigned LogAlign = MF->getAlignment(); - assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry"); + // Align which is the largest possible alignment in the function. + const Align Align = MF->getAlignment(); + assert(Align >= CPEAlign && "Over-aligned constant pool entry"); unsigned KnownBits = UserBBI.internalKnownBits(); - unsigned UPad = UnknownPadding(LogAlign, KnownBits); + unsigned UPad = UnknownPadding(Align, KnownBits); unsigned BaseInsertOffset = UserOffset + U.getMaxDisp() - UPad; LLVM_DEBUG(dbgs() << format("Split in middle of big block before %#x", BaseInsertOffset)); @@ -1323,7 +1337,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, BaseInsertOffset -= 4; LLVM_DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset) - << " la=" << LogAlign << " kb=" << KnownBits + << " la=" << Log2(Align) << " kb=" << KnownBits << " up=" << UPad << '\n'); // This could point off the end of the block if we've already got constant @@ -1337,6 +1351,28 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, BaseInsertOffset = std::max(UserBBI.postOffset() - UPad - 8, UserOffset + TII->getInstSizeInBytes(*UserMI) + 1); + // If the CP reference (i.e., UserOffset) is in the first four instructions + // after IT, this recalculated BaseInsertOffset could be in the middle of + // an IT block. If it is, change the BaseInsertOffset to just after the + // IT block. This still keeps the CP entry in range because of the + // following reasons. + // 1. The initial BaseInsertOffset calculated is (UserOffset + + // U.getMaxDisp() - UPad). + // 2. An IT block is only at most 4 instructions plus the "it" itself (18 + // bytes). + // 3. All the relevant instructions support a much larger maximum + // displacement.
+ MachineBasicBlock::iterator I = UserMI; + ++I; + for (unsigned Offset = UserOffset + TII->getInstSizeInBytes(*UserMI), + PredReg = 0; + I->getOpcode() != ARM::t2IT && + getITInstrPredicate(*I, PredReg) != ARMCC::AL; + Offset += TII->getInstSizeInBytes(*I), I = std::next(I)) { + BaseInsertOffset = + std::max(BaseInsertOffset, Offset + TII->getInstSizeInBytes(*I) + 1); + assert(I != UserMBB->end() && "Fell off end of block"); + } LLVM_DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset)); } unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad + @@ -1354,8 +1390,8 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, CPUser &U = CPUsers[CPUIndex]; if (!isOffsetInRange(Offset, EndInsertOffset, U)) { // Shift intertion point by one unit of alignment so it is within reach. - BaseInsertOffset -= 1u << LogAlign; - EndInsertOffset -= 1u << LogAlign; + BaseInsertOffset -= Align.value(); + EndInsertOffset -= Align.value(); } // This is overly conservative, as we don't account for CPEMIs being // reused within the block, but it doesn't matter much. Also assume CPEs @@ -1397,9 +1433,10 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, } // We really must not split an IT block. - LLVM_DEBUG(unsigned PredReg; assert( - !isThumb || getITInstrPredicate(*MI, PredReg) == ARMCC::AL)); - +#ifndef NDEBUG + unsigned PredReg; + assert(!isThumb || getITInstrPredicate(*MI, PredReg) == ARMCC::AL); +#endif NewMBB = splitBlockBeforeInstr(&*MI); } @@ -1464,9 +1501,9 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex, // Always align the new block because CP entries can be smaller than 4 // bytes. Be careful not to decrease the existing alignment, e.g. NewMBB may // be an already aligned constant pool block. - const unsigned Align = isThumb ? 1 : 2; - if (NewMBB->getAlignment() < Align) - NewMBB->setAlignment(Align); + const Align Alignment = isThumb ? Align(2) : Align(4); + if (NewMBB->getAlignment() < Alignment) + NewMBB->setAlignment(Alignment); // Remove the original WaterList entry; we want subsequent insertions in // this vicinity to go after the one we're about to insert. This @@ -1495,7 +1532,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex, decrementCPEReferenceCount(CPI, CPEMI); // Mark the basic block as aligned as required by the const-pool entry. - NewIsland->setAlignment(getCPELogAlign(U.CPEMI)); + NewIsland->setAlignment(getCPEAlign(U.CPEMI)); // Increase the size of the island block to account for the new entry. BBUtils->adjustBBSize(NewIsland, Size); @@ -1529,10 +1566,11 @@ void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) { BBInfo[CPEBB->getNumber()].Size = 0; // This block no longer needs to be aligned. - CPEBB->setAlignment(0); - } else + CPEBB->setAlignment(Align::None()); + } else { // Entries are sorted by descending alignment, so realign from the front. - CPEBB->setAlignment(getCPELogAlign(&*CPEBB->begin())); + CPEBB->setAlignment(getCPEAlign(&*CPEBB->begin())); + } BBUtils->adjustBBOffsetsAfter(CPEBB); // An island has only one predecessor BB and one successor BB. 
Check if @@ -1620,7 +1658,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) { // L2: ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm(); CC = ARMCC::getOppositeCondition(CC); - unsigned CCReg = MI->getOperand(2).getReg(); + Register CCReg = MI->getOperand(2).getReg(); // If the branch is at the end of its MBB and that has a fall-through block, // direct the updated conditional branch to the fall-through block. Otherwise, @@ -1778,16 +1816,10 @@ bool ARMConstantIslands::optimizeThumb2Instructions() { return MadeChange; } + bool ARMConstantIslands::optimizeThumb2Branches() { - bool MadeChange = false; - // The order in which branches appear in ImmBranches is approximately their - // order within the function body. By visiting later branches first, we reduce - // the distance between earlier forward branches and their targets, making it - // more likely that the cbn?z optimization, which can only apply to forward - // branches, will succeed. - for (unsigned i = ImmBranches.size(); i != 0; --i) { - ImmBranch &Br = ImmBranches[i-1]; + auto TryShrinkBranch = [this](ImmBranch &Br) { unsigned Opcode = Br.MI->getOpcode(); unsigned NewOpc = 0; unsigned Scale = 1; @@ -1815,47 +1847,115 @@ bool ARMConstantIslands::optimizeThumb2Branches() { BBUtils->adjustBBSize(MBB, -2); BBUtils->adjustBBOffsetsAfter(MBB); ++NumT2BrShrunk; - MadeChange = true; + return true; } } + return false; + }; - Opcode = Br.MI->getOpcode(); - if (Opcode != ARM::tBcc) - continue; + struct ImmCompare { + MachineInstr* MI = nullptr; + unsigned NewOpc = 0; + }; + + auto FindCmpForCBZ = [this](ImmBranch &Br, ImmCompare &ImmCmp, + MachineBasicBlock *DestBB) { + ImmCmp.MI = nullptr; + ImmCmp.NewOpc = 0; // If the conditional branch doesn't kill CPSR, then CPSR can be liveout // so this transformation is not safe. if (!Br.MI->killsRegister(ARM::CPSR)) - continue; + return false; - NewOpc = 0; unsigned PredReg = 0; + unsigned NewOpc = 0; ARMCC::CondCodes Pred = getInstrPredicate(*Br.MI, PredReg); if (Pred == ARMCC::EQ) NewOpc = ARM::tCBZ; else if (Pred == ARMCC::NE) NewOpc = ARM::tCBNZ; - if (!NewOpc) - continue; - MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); + else + return false; + // Check if the distance is within 126. Subtract starting offset by 2 // because the cmp will be eliminated. unsigned BrOffset = BBUtils->getOffsetOf(Br.MI) + 4 - 2; BBInfoVector &BBInfo = BBUtils->getBBInfo(); unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset; if (BrOffset >= DestOffset || (DestOffset - BrOffset) > 126) - continue; + return false; // Search backwards to find a tCMPi8 auto *TRI = STI->getRegisterInfo(); MachineInstr *CmpMI = findCMPToFoldIntoCBZ(Br.MI, TRI); if (!CmpMI || CmpMI->getOpcode() != ARM::tCMPi8) + return false; + + ImmCmp.MI = CmpMI; + ImmCmp.NewOpc = NewOpc; + return true; + }; + + auto TryConvertToLE = [this](ImmBranch &Br, ImmCompare &Cmp) { + if (Br.MI->getOpcode() != ARM::t2Bcc || !STI->hasLOB() || + STI->hasMinSize()) + return false; + + MachineBasicBlock *MBB = Br.MI->getParent(); + MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); + if (BBUtils->getOffsetOf(MBB) < BBUtils->getOffsetOf(DestBB) || + !BBUtils->isBBInRange(Br.MI, DestBB, 4094)) + return false; + + if (!DT->dominates(DestBB, MBB)) + return false; + + // We queried for the CBN?Z opcode based upon the 'ExitBB', the opposite + // target of Br. So now we need to reverse the condition. + Cmp.NewOpc = Cmp.NewOpc == ARM::tCBZ ? 
ARM::tCBNZ : ARM::tCBZ; + + MachineInstrBuilder MIB = BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), + TII->get(ARM::t2LE)); + MIB.add(Br.MI->getOperand(0)); + Br.MI->eraseFromParent(); + Br.MI = MIB; + ++NumLEInserted; + return true; + }; + + bool MadeChange = false; + + // The order in which branches appear in ImmBranches is approximately their + // order within the function body. By visiting later branches first, we reduce + // the distance between earlier forward branches and their targets, making it + // more likely that the cbn?z optimization, which can only apply to forward + // branches, will succeed. + for (ImmBranch &Br : reverse(ImmBranches)) { + MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); + MachineBasicBlock *MBB = Br.MI->getParent(); + MachineBasicBlock *ExitBB = &MBB->back() == Br.MI ? + MBB->getFallThrough() : + MBB->back().getOperand(0).getMBB(); + + ImmCompare Cmp; + if (FindCmpForCBZ(Br, Cmp, ExitBB) && TryConvertToLE(Br, Cmp)) { + DestBB = ExitBB; + MadeChange = true; + } else { + FindCmpForCBZ(Br, Cmp, DestBB); + MadeChange |= TryShrinkBranch(Br); + } + + unsigned Opcode = Br.MI->getOpcode(); + if ((Opcode != ARM::tBcc && Opcode != ARM::t2LE) || !Cmp.NewOpc) continue; - unsigned Reg = CmpMI->getOperand(0).getReg(); + Register Reg = Cmp.MI->getOperand(0).getReg(); // Check for Kill flags on Reg. If they are present remove them and set kill // on the new CBZ. + auto *TRI = STI->getRegisterInfo(); MachineBasicBlock::iterator KillMI = Br.MI; bool RegKilled = false; do { @@ -1865,19 +1965,32 @@ bool ARMConstantIslands::optimizeThumb2Branches() { RegKilled = true; break; } - } while (KillMI != CmpMI); + } while (KillMI != Cmp.MI); // Create the new CBZ/CBNZ - MachineBasicBlock *MBB = Br.MI->getParent(); - LLVM_DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI); + LLVM_DEBUG(dbgs() << "Fold: " << *Cmp.MI << " and: " << *Br.MI); MachineInstr *NewBR = - BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), TII->get(NewOpc)) + BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), TII->get(Cmp.NewOpc)) .addReg(Reg, getKillRegState(RegKilled)) .addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags()); - CmpMI->eraseFromParent(); - Br.MI->eraseFromParent(); - Br.MI = NewBR; + + Cmp.MI->eraseFromParent(); + BBInfoVector &BBInfo = BBUtils->getBBInfo(); BBInfo[MBB->getNumber()].Size -= 2; + + if (Br.MI->getOpcode() == ARM::tBcc) { + Br.MI->eraseFromParent(); + Br.MI = NewBR; + } else if (&MBB->back() != Br.MI) { + // We've generated an LE and already erased the original conditional + // branch. The CBN?Z is now used to branch to the other successor, so an + // unconditional branch terminator is now redundant. + MachineInstr *LastMI = &MBB->back(); + if (LastMI != Br.MI) { + BBInfo[MBB->getNumber()].Size -= LastMI->getDesc().getSize(); + LastMI->eraseFromParent(); + } + } BBUtils->adjustBBOffsetsAfter(MBB); ++NumCBZ; MadeChange = true; @@ -1931,8 +2044,8 @@ bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI, // of BaseReg, but only if the t2ADDrs can be removed. // + Some instruction other than t2ADDrs computing the entry. Not seen in // the wild, but we should be careful. - unsigned EntryReg = JumpMI->getOperand(0).getReg(); - unsigned BaseReg = LEAMI->getOperand(0).getReg(); + Register EntryReg = JumpMI->getOperand(0).getReg(); + Register BaseReg = LEAMI->getOperand(0).getReg(); CanDeleteLEA = true; BaseRegKill = false; @@ -2009,7 +2122,7 @@ static void RemoveDeadAddBetweenLEAAndJT(MachineInstr *LEAMI, // but the JT now uses PC. 
Finds the last ADD (if any) that def's EntryReg // and is not clobbered / used. MachineInstr *RemovableAdd = nullptr; - unsigned EntryReg = JumpMI->getOperand(0).getReg(); + Register EntryReg = JumpMI->getOperand(0).getReg(); // Find the last ADD to set EntryReg MachineBasicBlock::iterator I(LEAMI); @@ -2106,7 +2219,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { // %idx = tLSLri %idx, 2 // %base = tLEApcrelJT // %t = tLDRr %base, %idx - unsigned BaseReg = User.MI->getOperand(0).getReg(); + Register BaseReg = User.MI->getOperand(0).getReg(); if (User.MI->getIterator() == User.MI->getParent()->begin()) continue; @@ -2116,7 +2229,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { !Shift->getOperand(2).isKill()) continue; IdxReg = Shift->getOperand(2).getReg(); - unsigned ShiftedIdxReg = Shift->getOperand(0).getReg(); + Register ShiftedIdxReg = Shift->getOperand(0).getReg(); // It's important that IdxReg is live until the actual TBB/TBH. Most of // the range is checked later, but the LEA might still clobber it and not @@ -2313,6 +2426,10 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { MachineFunction::iterator MBBI = ++JTBB->getIterator(); MF->insert(MBBI, NewBB); + // Copy live-in information to new block. + for (const MachineBasicBlock::RegisterMaskPair &RegMaskPair : BB->liveins()) + NewBB->addLiveIn(RegMaskPair); + // Add an unconditional branch from NewBB to BB. // There doesn't seem to be meaningful DebugInfo available; this doesn't // correspond directly to anything in the source. diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp index 3bdb0e1ef62d..72c95f441265 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Type.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index b32ba3eeea18..563fdda56104 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -481,7 +481,7 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) { unsigned OpIdx = 0; bool DstIsDead = MI.getOperand(OpIdx).isDead(); - unsigned DstReg = MI.getOperand(OpIdx++).getReg(); + Register DstReg = MI.getOperand(OpIdx++).getReg(); if(TableEntry->RealOpc == ARM::VLD2DUPd8x2 || TableEntry->RealOpc == ARM::VLD2DUPd16x2 || TableEntry->RealOpc == ARM::VLD2DUPd32x2) { @@ -492,7 +492,7 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) { assert(RegSpc == OddDblSpc && "Unexpected spacing!"); SubRegIndex = ARM::dsub_1; } - unsigned SubReg = TRI->getSubReg(DstReg, SubRegIndex); + Register SubReg = TRI->getSubReg(DstReg, SubRegIndex); unsigned DstRegPair = TRI->getMatchingSuperReg(SubReg, ARM::dsub_0, &ARM::DPairSpcRegClass); MIB.addReg(DstRegPair, RegState::Define | getDeadRegState(DstIsDead)); @@ -624,7 +624,7 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { bool SrcIsKill = MI.getOperand(OpIdx).isKill(); bool SrcIsUndef = MI.getOperand(OpIdx).isUndef(); - unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); + Register SrcReg = MI.getOperand(OpIdx++).getReg(); unsigned D0, D1, D2, D3; GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3); MIB.addReg(D0, getUndefRegState(SrcIsUndef)); @@ -760,7 +760,7 @@ void 
ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, } bool SrcIsKill = MI.getOperand(OpIdx).isKill(); - unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); + Register SrcReg = MI.getOperand(OpIdx++).getReg(); unsigned D0, D1, D2, D3; GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3); MIB.addReg(D0); @@ -789,6 +789,7 @@ static bool IsAnAddressOperand(const MachineOperand &MO) { case MachineOperand::MO_Immediate: case MachineOperand::MO_CImmediate: case MachineOperand::MO_FPImmediate: + case MachineOperand::MO_ShuffleMask: return false; case MachineOperand::MO_MachineBasicBlock: return true; @@ -828,7 +829,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, unsigned Opcode = MI.getOpcode(); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm; const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1); @@ -932,13 +933,13 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); const MachineOperand &Dest = MI.getOperand(0); - unsigned TempReg = MI.getOperand(1).getReg(); + Register TempReg = MI.getOperand(1).getReg(); // Duplicating undef operands into 2 instructions does not guarantee the same // value on both; However undef should be replaced by xzr anyway. assert(!MI.getOperand(2).isUndef() && "cannot handle undef"); - unsigned AddrReg = MI.getOperand(2).getReg(); - unsigned DesiredReg = MI.getOperand(3).getReg(); - unsigned NewReg = MI.getOperand(4).getReg(); + Register AddrReg = MI.getOperand(2).getReg(); + Register DesiredReg = MI.getOperand(3).getReg(); + Register NewReg = MI.getOperand(4).getReg(); MachineFunction *MF = MBB.getParent(); auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); @@ -1035,8 +1036,8 @@ static void addExclusiveRegPair(MachineInstrBuilder &MIB, MachineOperand &Reg, unsigned Flags, bool IsThumb, const TargetRegisterInfo *TRI) { if (IsThumb) { - unsigned RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0); - unsigned RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1); + Register RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0); + Register RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1); MIB.addReg(RegLo, Flags); MIB.addReg(RegHi, Flags); } else @@ -1051,19 +1052,19 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); MachineOperand &Dest = MI.getOperand(0); - unsigned TempReg = MI.getOperand(1).getReg(); + Register TempReg = MI.getOperand(1).getReg(); // Duplicating undef operands into 2 instructions does not guarantee the same // value on both; However undef should be replaced by xzr anyway. 
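Aside: the recurring change in these hunks from "unsigned Foo = ...getReg()" to "Register Foo = ...getReg()" is mechanical; llvm::Register wraps the raw register number while still converting back to unsigned, so existing comparisons keep working. A minimal sketch of that idea; ToyRegister is a simplified stand-in, not the real class:

  #include <cassert>

  // Wraps the raw register id (0 means "no register", as in LLVM) but still
  // converts back to unsigned, while adding self-describing queries.
  class ToyRegister {
    unsigned Id = 0;
  public:
    ToyRegister() = default;
    ToyRegister(unsigned Id) : Id(Id) {}
    operator unsigned() const { return Id; }
    bool isValid() const { return Id != 0; }
  };

  unsigned getRegSomehow() { return 42; } // placeholder for getReg()

  int main() {
    ToyRegister Reg = getRegSomehow(); // drop-in for: unsigned Reg = ...
    assert(Reg == 42u);                // implicit conversion keeps old checks working
    assert(Reg.isValid());             // the style used later in the ARMFastISel hunks
    ToyRegister None;
    assert(!None.isValid());
  }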
assert(!MI.getOperand(2).isUndef() && "cannot handle undef"); - unsigned AddrReg = MI.getOperand(2).getReg(); - unsigned DesiredReg = MI.getOperand(3).getReg(); + Register AddrReg = MI.getOperand(2).getReg(); + Register DesiredReg = MI.getOperand(3).getReg(); MachineOperand New = MI.getOperand(4); New.setIsKill(false); - unsigned DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0); - unsigned DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1); - unsigned DesiredLo = TRI->getSubReg(DesiredReg, ARM::gsub_0); - unsigned DesiredHi = TRI->getSubReg(DesiredReg, ARM::gsub_1); + Register DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0); + Register DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1); + Register DesiredLo = TRI->getSubReg(DesiredReg, ARM::gsub_0); + Register DesiredHi = TRI->getSubReg(DesiredReg, ARM::gsub_1); MachineFunction *MF = MBB.getParent(); auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); @@ -1204,8 +1205,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) NewMI->addOperand(MBBI->getOperand(i)); - // Delete the pseudo instruction TCRETURN. + + // Update call site info and delete the pseudo instruction TCRETURN. + MBB.getParent()->moveCallSiteInfo(&MI, &*NewMI); MBB.erase(MBBI); + MBBI = NewMI; return true; } @@ -1336,7 +1340,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, // for us. Otherwise, expand to nothing. if (RI.hasBasePointer(MF)) { int32_t NumBytes = AFI->getFramePtrSpillOffset(); - unsigned FramePtr = RI.getFrameRegister(MF); + Register FramePtr = RI.getFrameRegister(MF); assert(MF.getSubtarget().getFrameLowering()->hasFP(MF) && "base pointer without frame pointer?"); @@ -1412,7 +1416,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MachineConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(MF->getFunction().getContext(), "__aeabi_read_tp", PCLabelID, 0); - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Thumb ? ARM::tLDRpci : ARM::LDRi12), Reg) .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4)); @@ -1435,6 +1439,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MIB.cloneMemRefs(MI); TransferImpOps(MI, MIB, MIB); + MI.getMF()->moveCallSiteInfo(&MI, &*MIB); MI.eraseFromParent(); return true; } @@ -1442,7 +1447,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::t2LDRpci_pic: { unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic) ? ARM::tLDRpci : ARM::t2LDRpci; - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg) @@ -1464,7 +1469,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::LDRLIT_ga_pcrel_ldr: case ARM::tLDRLIT_ga_abs: case ARM::tLDRLIT_ga_pcrel: { - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); const MachineOperand &MO1 = MI.getOperand(1); auto Flags = MO1.getTargetFlags(); @@ -1522,7 +1527,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::t2MOV_ga_pcrel: { // Expand into movw + movw. Also "add pc" / ldr [pc] in PIC mode. 
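Note: several of the expansions around here (ExpandMOV32BitImm and the *_ga_pcrel pseudos) materialize a 32-bit value in two halves, roughly movw for bits 15:0 followed by movt for bits 31:16. A plain-integer model of that sequence; the real instructions of course operate on registers under predication:

  #include <cassert>
  #include <cstdint>

  uint32_t movw(uint16_t Lo) { return Lo; }            // writes low half, clears 31:16
  uint32_t movt(uint32_t Reg, uint16_t Hi) {
    return (Reg & 0xFFFFu) | (uint32_t(Hi) << 16);     // writes high half, keeps 15:0
  }

  int main() {
    uint32_t Imm = 0x12345678;
    uint32_t Reg = movw(uint16_t(Imm & 0xFFFF));
    Reg = movt(Reg, uint16_t(Imm >> 16));
    assert(Reg == Imm);
  }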
unsigned LabelId = AFI->createPICLabelUId(); - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); const MachineOperand &MO1 = MI.getOperand(1); const GlobalValue *GV = MO1.getGlobal(); @@ -1586,7 +1591,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, // Grab the Q register destination. bool DstIsDead = MI.getOperand(OpIdx).isDead(); - unsigned DstReg = MI.getOperand(OpIdx++).getReg(); + Register DstReg = MI.getOperand(OpIdx++).getReg(); // Copy the source register. MIB.add(MI.getOperand(OpIdx++)); @@ -1596,8 +1601,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MIB.add(MI.getOperand(OpIdx++)); // Add the destination operands (D subregs). - unsigned D0 = TRI->getSubReg(DstReg, ARM::dsub_0); - unsigned D1 = TRI->getSubReg(DstReg, ARM::dsub_1); + Register D0 = TRI->getSubReg(DstReg, ARM::dsub_0); + Register D1 = TRI->getSubReg(DstReg, ARM::dsub_1); MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)) .addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); @@ -1617,7 +1622,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, // Grab the Q register source. bool SrcIsKill = MI.getOperand(OpIdx).isKill(); - unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); + Register SrcReg = MI.getOperand(OpIdx++).getReg(); // Copy the destination register. MachineOperand Dst(MI.getOperand(OpIdx++)); @@ -1628,8 +1633,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MIB.add(MI.getOperand(OpIdx++)); // Add the source operands (D subregs). - unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); - unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1); + Register D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); + Register D1 = TRI->getSubReg(SrcReg, ARM::dsub_1); MIB.addReg(D0, SrcIsKill ? RegState::Kill : 0) .addReg(D1, SrcIsKill ? 
RegState::Kill : 0); @@ -1915,6 +1920,37 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::CMP_SWAP_64: return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI); + + case ARM::tBL_PUSHLR: + case ARM::BL_PUSHLR: { + const bool Thumb = Opcode == ARM::tBL_PUSHLR; + Register Reg = MI.getOperand(0).getReg(); + assert(Reg == ARM::LR && "expect LR register!"); + MachineInstrBuilder MIB; + if (Thumb) { + // push {lr} + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPUSH)) + .add(predOps(ARMCC::AL)) + .addReg(Reg); + + // bl __gnu_mcount_nc + MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tBL)); + } else { + // stmdb sp!, {lr} + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::STMDB_UPD)) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .addReg(Reg); + + // bl __gnu_mcount_nc + MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::BL)); + } + MIB.cloneMemRefs(MI); + for (unsigned i = 1; i < MI.getNumOperands(); ++i) MIB.add(MI.getOperand(i)); + MI.eraseFromParent(); + return true; + } } } diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 6e274d269bf2..1fc5ff6921c6 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -191,8 +191,8 @@ class ARMFastISel final : public FastISel { bool isTypeLegal(Type *Ty, MVT &VT); bool isLoadTypeLegal(Type *Ty, MVT &VT); bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, - bool isZExt, bool isEquality); - bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, + bool isZExt); + bool ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr, unsigned Alignment = 0, bool isZExt = true, bool allocReg = true); bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, @@ -219,15 +219,15 @@ class ARMFastISel final : public FastISel { bool Return, bool isVarArg); bool ProcessCallArgs(SmallVectorImpl &Args, - SmallVectorImpl &ArgRegs, + SmallVectorImpl &ArgRegs, SmallVectorImpl &ArgVTs, SmallVectorImpl &ArgFlags, - SmallVectorImpl &RegArgs, + SmallVectorImpl &RegArgs, CallingConv::ID CC, unsigned &NumBytes, bool isVarArg); unsigned getLibcallReg(const Twine &Name); - bool FinishCall(MVT RetVT, SmallVectorImpl &UsedRegs, + bool FinishCall(MVT RetVT, SmallVectorImpl &UsedRegs, const Instruction *I, CallingConv::ID CC, unsigned &NumBytes, bool isVarArg); bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call); @@ -301,7 +301,7 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { unsigned ARMFastISel::fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill) { - unsigned ResultReg = createResultReg(RC); + Register ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); // Make sure the input operand is sufficiently constrained to be legal @@ -913,7 +913,7 @@ void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr, AddOptionalDefs(MIB); } -bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, +bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr, unsigned Alignment, bool isZExt, bool allocReg) { unsigned Opc; bool useAM3 = false; @@ -1045,7 +1045,7 @@ bool ARMFastISel::SelectLoad(const Instruction *I) { Address Addr; if (!ARMComputeAddress(I->getOperand(0), Addr)) return false; - unsigned ResultReg; + Register ResultReg; if (!ARMEmitLoad(VT, ResultReg, Addr, cast(I)->getAlignment())) return false; updateValueMap(I, ResultReg); @@ -1259,8 +1259,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { 
if (ARMPred == ARMCC::AL) return false; // Emit the compare. - if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(), - CI->isEquality())) + if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) return false; unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc; @@ -1349,7 +1348,7 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) { } bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, - bool isZExt, bool isEquality) { + bool isZExt) { Type *Ty = Src1Value->getType(); EVT SrcEVT = TLI.getValueType(DL, Ty, true); if (!SrcEVT.isSimple()) return false; @@ -1397,19 +1396,11 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, // TODO: Verify compares. case MVT::f32: isICmp = false; - // Equality comparisons shouldn't raise Invalid on uordered inputs. - if (isEquality) - CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS; - else - CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES; + CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS; break; case MVT::f64: isICmp = false; - // Equality comparisons shouldn't raise Invalid on uordered inputs. - if (isEquality) - CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD; - else - CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED; + CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD; break; case MVT::i1: case MVT::i8: @@ -1485,8 +1476,7 @@ bool ARMFastISel::SelectCmp(const Instruction *I) { if (ARMPred == ARMCC::AL) return false; // Emit the compare. - if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(), - CI->isEquality())) + if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) return false; // Now set a register based on the comparison. Explicitly set the predicates @@ -1893,10 +1883,10 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, } bool ARMFastISel::ProcessCallArgs(SmallVectorImpl &Args, - SmallVectorImpl &ArgRegs, + SmallVectorImpl &ArgRegs, SmallVectorImpl &ArgVTs, SmallVectorImpl &ArgFlags, - SmallVectorImpl &RegArgs, + SmallVectorImpl &RegArgs, CallingConv::ID CC, unsigned &NumBytes, bool isVarArg) { @@ -1960,7 +1950,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl &Args, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; const Value *ArgVal = Args[VA.getValNo()]; - unsigned Arg = ArgRegs[VA.getValNo()]; + Register Arg = ArgRegs[VA.getValNo()]; MVT ArgVT = ArgVTs[VA.getValNo()]; assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) && @@ -2039,7 +2029,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl &Args, return true; } -bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl &UsedRegs, +bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl &UsedRegs, const Instruction *I, CallingConv::ID CC, unsigned &NumBytes, bool isVarArg) { // Issue CALLSEQ_END @@ -2060,7 +2050,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl &UsedRegs, // double fp reg we want. 
MVT DestVT = RVLocs[0].getValVT(); const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT); - unsigned ResultReg = createResultReg(DstRC); + Register ResultReg = createResultReg(DstRC); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM::VMOVDRR), ResultReg) .addReg(RVLocs[0].getLocReg()) @@ -2081,7 +2071,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl &UsedRegs, const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT); - unsigned ResultReg = createResultReg(DstRC); + Register ResultReg = createResultReg(DstRC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(RVLocs[0].getLocReg()); @@ -2162,7 +2152,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) { } // Make the copy. - unsigned DstReg = VA.getLocReg(); + Register DstReg = VA.getLocReg(); const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg); // Avoid a cross-class copy. This is very unlikely. if (!SrcRC->contains(DstReg)) @@ -2231,7 +2221,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { // Set up the argument vectors. SmallVector Args; - SmallVector ArgRegs; + SmallVector ArgRegs; SmallVector ArgVTs; SmallVector ArgFlags; Args.reserve(I->getNumOperands()); @@ -2247,8 +2237,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { if (!isTypeLegal(ArgTy, ArgVT)) return false; ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); - Flags.setOrigAlign(OriginalAlignment); + Flags.setOrigAlign(Align(DL.getABITypeAlignment(ArgTy))); Args.push_back(Op); ArgRegs.push_back(Arg); @@ -2257,13 +2246,13 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { } // Handle the arguments now that we've gotten them. - SmallVector RegArgs; + SmallVector RegArgs; unsigned NumBytes; if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes, false)) return false; - unsigned CalleeReg = 0; + Register CalleeReg; if (Subtarget->genLongCalls()) { CalleeReg = getLibcallReg(TLI.getLibcallName(Call)); if (CalleeReg == 0) return false; @@ -2282,7 +2271,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { MIB.addExternalSymbol(TLI.getLibcallName(Call)); // Add implicit physical register uses to the call. - for (unsigned R : RegArgs) + for (Register R : RegArgs) MIB.addReg(R, RegState::Implicit); // Add a register mask with the call-preserved registers. @@ -2290,7 +2279,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); // Finish off the call including any return values. - SmallVector UsedRegs; + SmallVector UsedRegs; if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, false)) return false; // Set all unused physreg defs as dead. @@ -2340,7 +2329,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, // Set up the argument vectors. 
SmallVector Args; - SmallVector ArgRegs; + SmallVector ArgRegs; SmallVector ArgVTs; SmallVector ArgFlags; unsigned arg_size = CS.arg_size(); @@ -2377,12 +2366,11 @@ bool ARMFastISel::SelectCall(const Instruction *I, ArgVT != MVT::i1) return false; - unsigned Arg = getRegForValue(*i); - if (Arg == 0) + Register Arg = getRegForValue(*i); + if (!Arg.isValid()) return false; - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); - Flags.setOrigAlign(OriginalAlignment); + Flags.setOrigAlign(Align(DL.getABITypeAlignment(ArgTy))); Args.push_back(*i); ArgRegs.push_back(Arg); @@ -2391,7 +2379,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, } // Handle the arguments now that we've gotten them. - SmallVector RegArgs; + SmallVector RegArgs; unsigned NumBytes; if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes, isVarArg)) @@ -2401,7 +2389,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, const GlobalValue *GV = dyn_cast(Callee); if (!GV || Subtarget->genLongCalls()) UseReg = true; - unsigned CalleeReg = 0; + Register CalleeReg; if (UseReg) { if (IntrMemName) CalleeReg = getLibcallReg(IntrMemName); @@ -2427,7 +2415,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, MIB.addExternalSymbol(IntrMemName, 0); // Add implicit physical register uses to the call. - for (unsigned R : RegArgs) + for (Register R : RegArgs) MIB.addReg(R, RegState::Implicit); // Add a register mask with the call-preserved registers. @@ -2435,7 +2423,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); // Finish off the call including any return values. - SmallVector UsedRegs; + SmallVector UsedRegs; if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg)) return false; @@ -2476,7 +2464,7 @@ bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, } bool RV; - unsigned ResultReg; + Register ResultReg; RV = ARMEmitLoad(VT, ResultReg, Src); assert(RV && "Should be able to handle this load."); RV = ARMEmitStore(VT, ResultReg, Dest); @@ -2506,7 +2494,7 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { const ARMBaseRegisterInfo *RegInfo = static_cast(Subtarget->getRegisterInfo()); - unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); + Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); unsigned SrcReg = FramePtr; // Recursively load frame address @@ -2947,7 +2935,7 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, Address Addr; if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false; - unsigned ResultReg = MI->getOperand(0).getReg(); + Register ResultReg = MI->getOperand(0).getReg(); if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false)) return false; MachineBasicBlock::iterator I(MI); @@ -2974,7 +2962,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF), MachineMemOperand::MOLoad, 4, 4); - unsigned TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass); + Register TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass); unsigned Opc = isThumb2 ? 
ARM::t2LDRpci : ARM::LDRcp; MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), TempReg) diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index bedb779bcba0..01ae93086dcb 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -76,7 +76,7 @@ skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs); ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti) - : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4), + : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)), STI(sti) {} bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const { @@ -376,7 +376,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // to determine the end of the prologue. DebugLoc dl; - unsigned FramePtr = RegInfo->getFrameRegister(MF); + Register FramePtr = RegInfo->getFrameRegister(MF); // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. @@ -780,7 +780,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); int NumBytes = (int)MFI.getStackSize(); - unsigned FramePtr = RegInfo->getFrameRegister(MF); + Register FramePtr = RegInfo->getFrameRegister(MF); // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. @@ -1503,11 +1503,17 @@ static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF, /// instructions will require a scratch register during their expansion later. // FIXME: Move to TII? static unsigned estimateRSStackSizeLimit(MachineFunction &MF, - const TargetFrameLowering *TFI) { + const TargetFrameLowering *TFI, + bool &HasNonSPFrameIndex) { const ARMFunctionInfo *AFI = MF.getInfo(); + const ARMBaseInstrInfo &TII = + *static_cast(MF.getSubtarget().getInstrInfo()); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); unsigned Limit = (1 << 12) - 1; for (auto &MBB : MF) { for (auto &MI : MBB) { + if (MI.isDebugInstr()) + continue; for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { if (!MI.getOperand(i).isFI()) continue; @@ -1518,13 +1524,29 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, Limit = std::min(Limit, (1U << 8) - 1); break; } + // t2ADDri will not require an extra register, it can reuse the + // destination. + if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12) + break; + + const MCInstrDesc &MCID = MI.getDesc(); + const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF); + if (RegClass && !RegClass->contains(ARM::SP)) + HasNonSPFrameIndex = true; // Otherwise check the addressing mode. switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) { + case ARMII::AddrMode_i12: + case ARMII::AddrMode2: + // Default 12 bit limit. + break; case ARMII::AddrMode3: case ARMII::AddrModeT2_i8: Limit = std::min(Limit, (1U << 8) - 1); break; + case ARMII::AddrMode5FP16: + Limit = std::min(Limit, ((1U << 8) - 1) * 2); + break; case ARMII::AddrMode5: case ARMII::AddrModeT2_i8s4: case ARMII::AddrModeT2_ldrex: @@ -1541,8 +1563,17 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, // Addressing modes 4 & 6 (load/store) instructions can't encode an // immediate offset for stack references. 
return 0; - default: + case ARMII::AddrModeT2_i7: + Limit = std::min(Limit, ((1U << 7) - 1) * 1); + break; + case ARMII::AddrModeT2_i7s2: + Limit = std::min(Limit, ((1U << 7) - 1) * 2); break; + case ARMII::AddrModeT2_i7s4: + Limit = std::min(Limit, ((1U << 7) - 1) * 4); + break; + default: + llvm_unreachable("Unhandled addressing mode in stack size limit calculation"); } break; // At most one FI per instruction } @@ -1623,7 +1654,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); (void)TRI; // Silence unused warning in non-assert builds. - unsigned FramePtr = RegInfo->getFrameRegister(MF); + Register FramePtr = RegInfo->getFrameRegister(MF); // Spill R4 if Thumb2 function requires stack realignment - it will be used as // scratch register. Also spill R4 if Thumb2 function has varsized objects, @@ -1784,6 +1815,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, EstimatedStackSize += 16; // For possible paddings. unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit; + bool HasNonSPFrameIndex = false; if (AFI->isThumb1OnlyFunction()) { // For Thumb1, don't bother to iterate over the function. The only // instruction that requires an emergency spill slot is a store to a @@ -1804,7 +1836,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, EstimatedRSStackSizeLimit = (1U << 8) * 4; EstimatedRSFixedSizeLimit = (1U << 5) * 4; } else { - EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this); + EstimatedRSStackSizeLimit = + estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex); EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit; } // Final estimate of whether sp or bp-relative accesses might require @@ -1830,12 +1863,11 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit; bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP || - HasLargeArgumentList; + HasLargeArgumentList || HasNonSPFrameIndex; LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit - << "; EstimatedStack" << EstimatedStackSize - << "; EstimatedFPStack" << MaxFixedOffset - MaxFPOffset - << "; BigFrameOffsets: " << BigFrameOffsets - << "\n"); + << "; EstimatedStack: " << EstimatedStackSize + << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset + << "; BigFrameOffsets: " << BigFrameOffsets << "\n"); if (BigFrameOffsets || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { AFI->setHasStackFrame(true); @@ -2080,9 +2112,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, ExtraCSSpill = true; } } - if (!ExtraCSSpill) { + if (!ExtraCSSpill && RS) { // Reserve a slot closest to SP or frame pointer. - assert(RS && "Register scavenging not provided"); LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n"); const TargetRegisterClass &RC = ARM::GPRRegClass; unsigned Size = TRI->getSpillSize(RC); @@ -2097,6 +2128,12 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, AFI->setLRIsSpilledForFarJump(true); } AFI->setLRIsSpilled(SavedRegs.test(ARM::LR)); + + // If we have the "returned" parameter attribute which guarantees that we + // return the value which was passed in r0 unmodified (e.g. C++ 'structors), + // record that fact for IPRA. 
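Aside: the estimateRSStackSizeLimit switch above sizes each addressing mode's reach as (2^bits - 1) scaled by the access granularity, which is where figures such as ((1U << 7) - 1) * 4 come from. A small standalone check of that mapping against the values visible in the hunk; the helper name is illustrative:

  #include <cassert>

  // Reach of an immediate-offset addressing mode: an unsigned field of
  // Bits bits, scaled by Scale bytes per unit.
  constexpr unsigned maxOffsetBytes(unsigned Bits, unsigned Scale) {
    return ((1u << Bits) - 1) * Scale;
  }

  int main() {
    assert(maxOffsetBytes(12, 1) == 4095); // AddrMode_i12 / AddrMode2
    assert(maxOffsetBytes(8, 1) == 255);   // AddrMode3 / AddrModeT2_i8
    assert(maxOffsetBytes(8, 2) == 510);   // AddrMode5FP16
    assert(maxOffsetBytes(7, 1) == 127);   // AddrModeT2_i7
    assert(maxOffsetBytes(7, 2) == 254);   // AddrModeT2_i7s2
    assert(maxOffsetBytes(7, 4) == 508);   // AddrModeT2_i7s4
  }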
+ if (AFI->getPreservesR0()) + SavedRegs.set(ARM::R0); } MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr( diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index 7544ca3c38d6..6d8aee597945 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -63,6 +63,11 @@ public: bool enableShrinkWrapping(const MachineFunction &MF) const override { return true; } + bool isProfitableForNoCSROpt(const Function &F) const override { + // The no-CSR optimisation is bad for code size on ARM, because we can save + // many registers with a single PUSH/POP pair. + return false; + } private: void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index b349627b67b1..8f6515c423eb 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -139,6 +139,8 @@ public: bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm); + template + bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm); // Thumb 2 Addressing Modes: bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); @@ -146,9 +148,12 @@ public: SDValue &OffImm); bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, SDValue &OffImm); - template - bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, - SDValue &OffImm); + template + bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm); + bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm, + unsigned Shift); + template + bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base, SDValue &OffReg, SDValue &ShImm); bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm); @@ -179,6 +184,7 @@ private: bool tryARMIndexedLoad(SDNode *N); bool tryT1IndexedLoad(SDNode *N); bool tryT2IndexedLoad(SDNode *N); + bool tryMVEIndexedLoad(SDNode *N); /// SelectVLD - Select NEON load intrinsics. NumVecs should be /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for @@ -246,10 +252,6 @@ private: SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs, bool is64BitVector); - /// Returns the number of instructions required to materialize the given - /// constant in a register, or 3 if a literal pool load is needed. - unsigned ConstantMaterializationCost(unsigned Val) const; - /// Checks if N is a multiplication by a constant where we can extract out a /// power of two from the constant so that it can be used in a shift, but only /// if it simplifies the materialization of the constant. 
Returns true if it @@ -450,27 +452,6 @@ bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1)); } -unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const { - if (Subtarget->isThumb()) { - if (Val <= 255) return 1; // MOV - if (Subtarget->hasV6T2Ops() && - (Val <= 0xffff || // MOV - ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW - ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN - return 1; - if (Val <= 510) return 2; // MOV + ADDi8 - if (~Val <= 255) return 2; // MOV + MVN - if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL - } else { - if (ARM_AM::getSOImmVal(Val) != -1) return 1; // MOV - if (ARM_AM::getSOImmVal(~Val) != -1) return 1; // MVN - if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW - if (ARM_AM::isSOImmTwoPartVal(Val)) return 2; // two instrs - } - if (Subtarget->useMovt()) return 2; // MOVW + MOVT - return 3; // Literal pool load -} - bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N, unsigned MaxShift, unsigned &PowerOfTwo, @@ -500,8 +481,8 @@ bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N, // Only optimise if the new cost is better unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo); NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32); - unsigned OldCost = ConstantMaterializationCost(MulConstVal); - unsigned NewCost = ConstantMaterializationCost(NewMulConstVal); + unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget); + unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget); return NewCost < OldCost; } @@ -1172,6 +1153,28 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, return false; } +template +bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base, + SDValue &OffImm) { + if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { + int RHSC; + if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, + RHSC)) { + Base = N.getOperand(0); + if (N.getOpcode() == ISD::SUB) + RHSC = -RHSC; + OffImm = + CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); + return true; + } + } + + // Base only. 
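Aside: the new imm7 address-mode selectors in the surrounding hunks all reduce to one test: the byte offset must be an exact multiple of the access size, and the scaled result must lie in the window passed to isScaledConstantInRange (-0x7f to 0x80 here). A standalone sketch of that predicate under those assumptions; the function below is illustrative, not the LLVM helper:

  #include <cassert>

  // Accepts a byte offset if it is a multiple of Scale and the scaled value
  // lies in [-0x7f, 0x80), mirroring the isScaledConstantInRange calls.
  bool isScaledImm7(int ByteOffset, int Scale, int &Scaled) {
    if (ByteOffset % Scale != 0)
      return false;
    Scaled = ByteOffset / Scale;
    return Scaled >= -0x7f && Scaled < 0x80;
  }

  int main() {
    int S;
    assert(isScaledImm7(508, 4, S) && S == 127);   // max positive word offset
    assert(!isScaledImm7(510, 4, S));              // not a multiple of 4
    assert(isScaledImm7(-254, 2, S) && S == -127); // negative offsets allowed
    assert(!isScaledImm7(512, 4, S));              // 128 is out of range
  }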
+ Base = N; + OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); + return true; +} + //===----------------------------------------------------------------------===// // Thumb 2 Addressing Modes @@ -1278,35 +1281,59 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, return false; } -template -bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, - SDValue &Base, SDValue &OffImm) { - if (N.getOpcode() == ISD::SUB || - CurDAG->isBaseWithConstantOffset(N)) { - if (auto RHS = dyn_cast(N.getOperand(1))) { - int RHSC = (int)RHS->getZExtValue(); - if (N.getOpcode() == ISD::SUB) - RHSC = -RHSC; - - if (isShiftedInt<7, Shift>(RHSC)) { - Base = N.getOperand(0); - if (Base.getOpcode() == ISD::FrameIndex) { - int FI = cast(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex( +template +bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base, + SDValue &OffImm) { + if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { + int RHSC; + if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, + RHSC)) { + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex( FI, TLI->getPointerTy(CurDAG->getDataLayout())); - } - OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); - return true; } + + if (N.getOpcode() == ISD::SUB) + RHSC = -RHSC; + OffImm = + CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); + return true; } } // Base only. Base = N; - OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); + OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); return true; } +template +bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, + SDValue &OffImm) { + return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift); +} + +bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, + SDValue &OffImm, + unsigned Shift) { + unsigned Opcode = Op->getOpcode(); + ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) + ? cast(Op)->getAddressingMode() + : cast(Op)->getAddressingMode(); + int RHSC; + if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { // 7 bits. + OffImm = + ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) + ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32) + : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N), + MVT::i32); + return true; + } + return false; +} + bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, SDValue &Base, SDValue &OffReg, SDValue &ShImm) { @@ -1565,6 +1592,68 @@ bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) { return false; } +bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) { + LoadSDNode *LD = cast(N); + ISD::MemIndexedMode AM = LD->getAddressingMode(); + if (AM == ISD::UNINDEXED) + return false; + EVT LoadedVT = LD->getMemoryVT(); + if (!LoadedVT.isVector()) + return false; + bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; + SDValue Offset; + bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); + unsigned Opcode = 0; + unsigned Align = LD->getAlignment(); + bool IsLE = Subtarget->isLittle(); + + if (Align >= 2 && LoadedVT == MVT::v4i16 && + SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) { + if (isSExtLd) + Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post; + else + Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post; + } else if (LoadedVT == MVT::v8i8 && + SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) { + if (isSExtLd) + Opcode = isPre ? 
ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post; + else + Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post; + } else if (LoadedVT == MVT::v4i8 && + SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) { + if (isSExtLd) + Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post; + else + Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post; + } else if (Align >= 4 && + (IsLE || LoadedVT == MVT::v4i32 || LoadedVT == MVT::v4f32) && + SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 2)) + Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post; + else if (Align >= 2 && + (IsLE || LoadedVT == MVT::v8i16 || LoadedVT == MVT::v8f16) && + SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) + Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post; + else if ((IsLE || LoadedVT == MVT::v16i8) && + SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) + Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post; + else + return false; + + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Ops[] = {Base, Offset, + CurDAG->getTargetConstant(ARMVCC::None, SDLoc(N), MVT::i32), + CurDAG->getRegister(0, MVT::i32), Chain}; + SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), LD->getValueType(0), + MVT::i32, MVT::Other, Ops); + transferMemOperands(N, New); + ReplaceUses(SDValue(N, 0), SDValue(New, 1)); + ReplaceUses(SDValue(N, 1), SDValue(New, 0)); + ReplaceUses(SDValue(N, 2), SDValue(New, 2)); + CurDAG->RemoveDeadNode(N); + return true; +} + /// Form a GPRPair pseudo register from a pair of GPR regs. SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { SDLoc dl(V0.getNode()); @@ -2701,7 +2790,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) { case ISD::Constant: { unsigned Val = cast(N)->getZExtValue(); // If we can't materialize the constant we need to use a literal pool - if (ConstantMaterializationCost(Val) > 2) { + if (ConstantMaterializationCost(Val, Subtarget) > 2) { SDValue CPIdx = CurDAG->getTargetConstantPool( ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), TLI->getPointerTy(CurDAG->getDataLayout())); @@ -2842,8 +2931,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) { bool PreferImmediateEncoding = Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm)); if (!PreferImmediateEncoding && - ConstantMaterializationCost(Imm) > - ConstantMaterializationCost(~Imm)) { + ConstantMaterializationCost(Imm, Subtarget) > + ConstantMaterializationCost(~Imm, Subtarget)) { // The current immediate costs more to materialize than a negated // immediate, so negate the immediate and use a BIC. SDValue NewImm = @@ -2987,6 +3076,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) { return; } case ISD::LOAD: { + if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) + return; if (Subtarget->isThumb() && Subtarget->hasThumb2()) { if (tryT2IndexedLoad(N)) return; @@ -2998,13 +3089,26 @@ void ARMDAGToDAGISel::Select(SDNode *N) { // Other cases are autogenerated. break; } - case ARMISD::WLS: { - SDValue Ops[] = { N->getOperand(1), // Loop count - N->getOperand(2), // Exit target + case ARMISD::WLS: + case ARMISD::LE: { + SDValue Ops[] = { N->getOperand(1), + N->getOperand(2), + N->getOperand(0) }; + unsigned Opc = N->getOpcode() == ARMISD::WLS ? 
+ ARM::t2WhileLoopStart : ARM::t2LoopEnd; + SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); + ReplaceUses(N, New); + CurDAG->RemoveDeadNode(N); + return; + } + case ARMISD::LOOP_DEC: { + SDValue Ops[] = { N->getOperand(1), + N->getOperand(2), N->getOperand(0) }; - SDNode *LoopStart = - CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other, Ops); - ReplaceUses(N, LoopStart); + SDNode *Dec = + CurDAG->getMachineNode(ARM::t2LoopDec, dl, + CurDAG->getVTList(MVT::i32, MVT::Other), Ops); + ReplaceUses(N, Dec); CurDAG->RemoveDeadNode(N); return; } @@ -4365,7 +4469,7 @@ bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){ // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to // the original GPRs. - unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); SDValue Chain = SDValue(N,0); @@ -4401,7 +4505,7 @@ bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){ // Copy REG_SEQ into a GPRPair-typed VR and replace the original two // i32 VRs of inline asm with it. - unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 18bb9bf3eccc..db26feb57010 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -245,7 +245,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 }; for (auto VT : IntTypes) { - addRegisterClass(VT, &ARM::QPRRegClass); + addRegisterClass(VT, &ARM::MQPRRegClass); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); @@ -258,12 +258,31 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::UMIN, VT, Legal); setOperationAction(ISD::UMAX, VT, Legal); setOperationAction(ISD::ABS, VT, Legal); + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::MLOAD, VT, Custom); + setOperationAction(ISD::MSTORE, VT, Legal); + setOperationAction(ISD::CTLZ, VT, Legal); + setOperationAction(ISD::CTTZ, VT, Custom); + setOperationAction(ISD::BITREVERSE, VT, Legal); + setOperationAction(ISD::BSWAP, VT, Legal); + setOperationAction(ISD::SADDSAT, VT, Legal); + setOperationAction(ISD::UADDSAT, VT, Legal); + setOperationAction(ISD::SSUBSAT, VT, Legal); + setOperationAction(ISD::USUBSAT, VT, Legal); // No native support for these. 
setOperationAction(ISD::UDIV, VT, Expand); setOperationAction(ISD::SDIV, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::CTPOP, VT, Expand); + + // Vector reductions + setOperationAction(ISD::VECREDUCE_ADD, VT, Legal); + setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal); + setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal); + setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal); + setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal); if (!HasMVEFP) { setOperationAction(ISD::SINT_TO_FP, VT, Expand); @@ -271,11 +290,18 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::FP_TO_SINT, VT, Expand); setOperationAction(ISD::FP_TO_UINT, VT, Expand); } + + // Pre and Post inc are supported on loads and stores + for (unsigned im = (unsigned)ISD::PRE_INC; + im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { + setIndexedLoadAction(im, VT, Legal); + setIndexedStoreAction(im, VT, Legal); + } } const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 }; for (auto VT : FloatTypes) { - addRegisterClass(VT, &ARM::QPRRegClass); + addRegisterClass(VT, &ARM::MQPRRegClass); if (!HasMVEFP) setAllExpand(VT); @@ -287,6 +313,16 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal); + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::MLOAD, VT, Custom); + setOperationAction(ISD::MSTORE, VT, Legal); + + // Pre and Post inc are supported on loads and stores + for (unsigned im = (unsigned)ISD::PRE_INC; + im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { + setIndexedLoadAction(im, VT, Legal); + setIndexedStoreAction(im, VT, Legal); + } if (HasMVEFP) { setOperationAction(ISD::FMINNUM, VT, Legal); @@ -314,7 +350,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { // vector types is inhibited at integer-only level. 
const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 }; for (auto VT : LongTypes) { - addRegisterClass(VT, &ARM::QPRRegClass); + addRegisterClass(VT, &ARM::MQPRRegClass); setAllExpand(VT); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); @@ -334,6 +370,33 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal); setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); + + // Pre and Post inc on these are legal, given the correct extends + for (unsigned im = (unsigned)ISD::PRE_INC; + im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { + setIndexedLoadAction(im, MVT::v8i8, Legal); + setIndexedStoreAction(im, MVT::v8i8, Legal); + setIndexedLoadAction(im, MVT::v4i8, Legal); + setIndexedStoreAction(im, MVT::v4i8, Legal); + setIndexedLoadAction(im, MVT::v4i16, Legal); + setIndexedStoreAction(im, MVT::v4i16, Legal); + } + + // Predicate types + const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1}; + for (auto VT : pTypes) { + addRegisterClass(VT, &ARM::VCCRRegClass); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); + setOperationAction(ISD::LOAD, VT, Custom); + setOperationAction(ISD::STORE, VT, Custom); + } } ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, @@ -645,8 +708,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); } - for (MVT VT : MVT::vector_valuetypes()) { - for (MVT InnerVT : MVT::vector_valuetypes()) { + for (MVT VT : MVT::fixedlen_vector_valuetypes()) { + for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { setTruncStoreAction(VT, InnerVT, Expand); addAllExtLoads(VT, InnerVT, Expand); } @@ -669,8 +732,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, addMVEVectorTypes(Subtarget->hasMVEFloatOps()); // Combine low-overhead loop intrinsics so that we can lower i1 types. - if (Subtarget->hasLOB()) + if (Subtarget->hasLOB()) { setTargetDAGCombine(ISD::BRCOND); + setTargetDAGCombine(ISD::BR_CC); + } if (Subtarget->hasNEON()) { addDRTypeForNEON(MVT::v2f32); @@ -837,10 +902,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::SRA); - setTargetDAGCombine(ISD::SIGN_EXTEND); - setTargetDAGCombine(ISD::ZERO_EXTEND); - setTargetDAGCombine(ISD::ANY_EXTEND); - setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::FP_TO_SINT); setTargetDAGCombine(ISD::FP_TO_UINT); setTargetDAGCombine(ISD::FDIV); @@ -849,7 +910,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // It is legal to extload from v4i8 to v4i16 or v4i32. 
for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16, MVT::v2i32}) { - for (MVT VT : MVT::integer_vector_valuetypes()) { + for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal); setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal); setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal); @@ -861,6 +922,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::BUILD_VECTOR); setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); + setTargetDAGCombine(ISD::STORE); + setTargetDAGCombine(ISD::SIGN_EXTEND); + setTargetDAGCombine(ISD::ZERO_EXTEND); + setTargetDAGCombine(ISD::ANY_EXTEND); } if (!Subtarget->hasFP64()) { @@ -901,9 +966,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); } - if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()){ + if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) { setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); - setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); + if (Subtarget->hasFullFP16()) + setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); } if (!Subtarget->hasFP16()) @@ -955,6 +1021,16 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::ADDCARRY, MVT::i32, Custom); setOperationAction(ISD::SUBCARRY, MVT::i32, Custom); + if (Subtarget->hasDSP()) { + setOperationAction(ISD::SADDSAT, MVT::i8, Custom); + setOperationAction(ISD::SSUBSAT, MVT::i8, Custom); + setOperationAction(ISD::SADDSAT, MVT::i16, Custom); + setOperationAction(ISD::SSUBSAT, MVT::i16, Custom); + } + if (Subtarget->hasBaseDSP()) { + setOperationAction(ISD::SADDSAT, MVT::i32, Legal); + setOperationAction(ISD::SSUBSAT, MVT::i32, Legal); + } // i64 operation support. setOperationAction(ISD::MUL, MVT::i64, Expand); @@ -972,6 +1048,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRL, MVT::i64, Custom); setOperationAction(ISD::SRA, MVT::i64, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); // MVE lowers 64 bit shifts to lsll and lsrl @@ -991,7 +1068,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // ARM does not have ROTL. setOperationAction(ISD::ROTL, MVT::i32, Expand); - for (MVT VT : MVT::vector_valuetypes()) { + for (MVT VT : MVT::fixedlen_vector_valuetypes()) { setOperationAction(ISD::ROTL, VT, Expand); setOperationAction(ISD::ROTR, VT, Expand); } @@ -1365,14 +1442,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // On ARM arguments smaller than 4 bytes are extended, so all arguments // are at least 4 bytes aligned. - setMinStackArgumentAlignment(4); + setMinStackArgumentAlignment(Align(4)); // Prefer likely predicted branches to selects on out-of-order cores. PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder(); - setPrefLoopAlignment(Subtarget->getPrefLoopAlignment()); + setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment())); - setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2); + setMinFunctionAlignment(Subtarget->isThumb() ? 
Align(2) : Align(4)); if (Subtarget->isThumb() || Subtarget->isThumb2()) setTargetDAGCombine(ISD::ABS); @@ -1472,6 +1549,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::ADDE: return "ARMISD::ADDE"; case ARMISD::SUBC: return "ARMISD::SUBC"; case ARMISD::SUBE: return "ARMISD::SUBE"; + case ARMISD::LSLS: return "ARMISD::LSLS"; case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD"; case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; @@ -1496,16 +1574,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK"; case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK"; - case ARMISD::VCEQ: return "ARMISD::VCEQ"; - case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; - case ARMISD::VCGE: return "ARMISD::VCGE"; - case ARMISD::VCGEZ: return "ARMISD::VCGEZ"; - case ARMISD::VCLEZ: return "ARMISD::VCLEZ"; - case ARMISD::VCGEU: return "ARMISD::VCGEU"; - case ARMISD::VCGT: return "ARMISD::VCGT"; - case ARMISD::VCGTZ: return "ARMISD::VCGTZ"; - case ARMISD::VCLTZ: return "ARMISD::VCLTZ"; - case ARMISD::VCGTU: return "ARMISD::VCGTU"; + case ARMISD::PREDICATE_CAST: return "ARMISD::PREDICATE_CAST"; + case ARMISD::VCMP: return "ARMISD::VCMP"; + case ARMISD::VCMPZ: return "ARMISD::VCMPZ"; case ARMISD::VTST: return "ARMISD::VTST"; case ARMISD::VSHLs: return "ARMISD::VSHLs"; @@ -1543,6 +1614,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VTRN: return "ARMISD::VTRN"; case ARMISD::VTBL1: return "ARMISD::VTBL1"; case ARMISD::VTBL2: return "ARMISD::VTBL2"; + case ARMISD::VMOVN: return "ARMISD::VMOVN"; case ARMISD::VMULLs: return "ARMISD::VMULLs"; case ARMISD::VMULLu: return "ARMISD::VMULLu"; case ARMISD::UMAAL: return "ARMISD::UMAAL"; @@ -1560,6 +1632,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX"; case ARMISD::SMMLAR: return "ARMISD::SMMLAR"; case ARMISD::SMMLSR: return "ARMISD::SMMLSR"; + case ARMISD::QADD16b: return "ARMISD::QADD16b"; + case ARMISD::QSUB16b: return "ARMISD::QSUB16b"; + case ARMISD::QADD8b: return "ARMISD::QADD8b"; + case ARMISD::QSUB8b: return "ARMISD::QSUB8b"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; case ARMISD::BFI: return "ARMISD::BFI"; case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; @@ -1589,6 +1665,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; case ARMISD::WLS: return "ARMISD::WLS"; + case ARMISD::LE: return "ARMISD::LE"; + case ARMISD::LOOP_DEC: return "ARMISD::LOOP_DEC"; + case ARMISD::CSINV: return "ARMISD::CSINV"; + case ARMISD::CSNEG: return "ARMISD::CSNEG"; + case ARMISD::CSINC: return "ARMISD::CSINC"; } return nullptr; } @@ -1597,6 +1678,11 @@ EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const { if (!VT.isVector()) return getPointerTy(DL); + + // MVE has a predicate register. + if (Subtarget->hasMVEIntegerOps() && + (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8)) + return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); return VT.changeVectorElementTypeToInteger(); } @@ -1726,34 +1812,22 @@ static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. 
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, - ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) { + ARMCC::CondCodes &CondCode2) { CondCode2 = ARMCC::AL; - InvalidOnQNaN = true; switch (CC) { default: llvm_unreachable("Unknown FP condition!"); case ISD::SETEQ: - case ISD::SETOEQ: - CondCode = ARMCC::EQ; - InvalidOnQNaN = false; - break; + case ISD::SETOEQ: CondCode = ARMCC::EQ; break; case ISD::SETGT: case ISD::SETOGT: CondCode = ARMCC::GT; break; case ISD::SETGE: case ISD::SETOGE: CondCode = ARMCC::GE; break; case ISD::SETOLT: CondCode = ARMCC::MI; break; case ISD::SETOLE: CondCode = ARMCC::LS; break; - case ISD::SETONE: - CondCode = ARMCC::MI; - CondCode2 = ARMCC::GT; - InvalidOnQNaN = false; - break; + case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; case ISD::SETO: CondCode = ARMCC::VC; break; case ISD::SETUO: CondCode = ARMCC::VS; break; - case ISD::SETUEQ: - CondCode = ARMCC::EQ; - CondCode2 = ARMCC::VS; - InvalidOnQNaN = false; - break; + case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break; case ISD::SETUGT: CondCode = ARMCC::HI; break; case ISD::SETUGE: CondCode = ARMCC::PL; break; case ISD::SETLT: @@ -1761,10 +1835,7 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, case ISD::SETLE: case ISD::SETULE: CondCode = ARMCC::LE; break; case ISD::SETNE: - case ISD::SETUNE: - CondCode = ARMCC::NE; - InvalidOnQNaN = false; - break; + case ISD::SETUNE: CondCode = ARMCC::NE; break; } } @@ -1988,6 +2059,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool isVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); + MachineFunction::CallSiteInfo CSInfo; bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); bool isThisReturn = false; auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls"); @@ -2112,6 +2184,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, "unexpected use of 'returned'"); isThisReturn = true; } + const TargetOptions &Options = DAG.getTarget().Options; + if (Options.EnableDebugEntryValues) + CSInfo.emplace_back(VA.getLocReg(), i); RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else if (isByVal) { assert(VA.isMemLoc()); @@ -2347,12 +2422,15 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); if (isTailCall) { MF.getFrameInfo().setHasTailCall(); - return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); + SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); + DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo)); + return Ret; } // Returns a chain and a flag for retval copy to use. 
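A scalar model of the SETONE case handled above, where there is no single ARM condition code: "ordered and not equal" is tested as MI (less than) or GT (greater than) on the result of the floating-point compare, and both halves are false when either operand is a NaN, which gives the required ordered behaviour. The helper name is illustrative only.

// Both comparisons are false on NaN and when a == b, so the disjunction is
// exactly "ordered and not equal" (ISD::SETONE -> ARMCC::MI, ARMCC::GT).
static bool setONE(double a, double b) { return (a < b) || (a > b); }

SETUEQ is the mirror image: "equal or unordered" is checked as EQ or VS over the same compare.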
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); InFlag = Chain.getValue(1); + DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), DAG.getIntPtrConstant(0, dl, true), InFlag, dl); @@ -2431,7 +2509,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, int FI = std::numeric_limits::max(); if (Arg.getOpcode() == ISD::CopyFromReg) { unsigned VR = cast(Arg.getOperand(1))->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(VR)) + if (!Register::isVirtualRegister(VR)) return false; MachineInstr *Def = MRI->getVRegDef(VR); if (!Def) @@ -3047,12 +3125,12 @@ ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op, // Load the current TEB (thread environment block) SDValue Ops[] = {Chain, - DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32), - DAG.getConstant(15, DL, MVT::i32), - DAG.getConstant(0, DL, MVT::i32), - DAG.getConstant(13, DL, MVT::i32), - DAG.getConstant(0, DL, MVT::i32), - DAG.getConstant(2, DL, MVT::i32)}; + DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32), + DAG.getTargetConstant(15, DL, MVT::i32), + DAG.getTargetConstant(0, DL, MVT::i32), + DAG.getTargetConstant(13, DL, MVT::i32), + DAG.getTargetConstant(0, DL, MVT::i32), + DAG.getTargetConstant(2, DL, MVT::i32)}; SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, DAG.getVTList(MVT::i32, MVT::Other), Ops); @@ -3498,6 +3576,48 @@ SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, Op.getOperand(0)); } +SDValue ARMTargetLowering::LowerINTRINSIC_VOID( + SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const { + unsigned IntNo = + cast( + Op.getOperand(Op.getOperand(0).getValueType() == MVT::Other)) + ->getZExtValue(); + switch (IntNo) { + default: + return SDValue(); // Don't custom lower most intrinsics. + case Intrinsic::arm_gnu_eabi_mcount: { + MachineFunction &MF = DAG.getMachineFunction(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + SDLoc dl(Op); + SDValue Chain = Op.getOperand(0); + // call "\01__gnu_mcount_nc" + const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo(); + const uint32_t *Mask = + ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C); + assert(Mask && "Missing call preserved mask for calling convention"); + // Mark LR an implicit live-in. + unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); + SDValue ReturnAddress = + DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT); + std::vector ResultTys = {MVT::Other, MVT::Glue}; + SDValue Callee = + DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0); + SDValue RegisterMask = DAG.getRegisterMask(Mask); + if (Subtarget->isThumb()) + return SDValue( + DAG.getMachineNode( + ARM::tBL_PUSHLR, dl, ResultTys, + {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT), + DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}), + 0); + return SDValue( + DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys, + {ReturnAddress, Callee, RegisterMask, Chain}), + 0); + } + } +} + SDValue ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const { @@ -3898,6 +4018,12 @@ SDValue ARMTargetLowering::LowerFormalArguments( // Transform the arguments in physical registers into virtual ones. unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); + + // If this value is passed in r0 and has the returned attribute (e.g. + // C++ 'structors), record this fact for later use. 
+ if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) { + AFI->setPreservesR0(); + } } // If this is an 8 or 16-bit value, it is really passed promoted @@ -4049,6 +4175,67 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, std::swap(LHS, RHS); } + // Thumb1 has very limited immediate modes, so turning an "and" into a + // shift can save multiple instructions. + // + // If we have (x & C1), and C1 is an appropriate mask, we can transform it + // into "((x << n) >> n)". But that isn't necessarily profitable on its + // own. If it's the operand to an unsigned comparison with an immediate, + // we can eliminate one of the shifts: we transform + // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)". + // + // We avoid transforming cases which aren't profitable due to encoding + // details: + // + // 1. C2 fits into the immediate field of a cmp, and the transformed version + // would not; in that case, we're essentially trading one immediate load for + // another. + // 2. C1 is 255 or 65535, so we can use uxtb or uxth. + // 3. C2 is zero; we have other code for this special case. + // + // FIXME: Figure out profitability for Thumb2; we usually can't save an + // instruction, since the AND is always one instruction anyway, but we could + // use narrow instructions in some cases. + if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND && + LHS->hasOneUse() && isa(LHS.getOperand(1)) && + LHS.getValueType() == MVT::i32 && isa(RHS) && + !isSignedIntSetCC(CC)) { + unsigned Mask = cast(LHS.getOperand(1))->getZExtValue(); + auto *RHSC = cast(RHS.getNode()); + uint64_t RHSV = RHSC->getZExtValue(); + if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) { + unsigned ShiftBits = countLeadingZeros(Mask); + if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) { + SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32); + LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt); + RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32); + } + } + } + + // The specific comparison "(x< 0x80000000U" can be optimized to a + // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same + // way a cmp would. + // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and + // some tweaks to the heuristics for the previous and->shift transform. + // FIXME: Optimize cases where the LHS isn't a shift. + if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL && + isa(RHS) && + cast(RHS)->getZExtValue() == 0x80000000U && + CC == ISD::SETUGT && isa(LHS.getOperand(1)) && + cast(LHS.getOperand(1))->getZExtValue() < 31) { + unsigned ShiftAmt = + cast(LHS.getOperand(1))->getZExtValue() + 1; + SDValue Shift = DAG.getNode(ARMISD::LSLS, dl, + DAG.getVTList(MVT::i32, MVT::i32), + LHS.getOperand(0), + DAG.getConstant(ShiftAmt, dl, MVT::i32)); + SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR, + Shift.getValue(1), SDValue()); + ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32); + return Chain.getValue(1); + } + ARMCC::CondCodes CondCode = IntCCToARMCC(CC); // If the RHS is a constant zero then the V (overflow) flag will never be @@ -4083,15 +4270,13 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. 
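A small self-contained check of the Thumb1 rewrite described in the comment above, "(x & C1) == C2" into "(x << n) == (C2 << n)" for a contiguous low mask C1 with n leading zero bits and C2 contained in C1. The helper names are illustrative, not from the patch.

#include <cassert>
#include <cstdint>

// Portable stand-in for countLeadingZeros on a 32-bit value.
static unsigned countLeadingZeros32(uint32_t v) {
  unsigned n = 0;
  for (uint32_t bit = 0x80000000u; bit && !(v & bit); bit >>= 1)
    ++n;
  return n;
}

// The rewritten comparison: shifting both sides left by n discards exactly
// the bits that the mask would have cleared.
static bool cmpViaShift(uint32_t x, uint32_t c1, uint32_t c2) {
  unsigned n = countLeadingZeros32(c1);
  return (x << n) == (c2 << n);
}

int main() {
  const uint32_t xs[] = {0u, 1u, 0x1234u, 0xffffffffu};
  for (uint32_t x : xs) {
    const uint32_t c1 = 0x3ff, c2 = 0x123; // c2 has no bits outside c1
    assert(((x & c1) == c2) == cmpViaShift(x, c1, c2));
  }
  return 0;
}

The same reasoning underlies the "(x << c) < 0x80000000U" to "lsls x, c+1" trick: the extra shift moves the bit being tested into the carry flag, so the compare disappears entirely.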
SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, - SelectionDAG &DAG, const SDLoc &dl, - bool InvalidOnQNaN) const { + SelectionDAG &DAG, const SDLoc &dl) const { assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64); SDValue Cmp; - SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32); if (!isFloatingPointZero(RHS)) - Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C); + Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); else - Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C); + Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS); return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); } @@ -4108,12 +4293,10 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { Cmp = Cmp.getOperand(0); Opc = Cmp.getOpcode(); if (Opc == ARMISD::CMPFP) - Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0), - Cmp.getOperand(1), Cmp.getOperand(2)); + Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); else { assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); - Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0), - Cmp.getOperand(1)); + Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); } return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); } @@ -4276,6 +4459,35 @@ SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op, return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); } +static SDValue LowerSADDSUBSAT(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + EVT VT = Op.getValueType(); + if (!Subtarget->hasDSP()) + return SDValue(); + if (!VT.isSimple()) + return SDValue(); + + unsigned NewOpcode; + bool IsAdd = Op->getOpcode() == ISD::SADDSAT; + switch (VT.getSimpleVT().SimpleTy) { + default: + return SDValue(); + case MVT::i8: + NewOpcode = IsAdd ? ARMISD::QADD8b : ARMISD::QSUB8b; + break; + case MVT::i16: + NewOpcode = IsAdd ? ARMISD::QADD16b : ARMISD::QSUB16b; + break; + } + + SDLoc dl(Op); + SDValue Add = + DAG.getNode(NewOpcode, dl, MVT::i32, + DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32), + DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Add); +} + SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); @@ -4656,10 +4868,62 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast(Op.getOperand(4))->get(); SDValue TrueVal = Op.getOperand(2); SDValue FalseVal = Op.getOperand(3); + ConstantSDNode *CFVal = dyn_cast(FalseVal); + ConstantSDNode *CTVal = dyn_cast(TrueVal); + + if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal && + LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) { + unsigned TVal = CTVal->getZExtValue(); + unsigned FVal = CFVal->getZExtValue(); + unsigned Opcode = 0; + + if (TVal == ~FVal) { + Opcode = ARMISD::CSINV; + } else if (TVal == ~FVal + 1) { + Opcode = ARMISD::CSNEG; + } else if (TVal + 1 == FVal) { + Opcode = ARMISD::CSINC; + } else if (TVal == FVal + 1) { + Opcode = ARMISD::CSINC; + std::swap(TrueVal, FalseVal); + std::swap(TVal, FVal); + CC = ISD::getSetCCInverse(CC, true); + } + + if (Opcode) { + // If one of the constants is cheaper than another, materialise the + // cheaper one and let the csel generate the other. 
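Reference semantics of what the LowerSADDSUBSAT path above has to produce for i8: the value that QADD8b must leave in the low byte of its i32 result before the final TRUNCATE. This is a sketch under that reading, with an illustrative function name; SSUBSAT is the analogous clamp of the difference.

#include <algorithm>
#include <cstdint>

static int8_t saddsat_i8(int8_t a, int8_t b) {
  int32_t s = int32_t(a) + int32_t(b);                  // widen, like the SExt above
  s = std::min<int32_t>(127, std::max<int32_t>(-128, s)); // saturate to [-128, 127]
  return int8_t(s);                                     // like the final TRUNCATE
}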
+ if (Opcode != ARMISD::CSINC && + HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) { + std::swap(TrueVal, FalseVal); + std::swap(TVal, FVal); + CC = ISD::getSetCCInverse(CC, true); + } + + // Attempt to use ZR checking TVal is 0, possibly inverting the condition + // to get there. CSINC not is invertable like the other two (~(~a) == a, + // -(-a) == a, but (a+1)+1 != a). + if (FVal == 0 && Opcode != ARMISD::CSINC) { + std::swap(TrueVal, FalseVal); + std::swap(TVal, FVal); + CC = ISD::getSetCCInverse(CC, true); + } + if (TVal == 0) + TrueVal = DAG.getRegister(ARM::ZR, MVT::i32); + + // Drops F's value because we can get it by inverting/negating TVal. + FalseVal = TrueVal; + + SDValue ARMcc; + SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); + EVT VT = TrueVal.getValueType(); + return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp); + } + } if (isUnsupportedFloatingType(LHS.getValueType())) { DAG.getTargetLoweringInfo().softenSetCCOperands( - DAG, LHS.getValueType(), LHS, RHS, CC, dl); + DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS); // If softenSetCCOperands only returned one value, we should compare it to // zero. @@ -4701,8 +4965,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { } ARMCC::CondCodes CondCode, CondCode2; - bool InvalidOnQNaN; - FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN); + FPCCToARMCC(CC, CondCode, CondCode2); // Normalize the fp compare. If RHS is zero we prefer to keep it there so we // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we @@ -4727,13 +4990,13 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { } SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); - SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN); + SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); if (CondCode2 != ARMCC::AL) { SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32); // FIXME: Needs another CMP because flag can have but one use. - SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN); + SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG); } return Result; @@ -4903,7 +5166,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { if (isUnsupportedFloatingType(LHS.getValueType())) { DAG.getTargetLoweringInfo().softenSetCCOperands( - DAG, LHS.getValueType(), LHS, RHS, CC, dl); + DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS); // If softenSetCCOperands only returned one value, we should compare it to // zero. 
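A scalar sketch of the conditional-select nodes chosen in the SELECT_CC lowering above, matching the usual reading of the v8.1-M CSINC/CSINV/CSNEG instructions: when the condition holds the first operand is returned unchanged, otherwise the second operand is incremented, inverted or negated. That is why a constant pair with FVal == TVal + 1, FVal == ~TVal or FVal == -TVal only needs one of the two values materialised. Function names are illustrative.

#include <cstdint>

static uint32_t csinc(bool cond, uint32_t a, uint32_t b) { return cond ? a : b + 1; }
static uint32_t csinv(bool cond, uint32_t a, uint32_t b) { return cond ? a : ~b; }
static uint32_t csneg(bool cond, uint32_t a, uint32_t b) { return cond ? a : 0u - b; }

As the comment above notes, only CSINV and CSNEG are their own inverses (~~a == a, -(-a) == a), so CSINC cannot be flipped the same way and is handled by swapping the operands and inverting the condition instead.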
@@ -4960,11 +5223,10 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { } ARMCC::CondCodes CondCode, CondCode2; - bool InvalidOnQNaN; - FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN); + FPCCToARMCC(CC, CondCode, CondCode2); SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); - SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN); + SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; @@ -5056,8 +5318,9 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { else LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType()); + MakeLibCallOptions CallOptions; return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0), - /*isSigned*/ false, SDLoc(Op)).first; + CallOptions, SDLoc(Op)).first; } return Op; @@ -5120,8 +5383,9 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { else LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); + MakeLibCallOptions CallOptions; return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0), - /*isSigned*/ false, SDLoc(Op)).first; + CallOptions, SDLoc(Op)).first; } return Op; @@ -5140,7 +5404,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { if (UseNEON) { // Use VBSL to copy the sign bit. - unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80); + unsigned EncodedVal = ARM_AM::createVMOVModImm(0x6, 0x80); SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32, DAG.getTargetConstant(EncodedVal, dl, MVT::i32)); EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64; @@ -5163,7 +5427,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); - SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff), + SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff), dl, MVT::i32); AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, @@ -5243,7 +5507,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc dl(Op); // FIXME probably not meaningful unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); - unsigned FrameReg = ARI.getFrameRegister(MF); + Register FrameReg = ARI.getFrameRegister(MF); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, @@ -5253,9 +5517,9 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. 
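A scalar model of the VBSL-based FCOPYSIGN above: with a mask that has only the sign bit set (the createVMOVModImm(0x6, 0x80) immediate, i.e. 0x80000000 per lane), the bit-select takes the sign bit from the second operand and everything else from the first. Names are illustrative and this is a per-lane sketch, not the vector code itself.

#include <cstdint>
#include <cstring>

static float copysignViaBitSelect(float mag, float sgn) {
  uint32_t m, s;
  const uint32_t mask = 0x80000000u;      // only the sign bit
  std::memcpy(&m, &mag, sizeof m);
  std::memcpy(&s, &sgn, sizeof s);
  uint32_t r = (s & mask) | (m & ~mask);  // VBSL: select sign from sgn, rest from mag
  float out;
  std::memcpy(&out, &r, sizeof out);
  return out;
}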
-unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const { - unsigned Reg = StringSwitch(RegName) +Register ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const { + Register Reg = StringSwitch(RegName) .Case("sp", ARM::SP) .Default(0); if (Reg) @@ -5576,8 +5840,7 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { SDLoc dl(N); EVT VT = N->getValueType(0); - if (VT.isVector()) { - assert(ST->hasNEON()); + if (VT.isVector() && ST->hasNEON()) { // Compute the least significant set bit: LSB = X & -X SDValue X = N->getOperand(0); @@ -5777,14 +6040,15 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, unsigned ShPartsOpc = ARMISD::LSLL; ConstantSDNode *Con = dyn_cast(ShAmt); - // If the shift amount is greater than 32 then do the default optimisation - if (Con && Con->getZExtValue() > 32) + // If the shift amount is greater than 32 or has a greater bitwidth than 64 + // then do the default optimisation + if (ShAmt->getValueType(0).getSizeInBits() > 64 || + (Con && (Con->getZExtValue() == 0 || Con->getZExtValue() >= 32))) return SDValue(); - // Extract the lower 32 bits of the shift amount if it's an i64 - if (ShAmt->getValueType(0) == MVT::i64) - ShAmt = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ShAmt, - DAG.getConstant(0, dl, MVT::i32)); + // Extract the lower 32 bits of the shift amount if it's not an i32 + if (ShAmt->getValueType(0) != MVT::i32) + ShAmt = DAG.getZExtOrTrunc(ShAmt, dl, MVT::i32); if (ShOpc == ISD::SRL) { if (!Con) @@ -5839,20 +6103,37 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); } -static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { - SDValue TmpOp0, TmpOp1; +static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { bool Invert = false; bool Swap = false; - unsigned Opc = 0; + unsigned Opc = ARMCC::AL; SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue CC = Op.getOperand(2); - EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger(); EVT VT = Op.getValueType(); ISD::CondCode SetCCOpcode = cast(CC)->get(); SDLoc dl(Op); + EVT CmpVT; + if (ST->hasNEON()) + CmpVT = Op0.getValueType().changeVectorElementTypeToInteger(); + else { + assert(ST->hasMVEIntegerOps() && + "No hardware support for integer vector comparison!"); + + if (Op.getValueType().getVectorElementType() != MVT::i1) + return SDValue(); + + // Make sure we expand floating point setcc to scalar if we do not have + // mve.fp, so that we can handle them from there. + if (Op0.getValueType().isFloatingPoint() && !ST->hasMVEFloatOps()) + return SDValue(); + + CmpVT = VT; + } + if (Op0.getValueType().getVectorElementType() == MVT::i64 && (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) { // Special-case integer 64-bit equality comparisons. 
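A sketch of the result the long-shift expansion above has to produce for a constant amount: a 64-bit value held as a lo/hi register pair and shifted left, for amounts in the 1..31 range that the code keeps after rejecting 0 and anything >= 32. The helper name mirrors the lsll mnemonic but the function itself is illustrative.

#include <cstdint>
#include <utility>

// amt is assumed to be in [1, 31]; bits shifted out of the low word are
// carried into the high word.
static std::pair<uint32_t, uint32_t> lsll(uint32_t lo, uint32_t hi, unsigned amt) {
  uint32_t newHi = (hi << amt) | (lo >> (32 - amt));
  uint32_t newLo = lo << amt;
  return {newLo, newHi}; // {low word, high word}
}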
They aren't legal, @@ -5880,60 +6161,74 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { switch (SetCCOpcode) { default: llvm_unreachable("Illegal FP comparison"); case ISD::SETUNE: - case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH; + case ISD::SETNE: + if (ST->hasMVEFloatOps()) { + Opc = ARMCC::NE; break; + } else { + Invert = true; LLVM_FALLTHROUGH; + } case ISD::SETOEQ: - case ISD::SETEQ: Opc = ARMISD::VCEQ; break; + case ISD::SETEQ: Opc = ARMCC::EQ; break; case ISD::SETOLT: case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETOGT: - case ISD::SETGT: Opc = ARMISD::VCGT; break; + case ISD::SETGT: Opc = ARMCC::GT; break; case ISD::SETOLE: case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETOGE: - case ISD::SETGE: Opc = ARMISD::VCGE; break; + case ISD::SETGE: Opc = ARMCC::GE; break; case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH; - case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; + case ISD::SETULE: Invert = true; Opc = ARMCC::GT; break; case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH; - case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; + case ISD::SETULT: Invert = true; Opc = ARMCC::GE; break; case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH; - case ISD::SETONE: + case ISD::SETONE: { // Expand this to (OLT | OGT). - TmpOp0 = Op0; - TmpOp1 = Op1; - Opc = ISD::OR; - Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0); - Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1); - break; - case ISD::SETUO: - Invert = true; - LLVM_FALLTHROUGH; - case ISD::SETO: + SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0, + DAG.getConstant(ARMCC::GT, dl, MVT::i32)); + SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1, + DAG.getConstant(ARMCC::GT, dl, MVT::i32)); + SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1); + if (Invert) + Result = DAG.getNOT(dl, Result, VT); + return Result; + } + case ISD::SETUO: Invert = true; LLVM_FALLTHROUGH; + case ISD::SETO: { // Expand this to (OLT | OGE). - TmpOp0 = Op0; - TmpOp1 = Op1; - Opc = ISD::OR; - Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0); - Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1); - break; + SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0, + DAG.getConstant(ARMCC::GT, dl, MVT::i32)); + SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1, + DAG.getConstant(ARMCC::GE, dl, MVT::i32)); + SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1); + if (Invert) + Result = DAG.getNOT(dl, Result, VT); + return Result; + } } } else { // Integer comparisons. 
switch (SetCCOpcode) { default: llvm_unreachable("Illegal integer comparison"); - case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH; - case ISD::SETEQ: Opc = ARMISD::VCEQ; break; + case ISD::SETNE: + if (ST->hasMVEIntegerOps()) { + Opc = ARMCC::NE; break; + } else { + Invert = true; LLVM_FALLTHROUGH; + } + case ISD::SETEQ: Opc = ARMCC::EQ; break; case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH; - case ISD::SETGT: Opc = ARMISD::VCGT; break; + case ISD::SETGT: Opc = ARMCC::GT; break; case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH; - case ISD::SETGE: Opc = ARMISD::VCGE; break; + case ISD::SETGE: Opc = ARMCC::GE; break; case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH; - case ISD::SETUGT: Opc = ARMISD::VCGTU; break; + case ISD::SETUGT: Opc = ARMCC::HI; break; case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH; - case ISD::SETUGE: Opc = ARMISD::VCGEU; break; + case ISD::SETUGE: Opc = ARMCC::HS; break; } // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). - if (Opc == ARMISD::VCEQ) { + if (ST->hasNEON() && Opc == ARMCC::EQ) { SDValue AndOp; if (ISD::isBuildVectorAllZeros(Op1.getNode())) AndOp = Op0; @@ -5945,10 +6240,12 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { AndOp = AndOp.getOperand(0); if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { - Opc = ARMISD::VTST; Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0)); Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1)); - Invert = !Invert; + SDValue Result = DAG.getNode(ARMISD::VTST, dl, CmpVT, Op0, Op1); + if (!Invert) + Result = DAG.getNOT(dl, Result, VT); + return Result; } } } @@ -5962,31 +6259,20 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { if (ISD::isBuildVectorAllZeros(Op1.getNode())) SingleOp = Op0; else if (ISD::isBuildVectorAllZeros(Op0.getNode())) { - if (Opc == ARMISD::VCGE) - Opc = ARMISD::VCLEZ; - else if (Opc == ARMISD::VCGT) - Opc = ARMISD::VCLTZ; + if (Opc == ARMCC::GE) + Opc = ARMCC::LE; + else if (Opc == ARMCC::GT) + Opc = ARMCC::LT; SingleOp = Op1; } SDValue Result; if (SingleOp.getNode()) { - switch (Opc) { - case ARMISD::VCEQ: - Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break; - case ARMISD::VCGE: - Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break; - case ARMISD::VCLEZ: - Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break; - case ARMISD::VCGT: - Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break; - case ARMISD::VCLTZ: - Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break; - default: - Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1); - } + Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, SingleOp, + DAG.getConstant(Opc, dl, MVT::i32)); } else { - Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1); + Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1, + DAG.getConstant(Opc, dl, MVT::i32)); } Result = DAG.getSExtOrTrunc(Result, dl, VT); @@ -6027,13 +6313,13 @@ static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) { CCR, Chain.getValue(1)); } -/// isNEONModifiedImm - Check if the specified splat value corresponds to a -/// valid vector constant for a NEON or MVE instruction with a "modified immediate" -/// operand (e.g., VMOV). If so, return the encoded value. -static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, +/// isVMOVModifiedImm - Check if the specified splat value corresponds to a +/// valid vector constant for a NEON or MVE instruction with a "modified +/// immediate" operand (e.g., VMOV). If so, return the encoded value. 
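The two rewrites used repeatedly in the integer SETCC mapping above, stated on scalars: an unsigned "<" is handled by swapping the operands of a ">" (HI) compare, and "<=" by inverting one, so only EQ/NE/GT/GE/HI/HS need native condition codes. A minimal self-check, with illustrative values:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t vals[] = {0u, 1u, 7u, 0xffffffffu};
  for (uint32_t a : vals)
    for (uint32_t b : vals) {
      assert((a < b) == (b > a));   // SETULT: swap operands, then HI
      assert((a <= b) == !(a > b)); // SETULE: invert an HI compare
    }
  return 0;
}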
+static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, const SDLoc &dl, EVT &VT, bool is128Bits, - NEONModImmType type) { + VMOVModImmType type) { unsigned OpCmode, Imm; // SplatBitSize is set to the smallest size that splats the vector, so a @@ -6163,10 +6449,10 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, } default: - llvm_unreachable("unexpected size for isNEONModifiedImm"); + llvm_unreachable("unexpected size for isVMOVModifiedImm"); } - unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); + unsigned EncodedVal = ARM_AM::createVMOVModImm(OpCmode, Imm); return DAG.getTargetConstant(EncodedVal, dl, MVT::i32); } @@ -6246,7 +6532,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, return SDValue(); // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too). - SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), + SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT, false, VMOVModImm); if (NewVal != SDValue()) { SDLoc DL(Op); @@ -6263,7 +6549,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, } // Finally, try a VMVN.i32 - NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT, + NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT, false, VMVNModImm); if (NewVal != SDValue()) { SDLoc DL(Op); @@ -6649,6 +6935,29 @@ static bool isReverseMask(ArrayRef M, EVT VT) { return true; } +static bool isVMOVNMask(ArrayRef M, EVT VT, bool Top) { + unsigned NumElts = VT.getVectorNumElements(); + // Make sure the mask has the right size. + if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8)) + return false; + + // If Top + // Look for <0, N, 2, N+2, 4, N+4, ..>. + // This inserts Input2 into Input1 + // else if not Top + // Look for <0, N+1, 2, N+3, 4, N+5, ..> + // This inserts Input1 into Input2 + unsigned Offset = Top ? 0 : 1; + for (unsigned i = 0; i < NumElts; i+=2) { + if (M[i] >= 0 && M[i] != (int)i) + return false; + if (M[i+1] >= 0 && M[i+1] != (int)(NumElts + i + Offset)) + return false; + } + + return true; +} + // If N is an integer constant that can be moved into a register in one // instruction, return an SDValue of such a constant (will become a MOV // instruction). Otherwise return null. 
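A concrete instance of the shuffle mask recognised by isVMOVNMask above, for v8i16 with Top == true: the mask is <0, 8, 2, 10, 4, 12, 6, 14>, so even result lanes keep the even lanes of the first input and odd result lanes take the even lanes of the second, matching the "inserts Input2 into Input1" comment. The function below is an illustrative restatement on plain arrays, not part of the patch.

#include <array>
#include <cstdint>

static std::array<uint16_t, 8> vmovnTop(const std::array<uint16_t, 8> &in1,
                                        const std::array<uint16_t, 8> &in2) {
  std::array<uint16_t, 8> out{};
  for (int i = 0; i < 8; i += 2) {
    out[i] = in1[i];     // mask entry i:     lane i of the concatenation
    out[i + 1] = in2[i]; // mask entry i + 1: lane N + i, i.e. the second input
  }
  return out;
}

With Top == false the odd mask entries become N+1, N+3, ..., picking the odd lanes of the second input instead.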
@@ -6669,6 +6978,66 @@ static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, return SDValue(); } +static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + SDLoc dl(Op); + EVT VT = Op.getValueType(); + + assert(ST->hasMVEIntegerOps() && "LowerBUILD_VECTOR_i1 called without MVE!"); + + unsigned NumElts = VT.getVectorNumElements(); + unsigned BoolMask; + unsigned BitsPerBool; + if (NumElts == 4) { + BitsPerBool = 4; + BoolMask = 0xf; + } else if (NumElts == 8) { + BitsPerBool = 2; + BoolMask = 0x3; + } else if (NumElts == 16) { + BitsPerBool = 1; + BoolMask = 0x1; + } else + return SDValue(); + + // If this is a single value copied into all lanes (a splat), we can just sign + // extend that single value + SDValue FirstOp = Op.getOperand(0); + if (!isa(FirstOp) && + std::all_of(std::next(Op->op_begin()), Op->op_end(), + [&FirstOp](SDUse &U) { + return U.get().isUndef() || U.get() == FirstOp; + })) { + SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, FirstOp, + DAG.getValueType(MVT::i1)); + return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), Ext); + } + + // First create base with bits set where known + unsigned Bits32 = 0; + for (unsigned i = 0; i < NumElts; ++i) { + SDValue V = Op.getOperand(i); + if (!isa(V) && !V.isUndef()) + continue; + bool BitSet = V.isUndef() ? false : cast(V)->getZExtValue(); + if (BitSet) + Bits32 |= BoolMask << (i * BitsPerBool); + } + + // Add in unknown nodes + SDValue Base = DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, + DAG.getConstant(Bits32, dl, MVT::i32)); + for (unsigned i = 0; i < NumElts; ++i) { + SDValue V = Op.getOperand(i); + if (isa(V) || V.isUndef()) + continue; + Base = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Base, V, + DAG.getConstant(i, dl, MVT::i32)); + } + + return Base; +} + // If this is a case we can't handle, return null and let the default // expansion code take care of it. SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, @@ -6677,6 +7046,9 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, SDLoc dl(Op); EVT VT = Op.getValueType(); + if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1) + return LowerBUILD_VECTOR_i1(Op, DAG, ST); + APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; @@ -6688,7 +7060,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, (ST->hasMVEIntegerOps() && SplatBitSize <= 32)) { // Check if an immediate VMOV works. EVT VmovVT; - SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), + SDValue Val = isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT, VT.is128BitVector(), VMOVModImm); @@ -6700,7 +7072,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // Try an immediate VMVN. uint64_t NegatedImm = (~SplatBits).getZExtValue(); - Val = isNEONModifiedImm( + Val = isVMOVModifiedImm( NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT, VT.is128BitVector(), ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm); @@ -7088,9 +7460,6 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, LaneMask[j] = ExtractBase + j; } - // Final check before we try to produce nonsense... - if (!isShuffleMaskLegal(Mask, ShuffleVT)) - return SDValue(); // We can't handle more than two sources. This should have already // been checked before this point. 
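A standalone sketch of the packing scheme LowerBUILD_VECTOR_i1 uses above: an MVE predicate is a 16-bit value, so a v4i1 lane occupies 4 bits (BoolMask 0xf), a v8i1 lane 2 bits (0x3) and a v16i1 lane 1 bit (0x1), and all known-constant lanes are folded into one 32-bit immediate. The helper name is illustrative.

#include <cstdint>

static uint16_t packPredicate(const bool *lanes, unsigned numElts) {
  unsigned bitsPerBool = 16 / numElts;                 // 4, 2 or 1
  uint16_t boolMask = uint16_t((1u << bitsPerBool) - 1);
  uint16_t bits = 0;
  for (unsigned i = 0; i < numElts; ++i)
    if (lanes[i])
      bits |= uint16_t(boolMask << (i * bitsPerBool)); // set the whole lane field
  return bits;
}

Lanes that are not compile-time constants are then patched in one at a time with INSERT_VECTOR_ELT, exactly as the second loop in the lowering does.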
@@ -7100,8 +7469,10 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, for (unsigned i = 0; i < Sources.size(); ++i) ShuffleOps[i] = Sources[i].ShuffleVec; - SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0], - ShuffleOps[1], Mask); + SDValue Shuffle = buildLegalVectorShuffle(ShuffleVT, dl, ShuffleOps[0], + ShuffleOps[1], Mask, DAG); + if (!Shuffle) + return SDValue(); return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle); } @@ -7168,6 +7539,7 @@ bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef M, EVT VT) const { unsigned EltSize = VT.getScalarSizeInBits(); if (EltSize >= 32 || ShuffleVectorSDNode::isSplatMask(&M[0], VT) || + ShuffleVectorInst::isIdentityMask(M) || isVREVMask(M, VT, 64) || isVREVMask(M, VT, 32) || isVREVMask(M, VT, 16)) @@ -7180,6 +7552,9 @@ bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef M, EVT VT) const { else if (Subtarget->hasNEON() && (VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)) return true; + else if (Subtarget->hasMVEIntegerOps() && + (isVMOVNMask(M, VT, 0) || isVMOVNMask(M, VT, 1))) + return true; else return false; } @@ -7282,6 +7657,94 @@ static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, DAG.getConstant(ExtractNum, DL, MVT::i32)); } +static EVT getVectorTyFromPredicateVector(EVT VT) { + switch (VT.getSimpleVT().SimpleTy) { + case MVT::v4i1: + return MVT::v4i32; + case MVT::v8i1: + return MVT::v8i16; + case MVT::v16i1: + return MVT::v16i8; + default: + llvm_unreachable("Unexpected vector predicate type"); + } +} + +static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT, + SelectionDAG &DAG) { + // Converting from boolean predicates to integers involves creating a vector + // of all ones or all zeroes and selecting the lanes based upon the real + // predicate. + SDValue AllOnes = + DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff), dl, MVT::i32); + AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllOnes); + + SDValue AllZeroes = + DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0x0), dl, MVT::i32); + AllZeroes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllZeroes); + + // Get full vector type from predicate type + EVT NewVT = getVectorTyFromPredicateVector(VT); + + SDValue RecastV1; + // If the real predicate is an v8i1 or v4i1 (not v16i1) then we need to recast + // this to a v16i1. This cannot be done with an ordinary bitcast because the + // sizes are not the same. We have to use a MVE specific PREDICATE_CAST node, + // since we know in hardware the sizes are really the same. + if (VT != MVT::v16i1) + RecastV1 = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Pred); + else + RecastV1 = Pred; + + // Select either all ones or zeroes depending upon the real predicate bits. + SDValue PredAsVector = + DAG.getNode(ISD::VSELECT, dl, MVT::v16i8, RecastV1, AllOnes, AllZeroes); + + // Recast our new predicate-as-integer v16i8 vector into something + // appropriate for the shuffle, i.e. v4i32 for a real v4i1 predicate. 
+ return DAG.getNode(ISD::BITCAST, dl, NewVT, PredAsVector); +} + +static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + EVT VT = Op.getValueType(); + ShuffleVectorSDNode *SVN = cast(Op.getNode()); + ArrayRef ShuffleMask = SVN->getMask(); + + assert(ST->hasMVEIntegerOps() && + "No support for vector shuffle of boolean predicates"); + + SDValue V1 = Op.getOperand(0); + SDLoc dl(Op); + if (isReverseMask(ShuffleMask, VT)) { + SDValue cast = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, V1); + SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, cast); + SDValue srl = DAG.getNode(ISD::SRL, dl, MVT::i32, rbit, + DAG.getConstant(16, dl, MVT::i32)); + return DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, srl); + } + + // Until we can come up with optimised cases for every single vector + // shuffle in existence we have chosen the least painful strategy. This is + // to essentially promote the boolean predicate to a 8-bit integer, where + // each predicate represents a byte. Then we fall back on a normal integer + // vector shuffle and convert the result back into a predicate vector. In + // many cases the generated code might be even better than scalar code + // operating on bits. Just imagine trying to shuffle 8 arbitrary 2-bit + // fields in a register into 8 other arbitrary 2-bit fields! + SDValue PredAsVector = PromoteMVEPredVector(dl, V1, VT, DAG); + EVT NewVT = PredAsVector.getValueType(); + + // Do the shuffle! + SDValue Shuffled = DAG.getVectorShuffle(NewVT, dl, PredAsVector, + DAG.getUNDEF(NewVT), ShuffleMask); + + // Now return the result of comparing the shuffled vector with zero, + // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. + return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled, + DAG.getConstant(ARMCC::NE, dl, MVT::i32)); +} + static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) { SDValue V1 = Op.getOperand(0); @@ -7289,6 +7752,10 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, SDLoc dl(Op); EVT VT = Op.getValueType(); ShuffleVectorSDNode *SVN = cast(Op.getNode()); + unsigned EltSize = VT.getScalarSizeInBits(); + + if (ST->hasMVEIntegerOps() && EltSize == 1) + return LowerVECTOR_SHUFFLE_i1(Op, DAG, ST); // Convert shuffles that are directly supported on NEON to target-specific // DAG nodes, instead of keeping them as shuffles and matching them again @@ -7298,7 +7765,6 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, // of the same time so that they get CSEd properly. 
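A scalar model of the shuffle strategy above for a v4i1 predicate: each i1 lane is widened to an all-ones/all-zeroes integer lane (the VSELECT over PromoteMVEPredVector's constants), the lanes are permuted as ordinary integers, and the result is turned back into a predicate by comparing with zero (VCMPZ with ARMCC::NE). Names and the fixed width of four lanes are illustrative only.

#include <array>
#include <cstdint>

static std::array<bool, 4> shufflePred(const std::array<bool, 4> &p,
                                       const std::array<int, 4> &mask) {
  std::array<int32_t, 4> wide{}, shuffled{};
  for (int i = 0; i < 4; ++i)
    wide[i] = p[i] ? -1 : 0;                       // select all-ones / all-zeroes
  for (int i = 0; i < 4; ++i)
    shuffled[i] = mask[i] < 0 ? 0 : wide[mask[i]]; // ordinary vector shuffle (undef -> 0)
  std::array<bool, 4> out{};
  for (int i = 0; i < 4; ++i)
    out[i] = shuffled[i] != 0;                     // compare-with-zero -> predicate again
  return out;
}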
ArrayRef ShuffleMask = SVN->getMask(); - unsigned EltSize = VT.getScalarSizeInBits(); if (EltSize <= 32) { if (SVN->isSplat()) { int Lane = SVN->getSplatIndex(); @@ -7364,6 +7830,14 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, .getValue(WhichResult); } } + if (ST->hasMVEIntegerOps()) { + if (isVMOVNMask(ShuffleMask, VT, 0)) + return DAG.getNode(ARMISD::VMOVN, dl, VT, V2, V1, + DAG.getConstant(0, dl, MVT::i32)); + if (isVMOVNMask(ShuffleMask, VT, 1)) + return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V2, + DAG.getConstant(1, dl, MVT::i32)); + } // Also check for these shuffles through CONCAT_VECTORS: we canonicalize // shuffles that produce a result larger than their operands with: @@ -7468,8 +7942,29 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, return SDValue(); } -SDValue ARMTargetLowering:: -LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + EVT VecVT = Op.getOperand(0).getValueType(); + SDLoc dl(Op); + + assert(ST->hasMVEIntegerOps() && + "LowerINSERT_VECTOR_ELT_i1 called without MVE!"); + + SDValue Conv = + DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0)); + unsigned Lane = cast(Op.getOperand(2))->getZExtValue(); + unsigned LaneWidth = + getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8; + unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth; + SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, + Op.getOperand(1), DAG.getValueType(MVT::i1)); + SDValue BFI = DAG.getNode(ARMISD::BFI, dl, MVT::i32, Conv, Ext, + DAG.getConstant(~Mask, dl, MVT::i32)); + return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), BFI); +} + +SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { // INSERT_VECTOR_ELT is legal only for immediate indexes. SDValue Lane = Op.getOperand(2); if (!isa(Lane)) @@ -7477,6 +7972,11 @@ LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDValue Elt = Op.getOperand(1); EVT EltVT = Elt.getValueType(); + + if (Subtarget->hasMVEIntegerOps() && + Op.getValueType().getScalarSizeInBits() == 1) + return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget); + if (getTypeAction(*DAG.getContext(), EltVT) == TargetLowering::TypePromoteFloat) { // INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32, @@ -7505,13 +8005,37 @@ LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { return Op; } -static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + EVT VecVT = Op.getOperand(0).getValueType(); + SDLoc dl(Op); + + assert(ST->hasMVEIntegerOps() && + "LowerINSERT_VECTOR_ELT_i1 called without MVE!"); + + SDValue Conv = + DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0)); + unsigned Lane = cast(Op.getOperand(1))->getZExtValue(); + unsigned LaneWidth = + getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8; + SDValue Shift = DAG.getNode(ISD::SRL, dl, MVT::i32, Conv, + DAG.getConstant(Lane * LaneWidth, dl, MVT::i32)); + return Shift; +} + +static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { // EXTRACT_VECTOR_ELT is legal only for immediate indexes. 
SDValue Lane = Op.getOperand(1); if (!isa(Lane)) return SDValue(); SDValue Vec = Op.getOperand(0); + EVT VT = Vec.getValueType(); + + if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1) + return LowerEXTRACT_VECTOR_ELT_i1(Op, DAG, ST); + if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) { SDLoc dl(Op); return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); @@ -7520,7 +8044,64 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { return Op; } -static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + SDLoc dl(Op); + EVT VT = Op.getValueType(); + EVT Op1VT = V1.getValueType(); + EVT Op2VT = V2.getValueType(); + unsigned NumElts = VT.getVectorNumElements(); + + assert(Op1VT == Op2VT && "Operand types don't match!"); + assert(VT.getScalarSizeInBits() == 1 && + "Unexpected custom CONCAT_VECTORS lowering"); + assert(ST->hasMVEIntegerOps() && + "CONCAT_VECTORS lowering only supported for MVE"); + + SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG); + SDValue NewV2 = PromoteMVEPredVector(dl, V2, Op2VT, DAG); + + // We now have Op1 + Op2 promoted to vectors of integers, where v8i1 gets + // promoted to v8i16, etc. + + MVT ElType = getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT(); + + // Extract the vector elements from Op1 and Op2 one by one and truncate them + // to be the right size for the destination. For example, if Op1 is v4i1 then + // the promoted vector is v4i32. The result of concatentation gives a v8i1, + // which when promoted is v8i16. That means each i32 element from Op1 needs + // truncating to i16 and inserting in the result. + EVT ConcatVT = MVT::getVectorVT(ElType, NumElts); + SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT); + auto ExractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) { + EVT NewVT = NewV.getValueType(); + EVT ConcatVT = ConVec.getValueType(); + for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) { + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV, + DAG.getIntPtrConstant(i, dl)); + ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt, + DAG.getConstant(j, dl, MVT::i32)); + } + return ConVec; + }; + unsigned j = 0; + ConVec = ExractInto(NewV1, ConVec, j); + ConVec = ExractInto(NewV2, ConVec, j); + + // Now return the result of comparing the subvector with zero, + // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. + return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec, + DAG.getConstant(ARMCC::NE, dl, MVT::i32)); +} + +static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + EVT VT = Op->getValueType(0); + if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1) + return LowerCONCAT_VECTORS_i1(Op, DAG, ST); + // The only time a CONCAT_VECTORS operation can have legal types is when // two 64-bit vectors are concatenated to a 128-bit vector. 
assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && @@ -7540,6 +8121,43 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val); } +static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + SDLoc dl(Op); + EVT VT = Op.getValueType(); + EVT Op1VT = V1.getValueType(); + unsigned NumElts = VT.getVectorNumElements(); + unsigned Index = cast(V2)->getZExtValue(); + + assert(VT.getScalarSizeInBits() == 1 && + "Unexpected custom EXTRACT_SUBVECTOR lowering"); + assert(ST->hasMVEIntegerOps() && + "EXTRACT_SUBVECTOR lowering only supported for MVE"); + + SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG); + + // We now have Op1 promoted to a vector of integers, where v8i1 gets + // promoted to v8i16, etc. + + MVT ElType = getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT(); + + EVT SubVT = MVT::getVectorVT(ElType, NumElts); + SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT); + for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) { + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1, + DAG.getIntPtrConstant(i, dl)); + SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt, + DAG.getConstant(j, dl, MVT::i32)); + } + + // Now return the result of comparing the subvector with zero, + // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. + return DAG.getNode(ARMISD::VCMPZ, dl, VT, SubVec, + DAG.getConstant(ARMCC::NE, dl, MVT::i32)); +} + /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each /// element has been zero/sign-extended, depending on the isSigned parameter, /// from an integer type half its size. @@ -7897,7 +8515,8 @@ static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl, return N0; } -static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { EVT VT = Op.getValueType(); assert((VT == MVT::v4i16 || VT == MVT::v8i8) && "unexpected type for custom-lowering ISD::SDIV"); @@ -7924,7 +8543,7 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); - N0 = LowerCONCAT_VECTORS(N0, DAG); + N0 = LowerCONCAT_VECTORS(N0, DAG, ST); N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0); return N0; @@ -7932,7 +8551,8 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { return LowerSDIV_v4i16(N0, N1, dl, DAG); } -static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { // TODO: Should this propagate fast-math-flags? 
EVT VT = Op.getValueType(); assert((VT == MVT::v4i16 || VT == MVT::v8i8) && @@ -7960,7 +8580,7 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); - N0 = LowerCONCAT_VECTORS(N0, DAG); + N0 = LowerCONCAT_VECTORS(N0, DAG, ST); N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8, DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl, @@ -8255,6 +8875,96 @@ void ARMTargetLowering::ExpandDIV_Windows( Results.push_back(Upper); } +static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) { + LoadSDNode *LD = cast(Op.getNode()); + EVT MemVT = LD->getMemoryVT(); + assert((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || MemVT == MVT::v16i1) && + "Expected a predicate type!"); + assert(MemVT == Op.getValueType()); + assert(LD->getExtensionType() == ISD::NON_EXTLOAD && + "Expected a non-extending load"); + assert(LD->isUnindexed() && "Expected a unindexed load"); + + // The basic MVE VLDR on a v4i1/v8i1 actually loads the entire 16bit + // predicate, with the "v4i1" bits spread out over the 16 bits loaded. We + // need to make sure that 8/4 bits are actually loaded into the correct + // place, which means loading the value and then shuffling the values into + // the bottom bits of the predicate. + // Equally, VLDR for an v16i1 will actually load 32bits (so will be incorrect + // for BE). + + SDLoc dl(Op); + SDValue Load = DAG.getExtLoad( + ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(), + EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()), + LD->getMemOperand()); + SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Load); + if (MemVT != MVT::v16i1) + Pred = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Pred, + DAG.getConstant(0, dl, MVT::i32)); + return DAG.getMergeValues({Pred, Load.getValue(1)}, dl); +} + +static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) { + StoreSDNode *ST = cast(Op.getNode()); + EVT MemVT = ST->getMemoryVT(); + assert((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || MemVT == MVT::v16i1) && + "Expected a predicate type!"); + assert(MemVT == ST->getValue().getValueType()); + assert(!ST->isTruncatingStore() && "Expected a non-extending store"); + assert(ST->isUnindexed() && "Expected a unindexed store"); + + // Only store the v4i1 or v8i1 worth of bits, via a buildvector with top bits + // unset and a scalar store. 
+ SDLoc dl(Op); + SDValue Build = ST->getValue(); + if (MemVT != MVT::v16i1) { + SmallVector Ops; + for (unsigned I = 0; I < MemVT.getVectorNumElements(); I++) + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Build, + DAG.getConstant(I, dl, MVT::i32))); + for (unsigned I = MemVT.getVectorNumElements(); I < 16; I++) + Ops.push_back(DAG.getUNDEF(MVT::i32)); + Build = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i1, Ops); + } + SDValue GRP = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build); + return DAG.getTruncStore( + ST->getChain(), dl, GRP, ST->getBasePtr(), + EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()), + ST->getMemOperand()); +} + +static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) { + MaskedLoadSDNode *N = cast(Op.getNode()); + MVT VT = Op.getSimpleValueType(); + SDValue Mask = N->getMask(); + SDValue PassThru = N->getPassThru(); + SDLoc dl(Op); + + auto IsZero = [](SDValue PassThru) { + return (ISD::isBuildVectorAllZeros(PassThru.getNode()) || + (PassThru->getOpcode() == ARMISD::VMOVIMM && + isNullConstant(PassThru->getOperand(0)))); + }; + + if (IsZero(PassThru)) + return Op; + + // MVE Masked loads use zero as the passthru value. Here we convert undef to + // zero too, and other values are lowered to a select. + SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT, + DAG.getTargetConstant(0, dl, MVT::i32)); + SDValue NewLoad = DAG.getMaskedLoad( + VT, dl, N->getChain(), N->getBasePtr(), Mask, ZeroVec, N->getMemoryVT(), + N->getMemOperand(), N->getExtensionType(), N->isExpandingLoad()); + SDValue Combo = NewLoad; + if (!PassThru.isUndef() && + (PassThru.getOpcode() != ISD::BITCAST || + !IsZero(PassThru->getOperand(0)))) + Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru); + return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl); +} + static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { if (isStrongerThanMonotonic(cast(Op)->getOrdering())) // Acquire/Release load/store is not legal for targets without a dmb or @@ -8273,12 +8983,12 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N, // Under Power Management extensions, the cycle-count is: // mrc p15, #0, , c9, c13, #0 SDValue Ops[] = { N->getOperand(0), // Chain - DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32), - DAG.getConstant(15, DL, MVT::i32), - DAG.getConstant(0, DL, MVT::i32), - DAG.getConstant(9, DL, MVT::i32), - DAG.getConstant(13, DL, MVT::i32), - DAG.getConstant(0, DL, MVT::i32) + DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32), + DAG.getTargetConstant(15, DL, MVT::i32), + DAG.getTargetConstant(0, DL, MVT::i32), + DAG.getTargetConstant(9, DL, MVT::i32), + DAG.getTargetConstant(13, DL, MVT::i32), + DAG.getTargetConstant(0, DL, MVT::i32) }; SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, @@ -8412,6 +9122,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG); + case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG, Subtarget); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, Subtarget); case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget); @@ -8426,24 +9137,25 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget); case 
ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget); - case ISD::SETCC: return LowerVSETCC(Op, DAG); + case ISD::SETCC: return LowerVSETCC(Op, DAG, Subtarget); case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG); case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG, Subtarget); + case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG, Subtarget); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); - case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); - case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG, Subtarget); + case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG, Subtarget); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); case ISD::SDIV: if (Subtarget->isTargetWindows() && !Op.getValueType().isVector()) return LowerDIV_Windows(Op, DAG, /* Signed */ true); - return LowerSDIV(Op, DAG); + return LowerSDIV(Op, DAG, Subtarget); case ISD::UDIV: if (Subtarget->isTargetWindows() && !Op.getValueType().isVector()) return LowerDIV_Windows(Op, DAG, /* Signed */ false); - return LowerUDIV(Op, DAG); + return LowerUDIV(Op, DAG, Subtarget); case ISD::ADDCARRY: case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG); case ISD::SADDO: @@ -8452,6 +9164,15 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::UADDO: case ISD::USUBO: return LowerUnsignedALUO(Op, DAG); + case ISD::SADDSAT: + case ISD::SSUBSAT: + return LowerSADDSUBSAT(Op, DAG, Subtarget); + case ISD::LOAD: + return LowerPredicateLoad(Op, DAG); + case ISD::STORE: + return LowerPredicateStore(Op, DAG); + case ISD::MLOAD: + return LowerMLOAD(Op, DAG); case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); @@ -8530,6 +9251,10 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(Res.getValue(0)); Results.push_back(Res.getValue(1)); return; + case ISD::SADDSAT: + case ISD::SSUBSAT: + Res = LowerSADDSUBSAT(SDValue(N, 0), DAG, Subtarget); + break; case ISD::READCYCLECOUNTER: ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget); return; @@ -8600,19 +9325,19 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, // orr r5, r5, #1 // add r5, pc // str r5, [$jbuf, #+4] ; &jbuf[1] - unsigned NewVReg1 = MRI->createVirtualRegister(TRC); + Register NewVReg1 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1) .addConstantPoolIndex(CPI) .addMemOperand(CPMMO) .add(predOps(ARMCC::AL)); // Set the low bit because of thumb mode. 
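This hunk, and most of the SjLj and struct-byval code below, mechanically replaces unsigned virtual-register numbers with the Register wrapper type; the surrounding BuildMI calls are untouched because the wrapper converts implicitly to and from unsigned. A minimal sketch of such a wrapper follows; it is an illustration of the pattern, not LLVM's actual Register class, and the high-bit virtual-register convention in it is an assumption:

// Minimal sketch of a strongly-typed register id that still converts to and
// from unsigned, which is why swapping "unsigned Reg" for a wrapper needs no
// other source changes. Illustrative only.
#include <cassert>

class RegisterId {
  unsigned Id = 0;
public:
  constexpr RegisterId(unsigned Val = 0) : Id(Val) {} // from unsigned
  constexpr operator unsigned() const { return Id; }  // back to unsigned
  // High-bit-marks-virtual is an assumption for this sketch.
  constexpr bool isVirtual() const { return (Id & 0x80000000u) != 0; }
};

static unsigned takesUnsigned(unsigned R) { return R; }
static RegisterId takesRegister(RegisterId R) { return R; }

int main() {
  RegisterId R(5);
  assert(takesUnsigned(R) == 5);   // implicit RegisterId -> unsigned
  assert(takesRegister(7u) == 7u); // implicit unsigned -> RegisterId
  return 0;
}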
- unsigned NewVReg2 = MRI->createVirtualRegister(TRC); + Register NewVReg2 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2) .addReg(NewVReg1, RegState::Kill) .addImm(0x01) .add(predOps(ARMCC::AL)) .add(condCodeOp()); - unsigned NewVReg3 = MRI->createVirtualRegister(TRC); + Register NewVReg3 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3) .addReg(NewVReg2, RegState::Kill) .addImm(PCLabelId); @@ -8630,28 +9355,28 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, // orrs r1, r2 // add r2, $jbuf, #+4 ; &jbuf[1] // str r1, [r2] - unsigned NewVReg1 = MRI->createVirtualRegister(TRC); + Register NewVReg1 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1) .addConstantPoolIndex(CPI) .addMemOperand(CPMMO) .add(predOps(ARMCC::AL)); - unsigned NewVReg2 = MRI->createVirtualRegister(TRC); + Register NewVReg2 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2) .addReg(NewVReg1, RegState::Kill) .addImm(PCLabelId); // Set the low bit because of thumb mode. - unsigned NewVReg3 = MRI->createVirtualRegister(TRC); + Register NewVReg3 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3) .addReg(ARM::CPSR, RegState::Define) .addImm(1) .add(predOps(ARMCC::AL)); - unsigned NewVReg4 = MRI->createVirtualRegister(TRC); + Register NewVReg4 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4) .addReg(ARM::CPSR, RegState::Define) .addReg(NewVReg2, RegState::Kill) .addReg(NewVReg3, RegState::Kill) .add(predOps(ARMCC::AL)); - unsigned NewVReg5 = MRI->createVirtualRegister(TRC); + Register NewVReg5 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5) .addFrameIndex(FI) .addImm(36); // &jbuf[1] :: pc @@ -8666,13 +9391,13 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, // ldr r1, LCPI1_1 // add r1, pc, r1 // str r1, [$jbuf, #+4] ; &jbuf[1] - unsigned NewVReg1 = MRI->createVirtualRegister(TRC); + Register NewVReg1 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1) .addConstantPoolIndex(CPI) .addImm(0) .addMemOperand(CPMMO) .add(predOps(ARMCC::AL)); - unsigned NewVReg2 = MRI->createVirtualRegister(TRC); + Register NewVReg2 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2) .addReg(NewVReg1, RegState::Kill) .addImm(PCLabelId) @@ -8794,7 +9519,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, bool IsPositionIndependent = isPositionIndependent(); unsigned NumLPads = LPadList.size(); if (Subtarget->isThumb2()) { - unsigned NewVReg1 = MRI->createVirtualRegister(TRC); + Register NewVReg1 = MRI->createVirtualRegister(TRC); BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1) .addFrameIndex(FI) .addImm(4) @@ -8807,7 +9532,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addImm(LPadList.size()) .add(predOps(ARMCC::AL)); } else { - unsigned VReg1 = MRI->createVirtualRegister(TRC); + Register VReg1 = MRI->createVirtualRegister(TRC); BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1) .addImm(NumLPads & 0xFFFF) .add(predOps(ARMCC::AL)); @@ -8832,12 +9557,12 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addImm(ARMCC::HI) .addReg(ARM::CPSR); - unsigned NewVReg3 = MRI->createVirtualRegister(TRC); + Register NewVReg3 = MRI->createVirtualRegister(TRC); BuildMI(DispContBB, dl, 
TII->get(ARM::t2LEApcrelJT), NewVReg3) .addJumpTableIndex(MJTI) .add(predOps(ARMCC::AL)); - unsigned NewVReg4 = MRI->createVirtualRegister(TRC); + Register NewVReg4 = MRI->createVirtualRegister(TRC); BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4) .addReg(NewVReg3, RegState::Kill) .addReg(NewVReg1) @@ -8850,7 +9575,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addReg(NewVReg1) .addJumpTableIndex(MJTI); } else if (Subtarget->isThumb()) { - unsigned NewVReg1 = MRI->createVirtualRegister(TRC); + Register NewVReg1 = MRI->createVirtualRegister(TRC); BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1) .addFrameIndex(FI) .addImm(1) @@ -8873,7 +9598,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, Align = MF->getDataLayout().getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); - unsigned VReg1 = MRI->createVirtualRegister(TRC); + Register VReg1 = MRI->createVirtualRegister(TRC); BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci)) .addReg(VReg1, RegState::Define) .addConstantPoolIndex(Idx) @@ -8889,19 +9614,19 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addImm(ARMCC::HI) .addReg(ARM::CPSR); - unsigned NewVReg2 = MRI->createVirtualRegister(TRC); + Register NewVReg2 = MRI->createVirtualRegister(TRC); BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2) .addReg(ARM::CPSR, RegState::Define) .addReg(NewVReg1) .addImm(2) .add(predOps(ARMCC::AL)); - unsigned NewVReg3 = MRI->createVirtualRegister(TRC); + Register NewVReg3 = MRI->createVirtualRegister(TRC); BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3) .addJumpTableIndex(MJTI) .add(predOps(ARMCC::AL)); - unsigned NewVReg4 = MRI->createVirtualRegister(TRC); + Register NewVReg4 = MRI->createVirtualRegister(TRC); BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4) .addReg(ARM::CPSR, RegState::Define) .addReg(NewVReg2, RegState::Kill) @@ -8911,7 +9636,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, MachineMemOperand *JTMMOLd = MF->getMachineMemOperand( MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4); - unsigned NewVReg5 = MRI->createVirtualRegister(TRC); + Register NewVReg5 = MRI->createVirtualRegister(TRC); BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5) .addReg(NewVReg4, RegState::Kill) .addImm(0) @@ -8932,7 +9657,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addReg(NewVReg6, RegState::Kill) .addJumpTableIndex(MJTI); } else { - unsigned NewVReg1 = MRI->createVirtualRegister(TRC); + Register NewVReg1 = MRI->createVirtualRegister(TRC); BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1) .addFrameIndex(FI) .addImm(4) @@ -8945,7 +9670,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addImm(NumLPads) .add(predOps(ARMCC::AL)); } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) { - unsigned VReg1 = MRI->createVirtualRegister(TRC); + Register VReg1 = MRI->createVirtualRegister(TRC); BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1) .addImm(NumLPads & 0xFFFF) .add(predOps(ARMCC::AL)); @@ -8974,7 +9699,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, Align = MF->getDataLayout().getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); - unsigned VReg1 = MRI->createVirtualRegister(TRC); + Register VReg1 = MRI->createVirtualRegister(TRC); BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp)) .addReg(VReg1, RegState::Define) .addConstantPoolIndex(Idx) @@ -8991,20 
+9716,20 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addImm(ARMCC::HI) .addReg(ARM::CPSR); - unsigned NewVReg3 = MRI->createVirtualRegister(TRC); + Register NewVReg3 = MRI->createVirtualRegister(TRC); BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3) .addReg(NewVReg1) .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)) .add(predOps(ARMCC::AL)) .add(condCodeOp()); - unsigned NewVReg4 = MRI->createVirtualRegister(TRC); + Register NewVReg4 = MRI->createVirtualRegister(TRC); BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4) .addJumpTableIndex(MJTI) .add(predOps(ARMCC::AL)); MachineMemOperand *JTMMOLd = MF->getMachineMemOperand( MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4); - unsigned NewVReg5 = MRI->createVirtualRegister(TRC); + Register NewVReg5 = MRI->createVirtualRegister(TRC); BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5) .addReg(NewVReg3, RegState::Kill) .addReg(NewVReg4) @@ -9239,8 +9964,8 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = ++BB->getIterator(); - unsigned dest = MI.getOperand(0).getReg(); - unsigned src = MI.getOperand(1).getReg(); + Register dest = MI.getOperand(0).getReg(); + Register src = MI.getOperand(1).getReg(); unsigned SizeVal = MI.getOperand(2).getImm(); unsigned Align = MI.getOperand(3).getImm(); DebugLoc dl = MI.getDebugLoc(); @@ -9291,9 +10016,9 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, unsigned srcIn = src; unsigned destIn = dest; for (unsigned i = 0; i < LoopSize; i+=UnitSize) { - unsigned srcOut = MRI.createVirtualRegister(TRC); - unsigned destOut = MRI.createVirtualRegister(TRC); - unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC); + Register srcOut = MRI.createVirtualRegister(TRC); + Register destOut = MRI.createVirtualRegister(TRC); + Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC); emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut, IsThumb1, IsThumb2); emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut, @@ -9306,9 +10031,9 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, // [scratch, srcOut] = LDRB_POST(srcIn, 1) // [destOut] = STRB_POST(scratch, destIn, 1) for (unsigned i = 0; i < BytesLeft; i++) { - unsigned srcOut = MRI.createVirtualRegister(TRC); - unsigned destOut = MRI.createVirtualRegister(TRC); - unsigned scratch = MRI.createVirtualRegister(TRC); + Register srcOut = MRI.createVirtualRegister(TRC); + Register destOut = MRI.createVirtualRegister(TRC); + Register scratch = MRI.createVirtualRegister(TRC); emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut, IsThumb1, IsThumb2); emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut, @@ -9351,7 +10076,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, exitMBB->transferSuccessorsAndUpdatePHIs(BB); // Load an immediate to varEnd. 
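EmitStructByval, both in the unrolled form above and in the counted loop built below, always copies in two phases: UnitSize-wide post-incrementing load/store pairs for the bulk, then single-byte copies for whatever is left. A plain C++ sketch of that shape, using memcpy for the chunk step instead of the emitted post-indexed instructions (the names here are illustrative):

// Standalone model of the two-phase byval copy: bulk copy in UnitSize chunks
// (vector or word sized), then a byte tail for what is left over.
#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

static void copyByval(uint8_t *Dst, const uint8_t *Src, unsigned SizeVal,
                      unsigned UnitSize) {
  unsigned LoopSize = SizeVal - (SizeVal % UnitSize);
  unsigned BytesLeft = SizeVal - LoopSize;
  // Phase 1: models the emitPostLd/emitPostSt pairs advancing src and dst by
  // UnitSize each iteration.
  for (unsigned I = 0; I < LoopSize; I += UnitSize) {
    std::memcpy(Dst, Src, UnitSize);
    Src += UnitSize;
    Dst += UnitSize;
  }
  // Phase 2: models the LDRB_POST/STRB_POST byte tail.
  for (unsigned I = 0; I < BytesLeft; ++I)
    *Dst++ = *Src++;
}

int main() {
  std::vector<uint8_t> Src(37), Dst(37, 0);
  for (unsigned I = 0; I < Src.size(); ++I)
    Src[I] = uint8_t(I);
  copyByval(Dst.data(), Src.data(), 37, 16); // 16-byte units plus a 5-byte tail
  assert(std::memcmp(Src.data(), Dst.data(), 37) == 0);
  return 0;
}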
- unsigned varEnd = MRI.createVirtualRegister(TRC); + Register varEnd = MRI.createVirtualRegister(TRC); if (Subtarget->useMovt()) { unsigned Vtmp = varEnd; if ((LoopSize & 0xFFFF0000) != 0) @@ -9401,12 +10126,12 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, // destPhi = PHI(destLoop, dst) MachineBasicBlock *entryBB = BB; BB = loopMBB; - unsigned varLoop = MRI.createVirtualRegister(TRC); - unsigned varPhi = MRI.createVirtualRegister(TRC); - unsigned srcLoop = MRI.createVirtualRegister(TRC); - unsigned srcPhi = MRI.createVirtualRegister(TRC); - unsigned destLoop = MRI.createVirtualRegister(TRC); - unsigned destPhi = MRI.createVirtualRegister(TRC); + Register varLoop = MRI.createVirtualRegister(TRC); + Register varPhi = MRI.createVirtualRegister(TRC); + Register srcLoop = MRI.createVirtualRegister(TRC); + Register srcPhi = MRI.createVirtualRegister(TRC); + Register destLoop = MRI.createVirtualRegister(TRC); + Register destPhi = MRI.createVirtualRegister(TRC); BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi) .addReg(varLoop).addMBB(loopMBB) @@ -9420,7 +10145,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) // [destLoop] = STR_POST(scratch, destPhi, UnitSiz) - unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC); + Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC); emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop, IsThumb1, IsThumb2); emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop, @@ -9461,9 +10186,9 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, unsigned srcIn = srcLoop; unsigned destIn = destLoop; for (unsigned i = 0; i < BytesLeft; i++) { - unsigned srcOut = MRI.createVirtualRegister(TRC); - unsigned destOut = MRI.createVirtualRegister(TRC); - unsigned scratch = MRI.createVirtualRegister(TRC); + Register srcOut = MRI.createVirtualRegister(TRC); + Register destOut = MRI.createVirtualRegister(TRC); + Register scratch = MRI.createVirtualRegister(TRC); emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut, IsThumb1, IsThumb2); emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut, @@ -9523,7 +10248,7 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI, break; case CodeModel::Large: { MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass); + Register Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass); BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg) .addExternalSymbol("__chkstk"); @@ -9771,8 +10496,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // equality. bool RHSisZero = MI.getOpcode() == ARM::BCCZi64; - unsigned LHS1 = MI.getOperand(1).getReg(); - unsigned LHS2 = MI.getOperand(2).getReg(); + Register LHS1 = MI.getOperand(1).getReg(); + Register LHS2 = MI.getOperand(2).getReg(); if (RHSisZero) { BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) .addReg(LHS1) @@ -9782,8 +10507,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, .addReg(LHS2).addImm(0) .addImm(ARMCC::EQ).addReg(ARM::CPSR); } else { - unsigned RHS1 = MI.getOperand(3).getReg(); - unsigned RHS2 = MI.getOperand(4).getReg(); + Register RHS1 = MI.getOperand(3).getReg(); + Register RHS2 = MI.getOperand(4).getReg(); BuildMI(BB, dl, TII->get(isThumb2 ? 
ARM::t2CMPrr : ARM::CMPrr)) .addReg(LHS1) .addReg(RHS1) @@ -9844,15 +10569,15 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, Fn->insert(BBI, RSBBB); Fn->insert(BBI, SinkBB); - unsigned int ABSSrcReg = MI.getOperand(1).getReg(); - unsigned int ABSDstReg = MI.getOperand(0).getReg(); + Register ABSSrcReg = MI.getOperand(1).getReg(); + Register ABSDstReg = MI.getOperand(0).getReg(); bool ABSSrcKIll = MI.getOperand(1).isKill(); bool isThumb2 = Subtarget->isThumb2(); MachineRegisterInfo &MRI = Fn->getRegInfo(); // In Thumb mode S must not be specified if source register is the SP or // PC and if destination register is the SP, so restrict register class - unsigned NewRsbDstReg = - MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass); + Register NewRsbDstReg = MRI.createVirtualRegister( + isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass); // Transfer the remainder of BB and its successor edges to sinkMBB. SinkBB->splice(SinkBB->begin(), BB, @@ -9931,7 +10656,7 @@ static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, // The MEMCPY both defines and kills the scratch registers. for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) { - unsigned TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass + Register TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass); MIB.addReg(TmpReg, RegState::Define|RegState::Dead); } @@ -10369,10 +11094,7 @@ static SDValue findMUL_LOHI(SDValue V) { static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { - if (Subtarget->isThumb()) { - if (!Subtarget->hasDSP()) - return SDValue(); - } else if (!Subtarget->hasV5TEOps()) + if (!Subtarget->hasBaseDSP()) return SDValue(); // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and @@ -11253,7 +11975,7 @@ static SDValue PerformANDCombine(SDNode *N, BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { if (SplatBitSize <= 64) { EVT VbicVT; - SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(), + SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(), SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VbicVT, VT.is128BitVector(), OtherModImm); @@ -11469,6 +12191,77 @@ static SDValue PerformORCombineToBFI(SDNode *N, return SDValue(); } +static bool isValidMVECond(unsigned CC, bool IsFloat) { + switch (CC) { + case ARMCC::EQ: + case ARMCC::NE: + case ARMCC::LE: + case ARMCC::GT: + case ARMCC::GE: + case ARMCC::LT: + return true; + case ARMCC::HS: + case ARMCC::HI: + return !IsFloat; + default: + return false; + }; +} + +static SDValue PerformORCombine_i1(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain + // together with predicates + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + ARMCC::CondCodes CondCode0 = ARMCC::AL; + ARMCC::CondCodes CondCode1 = ARMCC::AL; + if (N0->getOpcode() == ARMISD::VCMP) + CondCode0 = (ARMCC::CondCodes)cast(N0->getOperand(2)) + ->getZExtValue(); + else if (N0->getOpcode() == ARMISD::VCMPZ) + CondCode0 = (ARMCC::CondCodes)cast(N0->getOperand(1)) + ->getZExtValue(); + if (N1->getOpcode() == ARMISD::VCMP) + CondCode1 = (ARMCC::CondCodes)cast(N1->getOperand(2)) + ->getZExtValue(); + else if (N1->getOpcode() == ARMISD::VCMPZ) + CondCode1 = (ARMCC::CondCodes)cast(N1->getOperand(1)) + ->getZExtValue(); + + if 
(CondCode0 == ARMCC::AL || CondCode1 == ARMCC::AL) + return SDValue(); + + unsigned Opposite0 = ARMCC::getOppositeCondition(CondCode0); + unsigned Opposite1 = ARMCC::getOppositeCondition(CondCode1); + + if (!isValidMVECond(Opposite0, + N0->getOperand(0)->getValueType(0).isFloatingPoint()) || + !isValidMVECond(Opposite1, + N1->getOperand(0)->getValueType(0).isFloatingPoint())) + return SDValue(); + + SmallVector Ops0; + Ops0.push_back(N0->getOperand(0)); + if (N0->getOpcode() == ARMISD::VCMP) + Ops0.push_back(N0->getOperand(1)); + Ops0.push_back(DCI.DAG.getConstant(Opposite0, SDLoc(N0), MVT::i32)); + SmallVector Ops1; + Ops1.push_back(N1->getOperand(0)); + if (N1->getOpcode() == ARMISD::VCMP) + Ops1.push_back(N1->getOperand(1)); + Ops1.push_back(DCI.DAG.getConstant(Opposite1, SDLoc(N1), MVT::i32)); + + SDValue NewN0 = DCI.DAG.getNode(N0->getOpcode(), SDLoc(N0), VT, Ops0); + SDValue NewN1 = DCI.DAG.getNode(N1->getOpcode(), SDLoc(N1), VT, Ops1); + SDValue And = DCI.DAG.getNode(ISD::AND, SDLoc(N), VT, NewN0, NewN1); + return DCI.DAG.getNode(ISD::XOR, SDLoc(N), VT, And, + DCI.DAG.getAllOnesConstant(SDLoc(N), VT)); +} + /// PerformORCombine - Target-specific dag combine xforms for ISD::OR static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, @@ -11489,7 +12282,7 @@ static SDValue PerformORCombine(SDNode *N, BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { if (SplatBitSize <= 64) { EVT VorrVT; - SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), + SDValue Val = isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VorrVT, VT.is128BitVector(), OtherModImm); @@ -11553,6 +12346,10 @@ static SDValue PerformORCombine(SDNode *N, } } + if (Subtarget->hasMVEIntegerOps() && + (VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1)) + return PerformORCombine_i1(N, DCI, Subtarget); + // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when // reasonable. if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) { @@ -11921,6 +12718,24 @@ PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { return Vec; } +static SDValue +PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { + EVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + SDLoc dl(N); + + // PREDICATE_CAST(PREDICATE_CAST(x)) == PREDICATE_CAST(x) + if (Op->getOpcode() == ARMISD::PREDICATE_CAST) { + // If the valuetypes are the same, we can remove the cast entirely. + if (Op->getOperand(0).getValueType() == VT) + return Op->getOperand(0); + return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, + Op->getOperand(0).getValueType(), Op->getOperand(0)); + } + + return SDValue(); +} + /// PerformInsertEltCombine - Target-specific dag combine xforms for /// ISD::INSERT_VECTOR_ELT. static SDValue PerformInsertEltCombine(SDNode *N, @@ -12332,7 +13147,7 @@ static SDValue PerformVDUPLANECombine(SDNode *N, // The canonical VMOV for a zero vector uses a 32-bit element size. unsigned Imm = cast(Op.getOperand(0))->getZExtValue(); unsigned EltBits; - if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0) + if (ARM_AM::decodeVMOVModImm(Imm, EltBits) == 0) EltSize = 8; EVT VT = N->getValueType(0); if (EltSize > VT.getScalarSizeInBits()) @@ -12382,95 +13197,163 @@ static SDValue PerformLOADCombine(SDNode *N, return SDValue(); } -/// PerformSTORECombine - Target-specific dag combine xforms for -/// ISD::STORE. 
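PerformORCombine_i1 above applies De Morgan's law: an OR of two predicate compares becomes the complement of an AND of the inverted compares, since ANDed predicates are easier to chain in MVE. A standalone check of the identity on plain bitmasks (the VCMP condition inversion is modelled simply by complementing the mask):

// Standalone check of the rewrite or(A, B) == xor(and(~A, ~B), all-ones) used
// by PerformORCombine_i1, with predicates modelled as 16-bit masks.
#include <cassert>
#include <cstdint>

static uint16_t orViaInvertedAnd(uint16_t A, uint16_t B) {
  uint16_t InvA = uint16_t(~A); // stands in for the VCMP with opposite cond
  uint16_t InvB = uint16_t(~B);
  return uint16_t(~(InvA & InvB)); // AND, then XOR with all-ones
}

int main() {
  const uint16_t Tests[][2] = {{0x0000, 0xFFFF}, {0x00FF, 0x0F0F},
                               {0x1234, 0x8421}, {0xAAAA, 0x5555}};
  for (auto &T : Tests)
    assert(orViaInvertedAnd(T[0], T[1]) == uint16_t(T[0] | T[1]));
  return 0;
}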
-static SDValue PerformSTORECombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - StoreSDNode *St = cast(N); - if (St->isVolatile()) - return SDValue(); - - // Optimize trunc store (of multiple scalars) to shuffle and store. First, - // pack all of the elements in one place. Next, store to memory in fewer - // chunks. +// Optimize trunc store (of multiple scalars) to shuffle and store. First, +// pack all of the elements in one place. Next, store to memory in fewer +// chunks. +static SDValue PerformTruncatingStoreCombine(StoreSDNode *St, + SelectionDAG &DAG) { SDValue StVal = St->getValue(); EVT VT = StVal.getValueType(); - if (St->isTruncatingStore() && VT.isVector()) { - SelectionDAG &DAG = DCI.DAG; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT StVT = St->getMemoryVT(); - unsigned NumElems = VT.getVectorNumElements(); - assert(StVT != VT && "Cannot truncate to the same type"); - unsigned FromEltSz = VT.getScalarSizeInBits(); - unsigned ToEltSz = StVT.getScalarSizeInBits(); + if (!St->isTruncatingStore() || !VT.isVector()) + return SDValue(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT StVT = St->getMemoryVT(); + unsigned NumElems = VT.getVectorNumElements(); + assert(StVT != VT && "Cannot truncate to the same type"); + unsigned FromEltSz = VT.getScalarSizeInBits(); + unsigned ToEltSz = StVT.getScalarSizeInBits(); + + // From, To sizes and ElemCount must be pow of two + if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) + return SDValue(); - // From, To sizes and ElemCount must be pow of two - if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue(); + // We are going to use the original vector elt for storing. + // Accumulated smaller vector elements must be a multiple of the store size. + if (0 != (NumElems * FromEltSz) % ToEltSz) + return SDValue(); - // We are going to use the original vector elt for storing. - // Accumulated smaller vector elements must be a multiple of the store size. - if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue(); + unsigned SizeRatio = FromEltSz / ToEltSz; + assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits()); - unsigned SizeRatio = FromEltSz / ToEltSz; - assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits()); + // Create a type on which we perform the shuffle. + EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), + NumElems * SizeRatio); + assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); - // Create a type on which we perform the shuffle. - EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), - NumElems*SizeRatio); - assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); + SDLoc DL(St); + SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); + SmallVector ShuffleVec(NumElems * SizeRatio, -1); + for (unsigned i = 0; i < NumElems; ++i) + ShuffleVec[i] = DAG.getDataLayout().isBigEndian() ? (i + 1) * SizeRatio - 1 + : i * SizeRatio; - SDLoc DL(St); - SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); - SmallVector ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i < NumElems; ++i) - ShuffleVec[i] = DAG.getDataLayout().isBigEndian() - ? (i + 1) * SizeRatio - 1 - : i * SizeRatio; - - // Can't shuffle using an illegal type. - if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); - - SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec, - DAG.getUNDEF(WideVec.getValueType()), - ShuffleVec); - // At this point all of the data is stored at the bottom of the - // register. 
We now need to save it to mem. - - // Find the largest store unit - MVT StoreType = MVT::i8; - for (MVT Tp : MVT::integer_valuetypes()) { - if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz) - StoreType = Tp; - } - // Didn't find a legal store type. - if (!TLI.isTypeLegal(StoreType)) - return SDValue(); + // Can't shuffle using an illegal type. + if (!TLI.isTypeLegal(WideVecVT)) + return SDValue(); - // Bitcast the original vector into a vector of store-size units - EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), - StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits()); - assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); - SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); - SmallVector Chains; - SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL, - TLI.getPointerTy(DAG.getDataLayout())); - SDValue BasePtr = St->getBasePtr(); + SDValue Shuff = DAG.getVectorShuffle( + WideVecVT, DL, WideVec, DAG.getUNDEF(WideVec.getValueType()), ShuffleVec); + // At this point all of the data is stored at the bottom of the + // register. We now need to save it to mem. - // Perform one or more big stores into memory. - unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits(); - for (unsigned I = 0; I < E; I++) { - SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - StoreType, ShuffWide, - DAG.getIntPtrConstant(I, DL)); - SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr, - St->getPointerInfo(), St->getAlignment(), - St->getMemOperand()->getFlags()); - BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, - Increment); - Chains.push_back(Ch); - } - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + // Find the largest store unit + MVT StoreType = MVT::i8; + for (MVT Tp : MVT::integer_valuetypes()) { + if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz) + StoreType = Tp; } + // Didn't find a legal store type. + if (!TLI.isTypeLegal(StoreType)) + return SDValue(); + + // Bitcast the original vector into a vector of store-size units + EVT StoreVecVT = + EVT::getVectorVT(*DAG.getContext(), StoreType, + VT.getSizeInBits() / EVT(StoreType).getSizeInBits()); + assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); + SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); + SmallVector Chains; + SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL, + TLI.getPointerTy(DAG.getDataLayout())); + SDValue BasePtr = St->getBasePtr(); + + // Perform one or more big stores into memory. + unsigned E = (ToEltSz * NumElems) / StoreType.getSizeInBits(); + for (unsigned I = 0; I < E; I++) { + SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreType, + ShuffWide, DAG.getIntPtrConstant(I, DL)); + SDValue Ch = + DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(), + St->getAlignment(), St->getMemOperand()->getFlags()); + BasePtr = + DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment); + Chains.push_back(Ch); + } + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); +} + +// Try taking a single vector store from an truncate (which would otherwise turn +// into an expensive buildvector) and splitting it into a series of narrowing +// stores. 
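PerformTruncatingStoreCombine above packs the narrow elements of a truncating vector store to the front of the register with a shuffle whose indices are i*SizeRatio (little-endian) or (i+1)*SizeRatio-1 (big-endian), and then stores that low part in as few wide chunks as possible. A standalone sketch of just the index computation:

// Standalone model of the pack-then-store shuffle: view a vector of wide
// elements as SizeRatio narrow elements each, and gather the narrow element
// holding the truncated value of every wide lane to the front of the vector.
#include <cassert>
#include <vector>

static std::vector<int> packIndices(unsigned NumElems, unsigned SizeRatio,
                                    bool BigEndian) {
  std::vector<int> Idx(NumElems * SizeRatio, -1); // -1 == undef lane
  for (unsigned I = 0; I < NumElems; ++I)
    Idx[I] = BigEndian ? int((I + 1) * SizeRatio - 1) : int(I * SizeRatio);
  return Idx;
}

int main() {
  // v4i32 truncated to v4i16: SizeRatio = 2, so the wide vector is viewed as
  // v8i16 and lanes {0,2,4,6} hold the truncated values in little-endian.
  std::vector<int> LE = packIndices(4, 2, /*BigEndian=*/false);
  assert(LE[0] == 0 && LE[1] == 2 && LE[2] == 4 && LE[3] == 6 && LE[4] == -1);
  std::vector<int> BE = packIndices(4, 2, /*BigEndian=*/true);
  assert(BE[0] == 1 && BE[1] == 3 && BE[2] == 5 && BE[3] == 7);
  return 0;
}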
+static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St, + SelectionDAG &DAG) { + if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed()) + return SDValue(); + SDValue Trunc = St->getValue(); + if (Trunc->getOpcode() != ISD::TRUNCATE) + return SDValue(); + EVT FromVT = Trunc->getOperand(0).getValueType(); + EVT ToVT = Trunc.getValueType(); + if (!ToVT.isVector()) + return SDValue(); + assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements()); + EVT ToEltVT = ToVT.getVectorElementType(); + EVT FromEltVT = FromVT.getVectorElementType(); + + unsigned NumElements = 0; + if (FromEltVT == MVT::i32 && (ToEltVT == MVT::i16 || ToEltVT == MVT::i8)) + NumElements = 4; + if (FromEltVT == MVT::i16 && ToEltVT == MVT::i8) + NumElements = 8; + if (NumElements == 0 || FromVT.getVectorNumElements() == NumElements || + FromVT.getVectorNumElements() % NumElements != 0) + return SDValue(); + + SDLoc DL(St); + // Details about the old store + SDValue Ch = St->getChain(); + SDValue BasePtr = St->getBasePtr(); + unsigned Alignment = St->getOriginalAlignment(); + MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags(); + AAMDNodes AAInfo = St->getAAInfo(); + + EVT NewFromVT = EVT::getVectorVT(*DAG.getContext(), FromEltVT, NumElements); + EVT NewToVT = EVT::getVectorVT(*DAG.getContext(), ToEltVT, NumElements); + + SmallVector Stores; + for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) { + unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8; + SDValue NewPtr = DAG.getObjectPtrOffset(DL, BasePtr, NewOffset); + + SDValue Extract = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0), + DAG.getConstant(i * NumElements, DL, MVT::i32)); + SDValue Store = DAG.getTruncStore( + Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset), + NewToVT, Alignment, MMOFlags, AAInfo); + Stores.push_back(Store); + } + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores); +} + +/// PerformSTORECombine - Target-specific dag combine xforms for +/// ISD::STORE. +static SDValue PerformSTORECombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + StoreSDNode *St = cast(N); + if (St->isVolatile()) + return SDValue(); + SDValue StVal = St->getValue(); + EVT VT = StVal.getValueType(); + + if (Subtarget->hasNEON()) + if (SDValue Store = PerformTruncatingStoreCombine(St, DCI.DAG)) + return Store; + + if (Subtarget->hasMVEIntegerOps()) + if (SDValue NewToken = PerformSplittingToNarrowingStores(St, DCI.DAG)) + return NewToken; if (!ISD::isNormalStore(St)) return SDValue(); @@ -12522,7 +13405,7 @@ static SDValue PerformSTORECombine(SDNode *N, } // If this is a legal vector store, try to combine it into a VST1_UPD. - if (ISD::isNormalStore(N) && VT.isVector() && + if (Subtarget->hasNEON() && ISD::isNormalStore(N) && VT.isVector() && DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT)) return CombineBaseUpdate(N, DCI); @@ -12890,6 +13773,71 @@ static SDValue PerformShiftCombine(SDNode *N, return SDValue(); } +// Look for a sign/zero extend of a larger than legal load. This can be split +// into two extending loads, which are simpler to deal with than an arbitrary +// sign extend. 
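PerformSplittingToNarrowingStores above takes a store of a vector truncate that is too wide for a single MVE store (for example v8i32 truncated to v8i16) and emits one narrowing store per 4- or 8-lane piece, advancing the pointer by the narrow size of each piece. A standalone scalar model of the slicing and byte offsets:

// Standalone model: store Trunc(Src), where Src has 32-bit elements and the
// destination holds 16-bit elements, in pieces of NumPerStore lanes. The
// NewOffsetBytes computation mirrors i * NumElements * ToEltBits / 8 above.
#include <cassert>
#include <cstdint>
#include <vector>

static void storeNarrowed(const std::vector<uint32_t> &Src, uint16_t *Dst,
                          unsigned NumPerStore) {
  unsigned FromElts = unsigned(Src.size());
  assert(FromElts % NumPerStore == 0);
  for (unsigned I = 0; I < FromElts / NumPerStore; ++I) {
    unsigned NewOffsetBytes = I * NumPerStore * unsigned(sizeof(uint16_t));
    uint16_t *Piece = reinterpret_cast<uint16_t *>(
        reinterpret_cast<char *>(Dst) + NewOffsetBytes);
    for (unsigned J = 0; J < NumPerStore; ++J)       // one narrowing store
      Piece[J] = uint16_t(Src[I * NumPerStore + J]); // truncate the lane
  }
}

int main() {
  std::vector<uint32_t> Src = {0x11111, 0x22222, 0x33333, 0x44444,
                               0x55555, 0x66666, 0x77777, 0x88888};
  uint16_t Dst[8] = {};
  storeNarrowed(Src, Dst, 4); // two v4i32 -> v4i16 pieces
  assert(Dst[0] == 0x1111 && Dst[4] == 0x5555 && Dst[7] == 0x8888);
  return 0;
}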
+static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + if (N0.getOpcode() != ISD::LOAD) + return SDValue(); + LoadSDNode *LD = cast(N0.getNode()); + if (!LD->isSimple() || !N0.hasOneUse() || LD->isIndexed() || + LD->getExtensionType() != ISD::NON_EXTLOAD) + return SDValue(); + EVT FromVT = LD->getValueType(0); + EVT ToVT = N->getValueType(0); + if (!ToVT.isVector()) + return SDValue(); + assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements()); + EVT ToEltVT = ToVT.getVectorElementType(); + EVT FromEltVT = FromVT.getVectorElementType(); + + unsigned NumElements = 0; + if (ToEltVT == MVT::i32 && (FromEltVT == MVT::i16 || FromEltVT == MVT::i8)) + NumElements = 4; + if (ToEltVT == MVT::i16 && FromEltVT == MVT::i8) + NumElements = 8; + if (NumElements == 0 || + FromVT.getVectorNumElements() == NumElements || + FromVT.getVectorNumElements() % NumElements != 0 || + !isPowerOf2_32(NumElements)) + return SDValue(); + + SDLoc DL(LD); + // Details about the old load + SDValue Ch = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + unsigned Alignment = LD->getOriginalAlignment(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); + AAMDNodes AAInfo = LD->getAAInfo(); + + ISD::LoadExtType NewExtType = + N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD; + SDValue Offset = DAG.getUNDEF(BasePtr.getValueType()); + EVT NewFromVT = FromVT.getHalfNumVectorElementsVT(*DAG.getContext()); + EVT NewToVT = ToVT.getHalfNumVectorElementsVT(*DAG.getContext()); + unsigned NewOffset = NewFromVT.getSizeInBits() / 8; + SDValue NewPtr = DAG.getObjectPtrOffset(DL, BasePtr, NewOffset); + + // Split the load in half, each side of which is extended separately. This + // is good enough, as legalisation will take it from there. They are either + // already legal or they will be split further into something that is + // legal. + SDValue NewLoad1 = + DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, BasePtr, Offset, + LD->getPointerInfo(), NewFromVT, Alignment, MMOFlags, AAInfo); + SDValue NewLoad2 = + DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset, + LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT, + Alignment, MMOFlags, AAInfo); + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + SDValue(NewLoad1.getNode(), 1), + SDValue(NewLoad2.getNode(), 1)); + DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, NewLoad1, NewLoad2); +} + /// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, /// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND. static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, @@ -12927,6 +13875,10 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, } } + if (ST->hasMVEIntegerOps()) + if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG)) + return NewLoad; + return SDValue(); } @@ -13028,43 +13980,169 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D return V; } +// Given N, the value controlling the conditional branch, search for the loop +// intrinsic, returning it, along with how the value is used. 
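PerformSplittingToWideningLoad above splits a sign/zero extend of a larger-than-legal load into two half-width extending loads, the second at a byte offset of half the original access, and concatenates the results. A standalone scalar model of that split, with the extending loads reduced to plain widening copies:

// Standalone model: zero-extend a load of N 8-bit elements to 32-bit elements
// by doing it as two half-sized "extending loads", as the combine above does,
// with NewOffset covering the bytes read by the first half.
#include <cassert>
#include <cstdint>
#include <vector>

static std::vector<uint32_t> extLoadSplit(const uint8_t *Ptr, unsigned N) {
  assert(N % 2 == 0);
  unsigned Half = N / 2;
  unsigned NewOffset = Half * unsigned(sizeof(uint8_t));
  std::vector<uint32_t> Out(N);
  for (unsigned I = 0; I < Half; ++I) // first extending load
    Out[I] = Ptr[I];
  for (unsigned I = 0; I < Half; ++I) // second one, at Ptr + NewOffset
    Out[Half + I] = (Ptr + NewOffset)[I];
  return Out;                         // CONCAT_VECTORS of the two halves
}

int main() {
  uint8_t Mem[8] = {1, 2, 3, 4, 5, 6, 7, 250};
  std::vector<uint32_t> V = extLoadSplit(Mem, 8); // models zext v8i8 -> v8i32
  assert(V[0] == 1 && V[3] == 4 && V[4] == 5 && V[7] == 250);
  return 0;
}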
We need to handle +// patterns such as the following: +// (brcond (xor (setcc (loop.decrement), 0, ne), 1), exit) +// (brcond (setcc (loop.decrement), 0, eq), exit) +// (brcond (setcc (loop.decrement), 0, ne), header) +static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm, + bool &Negate) { + switch (N->getOpcode()) { + default: + break; + case ISD::XOR: { + if (!isa(N.getOperand(1))) + return SDValue(); + if (!cast(N.getOperand(1))->isOne()) + return SDValue(); + Negate = !Negate; + return SearchLoopIntrinsic(N.getOperand(0), CC, Imm, Negate); + } + case ISD::SETCC: { + auto *Const = dyn_cast(N.getOperand(1)); + if (!Const) + return SDValue(); + if (Const->isNullValue()) + Imm = 0; + else if (Const->isOne()) + Imm = 1; + else + return SDValue(); + CC = cast(N.getOperand(2))->get(); + return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate); + } + case ISD::INTRINSIC_W_CHAIN: { + unsigned IntOp = cast(N.getOperand(1))->getZExtValue(); + if (IntOp != Intrinsic::test_set_loop_iterations && + IntOp != Intrinsic::loop_decrement_reg) + return SDValue(); + return N; + } + } + return SDValue(); +} + static SDValue PerformHWLoopCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST) { - // Look for (brcond (xor test.set.loop.iterations, -1) - SDValue CC = N->getOperand(1); - unsigned Opc = CC->getOpcode(); - SDValue Int; - if ((Opc == ISD::XOR || Opc == ISD::SETCC) && - (CC->getOperand(0)->getOpcode() == ISD::INTRINSIC_W_CHAIN)) { + // The hwloop intrinsics that we're interested are used for control-flow, + // either for entering or exiting the loop: + // - test.set.loop.iterations will test whether its operand is zero. If it + // is zero, the proceeding branch should not enter the loop. + // - loop.decrement.reg also tests whether its operand is zero. If it is + // zero, the proceeding branch should not branch back to the beginning of + // the loop. + // So here, we need to check that how the brcond is using the result of each + // of the intrinsics to ensure that we're branching to the right place at the + // right time. 
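SearchLoopIntrinsic above peels XOR-with-1 (recording a negation) and SETCC-against-0/1 (recording the immediate and condition) off the branch condition until it reaches the hardware-loop intrinsic. A standalone sketch of the same recursion over a toy expression type; the node kinds and fields below are invented for the illustration:

// Standalone model of the recursive search: strip XOR(x, 1) and CMP(x, 0/1)
// wrappers off a condition until the "loop intrinsic" node is found, tracking
// how the wrappers use its value.
#include <cassert>

enum class Kind { LoopDec, CmpEq, CmpNe, Xor1 };

struct Node {
  Kind K;
  int Imm = 0;              // for CmpEq/CmpNe: the constant compared against
  const Node *Op = nullptr; // single operand of the wrapper nodes
};

static const Node *searchLoopIntrinsic(const Node *N, bool &Negate, int &Imm,
                                       bool &IsEq) {
  switch (N->K) {
  case Kind::Xor1:
    Negate = !Negate;
    return searchLoopIntrinsic(N->Op, Negate, Imm, IsEq);
  case Kind::CmpEq:
  case Kind::CmpNe:
    if (N->Imm != 0 && N->Imm != 1)
      return nullptr; // only comparisons against 0 or 1 are handled
    Imm = N->Imm;
    IsEq = (N->K == Kind::CmpEq);
    return searchLoopIntrinsic(N->Op, Negate, Imm, IsEq);
  case Kind::LoopDec:
    return N; // found the intrinsic
  }
  return nullptr;
}

int main() {
  // Models (xor (setcc (loop.decrement), 0, ne), 1).
  Node Dec{Kind::LoopDec};
  Node Cmp{Kind::CmpNe, 0, &Dec};
  Node Xor{Kind::Xor1, 0, &Cmp};
  bool Negate = false, IsEq = false;
  int Imm = 1;
  const Node *Found = searchLoopIntrinsic(&Xor, Negate, Imm, IsEq);
  assert(Found == &Dec && Negate && Imm == 0 && !IsEq);
  return 0;
}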
+ + ISD::CondCode CC; + SDValue Cond; + int Imm = 1; + bool Negate = false; + SDValue Chain = N->getOperand(0); + SDValue Dest; - assert((isa(CC->getOperand(1)) && - cast(CC->getOperand(1))->isOne()) && - "Expected to compare against 1"); + if (N->getOpcode() == ISD::BRCOND) { + CC = ISD::SETEQ; + Cond = N->getOperand(1); + Dest = N->getOperand(2); + } else { + assert(N->getOpcode() == ISD::BR_CC && "Expected BRCOND or BR_CC!"); + CC = cast(N->getOperand(1))->get(); + Cond = N->getOperand(2); + Dest = N->getOperand(4); + if (auto *Const = dyn_cast(N->getOperand(3))) { + if (!Const->isOne() && !Const->isNullValue()) + return SDValue(); + Imm = Const->getZExtValue(); + } else + return SDValue(); + } - Int = CC->getOperand(0); - } else if (CC->getOpcode() == ISD::INTRINSIC_W_CHAIN) - Int = CC; - else + SDValue Int = SearchLoopIntrinsic(Cond, CC, Imm, Negate); + if (!Int) return SDValue(); - unsigned IntOp = cast(Int.getOperand(1))->getZExtValue(); - if (IntOp != Intrinsic::test_set_loop_iterations) - return SDValue(); + if (Negate) + CC = ISD::getSetCCInverse(CC, true); + + auto IsTrueIfZero = [](ISD::CondCode CC, int Imm) { + return (CC == ISD::SETEQ && Imm == 0) || + (CC == ISD::SETNE && Imm == 1) || + (CC == ISD::SETLT && Imm == 1) || + (CC == ISD::SETULT && Imm == 1); + }; + + auto IsFalseIfZero = [](ISD::CondCode CC, int Imm) { + return (CC == ISD::SETEQ && Imm == 1) || + (CC == ISD::SETNE && Imm == 0) || + (CC == ISD::SETGT && Imm == 0) || + (CC == ISD::SETUGT && Imm == 0) || + (CC == ISD::SETGE && Imm == 1) || + (CC == ISD::SETUGE && Imm == 1); + }; + + assert((IsTrueIfZero(CC, Imm) || IsFalseIfZero(CC, Imm)) && + "unsupported condition"); SDLoc dl(Int); - SDValue Chain = N->getOperand(0); + SelectionDAG &DAG = DCI.DAG; SDValue Elements = Int.getOperand(2); - SDValue ExitBlock = N->getOperand(2); + unsigned IntOp = cast(Int->getOperand(1))->getZExtValue(); + assert((N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BR) + && "expected single br user"); + SDNode *Br = *N->use_begin(); + SDValue OtherTarget = Br->getOperand(1); + + // Update the unconditional branch to branch to the given Dest. + auto UpdateUncondBr = [](SDNode *Br, SDValue Dest, SelectionDAG &DAG) { + SDValue NewBrOps[] = { Br->getOperand(0), Dest }; + SDValue NewBr = DAG.getNode(ISD::BR, SDLoc(Br), MVT::Other, NewBrOps); + DAG.ReplaceAllUsesOfValueWith(SDValue(Br, 0), NewBr); + }; - // TODO: Once we start supporting tail predication, we can add another - // operand to WLS for the number of elements processed in a vector loop. + if (IntOp == Intrinsic::test_set_loop_iterations) { + SDValue Res; + // We expect this 'instruction' to branch when the counter is zero. + if (IsTrueIfZero(CC, Imm)) { + SDValue Ops[] = { Chain, Elements, Dest }; + Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops); + } else { + // The logic is the reverse of what we need for WLS, so find the other + // basic block target: the target of the proceeding br. 
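The IsTrueIfZero and IsFalseIfZero lambdas above classify the (condition, immediate) pairs: the first set takes the branch when the counter is zero (so WLS should target the exit), the second branches only while the counter is non-zero (the LE back-edge). A standalone restatement of both tables, checked against a direct evaluation of the comparison with the counter at zero:

// Standalone restatement of the IsTrueIfZero / IsFalseIfZero tables, checked
// against a direct evaluation of "counter <cc> imm" with the counter at zero.
#include <cassert>
#include <cstdint>

enum CC { EQ, NE, LT, ULT, GT, UGT, GE, UGE };

static bool evalAtZero(CC C, int Imm) {
  uint32_t Cnt = 0, U = uint32_t(Imm);
  switch (C) {
  case EQ:  return 0 == Imm;
  case NE:  return 0 != Imm;
  case LT:  return 0 < Imm;
  case ULT: return Cnt < U;
  case GT:  return 0 > Imm;
  case UGT: return Cnt > U;
  case GE:  return 0 >= Imm;
  case UGE: return Cnt >= U;
  }
  return false;
}

int main() {
  // Pairs that branch when the counter is zero (WLS skips the loop).
  const struct { CC C; int Imm; } TrueIfZero[] = {
      {EQ, 0}, {NE, 1}, {LT, 1}, {ULT, 1}};
  // Pairs that branch while the counter is non-zero (LE takes the back-edge).
  const struct { CC C; int Imm; } FalseIfZero[] = {
      {EQ, 1}, {NE, 0}, {GT, 0}, {UGT, 0}, {GE, 1}, {UGE, 1}};
  for (auto &P : TrueIfZero)
    assert(evalAtZero(P.C, P.Imm));
  for (auto &P : FalseIfZero)
    assert(!evalAtZero(P.C, P.Imm));
  return 0;
}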
+ UpdateUncondBr(Br, Dest, DAG); - SDValue Ops[] = { Chain, Elements, ExitBlock }; - SDValue Res = DCI.DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops); - DCI.DAG.ReplaceAllUsesOfValueWith(Int.getValue(1), Int.getOperand(0)); - return Res; + SDValue Ops[] = { Chain, Elements, OtherTarget }; + Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops); + } + DAG.ReplaceAllUsesOfValueWith(Int.getValue(1), Int.getOperand(0)); + return Res; + } else { + SDValue Size = DAG.getTargetConstant( + cast(Int.getOperand(3))->getZExtValue(), dl, MVT::i32); + SDValue Args[] = { Int.getOperand(0), Elements, Size, }; + SDValue LoopDec = DAG.getNode(ARMISD::LOOP_DEC, dl, + DAG.getVTList(MVT::i32, MVT::Other), Args); + DAG.ReplaceAllUsesWith(Int.getNode(), LoopDec.getNode()); + + // We expect this instruction to branch when the count is not zero. + SDValue Target = IsFalseIfZero(CC, Imm) ? Dest : OtherTarget; + + // Update the unconditional branch to target the loop preheader if we've + // found the condition has been reversed. + if (Target == OtherTarget) + UpdateUncondBr(Br, Dest, DAG); + + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + SDValue(LoopDec.getNode(), 1), Chain); + + SDValue EndArgs[] = { Chain, SDValue(LoopDec.getNode(), 0), Target }; + return DAG.getNode(ARMISD::LE, dl, MVT::Other, EndArgs); + } + return SDValue(); } /// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND. @@ -13298,14 +14376,15 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::OR: return PerformORCombine(N, DCI, Subtarget); case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); - case ISD::BRCOND: return PerformHWLoopCombine(N, DCI, Subtarget); + case ISD::BRCOND: + case ISD::BR_CC: return PerformHWLoopCombine(N, DCI, Subtarget); case ARMISD::ADDC: case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget); case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget); case ARMISD::BFI: return PerformBFICombine(N, DCI); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget); case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); - case ISD::STORE: return PerformSTORECombine(N, DCI); + case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget); case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget); case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); @@ -13334,6 +14413,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, return PerformVLDCombine(N, DCI); case ARMISD::BUILD_VECTOR: return PerformARMBUILD_VECTORCombine(N, DCI); + case ARMISD::PREDICATE_CAST: + return PerformPREDICATE_CASTCombine(N, DCI); case ARMISD::SMULWB: { unsigned BitWidth = N->getValueType(0).getSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16); @@ -13348,7 +14429,9 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); break; } - case ARMISD::SMLALBB: { + case ARMISD::SMLALBB: + case ARMISD::QADD16b: + case ARMISD::QSUB16b: { unsigned BitWidth = N->getValueType(0).getSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16); if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) || @@ -13384,6 +14467,15 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); break; } + case ARMISD::QADD8b: + case ARMISD::QSUB8b: { + unsigned BitWidth = N->getValueType(0).getSizeInBits(); + APInt DemandedMask = 
APInt::getLowBitsSet(BitWidth, 8); + if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) || + (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))) + return SDValue(); + break; + } case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: switch (cast(N->getOperand(1))->getZExtValue()) { @@ -13457,47 +14549,38 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, if (!Subtarget->hasMVEIntegerOps()) return false; - if (Ty != MVT::v16i8 && Ty != MVT::v8i16 && Ty != MVT::v8f16 && - Ty != MVT::v4i32 && Ty != MVT::v4f32 && Ty != MVT::v2i64 && - Ty != MVT::v2f64 && - // These are for truncated stores - Ty != MVT::v4i8 && Ty != MVT::v8i8 && Ty != MVT::v4i16) - return false; - if (Subtarget->isLittle()) { - // In little-endian MVE, the store instructions VSTRB.U8, - // VSTRH.U16 and VSTRW.U32 all store the vector register in - // exactly the same format, and differ only in the range of - // their immediate offset field and the required alignment. - // - // In particular, VSTRB.U8 can store a vector at byte alignment. - // So at this stage we can simply say that loads/stores of all - // 128-bit wide vector types are permitted at any alignment, - // because we know at least _one_ instruction can manage that. - // - // Later on we might find that some of those loads are better - // generated as VLDRW.U32 if alignment permits, to take - // advantage of the larger immediate range. But for the moment, - // all that matters is that if we don't lower the load then - // _some_ instruction can handle it. + // These are for predicates + if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1)) { + if (Fast) + *Fast = true; + return true; + } + + // These are for truncated stores/narrowing loads. They are fine so long as + // the alignment is at least the size of the item being loaded + if ((Ty == MVT::v4i8 || Ty == MVT::v8i8 || Ty == MVT::v4i16) && + Alignment >= VT.getScalarSizeInBits() / 8) { + if (Fast) + *Fast = true; + return true; + } + + // In little-endian MVE, the store instructions VSTRB.U8, VSTRH.U16 and + // VSTRW.U32 all store the vector register in exactly the same format, and + // differ only in the range of their immediate offset field and the required + // alignment. So there is always a store that can be used, regardless of + // actual type. + // + // For big endian, that is not the case. But can still emit a (VSTRB.U8; + // VREV64.8) pair and get the same effect. This will likely be better than + // aligning the vector through the stack. + if (Ty == MVT::v16i8 || Ty == MVT::v8i16 || Ty == MVT::v8f16 || + Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 || + Ty == MVT::v2f64) { if (Fast) *Fast = true; return true; - } else { - // In big-endian MVE, those instructions aren't so similar - // after all, because they reorder the bytes of the vector - // differently. So this time we can only store a particular - // kind of vector if its alignment is at least the element - // type. And we can't store vectors of i64 or f64 at all - // without having to do some postprocessing, because there's - // no VSTRD.U64. - if (Ty == MVT::v16i8 || - ((Ty == MVT::v8i16 || Ty == MVT::v8f16) && Alignment >= 2) || - ((Ty == MVT::v4i32 || Ty == MVT::v4f32) && Alignment >= 4)) { - if (Fast) - *Fast = true; - return true; - } } return false; @@ -13617,22 +14700,60 @@ static bool areExtractExts(Value *Ext1, Value *Ext2) { /// sext/zext can be folded into vsubl. 
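For MVE, allowsMisalignedMemoryAccesses above now reduces to three cases: predicate types are always fine, the truncating/narrowing types (v4i8, v8i8, v4i16) need alignment of at least the element size, and the full 128-bit vector types are accepted at any alignment (directly in little-endian, via a VSTRB/VREV pair in big-endian). A standalone restatement of that table, with vector types reduced to an element-count/element-size pair for the illustration:

// Standalone restatement of the MVE misaligned-access rules, using a small
// descriptor instead of MVT. Types are lumped by lane count and bit width.
#include <cassert>

struct VecTy {
  unsigned NumElts;
  unsigned EltBits; // element size in bits; 1 marks a predicate type
  bool operator==(const VecTy &O) const {
    return NumElts == O.NumElts && EltBits == O.EltBits;
  }
};

static bool allowsMisalignedMVEAccess(VecTy Ty, unsigned AlignBytes) {
  // Predicates (v16i1, v8i1, v4i1) go through the 16-bit VPR: always fine.
  if (Ty.EltBits == 1)
    return true;
  // Narrowing/truncating types need alignment >= element size.
  const VecTy Narrow[] = {{4, 8}, {8, 8}, {4, 16}};
  for (const VecTy &N : Narrow)
    if (Ty == N)
      return AlignBytes >= Ty.EltBits / 8;
  // Full 128-bit vectors: some VSTRB/VSTRH/VSTRW form always exists.
  return Ty.NumElts * Ty.EltBits == 128;
}

int main() {
  assert(allowsMisalignedMVEAccess({8, 1}, 1));   // v8i1 predicate
  assert(!allowsMisalignedMVEAccess({4, 16}, 1)); // v4i16 needs align >= 2
  assert(allowsMisalignedMVEAccess({4, 16}, 2));
  assert(allowsMisalignedMVEAccess({16, 8}, 1));  // v16i8 at byte alignment
  assert(allowsMisalignedMVEAccess({4, 32}, 1));  // v4i32, any alignment
  assert(!allowsMisalignedMVEAccess({2, 32}, 8)); // 64-bit vectors not handled
  return 0;
}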
bool ARMTargetLowering::shouldSinkOperands(Instruction *I, SmallVectorImpl &Ops) const { - if (!Subtarget->hasNEON() || !I->getType()->isVectorTy()) + if (!I->getType()->isVectorTy()) return false; - switch (I->getOpcode()) { - case Instruction::Sub: - case Instruction::Add: { - if (!areExtractExts(I->getOperand(0), I->getOperand(1))) + if (Subtarget->hasNEON()) { + switch (I->getOpcode()) { + case Instruction::Sub: + case Instruction::Add: { + if (!areExtractExts(I->getOperand(0), I->getOperand(1))) + return false; + Ops.push_back(&I->getOperandUse(0)); + Ops.push_back(&I->getOperandUse(1)); + return true; + } + default: return false; - Ops.push_back(&I->getOperandUse(0)); - Ops.push_back(&I->getOperandUse(1)); - return true; + } } - default: + + if (!Subtarget->hasMVEIntegerOps()) + return false; + + auto IsSinker = [](Instruction *I, int Operand) { + switch (I->getOpcode()) { + case Instruction::Add: + case Instruction::Mul: + return true; + case Instruction::Sub: + return Operand == 1; + default: + return false; + } + }; + + int Op = 0; + if (!isa(I->getOperand(Op))) + Op = 1; + if (!IsSinker(I, Op)) + return false; + if (!match(I->getOperand(Op), + m_ShuffleVector(m_InsertElement(m_Undef(), m_Value(), m_ZeroInt()), + m_Undef(), m_Zero()))) { return false; } - return false; + Instruction *Shuffle = cast(I->getOperand(Op)); + // All uses of the shuffle should be sunk to avoid duplicating it across gpr + // and vector registers + for (Use &U : Shuffle->uses()) { + Instruction *Insn = cast(U.getUser()); + if (!IsSinker(Insn, U.getOperandNo())) + return false; + } + Ops.push_back(&Shuffle->getOperandUse(0)); + Ops.push_back(&I->getOperandUse(Op)); + return true; } bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { @@ -13641,6 +14762,11 @@ bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { if (!isTypeLegal(VT)) return false; + if (auto *Ld = dyn_cast(ExtVal.getOperand(0))) { + if (Ld->isExpandingLoad()) + return false; + } + // Don't create a loadext if we can fold the extension into a wide/long // instruction. // If there's more than one user instruction, the loadext is desirable no @@ -14028,6 +15154,52 @@ static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, return false; } +static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, unsigned Align, + bool isSEXTLoad, bool isLE, SDValue &Base, + SDValue &Offset, bool &isInc, + SelectionDAG &DAG) { + if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) + return false; + if (!isa(Ptr->getOperand(1))) + return false; + + ConstantSDNode *RHS = cast(Ptr->getOperand(1)); + int RHSC = (int)RHS->getZExtValue(); + + auto IsInRange = [&](int RHSC, int Limit, int Scale) { + if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) { + assert(Ptr->getOpcode() == ISD::ADD); + isInc = false; + Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0)); + return true; + } else if (RHSC > 0 && RHSC < Limit * Scale && RHSC % Scale == 0) { + isInc = Ptr->getOpcode() == ISD::ADD; + Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0)); + return true; + } + return false; + }; + + // Try to find a matching instruction based on s/zext, Alignment, Offset and + // (in BE) type. 
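getMVEIndexedAddressParts accepts a pre/post-increment offset only when it is a non-zero multiple of the access scale and its magnitude stays below 0x80 times that scale, i.e. it fits the signed 7-bit scaled immediate of the MVE load/store forms. A standalone version of that IsInRange test:

// Standalone model of the IsInRange check used for MVE pre/post-indexed
// addressing: the offset must be a non-zero multiple of the access scale and
// fit the signed 7-bit scaled immediate (|offset| < 0x80 * scale).
#include <cassert>

static bool isLegalMVEIndexedOffset(int Offset, int Scale) {
  const int Limit = 0x80;
  if (Offset % Scale != 0)
    return false;
  if (Offset < 0)
    return Offset > -Limit * Scale;
  if (Offset > 0)
    return Offset < Limit * Scale;
  return false; // a zero offset is not an increment at all
}

int main() {
  assert(isLegalMVEIndexedOffset(4, 4));    // e.g. a word increment of 4
  assert(isLegalMVEIndexedOffset(-508, 4)); // largest negative word offset
  assert(!isLegalMVEIndexedOffset(512, 4)); // 0x80 * 4 is out of range
  assert(!isLegalMVEIndexedOffset(2, 4));   // not a multiple of the scale
  assert(isLegalMVEIndexedOffset(127, 1));  // byte-scaled forms
  assert(!isLegalMVEIndexedOffset(128, 1));
  return 0;
}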
+ Base = Ptr->getOperand(0); + if (VT == MVT::v4i16) { + if (Align >= 2 && IsInRange(RHSC, 0x80, 2)) + return true; + } else if (VT == MVT::v4i8 || VT == MVT::v8i8) { + if (IsInRange(RHSC, 0x80, 1)) + return true; + } else if (Align >= 4 && (isLE || VT == MVT::v4i32 || VT == MVT::v4f32) && + IsInRange(RHSC, 0x80, 4)) + return true; + else if (Align >= 2 && (isLE || VT == MVT::v8i16 || VT == MVT::v8f16) && + IsInRange(RHSC, 0x80, 2)) + return true; + else if ((isLE || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1)) + return true; + return false; +} + /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. @@ -14041,25 +15213,35 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, EVT VT; SDValue Ptr; + unsigned Align; bool isSEXTLoad = false; if (LoadSDNode *LD = dyn_cast(N)) { Ptr = LD->getBasePtr(); - VT = LD->getMemoryVT(); + VT = LD->getMemoryVT(); + Align = LD->getAlignment(); isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; } else if (StoreSDNode *ST = dyn_cast(N)) { Ptr = ST->getBasePtr(); - VT = ST->getMemoryVT(); + VT = ST->getMemoryVT(); + Align = ST->getAlignment(); } else return false; bool isInc; bool isLegal = false; - if (Subtarget->isThumb2()) - isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, - Offset, isInc, DAG); - else - isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, - Offset, isInc, DAG); + if (VT.isVector()) + isLegal = Subtarget->hasMVEIntegerOps() && + getMVEIndexedAddressParts(Ptr.getNode(), VT, Align, isSEXTLoad, + Subtarget->isLittle(), Base, Offset, + isInc, DAG); + else { + if (Subtarget->isThumb2()) + isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, + Offset, isInc, DAG); + else + isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, + Offset, isInc, DAG); + } if (!isLegal) return false; @@ -14077,15 +15259,18 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, SelectionDAG &DAG) const { EVT VT; SDValue Ptr; + unsigned Align; bool isSEXTLoad = false, isNonExt; if (LoadSDNode *LD = dyn_cast(N)) { - VT = LD->getMemoryVT(); + VT = LD->getMemoryVT(); Ptr = LD->getBasePtr(); + Align = LD->getAlignment(); isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD; } else if (StoreSDNode *ST = dyn_cast(N)) { - VT = ST->getMemoryVT(); + VT = ST->getMemoryVT(); Ptr = ST->getBasePtr(); + Align = ST->getAlignment(); isNonExt = !ST->isTruncatingStore(); } else return false; @@ -14108,12 +15293,19 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, bool isInc; bool isLegal = false; - if (Subtarget->isThumb2()) - isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, - isInc, DAG); - else - isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, + if (VT.isVector()) + isLegal = Subtarget->hasMVEIntegerOps() && + getMVEIndexedAddressParts(Op, VT, Align, isSEXTLoad, + Subtarget->isLittle(), Base, Offset, isInc, DAG); + else { + if (Subtarget->isThumb2()) + isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, + isInc, DAG); + else + isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, + isInc, DAG); + } if (!isLegal) return false; @@ -14369,7 +15561,8 @@ const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const { /// constraint it is for 
 ARMTargetLowering::ConstraintType
 ARMTargetLowering::getConstraintType(StringRef Constraint) const {
-  if (Constraint.size() == 1) {
+  unsigned S = Constraint.size();
+  if (S == 1) {
     switch (Constraint[0]) {
     default: break;
     case 'l': return C_RegisterClass;
@@ -14377,12 +15570,12 @@ ARMTargetLowering::getConstraintType(StringRef Constraint) const {
     case 'h': return C_RegisterClass;
     case 'x': return C_RegisterClass;
     case 't': return C_RegisterClass;
-    case 'j': return C_Other; // Constant for movw.
-    // An address with a single base register. Due to the way we
-    // currently handle addresses it is the same as an 'r' memory constraint.
+    case 'j': return C_Immediate; // Constant for movw.
+      // An address with a single base register. Due to the way we
+      // currently handle addresses it is the same as an 'r' memory constraint.
     case 'Q': return C_Memory;
     }
-  } else if (Constraint.size() == 2) {
+  } else if (S == 2) {
     switch (Constraint[0]) {
     default: break;
     case 'T': return C_RegisterClass;
@@ -14535,7 +15728,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
       case 'j':
         // Constant suitable for movw, must be between 0 and
        // 65535.
-        if (Subtarget->hasV6T2Ops())
+        if (Subtarget->hasV6T2Ops() || (Subtarget->hasV8MBaselineOps()))
          if (CVal >= 0 && CVal <= 65535)
            break;
        return;
@@ -14643,7 +15836,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
        return;
 
      case 'N':
-        if (Subtarget->isThumb()) {  // FIXME thumb2
+        if (Subtarget->isThumb1Only()) {
          // This must be a constant between 0 and 31, for shift amounts.
          if (CVal >= 0 && CVal <= 31)
            break;
@@ -14651,7 +15844,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
        return;
 
      case 'O':
-        if (Subtarget->isThumb()) {  // FIXME thumb2
+        if (Subtarget->isThumb1Only()) {
          // This must be a multiple of 4 between -508 and 508, for
          // ADD/SUB sp = sp + immediate.
          if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
@@ -14874,6 +16067,7 @@ SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
   // without FP16. So we must do a function call.
   SDLoc Loc(Op);
   RTLIB::Libcall LC;
+  MakeLibCallOptions CallOptions;
   if (SrcSz == 16) {
     // Instruction from 16 -> 32
     if (Subtarget->hasFP16())
@@ -14884,7 +16078,7 @@ SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
       assert(LC != RTLIB::UNKNOWN_LIBCALL &&
              "Unexpected type for custom-lowering FP_EXTEND");
       SrcVal =
-        makeLibCall(DAG, LC, MVT::f32, SrcVal, /*isSigned*/ false, Loc).first;
+        makeLibCall(DAG, LC, MVT::f32, SrcVal, CallOptions, Loc).first;
     }
   }
 
@@ -14897,7 +16091,7 @@ SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
   LC = RTLIB::getFPEXT(MVT::f32, MVT::f64);
   assert(LC != RTLIB::UNKNOWN_LIBCALL &&
          "Unexpected type for custom-lowering FP_EXTEND");
-  return makeLibCall(DAG, LC, MVT::f64, SrcVal, /*isSigned*/ false, Loc).first;
+  return makeLibCall(DAG, LC, MVT::f64, SrcVal, CallOptions, Loc).first;
 }
 
 SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
@@ -14923,7 +16117,8 @@ SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
   RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);
   assert(LC != RTLIB::UNKNOWN_LIBCALL &&
          "Unexpected type for custom-lowering FP_ROUND");
-  return makeLibCall(DAG, LC, DstVT, SrcVal, /*isSigned*/ false, Loc).first;
+  MakeLibCallOptions CallOptions;
+  return makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions, Loc).first;
 }
 
 void ARMTargetLowering::lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
@@ -15015,7 +16210,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
     Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
-    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+    Info.align = MaybeAlign(cast<ConstantInt>(AlignArg)->getZExtValue());
     // volatile loads with NEON intrinsics not supported
     Info.flags = MachineMemOperand::MOLoad;
     return true;
@@ -15030,7 +16225,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
     Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
     Info.offset = 0;
-    Info.align = 0;
+    Info.align.reset();
     // volatile loads with NEON intrinsics not supported
     Info.flags = MachineMemOperand::MOLoad;
     return true;
@@ -15056,7 +16251,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
     Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
-    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+    Info.align = MaybeAlign(cast<ConstantInt>(AlignArg)->getZExtValue());
     // volatile stores with NEON intrinsics not supported
     Info.flags = MachineMemOperand::MOStore;
     return true;
@@ -15077,7 +16272,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
-    Info.align = 0;
+    Info.align.reset();
     // volatile stores with NEON intrinsics not supported
     Info.flags = MachineMemOperand::MOStore;
     return true;
@@ -15090,7 +16285,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.memVT = MVT::getVT(PtrTy->getElementType());
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
-    Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
+    Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
     return true;
   }
@@ -15102,7 +16297,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.memVT = MVT::getVT(PtrTy->getElementType());
     Info.ptrVal = I.getArgOperand(1);
     Info.offset = 0;
-    Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
+    Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
     Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
     return true;
   }
@@ -15112,7 +16307,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.memVT = MVT::i64;
     Info.ptrVal = I.getArgOperand(2);
     Info.offset = 0;
-    Info.align = 8;
+    Info.align = Align(8);
     Info.flags =
         MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
     return true;
@@ -15122,7 +16317,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.memVT = MVT::i64;
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
-    Info.align = 8;
+    Info.align = Align(8);
     Info.flags =
         MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
     return true;
@@ -15473,6 +16668,12 @@ bool ARMTargetLowering::isLegalInterleavedAccessType(
   return VecSize == 64 || VecSize % 128 == 0;
 }
 
+unsigned ARMTargetLowering::getMaxSupportedInterleaveFactor() const {
+  if (Subtarget->hasNEON())
+    return 4;
+  return TargetLoweringBase::getMaxSupportedInterleaveFactor();
+}
+
 /// Lower an interleaved load into a vldN intrinsic.
 ///
 /// E.g. Lower an interleaved load (Factor = 2):
@@ -15792,15 +16993,15 @@ static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
 }
 
 /// Return the correct alignment for the current calling convention.
-unsigned
-ARMTargetLowering::getABIAlignmentForCallingConv(Type *ArgTy,
-                                                 DataLayout DL) const {
+Align ARMTargetLowering::getABIAlignmentForCallingConv(Type *ArgTy,
+                                                       DataLayout DL) const {
+  const Align ABITypeAlign(DL.getABITypeAlignment(ArgTy));
   if (!ArgTy->isVectorTy())
-    return DL.getABITypeAlignment(ArgTy);
+    return ABITypeAlign;
 
   // Avoid over-aligning vector parameters. It would require realigning the
   // stack and waste space for no real benefit.
-  return std::min(DL.getABITypeAlignment(ArgTy), DL.getStackAlignment());
+  return std::min(ABITypeAlign, DL.getStackAlignment());
 }
 
 /// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
@@ -15861,7 +17062,7 @@ void ARMTargetLowering::insertCopiesSplitCSR(
     else
       llvm_unreachable("Unexpected register class in CSRsViaCopy!");
 
-    unsigned NewVR = MRI->createVirtualRegister(RC);
+    Register NewVR = MRI->createVirtualRegister(RC);
     // Create copy from CSR to a virtual register.
     // FIXME: this currently does not emit CFI pseudo-instructions, it works
     // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 1675ec59a354..53813fad5afd 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -103,6 +103,7 @@ class VectorType;
       ADDE,        // Add using carry
       SUBC,        // Sub with carry
       SUBE,        // Sub using carry
+      LSLS,        // Shift left producing carry
 
       VMOVRRD,     // double to two gprs.
       VMOVDRR,     // Two gprs to double.
@@ -126,17 +127,13 @@ class VectorType;
       WIN__DBZCHK, // Windows' divide by zero check
 
       WLS,         // Low-overhead loops, While Loop Start
+      LOOP_DEC,    // Really a part of LE, performs the sub
+      LE,          // Low-overhead loops, Loop End
 
-      VCEQ,        // Vector compare equal.
-      VCEQZ,       // Vector compare equal to zero.
-      VCGE,        // Vector compare greater than or equal.
-      VCGEZ,       // Vector compare greater than or equal to zero.
-      VCLEZ,       // Vector compare less than or equal to zero.
-      VCGEU,       // Vector compare unsigned greater than or equal.
-      VCGT,        // Vector compare greater than.
-      VCGTZ,       // Vector compare greater than zero.
-      VCLTZ,       // Vector compare less than zero.
-      VCGTU,       // Vector compare unsigned greater than.
+      PREDICATE_CAST, // Predicate cast for MVE i1 types
+
+      VCMP,        // Vector compare.
+      VCMPZ,       // Vector compare to zero.
       VTST,        // Vector test bits.
 
       // Vector shift by vector
@@ -200,6 +197,7 @@ class VectorType;
       VTRN,         // transpose
       VTBL1,        // 1-register shuffle with mask
       VTBL2,        // 2-register shuffle with mask
+      VMOVN,        // MVE vmovn
 
       // Vector multiply long:
       VMULLs,       // ...signed
@@ -221,6 +219,12 @@ class VectorType;
       SMMLAR,       // Signed multiply long, round and add
       SMMLSR,       // Signed multiply long, subtract and round
 
+      // Single Lane QADD8 and QADD16. Only the bottom lane. That's what the b stands for.
+      QADD8b,
+      QSUB8b,
+      QADD16b,
+      QSUB16b,
+
       // Operands of the standard BUILD_VECTOR node are not legalized, which
       // is fine if BUILD_VECTORs are always lowered to shuffles or other
       // operations, but for ARM some BUILD_VECTORs are legal as-is and their
@@ -243,6 +247,11 @@ class VectorType;
       // instructions.
       MEMCPY,
 
+      // V8.1MMainline condition select
+      CSINV, // Conditional select invert.
+      CSNEG, // Conditional select negate.
+      CSINC, // Conditional select increment.
+
       // Vector load N-element structure to all lanes:
       VLD1DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
       VLD2DUP,
@@ -539,7 +548,7 @@ class VectorType;
     Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst,
                                    AtomicOrdering Ord) const override;
 
-    unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
+    unsigned getMaxSupportedInterleaveFactor() const override;
 
     bool lowerInterleavedLoad(LoadInst *LI,
                               ArrayRef<ShuffleVectorInst *> Shuffles,
@@ -608,8 +617,8 @@ class VectorType;
     void finalizeLowering(MachineFunction &MF) const override;
 
     /// Return the correct alignment for the current calling convention.
- unsigned getABIAlignmentForCallingConv(Type *ArgTy, - DataLayout DL) const override; + Align getABIAlignmentForCallingConv(Type *ArgTy, + DataLayout DL) const override; bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override; @@ -670,6 +679,8 @@ class VectorType; SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; @@ -721,8 +732,8 @@ class VectorType; void lowerABS(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const; - unsigned getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const override; + Register getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const override; SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl &Created) const override; @@ -814,7 +825,7 @@ class VectorType; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const; SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, - const SDLoc &dl, bool InvalidOnQNaN) const; + const SDLoc &dl) const; SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const; SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const; @@ -838,7 +849,7 @@ class VectorType; void setAllExpand(MVT VT); }; - enum NEONModImmType { + enum VMOVModImmType { VMOVModImm, VMVNModImm, MVEVMVNModImm, diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index bc93a058720c..1da32ad2af6c 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -188,6 +188,13 @@ def s_cc_out : OptionalDefOperand { let DecoderMethod = "DecodeCCOutOperand"; } +// Transform to generate the inverse of a condition code during ISel +def inv_cond_XFORM : SDNodeXForm(N->getZExtValue()); + return CurDAG->getTargetConstant(ARMCC::getOppositeCondition(CC), SDLoc(N), + MVT::i32); +}]>; + // VPT predicate def VPTPredNOperand : AsmOperandClass { @@ -401,6 +408,8 @@ class InstTemplate(f), "Pseudo"); @@ -412,6 +421,7 @@ class InstTemplate let isCodeGenOnly = 0; // So we get asm matcher for it. let AsmString = asm; let isPseudo = 1; + let hasNoSchedulingInfo = 1; } class ARMAsmPseudo @@ -2282,7 +2293,7 @@ class N1ModImm op21_19, bits<4> op11_8, bit op7, bit op6, let Inst{24} = SIMM{7}; let Inst{18-16} = SIMM{6-4}; let Inst{3-0} = SIMM{3-0}; - let DecoderMethod = "DecodeNEONModImmInstruction"; + let DecoderMethod = "DecodeVMOVModImmInstruction"; } // NEON 2 vector register format. @@ -2724,6 +2735,16 @@ def complexrotateopodd : Operand { let PrintMethod = "printComplexRotationOp<180, 90>"; } +def MveSaturateOperand : AsmOperandClass { + let PredicateMethod = "isMveSaturateOp"; + let DiagnosticString = "saturate operand must be 48 or 64"; + let Name = "MveSaturate"; +} +def saturateop : Operand { + let ParserMatchClass = MveSaturateOperand; + let PrintMethod = "printMveSaturateOp"; +} + // Data type suffix token aliases. Implements Table A7-3 in the ARM ARM. 
def : TokenAlias<".s8", ".i8">; def : TokenAlias<".u8", ".i8">; diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 388c889349b7..a802d5a06f07 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -117,7 +117,7 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const { MachineBasicBlock &MBB = *MI->getParent(); DebugLoc DL = MI->getDebugLoc(); - unsigned Reg = MI->getOperand(0).getReg(); + Register Reg = MI->getOperand(0).getReg(); MachineInstrBuilder MIB; MIB = BuildMI(MBB, MI, DL, get(ARM::MOV_ga_pcrel_ldr), Reg) diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index e35145463852..fe696222ec70 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -51,8 +51,6 @@ def SDT_ARMAnd : SDTypeProfile<1, 2, SDTCisVT<2, i32>]>; def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; -def SDT_ARMFCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, - SDTCisVT<2, i32>]>; def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisPtrTy<1>, SDTCisVT<2, i32>]>; @@ -108,14 +106,24 @@ def SDT_ARMIntShiftParts : SDTypeProfile<2, 3, [SDTCisSameAs<0, 1>, // TODO Add another operand for 'Size' so that we can re-use this node when we // start supporting *TP versions. -def SDT_ARMWhileLoop : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, - SDTCisVT<1, OtherVT>]>; +def SDT_ARMLoLoop : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, + SDTCisVT<1, OtherVT>]>; def ARMSmlald : SDNode<"ARMISD::SMLALD", SDT_LongMac>; def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>; def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>; def ARMSmlsldx : SDNode<"ARMISD::SMLSLDX", SDT_LongMac>; +def SDT_ARMCSel : SDTypeProfile<1, 3, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisInt<3>, + SDTCisVT<3, i32>]>; + +def ARMcsinv : SDNode<"ARMISD::CSINV", SDT_ARMCSel, [SDNPOptInGlue]>; +def ARMcsneg : SDNode<"ARMISD::CSNEG", SDT_ARMCSel, [SDNPOptInGlue]>; +def ARMcsinc : SDNode<"ARMISD::CSINC", SDT_ARMCSel, [SDNPOptInGlue]>; + def SDT_MulHSR : SDTypeProfile<1, 3, [SDTCisVT<0,i32>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, @@ -194,6 +202,7 @@ def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>; def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags, [SDNPCommutative]>; def ARMsubc : SDNode<"ARMISD::SUBC", SDTBinaryArithWithFlags>; +def ARMlsls : SDNode<"ARMISD::LSLS", SDTBinaryArithWithFlags>; def ARMadde : SDNode<"ARMISD::ADDE", SDTBinaryArithWithFlagsInOut>; def ARMsube : SDNode<"ARMISD::SUBE", SDTBinaryArithWithFlagsInOut>; @@ -229,6 +238,11 @@ def ARMsmlalbt : SDNode<"ARMISD::SMLALBT", SDT_LongMac, []>; def ARMsmlaltb : SDNode<"ARMISD::SMLALTB", SDT_LongMac, []>; def ARMsmlaltt : SDNode<"ARMISD::SMLALTT", SDT_LongMac, []>; +def ARMqadd8b : SDNode<"ARMISD::QADD8b", SDT_ARMAnd, []>; +def ARMqsub8b : SDNode<"ARMISD::QSUB8b", SDT_ARMAnd, []>; +def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>; +def ARMqsub16b : SDNode<"ARMISD::QSUB16b", SDT_ARMAnd, []>; + // Vector operations shared between NEON and MVE def ARMvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; @@ -265,8 +279,16 @@ def ARMvshruImm : SDNode<"ARMISD::VSHRuIMM", SDTARMVSHIMM>; def ARMvshls : SDNode<"ARMISD::VSHLs", SDTARMVSH>; def ARMvshlu : SDNode<"ARMISD::VSHLu", SDTARMVSH>; -def ARMWLS : SDNode<"ARMISD::WLS", SDT_ARMWhileLoop, - [SDNPHasChain]>; +def SDTARMVCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, + SDTCisInt<3>]>; +def SDTARMVCMPZ : SDTypeProfile<1, 2, 
[SDTCisInt<2>]>; + +def ARMvcmp : SDNode<"ARMISD::VCMP", SDTARMVCMP>; +def ARMvcmpz : SDNode<"ARMISD::VCMPZ", SDTARMVCMPZ>; + +def ARMWLS : SDNode<"ARMISD::WLS", SDT_ARMLoLoop, [SDNPHasChain]>; +def ARMLE : SDNode<"ARMISD::LE", SDT_ARMLoLoop, [SDNPHasChain]>; +def ARMLoopDec : SDNode<"ARMISD::LOOP_DEC", SDTIntBinOp, [SDNPHasChain]>; //===----------------------------------------------------------------------===// // ARM Flag Definitions. @@ -1948,7 +1970,7 @@ multiclass AI_str1nopc; @@ -2361,6 +2383,12 @@ let isCall = 1, def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins arm_bl_target:$func), 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, Requires<[IsARM]>, Sched<[WriteBr]>; + + // push lr before the call + def BL_PUSHLR : ARMPseudoInst<(outs), (ins GPRlr:$ra, arm_bl_target:$func), + 4, IIC_Br, + []>, + Requires<[IsARM]>, Sched<[WriteBr]>; } let isBranch = 1, isTerminator = 1 in { @@ -3727,6 +3755,23 @@ let DecoderMethod = "DecodeQADDInstruction" in [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))]>; } +def : ARMV5TEPat<(saddsat GPR:$a, GPR:$b), + (QADD GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(ssubsat GPR:$a, GPR:$b), + (QSUB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(saddsat(saddsat rGPR:$Rm, rGPR:$Rm), rGPR:$Rn), + (QDADD rGPR:$Rm, rGPR:$Rn)>; +def : ARMV5TEPat<(ssubsat rGPR:$Rm, (saddsat rGPR:$Rn, rGPR:$Rn)), + (QDSUB rGPR:$Rm, rGPR:$Rn)>; +def : ARMV6Pat<(ARMqadd8b rGPR:$Rm, rGPR:$Rn), + (QADD8 rGPR:$Rm, rGPR:$Rn)>; +def : ARMV6Pat<(ARMqsub8b rGPR:$Rm, rGPR:$Rn), + (QSUB8 rGPR:$Rm, rGPR:$Rn)>; +def : ARMV6Pat<(ARMqadd16b rGPR:$Rm, rGPR:$Rn), + (QADD16 rGPR:$Rm, rGPR:$Rn)>; +def : ARMV6Pat<(ARMqsub16b rGPR:$Rm, rGPR:$Rn), + (QSUB16 rGPR:$Rm, rGPR:$Rn)>; + def UQADD16 : AAIIntrinsic<0b01100110, 0b11110001, "uqadd16", int_arm_uqadd16>; def UQADD8 : AAIIntrinsic<0b01100110, 0b11111001, "uqadd8", int_arm_uqadd8>; def UQSUB16 : AAIIntrinsic<0b01100110, 0b11110111, "uqsub16", int_arm_uqsub16>; @@ -4870,14 +4915,13 @@ def SB : AInoP<(outs), (ins), MiscFrm, NoItinerary, "sb", "", []>, let hasSideEffects = 1; } -let usesCustomInserter = 1, Defs = [CPSR] in { - -// Pseudo instruction that combines movs + predicated rsbmi -// to implement integer ABS +let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in { + // Pseudo instruction that combines movs + predicated rsbmi + // to implement integer ABS def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>; } -let usesCustomInserter = 1, Defs = [CPSR] in { +let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in { def COPY_STRUCT_BYVAL_I32 : PseudoInst< (outs), (ins GPR:$dst, GPR:$src, i32imm:$size, i32imm:$alignment), NoItinerary, @@ -5085,8 +5129,8 @@ def SWPB: AIswp<1, (outs GPRnopc:$Rt), def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", - [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, - imm:$CRm, imm:$opc2)]>, + [(int_arm_cdp timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn, + timm:$CRm, timm:$opc2)]>, Requires<[IsARM,PreV8]> { bits<4> opc1; bits<4> CRn; @@ -5109,8 +5153,8 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", - [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, - imm:$CRm, imm:$opc2)]>, + [(int_arm_cdp2 timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn, + timm:$CRm, 
timm:$opc2)]>, Requires<[IsARM,PreV8]> { let Inst{31-28} = 0b1111; bits<4> opc1; @@ -5289,15 +5333,15 @@ multiclass LdSt2Cop pattern> { } } -defm LDC : LdStCop <1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>; -defm LDCL : LdStCop <1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>; -defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; -defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; +defm LDC : LdStCop <1, 0, "ldc", [(int_arm_ldc timm:$cop, timm:$CRd, addrmode5:$addr)]>; +defm LDCL : LdStCop <1, 1, "ldcl", [(int_arm_ldcl timm:$cop, timm:$CRd, addrmode5:$addr)]>; +defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; +defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; -defm STC : LdStCop <0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>; -defm STCL : LdStCop <0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>; -defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; -defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; +defm STC : LdStCop <0, 0, "stc", [(int_arm_stc timm:$cop, timm:$CRd, addrmode5:$addr)]>; +defm STCL : LdStCop <0, 1, "stcl", [(int_arm_stcl timm:$cop, timm:$CRd, addrmode5:$addr)]>; +defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; +defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; } // DecoderNamespace = "CoProc" @@ -5333,8 +5377,8 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */, (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), - [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, - imm:$CRm, imm:$opc2)]>, + [(int_arm_mcr timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn, + timm:$CRm, timm:$opc2)]>, ComplexDeprecationPredicate<"MCR">; def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm", (MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, @@ -5347,8 +5391,8 @@ def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm", (MRC GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, 0, pred:$p)>; -def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), - (MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; +def : ARMPat<(int_arm_mrc timm:$cop, timm:$opc1, timm:$CRn, timm:$CRm, timm:$opc2), + (MRC p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>; class MovRCopro2 pattern> @@ -5379,8 +5423,8 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */, (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), - [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, - imm:$CRm, imm:$opc2)]>, + [(int_arm_mcr2 timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn, + timm:$CRm, timm:$opc2)]>, Requires<[IsARM,PreV8]>; def : ARMInstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm", (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, @@ -5394,9 +5438,9 @@ def : ARMInstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm", (MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, 0)>; -def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, - 
imm:$CRm, imm:$opc2), - (MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; +def : ARMV5TPat<(int_arm_mrc2 timm:$cop, timm:$opc1, timm:$CRn, + timm:$CRm, timm:$opc2), + (MRC2 p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>; class MovRRCopro pattern = []> @@ -5422,8 +5466,8 @@ class MovRRCopro def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */, (outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt, GPRnopc:$Rt2, c_imm:$CRm), - [(int_arm_mcrr imm:$cop, imm:$opc1, GPRnopc:$Rt, - GPRnopc:$Rt2, imm:$CRm)]>; + [(int_arm_mcrr timm:$cop, timm:$opc1, GPRnopc:$Rt, + GPRnopc:$Rt2, timm:$CRm)]>; def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */, (outs GPRnopc:$Rt, GPRnopc:$Rt2), (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm), []>; @@ -5455,8 +5499,8 @@ class MovRRCopro2; + [(int_arm_mcrr2 timm:$cop, timm:$opc1, GPRnopc:$Rt, + GPRnopc:$Rt2, timm:$CRm)]>; def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */, (outs GPRnopc:$Rt, GPRnopc:$Rt2), @@ -5579,12 +5623,12 @@ def MSRbanked : ABI<0b0001, (outs), (ins banked_reg:$banked, GPRnopc:$Rn), def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone, [SDNPHasChain, SDNPSideEffect]>; -let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP] in +let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP], hasNoSchedulingInfo = 1 in def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>; def win__dbzchk : SDNode<"ARMISD::WIN__DBZCHK", SDT_WIN__DBZCHK, [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; -let usesCustomInserter = 1, Defs = [CPSR] in +let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in def WIN__DBZCHK : PseudoInst<(outs), (ins tGPR:$divisor), NoItinerary, [(win__dbzchk tGPR:$divisor)]>; @@ -6131,10 +6175,10 @@ def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm", def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>, ComplexDeprecationPredicate<"IT">; -let mayLoad = 1, mayStore =1, hasSideEffects = 1 in +let mayLoad = 1, mayStore =1, hasSideEffects = 1, hasNoSchedulingInfo = 1 in def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn), NoItinerary, - [(set GPR:$Rd, (int_arm_space imm:$size, GPR:$Rn))]>; + [(set GPR:$Rd, (int_arm_space timm:$size, GPR:$Rn))]>; //===---------------------------------- // Atomic cmpxchg for -O0 @@ -6174,4 +6218,5 @@ def CompilerBarrier : PseudoInst<(outs), (ins i32imm:$ordering), NoItinerary, let hasSideEffects = 1; let Size = 0; let AsmString = "@ COMPILER BARRIER"; + let hasNoSchedulingInfo = 1; } diff --git a/lib/Target/ARM/ARMInstrMVE.td b/lib/Target/ARM/ARMInstrMVE.td index 3e7ae55c7fc8..4f67cd6e47cc 100644 --- a/lib/Target/ARM/ARMInstrMVE.td +++ b/lib/Target/ARM/ARMInstrMVE.td @@ -160,7 +160,8 @@ class TMemImm7ShiftOffsetAsmOperand : AsmOperandClass { let RenderMethod = "addMemImmOffsetOperands"; } -class taddrmode_imm7 : MemOperand { +class taddrmode_imm7 : MemOperand, + ComplexPattern", []> { let ParserMatchClass = TMemImm7ShiftOffsetAsmOperand; // They are printed the same way as the T2 imm8 version let PrintMethod = "printT2AddrModeImm8Operand"; @@ -221,7 +222,9 @@ def t2am_imm7shift0OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<0>; def t2am_imm7shift1OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<1>; def t2am_imm7shift2OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<2>; -class t2am_imm7_offset : MemOperand { +class t2am_imm7_offset : MemOperand, + ComplexPattern", + [], [SDNPWantRoot]> { // They are printed the same way as the imm8 version let 
PrintMethod = "printT2AddrModeImm8OffsetOperand"; let ParserMatchClass = @@ -371,6 +374,8 @@ class MVE_ScalarShiftSRegReg op5_4, list pattern=[]> let Inst{7-6} = 0b00; let Inst{5-4} = op5_4{1-0}; let Inst{3-0} = 0b1101; + + let Unpredictable{8-6} = 0b111; } def MVE_SQRSHR : MVE_ScalarShiftSRegReg<"sqrshr", 0b10>; @@ -403,18 +408,17 @@ class MVE_ScalarShiftDRegImm op5_4, bit op16, let Inst{3-0} = 0b1111; } -class MVE_ScalarShiftDRegReg pattern=[]> +class MVE_ScalarShiftDRegRegBase pattern=[]> : MVE_ScalarShiftDoubleReg< - iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm), - "$RdaLo, $RdaHi, $Rm", "@earlyclobber $RdaHi,@earlyclobber $RdaLo," - "$RdaLo = $RdaLo_src,$RdaHi = $RdaHi_src", + iname, iops, asm, "@earlyclobber $RdaHi,@earlyclobber $RdaLo," + "$RdaLo = $RdaLo_src,$RdaHi = $RdaHi_src", pattern> { bits<4> Rm; let Inst{16} = op16; let Inst{15-12} = Rm{3-0}; - let Inst{7-6} = 0b00; + let Inst{6} = 0b0; let Inst{5} = op5; let Inst{4} = 0b0; let Inst{3-0} = 0b1101; @@ -427,27 +431,44 @@ class MVE_ScalarShiftDRegReg pattern=[]> + : MVE_ScalarShiftDRegRegBase< + iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm), + "$RdaLo, $RdaHi, $Rm", op5, 0b0, pattern> { + + let Inst{7} = 0b0; +} + +class MVE_ScalarShiftDRegRegWithSat pattern=[]> + : MVE_ScalarShiftDRegRegBase< + iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm, saturateop:$sat), + "$RdaLo, $RdaHi, $sat, $Rm", op5, 0b1, pattern> { + bit sat; + + let Inst{7} = sat; +} + +def MVE_ASRLr : MVE_ScalarShiftDRegReg<"asrl", 0b1, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi, (ARMasrl tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm))]>; def MVE_ASRLi : MVE_ScalarShiftDRegImm<"asrl", 0b10, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi, (ARMasrl tGPREven:$RdaLo_src, - tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>; -def MVE_LSLLr : MVE_ScalarShiftDRegReg<"lsll", 0b0, 0b0, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi, + tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>; +def MVE_LSLLr : MVE_ScalarShiftDRegReg<"lsll", 0b0, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi, (ARMlsll tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm))]>; def MVE_LSLLi : MVE_ScalarShiftDRegImm<"lsll", 0b00, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi, (ARMlsll tGPREven:$RdaLo_src, - tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>; + tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>; def MVE_LSRL : MVE_ScalarShiftDRegImm<"lsrl", 0b01, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi, (ARMlsrl tGPREven:$RdaLo_src, - tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>; + tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>; -def MVE_SQRSHRL : MVE_ScalarShiftDRegReg<"sqrshrl", 0b1, 0b1>; +def MVE_SQRSHRL : MVE_ScalarShiftDRegRegWithSat<"sqrshrl", 0b1>; def MVE_SQSHLL : MVE_ScalarShiftDRegImm<"sqshll", 0b11, 0b1>; def MVE_SRSHRL : MVE_ScalarShiftDRegImm<"srshrl", 0b10, 0b1>; -def MVE_UQRSHLL : MVE_ScalarShiftDRegReg<"uqrshll", 0b0, 0b1>; +def MVE_UQRSHLL : MVE_ScalarShiftDRegRegWithSat<"uqrshll", 0b0>; def MVE_UQSHLL : MVE_ScalarShiftDRegImm<"uqshll", 0b00, 0b1>; def MVE_URSHRL : MVE_ScalarShiftDRegImm<"urshrl", 0b01, 0b1>; @@ -531,6 +552,19 @@ defm MVE_VADDVu8 : MVE_VADDV_A<"u8", 0b1, 0b00>; defm MVE_VADDVu16 : MVE_VADDV_A<"u16", 0b1, 0b01>; defm MVE_VADDVu32 : MVE_VADDV_A<"u32", 0b1, 0b10>; +let Predicates = [HasMVEInt] in { + def : Pat<(i32 (vecreduce_add (v4i32 MQPR:$src))), (i32 (MVE_VADDVu32no_acc $src))>; + def : Pat<(i32 (vecreduce_add (v8i16 MQPR:$src))), (i32 (MVE_VADDVu16no_acc $src))>; + def : Pat<(i32 (vecreduce_add (v16i8 MQPR:$src))), (i32 (MVE_VADDVu8no_acc $src))>; + def : Pat<(i32 (add (i32 
(vecreduce_add (v4i32 MQPR:$src1))), (i32 tGPR:$src2))), + (i32 (MVE_VADDVu32acc $src2, $src1))>; + def : Pat<(i32 (add (i32 (vecreduce_add (v8i16 MQPR:$src1))), (i32 tGPR:$src2))), + (i32 (MVE_VADDVu16acc $src2, $src1))>; + def : Pat<(i32 (add (i32 (vecreduce_add (v16i8 MQPR:$src1))), (i32 tGPR:$src2))), + (i32 (MVE_VADDVu8acc $src2, $src1))>; + +} + class MVE_VADDLV pattern=[]> : MVE_rDest<(outs tGPREven:$RdaLo, tGPROdd:$RdaHi), iops, NoItinerary, iname, @@ -636,6 +670,35 @@ multiclass MVE_VMINMAXV_ty pattern=[]> { defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 0b1>; defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0b0>; +let Predicates = [HasMVEInt] in { + def : Pat<(i32 (vecreduce_smax (v16i8 MQPR:$src))), + (i32 (MVE_VMAXVs8 (t2MVNi (i32 127)), $src))>; + def : Pat<(i32 (vecreduce_smax (v8i16 MQPR:$src))), + (i32 (MVE_VMAXVs16 (t2MOVi32imm (i32 -32768)), $src))>; + def : Pat<(i32 (vecreduce_smax (v4i32 MQPR:$src))), + (i32 (MVE_VMAXVs32 (t2MOVi (i32 -2147483648)), $src))>; + def : Pat<(i32 (vecreduce_umax (v16i8 MQPR:$src))), + (i32 (MVE_VMAXVu8 (t2MOVi (i32 0)), $src))>; + def : Pat<(i32 (vecreduce_umax (v8i16 MQPR:$src))), + (i32 (MVE_VMAXVu16 (t2MOVi (i32 0)), $src))>; + def : Pat<(i32 (vecreduce_umax (v4i32 MQPR:$src))), + (i32 (MVE_VMAXVu32 (t2MOVi (i32 0)), $src))>; + + def : Pat<(i32 (vecreduce_smin (v16i8 MQPR:$src))), + (i32 (MVE_VMINVs8 (t2MOVi (i32 127)), $src))>; + def : Pat<(i32 (vecreduce_smin (v8i16 MQPR:$src))), + (i32 (MVE_VMINVs16 (t2MOVi16 (i32 32767)), $src))>; + def : Pat<(i32 (vecreduce_smin (v4i32 MQPR:$src))), + (i32 (MVE_VMINVs32 (t2MVNi (i32 -2147483648)), $src))>; + def : Pat<(i32 (vecreduce_umin (v16i8 MQPR:$src))), + (i32 (MVE_VMINVu8 (t2MOVi (i32 255)), $src))>; + def : Pat<(i32 (vecreduce_umin (v8i16 MQPR:$src))), + (i32 (MVE_VMINVu16 (t2MOVi16 (i32 65535)), $src))>; + def : Pat<(i32 (vecreduce_umin (v4i32 MQPR:$src))), + (i32 (MVE_VMINVu32 (t2MOVi (i32 4294967295)), $src))>; + +} + multiclass MVE_VMINMAXAV_ty pattern=[]> { def s8 : MVE_VMINMAXV; def s16 : MVE_VMINMAXV; @@ -667,57 +730,57 @@ class MVE_VMLAMLSDAV pattern=[]> { - def _noexch : MVE_VMLAMLSDAV; - def _exch : MVE_VMLAMLSDAV; +multiclass MVE_VMLAMLSDAV_A pattern=[]> { + def ""#x#suffix : MVE_VMLAMLSDAV; + def "a"#x#suffix : MVE_VMLAMLSDAV; +} + +multiclass MVE_VMLAMLSDAV_AX pattern=[]> { + defm "" : MVE_VMLAMLSDAV_A; + defm "" : MVE_VMLAMLSDAV_A; } -multiclass MVE_VMLAMLSDAV_XA pattern=[]> { - defm _noacc : MVE_VMLAMLSDAV_X; - defm _acc : MVE_VMLAMLSDAV_X; +multiclass MVE_VMLADAV_multi pattern=[]> { + defm "" : MVE_VMLAMLSDAV_AX<"vmladav", "s"#suffix, + sz, 0b0, bit_8, 0b0, pattern>; + defm "" : MVE_VMLAMLSDAV_A<"vmladav", "", "u"#suffix, + sz, 0b1, 0b0, bit_8, 0b0, pattern>; } -multiclass MVE_VMLADAV_multi pattern=[]> { - defm "" : MVE_VMLAMLSDAV_XA<"vmladav", suffix, sz, U, bit_8, 0b0, pattern>; +multiclass MVE_VMLSDAV_multi pattern=[]> { + defm "" : MVE_VMLAMLSDAV_AX<"vmlsdav", "s"#suffix, + sz, bit_28, 0b0, 0b1, pattern>; } -defm MVE_VMLADAVs16 : MVE_VMLADAV_multi<"s16", 0b0, 0b0, 0b0>; -defm MVE_VMLADAVs32 : MVE_VMLADAV_multi<"s32", 0b1, 0b0, 0b0>; -defm MVE_VMLADAVu16 : MVE_VMLADAV_multi<"u16", 0b0, 0b1, 0b0>; -defm MVE_VMLADAVu32 : MVE_VMLADAV_multi<"u32", 0b1, 0b1, 0b0>; +defm MVE_VMLADAV : MVE_VMLADAV_multi< "8", 0b0, 0b1>; +defm MVE_VMLADAV : MVE_VMLADAV_multi<"16", 0b0, 0b0>; +defm MVE_VMLADAV : MVE_VMLADAV_multi<"32", 0b1, 0b0>; -defm MVE_VMLADAVs8 : MVE_VMLADAV_multi<"s8", 0b0, 0b0, 0b1>; -defm MVE_VMLADAVu8 : MVE_VMLADAV_multi<"u8", 0b0, 0b1, 0b1>; +defm MVE_VMLSDAV : MVE_VMLSDAV_multi< "8", 
0b0, 0b1>; +defm MVE_VMLSDAV : MVE_VMLSDAV_multi<"16", 0b0, 0b0>; +defm MVE_VMLSDAV : MVE_VMLSDAV_multi<"32", 0b1, 0b0>; // vmlav aliases vmladav -foreach acc = ["_acc", "_noacc"] in { +foreach acc = ["", "a"] in { foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32"] in { - def : MVEInstAlias("MVE_VMLADAV"#suffix#acc#"_noexch") + def : MVEInstAlias<"vmlav"#acc#"${vp}."#suffix#"\t$RdaDest, $Qn, $Qm", + (!cast("MVE_VMLADAV"#acc#suffix) tGPREven:$RdaDest, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>; } } -multiclass MVE_VMLSDAV_multi pattern=[]> { - defm "" : MVE_VMLAMLSDAV_XA<"vmlsdav", suffix, sz, bit_28, 0b0, 0b1, pattern>; -} - -defm MVE_VMLSDAVs8 : MVE_VMLSDAV_multi<"s8", 0, 0b1>; -defm MVE_VMLSDAVs16 : MVE_VMLSDAV_multi<"s16", 0, 0b0>; -defm MVE_VMLSDAVs32 : MVE_VMLSDAV_multi<"s32", 1, 0b0>; - // Base class for VMLALDAV and VMLSLDAV, VRMLALDAVH, VRMLSLDAVH class MVE_VMLALDAVBase pattern=[]> { - def _noexch : MVE_VMLALDAVBase; - def _exch : MVE_VMLALDAVBase; +multiclass MVE_VMLALDAVBase_A pattern=[]> { + def ""#x#suffix : MVE_VMLALDAVBase< + iname # x, suffix, (ins MQPR:$Qn, MQPR:$Qm), "", + sz, bit_28, 0b0, X, bit_8, bit_0, pattern>; + def "a"#x#suffix : MVE_VMLALDAVBase< + iname # "a" # x, suffix, + (ins tGPREven:$RdaLoSrc, tGPROdd:$RdaHiSrc, MQPR:$Qn, MQPR:$Qm), + "$RdaLoDest = $RdaLoSrc,$RdaHiDest = $RdaHiSrc", + sz, bit_28, 0b1, X, bit_8, bit_0, pattern>; } -multiclass MVE_VMLALDAVBase_XA pattern=[]> { - defm _noacc : MVE_VMLALDAVBase_X< - iname, suffix, (ins MQPR:$Qn, MQPR:$Qm), "", - sz, bit_28, 0b0, bit_8, bit_0, pattern>; - defm _acc : MVE_VMLALDAVBase_X< - iname # "a", suffix, (ins tGPREven:$RdaLoSrc, tGPROdd:$RdaHiSrc, - MQPR:$Qn, MQPR:$Qm), - "$RdaLoDest = $RdaLoSrc,$RdaHiDest = $RdaHiSrc", - sz, bit_28, 0b1, bit_8, bit_0, pattern>; + +multiclass MVE_VMLALDAVBase_AX pattern=[]> { + defm "" : MVE_VMLALDAVBase_A; + defm "" : MVE_VMLALDAVBase_A; } -multiclass MVE_VRMLALDAVH_multi pattern=[]> { - defm "" : MVE_VMLALDAVBase_XA< - "vrmlaldavh", suffix, 0b0, U, 0b1, 0b0, pattern>; +multiclass MVE_VRMLALDAVH_multi pattern=[]> { + defm "" : MVE_VMLALDAVBase_AX<"vrmlaldavh", "s"#suffix, + 0b0, 0b0, 0b1, 0b0, pattern>; + defm "" : MVE_VMLALDAVBase_A<"vrmlaldavh", "", "u"#suffix, + 0b0, 0b1, 0b0, 0b1, 0b0, pattern>; } -defm MVE_VRMLALDAVHs32 : MVE_VRMLALDAVH_multi<"s32", 0>; -defm MVE_VRMLALDAVHu32 : MVE_VRMLALDAVH_multi<"u32", 1>; +defm MVE_VRMLALDAVH : MVE_VRMLALDAVH_multi<"32">; // vrmlalvh aliases for vrmlaldavh def : MVEInstAlias<"vrmlalvh${vp}.s32\t$RdaLo, $RdaHi, $Qn, $Qm", - (MVE_VRMLALDAVHs32_noacc_noexch + (MVE_VRMLALDAVHs32 tGPREven:$RdaLo, tGPROdd:$RdaHi, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>; def : MVEInstAlias<"vrmlalvha${vp}.s32\t$RdaLo, $RdaHi, $Qn, $Qm", - (MVE_VRMLALDAVHs32_acc_noexch + (MVE_VRMLALDAVHas32 tGPREven:$RdaLo, tGPROdd:$RdaHi, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>; def : MVEInstAlias<"vrmlalvh${vp}.u32\t$RdaLo, $RdaHi, $Qn, $Qm", - (MVE_VRMLALDAVHu32_noacc_noexch + (MVE_VRMLALDAVHu32 tGPREven:$RdaLo, tGPROdd:$RdaHi, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>; def : MVEInstAlias<"vrmlalvha${vp}.u32\t$RdaLo, $RdaHi, $Qn, $Qm", - (MVE_VRMLALDAVHu32_acc_noexch + (MVE_VRMLALDAVHau32 tGPREven:$RdaLo, tGPROdd:$RdaHi, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>; -multiclass MVE_VMLALDAV_multi pattern=[]> { - defm "" : MVE_VMLALDAVBase_XA<"vmlaldav", suffix, sz, U, 0b0, 0b0, pattern>; +multiclass MVE_VMLALDAV_multi pattern=[]> { + defm "" : MVE_VMLALDAVBase_AX<"vmlaldav", "s"#suffix, sz, 0b0, 0b0, 0b0, pattern>; + defm "" : MVE_VMLALDAVBase_A<"vmlaldav", "", "u"#suffix, + sz, 0b1, 0b0, 0b0, 0b0, 
pattern>; } -defm MVE_VMLALDAVs16 : MVE_VMLALDAV_multi<"s16", 0b0, 0b0>; -defm MVE_VMLALDAVs32 : MVE_VMLALDAV_multi<"s32", 0b1, 0b0>; -defm MVE_VMLALDAVu16 : MVE_VMLALDAV_multi<"u16", 0b0, 0b1>; -defm MVE_VMLALDAVu32 : MVE_VMLALDAV_multi<"u32", 0b1, 0b1>; +defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"16", 0b0>; +defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"32", 0b1>; // vmlalv aliases vmlaldav -foreach acc = ["_acc", "_noacc"] in { +foreach acc = ["", "a"] in { foreach suffix = ["s16", "s32", "u16", "u32"] in { - def : MVEInstAlias("MVE_VMLALDAV"#suffix#acc#"_noexch") + def : MVEInstAlias<"vmlalv" # acc # "${vp}." # suffix # + "\t$RdaLoDest, $RdaHiDest, $Qn, $Qm", + (!cast("MVE_VMLALDAV"#acc#suffix) tGPREven:$RdaLoDest, tGPROdd:$RdaHiDest, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>; } } multiclass MVE_VMLSLDAV_multi pattern=[]> { - defm "" : MVE_VMLALDAVBase_XA; + bit bit_28, list pattern=[]> { + defm "" : MVE_VMLALDAVBase_AX; } -defm MVE_VMLSLDAVs16 : MVE_VMLSLDAV_multi<"vmlsldav", "s16", 0b0, 0b0>; -defm MVE_VMLSLDAVs32 : MVE_VMLSLDAV_multi<"vmlsldav", "s32", 0b1, 0b0>; -defm MVE_VRMLSLDAVHs32 : MVE_VMLSLDAV_multi<"vrmlsldavh", "s32", 0b0, 0b1>; +defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s16", 0b0, 0b0>; +defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s32", 0b1, 0b0>; +defm MVE_VRMLSLDAVH : MVE_VMLSLDAV_multi<"vrmlsldavh", "s32", 0b0, 0b1>; // end of mve_rDest instructions @@ -967,11 +1031,12 @@ def MVE_VBIC : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), let Inst{6} = 0b1; let Inst{4} = 0b1; let Inst{0} = 0b0; + let validForTailPredication = 1; } -class MVE_VREV size, bits<2> bit_8_7> +class MVE_VREV size, bits<2> bit_8_7, string cstr=""> : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm), iname, - suffix, "$Qd, $Qm", ""> { + suffix, "$Qd, $Qm", cstr> { let Inst{28} = 0b1; let Inst{25-23} = 0b111; @@ -985,15 +1050,22 @@ class MVE_VREV size, bits<2> bit_8_7> let Inst{0} = 0b0; } -def MVE_VREV64_8 : MVE_VREV<"vrev64", "8", 0b00, 0b00>; -def MVE_VREV64_16 : MVE_VREV<"vrev64", "16", 0b01, 0b00>; -def MVE_VREV64_32 : MVE_VREV<"vrev64", "32", 0b10, 0b00>; +def MVE_VREV64_8 : MVE_VREV<"vrev64", "8", 0b00, 0b00, "@earlyclobber $Qd">; +def MVE_VREV64_16 : MVE_VREV<"vrev64", "16", 0b01, 0b00, "@earlyclobber $Qd">; +def MVE_VREV64_32 : MVE_VREV<"vrev64", "32", 0b10, 0b00, "@earlyclobber $Qd">; def MVE_VREV32_8 : MVE_VREV<"vrev32", "8", 0b00, 0b01>; def MVE_VREV32_16 : MVE_VREV<"vrev32", "16", 0b01, 0b01>; def MVE_VREV16_8 : MVE_VREV<"vrev16", "8", 0b00, 0b10>; +let Predicates = [HasMVEInt] in { + def : Pat<(v8i16 (bswap (v8i16 MQPR:$src))), + (v8i16 (MVE_VREV16_8 (v8i16 MQPR:$src)))>; + def : Pat<(v4i32 (bswap (v4i32 MQPR:$src))), + (v4i32 (MVE_VREV32_8 (v4i32 MQPR:$src)))>; +} + let Predicates = [HasMVEInt] in { def : Pat<(v4i32 (ARMvrev64 (v4i32 MQPR:$src))), (v4i32 (MVE_VREV64_32 (v4i32 MQPR:$src)))>; @@ -1026,6 +1098,7 @@ def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm), let Inst{12-6} = 0b0010111; let Inst{4} = 0b0; let Inst{0} = 0b0; + let validForTailPredication = 1; } let Predicates = [HasMVEInt] in { @@ -1054,6 +1127,7 @@ class MVE_bit_ops bit_21_20, bit bit_28> let Inst{6} = 0b1; let Inst{4} = 0b1; let Inst{0} = 0b0; + let validForTailPredication = 1; } def MVE_VEOR : MVE_bit_ops<"veor", 0b00, 0b1>; @@ -1145,6 +1219,7 @@ class MVE_bit_cmode cmode, dag inOps> class MVE_VORR cmode, ExpandImm imm_type> : MVE_bit_cmode<"vorr", suffix, cmode, (ins MQPR:$Qd_src, imm_type:$imm)> { let Inst{5} = 0b0; + let validForTailPredication = 1; } def MVE_VORRIZ0v4i32 : MVE_VORR<"i32", 
0b0001, expzero00>; @@ -1173,6 +1248,7 @@ def MVE_VMOV : MVEInstAlias<"vmov${vp}\t$Qd, $Qm", class MVE_VBIC cmode, ExpandImm imm_type> : MVE_bit_cmode<"vbic", suffix, cmode, (ins MQPR:$Qd_src, imm_type:$imm)> { let Inst{5} = 0b1; + let validForTailPredication = 1; } def MVE_VBICIZ0v4i32 : MVE_VBIC<"i32", 0b0001, expzero00>; @@ -1315,8 +1391,12 @@ let Predicates = [HasMVEInt] in { def : Pat<(insertelt (v8f16 MQPR:$src1), HPR:$src2, imm:$lane), (MVE_VMOV_to_lane_16 MQPR:$src1, (COPY_TO_REGCLASS HPR:$src2, rGPR), imm:$lane)>; - def : Pat<(extractelt (v8f16 MQPR:$src), imm:$lane), - (COPY_TO_REGCLASS (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane), HPR)>; + def : Pat<(extractelt (v8f16 MQPR:$src), imm_even:$lane), + (EXTRACT_SUBREG MQPR:$src, (SSubReg_f16_reg imm_even:$lane))>; + def : Pat<(extractelt (v8f16 MQPR:$src), imm_odd:$lane), + (COPY_TO_REGCLASS + (VMOVH (EXTRACT_SUBREG MQPR:$src, (SSubReg_f16_reg imm_odd:$lane))), + HPR)>; def : Pat<(v4f32 (scalar_to_vector SPR:$src)), (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>; @@ -1408,6 +1488,7 @@ class MVE_VADDSUB size, bit subtract, let Inst{12-8} = 0b01000; let Inst{4} = 0b0; let Inst{0} = 0b0; + let validForTailPredication = 1; } class MVE_VADD size, list pattern=[]> @@ -1442,8 +1523,8 @@ let Predicates = [HasMVEInt] in { } class MVE_VQADDSUB size, list pattern=[]> - : MVE_int { + bits<2> size, ValueType vt> + : MVE_int { let Inst{28} = U; let Inst{25-23} = 0b110; @@ -1453,26 +1534,49 @@ class MVE_VQADDSUB size, list pattern=[]> - : MVE_VQADDSUB<"vqadd", suffix, U, 0b0, size, pattern>; -class MVE_VQSUB size, list pattern=[]> - : MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size, pattern>; +class MVE_VQADD size, ValueType VT> + : MVE_VQADDSUB<"vqadd", suffix, U, 0b0, size, VT>; +class MVE_VQSUB size, ValueType VT> + : MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size, VT>; -def MVE_VQADDs8 : MVE_VQADD<"s8", 0b0, 0b00>; -def MVE_VQADDs16 : MVE_VQADD<"s16", 0b0, 0b01>; -def MVE_VQADDs32 : MVE_VQADD<"s32", 0b0, 0b10>; -def MVE_VQADDu8 : MVE_VQADD<"u8", 0b1, 0b00>; -def MVE_VQADDu16 : MVE_VQADD<"u16", 0b1, 0b01>; -def MVE_VQADDu32 : MVE_VQADD<"u32", 0b1, 0b10>; +def MVE_VQADDs8 : MVE_VQADD<"s8", 0b0, 0b00, v16i8>; +def MVE_VQADDs16 : MVE_VQADD<"s16", 0b0, 0b01, v8i16>; +def MVE_VQADDs32 : MVE_VQADD<"s32", 0b0, 0b10, v4i32>; +def MVE_VQADDu8 : MVE_VQADD<"u8", 0b1, 0b00, v16i8>; +def MVE_VQADDu16 : MVE_VQADD<"u16", 0b1, 0b01, v8i16>; +def MVE_VQADDu32 : MVE_VQADD<"u32", 0b1, 0b10, v4i32>; + +def MVE_VQSUBs8 : MVE_VQSUB<"s8", 0b0, 0b00, v16i8>; +def MVE_VQSUBs16 : MVE_VQSUB<"s16", 0b0, 0b01, v8i16>; +def MVE_VQSUBs32 : MVE_VQSUB<"s32", 0b0, 0b10, v4i32>; +def MVE_VQSUBu8 : MVE_VQSUB<"u8", 0b1, 0b00, v16i8>; +def MVE_VQSUBu16 : MVE_VQSUB<"u16", 0b1, 0b01, v8i16>; +def MVE_VQSUBu32 : MVE_VQSUB<"u32", 0b1, 0b10, v4i32>; + +let Predicates = [HasMVEInt] in { + foreach instr = [MVE_VQADDu8, MVE_VQADDu16, MVE_VQADDu32] in + foreach VT = [instr.VT] in + def : Pat<(VT (uaddsat (VT MQPR:$Qm), (VT MQPR:$Qn))), + (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>; + foreach instr = [MVE_VQADDs8, MVE_VQADDs16, MVE_VQADDs32] in + foreach VT = [instr.VT] in + def : Pat<(VT (saddsat (VT MQPR:$Qm), (VT MQPR:$Qn))), + (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>; + foreach instr = [MVE_VQSUBu8, MVE_VQSUBu16, MVE_VQSUBu32] in + foreach VT = [instr.VT] in + def : Pat<(VT (usubsat (VT MQPR:$Qm), (VT MQPR:$Qn))), + (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>; + foreach instr = [MVE_VQSUBs8, MVE_VQSUBs16, MVE_VQSUBs32] in + foreach VT = [instr.VT] in + def : Pat<(VT (ssubsat (VT 
MQPR:$Qm), (VT MQPR:$Qn))), + (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>; +} -def MVE_VQSUBs8 : MVE_VQSUB<"s8", 0b0, 0b00>; -def MVE_VQSUBs16 : MVE_VQSUB<"s16", 0b0, 0b01>; -def MVE_VQSUBs32 : MVE_VQSUB<"s32", 0b0, 0b10>; -def MVE_VQSUBu8 : MVE_VQSUB<"u8", 0b1, 0b00>; -def MVE_VQSUBu16 : MVE_VQSUB<"u16", 0b1, 0b01>; -def MVE_VQSUBu32 : MVE_VQSUB<"u32", 0b1, 0b10>; class MVE_VABD_int size, list pattern=[]> : MVE_int<"vabd", suffix, size, pattern> { @@ -1483,6 +1587,7 @@ class MVE_VABD_int size, list pattern=[]> let Inst{12-8} = 0b00111; let Inst{4} = 0b0; let Inst{0} = 0b0; + let validForTailPredication = 1; } def MVE_VABDs8 : MVE_VABD_int<"s8", 0b0, 0b00>; @@ -1501,6 +1606,7 @@ class MVE_VRHADD size, list pattern=[]> let Inst{12-8} = 0b00001; let Inst{4} = 0b0; let Inst{0} = 0b0; + let validForTailPredication = 1; } def MVE_VRHADDs8 : MVE_VRHADD<"s8", 0b0, 0b00>; @@ -1522,6 +1628,7 @@ class MVE_VHADDSUB size, @@ -1545,6 +1652,60 @@ def MVE_VHSUBu8 : MVE_VHSUB<"u8", 0b1, 0b00>; def MVE_VHSUBu16 : MVE_VHSUB<"u16", 0b1, 0b01>; def MVE_VHSUBu32 : MVE_VHSUB<"u32", 0b1, 0b10>; +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 (ARMvshrsImm + (v16i8 (add (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)), + (v16i8 (MVE_VHADDs8 + (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>; + def : Pat<(v8i16 (ARMvshrsImm + (v8i16 (add (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)), + (v8i16 (MVE_VHADDs16 + (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>; + def : Pat<(v4i32 (ARMvshrsImm + (v4i32 (add (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)), + (v4i32 (MVE_VHADDs32 + (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>; + + def : Pat<(v16i8 (ARMvshruImm + (v16i8 (add (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)), + (v16i8 (MVE_VHADDu8 + (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>; + def : Pat<(v8i16 (ARMvshruImm + (v8i16 (add (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)), + (v8i16 (MVE_VHADDu16 + (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>; + def : Pat<(v4i32 (ARMvshruImm + (v4i32 (add (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)), + (v4i32 (MVE_VHADDu32 + (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>; + + def : Pat<(v16i8 (ARMvshrsImm + (v16i8 (sub (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)), + (v16i8 (MVE_VHSUBs8 + (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>; + def : Pat<(v8i16 (ARMvshrsImm + (v8i16 (sub (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)), + (v8i16 (MVE_VHSUBs16 + (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>; + def : Pat<(v4i32 (ARMvshrsImm + (v4i32 (sub (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)), + (v4i32 (MVE_VHSUBs32 + (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>; + + def : Pat<(v16i8 (ARMvshruImm + (v16i8 (sub (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)), + (v16i8 (MVE_VHSUBu8 + (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>; + def : Pat<(v8i16 (ARMvshruImm + (v8i16 (sub (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)), + (v8i16 (MVE_VHSUBu16 + (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>; + def : Pat<(v4i32 (ARMvshruImm + (v4i32 (sub (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)), + (v4i32 (MVE_VHSUBu32 + (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>; +} + class MVE_VDUP pattern=[]> : MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary, "vdup", suffix, "$Qd, $Rt", vpred_r, "", pattern> { @@ -1563,6 +1724,7 @@ class MVE_VDUP pattern=[]> let Inst{6} = 0b0; let Inst{5} = E; let Inst{4-0} = 0b10000; + let validForTailPredication = 1; } def MVE_VDUP32 : MVE_VDUP<"32", 0b0, 0b0>; @@ -1625,6 +1787,7 @@ class MVE_VCLSCLZ size, let Inst{6} = 0b1; let Inst{4} = 0b0; let Inst{0} = 0b0; + let validForTailPredication = 1; } def MVE_VCLSs8 : MVE_VCLSCLZ<"vcls", "s8", 0b00, 0b0>; @@ -1635,6 +1798,15 @@ def MVE_VCLZs8 : MVE_VCLSCLZ<"vclz", "i8", 
0b00, 0b1>; def MVE_VCLZs16 : MVE_VCLSCLZ<"vclz", "i16", 0b01, 0b1>; def MVE_VCLZs32 : MVE_VCLSCLZ<"vclz", "i32", 0b10, 0b1>; +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 ( ctlz (v16i8 MQPR:$val1))), + (v16i8 ( MVE_VCLZs8 (v16i8 MQPR:$val1)))>; + def : Pat<(v4i32 ( ctlz (v4i32 MQPR:$val1))), + (v4i32 ( MVE_VCLZs32 (v4i32 MQPR:$val1)))>; + def : Pat<(v8i16 ( ctlz (v8i16 MQPR:$val1))), + (v8i16 ( MVE_VCLZs16 (v8i16 MQPR:$val1)))>; +} + class MVE_VABSNEG_int size, bit negate, list pattern=[]> : MVEIntSingleSrc { @@ -1648,6 +1820,7 @@ class MVE_VABSNEG_int size, bit negate, let Inst{6} = 0b1; let Inst{4} = 0b0; let Inst{0} = 0b0; + let validForTailPredication = 1; } def MVE_VABSs8 : MVE_VABSNEG_int<"vabs", "s8", 0b00, 0b0>; @@ -1689,6 +1862,7 @@ class MVE_VQABSNEG size, let Inst{6} = 0b1; let Inst{4} = 0b0; let Inst{0} = 0b0; + let validForTailPredication = 1; } def MVE_VQABSs8 : MVE_VQABSNEG<"vqabs", "s8", 0b00, 0b0>; @@ -1720,6 +1894,7 @@ class MVE_mod_imm cmode, bit op, let Inst{3-0} = imm{3-0}; let DecoderMethod = "DecodeMVEModImmInstruction"; + let validForTailPredication = 1; } let isReMaterializable = 1 in { @@ -2115,6 +2290,7 @@ class MVE_shift_by_vec { @@ -2163,6 +2339,7 @@ class MVE_shift_with_imm @@ -2175,6 +2352,7 @@ class MVE_VSxI_imm let Inst{21-16} = imm; let Inst{10-9} = 0b10; let Inst{8} = bit_8; + let validForTailPredication = 1; } def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, (ins shr_imm8:$imm)> { @@ -2427,6 +2605,7 @@ class MVE_VRINT op, string suffix, bits<2> size, let Inst{11-10} = 0b01; let Inst{9-7} = op{2-0}; let Inst{4} = 0b0; + let validForTailPredication = 1; } @@ -2489,6 +2668,7 @@ class MVE_VMUL_fp pattern=[]> let Inst{12-8} = 0b01101; let Inst{7} = Qn{3}; let Inst{4} = 0b1; + let validForTailPredication = 1; } def MVE_VMULf32 : MVE_VMUL_fp<"f32", 0b0>; @@ -2556,8 +2736,38 @@ def MVE_VFMSf32 : MVE_VADDSUBFMA_fp<"vfms", "f32", 0b0, 0b1, 0b0, 0b1, def MVE_VFMSf16 : MVE_VADDSUBFMA_fp<"vfms", "f16", 0b1, 0b1, 0b0, 0b1, (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">; -def MVE_VADDf32 : MVE_VADDSUBFMA_fp<"vadd", "f32", 0b0, 0b0, 0b1, 0b0>; -def MVE_VADDf16 : MVE_VADDSUBFMA_fp<"vadd", "f16", 0b1, 0b0, 0b1, 0b0>; +let Predicates = [HasMVEFloat, UseFusedMAC] in { + def : Pat<(v8f16 (fadd (v8f16 MQPR:$src1), + (fmul (v8f16 MQPR:$src2), + (v8f16 MQPR:$src3)))), + (v8f16 (MVE_VFMAf16 $src1, $src2, $src3))>; + def : Pat<(v4f32 (fadd (v4f32 MQPR:$src1), + (fmul (v4f32 MQPR:$src2), + (v4f32 MQPR:$src3)))), + (v4f32 (MVE_VFMAf32 $src1, $src2, $src3))>; + + def : Pat<(v8f16 (fsub (v8f16 MQPR:$src1), + (fmul (v8f16 MQPR:$src2), + (v8f16 MQPR:$src3)))), + (v8f16 (MVE_VFMSf16 $src1, $src2, $src3))>; + def : Pat<(v4f32 (fsub (v4f32 MQPR:$src1), + (fmul (v4f32 MQPR:$src2), + (v4f32 MQPR:$src3)))), + (v4f32 (MVE_VFMSf32 $src1, $src2, $src3))>; +} + +let Predicates = [HasMVEFloat] in { + def : Pat<(v8f16 (fma (v8f16 MQPR:$src1), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))), + (v8f16 (MVE_VFMAf16 $src3, $src1, $src2))>; + def : Pat<(v4f32 (fma (v4f32 MQPR:$src1), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))), + (v4f32 (MVE_VFMAf32 $src3, $src1, $src2))>; +} + + +let validForTailPredication = 1 in { + def MVE_VADDf32 : MVE_VADDSUBFMA_fp<"vadd", "f32", 0b0, 0b0, 0b1, 0b0>; + def MVE_VADDf16 : MVE_VADDSUBFMA_fp<"vadd", "f16", 0b1, 0b0, 0b1, 0b0>; +} let Predicates = [HasMVEFloat] in { def : Pat<(v4f32 (fadd (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))), @@ -2566,8 +2776,11 @@ let Predicates = [HasMVEFloat] in { (v8f16 (MVE_VADDf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>; } -def MVE_VSUBf32 
: MVE_VADDSUBFMA_fp<"vsub", "f32", 0b0, 0b0, 0b1, 0b1>; -def MVE_VSUBf16 : MVE_VADDSUBFMA_fp<"vsub", "f16", 0b1, 0b0, 0b1, 0b1>; + +let validForTailPredication = 1 in { + def MVE_VSUBf32 : MVE_VADDSUBFMA_fp<"vsub", "f32", 0b0, 0b0, 0b1, 0b1>; + def MVE_VSUBf16 : MVE_VADDSUBFMA_fp<"vsub", "f16", 0b1, 0b0, 0b1, 0b1>; +} let Predicates = [HasMVEFloat] in { def : Pat<(v4f32 (fsub (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))), @@ -2576,10 +2789,10 @@ let Predicates = [HasMVEFloat] in { (v8f16 (MVE_VSUBf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>; } -class MVE_VCADD pattern=[]> +class MVE_VCADD pattern=[]> : MVEFloatArithNeon<"vcadd", suffix, size, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot), - "$Qd, $Qn, $Qm, $rot", vpred_r, "", pattern> { + "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> { bits<4> Qd; bits<4> Qn; bit rot; @@ -2598,7 +2811,7 @@ class MVE_VCADD pattern=[]> } def MVE_VCADDf16 : MVE_VCADD<"f16", 0b0>; -def MVE_VCADDf32 : MVE_VCADD<"f32", 0b1>; +def MVE_VCADDf32 : MVE_VCADD<"f32", 0b1, "@earlyclobber $Qd">; class MVE_VABD_fp : MVE_float<"vabd", suffix, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), @@ -2617,6 +2830,7 @@ class MVE_VABD_fp let Inst{11-8} = 0b1101; let Inst{7} = Qn{3}; let Inst{4} = 0b0; + let validForTailPredication = 1; } def MVE_VABDf32 : MVE_VABD_fp<"f32", 0b0>; @@ -2643,6 +2857,7 @@ class MVE_VCVT_fix : AsmOperandClass { @@ -2693,6 +2908,7 @@ class MVE_VCVT_fp_int_anpm size, bit op, string anpm, let Inst{9-8} = rm; let Inst{7} = op; let Inst{4} = 0b0; + let validForTailPredication = 1; } multiclass MVE_VCVT_fp_int_anpm_multi size, bit op, @@ -2727,6 +2943,7 @@ class MVE_VCVT_fp_int size, bits<2> op, let Inst{12-9} = 0b0011; let Inst{8-7} = op; let Inst{4} = 0b0; + let validForTailPredication = 1; } // The unsuffixed VCVT for float->int implicitly rounds toward zero, @@ -2776,6 +2993,7 @@ class MVE_VABSNEG_fp size, bit negate, let Inst{11-8} = 0b0111; let Inst{7} = negate; let Inst{4} = 0b0; + let validForTailPredication = 1; } def MVE_VABSf16 : MVE_VABSNEG_fp<"vabs", "f16", 0b01, 0b0>; @@ -2863,6 +3081,7 @@ class MVE_VCMPqq bits_21_20, // decoder to emit an operand that isn't affected by any instruction // bit. 
let DecoderMethod = "DecodeMVEVCMP"; + let validForTailPredication = 1; } class MVE_VCMPqqf @@ -2927,6 +3146,7 @@ class MVE_VCMPqr bits_21_20, let Constraints = ""; // Custom decoder method, for the same reason as MVE_VCMPqq let DecoderMethod = "DecodeMVEVCMP"; + let validForTailPredication = 1; } class MVE_VCMPqrf @@ -2966,6 +3186,168 @@ def MVE_VCMPs8r : MVE_VCMPqrs<"s8", 0b00>; def MVE_VCMPs16r : MVE_VCMPqrs<"s16", 0b01>; def MVE_VCMPs32r : MVE_VCMPqrs<"s32", 0b10>; +multiclass unpred_vcmp_z { + def i8 : Pat<(v16i1 (ARMvcmpz (v16i8 MQPR:$v1), (i32 fc))), + (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc))>; + def i16 : Pat<(v8i1 (ARMvcmpz (v8i16 MQPR:$v1), (i32 fc))), + (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc))>; + def i32 : Pat<(v4i1 (ARMvcmpz (v4i32 MQPR:$v1), (i32 fc))), + (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc))>; + + def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmpz (v16i8 MQPR:$v1), (i32 fc))))), + (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8i16 MQPR:$v1), (i32 fc))))), + (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4i32 MQPR:$v1), (i32 fc))))), + (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; +} + +multiclass unpred_vcmp_r { + def i8 : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), (i32 fc))), + (v16i1 (!cast("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc))>; + def i16 : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), (i32 fc))), + (v8i1 (!cast("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc))>; + def i32 : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), (i32 fc))), + (v4i1 (!cast("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc))>; + + def i8r : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), (i32 fc))), + (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc))>; + def i16r : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), (i32 fc))), + (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc))>; + def i32r : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), (i32 fc))), + (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc))>; + + def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), (i32 fc))))), + (v16i1 (!cast("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, 1, VCCR:$p1))>; + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), (i32 fc))))), + (v8i1 (!cast("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc, 1, VCCR:$p1))>; + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), (i32 fc))))), + (v4i1 (!cast("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, 1, VCCR:$p1))>; + + def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), (i32 fc))))), + (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>; + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), (i32 fc))))), + (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>; + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup 
GPR:$v2)), (i32 fc))))), + (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>; +} + +multiclass unpred_vcmpf_z { + def f16 : Pat<(v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))), + (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc))>; + def f32 : Pat<(v4i1 (ARMvcmpz (v4f32 MQPR:$v1), (i32 fc))), + (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc))>; + + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))))), + (v8i1 (MVE_VCMPf32r (v8f16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4f32 MQPR:$v1), (i32 fc))))), + (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; +} + +multiclass unpred_vcmpf_r { + def f16 : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))), + (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc))>; + def f32 : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))), + (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc))>; + + def f16r : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), (i32 fc))), + (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc))>; + def f32r : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))), + (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc))>; + + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))))), + (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, 1, VCCR:$p1))>; + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))))), + (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, 1, VCCR:$p1))>; + + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), (i32 fc))))), + (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc, 1, VCCR:$p1))>; + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))))), + (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc, 1, VCCR:$p1))>; +} + +let Predicates = [HasMVEInt] in { + defm MVE_VCEQZ : unpred_vcmp_z<"i", 0>; + defm MVE_VCNEZ : unpred_vcmp_z<"i", 1>; + defm MVE_VCGEZ : unpred_vcmp_z<"s", 10>; + defm MVE_VCLTZ : unpred_vcmp_z<"s", 11>; + defm MVE_VCGTZ : unpred_vcmp_z<"s", 12>; + defm MVE_VCLEZ : unpred_vcmp_z<"s", 13>; + defm MVE_VCGTUZ : unpred_vcmp_z<"u", 8>; + defm MVE_VCGEUZ : unpred_vcmp_z<"u", 2>; + + defm MVE_VCEQ : unpred_vcmp_r<"i", 0>; + defm MVE_VCNE : unpred_vcmp_r<"i", 1>; + defm MVE_VCGE : unpred_vcmp_r<"s", 10>; + defm MVE_VCLT : unpred_vcmp_r<"s", 11>; + defm MVE_VCGT : unpred_vcmp_r<"s", 12>; + defm MVE_VCLE : unpred_vcmp_r<"s", 13>; + defm MVE_VCGTU : unpred_vcmp_r<"u", 8>; + defm MVE_VCGEU : unpred_vcmp_r<"u", 2>; +} + +let Predicates = [HasMVEFloat] in { + defm MVE_VFCEQZ : unpred_vcmpf_z<0>; + defm MVE_VFCNEZ : unpred_vcmpf_z<1>; + defm MVE_VFCGEZ : unpred_vcmpf_z<10>; + defm MVE_VFCLTZ : unpred_vcmpf_z<11>; + defm MVE_VFCGTZ : unpred_vcmpf_z<12>; + defm MVE_VFCLEZ : unpred_vcmpf_z<13>; + + defm MVE_VFCEQ : unpred_vcmpf_r<0>; + defm MVE_VFCNE : unpred_vcmpf_r<1>; + defm MVE_VFCGE : unpred_vcmpf_r<10>; + defm MVE_VFCLT : unpred_vcmpf_r<11>; + defm MVE_VFCGT : unpred_vcmpf_r<12>; + defm MVE_VFCLE : unpred_vcmpf_r<13>; +} + + +// Extra "worst case" and/or/xor patterns, going into and out of GPR +multiclass two_predops { + def v16i1 : Pat<(v16i1 (opnode (v16i1
VCCR:$p1), (v16i1 VCCR:$p2))), + (v16i1 (COPY_TO_REGCLASS + (insn (i32 (COPY_TO_REGCLASS (v16i1 VCCR:$p1), rGPR)), + (i32 (COPY_TO_REGCLASS (v16i1 VCCR:$p2), rGPR))), + VCCR))>; + def v8i1 : Pat<(v8i1 (opnode (v8i1 VCCR:$p1), (v8i1 VCCR:$p2))), + (v8i1 (COPY_TO_REGCLASS + (insn (i32 (COPY_TO_REGCLASS (v8i1 VCCR:$p1), rGPR)), + (i32 (COPY_TO_REGCLASS (v8i1 VCCR:$p2), rGPR))), + VCCR))>; + def v4i1 : Pat<(v4i1 (opnode (v4i1 VCCR:$p1), (v4i1 VCCR:$p2))), + (v4i1 (COPY_TO_REGCLASS + (insn (i32 (COPY_TO_REGCLASS (v4i1 VCCR:$p1), rGPR)), + (i32 (COPY_TO_REGCLASS (v4i1 VCCR:$p2), rGPR))), + VCCR))>; +} + +let Predicates = [HasMVEInt] in { + defm POR : two_predops; + defm PAND : two_predops; + defm PEOR : two_predops; +} + +// Occasionally we need to cast between a i32 and a boolean vector, for +// example when moving between rGPR and VPR.P0 as part of predicate vector +// shuffles. We also sometimes need to cast between different predicate +// vector types (v4i1<>v8i1, etc.) also as part of lowering vector shuffles. + +def predicate_cast : SDNode<"ARMISD::PREDICATE_CAST", SDTUnaryOp>; + +let Predicates = [HasMVEInt] in { + foreach VT = [ v4i1, v8i1, v16i1 ] in { + def : Pat<(i32 (predicate_cast (VT VCCR:$src))), + (i32 (COPY_TO_REGCLASS (VT VCCR:$src), VCCR))>; + def : Pat<(VT (predicate_cast (i32 VCCR:$src))), + (VT (COPY_TO_REGCLASS (i32 VCCR:$src), VCCR))>; + + foreach VT2 = [ v4i1, v8i1, v16i1 ] in + def : Pat<(VT (predicate_cast (VT2 VCCR:$src))), + (VT (COPY_TO_REGCLASS (VT2 VCCR:$src), VCCR))>; + } +} + // end of MVE compares // start of MVE_qDest_qSrc @@ -2989,10 +3371,10 @@ class MVE_qDest_qSrc size, list pattern=[]> + string suffix, bits<2> size, string cstr="", list pattern=[]> : MVE_qDest_qSrc { + vpred_n, "$Qd = $Qd_src"#cstr, pattern> { bits<4> Qn; let Inst{28} = subtract; @@ -3009,7 +3391,7 @@ multiclass MVE_VQxDMLxDH_multi { def s8 : MVE_VQxDMLxDH; def s16 : MVE_VQxDMLxDH; - def s32 : MVE_VQxDMLxDH; + def s32 : MVE_VQxDMLxDH; } defm MVE_VQDMLADH : MVE_VQxDMLxDH_multi<"vqdmladh", 0b0, 0b0, 0b0>; @@ -3021,10 +3403,10 @@ defm MVE_VQDMLSDHX : MVE_VQxDMLxDH_multi<"vqdmlsdhx", 0b1, 0b0, 0b1>; defm MVE_VQRDMLSDH : MVE_VQxDMLxDH_multi<"vqrdmlsdh", 0b0, 0b1, 0b1>; defm MVE_VQRDMLSDHX : MVE_VQxDMLxDH_multi<"vqrdmlsdhx", 0b1, 0b1, 0b1>; -class MVE_VCMUL pattern=[]> +class MVE_VCMUL pattern=[]> : MVE_qDest_qSrc { + "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> { bits<4> Qn; bits<2> rot; @@ -3041,13 +3423,13 @@ class MVE_VCMUL pattern=[]> } def MVE_VCMULf16 : MVE_VCMUL<"vcmul", "f16", 0b0>; -def MVE_VCMULf32 : MVE_VCMUL<"vcmul", "f32", 0b1>; +def MVE_VCMULf32 : MVE_VCMUL<"vcmul", "f32", 0b1, "@earlyclobber $Qd">; class MVE_VMULL bits_21_20, - bit T, list pattern=[]> + bit T, string cstr, list pattern=[]> : MVE_qDest_qSrc { + vpred_r, cstr, pattern> { bits<4> Qd; bits<4> Qn; bits<4> Qm; @@ -3063,9 +3445,9 @@ class MVE_VMULL bits_21_20, } multiclass MVE_VMULL_multi bits_21_20> { - def bh : MVE_VMULL; - def th : MVE_VMULL; + bit bit_28, bits<2> bits_21_20, string cstr=""> { + def bh : MVE_VMULL; + def th : MVE_VMULL; } // For integer multiplies, bits 21:20 encode size, and bit 28 signedness. 
@@ -3074,10 +3456,10 @@ multiclass MVE_VMULL_multi; defm MVE_VMULLs16 : MVE_VMULL_multi<"vmull", "s16", 0b0, 0b01>; -defm MVE_VMULLs32 : MVE_VMULL_multi<"vmull", "s32", 0b0, 0b10>; +defm MVE_VMULLs32 : MVE_VMULL_multi<"vmull", "s32", 0b0, 0b10, "@earlyclobber $Qd">; defm MVE_VMULLu8 : MVE_VMULL_multi<"vmull", "u8", 0b1, 0b00>; defm MVE_VMULLu16 : MVE_VMULL_multi<"vmull", "u16", 0b1, 0b01>; -defm MVE_VMULLu32 : MVE_VMULL_multi<"vmull", "u32", 0b1, 0b10>; +defm MVE_VMULLu32 : MVE_VMULL_multi<"vmull", "u32", 0b1, 0b10, "@earlyclobber $Qd">; defm MVE_VMULLp8 : MVE_VMULL_multi<"vmull", "p8", 0b0, 0b11>; defm MVE_VMULLp16 : MVE_VMULL_multi<"vmull", "p16", 0b1, 0b11>; @@ -3144,6 +3526,18 @@ defm MVE_VQMOVNu32 : MVE_VxMOVxN_halves<"vqmovn", "u32", 0b1, 0b1, 0b01>; defm MVE_VQMOVUNs16 : MVE_VxMOVxN_halves<"vqmovun", "s16", 0b0, 0b0, 0b00>; defm MVE_VQMOVUNs32 : MVE_VxMOVxN_halves<"vqmovun", "s32", 0b0, 0b0, 0b01>; +def MVEvmovn : SDNode<"ARMISD::VMOVN", SDTARMVEXT>; +let Predicates = [HasMVEInt] in { + def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 0))), + (v8i16 (MVE_VMOVNi32bh (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>; + def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 1))), + (v8i16 (MVE_VMOVNi32th (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>; + def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 0))), + (v16i8 (MVE_VMOVNi16bh (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>; + def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 1))), + (v16i8 (MVE_VMOVNi16th (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>; +} + class MVE_VCVT_ff pattern=[]> : MVE_qDest_qSrc; defm MVE_VCVTf32f16 : MVE_VCVT_ff_halves<"f32.f16", 0b1>; class MVE_VxCADD size, bit halve, - list pattern=[]> + string cstr="", list pattern=[]> : MVE_qDest_qSrc { + "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> { bits<4> Qn; bit rot; @@ -3186,11 +3579,11 @@ class MVE_VxCADD size, bit halve, def MVE_VCADDi8 : MVE_VxCADD<"vcadd", "i8", 0b00, 0b1>; def MVE_VCADDi16 : MVE_VxCADD<"vcadd", "i16", 0b01, 0b1>; -def MVE_VCADDi32 : MVE_VxCADD<"vcadd", "i32", 0b10, 0b1>; +def MVE_VCADDi32 : MVE_VxCADD<"vcadd", "i32", 0b10, 0b1, "@earlyclobber $Qd">; def MVE_VHCADDs8 : MVE_VxCADD<"vhcadd", "s8", 0b00, 0b0>; def MVE_VHCADDs16 : MVE_VxCADD<"vhcadd", "s16", 0b01, 0b0>; -def MVE_VHCADDs32 : MVE_VxCADD<"vhcadd", "s32", 0b10, 0b0>; +def MVE_VHCADDs32 : MVE_VxCADD<"vhcadd", "s32", 0b10, 0b0, "@earlyclobber $Qd">; class MVE_VADCSBC pattern=[]> @@ -3220,10 +3613,10 @@ def MVE_VSBC : MVE_VADCSBC<"vsbc", 0b0, 0b1, (ins cl_FPSCR_NZCV:$carryin)>; def MVE_VSBCI : MVE_VADCSBC<"vsbci", 0b1, 0b1, (ins)>; class MVE_VQDMULL pattern=[]> + string cstr="", list pattern=[]> : MVE_qDest_qSrc { + vpred_r, cstr, pattern> { bits<4> Qn; let Inst{28} = size; @@ -3236,13 +3629,13 @@ class MVE_VQDMULL { - def bh : MVE_VQDMULL<"vqdmullb", suffix, size, 0b0>; - def th : MVE_VQDMULL<"vqdmullt", suffix, size, 0b1>; +multiclass MVE_VQDMULL_halves { + def bh : MVE_VQDMULL<"vqdmullb", suffix, size, 0b0, cstr>; + def th : MVE_VQDMULL<"vqdmullt", suffix, size, 0b1, cstr>; } defm MVE_VQDMULLs16 : MVE_VQDMULL_halves<"s16", 0b0>; -defm MVE_VQDMULLs32 : MVE_VQDMULL_halves<"s32", 0b1>; +defm MVE_VQDMULLs32 : MVE_VQDMULL_halves<"s32", 0b1, "@earlyclobber $Qd">; // end of mve_qDest_qSrc @@ -3267,9 +3660,9 @@ class MVE_qr_base pattern=[]> +class MVE_qDest_rSrc pattern=[]> : MVE_qr_base<(outs MQPR:$Qd), (ins MQPR:$Qn, rGPR:$Rm), - NoItinerary, iname, suffix, "$Qd, $Qn, $Rm", vpred_r, "", + NoItinerary, iname, suffix, "$Qd, 
$Qn, $Rm", vpred_r, cstr, pattern>; class MVE_qDestSrc_rSrc pattern=[]> @@ -3291,7 +3684,7 @@ class MVE_qDest_single_rSrc pattern=[]> class MVE_VADDSUB_qr size, bit bit_5, bit bit_12, bit bit_16, bit bit_28, list pattern=[]> - : MVE_qDest_rSrc { + : MVE_qDest_rSrc { let Inst{28} = bit_28; let Inst{21-20} = size; @@ -3299,6 +3692,7 @@ class MVE_VADDSUB_qr size, let Inst{12} = bit_12; let Inst{8} = 0b1; let Inst{5} = bit_5; + let validForTailPredication = 1; } multiclass MVE_VADDSUB_qr_sizes; defm MVE_VQSUB_qr_s : MVE_VADDSUB_qr_sizes<"vqsub", "s", 0b1, 0b1, 0b0, 0b0>; defm MVE_VQSUB_qr_u : MVE_VADDSUB_qr_sizes<"vqsub", "u", 0b1, 0b1, 0b0, 0b1>; +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 (add (v16i8 MQPR:$val1), (v16i8 (ARMvdup GPR:$val2)))), + (v16i8 (MVE_VADD_qr_i8 (v16i8 MQPR:$val1), (i32 GPR:$val2)))>; + def : Pat<(v8i16 (add (v8i16 MQPR:$val1), (v8i16 (ARMvdup GPR:$val2)))), + (v8i16 (MVE_VADD_qr_i16 (v8i16 MQPR:$val1), (i32 GPR:$val2)))>; + def : Pat<(v4i32 (add (v4i32 MQPR:$val1), (v4i32 (ARMvdup GPR:$val2)))), + (v4i32 (MVE_VADD_qr_i32 (v4i32 MQPR:$val1), (i32 GPR:$val2)))>; +} + +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 (sub (v16i8 MQPR:$val1), (v16i8 (ARMvdup GPR:$val2)))), + (v16i8 (MVE_VSUB_qr_i8 (v16i8 MQPR:$val1), (i32 GPR:$val2)))>; + def : Pat<(v8i16 (sub (v8i16 MQPR:$val1), (v8i16 (ARMvdup GPR:$val2)))), + (v8i16 (MVE_VSUB_qr_i16 (v8i16 MQPR:$val1), (i32 GPR:$val2)))>; + def : Pat<(v4i32 (sub (v4i32 MQPR:$val1), (v4i32 (ARMvdup GPR:$val2)))), + (v4i32 (MVE_VSUB_qr_i32 (v4i32 MQPR:$val1), (i32 GPR:$val2)))>; +} + class MVE_VQDMULL_qr pattern=[]> - : MVE_qDest_rSrc { + bit T, string cstr="", list pattern=[]> + : MVE_qDest_rSrc { let Inst{28} = size; let Inst{21-20} = 0b11; @@ -3332,18 +3744,18 @@ class MVE_VQDMULL_qr { - def bh : MVE_VQDMULL_qr<"vqdmullb", suffix, size, 0b0>; - def th : MVE_VQDMULL_qr<"vqdmullt", suffix, size, 0b1>; +multiclass MVE_VQDMULL_qr_halves { + def bh : MVE_VQDMULL_qr<"vqdmullb", suffix, size, 0b0, cstr>; + def th : MVE_VQDMULL_qr<"vqdmullt", suffix, size, 0b1, cstr>; } defm MVE_VQDMULL_qr_s16 : MVE_VQDMULL_qr_halves<"s16", 0b0>; -defm MVE_VQDMULL_qr_s32 : MVE_VQDMULL_qr_halves<"s32", 0b1>; +defm MVE_VQDMULL_qr_s32 : MVE_VQDMULL_qr_halves<"s32", 0b1, "@earlyclobber $Qd">; class MVE_VxADDSUB_qr bits_21_20, bit subtract, list pattern=[]> - : MVE_qDest_rSrc { + : MVE_qDest_rSrc { let Inst{28} = bit_28; let Inst{21-20} = bits_21_20; @@ -3351,6 +3763,7 @@ class MVE_VxADDSUB_qr; @@ -3388,6 +3801,7 @@ class MVE_VxSHL_qr size, let Inst{12-8} = 0b11110; let Inst{7} = bit_7; let Inst{6-4} = 0b110; + let validForTailPredication = 1; } multiclass MVE_VxSHL_qr_types { @@ -3421,7 +3835,7 @@ let Predicates = [HasMVEInt] in { } class MVE_VBRSR size, list pattern=[]> - : MVE_qDest_rSrc { + : MVE_qDest_rSrc { let Inst{28} = 0b1; let Inst{21-20} = size; @@ -3429,15 +3843,27 @@ class MVE_VBRSR size, list pattern=[]> let Inst{12} = 0b1; let Inst{8} = 0b0; let Inst{5} = 0b1; + let validForTailPredication = 1; } def MVE_VBRSR8 : MVE_VBRSR<"vbrsr", "8", 0b00>; def MVE_VBRSR16 : MVE_VBRSR<"vbrsr", "16", 0b01>; def MVE_VBRSR32 : MVE_VBRSR<"vbrsr", "32", 0b10>; +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 ( bitreverse (v16i8 MQPR:$val1))), + (v16i8 ( MVE_VBRSR8 (v16i8 MQPR:$val1), (t2MOVi (i32 8)) ))>; + + def : Pat<(v4i32 ( bitreverse (v4i32 MQPR:$val1))), + (v4i32 ( MVE_VBRSR32 (v4i32 MQPR:$val1), (t2MOVi (i32 32)) ))>; + + def : Pat<(v8i16 ( bitreverse (v8i16 MQPR:$val1))), + (v8i16 ( MVE_VBRSR16 (v8i16 MQPR:$val1), (t2MOVi (i32 16)) ))>; +} + 
class MVE_VMUL_qr_int size, list pattern=[]> - : MVE_qDest_rSrc { + : MVE_qDest_rSrc { let Inst{28} = 0b0; let Inst{21-20} = size; @@ -3445,15 +3871,25 @@ class MVE_VMUL_qr_int; def MVE_VMUL_qr_i16 : MVE_VMUL_qr_int<"vmul", "i16", 0b01>; def MVE_VMUL_qr_i32 : MVE_VMUL_qr_int<"vmul", "i32", 0b10>; +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 (mul (v16i8 MQPR:$val1), (v16i8 (ARMvdup GPR:$val2)))), + (v16i8 (MVE_VMUL_qr_i8 (v16i8 MQPR:$val1), (i32 GPR:$val2)))>; + def : Pat<(v8i16 (mul (v8i16 MQPR:$val1), (v8i16 (ARMvdup GPR:$val2)))), + (v8i16 (MVE_VMUL_qr_i16 (v8i16 MQPR:$val1), (i32 GPR:$val2)))>; + def : Pat<(v4i32 (mul (v4i32 MQPR:$val1), (v4i32 (ARMvdup GPR:$val2)))), + (v4i32 (MVE_VMUL_qr_i32 (v4i32 MQPR:$val1), (i32 GPR:$val2)))>; +} + class MVE_VxxMUL_qr bits_21_20, list pattern=[]> - : MVE_qDest_rSrc { + : MVE_qDest_rSrc { let Inst{28} = bit_28; let Inst{21-20} = bits_21_20; @@ -3471,14 +3907,14 @@ def MVE_VQRDMULH_qr_s8 : MVE_VxxMUL_qr<"vqrdmulh", "s8", 0b1, 0b00>; def MVE_VQRDMULH_qr_s16 : MVE_VxxMUL_qr<"vqrdmulh", "s16", 0b1, 0b01>; def MVE_VQRDMULH_qr_s32 : MVE_VxxMUL_qr<"vqrdmulh", "s32", 0b1, 0b10>; -let Predicates = [HasMVEFloat] in { +let Predicates = [HasMVEFloat], validForTailPredication = 1 in { def MVE_VMUL_qr_f16 : MVE_VxxMUL_qr<"vmul", "f16", 0b1, 0b11>; def MVE_VMUL_qr_f32 : MVE_VxxMUL_qr<"vmul", "f32", 0b0, 0b11>; } class MVE_VFMAMLA_qr bits_21_20, bit S, - list pattern=[]> + bit bit_28, bits<2> bits_21_20, bit S, + list pattern=[]> : MVE_qDestSrc_rSrc { let Inst{28} = bit_28; @@ -3487,6 +3923,7 @@ class MVE_VFMAMLA_qr; @@ -3503,6 +3940,21 @@ def MVE_VMLAS_qr_u8 : MVE_VFMAMLA_qr<"vmlas", "u8", 0b1, 0b00, 0b1>; def MVE_VMLAS_qr_u16 : MVE_VFMAMLA_qr<"vmlas", "u16", 0b1, 0b01, 0b1>; def MVE_VMLAS_qr_u32 : MVE_VFMAMLA_qr<"vmlas", "u32", 0b1, 0b10, 0b1>; +let Predicates = [HasMVEInt] in { + def : Pat<(v4i32 (add (v4i32 MQPR:$src1), + (v4i32 (mul (v4i32 MQPR:$src2), + (v4i32 (ARMvdup (i32 rGPR:$x))))))), + (v4i32 (MVE_VMLA_qr_u32 $src1, $src2, $x))>; + def : Pat<(v8i16 (add (v8i16 MQPR:$src1), + (v8i16 (mul (v8i16 MQPR:$src2), + (v8i16 (ARMvdup (i32 rGPR:$x))))))), + (v8i16 (MVE_VMLA_qr_u16 $src1, $src2, $x))>; + def : Pat<(v16i8 (add (v16i8 MQPR:$src1), + (v16i8 (mul (v16i8 MQPR:$src2), + (v16i8 (ARMvdup (i32 rGPR:$x))))))), + (v16i8 (MVE_VMLA_qr_u8 $src1, $src2, $x))>; +} + let Predicates = [HasMVEFloat] in { def MVE_VFMA_qr_f16 : MVE_VFMAMLA_qr<"vfma", "f16", 0b1, 0b11, 0b0>; def MVE_VFMA_qr_f32 : MVE_VFMAMLA_qr<"vfma", "f32", 0b0, 0b11, 0b0>; @@ -3555,6 +4007,7 @@ class MVE_VxDUP size, bit bit_12, let Inst{7} = imm{1}; let Inst{6-1} = 0b110111; let Inst{0} = imm{0}; + let validForTailPredication = 1; } def MVE_VIDUPu8 : MVE_VxDUP<"vidup", "u8", 0b00, 0b0>; @@ -3589,6 +4042,7 @@ class MVE_VxWDUP size, bit bit_12, let Inst{6-4} = 0b110; let Inst{3-1} = Rm{3-1}; let Inst{0} = imm{0}; + let validForTailPredication = 1; } def MVE_VIWDUPu8 : MVE_VxWDUP<"viwdup", "u8", 0b00, 0b0>; @@ -3599,6 +4053,7 @@ def MVE_VDWDUPu8 : MVE_VxWDUP<"vdwdup", "u8", 0b00, 0b1>; def MVE_VDWDUPu16 : MVE_VxWDUP<"vdwdup", "u16", 0b01, 0b1>; def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>; +let hasSideEffects = 1 in class MVE_VCTP size, list pattern=[]> : MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix, "$Rn", vpred_n, "", pattern> { @@ -3614,6 +4069,7 @@ class MVE_VCTP size, list pattern=[]> let Constraints = ""; let DecoderMethod = "DecodeMveVCTP"; + let validForTailPredication = 1; } def MVE_VCTP8 : MVE_VCTP<"8", 0b00>; @@ -3621,6 +4077,15 @@ def MVE_VCTP16 
: MVE_VCTP<"16", 0b01>; def MVE_VCTP32 : MVE_VCTP<"32", 0b10>; def MVE_VCTP64 : MVE_VCTP<"64", 0b11>; +let Predicates = [HasMVEInt] in { + def : Pat<(int_arm_vctp8 rGPR:$Rn), + (v16i1 (MVE_VCTP8 rGPR:$Rn))>; + def : Pat<(int_arm_vctp16 rGPR:$Rn), + (v8i1 (MVE_VCTP16 rGPR:$Rn))>; + def : Pat<(int_arm_vctp32 rGPR:$Rn), + (v4i1 (MVE_VCTP32 rGPR:$Rn))>; +} + // end of mve_qDest_rSrc // start of coproc mov @@ -3863,6 +4328,7 @@ class MVE_VLDRSTR_base size, dag iops, string asm, list patte let Inst{7} = fc{0}; let Inst{4} = 0b0; - let Defs = [VPR, P0]; + let Defs = [VPR]; + let validForTailPredication = 1; } class MVE_VPTt1 size, dag iops> @@ -4177,11 +4644,12 @@ class MVE_VPTt1 size, dag iops> let Inst{5} = Qm{3}; let Inst{3-1} = Qm{2-0}; let Inst{0} = fc{1}; + let validForTailPredication = 1; } class MVE_VPTt1i size> : MVE_VPTt1 { + (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_i:$fc)> { let Inst{12} = 0b0; let Inst{0} = 0b0; } @@ -4192,7 +4660,7 @@ def MVE_VPTv16i8 : MVE_VPTt1i<"i8", 0b00>; class MVE_VPTt1u size> : MVE_VPTt1 { + (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_u:$fc)> { let Inst{12} = 0b0; let Inst{0} = 0b1; } @@ -4203,7 +4671,7 @@ def MVE_VPTv16u8 : MVE_VPTt1u<"u8", 0b00>; class MVE_VPTt1s size> : MVE_VPTt1 { + (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_s:$fc)> { let Inst{12} = 0b1; } @@ -4225,7 +4693,7 @@ class MVE_VPTt2 size, dag iops> class MVE_VPTt2i size> : MVE_VPTt2 { + (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_i:$fc)> { let Inst{12} = 0b0; let Inst{5} = 0b0; } @@ -4236,7 +4704,7 @@ def MVE_VPTv16i8r : MVE_VPTt2i<"i8", 0b00>; class MVE_VPTt2u size> : MVE_VPTt2 { + (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_u:$fc)> { let Inst{12} = 0b0; let Inst{5} = 0b1; } @@ -4247,7 +4715,7 @@ def MVE_VPTv16u8r : MVE_VPTt2u<"u8", 0b00>; class MVE_VPTt2s size> : MVE_VPTt2 { + (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_s:$fc)> { let Inst{12} = 0b1; } @@ -4276,12 +4744,13 @@ class MVE_VPTf pattern= let Inst{7} = fc{0}; let Inst{4} = 0b0; - let Defs = [P0]; + let Defs = [VPR]; let Predicates = [HasMVEFloat]; + let validForTailPredication = 1; } class MVE_VPTft1 - : MVE_VPTf { bits<3> fc; bits<4> Qm; @@ -4296,7 +4765,7 @@ def MVE_VPTv4f32 : MVE_VPTft1<"f32", 0b0>; def MVE_VPTv8f16 : MVE_VPTft1<"f16", 0b1>; class MVE_VPTft2 - : MVE_VPTf { bits<3> fc; bits<4> Rm; @@ -4322,7 +4791,8 @@ def MVE_VPST : MVE_MI<(outs ), (ins vpt_mask:$Mk), NoItinerary, let Unpredictable{7} = 0b1; let Unpredictable{5} = 0b1; - let Defs = [P0]; + let Uses = [VPR]; + let validForTailPredication = 1; } def MVE_VPSEL : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary, @@ -4346,6 +4816,7 @@ def MVE_VPSEL : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary, let Inst{4} = 0b0; let Inst{3-1} = Qm{2-0}; let Inst{0} = 0b1; + let validForTailPredication = 1; } foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32", @@ -4353,19 +4824,113 @@ foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32", def : MVEInstAlias<"vpsel${vp}." 
# suffix # "\t$Qd, $Qn, $Qm", (MVE_VPSEL MQPR:$Qd, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>; -def MVE_VPNOT : MVE_p<(outs), (ins), NoItinerary, +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 (vselect (v16i1 VCCR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), + (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), + (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), + (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + + def : Pat<(v8f16 (vselect (v8i1 VCCR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))), + (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + def : Pat<(v4f32 (vselect (v4i1 VCCR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))), + (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + + def : Pat<(v16i8 (vselect (v16i8 MQPR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), + (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, + (MVE_VCMPi8 (v16i8 MQPR:$pred), (MVE_VMOVimmi8 0), 1)))>; + def : Pat<(v8i16 (vselect (v8i16 MQPR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), + (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, + (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), 1)))>; + def : Pat<(v4i32 (vselect (v4i32 MQPR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), + (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, + (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>; + + def : Pat<(v8f16 (vselect (v8i16 MQPR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))), + (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, + (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), 1)))>; + def : Pat<(v4f32 (vselect (v4i32 MQPR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))), + (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, + (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>; + + // Pred <-> Int + def : Pat<(v16i8 (zext (v16i1 VCCR:$pred))), + (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>; + def : Pat<(v8i16 (zext (v8i1 VCCR:$pred))), + (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>; + def : Pat<(v4i32 (zext (v4i1 VCCR:$pred))), + (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>; + + def : Pat<(v16i8 (sext (v16i1 VCCR:$pred))), + (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>; + def : Pat<(v8i16 (sext (v8i1 VCCR:$pred))), + (v8i16 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>; + def : Pat<(v4i32 (sext (v4i1 VCCR:$pred))), + (v4i32 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>; + + def : Pat<(v16i8 (anyext (v16i1 VCCR:$pred))), + (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>; + def : Pat<(v8i16 (anyext (v8i1 VCCR:$pred))), + (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>; + def : Pat<(v4i32 (anyext (v4i1 VCCR:$pred))), + (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>; + + def : Pat<(v16i1 (trunc (v16i8 MQPR:$v1))), + (v16i1 (MVE_VCMPi32r (v16i8 MQPR:$v1), ZR, 1))>; + def : Pat<(v8i1 (trunc (v8i16 MQPR:$v1))), + (v8i1 (MVE_VCMPi32r (v8i16 MQPR:$v1), ZR, 1))>; + def : Pat<(v4i1 (trunc (v4i32 MQPR:$v1))), + (v4i1 (MVE_VCMPi32r (v4i32 MQPR:$v1), ZR, 1))>; +} + +let Predicates = [HasMVEFloat] in { + // Pred <-> Float + // 112 is 1.0 in float + def : Pat<(v4f32 (uint_to_fp (v4i1 VCCR:$pred))), + (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 112)), (v4f32 (MVE_VMOVimmi32 0)), 0, VCCR:$pred))>; + // 2620 is 1.0 in half + def :
Pat<(v8f16 (uint_to_fp (v8i1 VCCR:$pred))), + (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2620)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>; + // 240 is -1.0 in float + def : Pat<(v4f32 (sint_to_fp (v4i1 VCCR:$pred))), + (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 240)), (v4f32 (MVE_VMOVimmi32 0)), 0, VCCR:$pred))>; + // 2748 is -1.0 in half + def : Pat<(v8f16 (sint_to_fp (v8i1 VCCR:$pred))), + (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2748)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>; + + def : Pat<(v4i1 (fp_to_uint (v4f32 MQPR:$v1))), + (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>; + def : Pat<(v8i1 (fp_to_uint (v8f16 MQPR:$v1))), + (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>; + def : Pat<(v4i1 (fp_to_sint (v4f32 MQPR:$v1))), + (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>; + def : Pat<(v8i1 (fp_to_sint (v8f16 MQPR:$v1))), + (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>; +} + +def MVE_VPNOT : MVE_p<(outs VCCR:$P0), (ins VCCR:$P0_in), NoItinerary, "vpnot", "", "", vpred_n, "", []> { let Inst{31-0} = 0b11111110001100010000111101001101; let Unpredictable{19-17} = 0b111; let Unpredictable{12} = 0b1; let Unpredictable{7} = 0b1; let Unpredictable{5} = 0b1; - let Defs = [P0]; - let Uses = [P0]; let Constraints = ""; + let DecoderMethod = "DecodeMVEVPNOT"; } +let Predicates = [HasMVEInt] in { + def : Pat<(v4i1 (xor (v4i1 VCCR:$pred), (v4i1 (predicate_cast (i32 65535))))), + (v4i1 (MVE_VPNOT (v4i1 VCCR:$pred)))>; + def : Pat<(v8i1 (xor (v8i1 VCCR:$pred), (v8i1 (predicate_cast (i32 65535))))), + (v8i1 (MVE_VPNOT (v8i1 VCCR:$pred)))>; + def : Pat<(v16i1 (xor (v16i1 VCCR:$pred), (v16i1 (predicate_cast (i32 65535))))), + (v16i1 (MVE_VPNOT (v16i1 VCCR:$pred)))>; +} + + class MVE_loltp_start size> : t2LOL<(outs GPRlr:$LR), iops, asm, ops> { bits<4> Rn; @@ -4433,159 +4998,440 @@ def MVE_LCTP : MVE_loltp_end<(outs), (ins pred:$p), "lctp${p}", ""> { // Patterns //===----------------------------------------------------------------------===// -class MVE_unpred_vector_store_typed + : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7:$addr), + (RegImmInst (Ty MQPR:$val), t2addrmode_imm7:$addr)>; +class MVE_vector_maskedstore_typed + : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7:$addr, VCCR:$pred), + (RegImmInst (Ty MQPR:$val), t2addrmode_imm7:$addr, (i32 1), VCCR:$pred)>; + +multiclass MVE_vector_store { + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; +} + +class MVE_vector_load_typed + : Pat<(Ty (LoadKind t2addrmode_imm7:$addr)), + (Ty (RegImmInst t2addrmode_imm7:$addr))>; +class MVE_vector_maskedload_typed + : Pat<(Ty (LoadKind t2addrmode_imm7:$addr, VCCR:$pred, (Ty NEONimmAllZerosV))), + (Ty (RegImmInst t2addrmode_imm7:$addr, (i32 1), VCCR:$pred))>; + +multiclass MVE_vector_load { + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; +} + +class MVE_vector_offset_store_typed - : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7:$addr), - (RegImmInst (Ty MQPR:$val), t2addrmode_imm7:$addr)>; + : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset:$addr), + (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset:$addr)>; -multiclass MVE_unpred_vector_store { - def : MVE_unpred_vector_store_typed; - def : MVE_unpred_vector_store_typed; - def : MVE_unpred_vector_store_typed; - def 
: MVE_unpred_vector_store_typed; - def : MVE_unpred_vector_store_typed; - def : MVE_unpred_vector_store_typed; - def : MVE_unpred_vector_store_typed; -} - -class MVE_unpred_vector_load_typed - : Pat<(Ty (LoadKind t2addrmode_imm7:$addr)), - (Ty (RegImmInst t2addrmode_imm7:$addr))>; - -multiclass MVE_unpred_vector_load { - def : MVE_unpred_vector_load_typed; - def : MVE_unpred_vector_load_typed; - def : MVE_unpred_vector_load_typed; - def : MVE_unpred_vector_load_typed; - def : MVE_unpred_vector_load_typed; - def : MVE_unpred_vector_load_typed; - def : MVE_unpred_vector_load_typed; -} + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; +} + +def aligned32_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), + (pre_store node:$val, node:$ptr, node:$offset), [{ + return cast(N)->getAlignment() >= 4; +}]>; +def aligned32_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), + (post_store node:$val, node:$ptr, node:$offset), [{ + return cast(N)->getAlignment() >= 4; +}]>; +def aligned16_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), + (pre_store node:$val, node:$ptr, node:$offset), [{ + return cast(N)->getAlignment() >= 2; +}]>; +def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), + (post_store node:$val, node:$ptr, node:$offset), [{ + return cast(N)->getAlignment() >= 2; +}]>; + + +def maskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (masked_ld node:$ptr, node:$pred, node:$passthru), [{ + auto *Ld = cast(N); + return Ld->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def sextmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (maskedload8 node:$ptr, node:$pred, node:$passthru), [{ + return cast(N)->getExtensionType() == ISD::SEXTLOAD; +}]>; +def zextmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (maskedload8 node:$ptr, node:$pred, node:$passthru), [{ + return cast(N)->getExtensionType() == ISD::ZEXTLOAD; +}]>; +def extmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (maskedload8 node:$ptr, node:$pred, node:$passthru), [{ + auto *Ld = cast(N); + EVT ScalarVT = Ld->getMemoryVT().getScalarType(); + return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD; +}]>; +def alignedmaskedload16: PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (masked_ld node:$ptr, node:$pred, node:$passthru), [{ + auto *Ld = cast(N); + EVT ScalarVT = Ld->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && Ld->getAlignment() >= 2; +}]>; +def sextmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{ + return cast(N)->getExtensionType() == ISD::SEXTLOAD; +}]>; +def zextmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{ + return cast(N)->getExtensionType() == ISD::ZEXTLOAD; +}]>; +def extmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{ + auto *Ld = cast(N); + EVT ScalarVT = Ld->getMemoryVT().getScalarType(); + return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD; +}]>; +def alignedmaskedload32: PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + 
(masked_ld node:$ptr, node:$pred, node:$passthru), [{ + auto *Ld = cast(N); + EVT ScalarVT = Ld->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && Ld->getAlignment() >= 4; +}]>; + +def maskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), + (masked_st node:$val, node:$ptr, node:$pred), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def truncatingmaskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), + (maskedstore8 node:$val, node:$ptr, node:$pred), [{ + return cast(N)->isTruncatingStore(); +}]>; +def maskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred), + (masked_st node:$val, node:$ptr, node:$pred), [{ + auto *St = cast(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; +}]>; + +def truncatingmaskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred), + (maskedstore16 node:$val, node:$ptr, node:$pred), [{ + return cast(N)->isTruncatingStore(); +}]>; +def maskedstore32 : PatFrag<(ops node:$val, node:$ptr, node:$pred), + (masked_st node:$val, node:$ptr, node:$pred), [{ + auto *St = cast(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4; +}]>; let Predicates = [HasMVEInt, IsLE] in { - defm : MVE_unpred_vector_store; - defm : MVE_unpred_vector_store; - defm : MVE_unpred_vector_store; + // Stores + defm : MVE_vector_store; + defm : MVE_vector_store; + defm : MVE_vector_store; - defm : MVE_unpred_vector_load; - defm : MVE_unpred_vector_load; - defm : MVE_unpred_vector_load; + // Loads + defm : MVE_vector_load; + defm : MVE_vector_load; + defm : MVE_vector_load; - def : Pat<(v16i1 (load t2addrmode_imm7<2>:$addr)), - (v16i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>; - def : Pat<(v8i1 (load t2addrmode_imm7<2>:$addr)), - (v8i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>; - def : Pat<(v4i1 (load t2addrmode_imm7<2>:$addr)), - (v4i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>; + // Pre/post inc stores + defm : MVE_vector_offset_store; + defm : MVE_vector_offset_store; + defm : MVE_vector_offset_store; + defm : MVE_vector_offset_store; + defm : MVE_vector_offset_store; + defm : MVE_vector_offset_store; } let Predicates = [HasMVEInt, IsBE] in { - def : MVE_unpred_vector_store_typed; - def : MVE_unpred_vector_store_typed; - def : MVE_unpred_vector_store_typed; - def : MVE_unpred_vector_store_typed; - def : MVE_unpred_vector_store_typed; - - def : MVE_unpred_vector_load_typed; - def : MVE_unpred_vector_load_typed; - def : MVE_unpred_vector_load_typed; - def : MVE_unpred_vector_load_typed; - def : MVE_unpred_vector_load_typed; + // Aligned Stores + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + + // Aligned Loads + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + + // Other unaligned loads/stores need to go through a VREV + def : Pat<(v2f64 (load t2addrmode_imm7<0>:$addr)), + (v2f64 (MVE_VREV64_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>; + def : Pat<(v2i64 (load t2addrmode_imm7<0>:$addr)), + (v2i64 (MVE_VREV64_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>; + def : Pat<(v4i32 (load t2addrmode_imm7<0>:$addr)), + (v4i32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>; + def : Pat<(v4f32 (load t2addrmode_imm7<0>:$addr)), +
(v4f32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>; + def : Pat<(v8i16 (load t2addrmode_imm7<0>:$addr)), + (v8i16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>; + def : Pat<(v8f16 (load t2addrmode_imm7<0>:$addr)), + (v8f16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>; + def : Pat<(store (v2f64 MQPR:$val), t2addrmode_imm7<0>:$addr), + (MVE_VSTRBU8 (MVE_VREV64_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>; + def : Pat<(store (v2i64 MQPR:$val), t2addrmode_imm7<0>:$addr), + (MVE_VSTRBU8 (MVE_VREV64_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>; + def : Pat<(store (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr), + (MVE_VSTRBU8 (MVE_VREV32_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>; + def : Pat<(store (v4f32 MQPR:$val), t2addrmode_imm7<0>:$addr), + (MVE_VSTRBU8 (MVE_VREV32_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>; + def : Pat<(store (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr), + (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>; + def : Pat<(store (v8f16 MQPR:$val), t2addrmode_imm7<0>:$addr), + (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>; + + // Pre/Post inc stores + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; } +let Predicates = [HasMVEInt] in { + // Aligned masked store, shared between LE and BE + def : MVE_vector_maskedstore_typed; + def : MVE_vector_maskedstore_typed; + def : MVE_vector_maskedstore_typed; + def : MVE_vector_maskedstore_typed; + def : MVE_vector_maskedstore_typed; + // Truncating stores + def : Pat<(truncatingmaskedstore8 (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred), + (MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(truncatingmaskedstore8 (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred), + (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(truncatingmaskedstore16 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr, VCCR:$pred), + (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred)>; + // Aligned masked loads + def : MVE_vector_maskedload_typed; + def : MVE_vector_maskedload_typed; + def : MVE_vector_maskedload_typed; + def : MVE_vector_maskedload_typed; + def : MVE_vector_maskedload_typed; + // Extending masked loads. 
+ def : Pat<(v8i16 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, + (v8i16 NEONimmAllZerosV))), + (v8i16 (MVE_VLDRBS16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v4i32 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, + (v4i32 NEONimmAllZerosV))), + (v4i32 (MVE_VLDRBS32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v8i16 (zextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, + (v8i16 NEONimmAllZerosV))), + (v8i16 (MVE_VLDRBU16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v4i32 (zextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, + (v4i32 NEONimmAllZerosV))), + (v4i32 (MVE_VLDRBU32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v8i16 (extmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, + (v8i16 NEONimmAllZerosV))), + (v8i16 (MVE_VLDRBU16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v4i32 (extmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, + (v4i32 NEONimmAllZerosV))), + (v4i32 (MVE_VLDRBU32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v4i32 (sextmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred, + (v4i32 NEONimmAllZerosV))), + (v4i32 (MVE_VLDRHS32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v4i32 (zextmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred, + (v4i32 NEONimmAllZerosV))), + (v4i32 (MVE_VLDRHU32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v4i32 (extmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred, + (v4i32 NEONimmAllZerosV))), + (v4i32 (MVE_VLDRHU32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>; +} // Widening/Narrowing Loads/Stores +let MinAlignment = 2 in { + def truncstorevi16_align2 : PatFrag<(ops node:$val, node:$ptr), + (truncstorevi16 node:$val, node:$ptr)>; + def post_truncstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset), + (post_truncstvi16 node:$val, node:$base, node:$offset)>; + def pre_truncstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset), + (pre_truncstvi16 node:$val, node:$base, node:$offset)>; +} + let Predicates = [HasMVEInt] in { - def : Pat<(truncstorevi8 (v8i16 MQPR:$val), t2addrmode_imm7<1>:$addr), - (MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<1>:$addr)>; - def : Pat<(truncstorevi8 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr), - (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<1>:$addr)>; - def : Pat<(truncstorevi16 (v4i32 MQPR:$val), t2addrmode_imm7<2>:$addr), - (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<2>:$addr)>; + def : Pat<(truncstorevi8 (v8i16 MQPR:$val), taddrmode_imm7<0>:$addr), + (MVE_VSTRB16 MQPR:$val, taddrmode_imm7<0>:$addr)>; + def : Pat<(truncstorevi8 (v4i32 MQPR:$val), taddrmode_imm7<0>:$addr), + (MVE_VSTRB32 MQPR:$val, taddrmode_imm7<0>:$addr)>; + def : Pat<(truncstorevi16_align2 (v4i32 MQPR:$val), taddrmode_imm7<1>:$addr), + (MVE_VSTRH32 MQPR:$val, taddrmode_imm7<1>:$addr)>; + + def : Pat<(post_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), + (MVE_VSTRB16_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; + def : Pat<(post_truncstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), + (MVE_VSTRB32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; + def : Pat<(post_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr), + (MVE_VSTRH32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>; + + def : Pat<(pre_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), + (MVE_VSTRB16_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; + def : Pat<(pre_truncstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, 
t2am_imm7_offset<0>:$addr), + (MVE_VSTRB32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; + def : Pat<(pre_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr), + (MVE_VSTRH32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>; +} + + +let MinAlignment = 2 in { + def extloadvi16_align2 : PatFrag<(ops node:$ptr), (extloadvi16 node:$ptr)>; + def sextloadvi16_align2 : PatFrag<(ops node:$ptr), (sextloadvi16 node:$ptr)>; + def zextloadvi16_align2 : PatFrag<(ops node:$ptr), (zextloadvi16 node:$ptr)>; } multiclass MVEExtLoad { + string Align, Operand am> { def _Any : Pat<(!cast("v" # DestLanes # "i" # DestElemBits) - (!cast("extloadvi" # SrcElemBits) am:$addr)), + (!cast("extloadvi" # SrcElemBits # Align) am:$addr)), (!cast("MVE_VLDR" # SrcElemType # "U" # DestElemBits) am:$addr)>; def _Z : Pat<(!cast("v" # DestLanes # "i" # DestElemBits) - (!cast("zextloadvi" # SrcElemBits) am:$addr)), + (!cast("zextloadvi" # SrcElemBits # Align) am:$addr)), (!cast("MVE_VLDR" # SrcElemType # "U" # DestElemBits) am:$addr)>; def _S : Pat<(!cast("v" # DestLanes # "i" # DestElemBits) - (!cast("sextloadvi" # SrcElemBits) am:$addr)), + (!cast("sextloadvi" # SrcElemBits # Align) am:$addr)), (!cast("MVE_VLDR" # SrcElemType # "S" # DestElemBits) am:$addr)>; } let Predicates = [HasMVEInt] in { - defm : MVEExtLoad<"4", "32", "8", "B", t2addrmode_imm7<1>>; - defm : MVEExtLoad<"8", "16", "8", "B", t2addrmode_imm7<1>>; - defm : MVEExtLoad<"4", "32", "16", "H", t2addrmode_imm7<2>>; + defm : MVEExtLoad<"4", "32", "8", "B", "", taddrmode_imm7<0>>; + defm : MVEExtLoad<"8", "16", "8", "B", "", taddrmode_imm7<0>>; + defm : MVEExtLoad<"4", "32", "16", "H", "_align2", taddrmode_imm7<1>>; } // Bit convert patterns let Predicates = [HasMVEInt] in { - def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>; - def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v2i64 MQPR:$src))), (v2f64 MQPR:$src)>; + def : Pat<(v2i64 (bitconvert (v2f64 MQPR:$src))), (v2i64 MQPR:$src)>; - def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>; - def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v4f32 MQPR:$src))), (v4i32 MQPR:$src)>; + def : Pat<(v4f32 (bitconvert (v4i32 MQPR:$src))), (v4f32 MQPR:$src)>; - def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16 QPR:$src)>; - def : Pat<(v8f16 (bitconvert (v8i16 QPR:$src))), (v8f16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v8f16 MQPR:$src))), (v8i16 MQPR:$src)>; + def : Pat<(v8f16 (bitconvert (v8i16 MQPR:$src))), (v8f16 MQPR:$src)>; } let Predicates = [IsLE,HasMVEInt] in { - def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; - def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>; - def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>; - def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; - def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; - - def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>; - def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>; - def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (v2i64 QPR:$src)>; - def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>; - def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>; - - def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>; - def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>; - def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 
QPR:$src)>; - def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>; - def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>; - - def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>; - def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>; - def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 QPR:$src)>; - def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>; - def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>; - - def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>; - def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (v8f16 QPR:$src)>; - def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (v8f16 QPR:$src)>; - def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (v8f16 QPR:$src)>; - def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 QPR:$src)>; - - def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>; - def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>; - def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>; - def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>; - def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>; - - def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v4f32 MQPR:$src))), (v2f64 MQPR:$src)>; + def : Pat<(v2f64 (bitconvert (v4i32 MQPR:$src))), (v2f64 MQPR:$src)>; + def : Pat<(v2f64 (bitconvert (v8f16 MQPR:$src))), (v2f64 MQPR:$src)>; + def : Pat<(v2f64 (bitconvert (v8i16 MQPR:$src))), (v2f64 MQPR:$src)>; + def : Pat<(v2f64 (bitconvert (v16i8 MQPR:$src))), (v2f64 MQPR:$src)>; + + def : Pat<(v2i64 (bitconvert (v4f32 MQPR:$src))), (v2i64 MQPR:$src)>; + def : Pat<(v2i64 (bitconvert (v4i32 MQPR:$src))), (v2i64 MQPR:$src)>; + def : Pat<(v2i64 (bitconvert (v8f16 MQPR:$src))), (v2i64 MQPR:$src)>; + def : Pat<(v2i64 (bitconvert (v8i16 MQPR:$src))), (v2i64 MQPR:$src)>; + def : Pat<(v2i64 (bitconvert (v16i8 MQPR:$src))), (v2i64 MQPR:$src)>; + + def : Pat<(v4f32 (bitconvert (v2f64 MQPR:$src))), (v4f32 MQPR:$src)>; + def : Pat<(v4f32 (bitconvert (v2i64 MQPR:$src))), (v4f32 MQPR:$src)>; + def : Pat<(v4f32 (bitconvert (v8f16 MQPR:$src))), (v4f32 MQPR:$src)>; + def : Pat<(v4f32 (bitconvert (v8i16 MQPR:$src))), (v4f32 MQPR:$src)>; + def : Pat<(v4f32 (bitconvert (v16i8 MQPR:$src))), (v4f32 MQPR:$src)>; + + def : Pat<(v4i32 (bitconvert (v2f64 MQPR:$src))), (v4i32 MQPR:$src)>; + def : Pat<(v4i32 (bitconvert (v2i64 MQPR:$src))), (v4i32 MQPR:$src)>; + def : Pat<(v4i32 (bitconvert (v8f16 MQPR:$src))), (v4i32 MQPR:$src)>; + def : Pat<(v4i32 (bitconvert (v8i16 MQPR:$src))), (v4i32 MQPR:$src)>; + def : Pat<(v4i32 (bitconvert (v16i8 MQPR:$src))), (v4i32 MQPR:$src)>; + + def : Pat<(v8f16 (bitconvert (v2f64 MQPR:$src))), (v8f16 MQPR:$src)>; + def : Pat<(v8f16 (bitconvert (v2i64 MQPR:$src))), (v8f16 MQPR:$src)>; + def : Pat<(v8f16 (bitconvert (v4f32 MQPR:$src))), (v8f16 MQPR:$src)>; + def : Pat<(v8f16 (bitconvert (v4i32 MQPR:$src))), (v8f16 MQPR:$src)>; + def : Pat<(v8f16 (bitconvert (v16i8 MQPR:$src))), (v8f16 MQPR:$src)>; + + def : Pat<(v8i16 (bitconvert (v2f64 MQPR:$src))), (v8i16 MQPR:$src)>; + def : Pat<(v8i16 (bitconvert (v2i64 
MQPR:$src))), (v8i16 MQPR:$src)>; + def : Pat<(v8i16 (bitconvert (v4f32 MQPR:$src))), (v8i16 MQPR:$src)>; + def : Pat<(v8i16 (bitconvert (v4i32 MQPR:$src))), (v8i16 MQPR:$src)>; + def : Pat<(v8i16 (bitconvert (v16i8 MQPR:$src))), (v8i16 MQPR:$src)>; + + def : Pat<(v16i8 (bitconvert (v2f64 MQPR:$src))), (v16i8 MQPR:$src)>; + def : Pat<(v16i8 (bitconvert (v2i64 MQPR:$src))), (v16i8 MQPR:$src)>; + def : Pat<(v16i8 (bitconvert (v4f32 MQPR:$src))), (v16i8 MQPR:$src)>; + def : Pat<(v16i8 (bitconvert (v4i32 MQPR:$src))), (v16i8 MQPR:$src)>; + def : Pat<(v16i8 (bitconvert (v8f16 MQPR:$src))), (v16i8 MQPR:$src)>; + def : Pat<(v16i8 (bitconvert (v8i16 MQPR:$src))), (v16i8 MQPR:$src)>; +} + +let Predicates = [IsBE,HasMVEInt] in { + def : Pat<(v2f64 (bitconvert (v4f32 MQPR:$src))), (v2f64 (MVE_VREV64_32 MQPR:$src))>; + def : Pat<(v2f64 (bitconvert (v4i32 MQPR:$src))), (v2f64 (MVE_VREV64_32 MQPR:$src))>; + def : Pat<(v2f64 (bitconvert (v8f16 MQPR:$src))), (v2f64 (MVE_VREV64_16 MQPR:$src))>; + def : Pat<(v2f64 (bitconvert (v8i16 MQPR:$src))), (v2f64 (MVE_VREV64_16 MQPR:$src))>; + def : Pat<(v2f64 (bitconvert (v16i8 MQPR:$src))), (v2f64 (MVE_VREV64_8 MQPR:$src))>; + + def : Pat<(v2i64 (bitconvert (v4f32 MQPR:$src))), (v2i64 (MVE_VREV64_32 MQPR:$src))>; + def : Pat<(v2i64 (bitconvert (v4i32 MQPR:$src))), (v2i64 (MVE_VREV64_32 MQPR:$src))>; + def : Pat<(v2i64 (bitconvert (v8f16 MQPR:$src))), (v2i64 (MVE_VREV64_16 MQPR:$src))>; + def : Pat<(v2i64 (bitconvert (v8i16 MQPR:$src))), (v2i64 (MVE_VREV64_16 MQPR:$src))>; + def : Pat<(v2i64 (bitconvert (v16i8 MQPR:$src))), (v2i64 (MVE_VREV64_8 MQPR:$src))>; + + def : Pat<(v4f32 (bitconvert (v2f64 MQPR:$src))), (v4f32 (MVE_VREV64_32 MQPR:$src))>; + def : Pat<(v4f32 (bitconvert (v2i64 MQPR:$src))), (v4f32 (MVE_VREV64_32 MQPR:$src))>; + def : Pat<(v4f32 (bitconvert (v8f16 MQPR:$src))), (v4f32 (MVE_VREV32_16 MQPR:$src))>; + def : Pat<(v4f32 (bitconvert (v8i16 MQPR:$src))), (v4f32 (MVE_VREV32_16 MQPR:$src))>; + def : Pat<(v4f32 (bitconvert (v16i8 MQPR:$src))), (v4f32 (MVE_VREV32_8 MQPR:$src))>; + + def : Pat<(v4i32 (bitconvert (v2f64 MQPR:$src))), (v4i32 (MVE_VREV64_32 MQPR:$src))>; + def : Pat<(v4i32 (bitconvert (v2i64 MQPR:$src))), (v4i32 (MVE_VREV64_32 MQPR:$src))>; + def : Pat<(v4i32 (bitconvert (v8f16 MQPR:$src))), (v4i32 (MVE_VREV32_16 MQPR:$src))>; + def : Pat<(v4i32 (bitconvert (v8i16 MQPR:$src))), (v4i32 (MVE_VREV32_16 MQPR:$src))>; + def : Pat<(v4i32 (bitconvert (v16i8 MQPR:$src))), (v4i32 (MVE_VREV32_8 MQPR:$src))>; + + def : Pat<(v8f16 (bitconvert (v2f64 MQPR:$src))), (v8f16 (MVE_VREV64_16 MQPR:$src))>; + def : Pat<(v8f16 (bitconvert (v2i64 MQPR:$src))), (v8f16 (MVE_VREV64_16 MQPR:$src))>; + def : Pat<(v8f16 (bitconvert (v4f32 MQPR:$src))), (v8f16 (MVE_VREV32_16 MQPR:$src))>; + def : Pat<(v8f16 (bitconvert (v4i32 MQPR:$src))), (v8f16 (MVE_VREV32_16 MQPR:$src))>; + def : Pat<(v8f16 (bitconvert (v16i8 MQPR:$src))), (v8f16 (MVE_VREV16_8 MQPR:$src))>; + + def : Pat<(v8i16 (bitconvert (v2f64 MQPR:$src))), (v8i16 (MVE_VREV64_16 MQPR:$src))>; + def : Pat<(v8i16 (bitconvert (v2i64 MQPR:$src))), (v8i16 (MVE_VREV64_16 MQPR:$src))>; + def : Pat<(v8i16 (bitconvert (v4f32 MQPR:$src))), (v8i16 (MVE_VREV32_16 MQPR:$src))>; + def : Pat<(v8i16 (bitconvert (v4i32 MQPR:$src))), (v8i16 (MVE_VREV32_16 MQPR:$src))>; + def : Pat<(v8i16 (bitconvert (v16i8 MQPR:$src))), (v8i16 (MVE_VREV16_8 MQPR:$src))>; + + def : Pat<(v16i8 (bitconvert (v2f64 MQPR:$src))), (v16i8 (MVE_VREV64_8 MQPR:$src))>; + def : Pat<(v16i8 (bitconvert (v2i64 MQPR:$src))), (v16i8 (MVE_VREV64_8 MQPR:$src))>; + def 
: Pat<(v16i8 (bitconvert (v4f32 MQPR:$src))), (v16i8 (MVE_VREV32_8 MQPR:$src))>; + def : Pat<(v16i8 (bitconvert (v4i32 MQPR:$src))), (v16i8 (MVE_VREV32_8 MQPR:$src))>; + def : Pat<(v16i8 (bitconvert (v8f16 MQPR:$src))), (v16i8 (MVE_VREV16_8 MQPR:$src))>; + def : Pat<(v16i8 (bitconvert (v8i16 MQPR:$src))), (v16i8 (MVE_VREV16_8 MQPR:$src))>; } diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 806681df102c..60ca92e58041 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -15,22 +15,22 @@ // NEON-specific Operands. //===----------------------------------------------------------------------===// def nModImm : Operand { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; } def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; } def nImmSplatI8 : Operand { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; let ParserMatchClass = nImmSplatI8AsmOperand; } def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; } def nImmSplatI16 : Operand { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; let ParserMatchClass = nImmSplatI16AsmOperand; } def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; } def nImmSplatI32 : Operand { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; let ParserMatchClass = nImmSplatI32AsmOperand; } def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; } @@ -43,7 +43,7 @@ def nImmSplatNotI32 : Operand { } def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; } def nImmVMOVI32 : Operand { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; let ParserMatchClass = nImmVMOVI32AsmOperand; } @@ -62,18 +62,18 @@ class nImmVINVIAsmOperandReplicate } class nImmVMOVIReplicate : Operand { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; let ParserMatchClass = nImmVMOVIAsmOperandReplicate; } class nImmVINVIReplicate : Operand { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; let ParserMatchClass = nImmVINVIAsmOperandReplicate; } def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; } def nImmVMOVI32Neg : Operand { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; let ParserMatchClass = nImmVMOVI32NegAsmOperand; } def nImmVMOVF32 : Operand { @@ -82,7 +82,7 @@ def nImmVMOVF32 : Operand { } def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; } def nImmSplatI64 : Operand { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; let ParserMatchClass = nImmSplatI64AsmOperand; } @@ -478,20 +478,8 @@ def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr), // NEON-specific DAG Nodes. 
//===----------------------------------------------------------------------===// -def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>; -def SDTARMVCMPZ : SDTypeProfile<1, 1, []>; - -def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>; -def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>; -def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>; -def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>; -def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>; -def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>; -def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>; -def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>; -def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>; -def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>; -def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>; +def SDTARMVTST : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>; +def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVTST>; // Types for vector shift by immediates. The "SHX" version is for long and // narrow operations where the source and destination vectors have different @@ -559,14 +547,14 @@ def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>; def NEONimmAllZerosV: PatLeaf<(ARMvmovImm (i32 timm)), [{ ConstantSDNode *ConstVal = cast(N->getOperand(0)); unsigned EltBits = 0; - uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); + uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits); return (EltBits == 32 && EltVal == 0); }]>; def NEONimmAllOnesV: PatLeaf<(ARMvmovImm (i32 timm)), [{ ConstantSDNode *ConstVal = cast(N->getOperand(0)); unsigned EltBits = 0; - uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); + uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits); return (EltBits == 8 && EltVal == 0xff); }]>; @@ -3326,30 +3314,30 @@ class N2VCvtQ op11_8, bit op7, bit op4, // source operand element sizes of 8, 16 and 32 bits: multiclass N2V_QHS_cmp op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op4, string opc, string Dt, - string asm, SDNode OpNode> { + string asm, int fc> { // 64-bit vector types. def v8i8 : N2V; + [(set DPR:$Vd, (v8i8 (ARMvcmpz (v8i8 DPR:$Vm), (i32 fc))))]>; def v4i16 : N2V; + [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4i16 DPR:$Vm), (i32 fc))))]>; def v2i32 : N2V; + [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2i32 DPR:$Vm), (i32 fc))))]>; def v2f32 : N2V { + [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2f32 DPR:$Vm), (i32 fc))))]> { let Inst{10} = 1; // overwrite F = 1 } def v4f16 : N2V, + [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4f16 DPR:$Vm), (i32 fc))))]>, Requires<[HasNEON,HasFullFP16]> { let Inst{10} = 1; // overwrite F = 1 } @@ -3358,30 +3346,83 @@ multiclass N2V_QHS_cmp op24_23, bits<2> op21_20, bits<2> op17_16, def v16i8 : N2V; + [(set QPR:$Vd, (v16i8 (ARMvcmpz (v16i8 QPR:$Vm), (i32 fc))))]>; def v8i16 : N2V; + [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8i16 QPR:$Vm), (i32 fc))))]>; def v4i32 : N2V; + [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4i32 QPR:$Vm), (i32 fc))))]>; def v4f32 : N2V { + [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4f32 QPR:$Vm), (i32 fc))))]> { let Inst{10} = 1; // overwrite F = 1 } def v8f16 : N2V, + [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8f16 QPR:$Vm), (i32 fc))))]>, Requires<[HasNEON,HasFullFP16]> { let Inst{10} = 1; // overwrite F = 1 } } +// Neon 3-register comparisons. +class N3VQ_cmp op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, int fc, bit Commutable> + : N3V { + // All of these have a two-operand InstAlias. 
+ let TwoOperandAliasConstraint = "$Vn = $Vd"; + let isCommutable = Commutable; +} + +class N3VD_cmp op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, int fc, bit Commutable> + : N3V { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; + let isCommutable = Commutable; +} + +multiclass N3V_QHS_cmp op11_8, bit op4, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, string Dt, + int fc, bit Commutable = 0> { + // 64-bit vector types. + def v8i8 : N3VD_cmp; + def v4i16 : N3VD_cmp; + def v2i32 : N3VD_cmp; + + // 128-bit vector types. + def v16i8 : N3VQ_cmp; + def v8i16 : N3VQ_cmp; + def v4i32 : N3VQ_cmp; +} + // Neon 2-register vector intrinsics, // element sizes of 8, 16 and 32 bits: @@ -5026,67 +5067,67 @@ def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))), // Vector Comparisons. // VCEQ : Vector Compare Equal -defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>; -def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, - NEONvceq, 1>; -def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, - NEONvceq, 1>; -def VCEQhd : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16, - NEONvceq, 1>, +defm VCEQ : N3V_QHS_cmp<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vceq", "i", 0, 1>; +def VCEQfd : N3VD_cmp<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, + 0, 1>; +def VCEQfq : N3VQ_cmp<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, + 0, 1>; +def VCEQhd : N3VD_cmp<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16, + 0, 1>, Requires<[HasNEON, HasFullFP16]>; -def VCEQhq : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16, - NEONvceq, 1>, +def VCEQhq : N3VQ_cmp<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16, + 0, 1>, Requires<[HasNEON, HasFullFP16]>; let TwoOperandAliasConstraint = "$Vm = $Vd" in defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", - "$Vd, $Vm, #0", NEONvceqz>; + "$Vd, $Vm, #0", 0>; // VCGE : Vector Compare Greater Than or Equal -defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>; -defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>; -def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, - NEONvcge, 0>; -def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, - NEONvcge, 0>; -def VCGEhd : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16, - NEONvcge, 0>, +defm VCGEs : N3V_QHS_cmp<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcge", "s", 10, 0>; +defm VCGEu : N3V_QHS_cmp<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcge", "u", 2, 0>; +def VCGEfd : N3VD_cmp<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, + 10, 0>; +def VCGEfq : N3VQ_cmp<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, + 10, 0>; +def VCGEhd : N3VD_cmp<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16, + 10, 0>, Requires<[HasNEON, HasFullFP16]>; -def VCGEhq : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16, - NEONvcge, 0>, +def VCGEhq : N3VQ_cmp<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16, + 10, 0>, 
Requires<[HasNEON, HasFullFP16]>; let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", - "$Vd, $Vm, #0", NEONvcgez>; + "$Vd, $Vm, #0", 10>; defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", - "$Vd, $Vm, #0", NEONvclez>; + "$Vd, $Vm, #0", 13>; } // VCGT : Vector Compare Greater Than -defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>; -defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>; -def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, - NEONvcgt, 0>; -def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, - NEONvcgt, 0>; -def VCGThd : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16, - NEONvcgt, 0>, +defm VCGTs : N3V_QHS_cmp<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcgt", "s", 12, 0>; +defm VCGTu : N3V_QHS_cmp<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcgt", "u", 8, 0>; +def VCGTfd : N3VD_cmp<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, + 12, 0>; +def VCGTfq : N3VQ_cmp<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, + 12, 0>; +def VCGThd : N3VD_cmp<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16, + 12, 0>, Requires<[HasNEON, HasFullFP16]>; -def VCGThq : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16, - NEONvcgt, 0>, +def VCGThq : N3VQ_cmp<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16, + 12, 0>, Requires<[HasNEON, HasFullFP16]>; let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", - "$Vd, $Vm, #0", NEONvcgtz>; + "$Vd, $Vm, #0", 12>; defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", - "$Vd, $Vm, #0", NEONvcltz>; + "$Vd, $Vm, #0", 11>; } // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index cfeb13c6acb6..18bcbda44580 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -565,6 +565,13 @@ let isCall = 1, 4, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, Requires<[IsThumb, IsThumb1Only]>, Sched<[WriteBr]>; + + // Also used for Thumb2 + // push lr before the call + def tBL_PUSHLR : tPseudoInst<(outs), (ins GPRlr:$ra, pred:$p, thumb_bl_target:$func), + 4, IIC_Br, + []>, + Requires<[IsThumb]>, Sched<[WriteBr]>; } let isBranch = 1, isTerminator = 1, isBarrier = 1 in { @@ -592,6 +599,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in { [(ARMbrjt tGPR:$target, tjumptable:$jt)]>, Sched<[WriteBrTbl]> { let Size = 2; + let isNotDuplicable = 1; list Predicates = [IsThumb, IsThumb1Only]; } } @@ -1362,6 +1370,12 @@ let hasPostISelHook = 1, Defs = [CPSR] in { [(set tGPR:$Rd, CPSR, (ARMsubc 0, tGPR:$Rn))]>, Requires<[IsThumb1Only]>, Sched<[WriteALU]>; + + def tLSLSri : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rn, imm0_31:$imm5), + 2, IIC_iALUr, + [(set tGPR:$Rd, CPSR, (ARMlsls tGPR:$Rn, imm0_31:$imm5))]>, + Requires<[IsThumb1Only]>, + Sched<[WriteALU]>; } @@ -1465,7 +1479,7 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd), // Thumb-1 doesn't have the TBB or TBH instructions, but we can synthesize them // and make use of the same compressed jump table format as Thumb-2. 
let Size = 2, isBranch = 1, isTerminator = 1, isBarrier = 1, - isIndirectBranch = 1 in { + isIndirectBranch = 1, isNotDuplicable = 1 in { def tTBB_JT : tPseudoInst<(outs), (ins tGPRwithpc:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>, Sched<[WriteBr]>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 7cbfaba7a8eb..25a45b39fa0c 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -45,7 +45,8 @@ def mve_shift_imm : AsmOperandClass { let RenderMethod = "addImmOperands"; let DiagnosticString = "operand must be an immediate in the range [1,32]"; } -def long_shift : Operand { +def long_shift : Operand, + ImmLeaf 0 && Imm <= 32; }]> { let ParserMatchClass = mve_shift_imm; let DecoderMethod = "DecodeLongShiftOperand"; } @@ -2394,6 +2395,23 @@ def : Thumb2DSPPat<(int_arm_qadd(int_arm_qadd rGPR:$Rm, rGPR:$Rm), rGPR:$Rn), def : Thumb2DSPPat<(int_arm_qsub rGPR:$Rm, (int_arm_qadd rGPR:$Rn, rGPR:$Rn)), (t2QDSUB rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(saddsat rGPR:$Rm, rGPR:$Rn), + (t2QADD rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ssubsat rGPR:$Rm, rGPR:$Rn), + (t2QSUB rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(saddsat(saddsat rGPR:$Rm, rGPR:$Rm), rGPR:$Rn), + (t2QDADD rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ssubsat rGPR:$Rm, (saddsat rGPR:$Rn, rGPR:$Rn)), + (t2QDSUB rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ARMqadd8b rGPR:$Rm, rGPR:$Rn), + (t2QADD8 rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ARMqsub8b rGPR:$Rm, rGPR:$Rn), + (t2QSUB8 rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ARMqadd16b rGPR:$Rm, rGPR:$Rn), + (t2QADD16 rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ARMqsub16b rGPR:$Rm, rGPR:$Rn), + (t2QSUB16 rGPR:$Rm, rGPR:$Rn)>; + // Signed/Unsigned add/subtract def t2SASX : T2I_pam_intrinsics<0b010, 0b0000, "sasx", int_arm_sasx>; @@ -4085,7 +4103,7 @@ def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp), // Pseudo isntruction that combines movs + predicated rsbmi // to implement integer ABS -let usesCustomInserter = 1, Defs = [CPSR] in { +let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in { def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src), NoItinerary, []>, Requires<[IsThumb2]>; } @@ -4175,15 +4193,15 @@ multiclass t2LdStCop op31_28, bit load, bit Dbit, string asm, list } let DecoderNamespace = "Thumb2CoProc" in { -defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>; -defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>; -defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; -defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; +defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc", [(int_arm_ldc timm:$cop, timm:$CRd, addrmode5:$addr)]>; +defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl", [(int_arm_ldcl timm:$cop, timm:$CRd, addrmode5:$addr)]>; +defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2", [(int_arm_ldc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; +defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l", [(int_arm_ldc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; -defm t2STC : t2LdStCop<0b1110, 0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>; -defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>; -defm t2STC2 : 
t2LdStCop<0b1111, 0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; -defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; +defm t2STC : t2LdStCop<0b1110, 0, 0, "stc", [(int_arm_stc timm:$cop, timm:$CRd, addrmode5:$addr)]>; +defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl", [(int_arm_stcl timm:$cop, timm:$CRd, addrmode5:$addr)]>; +defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2", [(int_arm_stc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; +defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l", [(int_arm_stc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; } @@ -4368,8 +4386,8 @@ def t2MCR : t2MovRCopro<0b1110, "mcr", 0, (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), - [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, - imm:$CRm, imm:$opc2)]>, + [(int_arm_mcr timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn, + timm:$CRm, timm:$opc2)]>, ComplexDeprecationPredicate<"MCR">; def : t2InstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm", (t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, @@ -4377,8 +4395,8 @@ def : t2InstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm", def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0, (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), - [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, - imm:$CRm, imm:$opc2)]> { + [(int_arm_mcr2 timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn, + timm:$CRm, timm:$opc2)]> { let Predicates = [IsThumb2, PreV8]; } def : t2InstAlias<"mcr2${p} $cop, $opc1, $Rt, $CRn, $CRm", @@ -4402,24 +4420,24 @@ def : t2InstAlias<"mrc2${p} $cop, $opc1, $Rt, $CRn, $CRm", (t2MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, 0, pred:$p)>; -def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), - (t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; +def : T2v6Pat<(int_arm_mrc timm:$cop, timm:$opc1, timm:$CRn, timm:$CRm, timm:$opc2), + (t2MRC p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>; -def : T2v6Pat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), - (t2MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; +def : T2v6Pat<(int_arm_mrc2 timm:$cop, timm:$opc1, timm:$CRn, timm:$CRm, timm:$opc2), + (t2MRC2 p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>; /* from ARM core register to coprocessor */ def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0, (outs), (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm), - [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2, - imm:$CRm)]>; + [(int_arm_mcrr timm:$cop, timm:$opc1, GPR:$Rt, GPR:$Rt2, + timm:$CRm)]>; def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0, (outs), (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm), - [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt, - GPR:$Rt2, imm:$CRm)]> { + [(int_arm_mcrr2 timm:$cop, timm:$opc1, GPR:$Rt, + GPR:$Rt2, timm:$CRm)]> { let Predicates = [IsThumb2, PreV8]; } @@ -4439,8 +4457,8 @@ def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1, (outs GPR:$Rt, GPR:$Rt2), def t2CDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", - [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, - imm:$CRm, imm:$opc2)]> { + [(int_arm_cdp timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn, + timm:$CRm, timm:$opc2)]> { let Inst{27-24} = 0b1110; bits<4> opc1; @@ 
-4465,8 +4483,8 @@ def t2CDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), "cdp2", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", - [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, - imm:$CRm, imm:$opc2)]> { + [(int_arm_cdp2 timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn, + timm:$CRm, timm:$opc2)]> { let Inst{27-24} = 0b1110; bits<4> opc1; @@ -5087,6 +5105,7 @@ def t2BF_LabelPseudo : t2PseudoInst<(outs ), (ins pclabel:$cp), 0, NoItinerary, []> { let isTerminator = 1; let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB]; + let hasNoSchedulingInfo = 1; } def t2BFi : t2BF<(ins bflabel_u4:$b_label, bflabel_s16:$label, pred:$p), @@ -5217,11 +5236,13 @@ def t2LoopDec : t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size), 4, IIC_Br, []>, Sched<[WriteBr]>; -let isBranch = 1, isTerminator = 1, hasSideEffects = 1 in { +let isBranch = 1, isTerminator = 1, hasSideEffects = 1, Defs = [CPSR] in { +// Set WhileLoopStart and LoopEnd to occupy 8 bytes because they may +// get converted into t2CMP and t2Bcc. def t2WhileLoopStart : t2PseudoInst<(outs), (ins rGPR:$elts, brtarget:$target), - 4, IIC_Br, []>, + 8, IIC_Br, []>, Sched<[WriteBr]>; def t2LoopEnd : @@ -5233,7 +5254,7 @@ def t2LoopEnd : } // end isNotDuplicable class CS opcode, list pattern=[]> - : V8_1MI<(outs rGPR:$Rd), (ins GPRwithZR:$Rn, GPRwithZRnosp:$Rm, pred_noal:$fcond), + : V8_1MI<(outs rGPR:$Rd), (ins GPRwithZRnosp:$Rn, GPRwithZRnosp:$Rm, pred_noal:$fcond), AddrModeNone, NoItinerary, iname, "$Rd, $Rn, $Rm, $fcond", "", pattern> { bits<4> Rd; bits<4> Rm; @@ -5255,6 +5276,25 @@ def t2CSINC : CS<"csinc", 0b1001>; def t2CSINV : CS<"csinv", 0b1010>; def t2CSNEG : CS<"csneg", 0b1011>; +let Predicates = [HasV8_1MMainline] in { + def : T2Pat<(ARMcsinc GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm), + (t2CSINC GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm)>; + def : T2Pat<(ARMcsinv GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm), + (t2CSINV GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm)>; + def : T2Pat<(ARMcsneg GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm), + (t2CSNEG GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm)>; + + multiclass ModifiedV8_1CSEL { + def : T2Pat<(ARMcmov modvalue, GPRwithZR:$tval, cmovpred:$imm), + (Insn GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm)>; + def : T2Pat<(ARMcmov GPRwithZR:$tval, modvalue, cmovpred:$imm), + (Insn GPRwithZR:$tval, GPRwithZR:$fval, + (i32 (inv_cond_XFORM imm:$imm)))>; + } + defm : ModifiedV8_1CSEL; + defm : ModifiedV8_1CSEL; + defm : ModifiedV8_1CSEL; +} // CS aliases. 
let Predicates = [HasV8_1MMainline] in { diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index a0dd25de07ee..fdd961bfbb2f 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -10,7 +10,7 @@ // //===----------------------------------------------------------------------===// -def SDT_CMPFP0 : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisVT<1, i32>]>; +def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>; def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, @@ -19,7 +19,7 @@ def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, def SDT_VMOVSR : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i32>]>; def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>; -def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMFCmp, [SDNPOutGlue]>; +def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>; def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>; def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>; def arm_fmrrd : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>; @@ -324,7 +324,7 @@ defm : VFPDTAnyInstAlias<"vpop${p}", "$r", // However, there is no UAL syntax for them, so we keep them around for // (dis)assembly only. multiclass vfp_ldstx_mult { - let Predicates = [HasFPRegs] in { + let Predicates = [HasFPRegs], hasNoSchedulingInfo = 1 in { // Unknown precision def XIA : AXXI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), @@ -548,12 +548,12 @@ let Defs = [FPSCR_NZCV] in { def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins DPR:$Dd, DPR:$Dm), IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm", - [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm), (i32 1))]>; + [/* For disassembly only; pattern left blank */]>; def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins SPR:$Sd, SPR:$Sm), IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", - [(arm_cmpfp SPR:$Sd, SPR:$Sm, (i32 1))]> { + [/* For disassembly only; pattern left blank */]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -562,17 +562,17 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins HPR:$Sd, HPR:$Sm), IIC_fpCMP16, "vcmpe", ".f16\t$Sd, $Sm", - [(arm_cmpfp HPR:$Sd, HPR:$Sm, (i32 1))]>; + [/* For disassembly only; pattern left blank */]>; def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins DPR:$Dd, DPR:$Dm), IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm", - [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm), (i32 0))]>; + [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>; def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins SPR:$Sd, SPR:$Sm), IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm", - [(arm_cmpfp SPR:$Sd, SPR:$Sm, (i32 0))]> { + [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -581,7 +581,7 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, def VCMPH : AHuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins HPR:$Sd, HPR:$Sm), IIC_fpCMP16, "vcmp", ".f16\t$Sd, $Sm", - [(arm_cmpfp HPR:$Sd, HPR:$Sm, (i32 0))]>; + [(arm_cmpfp HPR:$Sd, HPR:$Sm)]>; } // Defs = [FPSCR_NZCV] //===----------------------------------------------------------------------===// @@ -611,7 +611,7 @@ let Defs = [FPSCR_NZCV] in { def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins DPR:$Dd), IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0", - [(arm_cmpfp0 (f64 DPR:$Dd), (i32 1))]> { + [/* For disassembly only; pattern left blank */]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; } @@ -619,7 +619,7 @@ def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins SPR:$Sd), IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0", - [(arm_cmpfp0 SPR:$Sd, (i32 1))]> { + [/* For disassembly only; pattern left blank */]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -631,7 +631,7 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins HPR:$Sd), IIC_fpCMP16, "vcmpe", ".f16\t$Sd, #0", - [(arm_cmpfp0 HPR:$Sd, (i32 1))]> { + [/* For disassembly only; pattern left blank */]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; } @@ -639,7 +639,7 @@ def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0, def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins DPR:$Dd), IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0", - [(arm_cmpfp0 (f64 DPR:$Dd), (i32 0))]> { + [(arm_cmpfp0 (f64 DPR:$Dd))]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; } @@ -647,7 +647,7 @@ def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins SPR:$Sd), IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0", - [(arm_cmpfp0 SPR:$Sd, (i32 0))]> { + [(arm_cmpfp0 SPR:$Sd)]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -659,7 +659,7 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins HPR:$Sd), IIC_fpCMP16, "vcmp", ".f16\t$Sd, #0", - [(arm_cmpfp0 HPR:$Sd, (i32 0))]> { + [(arm_cmpfp0 HPR:$Sd)]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; } @@ -1732,7 +1732,8 @@ def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0, def VTOUHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), - IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", []> { + IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1740,7 +1741,8 @@ def VTOUHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 0, def VTOSLS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), - IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", []> { + IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -1748,7 +1750,8 @@ def VTOSLS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 1, def VTOULS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), - IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", []> { + IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -2297,6 +2300,8 @@ class MovFromVFP opc19_16, dag oops, dag iops, string opc, string asm, let Inst{6-5} = 0b00; let Inst{4} = 1; let Inst{3-0} = 0b0000; + let Unpredictable{7-5} = 0b111; + let Unpredictable{3-0} = 0b1111; } let DecoderMethod = "DecodeForVMRSandVMSR" in { @@ -2370,63 +2375,65 @@ class MovToVFP opc19_16, dag oops, dag iops, string opc, string asm, VFPAI { // Instruction operand. - bits<4> src; - - // Encode instruction operand. - let Inst{15-12} = src; + bits<4> Rt; let Inst{27-20} = 0b11101110; let Inst{19-16} = opc19_16; + let Inst{15-12} = Rt; let Inst{11-8} = 0b1010; let Inst{7} = 0; + let Inst{6-5} = 0b00; let Inst{4} = 1; + let Inst{3-0} = 0b0000; let Predicates = [HasVFP2]; + let Unpredictable{7-5} = 0b111; + let Unpredictable{3-0} = 0b1111; } let DecoderMethod = "DecodeForVMRSandVMSR" in { let Defs = [FPSCR] in { let Predicates = [HasFPRegs] in // Application level GPR -> FPSCR - def VMSR : MovToVFP<0b0001 /* fpscr */, (outs), (ins GPRnopc:$src), - "vmsr", "\tfpscr, $src", - [(int_arm_set_fpscr GPRnopc:$src)]>; + def VMSR : MovToVFP<0b0001 /* fpscr */, (outs), (ins GPRnopc:$Rt), + "vmsr", "\tfpscr, $Rt", + [(int_arm_set_fpscr GPRnopc:$Rt)]>; // System level GPR -> FPEXC - def VMSR_FPEXC : MovToVFP<0b1000 /* fpexc */, (outs), (ins GPRnopc:$src), - "vmsr", "\tfpexc, $src", []>; + def VMSR_FPEXC : MovToVFP<0b1000 /* fpexc */, (outs), (ins GPRnopc:$Rt), + "vmsr", "\tfpexc, $Rt", []>; // System level GPR -> FPSID - def VMSR_FPSID : MovToVFP<0b0000 /* fpsid */, (outs), (ins GPRnopc:$src), - "vmsr", "\tfpsid, $src", []>; - def VMSR_FPINST : MovToVFP<0b1001 /* fpinst */, (outs), (ins GPRnopc:$src), - "vmsr", "\tfpinst, $src", []>; - def VMSR_FPINST2 : MovToVFP<0b1010 /* fpinst2 */, (outs), (ins GPRnopc:$src), - "vmsr", "\tfpinst2, $src", []>; + def VMSR_FPSID : MovToVFP<0b0000 /* fpsid */, (outs), (ins GPRnopc:$Rt), + "vmsr", "\tfpsid, $Rt", []>; + def VMSR_FPINST : MovToVFP<0b1001 /* fpinst */, (outs), (ins GPRnopc:$Rt), + "vmsr", "\tfpinst, $Rt", []>; + def VMSR_FPINST2 : MovToVFP<0b1010 /* fpinst2 */, (outs), (ins GPRnopc:$Rt), + "vmsr", "\tfpinst2, $Rt", []>; } let Predicates = [HasV8_1MMainline, Has8MSecExt] in { // System level GPR -> FPSCR with context saving for security extensions - def VMSR_FPCXTNS : MovToVFP<0b1110 /* fpcxtns */, (outs), (ins GPR:$src), - "vmsr", "\tfpcxtns, $src", []>; + def VMSR_FPCXTNS : MovToVFP<0b1110 /* fpcxtns */, (outs), (ins GPR:$Rt), + "vmsr", "\tfpcxtns, $Rt", []>; } let Predicates = [HasV8_1MMainline, Has8MSecExt] in { // System level GPR -> FPSCR with context saving for security extensions - def VMSR_FPCXTS : MovToVFP<0b1111 /* fpcxts */, (outs), (ins GPR:$src), - "vmsr", "\tfpcxts, $src", []>; + def VMSR_FPCXTS : MovToVFP<0b1111 /* fpcxts */, (outs), (ins GPR:$Rt), + "vmsr", "\tfpcxts, $Rt", []>; } let Predicates = [HasV8_1MMainline, HasFPRegs] in { // System level GPR -> FPSCR_NZCVQC def VMSR_FPSCR_NZCVQC : MovToVFP<0b0010 /* fpscr_nzcvqc */, - (outs cl_FPSCR_NZCV:$fpscr_out), (ins GPR:$src), - "vmsr", "\tfpscr_nzcvqc, $src", []>; + (outs 
cl_FPSCR_NZCV:$fpscr_out), (ins GPR:$Rt), + "vmsr", "\tfpscr_nzcvqc, $Rt", []>; } let Predicates = [HasV8_1MMainline, HasMVEInt] in { // System level GPR -> VPR/P0 let Defs = [VPR] in - def VMSR_VPR : MovToVFP<0b1100 /* vpr */, (outs), (ins GPR:$src), - "vmsr", "\tvpr, $src", []>; + def VMSR_VPR : MovToVFP<0b1100 /* vpr */, (outs), (ins GPR:$Rt), + "vmsr", "\tvpr, $Rt", []>; - def VMSR_P0 : MovToVFP<0b1101 /* p0 */, (outs VCCR:$cond), (ins GPR:$src), - "vmsr", "\tp0, $src", []>; + def VMSR_P0 : MovToVFP<0b1101 /* p0 */, (outs VCCR:$cond), (ins GPR:$Rt), + "vmsr", "\tp0, $Rt", []>; } } @@ -2614,7 +2621,8 @@ def VSCCLRMD : VFPXI<(outs), (ins pred:$p, fp_dreglist_with_vpr:$regs, variable_ let Inst{21-16} = 0b011111; let Inst{15-12} = regs{11-8}; let Inst{11-8} = 0b1011; - let Inst{7-0} = regs{7-0}; + let Inst{7-1} = regs{7-1}; + let Inst{0} = 0; let DecoderMethod = "DecodeVSCCLRM"; diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp index 4485a474a6df..8e5e474c0f59 100644 --- a/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/lib/Target/ARM/ARMInstructionSelector.cpp @@ -34,7 +34,7 @@ public: ARMInstructionSelector(const ARMBaseTargetMachine &TM, const ARMSubtarget &STI, const ARMRegisterBankInfo &RBI); - bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override; + bool select(MachineInstr &I) override; static const char *getName() { return DEBUG_TYPE; } private: @@ -210,8 +210,8 @@ static const TargetRegisterClass *guessRegClass(unsigned Reg, static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) { - unsigned DstReg = I.getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + Register DstReg = I.getOperand(0).getReg(); + if (Register::isPhysicalRegister(DstReg)) return true; const TargetRegisterClass *RC = guessRegClass(DstReg, MRI, TRI, RBI); @@ -236,17 +236,17 @@ static bool selectMergeValues(MachineInstrBuilder &MIB, // We only support G_MERGE_VALUES as a way to stick together two scalar GPRs // into one DPR. - unsigned VReg0 = MIB->getOperand(0).getReg(); + Register VReg0 = MIB->getOperand(0).getReg(); (void)VReg0; assert(MRI.getType(VReg0).getSizeInBits() == 64 && RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::FPRRegBankID && "Unsupported operand for G_MERGE_VALUES"); - unsigned VReg1 = MIB->getOperand(1).getReg(); + Register VReg1 = MIB->getOperand(1).getReg(); (void)VReg1; assert(MRI.getType(VReg1).getSizeInBits() == 32 && RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID && "Unsupported operand for G_MERGE_VALUES"); - unsigned VReg2 = MIB->getOperand(2).getReg(); + Register VReg2 = MIB->getOperand(2).getReg(); (void)VReg2; assert(MRI.getType(VReg2).getSizeInBits() == 32 && RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::GPRRegBankID && @@ -268,17 +268,17 @@ static bool selectUnmergeValues(MachineInstrBuilder &MIB, // We only support G_UNMERGE_VALUES as a way to break up one DPR into two // GPRs. 
- unsigned VReg0 = MIB->getOperand(0).getReg(); + Register VReg0 = MIB->getOperand(0).getReg(); (void)VReg0; assert(MRI.getType(VReg0).getSizeInBits() == 32 && RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::GPRRegBankID && "Unsupported operand for G_UNMERGE_VALUES"); - unsigned VReg1 = MIB->getOperand(1).getReg(); + Register VReg1 = MIB->getOperand(1).getReg(); (void)VReg1; assert(MRI.getType(VReg1).getSizeInBits() == 32 && RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID && "Unsupported operand for G_UNMERGE_VALUES"); - unsigned VReg2 = MIB->getOperand(2).getReg(); + Register VReg2 = MIB->getOperand(2).getReg(); (void)VReg2; assert(MRI.getType(VReg2).getSizeInBits() == 64 && RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::FPRRegBankID && @@ -833,8 +833,7 @@ void ARMInstructionSelector::renderVFPF64Imm( NewInstBuilder.addImm(FPImmEncoding); } -bool ARMInstructionSelector::select(MachineInstr &I, - CodeGenCoverage &CoverageInfo) const { +bool ARMInstructionSelector::select(MachineInstr &I) { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -851,7 +850,7 @@ bool ARMInstructionSelector::select(MachineInstr &I, using namespace TargetOpcode; - if (selectImpl(I, CoverageInfo)) + if (selectImpl(I, *CoverageInfo)) return true; MachineInstrBuilder MIB{MF, I}; @@ -874,10 +873,10 @@ bool ARMInstructionSelector::select(MachineInstr &I, MIB.addImm(1).add(predOps(ARMCC::AL)).add(condCodeOp()); if (isSExt) { - unsigned SExtResult = I.getOperand(0).getReg(); + Register SExtResult = I.getOperand(0).getReg(); // Use a new virtual register for the result of the AND - unsigned AndResult = MRI.createVirtualRegister(&ARM::GPRRegClass); + Register AndResult = MRI.createVirtualRegister(&ARM::GPRRegClass); I.getOperand(0).setReg(AndResult); auto InsertBefore = std::next(I.getIterator()); @@ -928,7 +927,7 @@ bool ARMInstructionSelector::select(MachineInstr &I, assert(MRI.getType(SrcReg).getSizeInBits() == 64 && "Unsupported size"); assert(MRI.getType(DstReg).getSizeInBits() <= 32 && "Unsupported size"); - unsigned IgnoredBits = MRI.createVirtualRegister(&ARM::GPRRegClass); + Register IgnoredBits = MRI.createVirtualRegister(&ARM::GPRRegClass); auto InsertBefore = std::next(I.getIterator()); auto MovI = BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(ARM::VMOVRRD)) @@ -1039,7 +1038,7 @@ bool ARMInstructionSelector::select(MachineInstr &I, case G_FCMP: { assert(STI.hasVFP2Base() && "Can't select fcmp without VFP"); - unsigned OpReg = I.getOperand(2).getReg(); + Register OpReg = I.getOperand(2).getReg(); unsigned Size = MRI.getType(OpReg).getSizeInBits(); if (Size == 64 && !STI.hasFP64()) { @@ -1077,12 +1076,12 @@ bool ARMInstructionSelector::select(MachineInstr &I, case G_STORE: case G_LOAD: { const auto &MemOp = **I.memoperands_begin(); - if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) { + if (MemOp.isAtomic()) { LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n"); return false; } - unsigned Reg = I.getOperand(0).getReg(); + Register Reg = I.getOperand(0).getReg(); unsigned RegBank = RBI.getRegBank(Reg, MRI, TRI)->getID(); LLT ValTy = MRI.getType(Reg); @@ -1097,9 +1096,9 @@ bool ARMInstructionSelector::select(MachineInstr &I, if (ValSize == 1 && NewOpc == Opcodes.STORE8) { // Before storing a 1-bit value, make sure to clear out any unneeded bits. 
- unsigned OriginalValue = I.getOperand(0).getReg(); + Register OriginalValue = I.getOperand(0).getReg(); - unsigned ValueToStore = MRI.createVirtualRegister(&ARM::GPRRegClass); + Register ValueToStore = MRI.createVirtualRegister(&ARM::GPRRegClass); I.getOperand(0).setReg(ValueToStore); auto InsertBefore = I.getIterator(); @@ -1159,7 +1158,7 @@ bool ARMInstructionSelector::select(MachineInstr &I, case G_PHI: { I.setDesc(TII.get(PHI)); - unsigned DstReg = I.getOperand(0).getReg(); + Register DstReg = I.getOperand(0).getReg(); const TargetRegisterClass *RC = guessRegClass(DstReg, MRI, TRI, RBI); if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) { break; diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp index 73a57b297ad6..81414e6d76fe 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -84,6 +84,8 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT}) .legalForCartesianProduct({s8, s16, s32}, {s1, s8, s16}); + getActionDefinitionsBuilder(G_SEXT_INREG).lower(); + getActionDefinitionsBuilder({G_MUL, G_AND, G_OR, G_XOR}) .legalFor({s32}) .minScalar(0, s32); diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 90a1ce238c3f..4a193fed04a3 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -509,7 +509,7 @@ void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, Offset = MO.getImm() - WordOffset * getImmScale(Opc); // If storing the base register, it needs to be reset first. - unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg(); + Register InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg(); if (Offset >= 0 && !(IsStore && InstrSrcReg == Base)) MO.setImm(Offset); @@ -859,7 +859,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { // Determine list of registers and list of implicit super-register defs. for (const MachineInstr *MI : Cand.Instrs) { const MachineOperand &MO = getLoadStoreRegOp(*MI); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); bool IsKill = MO.isKill(); if (IsKill) KilledRegs.insert(Reg); @@ -874,7 +874,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { if (!MO.isReg() || !MO.isDef() || MO.isDead()) continue; assert(MO.isImplicit()); - unsigned DefReg = MO.getReg(); + Register DefReg = MO.getReg(); if (is_contained(ImpDefs, DefReg)) continue; @@ -893,7 +893,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { iterator InsertBefore = std::next(iterator(LatestMI)); MachineBasicBlock &MBB = *LatestMI->getParent(); unsigned Offset = getMemoryOpOffset(*First); - unsigned Base = getLoadStoreBaseOp(*First).getReg(); + Register Base = getLoadStoreBaseOp(*First).getReg(); bool BaseKill = LatestMI->killsRegister(Base); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg); @@ -1005,7 +1005,7 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) { const MachineInstr *MI = MemOps[SIndex].MI; int Offset = MemOps[SIndex].Offset; const MachineOperand &PMO = getLoadStoreRegOp(*MI); - unsigned PReg = PMO.getReg(); + Register PReg = PMO.getReg(); unsigned PRegNum = PMO.isUndef() ? 
std::numeric_limits::max() : TRI->getEncodingValue(PReg); unsigned Latest = SIndex; @@ -1052,7 +1052,7 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) { if (NewOffset != Offset + (int)Size) break; const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == ARM::SP || Reg == ARM::PC) break; if (Count == Limit) @@ -1261,7 +1261,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { if (isThumb1) return false; const MachineOperand &BaseOP = MI->getOperand(0); - unsigned Base = BaseOP.getReg(); + Register Base = BaseOP.getReg(); bool BaseKill = BaseOP.isKill(); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg); @@ -1387,7 +1387,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { // FIXME: Use LDM/STM with single register instead. if (isThumb1) return false; - unsigned Base = getLoadStoreBaseOp(*MI).getReg(); + Register Base = getLoadStoreBaseOp(*MI).getReg(); bool BaseKill = getLoadStoreBaseOp(*MI).isKill(); unsigned Opcode = MI->getOpcode(); DebugLoc DL = MI->getDebugLoc(); @@ -1512,7 +1512,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const { // Behaviour for writeback is undefined if base register is the same as one // of the others. const MachineOperand &BaseOp = MI.getOperand(2); - unsigned Base = BaseOp.getReg(); + Register Base = BaseOp.getReg(); const MachineOperand &Reg0Op = MI.getOperand(0); const MachineOperand &Reg1Op = MI.getOperand(1); if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base) @@ -1655,9 +1655,9 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, return false; const MachineOperand &BaseOp = MI->getOperand(2); - unsigned BaseReg = BaseOp.getReg(); - unsigned EvenReg = MI->getOperand(0).getReg(); - unsigned OddReg = MI->getOperand(1).getReg(); + Register BaseReg = BaseOp.getReg(); + Register EvenReg = MI->getOperand(0).getReg(); + Register OddReg = MI->getOperand(1).getReg(); unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false); unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false); @@ -1783,8 +1783,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { if (isMemoryOp(*MBBI)) { unsigned Opcode = MBBI->getOpcode(); const MachineOperand &MO = MBBI->getOperand(0); - unsigned Reg = MO.getReg(); - unsigned Base = getLoadStoreBaseOp(*MBBI).getReg(); + Register Reg = MO.getReg(); + Register Base = getLoadStoreBaseOp(*MBBI).getReg(); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg); int Offset = getMemoryOpOffset(*MBBI); @@ -2121,7 +2121,7 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineOperand &MO = I->getOperand(j); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (MO.isDef() && TRI->regsOverlap(Reg, Base)) return false; if (Reg != Base && !MemRegs.count(Reg)) @@ -2415,7 +2415,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { int Opc = MI.getOpcode(); bool isLd = isLoadSingle(Opc); - unsigned Base = MI.getOperand(1).getReg(); + Register Base = MI.getOperand(1).getReg(); int Offset = getMemoryOpOffset(MI); bool StopHere = false; auto FindBases = [&] (Base2InstMap &Base2Ops, BaseVec &Bases) { diff --git a/lib/Target/ARM/ARMLowOverheadLoops.cpp b/lib/Target/ARM/ARMLowOverheadLoops.cpp index cedf3bd3c74e..e1c5a9c3e223 100644 --- a/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -11,8 +11,7 @@ 
/// The expectation is that the loop contains three pseudo instructions: /// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop /// form should be in the preheader, whereas the while form should be in the -/// preheaders only predecessor. TODO: Could DoLoopStart get moved into the -/// pre-preheader? +/// preheaders only predecessor. /// - t2LoopDec - placed within in the loop body. /// - t2LoopEnd - the loop latch terminator. /// @@ -35,6 +34,7 @@ using namespace llvm; namespace { class ARMLowOverheadLoops : public MachineFunctionPass { + MachineFunction *MF = nullptr; const ARMBaseInstrInfo *TII = nullptr; MachineRegisterInfo *MRI = nullptr; std::unique_ptr BBUtils = nullptr; @@ -52,17 +52,6 @@ namespace { bool runOnMachineFunction(MachineFunction &MF) override; - bool ProcessLoop(MachineLoop *ML); - - void RevertWhile(MachineInstr *MI) const; - - void RevertLoopDec(MachineInstr *MI) const; - - void RevertLoopEnd(MachineInstr *MI) const; - - void Expand(MachineLoop *ML, MachineInstr *Start, - MachineInstr *Dec, MachineInstr *End, bool Revert); - MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( MachineFunctionProperties::Property::NoVRegs); @@ -71,36 +60,156 @@ namespace { StringRef getPassName() const override { return ARM_LOW_OVERHEAD_LOOPS_NAME; } + + private: + bool ProcessLoop(MachineLoop *ML); + + MachineInstr * IsSafeToDefineLR(MachineInstr *MI); + + bool RevertNonLoops(); + + void RevertWhile(MachineInstr *MI) const; + + bool RevertLoopDec(MachineInstr *MI, bool AllowFlags = false) const; + + void RevertLoopEnd(MachineInstr *MI, bool SkipCmp = false) const; + + void Expand(MachineLoop *ML, MachineInstr *Start, + MachineInstr *InsertPt, MachineInstr *Dec, + MachineInstr *End, bool Revert); + }; } - + char ARMLowOverheadLoops::ID = 0; INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME, false, false) -bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) { - if (!static_cast(MF.getSubtarget()).hasLOB()) +bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) { + const ARMSubtarget &ST = static_cast(mf.getSubtarget()); + if (!ST.hasLOB()) return false; - LLVM_DEBUG(dbgs() << "ARM Loops on " << MF.getName() << " ------------- \n"); + MF = &mf; + LLVM_DEBUG(dbgs() << "ARM Loops on " << MF->getName() << " ------------- \n"); auto &MLI = getAnalysis(); - MRI = &MF.getRegInfo(); - TII = static_cast( - MF.getSubtarget().getInstrInfo()); - BBUtils = std::unique_ptr(new ARMBasicBlockUtils(MF)); + MF->getProperties().set(MachineFunctionProperties::Property::TracksLiveness); + MRI = &MF->getRegInfo(); + TII = static_cast(ST.getInstrInfo()); + BBUtils = std::unique_ptr(new ARMBasicBlockUtils(*MF)); BBUtils->computeAllBlockSizes(); - BBUtils->adjustBBOffsetsAfter(&MF.front()); + BBUtils->adjustBBOffsetsAfter(&MF->front()); bool Changed = false; for (auto ML : MLI) { if (!ML->getParentLoop()) Changed |= ProcessLoop(ML); } + Changed |= RevertNonLoops(); return Changed; } +static bool IsLoopStart(MachineInstr &MI) { + return MI.getOpcode() == ARM::t2DoLoopStart || + MI.getOpcode() == ARM::t2WhileLoopStart; +} + +template +static MachineInstr* SearchForDef(MachineInstr *Begin, T End, unsigned Reg) { + for(auto &MI : make_range(T(Begin), End)) { + for (auto &MO : MI.operands()) { + if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg) + continue; + return &MI; + } + } + return nullptr; +} + +static MachineInstr* SearchForUse(MachineInstr *Begin, + 
MachineBasicBlock::iterator End, + unsigned Reg) { + for(auto &MI : make_range(MachineBasicBlock::iterator(Begin), End)) { + for (auto &MO : MI.operands()) { + if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) + continue; + return &MI; + } + } + return nullptr; +} + +// Is it safe to define LR with DLS/WLS? +// LR can defined if it is the operand to start, because it's the same value, +// or if it's going to be equivalent to the operand to Start. +MachineInstr *ARMLowOverheadLoops::IsSafeToDefineLR(MachineInstr *Start) { + + auto IsMoveLR = [](MachineInstr *MI, unsigned Reg) { + return MI->getOpcode() == ARM::tMOVr && + MI->getOperand(0).getReg() == ARM::LR && + MI->getOperand(1).getReg() == Reg && + MI->getOperand(2).getImm() == ARMCC::AL; + }; + + MachineBasicBlock *MBB = Start->getParent(); + unsigned CountReg = Start->getOperand(0).getReg(); + // Walk forward and backward in the block to find the closest instructions + // that define LR. Then also filter them out if they're not a mov lr. + MachineInstr *PredLRDef = SearchForDef(Start, MBB->rend(), ARM::LR); + if (PredLRDef && !IsMoveLR(PredLRDef, CountReg)) + PredLRDef = nullptr; + + MachineInstr *SuccLRDef = SearchForDef(Start, MBB->end(), ARM::LR); + if (SuccLRDef && !IsMoveLR(SuccLRDef, CountReg)) + SuccLRDef = nullptr; + + // We've either found one, two or none mov lr instructions... Now figure out + // if they are performing the equilvant mov that the Start instruction will. + // Do this by scanning forward and backward to see if there's a def of the + // register holding the count value. If we find a suitable def, return it as + // the insert point. Later, if InsertPt != Start, then we can remove the + // redundant instruction. + if (SuccLRDef) { + MachineBasicBlock::iterator End(SuccLRDef); + if (!SearchForDef(Start, End, CountReg)) { + return SuccLRDef; + } else + SuccLRDef = nullptr; + } + if (PredLRDef) { + MachineBasicBlock::reverse_iterator End(PredLRDef); + if (!SearchForDef(Start, End, CountReg)) { + return PredLRDef; + } else + PredLRDef = nullptr; + } + + // We can define LR because LR already contains the same value. + if (Start->getOperand(0).getReg() == ARM::LR) + return Start; + + // We've found no suitable LR def and Start doesn't use LR directly. Can we + // just define LR anyway? + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + LivePhysRegs LiveRegs(*TRI); + LiveRegs.addLiveOuts(*MBB); + + // Not if we've haven't found a suitable mov and LR is live out. + if (LiveRegs.contains(ARM::LR)) + return nullptr; + + // If LR is not live out, we can insert the instruction if nothing else + // uses LR after it. + if (!SearchForUse(Start, MBB->end(), ARM::LR)) + return Start; + + LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find suitable insertion point for" + << " LR\n"); + return nullptr; +} + bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { bool Changed = false; @@ -111,15 +220,10 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML); - auto IsLoopStart = [](MachineInstr &MI) { - return MI.getOpcode() == ARM::t2DoLoopStart || - MI.getOpcode() == ARM::t2WhileLoopStart; - }; - // Search the given block for a loop start instruction. If one isn't found, // and there's only one predecessor block, search that one too. 
std::function SearchForStart = - [&IsLoopStart, &SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* { + [&SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* { for (auto &MI : *MBB) { if (IsLoopStart(MI)) return &MI; @@ -165,41 +269,62 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { Dec = &MI; else if (MI.getOpcode() == ARM::t2LoopEnd) End = &MI; - else if (MI.getDesc().isCall()) + else if (IsLoopStart(MI)) + Start = &MI; + else if (MI.getDesc().isCall()) { // TODO: Though the call will require LE to execute again, does this // mean we should revert? Always executing LE hopefully should be // faster than performing a sub,cmp,br or even subs,br. Revert = true; + LLVM_DEBUG(dbgs() << "ARM Loops: Found call.\n"); + } - if (!Dec) + if (!Dec || End) continue; - // If we find that we load/store LR between LoopDec and LoopEnd, expect - // that the decremented value has been spilled to the stack. Because - // this value isn't actually going to be produced until the latch, by LE, - // we would need to generate a real sub. The value is also likely to be - // reloaded for use of LoopEnd - in which in case we'd need to perform - // an add because it gets negated again by LE! The other option is to - // then generate the other form of LE which doesn't perform the sub. - if (MI.mayLoad() || MI.mayStore()) - Revert = - MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == ARM::LR; + // If we find that LR has been written or read between LoopDec and + // LoopEnd, expect that the decremented value is being used else where. + // Because this value isn't actually going to be produced until the + // latch, by LE, we would need to generate a real sub. The value is also + // likely to be copied/reloaded for use of LoopEnd - in which in case + // we'd need to perform an add because it gets subtracted again by LE! + // The other option is to then generate the other form of LE which doesn't + // perform the sub. + for (auto &MO : MI.operands()) { + if (MI.getOpcode() != ARM::t2LoopDec && MO.isReg() && + MO.getReg() == ARM::LR) { + LLVM_DEBUG(dbgs() << "ARM Loops: Found LR Use/Def: " << MI); + Revert = true; + break; + } + } } if (Dec && End && Revert) break; } + LLVM_DEBUG(if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start; + if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec; + if (End) dbgs() << "ARM Loops: Found Loop End: " << *End;); + if (!Start && !Dec && !End) { LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n"); return Changed; - } if (!(Start && Dec && End)) { - report_fatal_error("Failed to find all loop components"); + } else if (!(Start && Dec && End)) { + LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find all loop components.\n"); + return false; } - if (!End->getOperand(1).isMBB() || - End->getOperand(1).getMBB() != ML->getHeader()) - report_fatal_error("Expected LoopEnd to target Loop Header"); + if (!End->getOperand(1).isMBB()) + report_fatal_error("Expected LoopEnd to target basic block"); + + // TODO Maybe there's cases where the target doesn't have to be the header, + // but for now be safe and revert. + if (End->getOperand(1).getMBB() != ML->getHeader()) { + LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targetting header.\n"); + Revert = true; + } // The WLS and LE instructions have 12-bits for the label offset. WLS // requires a positive offset, while LE uses negative. 
@@ -216,41 +341,57 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { Revert = true; } - LLVM_DEBUG(dbgs() << "ARM Loops:\n - Found Loop Start: " << *Start - << " - Found Loop Dec: " << *Dec - << " - Found Loop End: " << *End); + MachineInstr *InsertPt = Revert ? nullptr : IsSafeToDefineLR(Start); + if (!InsertPt) { + LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n"); + Revert = true; + } else + LLVM_DEBUG(dbgs() << "ARM Loops: Start insertion point: " << *InsertPt); - Expand(ML, Start, Dec, End, Revert); + Expand(ML, Start, InsertPt, Dec, End, Revert); return true; } // WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a // beq that branches to the exit branch. -// FIXME: Need to check that we're not trashing the CPSR when generating the -// cmp. We could also try to generate a cbz if the value in LR is also in +// TODO: We could also try to generate a cbz if the value in LR is also in // another low register. void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const { LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI); MachineBasicBlock *MBB = MI->getParent(); MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2CMPri)); - MIB.addReg(ARM::LR); + MIB.add(MI->getOperand(0)); MIB.addImm(0); MIB.addImm(ARMCC::AL); - MIB.addReg(ARM::CPSR); + MIB.addReg(ARM::NoRegister); + + MachineBasicBlock *DestBB = MI->getOperand(1).getMBB(); + unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ? + ARM::tBcc : ARM::t2Bcc; - // TODO: Try to use tBcc instead - MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc)); + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc)); MIB.add(MI->getOperand(1)); // branch target MIB.addImm(ARMCC::EQ); // condition code MIB.addReg(ARM::CPSR); MI->eraseFromParent(); } -// TODO: Check flags so that we can possibly generate a tSubs or tSub. -void ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const { +bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI, + bool AllowFlags) const { LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI); MachineBasicBlock *MBB = MI->getParent(); + + // If nothing uses or defines CPSR between LoopDec and LoopEnd, use a t2SUBS. + bool SetFlags = false; + if (AllowFlags) { + if (auto *Def = SearchForDef(MI, MBB->end(), ARM::CPSR)) { + if (!SearchForUse(MI, MBB->end(), ARM::CPSR) && + Def->getOpcode() == ARM::t2LoopEnd) + SetFlags = true; + } + } + MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri)); MIB.addDef(ARM::LR); @@ -258,28 +399,39 @@ void ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const { MIB.add(MI->getOperand(2)); MIB.addImm(ARMCC::AL); MIB.addReg(0); - MIB.addReg(0); + + if (SetFlags) { + MIB.addReg(ARM::CPSR); + MIB->getOperand(5).setIsDef(true); + } else + MIB.addReg(0); + MI->eraseFromParent(); + return SetFlags; } // Generate a subs, or sub and cmp, and a branch instead of an LE. -// FIXME: Need to check that we're not trashing the CPSR when generating -// the cmp. 
-void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI) const { +void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const { LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI); - // Create cmp MachineBasicBlock *MBB = MI->getParent(); - MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), - TII->get(ARM::t2CMPri)); - MIB.addReg(ARM::LR); - MIB.addImm(0); - MIB.addImm(ARMCC::AL); - MIB.addReg(ARM::CPSR); + // Create cmp + if (!SkipCmp) { + MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), + TII->get(ARM::t2CMPri)); + MIB.addReg(ARM::LR); + MIB.addImm(0); + MIB.addImm(ARMCC::AL); + MIB.addReg(ARM::NoRegister); + } + + MachineBasicBlock *DestBB = MI->getOperand(1).getMBB(); + unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ? + ARM::tBcc : ARM::t2Bcc; - // TODO Try to use tBcc instead. // Create bne - MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc)); + MachineInstrBuilder MIB = + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc)); MIB.add(MI->getOperand(1)); // branch target MIB.addImm(ARMCC::NE); // condition code MIB.addReg(ARM::CPSR); @@ -287,33 +439,13 @@ void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI) const { } void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start, + MachineInstr *InsertPt, MachineInstr *Dec, MachineInstr *End, bool Revert) { - auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start) { - // The trip count should already been held in LR since the instructions - // within the loop can only read and write to LR. So, there should be a - // mov to setup the count. WLS/DLS perform this move, so find the original - // and delete it - inserting WLS/DLS in its place. - MachineBasicBlock *MBB = Start->getParent(); - MachineInstr *InsertPt = Start; - for (auto &I : MRI->def_instructions(ARM::LR)) { - if (I.getParent() != MBB) - continue; - - // Always execute. - if (!I.getOperand(2).isImm() || I.getOperand(2).getImm() != ARMCC::AL) - continue; - - // Only handle move reg, if the trip count it will need moving into a reg - // before the setup instruction anyway. - if (!I.getDesc().isMoveReg() || - !I.getOperand(1).isIdenticalTo(Start->getOperand(0))) - continue; - InsertPt = &I; - break; - } - + auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start, + MachineInstr *InsertPt) { + MachineBasicBlock *MBB = InsertPt->getParent(); unsigned Opc = Start->getOpcode() == ARM::t2DoLoopStart ? 
ARM::t2DLS : ARM::t2WLS; MachineInstrBuilder MIB = @@ -369,16 +501,54 @@ void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start, RevertWhile(Start); else Start->eraseFromParent(); - RevertLoopDec(Dec); - RevertLoopEnd(End); + bool FlagsAlreadySet = RevertLoopDec(Dec, true); + RevertLoopEnd(End, FlagsAlreadySet); } else { - Start = ExpandLoopStart(ML, Start); + Start = ExpandLoopStart(ML, Start, InsertPt); RemoveDeadBranch(Start); End = ExpandLoopEnd(ML, Dec, End); RemoveDeadBranch(End); } } +bool ARMLowOverheadLoops::RevertNonLoops() { + LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n"); + bool Changed = false; + + for (auto &MBB : *MF) { + SmallVector Starts; + SmallVector Decs; + SmallVector Ends; + + for (auto &I : MBB) { + if (IsLoopStart(I)) + Starts.push_back(&I); + else if (I.getOpcode() == ARM::t2LoopDec) + Decs.push_back(&I); + else if (I.getOpcode() == ARM::t2LoopEnd) + Ends.push_back(&I); + } + + if (Starts.empty() && Decs.empty() && Ends.empty()) + continue; + + Changed = true; + + for (auto *Start : Starts) { + if (Start->getOpcode() == ARM::t2WhileLoopStart) + RevertWhile(Start); + else + Start->eraseFromParent(); + } + for (auto *Dec : Decs) + RevertLoopDec(Dec); + + for (auto *End : Ends) + RevertLoopEnd(End); + } + return Changed; +} + FunctionPass *llvm::createARMLowOverheadLoopsPass() { return new ARMLowOverheadLoops(); } diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index 90c5ad025e56..c92689f4942e 100644 --- a/lib/Target/ARM/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp @@ -74,8 +74,8 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO, switch (MO.getType()) { default: llvm_unreachable("unknown operand type"); case MachineOperand::MO_Register: - // Ignore all non-CPSR implicit register operands. - if (MO.isImplicit() && MO.getReg() != ARM::CPSR) + // Ignore all implicit register operands. + if (MO.isImplicit()) return false; assert(!MO.getSubReg() && "Subregs should be eliminated!"); MCOp = MCOperand::createReg(MO.getReg()); diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index 90d794cd27b1..bb136e92329b 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -16,6 +16,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/Support/ErrorHandling.h" #include @@ -130,6 +131,10 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// The amount the literal pool has been increasedby due to promoted globals. int PromotedGlobalsIncrease = 0; + /// True if r0 will be preserved by a call to this function (e.g. C++ + /// con/destructors). + bool PreservesR0 = false; + public: ARMFunctionInfo() = default; @@ -247,6 +252,9 @@ public: } DenseMap EHPrologueRemappedRegs; + + void setPreservesR0() { PreservesR0 = true; } + bool getPreservesR0() const { return PreservesR0; } }; } // end namespace llvm diff --git a/lib/Target/ARM/ARMParallelDSP.cpp b/lib/Target/ARM/ARMParallelDSP.cpp index 5389d09bf7d7..ae5657a0a2c1 100644 --- a/lib/Target/ARM/ARMParallelDSP.cpp +++ b/lib/Target/ARM/ARMParallelDSP.cpp @@ -1,4 +1,4 @@ -//===- ParallelDSP.cpp - Parallel DSP Pass --------------------------------===// +//===- ARMParallelDSP.cpp - Parallel DSP Pass -----------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. @@ -18,13 +18,11 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopAccessAnalysis.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OrderedBasicBlock.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/NoFolder.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Pass.h" #include "llvm/PassRegistry.h" #include "llvm/PassSupport.h" @@ -45,54 +43,39 @@ static cl::opt DisableParallelDSP("disable-arm-parallel-dsp", cl::Hidden, cl::init(false), cl::desc("Disable the ARM Parallel DSP pass")); +static cl::opt +NumLoadLimit("arm-parallel-dsp-load-limit", cl::Hidden, cl::init(16), + cl::desc("Limit the number of loads analysed")); + namespace { - struct OpChain; - struct BinOpChain; + struct MulCandidate; class Reduction; - using OpChainList = SmallVector, 8>; - using ReductionList = SmallVector; - using ValueList = SmallVector; - using MemInstList = SmallVector; - using PMACPair = std::pair; - using PMACPairList = SmallVector; - using Instructions = SmallVector; - using MemLocList = SmallVector; + using MulCandList = SmallVector, 8>; + using MemInstList = SmallVectorImpl; + using MulPairList = SmallVector, 8>; - struct OpChain { + // 'MulCandidate' holds the multiplication instructions that are candidates + // for parallel execution. + struct MulCandidate { Instruction *Root; - ValueList AllValues; - MemInstList VecLd; // List of all load instructions. - MemInstList Loads; + Value* LHS; + Value* RHS; + bool Exchange = false; bool ReadOnly = true; + bool Paired = false; + SmallVector VecLd; // Container for loads to widen. - OpChain(Instruction *I, ValueList &vl) : Root(I), AllValues(vl) { } - virtual ~OpChain() = default; + MulCandidate(Instruction *I, Value *lhs, Value *rhs) : + Root(I), LHS(lhs), RHS(rhs) { } - void PopulateLoads() { - for (auto *V : AllValues) { - if (auto *Ld = dyn_cast(V)) - Loads.push_back(Ld); - } + bool HasTwoLoadInputs() const { + return isa(LHS) && isa(RHS); } - unsigned size() const { return AllValues.size(); } - }; - - // 'BinOpChain' holds the multiplication instructions that are candidates - // for parallel execution. - struct BinOpChain : public OpChain { - ValueList LHS; // List of all (narrow) left hand operands. - ValueList RHS; // List of all (narrow) right hand operands. - bool Exchange = false; - - BinOpChain(Instruction *I, ValueList &lhs, ValueList &rhs) : - OpChain(I, lhs), LHS(lhs), RHS(rhs) { - for (auto *V : RHS) - AllValues.push_back(V); - } - - bool AreSymmetrical(BinOpChain *Other); + LoadInst *getBaseLoad() const { + return VecLd.front(); + } }; /// Represent a sequence of multiply-accumulate operations with the aim to @@ -100,9 +83,9 @@ namespace { class Reduction { Instruction *Root = nullptr; Value *Acc = nullptr; - OpChainList Muls; - PMACPairList MulPairs; - SmallPtrSet Adds; + MulCandList Muls; + MulPairList MulPairs; + SetVector Adds; public: Reduction() = delete; @@ -112,10 +95,35 @@ namespace { /// Record an Add instruction that is a part of the this reduction. void InsertAdd(Instruction *I) { Adds.insert(I); } - /// Record a BinOpChain, rooted at a Mul instruction, that is a part of - /// this reduction. 
- void InsertMul(Instruction *I, ValueList &LHS, ValueList &RHS) { - Muls.push_back(make_unique(I, LHS, RHS)); + /// Create MulCandidates, each rooted at a Mul instruction, that is a part + /// of this reduction. + void InsertMuls() { + auto GetMulOperand = [](Value *V) -> Instruction* { + if (auto *SExt = dyn_cast(V)) { + if (auto *I = dyn_cast(SExt->getOperand(0))) + if (I->getOpcode() == Instruction::Mul) + return I; + } else if (auto *I = dyn_cast(V)) { + if (I->getOpcode() == Instruction::Mul) + return I; + } + return nullptr; + }; + + auto InsertMul = [this](Instruction *I) { + Value *LHS = cast(I->getOperand(0))->getOperand(0); + Value *RHS = cast(I->getOperand(1))->getOperand(0); + Muls.push_back(std::make_unique(I, LHS, RHS)); + }; + + for (auto *Add : Adds) { + if (Add == Acc) + continue; + if (auto *Mul = GetMulOperand(Add->getOperand(0))) + InsertMul(Mul); + if (auto *Mul = GetMulOperand(Add->getOperand(1))) + InsertMul(Mul); + } } /// Add the incoming accumulator value, returns true if a value had not @@ -128,9 +136,17 @@ namespace { return true; } - /// Set two BinOpChains, rooted at muls, that can be executed as a single + /// Set two MulCandidates, rooted at muls, that can be executed as a single /// parallel operation. - void AddMulPair(BinOpChain *Mul0, BinOpChain *Mul1) { + void AddMulPair(MulCandidate *Mul0, MulCandidate *Mul1, + bool Exchange = false) { + LLVM_DEBUG(dbgs() << "Pairing:\n" + << *Mul0->Root << "\n" + << *Mul1->Root << "\n"); + Mul0->Paired = true; + Mul1->Paired = true; + if (Exchange) + Mul1->Exchange = true; MulPairs.push_back(std::make_pair(Mul0, Mul1)); } @@ -141,24 +157,40 @@ namespace { /// Return the add instruction which is the root of the reduction. Instruction *getRoot() { return Root; } + bool is64Bit() const { return Root->getType()->isIntegerTy(64); } + + Type *getType() const { return Root->getType(); } + /// Return the incoming value to be accumulated. This maybe null. Value *getAccumulator() { return Acc; } /// Return the set of adds that comprise the reduction. - SmallPtrSetImpl &getAdds() { return Adds; } + SetVector &getAdds() { return Adds; } - /// Return the BinOpChain, rooted at mul instruction, that comprise the + /// Return the MulCandidate, rooted at mul instruction, that comprise the /// the reduction. - OpChainList &getMuls() { return Muls; } + MulCandList &getMuls() { return Muls; } - /// Return the BinOpChain, rooted at mul instructions, that have been + /// Return the MulCandidate, rooted at mul instructions, that have been /// paired for parallel execution. - PMACPairList &getMulPairs() { return MulPairs; } + MulPairList &getMulPairs() { return MulPairs; } /// To finalise, replace the uses of the root with the intrinsic call. 
void UpdateRoot(Instruction *SMLAD) { Root->replaceAllUsesWith(SMLAD); } + + void dump() { + LLVM_DEBUG(dbgs() << "Reduction:\n"; + for (auto *Add : Adds) + LLVM_DEBUG(dbgs() << *Add << "\n"); + for (auto &Mul : Muls) + LLVM_DEBUG(dbgs() << *Mul->Root << "\n" + << " " << *Mul->LHS << "\n" + << " " << *Mul->RHS << "\n"); + LLVM_DEBUG(if (Acc) dbgs() << "Acc in: " << *Acc << "\n") + ); + } }; class WidenedLoad { @@ -176,13 +208,11 @@ namespace { } }; - class ARMParallelDSP : public LoopPass { + class ARMParallelDSP : public FunctionPass { ScalarEvolution *SE; AliasAnalysis *AA; TargetLibraryInfo *TLI; DominatorTree *DT; - LoopInfo *LI; - Loop *L; const DataLayout *DL; Module *M; std::map LoadPairs; @@ -190,13 +220,12 @@ namespace { std::map> WideLoads; template - bool IsNarrowSequence(Value *V, ValueList &VL); - + bool IsNarrowSequence(Value *V); + bool Search(Value *V, BasicBlock *BB, Reduction &R); bool RecordMemoryOps(BasicBlock *BB); void InsertParallelMACs(Reduction &Reduction); bool AreSequentialLoads(LoadInst *Ld0, LoadInst *Ld1, MemInstList &VecMem); - LoadInst* CreateWideLoad(SmallVectorImpl &Loads, - IntegerType *LoadTy); + LoadInst* CreateWideLoad(MemInstList &Loads, IntegerType *LoadTy); bool CreateParallelPairs(Reduction &R); /// Try to match and generate: SMLAD, SMLADX - Signed Multiply Accumulate @@ -204,60 +233,38 @@ namespace { /// products to a 32-bit accumulate operand. Optionally, the instruction can /// exchange the halfwords of the second operand before performing the /// arithmetic. - bool MatchSMLAD(Loop *L); + bool MatchSMLAD(Function &F); public: static char ID; - ARMParallelDSP() : LoopPass(ID) { } - - bool doInitialization(Loop *L, LPPassManager &LPM) override { - LoadPairs.clear(); - WideLoads.clear(); - return true; - } + ARMParallelDSP() : FunctionPass(ID) { } void getAnalysisUsage(AnalysisUsage &AU) const override { - LoopPass::getAnalysisUsage(AU); + FunctionPass::getAnalysisUsage(AU); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); - AU.addRequired(); AU.addRequired(); AU.addRequired(); - AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); AU.setPreservesCFG(); } - bool runOnLoop(Loop *TheLoop, LPPassManager &) override { + bool runOnFunction(Function &F) override { if (DisableParallelDSP) return false; - L = TheLoop; + if (skipFunction(F)) + return false; + SE = &getAnalysis().getSE(); AA = &getAnalysis().getAAResults(); - TLI = &getAnalysis().getTLI(); + TLI = &getAnalysis().getTLI(F); DT = &getAnalysis().getDomTree(); - LI = &getAnalysis().getLoopInfo(); auto &TPC = getAnalysis(); - BasicBlock *Header = TheLoop->getHeader(); - if (!Header) - return false; - - // TODO: We assume the loop header and latch to be the same block. - // This is not a fundamental restriction, but lifting this would just - // require more work to do the transformation and then patch up the CFG. 
- if (Header != TheLoop->getLoopLatch()) { - LLVM_DEBUG(dbgs() << "The loop header is not the loop latch: not " - "running pass ARMParallelDSP\n"); - return false; - } - - if (!TheLoop->getLoopPreheader()) - InsertPreheaderForLoop(L, DT, LI, nullptr, true); - - Function &F = *Header->getParent(); M = F.getParent(); DL = &M->getDataLayout(); @@ -282,17 +289,10 @@ namespace { return false; } - LoopAccessInfo LAI(L, SE, TLI, AA, DT, LI); - LLVM_DEBUG(dbgs() << "\n== Parallel DSP pass ==\n"); LLVM_DEBUG(dbgs() << " - " << F.getName() << "\n\n"); - if (!RecordMemoryOps(Header)) { - LLVM_DEBUG(dbgs() << " - No sequential loads found.\n"); - return false; - } - - bool Changes = MatchSMLAD(L); + bool Changes = MatchSMLAD(F); return Changes; } }; @@ -331,40 +331,14 @@ bool ARMParallelDSP::AreSequentialLoads(LoadInst *Ld0, LoadInst *Ld1, // TODO: we currently only collect i16, and will support i8 later, so that's // why we check that types are equal to MaxBitWidth, and not <= MaxBitWidth. template -bool ARMParallelDSP::IsNarrowSequence(Value *V, ValueList &VL) { - ConstantInt *CInt; - - if (match(V, m_ConstantInt(CInt))) { - // TODO: if a constant is used, it needs to fit within the bit width. - return false; - } - - auto *I = dyn_cast(V); - if (!I) - return false; - - Value *Val, *LHS, *RHS; - if (match(V, m_Trunc(m_Value(Val)))) { - if (cast(I)->getDestTy()->getIntegerBitWidth() == MaxBitWidth) - return IsNarrowSequence(Val, VL); - } else if (match(V, m_Add(m_Value(LHS), m_Value(RHS)))) { - // TODO: we need to implement sadd16/sadd8 for this, which enables to - // also do the rewrite for smlad8.ll, but it is unsupported for now. - return false; - } else if (match(V, m_ZExtOrSExt(m_Value(Val)))) { - if (cast(I)->getSrcTy()->getIntegerBitWidth() != MaxBitWidth) +bool ARMParallelDSP::IsNarrowSequence(Value *V) { + if (auto *SExt = dyn_cast(V)) { + if (SExt->getSrcTy()->getIntegerBitWidth() != MaxBitWidth) return false; - if (match(Val, m_Load(m_Value()))) { - auto *Ld = cast(Val); - - // Check that these load could be paired. - if (!LoadPairs.count(Ld) && !OffsetLoads.count(Ld)) - return false; - - VL.push_back(Val); - VL.push_back(I); - return true; + if (auto *Ld = dyn_cast(SExt->getOperand(0))) { + // Check that this load could be paired. + return LoadPairs.count(Ld) || OffsetLoads.count(Ld); } } return false; @@ -375,6 +349,9 @@ bool ARMParallelDSP::IsNarrowSequence(Value *V, ValueList &VL) { bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) { SmallVector Loads; SmallVector Writes; + LoadPairs.clear(); + WideLoads.clear(); + OrderedBasicBlock OrderedBB(BB); // Collect loads and instruction that may write to memory. For now we only // record loads which are simple, sign-extended and have a single user. @@ -389,21 +366,24 @@ bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) { Loads.push_back(Ld); } + if (Loads.empty() || Loads.size() > NumLoadLimit) + return false; + using InstSet = std::set; using DepMap = std::map; DepMap RAWDeps; // Record any writes that may alias a load. 
const auto Size = LocationSize::unknown(); - for (auto Read : Loads) { - for (auto Write : Writes) { + for (auto Write : Writes) { + for (auto Read : Loads) { MemoryLocation ReadLoc = MemoryLocation(Read->getPointerOperand(), Size); if (!isModOrRefSet(intersectModRef(AA->getModRefInfo(Write, ReadLoc), ModRefInfo::ModRef))) continue; - if (DT->dominates(Write, Read)) + if (OrderedBB.dominates(Write, Read)) RAWDeps[Read].insert(Write); } } @@ -411,17 +391,16 @@ // Check whether there's not a write between the two loads which would // prevent them from being safely merged. auto SafeToPair = [&](LoadInst *Base, LoadInst *Offset) { - LoadInst *Dominator = DT->dominates(Base, Offset) ? Base : Offset; - LoadInst *Dominated = DT->dominates(Base, Offset) ? Offset : Base; + LoadInst *Dominator = OrderedBB.dominates(Base, Offset) ? Base : Offset; + LoadInst *Dominated = OrderedBB.dominates(Base, Offset) ? Offset : Base; if (RAWDeps.count(Dominated)) { InstSet &WritesBefore = RAWDeps[Dominated]; for (auto Before : WritesBefore) { - // We can't move the second load backward, past a write, to merge // with the first load. - if (DT->dominates(Dominator, Before)) + if (OrderedBB.dominates(Dominator, Before)) return false; } } @@ -431,7 +410,7 @@ // Record base, offset load pairs. for (auto *Base : Loads) { for (auto *Offset : Loads) { - if (Base == Offset) + if (Base == Offset || OffsetLoads.count(Offset)) continue; if (AreSequentialAccesses(Base, Offset, *DL, *SE) && @@ -453,7 +432,54 @@ return LoadPairs.size() > 1; } -// Loop Pass that needs to identify integer add/sub reductions of 16-bit vector +// Search recursively back through the operands to find a tree of values that +// form a multiply-accumulate chain. The search records the Add and Mul +// instructions that form the reduction and allows us to find a single value +// to be used as the initial input to the accumulator. +bool ARMParallelDSP::Search(Value *V, BasicBlock *BB, Reduction &R) { + // If we find a non-instruction, try to use it as the initial accumulator + // value. This may have already been found during the search in which case + // this function will return false, signaling a search fail. + auto *I = dyn_cast(V); + if (!I) + return R.InsertAcc(V); + + if (I->getParent() != BB) + return false; + + switch (I->getOpcode()) { + default: + break; + case Instruction::PHI: + // Could be the accumulator value. + return R.InsertAcc(V); + case Instruction::Add: { + // Adds should be adding together two muls, or another add and a mul to + // be within the mac chain. One of the operands may also be the + // accumulator value at which point we should stop searching. + R.InsertAdd(I); + Value *LHS = I->getOperand(0); + Value *RHS = I->getOperand(1); + bool ValidLHS = Search(LHS, BB, R); + bool ValidRHS = Search(RHS, BB, R); + + if (ValidLHS && ValidRHS) + return true; + + return R.InsertAcc(I); + } + case Instruction::Mul: { + Value *MulOp0 = I->getOperand(0); + Value *MulOp1 = I->getOperand(1); + return IsNarrowSequence<16>(MulOp0) && IsNarrowSequence<16>(MulOp1); + } + case Instruction::SExt: + return Search(I->getOperand(0), BB, R); + } + return false; +} + +// The pass needs to identify integer add/sub reductions of 16-bit vector
// To use SMLAD: // 1) we first need to find integer add then look for this pattern: @@ -484,88 +510,39 @@ bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) { // If loop invariants are used instead of loads, these need to be packed // before the loop begins. // -bool ARMParallelDSP::MatchSMLAD(Loop *L) { - // Search recursively back through the operands to find a tree of values that - // form a multiply-accumulate chain. The search records the Add and Mul - // instructions that form the reduction and allows us to find a single value - // to be used as the initial input to the accumlator. - std::function Search = [&] - (Value *V, Reduction &R) -> bool { - - // If we find a non-instruction, try to use it as the initial accumulator - // value. This may have already been found during the search in which case - // this function will return false, signaling a search fail. - auto *I = dyn_cast(V); - if (!I) - return R.InsertAcc(V); - - switch (I->getOpcode()) { - default: - break; - case Instruction::PHI: - // Could be the accumulator value. - return R.InsertAcc(V); - case Instruction::Add: { - // Adds should be adding together two muls, or another add and a mul to - // be within the mac chain. One of the operands may also be the - // accumulator value at which point we should stop searching. - bool ValidLHS = Search(I->getOperand(0), R); - bool ValidRHS = Search(I->getOperand(1), R); - if (!ValidLHS && !ValidLHS) - return false; - else if (ValidLHS && ValidRHS) { - R.InsertAdd(I); - return true; - } else { - R.InsertAdd(I); - return R.InsertAcc(I); - } - } - case Instruction::Mul: { - Value *MulOp0 = I->getOperand(0); - Value *MulOp1 = I->getOperand(1); - if (isa(MulOp0) && isa(MulOp1)) { - ValueList LHS; - ValueList RHS; - if (IsNarrowSequence<16>(MulOp0, LHS) && - IsNarrowSequence<16>(MulOp1, RHS)) { - R.InsertMul(I, LHS, RHS); - return true; - } - } - return false; - } - case Instruction::SExt: - return Search(I->getOperand(0), R); - } - return false; - }; - +bool ARMParallelDSP::MatchSMLAD(Function &F) { bool Changed = false; - SmallPtrSet AllAdds; - BasicBlock *Latch = L->getLoopLatch(); - for (Instruction &I : reverse(*Latch)) { - if (I.getOpcode() != Instruction::Add) + for (auto &BB : F) { + SmallPtrSet AllAdds; + if (!RecordMemoryOps(&BB)) continue; - if (AllAdds.count(&I)) - continue; + for (Instruction &I : reverse(BB)) { + if (I.getOpcode() != Instruction::Add) + continue; - const auto *Ty = I.getType(); - if (!Ty->isIntegerTy(32) && !Ty->isIntegerTy(64)) - continue; + if (AllAdds.count(&I)) + continue; - Reduction R(&I); - if (!Search(&I, R)) - continue; + const auto *Ty = I.getType(); + if (!Ty->isIntegerTy(32) && !Ty->isIntegerTy(64)) + continue; - if (!CreateParallelPairs(R)) - continue; + Reduction R(&I); + if (!Search(&I, &BB, R)) + continue; - InsertParallelMACs(R); - Changed = true; - AllAdds.insert(R.getAdds().begin(), R.getAdds().end()); + R.InsertMuls(); + LLVM_DEBUG(dbgs() << "After search, Reduction:\n"; R.dump()); + + if (!CreateParallelPairs(R)) + continue; + + InsertParallelMACs(R); + Changed = true; + AllAdds.insert(R.getAdds().begin(), R.getAdds().end()); + } } return Changed; @@ -578,87 +555,57 @@ bool ARMParallelDSP::CreateParallelPairs(Reduction &R) { return false; // Check that the muls operate directly upon sign extended loads. - for (auto &MulChain : R.getMuls()) { - // A mul has 2 operands, and a narrow op consist of sext and a load; thus - // we expect at least 4 items in this operand value list. 
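// [Editor's note: illustrative sketch only, not part of this patch. The
// function below is invented for the example; it shows the reduction shape
// that the rewritten Search()/MatchSMLAD() walk: sign-extended 16-bit loads
// feeding multiplies that accumulate into a 32-bit sum. The sequential
// a[i]/a[i+1] and b[i]/b[i+1] loads are what RecordMemoryOps pairs up, so the
// two multiply-adds can be rewritten as one widened load per array plus an
// smlad intrinsic call.]
static int dot_product_sketch(const short *a, const short *b, int acc, int n) {
  for (int i = 0; i < n; i += 2) {
    acc += a[i] * b[i];         // becomes one MulCandidate
    acc += a[i + 1] * b[i + 1]; // a second, pairable MulCandidate
  }
  return acc;
}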
- if (MulChain->size() < 4) { - LLVM_DEBUG(dbgs() << "Operand list too short.\n"); + for (auto &MulCand : R.getMuls()) { + if (!MulCand->HasTwoLoadInputs()) return false; - } - MulChain->PopulateLoads(); - ValueList &LHS = static_cast(MulChain.get())->LHS; - ValueList &RHS = static_cast(MulChain.get())->RHS; - - // Use +=2 to skip over the expected extend instructions. - for (unsigned i = 0, e = LHS.size(); i < e; i += 2) { - if (!isa(LHS[i]) || !isa(RHS[i])) - return false; - } } - auto CanPair = [&](Reduction &R, BinOpChain *PMul0, BinOpChain *PMul1) { - if (!PMul0->AreSymmetrical(PMul1)) - return false; - + auto CanPair = [&](Reduction &R, MulCandidate *PMul0, MulCandidate *PMul1) { // The first elements of each vector should be loads with sexts. If we // find that its two pairs of consecutive loads, then these can be // transformed into two wider loads and the users can be replaced with // DSP intrinsics. - for (unsigned x = 0; x < PMul0->LHS.size(); x += 2) { - auto *Ld0 = dyn_cast(PMul0->LHS[x]); - auto *Ld1 = dyn_cast(PMul1->LHS[x]); - auto *Ld2 = dyn_cast(PMul0->RHS[x]); - auto *Ld3 = dyn_cast(PMul1->RHS[x]); - - if (!Ld0 || !Ld1 || !Ld2 || !Ld3) - return false; + auto Ld0 = static_cast(PMul0->LHS); + auto Ld1 = static_cast(PMul1->LHS); + auto Ld2 = static_cast(PMul0->RHS); + auto Ld3 = static_cast(PMul1->RHS); - LLVM_DEBUG(dbgs() << "Loads:\n" - << " - " << *Ld0 << "\n" - << " - " << *Ld1 << "\n" - << " - " << *Ld2 << "\n" - << " - " << *Ld3 << "\n"); - - if (AreSequentialLoads(Ld0, Ld1, PMul0->VecLd)) { - if (AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) { - LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n"); - R.AddMulPair(PMul0, PMul1); - return true; - } else if (AreSequentialLoads(Ld3, Ld2, PMul1->VecLd)) { - LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n"); - LLVM_DEBUG(dbgs() << " exchanging Ld2 and Ld3\n"); - PMul1->Exchange = true; - R.AddMulPair(PMul0, PMul1); - return true; - } - } else if (AreSequentialLoads(Ld1, Ld0, PMul0->VecLd) && - AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) { + if (AreSequentialLoads(Ld0, Ld1, PMul0->VecLd)) { + if (AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) { LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n"); - LLVM_DEBUG(dbgs() << " exchanging Ld0 and Ld1\n"); - LLVM_DEBUG(dbgs() << " and swapping muls\n"); - PMul0->Exchange = true; - // Only the second operand can be exchanged, so swap the muls. - R.AddMulPair(PMul1, PMul0); + R.AddMulPair(PMul0, PMul1); + return true; + } else if (AreSequentialLoads(Ld3, Ld2, PMul1->VecLd)) { + LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n"); + LLVM_DEBUG(dbgs() << " exchanging Ld2 and Ld3\n"); + R.AddMulPair(PMul0, PMul1, true); return true; } + } else if (AreSequentialLoads(Ld1, Ld0, PMul0->VecLd) && + AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) { + LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n"); + LLVM_DEBUG(dbgs() << " exchanging Ld0 and Ld1\n"); + LLVM_DEBUG(dbgs() << " and swapping muls\n"); + // Only the second operand can be exchanged, so swap the muls. 
+ R.AddMulPair(PMul1, PMul0, true); + return true; } return false; }; - OpChainList &Muls = R.getMuls(); + MulCandList &Muls = R.getMuls(); const unsigned Elems = Muls.size(); - SmallPtrSet Paired; for (unsigned i = 0; i < Elems; ++i) { - BinOpChain *PMul0 = static_cast(Muls[i].get()); - if (Paired.count(PMul0->Root)) + MulCandidate *PMul0 = static_cast(Muls[i].get()); + if (PMul0->Paired) continue; for (unsigned j = 0; j < Elems; ++j) { if (i == j) continue; - BinOpChain *PMul1 = static_cast(Muls[j].get()); - if (Paired.count(PMul1->Root)) + MulCandidate *PMul1 = static_cast(Muls[j].get()); + if (PMul1->Paired) continue; const Instruction *Mul0 = PMul0->Root; @@ -668,29 +615,19 @@ bool ARMParallelDSP::CreateParallelPairs(Reduction &R) { assert(PMul0 != PMul1 && "expected different chains"); - if (CanPair(R, PMul0, PMul1)) { - Paired.insert(Mul0); - Paired.insert(Mul1); + if (CanPair(R, PMul0, PMul1)) break; - } } } return !R.getMulPairs().empty(); } - void ARMParallelDSP::InsertParallelMACs(Reduction &R) { - auto CreateSMLADCall = [&](SmallVectorImpl &VecLd0, - SmallVectorImpl &VecLd1, - Value *Acc, bool Exchange, - Instruction *InsertAfter) { + auto CreateSMLAD = [&](LoadInst* WideLd0, LoadInst *WideLd1, + Value *Acc, bool Exchange, + Instruction *InsertAfter) { // Replace the reduction chain with an intrinsic call - IntegerType *Ty = IntegerType::get(M->getContext(), 32); - LoadInst *WideLd0 = WideLoads.count(VecLd0[0]) ? - WideLoads[VecLd0[0]]->getLoad() : CreateWideLoad(VecLd0, Ty); - LoadInst *WideLd1 = WideLoads.count(VecLd1[0]) ? - WideLoads[VecLd1[0]]->getLoad() : CreateWideLoad(VecLd1, Ty); Value* Args[] = { WideLd0, WideLd1, Acc }; Function *SMLAD = nullptr; @@ -704,34 +641,95 @@ void ARMParallelDSP::InsertParallelMACs(Reduction &R) { Intrinsic::getDeclaration(M, Intrinsic::arm_smlald); IRBuilder Builder(InsertAfter->getParent(), - ++BasicBlock::iterator(InsertAfter)); + BasicBlock::iterator(InsertAfter)); Instruction *Call = Builder.CreateCall(SMLAD, Args); NumSMLAD++; return Call; }; - Instruction *InsertAfter = R.getRoot(); + // Return the instruction after the dominated instruction. + auto GetInsertPoint = [this](Value *A, Value *B) { + assert((isa(A) || isa(B)) && + "expected at least one instruction"); + + Value *V = nullptr; + if (!isa(A)) + V = B; + else if (!isa(B)) + V = A; + else + V = DT->dominates(cast(A), cast(B)) ? B : A; + + return &*++BasicBlock::iterator(cast(V)); + }; + Value *Acc = R.getAccumulator(); - if (!Acc) - Acc = ConstantInt::get(IntegerType::get(M->getContext(), 32), 0); - LLVM_DEBUG(dbgs() << "Root: " << *InsertAfter << "\n" - << "Acc: " << *Acc << "\n"); + // For any muls that were discovered but not paired, accumulate their values + // as before. + IRBuilder Builder(R.getRoot()->getParent()); + MulCandList &MulCands = R.getMuls(); + for (auto &MulCand : MulCands) { + if (MulCand->Paired) + continue; + + Instruction *Mul = cast(MulCand->Root); + LLVM_DEBUG(dbgs() << "Accumulating unpaired mul: " << *Mul << "\n"); + + if (R.getType() != Mul->getType()) { + assert(R.is64Bit() && "expected 64-bit result"); + Builder.SetInsertPoint(&*++BasicBlock::iterator(Mul)); + Mul = cast(Builder.CreateSExt(Mul, R.getRoot()->getType())); + } + + if (!Acc) { + Acc = Mul; + continue; + } + + // If Acc is the original incoming value to the reduction, it could be a + // phi. But the phi will dominate Mul, meaning that Mul will be the + // insertion point. 
+ Builder.SetInsertPoint(GetInsertPoint(Mul, Acc)); + Acc = Builder.CreateAdd(Mul, Acc); + } + + if (!Acc) { + Acc = R.is64Bit() ? + ConstantInt::get(IntegerType::get(M->getContext(), 64), 0) : + ConstantInt::get(IntegerType::get(M->getContext(), 32), 0); + } else if (Acc->getType() != R.getType()) { + Builder.SetInsertPoint(R.getRoot()); + Acc = Builder.CreateSExt(Acc, R.getType()); + } + + // Roughly sort the mul pairs in their program order. + OrderedBasicBlock OrderedBB(R.getRoot()->getParent()); + llvm::sort(R.getMulPairs(), [&OrderedBB](auto &PairA, auto &PairB) { + const Instruction *A = PairA.first->Root; + const Instruction *B = PairB.first->Root; + return OrderedBB.dominates(A, B); + }); + + IntegerType *Ty = IntegerType::get(M->getContext(), 32); for (auto &Pair : R.getMulPairs()) { - BinOpChain *PMul0 = Pair.first; - BinOpChain *PMul1 = Pair.second; - LLVM_DEBUG(dbgs() << "Muls:\n" - << "- " << *PMul0->Root << "\n" - << "- " << *PMul1->Root << "\n"); - - Acc = CreateSMLADCall(PMul0->VecLd, PMul1->VecLd, Acc, PMul1->Exchange, - InsertAfter); - InsertAfter = cast(Acc); + MulCandidate *LHSMul = Pair.first; + MulCandidate *RHSMul = Pair.second; + LoadInst *BaseLHS = LHSMul->getBaseLoad(); + LoadInst *BaseRHS = RHSMul->getBaseLoad(); + LoadInst *WideLHS = WideLoads.count(BaseLHS) ? + WideLoads[BaseLHS]->getLoad() : CreateWideLoad(LHSMul->VecLd, Ty); + LoadInst *WideRHS = WideLoads.count(BaseRHS) ? + WideLoads[BaseRHS]->getLoad() : CreateWideLoad(RHSMul->VecLd, Ty); + + Instruction *InsertAfter = GetInsertPoint(WideLHS, WideRHS); + InsertAfter = GetInsertPoint(InsertAfter, Acc); + Acc = CreateSMLAD(WideLHS, WideRHS, Acc, RHSMul->Exchange, InsertAfter); } R.UpdateRoot(cast(Acc)); } -LoadInst* ARMParallelDSP::CreateWideLoad(SmallVectorImpl &Loads, +LoadInst* ARMParallelDSP::CreateWideLoad(MemInstList &Loads, IntegerType *LoadTy) { assert(Loads.size() == 2 && "currently only support widening two loads"); @@ -758,8 +756,8 @@ LoadInst* ARMParallelDSP::CreateWideLoad(SmallVectorImpl &Loads, return; Source->moveBefore(Sink); - for (auto &U : Source->uses()) - MoveBefore(Source, U.getUser()); + for (auto &Op : Source->operands()) + MoveBefore(Op, Source); }; // Insert the load at the point of the original dominating load. @@ -784,57 +782,30 @@ LoadInst* ARMParallelDSP::CreateWideLoad(SmallVectorImpl &Loads, // Loads[0] needs trunc while Loads[1] needs a lshr and trunc. // TODO: Support big-endian as well. Value *Bottom = IRB.CreateTrunc(WideLoad, Base->getType()); - BaseSExt->setOperand(0, Bottom); + Value *NewBaseSExt = IRB.CreateSExt(Bottom, BaseSExt->getType()); + BaseSExt->replaceAllUsesWith(NewBaseSExt); IntegerType *OffsetTy = cast(Offset->getType()); Value *ShiftVal = ConstantInt::get(LoadTy, OffsetTy->getBitWidth()); Value *Top = IRB.CreateLShr(WideLoad, ShiftVal); Value *Trunc = IRB.CreateTrunc(Top, OffsetTy); - OffsetSExt->setOperand(0, Trunc); - + Value *NewOffsetSExt = IRB.CreateSExt(Trunc, OffsetSExt->getType()); + OffsetSExt->replaceAllUsesWith(NewOffsetSExt); + + LLVM_DEBUG(dbgs() << "From Base and Offset:\n" + << *Base << "\n" << *Offset << "\n" + << "Created Wide Load:\n" + << *WideLoad << "\n" + << *Bottom << "\n" + << *NewBaseSExt << "\n" + << *Top << "\n" + << *Trunc << "\n" + << *NewOffsetSExt << "\n"); WideLoads.emplace(std::make_pair(Base, - make_unique(Loads, WideLoad))); + std::make_unique(Loads, WideLoad))); return WideLoad; } -// Compare the value lists in Other to this chain. 
-bool BinOpChain::AreSymmetrical(BinOpChain *Other) { - // Element-by-element comparison of Value lists returning true if they are - // instructions with the same opcode or constants with the same value. - auto CompareValueList = [](const ValueList &VL0, - const ValueList &VL1) { - if (VL0.size() != VL1.size()) { - LLVM_DEBUG(dbgs() << "Muls are mismatching operand list lengths: " - << VL0.size() << " != " << VL1.size() << "\n"); - return false; - } - - const unsigned Pairs = VL0.size(); - - for (unsigned i = 0; i < Pairs; ++i) { - const Value *V0 = VL0[i]; - const Value *V1 = VL1[i]; - const auto *Inst0 = dyn_cast(V0); - const auto *Inst1 = dyn_cast(V1); - - if (!Inst0 || !Inst1) - return false; - - if (Inst0->isSameOperationAs(Inst1)) - continue; - - const APInt *C0, *C1; - if (!(match(V0, m_APInt(C0)) && match(V1, m_APInt(C1)) && C0 == C1)) - return false; - } - - return true; - }; - - return CompareValueList(LHS, Other->LHS) && - CompareValueList(RHS, Other->RHS); -} - Pass *llvm::createARMParallelDSPPass() { return new ARMParallelDSP(); } @@ -842,6 +813,6 @@ Pass *llvm::createARMParallelDSPPass() { char ARMParallelDSP::ID = 0; INITIALIZE_PASS_BEGIN(ARMParallelDSP, "arm-parallel-dsp", - "Transform loops to use DSP intrinsics", false, false) + "Transform functions to use DSP intrinsics", false, false) INITIALIZE_PASS_END(ARMParallelDSP, "arm-parallel-dsp", - "Transform loops to use DSP intrinsics", false, false) + "Transform functions to use DSP intrinsics", false, false) diff --git a/lib/Target/ARM/ARMPredicates.td b/lib/Target/ARM/ARMPredicates.td index 0b6b40de80dd..b008d3e2e296 100644 --- a/lib/Target/ARM/ARMPredicates.td +++ b/lib/Target/ARM/ARMPredicates.td @@ -71,7 +71,7 @@ def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">, AssemblerPredicate<"HasV8_5aOps", "armv8.5a">; def NoVFP : Predicate<"!Subtarget->hasVFP2Base()">; def HasVFP2 : Predicate<"Subtarget->hasVFP2Base()">, - AssemblerPredicate<"FeatureVFP2_D16_SP", "VFP2">; + AssemblerPredicate<"FeatureVFP2_SP", "VFP2">; def HasVFP3 : Predicate<"Subtarget->hasVFP3Base()">, AssemblerPredicate<"FeatureVFP3_D16_SP", "VFP3">; def HasVFP4 : Predicate<"Subtarget->hasVFP4Base()">, diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 92ae26b3729d..56055a15483a 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -180,7 +180,7 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>; // models the APSR when it's accessed by some special instructions. In such cases // it has the same encoding as PC. def CPSR : ARMReg<0, "cpsr">; -def APSR : ARMReg<1, "apsr">; +def APSR : ARMReg<15, "apsr">; def APSR_NZCV : ARMReg<15, "apsr_nzcv">; def SPSR : ARMReg<2, "spsr">; def FPSCR : ARMReg<3, "fpscr">; @@ -486,12 +486,20 @@ def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], // Pseudo-registers representing even-odd pairs of GPRs from R1 to R13/SP. // These are needed by instructions (e.g. ldrexd/strexd) requiring even-odd GPRs. -def Tuples2R : RegisterTuples<[gsub_0, gsub_1], - [(add R0, R2, R4, R6, R8, R10, R12), - (add R1, R3, R5, R7, R9, R11, SP)]>; +def Tuples2Rnosp : RegisterTuples<[gsub_0, gsub_1], + [(add R0, R2, R4, R6, R8, R10), + (add R1, R3, R5, R7, R9, R11)]>; + +def Tuples2Rsp : RegisterTuples<[gsub_0, gsub_1], + [(add R12), (add SP)]>; // Register class representing a pair of even-odd GPRs. 
-def GPRPair : RegisterClass<"ARM", [untyped], 64, (add Tuples2R)> { +def GPRPair : RegisterClass<"ARM", [untyped], 64, (add Tuples2Rnosp, Tuples2Rsp)> { + let Size = 64; // 2 x 32 bits, we have no predefined type of that size. +} + +// Register class representing a pair of even-odd GPRs, except (R12, SP). +def GPRPairnosp : RegisterClass<"ARM", [untyped], 64, (add Tuples2Rnosp)> { let Size = 64; // 2 x 32 bits, we have no predefined type of that size. } diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 21d32bde4710..3f0b71afd977 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -2239,9 +2239,9 @@ def A9WriteLMfpPostRA : SchedWriteVariant<[ // Distinguish between our multiple MI-level forms of the same // VLDM/VSTM instructions. def A9PreRA : SchedPredicate< - "TargetRegisterInfo::isVirtualRegister(MI->getOperand(0).getReg())">; + "Register::isVirtualRegister(MI->getOperand(0).getReg())">; def A9PostRA : SchedPredicate< - "TargetRegisterInfo::isPhysicalRegister(MI->getOperand(0).getReg())">; + "Register::isPhysicalRegister(MI->getOperand(0).getReg())">; // VLDM represents all destination registers as a single register // tuple, unlike LDM. So the number of write operands is not variadic. diff --git a/lib/Target/ARM/ARMScheduleM4.td b/lib/Target/ARM/ARMScheduleM4.td index 38c8ea2b4f35..bfa5fc0d7131 100644 --- a/lib/Target/ARM/ARMScheduleM4.td +++ b/lib/Target/ARM/ARMScheduleM4.td @@ -18,6 +18,9 @@ def CortexM4Model : SchedMachineModel { let PostRAScheduler = 1; let CompleteModel = 0; + let UnsupportedFeatures = [IsARM, HasNEON, HasDotProd, HasZCZ, HasMVEInt, + IsNotMClass, HasDPVFP, HasFPARMv8, HasFullFP16, Has8MSecExt, HasV8, + HasV8_3a, HasTrustZone, HasDFB, IsWindows]; } @@ -50,6 +53,7 @@ def : M4UnitL2; def : M4UnitL2; def : M4UnitL2I<(instregex "(t|t2)LDM")>; +def : M4UnitL2I<(instregex "(t|t2)LDR")>; // Stores we use a latency of 1 as they have no outputs @@ -78,9 +82,20 @@ def : M4UnitL1; def : M4UnitL1; def : M4UnitL1I<(instregex "(t|t2)MOV")>; def : M4UnitL1I<(instrs COPY)>; -def : M4UnitL1I<(instregex "t2IT")>; -def : M4UnitL1I<(instregex "t2SEL", "t2USAD8", - "t2(S|Q|SH|U|UQ|UH)(ADD16|ASX|SAX|SUB16|ADD8|SUB8)", "t2USADA8", "(t|t2)REV")>; +def : M4UnitL1I<(instregex "t2IT", "t2MSR", "t2MRS")>; +def : M4UnitL1I<(instregex "t2CLREX")>; +def : M4UnitL1I<(instregex "t2SEL", "t2USAD8", "t2SML[AS]", + "t2(S|Q|SH|U|UQ|UH|QD)(ADD|ASX|SAX|SUB)", "t2USADA8", "(t|t2)REV")>; + +// These instructions are not of much interest to scheduling as they will not +// be generated or it is not very useful to schedule them. They are here to make +// the model more complete. 
+def : M4UnitL1I<(instregex "t2CDP", "t2LDC", "t2MCR", "t2MRC", "t2MRRC", "t2STC")>; +def : M4UnitL1I<(instregex "tCPS", "t2ISB", "t2DSB", "t2DMB", "t2?HINT$")>; +def : M4UnitL1I<(instregex "t2?UDF$", "tBKPT", "t2DBG")>; +def : M4UnitL1I<(instregex "t?2?Int_eh_sjlj_", "tADDframe", "t?ADJCALL")>; +def : M4UnitL1I<(instregex "CMP_SWAP", "JUMPTABLE", "MEMCPY")>; +def : M4UnitL1I<(instregex "VSETLNi32", "VGETLNi32")>; def : ReadAdvance; def : ReadAdvance; @@ -112,6 +127,9 @@ def : M4UnitL1; def : M4UnitL1; def : M4UnitL1; def : M4UnitL1; +def : M4UnitL1I<(instregex "VMOVS", "FCONSTS", "VCMP", "VNEG", "VABS")>; +def : M4UnitL2I<(instregex "VMOVD")>; +def : M4UnitL1I<(instregex "VMRS", "VMSR", "FMSTAT")>; def : ReadAdvance; def : ReadAdvance; diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 978faed776b0..09603057b2c8 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -125,7 +125,7 @@ const CallLowering *ARMSubtarget::getCallLowering() const { return CallLoweringInfo.get(); } -const InstructionSelector *ARMSubtarget::getInstructionSelector() const { +InstructionSelector *ARMSubtarget::getInstructionSelector() const { return InstSelector.get(); } @@ -205,9 +205,9 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { NoARM = true; if (isAAPCS_ABI()) - stackAlignment = 8; + stackAlignment = Align(8); if (isTargetNaCl() || isAAPCS16_ABI()) - stackAlignment = 16; + stackAlignment = Align(16); // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo:: // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as @@ -253,6 +253,10 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if (isRWPI()) ReserveR9 = true; + // If MVEVectorCostFactor is still 0 (has not been set to anything else), default it to 2 + if (MVEVectorCostFactor == 0) + MVEVectorCostFactor = 2; + // FIXME: Teach TableGen to deal with these instead of doing it manually here. switch (ARMProcFamily) { case Others: @@ -296,13 +300,15 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { LdStMultipleTiming = SingleIssuePlusExtras; MaxInterleaveFactor = 4; if (!isThumb()) - PrefLoopAlignment = 3; + PrefLoopLogAlignment = 3; break; case Kryo: break; case Krait: PreISelOperandLatencyAdjustment = 1; break; + case NeoverseN1: + break; case Swift: MaxInterleaveFactor = 2; LdStMultipleTiming = SingleIssuePlusExtras; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index c2b0f052b843..ef460342a69e 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -71,6 +71,7 @@ protected: Exynos, Krait, Kryo, + NeoverseN1, Swift }; enum ARMProcClassEnum { @@ -179,11 +180,9 @@ protected: bool HasVFPv3SP = false; bool HasVFPv4SP = false; bool HasFPARMv8SP = false; - bool HasVFPv2D16 = false; bool HasVFPv3D16 = false; bool HasVFPv4D16 = false; bool HasFPARMv8D16 = false; - bool HasVFPv2D16SP = false; bool HasVFPv3D16SP = false; bool HasVFPv4D16SP = false; bool HasFPARMv8D16SP = false; @@ -450,7 +449,7 @@ protected: /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. - unsigned stackAlignment = 4; + Align stackAlignment = Align(4); /// CPUString - String name of used CPU. std::string CPUString; @@ -469,7 +468,12 @@ protected: int PreISelOperandLatencyAdjustment = 2; /// What alignment is preferred for loop bodies, in log2(bytes). 
- unsigned PrefLoopAlignment = 0; + unsigned PrefLoopLogAlignment = 0; + + /// The cost factor for MVE instructions, representing the multiple beats an + // instruction can take. The default is 2, (set in initSubtargetFeatures so + // that we can use subtarget features less than 2). + unsigned MVEVectorCostFactor = 0; /// OptMinSize - True if we're optimising for minimum code size, equal to /// the function attribute. @@ -535,7 +539,7 @@ public: } const CallLowering *getCallLowering() const override; - const InstructionSelector *getInstructionSelector() const override; + InstructionSelector *getInstructionSelector() const override; const LegalizerInfo *getLegalizerInfo() const override; const RegisterBankInfo *getRegBankInfo() const override; @@ -600,7 +604,7 @@ public: bool hasARMOps() const { return !NoARM; } - bool hasVFP2Base() const { return HasVFPv2D16SP; } + bool hasVFP2Base() const { return HasVFPv2SP; } bool hasVFP3Base() const { return HasVFPv3D16SP; } bool hasVFP4Base() const { return HasVFPv4D16SP; } bool hasFPARMv8Base() const { return HasFPARMv8D16SP; } @@ -668,6 +672,12 @@ public: bool hasSB() const { return HasSB; } bool genLongCalls() const { return GenLongCalls; } bool genExecuteOnly() const { return GenExecuteOnly; } + bool hasBaseDSP() const { + if (isThumb()) + return hasDSP(); + else + return hasV5TEOps(); + } bool hasFP16() const { return HasFP16; } bool hasD32() const { return HasD32; } @@ -812,7 +822,7 @@ public: /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every /// function for this subtarget. - unsigned getStackAlignment() const { return stackAlignment; } + Align getStackAlignment() const { return stackAlignment; } unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } @@ -853,9 +863,9 @@ public: return isROPI() || !isTargetELF(); } - unsigned getPrefLoopAlignment() const { - return PrefLoopAlignment; - } + unsigned getPrefLoopLogAlignment() const { return PrefLoopLogAlignment; } + + unsigned getMVEVectorCostFactor() const { return MVEVectorCostFactor; } bool ignoreCSRForAllocationOrder(const MachineFunction &MF, unsigned PhysReg) const override; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 7f0aae1739b3..5c8007f101d9 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -96,15 +96,16 @@ extern "C" void LLVMInitializeARMTarget() { initializeARMExpandPseudoPass(Registry); initializeThumb2SizeReducePass(Registry); initializeMVEVPTBlockPass(Registry); + initializeMVETailPredicationPass(Registry); initializeARMLowOverheadLoopsPass(Registry); } static std::unique_ptr createTLOF(const Triple &TT) { if (TT.isOSBinFormatMachO()) - return llvm::make_unique(); + return std::make_unique(); if (TT.isOSWindows()) - return llvm::make_unique(); - return llvm::make_unique(); + return std::make_unique(); + return std::make_unique(); } static ARMBaseTargetMachine::ARMABI @@ -282,7 +283,7 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. 
resetTargetOptions(F); - I = llvm::make_unique(TargetTriple, CPU, FS, *this, isLittle, + I = std::make_unique(TargetTriple, CPU, FS, *this, isLittle, F.hasMinSize()); if (!I->isThumb() && !I->hasARMOps()) @@ -447,8 +448,10 @@ bool ARMPassConfig::addPreISel() { MergeExternalByDefault)); } - if (TM->getOptLevel() != CodeGenOpt::None) + if (TM->getOptLevel() != CodeGenOpt::None) { addPass(createHardwareLoopsPass()); + addPass(createMVETailPredicationPass()); + } return false; } diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 2a8ec734a05f..86c8684d14dc 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -36,8 +36,12 @@ using namespace llvm; #define DEBUG_TYPE "armtti" +static cl::opt EnableMaskedLoadStores( + "enable-arm-maskedldst", cl::Hidden, cl::init(false), + cl::desc("Enable the generation of masked loads and stores")); + static cl::opt DisableLowOverheadLoops( - "disable-arm-loloops", cl::Hidden, cl::init(true), + "disable-arm-loloops", cl::Hidden, cl::init(false), cl::desc("Disable the generation of low-overhead loops")); bool ARMTTIImpl::areInlineCompatible(const Function *Caller, @@ -167,6 +171,42 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, if (!SrcTy.isSimple() || !DstTy.isSimple()) return BaseT::getCastInstrCost(Opcode, Dst, Src); + // The extend of a load is free + if (I && isa(I->getOperand(0))) { + static const TypeConversionCostTblEntry LoadConversionTbl[] = { + {ISD::SIGN_EXTEND, MVT::i32, MVT::i16, 0}, + {ISD::ZERO_EXTEND, MVT::i32, MVT::i16, 0}, + {ISD::SIGN_EXTEND, MVT::i32, MVT::i8, 0}, + {ISD::ZERO_EXTEND, MVT::i32, MVT::i8, 0}, + {ISD::SIGN_EXTEND, MVT::i16, MVT::i8, 0}, + {ISD::ZERO_EXTEND, MVT::i16, MVT::i8, 0}, + {ISD::SIGN_EXTEND, MVT::i64, MVT::i32, 1}, + {ISD::ZERO_EXTEND, MVT::i64, MVT::i32, 1}, + {ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 1}, + {ISD::ZERO_EXTEND, MVT::i64, MVT::i16, 1}, + {ISD::SIGN_EXTEND, MVT::i64, MVT::i8, 1}, + {ISD::ZERO_EXTEND, MVT::i64, MVT::i8, 1}, + }; + if (const auto *Entry = ConvertCostTableLookup( + LoadConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT())) + return Entry->Cost; + + static const TypeConversionCostTblEntry MVELoadConversionTbl[] = { + {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0}, + {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0}, + {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 0}, + {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 0}, + {ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 0}, + {ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 0}, + }; + if (SrcTy.isVector() && ST->hasMVEIntegerOps()) { + if (const auto *Entry = + ConvertCostTableLookup(MVELoadConversionTbl, ISD, + DstTy.getSimpleVT(), SrcTy.getSimpleVT())) + return Entry->Cost; + } + } + // Some arithmetic, load and store operations have specific instructions // to cast up/down their types automatically at no extra cost. // TODO: Get these tables to know at least what the related operations are. @@ -313,6 +353,31 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, return Entry->Cost; } + // MVE extend costs, taken from codegen tests. i8->i16 or i16->i32 is one + // instruction, i8->i32 is two. i64 zexts are an VAND with a constant, sext + // are linearised so take more. 
+ static const TypeConversionCostTblEntry MVEVectorConversionTbl[] = { + { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 }, + { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i8, 10 }, + { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i8, 2 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i16, 10 }, + { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i16, 2 }, + { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 8 }, + { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 2 }, + }; + + if (SrcTy.isVector() && ST->hasMVEIntegerOps()) { + if (const auto *Entry = ConvertCostTableLookup(MVEVectorConversionTbl, + ISD, DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost * ST->getMVEVectorCostFactor(); + } + // Scalar integer conversion costs. static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = { // i16 -> i64 requires two dependent operations. @@ -332,7 +397,10 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, return Entry->Cost; } - return BaseT::getCastInstrCost(Opcode, Dst, Src); + int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy() + ? ST->getMVEVectorCostFactor() + : 1; + return BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src); } int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, @@ -343,8 +411,8 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32) return 3; - if ((Opcode == Instruction::InsertElement || - Opcode == Instruction::ExtractElement)) { + if (ST->hasNEON() && (Opcode == Instruction::InsertElement || + Opcode == Instruction::ExtractElement)) { // Cross-class copies are expensive on many microarchitectures, // so assume they are expensive by default. if (ValTy->getVectorElementType()->isIntegerTy()) @@ -357,6 +425,17 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U); } + if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement || + Opcode == Instruction::ExtractElement)) { + // We say MVE moves costs at least the MVEVectorCostFactor, even though + // they are scalar instructions. This helps prevent mixing scalar and + // vector, to prevent vectorising where we end up just scalarising the + // result anyway. + return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), + ST->getMVEVectorCostFactor()) * + ValTy->getVectorNumElements() / 2; + } + return BaseT::getVectorInstrCost(Opcode, ValTy, Index); } @@ -385,7 +464,10 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, return LT.first; } - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); + int BaseCost = ST->hasMVEIntegerOps() && ValTy->isVectorTy() + ? 
ST->getMVEVectorCostFactor() + : 1; + return BaseCost * BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); } int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, @@ -397,13 +479,37 @@ int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, unsigned NumVectorInstToHideOverhead = 10; int MaxMergeDistance = 64; - if (Ty->isVectorTy() && SE && - !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1)) - return NumVectorInstToHideOverhead; + if (ST->hasNEON()) { + if (Ty->isVectorTy() && SE && + !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1)) + return NumVectorInstToHideOverhead; - // In many cases the address computation is not merged into the instruction - // addressing mode. - return 1; + // In many cases the address computation is not merged into the instruction + // addressing mode. + return 1; + } + return BaseT::getAddressComputationCost(Ty, SE, Ptr); +} + +bool ARMTTIImpl::isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment) { + if (!EnableMaskedLoadStores || !ST->hasMVEIntegerOps()) + return false; + + if (auto *VecTy = dyn_cast(DataTy)) { + // Don't support v2i1 yet. + if (VecTy->getNumElements() == 2) + return false; + + // We don't support extending fp types. + unsigned VecWidth = DataTy->getPrimitiveSizeInBits(); + if (VecWidth != 128 && VecTy->getElementType()->isFloatingPointTy()) + return false; + } + + unsigned EltWidth = DataTy->getScalarSizeInBits(); + return (EltWidth == 32 && (!Alignment || Alignment >= 4)) || + (EltWidth == 16 && (!Alignment || Alignment >= 2)) || + (EltWidth == 8); } int ARMTTIImpl::getMemcpyCost(const Instruction *I) { @@ -442,78 +548,96 @@ int ARMTTIImpl::getMemcpyCost(const Instruction *I) { int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { - if (Kind == TTI::SK_Broadcast) { - static const CostTblEntry NEONDupTbl[] = { - // VDUP handles these cases. - {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1}, - - {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}}; - - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - - if (const auto *Entry = CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE, - LT.second)) - return LT.first * Entry->Cost; - - return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); - } - if (Kind == TTI::SK_Reverse) { - static const CostTblEntry NEONShuffleTbl[] = { - // Reverse shuffle cost one instruction if we are shuffling within a - // double word (vrev) or two if we shuffle a quad word (vrev, vext). 
- {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1}, - - {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, - {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, - {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2}, - {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}}; - - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - - if (const auto *Entry = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, - LT.second)) - return LT.first * Entry->Cost; - - return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); - } - if (Kind == TTI::SK_Select) { - static const CostTblEntry NEONSelShuffleTbl[] = { - // Select shuffle cost table for ARM. Cost is the number of instructions - // required to create the shuffled vector. + if (ST->hasNEON()) { + if (Kind == TTI::SK_Broadcast) { + static const CostTblEntry NEONDupTbl[] = { + // VDUP handles these cases. + {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1}, + + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}}; + + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + + if (const auto *Entry = + CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE, LT.second)) + return LT.first * Entry->Cost; + } + if (Kind == TTI::SK_Reverse) { + static const CostTblEntry NEONShuffleTbl[] = { + // Reverse shuffle cost one instruction if we are shuffling within a + // double word (vrev) or two if we shuffle a quad word (vrev, vext). + {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1}, + + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}}; + + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + + if (const auto *Entry = + CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) + return LT.first * Entry->Cost; + } + if (Kind == TTI::SK_Select) { + static const CostTblEntry NEONSelShuffleTbl[] = { + // Select shuffle cost table for ARM. Cost is the number of + // instructions + // required to create the shuffled vector. 
- {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, - {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, - {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2}, - {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16}, + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16}, - {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}}; + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}}; - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl, + ISD::VECTOR_SHUFFLE, LT.second)) + return LT.first * Entry->Cost; + } + } + if (ST->hasMVEIntegerOps()) { + if (Kind == TTI::SK_Broadcast) { + static const CostTblEntry MVEDupTbl[] = { + // VDUP handles these cases. + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v8f16, 1}}; + + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + + if (const auto *Entry = CostTableLookup(MVEDupTbl, ISD::VECTOR_SHUFFLE, + LT.second)) + return LT.first * Entry->Cost * ST->getMVEVectorCostFactor(); + } } - return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); + int BaseCost = ST->hasMVEIntegerOps() && Tp->isVectorTy() + ? ST->getMVEVectorCostFactor() + : 1; + return BaseCost * BaseT::getShuffleCost(Kind, Tp, Index, SubTp); } int ARMTTIImpl::getArithmeticInstrCost( @@ -567,38 +691,64 @@ int ARMTTIImpl::getArithmeticInstrCost( // Multiplication. }; - if (ST->hasNEON()) + if (ST->hasNEON()) { if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second)) return LT.first * Entry->Cost; - int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, - Opd1PropInfo, Opd2PropInfo); - - // This is somewhat of a hack. The problem that we are facing is that SROA - // creates a sequence of shift, and, or instructions to construct values. - // These sequences are recognized by the ISel and have zero-cost. Not so for - // the vectorized code. Because we have support for v2i64 but not i64 those - // sequences look particularly beneficial to vectorize. - // To work around this we increase the cost of v2i64 operations to make them - // seem less beneficial. - if (LT.second == MVT::v2i64 && - Op2Info == TargetTransformInfo::OK_UniformConstantValue) - Cost += 4; - - return Cost; + int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, + Opd1PropInfo, Opd2PropInfo); + + // This is somewhat of a hack. The problem that we are facing is that SROA + // creates a sequence of shift, and, or instructions to construct values. + // These sequences are recognized by the ISel and have zero-cost. Not so for + // the vectorized code. Because we have support for v2i64 but not i64 those + // sequences look particularly beneficial to vectorize. + // To work around this we increase the cost of v2i64 operations to make them + // seem less beneficial. 
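// [Editor's note: illustrative only, not part of this patch. The helper name
// is invented for the sketch. It shows the kind of shift/or (plus masking
// 'and') sequence SROA emits to rebuild a 64-bit value from 32-bit pieces;
// ISel matches such scalar sequences for free, but the equivalent v2i64
// vector code is not free, which is why the surrounding code adds 4 to the
// cost of v2i64 operations with a uniform constant operand.]
#include <cstdint>
static uint64_t rebuild_u64_sketch(uint32_t lo, uint32_t hi) {
  return (static_cast<uint64_t>(hi) << 32) | lo;
}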
+ if (LT.second == MVT::v2i64 && + Op2Info == TargetTransformInfo::OK_UniformConstantValue) + Cost += 4; + + return Cost; + } + + int BaseCost = ST->hasMVEIntegerOps() && Ty->isVectorTy() + ? ST->getMVEVectorCostFactor() + : 1; + + // The rest of this mostly follows what is done in BaseT::getArithmeticInstrCost, + // without treating floats as more expensive that scalars or increasing the + // costs for custom operations. The results is also multiplied by the + // MVEVectorCostFactor where appropriate. + if (TLI->isOperationLegalOrCustomOrPromote(ISDOpcode, LT.second)) + return LT.first * BaseCost; + + // Else this is expand, assume that we need to scalarize this op. + if (Ty->isVectorTy()) { + unsigned Num = Ty->getVectorNumElements(); + unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType()); + // Return the cost of multiple scalar invocation plus the cost of + // inserting and extracting the values. + return BaseT::getScalarizationOverhead(Ty, Args) + Num * Cost; + } + + return BaseCost; } int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I) { std::pair LT = TLI->getTypeLegalizationCost(DL, Src); - if (Src->isVectorTy() && Alignment != 16 && + if (ST->hasNEON() && Src->isVectorTy() && Alignment != 16 && Src->getVectorElementType()->isDoubleTy()) { // Unaligned loads/stores are extremely inefficient. // We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr. return LT.first * 4; } - return LT.first; + int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy() + ? ST->getMVEVectorCostFactor() + : 1; + return BaseCost * LT.first; } int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, @@ -893,6 +1043,11 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, } return; } + // Don't unroll vectorised loop. MVE does not benefit from it as much as + // scalar code. + if (I.getType()->isVectorTy()) + return; + SmallVector Operands(I.value_op_begin(), I.value_op_end()); Cost += getUserCost(&I, Operands); @@ -914,3 +1069,28 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, if (Cost < 12) UP.Force = true; } + +bool ARMTTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty, + TTI::ReductionFlags Flags) const { + assert(isa(Ty) && "Expected Ty to be a vector type"); + unsigned ScalarBits = Ty->getScalarSizeInBits(); + if (!ST->hasMVEIntegerOps()) + return false; + + switch (Opcode) { + case Instruction::FAdd: + case Instruction::FMul: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Mul: + case Instruction::FCmp: + return false; + case Instruction::ICmp: + case Instruction::Add: + return ScalarBits < 64 && ScalarBits * Ty->getVectorNumElements() == 128; + default: + llvm_unreachable("Unhandled reduction opcode"); + } + return false; +} diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h index 52f6ea4a6e2f..a878fdcfe3c7 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/lib/Target/ARM/ARMTargetTransformInfo.h @@ -101,9 +101,9 @@ public: /// Floating-point computation using ARMv8 AArch32 Advanced /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD - /// is IEEE-754 compliant, but it's not covered in this target. + /// and Arm MVE are IEEE-754 compliant. 
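The new useReductionIntrinsic hook in this hunk reports only integer add and icmp reductions as suitable for MVE, and only when the scalar elements fill a full 128-bit vector. A rough illustration of the condition it checks (plain arithmetic, not LLVM API):

  // ScalarBits < 64 && ScalarBits * NumElements == 128
  bool fitsMVEReduction(unsigned ScalarBits, unsigned NumElements) {
    return ScalarBits < 64 && ScalarBits * NumElements == 128;
  }
  // fitsMVEReduction(32, 4)  -> true  (v4i32)
  // fitsMVEReduction(8, 16)  -> true  (v16i8)
  // fitsMVEReduction(64, 2)  -> false (v2i64 keeps the default lowering)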
bool isFPVectorizationPotentiallyUnsafe() { - return !ST->isTargetDarwin(); + return !ST->isTargetDarwin() && !ST->hasMVEFloatOps(); } /// \name Scalar TTI Implementations @@ -122,10 +122,13 @@ public: /// \name Vector TTI Implementations /// @{ - unsigned getNumberOfRegisters(bool Vector) { + unsigned getNumberOfRegisters(unsigned ClassID) const { + bool Vector = (ClassID == 1); if (Vector) { if (ST->hasNEON()) return 16; + if (ST->hasMVEIntegerOps()) + return 8; return 0; } @@ -138,6 +141,8 @@ public: if (Vector) { if (ST->hasNEON()) return 128; + if (ST->hasMVEIntegerOps()) + return 128; return 0; } @@ -148,10 +153,23 @@ public: return ST->getMaxInterleaveFactor(); } + bool isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment); + + bool isLegalMaskedStore(Type *DataTy, MaybeAlign Alignment) { + return isLegalMaskedLoad(DataTy, Alignment); + } + int getMemcpyCost(const Instruction *I); int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); + bool useReductionIntrinsic(unsigned Opcode, Type *Ty, + TTI::ReductionFlags Flags) const; + + bool shouldExpandReduction(const IntrinsicInst *II) const { + return false; + } + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I = nullptr); diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 1da9452f1d22..d2c355c1da75 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -2275,6 +2275,14 @@ public: return Value >= 1 && Value <= 32; } + bool isMveSaturateOp() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) return false; + uint64_t Value = CE->getValue(); + return Value == 48 || Value == 64; + } + bool isITCondCodeNoAL() const { if (!isITCondCode()) return false; ARMCC::CondCodes CC = getCondCode(); @@ -2479,28 +2487,28 @@ public: void addModImmNotOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); uint32_t Enc = ARM_AM::getSOImmVal(~CE->getValue()); Inst.addOperand(MCOperand::createImm(Enc)); } void addModImmNegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); uint32_t Enc = ARM_AM::getSOImmVal(-CE->getValue()); Inst.addOperand(MCOperand::createImm(Enc)); } void addThumbModImmNeg8_255Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); uint32_t Val = -CE->getValue(); Inst.addOperand(MCOperand::createImm(Val)); } void addThumbModImmNeg1_7Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); uint32_t Val = -CE->getValue(); Inst.addOperand(MCOperand::createImm(Val)); } @@ -2523,19 +2531,19 @@ public: void addFBits16Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(16 - CE->getValue())); } void addFBits32Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const 
MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(32 - CE->getValue())); } void addFPImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue())); Inst.addOperand(MCOperand::createImm(Val)); } @@ -2544,7 +2552,7 @@ public: assert(N == 1 && "Invalid number of operands!"); // FIXME: We really want to scale the value here, but the LDRD/STRD // instruction don't encode operands that way yet. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue())); } @@ -2552,35 +2560,31 @@ public: assert(N == 1 && "Invalid number of operands!"); // FIXME: We really want to scale the value here, but the VSTR/VLDR_VSYSR // instruction don't encode operands that way yet. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue())); } void addImm7Shift0Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); - assert(CE != nullptr && "Invalid operand type!"); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue())); } void addImm7Shift1Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); - assert(CE != nullptr && "Invalid operand type!"); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue())); } void addImm7Shift2Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); - assert(CE != nullptr && "Invalid operand type!"); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue())); } void addImm7Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); - assert(CE != nullptr && "Invalid operand type!"); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue())); } @@ -2588,7 +2592,7 @@ public: assert(N == 1 && "Invalid number of operands!"); // The immediate is scaled by four in the encoding and is stored // in the MCInst as such. Lop off the low two bits here. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue() / 4)); } @@ -2596,7 +2600,7 @@ public: assert(N == 1 && "Invalid number of operands!"); // The immediate is scaled by four in the encoding and is stored // in the MCInst as such. Lop off the low two bits here. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(-(CE->getValue() / 4))); } @@ -2604,7 +2608,7 @@ public: assert(N == 1 && "Invalid number of operands!"); // The immediate is scaled by four in the encoding and is stored // in the MCInst as such. Lop off the low two bits here. 
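The change repeated throughout this hunk replaces dyn_cast with cast for operands whose kind the matching is*() predicate has already validated, so a null check adds nothing. A small standalone sketch of the difference between the two LLVM casting helpers:

  #include "llvm/MC/MCExpr.h"
  #include "llvm/Support/Casting.h"
  using namespace llvm;

  int64_t getConstantOrZero(const MCExpr *E) {
    // dyn_cast returns nullptr when E is not an MCConstantExpr, so it must be checked.
    if (const auto *CE = dyn_cast<MCConstantExpr>(E))
      return CE->getValue();
    return 0;
  }

  int64_t getConstant(const MCExpr *E) {
    // cast asserts (in asserts builds) that the conversion is valid and never
    // returns null, which is what the add*Operands methods rely on here.
    return cast<MCConstantExpr>(E)->getValue();
  }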
- const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue() / 4)); } @@ -2612,7 +2616,7 @@ public: assert(N == 1 && "Invalid number of operands!"); // The constant encodes as the immediate-1, and we store in the instruction // the bits as encoded, so subtract off one here. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue() - 1)); } @@ -2620,7 +2624,7 @@ public: assert(N == 1 && "Invalid number of operands!"); // The constant encodes as the immediate-1, and we store in the instruction // the bits as encoded, so subtract off one here. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue() - 1)); } @@ -2628,7 +2632,7 @@ public: assert(N == 1 && "Invalid number of operands!"); // The constant encodes as the immediate, except for 32, which encodes as // zero. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); unsigned Imm = CE->getValue(); Inst.addOperand(MCOperand::createImm((Imm == 32 ? 0 : Imm))); } @@ -2637,7 +2641,7 @@ public: assert(N == 1 && "Invalid number of operands!"); // An ASR value of 32 encodes as 0, so that's how we want to add it to // the instruction as well. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); int Val = CE->getValue(); Inst.addOperand(MCOperand::createImm(Val == 32 ? 0 : Val)); } @@ -2646,7 +2650,7 @@ public: assert(N == 1 && "Invalid number of operands!"); // The operand is actually a t2_so_imm, but we have its bitwise // negation in the assembly source, so twiddle it here. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(~(uint32_t)CE->getValue())); } @@ -2654,7 +2658,7 @@ public: assert(N == 1 && "Invalid number of operands!"); // The operand is actually a t2_so_imm, but we have its // negation in the assembly source, so twiddle it here. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(-(uint32_t)CE->getValue())); } @@ -2662,7 +2666,7 @@ public: assert(N == 1 && "Invalid number of operands!"); // The operand is actually an imm0_4095, but we have its // negation in the assembly source, so twiddle it here. 
- const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(-(uint32_t)CE->getValue())); } @@ -2671,9 +2675,7 @@ public: Inst.addOperand(MCOperand::createImm(CE->getValue() >> 2)); return; } - - const MCSymbolRefExpr *SR = dyn_cast(Imm.Val); - assert(SR && "Unknown value type!"); + const MCSymbolRefExpr *SR = cast(Imm.Val); Inst.addOperand(MCOperand::createExpr(SR)); } @@ -2685,10 +2687,7 @@ public: Inst.addOperand(MCOperand::createImm(CE->getValue())); return; } - - const MCSymbolRefExpr *SR = dyn_cast(Imm.Val); - - assert(SR && "Unknown value type!"); + const MCSymbolRefExpr *SR = cast(Imm.Val); Inst.addOperand(MCOperand::createExpr(SR)); return; } @@ -2750,7 +2749,7 @@ public: return; } - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); int Val = CE->getValue(); Inst.addOperand(MCOperand::createImm(Val)); } @@ -3130,7 +3129,7 @@ public: void addPowerTwoOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue())); } @@ -3225,14 +3224,14 @@ public: assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. // Mask in that this is an i8 splat. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue() | 0xe00)); } void addNEONi16splatOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); unsigned Value = CE->getValue(); Value = ARM_AM::encodeNEONi16splat(Value); Inst.addOperand(MCOperand::createImm(Value)); @@ -3241,7 +3240,7 @@ public: void addNEONi16splatNotOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); unsigned Value = CE->getValue(); Value = ARM_AM::encodeNEONi16splat(~Value & 0xffff); Inst.addOperand(MCOperand::createImm(Value)); @@ -3250,7 +3249,7 @@ public: void addNEONi32splatOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); unsigned Value = CE->getValue(); Value = ARM_AM::encodeNEONi32splat(Value); Inst.addOperand(MCOperand::createImm(Value)); @@ -3259,7 +3258,7 @@ public: void addNEONi32splatNotOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); unsigned Value = CE->getValue(); Value = ARM_AM::encodeNEONi32splat(~Value); Inst.addOperand(MCOperand::createImm(Value)); @@ -3267,7 +3266,7 @@ public: void addNEONi8ReplicateOperands(MCInst &Inst, bool Inv) const { // The immediate encodes the type of constant as well as the value. 
- const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); assert((Inst.getOpcode() == ARM::VMOVv8i8 || Inst.getOpcode() == ARM::VMOVv16i8) && "All instructions that wants to replicate non-zero byte " @@ -3298,7 +3297,7 @@ public: void addNEONi32vmovOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); unsigned Value = encodeNeonVMOVImmediate(CE->getValue()); Inst.addOperand(MCOperand::createImm(Value)); } @@ -3310,7 +3309,7 @@ public: void addNEONvmovi16ReplicateOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); assert((Inst.getOpcode() == ARM::VMOVv4i16 || Inst.getOpcode() == ARM::VMOVv8i16 || Inst.getOpcode() == ARM::VMVNv4i16 || @@ -3327,14 +3326,14 @@ public: void addNEONi32vmovNegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); unsigned Value = encodeNeonVMOVImmediate(~CE->getValue()); Inst.addOperand(MCOperand::createImm(Value)); } void addNEONvmovi32ReplicateOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); assert((Inst.getOpcode() == ARM::VMOVv2i32 || Inst.getOpcode() == ARM::VMOVv4i32 || Inst.getOpcode() == ARM::VMVNv2i32 || @@ -3349,7 +3348,7 @@ public: void addNEONi64splatOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); uint64_t Value = CE->getValue(); unsigned Imm = 0; for (unsigned i = 0; i < 8; ++i, Value >>= 8) { @@ -3360,20 +3359,28 @@ public: void addComplexRotationEvenOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue() / 90)); } void addComplexRotationOddOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm((CE->getValue() - 90) / 180)); } + void addMveSaturateOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = cast(getImm()); + unsigned Imm = CE->getValue(); + assert((Imm == 48 || Imm == 64) && "Invalid saturate operand"); + Inst.addOperand(MCOperand::createImm(Imm == 48 ? 
1 : 0)); + } + void print(raw_ostream &OS) const override; static std::unique_ptr CreateITMask(unsigned Mask, SMLoc S) { - auto Op = make_unique(k_ITCondMask); + auto Op = std::make_unique(k_ITCondMask); Op->ITMask.Mask = Mask; Op->StartLoc = S; Op->EndLoc = S; @@ -3382,7 +3389,7 @@ public: static std::unique_ptr CreateCondCode(ARMCC::CondCodes CC, SMLoc S) { - auto Op = make_unique(k_CondCode); + auto Op = std::make_unique(k_CondCode); Op->CC.Val = CC; Op->StartLoc = S; Op->EndLoc = S; @@ -3391,7 +3398,7 @@ public: static std::unique_ptr CreateVPTPred(ARMVCC::VPTCodes CC, SMLoc S) { - auto Op = make_unique(k_VPTPred); + auto Op = std::make_unique(k_VPTPred); Op->VCC.Val = CC; Op->StartLoc = S; Op->EndLoc = S; @@ -3399,7 +3406,7 @@ public: } static std::unique_ptr CreateCoprocNum(unsigned CopVal, SMLoc S) { - auto Op = make_unique(k_CoprocNum); + auto Op = std::make_unique(k_CoprocNum); Op->Cop.Val = CopVal; Op->StartLoc = S; Op->EndLoc = S; @@ -3407,7 +3414,7 @@ public: } static std::unique_ptr CreateCoprocReg(unsigned CopVal, SMLoc S) { - auto Op = make_unique(k_CoprocReg); + auto Op = std::make_unique(k_CoprocReg); Op->Cop.Val = CopVal; Op->StartLoc = S; Op->EndLoc = S; @@ -3416,7 +3423,7 @@ public: static std::unique_ptr CreateCoprocOption(unsigned Val, SMLoc S, SMLoc E) { - auto Op = make_unique(k_CoprocOption); + auto Op = std::make_unique(k_CoprocOption); Op->Cop.Val = Val; Op->StartLoc = S; Op->EndLoc = E; @@ -3424,7 +3431,7 @@ public: } static std::unique_ptr CreateCCOut(unsigned RegNum, SMLoc S) { - auto Op = make_unique(k_CCOut); + auto Op = std::make_unique(k_CCOut); Op->Reg.RegNum = RegNum; Op->StartLoc = S; Op->EndLoc = S; @@ -3432,7 +3439,7 @@ public: } static std::unique_ptr CreateToken(StringRef Str, SMLoc S) { - auto Op = make_unique(k_Token); + auto Op = std::make_unique(k_Token); Op->Tok.Data = Str.data(); Op->Tok.Length = Str.size(); Op->StartLoc = S; @@ -3442,7 +3449,7 @@ public: static std::unique_ptr CreateReg(unsigned RegNum, SMLoc S, SMLoc E) { - auto Op = make_unique(k_Register); + auto Op = std::make_unique(k_Register); Op->Reg.RegNum = RegNum; Op->StartLoc = S; Op->EndLoc = E; @@ -3453,7 +3460,7 @@ public: CreateShiftedRegister(ARM_AM::ShiftOpc ShTy, unsigned SrcReg, unsigned ShiftReg, unsigned ShiftImm, SMLoc S, SMLoc E) { - auto Op = make_unique(k_ShiftedRegister); + auto Op = std::make_unique(k_ShiftedRegister); Op->RegShiftedReg.ShiftTy = ShTy; Op->RegShiftedReg.SrcReg = SrcReg; Op->RegShiftedReg.ShiftReg = ShiftReg; @@ -3466,7 +3473,7 @@ public: static std::unique_ptr CreateShiftedImmediate(ARM_AM::ShiftOpc ShTy, unsigned SrcReg, unsigned ShiftImm, SMLoc S, SMLoc E) { - auto Op = make_unique(k_ShiftedImmediate); + auto Op = std::make_unique(k_ShiftedImmediate); Op->RegShiftedImm.ShiftTy = ShTy; Op->RegShiftedImm.SrcReg = SrcReg; Op->RegShiftedImm.ShiftImm = ShiftImm; @@ -3477,7 +3484,7 @@ public: static std::unique_ptr CreateShifterImm(bool isASR, unsigned Imm, SMLoc S, SMLoc E) { - auto Op = make_unique(k_ShifterImmediate); + auto Op = std::make_unique(k_ShifterImmediate); Op->ShifterImm.isASR = isASR; Op->ShifterImm.Imm = Imm; Op->StartLoc = S; @@ -3487,7 +3494,7 @@ public: static std::unique_ptr CreateRotImm(unsigned Imm, SMLoc S, SMLoc E) { - auto Op = make_unique(k_RotateImmediate); + auto Op = std::make_unique(k_RotateImmediate); Op->RotImm.Imm = Imm; Op->StartLoc = S; Op->EndLoc = E; @@ -3496,7 +3503,7 @@ public: static std::unique_ptr CreateModImm(unsigned Bits, unsigned Rot, SMLoc S, SMLoc E) { - auto Op = make_unique(k_ModifiedImmediate); + auto 
Op = std::make_unique(k_ModifiedImmediate); Op->ModImm.Bits = Bits; Op->ModImm.Rot = Rot; Op->StartLoc = S; @@ -3506,7 +3513,7 @@ public: static std::unique_ptr CreateConstantPoolImm(const MCExpr *Val, SMLoc S, SMLoc E) { - auto Op = make_unique(k_ConstantPoolImmediate); + auto Op = std::make_unique(k_ConstantPoolImmediate); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; @@ -3515,7 +3522,7 @@ public: static std::unique_ptr CreateBitfield(unsigned LSB, unsigned Width, SMLoc S, SMLoc E) { - auto Op = make_unique(k_BitfieldDescriptor); + auto Op = std::make_unique(k_BitfieldDescriptor); Op->Bitfield.LSB = LSB; Op->Bitfield.Width = Width; Op->StartLoc = S; @@ -3543,16 +3550,15 @@ public: Kind = k_SPRRegisterList; } - // Sort based on the register encoding values. - array_pod_sort(Regs.begin(), Regs.end()); - if (Kind == k_RegisterList && Regs.back().second == ARM::APSR) Kind = k_RegisterListWithAPSR; - auto Op = make_unique(Kind); - for (SmallVectorImpl>::const_iterator - I = Regs.begin(), E = Regs.end(); I != E; ++I) - Op->Registers.push_back(I->second); + assert(std::is_sorted(Regs.begin(), Regs.end()) && + "Register list must be sorted by encoding"); + + auto Op = std::make_unique(Kind); + for (const auto &P : Regs) + Op->Registers.push_back(P.second); Op->StartLoc = StartLoc; Op->EndLoc = EndLoc; @@ -3563,7 +3569,7 @@ public: unsigned Count, bool isDoubleSpaced, SMLoc S, SMLoc E) { - auto Op = make_unique(k_VectorList); + auto Op = std::make_unique(k_VectorList); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; Op->VectorList.isDoubleSpaced = isDoubleSpaced; @@ -3575,7 +3581,7 @@ public: static std::unique_ptr CreateVectorListAllLanes(unsigned RegNum, unsigned Count, bool isDoubleSpaced, SMLoc S, SMLoc E) { - auto Op = make_unique(k_VectorListAllLanes); + auto Op = std::make_unique(k_VectorListAllLanes); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; Op->VectorList.isDoubleSpaced = isDoubleSpaced; @@ -3587,7 +3593,7 @@ public: static std::unique_ptr CreateVectorListIndexed(unsigned RegNum, unsigned Count, unsigned Index, bool isDoubleSpaced, SMLoc S, SMLoc E) { - auto Op = make_unique(k_VectorListIndexed); + auto Op = std::make_unique(k_VectorListIndexed); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; Op->VectorList.LaneIndex = Index; @@ -3599,7 +3605,7 @@ public: static std::unique_ptr CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, MCContext &Ctx) { - auto Op = make_unique(k_VectorIndex); + auto Op = std::make_unique(k_VectorIndex); Op->VectorIndex.Val = Idx; Op->StartLoc = S; Op->EndLoc = E; @@ -3608,7 +3614,7 @@ public: static std::unique_ptr CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { - auto Op = make_unique(k_Immediate); + auto Op = std::make_unique(k_Immediate); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; @@ -3620,7 +3626,7 @@ public: unsigned OffsetRegNum, ARM_AM::ShiftOpc ShiftType, unsigned ShiftImm, unsigned Alignment, bool isNegative, SMLoc S, SMLoc E, SMLoc AlignmentLoc = SMLoc()) { - auto Op = make_unique(k_Memory); + auto Op = std::make_unique(k_Memory); Op->Memory.BaseRegNum = BaseRegNum; Op->Memory.OffsetImm = OffsetImm; Op->Memory.OffsetRegNum = OffsetRegNum; @@ -3637,7 +3643,7 @@ public: static std::unique_ptr CreatePostIdxReg(unsigned RegNum, bool isAdd, ARM_AM::ShiftOpc ShiftTy, unsigned ShiftImm, SMLoc S, SMLoc E) { - auto Op = make_unique(k_PostIndexRegister); + auto Op = std::make_unique(k_PostIndexRegister); Op->PostIdxReg.RegNum = RegNum; Op->PostIdxReg.isAdd = isAdd; Op->PostIdxReg.ShiftTy = 
ShiftTy; @@ -3649,7 +3655,7 @@ public: static std::unique_ptr CreateMemBarrierOpt(ARM_MB::MemBOpt Opt, SMLoc S) { - auto Op = make_unique(k_MemBarrierOpt); + auto Op = std::make_unique(k_MemBarrierOpt); Op->MBOpt.Val = Opt; Op->StartLoc = S; Op->EndLoc = S; @@ -3658,7 +3664,7 @@ public: static std::unique_ptr CreateInstSyncBarrierOpt(ARM_ISB::InstSyncBOpt Opt, SMLoc S) { - auto Op = make_unique(k_InstSyncBarrierOpt); + auto Op = std::make_unique(k_InstSyncBarrierOpt); Op->ISBOpt.Val = Opt; Op->StartLoc = S; Op->EndLoc = S; @@ -3667,7 +3673,7 @@ public: static std::unique_ptr CreateTraceSyncBarrierOpt(ARM_TSB::TraceSyncBOpt Opt, SMLoc S) { - auto Op = make_unique(k_TraceSyncBarrierOpt); + auto Op = std::make_unique(k_TraceSyncBarrierOpt); Op->TSBOpt.Val = Opt; Op->StartLoc = S; Op->EndLoc = S; @@ -3676,7 +3682,7 @@ public: static std::unique_ptr CreateProcIFlags(ARM_PROC::IFlags IFlags, SMLoc S) { - auto Op = make_unique(k_ProcIFlags); + auto Op = std::make_unique(k_ProcIFlags); Op->IFlags.Val = IFlags; Op->StartLoc = S; Op->EndLoc = S; @@ -3684,7 +3690,7 @@ public: } static std::unique_ptr CreateMSRMask(unsigned MMask, SMLoc S) { - auto Op = make_unique(k_MSRMask); + auto Op = std::make_unique(k_MSRMask); Op->MMask.Val = MMask; Op->StartLoc = S; Op->EndLoc = S; @@ -3692,7 +3698,7 @@ public: } static std::unique_ptr CreateBankedReg(unsigned Reg, SMLoc S) { - auto Op = make_unique(k_BankedReg); + auto Op = std::make_unique(k_BankedReg); Op->BankedReg.Val = Reg; Op->StartLoc = S; Op->EndLoc = S; @@ -4253,6 +4259,24 @@ static unsigned getNextRegister(unsigned Reg) { } } +// Insert an pair in an ordered vector. Return true on +// success, or false, if duplicate encoding found. +static bool +insertNoDuplicates(SmallVectorImpl> &Regs, + unsigned Enc, unsigned Reg) { + Regs.emplace_back(Enc, Reg); + for (auto I = Regs.rbegin(), J = I + 1, E = Regs.rend(); J != E; ++I, ++J) { + if (J->first == Enc) { + Regs.erase(J.base()); + return false; + } + if (J->first < Enc) + break; + std::swap(*I, *J); + } + return true; +} + /// Parse a register list. bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder) { @@ -4278,7 +4302,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { Reg = getDRegFromQReg(Reg); EReg = MRI->getEncodingValue(Reg); - Registers.push_back(std::pair(EReg, Reg)); + Registers.emplace_back(EReg, Reg); ++Reg; } const MCRegisterClass *RC; @@ -4295,7 +4319,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, // Store the register. EReg = MRI->getEncodingValue(Reg); - Registers.push_back(std::pair(EReg, Reg)); + Registers.emplace_back(EReg, Reg); // This starts immediately after the first register token in the list, // so we can see either a comma or a minus (range separator) as a legal @@ -4326,7 +4350,11 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, while (Reg != EndReg) { Reg = getNextRegister(Reg); EReg = MRI->getEncodingValue(Reg); - Registers.push_back(std::pair(EReg, Reg)); + if (!insertNoDuplicates(Registers, EReg, Reg)) { + Warning(AfterMinusLoc, StringRef("duplicated register (") + + ARMInstPrinter::getRegisterName(Reg) + + ") in register list"); + } } continue; } @@ -4350,11 +4378,16 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, // subset of GPRRegClassId except it contains APSR as well. 
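insertNoDuplicates above performs a single insertion-sort step: it appends the (encoding, register) pair, bubbles it down until the list is ordered by encoding, and backs the insertion out when the encoding is already present so the caller can emit its duplicate-register warning. For illustration (register choices arbitrary):

  SmallVector<std::pair<unsigned, unsigned>, 8> Regs;
  insertNoDuplicates(Regs, /*Enc=*/0, ARM::R0); // Regs = {(0,R0)}
  insertNoDuplicates(Regs, /*Enc=*/3, ARM::R3); // Regs = {(0,R0), (3,R3)}
  insertNoDuplicates(Regs, /*Enc=*/1, ARM::R1); // Regs = {(0,R0), (1,R1), (3,R3)}
  bool Ok = insertNoDuplicates(Regs, /*Enc=*/3, ARM::R3);
  // Ok == false and Regs is unchanged; parseRegisterList then warns about the
  // duplicated register.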
RC = &ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID]; } - if (Reg == ARM::VPR && (RC == &ARMMCRegisterClasses[ARM::SPRRegClassID] || - RC == &ARMMCRegisterClasses[ARM::DPRRegClassID])) { + if (Reg == ARM::VPR && + (RC == &ARMMCRegisterClasses[ARM::SPRRegClassID] || + RC == &ARMMCRegisterClasses[ARM::DPRRegClassID] || + RC == &ARMMCRegisterClasses[ARM::FPWithVPRRegClassID])) { RC = &ARMMCRegisterClasses[ARM::FPWithVPRRegClassID]; EReg = MRI->getEncodingValue(Reg); - Registers.push_back(std::pair(EReg, Reg)); + if (!insertNoDuplicates(Registers, EReg, Reg)) { + Warning(RegLoc, "duplicated register (" + RegTok.getString() + + ") in register list"); + } continue; } // The register must be in the same register class as the first. @@ -4371,21 +4404,19 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, else if (!ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID].contains(Reg)) return Error(RegLoc, "register list not in ascending order"); } - if (MRI->getEncodingValue(Reg) == MRI->getEncodingValue(OldReg)) { - Warning(RegLoc, "duplicated register (" + RegTok.getString() + - ") in register list"); - continue; - } // VFP register lists must also be contiguous. if (RC != &ARMMCRegisterClasses[ARM::GPRRegClassID] && RC != &ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID] && Reg != OldReg + 1) return Error(RegLoc, "non-contiguous register range"); EReg = MRI->getEncodingValue(Reg); - Registers.push_back(std::pair(EReg, Reg)); + if (!insertNoDuplicates(Registers, EReg, Reg)) { + Warning(RegLoc, "duplicated register (" + RegTok.getString() + + ") in register list"); + } if (isQReg) { EReg = MRI->getEncodingValue(++Reg); - Registers.push_back(std::pair(EReg, Reg)); + Registers.emplace_back(EReg, Reg); } } @@ -5702,14 +5733,16 @@ bool ARMAsmParser::parseMemory(OperandVector &Operands) { return false; } - // If we have a '#', it's an immediate offset, else assume it's a register - // offset. Be friendly and also accept a plain integer (without a leading - // hash) for gas compatibility. + // If we have a '#' or '$', it's an immediate offset, else assume it's a + // register offset. Be friendly and also accept a plain integer or expression + // (without a leading hash) for gas compatibility. if (Parser.getTok().is(AsmToken::Hash) || Parser.getTok().is(AsmToken::Dollar) || + Parser.getTok().is(AsmToken::LParen) || Parser.getTok().is(AsmToken::Integer)) { - if (Parser.getTok().isNot(AsmToken::Integer)) - Parser.Lex(); // Eat '#' or '$'. 
+ if (Parser.getTok().is(AsmToken::Hash) || + Parser.getTok().is(AsmToken::Dollar)) + Parser.Lex(); // Eat '#' or '$' E = Parser.getTok().getLoc(); bool isNegative = getParser().getTok().is(AsmToken::Minus); @@ -11308,7 +11341,7 @@ bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) { SmallVector Opcodes; auto parseOne = [&]() -> bool { - const MCExpr *OE; + const MCExpr *OE = nullptr; SMLoc OpcodeLoc = getLexer().getLoc(); if (check(getLexer().is(AsmToken::EndOfStatement) || Parser.parseExpression(OE), @@ -11694,14 +11727,14 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) { { ARM::AEK_CRYPTO, {Feature_HasV8Bit}, {ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} }, { ARM::AEK_FP, {Feature_HasV8Bit}, - {ARM::FeatureVFP2_D16_SP, ARM::FeatureFPARMv8} }, + {ARM::FeatureVFP2_SP, ARM::FeatureFPARMv8} }, { (ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM), {Feature_HasV7Bit, Feature_IsNotMClassBit}, {ARM::FeatureHWDivThumb, ARM::FeatureHWDivARM} }, { ARM::AEK_MP, {Feature_HasV7Bit, Feature_IsNotMClassBit}, {ARM::FeatureMP} }, { ARM::AEK_SIMD, {Feature_HasV8Bit}, - {ARM::FeatureNEON, ARM::FeatureVFP2_D16_SP, ARM::FeatureFPARMv8} }, + {ARM::FeatureNEON, ARM::FeatureVFP2_SP, ARM::FeatureFPARMv8} }, { ARM::AEK_SEC, {Feature_HasV6KBit}, {ARM::FeatureTrustZone} }, // FIXME: Only available in A-class, isel not predicated { ARM::AEK_VIRT, {Feature_HasV7Bit}, {ARM::FeatureVirtualization} }, @@ -11775,19 +11808,19 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, // immediate in the syntax. switch (Kind) { default: break; - case MCK__35_0: + case MCK__HASH_0: if (Op.isImm()) if (const MCConstantExpr *CE = dyn_cast(Op.getImm())) if (CE->getValue() == 0) return Match_Success; break; - case MCK__35_8: + case MCK__HASH_8: if (Op.isImm()) if (const MCConstantExpr *CE = dyn_cast(Op.getImm())) if (CE->getValue() == 8) return Match_Success; break; - case MCK__35_16: + case MCK__HASH_16: if (Op.isImm()) if (const MCConstantExpr *CE = dyn_cast(Op.getImm())) if (CE->getValue() == 16) diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 673691ebd93e..eabc26d05f47 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -314,7 +314,7 @@ static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeNEONModImmInstruction(MCInst &Inst,unsigned Val, +static DecodeStatus DecodeVMOVModImmInstruction(MCInst &Inst,unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeMVEModImmInstruction(MCInst &Inst,unsigned Val, uint64_t Address, const void *Decoder); @@ -561,6 +561,8 @@ static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); @@ -3445,7 +3447,7 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus -DecodeNEONModImmInstruction(MCInst &Inst, unsigned Insn, +DecodeVMOVModImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const 
void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -5679,7 +5681,7 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, } } } - return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder); + return DecodeVMOVModImmInstruction(Inst, Insn, Address, Decoder); } if (!(imm & 0x20)) return MCDisassembler::Fail; @@ -5738,7 +5740,7 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, } } } - return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder); + return DecodeVMOVModImmInstruction(Inst, Insn, Address, Decoder); } if (!(imm & 0x20)) return MCDisassembler::Fail; @@ -6481,6 +6483,12 @@ static DecodeStatus DecodeMVEOverlappingLongShift( if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; + if (fieldFromInstruction (Insn, 6, 3) != 4) + return MCDisassembler::SoftFail; + + if (Rda == Rm) + return MCDisassembler::SoftFail; + return S; } @@ -6503,6 +6511,13 @@ static DecodeStatus DecodeMVEOverlappingLongShift( if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; + if (Inst.getOpcode() == ARM::MVE_SQRSHRL || + Inst.getOpcode() == ARM::MVE_UQRSHLL) { + unsigned Saturate = fieldFromInstruction(Insn, 7, 1); + // Saturate, the bit position for saturation + Inst.addOperand(MCOperand::createImm(Saturate)); + } + return S; } @@ -6572,3 +6587,11 @@ static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn, uint64_t Address, return MCDisassembler::Fail; return S; } + +static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + Inst.addOperand(MCOperand::createReg(ARM::VPR)); + Inst.addOperand(MCOperand::createReg(ARM::VPR)); + return S; +} diff --git a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h index 7732a6485a85..24a9fabf0979 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h @@ -518,10 +518,10 @@ namespace ARM_AM { // Valid alignments depend on the specific instruction. //===--------------------------------------------------------------------===// - // NEON Modified Immediates + // NEON/MVE Modified Immediates //===--------------------------------------------------------------------===// // - // Several NEON instructions (e.g., VMOV) take a "modified immediate" + // Several NEON and MVE instructions (e.g., VMOV) take a "modified immediate" // vector operand, where a small immediate encoded in the instruction // specifies a full NEON vector value. These modified immediates are // represented here as encoded integers. The low 8 bits hold the immediate @@ -529,20 +529,20 @@ namespace ARM_AM { // the "Cmode" field of the instruction. The interfaces below treat the // Op and Cmode values as a single 5-bit value. 
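The encoding described here packs the 8-bit immediate into the low byte and the combined Op/Cmode field into bits 8-12, as the renamed helpers below implement. A round-trip example with an arbitrary 8-bit splat value:

  unsigned Enc   = ARM_AM::createVMOVModImm(/*OpCmode=*/0xe, /*Val=*/0xab); // 0xeab
  unsigned Cmode = ARM_AM::getVMOVModImmOpCmode(Enc); // 0xe (8-bit element splat)
  unsigned Imm8  = ARM_AM::getVMOVModImmVal(Enc);     // 0xab
  unsigned EltBits;
  uint64_t Val   = ARM_AM::decodeVMOVModImm(Enc, EltBits); // Val == 0xab, EltBits == 8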
- inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) { + inline unsigned createVMOVModImm(unsigned OpCmode, unsigned Val) { return (OpCmode << 8) | Val; } - inline unsigned getNEONModImmOpCmode(unsigned ModImm) { + inline unsigned getVMOVModImmOpCmode(unsigned ModImm) { return (ModImm >> 8) & 0x1f; } - inline unsigned getNEONModImmVal(unsigned ModImm) { return ModImm & 0xff; } + inline unsigned getVMOVModImmVal(unsigned ModImm) { return ModImm & 0xff; } - /// decodeNEONModImm - Decode a NEON modified immediate value into the + /// decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the /// element value and the element size in bits. (If the element size is /// smaller than the vector, it is splatted into all the elements.) - inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) { - unsigned OpCmode = getNEONModImmOpCmode(ModImm); - unsigned Imm8 = getNEONModImmVal(ModImm); + inline uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits) { + unsigned OpCmode = getVMOVModImmOpCmode(ModImm); + unsigned Imm8 = getVMOVModImmVal(ModImm); uint64_t Val = 0; if (OpCmode == 0xe) { @@ -572,7 +572,7 @@ namespace ARM_AM { } EltBits = 64; } else { - llvm_unreachable("Unsupported NEON immediate"); + llvm_unreachable("Unsupported VMOV immediate"); } return Val; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index aeab5be78ab4..6196881a9b8f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -233,7 +233,7 @@ static const char *checkPCRelOffset(uint64_t Value, int64_t Min, int64_t Max) { const char *ARMAsmBackend::reasonForFixupRelaxation(const MCFixup &Fixup, uint64_t Value) const { - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { case ARM::fixup_arm_thumb_br: { // Relaxing tB to t2B. tB has a signed 12-bit displacement with the // low bit being an implied zero. There's an implied +4 offset for the @@ -870,7 +870,7 @@ bool ARMAsmBackend::shouldForceRelocation(const MCAssembler &Asm, const MCValue &Target) { const MCSymbolRefExpr *A = Target.getSymA(); const MCSymbol *Sym = A ? &A->getSymbol() : nullptr; - const unsigned FixupKind = Fixup.getKind() ; + const unsigned FixupKind = Fixup.getKind(); if (FixupKind == FK_NONE) return true; if (FixupKind == ARM::fixup_arm_thumb_bl) { @@ -1105,28 +1105,28 @@ uint32_t ARMAsmBackendDarwin::generateCompactUnwindEncoding( if (Instrs.empty()) return 0; // Start off assuming CFA is at SP+0. - int CFARegister = ARM::SP; + unsigned CFARegister = ARM::SP; int CFARegisterOffset = 0; // Mark savable registers as initially unsaved DenseMap RegOffsets; int FloatRegCount = 0; // Process each .cfi directive and build up compact unwind info. 
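In the loop that follows, the calls to MRI.getLLVMRegNum now dereference the result directly, reflecting its change to return an Optional when no mapping exists. A more defensive sketch of the same lookup, assuming the Optional<unsigned>-returning signature:

  if (Optional<unsigned> LLVMReg =
          MRI.getLLVMRegNum(Inst.getRegister(), /*isEH=*/true))
    CFARegister = *LLVMReg;
  // Otherwise no LLVM register maps to this DWARF number; leave CFARegister as-is.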
for (size_t i = 0, e = Instrs.size(); i != e; ++i) { - int Reg; + unsigned Reg; const MCCFIInstruction &Inst = Instrs[i]; switch (Inst.getOperation()) { case MCCFIInstruction::OpDefCfa: // DW_CFA_def_cfa CFARegisterOffset = -Inst.getOffset(); - CFARegister = MRI.getLLVMRegNum(Inst.getRegister(), true); + CFARegister = *MRI.getLLVMRegNum(Inst.getRegister(), true); break; case MCCFIInstruction::OpDefCfaOffset: // DW_CFA_def_cfa_offset CFARegisterOffset = -Inst.getOffset(); break; case MCCFIInstruction::OpDefCfaRegister: // DW_CFA_def_cfa_register - CFARegister = MRI.getLLVMRegNum(Inst.getRegister(), true); + CFARegister = *MRI.getLLVMRegNum(Inst.getRegister(), true); break; case MCCFIInstruction::OpOffset: // DW_CFA_offset - Reg = MRI.getLLVMRegNum(Inst.getRegister(), true); + Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true); if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg)) RegOffsets[Reg] = Inst.getOffset(); else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) { diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index c4daafe8ee97..6293a2462306 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -393,6 +393,9 @@ namespace ARMII { // in an IT block). ThumbArithFlagSetting = 1 << 19, + // Whether an instruction can be included in an MVE tail-predicated loop. + ValidForTailPredication = 1 << 20, + //===------------------------------------------------------------------===// // Code domain. DomainShift = 15, diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index fda19eea1de6..1fee38821a49 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -82,7 +82,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant(); if (IsPCRel) { - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { default: Ctx.reportFatalError(Fixup.getLoc(), "unsupported relocation on symbol"); return ELF::R_ARM_NONE; @@ -145,7 +145,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, return ELF::R_ARM_THM_BF18; } } - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { default: Ctx.reportFatalError(Fixup.getLoc(), "unsupported relocation on symbol"); return ELF::R_ARM_NONE; @@ -263,5 +263,5 @@ void ARMELFObjectWriter::addTargetSectionFlags(MCContext &Ctx, std::unique_ptr llvm::createARMELFObjectWriter(uint8_t OSABI) { - return llvm::make_unique(OSABI); + return std::make_unique(OSABI); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp b/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp index 45be1ee96342..a1def61b58d9 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp @@ -1334,12 +1334,12 @@ void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, << markup(">"); } -void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum, +void ARMInstPrinter::printVMOVModImmOperand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { unsigned EncodedImm = MI->getOperand(OpNum).getImm(); unsigned EltBits; - uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits); + uint64_t Val = ARM_AM::decodeVMOVModImm(EncodedImm, EltBits); O << markup(""); @@ -1676,3 +1676,11 @@ void 
ARMInstPrinter::printExpandedImmOperand(const MCInst *MI, unsigned OpNum, O.write_hex(Val); O << markup(">"); } + +void ARMInstPrinter::printMveSaturateOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + uint32_t Val = MI->getOperand(OpNum).getImm(); + assert(Val <= 1 && "Invalid MVE saturate operand"); + O << "#" << (Val == 1 ? 48 : 64); +} diff --git a/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h b/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h index 69026956b60e..eeb811e216fc 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h +++ b/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h @@ -191,7 +191,7 @@ public: const MCSubtargetInfo &STI, raw_ostream &O); void printFPImmOperand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); - void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, + void printVMOVModImmOperand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); @@ -262,7 +262,8 @@ public: const MCSubtargetInfo &STI, raw_ostream &O); void printExpandedImmOperand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); - + void printMveSaturateOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); private: unsigned DefaultAltIdx = ARM::NoRegAltName; }; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index dca6fe37d49a..268fe7efd9ce 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -1720,7 +1720,6 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, unsigned Reg = MI.getOperand(Op).getReg(); bool SPRRegs = ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg); bool DPRRegs = ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg); - bool CLRMRegs = MI.getOpcode() == ARM::t2CLRM; unsigned Binary = 0; @@ -1739,21 +1738,13 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, Binary |= NumRegs * 2; } else { const MCRegisterInfo &MRI = *CTX.getRegisterInfo(); - if (!CLRMRegs) { - assert(std::is_sorted(MI.begin() + Op, MI.end(), - [&](const MCOperand &LHS, const MCOperand &RHS) { - return MRI.getEncodingValue(LHS.getReg()) < - MRI.getEncodingValue(RHS.getReg()); - })); - } - + assert(std::is_sorted(MI.begin() + Op, MI.end(), + [&](const MCOperand &LHS, const MCOperand &RHS) { + return MRI.getEncodingValue(LHS.getReg()) < + MRI.getEncodingValue(RHS.getReg()); + })); for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) { - unsigned RegNo; - if (CLRMRegs && MI.getOperand(I).getReg() == ARM::APSR) { - RegNo = 15; - } else { - RegNo = MRI.getEncodingValue(MI.getOperand(I).getReg()); - } + unsigned RegNo = MRI.getEncodingValue(MI.getOperand(I).getReg()); Binary |= 1 << RegNo; } } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index c49885023cb2..ed4000c7e5be 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -204,7 +204,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, // relocation entry in the low 16 bits of r_address field. 
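For the general-purpose register list path in getRegisterListOpValue above, the operands are now required to arrive pre-sorted by encoding (hence the assert), and the emitted field is simply a bitmask with one bit per register encoding. A hypothetical example of that bitmask form:

  // e.g. a list containing r0, r4, r5 and lr (encodings 0, 4, 5 and 14):
  unsigned Binary = 0;
  for (unsigned RegNo : {0u, 4u, 5u, 14u})
    Binary |= 1u << RegNo;
  // Binary == 0x4031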
unsigned ThumbBit = 0; unsigned MovtBit = 0; - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { default: break; case ARM::fixup_arm_movt_hi16: MovtBit = 1; @@ -480,7 +480,7 @@ void ARMMachObjectWriter::recordRelocation(MachObjectWriter *Writer, // PAIR. I.e. it's correct that we insert the high bits of the addend in the // MOVW case here. relocation entries. uint32_t Value = 0; - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { default: break; case ARM::fixup_arm_movw_lo16: case ARM::fixup_t2_movw_lo16: @@ -506,5 +506,5 @@ void ARMMachObjectWriter::recordRelocation(MachObjectWriter *Writer, std::unique_ptr llvm::createARMMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) { - return llvm::make_unique(Is64Bit, CPUType, CPUSubtype); + return std::make_unique(Is64Bit, CPUType, CPUSubtype); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index b863517c0cca..7b30a61e8ccb 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -249,12 +249,12 @@ void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) { : ARM::FK_VFPV3_D16) : (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3XD_FP16 : ARM::FK_VFPV3XD))); - else if (STI.hasFeature(ARM::FeatureVFP2_D16_SP)) + else if (STI.hasFeature(ARM::FeatureVFP2_SP)) emitFPU(ARM::FK_VFPV2); } // ABI_HardFP_use attribute to indicate single precision FP. - if (STI.hasFeature(ARM::FeatureVFP2_D16_SP) && !STI.hasFeature(ARM::FeatureFP64)) + if (STI.hasFeature(ARM::FeatureVFP2_SP) && !STI.hasFeature(ARM::FeatureFP64)) emitAttribute(ARMBuildAttrs::ABI_HardFP_use, ARMBuildAttrs::HardFPSinglePrecision); diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp index 054a95dd1e12..900c5fe30364 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp @@ -92,7 +92,7 @@ namespace llvm { std::unique_ptr createARMWinCOFFObjectWriter(bool Is64Bit) { - return llvm::make_unique(Is64Bit); + return std::make_unique(Is64Bit); } } // end namespace llvm diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp index 2e816bea5e91..b3c8146a9bde 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp @@ -22,20 +22,10 @@ public: std::unique_ptr OW) : MCWinCOFFStreamer(C, std::move(AB), std::move(CE), std::move(OW)) {} - void EmitAssemblerFlag(MCAssemblerFlag Flag) override; void EmitThumbFunc(MCSymbol *Symbol) override; void FinishImpl() override; }; -void ARMWinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { - switch (Flag) { - default: llvm_unreachable("not implemented"); - case MCAF_SyntaxUnified: - case MCAF_Code16: - break; - } -} - void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) { getAssembler().setIsThumbFunc(Symbol); } diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index 4b25986b90a7..cc31929899b4 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -86,8 +86,8 @@ void MLxExpansion::pushStack(MachineInstr *MI) { MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const { // Look past COPY and INSERT_SUBREG instructions to find the // real definition MI. This is important for _sfp instructions. 
- unsigned Reg = MI->getOperand(1).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = MI->getOperand(1).getReg(); + if (Register::isPhysicalRegister(Reg)) return nullptr; MachineBasicBlock *MBB = MI->getParent(); @@ -97,13 +97,13 @@ MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const { break; if (DefMI->isCopyLike()) { Reg = DefMI->getOperand(1).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { DefMI = MRI->getVRegDef(Reg); continue; } } else if (DefMI->isInsertSubreg()) { Reg = DefMI->getOperand(2).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { DefMI = MRI->getVRegDef(Reg); continue; } @@ -114,9 +114,8 @@ MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const { } unsigned MLxExpansion::getDefReg(MachineInstr *MI) const { - unsigned Reg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg) || - !MRI->hasOneNonDBGUse(Reg)) + Register Reg = MI->getOperand(0).getReg(); + if (Register::isPhysicalRegister(Reg) || !MRI->hasOneNonDBGUse(Reg)) return Reg; MachineBasicBlock *MBB = MI->getParent(); @@ -126,8 +125,7 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const { while (UseMI->isCopy() || UseMI->isInsertSubreg()) { Reg = UseMI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg) || - !MRI->hasOneNonDBGUse(Reg)) + if (Register::isPhysicalRegister(Reg) || !MRI->hasOneNonDBGUse(Reg)) return Reg; UseMI = &*MRI->use_instr_nodbg_begin(Reg); if (UseMI->getParent() != MBB) @@ -140,8 +138,8 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const { /// hasLoopHazard - Check whether an MLx instruction is chained to itself across /// a single-MBB loop. 
bool MLxExpansion::hasLoopHazard(MachineInstr *MI) const { - unsigned Reg = MI->getOperand(1).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = MI->getOperand(1).getReg(); + if (Register::isPhysicalRegister(Reg)) return false; MachineBasicBlock *MBB = MI->getParent(); @@ -154,8 +152,8 @@ outer_continue: if (DefMI->isPHI()) { for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) { if (DefMI->getOperand(i + 1).getMBB() == MBB) { - unsigned SrcReg = DefMI->getOperand(i).getReg(); - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { + Register SrcReg = DefMI->getOperand(i).getReg(); + if (Register::isVirtualRegister(SrcReg)) { DefMI = MRI->getVRegDef(SrcReg); goto outer_continue; } @@ -163,13 +161,13 @@ outer_continue: } } else if (DefMI->isCopyLike()) { Reg = DefMI->getOperand(1).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { DefMI = MRI->getVRegDef(Reg); continue; } } else if (DefMI->isInsertSubreg()) { Reg = DefMI->getOperand(2).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { DefMI = MRI->getVRegDef(Reg); continue; } @@ -271,23 +269,23 @@ void MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, unsigned MulOpc, unsigned AddSubOpc, bool NegAcc, bool HasLane) { - unsigned DstReg = MI->getOperand(0).getReg(); + Register DstReg = MI->getOperand(0).getReg(); bool DstDead = MI->getOperand(0).isDead(); - unsigned AccReg = MI->getOperand(1).getReg(); - unsigned Src1Reg = MI->getOperand(2).getReg(); - unsigned Src2Reg = MI->getOperand(3).getReg(); + Register AccReg = MI->getOperand(1).getReg(); + Register Src1Reg = MI->getOperand(2).getReg(); + Register Src2Reg = MI->getOperand(3).getReg(); bool Src1Kill = MI->getOperand(2).isKill(); bool Src2Kill = MI->getOperand(3).isKill(); unsigned LaneImm = HasLane ? MI->getOperand(4).getImm() : 0; unsigned NextOp = HasLane ? 5 : 4; ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm(); - unsigned PredReg = MI->getOperand(++NextOp).getReg(); + Register PredReg = MI->getOperand(++NextOp).getReg(); const MCInstrDesc &MCID1 = TII->get(MulOpc); const MCInstrDesc &MCID2 = TII->get(AddSubOpc); const MachineFunction &MF = *MI->getParent()->getParent(); - unsigned TmpReg = MRI->createVirtualRegister( - TII->getRegClass(MCID1, 0, TRI, MF)); + Register TmpReg = + MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI, MF)); MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg) .addReg(Src1Reg, getKillRegState(Src1Kill)) diff --git a/lib/Target/ARM/MVETailPredication.cpp b/lib/Target/ARM/MVETailPredication.cpp new file mode 100644 index 000000000000..4db8ab17c49b --- /dev/null +++ b/lib/Target/ARM/MVETailPredication.cpp @@ -0,0 +1,519 @@ +//===- MVETailPredication.cpp - MVE Tail Predication ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Armv8.1m introduced MVE, M-Profile Vector Extension, and low-overhead +/// branches to help accelerate DSP applications. These two extensions can be +/// combined to provide implicit vector predication within a low-overhead loop. 
+/// The HardwareLoops pass inserts intrinsics identifying loops that the +/// backend will attempt to convert into a low-overhead loop. The vectorizer is +/// responsible for generating a vectorized loop in which the lanes are +/// predicated upon the iteration counter. This pass looks at these predicated +/// vector loops, that are targets for low-overhead loops, and prepares it for +/// code generation. Once the vectorizer has produced a masked loop, there's a +/// couple of final forms: +/// - A tail-predicated loop, with implicit predication. +/// - A loop containing multiple VCPT instructions, predicating multiple VPT +/// blocks of instructions operating on different vector types. + +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "ARM.h" +#include "ARMSubtarget.h" + +using namespace llvm; + +#define DEBUG_TYPE "mve-tail-predication" +#define DESC "Transform predicated vector loops to use MVE tail predication" + +static cl::opt +DisableTailPredication("disable-mve-tail-predication", cl::Hidden, + cl::init(true), + cl::desc("Disable MVE Tail Predication")); +namespace { + +class MVETailPredication : public LoopPass { + SmallVector MaskedInsts; + Loop *L = nullptr; + ScalarEvolution *SE = nullptr; + TargetTransformInfo *TTI = nullptr; + +public: + static char ID; + + MVETailPredication() : LoopPass(ID) { } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.setPreservesCFG(); + } + + bool runOnLoop(Loop *L, LPPassManager&) override; + +private: + + /// Perform the relevant checks on the loop and convert if possible. + bool TryConvert(Value *TripCount); + + /// Return whether this is a vectorized loop, that contains masked + /// load/stores. + bool IsPredicatedVectorLoop(); + + /// Compute a value for the total number of elements that the predicated + /// loop will process. + Value *ComputeElements(Value *TripCount, VectorType *VecTy); + + /// Is the icmp that generates an i1 vector, based upon a loop counter + /// and a limit that is defined outside the loop. + bool isTailPredicate(Instruction *Predicate, Value *NumElements); +}; + +} // end namespace + +static bool IsDecrement(Instruction &I) { + auto *Call = dyn_cast(&I); + if (!Call) + return false; + + Intrinsic::ID ID = Call->getIntrinsicID(); + return ID == Intrinsic::loop_decrement_reg; +} + +static bool IsMasked(Instruction *I) { + auto *Call = dyn_cast(I); + if (!Call) + return false; + + Intrinsic::ID ID = Call->getIntrinsicID(); + // TODO: Support gather/scatter expand/compress operations. 
+ return ID == Intrinsic::masked_store || ID == Intrinsic::masked_load; +} + +bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) { + if (skipLoop(L) || DisableTailPredication) + return false; + + Function &F = *L->getHeader()->getParent(); + auto &TPC = getAnalysis(); + auto &TM = TPC.getTM(); + auto *ST = &TM.getSubtarget(F); + TTI = &getAnalysis().getTTI(F); + SE = &getAnalysis().getSE(); + this->L = L; + + // The MVE and LOB extensions are combined to enable tail-predication, but + // there's nothing preventing us from generating VCTP instructions for v8.1m. + if (!ST->hasMVEIntegerOps() || !ST->hasV8_1MMainlineOps()) { + LLVM_DEBUG(dbgs() << "TP: Not a v8.1m.main+mve target.\n"); + return false; + } + + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) + return false; + + auto FindLoopIterations = [](BasicBlock *BB) -> IntrinsicInst* { + for (auto &I : *BB) { + auto *Call = dyn_cast(&I); + if (!Call) + continue; + + Intrinsic::ID ID = Call->getIntrinsicID(); + if (ID == Intrinsic::set_loop_iterations || + ID == Intrinsic::test_set_loop_iterations) + return cast(&I); + } + return nullptr; + }; + + // Look for the hardware loop intrinsic that sets the iteration count. + IntrinsicInst *Setup = FindLoopIterations(Preheader); + + // The test.set iteration could live in the pre- preheader. + if (!Setup) { + if (!Preheader->getSinglePredecessor()) + return false; + Setup = FindLoopIterations(Preheader->getSinglePredecessor()); + if (!Setup) + return false; + } + + // Search for the hardware loop intrinic that decrements the loop counter. + IntrinsicInst *Decrement = nullptr; + for (auto *BB : L->getBlocks()) { + for (auto &I : *BB) { + if (IsDecrement(I)) { + Decrement = cast(&I); + break; + } + } + } + + if (!Decrement) + return false; + + LLVM_DEBUG(dbgs() << "TP: Running on Loop: " << *L + << *Setup << "\n" + << *Decrement << "\n"); + bool Changed = TryConvert(Setup->getArgOperand(0)); + return Changed; +} + +bool MVETailPredication::isTailPredicate(Instruction *I, Value *NumElements) { + // Look for the following: + + // %trip.count.minus.1 = add i32 %N, -1 + // %broadcast.splatinsert10 = insertelement <4 x i32> undef, + // i32 %trip.count.minus.1, i32 0 + // %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, + // <4 x i32> undef, + // <4 x i32> zeroinitializer + // ... + // ... + // %index = phi i32 + // %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 + // %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, + // <4 x i32> undef, + // <4 x i32> zeroinitializer + // %induction = add <4 x i32> %broadcast.splat, + // %pred = icmp ule <4 x i32> %induction, %broadcast.splat11 + + // And return whether V == %pred. + + using namespace PatternMatch; + + CmpInst::Predicate Pred; + Instruction *Shuffle = nullptr; + Instruction *Induction = nullptr; + + // The vector icmp + if (!match(I, m_ICmp(Pred, m_Instruction(Induction), + m_Instruction(Shuffle))) || + Pred != ICmpInst::ICMP_ULE || !L->isLoopInvariant(Shuffle)) + return false; + + // First find the stuff outside the loop which is setting up the limit + // vector.... + // The invariant shuffle that broadcast the limit into a vector. + Instruction *Insert = nullptr; + if (!match(Shuffle, m_ShuffleVector(m_Instruction(Insert), m_Undef(), + m_Zero()))) + return false; + + // Insert the limit into a vector. 
+ Instruction *BECount = nullptr; + if (!match(Insert, m_InsertElement(m_Undef(), m_Instruction(BECount), + m_Zero()))) + return false; + + // The limit calculation, backedge count. + Value *TripCount = nullptr; + if (!match(BECount, m_Add(m_Value(TripCount), m_AllOnes()))) + return false; + + if (TripCount != NumElements) + return false; + + // Now back to searching inside the loop body... + // Find the add with takes the index iv and adds a constant vector to it. + Instruction *BroadcastSplat = nullptr; + Constant *Const = nullptr; + if (!match(Induction, m_Add(m_Instruction(BroadcastSplat), + m_Constant(Const)))) + return false; + + // Check that we're adding <0, 1, 2, 3... + if (auto *CDS = dyn_cast(Const)) { + for (unsigned i = 0; i < CDS->getNumElements(); ++i) { + if (CDS->getElementAsInteger(i) != i) + return false; + } + } else + return false; + + // The shuffle which broadcasts the index iv into a vector. + if (!match(BroadcastSplat, m_ShuffleVector(m_Instruction(Insert), m_Undef(), + m_Zero()))) + return false; + + // The insert element which initialises a vector with the index iv. + Instruction *IV = nullptr; + if (!match(Insert, m_InsertElement(m_Undef(), m_Instruction(IV), m_Zero()))) + return false; + + // The index iv. + auto *Phi = dyn_cast(IV); + if (!Phi) + return false; + + // TODO: Don't think we need to check the entry value. + Value *OnEntry = Phi->getIncomingValueForBlock(L->getLoopPreheader()); + if (!match(OnEntry, m_Zero())) + return false; + + Value *InLoop = Phi->getIncomingValueForBlock(L->getLoopLatch()); + unsigned Lanes = cast(Insert->getType())->getNumElements(); + + Instruction *LHS = nullptr; + if (!match(InLoop, m_Add(m_Instruction(LHS), m_SpecificInt(Lanes)))) + return false; + + return LHS == Phi; +} + +static VectorType* getVectorType(IntrinsicInst *I) { + unsigned TypeOp = I->getIntrinsicID() == Intrinsic::masked_load ? 0 : 1; + auto *PtrTy = cast(I->getOperand(TypeOp)->getType()); + return cast(PtrTy->getElementType()); +} + +bool MVETailPredication::IsPredicatedVectorLoop() { + // Check that the loop contains at least one masked load/store intrinsic. + // We only support 'normal' vector instructions - other than masked + // load/stores. + for (auto *BB : L->getBlocks()) { + for (auto &I : *BB) { + if (IsMasked(&I)) { + VectorType *VecTy = getVectorType(cast(&I)); + unsigned Lanes = VecTy->getNumElements(); + unsigned ElementWidth = VecTy->getScalarSizeInBits(); + // MVE vectors are 128-bit, but don't support 128 x i1. + // TODO: Can we support vectors larger than 128-bits? + unsigned MaxWidth = TTI->getRegisterBitWidth(true); + if (Lanes * ElementWidth != MaxWidth || Lanes == MaxWidth) + return false; + MaskedInsts.push_back(cast(&I)); + } else if (auto *Int = dyn_cast(&I)) { + for (auto &U : Int->args()) { + if (isa(U->getType())) + return false; + } + } + } + } + + return !MaskedInsts.empty(); +} + +Value* MVETailPredication::ComputeElements(Value *TripCount, + VectorType *VecTy) { + const SCEV *TripCountSE = SE->getSCEV(TripCount); + ConstantInt *VF = ConstantInt::get(cast(TripCount->getType()), + VecTy->getNumElements()); + + if (VF->equalsInt(1)) + return nullptr; + + // TODO: Support constant trip counts. 
+ auto VisitAdd = [&](const SCEVAddExpr *S) -> const SCEVMulExpr* { + if (auto *Const = dyn_cast(S->getOperand(0))) { + if (Const->getAPInt() != -VF->getValue()) + return nullptr; + } else + return nullptr; + return dyn_cast(S->getOperand(1)); + }; + + auto VisitMul = [&](const SCEVMulExpr *S) -> const SCEVUDivExpr* { + if (auto *Const = dyn_cast(S->getOperand(0))) { + if (Const->getValue() != VF) + return nullptr; + } else + return nullptr; + return dyn_cast(S->getOperand(1)); + }; + + auto VisitDiv = [&](const SCEVUDivExpr *S) -> const SCEV* { + if (auto *Const = dyn_cast(S->getRHS())) { + if (Const->getValue() != VF) + return nullptr; + } else + return nullptr; + + if (auto *RoundUp = dyn_cast(S->getLHS())) { + if (auto *Const = dyn_cast(RoundUp->getOperand(0))) { + if (Const->getAPInt() != (VF->getValue() - 1)) + return nullptr; + } else + return nullptr; + + return RoundUp->getOperand(1); + } + return nullptr; + }; + + // TODO: Can we use SCEV helpers, such as findArrayDimensions, and friends to + // determine the numbers of elements instead? Looks like this is what is used + // for delinearization, but I'm not sure if it can be applied to the + // vectorized form - at least not without a bit more work than I feel + // comfortable with. + + // Search for Elems in the following SCEV: + // (1 + ((-VF + (VF * (((VF - 1) + %Elems) /u VF))) /u VF)) + const SCEV *Elems = nullptr; + if (auto *TC = dyn_cast(TripCountSE)) + if (auto *Div = dyn_cast(TC->getOperand(1))) + if (auto *Add = dyn_cast(Div->getLHS())) + if (auto *Mul = VisitAdd(Add)) + if (auto *Div = VisitMul(Mul)) + if (auto *Res = VisitDiv(Div)) + Elems = Res; + + if (!Elems) + return nullptr; + + Instruction *InsertPt = L->getLoopPreheader()->getTerminator(); + if (!isSafeToExpandAt(Elems, InsertPt, *SE)) + return nullptr; + + auto DL = L->getHeader()->getModule()->getDataLayout(); + SCEVExpander Expander(*SE, DL, "elements"); + return Expander.expandCodeFor(Elems, Elems->getType(), InsertPt); +} + +// Look through the exit block to see whether there's a duplicate predicate +// instruction. This can happen when we need to perform a select on values +// from the last and previous iteration. Instead of doing a straight +// replacement of that predicate with the vctp, clone the vctp and place it +// in the block. This means that the VPR doesn't have to be live into the +// exit block which should make it easier to convert this loop into a proper +// tail predicated loop. +static void Cleanup(DenseMap &NewPredicates, + SetVector &MaybeDead, Loop *L) { + if (BasicBlock *Exit = L->getUniqueExitBlock()) { + for (auto &Pair : NewPredicates) { + Instruction *OldPred = Pair.first; + Instruction *NewPred = Pair.second; + + for (auto &I : *Exit) { + if (I.isSameOperationAs(OldPred)) { + Instruction *PredClone = NewPred->clone(); + PredClone->insertBefore(&I); + I.replaceAllUsesWith(PredClone); + MaybeDead.insert(&I); + break; + } + } + } + } + + // Drop references and add operands to check for dead. 
+ SmallPtrSet Dead; + while (!MaybeDead.empty()) { + auto *I = MaybeDead.front(); + MaybeDead.remove(I); + if (I->hasNUsesOrMore(1)) + continue; + + for (auto &U : I->operands()) { + if (auto *OpI = dyn_cast(U)) + MaybeDead.insert(OpI); + } + I->dropAllReferences(); + Dead.insert(I); + } + + for (auto *I : Dead) + I->eraseFromParent(); + + for (auto I : L->blocks()) + DeleteDeadPHIs(I); +} + +bool MVETailPredication::TryConvert(Value *TripCount) { + if (!IsPredicatedVectorLoop()) + return false; + + LLVM_DEBUG(dbgs() << "TP: Found predicated vector loop.\n"); + + // Walk through the masked intrinsics and try to find whether the predicate + // operand is generated from an induction variable. + Module *M = L->getHeader()->getModule(); + Type *Ty = IntegerType::get(M->getContext(), 32); + SetVector Predicates; + DenseMap NewPredicates; + + for (auto *I : MaskedInsts) { + Intrinsic::ID ID = I->getIntrinsicID(); + unsigned PredOp = ID == Intrinsic::masked_load ? 2 : 3; + auto *Predicate = dyn_cast(I->getArgOperand(PredOp)); + if (!Predicate || Predicates.count(Predicate)) + continue; + + VectorType *VecTy = getVectorType(I); + Value *NumElements = ComputeElements(TripCount, VecTy); + if (!NumElements) + continue; + + if (!isTailPredicate(Predicate, NumElements)) { + LLVM_DEBUG(dbgs() << "TP: Not tail predicate: " << *Predicate << "\n"); + continue; + } + + LLVM_DEBUG(dbgs() << "TP: Found tail predicate: " << *Predicate << "\n"); + Predicates.insert(Predicate); + + // Insert a phi to count the number of elements processed by the loop. + IRBuilder<> Builder(L->getHeader()->getFirstNonPHI()); + PHINode *Processed = Builder.CreatePHI(Ty, 2); + Processed->addIncoming(NumElements, L->getLoopPreheader()); + + // Insert the intrinsic to represent the effect of tail predication. + Builder.SetInsertPoint(cast(Predicate)); + ConstantInt *Factor = + ConstantInt::get(cast(Ty), VecTy->getNumElements()); + Intrinsic::ID VCTPID; + switch (VecTy->getNumElements()) { + default: + llvm_unreachable("unexpected number of lanes"); + case 2: VCTPID = Intrinsic::arm_vctp64; break; + case 4: VCTPID = Intrinsic::arm_vctp32; break; + case 8: VCTPID = Intrinsic::arm_vctp16; break; + case 16: VCTPID = Intrinsic::arm_vctp8; break; + } + Function *VCTP = Intrinsic::getDeclaration(M, VCTPID); + Value *TailPredicate = Builder.CreateCall(VCTP, Processed); + Predicate->replaceAllUsesWith(TailPredicate); + NewPredicates[Predicate] = cast(TailPredicate); + + // Add the incoming value to the new phi. + // TODO: This add likely already exists in the loop. + Value *Remaining = Builder.CreateSub(Processed, Factor); + Processed->addIncoming(Remaining, L->getLoopLatch()); + LLVM_DEBUG(dbgs() << "TP: Insert processed elements phi: " + << *Processed << "\n" + << "TP: Inserted VCTP: " << *TailPredicate << "\n"); + } + + // Now clean up. + Cleanup(NewPredicates, Predicates, L); + return true; +} + +Pass *llvm::createMVETailPredicationPass() { + return new MVETailPredication(); +} + +char MVETailPredication::ID = 0; + +INITIALIZE_PASS_BEGIN(MVETailPredication, DEBUG_TYPE, DESC, false, false) +INITIALIZE_PASS_END(MVETailPredication, DEBUG_TYPE, DESC, false, false) diff --git a/lib/Target/ARM/MVEVPTBlockPass.cpp b/lib/Target/ARM/MVEVPTBlockPass.cpp new file mode 100644 index 000000000000..bc0a80b177ed --- /dev/null +++ b/lib/Target/ARM/MVEVPTBlockPass.cpp @@ -0,0 +1,278 @@ +//===-- MVEVPTBlockPass.cpp - Insert MVE VPT blocks -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "Thumb2InstrInfo.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "arm-mve-vpt" + +namespace { + class MVEVPTBlock : public MachineFunctionPass { + public: + static char ID; + const Thumb2InstrInfo *TII; + const TargetRegisterInfo *TRI; + + MVEVPTBlock() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &Fn) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + StringRef getPassName() const override { + return "MVE VPT block insertion pass"; + } + + private: + bool InsertVPTBlocks(MachineBasicBlock &MBB); + }; + + char MVEVPTBlock::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false) + +enum VPTMaskValue { + T = 8, // 0b1000 + TT = 4, // 0b0100 + TE = 12, // 0b1100 + TTT = 2, // 0b0010 + TTE = 6, // 0b0110 + TEE = 10, // 0b1010 + TET = 14, // 0b1110 + TTTT = 1, // 0b0001 + TTTE = 3, // 0b0011 + TTEE = 5, // 0b0101 + TTET = 7, // 0b0111 + TEEE = 9, // 0b1001 + TEET = 11, // 0b1011 + TETT = 13, // 0b1101 + TETE = 15 // 0b1111 +}; + +static unsigned VCMPOpcodeToVPT(unsigned Opcode) { + switch (Opcode) { + case ARM::MVE_VCMPf32: + return ARM::MVE_VPTv4f32; + case ARM::MVE_VCMPf16: + return ARM::MVE_VPTv8f16; + case ARM::MVE_VCMPi8: + return ARM::MVE_VPTv16i8; + case ARM::MVE_VCMPi16: + return ARM::MVE_VPTv8i16; + case ARM::MVE_VCMPi32: + return ARM::MVE_VPTv4i32; + case ARM::MVE_VCMPu8: + return ARM::MVE_VPTv16u8; + case ARM::MVE_VCMPu16: + return ARM::MVE_VPTv8u16; + case ARM::MVE_VCMPu32: + return ARM::MVE_VPTv4u32; + case ARM::MVE_VCMPs8: + return ARM::MVE_VPTv16s8; + case ARM::MVE_VCMPs16: + return ARM::MVE_VPTv8s16; + case ARM::MVE_VCMPs32: + return ARM::MVE_VPTv4s32; + + case ARM::MVE_VCMPf32r: + return ARM::MVE_VPTv4f32r; + case ARM::MVE_VCMPf16r: + return ARM::MVE_VPTv8f16r; + case ARM::MVE_VCMPi8r: + return ARM::MVE_VPTv16i8r; + case ARM::MVE_VCMPi16r: + return ARM::MVE_VPTv8i16r; + case ARM::MVE_VCMPi32r: + return ARM::MVE_VPTv4i32r; + case ARM::MVE_VCMPu8r: + return ARM::MVE_VPTv16u8r; + case ARM::MVE_VCMPu16r: + return ARM::MVE_VPTv8u16r; + case ARM::MVE_VCMPu32r: + return ARM::MVE_VPTv4u32r; + case ARM::MVE_VCMPs8r: + return ARM::MVE_VPTv16s8r; + case ARM::MVE_VCMPs16r: + return ARM::MVE_VPTv8s16r; + case ARM::MVE_VCMPs32r: + return ARM::MVE_VPTv4s32r; + + default: + return 0; + } +} + +static MachineInstr *findVCMPToFoldIntoVPST(MachineBasicBlock::iterator MI, + const TargetRegisterInfo *TRI, + unsigned &NewOpcode) { + // Search backwards 
to the instruction that defines VPR. This may or not + // be a VCMP, we check that after this loop. If we find another instruction + // that reads cpsr, we return nullptr. + MachineBasicBlock::iterator CmpMI = MI; + while (CmpMI != MI->getParent()->begin()) { + --CmpMI; + if (CmpMI->modifiesRegister(ARM::VPR, TRI)) + break; + if (CmpMI->readsRegister(ARM::VPR, TRI)) + break; + } + + if (CmpMI == MI) + return nullptr; + NewOpcode = VCMPOpcodeToVPT(CmpMI->getOpcode()); + if (NewOpcode == 0) + return nullptr; + + // Search forward from CmpMI to MI, checking if either register was def'd + if (registerDefinedBetween(CmpMI->getOperand(1).getReg(), std::next(CmpMI), + MI, TRI)) + return nullptr; + if (registerDefinedBetween(CmpMI->getOperand(2).getReg(), std::next(CmpMI), + MI, TRI)) + return nullptr; + return &*CmpMI; +} + +bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { + bool Modified = false; + MachineBasicBlock::instr_iterator MBIter = Block.instr_begin(); + MachineBasicBlock::instr_iterator EndIter = Block.instr_end(); + + while (MBIter != EndIter) { + MachineInstr *MI = &*MBIter; + unsigned PredReg = 0; + DebugLoc dl = MI->getDebugLoc(); + + ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg); + + // The idea of the predicate is that None, Then and Else are for use when + // handling assembly language: they correspond to the three possible + // suffixes "", "t" and "e" on the mnemonic. So when instructions are read + // from assembly source or disassembled from object code, you expect to see + // a mixture whenever there's a long VPT block. But in code generation, we + // hope we'll never generate an Else as input to this pass. + assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds"); + + if (Pred == ARMVCC::None) { + ++MBIter; + continue; + } + + LLVM_DEBUG(dbgs() << "VPT block created for: "; MI->dump()); + int VPTInstCnt = 1; + ARMVCC::VPTCodes NextPred; + + // Look at subsequent instructions, checking if they can be in the same VPT + // block. 
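The loop that follows caps a block at four predicated instructions and maps the count onto an all-Then mask; only all-Then masks can occur here because, as the assert above notes, codegen never feeds this pass Else-predicated instructions. A sketch of that mapping (hypothetical helper, values taken from the VPTMaskValue enum above):

    #include "llvm/Support/ErrorHandling.h"

    static unsigned vptBlockMaskForSize(unsigned NumPredicatedInsts) {
      switch (NumPredicatedInsts) {
      case 1: return 8; // T    (0b1000)
      case 2: return 4; // TT   (0b0100)
      case 3: return 2; // TTT  (0b0010)
      case 4: return 1; // TTTT (0b0001)
      default:
        llvm_unreachable("a VPT block holds at most four instructions");
      }
    }

If findVCMPToFoldIntoVPST located the VCMP that defines VPR, the mask goes on a VPT that absorbs the compare; otherwise a bare VPST with the same mask is emitted, and either way the block is finalized into a bundle.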
+ ++MBIter; + while (MBIter != EndIter && VPTInstCnt < 4) { + NextPred = getVPTInstrPredicate(*MBIter, PredReg); + assert(NextPred != ARMVCC::Else && + "VPT block pass does not expect Else preds"); + if (NextPred != Pred) + break; + LLVM_DEBUG(dbgs() << " adding : "; MBIter->dump()); + ++VPTInstCnt; + ++MBIter; + }; + + unsigned BlockMask = 0; + switch (VPTInstCnt) { + case 1: + BlockMask = VPTMaskValue::T; + break; + case 2: + BlockMask = VPTMaskValue::TT; + break; + case 3: + BlockMask = VPTMaskValue::TTT; + break; + case 4: + BlockMask = VPTMaskValue::TTTT; + break; + default: + llvm_unreachable("Unexpected number of instruction in a VPT block"); + }; + + // Search back for a VCMP that can be folded to create a VPT, or else create + // a VPST directly + MachineInstrBuilder MIBuilder; + unsigned NewOpcode; + MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode); + if (VCMP) { + LLVM_DEBUG(dbgs() << " folding VCMP into VPST: "; VCMP->dump()); + MIBuilder = BuildMI(Block, MI, dl, TII->get(NewOpcode)); + MIBuilder.addImm(BlockMask); + MIBuilder.add(VCMP->getOperand(1)); + MIBuilder.add(VCMP->getOperand(2)); + MIBuilder.add(VCMP->getOperand(3)); + VCMP->eraseFromParent(); + } else { + MIBuilder = BuildMI(Block, MI, dl, TII->get(ARM::MVE_VPST)); + MIBuilder.addImm(BlockMask); + } + + finalizeBundle( + Block, MachineBasicBlock::instr_iterator(MIBuilder.getInstr()), MBIter); + + Modified = true; + } + return Modified; +} + +bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) { + const ARMSubtarget &STI = + static_cast(Fn.getSubtarget()); + + if (!STI.isThumb2() || !STI.hasMVEIntegerOps()) + return false; + + TII = static_cast(STI.getInstrInfo()); + TRI = STI.getRegisterInfo(); + + LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n" + << "********** Function: " << Fn.getName() << '\n'); + + bool Modified = false; + for (MachineBasicBlock &MBB : Fn) + Modified |= InsertVPTBlocks(MBB); + + LLVM_DEBUG(dbgs() << "**************************************\n"); + return Modified; +} + +/// createMVEVPTBlock - Returns an instance of the MVE VPT block +/// insertion pass. +FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); } diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 426e9a0ed9b8..956d474f1d79 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -164,7 +164,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, // to determine the end of the prologue. 
DebugLoc dl; - unsigned FramePtr = RegInfo->getFrameRegister(MF); + Register FramePtr = RegInfo->getFrameRegister(MF); unsigned BasePtr = RegInfo->getBaseRegister(); int CFAOffset = 0; @@ -459,8 +459,8 @@ static bool isCSRestore(MachineInstr &MI, const MCPhysReg *CSRegs) { else if (MI.getOpcode() == ARM::tPOP) { return true; } else if (MI.getOpcode() == ARM::tMOVr) { - unsigned Dst = MI.getOperand(0).getReg(); - unsigned Src = MI.getOperand(1).getReg(); + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); return ((ARM::tGPRRegClass.contains(Src) || Src == ARM::LR) && ARM::hGPRRegClass.contains(Dst)); } @@ -483,7 +483,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, assert((unsigned)NumBytes >= ArgRegsSaveSize && "ArgRegsSaveSize is included in NumBytes"); const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); - unsigned FramePtr = RegInfo->getFrameRegister(MF); + Register FramePtr = RegInfo->getFrameRegister(MF); if (!AFI->hasStackFrame()) { if (NumBytes - ArgRegsSaveSize != 0) diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index f57d93a2e83d..fccaa4c9cc8a 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -80,12 +80,11 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { assert((RC == &ARM::tGPRRegClass || - (TargetRegisterInfo::isPhysicalRegister(SrcReg) && - isARMLowRegister(SrcReg))) && "Unknown regclass!"); + (Register::isPhysicalRegister(SrcReg) && isARMLowRegister(SrcReg))) && + "Unknown regclass!"); if (RC == &ARM::tGPRRegClass || - (TargetRegisterInfo::isPhysicalRegister(SrcReg) && - isARMLowRegister(SrcReg))) { + (Register::isPhysicalRegister(SrcReg) && isARMLowRegister(SrcReg))) { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -108,13 +107,13 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - assert((RC->hasSuperClassEq(&ARM::tGPRRegClass) || - (TargetRegisterInfo::isPhysicalRegister(DestReg) && - isARMLowRegister(DestReg))) && "Unknown regclass!"); + assert( + (RC->hasSuperClassEq(&ARM::tGPRRegClass) || + (Register::isPhysicalRegister(DestReg) && isARMLowRegister(DestReg))) && + "Unknown regclass!"); if (RC->hasSuperClassEq(&ARM::tGPRRegClass) || - (TargetRegisterInfo::isPhysicalRegister(DestReg) && - isARMLowRegister(DestReg))) { + (Register::isPhysicalRegister(DestReg) && isARMLowRegister(DestReg))) { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index 3143eb9840ed..786fc78d0233 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -87,7 +87,7 @@ static void TrackDefUses(MachineInstr *MI, RegisterSet &Defs, RegisterSet &Uses, for (auto &MO : MI->operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg || Reg == ARM::ITSTATE || Reg == ARM::SP) continue; if (MO.isUse()) @@ -145,8 +145,8 @@ Thumb2ITBlock::MoveCopyOutOfITBlock(MachineInstr *MI, MI->getOperand(1).getSubReg() == 0 && "Sub-register indices still around?"); - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned SrcReg = MI->getOperand(1).getReg(); + Register DstReg = MI->getOperand(0).getReg(); + Register SrcReg = MI->getOperand(1).getReg(); // First check if 
it's safe to move it. if (Uses.count(DstReg) || Defs.count(SrcReg)) @@ -308,131 +308,3 @@ bool Thumb2ITBlock::runOnMachineFunction(MachineFunction &Fn) { /// createThumb2ITBlockPass - Returns an instance of the Thumb2 IT blocks /// insertion pass. FunctionPass *llvm::createThumb2ITBlockPass() { return new Thumb2ITBlock(); } - -#undef DEBUG_TYPE -#define DEBUG_TYPE "arm-mve-vpt" - -namespace { - class MVEVPTBlock : public MachineFunctionPass { - public: - static char ID; - const Thumb2InstrInfo *TII; - const TargetRegisterInfo *TRI; - - MVEVPTBlock() : MachineFunctionPass(ID) {} - - bool runOnMachineFunction(MachineFunction &Fn) override; - - MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoVRegs); - } - - StringRef getPassName() const override { - return "MVE VPT block insertion pass"; - } - - private: - bool InsertVPTBlocks(MachineBasicBlock &MBB); - }; - - char MVEVPTBlock::ID = 0; - -} // end anonymous namespace - -INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false) - -enum VPTMaskValue { - T = 8, // 0b1000 - TT = 4, // 0b0100 - TE = 12, // 0b1100 - TTT = 2, // 0b0010 - TTE = 6, // 0b0110 - TEE = 10, // 0b1010 - TET = 14, // 0b1110 - TTTT = 1, // 0b0001 - TTTE = 3, // 0b0011 - TTEE = 5, // 0b0101 - TTET = 7, // 0b0111 - TEEE = 9, // 0b1001 - TEET = 11, // 0b1011 - TETT = 13, // 0b1101 - TETE = 15 // 0b1111 -}; - -bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { - bool Modified = false; - MachineBasicBlock::iterator MBIter = Block.begin(); - MachineBasicBlock::iterator EndIter = Block.end(); - - while (MBIter != EndIter) { - MachineInstr *MI = &*MBIter; - unsigned PredReg = 0; - DebugLoc dl = MI->getDebugLoc(); - - ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg); - - // The idea of the predicate is that None, Then and Else are for use when - // handling assembly language: they correspond to the three possible - // suffixes "", "t" and "e" on the mnemonic. So when instructions are read - // from assembly source or disassembled from object code, you expect to see - // a mixture whenever there's a long VPT block. But in code generation, we - // hope we'll never generate an Else as input to this pass. 
- - assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds"); - - if (Pred == ARMVCC::None) { - ++MBIter; - continue; - } - - MachineInstrBuilder MIBuilder = - BuildMI(Block, MBIter, dl, TII->get(ARM::MVE_VPST)); - // The mask value for the VPST instruction is T = 0b1000 = 8 - MIBuilder.addImm(VPTMaskValue::T); - - MachineBasicBlock::iterator VPSTInsertPos = MIBuilder.getInstr(); - int VPTInstCnt = 1; - ARMVCC::VPTCodes NextPred; - - do { - ++MBIter; - NextPred = getVPTInstrPredicate(*MBIter, PredReg); - } while (NextPred != ARMVCC::None && NextPred == Pred && ++VPTInstCnt < 4); - - MachineInstr *LastMI = &*MBIter; - finalizeBundle(Block, VPSTInsertPos.getInstrIterator(), - ++LastMI->getIterator()); - - Modified = true; - LLVM_DEBUG(dbgs() << "VPT block created for: "; MI->dump();); - - ++MBIter; - } - return Modified; -} - -bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) { - const ARMSubtarget &STI = - static_cast(Fn.getSubtarget()); - - if (!STI.isThumb2() || !STI.hasMVEIntegerOps()) - return false; - - TII = static_cast(STI.getInstrInfo()); - TRI = STI.getRegisterInfo(); - - LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n" - << "********** Function: " << Fn.getName() << '\n'); - - bool Modified = false; - for (MachineBasicBlock &MBB : Fn) - Modified |= InsertVPTBlocks(MBB); - - LLVM_DEBUG(dbgs() << "**************************************\n"); - return Modified; -} - -/// createMVEVPTBlock - Returns an instance of the MVE VPT block -/// insertion pass. -FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); } diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 5a965f7a6b9b..af1f0aeb27ba 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -159,9 +159,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // Thumb2 STRD expects its dest-registers to be in rGPR. Not a problem for // gsub_0, but needs an extra constraint for gsub_1 (which could be sp // otherwise). - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if (Register::isVirtualRegister(SrcReg)) { MachineRegisterInfo *MRI = &MF.getRegInfo(); - MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_GPRwithAPSRnospRegClass); + MRI->constrainRegClass(SrcReg, &ARM::GPRPairnospRegClass); } MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8)); @@ -200,10 +200,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // Thumb2 LDRD expects its dest-registers to be in rGPR. Not a problem for // gsub_0, but needs an extra constraint for gsub_1 (which could be sp // otherwise). 
- if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + if (Register::isVirtualRegister(DestReg)) { MachineRegisterInfo *MRI = &MF.getRegInfo(); - MRI->constrainRegClass(DestReg, - &ARM::GPRPair_with_gsub_1_in_GPRwithAPSRnospRegClass); + MRI->constrainRegClass(DestReg, &ARM::GPRPairnospRegClass); } MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8)); @@ -211,7 +210,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO).add(predOps(ARMCC::AL)); - if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + if (Register::isPhysicalRegister(DestReg)) MIB.addReg(DestReg, RegState::ImplicitDefine); return; } @@ -470,12 +469,17 @@ immediateOffsetOpcode(unsigned opcode) bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int &Offset, - const ARMBaseInstrInfo &TII) { + const ARMBaseInstrInfo &TII, + const TargetRegisterInfo *TRI) { unsigned Opcode = MI.getOpcode(); const MCInstrDesc &Desc = MI.getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); bool isSub = false; + MachineFunction &MF = *MI.getParent()->getParent(); + const TargetRegisterClass *RegClass = + TII.getRegClass(Desc, FrameRegIdx, TRI, MF); + // Memory operands in inline assembly always use AddrModeT2_i12. if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR) AddrMode = ARMII::AddrModeT2_i12; // FIXME. mode for thumb2? @@ -554,7 +558,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, // register then we change to an immediate version. unsigned NewOpc = Opcode; if (AddrMode == ARMII::AddrModeT2_so) { - unsigned OffsetReg = MI.getOperand(FrameRegIdx+1).getReg(); + Register OffsetReg = MI.getOperand(FrameRegIdx + 1).getReg(); if (OffsetReg != 0) { MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); return Offset == 0; @@ -645,10 +649,21 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, MachineOperand &ImmOp = MI.getOperand(FrameRegIdx+1); // Attempt to fold address computation - // Common case: small offset, fits into instruction. + // Common case: small offset, fits into instruction. We need to make sure + // the register class is correct too, for instructions like the MVE + // VLDRH.32, which only accepts low tGPR registers. int ImmedOffset = Offset / Scale; unsigned Mask = (1 << NumBits) - 1; - if ((unsigned)Offset <= Mask * Scale) { + if ((unsigned)Offset <= Mask * Scale && + (Register::isVirtualRegister(FrameReg) || + RegClass->contains(FrameReg))) { + if (Register::isVirtualRegister(FrameReg)) { + // Make sure the register class for the virtual register is correct + MachineRegisterInfo *MRI = &MF.getRegInfo(); + if (!MRI->constrainRegClass(FrameReg, RegClass)) + llvm_unreachable("Unable to constrain virtual register class."); + } + // Replace the FrameIndex with fp/sp MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); if (isSub) { @@ -681,7 +696,8 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, } Offset = (isSub) ? 
-Offset : Offset; - return Offset == 0; + return Offset == 0 && (Register::isVirtualRegister(FrameReg) || + RegClass->contains(FrameReg)); } ARMCC::CondCodes llvm::getITInstrPredicate(const MachineInstr &MI, diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 37a85fa38417..c5a62aa33990 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -300,7 +300,7 @@ Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) { for (const MachineOperand &MO : CPSRDef->operands()) { if (!MO.isReg() || MO.isUndef() || MO.isUse()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0 || Reg == ARM::CPSR) continue; Defs.insert(Reg); @@ -309,7 +309,7 @@ Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) { for (const MachineOperand &MO : Use->operands()) { if (!MO.isReg() || MO.isUndef() || MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Defs.count(Reg)) return false; } @@ -380,7 +380,7 @@ static bool VerifyLowRegs(MachineInstr *MI) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || MO.isImplicit()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0 || Reg == ARM::CPSR) continue; if (isPCOk && Reg == ARM::PC) @@ -464,11 +464,11 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, // For this reason we can't reuse the logic at the end of this function; we // have to implement the MI building here. bool IsStore = Entry.WideOpc == ARM::t2STR_POST; - unsigned Rt = MI->getOperand(IsStore ? 1 : 0).getReg(); - unsigned Rn = MI->getOperand(IsStore ? 0 : 1).getReg(); + Register Rt = MI->getOperand(IsStore ? 1 : 0).getReg(); + Register Rn = MI->getOperand(IsStore ? 0 : 1).getReg(); unsigned Offset = MI->getOperand(3).getImm(); unsigned PredImm = MI->getOperand(4).getImm(); - unsigned PredReg = MI->getOperand(5).getReg(); + Register PredReg = MI->getOperand(5).getReg(); assert(isARMLowRegister(Rt)); assert(isARMLowRegister(Rn)); @@ -496,7 +496,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, return true; } case ARM::t2LDMIA: { - unsigned BaseReg = MI->getOperand(0).getReg(); + Register BaseReg = MI->getOperand(0).getReg(); assert(isARMLowRegister(BaseReg)); // For the non-writeback version (this one), the base register must be @@ -524,7 +524,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, break; case ARM::t2LDMIA_RET: { - unsigned BaseReg = MI->getOperand(1).getReg(); + Register BaseReg = MI->getOperand(1).getReg(); if (BaseReg != ARM::SP) return false; Opc = Entry.NarrowOpc2; // tPOP_RET @@ -537,7 +537,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, case ARM::t2STMDB_UPD: { OpNum = 0; - unsigned BaseReg = MI->getOperand(1).getReg(); + Register BaseReg = MI->getOperand(1).getReg(); if (BaseReg == ARM::SP && (Entry.WideOpc == ARM::t2LDMIA_UPD || Entry.WideOpc == ARM::t2STMDB_UPD)) { @@ -743,11 +743,11 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, // are optimizing for size. return false; - unsigned Reg0 = MI->getOperand(0).getReg(); - unsigned Reg1 = MI->getOperand(1).getReg(); + Register Reg0 = MI->getOperand(0).getReg(); + Register Reg1 = MI->getOperand(1).getReg(); // t2MUL is "special". The tied source operand is second, not first. 
if (MI->getOpcode() == ARM::t2MUL) { - unsigned Reg2 = MI->getOperand(2).getReg(); + Register Reg2 = MI->getOperand(2).getReg(); // Early exit if the regs aren't all low regs. if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1) || !isARMLowRegister(Reg2)) @@ -782,7 +782,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, if (Imm > Limit) return false; } else { - unsigned Reg2 = MI->getOperand(2).getReg(); + Register Reg2 = MI->getOperand(2).getReg(); if (Entry.LowRegs2 && !isARMLowRegister(Reg2)) return false; } @@ -868,7 +868,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, continue; const MachineOperand &MO = MI->getOperand(i); if (MO.isReg()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg || Reg == ARM::CPSR) continue; if (Entry.LowRegs1 && !isARMLowRegister(Reg)) diff --git a/lib/Target/ARM/ThumbRegisterInfo.cpp b/lib/Target/ARM/ThumbRegisterInfo.cpp index a96417ffce4d..b0ba58d8dc4a 100644 --- a/lib/Target/ARM/ThumbRegisterInfo.cpp +++ b/lib/Target/ARM/ThumbRegisterInfo.cpp @@ -107,8 +107,9 @@ void ThumbRegisterInfo::emitLoadConstPool( MachineFunction &MF = *MBB.getParent(); const ARMSubtarget &STI = MF.getSubtarget(); if (STI.isThumb1Only()) { - assert((isARMLowRegister(DestReg) || isVirtualRegister(DestReg)) && - "Thumb1 does not have ldr to high register"); + assert( + (isARMLowRegister(DestReg) || Register::isVirtualRegister(DestReg)) && + "Thumb1 does not have ldr to high register"); return emitThumb1LoadConstPool(MBB, MBBI, dl, DestReg, SubIdx, Val, Pred, PredReg, MIFlags); } @@ -141,7 +142,7 @@ static void emitThumbRegPlusImmInReg( unsigned LdReg = DestReg; if (DestReg == ARM::SP) assert(BaseReg == ARM::SP && "Unexpected!"); - if (!isARMLowRegister(DestReg) && !MRI.isVirtualRegister(DestReg)) + if (!isARMLowRegister(DestReg) && !Register::isVirtualRegister(DestReg)) LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass); if (NumBytes <= 255 && NumBytes >= 0 && CanChangeCC) { @@ -371,7 +372,7 @@ bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II, if (Opcode == ARM::tADDframe) { Offset += MI.getOperand(FrameRegIdx+1).getImm(); - unsigned DestReg = MI.getOperand(0).getReg(); + Register DestReg = MI.getOperand(0).getReg(); emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII, *this); @@ -509,7 +510,7 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (MI.mayLoad()) { // Use the destination register to materialize sp + offset. 
- unsigned TmpReg = MI.getOperand(0).getReg(); + Register TmpReg = MI.getOperand(0).getReg(); bool UseRR = false; if (Opcode == ARM::tLDRspi) { if (FrameReg == ARM::SP || STI.genExecuteOnly()) diff --git a/lib/Target/AVR/AVRAsmPrinter.cpp b/lib/Target/AVR/AVRAsmPrinter.cpp index 7586bd7b78fc..1db6b2236b4f 100644 --- a/lib/Target/AVR/AVRAsmPrinter.cpp +++ b/lib/Target/AVR/AVRAsmPrinter.cpp @@ -97,7 +97,7 @@ bool AVRAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, assert(RegOp.isReg() && "Operand must be a register when you're" "using 'A'..'Z' operand extracodes."); - unsigned Reg = RegOp.getReg(); + Register Reg = RegOp.getReg(); unsigned ByteNumber = ExtraCode[0] - 'A'; diff --git a/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/lib/Target/AVR/AVRExpandPseudoInsts.cpp index c45b2d0e39c1..83d0f6845332 100644 --- a/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -140,8 +140,8 @@ bool AVRExpandPseudo:: expandArith(unsigned OpLo, unsigned OpHi, Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; unsigned SrcLoReg, SrcHiReg, DstLoReg, DstHiReg; - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(2).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(2).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); bool DstIsKill = MI.getOperand(1).isKill(); bool SrcIsKill = MI.getOperand(2).isKill(); @@ -173,8 +173,8 @@ bool AVRExpandPseudo:: expandLogic(unsigned Op, Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; unsigned SrcLoReg, SrcHiReg, DstLoReg, DstHiReg; - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(2).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(2).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); bool DstIsKill = MI.getOperand(1).isKill(); bool SrcIsKill = MI.getOperand(2).isKill(); @@ -220,7 +220,7 @@ bool AVRExpandPseudo:: expandLogicImm(unsigned Op, Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; unsigned DstLoReg, DstHiReg; - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); bool SrcIsKill = MI.getOperand(1).isKill(); bool ImpIsDead = MI.getOperand(3).isDead(); @@ -874,7 +874,7 @@ unsigned AVRExpandPseudo::scavengeGPR8(MachineInstr &MI) { // Exclude all the registers being used by the instruction. for (MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.getReg() != 0 && !MO.isDef() && - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + !Register::isVirtualRegister(MO.getReg())) Candidates.reset(MO.getReg()); } diff --git a/lib/Target/AVR/AVRFrameLowering.cpp b/lib/Target/AVR/AVRFrameLowering.cpp index 5e91bb8632c1..e6c48de5a782 100644 --- a/lib/Target/AVR/AVRFrameLowering.cpp +++ b/lib/Target/AVR/AVRFrameLowering.cpp @@ -30,7 +30,8 @@ namespace llvm { AVRFrameLowering::AVRFrameLowering() - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 1, -2) {} + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align::None(), + -2) {} bool AVRFrameLowering::canSimplifyCallFramePseudos( const MachineFunction &MF) const { @@ -323,7 +324,7 @@ static void fixStackStores(MachineBasicBlock &MBB, "Invalid register, should be SP!"); if (insertPushes) { // Replace this instruction with a push. 
- unsigned SrcReg = MI.getOperand(2).getReg(); + Register SrcReg = MI.getOperand(2).getReg(); bool SrcIsKill = MI.getOperand(2).isKill(); // We can't use PUSHWRr here because when expanded the order of the new diff --git a/lib/Target/AVR/AVRISelDAGToDAG.cpp b/lib/Target/AVR/AVRISelDAGToDAG.cpp index 5cb4441c4380..4c4f4faa0508 100644 --- a/lib/Target/AVR/AVRISelDAGToDAG.cpp +++ b/lib/Target/AVR/AVRISelDAGToDAG.cpp @@ -251,7 +251,7 @@ bool AVRDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, RegisterSDNode *RegNode = cast(CopyFromRegOp->getOperand(1)); Reg = RegNode->getReg(); - CanHandleRegImmOpt &= (TargetRegisterInfo::isVirtualRegister(Reg) || + CanHandleRegImmOpt &= (Register::isVirtualRegister(Reg) || AVR::PTRDISPREGSRegClass.contains(Reg)); } else { CanHandleRegImmOpt = false; diff --git a/lib/Target/AVR/AVRISelLowering.cpp b/lib/Target/AVR/AVRISelLowering.cpp index b6ba5f22fafb..f12c59b7d8c3 100644 --- a/lib/Target/AVR/AVRISelLowering.cpp +++ b/lib/Target/AVR/AVRISelLowering.cpp @@ -236,7 +236,7 @@ AVRTargetLowering::AVRTargetLowering(const AVRTargetMachine &TM, setLibcallName(RTLIB::SIN_F32, "sin"); setLibcallName(RTLIB::COS_F32, "cos"); - setMinFunctionAlignment(1); + setMinFunctionAlignment(Align(2)); setMinimumJumpTableEntries(UINT_MAX); } @@ -1517,11 +1517,11 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI, unsigned ShiftAmtReg = RI.createVirtualRegister(&AVR::LD8RegClass); unsigned ShiftAmtReg2 = RI.createVirtualRegister(&AVR::LD8RegClass); - unsigned ShiftReg = RI.createVirtualRegister(RC); - unsigned ShiftReg2 = RI.createVirtualRegister(RC); - unsigned ShiftAmtSrcReg = MI.getOperand(2).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned DstReg = MI.getOperand(0).getReg(); + Register ShiftReg = RI.createVirtualRegister(RC); + Register ShiftReg2 = RI.createVirtualRegister(RC); + Register ShiftAmtSrcReg = MI.getOperand(2).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); // BB: // cpi N, 0 @@ -1568,7 +1568,7 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI, static bool isCopyMulResult(MachineBasicBlock::iterator const &I) { if (I->getOpcode() == AVR::COPY) { - unsigned SrcReg = I->getOperand(1).getReg(); + Register SrcReg = I->getOperand(1).getReg(); return (SrcReg == AVR::R0 || SrcReg == AVR::R1); } @@ -1689,6 +1689,8 @@ AVRTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { // See http://www.nongnu.org/avr-libc/user-manual/inline_asm.html switch (Constraint[0]) { + default: + break; case 'a': // Simple upper registers case 'b': // Base pointer registers pairs case 'd': // Upper register @@ -1715,9 +1717,7 @@ AVRTargetLowering::getConstraintType(StringRef Constraint) const { case 'O': // Integer constant (Range: 8, 16, 24) case 'P': // Integer constant (Range: 1) case 'R': // Integer constant (Range: -6 to 5)x - return C_Other; - default: - break; + return C_Immediate; } } @@ -2006,10 +2006,9 @@ void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } -unsigned AVRTargetLowering::getRegisterByName(const char *RegName, - EVT VT, - SelectionDAG &DAG) const { - unsigned Reg; +Register AVRTargetLowering::getRegisterByName(const char *RegName, EVT VT, + const MachineFunction &MF) const { + Register Reg; if (VT == MVT::i8) { Reg = StringSwitch(RegName) diff --git a/lib/Target/AVR/AVRISelLowering.h b/lib/Target/AVR/AVRISelLowering.h 
index ed2d0835903c..6c722fa5414b 100644 --- a/lib/Target/AVR/AVRISelLowering.h +++ b/lib/Target/AVR/AVRISelLowering.h @@ -125,8 +125,8 @@ public: std::vector &Ops, SelectionDAG &DAG) const override; - unsigned getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const override; + Register getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const override; bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const override { diff --git a/lib/Target/AVR/AVRRegisterInfo.cpp b/lib/Target/AVR/AVRRegisterInfo.cpp index a6b36f80485d..8fce05c933bc 100644 --- a/lib/Target/AVR/AVRRegisterInfo.cpp +++ b/lib/Target/AVR/AVRRegisterInfo.cpp @@ -158,7 +158,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // We need to materialize the offset via an add instruction. unsigned Opcode; - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); assert(DstReg != AVR::R29R28 && "Dest reg cannot be the frame pointer"); II++; // Skip over the FRMIDX (and now MOVW) instruction. diff --git a/lib/Target/AVR/AVRTargetMachine.cpp b/lib/Target/AVR/AVRTargetMachine.cpp index a36c8b0f9649..25304280d002 100644 --- a/lib/Target/AVR/AVRTargetMachine.cpp +++ b/lib/Target/AVR/AVRTargetMachine.cpp @@ -50,7 +50,7 @@ AVRTargetMachine::AVRTargetMachine(const Target &T, const Triple &TT, getEffectiveRelocModel(RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), SubTarget(TT, getCPU(CPU), FS, *this) { - this->TLOF = make_unique(); + this->TLOF = std::make_unique(); initAsmInfo(); } diff --git a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp index aac5644711e2..af60bc4fdc90 100644 --- a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp +++ b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp @@ -199,22 +199,22 @@ public: } static std::unique_ptr CreateToken(StringRef Str, SMLoc S) { - return make_unique(Str, S); + return std::make_unique(Str, S); } static std::unique_ptr CreateReg(unsigned RegNum, SMLoc S, SMLoc E) { - return make_unique(RegNum, S, E); + return std::make_unique(RegNum, S, E); } static std::unique_ptr CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { - return make_unique(Val, S, E); + return std::make_unique(Val, S, E); } static std::unique_ptr CreateMemri(unsigned RegNum, const MCExpr *Val, SMLoc S, SMLoc E) { - return make_unique(RegNum, Val, S, E); + return std::make_unique(RegNum, Val, S, E); } void makeToken(StringRef Token) { diff --git a/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp b/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp index 6025e4b2437c..1c69fea5962d 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp +++ b/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp @@ -152,7 +152,7 @@ unsigned AVRELFObjectWriter::getRelocType(MCContext &Ctx, } std::unique_ptr createAVRELFObjectWriter(uint8_t OSABI) { - return make_unique(OSABI); + return std::make_unique(OSABI); } } // end of namespace llvm diff --git a/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/lib/Target/BPF/AsmParser/BPFAsmParser.cpp index 75885fd058a7..ce1d2ecd9d26 100644 --- a/lib/Target/BPF/AsmParser/BPFAsmParser.cpp +++ b/lib/Target/BPF/AsmParser/BPFAsmParser.cpp @@ -194,7 +194,7 @@ public: } static std::unique_ptr createToken(StringRef Str, SMLoc S) { - auto Op = make_unique(Token); + auto Op = std::make_unique(Token); Op->Tok = Str; Op->StartLoc = S; Op->EndLoc = S; @@ -203,7 +203,7 @@ public: static std::unique_ptr createReg(unsigned RegNo, SMLoc S, SMLoc E) { - auto Op = 
make_unique(Register); + auto Op = std::make_unique(Register); Op->Reg.RegNum = RegNo; Op->StartLoc = S; Op->EndLoc = E; @@ -212,7 +212,7 @@ public: static std::unique_ptr createImm(const MCExpr *Val, SMLoc S, SMLoc E) { - auto Op = make_unique(Immediate); + auto Op = std::make_unique(Immediate); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; diff --git a/lib/Target/BPF/BPF.h b/lib/Target/BPF/BPF.h index d311fc154094..6e4f35f4c5d7 100644 --- a/lib/Target/BPF/BPF.h +++ b/lib/Target/BPF/BPF.h @@ -15,17 +15,19 @@ namespace llvm { class BPFTargetMachine; -ModulePass *createBPFAbstractMemberAccess(); +ModulePass *createBPFAbstractMemberAccess(BPFTargetMachine *TM); FunctionPass *createBPFISelDag(BPFTargetMachine &TM); FunctionPass *createBPFMISimplifyPatchablePass(); FunctionPass *createBPFMIPeepholePass(); +FunctionPass *createBPFMIPeepholeTruncElimPass(); FunctionPass *createBPFMIPreEmitPeepholePass(); FunctionPass *createBPFMIPreEmitCheckingPass(); void initializeBPFAbstractMemberAccessPass(PassRegistry&); void initializeBPFMISimplifyPatchablePass(PassRegistry&); void initializeBPFMIPeepholePass(PassRegistry&); +void initializeBPFMIPeepholeTruncElimPass(PassRegistry&); void initializeBPFMIPreEmitPeepholePass(PassRegistry&); void initializeBPFMIPreEmitCheckingPass(PassRegistry&); } diff --git a/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/lib/Target/BPF/BPFAbstractMemberAccess.cpp index 51d4cbc8a429..400701c4e5c2 100644 --- a/lib/Target/BPF/BPFAbstractMemberAccess.cpp +++ b/lib/Target/BPF/BPFAbstractMemberAccess.cpp @@ -50,6 +50,28 @@ // addr = preserve_struct_access_index(base, gep_index, di_index) // !llvm.preserve.access.index // +// Bitfield member access needs special attention. User cannot take the +// address of a bitfield acceess. To facilitate kernel verifier +// for easy bitfield code optimization, a new clang intrinsic is introduced: +// uint32_t __builtin_preserve_field_info(member_access, info_kind) +// In IR, a chain with two (or more) intrinsic calls will be generated: +// ... +// addr = preserve_struct_access_index(base, 1, 1) !struct s +// uint32_t result = bpf_preserve_field_info(addr, info_kind) +// +// Suppose the info_kind is FIELD_SIGNEDNESS, +// The above two IR intrinsics will be replaced with +// a relocatable insn: +// signness = /* signness of member_access */ +// and signness can be changed by bpf loader based on the +// types on the host. +// +// User can also test whether a field exists or not with +// uint32_t result = bpf_preserve_field_info(member_access, FIELD_EXISTENCE) +// The field will be always available (result = 1) during initial +// compilation, but bpf loader can patch with the correct value +// on the target host where the member_access may or may not be available +// //===----------------------------------------------------------------------===// #include "BPF.h" @@ -65,13 +87,12 @@ #include "llvm/IR/Value.h" #include "llvm/Pass.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include #define DEBUG_TYPE "bpf-abstract-member-access" namespace llvm { const std::string BPFCoreSharedInfo::AmaAttr = "btf_ama"; -const std::string BPFCoreSharedInfo::PatchableExtSecName = - ".BPF.patchable_externs"; } // namespace llvm using namespace llvm; @@ -87,40 +108,62 @@ class BPFAbstractMemberAccess final : public ModulePass { public: static char ID; - BPFAbstractMemberAccess() : ModulePass(ID) {} + TargetMachine *TM; + // Add optional BPFTargetMachine parameter so that BPF backend can add the phase + // with target machine to find out the endianness. 
The default constructor (without + // parameters) is used by the pass manager for managing purposes. + BPFAbstractMemberAccess(BPFTargetMachine *TM = nullptr) : ModulePass(ID), TM(TM) {} + + struct CallInfo { + uint32_t Kind; + uint32_t AccessIndex; + MDNode *Metadata; + Value *Base; + }; + typedef std::stack> CallInfoStack; private: enum : uint32_t { BPFPreserveArrayAI = 1, BPFPreserveUnionAI = 2, BPFPreserveStructAI = 3, + BPFPreserveFieldInfoAI = 4, }; std::map GEPGlobals; // A map to link preserve_*_access_index instrinsic calls. - std::map> AIChain; + std::map> AIChain; // A map to hold all the base preserve_*_access_index instrinsic calls. - // The base call is not an input of any other preserve_*_access_index + // The base call is not an input of any other preserve_* // intrinsics. - std::map BaseAICalls; + std::map BaseAICalls; bool doTransformation(Module &M); - void traceAICall(CallInst *Call, uint32_t Kind); - void traceBitCast(BitCastInst *BitCast, CallInst *Parent, uint32_t Kind); - void traceGEP(GetElementPtrInst *GEP, CallInst *Parent, uint32_t Kind); + void traceAICall(CallInst *Call, CallInfo &ParentInfo); + void traceBitCast(BitCastInst *BitCast, CallInst *Parent, + CallInfo &ParentInfo); + void traceGEP(GetElementPtrInst *GEP, CallInst *Parent, + CallInfo &ParentInfo); void collectAICallChains(Module &M, Function &F); - bool IsPreserveDIAccessIndexCall(const CallInst *Call, uint32_t &Kind); + bool IsPreserveDIAccessIndexCall(const CallInst *Call, CallInfo &Cinfo); + bool IsValidAIChain(const MDNode *ParentMeta, uint32_t ParentAI, + const MDNode *ChildMeta); bool removePreserveAccessIndexIntrinsic(Module &M); void replaceWithGEP(std::vector &CallList, uint32_t NumOfZerosIndex, uint32_t DIIndex); - - Value *computeBaseAndAccessStr(CallInst *Call, std::string &AccessStr, - std::string &AccessKey, uint32_t Kind, - MDNode *&TypeMeta); - bool getAccessIndex(const Value *IndexValue, uint64_t &AccessIndex); - bool transformGEPChain(Module &M, CallInst *Call, uint32_t Kind); + bool HasPreserveFieldInfoCall(CallInfoStack &CallStack); + void GetStorageBitRange(DICompositeType *CTy, DIDerivedType *MemberTy, + uint32_t AccessIndex, uint32_t &StartBitOffset, + uint32_t &EndBitOffset); + uint32_t GetFieldInfo(uint32_t InfoKind, DICompositeType *CTy, + uint32_t AccessIndex, uint32_t PatchImm); + + Value *computeBaseAndAccessKey(CallInst *Call, CallInfo &CInfo, + std::string &AccessKey, MDNode *&BaseMeta); + uint64_t getConstant(const Value *IndexValue); + bool transformGEPChain(Module &M, CallInst *Call, CallInfo &CInfo); }; } // End anonymous namespace @@ -128,23 +171,65 @@ char BPFAbstractMemberAccess::ID = 0; INITIALIZE_PASS(BPFAbstractMemberAccess, DEBUG_TYPE, "abstracting struct/union member accessees", false, false) -ModulePass *llvm::createBPFAbstractMemberAccess() { - return new BPFAbstractMemberAccess(); +ModulePass *llvm::createBPFAbstractMemberAccess(BPFTargetMachine *TM) { + return new BPFAbstractMemberAccess(TM); } bool BPFAbstractMemberAccess::runOnModule(Module &M) { LLVM_DEBUG(dbgs() << "********** Abstract Member Accesses **********\n"); // Bail out if no debug info. 
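The long comment near the top of this file describes the intrinsic chain clang emits for __builtin_preserve_field_info. For orientation, a rough sketch of the C-for-BPF source that gives rise to such a chain; the struct, field name, and the numeric info_kind (assumed here to be the field-existence kind) are illustrative only, and the authoritative kind numbering lives in BPFCoreSharedInfo:

    /* Requires clang targeting BPF. The attribute makes member accesses
     * relocatable, so skb->len is emitted through a
     * llvm.preserve.struct.access.index call and the builtin becomes
     * llvm.bpf.preserve.field.info, as described above. */
    struct sk_buff___sketch {
      int len;
    } __attribute__((preserve_access_index));

    unsigned field_exists(struct sk_buff___sketch *skb) {
      /* Evaluates to 1 at compile time; the BPF loader may patch it to 0
       * on a kernel where the member is absent. */
      return __builtin_preserve_field_info(skb->len,
                                           2 /* assumed: FIELD_EXISTENCE */);
    }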
- if (empty(M.debug_compile_units())) + if (M.debug_compile_units().empty()) return false; return doTransformation(M); } +static bool SkipDIDerivedTag(unsigned Tag) { + if (Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type && + Tag != dwarf::DW_TAG_volatile_type && + Tag != dwarf::DW_TAG_restrict_type && + Tag != dwarf::DW_TAG_member) + return false; + return true; +} + +static DIType * stripQualifiers(DIType *Ty) { + while (auto *DTy = dyn_cast(Ty)) { + if (!SkipDIDerivedTag(DTy->getTag())) + break; + Ty = DTy->getBaseType(); + } + return Ty; +} + +static const DIType * stripQualifiers(const DIType *Ty) { + while (auto *DTy = dyn_cast(Ty)) { + if (!SkipDIDerivedTag(DTy->getTag())) + break; + Ty = DTy->getBaseType(); + } + return Ty; +} + +static uint32_t calcArraySize(const DICompositeType *CTy, uint32_t StartDim) { + DINodeArray Elements = CTy->getElements(); + uint32_t DimSize = 1; + for (uint32_t I = StartDim; I < Elements.size(); ++I) { + if (auto *Element = dyn_cast_or_null(Elements[I])) + if (Element->getTag() == dwarf::DW_TAG_subrange_type) { + const DISubrange *SR = cast(Element); + auto *CI = SR->getCount().dyn_cast(); + DimSize *= CI->getSExtValue(); + } + } + + return DimSize; +} + /// Check whether a call is a preserve_*_access_index intrinsic call or not. bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call, - uint32_t &Kind) { + CallInfo &CInfo) { if (!Call) return false; @@ -152,15 +237,40 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call, if (!GV) return false; if (GV->getName().startswith("llvm.preserve.array.access.index")) { - Kind = BPFPreserveArrayAI; + CInfo.Kind = BPFPreserveArrayAI; + CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index); + if (!CInfo.Metadata) + report_fatal_error("Missing metadata for llvm.preserve.array.access.index intrinsic"); + CInfo.AccessIndex = getConstant(Call->getArgOperand(2)); + CInfo.Base = Call->getArgOperand(0); return true; } if (GV->getName().startswith("llvm.preserve.union.access.index")) { - Kind = BPFPreserveUnionAI; + CInfo.Kind = BPFPreserveUnionAI; + CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index); + if (!CInfo.Metadata) + report_fatal_error("Missing metadata for llvm.preserve.union.access.index intrinsic"); + CInfo.AccessIndex = getConstant(Call->getArgOperand(1)); + CInfo.Base = Call->getArgOperand(0); return true; } if (GV->getName().startswith("llvm.preserve.struct.access.index")) { - Kind = BPFPreserveStructAI; + CInfo.Kind = BPFPreserveStructAI; + CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index); + if (!CInfo.Metadata) + report_fatal_error("Missing metadata for llvm.preserve.struct.access.index intrinsic"); + CInfo.AccessIndex = getConstant(Call->getArgOperand(2)); + CInfo.Base = Call->getArgOperand(0); + return true; + } + if (GV->getName().startswith("llvm.bpf.preserve.field.info")) { + CInfo.Kind = BPFPreserveFieldInfoAI; + CInfo.Metadata = nullptr; + // Check validity of info_kind as clang did not check this. 
+ uint64_t InfoKind = getConstant(Call->getArgOperand(1)); + if (InfoKind >= BPFCoreSharedInfo::MAX_FIELD_RELOC_KIND) + report_fatal_error("Incorrect info_kind for llvm.bpf.preserve.field.info intrinsic"); + CInfo.AccessIndex = InfoKind; return true; } @@ -173,8 +283,7 @@ void BPFAbstractMemberAccess::replaceWithGEP(std::vector &CallList, for (auto Call : CallList) { uint32_t Dimension = 1; if (DimensionIndex > 0) - Dimension = cast(Call->getArgOperand(DimensionIndex)) - ->getZExtValue(); + Dimension = getConstant(Call->getArgOperand(DimensionIndex)); Constant *Zero = ConstantInt::get(Type::getInt32Ty(Call->getParent()->getContext()), 0); @@ -200,14 +309,14 @@ bool BPFAbstractMemberAccess::removePreserveAccessIndexIntrinsic(Module &M) { for (auto &BB : F) for (auto &I : BB) { auto *Call = dyn_cast(&I); - uint32_t Kind; - if (!IsPreserveDIAccessIndexCall(Call, Kind)) + CallInfo CInfo; + if (!IsPreserveDIAccessIndexCall(Call, CInfo)) continue; Found = true; - if (Kind == BPFPreserveArrayAI) + if (CInfo.Kind == BPFPreserveArrayAI) PreserveArrayIndexCalls.push_back(Call); - else if (Kind == BPFPreserveUnionAI) + else if (CInfo.Kind == BPFPreserveUnionAI) PreserveUnionIndexCalls.push_back(Call); else PreserveStructIndexCalls.push_back(Call); @@ -233,79 +342,146 @@ bool BPFAbstractMemberAccess::removePreserveAccessIndexIntrinsic(Module &M) { return Found; } -void BPFAbstractMemberAccess::traceAICall(CallInst *Call, uint32_t Kind) { +/// Check whether the access index chain is valid. We check +/// here because there may be type casts between two +/// access indexes. We want to ensure memory access still valid. +bool BPFAbstractMemberAccess::IsValidAIChain(const MDNode *ParentType, + uint32_t ParentAI, + const MDNode *ChildType) { + if (!ChildType) + return true; // preserve_field_info, no type comparison needed. + + const DIType *PType = stripQualifiers(cast(ParentType)); + const DIType *CType = stripQualifiers(cast(ChildType)); + + // Child is a derived/pointer type, which is due to type casting. + // Pointer type cannot be in the middle of chain. + if (isa(CType)) + return false; + + // Parent is a pointer type. 
+ if (const auto *PtrTy = dyn_cast(PType)) { + if (PtrTy->getTag() != dwarf::DW_TAG_pointer_type) + return false; + return stripQualifiers(PtrTy->getBaseType()) == CType; + } + + // Otherwise, struct/union/array types + const auto *PTy = dyn_cast(PType); + const auto *CTy = dyn_cast(CType); + assert(PTy && CTy && "ParentType or ChildType is null or not composite"); + + uint32_t PTyTag = PTy->getTag(); + assert(PTyTag == dwarf::DW_TAG_array_type || + PTyTag == dwarf::DW_TAG_structure_type || + PTyTag == dwarf::DW_TAG_union_type); + + uint32_t CTyTag = CTy->getTag(); + assert(CTyTag == dwarf::DW_TAG_array_type || + CTyTag == dwarf::DW_TAG_structure_type || + CTyTag == dwarf::DW_TAG_union_type); + + // Multi dimensional arrays, base element should be the same + if (PTyTag == dwarf::DW_TAG_array_type && PTyTag == CTyTag) + return PTy->getBaseType() == CTy->getBaseType(); + + DIType *Ty; + if (PTyTag == dwarf::DW_TAG_array_type) + Ty = PTy->getBaseType(); + else + Ty = dyn_cast(PTy->getElements()[ParentAI]); + + return dyn_cast(stripQualifiers(Ty)) == CTy; +} + +void BPFAbstractMemberAccess::traceAICall(CallInst *Call, + CallInfo &ParentInfo) { for (User *U : Call->users()) { Instruction *Inst = dyn_cast(U); if (!Inst) continue; if (auto *BI = dyn_cast(Inst)) { - traceBitCast(BI, Call, Kind); + traceBitCast(BI, Call, ParentInfo); } else if (auto *CI = dyn_cast(Inst)) { - uint32_t CIKind; - if (IsPreserveDIAccessIndexCall(CI, CIKind)) { - AIChain[CI] = std::make_pair(Call, Kind); - traceAICall(CI, CIKind); + CallInfo ChildInfo; + + if (IsPreserveDIAccessIndexCall(CI, ChildInfo) && + IsValidAIChain(ParentInfo.Metadata, ParentInfo.AccessIndex, + ChildInfo.Metadata)) { + AIChain[CI] = std::make_pair(Call, ParentInfo); + traceAICall(CI, ChildInfo); } else { - BaseAICalls[Call] = Kind; + BaseAICalls[Call] = ParentInfo; } } else if (auto *GI = dyn_cast(Inst)) { if (GI->hasAllZeroIndices()) - traceGEP(GI, Call, Kind); + traceGEP(GI, Call, ParentInfo); else - BaseAICalls[Call] = Kind; + BaseAICalls[Call] = ParentInfo; + } else { + BaseAICalls[Call] = ParentInfo; } } } void BPFAbstractMemberAccess::traceBitCast(BitCastInst *BitCast, - CallInst *Parent, uint32_t Kind) { + CallInst *Parent, + CallInfo &ParentInfo) { for (User *U : BitCast->users()) { Instruction *Inst = dyn_cast(U); if (!Inst) continue; if (auto *BI = dyn_cast(Inst)) { - traceBitCast(BI, Parent, Kind); + traceBitCast(BI, Parent, ParentInfo); } else if (auto *CI = dyn_cast(Inst)) { - uint32_t CIKind; - if (IsPreserveDIAccessIndexCall(CI, CIKind)) { - AIChain[CI] = std::make_pair(Parent, Kind); - traceAICall(CI, CIKind); + CallInfo ChildInfo; + if (IsPreserveDIAccessIndexCall(CI, ChildInfo) && + IsValidAIChain(ParentInfo.Metadata, ParentInfo.AccessIndex, + ChildInfo.Metadata)) { + AIChain[CI] = std::make_pair(Parent, ParentInfo); + traceAICall(CI, ChildInfo); } else { - BaseAICalls[Parent] = Kind; + BaseAICalls[Parent] = ParentInfo; } } else if (auto *GI = dyn_cast(Inst)) { if (GI->hasAllZeroIndices()) - traceGEP(GI, Parent, Kind); + traceGEP(GI, Parent, ParentInfo); else - BaseAICalls[Parent] = Kind; + BaseAICalls[Parent] = ParentInfo; + } else { + BaseAICalls[Parent] = ParentInfo; } } } void BPFAbstractMemberAccess::traceGEP(GetElementPtrInst *GEP, CallInst *Parent, - uint32_t Kind) { + CallInfo &ParentInfo) { for (User *U : GEP->users()) { Instruction *Inst = dyn_cast(U); if (!Inst) continue; if (auto *BI = dyn_cast(Inst)) { - traceBitCast(BI, Parent, Kind); + traceBitCast(BI, Parent, ParentInfo); } else if (auto *CI = dyn_cast(Inst)) { - 
uint32_t CIKind; - if (IsPreserveDIAccessIndexCall(CI, CIKind)) { - AIChain[CI] = std::make_pair(Parent, Kind); - traceAICall(CI, CIKind); + CallInfo ChildInfo; + if (IsPreserveDIAccessIndexCall(CI, ChildInfo) && + IsValidAIChain(ParentInfo.Metadata, ParentInfo.AccessIndex, + ChildInfo.Metadata)) { + AIChain[CI] = std::make_pair(Parent, ParentInfo); + traceAICall(CI, ChildInfo); } else { - BaseAICalls[Parent] = Kind; + BaseAICalls[Parent] = ParentInfo; } } else if (auto *GI = dyn_cast(Inst)) { if (GI->hasAllZeroIndices()) - traceGEP(GI, Parent, Kind); + traceGEP(GI, Parent, ParentInfo); else - BaseAICalls[Parent] = Kind; + BaseAICalls[Parent] = ParentInfo; + } else { + BaseAICalls[Parent] = ParentInfo; } } } @@ -316,92 +492,345 @@ void BPFAbstractMemberAccess::collectAICallChains(Module &M, Function &F) { for (auto &BB : F) for (auto &I : BB) { - uint32_t Kind; + CallInfo CInfo; auto *Call = dyn_cast(&I); - if (!IsPreserveDIAccessIndexCall(Call, Kind) || + if (!IsPreserveDIAccessIndexCall(Call, CInfo) || AIChain.find(Call) != AIChain.end()) continue; - traceAICall(Call, Kind); + traceAICall(Call, CInfo); } } -/// Get access index from the preserve_*_access_index intrinsic calls. -bool BPFAbstractMemberAccess::getAccessIndex(const Value *IndexValue, - uint64_t &AccessIndex) { +uint64_t BPFAbstractMemberAccess::getConstant(const Value *IndexValue) { const ConstantInt *CV = dyn_cast(IndexValue); - if (!CV) - return false; + assert(CV); + return CV->getValue().getZExtValue(); +} - AccessIndex = CV->getValue().getZExtValue(); - return true; +/// Get the start and the end of storage offset for \p MemberTy. +/// The storage bits are corresponding to the LLVM internal types, +/// and the storage bits for the member determines what load width +/// to use in order to extract the bitfield value. 
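A minimal sketch, assuming a standalone helper, of how the storage window computed by GetStorageBitRange() maps to a load width; it mirrors the power-of-two check used by the FIELD_BYTE_SIZE handling further down, and the helper name is illustrative:

#include <cstdint>
#include <stdexcept>

// Width in bytes of the load covering a bitfield whose storage occupies
// bits [StartBitOffset, EndBitOffset) of the enclosing struct.
static uint32_t loadByteSize(uint32_t StartBitOffset, uint32_t EndBitOffset) {
  uint32_t SizeInBits = EndBitOffset - StartBitOffset;
  if (SizeInBits & (SizeInBits - 1))
    throw std::runtime_error("storage window is not a power-of-two width");
  return SizeInBits >> 3;   // e.g. a window of bits [32, 64) loads as 4 bytes
}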
+void BPFAbstractMemberAccess::GetStorageBitRange(DICompositeType *CTy, + DIDerivedType *MemberTy, + uint32_t AccessIndex, + uint32_t &StartBitOffset, + uint32_t &EndBitOffset) { + auto SOff = dyn_cast(MemberTy->getStorageOffsetInBits()); + assert(SOff); + StartBitOffset = SOff->getZExtValue(); + + EndBitOffset = CTy->getSizeInBits(); + uint32_t Index = AccessIndex + 1; + for (; Index < CTy->getElements().size(); ++Index) { + auto Member = cast(CTy->getElements()[Index]); + if (!Member->getStorageOffsetInBits()) { + EndBitOffset = Member->getOffsetInBits(); + break; + } + SOff = dyn_cast(Member->getStorageOffsetInBits()); + assert(SOff); + unsigned BitOffset = SOff->getZExtValue(); + if (BitOffset != StartBitOffset) { + EndBitOffset = BitOffset; + break; + } + } +} + +uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind, + DICompositeType *CTy, + uint32_t AccessIndex, + uint32_t PatchImm) { + if (InfoKind == BPFCoreSharedInfo::FIELD_EXISTENCE) + return 1; + + uint32_t Tag = CTy->getTag(); + if (InfoKind == BPFCoreSharedInfo::FIELD_BYTE_OFFSET) { + if (Tag == dwarf::DW_TAG_array_type) { + auto *EltTy = stripQualifiers(CTy->getBaseType()); + PatchImm += AccessIndex * calcArraySize(CTy, 1) * + (EltTy->getSizeInBits() >> 3); + } else if (Tag == dwarf::DW_TAG_structure_type) { + auto *MemberTy = cast(CTy->getElements()[AccessIndex]); + if (!MemberTy->isBitField()) { + PatchImm += MemberTy->getOffsetInBits() >> 3; + } else { + auto SOffset = dyn_cast(MemberTy->getStorageOffsetInBits()); + assert(SOffset); + PatchImm += SOffset->getZExtValue() >> 3; + } + } + return PatchImm; + } + + if (InfoKind == BPFCoreSharedInfo::FIELD_BYTE_SIZE) { + if (Tag == dwarf::DW_TAG_array_type) { + auto *EltTy = stripQualifiers(CTy->getBaseType()); + return calcArraySize(CTy, 1) * (EltTy->getSizeInBits() >> 3); + } else { + auto *MemberTy = cast(CTy->getElements()[AccessIndex]); + uint32_t SizeInBits = MemberTy->getSizeInBits(); + if (!MemberTy->isBitField()) + return SizeInBits >> 3; + + unsigned SBitOffset, NextSBitOffset; + GetStorageBitRange(CTy, MemberTy, AccessIndex, SBitOffset, NextSBitOffset); + SizeInBits = NextSBitOffset - SBitOffset; + if (SizeInBits & (SizeInBits - 1)) + report_fatal_error("Unsupported field expression for llvm.bpf.preserve.field.info"); + return SizeInBits >> 3; + } + } + + if (InfoKind == BPFCoreSharedInfo::FIELD_SIGNEDNESS) { + const DIType *BaseTy; + if (Tag == dwarf::DW_TAG_array_type) { + // Signedness only checked when final array elements are accessed. + if (CTy->getElements().size() != 1) + report_fatal_error("Invalid array expression for llvm.bpf.preserve.field.info"); + BaseTy = stripQualifiers(CTy->getBaseType()); + } else { + auto *MemberTy = cast(CTy->getElements()[AccessIndex]); + BaseTy = stripQualifiers(MemberTy->getBaseType()); + } + + // Only basic types and enum types have signedness. + const auto *BTy = dyn_cast(BaseTy); + while (!BTy) { + const auto *CompTy = dyn_cast(BaseTy); + // Report an error if the field expression does not have signedness. 
+ if (!CompTy || CompTy->getTag() != dwarf::DW_TAG_enumeration_type) + report_fatal_error("Invalid field expression for llvm.bpf.preserve.field.info"); + BaseTy = stripQualifiers(CompTy->getBaseType()); + BTy = dyn_cast(BaseTy); + } + uint32_t Encoding = BTy->getEncoding(); + return (Encoding == dwarf::DW_ATE_signed || Encoding == dwarf::DW_ATE_signed_char); + } + + if (InfoKind == BPFCoreSharedInfo::FIELD_LSHIFT_U64) { + // The value is loaded into a value with FIELD_BYTE_SIZE size, + // and then zero or sign extended to U64. + // FIELD_LSHIFT_U64 and FIELD_RSHIFT_U64 are operations + // to extract the original value. + const Triple &Triple = TM->getTargetTriple(); + DIDerivedType *MemberTy = nullptr; + bool IsBitField = false; + uint32_t SizeInBits; + + if (Tag == dwarf::DW_TAG_array_type) { + auto *EltTy = stripQualifiers(CTy->getBaseType()); + SizeInBits = calcArraySize(CTy, 1) * EltTy->getSizeInBits(); + } else { + MemberTy = cast(CTy->getElements()[AccessIndex]); + SizeInBits = MemberTy->getSizeInBits(); + IsBitField = MemberTy->isBitField(); + } + + if (!IsBitField) { + if (SizeInBits > 64) + report_fatal_error("too big field size for llvm.bpf.preserve.field.info"); + return 64 - SizeInBits; + } + + unsigned SBitOffset, NextSBitOffset; + GetStorageBitRange(CTy, MemberTy, AccessIndex, SBitOffset, NextSBitOffset); + if (NextSBitOffset - SBitOffset > 64) + report_fatal_error("too big field size for llvm.bpf.preserve.field.info"); + + unsigned OffsetInBits = MemberTy->getOffsetInBits(); + if (Triple.getArch() == Triple::bpfel) + return SBitOffset + 64 - OffsetInBits - SizeInBits; + else + return OffsetInBits + 64 - NextSBitOffset; + } + + if (InfoKind == BPFCoreSharedInfo::FIELD_RSHIFT_U64) { + DIDerivedType *MemberTy = nullptr; + bool IsBitField = false; + uint32_t SizeInBits; + if (Tag == dwarf::DW_TAG_array_type) { + auto *EltTy = stripQualifiers(CTy->getBaseType()); + SizeInBits = calcArraySize(CTy, 1) * EltTy->getSizeInBits(); + } else { + MemberTy = cast(CTy->getElements()[AccessIndex]); + SizeInBits = MemberTy->getSizeInBits(); + IsBitField = MemberTy->isBitField(); + } + + if (!IsBitField) { + if (SizeInBits > 64) + report_fatal_error("too big field size for llvm.bpf.preserve.field.info"); + return 64 - SizeInBits; + } + + unsigned SBitOffset, NextSBitOffset; + GetStorageBitRange(CTy, MemberTy, AccessIndex, SBitOffset, NextSBitOffset); + if (NextSBitOffset - SBitOffset > 64) + report_fatal_error("too big field size for llvm.bpf.preserve.field.info"); + + return 64 - SizeInBits; + } + + llvm_unreachable("Unknown llvm.bpf.preserve.field.info info kind"); } -/// Compute the base of the whole preserve_*_access_index chains, i.e., the base +bool BPFAbstractMemberAccess::HasPreserveFieldInfoCall(CallInfoStack &CallStack) { + // This is called in error return path, no need to maintain CallStack. + while (CallStack.size()) { + auto StackElem = CallStack.top(); + if (StackElem.second.Kind == BPFPreserveFieldInfoAI) + return true; + CallStack.pop(); + } + return false; +} + +/// Compute the base of the whole preserve_* intrinsics chains, i.e., the base /// pointer of the first preserve_*_access_index call, and construct the access /// string, which will be the name of a global variable. 
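A minimal sketch of the access-key shape that computeBaseAndAccessKey() below assembles: type name, relocation kind, patched immediate, then the access-index chain. The helper name and example values are illustrative:

#include <cstdint>
#include <string>

static std::string buildAccessKey(const std::string &TypeName, uint32_t InfoKind,
                                  uint32_t PatchImm, const std::string &Indices) {
  // e.g. buildAccessKey("sk_buff", 0, 64, "0:2:0") yields "sk_buff:0:64$0:2:0"
  return TypeName + ":" + std::to_string(InfoKind) + ":" +
         std::to_string(PatchImm) + "$" + Indices;
}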
-Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call, - std::string &AccessStr, +Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call, + CallInfo &CInfo, std::string &AccessKey, - uint32_t Kind, MDNode *&TypeMeta) { Value *Base = nullptr; - std::vector AccessIndices; - uint64_t TypeNameIndex = 0; - std::string LastTypeName; + std::string TypeName; + CallInfoStack CallStack; + // Put the access chain into a stack with the top as the head of the chain. while (Call) { - // Base of original corresponding GEP - Base = Call->getArgOperand(0); - - // Type Name - std::string TypeName; - MDNode *MDN; - if (Kind == BPFPreserveUnionAI || Kind == BPFPreserveStructAI) { - MDN = Call->getMetadata(LLVMContext::MD_preserve_access_index); - if (!MDN) - return nullptr; - - DIType *Ty = dyn_cast(MDN); - if (!Ty) - return nullptr; + CallStack.push(std::make_pair(Call, CInfo)); + CInfo = AIChain[Call].second; + Call = AIChain[Call].first; + } + // The access offset from the base of the head of chain is also + // calculated here as all debuginfo types are available. + + // Get type name and calculate the first index. + // We only want to get type name from structure or union. + // If user wants a relocation like + // int *p; ... __builtin_preserve_access_index(&p[4]) ... + // or + // int a[10][20]; ... __builtin_preserve_access_index(&a[2][3]) ... + // we will skip them. + uint32_t FirstIndex = 0; + uint32_t PatchImm = 0; // AccessOffset or the requested field info + uint32_t InfoKind = BPFCoreSharedInfo::FIELD_BYTE_OFFSET; + while (CallStack.size()) { + auto StackElem = CallStack.top(); + Call = StackElem.first; + CInfo = StackElem.second; + + if (!Base) + Base = CInfo.Base; + + DIType *Ty = stripQualifiers(cast(CInfo.Metadata)); + if (CInfo.Kind == BPFPreserveUnionAI || + CInfo.Kind == BPFPreserveStructAI) { + // struct or union type TypeName = Ty->getName(); + TypeMeta = Ty; + PatchImm += FirstIndex * (Ty->getSizeInBits() >> 3); + break; } - // Access Index - uint64_t AccessIndex; - uint32_t ArgIndex = (Kind == BPFPreserveUnionAI) ? 1 : 2; - if (!getAccessIndex(Call->getArgOperand(ArgIndex), AccessIndex)) - return nullptr; - - AccessIndices.push_back(AccessIndex); - if (TypeName.size()) { - TypeNameIndex = AccessIndices.size() - 1; - LastTypeName = TypeName; - TypeMeta = MDN; + assert(CInfo.Kind == BPFPreserveArrayAI); + + // Array entries will always be consumed for accumulative initial index. 
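A worked sketch of the dimension folding that the FirstIndex accumulation below performs with calcArraySize(): a multi-dimensional access contributes one linear element index. The array shape and indices are illustrative:

#include <cstdint>

static uint32_t linearElementIndex() {
  // int a[10][20]; ... __builtin_preserve_access_index(&a[2][3]) ...
  const uint32_t InnerDim = 20;          // product of the remaining dimensions
  const uint32_t Outer = 2, Inner = 3;   // the two access indices
  return Outer * InnerDim + Inner;       // 43 'int' elements from &a[0][0]
}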
+ CallStack.pop(); + + // BPFPreserveArrayAI + uint64_t AccessIndex = CInfo.AccessIndex; + + DIType *BaseTy = nullptr; + bool CheckElemType = false; + if (const auto *CTy = dyn_cast(Ty)) { + // array type + assert(CTy->getTag() == dwarf::DW_TAG_array_type); + + + FirstIndex += AccessIndex * calcArraySize(CTy, 1); + BaseTy = stripQualifiers(CTy->getBaseType()); + CheckElemType = CTy->getElements().size() == 1; + } else { + // pointer type + auto *DTy = cast(Ty); + assert(DTy->getTag() == dwarf::DW_TAG_pointer_type); + + BaseTy = stripQualifiers(DTy->getBaseType()); + CTy = dyn_cast(BaseTy); + if (!CTy) { + CheckElemType = true; + } else if (CTy->getTag() != dwarf::DW_TAG_array_type) { + FirstIndex += AccessIndex; + CheckElemType = true; + } else { + FirstIndex += AccessIndex * calcArraySize(CTy, 0); + } } - Kind = AIChain[Call].second; - Call = AIChain[Call].first; - } + if (CheckElemType) { + auto *CTy = dyn_cast(BaseTy); + if (!CTy) { + if (HasPreserveFieldInfoCall(CallStack)) + report_fatal_error("Invalid field access for llvm.preserve.field.info intrinsic"); + return nullptr; + } - // The intial type name is required. - // FIXME: if the initial type access is an array index, e.g., - // &a[3].b.c, only one dimentional array is supported. - if (!LastTypeName.size() || AccessIndices.size() > TypeNameIndex + 2) - return nullptr; + unsigned CTag = CTy->getTag(); + if (CTag == dwarf::DW_TAG_structure_type || CTag == dwarf::DW_TAG_union_type) { + TypeName = CTy->getName(); + } else { + if (HasPreserveFieldInfoCall(CallStack)) + report_fatal_error("Invalid field access for llvm.preserve.field.info intrinsic"); + return nullptr; + } + TypeMeta = CTy; + PatchImm += FirstIndex * (CTy->getSizeInBits() >> 3); + break; + } + } + assert(TypeName.size()); + AccessKey += std::to_string(FirstIndex); + + // Traverse the rest of access chain to complete offset calculation + // and access key construction. + while (CallStack.size()) { + auto StackElem = CallStack.top(); + CInfo = StackElem.second; + CallStack.pop(); + + if (CInfo.Kind == BPFPreserveFieldInfoAI) + break; + + // If the next Call (the top of the stack) is a BPFPreserveFieldInfoAI, + // the action will be extracting field info. + if (CallStack.size()) { + auto StackElem2 = CallStack.top(); + CallInfo CInfo2 = StackElem2.second; + if (CInfo2.Kind == BPFPreserveFieldInfoAI) { + InfoKind = CInfo2.AccessIndex; + assert(CallStack.size() == 1); + } + } - // Construct the type string AccessStr. - for (unsigned I = 0; I < AccessIndices.size(); ++I) - AccessStr = std::to_string(AccessIndices[I]) + ":" + AccessStr; + // Access Index + uint64_t AccessIndex = CInfo.AccessIndex; + AccessKey += ":" + std::to_string(AccessIndex); - if (TypeNameIndex == AccessIndices.size() - 1) - AccessStr = "0:" + AccessStr; + MDNode *MDN = CInfo.Metadata; + // At this stage, it cannot be pointer type. + auto *CTy = cast(stripQualifiers(cast(MDN))); + PatchImm = GetFieldInfo(InfoKind, CTy, AccessIndex, PatchImm); + } - // Access key is the type name + access string, uniquely identifying - // one kernel memory access. - AccessKey = LastTypeName + ":" + AccessStr; + // Access key is the type name + reloc type + patched imm + access string, + // uniquely identifying one relocation. + AccessKey = TypeName + ":" + std::to_string(InfoKind) + ":" + + std::to_string(PatchImm) + "$" + AccessKey; return Base; } @@ -409,39 +838,52 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call, /// Call/Kind is the base preserve_*_access_index() call. 
Attempts to do /// transformation to a chain of relocable GEPs. bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call, - uint32_t Kind) { - std::string AccessStr, AccessKey; - MDNode *TypeMeta = nullptr; + CallInfo &CInfo) { + std::string AccessKey; + MDNode *TypeMeta; Value *Base = - computeBaseAndAccessStr(Call, AccessStr, AccessKey, Kind, TypeMeta); + computeBaseAndAccessKey(Call, CInfo, AccessKey, TypeMeta); if (!Base) return false; - // Do the transformation - // For any original GEP Call and Base %2 like - // %4 = bitcast %struct.net_device** %dev1 to i64* - // it is transformed to: - // %6 = load __BTF_0:sk_buff:0:0:2:0: - // %7 = bitcast %struct.sk_buff* %2 to i8* - // %8 = getelementptr i8, i8* %7, %6 - // %9 = bitcast i8* %8 to i64* - // using %9 instead of %4 - // The original Call inst is removed. BasicBlock *BB = Call->getParent(); GlobalVariable *GV; if (GEPGlobals.find(AccessKey) == GEPGlobals.end()) { - GV = new GlobalVariable(M, Type::getInt64Ty(BB->getContext()), false, - GlobalVariable::ExternalLinkage, NULL, AccessStr); + IntegerType *VarType; + if (CInfo.Kind == BPFPreserveFieldInfoAI) + VarType = Type::getInt32Ty(BB->getContext()); // 32bit return value + else + VarType = Type::getInt64Ty(BB->getContext()); // 64bit ptr arith + + GV = new GlobalVariable(M, VarType, false, GlobalVariable::ExternalLinkage, + NULL, AccessKey); GV->addAttribute(BPFCoreSharedInfo::AmaAttr); - // Set the metadata (debuginfo types) for the global. - if (TypeMeta) - GV->setMetadata(LLVMContext::MD_preserve_access_index, TypeMeta); + GV->setMetadata(LLVMContext::MD_preserve_access_index, TypeMeta); GEPGlobals[AccessKey] = GV; } else { GV = GEPGlobals[AccessKey]; } + if (CInfo.Kind == BPFPreserveFieldInfoAI) { + // Load the global variable which represents the returned field info. + auto *LDInst = new LoadInst(Type::getInt32Ty(BB->getContext()), GV); + BB->getInstList().insert(Call->getIterator(), LDInst); + Call->replaceAllUsesWith(LDInst); + Call->eraseFromParent(); + return true; + } + + // For any original GEP Call and Base %2 like + // %4 = bitcast %struct.net_device** %dev1 to i64* + // it is transformed to: + // %6 = load sk_buff:50:$0:0:0:2:0 + // %7 = bitcast %struct.sk_buff* %2 to i8* + // %8 = getelementptr i8, i8* %7, %6 + // %9 = bitcast i8* %8 to i64* + // using %9 instead of %4 + // The original Call inst is removed. + // Load the global variable. auto *LDInst = new LoadInst(Type::getInt64Ty(BB->getContext()), GV); BB->getInstList().insert(Call->getIterator(), LDInst); diff --git a/lib/Target/BPF/BPFAsmPrinter.cpp b/lib/Target/BPF/BPFAsmPrinter.cpp index e61e73468057..218b0302927c 100644 --- a/lib/Target/BPF/BPFAsmPrinter.cpp +++ b/lib/Target/BPF/BPFAsmPrinter.cpp @@ -59,7 +59,7 @@ bool BPFAsmPrinter::doInitialization(Module &M) { AsmPrinter::doInitialization(M); // Only emit BTF when debuginfo available. 
- if (MAI->doesSupportDebugInformation() && !empty(M.debug_compile_units())) { + if (MAI->doesSupportDebugInformation() && !M.debug_compile_units().empty()) { BTF = new BTFDebug(this); Handlers.push_back(HandlerInfo(std::unique_ptr(BTF), "emit", "Debug Info Emission", "BTF", diff --git a/lib/Target/BPF/BPFCORE.h b/lib/Target/BPF/BPFCORE.h index e0950d95f8d7..ed4778353e52 100644 --- a/lib/Target/BPF/BPFCORE.h +++ b/lib/Target/BPF/BPFCORE.h @@ -13,10 +13,18 @@ namespace llvm { class BPFCoreSharedInfo { public: - /// The attribute attached to globals representing a member offset + enum OffsetRelocKind : uint32_t { + FIELD_BYTE_OFFSET = 0, + FIELD_BYTE_SIZE, + FIELD_EXISTENCE, + FIELD_SIGNEDNESS, + FIELD_LSHIFT_U64, + FIELD_RSHIFT_U64, + + MAX_FIELD_RELOC_KIND, + }; + /// The attribute attached to globals representing a field access static const std::string AmaAttr; - /// The section name to identify a patchable external global - static const std::string PatchableExtSecName; }; } // namespace llvm diff --git a/lib/Target/BPF/BPFFrameLowering.h b/lib/Target/BPF/BPFFrameLowering.h index 2dc6277d2244..a546351ec6cb 100644 --- a/lib/Target/BPF/BPFFrameLowering.h +++ b/lib/Target/BPF/BPFFrameLowering.h @@ -21,7 +21,7 @@ class BPFSubtarget; class BPFFrameLowering : public TargetFrameLowering { public: explicit BPFFrameLowering(const BPFSubtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0) {} + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8), 0) {} void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp index 1bd705c55188..f2be0ff070d2 100644 --- a/lib/Target/BPF/BPFISelDAGToDAG.cpp +++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp @@ -45,9 +45,7 @@ class BPFDAGToDAGISel : public SelectionDAGISel { public: explicit BPFDAGToDAGISel(BPFTargetMachine &TM) - : SelectionDAGISel(TM), Subtarget(nullptr) { - curr_func_ = nullptr; - } + : SelectionDAGISel(TM), Subtarget(nullptr) {} StringRef getPassName() const override { return "BPF DAG->DAG Pattern Instruction Selection"; @@ -92,14 +90,8 @@ private: val_vec_type &Vals, int Offset); bool getConstantFieldValue(const GlobalAddressSDNode *Node, uint64_t Offset, uint64_t Size, unsigned char *ByteSeq); - bool checkLoadDef(unsigned DefReg, unsigned match_load_op); - // Mapping from ConstantStruct global value to corresponding byte-list values std::map cs_vals_; - // Mapping from vreg to load memory opcode - std::map load_to_vreg_; - // Current function - const Function *curr_func_; }; } // namespace @@ -325,32 +317,13 @@ void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node, } void BPFDAGToDAGISel::PreprocessISelDAG() { - // Iterate through all nodes, interested in the following cases: + // Iterate through all nodes, interested in the following case: // // . loads from ConstantStruct or ConstantArray of constructs // which can be turns into constant itself, with this we can // avoid reading from read-only section at runtime. // - // . reg truncating is often the result of 8/16/32bit->64bit or - // 8/16bit->32bit conversion. If the reg value is loaded with - // masked byte width, the AND operation can be removed since - // BPF LOAD already has zero extension. - // - // This also solved a correctness issue. 
- // In BPF socket-related program, e.g., __sk_buff->{data, data_end} - // are 32-bit registers, but later on, kernel verifier will rewrite - // it with 64-bit value. Therefore, truncating the value after the - // load will result in incorrect code. - - // clear the load_to_vreg_ map so that we have a clean start - // for this function. - if (!curr_func_) { - curr_func_ = FuncInfo->Fn; - } else if (curr_func_ != FuncInfo->Fn) { - load_to_vreg_.clear(); - curr_func_ = FuncInfo->Fn; - } - + // . Removing redundant AND for intrinsic narrow loads. for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E;) { @@ -358,8 +331,6 @@ void BPFDAGToDAGISel::PreprocessISelDAG() { unsigned Opcode = Node->getOpcode(); if (Opcode == ISD::LOAD) PreprocessLoad(Node, I); - else if (Opcode == ISD::CopyToReg) - PreprocessCopyToReg(Node); else if (Opcode == ISD::AND) PreprocessTrunc(Node, I); } @@ -491,37 +462,6 @@ bool BPFDAGToDAGISel::fillConstantStruct(const DataLayout &DL, return true; } -void BPFDAGToDAGISel::PreprocessCopyToReg(SDNode *Node) { - const RegisterSDNode *RegN = dyn_cast(Node->getOperand(1)); - if (!RegN || !TargetRegisterInfo::isVirtualRegister(RegN->getReg())) - return; - - const LoadSDNode *LD = dyn_cast(Node->getOperand(2)); - if (!LD) - return; - - // Assign a load value to a virtual register. record its load width - unsigned mem_load_op = 0; - switch (LD->getMemOperand()->getSize()) { - default: - return; - case 4: - mem_load_op = BPF::LDW; - break; - case 2: - mem_load_op = BPF::LDH; - break; - case 1: - mem_load_op = BPF::LDB; - break; - } - - LLVM_DEBUG(dbgs() << "Find Load Value to VReg " - << TargetRegisterInfo::virtReg2Index(RegN->getReg()) - << '\n'); - load_to_vreg_[RegN->getReg()] = mem_load_op; -} - void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node, SelectionDAG::allnodes_iterator &I) { ConstantSDNode *MaskN = dyn_cast(Node->getOperand(1)); @@ -535,112 +475,26 @@ void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node, // which the generic optimizer doesn't understand their results are // zero extended. SDValue BaseV = Node->getOperand(0); - if (BaseV.getOpcode() == ISD::INTRINSIC_W_CHAIN) { - unsigned IntNo = cast(BaseV->getOperand(1))->getZExtValue(); - uint64_t MaskV = MaskN->getZExtValue(); - - if (!((IntNo == Intrinsic::bpf_load_byte && MaskV == 0xFF) || - (IntNo == Intrinsic::bpf_load_half && MaskV == 0xFFFF) || - (IntNo == Intrinsic::bpf_load_word && MaskV == 0xFFFFFFFF))) - return; - - LLVM_DEBUG(dbgs() << "Remove the redundant AND operation in: "; - Node->dump(); dbgs() << '\n'); - - I--; - CurDAG->ReplaceAllUsesWith(SDValue(Node, 0), BaseV); - I++; - CurDAG->DeleteNode(Node); - - return; - } - - // Multiple basic blocks case. 
- if (BaseV.getOpcode() != ISD::CopyFromReg) + if (BaseV.getOpcode() != ISD::INTRINSIC_W_CHAIN) return; - unsigned match_load_op = 0; - switch (MaskN->getZExtValue()) { - default: - return; - case 0xFFFFFFFF: - match_load_op = BPF::LDW; - break; - case 0xFFFF: - match_load_op = BPF::LDH; - break; - case 0xFF: - match_load_op = BPF::LDB; - break; - } + unsigned IntNo = cast(BaseV->getOperand(1))->getZExtValue(); + uint64_t MaskV = MaskN->getZExtValue(); - const RegisterSDNode *RegN = - dyn_cast(BaseV.getNode()->getOperand(1)); - if (!RegN || !TargetRegisterInfo::isVirtualRegister(RegN->getReg())) + if (!((IntNo == Intrinsic::bpf_load_byte && MaskV == 0xFF) || + (IntNo == Intrinsic::bpf_load_half && MaskV == 0xFFFF) || + (IntNo == Intrinsic::bpf_load_word && MaskV == 0xFFFFFFFF))) return; - unsigned AndOpReg = RegN->getReg(); - LLVM_DEBUG(dbgs() << "Examine " << printReg(AndOpReg) << '\n'); - - // Examine the PHI insns in the MachineBasicBlock to found out the - // definitions of this virtual register. At this stage (DAG2DAG - // transformation), only PHI machine insns are available in the machine basic - // block. - MachineBasicBlock *MBB = FuncInfo->MBB; - MachineInstr *MII = nullptr; - for (auto &MI : *MBB) { - for (unsigned i = 0; i < MI.getNumOperands(); ++i) { - const MachineOperand &MOP = MI.getOperand(i); - if (!MOP.isReg() || !MOP.isDef()) - continue; - unsigned Reg = MOP.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg) && Reg == AndOpReg) { - MII = &MI; - break; - } - } - } - - if (MII == nullptr) { - // No phi definition in this block. - if (!checkLoadDef(AndOpReg, match_load_op)) - return; - } else { - // The PHI node looks like: - // %2 = PHI %0, <%bb.1>, %1, <%bb.3> - // Trace each incoming definition, e.g., (%0, %bb.1) and (%1, %bb.3) - // The AND operation can be removed if both %0 in %bb.1 and %1 in - // %bb.3 are defined with a load matching the MaskN. - LLVM_DEBUG(dbgs() << "Check PHI Insn: "; MII->dump(); dbgs() << '\n'); - unsigned PrevReg = -1; - for (unsigned i = 0; i < MII->getNumOperands(); ++i) { - const MachineOperand &MOP = MII->getOperand(i); - if (MOP.isReg()) { - if (MOP.isDef()) - continue; - PrevReg = MOP.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(PrevReg)) - return; - if (!checkLoadDef(PrevReg, match_load_op)) - return; - } - } - } - LLVM_DEBUG(dbgs() << "Remove the redundant AND operation in: "; Node->dump(); - dbgs() << '\n'); + LLVM_DEBUG(dbgs() << "Remove the redundant AND operation in: "; + Node->dump(); dbgs() << '\n'); I--; CurDAG->ReplaceAllUsesWith(SDValue(Node, 0), BaseV); I++; CurDAG->DeleteNode(Node); -} - -bool BPFDAGToDAGISel::checkLoadDef(unsigned DefReg, unsigned match_load_op) { - auto it = load_to_vreg_.find(DefReg); - if (it == load_to_vreg_.end()) - return false; // The definition of register is not exported yet. 
- return it->second == match_load_op; + return; } FunctionPass *llvm::createBPFISelDag(BPFTargetMachine &TM) { diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp index ff69941d26fb..56e0288f26c9 100644 --- a/lib/Target/BPF/BPFISelLowering.cpp +++ b/lib/Target/BPF/BPFISelLowering.cpp @@ -132,9 +132,9 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, setBooleanContents(ZeroOrOneBooleanContent); - // Function alignments (log2) - setMinFunctionAlignment(3); - setPrefFunctionAlignment(3); + // Function alignments + setMinFunctionAlignment(Align(8)); + setPrefFunctionAlignment(Align(8)); if (BPFExpandMemcpyInOrder) { // LLVM generic code will try to expand memcpy into load/store pairs at this @@ -236,9 +236,8 @@ SDValue BPFTargetLowering::LowerFormalArguments( } case MVT::i32: case MVT::i64: - unsigned VReg = RegInfo.createVirtualRegister(SimpleTy == MVT::i64 ? - &BPF::GPRRegClass : - &BPF::GPR32RegClass); + Register VReg = RegInfo.createVirtualRegister( + SimpleTy == MVT::i64 ? &BPF::GPRRegClass : &BPF::GPR32RegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, RegVT); @@ -571,9 +570,9 @@ BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB, DebugLoc DL = MI.getDebugLoc(); MachineRegisterInfo &RegInfo = F->getRegInfo(); - unsigned PromotedReg0 = RegInfo.createVirtualRegister(RC); - unsigned PromotedReg1 = RegInfo.createVirtualRegister(RC); - unsigned PromotedReg2 = RegInfo.createVirtualRegister(RC); + Register PromotedReg0 = RegInfo.createVirtualRegister(RC); + Register PromotedReg1 = RegInfo.createVirtualRegister(RC); + Register PromotedReg2 = RegInfo.createVirtualRegister(RC); BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg); BuildMI(BB, DL, TII.get(BPF::SLL_ri), PromotedReg1) .addReg(PromotedReg0).addImm(32); @@ -699,7 +698,7 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, report_fatal_error("unimplemented select CondCode " + Twine(CC)); } - unsigned LHS = MI.getOperand(1).getReg(); + Register LHS = MI.getOperand(1).getReg(); bool isSignedCmp = (CC == ISD::SETGT || CC == ISD::SETGE || CC == ISD::SETLT || @@ -716,7 +715,7 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, LHS = EmitSubregExt(MI, BB, LHS, isSignedCmp); if (isSelectRROp) { - unsigned RHS = MI.getOperand(2).getReg(); + Register RHS = MI.getOperand(2).getReg(); if (is32BitCmp && !HasJmp32) RHS = EmitSubregExt(MI, BB, RHS, isSignedCmp); diff --git a/lib/Target/BPF/BPFInstrInfo.cpp b/lib/Target/BPF/BPFInstrInfo.cpp index 932f718d5490..6de3a4084d3d 100644 --- a/lib/Target/BPF/BPFInstrInfo.cpp +++ b/lib/Target/BPF/BPFInstrInfo.cpp @@ -43,11 +43,11 @@ void BPFInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } void BPFInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const { - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned SrcReg = MI->getOperand(1).getReg(); + Register DstReg = MI->getOperand(0).getReg(); + Register SrcReg = MI->getOperand(1).getReg(); uint64_t CopyLen = MI->getOperand(2).getImm(); uint64_t Alignment = MI->getOperand(3).getImm(); - unsigned ScratchReg = MI->getOperand(4).getReg(); + Register ScratchReg = MI->getOperand(4).getReg(); MachineBasicBlock *BB = MI->getParent(); DebugLoc dl = MI->getDebugLoc(); unsigned LdOpc, StOpc; diff --git a/lib/Target/BPF/BPFInstrInfo.td b/lib/Target/BPF/BPFInstrInfo.td index c44702a78ec8..ae5a82a99303 100644 --- a/lib/Target/BPF/BPFInstrInfo.td +++ b/lib/Target/BPF/BPFInstrInfo.td @@ -473,7 +473,7 
@@ class CALL class CALLX : TYPE_ALU_JMP { bits<32> BrDst; diff --git a/lib/Target/BPF/BPFMIChecking.cpp b/lib/Target/BPF/BPFMIChecking.cpp index 4c46289656b4..f82f166eda4d 100644 --- a/lib/Target/BPF/BPFMIChecking.cpp +++ b/lib/Target/BPF/BPFMIChecking.cpp @@ -19,6 +19,7 @@ #include "BPFTargetMachine.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" using namespace llvm; diff --git a/lib/Target/BPF/BPFMIPeephole.cpp b/lib/Target/BPF/BPFMIPeephole.cpp index 156ba793e359..e9eecc55c3c3 100644 --- a/lib/Target/BPF/BPFMIPeephole.cpp +++ b/lib/Target/BPF/BPFMIPeephole.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" using namespace llvm; @@ -71,7 +72,7 @@ void BPFMIPeephole::initialize(MachineFunction &MFParm) { MF = &MFParm; MRI = &MF->getRegInfo(); TII = MF->getSubtarget().getInstrInfo(); - LLVM_DEBUG(dbgs() << "*** BPF MachineSSA peephole pass ***\n\n"); + LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n"); } bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI) @@ -104,10 +105,10 @@ bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI) if (!opnd.isReg()) return false; - unsigned Reg = opnd.getReg(); - if ((TargetRegisterInfo::isVirtualRegister(Reg) && + Register Reg = opnd.getReg(); + if ((Register::isVirtualRegister(Reg) && MRI->getRegClass(Reg) == &BPF::GPRRegClass)) - return false; + return false; } LLVM_DEBUG(dbgs() << " One ZExt elim sequence identified.\n"); @@ -134,8 +135,8 @@ bool BPFMIPeephole::eliminateZExtSeq(void) { // SRL_ri rB, rB, 32 if (MI.getOpcode() == BPF::SRL_ri && MI.getOperand(2).getImm() == 32) { - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned ShfReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register ShfReg = MI.getOperand(1).getReg(); MachineInstr *SllMI = MRI->getVRegDef(ShfReg); LLVM_DEBUG(dbgs() << "Starting SRL found:"); @@ -159,7 +160,7 @@ bool BPFMIPeephole::eliminateZExtSeq(void) { LLVM_DEBUG(dbgs() << " Type cast Mov found:"); LLVM_DEBUG(MovMI->dump()); - unsigned SubReg = MovMI->getOperand(1).getReg(); + Register SubReg = MovMI->getOperand(1).getReg(); if (!isMovFrom32Def(MovMI)) { LLVM_DEBUG(dbgs() << " One ZExt elim sequence failed qualifying elim.\n"); @@ -186,7 +187,8 @@ bool BPFMIPeephole::eliminateZExtSeq(void) { } // end default namespace INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE, - "BPF MachineSSA Peephole Optimization", false, false) + "BPF MachineSSA Peephole Optimization For ZEXT Eliminate", + false, false) char BPFMIPeephole::ID = 0; FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); } @@ -253,12 +255,16 @@ bool BPFMIPreEmitPeephole::eliminateRedundantMov(void) { // enabled. The special type cast insn MOV_32_64 involves different // register class on src (i32) and dst (i64), RA could generate useless // instruction due to this. 
- if (MI.getOpcode() == BPF::MOV_32_64) { - unsigned dst = MI.getOperand(0).getReg(); - unsigned dst_sub = TRI->getSubReg(dst, BPF::sub_32); - unsigned src = MI.getOperand(1).getReg(); + unsigned Opcode = MI.getOpcode(); + if (Opcode == BPF::MOV_32_64 || + Opcode == BPF::MOV_rr || Opcode == BPF::MOV_rr_32) { + Register dst = MI.getOperand(0).getReg(); + Register src = MI.getOperand(1).getReg(); + + if (Opcode == BPF::MOV_32_64) + dst = TRI->getSubReg(dst, BPF::sub_32); - if (dst_sub != src) + if (dst != src) continue; ToErase = &MI; @@ -281,3 +287,177 @@ FunctionPass* llvm::createBPFMIPreEmitPeepholePass() { return new BPFMIPreEmitPeephole(); } + +STATISTIC(TruncElemNum, "Number of truncation eliminated"); + +namespace { + +struct BPFMIPeepholeTruncElim : public MachineFunctionPass { + + static char ID; + const BPFInstrInfo *TII; + MachineFunction *MF; + MachineRegisterInfo *MRI; + + BPFMIPeepholeTruncElim() : MachineFunctionPass(ID) { + initializeBPFMIPeepholeTruncElimPass(*PassRegistry::getPassRegistry()); + } + +private: + // Initialize class variables. + void initialize(MachineFunction &MFParm); + + bool eliminateTruncSeq(void); + +public: + + // Main entry point for this pass. + bool runOnMachineFunction(MachineFunction &MF) override { + if (skipFunction(MF.getFunction())) + return false; + + initialize(MF); + + return eliminateTruncSeq(); + } +}; + +static bool TruncSizeCompatible(int TruncSize, unsigned opcode) +{ + if (TruncSize == 1) + return opcode == BPF::LDB || opcode == BPF::LDB32; + + if (TruncSize == 2) + return opcode == BPF::LDH || opcode == BPF::LDH32; + + if (TruncSize == 4) + return opcode == BPF::LDW || opcode == BPF::LDW32; + + return false; +} + +// Initialize class variables. +void BPFMIPeepholeTruncElim::initialize(MachineFunction &MFParm) { + MF = &MFParm; + MRI = &MF->getRegInfo(); + TII = MF->getSubtarget().getInstrInfo(); + LLVM_DEBUG(dbgs() << "*** BPF MachineSSA TRUNC Elim peephole pass ***\n\n"); +} + +// Reg truncating is often the result of 8/16/32bit->64bit or +// 8/16bit->32bit conversion. If the reg value is loaded with +// masked byte width, the AND operation can be removed since +// BPF LOAD already has zero extension. +// +// This also solved a correctness issue. +// In BPF socket-related program, e.g., __sk_buff->{data, data_end} +// are 32-bit registers, but later on, kernel verifier will rewrite +// it with 64-bit value. Therefore, truncating the value after the +// load will result in incorrect code. +bool BPFMIPeepholeTruncElim::eliminateTruncSeq(void) { + MachineInstr* ToErase = nullptr; + bool Eliminated = false; + + for (MachineBasicBlock &MBB : *MF) { + for (MachineInstr &MI : MBB) { + // The second insn to remove if the eliminate candidate is a pair. + MachineInstr *MI2 = nullptr; + Register DstReg, SrcReg; + MachineInstr *DefMI; + int TruncSize = -1; + + // If the previous instruction was marked for elimination, remove it now. + if (ToErase) { + ToErase->eraseFromParent(); + ToErase = nullptr; + } + + // AND A, 0xFFFFFFFF will be turned into SLL/SRL pair due to immediate + // for BPF ANDI is i32, and this case only happens on ALU64. + if (MI.getOpcode() == BPF::SRL_ri && + MI.getOperand(2).getImm() == 32) { + SrcReg = MI.getOperand(1).getReg(); + MI2 = MRI->getVRegDef(SrcReg); + DstReg = MI.getOperand(0).getReg(); + + if (!MI2 || + MI2->getOpcode() != BPF::SLL_ri || + MI2->getOperand(2).getImm() != 32) + continue; + + // Update SrcReg. 
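A minimal sketch of the mask-to-width classification eliminateTruncSeq() applies: the 0xffffffff case cannot appear as an AND because the BPF ANDI immediate is i32, so it arrives as the SLL/SRL-by-32 pair matched just above, while the byte and halfword masks are checked just below. The helper name is illustrative:

#include <cstdint>

static int truncSizeForMask(int64_t Mask) {
  if (Mask == 0xff)
    return 1;   // already guaranteed by the zero extension of LDB/LDB32
  if (Mask == 0xffff)
    return 2;   // already guaranteed by the zero extension of LDH/LDH32
  return -1;    // not a truncation this peephole recognizes
}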
+ SrcReg = MI2->getOperand(1).getReg(); + DefMI = MRI->getVRegDef(SrcReg); + if (DefMI) + TruncSize = 4; + } else if (MI.getOpcode() == BPF::AND_ri || + MI.getOpcode() == BPF::AND_ri_32) { + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + DefMI = MRI->getVRegDef(SrcReg); + + if (!DefMI) + continue; + + int64_t imm = MI.getOperand(2).getImm(); + if (imm == 0xff) + TruncSize = 1; + else if (imm == 0xffff) + TruncSize = 2; + } + + if (TruncSize == -1) + continue; + + // The definition is PHI node, check all inputs. + if (DefMI->isPHI()) { + bool CheckFail = false; + + for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) { + MachineOperand &opnd = DefMI->getOperand(i); + if (!opnd.isReg()) { + CheckFail = true; + break; + } + + MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg()); + if (!PhiDef || PhiDef->isPHI() || + !TruncSizeCompatible(TruncSize, PhiDef->getOpcode())) { + CheckFail = true; + break; + } + } + + if (CheckFail) + continue; + } else if (!TruncSizeCompatible(TruncSize, DefMI->getOpcode())) { + continue; + } + + BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::MOV_rr), DstReg) + .addReg(SrcReg); + + if (MI2) + MI2->eraseFromParent(); + + // Mark it to ToErase, and erase in the next iteration. + ToErase = &MI; + TruncElemNum++; + Eliminated = true; + } + } + + return Eliminated; +} + +} // end default namespace + +INITIALIZE_PASS(BPFMIPeepholeTruncElim, "bpf-mi-trunc-elim", + "BPF MachineSSA Peephole Optimization For TRUNC Eliminate", + false, false) + +char BPFMIPeepholeTruncElim::ID = 0; +FunctionPass* llvm::createBPFMIPeepholeTruncElimPass() +{ + return new BPFMIPeepholeTruncElim(); +} diff --git a/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/lib/Target/BPF/BPFMISimplifyPatchable.cpp index e9114d7187e3..9c689aed6417 100644 --- a/lib/Target/BPF/BPFMISimplifyPatchable.cpp +++ b/lib/Target/BPF/BPFMISimplifyPatchable.cpp @@ -11,19 +11,15 @@ // ldd r2, r1, 0 // add r3, struct_base_reg, r2 // -// Here @global should either present a AMA (abstruct member access) or -// a patchable extern variable. And these two kinds of accesses -// are subject to bpf load time patching. After this pass, the +// Here @global should represent an AMA (abstruct member access). +// Such an access is subject to bpf load time patching. After this pass, the // code becomes // ld_imm64 r1, @global // add r3, struct_base_reg, r1 // // Eventually, at BTF output stage, a relocation record will be generated // for ld_imm64 which should be replaced later by bpf loader: -// r1 = or -// add r3, struct_base_reg, r1 -// or -// ld_imm64 r1, +// r1 = // add r3, struct_base_reg, r1 // //===----------------------------------------------------------------------===// @@ -34,6 +30,7 @@ #include "BPFTargetMachine.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" using namespace llvm; @@ -100,9 +97,8 @@ bool BPFMISimplifyPatchable::removeLD() { if (!MI.getOperand(2).isImm() || MI.getOperand(2).getImm()) continue; - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); - int64_t ImmVal = MI.getOperand(2).getImm(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); MachineInstr *DefInst = MRI->getUniqueVRegDef(SrcReg); if (!DefInst) @@ -118,17 +114,8 @@ bool BPFMISimplifyPatchable::removeLD() { // Global variables representing structure offset or // patchable extern globals. 
if (GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr)) { - assert(ImmVal == 0); + assert(MI.getOperand(2).getImm() == 0); IsCandidate = true; - } else if (!GVar->hasInitializer() && GVar->hasExternalLinkage() && - GVar->getSection() == - BPFCoreSharedInfo::PatchableExtSecName) { - if (ImmVal == 0) - IsCandidate = true; - else - errs() << "WARNING: unhandled patchable extern " - << GVar->getName() << " with load offset " << ImmVal - << "\n"; } } } diff --git a/lib/Target/BPF/BPFRegisterInfo.cpp b/lib/Target/BPF/BPFRegisterInfo.cpp index 714af06e11d9..8de81a469b84 100644 --- a/lib/Target/BPF/BPFRegisterInfo.cpp +++ b/lib/Target/BPF/BPFRegisterInfo.cpp @@ -77,7 +77,7 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); } - unsigned FrameReg = getFrameRegister(MF); + Register FrameReg = getFrameRegister(MF); int FrameIndex = MI.getOperand(i).getIndex(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); @@ -86,7 +86,7 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, WarnSize(Offset, MF, DL); MI.getOperand(i).ChangeToRegister(FrameReg, false); - unsigned reg = MI.getOperand(i - 1).getReg(); + Register reg = MI.getOperand(i - 1).getReg(); BuildMI(MBB, ++II, DL, TII.get(BPF::ADD_ri), reg) .addReg(reg) .addImm(Offset); @@ -105,7 +105,7 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // architecture does not really support FI_ri, replace it with // MOV_rr , frame_reg // ADD_ri , imm - unsigned reg = MI.getOperand(i - 1).getReg(); + Register reg = MI.getOperand(i - 1).getReg(); BuildMI(MBB, ++II, DL, TII.get(BPF::MOV_rr), reg) .addReg(FrameReg); diff --git a/lib/Target/BPF/BPFTargetMachine.cpp b/lib/Target/BPF/BPFTargetMachine.cpp index 24c0ff0f7f15..0c4f2c74e7a4 100644 --- a/lib/Target/BPF/BPFTargetMachine.cpp +++ b/lib/Target/BPF/BPFTargetMachine.cpp @@ -36,6 +36,7 @@ extern "C" void LLVMInitializeBPFTarget() { PassRegistry &PR = *PassRegistry::getPassRegistry(); initializeBPFAbstractMemberAccessPass(PR); initializeBPFMIPeepholePass(PR); + initializeBPFMIPeepholeTruncElimPass(PR); } // DataLayout: little or big endian @@ -61,7 +62,7 @@ BPFTargetMachine::BPFTargetMachine(const Target &T, const Triple &TT, : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, getEffectiveRelocModel(RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), - TLOF(make_unique()), + TLOF(std::make_unique()), Subtarget(TT, CPU, FS, *this) { initAsmInfo(); @@ -94,7 +95,7 @@ TargetPassConfig *BPFTargetMachine::createPassConfig(PassManagerBase &PM) { void BPFPassConfig::addIRPasses() { - addPass(createBPFAbstractMemberAccess()); + addPass(createBPFAbstractMemberAccess(&getBPFTargetMachine())); TargetPassConfig::addIRPasses(); } @@ -115,15 +116,16 @@ void BPFPassConfig::addMachineSSAOptimization() { TargetPassConfig::addMachineSSAOptimization(); const BPFSubtarget *Subtarget = getBPFTargetMachine().getSubtargetImpl(); - if (Subtarget->getHasAlu32() && !DisableMIPeephole) - addPass(createBPFMIPeepholePass()); + if (!DisableMIPeephole) { + if (Subtarget->getHasAlu32()) + addPass(createBPFMIPeepholePass()); + addPass(createBPFMIPeepholeTruncElimPass()); + } } void BPFPassConfig::addPreEmitPass() { - const BPFSubtarget *Subtarget = getBPFTargetMachine().getSubtargetImpl(); - addPass(createBPFMIPreEmitCheckingPass()); if (getOptLevel() != CodeGenOpt::None) - if (Subtarget->getHasAlu32() && !DisableMIPeephole) + if (!DisableMIPeephole) 
addPass(createBPFMIPreEmitPeepholePass()); } diff --git a/lib/Target/BPF/BTF.h b/lib/Target/BPF/BTF.h index ad56716710a6..a13c862bf840 100644 --- a/lib/Target/BPF/BTF.h +++ b/lib/Target/BPF/BTF.h @@ -17,7 +17,7 @@ /// /// The binary layout for .BTF.ext section: /// struct ExtHeader -/// FuncInfo, LineInfo, OffsetReloc and ExternReloc subsections +/// FuncInfo, LineInfo, FieldReloc and ExternReloc subsections /// The FuncInfo subsection is defined as below: /// BTFFuncInfo Size /// struct SecFuncInfo for ELF section #1 @@ -32,19 +32,12 @@ /// struct SecLineInfo for ELF section #2 /// A number of struct BPFLineInfo for ELF section #2 /// ... -/// The OffsetReloc subsection is defined as below: -/// BPFOffsetReloc Size -/// struct SecOffsetReloc for ELF section #1 -/// A number of struct BPFOffsetReloc for ELF section #1 -/// struct SecOffsetReloc for ELF section #2 -/// A number of struct BPFOffsetReloc for ELF section #2 -/// ... -/// The ExternReloc subsection is defined as below: -/// BPFExternReloc Size -/// struct SecExternReloc for ELF section #1 -/// A number of struct BPFExternReloc for ELF section #1 -/// struct SecExternReloc for ELF section #2 -/// A number of struct BPFExternReloc for ELF section #2 +/// The FieldReloc subsection is defined as below: +/// BPFFieldReloc Size +/// struct SecFieldReloc for ELF section #1 +/// A number of struct BPFFieldReloc for ELF section #1 +/// struct SecFieldReloc for ELF section #2 +/// A number of struct BPFFieldReloc for ELF section #2 /// ... /// /// The section formats are also defined at @@ -63,7 +56,7 @@ enum : uint32_t { MAGIC = 0xeB9F, VERSION = 1 }; /// Sizes in bytes of various things in the BTF format. enum { HeaderSize = 24, - ExtHeaderSize = 40, + ExtHeaderSize = 32, CommonTypeSize = 12, BTFArraySize = 12, BTFEnumSize = 8, @@ -72,12 +65,10 @@ enum { BTFDataSecVarSize = 12, SecFuncInfoSize = 8, SecLineInfoSize = 8, - SecOffsetRelocSize = 8, - SecExternRelocSize = 8, + SecFieldRelocSize = 8, BPFFuncInfoSize = 8, BPFLineInfoSize = 16, - BPFOffsetRelocSize = 12, - BPFExternRelocSize = 8, + BPFFieldRelocSize = 16, }; /// The .BTF section header definition. @@ -213,10 +204,8 @@ struct ExtHeader { uint32_t FuncInfoLen; ///< Length of func info section uint32_t LineInfoOff; ///< Offset of line info section uint32_t LineInfoLen; ///< Length of line info section - uint32_t OffsetRelocOff; ///< Offset of offset reloc section - uint32_t OffsetRelocLen; ///< Length of offset reloc section - uint32_t ExternRelocOff; ///< Offset of extern reloc section - uint32_t ExternRelocLen; ///< Length of extern reloc section + uint32_t FieldRelocOff; ///< Offset of offset reloc section + uint32_t FieldRelocLen; ///< Length of offset reloc section }; /// Specifying one function info. @@ -247,28 +236,17 @@ struct SecLineInfo { }; /// Specifying one offset relocation. -struct BPFOffsetReloc { +struct BPFFieldReloc { uint32_t InsnOffset; ///< Byte offset in this section uint32_t TypeID; ///< TypeID for the relocation uint32_t OffsetNameOff; ///< The string to traverse types + uint32_t RelocKind; ///< What to patch the instruction }; /// Specifying offset relocation's in one section. -struct SecOffsetReloc { - uint32_t SecNameOff; ///< Section name index in the .BTF string table - uint32_t NumOffsetReloc; ///< Number of offset reloc's in this section -}; - -/// Specifying one offset relocation. 
-struct BPFExternReloc { - uint32_t InsnOffset; ///< Byte offset in this section - uint32_t ExternNameOff; ///< The string for external variable -}; - -/// Specifying extern relocation's in one section. -struct SecExternReloc { +struct SecFieldReloc { uint32_t SecNameOff; ///< Section name index in the .BTF string table - uint32_t NumExternReloc; ///< Number of extern reloc's in this section + uint32_t NumFieldReloc; ///< Number of offset reloc's in this section }; } // End namespace BTF. diff --git a/lib/Target/BPF/BTFDebug.cpp b/lib/Target/BPF/BTFDebug.cpp index fa35c6619e21..db551e739bd7 100644 --- a/lib/Target/BPF/BTFDebug.cpp +++ b/lib/Target/BPF/BTFDebug.cpp @@ -184,9 +184,7 @@ void BTFTypeEnum::emitType(MCStreamer &OS) { } } -BTFTypeArray::BTFTypeArray(uint32_t ElemTypeId, uint32_t ElemSize, - uint32_t NumElems) - : ElemSize(ElemSize) { +BTFTypeArray::BTFTypeArray(uint32_t ElemTypeId, uint32_t NumElems) { Kind = BTF::BTF_KIND_ARRAY; BTFType.NameOff = 0; BTFType.Info = Kind << 24; @@ -216,12 +214,6 @@ void BTFTypeArray::emitType(MCStreamer &OS) { OS.EmitIntValue(ArrayInfo.Nelems, 4); } -void BTFTypeArray::getLocInfo(uint32_t Loc, uint32_t &LocOffset, - uint32_t &ElementTypeId) { - ElementTypeId = ArrayInfo.ElemType; - LocOffset = Loc * ElemSize; -} - /// Represent either a struct or a union. BTFTypeStruct::BTFTypeStruct(const DICompositeType *STy, bool IsStruct, bool HasBitField, uint32_t Vlen) @@ -251,7 +243,8 @@ void BTFTypeStruct::completeType(BTFDebug &BDebug) { } else { BTFMember.Offset = DDTy->getOffsetInBits(); } - BTFMember.Type = BDebug.getTypeId(DDTy->getBaseType()); + const auto *BaseTy = DDTy->getBaseType(); + BTFMember.Type = BDebug.getTypeId(BaseTy); Members.push_back(BTFMember); } } @@ -268,15 +261,6 @@ void BTFTypeStruct::emitType(MCStreamer &OS) { std::string BTFTypeStruct::getName() { return STy->getName(); } -void BTFTypeStruct::getMemberInfo(uint32_t Loc, uint32_t &MemberOffset, - uint32_t &MemberType) { - MemberType = Members[Loc].Type; - MemberOffset = - HasBitField ? Members[Loc].Offset & 0xffffff : Members[Loc].Offset; -} - -uint32_t BTFTypeStruct::getStructSize() { return STy->getSizeInBits() >> 3; } - /// The Func kind represents both subprogram and pointee of function /// pointers. If the FuncName is empty, it represents a pointee of function /// pointer. Otherwise, it represents a subprogram. The func arg names @@ -428,7 +412,7 @@ void BTFDebug::visitBasicType(const DIBasicType *BTy, uint32_t &TypeId) { // Create a BTF type instance for this DIBasicType and put it into // DIToIdMap for cross-type reference check. - auto TypeEntry = llvm::make_unique( + auto TypeEntry = std::make_unique( Encoding, BTy->getSizeInBits(), BTy->getOffsetInBits(), BTy->getName()); TypeId = addType(std::move(TypeEntry), BTy); } @@ -447,7 +431,7 @@ void BTFDebug::visitSubroutineType( // a function pointer has an empty name. The subprogram type will // not be added to DIToIdMap as it should not be referenced by // any other types. 
- auto TypeEntry = llvm::make_unique(STy, VLen, FuncArgNames); + auto TypeEntry = std::make_unique(STy, VLen, FuncArgNames); if (ForSubprog) TypeId = addType(std::move(TypeEntry)); // For subprogram else @@ -478,7 +462,7 @@ void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct, } auto TypeEntry = - llvm::make_unique(CTy, IsStruct, HasBitField, VLen); + std::make_unique(CTy, IsStruct, HasBitField, VLen); StructTypes.push_back(TypeEntry.get()); TypeId = addType(std::move(TypeEntry), CTy); @@ -489,35 +473,29 @@ void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct, void BTFDebug::visitArrayType(const DICompositeType *CTy, uint32_t &TypeId) { // Visit array element type. - uint32_t ElemTypeId, ElemSize; + uint32_t ElemTypeId; const DIType *ElemType = CTy->getBaseType(); visitTypeEntry(ElemType, ElemTypeId, false, false); - ElemSize = ElemType->getSizeInBits() >> 3; - if (!CTy->getSizeInBits()) { - auto TypeEntry = llvm::make_unique(ElemTypeId, 0, 0); - ArrayTypes.push_back(TypeEntry.get()); - ElemTypeId = addType(std::move(TypeEntry), CTy); - } else { - // Visit array dimensions. - DINodeArray Elements = CTy->getElements(); - for (int I = Elements.size() - 1; I >= 0; --I) { - if (auto *Element = dyn_cast_or_null(Elements[I])) - if (Element->getTag() == dwarf::DW_TAG_subrange_type) { - const DISubrange *SR = cast(Element); - auto *CI = SR->getCount().dyn_cast(); - int64_t Count = CI->getSExtValue(); - - auto TypeEntry = - llvm::make_unique(ElemTypeId, ElemSize, Count); - ArrayTypes.push_back(TypeEntry.get()); - if (I == 0) - ElemTypeId = addType(std::move(TypeEntry), CTy); - else - ElemTypeId = addType(std::move(TypeEntry)); - ElemSize = ElemSize * Count; - } - } + // Visit array dimensions. + DINodeArray Elements = CTy->getElements(); + for (int I = Elements.size() - 1; I >= 0; --I) { + if (auto *Element = dyn_cast_or_null(Elements[I])) + if (Element->getTag() == dwarf::DW_TAG_subrange_type) { + const DISubrange *SR = cast(Element); + auto *CI = SR->getCount().dyn_cast(); + int64_t Count = CI->getSExtValue(); + + // For struct s { int b; char c[]; }, the c[] will be represented + // as an array with Count = -1. + auto TypeEntry = + std::make_unique(ElemTypeId, + Count >= 0 ? Count : 0); + if (I == 0) + ElemTypeId = addType(std::move(TypeEntry), CTy); + else + ElemTypeId = addType(std::move(TypeEntry)); + } } // The array TypeId is the type id of the outermost dimension. @@ -526,7 +504,7 @@ void BTFDebug::visitArrayType(const DICompositeType *CTy, uint32_t &TypeId) { // The IR does not have a type for array index while BTF wants one. // So create an array index type if there is none. if (!ArrayIndexTypeId) { - auto TypeEntry = llvm::make_unique(dwarf::DW_ATE_unsigned, 32, + auto TypeEntry = std::make_unique(dwarf::DW_ATE_unsigned, 32, 0, "__ARRAY_SIZE_TYPE__"); ArrayIndexTypeId = addType(std::move(TypeEntry)); } @@ -538,7 +516,7 @@ void BTFDebug::visitEnumType(const DICompositeType *CTy, uint32_t &TypeId) { if (VLen > BTF::MAX_VLEN) return; - auto TypeEntry = llvm::make_unique(CTy, VLen); + auto TypeEntry = std::make_unique(CTy, VLen); TypeId = addType(std::move(TypeEntry), CTy); // No need to visit base type as BTF does not encode it. } @@ -546,7 +524,7 @@ void BTFDebug::visitEnumType(const DICompositeType *CTy, uint32_t &TypeId) { /// Handle structure/union forward declarations. 
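The rewritten visitArrayType above chains one BTF array entry per dimension, innermost dimension first, each entry wrapping the previous type id, and clamps the Count = -1 produced by a flexible array member ("char c[]") to zero elements. A standalone sketch of that chaining, with hypothetical 1-based ids standing in for what addType() would assign:

    #include <cstdint>
    #include <vector>

    struct FakeArrayType { uint32_t ElemTypeId; uint32_t NumElems; };

    // Dims is given in declaration order, e.g. {3, 4} for "int a[3][4]".
    uint32_t buildArrayChain(uint32_t ElemTypeId, uint32_t NextId,
                             const std::vector<int64_t> &Dims,
                             std::vector<FakeArrayType> &Table) {
      uint32_t CurId = ElemTypeId;
      for (auto It = Dims.rbegin(); It != Dims.rend(); ++It) {   // innermost first
        int64_t Count = *It;
        Table.push_back({CurId, Count >= 0 ? uint32_t(Count) : 0u});
        CurId = NextId++;                    // id assigned to the entry just added
      }
      return CurId;                          // id of the outermost dimension
    }

For int a[3][4] with element id 1 and next free id 2, this records {1, 4} as id 2 and {2, 3} as id 3, and id 3 becomes the id of the whole array, matching the way the loop above registers only the I == 0 entry against CTy.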
void BTFDebug::visitFwdDeclType(const DICompositeType *CTy, bool IsUnion, uint32_t &TypeId) { - auto TypeEntry = llvm::make_unique(CTy->getName(), IsUnion); + auto TypeEntry = std::make_unique(CTy->getName(), IsUnion); TypeId = addType(std::move(TypeEntry), CTy); } @@ -588,7 +566,7 @@ void BTFDebug::visitDerivedType(const DIDerivedType *DTy, uint32_t &TypeId, /// Find a candidate, generate a fixup. Later on the struct/union /// pointee type will be replaced with either a real type or /// a forward declaration. - auto TypeEntry = llvm::make_unique(DTy, Tag, true); + auto TypeEntry = std::make_unique(DTy, Tag, true); auto &Fixup = FixupDerivedTypes[CTy->getName()]; Fixup.first = CTag == dwarf::DW_TAG_union_type; Fixup.second.push_back(TypeEntry.get()); @@ -602,7 +580,7 @@ void BTFDebug::visitDerivedType(const DIDerivedType *DTy, uint32_t &TypeId, if (Tag == dwarf::DW_TAG_pointer_type || Tag == dwarf::DW_TAG_typedef || Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type || Tag == dwarf::DW_TAG_restrict_type) { - auto TypeEntry = llvm::make_unique(DTy, Tag, false); + auto TypeEntry = std::make_unique(DTy, Tag, false); TypeId = addType(std::move(TypeEntry), DTy); } else if (Tag != dwarf::DW_TAG_member) { return; @@ -669,7 +647,7 @@ void BTFDebug::visitMapDefType(const DIType *Ty, uint32_t &TypeId) { } auto TypeEntry = - llvm::make_unique(CTy, true, HasBitField, Elements.size()); + std::make_unique(CTy, true, HasBitField, Elements.size()); StructTypes.push_back(TypeEntry.get()); TypeId = addType(std::move(TypeEntry), CTy); @@ -774,9 +752,10 @@ void BTFDebug::emitBTFSection() { } void BTFDebug::emitBTFExtSection() { - // Do not emit section if empty FuncInfoTable and LineInfoTable. + // Do not emit section if empty FuncInfoTable and LineInfoTable + // and FieldRelocTable. if (!FuncInfoTable.size() && !LineInfoTable.size() && - !OffsetRelocTable.size() && !ExternRelocTable.size()) + !FieldRelocTable.size()) return; MCContext &Ctx = OS.getContext(); @@ -788,8 +767,8 @@ void BTFDebug::emitBTFExtSection() { // Account for FuncInfo/LineInfo record size as well. uint32_t FuncLen = 4, LineLen = 4; - // Do not account for optional OffsetReloc/ExternReloc. - uint32_t OffsetRelocLen = 0, ExternRelocLen = 0; + // Do not account for optional FieldReloc. 
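As a concrete example of the per-section length accounting done in the loops that follow, assume one ELF section with two functions, five line records and three field relocations (numbers chosen only for illustration); the constants mirror the BTF.h sizes above.

    constexpr unsigned SecFuncInfoSize = 8,   BPFFuncInfoSize = 8;
    constexpr unsigned SecLineInfoSize = 8,   BPFLineInfoSize = 16;
    constexpr unsigned SecFieldRelocSize = 8, BPFFieldRelocSize = 16;

    // Each non-empty subsection starts with a 4-byte record-size word.
    constexpr unsigned FuncLen       = 4 + SecFuncInfoSize   + 2 * BPFFuncInfoSize;   // 28
    constexpr unsigned LineLen       = 4 + SecLineInfoSize   + 5 * BPFLineInfoSize;   // 92
    constexpr unsigned FieldRelocLen = 4 + SecFieldRelocSize + 3 * BPFFieldRelocSize; // 60

    // The 32-byte ExtHeader then records FuncInfoOff = 0, FuncInfoLen = 28,
    // LineInfoOff = 28, LineInfoLen = 92, FieldRelocOff = 120, FieldRelocLen = 60,
    // in the order the EmitIntValue calls below write them.
    static_assert(FuncLen == 28 && LineLen == 92 && FieldRelocLen == 60, "");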
+ uint32_t FieldRelocLen = 0; for (const auto &FuncSec : FuncInfoTable) { FuncLen += BTF::SecFuncInfoSize; FuncLen += FuncSec.second.size() * BTF::BPFFuncInfoSize; @@ -798,28 +777,20 @@ void BTFDebug::emitBTFExtSection() { LineLen += BTF::SecLineInfoSize; LineLen += LineSec.second.size() * BTF::BPFLineInfoSize; } - for (const auto &OffsetRelocSec : OffsetRelocTable) { - OffsetRelocLen += BTF::SecOffsetRelocSize; - OffsetRelocLen += OffsetRelocSec.second.size() * BTF::BPFOffsetRelocSize; - } - for (const auto &ExternRelocSec : ExternRelocTable) { - ExternRelocLen += BTF::SecExternRelocSize; - ExternRelocLen += ExternRelocSec.second.size() * BTF::BPFExternRelocSize; + for (const auto &FieldRelocSec : FieldRelocTable) { + FieldRelocLen += BTF::SecFieldRelocSize; + FieldRelocLen += FieldRelocSec.second.size() * BTF::BPFFieldRelocSize; } - if (OffsetRelocLen) - OffsetRelocLen += 4; - if (ExternRelocLen) - ExternRelocLen += 4; + if (FieldRelocLen) + FieldRelocLen += 4; OS.EmitIntValue(0, 4); OS.EmitIntValue(FuncLen, 4); OS.EmitIntValue(FuncLen, 4); OS.EmitIntValue(LineLen, 4); OS.EmitIntValue(FuncLen + LineLen, 4); - OS.EmitIntValue(OffsetRelocLen, 4); - OS.EmitIntValue(FuncLen + LineLen + OffsetRelocLen, 4); - OS.EmitIntValue(ExternRelocLen, 4); + OS.EmitIntValue(FieldRelocLen, 4); // Emit func_info table. OS.AddComment("FuncInfo"); @@ -853,35 +824,20 @@ void BTFDebug::emitBTFExtSection() { } } - // Emit offset reloc table. - if (OffsetRelocLen) { - OS.AddComment("OffsetReloc"); - OS.EmitIntValue(BTF::BPFOffsetRelocSize, 4); - for (const auto &OffsetRelocSec : OffsetRelocTable) { - OS.AddComment("Offset reloc section string offset=" + - std::to_string(OffsetRelocSec.first)); - OS.EmitIntValue(OffsetRelocSec.first, 4); - OS.EmitIntValue(OffsetRelocSec.second.size(), 4); - for (const auto &OffsetRelocInfo : OffsetRelocSec.second) { - Asm->EmitLabelReference(OffsetRelocInfo.Label, 4); - OS.EmitIntValue(OffsetRelocInfo.TypeID, 4); - OS.EmitIntValue(OffsetRelocInfo.OffsetNameOff, 4); - } - } - } - - // Emit extern reloc table. - if (ExternRelocLen) { - OS.AddComment("ExternReloc"); - OS.EmitIntValue(BTF::BPFExternRelocSize, 4); - for (const auto &ExternRelocSec : ExternRelocTable) { - OS.AddComment("Extern reloc section string offset=" + - std::to_string(ExternRelocSec.first)); - OS.EmitIntValue(ExternRelocSec.first, 4); - OS.EmitIntValue(ExternRelocSec.second.size(), 4); - for (const auto &ExternRelocInfo : ExternRelocSec.second) { - Asm->EmitLabelReference(ExternRelocInfo.Label, 4); - OS.EmitIntValue(ExternRelocInfo.ExternNameOff, 4); + // Emit field reloc table. 
+ if (FieldRelocLen) { + OS.AddComment("FieldReloc"); + OS.EmitIntValue(BTF::BPFFieldRelocSize, 4); + for (const auto &FieldRelocSec : FieldRelocTable) { + OS.AddComment("Field reloc section string offset=" + + std::to_string(FieldRelocSec.first)); + OS.EmitIntValue(FieldRelocSec.first, 4); + OS.EmitIntValue(FieldRelocSec.second.size(), 4); + for (const auto &FieldRelocInfo : FieldRelocSec.second) { + Asm->EmitLabelReference(FieldRelocInfo.Label, 4); + OS.EmitIntValue(FieldRelocInfo.TypeID, 4); + OS.EmitIntValue(FieldRelocInfo.OffsetNameOff, 4); + OS.EmitIntValue(FieldRelocInfo.RelocKind, 4); } } } @@ -942,7 +898,7 @@ void BTFDebug::beginFunctionImpl(const MachineFunction *MF) { // Construct subprogram func type auto FuncTypeEntry = - llvm::make_unique(SP->getName(), ProtoTypeId); + std::make_unique(SP->getName(), ProtoTypeId); uint32_t FuncTypeId = addType(std::move(FuncTypeEntry)); for (const auto &TypeEntry : TypeEntries) @@ -980,71 +936,27 @@ unsigned BTFDebug::populateStructType(const DIType *Ty) { return Id; } -// Find struct/array debuginfo types given a type id. -void BTFDebug::setTypeFromId(uint32_t TypeId, BTFTypeStruct **PrevStructType, - BTFTypeArray **PrevArrayType) { - for (const auto &StructType : StructTypes) { - if (StructType->getId() == TypeId) { - *PrevStructType = StructType; - return; - } - } - for (const auto &ArrayType : ArrayTypes) { - if (ArrayType->getId() == TypeId) { - *PrevArrayType = ArrayType; - return; - } - } -} - -/// Generate a struct member offset relocation. -void BTFDebug::generateOffsetReloc(const MachineInstr *MI, +/// Generate a struct member field relocation. +void BTFDebug::generateFieldReloc(const MachineInstr *MI, const MCSymbol *ORSym, DIType *RootTy, StringRef AccessPattern) { - BTFTypeStruct *PrevStructType = nullptr; - BTFTypeArray *PrevArrayType = nullptr; unsigned RootId = populateStructType(RootTy); - setTypeFromId(RootId, &PrevStructType, &PrevArrayType); - unsigned RootTySize = PrevStructType->getStructSize(); - - BTFOffsetReloc OffsetReloc; - OffsetReloc.Label = ORSym; - OffsetReloc.OffsetNameOff = addString(AccessPattern.drop_back()); - OffsetReloc.TypeID = RootId; - - uint32_t Start = 0, End = 0, Offset = 0; - bool FirstAccess = true; - for (auto C : AccessPattern) { - if (C != ':') { - End++; - } else { - std::string SubStr = AccessPattern.substr(Start, End - Start); - int Loc = std::stoi(SubStr); - - if (FirstAccess) { - Offset = Loc * RootTySize; - FirstAccess = false; - } else if (PrevStructType) { - uint32_t MemberOffset, MemberTypeId; - PrevStructType->getMemberInfo(Loc, MemberOffset, MemberTypeId); - - Offset += MemberOffset >> 3; - PrevStructType = nullptr; - setTypeFromId(MemberTypeId, &PrevStructType, &PrevArrayType); - } else if (PrevArrayType) { - uint32_t LocOffset, ElementTypeId; - PrevArrayType->getLocInfo(Loc, LocOffset, ElementTypeId); - - Offset += LocOffset; - PrevArrayType = nullptr; - setTypeFromId(ElementTypeId, &PrevStructType, &PrevArrayType); - } - Start = End + 1; - End = Start; - } - } - AccessOffsets[RootTy->getName().str() + ":" + AccessPattern.str()] = Offset; - OffsetRelocTable[SecNameOff].push_back(OffsetReloc); + size_t FirstDollar = AccessPattern.find_first_of('$'); + size_t FirstColon = AccessPattern.find_first_of(':'); + size_t SecondColon = AccessPattern.find_first_of(':', FirstColon + 1); + StringRef IndexPattern = AccessPattern.substr(FirstDollar + 1); + StringRef RelocKindStr = AccessPattern.substr(FirstColon + 1, + SecondColon - FirstColon); + StringRef PatchImmStr = 
AccessPattern.substr(SecondColon + 1, + FirstDollar - SecondColon); + + BTFFieldReloc FieldReloc; + FieldReloc.Label = ORSym; + FieldReloc.OffsetNameOff = addString(IndexPattern); + FieldReloc.TypeID = RootId; + FieldReloc.RelocKind = std::stoull(RelocKindStr); + PatchImms[AccessPattern.str()] = std::stoul(PatchImmStr); + FieldRelocTable[SecNameOff].push_back(FieldReloc); } void BTFDebug::processLDimm64(const MachineInstr *MI) { @@ -1052,7 +964,7 @@ void BTFDebug::processLDimm64(const MachineInstr *MI) { // will generate an .BTF.ext record. // // If the insn is "r2 = LD_imm64 @__BTF_...", - // add this insn into the .BTF.ext OffsetReloc subsection. + // add this insn into the .BTF.ext FieldReloc subsection. // Relocation looks like: // . SecName: // . InstOffset @@ -1083,16 +995,7 @@ void BTFDebug::processLDimm64(const MachineInstr *MI) { MDNode *MDN = GVar->getMetadata(LLVMContext::MD_preserve_access_index); DIType *Ty = dyn_cast(MDN); - generateOffsetReloc(MI, ORSym, Ty, GVar->getName()); - } else if (GVar && !GVar->hasInitializer() && GVar->hasExternalLinkage() && - GVar->getSection() == BPFCoreSharedInfo::PatchableExtSecName) { - MCSymbol *ORSym = OS.getContext().createTempSymbol(); - OS.EmitLabel(ORSym); - - BTFExternReloc ExternReloc; - ExternReloc.Label = ORSym; - ExternReloc.ExternNameOff = addString(GVar->getName()); - ExternRelocTable[SecNameOff].push_back(ExternReloc); + generateFieldReloc(MI, ORSym, Ty, GVar->getName()); } } } @@ -1200,12 +1103,12 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) { ? BTF::VAR_GLOBAL_ALLOCATED : BTF::VAR_STATIC; auto VarEntry = - llvm::make_unique(Global.getName(), GVTypeId, GVarInfo); + std::make_unique(Global.getName(), GVTypeId, GVarInfo); uint32_t VarId = addType(std::move(VarEntry)); // Find or create a DataSec if (DataSecEntries.find(SecName) == DataSecEntries.end()) { - DataSecEntries[SecName] = llvm::make_unique(Asm, SecName); + DataSecEntries[SecName] = std::make_unique(Asm, SecName); } // Calculate symbol size @@ -1224,30 +1127,12 @@ bool BTFDebug::InstLower(const MachineInstr *MI, MCInst &OutMI) { const GlobalValue *GVal = MO.getGlobal(); auto *GVar = dyn_cast(GVal); if (GVar && GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr)) { - MDNode *MDN = GVar->getMetadata(LLVMContext::MD_preserve_access_index); - DIType *Ty = dyn_cast(MDN); - std::string TypeName = Ty->getName(); - int64_t Imm = AccessOffsets[TypeName + ":" + GVar->getName().str()]; - - // Emit "mov ri, " for abstract member accesses. + // Emit "mov ri, " for patched immediate. + uint32_t Imm = PatchImms[GVar->getName().str()]; OutMI.setOpcode(BPF::MOV_ri); OutMI.addOperand(MCOperand::createReg(MI->getOperand(0).getReg())); OutMI.addOperand(MCOperand::createImm(Imm)); return true; - } else if (GVar && !GVar->hasInitializer() && - GVar->hasExternalLinkage() && - GVar->getSection() == BPFCoreSharedInfo::PatchableExtSecName) { - const IntegerType *IntTy = dyn_cast(GVar->getValueType()); - assert(IntTy); - // For patchable externals, emit "LD_imm64, ri, 0" if the external - // variable is 64bit width, emit "mov ri, 0" otherwise. 
- if (IntTy->getBitWidth() == 64) - OutMI.setOpcode(BPF::LD_imm64); - else - OutMI.setOpcode(BPF::MOV_ri); - OutMI.addOperand(MCOperand::createReg(MI->getOperand(0).getReg())); - OutMI.addOperand(MCOperand::createImm(0)); - return true; } } } @@ -1281,7 +1166,7 @@ void BTFDebug::endModule() { } if (StructTypeId == 0) { - auto FwdTypeEntry = llvm::make_unique(TypeName, IsUnion); + auto FwdTypeEntry = std::make_unique(TypeName, IsUnion); StructTypeId = addType(std::move(FwdTypeEntry)); } diff --git a/lib/Target/BPF/BTFDebug.h b/lib/Target/BPF/BTFDebug.h index 6c0cdde17d9b..c01e0d1d1612 100644 --- a/lib/Target/BPF/BTFDebug.h +++ b/lib/Target/BPF/BTFDebug.h @@ -104,15 +104,13 @@ public: /// Handle array type. class BTFTypeArray : public BTFTypeBase { - uint32_t ElemSize; struct BTF::BTFArray ArrayInfo; public: - BTFTypeArray(uint32_t ElemTypeId, uint32_t ElemSize, uint32_t NumElems); + BTFTypeArray(uint32_t ElemTypeId, uint32_t NumElems); uint32_t getSize() { return BTFTypeBase::getSize() + BTF::BTFArraySize; } void completeType(BTFDebug &BDebug); void emitType(MCStreamer &OS); - void getLocInfo(uint32_t Loc, uint32_t &LocOffset, uint32_t &ElementTypeId); }; /// Handle struct/union type. @@ -130,8 +128,6 @@ public: void completeType(BTFDebug &BDebug); void emitType(MCStreamer &OS); std::string getName(); - void getMemberInfo(uint32_t Loc, uint32_t &Offset, uint32_t &MemberType); - uint32_t getStructSize(); }; /// Handle function pointer. @@ -199,7 +195,7 @@ class BTFStringTable { /// A mapping from string table offset to the index /// of the Table. It is used to avoid putting /// duplicated strings in the table. - std::unordered_map OffsetToIdMap; + std::map OffsetToIdMap; /// A vector of strings to represent the string table. std::vector Table; @@ -228,16 +224,11 @@ struct BTFLineInfo { }; /// Represent one offset relocation. -struct BTFOffsetReloc { +struct BTFFieldReloc { const MCSymbol *Label; ///< MCSymbol identifying insn for the reloc uint32_t TypeID; ///< Type ID uint32_t OffsetNameOff; ///< The string to traverse types -}; - -/// Represent one extern relocation. -struct BTFExternReloc { - const MCSymbol *Label; ///< MCSymbol identifying insn for the reloc - uint32_t ExternNameOff; ///< The extern variable name + uint32_t RelocKind; ///< What to patch the instruction }; /// Collect and emit BTF information. @@ -253,13 +244,11 @@ class BTFDebug : public DebugHandlerBase { std::unordered_map DIToIdMap; std::map> FuncInfoTable; std::map> LineInfoTable; - std::map> OffsetRelocTable; - std::map> ExternRelocTable; + std::map> FieldRelocTable; StringMap> FileContent; std::map> DataSecEntries; std::vector StructTypes; - std::vector ArrayTypes; - std::map AccessOffsets; + std::map PatchImms; std::map>> FixupDerivedTypes; @@ -305,13 +294,9 @@ class BTFDebug : public DebugHandlerBase { void processGlobals(bool ProcessingMapDef); /// Generate one offset relocation record. - void generateOffsetReloc(const MachineInstr *MI, const MCSymbol *ORSym, + void generateFieldReloc(const MachineInstr *MI, const MCSymbol *ORSym, DIType *RootTy, StringRef AccessPattern); - /// Set the to-be-traversed Struct/Array Type based on TypeId. - void setTypeFromId(uint32_t TypeId, BTFTypeStruct **PrevStructType, - BTFTypeArray **PrevArrayType); - /// Populating unprocessed struct type. 
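The string handed to generateFieldReloc (the global's name, as passed in processLDimm64 above) is decoded purely by position: everything after the first '$' is the type-index pattern stored in the string table, and the two ':'-separated fields before it are the relocation kind and the immediate that InstLower later patches into a MOV_ri. A standalone restatement of that decoding, with a made-up input; the component before the first ':' is not consumed by this routine, and the root type id comes from RootTy instead.

    #include <cstdint>
    #include <string>

    struct ParsedAccess {
      uint32_t RelocKind;        // what to patch in the instruction
      uint32_t PatchImm;         // immediate later substituted via MOV_ri
      std::string IndexPattern;  // e.g. "0:2", recorded at OffsetNameOff
    };

    ParsedAccess parseAccessPattern(const std::string &S) {
      size_t C1 = S.find(':');
      size_t C2 = S.find(':', C1 + 1);
      size_t D  = S.find('$');
      ParsedAccess P;
      P.RelocKind = static_cast<uint32_t>(std::stoul(S.substr(C1 + 1, C2 - C1 - 1)));
      P.PatchImm  = static_cast<uint32_t>(std::stoul(S.substr(C2 + 1, D - C2 - 1)));
      P.IndexPattern = S.substr(D + 1);
      return P;
    }

    // parseAccessPattern("s:0:8$0:2") -> RelocKind 0, PatchImm 8, IndexPattern "0:2"
    // (the input here is hypothetical, chosen only to exercise the separators).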
unsigned populateStructType(const DIType *Ty); diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp index 057bbf5c3b06..ef4e324c3bdd 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp @@ -39,7 +39,7 @@ unsigned BPFELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { // determine the type of the relocation - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getKind()) { default: llvm_unreachable("invalid fixup kind!"); case FK_SecRel_8: @@ -85,5 +85,5 @@ unsigned BPFELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, std::unique_ptr llvm::createBPFELFObjectWriter(uint8_t OSABI) { - return llvm::make_unique(OSABI); + return std::make_unique(OSABI); } diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index 0881bf841f90..590c4a2eb69d 100644 --- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -702,7 +702,7 @@ bool HexagonAsmParser::ParseDirectiveFalign(unsigned Size, SMLoc L) { // Make sure we have a number (false is returned if expression is a number) if (!getParser().parseExpression(Value)) { // Make sure this is a number that is in range - const MCConstantExpr *MCE = dyn_cast(Value); + auto *MCE = cast(Value); uint64_t IntValue = MCE->getValue(); if (!isUIntN(Size, IntValue) && !isIntN(Size, IntValue)) return Error(ExprLoc, "literal value out of range (256) for falign"); diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp index b7e95caf24fb..efd5ed915127 100644 --- a/lib/Target/Hexagon/BitTracker.cpp +++ b/lib/Target/Hexagon/BitTracker.cpp @@ -84,7 +84,7 @@ namespace { raw_ostream &operator<< (raw_ostream &OS, const printv &PV) { if (PV.R) - OS << 'v' << TargetRegisterInfo::virtReg2Index(PV.R); + OS << 'v' << Register::virtReg2Index(PV.R); else OS << 's'; return OS; @@ -201,7 +201,7 @@ BitTracker::~BitTracker() { bool BT::RegisterCell::meet(const RegisterCell &RC, unsigned SelfR) { // An example when "meet" can be invoked with SelfR == 0 is a phi node // with a physical register as an operand. - assert(SelfR == 0 || TargetRegisterInfo::isVirtualRegister(SelfR)); + assert(SelfR == 0 || Register::isVirtualRegister(SelfR)); bool Changed = false; for (uint16_t i = 0, n = Bits.size(); i < n; ++i) { const BitValue &RCV = RC[i]; @@ -335,12 +335,13 @@ uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const { // 1. find a physical register PhysR from the same class as RR.Reg, // 2. find a physical register PhysS that corresponds to PhysR:RR.Sub, // 3. find a register class that contains PhysS. - if (TargetRegisterInfo::isVirtualRegister(RR.Reg)) { + if (Register::isVirtualRegister(RR.Reg)) { const auto &VC = composeWithSubRegIndex(*MRI.getRegClass(RR.Reg), RR.Sub); return TRI.getRegSizeInBits(VC); } - assert(TargetRegisterInfo::isPhysicalRegister(RR.Reg)); - unsigned PhysR = (RR.Sub == 0) ? RR.Reg : TRI.getSubReg(RR.Reg, RR.Sub); + assert(Register::isPhysicalRegister(RR.Reg)); + Register PhysR = + (RR.Sub == 0) ? Register(RR.Reg) : TRI.getSubReg(RR.Reg, RR.Sub); return getPhysRegBitWidth(PhysR); } @@ -350,10 +351,10 @@ BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR, // Physical registers are assumed to be present in the map with an unknown // value. 
Don't actually insert anything in the map, just return the cell. - if (TargetRegisterInfo::isPhysicalRegister(RR.Reg)) + if (Register::isPhysicalRegister(RR.Reg)) return RegisterCell::self(0, BW); - assert(TargetRegisterInfo::isVirtualRegister(RR.Reg)); + assert(Register::isVirtualRegister(RR.Reg)); // For virtual registers that belong to a class that is not tracked, // generate an "unknown" value as well. const TargetRegisterClass *C = MRI.getRegClass(RR.Reg); @@ -376,7 +377,7 @@ void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC, // While updating the cell map can be done in a meaningful way for // a part of a register, it makes little sense to implement it as the // SSA representation would never contain such "partial definitions". - if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + if (!Register::isVirtualRegister(RR.Reg)) return; assert(RR.Sub == 0 && "Unexpected sub-register in definition"); // Eliminate all ref-to-reg-0 bit values: replace them with "self". @@ -711,7 +712,7 @@ BT::BitMask BT::MachineEvaluator::mask(unsigned Reg, unsigned Sub) const { } uint16_t BT::MachineEvaluator::getPhysRegBitWidth(unsigned Reg) const { - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + assert(Register::isPhysicalRegister(Reg)); const TargetRegisterClass &PC = *TRI.getMinimalPhysRegClass(Reg); return TRI.getRegSizeInBits(PC); } @@ -874,7 +875,7 @@ void BT::visitNonBranch(const MachineInstr &MI) { continue; RegisterRef RD(MO); assert(RD.Sub == 0 && "Unexpected sub-register in definition"); - if (!TargetRegisterInfo::isVirtualRegister(RD.Reg)) + if (!Register::isVirtualRegister(RD.Reg)) continue; bool Changed = false; diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index b07d15609ede..3d771d388e28 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -130,7 +130,7 @@ bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!MO.isReg()) return true; - unsigned RegNumber = MO.getReg(); + Register RegNumber = MO.getReg(); // This should be an assert in the frontend. if (Hexagon::DoubleRegsRegClass.contains(RegNumber)) RegNumber = TRI->getSubReg(RegNumber, ExtraCode[0] == 'L' ? 
diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp index 7b75d251ccd3..3068fb6f9629 100644 --- a/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -147,11 +147,11 @@ namespace { } static inline unsigned v2x(unsigned v) { - return TargetRegisterInfo::virtReg2Index(v); + return Register::virtReg2Index(v); } static inline unsigned x2v(unsigned x) { - return TargetRegisterInfo::index2VirtReg(x); + return Register::index2VirtReg(x); } }; @@ -290,8 +290,8 @@ void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI, for (auto &Op : MI.operands()) { if (!Op.isReg() || !Op.isDef()) continue; - unsigned R = Op.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) + Register R = Op.getReg(); + if (!Register::isVirtualRegister(R)) continue; Defs.insert(R); } @@ -302,8 +302,8 @@ void HexagonBitSimplify::getInstrUses(const MachineInstr &MI, for (auto &Op : MI.operands()) { if (!Op.isReg() || !Op.isUse()) continue; - unsigned R = Op.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) + Register R = Op.getReg(); + if (!Register::isVirtualRegister(R)) continue; Uses.insert(R); } @@ -353,8 +353,7 @@ bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC, bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR, MachineRegisterInfo &MRI) { - if (!TargetRegisterInfo::isVirtualRegister(OldR) || - !TargetRegisterInfo::isVirtualRegister(NewR)) + if (!Register::isVirtualRegister(OldR) || !Register::isVirtualRegister(NewR)) return false; auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); decltype(End) NextI; @@ -367,8 +366,7 @@ bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR, bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI) { - if (!TargetRegisterInfo::isVirtualRegister(OldR) || - !TargetRegisterInfo::isVirtualRegister(NewR)) + if (!Register::isVirtualRegister(OldR) || !Register::isVirtualRegister(NewR)) return false; if (hasTiedUse(OldR, MRI, NewSR)) return false; @@ -384,8 +382,7 @@ bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR, bool HexagonBitSimplify::replaceSubWithSub(unsigned OldR, unsigned OldSR, unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI) { - if (!TargetRegisterInfo::isVirtualRegister(OldR) || - !TargetRegisterInfo::isVirtualRegister(NewR)) + if (!Register::isVirtualRegister(OldR) || !Register::isVirtualRegister(NewR)) return false; if (OldSR != NewSR && hasTiedUse(OldR, MRI, NewSR)) return false; @@ -896,7 +893,7 @@ bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN, // register class. const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass( const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) { - if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + if (!Register::isVirtualRegister(RR.Reg)) return nullptr; auto *RC = MRI.getRegClass(RR.Reg); if (RR.Sub == 0) @@ -927,8 +924,8 @@ const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass( // with a 32-bit register. bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD, const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) { - if (!TargetRegisterInfo::isVirtualRegister(RD.Reg) || - !TargetRegisterInfo::isVirtualRegister(RS.Reg)) + if (!Register::isVirtualRegister(RD.Reg) || + !Register::isVirtualRegister(RS.Reg)) return false; // Return false if one (or both) classes are nullptr. 
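Most of the Hexagon churn in this import is the same mechanical migration: register identities move from raw unsigned to llvm::Register, and the static predicates move from TargetRegisterInfo to Register. A sketch of the resulting idiom, using only helpers that appear in the hunks above; it is written against this revision's headers and is not a complete pass.

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/MachineOperand.h"
    #include "llvm/CodeGen/Register.h"

    using namespace llvm;

    // Collect the virtual registers defined by MI.
    static void collectVirtualDefs(const MachineInstr &MI,
                                   SmallVectorImpl<Register> &Defs) {
      for (const MachineOperand &Op : MI.operands()) {
        if (!Op.isReg() || !Op.isDef())
          continue;
        Register R = Op.getReg();             // was: unsigned R = Op.getReg();
        if (!Register::isVirtualRegister(R))  // was: TargetRegisterInfo::isVirtualRegister(R)
          continue;
        Defs.push_back(R);
      }
    }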
auto *DRC = getFinalVRegClass(RD, MRI); @@ -979,7 +976,7 @@ bool DeadCodeElimination::isDead(unsigned R) const { continue; if (UseI->isPHI()) { assert(!UseI->getOperand(0).getSubReg()); - unsigned DR = UseI->getOperand(0).getReg(); + Register DR = UseI->getOperand(0).getReg(); if (DR == R) continue; } @@ -1018,8 +1015,8 @@ bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) { for (auto &Op : MI->operands()) { if (!Op.isReg() || !Op.isDef()) continue; - unsigned R = Op.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R) || !isDead(R)) { + Register R = Op.getReg(); + if (!Register::isVirtualRegister(R) || !isDead(R)) { AllDead = false; break; } @@ -1220,8 +1217,8 @@ bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) { return false; MachineInstr &UseI = *I->getParent(); if (UseI.isPHI() || UseI.isCopy()) { - unsigned DefR = UseI.getOperand(0).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DefR)) + Register DefR = UseI.getOperand(0).getReg(); + if (!Register::isVirtualRegister(DefR)) return false; Pending.push_back(DefR); } else { @@ -1345,7 +1342,7 @@ bool RedundantInstrElimination::processBlock(MachineBasicBlock &B, // If found, replace the instruction with a COPY. const DebugLoc &DL = MI->getDebugLoc(); const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI); - unsigned NewR = MRI.createVirtualRegister(FRC); + Register NewR = MRI.createVirtualRegister(FRC); MachineInstr *CopyI = BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) .addReg(RS.Reg, 0, RS.Sub); @@ -1412,7 +1409,7 @@ bool ConstGeneration::isTfrConst(const MachineInstr &MI) { // register class and the actual value being transferred. unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C, MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL) { - unsigned Reg = MRI.createVirtualRegister(RC); + Register Reg = MRI.createVirtualRegister(RC); if (RC == &Hexagon::IntRegsRegClass) { BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), Reg) .addImm(int32_t(C)); @@ -1470,7 +1467,7 @@ bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) { if (Defs.count() != 1) continue; unsigned DR = Defs.find_first(); - if (!TargetRegisterInfo::isVirtualRegister(DR)) + if (!Register::isVirtualRegister(DR)) continue; uint64_t U; const BitTracker::RegisterCell &DRC = BT.lookup(DR); @@ -1609,7 +1606,7 @@ bool CopyGeneration::processBlock(MachineBasicBlock &B, auto *FRC = HBS::getFinalVRegClass(R, MRI); if (findMatch(R, MR, AVB)) { - unsigned NewR = MRI.createVirtualRegister(FRC); + Register NewR = MRI.createVirtualRegister(FRC); BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) .addReg(MR.Reg, 0, MR.Sub); BT.put(BitTracker::RegisterRef(NewR), BT.get(MR)); @@ -1628,7 +1625,7 @@ bool CopyGeneration::processBlock(MachineBasicBlock &B, BitTracker::RegisterRef ML, MH; if (findMatch(TL, ML, AVB) && findMatch(TH, MH, AVB)) { auto *FRC = HBS::getFinalVRegClass(R, MRI); - unsigned NewR = MRI.createVirtualRegister(FRC); + Register NewR = MRI.createVirtualRegister(FRC); BuildMI(B, At, DL, HII.get(TargetOpcode::REG_SEQUENCE), NewR) .addReg(ML.Reg, 0, ML.Sub) .addImm(SubLo) @@ -1819,7 +1816,7 @@ bool BitSimplification::matchHalf(unsigned SelfR, if (Reg == 0 || Reg == SelfR) // Don't match "self". 
return false; - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return false; if (!BT.has(Reg)) return false; @@ -2025,7 +2022,7 @@ bool BitSimplification::genPackhl(MachineInstr *MI, return false; MachineBasicBlock &B = *MI->getParent(); - unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass); + Register NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass); DebugLoc DL = MI->getDebugLoc(); auto At = MI->isPHI() ? B.getFirstNonPHI() : MachineBasicBlock::iterator(MI); @@ -2097,7 +2094,7 @@ bool BitSimplification::genCombineHalf(MachineInstr *MI, MachineBasicBlock &B = *MI->getParent(); DebugLoc DL = MI->getDebugLoc(); - unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + Register NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); auto At = MI->isPHI() ? B.getFirstNonPHI() : MachineBasicBlock::iterator(MI); BuildMI(B, At, DL, HII.get(COpc), NewR) @@ -2154,7 +2151,7 @@ bool BitSimplification::genExtractLow(MachineInstr *MI, if (!validateReg(RS, NewOpc, 1)) continue; - unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + Register NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); auto At = MI->isPHI() ? B.getFirstNonPHI() : MachineBasicBlock::iterator(MI); auto MIB = BuildMI(B, At, DL, HII.get(NewOpc), NewR) @@ -2368,7 +2365,7 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI, return true; } } else if (V.is(0) || V.is(1)) { - unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); + Register NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); unsigned NewOpc = V.is(0) ? Hexagon::PS_false : Hexagon::PS_true; BuildMI(B, At, DL, HII.get(NewOpc), NewR); HBS::replaceReg(RD.Reg, NewR, MRI); @@ -2541,7 +2538,7 @@ bool BitSimplification::simplifyExtractLow(MachineInstr *MI, DebugLoc DL = MI->getDebugLoc(); MachineBasicBlock &B = *MI->getParent(); - unsigned NewR = MRI.createVirtualRegister(FRC); + Register NewR = MRI.createVirtualRegister(FRC); auto At = MI->isPHI() ? B.getFirstNonPHI() : MachineBasicBlock::iterator(MI); auto MIB = BuildMI(B, At, DL, HII.get(ExtOpc), NewR) @@ -2612,8 +2609,8 @@ bool BitSimplification::simplifyRCmp0(MachineInstr *MI, KnownNZ = true; } - auto ReplaceWithConst = [&] (int C) { - unsigned NewR = MRI.createVirtualRegister(FRC); + auto ReplaceWithConst = [&](int C) { + Register NewR = MRI.createVirtualRegister(FRC); BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), NewR) .addImm(C); HBS::replaceReg(RD.Reg, NewR, MRI); @@ -2678,7 +2675,7 @@ bool BitSimplification::simplifyRCmp0(MachineInstr *MI, // replace the comparison with a C2_muxii, using the same predicate // register, but with operands substituted with 0/1 accordingly. 
if ((KnownZ1 || KnownNZ1) && (KnownZ2 || KnownNZ2)) { - unsigned NewR = MRI.createVirtualRegister(FRC); + Register NewR = MRI.createVirtualRegister(FRC); BuildMI(B, At, DL, HII.get(Hexagon::C2_muxii), NewR) .addReg(InpDef->getOperand(1).getReg()) .addImm(KnownZ1 == (Opc == Hexagon::A4_rcmpeqi)) @@ -3071,7 +3068,7 @@ void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB, DenseMap RegMap; const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR); - unsigned PhiR = MRI->createVirtualRegister(PhiRC); + Register PhiR = MRI->createVirtualRegister(PhiRC); BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR) .addReg(NewPredR) .addMBB(&PB) @@ -3083,7 +3080,7 @@ void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB, const MachineInstr *SI = G.Ins[i-1]; unsigned DR = getDefReg(SI); const TargetRegisterClass *RC = MRI->getRegClass(DR); - unsigned NewDR = MRI->createVirtualRegister(RC); + Register NewDR = MRI->createVirtualRegister(RC); DebugLoc DL = SI->getDebugLoc(); auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR); @@ -3162,7 +3159,7 @@ bool HexagonLoopRescheduling::processLoop(LoopCand &C) { if (Defs.count() != 1) continue; unsigned DefR = Defs.find_first(); - if (!TargetRegisterInfo::isVirtualRegister(DefR)) + if (!Register::isVirtualRegister(DefR)) continue; if (!isBitShuffle(&*I, DefR)) continue; diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp index ba50faac2cf9..ebd060ce503e 100644 --- a/lib/Target/Hexagon/HexagonBitTracker.cpp +++ b/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -111,7 +111,7 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const { } uint16_t HexagonEvaluator::getPhysRegBitWidth(unsigned Reg) const { - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + assert(Register::isPhysicalRegister(Reg)); using namespace Hexagon; const auto &HST = MF.getSubtarget(); @@ -1042,8 +1042,8 @@ unsigned HexagonEvaluator::getUniqueDefVReg(const MachineInstr &MI) const { for (const MachineOperand &Op : MI.operands()) { if (!Op.isReg() || !Op.isDef()) continue; - unsigned R = Op.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) + Register R = Op.getReg(); + if (!Register::isVirtualRegister(R)) continue; if (DefReg != 0) return 0; @@ -1220,7 +1220,7 @@ bool HexagonEvaluator::evaluateFormalCopy(const MachineInstr &MI, RegisterRef RD = MI.getOperand(0); RegisterRef RS = MI.getOperand(1); assert(RD.Sub == 0); - if (!TargetRegisterInfo::isPhysicalRegister(RS.Reg)) + if (!Register::isPhysicalRegister(RS.Reg)) return false; RegExtMap::const_iterator F = VRX.find(RD.Reg); if (F == VRX.end()) diff --git a/lib/Target/Hexagon/HexagonBlockRanges.cpp b/lib/Target/Hexagon/HexagonBlockRanges.cpp index 999150fc8c6e..d1d1b8ee7d41 100644 --- a/lib/Target/Hexagon/HexagonBlockRanges.cpp +++ b/lib/Target/Hexagon/HexagonBlockRanges.cpp @@ -268,14 +268,14 @@ HexagonBlockRanges::RegisterSet HexagonBlockRanges::expandToSubRegs( return SRs; } - if (TargetRegisterInfo::isPhysicalRegister(R.Reg)) { + if (Register::isPhysicalRegister(R.Reg)) { MCSubRegIterator I(R.Reg, &TRI); if (!I.isValid()) SRs.insert({R.Reg, 0}); for (; I.isValid(); ++I) SRs.insert({*I, 0}); } else { - assert(TargetRegisterInfo::isVirtualRegister(R.Reg)); + assert(Register::isVirtualRegister(R.Reg)); auto &RC = *MRI.getRegClass(R.Reg); unsigned PReg = *RC.begin(); MCSubRegIndexIterator I(PReg, &TRI); @@ -321,7 +321,7 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap, if 
(!Op.isReg() || !Op.isUse() || Op.isUndef()) continue; RegisterRef R = { Op.getReg(), Op.getSubReg() }; - if (TargetRegisterInfo::isPhysicalRegister(R.Reg) && Reserved[R.Reg]) + if (Register::isPhysicalRegister(R.Reg) && Reserved[R.Reg]) continue; bool IsKill = Op.isKill(); for (auto S : expandToSubRegs(R, MRI, TRI)) { @@ -338,7 +338,7 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap, continue; RegisterRef R = { Op.getReg(), Op.getSubReg() }; for (auto S : expandToSubRegs(R, MRI, TRI)) { - if (TargetRegisterInfo::isPhysicalRegister(S.Reg) && Reserved[S.Reg]) + if (Register::isPhysicalRegister(S.Reg) && Reserved[S.Reg]) continue; if (Op.isDead()) Clobbers.insert(S); @@ -374,7 +374,7 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap, // Update maps for defs. for (RegisterRef S : Defs) { // Defs should already be expanded into subregs. - assert(!TargetRegisterInfo::isPhysicalRegister(S.Reg) || + assert(!Register::isPhysicalRegister(S.Reg) || !MCSubRegIterator(S.Reg, &TRI, false).isValid()); if (LastDef[S] != IndexType::None || LastUse[S] != IndexType::None) closeRange(S); @@ -383,7 +383,7 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap, // Update maps for clobbers. for (RegisterRef S : Clobbers) { // Clobbers should already be expanded into subregs. - assert(!TargetRegisterInfo::isPhysicalRegister(S.Reg) || + assert(!Register::isPhysicalRegister(S.Reg) || !MCSubRegIterator(S.Reg, &TRI, false).isValid()); if (LastDef[S] != IndexType::None || LastUse[S] != IndexType::None) closeRange(S); @@ -482,7 +482,7 @@ HexagonBlockRanges::RegToRangeMap HexagonBlockRanges::computeDeadMap( } } for (auto &P : LiveMap) - if (TargetRegisterInfo::isVirtualRegister(P.first.Reg)) + if (Register::isVirtualRegister(P.first.Reg)) addDeadRanges(P.first); LLVM_DEBUG(dbgs() << __func__ << ": dead map\n" diff --git a/lib/Target/Hexagon/HexagonBranchRelaxation.cpp b/lib/Target/Hexagon/HexagonBranchRelaxation.cpp index ee93739b2c7b..08f740806879 100644 --- a/lib/Target/Hexagon/HexagonBranchRelaxation.cpp +++ b/lib/Target/Hexagon/HexagonBranchRelaxation.cpp @@ -105,12 +105,11 @@ void HexagonBranchRelaxation::computeOffset(MachineFunction &MF, // offset of the current instruction from the start. unsigned InstOffset = 0; for (auto &B : MF) { - if (B.getAlignment()) { + if (B.getAlignment() != Align::None()) { // Although we don't know the exact layout of the final code, we need // to account for alignment padding somehow. This heuristic pads each // aligned basic block according to the alignment value. 
- int ByteAlign = (1u << B.getAlignment()) - 1; - InstOffset = (InstOffset + ByteAlign) & ~(ByteAlign); + InstOffset = alignTo(InstOffset, B.getAlignment()); } OffsetMap[&B] = InstOffset; for (auto &MI : B.instrs()) { diff --git a/lib/Target/Hexagon/HexagonConstExtenders.cpp b/lib/Target/Hexagon/HexagonConstExtenders.cpp index cfed0ecef272..ddc9b847ef1c 100644 --- a/lib/Target/Hexagon/HexagonConstExtenders.cpp +++ b/lib/Target/Hexagon/HexagonConstExtenders.cpp @@ -14,9 +14,10 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Register.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Pass.h" #include #include #include @@ -235,24 +236,24 @@ namespace { Reg = Op.getReg(); Sub = Op.getSubReg(); } else if (Op.isFI()) { - Reg = TargetRegisterInfo::index2StackSlot(Op.getIndex()); + Reg = llvm::Register::index2StackSlot(Op.getIndex()); } return *this; } bool isVReg() const { - return Reg != 0 && !TargetRegisterInfo::isStackSlot(Reg) && - TargetRegisterInfo::isVirtualRegister(Reg); + return Reg != 0 && !llvm::Register::isStackSlot(Reg) && + llvm::Register::isVirtualRegister(Reg); } bool isSlot() const { - return Reg != 0 && TargetRegisterInfo::isStackSlot(Reg); + return Reg != 0 && llvm::Register::isStackSlot(Reg); } operator MachineOperand() const { if (isVReg()) return MachineOperand::CreateReg(Reg, /*Def*/false, /*Imp*/false, /*Kill*/false, /*Dead*/false, /*Undef*/false, /*EarlyClobber*/false, Sub); - if (TargetRegisterInfo::isStackSlot(Reg)) { - int FI = TargetRegisterInfo::stackSlot2Index(Reg); + if (llvm::Register::isStackSlot(Reg)) { + int FI = llvm::Register::stackSlot2Index(Reg); return MachineOperand::CreateFI(FI); } llvm_unreachable("Cannot create MachineOperand"); @@ -1524,7 +1525,7 @@ void HCE::calculatePlacement(const ExtenderInit &ExtI, const IndexList &Refs, } HCE::Register HCE::insertInitializer(Loc DefL, const ExtenderInit &ExtI) { - unsigned DefR = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); + llvm::Register DefR = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); MachineBasicBlock &MBB = *DefL.Block; MachineBasicBlock::iterator At = DefL.At; DebugLoc dl = DefL.Block->findDebugLoc(DefL.At); diff --git a/lib/Target/Hexagon/HexagonConstPropagation.cpp b/lib/Target/Hexagon/HexagonConstPropagation.cpp index d1fde5da5fe8..a82501cabb9b 100644 --- a/lib/Target/Hexagon/HexagonConstPropagation.cpp +++ b/lib/Target/Hexagon/HexagonConstPropagation.cpp @@ -208,14 +208,14 @@ namespace { bool has(unsigned R) const { // All non-virtual registers are considered "bottom". - if (!TargetRegisterInfo::isVirtualRegister(R)) + if (!Register::isVirtualRegister(R)) return true; MapType::const_iterator F = Map.find(R); return F != Map.end(); } const LatticeCell &get(unsigned R) const { - if (!TargetRegisterInfo::isVirtualRegister(R)) + if (!Register::isVirtualRegister(R)) return Bottom; MapType::const_iterator F = Map.find(R); if (F != Map.end()) @@ -623,7 +623,7 @@ void MachineConstPropagator::visitPHI(const MachineInstr &PN) { const MachineOperand &MD = PN.getOperand(0); RegisterSubReg DefR(MD); - assert(TargetRegisterInfo::isVirtualRegister(DefR.Reg)); + assert(Register::isVirtualRegister(DefR.Reg)); bool Changed = false; @@ -652,7 +652,7 @@ Bottomize: RegisterSubReg UseR(SO); // If the input is not a virtual register, we don't really know what // value it holds. 
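The HexagonBranchRelaxation hunk above replaces the old log2-based mask arithmetic with alignTo() on the block's Align. A standalone check, assuming a power-of-two alignment and with LogAlign standing in for the old log2-encoded value, that the two forms pad to the same offsets:

    #include <cassert>
    #include <cstdint>

    // Same rounding alignTo() performs for a power-of-two alignment.
    uint64_t alignToPow2(uint64_t Offset, uint64_t Align) {
      return (Offset + Align - 1) / Align * Align;
    }

    int main() {
      for (unsigned LogAlign = 1; LogAlign <= 4; ++LogAlign) {
        uint64_t ByteAlign = (1u << LogAlign) - 1;   // old mask form
        for (uint64_t Off = 0; Off < 64; ++Off)
          assert(((Off + ByteAlign) & ~ByteAlign) ==
                 alignToPow2(Off, uint64_t(1) << LogAlign));
      }
      return 0;
    }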
- if (!TargetRegisterInfo::isVirtualRegister(UseR.Reg)) + if (!Register::isVirtualRegister(UseR.Reg)) goto Bottomize; // If there is no cell for an input register, it means top. if (!Cells.has(UseR.Reg)) @@ -694,7 +694,7 @@ void MachineConstPropagator::visitNonBranch(const MachineInstr &MI) { continue; RegisterSubReg DefR(MO); // Only track virtual registers. - if (!TargetRegisterInfo::isVirtualRegister(DefR.Reg)) + if (!Register::isVirtualRegister(DefR.Reg)) continue; bool Changed = false; // If the evaluation failed, set cells for all output registers to bottom. @@ -1070,7 +1070,7 @@ bool MachineConstPropagator::run(MachineFunction &MF) { bool MachineConstEvaluator::getCell(const RegisterSubReg &R, const CellMap &Inputs, LatticeCell &RC) { - if (!TargetRegisterInfo::isVirtualRegister(R.Reg)) + if (!Register::isVirtualRegister(R.Reg)) return false; const LatticeCell &L = Inputs.get(R.Reg); if (!R.SubReg) { @@ -1926,7 +1926,7 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &MI, unsigned Opc = MI.getOpcode(); RegisterSubReg DefR(MD); assert(!DefR.SubReg); - if (!TargetRegisterInfo::isVirtualRegister(DefR.Reg)) + if (!Register::isVirtualRegister(DefR.Reg)) return false; if (MI.isCopy()) { @@ -2793,7 +2793,7 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI, if (!MO.isReg() || !MO.isUse() || MO.isImplicit()) continue; RegisterSubReg R(MO); - if (!TargetRegisterInfo::isVirtualRegister(R.Reg)) + if (!Register::isVirtualRegister(R.Reg)) continue; HasUse = true; // PHIs can legitimately have "top" cells after propagation. @@ -2813,7 +2813,7 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI, for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isUse() || MO.isImplicit()) continue; - unsigned R = MO.getReg(); + Register R = MO.getReg(); dbgs() << printReg(R, &TRI) << ": " << Inputs.get(R) << "\n"; } } @@ -2831,8 +2831,8 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI, for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned R = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) + Register R = MO.getReg(); + if (!Register::isVirtualRegister(R)) continue; assert(!MO.getSubReg()); assert(Inputs.has(R)); @@ -2871,7 +2871,7 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI, const MCInstrDesc *NewD = (Ps & P::Zero) ? 
&HII.get(Hexagon::PS_false) : &HII.get(Hexagon::PS_true); - unsigned NewR = MRI->createVirtualRegister(PredRC); + Register NewR = MRI->createVirtualRegister(PredRC); const MachineInstrBuilder &MIB = BuildMI(B, At, DL, *NewD, NewR); (void)MIB; #ifndef NDEBUG @@ -2893,7 +2893,7 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI, NewRC = &Hexagon::IntRegsRegClass; else NewRC = &Hexagon::DoubleRegsRegClass; - unsigned NewR = MRI->createVirtualRegister(NewRC); + Register NewR = MRI->createVirtualRegister(NewRC); const MachineInstr *NewMI; if (W == 32) { @@ -3009,7 +3009,7 @@ bool HexagonConstEvaluator::rewriteHexConstUses(MachineInstr &MI, if (V < 0) V = -V; const TargetRegisterClass *RC = MRI->getRegClass(DefR.Reg); - unsigned NewR = MRI->createVirtualRegister(RC); + Register NewR = MRI->createVirtualRegister(RC); const MachineOperand &Src1 = MI.getOperand(1); NewMI = BuildMI(B, At, DL, D, NewR) .addReg(Src1.getReg(), getRegState(Src1), Src1.getSubReg()) @@ -3111,8 +3111,8 @@ bool HexagonConstEvaluator::rewriteHexConstUses(MachineInstr &MI, void HexagonConstEvaluator::replaceAllRegUsesWith(unsigned FromReg, unsigned ToReg) { - assert(TargetRegisterInfo::isVirtualRegister(FromReg)); - assert(TargetRegisterInfo::isVirtualRegister(ToReg)); + assert(Register::isVirtualRegister(FromReg)); + assert(Register::isVirtualRegister(ToReg)); for (auto I = MRI->use_begin(FromReg), E = MRI->use_end(); I != E;) { MachineOperand &O = *I; ++I; diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp index a09ccab483cf..394a329ac447 100644 --- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp +++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -133,8 +133,8 @@ static bool isCombinableInstType(MachineInstr &MI, const HexagonInstrInfo *TII, const MachineOperand &Op1 = MI.getOperand(1); assert(Op0.isReg() && Op1.isReg()); - unsigned DestReg = Op0.getReg(); - unsigned SrcReg = Op1.getReg(); + Register DestReg = Op0.getReg(); + Register SrcReg = Op1.getReg(); return Hexagon::IntRegsRegClass.contains(DestReg) && Hexagon::IntRegsRegClass.contains(SrcReg); } @@ -146,7 +146,7 @@ static bool isCombinableInstType(MachineInstr &MI, const HexagonInstrInfo *TII, const MachineOperand &Op1 = MI.getOperand(1); assert(Op0.isReg()); - unsigned DestReg = Op0.getReg(); + Register DestReg = Op0.getReg(); // Ensure that TargetFlags are MO_NO_FLAG for a global. This is a // workaround for an ABI bug that prevents GOT relocations on combine // instructions @@ -226,7 +226,7 @@ static bool areCombinableOperations(const TargetRegisterInfo *TRI, } static bool isEvenReg(unsigned Reg) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + assert(Register::isPhysicalRegister(Reg)); if (Hexagon::IntRegsRegClass.contains(Reg)) return (Reg - Hexagon::R0) % 2 == 0; if (Hexagon::HvxVRRegClass.contains(Reg)) @@ -265,7 +265,7 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr &I1, unsigned I1DestReg, unsigned I2DestReg, bool &DoInsertAtI1) { - unsigned I2UseReg = UseReg(I2.getOperand(1)); + Register I2UseReg = UseReg(I2.getOperand(1)); // It is not safe to move I1 and I2 into one combine if I2 has a true // dependence on I1. @@ -332,7 +332,7 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr &I1, // At O3 we got better results (dhrystone) by being more conservative here. 
if (!ShouldCombineAggressively) End = std::next(MachineBasicBlock::iterator(I2)); - unsigned I1UseReg = UseReg(I1.getOperand(1)); + Register I1UseReg = UseReg(I1.getOperand(1)); // Track killed operands. If we move across an instruction that kills our // operand, we need to update the kill information on the moved I1. It kills // the operand now. @@ -410,7 +410,7 @@ HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) { continue; // Look for the defining instruction. - unsigned Reg = Op.getReg(); + Register Reg = Op.getReg(); MachineInstr *DefInst = LastDef[Reg]; if (!DefInst) continue; @@ -442,7 +442,7 @@ HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) { if (Op.isReg()) { if (!Op.isDef() || !Op.getReg()) continue; - unsigned Reg = Op.getReg(); + Register Reg = Op.getReg(); if (Hexagon::DoubleRegsRegClass.contains(Reg)) { for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) LastDef[*SubRegs] = &MI; @@ -528,7 +528,7 @@ MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr &I1, while (I2 != I1.getParent()->end() && I2->isDebugInstr()) ++I2; - unsigned I1DestReg = I1.getOperand(0).getReg(); + Register I1DestReg = I1.getOperand(0).getReg(); for (MachineBasicBlock::iterator End = I1.getParent()->end(); I2 != End; ++I2) { @@ -544,7 +544,7 @@ MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr &I1, if (ShouldCombineAggressively && PotentiallyNewifiableTFR.count(&*I2)) continue; - unsigned I2DestReg = I2->getOperand(0).getReg(); + Register I2DestReg = I2->getOperand(0).getReg(); // Check that registers are adjacent and that the first destination register // is even. @@ -579,8 +579,8 @@ void HexagonCopyToCombine::combine(MachineInstr &I1, MachineInstr &I2, ++MI; // Figure out whether I1 or I2 goes into the lowreg part. - unsigned I1DestReg = I1.getOperand(0).getReg(); - unsigned I2DestReg = I2.getOperand(0).getReg(); + Register I1DestReg = I1.getOperand(0).getReg(); + Register I2DestReg = I2.getOperand(0).getReg(); bool IsI1Loreg = (I2DestReg - I1DestReg) == 1; unsigned LoRegDef = IsI1Loreg ? 
I1DestReg : I2DestReg; unsigned SubLo; @@ -758,7 +758,7 @@ void HexagonCopyToCombine::emitCombineIR(MachineBasicBlock::iterator &InsertPt, unsigned DoubleDestReg, MachineOperand &HiOperand, MachineOperand &LoOperand) { - unsigned LoReg = LoOperand.getReg(); + Register LoReg = LoOperand.getReg(); unsigned LoRegKillFlag = getKillRegState(LoOperand.isKill()); DebugLoc DL = InsertPt->getDebugLoc(); @@ -807,7 +807,7 @@ void HexagonCopyToCombine::emitCombineRI(MachineBasicBlock::iterator &InsertPt, MachineOperand &HiOperand, MachineOperand &LoOperand) { unsigned HiRegKillFlag = getKillRegState(HiOperand.isKill()); - unsigned HiReg = HiOperand.getReg(); + Register HiReg = HiOperand.getReg(); DebugLoc DL = InsertPt->getDebugLoc(); MachineBasicBlock *BB = InsertPt->getParent(); @@ -857,8 +857,8 @@ void HexagonCopyToCombine::emitCombineRR(MachineBasicBlock::iterator &InsertPt, MachineOperand &LoOperand) { unsigned LoRegKillFlag = getKillRegState(LoOperand.isKill()); unsigned HiRegKillFlag = getKillRegState(HiOperand.isKill()); - unsigned LoReg = LoOperand.getReg(); - unsigned HiReg = HiOperand.getReg(); + Register LoReg = LoOperand.getReg(); + Register HiReg = HiOperand.getReg(); DebugLoc DL = InsertPt->getDebugLoc(); MachineBasicBlock *BB = InsertPt->getParent(); diff --git a/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td index 2ce1419e4790..e4a2ba0ec29c 100644 --- a/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td +++ b/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td @@ -37,12 +37,12 @@ def: Pat<(int_hexagon_F2_sfmax IntRegs:$src1, IntRegs:$src2), (F2_sfmax IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vabswsat DoubleRegs:$src1), (A2_vabswsat DoubleRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$src1, u5_0ImmPred:$src2), - (S2_asr_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2), - (S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_combineri IntRegs:$src1, s32_0ImmPred:$src2), - (A4_combineri IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_asr_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2), + (S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_combineri IntRegs:$src1, s32_0ImmPred_timm:$src2), + (A4_combineri IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_nac_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpy_nac_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_vpmpyh_acc DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -75,8 +75,8 @@ def: Pat<(int_hexagon_A2_vaddws DoubleRegs:$src1, DoubleRegs:$src2), (A2_vaddws DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_maxup DoubleRegs:$src1, DoubleRegs:$src2), (A2_maxup DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred:$src2), - (A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2), + (A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_interleave 
DoubleRegs:$src1), (S2_interleave DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vrcmpyi_s0 DoubleRegs:$src1, DoubleRegs:$src2), @@ -89,10 +89,10 @@ def: Pat<(int_hexagon_C2_cmpgtu IntRegs:$src1, IntRegs:$src2), (C2_cmpgtu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2), (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_cmphgtui IntRegs:$src1, u32_0ImmPred:$src2), - (A4_cmphgtui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2), - (C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_cmphgtui IntRegs:$src1, u32_0ImmPred_timm:$src2), + (A4_cmphgtui IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C2_cmpgti IntRegs:$src1, s32_0ImmPred_timm:$src2), + (C2_cmpgti IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyi IntRegs:$src1, IntRegs:$src2), (M2_mpyi IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_df2uw_chop DoubleRegs:$src1), @@ -103,12 +103,12 @@ def: Pat<(int_hexagon_M2_mpy_lh_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpy_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_lh_s0 IntRegs:$src1, IntRegs:$src2), (M2_mpy_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vrcnegh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (S2_vrcnegh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_extractup DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3), - (S2_extractup DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_extractup DoubleRegs:$src1, u6_0ImmPred_timm:$src2, u6_0ImmPred_timm:$src3), + (S2_extractup DoubleRegs:$src1, u6_0ImmPred_timm:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S4_ntstbit_r IntRegs:$src1, IntRegs:$src2), (S4_ntstbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_w2sf IntRegs:$src1), @@ -125,10 +125,10 @@ def: Pat<(int_hexagon_A4_cmpbgt IntRegs:$src1, IntRegs:$src2), (A4_cmpbgt IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_asr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_rcmpneqi IntRegs:$src1, s32_0ImmPred:$src2), - (A4_rcmpneqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_rcmpneqi IntRegs:$src1, s32_0ImmPred_timm:$src2), + (A4_rcmpneqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: 
Pat<(int_hexagon_M2_subacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_subacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_orp DoubleRegs:$src1, DoubleRegs:$src2), @@ -137,28 +137,28 @@ def: Pat<(int_hexagon_M2_mpyu_up IntRegs:$src1, IntRegs:$src2), (M2_mpyu_up IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_acc_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpy_acc_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2), - (S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2), - (S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2), + (S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_cmpbgtu IntRegs:$src1, IntRegs:$src2), (A4_cmpbgtu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_vcmpbeq_any DoubleRegs:$src1, DoubleRegs:$src2), (A4_vcmpbeq_any DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_cmpbgti IntRegs:$src1, s8_0ImmPred:$src2), - (A4_cmpbgti IntRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_cmpbgti IntRegs:$src1, s8_0ImmPred_timm:$src2), + (A4_cmpbgti IntRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_lh_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpyd_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_addsp IntRegs:$src1, DoubleRegs:$src2), (A2_addsp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S4_vxsubaddw DoubleRegs:$src1, DoubleRegs:$src2), (S4_vxsubaddw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_vcmpheqi DoubleRegs:$src1, s8_0ImmPred:$src2), - (A4_vcmpheqi DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_vcmpheqi DoubleRegs:$src1, s8_0ImmPred_timm:$src2), + (A4_vcmpheqi DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S4_vxsubaddh DoubleRegs:$src1, DoubleRegs:$src2), (S4_vxsubaddh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_pmpyw IntRegs:$src1, IntRegs:$src2), @@ -177,10 +177,10 @@ def: Pat<(int_hexagon_A2_pxorf PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), (A2_pxorf PredRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2), (A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred:$src2), - 
(S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$src1, u5_0ImmPred:$src2), - (S2_asl_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2), + (S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_asl_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_vrminuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (A4_vrminuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sffma IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -199,10 +199,10 @@ def: Pat<(int_hexagon_M4_vrmpyoh_s1 DoubleRegs:$src1, DoubleRegs:$src2), (M4_vrmpyoh_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C2_bitsset IntRegs:$src1, IntRegs:$src2), (C2_bitsset IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M2_mpysip IntRegs:$src1, u32_0ImmPred:$src2), - (M2_mpysip IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M2_mpysin IntRegs:$src1, u8_0ImmPred:$src2), - (M2_mpysin IntRegs:$src1, u8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M2_mpysip IntRegs:$src1, u32_0ImmPred_timm:$src2), + (M2_mpysip IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M2_mpysin IntRegs:$src1, u8_0ImmPred_timm:$src2), + (M2_mpysin IntRegs:$src1, u8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_boundscheck IntRegs:$src1, DoubleRegs:$src2), (A4_boundscheck IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M5_vrmpybuu DoubleRegs:$src1, DoubleRegs:$src2), @@ -225,10 +225,10 @@ def: Pat<(int_hexagon_F2_conv_ud2df DoubleRegs:$src1), (F2_conv_ud2df DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vnavgw DoubleRegs:$src1, DoubleRegs:$src2), (A2_vnavgw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_subi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S4_subi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_subi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S4_subi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vzxthw IntRegs:$src1), (S2_vzxthw IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sfadd IntRegs:$src1, IntRegs:$src2), @@ -241,12 +241,12 @@ def: Pat<(int_hexagon_M2_vmac2su_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$sr (M2_vmac2su_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2), (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3, u5_0ImmPred:$src4), - (S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3, u5_0ImmPred:$src4)>, Requires<[HasV5]>; +def: 
Pat<(int_hexagon_S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3, u5_0ImmPred_timm:$src4), + (S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3, u5_0ImmPred_timm:$src4)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_packhl IntRegs:$src1, IntRegs:$src2), (S2_packhl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred:$src2), - (A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2), + (A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vavguwr DoubleRegs:$src1, DoubleRegs:$src2), (A2_vavguwr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asl_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -259,8 +259,8 @@ def: Pat<(int_hexagon_M4_and_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M4_and_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_d2df DoubleRegs:$src1), (F2_conv_d2df DoubleRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C2_cmpgtui IntRegs:$src1, u32_0ImmPred:$src2), - (C2_cmpgtui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C2_cmpgtui IntRegs:$src1, u32_0ImmPred_timm:$src2), + (C2_cmpgtui IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vconj DoubleRegs:$src1), (A2_vconj DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsr_r_vw DoubleRegs:$src1, IntRegs:$src2), @@ -279,8 +279,8 @@ def: Pat<(int_hexagon_C2_any8 PredRegs:$src1), (C2_any8 PredRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_togglebit_r IntRegs:$src1, IntRegs:$src2), (S2_togglebit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_togglebit_i IntRegs:$src1, u5_0ImmPred:$src2), - (S2_togglebit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_togglebit_i IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_togglebit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_uw2sf IntRegs:$src1), (F2_conv_uw2sf IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vsathb_nopack DoubleRegs:$src1), @@ -303,10 +303,10 @@ def: Pat<(int_hexagon_C4_or_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3) (C4_or_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_asl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred:$src2), - (A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2), + (A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_vrmpyoh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), (M4_vrmpyoh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; def: 
Pat<(int_hexagon_M4_vrmpyoh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), @@ -323,34 +323,34 @@ def: Pat<(int_hexagon_M2_vrcmacr_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleR (M2_vrcmacr_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vavgwcr DoubleRegs:$src1, DoubleRegs:$src2), (A2_vavgwcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_vrmaxw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (A4_vrmaxw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vnavghr DoubleRegs:$src1, DoubleRegs:$src2), (A2_vnavghr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_cmpyi_wh DoubleRegs:$src1, IntRegs:$src2), (M4_cmpyi_wh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A2_tfrsi s32_0ImmPred:$src1), - (A2_tfrsi s32_0ImmPred:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A2_tfrsi s32_0ImmPred_timm:$src1), + (A2_tfrsi s32_0ImmPred_timm:$src1)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_svnavgh IntRegs:$src1, IntRegs:$src2), (A2_svnavgh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$src1, u5_0ImmPred:$src2), - (S2_lsr_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_lsr_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vmac2 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_vmac2 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred:$src2), - (A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2), + (A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_svavgh IntRegs:$src1, IntRegs:$src2), (A2_svavgh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_vrmpyeh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), (M4_vrmpyeh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_vrmpyeh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), (M4_vrmpyeh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2), - (S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2), + (S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_combine_hl 
IntRegs:$src1, IntRegs:$src2), (A2_combine_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_up IntRegs:$src1, IntRegs:$src2), @@ -381,10 +381,10 @@ def: Pat<(int_hexagon_M2_cmacr_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3 (M2_cmacr_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_or_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M4_or_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M4_mpyrr_addi u32_0ImmPred:$src1, IntRegs:$src2, IntRegs:$src3), - (M4_mpyrr_addi u32_0ImmPred:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3), - (S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M4_mpyrr_addi u32_0ImmPred_timm:$src1, IntRegs:$src2, IntRegs:$src3), + (M4_mpyrr_addi u32_0ImmPred_timm:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3), + (S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_sat_hl_s0 IntRegs:$src1, IntRegs:$src2), (M2_mpy_sat_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_sat_hl_s1 IntRegs:$src1, IntRegs:$src2), @@ -453,8 +453,8 @@ def: Pat<(int_hexagon_M2_mpy_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpy_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sffms_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (F2_sffms_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C4_cmpneqi IntRegs:$src1, s32_0ImmPred:$src2), - (C4_cmpneqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C4_cmpneqi IntRegs:$src1, s32_0ImmPred_timm:$src2), + (C4_cmpneqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_and_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M4_and_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_sat DoubleRegs:$src1), @@ -469,8 +469,8 @@ def: Pat<(int_hexagon_A2_svavghs IntRegs:$src1, IntRegs:$src2), (A2_svavghs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vrsadub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), (A2_vrsadub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C2_bitsclri IntRegs:$src1, u6_0ImmPred:$src2), - (C2_bitsclri IntRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C2_bitsclri IntRegs:$src1, u6_0ImmPred_timm:$src2), + (C2_bitsclri IntRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_subh_h16_sat_hh IntRegs:$src1, IntRegs:$src2), (A2_subh_h16_sat_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_subh_h16_sat_hl IntRegs:$src1, IntRegs:$src2), @@ -535,10 +535,10 @@ def: Pat<(int_hexagon_C2_vmux PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3 (C2_vmux PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_parityp DoubleRegs:$src1, DoubleRegs:$src2), (S2_parityp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, 
u6_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyu_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyu_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyu_nac_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -557,30 +557,30 @@ def: Pat<(int_hexagon_M2_cnacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src (M2_cnacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_cnacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_cnacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3), - (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_subaddi IntRegs:$src1, s32_0ImmPred_timm:$src2, IntRegs:$src3), + (S4_subaddi IntRegs:$src1, s32_0ImmPred_timm:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyud_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyud_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyud_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyud_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_tstbit_r IntRegs:$src1, IntRegs:$src2), (S2_tstbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3), - (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred_timm:$src3), + (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmachs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), (M2_mmachs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmachs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), (M2_mmachs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_tstbit_i IntRegs:$src1, u5_0ImmPred:$src2), - (S2_tstbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_tstbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_tstbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_up_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpy_up_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_extractu_rp IntRegs:$src1, DoubleRegs:$src2), (S2_extractu_rp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmpyuh_rs0 DoubleRegs:$src1, DoubleRegs:$src2), (M2_mmpyuh_rs0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2), - 
(S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2), (M2_mpy_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2), @@ -605,14 +605,14 @@ def: Pat<(int_hexagon_F2_conv_w2df IntRegs:$src1), (F2_conv_w2df IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_subh_l16_sat_hl IntRegs:$src1, IntRegs:$src2), (A2_subh_l16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C2_cmpeqi IntRegs:$src1, s32_0ImmPred:$src2), - (C2_cmpeqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C2_cmpeqi IntRegs:$src1, s32_0ImmPred_timm:$src2), + (C2_cmpeqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vcnegh DoubleRegs:$src1, IntRegs:$src2), (S2_vcnegh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred:$src2), - (A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred_timm:$src2), + (A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2), (M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2), @@ -633,8 +633,8 @@ def: Pat<(int_hexagon_S2_asl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3 (S2_asl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_cl0p DoubleRegs:$src1), (S2_cl0p DoubleRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3), - (S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred_timm:$src3), + (S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sffixupd IntRegs:$src1, IntRegs:$src2), (F2_sffixupd IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_sat_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2), @@ -653,8 +653,8 @@ def: Pat<(int_hexagon_M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmpyul_rs1 DoubleRegs:$src1, DoubleRegs:$src2), (M2_mmpyul_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_ntstbit_i IntRegs:$src1, u5_0ImmPred:$src2), - (S4_ntstbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_ntstbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S4_ntstbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sffixupr IntRegs:$src1), (F2_sffixupr 
IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), @@ -669,32 +669,32 @@ def: Pat<(int_hexagon_C2_andn PredRegs:$src1, PredRegs:$src2), (C2_andn PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vmpy2s_s0pack IntRegs:$src1, IntRegs:$src2), (M2_vmpy2s_s0pack IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3), - (S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3), + (S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyd_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_acc_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpy_acc_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_rcmpeqi IntRegs:$src1, s32_0ImmPred:$src2), - (A4_rcmpeqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_rcmpeqi IntRegs:$src1, s32_0ImmPred_timm:$src2), + (A4_rcmpeqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_xor_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M4_xor_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmpyuh_rs1 DoubleRegs:$src1, DoubleRegs:$src2), (M2_mmpyuh_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_asr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_round_ri IntRegs:$src1, u5_0ImmPred:$src2), - (A4_round_ri IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_round_ri IntRegs:$src1, u5_0ImmPred_timm:$src2), + (A4_round_ri IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_max IntRegs:$src1, IntRegs:$src2), (A2_max IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_round_rr IntRegs:$src1, IntRegs:$src2), (A4_round_rr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_combineii s8_0ImmPred:$src1, u32_0ImmPred:$src2), - (A4_combineii s8_0ImmPred:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_combineir s32_0ImmPred:$src1, IntRegs:$src2), - (A4_combineir s32_0ImmPred:$src1, IntRegs:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_combineii s8_0ImmPred_timm:$src1, u32_0ImmPred_timm:$src2), + (A4_combineii s8_0ImmPred_timm:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_combineir s32_0ImmPred_timm:$src1, IntRegs:$src2), + (A4_combineir s32_0ImmPred_timm:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C4_and_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3), (C4_and_orn PredRegs:$src1, PredRegs:$src2, 
PredRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M5_vmacbuu DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -703,8 +703,8 @@ def: Pat<(int_hexagon_A4_rcmpeq IntRegs:$src1, IntRegs:$src2), (A4_rcmpeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_cmpyr_whc DoubleRegs:$src1, IntRegs:$src2), (M4_cmpyr_whc DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vzxtbh IntRegs:$src1), (S2_vzxtbh IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmacuhs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), @@ -721,8 +721,8 @@ def: Pat<(int_hexagon_M2_cmpyi_s0 IntRegs:$src1, IntRegs:$src2), (M2_cmpyi_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_ori_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S4_ori_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_ori_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S4_ori_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C4_nbitsset IntRegs:$src1, IntRegs:$src2), (C4_nbitsset IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyu_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -745,10 +745,10 @@ def: Pat<(int_hexagon_M2_mpyd_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs (M2_mpyd_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyd_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_F2_sfimm_p u10_0ImmPred:$src1), - (F2_sfimm_p u10_0ImmPred:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_F2_sfimm_n u10_0ImmPred:$src1), - (F2_sfimm_n u10_0ImmPred:$src1)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_F2_sfimm_p u10_0ImmPred_timm:$src1), + (F2_sfimm_p u10_0ImmPred_timm:$src1)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_F2_sfimm_n u10_0ImmPred_timm:$src1), + (F2_sfimm_n u10_0ImmPred_timm:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_cmpyr_wh DoubleRegs:$src1, IntRegs:$src2), (M4_cmpyr_wh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), @@ -759,14 +759,14 @@ def: Pat<(int_hexagon_F2_conv_d2sf DoubleRegs:$src1), (F2_conv_d2sf DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vavguh DoubleRegs:$src1, DoubleRegs:$src2), (A2_vavguh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_cmpbeqi IntRegs:$src1, u8_0ImmPred:$src2), - (A4_cmpbeqi IntRegs:$src1, u8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_cmpbeqi IntRegs:$src1, u8_0ImmPred_timm:$src2), + (A4_cmpbeqi IntRegs:$src1, u8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sfcmpuo IntRegs:$src1, IntRegs:$src2), 
(F2_sfcmpuo IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vavguw DoubleRegs:$src1, DoubleRegs:$src2), (A2_vavguw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vsatwh_nopack DoubleRegs:$src1), (S2_vsatwh_nopack DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_hh_s0 IntRegs:$src1, IntRegs:$src2), @@ -783,8 +783,8 @@ def: Pat<(int_hexagon_M4_or_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M4_or_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_minp DoubleRegs:$src1, DoubleRegs:$src2), (A2_minp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3), - (S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3), + (S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2), (M2_mpy_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2), @@ -817,16 +817,16 @@ def: Pat<(int_hexagon_S4_extract_rp IntRegs:$src1, DoubleRegs:$src2), (S4_extract_rp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_lsl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C4_cmplteui IntRegs:$src1, u32_0ImmPred:$src2), - (C4_cmplteui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_addi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S4_addi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C4_cmplteui IntRegs:$src1, u32_0ImmPred_timm:$src2), + (C4_cmplteui IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_addi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S4_addi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_tfrcpp CtrRegs64:$src1), (A4_tfrcpp CtrRegs64:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred:$src2), - (S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_cmphgti IntRegs:$src1, s32_0ImmPred:$src2), - (A4_cmphgti IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_cmphgti IntRegs:$src1, s32_0ImmPred_timm:$src2), + (A4_cmphgti IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_vrminh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (A4_vrminh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: 
Pat<(int_hexagon_A4_vrminw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), @@ -837,8 +837,8 @@ def: Pat<(int_hexagon_S2_insertp_rp DoubleRegs:$src1, DoubleRegs:$src2, DoubleRe (S2_insertp_rp DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vnavghcr DoubleRegs:$src1, DoubleRegs:$src2), (A2_vnavghcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_subi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S4_subi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_subi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S4_subi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_vh DoubleRegs:$src1, IntRegs:$src2), (S2_lsl_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_hh_s0 IntRegs:$src1, IntRegs:$src2), @@ -851,14 +851,14 @@ def: Pat<(int_hexagon_S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs (S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_satb IntRegs:$src1), (A2_satb IntRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3, u6_0ImmPred:$src4), - (S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3, u6_0ImmPred:$src4)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3, u6_0ImmPred_timm:$src4), + (S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3, u6_0ImmPred_timm:$src4)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpyd_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2), (M2_mpyd_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_extractup_rp DoubleRegs:$src1, DoubleRegs:$src2), (S2_extractup_rp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S4_vxaddsubw DoubleRegs:$src1, DoubleRegs:$src2), @@ -925,8 +925,8 @@ def: Pat<(int_hexagon_M2_cmpyr_s0 IntRegs:$src1, IntRegs:$src2), (M2_cmpyr_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_dpmpyss_rnd_s0 IntRegs:$src1, IntRegs:$src2), (M2_dpmpyss_rnd_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C2_muxri PredRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3), - (C2_muxri PredRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C2_muxri PredRegs:$src1, s32_0ImmPred_timm:$src2, IntRegs:$src3), + (C2_muxri PredRegs:$src1, s32_0ImmPred_timm:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vmac2es_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), (M2_vmac2es_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vmac2es_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), @@ -937,8 
+937,8 @@ def: Pat<(int_hexagon_M2_mpyu_lh_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpyu_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyu_lh_s0 IntRegs:$src1, IntRegs:$src2), (M2_mpyu_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyd_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_acc_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -947,8 +947,8 @@ def: Pat<(int_hexagon_S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs (S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vaddw DoubleRegs:$src1, DoubleRegs:$src2), (A2_vaddw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vaddh DoubleRegs:$src1, DoubleRegs:$src2), (A2_vaddh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_nac_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -957,16 +957,16 @@ def: Pat<(int_hexagon_M2_mpy_nac_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs (M2_mpy_nac_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2), (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M4_mpyri_addi u32_0ImmPred:$src1, IntRegs:$src2, u6_0ImmPred:$src3), - (M4_mpyri_addi u32_0ImmPred:$src1, IntRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_andi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S4_andi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3), - (M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M4_mpyri_addi u32_0ImmPred_timm:$src1, IntRegs:$src2, u6_0ImmPred_timm:$src3), + (M4_mpyri_addi u32_0ImmPred_timm:$src1, IntRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_andi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S4_andi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3), + (M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_tfrcrr CtrRegs:$src1), (A2_tfrcrr CtrRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M2_macsin IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3), - (M2_macsin IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: 
Pat<(int_hexagon_M2_macsin IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3), + (M2_macsin IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C2_orn PredRegs:$src1, PredRegs:$src2), (C2_orn PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_and_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -1005,8 +1005,8 @@ def: Pat<(int_hexagon_M2_vrcmpys_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, IntR (M2_vrcmpys_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_dfcmpge DoubleRegs:$src1, DoubleRegs:$src2), (F2_dfcmpge DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3), - (M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3), + (M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A5_vaddhubs DoubleRegs:$src1, DoubleRegs:$src2), (A5_vaddhubs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vmaxw DoubleRegs:$src1, DoubleRegs:$src2), @@ -1017,10 +1017,10 @@ def: Pat<(int_hexagon_A2_vmaxh DoubleRegs:$src1, DoubleRegs:$src2), (A2_vmaxh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vsxthw IntRegs:$src1), (S2_vsxthw IntRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_andi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S4_andi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_andi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S4_andi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C2_cmpgt IntRegs:$src1, IntRegs:$src2), @@ -1035,22 +1035,22 @@ def: Pat<(int_hexagon_F2_conv_sf2w IntRegs:$src1), (F2_conv_sf2w IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_F2_sfclass IntRegs:$src1, u5_0ImmPred:$src2), - (F2_sfclass IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_F2_sfclass IntRegs:$src1, u5_0ImmPred_timm:$src2), + (F2_sfclass IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyud_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyud_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_xor_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M4_xor_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred:$src3), - 
(S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred_timm:$src3), + (S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M5_vdmpybsu DoubleRegs:$src1, DoubleRegs:$src2), (M5_vdmpybsu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyu_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyu_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyu_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyu_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A2_addi IntRegs:$src1, s32_0ImmPred:$src2), - (A2_addi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A2_addi IntRegs:$src1, s32_0ImmPred_timm:$src2), + (A2_addi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_addp DoubleRegs:$src1, DoubleRegs:$src2), (A2_addp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vmpy2s_s1pack IntRegs:$src1, IntRegs:$src2), @@ -1063,8 +1063,8 @@ def: Pat<(int_hexagon_M2_nacci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_nacci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_shuffeh DoubleRegs:$src1, DoubleRegs:$src2), (S2_shuffeh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_sat_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpy_sat_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_sat_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2), @@ -1131,12 +1131,12 @@ def: Pat<(int_hexagon_C2_and PredRegs:$src1, PredRegs:$src2), (C2_and PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S5_popcountp DoubleRegs:$src1), (S5_popcountp DoubleRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_extractp DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3), - (S4_extractp DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_extractp DoubleRegs:$src1, u6_0ImmPred_timm:$src2, u6_0ImmPred_timm:$src3), + (S4_extractp DoubleRegs:$src1, u6_0ImmPred_timm:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_cl0 IntRegs:$src1), (S2_cl0 IntRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred:$src2), - (A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2), + (A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmacls_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), (M2_mmacls_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmacls_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), @@ -1167,8 +1167,8 @@ def: Pat<(int_hexagon_M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_maci 
IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vmaxuh DoubleRegs:$src1, DoubleRegs:$src2), (A2_vmaxuh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_bitspliti IntRegs:$src1, u5_0ImmPred:$src2), - (A4_bitspliti IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_bitspliti IntRegs:$src1, u5_0ImmPred_timm:$src2), + (A4_bitspliti IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vmaxub DoubleRegs:$src1, DoubleRegs:$src2), (A2_vmaxub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyud_hh_s0 IntRegs:$src1, IntRegs:$src2), @@ -1185,26 +1185,26 @@ def: Pat<(int_hexagon_F2_conv_sf2d IntRegs:$src1), (F2_conv_sf2d IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_asr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_F2_dfimm_n u10_0ImmPred:$src1), - (F2_dfimm_n u10_0ImmPred:$src1)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_F2_dfimm_n u10_0ImmPred_timm:$src1), + (F2_dfimm_n u10_0ImmPred_timm:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_cmphgt IntRegs:$src1, IntRegs:$src2), (A4_cmphgt IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_F2_dfimm_p u10_0ImmPred:$src1), - (F2_dfimm_p u10_0ImmPred:$src1)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_F2_dfimm_p u10_0ImmPred_timm:$src1), + (F2_dfimm_p u10_0ImmPred_timm:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyud_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyud_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vcmpy_s1_sat_r DoubleRegs:$src1, DoubleRegs:$src2), (M2_vcmpy_s1_sat_r DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred:$src2, IntRegs:$src3), - (M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred:$src2, IntRegs:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred_timm:$src2, IntRegs:$src3), + (M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred_timm:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vcmpy_s1_sat_i DoubleRegs:$src1, DoubleRegs:$src2), (M2_vcmpy_s1_sat_i DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (S2_lsl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M5_vrmacbuu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), (M5_vrmacbuu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3), - (S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred_timm:$src3), + (S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_cnacs_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -1215,20 +1215,20 @@ def: Pat<(int_hexagon_A2_maxu IntRegs:$src1, IntRegs:$src2), (A2_maxu IntRegs:$src1, IntRegs:$src2)>, 
Requires<[HasV5]>; def: Pat<(int_hexagon_A2_maxp DoubleRegs:$src1, DoubleRegs:$src2), (A2_maxp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A2_andir IntRegs:$src1, s32_0ImmPred:$src2), - (A2_andir IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A2_andir IntRegs:$src1, s32_0ImmPred_timm:$src2), + (A2_andir IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sfrecipa IntRegs:$src1, IntRegs:$src2), (F2_sfrecipa IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A2_combineii s32_0ImmPred:$src1, s8_0ImmPred:$src2), - (A2_combineii s32_0ImmPred:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A2_combineii s32_0ImmPred_timm:$src1, s8_0ImmPred_timm:$src2), + (A2_combineii s32_0ImmPred_timm:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_orn IntRegs:$src1, IntRegs:$src2), (A4_orn IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_cmpbgtui IntRegs:$src1, u32_0ImmPred:$src2), - (A4_cmpbgtui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_cmpbgtui IntRegs:$src1, u32_0ImmPred_timm:$src2), + (A4_cmpbgtui IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_lsr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred:$src2), - (A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred_timm:$src2), + (A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_r IntRegs:$src1, IntRegs:$src2), (S2_lsl_r_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_p DoubleRegs:$src1, IntRegs:$src2), @@ -1251,16 +1251,16 @@ def: Pat<(int_hexagon_A2_satub IntRegs:$src1), (A2_satub IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vrcmpys_s1 DoubleRegs:$src1, IntRegs:$src2), (M2_vrcmpys_s1 DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3), - (S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3), + (S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C4_fastcorner9_not PredRegs:$src1, PredRegs:$src2), (C4_fastcorner9_not PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A2_tfrih IntRegs:$src1, u16_0ImmPred:$src2), - (A2_tfrih IntRegs:$src1, u16_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A2_tfril IntRegs:$src1, u16_0ImmPred:$src2), - (A2_tfril IntRegs:$src1, u16_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3), - (M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A2_tfrih IntRegs:$src1, u16_0ImmPred_timm:$src2), + (A2_tfrih IntRegs:$src1, u16_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A2_tfril IntRegs:$src1, u16_0ImmPred_timm:$src2), + (A2_tfril IntRegs:$src1, u16_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3), + 
(M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vtrunehb DoubleRegs:$src1), (S2_vtrunehb DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vabsw DoubleRegs:$src1), @@ -1269,14 +1269,14 @@ def: Pat<(int_hexagon_A2_vabsh DoubleRegs:$src1), (A2_vabsh DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sfsub IntRegs:$src1, IntRegs:$src2), (F2_sfsub IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C2_muxii PredRegs:$src1, s32_0ImmPred:$src2, s8_0ImmPred:$src3), - (C2_muxii PredRegs:$src1, s32_0ImmPred:$src2, s8_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3), - (C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C2_muxii PredRegs:$src1, s32_0ImmPred_timm:$src2, s8_0ImmPred_timm:$src3), + (C2_muxii PredRegs:$src1, s32_0ImmPred_timm:$src2, s8_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3), + (C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_swiz IntRegs:$src1), (A2_swiz IntRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_cmpyrsc_s0 IntRegs:$src1, IntRegs:$src2), (M2_cmpyrsc_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_cmpyrsc_s1 IntRegs:$src1, IntRegs:$src2), @@ -1295,44 +1295,44 @@ def: Pat<(int_hexagon_M2_mpy_nac_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs (M2_mpy_nac_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_nac_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpy_nac_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_extract IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3), - (S4_extract IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_extract IntRegs:$src1, u5_0ImmPred_timm:$src2, u5_0ImmPred_timm:$src3), + (S4_extract IntRegs:$src1, u5_0ImmPred_timm:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vcmpweq DoubleRegs:$src1, DoubleRegs:$src2), (A2_vcmpweq DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_p_or DoubleRegs:$src1, 
DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_ud2sf DoubleRegs:$src1), (F2_conv_ud2sf DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_tfr IntRegs:$src1), (A2_tfr IntRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A2_subri s32_0ImmPred:$src1, IntRegs:$src2), - (A2_subri s32_0ImmPred:$src1, IntRegs:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A2_subri s32_0ImmPred_timm:$src1, IntRegs:$src2), + (A2_subri s32_0ImmPred_timm:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_vrmaxuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (A4_vrmaxuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M5_vmpybuu IntRegs:$src1, IntRegs:$src2), (M5_vmpybuu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_vrmaxuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (A4_vrmaxuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2), - (S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vavgw DoubleRegs:$src1, DoubleRegs:$src2), (A2_vavgw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_brev IntRegs:$src1), (S2_brev IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vavgh DoubleRegs:$src1, DoubleRegs:$src2), (A2_vavgh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_clrbit_i IntRegs:$src1, u5_0ImmPred:$src2), - (S2_clrbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2), - (S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_clrbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_clrbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2), + (S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmpyl_rs1 DoubleRegs:$src1, DoubleRegs:$src2), @@ -1343,8 +1343,8 @@ def: Pat<(int_hexagon_M2_mmpyl_s0 DoubleRegs:$src1, DoubleRegs:$src2), (M2_mmpyl_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmpyl_s1 DoubleRegs:$src1, DoubleRegs:$src2), 
(M2_mmpyl_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3), - (M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3), + (M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vrndpackwhs DoubleRegs:$src1), (S2_vrndpackwhs DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vtrunewh DoubleRegs:$src1, DoubleRegs:$src2), @@ -1357,24 +1357,24 @@ def: Pat<(int_hexagon_M2_mpyd_ll_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpyd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_mac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M4_mac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred:$src4), - (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred:$src4)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred_timm:$src4), + (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred_timm:$src4)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_uw2df IntRegs:$src1), (F2_conv_uw2df IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vaddubs DoubleRegs:$src1, DoubleRegs:$src2), (A2_vaddubs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_asr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A2_orir IntRegs:$src1, s32_0ImmPred:$src2), - (A2_orir IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A2_orir IntRegs:$src1, s32_0ImmPred_timm:$src2), + (A2_orir IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_andp DoubleRegs:$src1, DoubleRegs:$src2), (A2_andp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lfsp DoubleRegs:$src1, DoubleRegs:$src2), (S2_lfsp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_min IntRegs:$src1, IntRegs:$src2), (A2_min IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M2_mpysmi IntRegs:$src1, m32_0ImmPred:$src2), - (M2_mpysmi IntRegs:$src1, m32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M2_mpysmi IntRegs:$src1, m32_0ImmPred_timm:$src2), + (M2_mpysmi IntRegs:$src1, m32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vcmpy_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2), (M2_vcmpy_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyu_acc_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -1397,10 +1397,10 @@ def: Pat<(int_hexagon_M2_mpyd_lh_s0 IntRegs:$src1, IntRegs:$src2), (M2_mpyd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_df2w DoubleRegs:$src1), (F2_conv_df2w DoubleRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred:$src2), - (S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, 
Requires<[HasV5]>; +def: Pat<(int_hexagon_S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred_timm:$src2), + (S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_df2d DoubleRegs:$src1), (F2_conv_df2d DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmaculs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), @@ -1423,8 +1423,8 @@ def: Pat<(int_hexagon_A2_vavghr DoubleRegs:$src1, DoubleRegs:$src2), (A2_vavghr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sffma_sc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, PredRegs:$src4), (F2_sffma_sc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, PredRegs:$src4)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_F2_dfclass DoubleRegs:$src1, u5_0ImmPred:$src2), - (F2_dfclass DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_F2_dfclass DoubleRegs:$src1, u5_0ImmPred_timm:$src2), + (F2_dfclass DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_df2ud DoubleRegs:$src1), (F2_conv_df2ud DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_df2uw DoubleRegs:$src1), @@ -1433,16 +1433,16 @@ def: Pat<(int_hexagon_M2_cmpyrs_s0 IntRegs:$src1, IntRegs:$src2), (M2_cmpyrs_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_cmpyrs_s1 IntRegs:$src1, IntRegs:$src2), (M2_cmpyrs_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C4_cmpltei IntRegs:$src1, s32_0ImmPred:$src2), - (C4_cmpltei IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C4_cmpltei IntRegs:$src1, s32_0ImmPred_timm:$src2), + (C4_cmpltei IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C4_cmplteu IntRegs:$src1, IntRegs:$src2), (C4_cmplteu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vsubb_map DoubleRegs:$src1, DoubleRegs:$src2), (A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_subh_l16_ll IntRegs:$src1, IntRegs:$src2), (A2_subh_l16_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred:$src2), - (S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vrmpy_s0 DoubleRegs:$src1, DoubleRegs:$src2), (M2_vrmpy_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2), @@ -1471,14 +1471,14 @@ def: Pat<(int_hexagon_M2_mpyud_hl_s0 IntRegs:$src1, IntRegs:$src2), (M2_mpyud_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vrcmpyi_s0c DoubleRegs:$src1, DoubleRegs:$src2), (M2_vrcmpyi_s0c DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_p_rnd DoubleRegs:$src1, u6_0ImmPred:$src2), - (S2_asr_i_p_rnd DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_p_rnd DoubleRegs:$src1, u6_0ImmPred_timm:$src2), + (S2_asr_i_p_rnd DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_addpsat DoubleRegs:$src1, 
DoubleRegs:$src2), (A2_addpsat DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_svaddhs IntRegs:$src1, IntRegs:$src2), (A2_svaddhs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_ori_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S4_ori_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_ori_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S4_ori_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_sat_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpy_sat_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_sat_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2), @@ -1499,8 +1499,8 @@ def: Pat<(int_hexagon_M2_mpyud_lh_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpyud_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_asl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_lsli s6_0ImmPred:$src1, IntRegs:$src2), - (S4_lsli s6_0ImmPred:$src1, IntRegs:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_lsli s6_0ImmPred_timm:$src1, IntRegs:$src2), + (S4_lsli s6_0ImmPred_timm:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_vw DoubleRegs:$src1, IntRegs:$src2), (S2_lsl_r_vw DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_hh_s1 IntRegs:$src1, IntRegs:$src2), @@ -1529,8 +1529,8 @@ def: Pat<(int_hexagon_A4_cmpbeq IntRegs:$src1, IntRegs:$src2), (A4_cmpbeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_negp DoubleRegs:$src1), (A2_negp DoubleRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred:$src2), - (S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_addh_l16_sat_hl IntRegs:$src1, IntRegs:$src2), (A2_addh_l16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vsatwuh DoubleRegs:$src1), @@ -1541,10 +1541,10 @@ def: Pat<(int_hexagon_S2_svsathb IntRegs:$src1), (S2_svsathb IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2), (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_cround_ri IntRegs:$src1, u5_0ImmPred:$src2), - (A4_cround_ri IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred:$src2), - (S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_cround_ri IntRegs:$src1, u5_0ImmPred_timm:$src2), + (A4_cround_ri IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred_timm:$src2), + (S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_cround_rr IntRegs:$src1, IntRegs:$src2), (A4_cround_rr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C2_mux PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -1563,12 +1563,12 @@ def: Pat<(int_hexagon_A2_vminuh DoubleRegs:$src1, DoubleRegs:$src2), (A2_vminuh DoubleRegs:$src1, 
DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vminub DoubleRegs:$src1, DoubleRegs:$src2), (A2_vminub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_extractu IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3), - (S2_extractu IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_extractu IntRegs:$src1, u5_0ImmPred_timm:$src2, u5_0ImmPred_timm:$src3), + (S2_extractu IntRegs:$src1, u5_0ImmPred_timm:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_svsubh IntRegs:$src1, IntRegs:$src2), (A2_svsubh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_clbaddi IntRegs:$src1, s6_0ImmPred:$src2), - (S4_clbaddi IntRegs:$src1, s6_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_clbaddi IntRegs:$src1, s6_0ImmPred_timm:$src2), + (S4_clbaddi IntRegs:$src1, s6_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sffms IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (F2_sffms IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vsxtbh IntRegs:$src1), @@ -1589,16 +1589,16 @@ def: Pat<(int_hexagon_M2_mpy_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$sr (M2_mpy_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpy_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_addi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S4_addi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_addi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S4_addi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyd_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyd_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_cmpheqi IntRegs:$src1, s32_0ImmPred:$src2), - (A4_cmpheqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_cmpheqi IntRegs:$src1, s32_0ImmPred_timm:$src2), + (A4_cmpheqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_acc_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -1623,8 +1623,8 @@ def: Pat<(int_hexagon_M2_mpyud_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntReg (M2_mpyud_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyud_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyud_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; 
-def: Pat<(int_hexagon_A4_round_ri_sat IntRegs:$src1, u5_0ImmPred:$src2), - (A4_round_ri_sat IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_round_ri_sat IntRegs:$src1, u5_0ImmPred_timm:$src2), + (A4_round_ri_sat IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpy_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_nac_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -1637,10 +1637,10 @@ def: Pat<(int_hexagon_M2_mmacls_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRe (M2_mmacls_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_cmaci_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_cmaci_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_setbit_i IntRegs:$src1, u5_0ImmPred:$src2), - (S2_setbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_setbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_setbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_andn IntRegs:$src1, IntRegs:$src2), (A4_andn IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M5_vrmpybsu DoubleRegs:$src1, DoubleRegs:$src2), @@ -1655,8 +1655,8 @@ def: Pat<(int_hexagon_C2_bitsclr IntRegs:$src1, IntRegs:$src2), (C2_bitsclr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_xor_xacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_xor_xacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred:$src2), - (A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2), + (A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_ornp DoubleRegs:$src1, DoubleRegs:$src2), (A4_ornp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C4_and_or PredRegs:$src1, PredRegs:$src2, PredRegs:$src3), @@ -1673,14 +1673,14 @@ def: Pat<(int_hexagon_M2_vmpy2su_s1 IntRegs:$src1, IntRegs:$src2), (M2_vmpy2su_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vmpy2su_s0 IntRegs:$src1, IntRegs:$src2), (M2_vmpy2su_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C4_nbitsclri IntRegs:$src1, u6_0ImmPred:$src2), - (C4_nbitsclri IntRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2), - (S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, 
Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C4_nbitsclri IntRegs:$src1, u6_0ImmPred_timm:$src2), + (C4_nbitsclri IntRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2), + (S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; // V55 Scalar Instructions. @@ -1689,30 +1689,30 @@ def: Pat<(int_hexagon_A5_ACS DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src // V60 Scalar Instructions. -def: Pat<(int_hexagon_S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_p DoubleRegs:$src1, u6_0ImmPred:$src2), - (S6_rol_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S6_rol_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_r IntRegs:$src1, u5_0ImmPred:$src2), - (S6_rol_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S6_rol_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S6_rol_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, 
Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2), + (S6_rol_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S6_rol_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S6_rol_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S6_rol_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S6_rol_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>; // V62 Scalar Instructions. @@ -1744,8 +1744,8 @@ def: Pat<(int_hexagon_F2_dfadd DoubleRegs:$src1, DoubleRegs:$src2), (F2_dfadd DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV66]>; def: Pat<(int_hexagon_M2_mnaci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mnaci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV66]>; -def: Pat<(int_hexagon_S2_mask u5_0ImmPred:$src1, u5_0ImmPred:$src2), - (S2_mask u5_0ImmPred:$src1, u5_0ImmPred:$src2)>, Requires<[HasV66]>; +def: Pat<(int_hexagon_S2_mask u5_0ImmPred_timm:$src1, u5_0ImmPred_timm:$src2), + (S2_mask u5_0ImmPred_timm:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV66]>; // V60 HVX Instructions. 
@@ -1773,10 +1773,10 @@ def: Pat<(int_hexagon_V6_vaddh_dv HvxWR:$src1, HvxWR:$src2), (V6_vaddh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vaddh_dv_128B HvxWR:$src1, HvxWR:$src2), (V6_vaddh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3), - (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vrmpybusi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3), - (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3), + (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vrmpybusi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3), + (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>; def: Pat<(int_hexagon_V6_vshufoh HvxVR:$src1, HvxVR:$src2), (V6_vshufoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vshufoh_128B HvxVR:$src1, HvxVR:$src2), @@ -1789,10 +1789,10 @@ def: Pat<(int_hexagon_V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2), (V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vdmpyhsuisat_128B HvxWR:$src1, IntRegs:$src2), (V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4), - (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vrsadubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4), - (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4), + (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vrsadubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4), + (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX128B]>; def: Pat<(int_hexagon_V6_vnavgw HvxVR:$src1, HvxVR:$src2), (V6_vnavgw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vnavgw_128B HvxVR:$src1, HvxVR:$src2), @@ -2369,10 +2369,10 @@ def: Pat<(int_hexagon_V6_vsubhsat HvxVR:$src1, HvxVR:$src2), (V6_vsubhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vsubhsat_128B HvxVR:$src1, HvxVR:$src2), (V6_vsubhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4), - (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vrmpyubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4), - (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4), + (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, 
Requires<[HasV60, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vrmpyubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4), + (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX128B]>; def: Pat<(int_hexagon_V6_vabsw HvxVR:$src1), (V6_vabsw HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vabsw_128B HvxVR:$src1), @@ -2489,10 +2489,10 @@ def: Pat<(int_hexagon_V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3), (V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vmpybv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3), (V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3), - (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vrsadubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3), - (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3), + (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vrsadubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3), + (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>; def: Pat<(int_hexagon_V6_vdmpyhb_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3), (V6_vdmpyhb_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vdmpyhb_dv_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3), @@ -2677,10 +2677,10 @@ def: Pat<(int_hexagon_V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), (V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vaddbnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), (V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3), - (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vlalignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3), - (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3), + (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vlalignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3), + (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>; def: Pat<(int_hexagon_V6_vsatwh HvxVR:$src1, HvxVR:$src2), (V6_vsatwh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vsatwh_128B HvxVR:$src1, HvxVR:$src2), @@ -2721,10 +2721,10 @@ def: Pat<(int_hexagon_V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), (V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_veqh_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), (V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>; -def: Pat<(int_hexagon_V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3), - (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, 
UseHVX64B]>; -def: Pat<(int_hexagon_V6_valignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3), - (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>; +def: Pat<(int_hexagon_V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3), + (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>; +def: Pat<(int_hexagon_V6_valignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3), + (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>; def: Pat<(int_hexagon_V6_vaddwsat HvxVR:$src1, HvxVR:$src2), (V6_vaddwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vaddwsat_128B HvxVR:$src1, HvxVR:$src2), @@ -2885,10 +2885,10 @@ def: Pat<(int_hexagon_V6_vsubh HvxVR:$src1, HvxVR:$src2), (V6_vsubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vsubh_128B HvxVR:$src1, HvxVR:$src2), (V6_vsubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3), - (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vrmpyubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3), - (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3), + (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vrmpyubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3), + (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>; def: Pat<(int_hexagon_V6_vminw HvxVR:$src1, HvxVR:$src2), (V6_vminw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vminw_128B HvxVR:$src1, HvxVR:$src2), @@ -2929,10 +2929,10 @@ def: Pat<(int_hexagon_V6_vsubuhw HvxVR:$src1, HvxVR:$src2), (V6_vsubuhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vsubuhw_128B HvxVR:$src1, HvxVR:$src2), (V6_vsubuhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4), - (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vrmpybusi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4), - (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4), + (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vrmpybusi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4), + (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX128B]>; def: Pat<(int_hexagon_V6_vasrw HvxVR:$src1, IntRegs:$src2), (V6_vasrw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vasrw_128B HvxVR:$src1, IntRegs:$src2), @@ -3016,10 +3016,10 @@ def: Pat<(int_hexagon_V6_vlsrb HvxVR:$src1, IntRegs:$src2), (V6_vlsrb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>; def: Pat<(int_hexagon_V6_vlsrb_128B 
HvxVR:$src1, IntRegs:$src2), (V6_vlsrb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3), - (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vlutvwhi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3), - (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3), + (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV62, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vlutvwhi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3), + (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV62, UseHVX128B]>; def: Pat<(int_hexagon_V6_vaddububb_sat HvxVR:$src1, HvxVR:$src2), (V6_vaddububb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>; def: Pat<(int_hexagon_V6_vaddububb_sat_128B HvxVR:$src1, HvxVR:$src2), @@ -3032,10 +3032,10 @@ def: Pat<(int_hexagon_V6_ldtp0 PredRegs:$src1, IntRegs:$src2), (V6_ldtp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>; def: Pat<(int_hexagon_V6_ldtp0_128B PredRegs:$src1, IntRegs:$src2), (V6_ldtp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4), - (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vlutvvb_oracci_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4), - (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4), + (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4)>, Requires<[HasV62, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vlutvvb_oracci_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4), + (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4)>, Requires<[HasV62, UseHVX128B]>; def: Pat<(int_hexagon_V6_vsubuwsat_dv HvxWR:$src1, HvxWR:$src2), (V6_vsubuwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX64B]>; def: Pat<(int_hexagon_V6_vsubuwsat_dv_128B HvxWR:$src1, HvxWR:$src2), @@ -3124,10 +3124,10 @@ def: Pat<(int_hexagon_V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$sr (V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>; def: Pat<(int_hexagon_V6_vasrwuhrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3), - (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vlutvvbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3), - (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3), + (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV62, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vlutvvbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3), + (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV62, UseHVX128B]>; 
def: Pat<(int_hexagon_V6_vsubuwsat HvxVR:$src1, HvxVR:$src2), (V6_vsubuwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>; def: Pat<(int_hexagon_V6_vsubuwsat_128B HvxVR:$src1, HvxVR:$src2), @@ -3188,10 +3188,10 @@ def: Pat<(int_hexagon_V6_ldcnp0 PredRegs:$src1, IntRegs:$src2), (V6_ldcnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>; def: Pat<(int_hexagon_V6_ldcnp0_128B PredRegs:$src1, IntRegs:$src2), (V6_ldcnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4), - (V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vlutvwh_oracci_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4), - (V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4), + (V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4)>, Requires<[HasV62, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vlutvwh_oracci_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4), + (V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4)>, Requires<[HasV62, UseHVX128B]>; def: Pat<(int_hexagon_V6_vsubbsat HvxVR:$src1, HvxVR:$src2), (V6_vsubbsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>; def: Pat<(int_hexagon_V6_vsubbsat_128B HvxVR:$src1, HvxVR:$src2), diff --git a/lib/Target/Hexagon/HexagonDepOperands.td b/lib/Target/Hexagon/HexagonDepOperands.td index fdba7b971258..8a94d96522cc 100644 --- a/lib/Target/Hexagon/HexagonDepOperands.td +++ b/lib/Target/Hexagon/HexagonDepOperands.td @@ -8,120 +8,125 @@ // Automatically generated file, please consult code owner before editing. 
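// The ImmOpPred multiclass introduced below expands each immediate predicate
// into two PatLeaf variants: the unsuffixed form matches ordinary immediates
// (imm), while the _timm form matches target-constant (timm) nodes, which is
// what the intrinsic selection patterns above now use for their immediate
// operands.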
 //===----------------------------------------------------------------------===//
+multiclass ImmOpPred<code pred, ValueType vt = i32> {
+  def "" : PatLeaf<(vt imm), pred>;
+  def _timm : PatLeaf<(vt timm), pred>;
+}
+
 def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; let RenderMethod = "addSignedImmOperands"; }
 def s4_0Imm : Operand<i32> { let ParserMatchClass = s4_0ImmOperand; let DecoderMethod = "s4_0ImmDecoder"; }
-def s4_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 0>(N->getSExtValue());}]>;
+defm s4_0ImmPred : ImmOpPred<[{ return isShiftedInt<4, 0>(N->getSExtValue());}]>;
 def s29_3ImmOperand : AsmOperandClass { let Name = "s29_3Imm"; let RenderMethod = "addSignedImmOperands"; }
 def s29_3Imm : Operand<i32> { let ParserMatchClass = s29_3ImmOperand; let DecoderMethod = "s29_3ImmDecoder"; }
-def s29_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 3>(N->getSExtValue());}]>;
+defm s29_3ImmPred : ImmOpPred<[{ return isShiftedInt<32, 3>(N->getSExtValue());}]>;
 def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; let RenderMethod = "addImmOperands"; }
 def u6_0Imm : Operand<i32> { let ParserMatchClass = u6_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u6_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 0>(N->getSExtValue());}]>;
+defm u6_0ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 0>(N->getSExtValue());}]>;
 def a30_2ImmOperand : AsmOperandClass { let Name = "a30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
 def a30_2Imm : Operand<i32> { let ParserMatchClass = a30_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
-def a30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
+defm a30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
 def u29_3ImmOperand : AsmOperandClass { let Name = "u29_3Imm"; let RenderMethod = "addImmOperands"; }
 def u29_3Imm : Operand<i32> { let ParserMatchClass = u29_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u29_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 3>(N->getSExtValue());}]>;
+defm u29_3ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 3>(N->getSExtValue());}]>;
 def s8_0ImmOperand : AsmOperandClass { let Name = "s8_0Imm"; let RenderMethod = "addSignedImmOperands"; }
 def s8_0Imm : Operand<i32> { let ParserMatchClass = s8_0ImmOperand; let DecoderMethod = "s8_0ImmDecoder"; }
-def s8_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<8, 0>(N->getSExtValue());}]>;
+defm s8_0ImmPred : ImmOpPred<[{ return isShiftedInt<8, 0>(N->getSExtValue());}]>;
 def u32_0ImmOperand : AsmOperandClass { let Name = "u32_0Imm"; let RenderMethod = "addImmOperands"; }
 def u32_0Imm : Operand<i32> { let ParserMatchClass = u32_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 0>(N->getSExtValue());}]>;
+defm u32_0ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 0>(N->getSExtValue());}]>;
 def u4_2ImmOperand : AsmOperandClass { let Name = "u4_2Imm"; let RenderMethod = "addImmOperands"; }
 def u4_2Imm : Operand<i32> { let ParserMatchClass = u4_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u4_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<4, 2>(N->getSExtValue());}]>;
+defm u4_2ImmPred : ImmOpPred<[{ return isShiftedUInt<4, 2>(N->getSExtValue());}]>;
 def u3_0ImmOperand : AsmOperandClass { let Name = "u3_0Imm"; let RenderMethod = "addImmOperands"; }
 def u3_0Imm : Operand<i32> { let ParserMatchClass = u3_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u3_0ImmPred :
PatLeaf<(i32 imm), [{ return isShiftedUInt<3, 0>(N->getSExtValue());}]>; +defm u3_0ImmPred : ImmOpPred<[{ return isShiftedUInt<3, 0>(N->getSExtValue());}]>; def b15_2ImmOperand : AsmOperandClass { let Name = "b15_2Imm"; let RenderMethod = "addSignedImmOperands"; } def b15_2Imm : Operand { let ParserMatchClass = b15_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; } -def b15_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<15, 2>(N->getSExtValue());}]>; +defm b15_2ImmPred : ImmOpPred<[{ return isShiftedInt<15, 2>(N->getSExtValue());}]>; def u11_3ImmOperand : AsmOperandClass { let Name = "u11_3Imm"; let RenderMethod = "addImmOperands"; } def u11_3Imm : Operand { let ParserMatchClass = u11_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u11_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<11, 3>(N->getSExtValue());}]>; +defm u11_3ImmPred : ImmOpPred<[{ return isShiftedUInt<11, 3>(N->getSExtValue());}]>; def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; let RenderMethod = "addSignedImmOperands"; } def s4_3Imm : Operand { let ParserMatchClass = s4_3ImmOperand; let DecoderMethod = "s4_3ImmDecoder"; } -def s4_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 3>(N->getSExtValue());}]>; +defm s4_3ImmPred : ImmOpPred<[{ return isShiftedInt<4, 3>(N->getSExtValue());}]>; def m32_0ImmOperand : AsmOperandClass { let Name = "m32_0Imm"; let RenderMethod = "addImmOperands"; } def m32_0Imm : Operand { let ParserMatchClass = m32_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def m32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 0>(N->getSExtValue());}]>; +defm m32_0ImmPred : ImmOpPred<[{ return isShiftedInt<32, 0>(N->getSExtValue());}]>; def u3_1ImmOperand : AsmOperandClass { let Name = "u3_1Imm"; let RenderMethod = "addImmOperands"; } def u3_1Imm : Operand { let ParserMatchClass = u3_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u3_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<3, 1>(N->getSExtValue());}]>; +defm u3_1ImmPred : ImmOpPred<[{ return isShiftedUInt<3, 1>(N->getSExtValue());}]>; def u1_0ImmOperand : AsmOperandClass { let Name = "u1_0Imm"; let RenderMethod = "addImmOperands"; } def u1_0Imm : Operand { let ParserMatchClass = u1_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u1_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<1, 0>(N->getSExtValue());}]>; +defm u1_0ImmPred : ImmOpPred<[{ return isShiftedUInt<1, 0>(N->getSExtValue());}]>; def s31_1ImmOperand : AsmOperandClass { let Name = "s31_1Imm"; let RenderMethod = "addSignedImmOperands"; } def s31_1Imm : Operand { let ParserMatchClass = s31_1ImmOperand; let DecoderMethod = "s31_1ImmDecoder"; } -def s31_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 1>(N->getSExtValue());}]>; +defm s31_1ImmPred : ImmOpPred<[{ return isShiftedInt<32, 1>(N->getSExtValue());}]>; def s3_0ImmOperand : AsmOperandClass { let Name = "s3_0Imm"; let RenderMethod = "addSignedImmOperands"; } def s3_0Imm : Operand { let ParserMatchClass = s3_0ImmOperand; let DecoderMethod = "s3_0ImmDecoder"; } -def s3_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<3, 0>(N->getSExtValue());}]>; +defm s3_0ImmPred : ImmOpPred<[{ return isShiftedInt<3, 0>(N->getSExtValue());}]>; def s30_2ImmOperand : AsmOperandClass { let Name = "s30_2Imm"; let RenderMethod = "addSignedImmOperands"; } def s30_2Imm : Operand { let ParserMatchClass = s30_2ImmOperand; let DecoderMethod = "s30_2ImmDecoder"; } -def s30_2ImmPred : PatLeaf<(i32 imm), [{ return 
isShiftedInt<32, 2>(N->getSExtValue());}]>; +defm s30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>; def u4_0ImmOperand : AsmOperandClass { let Name = "u4_0Imm"; let RenderMethod = "addImmOperands"; } def u4_0Imm : Operand { let ParserMatchClass = u4_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u4_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<4, 0>(N->getSExtValue());}]>; +defm u4_0ImmPred : ImmOpPred<[{ return isShiftedUInt<4, 0>(N->getSExtValue());}]>; def s6_0ImmOperand : AsmOperandClass { let Name = "s6_0Imm"; let RenderMethod = "addSignedImmOperands"; } def s6_0Imm : Operand { let ParserMatchClass = s6_0ImmOperand; let DecoderMethod = "s6_0ImmDecoder"; } -def s6_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<6, 0>(N->getSExtValue());}]>; +defm s6_0ImmPred : ImmOpPred<[{ return isShiftedInt<6, 0>(N->getSExtValue());}]>; def u5_3ImmOperand : AsmOperandClass { let Name = "u5_3Imm"; let RenderMethod = "addImmOperands"; } def u5_3Imm : Operand { let ParserMatchClass = u5_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u5_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 3>(N->getSExtValue());}]>; +defm u5_3ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 3>(N->getSExtValue());}]>; def s32_0ImmOperand : AsmOperandClass { let Name = "s32_0Imm"; let RenderMethod = "addSignedImmOperands"; } def s32_0Imm : Operand { let ParserMatchClass = s32_0ImmOperand; let DecoderMethod = "s32_0ImmDecoder"; } -def s32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 0>(N->getSExtValue());}]>; +defm s32_0ImmPred : ImmOpPred<[{ return isShiftedInt<32, 0>(N->getSExtValue());}]>; def s6_3ImmOperand : AsmOperandClass { let Name = "s6_3Imm"; let RenderMethod = "addSignedImmOperands"; } def s6_3Imm : Operand { let ParserMatchClass = s6_3ImmOperand; let DecoderMethod = "s6_3ImmDecoder"; } -def s6_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<6, 3>(N->getSExtValue());}]>; +defm s6_3ImmPred : ImmOpPred<[{ return isShiftedInt<6, 3>(N->getSExtValue());}]>; def u10_0ImmOperand : AsmOperandClass { let Name = "u10_0Imm"; let RenderMethod = "addImmOperands"; } def u10_0Imm : Operand { let ParserMatchClass = u10_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u10_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<10, 0>(N->getSExtValue());}]>; +defm u10_0ImmPred : ImmOpPred<[{ return isShiftedUInt<10, 0>(N->getSExtValue());}]>; def u31_1ImmOperand : AsmOperandClass { let Name = "u31_1Imm"; let RenderMethod = "addImmOperands"; } def u31_1Imm : Operand { let ParserMatchClass = u31_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u31_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 1>(N->getSExtValue());}]>; +defm u31_1ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 1>(N->getSExtValue());}]>; def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; let RenderMethod = "addSignedImmOperands"; } def s4_1Imm : Operand { let ParserMatchClass = s4_1ImmOperand; let DecoderMethod = "s4_1ImmDecoder"; } -def s4_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 1>(N->getSExtValue());}]>; +defm s4_1ImmPred : ImmOpPred<[{ return isShiftedInt<4, 1>(N->getSExtValue());}]>; def u16_0ImmOperand : AsmOperandClass { let Name = "u16_0Imm"; let RenderMethod = "addImmOperands"; } def u16_0Imm : Operand { let ParserMatchClass = u16_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u16_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<16, 0>(N->getSExtValue());}]>; +defm u16_0ImmPred : 
ImmOpPred<[{ return isShiftedUInt<16, 0>(N->getSExtValue());}]>; def u6_1ImmOperand : AsmOperandClass { let Name = "u6_1Imm"; let RenderMethod = "addImmOperands"; } def u6_1Imm : Operand { let ParserMatchClass = u6_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u6_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 1>(N->getSExtValue());}]>; +defm u6_1ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 1>(N->getSExtValue());}]>; def u5_2ImmOperand : AsmOperandClass { let Name = "u5_2Imm"; let RenderMethod = "addImmOperands"; } def u5_2Imm : Operand { let ParserMatchClass = u5_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u5_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 2>(N->getSExtValue());}]>; +defm u5_2ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 2>(N->getSExtValue());}]>; def u26_6ImmOperand : AsmOperandClass { let Name = "u26_6Imm"; let RenderMethod = "addImmOperands"; } def u26_6Imm : Operand { let ParserMatchClass = u26_6ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u26_6ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<26, 6>(N->getSExtValue());}]>; +defm u26_6ImmPred : ImmOpPred<[{ return isShiftedUInt<26, 6>(N->getSExtValue());}]>; def u6_2ImmOperand : AsmOperandClass { let Name = "u6_2Imm"; let RenderMethod = "addImmOperands"; } def u6_2Imm : Operand { let ParserMatchClass = u6_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u6_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 2>(N->getSExtValue());}]>; +defm u6_2ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 2>(N->getSExtValue());}]>; def u7_0ImmOperand : AsmOperandClass { let Name = "u7_0Imm"; let RenderMethod = "addImmOperands"; } def u7_0Imm : Operand { let ParserMatchClass = u7_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u7_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<7, 0>(N->getSExtValue());}]>; +defm u7_0ImmPred : ImmOpPred<[{ return isShiftedUInt<7, 0>(N->getSExtValue());}]>; def b13_2ImmOperand : AsmOperandClass { let Name = "b13_2Imm"; let RenderMethod = "addSignedImmOperands"; } def b13_2Imm : Operand { let ParserMatchClass = b13_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; } -def b13_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<13, 2>(N->getSExtValue());}]>; +defm b13_2ImmPred : ImmOpPred<[{ return isShiftedInt<13, 2>(N->getSExtValue());}]>; def u5_0ImmOperand : AsmOperandClass { let Name = "u5_0Imm"; let RenderMethod = "addImmOperands"; } def u5_0Imm : Operand { let ParserMatchClass = u5_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u5_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 0>(N->getSExtValue());}]>; +defm u5_0ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 0>(N->getSExtValue());}]>; def u2_0ImmOperand : AsmOperandClass { let Name = "u2_0Imm"; let RenderMethod = "addImmOperands"; } def u2_0Imm : Operand { let ParserMatchClass = u2_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u2_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<2, 0>(N->getSExtValue());}]>; +defm u2_0ImmPred : ImmOpPred<[{ return isShiftedUInt<2, 0>(N->getSExtValue());}]>; def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; let RenderMethod = "addSignedImmOperands"; } def s4_2Imm : Operand { let ParserMatchClass = s4_2ImmOperand; let DecoderMethod = "s4_2ImmDecoder"; } -def s4_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 2>(N->getSExtValue());}]>; +defm s4_2ImmPred : ImmOpPred<[{ return isShiftedInt<4, 
2>(N->getSExtValue());}]>; def b30_2ImmOperand : AsmOperandClass { let Name = "b30_2Imm"; let RenderMethod = "addSignedImmOperands"; } def b30_2Imm : Operand { let ParserMatchClass = b30_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; } -def b30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>; +defm b30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>; def u8_0ImmOperand : AsmOperandClass { let Name = "u8_0Imm"; let RenderMethod = "addImmOperands"; } def u8_0Imm : Operand { let ParserMatchClass = u8_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u8_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<8, 0>(N->getSExtValue());}]>; +defm u8_0ImmPred : ImmOpPred<[{ return isShiftedUInt<8, 0>(N->getSExtValue());}]>; def u30_2ImmOperand : AsmOperandClass { let Name = "u30_2Imm"; let RenderMethod = "addImmOperands"; } def u30_2Imm : Operand { let ParserMatchClass = u30_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 2>(N->getSExtValue());}]>; +defm u30_2ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 2>(N->getSExtValue());}]>; diff --git a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp index c1f32e54e98d..0844fb8a8629 100644 --- a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp +++ b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp @@ -250,7 +250,7 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B, unsigned Opc = T1I->getOpcode(); if (Opc != Hexagon::J2_jumpt && Opc != Hexagon::J2_jumpf) return false; - unsigned PredR = T1I->getOperand(0).getReg(); + Register PredR = T1I->getOperand(0).getReg(); // Get the layout successor, or 0 if B does not have one. MachineFunction::iterator NextBI = std::next(MachineFunction::iterator(B)); @@ -384,8 +384,8 @@ bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B) for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned R = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) + Register R = MO.getReg(); + if (!Register::isVirtualRegister(R)) continue; if (!isPredicate(R)) continue; @@ -401,8 +401,8 @@ bool HexagonEarlyIfConversion::usesUndefVReg(const MachineInstr *MI) const { for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isUse()) continue; - unsigned R = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) + Register R = MO.getReg(); + if (!Register::isVirtualRegister(R)) continue; const MachineInstr *DefI = MRI->getVRegDef(R); // "Undefined" virtual registers are actually defined via IMPLICIT_DEF. 
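The s*_*/u*_* immediate predicates rewritten to ImmOpPred above all bottom out in llvm::isShiftedInt / llvm::isShiftedUInt checks. A standalone restatement of what those templates test, written from their documented behavior rather than copied from MathExtras.h: the low S bits must be zero, and the value shifted right by S must fit in N signed (or unsigned) bits.

#include <cstdint>

// isShiftedIntSketch<4, 2>(x), for example, accepts exactly the multiples of 4
// in [-32, 28] -- the range the s4_2ImmPred predicate above describes.
template <unsigned N, unsigned S> bool isShiftedIntSketch(int64_t x) {
  if (x & ((int64_t(1) << S) - 1))           // low S bits must be clear
    return false;
  int64_t v = x >> S;                        // arithmetic shift keeps the sign
  return v >= -(int64_t(1) << (N - 1)) && v < (int64_t(1) << (N - 1));
}

template <unsigned N, unsigned S> bool isShiftedUIntSketch(uint64_t x) {
  if (x & ((uint64_t(1) << S) - 1))          // low S bits must be clear
    return false;
  return (x >> S) < (uint64_t(1) << N);      // remaining value fits in N bits
}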
@@ -437,7 +437,7 @@ bool HexagonEarlyIfConversion::isValid(const FlowPattern &FP) const { break; if (usesUndefVReg(&MI)) return false; - unsigned DefR = MI.getOperand(0).getReg(); + Register DefR = MI.getOperand(0).getReg(); if (isPredicate(DefR)) return false; } @@ -491,8 +491,8 @@ unsigned HexagonEarlyIfConversion::countPredicateDefs( for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned R = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) + Register R = MO.getReg(); + if (!Register::isVirtualRegister(R)) continue; if (isPredicate(R)) PredDefs++; @@ -798,7 +798,7 @@ unsigned HexagonEarlyIfConversion::buildMux(MachineBasicBlock *B, const MCInstrDesc &D = HII->get(Opc); DebugLoc DL = B->findBranchDebugLoc(); - unsigned MuxR = MRI->createVirtualRegister(DRC); + Register MuxR = MRI->createVirtualRegister(DRC); BuildMI(*B, At, DL, D, MuxR) .addReg(PredR) .addReg(TR, 0, TSR) @@ -837,7 +837,7 @@ void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB, unsigned MuxR = 0, MuxSR = 0; if (TR && FR) { - unsigned DR = PN->getOperand(0).getReg(); + Register DR = PN->getOperand(0).getReg(); const TargetRegisterClass *RC = MRI->getRegClass(DR); MuxR = buildMux(FP.SplitB, FP.SplitB->getFirstTerminator(), RC, FP.PredR, TR, TSR, FR, FSR); @@ -988,8 +988,8 @@ void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) { MachineInstr *PN = &*I; assert(PN->getNumOperands() == 3 && "Invalid phi node"); MachineOperand &UO = PN->getOperand(1); - unsigned UseR = UO.getReg(), UseSR = UO.getSubReg(); - unsigned DefR = PN->getOperand(0).getReg(); + Register UseR = UO.getReg(), UseSR = UO.getSubReg(); + Register DefR = PN->getOperand(0).getReg(); unsigned NewR = UseR; if (UseSR) { // MRI.replaceVregUsesWith does not allow to update the subregister, diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp index c343e426ac7d..8984ee82960d 100644 --- a/lib/Target/Hexagon/HexagonExpandCondsets.cpp +++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -285,7 +285,7 @@ bool HexagonExpandCondsets::isCondset(const MachineInstr &MI) { } LaneBitmask HexagonExpandCondsets::getLaneMask(unsigned Reg, unsigned Sub) { - assert(TargetRegisterInfo::isVirtualRegister(Reg)); + assert(Register::isVirtualRegister(Reg)); return Sub != 0 ? 
TRI->getSubRegIndexLaneMask(Sub) : MRI->getMaxLaneMaskForVReg(Reg); } @@ -364,7 +364,7 @@ void HexagonExpandCondsets::updateKillFlags(unsigned Reg) { void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM, LiveRange &Range) { - assert(TargetRegisterInfo::isVirtualRegister(Reg)); + assert(Register::isVirtualRegister(Reg)); if (Range.empty()) return; @@ -372,8 +372,8 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM, auto IsRegDef = [this,Reg,LM] (MachineOperand &Op) -> std::pair { if (!Op.isReg() || !Op.isDef()) return { false, false }; - unsigned DR = Op.getReg(), DSR = Op.getSubReg(); - if (!TargetRegisterInfo::isVirtualRegister(DR) || DR != Reg) + Register DR = Op.getReg(), DSR = Op.getSubReg(); + if (!Register::isVirtualRegister(DR) || DR != Reg) return { false, false }; LaneBitmask SLM = getLaneMask(DR, DSR); LaneBitmask A = SLM & LM; @@ -551,8 +551,8 @@ void HexagonExpandCondsets::updateLiveness(std::set &RegSet, bool Recalc, bool UpdateKills, bool UpdateDeads) { UpdateKills |= UpdateDeads; for (unsigned R : RegSet) { - if (!TargetRegisterInfo::isVirtualRegister(R)) { - assert(TargetRegisterInfo::isPhysicalRegister(R)); + if (!Register::isVirtualRegister(R)) { + assert(Register::isPhysicalRegister(R)); // There shouldn't be any physical registers as operands, except // possibly reserved registers. assert(MRI->isReserved(R)); @@ -579,17 +579,17 @@ unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO, using namespace Hexagon; if (SO.isReg()) { - unsigned PhysR; + Register PhysR; RegisterRef RS = SO; - if (TargetRegisterInfo::isVirtualRegister(RS.Reg)) { + if (Register::isVirtualRegister(RS.Reg)) { const TargetRegisterClass *VC = MRI->getRegClass(RS.Reg); assert(VC->begin() != VC->end() && "Empty register class"); PhysR = *VC->begin(); } else { - assert(TargetRegisterInfo::isPhysicalRegister(RS.Reg)); + assert(Register::isPhysicalRegister(RS.Reg)); PhysR = RS.Reg; } - unsigned PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub); + Register PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS); switch (TRI->getRegSizeInBits(*RC)) { case 32: @@ -671,7 +671,7 @@ bool HexagonExpandCondsets::split(MachineInstr &MI, MachineOperand &MD = MI.getOperand(0); // Definition MachineOperand &MP = MI.getOperand(1); // Predicate register assert(MD.isDef()); - unsigned DR = MD.getReg(), DSR = MD.getSubReg(); + Register DR = MD.getReg(), DSR = MD.getSubReg(); bool ReadUndef = MD.isUndef(); MachineBasicBlock::iterator At = MI; @@ -802,7 +802,7 @@ bool HexagonExpandCondsets::canMoveOver(MachineInstr &MI, ReferenceMap &Defs, // For physical register we would need to check register aliases, etc. // and we don't want to bother with that. It would be of little value // before the actual register rewriting (from virtual to physical). - if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + if (!Register::isVirtualRegister(RR.Reg)) return false; // No redefs for any operand. 
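Most of the mechanical churn in this and the following files is the unsigned-to-Register migration: TargetRegisterInfo::isVirtualRegister and friends become static members of Register, and values produced by getReg() or createVirtualRegister are held as Register. A small model of the virtual/physical split these helpers rely on, assuming the usual encoding in which virtual register numbers occupy the top half of the 32-bit range; this is an illustration, not the Register class itself.

#include <cassert>

namespace sketch {
struct Register {
  // Assumed encoding: bit 31 marks a virtual register, 0 means "no register".
  static constexpr unsigned VirtualFlag = 1u << 31;

  static bool isVirtualRegister(unsigned R) { return R & VirtualFlag; }
  static bool isPhysicalRegister(unsigned R) {
    return R != 0 && !(R & VirtualFlag);
  }
  static unsigned virtReg2Index(unsigned R) {
    assert(isVirtualRegister(R) && "not a virtual register");
    return R & ~VirtualFlag;
  }
  static unsigned index2VirtReg(unsigned Index) { return Index | VirtualFlag; }
};
} // namespace sketch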
if (isRefInMap(RR, Defs, Exec_Then)) @@ -954,7 +954,7 @@ bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond, return false; RegisterRef RT(MS); - unsigned PredR = MP.getReg(); + Register PredR = MP.getReg(); MachineInstr *DefI = getReachingDefForPred(RT, TfrI, PredR, Cond); if (!DefI || !isPredicable(DefI)) return false; @@ -999,7 +999,7 @@ bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond, // subregisters are other physical registers, and we are not checking // that. RegisterRef RR = Op; - if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + if (!Register::isVirtualRegister(RR.Reg)) return false; ReferenceMap &Map = Op.isDef() ? Defs : Uses; @@ -1091,7 +1091,7 @@ bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B, } bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) { - if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + if (!Register::isVirtualRegister(RR.Reg)) return false; const TargetRegisterClass *RC = MRI->getRegClass(RR.Reg); if (RC == &Hexagon::IntRegsRegClass) { diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp index f7edc168de4a..d21de8ccb5ab 100644 --- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp +++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp @@ -114,12 +114,11 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) { // First pass - compute the offset of each basic block. for (const MachineBasicBlock &MBB : MF) { - if (MBB.getAlignment()) { + if (MBB.getAlignment() != Align::None()) { // Although we don't know the exact layout of the final code, we need // to account for alignment padding somehow. This heuristic pads each // aligned basic block according to the alignment value. - int ByteAlign = (1u << MBB.getAlignment()) - 1; - InstOffset = (InstOffset + ByteAlign) & ~(ByteAlign); + InstOffset = alignTo(InstOffset, MBB.getAlignment()); } BlockToInstOffset[&MBB] = InstOffset; diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 3368ee4fb3b9..bfa3372d7faf 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -303,10 +303,10 @@ static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR, if (MO.isFI()) return true; if (MO.isReg()) { - unsigned R = MO.getReg(); + Register R = MO.getReg(); // Virtual registers will need scavenging, which then may require // a stack slot. - if (TargetRegisterInfo::isVirtualRegister(R)) + if (Register::isVirtualRegister(R)) return true; for (MCSubRegIterator S(R, &HRI, true); S.isValid(); ++S) if (CSR[*S]) @@ -973,8 +973,8 @@ void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB, // understand paired registers for cfi_offset. 
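The HexagonFixupHwLoops hunk above drops the hand-rolled mask arithmetic on a log2 alignment in favour of MBB.getAlignment() returning an llvm::Align and a call to alignTo. Both forms round InstOffset up to the same boundary; a self-contained sketch of the rounding, assuming a power-of-two alignment value as Align guarantees:

#include <cassert>
#include <cstdint>

static uint64_t alignToSketch(uint64_t Offset, uint64_t Alignment) {
  assert(Alignment && (Alignment & (Alignment - 1)) == 0 &&
         "Align values are always non-zero powers of two");
  return (Offset + Alignment - 1) & ~(Alignment - 1);
}

// Old form, for comparison, where the block stored log2 of its alignment:
//   unsigned ByteAlign = (1u << LogAlign) - 1;
//   InstOffset = (InstOffset + ByteAlign) & ~ByteAlign;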
// Eg .cfi_offset r1:0, -64 - unsigned HiReg = HRI.getSubReg(Reg, Hexagon::isub_hi); - unsigned LoReg = HRI.getSubReg(Reg, Hexagon::isub_lo); + Register HiReg = HRI.getSubReg(Reg, Hexagon::isub_hi); + Register LoReg = HRI.getSubReg(Reg, Hexagon::isub_lo); unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true); unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true); auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg, @@ -1377,10 +1377,10 @@ void HexagonFrameLowering::processFunctionBeforeFrameFinalized( } MFI.setLocalFrameSize(LFS); - unsigned A = MFI.getLocalFrameMaxAlign(); + Align A = MFI.getLocalFrameMaxAlign(); assert(A <= 8 && "Unexpected local frame alignment"); - if (A == 0) - MFI.setLocalFrameMaxAlign(8); + if (A == 1) + MFI.setLocalFrameMaxAlign(Align(8)); MFI.setUseLocalStackAllocationBlock(true); // Set the physical aligned-stack base address register. @@ -1570,13 +1570,13 @@ bool HexagonFrameLowering::expandCopy(MachineBasicBlock &B, const HexagonInstrInfo &HII, SmallVectorImpl &NewRegs) const { MachineInstr *MI = &*It; DebugLoc DL = MI->getDebugLoc(); - unsigned DstR = MI->getOperand(0).getReg(); - unsigned SrcR = MI->getOperand(1).getReg(); + Register DstR = MI->getOperand(0).getReg(); + Register SrcR = MI->getOperand(1).getReg(); if (!Hexagon::ModRegsRegClass.contains(DstR) || !Hexagon::ModRegsRegClass.contains(SrcR)) return false; - unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), TmpR).add(MI->getOperand(1)); BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), DstR) .addReg(TmpR, RegState::Kill); @@ -1595,13 +1595,13 @@ bool HexagonFrameLowering::expandStoreInt(MachineBasicBlock &B, DebugLoc DL = MI->getDebugLoc(); unsigned Opc = MI->getOpcode(); - unsigned SrcR = MI->getOperand(2).getReg(); + Register SrcR = MI->getOperand(2).getReg(); bool IsKill = MI->getOperand(2).isKill(); int FI = MI->getOperand(0).getIndex(); // TmpR = C2_tfrpr SrcR if SrcR is a predicate register // TmpR = A2_tfrcrr SrcR if SrcR is a modifier register - unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); unsigned TfrOpc = (Opc == Hexagon::STriw_pred) ? 
Hexagon::C2_tfrpr : Hexagon::A2_tfrcrr; BuildMI(B, It, DL, HII.get(TfrOpc), TmpR) @@ -1628,11 +1628,11 @@ bool HexagonFrameLowering::expandLoadInt(MachineBasicBlock &B, DebugLoc DL = MI->getDebugLoc(); unsigned Opc = MI->getOpcode(); - unsigned DstR = MI->getOperand(0).getReg(); + Register DstR = MI->getOperand(0).getReg(); int FI = MI->getOperand(1).getIndex(); // TmpR = L2_loadri_io FI, 0 - unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); BuildMI(B, It, DL, HII.get(Hexagon::L2_loadri_io), TmpR) .addFrameIndex(FI) .addImm(0) @@ -1658,7 +1658,7 @@ bool HexagonFrameLowering::expandStoreVecPred(MachineBasicBlock &B, return false; DebugLoc DL = MI->getDebugLoc(); - unsigned SrcR = MI->getOperand(2).getReg(); + Register SrcR = MI->getOperand(2).getReg(); bool IsKill = MI->getOperand(2).isKill(); int FI = MI->getOperand(0).getIndex(); auto *RC = &Hexagon::HvxVRRegClass; @@ -1667,8 +1667,8 @@ bool HexagonFrameLowering::expandStoreVecPred(MachineBasicBlock &B, // TmpR0 = A2_tfrsi 0x01010101 // TmpR1 = V6_vandqrt Qx, TmpR0 // store FI, 0, TmpR1 - unsigned TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); - unsigned TmpR1 = MRI.createVirtualRegister(RC); + Register TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + Register TmpR1 = MRI.createVirtualRegister(RC); BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0) .addImm(0x01010101); @@ -1695,15 +1695,15 @@ bool HexagonFrameLowering::expandLoadVecPred(MachineBasicBlock &B, return false; DebugLoc DL = MI->getDebugLoc(); - unsigned DstR = MI->getOperand(0).getReg(); + Register DstR = MI->getOperand(0).getReg(); int FI = MI->getOperand(1).getIndex(); auto *RC = &Hexagon::HvxVRRegClass; // TmpR0 = A2_tfrsi 0x01010101 // TmpR1 = load FI, 0 // DstR = V6_vandvrt TmpR1, TmpR0 - unsigned TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); - unsigned TmpR1 = MRI.createVirtualRegister(RC); + Register TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + Register TmpR1 = MRI.createVirtualRegister(RC); BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0) .addImm(0x01010101); @@ -1745,9 +1745,9 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B, } DebugLoc DL = MI->getDebugLoc(); - unsigned SrcR = MI->getOperand(2).getReg(); - unsigned SrcLo = HRI.getSubReg(SrcR, Hexagon::vsub_lo); - unsigned SrcHi = HRI.getSubReg(SrcR, Hexagon::vsub_hi); + Register SrcR = MI->getOperand(2).getReg(); + Register SrcLo = HRI.getSubReg(SrcR, Hexagon::vsub_lo); + Register SrcHi = HRI.getSubReg(SrcR, Hexagon::vsub_hi); bool IsKill = MI->getOperand(2).isKill(); int FI = MI->getOperand(0).getIndex(); @@ -1793,9 +1793,9 @@ bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B, return false; DebugLoc DL = MI->getDebugLoc(); - unsigned DstR = MI->getOperand(0).getReg(); - unsigned DstHi = HRI.getSubReg(DstR, Hexagon::vsub_hi); - unsigned DstLo = HRI.getSubReg(DstR, Hexagon::vsub_lo); + Register DstR = MI->getOperand(0).getReg(); + Register DstHi = HRI.getSubReg(DstR, Hexagon::vsub_hi); + Register DstLo = HRI.getSubReg(DstR, Hexagon::vsub_lo); int FI = MI->getOperand(1).getIndex(); unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass); @@ -1834,7 +1834,7 @@ bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B, auto &HRI = *MF.getSubtarget().getRegisterInfo(); DebugLoc DL = MI->getDebugLoc(); - unsigned SrcR = MI->getOperand(2).getReg(); + Register SrcR = MI->getOperand(2).getReg(); bool IsKill = 
MI->getOperand(2).isKill(); int FI = MI->getOperand(0).getIndex(); @@ -1863,7 +1863,7 @@ bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B, auto &HRI = *MF.getSubtarget().getRegisterInfo(); DebugLoc DL = MI->getDebugLoc(); - unsigned DstR = MI->getOperand(0).getReg(); + Register DstR = MI->getOperand(0).getReg(); int FI = MI->getOperand(1).getIndex(); unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass); @@ -2299,7 +2299,7 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, int TFI; if (!HII.isLoadFromStackSlot(MI, TFI) || TFI != FI) continue; - unsigned DstR = MI.getOperand(0).getReg(); + Register DstR = MI.getOperand(0).getReg(); assert(MI.getOperand(0).getSubReg() == 0); MachineInstr *CopyOut = nullptr; if (DstR != FoundR) { diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h index 65e8c7686640..27265dd53794 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.h +++ b/lib/Target/Hexagon/HexagonFrameLowering.h @@ -30,7 +30,7 @@ class TargetRegisterClass; class HexagonFrameLowering : public TargetFrameLowering { public: explicit HexagonFrameLowering() - : TargetFrameLowering(StackGrowsDown, 8, 0, 1, true) {} + : TargetFrameLowering(StackGrowsDown, Align(8), 0, Align::None(), true) {} // All of the prolog/epilog functionality, including saving and restoring // callee-saved registers is handled in emitPrologue. This is to have the diff --git a/lib/Target/Hexagon/HexagonGenExtract.cpp b/lib/Target/Hexagon/HexagonGenExtract.cpp index 3417c74e359b..caa0e4d80397 100644 --- a/lib/Target/Hexagon/HexagonGenExtract.cpp +++ b/lib/Target/Hexagon/HexagonGenExtract.cpp @@ -184,7 +184,7 @@ bool HexagonGenExtract::convert(Instruction *In) { // The width of the extracted field is the minimum of the original bits // that remain after the shifts and the number of contiguous 1s in the mask. uint32_t W = std::min(U, T); - if (W == 0) + if (W == 0 || W == 1) return false; // Check if the extracted bits are contained within the mask that it is diff --git a/lib/Target/Hexagon/HexagonGenInsert.cpp b/lib/Target/Hexagon/HexagonGenInsert.cpp index 81025c1c5325..48881e02f4d3 100644 --- a/lib/Target/Hexagon/HexagonGenInsert.cpp +++ b/lib/Target/Hexagon/HexagonGenInsert.cpp @@ -163,11 +163,11 @@ namespace { } static inline unsigned v2x(unsigned v) { - return TargetRegisterInfo::virtReg2Index(v); + return Register::virtReg2Index(v); } static inline unsigned x2v(unsigned x) { - return TargetRegisterInfo::index2VirtReg(x); + return Register::index2VirtReg(x); } }; @@ -267,7 +267,7 @@ namespace { CellMapShadow(const BitTracker &T) : BT(T) {} const BitTracker::RegisterCell &lookup(unsigned VR) { - unsigned RInd = TargetRegisterInfo::virtReg2Index(VR); + unsigned RInd = Register::virtReg2Index(VR); // Grow the vector to at least 32 elements. 
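Two of the HexagonFrameLowering hunks above (getLocalFrameMaxAlign and the TargetFrameLowering constructor) switch raw unsigned alignments over to llvm::Align, where "no extra alignment" is Align(1) / Align::None() rather than 0; that is why the old A == 0 test becomes A == 1 while still meaning "only raise the default to 8". A toy model of just that convention, not the real class:

#include <cassert>
#include <cstdint>

struct AlignSketch {
  uint64_t Value;                                    // a power of two, never 0
  explicit AlignSketch(uint64_t V = 1) : Value(V) {
    assert(V && (V & (V - 1)) == 0 && "alignment must be a power of two");
  }
  static AlignSketch None() { return AlignSketch(1); } // the "unaligned" state
  friend bool operator==(AlignSketch A, AlignSketch B) {
    return A.Value == B.Value;
  }
};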
if (RInd >= CVect.size()) CVect.resize(std::max(RInd+16, 32U), nullptr); @@ -606,9 +606,9 @@ void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const { for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isDef()) { - unsigned R = MO.getReg(); + Register R = MO.getReg(); assert(MO.getSubReg() == 0 && "Unexpected subregister in definition"); - if (TargetRegisterInfo::isVirtualRegister(R)) + if (Register::isVirtualRegister(R)) RO.insert(std::make_pair(R, Index++)); } } @@ -724,8 +724,8 @@ void HexagonGenInsert::getInstrDefs(const MachineInstr *MI, const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - unsigned R = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) + Register R = MO.getReg(); + if (!Register::isVirtualRegister(R)) continue; Defs.insert(R); } @@ -737,8 +737,8 @@ void HexagonGenInsert::getInstrUses(const MachineInstr *MI, const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; - unsigned R = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) + Register R = MO.getReg(); + if (!Register::isVirtualRegister(R)) continue; Uses.insert(R); } @@ -1399,7 +1399,7 @@ bool HexagonGenInsert::generateInserts() { for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { unsigned VR = I->first; const TargetRegisterClass *RC = MRI->getRegClass(VR); - unsigned NewVR = MRI->createVirtualRegister(RC); + Register NewVR = MRI->createVirtualRegister(RC); RegMap[VR] = NewVR; } @@ -1477,9 +1477,8 @@ bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) { for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned R = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R) || - !MRI->use_nodbg_empty(R)) { + Register R = MO.getReg(); + if (!Register::isVirtualRegister(R) || !MRI->use_nodbg_empty(R)) { AllDead = false; break; } @@ -1598,7 +1597,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { IterListType Out; for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { - unsigned Idx = TargetRegisterInfo::virtReg2Index(I->first); + unsigned Idx = Register::virtReg2Index(I->first); if (Idx >= Cutoff) Out.push_back(I); } diff --git a/lib/Target/Hexagon/HexagonGenMux.cpp b/lib/Target/Hexagon/HexagonGenMux.cpp index cdafbc20ab86..b559e7bbbb60 100644 --- a/lib/Target/Hexagon/HexagonGenMux.cpp +++ b/lib/Target/Hexagon/HexagonGenMux.cpp @@ -171,7 +171,7 @@ void HexagonGenMux::getDefsUses(const MachineInstr *MI, BitVector &Defs, for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || MO.isImplicit()) continue; - unsigned R = MO.getReg(); + Register R = MO.getReg(); BitVector &Set = MO.isDef() ? 
Defs : Uses; expandReg(R, Set); } @@ -239,14 +239,14 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { unsigned Opc = MI->getOpcode(); if (!isCondTransfer(Opc)) continue; - unsigned DR = MI->getOperand(0).getReg(); + Register DR = MI->getOperand(0).getReg(); if (isRegPair(DR)) continue; MachineOperand &PredOp = MI->getOperand(1); if (PredOp.isUndef()) continue; - unsigned PR = PredOp.getReg(); + Register PR = PredOp.getReg(); unsigned Idx = I2X.lookup(MI); CondsetMap::iterator F = CM.find(DR); bool IfTrue = HII->isPredicatedTrue(Opc); diff --git a/lib/Target/Hexagon/HexagonGenPredicate.cpp b/lib/Target/Hexagon/HexagonGenPredicate.cpp index e991fa8b61c8..24d33c91a29b 100644 --- a/lib/Target/Hexagon/HexagonGenPredicate.cpp +++ b/lib/Target/Hexagon/HexagonGenPredicate.cpp @@ -133,7 +133,7 @@ INITIALIZE_PASS_END(HexagonGenPredicate, "hexagon-gen-pred", "Hexagon generate predicate operations", false, false) bool HexagonGenPredicate::isPredReg(unsigned R) { - if (!TargetRegisterInfo::isVirtualRegister(R)) + if (!Register::isVirtualRegister(R)) return false; const TargetRegisterClass *RC = MRI->getRegClass(R); return RC == &Hexagon::PredRegsRegClass; @@ -213,7 +213,7 @@ void HexagonGenPredicate::collectPredicateGPR(MachineFunction &MF) { case TargetOpcode::COPY: if (isPredReg(MI->getOperand(1).getReg())) { RegisterSubReg RD = MI->getOperand(0); - if (TargetRegisterInfo::isVirtualRegister(RD.R)) + if (Register::isVirtualRegister(RD.R)) PredGPRs.insert(RD); } break; @@ -245,7 +245,7 @@ RegisterSubReg HexagonGenPredicate::getPredRegFor(const RegisterSubReg &Reg) { // Create a predicate register for a given Reg. The newly created register // will have its value copied from Reg, so that it can be later used as // an operand in other instructions. - assert(TargetRegisterInfo::isVirtualRegister(Reg.R)); + assert(Register::isVirtualRegister(Reg.R)); RegToRegMap::iterator F = G2P.find(Reg); if (F != G2P.end()) return F->second; @@ -265,7 +265,7 @@ RegisterSubReg HexagonGenPredicate::getPredRegFor(const RegisterSubReg &Reg) { MachineBasicBlock &B = *DefI->getParent(); DebugLoc DL = DefI->getDebugLoc(); const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass; - unsigned NewPR = MRI->createVirtualRegister(PredRC); + Register NewPR = MRI->createVirtualRegister(PredRC); // For convertible instructions, do not modify them, so that they can // be converted later. Generate a copy from Reg to NewPR. @@ -432,7 +432,7 @@ bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) { // Generate a copy-out: NewGPR = NewPR, and replace all uses of OutR // with NewGPR. 
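The HexagonGenMux hunks a little above operate on pairs of predicated transfers; the rewrite the pass performs is easiest to see on scalars. A minimal model, illustration only: two conditional transfers into the same register under complementary senses of one predicate collapse into a single C2_mux.

#include <cassert>
#include <cstdint>

static int32_t predicatedPairSketch(bool P, int32_t A, int32_t B) {
  int32_t R = 0;
  if (P)  R = A;                     // A2_tfrt: transfer if predicate is true
  if (!P) R = B;                     // A2_tfrf: transfer if predicate is false
  return R;
}

static int32_t muxSketch(bool P, int32_t A, int32_t B) {
  return P ? A : B;                  // C2_mux
}

int main() {
  for (bool P : {false, true})
    assert(predicatedPairSketch(P, 7, -3) == muxSketch(P, 7, -3));
  return 0;
}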
const TargetRegisterClass *RC = MRI->getRegClass(OutR.R); - unsigned NewOutR = MRI->createVirtualRegister(RC); + Register NewOutR = MRI->createVirtualRegister(RC); BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), NewOutR) .addReg(NewPR.R, 0, NewPR.S); MRI->replaceRegWith(OutR.R, NewOutR); @@ -471,9 +471,9 @@ bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) { continue; RegisterSubReg DR = MI.getOperand(0); RegisterSubReg SR = MI.getOperand(1); - if (!TargetRegisterInfo::isVirtualRegister(DR.R)) + if (!Register::isVirtualRegister(DR.R)) continue; - if (!TargetRegisterInfo::isVirtualRegister(SR.R)) + if (!Register::isVirtualRegister(SR.R)) continue; if (MRI->getRegClass(DR.R) != PredRC) continue; diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index cecbaedb6d70..62291790f0fe 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -435,17 +435,17 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L, if (Phi->getOperand(i+1).getMBB() != Latch) continue; - unsigned PhiOpReg = Phi->getOperand(i).getReg(); + Register PhiOpReg = Phi->getOperand(i).getReg(); MachineInstr *DI = MRI->getVRegDef(PhiOpReg); if (DI->getDesc().isAdd()) { // If the register operand to the add is the PHI we're looking at, this // meets the induction pattern. - unsigned IndReg = DI->getOperand(1).getReg(); + Register IndReg = DI->getOperand(1).getReg(); MachineOperand &Opnd2 = DI->getOperand(2); int64_t V; if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) { - unsigned UpdReg = DI->getOperand(0).getReg(); + Register UpdReg = DI->getOperand(0).getReg(); IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); } } @@ -694,7 +694,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, Cmp = Comparison::getSwappedComparison(Cmp); if (InitialValue->isReg()) { - unsigned R = InitialValue->getReg(); + Register R = InitialValue->getReg(); MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); if (!MDT->properlyDominates(DefBB, Header)) { int64_t V; @@ -704,7 +704,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, OldInsts.push_back(MRI->getVRegDef(R)); } if (EndValue->isReg()) { - unsigned R = EndValue->getReg(); + Register R = EndValue->getReg(); MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); if (!MDT->properlyDominates(DefBB, Header)) { int64_t V; @@ -910,7 +910,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, (RegToImm ? 
TII->get(Hexagon::A2_subri) : TII->get(Hexagon::A2_addi)); if (RegToReg || RegToImm) { - unsigned SubR = MRI->createVirtualRegister(IntRC); + Register SubR = MRI->createVirtualRegister(IntRC); MachineInstrBuilder SubIB = BuildMI(*PH, InsertPos, DL, SubD, SubR); @@ -931,7 +931,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, EndValInstr->getOperand(2).getImm() == StartV) { DistR = EndValInstr->getOperand(1).getReg(); } else { - unsigned SubR = MRI->createVirtualRegister(IntRC); + Register SubR = MRI->createVirtualRegister(IntRC); MachineInstrBuilder SubIB = BuildMI(*PH, InsertPos, DL, SubD, SubR); SubIB.addReg(End->getReg(), 0, End->getSubReg()) @@ -950,7 +950,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, AdjSR = DistSR; } else { // Generate CountR = ADD DistR, AdjVal - unsigned AddR = MRI->createVirtualRegister(IntRC); + Register AddR = MRI->createVirtualRegister(IntRC); MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi); BuildMI(*PH, InsertPos, DL, AddD, AddR) .addReg(DistR, 0, DistSR) @@ -971,7 +971,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, unsigned Shift = Log2_32(IVBump); // Generate NormR = LSR DistR, Shift. - unsigned LsrR = MRI->createVirtualRegister(IntRC); + Register LsrR = MRI->createVirtualRegister(IntRC); const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r); BuildMI(*PH, InsertPos, DL, LsrD, LsrR) .addReg(AdjR, 0, AdjSR) @@ -1038,7 +1038,7 @@ bool HexagonHardwareLoops::isDead(const MachineInstr *MI, if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (MRI->use_nodbg_empty(Reg)) continue; @@ -1058,7 +1058,7 @@ bool HexagonHardwareLoops::isDead(const MachineInstr *MI, if (!OPO.isReg() || !OPO.isDef()) continue; - unsigned OPReg = OPO.getReg(); + Register OPReg = OPO.getReg(); use_nodbg_iterator nextJ; for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg); J != End; J = nextJ) { @@ -1092,7 +1092,7 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); MachineRegisterInfo::use_iterator nextI; for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), E = MRI->use_end(); I != E; I = nextI) { @@ -1244,7 +1244,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L, if (TripCount->isReg()) { // Create a copy of the loop count register. - unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); + Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg) .addReg(TripCount->getReg(), 0, TripCount->getSubReg()); // Add the Loop instruction to the beginning of the loop. @@ -1257,7 +1257,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L, // create a new virtual register. int64_t CountImm = TripCount->getImm(); if (!TII->isValidOffset(LOOP_i, CountImm, TRI)) { - unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); + Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg) .addImm(CountImm); BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)) @@ -1333,7 +1333,7 @@ bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI, return true; // Out of order. 
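computeCount in the HexagonHardwareLoops hunks above materializes the trip count as ((End - Start) + Adj) >> log2(IVBump), using A2_subri or a register subtract, then A2_addi, then S2_lsr_i_r. A plain-integer restatement of that arithmetic; the adjustment constant depends on the exit comparison, so it is taken as a parameter here, and the names are illustrative:

#include <cassert>
#include <cstdint>

static uint32_t tripCountSketch(int64_t Start, int64_t End, int64_t Adj,
                                uint32_t IVBump) {
  assert(IVBump && (IVBump & (IVBump - 1)) == 0 && "bump assumed power of two");
  unsigned Shift = 0;                      // Log2_32(IVBump)
  while ((uint32_t(1) << Shift) < IVBump)
    ++Shift;
  int64_t Dist = End - Start;              // SubR = End - Start
  int64_t AdjD = Dist + Adj;               // AddR = DistR + AdjVal
  return uint32_t(AdjD) >> Shift;          // LsrR = S2_lsr_i_r(AddR, Shift)
}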
- unsigned PredR = CmpI->getOperand(0).getReg(); + Register PredR = CmpI->getOperand(0).getReg(); bool FoundBump = false; instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt); for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) { @@ -1428,10 +1428,10 @@ bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow( if (checkForImmediate(*InitVal, Imm)) return (EndVal->getImm() == Imm); - unsigned Reg = InitVal->getReg(); + Register Reg = InitVal->getReg(); // We don't know the value of a physical register. - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return true; MachineInstr *Def = MRI->getVRegDef(Reg); @@ -1508,8 +1508,8 @@ bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO, // processed to handle potential subregisters in MO. int64_t TV; - unsigned R = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) + Register R = MO.getReg(); + if (!Register::isVirtualRegister(R)) return false; MachineInstr *DI = MRI->getVRegDef(R); unsigned DOpc = DI->getOpcode(); @@ -1582,11 +1582,11 @@ void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) { } assert(MO.isReg()); - unsigned R = MO.getReg(); + Register R = MO.getReg(); MachineInstr *DI = MRI->getVRegDef(R); const TargetRegisterClass *RC = MRI->getRegClass(R); - unsigned NewR = MRI->createVirtualRegister(RC); + Register NewR = MRI->createVirtualRegister(RC); MachineBasicBlock &B = *DI->getParent(); DebugLoc DL = DI->getDebugLoc(); BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val); @@ -1634,17 +1634,17 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { if (Phi->getOperand(i+1).getMBB() != Latch) continue; - unsigned PhiReg = Phi->getOperand(i).getReg(); + Register PhiReg = Phi->getOperand(i).getReg(); MachineInstr *DI = MRI->getVRegDef(PhiReg); if (DI->getDesc().isAdd()) { // If the register operand to the add/sub is the PHI we are looking // at, this meets the induction pattern. - unsigned IndReg = DI->getOperand(1).getReg(); + Register IndReg = DI->getOperand(1).getReg(); MachineOperand &Opnd2 = DI->getOperand(2); int64_t V; if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) { - unsigned UpdReg = DI->getOperand(0).getReg(); + Register UpdReg = DI->getOperand(0).getReg(); IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); } } @@ -1702,7 +1702,7 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { if (!Cond[CSz-1].isReg()) return false; - unsigned P = Cond[CSz-1].getReg(); + Register P = Cond[CSz - 1].getReg(); MachineInstr *PredDef = MRI->getVRegDef(P); if (!PredDef->isCompare()) @@ -1903,15 +1903,15 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL); NewPH->insert(NewPH->end(), NewPN); - unsigned PR = PN->getOperand(0).getReg(); + Register PR = PN->getOperand(0).getReg(); const TargetRegisterClass *RC = MRI->getRegClass(PR); - unsigned NewPR = MRI->createVirtualRegister(RC); + Register NewPR = MRI->createVirtualRegister(RC); NewPN->addOperand(MachineOperand::CreateReg(NewPR, true)); // Copy all non-latch operands of a header's PHI node to the newly // created PHI node in the preheader. 
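findInductionRegister and fixupInductionVariable above both match the same shape: a header PHI whose latch incoming value is produced by an add of the PHI itself and a compile-time bump. A data-only sketch of that structural check, with invented field names and no LLVM types:

#include <optional>
#include <utility>

struct AddSketch { unsigned Dst, Src; long long Bump; }; // Dst = add Src, Bump
struct PhiSketch { unsigned Def, LatchValue; };          // Def = phi(.., LatchValue)

// Returns the update register ("UpdReg") and the bump value when the pattern
// matches: the add consumes the PHI and feeds the PHI's latch operand.
static std::optional<std::pair<unsigned, long long>>
matchInductionSketch(const PhiSketch &Phi, const AddSketch &Add) {
  if (Add.Src == Phi.Def && Add.Dst == Phi.LatchValue)
    return std::make_pair(Add.Dst, Add.Bump);
  return std::nullopt;
}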
for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) { - unsigned PredR = PN->getOperand(i).getReg(); + Register PredR = PN->getOperand(i).getReg(); unsigned PredRSub = PN->getOperand(i).getSubReg(); MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); if (PredB == Latch) diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 605fcfc25559..4684d8e4781a 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -697,7 +697,7 @@ void HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { // void HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) { SDLoc dl(N); - ConstantFPSDNode *CN = dyn_cast(N); + auto *CN = cast(N); APInt A = CN->getValueAPF().bitcastToAPInt(); if (N->getValueType(0) == MVT::f32) { SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i32); diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index fef5a98cdb00..8a8986e232a0 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -240,12 +240,12 @@ bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { return true; } -unsigned HexagonTargetLowering::getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const { +Register HexagonTargetLowering::getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &) const { // Just support r19, the linux kernel uses it. - unsigned Reg = StringSwitch(RegName) + Register Reg = StringSwitch(RegName) .Case("r19", Hexagon::R19) - .Default(0); + .Default(Register()); if (Reg) return Reg; @@ -286,7 +286,7 @@ SDValue HexagonTargetLowering::LowerCallResult( SDValue FR0 = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(), MVT::i32, Glue); // FR0 = (Value, Chain, Glue) - unsigned PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); + Register PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); SDValue TPR = DAG.getCopyToReg(FR0.getValue(1), dl, PredR, FR0.getValue(0), FR0.getValue(2)); // TPR = (Chain, Glue) @@ -736,7 +736,7 @@ SDValue HexagonTargetLowering::LowerFormalArguments( RegVT = VA.getValVT(); const TargetRegisterClass *RC = getRegClassFor(RegVT); - unsigned VReg = MRI.createVirtualRegister(RC); + Register VReg = MRI.createVirtualRegister(RC); SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT); // Treat values of type MVT::i1 specially: they are passed in @@ -870,15 +870,20 @@ SDValue HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue PredOp = Op.getOperand(0); SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2); - EVT OpVT = Op1.getValueType(); - SDLoc DL(Op); + MVT OpTy = ty(Op1); + const SDLoc &dl(Op); - if (OpVT == MVT::v2i16) { - SDValue X1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op1); - SDValue X2 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op2); - SDValue SL = DAG.getNode(ISD::VSELECT, DL, MVT::v2i32, PredOp, X1, X2); - SDValue TR = DAG.getNode(ISD::TRUNCATE, DL, MVT::v2i16, SL); - return TR; + if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) { + MVT ElemTy = OpTy.getVectorElementType(); + assert(ElemTy.isScalarInteger()); + MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()), + OpTy.getVectorNumElements()); + // Generate (trunc (select (_, sext, sext))). 
+ return DAG.getSExtOrTrunc( + DAG.getSelect(dl, WideTy, PredOp, + DAG.getSExtOrTrunc(Op1, dl, WideTy), + DAG.getSExtOrTrunc(Op2, dl, WideTy)), + dl, OpTy); } return SDValue(); @@ -1230,9 +1235,9 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, Subtarget(ST) { auto &HRI = *Subtarget.getRegisterInfo(); - setPrefLoopAlignment(4); - setPrefFunctionAlignment(4); - setMinFunctionAlignment(2); + setPrefLoopAlignment(Align(16)); + setMinFunctionAlignment(Align(4)); + setPrefFunctionAlignment(Align(16)); setStackPointerRegisterToSaveRestore(HRI.getStackRegister()); setBooleanContents(TargetLoweringBase::UndefinedBooleanContent); setBooleanVectorContents(TargetLoweringBase::UndefinedBooleanContent); @@ -1434,12 +1439,12 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, ISD::CONCAT_VECTORS, ISD::VECTOR_SHUFFLE }; - for (MVT VT : MVT::vector_valuetypes()) { + for (MVT VT : MVT::fixedlen_vector_valuetypes()) { for (unsigned VectExpOp : VectExpOps) setOperationAction(VectExpOp, VT, Expand); // Expand all extending loads and truncating stores: - for (MVT TargetVT : MVT::vector_valuetypes()) { + for (MVT TargetVT : MVT::fixedlen_vector_valuetypes()) { if (TargetVT == VT) continue; setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand); @@ -1496,16 +1501,21 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STORE, VT, Custom); } - for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v2i32, MVT::v4i16, MVT::v2i32}) { - setCondCodeAction(ISD::SETLT, VT, Expand); + for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v8i8, MVT::v2i32, MVT::v4i16, + MVT::v2i32}) { + setCondCodeAction(ISD::SETNE, VT, Expand); setCondCodeAction(ISD::SETLE, VT, Expand); - setCondCodeAction(ISD::SETULT, VT, Expand); + setCondCodeAction(ISD::SETGE, VT, Expand); + setCondCodeAction(ISD::SETLT, VT, Expand); setCondCodeAction(ISD::SETULE, VT, Expand); + setCondCodeAction(ISD::SETUGE, VT, Expand); + setCondCodeAction(ISD::SETULT, VT, Expand); } // Custom-lower bitcasts from i8 to v8i1. setOperationAction(ISD::BITCAST, MVT::i8, Custom); setOperationAction(ISD::SETCC, MVT::v2i16, Custom); + setOperationAction(ISD::VSELECT, MVT::v4i8, Custom); setOperationAction(ISD::VSELECT, MVT::v2i16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); @@ -1554,6 +1564,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSUB, MVT::f64, Legal); } + setTargetDAGCombine(ISD::VSELECT); + if (Subtarget.useHVXOps()) initializeHVXLowering(); @@ -1643,6 +1655,8 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::VINSERTW0: return "HexagonISD::VINSERTW0"; case HexagonISD::VROR: return "HexagonISD::VROR"; case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE"; + case HexagonISD::PTRUE: return "HexagonISD::PTRUE"; + case HexagonISD::PFALSE: return "HexagonISD::PFALSE"; case HexagonISD::VZERO: return "HexagonISD::VZERO"; case HexagonISD::VSPLATW: return "HexagonISD::VSPLATW"; case HexagonISD::D2P: return "HexagonISD::D2P"; @@ -1783,7 +1797,8 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, // The offset value comes through Modifier register. For now, assume the // offset is 0. 
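The LowerVSELECT hunk above generalizes the old v2i16-only path: both operands are sign-extended to a type with double-width elements, the select is done there, and the result is truncated back. A scalar model of why that round trip is sound for any lane value; this is an illustration of the transformation, not DAG code.

#include <cassert>
#include <cstdint>

static int16_t selectNarrow(bool P, int16_t A, int16_t B) { return P ? A : B; }

static int16_t selectViaWiden(bool P, int16_t A, int16_t B) {
  int32_t WA = A, WB = B;              // sext each lane to the wider type
  int32_t W = P ? WA : WB;             // vselect on the widened type
  return static_cast<int16_t>(W);      // trunc back to the original lane type
}

int main() {
  for (int A = -32768; A <= 32767; A += 257)
    for (int B = -32768; B <= 32767; B += 263)
      for (bool P : {false, true})
        assert(selectNarrow(P, int16_t(A), int16_t(B)) ==
               selectViaWiden(P, int16_t(A), int16_t(B)));
  return 0;
}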
Info.offset = 0; - Info.align = DL.getABITypeAlignment(Info.memVT.getTypeForEVT(Cont)); + Info.align = + MaybeAlign(DL.getABITypeAlignment(Info.memVT.getTypeForEVT(Cont))); Info.flags = MachineMemOperand::MOLoad; return true; } @@ -1805,7 +1820,8 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::getVT(VecTy); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = M.getDataLayout().getTypeAllocSizeInBits(VecTy) / 8; + Info.align = + MaybeAlign(M.getDataLayout().getTypeAllocSizeInBits(VecTy) / 8); Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; @@ -1817,6 +1833,10 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return false; } +bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const { + return X.getValueType().isScalarInteger(); // 'tstbit' +} + bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2)); } @@ -1844,26 +1864,33 @@ bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef Mask, TargetLoweringBase::LegalizeTypeAction HexagonTargetLowering::getPreferredVectorAction(MVT VT) const { - if (VT.getVectorNumElements() == 1) - return TargetLoweringBase::TypeScalarizeVector; - - // Always widen vectors of i1. + unsigned VecLen = VT.getVectorNumElements(); MVT ElemTy = VT.getVectorElementType(); - if (ElemTy == MVT::i1) - return TargetLoweringBase::TypeWidenVector; + + if (VecLen == 1 || VT.isScalableVector()) + return TargetLoweringBase::TypeScalarizeVector; if (Subtarget.useHVXOps()) { + unsigned HwLen = Subtarget.getVectorLength(); // If the size of VT is at least half of the vector length, // widen the vector. Note: the threshold was not selected in // any scientific way. ArrayRef Tys = Subtarget.getHVXElementTypes(); if (llvm::find(Tys, ElemTy) != Tys.end()) { - unsigned HwWidth = 8*Subtarget.getVectorLength(); + unsigned HwWidth = 8*HwLen; unsigned VecWidth = VT.getSizeInBits(); if (VecWidth >= HwWidth/2 && VecWidth < HwWidth) return TargetLoweringBase::TypeWidenVector; } + // Split vectors of i1 that correspond to (byte) vector pairs. + if (ElemTy == MVT::i1 && VecLen == 2*HwLen) + return TargetLoweringBase::TypeSplitVector; } + + // Always widen (remaining) vectors of i1. + if (ElemTy == MVT::i1) + return TargetLoweringBase::TypeWidenVector; + return TargetLoweringBase::TypeSplitVector; } @@ -2452,6 +2479,23 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return buildVector64(Ops, dl, VecTy, DAG); if (VecTy == MVT::v8i1 || VecTy == MVT::v4i1 || VecTy == MVT::v2i1) { + // Check if this is a special case or all-0 or all-1. + bool All0 = true, All1 = true; + for (SDValue P : Ops) { + auto *CN = dyn_cast(P.getNode()); + if (CN == nullptr) { + All0 = All1 = false; + break; + } + uint32_t C = CN->getZExtValue(); + All0 &= (C == 0); + All1 &= (C == 1); + } + if (All0) + return DAG.getNode(HexagonISD::PFALSE, dl, VecTy); + if (All1) + return DAG.getNode(HexagonISD::PTRUE, dl, VecTy); + // For each i1 element in the resulting predicate register, put 1 // shifted by the index of the element into a general-purpose register, // then or them together and transfer it back into a predicate register. 
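The LowerBUILD_VECTOR hunk above short-circuits constant predicate vectors: if every element is the constant 0 the node becomes PFALSE, if every element is the constant 1 it becomes PTRUE, and any non-constant element falls through to the general shift-and-or path. The same scan on plain integers, assumed equivalent and written without SDNode types:

#include <cstdint>
#include <optional>
#include <vector>

enum class PredSplat { PFalse, PTrue };

// Each element is either a known constant or std::nullopt for "not a constant".
static std::optional<PredSplat>
classifyPredicateBuildVector(const std::vector<std::optional<uint32_t>> &Ops) {
  bool All0 = true, All1 = true;
  for (const auto &Op : Ops) {
    if (!Op) {                        // non-constant element: give up
      All0 = All1 = false;
      break;
    }
    All0 &= (*Op == 0);
    All1 &= (*Op == 1);
  }
  if (All0) return PredSplat::PFalse;
  if (All1) return PredSplat::PTrue;
  return std::nullopt;                // general lowering path
}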
@@ -2629,7 +2673,8 @@ HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG) DoDefault = true; if (!AlignLoads) { - if (allowsMemoryAccess(Ctx, DL, LN->getMemoryVT(), *LN->getMemOperand())) + if (allowsMemoryAccessForAlignment(Ctx, DL, LN->getMemoryVT(), + *LN->getMemOperand())) return Op; DoDefault = true; } @@ -2637,7 +2682,8 @@ HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG) // The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)". MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8 * HaveAlign) : MVT::getVectorVT(MVT::i8, HaveAlign); - DoDefault = allowsMemoryAccess(Ctx, DL, PartTy, *LN->getMemOperand()); + DoDefault = + allowsMemoryAccessForAlignment(Ctx, DL, PartTy, *LN->getMemOperand()); } if (DoDefault) { std::pair P = expandUnalignedLoad(LN, DAG); @@ -2865,12 +2911,54 @@ HexagonTargetLowering::ReplaceNodeResults(SDNode *N, if (N->getValueType(0) == MVT::i8) { SDValue P = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, N->getOperand(0), DAG); - Results.push_back(P); + SDValue T = DAG.getAnyExtOrTrunc(P, dl, MVT::i8); + Results.push_back(T); } break; } } +SDValue +HexagonTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) + const { + SDValue Op(N, 0); + if (isHvxOperation(Op)) { + if (SDValue V = PerformHvxDAGCombine(N, DCI)) + return V; + return SDValue(); + } + + const SDLoc &dl(Op); + unsigned Opc = Op.getOpcode(); + + if (Opc == HexagonISD::P2D) { + SDValue P = Op.getOperand(0); + switch (P.getOpcode()) { + case HexagonISD::PTRUE: + return DCI.DAG.getConstant(-1, dl, ty(Op)); + case HexagonISD::PFALSE: + return getZero(dl, ty(Op), DCI.DAG); + default: + break; + } + } else if (Opc == ISD::VSELECT) { + // This is pretty much duplicated in HexagonISelLoweringHVX... + // + // (vselect (xor x, ptrue), v0, v1) -> (vselect x, v1, v0) + SDValue Cond = Op.getOperand(0); + if (Cond->getOpcode() == ISD::XOR) { + SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1); + if (C1->getOpcode() == HexagonISD::PTRUE) { + SDValue VSel = DCI.DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, + Op.getOperand(2), Op.getOperand(1)); + return VSel; + } + } + } + + return SDValue(); +} + /// Returns relocation base for the given PIC jumptable. SDValue HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table, diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 4e467cb22727..75f553bfec7f 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -68,6 +68,8 @@ namespace HexagonISD { EH_RETURN, DCFETCH, READCYCLE, + PTRUE, + PFALSE, D2P, // Convert 8-byte value to 8-bit predicate register. [*] P2D, // Convert 8-bit predicate register to 8-byte value. [*] V2Q, // Convert HVX vector to a vector predicate reg. 
[*] @@ -127,6 +129,8 @@ namespace HexagonISD { bool isCheapToSpeculateCtlz() const override { return true; } bool isCtlzFast() const override { return true; } + bool hasBitTest(SDValue X, SDValue Y) const override; + bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; /// Return true if an FMA operation is faster than a pair of mul and add @@ -221,10 +225,12 @@ namespace HexagonISD { const SmallVectorImpl &OutVals, const SDLoc &dl, SelectionDAG &DAG) const override; + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + bool mayBeEmittedAsTailCall(const CallInst *CI) const override; - unsigned getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const override; + Register getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const override; /// If a physical register, this returns the register that receives the /// exception address on entry to an EH pad. @@ -299,7 +305,8 @@ namespace HexagonISD { const AttributeList &FuncAttributes) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, - unsigned Align, MachineMemOperand::Flags Flags, bool *Fast) const override; + unsigned Align, MachineMemOperand::Flags Flags, bool *Fast) + const override; /// Returns relocation base for the given PIC jumptable. SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) @@ -456,6 +463,8 @@ namespace HexagonISD { bool isHvxOperation(SDValue Op) const; SDValue LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const; + + SDValue PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; }; } // end namespace llvm diff --git a/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 345c657787a0..bc8a9959c917 100644 --- a/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -193,6 +193,8 @@ HexagonTargetLowering::initializeHVXLowering() { setOperationAction(ISD::OR, BoolV, Legal); setOperationAction(ISD::XOR, BoolV, Legal); } + + setTargetDAGCombine(ISD::VSELECT); } SDValue @@ -1580,6 +1582,28 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("Unhandled HVX operation"); } +SDValue +HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) + const { + const SDLoc &dl(N); + SDValue Op(N, 0); + + unsigned Opc = Op.getOpcode(); + if (Opc == ISD::VSELECT) { + // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0) + SDValue Cond = Op.getOperand(0); + if (Cond->getOpcode() == ISD::XOR) { + SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1); + if (C1->getOpcode() == HexagonISD::QTRUE) { + SDValue VSel = DCI.DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, + Op.getOperand(2), Op.getOperand(1)); + return VSel; + } + } + } + return SDValue(); +} + bool HexagonTargetLowering::isHvxOperation(SDValue Op) const { // If the type of the result, or any operand type are HVX vector types, diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index a156de5ba128..767538f92ed6 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -193,7 +193,7 @@ static inline void parseOperands(const MachineInstr &MI, if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; @@ -674,86 +674,96 @@ unsigned HexagonInstrInfo::insertBranch(MachineBasicBlock &MBB, return 2; } -/// Analyze the loop code to find the loop induction variable and 
compare used -/// to compute the number of iterations. Currently, we analyze loop that are -/// controlled using hardware loops. In this case, the induction variable -/// instruction is null. For all other cases, this function returns true, which -/// means we're unable to analyze it. -bool HexagonInstrInfo::analyzeLoop(MachineLoop &L, - MachineInstr *&IndVarInst, - MachineInstr *&CmpInst) const { +namespace { +class HexagonPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo { + MachineInstr *Loop, *EndLoop; + MachineFunction *MF; + const HexagonInstrInfo *TII; + int64_t TripCount; + Register LoopCount; + DebugLoc DL; - MachineBasicBlock *LoopEnd = L.getBottomBlock(); - MachineBasicBlock::iterator I = LoopEnd->getFirstTerminator(); - // We really "analyze" only hardware loops right now. - if (I != LoopEnd->end() && isEndLoopN(I->getOpcode())) { - IndVarInst = nullptr; - CmpInst = &*I; - return false; +public: + HexagonPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop) + : Loop(Loop), EndLoop(EndLoop), MF(Loop->getParent()->getParent()), + TII(MF->getSubtarget().getInstrInfo()), + DL(Loop->getDebugLoc()) { + // Inspect the Loop instruction up-front, as it may be deleted when we call + // createTripCountGreaterCondition. + TripCount = Loop->getOpcode() == Hexagon::J2_loop0r + ? -1 + : Loop->getOperand(1).getImm(); + if (TripCount == -1) + LoopCount = Loop->getOperand(1).getReg(); } - return true; -} -/// Generate code to reduce the loop iteration by one and check if the loop is -/// finished. Return the value/register of the new loop count. this function -/// assumes the nth iteration is peeled first. -unsigned HexagonInstrInfo::reduceLoopCount( - MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, MachineInstr *IndVar, - MachineInstr &Cmp, SmallVectorImpl &Cond, - SmallVectorImpl &PrevInsts, unsigned Iter, - unsigned MaxIter) const { - // We expect a hardware loop currently. This means that IndVar is set - // to null, and the compare is the ENDLOOP instruction. - assert((!IndVar) && isEndLoopN(Cmp.getOpcode()) - && "Expecting a hardware loop"); - MachineFunction *MF = MBB.getParent(); - DebugLoc DL = Cmp.getDebugLoc(); - SmallPtrSet VisitedBBs; - MachineInstr *Loop = findLoopInstr(&MBB, Cmp.getOpcode(), - Cmp.getOperand(0).getMBB(), VisitedBBs); - if (!Loop) - return 0; - // If the loop trip count is a compile-time value, then just change the - // value. - if (Loop->getOpcode() == Hexagon::J2_loop0i || - Loop->getOpcode() == Hexagon::J2_loop1i) { - int64_t Offset = Loop->getOperand(1).getImm(); - if (Offset <= 1) - Loop->eraseFromParent(); - else - Loop->getOperand(1).setImm(Offset - 1); - return Offset - 1; + bool shouldIgnoreForPipelining(const MachineInstr *MI) const override { + // Only ignore the terminator. + return MI == EndLoop; } - // The loop trip count is a run-time value. We generate code to subtract - // one from the trip count, and update the loop instruction. - assert(Loop->getOpcode() == Hexagon::J2_loop0r && "Unexpected instruction"); - unsigned LoopCount = Loop->getOperand(1).getReg(); - // Check if we're done with the loop. - unsigned LoopEnd = createVR(MF, MVT::i1); - MachineInstr *NewCmp = BuildMI(&MBB, DL, get(Hexagon::C2_cmpgtui), LoopEnd). - addReg(LoopCount).addImm(1); - unsigned NewLoopCount = createVR(MF, MVT::i32); - MachineInstr *NewAdd = BuildMI(&MBB, DL, get(Hexagon::A2_addi), NewLoopCount). 
- addReg(LoopCount).addImm(-1); - const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); - // Update the previously generated instructions with the new loop counter. - for (SmallVectorImpl::iterator I = PrevInsts.begin(), - E = PrevInsts.end(); I != E; ++I) - (*I)->substituteRegister(LoopCount, NewLoopCount, 0, HRI); - PrevInsts.clear(); - PrevInsts.push_back(NewCmp); - PrevInsts.push_back(NewAdd); - // Insert the new loop instruction if this is the last time the loop is - // decremented. - if (Iter == MaxIter) - BuildMI(&MBB, DL, get(Hexagon::J2_loop0r)). - addMBB(Loop->getOperand(0).getMBB()).addReg(NewLoopCount); - // Delete the old loop instruction. - if (Iter == 0) - Loop->eraseFromParent(); - Cond.push_back(MachineOperand::CreateImm(Hexagon::J2_jumpf)); - Cond.push_back(NewCmp->getOperand(0)); - return NewLoopCount; + + Optional + createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB, + SmallVectorImpl &Cond) override { + if (TripCount == -1) { + // Check if we're done with the loop. + unsigned Done = TII->createVR(MF, MVT::i1); + MachineInstr *NewCmp = BuildMI(&MBB, DL, + TII->get(Hexagon::C2_cmpgtui), Done) + .addReg(LoopCount) + .addImm(TC); + Cond.push_back(MachineOperand::CreateImm(Hexagon::J2_jumpf)); + Cond.push_back(NewCmp->getOperand(0)); + return {}; + } + + return TripCount > TC; + } + + void setPreheader(MachineBasicBlock *NewPreheader) override { + NewPreheader->splice(NewPreheader->getFirstTerminator(), Loop->getParent(), + Loop); + } + + void adjustTripCount(int TripCountAdjust) override { + // If the loop trip count is a compile-time value, then just change the + // value. + if (Loop->getOpcode() == Hexagon::J2_loop0i || + Loop->getOpcode() == Hexagon::J2_loop1i) { + int64_t TripCount = Loop->getOperand(1).getImm() + TripCountAdjust; + assert(TripCount > 0 && "Can't create an empty or negative loop!"); + Loop->getOperand(1).setImm(TripCount); + return; + } + + // The loop trip count is a run-time value. We generate code to subtract + // one from the trip count, and update the loop instruction. + Register LoopCount = Loop->getOperand(1).getReg(); + Register NewLoopCount = TII->createVR(MF, MVT::i32); + BuildMI(*Loop->getParent(), Loop, Loop->getDebugLoc(), + TII->get(Hexagon::A2_addi), NewLoopCount) + .addReg(LoopCount) + .addImm(TripCountAdjust); + Loop->getOperand(1).setReg(NewLoopCount); + } + + void disposed() override { Loop->eraseFromParent(); } +}; +} // namespace + +std::unique_ptr +HexagonInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const { + // We really "analyze" only hardware loops right now. 
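createTripCountGreaterCondition above answers "will the loop run more than TC further times?" directly when the trip count is a compile-time immediate, and otherwise emits a C2_cmpgtui on the run-time count register and reports "unknown" by returning an empty Optional, so the caller branches on that compare instead. A minimal model of that contract, illustration only:

#include <optional>

// TripCount == -1 stands for "run-time value held in a register", mirroring
// how the pipeliner loop info above caches it.
static std::optional<bool> tripCountGreaterSketch(long long TripCount, int TC) {
  if (TripCount == -1)
    return std::nullopt;  // a compare was emitted; the caller must branch on it
  return TripCount > TC;
}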
+ MachineBasicBlock::iterator I = LoopBB->getFirstTerminator(); + + if (I != LoopBB->end() && isEndLoopN(I->getOpcode())) { + SmallPtrSet VisitedBBs; + MachineInstr *LoopInst = findLoopInstr( + LoopBB, I->getOpcode(), I->getOperand(0).getMBB(), VisitedBBs); + if (LoopInst) + return std::make_unique(LoopInst, &*I); + } + return nullptr; } bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, @@ -839,8 +849,8 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } if (Hexagon::HvxWRRegClass.contains(SrcReg, DestReg)) { - unsigned LoSrc = HRI.getSubReg(SrcReg, Hexagon::vsub_lo); - unsigned HiSrc = HRI.getSubReg(SrcReg, Hexagon::vsub_hi); + Register LoSrc = HRI.getSubReg(SrcReg, Hexagon::vsub_lo); + Register HiSrc = HRI.getSubReg(SrcReg, Hexagon::vsub_hi); BuildMI(MBB, I, DL, get(Hexagon::V6_vcombine), DestReg) .addReg(HiSrc, KillFlag) .addReg(LoSrc, KillFlag); @@ -1017,7 +1027,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { unsigned Opc = MI.getOpcode(); auto RealCirc = [&](unsigned Opc, bool HasImm, unsigned MxOp) { - unsigned Mx = MI.getOperand(MxOp).getReg(); + Register Mx = MI.getOperand(MxOp).getReg(); unsigned CSx = (Mx == Hexagon::M0 ? Hexagon::CS0 : Hexagon::CS1); BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrrcr), CSx) .add(MI.getOperand((HasImm ? 5 : 4))); @@ -1049,8 +1059,8 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MBB.erase(MI); return true; case Hexagon::V6_vassignp: { - unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); unsigned Kill = getKillRegState(MI.getOperand(1).isKill()); BuildMI(MBB, MI, DL, get(Hexagon::V6_vcombine), DstReg) .addReg(HRI.getSubReg(SrcReg, Hexagon::vsub_hi), Kill) @@ -1059,18 +1069,18 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } case Hexagon::V6_lo: { - unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::vsub_lo); + Register SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::vsub_lo); copyPhysReg(MBB, MI, DL, DstReg, SrcSubLo, MI.getOperand(1).isKill()); MBB.erase(MI); MRI.clearKillFlags(SrcSubLo); return true; } case Hexagon::V6_hi: { - unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::vsub_hi); + Register SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::vsub_hi); copyPhysReg(MBB, MI, DL, DstReg, SrcSubHi, MI.getOperand(1).isKill()); MBB.erase(MI); MRI.clearKillFlags(SrcSubHi); @@ -1079,9 +1089,9 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case Hexagon::PS_vstorerw_ai: case Hexagon::PS_vstorerwu_ai: { bool Aligned = Opc == Hexagon::PS_vstorerw_ai; - unsigned SrcReg = MI.getOperand(2).getReg(); - unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::vsub_hi); - unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::vsub_lo); + Register SrcReg = MI.getOperand(2).getReg(); + Register SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::vsub_hi); + Register SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::vsub_lo); unsigned NewOpc = Aligned ? 
Hexagon::V6_vS32b_ai : Hexagon::V6_vS32Ub_ai; unsigned Offset = HRI.getSpillSize(Hexagon::HvxVRRegClass); @@ -1103,7 +1113,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case Hexagon::PS_vloadrw_ai: case Hexagon::PS_vloadrwu_ai: { bool Aligned = Opc == Hexagon::PS_vloadrw_ai; - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); unsigned NewOpc = Aligned ? Hexagon::V6_vL32b_ai : Hexagon::V6_vL32Ub_ai; unsigned Offset = HRI.getSpillSize(Hexagon::HvxVRRegClass); @@ -1122,7 +1132,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } case Hexagon::PS_true: { - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); BuildMI(MBB, MI, DL, get(Hexagon::C2_orn), Reg) .addReg(Reg, RegState::Undef) .addReg(Reg, RegState::Undef); @@ -1130,7 +1140,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } case Hexagon::PS_false: { - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); BuildMI(MBB, MI, DL, get(Hexagon::C2_andn), Reg) .addReg(Reg, RegState::Undef) .addReg(Reg, RegState::Undef); @@ -1152,7 +1162,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } case Hexagon::PS_vdd0: { - unsigned Vd = MI.getOperand(0).getReg(); + Register Vd = MI.getOperand(0).getReg(); BuildMI(MBB, MI, DL, get(Hexagon::V6_vsubw_dv), Vd) .addReg(Vd, RegState::Undef) .addReg(Vd, RegState::Undef); @@ -1161,13 +1171,13 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { } case Hexagon::PS_vmulw: { // Expand a 64-bit vector multiply into 2 32-bit scalar multiplies. - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned Src1Reg = MI.getOperand(1).getReg(); - unsigned Src2Reg = MI.getOperand(2).getReg(); - unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::isub_hi); - unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::isub_lo); - unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::isub_hi); - unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::isub_lo); + Register DstReg = MI.getOperand(0).getReg(); + Register Src1Reg = MI.getOperand(1).getReg(); + Register Src2Reg = MI.getOperand(2).getReg(); + Register Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::isub_hi); + Register Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::isub_lo); + Register Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::isub_hi); + Register Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::isub_lo); BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::M2_mpyi), HRI.getSubReg(DstReg, Hexagon::isub_hi)) .addReg(Src1SubHi) @@ -1185,16 +1195,16 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { } case Hexagon::PS_vmulw_acc: { // Expand 64-bit vector multiply with addition into 2 scalar multiplies. 
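
The PS_vmulw expansion just above implements a two-lane 32-bit vector multiply with two independent scalar multiplies on the low and high subregisters. A standalone sketch of the arithmetic; lane values and function names are illustrative, not taken from the patch.

#include <cstdint>
#include <cstdio>

// Multiply two "v2i32" values packed into 64-bit registers, lane by lane,
// the way the expansion splits the work across isub_lo and isub_hi.
uint64_t mulV2I32(uint64_t Rss, uint64_t Rtt) {
  uint32_t LoProd = uint32_t(Rss) * uint32_t(Rtt);              // low lane
  uint32_t HiProd = uint32_t(Rss >> 32) * uint32_t(Rtt >> 32);  // high lane
  return (uint64_t(HiProd) << 32) | LoProd;
}

int main() {
  // Lanes (3, 5) * (7, 11) -> (21, 55).
  uint64_t R = mulV2I32((5ULL << 32) | 3, (11ULL << 32) | 7);
  std::printf("lo=%u hi=%u\n", uint32_t(R), uint32_t(R >> 32));
}
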
- unsigned DstReg = MI.getOperand(0).getReg(); - unsigned Src1Reg = MI.getOperand(1).getReg(); - unsigned Src2Reg = MI.getOperand(2).getReg(); - unsigned Src3Reg = MI.getOperand(3).getReg(); - unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::isub_hi); - unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::isub_lo); - unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::isub_hi); - unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::isub_lo); - unsigned Src3SubHi = HRI.getSubReg(Src3Reg, Hexagon::isub_hi); - unsigned Src3SubLo = HRI.getSubReg(Src3Reg, Hexagon::isub_lo); + Register DstReg = MI.getOperand(0).getReg(); + Register Src1Reg = MI.getOperand(1).getReg(); + Register Src2Reg = MI.getOperand(2).getReg(); + Register Src3Reg = MI.getOperand(3).getReg(); + Register Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::isub_hi); + Register Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::isub_lo); + Register Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::isub_hi); + Register Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::isub_lo); + Register Src3SubHi = HRI.getSubReg(Src3Reg, Hexagon::isub_hi); + Register Src3SubLo = HRI.getSubReg(Src3Reg, Hexagon::isub_lo); BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::M2_maci), HRI.getSubReg(DstReg, Hexagon::isub_hi)) .addReg(Src1SubHi) @@ -1219,10 +1229,10 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { const MachineOperand &Op1 = MI.getOperand(1); const MachineOperand &Op2 = MI.getOperand(2); const MachineOperand &Op3 = MI.getOperand(3); - unsigned Rd = Op0.getReg(); - unsigned Pu = Op1.getReg(); - unsigned Rs = Op2.getReg(); - unsigned Rt = Op3.getReg(); + Register Rd = Op0.getReg(); + Register Pu = Op1.getReg(); + Register Rs = Op2.getReg(); + Register Rt = Op3.getReg(); DebugLoc DL = MI.getDebugLoc(); unsigned K1 = getKillRegState(Op1.isKill()); unsigned K2 = getKillRegState(Op2.isKill()); @@ -1246,7 +1256,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { LivePhysRegs LiveAtMI(HRI); getLiveRegsAt(LiveAtMI, MI); bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg()); - unsigned PReg = Op1.getReg(); + Register PReg = Op1.getReg(); assert(Op1.getSubReg() == 0); unsigned PState = getRegState(Op1); @@ -1280,15 +1290,15 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { LivePhysRegs LiveAtMI(HRI); getLiveRegsAt(LiveAtMI, MI); bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg()); - unsigned PReg = Op1.getReg(); + Register PReg = Op1.getReg(); assert(Op1.getSubReg() == 0); unsigned PState = getRegState(Op1); if (Op0.getReg() != Op2.getReg()) { unsigned S = Op0.getReg() != Op3.getReg() ? 
PState & ~RegState::Kill : PState; - unsigned SrcLo = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_lo); - unsigned SrcHi = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_hi); + Register SrcLo = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_lo); + Register SrcHi = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_hi); auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vccombine)) .add(Op0) .addReg(PReg, S) @@ -1299,8 +1309,8 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { IsDestLive = true; } if (Op0.getReg() != Op3.getReg()) { - unsigned SrcLo = HRI.getSubReg(Op3.getReg(), Hexagon::vsub_lo); - unsigned SrcHi = HRI.getSubReg(Op3.getReg(), Hexagon::vsub_hi); + Register SrcLo = HRI.getSubReg(Op3.getReg(), Hexagon::vsub_lo); + Register SrcHi = HRI.getSubReg(Op3.getReg(), Hexagon::vsub_hi); auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vnccombine)) .add(Op0) .addReg(PReg, PState) @@ -1856,8 +1866,7 @@ DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState( // S2_storeri_io %r29, 132, killed %r1; flags: mem:ST4[FixedStack1] // Currently AA considers the addresses in these instructions to be aliasing. bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint( - const MachineInstr &MIa, const MachineInstr &MIb, - AliasAnalysis *AA) const { + const MachineInstr &MIa, const MachineInstr &MIb) const { if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() || MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) return false; @@ -1872,7 +1881,7 @@ bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint( if (!getBaseAndOffsetPosition(MIa, BasePosA, OffsetPosA)) return false; const MachineOperand &BaseA = MIa.getOperand(BasePosA); - unsigned BaseRegA = BaseA.getReg(); + Register BaseRegA = BaseA.getReg(); unsigned BaseSubA = BaseA.getSubReg(); // Get the base register in MIb. 
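
areMemAccessesTriviallyDisjoint, whose hunks continue below, only proves anything when both accesses use the same base register and subregister; the rest of the function (mostly unchanged and therefore not visible in this diff) goes on to compare offsets and access sizes. A standalone sketch of that idea, with the struct and field names being illustrative assumptions.

#include <cassert>
#include <cstdint>

struct MemAccess {
  unsigned BaseReg;  // base register (subregister folded in for brevity)
  int64_t Offset;    // byte offset from the base
  int64_t Width;     // access size in bytes
};

// Two accesses off the same base cannot alias if their
// [Offset, Offset + Width) byte ranges do not overlap.
bool triviallyDisjoint(const MemAccess &A, const MemAccess &B) {
  if (A.BaseReg != B.BaseReg)
    return false;  // different bases: nothing can be proven, assume may-alias
  const MemAccess &Lo = A.Offset <= B.Offset ? A : B;
  const MemAccess &Hi = A.Offset <= B.Offset ? B : A;
  return Lo.Offset + Lo.Width <= Hi.Offset;
}

int main() {
  // Matches the stack stores in the comment above: r29+132 and r29+136.
  assert(triviallyDisjoint({29, 136, 4}, {29, 132, 4}));
  assert(!triviallyDisjoint({29, 132, 8}, {29, 136, 4}));
}
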
@@ -1880,7 +1889,7 @@ bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint( if (!getBaseAndOffsetPosition(MIb, BasePosB, OffsetPosB)) return false; const MachineOperand &BaseB = MIb.getOperand(BasePosB); - unsigned BaseRegB = BaseB.getReg(); + Register BaseRegB = BaseB.getReg(); unsigned BaseSubB = BaseB.getSubReg(); if (BaseRegA != BaseRegB || BaseSubA != BaseSubB) @@ -1984,7 +1993,7 @@ unsigned HexagonInstrInfo::createVR(MachineFunction *MF, MVT VT) const { llvm_unreachable("Cannot handle this register class"); } - unsigned NewReg = MRI.createVirtualRegister(TRC); + Register NewReg = MRI.createVirtualRegister(TRC); return NewReg; } @@ -2094,12 +2103,12 @@ bool HexagonInstrInfo::isDependent(const MachineInstr &ProdMI, if (RegA == RegB) return true; - if (TargetRegisterInfo::isPhysicalRegister(RegA)) + if (Register::isPhysicalRegister(RegA)) for (MCSubRegIterator SubRegs(RegA, &HRI); SubRegs.isValid(); ++SubRegs) if (RegB == *SubRegs) return true; - if (TargetRegisterInfo::isPhysicalRegister(RegB)) + if (Register::isPhysicalRegister(RegB)) for (MCSubRegIterator SubRegs(RegB, &HRI); SubRegs.isValid(); ++SubRegs) if (RegA == *SubRegs) return true; @@ -2605,7 +2614,7 @@ bool HexagonInstrInfo::isToBeScheduledASAP(const MachineInstr &MI1, const MachineInstr &MI2) const { if (mayBeCurLoad(MI1)) { // if (result of SU is used in Next) return true; - unsigned DstReg = MI1.getOperand(0).getReg(); + Register DstReg = MI1.getOperand(0).getReg(); int N = MI2.getNumOperands(); for (int I = 0; I < N; I++) if (MI2.getOperand(I).isReg() && DstReg == MI2.getOperand(I).getReg()) @@ -3374,7 +3383,7 @@ unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr &GA, if ((GA.getOpcode() != Hexagon::C2_cmpeqi) || (GB.getOpcode() != Hexagon::J2_jumptnew)) return -1u; - unsigned DestReg = GA.getOperand(0).getReg(); + Register DestReg = GA.getOperand(0).getReg(); if (!GB.readsRegister(DestReg)) return -1u; if (DestReg != Hexagon::P0 && DestReg != Hexagon::P1) @@ -4091,7 +4100,7 @@ int HexagonInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, // Get DefIdx and UseIdx for super registers. const MachineOperand &DefMO = DefMI.getOperand(DefIdx); - if (DefMO.isReg() && HRI.isPhysicalRegister(DefMO.getReg())) { + if (DefMO.isReg() && Register::isPhysicalRegister(DefMO.getReg())) { if (DefMO.isImplicit()) { for (MCSuperRegIterator SR(DefMO.getReg(), &HRI); SR.isValid(); ++SR) { int Idx = DefMI.findRegisterDefOperandIdx(*SR, false, false, &HRI); diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index e0a999d0f4c4..60298cd666bb 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -129,21 +129,10 @@ public: const DebugLoc &DL, int *BytesAdded = nullptr) const override; - /// Analyze the loop code, return true if it cannot be understood. Upon - /// success, this function returns false and returns information about the - /// induction variable and compare instruction used at the end. - bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, - MachineInstr *&CmpInst) const override; - - /// Generate code to reduce the loop iteration by one and check if the loop - /// is finished. Return the value/register of the new loop count. We need - /// this function when peeling off one or more iterations of a loop. This - /// function assumes the nth iteration is peeled first. 
- unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, - MachineInstr *IndVar, MachineInstr &Cmp, - SmallVectorImpl &Cond, - SmallVectorImpl &PrevInsts, - unsigned Iter, unsigned MaxIter) const override; + /// Analyze loop L, which must be a single-basic-block loop, and if the + /// conditions can be understood enough produce a PipelinerLoopInfo object. + std::unique_ptr + analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override; /// Return true if it's profitable to predicate /// instructions with accumulated instruction latency of "NumCycles" @@ -299,8 +288,7 @@ public: // memory addresses and false otherwise. bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, - const MachineInstr &MIb, - AliasAnalysis *AA = nullptr) const override; + const MachineInstr &MIb) const override; /// For instructions with a base and offset, return the position of the /// base register and offset operands. diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td index cabfd783effa..c5e3cfd080d6 100644 --- a/lib/Target/Hexagon/HexagonIntrinsics.td +++ b/lib/Target/Hexagon/HexagonIntrinsics.td @@ -22,14 +22,14 @@ class T_RP_pat def: Pat<(int_hexagon_A2_add IntRegs:$Rs, IntRegs:$Rt), (A2_add IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(int_hexagon_A2_addi IntRegs:$Rs, imm:$s16), +def: Pat<(int_hexagon_A2_addi IntRegs:$Rs, timm:$s16), (A2_addi IntRegs:$Rs, imm:$s16)>; def: Pat<(int_hexagon_A2_addp DoubleRegs:$Rs, DoubleRegs:$Rt), (A2_addp DoubleRegs:$Rs, DoubleRegs:$Rt)>; def: Pat<(int_hexagon_A2_sub IntRegs:$Rs, IntRegs:$Rt), (A2_sub IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(int_hexagon_A2_subri imm:$s10, IntRegs:$Rs), +def: Pat<(int_hexagon_A2_subri timm:$s10, IntRegs:$Rs), (A2_subri imm:$s10, IntRegs:$Rs)>; def: Pat<(int_hexagon_A2_subp DoubleRegs:$Rs, DoubleRegs:$Rt), (A2_subp DoubleRegs:$Rs, DoubleRegs:$Rt)>; @@ -45,26 +45,26 @@ def: Pat<(int_hexagon_M2_dpmpyss_s0 IntRegs:$Rs, IntRegs:$Rt), def: Pat<(int_hexagon_M2_dpmpyuu_s0 IntRegs:$Rs, IntRegs:$Rt), (M2_dpmpyuu_s0 IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$Rs, imm:$u5), +def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$Rs, timm:$u5), (S2_asl_i_r IntRegs:$Rs, imm:$u5)>; -def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$Rs, imm:$u5), +def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$Rs, timm:$u5), (S2_lsr_i_r IntRegs:$Rs, imm:$u5)>; -def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$Rs, imm:$u5), +def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$Rs, timm:$u5), (S2_asr_i_r IntRegs:$Rs, imm:$u5)>; -def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$Rs, imm:$u6), +def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$Rs, timm:$u6), (S2_asl_i_p DoubleRegs:$Rs, imm:$u6)>; -def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$Rs, imm:$u6), +def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$Rs, timm:$u6), (S2_lsr_i_p DoubleRegs:$Rs, imm:$u6)>; -def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$Rs, imm:$u6), +def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$Rs, timm:$u6), (S2_asr_i_p DoubleRegs:$Rs, imm:$u6)>; def: Pat<(int_hexagon_A2_and IntRegs:$Rs, IntRegs:$Rt), (A2_and IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(int_hexagon_A2_andir IntRegs:$Rs, imm:$s10), +def: Pat<(int_hexagon_A2_andir IntRegs:$Rs, timm:$s10), (A2_andir IntRegs:$Rs, imm:$s10)>; def: Pat<(int_hexagon_A2_or IntRegs:$Rs, IntRegs:$Rt), (A2_or IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(int_hexagon_A2_orir IntRegs:$Rs, imm:$s10), +def: Pat<(int_hexagon_A2_orir IntRegs:$Rs, timm:$s10), (A2_orir IntRegs:$Rs, imm:$s10)>; def: Pat<(int_hexagon_A2_xor IntRegs:$Rs, IntRegs:$Rt), 
(A2_xor IntRegs:$Rs, IntRegs:$Rt)>; @@ -99,13 +99,13 @@ def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, (i32 0)), (S2_vsathub I64:$Rs)>; } -def : Pat <(int_hexagon_S2_asr_i_r_rnd_goodsyntax I32:$Rs, u5_0ImmPred:$imm), +def : Pat <(int_hexagon_S2_asr_i_r_rnd_goodsyntax I32:$Rs, u5_0ImmPred_timm:$imm), (S2_asr_i_r_rnd I32:$Rs, (UDEC1 u5_0ImmPred:$imm))>; -def : Pat <(int_hexagon_S2_asr_i_p_rnd_goodsyntax I64:$Rs, u6_0ImmPred:$imm), +def : Pat <(int_hexagon_S2_asr_i_p_rnd_goodsyntax I64:$Rs, u6_0ImmPred_timm:$imm), (S2_asr_i_p_rnd I64:$Rs, (UDEC1 u6_0ImmPred:$imm))>; -def : Pat <(int_hexagon_S5_vasrhrnd_goodsyntax I64:$Rs, u4_0ImmPred:$imm), +def : Pat <(int_hexagon_S5_vasrhrnd_goodsyntax I64:$Rs, u4_0ImmPred_timm:$imm), (S5_vasrhrnd I64:$Rs, (UDEC1 u4_0ImmPred:$imm))>; -def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, u4_0ImmPred:$imm), +def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, u4_0ImmPred_timm:$imm), (S5_asrhub_rnd_sat I64:$Rs, (UDEC1 u4_0ImmPred:$imm))>; def ImmExt64: SDNodeXForm; -def : Pat <(int_hexagon_C2_cmpgei I32:$src1, s32_0ImmPred:$src2), +def : Pat <(int_hexagon_C2_cmpgei I32:$src1, s32_0ImmPred_timm:$src2), (C2_tfrpr (C2_cmpgti I32:$src1, (SDEC1 s32_0ImmPred:$src2)))>; -def : Pat <(int_hexagon_C2_cmpgeui I32:$src1, u32_0ImmPred:$src2), +def : Pat <(int_hexagon_C2_cmpgeui I32:$src1, u32_0ImmPred_timm:$src2), (C2_tfrpr (C2_cmpgtui I32:$src1, (UDEC1 u32_0ImmPred:$src2)))>; def : Pat <(int_hexagon_C2_cmpgeui I32:$src, 0), @@ -142,7 +142,7 @@ def : Pat <(int_hexagon_C2_cmpltu I32:$src1, I32:$src2), //===----------------------------------------------------------------------===// class S2op_tableidx_pat - : Pat <(IntID I32:$src1, I32:$src2, u4_0ImmPred:$src3, u5_0ImmPred:$src4), + : Pat <(IntID I32:$src1, I32:$src2, u4_0ImmPred_timm:$src3, u5_0ImmPred_timm:$src4), (OutputInst I32:$src1, I32:$src2, u4_0ImmPred:$src3, (XformImm u5_0ImmPred:$src4))>; @@ -197,11 +197,11 @@ class T_stc_pat : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s), (MI I32:$Rs, Imm:$s, I32:$Ru, Val:$Rt)>; -def: T_stc_pat; -def: T_stc_pat; -def: T_stc_pat; -def: T_stc_pat; -def: T_stc_pat; +def: T_stc_pat; +def: T_stc_pat; +def: T_stc_pat; +def: T_stc_pat; +def: T_stc_pat; multiclass MaskedStore { def : Pat<(IntID HvxQR:$src1, IntRegs:$src2, HvxVR:$src3), diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp index ac48e1dc30b0..bda3eccac0cd 100644 --- a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp +++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp @@ -93,9 +93,9 @@ static cl::opt OnlyNonNestedMemmove("only-nonnested-memmove-idiom", cl::Hidden, cl::init(true), cl::desc("Only enable generating memmove in non-nested loops")); -cl::opt HexagonVolatileMemcpy("disable-hexagon-volatile-memcpy", - cl::Hidden, cl::init(false), - cl::desc("Enable Hexagon-specific memcpy for volatile destination.")); +static cl::opt HexagonVolatileMemcpy( + "disable-hexagon-volatile-memcpy", cl::Hidden, cl::init(false), + cl::desc("Enable Hexagon-specific memcpy for volatile destination.")); static cl::opt SimplifyLimit("hlir-simplify-limit", cl::init(10000), cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR")); @@ -632,9 +632,9 @@ Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) { if (!isa(InitV) || !cast(InitV)->isZero()) continue; Value *IterV = PN->getIncomingValueForBlock(BB); - if (!isa(IterV)) - continue; auto *BO = dyn_cast(IterV); + if (!BO) + continue; if (BO->getOpcode() != 
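
The C2_cmpgei/C2_cmpgeui patterns above lower "greater or equal to an immediate" as "greater than the immediate minus one" (the SDEC1/UDEC1 transforms), with the unsigned compare against 0 handled by its own always-true pattern. A small standalone check of that identity; the helper names are made up for illustration, and the signed variant assumes the immediate minus one does not wrap.

#include <cassert>
#include <cstdint>

bool geViaGtSigned(int32_t X, int32_t S) { return X > S - 1; }      // valid when S-1 does not wrap
bool geViaGtUnsigned(uint32_t X, uint32_t S) { return X > S - 1; }  // valid only for S != 0

int main() {
  for (int32_t S : {-7, 0, 1, 42})
    for (int32_t X : {-8, -7, -1, 0, 41, 42, 43})
      assert(geViaGtSigned(X, S) == (X >= S));
  for (uint32_t S : {1u, 2u, 100u})
    for (uint32_t X : {0u, 1u, 99u, 100u, 101u})
      assert(geViaGtUnsigned(X, S) == (X >= S));
}
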
Instruction::Add) continue; Value *IncV = nullptr; @@ -2020,7 +2020,7 @@ bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop, // See if the pointer expression is an AddRec like {base,+,1} on the current // loop, which indicates a strided load. If we have something else, it's a // random load we can't handle. - LoadInst *LI = dyn_cast(SI->getValueOperand()); + auto *LI = cast(SI->getValueOperand()); auto *LoadEv = cast(SE->getSCEV(LI->getPointerOperand())); // The trip count of the loop and the base pointer of the addrec SCEV is @@ -2426,7 +2426,8 @@ bool HexagonLoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { DL = &L->getHeader()->getModule()->getDataLayout(); DT = &getAnalysis().getDomTree(); LF = &getAnalysis().getLoopInfo(); - TLI = &getAnalysis().getTLI(); + TLI = &getAnalysis().getTLI( + *L->getHeader()->getParent()); SE = &getAnalysis().getSE(); HasMemcpy = TLI->has(LibFunc_memcpy); diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp index db44901ca706..680d01e12af0 100644 --- a/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -177,7 +177,7 @@ static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII, (II->getOperand(i).isUse() || II->getOperand(i).isDef())) { MachineBasicBlock::iterator localII = II; ++localII; - unsigned Reg = II->getOperand(i).getReg(); + Register Reg = II->getOperand(i).getReg(); for (MachineBasicBlock::iterator localBegin = localII; localBegin != end; ++localBegin) { if (localBegin == skip) @@ -290,7 +290,7 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, // at machine code level, we don't need this, but if we decide // to move new value jump prior to RA, we would be needing this. MachineRegisterInfo &MRI = MF.getRegInfo(); - if (secondReg && !TargetRegisterInfo::isPhysicalRegister(cmpOp2)) { + if (secondReg && !Register::isPhysicalRegister(cmpOp2)) { MachineInstr *def = MRI.getVRegDef(cmpOp2); if (def->getOpcode() == TargetOpcode::COPY) return false; @@ -516,7 +516,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { jmpPos = MII; jmpInstr = &MI; predReg = MI.getOperand(0).getReg(); - afterRA = TargetRegisterInfo::isPhysicalRegister(predReg); + afterRA = Register::isPhysicalRegister(predReg); // If ifconverter had not messed up with the kill flags of the // operands, the following check on the kill flag would suffice. @@ -603,7 +603,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { (isSecondOpReg && MI.getOperand(0).getReg() == (unsigned)cmpOp2))) { - unsigned feederReg = MI.getOperand(0).getReg(); + Register feederReg = MI.getOperand(0).getReg(); // First try to see if we can get the feeder from the first operand // of the compare. 
If we can not, and if secondOpReg is true @@ -651,7 +651,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { for (MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isUse()) continue; - unsigned UseR = MO.getReg(); + Register UseR = MO.getReg(); for (auto I = std::next(MI.getIterator()); I != jmpPos; ++I) { if (I == cmpPos) continue; diff --git a/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/lib/Target/Hexagon/HexagonOptAddrMode.cpp index 547da9fd598f..9121115020a2 100644 --- a/lib/Target/Hexagon/HexagonOptAddrMode.cpp +++ b/lib/Target/Hexagon/HexagonOptAddrMode.cpp @@ -162,7 +162,7 @@ bool HexagonOptAddrMode::canRemoveAddasl(NodeAddr AddAslSN, if (!OffsetOp.isImm() || OffsetOp.getImm() > 3) return false; - unsigned OffsetReg = MI.getOperand(2).getReg(); + Register OffsetReg = MI.getOperand(2).getReg(); RegisterRef OffsetRR; NodeId OffsetRegRD = 0; for (NodeAddr UA : AddAslSN.Addr->members_if(DFG->IsUse, *DFG)) { @@ -348,7 +348,7 @@ bool HexagonOptAddrMode::processAddUses(NodeAddr AddSN, MachineInstr *AddMI, const NodeList &UNodeList) { - unsigned AddDefR = AddMI->getOperand(0).getReg(); + Register AddDefR = AddMI->getOperand(0).getReg(); for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) { NodeAddr UN = *I; NodeAddr SN = UN.Addr->getOwner(*DFG); @@ -381,7 +381,7 @@ bool HexagonOptAddrMode::processAddUses(NodeAddr AddSN, // Ex: Rx= add(Rt,#10) // memw(Rx+#0) = Rs // will be replaced with => memw(Rt+#10) = Rs - unsigned BaseReg = AddMI->getOperand(1).getReg(); + Register BaseReg = AddMI->getOperand(1).getReg(); if (!isSafeToExtLR(AddSN, AddMI, BaseReg, UNodeList)) return false; } @@ -411,7 +411,7 @@ bool HexagonOptAddrMode::updateAddUses(MachineInstr *AddMI, MachineInstr *UseMI) { const MachineOperand ImmOp = AddMI->getOperand(2); const MachineOperand AddRegOp = AddMI->getOperand(1); - unsigned newReg = AddRegOp.getReg(); + Register newReg = AddRegOp.getReg(); const MCInstrDesc &MID = UseMI->getDesc(); MachineOperand &BaseOp = MID.mayLoad() ? UseMI->getOperand(1) @@ -543,7 +543,7 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp, bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp, unsigned ImmOpNum) { bool Changed = false; - unsigned OpStart; + unsigned OpStart = 0; unsigned OpEnd = OldMI->getNumOperands(); MachineBasicBlock *BB = OldMI->getParent(); auto UsePos = MachineBasicBlock::iterator(OldMI); @@ -724,7 +724,7 @@ bool HexagonOptAddrMode::processBlock(NodeAddr BA) { } short SizeInc = 0; - unsigned DefR = MI->getOperand(0).getReg(); + Register DefR = MI->getOperand(0).getReg(); InstrEvalMap InstrEvalResult; // Analyze all uses and calculate increase in size. 
Perform the optimization diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td index fb731f56bfbf..485e658e1c84 100644 --- a/lib/Target/Hexagon/HexagonPatterns.td +++ b/lib/Target/Hexagon/HexagonPatterns.td @@ -99,13 +99,21 @@ def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>; def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>; def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>; +def SDTVecLeaf: + SDTypeProfile<1, 0, [SDTCisVec<0>]>; def SDTVecVecIntOp: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>, SDTCisVT<3,i32>]>; +def HexagonPTRUE: SDNode<"HexagonISD::PTRUE", SDTVecLeaf>; +def HexagonPFALSE: SDNode<"HexagonISD::PFALSE", SDTVecLeaf>; def HexagonVALIGN: SDNode<"HexagonISD::VALIGN", SDTVecVecIntOp>; def HexagonVALIGNADDR: SDNode<"HexagonISD::VALIGNADDR", SDTIntUnaryOp>; +def ptrue: PatFrag<(ops), (HexagonPTRUE)>; +def pfalse: PatFrag<(ops), (HexagonPFALSE)>; +def pnot: PatFrag<(ops node:$Pu), (xor node:$Pu, ptrue)>; + def valign: PatFrag<(ops node:$Vt, node:$Vs, node:$Ru), (HexagonVALIGN node:$Vt, node:$Vs, node:$Ru)>; def valignaddr: PatFrag<(ops node:$Addr), (HexagonVALIGNADDR node:$Addr)>; @@ -154,6 +162,11 @@ def IsNPow2_64H: PatLeaf<(i64 imm), [{ return isPowerOf2_64(NV) && Log2_64(NV) >= 32; }]>; +class IsULE: PatLeaf<(i32 imm), + "uint64_t V = N->getZExtValue();" # + "return isUInt<" # Width # ">(V) && V <= " # Arg # ";" +>; + class IsUGT: PatLeaf<(i32 imm), "uint64_t V = N->getZExtValue();" # "return isUInt<" # Width # ">(V) && V > " # Arg # ";" @@ -320,6 +333,24 @@ multiclass SelMinMax_pats; } +multiclass MinMax_pats { + def: Pat<(Sel (CmpType (CmpOp CmpPred:$Vs, CmpPred:$Vt)), + CmpPred:$Vt, CmpPred:$Vs), + (PickT CmpPred:$Vs, CmpPred:$Vt)>; + def: Pat<(Sel (CmpType (CmpOp CmpPred:$Vs, CmpPred:$Vt)), + CmpPred:$Vs, CmpPred:$Vt), + (PickS CmpPred:$Vs, CmpPred:$Vt)>; +} + +// Bitcasts between same-size vector types are no-ops, except for the +// actual type change. +multiclass NopCast_pat { + def: Pat<(Ty1 (bitconvert (Ty2 RC:$Val))), (Ty1 RC:$Val)>; + def: Pat<(Ty2 (bitconvert (Ty1 RC:$Val))), (Ty2 RC:$Val)>; +} + // Frags for commonly used SDNodes. def Add: pf2; def And: pf2; def Sra: pf2; @@ -403,17 +434,18 @@ def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>; def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>; def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>; -multiclass Cast_pat { - def: Pat<(Tb (bitconvert (Ta RC:$Rs))), (Tb RC:$Rs)>; - def: Pat<(Ta (bitconvert (Tb RC:$Rs))), (Ta RC:$Rs)>; -} - -// Bit convert vector types to integers. -defm: Cast_pat; -defm: Cast_pat; -defm: Cast_pat; -defm: Cast_pat; -defm: Cast_pat; +// Bit convert 32- and 64-bit types. +// All of these are bitcastable to one another: i32, v2i16, v4i8. +defm: NopCast_pat; +defm: NopCast_pat; +defm: NopCast_pat; +// All of these are bitcastable to one another: i64, v2i32, v4i16, v8i8. 
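
The NopCast_pat multiclass introduced above encodes that a bitcast between same-size vector types leaves the register contents alone and only changes the static type. A standalone sketch of the same idea in C++, reinterpreting one 32-bit value as four 8-bit lanes and back; the function names are illustrative.

#include <array>
#include <cassert>
#include <cstdint>
#include <cstring>

// "Bitcast" i32 -> v4i8: same bits, new type. std::memcpy is the portable
// way to express a value-preserving reinterpretation in C++.
std::array<uint8_t, 4> asV4I8(uint32_t R) {
  std::array<uint8_t, 4> Lanes{};
  std::memcpy(Lanes.data(), &R, sizeof(R));
  return Lanes;
}

// "Bitcast" v4i8 -> i32: the round trip is the identity.
uint32_t asI32(const std::array<uint8_t, 4> &Lanes) {
  uint32_t R;
  std::memcpy(&R, Lanes.data(), sizeof(R));
  return R;
}

int main() {
  uint32_t R = 0x11223344;
  assert(asI32(asV4I8(R)) == R);
}
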
+defm: NopCast_pat; +defm: NopCast_pat; +defm: NopCast_pat; +defm: NopCast_pat; +defm: NopCast_pat; +defm: NopCast_pat; // --(3) Extend/truncate ------------------------------------------------- @@ -497,7 +529,9 @@ def: Pat<(v2i16 (trunc V2I32:$Rs)), // def: Pat<(not I1:$Ps), (C2_not I1:$Ps)>; -def: Pat<(not V8I1:$Ps), (C2_not V8I1:$Ps)>; +def: Pat<(pnot V2I1:$Ps), (C2_not V2I1:$Ps)>; +def: Pat<(pnot V4I1:$Ps), (C2_not V4I1:$Ps)>; +def: Pat<(pnot V8I1:$Ps), (C2_not V8I1:$Ps)>; def: Pat<(add I1:$Ps, -1), (C2_not I1:$Ps)>; multiclass BoolOpR_RR_pat { @@ -816,14 +850,6 @@ def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs), def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I), (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>; -def: Pat<(select I1:$Pu, V4I8:$Rs, V4I8:$Rt), - (LoReg (C2_vmux I1:$Pu, (ToAext64 $Rs), (ToAext64 $Rt)))>; -def: Pat<(select I1:$Pu, V2I16:$Rs, V2I16:$Rt), - (LoReg (C2_vmux I1:$Pu, (ToAext64 $Rs), (ToAext64 $Rt)))>; -def: Pat<(select I1:$Pu, V2I32:$Rs, V2I32:$Rt), - (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)), - (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>; - def: Pat<(vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt), (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>; def: Pat<(vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt), @@ -831,6 +857,14 @@ def: Pat<(vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt), def: Pat<(vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt), (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>; +def: Pat<(vselect (pnot V8I1:$Pu), V8I8:$Rs, V8I8:$Rt), + (C2_vmux V8I1:$Pu, V8I8:$Rt, V8I8:$Rs)>; +def: Pat<(vselect (pnot V4I1:$Pu), V4I16:$Rs, V4I16:$Rt), + (C2_vmux V4I1:$Pu, V4I16:$Rt, V4I16:$Rs)>; +def: Pat<(vselect (pnot V2I1:$Pu), V2I32:$Rs, V2I32:$Rt), + (C2_vmux V2I1:$Pu, V2I32:$Rt, V2I32:$Rs)>; + + // From LegalizeDAG.cpp: (Pu ? Pv : Pw) <=> (Pu & Pv) | (!Pu & Pw). def: Pat<(select I1:$Pu, I1:$Pv, I1:$Pw), (C2_or (C2_and I1:$Pu, I1:$Pv), @@ -863,32 +897,44 @@ let AddedComplexity = 200 in { } let AddedComplexity = 200 in { - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; - - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; } let AddedComplexity = 100 in { - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; -} - + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; +} + +defm: MinMax_pats; +defm: MinMax_pats; +defm: MinMax_pats; +defm: MinMax_pats; +defm: MinMax_pats; +defm: MinMax_pats; +defm: MinMax_pats; +defm: MinMax_pats; +defm: MinMax_pats; +defm: MinMax_pats; +defm: MinMax_pats; +defm: MinMax_pats; // --(7) Insert/extract -------------------------------------------------- // @@ -1639,19 +1685,19 @@ def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), // // Count leading zeros. -def: Pat<(ctlz I32:$Rs), (S2_cl0 I32:$Rs)>; +def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>; def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>; // Count trailing zeros. 
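
The MinMax_pats instantiations above (their template arguments are not visible in this stripped diff) all follow one shape: a compare feeding a select is really a min or a max, and which one depends on whether the select returns the compared operands in the same order as the compare or swapped. A standalone sketch of the two forms, assuming a greater-than compare for concreteness.

#include <cassert>

int selGtSame(int Vs, int Vt) { return Vs > Vt ? Vs : Vt; }    // picks the larger value: max
int selGtSwapped(int Vs, int Vt) { return Vs > Vt ? Vt : Vs; } // picks the smaller value: min

int main() {
  assert(selGtSame(3, 9) == 9 && selGtSame(9, 3) == 9);
  assert(selGtSwapped(3, 9) == 3 && selGtSwapped(9, 3) == 3);
}
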
-def: Pat<(cttz I32:$Rs), (S2_ct0 I32:$Rs)>; +def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>; def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>; // Count leading ones. -def: Pat<(ctlz (not I32:$Rs)), (S2_cl1 I32:$Rs)>; +def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>; def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>; // Count trailing ones. -def: Pat<(cttz (not I32:$Rs)), (S2_ct1 I32:$Rs)>; +def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>; def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>; // Define leading/trailing patterns that require zero-extensions to 64 bits. @@ -1706,6 +1752,7 @@ let AddedComplexity = 20 in { // Complexity greater than and/or/xor (i32 (LoReg $Rss)))>; } + let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. def: Pat<(i1 (setne (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)), (S2_tstbit_i IntRegs:$Rs, imm:$u5)>; @@ -1717,6 +1764,20 @@ let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. (S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>; } +def: Pat<(and (srl I32:$Rs, u5_0ImmPred:$u5), 1), + (I1toI32 (S2_tstbit_i I32:$Rs, imm:$u5))>; +def: Pat<(and (srl I64:$Rss, IsULE<32,31>:$u6), 1), + (ToZext64 (I1toI32 (S2_tstbit_i (LoReg $Rss), imm:$u6)))>; +def: Pat<(and (srl I64:$Rss, IsUGT<32,31>:$u6), 1), + (ToZext64 (I1toI32 (S2_tstbit_i (HiReg $Rss), (UDEC32 $u6))))>; + +def: Pat<(and (not (srl I32:$Rs, u5_0ImmPred:$u5)), 1), + (I1toI32 (S4_ntstbit_i I32:$Rs, imm:$u5))>; +def: Pat<(and (not (srl I64:$Rss, IsULE<32,31>:$u6)), 1), + (ToZext64 (I1toI32 (S4_ntstbit_i (LoReg $Rss), imm:$u6)))>; +def: Pat<(and (not (srl I64:$Rss, IsUGT<32,31>:$u6)), 1), + (ToZext64 (I1toI32 (S4_ntstbit_i (HiReg $Rss), (UDEC32 $u6))))>; + let AddedComplexity = 20 in { // Complexity greater than compare reg-imm. def: Pat<(i1 (seteq (and I32:$Rs, u6_0ImmPred:$u6), 0)), (C2_bitsclri IntRegs:$Rs, imm:$u6)>; @@ -1737,23 +1798,28 @@ def: Pat<(HexagonTSTBIT I32:$Rs, u5_0ImmPred:$u5), def: Pat<(HexagonTSTBIT I32:$Rs, I32:$Rt), (S2_tstbit_r I32:$Rs, I32:$Rt)>; +// Add extra complexity to prefer these instructions over bitsset/bitsclr. +// The reason is that tstbit/ntstbit can be folded into a compound instruction: +// if ([!]tstbit(...)) jump ... let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. - def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)), - (S4_ntstbit_i I32:$Rs, imm:$u5)>; + def: Pat<(i1 (seteq (and I32:$Rs, IsPow2_32:$u5), 0)), + (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5))>; + def: Pat<(i1 (setne (and I32:$Rs, IsPow2_32:$u5), 0)), + (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5))>; def: Pat<(i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)), (S4_ntstbit_r I32:$Rs, I32:$Rt)>; + def: Pat<(i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)), + (S2_tstbit_r I32:$Rs, I32:$Rt)>; } -// Add extra complexity to prefer these instructions over bitsset/bitsclr. -// The reason is that tstbit/ntstbit can be folded into a compound instruction: -// if ([!]tstbit(...)) jump ... 
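
The new patterns above map single-bit probes onto tstbit/ntstbit: "(x >> n) & 1" and "x & (1 << n)" test the same bit, and a power-of-two mask identifies that bit by its log2 (the Log2_32/Log2_64 transforms). A standalone check of those equivalences.

#include <cassert>
#include <cstdint>

// tstbit(x, n): is bit n of x set?
bool tstbit(uint32_t X, unsigned N) { return (X >> N) & 1u; }

int main() {
  uint32_t X = 0xA4;  // bits 2, 5 and 7 set
  for (unsigned N = 0; N < 32; ++N)
    assert(tstbit(X, N) == ((X & (1u << N)) != 0));
  // A power-of-two mask M tests bit log2(M): 0x20 is 1 << 5.
  assert(tstbit(X, 5) && (X & 0x20u) != 0);
}
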
-let AddedComplexity = 100 in -def: Pat<(i1 (setne (and I32:$Rs, (i32 IsPow2_32:$u5)), (i32 0))), - (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5))>; - -let AddedComplexity = 100 in -def: Pat<(i1 (seteq (and I32:$Rs, (i32 IsPow2_32:$u5)), (i32 0))), - (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5))>; +def: Pat<(i1 (seteq (and I64:$Rs, IsPow2_64L:$u6), 0)), + (S4_ntstbit_i (LoReg $Rs), (Log2_64 $u6))>; +def: Pat<(i1 (seteq (and I64:$Rs, IsPow2_64H:$u6), 0)), + (S4_ntstbit_i (HiReg $Rs), (UDEC32 (i32 (Log2_64 $u6))))>; +def: Pat<(i1 (setne (and I64:$Rs, IsPow2_64L:$u6), 0)), + (S2_tstbit_i (LoReg $Rs), (Log2_32 imm:$u6))>; +def: Pat<(i1 (setne (and I64:$Rs, IsPow2_64H:$u6), 0)), + (S2_tstbit_i (HiReg $Rs), (UDEC32 (i32 (Log2_32 imm:$u6))))>; // Do not increase complexity of these patterns. In the DAG, "cmp i8" may be // represented as a compare against "value & 0xFF", which is an exact match @@ -1773,10 +1839,18 @@ def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), let AddedComplexity = 100 in { // Avoid A4_rcmp[n]eqi in these cases: + def: Pat<(i32 (zext (i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)))), + (I1toI32 (S4_ntstbit_r IntRegs:$Rs, IntRegs:$Rt))>; def: Pat<(i32 (zext (i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)))), (I1toI32 (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt))>; + def: Pat<(i32 (zext (i1 (seteq (and I32:$Rs, IsPow2_32:$u5), 0)))), + (I1toI32 (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5)))>; + def: Pat<(i32 (zext (i1 (setne (and I32:$Rs, IsPow2_32:$u5), 0)))), + (I1toI32 (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5)))>; def: Pat<(i32 (zext (i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)))), - (I1toI32 (S4_ntstbit_r IntRegs:$Rs, IntRegs:$Rt))>; + (I1toI32 (S4_ntstbit_r I32:$Rs, I32:$Rt))>; + def: Pat<(i32 (zext (i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)))), + (I1toI32 (S2_tstbit_r I32:$Rs, I32:$Rt))>; } // --(11) PIC ------------------------------------------------------------ diff --git a/lib/Target/Hexagon/HexagonPatternsHVX.td b/lib/Target/Hexagon/HexagonPatternsHVX.td index a4cfca9ac7d7..078a7135c55b 100644 --- a/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -1,5 +1,3 @@ -def SDTVecLeaf: - SDTypeProfile<1, 0, [SDTCisVec<0>]>; def SDTVecBinOp: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>]>; @@ -162,23 +160,14 @@ let Predicates = [UseHVX] in { // Bitcasts between same-size vector types are no-ops, except for the // actual type change. 
-class Bitcast - : Pat<(ResTy (bitconvert (InpTy RC:$Val))), (ResTy RC:$Val)>; - let Predicates = [UseHVX] in { - def: Bitcast; - def: Bitcast; - def: Bitcast; - def: Bitcast; - def: Bitcast; - def: Bitcast; - - def: Bitcast; - def: Bitcast; - def: Bitcast; - def: Bitcast; - def: Bitcast; - def: Bitcast; + defm: NopCast_pat; + defm: NopCast_pat; + defm: NopCast_pat; + + defm: NopCast_pat; + defm: NopCast_pat; + defm: NopCast_pat; } let Predicates = [UseHVX] in { @@ -259,6 +248,21 @@ class Vneg1 class Vnot : PatFrag<(ops node:$Vs), (xor $Vs, Vneg1)>; +let Predicates = [UseHVX] in { + let AddedComplexity = 220 in { + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + } +} + let Predicates = [UseHVX] in { let AddedComplexity = 200 in { def: Pat<(Vnot HVI8:$Vs), (V6_vnot HvxVR:$Vs)>; diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp index 8f761d2d4805..0ccfe64ad1e5 100644 --- a/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/lib/Target/Hexagon/HexagonPeephole.cpp @@ -136,11 +136,11 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { assert(MI.getNumOperands() == 2); MachineOperand &Dst = MI.getOperand(0); MachineOperand &Src = MI.getOperand(1); - unsigned DstReg = Dst.getReg(); - unsigned SrcReg = Src.getReg(); + Register DstReg = Dst.getReg(); + Register SrcReg = Src.getReg(); // Just handle virtual registers. - if (TargetRegisterInfo::isVirtualRegister(DstReg) && - TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if (Register::isVirtualRegister(DstReg) && + Register::isVirtualRegister(SrcReg)) { // Map the following: // %170 = SXTW %166 // PeepholeMap[170] = %166 @@ -157,8 +157,8 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { MachineOperand &Src2 = MI.getOperand(2); if (Src1.getImm() != 0) continue; - unsigned DstReg = Dst.getReg(); - unsigned SrcReg = Src2.getReg(); + Register DstReg = Dst.getReg(); + Register SrcReg = Src2.getReg(); PeepholeMap[DstReg] = SrcReg; } @@ -174,8 +174,8 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { MachineOperand &Src2 = MI.getOperand(2); if (Src2.getImm() != 32) continue; - unsigned DstReg = Dst.getReg(); - unsigned SrcReg = Src1.getReg(); + Register DstReg = Dst.getReg(); + Register SrcReg = Src1.getReg(); PeepholeDoubleRegsMap[DstReg] = std::make_pair(*&SrcReg, Hexagon::isub_hi); } @@ -185,11 +185,11 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { assert(MI.getNumOperands() == 2); MachineOperand &Dst = MI.getOperand(0); MachineOperand &Src = MI.getOperand(1); - unsigned DstReg = Dst.getReg(); - unsigned SrcReg = Src.getReg(); + Register DstReg = Dst.getReg(); + Register SrcReg = Src.getReg(); // Just handle virtual registers. 
- if (TargetRegisterInfo::isVirtualRegister(DstReg) && - TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if (Register::isVirtualRegister(DstReg) && + Register::isVirtualRegister(SrcReg)) { // Map the following: // %170 = NOT_xx %166 // PeepholeMap[170] = %166 @@ -208,10 +208,10 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { if (Src.getSubReg() != Hexagon::isub_lo) continue; - unsigned DstReg = Dst.getReg(); - unsigned SrcReg = Src.getReg(); - if (TargetRegisterInfo::isVirtualRegister(DstReg) && - TargetRegisterInfo::isVirtualRegister(SrcReg)) { + Register DstReg = Dst.getReg(); + Register SrcReg = Src.getReg(); + if (Register::isVirtualRegister(DstReg) && + Register::isVirtualRegister(SrcReg)) { // Try to find in the map. if (unsigned PeepholeSrc = PeepholeMap.lookup(SrcReg)) { // Change the 1st operand. @@ -237,12 +237,12 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { bool Done = false; if (QII->isPredicated(MI)) { MachineOperand &Op0 = MI.getOperand(0); - unsigned Reg0 = Op0.getReg(); + Register Reg0 = Op0.getReg(); const TargetRegisterClass *RC0 = MRI->getRegClass(Reg0); if (RC0->getID() == Hexagon::PredRegsRegClassID) { // Handle instructions that have a prediate register in op0 // (most cases of predicable instructions). - if (TargetRegisterInfo::isVirtualRegister(Reg0)) { + if (Register::isVirtualRegister(Reg0)) { // Try to find in the map. if (unsigned PeepholeSrc = PeepholeMap.lookup(Reg0)) { // Change the 1st operand and, flip the opcode. @@ -275,7 +275,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { break; } if (NewOp) { - unsigned PSrc = MI.getOperand(PR).getReg(); + Register PSrc = MI.getOperand(PR).getReg(); if (unsigned POrig = PeepholeMap.lookup(PSrc)) { BuildMI(*MBB, MI.getIterator(), MI.getDebugLoc(), QII->get(NewOp), MI.getOperand(0).getReg()) diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 4f5f750e5842..b7171fb14272 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -217,7 +217,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // If the offset is not valid, calculate the address in a temporary // register and use it with offset 0. 
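
Most of the mechanical changes in this patch replace "unsigned" register variables and the static TargetRegisterInfo::is{Virtual,Physical}Register helpers with the Register class. The distinction it wraps is that virtual register numbers live in a range separate from physical ones, so the two can be told apart from the value alone. A simplified, hedged sketch of such a scheme; the bit layout shown is an assumption for illustration, not a guaranteed ABI.

#include <cassert>
#include <cstdint>

struct Reg {
  uint32_t Id;
  static const uint32_t VirtualBit = 1u << 31;  // assumed marker for virtual regs
  bool isVirtual() const { return (Id & VirtualBit) != 0; }
  bool isPhysical() const { return Id != 0 && !isVirtual(); }  // 0 means "no register"
  static Reg makeVirtual(uint32_t Index) { return {Index | VirtualBit}; }
};

int main() {
  Reg R1{1};                      // a physical register id
  Reg V = Reg::makeVirtual(170);  // e.g. %170 in MIR dumps
  assert(R1.isPhysical() && !R1.isVirtual());
  assert(V.isVirtual() && !V.isPhysical());
}
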
auto &MRI = MF.getRegInfo(); - unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); const DebugLoc &DL = MI.getDebugLoc(); BuildMI(MB, II, DL, HII.get(Hexagon::A2_addi), TmpR) .addReg(BP) @@ -249,8 +249,8 @@ bool HexagonRegisterInfo::shouldCoalesce(MachineInstr *MI, if (!SmallSrc && !SmallDst) return true; - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned SrcReg = MI->getOperand(1).getReg(); + Register DstReg = MI->getOperand(0).getReg(); + Register SrcReg = MI->getOperand(1).getReg(); const SlotIndexes &Indexes = *LIS.getSlotIndexes(); auto HasCall = [&Indexes] (const LiveInterval::Segment &S) { for (SlotIndex I = S.start.getBaseIndex(), E = S.end.getBaseIndex(); diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp index bd4254aea276..f9fb14c190ff 100644 --- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp +++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp @@ -76,18 +76,18 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { unsigned Opc = MI.getOpcode(); if (Opc == Hexagon::CONST32) { - unsigned DestReg = MI.getOperand(0).getReg(); + Register DestReg = MI.getOperand(0).getReg(); uint64_t ImmValue = MI.getOperand(1).getImm(); const DebugLoc &DL = MI.getDebugLoc(); BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), DestReg) .addImm(ImmValue); B.erase(&MI); } else if (Opc == Hexagon::CONST64) { - unsigned DestReg = MI.getOperand(0).getReg(); + Register DestReg = MI.getOperand(0).getReg(); int64_t ImmValue = MI.getOperand(1).getImm(); const DebugLoc &DL = MI.getDebugLoc(); - unsigned DestLo = TRI->getSubReg(DestReg, Hexagon::isub_lo); - unsigned DestHi = TRI->getSubReg(DestReg, Hexagon::isub_hi); + Register DestLo = TRI->getSubReg(DestReg, Hexagon::isub_lo); + Register DestHi = TRI->getSubReg(DestReg, Hexagon::isub_hi); int32_t LowWord = (ImmValue & 0xFFFFFFFF); int32_t HighWord = (ImmValue >> 32) & 0xFFFFFFFF; diff --git a/lib/Target/Hexagon/HexagonSplitDouble.cpp b/lib/Target/Hexagon/HexagonSplitDouble.cpp index 013eede2d414..55f31c628854 100644 --- a/lib/Target/Hexagon/HexagonSplitDouble.cpp +++ b/lib/Target/Hexagon/HexagonSplitDouble.cpp @@ -210,8 +210,8 @@ bool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const { for (auto &Op : MI->operands()) { if (!Op.isReg()) continue; - unsigned R = Op.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) + Register R = Op.getReg(); + if (!Register::isVirtualRegister(R)) return true; } return false; @@ -224,14 +224,14 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) { unsigned NumRegs = MRI->getNumVirtRegs(); BitVector DoubleRegs(NumRegs); for (unsigned i = 0; i < NumRegs; ++i) { - unsigned R = TargetRegisterInfo::index2VirtReg(i); + unsigned R = Register::index2VirtReg(i); if (MRI->getRegClass(R) == DoubleRC) DoubleRegs.set(i); } BitVector FixedRegs(NumRegs); for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) { - unsigned R = TargetRegisterInfo::index2VirtReg(x); + unsigned R = Register::index2VirtReg(x); MachineInstr *DefI = MRI->getVRegDef(R); // In some cases a register may exist, but never be defined or used. 
// It should never appear anywhere, but mark it as "fixed", just to be @@ -244,7 +244,7 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) { for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) { if (FixedRegs[x]) continue; - unsigned R = TargetRegisterInfo::index2VirtReg(x); + unsigned R = Register::index2VirtReg(x); LLVM_DEBUG(dbgs() << printReg(R, TRI) << " ~~"); USet &Asc = AssocMap[R]; for (auto U = MRI->use_nodbg_begin(R), Z = MRI->use_nodbg_end(); @@ -258,14 +258,14 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) { // Skip non-registers or registers with subregisters. if (&MO == &Op || !MO.isReg() || MO.getSubReg()) continue; - unsigned T = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(T)) { + Register T = MO.getReg(); + if (!Register::isVirtualRegister(T)) { FixedRegs.set(x); continue; } if (MRI->getRegClass(T) != DoubleRC) continue; - unsigned u = TargetRegisterInfo::virtReg2Index(T); + unsigned u = Register::virtReg2Index(T); if (FixedRegs[u]) continue; LLVM_DEBUG(dbgs() << ' ' << printReg(T, TRI)); @@ -281,7 +281,7 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) { unsigned NextP = 1; USet Visited; for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) { - unsigned R = TargetRegisterInfo::index2VirtReg(x); + unsigned R = Register::index2VirtReg(x); if (Visited.count(R)) continue; // Create a new partition for R. @@ -372,8 +372,8 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const { case Hexagon::A2_andp: case Hexagon::A2_orp: case Hexagon::A2_xorp: { - unsigned Rs = MI->getOperand(1).getReg(); - unsigned Rt = MI->getOperand(2).getReg(); + Register Rs = MI->getOperand(1).getReg(); + Register Rt = MI->getOperand(2).getReg(); return profit(Rs) + profit(Rt); } @@ -400,7 +400,7 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const { } int32_t HexagonSplitDoubleRegs::profit(unsigned Reg) const { - assert(TargetRegisterInfo::isVirtualRegister(Reg)); + assert(Register::isVirtualRegister(Reg)); const MachineInstr *DefI = MRI->getVRegDef(Reg); switch (DefI->getOpcode()) { @@ -499,7 +499,7 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L, return; assert(Cond[1].isReg() && "Unexpected Cond vector from analyzeBranch"); // Expect a predicate register. - unsigned PR = Cond[1].getReg(); + Register PR = Cond[1].getReg(); assert(MRI->getRegClass(PR) == &Hexagon::PredRegsRegClass); // Get the registers on which the loop controlling compare instruction @@ -535,7 +535,7 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L, if (!MI.isPHI()) break; const MachineOperand &MD = MI.getOperand(0); - unsigned R = MD.getReg(); + Register R = MD.getReg(); if (MRI->getRegClass(R) == DoubleRC) DP.push_back(R); } @@ -551,7 +551,7 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L, // Get the output from the add. If it is one of the inputs to the // loop-controlling compare instruction, then R is likely an induc- // tion register. - unsigned T = UseI->getOperand(0).getReg(); + Register T = UseI->getOperand(0).getReg(); if (T == CmpR1 || T == CmpR2) return false; } @@ -603,9 +603,9 @@ void HexagonSplitDoubleRegs::createHalfInstr(unsigned Opc, MachineInstr *MI, continue; } // For register operands, set the subregister. 
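
The HexagonSplitDoubleRegs changes continue below into splitShift, which rewrites a 64-bit shift on a register pair as operations on the two 32-bit halves. For a left shift by 0 < s < 32 that follows the usual identity: the low half is shifted directly, and the high half combines its own shifted bits with the bits that spill over from the low half. A standalone check of that identity; function and variable names are illustrative.

#include <cassert>
#include <cstdint>

// Left-shift a 64-bit value represented as (Hi:Lo) 32-bit halves, 0 < S < 32.
uint64_t shlSplit(uint32_t Lo, uint32_t Hi, unsigned S) {
  uint32_t NewLo = Lo << S;                       // lo half: plain shift
  uint32_t NewHi = (Hi << S) | (Lo >> (32 - S));  // hi half: shift plus spill-over bits
  return (uint64_t(NewHi) << 32) | NewLo;
}

int main() {
  uint64_t R = 0x0123456789abcdefULL;
  for (unsigned S = 1; S < 32; ++S)
    assert(shlSplit(uint32_t(R), uint32_t(R >> 32), S) == (R << S));
}
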
- unsigned R = Op.getReg(); + Register R = Op.getReg(); unsigned SR = Op.getSubReg(); - bool isVirtReg = TargetRegisterInfo::isVirtualRegister(R); + bool isVirtReg = Register::isVirtualRegister(R); bool isKill = Op.isKill(); if (isVirtReg && MRI->getRegClass(R) == DoubleRC) { isKill = false; @@ -674,7 +674,7 @@ void HexagonSplitDoubleRegs::splitMemRef(MachineInstr *MI, : MI->getOperand(2).getImm(); MachineOperand &UpdOp = Load ? MI->getOperand(1) : MI->getOperand(0); const TargetRegisterClass *RC = MRI->getRegClass(UpdOp.getReg()); - unsigned NewR = MRI->createVirtualRegister(RC); + Register NewR = MRI->createVirtualRegister(RC); assert(!UpdOp.getSubReg() && "Def operand with subreg"); BuildMI(B, MI, DL, TII->get(Hexagon::A2_addi), NewR) .addReg(AdrOp.getReg(), RSA) @@ -789,8 +789,8 @@ void HexagonSplitDoubleRegs::splitShift(MachineInstr *MI, UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); assert(F != PairMap.end()); const UUPair &P = F->second; - unsigned LoR = P.first; - unsigned HiR = P.second; + Register LoR = P.first; + Register HiR = P.second; unsigned Opc = MI->getOpcode(); bool Right = (Opc == S2_lsr_i_p || Opc == S2_asr_i_p); @@ -813,7 +813,7 @@ void HexagonSplitDoubleRegs::splitShift(MachineInstr *MI, .addReg(Op1.getReg(), RS, HiSR); } else if (S < 32) { const TargetRegisterClass *IntRC = &IntRegsRegClass; - unsigned TmpR = MRI->createVirtualRegister(IntRC); + Register TmpR = MRI->createVirtualRegister(IntRC); // Expansion: // Shift left: DR = shl R, #s // LoR = shl R.lo, #s @@ -953,12 +953,12 @@ void HexagonSplitDoubleRegs::splitAslOr(MachineInstr *MI, .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR) .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR) .addImm(S); - unsigned TmpR1 = MRI->createVirtualRegister(IntRC); + Register TmpR1 = MRI->createVirtualRegister(IntRC); BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR1) .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR) .addImm(S) .addImm(32-S); - unsigned TmpR2 = MRI->createVirtualRegister(IntRC); + Register TmpR2 = MRI->createVirtualRegister(IntRC); BuildMI(B, MI, DL, TII->get(A2_or), TmpR2) .addReg(Op1.getReg(), RS1, HiSR) .addReg(TmpR1); @@ -1002,7 +1002,7 @@ bool HexagonSplitDoubleRegs::splitInstr(MachineInstr *MI, switch (Opc) { case TargetOpcode::PHI: case TargetOpcode::COPY: { - unsigned DstR = MI->getOperand(0).getReg(); + Register DstR = MI->getOperand(0).getReg(); if (MRI->getRegClass(DstR) == DoubleRC) { createHalfInstr(Opc, MI, PairMap, isub_lo); createHalfInstr(Opc, MI, PairMap, isub_hi); @@ -1079,7 +1079,7 @@ void HexagonSplitDoubleRegs::replaceSubregUses(MachineInstr *MI, for (auto &Op : MI->operands()) { if (!Op.isReg() || !Op.isUse() || !Op.getSubReg()) continue; - unsigned R = Op.getReg(); + Register R = Op.getReg(); UUPairMap::const_iterator F = PairMap.find(R); if (F == PairMap.end()) continue; @@ -1104,8 +1104,8 @@ void HexagonSplitDoubleRegs::collapseRegPairs(MachineInstr *MI, for (auto &Op : MI->operands()) { if (!Op.isReg() || !Op.isUse()) continue; - unsigned R = Op.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) + Register R = Op.getReg(); + if (!Register::isVirtualRegister(R)) continue; if (MRI->getRegClass(R) != DoubleRC || Op.getSubReg()) continue; @@ -1113,7 +1113,7 @@ void HexagonSplitDoubleRegs::collapseRegPairs(MachineInstr *MI, if (F == PairMap.end()) continue; const UUPair &Pr = F->second; - unsigned NewDR = MRI->createVirtualRegister(DoubleRC); + Register NewDR = MRI->createVirtualRegister(DoubleRC); BuildMI(B, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), NewDR) 
.addReg(Pr.first) .addImm(Hexagon::isub_lo) @@ -1145,8 +1145,8 @@ bool HexagonSplitDoubleRegs::splitPartition(const USet &Part) { U != W; ++U) SplitIns.insert(U->getParent()); - unsigned LoR = MRI->createVirtualRegister(IntRC); - unsigned HiR = MRI->createVirtualRegister(IntRC); + Register LoR = MRI->createVirtualRegister(IntRC); + Register HiR = MRI->createVirtualRegister(IntRC); LLVM_DEBUG(dbgs() << "Created mapping: " << printReg(DR, TRI) << " -> " << printReg(HiR, TRI) << ':' << printReg(LoR, TRI) << '\n'); diff --git a/lib/Target/Hexagon/HexagonStoreWidening.cpp b/lib/Target/Hexagon/HexagonStoreWidening.cpp index b8b61517ff95..27fefa5f5e2b 100644 --- a/lib/Target/Hexagon/HexagonStoreWidening.cpp +++ b/lib/Target/Hexagon/HexagonStoreWidening.cpp @@ -441,7 +441,7 @@ bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG, // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi); const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF); - unsigned VReg = MF->getRegInfo().createVirtualRegister(RC); + Register VReg = MF->getRegInfo().createVirtualRegister(RC); MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg) .addImm(int(Acc)); NG.push_back(TfrI); diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index 7ec63a642b0c..6c706fea096b 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -119,7 +119,7 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { FeatureBitset Features = getFeatureBits(); if (HexagonDisableDuplex) - setFeatureBits(Features.set(Hexagon::FeatureDuplex, false)); + setFeatureBits(Features.reset(Hexagon::FeatureDuplex)); setFeatureBits(Hexagon_MC::completeHVXFeatures(Features)); return *this; @@ -230,7 +230,7 @@ void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAGInstrs) { else if (SchedRetvalOptimization) { const MachineInstr *MI = DAG->SUnits[su].getInstr(); if (MI->isCopy() && - TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) { + Register::isPhysicalRegister(MI->getOperand(1).getReg())) { // %vregX = COPY %r0 VRegHoldingReg[MI->getOperand(0).getReg()] = MI->getOperand(1).getReg(); LastVRegUse.erase(MI->getOperand(1).getReg()); @@ -243,8 +243,7 @@ void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAGInstrs) { VRegHoldingReg.count(MO.getReg())) { // LastVRegUse[VRegHoldingReg[MO.getReg()]] = &DAG->SUnits[su]; - } else if (MO.isDef() && - TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + } else if (MO.isDef() && Register::isPhysicalRegister(MO.getReg())) { for (MCRegAliasIterator AI(MO.getReg(), &TRI, true); AI.isValid(); ++AI) { if (LastVRegUse.count(*AI) && @@ -345,7 +344,7 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, // If it's a REG_SEQUENCE/COPY, use its destination instruction to determine // the correct latency. 
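
The HexagonSubtarget change above swaps Features.set(Hexagon::FeatureDuplex, false) for Features.reset(Hexagon::FeatureDuplex); both clear the same bit, the new spelling just states the intent directly. A tiny standalone illustration with std::bitset standing in for FeatureBitset; the index value is made up.

#include <bitset>
#include <cassert>
#include <cstddef>

int main() {
  const std::size_t FeatureDuplex = 3;  // stand-in index for the real feature bit
  std::bitset<8> A(0xFF), B(0xFF);
  A.set(FeatureDuplex, false);          // old spelling: set the bit to false
  B.reset(FeatureDuplex);               // new spelling: clear the bit
  assert(A == B && !A.test(FeatureDuplex));
}
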
if ((DstInst->isRegSequence() || DstInst->isCopy()) && Dst->NumSuccs == 1) { - unsigned DReg = DstInst->getOperand(0).getReg(); + Register DReg = DstInst->getOperand(0).getReg(); MachineInstr *DDst = Dst->Succs[0].getSUnit()->getInstr(); unsigned UseIdx = -1; for (unsigned OpNum = 0; OpNum < DDst->getNumOperands(); OpNum++) { @@ -375,15 +374,15 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, void HexagonSubtarget::getPostRAMutations( std::vector> &Mutations) const { - Mutations.push_back(llvm::make_unique()); - Mutations.push_back(llvm::make_unique()); - Mutations.push_back(llvm::make_unique()); + Mutations.push_back(std::make_unique()); + Mutations.push_back(std::make_unique()); + Mutations.push_back(std::make_unique()); } void HexagonSubtarget::getSMSMutations( std::vector> &Mutations) const { - Mutations.push_back(llvm::make_unique()); - Mutations.push_back(llvm::make_unique()); + Mutations.push_back(std::make_unique()); + Mutations.push_back(std::make_unique()); } // Pin the vtable to this file. diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h index 007423ef1902..31157a0065d9 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.h +++ b/lib/Target/Hexagon/HexagonSubtarget.h @@ -228,7 +228,7 @@ public: } bool isHVXVectorType(MVT VecTy, bool IncludeBool = false) const { - if (!VecTy.isVector() || !useHVXOps()) + if (!VecTy.isVector() || !useHVXOps() || VecTy.isScalableVector()) return false; MVT ElemTy = VecTy.getVectorElementType(); if (!IncludeBool && ElemTy == MVT::i1) diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 80b8480448fe..d709a82be660 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -111,10 +111,10 @@ int HexagonTargetMachineModule = 0; static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) { ScheduleDAGMILive *DAG = - new VLIWMachineScheduler(C, make_unique()); - DAG->addMutation(make_unique()); - DAG->addMutation(make_unique()); - DAG->addMutation(make_unique()); + new VLIWMachineScheduler(C, std::make_unique()); + DAG->addMutation(std::make_unique()); + DAG->addMutation(std::make_unique()); + DAG->addMutation(std::make_unique()); DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); return DAG; } @@ -218,7 +218,7 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT, TT, CPU, FS, Options, getEffectiveRelocModel(RM), getEffectiveCodeModel(CM, CodeModel::Small), (HexagonNoOpt ? CodeGenOpt::None : OL)), - TLOF(make_unique()) { + TLOF(std::make_unique()) { initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry()); initAsmInfo(); } @@ -244,7 +244,7 @@ HexagonTargetMachine::getSubtargetImpl(const Function &F) const { // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. 
resetTargetOptions(F); - I = llvm::make_unique(TargetTriple, CPU, FS, *this); + I = std::make_unique(TargetTriple, CPU, FS, *this); } return I.get(); } diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index 38062e8e922c..ddbc5543348d 100644 --- a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -45,6 +45,8 @@ bool HexagonTTIImpl::useHVX() const { bool HexagonTTIImpl::isTypeForHVX(Type *VecTy) const { assert(VecTy->isVectorTy()); + if (cast(VecTy)->isScalable()) + return false; // Avoid types like <2 x i32*>. if (!cast(VecTy)->getElementType()->isIntegerTy()) return false; diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/lib/Target/Hexagon/HexagonTargetTransformInfo.h index 27e8fc019007..12ede503af83 100644 --- a/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -68,8 +68,8 @@ public: bool shouldFavorPostInc() const; // L1 cache prefetch. - unsigned getPrefetchDistance() const; - unsigned getCacheLineSize() const; + unsigned getPrefetchDistance() const override; + unsigned getCacheLineSize() const override; /// @} diff --git a/lib/Target/Hexagon/HexagonVExtract.cpp b/lib/Target/Hexagon/HexagonVExtract.cpp index a9692f42e468..0c0266a6839a 100644 --- a/lib/Target/Hexagon/HexagonVExtract.cpp +++ b/lib/Target/Hexagon/HexagonVExtract.cpp @@ -67,9 +67,9 @@ unsigned HexagonVExtract::genElemLoad(MachineInstr *ExtI, unsigned BaseR, MachineRegisterInfo &MRI) { MachineBasicBlock &ExtB = *ExtI->getParent(); DebugLoc DL = ExtI->getDebugLoc(); - unsigned ElemR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + Register ElemR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); - unsigned ExtIdxR = ExtI->getOperand(2).getReg(); + Register ExtIdxR = ExtI->getOperand(2).getReg(); unsigned ExtIdxS = ExtI->getOperand(2).getSubReg(); // Simplified check for a compile-time constant value of ExtIdxR. @@ -86,7 +86,7 @@ unsigned HexagonVExtract::genElemLoad(MachineInstr *ExtI, unsigned BaseR, } } - unsigned IdxR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + Register IdxR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); BuildMI(ExtB, ExtI, DL, HII->get(Hexagon::A2_andir), IdxR) .add(ExtI->getOperand(2)) .addImm(-4); @@ -111,7 +111,7 @@ bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) { unsigned Opc = MI.getOpcode(); if (Opc != Hexagon::V6_extractw) continue; - unsigned VecR = MI.getOperand(1).getReg(); + Register VecR = MI.getOperand(1).getReg(); VExtractMap[VecR].push_back(&MI); } } @@ -144,13 +144,13 @@ bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) { MachineBasicBlock &ExtB = *ExtI->getParent(); DebugLoc DL = ExtI->getDebugLoc(); - unsigned BaseR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + Register BaseR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); BuildMI(ExtB, ExtI, DL, HII->get(Hexagon::PS_fi), BaseR) .addFrameIndex(FI) .addImm(SR == 0 ? 
0 : VecSize/2); unsigned ElemR = genElemLoad(ExtI, BaseR, MRI); - unsigned ExtR = ExtI->getOperand(0).getReg(); + Register ExtR = ExtI->getOperand(0).getReg(); MRI.replaceRegWith(ExtR, ElemR); ExtB.erase(ExtI); Changed = true; diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index 3619e4c239d7..fab5edefb553 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" @@ -57,9 +58,9 @@ static cl::opt DisablePacketizer("disable-packetizer", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Hexagon packetizer pass")); -cl::opt Slot1Store("slot1-store-slot0-load", cl::Hidden, - cl::ZeroOrMore, cl::init(true), - cl::desc("Allow slot1 store and slot0 load")); +static cl::opt Slot1Store("slot1-store-slot0-load", cl::Hidden, + cl::ZeroOrMore, cl::init(true), + cl::desc("Allow slot1 store and slot0 load")); static cl::opt PacketizeVolatiles("hexagon-packetize-volatiles", cl::ZeroOrMore, cl::Hidden, cl::init(true), @@ -129,16 +130,16 @@ INITIALIZE_PASS_END(HexagonPacketizer, "hexagon-packetizer", "Hexagon Packetizer", false, false) HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF, - MachineLoopInfo &MLI, AliasAnalysis *AA, + MachineLoopInfo &MLI, AAResults *AA, const MachineBranchProbabilityInfo *MBPI, bool Minimal) : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI), Minimal(Minimal) { HII = MF.getSubtarget().getInstrInfo(); HRI = MF.getSubtarget().getRegisterInfo(); - addMutation(llvm::make_unique()); - addMutation(llvm::make_unique()); - addMutation(llvm::make_unique()); + addMutation(std::make_unique()); + addMutation(std::make_unique()); + addMutation(std::make_unique()); } // Check if FirstI modifies a register that SecondI reads. @@ -148,7 +149,7 @@ static bool hasWriteToReadDep(const MachineInstr &FirstI, for (auto &MO : FirstI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned R = MO.getReg(); + Register R = MO.getReg(); if (SecondI.readsRegister(R, TRI)) return true; } @@ -422,7 +423,7 @@ bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr &MI, dbgs() << "Checking CUR against "; MJ.dump(); }); - unsigned DestReg = MI.getOperand(0).getReg(); + Register DestReg = MI.getOperand(0).getReg(); bool FoundMatch = false; for (auto &MO : MJ.operands()) if (MO.isReg() && MO.getReg() == DestReg) @@ -515,7 +516,7 @@ bool HexagonPacketizerList::updateOffset(SUnit *SUI, SUnit *SUJ) { unsigned BPJ, OPJ; if (!HII->getBaseAndOffsetPosition(MJ, BPJ, OPJ)) return false; - unsigned Reg = MI.getOperand(BPI).getReg(); + Register Reg = MI.getOperand(BPI).getReg(); if (Reg != MJ.getOperand(BPJ).getReg()) return false; // Make sure that the dependences do not restrict adding MI to the packet. 
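Throughout these hunks, plain unsigned register variables become llvm::Register, a thin wrapper around unsigned that keeps virtual registers, physical registers and stack-slot indices distinguishable. A sketch of the idiom, assuming LLVM's CodeGen headers:

  #include "llvm/CodeGen/Register.h"
  using llvm::Register;

  // MachineOperand::getReg() now hands back a Register; the static predicates
  // that used to live on TargetRegisterInfo are available on Register itself.
  void classify(Register R) {
    if (Register::isVirtualRegister(R)) {
      // SSA virtual register created by MachineRegisterInfo
    } else if (Register::isPhysicalRegister(R)) {
      // concrete target register
    }
  }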
@@ -788,7 +789,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr &MI, return false; if (!MO.isReg() || !MO.isDef() || !MO.isImplicit()) continue; - unsigned R = MO.getReg(); + Register R = MO.getReg(); if (R == DepReg || HRI->isSuperRegister(DepReg, R)) return false; } @@ -1208,7 +1209,7 @@ bool HexagonPacketizerList::hasDeadDependence(const MachineInstr &I, for (auto &MO : J.operands()) { if (!MO.isReg() || !MO.isDef() || !MO.isDead()) continue; - unsigned R = MO.getReg(); + Register R = MO.getReg(); if (R != Hexagon::USR_OVF && DeadDefs[R]) return true; } @@ -1585,7 +1586,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // subset of the volatile register set. for (const MachineOperand &Op : I.operands()) { if (Op.isReg() && Op.isDef()) { - unsigned R = Op.getReg(); + Register R = Op.getReg(); if (!J.readsRegister(R, HRI) && !J.modifiesRegister(R, HRI)) continue; } else if (!Op.isRegMask()) { @@ -1763,6 +1764,16 @@ HexagonPacketizerList::addToPacket(MachineInstr &MI) { void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB, MachineBasicBlock::iterator EndMI) { // Replace VLIWPacketizerList::endPacket(MBB, EndMI). + LLVM_DEBUG({ + if (!CurrentPacketMIs.empty()) { + dbgs() << "Finalizing packet:\n"; + unsigned Idx = 0; + for (MachineInstr *MI : CurrentPacketMIs) { + unsigned R = ResourceTracker->getUsedResources(Idx++); + dbgs() << " * [res:0x" << utohexstr(R) << "] " << *MI; + } + } + }); bool memShufDisabled = getmemShufDisabled(); if (memShufDisabled && !foundLSInPacket()) { diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/lib/Target/Hexagon/HexagonVLIWPacketizer.h index daa86b6f5393..943b9ac7ecc4 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.h +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.h @@ -69,8 +69,7 @@ private: public: HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, - AliasAnalysis *AA, - const MachineBranchProbabilityInfo *MBPI, + AAResults *AA, const MachineBranchProbabilityInfo *MBPI, bool Minimal); // initPacketizerState - initialize some internal flags. diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 7c0770926abe..75cb398d4097 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -201,9 +201,7 @@ public: bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target) override { - MCFixupKind Kind = Fixup.getKind(); - - switch((unsigned)Kind) { + switch(Fixup.getTargetKind()) { default: llvm_unreachable("Unknown Fixup Kind!"); @@ -583,7 +581,7 @@ public: return false; // If we cannot resolve the fixup value, it requires relaxation. 
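The asm-backend hunks below (and the ELF object-writer ones after them) replace switch ((unsigned)Fixup.getKind()) with switch (Fixup.getTargetKind()). A small sketch of the idiom, assuming the MC headers; the function is illustrative:

  #include "llvm/MC/MCFixup.h"

  unsigned sizeFor(const llvm::MCFixup &Fixup) {
    // getTargetKind() returns the fixup kind as a plain unsigned, so
    // target-specific kinds can be switched over without an explicit cast.
    switch (Fixup.getTargetKind()) {
    case llvm::FK_Data_4:   // generic kinds still participate as before
      return 4;
    default:
      return 0;
    }
  }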
if (!Resolved) { - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { case fixup_Hexagon_B22_PCREL: // GetFixupCount assumes B22 won't relax LLVM_FALLTHROUGH; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp index f678bf49322e..cdbeae38b3a1 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp @@ -44,7 +44,7 @@ unsigned HexagonELFObjectWriter::getRelocType(MCContext &Ctx, MCFixup const &Fixup, bool IsPCRel) const { MCSymbolRefExpr::VariantKind Variant = Target.getAccessVariant(); - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { default: report_fatal_error("Unrecognized relocation type"); break; @@ -299,5 +299,5 @@ unsigned HexagonELFObjectWriter::getRelocType(MCContext &Ctx, std::unique_ptr llvm::createHexagonELFObjectWriter(uint8_t OSABI, StringRef CPU) { - return llvm::make_unique(OSABI, CPU); + return std::make_unique(OSABI, CPU); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp index fcd3758600c1..8b262bd0248e 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -726,9 +726,6 @@ void HexagonMCChecker::reportNote(SMLoc Loc, llvm::Twine const &Msg) { } void HexagonMCChecker::reportWarning(Twine const &Msg) { - if (ReportErrors) { - auto SM = Context.getSourceManager(); - if (SM) - SM->PrintMessage(MCB.getLoc(), SourceMgr::DK_Warning, Msg); - } + if (ReportErrors) + Context.reportWarning(MCB.getLoc(), Msg); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp index f2432883af6f..a799f7f7c0b9 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp @@ -116,8 +116,8 @@ void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol, } // Update the maximum alignment of the section if necessary. - if (ByteAlignment > Section.getAlignment()) - Section.setAlignment(ByteAlignment); + if (Align(ByteAlignment) > Section.getAlignment()) + Section.setAlignment(Align(ByteAlignment)); SwitchSection(P.first, P.second); } else { diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 9c50b25156c3..870ab9e94a63 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -72,7 +72,6 @@ cl::opt MV65("mv65", cl::Hidden, cl::desc("Build for Hexagon V65"), cl::init(false)); cl::opt MV66("mv66", cl::Hidden, cl::desc("Build for Hexagon V66"), cl::init(false)); -} // namespace cl::opt EnableHVX("mhvx", @@ -86,6 +85,7 @@ cl::opt clEnumValN(Hexagon::ArchEnum::Generic, "", "")), // Sentinel for flag not present. 
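The HexagonMCChecker hunk above routes warnings through MCContext::reportWarning instead of fetching the SourceMgr by hand, keeping the diagnostic plumbing in one place. A sketch of the call, assuming the MC headers; the free function is illustrative:

  #include "llvm/ADT/Twine.h"
  #include "llvm/MC/MCContext.h"
  #include "llvm/Support/SMLoc.h"

  void warnAt(llvm::MCContext &Ctx, llvm::SMLoc Loc, const llvm::Twine &Msg) {
    // MCContext owns the decision of how (and whether) to render the warning,
    // so callers no longer poke at the SourceMgr themselves.
    Ctx.reportWarning(Loc, Msg);
  }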
cl::init(Hexagon::ArchEnum::NoArch), cl::ValueOptional); +} // namespace static cl::opt DisableHVX("mno-hvx", cl::Hidden, @@ -264,14 +264,12 @@ createHexagonObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { } static void LLVM_ATTRIBUTE_UNUSED clearFeature(MCSubtargetInfo* STI, uint64_t F) { - uint64_t FB = STI->getFeatureBits().to_ullong(); - if (FB & (1ULL << F)) + if (STI->getFeatureBits()[F]) STI->ToggleFeature(F); } static bool LLVM_ATTRIBUTE_UNUSED checkFeature(MCSubtargetInfo* STI, uint64_t F) { - uint64_t FB = STI->getFeatureBits().to_ullong(); - return (FB & (1ULL << F)) != 0; + return STI->getFeatureBits()[F]; } namespace { @@ -398,7 +396,7 @@ MCSubtargetInfo *Hexagon_MC::createHexagonMCSubtargetInfo(const Triple &TT, MCSubtargetInfo *X = createHexagonMCSubtargetInfoImpl(TT, CPUName, ArchFS); if (HexagonDisableDuplex) { llvm::FeatureBitset Features = X->getFeatureBits(); - X->setFeatureBits(Features.set(Hexagon::FeatureDuplex, false)); + X->setFeatureBits(Features.reset(Hexagon::FeatureDuplex)); } X->setFeatureBits(completeHVXFeatures(X->getFeatureBits())); diff --git a/lib/Target/Hexagon/RDFCopy.cpp b/lib/Target/Hexagon/RDFCopy.cpp index 7702024f87bd..a9d39fd4b2dc 100644 --- a/lib/Target/Hexagon/RDFCopy.cpp +++ b/lib/Target/Hexagon/RDFCopy.cpp @@ -45,8 +45,8 @@ bool CopyPropagation::interpretAsCopy(const MachineInstr *MI, EqualityMap &EM) { const MachineOperand &Src = MI->getOperand(1); RegisterRef DstR = DFG.makeRegRef(Dst.getReg(), Dst.getSubReg()); RegisterRef SrcR = DFG.makeRegRef(Src.getReg(), Src.getSubReg()); - assert(TargetRegisterInfo::isPhysicalRegister(DstR.Reg)); - assert(TargetRegisterInfo::isPhysicalRegister(SrcR.Reg)); + assert(Register::isPhysicalRegister(DstR.Reg)); + assert(Register::isPhysicalRegister(SrcR.Reg)); const TargetRegisterInfo &TRI = DFG.getTRI(); if (TRI.getMinimalPhysRegClass(DstR.Reg) != TRI.getMinimalPhysRegClass(SrcR.Reg)) diff --git a/lib/Target/Hexagon/RDFDeadCode.cpp b/lib/Target/Hexagon/RDFDeadCode.cpp index 52178931aa6d..af86c7b1956b 100644 --- a/lib/Target/Hexagon/RDFDeadCode.cpp +++ b/lib/Target/Hexagon/RDFDeadCode.cpp @@ -16,6 +16,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" #include diff --git a/lib/Target/Hexagon/RDFGraph.cpp b/lib/Target/Hexagon/RDFGraph.cpp index 9d8f706b8a0f..0cb35dc98819 100644 --- a/lib/Target/Hexagon/RDFGraph.cpp +++ b/lib/Target/Hexagon/RDFGraph.cpp @@ -633,7 +633,7 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum) // uses or defs, and those lists do not allow sub-registers. if (Op.getSubReg() != 0) return false; - RegisterId Reg = Op.getReg(); + Register Reg = Op.getReg(); const MCPhysReg *ImpR = Op.isDef() ? 
D.getImplicitDefs() : D.getImplicitUses(); if (!ImpR) @@ -963,7 +963,7 @@ void DataFlowGraph::build(unsigned Options) { RegisterRef DataFlowGraph::makeRegRef(unsigned Reg, unsigned Sub) const { assert(PhysicalRegisterInfo::isRegMaskId(Reg) || - TargetRegisterInfo::isPhysicalRegister(Reg)); + Register::isPhysicalRegister(Reg)); assert(Reg != 0); if (Sub != 0) Reg = TRI.getSubReg(Reg, Sub); @@ -1291,8 +1291,8 @@ void DataFlowGraph::buildStmt(NodeAddr BA, MachineInstr &In) { MachineOperand &Op = In.getOperand(OpN); if (!Op.isReg() || !Op.isDef() || Op.isImplicit()) continue; - unsigned R = Op.getReg(); - if (!R || !TargetRegisterInfo::isPhysicalRegister(R)) + Register R = Op.getReg(); + if (!R || !Register::isPhysicalRegister(R)) continue; uint16_t Flags = NodeAttrs::None; if (TOI.isPreserving(In, OpN)) { @@ -1336,8 +1336,8 @@ void DataFlowGraph::buildStmt(NodeAddr BA, MachineInstr &In) { MachineOperand &Op = In.getOperand(OpN); if (!Op.isReg() || !Op.isDef() || !Op.isImplicit()) continue; - unsigned R = Op.getReg(); - if (!R || !TargetRegisterInfo::isPhysicalRegister(R) || DoneDefs.test(R)) + Register R = Op.getReg(); + if (!R || !Register::isPhysicalRegister(R) || DoneDefs.test(R)) continue; RegisterRef RR = makeRegRef(Op); uint16_t Flags = NodeAttrs::None; @@ -1365,8 +1365,8 @@ void DataFlowGraph::buildStmt(NodeAddr BA, MachineInstr &In) { MachineOperand &Op = In.getOperand(OpN); if (!Op.isReg() || !Op.isUse()) continue; - unsigned R = Op.getReg(); - if (!R || !TargetRegisterInfo::isPhysicalRegister(R)) + Register R = Op.getReg(); + if (!R || !Register::isPhysicalRegister(R)) continue; uint16_t Flags = NodeAttrs::None; if (Op.isUndef()) diff --git a/lib/Target/Hexagon/RDFLiveness.cpp b/lib/Target/Hexagon/RDFLiveness.cpp index 9cd304aa10bc..7d7b89462ff9 100644 --- a/lib/Target/Hexagon/RDFLiveness.cpp +++ b/lib/Target/Hexagon/RDFLiveness.cpp @@ -889,8 +889,8 @@ void Liveness::resetKills(MachineBasicBlock *B) { // implicit defs. if (!Op.isReg() || !Op.isDef() || Op.isImplicit()) continue; - unsigned R = Op.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(R)) + Register R = Op.getReg(); + if (!Register::isPhysicalRegister(R)) continue; for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR) Live.reset(*SR); @@ -898,8 +898,8 @@ void Liveness::resetKills(MachineBasicBlock *B) { for (auto &Op : MI->operands()) { if (!Op.isReg() || !Op.isUse() || Op.isUndef()) continue; - unsigned R = Op.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(R)) + Register R = Op.getReg(); + if (!Register::isPhysicalRegister(R)) continue; bool IsLive = false; for (MCRegAliasIterator AR(R, &TRI, true); AR.isValid(); ++AR) { diff --git a/lib/Target/Hexagon/RDFRegisters.cpp b/lib/Target/Hexagon/RDFRegisters.cpp index 6e0f33695f0e..b5675784e34b 100644 --- a/lib/Target/Hexagon/RDFRegisters.cpp +++ b/lib/Target/Hexagon/RDFRegisters.cpp @@ -101,7 +101,7 @@ RegisterRef PhysicalRegisterInfo::normalize(RegisterRef RR) const { std::set PhysicalRegisterInfo::getAliasSet(RegisterId Reg) const { // Do not include RR in the alias set. 
std::set AS; - assert(isRegMaskId(Reg) || TargetRegisterInfo::isPhysicalRegister(Reg)); + assert(isRegMaskId(Reg) || Register::isPhysicalRegister(Reg)); if (isRegMaskId(Reg)) { // XXX SLOW const uint32_t *MB = getRegMaskBits(Reg); @@ -129,8 +129,8 @@ std::set PhysicalRegisterInfo::getAliasSet(RegisterId Reg) const { } bool PhysicalRegisterInfo::aliasRR(RegisterRef RA, RegisterRef RB) const { - assert(TargetRegisterInfo::isPhysicalRegister(RA.Reg)); - assert(TargetRegisterInfo::isPhysicalRegister(RB.Reg)); + assert(Register::isPhysicalRegister(RA.Reg)); + assert(Register::isPhysicalRegister(RB.Reg)); MCRegUnitMaskIterator UMA(RA.Reg, &TRI); MCRegUnitMaskIterator UMB(RB.Reg, &TRI); @@ -160,7 +160,7 @@ bool PhysicalRegisterInfo::aliasRR(RegisterRef RA, RegisterRef RB) const { } bool PhysicalRegisterInfo::aliasRM(RegisterRef RR, RegisterRef RM) const { - assert(TargetRegisterInfo::isPhysicalRegister(RR.Reg) && isRegMaskId(RM.Reg)); + assert(Register::isPhysicalRegister(RR.Reg) && isRegMaskId(RM.Reg)); const uint32_t *MB = getRegMaskBits(RM.Reg); bool Preserved = MB[RR.Reg/32] & (1u << (RR.Reg%32)); // If the lane mask information is "full", e.g. when the given lane mask diff --git a/lib/Target/Hexagon/RDFRegisters.h b/lib/Target/Hexagon/RDFRegisters.h index 646233bacda5..4afaf80e4659 100644 --- a/lib/Target/Hexagon/RDFRegisters.h +++ b/lib/Target/Hexagon/RDFRegisters.h @@ -99,15 +99,15 @@ namespace rdf { const MachineFunction &mf); static bool isRegMaskId(RegisterId R) { - return TargetRegisterInfo::isStackSlot(R); + return Register::isStackSlot(R); } RegisterId getRegMaskId(const uint32_t *RM) const { - return TargetRegisterInfo::index2StackSlot(RegMasks.find(RM)); + return Register::index2StackSlot(RegMasks.find(RM)); } const uint32_t *getRegMaskBits(RegisterId R) const { - return RegMasks.get(TargetRegisterInfo::stackSlot2Index(R)); + return RegMasks.get(Register::stackSlot2Index(R)); } RegisterRef normalize(RegisterRef RR) const; @@ -125,7 +125,7 @@ namespace rdf { } const BitVector &getMaskUnits(RegisterId MaskId) const { - return MaskInfos[TargetRegisterInfo::stackSlot2Index(MaskId)].Units; + return MaskInfos[Register::stackSlot2Index(MaskId)].Units; } RegisterRef mapTo(RegisterRef RR, unsigned R) const; diff --git a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp index 9af8a0b35b2f..ec82e3a41f2a 100644 --- a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp +++ b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp @@ -469,13 +469,14 @@ public: else if (isa(getImm())) { #ifndef NDEBUG const LanaiMCExpr *SymbolRefExpr = dyn_cast(getImm()); - assert(SymbolRefExpr->getKind() == LanaiMCExpr::VK_Lanai_ABS_LO); + assert(SymbolRefExpr && + SymbolRefExpr->getKind() == LanaiMCExpr::VK_Lanai_ABS_LO); #endif Inst.addOperand(MCOperand::createExpr(getImm())); } else if (isa(getImm())) { #ifndef NDEBUG const MCBinaryExpr *BinaryExpr = dyn_cast(getImm()); - assert(isa(BinaryExpr->getLHS()) && + assert(BinaryExpr && isa(BinaryExpr->getLHS()) && cast(BinaryExpr->getLHS())->getKind() == LanaiMCExpr::VK_Lanai_ABS_LO); #endif @@ -499,13 +500,14 @@ public: else if (isa(getImm())) { #ifndef NDEBUG const LanaiMCExpr *SymbolRefExpr = dyn_cast(getImm()); - assert(SymbolRefExpr->getKind() == LanaiMCExpr::VK_Lanai_ABS_HI); + assert(SymbolRefExpr && + SymbolRefExpr->getKind() == LanaiMCExpr::VK_Lanai_ABS_HI); #endif Inst.addOperand(MCOperand::createExpr(getImm())); } else if (isa(getImm())) { #ifndef NDEBUG const MCBinaryExpr *BinaryExpr = dyn_cast(getImm()); - 
assert(isa(BinaryExpr->getLHS()) && + assert(BinaryExpr && isa(BinaryExpr->getLHS()) && cast(BinaryExpr->getLHS())->getKind() == LanaiMCExpr::VK_Lanai_ABS_HI); #endif @@ -544,10 +546,9 @@ public: } else if (isa(getImm())) { #ifndef NDEBUG const MCBinaryExpr *BinaryExpr = dyn_cast(getImm()); - const LanaiMCExpr *SymbolRefExpr = - dyn_cast(BinaryExpr->getLHS()); - assert(SymbolRefExpr && - SymbolRefExpr->getKind() == LanaiMCExpr::VK_Lanai_None); + assert(BinaryExpr && isa(BinaryExpr->getLHS()) && + cast(BinaryExpr->getLHS())->getKind() == + LanaiMCExpr::VK_Lanai_None); #endif Inst.addOperand(MCOperand::createExpr(getImm())); } else @@ -580,7 +581,7 @@ public: } static std::unique_ptr CreateToken(StringRef Str, SMLoc Start) { - auto Op = make_unique(TOKEN); + auto Op = std::make_unique(TOKEN); Op->Tok.Data = Str.data(); Op->Tok.Length = Str.size(); Op->StartLoc = Start; @@ -590,7 +591,7 @@ public: static std::unique_ptr createReg(unsigned RegNum, SMLoc Start, SMLoc End) { - auto Op = make_unique(REGISTER); + auto Op = std::make_unique(REGISTER); Op->Reg.RegNum = RegNum; Op->StartLoc = Start; Op->EndLoc = End; @@ -599,7 +600,7 @@ public: static std::unique_ptr createImm(const MCExpr *Value, SMLoc Start, SMLoc End) { - auto Op = make_unique(IMMEDIATE); + auto Op = std::make_unique(IMMEDIATE); Op->Imm.Value = Value; Op->StartLoc = Start; Op->EndLoc = End; diff --git a/lib/Target/Lanai/LanaiAsmPrinter.cpp b/lib/Target/Lanai/LanaiAsmPrinter.cpp index 64d963475e1a..12a3202446a8 100644 --- a/lib/Target/Lanai/LanaiAsmPrinter.cpp +++ b/lib/Target/Lanai/LanaiAsmPrinter.cpp @@ -133,7 +133,7 @@ bool LanaiAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const MachineOperand &MO = MI->getOperand(RegOp); if (!MO.isReg()) return true; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); O << LanaiInstPrinter::getRegisterName(Reg); return false; } diff --git a/lib/Target/Lanai/LanaiDelaySlotFiller.cpp b/lib/Target/Lanai/LanaiDelaySlotFiller.cpp index 09c63dca23e2..b9e577d201f9 100644 --- a/lib/Target/Lanai/LanaiDelaySlotFiller.cpp +++ b/lib/Target/Lanai/LanaiDelaySlotFiller.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// Simple pass to fills delay slots with useful instructions. +// Simple pass to fill delay slots with useful instructions. 
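The LanaiAsmParser hunks above tighten the NDEBUG-only asserts so the result of dyn_cast is tested before it is dereferenced, since dyn_cast yields null on a type mismatch. A self-contained sketch of that pattern using an illustrative two-class hierarchy with LLVM-style RTTI:

  #include "llvm/Support/Casting.h"
  #include <cassert>

  struct Expr {
    enum Kind { K_Sym, K_Bin } TheKind;
    explicit Expr(Kind K) : TheKind(K) {}
  };
  struct SymExpr : Expr {
    SymExpr() : Expr(K_Sym) {}
    static bool classof(const Expr *E) { return E->TheKind == K_Sym; }
  };

  void check(const Expr *E) {
  #ifndef NDEBUG
    // Test the pointer before using it inside the assert, exactly as the
    // hunks above now do for the LanaiMCExpr / MCBinaryExpr operands.
    const auto *S = llvm::dyn_cast<SymExpr>(E);
    assert(S && "expected a SymExpr here");
  #endif
    (void)E;
  }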
// //===----------------------------------------------------------------------===// diff --git a/lib/Target/Lanai/LanaiFrameLowering.cpp b/lib/Target/Lanai/LanaiFrameLowering.cpp index 142c09c504cc..eddc2b8e61f7 100644 --- a/lib/Target/Lanai/LanaiFrameLowering.cpp +++ b/lib/Target/Lanai/LanaiFrameLowering.cpp @@ -72,8 +72,8 @@ void LanaiFrameLowering::replaceAdjDynAllocPseudo(MachineFunction &MF) const { MachineInstr &MI = *MBBI++; if (MI.getOpcode() == Lanai::ADJDYNALLOC) { DebugLoc DL = MI.getDebugLoc(); - unsigned Dst = MI.getOperand(0).getReg(); - unsigned Src = MI.getOperand(1).getReg(); + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); BuildMI(*MBB, MI, DL, LII.get(Lanai::ADD_I_LO), Dst) .addReg(Src) diff --git a/lib/Target/Lanai/LanaiFrameLowering.h b/lib/Target/Lanai/LanaiFrameLowering.h index 5fe4535543ec..380d63df7301 100644 --- a/lib/Target/Lanai/LanaiFrameLowering.h +++ b/lib/Target/Lanai/LanaiFrameLowering.h @@ -31,7 +31,7 @@ protected: public: explicit LanaiFrameLowering(const LanaiSubtarget &Subtarget) : TargetFrameLowering(StackGrowsDown, - /*StackAlignment=*/8, + /*StackAlignment=*/Align(8), /*LocalAreaOffset=*/0), STI(Subtarget) {} diff --git a/lib/Target/Lanai/LanaiISelLowering.cpp b/lib/Target/Lanai/LanaiISelLowering.cpp index 1ed078bb433f..43933d062a7e 100644 --- a/lib/Target/Lanai/LanaiISelLowering.cpp +++ b/lib/Target/Lanai/LanaiISelLowering.cpp @@ -144,9 +144,9 @@ LanaiTargetLowering::LanaiTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::XOR); - // Function alignments (log2) - setMinFunctionAlignment(2); - setPrefFunctionAlignment(2); + // Function alignments + setMinFunctionAlignment(Align(4)); + setPrefFunctionAlignment(Align(4)); setJumpIsExpensive(true); @@ -212,10 +212,11 @@ SDValue LanaiTargetLowering::LowerOperation(SDValue Op, // Lanai Inline Assembly Support //===----------------------------------------------------------------------===// -unsigned LanaiTargetLowering::getRegisterByName(const char *RegName, EVT /*VT*/, - SelectionDAG & /*DAG*/) const { +Register LanaiTargetLowering::getRegisterByName( + const char *RegName, EVT /*VT*/, + const MachineFunction & /*MF*/) const { // Only unallocatable registers should be matched here. 
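In the LanaiISelLowering hunk below, getRegisterByName now returns Register and takes the MachineFunction rather than the SelectionDAG; the lookup itself still goes through llvm::StringSwitch. A sketch of that lookup idiom, assuming the ADT headers; the register values are illustrative:

  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/StringSwitch.h"

  enum IllustrativeReg : unsigned { NoReg = 0, SP = 1, FP = 2 };

  unsigned lookupReg(llvm::StringRef Name) {
    // StringSwitch chains Case() matchers and falls back to Default().
    return llvm::StringSwitch<unsigned>(Name)
        .Case("sp", SP)
        .Case("fp", FP)
        .Default(NoReg);
  }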
- unsigned Reg = StringSwitch(RegName) + Register Reg = StringSwitch(RegName) .Case("pc", Lanai::PC) .Case("sp", Lanai::SP) .Case("fp", Lanai::FP) @@ -459,7 +460,7 @@ SDValue LanaiTargetLowering::LowerCCCArguments( EVT RegVT = VA.getLocVT(); switch (RegVT.getSimpleVT().SimpleTy) { case MVT::i32: { - unsigned VReg = RegInfo.createVirtualRegister(&Lanai::GPRRegClass); + Register VReg = RegInfo.createVirtualRegister(&Lanai::GPRRegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, RegVT); diff --git a/lib/Target/Lanai/LanaiISelLowering.h b/lib/Target/Lanai/LanaiISelLowering.h index e7b5755e9041..4c35a2c6fb8e 100644 --- a/lib/Target/Lanai/LanaiISelLowering.h +++ b/lib/Target/Lanai/LanaiISelLowering.h @@ -90,8 +90,8 @@ public: SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; - unsigned getRegisterByName(const char *RegName, EVT VT, - SelectionDAG &DAG) const override; + Register getRegisterByName(const char *RegName, EVT VT, + const MachineFunction &MF) const override; std::pair getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override; diff --git a/lib/Target/Lanai/LanaiInstrInfo.cpp b/lib/Target/Lanai/LanaiInstrInfo.cpp index 700a86069102..b950fd0424ef 100644 --- a/lib/Target/Lanai/LanaiInstrInfo.cpp +++ b/lib/Target/Lanai/LanaiInstrInfo.cpp @@ -86,8 +86,7 @@ void LanaiInstrInfo::loadRegFromStackSlot( } bool LanaiInstrInfo::areMemAccessesTriviallyDisjoint( - const MachineInstr &MIa, const MachineInstr &MIb, - AliasAnalysis * /*AA*/) const { + const MachineInstr &MIa, const MachineInstr &MIb) const { assert(MIa.mayLoadOrStore() && "MIa must be a load or store."); assert(MIb.mayLoadOrStore() && "MIb must be a load or store."); @@ -457,7 +456,7 @@ bool LanaiInstrInfo::analyzeSelect(const MachineInstr &MI, // return the defining instruction. static MachineInstr *canFoldIntoSelect(unsigned Reg, const MachineRegisterInfo &MRI) { - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return nullptr; if (!MRI.hasOneNonDBGUse(Reg)) return nullptr; @@ -479,7 +478,7 @@ static MachineInstr *canFoldIntoSelect(unsigned Reg, // MI can't have any tied operands, that would conflict with predication. if (MO.isTied()) return nullptr; - if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + if (Register::isPhysicalRegister(MO.getReg())) return nullptr; if (MO.isDef() && !MO.isDead()) return nullptr; @@ -505,7 +504,7 @@ LanaiInstrInfo::optimizeSelect(MachineInstr &MI, // Find new register class to use. MachineOperand FalseReg = MI.getOperand(Invert ? 
1 : 2); - unsigned DestReg = MI.getOperand(0).getReg(); + Register DestReg = MI.getOperand(0).getReg(); const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg()); if (!MRI.constrainRegClass(DestReg, PreviousClass)) return nullptr; diff --git a/lib/Target/Lanai/LanaiInstrInfo.h b/lib/Target/Lanai/LanaiInstrInfo.h index d71424aeb0b1..59a04d2cc388 100644 --- a/lib/Target/Lanai/LanaiInstrInfo.h +++ b/lib/Target/Lanai/LanaiInstrInfo.h @@ -36,8 +36,7 @@ public: } bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, - const MachineInstr &MIb, - AliasAnalysis *AA) const override; + const MachineInstr &MIb) const override; unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override; diff --git a/lib/Target/Lanai/LanaiRegisterInfo.cpp b/lib/Target/Lanai/LanaiRegisterInfo.cpp index d3056a1eba8e..7c28debb94dd 100644 --- a/lib/Target/Lanai/LanaiRegisterInfo.cpp +++ b/lib/Target/Lanai/LanaiRegisterInfo.cpp @@ -155,7 +155,7 @@ void LanaiRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (!HasFP || (needsStackRealignment(MF) && FrameIndex >= 0)) Offset += MF.getFrameInfo().getStackSize(); - unsigned FrameReg = getFrameRegister(MF); + Register FrameReg = getFrameRegister(MF); if (FrameIndex >= 0) { if (hasBasePointer(MF)) FrameReg = getBaseRegister(); diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp index 4313fa5a82b5..919d43ad9b9b 100644 --- a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp +++ b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp @@ -88,5 +88,5 @@ bool LanaiELFObjectWriter::needsRelocateWithSymbol(const MCSymbol & /*SD*/, std::unique_ptr llvm::createLanaiELFObjectWriter(uint8_t OSABI) { - return llvm::make_unique(OSABI); + return std::make_unique(OSABI); } diff --git a/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp b/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp index a0ec14ae2381..85dcc0f152f9 100644 --- a/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp +++ b/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp @@ -191,33 +191,33 @@ public: } static std::unique_ptr CreateToken(StringRef Str, SMLoc S) { - return make_unique(Str, S); + return std::make_unique(Str, S); } static std::unique_ptr CreateReg(unsigned RegNum, SMLoc S, SMLoc E) { - return make_unique(k_Reg, RegNum, S, E); + return std::make_unique(k_Reg, RegNum, S, E); } static std::unique_ptr CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { - return make_unique(Val, S, E); + return std::make_unique(Val, S, E); } static std::unique_ptr CreateMem(unsigned RegNum, const MCExpr *Val, SMLoc S, SMLoc E) { - return make_unique(RegNum, Val, S, E); + return std::make_unique(RegNum, Val, S, E); } static std::unique_ptr CreateIndReg(unsigned RegNum, SMLoc S, SMLoc E) { - return make_unique(k_IndReg, RegNum, S, E); + return std::make_unique(k_IndReg, RegNum, S, E); } static std::unique_ptr CreatePostIndReg(unsigned RegNum, SMLoc S, SMLoc E) { - return make_unique(k_PostIndReg, RegNum, S, E); + return std::make_unique(k_PostIndReg, RegNum, S, E); } SMLoc getStartLoc() const { return Start; } diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp index 38b7da32c246..0cdd1f4f701f 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp +++ b/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp @@ -31,7 +31,7 @@ protected: unsigned getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup 
&Fixup, bool IsPCRel) const override { // Translate fixup kind to ELF relocation type. - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { case FK_Data_1: return ELF::R_MSP430_8; case FK_Data_2: return ELF::R_MSP430_16_BYTE; case FK_Data_4: return ELF::R_MSP430_32; @@ -54,5 +54,5 @@ protected: std::unique_ptr llvm::createMSP430ELFObjectWriter(uint8_t OSABI) { - return llvm::make_unique(OSABI); + return std::make_unique(OSABI); } diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp index 3a71a084d1af..a3b91acdc6d0 100644 --- a/lib/Target/MSP430/MSP430AsmPrinter.cpp +++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp @@ -159,8 +159,9 @@ void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) { void MSP430AsmPrinter::EmitInterruptVectorSection(MachineFunction &ISR) { MCSection *Cur = OutStreamer->getCurrentSectionOnly(); const auto *F = &ISR.getFunction(); - assert(F->hasFnAttribute("interrupt") && - "Functions with MSP430_INTR CC should have 'interrupt' attribute"); + if (F->getCallingConv() != CallingConv::MSP430_INTR) { + report_fatal_error("Functions with 'interrupt' attribute must have msp430_intrcc CC"); + } StringRef IVIdx = F->getFnAttribute("interrupt").getValueAsString(); MCSection *IV = OutStreamer->getContext().getELFSection( "__interrupt_vector_" + IVIdx, @@ -174,8 +175,9 @@ void MSP430AsmPrinter::EmitInterruptVectorSection(MachineFunction &ISR) { bool MSP430AsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Emit separate section for an interrupt vector if ISR - if (MF.getFunction().getCallingConv() == CallingConv::MSP430_INTR) + if (MF.getFunction().hasFnAttribute("interrupt")) { EmitInterruptVectorSection(MF); + } SetupMachineFunction(MF); EmitFunctionBody(); diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp index 45e7c26e4d30..ce5affdc25b0 100644 --- a/lib/Target/MSP430/MSP430BranchSelector.cpp +++ b/lib/Target/MSP430/MSP430BranchSelector.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h index 33ce3c70a2a3..70e284053021 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.h +++ b/lib/Target/MSP430/MSP430FrameLowering.h @@ -22,7 +22,8 @@ protected: public: explicit MSP430FrameLowering() - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2, 2) {} + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(2), -2, + Align(2)) {} /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. 
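The frame-lowering constructors here and the setMinFunctionAlignment / setPrefFunctionAlignment calls in the neighbouring ISelLowering hunks move from raw integers (sometimes byte counts, sometimes log2 values) to llvm::Align, which always carries a power-of-two byte count; that is why Lanai's old log2 value 2 becomes Align(4). A small sketch, assuming the Support header:

  #include "llvm/Support/Alignment.h"
  #include <cassert>

  int main() {
    llvm::Align A(4);            // four-byte alignment; must be a power of two
    assert(A.value() == 4);      // the byte count itself
    assert(llvm::Log2(A) == 2);  // the old log2 encoding, when still needed
  }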
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index fedfb857bd0f..64169d1f5eb1 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -327,8 +327,8 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM, setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::MSP430_BUILTIN); // TODO: __mspabi_srall, __mspabi_srlll, __mspabi_sllll - setMinFunctionAlignment(1); - setPrefFunctionAlignment(1); + setMinFunctionAlignment(Align(2)); + setPrefFunctionAlignment(Align(2)); } SDValue MSP430TargetLowering::LowerOperation(SDValue Op, @@ -353,6 +353,9 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op, } } +unsigned MSP430TargetLowering::getShiftAmountThreshold(EVT VT) const { + return 2; +} //===----------------------------------------------------------------------===// // MSP430 Inline Assembly Support //===----------------------------------------------------------------------===// @@ -632,7 +635,7 @@ SDValue MSP430TargetLowering::LowerCCCArguments( llvm_unreachable(nullptr); } case MVT::i16: - unsigned VReg = RegInfo.createVirtualRegister(&MSP430::GR16RegClass); + Register VReg = RegInfo.createVirtualRegister(&MSP430::GR16RegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, RegVT); @@ -1446,8 +1449,8 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr &MI, case MSP430::Rrcl16: { BuildMI(*BB, MI, dl, TII.get(MSP430::BIC16rc), MSP430::SR) .addReg(MSP430::SR).addImm(1); - unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); unsigned RrcOpc = MI.getOpcode() == MSP430::Rrcl16 ? 
MSP430::RRC16r : MSP430::RRC8r; BuildMI(*BB, MI, dl, TII.get(RrcOpc), DstReg) @@ -1479,13 +1482,13 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr &MI, LoopBB->addSuccessor(RemBB); LoopBB->addSuccessor(LoopBB); - unsigned ShiftAmtReg = RI.createVirtualRegister(&MSP430::GR8RegClass); - unsigned ShiftAmtReg2 = RI.createVirtualRegister(&MSP430::GR8RegClass); - unsigned ShiftReg = RI.createVirtualRegister(RC); - unsigned ShiftReg2 = RI.createVirtualRegister(RC); - unsigned ShiftAmtSrcReg = MI.getOperand(2).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned DstReg = MI.getOperand(0).getReg(); + Register ShiftAmtReg = RI.createVirtualRegister(&MSP430::GR8RegClass); + Register ShiftAmtReg2 = RI.createVirtualRegister(&MSP430::GR8RegClass); + Register ShiftReg = RI.createVirtualRegister(RC); + Register ShiftReg2 = RI.createVirtualRegister(RC); + Register ShiftAmtSrcReg = MI.getOperand(2).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); // BB: // cmp 0, N diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index ee6b6316d7a9..9224e5e3d005 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -124,6 +124,8 @@ namespace llvm { bool isZExtFree(EVT VT1, EVT VT2) const override; bool isZExtFree(SDValue Val, EVT VT2) const override; + unsigned getShiftAmountThreshold(EVT VT) const override; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override; diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index afbb2f213b45..bec357a1548d 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -139,7 +139,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, return; // We need to materialize the offset via add instruction. 
- unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); if (Offset < 0) BuildMI(MBB, std::next(II), dl, TII.get(MSP430::SUB16ri), DstReg) .addReg(DstReg).addImm(-Offset); diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index 8c4ca982c966..e9aeba76de85 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -46,7 +46,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, const Triple &TT, : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options), TT, CPU, FS, Options, getEffectiveRelocModel(RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), - TLOF(make_unique()), + TLOF(std::make_unique()), Subtarget(TT, CPU, FS, *this) { initAsmInfo(); } diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 1f7d095bf49b..21d0df74d458 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -39,6 +39,7 @@ #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" @@ -233,9 +234,14 @@ class MipsAsmParser : public MCTargetAsmParser { bool expandLoadImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc, MCStreamer &Out, const MCSubtargetInfo *STI); - bool expandLoadImmReal(MCInst &Inst, bool IsSingle, bool IsGPR, bool Is64FPU, - SMLoc IDLoc, MCStreamer &Out, - const MCSubtargetInfo *STI); + bool expandLoadSingleImmToGPR(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI); + bool expandLoadSingleImmToFPR(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI); + bool expandLoadDoubleImmToGPR(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI); + bool expandLoadDoubleImmToFPR(MCInst &Inst, bool Is64FPU, SMLoc IDLoc, + MCStreamer &Out, const MCSubtargetInfo *STI); bool expandLoadAddress(unsigned DstReg, unsigned BaseReg, const MCOperand &Offset, bool Is32BitAddress, @@ -512,11 +518,11 @@ public: // Remember the initial assembler options. The user can not modify these. AssemblerOptions.push_back( - llvm::make_unique(getSTI().getFeatureBits())); + std::make_unique(getSTI().getFeatureBits())); // Create an assembler options environment for the user to modify. 
AssemblerOptions.push_back( - llvm::make_unique(getSTI().getFeatureBits())); + std::make_unique(getSTI().getFeatureBits())); getTargetStreamer().updateABIInfo(*this); @@ -844,7 +850,7 @@ private: const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E, MipsAsmParser &Parser) { - auto Op = llvm::make_unique(k_RegisterIndex, Parser); + auto Op = std::make_unique(k_RegisterIndex, Parser); Op->RegIdx.Index = Index; Op->RegIdx.RegInfo = RegInfo; Op->RegIdx.Kind = RegKind; @@ -1446,7 +1452,7 @@ public: static std::unique_ptr CreateToken(StringRef Str, SMLoc S, MipsAsmParser &Parser) { - auto Op = llvm::make_unique(k_Token, Parser); + auto Op = std::make_unique(k_Token, Parser); Op->Tok.Data = Str.data(); Op->Tok.Length = Str.size(); Op->StartLoc = S; @@ -1521,7 +1527,7 @@ public: static std::unique_ptr CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, MipsAsmParser &Parser) { - auto Op = llvm::make_unique(k_Immediate, Parser); + auto Op = std::make_unique(k_Immediate, Parser); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; @@ -1531,7 +1537,7 @@ public: static std::unique_ptr CreateMem(std::unique_ptr Base, const MCExpr *Off, SMLoc S, SMLoc E, MipsAsmParser &Parser) { - auto Op = llvm::make_unique(k_Memory, Parser); + auto Op = std::make_unique(k_Memory, Parser); Op->Mem.Base = Base.release(); Op->Mem.Off = Off; Op->StartLoc = S; @@ -1544,7 +1550,7 @@ public: MipsAsmParser &Parser) { assert(Regs.size() > 0 && "Empty list not allowed"); - auto Op = llvm::make_unique(k_RegList, Parser); + auto Op = std::make_unique(k_RegList, Parser); Op->RegList.List = new SmallVector(Regs.begin(), Regs.end()); Op->StartLoc = StartLoc; Op->EndLoc = EndLoc; @@ -1804,8 +1810,8 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, break; // We'll deal with this situation later on when applying fixups. if (!isIntN(inMicroMipsMode() ? 17 : 18, Offset.getImm())) return Error(IDLoc, "branch target out of range"); - if (OffsetToAlignment(Offset.getImm(), - 1LL << (inMicroMipsMode() ? 1 : 2))) + if (offsetToAlignment(Offset.getImm(), + (inMicroMipsMode() ? Align(2) : Align(4)))) return Error(IDLoc, "branch to misaligned address"); break; case Mips::BGEZ: @@ -1834,8 +1840,8 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, break; // We'll deal with this situation later on when applying fixups. if (!isIntN(inMicroMipsMode() ? 17 : 18, Offset.getImm())) return Error(IDLoc, "branch target out of range"); - if (OffsetToAlignment(Offset.getImm(), - 1LL << (inMicroMipsMode() ? 1 : 2))) + if (offsetToAlignment(Offset.getImm(), + (inMicroMipsMode() ? Align(2) : Align(4)))) return Error(IDLoc, "branch to misaligned address"); break; case Mips::BGEC: case Mips::BGEC_MMR6: @@ -1850,7 +1856,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, break; // We'll deal with this situation later on when applying fixups. if (!isIntN(18, Offset.getImm())) return Error(IDLoc, "branch target out of range"); - if (OffsetToAlignment(Offset.getImm(), 1LL << 2)) + if (offsetToAlignment(Offset.getImm(), Align(4))) return Error(IDLoc, "branch to misaligned address"); break; case Mips::BLEZC: case Mips::BLEZC_MMR6: @@ -1863,7 +1869,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, break; // We'll deal with this situation later on when applying fixups. 
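The branch checks in these MipsAsmParser hunks replace OffsetToAlignment(Imm, 1LL << N) with offsetToAlignment(Imm, Align(2)) or Align(4), keeping the semantics: the result is zero exactly when the target offset is already a multiple of the alignment. A sketch, assuming the Support header:

  #include "llvm/Support/Alignment.h"
  #include <cassert>

  int main() {
    // offsetToAlignment returns how many bytes must be added to reach the
    // next aligned value, so a non-zero result flags a misaligned branch target.
    assert(llvm::offsetToAlignment(8, llvm::Align(4)) == 0);
    assert(llvm::offsetToAlignment(6, llvm::Align(4)) == 2);
  }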
if (!isIntN(18, Offset.getImm())) return Error(IDLoc, "branch target out of range"); - if (OffsetToAlignment(Offset.getImm(), 1LL << 2)) + if (offsetToAlignment(Offset.getImm(), Align(4))) return Error(IDLoc, "branch to misaligned address"); break; case Mips::BEQZC: case Mips::BEQZC_MMR6: @@ -1874,7 +1880,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, break; // We'll deal with this situation later on when applying fixups. if (!isIntN(23, Offset.getImm())) return Error(IDLoc, "branch target out of range"); - if (OffsetToAlignment(Offset.getImm(), 1LL << 2)) + if (offsetToAlignment(Offset.getImm(), Align(4))) return Error(IDLoc, "branch to misaligned address"); break; case Mips::BEQZ16_MM: @@ -1887,7 +1893,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, break; // We'll deal with this situation later on when applying fixups. if (!isInt<8>(Offset.getImm())) return Error(IDLoc, "branch target out of range"); - if (OffsetToAlignment(Offset.getImm(), 2LL)) + if (offsetToAlignment(Offset.getImm(), Align(2))) return Error(IDLoc, "branch to misaligned address"); break; } @@ -2454,25 +2460,21 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, : MER_Success; case Mips::LoadImmSingleGPR: - return expandLoadImmReal(Inst, true, true, false, IDLoc, Out, STI) - ? MER_Fail - : MER_Success; + return expandLoadSingleImmToGPR(Inst, IDLoc, Out, STI) ? MER_Fail + : MER_Success; case Mips::LoadImmSingleFGR: - return expandLoadImmReal(Inst, true, false, false, IDLoc, Out, STI) - ? MER_Fail - : MER_Success; + return expandLoadSingleImmToFPR(Inst, IDLoc, Out, STI) ? MER_Fail + : MER_Success; case Mips::LoadImmDoubleGPR: - return expandLoadImmReal(Inst, false, true, false, IDLoc, Out, STI) - ? MER_Fail - : MER_Success; + return expandLoadDoubleImmToGPR(Inst, IDLoc, Out, STI) ? MER_Fail + : MER_Success; case Mips::LoadImmDoubleFGR: - return expandLoadImmReal(Inst, false, false, true, IDLoc, Out, STI) - ? MER_Fail - : MER_Success; + return expandLoadDoubleImmToFPR(Inst, true, IDLoc, Out, STI) ? MER_Fail + : MER_Success; case Mips::LoadImmDoubleFGR_32: - return expandLoadImmReal(Inst, false, false, false, IDLoc, Out, STI) - ? MER_Fail - : MER_Success; + return expandLoadDoubleImmToFPR(Inst, false, IDLoc, Out, STI) ? MER_Fail + : MER_Success; + case Mips::Ulh: return expandUlh(Inst, true, IDLoc, Out, STI) ? MER_Fail : MER_Success; case Mips::Ulhu: @@ -2868,12 +2870,12 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr, bool Is32BitSym, SMLoc IDLoc, MCStreamer &Out, const MCSubtargetInfo *STI) { - // FIXME: These expansions do not respect -mxgot. 
MipsTargetStreamer &TOut = getTargetStreamer(); - bool UseSrcReg = SrcReg != Mips::NoRegister; + bool UseSrcReg = SrcReg != Mips::NoRegister && SrcReg != Mips::ZERO && + SrcReg != Mips::ZERO_64; warnIfNoMacro(IDLoc); - if (inPicMode() && ABI.IsO32()) { + if (inPicMode()) { MCValue Res; if (!SymExpr->evaluateAsRelocatable(Res, nullptr, nullptr)) { Error(IDLoc, "expected relocatable expression"); @@ -2884,46 +2886,41 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr, return true; } + bool IsPtr64 = ABI.ArePtrs64bit(); + bool IsLocalSym = + Res.getSymA()->getSymbol().isInSection() || + Res.getSymA()->getSymbol().isTemporary() || + (Res.getSymA()->getSymbol().isELF() && + cast(Res.getSymA()->getSymbol()).getBinding() == + ELF::STB_LOCAL); + bool UseXGOT = STI->getFeatureBits()[Mips::FeatureXGOT] && !IsLocalSym; + // The case where the result register is $25 is somewhat special. If the // symbol in the final relocation is external and not modified with a - // constant then we must use R_MIPS_CALL16 instead of R_MIPS_GOT16. + // constant then we must use R_MIPS_CALL16 instead of R_MIPS_GOT16 + // or R_MIPS_CALL16 instead of R_MIPS_GOT_DISP in 64-bit case. if ((DstReg == Mips::T9 || DstReg == Mips::T9_64) && !UseSrcReg && - Res.getConstant() == 0 && - !(Res.getSymA()->getSymbol().isInSection() || - Res.getSymA()->getSymbol().isTemporary() || - (Res.getSymA()->getSymbol().isELF() && - cast(Res.getSymA()->getSymbol()).getBinding() == - ELF::STB_LOCAL))) { - const MCExpr *CallExpr = - MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext()); - TOut.emitRRX(Mips::LW, DstReg, GPReg, MCOperand::createExpr(CallExpr), - IDLoc, STI); + Res.getConstant() == 0 && !IsLocalSym) { + if (UseXGOT) { + const MCExpr *CallHiExpr = MipsMCExpr::create(MipsMCExpr::MEK_CALL_HI16, + SymExpr, getContext()); + const MCExpr *CallLoExpr = MipsMCExpr::create(MipsMCExpr::MEK_CALL_LO16, + SymExpr, getContext()); + TOut.emitRX(Mips::LUi, DstReg, MCOperand::createExpr(CallHiExpr), IDLoc, + STI); + TOut.emitRRR(IsPtr64 ? Mips::DADDu : Mips::ADDu, DstReg, DstReg, GPReg, + IDLoc, STI); + TOut.emitRRX(IsPtr64 ? Mips::LD : Mips::LW, DstReg, DstReg, + MCOperand::createExpr(CallLoExpr), IDLoc, STI); + } else { + const MCExpr *CallExpr = + MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext()); + TOut.emitRRX(IsPtr64 ? Mips::LD : Mips::LW, DstReg, GPReg, + MCOperand::createExpr(CallExpr), IDLoc, STI); + } return false; } - // The remaining cases are: - // External GOT: lw $tmp, %got(symbol+offset)($gp) - // >addiu $tmp, $tmp, %lo(offset) - // >addiu $rd, $tmp, $rs - // Local GOT: lw $tmp, %got(symbol+offset)($gp) - // addiu $tmp, $tmp, %lo(symbol+offset)($gp) - // >addiu $rd, $tmp, $rs - // The addiu's marked with a '>' may be omitted if they are redundant. If - // this happens then the last instruction must use $rd as the result - // register. - const MipsMCExpr *GotExpr = - MipsMCExpr::create(MipsMCExpr::MEK_GOT, SymExpr, getContext()); - const MCExpr *LoExpr = nullptr; - if (Res.getSymA()->getSymbol().isInSection() || - Res.getSymA()->getSymbol().isTemporary()) - LoExpr = MipsMCExpr::create(MipsMCExpr::MEK_LO, SymExpr, getContext()); - else if (Res.getConstant() != 0) { - // External symbols fully resolve the symbol with just the %got(symbol) - // but we must still account for any offset to the symbol for expressions - // like symbol+8. 
- LoExpr = MCConstantExpr::create(Res.getConstant(), getContext()); - } - unsigned TmpReg = DstReg; if (UseSrcReg && getContext().getRegisterInfo()->isSuperOrSubRegisterEq(DstReg, @@ -2936,94 +2933,102 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr, TmpReg = ATReg; } - TOut.emitRRX(Mips::LW, TmpReg, GPReg, MCOperand::createExpr(GotExpr), IDLoc, - STI); - - if (LoExpr) - TOut.emitRRX(Mips::ADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr), + if (UseXGOT) { + // Loading address from XGOT + // External GOT: lui $tmp, %got_hi(symbol)($gp) + // addu $tmp, $tmp, $gp + // lw $tmp, %got_lo(symbol)($tmp) + // >addiu $tmp, $tmp, offset + // >addiu $rd, $tmp, $rs + // The addiu's marked with a '>' may be omitted if they are redundant. If + // this happens then the last instruction must use $rd as the result + // register. + const MCExpr *CallHiExpr = + MipsMCExpr::create(MipsMCExpr::MEK_GOT_HI16, SymExpr, getContext()); + const MCExpr *CallLoExpr = MipsMCExpr::create( + MipsMCExpr::MEK_GOT_LO16, Res.getSymA(), getContext()); + + TOut.emitRX(Mips::LUi, TmpReg, MCOperand::createExpr(CallHiExpr), IDLoc, + STI); + TOut.emitRRR(IsPtr64 ? Mips::DADDu : Mips::ADDu, TmpReg, TmpReg, GPReg, IDLoc, STI); + TOut.emitRRX(IsPtr64 ? Mips::LD : Mips::LW, TmpReg, TmpReg, + MCOperand::createExpr(CallLoExpr), IDLoc, STI); - if (UseSrcReg) - TOut.emitRRR(Mips::ADDu, DstReg, TmpReg, SrcReg, IDLoc, STI); - - return false; - } - - if (inPicMode() && ABI.ArePtrs64bit()) { - MCValue Res; - if (!SymExpr->evaluateAsRelocatable(Res, nullptr, nullptr)) { - Error(IDLoc, "expected relocatable expression"); - return true; - } - if (Res.getSymB() != nullptr) { - Error(IDLoc, "expected relocatable expression with only one symbol"); - return true; - } + if (Res.getConstant() != 0) + TOut.emitRRX(IsPtr64 ? Mips::DADDiu : Mips::ADDiu, TmpReg, TmpReg, + MCOperand::createExpr(MCConstantExpr::create( + Res.getConstant(), getContext())), + IDLoc, STI); - // The case where the result register is $25 is somewhat special. If the - // symbol in the final relocation is external and not modified with a - // constant then we must use R_MIPS_CALL16 instead of R_MIPS_GOT_DISP. - if ((DstReg == Mips::T9 || DstReg == Mips::T9_64) && !UseSrcReg && - Res.getConstant() == 0 && - !(Res.getSymA()->getSymbol().isInSection() || - Res.getSymA()->getSymbol().isTemporary() || - (Res.getSymA()->getSymbol().isELF() && - cast(Res.getSymA()->getSymbol()).getBinding() == - ELF::STB_LOCAL))) { - const MCExpr *CallExpr = - MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext()); - TOut.emitRRX(Mips::LD, DstReg, GPReg, MCOperand::createExpr(CallExpr), - IDLoc, STI); + if (UseSrcReg) + TOut.emitRRR(IsPtr64 ? Mips::DADDu : Mips::ADDu, DstReg, TmpReg, SrcReg, + IDLoc, STI); return false; } - // The remaining cases are: - // Small offset: ld $tmp, %got_disp(symbol)($gp) - // >daddiu $tmp, $tmp, offset - // >daddu $rd, $tmp, $rs - // The daddiu's marked with a '>' may be omitted if they are redundant. If - // this happens then the last instruction must use $rd as the result - // register. - const MipsMCExpr *GotExpr = MipsMCExpr::create(MipsMCExpr::MEK_GOT_DISP, - Res.getSymA(), - getContext()); + const MipsMCExpr *GotExpr = nullptr; const MCExpr *LoExpr = nullptr; - if (Res.getConstant() != 0) { - // Symbols fully resolve with just the %got_disp(symbol) but we - // must still account for any offset to the symbol for - // expressions like symbol+8. 
- LoExpr = MCConstantExpr::create(Res.getConstant(), getContext()); - - // FIXME: Offsets greater than 16 bits are not yet implemented. - // FIXME: The correct range is a 32-bit sign-extended number. - if (Res.getConstant() < -0x8000 || Res.getConstant() > 0x7fff) { - Error(IDLoc, "macro instruction uses large offset, which is not " - "currently supported"); - return true; + if (IsPtr64) { + // The remaining cases are: + // Small offset: ld $tmp, %got_disp(symbol)($gp) + // >daddiu $tmp, $tmp, offset + // >daddu $rd, $tmp, $rs + // The daddiu's marked with a '>' may be omitted if they are redundant. If + // this happens then the last instruction must use $rd as the result + // register. + GotExpr = MipsMCExpr::create(MipsMCExpr::MEK_GOT_DISP, Res.getSymA(), + getContext()); + if (Res.getConstant() != 0) { + // Symbols fully resolve with just the %got_disp(symbol) but we + // must still account for any offset to the symbol for + // expressions like symbol+8. + LoExpr = MCConstantExpr::create(Res.getConstant(), getContext()); + + // FIXME: Offsets greater than 16 bits are not yet implemented. + // FIXME: The correct range is a 32-bit sign-extended number. + if (Res.getConstant() < -0x8000 || Res.getConstant() > 0x7fff) { + Error(IDLoc, "macro instruction uses large offset, which is not " + "currently supported"); + return true; + } + } + } else { + // The remaining cases are: + // External GOT: lw $tmp, %got(symbol)($gp) + // >addiu $tmp, $tmp, offset + // >addiu $rd, $tmp, $rs + // Local GOT: lw $tmp, %got(symbol+offset)($gp) + // addiu $tmp, $tmp, %lo(symbol+offset)($gp) + // >addiu $rd, $tmp, $rs + // The addiu's marked with a '>' may be omitted if they are redundant. If + // this happens then the last instruction must use $rd as the result + // register. + if (IsLocalSym) { + GotExpr = + MipsMCExpr::create(MipsMCExpr::MEK_GOT, SymExpr, getContext()); + LoExpr = MipsMCExpr::create(MipsMCExpr::MEK_LO, SymExpr, getContext()); + } else { + // External symbols fully resolve the symbol with just the %got(symbol) + // but we must still account for any offset to the symbol for + // expressions like symbol+8. + GotExpr = MipsMCExpr::create(MipsMCExpr::MEK_GOT, Res.getSymA(), + getContext()); + if (Res.getConstant() != 0) + LoExpr = MCConstantExpr::create(Res.getConstant(), getContext()); } } - unsigned TmpReg = DstReg; - if (UseSrcReg && - getContext().getRegisterInfo()->isSuperOrSubRegisterEq(DstReg, - SrcReg)) { - // If $rs is the same as $rd, we need to use AT. - // If it is not available we exit. - unsigned ATReg = getATReg(IDLoc); - if (!ATReg) - return true; - TmpReg = ATReg; - } - - TOut.emitRRX(Mips::LD, TmpReg, GPReg, MCOperand::createExpr(GotExpr), IDLoc, - STI); + TOut.emitRRX(IsPtr64 ? Mips::LD : Mips::LW, TmpReg, GPReg, + MCOperand::createExpr(GotExpr), IDLoc, STI); if (LoExpr) - TOut.emitRRX(Mips::DADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr), - IDLoc, STI); + TOut.emitRRX(IsPtr64 ? Mips::DADDiu : Mips::ADDiu, TmpReg, TmpReg, + MCOperand::createExpr(LoExpr), IDLoc, STI); if (UseSrcReg) - TOut.emitRRR(Mips::DADDu, DstReg, TmpReg, SrcReg, IDLoc, STI); + TOut.emitRRR(IsPtr64 ? 
Mips::DADDu : Mips::ADDu, DstReg, TmpReg, SrcReg, + IDLoc, STI); return false; } @@ -3289,10 +3294,43 @@ bool MipsAsmParser::emitPartialAddress(MipsTargetStreamer &TOut, SMLoc IDLoc, return false; } -bool MipsAsmParser::expandLoadImmReal(MCInst &Inst, bool IsSingle, bool IsGPR, - bool Is64FPU, SMLoc IDLoc, - MCStreamer &Out, - const MCSubtargetInfo *STI) { +static uint64_t convertIntToDoubleImm(uint64_t ImmOp64) { + // If ImmOp64 is AsmToken::Integer type (all bits set to zero in the + // exponent field), convert it to double (e.g. 1 to 1.0) + if ((Hi_32(ImmOp64) & 0x7ff00000) == 0) { + APFloat RealVal(APFloat::IEEEdouble(), ImmOp64); + ImmOp64 = RealVal.bitcastToAPInt().getZExtValue(); + } + return ImmOp64; +} + +static uint32_t covertDoubleImmToSingleImm(uint64_t ImmOp64) { + // Conversion of a double in an uint64_t to a float in a uint32_t, + // retaining the bit pattern of a float. + double DoubleImm = BitsToDouble(ImmOp64); + float TmpFloat = static_cast(DoubleImm); + return FloatToBits(TmpFloat); +} + +bool MipsAsmParser::expandLoadSingleImmToGPR(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out, + const MCSubtargetInfo *STI) { + assert(Inst.getNumOperands() == 2 && "Invalid operand count"); + assert(Inst.getOperand(0).isReg() && Inst.getOperand(1).isImm() && + "Invalid instruction operand."); + + unsigned FirstReg = Inst.getOperand(0).getReg(); + uint64_t ImmOp64 = Inst.getOperand(1).getImm(); + + uint32_t ImmOp32 = covertDoubleImmToSingleImm(convertIntToDoubleImm(ImmOp64)); + + return loadImmediate(ImmOp32, FirstReg, Mips::NoRegister, true, false, IDLoc, + Out, STI); +} + +bool MipsAsmParser::expandLoadSingleImmToFPR(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out, + const MCSubtargetInfo *STI) { MipsTargetStreamer &TOut = getTargetStreamer(); assert(Inst.getNumOperands() == 2 && "Invalid operand count"); assert(Inst.getOperand(0).isReg() && Inst.getOperand(1).isImm() && @@ -3301,166 +3339,184 @@ bool MipsAsmParser::expandLoadImmReal(MCInst &Inst, bool IsSingle, bool IsGPR, unsigned FirstReg = Inst.getOperand(0).getReg(); uint64_t ImmOp64 = Inst.getOperand(1).getImm(); - uint32_t HiImmOp64 = (ImmOp64 & 0xffffffff00000000) >> 32; - // If ImmOp64 is AsmToken::Integer type (all bits set to zero in the - // exponent field), convert it to double (e.g. 1 to 1.0) - if ((HiImmOp64 & 0x7ff00000) == 0) { - APFloat RealVal(APFloat::IEEEdouble(), ImmOp64); - ImmOp64 = RealVal.bitcastToAPInt().getZExtValue(); + ImmOp64 = convertIntToDoubleImm(ImmOp64); + + uint32_t ImmOp32 = covertDoubleImmToSingleImm(ImmOp64); + + unsigned TmpReg = Mips::ZERO; + if (ImmOp32 != 0) { + TmpReg = getATReg(IDLoc); + if (!TmpReg) + return true; } - uint32_t LoImmOp64 = ImmOp64 & 0xffffffff; - HiImmOp64 = (ImmOp64 & 0xffffffff00000000) >> 32; + if (Lo_32(ImmOp64) == 0) { + if (TmpReg != Mips::ZERO && loadImmediate(ImmOp32, TmpReg, Mips::NoRegister, + true, false, IDLoc, Out, STI)) + return true; + TOut.emitRR(Mips::MTC1, FirstReg, TmpReg, IDLoc, STI); + return false; + } - if (IsSingle) { - // Conversion of a double in an uint64_t to a float in a uint32_t, - // retaining the bit pattern of a float. - uint32_t ImmOp32; - double doubleImm = BitsToDouble(ImmOp64); - float tmp_float = static_cast(doubleImm); - ImmOp32 = FloatToBits(tmp_float); + MCSection *CS = getStreamer().getCurrentSectionOnly(); + // FIXME: Enhance this expansion to use the .lit4 & .lit8 sections + // where appropriate. 
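The helpers factored out above work purely on bit patterns: an integer-typed literal is reinterpreted as a double, and a double immediate destined for a single-precision load is narrowed by value and then reinterpreted as 32 bits (LLVM's BitsToDouble/FloatToBits wrap the same reinterpretation). A standalone sketch of that narrowing step:

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  uint32_t doubleBitsToFloatBits(uint64_t Bits64) {
    double D;
    std::memcpy(&D, &Bits64, sizeof(D));  // reinterpret the i64 as a double
    float F = static_cast<float>(D);      // narrow by value; may round
    uint32_t Bits32;
    std::memcpy(&Bits32, &F, sizeof(F));  // reinterpret the float's bits
    return Bits32;
  }

  int main() {
    // 1.0 as IEEE-754: 0x3FF0000000000000 (double) -> 0x3F800000 (float)
    assert(doubleBitsToFloatBits(0x3FF0000000000000ULL) == 0x3F800000u);
  }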
+ MCSection *ReadOnlySection = + getContext().getELFSection(".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); - if (IsGPR) { - if (loadImmediate(ImmOp32, FirstReg, Mips::NoRegister, true, true, IDLoc, - Out, STI)) - return true; - return false; - } else { - unsigned ATReg = getATReg(IDLoc); - if (!ATReg) - return true; - if (LoImmOp64 == 0) { - if (loadImmediate(ImmOp32, ATReg, Mips::NoRegister, true, true, IDLoc, - Out, STI)) - return true; - TOut.emitRR(Mips::MTC1, FirstReg, ATReg, IDLoc, STI); - return false; - } + MCSymbol *Sym = getContext().createTempSymbol(); + const MCExpr *LoSym = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + const MipsMCExpr *LoExpr = + MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext()); - MCSection *CS = getStreamer().getCurrentSectionOnly(); - // FIXME: Enhance this expansion to use the .lit4 & .lit8 sections - // where appropriate. - MCSection *ReadOnlySection = getContext().getELFSection( - ".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + getStreamer().SwitchSection(ReadOnlySection); + getStreamer().EmitLabel(Sym, IDLoc); + getStreamer().EmitIntValue(ImmOp32, 4); + getStreamer().SwitchSection(CS); - MCSymbol *Sym = getContext().createTempSymbol(); - const MCExpr *LoSym = - MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); - const MipsMCExpr *LoExpr = - MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext()); + if (emitPartialAddress(TOut, IDLoc, Sym)) + return true; + TOut.emitRRX(Mips::LWC1, FirstReg, TmpReg, MCOperand::createExpr(LoExpr), + IDLoc, STI); + return false; +} + +bool MipsAsmParser::expandLoadDoubleImmToGPR(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out, + const MCSubtargetInfo *STI) { + MipsTargetStreamer &TOut = getTargetStreamer(); + assert(Inst.getNumOperands() == 2 && "Invalid operand count"); + assert(Inst.getOperand(0).isReg() && Inst.getOperand(1).isImm() && + "Invalid instruction operand."); + + unsigned FirstReg = Inst.getOperand(0).getReg(); + uint64_t ImmOp64 = Inst.getOperand(1).getImm(); + + ImmOp64 = convertIntToDoubleImm(ImmOp64); - getStreamer().SwitchSection(ReadOnlySection); - getStreamer().EmitLabel(Sym, IDLoc); - getStreamer().EmitIntValue(ImmOp32, 4); - getStreamer().SwitchSection(CS); + if (Lo_32(ImmOp64) == 0) { + if (isGP64bit()) { + if (loadImmediate(ImmOp64, FirstReg, Mips::NoRegister, false, false, + IDLoc, Out, STI)) + return true; + } else { + if (loadImmediate(Hi_32(ImmOp64), FirstReg, Mips::NoRegister, true, false, + IDLoc, Out, STI)) + return true; - if(emitPartialAddress(TOut, IDLoc, Sym)) + if (loadImmediate(0, nextReg(FirstReg), Mips::NoRegister, true, false, + IDLoc, Out, STI)) return true; - TOut.emitRRX(Mips::LWC1, FirstReg, ATReg, - MCOperand::createExpr(LoExpr), IDLoc, STI); } return false; } - // if(!IsSingle) - unsigned ATReg = getATReg(IDLoc); - if (!ATReg) + MCSection *CS = getStreamer().getCurrentSectionOnly(); + MCSection *ReadOnlySection = + getContext().getELFSection(".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + + MCSymbol *Sym = getContext().createTempSymbol(); + const MCExpr *LoSym = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + const MipsMCExpr *LoExpr = + MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext()); + + getStreamer().SwitchSection(ReadOnlySection); + getStreamer().EmitLabel(Sym, IDLoc); + getStreamer().EmitValueToAlignment(8); + getStreamer().EmitIntValue(ImmOp64, 8); + getStreamer().SwitchSection(CS); + + unsigned TmpReg = getATReg(IDLoc); + if (!TmpReg) return true; - if (IsGPR) { - if 
(LoImmOp64 == 0) { - if(isABI_N32() || isABI_N64()) { - if (loadImmediate(HiImmOp64, FirstReg, Mips::NoRegister, false, true, - IDLoc, Out, STI)) - return true; - return false; - } else { - if (loadImmediate(HiImmOp64, FirstReg, Mips::NoRegister, true, true, - IDLoc, Out, STI)) - return true; + if (emitPartialAddress(TOut, IDLoc, Sym)) + return true; - if (loadImmediate(0, nextReg(FirstReg), Mips::NoRegister, true, true, - IDLoc, Out, STI)) - return true; - return false; - } - } + TOut.emitRRX(isABI_N64() ? Mips::DADDiu : Mips::ADDiu, TmpReg, TmpReg, + MCOperand::createExpr(LoExpr), IDLoc, STI); - MCSection *CS = getStreamer().getCurrentSectionOnly(); - MCSection *ReadOnlySection = getContext().getELFSection( - ".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + if (isGP64bit()) + TOut.emitRRI(Mips::LD, FirstReg, TmpReg, 0, IDLoc, STI); + else { + TOut.emitRRI(Mips::LW, FirstReg, TmpReg, 0, IDLoc, STI); + TOut.emitRRI(Mips::LW, nextReg(FirstReg), TmpReg, 4, IDLoc, STI); + } + return false; +} - MCSymbol *Sym = getContext().createTempSymbol(); - const MCExpr *LoSym = - MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); - const MipsMCExpr *LoExpr = - MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext()); +bool MipsAsmParser::expandLoadDoubleImmToFPR(MCInst &Inst, bool Is64FPU, + SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI) { + MipsTargetStreamer &TOut = getTargetStreamer(); + assert(Inst.getNumOperands() == 2 && "Invalid operand count"); + assert(Inst.getOperand(0).isReg() && Inst.getOperand(1).isImm() && + "Invalid instruction operand."); - getStreamer().SwitchSection(ReadOnlySection); - getStreamer().EmitLabel(Sym, IDLoc); - getStreamer().EmitIntValue(HiImmOp64, 4); - getStreamer().EmitIntValue(LoImmOp64, 4); - getStreamer().SwitchSection(CS); + unsigned FirstReg = Inst.getOperand(0).getReg(); + uint64_t ImmOp64 = Inst.getOperand(1).getImm(); + + ImmOp64 = convertIntToDoubleImm(ImmOp64); - if(emitPartialAddress(TOut, IDLoc, Sym)) + unsigned TmpReg = Mips::ZERO; + if (ImmOp64 != 0) { + TmpReg = getATReg(IDLoc); + if (!TmpReg) return true; - if(isABI_N64()) - TOut.emitRRX(Mips::DADDiu, ATReg, ATReg, - MCOperand::createExpr(LoExpr), IDLoc, STI); - else - TOut.emitRRX(Mips::ADDiu, ATReg, ATReg, - MCOperand::createExpr(LoExpr), IDLoc, STI); + } - if(isABI_N32() || isABI_N64()) - TOut.emitRRI(Mips::LD, FirstReg, ATReg, 0, IDLoc, STI); - else { - TOut.emitRRI(Mips::LW, FirstReg, ATReg, 0, IDLoc, STI); - TOut.emitRRI(Mips::LW, nextReg(FirstReg), ATReg, 4, IDLoc, STI); - } - return false; - } else { // if(!IsGPR && !IsSingle) - if ((LoImmOp64 == 0) && - !((HiImmOp64 & 0xffff0000) && (HiImmOp64 & 0x0000ffff))) { - // FIXME: In the case where the constant is zero, we can load the - // register directly from the zero register. 
- if (loadImmediate(HiImmOp64, ATReg, Mips::NoRegister, true, true, IDLoc, + if ((Lo_32(ImmOp64) == 0) && + !((Hi_32(ImmOp64) & 0xffff0000) && (Hi_32(ImmOp64) & 0x0000ffff))) { + if (isGP64bit()) { + if (TmpReg != Mips::ZERO && + loadImmediate(ImmOp64, TmpReg, Mips::NoRegister, false, false, IDLoc, Out, STI)) return true; - if (isABI_N32() || isABI_N64()) - TOut.emitRR(Mips::DMTC1, FirstReg, ATReg, IDLoc, STI); - else if (hasMips32r2()) { - TOut.emitRR(Mips::MTC1, FirstReg, Mips::ZERO, IDLoc, STI); - TOut.emitRRR(Mips::MTHC1_D32, FirstReg, FirstReg, ATReg, IDLoc, STI); - } else { - TOut.emitRR(Mips::MTC1, nextReg(FirstReg), ATReg, IDLoc, STI); - TOut.emitRR(Mips::MTC1, FirstReg, Mips::ZERO, IDLoc, STI); - } + TOut.emitRR(Mips::DMTC1, FirstReg, TmpReg, IDLoc, STI); return false; } - MCSection *CS = getStreamer().getCurrentSectionOnly(); - // FIXME: Enhance this expansion to use the .lit4 & .lit8 sections - // where appropriate. - MCSection *ReadOnlySection = getContext().getELFSection( - ".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + if (TmpReg != Mips::ZERO && + loadImmediate(Hi_32(ImmOp64), TmpReg, Mips::NoRegister, true, false, + IDLoc, Out, STI)) + return true; - MCSymbol *Sym = getContext().createTempSymbol(); - const MCExpr *LoSym = - MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); - const MipsMCExpr *LoExpr = - MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext()); + if (hasMips32r2()) { + TOut.emitRR(Mips::MTC1, FirstReg, Mips::ZERO, IDLoc, STI); + TOut.emitRRR(Mips::MTHC1_D32, FirstReg, FirstReg, TmpReg, IDLoc, STI); + } else { + TOut.emitRR(Mips::MTC1, nextReg(FirstReg), TmpReg, IDLoc, STI); + TOut.emitRR(Mips::MTC1, FirstReg, Mips::ZERO, IDLoc, STI); + } + return false; + } - getStreamer().SwitchSection(ReadOnlySection); - getStreamer().EmitLabel(Sym, IDLoc); - getStreamer().EmitIntValue(HiImmOp64, 4); - getStreamer().EmitIntValue(LoImmOp64, 4); - getStreamer().SwitchSection(CS); + MCSection *CS = getStreamer().getCurrentSectionOnly(); + // FIXME: Enhance this expansion to use the .lit4 & .lit8 sections + // where appropriate. + MCSection *ReadOnlySection = + getContext().getELFSection(".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + + MCSymbol *Sym = getContext().createTempSymbol(); + const MCExpr *LoSym = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + const MipsMCExpr *LoExpr = + MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext()); + + getStreamer().SwitchSection(ReadOnlySection); + getStreamer().EmitLabel(Sym, IDLoc); + getStreamer().EmitValueToAlignment(8); + getStreamer().EmitIntValue(ImmOp64, 8); + getStreamer().SwitchSection(CS); + + if (emitPartialAddress(TOut, IDLoc, Sym)) + return true; + + TOut.emitRRX(Is64FPU ? Mips::LDC164 : Mips::LDC1, FirstReg, TmpReg, + MCOperand::createExpr(LoExpr), IDLoc, STI); - if(emitPartialAddress(TOut, IDLoc, Sym)) - return true; - TOut.emitRRX(Is64FPU ? 
Mips::LDC164 : Mips::LDC1, FirstReg, ATReg, - MCOperand::createExpr(LoExpr), IDLoc, STI); - } return false; } @@ -3489,7 +3545,7 @@ bool MipsAsmParser::expandUncondBranchMMPseudo(MCInst &Inst, SMLoc IDLoc, } else { if (!isInt<17>(Offset.getImm())) return Error(IDLoc, "branch target out of range"); - if (OffsetToAlignment(Offset.getImm(), 1LL << 1)) + if (offsetToAlignment(Offset.getImm(), Align(2))) return Error(IDLoc, "branch to misaligned address"); Inst.clear(); Inst.setOpcode(Mips::BEQ_MM); @@ -3581,7 +3637,6 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, assert(DstRegOp.isReg() && "expected register operand kind"); const MCOperand &BaseRegOp = Inst.getOperand(1); assert(BaseRegOp.isReg() && "expected register operand kind"); - const MCOperand &OffsetOp = Inst.getOperand(2); MipsTargetStreamer &TOut = getTargetStreamer(); unsigned DstReg = DstRegOp.getReg(); @@ -3603,6 +3658,26 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, return; } + if (Inst.getNumOperands() > 3) { + const MCOperand &BaseRegOp = Inst.getOperand(2); + assert(BaseRegOp.isReg() && "expected register operand kind"); + const MCOperand &ExprOp = Inst.getOperand(3); + assert(ExprOp.isExpr() && "expected expression oprand kind"); + + unsigned BaseReg = BaseRegOp.getReg(); + const MCExpr *ExprOffset = ExprOp.getExpr(); + + MCOperand LoOperand = MCOperand::createExpr( + MipsMCExpr::create(MipsMCExpr::MEK_LO, ExprOffset, getContext())); + MCOperand HiOperand = MCOperand::createExpr( + MipsMCExpr::create(MipsMCExpr::MEK_HI, ExprOffset, getContext())); + TOut.emitSCWithSymOffset(Inst.getOpcode(), DstReg, BaseReg, HiOperand, + LoOperand, TmpReg, IDLoc, STI); + return; + } + + const MCOperand &OffsetOp = Inst.getOperand(2); + if (OffsetOp.isImm()) { int64_t LoOffset = OffsetOp.getImm() & 0xffff; int64_t HiOffset = OffsetOp.getImm() & ~0xffff; @@ -3625,21 +3700,54 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, TOut.emitRRR(isGP64bit() ? Mips::DADDu : Mips::ADDu, TmpReg, TmpReg, BaseReg, IDLoc, STI); TOut.emitRRI(Inst.getOpcode(), DstReg, TmpReg, LoOffset, IDLoc, STI); - } else { - assert(OffsetOp.isExpr() && "expected expression operand kind"); - const MCExpr *ExprOffset = OffsetOp.getExpr(); - MCOperand LoOperand = MCOperand::createExpr( - MipsMCExpr::create(MipsMCExpr::MEK_LO, ExprOffset, getContext())); - MCOperand HiOperand = MCOperand::createExpr( - MipsMCExpr::create(MipsMCExpr::MEK_HI, ExprOffset, getContext())); + return; + } + + if (OffsetOp.isExpr()) { + if (inPicMode()) { + // FIXME: + // c) Check that immediates of R_MIPS_GOT16/R_MIPS_LO16 relocations + // do not exceed 16-bit. + // d) Use R_MIPS_GOT_PAGE/R_MIPS_GOT_OFST relocations instead + // of R_MIPS_GOT_DISP in appropriate cases to reduce number + // of GOT entries. 
+ MCValue Res; + if (!OffsetOp.getExpr()->evaluateAsRelocatable(Res, nullptr, nullptr)) { + Error(IDLoc, "expected relocatable expression"); + return; + } + if (Res.getSymB() != nullptr) { + Error(IDLoc, "expected relocatable expression with only one symbol"); + return; + } - if (IsLoad) - TOut.emitLoadWithSymOffset(Inst.getOpcode(), DstReg, BaseReg, HiOperand, - LoOperand, TmpReg, IDLoc, STI); - else - TOut.emitStoreWithSymOffset(Inst.getOpcode(), DstReg, BaseReg, HiOperand, - LoOperand, TmpReg, IDLoc, STI); + loadAndAddSymbolAddress(Res.getSymA(), TmpReg, BaseReg, + !ABI.ArePtrs64bit(), IDLoc, Out, STI); + TOut.emitRRI(Inst.getOpcode(), DstReg, TmpReg, Res.getConstant(), IDLoc, + STI); + } else { + // FIXME: Implement 64-bit case. + // 1) lw $8, sym => lui $8, %hi(sym) + // lw $8, %lo(sym)($8) + // 2) sw $8, sym => lui $at, %hi(sym) + // sw $8, %lo(sym)($at) + const MCExpr *ExprOffset = OffsetOp.getExpr(); + MCOperand LoOperand = MCOperand::createExpr( + MipsMCExpr::create(MipsMCExpr::MEK_LO, ExprOffset, getContext())); + MCOperand HiOperand = MCOperand::createExpr( + MipsMCExpr::create(MipsMCExpr::MEK_HI, ExprOffset, getContext())); + + // Generate the base address in TmpReg. + TOut.emitRX(Mips::LUi, TmpReg, HiOperand, IDLoc, STI); + if (BaseReg != Mips::ZERO) + TOut.emitRRR(Mips::ADDu, TmpReg, TmpReg, BaseReg, IDLoc, STI); + // Emit the load or store with the adjusted base and offset. + TOut.emitRRX(Inst.getOpcode(), DstReg, TmpReg, LoOperand, IDLoc, STI); + } + return; } + + llvm_unreachable("unexpected operand type"); } bool MipsAsmParser::expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc, @@ -6976,7 +7084,7 @@ bool MipsAsmParser::parseSetPushDirective() { // Create a copy of the current assembler options environment and push it. AssemblerOptions.push_back( - llvm::make_unique(AssemblerOptions.back().get())); + std::make_unique(AssemblerOptions.back().get())); getTargetStreamer().emitDirectiveSetPush(); return false; diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index ef13507fe63a..c3e98fe410c1 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -267,6 +267,13 @@ static DecodeStatus DecodeJumpTargetMM(MCInst &Inst, uint64_t Address, const void *Decoder); +// DecodeJumpTargetXMM - Decode microMIPS jump and link exchange target, +// which is shifted left by 2 bit. 
+static DecodeStatus DecodeJumpTargetXMM(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeMem(MCInst &Inst, unsigned Insn, uint64_t Address, @@ -2291,6 +2298,15 @@ static DecodeStatus DecodeJumpTargetMM(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeJumpTargetXMM(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + unsigned JumpOffset = fieldFromInstruction(Insn, 0, 26) << 2; + Inst.addOperand(MCOperand::createImm(JumpOffset)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeAddiur2Simm7(MCInst &Inst, unsigned Value, uint64_t Address, diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 859f9cbbca07..70f2a7bdf10f 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -304,7 +304,6 @@ Optional MipsAsmBackend::getFixupKind(StringRef Name) const { return StringSwitch>(Name) .Case("R_MIPS_NONE", FK_NONE) .Case("R_MIPS_32", FK_Data_4) - .Case("R_MIPS_GOT_PAGE", (MCFixupKind)Mips::fixup_Mips_GOT_PAGE) .Case("R_MIPS_CALL_HI16", (MCFixupKind)Mips::fixup_Mips_CALL_HI16) .Case("R_MIPS_CALL_LO16", (MCFixupKind)Mips::fixup_Mips_CALL_LO16) .Case("R_MIPS_CALL16", (MCFixupKind)Mips::fixup_Mips_CALL16) diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h index 4d7e36995ae4..cca75dfc45c2 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h @@ -66,9 +66,9 @@ public: /// fixupNeedsRelaxation - Target specific predicate for whether a given /// fixup requires the associated instruction to be relaxed. - bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const override { + bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override { // FIXME. llvm_unreachable("RelaxInstruction() unimplemented"); return false; diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index cf7bae98a27f..cc3168790b98 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -219,7 +219,7 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx, const MCFixup &Fixup, bool IsPCRel) const { // Determine the type of the relocation. - unsigned Kind = (unsigned)Fixup.getKind(); + unsigned Kind = Fixup.getTargetKind(); switch (Kind) { case FK_NONE: @@ -690,6 +690,6 @@ llvm::createMipsELFObjectWriter(const Triple &TT, bool IsN32) { uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); bool IsN64 = TT.isArch64Bit() && !IsN32; bool HasRelocationAddend = TT.isArch64Bit(); - return llvm::make_unique(OSABI, HasRelocationAddend, + return std::make_unique(OSABI, HasRelocationAddend, IsN64); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 759a7fdb32b8..142e9cebb79e 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -485,8 +485,11 @@ getJumpOffset16OpValue(const MCInst &MI, unsigned OpNo, assert(MO.isExpr() && "getJumpOffset16OpValue expects only expressions or an immediate"); - // TODO: Push fixup. 
- return 0; + const MCExpr *Expr = MO.getExpr(); + Mips::Fixups FixupKind = + isMicroMips(STI) ? Mips::fixup_MICROMIPS_LO16 : Mips::fixup_Mips_LO16; + Fixups.push_back(MCFixup::create(0, Expr, MCFixupKind(FixupKind))); + return 0; } /// getJumpTargetOpValue - Return binary encoding of the jump diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h index ad5aff6552f6..a84ca8ccfb2d 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h @@ -10,11 +10,12 @@ #define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCNACL_H #include "llvm/MC/MCELFStreamer.h" +#include "llvm/Support/Alignment.h" namespace llvm { -// Log2 of the NaCl MIPS sandbox's instruction bundle size. -static const unsigned MIPS_NACL_BUNDLE_ALIGN = 4u; +// NaCl MIPS sandbox's instruction bundle size. +static const Align MIPS_NACL_BUNDLE_ALIGN = Align(16); bool isBasePlusOffsetMemoryAccess(unsigned Opcode, unsigned *AddrIdx, bool *IsStore = nullptr); diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index ddeec03ba784..79c47d1b6508 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -143,12 +143,15 @@ public: return false; switch (Info->get(Inst.getOpcode()).OpInfo[NumOps - 1].OperandType) { case MCOI::OPERAND_UNKNOWN: - case MCOI::OPERAND_IMMEDIATE: - // jal, bal ... - Target = Inst.getOperand(NumOps - 1).getImm(); + case MCOI::OPERAND_IMMEDIATE: { + // j, jal, jalx, jals + // Absolute branch within the current 256 MB-aligned region + uint64_t Region = Addr & ~uint64_t(0xfffffff); + Target = Region + Inst.getOperand(NumOps - 1).getImm(); return true; + } case MCOI::OPERAND_PCREL: - // b, j, beq ... + // b, beq ... Target = Addr + Inst.getOperand(NumOps - 1).getImm(); return true; default: diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp index c050db8a17fd..2d53750ad0ee 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp @@ -270,7 +270,7 @@ MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, S->getAssembler().setRelaxAll(true); // Set bundle-alignment as required by the NaCl ABI for the target. - S->EmitBundleAlignMode(MIPS_NACL_BUNDLE_ALIGN); + S->EmitBundleAlignMode(Log2(MIPS_NACL_BUNDLE_ALIGN)); return S; } diff --git a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp index b4ebb9d18b72..3ff9c722484b 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp @@ -37,7 +37,7 @@ void MipsRegInfoRecord::EmitMipsOptionRecord() { Context.getELFSection(".MIPS.options", ELF::SHT_MIPS_OPTIONS, ELF::SHF_ALLOC | ELF::SHF_MIPS_NOSTRIP, 1, ""); MCA.registerSection(*Sec); - Sec->setAlignment(8); + Sec->setAlignment(Align(8)); Streamer->SwitchSection(Sec); Streamer->EmitIntValue(ELF::ODK_REGINFO, 1); // kind @@ -55,7 +55,7 @@ void MipsRegInfoRecord::EmitMipsOptionRecord() { MCSectionELF *Sec = Context.getELFSection(".reginfo", ELF::SHT_MIPS_REGINFO, ELF::SHF_ALLOC, 24, ""); MCA.registerSection(*Sec); - Sec->setAlignment(MTS->getABI().IsN32() ? 8 : 4); + Sec->setAlignment(MTS->getABI().IsN32() ? 
Align(8) : Align(4)); Streamer->SwitchSection(Sec); Streamer->EmitIntValue(ri_gprmask, 4); diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index e3bdb3b140a8..b6dae9f6dea8 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -34,6 +34,15 @@ static cl::opt RoundSectionSizes( cl::desc("Round section sizes up to the section alignment"), cl::Hidden); } // end anonymous namespace +static bool isMipsR6(const MCSubtargetInfo *STI) { + return STI->getFeatureBits()[Mips::FeatureMips32r6] || + STI->getFeatureBits()[Mips::FeatureMips64r6]; +} + +static bool isMicroMips(const MCSubtargetInfo *STI) { + return STI->getFeatureBits()[Mips::FeatureMicroMips]; +} + MipsTargetStreamer::MipsTargetStreamer(MCStreamer &S) : MCTargetStreamer(S), GPReg(Mips::GP), ModuleDirectiveAllowed(true) { GPRInfoSet = FPRInfoSet = FrameInfoSet = false; @@ -216,6 +225,19 @@ void MipsTargetStreamer::emitRRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, emitRRX(Opcode, Reg0, Reg1, MCOperand::createReg(Reg2), IDLoc, STI); } +void MipsTargetStreamer::emitRRRX(unsigned Opcode, unsigned Reg0, unsigned Reg1, + unsigned Reg2, MCOperand Op3, SMLoc IDLoc, + const MCSubtargetInfo *STI) { + MCInst TmpInst; + TmpInst.setOpcode(Opcode); + TmpInst.addOperand(MCOperand::createReg(Reg0)); + TmpInst.addOperand(MCOperand::createReg(Reg1)); + TmpInst.addOperand(MCOperand::createReg(Reg2)); + TmpInst.addOperand(Op3); + TmpInst.setLoc(IDLoc); + getStreamer().EmitInstruction(TmpInst, *STI); +} + void MipsTargetStreamer::emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm, SMLoc IDLoc, const MCSubtargetInfo *STI) { @@ -264,8 +286,7 @@ void MipsTargetStreamer::emitEmptyDelaySlot(bool hasShortDelaySlot, SMLoc IDLoc, } void MipsTargetStreamer::emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI) { - const FeatureBitset &Features = STI->getFeatureBits(); - if (Features[Mips::FeatureMicroMips]) + if (isMicroMips(STI)) emitRR(Mips::MOVE16_MM, Mips::ZERO, Mips::ZERO, IDLoc, STI); else emitRRI(Mips::SLL, Mips::ZERO, Mips::ZERO, 0, IDLoc, STI); @@ -311,21 +332,34 @@ void MipsTargetStreamer::emitStoreWithImmOffset( emitRRI(Opcode, SrcReg, ATReg, LoOffset, IDLoc, STI); } -/// Emit a store instruction with an symbol offset. Symbols are assumed to be -/// out of range for a simm16 will be expanded to appropriate instructions. -void MipsTargetStreamer::emitStoreWithSymOffset( - unsigned Opcode, unsigned SrcReg, unsigned BaseReg, MCOperand &HiOperand, - MCOperand &LoOperand, unsigned ATReg, SMLoc IDLoc, - const MCSubtargetInfo *STI) { - // sw $8, sym => lui $at, %hi(sym) - // sw $8, %lo(sym)($at) +/// Emit a store instruction with an symbol offset. +void MipsTargetStreamer::emitSCWithSymOffset(unsigned Opcode, unsigned SrcReg, + unsigned BaseReg, + MCOperand &HiOperand, + MCOperand &LoOperand, + unsigned ATReg, SMLoc IDLoc, + const MCSubtargetInfo *STI) { + // sc $8, sym => lui $at, %hi(sym) + // sc $8, %lo(sym)($at) // Generate the base address in ATReg. emitRX(Mips::LUi, ATReg, HiOperand, IDLoc, STI); - if (BaseReg != Mips::ZERO) - emitRRR(Mips::ADDu, ATReg, ATReg, BaseReg, IDLoc, STI); - // Emit the store with the adjusted base and offset. - emitRRX(Opcode, SrcReg, ATReg, LoOperand, IDLoc, STI); + if (!isMicroMips(STI) && isMipsR6(STI)) { + // For non-micromips r6 offset for 'sc' is not in the lower 16 bits so we + // put it in 'at'. 
+ // sc $8, sym => lui $at, %hi(sym) + // addiu $at, $at, %lo(sym) + // sc $8, 0($at) + emitRRX(Mips::ADDiu, ATReg, ATReg, LoOperand, IDLoc, STI); + MCOperand Offset = MCOperand::createImm(0); + // Emit the store with the adjusted base and offset. + emitRRRX(Opcode, SrcReg, SrcReg, ATReg, Offset, IDLoc, STI); + } else { + if (BaseReg != Mips::ZERO) + emitRRR(Mips::ADDu, ATReg, ATReg, BaseReg, IDLoc, STI); + // Emit the store with the adjusted base and offset. + emitRRRX(Opcode, SrcReg, SrcReg, ATReg, LoOperand, IDLoc, STI); + } } /// Emit a load instruction with an immediate offset. DstReg and TmpReg are @@ -364,30 +398,6 @@ void MipsTargetStreamer::emitLoadWithImmOffset(unsigned Opcode, unsigned DstReg, emitRRI(Opcode, DstReg, TmpReg, LoOffset, IDLoc, STI); } -/// Emit a load instruction with an symbol offset. Symbols are assumed to be -/// out of range for a simm16 will be expanded to appropriate instructions. -/// DstReg and TmpReg are permitted to be the same register iff DstReg is a -/// GPR. It is the callers responsibility to identify such cases and pass the -/// appropriate register in TmpReg. -void MipsTargetStreamer::emitLoadWithSymOffset(unsigned Opcode, unsigned DstReg, - unsigned BaseReg, - MCOperand &HiOperand, - MCOperand &LoOperand, - unsigned TmpReg, SMLoc IDLoc, - const MCSubtargetInfo *STI) { - // 1) lw $8, sym => lui $8, %hi(sym) - // lw $8, %lo(sym)($8) - // 2) ldc1 $f0, sym => lui $at, %hi(sym) - // ldc1 $f0, %lo(sym)($at) - - // Generate the base address in TmpReg. - emitRX(Mips::LUi, TmpReg, HiOperand, IDLoc, STI); - if (BaseReg != Mips::ZERO) - emitRRR(Mips::ADDu, TmpReg, TmpReg, BaseReg, IDLoc, STI); - // Emit the load with the adjusted base and offset. - emitRRX(Opcode, DstReg, TmpReg, LoOperand, IDLoc, STI); -} - MipsTargetAsmStreamer::MipsTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS) : MipsTargetStreamer(S), OS(OS) {} @@ -891,9 +901,9 @@ void MipsTargetELFStreamer::finish() { MCSection &BSSSection = *OFI.getBSSSection(); MCA.registerSection(BSSSection); - TextSection.setAlignment(std::max(16u, TextSection.getAlignment())); - DataSection.setAlignment(std::max(16u, DataSection.getAlignment())); - BSSSection.setAlignment(std::max(16u, BSSSection.getAlignment())); + TextSection.setAlignment(Align(std::max(16u, TextSection.getAlignment()))); + DataSection.setAlignment(Align(std::max(16u, DataSection.getAlignment()))); + BSSSection.setAlignment(Align(std::max(16u, BSSSection.getAlignment()))); if (RoundSectionSizes) { // Make sections sizes a multiple of the alignment. 
This is useful for @@ -1016,7 +1026,7 @@ void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) { MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Context); MCA.registerSection(*Sec); - Sec->setAlignment(4); + Sec->setAlignment(Align(4)); OS.PushSection(); @@ -1306,7 +1316,7 @@ void MipsTargetELFStreamer::emitMipsAbiFlags() { MCSectionELF *Sec = Context.getELFSection( ".MIPS.abiflags", ELF::SHT_MIPS_ABIFLAGS, ELF::SHF_ALLOC, 24, ""); MCA.registerSection(*Sec); - Sec->setAlignment(8); + Sec->setAlignment(Align(8)); OS.SwitchSection(Sec); OS << ABIFlagsSection; diff --git a/lib/Target/Mips/MicroMipsDSPInstrInfo.td b/lib/Target/Mips/MicroMipsDSPInstrInfo.td index 5a12568893af..9a1e47e5ecca 100644 --- a/lib/Target/Mips/MicroMipsDSPInstrInfo.td +++ b/lib/Target/Mips/MicroMipsDSPInstrInfo.td @@ -360,7 +360,7 @@ class RDDSP_MM_DESC { dag OutOperandList = (outs GPR32Opnd:$rt); dag InOperandList = (ins uimm7:$mask); string AsmString = !strconcat("rddsp", "\t$rt, $mask"); - list Pattern = [(set GPR32Opnd:$rt, (int_mips_rddsp immZExt7:$mask))]; + list Pattern = [(set GPR32Opnd:$rt, (int_mips_rddsp timmZExt7:$mask))]; InstrItinClass Itinerary = NoItinerary; } @@ -383,7 +383,7 @@ class WRDSP_MM_DESC { dag OutOperandList = (outs); dag InOperandList = (ins GPR32Opnd:$rt, uimm7:$mask); string AsmString = !strconcat("wrdsp", "\t$rt, $mask"); - list Pattern = [(int_mips_wrdsp GPR32Opnd:$rt, immZExt7:$mask)]; + list Pattern = [(int_mips_wrdsp GPR32Opnd:$rt, timmZExt7:$mask)]; InstrItinClass Itinerary = NoItinerary; bit isMoveReg = 1; } diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td index 9b7f7b25fa94..8cc0029fc896 100644 --- a/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/lib/Target/Mips/MicroMipsInstrInfo.td @@ -955,17 +955,18 @@ let DecoderNamespace = "MicroMips" in { EXT_FM_MM<0x0c>, ISA_MICROMIPS32_NOT_MIPS32R6; /// Jump Instructions - let DecoderMethod = "DecodeJumpTargetMM" in + let DecoderMethod = "DecodeJumpTargetMM" in { def J_MM : MMRel, JumpFJ, J_FM_MM<0x35>, AdditionalRequires<[RelocNotPIC]>, IsBranch, ISA_MICROMIPS32_NOT_MIPS32R6; - - let DecoderMethod = "DecodeJumpTargetMM" in { def JAL_MM : MMRel, JumpLink<"jal", calltarget_mm>, J_FM_MM<0x3d>, ISA_MICROMIPS32_NOT_MIPS32R6; + } + + let DecoderMethod = "DecodeJumpTargetXMM" in def JALX_MM : MMRel, JumpLink<"jalx", calltarget>, J_FM_MM<0x3c>, ISA_MICROMIPS32_NOT_MIPS32R6; - } + def JR_MM : MMRel, IndirectBranch<"jr", GPR32Opnd>, JR_FM_MM<0x3c>, ISA_MICROMIPS32_NOT_MIPS32R6; def JALR_MM : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM_MM<0x03c>, diff --git a/lib/Target/Mips/MicroMipsSizeReduction.cpp b/lib/Target/Mips/MicroMipsSizeReduction.cpp index 70af95592aa5..db93b3d80ede 100644 --- a/lib/Target/Mips/MicroMipsSizeReduction.cpp +++ b/lib/Target/Mips/MicroMipsSizeReduction.cpp @@ -361,7 +361,7 @@ static bool CheckXWPInstr(MachineInstr *MI, bool ReduceToLwp, MI->getOpcode() == Mips::SW16_MM)) return false; - unsigned reg = MI->getOperand(0).getReg(); + Register reg = MI->getOperand(0).getReg(); if (reg == Mips::RA) return false; @@ -403,8 +403,8 @@ static bool ConsecutiveInstr(MachineInstr *MI1, MachineInstr *MI2) { if (!GetImm(MI2, 2, Offset2)) return false; - unsigned Reg1 = MI1->getOperand(0).getReg(); - unsigned Reg2 = MI2->getOperand(0).getReg(); + Register Reg1 = MI1->getOperand(0).getReg(); + Register Reg2 = MI2->getOperand(0).getReg(); return ((Offset1 == (Offset2 - 4)) && (ConsecutiveRegisters(Reg1, Reg2))); } @@ -475,8 +475,8 @@ bool MicroMipsSizeReduce::ReduceXWtoXWP(ReduceEntryFunArgs 
*Arguments) { if (!CheckXWPInstr(MI2, ReduceToLwp, Entry)) return false; - unsigned Reg1 = MI1->getOperand(1).getReg(); - unsigned Reg2 = MI2->getOperand(1).getReg(); + Register Reg1 = MI1->getOperand(1).getReg(); + Register Reg2 = MI2->getOperand(1).getReg(); if (Reg1 != Reg2) return false; @@ -621,8 +621,8 @@ bool MicroMipsSizeReduce::ReduceMoveToMovep(ReduceEntryFunArgs *Arguments) { MachineInstr *MI1 = Arguments->MI; MachineInstr *MI2 = &*NextMII; - unsigned RegDstMI1 = MI1->getOperand(0).getReg(); - unsigned RegSrcMI1 = MI1->getOperand(1).getReg(); + Register RegDstMI1 = MI1->getOperand(0).getReg(); + Register RegSrcMI1 = MI1->getOperand(1).getReg(); if (!IsMovepSrcRegister(RegSrcMI1)) return false; @@ -633,8 +633,8 @@ bool MicroMipsSizeReduce::ReduceMoveToMovep(ReduceEntryFunArgs *Arguments) { if (MI2->getOpcode() != Entry.WideOpc()) return false; - unsigned RegDstMI2 = MI2->getOperand(0).getReg(); - unsigned RegSrcMI2 = MI2->getOperand(1).getReg(); + Register RegDstMI2 = MI2->getOperand(0).getReg(); + Register RegSrcMI2 = MI2->getOperand(1).getReg(); if (!IsMovepSrcRegister(RegSrcMI2)) return false; diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index 7b83ea8535ae..a5908362e81f 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -25,6 +25,8 @@ class PredicateControl { list GPRPredicates = []; // Predicates for the PTR size such as IsPTR64bit list PTRPredicates = []; + // Predicates for a symbol's size such as hasSym32. + list SYMPredicates = []; // Predicates for the FGR size and layout such as IsFP64bit list FGRPredicates = []; // Predicates for the instruction group membership such as ISA's. @@ -38,6 +40,7 @@ class PredicateControl { list Predicates = !listconcat(EncodingPredicates, GPRPredicates, PTRPredicates, + SYMPredicates, FGRPredicates, InsnPredicates, HardFloatPredicate, @@ -206,6 +209,9 @@ def FeatureMT : SubtargetFeature<"mt", "HasMT", "true", "Mips MT ASE">; def FeatureLongCalls : SubtargetFeature<"long-calls", "UseLongCalls", "true", "Disable use of the jal instruction">; +def FeatureXGOT + : SubtargetFeature<"xgot", "UseXGOT", "true", "Assume 32-bit GOT">; + def FeatureUseIndirectJumpsHazard : SubtargetFeature<"use-indirect-jump-hazard", "UseIndirectJumpsHazard", "true", "Use indirect jump" @@ -257,3 +263,9 @@ def Mips : Target { let AssemblyParserVariants = [MipsAsmParserVariant]; let AllowRegisterRenaming = 1; } + +//===----------------------------------------------------------------------===// +// Pfm Counters +//===----------------------------------------------------------------------===// + +include "MipsPfmCounters.td" diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp index 3ab4f1e064da..768d54fc9c24 100644 --- a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp +++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp @@ -72,7 +72,7 @@ void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { MachineRegisterInfo &RegInfo = MF.getRegInfo(); const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); DebugLoc DL; - unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg(); + Register V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg(); const TargetRegisterClass *RC = &Mips::CPU16RegsRegClass; V0 = RegInfo.createVirtualRegister(RC); diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp index 6d8e5aef2a3f..5a5b78c9d5f9 100644 --- a/lib/Target/Mips/Mips16ISelLowering.cpp +++ b/lib/Target/Mips/Mips16ISelLowering.cpp @@ -708,8 +708,8 @@ 
Mips16TargetLowering::emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, if (DontExpandCondPseudos16) return BB; const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - unsigned regX = MI.getOperand(0).getReg(); - unsigned regY = MI.getOperand(1).getReg(); + Register regX = MI.getOperand(0).getReg(); + Register regY = MI.getOperand(1).getReg(); MachineBasicBlock *target = MI.getOperand(2).getMBB(); BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(CmpOpc)) .addReg(regX) @@ -725,7 +725,7 @@ MachineBasicBlock *Mips16TargetLowering::emitFEXT_T8I8I16_ins( if (DontExpandCondPseudos16) return BB; const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - unsigned regX = MI.getOperand(0).getReg(); + Register regX = MI.getOperand(0).getReg(); int64_t imm = MI.getOperand(1).getImm(); MachineBasicBlock *target = MI.getOperand(2).getMBB(); unsigned CmpOpc; @@ -758,9 +758,9 @@ Mips16TargetLowering::emitFEXT_CCRX16_ins(unsigned SltOpc, MachineInstr &MI, if (DontExpandCondPseudos16) return BB; const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - unsigned CC = MI.getOperand(0).getReg(); - unsigned regX = MI.getOperand(1).getReg(); - unsigned regY = MI.getOperand(2).getReg(); + Register CC = MI.getOperand(0).getReg(); + Register regX = MI.getOperand(1).getReg(); + Register regY = MI.getOperand(2).getReg(); BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SltOpc)) .addReg(regX) .addReg(regY); @@ -777,8 +777,8 @@ Mips16TargetLowering::emitFEXT_CCRXI16_ins(unsigned SltiOpc, unsigned SltiXOpc, if (DontExpandCondPseudos16) return BB; const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - unsigned CC = MI.getOperand(0).getReg(); - unsigned regX = MI.getOperand(1).getReg(); + Register CC = MI.getOperand(0).getReg(); + Register regX = MI.getOperand(1).getReg(); int64_t Imm = MI.getOperand(2).getImm(); unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm); BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SltOpc)).addReg(regX).addImm(Imm); diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index c234c309d760..0d735c20ec2f 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -358,7 +358,7 @@ unsigned Mips16InstrInfo::loadImmediate(unsigned FrameReg, int64_t Imm, for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) { MachineOperand &MO = II->getOperand(i); if (MO.isReg() && MO.getReg() != 0 && !MO.isDef() && - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + !Register::isVirtualRegister(MO.getReg())) Candidates.reset(MO.getReg()); } diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 7f35280f7936..cc15949b0d57 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -16,6 +16,7 @@ // shamt must fit in 6 bits. def immZExt6 : ImmLeaf; +def timmZExt6 : TImmLeaf; // Node immediate fits as 10-bit sign extended on target immediate. // e.g. 
seqi, snei @@ -651,6 +652,7 @@ def : MipsPat<(MipsTlsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>, let AdditionalPredicates = [NotInMicroMips] in { def : MipsPat<(MipsJmpLink (i64 texternalsym:$dst)), (JAL texternalsym:$dst)>, ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(MipsHighest (i64 tglobaladdr:$in)), (LUi64 tglobaladdr:$in)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(MipsHighest (i64 tblockaddress:$in)), @@ -682,6 +684,20 @@ let AdditionalPredicates = [NotInMicroMips] in { (DADDiu GPR64:$hi, tjumptable:$lo)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tconstpool:$lo))), (DADDiu GPR64:$hi, tconstpool:$lo)>, ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 texternalsym:$lo))), + (DADDiu GPR64:$hi, texternalsym:$lo)>, + ISA_MIPS3, GPR_64, SYM_64; + + def : MipsPat<(MipsHi (i64 tglobaladdr:$in)), + (DADDiu ZERO_64, tglobaladdr:$in)>, ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(MipsHi (i64 tblockaddress:$in)), + (DADDiu ZERO_64, tblockaddress:$in)>, ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(MipsHi (i64 tjumptable:$in)), + (DADDiu ZERO_64, tjumptable:$in)>, ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(MipsHi (i64 tconstpool:$in)), + (DADDiu ZERO_64, tconstpool:$in)>, ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(MipsHi (i64 texternalsym:$in)), + (DADDiu ZERO_64, texternalsym:$in)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tglobaladdr:$lo))), (DADDiu GPR64:$hi, tglobaladdr:$lo)>, ISA_MIPS3, GPR_64, SYM_64; @@ -692,6 +708,23 @@ let AdditionalPredicates = [NotInMicroMips] in { (DADDiu GPR64:$hi, tjumptable:$lo)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tconstpool:$lo))), (DADDiu GPR64:$hi, tconstpool:$lo)>, ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(add GPR64:$hi, (MipsHi (i64 texternalsym:$lo))), + (DADDiu GPR64:$hi, texternalsym:$lo)>, + ISA_MIPS3, GPR_64, SYM_64; + + def : MipsPat<(MipsLo (i64 tglobaladdr:$in)), + (DADDiu ZERO_64, tglobaladdr:$in)>, ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(MipsLo (i64 tblockaddress:$in)), + (DADDiu ZERO_64, tblockaddress:$in)>, ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(MipsLo (i64 tjumptable:$in)), + (DADDiu ZERO_64, tjumptable:$in)>, ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(MipsLo (i64 tconstpool:$in)), + (DADDiu ZERO_64, tconstpool:$in)>, ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(MipsLo (i64 tglobaltlsaddr:$in)), + (DADDiu ZERO_64, tglobaltlsaddr:$in)>, + ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(MipsLo (i64 texternalsym:$in)), + (DADDiu ZERO_64, texternalsym:$in)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tglobaladdr:$lo))), (DADDiu GPR64:$hi, tglobaladdr:$lo)>, ISA_MIPS3, GPR_64, SYM_64; @@ -705,6 +738,9 @@ let AdditionalPredicates = [NotInMicroMips] in { def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tglobaltlsaddr:$lo))), (DADDiu GPR64:$hi, tglobaltlsaddr:$lo)>, ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(add GPR64:$hi, (MipsLo (i64 texternalsym:$lo))), + (DADDiu GPR64:$hi, texternalsym:$lo)>, + ISA_MIPS3, GPR_64, SYM_64; } // gp_rel relocs diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index db83fe49cec0..2201545adc94 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -56,6 +56,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include #include @@ -376,7 
+377,7 @@ void MipsAsmPrinter::printSavedRegsBitmask() { void MipsAsmPrinter::emitFrameDirective() { const TargetRegisterInfo &RI = *MF->getSubtarget().getRegisterInfo(); - unsigned stackReg = RI.getFrameRegister(*MF); + Register stackReg = RI.getFrameRegister(*MF); unsigned returnReg = RI.getRARegister(); unsigned stackSize = MF->getFrameInfo().getStackSize(); @@ -571,7 +572,7 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, // for 2 for 32 bit mode and 1 for 64 bit mode. if (NumVals != 2) { if (Subtarget->isGP64bit() && NumVals == 1 && MO.isReg()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); O << '$' << MipsInstPrinter::getRegisterName(Reg); return false; } @@ -597,7 +598,7 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, const MachineOperand &MO = MI->getOperand(RegOp); if (!MO.isReg()) return true; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); O << '$' << MipsInstPrinter::getRegisterName(Reg); return false; } @@ -780,7 +781,7 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { StringRef CPU = MIPS_MC::selectMipsCPU(TT, TM.getTargetCPU()); StringRef FS = TM.getTargetFeatureString(); const MipsTargetMachine &MTM = static_cast(TM); - const MipsSubtarget STI(TT, CPU, FS, MTM.isLittleEndian(), MTM, 0); + const MipsSubtarget STI(TT, CPU, FS, MTM.isLittleEndian(), MTM, None); bool IsABICalls = STI.isABICalls(); const MipsABIInfo &ABI = MTM.getABI(); @@ -821,6 +822,9 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { // option has changed the default (i.e. FPXX) and omit it otherwise. if (ABI.IsO32() && (!STI.useOddSPReg() || STI.isABI_FPXX())) TS.emitDirectiveModuleOddSPReg(); + + // Switch to the .text section. + OutStreamer->SwitchSection(getObjFileLowering().getTextSection()); } void MipsAsmPrinter::emitInlineAsmStart() const { diff --git a/lib/Target/Mips/MipsCallLowering.cpp b/lib/Target/Mips/MipsCallLowering.cpp index da65689ecff5..cad82953af50 100644 --- a/lib/Target/Mips/MipsCallLowering.cpp +++ b/lib/Target/Mips/MipsCallLowering.cpp @@ -106,6 +106,7 @@ private: Register ArgsReg, const EVT &VT) override; virtual void markPhysRegUsed(unsigned PhysReg) { + MIRBuilder.getMRI()->addLiveIn(PhysReg); MIRBuilder.getMBB().addLiveIn(PhysReg); } @@ -357,7 +358,7 @@ bool OutgoingValueHandler::handleSplit(SmallVectorImpl &VRegs, return true; } -static bool isSupportedType(Type *T) { +static bool isSupportedArgumentType(Type *T) { if (T->isIntegerTy()) return true; if (T->isPointerTy()) @@ -367,6 +368,18 @@ static bool isSupportedType(Type *T) { return false; } +static bool isSupportedReturnType(Type *T) { + if (T->isIntegerTy()) + return true; + if (T->isPointerTy()) + return true; + if (T->isFloatingPointTy()) + return true; + if (T->isAggregateType()) + return true; + return false; +} + static CCValAssign::LocInfo determineLocInfo(const MVT RegisterVT, const EVT VT, const ISD::ArgFlagsTy &Flags) { // > does not mean loss of information as type RegisterVT can't hold type VT, @@ -403,7 +416,7 @@ bool MipsCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, MachineInstrBuilder Ret = MIRBuilder.buildInstrNoInsert(Mips::RetRA); - if (Val != nullptr && !isSupportedType(Val->getType())) + if (Val != nullptr && !isSupportedReturnType(Val->getType())) return false; if (!VRegs.empty()) { @@ -411,21 +424,13 @@ bool MipsCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, const Function &F = MF.getFunction(); const DataLayout &DL = MF.getDataLayout(); const MipsTargetLowering &TLI = *getTLI(); - 
LLVMContext &Ctx = Val->getType()->getContext(); - - SmallVector SplitEVTs; - ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs); - assert(VRegs.size() == SplitEVTs.size() && - "For each split Type there should be exactly one VReg."); SmallVector RetInfos; SmallVector OrigArgIndices; - for (unsigned i = 0; i < SplitEVTs.size(); ++i) { - ArgInfo CurArgInfo = ArgInfo{VRegs[i], SplitEVTs[i].getTypeForEVT(Ctx)}; - setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F); - splitToValueTypes(CurArgInfo, 0, RetInfos, OrigArgIndices); - } + ArgInfo ArgRetInfo(VRegs, Val->getType()); + setArgFlags(ArgRetInfo, AttributeList::ReturnIndex, DL, F); + splitToValueTypes(DL, ArgRetInfo, 0, RetInfos, OrigArgIndices); SmallVector Outs; subTargetRegTypeForCallingConv(F, RetInfos, OrigArgIndices, Outs); @@ -453,12 +458,8 @@ bool MipsCallLowering::lowerFormalArguments( if (F.arg_empty()) return true; - if (F.isVarArg()) { - return false; - } - for (auto &Arg : F.args()) { - if (!isSupportedType(Arg.getType())) + if (!isSupportedArgumentType(Arg.getType())) return false; } @@ -472,7 +473,8 @@ bool MipsCallLowering::lowerFormalArguments( for (auto &Arg : F.args()) { ArgInfo AInfo(VRegs[i], Arg.getType()); setArgFlags(AInfo, i + AttributeList::FirstArgIndex, DL, F); - splitToValueTypes(AInfo, i, ArgInfos, OrigArgIndices); + ArgInfos.push_back(AInfo); + OrigArgIndices.push_back(i); ++i; } @@ -495,30 +497,64 @@ bool MipsCallLowering::lowerFormalArguments( if (!Handler.handle(ArgLocs, ArgInfos)) return false; + if (F.isVarArg()) { + ArrayRef ArgRegs = ABI.GetVarArgRegs(); + unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); + + int VaArgOffset; + unsigned RegSize = 4; + if (ArgRegs.size() == Idx) + VaArgOffset = alignTo(CCInfo.getNextStackOffset(), RegSize); + else { + VaArgOffset = + (int)ABI.GetCalleeAllocdArgSizeInBytes(CCInfo.getCallingConv()) - + (int)(RegSize * (ArgRegs.size() - Idx)); + } + + MachineFrameInfo &MFI = MF.getFrameInfo(); + int FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true); + MF.getInfo()->setVarArgsFrameIndex(FI); + + for (unsigned I = Idx; I < ArgRegs.size(); ++I, VaArgOffset += RegSize) { + MIRBuilder.getMBB().addLiveIn(ArgRegs[I]); + + MachineInstrBuilder Copy = + MIRBuilder.buildCopy(LLT::scalar(RegSize * 8), Register(ArgRegs[I])); + FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true); + MachinePointerInfo MPO = MachinePointerInfo::getFixedStack(MF, FI); + MachineInstrBuilder FrameIndex = + MIRBuilder.buildFrameIndex(LLT::pointer(MPO.getAddrSpace(), 32), FI); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, RegSize, + /* Alignment */ RegSize); + MIRBuilder.buildStore(Copy, FrameIndex, *MMO); + } + } + return true; } bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, - CallingConv::ID CallConv, - const MachineOperand &Callee, - const ArgInfo &OrigRet, - ArrayRef OrigArgs) const { + CallLoweringInfo &Info) const { - if (CallConv != CallingConv::C) + if (Info.CallConv != CallingConv::C) return false; - for (auto &Arg : OrigArgs) { - if (!isSupportedType(Arg.Ty)) + for (auto &Arg : Info.OrigArgs) { + if (!isSupportedArgumentType(Arg.Ty)) + return false; + if (Arg.Flags[0].isByVal()) return false; - if (Arg.Flags.isByVal() || Arg.Flags.isSRet()) + if (Arg.Flags[0].isSRet() && !Arg.Ty->isPointerTy()) return false; } - if (OrigRet.Regs[0] && !isSupportedType(OrigRet.Ty)) + if (!Info.OrigRet.Ty->isVoidTy() && !isSupportedReturnType(Info.OrigRet.Ty)) return false; MachineFunction &MF = MIRBuilder.getMF(); const Function &F = 
MF.getFunction(); + const DataLayout &DL = MF.getDataLayout(); const MipsTargetLowering &TLI = *getTLI(); const MipsTargetMachine &TM = static_cast(MF.getTarget()); @@ -528,37 +564,38 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, MIRBuilder.buildInstr(Mips::ADJCALLSTACKDOWN); const bool IsCalleeGlobalPIC = - Callee.isGlobal() && TM.isPositionIndependent(); + Info.Callee.isGlobal() && TM.isPositionIndependent(); MachineInstrBuilder MIB = MIRBuilder.buildInstrNoInsert( - Callee.isReg() || IsCalleeGlobalPIC ? Mips::JALRPseudo : Mips::JAL); + Info.Callee.isReg() || IsCalleeGlobalPIC ? Mips::JALRPseudo : Mips::JAL); MIB.addDef(Mips::SP, RegState::Implicit); if (IsCalleeGlobalPIC) { Register CalleeReg = MF.getRegInfo().createGenericVirtualRegister(LLT::pointer(0, 32)); MachineInstr *CalleeGlobalValue = - MIRBuilder.buildGlobalValue(CalleeReg, Callee.getGlobal()); - if (!Callee.getGlobal()->hasLocalLinkage()) + MIRBuilder.buildGlobalValue(CalleeReg, Info.Callee.getGlobal()); + if (!Info.Callee.getGlobal()->hasLocalLinkage()) CalleeGlobalValue->getOperand(1).setTargetFlags(MipsII::MO_GOT_CALL); MIB.addUse(CalleeReg); } else - MIB.add(Callee); + MIB.add(Info.Callee); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); MIB.addRegMask(TRI->getCallPreservedMask(MF, F.getCallingConv())); TargetLowering::ArgListTy FuncOrigArgs; - FuncOrigArgs.reserve(OrigArgs.size()); + FuncOrigArgs.reserve(Info.OrigArgs.size()); SmallVector ArgInfos; SmallVector OrigArgIndices; unsigned i = 0; - for (auto &Arg : OrigArgs) { + for (auto &Arg : Info.OrigArgs) { TargetLowering::ArgListEntry Entry; Entry.Ty = Arg.Ty; FuncOrigArgs.push_back(Entry); - splitToValueTypes(Arg, i, ArgInfos, OrigArgIndices); + ArgInfos.push_back(Arg); + OrigArgIndices.push_back(i); ++i; } @@ -566,11 +603,17 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, subTargetRegTypeForCallingConv(F, ArgInfos, OrigArgIndices, Outs); SmallVector ArgLocs; - MipsCCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, + bool IsCalleeVarArg = false; + if (Info.Callee.isGlobal()) { + const Function *CF = static_cast(Info.Callee.getGlobal()); + IsCalleeVarArg = CF->isVarArg(); + } + MipsCCState CCInfo(F.getCallingConv(), IsCalleeVarArg, MF, ArgLocs, F.getContext()); - CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1); - const char *Call = Callee.isSymbol() ? Callee.getSymbolName() : nullptr; + CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(Info.CallConv), 1); + const char *Call = + Info.Callee.isSymbol() ? 
Info.Callee.getSymbolName() : nullptr; CCInfo.AnalyzeCallOperands(Outs, TLI.CCAssignFnForCall(), FuncOrigArgs, Call); setLocInfo(ArgLocs, Outs); @@ -599,11 +642,11 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, *STI.getRegBankInfo()); } - if (OrigRet.Regs[0]) { + if (!Info.OrigRet.Ty->isVoidTy()) { ArgInfos.clear(); SmallVector OrigRetIndices; - splitToValueTypes(OrigRet, 0, ArgInfos, OrigRetIndices); + splitToValueTypes(DL, Info.OrigRet, 0, ArgInfos, OrigRetIndices); SmallVector Ins; subTargetRegTypeForCallingConv(F, ArgInfos, OrigRetIndices, Ins); @@ -612,7 +655,7 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, MipsCCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext()); - CCInfo.AnalyzeCallResult(Ins, TLI.CCAssignFnForReturn(), OrigRet.Ty, Call); + CCInfo.AnalyzeCallResult(Ins, TLI.CCAssignFnForReturn(), Info.OrigRet.Ty, Call); setLocInfo(ArgLocs, Ins); CallReturnHandler Handler(MIRBuilder, MF.getRegInfo(), MIB); @@ -642,12 +685,12 @@ void MipsCallLowering::subTargetRegTypeForCallingConv( F.getContext(), F.getCallingConv(), VT); for (unsigned i = 0; i < NumRegs; ++i) { - ISD::ArgFlagsTy Flags = Arg.Flags; + ISD::ArgFlagsTy Flags = Arg.Flags[0]; if (i == 0) Flags.setOrigAlign(TLI.getABIAlignmentForCallingConv(Arg.Ty, DL)); else - Flags.setOrigAlign(1); + Flags.setOrigAlign(Align::None()); ISDArgs.emplace_back(Flags, RegisterVT, VT, true, OrigArgIndices[ArgNo], 0); @@ -657,12 +700,21 @@ void MipsCallLowering::subTargetRegTypeForCallingConv( } void MipsCallLowering::splitToValueTypes( - const ArgInfo &OrigArg, unsigned OriginalIndex, + const DataLayout &DL, const ArgInfo &OrigArg, unsigned OriginalIndex, SmallVectorImpl &SplitArgs, SmallVectorImpl &SplitArgsOrigIndices) const { - // TODO : perform structure and array split. For now we only deal with - // types that pass isSupportedType check. - SplitArgs.push_back(OrigArg); - SplitArgsOrigIndices.push_back(OriginalIndex); + SmallVector SplitEVTs; + SmallVector SplitVRegs; + const MipsTargetLowering &TLI = *getTLI(); + LLVMContext &Ctx = OrigArg.Ty->getContext(); + + ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitEVTs); + + for (unsigned i = 0; i < SplitEVTs.size(); ++i) { + ArgInfo Info = ArgInfo{OrigArg.Regs[i], SplitEVTs[i].getTypeForEVT(Ctx)}; + Info.Flags = OrigArg.Flags; + SplitArgs.push_back(Info); + SplitArgsOrigIndices.push_back(OriginalIndex); + } } diff --git a/lib/Target/Mips/MipsCallLowering.h b/lib/Target/Mips/MipsCallLowering.h index 11c2d53ad35d..a284cf5e26cf 100644 --- a/lib/Target/Mips/MipsCallLowering.h +++ b/lib/Target/Mips/MipsCallLowering.h @@ -68,9 +68,8 @@ public: bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef> VRegs) const override; - bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, - const MachineOperand &Callee, const ArgInfo &OrigRet, - ArrayRef OrigArgs) const override; + bool lowerCall(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info) const override; private: /// Based on registers available on target machine split or extend @@ -83,7 +82,8 @@ private: /// Split structures and arrays, save original argument indices since /// Mips calling convention needs info about original argument type. 
- void splitToValueTypes(const ArgInfo &OrigArg, unsigned OriginalIndex, + void splitToValueTypes(const DataLayout &DL, const ArgInfo &OrigArg, + unsigned OriginalIndex, SmallVectorImpl &SplitArgs, SmallVectorImpl &SplitArgsOrigIndices) const; }; diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp index eea28df7eda1..f50640521738 100644 --- a/lib/Target/Mips/MipsConstantIslandPass.cpp +++ b/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -222,12 +222,7 @@ namespace { BasicBlockInfo() = default; - // FIXME: ignore LogAlign for this patch - // - unsigned postOffset(unsigned LogAlign = 0) const { - unsigned PO = Offset + Size; - return PO; - } + unsigned postOffset() const { return Offset + Size; } }; std::vector BBInfo; @@ -376,7 +371,7 @@ namespace { void doInitialPlacement(std::vector &CPEMIs); CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); - unsigned getCPELogAlign(const MachineInstr &CPEMI); + Align getCPEAlign(const MachineInstr &CPEMI); void initializeFunctionInfo(const std::vector &CPEMIs); unsigned getOffsetOf(MachineInstr *MI) const; unsigned getUserOffset(CPUser&) const; @@ -534,11 +529,11 @@ MipsConstantIslands::doInitialPlacement(std::vector &CPEMIs) { MF->push_back(BB); // MachineConstantPool measures alignment in bytes. We measure in log2(bytes). - unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment()); + const Align MaxAlign(MCP->getConstantPoolAlignment()); // Mark the basic block as required by the const-pool. // If AlignConstantIslands isn't set, use 4-byte alignment for everything. - BB->setAlignment(AlignConstantIslands ? MaxAlign : 2); + BB->setAlignment(AlignConstantIslands ? MaxAlign : Align(4)); // The function needs to be as aligned as the basic blocks. The linker may // move functions around based on their alignment. @@ -548,7 +543,8 @@ MipsConstantIslands::doInitialPlacement(std::vector &CPEMIs) { // alignment of all entries as long as BB is sufficiently aligned. Keep // track of the insertion point for each alignment. We are going to bucket // sort the entries as they are created. - SmallVector InsPoint(MaxAlign + 1, BB->end()); + SmallVector InsPoint(Log2(MaxAlign) + 1, + BB->end()); // Add all of the constants from the constant pool to the end block, use an // identity mapping of CPI's to CPE's. @@ -576,7 +572,7 @@ MipsConstantIslands::doInitialPlacement(std::vector &CPEMIs) { // Ensure that future entries with higher alignment get inserted before // CPEMI. This is bucket sort with iterators. - for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a) + for (unsigned a = LogAlign + 1; a <= Log2(MaxAlign); ++a) if (InsPoint[a] == InsAt) InsPoint[a] = CPEMI; // Add a new CPEntry, but no corresponding CPUser yet. @@ -621,20 +617,18 @@ MipsConstantIslands::CPEntry return nullptr; } -/// getCPELogAlign - Returns the required alignment of the constant pool entry +/// getCPEAlign - Returns the required alignment of the constant pool entry /// represented by CPEMI. Alignment is measured in log2(bytes) units. -unsigned MipsConstantIslands::getCPELogAlign(const MachineInstr &CPEMI) { +Align MipsConstantIslands::getCPEAlign(const MachineInstr &CPEMI) { assert(CPEMI.getOpcode() == Mips::CONSTPOOL_ENTRY); // Everything is 4-byte aligned unless AlignConstantIslands is set. 
if (!AlignConstantIslands) - return 2; + return Align(4); unsigned CPI = CPEMI.getOperand(1).getIndex(); assert(CPI < MCP->getConstants().size() && "Invalid constant pool index."); - unsigned Align = MCP->getConstants()[CPI].getAlignment(); - assert(isPowerOf2_32(Align) && "Invalid CPE alignment"); - return Log2_32(Align); + return Align(MCP->getConstants()[CPI].getAlignment()); } /// initializeFunctionInfo - Do the initial scan of the function, building up @@ -940,13 +934,13 @@ bool MipsConstantIslands::isOffsetInRange(unsigned UserOffset, bool MipsConstantIslands::isWaterInRange(unsigned UserOffset, MachineBasicBlock* Water, CPUser &U, unsigned &Growth) { - unsigned CPELogAlign = getCPELogAlign(*U.CPEMI); - unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign); - unsigned NextBlockOffset, NextBlockAlignment; + unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(); + unsigned NextBlockOffset; + Align NextBlockAlignment; MachineFunction::const_iterator NextBlock = ++Water->getIterator(); if (NextBlock == MF->end()) { NextBlockOffset = BBInfo[Water->getNumber()].postOffset(); - NextBlockAlignment = 0; + NextBlockAlignment = Align::None(); } else { NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset; NextBlockAlignment = NextBlock->getAlignment(); @@ -961,7 +955,7 @@ bool MipsConstantIslands::isWaterInRange(unsigned UserOffset, Growth = CPEEnd - NextBlockOffset; // Compute the padding that would go at the end of the CPE to align the next // block. - Growth += OffsetToAlignment(CPEEnd, 1ULL << NextBlockAlignment); + Growth += offsetToAlignment(CPEEnd, NextBlockAlignment); // If the CPE is to be inserted before the instruction, that will raise // the offset of the instruction. Also account for unknown alignment padding @@ -1221,7 +1215,6 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex, CPUser &U = CPUsers[CPUserIndex]; MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; - unsigned CPELogAlign = getCPELogAlign(*CPEMI); MachineBasicBlock *UserMBB = UserMI->getParent(); const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()]; @@ -1231,7 +1224,7 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex, // Size of branch to insert. unsigned Delta = 2; // Compute the offset where the CPE will begin. - unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + Delta; + unsigned CPEOffset = UserBBI.postOffset() + Delta; if (isOffsetInRange(UserOffset, CPEOffset, U)) { LLVM_DEBUG(dbgs() << "Split at end of " << printMBBReference(*UserMBB) @@ -1257,9 +1250,8 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex, // Try to split the block so it's fully aligned. Compute the latest split // point where we can add a 4-byte branch instruction, and then align to - // LogAlign which is the largest possible alignment in the function. - unsigned LogAlign = MF->getAlignment(); - assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry"); + // Align which is the largest possible alignment in the function. 
+ const Align Align = MF->getAlignment(); unsigned BaseInsertOffset = UserOffset + U.getMaxDisp(); LLVM_DEBUG(dbgs() << format("Split in middle of big block before %#x", BaseInsertOffset)); @@ -1270,7 +1262,7 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex, BaseInsertOffset -= 4; LLVM_DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset) - << " la=" << LogAlign << '\n'); + << " la=" << Log2(Align) << '\n'); // This could point off the end of the block if we've already got constant // pool entries following this block; only the last one is in the water list. @@ -1295,8 +1287,8 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex, CPUser &U = CPUsers[CPUIndex]; if (!isOffsetInRange(Offset, EndInsertOffset, U)) { // Shift intertion point by one unit of alignment so it is within reach. - BaseInsertOffset -= 1u << LogAlign; - EndInsertOffset -= 1u << LogAlign; + BaseInsertOffset -= Align.value(); + EndInsertOffset -= Align.value(); } // This is overly conservative, as we don't account for CPEMIs being // reused within the block, but it doesn't matter much. Also assume CPEs @@ -1399,7 +1391,7 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { ++NumCPEs; // Mark the basic block as aligned as required by the const-pool entry. - NewIsland->setAlignment(getCPELogAlign(*U.CPEMI)); + NewIsland->setAlignment(getCPEAlign(*U.CPEMI)); // Increase the size of the island block to account for the new entry. BBInfo[NewIsland->getNumber()].Size += Size; @@ -1431,10 +1423,11 @@ void MipsConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) { BBInfo[CPEBB->getNumber()].Size = 0; // This block no longer needs to be aligned. - CPEBB->setAlignment(0); - } else + CPEBB->setAlignment(Align(1)); + } else { // Entries are sorted by descending alignment, so realign from the front. - CPEBB->setAlignment(getCPELogAlign(*CPEBB->begin())); + CPEBB->setAlignment(getCPEAlign(*CPEBB->begin())); + } adjustBBOffsetsAfter(CPEBB); // An island has only one predecessor BB and one successor BB. Check if @@ -1529,7 +1522,7 @@ MipsConstantIslands::fixupUnconditionalBr(ImmBranch &Br) { // We should have a way to back out this alignment restriction if we "can" later. // but it is not harmful. 
// - DestBB->setAlignment(2); + DestBB->setAlignment(Align(4)); Br.MaxDisp = ((1<<24)-1) * 2; MI->setDesc(TII->get(Mips::JalB16)); } diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index daca8b907081..d3e68c014fb7 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -12,12 +12,19 @@ // ImmLeaf def immZExt1 : ImmLeaf(Imm);}]>; +def timmZExt1 : ImmLeaf(Imm);}], NOOP_SDNodeXForm, timm>; def immZExt2 : ImmLeaf(Imm);}]>; +def timmZExt2 : ImmLeaf(Imm);}], NOOP_SDNodeXForm, timm>; def immZExt3 : ImmLeaf(Imm);}]>; +def timmZExt3 : ImmLeaf(Imm);}], NOOP_SDNodeXForm, timm>; def immZExt4 : ImmLeaf(Imm);}]>; +def timmZExt4 : ImmLeaf(Imm);}], NOOP_SDNodeXForm, timm>; def immZExt8 : ImmLeaf(Imm);}]>; +def timmZExt8 : ImmLeaf(Imm);}], NOOP_SDNodeXForm, timm>; def immZExt10 : ImmLeaf(Imm);}]>; +def timmZExt10 : ImmLeaf(Imm);}], NOOP_SDNodeXForm, timm>; def immSExt6 : ImmLeaf(Imm);}]>; +def timmSExt6 : ImmLeaf(Imm);}], NOOP_SDNodeXForm, timm>; def immSExt10 : ImmLeaf(Imm);}]>; // Mips-specific dsp nodes @@ -306,7 +313,7 @@ class PRECR_SRA_PH_W_DESC_BASE Pattern = [(set ROT:$rt, (OpNode ROS:$src, ROS:$rs, immZExt5:$sa))]; + list Pattern = [(set ROT:$rt, (OpNode ROS:$src, ROS:$rs, timmZExt5:$sa))]; InstrItinClass Itinerary = itin; string Constraints = "$src = $rt"; string BaseOpcode = instr_asm; @@ -443,7 +450,7 @@ class RDDSP_DESC_BASE Pattern = [(set GPR32Opnd:$rd, (OpNode immZExt10:$mask))]; + list Pattern = [(set GPR32Opnd:$rd, (OpNode timmZExt10:$mask))]; InstrItinClass Itinerary = itin; string BaseOpcode = instr_asm; bit isMoveReg = 1; @@ -454,7 +461,7 @@ class WRDSP_DESC_BASE Pattern = [(OpNode GPR32Opnd:$rs, immZExt10:$mask)]; + list Pattern = [(OpNode GPR32Opnd:$rs, timmZExt10:$mask)]; InstrItinClass Itinerary = itin; string BaseOpcode = instr_asm; bit isMoveReg = 1; @@ -1096,14 +1103,14 @@ class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph, NoItinerary, DSPROpnd>; // Misc -class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, uimm5, immZExt5, +class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, uimm5, timmZExt5, NoItinerary>; -class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, uimm2, immZExt2, +class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, uimm2, timmZExt2, NoItinerary>; class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, uimm5, - immZExt5, NoItinerary>; + timmZExt5, NoItinerary>; // Pseudos. 
def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASEgetOperand(0).getReg(); - unsigned Ptr = I->getOperand(1).getReg(); - unsigned Mask = I->getOperand(2).getReg(); - unsigned ShiftCmpVal = I->getOperand(3).getReg(); - unsigned Mask2 = I->getOperand(4).getReg(); - unsigned ShiftNewVal = I->getOperand(5).getReg(); - unsigned ShiftAmnt = I->getOperand(6).getReg(); - unsigned Scratch = I->getOperand(7).getReg(); - unsigned Scratch2 = I->getOperand(8).getReg(); + Register Dest = I->getOperand(0).getReg(); + Register Ptr = I->getOperand(1).getReg(); + Register Mask = I->getOperand(2).getReg(); + Register ShiftCmpVal = I->getOperand(3).getReg(); + Register Mask2 = I->getOperand(4).getReg(); + Register ShiftNewVal = I->getOperand(5).getReg(); + Register ShiftAmnt = I->getOperand(6).getReg(); + Register Scratch = I->getOperand(7).getReg(); + Register Scratch2 = I->getOperand(8).getReg(); // insert new blocks after the current block const BasicBlock *LLVM_BB = BB.getBasicBlock(); @@ -240,11 +240,11 @@ bool MipsExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB, MOVE = Mips::OR64; } - unsigned Dest = I->getOperand(0).getReg(); - unsigned Ptr = I->getOperand(1).getReg(); - unsigned OldVal = I->getOperand(2).getReg(); - unsigned NewVal = I->getOperand(3).getReg(); - unsigned Scratch = I->getOperand(4).getReg(); + Register Dest = I->getOperand(0).getReg(); + Register Ptr = I->getOperand(1).getReg(); + Register OldVal = I->getOperand(2).getReg(); + Register NewVal = I->getOperand(3).getReg(); + Register Scratch = I->getOperand(4).getReg(); // insert new blocks after the current block const BasicBlock *LLVM_BB = BB.getBasicBlock(); @@ -374,15 +374,15 @@ bool MipsExpandPseudo::expandAtomicBinOpSubword( llvm_unreachable("Unknown subword atomic pseudo for expansion!"); } - unsigned Dest = I->getOperand(0).getReg(); - unsigned Ptr = I->getOperand(1).getReg(); - unsigned Incr = I->getOperand(2).getReg(); - unsigned Mask = I->getOperand(3).getReg(); - unsigned Mask2 = I->getOperand(4).getReg(); - unsigned ShiftAmnt = I->getOperand(5).getReg(); - unsigned OldVal = I->getOperand(6).getReg(); - unsigned BinOpRes = I->getOperand(7).getReg(); - unsigned StoreVal = I->getOperand(8).getReg(); + Register Dest = I->getOperand(0).getReg(); + Register Ptr = I->getOperand(1).getReg(); + Register Incr = I->getOperand(2).getReg(); + Register Mask = I->getOperand(3).getReg(); + Register Mask2 = I->getOperand(4).getReg(); + Register ShiftAmnt = I->getOperand(5).getReg(); + Register OldVal = I->getOperand(6).getReg(); + Register BinOpRes = I->getOperand(7).getReg(); + Register StoreVal = I->getOperand(8).getReg(); const BasicBlock *LLVM_BB = BB.getBasicBlock(); MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); @@ -513,10 +513,10 @@ bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB, BEQ = Mips::BEQ64; } - unsigned OldVal = I->getOperand(0).getReg(); - unsigned Ptr = I->getOperand(1).getReg(); - unsigned Incr = I->getOperand(2).getReg(); - unsigned Scratch = I->getOperand(3).getReg(); + Register OldVal = I->getOperand(0).getReg(); + Register Ptr = I->getOperand(1).getReg(); + Register Incr = I->getOperand(2).getReg(); + Register Scratch = I->getOperand(3).getReg(); unsigned Opcode = 0; unsigned OR = 0; diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index 123d3cc242f0..80f288ac500c 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -1162,14 +1162,20 @@ bool MipsFastISel::processCallArgs(CallLoweringInfo &CLI, if (ArgVT == 
MVT::f32) { VA.convertToReg(Mips::F12); } else if (ArgVT == MVT::f64) { - VA.convertToReg(Mips::D6); + if (Subtarget->isFP64bit()) + VA.convertToReg(Mips::D6_64); + else + VA.convertToReg(Mips::D6); } } else if (i == 1) { if ((firstMVT == MVT::f32) || (firstMVT == MVT::f64)) { if (ArgVT == MVT::f32) { VA.convertToReg(Mips::F14); } else if (ArgVT == MVT::f64) { - VA.convertToReg(Mips::D7); + if (Subtarget->isFP64bit()) + VA.convertToReg(Mips::D7_64); + else + VA.convertToReg(Mips::D7); } } } @@ -1722,7 +1728,7 @@ bool MipsFastISel::selectRet(const Instruction *I) { return false; unsigned SrcReg = Reg + VA.getValNo(); - unsigned DestReg = VA.getLocReg(); + Register DestReg = VA.getLocReg(); // Avoid a cross-class copy. This is very unlikely. if (!MRI.getRegClass(SrcReg)->contains(DestReg)) return false; diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h index 0537cfd1cb30..612b2b712fa8 100644 --- a/lib/Target/Mips/MipsFrameLowering.h +++ b/lib/Target/Mips/MipsFrameLowering.h @@ -24,8 +24,9 @@ protected: const MipsSubtarget &STI; public: - explicit MipsFrameLowering(const MipsSubtarget &sti, unsigned Alignment) - : TargetFrameLowering(StackGrowsDown, Alignment, 0, Alignment), STI(sti) {} + explicit MipsFrameLowering(const MipsSubtarget &sti, Align Alignment) + : TargetFrameLowering(StackGrowsDown, Alignment, 0, Alignment), STI(sti) { + } static const MipsFrameLowering *create(const MipsSubtarget &ST); diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 9ba54d6bb73c..e5997af3bcc5 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -65,7 +65,7 @@ bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { /// getGlobalBaseReg - Output the instructions required to put the /// GOT address into a register. SDNode *MipsDAGToDAGISel::getGlobalBaseReg() { - unsigned GlobalBaseReg = MF->getInfo()->getGlobalBaseReg(); + Register GlobalBaseReg = MF->getInfo()->getGlobalBaseReg(); return CurDAG->getRegister(GlobalBaseReg, getTargetLowering()->getPointerTy( CurDAG->getDataLayout())) .getNode(); @@ -217,6 +217,51 @@ bool MipsDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const { return false; } +/// Convert vector addition with vector subtraction if that allows to encode +/// constant as an immediate and thus avoid extra 'ldi' instruction. +/// add X, <-1, -1...> --> sub X, <1, 1...> +bool MipsDAGToDAGISel::selectVecAddAsVecSubIfProfitable(SDNode *Node) { + assert(Node->getOpcode() == ISD::ADD && "Should only get 'add' here."); + + EVT VT = Node->getValueType(0); + assert(VT.isVector() && "Should only be called for vectors."); + + SDValue X = Node->getOperand(0); + SDValue C = Node->getOperand(1); + + auto *BVN = dyn_cast(C); + if (!BVN) + return false; + + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, + 8, !Subtarget->isLittle())) + return false; + + auto IsInlineConstant = [](const APInt &Imm) { return Imm.isIntN(5); }; + + if (IsInlineConstant(SplatValue)) + return false; // Can already be encoded as an immediate. + + APInt NegSplatValue = 0 - SplatValue; + if (!IsInlineConstant(NegSplatValue)) + return false; // Even if we negate it it won't help. 
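To see what the pair of IsInlineConstant checks just above buys: for the <-1, -1, ...> splat from the comment, the splat value itself does not fit the 5-bit unsigned immediate field of MSA's addvi.df, but its negation does, so an ldi + addv.df sequence can be folded into a single subvi.df. A standalone sketch of that test using APInt (the u5 width comes from the immediate-form MSA instructions; the rest is illustrative):

#include "llvm/ADT/APInt.h"
#include <cstdio>
using llvm::APInt;

// Same check the patch uses for "can this be an inline immediate?".
static bool fitsU5(const APInt &Imm) { return Imm.isIntN(5); }

int main() {
  APInt Splat(32, -1, /*isSigned=*/true); // element of the <-1, -1, ...> splat
  APInt Neg = APInt(32, 0) - Splat;       // 0 - SplatValue, as in the patch
  std::printf("splat fits u5: %d, negated splat fits u5: %d\n",
              fitsU5(Splat), fitsU5(Neg)); // prints 0 and 1
}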
+ + SDLoc DL(Node); + + SDValue NegC = CurDAG->FoldConstantArithmetic( + ISD::SUB, DL, VT, CurDAG->getConstant(0, DL, VT).getNode(), C.getNode()); + assert(NegC && "Constant-folding failed!"); + SDValue NewNode = CurDAG->getNode(ISD::SUB, DL, VT, X, NegC); + + ReplaceNode(Node, NewNode.getNode()); + SelectCode(NewNode.getNode()); + return true; +} + /// Select instructions not customized! Used for /// expanded, promoted and normal instructions void MipsDAGToDAGISel::Select(SDNode *Node) { @@ -236,6 +281,12 @@ void MipsDAGToDAGISel::Select(SDNode *Node) { switch(Opcode) { default: break; + case ISD::ADD: + if (Node->getSimpleValueType(0).isVector() && + selectVecAddAsVecSubIfProfitable(Node)) + return; + break; + // Get target GOT address. case ISD::GLOBAL_OFFSET_TABLE: ReplaceNode(Node, getGlobalBaseReg()); diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h index bae3bbf71f3b..a768589b374b 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.h +++ b/lib/Target/Mips/MipsISelDAGToDAG.h @@ -125,6 +125,11 @@ private: /// starting at bit zero. virtual bool selectVSplatMaskR(SDValue N, SDValue &Imm) const; + /// Convert vector addition with vector subtraction if that allows to encode + /// constant as an immediate and thus avoid extra 'ldi' instruction. + /// add X, <-1, -1...> --> sub X, <1, 1...> + bool selectVecAddAsVecSubIfProfitable(SDNode *Node); + void Select(SDNode *N) override; virtual bool trySelect(SDNode *Node) = 0; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 0ff09007da4b..bf1b4756b24f 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -82,10 +82,6 @@ using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); -static cl::opt -LargeGOT("mxgot", cl::Hidden, - cl::desc("MIPS: Enable GOT larger than 64k."), cl::init(false)); - static cl::opt NoZeroDivCheck("mno-check-zero-division", cl::Hidden, cl::desc("MIPS: Don't trap on integer division by zero."), @@ -330,7 +326,7 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, } // Set LoadExtAction for f16 vectors to Expand - for (MVT VT : MVT::fp_vector_valuetypes()) { + for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) { MVT F16VT = MVT::getVectorVT(MVT::f16, VT.getVectorNumElements()); if (F16VT.isValid()) setLoadExtAction(ISD::EXTLOAD, VT, F16VT, Expand); @@ -518,11 +514,12 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setLibcallName(RTLIB::SRA_I128, nullptr); } - setMinFunctionAlignment(Subtarget.isGP64bit() ? 3 : 2); + setMinFunctionAlignment(Subtarget.isGP64bit() ? Align(8) : Align(4)); // The arguments on the stack are defined in terms of 4-byte slots on O32 // and 8-byte slots on N32/N64. - setMinStackArgumentAlignment((ABI.IsN32() || ABI.IsN64()) ? 8 : 4); + setMinStackArgumentAlignment((ABI.IsN32() || ABI.IsN64()) ? Align(8) + : Align(4)); setStackPointerRegisterToSaveRestore(ABI.IsN64() ? Mips::SP_64 : Mips::SP); @@ -552,8 +549,9 @@ MipsTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, !Subtarget.inMicroMipsMode(); // Disable if either of the following is true: - // We do not generate PIC, the ABI is not O32, LargeGOT is being used. - if (!TM.isPositionIndependent() || !TM.getABI().IsO32() || LargeGOT) + // We do not generate PIC, the ABI is not O32, XGOT is being used. + if (!TM.isPositionIndependent() || !TM.getABI().IsO32() || + Subtarget.useXGOT()) UseFastISel = false; return UseFastISel ? 
Mips::createFastISel(funcInfo, libInfo) : nullptr; @@ -1257,7 +1255,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const static unsigned addLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC) { - unsigned VReg = MF.getRegInfo().createVirtualRegister(RC); + Register VReg = MF.getRegInfo().createVirtualRegister(RC); MF.getRegInfo().addLiveIn(PReg, VReg); return VReg; } @@ -1477,10 +1475,10 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr &MI, llvm_unreachable("Unknown pseudo atomic for replacement!"); } - unsigned OldVal = MI.getOperand(0).getReg(); - unsigned Ptr = MI.getOperand(1).getReg(); - unsigned Incr = MI.getOperand(2).getReg(); - unsigned Scratch = RegInfo.createVirtualRegister(RegInfo.getRegClass(OldVal)); + Register OldVal = MI.getOperand(0).getReg(); + Register Ptr = MI.getOperand(1).getReg(); + Register Incr = MI.getOperand(2).getReg(); + Register Scratch = RegInfo.createVirtualRegister(RegInfo.getRegClass(OldVal)); MachineBasicBlock::iterator II(MI); @@ -1519,8 +1517,8 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr &MI, // containing the word. // - unsigned PtrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Ptr)); - unsigned IncrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Incr)); + Register PtrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Ptr)); + Register IncrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Incr)); BuildMI(*BB, II, DL, TII->get(Mips::COPY), IncrCopy).addReg(Incr); BuildMI(*BB, II, DL, TII->get(Mips::COPY), PtrCopy).addReg(Ptr); @@ -1556,7 +1554,7 @@ MachineBasicBlock *MipsTargetLowering::emitSignExtendToI32InReg( MachineFunction *MF = BB->getParent(); MachineRegisterInfo &RegInfo = MF->getRegInfo(); const TargetRegisterClass *RC = getRegClassFor(MVT::i32); - unsigned ScrReg = RegInfo.createVirtualRegister(RC); + Register ScrReg = RegInfo.createVirtualRegister(RC); assert(Size < 32); int64_t ShiftImm = 32 - (Size * 8); @@ -1581,21 +1579,21 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword( const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Dest = MI.getOperand(0).getReg(); - unsigned Ptr = MI.getOperand(1).getReg(); - unsigned Incr = MI.getOperand(2).getReg(); - - unsigned AlignedAddr = RegInfo.createVirtualRegister(RCp); - unsigned ShiftAmt = RegInfo.createVirtualRegister(RC); - unsigned Mask = RegInfo.createVirtualRegister(RC); - unsigned Mask2 = RegInfo.createVirtualRegister(RC); - unsigned Incr2 = RegInfo.createVirtualRegister(RC); - unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp); - unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); - unsigned MaskUpper = RegInfo.createVirtualRegister(RC); - unsigned Scratch = RegInfo.createVirtualRegister(RC); - unsigned Scratch2 = RegInfo.createVirtualRegister(RC); - unsigned Scratch3 = RegInfo.createVirtualRegister(RC); + Register Dest = MI.getOperand(0).getReg(); + Register Ptr = MI.getOperand(1).getReg(); + Register Incr = MI.getOperand(2).getReg(); + + Register AlignedAddr = RegInfo.createVirtualRegister(RCp); + Register ShiftAmt = RegInfo.createVirtualRegister(RC); + Register Mask = RegInfo.createVirtualRegister(RC); + Register Mask2 = RegInfo.createVirtualRegister(RC); + Register Incr2 = RegInfo.createVirtualRegister(RC); + Register MaskLSB2 = RegInfo.createVirtualRegister(RCp); + Register PtrLSB2 = RegInfo.createVirtualRegister(RC); + Register MaskUpper = RegInfo.createVirtualRegister(RC); + Register Scratch = RegInfo.createVirtualRegister(RC); + Register Scratch2 = 
RegInfo.createVirtualRegister(RC); + Register Scratch3 = RegInfo.createVirtualRegister(RC); unsigned AtomicOp = 0; switch (MI.getOpcode()) { @@ -1678,7 +1676,7 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword( if (Subtarget.isLittle()) { BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3); } else { - unsigned Off = RegInfo.createVirtualRegister(RC); + Register Off = RegInfo.createVirtualRegister(RC); BuildMI(BB, DL, TII->get(Mips::XORi), Off) .addReg(PtrLSB2).addImm((Size == 1) ? 3 : 2); BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(Off).addImm(3); @@ -1738,12 +1736,12 @@ MipsTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, unsigned AtomicOp = MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I32 ? Mips::ATOMIC_CMP_SWAP_I32_POSTRA : Mips::ATOMIC_CMP_SWAP_I64_POSTRA; - unsigned Dest = MI.getOperand(0).getReg(); - unsigned Ptr = MI.getOperand(1).getReg(); - unsigned OldVal = MI.getOperand(2).getReg(); - unsigned NewVal = MI.getOperand(3).getReg(); + Register Dest = MI.getOperand(0).getReg(); + Register Ptr = MI.getOperand(1).getReg(); + Register OldVal = MI.getOperand(2).getReg(); + Register NewVal = MI.getOperand(3).getReg(); - unsigned Scratch = MRI.createVirtualRegister(RC); + Register Scratch = MRI.createVirtualRegister(RC); MachineBasicBlock::iterator II(MI); // We need to create copies of the various registers and kill them at the @@ -1751,9 +1749,9 @@ MipsTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, // after fast register allocation, the spills will end up outside of the // blocks that their values are defined in, causing livein errors. - unsigned PtrCopy = MRI.createVirtualRegister(MRI.getRegClass(Ptr)); - unsigned OldValCopy = MRI.createVirtualRegister(MRI.getRegClass(OldVal)); - unsigned NewValCopy = MRI.createVirtualRegister(MRI.getRegClass(NewVal)); + Register PtrCopy = MRI.createVirtualRegister(MRI.getRegClass(Ptr)); + Register OldValCopy = MRI.createVirtualRegister(MRI.getRegClass(OldVal)); + Register NewValCopy = MRI.createVirtualRegister(MRI.getRegClass(NewVal)); BuildMI(*BB, II, DL, TII->get(Mips::COPY), PtrCopy).addReg(Ptr); BuildMI(*BB, II, DL, TII->get(Mips::COPY), OldValCopy).addReg(OldVal); @@ -1790,22 +1788,22 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword( const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Dest = MI.getOperand(0).getReg(); - unsigned Ptr = MI.getOperand(1).getReg(); - unsigned CmpVal = MI.getOperand(2).getReg(); - unsigned NewVal = MI.getOperand(3).getReg(); - - unsigned AlignedAddr = RegInfo.createVirtualRegister(RCp); - unsigned ShiftAmt = RegInfo.createVirtualRegister(RC); - unsigned Mask = RegInfo.createVirtualRegister(RC); - unsigned Mask2 = RegInfo.createVirtualRegister(RC); - unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC); - unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC); - unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp); - unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); - unsigned MaskUpper = RegInfo.createVirtualRegister(RC); - unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC); - unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC); + Register Dest = MI.getOperand(0).getReg(); + Register Ptr = MI.getOperand(1).getReg(); + Register CmpVal = MI.getOperand(2).getReg(); + Register NewVal = MI.getOperand(3).getReg(); + + Register AlignedAddr = RegInfo.createVirtualRegister(RCp); + Register ShiftAmt = RegInfo.createVirtualRegister(RC); + Register Mask = 
RegInfo.createVirtualRegister(RC); + Register Mask2 = RegInfo.createVirtualRegister(RC); + Register ShiftedCmpVal = RegInfo.createVirtualRegister(RC); + Register ShiftedNewVal = RegInfo.createVirtualRegister(RC); + Register MaskLSB2 = RegInfo.createVirtualRegister(RCp); + Register PtrLSB2 = RegInfo.createVirtualRegister(RC); + Register MaskUpper = RegInfo.createVirtualRegister(RC); + Register MaskedCmpVal = RegInfo.createVirtualRegister(RC); + Register MaskedNewVal = RegInfo.createVirtualRegister(RC); unsigned AtomicOp = MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I8 ? Mips::ATOMIC_CMP_SWAP_I8_POSTRA : Mips::ATOMIC_CMP_SWAP_I16_POSTRA; @@ -1820,8 +1818,8 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword( // value isn't a problem. // The Dead flag is needed as the value in scratch isn't used by any other // instruction. Kill isn't used as Dead is more precise. - unsigned Scratch = RegInfo.createVirtualRegister(RC); - unsigned Scratch2 = RegInfo.createVirtualRegister(RC); + Register Scratch = RegInfo.createVirtualRegister(RC); + Register Scratch2 = RegInfo.createVirtualRegister(RC); // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -1859,7 +1857,7 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword( if (Subtarget.isLittle()) { BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3); } else { - unsigned Off = RegInfo.createVirtualRegister(RC); + Register Off = RegInfo.createVirtualRegister(RC); BuildMI(BB, DL, TII->get(Mips::XORi), Off) .addReg(PtrLSB2).addImm((Size == 1) ? 3 : 2); BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(Off).addImm(3); @@ -1967,10 +1965,10 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op, // %gp_rel relocation return getAddrGPRel(N, SDLoc(N), Ty, DAG, ABI.IsN64()); - // %hi/%lo relocation + // %hi/%lo relocation return Subtarget.hasSym32() ? getAddrNonPIC(N, SDLoc(N), Ty, DAG) - // %highest/%higher/%hi/%lo relocation - : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG); + // %highest/%higher/%hi/%lo relocation + : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG); } // Every other architecture would use shouldAssumeDSOLocal in here, but @@ -1987,7 +1985,7 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op, if (GV->hasLocalLinkage()) return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64()); - if (LargeGOT) + if (Subtarget.useXGOT()) return getAddrGlobalLargeGOT( N, SDLoc(N), Ty, DAG, MipsII::MO_GOT_HI16, MipsII::MO_GOT_LO16, DAG.getEntryNode(), @@ -2149,7 +2147,8 @@ SDValue MipsTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const { EVT VT = Node->getValueType(0); SDValue Chain = Node->getOperand(0); SDValue VAListPtr = Node->getOperand(1); - unsigned Align = Node->getConstantOperandVal(3); + const Align Align = + llvm::MaybeAlign(Node->getConstantOperandVal(3)).valueOrOne(); const Value *SV = cast(Node->getOperand(2))->getValue(); SDLoc DL(Node); unsigned ArgSlotSizeInBytes = (ABI.IsN32() || ABI.IsN64()) ? 8 : 4; @@ -2166,14 +2165,13 @@ SDValue MipsTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const { // when the pointer is still aligned from the last va_arg (or pair of // va_args for the i64 on O32 case). 
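The hunk that follows performs that adjustment with an ISD::ADD of Align - 1 followed by an ISD::AND with -Align, i.e. the usual power-of-two round-up of the va_list pointer. The same arithmetic on plain integers (illustrative, not the SelectionDAG code):

#include <cstdint>
#include <cstdio>

// Round p up to a multiple of a; a must be a power of two.
static uint64_t alignUp(uint64_t p, uint64_t a) {
  return (p + a - 1) & ~(a - 1); // ~(a - 1) == -a when a is a power of two
}

int main() {
  // e.g. an i64 needing 8-byte alignment, reached from offset 12, starts at 16
  std::printf("%llu\n", (unsigned long long)alignUp(12, 8));
}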
if (Align > getMinStackArgumentAlignment()) { - assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); + VAList = DAG.getNode( + ISD::ADD, DL, VAList.getValueType(), VAList, + DAG.getConstant(Align.value() - 1, DL, VAList.getValueType())); - VAList = DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList, - DAG.getConstant(Align - 1, DL, VAList.getValueType())); - - VAList = DAG.getNode(ISD::AND, DL, VAList.getValueType(), VAList, - DAG.getConstant(-(int64_t)Align, DL, - VAList.getValueType())); + VAList = DAG.getNode( + ISD::AND, DL, VAList.getValueType(), VAList, + DAG.getConstant(-(int64_t)Align.value(), DL, VAList.getValueType())); } // Increment the pointer, VAList, to the next vaarg. @@ -2870,7 +2868,7 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT, #include "MipsGenCallingConv.inc" CCAssignFn *MipsTargetLowering::CCAssignFnForCall() const{ - return CC_Mips; + return CC_Mips_FixedArg; } CCAssignFn *MipsTargetLowering::CCAssignFnForReturn() const{ @@ -3167,7 +3165,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Arg, DAG.getConstant(1, DL, MVT::i32)); if (!Subtarget.isLittle()) std::swap(Lo, Hi); - unsigned LocRegLo = VA.getLocReg(); + Register LocRegLo = VA.getLocReg(); unsigned LocRegHigh = getNextIntArgReg(LocRegLo); RegsToPass.push_back(std::make_pair(LocRegLo, Lo)); RegsToPass.push_back(std::make_pair(LocRegHigh, Hi)); @@ -3270,7 +3268,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (InternalLinkage) Callee = getAddrLocal(G, DL, Ty, DAG, ABI.IsN32() || ABI.IsN64()); - else if (LargeGOT) { + else if (Subtarget.useXGOT()) { Callee = getAddrGlobalLargeGOT(G, DL, Ty, DAG, MipsII::MO_CALL_HI16, MipsII::MO_CALL_LO16, Chain, FuncInfo->callPtrInfo(Val)); @@ -3292,7 +3290,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (!IsPIC) // static Callee = DAG.getTargetExternalSymbol( Sym, getPointerTy(DAG.getDataLayout()), MipsII::MO_NO_FLAG); - else if (LargeGOT) { + else if (Subtarget.useXGOT()) { Callee = getAddrGlobalLargeGOT(S, DL, Ty, DAG, MipsII::MO_CALL_HI16, MipsII::MO_CALL_LO16, Chain, FuncInfo->callPtrInfo(Sym)); @@ -3523,7 +3521,7 @@ SDValue MipsTargetLowering::LowerFormalArguments( // Arguments stored on registers if (IsRegLoc) { MVT RegVT = VA.getLocVT(); - unsigned ArgReg = VA.getLocReg(); + Register ArgReg = VA.getLocReg(); const TargetRegisterClass *RC = getRegClassFor(RegVT); // Transform the arguments stored on @@ -4568,20 +4566,20 @@ MachineBasicBlock *MipsTargetLowering::emitPseudoD_SELECT(MachineInstr &MI, // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. -unsigned MipsTargetLowering::getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const { +Register MipsTargetLowering::getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const { // Named registers is expected to be fairly rare. For now, just support $28 // since the linux kernel uses it. 
if (Subtarget.isGP64bit()) { - unsigned Reg = StringSwitch(RegName) + Register Reg = StringSwitch(RegName) .Case("$28", Mips::GP_64) - .Default(0); + .Default(Register()); if (Reg) return Reg; } else { - unsigned Reg = StringSwitch(RegName) + Register Reg = StringSwitch(RegName) .Case("$28", Mips::GP) - .Default(0); + .Default(Register()); if (Reg) return Reg; } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 2db60e9801f1..0a5cddd45afb 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -304,11 +304,12 @@ class TargetRegisterClass; unsigned &NumIntermediates, MVT &RegisterVT) const override; /// Return the correct alignment for the current calling convention. - unsigned getABIAlignmentForCallingConv(Type *ArgTy, - DataLayout DL) const override { + Align getABIAlignmentForCallingConv(Type *ArgTy, + DataLayout DL) const override { + const Align ABIAlign(DL.getABITypeAlignment(ArgTy)); if (ArgTy->isVectorTy()) - return std::min(DL.getABITypeAlignment(ArgTy), 8U); - return DL.getABITypeAlignment(ArgTy); + return std::min(ABIAlign, Align(8)); + return ABIAlign; } ISD::NodeType getExtendForAtomicOps() const override { @@ -347,8 +348,8 @@ class TargetRegisterClass; void HandleByVal(CCState *, unsigned &, unsigned) const override; - unsigned getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const override; + Register getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const override; /// If a physical register, this returns the register that receives the /// exception address on entry to an EH pad. diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index fbd56206b249..6bb25ee5754d 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -677,7 +677,8 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc, return MIB; } -bool MipsInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, +bool MipsInstrInfo::findCommutedOpIndices(const MachineInstr &MI, + unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { assert(!MI.isBundle() && "TargetInstrInfo::findCommutedOpIndices() can't handle bundles"); diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index a626c0c3fdb8..092a960b4ba7 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -148,7 +148,7 @@ public: MachineInstrBuilder genInstrWithNewOpc(unsigned NewOpc, MachineBasicBlock::iterator I) const; - bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, + bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override; /// Perform target specific instruction verification. 
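The getABIAlignmentForCallingConv() change above keeps the existing policy: vector arguments are capped at 8 bytes for argument passing while other types keep their ABI alignment; it now just expresses that with Align. Reduced to the Align arithmetic involved (a sketch, assuming llvm/Support/Alignment.h):

#include "llvm/Support/Alignment.h"
#include <algorithm>
#include <cassert>
using namespace llvm;

int main() {
  // A 16-byte-aligned vector argument (e.g. an MSA v4i32) is capped at 8
  // for the calling convention; scalars such as i32 are left untouched.
  Align VecABIAlign(16);
  assert(std::min(VecABIAlign, Align(8)) == Align(8));
  Align I32ABIAlign(4);
  assert(I32ABIAlign == Align(4)); // no cap applied to non-vector types
}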
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index a4e85a38ab28..58167e0f344d 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -211,9 +211,9 @@ def HasCnMips : Predicate<"Subtarget->hasCnMips()">, AssemblerPredicate<"FeatureCnMips">; def NotCnMips : Predicate<"!Subtarget->hasCnMips()">, AssemblerPredicate<"!FeatureCnMips">; -def IsSym32 : Predicate<"Subtarget->HasSym32()">, +def IsSym32 : Predicate<"Subtarget->hasSym32()">, AssemblerPredicate<"FeatureSym32">; -def IsSym64 : Predicate<"!Subtarget->HasSym32()">, +def IsSym64 : Predicate<"!Subtarget->hasSym32()">, AssemblerPredicate<"!FeatureSym32">; def IsN64 : Predicate<"Subtarget->isABI_N64()">; def IsNotN64 : Predicate<"!Subtarget->isABI_N64()">; @@ -1263,6 +1263,7 @@ def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>; // Node immediate fits as 7-bit zero extended on target immediate. def immZExt7 : PatLeaf<(imm), [{ return isUInt<7>(N->getZExtValue()); }]>; +def timmZExt7 : PatLeaf<(timm), [{ return isUInt<7>(N->getZExtValue()); }]>; // Node immediate fits as 16-bit zero extended on target immediate. // The LO16 param means that only the lower 16 bits of the node @@ -1295,6 +1296,7 @@ def immZExt32 : PatLeaf<(imm), [{ return isUInt<32>(N->getZExtValue()); }]>; // shamt field must fit in 5 bits. def immZExt5 : ImmLeaf; +def timmZExt5 : TImmLeaf; def immZExt5Plus1 : PatLeaf<(imm), [{ return isUInt<5>(N->getZExtValue() - 1); @@ -3142,25 +3144,31 @@ multiclass MipsHiLoRelocs; def : MipsPat<(MipsHi texternalsym:$in), (Lui texternalsym:$in)>; - def : MipsPat<(MipsLo tglobaladdr:$in), (Addiu ZeroReg, tglobaladdr:$in)>; + def : MipsPat<(MipsLo tglobaladdr:$in), + (Addiu ZeroReg, tglobaladdr:$in)>; def : MipsPat<(MipsLo tblockaddress:$in), (Addiu ZeroReg, tblockaddress:$in)>; - def : MipsPat<(MipsLo tjumptable:$in), (Addiu ZeroReg, tjumptable:$in)>; - def : MipsPat<(MipsLo tconstpool:$in), (Addiu ZeroReg, tconstpool:$in)>; + def : MipsPat<(MipsLo tjumptable:$in), + (Addiu ZeroReg, tjumptable:$in)>; + def : MipsPat<(MipsLo tconstpool:$in), + (Addiu ZeroReg, tconstpool:$in)>; def : MipsPat<(MipsLo tglobaltlsaddr:$in), (Addiu ZeroReg, tglobaltlsaddr:$in)>; - def : MipsPat<(MipsLo texternalsym:$in), (Addiu ZeroReg, texternalsym:$in)>; + def : MipsPat<(MipsLo texternalsym:$in), + (Addiu ZeroReg, texternalsym:$in)>; def : MipsPat<(add GPROpnd:$hi, (MipsLo tglobaladdr:$lo)), - (Addiu GPROpnd:$hi, tglobaladdr:$lo)>; + (Addiu GPROpnd:$hi, tglobaladdr:$lo)>; def : MipsPat<(add GPROpnd:$hi, (MipsLo tblockaddress:$lo)), - (Addiu GPROpnd:$hi, tblockaddress:$lo)>; + (Addiu GPROpnd:$hi, tblockaddress:$lo)>; def : MipsPat<(add GPROpnd:$hi, (MipsLo tjumptable:$lo)), - (Addiu GPROpnd:$hi, tjumptable:$lo)>; + (Addiu GPROpnd:$hi, tjumptable:$lo)>; def : MipsPat<(add GPROpnd:$hi, (MipsLo tconstpool:$lo)), - (Addiu GPROpnd:$hi, tconstpool:$lo)>; + (Addiu GPROpnd:$hi, tconstpool:$lo)>; def : MipsPat<(add GPROpnd:$hi, (MipsLo tglobaltlsaddr:$lo)), - (Addiu GPROpnd:$hi, tglobaltlsaddr:$lo)>; + (Addiu GPROpnd:$hi, tglobaltlsaddr:$lo)>; + def : MipsPat<(add GPROpnd:$hi, (MipsLo texternalsym:$lo)), + (Addiu GPROpnd:$hi, texternalsym:$lo)>; } // wrapper_pic diff --git a/lib/Target/Mips/MipsInstructionSelector.cpp b/lib/Target/Mips/MipsInstructionSelector.cpp index 45a47ad3c087..f8fc7cb0898b 100644 --- a/lib/Target/Mips/MipsInstructionSelector.cpp +++ b/lib/Target/Mips/MipsInstructionSelector.cpp @@ -17,6 +17,7 @@ #include "MipsTargetMachine.h" #include 
"llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #define DEBUG_TYPE "mips-isel" @@ -33,7 +34,7 @@ public: MipsInstructionSelector(const MipsTargetMachine &TM, const MipsSubtarget &STI, const MipsRegisterBankInfo &RBI); - bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override; + bool select(MachineInstr &I) override; static const char *getName() { return DEBUG_TYPE; } private: @@ -44,6 +45,8 @@ private: const TargetRegisterClass * getRegClassForTypeOnBank(unsigned OpSize, const RegisterBank &RB, const RegisterBankInfo &RBI) const; + unsigned selectLoadStoreOpCode(MachineInstr &I, + MachineRegisterInfo &MRI) const; const MipsTargetMachine &TM; const MipsSubtarget &STI; @@ -84,7 +87,7 @@ MipsInstructionSelector::MipsInstructionSelector( bool MipsInstructionSelector::selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const { Register DstReg = I.getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + if (Register::isPhysicalRegister(DstReg)) return true; const RegisterBank *RegBank = RBI.getRegBank(DstReg, MRI, TRI); @@ -158,9 +161,15 @@ bool MipsInstructionSelector::materialize32BitImm(Register DestReg, APInt Imm, } /// Returning Opc indicates that we failed to select MIPS instruction opcode. -static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned MemSizeInBytes, - unsigned RegBank, bool isFP64) { - bool isStore = Opc == TargetOpcode::G_STORE; +unsigned +MipsInstructionSelector::selectLoadStoreOpCode(MachineInstr &I, + MachineRegisterInfo &MRI) const { + STI.getRegisterInfo(); + const Register DestReg = I.getOperand(0).getReg(); + const unsigned RegBank = RBI.getRegBank(DestReg, MRI, TRI)->getID(); + const unsigned MemSizeInBytes = (*I.memoperands_begin())->getSize(); + unsigned Opc = I.getOpcode(); + const bool isStore = Opc == TargetOpcode::G_STORE; if (RegBank == Mips::GPRBRegBankID) { if (isStore) switch (MemSizeInBytes) { @@ -192,10 +201,24 @@ static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned MemSizeInBytes, case 4: return isStore ? Mips::SWC1 : Mips::LWC1; case 8: - if (isFP64) + if (STI.isFP64bit()) return isStore ? Mips::SDC164 : Mips::LDC164; else return isStore ? Mips::SDC1 : Mips::LDC1; + case 16: { + assert(STI.hasMSA() && "Vector instructions require target with MSA."); + const unsigned VectorElementSizeInBytes = + MRI.getType(DestReg).getElementType().getSizeInBytes(); + if (VectorElementSizeInBytes == 1) + return isStore ? Mips::ST_B : Mips::LD_B; + if (VectorElementSizeInBytes == 2) + return isStore ? Mips::ST_H : Mips::LD_H; + if (VectorElementSizeInBytes == 4) + return isStore ? Mips::ST_W : Mips::LD_W; + if (VectorElementSizeInBytes == 8) + return isStore ? 
Mips::ST_D : Mips::LD_D; + return Opc; + } default: return Opc; } @@ -203,8 +226,7 @@ static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned MemSizeInBytes, return Opc; } -bool MipsInstructionSelector::select(MachineInstr &I, - CodeGenCoverage &CoverageInfo) const { +bool MipsInstructionSelector::select(MachineInstr &I) { MachineBasicBlock &MBB = *I.getParent(); MachineFunction &MF = *MBB.getParent(); @@ -231,7 +253,7 @@ bool MipsInstructionSelector::select(MachineInstr &I, return true; } - if (selectImpl(I, CoverageInfo)) + if (selectImpl(I, *CoverageInfo)) return true; MachineInstr *MI = nullptr; @@ -265,6 +287,11 @@ bool MipsInstructionSelector::select(MachineInstr &I, .add(I.getOperand(2)); break; } + case G_INTTOPTR: + case G_PTRTOINT: { + I.setDesc(TII.get(COPY)); + return selectCopy(I, MRI); + } case G_FRAME_INDEX: { MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDiu)) .add(I.getOperand(0)) @@ -279,12 +306,71 @@ bool MipsInstructionSelector::select(MachineInstr &I, .add(I.getOperand(1)); break; } + case G_BRJT: { + unsigned EntrySize = + MF.getJumpTableInfo()->getEntrySize(MF.getDataLayout()); + assert(isPowerOf2_32(EntrySize) && + "Non-power-of-two jump-table entry size not supported."); + + Register JTIndex = MRI.createVirtualRegister(&Mips::GPR32RegClass); + MachineInstr *SLL = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::SLL)) + .addDef(JTIndex) + .addUse(I.getOperand(2).getReg()) + .addImm(Log2_32(EntrySize)); + if (!constrainSelectedInstRegOperands(*SLL, TII, TRI, RBI)) + return false; + + Register DestAddress = MRI.createVirtualRegister(&Mips::GPR32RegClass); + MachineInstr *ADDu = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDu)) + .addDef(DestAddress) + .addUse(I.getOperand(0).getReg()) + .addUse(JTIndex); + if (!constrainSelectedInstRegOperands(*ADDu, TII, TRI, RBI)) + return false; + + Register Dest = MRI.createVirtualRegister(&Mips::GPR32RegClass); + MachineInstr *LW = + BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LW)) + .addDef(Dest) + .addUse(DestAddress) + .addJumpTableIndex(I.getOperand(1).getIndex(), MipsII::MO_ABS_LO) + .addMemOperand(MF.getMachineMemOperand( + MachinePointerInfo(), MachineMemOperand::MOLoad, 4, 4)); + if (!constrainSelectedInstRegOperands(*LW, TII, TRI, RBI)) + return false; + + if (MF.getTarget().isPositionIndependent()) { + Register DestTmp = MRI.createVirtualRegister(&Mips::GPR32RegClass); + LW->getOperand(0).setReg(DestTmp); + MachineInstr *ADDu = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDu)) + .addDef(Dest) + .addUse(DestTmp) + .addUse(MF.getInfo() + ->getGlobalBaseRegForGlobalISel()); + if (!constrainSelectedInstRegOperands(*ADDu, TII, TRI, RBI)) + return false; + } + + MachineInstr *Branch = + BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::PseudoIndirectBranch)) + .addUse(Dest); + if (!constrainSelectedInstRegOperands(*Branch, TII, TRI, RBI)) + return false; + + I.eraseFromParent(); + return true; + } + case G_BRINDIRECT: { + MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::PseudoIndirectBranch)) + .add(I.getOperand(0)); + break; + } case G_PHI: { const Register DestReg = I.getOperand(0).getReg(); const unsigned OpSize = MRI.getType(DestReg).getSizeInBits(); const TargetRegisterClass *DefRC = nullptr; - if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + if (Register::isPhysicalRegister(DestReg)) DefRC = TRI.getRegClass(DestReg); else DefRC = getRegClassForTypeOnBank(OpSize, @@ -297,26 +383,35 @@ bool MipsInstructionSelector::select(MachineInstr &I, case G_LOAD: case G_ZEXTLOAD: case G_SEXTLOAD: { - const 
Register DestReg = I.getOperand(0).getReg(); - const unsigned DestRegBank = RBI.getRegBank(DestReg, MRI, TRI)->getID(); - const unsigned OpSize = MRI.getType(DestReg).getSizeInBits(); - const unsigned OpMemSizeInBytes = (*I.memoperands_begin())->getSize(); - - if (DestRegBank == Mips::GPRBRegBankID && OpSize != 32) - return false; - - if (DestRegBank == Mips::FPRBRegBankID && OpSize != 32 && OpSize != 64) - return false; - - const unsigned NewOpc = selectLoadStoreOpCode( - I.getOpcode(), OpMemSizeInBytes, DestRegBank, STI.isFP64bit()); + const unsigned NewOpc = selectLoadStoreOpCode(I, MRI); if (NewOpc == I.getOpcode()) return false; + MachineOperand BaseAddr = I.getOperand(1); + int64_t SignedOffset = 0; + // Try to fold load/store + G_GEP + G_CONSTANT + // %SignedOffset:(s32) = G_CONSTANT i32 16_bit_signed_immediate + // %Addr:(p0) = G_GEP %BaseAddr, %SignedOffset + // %LoadResult/%StoreSrc = load/store %Addr(p0) + // into: + // %LoadResult/%StoreSrc = NewOpc %BaseAddr(p0), 16_bit_signed_immediate + + MachineInstr *Addr = MRI.getVRegDef(I.getOperand(1).getReg()); + if (Addr->getOpcode() == G_GEP) { + MachineInstr *Offset = MRI.getVRegDef(Addr->getOperand(2).getReg()); + if (Offset->getOpcode() == G_CONSTANT) { + APInt OffsetValue = Offset->getOperand(1).getCImm()->getValue(); + if (OffsetValue.isSignedIntN(16)) { + BaseAddr = Addr->getOperand(1); + SignedOffset = OffsetValue.getSExtValue(); + } + } + } + MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc)) .add(I.getOperand(0)) - .add(I.getOperand(1)) - .addImm(0) + .add(BaseAddr) + .addImm(SignedOffset) .addMemOperand(*I.memoperands_begin()); break; } @@ -356,6 +451,18 @@ bool MipsInstructionSelector::select(MachineInstr &I, .add(I.getOperand(3)); break; } + case G_IMPLICIT_DEF: { + MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::IMPLICIT_DEF)) + .add(I.getOperand(0)); + + // Set class based on register bank, there can be fpr and gpr implicit def. + MRI.setRegClass(MI->getOperand(0).getReg(), + getRegClassForTypeOnBank( + MRI.getType(I.getOperand(0).getReg()).getSizeInBits(), + *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI), + RBI)); + break; + } case G_CONSTANT: { MachineIRBuilder B(I); if (!materialize32BitImm(I.getOperand(0).getReg(), @@ -423,7 +530,7 @@ bool MipsInstructionSelector::select(MachineInstr &I, Opcode = Mips::TRUNC_W_S; else Opcode = STI.isFP64bit() ? 
Mips::TRUNC_W_D64 : Mips::TRUNC_W_D32; - unsigned ResultInFPR = MRI.createVirtualRegister(&Mips::FGR32RegClass); + Register ResultInFPR = MRI.createVirtualRegister(&Mips::FGR32RegClass); MachineInstr *Trunc = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Opcode)) .addDef(ResultInFPR) .addUse(I.getOperand(1).getReg()); @@ -496,6 +603,24 @@ bool MipsInstructionSelector::select(MachineInstr &I, I.eraseFromParent(); return true; } + case G_JUMP_TABLE: { + if (MF.getTarget().isPositionIndependent()) { + MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LW)) + .addDef(I.getOperand(0).getReg()) + .addReg(MF.getInfo() + ->getGlobalBaseRegForGlobalISel()) + .addJumpTableIndex(I.getOperand(1).getIndex(), MipsII::MO_GOT) + .addMemOperand( + MF.getMachineMemOperand(MachinePointerInfo::getGOT(MF), + MachineMemOperand::MOLoad, 4, 4)); + } else { + MI = + BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LUi)) + .addDef(I.getOperand(0).getReg()) + .addJumpTableIndex(I.getOperand(1).getIndex(), MipsII::MO_ABS_HI); + } + break; + } case G_ICMP: { struct Instr { unsigned Opcode; @@ -626,7 +751,7 @@ bool MipsInstructionSelector::select(MachineInstr &I, // MipsFCMPCondCode, result is inverted i.e. MOVT_I is used. unsigned MoveOpcode = isLogicallyNegated ? Mips::MOVT_I : Mips::MOVF_I; - unsigned TrueInReg = MRI.createVirtualRegister(&Mips::GPR32RegClass); + Register TrueInReg = MRI.createVirtualRegister(&Mips::GPR32RegClass); BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDiu)) .addDef(TrueInReg) .addUse(Mips::ZERO) @@ -654,6 +779,33 @@ bool MipsInstructionSelector::select(MachineInstr &I, I.eraseFromParent(); return true; } + case G_FENCE: { + MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::SYNC)).addImm(0); + break; + } + case G_VASTART: { + MipsFunctionInfo *FuncInfo = MF.getInfo(); + int FI = FuncInfo->getVarArgsFrameIndex(); + + Register LeaReg = MRI.createVirtualRegister(&Mips::GPR32RegClass); + MachineInstr *LEA_ADDiu = + BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LEA_ADDiu)) + .addDef(LeaReg) + .addFrameIndex(FI) + .addImm(0); + if (!constrainSelectedInstRegOperands(*LEA_ADDiu, TII, TRI, RBI)) + return false; + + MachineInstr *Store = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::SW)) + .addUse(LeaReg) + .addUse(I.getOperand(0).getReg()) + .addImm(0); + if (!constrainSelectedInstRegOperands(*Store, TII, TRI, RBI)) + return false; + + I.eraseFromParent(); + return true; + } default: return false; } diff --git a/lib/Target/Mips/MipsLegalizerInfo.cpp b/lib/Target/Mips/MipsLegalizerInfo.cpp index e442a81837ed..bb4a1d902d75 100644 --- a/lib/Target/Mips/MipsLegalizerInfo.cpp +++ b/lib/Target/Mips/MipsLegalizerInfo.cpp @@ -16,18 +16,65 @@ using namespace llvm; +struct TypesAndMemOps { + LLT ValTy; + LLT PtrTy; + unsigned MemSize; + bool MustBeNaturallyAligned; +}; + +static bool +CheckTy0Ty1MemSizeAlign(const LegalityQuery &Query, + std::initializer_list SupportedValues) { + for (auto &Val : SupportedValues) { + if (Val.ValTy != Query.Types[0]) + continue; + if (Val.PtrTy != Query.Types[1]) + continue; + if (Val.MemSize != Query.MMODescrs[0].SizeInBits) + continue; + if (Val.MustBeNaturallyAligned && + Query.MMODescrs[0].SizeInBits % Query.MMODescrs[0].AlignInBits != 0) + continue; + return true; + } + return false; +} + +static bool CheckTyN(unsigned N, const LegalityQuery &Query, + std::initializer_list SupportedValues) { + for (auto &Val : SupportedValues) + if (Val == Query.Types[N]) + return true; + return false; +} + MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) { using namespace 
TargetOpcode; const LLT s1 = LLT::scalar(1); const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); + const LLT v16s8 = LLT::vector(16, 8); + const LLT v8s16 = LLT::vector(8, 16); + const LLT v4s32 = LLT::vector(4, 32); + const LLT v2s64 = LLT::vector(2, 64); const LLT p0 = LLT::pointer(0, 32); - getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL}) + getActionDefinitionsBuilder({G_SUB, G_MUL}) .legalFor({s32}) .clampScalar(0, s32, s32); + getActionDefinitionsBuilder(G_ADD) + .legalIf([=, &ST](const LegalityQuery &Query) { + if (CheckTyN(0, Query, {s32})) + return true; + if (ST.hasMSA() && CheckTyN(0, Query, {v16s8, v8s16, v4s32, v2s64})) + return true; + return false; + }) + .clampScalar(0, s32, s32); + getActionDefinitionsBuilder({G_UADDO, G_UADDE, G_USUBO, G_USUBE, G_UMULO}) .lowerFor({{s32, s1}}); @@ -36,13 +83,26 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) { .maxScalar(0, s32); getActionDefinitionsBuilder({G_LOAD, G_STORE}) - .legalForTypesWithMemDesc({{s32, p0, 8, 8}, - {s32, p0, 16, 8}, - {s32, p0, 32, 8}, - {s64, p0, 64, 8}, - {p0, p0, 32, 8}}) + .legalIf([=, &ST](const LegalityQuery &Query) { + if (CheckTy0Ty1MemSizeAlign(Query, {{s32, p0, 8, ST.hasMips32r6()}, + {s32, p0, 16, ST.hasMips32r6()}, + {s32, p0, 32, ST.hasMips32r6()}, + {p0, p0, 32, ST.hasMips32r6()}, + {s64, p0, 64, ST.hasMips32r6()}})) + return true; + if (ST.hasMSA() && + CheckTy0Ty1MemSizeAlign(Query, {{v16s8, p0, 128, false}, + {v8s16, p0, 128, false}, + {v4s32, p0, 128, false}, + {v2s64, p0, 128, false}})) + return true; + return false; + }) .minScalar(0, s32); + getActionDefinitionsBuilder(G_IMPLICIT_DEF) + .legalFor({s32, s64}); + getActionDefinitionsBuilder(G_UNMERGE_VALUES) .legalFor({{s32, s64}}); @@ -50,9 +110,17 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) { .legalFor({{s64, s32}}); getActionDefinitionsBuilder({G_ZEXTLOAD, G_SEXTLOAD}) - .legalForTypesWithMemDesc({{s32, p0, 8, 8}, - {s32, p0, 16, 8}}) - .minScalar(0, s32); + .legalForTypesWithMemDesc({{s32, p0, 8, 8}, + {s32, p0, 16, 8}}) + .clampScalar(0, s32, s32); + + getActionDefinitionsBuilder({G_ZEXT, G_SEXT}) + .legalIf([](const LegalityQuery &Query) { return false; }) + .maxScalar(0, s32); + + getActionDefinitionsBuilder(G_TRUNC) + .legalIf([](const LegalityQuery &Query) { return false; }) + .maxScalar(1, s32); getActionDefinitionsBuilder(G_SELECT) .legalForCartesianProduct({p0, s32, s64}, {s32}) @@ -63,6 +131,12 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) { .legalFor({s32}) .minScalar(0, s32); + getActionDefinitionsBuilder(G_BRJT) + .legalFor({{p0, s32}}); + + getActionDefinitionsBuilder(G_BRINDIRECT) + .legalFor({p0}); + getActionDefinitionsBuilder(G_PHI) .legalFor({p0, s32, s64}) .minScalar(0, s32); @@ -77,8 +151,9 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) { .libcallFor({s64}); getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR}) - .legalFor({s32, s32}) - .minScalar(1, s32); + .legalFor({{s32, s32}}) + .clampScalar(1, s32, s32) + .clampScalar(0, s32, s32); getActionDefinitionsBuilder(G_ICMP) .legalForCartesianProduct({s32}, {s32, p0}) @@ -89,15 +164,24 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) { .legalFor({s32}) .clampScalar(0, s32, s32); - getActionDefinitionsBuilder(G_GEP) + getActionDefinitionsBuilder({G_GEP, G_INTTOPTR}) .legalFor({{p0, s32}}); + getActionDefinitionsBuilder(G_PTRTOINT) + .legalFor({{s32, p0}}); + getActionDefinitionsBuilder(G_FRAME_INDEX) .legalFor({p0}); - getActionDefinitionsBuilder(G_GLOBAL_VALUE) + 
getActionDefinitionsBuilder({G_GLOBAL_VALUE, G_JUMP_TABLE}) .legalFor({p0}); + getActionDefinitionsBuilder(G_DYN_STACKALLOC) + .lowerFor({{p0, s32}}); + + getActionDefinitionsBuilder(G_VASTART) + .legalFor({p0}); + // FP instructions getActionDefinitionsBuilder(G_FCONSTANT) .legalFor({s32, s64}); @@ -126,6 +210,7 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) { getActionDefinitionsBuilder(G_FPTOUI) .libcallForCartesianProduct({s64}, {s64, s32}) + .lowerForCartesianProduct({s32}, {s64, s32}) .minScalar(0, s32); // Int to FP conversion instructions @@ -136,8 +221,11 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) { getActionDefinitionsBuilder(G_UITOFP) .libcallForCartesianProduct({s64, s32}, {s64}) + .customForCartesianProduct({s64, s32}, {s32}) .minScalar(1, s32); + getActionDefinitionsBuilder(G_SEXT_INREG).lower(); + computeTables(); verify(*ST.getInstrInfo()); } @@ -150,6 +238,134 @@ bool MipsLegalizerInfo::legalizeCustom(MachineInstr &MI, using namespace TargetOpcode; MIRBuilder.setInstr(MI); + const MipsSubtarget &STI = + static_cast(MIRBuilder.getMF().getSubtarget()); + const LLT s32 = LLT::scalar(32); + const LLT s64 = LLT::scalar(64); - return false; + switch (MI.getOpcode()) { + case G_UITOFP: { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Src); + + if (SrcTy != s32) + return false; + if (DstTy != s32 && DstTy != s64) + return false; + + // Let 0xABCDEFGH be given unsigned in MI.getOperand(1). First let's convert + // unsigned to double. Mantissa has 52 bits so we use following trick: + // First make floating point bit mask 0x43300000ABCDEFGH. + // Mask represents 2^52 * 0x1.00000ABCDEFGH i.e. 0x100000ABCDEFGH.0 . + // Next, subtract 2^52 * 0x1.0000000000000 i.e. 0x10000000000000.0 from it. + // Done. Trunc double to float if needed. + + MachineInstrBuilder Bitcast = MIRBuilder.buildInstr( + STI.isFP64bit() ? 
Mips::BuildPairF64_64 : Mips::BuildPairF64, {s64}, + {Src, MIRBuilder.buildConstant(s32, UINT32_C(0x43300000))}); + Bitcast.constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(), + *STI.getRegBankInfo()); + + MachineInstrBuilder TwoP52FP = MIRBuilder.buildFConstant( + s64, BitsToDouble(UINT64_C(0x4330000000000000))); + + if (DstTy == s64) + MIRBuilder.buildFSub(Dst, Bitcast, TwoP52FP); + else { + MachineInstrBuilder ResF64 = MIRBuilder.buildFSub(s64, Bitcast, TwoP52FP); + MIRBuilder.buildFPTrunc(Dst, ResF64); + } + + MI.eraseFromParent(); + break; + } + default: + return false; + } + + return true; +} + +static bool SelectMSA3OpIntrinsic(MachineInstr &MI, unsigned Opcode, + MachineIRBuilder &MIRBuilder, + const MipsSubtarget &ST) { + assert(ST.hasMSA() && "MSA intrinsic not supported on target without MSA."); + if (!MIRBuilder.buildInstr(Opcode) + .add(MI.getOperand(0)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .constrainAllUses(MIRBuilder.getTII(), *ST.getRegisterInfo(), + *ST.getRegBankInfo())) + return false; + MI.eraseFromParent(); + return true; +} + +static bool MSA3OpIntrinsicToGeneric(MachineInstr &MI, unsigned Opcode, + MachineIRBuilder &MIRBuilder, + const MipsSubtarget &ST) { + assert(ST.hasMSA() && "MSA intrinsic not supported on target without MSA."); + MIRBuilder.buildInstr(Opcode) + .add(MI.getOperand(0)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)); + MI.eraseFromParent(); + return true; +} + +bool MipsLegalizerInfo::legalizeIntrinsic(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const { + const MipsSubtarget &ST = + static_cast(MI.getMF()->getSubtarget()); + const MipsInstrInfo &TII = *ST.getInstrInfo(); + const MipsRegisterInfo &TRI = *ST.getRegisterInfo(); + const RegisterBankInfo &RBI = *ST.getRegBankInfo(); + MIRBuilder.setInstr(MI); + + switch (MI.getIntrinsicID()) { + case Intrinsic::memcpy: + case Intrinsic::memset: + case Intrinsic::memmove: + if (createMemLibcall(MIRBuilder, MRI, MI) == + LegalizerHelper::UnableToLegalize) + return false; + MI.eraseFromParent(); + return true; + case Intrinsic::trap: { + MachineInstr *Trap = MIRBuilder.buildInstr(Mips::TRAP); + MI.eraseFromParent(); + return constrainSelectedInstRegOperands(*Trap, TII, TRI, RBI); + } + case Intrinsic::vacopy: { + Register Tmp = MRI.createGenericVirtualRegister(LLT::pointer(0, 32)); + MachinePointerInfo MPO; + MIRBuilder.buildLoad(Tmp, MI.getOperand(2), + *MI.getMF()->getMachineMemOperand( + MPO, MachineMemOperand::MOLoad, 4, 4)); + MIRBuilder.buildStore(Tmp, MI.getOperand(1), + *MI.getMF()->getMachineMemOperand( + MPO, MachineMemOperand::MOStore, 4, 4)); + MI.eraseFromParent(); + return true; + } + case Intrinsic::mips_addv_b: + case Intrinsic::mips_addv_h: + case Intrinsic::mips_addv_w: + case Intrinsic::mips_addv_d: + return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_ADD, MIRBuilder, ST); + case Intrinsic::mips_addvi_b: + return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_B, MIRBuilder, ST); + case Intrinsic::mips_addvi_h: + return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_H, MIRBuilder, ST); + case Intrinsic::mips_addvi_w: + return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_W, MIRBuilder, ST); + case Intrinsic::mips_addvi_d: + return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_D, MIRBuilder, ST); + default: + break; + } + return true; } diff --git a/lib/Target/Mips/MipsLegalizerInfo.h b/lib/Target/Mips/MipsLegalizerInfo.h index e5021e081890..9696c262b2db 100644 --- a/lib/Target/Mips/MipsLegalizerInfo.h +++ b/lib/Target/Mips/MipsLegalizerInfo.h @@ -28,6 +28,9 
@@ public: bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const override; + + bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const override; }; } // end namespace llvm #endif diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 907ed9ef746f..f585d9c1a148 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -60,6 +60,11 @@ def immZExt2Ptr : ImmLeaf(Imm);}]>; def immZExt3Ptr : ImmLeaf(Imm);}]>; def immZExt4Ptr : ImmLeaf(Imm);}]>; +def timmZExt1Ptr : TImmLeaf(Imm);}]>; +def timmZExt2Ptr : TImmLeaf(Imm);}]>; +def timmZExt3Ptr : TImmLeaf(Imm);}]>; +def timmZExt4Ptr : TImmLeaf(Imm);}]>; + // Operands def immZExt2Lsa : ImmLeaf(Imm - 1);}]>; @@ -1270,7 +1275,7 @@ class MSA_I8_SHF_DESC_BASE Pattern = [(set ROWD:$wd, (MipsSHF immZExt8:$u8, ROWS:$ws))]; + list Pattern = [(set ROWD:$wd, (MipsSHF timmZExt8:$u8, ROWS:$ws))]; InstrItinClass Itinerary = itin; } @@ -2299,13 +2304,13 @@ class INSERT_FW_VIDX64_PSEUDO_DESC : class INSERT_FD_VIDX64_PSEUDO_DESC : MSA_INSERT_VIDX_PSEUDO_BASE; -class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, uimm4, immZExt4, +class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, uimm4, timmZExt4, MSA128BOpnd>; -class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, uimm3, immZExt3, +class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, uimm3, timmZExt3, MSA128HOpnd>; -class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32, uimm2, immZExt2, +class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32, uimm2, timmZExt2, MSA128WOpnd>; -class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64, uimm1, immZExt1, +class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64, uimm1, timmZExt1, MSA128DOpnd>; class LD_DESC_BASE; class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", ctpop, MSA128DOpnd>; class SAT_S_B_DESC : MSA_BIT_X_DESC_BASE<"sat_s.b", int_mips_sat_s_b, uimm3, - immZExt3, MSA128BOpnd>; + timmZExt3, MSA128BOpnd>; class SAT_S_H_DESC : MSA_BIT_X_DESC_BASE<"sat_s.h", int_mips_sat_s_h, uimm4, - immZExt4, MSA128HOpnd>; + timmZExt4, MSA128HOpnd>; class SAT_S_W_DESC : MSA_BIT_X_DESC_BASE<"sat_s.w", int_mips_sat_s_w, uimm5, - immZExt5, MSA128WOpnd>; + timmZExt5, MSA128WOpnd>; class SAT_S_D_DESC : MSA_BIT_X_DESC_BASE<"sat_s.d", int_mips_sat_s_d, uimm6, - immZExt6, MSA128DOpnd>; + timmZExt6, MSA128DOpnd>; class SAT_U_B_DESC : MSA_BIT_X_DESC_BASE<"sat_u.b", int_mips_sat_u_b, uimm3, - immZExt3, MSA128BOpnd>; + timmZExt3, MSA128BOpnd>; class SAT_U_H_DESC : MSA_BIT_X_DESC_BASE<"sat_u.h", int_mips_sat_u_h, uimm4, - immZExt4, MSA128HOpnd>; + timmZExt4, MSA128HOpnd>; class SAT_U_W_DESC : MSA_BIT_X_DESC_BASE<"sat_u.w", int_mips_sat_u_w, uimm5, - immZExt5, MSA128WOpnd>; + timmZExt5, MSA128WOpnd>; class SAT_U_D_DESC : MSA_BIT_X_DESC_BASE<"sat_u.d", int_mips_sat_u_d, uimm6, - immZExt6, MSA128DOpnd>; + timmZExt6, MSA128DOpnd>; class SHF_B_DESC : MSA_I8_SHF_DESC_BASE<"shf.b", MSA128BOpnd>; class SHF_H_DESC : MSA_I8_SHF_DESC_BASE<"shf.h", MSA128HOpnd>; @@ -2546,16 +2551,16 @@ class SLD_D_DESC : MSA_3R_SLD_DESC_BASE<"sld.d", int_mips_sld_d, MSA128DOpnd>; class SLDI_B_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.b", int_mips_sldi_b, MSA128BOpnd, MSA128BOpnd, uimm4, - immZExt4>; + timmZExt4>; class SLDI_H_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.h", int_mips_sldi_h, MSA128HOpnd, MSA128HOpnd, uimm3, - immZExt3>; + timmZExt3>; class SLDI_W_DESC : 
MSA_ELM_SLD_DESC_BASE<"sldi.w", int_mips_sldi_w, MSA128WOpnd, MSA128WOpnd, uimm2, - immZExt2>; + timmZExt2>; class SLDI_D_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.d", int_mips_sldi_d, MSA128DOpnd, MSA128DOpnd, uimm1, - immZExt1>; + timmZExt1>; class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", shl, MSA128BOpnd>; class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", shl, MSA128HOpnd>; @@ -2609,13 +2614,13 @@ class SRAR_W_DESC : MSA_3R_DESC_BASE<"srar.w", int_mips_srar_w, MSA128WOpnd>; class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, MSA128DOpnd>; class SRARI_B_DESC : MSA_BIT_X_DESC_BASE<"srari.b", int_mips_srari_b, uimm3, - immZExt3, MSA128BOpnd>; + timmZExt3, MSA128BOpnd>; class SRARI_H_DESC : MSA_BIT_X_DESC_BASE<"srari.h", int_mips_srari_h, uimm4, - immZExt4, MSA128HOpnd>; + timmZExt4, MSA128HOpnd>; class SRARI_W_DESC : MSA_BIT_X_DESC_BASE<"srari.w", int_mips_srari_w, uimm5, - immZExt5, MSA128WOpnd>; + timmZExt5, MSA128WOpnd>; class SRARI_D_DESC : MSA_BIT_X_DESC_BASE<"srari.d", int_mips_srari_d, uimm6, - immZExt6, MSA128DOpnd>; + timmZExt6, MSA128DOpnd>; class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", srl, MSA128BOpnd>; class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", srl, MSA128HOpnd>; @@ -2637,13 +2642,13 @@ class SRLR_W_DESC : MSA_3R_DESC_BASE<"srlr.w", int_mips_srlr_w, MSA128WOpnd>; class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d, MSA128DOpnd>; class SRLRI_B_DESC : MSA_BIT_X_DESC_BASE<"srlri.b", int_mips_srlri_b, uimm3, - immZExt3, MSA128BOpnd>; + timmZExt3, MSA128BOpnd>; class SRLRI_H_DESC : MSA_BIT_X_DESC_BASE<"srlri.h", int_mips_srlri_h, uimm4, - immZExt4, MSA128HOpnd>; + timmZExt4, MSA128HOpnd>; class SRLRI_W_DESC : MSA_BIT_X_DESC_BASE<"srlri.w", int_mips_srlri_w, uimm5, - immZExt5, MSA128WOpnd>; + timmZExt5, MSA128WOpnd>; class SRLRI_D_DESC : MSA_BIT_X_DESC_BASE<"srlri.d", int_mips_srlri_d, uimm6, - immZExt6, MSA128DOpnd>; + timmZExt6, MSA128DOpnd>; class ST_DESC_BASEgetParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - unsigned SrcReg = I->getOperand(0).getReg(); + Register SrcReg = I->getOperand(0).getReg(); unsigned DstReg = getRegTy(SrcReg, MF) == MVT::i32 ? Mips::T9 : Mips::T9_64; BuildMI(*MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), DstReg) .addReg(SrcReg); diff --git a/lib/Target/Mips/MipsPfmCounters.td b/lib/Target/Mips/MipsPfmCounters.td new file mode 100644 index 000000000000..c7779b474b91 --- /dev/null +++ b/lib/Target/Mips/MipsPfmCounters.td @@ -0,0 +1,18 @@ +//===-- MipsPfmCounters.td - Mips Hardware Counters --------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This describes the available hardware counters for Mips. 
+// +//===----------------------------------------------------------------------===// + +def CpuCyclesPfmCounter : PfmCounter<"CYCLES">; + +def DefaultPfmCounters : ProcPfmCounters { + let CycleCounter = CpuCyclesPfmCounter; +} +def : PfmCountersDefaultBinding; diff --git a/lib/Target/Mips/MipsPreLegalizerCombiner.cpp b/lib/Target/Mips/MipsPreLegalizerCombiner.cpp index 85076590d407..ace0735652bd 100644 --- a/lib/Target/Mips/MipsPreLegalizerCombiner.cpp +++ b/lib/Target/Mips/MipsPreLegalizerCombiner.cpp @@ -27,7 +27,8 @@ class MipsPreLegalizerCombinerInfo : public CombinerInfo { public: MipsPreLegalizerCombinerInfo() : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, - /*LegalizerInfo*/ nullptr) {} + /*LegalizerInfo*/ nullptr, /*EnableOpt*/ false, + /*EnableOptSize*/ false, /*EnableMinSize*/ false) {} virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, MachineIRBuilder &B) const override; }; diff --git a/lib/Target/Mips/MipsRegisterBankInfo.cpp b/lib/Target/Mips/MipsRegisterBankInfo.cpp index d8bcf16afd50..d334366e727c 100644 --- a/lib/Target/Mips/MipsRegisterBankInfo.cpp +++ b/lib/Target/Mips/MipsRegisterBankInfo.cpp @@ -12,6 +12,7 @@ #include "MipsRegisterBankInfo.h" #include "MipsInstrInfo.h" +#include "MipsTargetMachine.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" @@ -27,20 +28,23 @@ enum PartialMappingIdx { PMI_GPR, PMI_SPR, PMI_DPR, + PMI_MSA, PMI_Min = PMI_GPR, }; RegisterBankInfo::PartialMapping PartMappings[]{ {0, 32, GPRBRegBank}, {0, 32, FPRBRegBank}, - {0, 64, FPRBRegBank} + {0, 64, FPRBRegBank}, + {0, 128, FPRBRegBank} }; enum ValueMappingIdx { InvalidIdx = 0, GPRIdx = 1, SPRIdx = 4, - DPRIdx = 7 + DPRIdx = 7, + MSAIdx = 10 }; RegisterBankInfo::ValueMapping ValueMappings[] = { @@ -50,14 +54,18 @@ RegisterBankInfo::ValueMapping ValueMappings[] = { {&PartMappings[PMI_GPR - PMI_Min], 1}, {&PartMappings[PMI_GPR - PMI_Min], 1}, {&PartMappings[PMI_GPR - PMI_Min], 1}, - // up to 3 ops operands FPRs - single precission + // up to 3 operands in FPRs - single precission {&PartMappings[PMI_SPR - PMI_Min], 1}, {&PartMappings[PMI_SPR - PMI_Min], 1}, {&PartMappings[PMI_SPR - PMI_Min], 1}, - // up to 3 ops operands FPRs - double precission + // up to 3 operands in FPRs - double precission {&PartMappings[PMI_DPR - PMI_Min], 1}, {&PartMappings[PMI_DPR - PMI_Min], 1}, - {&PartMappings[PMI_DPR - PMI_Min], 1} + {&PartMappings[PMI_DPR - PMI_Min], 1}, + // up to 3 operands in FPRs - MSA + {&PartMappings[PMI_MSA - PMI_Min], 1}, + {&PartMappings[PMI_MSA - PMI_Min], 1}, + {&PartMappings[PMI_MSA - PMI_Min], 1} }; } // end namespace Mips @@ -86,6 +94,10 @@ const RegisterBank &MipsRegisterBankInfo::getRegBankFromRegClass( case Mips::FGR32RegClassID: case Mips::FGR64RegClassID: case Mips::AFGR64RegClassID: + case Mips::MSA128BRegClassID: + case Mips::MSA128HRegClassID: + case Mips::MSA128WRegClassID: + case Mips::MSA128DRegClassID: return getRegBank(Mips::FPRBRegBankID); default: llvm_unreachable("Register class not supported"); @@ -149,6 +161,7 @@ static bool isAmbiguous(unsigned Opc) { case TargetOpcode::G_STORE: case TargetOpcode::G_PHI: case TargetOpcode::G_SELECT: + case TargetOpcode::G_IMPLICIT_DEF: return true; default: return false; @@ -163,8 +176,7 @@ void MipsRegisterBankInfo::AmbiguousRegDefUseContainer::addDefUses( MachineInstr *NonCopyInstr = skipCopiesOutgoing(&UseMI); // Copy with many uses. 
if (NonCopyInstr->getOpcode() == TargetOpcode::COPY && - !TargetRegisterInfo::isPhysicalRegister( - NonCopyInstr->getOperand(0).getReg())) + !Register::isPhysicalRegister(NonCopyInstr->getOperand(0).getReg())) addDefUses(NonCopyInstr->getOperand(0).getReg(), MRI); else DefUses.push_back(skipCopiesOutgoing(&UseMI)); @@ -186,7 +198,7 @@ MipsRegisterBankInfo::AmbiguousRegDefUseContainer::skipCopiesOutgoing( const MachineRegisterInfo &MRI = MF.getRegInfo(); MachineInstr *Ret = MI; while (Ret->getOpcode() == TargetOpcode::COPY && - !TargetRegisterInfo::isPhysicalRegister(Ret->getOperand(0).getReg()) && + !Register::isPhysicalRegister(Ret->getOperand(0).getReg()) && MRI.hasOneUse(Ret->getOperand(0).getReg())) { Ret = &(*MRI.use_instr_begin(Ret->getOperand(0).getReg())); } @@ -200,7 +212,7 @@ MipsRegisterBankInfo::AmbiguousRegDefUseContainer::skipCopiesIncoming( const MachineRegisterInfo &MRI = MF.getRegInfo(); MachineInstr *Ret = MI; while (Ret->getOpcode() == TargetOpcode::COPY && - !TargetRegisterInfo::isPhysicalRegister(Ret->getOperand(1).getReg())) + !Register::isPhysicalRegister(Ret->getOperand(1).getReg())) Ret = MRI.getVRegDef(Ret->getOperand(1).getReg()); return Ret; } @@ -231,6 +243,9 @@ MipsRegisterBankInfo::AmbiguousRegDefUseContainer::AmbiguousRegDefUseContainer( addUseDef(MI->getOperand(2).getReg(), MRI); addUseDef(MI->getOperand(3).getReg(), MRI); } + + if (MI->getOpcode() == TargetOpcode::G_IMPLICIT_DEF) + addDefUses(MI->getOperand(0).getReg(), MRI); } bool MipsRegisterBankInfo::TypeInfoForMF::visit( @@ -318,8 +333,7 @@ void MipsRegisterBankInfo::TypeInfoForMF::setTypes(const MachineInstr *MI, void MipsRegisterBankInfo::TypeInfoForMF::setTypesAccordingToPhysicalRegister( const MachineInstr *MI, const MachineInstr *CopyInst, unsigned Op) { - assert((TargetRegisterInfo::isPhysicalRegister( - CopyInst->getOperand(Op).getReg())) && + assert((Register::isPhysicalRegister(CopyInst->getOperand(Op).getReg())) && "Copies of non physical registers should not be considered here.\n"); const MachineFunction &MF = *CopyInst->getMF(); @@ -353,6 +367,31 @@ void MipsRegisterBankInfo::TypeInfoForMF::cleanupIfNewFunction( } } +static const MipsRegisterBankInfo::ValueMapping * +getMSAMapping(const MachineFunction &MF) { + assert(static_cast(MF.getSubtarget()).hasMSA() && + "MSA mapping not available on target without MSA."); + return &Mips::ValueMappings[Mips::MSAIdx]; +} + +static const MipsRegisterBankInfo::ValueMapping *getFprbMapping(unsigned Size) { + return Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx] + : &Mips::ValueMappings[Mips::DPRIdx]; +} + +static const unsigned CustomMappingID = 1; + +// Only 64 bit mapping is available in fprb and will be marked as custom, i.e. +// will be split into two 32 bit registers in gprb. +static const MipsRegisterBankInfo::ValueMapping * +getGprbOrCustomMapping(unsigned Size, unsigned &MappingID) { + if (Size == 32) + return &Mips::ValueMappings[Mips::GPRIdx]; + + MappingID = CustomMappingID; + return &Mips::ValueMappings[Mips::DPRIdx]; +} + const RegisterBankInfo::InstructionMapping & MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { @@ -377,17 +416,35 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { unsigned NumOperands = MI.getNumOperands(); const ValueMapping *OperandsMapping = &Mips::ValueMappings[Mips::GPRIdx]; unsigned MappingID = DefaultMappingID; - const unsigned CustomMappingID = 1; + + // Check if LLT sizes match sizes of available register banks. 
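The new MSAIdx entry continues the existing layout of Mips::ValueMappings: slot 0 is invalid and each partial mapping owns three consecutive per-operand slots, which is where the constants 1, 4, 7 and 10 come from. A throwaway arithmetic sketch of that layout (plain C++, not an LLVM API):

#include <cassert>

// Mirror of the index constants used in MipsRegisterBankInfo.cpp.
enum ValueMappingIdx { InvalidIdx = 0, GPRIdx = 1, SPRIdx = 4, DPRIdx = 7, MSAIdx = 10 };

// Slot of the first of the three per-operand entries owned by partial mapping
// number PM (0 = gprb 32-bit, 1 = fprb single, 2 = fprb double, 3 = fprb MSA).
constexpr int firstSlot(int PM) { return 1 + 3 * PM; }

int main() {
  static_assert(firstSlot(0) == GPRIdx, "gprb starts at slot 1");
  static_assert(firstSlot(1) == SPRIdx, "single-precision fprb starts at slot 4");
  static_assert(firstSlot(2) == DPRIdx, "double-precision fprb starts at slot 7");
  static_assert(firstSlot(3) == MSAIdx, "128-bit MSA mapping starts at slot 10");
  return 0;
}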
+ for (const MachineOperand &Op : MI.operands()) { + if (Op.isReg()) { + LLT RegTy = MRI.getType(Op.getReg()); + + if (RegTy.isScalar() && + (RegTy.getSizeInBits() != 32 && RegTy.getSizeInBits() != 64)) + return getInvalidInstructionMapping(); + + if (RegTy.isVector() && RegTy.getSizeInBits() != 128) + return getInvalidInstructionMapping(); + } + } + + const LLT Op0Ty = MRI.getType(MI.getOperand(0).getReg()); + unsigned Op0Size = Op0Ty.getSizeInBits(); + InstType InstTy = InstType::Integer; switch (Opc) { case G_TRUNC: - case G_ADD: case G_SUB: case G_MUL: case G_UMULH: case G_ZEXTLOAD: case G_SEXTLOAD: case G_GEP: + case G_INTTOPTR: + case G_PTRTOINT: case G_AND: case G_OR: case G_XOR: @@ -398,66 +455,42 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case G_UDIV: case G_SREM: case G_UREM: + case G_BRINDIRECT: + case G_VASTART: OperandsMapping = &Mips::ValueMappings[Mips::GPRIdx]; break; - case G_LOAD: { - unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - InstType InstTy = InstType::Integer; - if (!MRI.getType(MI.getOperand(0).getReg()).isPointer()) { - InstTy = TI.determineInstType(&MI); - } - - if (InstTy == InstType::FloatingPoint || - (Size == 64 && InstTy == InstType::Ambiguous)) { // fprb - OperandsMapping = - getOperandsMapping({Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx] - : &Mips::ValueMappings[Mips::DPRIdx], - &Mips::ValueMappings[Mips::GPRIdx]}); + case G_ADD: + OperandsMapping = &Mips::ValueMappings[Mips::GPRIdx]; + if (Op0Size == 128) + OperandsMapping = getMSAMapping(MF); + break; + case G_STORE: + case G_LOAD: + if (Op0Size == 128) { + OperandsMapping = getOperandsMapping( + {getMSAMapping(MF), &Mips::ValueMappings[Mips::GPRIdx]}); break; - } else { // gprb - OperandsMapping = - getOperandsMapping({Size <= 32 ? &Mips::ValueMappings[Mips::GPRIdx] - : &Mips::ValueMappings[Mips::DPRIdx], - &Mips::ValueMappings[Mips::GPRIdx]}); - if (Size == 64) - MappingID = CustomMappingID; } - break; - } - case G_STORE: { - unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - InstType InstTy = InstType::Integer; - if (!MRI.getType(MI.getOperand(0).getReg()).isPointer()) { + if (!Op0Ty.isPointer()) InstTy = TI.determineInstType(&MI); - } if (InstTy == InstType::FloatingPoint || - (Size == 64 && InstTy == InstType::Ambiguous)) { // fprb - OperandsMapping = - getOperandsMapping({Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx] - : &Mips::ValueMappings[Mips::DPRIdx], - &Mips::ValueMappings[Mips::GPRIdx]}); - break; - } else { // gprb + (Op0Size == 64 && InstTy == InstType::Ambiguous)) + OperandsMapping = getOperandsMapping( + {getFprbMapping(Op0Size), &Mips::ValueMappings[Mips::GPRIdx]}); + else OperandsMapping = - getOperandsMapping({Size <= 32 ? &Mips::ValueMappings[Mips::GPRIdx] - : &Mips::ValueMappings[Mips::DPRIdx], + getOperandsMapping({getGprbOrCustomMapping(Op0Size, MappingID), &Mips::ValueMappings[Mips::GPRIdx]}); - if (Size == 64) - MappingID = CustomMappingID; - } + break; - } - case G_PHI: { - unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - InstType InstTy = InstType::Integer; - if (!MRI.getType(MI.getOperand(0).getReg()).isPointer()) { + case G_PHI: + if (!Op0Ty.isPointer()) InstTy = TI.determineInstType(&MI); - } // PHI is copylike and should have one regbank in mapping for def register. 
- if (InstTy == InstType::Integer && Size == 64) { // fprb + if (InstTy == InstType::Integer && Op0Size == 64) { OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::DPRIdx]}); return getInstructionMapping(CustomMappingID, /*Cost=*/1, OperandsMapping, @@ -465,80 +498,63 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } // Use default handling for PHI, i.e. set reg bank of def operand to match // register banks of use operands. - const RegisterBankInfo::InstructionMapping &Mapping = - getInstrMappingImpl(MI); - return Mapping; - } + return getInstrMappingImpl(MI); case G_SELECT: { - unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - InstType InstTy = InstType::Integer; - if (!MRI.getType(MI.getOperand(0).getReg()).isPointer()) { + if (!Op0Ty.isPointer()) InstTy = TI.determineInstType(&MI); - } if (InstTy == InstType::FloatingPoint || - (Size == 64 && InstTy == InstType::Ambiguous)) { // fprb - const RegisterBankInfo::ValueMapping *Bank = - Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx] - : &Mips::ValueMappings[Mips::DPRIdx]; + (Op0Size == 64 && InstTy == InstType::Ambiguous)) { + const RegisterBankInfo::ValueMapping *Bank = getFprbMapping(Op0Size); OperandsMapping = getOperandsMapping( {Bank, &Mips::ValueMappings[Mips::GPRIdx], Bank, Bank}); break; - } else { // gprb + } else { const RegisterBankInfo::ValueMapping *Bank = - Size <= 32 ? &Mips::ValueMappings[Mips::GPRIdx] - : &Mips::ValueMappings[Mips::DPRIdx]; + getGprbOrCustomMapping(Op0Size, MappingID); OperandsMapping = getOperandsMapping( {Bank, &Mips::ValueMappings[Mips::GPRIdx], Bank, Bank}); - if (Size == 64) - MappingID = CustomMappingID; } break; } - case G_UNMERGE_VALUES: { + case G_IMPLICIT_DEF: + if (!Op0Ty.isPointer()) + InstTy = TI.determineInstType(&MI); + + if (InstTy == InstType::FloatingPoint) + OperandsMapping = getFprbMapping(Op0Size); + else + OperandsMapping = getGprbOrCustomMapping(Op0Size, MappingID); + + break; + case G_UNMERGE_VALUES: OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], &Mips::ValueMappings[Mips::GPRIdx], &Mips::ValueMappings[Mips::DPRIdx]}); MappingID = CustomMappingID; break; - } - case G_MERGE_VALUES: { + case G_MERGE_VALUES: OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::DPRIdx], &Mips::ValueMappings[Mips::GPRIdx], &Mips::ValueMappings[Mips::GPRIdx]}); MappingID = CustomMappingID; break; - } case G_FADD: case G_FSUB: case G_FMUL: case G_FDIV: case G_FABS: - case G_FSQRT:{ - unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - assert((Size == 32 || Size == 64) && "Unsupported floating point size"); - OperandsMapping = Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx] - : &Mips::ValueMappings[Mips::DPRIdx]; + case G_FSQRT: + OperandsMapping = getFprbMapping(Op0Size); break; - } - case G_FCONSTANT: { - unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - assert((Size == 32 || Size == 64) && "Unsupported floating point size"); - const RegisterBankInfo::ValueMapping *FPRValueMapping = - Size == 32 ? 
&Mips::ValueMappings[Mips::SPRIdx] - : &Mips::ValueMappings[Mips::DPRIdx]; - OperandsMapping = getOperandsMapping({FPRValueMapping, nullptr}); + case G_FCONSTANT: + OperandsMapping = getOperandsMapping({getFprbMapping(Op0Size), nullptr}); break; - } case G_FCMP: { - unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); - assert((Size == 32 || Size == 64) && "Unsupported floating point size"); - const RegisterBankInfo::ValueMapping *FPRValueMapping = - Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx] - : &Mips::ValueMappings[Mips::DPRIdx]; + unsigned Op2Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr, - FPRValueMapping, FPRValueMapping}); + getFprbMapping(Op2Size), getFprbMapping(Op2Size)}); break; } case G_FPEXT: @@ -550,36 +566,31 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { &Mips::ValueMappings[Mips::DPRIdx]}); break; case G_FPTOSI: { + assert((Op0Size == 32) && "Unsupported integer size"); unsigned SizeFP = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); - assert((MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 32) && - "Unsupported integer size"); - assert((SizeFP == 32 || SizeFP == 64) && "Unsupported floating point size"); - OperandsMapping = getOperandsMapping({ - &Mips::ValueMappings[Mips::GPRIdx], - SizeFP == 32 ? &Mips::ValueMappings[Mips::SPRIdx] - : &Mips::ValueMappings[Mips::DPRIdx], - }); + OperandsMapping = getOperandsMapping( + {&Mips::ValueMappings[Mips::GPRIdx], getFprbMapping(SizeFP)}); break; } - case G_SITOFP: { - unsigned SizeInt = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); - unsigned SizeFP = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - (void)SizeInt; - assert((SizeInt == 32) && "Unsupported integer size"); - assert((SizeFP == 32 || SizeFP == 64) && "Unsupported floating point size"); - OperandsMapping = - getOperandsMapping({SizeFP == 32 ? &Mips::ValueMappings[Mips::SPRIdx] - : &Mips::ValueMappings[Mips::DPRIdx], - &Mips::ValueMappings[Mips::GPRIdx]}); + case G_SITOFP: + assert((MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() == 32) && + "Unsupported integer size"); + OperandsMapping = getOperandsMapping( + {getFprbMapping(Op0Size), &Mips::ValueMappings[Mips::GPRIdx]}); break; - } case G_CONSTANT: case G_FRAME_INDEX: case G_GLOBAL_VALUE: + case G_JUMP_TABLE: case G_BRCOND: OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr}); break; + case G_BRJT: + OperandsMapping = + getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr, + &Mips::ValueMappings[Mips::GPRIdx]}); + break; case G_ICMP: OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr, @@ -609,11 +620,41 @@ public: }; } // end anonymous namespace -/// Here we have to narrowScalar s64 operands to s32, combine away -/// G_MERGE/G_UNMERGE and erase instructions that became dead in the process. -/// We manually assign 32 bit gprb to register operands of all new instructions -/// that got created in the process since they will not end up in RegBankSelect -/// loop. Careful not to delete instruction after MI i.e. MI.getIterator()++. +void MipsRegisterBankInfo::setRegBank(MachineInstr &MI, + MachineRegisterInfo &MRI) const { + Register Dest = MI.getOperand(0).getReg(); + switch (MI.getOpcode()) { + case TargetOpcode::G_STORE: + // No def operands, skip this instruction. 
+ break; + case TargetOpcode::G_CONSTANT: + case TargetOpcode::G_LOAD: + case TargetOpcode::G_SELECT: + case TargetOpcode::G_PHI: + case TargetOpcode::G_IMPLICIT_DEF: { + assert(MRI.getType(Dest) == LLT::scalar(32) && "Unexpected operand type."); + MRI.setRegBank(Dest, getRegBank(Mips::GPRBRegBankID)); + break; + } + case TargetOpcode::G_GEP: { + assert(MRI.getType(Dest).isPointer() && "Unexpected operand type."); + MRI.setRegBank(Dest, getRegBank(Mips::GPRBRegBankID)); + break; + } + default: + llvm_unreachable("Unexpected opcode."); + } +} + +static void +combineAwayG_UNMERGE_VALUES(LegalizationArtifactCombiner &ArtCombiner, + MachineInstr &MI) { + SmallVector DeadInstrs; + ArtCombiner.tryCombineMerges(MI, DeadInstrs); + for (MachineInstr *DeadMI : DeadInstrs) + DeadMI->eraseFromParent(); +} + void MipsRegisterBankInfo::applyMappingImpl( const OperandsMapper &OpdMapper) const { MachineInstr &MI = OpdMapper.getMI(); @@ -621,18 +662,19 @@ void MipsRegisterBankInfo::applyMappingImpl( MachineIRBuilder B(MI); MachineFunction *MF = MI.getMF(); MachineRegisterInfo &MRI = OpdMapper.getMRI(); + const LegalizerInfo &LegInfo = *MF->getSubtarget().getLegalizerInfo(); InstManager NewInstrObserver(NewInstrs); GISelObserverWrapper WrapperObserver(&NewInstrObserver); LegalizerHelper Helper(*MF, WrapperObserver, B); - LegalizationArtifactCombiner ArtCombiner( - B, MF->getRegInfo(), *MF->getSubtarget().getLegalizerInfo()); + LegalizationArtifactCombiner ArtCombiner(B, MF->getRegInfo(), LegInfo); switch (MI.getOpcode()) { case TargetOpcode::G_LOAD: case TargetOpcode::G_STORE: case TargetOpcode::G_PHI: - case TargetOpcode::G_SELECT: { + case TargetOpcode::G_SELECT: + case TargetOpcode::G_IMPLICIT_DEF: { Helper.narrowScalar(MI, 0, LLT::scalar(32)); // Handle new instructions. while (!NewInstrs.empty()) { @@ -640,35 +682,21 @@ void MipsRegisterBankInfo::applyMappingImpl( // This is new G_UNMERGE that was created during narrowScalar and will // not be considered for regbank selection. RegBankSelect for mips // visits/makes corresponding G_MERGE first. Combine them here. - if (NewMI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) { - SmallVector DeadInstrs; - ArtCombiner.tryCombineMerges(*NewMI, DeadInstrs); - for (MachineInstr *DeadMI : DeadInstrs) - DeadMI->eraseFromParent(); - } + if (NewMI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) + combineAwayG_UNMERGE_VALUES(ArtCombiner, *NewMI); // This G_MERGE will be combined away when its corresponding G_UNMERGE // gets regBankSelected. else if (NewMI->getOpcode() == TargetOpcode::G_MERGE_VALUES) continue; else - // Manually set register banks for all register operands to 32 bit gprb. - for (auto Op : NewMI->operands()) { - if (Op.isReg()) { - assert(MRI.getType(Op.getReg()).getSizeInBits() == 32 && - "Only 32 bit gprb is handled here.\n"); - MRI.setRegBank(Op.getReg(), getRegBank(Mips::GPRBRegBankID)); - } - } + // Manually set register banks for def operands to 32 bit gprb. 
+ setRegBank(*NewMI, MRI); } return; } - case TargetOpcode::G_UNMERGE_VALUES: { - SmallVector DeadInstrs; - ArtCombiner.tryCombineMerges(MI, DeadInstrs); - for (MachineInstr *DeadMI : DeadInstrs) - DeadMI->eraseFromParent(); + case TargetOpcode::G_UNMERGE_VALUES: + combineAwayG_UNMERGE_VALUES(ArtCombiner, MI); return; - } default: break; } diff --git a/lib/Target/Mips/MipsRegisterBankInfo.h b/lib/Target/Mips/MipsRegisterBankInfo.h index 176813c031ed..fa0f1c7bc941 100644 --- a/lib/Target/Mips/MipsRegisterBankInfo.h +++ b/lib/Target/Mips/MipsRegisterBankInfo.h @@ -38,8 +38,17 @@ public: const InstructionMapping & getInstrMapping(const MachineInstr &MI) const override; + /// Here we have to narrowScalar s64 operands to s32, combine away G_MERGE or + /// G_UNMERGE and erase instructions that became dead in the process. We + /// manually assign bank to def operand of all new instructions that were + /// created in the process since they will not end up in RegBankSelect loop. void applyMappingImpl(const OperandsMapper &OpdMapper) const override; + /// RegBankSelect determined that s64 operand is better to be split into two + /// s32 operands in gprb. Here we manually set register banks of def operands + /// of newly created instructions since they will not get regbankselected. + void setRegBank(MachineInstr &MI, MachineRegisterInfo &MRI) const; + private: /// Some instructions are used with both floating point and integer operands. /// We assign InstType to such instructions as it helps us to avoid cross bank diff --git a/lib/Target/Mips/MipsRegisterBanks.td b/lib/Target/Mips/MipsRegisterBanks.td index 14a0181f8f11..7d11475884ce 100644 --- a/lib/Target/Mips/MipsRegisterBanks.td +++ b/lib/Target/Mips/MipsRegisterBanks.td @@ -11,4 +11,4 @@ def GPRBRegBank : RegisterBank<"GPRB", [GPR32]>; -def FPRBRegBank : RegisterBank<"FPRB", [FGR64, AFGR64]>; +def FPRBRegBank : RegisterBank<"FPRB", [FGR64, AFGR64, MSA128D]>; diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index 4c6cc1ef771c..166ddea0431f 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -171,8 +171,8 @@ void ExpandPseudo::expandLoadCCond(MachineBasicBlock &MBB, Iter I) { assert(I->getOperand(0).isReg() && I->getOperand(1).isFI()); const TargetRegisterClass *RC = RegInfo.intRegClass(4); - unsigned VR = MRI.createVirtualRegister(RC); - unsigned Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); + Register VR = MRI.createVirtualRegister(RC); + Register Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); TII.loadRegFromStack(MBB, I, VR, FI, RC, &RegInfo, 0); BuildMI(MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), Dst) @@ -186,8 +186,8 @@ void ExpandPseudo::expandStoreCCond(MachineBasicBlock &MBB, Iter I) { assert(I->getOperand(0).isReg() && I->getOperand(1).isFI()); const TargetRegisterClass *RC = RegInfo.intRegClass(4); - unsigned VR = MRI.createVirtualRegister(RC); - unsigned Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); + Register VR = MRI.createVirtualRegister(RC); + Register Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); BuildMI(MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), VR) .addReg(Src, getKillRegState(I->getOperand(0).isKill())); @@ -204,11 +204,11 @@ void ExpandPseudo::expandLoadACC(MachineBasicBlock &MBB, Iter I, assert(I->getOperand(0).isReg() && I->getOperand(1).isFI()); const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize); - unsigned VR0 = 
MRI.createVirtualRegister(RC); - unsigned VR1 = MRI.createVirtualRegister(RC); - unsigned Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); - unsigned Lo = RegInfo.getSubReg(Dst, Mips::sub_lo); - unsigned Hi = RegInfo.getSubReg(Dst, Mips::sub_hi); + Register VR0 = MRI.createVirtualRegister(RC); + Register VR1 = MRI.createVirtualRegister(RC); + Register Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); + Register Lo = RegInfo.getSubReg(Dst, Mips::sub_lo); + Register Hi = RegInfo.getSubReg(Dst, Mips::sub_hi); DebugLoc DL = I->getDebugLoc(); const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY); @@ -229,9 +229,9 @@ void ExpandPseudo::expandStoreACC(MachineBasicBlock &MBB, Iter I, assert(I->getOperand(0).isReg() && I->getOperand(1).isFI()); const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize); - unsigned VR0 = MRI.createVirtualRegister(RC); - unsigned VR1 = MRI.createVirtualRegister(RC); - unsigned Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); + Register VR0 = MRI.createVirtualRegister(RC); + Register VR1 = MRI.createVirtualRegister(RC); + Register Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); unsigned SrcKill = getKillRegState(I->getOperand(0).isKill()); DebugLoc DL = I->getDebugLoc(); @@ -242,7 +242,7 @@ void ExpandPseudo::expandStoreACC(MachineBasicBlock &MBB, Iter I, } bool ExpandPseudo::expandCopy(MachineBasicBlock &MBB, Iter I) { - unsigned Src = I->getOperand(1).getReg(); + Register Src = I->getOperand(1).getReg(); std::pair Opcodes = getMFHiLoOpc(Src); if (!Opcodes.first) @@ -262,11 +262,11 @@ bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I, const TargetRegisterClass *DstRC = RegInfo.getMinimalPhysRegClass(Dst); unsigned VRegSize = RegInfo.getRegSizeInBits(*DstRC) / 16; const TargetRegisterClass *RC = RegInfo.intRegClass(VRegSize); - unsigned VR0 = MRI.createVirtualRegister(RC); - unsigned VR1 = MRI.createVirtualRegister(RC); + Register VR0 = MRI.createVirtualRegister(RC); + Register VR1 = MRI.createVirtualRegister(RC); unsigned SrcKill = getKillRegState(I->getOperand(1).isKill()); - unsigned DstLo = RegInfo.getSubReg(Dst, Mips::sub_lo); - unsigned DstHi = RegInfo.getSubReg(Dst, Mips::sub_hi); + Register DstLo = RegInfo.getSubReg(Dst, Mips::sub_lo); + Register DstHi = RegInfo.getSubReg(Dst, Mips::sub_hi); DebugLoc DL = I->getDebugLoc(); BuildMI(MBB, I, DL, TII.get(MFLoOpc), VR0).addReg(Src); @@ -304,9 +304,9 @@ bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB, // stack is used. if (I->getNumOperands() == 4 && I->getOperand(3).isReg() && I->getOperand(3).getReg() == Mips::SP) { - unsigned DstReg = I->getOperand(0).getReg(); - unsigned LoReg = I->getOperand(1).getReg(); - unsigned HiReg = I->getOperand(2).getReg(); + Register DstReg = I->getOperand(0).getReg(); + Register LoReg = I->getOperand(1).getReg(); + Register HiReg = I->getOperand(2).getReg(); // It should be impossible to have FGR64 on MIPS-II or MIPS32r1 (which are // the cases where mthc1 is not available). 64-bit architectures and @@ -346,7 +346,7 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB, const MachineOperand &Op2 = I->getOperand(2); if ((Op1.isReg() && Op1.isUndef()) || (Op2.isReg() && Op2.isUndef())) { - unsigned DstReg = I->getOperand(0).getReg(); + Register DstReg = I->getOperand(0).getReg(); BuildMI(MBB, I, I->getDebugLoc(), TII.get(Mips::IMPLICIT_DEF), DstReg); return true; } @@ -369,8 +369,8 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB, // stack is used. 
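Much of the mechanical churn in these hunks is the unsigned-to-Register migration. The stand-in below (not llvm::Register itself) sketches why the swap is source compatible: the wrapper converts implicitly in both directions, so legacy APIs taking unsigned keep working while call sites gain the physical/virtual queries. The bit-encoding detail in the comment is only assumed here for illustration.

#include <cassert>

// Minimal stand-in for the idea behind llvm::Register (not the real class).
class RegisterLike {
  unsigned Reg;
public:
  RegisterLike(unsigned R = 0) : Reg(R) {}   // implicit from unsigned
  operator unsigned() const { return Reg; }  // implicit back to unsigned
  // In-tree, virtual register numbers carry the top bit while physical ones
  // are small positive values; assumed here purely for illustration.
  bool isVirtual() const { return (Reg & 0x80000000u) != 0; }
  bool isPhysical() const { return Reg != 0 && !isVirtual(); }
};

static unsigned legacyApiTakingUnsigned(unsigned R) { return R; }

int main() {
  RegisterLike Phys(42);               // e.g. a physical register number
  RegisterLike Virt(0x80000000u | 7);  // a virtual register number
  assert(Phys.isPhysical() && !Phys.isVirtual());
  assert(Virt.isVirtual() && !Virt.isPhysical());
  // Old call sites that still take 'unsigned' keep compiling unchanged.
  assert(legacyApiTakingUnsigned(Phys) == 42u);
  return 0;
}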
if (I->getNumOperands() == 4 && I->getOperand(3).isReg() && I->getOperand(3).getReg() == Mips::SP) { - unsigned DstReg = I->getOperand(0).getReg(); - unsigned SrcReg = Op1.getReg(); + Register DstReg = I->getOperand(0).getReg(); + Register SrcReg = Op1.getReg(); unsigned N = Op2.getImm(); int64_t Offset = 4 * (Subtarget.isLittle() ? N : (1 - N)); @@ -538,7 +538,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, if (RegInfo.needsStackRealignment(MF)) { // addiu $Reg, $zero, -MaxAlignment // andi $sp, $sp, $Reg - unsigned VR = MF.getRegInfo().createVirtualRegister(RC); + Register VR = MF.getRegInfo().createVirtualRegister(RC); assert(isInt<16>(MFI.getMaxAlignment()) && "Function's alignment size requirement is not supported."); int MaxAlign = -(int)MFI.getMaxAlignment(); @@ -865,12 +865,15 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF, const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); MipsFunctionInfo *MipsFI = MF.getInfo(); MipsABIInfo ABI = STI.getABI(); + unsigned RA = ABI.IsN64() ? Mips::RA_64 : Mips::RA; unsigned FP = ABI.GetFramePtr(); unsigned BP = ABI.IsN64() ? Mips::S7_64 : Mips::S7; - // Mark $fp as used if function has dedicated frame pointer. - if (hasFP(MF)) + // Mark $ra and $fp as used if function has dedicated frame pointer. + if (hasFP(MF)) { + setAliasRegs(MF, SavedRegs, RA); setAliasRegs(MF, SavedRegs, FP); + } // Mark $s7 as used if function has dedicated base pointer. if (hasBP(MF)) setAliasRegs(MF, SavedRegs, BP); diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 703f99f37dd1..c8313240a678 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -124,6 +124,33 @@ bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI, return true; } +void MipsSEDAGToDAGISel::emitMCountABI(MachineInstr &MI, MachineBasicBlock &MBB, + MachineFunction &MF) { + MachineInstrBuilder MIB(MF, &MI); + if (!Subtarget->isABI_O32()) { // N32, N64 + // Save current return address. + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Mips::OR64)) + .addDef(Mips::AT_64) + .addUse(Mips::RA_64, RegState::Undef) + .addUse(Mips::ZERO_64); + // Stops instruction above from being removed later on. + MIB.addUse(Mips::AT_64, RegState::Implicit); + } else { // O32 + // Save current return address. + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Mips::OR)) + .addDef(Mips::AT) + .addUse(Mips::RA, RegState::Undef) + .addUse(Mips::ZERO); + // _mcount pops 2 words from stack. + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Mips::ADDiu)) + .addDef(Mips::SP) + .addUse(Mips::SP) + .addImm(-8); + // Stops first instruction above from being removed later on. 
+ MIB.addUse(Mips::AT, RegState::Implicit); + } +} + void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { MF.getInfo()->initGlobalBaseReg(); @@ -150,6 +177,24 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { if (Subtarget->isABI_FPXX() && !Subtarget->hasMTHC1()) MI.addOperand(MachineOperand::CreateReg(Mips::SP, false, true)); break; + case Mips::JAL: + case Mips::JAL_MM: + if (MI.getOperand(0).isGlobal() && + MI.getOperand(0).getGlobal()->getGlobalIdentifier() == "_mcount") + emitMCountABI(MI, MBB, MF); + break; + case Mips::JALRPseudo: + case Mips::JALR64Pseudo: + case Mips::JALR16_MM: + if (MI.getOperand(2).isMCSymbol() && + MI.getOperand(2).getMCSymbol()->getName() == "_mcount") + emitMCountABI(MI, MBB, MF); + break; + case Mips::JALR: + if (MI.getOperand(3).isMCSymbol() && + MI.getOperand(3).getMCSymbol()->getName() == "_mcount") + emitMCountABI(MI, MBB, MF); + break; default: replaceUsesWithZeroReg(MRI, MI); } @@ -247,7 +292,8 @@ bool MipsSEDAGToDAGISel::selectAddrFrameIndexOffset( Base = Addr.getOperand(0); // If base is a FI, additional offset calculation is done in // eliminateFrameIndex, otherwise we need to check the alignment - if (OffsetToAlignment(CN->getZExtValue(), 1ull << ShiftAmount) != 0) + const Align Alignment(1ULL << ShiftAmount); + if (!isAligned(Alignment, CN->getZExtValue())) return false; } @@ -719,7 +765,7 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) { } case ISD::ConstantFP: { - ConstantFPSDNode *CN = dyn_cast(Node); + auto *CN = cast(Node); if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { if (Subtarget->isGP64bit()) { SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, @@ -743,7 +789,7 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) { } case ISD::Constant: { - const ConstantSDNode *CN = dyn_cast(Node); + auto *CN = cast(Node); int64_t Imm = CN->getSExtValue(); unsigned Size = CN->getValueSizeInBits(0); @@ -969,7 +1015,7 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) { break; } - SDNode *Res; + SDNode *Res = nullptr; // If we have a signed 10 bit integer, we can splat it directly. // diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h index ce594e1fb4fa..39f665be571e 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -120,7 +120,7 @@ private: /// power of 2. bool selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const override; /// Select constant vector splats whose value is a run of set bits - /// ending at the most significant bit + /// ending at the most significant bit. bool selectVSplatMaskL(SDValue N, SDValue &Imm) const override; /// Select constant vector splats whose value is a run of set bits /// starting at bit zero. @@ -128,6 +128,10 @@ private: bool trySelect(SDNode *Node) override; + // Emits proper ABI for _mcount profiling calls. + void emitMCountABI(MachineInstr &MI, MachineBasicBlock &MBB, + MachineFunction &MF); + void processFunctionAfterISel(MachineFunction &MF) override; bool SelectInlineAsmMemoryOperand(const SDValue &Op, diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index edf57a3840d1..5bd234f955ba 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -71,8 +71,8 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM, if (Subtarget.hasDSP() || Subtarget.hasMSA()) { // Expand all truncating stores and extending loads. 
- for (MVT VT0 : MVT::vector_valuetypes()) { - for (MVT VT1 : MVT::vector_valuetypes()) { + for (MVT VT0 : MVT::fixedlen_vector_valuetypes()) { + for (MVT VT1 : MVT::fixedlen_vector_valuetypes()) { setTruncStoreAction(VT0, VT1, Expand); setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand); @@ -327,6 +327,7 @@ addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); + setOperationAction(ISD::UNDEF, Ty, Legal); setOperationAction(ISD::ADD, Ty, Legal); setOperationAction(ISD::AND, Ty, Legal); @@ -2595,7 +2596,8 @@ static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, SDLoc DL(Op); return DAG.getNode(MipsISD::SHF, DL, ResTy, - DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0)); + DAG.getTargetConstant(Imm, DL, MVT::i32), + Op->getOperand(0)); } /// Determine whether a range fits a regular pattern of values. @@ -3062,13 +3064,13 @@ MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI, BuildMI(BB, DL, TII->get(Mips::BPOSGE32C_MMR3)).addMBB(TBB); // Fill $FBB. - unsigned VR2 = RegInfo.createVirtualRegister(RC); + Register VR2 = RegInfo.createVirtualRegister(RC); BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2) .addReg(Mips::ZERO).addImm(0); BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); // Fill $TBB. - unsigned VR1 = RegInfo.createVirtualRegister(RC); + Register VR1 = RegInfo.createVirtualRegister(RC); BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1) .addReg(Mips::ZERO).addImm(1); @@ -3131,13 +3133,13 @@ MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo( .addMBB(TBB); // Fill $FBB. - unsigned RD1 = RegInfo.createVirtualRegister(RC); + Register RD1 = RegInfo.createVirtualRegister(RC); BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1) .addReg(Mips::ZERO).addImm(0); BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); // Fill $TBB. - unsigned RD2 = RegInfo.createVirtualRegister(RC); + Register RD2 = RegInfo.createVirtualRegister(RC); BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2) .addReg(Mips::ZERO).addImm(1); @@ -3169,8 +3171,8 @@ MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Fd = MI.getOperand(0).getReg(); - unsigned Ws = MI.getOperand(1).getReg(); + Register Fd = MI.getOperand(0).getReg(); + Register Ws = MI.getOperand(1).getReg(); unsigned Lane = MI.getOperand(2).getImm(); if (Lane == 0) { @@ -3185,9 +3187,9 @@ MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI, BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); } else { - unsigned Wt = RegInfo.createVirtualRegister( - Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass : - &Mips::MSA128WEvensRegClass); + Register Wt = RegInfo.createVirtualRegister( + Subtarget.useOddSPReg() ? 
&Mips::MSA128WRegClass + : &Mips::MSA128WEvensRegClass); BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane); BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); @@ -3214,15 +3216,15 @@ MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); - unsigned Fd = MI.getOperand(0).getReg(); - unsigned Ws = MI.getOperand(1).getReg(); + Register Fd = MI.getOperand(0).getReg(); + Register Ws = MI.getOperand(1).getReg(); unsigned Lane = MI.getOperand(2).getImm() * 2; DebugLoc DL = MI.getDebugLoc(); if (Lane == 0) BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64); else { - unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); + Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1); BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64); @@ -3244,13 +3246,13 @@ MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Wd = MI.getOperand(0).getReg(); - unsigned Wd_in = MI.getOperand(1).getReg(); + Register Wd = MI.getOperand(0).getReg(); + Register Wd_in = MI.getOperand(1).getReg(); unsigned Lane = MI.getOperand(2).getImm(); - unsigned Fs = MI.getOperand(3).getReg(); - unsigned Wt = RegInfo.createVirtualRegister( - Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass : - &Mips::MSA128WEvensRegClass); + Register Fs = MI.getOperand(3).getReg(); + Register Wt = RegInfo.createVirtualRegister( + Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass + : &Mips::MSA128WEvensRegClass); BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) .addImm(0) @@ -3280,11 +3282,11 @@ MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Wd = MI.getOperand(0).getReg(); - unsigned Wd_in = MI.getOperand(1).getReg(); + Register Wd = MI.getOperand(0).getReg(); + Register Wd_in = MI.getOperand(1).getReg(); unsigned Lane = MI.getOperand(2).getImm(); - unsigned Fs = MI.getOperand(3).getReg(); - unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); + Register Fs = MI.getOperand(3).getReg(); + Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) .addImm(0) @@ -3326,10 +3328,10 @@ MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX( const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Wd = MI.getOperand(0).getReg(); - unsigned SrcVecReg = MI.getOperand(1).getReg(); - unsigned LaneReg = MI.getOperand(2).getReg(); - unsigned SrcValReg = MI.getOperand(3).getReg(); + Register Wd = MI.getOperand(0).getReg(); + Register SrcVecReg = MI.getOperand(1).getReg(); + Register LaneReg = MI.getOperand(2).getReg(); + Register SrcValReg = MI.getOperand(3).getReg(); const TargetRegisterClass *VecRC = nullptr; // FIXME: This should be true for N32 too. 
@@ -3370,7 +3372,7 @@ MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX( } if (IsFP) { - unsigned Wt = RegInfo.createVirtualRegister(VecRC); + Register Wt = RegInfo.createVirtualRegister(VecRC); BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) .addImm(0) .addReg(SrcValReg) @@ -3380,7 +3382,7 @@ MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX( // Convert the lane index into a byte index if (EltSizeInBytes != 1) { - unsigned LaneTmp1 = RegInfo.createVirtualRegister(GPRRC); + Register LaneTmp1 = RegInfo.createVirtualRegister(GPRRC); BuildMI(*BB, MI, DL, TII->get(ShiftOp), LaneTmp1) .addReg(LaneReg) .addImm(EltLog2Size); @@ -3388,13 +3390,13 @@ MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX( } // Rotate bytes around so that the desired lane is element zero - unsigned WdTmp1 = RegInfo.createVirtualRegister(VecRC); + Register WdTmp1 = RegInfo.createVirtualRegister(VecRC); BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1) .addReg(SrcVecReg) .addReg(SrcVecReg) .addReg(LaneReg, 0, SubRegIdx); - unsigned WdTmp2 = RegInfo.createVirtualRegister(VecRC); + Register WdTmp2 = RegInfo.createVirtualRegister(VecRC); if (IsFP) { // Use insve.df to insert to element zero BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2) @@ -3413,7 +3415,7 @@ MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX( // Rotate elements the rest of the way for a full rotation. // sld.df inteprets $rt modulo the number of columns so we only need to negate // the lane index to do this. - unsigned LaneTmp2 = RegInfo.createVirtualRegister(GPRRC); + Register LaneTmp2 = RegInfo.createVirtualRegister(GPRRC); BuildMI(*BB, MI, DL, TII->get(Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB), LaneTmp2) .addReg(Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO) @@ -3440,12 +3442,12 @@ MipsSETargetLowering::emitFILL_FW(MachineInstr &MI, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Wd = MI.getOperand(0).getReg(); - unsigned Fs = MI.getOperand(1).getReg(); - unsigned Wt1 = RegInfo.createVirtualRegister( + Register Wd = MI.getOperand(0).getReg(); + Register Fs = MI.getOperand(1).getReg(); + Register Wt1 = RegInfo.createVirtualRegister( Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass : &Mips::MSA128WEvensRegClass); - unsigned Wt2 = RegInfo.createVirtualRegister( + Register Wt2 = RegInfo.createVirtualRegister( Subtarget.useOddSPReg() ? 
&Mips::MSA128WRegClass : &Mips::MSA128WEvensRegClass); @@ -3475,10 +3477,10 @@ MipsSETargetLowering::emitFILL_FD(MachineInstr &MI, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Wd = MI.getOperand(0).getReg(); - unsigned Fs = MI.getOperand(1).getReg(); - unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); - unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); + Register Wd = MI.getOperand(0).getReg(); + Register Fs = MI.getOperand(1).getReg(); + Register Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); + Register Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1); BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2) @@ -3509,8 +3511,8 @@ MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Ws = MI.getOperand(0).getReg(); - unsigned Rt = MI.getOperand(1).getReg(); + Register Ws = MI.getOperand(0).getReg(); + Register Rt = MI.getOperand(1).getReg(); const MachineMemOperand &MMO = **MI.memoperands_begin(); unsigned Imm = MMO.getOffset(); @@ -3522,11 +3524,11 @@ MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI, : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass : &Mips::GPR64RegClass); const bool UsingMips32 = RC == &Mips::GPR32RegClass; - unsigned Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass); + Register Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass); BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0); if(!UsingMips32) { - unsigned Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass); + Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass); BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Tmp) .addImm(0) .addReg(Rs) @@ -3564,7 +3566,7 @@ MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Wd = MI.getOperand(0).getReg(); + Register Wd = MI.getOperand(0).getReg(); // Caution: A load via the GOT can expand to a GPR32 operand, a load via // spill and reload can expand as a GPR64 operand. Examine the @@ -3575,7 +3577,7 @@ MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI, : &Mips::GPR64RegClass); const bool UsingMips32 = RC == &Mips::GPR32RegClass; - unsigned Rt = RegInfo.createVirtualRegister(RC); + Register Rt = RegInfo.createVirtualRegister(RC); MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? 
Mips::LH : Mips::LH64), Rt); @@ -3583,7 +3585,7 @@ MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI, MIB.add(MI.getOperand(i)); if(!UsingMips32) { - unsigned Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass); + Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass); BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Tmp).addReg(Rt, 0, Mips::sub_32); Rt = Tmp; } @@ -3658,11 +3660,11 @@ MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Wd = MI.getOperand(0).getReg(); - unsigned Fs = MI.getOperand(1).getReg(); + Register Wd = MI.getOperand(0).getReg(); + Register Fs = MI.getOperand(1).getReg(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); - unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); + Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); const TargetRegisterClass *GPRRC = IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; unsigned MFC1Opc = IsFGR64onMips64 @@ -3671,16 +3673,16 @@ MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI, unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W; // Perform the register class copy as mentioned above. - unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC); + Register Rtemp = RegInfo.createVirtualRegister(GPRRC); BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs); BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp); unsigned WPHI = Wtemp; if (IsFGR64onMips32) { - unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC); + Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC); BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs); - unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); - unsigned Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); + Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); + Register Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2) .addReg(Wtemp) .addReg(Rtemp2) @@ -3693,7 +3695,7 @@ MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI, } if (IsFGR64) { - unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); + Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2) .addReg(WPHI) .addReg(WPHI); @@ -3817,8 +3819,8 @@ MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); const TargetRegisterClass *RC = &Mips::MSA128WRegClass; - unsigned Ws1 = RegInfo.createVirtualRegister(RC); - unsigned Ws2 = RegInfo.createVirtualRegister(RC); + Register Ws1 = RegInfo.createVirtualRegister(RC); + Register Ws2 = RegInfo.createVirtualRegister(RC); DebugLoc DL = MI.getDebugLoc(); // Splat 1.0 into a vector @@ -3846,8 +3848,8 @@ MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); const TargetRegisterClass *RC = &Mips::MSA128DRegClass; - unsigned Ws1 = RegInfo.createVirtualRegister(RC); - unsigned Ws2 = RegInfo.createVirtualRegister(RC); + Register Ws1 = RegInfo.createVirtualRegister(RC); + Register Ws2 = RegInfo.createVirtualRegister(RC); DebugLoc DL = MI.getDebugLoc(); // Splat 1.0 into a vector diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp 
b/lib/Target/Mips/MipsSEInstrInfo.cpp index 4e49f5e7d9d1..2126a1bda493 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -628,7 +628,7 @@ unsigned MipsSEInstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB, // The first instruction can be a LUi, which is different from other // instructions (ADDiu, ORI and SLL) in that it does not have a register // operand. - unsigned Reg = RegInfo.createVirtualRegister(RC); + Register Reg = RegInfo.createVirtualRegister(RC); if (Inst->Opc == LUi) BuildMI(MBB, II, DL, get(LUi), Reg).addImm(SignExtend64<16>(Inst->ImmOpnd)); @@ -734,9 +734,9 @@ void MipsSEInstrInfo::expandPseudoMTLoHi(MachineBasicBlock &MBB, // Add lo/hi registers if the mtlo/hi instructions created have explicit // def registers. if (HasExplicitDef) { - unsigned DstReg = I->getOperand(0).getReg(); - unsigned DstLo = getRegisterInfo().getSubReg(DstReg, Mips::sub_lo); - unsigned DstHi = getRegisterInfo().getSubReg(DstReg, Mips::sub_hi); + Register DstReg = I->getOperand(0).getReg(); + Register DstLo = getRegisterInfo().getSubReg(DstReg, Mips::sub_lo); + Register DstHi = getRegisterInfo().getSubReg(DstReg, Mips::sub_hi); LoInst.addReg(DstLo, RegState::Define); HiInst.addReg(DstHi, RegState::Define); } @@ -773,14 +773,14 @@ void MipsSEInstrInfo::expandExtractElementF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, bool isMicroMips, bool FP64) const { - unsigned DstReg = I->getOperand(0).getReg(); - unsigned SrcReg = I->getOperand(1).getReg(); + Register DstReg = I->getOperand(0).getReg(); + Register SrcReg = I->getOperand(1).getReg(); unsigned N = I->getOperand(2).getImm(); DebugLoc dl = I->getDebugLoc(); assert(N < 2 && "Invalid immediate"); unsigned SubIdx = N ? Mips::sub_hi : Mips::sub_lo; - unsigned SubReg = getRegisterInfo().getSubReg(SrcReg, SubIdx); + Register SubReg = getRegisterInfo().getSubReg(SrcReg, SubIdx); // FPXX on MIPS-II or MIPS32r1 should have been handled with a spill/reload // in MipsSEFrameLowering.cpp. @@ -815,7 +815,7 @@ void MipsSEInstrInfo::expandExtractElementF64(MachineBasicBlock &MBB, void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, bool isMicroMips, bool FP64) const { - unsigned DstReg = I->getOperand(0).getReg(); + Register DstReg = I->getOperand(0).getReg(); unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg(); const MCInstrDesc& Mtc1Tdd = get(Mips::MTC1); DebugLoc dl = I->getDebugLoc(); @@ -883,8 +883,8 @@ void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB, unsigned RA = Subtarget.isGP64bit() ? Mips::RA_64 : Mips::RA; unsigned T9 = Subtarget.isGP64bit() ? Mips::T9_64 : Mips::T9; unsigned ZERO = Subtarget.isGP64bit() ? Mips::ZERO_64 : Mips::ZERO; - unsigned OffsetReg = I->getOperand(0).getReg(); - unsigned TargetReg = I->getOperand(1).getReg(); + Register OffsetReg = I->getOperand(0).getReg(); + Register TargetReg = I->getOperand(1).getReg(); // addu $ra, $v0, $zero // addu $sp, $sp, $v1 diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp index f4b164d5c0ab..a48088c28919 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -212,11 +212,9 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, // element size), otherwise it is a 16-bit signed immediate. 
     unsigned OffsetBitSize =
         getLoadStoreOffsetSizeInBits(MI.getOpcode(), MI.getOperand(OpNo - 1));
-    unsigned OffsetAlign = getLoadStoreOffsetAlign(MI.getOpcode());
-
+    const Align OffsetAlign(getLoadStoreOffsetAlign(MI.getOpcode()));
     if (OffsetBitSize < 16 && isInt<16>(Offset) &&
-        (!isIntN(OffsetBitSize, Offset) ||
-         OffsetToAlignment(Offset, OffsetAlign) != 0)) {
+        (!isIntN(OffsetBitSize, Offset) || !isAligned(OffsetAlign, Offset))) {
       // If we have an offset that needs to fit into a signed n-bit immediate
       // (where n < 16) and doesn't, but does fit into 16-bits then use an ADDiu
       MachineBasicBlock &MBB = *MI.getParent();
@@ -224,7 +222,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
       const TargetRegisterClass *PtrRC =
           ABI.ArePtrs64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
       MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
-      unsigned Reg = RegInfo.createVirtualRegister(PtrRC);
+      Register Reg = RegInfo.createVirtualRegister(PtrRC);
       const MipsSEInstrInfo &TII =
           *static_cast<const MipsSEInstrInfo *>(
               MBB.getParent()->getSubtarget().getInstrInfo());
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index d021b3d021b1..b9245c9fc0eb 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -69,7 +69,7 @@ void MipsSubtarget::anchor() {}
 
 MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
                              bool little, const MipsTargetMachine &TM,
-                             unsigned StackAlignOverride)
+                             MaybeAlign StackAlignOverride)
     : MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(MipsDefault),
       IsLittle(little), IsSoftFloat(false), IsSingleFloat(false), IsFPXX(false),
       NoABICalls(false), Abs2008(false), IsFP64bit(false), UseOddSPReg(true),
@@ -81,10 +81,9 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
       Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false),
       HasEVA(false), DisableMadd4(false), HasMT(false), HasCRC(false),
       HasVirt(false), HasGINV(false), UseIndirectJumpsHazard(false),
-      StackAlignOverride(StackAlignOverride),
-      TM(TM), TargetTriple(TT), TSInfo(),
-      InstrInfo(
-          MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))),
+      StackAlignOverride(StackAlignOverride), TM(TM), TargetTriple(TT),
+      TSInfo(), InstrInfo(MipsInstrInfo::create(
+                    initializeSubtargetDependencies(CPU, FS, TM))),
       FrameLowering(MipsFrameLowering::create(*this)),
       TLInfo(MipsTargetLowering::create(TM, *this)) {
 
@@ -248,12 +247,12 @@ MipsSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS,
     InMips16HardFloat = true;
 
   if (StackAlignOverride)
-    stackAlignment = StackAlignOverride;
+    stackAlignment = *StackAlignOverride;
   else if (isABI_N32() || isABI_N64())
-    stackAlignment = 16;
+    stackAlignment = Align(16);
   else {
     assert(isABI_O32() && "Unknown ABI for stack alignment!");
-    stackAlignment = 8;
+    stackAlignment = Align(8);
   }
 
   return *this;
@@ -286,6 +285,6 @@ const RegisterBankInfo *MipsSubtarget::getRegBankInfo() const {
   return RegBankInfo.get();
 }
 
-const InstructionSelector *MipsSubtarget::getInstructionSelector() const {
+InstructionSelector *MipsSubtarget::getInstructionSelector() const {
   return InstSelector.get();
 }
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index aa1200579fc8..0a8c2ef8ae5c 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -189,12 +189,15 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
   // Disable use of the `jal` instruction.
   bool UseLongCalls = false;
 
+  // Assume 32-bit GOT.
+ bool UseXGOT = false; + /// The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. - unsigned stackAlignment; + Align stackAlignment; /// The overridden stack alignment. - unsigned StackAlignOverride; + MaybeAlign StackAlignOverride; InstrItineraryData InstrItins; @@ -227,7 +230,7 @@ public: /// This constructor initializes the data members to match that /// of the specified triple. MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, bool little, - const MipsTargetMachine &TM, unsigned StackAlignOverride); + const MipsTargetMachine &TM, MaybeAlign StackAlignOverride); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. @@ -323,6 +326,8 @@ public: bool useLongCalls() const { return UseLongCalls; } + bool useXGOT() const { return UseXGOT; } + bool enableLongBranchPass() const { return hasStandardEncoding() || inMicroMipsMode() || allowMixed16_32(); } @@ -344,7 +349,7 @@ public: // really use them if in addition we are in mips16 mode static bool useConstantIslands(); - unsigned getStackAlignment() const { return stackAlignment; } + Align getStackAlignment() const { return stackAlignment; } // Grab relocation model Reloc::Model getRelocationModel() const; @@ -391,7 +396,7 @@ public: const CallLowering *getCallLowering() const override; const LegalizerInfo *getLegalizerInfo() const override; const RegisterBankInfo *getRegBankInfo() const override; - const InstructionSelector *getInstructionSelector() const override; + InstructionSelector *getInstructionSelector() const override; }; } // End llvm namespace diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index c878abb042e4..e58f316791ba 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -117,14 +117,17 @@ MipsTargetMachine::MipsTargetMachine(const Target &T, const Triple &TT, : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT, CPU, FS, Options, getEffectiveRelocModel(JIT, RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), - isLittle(isLittle), TLOF(llvm::make_unique()), + isLittle(isLittle), TLOF(std::make_unique()), ABI(MipsABIInfo::computeTargetABI(TT, CPU, Options.MCOptions)), - Subtarget(nullptr), DefaultSubtarget(TT, CPU, FS, isLittle, *this, - Options.StackAlignmentOverride), + Subtarget(nullptr), + DefaultSubtarget(TT, CPU, FS, isLittle, *this, + MaybeAlign(Options.StackAlignmentOverride)), NoMips16Subtarget(TT, CPU, FS.empty() ? "-mips16" : FS.str() + ",-mips16", - isLittle, *this, Options.StackAlignmentOverride), + isLittle, *this, + MaybeAlign(Options.StackAlignmentOverride)), Mips16Subtarget(TT, CPU, FS.empty() ? "+mips16" : FS.str() + ",+mips16", - isLittle, *this, Options.StackAlignmentOverride) { + isLittle, *this, + MaybeAlign(Options.StackAlignmentOverride)) { Subtarget = &DefaultSubtarget; initAsmInfo(); } @@ -196,8 +199,9 @@ MipsTargetMachine::getSubtargetImpl(const Function &F) const { // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. 
resetTargetOptions(F); - I = llvm::make_unique(TargetTriple, CPU, FS, isLittle, *this, - Options.StackAlignmentOverride); + I = std::make_unique( + TargetTriple, CPU, FS, isLittle, *this, + MaybeAlign(Options.StackAlignmentOverride)); } return I.get(); } diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h index 1fa8ebadd643..298d056ce2c3 100644 --- a/lib/Target/Mips/MipsTargetStreamer.h +++ b/lib/Target/Mips/MipsTargetStreamer.h @@ -130,6 +130,8 @@ public: SMLoc IDLoc, const MCSubtargetInfo *STI); void emitRRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, unsigned Reg2, SMLoc IDLoc, const MCSubtargetInfo *STI); + void emitRRRX(unsigned Opcode, unsigned Reg0, unsigned Reg1, unsigned Reg2, + MCOperand Op3, SMLoc IDLoc, const MCSubtargetInfo *STI); void emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm, SMLoc IDLoc, const MCSubtargetInfo *STI); void emitRRIII(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm0, @@ -154,17 +156,13 @@ public: unsigned BaseReg, int64_t Offset, function_ref GetATReg, SMLoc IDLoc, const MCSubtargetInfo *STI); - void emitStoreWithSymOffset(unsigned Opcode, unsigned SrcReg, - unsigned BaseReg, MCOperand &HiOperand, - MCOperand &LoOperand, unsigned ATReg, SMLoc IDLoc, - const MCSubtargetInfo *STI); + void emitSCWithSymOffset(unsigned Opcode, unsigned SrcReg, unsigned BaseReg, + MCOperand &HiOperand, MCOperand &LoOperand, + unsigned ATReg, SMLoc IDLoc, + const MCSubtargetInfo *STI); void emitLoadWithImmOffset(unsigned Opcode, unsigned DstReg, unsigned BaseReg, int64_t Offset, unsigned TmpReg, SMLoc IDLoc, const MCSubtargetInfo *STI); - void emitLoadWithSymOffset(unsigned Opcode, unsigned DstReg, unsigned BaseReg, - MCOperand &HiOperand, MCOperand &LoOperand, - unsigned ATReg, SMLoc IDLoc, - const MCSubtargetInfo *STI); void emitGPRestore(int Offset, SMLoc IDLoc, const MCSubtargetInfo *STI); void forbidModuleDirective() { ModuleDirectiveAllowed = false; } diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h index 6530c40ea100..0acbace5f848 100644 --- a/lib/Target/NVPTX/NVPTX.h +++ b/lib/Target/NVPTX/NVPTX.h @@ -44,7 +44,7 @@ MachineFunctionPass *createNVPTXPrologEpilogPass(); MachineFunctionPass *createNVPTXReplaceImageHandlesPass(); FunctionPass *createNVPTXImageOptimizerPass(); FunctionPass *createNVPTXLowerArgsPass(const NVPTXTargetMachine *TM); -BasicBlockPass *createNVPTXLowerAllocaPass(); +FunctionPass *createNVPTXLowerAllocaPass(); MachineFunctionPass *createNVPTXPeephole(); MachineFunctionPass *createNVPTXProxyRegErasurePass(); diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 5f38b4a3c4c5..307f4d58c3ab 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -282,7 +282,7 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, } unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { const TargetRegisterClass *RC = MRI->getRegClass(Reg); DenseMap &RegMap = VRegMapping[RC]; @@ -434,7 +434,7 @@ bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll( return false; } -void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { +void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) { AsmPrinter::EmitBasicBlockStart(MBB); if (isLoopHeaderOfNoUnroll(MBB)) OutStreamer->EmitRawText(StringRef("\t.pragma \"nounroll\";\n")); @@ -507,8 +507,8 @@ const MCSymbol 
*NVPTXAsmPrinter::getFunctionFrameSymbol() const { } void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const { - unsigned RegNo = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isVirtualRegister(RegNo)) { + Register RegNo = MI->getOperand(0).getReg(); + if (Register::isVirtualRegister(RegNo)) { OutStreamer->AddComment(Twine("implicit-def: ") + getVirtualRegisterName(RegNo)); } else { @@ -1397,7 +1397,7 @@ static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) { auto *FTy = dyn_cast(Ty); if (FTy) - return DL.getPointerPrefAlignment(); + return DL.getPointerPrefAlignment().value(); return DL.getPrefTypeAlignment(Ty); } @@ -1473,12 +1473,11 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { // Just print .param .align .b8 .param[size]; // = PAL.getparamalignment // size = typeallocsize of element type - unsigned align = PAL.getParamAlignment(paramIndex); - if (align == 0) - align = DL.getABITypeAlignment(Ty); + const Align align = DL.getValueOrABITypeAlignment( + PAL.getParamAlignment(paramIndex), Ty); unsigned sz = DL.getTypeAllocSize(Ty); - O << "\t.param .align " << align << " .b8 "; + O << "\t.param .align " << align.value() << " .b8 "; printParamName(I, paramIndex, O); O << "[" << sz << "]"; @@ -1559,9 +1558,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { // Just print .param .align .b8 .param[size]; // = PAL.getparamalignment // size = typeallocsize of element type - unsigned align = PAL.getParamAlignment(paramIndex); - if (align == 0) - align = DL.getABITypeAlignment(ETy); + Align align = + DL.getValueOrABITypeAlignment(PAL.getParamAlignment(paramIndex), ETy); // Work around a bug in ptxas. When PTX code takes address of // byval parameter with alignment < 4, ptxas generates code to // spill argument into memory. Alas on sm_50+ ptxas generates @@ -1573,10 +1571,10 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { // TODO: this will need to be undone when we get to support multi-TU // device-side compilation as it breaks ABI compatibility with nvcc. // Hopefully ptxas bug is fixed by then. - if (!isKernelFunc && align < 4) - align = 4; + if (!isKernelFunc && align < Align(4)) + align = Align(4); unsigned sz = DL.getTypeAllocSize(ETy); - O << "\t.param .align " << align << " .b8 "; + O << "\t.param .align " << align.value() << " .b8 "; printParamName(I, paramIndex, O); O << "[" << sz << "]"; continue; @@ -1653,7 +1651,7 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( // We use the per class virtual register number in the ptx output. 
unsigned int numVRs = MRI->getNumVirtRegs(); for (unsigned i = 0; i < numVRs; i++) { - unsigned int vr = TRI->index2VirtReg(i); + unsigned int vr = Register::index2VirtReg(i); const TargetRegisterClass *RC = MRI->getRegClass(vr); DenseMap ®map = VRegMapping[RC]; int n = regmap.size(); @@ -1861,7 +1859,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, case Type::HalfTyID: case Type::FloatTyID: case Type::DoubleTyID: { - const ConstantFP *CFP = dyn_cast(CPV); + const auto *CFP = cast(CPV); Type *Ty = CFP->getType(); if (Ty == Type::getHalfTy(CPV->getContext())) { APInt API = CFP->getValueAPF().bitcastToAPInt(); @@ -2212,7 +2210,7 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, const MachineOperand &MO = MI->getOperand(opNum); switch (MO.getType()) { case MachineOperand::MO_Register: - if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + if (Register::isPhysicalRegister(MO.getReg())) { if (MO.getReg() == NVPTX::VRDepot) O << DEPOTNAME << getFunctionNumber(); else diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h index 43ae57ac1262..7a66854d32f4 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -200,7 +200,7 @@ private: const Function *F; std::string CurrentFnName; - void EmitBasicBlockStart(const MachineBasicBlock &MBB) const override; + void EmitBasicBlockStart(const MachineBasicBlock &MBB) override; void EmitFunctionEntryLabel() override; void EmitFunctionBodyStart() override; void EmitFunctionBodyEnd() override; diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp index 46f08b23d31a..d26912f47e50 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp +++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp @@ -25,7 +25,7 @@ using namespace llvm; NVPTXFrameLowering::NVPTXFrameLowering() - : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0) {} + : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, Align(8), 0) {} bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; } diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index ae1aa98da0e8..9acd0bea66fd 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -480,7 +480,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(ISD::TRAP, MVT::Other, Legal); // Register custom handling for vector loads/stores - for (MVT VT : MVT::vector_valuetypes()) { + for (MVT VT : MVT::fixedlen_vector_valuetypes()) { if (IsPTXVectorType(VT)) { setOperationAction(ISD::LOAD, VT, Custom); setOperationAction(ISD::STORE, VT, Custom); @@ -1291,8 +1291,8 @@ std::string NVPTXTargetLowering::getPrototype( O << ".param .b" << size << " _"; } else if (isa(retTy)) { O << ".param .b" << PtrVT.getSizeInBits() << " _"; - } else if (retTy->isAggregateType() || retTy->isVectorTy() || retTy->isIntegerTy(128)) { - auto &DL = CS.getCalledFunction()->getParent()->getDataLayout(); + } else if (retTy->isAggregateType() || retTy->isVectorTy() || + retTy->isIntegerTy(128)) { O << ".param .align " << retAlignment << " .b8 _[" << DL.getTypeAllocSize(retTy) << "]"; } else { @@ -2230,8 +2230,8 @@ SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { if (Op.getValueType() == MVT::v2f16) { LoadSDNode *Load = cast(Op); EVT MemVT = Load->getMemoryVT(); - if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, - 
*Load->getMemOperand())) { + if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), + MemVT, *Load->getMemOperand())) { SDValue Ops[2]; std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG); return DAG.getMergeValues(Ops, SDLoc(Op)); @@ -2273,8 +2273,8 @@ SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { // v2f16 is legal, so we can't rely on legalizer to handle unaligned // stores and have to handle it here. if (VT == MVT::v2f16 && - !allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, - *Store->getMemOperand())) + !allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), + VT, *Store->getMemOperand())) return expandUnalignedStore(Store, DAG); if (VT.isVector()) @@ -3497,7 +3497,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 16; + Info.align = Align(16); return true; } case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col: @@ -3521,7 +3521,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 8; + Info.align = Align(8); return true; } @@ -3547,7 +3547,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 16; + Info.align = Align(16); return true; } @@ -3585,7 +3585,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 4; + Info.align = Align(4); return true; } @@ -3606,7 +3606,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 16; + Info.align = Align(16); return true; } @@ -3627,7 +3627,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 16; + Info.align = Align(16); return true; } @@ -3648,7 +3648,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 16; + Info.align = Align(16); return true; } @@ -3665,7 +3665,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 8; + Info.align = Align(8); return true; } @@ -3686,7 +3686,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOStore; - Info.align = 16; + Info.align = Align(16); return true; } @@ -3707,7 +3707,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOStore; - Info.align = 16; + Info.align = Align(16); return true; } @@ -3728,7 +3728,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOStore; - Info.align = 16; + Info.align = Align(16); return true; } @@ -3745,7 +3745,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOStore; - Info.align = 8; + Info.align = Align(8); return true; } @@ -3780,7 +3780,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; 
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; - Info.align = 0; + Info.align.reset(); return true; } @@ -3798,7 +3798,8 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = cast(I.getArgOperand(1))->getZExtValue(); + Info.align = + MaybeAlign(cast(I.getArgOperand(1))->getZExtValue()); return true; } @@ -3817,7 +3818,8 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = cast(I.getArgOperand(1))->getZExtValue(); + Info.align = + MaybeAlign(cast(I.getArgOperand(1))->getZExtValue()); return true; } @@ -3883,7 +3885,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = nullptr; Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 16; + Info.align = Align(16); return true; case Intrinsic::nvvm_tex_1d_v4s32_s32: @@ -4003,7 +4005,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = nullptr; Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 16; + Info.align = Align(16); return true; case Intrinsic::nvvm_suld_1d_i8_clamp: @@ -4056,7 +4058,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = nullptr; Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 16; + Info.align = Align(16); return true; case Intrinsic::nvvm_suld_1d_i16_clamp: @@ -4109,7 +4111,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = nullptr; Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 16; + Info.align = Align(16); return true; case Intrinsic::nvvm_suld_1d_i32_clamp: @@ -4162,7 +4164,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = nullptr; Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 16; + Info.align = Align(16); return true; case Intrinsic::nvvm_suld_1d_i64_clamp: @@ -4200,7 +4202,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = nullptr; Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 16; + Info.align = Align(16); return true; } return false; diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td index 62da3c79f465..fe7a84f9a361 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -143,12 +143,17 @@ def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">; def hasPTX60 : Predicate<"Subtarget->getPTXVersion() >= 60">; def hasPTX61 : Predicate<"Subtarget->getPTXVersion() >= 61">; def hasPTX63 : Predicate<"Subtarget->getPTXVersion() >= 63">; +def hasPTX64 : Predicate<"Subtarget->getPTXVersion() >= 64">; def hasSM30 : Predicate<"Subtarget->getSmVersion() >= 30">; def hasSM70 : Predicate<"Subtarget->getSmVersion() >= 70">; def hasSM72 : Predicate<"Subtarget->getSmVersion() >= 72">; def hasSM75 : Predicate<"Subtarget->getSmVersion() >= 75">; +// non-sync shfl instructions are not available on sm_70+ in PTX6.4+ +def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70" + "&& Subtarget->getPTXVersion() >= 64)">; + def useShortPtr : Predicate<"useShortPointers()">; def useFP16Math: Predicate<"Subtarget->allowFP16Math()">; @@ -2908,7 +2913,7 @@ def : Pat<(ctlz Int32Regs:$a), (CLZr32 Int32Regs:$a)>; // ctz instruction always returns a 32-bit value. For ctlz.i64, convert the // ptx value to 64 bits to match the ISD node's semantics, unless we know we're // truncating back down to 32 bits. 
-def : Pat<(ctlz Int64Regs:$a), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>; +def : Pat<(i64 (ctlz Int64Regs:$a)), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>; def : Pat<(i32 (trunc (ctlz Int64Regs:$a))), (CLZr64 Int64Regs:$a)>; // For 16-bit ctlz, we zero-extend to 32-bit, perform the count, then trunc the @@ -2925,10 +2930,10 @@ def : Pat<(i32 (trunc (ctlz Int64Regs:$a))), (CLZr64 Int64Regs:$a)>; // and then ctlz that value. This way we don't have to subtract 16 from the // result. Unfortunately today we don't have a way to generate // "mov b32reg, {b16imm, b16reg}", so we don't do this optimization. -def : Pat<(ctlz Int16Regs:$a), +def : Pat<(i16 (ctlz Int16Regs:$a)), (SUBi16ri (CVT_u16_u32 (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE), 16)>; -def : Pat<(i32 (zext (ctlz Int16Regs:$a))), +def : Pat<(i32 (zext (i16 (ctlz Int16Regs:$a)))), (SUBi32ri (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), 16)>; // Population count @@ -2953,7 +2958,7 @@ def : Pat<(i32 (trunc (ctpop Int64Regs:$a))), (POPCr64 Int64Regs:$a)>; // If we know that we're storing into an i32, we can avoid the final trunc. def : Pat<(ctpop Int16Regs:$a), (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE)>; -def : Pat<(i32 (zext (ctpop Int16Regs:$a))), +def : Pat<(i32 (zext (i16 (ctpop Int16Regs:$a)))), (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE))>; // fpround f32 -> f16 diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td index 1752d3e0575e..c52195fb0449 100644 --- a/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -56,6 +56,10 @@ class RegSeq { []); } +class THREADMASK_INFO { + list ret = !if(sync, [0,1], [0]); +} + //----------------------------------- // Synchronization and shuffle functions //----------------------------------- @@ -129,121 +133,64 @@ def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt), [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>, Requires<[hasPTX60, hasSM30]>; - -// shfl.{up,down,bfly,idx}.b32 -multiclass SHFL { - // The last two parameters to shfl can be regs or imms. ptxas is smart - // enough to inline constant registers, so strictly speaking we don't need to - // handle immediates here. But it's easy enough, and it makes our ptx more - // readable. 
- def reg : NVPTXInst< - (outs regclass:$dst), - (ins regclass:$src, Int32Regs:$offset, Int32Regs:$mask), - !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"), - [(set regclass:$dst, (IntOp regclass:$src, Int32Regs:$offset, Int32Regs:$mask))]>; - - def imm1 : NVPTXInst< - (outs regclass:$dst), - (ins regclass:$src, i32imm:$offset, Int32Regs:$mask), - !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"), - [(set regclass:$dst, (IntOp regclass:$src, imm:$offset, Int32Regs:$mask))]>; - - def imm2 : NVPTXInst< - (outs regclass:$dst), - (ins regclass:$src, Int32Regs:$offset, i32imm:$mask), - !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"), - [(set regclass:$dst, (IntOp regclass:$src, Int32Regs:$offset, imm:$mask))]>; - - def imm3 : NVPTXInst< - (outs regclass:$dst), - (ins regclass:$src, i32imm:$offset, i32imm:$mask), - !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"), - [(set regclass:$dst, (IntOp regclass:$src, imm:$offset, imm:$mask))]>; +class SHFL_INSTR + : NVPTXInst<(outs), (ins), "?", []> { + NVPTXRegClass rc = !cond( + !eq(reg, "i32"): Int32Regs, + !eq(reg, "f32"): Float32Regs); + string IntrName = "int_nvvm_shfl_" + # !if(sync, "sync_", "") + # mode + # "_" # reg + # !if(return_pred, "p", ""); + Intrinsic Intr = !cast(IntrName); + let InOperandList = !con( + !if(sync, + !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]), + (ins)), + (ins rc:$src), + !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]), + !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"]) + ); + let OutOperandList = !if(return_pred, (outs rc:$dst, Int1Regs:$pred), (outs rc:$dst)); + let AsmString = "shfl." + # !if(sync, "sync.", "") + # mode # ".b32\t" + # "$dst" + # !if(return_pred, "|$pred", "") # ", " + # "$src, $offset, $mask" + # !if(sync, ", $threadmask", "") + # ";" + ; + let Pattern = [!con( + !foreach(tmp, OutOperandList, + !subst(outs, set, + !subst(i32imm, imm, tmp))), + (set !foreach(tmp, InOperandList, + !subst(ins, Intr, + !subst(i32imm, imm, tmp)))) + )]; } -defm INT_SHFL_DOWN_I32 : SHFL; -defm INT_SHFL_DOWN_F32 : SHFL; -defm INT_SHFL_UP_I32 : SHFL; -defm INT_SHFL_UP_F32 : SHFL; -defm INT_SHFL_BFLY_I32 : SHFL; -defm INT_SHFL_BFLY_F32 : SHFL; -defm INT_SHFL_IDX_I32 : SHFL; -defm INT_SHFL_IDX_F32 : SHFL; - -multiclass SHFL_SYNC { - // Threadmask and the last two parameters to shfl.sync can be regs or imms. - // ptxas is smart enough to inline constant registers, so strictly speaking we - // don't need to handle immediates here. But it's easy enough, and it makes - // our ptx more readable. 
- def rrr : NVPTXInst< - (outs regclass:$dst), - (ins Int32Regs:$threadmask, regclass:$src, Int32Regs:$offset, Int32Regs:$mask), - !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"), - [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src, - Int32Regs:$offset, Int32Regs:$mask))]>; - - def rri : NVPTXInst< - (outs regclass:$dst), - (ins Int32Regs:$threadmask, regclass:$src, Int32Regs:$offset, i32imm:$mask), - !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"), - [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src, - Int32Regs:$offset, imm:$mask))]>; - - def rir : NVPTXInst< - (outs regclass:$dst), - (ins Int32Regs:$threadmask, regclass:$src, i32imm:$offset, Int32Regs:$mask), - !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"), - [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src, - imm:$offset, Int32Regs:$mask))]>; - - def rii : NVPTXInst< - (outs regclass:$dst), - (ins Int32Regs:$threadmask, regclass:$src, i32imm:$offset, i32imm:$mask), - !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"), - [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src, - imm:$offset, imm:$mask))]>; - - def irr : NVPTXInst< - (outs regclass:$dst), - (ins i32imm:$threadmask, regclass:$src, Int32Regs:$offset, Int32Regs:$mask), - !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"), - [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src, - Int32Regs:$offset, Int32Regs:$mask))]>; - - def iri : NVPTXInst< - (outs regclass:$dst), - (ins i32imm:$threadmask, regclass:$src, Int32Regs:$offset, i32imm:$mask), - !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"), - [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src, - Int32Regs:$offset, imm:$mask))]>; - - def iir : NVPTXInst< - (outs regclass:$dst), - (ins i32imm:$threadmask, regclass:$src, i32imm:$offset, Int32Regs:$mask), - !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"), - [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src, - imm:$offset, Int32Regs:$mask))]>; - - def iii : NVPTXInst< - (outs regclass:$dst), - (ins i32imm:$threadmask, regclass:$src, i32imm:$offset, i32imm:$mask), - !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"), - [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src, - imm:$offset, imm:$mask))]>; +foreach sync = [0, 1] in { + foreach mode = ["up", "down", "bfly", "idx"] in { + foreach regclass = ["i32", "f32"] in { + foreach return_pred = [0, 1] in { + foreach offset_imm = [0, 1] in { + foreach mask_imm = [0, 1] in { + foreach threadmask_imm = THREADMASK_INFO.ret in { + def : SHFL_INSTR, + Requires; + } + } + } + } + } + } } -// On sm_70 these don't have to be convergent, so we may eventually want to -// implement non-convergent variant of this intrinsic. 
-defm INT_SHFL_SYNC_DOWN_I32 : SHFL_SYNC; -defm INT_SHFL_SYNC_DOWN_F32 : SHFL_SYNC; -defm INT_SHFL_SYNC_UP_I32 : SHFL_SYNC; -defm INT_SHFL_SYNC_UP_F32 : SHFL_SYNC; -defm INT_SHFL_SYNC_BFLY_I32 : SHFL_SYNC; -defm INT_SHFL_SYNC_BFLY_F32 : SHFL_SYNC; -defm INT_SHFL_SYNC_IDX_I32 : SHFL_SYNC; -defm INT_SHFL_SYNC_IDX_F32 : SHFL_SYNC; - - // vote.{all,any,uni,ballot} multiclass VOTE { def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred), diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 0743a2986718..83039241a7c7 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -103,7 +103,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { // Do the transformation of an aggr load/copy/set to a loop // for (LoadInst *LI : AggrLoads) { - StoreInst *SI = dyn_cast(*LI->user_begin()); + auto *SI = cast(*LI->user_begin()); Value *SrcAddr = LI->getOperand(0); Value *DstAddr = SI->getOperand(1); unsigned NumLoads = DL.getTypeStoreSize(LI->getType()); diff --git a/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/lib/Target/NVPTX/NVPTXLowerAlloca.cpp index 76fb9f3fa692..945b7286b03c 100644 --- a/lib/Target/NVPTX/NVPTXLowerAlloca.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAlloca.cpp @@ -41,12 +41,12 @@ void initializeNVPTXLowerAllocaPass(PassRegistry &); } namespace { -class NVPTXLowerAlloca : public BasicBlockPass { - bool runOnBasicBlock(BasicBlock &BB) override; +class NVPTXLowerAlloca : public FunctionPass { + bool runOnFunction(Function &F) override; public: static char ID; // Pass identification, replacement for typeid - NVPTXLowerAlloca() : BasicBlockPass(ID) {} + NVPTXLowerAlloca() : FunctionPass(ID) {} StringRef getPassName() const override { return "convert address space of alloca'ed memory to local"; } @@ -61,58 +61,61 @@ INITIALIZE_PASS(NVPTXLowerAlloca, "nvptx-lower-alloca", // ============================================================================= // Main function for this pass. // ============================================================================= -bool NVPTXLowerAlloca::runOnBasicBlock(BasicBlock &BB) { - if (skipBasicBlock(BB)) +bool NVPTXLowerAlloca::runOnFunction(Function &F) { + if (skipFunction(F)) return false; bool Changed = false; - for (auto &I : BB) { - if (auto allocaInst = dyn_cast(&I)) { - Changed = true; - auto PTy = dyn_cast(allocaInst->getType()); - auto ETy = PTy->getElementType(); - auto LocalAddrTy = PointerType::get(ETy, ADDRESS_SPACE_LOCAL); - auto NewASCToLocal = new AddrSpaceCastInst(allocaInst, LocalAddrTy, ""); - auto GenericAddrTy = PointerType::get(ETy, ADDRESS_SPACE_GENERIC); - auto NewASCToGeneric = new AddrSpaceCastInst(NewASCToLocal, - GenericAddrTy, ""); - NewASCToLocal->insertAfter(allocaInst); - NewASCToGeneric->insertAfter(NewASCToLocal); - for (Value::use_iterator UI = allocaInst->use_begin(), - UE = allocaInst->use_end(); - UI != UE; ) { - // Check Load, Store, GEP, and BitCast Uses on alloca and make them - // use the converted generic address, in order to expose non-generic - // addrspacecast to NVPTXInferAddressSpaces. For other types - // of instructions this is unnecessary and may introduce redundant - // address cast. 
- const auto &AllocaUse = *UI++; - auto LI = dyn_cast(AllocaUse.getUser()); - if (LI && LI->getPointerOperand() == allocaInst && !LI->isVolatile()) { - LI->setOperand(LI->getPointerOperandIndex(), NewASCToGeneric); - continue; - } - auto SI = dyn_cast(AllocaUse.getUser()); - if (SI && SI->getPointerOperand() == allocaInst && !SI->isVolatile()) { - SI->setOperand(SI->getPointerOperandIndex(), NewASCToGeneric); - continue; - } - auto GI = dyn_cast(AllocaUse.getUser()); - if (GI && GI->getPointerOperand() == allocaInst) { - GI->setOperand(GI->getPointerOperandIndex(), NewASCToGeneric); - continue; - } - auto BI = dyn_cast(AllocaUse.getUser()); - if (BI && BI->getOperand(0) == allocaInst) { - BI->setOperand(0, NewASCToGeneric); - continue; + for (auto &BB : F) + for (auto &I : BB) { + if (auto allocaInst = dyn_cast(&I)) { + Changed = true; + auto PTy = dyn_cast(allocaInst->getType()); + auto ETy = PTy->getElementType(); + auto LocalAddrTy = PointerType::get(ETy, ADDRESS_SPACE_LOCAL); + auto NewASCToLocal = new AddrSpaceCastInst(allocaInst, LocalAddrTy, ""); + auto GenericAddrTy = PointerType::get(ETy, ADDRESS_SPACE_GENERIC); + auto NewASCToGeneric = + new AddrSpaceCastInst(NewASCToLocal, GenericAddrTy, ""); + NewASCToLocal->insertAfter(allocaInst); + NewASCToGeneric->insertAfter(NewASCToLocal); + for (Value::use_iterator UI = allocaInst->use_begin(), + UE = allocaInst->use_end(); + UI != UE;) { + // Check Load, Store, GEP, and BitCast Uses on alloca and make them + // use the converted generic address, in order to expose non-generic + // addrspacecast to NVPTXInferAddressSpaces. For other types + // of instructions this is unnecessary and may introduce redundant + // address cast. + const auto &AllocaUse = *UI++; + auto LI = dyn_cast(AllocaUse.getUser()); + if (LI && LI->getPointerOperand() == allocaInst && + !LI->isVolatile()) { + LI->setOperand(LI->getPointerOperandIndex(), NewASCToGeneric); + continue; + } + auto SI = dyn_cast(AllocaUse.getUser()); + if (SI && SI->getPointerOperand() == allocaInst && + !SI->isVolatile()) { + SI->setOperand(SI->getPointerOperandIndex(), NewASCToGeneric); + continue; + } + auto GI = dyn_cast(AllocaUse.getUser()); + if (GI && GI->getPointerOperand() == allocaInst) { + GI->setOperand(GI->getPointerOperandIndex(), NewASCToGeneric); + continue; + } + auto BI = dyn_cast(AllocaUse.getUser()); + if (BI && BI->getOperand(0) == allocaInst) { + BI->setOperand(0, NewASCToGeneric); + continue; + } } } } - } return Changed; } -BasicBlockPass *llvm::createNVPTXLowerAllocaPass() { +FunctionPass *llvm::createNVPTXLowerAllocaPass() { return new NVPTXLowerAlloca(); } diff --git a/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/lib/Target/NVPTX/NVPTXLowerArgs.cpp index c5e02e34e25e..c3c5f6fbcba7 100644 --- a/lib/Target/NVPTX/NVPTXLowerArgs.cpp +++ b/lib/Target/NVPTX/NVPTXLowerArgs.cpp @@ -164,7 +164,7 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) { // Set the alignment to alignment of the byval parameter. This is because, // later load/stores assume that alignment, and we are going to replace // the use of the byval parameter with this alloca instruction. 
-  AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo()));
+  AllocA->setAlignment(MaybeAlign(Func->getParamAlignment(Arg->getArgNo())));
   Arg->replaceAllUsesWith(AllocA);
 
   Value *ArgInParam = new AddrSpaceCastInst(
diff --git a/lib/Target/NVPTX/NVPTXPeephole.cpp b/lib/Target/NVPTX/NVPTXPeephole.cpp
index 629757db8707..5e6411c61eab 100644
--- a/lib/Target/NVPTX/NVPTXPeephole.cpp
+++ b/lib/Target/NVPTX/NVPTXPeephole.cpp
@@ -81,7 +81,7 @@ static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
   auto &Op = Root.getOperand(1);
   const auto &MRI = MF.getRegInfo();
   MachineInstr *GenericAddrDef = nullptr;
-  if (Op.isReg() && TargetRegisterInfo::isVirtualRegister(Op.getReg())) {
+  if (Op.isReg() && Register::isVirtualRegister(Op.getReg())) {
     GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg());
   }
 
diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 4c5a9adf1f65..a7127b0e9a99 100644
--- a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -178,7 +178,7 @@ NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
   // frame index registers. Functions which don't want/need this optimization
   // will continue to use the existing code path.
   if (MFI.getUseLocalStackAllocationBlock()) {
-    unsigned Align = MFI.getLocalFrameMaxAlign();
+    unsigned Align = MFI.getLocalFrameMaxAlign().value();
 
     // Adjust to alignment boundary.
     Offset = (Offset + Align - 1) / Align * Align;
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 11b3fe2fa3d3..f58fb5717773 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -116,7 +116,7 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
                         CPU, FS, Options, Reloc::PIC_,
                         getEffectiveCodeModel(CM, CodeModel::Small), OL),
       is64bit(is64bit), UseShortPointers(UseShortPointersOpt),
-      TLOF(llvm::make_unique<NVPTXTargetObjectFile>()),
+      TLOF(std::make_unique<NVPTXTargetObjectFile>()),
       Subtarget(TT, CPU, FS, *this) {
   if (TT.getOS() == Triple::NVCL)
     drvInterface = NVPTX::NVCL;
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
index 665eb1383253..43c2e9920403 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -19,10 +19,11 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/Mutex.h"
 #include <algorithm>
 #include <cstring>
 #include <map>
+#include <mutex>
 #include <string>
 #include <vector>
 
@@ -38,12 +39,12 @@ static ManagedStatic<per_module_annot_t> annotationCache;
 static sys::Mutex Lock;
 
 void clearAnnotationCache(const Module *Mod) {
-  MutexGuard Guard(Lock);
+  std::lock_guard<sys::Mutex> Guard(Lock);
   annotationCache->erase(Mod);
 }
 
 static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
-  MutexGuard Guard(Lock);
+  std::lock_guard<sys::Mutex> Guard(Lock);
   assert(md && "Invalid mdnode for annotation");
   assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands");
   // start index = 1, to skip the global variable key
@@ -69,7 +70,7 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
 }
 
 static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
-  MutexGuard Guard(Lock);
+  std::lock_guard<sys::Mutex> Guard(Lock);
   NamedMDNode *NMD = m->getNamedMetadata("nvvm.annotations");
   if (!NMD)
     return;
@@ -103,7 +104,7 @@ static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
 
 bool findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop,
                            unsigned &retval) {
-  MutexGuard Guard(Lock);
+  std::lock_guard<sys::Mutex> Guard(Lock);
   const Module *m = gv->getParent();
   if ((*annotationCache).find(m) == (*annotationCache).end())
     cacheAnnotationFromMD(m, gv);
@@ -117,7 +118,7 @@ bool findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop,
 
 bool findAllNVVMAnnotation(const GlobalValue *gv, const std::string &prop,
                            std::vector<unsigned> &retval) {
-  MutexGuard Guard(Lock);
+  std::lock_guard<sys::Mutex> Guard(Lock);
   const Module *m = gv->getParent();
   if ((*annotationCache).find(m) == (*annotationCache).end())
     cacheAnnotationFromMD(m, gv);
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index c9524da93acd..aedf5b713c3f 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -579,7 +579,7 @@ public:
 
   static std::unique_ptr<PPCOperand> CreateToken(StringRef Str, SMLoc S,
                                                  bool IsPPC64) {
-    auto Op = make_unique<PPCOperand>(Token);
+    auto Op = std::make_unique<PPCOperand>(Token);
     Op->Tok.Data = Str.data();
     Op->Tok.Length = Str.size();
     Op->StartLoc = S;
@@ -608,7 +608,7 @@ public:
 
   static std::unique_ptr<PPCOperand> CreateImm(int64_t Val, SMLoc S, SMLoc E,
                                                bool IsPPC64) {
-    auto Op = make_unique<PPCOperand>(Immediate);
+    auto Op = std::make_unique<PPCOperand>(Immediate);
     Op->Imm.Val = Val;
     Op->StartLoc = S;
     Op->EndLoc = E;
@@ -618,7 +618,7 @@ public:
 
   static std::unique_ptr<PPCOperand> CreateExpr(const MCExpr *Val, SMLoc S,
                                                 SMLoc E, bool IsPPC64) {
-    auto Op = make_unique<PPCOperand>(Expression);
+    auto Op = std::make_unique<PPCOperand>(Expression);
     Op->Expr.Val = Val;
     Op->Expr.CRVal = EvaluateCRExpr(Val);
     Op->StartLoc = S;
@@ -629,7 +629,7 @@ public:
 
   static std::unique_ptr<PPCOperand> CreateTLSReg(const MCSymbolRefExpr *Sym,
                                                   SMLoc S, SMLoc E, bool IsPPC64) {
-    auto Op = make_unique<PPCOperand>(TLSRegister);
+    auto Op = std::make_unique<PPCOperand>(TLSRegister);
     Op->TLSReg.Sym = Sym;
     Op->StartLoc = S;
     Op->EndLoc = E;
@@ -639,7 +639,7 @@ public:
 
   static std::unique_ptr<PPCOperand> CreateContextImm(int64_t Val, SMLoc S,
                                                       SMLoc E, bool IsPPC64) {
-    auto Op = make_unique<PPCOperand>(ContextImmediate);
+    auto Op = std::make_unique<PPCOperand>(ContextImmediate);
     Op->Imm.Val = Val;
     Op->StartLoc = S;
     Op->EndLoc = E;
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 7a8af57961cb..3597fd15eeb1 100644
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -167,12 +167,6 @@ static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
   return decodeRegisterClass(Inst, RegNo, QFRegs);
 }
 
-static DecodeStatus DecodeSPE4RCRegisterClass(MCInst &Inst, uint64_t RegNo,
-                                              uint64_t Address,
-                                              const void *Decoder) {
-  return decodeRegisterClass(Inst, RegNo, RRegs);
-}
-
 static DecodeStatus DecodeSPERCRegisterClass(MCInst &Inst, uint64_t RegNo,
                                              uint64_t Address,
                                              const void *Decoder) {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 042ddf48d5df..20f752c3041a 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -78,7 +78,7 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
   // determine the type of the relocation
   unsigned Type;
   if (IsPCRel) {
-    switch ((unsigned)Fixup.getKind()) {
+    switch (Fixup.getTargetKind()) {
     default:
      llvm_unreachable("Unimplemented");
    case PPC::fixup_ppc_br24:
@@ -131,7 +131,7 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
      break;
    }
  } else
{ - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { default: llvm_unreachable("invalid fixup kind!"); case FK_NONE: Type = ELF::R_PPC_NONE; @@ -443,5 +443,5 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym, std::unique_ptr llvm::createPPCELFObjectWriter(bool Is64Bit, uint8_t OSABI) { - return llvm::make_unique(Is64Bit, OSABI); + return std::make_unique(Is64Bit, OSABI); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp index 0e64ae55ab1c..7fc231618fa9 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp @@ -66,6 +66,31 @@ void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, const MCSubtargetInfo &STI) { + // Customize printing of the addis instruction on AIX. When an operand is a + // symbol reference, the instruction syntax is changed to look like a load + // operation, i.e: + // Transform: addis $rD, $rA, $src --> addis $rD, $src($rA). + if (TT.isOSAIX() && + (MI->getOpcode() == PPC::ADDIS8 || MI->getOpcode() == PPC::ADDIS) && + MI->getOperand(2).isExpr()) { + assert((MI->getOperand(0).isReg() && MI->getOperand(1).isReg()) && + "The first and the second operand of an addis instruction" + " should be registers."); + + assert(isa(MI->getOperand(2).getExpr()) && + "The third operand of an addis instruction should be a symbol " + "reference expression if it is an expression at all."); + + O << "\taddis "; + printOperand(MI, 0, O); + O << ", "; + printOperand(MI, 2, O); + O << "("; + printOperand(MI, 1, O); + O << ")"; + return; + } + // Check for slwi/srwi mnemonics. if (MI->getOpcode() == PPC::RLWINM) { unsigned char SH = MI->getOperand(2).getImm(); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 5f0005ea1d7b..1216cd727289 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -86,4 +86,5 @@ void PPCXCOFFMCAsmInfo::anchor() {} PPCXCOFFMCAsmInfo::PPCXCOFFMCAsmInfo(bool Is64Bit, const Triple &T) { assert(!IsLittleEndian && "Little-endian XCOFF not supported."); CodePointerSize = CalleeSaveStackSlotSize = Is64Bit ? 
8 : 4; + ZeroDirective = "\t.space\t"; } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index d467f5c4a439..fb9dd5d7aa75 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -19,8 +19,8 @@ using namespace llvm; const PPCMCExpr* PPCMCExpr::create(VariantKind Kind, const MCExpr *Expr, - bool isDarwin, MCContext &Ctx) { - return new (Ctx) PPCMCExpr(Kind, Expr, isDarwin); + bool IsDarwin, MCContext &Ctx) { + return new (Ctx) PPCMCExpr(Kind, Expr, IsDarwin); } void PPCMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h index 449e2c34f74d..ad1454566162 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h @@ -45,21 +45,21 @@ public: /// @{ static const PPCMCExpr *create(VariantKind Kind, const MCExpr *Expr, - bool isDarwin, MCContext &Ctx); + bool IsDarwin, MCContext &Ctx); static const PPCMCExpr *createLo(const MCExpr *Expr, - bool isDarwin, MCContext &Ctx) { - return create(VK_PPC_LO, Expr, isDarwin, Ctx); + bool IsDarwin, MCContext &Ctx) { + return create(VK_PPC_LO, Expr, IsDarwin, Ctx); } static const PPCMCExpr *createHi(const MCExpr *Expr, - bool isDarwin, MCContext &Ctx) { - return create(VK_PPC_HI, Expr, isDarwin, Ctx); + bool IsDarwin, MCContext &Ctx) { + return create(VK_PPC_HI, Expr, IsDarwin, Ctx); } static const PPCMCExpr *createHa(const MCExpr *Expr, - bool isDarwin, MCContext &Ctx) { - return create(VK_PPC_HA, Expr, isDarwin, Ctx); + bool IsDarwin, MCContext &Ctx) { + return create(VK_PPC_HA, Expr, IsDarwin, Ctx); } /// @} diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index 4cf7fd15fa75..672f910ab086 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -178,7 +178,7 @@ static uint32_t getFixupOffset(const MCAsmLayout &Layout, uint32_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); // On Mach-O, ppc_fixup_half16 relocations must refer to the // start of the instruction, not the second halfword, as ELF does - if (unsigned(Fixup.getKind()) == PPC::fixup_ppc_half16) + if (Fixup.getTargetKind() == PPC::fixup_ppc_half16) FixupOffset &= ~uint32_t(3); return FixupOffset; } @@ -376,5 +376,5 @@ void PPCMachObjectWriter::RecordPPCRelocation( std::unique_ptr llvm::createPPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) { - return llvm::make_unique(Is64Bit, CPUType, CPUSubtype); + return std::make_unique(Is64Bit, CPUType, CPUSubtype); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp index 9c661286d455..7fdbb8990b55 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp @@ -25,5 +25,5 @@ PPCXCOFFObjectWriter::PPCXCOFFObjectWriter(bool Is64Bit) std::unique_ptr llvm::createPPCXCOFFObjectWriter(bool Is64Bit) { - return llvm::make_unique(Is64Bit); + return std::make_unique(Is64Bit); } diff --git a/lib/Target/PowerPC/P9InstrResources.td b/lib/Target/PowerPC/P9InstrResources.td index 2a10322d3f49..f6cd8ed00c82 100644 --- a/lib/Target/PowerPC/P9InstrResources.td +++ b/lib/Target/PowerPC/P9InstrResources.td @@ -64,6 +64,7 @@ def : InstRW<[P9_ALUE_2C, 
P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], XXLAND, XXLANDC, XXLEQV, + XXLEQVOnes, XXLNAND, XXLNOR, XXLOR, @@ -124,8 +125,8 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C], (instregex "SRAD(I)?$"), (instregex "EXTSWSLI_32_64$"), (instregex "MFV(S)?RD$"), - (instregex "MTVSRD$"), - (instregex "MTVSRW(A|Z)$"), + (instregex "MTV(S)?RD$"), + (instregex "MTV(S)?RW(A|Z)$"), (instregex "CMP(WI|LWI|W|LW)(8)?$"), (instregex "CMP(L)?D(I)?$"), (instregex "SUBF(I)?C(8)?$"), @@ -148,7 +149,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C], (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(o)?$"), (instregex "ADD(4|8)(TLS)?(_)?$"), (instregex "NEG(8)?$"), - (instregex "ADDI(S)?toc(HA|L)$"), + (instregex "ADDI(S)?toc(HA|L)(8)?$"), COPY, MCRF, MCRXRX, @@ -158,6 +159,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C], XSNEGDP, XSCPSGNDP, MFVSRWZ, + MFVRWZ, EXTSWSLI, SRADI_32, RLDIC, diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index c6951ab67b08..0534773c4c9e 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -50,10 +50,10 @@ namespace llvm { FunctionPass *createPPCExpandISELPass(); FunctionPass *createPPCPreEmitPeepholePass(); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, - AsmPrinter &AP, bool isDarwin); + AsmPrinter &AP, bool IsDarwin); bool LowerPPCMachineOperandToMCOperand(const MachineOperand &MO, MCOperand &OutMO, AsmPrinter &AP, - bool isDarwin); + bool IsDarwin); void initializePPCCTRLoopsPass(PassRegistry&); #ifndef NDEBUG @@ -86,8 +86,8 @@ namespace llvm { MO_NO_FLAG, /// On a symbol operand "FOO", this indicates that the reference is actually - /// to "FOO@plt". This is used for calls and jumps to external functions on - /// for PIC calls on Linux and ELF systems. + /// to "FOO@plt". This is used for calls and jumps to external functions + /// and for PIC calls on 32-bit ELF systems. MO_PLT = 1, /// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index bd87ce06b4fb..66236b72a1a3 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -51,9 +51,11 @@ #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSectionXCOFF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCSymbolXCOFF.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" @@ -76,7 +78,7 @@ namespace { class PPCAsmPrinter : public AsmPrinter { protected: - MapVector TOC; + MapVector TOC; const PPCSubtarget *Subtarget; StackMaps SM; @@ -87,7 +89,7 @@ public: StringRef getPassName() const override { return "PowerPC Assembly Printer"; } - MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym); + MCSymbol *lookUpOrCreateTOCEntry(const MCSymbol *Sym); bool doInitialization(Module &M) override { if (!TOC.empty()) @@ -164,6 +166,14 @@ public: : PPCAsmPrinter(TM, std::move(Streamer)) {} StringRef getPassName() const override { return "AIX PPC Assembly Printer"; } + + void SetupMachineFunction(MachineFunction &MF) override; + + void EmitGlobalVariable(const GlobalVariable *GV) override; + + void EmitFunctionDescriptor() override; + + void EmitEndOfAsmFile(Module &) override; }; } // end anonymous namespace @@ -265,7 +275,7 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, return true; // This operand uses VSX numbering. 
// If the operand is a VMX register, convert it to a VSX register. - unsigned Reg = MI->getOperand(OpNo).getReg(); + Register Reg = MI->getOperand(OpNo).getReg(); if (PPCInstrInfo::isVRRegister(Reg)) Reg = PPC::VSX32 + (Reg - PPC::V0); else if (PPCInstrInfo::isVFRegister(Reg)) @@ -328,7 +338,7 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, /// lookUpOrCreateTOCEntry -- Given a symbol, look up whether a TOC entry /// exists for it. If not, create one. Then return a symbol that references /// the TOC entry. -MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) { +MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(const MCSymbol *Sym) { MCSymbol *&TOCEntry = TOC[Sym]; if (!TOCEntry) TOCEntry = createTempSymbol("C"); @@ -378,7 +388,7 @@ void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) { if (CallTarget) { assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget && "High 16 bits of call target should be zero."); - unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg(); + Register ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg(); EncodedBytes = 0; // Materialize the jump address: EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LI8) @@ -502,13 +512,32 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI, .addExpr(SymVar)); } +/// Map a machine operand for a TOC pseudo-machine instruction to its +/// corresponding MCSymbol. +static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO, + AsmPrinter &AP) { + switch (MO.getType()) { + case MachineOperand::MO_GlobalAddress: + return AP.getSymbol(MO.getGlobal()); + case MachineOperand::MO_ConstantPoolIndex: + return AP.GetCPISymbol(MO.getIndex()); + case MachineOperand::MO_JumpTableIndex: + return AP.GetJTISymbol(MO.getIndex()); + case MachineOperand::MO_BlockAddress: + return AP.GetBlockAddressSymbol(MO.getBlockAddress()); + default: + llvm_unreachable("Unexpected operand type to get symbol."); + } +} + /// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to /// the current output stream. 
/// void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInst TmpInst; - bool isPPC64 = Subtarget->isPPC64(); - bool isDarwin = TM.getTargetTriple().isOSDarwin(); + const bool IsDarwin = TM.getTargetTriple().isOSDarwin(); + const bool IsPPC64 = Subtarget->isPPC64(); + const bool IsAIX = Subtarget->isAIXABI(); const Module *M = MF->getFunction().getParent(); PICLevel::Level PL = M->getPICLevel(); @@ -517,7 +546,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (!MI->isInlineAsm()) { for (const MachineOperand &MO: MI->operands()) { if (MO.isReg()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Subtarget->hasSPE()) { if (PPC::F4RCRegClass.contains(Reg) || PPC::F8RCRegClass.contains(Reg) || @@ -595,7 +624,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // addis r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE} - .L0$pb@ha // addi r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE} - .L0$pb@l // Get the offset from the GOT Base Register to the GOT - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); if (Subtarget->isSecurePlt() && isPositionIndependent() ) { unsigned PICR = TmpInst.getOperand(0).getReg(); MCSymbol *BaseSymbol = OutContext.getOrCreateSymbol( @@ -646,43 +675,57 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } } case PPC::LWZtoc: { - // Transform %r3 = LWZtoc @min1, %r2 - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + assert(!IsDarwin && "TOC is an ELF/XCOFF construct."); + + // Transform %rN = LWZtoc @op1, %r2 + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); - // Change the opcode to LWZ, and the global address operand to be a - // reference to the GOT entry we will synthesize later. + // Change the opcode to LWZ. TmpInst.setOpcode(PPC::LWZ); + const MachineOperand &MO = MI->getOperand(1); + assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) && + "Invalid operand for LWZtoc."); - // Map symbol -> label of TOC entry - assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()); - MCSymbol *MOSymbol = nullptr; - if (MO.isGlobal()) - MOSymbol = getSymbol(MO.getGlobal()); - else if (MO.isCPI()) - MOSymbol = GetCPISymbol(MO.getIndex()); - else if (MO.isJTI()) - MOSymbol = GetJTISymbol(MO.getIndex()); - else if (MO.isBlockAddress()) - MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress()); - - if (PL == PICLevel::SmallPIC) { + // Map the operand to its corresponding MCSymbol. + const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + + // Create a reference to the GOT entry for the symbol. The GOT entry will be + // synthesized later. + if (PL == PICLevel::SmallPIC && !IsAIX) { const MCExpr *Exp = MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_GOT, OutContext); TmpInst.getOperand(1) = MCOperand::createExpr(Exp); - } else { - MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); + EmitToStreamer(*OutStreamer, TmpInst); + return; + } - const MCExpr *Exp = - MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_None, - OutContext); - const MCExpr *PB = - MCSymbolRefExpr::create(OutContext.getOrCreateSymbol(Twine(".LTOC")), - OutContext); - Exp = MCBinaryExpr::createSub(Exp, PB, OutContext); + // Otherwise, use the TOC. 'TOCEntry' is a label used to reference the + // storage allocated in the TOC which contains the address of + // 'MOSymbol'. Said TOC entry will be synthesized later. 
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); + const MCExpr *Exp = + MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_None, OutContext); + + // AIX uses the label directly as the lwz displacement operand for + // references into the toc section. The displacement value will be generated + // relative to the toc-base. + if (IsAIX) { + assert( + TM.getCodeModel() == CodeModel::Small && + "This pseudo should only be selected for 32-bit small code model."); TmpInst.getOperand(1) = MCOperand::createExpr(Exp); + EmitToStreamer(*OutStreamer, TmpInst); + return; } + + // Create an explicit subtract expression between the local symbol and + // '.LTOC' to manifest the toc-relative offset. + const MCExpr *PB = MCSymbolRefExpr::create( + OutContext.getOrCreateSymbol(Twine(".LTOC")), OutContext); + Exp = MCBinaryExpr::createSub(Exp, PB, OutContext); + TmpInst.getOperand(1) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); return; } @@ -690,72 +733,121 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::LDtocCPT: case PPC::LDtocBA: case PPC::LDtoc: { + assert(!IsDarwin && "TOC is an ELF/XCOFF construct"); + // Transform %x3 = LDtoc @min1, %x2 - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); - // Change the opcode to LD, and the global address operand to be a - // reference to the TOC entry we will synthesize later. + // Change the opcode to LD. TmpInst.setOpcode(PPC::LD); + const MachineOperand &MO = MI->getOperand(1); + assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) && + "Invalid operand!"); + + // Map the machine operand to its corresponding MCSymbol, then map the + // global address operand to be a reference to the TOC entry we will + // synthesize later. + MCSymbol *TOCEntry = + lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO, *this)); + + const MCSymbolRefExpr::VariantKind VK = + IsAIX ? MCSymbolRefExpr::VK_None : MCSymbolRefExpr::VK_PPC_TOC; + const MCExpr *Exp = + MCSymbolRefExpr::create(TOCEntry, VK, OutContext); + TmpInst.getOperand(1) = MCOperand::createExpr(Exp); + EmitToStreamer(*OutStreamer, TmpInst); + return; + } + case PPC::ADDIStocHA: { + assert((IsAIX && !IsPPC64 && TM.getCodeModel() == CodeModel::Large) && + "This pseudo should only be selected for 32-bit large code model on" + " AIX."); + + // Transform %rd = ADDIStocHA %rA, @sym(%r2) + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); - // Map symbol -> label of TOC entry - assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()); - MCSymbol *MOSymbol = nullptr; - if (MO.isGlobal()) - MOSymbol = getSymbol(MO.getGlobal()); - else if (MO.isCPI()) - MOSymbol = GetCPISymbol(MO.getIndex()); - else if (MO.isJTI()) - MOSymbol = GetJTISymbol(MO.getIndex()); - else if (MO.isBlockAddress()) - MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress()); + // Change the opcode to ADDIS. + TmpInst.setOpcode(PPC::ADDIS); + const MachineOperand &MO = MI->getOperand(2); + assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) && + "Invalid operand for ADDIStocHA."); + + // Map the machine operand to its corresponding MCSymbol. + MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + + // Always use TOC on AIX. Map the global address operand to be a reference + // to the TOC entry we will synthesize later. 'TOCEntry' is a label used to + // reference the storage allocated in the TOC which contains the address of + // 'MOSymbol'. 
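// A minimal standalone sketch of the three displacement forms LWZtoc can end
// up with after this hunk, written as plain strings instead of MCExpr nodes
// (the ".LC_for_" label is a placeholder, not the real TOC-entry label).
#include <string>

std::string lwzTocDisplacement(bool SmallPIC, bool IsAIX, const std::string &Sym) {
  // ELF small PIC: reference the symbol's GOT entry directly.
  if (SmallPIC && !IsAIX)
    return Sym + "@got";
  // Otherwise a TOC entry (storage that will hold &Sym) is referenced.
  std::string TOCEntryLabel = ".LC_for_" + Sym;   // synthesized later
  // AIX, 32-bit small code model: the label itself is the displacement and is
  // resolved relative to the TOC base.
  if (IsAIX)
    return TOCEntryLabel;
  // Remaining 32-bit ELF case: subtract .LTOC explicitly to manifest the
  // toc-relative offset.
  return TOCEntryLabel + " - .LTOC";
}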
MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); + const MCExpr *Exp = MCSymbolRefExpr::create(TOCEntry, + MCSymbolRefExpr::VK_PPC_U, + OutContext); + TmpInst.getOperand(2) = MCOperand::createExpr(Exp); + EmitToStreamer(*OutStreamer, TmpInst); + return; + } + case PPC::LWZtocL: { + assert(IsAIX && !IsPPC64 && TM.getCodeModel() == CodeModel::Large && + "This pseudo should only be selected for 32-bit large code model on" + " AIX."); - const MCExpr *Exp = - MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC, - OutContext); + // Transform %rd = LWZtocL @sym, %rs. + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); + + // Change the opcode to lwz. + TmpInst.setOpcode(PPC::LWZ); + + const MachineOperand &MO = MI->getOperand(1); + assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) && + "Invalid operand for LWZtocL."); + + // Map the machine operand to its corresponding MCSymbol. + MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + + // Always use TOC on AIX. Map the global address operand to be a reference + // to the TOC entry we will synthesize later. 'TOCEntry' is a label used to + // reference the storage allocated in the TOC which contains the address of + // 'MOSymbol'. + MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); + const MCExpr *Exp = MCSymbolRefExpr::create(TOCEntry, + MCSymbolRefExpr::VK_PPC_L, + OutContext); TmpInst.getOperand(1) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); return; } + case PPC::ADDIStocHA8: { + assert(!IsDarwin && "TOC is an ELF/XCOFF construct"); - case PPC::ADDIStocHA: { - // Transform %xd = ADDIStocHA %x2, @sym - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + // Transform %xd = ADDIStocHA8 %x2, @sym + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); - // Change the opcode to ADDIS8. If the global address is external, has - // common linkage, is a non-local function address, or is a jump table - // address, then generate a TOC entry and reference that. Otherwise - // reference the symbol directly. + // Change the opcode to ADDIS8. If the global address is the address of + // an external symbol, is a jump table address, is a block address, or is a + // constant pool index with large code model enabled, then generate a TOC + // entry and reference that. Otherwise, reference the symbol directly. 
TmpInst.setOpcode(PPC::ADDIS8); + const MachineOperand &MO = MI->getOperand(2); - assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || - MO.isBlockAddress()) && - "Invalid operand for ADDIStocHA!"); - MCSymbol *MOSymbol = nullptr; - bool GlobalToc = false; - - if (MO.isGlobal()) { - const GlobalValue *GV = MO.getGlobal(); - MOSymbol = getSymbol(GV); - unsigned char GVFlags = Subtarget->classifyGlobalReference(GV); - GlobalToc = (GVFlags & PPCII::MO_NLP_FLAG); - } else if (MO.isCPI()) { - MOSymbol = GetCPISymbol(MO.getIndex()); - } else if (MO.isJTI()) { - MOSymbol = GetJTISymbol(MO.getIndex()); - } else if (MO.isBlockAddress()) { - MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress()); - } + assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) && + "Invalid operand for ADDIStocHA8!"); + + const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + const bool GlobalToc = + MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal()); if (GlobalToc || MO.isJTI() || MO.isBlockAddress() || - TM.getCodeModel() == CodeModel::Large) + (MO.isCPI() && TM.getCodeModel() == CodeModel::Large)) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); + const MCSymbolRefExpr::VariantKind VK = + IsAIX ? MCSymbolRefExpr::VK_PPC_U : MCSymbolRefExpr::VK_PPC_TOC_HA; + const MCExpr *Exp = - MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA, - OutContext); + MCSymbolRefExpr::create(MOSymbol, VK, OutContext); if (!MO.isJTI() && MO.getOffset()) Exp = MCBinaryExpr::createAdd(Exp, @@ -768,73 +860,59 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::LDtocL: { + assert(!IsDarwin && "TOC is an ELF/XCOFF construct"); + // Transform %xd = LDtocL @sym, %xs - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); - // Change the opcode to LD. If the global address is external, has - // common linkage, or is a jump table address, then reference the - // associated TOC entry. Otherwise reference the symbol directly. + // Change the opcode to LD. If the global address is the address of + // an external symbol, is a jump table address, is a block address, or is + // a constant pool index with large code model enabled, then generate a + // TOC entry and reference that. Otherwise, reference the symbol directly. TmpInst.setOpcode(PPC::LD); + const MachineOperand &MO = MI->getOperand(1); assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) && "Invalid operand for LDtocL!"); - MCSymbol *MOSymbol = nullptr; - if (MO.isJTI()) - MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex())); - else if (MO.isBlockAddress()) { - MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress()); - MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); - } - else if (MO.isCPI()) { - MOSymbol = GetCPISymbol(MO.getIndex()); - if (TM.getCodeModel() == CodeModel::Large) - MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); - } - else if (MO.isGlobal()) { - const GlobalValue *GV = MO.getGlobal(); - MOSymbol = getSymbol(GV); - LLVM_DEBUG( - unsigned char GVFlags = Subtarget->classifyGlobalReference(GV); - assert((GVFlags & PPCII::MO_NLP_FLAG) && - "LDtocL used on symbol that could be accessed directly is " - "invalid. Must match ADDIStocHA.")); + LLVM_DEBUG(assert( + (!MO.isGlobal() || Subtarget->isGVIndirectSymbol(MO.getGlobal())) && + "LDtocL used on symbol that could be accessed directly is " + "invalid. 
Must match ADDIStocHA8.")); + + const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + + if (!MO.isCPI() || TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); - } + const MCSymbolRefExpr::VariantKind VK = + IsAIX ? MCSymbolRefExpr::VK_PPC_L : MCSymbolRefExpr::VK_PPC_TOC_LO; const MCExpr *Exp = - MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO, - OutContext); + MCSymbolRefExpr::create(MOSymbol, VK, OutContext); TmpInst.getOperand(1) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); return; } case PPC::ADDItocL: { // Transform %xd = ADDItocL %xs, @sym - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); - // Change the opcode to ADDI8. If the global address is external, then - // generate a TOC entry and reference that. Otherwise reference the + // Change the opcode to ADDI8. If the global address is external, then + // generate a TOC entry and reference that. Otherwise, reference the // symbol directly. TmpInst.setOpcode(PPC::ADDI8); + const MachineOperand &MO = MI->getOperand(2); - assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL"); - MCSymbol *MOSymbol = nullptr; - - if (MO.isGlobal()) { - const GlobalValue *GV = MO.getGlobal(); - LLVM_DEBUG(unsigned char GVFlags = Subtarget->classifyGlobalReference(GV); - assert(!(GVFlags & PPCII::MO_NLP_FLAG) && - "Interposable definitions must use indirect access.")); - MOSymbol = getSymbol(GV); - } else if (MO.isCPI()) { - MOSymbol = GetCPISymbol(MO.getIndex()); - } + assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL."); + + LLVM_DEBUG(assert( + !(MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal())) && + "Interposable definitions must use indirect access.")); const MCExpr *Exp = - MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO, - OutContext); + MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO, *this), + MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext); TmpInst.getOperand(2) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); return; @@ -842,13 +920,13 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::ADDISgotTprelHA: { // Transform: %xd = ADDISgotTprelHA %x2, @sym // Into: %xd = ADDIS8 %x2, sym@got@tlsgd@ha - assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC"); + assert(IsPPC64 && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymGotTprel = - MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA, - OutContext); + MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA, + OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS8) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) @@ -858,16 +936,17 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::LDgotTprelL: case PPC::LDgotTprelL32: { // Transform %xd = LDgotTprelL @sym, %xs - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); // Change the opcode to LD. - TmpInst.setOpcode(isPPC64 ? PPC::LD : PPC::LWZ); + TmpInst.setOpcode(IsPPC64 ? 
PPC::LD : PPC::LWZ); const MachineOperand &MO = MI->getOperand(1); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); - const MCExpr *Exp = - MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO, - OutContext); + const MCExpr *Exp = MCSymbolRefExpr::create( + MOSymbol, IsPPC64 ? MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO + : MCSymbolRefExpr::VK_PPC_GOT_TPREL, + OutContext); TmpInst.getOperand(1) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); return; @@ -920,7 +999,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::ADDIStlsgdHA: { // Transform: %xd = ADDIStlsgdHA %x2, @sym // Into: %xd = ADDIS8 %x2, sym@got@tlsgd@ha - assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC"); + assert(IsPPC64 && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -943,11 +1022,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymGotTlsGD = MCSymbolRefExpr::create( - MOSymbol, Subtarget->isPPC64() ? MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO - : MCSymbolRefExpr::VK_PPC_GOT_TLSGD, + MOSymbol, IsPPC64 ? MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO + : MCSymbolRefExpr::VK_PPC_GOT_TLSGD, OutContext); EmitToStreamer(*OutStreamer, - MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI) + MCInstBuilder(IsPPC64 ? PPC::ADDI8 : PPC::ADDI) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) .addExpr(SymGotTlsGD)); @@ -965,7 +1044,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::ADDIStlsldHA: { // Transform: %xd = ADDIStlsldHA %x2, @sym // Into: %xd = ADDIS8 %x2, sym@got@tlsld@ha - assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC"); + assert(IsPPC64 && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -988,11 +1067,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymGotTlsLD = MCSymbolRefExpr::create( - MOSymbol, Subtarget->isPPC64() ? MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO - : MCSymbolRefExpr::VK_PPC_GOT_TLSLD, + MOSymbol, IsPPC64 ? MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO + : MCSymbolRefExpr::VK_PPC_GOT_TLSLD, OutContext); EmitToStreamer(*OutStreamer, - MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI) + MCInstBuilder(IsPPC64 ? PPC::ADDI8 : PPC::ADDI) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) .addExpr(SymGotTlsLD)); @@ -1021,7 +1100,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutContext); EmitToStreamer( *OutStreamer, - MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDIS8 : PPC::ADDIS) + MCInstBuilder(IsPPC64 ? PPC::ADDIS8 : PPC::ADDIS) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) .addExpr(SymDtprel)); @@ -1040,7 +1119,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO, OutContext); EmitToStreamer(*OutStreamer, - MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI) + MCInstBuilder(IsPPC64 ? 
PPC::ADDI8 : PPC::ADDI) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) .addExpr(SymDtprel)); @@ -1087,7 +1166,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // suite shows a handful of test cases that fail this check for // Darwin. Those need to be investigated before this sanity test // can be enabled for those subtargets. - if (!Subtarget->isDarwin()) { + if (!IsDarwin) { unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1; const MachineOperand &MO = MI->getOperand(OpNum); if (MO.isGlobal() && MO.getGlobal()->getAlignment() < 4) @@ -1098,7 +1177,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } } - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); EmitToStreamer(*OutStreamer, TmpInst); } @@ -1368,15 +1447,16 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { ".got2", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); OutStreamer->SwitchSection(Section); - for (MapVector::iterator I = TOC.begin(), - E = TOC.end(); I != E; ++I) { - OutStreamer->EmitLabel(I->second); - MCSymbol *S = I->first; + for (const auto &TOCMapPair : TOC) { + const MCSymbol *const TOCEntryTarget = TOCMapPair.first; + MCSymbol *const TOCEntryLabel = TOCMapPair.second; + + OutStreamer->EmitLabel(TOCEntryLabel); if (isPPC64) { - TS.emitTCEntry(*S); + TS.emitTCEntry(*TOCEntryTarget); } else { OutStreamer->EmitValueToAlignment(4); - OutStreamer->EmitSymbolValue(S, 4); + OutStreamer->EmitSymbolValue(TOCEntryTarget, 4); } } } @@ -1602,7 +1682,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { if (!Stubs.empty()) { // Switch with ".non_lazy_symbol_pointer" directive. OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection()); - EmitAlignment(isPPC64 ? 3 : 2); + EmitAlignment(isPPC64 ? Align(8) : Align(4)); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { // L_foo$stub: @@ -1643,6 +1723,106 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { return AsmPrinter::doFinalization(M); } +void PPCAIXAsmPrinter::SetupMachineFunction(MachineFunction &MF) { + // Get the function descriptor symbol. + CurrentFnDescSym = getSymbol(&MF.getFunction()); + // Set the containing csect. + MCSectionXCOFF *FnDescSec = OutStreamer->getContext().getXCOFFSection( + CurrentFnDescSym->getName(), XCOFF::XMC_DS, XCOFF::XTY_SD, + XCOFF::C_HIDEXT, SectionKind::getData()); + cast(CurrentFnDescSym)->setContainingCsect(FnDescSec); + + return AsmPrinter::SetupMachineFunction(MF); +} + +void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { + // Early error checking limiting what is supported. + if (GV->isThreadLocal()) + report_fatal_error("Thread local not yet supported on AIX."); + + if (GV->hasSection()) + report_fatal_error("Custom section for Data not yet supported."); + + if (GV->hasComdat()) + report_fatal_error("COMDAT not yet supported by AIX."); + + SectionKind GVKind = getObjFileLowering().getKindForGlobal(GV, TM); + if (!GVKind.isCommon() && !GVKind.isBSSLocal() && !GVKind.isData()) + report_fatal_error("Encountered a global variable kind that is " + "not supported yet."); + + // Create the containing csect and switch to it. + MCSectionXCOFF *CSect = cast( + getObjFileLowering().SectionForGlobal(GV, GVKind, TM)); + OutStreamer->SwitchSection(CSect); + + // Create the symbol, set its storage class, and emit it. 
+ MCSymbolXCOFF *GVSym = cast(getSymbol(GV)); + GVSym->setStorageClass( + TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV)); + GVSym->setContainingCsect(CSect); + + const DataLayout &DL = GV->getParent()->getDataLayout(); + + // Handle common symbols. + if (GVKind.isCommon() || GVKind.isBSSLocal()) { + unsigned Align = + GV->getAlignment() ? GV->getAlignment() : DL.getPreferredAlignment(GV); + uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType()); + + if (GVKind.isBSSLocal()) + OutStreamer->EmitXCOFFLocalCommonSymbol(GVSym, Size, Align); + else + OutStreamer->EmitCommonSymbol(GVSym, Size, Align); + return; + } + + MCSymbol *EmittedInitSym = GVSym; + EmitLinkage(GV, EmittedInitSym); + EmitAlignment(getGVAlignment(GV, DL), GV); + OutStreamer->EmitLabel(EmittedInitSym); + EmitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer()); +} + +void PPCAIXAsmPrinter::EmitFunctionDescriptor() { + const DataLayout &DL = getDataLayout(); + const unsigned PointerSize = DL.getPointerSizeInBits() == 64 ? 8 : 4; + + MCSectionSubPair Current = OutStreamer->getCurrentSection(); + // Emit function descriptor. + OutStreamer->SwitchSection( + cast(CurrentFnDescSym)->getContainingCsect()); + OutStreamer->EmitLabel(CurrentFnDescSym); + // Emit function entry point address. + OutStreamer->EmitValue(MCSymbolRefExpr::create(CurrentFnSym, OutContext), + PointerSize); + // Emit TOC base address. + MCSymbol *TOCBaseSym = OutContext.getOrCreateSymbol(StringRef("TOC[TC0]")); + OutStreamer->EmitValue(MCSymbolRefExpr::create(TOCBaseSym, OutContext), + PointerSize); + // Emit a null environment pointer. + OutStreamer->EmitIntValue(0, PointerSize); + + OutStreamer->SwitchSection(Current.first, Current.second); +} + +void PPCAIXAsmPrinter::EmitEndOfAsmFile(Module &M) { + // If there are no functions in this module, we will never need to reference + // the TOC base. + if (M.empty()) + return; + + // Emit TOC base. + MCSymbol *TOCBaseSym = OutContext.getOrCreateSymbol(StringRef("TOC[TC0]")); + MCSectionXCOFF *TOCBaseSection = OutStreamer->getContext().getXCOFFSection( + StringRef("TOC"), XCOFF::XMC_TC0, XCOFF::XTY_SD, XCOFF::C_HIDEXT, + SectionKind::getData()); + cast(TOCBaseSym)->setContainingCsect(TOCBaseSection); + // Switch to section to emit TOC base. + OutStreamer->SwitchSection(TOCBaseSection); +} + + /// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code /// for a MachineFunction to the given output stream, in a format that the /// Darwin assembler can deal with. diff --git a/lib/Target/PowerPC/PPCBranchCoalescing.cpp b/lib/Target/PowerPC/PPCBranchCoalescing.cpp index 5e9a661f8f0b..d325b078979f 100644 --- a/lib/Target/PowerPC/PPCBranchCoalescing.cpp +++ b/lib/Target/PowerPC/PPCBranchCoalescing.cpp @@ -340,9 +340,10 @@ bool PPCBranchCoalescing::identicalOperands( if (Op1.isIdenticalTo(Op2)) { // filter out instructions with physical-register uses - if (Op1.isReg() && TargetRegisterInfo::isPhysicalRegister(Op1.getReg()) - // If the physical register is constant then we can assume the value - // has not changed between uses. + if (Op1.isReg() && + Register::isPhysicalRegister(Op1.getReg()) + // If the physical register is constant then we can assume the value + // has not changed between uses. && !(Op1.isUse() && MRI->isConstantPhysReg(Op1.getReg()))) { LLVM_DEBUG(dbgs() << "The operands are not provably identical.\n"); return false; @@ -355,8 +356,8 @@ bool PPCBranchCoalescing::identicalOperands( // definition of the register produces the same value. 
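// A minimal standalone sketch of the three pointer-sized words that
// EmitFunctionDescriptor lays down for each AIX function; the struct name is
// a stand-in and only models the 64-bit case.
#include <cstdint>

struct AIXFunctionDescriptor64 {
  uint64_t EntryPoint;  // address of the function body (CurrentFnSym)
  uint64_t TOCBase;     // address of the module's TOC base, TOC[TC0]
  uint64_t Environment; // emitted as 0 above
};
static_assert(sizeof(AIXFunctionDescriptor64) == 24,
              "three pointer-sized words on a 64-bit target");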
If they produce the // same value, consider them to be identical. if (Op1.isReg() && Op2.isReg() && - TargetRegisterInfo::isVirtualRegister(Op1.getReg()) && - TargetRegisterInfo::isVirtualRegister(Op2.getReg())) { + Register::isVirtualRegister(Op1.getReg()) && + Register::isVirtualRegister(Op2.getReg())) { MachineInstr *Op1Def = MRI->getVRegDef(Op1.getReg()); MachineInstr *Op2Def = MRI->getVRegDef(Op2.getReg()); if (TII->produceSameValue(*Op1Def, *Op2Def, MRI)) { @@ -456,7 +457,7 @@ bool PPCBranchCoalescing::canMoveToEnd(const MachineInstr &MI, << TargetMBB.getNumber() << "\n"); for (auto &Use : MI.uses()) { - if (Use.isReg() && TargetRegisterInfo::isVirtualRegister(Use.getReg())) { + if (Use.isReg() && Register::isVirtualRegister(Use.getReg())) { MachineInstr *DefInst = MRI->getVRegDef(Use.getReg()); if (DefInst->isPHI() && DefInst->getParent() == MI.getParent()) { LLVM_DEBUG(dbgs() << " *** Cannot move this instruction ***\n"); diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp index 793d690baec3..cdff4d383d23 100644 --- a/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -81,21 +81,20 @@ FunctionPass *llvm::createPPCBranchSelectionPass() { /// original Offset. unsigned PPCBSel::GetAlignmentAdjustment(MachineBasicBlock &MBB, unsigned Offset) { - unsigned Align = MBB.getAlignment(); - if (!Align) + const Align Alignment = MBB.getAlignment(); + if (Alignment == Align::None()) return 0; - unsigned AlignAmt = 1 << Align; - unsigned ParentAlign = MBB.getParent()->getAlignment(); + const Align ParentAlign = MBB.getParent()->getAlignment(); - if (Align <= ParentAlign) - return OffsetToAlignment(Offset, AlignAmt); + if (Alignment <= ParentAlign) + return offsetToAlignment(Offset, Alignment); // The alignment of this MBB is larger than the function's alignment, so we // can't tell whether or not it will insert nops. Assume that it will. 
if (FirstImpreciseBlock < 0) FirstImpreciseBlock = MBB.getNumber(); - return AlignAmt + OffsetToAlignment(Offset, AlignAmt); + return Alignment.value() + offsetToAlignment(Offset, Alignment); } /// We need to be careful about the offset of the first block in the function @@ -179,7 +178,7 @@ int PPCBSel::computeBranchSize(MachineFunction &Fn, const MachineBasicBlock *Dest, unsigned BrOffset) { int BranchSize; - unsigned MaxAlign = 2; + Align MaxAlign = Align(4); bool NeedExtraAdjustment = false; if (Dest->getNumber() <= Src->getNumber()) { // If this is a backwards branch, the delta is the offset from the @@ -192,8 +191,7 @@ int PPCBSel::computeBranchSize(MachineFunction &Fn, BranchSize += BlockSizes[DestBlock].first; for (unsigned i = DestBlock+1, e = Src->getNumber(); i < e; ++i) { BranchSize += BlockSizes[i].first; - MaxAlign = std::max(MaxAlign, - Fn.getBlockNumbered(i)->getAlignment()); + MaxAlign = std::max(MaxAlign, Fn.getBlockNumbered(i)->getAlignment()); } NeedExtraAdjustment = (FirstImpreciseBlock >= 0) && @@ -207,8 +205,7 @@ int PPCBSel::computeBranchSize(MachineFunction &Fn, MaxAlign = std::max(MaxAlign, Dest->getAlignment()); for (unsigned i = StartBlock+1, e = Dest->getNumber(); i != e; ++i) { BranchSize += BlockSizes[i].first; - MaxAlign = std::max(MaxAlign, - Fn.getBlockNumbered(i)->getAlignment()); + MaxAlign = std::max(MaxAlign, Fn.getBlockNumbered(i)->getAlignment()); } NeedExtraAdjustment = (FirstImpreciseBlock >= 0) && @@ -258,7 +255,7 @@ int PPCBSel::computeBranchSize(MachineFunction &Fn, // The computed offset is at most ((1 << alignment) - 4) bytes smaller // than actual offset. So we add this number to the offset for safety. if (NeedExtraAdjustment) - BranchSize += (1 << MaxAlign) - 4; + BranchSize += MaxAlign.value() - 4; return BranchSize; } @@ -339,16 +336,16 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { // 1. CR register // 2. Target MBB PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm(); - unsigned CRReg = I->getOperand(1).getReg(); + Register CRReg = I->getOperand(1).getReg(); // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition. 
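// A minimal standalone sketch of the padding estimate GetAlignmentAdjustment
// now computes with the Align type, using plain power-of-two byte counts
// (helper names are illustrative, not the LLVM offsetToAlignment API).
#include <cassert>
#include <cstdint>

uint64_t bytesToBoundary(uint64_t Offset, uint64_t Alignment) {
  assert(Alignment != 0 && (Alignment & (Alignment - 1)) == 0 && "power of two");
  return (-Offset) & (Alignment - 1); // 0 if Offset is already aligned
}

uint64_t alignmentAdjustment(uint64_t Offset, uint64_t BlockAlign,
                             uint64_t FuncAlign) {
  if (BlockAlign <= 1)
    return 0;                                  // Align::None(): no padding
  if (BlockAlign <= FuncAlign)
    return bytesToBoundary(Offset, BlockAlign);
  // Block aligned more strictly than its function: we cannot tell whether the
  // assembler will insert nops, so assume the worst case.
  return BlockAlign + bytesToBoundary(Offset, BlockAlign);
}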
BuildMI(MBB, I, dl, TII->get(PPC::BCC)) .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2); } else if (I->getOpcode() == PPC::BC) { - unsigned CRBit = I->getOperand(0).getReg(); + Register CRBit = I->getOperand(0).getReg(); BuildMI(MBB, I, dl, TII->get(PPC::BCn)).addReg(CRBit).addImm(2); } else if (I->getOpcode() == PPC::BCn) { - unsigned CRBit = I->getOperand(0).getReg(); + Register CRBit = I->getOperand(0).getReg(); BuildMI(MBB, I, dl, TII->get(PPC::BC)).addReg(CRBit).addImm(2); } else if (I->getOpcode() == PPC::BDNZ) { BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2); diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 264d6b590f95..d8425d89da92 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -162,7 +162,7 @@ class PPCFastISel final : public FastISel { bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt, unsigned DestReg, const PPC::Predicate Pred); - bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, + bool PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr, const TargetRegisterClass *RC, bool IsZExt = true, unsigned FP64LoadOpc = PPC::LFD); bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr); @@ -451,7 +451,7 @@ void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset, // Emit a load instruction if possible, returning true if we succeeded, // otherwise false. See commentary below for how the register class of // the load is determined. -bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, +bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr, const TargetRegisterClass *RC, bool IsZExt, unsigned FP64LoadOpc) { unsigned Opc; @@ -469,7 +469,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, (ResultReg ? MRI.getRegClass(ResultReg) : (RC ? RC : (VT == MVT::f64 ? (HasSPE ? &PPC::SPERCRegClass : &PPC::F8RCRegClass) : - (VT == MVT::f32 ? (HasSPE ? &PPC::SPE4RCRegClass : &PPC::F4RCRegClass) : + (VT == MVT::f32 ? (HasSPE ? &PPC::GPRCRegClass : &PPC::F4RCRegClass) : (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass : &PPC::GPRC_and_GPRC_NOR0RegClass))))); @@ -612,7 +612,7 @@ bool PPCFastISel::SelectLoad(const Instruction *I) { const TargetRegisterClass *RC = AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr; - unsigned ResultReg = 0; + Register ResultReg = 0; if (!PPCEmitLoad(VT, ResultReg, Addr, RC, true, PPCSubTarget->hasSPE() ? PPC::EVLDD : PPC::LFD)) return false; @@ -989,7 +989,7 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) { unsigned DestReg; auto RC = MRI.getRegClass(SrcReg); if (PPCSubTarget->hasSPE()) { - DestReg = createResultReg(&PPC::SPE4RCRegClass); + DestReg = createResultReg(&PPC::GPRCRegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::EFSCFD), DestReg) .addReg(SrcReg); @@ -1051,7 +1051,7 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg, } const TargetRegisterClass *RC = &PPC::F8RCRegClass; - unsigned ResultReg = 0; + Register ResultReg = 0; if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc)) return 0; @@ -1176,7 +1176,7 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT, const TargetRegisterClass *RC = AssignedReg ? 
MRI.getRegClass(AssignedReg) : nullptr; - unsigned ResultReg = 0; + Register ResultReg = 0; if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned)) return 0; @@ -1229,9 +1229,9 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { if (PPCSubTarget->hasSPE()) { DestReg = createResultReg(&PPC::GPRCRegClass); if (IsSigned) - Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ; + Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ; else - Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ; + Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ; } else if (isVSFRCRegClass(RC)) { DestReg = createResultReg(&PPC::VSFRCRegClass); if (DstVT == MVT::i32) @@ -1717,7 +1717,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) { if (const ConstantInt *CI = dyn_cast(RV)) { CCValAssign &VA = ValLocs[0]; - unsigned RetReg = VA.getLocReg(); + Register RetReg = VA.getLocReg(); // We still need to worry about properly extending the sign. For example, // we could have only a single bit or a constant that needs zero // extension rather than sign extension. Make sure we pass the return @@ -2002,7 +2002,7 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) { const bool HasSPE = PPCSubTarget->hasSPE(); const TargetRegisterClass *RC; if (HasSPE) - RC = ((VT == MVT::f32) ? &PPC::SPE4RCRegClass : &PPC::SPERCRegClass); + RC = ((VT == MVT::f32) ? &PPC::GPRCRegClass : &PPC::SPERCRegClass); else RC = ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass); @@ -2031,8 +2031,8 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addImm(0).addReg(TmpReg).addMemOperand(MMO); } else { - // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)). - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA), + // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA8(X2, Idx)). + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA8), TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx); // But for large code model, we must generate a LDtocL followed // by the LF[SD]. 
@@ -2085,16 +2085,15 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { // or externally available linkage, a non-local function address, or a // jump table address (not yet needed), or if we are generating code // for large code model, we generate: - // LDtocL(GV, ADDIStocHA(%x2, GV)) + // LDtocL(GV, ADDIStocHA8(%x2, GV)) // Otherwise we generate: - // ADDItocL(ADDIStocHA(%x2, GV), GV) - // Either way, start with the ADDIStocHA: + // ADDItocL(ADDIStocHA8(%x2, GV), GV) + // Either way, start with the ADDIStocHA8: unsigned HighPartReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA8), HighPartReg).addReg(PPC::X2).addGlobalAddress(GV); - unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV); - if (GVFlags & PPCII::MO_NLP_FLAG) { + if (PPCSubTarget->isGVIndirectSymbol(GV)) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL), DestReg).addGlobalAddress(GV).addReg(HighPartReg); } else { @@ -2353,7 +2352,7 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, if (!PPCComputeAddress(LI->getOperand(0), Addr)) return false; - unsigned ResultReg = MI->getOperand(0).getReg(); + Register ResultReg = MI->getOperand(0).getReg(); if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt, PPCSubTarget->hasSPE() ? PPC::EVLDD : PPC::LFD)) @@ -2464,7 +2463,7 @@ namespace llvm { const TargetLibraryInfo *LibInfo) { // Only available on 64-bit ELF for now. const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget(); - if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) + if (Subtarget.is64BitELFABI()) return new PPCFastISel(FuncInfo, LibInfo); return nullptr; } diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index ebfb1ef7f49b..06a4d183e781 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -47,13 +47,15 @@ static const MCPhysReg VRRegNo[] = { }; static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { - if (STI.isDarwinABI()) + if (STI.isDarwinABI() || STI.isAIXABI()) return STI.isPPC64() ? 16 : 8; // SVR4 ABI: return STI.isPPC64() ? 16 : 4; } static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) { + if (STI.isAIXABI()) + return STI.isPPC64() ? 40 : 20; return STI.isELFv2ABI() ? 24 : 40; } @@ -88,6 +90,11 @@ static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U; } +static unsigned computeCRSaveOffset() { + // The condition register save offset needs to be updated for AIX PPC32. + return 8; +} + PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, STI.getPlatformStackAlignment(), 0), @@ -95,7 +102,8 @@ PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) TOCSaveOffset(computeTOCSaveOffset(Subtarget)), FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)), LinkageSize(computeLinkageSize(Subtarget)), - BasePointerSaveOffset(computeBasePointerSaveOffset(STI)) {} + BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)), + CRSaveOffset(computeCRSaveOffset()) {} // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. 
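// A minimal standalone sketch collapsing the ABI-dependent link-area offsets
// from the compute* helpers above into one table (the enum is a stand-in for
// the real subtarget queries; the CR save offset stays the fixed 8 noted
// above).
enum class PPCABIKind { SVR4_32, ELFv1_64, ELFv2_64, AIX32, AIX64, Darwin32, Darwin64 };

unsigned returnSaveOffset(PPCABIKind ABI) {
  switch (ABI) {
  case PPCABIKind::SVR4_32:                           return 4;
  case PPCABIKind::AIX32: case PPCABIKind::Darwin32:  return 8;
  default:                                            return 16; // all 64-bit ABIs
  }
}

unsigned tocSaveOffset(PPCABIKind ABI) {
  switch (ABI) {
  case PPCABIKind::AIX32:    return 20;
  case PPCABIKind::AIX64:    return 40;
  case PPCABIKind::ELFv2_64: return 24;
  default:                   return 40; // ELFv1 and the remaining cases
  }
}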
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( @@ -370,8 +378,8 @@ static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) { return; } - unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); if ((UsedRegMask & 0xFFFF) == UsedRegMask) { if (DstReg != SrcReg) @@ -781,15 +789,18 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, bool isPPC64 = Subtarget.isPPC64(); // Get the ABI. bool isSVR4ABI = Subtarget.isSVR4ABI(); + bool isAIXABI = Subtarget.isAIXABI(); bool isELFv2ABI = Subtarget.isELFv2ABI(); - assert((Subtarget.isDarwinABI() || isSVR4ABI) && - "Currently only Darwin and SVR4 ABIs are supported for PowerPC."); + assert((Subtarget.isDarwinABI() || isSVR4ABI || isAIXABI) && + "Unsupported PPC ABI."); // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, // process it. if (!isSVR4ABI) for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { + if (isAIXABI) + report_fatal_error("UPDATE_VRSAVE is unexpected on AIX."); HandleVRSaveUpdate(*MBBI, TII); break; } @@ -819,7 +830,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, bool HasRedZone = isPPC64 || !isSVR4ABI; unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; - unsigned BPReg = RegInfo->getBaseRegister(MF); + Register BPReg = RegInfo->getBaseRegister(MF); unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR; unsigned TOCReg = isPPC64 ? PPC::X2 : PPC::R2; @@ -908,6 +919,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, assert((isPPC64 || !MustSaveCR) && "Prologue CR saving supported only in 64-bit mode"); + if (MustSaveCR && isAIXABI) + report_fatal_error("Prologue CR saving is unimplemented on AIX."); + // Check if we can move the stack update instruction (stdu) down the prologue // past the callee saves. Hopefully this will avoid the situation where the // saves are waiting for the update on the store with update to complete. @@ -966,7 +980,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, MIB.addReg(MustSaveCRs[i], CrState); BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) .addReg(TempReg, getKillRegState(true)) - .addImm(8) + .addImm(getCRSaveOffset()) .addReg(SPReg); } @@ -1020,7 +1034,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, assert(HasRedZone && "A red zone is always available on PPC64"); BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) .addReg(TempReg, getKillRegState(true)) - .addImm(8) + .addImm(getCRSaveOffset()) .addReg(SPReg); } @@ -1324,7 +1338,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, // actually saved gets its own CFI record. unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2; unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( - nullptr, MRI->getDwarfRegNum(CRReg, true), 8)); + nullptr, MRI->getDwarfRegNum(CRReg, true), getCRSaveOffset())); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); continue; @@ -1387,7 +1401,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; - unsigned BPReg = RegInfo->getBaseRegister(MF); + Register BPReg = RegInfo->getBaseRegister(MF); unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; unsigned ScratchReg = 0; unsigned TempReg = isPPC64 ? 
PPC::X12 : PPC::R12; // another scratch reg @@ -1590,7 +1604,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // is live here. assert(HasRedZone && "Expecting red zone"); BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) - .addImm(8) + .addImm(getCRSaveOffset()) .addReg(SPReg); for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) @@ -1614,7 +1628,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, assert(isPPC64 && "Expecting 64-bit mode"); assert(RBReg == SPReg && "Should be using SP as a base register"); BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) - .addImm(8) + .addImm(getCRSaveOffset()) .addReg(RBReg); } @@ -1762,8 +1776,8 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, // Save R31 if necessary int FPSI = FI->getFramePointerSaveIndex(); - bool isPPC64 = Subtarget.isPPC64(); - bool isDarwinABI = Subtarget.isDarwinABI(); + const bool isPPC64 = Subtarget.isPPC64(); + const bool IsDarwinABI = Subtarget.isDarwinABI(); MachineFrameInfo &MFI = MF.getFrameInfo(); // If the frame pointer save index hasn't been defined yet. @@ -1812,7 +1826,7 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the // function uses CR 2, 3, or 4. - if (!isPPC64 && !isDarwinABI && + if (!isPPC64 && !IsDarwinABI && (SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) || SavedRegs.test(PPC::CR4))) { @@ -1872,8 +1886,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, assert((!MF.getInfo()->mustSaveTOC() || (Reg != PPC::X2 && Reg != PPC::R2)) && "Not expecting to try to spill R2 in a function that must save TOC"); - if (PPC::GPRCRegClass.contains(Reg) || - PPC::SPE4RCRegClass.contains(Reg)) { + if (PPC::GPRCRegClass.contains(Reg)) { HasGPSaveArea = true; GPRegs.push_back(CSI[i]); @@ -1967,7 +1980,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, assert(FI && "No Base Pointer Save Slot!"); MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); - unsigned BP = RegInfo->getBaseRegister(MF); + Register BP = RegInfo->getBaseRegister(MF); if (PPC::G8RCRegClass.contains(BP)) { MinG8R = std::min(MinG8R, BP); HasG8SaveArea = true; @@ -2428,6 +2441,26 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } +unsigned PPCFrameLowering::getTOCSaveOffset() const { + if (Subtarget.isAIXABI()) + // TOC save/restore is normally handled by the linker. + // Indirect calls should hit this limitation. 
+ report_fatal_error("TOC save is not implemented on AIX yet."); + return TOCSaveOffset; +} + +unsigned PPCFrameLowering::getFramePointerSaveOffset() const { + if (Subtarget.isAIXABI()) + report_fatal_error("FramePointer is not implemented on AIX yet."); + return FramePointerSaveOffset; +} + +unsigned PPCFrameLowering::getBasePointerSaveOffset() const { + if (Subtarget.isAIXABI()) + report_fatal_error("BasePointer is not implemented on AIX yet."); + return BasePointerSaveOffset; +} + bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { if (MF.getInfo()->shrinkWrapDisabled()) return false; diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index d116e9fd22e1..a5fbc9acbb28 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -26,6 +26,7 @@ class PPCFrameLowering: public TargetFrameLowering { const unsigned FramePointerSaveOffset; const unsigned LinkageSize; const unsigned BasePointerSaveOffset; + const unsigned CRSaveOffset; /** * Find register[s] that can be used in function prologue and epilogue @@ -142,15 +143,19 @@ public: /// getTOCSaveOffset - Return the previous frame offset to save the /// TOC register -- 64-bit SVR4 ABI only. - unsigned getTOCSaveOffset() const { return TOCSaveOffset; } + unsigned getTOCSaveOffset() const; /// getFramePointerSaveOffset - Return the previous frame offset to save the /// frame pointer. - unsigned getFramePointerSaveOffset() const { return FramePointerSaveOffset; } + unsigned getFramePointerSaveOffset() const; /// getBasePointerSaveOffset - Return the previous frame offset to save the /// base pointer. - unsigned getBasePointerSaveOffset() const { return BasePointerSaveOffset; } + unsigned getBasePointerSaveOffset() const; + + /// getCRSaveOffset - Return the previous frame offset to save the + /// CR register. + unsigned getCRSaveOffset() const { return CRSaveOffset; } /// getLinkageSize - Return the size of the PowerPC ABI linkage area. /// diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 543cac075f55..4ad6c88233fe 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -371,7 +371,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { // by the scheduler. Detect them now. bool HasVectorVReg = false; for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) { HasVectorVReg = true; break; @@ -391,8 +391,8 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { // Create two vregs - one to hold the VRSAVE register that is live-in to the // function and one for the value after having bits or'd into it. 
- unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); - unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); + Register InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); + Register UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo(); MachineBasicBlock &EntryBB = *Fn.begin(); @@ -447,7 +447,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { } else { BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); - unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); + Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::UpdateGBR), GlobalBaseReg) .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg); @@ -5065,52 +5065,95 @@ void PPCDAGToDAGISel::Select(SDNode *N) { return; } case PPCISD::TOC_ENTRY: { - assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) && - "Only supported for 64-bit ABI and 32-bit SVR4"); - if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) { - SDValue GA = N->getOperand(0); - SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA, - N->getOperand(1)); - transferMemOperands(N, MN); - ReplaceNode(N, MN); - return; - } + const bool isPPC64 = PPCSubTarget->isPPC64(); + const bool isELFABI = PPCSubTarget->isSVR4ABI(); + const bool isAIXABI = PPCSubTarget->isAIXABI(); + + assert(!PPCSubTarget->isDarwin() && "TOC is an ELF/XCOFF construct"); + + // PowerPC only support small, medium and large code model. + const CodeModel::Model CModel = TM.getCodeModel(); + assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) && + "PowerPC doesn't support tiny or kernel code models."); - // For medium and large code model, we generate two instructions as - // described below. Otherwise we allow SelectCodeCommon to handle this, + if (isAIXABI && CModel == CodeModel::Medium) + report_fatal_error("Medium code model is not supported on AIX."); + + // For 64-bit small code model, we allow SelectCodeCommon to handle this, // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. - CodeModel::Model CModel = TM.getCodeModel(); - if (CModel != CodeModel::Medium && CModel != CodeModel::Large) + if (isPPC64 && CModel == CodeModel::Small) break; - // The first source operand is a TargetGlobalAddress or a TargetJumpTable. - // If it must be toc-referenced according to PPCSubTarget, we generate: - // LDtocL(@sym, ADDIStocHA(%x2, @sym)) + // Handle 32-bit small code model. + if (!isPPC64) { + // Transforms the ISD::TOC_ENTRY node to a PPCISD::LWZtoc. + auto replaceWithLWZtoc = [this, &dl](SDNode *TocEntry) { + SDValue GA = TocEntry->getOperand(0); + SDValue TocBase = TocEntry->getOperand(1); + SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA, + TocBase); + transferMemOperands(TocEntry, MN); + ReplaceNode(TocEntry, MN); + }; + + if (isELFABI) { + assert(TM.isPositionIndependent() && + "32-bit ELF can only have TOC entries in position independent" + " code."); + // 32-bit ELF always uses a small code model toc access. 
+ replaceWithLWZtoc(N); + return; + } + + if (isAIXABI && CModel == CodeModel::Small) { + replaceWithLWZtoc(N); + return; + } + } + + assert(CModel != CodeModel::Small && "All small code models handled."); + + assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit" + " ELF/AIX or 32-bit AIX in the following."); + + // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode + // or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We + // generate two instructions as described below. The first source operand + // is a symbol reference. If it must be toc-referenced according to + // PPCSubTarget, we generate: + // [32-bit AIX] + // LWZtocL(@sym, ADDIStocHA(%r2, @sym)) + // [64-bit ELF/AIX] + // LDtocL(@sym, ADDIStocHA8(%x2, @sym)) // Otherwise we generate: - // ADDItocL(ADDIStocHA(%x2, @sym), @sym) + // ADDItocL(ADDIStocHA8(%x2, @sym), @sym) SDValue GA = N->getOperand(0); SDValue TOCbase = N->getOperand(1); - SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64, - TOCbase, GA); + + EVT VT = isPPC64 ? MVT::i64 : MVT::i32; + SDNode *Tmp = CurDAG->getMachineNode( + isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA); + if (PPCLowering->isAccessedAsGotIndirect(GA)) { - // If it is access as got-indirect, we need an extra LD to load + // If it is accessed as got-indirect, we need an extra LWZ/LD to load // the address. - SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, - SDValue(Tmp, 0)); + SDNode *MN = CurDAG->getMachineNode( + isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0)); + transferMemOperands(N, MN); ReplaceNode(N, MN); return; } - // Build the address relative to the TOC-pointer.. + // Build the address relative to the TOC-pointer. ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, SDValue(Tmp, 0), GA)); return; } case PPCISD::PPC32_PICGOT: // Generate a PIC-safe GOT reference. - assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() && - "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4"); + assert(PPCSubTarget->is32BitELFABI() && + "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4"); CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::i32); @@ -6456,7 +6499,7 @@ void PPCDAGToDAGISel::PeepholePPC64() { continue; if (!HBase.isMachineOpcode() || - HBase.getMachineOpcode() != PPC::ADDIStocHA) + HBase.getMachineOpcode() != PPC::ADDIStocHA8) continue; if (!Base.hasOneUse() || !HBase.hasOneUse()) diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 24d50074860d..8cf6a660b08b 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -139,13 +139,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all // arguments are at least 4/8 bytes aligned. bool isPPC64 = Subtarget.isPPC64(); - setMinStackArgumentAlignment(isPPC64 ? 8:4); + setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4)); // Set up the register classes. 
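// A minimal standalone sketch of the TOC_ENTRY selection decision after this
// hunk, returning the emitted sequence as text for illustration (the real
// code builds machine nodes; 32-bit medium/large here only arises for AIX).
#include <string>

std::string selectTOCEntry(bool IsPPC64, bool MediumOrLargeCM, bool GotIndirect) {
  if (!MediumOrLargeCM)
    // 64-bit small falls through to LDtoc/LDtocJTI/LDtocCPT/LDtocBA matching;
    // 32-bit small (PIC ELF, or AIX) becomes a single LWZtoc.
    return IsPPC64 ? "LDtoc-family @sym(%x2)" : "LWZtoc @sym(%r2)";
  // Medium/large: high-part ADDIS, then either a TOC load for got-indirect
  // symbols or a low-part ADDI.
  std::string Hi = IsPPC64 ? "ADDIStocHA8 %x2, @sym" : "ADDIStocHA %r2, @sym";
  if (GotIndirect)
    return std::string(IsPPC64 ? "LDtocL" : "LWZtocL") + " @sym, (" + Hi + ")";
  return "ADDItocL (" + Hi + "), @sym";
}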
addRegisterClass(MVT::i32, &PPC::GPRCRegClass); if (!useSoftFloat()) { if (hasSPE()) { - addRegisterClass(MVT::f32, &PPC::SPE4RCRegClass); + addRegisterClass(MVT::f32, &PPC::GPRCRegClass); addRegisterClass(MVT::f64, &PPC::SPERCRegClass); } else { addRegisterClass(MVT::f32, &PPC::F4RCRegClass); @@ -431,28 +431,26 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); - if (Subtarget.isSVR4ABI()) { - if (isPPC64) { - // VAARG always uses double-word chunks, so promote anything smaller. - setOperationAction(ISD::VAARG, MVT::i1, Promote); - AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64); - setOperationAction(ISD::VAARG, MVT::i8, Promote); - AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64); - setOperationAction(ISD::VAARG, MVT::i16, Promote); - AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64); - setOperationAction(ISD::VAARG, MVT::i32, Promote); - AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64); - setOperationAction(ISD::VAARG, MVT::Other, Expand); - } else { - // VAARG is custom lowered with the 32-bit SVR4 ABI. - setOperationAction(ISD::VAARG, MVT::Other, Custom); - setOperationAction(ISD::VAARG, MVT::i64, Custom); - } + if (Subtarget.is64BitELFABI()) { + // VAARG always uses double-word chunks, so promote anything smaller. + setOperationAction(ISD::VAARG, MVT::i1, Promote); + AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64); + setOperationAction(ISD::VAARG, MVT::i8, Promote); + AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64); + setOperationAction(ISD::VAARG, MVT::i16, Promote); + AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64); + setOperationAction(ISD::VAARG, MVT::i32, Promote); + AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + } else if (Subtarget.is32BitELFABI()) { + // VAARG is custom lowered with the 32-bit SVR4 ABI. + setOperationAction(ISD::VAARG, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::i64, Custom); } else setOperationAction(ISD::VAARG, MVT::Other, Expand); - if (Subtarget.isSVR4ABI() && !isPPC64) - // VACOPY is custom lowered with the 32-bit SVR4 ABI. + // VACOPY is custom lowered with the 32-bit SVR4 ABI. + if (Subtarget.is32BitELFABI()) setOperationAction(ISD::VACOPY , MVT::Other, Custom); else setOperationAction(ISD::VACOPY , MVT::Other, Expand); @@ -553,17 +551,25 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, if (Subtarget.hasAltivec()) { // First set operation action for all vector types to expand. Then we // will selectively turn on ones that can be effectively codegen'd. - for (MVT VT : MVT::vector_valuetypes()) { + for (MVT VT : MVT::fixedlen_vector_valuetypes()) { // add/sub are legal for all supported vector VT's. setOperationAction(ISD::ADD, VT, Legal); setOperationAction(ISD::SUB, VT, Legal); // For v2i64, these are only valid with P8Vector. This is corrected after // the loop. 
- setOperationAction(ISD::SMAX, VT, Legal); - setOperationAction(ISD::SMIN, VT, Legal); - setOperationAction(ISD::UMAX, VT, Legal); - setOperationAction(ISD::UMIN, VT, Legal); + if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) { + setOperationAction(ISD::SMAX, VT, Legal); + setOperationAction(ISD::SMIN, VT, Legal); + setOperationAction(ISD::UMAX, VT, Legal); + setOperationAction(ISD::UMIN, VT, Legal); + } + else { + setOperationAction(ISD::SMAX, VT, Expand); + setOperationAction(ISD::SMIN, VT, Expand); + setOperationAction(ISD::UMAX, VT, Expand); + setOperationAction(ISD::UMIN, VT, Expand); + } if (Subtarget.hasVSX()) { setOperationAction(ISD::FMAXNUM, VT, Legal); @@ -646,7 +652,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::ROTL, VT, Expand); setOperationAction(ISD::ROTR, VT, Expand); - for (MVT InnerVT : MVT::vector_valuetypes()) { + for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { setTruncStoreAction(VT, InnerVT, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); @@ -944,7 +950,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand); setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal); - setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand); setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal); setOperationAction(ISD::FNEG , MVT::v4f64, Legal); @@ -1118,6 +1123,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setTargetDAGCombine(ISD::ANY_EXTEND); setTargetDAGCombine(ISD::TRUNCATE); + setTargetDAGCombine(ISD::VECTOR_SHUFFLE); + if (Subtarget.useCRBits()) { setTargetDAGCombine(ISD::TRUNCATE); @@ -1172,9 +1179,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setJumpIsExpensive(); } - setMinFunctionAlignment(2); + setMinFunctionAlignment(Align(4)); if (Subtarget.isDarwin()) - setPrefFunctionAlignment(4); + setPrefFunctionAlignment(Align(16)); switch (Subtarget.getDarwinDirective()) { default: break; @@ -1191,8 +1198,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, case PPC::DIR_PWR7: case PPC::DIR_PWR8: case PPC::DIR_PWR9: - setPrefFunctionAlignment(4); - setPrefLoopAlignment(4); + setPrefLoopAlignment(Align(16)); + setPrefFunctionAlignment(Align(16)); break; } @@ -1352,6 +1359,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::SExtVElems: return "PPCISD::SExtVElems"; case PPCISD::LXVD2X: return "PPCISD::LXVD2X"; case PPCISD::STXVD2X: return "PPCISD::STXVD2X"; + case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE"; + case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE"; case PPCISD::ST_VSR_SCAL_INT: return "PPCISD::ST_VSR_SCAL_INT"; case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; @@ -1396,7 +1405,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE"; case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI"; case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH"; - case PPCISD::FP_EXTEND_LH: return "PPCISD::FP_EXTEND_LH"; + case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF"; + case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT"; } return nullptr; } @@ -1517,7 +1527,7 @@ bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { const PPCSubtarget& Subtarget = - 
static_cast(DAG.getSubtarget()); + static_cast(DAG.getSubtarget()); if (!Subtarget.hasP8Vector()) return false; @@ -1769,10 +1779,10 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a splat of a single element that is suitable for input to -/// VSPLTB/VSPLTH/VSPLTW. +/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.). bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { - assert(N->getValueType(0) == MVT::v16i8 && - (EltSize == 1 || EltSize == 2 || EltSize == 4)); + assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) && + EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes"); // The consecutive indices need to specify an element, not part of two // different elements. So abandon ship early if this isn't the case. @@ -2065,10 +2075,11 @@ bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM, } -/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the -/// specified isSplatShuffleMask VECTOR_SHUFFLE mask. -unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, - SelectionDAG &DAG) { +/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is +/// appropriate for PPC mnemonics (which have a big endian bias - namely +/// elements are counted from the left of the vector register). +unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, + SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast(N); assert(isSplatShuffleMask(SVOp, EltSize)); if (DAG.getDataLayout().isLittleEndian()) @@ -2667,12 +2678,14 @@ static void setUsesTOCBasePtr(SelectionDAG &DAG) { setUsesTOCBasePtr(DAG.getMachineFunction()); } -static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit, - SDValue GA) { +SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, + SDValue GA) const { + const bool Is64Bit = Subtarget.isPPC64(); EVT VT = Is64Bit ? MVT::i64 : MVT::i32; - SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) : - DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); - + SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) + : Subtarget.isAIXABI() + ? DAG.getRegister(PPC::R2, VT) + : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); SDValue Ops[] = { GA, Reg }; return DAG.getMemIntrinsicNode( PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT, @@ -2688,10 +2701,10 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. - if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { + if (Subtarget.is64BitELFABI()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); - return getTOCEntry(DAG, SDLoc(CP), true, GA); + return getTOCEntry(DAG, SDLoc(CP), GA); } unsigned MOHiFlag, MOLoFlag; @@ -2701,7 +2714,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), PPCII::MO_PIC_FLAG); - return getTOCEntry(DAG, SDLoc(CP), false, GA); + return getTOCEntry(DAG, SDLoc(CP), GA); } SDValue CPIHi = @@ -2764,10 +2777,10 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. 
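A hedged sketch of the arithmetic behind the splat-index helper renamed above (getSplatIdxForPPCMnemonics): PPC mnemonics count elements from the left of the vector register, so on little-endian targets the index taken from the shuffle mask is mirrored within the 16-byte register. FirstMaskElt stands for the first v16i8 mask element; the function name is illustrative, not from the patch.

// Sketch only: endian adjustment for a splat index over a 128-bit register.
static unsigned splatIdxForMnemonic(unsigned FirstMaskElt, unsigned EltSize,
                                    bool IsLittleEndian) {
  unsigned NumElts = 16 / EltSize;        // elements per 16-byte register
  unsigned Idx = FirstMaskElt / EltSize;  // element selected by the splat
  return IsLittleEndian ? NumElts - 1 - Idx : Idx;
}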
- if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { + if (Subtarget.is64BitELFABI()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); - return getTOCEntry(DAG, SDLoc(JT), true, GA); + return getTOCEntry(DAG, SDLoc(JT), GA); } unsigned MOHiFlag, MOLoFlag; @@ -2777,7 +2790,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, PPCII::MO_PIC_FLAG); - return getTOCEntry(DAG, SDLoc(GA), false, GA); + return getTOCEntry(DAG, SDLoc(GA), GA); } SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); @@ -2793,14 +2806,18 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, // 64-bit SVR4 ABI code is always position-independent. // The actual BlockAddress is stored in the TOC. - if (Subtarget.isSVR4ABI() && - (Subtarget.isPPC64() || isPositionIndependent())) { - if (Subtarget.isPPC64()) - setUsesTOCBasePtr(DAG); + if (Subtarget.is64BitELFABI()) { + setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); - return getTOCEntry(DAG, SDLoc(BASDN), Subtarget.isPPC64(), GA); + return getTOCEntry(DAG, SDLoc(BASDN), GA); } + // 32-bit position-independent ELF stores the BlockAddress in the .got. + if (Subtarget.is32BitELFABI() && isPositionIndependent()) + return getTOCEntry( + DAG, SDLoc(BASDN), + DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset())); + unsigned MOHiFlag, MOLoFlag; bool IsPIC = isPositionIndependent(); getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); @@ -2913,12 +2930,12 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, SDLoc DL(GSDN); const GlobalValue *GV = GSDN->getGlobal(); - // 64-bit SVR4 ABI code is always position-independent. + // 64-bit SVR4 ABI & AIX ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. - if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { + if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); - return getTOCEntry(DAG, DL, true, GA); + return getTOCEntry(DAG, DL, GA); } unsigned MOHiFlag, MOLoFlag; @@ -2929,7 +2946,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), PPCII::MO_PIC_FLAG); - return getTOCEntry(DAG, DL, false, GA); + return getTOCEntry(DAG, DL, GA); } SDValue GAHi = @@ -3235,8 +3252,8 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(SV, nextOffset)); } -/// FPR - The set of FP registers that should be allocated for arguments, -/// on Darwin. +/// FPR - The set of FP registers that should be allocated for arguments +/// on Darwin and AIX. 
static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13}; @@ -3377,17 +3394,17 @@ SDValue PPCTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { - if (Subtarget.isSVR4ABI()) { - if (Subtarget.isPPC64()) - return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, - dl, DAG, InVals); - else - return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, - dl, DAG, InVals); - } else { - return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, - dl, DAG, InVals); - } + if (Subtarget.is64BitELFABI()) + return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, + InVals); + else if (Subtarget.is32BitELFABI()) + return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, + InVals); + + // FIXME: We are using this for both AIX and Darwin. We should add appropriate + // AIX testing, and rename it appropriately. + return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG, + InVals); } SDValue PPCTargetLowering::LowerFormalArguments_32SVR4( @@ -3467,7 +3484,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4( if (Subtarget.hasP8Vector()) RC = &PPC::VSSRCRegClass; else if (Subtarget.hasSPE()) - RC = &PPC::SPE4RCRegClass; + RC = &PPC::GPRCRegClass; else RC = &PPC::F4RCRegClass; break; @@ -4516,7 +4533,7 @@ callsShareTOCBase(const Function *Caller, SDValue Callee, static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl &Outs) { - assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64()); + assert(Subtarget.is64BitELFABI()); const unsigned PtrByteSize = 8; const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); @@ -4926,7 +4943,7 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, ImmutableCallSite CS, const PPCSubtarget &Subtarget) { bool isPPC64 = Subtarget.isPPC64(); bool isSVR4ABI = Subtarget.isSVR4ABI(); - bool isELFv2ABI = Subtarget.isELFv2ABI(); + bool is64BitELFv1ABI = isPPC64 && isSVR4ABI && !Subtarget.isELFv2ABI(); bool isAIXABI = Subtarget.isAIXABI(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); @@ -4997,7 +5014,7 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, // to do the call, we can't use PPCISD::CALL. SDValue MTCTROps[] = {Chain, Callee, InFlag}; - if (isSVR4ABI && isPPC64 && !isELFv2ABI) { + if (is64BitELFv1ABI) { // Function pointers in the 64-bit SVR4 ABI do not point to the function // entry point, but to the function descriptor (the function entry point // address is part of the function descriptor though). @@ -5085,7 +5102,7 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, CallOpc = PPCISD::BCTRL; Callee.setNode(nullptr); // Add use of X11 (holding environment pointer) - if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest) + if (is64BitELFv1ABI && !hasNest) Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); // Add CTR register as callee so a bctr can be emitted later. if (isTailCall) @@ -6730,8 +6747,12 @@ SDValue PPCTargetLowering::LowerCall_AIX( const unsigned NumGPRs = isPPC64 ? 
array_lengthof(GPR_64) : array_lengthof(GPR_32); + const unsigned NumFPRs = array_lengthof(FPR); + assert(NumFPRs == 13 && "Only FPR 1-13 could be used for parameter passing " + "on AIX"); + const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; - unsigned GPR_idx = 0; + unsigned GPR_idx = 0, FPR_idx = 0; SmallVector, 8> RegsToPass; @@ -6768,6 +6789,20 @@ SDValue PPCTargetLowering::LowerCall_AIX( break; case MVT::f32: case MVT::f64: + if (FPR_idx != NumFPRs) { + RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); + + // If we have any FPRs remaining, we may also have GPRs remaining. + // Args passed in FPRs consume 1 or 2 (f64 in 32 bit mode) available + // GPRs. + if (GPR_idx != NumGPRs) + ++GPR_idx; + if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64) + ++GPR_idx; + } else + report_fatal_error("Handling of placing parameters on the stack is " + "unimplemented!"); + break; case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: @@ -8152,6 +8187,18 @@ SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { Op0.getOperand(1)); } +static const SDValue *getNormalLoadInput(const SDValue &Op) { + const SDValue *InputLoad = &Op; + if (InputLoad->getOpcode() == ISD::BITCAST) + InputLoad = &InputLoad->getOperand(0); + if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR) + InputLoad = &InputLoad->getOperand(0); + if (InputLoad->getOpcode() != ISD::LOAD) + return nullptr; + LoadSDNode *LD = cast(*InputLoad); + return ISD::isNormalLoad(LD) ? InputLoad : nullptr; +} + // If this is a case we can't handle, return null and let the default // expansion code take care of it. If we CAN select this case, and if it // selects to a single instruction, return Op. Otherwise, if we can codegen @@ -8274,6 +8321,34 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0, !Subtarget.isLittleEndian()) || SplatBitSize > 32) { + + const SDValue *InputLoad = getNormalLoadInput(Op.getOperand(0)); + // Handle load-and-splat patterns as we have instructions that will do this + // in one go. + if (InputLoad && DAG.isSplatValue(Op, true)) { + LoadSDNode *LD = cast(*InputLoad); + + // We have handling for 4 and 8 byte elements. + unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits(); + + // Checking for a single use of this load, we have to check for vector + // width (128 bits) / ElementSize uses (since each operand of the + // BUILD_VECTOR is a separate use of the value. + if (InputLoad->getNode()->hasNUsesOfValue(128 / ElementSize, 0) && + ((Subtarget.hasVSX() && ElementSize == 64) || + (Subtarget.hasP9Vector() && ElementSize == 32))) { + SDValue Ops[] = { + LD->getChain(), // Chain + LD->getBasePtr(), // Ptr + DAG.getValueType(Op.getValueType()) // VT + }; + return + DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, + DAG.getVTList(Op.getValueType(), MVT::Other), + Ops, LD->getMemoryVT(), LD->getMemOperand()); + } + } + // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be // lowered to VSX instructions under certain conditions. // Without VSX, there is no pattern more efficient than expanding the node. @@ -8759,6 +8834,45 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, unsigned ShiftElts, InsertAtByte; bool Swap = false; + + // If this is a load-and-splat, we can do that with a single instruction + // in some cases. However if the load has multiple uses, we don't want to + // combine it because that will just produce multiple loads. 
+ const SDValue *InputLoad = getNormalLoadInput(V1); + if (InputLoad && Subtarget.hasVSX() && V2.isUndef() && + (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) && + InputLoad->hasOneUse()) { + bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4); + int SplatIdx = + PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG); + + LoadSDNode *LD = cast(*InputLoad); + // For 4-byte load-and-splat, we need Power9. + if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) { + uint64_t Offset = 0; + if (IsFourByte) + Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4; + else + Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8; + SDValue BasePtr = LD->getBasePtr(); + if (Offset != 0) + BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), + BasePtr, DAG.getIntPtrConstant(Offset, dl)); + SDValue Ops[] = { + LD->getChain(), // Chain + BasePtr, // BasePtr + DAG.getValueType(Op.getValueType()) // VT + }; + SDVTList VTL = + DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other); + SDValue LdSplt = + DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL, + Ops, LD->getMemoryVT(), LD->getMemOperand()); + if (LdSplt.getValueType() != SVOp->getValueType(0)) + LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt); + return LdSplt; + } + } if (Subtarget.hasP9Vector() && PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap, isLittleEndian)) { @@ -8835,7 +8949,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, if (Subtarget.hasVSX()) { if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { - int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); + int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG); SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv, @@ -9880,6 +9994,30 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { switch (Op0.getOpcode()) { default: return SDValue(); + case ISD::EXTRACT_SUBVECTOR: { + assert(Op0.getNumOperands() == 2 && + isa(Op0->getOperand(1)) && + "Node should have 2 operands with second one being a constant!"); + + if (Op0.getOperand(0).getValueType() != MVT::v4f32) + return SDValue(); + + // Custom lower is only done for high or low doubleword. + int Idx = cast(Op0.getOperand(1))->getZExtValue(); + if (Idx % 2 != 0) + return SDValue(); + + // Since input is v4f32, at this point Idx is either 0 or 2. + // Shift to get the doubleword position we want. + int DWord = Idx >> 1; + + // High and low word positions are different on little endian. + if (Subtarget.isLittleEndian()) + DWord ^= 0x1; + + return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, + Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32)); + } case ISD::FADD: case ISD::FMUL: case ISD::FSUB: { @@ -9891,26 +10029,25 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { return SDValue(); // Generate new load node. 
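A small sketch of the index selection used in the new EXTRACT_SUBVECTOR case of LowerFP_EXTEND: only even indices (the high or low v2f32 half of a v4f32) are custom lowered, and the chosen doubleword is flipped on little-endian targets. The helper name and the -1 sentinel are illustrative only.

// Sketch only: which doubleword an even extract-subvector index refers to.
static int dwordForExtractIndex(int Idx, bool IsLittleEndian) {
  if (Idx % 2 != 0)
    return -1;            // odd indices fall back to default expansion
  int DWord = Idx >> 1;   // index 0 or 2 becomes doubleword 0 or 1
  if (IsLittleEndian)
    DWord ^= 0x1;         // high and low halves swap places on little-endian
  return DWord;
}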
LoadSDNode *LD = cast(LdOp); - SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() }; - NewLoad[i] = - DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl, - DAG.getVTList(MVT::v4f32, MVT::Other), - LoadOps, LD->getMemoryVT(), - LD->getMemOperand()); - } - SDValue NewOp = DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, - NewLoad[0], NewLoad[1], - Op0.getNode()->getFlags()); - return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewOp); + SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()}; + NewLoad[i] = DAG.getMemIntrinsicNode( + PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps, + LD->getMemoryVT(), LD->getMemOperand()); + } + SDValue NewOp = + DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0], + NewLoad[1], Op0.getNode()->getFlags()); + return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp, + DAG.getConstant(0, dl, MVT::i32)); } case ISD::LOAD: { LoadSDNode *LD = cast(Op0); - SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() }; - SDValue NewLd = - DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl, - DAG.getVTList(MVT::v4f32, MVT::Other), - LoadOps, LD->getMemoryVT(), LD->getMemOperand()); - return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewLd); + SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()}; + SDValue NewLd = DAG.getMemIntrinsicNode( + PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps, + LD->getMemoryVT(), LD->getMemOperand()); + return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd, + DAG.getConstant(0, dl, MVT::i32)); } } llvm_unreachable("ERROR:Should return for all cases within swtich."); @@ -10048,9 +10185,11 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, return; case ISD::TRUNCATE: { EVT TrgVT = N->getValueType(0); + EVT OpVT = N->getOperand(0).getValueType(); if (TrgVT.isVector() && isOperationCustom(N->getOpcode(), TrgVT) && - N->getOperand(0).getValueType().getSizeInBits() <= 128) + OpVT.getSizeInBits() <= 128 && + isPowerOf2_32(OpVT.getVectorElementType().getSizeInBits())) Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG)); return; } @@ -10192,7 +10331,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, if (CmpOpcode) { // Signed comparisons of byte or halfword values must be sign-extended. if (CmpOpcode == PPC::CMPW && AtomicSize < 4) { - unsigned ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); + Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH), ExtReg).addReg(dest); BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) @@ -10243,10 +10382,10 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( MachineFunction *F = BB->getParent(); MachineFunction::iterator It = ++BB->getIterator(); - unsigned dest = MI.getOperand(0).getReg(); - unsigned ptrA = MI.getOperand(1).getReg(); - unsigned ptrB = MI.getOperand(2).getReg(); - unsigned incr = MI.getOperand(3).getReg(); + Register dest = MI.getOperand(0).getReg(); + Register ptrA = MI.getOperand(1).getReg(); + Register ptrB = MI.getOperand(2).getReg(); + Register incr = MI.getOperand(3).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); @@ -10364,7 +10503,7 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( if (CmpOpcode) { // For unsigned comparisons, we can directly compare the shifted values. // For signed comparisons we shift and sign extend. 
- unsigned SReg = RegInfo.createVirtualRegister(GPRC); + Register SReg = RegInfo.createVirtualRegister(GPRC); BuildMI(BB, dl, TII->get(PPC::AND), SReg) .addReg(TmpDestReg) .addReg(MaskReg); @@ -10375,7 +10514,7 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg) .addReg(SReg) .addReg(ShiftReg); - unsigned ValueSReg = RegInfo.createVirtualRegister(GPRC); + Register ValueSReg = RegInfo.createVirtualRegister(GPRC); BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg) .addReg(ValueReg); ValueReg = ValueSReg; @@ -10426,11 +10565,11 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, const BasicBlock *BB = MBB->getBasicBlock(); MachineFunction::iterator I = ++MBB->getIterator(); - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); - unsigned mainDstReg = MRI.createVirtualRegister(RC); - unsigned restoreDstReg = MRI.createVirtualRegister(RC); + Register mainDstReg = MRI.createVirtualRegister(RC); + Register restoreDstReg = MRI.createVirtualRegister(RC); MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && @@ -10482,10 +10621,10 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, // Prepare IP either in reg. const TargetRegisterClass *PtrRC = getRegClassFor(PVT); - unsigned LabelReg = MRI.createVirtualRegister(PtrRC); - unsigned BufReg = MI.getOperand(1).getReg(); + Register LabelReg = MRI.createVirtualRegister(PtrRC); + Register BufReg = MI.getOperand(1).getReg(); - if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) { + if (Subtarget.is64BitELFABI()) { setUsesTOCBasePtr(*MBB->getParent()); MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) .addReg(PPC::X2) @@ -10570,7 +10709,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, const TargetRegisterClass *RC = (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; - unsigned Tmp = MRI.createVirtualRegister(RC); + Register Tmp = MRI.createVirtualRegister(RC); // Since FP is only updated here but NOT referenced, it's treated as GPR. unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; @@ -10587,7 +10726,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, const int64_t TOCOffset = 3 * PVT.getStoreSize(); const int64_t BPOffset = 4 * PVT.getStoreSize(); - unsigned BufReg = MI.getOperand(0).getReg(); + Register BufReg = MI.getOperand(0).getReg(); // Reload FP (the jumped-to function may not have had a // frame pointer, and if so, then its r31 will be restored @@ -10662,7 +10801,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { if (MI.getOpcode() == TargetOpcode::STACKMAP || MI.getOpcode() == TargetOpcode::PATCHPOINT) { - if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() && + if (Subtarget.is64BitELFABI() && MI.getOpcode() == TargetOpcode::PATCHPOINT) { // Call lowering should have added an r2 operand to indicate a dependence // on the TOC base pointer value. 
It can't however, because there is no @@ -10828,15 +10967,15 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, BB = readMBB; MachineRegisterInfo &RegInfo = F->getRegInfo(); - unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); - unsigned LoReg = MI.getOperand(0).getReg(); - unsigned HiReg = MI.getOperand(1).getReg(); + Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); + Register LoReg = MI.getOperand(0).getReg(); + Register HiReg = MI.getOperand(1).getReg(); BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269); BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268); BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269); - unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); + Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg) .addReg(HiReg) @@ -10978,11 +11117,11 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, StoreMnemonic = PPC::STDCX; break; } - unsigned dest = MI.getOperand(0).getReg(); - unsigned ptrA = MI.getOperand(1).getReg(); - unsigned ptrB = MI.getOperand(2).getReg(); - unsigned oldval = MI.getOperand(3).getReg(); - unsigned newval = MI.getOperand(4).getReg(); + Register dest = MI.getOperand(0).getReg(); + Register ptrA = MI.getOperand(1).getReg(); + Register ptrB = MI.getOperand(2).getReg(); + Register oldval = MI.getOperand(3).getReg(); + Register newval = MI.getOperand(4).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); @@ -11057,11 +11196,11 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, bool isLittleEndian = Subtarget.isLittleEndian(); bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; - unsigned dest = MI.getOperand(0).getReg(); - unsigned ptrA = MI.getOperand(1).getReg(); - unsigned ptrB = MI.getOperand(2).getReg(); - unsigned oldval = MI.getOperand(3).getReg(); - unsigned newval = MI.getOperand(4).getReg(); + Register dest = MI.getOperand(0).getReg(); + Register ptrA = MI.getOperand(1).getReg(); + Register ptrB = MI.getOperand(2).getReg(); + Register oldval = MI.getOperand(3).getReg(); + Register newval = MI.getOperand(4).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); @@ -11238,13 +11377,13 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // This pseudo performs an FADD with rounding mode temporarily forced // to round-to-zero. We emit this via custom inserter since the FPSCR // is not modeled at the SelectionDAG level. - unsigned Dest = MI.getOperand(0).getReg(); - unsigned Src1 = MI.getOperand(1).getReg(); - unsigned Src2 = MI.getOperand(2).getReg(); + Register Dest = MI.getOperand(0).getReg(); + Register Src1 = MI.getOperand(1).getReg(); + Register Src2 = MI.getOperand(2).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineRegisterInfo &RegInfo = F->getRegInfo(); - unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); + Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); // Save FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); @@ -11270,7 +11409,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8); MachineRegisterInfo &RegInfo = F->getRegInfo(); - unsigned Dest = RegInfo.createVirtualRegister( + Register Dest = RegInfo.createVirtualRegister( Opcode == PPC::ANDIo ? 
&PPC::GPRCRegClass : &PPC::G8RCRegClass); DebugLoc dl = MI.getDebugLoc(); @@ -11283,7 +11422,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, } else if (MI.getOpcode() == PPC::TCHECK_RET) { DebugLoc Dl = MI.getDebugLoc(); MachineRegisterInfo &RegInfo = F->getRegInfo(); - unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); + Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg); BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg()) @@ -11297,7 +11436,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, .addReg(PPC::CR0EQ); } else if (MI.getOpcode() == PPC::SETRNDi) { DebugLoc dl = MI.getDebugLoc(); - unsigned OldFPSCRReg = MI.getOperand(0).getReg(); + Register OldFPSCRReg = MI.getOperand(0).getReg(); // Save FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg); @@ -11378,7 +11517,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, } }; - unsigned OldFPSCRReg = MI.getOperand(0).getReg(); + Register OldFPSCRReg = MI.getOperand(0).getReg(); // Save FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg); @@ -11393,12 +11532,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // mtfsf 255, NewFPSCRReg MachineOperand SrcOp = MI.getOperand(1); MachineRegisterInfo &RegInfo = F->getRegInfo(); - unsigned OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); + Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg); - unsigned ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); - unsigned ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); + Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); + Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); // The first operand of INSERT_SUBREG should be a register which has // subregisters, we only care about its RegClass, so we should use an @@ -11409,14 +11548,14 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, .add(SrcOp) .addImm(1); - unsigned NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); + Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg) .addReg(OldFPSCRTmpReg) .addReg(ExtSrcReg) .addImm(0) .addImm(62); - unsigned NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); + Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg); // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63 @@ -13113,6 +13252,61 @@ SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N, return Val; } +SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN, + LSBaseSDNode *LSBase, + DAGCombinerInfo &DCI) const { + assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) && + "Not a reverse memop pattern!"); + + auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool { + auto Mask = SVN->getMask(); + int i = 0; + auto I = Mask.rbegin(); + auto E = Mask.rend(); + + for (; I != E; ++I) { + if (*I != i) + return false; + i++; + } + return true; + }; + + SelectionDAG &DAG = DCI.DAG; + EVT VT = SVN->getValueType(0); + + if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX()) + return SDValue(); + + // Before P9, we have PPCVSXSwapRemoval pass to hack the element 
order. + // See comment in PPCVSXSwapRemoval.cpp. + // It is conflict with PPCVSXSwapRemoval opt. So we don't do it. + if (!Subtarget.hasP9Vector()) + return SDValue(); + + if(!IsElementReverse(SVN)) + return SDValue(); + + if (LSBase->getOpcode() == ISD::LOAD) { + SDLoc dl(SVN); + SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()}; + return DAG.getMemIntrinsicNode( + PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps, + LSBase->getMemoryVT(), LSBase->getMemOperand()); + } + + if (LSBase->getOpcode() == ISD::STORE) { + SDLoc dl(LSBase); + SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0), + LSBase->getBasePtr()}; + return DAG.getMemIntrinsicNode( + PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps, + LSBase->getMemoryVT(), LSBase->getMemOperand()); + } + + llvm_unreachable("Expected a load or store node here"); +} + SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -13159,6 +13353,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return combineFPToIntToFP(N, DCI); + case ISD::VECTOR_SHUFFLE: + if (ISD::isNormalLoad(N->getOperand(0).getNode())) { + LSBaseSDNode* LSBase = cast(N->getOperand(0)); + return combineVReverseMemOP(cast(N), LSBase, DCI); + } + break; case ISD::STORE: { EVT Op1VT = N->getOperand(1).getValueType(); @@ -13170,6 +13370,13 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, return Val; } + if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) { + ShuffleVectorSDNode *SVN = cast(N->getOperand(1)); + SDValue Val= combineVReverseMemOP(SVN, cast(N), DCI); + if (Val) + return Val; + } + // Turn STORE (BSWAP) -> sthbrx/stwbrx. if (cast(N)->isUnindexed() && Opcode == ISD::BSWAP && N->getOperand(1).getNode()->hasOneUse() && @@ -13903,7 +14110,7 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, } } -unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { +Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { switch (Subtarget.getDarwinDirective()) { default: break; case PPC::DIR_970: @@ -13924,7 +14131,7 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { // Actual alignment of the loop will depend on the hotness check and other // logic in alignBlocks. if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty()) - return 5; + return Align(32); } const PPCInstrInfo *TII = Subtarget.getInstrInfo(); @@ -13940,7 +14147,7 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { } if (LoopSize > 16 && LoopSize <= 32) - return 5; + return Align(32); break; } @@ -14063,7 +14270,7 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, case 'f': if (Subtarget.hasSPE()) { if (VT == MVT::f32 || VT == MVT::i32) - return std::make_pair(0U, &PPC::SPE4RCRegClass); + return std::make_pair(0U, &PPC::GPRCRegClass); if (VT == MVT::f64 || VT == MVT::i64) return std::make_pair(0U, &PPC::SPERCRegClass); } else { @@ -14306,22 +14513,22 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. 
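A standalone sketch of the mask test performed by the IsElementReverse lambda in combineVReverseMemOP above: the combine only fires when the shuffle mask is a full element reversal, i.e. [N-1, N-2, ..., 1, 0]. The free-standing helper below is illustrative and not part of the patch.

// Sketch only: true iff Mask reverses all elements of the vector.
#include "llvm/ADT/ArrayRef.h"
static bool isElementReverseMask(llvm::ArrayRef<int> Mask) {
  int N = static_cast<int>(Mask.size());
  for (int i = 0; i < N; ++i)
    if (Mask[i] != N - 1 - i)
      return false;
  return true;
}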
-unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const { +Register PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const { bool isPPC64 = Subtarget.isPPC64(); - bool isDarwinABI = Subtarget.isDarwinABI(); + bool IsDarwinABI = Subtarget.isDarwinABI(); if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) || (!isPPC64 && VT != MVT::i32)) report_fatal_error("Invalid register global variable type"); bool is64Bit = isPPC64 && VT == MVT::i64; - unsigned Reg = StringSwitch(RegName) + Register Reg = StringSwitch(RegName) .Case("r1", is64Bit ? PPC::X1 : PPC::R1) - .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2) - .Case("r13", (!isPPC64 && isDarwinABI) ? 0 : + .Case("r2", (IsDarwinABI || isPPC64) ? Register() : PPC::R2) + .Case("r13", (!isPPC64 && IsDarwinABI) ? Register() : (is64Bit ? PPC::X13 : PPC::R13)) - .Default(0); + .Default(Register()); if (Reg) return Reg; @@ -14330,14 +14537,17 @@ unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const { // 32-bit SVR4 ABI access everything as got-indirect. - if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) + if (Subtarget.is32BitELFABI()) + return true; + + // AIX accesses everything indirectly through the TOC, which is similar to + // the GOT. + if (Subtarget.isAIXABI()) return true; CodeModel::Model CModel = getTargetMachine().getCodeModel(); // If it is small or large code model, module locals are accessed - // indirectly by loading their address from .toc/.got. The difference - // is that for large code model we have ADDISTocHa + LDtocL and for - // small code model we simply have LDtoc. + // indirectly by loading their address from .toc/.got. if (CModel == CodeModel::Small || CModel == CodeModel::Large) return true; @@ -14345,14 +14555,8 @@ bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const { if (isa(GA) || isa(GA)) return true; - if (GlobalAddressSDNode *G = dyn_cast(GA)) { - const GlobalValue *GV = G->getGlobal(); - unsigned char GVFlags = Subtarget.classifyGlobalReference(GV); - // The NLP flag indicates that a global access has to use an - // extra indirection. 
- if (GVFlags & PPCII::MO_NLP_FLAG) - return true; - } + if (GlobalAddressSDNode *G = dyn_cast(GA)) + return Subtarget.isGVIndirectSymbol(G->getGlobal()); return false; } @@ -14417,7 +14621,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(0); Info.offset = -VT.getStoreSize()+1; Info.size = 2*VT.getStoreSize()-1; - Info.align = 1; + Info.align = Align::None(); Info.flags = MachineMemOperand::MOLoad; return true; } @@ -14451,7 +14655,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.size = VT.getStoreSize(); - Info.align = 1; + Info.align = Align::None(); Info.flags = MachineMemOperand::MOLoad; return true; } @@ -14503,7 +14707,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(1); Info.offset = -VT.getStoreSize()+1; Info.size = 2*VT.getStoreSize()-1; - Info.align = 1; + Info.align = Align::None(); Info.flags = MachineMemOperand::MOStore; return true; } @@ -14536,7 +14740,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(1); Info.offset = 0; Info.size = VT.getStoreSize(); - Info.align = 1; + Info.align = Align::None(); Info.flags = MachineMemOperand::MOStore; return true; } @@ -14786,7 +14990,7 @@ void PPCTargetLowering::insertCopiesSplitCSR( else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); - unsigned NewVR = MRI->createVirtualRegister(RC); + Register NewVR = MRI->createVirtualRegister(RC); // Create copy from CSR to a virtual register. // FIXME: this currently does not emit CFI pseudo-instructions, it works // fine for CXX_FAST_TLS since the C++-style TLS access functions should be @@ -15146,7 +15350,7 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const { bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { // Only duplicate to increase tail-calls for the 64bit SysV ABIs. - if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64()) + if (!Subtarget.is64BitELFABI()) return false; // If not a tail call then no need to proceed. diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 97422c6eda36..62922ea2d4c4 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -412,8 +412,9 @@ namespace llvm { /// representation. QBFLT, - /// Custom extend v4f32 to v2f64. - FP_EXTEND_LH, + /// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or + /// lower (IDX=1) half of v4f32 to v2f64. + FP_EXTEND_HALF, /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a /// byte-swapping store instruction. It byte-swaps the low "Type" bits of @@ -456,15 +457,29 @@ namespace llvm { /// an xxswapd. LXVD2X, + /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian. + /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on + /// the vector type to load vector in big-endian element order. + LOAD_VEC_BE, + /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a /// v2f32 value into the lower half of a VSR register. LD_VSX_LH, + /// VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory + /// instructions such as LXVDSX, LXVWSX. + LD_SPLAT, + /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian. /// Maps directly to an stxvd2x instruction that will be preceded by /// an xxswapd. STXVD2X, + /// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian. 
+ /// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x depending on + /// the vector type to store vector in big-endian element order. + STORE_VEC_BE, + /// Store scalar integers from VSR. ST_VSR_SCAL_INT, @@ -563,9 +578,11 @@ namespace llvm { bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE); - /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the - /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. - unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG); + /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is + /// appropriate for PPC mnemonics (which have a big endian bias - namely + /// elements are counted from the left of the vector register). + unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, + SelectionDAG &DAG); /// get_VSPLTI_elt - If this is a build_vector of constants which can be /// formed by using a vspltis[bhw] instruction of the specified element @@ -716,8 +733,8 @@ namespace llvm { SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl &Created) const override; - unsigned getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const override; + Register getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const override; void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, @@ -725,7 +742,7 @@ namespace llvm { const SelectionDAG &DAG, unsigned Depth = 0) const override; - unsigned getPrefLoopAlignment(MachineLoop *ML) const override; + Align getPrefLoopAlignment(MachineLoop *ML) const override; bool shouldInsertFencesForAtomic(const Instruction *I) const override { return true; @@ -834,6 +851,18 @@ namespace llvm { return true; } + bool isDesirableToTransformToIntegerOp(unsigned Opc, + EVT VT) const override { + // Only handle float load/store pair because float(fpr) load/store + // instruction has more cycles than integer(gpr) load/store in PPC. + if (Opc != ISD::LOAD && Opc != ISD::STORE) + return false; + if (VT != MVT::f32 && VT != MVT::f64) + return false; + + return true; + } + // Returns true if the address of the global is stored in TOC entry. bool isAccessedAsGotIndirect(SDValue N) const; @@ -998,6 +1027,8 @@ namespace llvm { SDValue &FPOpOut, const SDLoc &dl) const; + SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, SDValue GA) const; + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; @@ -1155,6 +1186,8 @@ namespace llvm { SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase, + DAGCombinerInfo &DCI) const; /// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces /// SETCC with integer subtraction when (1) there is a legal way of doing it diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index d598567f8e4e..f16187149d36 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -1099,8 +1099,8 @@ def LDMX : XForm_1<31, 309, (outs g8rc:$rD), (ins memrr:$src), // Support for medium and large code model. 
let hasSideEffects = 0 in { let isReMaterializable = 1 in { -def ADDIStocHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), - "#ADDIStocHA", []>, isPPC64; +def ADDIStocHA8: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), + "#ADDIStocHA8", []>, isPPC64; def ADDItocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), "#ADDItocL", []>, isPPC64; } diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 8176c5120a83..fd3fc2af2327 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -215,21 +215,21 @@ def vsldoi_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), // VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm. def VSPLTB_get_imm : SDNodeXForm; def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ return PPC::isSplatShuffleMask(cast(N), 1); }], VSPLTB_get_imm>; def VSPLTH_get_imm : SDNodeXForm; def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ return PPC::isSplatShuffleMask(cast(N), 2); }], VSPLTH_get_imm>; def VSPLTW_get_imm : SDNodeXForm; def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ @@ -331,7 +331,7 @@ class VXBX_Int_Ty xo, string opc, Intrinsic IntID, ValueType Ty> class VXCR_Int_Ty xo, string opc, Intrinsic IntID, ValueType Ty> : VXForm_CR; + [(set Ty:$vD, (IntID Ty:$vA, timm:$ST, timm:$SIX))]>; //===----------------------------------------------------------------------===// // Instruction Definitions. @@ -401,10 +401,10 @@ let isCodeGenOnly = 1 in { def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins), "mfvscr $vD", IIC_LdStStore, - [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>; + [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>; def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB), "mtvscr $vB", IIC_LdStLoad, - [(int_ppc_altivec_mtvscr v4i32:$vB)]>; + [(int_ppc_altivec_mtvscr v4i32:$vB)]>; let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { // Loads. def LVEBX: XForm_1_memOp<31, 7, (outs vrrc:$vD), (ins memrr:$src), diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index a48eb1690695..96b9c9a119c0 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -1209,20 +1209,13 @@ class XX3Form opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = XT{5}; } -class XX3Form_Zero opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, +class XX3Form_SameOp opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> : XX3Form { let XA = XT; let XB = XT; } -class XX3Form_SetZero opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : XX3Form { - let XB = XT; - let XA = XT; -} - class XX3Form_1 opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> : I { diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index a787bdd56b9d..6b10672965c9 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -90,7 +90,6 @@ enum SpillOpcodeKey { SOK_QuadBitSpill, SOK_SpillToVSR, SOK_SPESpill, - SOK_SPE4Spill, SOK_LastOpcodeSpill // This must be last on the enum. 
}; @@ -184,10 +183,10 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return Latency; const MachineOperand &DefMO = DefMI.getOperand(DefIdx); - unsigned Reg = DefMO.getReg(); + Register Reg = DefMO.getReg(); bool IsRegCR; - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { const MachineRegisterInfo *MRI = &DefMI.getParent()->getParent()->getRegInfo(); IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) || @@ -330,11 +329,13 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, case PPC::LIS8: case PPC::QVGPCI: case PPC::ADDIStocHA: + case PPC::ADDIStocHA8: case PPC::ADDItocL: case PPC::LOAD_STACK_GUARD: case PPC::XXLXORz: case PPC::XXLXORspz: case PPC::XXLXORdpz: + case PPC::XXLEQVOnes: case PPC::V_SET0B: case PPC::V_SET0H: case PPC::V_SET0: @@ -448,7 +449,8 @@ MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, return &MI; } -bool PPCInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, +bool PPCInstrInfo::findCommutedOpIndices(const MachineInstr &MI, + unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { // For VSX A-Type FMA instructions, it is the first two operands that can be // commuted, however, because the non-encoded tied input operand is listed @@ -966,11 +968,11 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, getKillRegState(KillSrc); return; } else if (PPC::SPERCRegClass.contains(SrcReg) && - PPC::SPE4RCRegClass.contains(DestReg)) { + PPC::GPRCRegClass.contains(DestReg)) { BuildMI(MBB, I, DL, get(PPC::EFSCFD), DestReg).addReg(SrcReg); getKillRegState(KillSrc); return; - } else if (PPC::SPE4RCRegClass.contains(SrcReg) && + } else if (PPC::GPRCRegClass.contains(SrcReg) && PPC::SPERCRegClass.contains(DestReg)) { BuildMI(MBB, I, DL, get(PPC::EFDCFS), DestReg).addReg(SrcReg); getKillRegState(KillSrc); @@ -1009,8 +1011,6 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = PPC::QVFMRb; else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::CROR; - else if (PPC::SPE4RCRegClass.contains(DestReg, SrcReg)) - Opc = PPC::OR; else if (PPC::SPERCRegClass.contains(DestReg, SrcReg)) Opc = PPC::EVOR; else @@ -1043,8 +1043,6 @@ unsigned PPCInstrInfo::getStoreOpcodeForSpill(unsigned Reg, OpcodeIndex = SOK_Float4Spill; } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_SPESpill; - } else if (PPC::SPE4RCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_SPE4Spill; } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_CRSpill; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { @@ -1083,8 +1081,6 @@ unsigned PPCInstrInfo::getStoreOpcodeForSpill(unsigned Reg, OpcodeIndex = SOK_Float4Spill; } else if (PPC::SPERCRegClass.contains(Reg)) { OpcodeIndex = SOK_SPESpill; - } else if (PPC::SPE4RCRegClass.contains(Reg)) { - OpcodeIndex = SOK_SPE4Spill; } else if (PPC::CRRCRegClass.contains(Reg)) { OpcodeIndex = SOK_CRSpill; } else if (PPC::CRBITRCRegClass.contains(Reg)) { @@ -1133,8 +1129,6 @@ PPCInstrInfo::getLoadOpcodeForSpill(unsigned Reg, OpcodeIndex = SOK_Float4Spill; } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_SPESpill; - } else if (PPC::SPE4RCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_SPE4Spill; } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_CRSpill; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { @@ -1173,8 +1167,6 @@ PPCInstrInfo::getLoadOpcodeForSpill(unsigned Reg, OpcodeIndex = SOK_Float4Spill; } else if 
(PPC::SPERCRegClass.contains(Reg)) { OpcodeIndex = SOK_SPESpill; - } else if (PPC::SPE4RCRegClass.contains(Reg)) { - OpcodeIndex = SOK_SPE4Spill; } else if (PPC::CRRCRegClass.contains(Reg)) { OpcodeIndex = SOK_CRSpill; } else if (PPC::CRBITRCRegClass.contains(Reg)) { @@ -1648,7 +1640,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, return false; int OpC = CmpInstr.getOpcode(); - unsigned CRReg = CmpInstr.getOperand(0).getReg(); + Register CRReg = CmpInstr.getOperand(0).getReg(); // FP record forms set CR1 based on the exception status bits, not a // comparison with zero. @@ -1671,7 +1663,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, // Look through copies unless that gets us to a physical register. unsigned ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI); - if (TargetRegisterInfo::isVirtualRegister(ActualSrc)) + if (Register::isVirtualRegister(ActualSrc)) SrcReg = ActualSrc; // Get the unique definition of SrcReg. @@ -1937,7 +1929,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, // Rotates are expensive instructions. If we're emitting a record-form // rotate that can just be an andi/andis, we should just emit that. if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) { - unsigned GPRRes = MI->getOperand(0).getReg(); + Register GPRRes = MI->getOperand(0).getReg(); int64_t SH = MI->getOperand(2).getImm(); int64_t MB = MI->getOperand(3).getImm(); int64_t ME = MI->getOperand(4).getImm(); @@ -2122,7 +2114,7 @@ bool PPCInstrInfo::expandVSXMemPseudo(MachineInstr &MI) const { llvm_unreachable("Unknown Operation!"); } - unsigned TargetReg = MI.getOperand(0).getReg(); + Register TargetReg = MI.getOperand(0).getReg(); unsigned Opcode; if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) || (TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31)) @@ -2184,7 +2176,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return expandVSXMemPseudo(MI); } case PPC::SPILLTOVSR_LD: { - unsigned TargetReg = MI.getOperand(0).getReg(); + Register TargetReg = MI.getOperand(0).getReg(); if (PPC::VSFRCRegClass.contains(TargetReg)) { MI.setDesc(get(PPC::DFLOADf64)); return expandPostRAPseudo(MI); @@ -2194,7 +2186,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } case PPC::SPILLTOVSR_ST: { - unsigned SrcReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(0).getReg(); if (PPC::VSFRCRegClass.contains(SrcReg)) { NumStoreSPILLVSRRCAsVec++; MI.setDesc(get(PPC::DFSTOREf64)); @@ -2206,7 +2198,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } case PPC::SPILLTOVSR_LDX: { - unsigned TargetReg = MI.getOperand(0).getReg(); + Register TargetReg = MI.getOperand(0).getReg(); if (PPC::VSFRCRegClass.contains(TargetReg)) MI.setDesc(get(PPC::LXSDX)); else @@ -2214,7 +2206,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } case PPC::SPILLTOVSR_STX: { - unsigned SrcReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(0).getReg(); if (PPC::VSFRCRegClass.contains(SrcReg)) { NumStoreSPILLVSRRCAsVec++; MI.setDesc(get(PPC::STXSDX)); @@ -2279,10 +2271,10 @@ void PPCInstrInfo::replaceInstrOperandWithImm(MachineInstr &MI, int64_t Imm) const { assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG"); // Replace the REG with the Immediate. 
- unsigned InUseReg = MI.getOperand(OpNo).getReg(); + Register InUseReg = MI.getOperand(OpNo).getReg(); MI.getOperand(OpNo).ChangeToImmediate(Imm); - if (empty(MI.implicit_operands())) + if (MI.implicit_operands().empty()) return; // We need to make sure that the MI didn't have any implicit use @@ -2328,6 +2320,23 @@ void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI, .addImm(LII.Imm); } +MachineInstr *PPCInstrInfo::getDefMIPostRA(unsigned Reg, MachineInstr &MI, + bool &SeenIntermediateUse) const { + assert(!MI.getParent()->getParent()->getRegInfo().isSSA() && + "Should be called after register allocation."); + const TargetRegisterInfo *TRI = &getRegisterInfo(); + MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI; + It++; + SeenIntermediateUse = false; + for (; It != E; ++It) { + if (It->modifiesRegister(Reg, TRI)) + return &*It; + if (It->readsRegister(Reg, TRI)) + SeenIntermediateUse = true; + } + return nullptr; +} + MachineInstr *PPCInstrInfo::getForwardingDefMI( MachineInstr &MI, unsigned &OpNoForForwarding, @@ -2342,11 +2351,11 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI( for (int i = 1, e = MI.getNumOperands(); i < e; i++) { if (!MI.getOperand(i).isReg()) continue; - unsigned Reg = MI.getOperand(i).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MI.getOperand(i).getReg(); + if (!Register::isVirtualRegister(Reg)) continue; unsigned TrueReg = TRI->lookThruCopyLike(Reg, MRI); - if (TargetRegisterInfo::isVirtualRegister(TrueReg)) { + if (Register::isVirtualRegister(TrueReg)) { DefMI = MRI->getVRegDef(TrueReg); if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8) { OpNoForForwarding = i; @@ -2370,7 +2379,10 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI( Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 || Opc == PPC::RLWINM || Opc == PPC::RLWINMo || Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o; - if (!instrHasImmForm(MI, III, true) && !ConvertibleImmForm) + bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg()) + ? isVFRegister(MI.getOperand(0).getReg()) + : false; + if (!ConvertibleImmForm && !instrHasImmForm(Opc, IsVFReg, III, true)) return nullptr; // Don't convert or %X, %Y, %Y since that's just a register move. @@ -2381,29 +2393,24 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI( MachineOperand &MO = MI.getOperand(i); SeenIntermediateUse = false; if (MO.isReg() && MO.isUse() && !MO.isImplicit()) { - MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI; - It++; - unsigned Reg = MI.getOperand(i).getReg(); - - // Is this register defined by some form of add-immediate (including - // load-immediate) within this basic block? - for ( ; It != E; ++It) { - if (It->modifiesRegister(Reg, &getRegisterInfo())) { - switch (It->getOpcode()) { - default: break; - case PPC::LI: - case PPC::LI8: - case PPC::ADDItocL: - case PPC::ADDI: - case PPC::ADDI8: - OpNoForForwarding = i; - return &*It; - } + Register Reg = MI.getOperand(i).getReg(); + // If we see another use of this reg between the def and the MI, + // we want to flat it so the def isn't deleted. + MachineInstr *DefMI = getDefMIPostRA(Reg, MI, SeenIntermediateUse); + if (DefMI) { + // Is this register defined by some form of add-immediate (including + // load-immediate) within this basic block? + switch (DefMI->getOpcode()) { + default: break; - } else if (It->readsRegister(Reg, &getRegisterInfo())) - // If we see another use of this reg between the def and the MI, - // we want to flat it so the def isn't deleted. 
- SeenIntermediateUse = true; + case PPC::LI: + case PPC::LI8: + case PPC::ADDItocL: + case PPC::ADDI: + case PPC::ADDI8: + OpNoForForwarding = i; + return DefMI; + } } } } @@ -2417,7 +2424,7 @@ const unsigned *PPCInstrInfo::getStoreOpcodesForSpillArray() const { {PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX, PPC::SPILL_VRSAVE, PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb, - PPC::SPILLTOVSR_ST, PPC::EVSTDD, PPC::SPESTW}, + PPC::SPILLTOVSR_ST, PPC::EVSTDD}, // Power 9 {PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, @@ -2433,7 +2440,7 @@ const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const { {PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXVD2X, PPC::LXSDX, PPC::LXSSPX, PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, PPC::QVLFDXb, - PPC::SPILLTOVSR_LD, PPC::EVLDD, PPC::SPELWZ}, + PPC::SPILLTOVSR_LD, PPC::EVLDD}, // Power 9 {PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, PPC::DFLOADf32, @@ -2538,12 +2545,15 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, "The forwarding operand needs to be valid at this point"); bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill(); bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled; - unsigned ForwardingOperandReg = MI.getOperand(ForwardingOperand).getReg(); + Register ForwardingOperandReg = MI.getOperand(ForwardingOperand).getReg(); if (KilledDef && KillFwdDefMI) *KilledDef = DefMI; ImmInstrInfo III; - bool HasImmForm = instrHasImmForm(MI, III, PostRA); + bool IsVFReg = MI.getOperand(0).isReg() + ? isVFRegister(MI.getOperand(0).getReg()) + : false; + bool HasImmForm = instrHasImmForm(MI.getOpcode(), IsVFReg, III, PostRA); // If this is a reg+reg instruction that has a reg+imm form, // and one of the operands is produced by an add-immediate, // try to convert it. @@ -2591,7 +2601,7 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, // If a compare-immediate is fed by an immediate and is itself an input of // an ISEL (the most common case) into a COPY of the correct register. bool Changed = false; - unsigned DefReg = MI.getOperand(0).getReg(); + Register DefReg = MI.getOperand(0).getReg(); int64_t Comparand = MI.getOperand(2).getImm(); int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0 ? 
(Comparand | 0xFFFFFFFFFFFF0000) : Comparand; @@ -2601,8 +2611,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8) continue; unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg(); - unsigned TrueReg = CompareUseMI.getOperand(1).getReg(); - unsigned FalseReg = CompareUseMI.getOperand(2).getReg(); + Register TrueReg = CompareUseMI.getOperand(1).getReg(); + Register FalseReg = CompareUseMI.getOperand(2).getReg(); unsigned RegToCopy = selectReg(SExtImm, SExtComparand, Opc, TrueReg, FalseReg, CRSubReg); if (RegToCopy == PPC::NoRegister) @@ -2777,9 +2787,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, return false; } -bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, +bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg, ImmInstrInfo &III, bool PostRA) const { - unsigned Opc = MI.getOpcode(); // The vast majority of the instructions would need their operand 2 replaced // with an immediate when switching to the reg+imm form. A marked exception // are the update form loads/stores for which a constant operand 2 would need @@ -3111,7 +3120,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, break; case PPC::LXSSPX: if (PostRA) { - if (isVFRegister(MI.getOperand(0).getReg())) + if (IsVFReg) III.ImmOpcode = PPC::LXSSP; else { III.ImmOpcode = PPC::LFS; @@ -3125,7 +3134,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, break; case PPC::LXSDX: if (PostRA) { - if (isVFRegister(MI.getOperand(0).getReg())) + if (IsVFReg) III.ImmOpcode = PPC::LXSD; else { III.ImmOpcode = PPC::LFD; @@ -3143,7 +3152,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, break; case PPC::STXSSPX: if (PostRA) { - if (isVFRegister(MI.getOperand(0).getReg())) + if (IsVFReg) III.ImmOpcode = PPC::STXSSP; else { III.ImmOpcode = PPC::STFS; @@ -3157,7 +3166,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, break; case PPC::STXSDX: if (PostRA) { - if (isVFRegister(MI.getOperand(0).getReg())) + if (IsVFReg) III.ImmOpcode = PPC::STXSD; else { III.ImmOpcode = PPC::STFD; @@ -3287,7 +3296,7 @@ bool PPCInstrInfo::isRegElgibleForForwarding( if (MRI.isSSA()) return false; - unsigned Reg = RegMO.getReg(); + Register Reg = RegMO.getReg(); // Walking the inst in reverse(MI-->DefMI) to get the last DEF of the Reg. MachineBasicBlock::const_reverse_iterator It = MI; @@ -3511,8 +3520,8 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI, if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) { unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? III.ZeroIsSpecialOrig : III.ZeroIsSpecialNew + 1; - unsigned OrigZeroReg = MI.getOperand(PosForOrigZero).getReg(); - unsigned NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg(); + Register OrigZeroReg = MI.getOperand(PosForOrigZero).getReg(); + Register NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg(); // If R0 is in the operand where zero is special for the new instruction, // it is unsafe to transform if the constant operand isn't that operand. if ((NewZeroReg == PPC::R0 || NewZeroReg == PPC::X0) && @@ -3563,16 +3572,20 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI, } else { // The 32 bit and 64 bit instructions are quite different. if (SpecialShift32) { - // Left shifts use (N, 0, 31-N), right shifts use (32-N, N, 31). - uint64_t SH = RightShift ? 32 - ShAmt : ShAmt; + // Left shifts use (N, 0, 31-N). + // Right shifts use (32-N, N, 31) if 0 < N < 32. + // use (0, 0, 31) if N == 0. 
+ uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 32 - ShAmt : ShAmt; uint64_t MB = RightShift ? ShAmt : 0; uint64_t ME = RightShift ? 31 : 31 - ShAmt; replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH); MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB) .addImm(ME); } else { - // Left shifts use (N, 63-N), right shifts use (64-N, N). - uint64_t SH = RightShift ? 64 - ShAmt : ShAmt; + // Left shifts use (N, 63-N). + // Right shifts use (64-N, N) if 0 < N < 64. + // use (0, 0) if N == 0. + uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 64 - ShAmt : ShAmt; uint64_t ME = RightShift ? ShAmt : 63 - ShAmt; replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH); MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME); @@ -3601,8 +3614,8 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI, if (III.ZeroIsSpecialNew) { // If operand at III.ZeroIsSpecialNew is physical reg(eg: ZERO/ZERO8), no // need to fix up register class. - unsigned RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg(); - if (TargetRegisterInfo::isVirtualRegister(RegToModify)) { + Register RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg(); + if (Register::isVirtualRegister(RegToModify)) { const TargetRegisterClass *NewRC = MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ? &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass; @@ -3747,7 +3760,7 @@ bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const { return false; unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); unsigned StackOffset = MI.getOperand(1).getImm(); - unsigned StackReg = MI.getOperand(2).getReg(); + Register StackReg = MI.getOperand(2).getReg(); if (StackReg == PPC::X1 && StackOffset == TOCSaveOffset) return true; @@ -3772,7 +3785,7 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, switch (MI.getOpcode()) { case PPC::COPY: { - unsigned SrcReg = MI.getOperand(1).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); // In both ELFv1 and v2 ABI, method parameters and the return value // are sign- or zero-extended. @@ -3781,7 +3794,7 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, // We check the ZExt/SExt flags for a method parameter. if (MI.getParent()->getBasicBlock() == &MF->getFunction().getEntryBlock()) { - unsigned VReg = MI.getOperand(0).getReg(); + Register VReg = MI.getOperand(0).getReg(); if (MF->getRegInfo().isLiveIn(VReg)) return SignExt ? FuncInfo->isLiveInSExt(VReg) : FuncInfo->isLiveInZExt(VReg); @@ -3818,7 +3831,7 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, } // If this is a copy from another register, we recursively check source. - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + if (!Register::isVirtualRegister(SrcReg)) return false; const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); if (SrcMI != NULL) @@ -3841,8 +3854,8 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, case PPC::XORIS8: { // logical operation with 16-bit immediate does not change the upper bits. // So, we track the operand register as we do for register copy. 
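// A minimal standalone sketch, assuming a shift amount already validated to be
// in [0, 31]; the struct and function names are illustrative only, not LLVM
// API. It restates the 32-bit field selection used above: left shifts use
// (N, 0, 31-N); right shifts use (32-N, N, 31) for 0 < N < 32 and (0, 0, 31)
// when N == 0.
#include <cassert>
#include <cstdint>

struct RotateFields { uint64_t SH, MB, ME; };

static RotateFields shiftToRLWINM(uint64_t ShAmt, bool RightShift) {
  assert(ShAmt < 32 && "shift amount must be in [0, 31]");
  uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 32 - ShAmt : ShAmt;
  uint64_t MB = RightShift ? ShAmt : 0;
  uint64_t ME = RightShift ? 31 : 31 - ShAmt;
  return {SH, MB, ME};
}

// For example, srwi rD, rS, 5 becomes rlwinm rD, rS, 27, 5, 31 and
// slwi rD, rS, 5 becomes rlwinm rD, rS, 5, 0, 26; the 64-bit rldicl/rldicr
// case is analogous, with a single mask field and 63 in place of 31.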
- unsigned SrcReg = MI.getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + Register SrcReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(SrcReg)) return false; const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); if (SrcMI != NULL) @@ -3870,8 +3883,8 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, for (unsigned I = 1; I != E; I += D) { if (MI.getOperand(I).isReg()) { - unsigned SrcReg = MI.getOperand(I).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + Register SrcReg = MI.getOperand(I).getReg(); + if (!Register::isVirtualRegister(SrcReg)) return false; const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); if (SrcMI == NULL || !isSignOrZeroExtended(*SrcMI, SignExt, Depth+1)) @@ -3893,12 +3906,12 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, assert(MI.getOperand(1).isReg() && MI.getOperand(2).isReg()); - unsigned SrcReg1 = MI.getOperand(1).getReg(); - unsigned SrcReg2 = MI.getOperand(2).getReg(); + Register SrcReg1 = MI.getOperand(1).getReg(); + Register SrcReg2 = MI.getOperand(2).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg1) || - !TargetRegisterInfo::isVirtualRegister(SrcReg2)) - return false; + if (!Register::isVirtualRegister(SrcReg1) || + !Register::isVirtualRegister(SrcReg2)) + return false; const MachineInstr *MISrc1 = MRI->getVRegDef(SrcReg1); const MachineInstr *MISrc2 = MRI->getVRegDef(SrcReg2); @@ -3923,21 +3936,99 @@ bool PPCInstrInfo::isBDNZ(unsigned Opcode) const { return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ)); } -bool PPCInstrInfo::analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, - MachineInstr *&CmpInst) const { - MachineBasicBlock *LoopEnd = L.getBottomBlock(); - MachineBasicBlock::iterator I = LoopEnd->getFirstTerminator(); - // We really "analyze" only CTR loops right now. - if (I != LoopEnd->end() && isBDNZ(I->getOpcode())) { - IndVarInst = nullptr; - CmpInst = &*I; - return false; +namespace { +class PPCPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo { + MachineInstr *Loop, *EndLoop, *LoopCount; + MachineFunction *MF; + const TargetInstrInfo *TII; + int64_t TripCount; + +public: + PPCPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop, + MachineInstr *LoopCount) + : Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount), + MF(Loop->getParent()->getParent()), + TII(MF->getSubtarget().getInstrInfo()) { + // Inspect the Loop instruction up-front, as it may be deleted when we call + // createTripCountGreaterCondition. + if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI) + TripCount = LoopCount->getOperand(1).getImm(); + else + TripCount = -1; } - return true; + + bool shouldIgnoreForPipelining(const MachineInstr *MI) const override { + // Only ignore the terminator. + return MI == EndLoop; + } + + Optional + createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB, + SmallVectorImpl &Cond) override { + if (TripCount == -1) { + // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1, + // so we don't need to generate any thing here. + Cond.push_back(MachineOperand::CreateImm(0)); + Cond.push_back(MachineOperand::CreateReg( + MF->getSubtarget().isPPC64() ? PPC::CTR8 : PPC::CTR, + true)); + return {}; + } + + return TripCount > TC; + } + + void setPreheader(MachineBasicBlock *NewPreheader) override { + // Do nothing. We want the LOOP setup instruction to stay in the *old* + // preheader, so we can use BDZ in the prologs to adapt the loop trip count. 
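// A rough sketch of the createTripCountGreaterCondition contract shown in the
// pipeliner hunk above, assuming std::optional in place of llvm::Optional and
// an int64_t in place of the cached LI/LI8 immediate; -1 models the run-time
// trip-count case. Names here are illustrative only.
#include <cstdint>
#include <optional>

std::optional<bool> tripCountGreaterThan(int64_t TripCount, int TC) {
  if (TripCount == -1)
    return std::nullopt; // unknown statically: a CTR-based BDZ/BDZ8 check is used
  return TripCount > TC; // known statically: the pipeliner can fold the guard
}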
+ } + + void adjustTripCount(int TripCountAdjust) override { + // If the loop trip count is a compile-time value, then just change the + // value. + if (LoopCount->getOpcode() == PPC::LI8 || + LoopCount->getOpcode() == PPC::LI) { + int64_t TripCount = LoopCount->getOperand(1).getImm() + TripCountAdjust; + LoopCount->getOperand(1).setImm(TripCount); + return; + } + + // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1, + // so we don't need to generate any thing here. + } + + void disposed() override { + Loop->eraseFromParent(); + // Ensure the loop setup instruction is deleted too. + LoopCount->eraseFromParent(); + } +}; +} // namespace + +std::unique_ptr +PPCInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const { + // We really "analyze" only hardware loops right now. + MachineBasicBlock::iterator I = LoopBB->getFirstTerminator(); + MachineBasicBlock *Preheader = *LoopBB->pred_begin(); + if (Preheader == LoopBB) + Preheader = *std::next(LoopBB->pred_begin()); + MachineFunction *MF = Preheader->getParent(); + + if (I != LoopBB->end() && isBDNZ(I->getOpcode())) { + SmallPtrSet Visited; + if (MachineInstr *LoopInst = findLoopInstr(*Preheader, Visited)) { + Register LoopCountReg = LoopInst->getOperand(0).getReg(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg); + return std::make_unique(LoopInst, &*I, LoopCount); + } + } + return nullptr; } -MachineInstr * -PPCInstrInfo::findLoopInstr(MachineBasicBlock &PreHeader) const { +MachineInstr *PPCInstrInfo::findLoopInstr( + MachineBasicBlock &PreHeader, + SmallPtrSet &Visited) const { unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop); @@ -3948,50 +4039,6 @@ PPCInstrInfo::findLoopInstr(MachineBasicBlock &PreHeader) const { return nullptr; } -unsigned PPCInstrInfo::reduceLoopCount( - MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, MachineInstr *IndVar, - MachineInstr &Cmp, SmallVectorImpl &Cond, - SmallVectorImpl &PrevInsts, unsigned Iter, - unsigned MaxIter) const { - // We expect a hardware loop currently. This means that IndVar is set - // to null, and the compare is the ENDLOOP instruction. - assert((!IndVar) && isBDNZ(Cmp.getOpcode()) && "Expecting a CTR loop"); - MachineFunction *MF = MBB.getParent(); - DebugLoc DL = Cmp.getDebugLoc(); - MachineInstr *Loop = findLoopInstr(PreHeader); - if (!Loop) - return 0; - unsigned LoopCountReg = Loop->getOperand(0).getReg(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg); - - if (!LoopCount) - return 0; - // If the loop trip count is a compile-time value, then just change the - // value. - if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI) { - int64_t Offset = LoopCount->getOperand(1).getImm(); - if (Offset <= 1) { - LoopCount->eraseFromParent(); - Loop->eraseFromParent(); - return 0; - } - LoopCount->getOperand(1).setImm(Offset - 1); - return Offset - 1; - } - - // The loop trip count is a run-time value. - // We need to subtract one from the trip count, - // and insert branch later to check if we're done with the loop. - - // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1, - // so we don't need to generate any thing here. - Cond.push_back(MachineOperand::CreateImm(0)); - Cond.push_back(MachineOperand::CreateReg( - Subtarget.isPPC64() ? 
PPC::CTR8 : PPC::CTR, true)); - return LoopCountReg; -} - // Return true if get the base operand, byte offset of an instruction and the // memory width. Width is the size of memory that is being loaded/stored. bool PPCInstrInfo::getMemOperandWithOffsetWidth( @@ -4018,8 +4065,7 @@ bool PPCInstrInfo::getMemOperandWithOffsetWidth( } bool PPCInstrInfo::areMemAccessesTriviallyDisjoint( - const MachineInstr &MIa, const MachineInstr &MIb, - AliasAnalysis * /*AA*/) const { + const MachineInstr &MIa, const MachineInstr &MIb) const { assert(MIa.mayLoadOrStore() && "MIa must be a load or store."); assert(MIb.mayLoadOrStore() && "MIb must be a load or store."); diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 70fb757e8f1e..19ab30cb0908 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -248,11 +248,11 @@ public: unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override; bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA) const override; + AAResults *AA) const override; unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override; - bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, + bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override; void insertNoop(MachineBasicBlock &MBB, @@ -370,8 +370,7 @@ public: /// otherwise bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, - const MachineInstr &MIb, - AliasAnalysis *AA = nullptr) const override; + const MachineInstr &MIb) const override; /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. @@ -439,9 +438,14 @@ public: void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo, int64_t Imm) const; - bool instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III, + bool instrHasImmForm(unsigned Opc, bool IsVFReg, ImmInstrInfo &III, bool PostRA) const; + // In PostRA phase, try to find instruction defines \p Reg before \p MI. + // \p SeenIntermediate is set to true if uses between DefMI and \p MI exist. + MachineInstr *getDefMIPostRA(unsigned Reg, MachineInstr &MI, + bool &SeenIntermediateUse) const; + /// getRegNumForOperand - some operands use different numbering schemes /// for the same registers. For example, a VSX instruction may have any of /// vs0-vs63 allocated whereas an Altivec instruction could only have @@ -481,26 +485,14 @@ public: /// On PPC, we have two instructions used to set-up the hardware loop /// (MTCTRloop, MTCTR8loop) with corresponding endloop (BDNZ, BDNZ8) /// instructions to indicate the end of a loop. - MachineInstr *findLoopInstr(MachineBasicBlock &PreHeader) const; - - /// Analyze the loop code to find the loop induction variable and compare used - /// to compute the number of iterations. Currently, we analyze loop that are - /// controlled using hardware loops. In this case, the induction variable - /// instruction is null. For all other cases, this function returns true, - /// which means we're unable to analyze it. \p IndVarInst and \p CmpInst will - /// return new values when we can analyze the readonly loop \p L, otherwise, - /// nothing got changed - bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, - MachineInstr *&CmpInst) const override; - /// Generate code to reduce the loop iteration by one and check if the loop - /// is finished. Return the value/register of the new loop count. 
We need - /// this function when peeling off one or more iterations of a loop. This - /// function assumes the last iteration is peeled first. - unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, - MachineInstr *IndVar, MachineInstr &Cmp, - SmallVectorImpl &Cond, - SmallVectorImpl &PrevInsts, - unsigned Iter, unsigned MaxIter) const override; + MachineInstr * + findLoopInstr(MachineBasicBlock &PreHeader, + SmallPtrSet &Visited) const; + + /// Analyze loop L, which must be a single-basic-block loop, and if the + /// conditions can be understood enough produce a PipelinerLoopInfo object. + std::unique_ptr + analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override; }; } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index c313337047f0..24183277519b 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -386,7 +386,9 @@ def immZExt16 : PatLeaf<(imm), [{ // field. Used by instructions like 'ori'. return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue(); }], LO16>; -def immAnyExt8 : ImmLeaf(Imm) || isUInt<8>(Imm); }]>; +def immNonAllOneAnyExt8 : ImmLeaf(Imm) && (Imm != -1)) || (isUInt<8>(Imm) && (Imm != 0xFF)); +}]>; def immSExt5NonZero : ImmLeaf(Imm); }]>; // imm16Shifted* - These match immediates where the low 16-bits are zero. There @@ -577,7 +579,7 @@ def sperc : RegisterOperand { def PPCRegSPE4RCAsmOperand : AsmOperandClass { let Name = "RegSPE4RC"; let PredicateMethod = "isRegNumber"; } -def spe4rc : RegisterOperand { +def spe4rc : RegisterOperand { let ParserMatchClass = PPCRegSPE4RCAsmOperand; } @@ -3161,7 +3163,16 @@ def ADDISdtprelHA32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s1 def LWZtoc : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg), "#LWZtoc", [(set i32:$rD, + (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>; +def LWZtocL : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc_nor0:$reg), + "#LWZtocL", + [(set i32:$rD, (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>; +def ADDIStocHA : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, tocentry32:$disp), + "#ADDIStocHA", + [(set i32:$rD, + (PPCtoc_entry i32:$reg, tglobaladdr:$disp))]>; + // Get Global (GOT) Base Register offset, from the word immediately preceding // the function label. 
def UpdateGBR : PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>; @@ -3177,21 +3188,21 @@ def : Pat<(srl i32:$rS, i32:$rB), def : Pat<(shl i32:$rS, i32:$rB), (SLW $rS, $rB)>; -def : Pat<(zextloadi1 iaddr:$src), +def : Pat<(i32 (zextloadi1 iaddr:$src)), (LBZ iaddr:$src)>; -def : Pat<(zextloadi1 xaddr:$src), +def : Pat<(i32 (zextloadi1 xaddr:$src)), (LBZX xaddr:$src)>; -def : Pat<(extloadi1 iaddr:$src), +def : Pat<(i32 (extloadi1 iaddr:$src)), (LBZ iaddr:$src)>; -def : Pat<(extloadi1 xaddr:$src), +def : Pat<(i32 (extloadi1 xaddr:$src)), (LBZX xaddr:$src)>; -def : Pat<(extloadi8 iaddr:$src), +def : Pat<(i32 (extloadi8 iaddr:$src)), (LBZ iaddr:$src)>; -def : Pat<(extloadi8 xaddr:$src), +def : Pat<(i32 (extloadi8 xaddr:$src)), (LBZX xaddr:$src)>; -def : Pat<(extloadi16 iaddr:$src), +def : Pat<(i32 (extloadi16 iaddr:$src)), (LHZ iaddr:$src)>; -def : Pat<(extloadi16 xaddr:$src), +def : Pat<(i32 (extloadi16 xaddr:$src)), (LHZX xaddr:$src)>; let Predicates = [HasFPU] in { def : Pat<(f64 (extloadf32 iaddr:$src)), @@ -3564,23 +3575,6 @@ def : Pat<(i1 (setcc i32:$s1, imm:$imm, SETEQ)), (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)), (LO16 imm:$imm)), sub_eq)>; -defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGE)), - (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGE)), - (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULE)), - (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLE)), - (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETNE)), - (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>; -defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETNE)), - (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>; - -defm : CRNotPat<(i1 (setcc i32:$s1, imm:$imm, SETNE)), - (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)), - (LO16 imm:$imm)), sub_eq)>; - def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETULT)), (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>; def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETLT)), @@ -3592,17 +3586,6 @@ def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETGT)), def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETEQ)), (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETUGE)), - (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETGE)), - (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETULE)), - (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETLE)), - (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETNE)), - (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>; - // SETCC for i64. 
def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETULT)), (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>; @@ -3632,6 +3615,47 @@ def : Pat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETEQ)), (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)), (LO16 imm:$imm)), sub_eq)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETULT)), + (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETLT)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETUGT)), + (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETGT)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETEQ)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; + +// Instantiations of CRNotPat for i32. +defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGE)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGE)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULE)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLE)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETNE)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>; +defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i32:$s1, imm:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)), + (LO16 imm:$imm)), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETUGE)), + (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETGE)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETULE)), + (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETLE)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETNE)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>; + +// Instantiations of CRNotPat for i64. defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETUGE)), (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>; defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETGE)), @@ -3649,17 +3673,6 @@ defm : CRNotPat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETNE)), (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)), (LO16 imm:$imm)), sub_eq)>; -def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETULT)), - (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>; -def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETLT)), - (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>; -def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETUGT)), - (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>; -def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETGT)), - (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>; -def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETEQ)), - (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; - defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETUGE)), (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>; defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETGE)), @@ -3671,6 +3684,56 @@ defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETLE)), defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETNE)), (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; +let Predicates = [HasFPU] in { +// Instantiations of CRNotPat for f32. 
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>; + +// Instantiations of CRNotPat for f64. +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>; + +// Instantiations of CRNotPat for f128. +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUGE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETGE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETULE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETLE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUNE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETNE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETO)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>; +} + // SETCC for f32. let Predicates = [HasFPU] in { def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOLT)), @@ -3688,21 +3751,6 @@ def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETEQ)), def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETUO)), (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>; - // SETCC for f64. 
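// A small illustrative model, not LLVM code, of why the CRNotPat
// instantiations above exist: predicates such as uge/ge/ule/le/(u)ne/o have no
// dedicated CR bit after a compare, but each is the complement of a bit that
// is set, so the pattern inverts that bit (roughly, a crnot of the extracted
// sub-register) instead of issuing another compare.
struct CRBits { bool lt, gt, eq, un; }; // one CR field after cmpw/cmpd/fcmpu

static bool setUGE(const CRBits &CR) { return !CR.lt; } // SETUGE == !LT
static bool setULE(const CRBits &CR) { return !CR.gt; } // SETULE == !GT
static bool setNE (const CRBits &CR) { return !CR.eq; } // SETNE  == !EQ
static bool setO  (const CRBits &CR) { return !CR.un; } // SETO   == !UN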
def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOLT)), (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; @@ -3719,21 +3767,6 @@ def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETEQ)), def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETUO)), (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>; - // SETCC for f128. def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETOLT)), (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; @@ -3750,21 +3783,6 @@ def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETEQ)), def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETUO)), (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUGE)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETGE)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETULE)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETLE)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUNE)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETNE)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETO)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>; - } // This must be in this file because it relies on patterns defined in this file diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 07f38a61d098..2aad5860d87f 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -58,8 +58,12 @@ def SDT_PPCldvsxlh : SDTypeProfile<1, 1, [ SDTCisVT<0, v4f32>, SDTCisPtrTy<1> ]>; -def SDT_PPCfpextlh : SDTypeProfile<1, 1, [ - SDTCisVT<0, v2f64>, SDTCisVT<1, v4f32> +def SDT_PPCfpexth : SDTypeProfile<1, 2, [ + SDTCisVT<0, v2f64>, SDTCisVT<1, v4f32>, SDTCisPtrTy<2> +]>; + +def SDT_PPCldsplat : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisPtrTy<1> ]>; // Little-endian-specific nodes. 
@@ -78,12 +82,21 @@ def SDTVecConv : SDTypeProfile<1, 2, [ def SDTVabsd : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<3, i32> ]>; - +def SDT_PPCld_vec_be : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisPtrTy<1> +]>; +def SDT_PPCst_vec_be : SDTypeProfile<0, 2, [ + SDTCisVec<0>, SDTCisPtrTy<1> +]>; def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x, [SDNPHasChain, SDNPMayStore]>; +def PPCld_vec_be : SDNode<"PPCISD::LOAD_VEC_BE", SDT_PPCld_vec_be, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def PPCst_vec_be : SDNode<"PPCISD::STORE_VEC_BE", SDT_PPCst_vec_be, + [SDNPHasChain, SDNPMayStore]>; def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>; def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>; def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>; @@ -93,9 +106,11 @@ def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>; def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>; def PPCvabsd : SDNode<"PPCISD::VABSD", SDTVabsd, []>; -def PPCfpextlh : SDNode<"PPCISD::FP_EXTEND_LH", SDT_PPCfpextlh, []>; +def PPCfpexth : SDNode<"PPCISD::FP_EXTEND_HALF", SDT_PPCfpexth, []>; def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def PPCldsplat : SDNode<"PPCISD::LD_SPLAT", SDT_PPCldsplat, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; multiclass XX3Form_Rcr opcode, bits<7> xo, string asmbase, string asmstr, InstrItinClass itin, Intrinsic Int, @@ -855,14 +870,14 @@ let Uses = [RM] in { let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1 in { - def XXLXORz : XX3Form_Zero<60, 154, (outs vsrc:$XT), (ins), + def XXLXORz : XX3Form_SameOp<60, 154, (outs vsrc:$XT), (ins), "xxlxor $XT, $XT, $XT", IIC_VecGeneral, [(set v4i32:$XT, (v4i32 immAllZerosV))]>; - def XXLXORdpz : XX3Form_SetZero<60, 154, + def XXLXORdpz : XX3Form_SameOp<60, 154, (outs vsfrc:$XT), (ins), "xxlxor $XT, $XT, $XT", IIC_VecGeneral, [(set f64:$XT, (fpimm0))]>; - def XXLXORspz : XX3Form_SetZero<60, 154, + def XXLXORspz : XX3Form_SameOp<60, 154, (outs vssrc:$XT), (ins), "xxlxor $XT, $XT, $XT", IIC_VecGeneral, [(set f32:$XT, (fpimm0))]>; @@ -996,21 +1011,21 @@ def : Pat<(f64 (extractelt v2f64:$S, 1)), (f64 (EXTRACT_SUBREG $S, sub_64))>; } -// Additional fnmsub patterns: -a*c + b == -(a*c - b) -def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B), - (XSNMSUBADP $B, $C, $A)>; -def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B), - (XSNMSUBADP $B, $C, $A)>; +// Additional fnmsub patterns: -a*b + c == -(a*b - c) +def : Pat<(fma (fneg f64:$A), f64:$B, f64:$C), + (XSNMSUBADP $C, $A, $B)>; +def : Pat<(fma f64:$A, (fneg f64:$B), f64:$C), + (XSNMSUBADP $C, $A, $B)>; -def : Pat<(fma (fneg v2f64:$A), v2f64:$C, v2f64:$B), - (XVNMSUBADP $B, $C, $A)>; -def : Pat<(fma v2f64:$A, (fneg v2f64:$C), v2f64:$B), - (XVNMSUBADP $B, $C, $A)>; +def : Pat<(fma (fneg v2f64:$A), v2f64:$B, v2f64:$C), + (XVNMSUBADP $C, $A, $B)>; +def : Pat<(fma v2f64:$A, (fneg v2f64:$B), v2f64:$C), + (XVNMSUBADP $C, $A, $B)>; -def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B), - (XVNMSUBASP $B, $C, $A)>; -def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B), - (XVNMSUBASP $B, $C, $A)>; +def : Pat<(fma (fneg v4f32:$A), v4f32:$B, v4f32:$C), + (XVNMSUBASP $C, $A, $B)>; +def : Pat<(fma v4f32:$A, (fneg v4f32:$B), v4f32:$C), + (XVNMSUBASP $C, $A, $B)>; def : Pat<(v2f64 (bitconvert v4f32:$A)), (COPY_TO_REGCLASS $A, VSRC)>; @@ 
-1077,7 +1092,8 @@ def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 0)), def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)), (v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>; -def : Pat<(v2f64 (PPCfpextlh v4f32:$C)), (XVCVSPDP (XXMRGHW $C, $C))>; +def : Pat<(v2f64 (PPCfpexth v4f32:$C, 0)), (XVCVSPDP (XXMRGHW $C, $C))>; +def : Pat<(v2f64 (PPCfpexth v4f32:$C, 1)), (XVCVSPDP (XXMRGLW $C, $C))>; // Loads. let Predicates = [HasVSX, HasOnlySwappingMemOps] in { @@ -1088,6 +1104,19 @@ let Predicates = [HasVSX, HasOnlySwappingMemOps] in { (STXVD2X $rS, xoaddr:$dst)>; def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; } + +// Load vector big endian order +let Predicates = [IsLittleEndian, HasVSX] in { + def : Pat<(v2f64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; + def : Pat<(PPCst_vec_be v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; + def : Pat<(v4f32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; + def : Pat<(PPCst_vec_be v4f32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>; + def : Pat<(v2i64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; + def : Pat<(PPCst_vec_be v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; + def : Pat<(v4i32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; + def : Pat<(PPCst_vec_be v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>; +} + let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in { def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; @@ -1288,6 +1317,13 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B), (XXLEQV $A, $B)>; + let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1, + isReMaterializable = 1 in { + def XXLEQVOnes : XX3Form_SameOp<60, 186, (outs vsrc:$XT), (ins), + "xxleqv $XT, $XT, $XT", IIC_VecGeneral, + [(set v4i32:$XT, (bitconvert (v16i8 immAllOnesV)))]>; + } + def XXLORC : XX3Form<60, 170, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxlorc $XT, $XA, $XB", IIC_VecGeneral, @@ -1476,6 +1512,12 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. 
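// A standalone check, with plain doubles, of the identity behind the corrected
// fnmsub/xsnmsubasp patterns in the surrounding hunks: -a*b + c == -(a*b - c),
// which the negative multiply-subtract A-forms compute when fed
// (addend, multiplicand, multiplicand) as in (XSNMSUBADP $C, $A, $B).
// Sketch only; values chosen so both sides are exact.
#include <cassert>
#include <cmath>

int main() {
  double a = 3.0, b = 4.0, c = 5.0;
  double negMulAdd = std::fma(-a, b, c); // -a*b + c, the DAG-level form
  double fnmsub = -(a * b - c);          // the negative multiply-subtract form
  assert(negMulAdd == fnmsub && fnmsub == -7.0);
  return 0;
}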
AltVSXFMARel; } + // Additional xsnmsubasp patterns: -a*b + c == -(a*b - c) + def : Pat<(fma (fneg f32:$A), f32:$B, f32:$C), + (XSNMSUBASP $C, $A, $B)>; + def : Pat<(fma f32:$A, (fneg f32:$B), f32:$C), + (XSNMSUBASP $C, $A, $B)>; + // Single Precision Conversions (FP <-> INT) def XSCVSXDSP : XX2Form<60, 312, (outs vssrc:$XT), (ins vsfrc:$XB), @@ -1564,16 +1606,33 @@ let Predicates = [HasDirectMove] in { def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT), "mfvsrwz $rA, $XT", IIC_VecGeneral, [(set i32:$rA, (PPCmfvsr f64:$XT))]>; + let isCodeGenOnly = 1 in + def MFVRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsrc:$XT), + "mfvsrwz $rA, $XT", IIC_VecGeneral, + []>; def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA), "mtvsrd $XT, $rA", IIC_VecGeneral, [(set f64:$XT, (PPCmtvsra i64:$rA))]>, Requires<[In64BitMode]>; + let isCodeGenOnly = 1 in + def MTVRD : XX1_RS6_RD5_XO<31, 179, (outs vsrc:$XT), (ins g8rc:$rA), + "mtvsrd $XT, $rA", IIC_VecGeneral, + []>, + Requires<[In64BitMode]>; def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA), "mtvsrwa $XT, $rA", IIC_VecGeneral, [(set f64:$XT, (PPCmtvsra i32:$rA))]>; + let isCodeGenOnly = 1 in + def MTVRWA : XX1_RS6_RD5_XO<31, 211, (outs vsrc:$XT), (ins gprc:$rA), + "mtvsrwa $XT, $rA", IIC_VecGeneral, + []>; def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA), "mtvsrwz $XT, $rA", IIC_VecGeneral, [(set f64:$XT, (PPCmtvsrz i32:$rA))]>; + let isCodeGenOnly = 1 in + def MTVRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsrc:$XT), (ins gprc:$rA), + "mtvsrwz $XT, $rA", IIC_VecGeneral, + []>; } // HasDirectMove let Predicates = [IsISA3_0, HasDirectMove] in { @@ -1597,6 +1656,22 @@ def : InstAlias<"mfvrd $rA, $XT", (MFVRD g8rc:$rA, vrrc:$XT), 0>; def : InstAlias<"mffprd $rA, $src", (MFVSRD g8rc:$rA, f8rc:$src)>; +def : InstAlias<"mtvrd $XT, $rA", + (MTVRD vrrc:$XT, g8rc:$rA), 0>; +def : InstAlias<"mtfprd $dst, $rA", + (MTVSRD f8rc:$dst, g8rc:$rA)>; +def : InstAlias<"mfvrwz $rA, $XT", + (MFVRWZ gprc:$rA, vrrc:$XT), 0>; +def : InstAlias<"mffprwz $rA, $src", + (MFVSRWZ gprc:$rA, f8rc:$src)>; +def : InstAlias<"mtvrwa $XT, $rA", + (MTVRWA vrrc:$XT, gprc:$rA), 0>; +def : InstAlias<"mtfprwa $dst, $rA", + (MTVSRWA f8rc:$dst, gprc:$rA)>; +def : InstAlias<"mtvrwz $XT, $rA", + (MTVRWZ vrrc:$XT, gprc:$rA), 0>; +def : InstAlias<"mtfprwz $dst, $rA", + (MTVSRWZ f8rc:$dst, gprc:$rA)>; /* Direct moves of various widths from GPR's into VSR's. Each move lines the value up into element 0 (both BE and LE). 
Namely, entities smaller than @@ -2581,9 +2656,9 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (fneg (int_ppc_fmaf128_round_to_odd f128:$vA, f128:$vB, (fneg f128:$vTi))))]>; - // Additional fnmsub patterns: -a*c + b == -(a*c - b) - def : Pat<(fma (fneg f128:$A), f128:$C, f128:$B), (XSNMSUBQP $B, $C, $A)>; - def : Pat<(fma f128:$A, (fneg f128:$C), f128:$B), (XSNMSUBQP $B, $C, $A)>; + // Additional fnmsub patterns: -a*b + c == -(a*b - c) + def : Pat<(fma (fneg f128:$A), f128:$B, f128:$C), (XSNMSUBQP $C, $A, $B)>; + def : Pat<(fma f128:$A, (fneg f128:$B), f128:$C), (XSNMSUBQP $C, $A, $B)>; //===--------------------------------------------------------------------===// // Quad/Double-Precision Compare Instructions: @@ -2799,12 +2874,12 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB), "xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP, [(set v4i32: $XT, - (int_ppc_vsx_xvtstdcsp v4f32:$XB, imm:$DCMX))]>; + (int_ppc_vsx_xvtstdcsp v4f32:$XB, timm:$DCMX))]>; def XVTSTDCDP : XX2_RD6_DCMX7_RS6<60, 15, 5, (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB), "xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP, [(set v2i64: $XT, - (int_ppc_vsx_xvtstdcdp v2f64:$XB, imm:$DCMX))]>; + (int_ppc_vsx_xvtstdcdp v2f64:$XB, timm:$DCMX))]>; //===--------------------------------------------------------------------===// @@ -3024,6 +3099,16 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>; def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>; + + def : Pat<(v8i16 (PPCld_vec_be xoaddr:$src)), + (COPY_TO_REGCLASS (LXVH8X xoaddr:$src), VRRC)>; + def : Pat<(PPCst_vec_be v8i16:$rS, xoaddr:$dst), + (STXVH8X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>; + + def : Pat<(v16i8 (PPCld_vec_be xoaddr:$src)), + (COPY_TO_REGCLASS (LXVB16X xoaddr:$src), VRRC)>; + def : Pat<(PPCst_vec_be v16i8:$rS, xoaddr:$dst), + (STXVB16X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>; } // IsLittleEndian, HasP9Vector let Predicates = [IsBigEndian, HasP9Vector] in { @@ -3059,7 +3144,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>; def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>; - } // IsLittleEndian, HasP9Vector + } // IsBigEndian, HasP9Vector // D-Form Load/Store def : Pat<(v4i32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>; @@ -3858,6 +3943,10 @@ let AddedComplexity = 400 in { (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>; def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)), (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>; + def : Pat<(v2f64 (PPCldsplat xoaddr:$A)), + (v2f64 (LXVDSX xoaddr:$A))>; + def : Pat<(v2i64 (PPCldsplat xoaddr:$A)), + (v2i64 (LXVDSX xoaddr:$A))>; // Build vectors of floating point converted to i64. 
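// An illustrative note, not LLVM code: XXLEQVOnes (defined in an earlier hunk
// of this file) relies on eqv being bitwise XNOR, so x XNOR x is all ones
// regardless of x; the destination register doubles as both sources, and the
// bitconvert patterns added nearby reuse that all-ones v4i32 for the other
// vector types.
#include <cstdint>

static uint32_t eqv(uint32_t A, uint32_t B) { return ~(A ^ B); }
// eqv(X, X) == 0xFFFFFFFFu for every X, so the prior register value is irrelevant.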
def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)), @@ -4063,27 +4152,32 @@ let AddedComplexity = 400 in { (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>; } + let Predicates = [HasP8Vector] in { + def : Pat<(v1i128 (bitconvert (v16i8 immAllOnesV))), + (v1i128 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>; + def : Pat<(v2i64 (bitconvert (v16i8 immAllOnesV))), + (v2i64 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>; + def : Pat<(v8i16 (bitconvert (v16i8 immAllOnesV))), + (v8i16 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>; + def : Pat<(v16i8 (bitconvert (v16i8 immAllOnesV))), + (v16i8 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>; + } + let Predicates = [HasP9Vector] in { // Endianness-neutral patterns for const splats with ISA 3.0 instructions. def : Pat<(v4i32 (scalar_to_vector i32:$A)), (v4i32 (MTVSRWS $A))>; def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), (v4i32 (MTVSRWS $A))>; - def : Pat<(v16i8 (build_vector immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, - immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, - immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, - immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, - immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, - immAnyExt8:$A)), + def : Pat<(v16i8 (build_vector immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A)), (v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>; - def : Pat<(v16i8 immAllOnesV), - (v16i8 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>; - def : Pat<(v8i16 immAllOnesV), - (v8i16 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>; - def : Pat<(v4i32 immAllOnesV), - (v4i32 (XXSPLTIB 255))>; - def : Pat<(v2i64 immAllOnesV), - (v2i64 (XXSPLTIB 255))>; def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)), (v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>; def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)), @@ -4102,6 +4196,10 @@ let AddedComplexity = 400 in { (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), 0))>; + def : Pat<(v4f32 (PPCldsplat xoaddr:$A)), + (v4f32 (LXVWSX xoaddr:$A))>; + def : Pat<(v4i32 (PPCldsplat xoaddr:$A)), + (v4i32 (LXVWSX xoaddr:$A))>; } let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in { diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp index 4d45d96d4479..d252cfbd26b1 100644 --- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp +++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp @@ -63,8 +63,24 @@ static cl::opt MaxVars("ppc-preinc-prep-max-vars", cl::desc("Potential PHI threshold for PPC preinc loop prep")); STATISTIC(PHINodeAlreadyExists, "PHI node already in pre-increment form"); +STATISTIC(UpdFormChainRewritten, "Num of update form chain rewritten"); namespace { + struct BucketElement { + BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {} + BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {} + + const SCEVConstant *Offset; + Instruction *Instr; + }; + + struct Bucket { + Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B), + Elements(1, BucketElement(I)) {} + + const SCEV *BaseSCEV; + SmallVector Elements; + }; class PPCLoopPreIncPrep : public FunctionPass { public: @@ -85,21 +101,47 @@ namespace { AU.addRequired(); } - bool alreadyPrepared(Loop *L, Instruction* MemI, - const SCEV 
*BasePtrStartSCEV, - const SCEVConstant *BasePtrIncSCEV); bool runOnFunction(Function &F) override; - bool runOnLoop(Loop *L); - void simplifyLoopLatch(Loop *L); - bool rotateLoop(Loop *L); - private: PPCTargetMachine *TM = nullptr; + const PPCSubtarget *ST; DominatorTree *DT; LoopInfo *LI; ScalarEvolution *SE; bool PreserveLCSSA; + + bool runOnLoop(Loop *L); + + /// Check if required PHI node is already exist in Loop \p L. + bool alreadyPrepared(Loop *L, Instruction* MemI, + const SCEV *BasePtrStartSCEV, + const SCEVConstant *BasePtrIncSCEV); + + /// Collect condition matched(\p isValidCandidate() returns true) + /// candidates in Loop \p L. + SmallVector + collectCandidates(Loop *L, + std::function + isValidCandidate, + unsigned MaxCandidateNum); + + /// Add a candidate to candidates \p Buckets. + void addOneCandidate(Instruction *MemI, const SCEV *LSCEV, + SmallVector &Buckets, + unsigned MaxCandidateNum); + + /// Prepare all candidates in \p Buckets for update form. + bool updateFormPrep(Loop *L, SmallVector &Buckets); + + /// Prepare for one chain \p BucketChain, find the best base element and + /// update all other elements in \p BucketChain accordingly. + bool prepareBaseForUpdateFormChain(Bucket &BucketChain); + + /// Rewrite load/store instructions in \p BucketChain according to + /// preparation. + bool rewriteLoadStores(Loop *L, Bucket &BucketChain, + SmallSet &BBChanged); }; } // end anonymous namespace @@ -111,30 +153,15 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false) +static const std::string PHINodeNameSuffix = ".phi"; +static const std::string CastNodeNameSuffix = ".cast"; +static const std::string GEPNodeIncNameSuffix = ".inc"; +static const std::string GEPNodeOffNameSuffix = ".off"; + FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) { return new PPCLoopPreIncPrep(TM); } -namespace { - - struct BucketElement { - BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {} - BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {} - - const SCEVConstant *Offset; - Instruction *Instr; - }; - - struct Bucket { - Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B), - Elements(1, BucketElement(I)) {} - - const SCEV *BaseSCEV; - SmallVector Elements; - }; - -} // end anonymous namespace - static bool IsPtrInBounds(Value *BasePtr) { Value *StrippedBasePtr = BasePtr; while (BitCastInst *BC = dyn_cast(StrippedBasePtr)) @@ -145,6 +172,14 @@ static bool IsPtrInBounds(Value *BasePtr) { return false; } +static std::string getInstrName(const Value *I, const std::string Suffix) { + assert(I && "Invalid paramater!"); + if (I->hasName()) + return (I->getName() + Suffix).str(); + else + return ""; +} + static Value *GetPointerOperand(Value *MemI) { if (LoadInst *LMemI = dyn_cast(MemI)) { return LMemI->getPointerOperand(); @@ -167,6 +202,7 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) { auto *DTWP = getAnalysisIfAvailable(); DT = DTWP ? &DTWP->getDomTree() : nullptr; PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); + ST = TM ? 
TM->getSubtargetImpl(F) : nullptr; bool MadeChange = false; @@ -177,10 +213,280 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) { return MadeChange; } +void PPCLoopPreIncPrep::addOneCandidate(Instruction *MemI, const SCEV *LSCEV, + SmallVector &Buckets, + unsigned MaxCandidateNum) { + assert((MemI && GetPointerOperand(MemI)) && + "Candidate should be a memory instruction."); + assert(LSCEV && "Invalid SCEV for Ptr value."); + bool FoundBucket = false; + for (auto &B : Buckets) { + const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV); + if (const auto *CDiff = dyn_cast(Diff)) { + B.Elements.push_back(BucketElement(CDiff, MemI)); + FoundBucket = true; + break; + } + } + + if (!FoundBucket) { + if (Buckets.size() == MaxCandidateNum) + return; + Buckets.push_back(Bucket(LSCEV, MemI)); + } +} + +SmallVector PPCLoopPreIncPrep::collectCandidates( + Loop *L, + std::function isValidCandidate, + unsigned MaxCandidateNum) { + SmallVector Buckets; + for (const auto &BB : L->blocks()) + for (auto &J : *BB) { + Value *PtrValue; + Instruction *MemI; + + if (LoadInst *LMemI = dyn_cast(&J)) { + MemI = LMemI; + PtrValue = LMemI->getPointerOperand(); + } else if (StoreInst *SMemI = dyn_cast(&J)) { + MemI = SMemI; + PtrValue = SMemI->getPointerOperand(); + } else if (IntrinsicInst *IMemI = dyn_cast(&J)) { + if (IMemI->getIntrinsicID() == Intrinsic::prefetch) { + MemI = IMemI; + PtrValue = IMemI->getArgOperand(0); + } else continue; + } else continue; + + unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace(); + if (PtrAddrSpace) + continue; + + if (L->isLoopInvariant(PtrValue)) + continue; + + const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L); + const SCEVAddRecExpr *LARSCEV = dyn_cast(LSCEV); + if (!LARSCEV || LARSCEV->getLoop() != L) + continue; + + if (isValidCandidate(&J, PtrValue)) + addOneCandidate(MemI, LSCEV, Buckets, MaxCandidateNum); + } + return Buckets; +} + +// TODO: implement a more clever base choosing policy. +// Currently we always choose an exist load/store offset. This maybe lead to +// suboptimal code sequences. For example, for one DS chain with offsets +// {-32769, 2003, 2007, 2011}, we choose -32769 as base offset, and left disp +// for load/stores are {0, 34772, 34776, 34780}. Though each offset now is a +// multipler of 4, it cannot be represented by sint16. +bool PPCLoopPreIncPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) { + // We have a choice now of which instruction's memory operand we use as the + // base for the generated PHI. Always picking the first instruction in each + // bucket does not work well, specifically because that instruction might + // be a prefetch (and there are no pre-increment dcbt variants). Otherwise, + // the choice is somewhat arbitrary, because the backend will happily + // generate direct offsets from both the pre-incremented and + // post-incremented pointer values. Thus, we'll pick the first non-prefetch + // instruction in each bucket, and adjust the recurrence and other offsets + // accordingly. + for (int j = 0, je = BucketChain.Elements.size(); j != je; ++j) { + if (auto *II = dyn_cast(BucketChain.Elements[j].Instr)) + if (II->getIntrinsicID() == Intrinsic::prefetch) + continue; + + // If we'd otherwise pick the first element anyway, there's nothing to do. + if (j == 0) + break; + + // If our chosen element has no offset from the base pointer, there's + // nothing to do. 
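// The arithmetic behind the TODO above, as a standalone sketch (illustrative
// names, no LLVM API): rebasing the example chain {-32769, 2003, 2007, 2011}
// on the existing offset -32769 gives displacements {0, 34772, 34776, 34780};
// each is a multiple of 4, yet all but the base overflow the signed 16-bit
// displacement field.
#include <cstdint>
#include <cstdio>

static bool fitsSInt16(int64_t V) { return V >= -32768 && V <= 32767; }

int main() {
  const int64_t Offsets[] = {-32769, 2003, 2007, 2011};
  const int64_t Base = Offsets[0]; // current policy: reuse an existing offset
  for (int64_t O : Offsets) {
    int64_t Disp = O - Base;
    std::printf("disp %lld: multiple of 4: %d, fits sint16: %d\n",
                (long long)Disp, int(Disp % 4 == 0), int(fitsSInt16(Disp)));
  }
  return 0;
}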
+ if (!BucketChain.Elements[j].Offset || + BucketChain.Elements[j].Offset->isZero()) + break; + + const SCEV *Offset = BucketChain.Elements[j].Offset; + BucketChain.BaseSCEV = SE->getAddExpr(BucketChain.BaseSCEV, Offset); + for (auto &E : BucketChain.Elements) { + if (E.Offset) + E.Offset = cast(SE->getMinusSCEV(E.Offset, Offset)); + else + E.Offset = cast(SE->getNegativeSCEV(Offset)); + } + + std::swap(BucketChain.Elements[j], BucketChain.Elements[0]); + break; + } + return true; +} + +bool PPCLoopPreIncPrep::rewriteLoadStores( + Loop *L, Bucket &BucketChain, SmallSet &BBChanged) { + bool MadeChange = false; + const SCEVAddRecExpr *BasePtrSCEV = + cast(BucketChain.BaseSCEV); + if (!BasePtrSCEV->isAffine()) + return MadeChange; + + LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n"); + + assert(BasePtrSCEV->getLoop() == L && "AddRec for the wrong loop?"); + + // The instruction corresponding to the Bucket's BaseSCEV must be the first + // in the vector of elements. + Instruction *MemI = BucketChain.Elements.begin()->Instr; + Value *BasePtr = GetPointerOperand(MemI); + assert(BasePtr && "No pointer operand"); + + Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext()); + Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(), + BasePtr->getType()->getPointerAddressSpace()); + + const SCEV *BasePtrStartSCEV = BasePtrSCEV->getStart(); + if (!SE->isLoopInvariant(BasePtrStartSCEV, L)) + return MadeChange; + + const SCEVConstant *BasePtrIncSCEV = + dyn_cast(BasePtrSCEV->getStepRecurrence(*SE)); + if (!BasePtrIncSCEV) + return MadeChange; + BasePtrStartSCEV = SE->getMinusSCEV(BasePtrStartSCEV, BasePtrIncSCEV); + if (!isSafeToExpand(BasePtrStartSCEV, *SE)) + return MadeChange; + + if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV)) + return MadeChange; + + LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n"); + + BasicBlock *Header = L->getHeader(); + unsigned HeaderLoopPredCount = pred_size(Header); + BasicBlock *LoopPredecessor = L->getLoopPredecessor(); + + PHINode *NewPHI = + PHINode::Create(I8PtrTy, HeaderLoopPredCount, + getInstrName(MemI, PHINodeNameSuffix), + Header->getFirstNonPHI()); + + SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart"); + Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy, + LoopPredecessor->getTerminator()); + + // Note that LoopPredecessor might occur in the predecessor list multiple + // times, and we need to add it the right number of times. 
+ for (const auto &PI : predecessors(Header)) { + if (PI != LoopPredecessor) + continue; + + NewPHI->addIncoming(BasePtrStart, LoopPredecessor); + } + + Instruction *InsPoint = &*Header->getFirstInsertionPt(); + GetElementPtrInst *PtrInc = GetElementPtrInst::Create( + I8Ty, NewPHI, BasePtrIncSCEV->getValue(), + getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint); + PtrInc->setIsInBounds(IsPtrInBounds(BasePtr)); + for (const auto &PI : predecessors(Header)) { + if (PI == LoopPredecessor) + continue; + + NewPHI->addIncoming(PtrInc, PI); + } + + Instruction *NewBasePtr; + if (PtrInc->getType() != BasePtr->getType()) + NewBasePtr = new BitCastInst(PtrInc, BasePtr->getType(), + getInstrName(PtrInc, CastNodeNameSuffix), InsPoint); + else + NewBasePtr = PtrInc; + + if (Instruction *IDel = dyn_cast(BasePtr)) + BBChanged.insert(IDel->getParent()); + BasePtr->replaceAllUsesWith(NewBasePtr); + RecursivelyDeleteTriviallyDeadInstructions(BasePtr); + + // Keep track of the replacement pointer values we've inserted so that we + // don't generate more pointer values than necessary. + SmallPtrSet NewPtrs; + NewPtrs.insert(NewBasePtr); + + for (auto I = std::next(BucketChain.Elements.begin()), + IE = BucketChain.Elements.end(); I != IE; ++I) { + Value *Ptr = GetPointerOperand(I->Instr); + assert(Ptr && "No pointer operand"); + if (NewPtrs.count(Ptr)) + continue; + + Instruction *RealNewPtr; + if (!I->Offset || I->Offset->getValue()->isZero()) { + RealNewPtr = NewBasePtr; + } else { + Instruction *PtrIP = dyn_cast(Ptr); + if (PtrIP && isa(NewBasePtr) && + cast(NewBasePtr)->getParent() == PtrIP->getParent()) + PtrIP = nullptr; + else if (PtrIP && isa(PtrIP)) + PtrIP = &*PtrIP->getParent()->getFirstInsertionPt(); + else if (!PtrIP) + PtrIP = I->Instr; + + GetElementPtrInst *NewPtr = GetElementPtrInst::Create( + I8Ty, PtrInc, I->Offset->getValue(), + getInstrName(I->Instr, GEPNodeOffNameSuffix), PtrIP); + if (!PtrIP) + NewPtr->insertAfter(cast(PtrInc)); + NewPtr->setIsInBounds(IsPtrInBounds(Ptr)); + RealNewPtr = NewPtr; + } + + if (Instruction *IDel = dyn_cast(Ptr)) + BBChanged.insert(IDel->getParent()); + + Instruction *ReplNewPtr; + if (Ptr->getType() != RealNewPtr->getType()) { + ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(), + getInstrName(Ptr, CastNodeNameSuffix)); + ReplNewPtr->insertAfter(RealNewPtr); + } else + ReplNewPtr = RealNewPtr; + + Ptr->replaceAllUsesWith(ReplNewPtr); + RecursivelyDeleteTriviallyDeadInstructions(Ptr); + + NewPtrs.insert(RealNewPtr); + } + + MadeChange = true; + UpdFormChainRewritten++; + + return MadeChange; +} + +bool PPCLoopPreIncPrep::updateFormPrep(Loop *L, + SmallVector &Buckets) { + bool MadeChange = false; + if (Buckets.empty()) + return MadeChange; + SmallSet BBChanged; + for (auto &Bucket : Buckets) + // The base address of each bucket is transformed into a phi and the others + // are rewritten based on new base. + if (prepareBaseForUpdateFormChain(Bucket)) + MadeChange |= rewriteLoadStores(L, Bucket, BBChanged); + if (MadeChange) + for (auto &BB : L->blocks()) + if (BBChanged.count(BB)) + DeleteDeadPHIs(BB); + return MadeChange; +} + // In order to prepare for the pre-increment a PHI is added. // This function will check to see if that PHI already exists and will return -// true if it found an existing PHI with the same start and increment as the -// one we wanted to create. +// true if it found an existing PHI with the same start and increment as the +// one we wanted to create. 
bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction* MemI, const SCEV *BasePtrStartSCEV, const SCEVConstant *BasePtrIncSCEV) { @@ -216,10 +522,10 @@ bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction* MemI, continue; if (CurrentPHINode->getNumIncomingValues() == 2) { - if ( (CurrentPHINode->getIncomingBlock(0) == LatchBB && - CurrentPHINode->getIncomingBlock(1) == PredBB) || - (CurrentPHINode->getIncomingBlock(1) == LatchBB && - CurrentPHINode->getIncomingBlock(0) == PredBB) ) { + if ((CurrentPHINode->getIncomingBlock(0) == LatchBB && + CurrentPHINode->getIncomingBlock(1) == PredBB) || + (CurrentPHINode->getIncomingBlock(1) == LatchBB && + CurrentPHINode->getIncomingBlock(0) == PredBB)) { if (PHIBasePtrSCEV->getStart() == BasePtrStartSCEV && PHIBasePtrIncSCEV == BasePtrIncSCEV) { // The existing PHI (CurrentPHINode) has the same start and increment @@ -242,89 +548,6 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { LLVM_DEBUG(dbgs() << "PIP: Examining: " << *L << "\n"); - BasicBlock *Header = L->getHeader(); - - const PPCSubtarget *ST = - TM ? TM->getSubtargetImpl(*Header->getParent()) : nullptr; - - unsigned HeaderLoopPredCount = pred_size(Header); - - // Collect buckets of comparable addresses used by loads and stores. - SmallVector Buckets; - for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); - I != IE; ++I) { - for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end(); - J != JE; ++J) { - Value *PtrValue; - Instruction *MemI; - - if (LoadInst *LMemI = dyn_cast(J)) { - MemI = LMemI; - PtrValue = LMemI->getPointerOperand(); - } else if (StoreInst *SMemI = dyn_cast(J)) { - MemI = SMemI; - PtrValue = SMemI->getPointerOperand(); - } else if (IntrinsicInst *IMemI = dyn_cast(J)) { - if (IMemI->getIntrinsicID() == Intrinsic::prefetch) { - MemI = IMemI; - PtrValue = IMemI->getArgOperand(0); - } else continue; - } else continue; - - unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace(); - if (PtrAddrSpace) - continue; - - // There are no update forms for Altivec vector load/stores. - if (ST && ST->hasAltivec() && - PtrValue->getType()->getPointerElementType()->isVectorTy()) - continue; - - if (L->isLoopInvariant(PtrValue)) - continue; - - const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L); - if (const SCEVAddRecExpr *LARSCEV = dyn_cast(LSCEV)) { - if (LARSCEV->getLoop() != L) - continue; - // See getPreIndexedAddressParts, the displacement for LDU/STDU has to - // be 4's multiple (DS-form). For i64 loads/stores when the displacement - // fits in a 16-bit signed field but isn't a multiple of 4, it will be - // useless and possible to break some original well-form addressing mode - // to make this pre-inc prep for it. 
- if (PtrValue->getType()->getPointerElementType()->isIntegerTy(64)) { - if (const SCEVConstant *StepConst = - dyn_cast(LARSCEV->getStepRecurrence(*SE))) { - const APInt &ConstInt = StepConst->getValue()->getValue(); - if (ConstInt.isSignedIntN(16) && ConstInt.srem(4) != 0) - continue; - } - } - } else { - continue; - } - - bool FoundBucket = false; - for (auto &B : Buckets) { - const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV); - if (const auto *CDiff = dyn_cast(Diff)) { - B.Elements.push_back(BucketElement(CDiff, MemI)); - FoundBucket = true; - break; - } - } - - if (!FoundBucket) { - if (Buckets.size() == MaxVars) - return MadeChange; - Buckets.push_back(Bucket(LSCEV, MemI)); - } - } - } - - if (Buckets.empty()) - return MadeChange; - BasicBlock *LoopPredecessor = L->getLoopPredecessor(); // If there is no loop predecessor, or the loop predecessor's terminator // returns a value (which might contribute to determining the loop's @@ -335,191 +558,48 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (LoopPredecessor) MadeChange = true; } - if (!LoopPredecessor) + if (!LoopPredecessor) { + LLVM_DEBUG(dbgs() << "PIP fails since no predecessor for current loop.\n"); return MadeChange; + } - LLVM_DEBUG(dbgs() << "PIP: Found " << Buckets.size() << " buckets\n"); - - SmallSet BBChanged; - for (unsigned i = 0, e = Buckets.size(); i != e; ++i) { - // The base address of each bucket is transformed into a phi and the others - // are rewritten as offsets of that variable. - - // We have a choice now of which instruction's memory operand we use as the - // base for the generated PHI. Always picking the first instruction in each - // bucket does not work well, specifically because that instruction might - // be a prefetch (and there are no pre-increment dcbt variants). Otherwise, - // the choice is somewhat arbitrary, because the backend will happily - // generate direct offsets from both the pre-incremented and - // post-incremented pointer values. Thus, we'll pick the first non-prefetch - // instruction in each bucket, and adjust the recurrence and other offsets - // accordingly. - for (int j = 0, je = Buckets[i].Elements.size(); j != je; ++j) { - if (auto *II = dyn_cast(Buckets[i].Elements[j].Instr)) - if (II->getIntrinsicID() == Intrinsic::prefetch) - continue; - - // If we'd otherwise pick the first element anyway, there's nothing to do. - if (j == 0) - break; - - // If our chosen element has no offset from the base pointer, there's - // nothing to do. - if (!Buckets[i].Elements[j].Offset || - Buckets[i].Elements[j].Offset->isZero()) - break; - - const SCEV *Offset = Buckets[i].Elements[j].Offset; - Buckets[i].BaseSCEV = SE->getAddExpr(Buckets[i].BaseSCEV, Offset); - for (auto &E : Buckets[i].Elements) { - if (E.Offset) - E.Offset = cast(SE->getMinusSCEV(E.Offset, Offset)); - else - E.Offset = cast(SE->getNegativeSCEV(Offset)); - } - - std::swap(Buckets[i].Elements[j], Buckets[i].Elements[0]); - break; - } - - const SCEVAddRecExpr *BasePtrSCEV = - cast(Buckets[i].BaseSCEV); - if (!BasePtrSCEV->isAffine()) - continue; - - LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n"); - assert(BasePtrSCEV->getLoop() == L && - "AddRec for the wrong loop?"); - - // The instruction corresponding to the Bucket's BaseSCEV must be the first - // in the vector of elements. 
- Instruction *MemI = Buckets[i].Elements.begin()->Instr; - Value *BasePtr = GetPointerOperand(MemI); - assert(BasePtr && "No pointer operand"); - - Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext()); - Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(), - BasePtr->getType()->getPointerAddressSpace()); - - const SCEV *BasePtrStartSCEV = BasePtrSCEV->getStart(); - if (!SE->isLoopInvariant(BasePtrStartSCEV, L)) - continue; - - const SCEVConstant *BasePtrIncSCEV = - dyn_cast(BasePtrSCEV->getStepRecurrence(*SE)); - if (!BasePtrIncSCEV) - continue; - BasePtrStartSCEV = SE->getMinusSCEV(BasePtrStartSCEV, BasePtrIncSCEV); - if (!isSafeToExpand(BasePtrStartSCEV, *SE)) - continue; - - LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n"); - - if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV)) - continue; - - PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount, - MemI->hasName() ? MemI->getName() + ".phi" : "", - Header->getFirstNonPHI()); - - SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart"); - Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy, - LoopPredecessor->getTerminator()); - - // Note that LoopPredecessor might occur in the predecessor list multiple - // times, and we need to add it the right number of times. - for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); - PI != PE; ++PI) { - if (*PI != LoopPredecessor) - continue; - - NewPHI->addIncoming(BasePtrStart, LoopPredecessor); - } - - Instruction *InsPoint = &*Header->getFirstInsertionPt(); - GetElementPtrInst *PtrInc = GetElementPtrInst::Create( - I8Ty, NewPHI, BasePtrIncSCEV->getValue(), - MemI->hasName() ? MemI->getName() + ".inc" : "", InsPoint); - PtrInc->setIsInBounds(IsPtrInBounds(BasePtr)); - for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); - PI != PE; ++PI) { - if (*PI == LoopPredecessor) - continue; - - NewPHI->addIncoming(PtrInc, *PI); - } - - Instruction *NewBasePtr; - if (PtrInc->getType() != BasePtr->getType()) - NewBasePtr = new BitCastInst(PtrInc, BasePtr->getType(), - PtrInc->hasName() ? PtrInc->getName() + ".cast" : "", InsPoint); - else - NewBasePtr = PtrInc; - - if (Instruction *IDel = dyn_cast(BasePtr)) - BBChanged.insert(IDel->getParent()); - BasePtr->replaceAllUsesWith(NewBasePtr); - RecursivelyDeleteTriviallyDeadInstructions(BasePtr); - - // Keep track of the replacement pointer values we've inserted so that we - // don't generate more pointer values than necessary. - SmallPtrSet NewPtrs; - NewPtrs.insert( NewBasePtr); - - for (auto I = std::next(Buckets[i].Elements.begin()), - IE = Buckets[i].Elements.end(); I != IE; ++I) { - Value *Ptr = GetPointerOperand(I->Instr); - assert(Ptr && "No pointer operand"); - if (NewPtrs.count(Ptr)) - continue; - - Instruction *RealNewPtr; - if (!I->Offset || I->Offset->getValue()->isZero()) { - RealNewPtr = NewBasePtr; - } else { - Instruction *PtrIP = dyn_cast(Ptr); - if (PtrIP && isa(NewBasePtr) && - cast(NewBasePtr)->getParent() == PtrIP->getParent()) - PtrIP = nullptr; - else if (isa(PtrIP)) - PtrIP = &*PtrIP->getParent()->getFirstInsertionPt(); - else if (!PtrIP) - PtrIP = I->Instr; - - GetElementPtrInst *NewPtr = GetElementPtrInst::Create( - I8Ty, PtrInc, I->Offset->getValue(), - I->Instr->hasName() ? I->Instr->getName() + ".off" : "", PtrIP); - if (!PtrIP) - NewPtr->insertAfter(cast(PtrInc)); - NewPtr->setIsInBounds(IsPtrInBounds(Ptr)); - RealNewPtr = NewPtr; + // Check if a load/store has update form. 
This lambda is used by function + // collectCandidates which can collect candidates for types defined by lambda. + auto isUpdateFormCandidate = [&] (const Instruction *I, + const Value *PtrValue) { + assert((PtrValue && I) && "Invalid parameter!"); + // There are no update forms for Altivec vector load/stores. + if (ST && ST->hasAltivec() && + PtrValue->getType()->getPointerElementType()->isVectorTy()) + return false; + // See getPreIndexedAddressParts, the displacement for LDU/STDU has to + // be 4's multiple (DS-form). For i64 loads/stores when the displacement + // fits in a 16-bit signed field but isn't a multiple of 4, it will be + // useless and possible to break some original well-form addressing mode + // to make this pre-inc prep for it. + if (PtrValue->getType()->getPointerElementType()->isIntegerTy(64)) { + const SCEV *LSCEV = SE->getSCEVAtScope(const_cast(PtrValue), L); + const SCEVAddRecExpr *LARSCEV = dyn_cast(LSCEV); + if (!LARSCEV || LARSCEV->getLoop() != L) + return false; + if (const SCEVConstant *StepConst = + dyn_cast(LARSCEV->getStepRecurrence(*SE))) { + const APInt &ConstInt = StepConst->getValue()->getValue(); + if (ConstInt.isSignedIntN(16) && ConstInt.srem(4) != 0) + return false; } - - if (Instruction *IDel = dyn_cast(Ptr)) - BBChanged.insert(IDel->getParent()); - - Instruction *ReplNewPtr; - if (Ptr->getType() != RealNewPtr->getType()) { - ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(), - Ptr->hasName() ? Ptr->getName() + ".cast" : ""); - ReplNewPtr->insertAfter(RealNewPtr); - } else - ReplNewPtr = RealNewPtr; - - Ptr->replaceAllUsesWith(ReplNewPtr); - RecursivelyDeleteTriviallyDeadInstructions(Ptr); - - NewPtrs.insert(RealNewPtr); } + return true; + }; - MadeChange = true; - } + // Collect buckets of comparable addresses used by loads, stores and prefetch + // intrinsic for update form. + SmallVector UpdateFormBuckets = + collectCandidates(L, isUpdateFormCandidate, MaxVars); - for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); - I != IE; ++I) { - if (BBChanged.count(*I)) - DeleteDeadPHIs(*I); - } + // Prepare for update form. + if (!UpdateFormBuckets.empty()) + MadeChange |= updateFormPrep(L, UpdateFormBuckets); return MadeChange; } diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 027e6bd1ba06..b6496f189a3a 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -79,7 +79,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, } static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, - AsmPrinter &Printer, bool isDarwin) { + AsmPrinter &Printer, bool IsDarwin) { MCContext &Ctx = Printer.OutContext; MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; @@ -137,10 +137,10 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, // Add ha16() / lo16() markers if required. 
switch (access) { case PPCII::MO_LO: - Expr = PPCMCExpr::createLo(Expr, isDarwin, Ctx); + Expr = PPCMCExpr::createLo(Expr, IsDarwin, Ctx); break; case PPCII::MO_HA: - Expr = PPCMCExpr::createHa(Expr, isDarwin, Ctx); + Expr = PPCMCExpr::createHa(Expr, IsDarwin, Ctx); break; } @@ -148,20 +148,20 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, } void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, - AsmPrinter &AP, bool isDarwin) { + AsmPrinter &AP, bool IsDarwin) { OutMI.setOpcode(MI->getOpcode()); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MCOperand MCOp; if (LowerPPCMachineOperandToMCOperand(MI->getOperand(i), MCOp, AP, - isDarwin)) + IsDarwin)) OutMI.addOperand(MCOp); } } bool llvm::LowerPPCMachineOperandToMCOperand(const MachineOperand &MO, MCOperand &OutMO, AsmPrinter &AP, - bool isDarwin) { + bool IsDarwin) { switch (MO.getType()) { default: llvm_unreachable("unknown operand type"); @@ -181,17 +181,20 @@ bool llvm::LowerPPCMachineOperandToMCOperand(const MachineOperand &MO, return true; case MachineOperand::MO_GlobalAddress: case MachineOperand::MO_ExternalSymbol: - OutMO = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP, isDarwin); + OutMO = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP, IsDarwin); return true; case MachineOperand::MO_JumpTableIndex: - OutMO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, isDarwin); + OutMO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, IsDarwin); return true; case MachineOperand::MO_ConstantPoolIndex: - OutMO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, isDarwin); + OutMO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, IsDarwin); return true; case MachineOperand::MO_BlockAddress: OutMO = GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()), AP, - isDarwin); + IsDarwin); + return true; + case MachineOperand::MO_MCSymbol: + OutMO = GetSymbolRef(MO, MO.getMCSymbol(), AP, IsDarwin); return true; case MachineOperand::MO_RegisterMask: return false; diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp index 446246358e96..ac8ac060f460 100644 --- a/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -148,8 +148,8 @@ static MachineInstr *getVRegDefOrNull(MachineOperand *Op, if (!Op->isReg()) return nullptr; - unsigned Reg = Op->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = Op->getReg(); + if (!Register::isVirtualRegister(Reg)) return nullptr; return MRI->getVRegDef(Reg); @@ -344,8 +344,7 @@ bool PPCMIPeephole::simplifyCode(void) { unsigned TrueReg2 = TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI); - if (TrueReg1 == TrueReg2 - && TargetRegisterInfo::isVirtualRegister(TrueReg1)) { + if (TrueReg1 == TrueReg2 && Register::isVirtualRegister(TrueReg1)) { MachineInstr *DefMI = MRI->getVRegDef(TrueReg1); unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0; @@ -358,7 +357,7 @@ bool PPCMIPeephole::simplifyCode(void) { return false; unsigned DefReg = TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI); - if (TargetRegisterInfo::isVirtualRegister(DefReg)) { + if (Register::isVirtualRegister(DefReg)) { MachineInstr *LoadMI = MRI->getVRegDef(DefReg); if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX) return true; @@ -444,7 +443,7 @@ bool PPCMIPeephole::simplifyCode(void) { unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 
1 : 2; unsigned TrueReg = TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI); - if (!TargetRegisterInfo::isVirtualRegister(TrueReg)) + if (!Register::isVirtualRegister(TrueReg)) break; MachineInstr *DefMI = MRI->getVRegDef(TrueReg); if (!DefMI) @@ -453,8 +452,8 @@ bool PPCMIPeephole::simplifyCode(void) { auto isConvertOfSplat = [=]() -> bool { if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS) return false; - unsigned ConvReg = DefMI->getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(ConvReg)) + Register ConvReg = DefMI->getOperand(1).getReg(); + if (!Register::isVirtualRegister(ConvReg)) return false; MachineInstr *Splt = MRI->getVRegDef(ConvReg); return Splt && (Splt->getOpcode() == PPC::LXVWSX || @@ -481,9 +480,9 @@ bool PPCMIPeephole::simplifyCode(void) { // Splat fed by a shift. Usually when we align value to splat into // vector element zero. if (DefOpcode == PPC::XXSLDWI) { - unsigned ShiftRes = DefMI->getOperand(0).getReg(); - unsigned ShiftOp1 = DefMI->getOperand(1).getReg(); - unsigned ShiftOp2 = DefMI->getOperand(2).getReg(); + Register ShiftRes = DefMI->getOperand(0).getReg(); + Register ShiftOp1 = DefMI->getOperand(1).getReg(); + Register ShiftOp2 = DefMI->getOperand(2).getReg(); unsigned ShiftImm = DefMI->getOperand(3).getImm(); unsigned SplatImm = MI.getOperand(2).getImm(); if (ShiftOp1 == ShiftOp2) { @@ -507,7 +506,7 @@ bool PPCMIPeephole::simplifyCode(void) { // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant. unsigned TrueReg = TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI); - if (!TargetRegisterInfo::isVirtualRegister(TrueReg)) + if (!Register::isVirtualRegister(TrueReg)) break; MachineInstr *DefMI = MRI->getVRegDef(TrueReg); @@ -518,8 +517,8 @@ bool PPCMIPeephole::simplifyCode(void) { TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI); unsigned DefsReg2 = TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI); - if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) || - !TargetRegisterInfo::isVirtualRegister(DefsReg2)) + if (!Register::isVirtualRegister(DefsReg1) || + !Register::isVirtualRegister(DefsReg2)) break; MachineInstr *P1 = MRI->getVRegDef(DefsReg1); MachineInstr *P2 = MRI->getVRegDef(DefsReg2); @@ -533,8 +532,8 @@ bool PPCMIPeephole::simplifyCode(void) { if (RoundInstr->getOpcode() == PPC::FRSP && MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) { Simplified = true; - unsigned ConvReg1 = RoundInstr->getOperand(1).getReg(); - unsigned FRSPDefines = RoundInstr->getOperand(0).getReg(); + Register ConvReg1 = RoundInstr->getOperand(1).getReg(); + Register FRSPDefines = RoundInstr->getOperand(0).getReg(); MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines)); for (int i = 0, e = Use.getNumOperands(); i < e; ++i) if (Use.getOperand(i).isReg() && @@ -566,8 +565,8 @@ bool PPCMIPeephole::simplifyCode(void) { case PPC::EXTSH8: case PPC::EXTSH8_32_64: { if (!EnableSExtElimination) break; - unsigned NarrowReg = MI.getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(NarrowReg)) + Register NarrowReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(NarrowReg)) break; MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg); @@ -610,8 +609,8 @@ bool PPCMIPeephole::simplifyCode(void) { case PPC::EXTSW_32: case PPC::EXTSW_32_64: { if (!EnableSExtElimination) break; - unsigned NarrowReg = MI.getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(NarrowReg)) + Register NarrowReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(NarrowReg)) 
break; MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg); @@ -652,8 +651,8 @@ bool PPCMIPeephole::simplifyCode(void) { // We can eliminate EXTSW if the input is known to be already // sign-extended. LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n"); - unsigned TmpReg = - MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass); + Register TmpReg = + MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass); BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF), TmpReg); BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG), @@ -679,8 +678,8 @@ bool PPCMIPeephole::simplifyCode(void) { if (MI.getOperand(2).getImm() != 0) break; - unsigned SrcReg = MI.getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + Register SrcReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(SrcReg)) break; MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); @@ -695,8 +694,8 @@ bool PPCMIPeephole::simplifyCode(void) { SrcMI = SubRegMI; if (SubRegMI->getOpcode() == PPC::COPY) { - unsigned CopyReg = SubRegMI->getOperand(1).getReg(); - if (TargetRegisterInfo::isVirtualRegister(CopyReg)) + Register CopyReg = SubRegMI->getOperand(1).getReg(); + if (Register::isVirtualRegister(CopyReg)) SrcMI = MRI->getVRegDef(CopyReg); } @@ -757,7 +756,7 @@ bool PPCMIPeephole::simplifyCode(void) { break; // We don't have an ADD fed by LI's that can be transformed // Now we know that Op1 is the PHI node and Op2 is the dominator - unsigned DominatorReg = Op2.getReg(); + Register DominatorReg = Op2.getReg(); const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8 ? &PPC::G8RC_and_G8RC_NOX0RegClass @@ -927,7 +926,7 @@ static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1, } else if (Inst->isFullCopy()) NextReg = Inst->getOperand(1).getReg(); - if (NextReg == SrcReg || !TargetRegisterInfo::isVirtualRegister(NextReg)) + if (NextReg == SrcReg || !Register::isVirtualRegister(NextReg)) break; SrcReg = NextReg; } @@ -949,9 +948,8 @@ static bool eligibleForCompareElimination(MachineBasicBlock &MBB, (*BII).getOpcode() == PPC::BCC && (*BII).getOperand(1).isReg()) { // We optimize only if the condition code is used only by one BCC. - unsigned CndReg = (*BII).getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(CndReg) || - !MRI->hasOneNonDBGUse(CndReg)) + Register CndReg = (*BII).getOperand(1).getReg(); + if (!Register::isVirtualRegister(CndReg) || !MRI->hasOneNonDBGUse(CndReg)) return false; MachineInstr *CMPI = MRI->getVRegDef(CndReg); @@ -961,7 +959,7 @@ static bool eligibleForCompareElimination(MachineBasicBlock &MBB, // We skip this BB if a physical register is used in comparison. for (MachineOperand &MO : CMPI->operands()) - if (MO.isReg() && !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (MO.isReg() && !Register::isVirtualRegister(MO.getReg())) return false; return true; @@ -1271,8 +1269,8 @@ bool PPCMIPeephole::eliminateRedundantCompare(void) { // We touch up the compare instruction in MBB2 and move it to // a previous BB to handle partially redundant case. 
if (SwapOperands) { - unsigned Op1 = CMPI2->getOperand(1).getReg(); - unsigned Op2 = CMPI2->getOperand(2).getReg(); + Register Op1 = CMPI2->getOperand(1).getReg(); + Register Op2 = CMPI2->getOperand(2).getReg(); CMPI2->getOperand(1).setReg(Op2); CMPI2->getOperand(2).setReg(Op1); } @@ -1295,7 +1293,7 @@ bool PPCMIPeephole::eliminateRedundantCompare(void) { MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2)); DebugLoc DL = CMPI2->getDebugLoc(); - unsigned NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass); + Register NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass); BuildMI(MBB2, MBB2.begin(), DL, TII->get(PPC::PHI), NewVReg) .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1) @@ -1334,8 +1332,8 @@ bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) { if (MI.getOpcode() != PPC::RLDICR) return false; - unsigned SrcReg = MI.getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + Register SrcReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(SrcReg)) return false; MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); @@ -1414,8 +1412,8 @@ bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI, if (SHMI + MEMI != 63) return false; - unsigned SrcReg = MI.getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + Register SrcReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(SrcReg)) return false; MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); @@ -1428,6 +1426,12 @@ bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI, if (!MRI->hasOneNonDBGUse(SrcReg)) return false; + assert(SrcMI->getNumOperands() == 2 && "EXTSW should have 2 operands"); + assert(SrcMI->getOperand(1).isReg() && + "EXTSW's second operand should be a register"); + if (!Register::isVirtualRegister(SrcMI->getOperand(1).getReg())) + return false; + LLVM_DEBUG(dbgs() << "Combining pair: "); LLVM_DEBUG(SrcMI->dump()); LLVM_DEBUG(MI.dump()); diff --git a/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/lib/Target/PowerPC/PPCPreEmitPeephole.cpp index d83c92276800..b1c0433641dd 100644 --- a/lib/Target/PowerPC/PPCPreEmitPeephole.cpp +++ b/lib/Target/PowerPC/PPCPreEmitPeephole.cpp @@ -57,6 +57,109 @@ namespace { MachineFunctionProperties::Property::NoVRegs); } + // This function removes any redundant load immediates. It has two level + // loops - The outer loop finds the load immediates BBI that could be used + // to replace following redundancy. The inner loop scans instructions that + // after BBI to find redundancy and update kill/dead flags accordingly. If + // AfterBBI is the same as BBI, it is redundant, otherwise any instructions + // that modify the def register of BBI would break the scanning. + // DeadOrKillToUnset is a pointer to the previous operand that had the + // kill/dead flag set. It keeps track of the def register of BBI, the use + // registers of AfterBBIs and the def registers of AfterBBIs. + bool removeRedundantLIs(MachineBasicBlock &MBB, + const TargetRegisterInfo *TRI) { + LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n"; + MBB.dump(); dbgs() << "\n"); + + DenseSet InstrsToErase; + for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) { + // Skip load immediate that is marked to be erased later because it + // cannot be used to replace any other instructions. + if (InstrsToErase.find(&*BBI) != InstrsToErase.end()) + continue; + // Skip non-load immediate. 
+ unsigned Opc = BBI->getOpcode(); + if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS && + Opc != PPC::LIS8) + continue; + // Skip load immediate, where the operand is a relocation (e.g., $r3 = + // LI target-flags(ppc-lo) %const.0). + if (!BBI->getOperand(1).isImm()) + continue; + assert(BBI->getOperand(0).isReg() && + "Expected a register for the first operand"); + + LLVM_DEBUG(dbgs() << "Scanning after load immediate: "; BBI->dump();); + + Register Reg = BBI->getOperand(0).getReg(); + int64_t Imm = BBI->getOperand(1).getImm(); + MachineOperand *DeadOrKillToUnset = nullptr; + if (BBI->getOperand(0).isDead()) { + DeadOrKillToUnset = &BBI->getOperand(0); + LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset + << " from load immediate " << *BBI + << " is a unsetting candidate\n"); + } + // This loop scans instructions after BBI to see if there is any + // redundant load immediate. + for (auto AfterBBI = std::next(BBI); AfterBBI != MBB.instr_end(); + ++AfterBBI) { + // Track the operand that kill Reg. We would unset the kill flag of + // the operand if there is a following redundant load immediate. + int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, true, TRI); + if (KillIdx != -1) { + assert(!DeadOrKillToUnset && "Shouldn't kill same register twice"); + DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx); + LLVM_DEBUG(dbgs() + << " Kill flag of " << *DeadOrKillToUnset << " from " + << *AfterBBI << " is a unsetting candidate\n"); + } + + if (!AfterBBI->modifiesRegister(Reg, TRI)) + continue; + // Finish scanning because Reg is overwritten by a non-load + // instruction. + if (AfterBBI->getOpcode() != Opc) + break; + assert(AfterBBI->getOperand(0).isReg() && + "Expected a register for the first operand"); + // Finish scanning because Reg is overwritten by a relocation or a + // different value. + if (!AfterBBI->getOperand(1).isImm() || + AfterBBI->getOperand(1).getImm() != Imm) + break; + + // It loads same immediate value to the same Reg, which is redundant. + // We would unset kill flag in previous Reg usage to extend live range + // of Reg first, then remove the redundancy. + if (DeadOrKillToUnset) { + LLVM_DEBUG(dbgs() + << " Unset dead/kill flag of " << *DeadOrKillToUnset + << " from " << *DeadOrKillToUnset->getParent()); + if (DeadOrKillToUnset->isDef()) + DeadOrKillToUnset->setIsDead(false); + else + DeadOrKillToUnset->setIsKill(false); + } + DeadOrKillToUnset = + AfterBBI->findRegisterDefOperand(Reg, true, true, TRI); + if (DeadOrKillToUnset) + LLVM_DEBUG(dbgs() + << " Dead flag of " << *DeadOrKillToUnset << " from " + << *AfterBBI << " is a unsetting candidate\n"); + InstrsToErase.insert(&*AfterBBI); + LLVM_DEBUG(dbgs() << " Remove redundant load immediate: "; + AfterBBI->dump()); + } + } + + for (MachineInstr *MI : InstrsToErase) { + MI->eraseFromParent(); + } + NumRemovedInPreEmit += InstrsToErase.size(); + return !InstrsToErase.empty(); + } + bool runOnMachineFunction(MachineFunction &MF) override { if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) return false; @@ -65,6 +168,7 @@ namespace { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); SmallVector InstrsToErase; for (MachineBasicBlock &MBB : MF) { + Changed |= removeRedundantLIs(MBB, TRI); for (MachineInstr &MI : MBB) { unsigned Opc = MI.getOpcode(); // Detect self copies - these can result from running AADB. 
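The removeRedundantLIs helper added above walks each block and, for every load-immediate (LI/LI8/LIS/LIS8), scans the instructions that follow it, deleting any later instruction that reloads the same immediate into the same register before that register is redefined, while clearing kill/dead flags so the surviving value's live range can be extended. A rough standalone sketch of that scan, using a made-up SimpleInst struct rather than the real MachineInstr/MachineBasicBlock API and leaving out the flag bookkeeping, could look like this:

#include <cstdint>
#include <string>
#include <vector>

struct SimpleInst {
  std::string Opc;  // e.g. "LI", "LI8", "ADD", ...
  unsigned DefReg;  // register defined by this instruction
  int64_t Imm;      // immediate, meaningful only for load-immediates
  bool modifies(unsigned Reg) const { return DefReg == Reg; }
};

// Erase any later "LI Reg, Imm" that reloads the same immediate into the same
// register before Reg is overwritten by anything else.
static bool removeRedundantLIs(std::vector<SimpleInst> &Block) {
  bool Changed = false;
  for (size_t I = 0; I < Block.size(); ++I) {
    if (Block[I].Opc != "LI" && Block[I].Opc != "LI8")
      continue;
    unsigned Reg = Block[I].DefReg;
    int64_t Imm = Block[I].Imm;
    for (size_t J = I + 1; J < Block.size();) {
      if (!Block[J].modifies(Reg)) {
        ++J;                       // Reg untouched, keep scanning.
        continue;
      }
      if (Block[J].Opc == Block[I].Opc && Block[J].Imm == Imm) {
        Block.erase(Block.begin() + J);  // same reg, same imm: redundant.
        Changed = true;
        continue;
      }
      break;                       // Reg overwritten with something else.
    }
  }
  return Changed;
}

int main() {
  std::vector<SimpleInst> BB = {
      {"LI", 3, 42}, {"ADD", 5, 0}, {"LI", 3, 42}, {"LI", 3, 7}};
  removeRedundantLIs(BB);          // erases only the second "LI r3, 42"
  return BB.size() == 3 ? 0 : 1;
}

The real pass instead records candidates in InstrsToErase and deletes them after the scan, since erasing while iterating a MachineBasicBlock's instruction list would invalidate the iterators; the index-based sketch sidesteps that detail.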
@@ -111,7 +215,7 @@ namespace { if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn) continue; MachineInstr *CRSetMI = nullptr; - unsigned CRBit = Br->getOperand(0).getReg(); + Register CRBit = Br->getOperand(0).getReg(); unsigned CRReg = getCRFromCRBit(CRBit); bool SeenUse = false; MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend(); diff --git a/lib/Target/PowerPC/PPCQPXLoadSplat.cpp b/lib/Target/PowerPC/PPCQPXLoadSplat.cpp index 3a83cc27439c..6e9042643820 100644 --- a/lib/Target/PowerPC/PPCQPXLoadSplat.cpp +++ b/lib/Target/PowerPC/PPCQPXLoadSplat.cpp @@ -79,8 +79,8 @@ bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) { for (auto SI = Splats.begin(); SI != Splats.end();) { MachineInstr *SMI = *SI; - unsigned SplatReg = SMI->getOperand(0).getReg(); - unsigned SrcReg = SMI->getOperand(1).getReg(); + Register SplatReg = SMI->getOperand(0).getReg(); + Register SrcReg = SMI->getOperand(1).getReg(); if (MI->modifiesRegister(SrcReg, TRI)) { switch (MI->getOpcode()) { @@ -102,7 +102,7 @@ bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) { // the QPX splat source register. unsigned SubRegIndex = TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg()); - unsigned SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex); + Register SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex); // Substitute both the explicit defined register, and also the // implicit def of the containing QPX register. diff --git a/lib/Target/PowerPC/PPCReduceCRLogicals.cpp b/lib/Target/PowerPC/PPCReduceCRLogicals.cpp index 8eaa6dfe2bf7..3b71ed219c17 100644 --- a/lib/Target/PowerPC/PPCReduceCRLogicals.cpp +++ b/lib/Target/PowerPC/PPCReduceCRLogicals.cpp @@ -381,10 +381,10 @@ private: const MachineBranchProbabilityInfo *MBPI; // A vector to contain all the CR logical operations - std::vector AllCRLogicalOps; + SmallVector AllCRLogicalOps; void initialize(MachineFunction &MFParm); void collectCRLogicals(); - bool handleCROp(CRLogicalOpInfo &CRI); + bool handleCROp(unsigned Idx); bool splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI); static bool isCRLogical(MachineInstr &MI) { unsigned Opc = MI.getOpcode(); @@ -398,7 +398,7 @@ private: // Not using a range-based for loop here as the vector may grow while being // operated on. for (unsigned i = 0; i < AllCRLogicalOps.size(); i++) - Changed |= handleCROp(AllCRLogicalOps[i]); + Changed |= handleCROp(i); return Changed; } @@ -535,15 +535,15 @@ MachineInstr *PPCReduceCRLogicals::lookThroughCRCopy(unsigned Reg, unsigned &Subreg, MachineInstr *&CpDef) { Subreg = -1; - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return nullptr; MachineInstr *Copy = MRI->getVRegDef(Reg); CpDef = Copy; if (!Copy->isCopy()) return Copy; - unsigned CopySrc = Copy->getOperand(1).getReg(); + Register CopySrc = Copy->getOperand(1).getReg(); Subreg = Copy->getOperand(1).getSubReg(); - if (!TargetRegisterInfo::isVirtualRegister(CopySrc)) { + if (!Register::isVirtualRegister(CopySrc)) { const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); // Set the Subreg if (CopySrc == PPC::CR0EQ || CopySrc == PPC::CR6EQ) @@ -578,10 +578,11 @@ void PPCReduceCRLogicals::initialize(MachineFunction &MFParam) { /// a unary CR logical might be used to change the condition code on a /// comparison feeding it. A nullary CR logical might simply be removable /// if the user of the bit it [un]sets can be transformed. 
-bool PPCReduceCRLogicals::handleCROp(CRLogicalOpInfo &CRI) { +bool PPCReduceCRLogicals::handleCROp(unsigned Idx) { // We can definitely split a block on the inputs to a binary CR operation // whose defs and (single) use are within the same block. bool Changed = false; + CRLogicalOpInfo CRI = AllCRLogicalOps[Idx]; if (CRI.IsBinary && CRI.ContainedInBlock && CRI.SingleUse && CRI.FeedsBR && CRI.DefsSingleUse) { Changed = splitBlockOnBinaryCROp(CRI); diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 12554ea8d079..9ec26a19bdaa 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -325,13 +325,13 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { bool IsPositionIndependent = TM.isPositionIndependent(); if (hasBasePointer(MF)) { - if (Subtarget.isSVR4ABI() && !TM.isPPC64() && IsPositionIndependent) + if (Subtarget.is32BitELFABI() && IsPositionIndependent) markSuperRegs(Reserved, PPC::R29); else markSuperRegs(Reserved, PPC::R30); } - if (Subtarget.isSVR4ABI() && !TM.isPPC64() && IsPositionIndependent) + if (Subtarget.is32BitELFABI() && IsPositionIndependent) markSuperRegs(Reserved, PPC::R30); // Reserve Altivec registers when Altivec is unavailable. @@ -391,7 +391,7 @@ bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) co bool PPCRegisterInfo::isCallerPreservedPhysReg(unsigned PhysReg, const MachineFunction &MF) const { - assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); + assert(Register::isPhysicalRegister(PhysReg)); const PPCSubtarget &Subtarget = MF.getSubtarget(); const MachineFrameInfo &MFI = MF.getFrameInfo(); if (!TM.isPPC64()) @@ -425,7 +425,6 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, case PPC::G8RC_NOX0RegClassID: case PPC::GPRC_NOR0RegClassID: case PPC::SPERCRegClassID: - case PPC::SPE4RCRegClassID: case PPC::G8RCRegClassID: case PPC::GPRCRegClassID: { unsigned FP = TFI->hasFP(MF) ? 1 : 0; @@ -527,7 +526,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { // Fortunately, a frame greater than 32K is rare. const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) { if (LP64) @@ -549,7 +548,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { } bool KillNegSizeReg = MI.getOperand(1).isKill(); - unsigned NegSizeReg = MI.getOperand(1).getReg(); + Register NegSizeReg = MI.getOperand(1).getReg(); // Grow the stack and update the stack pointer link, then determine the // address of new allocated space. @@ -655,8 +654,8 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); - unsigned SrcReg = MI.getOperand(0).getReg(); + Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + Register SrcReg = MI.getOperand(0).getReg(); // We need to store the CR in the low 4-bits of the saved value. First, issue // an MFOCRF to save all of the CRBits and, if needed, kill the SrcReg. 
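Earlier in this diff, PPCReduceCRLogicals::handleCROp changes from taking a CRLogicalOpInfo reference to taking an index into AllCRLogicalOps and copying the entry, because, as the updated comment notes, the vector may grow while being operated on, and a push_back that reallocates the storage would leave a reference into the old buffer dangling. A minimal, generic illustration of that hazard and the indexed fix (plain std::vector, not the pass's actual types) might be:

#include <cstddef>
#include <vector>

struct OpInfo {
  int Kind = 0;
};

// Unsafe shape: "void handle(OpInfo &Item)" called as handle(Ops[I]) may leave
// Item dangling if the callee appends to Ops and the vector reallocates.
//
// Safe shape used by the patch: take the index, copy the element up front,
// and only then mutate the container.
static void handleByIndex(std::vector<OpInfo> &Ops, size_t Idx) {
  OpInfo Item = Ops[Idx];      // copy; immune to later reallocation
  if (Item.Kind == 0)
    Ops.push_back(OpInfo{1});  // growing the vector is now harmless
}

int main() {
  std::vector<OpInfo> Ops(1);
  // Mirror the driver loop in the pass: indexed, because Ops may grow.
  for (size_t I = 0; I < Ops.size(); ++I)
    handleByIndex(Ops, I);
  return Ops.size() == 2 ? 0 : 1;
}

Copying the element up front mirrors the "CRLogicalOpInfo CRI = AllCRLogicalOps[Idx];" line in the patch: later appends can no longer invalidate what the handler is reading.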
@@ -700,8 +699,8 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); - unsigned DestReg = MI.getOperand(0).getReg(); + Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + Register DestReg = MI.getOperand(0).getReg(); assert(MI.definesRegister(DestReg) && "RESTORE_CR does not define its destination"); @@ -744,8 +743,8 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); - unsigned SrcReg = MI.getOperand(0).getReg(); + Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + Register SrcReg = MI.getOperand(0).getReg(); // Search up the BB to find the definition of the CR bit. MachineBasicBlock::reverse_iterator Ins; @@ -823,8 +822,8 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II, const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); - unsigned DestReg = MI.getOperand(0).getReg(); + Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + Register DestReg = MI.getOperand(0).getReg(); assert(MI.definesRegister(DestReg) && "RESTORE_CRBIT does not define its destination"); @@ -833,7 +832,7 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II, BuildMI(MBB, II, dl, TII.get(TargetOpcode::IMPLICIT_DEF), DestReg); - unsigned RegO = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + Register RegO = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); BuildMI(MBB, II, dl, TII.get(LP64 ? 
PPC::MFOCRF8 : PPC::MFOCRF), RegO) .addReg(getCRFromCRBit(DestReg)); @@ -870,8 +869,8 @@ void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II, DebugLoc dl = MI.getDebugLoc(); const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC); - unsigned SrcReg = MI.getOperand(0).getReg(); + Register Reg = MF.getRegInfo().createVirtualRegister(GPRC); + Register SrcReg = MI.getOperand(0).getReg(); BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg) .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); @@ -896,8 +895,8 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II, DebugLoc dl = MI.getDebugLoc(); const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC); - unsigned DestReg = MI.getOperand(0).getReg(); + Register Reg = MF.getRegInfo().createVirtualRegister(GPRC); + Register DestReg = MI.getOperand(0).getReg(); assert(MI.definesRegister(DestReg) && "RESTORE_VRSAVE does not define its destination"); @@ -1128,7 +1127,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, OperandBase = OffsetOperandNo; } - unsigned StackReg = MI.getOperand(FIOperandNum).getReg(); + Register StackReg = MI.getOperand(FIOperandNum).getReg(); MI.getOperand(OperandBase).ChangeToRegister(StackReg, false); MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true); } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index af0dff6347a6..4719e947b172 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -253,15 +253,14 @@ def RM: PPCReg<"**ROUNDING MODE**">; /// Register classes // Allocate volatiles first // then nonvolatiles in reverse order since stmw/lmw save from rN to r31 -def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12), - (sequence "R%u", 30, 13), - R31, R0, R1, FP, BP)> { +def GPRC : RegisterClass<"PPC", [i32,f32], 32, (add (sequence "R%u", 2, 12), + (sequence "R%u", 30, 13), + R31, R0, R1, FP, BP)> { // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so // put it at the end of the list. let AltOrders = [(add (sub GPRC, R2), R2)]; let AltOrderSelect = [{ - const PPCSubtarget &S = MF.getSubtarget(); - return S.isPPC64() && S.isSVR4ABI(); + return MF.getSubtarget().is64BitELFABI(); }]; } @@ -272,21 +271,19 @@ def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12), // put it at the end of the list. let AltOrders = [(add (sub G8RC, X2), X2)]; let AltOrderSelect = [{ - const PPCSubtarget &S = MF.getSubtarget(); - return S.isPPC64() && S.isSVR4ABI(); + return MF.getSubtarget().is64BitELFABI(); }]; } // For some instructions r0 is special (representing the value 0 instead of // the value in the r0 register), and we use these register subclasses to // prevent r0 from being allocated for use by those instructions. -def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)> { +def GPRC_NOR0 : RegisterClass<"PPC", [i32,f32], 32, (add (sub GPRC, R0), ZERO)> { // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so // put it at the end of the list. 
let AltOrders = [(add (sub GPRC_NOR0, R2), R2)]; let AltOrderSelect = [{ - const PPCSubtarget &S = MF.getSubtarget(); - return S.isPPC64() && S.isSVR4ABI(); + return MF.getSubtarget().is64BitELFABI(); }]; } @@ -295,8 +292,7 @@ def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)> { // put it at the end of the list. let AltOrders = [(add (sub G8RC_NOX0, X2), X2)]; let AltOrderSelect = [{ - const PPCSubtarget &S = MF.getSubtarget(); - return S.isPPC64() && S.isSVR4ABI(); + return MF.getSubtarget().is64BitELFABI(); }]; } @@ -304,8 +300,6 @@ def SPERC : RegisterClass<"PPC", [f64], 64, (add (sequence "S%u", 2, 12), (sequence "S%u", 30, 13), S31, S0, S1)>; -def SPE4RC : RegisterClass<"PPC", [f32], 32, (add GPRC)>; - // Allocate volatiles first, then non-volatiles in reverse order. With the SVR4 // ABI the size of the Floating-point register save area is determined by the // allocated non-volatile register with the lowest register number, as FP diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 6aa7528634d3..10568ed4b655 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -60,7 +60,7 @@ PPCSubtarget::PPCSubtarget(const Triple &TT, const std::string &CPU, InstrInfo(*this), TLInfo(TM, *this) {} void PPCSubtarget::initializeEnvironment() { - StackAlignment = 16; + StackAlignment = Align(16); DarwinDirective = PPC::DIR_NONE; HasMFOCRF = false; Has64BitSupport = false; @@ -145,7 +145,8 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if (isDarwin()) HasLazyResolverStubs = true; - if (TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD() || + if ((TargetTriple.isOSFreeBSD() && TargetTriple.getOSMajorVersion() >= 13) || + TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD() || TargetTriple.isMusl()) SecurePlt = true; @@ -228,18 +229,13 @@ bool PPCSubtarget::enableSubRegLiveness() const { return UseSubRegLiveness; } -unsigned char -PPCSubtarget::classifyGlobalReference(const GlobalValue *GV) const { - // Note that currently we don't generate non-pic references. - // If a caller wants that, this will have to be updated. - +bool PPCSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const { // Large code model always uses the TOC even for local symbols. if (TM.getCodeModel() == CodeModel::Large) - return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG; - + return true; if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) - return PPCII::MO_PIC_FLAG; - return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG; + return false; + return true; } bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); } diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 55fec1cb6d99..d96c2893aee9 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -78,7 +78,7 @@ protected: /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. - unsigned StackAlignment; + Align StackAlignment; /// Selected instruction itineraries (one entry per itinerary class.) InstrItineraryData InstrItins; @@ -166,7 +166,7 @@ public: /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every /// function for this subtarget. 
- unsigned getStackAlignment() const { return StackAlignment; } + Align getStackAlignment() const { return StackAlignment; } /// getDarwinDirective - Returns the -m directive specified for the cpu. /// @@ -210,7 +210,11 @@ public: /// instructions, regardless of whether we are in 32-bit or 64-bit mode. bool has64BitSupport() const { return Has64BitSupport; } // useSoftFloat - Return true if soft-float option is turned on. - bool useSoftFloat() const { return !HasHardFloat; } + bool useSoftFloat() const { + if (isAIXABI() && !HasHardFloat) + report_fatal_error("soft-float is not yet supported on AIX."); + return !HasHardFloat; + } /// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit /// registers in 32-bit mode when possible. This can only true if @@ -277,11 +281,11 @@ public: bool hasDirectMove() const { return HasDirectMove; } bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; } - unsigned getPlatformStackAlignment() const { + Align getPlatformStackAlignment() const { if ((hasQPX() || isBGQ()) && !isQPXStackUnaligned()) - return 32; + return Align(32); - return 16; + return Align(16); } // DarwinABI has a 224-byte red zone. PPC32 SVR4ABI(Non-DarwinABI) has no @@ -316,6 +320,9 @@ public: bool isSVR4ABI() const { return !isDarwinABI() && !isAIXABI(); } bool isELFv2ABI() const; + bool is64BitELFABI() const { return isSVR4ABI() && isPPC64(); } + bool is32BitELFABI() const { return isSVR4ABI() && !isPPC64(); } + /// Originally, this function return hasISEL(). Now we always enable it, /// but may expand the ISEL instruction later. bool enableEarlyIfConversion() const override { return true; } @@ -337,9 +344,8 @@ public: bool enableSubRegLiveness() const override; - /// classifyGlobalReference - Classify a global variable reference for the - /// current subtarget accourding to how we should reference it. - unsigned char classifyGlobalReference(const GlobalValue *GV) const; + /// True if the GV will be accessed via an indirect symbol. + bool isGVIndirectSymbol(const GlobalValue *GV) const; bool isXRaySupported() const override { return IsPPC64 && IsLittleEndian; } }; diff --git a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp index fb826c4a32f1..8f313d9d01c4 100644 --- a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp +++ b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -74,8 +74,8 @@ protected: LLVM_DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << MI); - unsigned OutReg = MI.getOperand(0).getReg(); - unsigned InReg = MI.getOperand(1).getReg(); + Register OutReg = MI.getOperand(0).getReg(); + Register InReg = MI.getOperand(1).getReg(); DebugLoc DL = MI.getDebugLoc(); unsigned GPR3 = Is64Bit ? PPC::X3 : PPC::R3; unsigned Opc1, Opc2; diff --git a/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/lib/Target/PowerPC/PPCTOCRegDeps.cpp index 3eb0569fb955..895ae6744421 100644 --- a/lib/Target/PowerPC/PPCTOCRegDeps.cpp +++ b/lib/Target/PowerPC/PPCTOCRegDeps.cpp @@ -95,7 +95,8 @@ namespace { protected: bool hasTOCLoReloc(const MachineInstr &MI) { if (MI.getOpcode() == PPC::LDtocL || - MI.getOpcode() == PPC::ADDItocL) + MI.getOpcode() == PPC::ADDItocL || + MI.getOpcode() == PPC::LWZtocL) return true; for (const MachineOperand &MO : MI.operands()) { @@ -109,11 +110,15 @@ protected: bool processBlock(MachineBasicBlock &MBB) { bool Changed = false; + const bool isPPC64 = + MBB.getParent()->getSubtarget().isPPC64(); + const unsigned TOCReg = isPPC64 ? 
PPC::X2 : PPC::R2; + for (auto &MI : MBB) { if (!hasTOCLoReloc(MI)) continue; - MI.addOperand(MachineOperand::CreateReg(PPC::X2, + MI.addOperand(MachineOperand::CreateReg(TOCReg, false /*IsDef*/, true /*IsImp*/)); Changed = true; diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index ce00f848dd72..abefee8b339d 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -93,7 +93,7 @@ EnableMachineCombinerPass("ppc-machine-combiner", static cl::opt ReduceCRLogical("ppc-reduce-cr-logicals", cl::desc("Expand eligible cr-logical binary ops to branches"), - cl::init(false), cl::Hidden); + cl::init(true), cl::Hidden); extern "C" void LLVMInitializePowerPCTarget() { // Register the targets RegisterTargetMachine A(getThePPC32Target()); @@ -185,12 +185,13 @@ static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL, } static std::unique_ptr createTLOF(const Triple &TT) { - // If it isn't a Mach-O file then it's going to be a linux ELF - // object file. if (TT.isOSDarwin()) - return llvm::make_unique(); + return std::make_unique(); + + if (TT.isOSAIX()) + return std::make_unique(); - return llvm::make_unique(); + return std::make_unique(); } static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT, @@ -248,10 +249,19 @@ static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT, report_fatal_error("Target does not support the kernel CodeModel", false); return *CM; } - if (!TT.isOSDarwin() && !JIT && - (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le)) - return CodeModel::Medium; - return CodeModel::Small; + + if (JIT) + return CodeModel::Small; + if (TT.isOSAIX()) + return CodeModel::Small; + + assert(TT.isOSBinFormatELF() && "All remaining PPC OSes are ELF based."); + + if (TT.isArch32Bit()) + return CodeModel::Small; + + assert(TT.isArch64Bit() && "Unsupported PPC architecture."); + return CodeModel::Medium; } @@ -259,8 +269,8 @@ static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) { const PPCSubtarget &ST = C->MF->getSubtarget(); ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, ST.usePPCPreRASchedStrategy() ? - llvm::make_unique(C) : - llvm::make_unique(C)); + std::make_unique(C) : + std::make_unique(C)); // add DAG Mutations here. DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); return DAG; @@ -271,8 +281,8 @@ static ScheduleDAGInstrs *createPPCPostMachineScheduler( const PPCSubtarget &ST = C->MF->getSubtarget(); ScheduleDAGMI *DAG = new ScheduleDAGMI(C, ST.usePPCPostRASchedStrategy() ? - llvm::make_unique(C) : - llvm::make_unique(C), true); + std::make_unique(C) : + std::make_unique(C), true); // add DAG Mutations here. return DAG; } @@ -328,7 +338,7 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const { // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. resetTargetOptions(F); - I = llvm::make_unique( + I = std::make_unique( TargetTriple, CPU, // FIXME: It would be good to have the subtarget additions here // not necessary. 
Anything that turns them on/off (overrides) ends diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index ff3dfbfaca05..f51300c656aa 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -594,10 +594,37 @@ bool PPCTTIImpl::enableInterleavedAccessVectorization() { return true; } -unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) { - if (Vector && !ST->hasAltivec() && !ST->hasQPX()) - return 0; - return ST->hasVSX() ? 64 : 32; +unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const { + assert(ClassID == GPRRC || ClassID == FPRRC || + ClassID == VRRC || ClassID == VSXRC); + if (ST->hasVSX()) { + assert(ClassID == GPRRC || ClassID == VSXRC); + return ClassID == GPRRC ? 32 : 64; + } + assert(ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC); + return 32; +} + +unsigned PPCTTIImpl::getRegisterClassForType(bool Vector, Type *Ty) const { + if (Vector) + return ST->hasVSX() ? VSXRC : VRRC; + else if (Ty && Ty->getScalarType()->isFloatTy()) + return ST->hasVSX() ? VSXRC : FPRRC; + else + return GPRRC; +} + +const char* PPCTTIImpl::getRegisterClassName(unsigned ClassID) const { + + switch (ClassID) { + default: + llvm_unreachable("unknown register class"); + return "PPC::unknown register class"; + case GPRRC: return "PPC::GPRRC"; + case FPRRC: return "PPC::FPRRC"; + case VRRC: return "PPC::VRRC"; + case VSXRC: return "PPC::VSXRC"; + } } unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const { @@ -613,7 +640,7 @@ unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const { } -unsigned PPCTTIImpl::getCacheLineSize() { +unsigned PPCTTIImpl::getCacheLineSize() const { // Check first if the user specified a custom line size. if (CacheLineSize.getNumOccurrences() > 0) return CacheLineSize; @@ -628,7 +655,7 @@ unsigned PPCTTIImpl::getCacheLineSize() { return 64; } -unsigned PPCTTIImpl::getPrefetchDistance() { +unsigned PPCTTIImpl::getPrefetchDistance() const { // This seems like a reasonable default for the BG/Q (this pass is enabled, by // default, only on the BG/Q). return 300; @@ -752,6 +779,35 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { return 0; return Cost; + + } else if (Val->getScalarType()->isIntegerTy() && Index != -1U) { + if (ST->hasP9Altivec()) { + if (ISD == ISD::INSERT_VECTOR_ELT) + // A move-to VSR and a permute/insert. Assume vector operation cost + // for both (cost will be 2x on P9). + return vectorCostAdjustment(2, Opcode, Val, nullptr); + + // It's an extract. Maybe we can do a cheap move-from VSR. + unsigned EltSize = Val->getScalarSizeInBits(); + if (EltSize == 64) { + unsigned MfvsrdIndex = ST->isLittleEndian() ? 1 : 0; + if (Index == MfvsrdIndex) + return 1; + } else if (EltSize == 32) { + unsigned MfvsrwzIndex = ST->isLittleEndian() ? 2 : 1; + if (Index == MfvsrwzIndex) + return 1; + } + + // We need a vector extract (or mfvsrld). Assume vector operation cost. + // The cost of the load constant for a vector extract is disregarded + // (invariant, easily schedulable). + return vectorCostAdjustment(1, Opcode, Val, nullptr); + + } else if (ST->hasDirectMove()) + // Assume permute has standard cost. + // Assume move-to/move-from VSR have 2x standard cost. + return 3; } // Estimated cost of a load-hit-store delay. 
This was obtained diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h index 5d76ee418b69..83a70364bf68 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -72,10 +72,16 @@ public: TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const; bool enableInterleavedAccessVectorization(); - unsigned getNumberOfRegisters(bool Vector); + + enum PPCRegisterClass { + GPRRC, FPRRC, VRRC, VSXRC + }; + unsigned getNumberOfRegisters(unsigned ClassID) const; + unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const; + const char* getRegisterClassName(unsigned ClassID) const; unsigned getRegisterBitWidth(bool Vector) const; - unsigned getCacheLineSize(); - unsigned getPrefetchDistance(); + unsigned getCacheLineSize() const override; + unsigned getPrefetchDistance() const override; unsigned getMaxInterleaveFactor(unsigned VF); int vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, Type *Ty2); int getArithmeticInstrCost( diff --git a/lib/Target/PowerPC/PPCVSXCopy.cpp b/lib/Target/PowerPC/PPCVSXCopy.cpp index 719ed7b63878..3463bbbdc5f0 100644 --- a/lib/Target/PowerPC/PPCVSXCopy.cpp +++ b/lib/Target/PowerPC/PPCVSXCopy.cpp @@ -50,7 +50,7 @@ namespace { bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC, MachineRegisterInfo &MRI) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { return RC->hasSubClassEq(MRI.getRegClass(Reg)); } else if (RC->contains(Reg)) { return true; @@ -102,7 +102,7 @@ protected: IsVSFReg(SrcMO.getReg(), MRI)) && "Unknown source for a VSX copy"); - unsigned NewVReg = MRI.createVirtualRegister(SrcRC); + Register NewVReg = MRI.createVirtualRegister(SrcRC); BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg) .addImm(1) // add 1, not 0, because there is no implicit clearing @@ -124,7 +124,7 @@ protected: "Unknown destination for a VSX copy"); // Copy the VSX value into a new VSX register of the correct subclass. - unsigned NewVReg = MRI.createVirtualRegister(DstRC); + Register NewVReg = MRI.createVirtualRegister(DstRC); BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg) .add(SrcMO); diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index ce78239df0a8..5e150be544ed 100644 --- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -126,8 +126,8 @@ protected: if (!AddendMI->isFullCopy()) continue; - unsigned AddendSrcReg = AddendMI->getOperand(1).getReg(); - if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) { + Register AddendSrcReg = AddendMI->getOperand(1).getReg(); + if (Register::isVirtualRegister(AddendSrcReg)) { if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) != MRI.getRegClass(AddendSrcReg)) continue; @@ -182,12 +182,12 @@ protected: // %5 = A-form-op %5, %5, %11; // where %5 and %11 are both kills. This case would be skipped // otherwise. - unsigned OldFMAReg = MI.getOperand(0).getReg(); + Register OldFMAReg = MI.getOperand(0).getReg(); // Find one of the product operands that is killed by this instruction. 
unsigned KilledProdOp = 0, OtherProdOp = 0; - unsigned Reg2 = MI.getOperand(2).getReg(); - unsigned Reg3 = MI.getOperand(3).getReg(); + Register Reg2 = MI.getOperand(2).getReg(); + Register Reg3 = MI.getOperand(3).getReg(); if (LIS->getInterval(Reg2).Query(FMAIdx).isKill() && Reg2 != OldFMAReg) { KilledProdOp = 2; @@ -208,14 +208,14 @@ protected: // legality checks above, the live range for the addend source register // could be extended), but it seems likely that such a trivial copy can // be coalesced away later, and thus is not worth the effort. - if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg) && + if (Register::isVirtualRegister(AddendSrcReg) && !LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) continue; // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3. - unsigned KilledProdReg = MI.getOperand(KilledProdOp).getReg(); - unsigned OtherProdReg = MI.getOperand(OtherProdOp).getReg(); + Register KilledProdReg = MI.getOperand(KilledProdOp).getReg(); + Register OtherProdReg = MI.getOperand(OtherProdOp).getReg(); unsigned AddSubReg = AddendMI->getOperand(1).getSubReg(); unsigned KilledProdSubReg = MI.getOperand(KilledProdOp).getSubReg(); @@ -314,7 +314,7 @@ protected: // Extend the live interval of the addend source (it might end at the // copy to be removed, or somewhere in between there and here). This // is necessary only if it is a physical register. - if (!TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) + if (!Register::isVirtualRegister(AddendSrcReg)) for (MCRegUnitIterator Units(AddendSrcReg, TRI); Units.isValid(); ++Units) { unsigned Unit = *Units; diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index 44175af7f9b6..c3729da0b07b 100644 --- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -158,7 +158,7 @@ private: // Return true iff the given register is in the given class. bool isRegInClass(unsigned Reg, const TargetRegisterClass *RC) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) return RC->hasSubClassEq(MRI->getRegClass(Reg)); return RC->contains(Reg); } @@ -253,7 +253,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (isAnyVecReg(Reg, Partial)) { RelevantInstr = true; break; @@ -566,7 +566,7 @@ unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg, CopySrcReg = MI->getOperand(2).getReg(); } - if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) { + if (!Register::isVirtualRegister(CopySrcReg)) { if (!isScalarVecReg(CopySrcReg)) SwapVector[VecIdx].MentionsPhysVR = 1; return CopySrcReg; @@ -601,11 +601,11 @@ void PPCVSXSwapRemoval::formWebs() { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!isVecReg(Reg) && !isScalarVecReg(Reg)) continue; - if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + if (!Register::isVirtualRegister(Reg)) { if (!(MI->isCopy() && isScalarVecReg(Reg))) SwapVector[EntryIdx].MentionsPhysVR = 1; continue; @@ -667,7 +667,7 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() { // than a swap instruction. else if (SwapVector[EntryIdx].IsLoad && SwapVector[EntryIdx].IsSwap) { MachineInstr *MI = SwapVector[EntryIdx].VSEMI; - unsigned DefReg = MI->getOperand(0).getReg(); + Register DefReg = MI->getOperand(0).getReg(); // We skip debug instructions in the analysis. 
(Note that debug // location information is still maintained by this optimization @@ -695,9 +695,9 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() { // other than a swap instruction. } else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) { MachineInstr *MI = SwapVector[EntryIdx].VSEMI; - unsigned UseReg = MI->getOperand(0).getReg(); + Register UseReg = MI->getOperand(0).getReg(); MachineInstr *DefMI = MRI->getVRegDef(UseReg); - unsigned DefReg = DefMI->getOperand(0).getReg(); + Register DefReg = DefMI->getOperand(0).getReg(); int DefIdx = SwapMap[DefMI]; if (!SwapVector[DefIdx].IsSwap || SwapVector[DefIdx].IsLoad || @@ -756,7 +756,7 @@ void PPCVSXSwapRemoval::markSwapsForRemoval() { if (!SwapVector[Repr].WebRejected) { MachineInstr *MI = SwapVector[EntryIdx].VSEMI; - unsigned DefReg = MI->getOperand(0).getReg(); + Register DefReg = MI->getOperand(0).getReg(); for (MachineInstr &UseMI : MRI->use_nodbg_instructions(DefReg)) { int UseIdx = SwapMap[&UseMI]; @@ -772,7 +772,7 @@ void PPCVSXSwapRemoval::markSwapsForRemoval() { if (!SwapVector[Repr].WebRejected) { MachineInstr *MI = SwapVector[EntryIdx].VSEMI; - unsigned UseReg = MI->getOperand(0).getReg(); + Register UseReg = MI->getOperand(0).getReg(); MachineInstr *DefMI = MRI->getVRegDef(UseReg); int DefIdx = SwapMap[DefMI]; SwapVector[DefIdx].WillRemove = 1; @@ -869,8 +869,8 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) { Selector = 3 - Selector; MI->getOperand(3).setImm(Selector); - unsigned Reg1 = MI->getOperand(1).getReg(); - unsigned Reg2 = MI->getOperand(2).getReg(); + Register Reg1 = MI->getOperand(1).getReg(); + Register Reg2 = MI->getOperand(2).getReg(); MI->getOperand(1).setReg(Reg2); MI->getOperand(2).setReg(Reg1); @@ -894,9 +894,9 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) { LLVM_DEBUG(dbgs() << "Changing SUBREG_TO_REG: "); LLVM_DEBUG(MI->dump()); - unsigned DstReg = MI->getOperand(0).getReg(); + Register DstReg = MI->getOperand(0).getReg(); const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); - unsigned NewVReg = MRI->createVirtualRegister(DstRC); + Register NewVReg = MRI->createVirtualRegister(DstRC); MI->getOperand(0).setReg(NewVReg); LLVM_DEBUG(dbgs() << " Into: "); @@ -910,8 +910,8 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) { // prior to the swap, and from VSRC to VRRC following the swap. // Coalescing will usually remove all this mess. if (DstRC == &PPC::VRRCRegClass) { - unsigned VSRCTmp1 = MRI->createVirtualRegister(&PPC::VSRCRegClass); - unsigned VSRCTmp2 = MRI->createVirtualRegister(&PPC::VSRCRegClass); + Register VSRCTmp1 = MRI->createVirtualRegister(&PPC::VSRCRegClass); + Register VSRCTmp2 = MRI->createVirtualRegister(&PPC::VSRCRegClass); BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(), TII->get(PPC::COPY), VSRCTmp1) diff --git a/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 0172c6298772..300ba8dc675c 100644 --- a/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/CodeGen/Register.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -79,7 +80,7 @@ class RISCVAsmParser : public MCTargetAsmParser { // Helper to emit a combination of LUI, ADDI(W), and SLLI instructions that // synthesize the desired immedate value into the destination register. 
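The PowerPC hunks above, and most of the RISC-V hunks that follow, are a mechanical migration from `unsigned` register variables and the static `TargetRegisterInfo::isVirtualRegister` to the `llvm::Register` wrapper and its helpers. A minimal sketch of the resulting idiom, assuming a MachineInstr and its MachineRegisterInfo:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

// Sketch: Register converts implicitly to and from the raw unsigned
// encoding, so call sites change type but not behaviour.
static bool defIsVirtualAndInClass(const llvm::MachineInstr &MI,
                                   const llvm::MachineRegisterInfo &MRI,
                                   const llvm::TargetRegisterClass *RC) {
  llvm::Register DefReg = MI.getOperand(0).getReg();
  if (!llvm::Register::isVirtualRegister(DefReg))
    return RC->contains(DefReg); // physical registers keep the RC query
  return RC->hasSubClassEq(MRI.getRegClass(DefReg));
}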
- void emitLoadImm(unsigned DestReg, int64_t Value, MCStreamer &Out); + void emitLoadImm(Register DestReg, int64_t Value, MCStreamer &Out); // Helper to emit a combination of AUIPC and SecondOpcode. Used to implement // helpers such as emitLoadLocalAddress and emitLoadAddress. @@ -127,6 +128,7 @@ class RISCVAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseRegister(OperandVector &Operands, bool AllowParens = false); OperandMatchResultTy parseMemOpBaseReg(OperandVector &Operands); + OperandMatchResultTy parseAtomicMemOp(OperandVector &Operands); OperandMatchResultTy parseOperandWithModifier(OperandVector &Operands); OperandMatchResultTy parseBareSymbol(OperandVector &Operands); OperandMatchResultTy parseCallSymbol(OperandVector &Operands); @@ -193,7 +195,7 @@ public: /// instruction struct RISCVOperand : public MCParsedAsmOperand { - enum KindTy { + enum class KindTy { Token, Register, Immediate, @@ -203,7 +205,7 @@ struct RISCVOperand : public MCParsedAsmOperand { bool IsRV64; struct RegOp { - unsigned RegNum; + Register RegNum; }; struct ImmOp { @@ -235,26 +237,26 @@ public: StartLoc = o.StartLoc; EndLoc = o.EndLoc; switch (Kind) { - case Register: + case KindTy::Register: Reg = o.Reg; break; - case Immediate: + case KindTy::Immediate: Imm = o.Imm; break; - case Token: + case KindTy::Token: Tok = o.Tok; break; - case SystemRegister: + case KindTy::SystemRegister: SysReg = o.SysReg; break; } } - bool isToken() const override { return Kind == Token; } - bool isReg() const override { return Kind == Register; } - bool isImm() const override { return Kind == Immediate; } + bool isToken() const override { return Kind == KindTy::Token; } + bool isReg() const override { return Kind == KindTy::Register; } + bool isImm() const override { return Kind == KindTy::Immediate; } bool isMem() const override { return false; } - bool isSystemRegister() const { return Kind == SystemRegister; } + bool isSystemRegister() const { return Kind == KindTy::SystemRegister; } static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm, RISCVMCExpr::VariantKind &VK) { @@ -276,7 +278,7 @@ public: // modifiers and isShiftedInt(Op). template bool isBareSimmNLsb0() const { int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; if (!isImm()) return false; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); @@ -292,7 +294,7 @@ public: bool isBareSymbol() const { int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; // Must be of 'immediate' type but not a constant. if (!isImm() || evaluateConstantImm(getImm(), Imm, VK)) return false; @@ -302,7 +304,7 @@ public: bool isCallSymbol() const { int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; // Must be of 'immediate' type but not a constant. if (!isImm() || evaluateConstantImm(getImm(), Imm, VK)) return false; @@ -313,7 +315,7 @@ public: bool isTPRelAddSymbol() const { int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; // Must be of 'immediate' type but not a constant. 
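KindTy becomes a scoped enum presumably because one of its enumerators is named `Register`, which would otherwise shadow the `llvm::Register` type now used for `RegOp::RegNum` in the same class. A reduced sketch of the collision, using hypothetical stand-in types rather than the real parser classes:

namespace sketch {
class Register { // stand-in for llvm::Register
public:
  Register(unsigned R = 0) : Id(R) {}
  unsigned Id;
};

struct UnscopedOperand {
  enum Kind { Token, Register, Immediate } K; // injects `Register` as a value
  // Register RegNum;   // would not compile: `Register` now names the
                        // enumerator, not the class above
};

struct ScopedOperand {
  enum class Kind { Token, Register, Immediate } K;
  Register RegNum;      // OK: scoped enumerators do not leak into this scope
};
} // namespace sketch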
if (!isImm() || evaluateConstantImm(getImm(), Imm, VK)) return false; @@ -364,7 +366,7 @@ public: bool isImmXLenLI() const { int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; if (!isImm()) return false; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); @@ -372,13 +374,13 @@ public: return true; // Given only Imm, ensuring that the actually specified constant is either // a signed or unsigned 64-bit number is unfortunately impossible. - bool IsInRange = isRV64() ? true : isInt<32>(Imm) || isUInt<32>(Imm); - return IsConstantImm && IsInRange && VK == RISCVMCExpr::VK_RISCV_None; + return IsConstantImm && VK == RISCVMCExpr::VK_RISCV_None && + (isRV64() || (isInt<32>(Imm) || isUInt<32>(Imm))); } bool isUImmLog2XLen() const { int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; if (!isImm()) return false; if (!evaluateConstantImm(getImm(), Imm, VK) || @@ -389,7 +391,7 @@ public: bool isUImmLog2XLenNonZero() const { int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; if (!isImm()) return false; if (!evaluateConstantImm(getImm(), Imm, VK) || @@ -402,7 +404,7 @@ public: bool isUImm5() const { int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; if (!isImm()) return false; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); @@ -411,7 +413,7 @@ public: bool isUImm5NonZero() const { int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; if (!isImm()) return false; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); @@ -422,7 +424,7 @@ public: bool isSImm6() const { if (!isImm()) return false; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; int64_t Imm; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); return IsConstantImm && isInt<6>(Imm) && @@ -432,7 +434,7 @@ public: bool isSImm6NonZero() const { if (!isImm()) return false; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; int64_t Imm; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); return IsConstantImm && isInt<6>(Imm) && (Imm != 0) && @@ -443,7 +445,7 @@ public: if (!isImm()) return false; int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); return IsConstantImm && (Imm != 0) && (isUInt<5>(Imm) || (Imm >= 0xfffe0 && Imm <= 0xfffff)) && @@ -454,7 +456,7 @@ public: if (!isImm()) return false; int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); return IsConstantImm && isShiftedUInt<5, 2>(Imm) && VK == RISCVMCExpr::VK_RISCV_None; @@ -464,7 +466,7 @@ public: if (!isImm()) return false; int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); return IsConstantImm && isShiftedUInt<6, 2>(Imm) && VK == RISCVMCExpr::VK_RISCV_None; @@ -474,7 +476,7 @@ public: if (!isImm()) return false; int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); return IsConstantImm && isShiftedUInt<5, 3>(Imm) && VK == RISCVMCExpr::VK_RISCV_None; @@ -486,7 
+488,7 @@ public: if (!isImm()) return false; int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); return IsConstantImm && isShiftedUInt<6, 3>(Imm) && VK == RISCVMCExpr::VK_RISCV_None; @@ -496,14 +498,14 @@ public: if (!isImm()) return false; int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); return IsConstantImm && isShiftedUInt<8, 2>(Imm) && (Imm != 0) && VK == RISCVMCExpr::VK_RISCV_None; } bool isSImm12() const { - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; int64_t Imm; bool IsValid; if (!isImm()) @@ -527,14 +529,14 @@ public: if (!isImm()) return false; int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); return IsConstantImm && (Imm != 0) && isShiftedInt<6, 4>(Imm) && VK == RISCVMCExpr::VK_RISCV_None; } bool isUImm20LUI() const { - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; int64_t Imm; bool IsValid; if (!isImm()) @@ -552,7 +554,7 @@ public: } bool isUImm20AUIPC() const { - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; int64_t Imm; bool IsValid; if (!isImm()) @@ -575,6 +577,15 @@ public: bool isSImm21Lsb0JAL() const { return isBareSimmNLsb0<21>(); } + bool isImmZero() const { + if (!isImm()) + return false; + int64_t Imm; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; + bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); + return IsConstantImm && (Imm == 0) && VK == RISCVMCExpr::VK_RISCV_None; + } + /// getStartLoc - Gets location of the first token of this operand SMLoc getStartLoc() const override { return StartLoc; } /// getEndLoc - Gets location of the last token of this operand @@ -583,38 +594,38 @@ public: bool isRV64() const { return IsRV64; } unsigned getReg() const override { - assert(Kind == Register && "Invalid type access!"); - return Reg.RegNum; + assert(Kind == KindTy::Register && "Invalid type access!"); + return Reg.RegNum.id(); } StringRef getSysReg() const { - assert(Kind == SystemRegister && "Invalid access!"); + assert(Kind == KindTy::SystemRegister && "Invalid access!"); return StringRef(SysReg.Data, SysReg.Length); } const MCExpr *getImm() const { - assert(Kind == Immediate && "Invalid type access!"); + assert(Kind == KindTy::Immediate && "Invalid type access!"); return Imm.Val; } StringRef getToken() const { - assert(Kind == Token && "Invalid type access!"); + assert(Kind == KindTy::Token && "Invalid type access!"); return Tok; } void print(raw_ostream &OS) const override { switch (Kind) { - case Immediate: + case KindTy::Immediate: OS << *getImm(); break; - case Register: + case KindTy::Register: OS << ""; break; - case Token: + case KindTy::Token: OS << "'" << getToken() << "'"; break; - case SystemRegister: + case KindTy::SystemRegister: OS << "'; break; } @@ -622,7 +633,7 @@ public: static std::unique_ptr createToken(StringRef Str, SMLoc S, bool IsRV64) { - auto Op = make_unique(Token); + auto Op = std::make_unique(KindTy::Token); Op->Tok = Str; Op->StartLoc = S; Op->EndLoc = S; @@ -632,7 +643,7 @@ public: static std::unique_ptr createReg(unsigned RegNo, SMLoc S, SMLoc E, bool IsRV64) { - auto Op = make_unique(Register); + auto Op = std::make_unique(KindTy::Register); 
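Each operand predicate now initializes its VariantKind out-parameter to VK_RISCV_None before calling evaluateConstantImm, presumably to guard the paths on which the helper returns without writing to it. A reduced sketch of the pattern, with hypothetical names rather than the real parser API:

#include <cstdint>

// Sketch only: `evalImm` mimics a helper that may return false without
// touching its out-parameters.
enum HypotheticalVariantKind { VK_None, VK_Lo, VK_Hi };

static bool evalImm(const int64_t *MaybeConst, int64_t &Imm,
                    HypotheticalVariantKind &VK) {
  if (!MaybeConst)
    return false;          // Imm and VK deliberately left untouched
  Imm = *MaybeConst;
  VK = VK_None;
  return true;
}

static bool isSmallPlainImm(const int64_t *MaybeConst) {
  int64_t Imm;
  HypotheticalVariantKind VK = VK_None; // initialized: no indeterminate read
  bool IsConstantImm = evalImm(MaybeConst, Imm, VK);
  return IsConstantImm && Imm >= 0 && Imm < 32 && VK == VK_None;
}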
Op->Reg.RegNum = RegNo; Op->StartLoc = S; Op->EndLoc = E; @@ -642,7 +653,7 @@ public: static std::unique_ptr createImm(const MCExpr *Val, SMLoc S, SMLoc E, bool IsRV64) { - auto Op = make_unique(Immediate); + auto Op = std::make_unique(KindTy::Immediate); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; @@ -652,7 +663,7 @@ public: static std::unique_ptr createSysReg(StringRef Str, SMLoc S, unsigned Encoding, bool IsRV64) { - auto Op = make_unique(SystemRegister); + auto Op = std::make_unique(KindTy::SystemRegister); Op->SysReg.Data = Str.data(); Op->SysReg.Length = Str.size(); Op->SysReg.Encoding = Encoding; @@ -664,7 +675,7 @@ public: void addExpr(MCInst &Inst, const MCExpr *Expr) const { assert(Expr && "Expr shouldn't be null!"); int64_t Imm = 0; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; bool IsConstant = evaluateConstantImm(Expr, Imm, VK); if (IsConstant) @@ -730,46 +741,9 @@ public: #define GET_MATCHER_IMPLEMENTATION #include "RISCVGenAsmMatcher.inc" -// Return the matching FPR64 register for the given FPR32. -// FIXME: Ideally this function could be removed in favour of using -// information from TableGen. -unsigned convertFPR32ToFPR64(unsigned Reg) { - switch (Reg) { - default: - llvm_unreachable("Not a recognised FPR32 register"); - case RISCV::F0_32: return RISCV::F0_64; - case RISCV::F1_32: return RISCV::F1_64; - case RISCV::F2_32: return RISCV::F2_64; - case RISCV::F3_32: return RISCV::F3_64; - case RISCV::F4_32: return RISCV::F4_64; - case RISCV::F5_32: return RISCV::F5_64; - case RISCV::F6_32: return RISCV::F6_64; - case RISCV::F7_32: return RISCV::F7_64; - case RISCV::F8_32: return RISCV::F8_64; - case RISCV::F9_32: return RISCV::F9_64; - case RISCV::F10_32: return RISCV::F10_64; - case RISCV::F11_32: return RISCV::F11_64; - case RISCV::F12_32: return RISCV::F12_64; - case RISCV::F13_32: return RISCV::F13_64; - case RISCV::F14_32: return RISCV::F14_64; - case RISCV::F15_32: return RISCV::F15_64; - case RISCV::F16_32: return RISCV::F16_64; - case RISCV::F17_32: return RISCV::F17_64; - case RISCV::F18_32: return RISCV::F18_64; - case RISCV::F19_32: return RISCV::F19_64; - case RISCV::F20_32: return RISCV::F20_64; - case RISCV::F21_32: return RISCV::F21_64; - case RISCV::F22_32: return RISCV::F22_64; - case RISCV::F23_32: return RISCV::F23_64; - case RISCV::F24_32: return RISCV::F24_64; - case RISCV::F25_32: return RISCV::F25_64; - case RISCV::F26_32: return RISCV::F26_64; - case RISCV::F27_32: return RISCV::F27_64; - case RISCV::F28_32: return RISCV::F28_64; - case RISCV::F29_32: return RISCV::F29_64; - case RISCV::F30_32: return RISCV::F30_64; - case RISCV::F31_32: return RISCV::F31_64; - } +static Register convertFPR64ToFPR32(Register Reg) { + assert(Reg >= RISCV::F0_D && Reg <= RISCV::F31_D && "Invalid register"); + return Reg - RISCV::F0_D + RISCV::F0_F; } unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, @@ -778,17 +752,17 @@ unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, if (!Op.isReg()) return Match_InvalidOperand; - unsigned Reg = Op.getReg(); - bool IsRegFPR32 = - RISCVMCRegisterClasses[RISCV::FPR32RegClassID].contains(Reg); - bool IsRegFPR32C = - RISCVMCRegisterClasses[RISCV::FPR32CRegClassID].contains(Reg); + Register Reg = Op.getReg(); + bool IsRegFPR64 = + RISCVMCRegisterClasses[RISCV::FPR64RegClassID].contains(Reg); + bool IsRegFPR64C = + RISCVMCRegisterClasses[RISCV::FPR64CRegClassID].contains(Reg); // As the parser couldn't differentiate an FPR32 from 
an FPR64, coerce the - // register from FPR32 to FPR64 or FPR32C to FPR64C if necessary. - if ((IsRegFPR32 && Kind == MCK_FPR64) || - (IsRegFPR32C && Kind == MCK_FPR64C)) { - Op.Reg.RegNum = convertFPR32ToFPR64(Reg); + // register from FPR64 to FPR32 or FPR64C to FPR32C if necessary. + if ((IsRegFPR64 && Kind == MCK_FPR32) || + (IsRegFPR64C && Kind == MCK_FPR32C)) { + Op.Reg.RegNum = convertFPR64ToFPR32(Reg); return Match_Success; } return Match_InvalidOperand; @@ -853,6 +827,10 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return generateImmOutOfRangeError(Operands, ErrorInfo, std::numeric_limits::min(), std::numeric_limits::max()); + case Match_InvalidImmZero: { + SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); + return Error(ErrorLoc, "immediate must be zero"); + } case Match_InvalidUImmLog2XLen: if (isRV64()) return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 6) - 1); @@ -968,14 +946,19 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // alternative ABI names), setting RegNo to the matching register. Upon // failure, returns true and sets RegNo to 0. If IsRV32E then registers // x16-x31 will be rejected. -static bool matchRegisterNameHelper(bool IsRV32E, unsigned &RegNo, +static bool matchRegisterNameHelper(bool IsRV32E, Register &RegNo, StringRef Name) { RegNo = MatchRegisterName(Name); - if (RegNo == 0) + // The 32- and 64-bit FPRs have the same asm name. Check that the initial + // match always matches the 64-bit variant, and not the 32-bit one. + assert(!(RegNo >= RISCV::F0_F && RegNo <= RISCV::F31_F)); + // The default FPR register class is based on the tablegen enum ordering. + static_assert(RISCV::F0_D < RISCV::F0_F, "FPR matching must be updated"); + if (RegNo == RISCV::NoRegister) RegNo = MatchRegisterAltName(Name); if (IsRV32E && RegNo >= RISCV::X16 && RegNo <= RISCV::X31) - RegNo = 0; - return RegNo == 0; + RegNo = RISCV::NoRegister; + return RegNo == RISCV::NoRegister; } bool RISCVAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, @@ -986,7 +969,7 @@ bool RISCVAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, RegNo = 0; StringRef Name = getLexer().getTok().getIdentifier(); - if (matchRegisterNameHelper(isRV32E(), RegNo, Name)) + if (matchRegisterNameHelper(isRV32E(), (Register&)RegNo, Name)) return Error(StartLoc, "invalid register name"); getParser().Lex(); // Eat identifier token. 
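The convertFPR64ToFPR32 helper above (and the decoder rewrite further down) replaces per-register switch tables with plain enum arithmetic. This only works because TableGen emits each FPR bank as a contiguous, identically ordered run, which is what the accompanying assert and static_assert check. A reduced sketch with hypothetical enum values:

#include <cassert>

// Hypothetical stand-ins for the generated register enum; the real values
// come from RISCVGenRegisterInfo.inc and are only relied on to be contiguous
// and identically ordered within each bank.
enum HypotheticalReg : unsigned {
  F0_D = 100, F31_D = 131,   // 64-bit FPR bank
  F0_F = 132, F31_F = 163    // 32-bit FPR bank
};

static unsigned convertDToF(unsigned Reg) {
  assert(Reg >= F0_D && Reg <= F31_D && "expected a 64-bit FPR");
  return Reg - F0_D + F0_F;  // same architectural index, other bank
}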
@@ -1018,10 +1001,10 @@ OperandMatchResultTy RISCVAsmParser::parseRegister(OperandVector &Operands, return MatchOperand_NoMatch; case AsmToken::Identifier: StringRef Name = getLexer().getTok().getIdentifier(); - unsigned RegNo; + Register RegNo; matchRegisterNameHelper(isRV32E(), RegNo, Name); - if (RegNo == 0) { + if (RegNo == RISCV::NoRegister) { if (HadParens) getLexer().UnLex(LParen); return MatchOperand_NoMatch; @@ -1208,6 +1191,24 @@ OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) { Res = V; } else Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + + MCBinaryExpr::Opcode Opcode; + switch (getLexer().getKind()) { + default: + Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64())); + return MatchOperand_Success; + case AsmToken::Plus: + Opcode = MCBinaryExpr::Add; + break; + case AsmToken::Minus: + Opcode = MCBinaryExpr::Sub; + break; + } + + const MCExpr *Expr; + if (getParser().parseExpression(Expr)) + return MatchOperand_ParseFail; + Res = MCBinaryExpr::create(Opcode, Res, Expr, getContext()); Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64())); return MatchOperand_Success; } @@ -1282,6 +1283,73 @@ RISCVAsmParser::parseMemOpBaseReg(OperandVector &Operands) { return MatchOperand_Success; } +OperandMatchResultTy RISCVAsmParser::parseAtomicMemOp(OperandVector &Operands) { + // Atomic operations such as lr.w, sc.w, and amo*.w accept a "memory operand" + // as one of their register operands, such as `(a0)`. This just denotes that + // the register (in this case `a0`) contains a memory address. + // + // Normally, we would be able to parse these by putting the parens into the + // instruction string. However, GNU as also accepts a zero-offset memory + // operand (such as `0(a0)`), and ignores the 0. Normally this would be parsed + // with parseImmediate followed by parseMemOpBaseReg, but these instructions + // do not accept an immediate operand, and we do not want to add a "dummy" + // operand that is silently dropped. + // + // Instead, we use this custom parser. This will: allow (and discard) an + // offset if it is zero; require (and discard) parentheses; and add only the + // parsed register operand to `Operands`. + // + // These operands are printed with RISCVInstPrinter::printAtomicMemOp, which + // will only print the register surrounded by parentheses (which GNU as also + // uses as its canonical representation for these operands). + std::unique_ptr OptionalImmOp; + + if (getLexer().isNot(AsmToken::LParen)) { + // Parse an Integer token. We do not accept arbritrary constant expressions + // in the offset field (because they may include parens, which complicates + // parsing a lot). + int64_t ImmVal; + SMLoc ImmStart = getLoc(); + if (getParser().parseIntToken(ImmVal, + "expected '(' or optional integer offset")) + return MatchOperand_ParseFail; + + // Create a RISCVOperand for checking later (so the error messages are + // nicer), but we don't add it to Operands. + SMLoc ImmEnd = getLoc(); + OptionalImmOp = + RISCVOperand::createImm(MCConstantExpr::create(ImmVal, getContext()), + ImmStart, ImmEnd, isRV64()); + } + + if (getLexer().isNot(AsmToken::LParen)) { + Error(getLoc(), OptionalImmOp ? 
"expected '(' after optional integer offset" + : "expected '(' or optional integer offset"); + return MatchOperand_ParseFail; + } + getParser().Lex(); // Eat '(' + + if (parseRegister(Operands) != MatchOperand_Success) { + Error(getLoc(), "expected register"); + return MatchOperand_ParseFail; + } + + if (getLexer().isNot(AsmToken::RParen)) { + Error(getLoc(), "expected ')'"); + return MatchOperand_ParseFail; + } + getParser().Lex(); // Eat ')' + + // Deferred Handling of non-zero offsets. This makes the error messages nicer. + if (OptionalImmOp && !OptionalImmOp->isImmZero()) { + Error(OptionalImmOp->getStartLoc(), "optional integer offset must be 0", + SMRange(OptionalImmOp->getStartLoc(), OptionalImmOp->getEndLoc())); + return MatchOperand_ParseFail; + } + + return MatchOperand_Success; +} + /// Looks at a token type and creates the relevant operand from this /// information, adding to Operands. If operand was parsed, returns false, else /// true. @@ -1523,12 +1591,12 @@ void RISCVAsmParser::emitToStreamer(MCStreamer &S, const MCInst &Inst) { S.EmitInstruction((Res ? CInst : Inst), getSTI()); } -void RISCVAsmParser::emitLoadImm(unsigned DestReg, int64_t Value, +void RISCVAsmParser::emitLoadImm(Register DestReg, int64_t Value, MCStreamer &Out) { RISCVMatInt::InstSeq Seq; RISCVMatInt::generateInstSeq(Value, isRV64(), Seq); - unsigned SrcReg = RISCV::X0; + Register SrcReg = RISCV::X0; for (RISCVMatInt::Inst &Inst : Seq) { if (Inst.Opc == RISCV::LUI) { emitToStreamer( @@ -1682,7 +1750,7 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, default: break; case RISCV::PseudoLI: { - unsigned Reg = Inst.getOperand(0).getReg(); + Register Reg = Inst.getOperand(0).getReg(); const MCOperand &Op1 = Inst.getOperand(1); if (Op1.isExpr()) { // We must have li reg, %lo(sym) or li reg, %pcrel_lo(sym) or similar. diff --git a/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index 36200c03f703..15943ba42156 100644 --- a/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -13,6 +13,7 @@ #include "MCTargetDesc/RISCVMCTargetDesc.h" #include "TargetInfo/RISCVTargetInfo.h" #include "Utils/RISCVBaseInfo.h" +#include "llvm/CodeGen/Register.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" @@ -56,17 +57,6 @@ extern "C" void LLVMInitializeRISCVDisassembler() { createRISCVDisassembler); } -static const unsigned GPRDecoderTable[] = { - RISCV::X0, RISCV::X1, RISCV::X2, RISCV::X3, - RISCV::X4, RISCV::X5, RISCV::X6, RISCV::X7, - RISCV::X8, RISCV::X9, RISCV::X10, RISCV::X11, - RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15, - RISCV::X16, RISCV::X17, RISCV::X18, RISCV::X19, - RISCV::X20, RISCV::X21, RISCV::X22, RISCV::X23, - RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27, - RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31 -}; - static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { @@ -76,38 +66,21 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo, .getFeatureBits(); bool IsRV32E = FeatureBits[RISCV::FeatureRV32E]; - if (RegNo > array_lengthof(GPRDecoderTable) || (IsRV32E && RegNo > 15)) + if (RegNo >= 32 || (IsRV32E && RegNo >= 16)) return MCDisassembler::Fail; - // We must define our own mapping from RegNo to register identifier. 
- // Accessing index RegNo in the register class will work in the case that - // registers were added in ascending order, but not in general. - unsigned Reg = GPRDecoderTable[RegNo]; + Register Reg = RISCV::X0 + RegNo; Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } -static const unsigned FPR32DecoderTable[] = { - RISCV::F0_32, RISCV::F1_32, RISCV::F2_32, RISCV::F3_32, - RISCV::F4_32, RISCV::F5_32, RISCV::F6_32, RISCV::F7_32, - RISCV::F8_32, RISCV::F9_32, RISCV::F10_32, RISCV::F11_32, - RISCV::F12_32, RISCV::F13_32, RISCV::F14_32, RISCV::F15_32, - RISCV::F16_32, RISCV::F17_32, RISCV::F18_32, RISCV::F19_32, - RISCV::F20_32, RISCV::F21_32, RISCV::F22_32, RISCV::F23_32, - RISCV::F24_32, RISCV::F25_32, RISCV::F26_32, RISCV::F27_32, - RISCV::F28_32, RISCV::F29_32, RISCV::F30_32, RISCV::F31_32 -}; - static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - if (RegNo > array_lengthof(FPR32DecoderTable)) + if (RegNo >= 32) return MCDisassembler::Fail; - // We must define our own mapping from RegNo to register identifier. - // Accessing index RegNo in the register class will work in the case that - // registers were added in ascending order, but not in general. - unsigned Reg = FPR32DecoderTable[RegNo]; + Register Reg = RISCV::F0_F + RegNo; Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -115,35 +88,21 @@ static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo, static DecodeStatus DecodeFPR32CRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - if (RegNo > 8) { + if (RegNo >= 8) { return MCDisassembler::Fail; } - unsigned Reg = FPR32DecoderTable[RegNo + 8]; + Register Reg = RISCV::F8_F + RegNo; Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } -static const unsigned FPR64DecoderTable[] = { - RISCV::F0_64, RISCV::F1_64, RISCV::F2_64, RISCV::F3_64, - RISCV::F4_64, RISCV::F5_64, RISCV::F6_64, RISCV::F7_64, - RISCV::F8_64, RISCV::F9_64, RISCV::F10_64, RISCV::F11_64, - RISCV::F12_64, RISCV::F13_64, RISCV::F14_64, RISCV::F15_64, - RISCV::F16_64, RISCV::F17_64, RISCV::F18_64, RISCV::F19_64, - RISCV::F20_64, RISCV::F21_64, RISCV::F22_64, RISCV::F23_64, - RISCV::F24_64, RISCV::F25_64, RISCV::F26_64, RISCV::F27_64, - RISCV::F28_64, RISCV::F29_64, RISCV::F30_64, RISCV::F31_64 -}; - static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - if (RegNo > array_lengthof(FPR64DecoderTable)) + if (RegNo >= 32) return MCDisassembler::Fail; - // We must define our own mapping from RegNo to register identifier. - // Accessing index RegNo in the register class will work in the case that - // registers were added in ascending order, but not in general. 
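The decodeRVCInstr* helpers added just below reassemble the compressed-instruction immediates by hand before handing them to the generated immediate decoders. A standalone sketch of the simm6 case, with extractField standing in for the fieldFromInstruction helper emitted into RISCVGenDisassemblerTables.inc:

#include <cstdint>

// Sketch: the RVC simm6 field is split across the 16-bit encoding, bit 12
// carries imm[5] and bits 6..2 carry imm[4:0].
static uint64_t extractField(uint32_t Insn, unsigned Start, unsigned Width) {
  return (Insn >> Start) & ((1u << Width) - 1);
}

static uint64_t rvcSImm6Field(uint32_t Insn) {
  return extractField(Insn, 12, 1) << 5 | extractField(Insn, 2, 5);
}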
- unsigned Reg = FPR64DecoderTable[RegNo]; + Register Reg = RISCV::F0_D + RegNo; Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -151,10 +110,10 @@ static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo, static DecodeStatus DecodeFPR64CRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - if (RegNo > 8) { + if (RegNo >= 8) { return MCDisassembler::Fail; } - unsigned Reg = FPR64DecoderTable[RegNo + 8]; + Register Reg = RISCV::F8_D + RegNo; Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -182,10 +141,10 @@ static DecodeStatus DecodeGPRNoX0X2RegisterClass(MCInst &Inst, uint64_t RegNo, static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - if (RegNo > 8) + if (RegNo >= 8) return MCDisassembler::Fail; - unsigned Reg = GPRDecoderTable[RegNo + 8]; + Register Reg = RISCV::X8 + RegNo; Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -279,8 +238,80 @@ static DecodeStatus decodeFRMArg(MCInst &Inst, uint64_t Imm, return MCDisassembler::Success; } +static DecodeStatus decodeRVCInstrSImm(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus decodeRVCInstrRdSImm(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus decodeRVCInstrRdRs1UImm(MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus decodeRVCInstrRdRs2(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus decodeRVCInstrRdRs1Rs2(MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder); + #include "RISCVGenDisassemblerTables.inc" +static DecodeStatus decodeRVCInstrSImm(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + uint64_t SImm6 = + fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5); + DecodeStatus Result = decodeSImmOperand<6>(Inst, SImm6, Address, Decoder); + (void)Result; + assert(Result == MCDisassembler::Success && "Invalid immediate"); + return MCDisassembler::Success; +} + +static DecodeStatus decodeRVCInstrRdSImm(MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder) { + DecodeGPRRegisterClass(Inst, 0, Address, Decoder); + uint64_t SImm6 = + fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5); + DecodeStatus Result = decodeSImmOperand<6>(Inst, SImm6, Address, Decoder); + (void)Result; + assert(Result == MCDisassembler::Success && "Invalid immediate"); + return MCDisassembler::Success; +} + +static DecodeStatus decodeRVCInstrRdRs1UImm(MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder) { + DecodeGPRRegisterClass(Inst, 0, Address, Decoder); + Inst.addOperand(Inst.getOperand(0)); + uint64_t UImm6 = + fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5); + DecodeStatus Result = decodeUImmOperand<6>(Inst, UImm6, Address, Decoder); + (void)Result; + assert(Result == MCDisassembler::Success && "Invalid immediate"); + return MCDisassembler::Success; +} + +static DecodeStatus decodeRVCInstrRdRs2(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + unsigned Rd = fieldFromInstruction(Insn, 7, 5); + unsigned Rs2 = fieldFromInstruction(Insn, 2, 5); + DecodeGPRRegisterClass(Inst, Rd, Address, Decoder); + DecodeGPRRegisterClass(Inst, Rs2, Address, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus 
decodeRVCInstrRdRs1Rs2(MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder) { + unsigned Rd = fieldFromInstruction(Insn, 7, 5); + unsigned Rs2 = fieldFromInstruction(Insn, 2, 5); + DecodeGPRRegisterClass(Inst, Rd, Address, Decoder); + Inst.addOperand(Inst.getOperand(0)); + DecodeGPRRegisterClass(Inst, Rs2, Address, Decoder); + return MCDisassembler::Success; +} + DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size, ArrayRef Bytes, uint64_t Address, diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index ee5f760ebcb0..f6b727ae37c7 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -30,9 +30,16 @@ bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm, const MCValue &Target) { bool ShouldForce = false; - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { default: break; + case FK_Data_1: + case FK_Data_2: + case FK_Data_4: + case FK_Data_8: + if (Target.isAbsolute()) + return false; + break; case RISCV::fixup_riscv_got_hi20: case RISCV::fixup_riscv_tls_got_hi20: case RISCV::fixup_riscv_tls_gd_hi20: @@ -48,7 +55,7 @@ bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm, return false; } - switch ((unsigned)T->getKind()) { + switch (T->getTargetKind()) { default: llvm_unreachable("Unexpected fixup kind for pcrel_lo12"); break; @@ -83,7 +90,7 @@ bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, return true; int64_t Offset = int64_t(Value); - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { default: return false; case RISCV::fixup_riscv_rvc_branch: @@ -174,8 +181,7 @@ bool RISCVAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const { static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, MCContext &Ctx) { - unsigned Kind = Fixup.getKind(); - switch (Kind) { + switch (Fixup.getTargetKind()) { default: llvm_unreachable("Unknown fixup kind!"); case RISCV::fixup_riscv_got_hi20: @@ -186,6 +192,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case FK_Data_2: case FK_Data_4: case FK_Data_8: + case FK_Data_6b: return Value; case RISCV::fixup_riscv_lo12_i: case RISCV::fixup_riscv_pcrel_lo12_i: diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp index 3ccbc86d2619..cab2bbcb81bc 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/RISCVFixupKinds.h" +#include "MCTargetDesc/RISCVMCExpr.h" #include "MCTargetDesc/RISCVMCTargetDesc.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixup.h" @@ -47,8 +48,9 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { + const MCExpr *Expr = Fixup.getValue(); // Determine the type of the relocation - unsigned Kind = Fixup.getKind(); + unsigned Kind = Fixup.getTargetKind(); if (IsPCRel) { switch (Kind) { default: @@ -87,6 +89,9 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx, default: llvm_unreachable("invalid fixup kind!"); case FK_Data_4: + if (Expr->getKind() == MCExpr::Target && + cast(Expr)->getKind() == RISCVMCExpr::VK_RISCV_32_PCREL) + return ELF::R_RISCV_32_PCREL; return 
ELF::R_RISCV_32; case FK_Data_8: return ELF::R_RISCV_64; @@ -98,6 +103,8 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_RISCV_ADD32; case FK_Data_Add_8: return ELF::R_RISCV_ADD64; + case FK_Data_Add_6b: + return ELF::R_RISCV_SET6; case FK_Data_Sub_1: return ELF::R_RISCV_SUB8; case FK_Data_Sub_2: @@ -106,6 +113,8 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_RISCV_SUB32; case FK_Data_Sub_8: return ELF::R_RISCV_SUB64; + case FK_Data_Sub_6b: + return ELF::R_RISCV_SUB6; case RISCV::fixup_riscv_hi20: return ELF::R_RISCV_HI20; case RISCV::fixup_riscv_lo12_i: @@ -129,5 +138,5 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx, std::unique_ptr llvm::createRISCVELFObjectWriter(uint8_t OSABI, bool Is64Bit) { - return llvm::make_unique(OSABI, Is64Bit); + return std::make_unique(OSABI, Is64Bit); } diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp index fe37b70811d8..8b5fe6dd8252 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp @@ -39,6 +39,30 @@ static cl::opt cl::desc("Disable the emission of assembler pseudo instructions"), cl::init(false), cl::Hidden); +static cl::opt + ArchRegNames("riscv-arch-reg-names", + cl::desc("Print architectural register names rather than the " + "ABI names (such as x2 instead of sp)"), + cl::init(false), cl::Hidden); + +// The command-line flags above are used by llvm-mc and llc. They can be used by +// `llvm-objdump`, but we override their values here to handle options passed to +// `llvm-objdump` with `-M` (which matches GNU objdump). There did not seem to +// be an easier way to allow these options in all these tools, without doing it +// this way. +bool RISCVInstPrinter::applyTargetSpecificCLOption(StringRef Opt) { + if (Opt == "no-aliases") { + NoAliases = true; + return true; + } + if (Opt == "numeric") { + ArchRegNames = true; + return true; + } + + return false; +} + void RISCVInstPrinter::printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, const MCSubtargetInfo &STI) { bool Res = false; @@ -112,3 +136,20 @@ void RISCVInstPrinter::printFRMArg(const MCInst *MI, unsigned OpNo, static_cast(MI->getOperand(OpNo).getImm()); O << RISCVFPRndMode::roundingModeToString(FRMArg); } + +void RISCVInstPrinter::printAtomicMemOp(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNo); + + assert(MO.isReg() && "printAtomicMemOp can only print register operands"); + O << "("; + printRegName(O, MO.getReg()); + O << ")"; + return; +} + +const char *RISCVInstPrinter::getRegisterName(unsigned RegNo) { + return getRegisterName(RegNo, ArchRegNames ? 
RISCV::NoRegAltName + : RISCV::ABIRegAltName); +} diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h b/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h index 5ca1d3fa20fe..189d72626f3e 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h +++ b/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h @@ -25,6 +25,8 @@ public: const MCRegisterInfo &MRI) : MCInstPrinter(MAI, MII, MRI) {} + bool applyTargetSpecificCLOption(StringRef Opt) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, const MCSubtargetInfo &STI) override; void printRegName(raw_ostream &O, unsigned RegNo) const override; @@ -37,6 +39,8 @@ public: const MCSubtargetInfo &STI, raw_ostream &O); void printFRMArg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printAtomicMemOp(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); // Autogenerated by tblgen. void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI, @@ -46,8 +50,8 @@ public: void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, unsigned PrintMethodIdx, const MCSubtargetInfo &STI, raw_ostream &O); - static const char *getRegisterName(unsigned RegNo, - unsigned AltIdx = RISCV::ABIRegAltName); + static const char *getRegisterName(unsigned RegNo); + static const char *getRegisterName(unsigned RegNo, unsigned AltIdx); }; } // namespace llvm diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp index 983629692883..089a2def4c21 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp @@ -11,7 +11,10 @@ //===----------------------------------------------------------------------===// #include "RISCVMCAsmInfo.h" +#include "MCTargetDesc/RISCVMCExpr.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/MC/MCStreamer.h" using namespace llvm; void RISCVMCAsmInfo::anchor() {} @@ -25,3 +28,20 @@ RISCVMCAsmInfo::RISCVMCAsmInfo(const Triple &TT) { Data16bitsDirective = "\t.half\t"; Data32bitsDirective = "\t.word\t"; } + +const MCExpr *RISCVMCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym, + unsigned Encoding, + MCStreamer &Streamer) const { + if (!(Encoding & dwarf::DW_EH_PE_pcrel)) + return MCAsmInfo::getExprForFDESymbol(Sym, Encoding, Streamer); + + // The default symbol subtraction results in an ADD/SUB relocation pair. + // Processing this relocation pair is problematic when linker relaxation is + // enabled, so we follow binutils in using the R_RISCV_32_PCREL relocation + // for the FDE initial location. 
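getExprForFDESymbol only intervenes for pc-relative FDE pointer encodings, and the assert in the hunk additionally expects the sdata4 form that the 32_PCREL relocation can represent. A reduced sketch of that gating, using the DW_EH_PE_* constants from llvm/BinaryFormat/Dwarf.h:

#include "llvm/BinaryFormat/Dwarf.h"

// Sketch: mirror the checks above, pcrel is required and the pointer is
// expected to be encoded as a signed 4-byte datum.
static bool wantsRiscv32PcrelFDESymbol(unsigned Encoding) {
  if (!(Encoding & llvm::dwarf::DW_EH_PE_pcrel))
    return false; // generic MCAsmInfo handling applies
  return (Encoding & llvm::dwarf::DW_EH_PE_sdata4) != 0;
}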
+ MCContext &Ctx = Streamer.getContext(); + const MCExpr *ME = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx); + assert(Encoding & dwarf::DW_EH_PE_sdata4 && "Unexpected encoding"); + return RISCVMCExpr::create(ME, RISCVMCExpr::VK_RISCV_32_PCREL, Ctx); +} diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h b/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h index 043fdb7c08c0..6824baf699aa 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h +++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h @@ -23,6 +23,9 @@ class RISCVMCAsmInfo : public MCAsmInfoELF { public: explicit RISCVMCAsmInfo(const Triple &TargetTriple); + + const MCExpr *getExprForFDESymbol(const MCSymbol *Sym, unsigned Encoding, + MCStreamer &Streamer) const override; }; } // namespace llvm diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index 0fc775f63ed4..de99960848a5 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -15,6 +15,7 @@ #include "MCTargetDesc/RISCVMCTargetDesc.h" #include "Utils/RISCVBaseInfo.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Register.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" @@ -100,7 +101,7 @@ void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI, raw_ostream &OS, const MCSubtargetInfo &STI) const { MCInst TmpInst; MCOperand Func; - unsigned Ra; + Register Ra; if (MI.getOpcode() == RISCV::PseudoTAIL) { Func = MI.getOperand(0); Ra = RISCV::X6; @@ -266,6 +267,7 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo, switch (RVExpr->getKind()) { case RISCVMCExpr::VK_RISCV_None: case RISCVMCExpr::VK_RISCV_Invalid: + case RISCVMCExpr::VK_RISCV_32_PCREL: llvm_unreachable("Unhandled fixup kind!"); case RISCVMCExpr::VK_RISCV_TPREL_ADD: // tprel_add is only used to indicate that a relocation should be emitted diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h b/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h index b5a292dc1b1a..921df376f3df 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h +++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h @@ -36,6 +36,7 @@ public: VK_RISCV_TLS_GD_HI, VK_RISCV_CALL, VK_RISCV_CALL_PLT, + VK_RISCV_32_PCREL, VK_RISCV_Invalid }; diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp index bc45262ab2de..5a4c86e48f1e 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp @@ -16,7 +16,9 @@ #include "RISCVMCAsmInfo.h" #include "RISCVTargetStreamer.h" #include "TargetInfo/RISCVTargetInfo.h" +#include "Utils/RISCVBaseInfo.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/Register.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" @@ -52,7 +54,7 @@ static MCAsmInfo *createRISCVMCAsmInfo(const MCRegisterInfo &MRI, const Triple &TT) { MCAsmInfo *MAI = new RISCVMCAsmInfo(TT); - unsigned SP = MRI.getDwarfRegNum(RISCV::X2, true); + Register SP = MRI.getDwarfRegNum(RISCV::X2, true); MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, SP, 0); MAI->addInitialFrameState(Inst); diff --git a/lib/Target/RISCV/RISCV.h b/lib/Target/RISCV/RISCV.h index 834a1d171143..f23f742a4782 100644 --- a/lib/Target/RISCV/RISCV.h +++ b/lib/Target/RISCV/RISCV.h @@ -18,9 +18,12 @@ #include "llvm/Target/TargetMachine.h" namespace llvm 
{ +class RISCVRegisterBankInfo; +class RISCVSubtarget; class RISCVTargetMachine; class AsmPrinter; class FunctionPass; +class InstructionSelector; class MCInst; class MCOperand; class MachineInstr; @@ -39,6 +42,10 @@ void initializeRISCVMergeBaseOffsetOptPass(PassRegistry &); FunctionPass *createRISCVExpandPseudoPass(); void initializeRISCVExpandPseudoPass(PassRegistry &); + +InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &, + RISCVSubtarget &, + RISCVRegisterBankInfo &); } #endif diff --git a/lib/Target/RISCV/RISCV.td b/lib/Target/RISCV/RISCV.td index e19b70b8e709..46530a8f74a8 100644 --- a/lib/Target/RISCV/RISCV.td +++ b/lib/Target/RISCV/RISCV.td @@ -43,6 +43,11 @@ def FeatureStdExtC def HasStdExtC : Predicate<"Subtarget->hasStdExtC()">, AssemblerPredicate<"FeatureStdExtC">; +def FeatureRVCHints + : SubtargetFeature<"rvc-hints", "EnableRVCHintInstrs", "true", + "Enable RVC Hint Instructions.">; +def HasRVCHints : Predicate<"Subtarget->enableRVCHintInstrs()">, + AssemblerPredicate<"FeatureRVCHints">; def Feature64Bit : SubtargetFeature<"64bit", "HasRV64", "true", "Implements RV64">; @@ -77,14 +82,16 @@ include "RISCVSystemOperands.td" include "RISCVRegisterInfo.td" include "RISCVCallingConv.td" include "RISCVInstrInfo.td" +include "RISCVRegisterBanks.td" //===----------------------------------------------------------------------===// // RISC-V processors supported. //===----------------------------------------------------------------------===// -def : ProcessorModel<"generic-rv32", NoSchedModel, []>; +def : ProcessorModel<"generic-rv32", NoSchedModel, [FeatureRVCHints]>; -def : ProcessorModel<"generic-rv64", NoSchedModel, [Feature64Bit]>; +def : ProcessorModel<"generic-rv64", NoSchedModel, [Feature64Bit, + FeatureRVCHints]>; //===----------------------------------------------------------------------===// // Define the RISC-V target. diff --git a/lib/Target/RISCV/RISCVCallLowering.cpp b/lib/Target/RISCV/RISCVCallLowering.cpp new file mode 100644 index 000000000000..c63a84739c4a --- /dev/null +++ b/lib/Target/RISCV/RISCVCallLowering.cpp @@ -0,0 +1,50 @@ +//===-- RISCVCallLowering.cpp - Call lowering -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file implements the lowering of LLVM calls to machine code calls for +/// GlobalISel. 
+// +//===----------------------------------------------------------------------===// + +#include "RISCVCallLowering.h" +#include "RISCVISelLowering.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" + +using namespace llvm; + +RISCVCallLowering::RISCVCallLowering(const RISCVTargetLowering &TLI) + : CallLowering(&TLI) {} + +bool RISCVCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, + const Value *Val, + ArrayRef VRegs) const { + + MachineInstrBuilder Ret = MIRBuilder.buildInstrNoInsert(RISCV::PseudoRET); + + if (Val != nullptr) { + return false; + } + MIRBuilder.insertInstr(Ret); + return true; +} + +bool RISCVCallLowering::lowerFormalArguments( + MachineIRBuilder &MIRBuilder, const Function &F, + ArrayRef> VRegs) const { + + if (F.arg_empty()) + return true; + + return false; +} + +bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info) const { + return false; +} diff --git a/lib/Target/RISCV/RISCVCallLowering.h b/lib/Target/RISCV/RISCVCallLowering.h new file mode 100644 index 000000000000..7ce074a61f0a --- /dev/null +++ b/lib/Target/RISCV/RISCVCallLowering.h @@ -0,0 +1,42 @@ +//===-- RISCVCallLowering.h - Call lowering ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file describes how to lower LLVM calls to machine code calls. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_RISCV_RISCVCALLLOWERING_H +#define LLVM_LIB_TARGET_RISCV_RISCVCALLLOWERING_H + +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/ValueTypes.h" + +namespace llvm { + +class RISCVTargetLowering; + +class RISCVCallLowering : public CallLowering { + +public: + RISCVCallLowering(const RISCVTargetLowering &TLI); + + bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val, + ArrayRef VRegs) const override; + + bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, + ArrayRef> VRegs) const override; + + bool lowerCall(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info) const override; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_RISCV_RISCVCALLLOWERING_H diff --git a/lib/Target/RISCV/RISCVCallingConv.td b/lib/Target/RISCV/RISCVCallingConv.td index db13e6e8beca..025454f8fcca 100644 --- a/lib/Target/RISCV/RISCVCallingConv.td +++ b/lib/Target/RISCV/RISCVCallingConv.td @@ -18,11 +18,11 @@ def CSR_ILP32_LP64 def CSR_ILP32F_LP64F : CalleeSavedRegs<(add CSR_ILP32_LP64, - F8_32, F9_32, (sequence "F%u_32", 18, 27))>; + F8_F, F9_F, (sequence "F%u_F", 18, 27))>; def CSR_ILP32D_LP64D : CalleeSavedRegs<(add CSR_ILP32_LP64, - F8_64, F9_64, (sequence "F%u_64", 18, 27))>; + F8_D, F9_D, (sequence "F%u_D", 18, 27))>; // Needed for implementation of RISCVRegisterInfo::getNoPreservedMask() def CSR_NoRegs : CalleeSavedRegs<(add)>; @@ -43,12 +43,12 @@ def CSR_XLEN_F32_Interrupt: CalleeSavedRegs<(add X1, (sequence "X%u", 12, 17), (sequence "X%u", 18, 27), (sequence "X%u", 28, 31), - (sequence "F%u_32", 0, 7), - (sequence "F%u_32", 10, 11), - (sequence "F%u_32", 12, 17), - (sequence "F%u_32", 28, 31), - (sequence "F%u_32", 8, 9), - (sequence "F%u_32", 18, 27))>; + (sequence "F%u_F", 0, 7), + (sequence "F%u_F", 10, 11), + 
(sequence "F%u_F", 12, 17), + (sequence "F%u_F", 28, 31), + (sequence "F%u_F", 8, 9), + (sequence "F%u_F", 18, 27))>; // Same as CSR_Interrupt, but including all 64-bit FP registers. def CSR_XLEN_F64_Interrupt: CalleeSavedRegs<(add X1, @@ -57,9 +57,9 @@ def CSR_XLEN_F64_Interrupt: CalleeSavedRegs<(add X1, (sequence "X%u", 12, 17), (sequence "X%u", 18, 27), (sequence "X%u", 28, 31), - (sequence "F%u_64", 0, 7), - (sequence "F%u_64", 10, 11), - (sequence "F%u_64", 12, 17), - (sequence "F%u_64", 28, 31), - (sequence "F%u_64", 8, 9), - (sequence "F%u_64", 18, 27))>; + (sequence "F%u_D", 0, 7), + (sequence "F%u_D", 10, 11), + (sequence "F%u_D", 12, 17), + (sequence "F%u_D", 28, 31), + (sequence "F%u_D", 8, 9), + (sequence "F%u_D", 18, 27))>; diff --git a/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 1c5171a7b7a4..da5cd16e750c 100644 --- a/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -235,10 +235,10 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI, MachineBasicBlock *LoopMBB, MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) { - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned ScratchReg = MI.getOperand(1).getReg(); - unsigned AddrReg = MI.getOperand(2).getReg(); - unsigned IncrReg = MI.getOperand(3).getReg(); + Register DestReg = MI.getOperand(0).getReg(); + Register ScratchReg = MI.getOperand(1).getReg(); + Register AddrReg = MI.getOperand(2).getReg(); + Register IncrReg = MI.getOperand(3).getReg(); AtomicOrdering Ordering = static_cast(MI.getOperand(4).getImm()); @@ -271,9 +271,9 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI, } static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL, - MachineBasicBlock *MBB, unsigned DestReg, - unsigned OldValReg, unsigned NewValReg, - unsigned MaskReg, unsigned ScratchReg) { + MachineBasicBlock *MBB, Register DestReg, + Register OldValReg, Register NewValReg, + Register MaskReg, Register ScratchReg) { assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique"); assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique"); assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique"); @@ -297,11 +297,11 @@ static void doMaskedAtomicBinOpExpansion( MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB, MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) { assert(Width == 32 && "Should never need to expand masked 64-bit operations"); - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned ScratchReg = MI.getOperand(1).getReg(); - unsigned AddrReg = MI.getOperand(2).getReg(); - unsigned IncrReg = MI.getOperand(3).getReg(); - unsigned MaskReg = MI.getOperand(4).getReg(); + Register DestReg = MI.getOperand(0).getReg(); + Register ScratchReg = MI.getOperand(1).getReg(); + Register AddrReg = MI.getOperand(2).getReg(); + Register IncrReg = MI.getOperand(3).getReg(); + Register MaskReg = MI.getOperand(4).getReg(); AtomicOrdering Ordering = static_cast(MI.getOperand(5).getImm()); @@ -394,8 +394,8 @@ bool RISCVExpandPseudo::expandAtomicBinOp( } static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL, - MachineBasicBlock *MBB, unsigned ValReg, - unsigned ShamtReg) { + MachineBasicBlock *MBB, Register ValReg, + Register ShamtReg) { BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg) .addReg(ValReg) .addReg(ShamtReg); @@ -436,12 +436,12 @@ bool RISCVExpandPseudo::expandAtomicMinMaxOp( 
DoneMBB->transferSuccessors(&MBB); MBB.addSuccessor(LoopHeadMBB); - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned Scratch1Reg = MI.getOperand(1).getReg(); - unsigned Scratch2Reg = MI.getOperand(2).getReg(); - unsigned AddrReg = MI.getOperand(3).getReg(); - unsigned IncrReg = MI.getOperand(4).getReg(); - unsigned MaskReg = MI.getOperand(5).getReg(); + Register DestReg = MI.getOperand(0).getReg(); + Register Scratch1Reg = MI.getOperand(1).getReg(); + Register Scratch2Reg = MI.getOperand(2).getReg(); + Register AddrReg = MI.getOperand(3).getReg(); + Register IncrReg = MI.getOperand(4).getReg(); + Register MaskReg = MI.getOperand(5).getReg(); bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max; AtomicOrdering Ordering = static_cast(MI.getOperand(IsSigned ? 7 : 6).getImm()); @@ -549,11 +549,11 @@ bool RISCVExpandPseudo::expandAtomicCmpXchg( DoneMBB->transferSuccessors(&MBB); MBB.addSuccessor(LoopHeadMBB); - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned ScratchReg = MI.getOperand(1).getReg(); - unsigned AddrReg = MI.getOperand(2).getReg(); - unsigned CmpValReg = MI.getOperand(3).getReg(); - unsigned NewValReg = MI.getOperand(4).getReg(); + Register DestReg = MI.getOperand(0).getReg(); + Register ScratchReg = MI.getOperand(1).getReg(); + Register AddrReg = MI.getOperand(2).getReg(); + Register CmpValReg = MI.getOperand(3).getReg(); + Register NewValReg = MI.getOperand(4).getReg(); AtomicOrdering Ordering = static_cast(MI.getOperand(IsMasked ? 6 : 5).getImm()); @@ -582,7 +582,7 @@ bool RISCVExpandPseudo::expandAtomicCmpXchg( // lr.w dest, (addr) // and scratch, dest, mask // bne scratch, cmpval, done - unsigned MaskReg = MI.getOperand(5).getReg(); + Register MaskReg = MI.getOperand(5).getReg(); BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg) .addReg(AddrReg); BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg) @@ -629,7 +629,7 @@ bool RISCVExpandPseudo::expandAuipcInstPair( MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); - unsigned DestReg = MI.getOperand(0).getReg(); + Register DestReg = MI.getOperand(0).getReg(); const MachineOperand &Symbol = MI.getOperand(1); MachineBasicBlock *NewMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); diff --git a/lib/Target/RISCV/RISCVFrameLowering.cpp b/lib/Target/RISCV/RISCVFrameLowering.cpp index 32c3b9684d2c..6b6f62e18ce9 100644 --- a/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -40,8 +40,16 @@ void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const { uint64_t FrameSize = MFI.getStackSize(); // Get the alignment. - uint64_t StackAlign = RI->needsStackRealignment(MF) ? MFI.getMaxAlignment() - : getStackAlignment(); + unsigned StackAlign = getStackAlignment(); + if (RI->needsStackRealignment(MF)) { + unsigned MaxStackAlign = std::max(StackAlign, MFI.getMaxAlignment()); + FrameSize += (MaxStackAlign - StackAlign); + StackAlign = MaxStackAlign; + } + + // Set Max Call Frame Size + uint64_t MaxCallSize = alignTo(MFI.getMaxCallFrameSize(), StackAlign); + MFI.setMaxCallFrameSize(MaxCallSize); // Make sure the frame is aligned. 
FrameSize = alignTo(FrameSize, StackAlign); @@ -52,8 +60,8 @@ void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const { void RISCVFrameLowering::adjustReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, unsigned DestReg, - unsigned SrcReg, int64_t Val, + const DebugLoc &DL, Register DestReg, + Register SrcReg, int64_t Val, MachineInstr::MIFlag Flag) const { MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const RISCVInstrInfo *TII = STI.getInstrInfo(); @@ -66,7 +74,7 @@ void RISCVFrameLowering::adjustReg(MachineBasicBlock &MBB, .addReg(SrcReg) .addImm(Val) .setMIFlag(Flag); - } else if (isInt<32>(Val)) { + } else { unsigned Opc = RISCV::ADD; bool isSub = Val < 0; if (isSub) { @@ -74,22 +82,20 @@ void RISCVFrameLowering::adjustReg(MachineBasicBlock &MBB, Opc = RISCV::SUB; } - unsigned ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); - TII->movImm32(MBB, MBBI, DL, ScratchReg, Val, Flag); + Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); + TII->movImm(MBB, MBBI, DL, ScratchReg, Val, Flag); BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) .addReg(SrcReg) .addReg(ScratchReg, RegState::Kill) .setMIFlag(Flag); - } else { - report_fatal_error("adjustReg cannot yet handle adjustments >32 bits"); } } // Returns the register used to hold the frame pointer. -static unsigned getFPReg(const RISCVSubtarget &STI) { return RISCV::X8; } +static Register getFPReg(const RISCVSubtarget &STI) { return RISCV::X8; } // Returns the register used to hold the stack pointer. -static unsigned getSPReg(const RISCVSubtarget &STI) { return RISCV::X2; } +static Register getSPReg(const RISCVSubtarget &STI) { return RISCV::X2; } void RISCVFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { @@ -101,8 +107,14 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, const RISCVInstrInfo *TII = STI.getInstrInfo(); MachineBasicBlock::iterator MBBI = MBB.begin(); - unsigned FPReg = getFPReg(STI); - unsigned SPReg = getSPReg(STI); + if (RI->needsStackRealignment(MF) && MFI.hasVarSizedObjects()) { + report_fatal_error( + "RISC-V backend can't currently handle functions that need stack " + "realignment and have variable sized objects"); + } + + Register FPReg = getFPReg(STI); + Register SPReg = getSPReg(STI); // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. @@ -119,6 +131,11 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, if (StackSize == 0 && !MFI.adjustsStack()) return; + uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); + // Split the SP adjustment to reduce the offsets of callee saved spill. + if (FirstSPAdjustAmount) + StackSize = FirstSPAdjustAmount; + // Allocate space on the stack if necessary. adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup); @@ -141,7 +158,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, // directives. 
for (const auto &Entry : CSI) { int64_t Offset = MFI.getObjectOffset(Entry.getFrameIdx()); - unsigned Reg = Entry.getReg(); + Register Reg = Entry.getReg(); unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, RI->getDwarfRegNum(Reg, true), Offset)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) @@ -159,6 +176,45 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } + + // Emit the second SP adjustment after saving callee saved registers. + if (FirstSPAdjustAmount) { + uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount; + assert(SecondSPAdjustAmount > 0 && + "SecondSPAdjustAmount should be greater than zero"); + adjustReg(MBB, MBBI, DL, SPReg, SPReg, -SecondSPAdjustAmount, + MachineInstr::FrameSetup); + // Emit ".cfi_def_cfa_offset StackSize" + unsigned CFIIndex = MF.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize())); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + + if (hasFP(MF)) { + // Realign Stack + const RISCVRegisterInfo *RI = STI.getRegisterInfo(); + if (RI->needsStackRealignment(MF)) { + unsigned MaxAlignment = MFI.getMaxAlignment(); + + const RISCVInstrInfo *TII = STI.getInstrInfo(); + if (isInt<12>(-(int)MaxAlignment)) { + BuildMI(MBB, MBBI, DL, TII->get(RISCV::ANDI), SPReg) + .addReg(SPReg) + .addImm(-(int)MaxAlignment); + } else { + unsigned ShiftAmount = countTrailingZeros(MaxAlignment); + Register VR = + MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass); + BuildMI(MBB, MBBI, DL, TII->get(RISCV::SRLI), VR) + .addReg(SPReg) + .addImm(ShiftAmount); + BuildMI(MBB, MBBI, DL, TII->get(RISCV::SLLI), SPReg) + .addReg(VR) + .addImm(ShiftAmount); + } + } + } } void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, @@ -169,8 +225,8 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, auto *RVFI = MF.getInfo(); DebugLoc DL = MBBI->getDebugLoc(); const RISCVInstrInfo *TII = STI.getInstrInfo(); - unsigned FPReg = getFPReg(STI); - unsigned SPReg = getSPReg(STI); + Register FPReg = getFPReg(STI); + Register SPReg = getSPReg(STI); // Skip to before the restores of callee-saved registers // FIXME: assumes exactly one instruction is used to restore each @@ -189,11 +245,29 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, MachineInstr::FrameDestroy); } + uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); + if (FirstSPAdjustAmount) { + uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount; + assert(SecondSPAdjustAmount > 0 && + "SecondSPAdjustAmount should be greater than zero"); + + adjustReg(MBB, LastFrameDestroy, DL, SPReg, SPReg, SecondSPAdjustAmount, + MachineInstr::FrameDestroy); + + // Emit ".cfi_def_cfa_offset FirstSPAdjustAmount" + unsigned CFIIndex = + MF.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, + -FirstSPAdjustAmount)); + BuildMI(MBB, LastFrameDestroy, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + if (hasFP(MF)) { // To find the instruction restoring FP from stack. for (auto &I = LastFrameDestroy; I != MBBI; ++I) { if (I->mayLoad() && I->getOperand(0).isReg()) { - unsigned DestReg = I->getOperand(0).getReg(); + Register DestReg = I->getOperand(0).getReg(); if (DestReg == FPReg) { // If there is frame pointer, after restoring $fp registers, we // need adjust CFA to ($sp - FPOffset). 
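The prologue/epilogue changes above split one large SP adjustment into two steps so that every callee-save store keeps a 12-bit offset, matching the add/sw listing given later for getFirstSPAdjustAmount. A minimal standalone sketch of that arithmetic, assuming a 2096-byte frame and the 16-byte RISC-V stack alignment; the names are illustrative, not LLVM's:

// Plain C++ model of the two-step SP adjustment; not LLVM code.
#include <cassert>
#include <cstdint>
#include <cstdio>

// Mirrors llvm::isInt<12>: fits in the signed 12-bit immediate of ADDI/SW/SD.
static bool fitsSImm12(int64_t V) { return V >= -2048 && V <= 2047; }

int main() {
  const int64_t StackAlign = 16;   // RV32/RV64 ABI stack alignment
  const int64_t StackSize  = 2096; // example frame too large for one ADDI

  // First step: 2048 - StackAlign = 2032, so the first ADDI encodes directly
  // and every callee-save slot offset below it still fits in 12 bits.
  int64_t First  = fitsSImm12(StackSize) ? StackSize : 2048 - StackAlign;
  int64_t Second = StackSize - First;

  assert(fitsSImm12(-First) && "first adjustment must encode as one ADDI");
  printf("addi sp, sp, -%lld\n", (long long)First);       // prologue step 1
  printf("sd   ra, %lld(sp)\n", (long long)(First - 8));  // offset < 2048
  if (Second)
    printf("addi sp, sp, -%lld\n", (long long)Second);    // after CSR saves
  return 0;
}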
@@ -214,13 +288,16 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, // Iterate over list of callee-saved registers and emit .cfi_restore // directives. for (const auto &Entry : CSI) { - unsigned Reg = Entry.getReg(); + Register Reg = Entry.getReg(); unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore( nullptr, RI->getDwarfRegNum(Reg, true))); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } + if (FirstSPAdjustAmount) + StackSize = FirstSPAdjustAmount; + // Deallocate stack adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize, MachineInstr::FrameDestroy); @@ -249,6 +326,8 @@ int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + MFI.getOffsetAdjustment(); + uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); + if (CSI.size()) { MinCSFI = CSI[0].getFrameIdx(); MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); @@ -256,6 +335,17 @@ int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, if (FI >= MinCSFI && FI <= MaxCSFI) { FrameReg = RISCV::X2; + + if (FirstSPAdjustAmount) + Offset += FirstSPAdjustAmount; + else + Offset += MF.getFrameInfo().getStackSize(); + } else if (RI->needsStackRealignment(MF)) { + assert(!MFI.hasVarSizedObjects() && + "Unexpected combination of stack realignment and varsized objects"); + // If the stack was realigned, the frame pointer is set in order to allow + // SP to be restored, but we still access stack objects using SP. + FrameReg = RISCV::X2; Offset += MF.getFrameInfo().getStackSize(); } else { FrameReg = RI->getFrameRegister(MF); @@ -338,7 +428,7 @@ bool RISCVFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr( MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { - unsigned SPReg = RISCV::X2; + Register SPReg = RISCV::X2; DebugLoc DL = MI->getDebugLoc(); if (!hasReservedCallFrame(MF)) { @@ -362,3 +452,39 @@ MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr( return MBB.erase(MI); } + +// We would like to split the SP adjustment to reduce prologue/epilogue +// as following instructions. In this way, the offset of the callee saved +// register could fit in a single store. +// add sp,sp,-2032 +// sw ra,2028(sp) +// sw s0,2024(sp) +// sw s1,2020(sp) +// sw s3,2012(sp) +// sw s4,2008(sp) +// add sp,sp,-64 +uint64_t +RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const std::vector &CSI = MFI.getCalleeSavedInfo(); + uint64_t StackSize = MFI.getStackSize(); + uint64_t StackAlign = getStackAlignment(); + + // FIXME: Disable SplitSPAdjust if save-restore libcall enabled when the patch + // landing. The callee saved registers will be pushed by the + // save-restore libcalls, so we don't have to split the SP adjustment + // in this case. + // + // Return the FirstSPAdjustAmount if the StackSize can not fit in signed + // 12-bit and there exists a callee saved register need to be pushed. + if (!isInt<12>(StackSize) && (CSI.size() > 0)) { + // FirstSPAdjustAmount is choosed as (2048 - StackAlign) + // because 2048 will cause sp = sp + 2048 in epilogue split into + // multi-instructions. The offset smaller than 2048 can fit in signle + // load/store instruction and we have to stick with the stack alignment. + // 2048 is 16-byte alignment. 
The stack alignment for RV32 and RV64 is 16, + // for RV32E is 4. So (2048 - StackAlign) will satisfy the stack alignment. + return 2048 - StackAlign; + } + return 0; +} diff --git a/lib/Target/RISCV/RISCVFrameLowering.h b/lib/Target/RISCV/RISCVFrameLowering.h index 0e045c3ff853..f4a5949773d9 100644 --- a/lib/Target/RISCV/RISCVFrameLowering.h +++ b/lib/Target/RISCV/RISCVFrameLowering.h @@ -22,7 +22,7 @@ class RISCVFrameLowering : public TargetFrameLowering { public: explicit RISCVFrameLowering(const RISCVSubtarget &STI) : TargetFrameLowering(StackGrowsDown, - /*StackAlignment=*/16, + /*StackAlignment=*/Align(16), /*LocalAreaOffset=*/0), STI(STI) {} @@ -45,13 +45,18 @@ public: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; + // Get the first stack adjustment amount for SplitSPAdjust. + // Return 0 if we don't want to to split the SP adjustment in prologue and + // epilogue. + uint64_t getFirstSPAdjustAmount(const MachineFunction &MF) const; + protected: const RISCVSubtarget &STI; private: void determineFrameLayout(MachineFunction &MF) const; void adjustReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, + const DebugLoc &DL, Register DestReg, Register SrcReg, int64_t Val, MachineInstr::MIFlag Flag) const; }; } diff --git a/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index d0a3af375a6d..1a12d9177d2a 100644 --- a/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -68,7 +68,7 @@ static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm, RISCVMatInt::InstSeq Seq; RISCVMatInt::generateInstSeq(Imm, XLenVT == MVT::i64, Seq); - SDNode *Result; + SDNode *Result = nullptr; SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT); for (RISCVMatInt::Inst &Inst : Seq) { SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT); @@ -179,6 +179,9 @@ bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( // operand and need no special handling. OutOps.push_back(Op); return false; + case InlineAsm::Constraint_A: + OutOps.push_back(Op); + return false; default: break; } diff --git a/lib/Target/RISCV/RISCVISelLowering.cpp b/lib/Target/RISCV/RISCVISelLowering.cpp index ce7b85911ab6..dc829fce9013 100644 --- a/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/lib/Target/RISCV/RISCVISelLowering.cpp @@ -100,6 +100,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); if (Subtarget.is64Bit()) { + setOperationAction(ISD::ADD, MVT::i32, Custom); + setOperationAction(ISD::SUB, MVT::i32, Custom); setOperationAction(ISD::SHL, MVT::i32, Custom); setOperationAction(ISD::SRA, MVT::i32, Custom); setOperationAction(ISD::SRL, MVT::i32, Custom); @@ -116,6 +118,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, } if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) { + setOperationAction(ISD::MUL, MVT::i32, Custom); setOperationAction(ISD::SDIV, MVT::i32, Custom); setOperationAction(ISD::UDIV, MVT::i32, Custom); setOperationAction(ISD::UREM, MVT::i32, Custom); @@ -194,8 +197,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setBooleanContents(ZeroOrOneBooleanContent); - // Function alignments (log2). - unsigned FunctionAlignment = Subtarget.hasStdExtC() ? 1 : 2; + // Function alignments. + const Align FunctionAlignment(Subtarget.hasStdExtC() ? 
2 : 4); setMinFunctionAlignment(FunctionAlignment); setPrefFunctionAlignment(FunctionAlignment); @@ -231,7 +234,7 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::getVT(PtrTy->getElementType()); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = 4; + Info.align = Align(4); Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; return true; @@ -660,7 +663,7 @@ SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MFI.setFrameAddressIsTaken(true); - unsigned FrameReg = RI.getFrameRegister(MF); + Register FrameReg = RI.getFrameRegister(MF); int XLenInBytes = Subtarget.getXLen() / 8; EVT VT = Op.getValueType(); @@ -703,7 +706,7 @@ SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, // Return the value of the return address register, marking it an implicit // live-in. - unsigned Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); + Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); } @@ -834,6 +837,18 @@ static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); } +// Converts the given 32-bit operation to a i64 operation with signed extension +// semantic to reduce the signed extension instructions. +static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) { + SDLoc DL(N); + SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); + SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); + SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1); + SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, + DAG.getValueType(MVT::i32)); + return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); +} + void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { @@ -854,6 +869,15 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(RCW.getValue(2)); break; } + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + "Unexpected custom legalisation"); + if (N->getOperand(1).getOpcode() == ISD::Constant) + return; + Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); + break; case ISD::SHL: case ISD::SRA: case ISD::SRL: @@ -1007,12 +1031,14 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift( // We can materialise `c1 << c2` into an add immediate, so it's "free", // and the combine should happen, to potentially allow further combines // later. - if (isLegalAddImmediate(ShiftedC1Int.getSExtValue())) + if (ShiftedC1Int.getMinSignedBits() <= 64 && + isLegalAddImmediate(ShiftedC1Int.getSExtValue())) return true; // We can materialise `c1` in an add immediate, so it's "free", and the // combine should be prevented. 
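Stepping back to the RV64 custom legalisation added above (ISD::ADD/SUB/MUL on i32 via customLegalizeToWOpWithSExt): the key fact is that the *W instructions compute a 32-bit result and sign-extend it into the 64-bit register, so the inputs can be any-extended for free. A plain C++ sketch of that invariant, with no LLVM types involved:

// ADDW-style semantics: only the low 32 bits of the inputs matter, and the
// 32-bit result is sign-extended to 64 bits.
#include <cassert>
#include <cstdint>
#include <cstring>

static int64_t sextFromWord(uint64_t X) {   // SIGN_EXTEND_INREG from i32
  uint32_t Lo = (uint32_t)X;
  int32_t W;
  std::memcpy(&W, &Lo, sizeof(W));
  return W;
}

static int64_t addw(uint64_t A, uint64_t B) { return sextFromWord(A + B); }

int main() {
  uint64_t A1 = 0x000000007fffffffULL;  // same low word...
  uint64_t A2 = 0xdeadbeef7fffffffULL;  // ...garbage high word
  assert(addw(A1, 1) == addw(A2, 1));   // high bits never leak into the result
  assert(addw(A1, 1) == -2147483648LL); // 0x80000000 sign-extended to i64
  return 0;
}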
- if (isLegalAddImmediate(C1Int.getSExtValue())) + if (C1Int.getMinSignedBits() <= 64 && + isLegalAddImmediate(C1Int.getSExtValue())) return false; // Neither constant will fit into an immediate, so find materialisation @@ -1052,8 +1078,8 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( return 1; } -MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, - MachineBasicBlock *BB) { +static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, + MachineBasicBlock *BB) { assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction"); // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves. @@ -1085,9 +1111,9 @@ MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, BB->addSuccessor(LoopMBB); MachineRegisterInfo &RegInfo = MF.getRegInfo(); - unsigned ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); - unsigned LoReg = MI.getOperand(0).getReg(); - unsigned HiReg = MI.getOperand(1).getReg(); + Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); + Register LoReg = MI.getOperand(0).getReg(); + Register HiReg = MI.getOperand(1).getReg(); DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); @@ -1122,9 +1148,9 @@ static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); - unsigned LoReg = MI.getOperand(0).getReg(); - unsigned HiReg = MI.getOperand(1).getReg(); - unsigned SrcReg = MI.getOperand(2).getReg(); + Register LoReg = MI.getOperand(0).getReg(); + Register HiReg = MI.getOperand(1).getReg(); + Register SrcReg = MI.getOperand(2).getReg(); const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; int FI = MF.getInfo()->getMoveF64FrameIndex(); @@ -1154,9 +1180,9 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned LoReg = MI.getOperand(1).getReg(); - unsigned HiReg = MI.getOperand(2).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register LoReg = MI.getOperand(1).getReg(); + Register HiReg = MI.getOperand(2).getReg(); const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; int FI = MF.getInfo()->getMoveF64FrameIndex(); @@ -1215,12 +1241,12 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, // previous selects in the sequence. // These conditions could be further relaxed. See the X86 target for a // related approach and more information. 
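The ReadCycleWide expansion above uses the classic way to read a 64-bit counter from two 32-bit CSRs on RV32: read cycleh, read cycle, re-read cycleh, and branch back if the high half changed in between. A self-contained C++ model of that loop; the accessors stand in for the csrr reads and are not real intrinsics, and the seeded counter value is chosen to force one retry:

#include <cstdint>
#include <cstdio>

// Simulated 64-bit cycle counter; incremented on each low-half read so the
// retry path is exercised.
static uint64_t FakeCycles = 0x00000001ffffffffULL;
static uint32_t readCycleHi() { return (uint32_t)(FakeCycles >> 32); }
static uint32_t readCycleLo() { ++FakeCycles; return (uint32_t)FakeCycles; }

static uint64_t readCycle64() {
  uint32_t Hi, Lo, HiAgain;
  do {
    Hi = readCycleHi();       // first read of the upper half
    Lo = readCycleLo();       // lower half (a carry may happen here)
    HiAgain = readCycleHi();  // second read of the upper half
  } while (Hi != HiAgain);    // branch back (BNE) if the high half changed
  return ((uint64_t)Hi << 32) | Lo;
}

int main() {
  printf("cycle = 0x%016llx\n", (unsigned long long)readCycle64());
  return 0;
}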
- unsigned LHS = MI.getOperand(1).getReg(); - unsigned RHS = MI.getOperand(2).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); auto CC = static_cast(MI.getOperand(3).getImm()); SmallVector SelectDebugValues; - SmallSet SelectDests; + SmallSet SelectDests; SelectDests.insert(MI.getOperand(0).getReg()); MachineInstr *LastSelectPseudo = &MI; @@ -1363,12 +1389,12 @@ static const MCPhysReg ArgGPRs[] = { RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 }; static const MCPhysReg ArgFPR32s[] = { - RISCV::F10_32, RISCV::F11_32, RISCV::F12_32, RISCV::F13_32, - RISCV::F14_32, RISCV::F15_32, RISCV::F16_32, RISCV::F17_32 + RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, + RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F }; static const MCPhysReg ArgFPR64s[] = { - RISCV::F10_64, RISCV::F11_64, RISCV::F12_64, RISCV::F13_64, - RISCV::F14_64, RISCV::F15_64, RISCV::F16_64, RISCV::F17_64 + RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, + RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D }; // Pass a 2*XLEN argument that has been split into two XLEN values through @@ -1378,7 +1404,7 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2) { unsigned XLenInBytes = XLen / 8; - if (unsigned Reg = State.AllocateReg(ArgGPRs)) { + if (Register Reg = State.AllocateReg(ArgGPRs)) { // At least one half can be passed via register. State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, VA1.getLocVT(), CCValAssign::Full)); @@ -1395,7 +1421,7 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, return false; } - if (unsigned Reg = State.AllocateReg(ArgGPRs)) { + if (Register Reg = State.AllocateReg(ArgGPRs)) { // The second half can also be passed via register. State.addLoc( CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); @@ -1495,7 +1521,7 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, // GPRs, split between a GPR and the stack, or passed completely on the // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these // cases. - unsigned Reg = State.AllocateReg(ArgGPRs); + Register Reg = State.AllocateReg(ArgGPRs); LocVT = MVT::i32; if (!Reg) { unsigned StackOffset = State.AllocateStack(8, 8); @@ -1537,7 +1563,7 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, } // Allocate to a register if possible, or else a stack slot. 
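CC_RISCVAssign2XLen above handles a 2*XLEN value (an f64 or i64 on RV32) that has already been split into two XLEN halves: the first half goes in a GPR, the second in the next GPR or on the stack. A plain C++ sketch of the split and reassembly itself, which is what SplitF64/BuildPairF64 model at the DAG level; the function names here are descriptive, not LLVM's:

#include <cassert>
#include <cstdint>
#include <cstring>

static void splitF64(double D, uint32_t &Lo, uint32_t &Hi) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));
  Lo = (uint32_t)Bits;          // first half: goes in a GPR
  Hi = (uint32_t)(Bits >> 32);  // second half: next GPR, or the stack when the
                                // first half landed in a7 (the X17 case
                                // handled later in LowerCall)
}

static double buildPairF64(uint32_t Lo, uint32_t Hi) {
  uint64_t Bits = ((uint64_t)Hi << 32) | Lo;
  double D;
  std::memcpy(&D, &Bits, sizeof(D));
  return D;
}

int main() {
  uint32_t Lo, Hi;
  splitF64(3.141592653589793, Lo, Hi);
  assert(buildPairF64(Lo, Hi) == 3.141592653589793);
  return 0;
}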
- unsigned Reg; + Register Reg; if (ValVT == MVT::f32 && !UseGPRForF32) Reg = State.AllocateReg(ArgFPR32s, ArgFPR64s); else if (ValVT == MVT::f64 && !UseGPRForF64) @@ -1673,7 +1699,7 @@ static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, break; } - unsigned VReg = RegInfo.createVirtualRegister(RC); + Register VReg = RegInfo.createVirtualRegister(RC); RegInfo.addLiveIn(VA.getLocReg(), VReg); Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); @@ -1751,7 +1777,7 @@ static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, assert(VA.isRegLoc() && "Expected register VA assignment"); - unsigned LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); + Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); RegInfo.addLiveIn(VA.getLocReg(), LoVReg); SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32); SDValue Hi; @@ -1763,13 +1789,70 @@ static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, MachinePointerInfo::getFixedStack(MF, FI)); } else { // Second half of f64 is passed in another GPR. - unsigned HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); + Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg); Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32); } return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi); } +// FastCC has less than 1% performance improvement for some particular +// benchmark. But theoretically, it may has benenfit for some cases. +static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + if (LocVT == MVT::i32 || LocVT == MVT::i64) { + // X5 and X6 might be used for save-restore libcall. + static const MCPhysReg GPRList[] = { + RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, + RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, + RISCV::X29, RISCV::X30, RISCV::X31}; + if (unsigned Reg = State.AllocateReg(GPRList)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + + if (LocVT == MVT::f32) { + static const MCPhysReg FPR32List[] = { + RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, + RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, + RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, + RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; + if (unsigned Reg = State.AllocateReg(FPR32List)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + + if (LocVT == MVT::f64) { + static const MCPhysReg FPR64List[] = { + RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, + RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, + RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, + RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; + if (unsigned Reg = State.AllocateReg(FPR64List)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + + if (LocVT == MVT::i32 || LocVT == MVT::f32) { + unsigned Offset4 = State.AllocateStack(4, 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo)); + return false; + } + + if (LocVT == MVT::i64 || LocVT == MVT::f64) { + unsigned Offset5 = State.AllocateStack(8, 8); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo)); + return false; + } + + return true; // CC didn't match. 
+} + // Transform physical registers into virtual registers. SDValue RISCVTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, @@ -1809,7 +1892,11 @@ SDValue RISCVTargetLowering::LowerFormalArguments( // Assign locations to all of the incoming arguments. SmallVector ArgLocs; CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); - analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false); + + if (CallConv == CallingConv::Fast) + CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC); + else + analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -1877,8 +1964,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments( // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures // offsets to even-numbered registered remain 2*XLEN-aligned. if (Idx % 2) { - FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, - true); + MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true); VarArgsSaveSize += XLenInBytes; } @@ -1886,7 +1972,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments( // to the vararg save area. for (unsigned I = Idx; I < ArgRegs.size(); ++I, VaArgOffset += XLenInBytes) { - const unsigned Reg = RegInfo.createVirtualRegister(RC); + const Register Reg = RegInfo.createVirtualRegister(RC); RegInfo.addLiveIn(ArgRegs[I], Reg); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT); FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); @@ -1920,7 +2006,6 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization( auto &Callee = CLI.Callee; auto CalleeCC = CLI.CallConv; - auto IsVarArg = CLI.IsVarArg; auto &Outs = CLI.Outs; auto &Caller = MF.getFunction(); auto CallerCC = Caller.getCallingConv(); @@ -1937,10 +2022,6 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization( if (Caller.hasFnAttribute("interrupt")) return false; - // Do not tail call opt functions with varargs. - if (IsVarArg) - return false; - // Do not tail call opt if the stack is used to pass parameters. if (CCInfo.getNextStackOffset() != 0) return false; @@ -2015,7 +2096,11 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, // Analyze the operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); - analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); + + if (CallConv == CallingConv::Fast) + ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC); + else + analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); // Check if it's really possible to do a tail call. if (IsTailCall) @@ -2057,7 +2142,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); // Copy argument values to their designated locations. 
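As an aside on the fast calling convention added above: CC_RISCV_FastCC, selected when CallConv == CallingConv::Fast, simply walks a fixed register list per value type and falls back to aligned stack slots, with X5/X6 left out because they may be needed by the save-restore libcalls. A standalone sketch of that allocation loop for integer arguments; the register names are the ABI aliases of X10-X17, X7, X28-X31, and the argument count is made up:

#include <cstdio>

int main() {
  // Same GPR order as the fastcc list above: a0-a7, then t2, then t3-t6.
  const char *GPRList[] = {"a0", "a1", "a2", "a3", "a4", "a5", "a6",
                           "a7", "t2", "t3", "t4", "t5", "t6"};
  const unsigned NumGPRs = sizeof(GPRList) / sizeof(GPRList[0]);
  const unsigned NumArgs = 16;  // more arguments than registers
  const unsigned SlotSize = 8;  // XLEN / 8 on RV64

  unsigned NextReg = 0, StackOffset = 0;
  for (unsigned I = 0; I < NumArgs; ++I) {
    if (NextReg < NumGPRs) {
      printf("arg%-2u -> %s\n", I, GPRList[NextReg++]);
    } else {
      printf("arg%-2u -> %u(sp)\n", I, StackOffset); // AllocateStack(8, 8)
      StackOffset += SlotSize;
    }
  }
  return 0;
}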
- SmallVector, 8> RegsToPass; + SmallVector, 8> RegsToPass; SmallVector MemOpChains; SDValue StackPtr; for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { @@ -2074,7 +2159,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, SDValue Lo = SplitF64.getValue(0); SDValue Hi = SplitF64.getValue(1); - unsigned RegLo = VA.getLocReg(); + Register RegLo = VA.getLocReg(); RegsToPass.push_back(std::make_pair(RegLo, Lo)); if (RegLo == RISCV::X17) { @@ -2087,7 +2172,8 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo())); } else { // Second half of f64 is passed in another GPR. - unsigned RegHigh = RegLo + 1; + assert(RegLo < RISCV::X31 && "Invalid register pair"); + Register RegHigh = RegLo + 1; RegsToPass.push_back(std::make_pair(RegHigh, Hi)); } continue; @@ -2302,8 +2388,9 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, DAG.getVTList(MVT::i32, MVT::i32), Val); SDValue Lo = SplitF64.getValue(0); SDValue Hi = SplitF64.getValue(1); - unsigned RegLo = VA.getLocReg(); - unsigned RegHi = RegLo + 1; + Register RegLo = VA.getLocReg(); + assert(RegLo < RISCV::X31 && "Invalid register pair"); + Register RegHi = RegLo + 1; Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue); Glue = Chain.getValue(1); RetOps.push_back(DAG.getRegister(RegLo, MVT::i32)); @@ -2397,6 +2484,27 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { return nullptr; } +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. +RISCVTargetLowering::ConstraintType +RISCVTargetLowering::getConstraintType(StringRef Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: + break; + case 'f': + return C_RegisterClass; + case 'I': + case 'J': + case 'K': + return C_Immediate; + case 'A': + return C_Memory; + } + } + return TargetLowering::getConstraintType(Constraint); +} + std::pair RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, @@ -2407,14 +2515,125 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, switch (Constraint[0]) { case 'r': return std::make_pair(0U, &RISCV::GPRRegClass); + case 'f': + if (Subtarget.hasStdExtF() && VT == MVT::f32) + return std::make_pair(0U, &RISCV::FPR32RegClass); + if (Subtarget.hasStdExtD() && VT == MVT::f64) + return std::make_pair(0U, &RISCV::FPR64RegClass); + break; default: break; } } + // Clang will correctly decode the usage of register name aliases into their + // official names. However, other frontends like `rustc` do not. This allows + // users of these frontends to use the ABI names for registers in LLVM-style + // register constraints. 
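The constraint handling added above ('A' mapped to C_Memory and InlineAsm::Constraint_A, together with the ABI-name register table that follows) is what lets user inline assembly address the AMO/LR/SC forms, which take a bare register address with no offset. Roughly, source like the following exercises the 'A' path; this only compiles for a RISC-V target with the A extension, and the wrapper function name is made up:

#include <cstdint>

// Illustrative only: swap *Ptr with NewVal using the 'A' memory constraint
// ("an address held in a general-purpose register").
int32_t atomicSwap(int32_t *Ptr, int32_t NewVal) {
  int32_t Old;
  __asm__ __volatile__("amoswap.w %0, %2, %1"
                       : "=r"(Old), "+A"(*Ptr)   // %1: address in a register
                       : "r"(NewVal)
                       : "memory");
  return Old;
}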
+ Register XRegFromAlias = StringSwitch(Constraint.lower()) + .Case("{zero}", RISCV::X0) + .Case("{ra}", RISCV::X1) + .Case("{sp}", RISCV::X2) + .Case("{gp}", RISCV::X3) + .Case("{tp}", RISCV::X4) + .Case("{t0}", RISCV::X5) + .Case("{t1}", RISCV::X6) + .Case("{t2}", RISCV::X7) + .Cases("{s0}", "{fp}", RISCV::X8) + .Case("{s1}", RISCV::X9) + .Case("{a0}", RISCV::X10) + .Case("{a1}", RISCV::X11) + .Case("{a2}", RISCV::X12) + .Case("{a3}", RISCV::X13) + .Case("{a4}", RISCV::X14) + .Case("{a5}", RISCV::X15) + .Case("{a6}", RISCV::X16) + .Case("{a7}", RISCV::X17) + .Case("{s2}", RISCV::X18) + .Case("{s3}", RISCV::X19) + .Case("{s4}", RISCV::X20) + .Case("{s5}", RISCV::X21) + .Case("{s6}", RISCV::X22) + .Case("{s7}", RISCV::X23) + .Case("{s8}", RISCV::X24) + .Case("{s9}", RISCV::X25) + .Case("{s10}", RISCV::X26) + .Case("{s11}", RISCV::X27) + .Case("{t3}", RISCV::X28) + .Case("{t4}", RISCV::X29) + .Case("{t5}", RISCV::X30) + .Case("{t6}", RISCV::X31) + .Default(RISCV::NoRegister); + if (XRegFromAlias != RISCV::NoRegister) + return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass); + + // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the + // TableGen record rather than the AsmName to choose registers for InlineAsm + // constraints, plus we want to match those names to the widest floating point + // register type available, manually select floating point registers here. + // + // The second case is the ABI name of the register, so that frontends can also + // use the ABI names in register constraint lists. + if (Subtarget.hasStdExtF() || Subtarget.hasStdExtD()) { + std::pair FReg = + StringSwitch>(Constraint.lower()) + .Cases("{f0}", "{ft0}", {RISCV::F0_F, RISCV::F0_D}) + .Cases("{f1}", "{ft1}", {RISCV::F1_F, RISCV::F1_D}) + .Cases("{f2}", "{ft2}", {RISCV::F2_F, RISCV::F2_D}) + .Cases("{f3}", "{ft3}", {RISCV::F3_F, RISCV::F3_D}) + .Cases("{f4}", "{ft4}", {RISCV::F4_F, RISCV::F4_D}) + .Cases("{f5}", "{ft5}", {RISCV::F5_F, RISCV::F5_D}) + .Cases("{f6}", "{ft6}", {RISCV::F6_F, RISCV::F6_D}) + .Cases("{f7}", "{ft7}", {RISCV::F7_F, RISCV::F7_D}) + .Cases("{f8}", "{fs0}", {RISCV::F8_F, RISCV::F8_D}) + .Cases("{f9}", "{fs1}", {RISCV::F9_F, RISCV::F9_D}) + .Cases("{f10}", "{fa0}", {RISCV::F10_F, RISCV::F10_D}) + .Cases("{f11}", "{fa1}", {RISCV::F11_F, RISCV::F11_D}) + .Cases("{f12}", "{fa2}", {RISCV::F12_F, RISCV::F12_D}) + .Cases("{f13}", "{fa3}", {RISCV::F13_F, RISCV::F13_D}) + .Cases("{f14}", "{fa4}", {RISCV::F14_F, RISCV::F14_D}) + .Cases("{f15}", "{fa5}", {RISCV::F15_F, RISCV::F15_D}) + .Cases("{f16}", "{fa6}", {RISCV::F16_F, RISCV::F16_D}) + .Cases("{f17}", "{fa7}", {RISCV::F17_F, RISCV::F17_D}) + .Cases("{f18}", "{fs2}", {RISCV::F18_F, RISCV::F18_D}) + .Cases("{f19}", "{fs3}", {RISCV::F19_F, RISCV::F19_D}) + .Cases("{f20}", "{fs4}", {RISCV::F20_F, RISCV::F20_D}) + .Cases("{f21}", "{fs5}", {RISCV::F21_F, RISCV::F21_D}) + .Cases("{f22}", "{fs6}", {RISCV::F22_F, RISCV::F22_D}) + .Cases("{f23}", "{fs7}", {RISCV::F23_F, RISCV::F23_D}) + .Cases("{f24}", "{fs8}", {RISCV::F24_F, RISCV::F24_D}) + .Cases("{f25}", "{fs9}", {RISCV::F25_F, RISCV::F25_D}) + .Cases("{f26}", "{fs10}", {RISCV::F26_F, RISCV::F26_D}) + .Cases("{f27}", "{fs11}", {RISCV::F27_F, RISCV::F27_D}) + .Cases("{f28}", "{ft8}", {RISCV::F28_F, RISCV::F28_D}) + .Cases("{f29}", "{ft9}", {RISCV::F29_F, RISCV::F29_D}) + .Cases("{f30}", "{ft10}", {RISCV::F30_F, RISCV::F30_D}) + .Cases("{f31}", "{ft11}", {RISCV::F31_F, RISCV::F31_D}) + .Default({RISCV::NoRegister, RISCV::NoRegister}); + if (FReg.first != RISCV::NoRegister) + 
return Subtarget.hasStdExtD() + ? std::make_pair(FReg.second, &RISCV::FPR64RegClass) + : std::make_pair(FReg.first, &RISCV::FPR32RegClass); + } + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } +unsigned +RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const { + // Currently only support length 1 constraints. + if (ConstraintCode.size() == 1) { + switch (ConstraintCode[0]) { + case 'A': + return InlineAsm::Constraint_A; + default: + break; + } + } + + return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); +} + void RISCVTargetLowering::LowerAsmOperandForConstraint( SDValue Op, std::string &Constraint, std::vector &Ops, SelectionDAG &DAG) const { @@ -2619,3 +2838,13 @@ unsigned RISCVTargetLowering::getExceptionSelectorRegister( const Constant *PersonalityFn) const { return RISCV::X11; } + +bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const { + // Return false to suppress the unnecessary extensions if the LibCall + // arguments or return value is f32 type for LP64 ABI. + RISCVABI::ABI ABI = Subtarget.getTargetABI(); + if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32)) + return false; + + return true; +} diff --git a/lib/Target/RISCV/RISCVISelLowering.h b/lib/Target/RISCV/RISCVISelLowering.h index 17db03bbb69e..18fc7350bbbf 100644 --- a/lib/Target/RISCV/RISCVISelLowering.h +++ b/lib/Target/RISCV/RISCVISelLowering.h @@ -92,6 +92,10 @@ public: // This method returns the name of a target specific DAG node. const char *getTargetNodeName(unsigned Opcode) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; + + unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override; + std::pair getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override; @@ -141,6 +145,8 @@ public: unsigned getExceptionSelectorRegister(const Constant *PersonalityFn) const override; + bool shouldExtendTypeInLibCall(EVT Type) const override; + private: void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo, const SmallVectorImpl &Ins, diff --git a/lib/Target/RISCV/RISCVInstrInfo.cpp b/lib/Target/RISCV/RISCVInstrInfo.cpp index 99c8d2ef73de..084839299530 100644 --- a/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -14,6 +14,7 @@ #include "RISCV.h" #include "RISCVSubtarget.h" #include "RISCVTargetMachine.h" +#include "Utils/RISCVMatInt.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -28,8 +29,9 @@ using namespace llvm; -RISCVInstrInfo::RISCVInstrInfo() - : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP) {} +RISCVInstrInfo::RISCVInstrInfo(RISCVSubtarget &STI) + : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP), + STI(STI) {} unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const { @@ -156,24 +158,43 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, BuildMI(MBB, I, DL, get(Opcode), DstReg).addFrameIndex(FI).addImm(0); } -void RISCVInstrInfo::movImm32(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, unsigned DstReg, uint64_t Val, - MachineInstr::MIFlag Flag) const { - assert(isInt<32>(Val) && "Can only materialize 32-bit constants"); - - // TODO: If the value can be materialized using only one instruction, only - // insert a single instruction. 
- - uint64_t Hi20 = ((Val + 0x800) >> 12) & 0xfffff; - uint64_t Lo12 = SignExtend64<12>(Val); - BuildMI(MBB, MBBI, DL, get(RISCV::LUI), DstReg) - .addImm(Hi20) - .setMIFlag(Flag); - BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg) - .addReg(DstReg, RegState::Kill) - .addImm(Lo12) - .setMIFlag(Flag); +void RISCVInstrInfo::movImm(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register DstReg, uint64_t Val, + MachineInstr::MIFlag Flag) const { + MachineFunction *MF = MBB.getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + bool IsRV64 = MF->getSubtarget().is64Bit(); + Register SrcReg = RISCV::X0; + Register Result = MRI.createVirtualRegister(&RISCV::GPRRegClass); + unsigned Num = 0; + + if (!IsRV64 && !isInt<32>(Val)) + report_fatal_error("Should only materialize 32-bit constants for RV32"); + + RISCVMatInt::InstSeq Seq; + RISCVMatInt::generateInstSeq(Val, IsRV64, Seq); + assert(Seq.size() > 0); + + for (RISCVMatInt::Inst &Inst : Seq) { + // Write the final result to DstReg if it's the last instruction in the Seq. + // Otherwise, write the result to the temp register. + if (++Num == Seq.size()) + Result = DstReg; + + if (Inst.Opc == RISCV::LUI) { + BuildMI(MBB, MBBI, DL, get(RISCV::LUI), Result) + .addImm(Inst.Imm) + .setMIFlag(Flag); + } else { + BuildMI(MBB, MBBI, DL, get(Inst.Opc), Result) + .addReg(SrcReg, RegState::Kill) + .addImm(Inst.Imm) + .setMIFlag(Flag); + } + // Only the first instruction has X0 as its source. + SrcReg = Result; + } } // The contents of values added to Cond are not examined outside of @@ -372,7 +393,7 @@ unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, // FIXME: A virtual register must be used initially, as the register // scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch // uses the same workaround). 
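The rewritten movImm above delegates to RISCVMatInt::generateInstSeq, which for 32-bit values still boils down to the LUI+ADDI split the deleted code computed by hand: round to the nearest 4 KiB page with +0x800, then add back the signed low 12 bits. A standalone check of that identity in plain C++, with no LLVM headers:

#include <cassert>
#include <cstdint>

// Materialize a 32-bit constant the LUI+ADDI way; unsigned wraparound on the
// uint32_t models the 32-bit register.
static uint32_t materializeLuiAddi(int64_t Val) {
  uint32_t Hi20 = (uint32_t)(((uint64_t)Val + 0x800) >> 12) & 0xfffff;
  int64_t Lo12 = (int64_t)(((uint64_t)Val & 0xfff) ^ 0x800) - 0x800; // sext 12
  uint32_t Reg = Hi20 << 12;             // lui  reg, Hi20
  Reg += (uint32_t)(int32_t)Lo12;        // addi reg, reg, Lo12
  return Reg;
}

int main() {
  const int64_t Cases[] = {0,    1,     -1,    2047,      2048,
                           -2048, -2049, INT32_MAX, INT32_MIN, 0x12345678};
  for (int64_t V : Cases)
    assert(materializeLuiAddi(V) == (uint32_t)V);
  return 0;
}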
- unsigned ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); + Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); auto II = MBB.end(); MachineInstr &LuiMI = *BuildMI(MBB, II, DL, get(RISCV::LUI), ScratchReg) @@ -466,3 +487,58 @@ bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { } return MI.isAsCheapAsAMove(); } + +bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, + StringRef &ErrInfo) const { + const MCInstrInfo *MCII = STI.getInstrInfo(); + MCInstrDesc const &Desc = MCII->get(MI.getOpcode()); + + for (auto &OI : enumerate(Desc.operands())) { + unsigned OpType = OI.value().OperandType; + if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM && + OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) { + const MachineOperand &MO = MI.getOperand(OI.index()); + if (MO.isImm()) { + int64_t Imm = MO.getImm(); + bool Ok; + switch (OpType) { + default: + llvm_unreachable("Unexpected operand type"); + case RISCVOp::OPERAND_UIMM4: + Ok = isUInt<4>(Imm); + break; + case RISCVOp::OPERAND_UIMM5: + Ok = isUInt<5>(Imm); + break; + case RISCVOp::OPERAND_UIMM12: + Ok = isUInt<12>(Imm); + break; + case RISCVOp::OPERAND_SIMM12: + Ok = isInt<12>(Imm); + break; + case RISCVOp::OPERAND_SIMM13_LSB0: + Ok = isShiftedInt<12, 1>(Imm); + break; + case RISCVOp::OPERAND_UIMM20: + Ok = isUInt<20>(Imm); + break; + case RISCVOp::OPERAND_SIMM21_LSB0: + Ok = isShiftedInt<20, 1>(Imm); + break; + case RISCVOp::OPERAND_UIMMLOG2XLEN: + if (STI.getTargetTriple().isArch64Bit()) + Ok = isUInt<6>(Imm); + else + Ok = isUInt<5>(Imm); + break; + } + if (!Ok) { + ErrInfo = "Invalid immediate"; + return false; + } + } + } + } + + return true; +} diff --git a/lib/Target/RISCV/RISCVInstrInfo.h b/lib/Target/RISCV/RISCVInstrInfo.h index ff098e660d19..d3ae04aefe04 100644 --- a/lib/Target/RISCV/RISCVInstrInfo.h +++ b/lib/Target/RISCV/RISCVInstrInfo.h @@ -21,10 +21,12 @@ namespace llvm { +class RISCVSubtarget; + class RISCVInstrInfo : public RISCVGenInstrInfo { public: - RISCVInstrInfo(); + explicit RISCVInstrInfo(RISCVSubtarget &STI); unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override; @@ -46,10 +48,10 @@ public: int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; - // Materializes the given int32 Val into DstReg. - void movImm32(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, unsigned DstReg, uint64_t Val, - MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const; + // Materializes the given integer Val into DstReg. 
+ void movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register DstReg, uint64_t Val, + MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const; unsigned getInstSizeInBytes(const MachineInstr &MI) const override; @@ -80,6 +82,12 @@ public: int64_t BrOffset) const override; bool isAsCheapAsAMove(const MachineInstr &MI) const override; + + bool verifyInstruction(const MachineInstr &MI, + StringRef &ErrInfo) const override; + +protected: + const RISCVSubtarget &STI; }; } #endif diff --git a/lib/Target/RISCV/RISCVInstrInfo.td b/lib/Target/RISCV/RISCVInstrInfo.td index 69bde15f1218..db2ecc49d14e 100644 --- a/lib/Target/RISCV/RISCVInstrInfo.td +++ b/lib/Target/RISCV/RISCVInstrInfo.td @@ -69,6 +69,12 @@ class ImmAsmOperand : AsmOperandClass { let DiagnosticType = !strconcat("Invalid", Name); } +def ImmZeroAsmOperand : AsmOperandClass { + let Name = "ImmZero"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = !strconcat("Invalid", Name); +} + class SImmAsmOperand : ImmAsmOperand<"S", width, suffix> { } @@ -87,6 +93,8 @@ def fencearg : Operand { let ParserMatchClass = FenceArg; let PrintMethod = "printFenceArg"; let DecoderMethod = "decodeUImmOperand<4>"; + let OperandType = "OPERAND_UIMM4"; + let OperandNamespace = "RISCVOp"; } def UImmLog2XLenAsmOperand : AsmOperandClass { @@ -111,11 +119,15 @@ def uimmlog2xlen : Operand, ImmLeaf(Imm); return isUInt<5>(Imm); }]; + let OperandType = "OPERAND_UIMMLOG2XLEN"; + let OperandNamespace = "RISCVOp"; } def uimm5 : Operand, ImmLeaf(Imm);}]> { let ParserMatchClass = UImmAsmOperand<5>; let DecoderMethod = "decodeUImmOperand<5>"; + let OperandType = "OPERAND_UIMM5"; + let OperandNamespace = "RISCVOp"; } def simm12 : Operand, ImmLeaf(Imm);}]> { @@ -128,6 +140,8 @@ def simm12 : Operand, ImmLeaf(Imm);}]> { return isInt<12>(Imm); return MCOp.isBareSymbolRef(); }]; + let OperandType = "OPERAND_SIMM12"; + let OperandNamespace = "RISCVOp"; } // A 13-bit signed immediate where the least significant bit is zero. @@ -141,6 +155,8 @@ def simm13_lsb0 : Operand { return isShiftedInt<12, 1>(Imm); return MCOp.isBareSymbolRef(); }]; + let OperandType = "OPERAND_SIMM13_LSB0"; + let OperandNamespace = "RISCVOp"; } class UImm20Operand : Operand { @@ -152,6 +168,8 @@ class UImm20Operand : Operand { return isUInt<20>(Imm); return MCOp.isBareSymbolRef(); }]; + let OperandType = "OPERAND_UIMM20"; + let OperandNamespace = "RISCVOp"; } def uimm20_lui : UImm20Operand { @@ -176,6 +194,8 @@ def simm21_lsb0_jal : Operand { return isShiftedInt<20, 1>(Imm); return MCOp.isBareSymbolRef(); }]; + let OperandType = "OPERAND_SIMM21_LSB0"; + let OperandNamespace = "RISCVOp"; } def BareSymbol : AsmOperandClass { @@ -224,6 +244,8 @@ def csr_sysreg : Operand { let ParserMatchClass = CSRSystemRegister; let PrintMethod = "printCSRSystemRegister"; let DecoderMethod = "decodeUImmOperand<12>"; + let OperandType = "OPERAND_UIMM12"; + let OperandNamespace = "RISCVOp"; } // A parameterized register class alternative to i32imm/i64imm from Target.td. diff --git a/lib/Target/RISCV/RISCVInstrInfoA.td b/lib/Target/RISCV/RISCVInstrInfoA.td index b768c9347b38..38ba3f9fb24e 100644 --- a/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/lib/Target/RISCV/RISCVInstrInfoA.td @@ -11,6 +11,24 @@ // //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// Operand and SDNode transformation definitions. 
+//===----------------------------------------------------------------------===// + +// A parse method for (${gpr}) or 0(${gpr}), where the 0 is be silently ignored. +// Used for GNU as Compatibility. +def AtomicMemOpOperand : AsmOperandClass { + let Name = "AtomicMemOpOperand"; + let RenderMethod = "addRegOperands"; + let PredicateMethod = "isReg"; + let ParserMethod = "parseAtomicMemOp"; +} + +def GPRMemAtomic : RegisterOperand { + let ParserMatchClass = AtomicMemOpOperand; + let PrintMethod = "printAtomicMemOp"; +} + //===----------------------------------------------------------------------===// // Instruction class templates //===----------------------------------------------------------------------===// @@ -18,8 +36,8 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in class LR_r funct3, string opcodestr> : RVInstRAtomic<0b00010, aq, rl, funct3, OPC_AMO, - (outs GPR:$rd), (ins GPR:$rs1), - opcodestr, "$rd, (${rs1})"> { + (outs GPR:$rd), (ins GPRMemAtomic:$rs1), + opcodestr, "$rd, $rs1"> { let rs2 = 0; } @@ -33,8 +51,8 @@ multiclass LR_r_aq_rl funct3, string opcodestr> { let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in class AMO_rr funct5, bit aq, bit rl, bits<3> funct3, string opcodestr> : RVInstRAtomic; + (outs GPR:$rd), (ins GPRMemAtomic:$rs1, GPR:$rs2), + opcodestr, "$rd, $rs2, $rs1">; multiclass AMO_rr_aq_rl funct5, bits<3> funct3, string opcodestr> { def "" : AMO_rr; @@ -196,12 +214,12 @@ class PseudoMaskedAMOUMinUMax } class PseudoMaskedAMOPat - : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering), + : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering), (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering)>; class PseudoMaskedAMOMinMaxPat : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt, - imm:$ordering), + timm:$ordering), (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt, imm:$ordering)>; @@ -270,7 +288,7 @@ def PseudoMaskedCmpXchg32 } def : Pat<(int_riscv_masked_cmpxchg_i32 - GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering), + GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), (PseudoMaskedCmpXchg32 GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>; @@ -347,7 +365,7 @@ def PseudoCmpXchg64 : PseudoCmpXchg; defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64>; def : Pat<(int_riscv_masked_cmpxchg_i64 - GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering), + GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), (PseudoMaskedCmpXchg32 GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>; } // Predicates = [HasStdExtA, IsRV64] diff --git a/lib/Target/RISCV/RISCVInstrInfoC.td b/lib/Target/RISCV/RISCVInstrInfoC.td index 94477341eea7..fa0050f107b2 100644 --- a/lib/Target/RISCV/RISCVInstrInfoC.td +++ b/lib/Target/RISCV/RISCVInstrInfoC.td @@ -61,6 +61,11 @@ def simm6nonzero : Operand, }]; } +def immzero : Operand, + ImmLeaf { + let ParserMatchClass = ImmZeroAsmOperand; +} + def CLUIImmAsmOperand : AsmOperandClass { let Name = "CLUIImm"; let RenderMethod = "addImmOperands"; @@ -132,7 +137,8 @@ def uimm8_lsb000 : Operand, } // A 9-bit signed immediate where the least significant bit is zero. -def simm9_lsb0 : Operand { +def simm9_lsb0 : Operand, + ImmLeaf(Imm);}]> { let ParserMatchClass = SImmAsmOperand<9, "Lsb0">; let EncoderMethod = "getImmOpValueAsr1"; let DecoderMethod = "decodeSImmOperandAndLsl1<9>"; @@ -191,7 +197,8 @@ def simm10_lsb0000nonzero : Operand, } // A 12-bit signed immediate where the least significant bit is zero. 
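The compressed branch and jump operands above and below (simm9_lsb0, simm12_lsb0, and simm13_lsb0 earlier) share one idea: the offset's least significant bit is always zero, so the predicate is an isShiftedInt-with-shift-1 style check and the encoder (getImmOpValueAsr1) stores the offset shifted right by one. A standalone sketch of the range check; the widths follow from the operand names (9- and 12-bit signed, LSB zero) rather than being copied from LLVM:

#include <cassert>
#include <cstdint>

// Mirrors an isShiftedInt<N, 1>-style check: V is even and fits in N+1
// signed bits, i.e. V is in [-2^N, 2^N - 2].
static bool isEvenSignedImm(unsigned N, int64_t V) {
  return (V & 1) == 0 && V >= -(INT64_C(1) << N) && V <= (INT64_C(1) << N) - 2;
}

int main() {
  // simm9_lsb0 (c.beqz/c.bnez): 9 bits with LSB zero, i.e. [-256, 254].
  assert(isEvenSignedImm(8, -256) && isEvenSignedImm(8, 254));
  assert(!isEvenSignedImm(8, 255) && !isEvenSignedImm(8, 256));

  // simm12_lsb0 (c.j/c.jal): 12 bits with LSB zero, i.e. [-2048, 2046].
  assert(isEvenSignedImm(11, -2048) && isEvenSignedImm(11, 2046));
  assert(!isEvenSignedImm(11, 2047) && !isEvenSignedImm(11, 2048));

  // The instruction encoding then stores the valid offset shifted right by 1.
  return 0;
}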
-def simm12_lsb0 : Operand { +def simm12_lsb0 : Operand, + ImmLeaf(Imm);}]> { let ParserMatchClass = SImmAsmOperand<12, "Lsb0">; let EncoderMethod = "getImmOpValueAsr1"; let DecoderMethod = "decodeSImmOperandAndLsl1<12>"; @@ -344,7 +351,10 @@ def C_SD : CStore_rri<0b111, "c.sd", GPRC, uimm8_lsb000> { } let rd = 0, imm = 0, hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -def C_NOP : RVInst16CI<0b000, 0b01, (outs), (ins), "c.nop", "">; +def C_NOP : RVInst16CI<0b000, 0b01, (outs), (ins), "c.nop", ""> +{ + let Inst{6-2} = 0; +} let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_ADDI : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb), @@ -354,6 +364,15 @@ def C_ADDI : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb), let Inst{6-2} = imm{4-0}; } +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +def C_ADDI_NOP : RVInst16CI<0b000, 0b01, (outs GPRX0:$rd_wb), + (ins GPRX0:$rd, immzero:$imm), + "c.addi", "$rd, $imm"> { + let Constraints = "$rd = $rd_wb"; + let Inst{6-2} = 0; + let isAsmParserOnly = 1; +} + let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCall = 1, DecoderNamespace = "RISCV32Only_", Defs = [X1], Predicates = [HasStdExtC, IsRV32] in @@ -522,6 +541,105 @@ def C_UNIMP : RVInst16<(outs), (ins), "c.unimp", "", [], InstFormatOther> { } // Predicates = [HasStdExtC] +//===----------------------------------------------------------------------===// +// HINT Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtC, HasRVCHints], hasSideEffects = 0, mayLoad = 0, + mayStore = 0 in +{ + +let rd = 0 in +def C_NOP_HINT : RVInst16CI<0b000, 0b01, (outs), (ins simm6nonzero:$imm), + "c.nop", "$imm"> { + let Inst{6-2} = imm{4-0}; + let DecoderMethod = "decodeRVCInstrSImm"; +} + +// Just a different syntax for the c.nop hint: c.addi x0, simm6 vs c.nop simm6. 
+def C_ADDI_HINT_X0 : RVInst16CI<0b000, 0b01, (outs GPRX0:$rd_wb), + (ins GPRX0:$rd, simm6nonzero:$imm), + "c.addi", "$rd, $imm"> { + let Constraints = "$rd = $rd_wb"; + let Inst{6-2} = imm{4-0}; + let isAsmParserOnly = 1; +} + +def C_ADDI_HINT_IMM_ZERO : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb), + (ins GPRNoX0:$rd, immzero:$imm), + "c.addi", "$rd, $imm"> { + let Constraints = "$rd = $rd_wb"; + let Inst{6-2} = 0; + let isAsmParserOnly = 1; +} + +def C_LI_HINT : RVInst16CI<0b010, 0b01, (outs GPRX0:$rd), (ins simm6:$imm), + "c.li", "$rd, $imm"> { + let Inst{6-2} = imm{4-0}; + let Inst{11-7} = 0; + let DecoderMethod = "decodeRVCInstrRdSImm"; +} + +def C_LUI_HINT : RVInst16CI<0b011, 0b01, (outs GPRX0:$rd), + (ins c_lui_imm:$imm), + "c.lui", "$rd, $imm"> { + let Inst{6-2} = imm{4-0}; + let Inst{11-7} = 0; + let DecoderMethod = "decodeRVCInstrRdSImm"; +} + +def C_MV_HINT : RVInst16CR<0b1000, 0b10, (outs GPRX0:$rs1), (ins GPRNoX0:$rs2), + "c.mv", "$rs1, $rs2"> +{ + let Inst{11-7} = 0; + let DecoderMethod = "decodeRVCInstrRdRs2"; +} + +def C_ADD_HINT : RVInst16CR<0b1001, 0b10, (outs GPRX0:$rs1_wb), + (ins GPRX0:$rs1, GPRNoX0:$rs2), + "c.add", "$rs1, $rs2"> { + let Constraints = "$rs1 = $rs1_wb"; + let Inst{11-7} = 0; + let DecoderMethod = "decodeRVCInstrRdRs1Rs2"; +} + +def C_SLLI_HINT : RVInst16CI<0b000, 0b10, (outs GPRX0:$rd_wb), + (ins GPRX0:$rd, uimmlog2xlennonzero:$imm), + "c.slli" ,"$rd, $imm"> { + let Constraints = "$rd = $rd_wb"; + let Inst{6-2} = imm{4-0}; + let Inst{11-7} = 0; + let DecoderMethod = "decodeRVCInstrRdRs1UImm"; +} + +def C_SLLI64_HINT : RVInst16CI<0b000, 0b10, (outs GPR:$rd_wb), (ins GPR:$rd), + "c.slli64" ,"$rd"> { + let Constraints = "$rd = $rd_wb"; + let Inst{6-2} = 0; + let Inst{12} = 0; +} + +def C_SRLI64_HINT : RVInst16CI<0b100, 0b01, (outs GPRC:$rd_wb), + (ins GPRC:$rd), + "c.srli64", "$rd"> { + let Constraints = "$rd = $rd_wb"; + let Inst{6-2} = 0; + let Inst{11-10} = 0; + let Inst{12} = 0; +} + +def C_SRAI64_HINT : RVInst16CI<0b100, 0b01, (outs GPRC:$rd_wb), + (ins GPRC:$rd), + "c.srai64", "$rd"> { + let Constraints = "$rd = $rd_wb"; + let Inst{6-2} = 0; + let Inst{11-10} = 1; + let Inst{12} = 0; +} + +} // Predicates = [HasStdExtC, HasRVCHints], hasSideEffects = 0, mayLoad = 0, + // mayStore = 0 + //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/RISCV/RISCVInstrInfoF.td b/lib/Target/RISCV/RISCVInstrInfoF.td index 032642942f2b..3b73c865ea17 100644 --- a/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/lib/Target/RISCV/RISCVInstrInfoF.td @@ -227,6 +227,12 @@ def : InstAlias<"frcsr $rd", (CSRRS GPR:$rd, FCSR.Encoding, X0), 2>; def : InstAlias<"fscsr $rd, $rs", (CSRRW GPR:$rd, FCSR.Encoding, GPR:$rs)>; def : InstAlias<"fscsr $rs", (CSRRW X0, FCSR.Encoding, GPR:$rs), 2>; +// frsr, fssr are obsolete aliases replaced by frcsr, fscsr, so give them +// zero weight. 
+def : InstAlias<"frsr $rd", (CSRRS GPR:$rd, FCSR.Encoding, X0), 0>; +def : InstAlias<"fssr $rd, $rs", (CSRRW GPR:$rd, FCSR.Encoding, GPR:$rs), 0>; +def : InstAlias<"fssr $rs", (CSRRW X0, FCSR.Encoding, GPR:$rs), 0>; + def : InstAlias<"frrm $rd", (CSRRS GPR:$rd, FRM.Encoding, X0), 2>; def : InstAlias<"fsrm $rd, $rs", (CSRRW GPR:$rd, FRM.Encoding, GPR:$rs)>; def : InstAlias<"fsrm $rs", (CSRRW X0, FRM.Encoding, GPR:$rs), 2>; diff --git a/lib/Target/RISCV/RISCVInstructionSelector.cpp b/lib/Target/RISCV/RISCVInstructionSelector.cpp new file mode 100644 index 000000000000..5bd09a546114 --- /dev/null +++ b/lib/Target/RISCV/RISCVInstructionSelector.cpp @@ -0,0 +1,103 @@ +//===-- RISCVInstructionSelector.cpp -----------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the targeting of the InstructionSelector class for +/// RISCV. +/// \todo This should be generated by TableGen. +//===----------------------------------------------------------------------===// + +#include "RISCVRegisterBankInfo.h" +#include "RISCVSubtarget.h" +#include "RISCVTargetMachine.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "riscv-isel" + +using namespace llvm; + +#define GET_GLOBALISEL_PREDICATE_BITSET +#include "RISCVGenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATE_BITSET + +namespace { + +class RISCVInstructionSelector : public InstructionSelector { +public: + RISCVInstructionSelector(const RISCVTargetMachine &TM, + const RISCVSubtarget &STI, + const RISCVRegisterBankInfo &RBI); + + bool select(MachineInstr &I) override; + static const char *getName() { return DEBUG_TYPE; } + +private: + bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; + + const RISCVSubtarget &STI; + const RISCVInstrInfo &TII; + const RISCVRegisterInfo &TRI; + const RISCVRegisterBankInfo &RBI; + + // FIXME: This is necessary because DAGISel uses "Subtarget->" and GlobalISel + // uses "STI." in the code generated by TableGen. We need to unify the name of + // Subtarget variable. + const RISCVSubtarget *Subtarget = &STI; + +#define GET_GLOBALISEL_PREDICATES_DECL +#include "RISCVGenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATES_DECL + +#define GET_GLOBALISEL_TEMPORARIES_DECL +#include "RISCVGenGlobalISel.inc" +#undef GET_GLOBALISEL_TEMPORARIES_DECL +}; + +} // end anonymous namespace + +#define GET_GLOBALISEL_IMPL +#include "RISCVGenGlobalISel.inc" +#undef GET_GLOBALISEL_IMPL + +RISCVInstructionSelector::RISCVInstructionSelector( + const RISCVTargetMachine &TM, const RISCVSubtarget &STI, + const RISCVRegisterBankInfo &RBI) + : InstructionSelector(), STI(STI), TII(*STI.getInstrInfo()), + TRI(*STI.getRegisterInfo()), RBI(RBI), + +#define GET_GLOBALISEL_PREDICATES_INIT +#include "RISCVGenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATES_INIT +#define GET_GLOBALISEL_TEMPORARIES_INIT +#include "RISCVGenGlobalISel.inc" +#undef GET_GLOBALISEL_TEMPORARIES_INIT +{ +} + +bool RISCVInstructionSelector::select(MachineInstr &I) { + + if (!isPreISelGenericOpcode(I.getOpcode())) { + // Certain non-generic instructions also need some special handling. 
+ return true; + } + + if (selectImpl(I, *CoverageInfo)) + return true; + + return false; +} + +namespace llvm { +InstructionSelector * +createRISCVInstructionSelector(const RISCVTargetMachine &TM, + RISCVSubtarget &Subtarget, + RISCVRegisterBankInfo &RBI) { + return new RISCVInstructionSelector(TM, Subtarget, RBI); +} +} // end namespace llvm diff --git a/lib/Target/RISCV/RISCVLegalizerInfo.cpp b/lib/Target/RISCV/RISCVLegalizerInfo.cpp new file mode 100644 index 000000000000..c92f4a3ee17b --- /dev/null +++ b/lib/Target/RISCV/RISCVLegalizerInfo.cpp @@ -0,0 +1,23 @@ +//===-- RISCVLegalizerInfo.cpp ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the targeting of the Machinelegalizer class for RISCV. +/// \todo This should be generated by TableGen. +//===----------------------------------------------------------------------===// + +#include "RISCVLegalizerInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Type.h" + +using namespace llvm; + +RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) { + computeTables(); +} diff --git a/lib/Target/RISCV/RISCVLegalizerInfo.h b/lib/Target/RISCV/RISCVLegalizerInfo.h new file mode 100644 index 000000000000..f2c2b9a3fd46 --- /dev/null +++ b/lib/Target/RISCV/RISCVLegalizerInfo.h @@ -0,0 +1,28 @@ +//===-- RISCVLegalizerInfo.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file declares the targeting of the Machinelegalizer class for RISCV. +/// \todo This should be generated by TableGen. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_RISCV_RISCVMACHINELEGALIZER_H +#define LLVM_LIB_TARGET_RISCV_RISCVMACHINELEGALIZER_H + +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" + +namespace llvm { + +class RISCVSubtarget; + +/// This class provides the information for the target register banks. 
+class RISCVLegalizerInfo : public LegalizerInfo { +public: + RISCVLegalizerInfo(const RISCVSubtarget &ST); +}; +} // end namespace llvm +#endif diff --git a/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/lib/Target/RISCV/RISCVMergeBaseOffset.cpp index 82b1209cb8e7..4c9013aa1e23 100644 --- a/lib/Target/RISCV/RISCVMergeBaseOffset.cpp +++ b/lib/Target/RISCV/RISCVMergeBaseOffset.cpp @@ -45,7 +45,7 @@ struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass { bool detectAndFoldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI); void foldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI, MachineInstr &Tail, int64_t Offset); - bool matchLargeOffset(MachineInstr &TailAdd, unsigned GSReg, int64_t &Offset); + bool matchLargeOffset(MachineInstr &TailAdd, Register GSReg, int64_t &Offset); RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {} MachineFunctionProperties getRequiredProperties() const override { @@ -85,7 +85,7 @@ bool RISCVMergeBaseOffsetOpt::detectLuiAddiGlobal(MachineInstr &HiLUI, HiLUI.getOperand(1).getOffset() != 0 || !MRI->hasOneUse(HiLUI.getOperand(0).getReg())) return false; - unsigned HiLuiDestReg = HiLUI.getOperand(0).getReg(); + Register HiLuiDestReg = HiLUI.getOperand(0).getReg(); LoADDI = MRI->use_begin(HiLuiDestReg)->getParent(); if (LoADDI->getOpcode() != RISCV::ADDI || LoADDI->getOperand(2).getTargetFlags() != RISCVII::MO_LO || @@ -132,12 +132,12 @@ void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &HiLUI, // \ / // TailAdd: add vreg4, vreg2, voff bool RISCVMergeBaseOffsetOpt::matchLargeOffset(MachineInstr &TailAdd, - unsigned GAReg, + Register GAReg, int64_t &Offset) { assert((TailAdd.getOpcode() == RISCV::ADD) && "Expected ADD instruction!"); - unsigned Rs = TailAdd.getOperand(1).getReg(); - unsigned Rt = TailAdd.getOperand(2).getReg(); - unsigned Reg = Rs == GAReg ? Rt : Rs; + Register Rs = TailAdd.getOperand(1).getReg(); + Register Rt = TailAdd.getOperand(2).getReg(); + Register Reg = Rs == GAReg ? Rt : Rs; // Can't fold if the register has more than one use. if (!MRI->hasOneUse(Reg)) @@ -178,7 +178,7 @@ bool RISCVMergeBaseOffsetOpt::matchLargeOffset(MachineInstr &TailAdd, bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI) { - unsigned DestReg = LoADDI.getOperand(0).getReg(); + Register DestReg = LoADDI.getOperand(0).getReg(); assert(MRI->hasOneUse(DestReg) && "expected one use for LoADDI"); // LoADDI has only one use. MachineInstr &Tail = *MRI->use_begin(DestReg)->getParent(); @@ -232,7 +232,7 @@ bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI, return false; // Register defined by LoADDI should be used in the base part of the // load\store instruction. Otherwise, no folding possible. - unsigned BaseAddrReg = Tail.getOperand(1).getReg(); + Register BaseAddrReg = Tail.getOperand(1).getReg(); if (DestReg != BaseAddrReg) return false; MachineOperand &TailImmOp = Tail.getOperand(2); diff --git a/lib/Target/RISCV/RISCVRegisterBankInfo.cpp b/lib/Target/RISCV/RISCVRegisterBankInfo.cpp new file mode 100644 index 000000000000..bd3b95a98b9f --- /dev/null +++ b/lib/Target/RISCV/RISCVRegisterBankInfo.cpp @@ -0,0 +1,26 @@ +//===-- RISCVRegisterBankInfo.cpp -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the targeting of the RegisterBankInfo class for RISCV. +/// \todo This should be generated by TableGen. +//===----------------------------------------------------------------------===// + +#include "RISCVRegisterBankInfo.h" +#include "MCTargetDesc/RISCVMCTargetDesc.h" +#include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" + +#define GET_TARGET_REGBANK_IMPL +#include "RISCVGenRegisterBank.inc" + +using namespace llvm; + +RISCVRegisterBankInfo::RISCVRegisterBankInfo(const TargetRegisterInfo &TRI) + : RISCVGenRegisterBankInfo() {} diff --git a/lib/Target/RISCV/RISCVRegisterBankInfo.h b/lib/Target/RISCV/RISCVRegisterBankInfo.h new file mode 100644 index 000000000000..05fac992734d --- /dev/null +++ b/lib/Target/RISCV/RISCVRegisterBankInfo.h @@ -0,0 +1,37 @@ +//===-- RISCVRegisterBankInfo.h ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file declares the targeting of the RegisterBankInfo class for RISCV. +/// \todo This should be generated by TableGen. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_RISCV_RISCVREGISTERBANKINFO_H +#define LLVM_LIB_TARGET_RISCV_RISCVREGISTERBANKINFO_H + +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" + +#define GET_REGBANK_DECLARATIONS +#include "RISCVGenRegisterBank.inc" + +namespace llvm { + +class TargetRegisterInfo; + +class RISCVGenRegisterBankInfo : public RegisterBankInfo { +protected: +#define GET_TARGET_REGBANK_CLASS +#include "RISCVGenRegisterBank.inc" +}; + +/// This class provides the information for the target register banks. +class RISCVRegisterBankInfo final : public RISCVGenRegisterBankInfo { +public: + RISCVRegisterBankInfo(const TargetRegisterInfo &TRI); +}; +} // end namespace llvm +#endif diff --git a/lib/Target/RISCV/RISCVRegisterBanks.td b/lib/Target/RISCV/RISCVRegisterBanks.td new file mode 100644 index 000000000000..400b65a1bf9a --- /dev/null +++ b/lib/Target/RISCV/RISCVRegisterBanks.td @@ -0,0 +1,13 @@ +//=-- RISCVRegisterBank.td - Describe the RISCV Banks --------*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +/// General Purpose Registers: X. 
+def GPRRegBank : RegisterBank<"GPRB", [GPR]>; diff --git a/lib/Target/RISCV/RISCVRegisterInfo.cpp b/lib/Target/RISCV/RISCVRegisterInfo.cpp index e6a126e3e513..66557687c0b6 100644 --- a/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -26,6 +26,15 @@ using namespace llvm; +static_assert(RISCV::X1 == RISCV::X0 + 1, "Register list not consecutive"); +static_assert(RISCV::X31 == RISCV::X0 + 31, "Register list not consecutive"); +static_assert(RISCV::F1_F == RISCV::F0_F + 1, "Register list not consecutive"); +static_assert(RISCV::F31_F == RISCV::F0_F + 31, + "Register list not consecutive"); +static_assert(RISCV::F1_D == RISCV::F0_D + 1, "Register list not consecutive"); +static_assert(RISCV::F31_D == RISCV::F0_D + 31, + "Register list not consecutive"); + RISCVRegisterInfo::RISCVRegisterInfo(unsigned HwMode) : RISCVGenRegisterInfo(RISCV::X1, /*DwarfFlavour*/0, /*EHFlavor*/0, /*PC*/0, HwMode) {} @@ -109,8 +118,8 @@ void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, assert(isInt<32>(Offset) && "Int32 expected"); // The offset won't fit in an immediate, so use a scratch register instead // Modify Offset and FrameReg appropriately - unsigned ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); - TII->movImm32(MBB, II, DL, ScratchReg, Offset); + Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); + TII->movImm(MBB, II, DL, ScratchReg, Offset); BuildMI(MBB, II, DL, TII->get(RISCV::ADD), ScratchReg) .addReg(FrameReg) .addReg(ScratchReg, RegState::Kill); diff --git a/lib/Target/RISCV/RISCVRegisterInfo.h b/lib/Target/RISCV/RISCVRegisterInfo.h index 4f339475508f..56a50fe6ddc0 100644 --- a/lib/Target/RISCV/RISCVRegisterInfo.h +++ b/lib/Target/RISCV/RISCVRegisterInfo.h @@ -52,6 +52,12 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo { bool trackLivenessAfterRegAlloc(const MachineFunction &) const override { return true; } + + const TargetRegisterClass * + getPointerRegClass(const MachineFunction &MF, + unsigned Kind = 0) const override { + return &RISCV::GPRRegClass; + } }; } diff --git a/lib/Target/RISCV/RISCVRegisterInfo.td b/lib/Target/RISCV/RISCVRegisterInfo.td index 79f8ab12f6c0..82b37afd0805 100644 --- a/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/lib/Target/RISCV/RISCVRegisterInfo.td @@ -101,6 +101,12 @@ def GPR : RegisterClass<"RISCV", [XLenVT], 32, (add [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>; } +def GPRX0 : RegisterClass<"RISCV", [XLenVT], 32, (add X0)> { + let RegInfos = RegInfoByHwMode< + [RV32, RV64, DefaultMode], + [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>; +} + // The order of registers represents the preferred allocation sequence. // Registers are listed in the order caller-save, callee-save, specials. 
def GPRNoX0 : RegisterClass<"RISCV", [XLenVT], 32, (add @@ -159,41 +165,41 @@ def SP : RegisterClass<"RISCV", [XLenVT], 32, (add X2)> { // Floating point registers let RegAltNameIndices = [ABIRegAltName] in { - def F0_32 : RISCVReg32<0, "f0", ["ft0"]>, DwarfRegNum<[32]>; - def F1_32 : RISCVReg32<1, "f1", ["ft1"]>, DwarfRegNum<[33]>; - def F2_32 : RISCVReg32<2, "f2", ["ft2"]>, DwarfRegNum<[34]>; - def F3_32 : RISCVReg32<3, "f3", ["ft3"]>, DwarfRegNum<[35]>; - def F4_32 : RISCVReg32<4, "f4", ["ft4"]>, DwarfRegNum<[36]>; - def F5_32 : RISCVReg32<5, "f5", ["ft5"]>, DwarfRegNum<[37]>; - def F6_32 : RISCVReg32<6, "f6", ["ft6"]>, DwarfRegNum<[38]>; - def F7_32 : RISCVReg32<7, "f7", ["ft7"]>, DwarfRegNum<[39]>; - def F8_32 : RISCVReg32<8, "f8", ["fs0"]>, DwarfRegNum<[40]>; - def F9_32 : RISCVReg32<9, "f9", ["fs1"]>, DwarfRegNum<[41]>; - def F10_32 : RISCVReg32<10,"f10", ["fa0"]>, DwarfRegNum<[42]>; - def F11_32 : RISCVReg32<11,"f11", ["fa1"]>, DwarfRegNum<[43]>; - def F12_32 : RISCVReg32<12,"f12", ["fa2"]>, DwarfRegNum<[44]>; - def F13_32 : RISCVReg32<13,"f13", ["fa3"]>, DwarfRegNum<[45]>; - def F14_32 : RISCVReg32<14,"f14", ["fa4"]>, DwarfRegNum<[46]>; - def F15_32 : RISCVReg32<15,"f15", ["fa5"]>, DwarfRegNum<[47]>; - def F16_32 : RISCVReg32<16,"f16", ["fa6"]>, DwarfRegNum<[48]>; - def F17_32 : RISCVReg32<17,"f17", ["fa7"]>, DwarfRegNum<[49]>; - def F18_32 : RISCVReg32<18,"f18", ["fs2"]>, DwarfRegNum<[50]>; - def F19_32 : RISCVReg32<19,"f19", ["fs3"]>, DwarfRegNum<[51]>; - def F20_32 : RISCVReg32<20,"f20", ["fs4"]>, DwarfRegNum<[52]>; - def F21_32 : RISCVReg32<21,"f21", ["fs5"]>, DwarfRegNum<[53]>; - def F22_32 : RISCVReg32<22,"f22", ["fs6"]>, DwarfRegNum<[54]>; - def F23_32 : RISCVReg32<23,"f23", ["fs7"]>, DwarfRegNum<[55]>; - def F24_32 : RISCVReg32<24,"f24", ["fs8"]>, DwarfRegNum<[56]>; - def F25_32 : RISCVReg32<25,"f25", ["fs9"]>, DwarfRegNum<[57]>; - def F26_32 : RISCVReg32<26,"f26", ["fs10"]>, DwarfRegNum<[58]>; - def F27_32 : RISCVReg32<27,"f27", ["fs11"]>, DwarfRegNum<[59]>; - def F28_32 : RISCVReg32<28,"f28", ["ft8"]>, DwarfRegNum<[60]>; - def F29_32 : RISCVReg32<29,"f29", ["ft9"]>, DwarfRegNum<[61]>; - def F30_32 : RISCVReg32<30,"f30", ["ft10"]>, DwarfRegNum<[62]>; - def F31_32 : RISCVReg32<31,"f31", ["ft11"]>, DwarfRegNum<[63]>; + def F0_F : RISCVReg32<0, "f0", ["ft0"]>, DwarfRegNum<[32]>; + def F1_F : RISCVReg32<1, "f1", ["ft1"]>, DwarfRegNum<[33]>; + def F2_F : RISCVReg32<2, "f2", ["ft2"]>, DwarfRegNum<[34]>; + def F3_F : RISCVReg32<3, "f3", ["ft3"]>, DwarfRegNum<[35]>; + def F4_F : RISCVReg32<4, "f4", ["ft4"]>, DwarfRegNum<[36]>; + def F5_F : RISCVReg32<5, "f5", ["ft5"]>, DwarfRegNum<[37]>; + def F6_F : RISCVReg32<6, "f6", ["ft6"]>, DwarfRegNum<[38]>; + def F7_F : RISCVReg32<7, "f7", ["ft7"]>, DwarfRegNum<[39]>; + def F8_F : RISCVReg32<8, "f8", ["fs0"]>, DwarfRegNum<[40]>; + def F9_F : RISCVReg32<9, "f9", ["fs1"]>, DwarfRegNum<[41]>; + def F10_F : RISCVReg32<10,"f10", ["fa0"]>, DwarfRegNum<[42]>; + def F11_F : RISCVReg32<11,"f11", ["fa1"]>, DwarfRegNum<[43]>; + def F12_F : RISCVReg32<12,"f12", ["fa2"]>, DwarfRegNum<[44]>; + def F13_F : RISCVReg32<13,"f13", ["fa3"]>, DwarfRegNum<[45]>; + def F14_F : RISCVReg32<14,"f14", ["fa4"]>, DwarfRegNum<[46]>; + def F15_F : RISCVReg32<15,"f15", ["fa5"]>, DwarfRegNum<[47]>; + def F16_F : RISCVReg32<16,"f16", ["fa6"]>, DwarfRegNum<[48]>; + def F17_F : RISCVReg32<17,"f17", ["fa7"]>, DwarfRegNum<[49]>; + def F18_F : RISCVReg32<18,"f18", ["fs2"]>, DwarfRegNum<[50]>; + def F19_F : RISCVReg32<19,"f19", ["fs3"]>, DwarfRegNum<[51]>; + def F20_F : 
RISCVReg32<20,"f20", ["fs4"]>, DwarfRegNum<[52]>; + def F21_F : RISCVReg32<21,"f21", ["fs5"]>, DwarfRegNum<[53]>; + def F22_F : RISCVReg32<22,"f22", ["fs6"]>, DwarfRegNum<[54]>; + def F23_F : RISCVReg32<23,"f23", ["fs7"]>, DwarfRegNum<[55]>; + def F24_F : RISCVReg32<24,"f24", ["fs8"]>, DwarfRegNum<[56]>; + def F25_F : RISCVReg32<25,"f25", ["fs9"]>, DwarfRegNum<[57]>; + def F26_F : RISCVReg32<26,"f26", ["fs10"]>, DwarfRegNum<[58]>; + def F27_F : RISCVReg32<27,"f27", ["fs11"]>, DwarfRegNum<[59]>; + def F28_F : RISCVReg32<28,"f28", ["ft8"]>, DwarfRegNum<[60]>; + def F29_F : RISCVReg32<29,"f29", ["ft9"]>, DwarfRegNum<[61]>; + def F30_F : RISCVReg32<30,"f30", ["ft10"]>, DwarfRegNum<[62]>; + def F31_F : RISCVReg32<31,"f31", ["ft11"]>, DwarfRegNum<[63]>; foreach Index = 0-31 in { - def F#Index#_64 : RISCVReg64("F"#Index#"_32")>, + def F#Index#_D : RISCVReg64("F"#Index#"_F")>, DwarfRegNum<[!add(Index, 32)]>; } } @@ -201,29 +207,29 @@ let RegAltNameIndices = [ABIRegAltName] in { // The order of registers represents the preferred allocation sequence, // meaning caller-save regs are listed before callee-save. def FPR32 : RegisterClass<"RISCV", [f32], 32, (add - (sequence "F%u_32", 0, 7), - (sequence "F%u_32", 10, 17), - (sequence "F%u_32", 28, 31), - (sequence "F%u_32", 8, 9), - (sequence "F%u_32", 18, 27) + (sequence "F%u_F", 0, 7), + (sequence "F%u_F", 10, 17), + (sequence "F%u_F", 28, 31), + (sequence "F%u_F", 8, 9), + (sequence "F%u_F", 18, 27) )>; def FPR32C : RegisterClass<"RISCV", [f32], 32, (add - (sequence "F%u_32", 10, 15), - (sequence "F%u_32", 8, 9) + (sequence "F%u_F", 10, 15), + (sequence "F%u_F", 8, 9) )>; // The order of registers represents the preferred allocation sequence, // meaning caller-save regs are listed before callee-save. def FPR64 : RegisterClass<"RISCV", [f64], 64, (add - (sequence "F%u_64", 0, 7), - (sequence "F%u_64", 10, 17), - (sequence "F%u_64", 28, 31), - (sequence "F%u_64", 8, 9), - (sequence "F%u_64", 18, 27) + (sequence "F%u_D", 0, 7), + (sequence "F%u_D", 10, 17), + (sequence "F%u_D", 28, 31), + (sequence "F%u_D", 8, 9), + (sequence "F%u_D", 18, 27) )>; def FPR64C : RegisterClass<"RISCV", [f64], 64, (add - (sequence "F%u_64", 10, 15), - (sequence "F%u_64", 8, 9) + (sequence "F%u_D", 10, 15), + (sequence "F%u_D", 8, 9) )>; diff --git a/lib/Target/RISCV/RISCVSubtarget.cpp b/lib/Target/RISCV/RISCVSubtarget.cpp index 6902ed75d852..f114c6ac1925 100644 --- a/lib/Target/RISCV/RISCVSubtarget.cpp +++ b/lib/Target/RISCV/RISCVSubtarget.cpp @@ -12,7 +12,11 @@ #include "RISCVSubtarget.h" #include "RISCV.h" +#include "RISCVCallLowering.h" #include "RISCVFrameLowering.h" +#include "RISCVLegalizerInfo.h" +#include "RISCVRegisterBankInfo.h" +#include "RISCVTargetMachine.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -47,4 +51,28 @@ RISCVSubtarget::RISCVSubtarget(const Triple &TT, StringRef CPU, StringRef FS, StringRef ABIName, const TargetMachine &TM) : RISCVGenSubtargetInfo(TT, CPU, FS), FrameLowering(initializeSubtargetDependencies(TT, CPU, FS, ABIName)), - InstrInfo(), RegInfo(getHwMode()), TLInfo(TM, *this) {} + InstrInfo(*this), RegInfo(getHwMode()), TLInfo(TM, *this) { + CallLoweringInfo.reset(new RISCVCallLowering(*getTargetLowering())); + Legalizer.reset(new RISCVLegalizerInfo(*this)); + + auto *RBI = new RISCVRegisterBankInfo(*getRegisterInfo()); + RegBankInfo.reset(RBI); + InstSelector.reset(createRISCVInstructionSelector( + *static_cast(&TM), *this, *RBI)); +} + +const CallLowering *RISCVSubtarget::getCallLowering() const { + return 
CallLoweringInfo.get(); +} + +InstructionSelector *RISCVSubtarget::getInstructionSelector() const { + return InstSelector.get(); +} + +const LegalizerInfo *RISCVSubtarget::getLegalizerInfo() const { + return Legalizer.get(); +} + +const RegisterBankInfo *RISCVSubtarget::getRegBankInfo() const { + return RegBankInfo.get(); +} diff --git a/lib/Target/RISCV/RISCVSubtarget.h b/lib/Target/RISCV/RISCVSubtarget.h index 106ff49f021a..7d0373a5253a 100644 --- a/lib/Target/RISCV/RISCVSubtarget.h +++ b/lib/Target/RISCV/RISCVSubtarget.h @@ -17,6 +17,10 @@ #include "RISCVISelLowering.h" #include "RISCVInstrInfo.h" #include "Utils/RISCVBaseInfo.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" @@ -38,6 +42,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { bool HasRV64 = false; bool IsRV32E = false; bool EnableLinkerRelax = false; + bool EnableRVCHintInstrs = false; unsigned XLen = 32; MVT XLenVT = MVT::i32; RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown; @@ -75,6 +80,7 @@ public: const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { return &TSInfo; } + bool enableMachineScheduler() const override { return true; } bool hasStdExtM() const { return HasStdExtM; } bool hasStdExtA() const { return HasStdExtA; } bool hasStdExtF() const { return HasStdExtF; } @@ -83,9 +89,23 @@ public: bool is64Bit() const { return HasRV64; } bool isRV32E() const { return IsRV32E; } bool enableLinkerRelax() const { return EnableLinkerRelax; } + bool enableRVCHintInstrs() const { return EnableRVCHintInstrs; } MVT getXLenVT() const { return XLenVT; } unsigned getXLen() const { return XLen; } RISCVABI::ABI getTargetABI() const { return TargetABI; } + +protected: + // GlobalISel related APIs. 
+ std::unique_ptr CallLoweringInfo; + std::unique_ptr InstSelector; + std::unique_ptr Legalizer; + std::unique_ptr RegBankInfo; + +public: + const CallLowering *getCallLowering() const override; + InstructionSelector *getInstructionSelector() const override; + const LegalizerInfo *getLegalizerInfo() const override; + const RegisterBankInfo *getRegBankInfo() const override; }; } // End llvm namespace diff --git a/lib/Target/RISCV/RISCVTargetMachine.cpp b/lib/Target/RISCV/RISCVTargetMachine.cpp index f4e6ed9f6284..5ffc6eda6bd7 100644 --- a/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -17,6 +17,10 @@ #include "TargetInfo/RISCVTargetInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/GlobalISel/IRTranslator.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -30,6 +34,7 @@ extern "C" void LLVMInitializeRISCVTarget() { RegisterTargetMachine X(getTheRISCV32Target()); RegisterTargetMachine Y(getTheRISCV64Target()); auto PR = PassRegistry::getPassRegistry(); + initializeGlobalISel(*PR); initializeRISCVExpandPseudoPass(*PR); } @@ -58,7 +63,7 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT, : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), - TLOF(make_unique()), + TLOF(std::make_unique()), Subtarget(TT, CPU, FS, Options.MCOptions.getABIName(), *this) { initAsmInfo(); } @@ -80,6 +85,10 @@ public: void addIRPasses() override; bool addInstSelector() override; + bool addIRTranslator() override; + bool addLegalizeMachineIR() override; + bool addRegBankSelect() override; + bool addGlobalInstructionSelect() override; void addPreEmitPass() override; void addPreEmitPass2() override; void addPreRegAlloc() override; @@ -101,6 +110,26 @@ bool RISCVPassConfig::addInstSelector() { return false; } +bool RISCVPassConfig::addIRTranslator() { + addPass(new IRTranslator()); + return false; +} + +bool RISCVPassConfig::addLegalizeMachineIR() { + addPass(new Legalizer()); + return false; +} + +bool RISCVPassConfig::addRegBankSelect() { + addPass(new RegBankSelect()); + return false; +} + +bool RISCVPassConfig::addGlobalInstructionSelect() { + addPass(new InstructionSelect()); + return false; +} + void RISCVPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); } void RISCVPassConfig::addPreEmitPass2() { diff --git a/lib/Target/RISCV/Utils/RISCVBaseInfo.h b/lib/Target/RISCV/Utils/RISCVBaseInfo.h index c33c72f24319..30e475e80a01 100644 --- a/lib/Target/RISCV/Utils/RISCVBaseInfo.h +++ b/lib/Target/RISCV/Utils/RISCVBaseInfo.h @@ -16,6 +16,7 @@ #include "MCTargetDesc/RISCVMCTargetDesc.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/SubtargetFeature.h" namespace llvm { @@ -63,6 +64,21 @@ enum { }; } // namespace RISCVII +namespace RISCVOp { +enum OperandType : unsigned { + OPERAND_FIRST_RISCV_IMM = MCOI::OPERAND_FIRST_TARGET, + OPERAND_UIMM4 = OPERAND_FIRST_RISCV_IMM, + OPERAND_UIMM5, + OPERAND_UIMM12, + OPERAND_SIMM12, + OPERAND_SIMM13_LSB0, + OPERAND_UIMM20, + OPERAND_SIMM21_LSB0, + OPERAND_UIMMLOG2XLEN, + OPERAND_LAST_RISCV_IMM = OPERAND_UIMMLOG2XLEN +}; +} // namespace RISCVOp 
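The RISCVOp::OperandType enum added just above tags each RISCV immediate operand class with a target-specific value starting at MCOI::OPERAND_FIRST_TARGET. As a rough illustration of how such tags are usually consumed (this checker is not part of the patch; it only assumes the RISCVOp enum above and the helpers in llvm/Support/MathExtras.h), a verifier-style hook can switch on the recorded operand type and range-check the immediate:

    // Illustrative sketch only; assumes the RISCVOp namespace from the hunk
    // above and the integer-width helpers from llvm/Support/MathExtras.h.
    #include "llvm/Support/MathExtras.h"

    static bool isImmInRange(unsigned OperandType, int64_t Imm) {
      switch (OperandType) {
      case RISCVOp::OPERAND_UIMM4:
        return llvm::isUInt<4>(Imm);           // unsigned, 4 bits
      case RISCVOp::OPERAND_UIMM5:
        return llvm::isUInt<5>(Imm);           // unsigned, 5 bits
      case RISCVOp::OPERAND_SIMM12:
        return llvm::isInt<12>(Imm);           // signed, 12 bits
      case RISCVOp::OPERAND_SIMM13_LSB0:
        return llvm::isShiftedInt<12, 1>(Imm); // 13-bit signed, bit 0 clear
      default:
        return true;                           // not an immediate we check here
      }
    }
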
+ // Describes the predecessor/successor bits used in the FENCE instruction. namespace RISCVFenceField { enum FenceField { diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index 15453ae59a4f..f6be9dd01249 100644 --- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -376,7 +376,7 @@ public: } static std::unique_ptr CreateToken(StringRef Str, SMLoc S) { - auto Op = make_unique(k_Token); + auto Op = std::make_unique(k_Token); Op->Tok.Data = Str.data(); Op->Tok.Length = Str.size(); Op->StartLoc = S; @@ -386,7 +386,7 @@ public: static std::unique_ptr CreateReg(unsigned RegNum, unsigned Kind, SMLoc S, SMLoc E) { - auto Op = make_unique(k_Register); + auto Op = std::make_unique(k_Register); Op->Reg.RegNum = RegNum; Op->Reg.Kind = (SparcOperand::RegisterKind)Kind; Op->StartLoc = S; @@ -396,7 +396,7 @@ public: static std::unique_ptr CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { - auto Op = make_unique(k_Immediate); + auto Op = std::make_unique(k_Immediate); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; @@ -481,7 +481,7 @@ public: static std::unique_ptr CreateMEMr(unsigned Base, SMLoc S, SMLoc E) { - auto Op = make_unique(k_MemoryReg); + auto Op = std::make_unique(k_MemoryReg); Op->Mem.Base = Base; Op->Mem.OffsetReg = Sparc::G0; // always 0 Op->Mem.Off = nullptr; diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp index f1ca8e18c228..db8e7850300f 100644 --- a/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/lib/Target/Sparc/DelaySlotFiller.cpp @@ -253,7 +253,7 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate, if (!MO.isReg()) continue; // skip - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (MO.isDef()) { // check whether Reg is defined or used before delay slot. 
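The unsigned-to-Register renames running through this import (the Sparc delay-slot filler above, the RISCV and SystemZ files elsewhere) are mechanical because llvm::Register is a thin wrapper around the old unsigned register id. A minimal sketch, assuming only llvm/CodeGen/Register.h, of why comparisons such as `if (Reg == 0)` or `reg < SP::I0` in these hunks keep compiling unchanged:

    // Sketch only: Register converts implicitly to and from the old unsigned
    // ids, so pre-existing comparisons and range checks still work.
    #include "llvm/CodeGen/Register.h"
    using llvm::Register;

    static const char *classifyReg(Register Reg) {
      if (Reg == 0)
        return "none";       // MachineOperand::getReg() may legitimately be 0
      if (Register::isVirtualRegister(Reg))
        return "virtual";    // pre-regalloc virtual register
      unsigned PhysId = Reg; // implicit conversion back to a raw id
      (void)PhysId;
      return "physical";
    }
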
@@ -324,7 +324,7 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI, if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; if (MO.isDef()) @@ -380,7 +380,7 @@ static bool combineRestoreADD(MachineBasicBlock::iterator RestoreMI, // // After : restore , , %o[0-7] - unsigned reg = AddMI->getOperand(0).getReg(); + Register reg = AddMI->getOperand(0).getReg(); if (reg < SP::I0 || reg > SP::I7) return false; @@ -408,7 +408,7 @@ static bool combineRestoreOR(MachineBasicBlock::iterator RestoreMI, // // After : restore , , %o[0-7] - unsigned reg = OrMI->getOperand(0).getReg(); + Register reg = OrMI->getOperand(0).getReg(); if (reg < SP::I0 || reg > SP::I7) return false; @@ -446,7 +446,7 @@ static bool combineRestoreSETHIi(MachineBasicBlock::iterator RestoreMI, // // After : restore %g0, (imm3<<10), %o[0-7] - unsigned reg = SetHiMI->getOperand(0).getReg(); + Register reg = SetHiMI->getOperand(0).getReg(); if (reg < SP::I0 || reg > SP::I7) return false; diff --git a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp index 88547075c5ae..c97a30e634cc 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp @@ -49,7 +49,7 @@ unsigned SparcELFObjectWriter::getRelocType(MCContext &Ctx, } if (IsPCRel) { - switch((unsigned)Fixup.getKind()) { + switch(Fixup.getTargetKind()) { default: llvm_unreachable("Unimplemented fixup -> relocation"); case FK_Data_1: return ELF::R_SPARC_DISP8; @@ -65,7 +65,7 @@ unsigned SparcELFObjectWriter::getRelocType(MCContext &Ctx, } } - switch((unsigned)Fixup.getKind()) { + switch(Fixup.getTargetKind()) { default: llvm_unreachable("Unimplemented fixup -> relocation"); case FK_Data_1: return ELF::R_SPARC_8; @@ -135,5 +135,5 @@ bool SparcELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym, std::unique_ptr llvm::createSparcELFObjectWriter(bool Is64Bit, uint8_t OSABI) { - return llvm::make_unique(Is64Bit, OSABI); + return std::make_unique(Is64Bit, OSABI); } diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp index 1834a6fd861d..0f74f2bb344c 100644 --- a/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/lib/Target/Sparc/SparcFrameLowering.cpp @@ -34,7 +34,8 @@ DisableLeafProc("disable-sparc-leaf-proc", SparcFrameLowering::SparcFrameLowering(const SparcSubtarget &ST) : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, - ST.is64Bit() ? 16 : 8, 0, ST.is64Bit() ? 16 : 8) {} + ST.is64Bit() ? Align(16) : Align(8), 0, + ST.is64Bit() ? Align(16) : Align(8)) {} void SparcFrameLowering::emitSPAdjustment(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index 8cff50d19ed4..4e61c341b703 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -231,7 +231,7 @@ bool SparcDAGToDAGISel::tryInlineAsm(SDNode *N){ // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to // the original GPRs. - unsigned GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass); + Register GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass); PairedReg = CurDAG->getRegister(GPVR, MVT::v2i32); SDValue Chain = SDValue(N,0); @@ -278,7 +278,7 @@ bool SparcDAGToDAGISel::tryInlineAsm(SDNode *N){ // Copy REG_SEQ into a GPRPair-typed VR and replace the original two // i32 VRs of inline asm with it. 
- unsigned GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass); + Register GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass); PairedReg = CurDAG->getRegister(GPVR, MVT::v2i32); Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index a6d440fa8aa2..4a2ba00ac6c2 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -417,7 +417,7 @@ SDValue SparcTargetLowering::LowerFormalArguments_32( if (VA.needsCustom()) { assert(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2i32); - unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); + Register VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); MF.getRegInfo().addLiveIn(VA.getLocReg(), VRegHi); SDValue HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32); @@ -445,7 +445,7 @@ SDValue SparcTargetLowering::LowerFormalArguments_32( InVals.push_back(WholeValue); continue; } - unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); + Register VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); MF.getRegInfo().addLiveIn(VA.getLocReg(), VReg); SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); if (VA.getLocVT() == MVT::f32) @@ -552,7 +552,7 @@ SDValue SparcTargetLowering::LowerFormalArguments_32( std::vector OutChains; for (; CurArgReg != ArgRegEnd; ++CurArgReg) { - unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); + Register VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); MF.getRegInfo().addLiveIn(*CurArgReg, VReg); SDValue Arg = DAG.getCopyFromReg(DAG.getRoot(), dl, VReg, MVT::i32); @@ -1016,9 +1016,9 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. -unsigned SparcTargetLowering::getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const { - unsigned Reg = StringSwitch(RegName) +Register SparcTargetLowering::getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const { + Register Reg = StringSwitch(RegName) .Case("i0", SP::I0).Case("i1", SP::I1).Case("i2", SP::I2).Case("i3", SP::I3) .Case("i4", SP::I4).Case("i5", SP::I5).Case("i6", SP::I6).Case("i7", SP::I7) .Case("o0", SP::O0).Case("o1", SP::O1).Case("o2", SP::O2).Case("o3", SP::O3) @@ -1438,7 +1438,7 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM, setOperationAction(Op, MVT::v2i32, Expand); } // Truncating/extending stores/loads are also not supported. 
- for (MVT VT : MVT::integer_vector_valuetypes()) { + for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i32, Expand); setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Expand); @@ -1805,7 +1805,7 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM, setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - setMinFunctionAlignment(2); + setMinFunctionAlignment(Align(4)); computeRegisterProperties(Subtarget->getRegisterInfo()); } @@ -2244,7 +2244,7 @@ SDValue SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS, return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS); } case SPCC::FCC_UL : { - SDValue Mask = DAG.getTargetConstant(1, DL, Result.getValueType()); + SDValue Mask = DAG.getConstant(1, DL, Result.getValueType()); Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask); SDValue RHS = DAG.getTargetConstant(0, DL, Result.getValueType()); SPCC = SPCC::ICC_NE; @@ -2277,14 +2277,14 @@ SDValue SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS, return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS); } case SPCC::FCC_LG : { - SDValue Mask = DAG.getTargetConstant(3, DL, Result.getValueType()); + SDValue Mask = DAG.getConstant(3, DL, Result.getValueType()); Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask); SDValue RHS = DAG.getTargetConstant(0, DL, Result.getValueType()); SPCC = SPCC::ICC_NE; return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS); } case SPCC::FCC_UE : { - SDValue Mask = DAG.getTargetConstant(3, DL, Result.getValueType()); + SDValue Mask = DAG.getConstant(3, DL, Result.getValueType()); Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask); SDValue RHS = DAG.getTargetConstant(0, DL, Result.getValueType()); SPCC = SPCC::ICC_E; @@ -2951,9 +2951,11 @@ static SDValue LowerUMULO_SMULO(SDValue Op, SelectionDAG &DAG, SDValue HiRHS = DAG.getNode(ISD::SRA, dl, MVT::i64, RHS, ShiftAmt); SDValue Args[] = { HiLHS, LHS, HiRHS, RHS }; + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(isSigned); SDValue MulResult = TLI.makeLibCall(DAG, RTLIB::MUL_I128, WideVT, - Args, isSigned, dl).first; + Args, CallOptions, dl).first; SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, MulResult, DAG.getIntPtrConstant(0, dl)); SDValue TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, @@ -3183,7 +3185,7 @@ SparcTargetLowering::getConstraintType(StringRef Constraint) const { case 'e': return C_RegisterClass; case 'I': // SIMM13 - return C_Other; + return C_Immediate; } } diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 8d557a4225e5..3d798cec0c16 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -98,8 +98,8 @@ namespace llvm { return MVT::i32; } - unsigned getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const override; + Register getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const override; /// If a physical register, this returns the register that receives the /// exception address on entry to an EH pad. 
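The SparcISelLowering hunks just above follow an LLVM-wide interface change: TargetLowering::getRegisterByName now returns a Register and receives the MachineFunction rather than a SelectionDAG. A hedged sketch of an override after the change, with MyTarget and MyTargetLowering invented purely for illustration (only the signature and the StringSwitch pattern mirror the Sparc code):

    // Hypothetical target shown for shape only.
    #include "llvm/ADT/StringSwitch.h"
    #include "llvm/Support/ErrorHandling.h"

    Register MyTargetLowering::getRegisterByName(const char *RegName, EVT VT,
                                                 const MachineFunction &MF) const {
      Register Reg = llvm::StringSwitch<unsigned>(RegName)
                         .Case("sp", MyTarget::SP)  // stack pointer
                         .Case("fp", MyTarget::FP)  // frame pointer
                         .Default(0);
      if (Reg == 0)
        report_fatal_error("Invalid register name global variable");
      return Reg;
    }
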
diff --git a/lib/Target/Sparc/SparcInstr64Bit.td b/lib/Target/Sparc/SparcInstr64Bit.td index 2d4f687f72d2..d18ab3b1370b 100644 --- a/lib/Target/Sparc/SparcInstr64Bit.td +++ b/lib/Target/Sparc/SparcInstr64Bit.td @@ -177,7 +177,7 @@ def LEAX_ADDri : F3_2<2, 0b000000, def : Pat<(SPcmpicc i64:$a, i64:$b), (CMPrr $a, $b)>; def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (CMPri $a, (as_i32imm $b))>; -def : Pat<(ctpop i64:$src), (POPCrr $src)>; +def : Pat<(i64 (ctpop i64:$src)), (POPCrr $src)>; } // Predicates = [Is64Bit] diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index ad343fe6f80a..3d3d314a26bb 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -375,8 +375,8 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineInstr *MovMI = nullptr; for (unsigned i = 0; i != numSubRegs; ++i) { - unsigned Dst = TRI->getSubReg(DestReg, subRegIdx[i]); - unsigned Src = TRI->getSubReg(SrcReg, subRegIdx[i]); + Register Dst = TRI->getSubReg(DestReg, subRegIdx[i]); + Register Src = TRI->getSubReg(SrcReg, subRegIdx[i]); assert(Dst && Src && "Bad sub-register"); MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(movOpc), Dst); diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index 8474c7abffb3..73dbdc4f443e 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -516,9 +516,9 @@ let DecoderMethod = "DecodeLoadQFP" in defm LDQF : LoadA<"ldq", 0b100010, 0b110010, load, QFPRegs, f128>, Requires<[HasV9, HasHardQuad]>; -let DecoderMethod = "DecodeLoadCP" in - defm LDC : Load<"ld", 0b110000, load, CoprocRegs, i32>; -let DecoderMethod = "DecodeLoadCPPair" in +let DecoderMethod = "DecodeLoadCP" in + defm LDC : Load<"ld", 0b110000, load, CoprocRegs, i32>; +let DecoderMethod = "DecodeLoadCPPair" in defm LDDC : Load<"ldd", 0b110011, load, CoprocPair, v2i32, IIC_ldd>; let DecoderMethod = "DecodeLoadCP", Defs = [CPSR] in { @@ -1508,7 +1508,7 @@ let rs1 = 0 in def POPCrr : F3_1<2, 0b101110, (outs IntRegs:$rd), (ins IntRegs:$rs2), "popc $rs2, $rd", []>, Requires<[HasV9]>; -def : Pat<(ctpop i32:$src), +def : Pat<(i32 (ctpop i32:$src)), (POPCrr (SRLri $src, 0))>; let Predicates = [HasV9], hasSideEffects = 1, rd = 0, rs1 = 0b01111 in diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index ce11a423d10e..19a90e98db7e 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -182,9 +182,9 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (!Subtarget.isV9() || !Subtarget.hasHardQuad()) { if (MI.getOpcode() == SP::STQFri) { const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); - unsigned SrcReg = MI.getOperand(2).getReg(); - unsigned SrcEvenReg = getSubReg(SrcReg, SP::sub_even64); - unsigned SrcOddReg = getSubReg(SrcReg, SP::sub_odd64); + Register SrcReg = MI.getOperand(2).getReg(); + Register SrcEvenReg = getSubReg(SrcReg, SP::sub_even64); + Register SrcOddReg = getSubReg(SrcReg, SP::sub_odd64); MachineInstr *StMI = BuildMI(*MI.getParent(), II, dl, TII.get(SP::STDFri)) .addReg(FrameReg).addImm(0).addReg(SrcEvenReg); @@ -194,9 +194,9 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset += 8; } else if (MI.getOpcode() == SP::LDQFri) { const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned DestEvenReg = getSubReg(DestReg, SP::sub_even64); - unsigned DestOddReg = 
getSubReg(DestReg, SP::sub_odd64); + Register DestReg = MI.getOperand(0).getReg(); + Register DestEvenReg = getSubReg(DestReg, SP::sub_even64); + Register DestOddReg = getSubReg(DestReg, SP::sub_odd64); MachineInstr *LdMI = BuildMI(*MI.getParent(), II, dl, TII.get(SP::LDDFri), DestEvenReg) .addReg(FrameReg).addImm(0); diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 195cff79de03..c1e3f8c36982 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -98,7 +98,7 @@ SparcTargetMachine::SparcTargetMachine( getEffectiveSparcCodeModel( CM, getEffectiveRelocModel(RM), is64bit, JIT), OL), - TLOF(make_unique()), + TLOF(std::make_unique()), Subtarget(TT, CPU, FS, *this, is64bit), is64Bit(is64bit) { initAsmInfo(); } @@ -133,7 +133,7 @@ SparcTargetMachine::getSubtargetImpl(const Function &F) const { // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. resetTargetOptions(F); - I = llvm::make_unique(TargetTriple, CPU, FS, *this, + I = std::make_unique(TargetTriple, CPU, FS, *this, this->is64Bit); } return I.get(); diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index a259ba3433d6..93c4ce4b5ccc 100644 --- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -155,11 +155,11 @@ public: // Create particular kinds of operand. static std::unique_ptr createInvalid(SMLoc StartLoc, SMLoc EndLoc) { - return make_unique(KindInvalid, StartLoc, EndLoc); + return std::make_unique(KindInvalid, StartLoc, EndLoc); } static std::unique_ptr createToken(StringRef Str, SMLoc Loc) { - auto Op = make_unique(KindToken, Loc, Loc); + auto Op = std::make_unique(KindToken, Loc, Loc); Op->Token.Data = Str.data(); Op->Token.Length = Str.size(); return Op; @@ -167,7 +167,7 @@ public: static std::unique_ptr createReg(RegisterKind Kind, unsigned Num, SMLoc StartLoc, SMLoc EndLoc) { - auto Op = make_unique(KindReg, StartLoc, EndLoc); + auto Op = std::make_unique(KindReg, StartLoc, EndLoc); Op->Reg.Kind = Kind; Op->Reg.Num = Num; return Op; @@ -175,7 +175,7 @@ public: static std::unique_ptr createImm(const MCExpr *Expr, SMLoc StartLoc, SMLoc EndLoc) { - auto Op = make_unique(KindImm, StartLoc, EndLoc); + auto Op = std::make_unique(KindImm, StartLoc, EndLoc); Op->Imm = Expr; return Op; } @@ -184,7 +184,7 @@ public: createMem(MemoryKind MemKind, RegisterKind RegKind, unsigned Base, const MCExpr *Disp, unsigned Index, const MCExpr *LengthImm, unsigned LengthReg, SMLoc StartLoc, SMLoc EndLoc) { - auto Op = make_unique(KindMem, StartLoc, EndLoc); + auto Op = std::make_unique(KindMem, StartLoc, EndLoc); Op->Mem.MemKind = MemKind; Op->Mem.RegKind = RegKind; Op->Mem.Base = Base; @@ -200,7 +200,7 @@ public: static std::unique_ptr createImmTLS(const MCExpr *Imm, const MCExpr *Sym, SMLoc StartLoc, SMLoc EndLoc) { - auto Op = make_unique(KindImmTLS, StartLoc, EndLoc); + auto Op = std::make_unique(KindImmTLS, StartLoc, EndLoc); Op->ImmTLS.Imm = Imm; Op->ImmTLS.Sym = Sym; return Op; diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp index 8d8ba5644e10..49b6fc490336 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp @@ -162,5 +162,5 @@ unsigned SystemZObjectWriter::getRelocType(MCContext &Ctx, std::unique_ptr 
llvm::createSystemZObjectWriter(uint8_t OSABI) { - return llvm::make_unique(OSABI); + return std::make_unique(OSABI); } diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h index 2b0f90182d7f..88cf589a3f10 100644 --- a/lib/Target/SystemZ/SystemZ.h +++ b/lib/Target/SystemZ/SystemZ.h @@ -190,7 +190,6 @@ static inline bool isImmHF(uint64_t Val) { FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel); FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM); -FunctionPass *createSystemZExpandPseudoPass(SystemZTargetMachine &TM); FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM); diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp index ef378e4ade7a..10023e9e169c 100644 --- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -501,6 +501,10 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { } break; + case TargetOpcode::FENTRY_CALL: + LowerFENTRY_CALL(*MI, Lower); + return; + case TargetOpcode::STACKMAP: LowerSTACKMAP(*MI); return; @@ -546,6 +550,22 @@ static unsigned EmitNop(MCContext &OutContext, MCStreamer &OutStreamer, } } +void SystemZAsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI, + SystemZMCInstLower &Lower) { + MCContext &Ctx = MF->getContext(); + if (MF->getFunction().getFnAttribute("mnop-mcount") + .getValueAsString() == "true") { + EmitNop(Ctx, *OutStreamer, 6, getSubtargetInfo()); + return; + } + + MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__"); + const MCSymbolRefExpr *Op = + MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_PLT, Ctx); + OutStreamer->EmitInstruction(MCInstBuilder(SystemZ::BRASL) + .addReg(SystemZ::R0D).addExpr(Op), getSubtargetInfo()); +} + void SystemZAsmPrinter::LowerSTACKMAP(const MachineInstr &MI) { const SystemZInstrInfo *TII = static_cast(MF->getSubtarget().getInstrInfo()); diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.h b/lib/Target/SystemZ/SystemZAsmPrinter.h index aa5d3ca78e61..d01a17c2ebe2 100644 --- a/lib/Target/SystemZ/SystemZAsmPrinter.h +++ b/lib/Target/SystemZ/SystemZAsmPrinter.h @@ -46,6 +46,7 @@ public: } private: + void LowerFENTRY_CALL(const MachineInstr &MI, SystemZMCInstLower &MCIL); void LowerSTACKMAP(const MachineInstr &MI); void LowerPATCHPOINT(const MachineInstr &MI, SystemZMCInstLower &Lower); }; diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp index 9cbf6b320504..946eb2ba7c79 100644 --- a/lib/Target/SystemZ/SystemZElimCompare.cpp +++ b/lib/Target/SystemZ/SystemZElimCompare.cpp @@ -152,7 +152,7 @@ Reference SystemZElimCompare::getRegReferences(MachineInstr &MI, unsigned Reg) { for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { const MachineOperand &MO = MI.getOperand(I); if (MO.isReg()) { - if (unsigned MOReg = MO.getReg()) { + if (Register MOReg = MO.getReg()) { if (TRI->regsOverlap(MOReg, Reg)) { if (MO.isUse()) Ref.Use = true; @@ -378,11 +378,8 @@ bool SystemZElimCompare::adjustCCMasksForInstr( } // CC is now live after MI. - if (!ConvOpc) { - int CCDef = MI.findRegisterDefOperandIdx(SystemZ::CC, false, true, TRI); - assert(CCDef >= 0 && "Couldn't find CC set"); - MI.getOperand(CCDef).setIsDead(false); - } + if (!ConvOpc) + MI.clearRegisterDeads(SystemZ::CC); // Check if MI lies before Compare. 
bool BeforeCmp = false; diff --git a/lib/Target/SystemZ/SystemZExpandPseudo.cpp b/lib/Target/SystemZ/SystemZExpandPseudo.cpp deleted file mode 100644 index 09708fb4241c..000000000000 --- a/lib/Target/SystemZ/SystemZExpandPseudo.cpp +++ /dev/null @@ -1,152 +0,0 @@ -//==-- SystemZExpandPseudo.cpp - Expand pseudo instructions -------*- C++ -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains a pass that expands pseudo instructions into target -// instructions to allow proper scheduling and other late optimizations. This -// pass should be run after register allocation but before the post-regalloc -// scheduling pass. -// -//===----------------------------------------------------------------------===// - -#include "SystemZ.h" -#include "SystemZInstrInfo.h" -#include "SystemZSubtarget.h" -#include "llvm/CodeGen/LivePhysRegs.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -using namespace llvm; - -#define SYSTEMZ_EXPAND_PSEUDO_NAME "SystemZ pseudo instruction expansion pass" - -namespace llvm { - void initializeSystemZExpandPseudoPass(PassRegistry&); -} - -namespace { -class SystemZExpandPseudo : public MachineFunctionPass { -public: - static char ID; - SystemZExpandPseudo() : MachineFunctionPass(ID) { - initializeSystemZExpandPseudoPass(*PassRegistry::getPassRegistry()); - } - - const SystemZInstrInfo *TII; - - bool runOnMachineFunction(MachineFunction &Fn) override; - - StringRef getPassName() const override { return SYSTEMZ_EXPAND_PSEUDO_NAME; } - -private: - bool expandMBB(MachineBasicBlock &MBB); - bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI); - bool expandLOCRMux(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI); -}; -char SystemZExpandPseudo::ID = 0; -} - -INITIALIZE_PASS(SystemZExpandPseudo, "systemz-expand-pseudo", - SYSTEMZ_EXPAND_PSEUDO_NAME, false, false) - -/// Returns an instance of the pseudo instruction expansion pass. -FunctionPass *llvm::createSystemZExpandPseudoPass(SystemZTargetMachine &TM) { - return new SystemZExpandPseudo(); -} - -// MI is a load-register-on-condition pseudo instruction that could not be -// handled as a single hardware instruction. Replace it by a branch sequence. -bool SystemZExpandPseudo::expandLOCRMux(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI) { - MachineFunction &MF = *MBB.getParent(); - const BasicBlock *BB = MBB.getBasicBlock(); - MachineInstr &MI = *MBBI; - DebugLoc DL = MI.getDebugLoc(); - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(2).getReg(); - unsigned CCValid = MI.getOperand(3).getImm(); - unsigned CCMask = MI.getOperand(4).getImm(); - - LivePhysRegs LiveRegs(TII->getRegisterInfo()); - LiveRegs.addLiveOuts(MBB); - for (auto I = std::prev(MBB.end()); I != MBBI; --I) - LiveRegs.stepBackward(*I); - - // Splice MBB at MI, moving the rest of the block into RestMBB. 
-  MachineBasicBlock *RestMBB = MF.CreateMachineBasicBlock(BB);
-  MF.insert(std::next(MachineFunction::iterator(MBB)), RestMBB);
-  RestMBB->splice(RestMBB->begin(), &MBB, MI, MBB.end());
-  RestMBB->transferSuccessors(&MBB);
-  for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
-    RestMBB->addLiveIn(*I);
-
-  // Create a new block MoveMBB to hold the move instruction.
-  MachineBasicBlock *MoveMBB = MF.CreateMachineBasicBlock(BB);
-  MF.insert(std::next(MachineFunction::iterator(MBB)), MoveMBB);
-  MoveMBB->addLiveIn(SrcReg);
-  for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
-    MoveMBB->addLiveIn(*I);
-
-  // At the end of MBB, create a conditional branch to RestMBB if the
-  // condition is false, otherwise fall through to MoveMBB.
-  BuildMI(&MBB, DL, TII->get(SystemZ::BRC))
-    .addImm(CCValid).addImm(CCMask ^ CCValid).addMBB(RestMBB);
-  MBB.addSuccessor(RestMBB);
-  MBB.addSuccessor(MoveMBB);
-
-  // In MoveMBB, emit an instruction to move SrcReg into DestReg,
-  // then fall through to RestMBB.
-  TII->copyPhysReg(*MoveMBB, MoveMBB->end(), DL, DestReg, SrcReg,
-                   MI.getOperand(2).isKill());
-  MoveMBB->addSuccessor(RestMBB);
-
-  NextMBBI = MBB.end();
-  MI.eraseFromParent();
-  return true;
-}
-
-/// If MBBI references a pseudo instruction that should be expanded here,
-/// do the expansion and return true. Otherwise return false.
-bool SystemZExpandPseudo::expandMI(MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator MBBI,
-                                   MachineBasicBlock::iterator &NextMBBI) {
-  MachineInstr &MI = *MBBI;
-  switch (MI.getOpcode()) {
-  case SystemZ::LOCRMux:
-    return expandLOCRMux(MBB, MBBI, NextMBBI);
-  default:
-    break;
-  }
-  return false;
-}
-
-/// Iterate over the instructions in basic block MBB and expand any
-/// pseudo instructions. Return true if anything was modified.
-bool SystemZExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
-  bool Modified = false;
-
-  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
-  while (MBBI != E) {
-    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
-    Modified |= expandMI(MBB, MBBI, NMBBI);
-    MBBI = NMBBI;
-  }
-
-  return Modified;
-}
-
-bool SystemZExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
-  TII = static_cast(MF.getSubtarget().getInstrInfo());
-
-  bool Modified = false;
-  for (auto &MBB : MF)
-    Modified |= expandMBB(MBB);
-  return Modified;
-}
-
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp
index da28faebb326..0b8b6880accc 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -46,8 +46,8 @@ static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = {
 } // end anonymous namespace
 
 SystemZFrameLowering::SystemZFrameLowering()
-    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8,
-                          -SystemZMC::CallFrameSize, 8,
+    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8),
+                          -SystemZMC::CallFrameSize, Align(8),
                           false /* StackRealignable */) {
   // Create a mapping from register number to save slot offset.
   RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
@@ -118,7 +118,7 @@ static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB,
                         unsigned GPR64, bool IsImplicit) {
   const TargetRegisterInfo *RI =
       MBB.getParent()->getSubtarget().getRegisterInfo();
-  unsigned GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_l32);
+  Register GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_l32);
   bool IsLive = MBB.isLiveIn(GPR64) || MBB.isLiveIn(GPR32);
   if (!IsLive || !IsImplicit) {
     MIB.addReg(GPR64, getImplRegState(IsImplicit) | getKillRegState(!IsLive));
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 9dc4512255cc..751034c2d41a 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -346,6 +346,11 @@ public:
       : SelectionDAGISel(TM, OptLevel) {}
 
   bool runOnMachineFunction(MachineFunction &MF) override {
+    const Function &F = MF.getFunction();
+    if (F.getFnAttribute("mnop-mcount").getValueAsString() == "true" &&
+        F.getFnAttribute("fentry-call").getValueAsString() != "true")
+      report_fatal_error("mnop-mcount only supported with fentry-call");
+
     Subtarget = &MF.getSubtarget();
     return SelectionDAGISel::runOnMachineFunction(MF);
   }
@@ -1146,7 +1151,7 @@ void SystemZDAGToDAGISel::loadVectorConstant(
   SDLoc DL(Node);
   SmallVector Ops;
   for (unsigned OpVal : VCI.OpVals)
-    Ops.push_back(CurDAG->getConstant(OpVal, DL, MVT::i32));
+    Ops.push_back(CurDAG->getTargetConstant(OpVal, DL, MVT::i32));
   SDValue Op = CurDAG->getNode(VCI.Opcode, DL, VCI.VecVT, Ops);
 
   if (VCI.VecVT == VT.getSimpleVT())
@@ -1550,8 +1555,8 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
       uint64_t ConstCCMask =
         cast(CCMask.getNode())->getZExtValue();
       // Invert the condition.
-      CCMask = CurDAG->getConstant(ConstCCValid ^ ConstCCMask, SDLoc(Node),
-                                   CCMask.getValueType());
+      CCMask = CurDAG->getTargetConstant(ConstCCValid ^ ConstCCMask,
+                                         SDLoc(Node), CCMask.getValueType());
       SDValue Op4 = Node->getOperand(4);
       SDNode *UpdatedNode =
         CurDAG->UpdateNodeOperands(Node, Op1, Op0, CCValid, CCMask, Op4);
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 78820f511ab4..e0ca9da93561 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -120,9 +120,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
 
   // Instructions are strings of 2-byte aligned 2-byte values.
-  setMinFunctionAlignment(2);
+  setMinFunctionAlignment(Align(2));
   // For performance reasons we prefer 16-byte alignment.
-  setPrefFunctionAlignment(4);
+  setPrefFunctionAlignment(Align(16));
 
   // Handle operations that are handled in a similar way for all types.
   for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
@@ -206,6 +206,12 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
       // the default expansion.
       if (!Subtarget.hasFPExtension())
         setOperationAction(ISD::FP_TO_UINT, VT, Expand);
+
+      // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
+      // default to Expand, so need to be modified to Legal where appropriate.
+      setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal);
+      if (Subtarget.hasFPExtension())
+        setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal);
     }
   }
 
@@ -252,7 +258,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
   setOperationAction(ISD::CTLZ, MVT::i64, Legal);
 
-  // On arch13 we have native support for a 64-bit CTPOP.
+  // On z15 we have native support for a 64-bit CTPOP.
   if (Subtarget.hasMiscellaneousExtensions3()) {
     setOperationAction(ISD::CTPOP, MVT::i32, Promote);
     setOperationAction(ISD::CTPOP, MVT::i64, Legal);
@@ -294,14 +300,14 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   // Handle prefetches with PFD or PFDRL.
   setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
 
-  for (MVT VT : MVT::vector_valuetypes()) {
+  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
     // Assume by default that all vector operations need to be expanded.
     for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
       if (getOperationAction(Opcode, VT) == Legal)
        setOperationAction(Opcode, VT, Expand);
 
     // Likewise all truncating stores and extending loads.
-    for (MVT InnerVT : MVT::vector_valuetypes()) {
+    for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
       setTruncStoreAction(VT, InnerVT, Expand);
       setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
       setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
@@ -327,7 +333,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   }
 
   // Handle integer vector types.
-  for (MVT VT : MVT::integer_vector_valuetypes()) {
+  for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
     if (isTypeLegal(VT)) {
       // These operations have direct equivalents.
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
@@ -381,6 +387,11 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
     setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
     setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
+
+    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
+    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
+    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal);
   }
 
   if (Subtarget.hasVectorEnhancements2()) {
@@ -392,6 +403,11 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal);
     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
     setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal);
+
+    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
+    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
+    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal);
   }
 
   // Handle floating-point types.
@@ -831,7 +847,7 @@ supportedAddressingMode(Instruction *I, bool HasVector) {
   }
 
   if (isa(I) && I->hasOneUse()) {
-    auto *SingleUser = dyn_cast(*I->user_begin());
+    auto *SingleUser = cast(*I->user_begin());
     if (SingleUser->getParent() == I->getParent()) {
       if (isa(SingleUser)) {
         if (auto *C = dyn_cast(SingleUser->getOperand(1)))
@@ -956,7 +972,7 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
     case 'K': // Signed 16-bit constant
    case 'L': // Signed 20-bit displacement (on all targets we support)
    case 'M': // 0x7fffffff
-      return C_Other;
+      return C_Immediate;
 
    default:
      break;
@@ -1335,7 +1351,7 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
         break;
       }
 
-      unsigned VReg = MRI.createVirtualRegister(RC);
+      Register VReg = MRI.createVirtualRegister(RC);
       MRI.addLiveIn(VA.getLocReg(), VReg);
       ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
     } else {
@@ -1430,7 +1446,7 @@ static bool canUseSiblingCall(const CCState &ArgCCInfo,
       return false;
    if (!VA.isRegLoc())
      return false;
-    unsigned Reg = VA.getLocReg();
+    Register Reg = VA.getLocReg();
    if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
      return false;
    if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
@@ -1674,7 +1690,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
     RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
 
     // Chain and glue the copies together.
-    unsigned Reg = VA.getLocReg();
+    Register Reg = VA.getLocReg();
     Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
     Glue = Chain.getValue(1);
     RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
@@ -2533,12 +2549,12 @@ static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
   }
   if (C.Opcode == SystemZISD::ICMP)
     return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
-                       DAG.getConstant(C.ICmpType, DL, MVT::i32));
+                       DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
   if (C.Opcode == SystemZISD::TM) {
     bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
                          bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
     return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
-                       DAG.getConstant(RegisterOnly, DL, MVT::i32));
+                       DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
   }
   return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
 }
@@ -2576,10 +2592,10 @@ static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
 // in CCValid, so other values can be ignored.
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask) { - SDValue Ops[] = { DAG.getConstant(1, DL, MVT::i32), - DAG.getConstant(0, DL, MVT::i32), - DAG.getConstant(CCValid, DL, MVT::i32), - DAG.getConstant(CCMask, DL, MVT::i32), CCReg }; + SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32), + DAG.getConstant(0, DL, MVT::i32), + DAG.getTargetConstant(CCValid, DL, MVT::i32), + DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg}; return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops); } @@ -2741,9 +2757,10 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); SDValue CCReg = emitCmp(DAG, DL, C); - return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(), - Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32), - DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, CCReg); + return DAG.getNode( + SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0), + DAG.getTargetConstant(C.CCValid, DL, MVT::i32), + DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg); } // Return true if Pos is CmpOp and Neg is the negative of CmpOp, @@ -2794,8 +2811,9 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, } SDValue CCReg = emitCmp(DAG, DL, C); - SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32), - DAG.getConstant(C.CCMask, DL, MVT::i32), CCReg}; + SDValue Ops[] = {TrueOp, FalseOp, + DAG.getTargetConstant(C.CCValid, DL, MVT::i32), + DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg}; return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops); } @@ -3882,11 +3900,8 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, bool IsWrite = cast(Op.getOperand(2))->getZExtValue(); unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ; auto *Node = cast(Op.getNode()); - SDValue Ops[] = { - Op.getOperand(0), - DAG.getConstant(Code, DL, MVT::i32), - Op.getOperand(1) - }; + SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32), + Op.getOperand(1)}; return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL, Node->getVTList(), Ops, Node->getMemoryVT(), Node->getMemOperand()); @@ -4228,7 +4243,7 @@ static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1); SDValue Op; if (P.Opcode == SystemZISD::PERMUTE_DWORDS) { - SDValue Op2 = DAG.getConstant(P.Operand, DL, MVT::i32); + SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32); Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2); } else if (P.Opcode == SystemZISD::PACK) { MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8), @@ -4253,7 +4268,8 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, unsigned StartIndex, OpNo0, OpNo1; if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1)) return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0], - Ops[OpNo1], DAG.getConstant(StartIndex, DL, MVT::i32)); + Ops[OpNo1], + DAG.getTargetConstant(StartIndex, DL, MVT::i32)); // Fall back on VPERM. Construct an SDNode for the permute vector. SDValue IndexNodes[SystemZ::VectorBytes]; @@ -4751,7 +4767,7 @@ SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index)); // Otherwise keep it as a vector-to-vector operation. 
return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0), - DAG.getConstant(Index, DL, MVT::i32)); + DAG.getTargetConstant(Index, DL, MVT::i32)); } GeneralShuffle GS(VT); @@ -6041,8 +6057,8 @@ SDValue SystemZTargetLowering::combineBR_CCMASK( if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0), Chain, - DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32), - DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32), + DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), + DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), N->getOperand(3), CCReg); return SDValue(); } @@ -6063,10 +6079,9 @@ SDValue SystemZTargetLowering::combineSELECT_CCMASK( if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), - N->getOperand(0), - N->getOperand(1), - DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32), - DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32), + N->getOperand(0), N->getOperand(1), + DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), + DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), CCReg); return SDValue(); } @@ -6548,19 +6563,17 @@ static bool isSelectPseudo(MachineInstr &MI) { // Helper function, which inserts PHI functions into SinkMBB: // %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ], -// where %FalseValue(i) and %TrueValue(i) are taken from the consequent Selects -// in [MIItBegin, MIItEnd) range. -static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin, - MachineBasicBlock::iterator MIItEnd, +// where %FalseValue(i) and %TrueValue(i) are taken from Selects. +static void createPHIsForSelects(SmallVector &Selects, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB) { MachineFunction *MF = TrueMBB->getParent(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); - unsigned CCValid = MIItBegin->getOperand(3).getImm(); - unsigned CCMask = MIItBegin->getOperand(4).getImm(); - DebugLoc DL = MIItBegin->getDebugLoc(); + MachineInstr *FirstMI = Selects.front(); + unsigned CCValid = FirstMI->getOperand(3).getImm(); + unsigned CCMask = FirstMI->getOperand(4).getImm(); MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin(); @@ -6572,16 +6585,15 @@ static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin, // destination registers, and the registers that went into the PHI. DenseMap> RegRewriteTable; - for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; - MIIt = skipDebugInstructionsForward(++MIIt, MIItEnd)) { - unsigned DestReg = MIIt->getOperand(0).getReg(); - unsigned TrueReg = MIIt->getOperand(1).getReg(); - unsigned FalseReg = MIIt->getOperand(2).getReg(); + for (auto MI : Selects) { + Register DestReg = MI->getOperand(0).getReg(); + Register TrueReg = MI->getOperand(1).getReg(); + Register FalseReg = MI->getOperand(2).getReg(); // If this Select we are generating is the opposite condition from // the jump we generated, then we have to swap the operands for the // PHI that is going to be generated. 
- if (MIIt->getOperand(4).getImm() == (CCValid ^ CCMask)) + if (MI->getOperand(4).getImm() == (CCValid ^ CCMask)) std::swap(TrueReg, FalseReg); if (RegRewriteTable.find(TrueReg) != RegRewriteTable.end()) @@ -6590,6 +6602,7 @@ static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin, if (RegRewriteTable.find(FalseReg) != RegRewriteTable.end()) FalseReg = RegRewriteTable[FalseReg].second; + DebugLoc DL = MI->getDebugLoc(); BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg) .addReg(TrueReg).addMBB(TrueMBB) .addReg(FalseReg).addMBB(FalseMBB); @@ -6605,36 +6618,61 @@ static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin, MachineBasicBlock * SystemZTargetLowering::emitSelect(MachineInstr &MI, MachineBasicBlock *MBB) const { + assert(isSelectPseudo(MI) && "Bad call to emitSelect()"); const SystemZInstrInfo *TII = static_cast(Subtarget.getInstrInfo()); unsigned CCValid = MI.getOperand(3).getImm(); unsigned CCMask = MI.getOperand(4).getImm(); - DebugLoc DL = MI.getDebugLoc(); // If we have a sequence of Select* pseudo instructions using the // same condition code value, we want to expand all of them into // a single pair of basic blocks using the same condition. - MachineInstr *LastMI = &MI; - MachineBasicBlock::iterator NextMIIt = skipDebugInstructionsForward( - std::next(MachineBasicBlock::iterator(MI)), MBB->end()); - - if (isSelectPseudo(MI)) - while (NextMIIt != MBB->end() && isSelectPseudo(*NextMIIt) && - NextMIIt->getOperand(3).getImm() == CCValid && - (NextMIIt->getOperand(4).getImm() == CCMask || - NextMIIt->getOperand(4).getImm() == (CCValid ^ CCMask))) { - LastMI = &*NextMIIt; - NextMIIt = skipDebugInstructionsForward(++NextMIIt, MBB->end()); + SmallVector Selects; + SmallVector DbgValues; + Selects.push_back(&MI); + unsigned Count = 0; + for (MachineBasicBlock::iterator NextMIIt = + std::next(MachineBasicBlock::iterator(MI)); + NextMIIt != MBB->end(); ++NextMIIt) { + if (NextMIIt->definesRegister(SystemZ::CC)) + break; + if (isSelectPseudo(*NextMIIt)) { + assert(NextMIIt->getOperand(3).getImm() == CCValid && + "Bad CCValid operands since CC was not redefined."); + if (NextMIIt->getOperand(4).getImm() == CCMask || + NextMIIt->getOperand(4).getImm() == (CCValid ^ CCMask)) { + Selects.push_back(&*NextMIIt); + continue; + } + break; } + bool User = false; + for (auto SelMI : Selects) + if (NextMIIt->readsVirtualRegister(SelMI->getOperand(0).getReg())) { + User = true; + break; + } + if (NextMIIt->isDebugInstr()) { + if (User) { + assert(NextMIIt->isDebugValue() && "Unhandled debug opcode."); + DbgValues.push_back(&*NextMIIt); + } + } + else if (User || ++Count > 20) + break; + } + MachineInstr *LastMI = Selects.back(); + bool CCKilled = + (LastMI->killsRegister(SystemZ::CC) || checkCCKill(*LastMI, MBB)); MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB); + MachineBasicBlock *JoinMBB = splitBlockAfter(LastMI, MBB); MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); // Unless CC was killed in the last Select instruction, mark it as // live-in to both FalseMBB and JoinMBB. 
- if (!LastMI->killsRegister(SystemZ::CC) && !checkCCKill(*LastMI, JoinMBB)) { + if (!CCKilled) { FalseMBB->addLiveIn(SystemZ::CC); JoinMBB->addLiveIn(SystemZ::CC); } @@ -6643,7 +6681,7 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI, // BRC CCMask, JoinMBB // # fallthrough to FalseMBB MBB = StartMBB; - BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC)) .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB); MBB->addSuccessor(JoinMBB); MBB->addSuccessor(FalseMBB); @@ -6657,12 +6695,14 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI, // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ] // ... MBB = JoinMBB; - MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI); - MachineBasicBlock::iterator MIItEnd = skipDebugInstructionsForward( - std::next(MachineBasicBlock::iterator(LastMI)), MBB->end()); - createPHIsForSelects(MIItBegin, MIItEnd, StartMBB, FalseMBB, MBB); + createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB); + for (auto SelMI : Selects) + SelMI->eraseFromParent(); + + MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI(); + for (auto DbgMI : DbgValues) + MBB->splice(InsertPos, StartMBB, DbgMI); - StartMBB->erase(MIItBegin, MIItEnd); return JoinMBB; } @@ -6678,10 +6718,10 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI, const SystemZInstrInfo *TII = static_cast(Subtarget.getInstrInfo()); - unsigned SrcReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(0).getReg(); MachineOperand Base = MI.getOperand(1); int64_t Disp = MI.getOperand(2).getImm(); - unsigned IndexReg = MI.getOperand(3).getReg(); + Register IndexReg = MI.getOperand(3).getReg(); unsigned CCValid = MI.getOperand(4).getImm(); unsigned CCMask = MI.getOperand(5).getImm(); DebugLoc DL = MI.getDebugLoc(); @@ -6773,7 +6813,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary( // Extract the operands. Base can be a register or a frame index. // Src2 can be a register or immediate. - unsigned Dest = MI.getOperand(0).getReg(); + Register Dest = MI.getOperand(0).getReg(); MachineOperand Base = earlyUseOperand(MI.getOperand(1)); int64_t Disp = MI.getOperand(2).getImm(); MachineOperand Src2 = earlyUseOperand(MI.getOperand(3)); @@ -6833,7 +6873,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary( .addReg(OldVal).addReg(BitShift).addImm(0); if (Invert) { // Perform the operation normally and then invert every bit of the field. - unsigned Tmp = MRI.createVirtualRegister(RC); + Register Tmp = MRI.createVirtualRegister(RC); BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2); if (BitSize <= 32) // XILF with the upper BitSize bits set. @@ -6842,7 +6882,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary( else { // Use LCGR and add -1 to the result, which is more compact than // an XILF, XILH pair. - unsigned Tmp2 = MRI.createVirtualRegister(RC); + Register Tmp2 = MRI.createVirtualRegister(RC); BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp); BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal) .addReg(Tmp2).addImm(-1); @@ -6891,7 +6931,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax( bool IsSubWord = (BitSize < 32); // Extract the operands. Base can be a register or a frame index. 
- unsigned Dest = MI.getOperand(0).getReg(); + Register Dest = MI.getOperand(0).getReg(); MachineOperand Base = earlyUseOperand(MI.getOperand(1)); int64_t Disp = MI.getOperand(2).getImm(); Register Src2 = MI.getOperand(3).getReg(); @@ -7005,13 +7045,13 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI, MachineRegisterInfo &MRI = MF.getRegInfo(); // Extract the operands. Base can be a register or a frame index. - unsigned Dest = MI.getOperand(0).getReg(); + Register Dest = MI.getOperand(0).getReg(); MachineOperand Base = earlyUseOperand(MI.getOperand(1)); int64_t Disp = MI.getOperand(2).getImm(); - unsigned OrigCmpVal = MI.getOperand(3).getReg(); - unsigned OrigSwapVal = MI.getOperand(4).getReg(); - unsigned BitShift = MI.getOperand(5).getReg(); - unsigned NegBitShift = MI.getOperand(6).getReg(); + Register OrigCmpVal = MI.getOperand(3).getReg(); + Register OrigSwapVal = MI.getOperand(4).getReg(); + Register BitShift = MI.getOperand(5).getReg(); + Register NegBitShift = MI.getOperand(6).getReg(); int64_t BitSize = MI.getOperand(7).getImm(); DebugLoc DL = MI.getDebugLoc(); @@ -7023,14 +7063,14 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI, assert(LOpcode && CSOpcode && "Displacement out of range"); // Create virtual registers for temporary results. - unsigned OrigOldVal = MRI.createVirtualRegister(RC); - unsigned OldVal = MRI.createVirtualRegister(RC); - unsigned CmpVal = MRI.createVirtualRegister(RC); - unsigned SwapVal = MRI.createVirtualRegister(RC); - unsigned StoreVal = MRI.createVirtualRegister(RC); - unsigned RetryOldVal = MRI.createVirtualRegister(RC); - unsigned RetryCmpVal = MRI.createVirtualRegister(RC); - unsigned RetrySwapVal = MRI.createVirtualRegister(RC); + Register OrigOldVal = MRI.createVirtualRegister(RC); + Register OldVal = MRI.createVirtualRegister(RC); + Register CmpVal = MRI.createVirtualRegister(RC); + Register SwapVal = MRI.createVirtualRegister(RC); + Register StoreVal = MRI.createVirtualRegister(RC); + Register RetryOldVal = MRI.createVirtualRegister(RC); + Register RetryCmpVal = MRI.createVirtualRegister(RC); + Register RetrySwapVal = MRI.createVirtualRegister(RC); // Insert 2 basic blocks for the loop. 
MachineBasicBlock *StartMBB = MBB; @@ -7129,11 +7169,11 @@ SystemZTargetLowering::emitPair128(MachineInstr &MI, MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Dest = MI.getOperand(0).getReg(); - unsigned Hi = MI.getOperand(1).getReg(); - unsigned Lo = MI.getOperand(2).getReg(); - unsigned Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); - unsigned Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); + Register Dest = MI.getOperand(0).getReg(); + Register Hi = MI.getOperand(1).getReg(); + Register Lo = MI.getOperand(2).getReg(); + Register Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); + Register Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1); BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2) @@ -7157,14 +7197,14 @@ MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI, MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Dest = MI.getOperand(0).getReg(); - unsigned Src = MI.getOperand(1).getReg(); - unsigned In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); + Register Dest = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128); if (ClearEven) { - unsigned NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); - unsigned Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); + Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); + Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64) .addImm(0); @@ -7308,7 +7348,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper( // The previous iteration might have created out-of-range displacements. // Apply them using LAY if so. if (!isUInt<12>(DestDisp)) { - unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); + Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg) .add(DestBase) .addImm(DestDisp) @@ -7317,7 +7357,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper( DestDisp = 0; } if (!isUInt<12>(SrcDisp)) { - unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); + Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg) .add(SrcBase) .addImm(SrcDisp) @@ -7474,11 +7514,11 @@ MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0( static_cast(Subtarget.getInstrInfo()); DebugLoc DL = MI.getDebugLoc(); - unsigned SrcReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(0).getReg(); // Create new virtual register of the same class as source. const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); - unsigned DstReg = MRI->createVirtualRegister(RC); + Register DstReg = MRI->createVirtualRegister(RC); // Replace pseudo with a normal load-and-test that models the def as // well. 
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 19c7ec58ed3d..9c95e8aec940 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -25,10 +25,10 @@ let Predicates = [FeatureNoVectorEnhancements1] in let Predicates = [FeatureVectorEnhancements1] in def SelectVR128 : SelectWrapper; -defm CondStoreF32 : CondStores; -defm CondStoreF64 : CondStores; +defm CondStoreF32 : CondStores; +defm CondStoreF64 : CondStores; //===----------------------------------------------------------------------===// // Move instructions @@ -276,13 +276,13 @@ let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { } // fp_to_sint always rounds towards zero, which is modifier value 5. -def : Pat<(i32 (fp_to_sint FP32:$src)), (CFEBR 5, FP32:$src)>; -def : Pat<(i32 (fp_to_sint FP64:$src)), (CFDBR 5, FP64:$src)>; -def : Pat<(i32 (fp_to_sint FP128:$src)), (CFXBR 5, FP128:$src)>; +def : Pat<(i32 (any_fp_to_sint FP32:$src)), (CFEBR 5, FP32:$src)>; +def : Pat<(i32 (any_fp_to_sint FP64:$src)), (CFDBR 5, FP64:$src)>; +def : Pat<(i32 (any_fp_to_sint FP128:$src)), (CFXBR 5, FP128:$src)>; -def : Pat<(i64 (fp_to_sint FP32:$src)), (CGEBR 5, FP32:$src)>; -def : Pat<(i64 (fp_to_sint FP64:$src)), (CGDBR 5, FP64:$src)>; -def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR 5, FP128:$src)>; +def : Pat<(i64 (any_fp_to_sint FP32:$src)), (CGEBR 5, FP32:$src)>; +def : Pat<(i64 (any_fp_to_sint FP64:$src)), (CGDBR 5, FP64:$src)>; +def : Pat<(i64 (any_fp_to_sint FP128:$src)), (CGXBR 5, FP128:$src)>; // The FP extension feature provides versions of the above that allow // also specifying the inexact-exception suppression flag. @@ -309,13 +309,13 @@ let Predicates = [FeatureFPExtension] in { def CLGXBR : TernaryRRFe<"clgxbr", 0xB3AE, GR64, FP128>; } - def : Pat<(i32 (fp_to_uint FP32:$src)), (CLFEBR 5, FP32:$src, 0)>; - def : Pat<(i32 (fp_to_uint FP64:$src)), (CLFDBR 5, FP64:$src, 0)>; - def : Pat<(i32 (fp_to_uint FP128:$src)), (CLFXBR 5, FP128:$src, 0)>; + def : Pat<(i32 (any_fp_to_uint FP32:$src)), (CLFEBR 5, FP32:$src, 0)>; + def : Pat<(i32 (any_fp_to_uint FP64:$src)), (CLFDBR 5, FP64:$src, 0)>; + def : Pat<(i32 (any_fp_to_uint FP128:$src)), (CLFXBR 5, FP128:$src, 0)>; - def : Pat<(i64 (fp_to_uint FP32:$src)), (CLGEBR 5, FP32:$src, 0)>; - def : Pat<(i64 (fp_to_uint FP64:$src)), (CLGDBR 5, FP64:$src, 0)>; - def : Pat<(i64 (fp_to_uint FP128:$src)), (CLGXBR 5, FP128:$src, 0)>; + def : Pat<(i64 (any_fp_to_uint FP32:$src)), (CLGEBR 5, FP32:$src, 0)>; + def : Pat<(i64 (any_fp_to_uint FP64:$src)), (CLGDBR 5, FP64:$src, 0)>; + def : Pat<(i64 (any_fp_to_uint FP128:$src)), (CLGXBR 5, FP128:$src, 0)>; } diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index 2a1d14de3ddf..c9dbe3da686d 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -2141,17 +2141,17 @@ class FixedCondBranchRXY opcode, } class CmpBranchRIEa opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIEa; class AsmCmpBranchRIEa opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIEa; class FixedCmpBranchRIEa opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIEa { let isAsmParserOnly = V.alternate; @@ -2159,7 +2159,7 @@ class FixedCmpBranchRIEa opcode, } multiclass CmpBranchRIEaPair opcode, - RegisterOperand cls, Immediate imm> { + RegisterOperand cls, ImmOpWithPattern 
imm> { let isCodeGenOnly = 1 in def "" : CmpBranchRIEa; def Asm : AsmCmpBranchRIEa; @@ -2193,19 +2193,19 @@ multiclass CmpBranchRIEbPair opcode, } class CmpBranchRIEc opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIEc; class AsmCmpBranchRIEc opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIEc; class FixedCmpBranchRIEc opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIEc { let isAsmParserOnly = V.alternate; @@ -2213,7 +2213,7 @@ class FixedCmpBranchRIEc opcode, } multiclass CmpBranchRIEcPair opcode, - RegisterOperand cls, Immediate imm> { + RegisterOperand cls, ImmOpWithPattern imm> { let isCodeGenOnly = 1 in def "" : CmpBranchRIEc; def Asm : AsmCmpBranchRIEc; @@ -2272,19 +2272,19 @@ multiclass CmpBranchRRSPair opcode, } class CmpBranchRIS opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIS; class AsmCmpBranchRIS opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIS; class FixedCmpBranchRIS opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIS { let isAsmParserOnly = V.alternate; @@ -2292,7 +2292,7 @@ class FixedCmpBranchRIS opcode, } multiclass CmpBranchRISPair opcode, - RegisterOperand cls, Immediate imm> { + RegisterOperand cls, ImmOpWithPattern imm> { let isCodeGenOnly = 1 in def "" : CmpBranchRIS; def Asm : AsmCmpBranchRIS; @@ -2585,7 +2585,7 @@ multiclass StoreMultipleVRSaAlign opcode> { // We therefore match the address in the same way as a normal store and // only use the StoreSI* instruction if the matched address is suitable. class StoreSI opcode, SDPatternOperator operator, - Immediate imm> + ImmOpWithPattern imm> : InstSI { @@ -2593,7 +2593,7 @@ class StoreSI opcode, SDPatternOperator operator, } class StoreSIY opcode, SDPatternOperator operator, - Immediate imm> + ImmOpWithPattern imm> : InstSIY { @@ -2601,7 +2601,7 @@ class StoreSIY opcode, SDPatternOperator operator, } class StoreSIL opcode, SDPatternOperator operator, - Immediate imm> + ImmOpWithPattern imm> : InstSIL { @@ -2609,7 +2609,7 @@ class StoreSIL opcode, SDPatternOperator operator, } multiclass StoreSIPair siOpcode, bits<16> siyOpcode, - SDPatternOperator operator, Immediate imm> { + SDPatternOperator operator, ImmOpWithPattern imm> { let DispKey = mnemonic in { let DispSize = "12" in def "" : StoreSI; @@ -2665,7 +2665,7 @@ multiclass CondStoreRSYPair opcode, def Asm : AsmCondStoreRSY; } -class SideEffectUnaryI opcode, Immediate imm> +class SideEffectUnaryI opcode, ImmOpWithPattern imm> : InstI; @@ -2761,13 +2761,13 @@ class UnaryMemRRFc opcode, } class UnaryRI opcode, SDPatternOperator operator, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIa; class UnaryRIL opcode, SDPatternOperator operator, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRILa; @@ -2885,14 +2885,14 @@ multiclass UnaryRXPair rxOpcode, bits<16> rxyOpcode, } class UnaryVRIa opcode, SDPatternOperator operator, - TypedReg tr, Immediate imm, bits<4> type = 0> + TypedReg tr, ImmOpWithPattern imm, bits<4> type = 0> : InstVRIa { + [(set (tr.vt tr.op:$V1), (operator (i32 timm:$I2)))]> { let M3 = type; } -class UnaryVRIaGeneric opcode, Immediate imm> +class UnaryVRIaGeneric opcode, ImmOpWithPattern imm> : InstVRIa; @@ -3021,7 +3021,7 @@ class SideEffectBinaryRRFc opcode, } class 
SideEffectBinaryIE opcode, - Immediate imm1, Immediate imm2> + ImmOpWithPattern imm1, ImmOpWithPattern imm2> : InstIE; @@ -3030,7 +3030,7 @@ class SideEffectBinarySI opcode, Operand imm> mnemonic#"\t$BD1, $I2", []>; class SideEffectBinarySIL opcode, - SDPatternOperator operator, Immediate imm> + SDPatternOperator operator, ImmOpWithPattern imm> : InstSIL; @@ -3165,7 +3165,7 @@ class BinaryRRFc opcode, mnemonic#"\t$R1, $R2, $M3", []>; class BinaryMemRRFc opcode, - RegisterOperand cls1, RegisterOperand cls2, Immediate imm> + RegisterOperand cls1, RegisterOperand cls2, ImmOpWithPattern imm> : InstRRFc { let Constraints = "$R1 = $R1src"; @@ -3267,7 +3267,7 @@ multiclass CondBinaryRRFaPair opcode, } class BinaryRI opcode, SDPatternOperator operator, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIa { @@ -3276,14 +3276,14 @@ class BinaryRI opcode, SDPatternOperator operator, } class BinaryRIE opcode, SDPatternOperator operator, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIEd; multiclass BinaryRIAndK opcode1, bits<16> opcode2, SDPatternOperator operator, RegisterOperand cls, - Immediate imm> { + ImmOpWithPattern imm> { let NumOpsKey = mnemonic in { let NumOpsValue = "3" in def K : BinaryRIE, @@ -3294,7 +3294,7 @@ multiclass BinaryRIAndK opcode1, bits<16> opcode2, } class CondBinaryRIE opcode, RegisterOperand cls, - Immediate imm> + ImmOpWithPattern imm> : InstRIEg opcode, RegisterOperand cls, // Like CondBinaryRIE, but used for the raw assembly form. The condition-code // mask is the third operand rather than being part of the mnemonic. class AsmCondBinaryRIE opcode, RegisterOperand cls, - Immediate imm> + ImmOpWithPattern imm> : InstRIEg { @@ -3318,7 +3318,7 @@ class AsmCondBinaryRIE opcode, RegisterOperand cls, // Like CondBinaryRIE, but with a fixed CC mask. 
class FixedCondBinaryRIE opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIEg { let Constraints = "$R1 = $R1src"; @@ -3328,14 +3328,14 @@ class FixedCondBinaryRIE opcode, } multiclass CondBinaryRIEPair opcode, - RegisterOperand cls, Immediate imm> { + RegisterOperand cls, ImmOpWithPattern imm> { let isCodeGenOnly = 1 in def "" : CondBinaryRIE; def Asm : AsmCondBinaryRIE; } class BinaryRIL opcode, SDPatternOperator operator, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRILa { @@ -3484,7 +3484,7 @@ class BinaryVRIb opcode, SDPatternOperator operator, TypedReg tr, bits<4> type> : InstVRIb { + [(set (tr.vt tr.op:$V1), (operator imm32zx8_timm:$I2, imm32zx8_timm:$I3))]> { let M4 = type; } @@ -3498,7 +3498,7 @@ class BinaryVRIc opcode, SDPatternOperator operator, : InstVRIc { + imm32zx16_timm:$I2))]> { let M4 = type; } @@ -3512,7 +3512,7 @@ class BinaryVRIe opcode, SDPatternOperator operator, : InstVRIe { + imm32zx12_timm:$I3))]> { let M4 = type; let M5 = m5; } @@ -3715,7 +3715,7 @@ class BinaryVRX opcode, SDPatternOperator operator, : InstVRX { + imm32zx4_timm:$M3))]> { let mayLoad = 1; let AccessBytes = bytes; } @@ -3765,7 +3765,7 @@ class BinaryVSI opcode, SDPatternOperator operator, } class StoreBinaryVRV opcode, bits<5> bytes, - Immediate index> + ImmOpWithPattern index> : InstVRV { let mayStore = 1; @@ -3774,7 +3774,7 @@ class StoreBinaryVRV opcode, bits<5> bytes, class StoreBinaryVRX opcode, SDPatternOperator operator, TypedReg tr, bits<5> bytes, - Immediate index> + ImmOpWithPattern index> : InstVRX { @@ -3809,7 +3809,7 @@ class CompareRRE opcode, SDPatternOperator operator, } class CompareRI opcode, SDPatternOperator operator, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIa { @@ -3817,7 +3817,7 @@ class CompareRI opcode, SDPatternOperator operator, } class CompareRIL opcode, SDPatternOperator operator, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRILa { @@ -3924,7 +3924,7 @@ class CompareSSb opcode> } class CompareSI opcode, SDPatternOperator operator, - SDPatternOperator load, Immediate imm, + SDPatternOperator load, ImmOpWithPattern imm, AddressingMode mode = bdaddr12only> : InstSI opcode, SDPatternOperator operator, } class CompareSIL opcode, SDPatternOperator operator, - SDPatternOperator load, Immediate imm> + SDPatternOperator load, ImmOpWithPattern imm> : InstSIL { @@ -3943,7 +3943,7 @@ class CompareSIL opcode, SDPatternOperator operator, } class CompareSIY opcode, SDPatternOperator operator, - SDPatternOperator load, Immediate imm, + SDPatternOperator load, ImmOpWithPattern imm, AddressingMode mode = bdaddr20only> : InstSIY opcode, SDPatternOperator operator, multiclass CompareSIPair siOpcode, bits<16> siyOpcode, SDPatternOperator operator, SDPatternOperator load, - Immediate imm> { + ImmOpWithPattern imm> { let DispKey = mnemonic in { let DispSize = "12" in def "" : CompareSI; @@ -4012,7 +4012,7 @@ class TestRXE opcode, SDPatternOperator operator, } class TestBinarySIL opcode, - SDPatternOperator operator, Immediate imm> + SDPatternOperator operator, ImmOpWithPattern imm> : InstSIL; @@ -4073,7 +4073,7 @@ class SideEffectTernaryMemMemMemRRFb opcode, class SideEffectTernaryRRFc opcode, RegisterOperand cls1, RegisterOperand cls2, - Immediate imm> + ImmOpWithPattern imm> : InstRRFc; @@ -4086,7 +4086,7 @@ multiclass SideEffectTernaryRRFcOpt opcode, class SideEffectTernaryMemMemRRFc opcode, RegisterOperand 
cls1, RegisterOperand cls2, - Immediate imm> + ImmOpWithPattern imm> : InstRRFc { @@ -4221,7 +4221,7 @@ class TernaryRXF opcode, SDPatternOperator operator, } class TernaryVRIa opcode, SDPatternOperator operator, - TypedReg tr1, TypedReg tr2, Immediate imm, Immediate index> + TypedReg tr1, TypedReg tr2, ImmOpWithPattern imm, ImmOpWithPattern index> : InstVRIa opcode, SDPatternOperator operator, mnemonic#"\t$V1, $V2, $V3, $I4", [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), (tr2.vt tr2.op:$V3), - imm32zx8:$I4))]> { + imm32zx8_timm:$I4))]> { let M5 = type; } @@ -4252,8 +4252,8 @@ class TernaryVRRa opcode, SDPatternOperator operator, (ins tr2.op:$V2, imm32zx4:$M4, imm32zx4:$M5), mnemonic#"\t$V1, $V2, $M4, $M5", [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), - imm32zx4:$M4, - imm32zx4:$M5))], + imm32zx4_timm:$M4, + imm32zx4_timm:$M5))], m4or> { let M3 = type; } @@ -4285,13 +4285,13 @@ multiclass TernaryOptVRRbSPair opcode, TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> modifier = 0> { def "" : TernaryVRRb; + imm32zx4even_timm, !and (modifier, 14)>; def : InstAlias(NAME) tr1.op:$V1, tr2.op:$V2, tr2.op:$V3, 0)>; let Defs = [CC] in def S : TernaryVRRb; + imm32zx4even_timm, !add(!and (modifier, 14), 1)>; def : InstAlias(NAME#"S") tr1.op:$V1, tr2.op:$V2, tr2.op:$V3, 0)>; @@ -4314,7 +4314,7 @@ class TernaryVRRc opcode, SDPatternOperator operator, mnemonic#"\t$V1, $V2, $V3, $M4", [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), (tr2.vt tr2.op:$V3), - imm32zx4:$M4))]> { + imm32zx4_timm:$M4))]> { let M5 = 0; let M6 = 0; } @@ -4327,7 +4327,7 @@ class TernaryVRRcFloat opcode, mnemonic#"\t$V1, $V2, $V3, $M6", [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), (tr2.vt tr2.op:$V3), - imm32zx4:$M6))]> { + imm32zx4_timm:$M6))]> { let M4 = type; let M5 = m5; } @@ -4429,7 +4429,7 @@ class TernaryVRSbGeneric opcode> } class TernaryVRV opcode, bits<5> bytes, - Immediate index> + ImmOpWithPattern index> : InstVRV { @@ -4440,7 +4440,7 @@ class TernaryVRV opcode, bits<5> bytes, } class TernaryVRX opcode, SDPatternOperator operator, - TypedReg tr1, TypedReg tr2, bits<5> bytes, Immediate index> + TypedReg tr1, TypedReg tr2, bits<5> bytes, ImmOpWithPattern index> : InstVRX opcode, SDPatternOperator operato [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V1src), (tr2.vt tr2.op:$V2), (tr2.vt tr2.op:$V3), - imm32zx8:$I4))]> { + imm32zx8_timm:$I4))]> { let Constraints = "$V1 = $V1src"; let DisableEncoding = "$V1src"; let M5 = type; @@ -4480,7 +4480,7 @@ class QuaternaryVRIf opcode> : InstVRIf; + mnemonic#"\t$V1, $V2, $V3, $I4, $M5", []>; class QuaternaryVRIg opcode> : InstVRIg opcode> class QuaternaryVRRd opcode, SDPatternOperator operator, TypedReg tr1, TypedReg tr2, TypedReg tr3, TypedReg tr4, bits<4> type, - SDPatternOperator m6mask = imm32zx4, bits<4> m6or = 0> + SDPatternOperator m6mask = imm32zx4_timm, bits<4> m6or = 0> : InstVRRd opcode, bits<4> modifier = 0> { def "" : QuaternaryVRRd; + imm32zx4even_timm, !and (modifier, 14)>; def : InstAlias(NAME) tr1.op:$V1, tr2.op:$V2, tr2.op:$V3, tr2.op:$V4, 0)>; let Defs = [CC] in def S : QuaternaryVRRd; + imm32zx4even_timm, !add (!and (modifier, 14), 1)>; def : InstAlias(NAME#"S") tr1.op:$V1, tr2.op:$V2, tr2.op:$V3, tr2.op:$V4, 0)>; @@ -4536,7 +4536,7 @@ multiclass QuaternaryOptVRRdSPairGeneric opcode> { def "" : QuaternaryVRRdGeneric; def : InstAlias(NAME) VR128:$V1, VR128:$V2, VR128:$V3, - VR128:$V4, imm32zx4:$M5, 0)>; + VR128:$V4, imm32zx4_timm:$M5, 0)>; } class SideEffectQuaternaryRRFa opcode, @@ -4638,13 +4638,13 @@ class 
RotateSelectRIEf opcode, RegisterOperand cls1, class PrefetchRXY opcode, SDPatternOperator operator> : InstRXYb; + [(operator imm32zx4_timm:$M1, bdxaddr20only:$XBD2)]>; class PrefetchRILPC opcode, SDPatternOperator operator> - : InstRILc { + [(operator imm32zx4_timm:$M1, pcrel32:$RI2)]> { // We want PC-relative addresses to be tried ahead of BD and BDX addresses. // However, BDXs have two extra operands and are therefore 6 units more // complex. @@ -4691,7 +4691,7 @@ class Pseudo pattern> // Like UnaryRI, but expanded after RA depending on the choice of register. class UnaryRIPseudo + ImmOpWithPattern imm> : Pseudo<(outs cls:$R1), (ins imm:$I2), [(set cls:$R1, (operator imm:$I2))]>; @@ -4720,7 +4720,7 @@ class UnaryRRPseudo + ImmOpWithPattern imm> : Pseudo<(outs cls:$R1), (ins cls:$R1src, imm:$I2), [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { let Constraints = "$R1 = $R1src"; @@ -4728,13 +4728,13 @@ class BinaryRIPseudo + ImmOpWithPattern imm> : Pseudo<(outs cls:$R1), (ins cls:$R3, imm:$I2), [(set cls:$R1, (operator cls:$R3, imm:$I2))]>; // Like BinaryRIAndK, but expanded after RA depending on the choice of register. multiclass BinaryRIAndKPseudo { + RegisterOperand cls, ImmOpWithPattern imm> { let NumOpsKey = key in { let NumOpsValue = "3" in def K : BinaryRIEPseudo, @@ -4764,7 +4764,7 @@ class MemFoldPseudo bytes, // Like CompareRI, but expanded after RA depending on the choice of register. class CompareRIPseudo + ImmOpWithPattern imm> : Pseudo<(outs), (ins cls:$R1, imm:$I2), [(set CC, (operator cls:$R1, imm:$I2))]> { let isCompare = 1; @@ -4783,7 +4783,7 @@ class CompareRXYPseudo +class TestBinarySILPseudo : Pseudo<(outs), (ins bdaddr12only:$BD1, imm:$I2), [(set CC, (operator bdaddr12only:$BD1, imm:$I2))]>; @@ -4812,7 +4812,7 @@ class CondBinaryRRFaPseudo +class CondBinaryRIEPseudo : Pseudo<(outs cls:$R1), (ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3), [(set cls:$R1, (z_select_ccmask imm:$I2, cls:$R1src, @@ -4876,7 +4876,7 @@ class SelectWrapper : Pseudo<(outs cls:$dst), (ins cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc), [(set (vt cls:$dst), (z_select_ccmask cls:$src1, cls:$src2, - imm32zx4:$valid, imm32zx4:$cc))]> { + imm32zx4_timm:$valid, imm32zx4_timm:$cc))]> { let usesCustomInserter = 1; let hasNoSchedulingInfo = 1; let Uses = [CC]; @@ -4890,12 +4890,12 @@ multiclass CondStores; def Inv : Pseudo<(outs), (ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc), [(store (z_select_ccmask (load mode:$addr), cls:$new, - imm32zx4:$valid, imm32zx4:$cc), + imm32zx4_timm:$valid, imm32zx4_timm:$cc), mode:$addr)]>; } } @@ -4917,11 +4917,11 @@ class AtomicLoadBinary : AtomicLoadBinary; -class AtomicLoadBinaryImm32 +class AtomicLoadBinaryImm32 : AtomicLoadBinary; class AtomicLoadBinaryReg64 : AtomicLoadBinary; -class AtomicLoadBinaryImm64 +class AtomicLoadBinaryImm64 : AtomicLoadBinary; // OPERATOR is ATOMIC_SWAPW or an ATOMIC_LOADW_* operation. PAT and OPERAND @@ -4944,7 +4944,7 @@ class AtomicLoadWBinary : AtomicLoadWBinary; -class AtomicLoadWBinaryImm +class AtomicLoadWBinaryImm : AtomicLoadWBinary; // A pseudo instruction that is a direct alias of a real instruction. 
@@ -4979,7 +4979,7 @@ class StoreAliasVRX + ImmOpWithPattern imm> : Alias<4, (outs cls:$R1), (ins cls:$R1src, imm:$I2), [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { let Constraints = "$R1 = $R1src"; @@ -4987,7 +4987,7 @@ class BinaryAliasRI + ImmOpWithPattern imm> : Alias<6, (outs cls:$R1), (ins cls:$R1src, imm:$I2), [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { let Constraints = "$R1 = $R1src"; @@ -4999,7 +4999,7 @@ class BinaryAliasVRRf // An alias of a CompareRI, but with different register sizes. class CompareAliasRI + ImmOpWithPattern imm> : Alias<4, (outs), (ins cls:$R1, imm:$I2), [(set CC, (operator cls:$R1, imm:$I2))]> { let isCompare = 1; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 57c1cf4ec70a..bc783608d45b 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -46,22 +46,12 @@ using namespace llvm; #include "SystemZGenInstrInfo.inc" #define DEBUG_TYPE "systemz-II" -STATISTIC(LOCRMuxJumps, "Number of LOCRMux jump-sequences (lower is better)"); // Return a mask with Count low bits set. static uint64_t allOnes(unsigned int Count) { return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1; } -// Reg should be a 32-bit GPR. Return true if it is a high register rather -// than a low register. -static bool isHighReg(unsigned int Reg) { - if (SystemZ::GRH32BitRegClass.contains(Reg)) - return true; - assert(SystemZ::GR32BitRegClass.contains(Reg) && "Invalid GRX32"); - return false; -} - // Pin the vtable to this file. void SystemZInstrInfo::anchor() {} @@ -85,7 +75,7 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI, // Set up the two 64-bit registers and remember super reg and its flags. MachineOperand &HighRegOp = EarlierMI->getOperand(0); MachineOperand &LowRegOp = MI->getOperand(0); - unsigned Reg128 = LowRegOp.getReg(); + Register Reg128 = LowRegOp.getReg(); unsigned Reg128Killed = getKillRegState(LowRegOp.isKill()); unsigned Reg128Undef = getUndefRegState(LowRegOp.isUndef()); HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_h64)); @@ -147,8 +137,8 @@ void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const { void SystemZInstrInfo::expandRIPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned HighOpcode, bool ConvertHigh) const { - unsigned Reg = MI.getOperand(0).getReg(); - bool IsHigh = isHighReg(Reg); + Register Reg = MI.getOperand(0).getReg(); + bool IsHigh = SystemZ::isHighReg(Reg); MI.setDesc(get(IsHigh ? HighOpcode : LowOpcode)); if (IsHigh && ConvertHigh) MI.getOperand(1).setImm(uint32_t(MI.getOperand(1).getImm())); @@ -161,10 +151,10 @@ void SystemZInstrInfo::expandRIPseudo(MachineInstr &MI, unsigned LowOpcode, void SystemZInstrInfo::expandRIEPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned LowOpcodeK, unsigned HighOpcode) const { - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); - bool DestIsHigh = isHighReg(DestReg); - bool SrcIsHigh = isHighReg(SrcReg); + Register DestReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + bool DestIsHigh = SystemZ::isHighReg(DestReg); + bool SrcIsHigh = SystemZ::isHighReg(SrcReg); if (!DestIsHigh && !SrcIsHigh) MI.setDesc(get(LowOpcodeK)); else { @@ -184,9 +174,10 @@ void SystemZInstrInfo::expandRIEPseudo(MachineInstr &MI, unsigned LowOpcode, // is a high GR32. 
void SystemZInstrInfo::expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned HighOpcode) const { - unsigned Reg = MI.getOperand(0).getReg(); - unsigned Opcode = getOpcodeForOffset(isHighReg(Reg) ? HighOpcode : LowOpcode, - MI.getOperand(2).getImm()); + Register Reg = MI.getOperand(0).getReg(); + unsigned Opcode = getOpcodeForOffset( + SystemZ::isHighReg(Reg) ? HighOpcode : LowOpcode, + MI.getOperand(2).getImm()); MI.setDesc(get(Opcode)); } @@ -195,93 +186,11 @@ void SystemZInstrInfo::expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode, // register is a low GR32 and HighOpcode if the register is a high GR32. void SystemZInstrInfo::expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned HighOpcode) const { - unsigned Reg = MI.getOperand(0).getReg(); - unsigned Opcode = isHighReg(Reg) ? HighOpcode : LowOpcode; + Register Reg = MI.getOperand(0).getReg(); + unsigned Opcode = SystemZ::isHighReg(Reg) ? HighOpcode : LowOpcode; MI.setDesc(get(Opcode)); } -// MI is a load-register-on-condition pseudo instruction. Replace it with -// LowOpcode if source and destination are both low GR32s and HighOpcode if -// source and destination are both high GR32s. -void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode, - unsigned HighOpcode) const { - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(2).getReg(); - bool DestIsHigh = isHighReg(DestReg); - bool SrcIsHigh = isHighReg(SrcReg); - - if (!DestIsHigh && !SrcIsHigh) - MI.setDesc(get(LowOpcode)); - else if (DestIsHigh && SrcIsHigh) - MI.setDesc(get(HighOpcode)); - else - LOCRMuxJumps++; - - // If we were unable to implement the pseudo with a single instruction, we - // need to convert it back into a branch sequence. This cannot be done here - // since the caller of expandPostRAPseudo does not handle changes to the CFG - // correctly. This change is defered to the SystemZExpandPseudo pass. -} - -// MI is a select pseudo instruction. Replace it with LowOpcode if source -// and destination are all low GR32s and HighOpcode if source and destination -// are all high GR32s. Otherwise, use the two-operand MixedOpcode. -void SystemZInstrInfo::expandSELRPseudo(MachineInstr &MI, unsigned LowOpcode, - unsigned HighOpcode, - unsigned MixedOpcode) const { - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned Src1Reg = MI.getOperand(1).getReg(); - unsigned Src2Reg = MI.getOperand(2).getReg(); - bool DestIsHigh = isHighReg(DestReg); - bool Src1IsHigh = isHighReg(Src1Reg); - bool Src2IsHigh = isHighReg(Src2Reg); - - // If sources and destination aren't all high or all low, we may be able to - // simplify the operation by moving one of the sources to the destination - // first. But only if this doesn't clobber the other source. - if (DestReg != Src1Reg && DestReg != Src2Reg) { - if (DestIsHigh != Src1IsHigh) { - emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, Src1Reg, - SystemZ::LR, 32, MI.getOperand(1).isKill(), - MI.getOperand(1).isUndef()); - MI.getOperand(1).setReg(DestReg); - Src1Reg = DestReg; - Src1IsHigh = DestIsHigh; - } else if (DestIsHigh != Src2IsHigh) { - emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, Src2Reg, - SystemZ::LR, 32, MI.getOperand(2).isKill(), - MI.getOperand(2).isUndef()); - MI.getOperand(2).setReg(DestReg); - Src2Reg = DestReg; - Src2IsHigh = DestIsHigh; - } - } - - // If the destination (now) matches one source, prefer this to be first. 
- if (DestReg != Src1Reg && DestReg == Src2Reg) { - commuteInstruction(MI, false, 1, 2); - std::swap(Src1Reg, Src2Reg); - std::swap(Src1IsHigh, Src2IsHigh); - } - - if (!DestIsHigh && !Src1IsHigh && !Src2IsHigh) - MI.setDesc(get(LowOpcode)); - else if (DestIsHigh && Src1IsHigh && Src2IsHigh) - MI.setDesc(get(HighOpcode)); - else { - // Given the simplifcation above, we must already have a two-operand case. - assert (DestReg == Src1Reg); - MI.setDesc(get(MixedOpcode)); - MI.tieOperands(0, 1); - LOCRMuxJumps++; - } - - // If we were unable to implement the pseudo with a single instruction, we - // need to convert it back into a branch sequence. This cannot be done here - // since the caller of expandPostRAPseudo does not handle changes to the CFG - // correctly. This change is defered to the SystemZExpandPseudo pass. -} - // MI is an RR-style pseudo instruction that zero-extends the low Size bits // of one GRX32 into another. Replace it with LowOpcode if both operands // are low registers, otherwise use RISB[LH]G. @@ -302,8 +211,8 @@ void SystemZInstrInfo::expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode, void SystemZInstrInfo::expandLoadStackGuard(MachineInstr *MI) const { MachineBasicBlock *MBB = MI->getParent(); MachineFunction &MF = *MBB->getParent(); - const unsigned Reg64 = MI->getOperand(0).getReg(); - const unsigned Reg32 = RI.getSubReg(Reg64, SystemZ::subreg_l32); + const Register Reg64 = MI->getOperand(0).getReg(); + const Register Reg32 = RI.getSubReg(Reg64, SystemZ::subreg_l32); // EAR can only load the low subregister so us a shift for %a0 to produce // the GR containing %a0 and %a1. @@ -341,8 +250,8 @@ SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB, unsigned Size, bool KillSrc, bool UndefSrc) const { unsigned Opcode; - bool DestIsHigh = isHighReg(DestReg); - bool SrcIsHigh = isHighReg(SrcReg); + bool DestIsHigh = SystemZ::isHighReg(DestReg); + bool SrcIsHigh = SystemZ::isHighReg(SrcReg); if (DestIsHigh && SrcIsHigh) Opcode = SystemZ::RISBHH; else if (DestIsHigh && !SrcIsHigh) @@ -468,7 +377,7 @@ bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB, // Can't handle indirect branches. SystemZII::Branch Branch(getBranchInfo(*I)); - if (!Branch.Target->isMBB()) + if (!Branch.hasMBBTarget()) return true; // Punt on compound branches. @@ -478,7 +387,7 @@ bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB, if (Branch.CCMask == SystemZ::CCMASK_ANY) { // Handle unconditional branches. if (!AllowModify) { - TBB = Branch.Target->getMBB(); + TBB = Branch.getMBBTarget(); continue; } @@ -490,7 +399,7 @@ bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB, FBB = nullptr; // Delete the JMP if it's equivalent to a fall-through. - if (MBB.isLayoutSuccessor(Branch.Target->getMBB())) { + if (MBB.isLayoutSuccessor(Branch.getMBBTarget())) { TBB = nullptr; I->eraseFromParent(); I = MBB.end(); @@ -498,7 +407,7 @@ bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB, } // TBB is used to indicate the unconditinal destination. 
- TBB = Branch.Target->getMBB(); + TBB = Branch.getMBBTarget(); continue; } @@ -506,7 +415,7 @@ bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB, if (Cond.empty()) { // FIXME: add X86-style branch swap FBB = TBB; - TBB = Branch.Target->getMBB(); + TBB = Branch.getMBBTarget(); Cond.push_back(MachineOperand::CreateImm(Branch.CCValid)); Cond.push_back(MachineOperand::CreateImm(Branch.CCMask)); continue; @@ -517,7 +426,7 @@ bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB, // Only handle the case where all conditional branches branch to the same // destination. - if (TBB != Branch.Target->getMBB()) + if (TBB != Branch.getMBBTarget()) return true; // If the conditions are the same, we can leave them alone. @@ -547,7 +456,7 @@ unsigned SystemZInstrInfo::removeBranch(MachineBasicBlock &MBB, continue; if (!I->isBranch()) break; - if (!getBranchInfo(*I).Target->isMBB()) + if (!getBranchInfo(*I).hasMBBTarget()) break; // Remove the branch. I->eraseFromParent(); @@ -676,8 +585,8 @@ void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB, else { Opc = SystemZ::LOCR; MRI.constrainRegClass(DstReg, &SystemZ::GR32BitRegClass); - unsigned TReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); - unsigned FReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + Register TReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + Register FReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); BuildMI(MBB, I, DL, get(TargetOpcode::COPY), TReg).addReg(TrueReg); BuildMI(MBB, I, DL, get(TargetOpcode::COPY), FReg).addReg(FalseReg); TrueReg = TReg; @@ -1258,13 +1167,14 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( assert(NumOps == 3 && "Expected two source registers."); Register DstReg = MI.getOperand(0).getReg(); Register DstPhys = - (TRI->isVirtualRegister(DstReg) ? VRM->getPhys(DstReg) : DstReg); + (Register::isVirtualRegister(DstReg) ? VRM->getPhys(DstReg) : DstReg); Register SrcReg = (OpNum == 2 ? MI.getOperand(1).getReg() : ((OpNum == 1 && MI.isCommutable()) ? MI.getOperand(2).getReg() : Register())); if (DstPhys && !SystemZ::GRH32BitRegClass.contains(DstPhys) && SrcReg && - TRI->isVirtualRegister(SrcReg) && DstPhys == VRM->getPhys(SrcReg)) + Register::isVirtualRegister(SrcReg) && + DstPhys == VRM->getPhys(SrcReg)) NeedsCommute = (OpNum == 1); else MemOpcode = -1; @@ -1358,15 +1268,6 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { expandLOCPseudo(MI, SystemZ::LOCHI, SystemZ::LOCHHI); return true; - case SystemZ::LOCRMux: - expandLOCRPseudo(MI, SystemZ::LOCR, SystemZ::LOCFHR); - return true; - - case SystemZ::SELRMux: - expandSELRPseudo(MI, SystemZ::SELR, SystemZ::SELFHR, - SystemZ::LOCRMux); - return true; - case SystemZ::STCMux: expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH); return true; @@ -1468,8 +1369,8 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; case SystemZ::RISBMux: { - bool DestIsHigh = isHighReg(MI.getOperand(0).getReg()); - bool SrcIsHigh = isHighReg(MI.getOperand(2).getReg()); + bool DestIsHigh = SystemZ::isHighReg(MI.getOperand(0).getReg()); + bool SrcIsHigh = SystemZ::isHighReg(MI.getOperand(2).getReg()); if (SrcIsHigh == DestIsHigh) MI.setDesc(get(DestIsHigh ? 
SystemZ::RISBHH : SystemZ::RISBLL)); else { @@ -1545,6 +1446,10 @@ SystemZInstrInfo::getBranchInfo(const MachineInstr &MI) const { return SystemZII::Branch(SystemZII::BranchCLG, SystemZ::CCMASK_ICMP, MI.getOperand(2).getImm(), &MI.getOperand(3)); + case SystemZ::INLINEASM_BR: + // Don't try to analyze asm goto, so pass nullptr as branch target argument. + return SystemZII::Branch(SystemZII::AsmGoto, 0, 0, nullptr); + default: llvm_unreachable("Unrecognized branch opcode"); } @@ -1845,8 +1750,7 @@ void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB, bool SystemZInstrInfo:: areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, - const MachineInstr &MIb, - AliasAnalysis *AA) const { + const MachineInstr &MIb) const { if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) return false; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index 2edde175542e..6dc6e72aa52a 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -100,11 +100,18 @@ enum BranchType { // An instruction that decrements a 64-bit register and branches if // the result is nonzero. - BranchCTG + BranchCTG, + + // An instruction representing an asm goto statement. + AsmGoto }; // Information about a branch instruction. -struct Branch { +class Branch { + // The target of the branch. In case of INLINEASM_BR, this is nullptr. + const MachineOperand *Target; + +public: // The type of the branch. BranchType Type; @@ -114,12 +121,15 @@ struct Branch { // CCMASK_ is set if the branch should be taken when CC == N. unsigned CCMask; - // The target of the branch. - const MachineOperand *Target; - Branch(BranchType type, unsigned ccValid, unsigned ccMask, const MachineOperand *target) - : Type(type), CCValid(ccValid), CCMask(ccMask), Target(target) {} + : Target(target), Type(type), CCValid(ccValid), CCMask(ccMask) {} + + bool isIndirect() { return Target != nullptr && Target->isReg(); } + bool hasMBBTarget() { return Target != nullptr && Target->isMBB(); } + MachineBasicBlock *getMBBTarget() { + return hasMBBTarget() ? Target->getMBB() : nullptr; + } }; // Kinds of fused compares in compare-and-* instructions. Together with type @@ -160,10 +170,6 @@ class SystemZInstrInfo : public SystemZGenInstrInfo { unsigned HighOpcode) const; void expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned HighOpcode) const; - void expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode, - unsigned HighOpcode) const; - void expandSELRPseudo(MachineInstr &MI, unsigned LowOpcode, - unsigned HighOpcode, unsigned MixedOpcode) const; void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned Size) const; void expandLoadStackGuard(MachineInstr *MI) const; @@ -322,8 +328,7 @@ public: // memory addresses and false otherwise. 
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, - const MachineInstr &MIb, - AliasAnalysis *AA = nullptr) const override; + const MachineInstr &MIb) const override; }; } // end namespace llvm diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 91856893e3bd..8b334756611a 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -337,15 +337,15 @@ defm CondStore8Mux : CondStores, Requires<[FeatureHighWord]>; -defm CondStore32Mux : CondStores, +defm CondStore32Mux : CondStores, Requires<[FeatureLoadStoreOnCond2]>; defm CondStore8 : CondStores; defm CondStore16 : CondStores; -defm CondStore32 : CondStores; +defm CondStore32 : CondStores; defm : CondStores64; @@ -353,8 +353,8 @@ defm : CondStores64; defm : CondStores64; -defm CondStore64 : CondStores; +defm CondStore64 : CondStores; //===----------------------------------------------------------------------===// // Move instructions @@ -531,8 +531,8 @@ let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in { // Load on condition. Matched via DAG pattern. // Expands to LOC or LOCFH, depending on the choice of register. - def LOCMux : CondUnaryRSYPseudo; - defm LOCFH : CondUnaryRSYPair<"locfh", 0xEBE0, nonvolatile_load, GRH32, 4>; + def LOCMux : CondUnaryRSYPseudo; + defm LOCFH : CondUnaryRSYPair<"locfh", 0xEBE0, simple_load, GRH32, 4>; // Store on condition. Expanded from CondStore* pseudos. // Expands to STOC or STOCFH, depending on the choice of register. @@ -563,8 +563,8 @@ let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in { } // Load on condition. Matched via DAG pattern. - defm LOC : CondUnaryRSYPair<"loc", 0xEBF2, nonvolatile_load, GR32, 4>; - defm LOCG : CondUnaryRSYPair<"locg", 0xEBE2, nonvolatile_load, GR64, 8>; + defm LOC : CondUnaryRSYPair<"loc", 0xEBF2, simple_load, GR32, 4>; + defm LOCG : CondUnaryRSYPair<"locg", 0xEBE2, simple_load, GR64, 8>; // Store on condition. Expanded from CondStore* pseudos. defm STOC : CondStoreRSYPair<"stoc", 0xEBF3, GR32, 4>; @@ -2082,7 +2082,7 @@ let Predicates = [FeatureProcessorAssist] in { // cleared. We only use the first result here. let Defs = [CC] in def FLOGR : UnaryRRE<"flogr", 0xB983, null_frag, GR128, GR64>; -def : Pat<(ctlz GR64:$src), +def : Pat<(i64 (ctlz GR64:$src)), (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>; // Population count. Counts bits set per byte or doubleword. diff --git a/lib/Target/SystemZ/SystemZInstrVector.td b/lib/Target/SystemZ/SystemZInstrVector.td index 261727f89058..02364bbda5c1 100644 --- a/lib/Target/SystemZ/SystemZInstrVector.td +++ b/lib/Target/SystemZ/SystemZInstrVector.td @@ -60,7 +60,7 @@ let Predicates = [FeatureVector] in { // Generate byte mask. def VZERO : InherentVRIa<"vzero", 0xE744, 0>; def VONE : InherentVRIa<"vone", 0xE744, 0xffff>; - def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>; + def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16_timm>; // Generate mask. def VGM : BinaryVRIbGeneric<"vgm", 0xE746>; @@ -71,10 +71,10 @@ let Predicates = [FeatureVector] in { // Replicate immediate. 
def VREPI : UnaryVRIaGeneric<"vrepi", 0xE745, imm32sx16>; - def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>; - def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>; - def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>; - def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16, 3>; + def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16_timm, 0>; + def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16_timm, 1>; + def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16_timm, 2>; + def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16_timm, 3>; } // Load element immediate. @@ -116,7 +116,7 @@ let Predicates = [FeatureVector] in { (ins bdxaddr12only:$XBD2, imm32zx4:$M3), "lcbb\t$R1, $XBD2, $M3", [(set GR32:$R1, (int_s390_lcbb bdxaddr12only:$XBD2, - imm32zx4:$M3))]>; + imm32zx4_timm:$M3))]>; // Load with length. The number of loaded bytes is only known at run time. def VLL : BinaryVRSb<"vll", 0xE737, int_s390_vll, 0>; @@ -362,9 +362,9 @@ let Predicates = [FeatureVector] in { def VREPH : BinaryVRIc<"vreph", 0xE74D, z_splat, v128h, v128h, 1>; def VREPF : BinaryVRIc<"vrepf", 0xE74D, z_splat, v128f, v128f, 2>; def VREPG : BinaryVRIc<"vrepg", 0xE74D, z_splat, v128g, v128g, 3>; - def : Pat<(v4f32 (z_splat VR128:$vec, imm32zx16:$index)), + def : Pat<(v4f32 (z_splat VR128:$vec, imm32zx16_timm:$index)), (VREPF VR128:$vec, imm32zx16:$index)>; - def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16:$index)), + def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16_timm:$index)), (VREPG VR128:$vec, imm32zx16:$index)>; // Select. @@ -778,7 +778,7 @@ let Predicates = [FeatureVector] in { // Shift left double by byte. def VSLDB : TernaryVRId<"vsldb", 0xE777, z_shl_double, v128b, v128b, 0>; - def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8:$z), + def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8_timm:$z), (VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>; // Shift left double by bit. @@ -1069,7 +1069,7 @@ let Predicates = [FeatureVector] in { def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>; } // Rounding mode should agree with SystemZInstrFP.td. - def : FPConversion; + def : FPConversion; let Predicates = [FeatureVectorEnhancements2] in { let Uses = [FPC], mayRaiseFPException = 1 in { let isAsmParserOnly = 1 in @@ -1078,7 +1078,7 @@ let Predicates = [FeatureVector] in { def WCFEB : TernaryVRRa<"wcfeb", 0xE7C2, null_frag, v32sb, v32f, 2, 8>; } // Rounding mode should agree with SystemZInstrFP.td. - def : FPConversion; + def : FPConversion; } // Convert to logical. @@ -1088,7 +1088,7 @@ let Predicates = [FeatureVector] in { def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>; } // Rounding mode should agree with SystemZInstrFP.td. - def : FPConversion; + def : FPConversion; let Predicates = [FeatureVectorEnhancements2] in { let Uses = [FPC], mayRaiseFPException = 1 in { let isAsmParserOnly = 1 in @@ -1097,7 +1097,7 @@ let Predicates = [FeatureVector] in { def WCLFEB : TernaryVRRa<"wclfeb", 0xE7C0, null_frag, v32sb, v32f, 2, 8>; } // Rounding mode should agree with SystemZInstrFP.td. - def : FPConversion; + def : FPConversion; } // Divide. diff --git a/lib/Target/SystemZ/SystemZLongBranch.cpp b/lib/Target/SystemZ/SystemZLongBranch.cpp index 95d7e22dec32..724111229569 100644 --- a/lib/Target/SystemZ/SystemZLongBranch.cpp +++ b/lib/Target/SystemZ/SystemZLongBranch.cpp @@ -85,9 +85,9 @@ struct MBBInfo { // This value never changes. 
uint64_t Size = 0; - // The minimum alignment of the block, as a log2 value. + // The minimum alignment of the block. // This value never changes. - unsigned Alignment = 0; + Align Alignment; // The number of terminators in this block. This value never changes. unsigned NumTerminators = 0; @@ -127,7 +127,8 @@ struct BlockPosition { // as the runtime address. unsigned KnownBits; - BlockPosition(unsigned InitialAlignment) : KnownBits(InitialAlignment) {} + BlockPosition(unsigned InitialLogAlignment) + : KnownBits(InitialLogAlignment) {} }; class SystemZLongBranch : public MachineFunctionPass { @@ -178,17 +179,16 @@ const uint64_t MaxForwardRange = 0xfffe; // instructions. void SystemZLongBranch::skipNonTerminators(BlockPosition &Position, MBBInfo &Block) { - if (Block.Alignment > Position.KnownBits) { + if (Log2(Block.Alignment) > Position.KnownBits) { // When calculating the address of Block, we need to conservatively // assume that Block had the worst possible misalignment. - Position.Address += ((uint64_t(1) << Block.Alignment) - - (uint64_t(1) << Position.KnownBits)); - Position.KnownBits = Block.Alignment; + Position.Address += + (Block.Alignment.value() - (uint64_t(1) << Position.KnownBits)); + Position.KnownBits = Log2(Block.Alignment); } // Align the addresses. - uint64_t AlignMask = (uint64_t(1) << Block.Alignment) - 1; - Position.Address = (Position.Address + AlignMask) & ~AlignMask; + Position.Address = alignTo(Position.Address, Block.Alignment); // Record the block's position. Block.Address = Position.Address; @@ -257,7 +257,7 @@ TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr &MI) { } Terminator.Branch = &MI; Terminator.TargetBlock = - TII->getBranchInfo(MI).Target->getMBB()->getNumber(); + TII->getBranchInfo(MI).getMBBTarget()->getNumber(); } return Terminator; } @@ -275,7 +275,7 @@ uint64_t SystemZLongBranch::initMBBInfo() { Terminators.clear(); Terminators.reserve(NumBlocks); - BlockPosition Position(MF->getAlignment()); + BlockPosition Position(Log2(MF->getAlignment())); for (unsigned I = 0; I < NumBlocks; ++I) { MachineBasicBlock *MBB = MF->getBlockNumbered(I); MBBInfo &Block = MBBs[I]; @@ -339,7 +339,7 @@ bool SystemZLongBranch::mustRelaxABranch() { // must be long. void SystemZLongBranch::setWorstCaseAddresses() { SmallVector::iterator TI = Terminators.begin(); - BlockPosition Position(MF->getAlignment()); + BlockPosition Position(Log2(MF->getAlignment())); for (auto &Block : MBBs) { skipNonTerminators(Position, Block); for (unsigned BTI = 0, BTE = Block.NumTerminators; BTI != BTE; ++BTI) { @@ -440,7 +440,7 @@ void SystemZLongBranch::relaxBranch(TerminatorInfo &Terminator) { // Run a shortening pass and relax any branches that need to be relaxed. 
void SystemZLongBranch::relaxBranches() { SmallVector::iterator TI = Terminators.begin(); - BlockPosition Position(MF->getAlignment()); + BlockPosition Position(Log2(MF->getAlignment())); for (auto &Block : MBBs) { skipNonTerminators(Position, Block); for (unsigned BTI = 0, BTE = Block.NumTerminators; BTI != BTE; ++BTI) { diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/lib/Target/SystemZ/SystemZMachineScheduler.cpp index 0becfaa1d49c..3fc25034dded 100644 --- a/lib/Target/SystemZ/SystemZMachineScheduler.cpp +++ b/lib/Target/SystemZ/SystemZMachineScheduler.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "SystemZMachineScheduler.h" +#include "llvm/CodeGen/MachineLoopInfo.h" using namespace llvm; @@ -108,8 +109,8 @@ void SystemZPostRASchedStrategy::enterMBB(MachineBasicBlock *NextMBB) { I != SinglePredMBB->end(); I++) { LLVM_DEBUG(dbgs() << "** Emitting incoming branch: "; I->dump();); bool TakenBranch = (I->isBranch() && - (TII->getBranchInfo(*I).Target->isReg() || // Relative branch - TII->getBranchInfo(*I).Target->getMBB() == MBB)); + (TII->getBranchInfo(*I).isIndirect() || + TII->getBranchInfo(*I).getMBBTarget() == MBB)); HazardRec->emitInstruction(&*I, TakenBranch); if (TakenBranch) break; diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td index 56632e1529a2..b2bab68a6274 100644 --- a/lib/Target/SystemZ/SystemZOperands.td +++ b/lib/Target/SystemZ/SystemZOperands.td @@ -21,15 +21,32 @@ class ImmediateTLSAsmOperand let RenderMethod = "addImmTLSOperands"; } +class ImmediateOp : Operand { + let PrintMethod = "print"##asmop##"Operand"; + let DecoderMethod = "decode"##asmop##"Operand"; + let ParserMatchClass = !cast(asmop); +} + +class ImmOpWithPattern : + ImmediateOp, PatLeaf<(vt ImmNode), pred, xform>; + +// class ImmediatePatLeaf +// : PatLeaf<(vt ImmNode), pred, xform>; + + // Constructs both a DAG pattern and instruction operand for an immediate // of type VT. PRED returns true if a node is acceptable and XFORM returns // the operand value associated with the node. ASMOP is the name of the // associated asm operand, and also forms the basis of the asm print method. -class Immediate - : PatLeaf<(vt imm), pred, xform>, Operand { - let PrintMethod = "print"##asmop##"Operand"; - let DecoderMethod = "decode"##asmop##"Operand"; - let ParserMatchClass = !cast(asmop); +multiclass Immediate { + // def "" : ImmediateOp, + // PatLeaf<(vt imm), pred, xform>; + def "" : ImmOpWithPattern; + +// def _timm : PatLeaf<(vt timm), pred, xform>; + def _timm : ImmOpWithPattern; } // Constructs an asm operand for a PC-relative address. SIZE says how @@ -295,87 +312,87 @@ def U48Imm : ImmediateAsmOperand<"U48Imm">; // Immediates for the lower and upper 16 bits of an i32, with the other // bits of the i32 being zero. -def imm32ll16 : ImmediategetZExtValue()); }], LL16, "U16Imm">; -def imm32lh16 : ImmediategetZExtValue()); }], LH16, "U16Imm">; // Immediates for the lower and upper 16 bits of an i32, with the other // bits of the i32 being one. 
-def imm32ll16c : ImmediategetZExtValue())); }], LL16, "U16Imm">; -def imm32lh16c : ImmediategetZExtValue())); }], LH16, "U16Imm">; // Short immediates -def imm32zx1 : Immediate(N->getZExtValue()); }], NOOP_SDNodeXForm, "U1Imm">; -def imm32zx2 : Immediate(N->getZExtValue()); }], NOOP_SDNodeXForm, "U2Imm">; -def imm32zx3 : Immediate(N->getZExtValue()); }], NOOP_SDNodeXForm, "U3Imm">; -def imm32zx4 : Immediate(N->getZExtValue()); }], NOOP_SDNodeXForm, "U4Imm">; // Note: this enforces an even value during code generation only. // When used from the assembler, any 4-bit value is allowed. -def imm32zx4even : Immediate(N->getZExtValue()); }], UIMM8EVEN, "U4Imm">; -def imm32zx6 : Immediate(N->getZExtValue()); }], NOOP_SDNodeXForm, "U6Imm">; -def imm32sx8 : Immediate(N->getSExtValue()); }], SIMM8, "S8Imm">; -def imm32zx8 : Immediate(N->getZExtValue()); }], UIMM8, "U8Imm">; -def imm32zx8trunc : Immediate; +defm imm32zx8trunc : Immediate; -def imm32zx12 : Immediate(N->getZExtValue()); }], UIMM12, "U12Imm">; -def imm32sx16 : Immediate(N->getSExtValue()); }], SIMM16, "S16Imm">; -def imm32sx16n : Immediate(-N->getSExtValue()); }], NEGSIMM16, "S16Imm">; -def imm32zx16 : Immediate(N->getZExtValue()); }], UIMM16, "U16Imm">; -def imm32sx16trunc : Immediate; -def imm32zx16trunc : Immediate; +defm imm32sx16trunc : Immediate; +defm imm32zx16trunc : Immediate; // Full 32-bit immediates. we need both signed and unsigned versions // because the assembler is picky. E.g. AFI requires signed operands // while NILF requires unsigned ones. -def simm32 : Immediate; -def uimm32 : Immediate; +defm simm32 : Immediate; +defm uimm32 : Immediate; -def simm32n : Immediate(-N->getSExtValue()); }], NEGSIMM32, "S32Imm">; @@ -387,107 +404,107 @@ def imm32 : ImmLeaf; // Immediates for 16-bit chunks of an i64, with the other bits of the // i32 being zero. -def imm64ll16 : ImmediategetZExtValue()); }], LL16, "U16Imm">; -def imm64lh16 : ImmediategetZExtValue()); }], LH16, "U16Imm">; -def imm64hl16 : ImmediategetZExtValue()); }], HL16, "U16Imm">; -def imm64hh16 : ImmediategetZExtValue()); }], HH16, "U16Imm">; // Immediates for 16-bit chunks of an i64, with the other bits of the // i32 being one. -def imm64ll16c : ImmediategetZExtValue())); }], LL16, "U16Imm">; -def imm64lh16c : ImmediategetZExtValue())); }], LH16, "U16Imm">; -def imm64hl16c : ImmediategetZExtValue())); }], HL16, "U16Imm">; -def imm64hh16c : ImmediategetZExtValue())); }], HH16, "U16Imm">; // Immediates for the lower and upper 32 bits of an i64, with the other // bits of the i32 being zero. -def imm64lf32 : ImmediategetZExtValue()); }], LF32, "U32Imm">; -def imm64hf32 : ImmediategetZExtValue()); }], HF32, "U32Imm">; // Immediates for the lower and upper 32 bits of an i64, with the other // bits of the i32 being one. -def imm64lf32c : ImmediategetZExtValue())); }], LF32, "U32Imm">; -def imm64hf32c : ImmediategetZExtValue())); }], HF32, "U32Imm">; // Negated immediates that fit LF32 or LH16. -def imm64lh16n : ImmediategetZExtValue())); }], NEGLH16, "U16Imm">; -def imm64lf32n : ImmediategetZExtValue())); }], NEGLF32, "U32Imm">; // Short immediates. 
-def imm64sx8 : Immediate(N->getSExtValue()); }], SIMM8, "S8Imm">; -def imm64zx8 : Immediate(N->getSExtValue()); }], UIMM8, "U8Imm">; -def imm64sx16 : Immediate(N->getSExtValue()); }], SIMM16, "S16Imm">; -def imm64sx16n : Immediate(-N->getSExtValue()); }], NEGSIMM16, "S16Imm">; -def imm64zx16 : Immediate(N->getZExtValue()); }], UIMM16, "U16Imm">; -def imm64sx32 : Immediate(N->getSExtValue()); }], SIMM32, "S32Imm">; -def imm64sx32n : Immediate(-N->getSExtValue()); }], NEGSIMM32, "S32Imm">; -def imm64zx32 : Immediate(N->getZExtValue()); }], UIMM32, "U32Imm">; -def imm64zx32n : Immediate(-N->getSExtValue()); }], NEGUIMM32, "U32Imm">; -def imm64zx48 : Immediate(N->getZExtValue()); }], UIMM48, "U48Imm">; @@ -637,7 +654,7 @@ def bdvaddr12only : BDVMode< "64", "12">; //===----------------------------------------------------------------------===// // A 4-bit condition-code mask. -def cond4 : PatLeaf<(i32 imm), [{ return (N->getZExtValue() < 16); }]>, +def cond4 : PatLeaf<(i32 timm), [{ return (N->getZExtValue() < 16); }]>, Operand { let PrintMethod = "printCond4Operand"; } diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index 15bd12bc98a4..6fe383e64b74 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -472,17 +472,17 @@ def z_subcarry : PatFrag<(ops node:$lhs, node:$rhs), (z_subcarry_1 node:$lhs, node:$rhs, CC)>; // Signed and unsigned comparisons. -def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{ +def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, timm), [{ unsigned Type = cast(N->getOperand(2))->getZExtValue(); return Type != SystemZICMP::UnsignedOnly; }]>; -def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{ +def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, timm), [{ unsigned Type = cast(N->getOperand(2))->getZExtValue(); return Type != SystemZICMP::SignedOnly; }]>; // Register- and memory-based TEST UNDER MASK. -def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, imm)>; +def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, timm)>; def z_tm_mem : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, 0)>; // Register sign-extend operations. Sub-32-bit values are represented as i32s. 
diff --git a/lib/Target/SystemZ/SystemZPatterns.td b/lib/Target/SystemZ/SystemZPatterns.td index beaf4de285a3..65300fb47627 100644 --- a/lib/Target/SystemZ/SystemZPatterns.td +++ b/lib/Target/SystemZ/SystemZPatterns.td @@ -100,12 +100,12 @@ multiclass CondStores64 { def : Pat<(store (z_select_ccmask GR64:$new, (load mode:$addr), - imm32zx4:$valid, imm32zx4:$cc), + imm32zx4_timm:$valid, imm32zx4_timm:$cc), mode:$addr), (insn (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr, imm32zx4:$valid, imm32zx4:$cc)>; def : Pat<(store (z_select_ccmask (load mode:$addr), GR64:$new, - imm32zx4:$valid, imm32zx4:$cc), + imm32zx4_timm:$valid, imm32zx4_timm:$cc), mode:$addr), (insninv (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr, imm32zx4:$valid, imm32zx4:$cc)>; diff --git a/lib/Target/SystemZ/SystemZPostRewrite.cpp b/lib/Target/SystemZ/SystemZPostRewrite.cpp index 8e4060eac74c..aaa7f8fc88f5 100644 --- a/lib/Target/SystemZ/SystemZPostRewrite.cpp +++ b/lib/Target/SystemZ/SystemZPostRewrite.cpp @@ -25,6 +25,7 @@ using namespace llvm; #define DEBUG_TYPE "systemz-postrewrite" STATISTIC(MemFoldCopies, "Number of copies inserted before folded mem ops."); +STATISTIC(LOCRMuxJumps, "Number of LOCRMux jump-sequences (lower is better)"); namespace llvm { void initializeSystemZPostRewritePass(PassRegistry&); @@ -45,12 +46,20 @@ public: StringRef getPassName() const override { return SYSTEMZ_POSTREWRITE_NAME; } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(AU); - } - private: + void selectLOCRMux(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned LowOpcode, + unsigned HighOpcode); + void selectSELRMux(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned LowOpcode, + unsigned HighOpcode); + bool expandCondMove(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); bool selectMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI); bool selectMBB(MachineBasicBlock &MBB); @@ -68,11 +77,141 @@ FunctionPass *llvm::createSystemZPostRewritePass(SystemZTargetMachine &TM) { return new SystemZPostRewrite(); } +// MI is a load-register-on-condition pseudo instruction. Replace it with +// LowOpcode if source and destination are both low GR32s and HighOpcode if +// source and destination are both high GR32s. Otherwise, a branch sequence +// is created. +void SystemZPostRewrite::selectLOCRMux(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned LowOpcode, + unsigned HighOpcode) { + Register DestReg = MBBI->getOperand(0).getReg(); + Register SrcReg = MBBI->getOperand(2).getReg(); + bool DestIsHigh = SystemZ::isHighReg(DestReg); + bool SrcIsHigh = SystemZ::isHighReg(SrcReg); + + if (!DestIsHigh && !SrcIsHigh) + MBBI->setDesc(TII->get(LowOpcode)); + else if (DestIsHigh && SrcIsHigh) + MBBI->setDesc(TII->get(HighOpcode)); + else + expandCondMove(MBB, MBBI, NextMBBI); +} + +// MI is a select pseudo instruction. Replace it with LowOpcode if source +// and destination are all low GR32s and HighOpcode if source and destination +// are all high GR32s. Otherwise, a branch sequence is created. 
+void SystemZPostRewrite::selectSELRMux(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned LowOpcode, + unsigned HighOpcode) { + Register DestReg = MBBI->getOperand(0).getReg(); + Register Src1Reg = MBBI->getOperand(1).getReg(); + Register Src2Reg = MBBI->getOperand(2).getReg(); + bool DestIsHigh = SystemZ::isHighReg(DestReg); + bool Src1IsHigh = SystemZ::isHighReg(Src1Reg); + bool Src2IsHigh = SystemZ::isHighReg(Src2Reg); + + // If sources and destination aren't all high or all low, we may be able to + // simplify the operation by moving one of the sources to the destination + // first. But only if this doesn't clobber the other source. + if (DestReg != Src1Reg && DestReg != Src2Reg) { + if (DestIsHigh != Src1IsHigh) { + BuildMI(*MBBI->getParent(), MBBI, MBBI->getDebugLoc(), + TII->get(SystemZ::COPY), DestReg) + .addReg(MBBI->getOperand(1).getReg(), getRegState(MBBI->getOperand(1))); + MBBI->getOperand(1).setReg(DestReg); + Src1Reg = DestReg; + Src1IsHigh = DestIsHigh; + } else if (DestIsHigh != Src2IsHigh) { + BuildMI(*MBBI->getParent(), MBBI, MBBI->getDebugLoc(), + TII->get(SystemZ::COPY), DestReg) + .addReg(MBBI->getOperand(2).getReg(), getRegState(MBBI->getOperand(2))); + MBBI->getOperand(2).setReg(DestReg); + Src2Reg = DestReg; + Src2IsHigh = DestIsHigh; + } + } + + // If the destination (now) matches one source, prefer this to be first. + if (DestReg != Src1Reg && DestReg == Src2Reg) { + TII->commuteInstruction(*MBBI, false, 1, 2); + std::swap(Src1Reg, Src2Reg); + std::swap(Src1IsHigh, Src2IsHigh); + } + + if (!DestIsHigh && !Src1IsHigh && !Src2IsHigh) + MBBI->setDesc(TII->get(LowOpcode)); + else if (DestIsHigh && Src1IsHigh && Src2IsHigh) + MBBI->setDesc(TII->get(HighOpcode)); + else + // Given the simplification above, we must already have a two-operand case. + expandCondMove(MBB, MBBI, NextMBBI); +} + +// Replace MBBI by a branch sequence that performs a conditional move of +// operand 2 to the destination register. Operand 1 is expected to be the +// same register as the destination. +bool SystemZPostRewrite::expandCondMove(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + MachineFunction &MF = *MBB.getParent(); + const BasicBlock *BB = MBB.getBasicBlock(); + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + Register DestReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(2).getReg(); + unsigned CCValid = MI.getOperand(3).getImm(); + unsigned CCMask = MI.getOperand(4).getImm(); + assert(DestReg == MI.getOperand(1).getReg() && + "Expected destination and first source operand to be the same."); + + LivePhysRegs LiveRegs(TII->getRegisterInfo()); + LiveRegs.addLiveOuts(MBB); + for (auto I = std::prev(MBB.end()); I != MBBI; --I) + LiveRegs.stepBackward(*I); + + // Splice MBB at MI, moving the rest of the block into RestMBB. + MachineBasicBlock *RestMBB = MF.CreateMachineBasicBlock(BB); + MF.insert(std::next(MachineFunction::iterator(MBB)), RestMBB); + RestMBB->splice(RestMBB->begin(), &MBB, MI, MBB.end()); + RestMBB->transferSuccessors(&MBB); + for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) + RestMBB->addLiveIn(*I); + + // Create a new block MoveMBB to hold the move instruction. 
+ MachineBasicBlock *MoveMBB = MF.CreateMachineBasicBlock(BB); + MF.insert(std::next(MachineFunction::iterator(MBB)), MoveMBB); + MoveMBB->addLiveIn(SrcReg); + for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) + MoveMBB->addLiveIn(*I); + + // At the end of MBB, create a conditional branch to RestMBB if the + // condition is false, otherwise fall through to MoveMBB. + BuildMI(&MBB, DL, TII->get(SystemZ::BRC)) + .addImm(CCValid).addImm(CCMask ^ CCValid).addMBB(RestMBB); + MBB.addSuccessor(RestMBB); + MBB.addSuccessor(MoveMBB); + + // In MoveMBB, emit an instruction to move SrcReg into DestReg, + // then fall through to RestMBB. + BuildMI(*MoveMBB, MoveMBB->end(), DL, TII->get(SystemZ::COPY), DestReg) + .addReg(MI.getOperand(2).getReg(), getRegState(MI.getOperand(2))); + MoveMBB->addSuccessor(RestMBB); + + NextMBBI = MBB.end(); + MI.eraseFromParent(); + LOCRMuxJumps++; + return true; +} + /// If MBBI references a pseudo instruction that should be selected here, /// do it and return true. Otherwise return false. bool SystemZPostRewrite::selectMI(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI) { + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { MachineInstr &MI = *MBBI; unsigned Opcode = MI.getOpcode(); @@ -83,7 +222,7 @@ bool SystemZPostRewrite::selectMI(MachineBasicBlock &MBB, if (TargetMemOpcode != -1) { MI.setDesc(TII->get(TargetMemOpcode)); MI.tieOperands(0, 1); - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); MachineOperand &SrcMO = MI.getOperand(1); if (DstReg != SrcMO.getReg()) { BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), DstReg) @@ -94,6 +233,15 @@ bool SystemZPostRewrite::selectMI(MachineBasicBlock &MBB, return true; } + switch (Opcode) { + case SystemZ::LOCRMux: + selectLOCRMux(MBB, MBBI, NextMBBI, SystemZ::LOCR, SystemZ::LOCFHR); + return true; + case SystemZ::SELRMux: + selectSELRMux(MBB, MBBI, NextMBBI, SystemZ::SELR, SystemZ::SELFHR); + return true; + } + return false; } diff --git a/lib/Target/SystemZ/SystemZProcessors.td b/lib/Target/SystemZ/SystemZProcessors.td index b27c25beb58c..af33a0300552 100644 --- a/lib/Target/SystemZ/SystemZProcessors.td +++ b/lib/Target/SystemZ/SystemZProcessors.td @@ -35,5 +35,6 @@ def : ProcessorModel<"z13", Z13Model, Arch11SupportedFeatures.List>; def : ProcessorModel<"arch12", Z14Model, Arch12SupportedFeatures.List>; def : ProcessorModel<"z14", Z14Model, Arch12SupportedFeatures.List>; -def : ProcessorModel<"arch13", Arch13Model, Arch13SupportedFeatures.List>; +def : ProcessorModel<"arch13", Z15Model, Arch13SupportedFeatures.List>; +def : ProcessorModel<"z15", Z15Model, Arch13SupportedFeatures.List>; diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index e7cd6871dbb4..39ace5594b7f 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -41,7 +41,7 @@ static const TargetRegisterClass *getRC32(MachineOperand &MO, return &SystemZ::GRH32BitRegClass; if (VRM && VRM->hasPhys(MO.getReg())) { - unsigned PhysReg = VRM->getPhys(MO.getReg()); + Register PhysReg = VRM->getPhys(MO.getReg()); if (SystemZ::GR32BitRegClass.contains(PhysReg)) return &SystemZ::GR32BitRegClass; assert (SystemZ::GRH32BitRegClass.contains(PhysReg) && @@ -120,8 +120,8 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg, } // Add the other operand of the LOCRMux to the worklist. 
- unsigned OtherReg = - (TrueMO.getReg() == Reg ? FalseMO.getReg() : TrueMO.getReg()); + Register OtherReg = + (TrueMO.getReg() == Reg ? FalseMO.getReg() : TrueMO.getReg()); if (MRI->getRegClass(OtherReg) == &SystemZ::GRX32BitRegClass) Worklist.push_back(OtherReg); } // end LOCRMux @@ -169,7 +169,8 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg, auto tryAddHint = [&](const MachineOperand *MO) -> void { Register Reg = MO->getReg(); - Register PhysReg = isPhysicalRegister(Reg) ? Reg : VRM->getPhys(Reg); + Register PhysReg = + Register::isPhysicalRegister(Reg) ? Reg : VRM->getPhys(Reg); if (PhysReg) { if (MO->getSubReg()) PhysReg = getSubReg(PhysReg, MO->getSubReg()); @@ -297,8 +298,8 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, assert(Mask && "One offset must be OK"); } while (!OpcodeForOffset); - unsigned ScratchReg = - MF.getRegInfo().createVirtualRegister(&SystemZ::ADDR64BitRegClass); + Register ScratchReg = + MF.getRegInfo().createVirtualRegister(&SystemZ::ADDR64BitRegClass); int64_t HighOffset = OldOffset - Offset; if (MI->getDesc().TSFlags & SystemZII::HasIndex @@ -351,8 +352,8 @@ bool SystemZRegisterInfo::shouldCoalesce(MachineInstr *MI, // regalloc may run out of registers. unsigned WideOpNo = (getRegSizeInBits(*SrcRC) == 128 ? 1 : 0); - unsigned GR128Reg = MI->getOperand(WideOpNo).getReg(); - unsigned GRNarReg = MI->getOperand((WideOpNo == 1) ? 0 : 1).getReg(); + Register GR128Reg = MI->getOperand(WideOpNo).getReg(); + Register GRNarReg = MI->getOperand((WideOpNo == 1) ? 0 : 1).getReg(); LiveInterval &IntGR128 = LIS.getInterval(GR128Reg); LiveInterval &IntGRNar = LIS.getInterval(GRNarReg); @@ -385,7 +386,7 @@ bool SystemZRegisterInfo::shouldCoalesce(MachineInstr *MI, MEE++; for (; MII != MEE; ++MII) { for (const MachineOperand &MO : MII->operands()) - if (MO.isReg() && isPhysicalRegister(MO.getReg())) { + if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) { for (MCSuperRegIterator SI(MO.getReg(), this, true/*IncludeSelf*/); SI.isValid(); ++SI) if (NewRC->contains(*SI)) { diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index 4f721ec23e53..7044efef1ac6 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -28,6 +28,15 @@ inline unsigned even128(bool Is32bit) { inline unsigned odd128(bool Is32bit) { return Is32bit ? subreg_l32 : subreg_l64; } + +// Reg should be a 32-bit GPR. Return true if it is a high register rather +// than a low register. 
+inline bool isHighReg(unsigned int Reg) { + if (SystemZ::GRH32BitRegClass.contains(Reg)) + return true; + assert(SystemZ::GR32BitRegClass.contains(Reg) && "Invalid GRX32"); + return false; +} } // end namespace SystemZ struct SystemZRegisterInfo : public SystemZGenRegisterInfo { diff --git a/lib/Target/SystemZ/SystemZSchedule.td b/lib/Target/SystemZ/SystemZSchedule.td index 98eca2802242..119e3ee7c22c 100644 --- a/lib/Target/SystemZ/SystemZSchedule.td +++ b/lib/Target/SystemZ/SystemZSchedule.td @@ -59,7 +59,7 @@ def VBU : SchedWrite; // Virtual branching unit def MCD : SchedWrite; // Millicode -include "SystemZScheduleArch13.td" +include "SystemZScheduleZ15.td" include "SystemZScheduleZ14.td" include "SystemZScheduleZ13.td" include "SystemZScheduleZEC12.td" diff --git a/lib/Target/SystemZ/SystemZScheduleArch13.td b/lib/Target/SystemZ/SystemZScheduleArch13.td deleted file mode 100644 index 9f82f24d0e8f..000000000000 --- a/lib/Target/SystemZ/SystemZScheduleArch13.td +++ /dev/null @@ -1,1695 +0,0 @@ -//-- SystemZScheduleArch13.td - SystemZ Scheduling Definitions ----*- tblgen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the machine model for Arch13 to support instruction -// scheduling and other instruction cost heuristics. -// -// Pseudos expanded right after isel do not need to be modelled here. -// -//===----------------------------------------------------------------------===// - -def Arch13Model : SchedMachineModel { - - let UnsupportedFeatures = Arch13UnsupportedFeatures.List; - - let IssueWidth = 6; // Number of instructions decoded per cycle. - let MicroOpBufferSize = 60; // Issue queues - let LoadLatency = 1; // Optimistic load latency. - - let PostRAScheduler = 1; - - // Extra cycles for a mispredicted branch. - let MispredictPenalty = 20; -} - -let SchedModel = Arch13Model in { -// These definitions need the SchedModel value. They could be put in a -// subtarget common include file, but it seems the include system in Tablegen -// currently (2016) rejects multiple includes of same file. - -// Decoder grouping rules -let NumMicroOps = 1 in { - def : WriteRes; - def : WriteRes { let BeginGroup = 1; } - def : WriteRes { let EndGroup = 1; } -} -def : WriteRes { - let NumMicroOps = 2; - let BeginGroup = 1; -} -def : WriteRes { - let NumMicroOps = 3; - let BeginGroup = 1; - let EndGroup = 1; -} -def : WriteRes { - let NumMicroOps = 6; - let BeginGroup = 1; - let EndGroup = 1; -} -def : WriteRes { - let NumMicroOps = 9; - let BeginGroup = 1; - let EndGroup = 1; -} - -// Incoming latency removed from the register operand which is used together -// with a memory operand by the instruction. -def : ReadAdvance; - -// LoadLatency (above) is not used for instructions in this file. This is -// instead the role of LSULatency, which is the latency value added to the -// result of loads and instructions with folded memory operands. -def : WriteRes { let Latency = 4; let NumMicroOps = 0; } - -let NumMicroOps = 0 in { - foreach L = 1-30 in - def : WriteRes("WLat"#L), []> { let Latency = L; } -} - -// Execution units. 
-def Arch13_FXaUnit : ProcResource<2>; -def Arch13_FXbUnit : ProcResource<2>; -def Arch13_LSUnit : ProcResource<2>; -def Arch13_VecUnit : ProcResource<2>; -def Arch13_VecFPdUnit : ProcResource<2> { let BufferSize = 1; /* blocking */ } -def Arch13_VBUnit : ProcResource<2>; -def Arch13_MCD : ProcResource<1>; - -// Subtarget specific definitions of scheduling resources. -let NumMicroOps = 0 in { - def : WriteRes; - def : WriteRes; - def : WriteRes; - def : WriteRes; - def : WriteRes; - def : WriteRes; - def : WriteRes; - def : WriteRes; - def : WriteRes; - foreach Num = 2-5 in { let ResourceCycles = [Num] in { - def : WriteRes("FXa"#Num), [Arch13_FXaUnit]>; - def : WriteRes("FXb"#Num), [Arch13_FXbUnit]>; - def : WriteRes("LSU"#Num), [Arch13_LSUnit]>; - def : WriteRes("VecBF"#Num), [Arch13_VecUnit]>; - def : WriteRes("VecDF"#Num), [Arch13_VecUnit]>; - def : WriteRes("VecDFX"#Num), [Arch13_VecUnit]>; - def : WriteRes("VecMul"#Num), [Arch13_VecUnit]>; - def : WriteRes("VecStr"#Num), [Arch13_VecUnit]>; - def : WriteRes("VecXsPm"#Num), [Arch13_VecUnit]>; - }} - - def : WriteRes { let ResourceCycles = [30]; } - - def : WriteRes; // Virtual Branching Unit -} - -def : WriteRes { let NumMicroOps = 3; - let BeginGroup = 1; - let EndGroup = 1; } - -// -------------------------- INSTRUCTIONS ---------------------------------- // - -// InstRW constructs have been used in order to preserve the -// readability of the InstrInfo files. - -// For each instruction, as matched by a regexp, provide a list of -// resources that it needs. These will be combined into a SchedClass. - -//===----------------------------------------------------------------------===// -// Stack allocation -//===----------------------------------------------------------------------===// - -// Pseudo -> LA / LAY -def : InstRW<[WLat1, FXa, NormalGr], (instregex "ADJDYNALLOC$")>; - -//===----------------------------------------------------------------------===// -// Branch instructions -//===----------------------------------------------------------------------===// - -// Branch -def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?BRC(L)?(Asm.*)?$")>; -def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?J(G)?(Asm.*)?$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?BC(R)?(Asm.*)?$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?B(R)?(Asm.*)?$")>; -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "BI(C)?(Asm.*)?$")>; -def : InstRW<[WLat1, FXa, EndGroup], (instregex "BRCT(G)?$")>; -def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BRCTH$")>; -def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BCT(G)?(R)?$")>; -def : InstRW<[WLat1, FXa2, FXb2, GroupAlone2], - (instregex "B(R)?X(H|L).*$")>; - -// Compare and branch -def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>; -def : InstRW<[WLat1, FXb2, GroupAlone], - (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>; - -//===----------------------------------------------------------------------===// -// Trap instructions -//===----------------------------------------------------------------------===// - -// Trap -def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Cond)?Trap$")>; - -// Compare and trap -def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?(I|R)T(Asm.*)?$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(G)?RT(Asm.*)?$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(F|G)IT(Asm.*)?$")>; -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>; - 
-//===----------------------------------------------------------------------===// -// Call and return instructions -//===----------------------------------------------------------------------===// - -// Call -def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>; -def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL$")>; -def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?$")>; -def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>; - -// Return -def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn$")>; - -//===----------------------------------------------------------------------===// -// Move instructions -//===----------------------------------------------------------------------===// - -// Moves -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MV(G|H)?HI$")>; -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MVI(Y)?$")>; - -// Move character -def : InstRW<[WLat1, FXb, LSU3, GroupAlone], (instregex "MVC$")>; -def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVCL(E|U)?$")>; -def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "MVCRL$")>; - -// Pseudo -> reg move -def : InstRW<[WLat1, FXa, NormalGr], (instregex "COPY(_TO_REGCLASS)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "EXTRACT_SUBREG$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "INSERT_SUBREG$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "REG_SEQUENCE$")>; - -// Loads -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L(Y|FH|RL|Mux)?$")>; -def : InstRW<[LSULatency, LSULatency, LSU, NormalGr], (instregex "LCBB$")>; -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LG(RL)?$")>; -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L128$")>; - -def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIH(F|H|L)$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIL(F|H|L)$")>; - -def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(F|H)I$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "LHI(Mux)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "LR(Mux)?$")>; - -// Load and zero rightmost byte -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LZR(F|G)$")>; - -// Load and trap -def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "L(FH|G)?AT$")>; - -// Load and test -def : InstRW<[WLat1LSU, WLat1LSU, LSU, FXa, NormalGr], (instregex "LT(G)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "LT(G)?R$")>; - -// Stores -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STG(RL)?$")>; -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST128$")>; -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(Y|FH|RL|Mux)?$")>; - -// String moves. 
-def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVST$")>; - -//===----------------------------------------------------------------------===// -// Conditional move instructions -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOCRMux$")>; -def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|FH)?R(Asm.*)?$")>; -def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|H)?HI(Mux|(Asm.*))?$")>; -def : InstRW<[WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], - (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>; -def : InstRW<[WLat1, FXb, LSU, NormalGr], - (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>; - -def : InstRW<[WLat2, FXa, NormalGr], (instregex "SELRMux$")>; -def : InstRW<[WLat2, FXa, NormalGr], (instregex "SEL(G|FH)?R(Asm.*)?$")>; - -//===----------------------------------------------------------------------===// -// Sign extensions -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat1, FXa, NormalGr], (instregex "L(B|H|G)R$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(B|H|F)R$")>; - -def : InstRW<[WLat1LSU, WLat1LSU, FXa, LSU, NormalGr], (instregex "LTGF$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "LTGFR$")>; - -def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LB(H|Mux)?$")>; -def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(Y)?$")>; -def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(H|Mux|RL)$")>; -def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(B|H|F)$")>; -def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(H|F)RL$")>; - -//===----------------------------------------------------------------------===// -// Zero extensions -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLCR(Mux)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLHR(Mux)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLG(C|H|F|T)R$")>; -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLC(Mux)?$")>; -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLH(Mux)?$")>; -def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LL(C|H)H$")>; -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLHRL$")>; -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLG(C|H|F|T|HRL|FRL)$")>; - -// Load and zero rightmost byte -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLZRGF$")>; - -// Load and trap -def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "LLG(F|T)?AT$")>; - -//===----------------------------------------------------------------------===// -// Truncations -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STC(H|Y|Mux)?$")>; -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STH(H|Y|RL|Mux)?$")>; -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STCM(H|Y)?$")>; - -//===----------------------------------------------------------------------===// -// Multi-register moves -//===----------------------------------------------------------------------===// - -// Load multiple (estimated average of 5 ops) -def : InstRW<[WLat10, WLat10, LSU5, GroupAlone], (instregex "LM(H|Y|G)?$")>; - -// Load multiple disjoint -def : InstRW<[WLat30, WLat30, MCD], (instregex "LMD$")>; - -// Store multiple -def : InstRW<[WLat1, LSU2, FXb3, GroupAlone], (instregex "STM(G|H|Y)?$")>; - 
-//===----------------------------------------------------------------------===// -// Byte swaps -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat1, FXa, NormalGr], (instregex "LRV(G)?R$")>; -def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LRV(G|H)?$")>; -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STRV(G|H)?$")>; -def : InstRW<[WLat30, MCD], (instregex "MVCIN$")>; - -//===----------------------------------------------------------------------===// -// Load address instructions -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat1, FXa, NormalGr], (instregex "LA(Y|RL)?$")>; - -// Load the Global Offset Table address ( -> larl ) -def : InstRW<[WLat1, FXa, NormalGr], (instregex "GOT$")>; - -//===----------------------------------------------------------------------===// -// Absolute and Negation -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LP(G)?R$")>; -def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "L(N|P)GFR$")>; -def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LN(R|GR)$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "LC(R|GR)$")>; -def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "LCGFR$")>; - -//===----------------------------------------------------------------------===// -// Insertion -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "IC(Y)?$")>; -def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], - (instregex "IC32(Y)?$")>; -def : InstRW<[WLat1LSU, RegReadAdv, WLat1LSU, FXa, LSU, NormalGr], - (instregex "ICM(H|Y)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "II(F|H|L)Mux$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHF(64)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHH(64)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHL(64)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILF(64)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILH(64)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILL(64)?$")>; - -//===----------------------------------------------------------------------===// -// Addition -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], - (instregex "A(Y)?$")>; -def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], - (instregex "AH(Y)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "AIH$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "AFI(Mux)?$")>; -def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], - (instregex "AG$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGFI$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGHI(K)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGR(K)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHI(K)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHIMux(K)?$")>; -def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], - (instregex "AL(Y)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "AL(FI|HSIK)$")>; -def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], - (instregex "ALG(F)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGHSIK$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGF(I|R)$")>; -def : 
InstRW<[WLat1, FXa, NormalGr], (instregex "ALGR(K)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALR(K)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>; -def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>; -def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>; - -// Logical addition with carry -def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], - (instregex "ALC(G)?$")>; -def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "ALC(G)?R$")>; - -// Add with sign extension (16/32 -> 64) -def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], - (instregex "AG(F|H)$")>; -def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "AGFR$")>; - -//===----------------------------------------------------------------------===// -// Subtraction -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], - (instregex "S(G|Y)?$")>; -def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], - (instregex "SH(Y)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "SGR(K)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLFI$")>; -def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], - (instregex "SL(G|GF|Y)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGF(I|R)$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGR(K)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLR(K)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>; -def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>; - -// Subtraction with borrow -def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], - (instregex "SLB(G)?$")>; -def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "SLB(G)?R$")>; - -// Subtraction with sign extension (16/32 -> 64) -def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], - (instregex "SG(F|H)$")>; -def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "SGFR$")>; - -//===----------------------------------------------------------------------===// -// AND -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], - (instregex "N(G|Y)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "NGR(K)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "NI(FMux|HMux|LMux)$")>; -def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "NI(Y)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHF(64)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHH(64)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHL(64)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILF(64)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILH(64)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILL(64)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "NR(K)?$")>; -def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "NC$")>; - -//===----------------------------------------------------------------------===// -// OR -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], - (instregex "O(G|Y)?$")>; -def : 
InstRW<[WLat1, FXa, NormalGr], (instregex "OGR(K)?$")>; -def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "OI(Y)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "OI(FMux|HMux|LMux)$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHF(64)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHH(64)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHL(64)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILF(64)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILH(64)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILL(64)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "OR(K)?$")>; -def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "OC$")>; - -//===----------------------------------------------------------------------===// -// XOR -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], - (instregex "X(G|Y)?$")>; -def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "XI(Y)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIFMux$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "XGR(K)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIHF(64)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "XILF(64)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "XR(K)?$")>; -def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "XC$")>; - -//===----------------------------------------------------------------------===// -// Combined logical operations -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat1, FXa, NormalGr], (instregex "NC(G)?RK$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "OC(G)?RK$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "NN(G)?RK$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "NO(G)?RK$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "NX(G)?RK$")>; - -//===----------------------------------------------------------------------===// -// Multiplication -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat5LSU, RegReadAdv, FXa, LSU, NormalGr], - (instregex "MS(GF|Y)?$")>; -def : InstRW<[WLat5, FXa, NormalGr], (instregex "MS(R|FI)$")>; -def : InstRW<[WLat7LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MSG$")>; -def : InstRW<[WLat7, FXa, NormalGr], (instregex "MSGR$")>; -def : InstRW<[WLat5, FXa, NormalGr], (instregex "MSGF(I|R)$")>; -def : InstRW<[WLat8LSU, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MLG$")>; -def : InstRW<[WLat8, FXa2, GroupAlone], (instregex "MLGR$")>; -def : InstRW<[WLat4, FXa, NormalGr], (instregex "MGHI$")>; -def : InstRW<[WLat4, FXa, NormalGr], (instregex "MHI$")>; -def : InstRW<[WLat4LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MH(Y)?$")>; -def : InstRW<[WLat6, FXa2, GroupAlone], (instregex "M(L)?R$")>; -def : InstRW<[WLat6LSU, RegReadAdv, FXa2, LSU, GroupAlone], - (instregex "M(FY|L)?$")>; -def : InstRW<[WLat8, RegReadAdv, FXa, LSU, NormalGr], (instregex "MGH$")>; -def : InstRW<[WLat12, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MG$")>; -def : InstRW<[WLat8, FXa2, GroupAlone], (instregex "MGRK$")>; -def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, FXa, LSU, NormalGr], - (instregex "MSC$")>; -def : InstRW<[WLat8LSU, WLat8LSU, RegReadAdv, FXa, LSU, NormalGr], - (instregex "MSGC$")>; -def : InstRW<[WLat6, WLat6, FXa, NormalGr], (instregex "MSRKC$")>; -def : InstRW<[WLat8, WLat8, FXa, NormalGr], (instregex "MSGRKC$")>; - 
-//===----------------------------------------------------------------------===// -// Division and remainder -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DR$")>; -def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "D$")>; -def : InstRW<[WLat30, FXa2, GroupAlone], (instregex "DSG(F)?R$")>; -def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone2], - (instregex "DSG(F)?$")>; -def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DLR$")>; -def : InstRW<[WLat30, FXa4, GroupAlone], (instregex "DLGR$")>; -def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], - (instregex "DL(G)?$")>; - -//===----------------------------------------------------------------------===// -// Shifts -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLL(G|K)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRL(G|K)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRA(G|K)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLA(G|K)?$")>; -def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone2], - (instregex "S(L|R)D(A|L)$")>; - -// Rotate -def : InstRW<[WLat2LSU, FXa, LSU, NormalGr], (instregex "RLL(G)?$")>; - -// Rotate and insert -def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBG(N|32)?$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBH(G|H|L)$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBL(G|H|L)$")>; -def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBMux$")>; - -// Rotate and Select -def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "R(N|O|X)SBG$")>; - -//===----------------------------------------------------------------------===// -// Comparison -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], - (instregex "C(G|Y|Mux)?$")>; -def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CRL$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(F|H)I(Mux)?$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "CG(F|H)I$")>; -def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CG(HSI|RL)$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?R$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "CIH$")>; -def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CHF$")>; -def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CHSI$")>; -def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], - (instregex "CL(Y|Mux)?$")>; -def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLFHSI$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLFI(Mux)?$")>; -def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLG$")>; -def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLG(HRL|HSI)$")>; -def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLGF$")>; -def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGFRL$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGF(I|R)$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGR$")>; -def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGRL$")>; -def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLHF$")>; -def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLH(RL|HSI)$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLIH$")>; -def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLI(Y)?$")>; -def : InstRW<[WLat1, 
FXb, NormalGr], (instregex "CLR$")>; -def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>; -def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>; - -// Compare halfword -def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CH(Y)?$")>; -def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CHRL$")>; -def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGH$")>; -def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGHRL$")>; -def : InstRW<[WLat2LSU, FXa, FXb, LSU, Cracked], (instregex "CHHSI$")>; - -// Compare with sign extension (32 -> 64) -def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGF$")>; -def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGFRL$")>; -def : InstRW<[WLat2, FXb, NormalGr], (instregex "CGFR$")>; - -// Compare logical character -def : InstRW<[WLat6, FXb, LSU2, Cracked], (instregex "CLC$")>; -def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLCL(E|U)?$")>; -def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLST$")>; - -// Test under mask -def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "TM(Y)?$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "TM(H|L)Mux$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHH(64)?$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHL(64)?$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLH(64)?$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLL(64)?$")>; - -// Compare logical characters under mask -def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], - (instregex "CLM(H|Y)?$")>; - -//===----------------------------------------------------------------------===// -// Prefetch and execution hint -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat1, LSU, NormalGr], (instregex "PFD(RL)?$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "BPP$")>; -def : InstRW<[FXb, EndGroup], (instregex "BPRP$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "NIAI$")>; - -//===----------------------------------------------------------------------===// -// Atomic operations -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat1, FXb, EndGroup], (instregex "Serialize$")>; - -def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAA(G)?$")>; -def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAAL(G)?$")>; -def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAN(G)?$")>; -def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAO(G)?$")>; -def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAX(G)?$")>; - -// Test and set -def : InstRW<[WLat2LSU, FXb, LSU, EndGroup], (instregex "TS$")>; - -// Compare and swap -def : InstRW<[WLat3LSU, WLat3LSU, FXa, FXb, LSU, GroupAlone], - (instregex "CS(G|Y)?$")>; - -// Compare double and swap -def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone2], - (instregex "CDS(Y)?$")>; -def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3, - GroupAlone3], (instregex "CDSG$")>; - -// Compare and swap and store -def : InstRW<[WLat30, MCD], (instregex "CSST$")>; - -// Perform locked operation -def : InstRW<[WLat30, MCD], (instregex "PLO$")>; - -// Load/store pair from/to quadword -def : InstRW<[WLat4LSU, LSU2, GroupAlone], (instregex "LPQ$")>; -def : InstRW<[WLat1, FXb2, LSU, GroupAlone], (instregex "STPQ$")>; - -// Load pair disjoint -def : 
InstRW<[WLat1LSU, WLat1LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>; - -//===----------------------------------------------------------------------===// -// Translate and convert -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "TR$")>; -def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone2], - (instregex "TRT$")>; -def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>; -def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>; -def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRT(R)?E(Opt)?$")>; -def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TR(T|O)(T|O)(Opt)?$")>; -def : InstRW<[WLat30, WLat30, WLat30, MCD], - (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; -def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(CUUTF|CUTFU)(Opt)?$")>; - -//===----------------------------------------------------------------------===// -// Message-security assist -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], - (instregex "KM(C|F|O|CTR|A)?$")>; -def : InstRW<[WLat30, WLat30, WLat30, MCD], - (instregex "(KIMD|KLMD|KMAC|KDSA)$")>; -def : InstRW<[WLat30, WLat30, WLat30, MCD], - (instregex "(PCC|PPNO|PRNO)$")>; - -//===----------------------------------------------------------------------===// -// Guarded storage -//===----------------------------------------------------------------------===// - -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LGG$")>; -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLGFSG$")>; -def : InstRW<[WLat30, MCD], (instregex "(L|ST)GSC$")>; - -//===----------------------------------------------------------------------===// -// Decimal arithmetic -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat20, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone2], - (instregex "CVBG$")>; -def : InstRW<[WLat20, RegReadAdv, FXb, VecDF, LSU, GroupAlone2], - (instregex "CVB(Y)?$")>; -def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone3], (instregex "CVDG$")>; -def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone2], (instregex "CVD(Y)?$")>; -def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>; -def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>; -def : InstRW<[WLat12, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>; -def : InstRW<[WLat1, FXb, LSU2, Cracked], (instregex "UNPK$")>; - -def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone2], - (instregex "(A|S|ZA)P$")>; -def : InstRW<[WLat1, FXb, VecDFX2, LSU3, GroupAlone2], (instregex "MP$")>; -def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone2], (instregex "DP$")>; -def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone3], (instregex "SRP$")>; -def : InstRW<[WLat8, VecDFX, LSU, LSU, GroupAlone], (instregex "CP$")>; -def : InstRW<[WLat3LSU, VecDFX, LSU, Cracked], (instregex "TP$")>; -def : InstRW<[WLat30, MCD], (instregex "ED(MK)?$")>; - -//===----------------------------------------------------------------------===// -// Access registers -//===----------------------------------------------------------------------===// - -// Extract/set/copy access register -def : InstRW<[WLat3, LSU, NormalGr], (instregex "(EAR|SAR|CPYA)$")>; - -// Load address extended -def : InstRW<[WLat5, LSU, FXa, Cracked], (instregex "LAE(Y)?$")>; - -// Load/store access multiple (not modeled precisely) -def : InstRW<[WLat20, WLat20, LSU5, GroupAlone], (instregex "LAM(Y)?$")>; -def : 
InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STAM(Y)?$")>; - -//===----------------------------------------------------------------------===// -// Program mask and addressing mode -//===----------------------------------------------------------------------===// - -// Insert Program Mask -def : InstRW<[WLat3, FXa, EndGroup], (instregex "IPM$")>; - -// Set Program Mask -def : InstRW<[WLat3, LSU, EndGroup], (instregex "SPM$")>; - -// Branch and link -def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BAL(R)?$")>; - -// Test addressing mode -def : InstRW<[WLat1, FXb, NormalGr], (instregex "TAM$")>; - -// Set addressing mode -def : InstRW<[WLat1, FXb, EndGroup], (instregex "SAM(24|31|64)$")>; - -// Branch (and save) and set mode. -def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BSM$")>; -def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BASSM$")>; - -//===----------------------------------------------------------------------===// -// Transactional execution -//===----------------------------------------------------------------------===// - -// Transaction begin -def : InstRW<[WLat9, LSU2, FXb5, GroupAlone2], (instregex "TBEGIN(C)?$")>; - -// Transaction end -def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TEND$")>; - -// Transaction abort -def : InstRW<[WLat30, MCD], (instregex "TABORT$")>; - -// Extract Transaction Nesting Depth -def : InstRW<[WLat1, FXa, NormalGr], (instregex "ETND$")>; - -// Nontransactional store -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "NTSTG$")>; - -//===----------------------------------------------------------------------===// -// Processor assist -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat1, FXb, GroupAlone], (instregex "PPA$")>; - -//===----------------------------------------------------------------------===// -// Miscellaneous Instructions. -//===----------------------------------------------------------------------===// - -// Find leftmost one -def : InstRW<[WLat5, WLat5, FXa2, GroupAlone], (instregex "FLOGR$")>; - -// Population count -def : InstRW<[WLat3, WLat3, FXa, NormalGr], (instregex "POPCNT(Opt)?$")>; - -// String instructions -def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "SRST(U)?$")>; -def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CUSE$")>; - -// Various complex instructions -def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CFC$")>; -def : InstRW<[WLat30, WLat30, WLat30, WLat30, WLat30, WLat30, MCD], - (instregex "UPT$")>; -def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CKSM$")>; -def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CMPSC$")>; -def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "SORTL$")>; -def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "DFLTCC$")>; - -// Execute -def : InstRW<[WLat1, FXb, GroupAlone], (instregex "EX(RL)?$")>; - -//===----------------------------------------------------------------------===// -// .insn directive instructions -//===----------------------------------------------------------------------===// - -// An "empty" sched-class will be assigned instead of the "invalid sched-class". -// getNumDecoderSlots() will then return 1 instead of 0. 
-def : InstRW<[], (instregex "Insn.*")>; - - -// ----------------------------- Floating point ----------------------------- // - -//===----------------------------------------------------------------------===// -// FP: Move instructions -//===----------------------------------------------------------------------===// - -// Load zero -def : InstRW<[WLat1, FXb, NormalGr], (instregex "LZ(DR|ER)$")>; -def : InstRW<[WLat2, FXb2, Cracked], (instregex "LZXR$")>; - -// Load -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R32|GR)$")>; -def : InstRW<[WLat3, FXb, NormalGr], (instregex "LGDR$")>; -def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>; - -// Load and Test -def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>; -def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BRCompare$")>; -def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], - (instregex "LTXBR(Compare)?$")>; - -// Copy sign -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>; - -//===----------------------------------------------------------------------===// -// FP: Load instructions -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "LE(Y)?$")>; -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LD(Y|E32)?$")>; -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; - -//===----------------------------------------------------------------------===// -// FP: Store instructions -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E|D)(Y)?$")>; -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STX$")>; - -//===----------------------------------------------------------------------===// -// FP: Conversion instructions -//===----------------------------------------------------------------------===// - -// Load rounded -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEDBR(A)?$")>; -def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "L(E|D)XBR(A)?$")>; - -// Load lengthened -def : InstRW<[WLat6LSU, VecBF, LSU, NormalGr], (instregex "LDEB$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LDEBR$")>; -def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)B$")>; -def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "LX(E|D)BR$")>; - -// Convert from fixed / logical -def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)BR(A)?$")>; -def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)BR(A)?$")>; -def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)L(F|G)BR$")>; -def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)BR$")>; - -// Convert to fixed / logical -def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], - (instregex "C(F|G)(E|D)BR(A)?$")>; -def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], - (instregex "C(F|G)XBR(A)?$")>; -def : InstRW<[WLat9, WLat9, FXb, VecBF, GroupAlone], (instregex "CLFEBR$")>; -def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLFDBR$")>; -def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLG(E|D)BR$")>; -def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "CL(F|G)XBR$")>; - -//===----------------------------------------------------------------------===// -// FP: Unary arithmetic -//===----------------------------------------------------------------------===// - -// Load Complement / 
Negative / Positive -def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)BR$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "L(C|N|P)DFR(_32)?$")>; -def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XBR$")>; - -// Square root -def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)B$")>; -def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQ(E|D)BR$")>; -def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXBR$")>; - -// Load FP integer -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)BR(A)?$")>; -def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXBR(A)?$")>; - -//===----------------------------------------------------------------------===// -// FP: Binary arithmetic -//===----------------------------------------------------------------------===// - -// Addition -def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], - (instregex "A(E|D)B$")>; -def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D)BR$")>; -def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXBR$")>; - -// Subtraction -def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], - (instregex "S(E|D)B$")>; -def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D)BR$")>; -def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXBR$")>; - -// Multiply -def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], - (instregex "M(D|DE|EE)B$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|EE)BR$")>; -def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], - (instregex "MXDB$")>; -def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDBR$")>; -def : InstRW<[WLat15, VecDF4, GroupAlone], (instregex "MXBR$")>; - -// Multiply and add / subtract -def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], - (instregex "M(A|S)EB$")>; -def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)EBR$")>; -def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], - (instregex "M(A|S)DB$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(A|S)DBR$")>; - -// Division -def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], - (instregex "D(E|D)B$")>; -def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "D(E|D)BR$")>; -def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXBR$")>; - -// Divide to integer -def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "DI(E|D)BR$")>; - -//===----------------------------------------------------------------------===// -// FP: Comparisons -//===----------------------------------------------------------------------===// - -// Compare -def : InstRW<[WLat3LSU, RegReadAdv, VecXsPm, LSU, NormalGr], - (instregex "(K|C)(E|D)B$")>; -def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "(K|C)(E|D)BR$")>; -def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XBR$")>; - -// Test Data Class -def : InstRW<[WLat5, LSU, VecXsPm, NormalGr], (instregex "TC(E|D)B$")>; -def : InstRW<[WLat10, LSU, VecDF4, GroupAlone], (instregex "TCXB$")>; - -//===----------------------------------------------------------------------===// -// FP: Floating-point control register instructions -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat4, FXa, LSU, GroupAlone], (instregex "EFPC$")>; -def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "STFPC$")>; -def : InstRW<[WLat3, LSU, GroupAlone], (instregex "SFPC$")>; -def : InstRW<[WLat3LSU, LSU2, GroupAlone], (instregex "LFPC$")>; -def : 
InstRW<[WLat30, MCD], (instregex "SFASR$")>; -def : InstRW<[WLat30, MCD], (instregex "LFAS$")>; -def : InstRW<[WLat3, FXb, GroupAlone], (instregex "SRNM(B|T)?$")>; - - -// --------------------- Hexadecimal floating point ------------------------- // - -//===----------------------------------------------------------------------===// -// HFP: Move instructions -//===----------------------------------------------------------------------===// - -// Load and Test -def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)R$")>; -def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXR$")>; - -//===----------------------------------------------------------------------===// -// HFP: Conversion instructions -//===----------------------------------------------------------------------===// - -// Load rounded -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "(LEDR|LRER)$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEXR$")>; -def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "(LDXR|LRDR)$")>; - -// Load lengthened -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LDE$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "LDER$")>; -def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)$")>; -def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "LX(E|D)R$")>; - -// Convert from fixed -def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)R$")>; -def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)R$")>; - -// Convert to fixed -def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "C(F|G)(E|D)R$")>; -def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "C(F|G)XR$")>; - -// Convert BFP to HFP / HFP to BFP. -def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "THD(E)?R$")>; -def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "TB(E)?DR$")>; - -//===----------------------------------------------------------------------===// -// HFP: Unary arithmetic -//===----------------------------------------------------------------------===// - -// Load Complement / Negative / Positive -def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)R$")>; -def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XR$")>; - -// Halve -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "H(E|D)R$")>; - -// Square root -def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)$")>; -def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQ(E|D)R$")>; -def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXR$")>; - -// Load FP integer -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)R$")>; -def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXR$")>; - -//===----------------------------------------------------------------------===// -// HFP: Binary arithmetic -//===----------------------------------------------------------------------===// - -// Addition -def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], - (instregex "A(E|D|U|W)$")>; -def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D|U|W)R$")>; -def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXR$")>; - -// Subtraction -def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], - (instregex "S(E|D|U|W)$")>; -def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D|U|W)R$")>; -def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXR$")>; - -// Multiply -def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], - (instregex "M(D|DE|E|EE)$")>; -def 
: InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|E|EE)R$")>; -def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], - (instregex "MXD$")>; -def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDR$")>; -def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXR$")>; -def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], (instregex "MY$")>; -def : InstRW<[WLat6LSU, RegReadAdv, VecBF2, LSU, GroupAlone], - (instregex "MY(H|L)$")>; -def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MYR$")>; -def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MY(H|L)R$")>; - -// Multiply and add / subtract -def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], - (instregex "M(A|S)(E|D)$")>; -def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)(E|D)R$")>; -def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, VecBF4, LSU, GroupAlone], - (instregex "MAY$")>; -def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], - (instregex "MAY(H|L)$")>; -def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MAYR$")>; -def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MAY(H|L)R$")>; - -// Division -def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], (instregex "D(E|D)$")>; -def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "D(E|D)R$")>; -def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXR$")>; - -//===----------------------------------------------------------------------===// -// HFP: Comparisons -//===----------------------------------------------------------------------===// - -// Compare -def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], - (instregex "C(E|D)$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "C(E|D)R$")>; -def : InstRW<[WLat10, VecDF2, GroupAlone], (instregex "CXR$")>; - - -// ------------------------ Decimal floating point -------------------------- // - -//===----------------------------------------------------------------------===// -// DFP: Move instructions -//===----------------------------------------------------------------------===// - -// Load and Test -def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "LTDTR$")>; -def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXTR$")>; - -//===----------------------------------------------------------------------===// -// DFP: Conversion instructions -//===----------------------------------------------------------------------===// - -// Load rounded -def : InstRW<[WLat15, VecDF, NormalGr], (instregex "LEDTR$")>; -def : InstRW<[WLat15, VecDF2, NormalGr], (instregex "LDXTR$")>; - -// Load lengthened -def : InstRW<[WLat8, VecDF, NormalGr], (instregex "LDETR$")>; -def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "LXDTR$")>; - -// Convert from fixed / logical -def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDFTR(A)?$")>; -def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDGTR(A)?$")>; -def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXFTR(A)?$")>; -def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXGTR(A)?$")>; -def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDLFTR$")>; -def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDLGTR$")>; -def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXLFTR$")>; -def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXLGTR$")>; - -// Convert to fixed / logical -def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked], - (instregex "C(F|G)DTR(A)?$")>; -def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], - (instregex "C(F|G)XTR(A)?$")>; -def 
: InstRW<[WLat30, WLat30, FXb, VecDF, Cracked], (instregex "CL(F|G)DTR$")>; -def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], (instregex "CL(F|G)XTR$")>; - -// Convert from / to signed / unsigned packed -def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "CD(S|U)TR$")>; -def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone2], (instregex "CX(S|U)TR$")>; -def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "C(S|U)DTR$")>; -def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone2], (instregex "C(S|U)XTR$")>; - -// Convert from / to zoned -def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDZT$")>; -def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXZT$")>; -def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CZDT$")>; -def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CZXT$")>; - -// Convert from / to packed -def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDPT$")>; -def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXPT$")>; -def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CPDT$")>; -def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CPXT$")>; - -// Perform floating-point operation -def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "PFPO$")>; - -//===----------------------------------------------------------------------===// -// DFP: Unary arithmetic -//===----------------------------------------------------------------------===// - -// Load FP integer -def : InstRW<[WLat8, VecDF, NormalGr], (instregex "FIDTR$")>; -def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXTR$")>; - -// Extract biased exponent -def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEDTR$")>; -def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEXTR$")>; - -// Extract significance -def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "ESDTR$")>; -def : InstRW<[WLat12, FXb, VecDF2, Cracked], (instregex "ESXTR$")>; - -//===----------------------------------------------------------------------===// -// DFP: Binary arithmetic -//===----------------------------------------------------------------------===// - -// Addition -def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "ADTR(A)?$")>; -def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXTR(A)?$")>; - -// Subtraction -def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "SDTR(A)?$")>; -def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXTR(A)?$")>; - -// Multiply -def : InstRW<[WLat30, VecDF, NormalGr], (instregex "MDTR(A)?$")>; -def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXTR(A)?$")>; - -// Division -def : InstRW<[WLat30, VecDF, NormalGr], (instregex "DDTR(A)?$")>; -def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "DXTR(A)?$")>; - -// Quantize -def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "QADTR$")>; -def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "QAXTR$")>; - -// Reround -def : InstRW<[WLat9, WLat9, FXb, VecDF, Cracked], (instregex "RRDTR$")>; -def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone2], (instregex "RRXTR$")>; - -// Shift significand left/right -def : InstRW<[WLat11LSU, LSU, VecDF, GroupAlone], (instregex "S(L|R)DT$")>; -def : InstRW<[WLat11LSU, LSU, VecDF4, GroupAlone], (instregex "S(L|R)XT$")>; - -// Insert biased exponent -def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "IEDTR$")>; -def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "IEXTR$")>; - -//===----------------------------------------------------------------------===// -// DFP: Comparisons 
-//===----------------------------------------------------------------------===// - -// Compare -def : InstRW<[WLat8, VecDF, NormalGr], (instregex "(K|C)DTR$")>; -def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XTR$")>; - -// Compare biased exponent -def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEDTR$")>; -def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEXTR$")>; - -// Test Data Class/Group -def : InstRW<[WLat15, LSU, VecDF, NormalGr], (instregex "TD(C|G)(E|D)T$")>; -def : InstRW<[WLat15, LSU, VecDF2, GroupAlone], (instregex "TD(C|G)XT$")>; - - -// --------------------------------- Vector --------------------------------- // - -//===----------------------------------------------------------------------===// -// Vector: Move instructions -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLR(32|64)?$")>; -def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLGV(B|F|G|H)?$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLVG(B|F|G|H)?$")>; -def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLVGP(32)?$")>; - -//===----------------------------------------------------------------------===// -// Vector: Immediate instructions -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VZERO$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VONE$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGBM$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGM(B|F|G|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREPI(B|F|G|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>; - -//===----------------------------------------------------------------------===// -// Vector: Loads -//===----------------------------------------------------------------------===// - -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>; -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>; -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>; -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H|LF)?$")>; -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>; -def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], - (instregex "VLE(B|F|G|H)$")>; -def : InstRW<[WLat5LSU, RegReadAdv, FXb, LSU, VecXsPm, Cracked], - (instregex "VGE(F|G)$")>; -def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone], - (instregex "VLM(Align)?$")>; -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLRL(R)?$")>; - -//===----------------------------------------------------------------------===// -// Vector: Stores -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>; -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>; -def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>; -def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>; -def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>; -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTRL(R)?$")>; - -//===----------------------------------------------------------------------===// -// Vector: Byte swaps -//===----------------------------------------------------------------------===// - -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBR(H|F|G|Q)?$")>; -def : 
InstRW<[LSULatency, LSU, NormalGr], (instregex "VLER(H|F|G)?$")>; -def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], - (instregex "VLEBR(H|F|G)$")>; -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEBRZ(H|F|G|E)?$")>; -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBRREP(H|F|G)?$")>; -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTBR(H|F|G|Q)?$")>; -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTER(H|F|G)?$")>; -def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTEBRH$")>; -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTEBR(F|G)$")>; - -//===----------------------------------------------------------------------===// -// Vector: Selects and permutes -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRH(B|F|G|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRL(B|F|G|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPERM$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPDI$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VBPERM$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREP(B|F|G|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEL$")>; - -//===----------------------------------------------------------------------===// -// Vector: Widening and narrowing -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPK(F|G|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)?$")>; -def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)S$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)?$")>; -def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)S$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEG(B|F|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPH(B|F|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPL(B|F)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLH(B|F|H|W)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLL(B|F|H)?$")>; - -//===----------------------------------------------------------------------===// -// Vector: Integer arithmetic -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VA(B|F|G|H|Q|C|CQ)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VACC(B|F|G|H|Q|C|CQ)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVG(B|F|G|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVGL(B|F|G|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VN(C|O|N|X)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VO(C)?$")>; -def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VCKSM$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCLZ(B|F|G|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCTZ(B|F|G|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VX$")>; -def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM?$")>; -def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFMA(B|F|G|H)?$")>; -def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM(B|F|G|H)$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLC(B|F|G|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLP(B|F|G|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], 
(instregex "VMX(B|F|G|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMXL(B|F|G|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMN(B|F|G|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMNL(B|F|G|H)?$")>; -def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAL(B|F)?$")>; -def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALE(B|F|H)?$")>; -def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALH(B|F|H|W)?$")>; -def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALO(B|F|H)?$")>; -def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAO(B|F|H)?$")>; -def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAE(B|F|H)?$")>; -def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAH(B|F|H)?$")>; -def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VME(B|F|H)?$")>; -def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMH(B|F|H)?$")>; -def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VML(B|F)?$")>; -def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLE(B|F|H)?$")>; -def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLH(B|F|H|W)?$")>; -def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLO(B|F|H)?$")>; -def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMO(B|F|H)?$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VMSL(G)?$")>; - -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPOPCT(B|F|G|H)?$")>; - -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLL(B|F|G|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLLV(B|F|G|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERIM(B|F|G|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESL(B|F|G|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESLV(B|F|G|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRA(B|F|G|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRAV(B|F|G|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRL(B|F|G|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRLV(B|F|G|H)?$")>; - -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSL(DB)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLB$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)B$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLD$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSRD$")>; - -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSB(I|IQ|CBI|CBIQ)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSCBI(B|F|G|H|Q)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VS(F|G|H|Q)?$")>; - -def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUM(B|H)?$")>; -def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMG(F|H)?$")>; -def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMQ(F|G)?$")>; - -//===----------------------------------------------------------------------===// -// Vector: Integer comparison -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VEC(B|F|G|H)?$")>; -def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VECL(B|F|G|H)?$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)?$")>; -def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)S$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H)?$")>; -def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex 
"VCH(B|F|G|H)S$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)?$")>; -def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)S$")>; -def : InstRW<[WLat4, VecStr, NormalGr], (instregex "VTM$")>; - -//===----------------------------------------------------------------------===// -// Vector: Floating-point arithmetic -//===----------------------------------------------------------------------===// - -// Conversion and rounding -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCFP(S|L)$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?G$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?GB$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCD(L)?GB$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCE(L)?FB$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCE(L)?FB$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(S|L)FP$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?GD$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?GDB$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?GDB$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?FEB$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?FEB$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)B$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WL(DE|ED)B$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(L|R)$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(LS|RD)$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFL(LS|RD)$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFLLD$")>; -def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WFLRX$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFI(DB)?$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFIDB$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFISB$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFISB$")>; -def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WFIXB$")>; - -// Sign operations -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VFPSO$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSODB$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSOSB$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFPSOXB$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)DB$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)SB$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFL(C|N|P)XB$")>; - -// Minimum / maximum -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)DB$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)DB$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)SB$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)SB$")>; -def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WF(MAX|MIN)XB$")>; - -// Test data class -def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFTCI$")>; -def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCIDB$")>; -def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCISB$")>; -def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFTCIXB$")>; - -// Add / subtract -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)$")>; -def : InstRW<[WLat6, VecBF, NormalGr], 
(instregex "VF(A|S)DB$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)DB$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)SB$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)SB$")>; -def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WF(A|S)XB$")>; - -// Multiply / multiply-and-add/subtract -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFM(DB)?$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFM(D|S)B$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFMSB$")>; -def : InstRW<[WLat20, VecDF2, NormalGr], (instregex "WFMXB$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)DB$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)DB$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)SB$")>; -def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)SB$")>; -def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "WF(N)?M(A|S)XB$")>; - -// Divide / square root -def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFD$")>; -def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FDDB$")>; -def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FDSB$")>; -def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFDXB$")>; -def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFSQ$")>; -def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FSQDB$")>; -def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FSQSB$")>; -def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFSQXB$")>; - -//===----------------------------------------------------------------------===// -// Vector: Floating-point comparison -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)DB$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)DB$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)DB$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)SB$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SB$")>; -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SB$")>; -def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XB$")>; -def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XB$")>; -def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFC(E|H|HE)DBS$")>; -def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFK(E|H|HE)DBS$")>; -def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], - (instregex "WF(C|K)(E|H|HE)DBS$")>; -def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], - (instregex "VF(C|K)(E|H|HE)SBS$")>; -def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SBS$")>; -def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SBS$")>; -def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XBS$")>; -def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XBS$")>; -def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)$")>; -def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)DB$")>; -def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)SB$")>; -def : InstRW<[WLat3, VecDFX, NormalGr], (instregex "WF(C|K)XB$")>; - -//===----------------------------------------------------------------------===// -// Vector: Floating-point insertion and extraction 
-//===----------------------------------------------------------------------===// - -def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR$")>; -def : InstRW<[WLat3, FXb, NormalGr], (instregex "LFER$")>; - -//===----------------------------------------------------------------------===// -// Vector: String instructions -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(B)?$")>; -def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(F|H)$")>; -def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAE(B|F|H)S$")>; -def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)$")>; -def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)S$")>; -def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFEE(B|F|H|ZB|ZF|ZH)?$")>; -def : InstRW<[WLat4, WLat4, VecStr, NormalGr], - (instregex "VFEE(B|F|H|ZB|ZF|ZH)S$")>; -def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFENE(B|F|H|ZB|ZF|ZH)?$")>; -def : InstRW<[WLat4, WLat4, VecStr, NormalGr], - (instregex "VFENE(B|F|H|ZB|ZF|ZH)S$")>; -def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VISTR(B|F|H)?$")>; -def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VISTR(B|F|H)S$")>; -def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRC(B|F|H)?$")>; -def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRC(B|F|H)S$")>; -def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)$")>; -def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)S$")>; -def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRS(B|F|H)?$")>; -def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRSZ(B|F|H)$")>; - -//===----------------------------------------------------------------------===// -// Vector: Packed-decimal instructions -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "VLIP$")>; -def : InstRW<[WLat6, VecDFX, LSU, GroupAlone2], (instregex "VPKZ$")>; -def : InstRW<[WLat1, VecDFX, FXb, LSU2, GroupAlone2], (instregex "VUPKZ$")>; -def : InstRW<[WLat20, WLat20, VecDF2, FXb, GroupAlone], - (instregex "VCVB(G)?(Opt)?$")>; -def : InstRW<[WLat15, WLat15, VecDF2, FXb, GroupAlone], - (instregex "VCVD(G)?$")>; -def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "V(A|S)P$")>; -def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "VM(S)?P$")>; -def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "V(D|R)P$")>; -def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "VSDP$")>; -def : InstRW<[WLat10, WLat10, VecDF2, NormalGr], (instregex "VSRP$")>; -def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "VPSOP$")>; -def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "V(T|C)P$")>; - - -// -------------------------------- System ---------------------------------- // - -//===----------------------------------------------------------------------===// -// System: Program-Status Word Instructions -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat30, WLat30, MCD], (instregex "EPSW$")>; -def : InstRW<[WLat20, GroupAlone3], (instregex "LPSW(E)?$")>; -def : InstRW<[WLat3, FXa, GroupAlone], (instregex "IPK$")>; -def : InstRW<[WLat1, LSU, EndGroup], (instregex "SPKA$")>; -def : InstRW<[WLat1, LSU, EndGroup], (instregex "SSM$")>; -def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "ST(N|O)SM$")>; -def : InstRW<[WLat3, FXa, NormalGr], (instregex 
"IAC$")>; -def : InstRW<[WLat1, LSU, EndGroup], (instregex "SAC(F)?$")>; - -//===----------------------------------------------------------------------===// -// System: Control Register Instructions -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat4LSU, WLat4LSU, LSU2, GroupAlone], (instregex "LCTL(G)?$")>; -def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STCT(L|G)$")>; -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "E(P|S)A(I)?R$")>; -def : InstRW<[WLat30, MCD], (instregex "SSA(I)?R$")>; -def : InstRW<[WLat30, MCD], (instregex "ESEA$")>; - -//===----------------------------------------------------------------------===// -// System: Prefix-Register Instructions -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat30, MCD], (instregex "S(T)?PX$")>; - -//===----------------------------------------------------------------------===// -// System: Storage-Key and Real Memory Instructions -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat30, MCD], (instregex "ISKE$")>; -def : InstRW<[WLat30, MCD], (instregex "IVSK$")>; -def : InstRW<[WLat30, MCD], (instregex "SSKE(Opt)?$")>; -def : InstRW<[WLat30, MCD], (instregex "RRB(E|M)$")>; -def : InstRW<[WLat30, MCD], (instregex "IRBM$")>; -def : InstRW<[WLat30, MCD], (instregex "PFMF$")>; -def : InstRW<[WLat30, WLat30, MCD], (instregex "TB$")>; -def : InstRW<[WLat30, MCD], (instregex "PGIN$")>; -def : InstRW<[WLat30, MCD], (instregex "PGOUT$")>; - -//===----------------------------------------------------------------------===// -// System: Dynamic-Address-Translation Instructions -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat30, MCD], (instregex "IPTE(Opt)?(Opt)?$")>; -def : InstRW<[WLat30, MCD], (instregex "IDTE(Opt)?$")>; -def : InstRW<[WLat30, MCD], (instregex "CRDTE(Opt)?$")>; -def : InstRW<[WLat30, MCD], (instregex "PTLB$")>; -def : InstRW<[WLat30, WLat30, MCD], (instregex "CSP(G)?$")>; -def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "LPTEA$")>; -def : InstRW<[WLat30, WLat30, MCD], (instregex "LRA(Y|G)?$")>; -def : InstRW<[WLat30, MCD], (instregex "STRAG$")>; -def : InstRW<[WLat30, MCD], (instregex "LURA(G)?$")>; -def : InstRW<[WLat30, MCD], (instregex "STUR(A|G)$")>; -def : InstRW<[WLat30, MCD], (instregex "TPROT$")>; - -//===----------------------------------------------------------------------===// -// System: Memory-move Instructions -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone2], (instregex "MVC(K|P|S)$")>; -def : InstRW<[WLat1, FXa, LSU5, GroupAlone2], (instregex "MVC(S|D)K$")>; -def : InstRW<[WLat30, MCD], (instregex "MVCOS$")>; -def : InstRW<[WLat30, MCD], (instregex "MVPG$")>; - -//===----------------------------------------------------------------------===// -// System: Address-Space Instructions -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat30, MCD], (instregex "LASP$")>; -def : InstRW<[WLat1, LSU, GroupAlone], (instregex "PALB$")>; -def : InstRW<[WLat30, MCD], (instregex "PC$")>; -def : InstRW<[WLat30, MCD], (instregex "PR$")>; -def : InstRW<[WLat30, MCD], (instregex "PT(I)?$")>; -def : InstRW<[WLat30, MCD], (instregex "RP$")>; -def : InstRW<[WLat30, MCD], (instregex "BS(G|A)$")>; -def : InstRW<[WLat30, MCD], (instregex "TAR$")>; - 
-//===----------------------------------------------------------------------===// -// System: Linkage-Stack Instructions -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat30, MCD], (instregex "BAKR$")>; -def : InstRW<[WLat30, MCD], (instregex "EREG(G)?$")>; -def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>; - -//===----------------------------------------------------------------------===// -// System: Time-Related Instructions -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat30, MCD], (instregex "PTFF$")>; -def : InstRW<[WLat30, MCD], (instregex "SCK(PF|C)?$")>; -def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "SPT$")>; -def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone2], (instregex "STCK(F)?$")>; -def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone3], (instregex "STCKE$")>; -def : InstRW<[WLat30, MCD], (instregex "STCKC$")>; -def : InstRW<[WLat1, LSU2, FXb, Cracked], (instregex "STPT$")>; - -//===----------------------------------------------------------------------===// -// System: CPU-Related Instructions -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat30, MCD], (instregex "STAP$")>; -def : InstRW<[WLat30, MCD], (instregex "STIDP$")>; -def : InstRW<[WLat30, WLat30, MCD], (instregex "STSI$")>; -def : InstRW<[WLat30, WLat30, MCD], (instregex "STFL(E)?$")>; -def : InstRW<[WLat30, MCD], (instregex "ECAG$")>; -def : InstRW<[WLat30, WLat30, MCD], (instregex "ECTG$")>; -def : InstRW<[WLat30, MCD], (instregex "PTF$")>; -def : InstRW<[WLat30, MCD], (instregex "PCKMO$")>; - -//===----------------------------------------------------------------------===// -// System: Miscellaneous Instructions -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat30, MCD], (instregex "SVC$")>; -def : InstRW<[WLat1, FXb, GroupAlone], (instregex "MC$")>; -def : InstRW<[WLat30, MCD], (instregex "DIAG$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "TRAC(E|G)$")>; -def : InstRW<[WLat30, MCD], (instregex "TRAP(2|4)$")>; -def : InstRW<[WLat30, MCD], (instregex "SIG(P|A)$")>; -def : InstRW<[WLat30, MCD], (instregex "SIE$")>; - -//===----------------------------------------------------------------------===// -// System: CPU-Measurement Facility Instructions -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat1, FXb, NormalGr], (instregex "LPP$")>; -def : InstRW<[WLat30, WLat30, MCD], (instregex "ECPGA$")>; -def : InstRW<[WLat30, WLat30, MCD], (instregex "E(C|P)CTR$")>; -def : InstRW<[WLat30, MCD], (instregex "LCCTL$")>; -def : InstRW<[WLat30, MCD], (instregex "L(P|S)CTL$")>; -def : InstRW<[WLat30, MCD], (instregex "Q(S|CTR)I$")>; -def : InstRW<[WLat30, MCD], (instregex "S(C|P)CTR$")>; - -//===----------------------------------------------------------------------===// -// System: I/O Instructions -//===----------------------------------------------------------------------===// - -def : InstRW<[WLat30, MCD], (instregex "(C|H|R|X)SCH$")>; -def : InstRW<[WLat30, MCD], (instregex "(M|S|ST|T)SCH$")>; -def : InstRW<[WLat30, MCD], (instregex "RCHP$")>; -def : InstRW<[WLat30, MCD], (instregex "SCHM$")>; -def : InstRW<[WLat30, MCD], (instregex "STC(PS|RW)$")>; -def : InstRW<[WLat30, MCD], (instregex "TPI$")>; -def : InstRW<[WLat30, MCD], (instregex "SAL$")>; - -} - diff --git a/lib/Target/SystemZ/SystemZScheduleZ15.td 
b/lib/Target/SystemZ/SystemZScheduleZ15.td
new file mode 100644
index 000000000000..56ceb88f35d4
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZScheduleZ15.td
@@ -0,0 +1,1695 @@
+//-- SystemZScheduleZ15.td - SystemZ Scheduling Definitions ----*- tblgen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Z15 to support instruction
+// scheduling and other instruction cost heuristics.
+//
+// Pseudos expanded right after isel do not need to be modelled here.
+//
+//===----------------------------------------------------------------------===//
+
+def Z15Model : SchedMachineModel {
+
+  let UnsupportedFeatures = Arch13UnsupportedFeatures.List;
+
+  let IssueWidth = 6; // Number of instructions decoded per cycle.
+  let MicroOpBufferSize = 60; // Issue queues
+  let LoadLatency = 1; // Optimistic load latency.
+
+  let PostRAScheduler = 1;
+
+  // Extra cycles for a mispredicted branch.
+  let MispredictPenalty = 20;
+}
+
+let SchedModel = Z15Model in {
+// These definitions need the SchedModel value. They could be put in a
+// subtarget common include file, but it seems the include system in Tablegen
+// currently (2016) rejects multiple includes of same file.
+
+// Decoder grouping rules
+let NumMicroOps = 1 in {
+  def : WriteRes;
+  def : WriteRes { let BeginGroup = 1; }
+  def : WriteRes { let EndGroup = 1; }
+}
+def : WriteRes {
+  let NumMicroOps = 2;
+  let BeginGroup = 1;
+}
+def : WriteRes {
+  let NumMicroOps = 3;
+  let BeginGroup = 1;
+  let EndGroup = 1;
+}
+def : WriteRes {
+  let NumMicroOps = 6;
+  let BeginGroup = 1;
+  let EndGroup = 1;
+}
+def : WriteRes {
+  let NumMicroOps = 9;
+  let BeginGroup = 1;
+  let EndGroup = 1;
+}
+
+// Incoming latency removed from the register operand which is used together
+// with a memory operand by the instruction.
+def : ReadAdvance;
+
+// LoadLatency (above) is not used for instructions in this file. This is
+// instead the role of LSULatency, which is the latency value added to the
+// result of loads and instructions with folded memory operands.
+def : WriteRes { let Latency = 4; let NumMicroOps = 0; }
+
+let NumMicroOps = 0 in {
+  foreach L = 1-30 in
+    def : WriteRes("WLat"#L), []> { let Latency = L; }
+}
+
+// Execution units.
+def Z15_FXaUnit : ProcResource<2>;
+def Z15_FXbUnit : ProcResource<2>;
+def Z15_LSUnit : ProcResource<2>;
+def Z15_VecUnit : ProcResource<2>;
+def Z15_VecFPdUnit : ProcResource<2> { let BufferSize = 1; /* blocking */ }
+def Z15_VBUnit : ProcResource<2>;
+def Z15_MCD : ProcResource<1>;
+
+// Subtarget specific definitions of scheduling resources.
+let NumMicroOps = 0 in { + def : WriteRes<FXa, [Z15_FXaUnit]>; + def : WriteRes<FXb, [Z15_FXbUnit]>; + def : WriteRes<LSU, [Z15_LSUnit]>; + def : WriteRes<VecBF, [Z15_VecUnit]>; + def : WriteRes<VecDF, [Z15_VecUnit]>; + def : WriteRes<VecDFX, [Z15_VecUnit]>; + def : WriteRes<VecMul, [Z15_VecUnit]>; + def : WriteRes<VecStr, [Z15_VecUnit]>; + def : WriteRes<VecXsPm, [Z15_VecUnit]>; + foreach Num = 2-5 in { let ResourceCycles = [Num] in { + def : WriteRes<!cast<SchedWrite>("FXa"#Num), [Z15_FXaUnit]>; + def : WriteRes<!cast<SchedWrite>("FXb"#Num), [Z15_FXbUnit]>; + def : WriteRes<!cast<SchedWrite>("LSU"#Num), [Z15_LSUnit]>; + def : WriteRes<!cast<SchedWrite>("VecBF"#Num), [Z15_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecDF"#Num), [Z15_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecDFX"#Num), [Z15_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecMul"#Num), [Z15_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecStr"#Num), [Z15_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecXsPm"#Num), [Z15_VecUnit]>; + }} + + def : WriteRes<VecFPd, [Z15_VecFPdUnit]> { let ResourceCycles = [30]; } + + def : WriteRes<VBU, [Z15_VBUnit]>; // Virtual Branching Unit +} + +def : WriteRes<MCD, [Z15_MCD]> { let NumMicroOps = 3; + let BeginGroup = 1; + let EndGroup = 1; } + +// -------------------------- INSTRUCTIONS ---------------------------------- // + +// InstRW constructs have been used in order to preserve the +// readability of the InstrInfo files. + +// For each instruction, as matched by a regexp, provide a list of +// resources that it needs. These will be combined into a SchedClass. + +//===----------------------------------------------------------------------===// +// Stack allocation +//===----------------------------------------------------------------------===// + +// Pseudo -> LA / LAY +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ADJDYNALLOC$")>; + +//===----------------------------------------------------------------------===// +// Branch instructions +//===----------------------------------------------------------------------===// + +// Branch +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?BRC(L)?(Asm.*)?$")>; +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?J(G)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?BC(R)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?B(R)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "BI(C)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXa, EndGroup], (instregex "BRCT(G)?$")>; +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BRCTH$")>; +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BCT(G)?(R)?$")>; +def : InstRW<[WLat1, FXa2, FXb2, GroupAlone2], + (instregex "B(R)?X(H|L).*$")>; + +// Compare and branch +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>; +def : InstRW<[WLat1, FXb2, GroupAlone], + (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Trap instructions +//===----------------------------------------------------------------------===// + +// Trap +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Cond)?Trap$")>; + +// Compare and trap +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?(I|R)T(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(G)?RT(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(F|G)IT(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Call and return instructions +//===----------------------------------------------------------------------===// + +// Call +def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex
"(Call)?BAS(R)?$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>; + +// Return +def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn$")>; + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Moves +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MV(G|H)?HI$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MVI(Y)?$")>; + +// Move character +def : InstRW<[WLat1, FXb, LSU3, GroupAlone], (instregex "MVC$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVCL(E|U)?$")>; +def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "MVCRL$")>; + +// Pseudo -> reg move +def : InstRW<[WLat1, FXa, NormalGr], (instregex "COPY(_TO_REGCLASS)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "EXTRACT_SUBREG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "INSERT_SUBREG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "REG_SEQUENCE$")>; + +// Loads +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L(Y|FH|RL|Mux)?$")>; +def : InstRW<[LSULatency, LSULatency, LSU, NormalGr], (instregex "LCBB$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LG(RL)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L128$")>; + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIH(F|H|L)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIL(F|H|L)$")>; + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(F|H)I$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LHI(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LR(Mux)?$")>; + +// Load and zero rightmost byte +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LZR(F|G)$")>; + +// Load and trap +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "L(FH|G)?AT$")>; + +// Load and test +def : InstRW<[WLat1LSU, WLat1LSU, LSU, FXa, NormalGr], (instregex "LT(G)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LT(G)?R$")>; + +// Stores +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STG(RL)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST128$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(Y|FH|RL|Mux)?$")>; + +// String moves. 
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVST$")>; + +//===----------------------------------------------------------------------===// +// Conditional move instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOCRMux$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|FH)?R(Asm.*)?$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|H)?HI(Mux|(Asm.*))?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], + (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>; + +def : InstRW<[WLat2, FXa, NormalGr], (instregex "SELRMux$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "SEL(G|FH)?R(Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Sign extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "L(B|H|G)R$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(B|H|F)R$")>; + +def : InstRW<[WLat1LSU, WLat1LSU, FXa, LSU, NormalGr], (instregex "LTGF$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LTGFR$")>; + +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LB(H|Mux)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(Y)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(H|Mux|RL)$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(B|H|F)$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(H|F)RL$")>; + +//===----------------------------------------------------------------------===// +// Zero extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLCR(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLHR(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLG(C|H|F|T)R$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLC(Mux)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLH(Mux)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LL(C|H)H$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLHRL$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLG(C|H|F|T|HRL|FRL)$")>; + +// Load and zero rightmost byte +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLZRGF$")>; + +// Load and trap +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "LLG(F|T)?AT$")>; + +//===----------------------------------------------------------------------===// +// Truncations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STC(H|Y|Mux)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STH(H|Y|RL|Mux)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STCM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Multi-register moves +//===----------------------------------------------------------------------===// + +// Load multiple (estimated average of 5 ops) +def : InstRW<[WLat10, WLat10, LSU5, GroupAlone], (instregex "LM(H|Y|G)?$")>; + +// Load multiple disjoint +def : InstRW<[WLat30, WLat30, MCD], (instregex "LMD$")>; + +// Store multiple +def : InstRW<[WLat1, LSU2, FXb3, GroupAlone], (instregex "STM(G|H|Y)?$")>; + 
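[Editorial note, not part of the upstream patch] The InstRW entries in this file all follow one convention that is easy to lose in the flattened diff: the leading WLat*/LSULatency writes give the latency of each register result defined by the matched instructions, RegReadAdv (where present) marks a register operand that is needed later than the memory-address operands, the FXa/FXb/LSU/Vec* entries name the execution resources consumed, and the trailing NormalGr/Cracked/GroupAlone*/EndGroup write carries the decoder-grouping rule defined near the top of the file. As a minimal sketch, a hypothetical single-result instruction MYOP with a folded memory operand (MYOP is illustrative only and is not an instruction modelled in this file) would be entered as:

  def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
               (instregex "MYOP$")>;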
+//===----------------------------------------------------------------------===// +// Byte swaps +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LRV(G)?R$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LRV(G|H)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STRV(G|H)?$")>; +def : InstRW<[WLat30, MCD], (instregex "MVCIN$")>; + +//===----------------------------------------------------------------------===// +// Load address instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LA(Y|RL)?$")>; + +// Load the Global Offset Table address ( -> larl ) +def : InstRW<[WLat1, FXa, NormalGr], (instregex "GOT$")>; + +//===----------------------------------------------------------------------===// +// Absolute and Negation +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LP(G)?R$")>; +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "L(N|P)GFR$")>; +def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LN(R|GR)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LC(R|GR)$")>; +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "LCGFR$")>; + +//===----------------------------------------------------------------------===// +// Insertion +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "IC(Y)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "IC32(Y)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, WLat1LSU, FXa, LSU, NormalGr], + (instregex "ICM(H|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "II(F|H|L)Mux$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILL(64)?$")>; + +//===----------------------------------------------------------------------===// +// Addition +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "A(Y)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AH(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AIH$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AFI(Mux)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGFI$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGHI(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHI(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHIMux(K)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AL(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AL(FI|HSIK)$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "ALG(F)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGHSIK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGF(I|R)$")>; +def : 
InstRW<[WLat1, FXa, NormalGr], (instregex "ALGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>; + +// Logical addition with carry +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], + (instregex "ALC(G)?$")>; +def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "ALC(G)?R$")>; + +// Add with sign extension (16/32 -> 64) +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AG(F|H)$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "AGFR$")>; + +//===----------------------------------------------------------------------===// +// Subtraction +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "S(G|Y)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SH(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLFI$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SL(G|GF|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGF(I|R)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>; + +// Subtraction with borrow +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], + (instregex "SLB(G)?$")>; +def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "SLB(G)?R$")>; + +// Subtraction with sign extension (16/32 -> 64) +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SG(F|H)$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "SGFR$")>; + +//===----------------------------------------------------------------------===// +// AND +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "N(G|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NI(FMux|HMux|LMux)$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "NI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "NC$")>; + +//===----------------------------------------------------------------------===// +// OR +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "O(G|Y)?$")>; +def : 
InstRW<[WLat1, FXa, NormalGr], (instregex "OGR(K)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "OI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OI(FMux|HMux|LMux)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "OC$")>; + +//===----------------------------------------------------------------------===// +// XOR +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "X(G|Y)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "XI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIFMux$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "XC$")>; + +//===----------------------------------------------------------------------===// +// Combined logical operations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NC(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OC(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NN(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NO(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NX(G)?RK$")>; + +//===----------------------------------------------------------------------===// +// Multiplication +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat5LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MS(GF|Y)?$")>; +def : InstRW<[WLat5, FXa, NormalGr], (instregex "MS(R|FI)$")>; +def : InstRW<[WLat7LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MSG$")>; +def : InstRW<[WLat7, FXa, NormalGr], (instregex "MSGR$")>; +def : InstRW<[WLat5, FXa, NormalGr], (instregex "MSGF(I|R)$")>; +def : InstRW<[WLat8LSU, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MLG$")>; +def : InstRW<[WLat8, FXa2, GroupAlone], (instregex "MLGR$")>; +def : InstRW<[WLat4, FXa, NormalGr], (instregex "MGHI$")>; +def : InstRW<[WLat4, FXa, NormalGr], (instregex "MHI$")>; +def : InstRW<[WLat4LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MH(Y)?$")>; +def : InstRW<[WLat6, FXa2, GroupAlone], (instregex "M(L)?R$")>; +def : InstRW<[WLat6LSU, RegReadAdv, FXa2, LSU, GroupAlone], + (instregex "M(FY|L)?$")>; +def : InstRW<[WLat8, RegReadAdv, FXa, LSU, NormalGr], (instregex "MGH$")>; +def : InstRW<[WLat12, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MG$")>; +def : InstRW<[WLat8, FXa2, GroupAlone], (instregex "MGRK$")>; +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MSC$")>; +def : InstRW<[WLat8LSU, WLat8LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MSGC$")>; +def : InstRW<[WLat6, WLat6, FXa, NormalGr], (instregex "MSRKC$")>; +def : InstRW<[WLat8, WLat8, FXa, NormalGr], (instregex "MSGRKC$")>; + 
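[Editorial note, not part of the upstream patch] The multiply entries above also show how the two latency families are meant to combine: the register form MSGR is tagged WLat7, while the memory form MSG is tagged WLat7LSU. Given the comment near the top of this file that LSULatency (4 cycles on this model) is the value "added to the result of loads and instructions with folded memory operands", WLat7LSU should amount to roughly 7 + 4 = 11 cycles to the 64-bit product. The WLat*LSU writes themselves are declared in the shared SystemZSchedule.td rather than in this patch; presumably they simply chain the corresponding WLat* write with LSULatency, along the lines of:

  def WLat7LSU : WriteSequence<[WLat7, LSULatency]>;

(the exact spelling of that definition is an assumption, not taken from this patch).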
+//===----------------------------------------------------------------------===// +// Division and remainder +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DR$")>; +def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "D$")>; +def : InstRW<[WLat30, FXa2, GroupAlone], (instregex "DSG(F)?R$")>; +def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone2], + (instregex "DSG(F)?$")>; +def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DLR$")>; +def : InstRW<[WLat30, FXa4, GroupAlone], (instregex "DLGR$")>; +def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], + (instregex "DL(G)?$")>; + +//===----------------------------------------------------------------------===// +// Shifts +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLL(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRL(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRA(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLA(G|K)?$")>; +def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone2], + (instregex "S(L|R)D(A|L)$")>; + +// Rotate +def : InstRW<[WLat2LSU, FXa, LSU, NormalGr], (instregex "RLL(G)?$")>; + +// Rotate and insert +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBG(N|32)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBH(G|H|L)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBL(G|H|L)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBMux$")>; + +// Rotate and Select +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "R(N|O|X)SBG$")>; + +//===----------------------------------------------------------------------===// +// Comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "C(G|Y|Mux)?$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(F|H)I(Mux)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CG(F|H)I$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CG(HSI|RL)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?R$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CIH$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CHF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CHSI$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "CL(Y|Mux)?$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLFHSI$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLFI(Mux)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLG$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLG(HRL|HSI)$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLGF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGFRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGF(I|R)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGR$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGRL$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLHF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLH(RL|HSI)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLIH$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLI(Y)?$")>; +def : InstRW<[WLat1, 
FXb, NormalGr], (instregex "CLR$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>; +def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>; + +// Compare halfword +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CH(Y)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CHRL$")>; +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGH$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGHRL$")>; +def : InstRW<[WLat2LSU, FXa, FXb, LSU, Cracked], (instregex "CHHSI$")>; + +// Compare with sign extension (32 -> 64) +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGF$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGFRL$")>; +def : InstRW<[WLat2, FXb, NormalGr], (instregex "CGFR$")>; + +// Compare logical character +def : InstRW<[WLat6, FXb, LSU2, Cracked], (instregex "CLC$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLCL(E|U)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLST$")>; + +// Test under mask +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "TM(Y)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TM(H|L)Mux$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHH(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHL(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLH(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLL(64)?$")>; + +// Compare logical characters under mask +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "CLM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Prefetch and execution hint +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU, NormalGr], (instregex "PFD(RL)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "BPP$")>; +def : InstRW<[FXb, EndGroup], (instregex "BPRP$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "NIAI$")>; + +//===----------------------------------------------------------------------===// +// Atomic operations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, EndGroup], (instregex "Serialize$")>; + +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAA(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAAL(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAN(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAO(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAX(G)?$")>; + +// Test and set +def : InstRW<[WLat2LSU, FXb, LSU, EndGroup], (instregex "TS$")>; + +// Compare and swap +def : InstRW<[WLat3LSU, WLat3LSU, FXa, FXb, LSU, GroupAlone], + (instregex "CS(G|Y)?$")>; + +// Compare double and swap +def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone2], + (instregex "CDS(Y)?$")>; +def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3, + GroupAlone3], (instregex "CDSG$")>; + +// Compare and swap and store +def : InstRW<[WLat30, MCD], (instregex "CSST$")>; + +// Perform locked operation +def : InstRW<[WLat30, MCD], (instregex "PLO$")>; + +// Load/store pair from/to quadword +def : InstRW<[WLat4LSU, LSU2, GroupAlone], (instregex "LPQ$")>; +def : InstRW<[WLat1, FXb2, LSU, GroupAlone], (instregex "STPQ$")>; + +// Load pair disjoint +def : 
InstRW<[WLat1LSU, WLat1LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>; + +//===----------------------------------------------------------------------===// +// Translate and convert +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "TR$")>; +def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone2], + (instregex "TRT$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRT(R)?E(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TR(T|O)(T|O)(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(CUUTF|CUTFU)(Opt)?$")>; + +//===----------------------------------------------------------------------===// +// Message-security assist +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], + (instregex "KM(C|F|O|CTR|A)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "(KIMD|KLMD|KMAC|KDSA)$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "(PCC|PPNO|PRNO)$")>; + +//===----------------------------------------------------------------------===// +// Guarded storage +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LGG$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLGFSG$")>; +def : InstRW<[WLat30, MCD], (instregex "(L|ST)GSC$")>; + +//===----------------------------------------------------------------------===// +// Decimal arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat20, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone2], + (instregex "CVBG$")>; +def : InstRW<[WLat20, RegReadAdv, FXb, VecDF, LSU, GroupAlone2], + (instregex "CVB(Y)?$")>; +def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone3], (instregex "CVDG$")>; +def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone2], (instregex "CVD(Y)?$")>; +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>; +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>; +def : InstRW<[WLat12, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>; +def : InstRW<[WLat1, FXb, LSU2, Cracked], (instregex "UNPK$")>; + +def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone2], + (instregex "(A|S|ZA)P$")>; +def : InstRW<[WLat1, FXb, VecDFX2, LSU3, GroupAlone2], (instregex "MP$")>; +def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone2], (instregex "DP$")>; +def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone3], (instregex "SRP$")>; +def : InstRW<[WLat8, VecDFX, LSU, LSU, GroupAlone], (instregex "CP$")>; +def : InstRW<[WLat3LSU, VecDFX, LSU, Cracked], (instregex "TP$")>; +def : InstRW<[WLat30, MCD], (instregex "ED(MK)?$")>; + +//===----------------------------------------------------------------------===// +// Access registers +//===----------------------------------------------------------------------===// + +// Extract/set/copy access register +def : InstRW<[WLat3, LSU, NormalGr], (instregex "(EAR|SAR|CPYA)$")>; + +// Load address extended +def : InstRW<[WLat5, LSU, FXa, Cracked], (instregex "LAE(Y)?$")>; + +// Load/store access multiple (not modeled precisely) +def : InstRW<[WLat20, WLat20, LSU5, GroupAlone], (instregex "LAM(Y)?$")>; +def : 
InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STAM(Y)?$")>; + +//===----------------------------------------------------------------------===// +// Program mask and addressing mode +//===----------------------------------------------------------------------===// + +// Insert Program Mask +def : InstRW<[WLat3, FXa, EndGroup], (instregex "IPM$")>; + +// Set Program Mask +def : InstRW<[WLat3, LSU, EndGroup], (instregex "SPM$")>; + +// Branch and link +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BAL(R)?$")>; + +// Test addressing mode +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TAM$")>; + +// Set addressing mode +def : InstRW<[WLat1, FXb, EndGroup], (instregex "SAM(24|31|64)$")>; + +// Branch (and save) and set mode. +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BSM$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BASSM$")>; + +//===----------------------------------------------------------------------===// +// Transactional execution +//===----------------------------------------------------------------------===// + +// Transaction begin +def : InstRW<[WLat9, LSU2, FXb5, GroupAlone2], (instregex "TBEGIN(C)?$")>; + +// Transaction end +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TEND$")>; + +// Transaction abort +def : InstRW<[WLat30, MCD], (instregex "TABORT$")>; + +// Extract Transaction Nesting Depth +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ETND$")>; + +// Nontransactional store +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "NTSTG$")>; + +//===----------------------------------------------------------------------===// +// Processor assist +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "PPA$")>; + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +//===----------------------------------------------------------------------===// + +// Find leftmost one +def : InstRW<[WLat5, WLat5, FXa2, GroupAlone], (instregex "FLOGR$")>; + +// Population count +def : InstRW<[WLat3, WLat3, FXa, NormalGr], (instregex "POPCNT(Opt)?$")>; + +// String instructions +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "SRST(U)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CUSE$")>; + +// Various complex instructions +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CFC$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, WLat30, WLat30, MCD], + (instregex "UPT$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CKSM$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CMPSC$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "SORTL$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "DFLTCC$")>; + +// Execute +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "EX(RL)?$")>; + +//===----------------------------------------------------------------------===// +// .insn directive instructions +//===----------------------------------------------------------------------===// + +// An "empty" sched-class will be assigned instead of the "invalid sched-class". +// getNumDecoderSlots() will then return 1 instead of 0. 
+def : InstRW<[], (instregex "Insn.*")>; + + +// ----------------------------- Floating point ----------------------------- // + +//===----------------------------------------------------------------------===// +// FP: Move instructions +//===----------------------------------------------------------------------===// + +// Load zero +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LZ(DR|ER)$")>; +def : InstRW<[WLat2, FXb2, Cracked], (instregex "LZXR$")>; + +// Load +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R32|GR)$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "LGDR$")>; +def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>; + +// Load and Test +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BRCompare$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], + (instregex "LTXBR(Compare)?$")>; + +// Copy sign +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>; + +//===----------------------------------------------------------------------===// +// FP: Load instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "LE(Y)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LD(Y|E32)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; + +//===----------------------------------------------------------------------===// +// FP: Store instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E|D)(Y)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STX$")>; + +//===----------------------------------------------------------------------===// +// FP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEDBR(A)?$")>; +def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "L(E|D)XBR(A)?$")>; + +// Load lengthened +def : InstRW<[WLat6LSU, VecBF, LSU, NormalGr], (instregex "LDEB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LDEBR$")>; +def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)B$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "LX(E|D)BR$")>; + +// Convert from fixed / logical +def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)BR(A)?$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)BR(A)?$")>; +def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)L(F|G)BR$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)BR$")>; + +// Convert to fixed / logical +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], + (instregex "C(F|G)(E|D)BR(A)?$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], + (instregex "C(F|G)XBR(A)?$")>; +def : InstRW<[WLat9, WLat9, FXb, VecBF, GroupAlone], (instregex "CLFEBR$")>; +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLFDBR$")>; +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLG(E|D)BR$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "CL(F|G)XBR$")>; + +//===----------------------------------------------------------------------===// +// FP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / 
Negative / Positive +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)BR$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "L(C|N|P)DFR(_32)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XBR$")>; + +// Square root +def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)B$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQ(E|D)BR$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXBR$")>; + +// Load FP integer +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)BR(A)?$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXBR(A)?$")>; + +//===----------------------------------------------------------------------===// +// FP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "A(E|D)B$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXBR$")>; + +// Subtraction +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "S(E|D)B$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXBR$")>; + +// Multiply +def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "M(D|DE|EE)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|EE)BR$")>; +def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MXDB$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDBR$")>; +def : InstRW<[WLat15, VecDF4, GroupAlone], (instregex "MXBR$")>; + +// Multiply and add / subtract +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)EB$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)EBR$")>; +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(A|S)DBR$")>; + +// Division +def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], + (instregex "D(E|D)B$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "D(E|D)BR$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXBR$")>; + +// Divide to integer +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "DI(E|D)BR$")>; + +//===----------------------------------------------------------------------===// +// FP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat3LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "(K|C)(E|D)B$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "(K|C)(E|D)BR$")>; +def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XBR$")>; + +// Test Data Class +def : InstRW<[WLat5, LSU, VecXsPm, NormalGr], (instregex "TC(E|D)B$")>; +def : InstRW<[WLat10, LSU, VecDF4, GroupAlone], (instregex "TCXB$")>; + +//===----------------------------------------------------------------------===// +// FP: Floating-point control register instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4, FXa, LSU, GroupAlone], (instregex "EFPC$")>; +def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "STFPC$")>; +def : InstRW<[WLat3, LSU, GroupAlone], (instregex "SFPC$")>; +def : InstRW<[WLat3LSU, LSU2, GroupAlone], (instregex "LFPC$")>; +def : 
InstRW<[WLat30, MCD], (instregex "SFASR$")>; +def : InstRW<[WLat30, MCD], (instregex "LFAS$")>; +def : InstRW<[WLat3, FXb, GroupAlone], (instregex "SRNM(B|T)?$")>; + + +// --------------------- Hexadecimal floating point ------------------------- // + +//===----------------------------------------------------------------------===// +// HFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "(LEDR|LRER)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEXR$")>; +def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "(LDXR|LRDR)$")>; + +// Load lengthened +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LDE$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LDER$")>; +def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "LX(E|D)R$")>; + +// Convert from fixed +def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)R$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)R$")>; + +// Convert to fixed +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "C(F|G)(E|D)R$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "C(F|G)XR$")>; + +// Convert BFP to HFP / HFP to BFP. +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "THD(E)?R$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "TB(E)?DR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XR$")>; + +// Halve +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "H(E|D)R$")>; + +// Square root +def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQ(E|D)R$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXR$")>; + +// Load FP integer +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)R$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "A(E|D|U|W)$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D|U|W)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXR$")>; + +// Subtraction +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "S(E|D|U|W)$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D|U|W)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXR$")>; + +// Multiply +def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "M(D|DE|E|EE)$")>; +def 
: InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|E|EE)R$")>; +def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MXD$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDR$")>; +def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXR$")>; +def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], (instregex "MY$")>; +def : InstRW<[WLat6LSU, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "MY(H|L)$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MYR$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MY(H|L)R$")>; + +// Multiply and add / subtract +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)(E|D)$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)(E|D)R$")>; +def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MAY$")>; +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "MAY(H|L)$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MAYR$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MAY(H|L)R$")>; + +// Division +def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], (instregex "D(E|D)$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "D(E|D)R$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "C(E|D)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "C(E|D)R$")>; +def : InstRW<[WLat10, VecDF2, GroupAlone], (instregex "CXR$")>; + + +// ------------------------ Decimal floating point -------------------------- // + +//===----------------------------------------------------------------------===// +// DFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "LTDTR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat15, VecDF, NormalGr], (instregex "LEDTR$")>; +def : InstRW<[WLat15, VecDF2, NormalGr], (instregex "LDXTR$")>; + +// Load lengthened +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "LDETR$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "LXDTR$")>; + +// Convert from fixed / logical +def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDFTR(A)?$")>; +def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDGTR(A)?$")>; +def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXFTR(A)?$")>; +def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXGTR(A)?$")>; +def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDLFTR$")>; +def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDLGTR$")>; +def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXLFTR$")>; +def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXLGTR$")>; + +// Convert to fixed / logical +def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked], + (instregex "C(F|G)DTR(A)?$")>; +def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], + (instregex "C(F|G)XTR(A)?$")>; +def 
: InstRW<[WLat30, WLat30, FXb, VecDF, Cracked], (instregex "CL(F|G)DTR$")>; +def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], (instregex "CL(F|G)XTR$")>; + +// Convert from / to signed / unsigned packed +def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "CD(S|U)TR$")>; +def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone2], (instregex "CX(S|U)TR$")>; +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "C(S|U)DTR$")>; +def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone2], (instregex "C(S|U)XTR$")>; + +// Convert from / to zoned +def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDZT$")>; +def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXZT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CZDT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CZXT$")>; + +// Convert from / to packed +def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDPT$")>; +def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXPT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CPDT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CPXT$")>; + +// Perform floating-point operation +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "PFPO$")>; + +//===----------------------------------------------------------------------===// +// DFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load FP integer +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "FIDTR$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXTR$")>; + +// Extract biased exponent +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEDTR$")>; +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEXTR$")>; + +// Extract significance +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "ESDTR$")>; +def : InstRW<[WLat12, FXb, VecDF2, Cracked], (instregex "ESXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "ADTR(A)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXTR(A)?$")>; + +// Subtraction +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "SDTR(A)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXTR(A)?$")>; + +// Multiply +def : InstRW<[WLat30, VecDF, NormalGr], (instregex "MDTR(A)?$")>; +def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXTR(A)?$")>; + +// Division +def : InstRW<[WLat30, VecDF, NormalGr], (instregex "DDTR(A)?$")>; +def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "DXTR(A)?$")>; + +// Quantize +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "QADTR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "QAXTR$")>; + +// Reround +def : InstRW<[WLat9, WLat9, FXb, VecDF, Cracked], (instregex "RRDTR$")>; +def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone2], (instregex "RRXTR$")>; + +// Shift significand left/right +def : InstRW<[WLat11LSU, LSU, VecDF, GroupAlone], (instregex "S(L|R)DT$")>; +def : InstRW<[WLat11LSU, LSU, VecDF4, GroupAlone], (instregex "S(L|R)XT$")>; + +// Insert biased exponent +def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "IEDTR$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "IEXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Comparisons 
+//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "(K|C)DTR$")>; +def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XTR$")>; + +// Compare biased exponent +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEDTR$")>; +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEXTR$")>; + +// Test Data Class/Group +def : InstRW<[WLat15, LSU, VecDF, NormalGr], (instregex "TD(C|G)(E|D)T$")>; +def : InstRW<[WLat15, LSU, VecDF2, GroupAlone], (instregex "TD(C|G)XT$")>; + + +// --------------------------------- Vector --------------------------------- // + +//===----------------------------------------------------------------------===// +// Vector: Move instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLR(32|64)?$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLGV(B|F|G|H)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLVG(B|F|G|H)?$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLVGP(32)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Immediate instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VZERO$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VONE$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGBM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGM(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREPI(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>; + +//===----------------------------------------------------------------------===// +// Vector: Loads +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H|LF)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "VLE(B|F|G|H)$")>; +def : InstRW<[WLat5LSU, RegReadAdv, FXb, LSU, VecXsPm, Cracked], + (instregex "VGE(F|G)$")>; +def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone], + (instregex "VLM(Align)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLRL(R)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Stores +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>; +def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>; +def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>; +def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTRL(R)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Byte swaps +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBR(H|F|G|Q)?$")>; +def : 
InstRW<[LSULatency, LSU, NormalGr], (instregex "VLER(H|F|G)?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "VLEBR(H|F|G)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEBRZ(H|F|G|E)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBRREP(H|F|G)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTBR(H|F|G|Q)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTER(H|F|G)?$")>; +def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTEBRH$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTEBR(F|G)$")>; + +//===----------------------------------------------------------------------===// +// Vector: Selects and permutes +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRH(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPERM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPDI$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VBPERM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREP(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEL$")>; + +//===----------------------------------------------------------------------===// +// Vector: Widening and narrowing +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPK(F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEG(B|F|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPH(B|F|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPL(B|F)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLH(B|F|H|W)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLL(B|F|H)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Integer arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VA(B|F|G|H|Q|C|CQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VACC(B|F|G|H|Q|C|CQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVG(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVGL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VN(C|O|N|X)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VO(C)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VCKSM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCLZ(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCTZ(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VX$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFMA(B|F|G|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM(B|F|G|H)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLC(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLP(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], 
(instregex "VMX(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMXL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMN(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMNL(B|F|G|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAL(B|F)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALE(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALH(B|F|H|W)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALO(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAO(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAE(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAH(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VME(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMH(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VML(B|F)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLE(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLH(B|F|H|W)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLO(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMO(B|F|H)?$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VMSL(G)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPOPCT(B|F|G|H)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLLV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERIM(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESLV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRA(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRAV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRLV(B|F|G|H)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSL(DB)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)B$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLD$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSRD$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSB(I|IQ|CBI|CBIQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSCBI(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VS(F|G|H|Q)?$")>; + +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUM(B|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMG(F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMQ(F|G)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Integer comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VEC(B|F|G|H)?$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VECL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex 
"VCH(B|F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)S$")>; +def : InstRW<[WLat4, VecStr, NormalGr], (instregex "VTM$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point arithmetic +//===----------------------------------------------------------------------===// + +// Conversion and rounding +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCFP(S|L)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?G$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?GB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCD(L)?GB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCE(L)?FB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCE(L)?FB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(S|L)FP$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?GD$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?GDB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?GDB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?FEB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?FEB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WL(DE|ED)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(L|R)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(LS|RD)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFL(LS|RD)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFLLD$")>; +def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WFLRX$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFI(DB)?$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFIDB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFISB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFISB$")>; +def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WFIXB$")>; + +// Sign operations +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VFPSO$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSODB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSOSB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFPSOXB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFL(C|N|P)XB$")>; + +// Minimum / maximum +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)SB$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WF(MAX|MIN)XB$")>; + +// Test data class +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFTCI$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCIDB$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCISB$")>; +def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFTCIXB$")>; + +// Add / subtract +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], 
(instregex "VF(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)SB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)SB$")>; +def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WF(A|S)XB$")>; + +// Multiply / multiply-and-add/subtract +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFM(DB)?$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFM(D|S)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFMSB$")>; +def : InstRW<[WLat20, VecDF2, NormalGr], (instregex "WFMXB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)SB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)SB$")>; +def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "WF(N)?M(A|S)XB$")>; + +// Divide / square root +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFD$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FDDB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FDSB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFDXB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFSQ$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FSQDB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FSQSB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFSQXB$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SB$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XB$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XB$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFC(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFK(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], + (instregex "WF(C|K)(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], + (instregex "VF(C|K)(E|H|HE)SBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SBS$")>; +def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XBS$")>; +def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XBS$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)DB$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)SB$")>; +def : InstRW<[WLat3, VecDFX, NormalGr], (instregex "WF(C|K)XB$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point insertion and extraction 
+//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "LFER$")>; + +//===----------------------------------------------------------------------===// +// Vector: String instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(B)?$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAE(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFEE(B|F|H|ZB|ZF|ZH)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], + (instregex "VFEE(B|F|H|ZB|ZF|ZH)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFENE(B|F|H|ZB|ZF|ZH)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], + (instregex "VFENE(B|F|H|ZB|ZF|ZH)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VISTR(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VISTR(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRC(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRC(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)S$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRS(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRSZ(B|F|H)$")>; + +//===----------------------------------------------------------------------===// +// Vector: Packed-decimal instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "VLIP$")>; +def : InstRW<[WLat6, VecDFX, LSU, GroupAlone2], (instregex "VPKZ$")>; +def : InstRW<[WLat1, VecDFX, FXb, LSU2, GroupAlone2], (instregex "VUPKZ$")>; +def : InstRW<[WLat20, WLat20, VecDF2, FXb, GroupAlone], + (instregex "VCVB(G)?(Opt)?$")>; +def : InstRW<[WLat15, WLat15, VecDF2, FXb, GroupAlone], + (instregex "VCVD(G)?$")>; +def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "V(A|S)P$")>; +def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "VM(S)?P$")>; +def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "V(D|R)P$")>; +def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "VSDP$")>; +def : InstRW<[WLat10, WLat10, VecDF2, NormalGr], (instregex "VSRP$")>; +def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "VPSOP$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "V(T|C)P$")>; + + +// -------------------------------- System ---------------------------------- // + +//===----------------------------------------------------------------------===// +// System: Program-Status Word Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, WLat30, MCD], (instregex "EPSW$")>; +def : InstRW<[WLat20, GroupAlone3], (instregex "LPSW(E)?$")>; +def : InstRW<[WLat3, FXa, GroupAlone], (instregex "IPK$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SPKA$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SSM$")>; +def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "ST(N|O)SM$")>; +def : InstRW<[WLat3, FXa, NormalGr], (instregex 
"IAC$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SAC(F)?$")>; + +//===----------------------------------------------------------------------===// +// System: Control Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4LSU, WLat4LSU, LSU2, GroupAlone], (instregex "LCTL(G)?$")>; +def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STCT(L|G)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "E(P|S)A(I)?R$")>; +def : InstRW<[WLat30, MCD], (instregex "SSA(I)?R$")>; +def : InstRW<[WLat30, MCD], (instregex "ESEA$")>; + +//===----------------------------------------------------------------------===// +// System: Prefix-Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "S(T)?PX$")>; + +//===----------------------------------------------------------------------===// +// System: Storage-Key and Real Memory Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "ISKE$")>; +def : InstRW<[WLat30, MCD], (instregex "IVSK$")>; +def : InstRW<[WLat30, MCD], (instregex "SSKE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RRB(E|M)$")>; +def : InstRW<[WLat30, MCD], (instregex "IRBM$")>; +def : InstRW<[WLat30, MCD], (instregex "PFMF$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "TB$")>; +def : InstRW<[WLat30, MCD], (instregex "PGIN$")>; +def : InstRW<[WLat30, MCD], (instregex "PGOUT$")>; + +//===----------------------------------------------------------------------===// +// System: Dynamic-Address-Translation Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "IPTE(Opt)?(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "IDTE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "CRDTE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "PTLB$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "CSP(G)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "LPTEA$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "LRA(Y|G)?$")>; +def : InstRW<[WLat30, MCD], (instregex "STRAG$")>; +def : InstRW<[WLat30, MCD], (instregex "LURA(G)?$")>; +def : InstRW<[WLat30, MCD], (instregex "STUR(A|G)$")>; +def : InstRW<[WLat30, MCD], (instregex "TPROT$")>; + +//===----------------------------------------------------------------------===// +// System: Memory-move Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone2], (instregex "MVC(K|P|S)$")>; +def : InstRW<[WLat1, FXa, LSU5, GroupAlone2], (instregex "MVC(S|D)K$")>; +def : InstRW<[WLat30, MCD], (instregex "MVCOS$")>; +def : InstRW<[WLat30, MCD], (instregex "MVPG$")>; + +//===----------------------------------------------------------------------===// +// System: Address-Space Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "LASP$")>; +def : InstRW<[WLat1, LSU, GroupAlone], (instregex "PALB$")>; +def : InstRW<[WLat30, MCD], (instregex "PC$")>; +def : InstRW<[WLat30, MCD], (instregex "PR$")>; +def : InstRW<[WLat30, MCD], (instregex "PT(I)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RP$")>; +def : InstRW<[WLat30, MCD], (instregex "BS(G|A)$")>; +def : InstRW<[WLat30, MCD], (instregex "TAR$")>; + 
+//===----------------------------------------------------------------------===// +// System: Linkage-Stack Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "BAKR$")>; +def : InstRW<[WLat30, MCD], (instregex "EREG(G)?$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>; + +//===----------------------------------------------------------------------===// +// System: Time-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "PTFF$")>; +def : InstRW<[WLat30, MCD], (instregex "SCK(PF|C)?$")>; +def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "SPT$")>; +def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone2], (instregex "STCK(F)?$")>; +def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone3], (instregex "STCKE$")>; +def : InstRW<[WLat30, MCD], (instregex "STCKC$")>; +def : InstRW<[WLat1, LSU2, FXb, Cracked], (instregex "STPT$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "STAP$")>; +def : InstRW<[WLat30, MCD], (instregex "STIDP$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "STSI$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "STFL(E)?$")>; +def : InstRW<[WLat30, MCD], (instregex "ECAG$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "ECTG$")>; +def : InstRW<[WLat30, MCD], (instregex "PTF$")>; +def : InstRW<[WLat30, MCD], (instregex "PCKMO$")>; + +//===----------------------------------------------------------------------===// +// System: Miscellaneous Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "SVC$")>; +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "MC$")>; +def : InstRW<[WLat30, MCD], (instregex "DIAG$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TRAC(E|G)$")>; +def : InstRW<[WLat30, MCD], (instregex "TRAP(2|4)$")>; +def : InstRW<[WLat30, MCD], (instregex "SIG(P|A)$")>; +def : InstRW<[WLat30, MCD], (instregex "SIE$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Measurement Facility Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LPP$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "ECPGA$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "E(C|P)CTR$")>; +def : InstRW<[WLat30, MCD], (instregex "LCCTL$")>; +def : InstRW<[WLat30, MCD], (instregex "L(P|S)CTL$")>; +def : InstRW<[WLat30, MCD], (instregex "Q(S|CTR)I$")>; +def : InstRW<[WLat30, MCD], (instregex "S(C|P)CTR$")>; + +//===----------------------------------------------------------------------===// +// System: I/O Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "(C|H|R|X)SCH$")>; +def : InstRW<[WLat30, MCD], (instregex "(M|S|ST|T)SCH$")>; +def : InstRW<[WLat30, MCD], (instregex "RCHP$")>; +def : InstRW<[WLat30, MCD], (instregex "SCHM$")>; +def : InstRW<[WLat30, MCD], (instregex "STC(PS|RW)$")>; +def : InstRW<[WLat30, MCD], (instregex "TPI$")>; +def : InstRW<[WLat30, MCD], (instregex "SAL$")>; + +} + diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp 
b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index a50e6aa59711..47c925dcf730 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -209,10 +209,10 @@ std::pair SystemZSelectionDAGInfo::EmitTargetCodeForMemchr( // Now select between End and null, depending on whether the character // was found. - SDValue Ops[] = {End, DAG.getConstant(0, DL, PtrVT), - DAG.getConstant(SystemZ::CCMASK_SRST, DL, MVT::i32), - DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32), - CCReg}; + SDValue Ops[] = { + End, DAG.getConstant(0, DL, PtrVT), + DAG.getTargetConstant(SystemZ::CCMASK_SRST, DL, MVT::i32), + DAG.getTargetConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32), CCReg}; End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, PtrVT, Ops); return std::make_pair(End, Chain); } diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp index e79dfc5b4b9e..2aca22c9082a 100644 --- a/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -75,7 +75,7 @@ static void tieOpsIfNeeded(MachineInstr &MI) { // instead of IIxF. bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned LLIxL, unsigned LLIxH) { - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); // The new opcode will clear the other half of the GR64 reg, so // cancel if that is live. unsigned thisSubRegIdx = @@ -86,7 +86,7 @@ bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned LLIxL, : SystemZ::subreg_l32); unsigned GR64BitReg = TRI->getMatchingSuperReg(Reg, thisSubRegIdx, &SystemZ::GR64BitRegClass); - unsigned OtherReg = TRI->getSubReg(GR64BitReg, otherSubRegIdx); + Register OtherReg = TRI->getSubReg(GR64BitReg, otherSubRegIdx); if (LiveRegs.contains(OtherReg)) return false; diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index 5c49e6eff0bf..20865037fe38 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -154,7 +154,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT, getEffectiveRelocModel(RM), getEffectiveSystemZCodeModel(CM, getEffectiveRelocModel(RM), JIT), OL), - TLOF(llvm::make_unique()), + TLOF(std::make_unique()), Subtarget(TT, CPU, FS, *this) { initAsmInfo(); } @@ -176,7 +176,7 @@ public: ScheduleDAGInstrs * createPostMachineScheduler(MachineSchedContext *C) const override { return new ScheduleDAGMI(C, - llvm::make_unique(C), + std::make_unique(C), /*RemoveKillFlags=*/true); } @@ -184,6 +184,7 @@ public: bool addInstSelector() override; bool addILPOpts() override; void addPostRewrite() override; + void addPostRegAlloc() override; void addPreSched2() override; void addPreEmitPass() override; }; @@ -217,14 +218,14 @@ void SystemZPassConfig::addPostRewrite() { addPass(createSystemZPostRewritePass(getSystemZTargetMachine())); } -void SystemZPassConfig::addPreSched2() { +void SystemZPassConfig::addPostRegAlloc() { // PostRewrite needs to be run at -O0 also (in which case addPostRewrite() // is not called). 
if (getOptLevel() == CodeGenOpt::None) addPass(createSystemZPostRewritePass(getSystemZTargetMachine())); +} - addPass(createSystemZExpandPseudoPass(getSystemZTargetMachine())); - +void SystemZPassConfig::addPreSched2() { if (getOptLevel() != CodeGenOpt::None) addPass(&IfConverterID); } diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 145cf87ef9f5..11c99aa11174 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -304,7 +304,8 @@ bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1, C2.ScaleCost, C2.SetupCost); } -unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) { +unsigned SystemZTTIImpl::getNumberOfRegisters(unsigned ClassID) const { + bool Vector = (ClassID == 1); if (!Vector) // Discount the stack pointer. Also leave out %r0, since it can't // be used in an address. @@ -707,7 +708,7 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, // TODO: Fix base implementation which could simplify things a bit here // (seems to miss on differentiating on scalar/vector types). - // Only 64 bit vector conversions are natively supported before arch13. + // Only 64 bit vector conversions are natively supported before z15. if (DstScalarBits == 64 || ST->hasVectorEnhancements2()) { if (SrcScalarBits == DstScalarBits) return NumDstVectors; diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h index 16ce2ef1d7a0..3ba80b31439f 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -56,12 +56,12 @@ public: /// \name Vector TTI Implementations /// @{ - unsigned getNumberOfRegisters(bool Vector); + unsigned getNumberOfRegisters(unsigned ClassID) const; unsigned getRegisterBitWidth(bool Vector) const; - unsigned getCacheLineSize() { return 256; } - unsigned getPrefetchDistance() { return 2000; } - unsigned getMinPrefetchStride() { return 2048; } + unsigned getCacheLineSize() const override { return 256; } + unsigned getPrefetchDistance() const override { return 2000; } + unsigned getMinPrefetchStride() const override { return 2048; } bool hasDivRemOp(Type *DataType, bool IsSigned); bool prefersVectorizedAddressing() { return false; } diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 17274e1c2c6e..dcd3934de0fa 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -253,6 +253,7 @@ MCSection *TargetLoweringObjectFile::SectionForGlobal( auto Attrs = GVar->getAttributes(); if ((Attrs.hasAttribute("bss-section") && Kind.isBSS()) || (Attrs.hasAttribute("data-section") && Kind.isData()) || + (Attrs.hasAttribute("relro-section") && Kind.isReadOnlyWithRel()) || (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly())) { return getExplicitSectionGlobal(GO, Kind, TM); } diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 634866d93570..4c98e140f446 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -63,18 +63,6 @@ void TargetMachine::resetTargetOptions(const Function &F) const { RESET_OPTION(NoInfsFPMath, "no-infs-fp-math"); RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math"); RESET_OPTION(NoSignedZerosFPMath, "no-signed-zeros-fp-math"); - RESET_OPTION(NoTrappingFPMath, "no-trapping-math"); - - StringRef Denormal = - 
F.getFnAttribute("denormal-fp-math").getValueAsString(); - if (Denormal == "ieee") - Options.FPDenormalMode = FPDenormal::IEEE; - else if (Denormal == "preserve-sign") - Options.FPDenormalMode = FPDenormal::PreserveSign; - else if (Denormal == "positive-zero") - Options.FPDenormalMode = FPDenormal::PositiveZero; - else - Options.FPDenormalMode = DefaultOptions.FPDenormalMode; } /// Returns the code generation relocation model. The choices are static, PIC, @@ -140,8 +128,8 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M, // don't assume the variables to be DSO local unless we actually know // that for sure. This only has to be done for variables; for functions // the linker can insert thunks for calling functions from another DLL. - if (TT.isWindowsGNUEnvironment() && GV && GV->isDeclarationForLinker() && - isa(GV)) + if (TT.isWindowsGNUEnvironment() && TT.isOSBinFormatCOFF() && GV && + GV->isDeclarationForLinker() && isa(GV)) return false; // On COFF, don't mark 'extern_weak' symbols as DSO local. If these symbols @@ -154,7 +142,9 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M, // Make an exception for windows OS in the triple: Some firmware builds use // *-win32-macho triples. This (accidentally?) produced windows relocations // without GOT tables in older clang versions; Keep this behaviour. - if (TT.isOSBinFormatCOFF() || (TT.isOSWindows() && TT.isOSBinFormatMachO())) + // Some JIT users use *-win32-elf triples; these shouldn't use GOT tables + // either. + if (TT.isOSBinFormatCOFF() || TT.isOSWindows()) return true; // Most PIC code sequences that assume that a symbol is local cannot diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp index 5d9029682fdd..3ac9c38dfc0b 100644 --- a/lib/Target/TargetMachineC.cpp +++ b/lib/Target/TargetMachineC.cpp @@ -219,7 +219,7 @@ static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M, LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) { std::error_code EC; - raw_fd_ostream dest(Filename, EC, sys::fs::F_None); + raw_fd_ostream dest(Filename, EC, sys::fs::OF_None); if (EC) { *ErrorMessage = strdup(EC.message().c_str()); return true; diff --git a/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp index 09628e872dd5..53a96fd6a97d 100644 --- a/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp +++ b/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp @@ -313,16 +313,17 @@ public: return Optional(); } - WebAssembly::ExprType parseBlockType(StringRef ID) { - return StringSwitch(ID) - .Case("i32", WebAssembly::ExprType::I32) - .Case("i64", WebAssembly::ExprType::I64) - .Case("f32", WebAssembly::ExprType::F32) - .Case("f64", WebAssembly::ExprType::F64) - .Case("v128", WebAssembly::ExprType::V128) - .Case("exnref", WebAssembly::ExprType::Exnref) - .Case("void", WebAssembly::ExprType::Void) - .Default(WebAssembly::ExprType::Invalid); + WebAssembly::BlockType parseBlockType(StringRef ID) { + // Multivalue block types are handled separately in parseSignature + return StringSwitch(ID) + .Case("i32", WebAssembly::BlockType::I32) + .Case("i64", WebAssembly::BlockType::I64) + .Case("f32", WebAssembly::BlockType::F32) + .Case("f64", WebAssembly::BlockType::F64) + .Case("v128", WebAssembly::BlockType::V128) + .Case("exnref", WebAssembly::BlockType::Exnref) + .Case("void", WebAssembly::BlockType::Void) + 
.Default(WebAssembly::BlockType::Invalid); } bool parseRegTypeList(SmallVectorImpl &Types) { @@ -343,7 +344,7 @@ public: int64_t Val = Int.getIntVal(); if (IsNegative) Val = -Val; - Operands.push_back(make_unique( + Operands.push_back(std::make_unique( WebAssemblyOperand::Integer, Int.getLoc(), Int.getEndLoc(), WebAssemblyOperand::IntOp{Val})); Parser.Lex(); @@ -356,7 +357,7 @@ public: return error("Cannot parse real: ", Flt); if (IsNegative) Val = -Val; - Operands.push_back(make_unique( + Operands.push_back(std::make_unique( WebAssemblyOperand::Float, Flt.getLoc(), Flt.getEndLoc(), WebAssemblyOperand::FltOp{Val})); Parser.Lex(); @@ -378,7 +379,7 @@ public: } if (IsNegative) Val = -Val; - Operands.push_back(make_unique( + Operands.push_back(std::make_unique( WebAssemblyOperand::Float, Flt.getLoc(), Flt.getEndLoc(), WebAssemblyOperand::FltOp{Val})); Parser.Lex(); @@ -407,7 +408,7 @@ public: // an opcode until after the assembly matcher, so set a default to fix // up later. auto Tok = Lexer.getTok(); - Operands.push_back(make_unique( + Operands.push_back(std::make_unique( WebAssemblyOperand::Integer, Tok.getLoc(), Tok.getEndLoc(), WebAssemblyOperand::IntOp{-1})); } @@ -416,8 +417,8 @@ public: } void addBlockTypeOperand(OperandVector &Operands, SMLoc NameLoc, - WebAssembly::ExprType BT) { - Operands.push_back(make_unique( + WebAssembly::BlockType BT) { + Operands.push_back(std::make_unique( WebAssemblyOperand::Integer, NameLoc, NameLoc, WebAssemblyOperand::IntOp{static_cast(BT)})); } @@ -449,13 +450,14 @@ public: } // Now construct the name as first operand. - Operands.push_back(make_unique( + Operands.push_back(std::make_unique( WebAssemblyOperand::Token, NameLoc, SMLoc::getFromPointer(Name.end()), WebAssemblyOperand::TokOp{Name})); // If this instruction is part of a control flow structure, ensure // proper nesting. bool ExpectBlockType = false; + bool ExpectFuncType = false; if (Name == "block") { push(Block); ExpectBlockType = true; @@ -489,9 +491,37 @@ public: if (pop(Name, Block)) return true; } else if (Name == "end_function") { + ensureLocals(getStreamer()); CurrentState = EndFunction; if (pop(Name, Function) || ensureEmptyNestingStack()) return true; + } else if (Name == "call_indirect" || Name == "return_call_indirect") { + ExpectFuncType = true; + } + + if (ExpectFuncType || (ExpectBlockType && Lexer.is(AsmToken::LParen))) { + // This has a special TYPEINDEX operand which in text we + // represent as a signature, such that we can re-build this signature, + // attach it to an anonymous symbol, which is what WasmObjectWriter + // expects to be able to recreate the actual unique-ified type indices. + auto Loc = Parser.getTok(); + auto Signature = std::make_unique(); + if (parseSignature(Signature.get())) + return true; + // Got signature as block type, don't need more + ExpectBlockType = false; + auto &Ctx = getStreamer().getContext(); + // The "true" here will cause this to be a nameless symbol. 
+ MCSymbol *Sym = Ctx.createTempSymbol("typeindex", true); + auto *WasmSym = cast(Sym); + WasmSym->setSignature(Signature.get()); + addSignature(std::move(Signature)); + WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); + const MCExpr *Expr = MCSymbolRefExpr::create( + WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, Ctx); + Operands.push_back(std::make_unique( + WebAssemblyOperand::Symbol, Loc.getLoc(), Loc.getEndLoc(), + WebAssemblyOperand::SymOp{Expr})); } while (Lexer.isNot(AsmToken::EndOfStatement)) { @@ -504,7 +534,7 @@ public: if (ExpectBlockType) { // Assume this identifier is a block_type. auto BT = parseBlockType(Id.getString()); - if (BT == WebAssembly::ExprType::Invalid) + if (BT == WebAssembly::BlockType::Invalid) return error("Unknown block type: ", Id); addBlockTypeOperand(Operands, NameLoc, BT); Parser.Lex(); @@ -514,7 +544,7 @@ public: SMLoc End; if (Parser.parseExpression(Val, End)) return error("Cannot parse symbol: ", Lexer.getTok()); - Operands.push_back(make_unique( + Operands.push_back(std::make_unique( WebAssemblyOperand::Symbol, Id.getLoc(), Id.getEndLoc(), WebAssemblyOperand::SymOp{Val})); if (checkForP2AlignIfLoadStore(Operands, Name)) @@ -549,7 +579,7 @@ public: } case AsmToken::LCurly: { Parser.Lex(); - auto Op = make_unique( + auto Op = std::make_unique( WebAssemblyOperand::BrList, Tok.getLoc(), Tok.getEndLoc()); if (!Lexer.is(AsmToken::RCurly)) for (;;) { @@ -572,7 +602,7 @@ public: } if (ExpectBlockType && Operands.size() == 1) { // Support blocks with no operands as default to void. - addBlockTypeOperand(Operands, NameLoc, WebAssembly::ExprType::Void); + addBlockTypeOperand(Operands, NameLoc, WebAssembly::BlockType::Void); } Parser.Lex(); return false; @@ -671,7 +701,7 @@ public: LastFunctionLabel = LastLabel; push(Function); } - auto Signature = make_unique(); + auto Signature = std::make_unique(); if (parseSignature(Signature.get())) return true; WasmSym->setSignature(Signature.get()); @@ -687,7 +717,7 @@ public: if (SymName.empty()) return true; auto WasmSym = cast(Ctx.getOrCreateSymbol(SymName)); - auto Signature = make_unique(); + auto Signature = std::make_unique(); if (parseRegTypeList(Signature->Params)) return true; WasmSym->setSignature(Signature.get()); @@ -737,24 +767,30 @@ public: return true; // We didn't process this directive. } + // Called either when the first instruction is parsed of the function ends. + void ensureLocals(MCStreamer &Out) { + if (CurrentState == FunctionStart) { + // We haven't seen a .local directive yet. The streamer requires locals to + // be encoded as a prelude to the instructions, so emit an empty list of + // locals here. + auto &TOut = reinterpret_cast( + *Out.getTargetStreamer()); + TOut.emitLocal(SmallVector()); + CurrentState = FunctionLocals; + } + } + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) override { MCInst Inst; + Inst.setLoc(IDLoc); unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); switch (MatchResult) { case Match_Success: { - if (CurrentState == FunctionStart) { - // This is the first instruction in a function, but we haven't seen - // a .local directive yet. The streamer requires locals to be encoded - // as a prelude to the instructions, so emit an empty list of locals - // here. - auto &TOut = reinterpret_cast( - *Out.getTargetStreamer()); - TOut.emitLocal(SmallVector()); - } + ensureLocals(Out); // Fix unknown p2align operands. 
auto Align = WebAssembly::GetDefaultP2AlignAny(Inst.getOpcode()); if (Align != -1U) { diff --git a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp index f9bf3f85d30f..9a9c31cff2d5 100644 --- a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp +++ b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp @@ -24,6 +24,7 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolWasm.h" #include "llvm/Support/Endian.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/TargetRegistry.h" @@ -213,10 +214,29 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( return MCDisassembler::Fail; break; } - // block_type operands (uint8_t). + // block_type operands: case WebAssembly::OPERAND_SIGNATURE: { - if (!parseImmediate(MI, Size, Bytes)) + int64_t Val; + uint64_t PrevSize = Size; + if (!nextLEB(Val, Bytes, Size, true)) return MCDisassembler::Fail; + if (Val < 0) { + // Negative values are single septet value types or empty types + if (Size != PrevSize + 1) { + MI.addOperand( + MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid))); + } else { + MI.addOperand(MCOperand::createImm(Val & 0x7f)); + } + } else { + // We don't have access to the signature, so create a symbol without one + MCSymbol *Sym = getContext().createTempSymbol("typeindex", true); + auto *WasmSym = cast(Sym); + WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); + const MCExpr *Expr = MCSymbolRefExpr::create( + WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext()); + MI.addOperand(MCOperand::createExpr(Expr)); + } break; } // FP operands. diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp index 70b409cf4a90..8314de41021f 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp @@ -31,10 +31,12 @@ namespace { class WebAssemblyAsmBackend final : public MCAsmBackend { bool Is64Bit; + bool IsEmscripten; public: - explicit WebAssemblyAsmBackend(bool Is64Bit) - : MCAsmBackend(support::little), Is64Bit(Is64Bit) {} + explicit WebAssemblyAsmBackend(bool Is64Bit, bool IsEmscripten) + : MCAsmBackend(support::little), Is64Bit(Is64Bit), + IsEmscripten(IsEmscripten) {} unsigned getNumFixupKinds() const override { return WebAssembly::NumTargetFixupKinds; @@ -123,11 +125,11 @@ void WebAssemblyAsmBackend::applyFixup(const MCAssembler &Asm, std::unique_ptr WebAssemblyAsmBackend::createObjectTargetWriter() const { - return createWebAssemblyWasmObjectWriter(Is64Bit); + return createWebAssemblyWasmObjectWriter(Is64Bit, IsEmscripten); } } // end anonymous namespace MCAsmBackend *llvm::createWebAssemblyAsmBackend(const Triple &TT) { - return new WebAssemblyAsmBackend(TT.isArch64Bit()); + return new WebAssemblyAsmBackend(TT.isArch64Bit(), TT.isOSEmscripten()); } diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp index b5d4d369b726..221ac17b8336 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp @@ -15,6 +15,7 @@ #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblyUtilities.h" #include "llvm/ADT/SmallSet.h" 
#include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -51,7 +52,9 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, // Print any additional variadic operands. const MCInstrDesc &Desc = MII.get(MI->getOpcode()); - if (Desc.isVariadic()) + if (Desc.isVariadic()) { + if (Desc.getNumOperands() == 0 && MI->getNumOperands() > 0) + OS << "\t"; for (auto I = Desc.getNumOperands(), E = MI->getNumOperands(); I < E; ++I) { // FIXME: For CALL_INDIRECT_VOID, don't print a leading comma, because // we have an extra flags operand which is not currently printed, for @@ -62,6 +65,7 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, OS << ", "; printOperand(MI, I, OS); } + } // Print any added annotation. printAnnotation(OS, Annot); @@ -232,7 +236,16 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); - Op.getExpr()->print(O, &MAI); + // call_indirect instructions have a TYPEINDEX operand that we print + // as a signature here, such that the assembler can recover this + // information. + auto SRE = static_cast(Op.getExpr()); + if (SRE->getKind() == MCSymbolRefExpr::VK_WASM_TYPEINDEX) { + auto &Sym = static_cast(SRE->getSymbol()); + O << WebAssembly::signatureToString(Sym.getSignature()); + } else { + Op.getExpr()->print(O, &MAI); + } } } @@ -259,14 +272,26 @@ void WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand(const MCInst *MI, void WebAssemblyInstPrinter::printWebAssemblySignatureOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - auto Imm = static_cast(MI->getOperand(OpNo).getImm()); - if (Imm != wasm::WASM_TYPE_NORESULT) - O << WebAssembly::anyTypeToString(Imm); + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) { + auto Imm = static_cast(Op.getImm()); + if (Imm != wasm::WASM_TYPE_NORESULT) + O << WebAssembly::anyTypeToString(Imm); + } else { + auto Expr = cast(Op.getExpr()); + auto *Sym = cast(&Expr->getSymbol()); + if (Sym->getSignature()) { + O << WebAssembly::signatureToString(Sym->getSignature()); + } else { + // Disassembler does not currently produce a signature + O << "unknown_type"; + } + } } // We have various enums representing a subset of these types, use this // function to convert any of them to text. 
-const char *llvm::WebAssembly::anyTypeToString(unsigned Ty) { +const char *WebAssembly::anyTypeToString(unsigned Ty) { switch (Ty) { case wasm::WASM_TYPE_I32: return "i32"; @@ -291,6 +316,24 @@ const char *llvm::WebAssembly::anyTypeToString(unsigned Ty) { } } -const char *llvm::WebAssembly::typeToString(wasm::ValType Ty) { +const char *WebAssembly::typeToString(wasm::ValType Ty) { return anyTypeToString(static_cast(Ty)); } + +std::string WebAssembly::typeListToString(ArrayRef List) { + std::string S; + for (auto &Ty : List) { + if (&Ty != &List[0]) S += ", "; + S += WebAssembly::typeToString(Ty); + } + return S; +} + +std::string WebAssembly::signatureToString(const wasm::WasmSignature *Sig) { + std::string S("("); + S += typeListToString(Sig->Params); + S += ") -> ("; + S += typeListToString(Sig->Returns); + S += ")"; + return S; +} diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h index b979de5028bf..cf37778099a0 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h @@ -58,6 +58,9 @@ namespace WebAssembly { const char *typeToString(wasm::ValType Ty); const char *anyTypeToString(unsigned Ty); +std::string typeListToString(ArrayRef List); +std::string signatureToString(const wasm::WasmSignature *Sig); + } // end namespace WebAssembly } // end namespace llvm diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp index 44b6d6a968a9..1a4c57e66d2f 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp @@ -152,6 +152,7 @@ void WebAssemblyMCCodeEmitter::encodeInstruction( break; case WebAssembly::OPERAND_FUNCTION32: case WebAssembly::OPERAND_OFFSET32: + case WebAssembly::OPERAND_SIGNATURE: case WebAssembly::OPERAND_TYPEINDEX: case WebAssembly::OPERAND_GLOBAL: case WebAssembly::OPERAND_EVENT: diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 7a9f59b1a4f2..b339860a381d 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -38,7 +38,7 @@ MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII); MCAsmBackend *createWebAssemblyAsmBackend(const Triple &TT); std::unique_ptr -createWebAssemblyWasmObjectWriter(bool Is64Bit); +createWebAssemblyWasmObjectWriter(bool Is64Bit, bool IsEmscripten); namespace WebAssembly { enum OperandType { @@ -122,16 +122,22 @@ enum TOF { namespace llvm { namespace WebAssembly { -/// This is used to indicate block signatures. -enum class ExprType : unsigned { +/// Used as immediate MachineOperands for block signatures +enum class BlockType : unsigned { + Invalid = 0x00, Void = 0x40, - I32 = 0x7F, - I64 = 0x7E, - F32 = 0x7D, - F64 = 0x7C, - V128 = 0x7B, - Exnref = 0x68, - Invalid = 0x00 + I32 = unsigned(wasm::ValType::I32), + I64 = unsigned(wasm::ValType::I64), + F32 = unsigned(wasm::ValType::F32), + F64 = unsigned(wasm::ValType::F64), + V128 = unsigned(wasm::ValType::V128), + Exnref = unsigned(wasm::ValType::EXNREF), + // Multivalue blocks (and other non-void blocks) are only emitted when the + // blocks will never be exited and are at the ends of functions (see + // WebAssemblyCFGStackify::fixEndsAtEndOfFunction). 
They also are never made + // to pop values off the stack, so the exact multivalue signature can always + // be inferred from the return type of the parent function in MCInstLower. + Multivalue = 0xffff, }; /// Instruction opcodes emitted via means other than CodeGen. @@ -191,6 +197,8 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32_S: case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64: case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64_S: + case WebAssembly::LOAD_SPLAT_v8x16: + case WebAssembly::LOAD_SPLAT_v8x16_S: return 0; case WebAssembly::LOAD16_S_I32: case WebAssembly::LOAD16_S_I32_S: @@ -240,6 +248,8 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32_S: case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64: case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64_S: + case WebAssembly::LOAD_SPLAT_v16x8: + case WebAssembly::LOAD_SPLAT_v16x8_S: return 1; case WebAssembly::LOAD_I32: case WebAssembly::LOAD_I32_S: @@ -295,6 +305,8 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { case WebAssembly::ATOMIC_NOTIFY_S: case WebAssembly::ATOMIC_WAIT_I32: case WebAssembly::ATOMIC_WAIT_I32_S: + case WebAssembly::LOAD_SPLAT_v32x4: + case WebAssembly::LOAD_SPLAT_v32x4_S: return 2; case WebAssembly::LOAD_I64: case WebAssembly::LOAD_I64_S: @@ -324,31 +336,25 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { case WebAssembly::ATOMIC_RMW_CMPXCHG_I64_S: case WebAssembly::ATOMIC_WAIT_I64: case WebAssembly::ATOMIC_WAIT_I64_S: + case WebAssembly::LOAD_SPLAT_v64x2: + case WebAssembly::LOAD_SPLAT_v64x2_S: + case WebAssembly::LOAD_EXTEND_S_v8i16: + case WebAssembly::LOAD_EXTEND_S_v8i16_S: + case WebAssembly::LOAD_EXTEND_U_v8i16: + case WebAssembly::LOAD_EXTEND_U_v8i16_S: + case WebAssembly::LOAD_EXTEND_S_v4i32: + case WebAssembly::LOAD_EXTEND_S_v4i32_S: + case WebAssembly::LOAD_EXTEND_U_v4i32: + case WebAssembly::LOAD_EXTEND_U_v4i32_S: + case WebAssembly::LOAD_EXTEND_S_v2i64: + case WebAssembly::LOAD_EXTEND_S_v2i64_S: + case WebAssembly::LOAD_EXTEND_U_v2i64: + case WebAssembly::LOAD_EXTEND_U_v2i64_S: return 3; - case WebAssembly::LOAD_v16i8: - case WebAssembly::LOAD_v16i8_S: - case WebAssembly::LOAD_v8i16: - case WebAssembly::LOAD_v8i16_S: - case WebAssembly::LOAD_v4i32: - case WebAssembly::LOAD_v4i32_S: - case WebAssembly::LOAD_v2i64: - case WebAssembly::LOAD_v2i64_S: - case WebAssembly::LOAD_v4f32: - case WebAssembly::LOAD_v4f32_S: - case WebAssembly::LOAD_v2f64: - case WebAssembly::LOAD_v2f64_S: - case WebAssembly::STORE_v16i8: - case WebAssembly::STORE_v16i8_S: - case WebAssembly::STORE_v8i16: - case WebAssembly::STORE_v8i16_S: - case WebAssembly::STORE_v4i32: - case WebAssembly::STORE_v4i32_S: - case WebAssembly::STORE_v2i64: - case WebAssembly::STORE_v2i64_S: - case WebAssembly::STORE_v4f32: - case WebAssembly::STORE_v4f32_S: - case WebAssembly::STORE_v2f64: - case WebAssembly::STORE_v2f64_S: + case WebAssembly::LOAD_V128: + case WebAssembly::LOAD_V128_S: + case WebAssembly::STORE_V128: + case WebAssembly::STORE_V128_S: return 4; default: return -1; diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp index e05efef7201b..40926201931a 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp @@ -60,39 +60,10 @@ void WebAssemblyTargetAsmStreamer::emitLocal(ArrayRef Types) { void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << 
"\t.endfunc\n"; } -void WebAssemblyTargetAsmStreamer::emitSignature( - const wasm::WasmSignature *Sig) { - OS << "("; - emitParamList(Sig); - OS << ") -> ("; - emitReturnList(Sig); - OS << ")"; -} - -void WebAssemblyTargetAsmStreamer::emitParamList( - const wasm::WasmSignature *Sig) { - auto &Params = Sig->Params; - for (auto &Ty : Params) { - if (&Ty != &Params[0]) - OS << ", "; - OS << WebAssembly::typeToString(Ty); - } -} - -void WebAssemblyTargetAsmStreamer::emitReturnList( - const wasm::WasmSignature *Sig) { - auto &Returns = Sig->Returns; - for (auto &Ty : Returns) { - if (&Ty != &Returns[0]) - OS << ", "; - OS << WebAssembly::typeToString(Ty); - } -} - void WebAssemblyTargetAsmStreamer::emitFunctionType(const MCSymbolWasm *Sym) { assert(Sym->isFunction()); OS << "\t.functype\t" << Sym->getName() << " "; - emitSignature(Sym->getSignature()); + OS << WebAssembly::signatureToString(Sym->getSignature()); OS << "\n"; } @@ -107,7 +78,7 @@ void WebAssemblyTargetAsmStreamer::emitGlobalType(const MCSymbolWasm *Sym) { void WebAssemblyTargetAsmStreamer::emitEventType(const MCSymbolWasm *Sym) { assert(Sym->isEvent()); OS << "\t.eventtype\t" << Sym->getName() << " "; - emitParamList(Sym->getSignature()); + OS << WebAssembly::typeListToString(Sym->getSignature()->Params); OS << "\n"; } diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h index 5ea62b179d22..0164f8e572ef 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h @@ -56,9 +56,6 @@ protected: /// This part is for ascii assembly output class WebAssemblyTargetAsmStreamer final : public WebAssemblyTargetStreamer { formatted_raw_ostream &OS; - void emitSignature(const wasm::WasmSignature *Sig); - void emitParamList(const wasm::WasmSignature *Sig); - void emitReturnList(const wasm::WasmSignature *Sig); public: WebAssemblyTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp index a1cc3e268e8f..e7a599e3e175 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp @@ -31,7 +31,7 @@ using namespace llvm; namespace { class WebAssemblyWasmObjectWriter final : public MCWasmObjectTargetWriter { public: - explicit WebAssemblyWasmObjectWriter(bool Is64Bit); + explicit WebAssemblyWasmObjectWriter(bool Is64Bit, bool IsEmscripten); private: unsigned getRelocType(const MCValue &Target, @@ -39,8 +39,9 @@ private: }; } // end anonymous namespace -WebAssemblyWasmObjectWriter::WebAssemblyWasmObjectWriter(bool Is64Bit) - : MCWasmObjectTargetWriter(Is64Bit) {} +WebAssemblyWasmObjectWriter::WebAssemblyWasmObjectWriter(bool Is64Bit, + bool IsEmscripten) + : MCWasmObjectTargetWriter(Is64Bit, IsEmscripten) {} static const MCSection *getFixupSection(const MCExpr *Expr) { if (auto SyExp = dyn_cast(Expr)) { @@ -116,6 +117,6 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target, } std::unique_ptr -llvm::createWebAssemblyWasmObjectWriter(bool Is64Bit) { - return llvm::make_unique(Is64Bit); +llvm::createWebAssemblyWasmObjectWriter(bool Is64Bit, bool IsEmscripten) { + return std::make_unique(Is64Bit, IsEmscripten); } diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp 
b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index 7f9d41da3978..5d8b873ce23b 100644 --- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -67,8 +67,8 @@ MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const { } std::string WebAssemblyAsmPrinter::regToString(const MachineOperand &MO) { - unsigned RegNo = MO.getReg(); - assert(TargetRegisterInfo::isVirtualRegister(RegNo) && + Register RegNo = MO.getReg(); + assert(Register::isVirtualRegister(RegNo) && "Unlowered physical register encountered during assembly printing"); assert(!MFI->isVRegStackified(RegNo)); unsigned WAReg = MFI->getWAReg(RegNo); @@ -332,43 +332,15 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) { // These represent values which are live into the function entry, so there's // no instruction to emit. break; - case WebAssembly::FALLTHROUGH_RETURN_I32: - case WebAssembly::FALLTHROUGH_RETURN_I32_S: - case WebAssembly::FALLTHROUGH_RETURN_I64: - case WebAssembly::FALLTHROUGH_RETURN_I64_S: - case WebAssembly::FALLTHROUGH_RETURN_F32: - case WebAssembly::FALLTHROUGH_RETURN_F32_S: - case WebAssembly::FALLTHROUGH_RETURN_F64: - case WebAssembly::FALLTHROUGH_RETURN_F64_S: - case WebAssembly::FALLTHROUGH_RETURN_v16i8: - case WebAssembly::FALLTHROUGH_RETURN_v16i8_S: - case WebAssembly::FALLTHROUGH_RETURN_v8i16: - case WebAssembly::FALLTHROUGH_RETURN_v8i16_S: - case WebAssembly::FALLTHROUGH_RETURN_v4i32: - case WebAssembly::FALLTHROUGH_RETURN_v4i32_S: - case WebAssembly::FALLTHROUGH_RETURN_v2i64: - case WebAssembly::FALLTHROUGH_RETURN_v2i64_S: - case WebAssembly::FALLTHROUGH_RETURN_v4f32: - case WebAssembly::FALLTHROUGH_RETURN_v4f32_S: - case WebAssembly::FALLTHROUGH_RETURN_v2f64: - case WebAssembly::FALLTHROUGH_RETURN_v2f64_S: { + case WebAssembly::FALLTHROUGH_RETURN: { // These instructions represent the implicit return at the end of a - // function body. Always pops one value off the stack. + // function body. if (isVerbose()) { - OutStreamer->AddComment("fallthrough-return-value"); + OutStreamer->AddComment("fallthrough-return"); OutStreamer->AddBlankLine(); } break; } - case WebAssembly::FALLTHROUGH_RETURN_VOID: - case WebAssembly::FALLTHROUGH_RETURN_VOID_S: - // This instruction represents the implicit return at the end of a - // function body with no return value. - if (isVerbose()) { - OutStreamer->AddComment("fallthrough-return-void"); - OutStreamer->AddBlankLine(); - } - break; case WebAssembly::COMPILER_FENCE: // This is a compiler barrier that prevents instruction reordering during // backend compilation, and should not be emitted. 
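The WebAssemblyInstPrinter hunk above drops the streamer-private emitSignature/emitParamList/emitReturnList helpers in favour of the shared WebAssembly::signatureToString and typeListToString utilities, so .functype directives, .eventtype parameter lists, and the printed call_indirect type-index operand all render a function type the same way. A minimal usage sketch follows, assuming it is compiled inside lib/Target/WebAssembly; the printExampleSignature driver is hypothetical and not part of the diff.

// Sketch: the "(params) -> (returns)" text produced by the new
// WebAssembly::signatureToString helper.
#include "MCTargetDesc/WebAssemblyInstPrinter.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

void printExampleSignature() {
  wasm::WasmSignature Sig;
  Sig.Params.push_back(wasm::ValType::I32);
  Sig.Params.push_back(wasm::ValType::I64);
  Sig.Returns.push_back(wasm::ValType::F32);
  // Prints "(i32, i64) -> (f32)", the same form the asm parser's
  // parseSignature accepts back for call_indirect's TYPEINDEX operand.
  outs() << WebAssembly::signatureToString(&Sig) << "\n";
}

Sharing one formatter is what lets the assembler round-trip the printed signature into an anonymous type-index symbol, as the parser changes earlier in this patch describe.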
diff --git a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp index 4c5d0192fc28..c069af9eed62 100644 --- a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp +++ b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp @@ -97,14 +97,14 @@ public: // If the smallest region containing MBB is a loop if (LoopMap.count(ML)) return LoopMap[ML].get(); - LoopMap[ML] = llvm::make_unique>(ML); + LoopMap[ML] = std::make_unique>(ML); return LoopMap[ML].get(); } else { // If the smallest region containing MBB is an exception if (ExceptionMap.count(WE)) return ExceptionMap[WE].get(); ExceptionMap[WE] = - llvm::make_unique>(WE); + std::make_unique>(WE); return ExceptionMap[WE].get(); } } @@ -317,6 +317,7 @@ static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI, // If Next was originally ordered before MBB, and it isn't because it was // loop-rotated above the header, it's not preferred. if (Next->getNumber() < MBB->getNumber() && + (WasmDisableEHPadSort || !Next->isEHPad()) && (!R || !R->contains(Next) || R->getHeader()->getNumber() < Next->getNumber())) { Ready.push(Next); diff --git a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index e6bfc5226e2e..7e867edaaa27 100644 --- a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/MC/MCAsmInfo.h" using namespace llvm; @@ -315,12 +316,12 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) { // br_on_exn 0, $__cpp_exception // rethrow // end_block - WebAssembly::ExprType ReturnType = WebAssembly::ExprType::Void; + WebAssembly::BlockType ReturnType = WebAssembly::BlockType::Void; if (IsBrOnExn) { const char *TagName = BrOnExn->getOperand(1).getSymbolName(); if (std::strcmp(TagName, "__cpp_exception") != 0) llvm_unreachable("Only C++ exception is supported"); - ReturnType = WebAssembly::ExprType::I32; + ReturnType = WebAssembly::BlockType::I32; } auto InsertPos = getLatestInsertPos(Header, BeforeSet, AfterSet); @@ -406,7 +407,7 @@ void WebAssemblyCFGStackify::placeLoopMarker(MachineBasicBlock &MBB) { auto InsertPos = getEarliestInsertPos(&MBB, BeforeSet, AfterSet); MachineInstr *Begin = BuildMI(MBB, InsertPos, MBB.findDebugLoc(InsertPos), TII.get(WebAssembly::LOOP)) - .addImm(int64_t(WebAssembly::ExprType::Void)); + .addImm(int64_t(WebAssembly::BlockType::Void)); // Decide where in Header to put the END_LOOP. BeforeSet.clear(); @@ -526,46 +527,56 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) { AfterSet.insert(&MI); } - // Local expression tree should go after the TRY. - for (auto I = Header->getFirstTerminator(), E = Header->begin(); I != E; - --I) { - if (std::prev(I)->isDebugInstr() || std::prev(I)->isPosition()) - continue; - if (WebAssembly::isChild(*std::prev(I), MFI)) - AfterSet.insert(&*std::prev(I)); - else - break; - } - // If Header unwinds to MBB (= Header contains 'invoke'), the try block should // contain the call within it. So the call should go after the TRY. The // exception is when the header's terminator is a rethrow instruction, in // which case that instruction, not a call instruction before it, is gonna // throw. 
+ MachineInstr *ThrowingCall = nullptr; if (MBB.isPredecessor(Header)) { auto TermPos = Header->getFirstTerminator(); if (TermPos == Header->end() || TermPos->getOpcode() != WebAssembly::RETHROW) { - for (const auto &MI : reverse(*Header)) { + for (auto &MI : reverse(*Header)) { if (MI.isCall()) { AfterSet.insert(&MI); + ThrowingCall = &MI; // Possibly throwing calls are usually wrapped by EH_LABEL // instructions. We don't want to split them and the call. if (MI.getIterator() != Header->begin() && - std::prev(MI.getIterator())->isEHLabel()) + std::prev(MI.getIterator())->isEHLabel()) { AfterSet.insert(&*std::prev(MI.getIterator())); + ThrowingCall = &*std::prev(MI.getIterator()); + } break; } } } } + // Local expression tree should go after the TRY. + // For BLOCK placement, we start the search from the previous instruction of a + // BB's terminator, but in TRY's case, we should start from the previous + // instruction of a call that can throw, or a EH_LABEL that precedes the call, + // because the return values of the call's previous instructions can be + // stackified and consumed by the throwing call. + auto SearchStartPt = ThrowingCall ? MachineBasicBlock::iterator(ThrowingCall) + : Header->getFirstTerminator(); + for (auto I = SearchStartPt, E = Header->begin(); I != E; --I) { + if (std::prev(I)->isDebugInstr() || std::prev(I)->isPosition()) + continue; + if (WebAssembly::isChild(*std::prev(I), MFI)) + AfterSet.insert(&*std::prev(I)); + else + break; + } + // Add the TRY. auto InsertPos = getLatestInsertPos(Header, BeforeSet, AfterSet); MachineInstr *Begin = BuildMI(*Header, InsertPos, Header->findDebugLoc(InsertPos), TII.get(WebAssembly::TRY)) - .addImm(int64_t(WebAssembly::ExprType::Void)); + .addImm(int64_t(WebAssembly::BlockType::Void)); // Decide where in Header to put the END_TRY. BeforeSet.clear(); @@ -694,8 +705,26 @@ void WebAssemblyCFGStackify::removeUnnecessaryInstrs(MachineFunction &MF) { } } +// When MBB is split into MBB and Split, we should unstackify defs in MBB that +// have their uses in Split. +static void unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB, + MachineBasicBlock &Split, + WebAssemblyFunctionInfo &MFI, + MachineRegisterInfo &MRI) { + for (auto &MI : Split) { + for (auto &MO : MI.explicit_uses()) { + if (!MO.isReg() || Register::isPhysicalRegister(MO.getReg())) + continue; + if (MachineInstr *Def = MRI.getUniqueVRegDef(MO.getReg())) + if (Def->getParent() == &MBB) + MFI.unstackifyVReg(MO.getReg()); + } + } +} + bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { const auto &TII = *MF.getSubtarget().getInstrInfo(); + auto &MFI = *MF.getInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); // Linearizing the control flow by placing TRY / END_TRY markers can create @@ -830,7 +859,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { SmallVector EHPadStack; // Range of intructions to be wrapped in a new nested try/catch using TryRange = std::pair; - // In original CFG, + // In original CFG, DenseMap> UnwindDestToTryRanges; // In new CFG, DenseMap> BrDestToTryRanges; @@ -936,7 +965,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { // of the function with a local.get and a rethrow instruction. 
if (NeedAppendixBlock) { auto *AppendixBB = getAppendixBlock(MF); - unsigned ExnReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass); + Register ExnReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass); BuildMI(AppendixBB, DebugLoc(), TII.get(WebAssembly::RETHROW)) .addReg(ExnReg); // These instruction ranges should branch to this appendix BB. @@ -967,7 +996,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { // ... // cont: for (auto &P : UnwindDestToTryRanges) { - NumUnwindMismatches++; + NumUnwindMismatches += P.second.size(); // This means the destination is the appendix BB, which was separately // handled above. @@ -1007,6 +1036,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { BrDest->insert(BrDest->end(), EndTry->removeFromParent()); // Take out the handler body from EH pad to the new branch destination BB. BrDest->splice(BrDest->end(), EHPad, SplitPos, EHPad->end()); + unstackifyVRegsUsedInSplitBB(*EHPad, *BrDest, MFI, MRI); // Fix predecessor-successor relationship. BrDest->transferSuccessors(EHPad); EHPad->addSuccessor(BrDest); @@ -1100,7 +1130,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { MachineInstr *NestedTry = BuildMI(*MBB, *RangeBegin, RangeBegin->getDebugLoc(), TII.get(WebAssembly::TRY)) - .addImm(int64_t(WebAssembly::ExprType::Void)); + .addImm(int64_t(WebAssembly::BlockType::Void)); // Create the nested EH pad and fill instructions in. MachineBasicBlock *NestedEHPad = MF.CreateMachineBasicBlock(); @@ -1122,6 +1152,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { // new nested continuation BB. NestedCont->splice(NestedCont->end(), MBB, std::next(RangeEnd->getIterator()), MBB->end()); + unstackifyVRegsUsedInSplitBB(*MBB, *NestedCont, MFI, MRI); registerTryScope(NestedTry, NestedEndTry, NestedEHPad); // Fix predecessor-successor relationship. @@ -1197,54 +1228,32 @@ getDepth(const SmallVectorImpl &Stack, /// checks for such cases and fixes up the signatures. void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) { const auto &MFI = *MF.getInfo(); - assert(MFI.getResults().size() <= 1); if (MFI.getResults().empty()) return; - WebAssembly::ExprType RetType; - switch (MFI.getResults().front().SimpleTy) { - case MVT::i32: - RetType = WebAssembly::ExprType::I32; - break; - case MVT::i64: - RetType = WebAssembly::ExprType::I64; - break; - case MVT::f32: - RetType = WebAssembly::ExprType::F32; - break; - case MVT::f64: - RetType = WebAssembly::ExprType::F64; - break; - case MVT::v16i8: - case MVT::v8i16: - case MVT::v4i32: - case MVT::v2i64: - case MVT::v4f32: - case MVT::v2f64: - RetType = WebAssembly::ExprType::V128; - break; - case MVT::exnref: - RetType = WebAssembly::ExprType::Exnref; - break; - default: - llvm_unreachable("unexpected return type"); - } + // MCInstLower will add the proper types to multivalue signatures based on the + // function return type + WebAssembly::BlockType RetType = + MFI.getResults().size() > 1 + ? 
WebAssembly::BlockType::Multivalue + : WebAssembly::BlockType( + WebAssembly::toValType(MFI.getResults().front())); for (MachineBasicBlock &MBB : reverse(MF)) { for (MachineInstr &MI : reverse(MBB)) { if (MI.isPosition() || MI.isDebugInstr()) continue; - if (MI.getOpcode() == WebAssembly::END_BLOCK) { - EndToBegin[&MI]->getOperand(0).setImm(int32_t(RetType)); - continue; - } - if (MI.getOpcode() == WebAssembly::END_LOOP) { + switch (MI.getOpcode()) { + case WebAssembly::END_BLOCK: + case WebAssembly::END_LOOP: + case WebAssembly::END_TRY: EndToBegin[&MI]->getOperand(0).setImm(int32_t(RetType)); continue; + default: + // Something other than an `end`. We're done. + return; } - // Something other than an `end`. We're done. - return; } } } @@ -1280,7 +1289,9 @@ void WebAssemblyCFGStackify::placeMarkers(MachineFunction &MF) { } } // Fix mismatches in unwind destinations induced by linearizing the code. - fixUnwindMismatches(MF); + if (MCAI->getExceptionHandlingType() == ExceptionHandling::Wasm && + MF.getFunction().hasPersonalityFn()) + fixUnwindMismatches(MF); } void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) { diff --git a/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp index dbd62179f055..ef75bb215317 100644 --- a/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp +++ b/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp @@ -168,7 +168,7 @@ static MVT typeForRegClass(const TargetRegisterClass *RC) { static MachineInstr *findStartOfTree(MachineOperand &MO, MachineRegisterInfo &MRI, WebAssemblyFunctionInfo &MFI) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); assert(MFI.isVRegStackified(Reg)); MachineInstr *Def = MRI.getVRegDef(Reg); @@ -207,7 +207,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { MachineInstr &MI = *I++; if (!WebAssembly::isArgument(MI.getOpcode())) break; - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); assert(!MFI.isVRegStackified(Reg)); Reg2Local[Reg] = static_cast(MI.getOperand(1).getImm()); MI.eraseFromParent(); @@ -221,7 +221,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { // drops to their defs. BitVector UseEmpty(MRI.getNumVirtRegs()); for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) - UseEmpty[I] = MRI.use_empty(TargetRegisterInfo::index2VirtReg(I)); + UseEmpty[I] = MRI.use_empty(Register::index2VirtReg(I)); // Visit each instruction in the function. for (MachineBasicBlock &MBB : MF) { @@ -238,13 +238,13 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { if (WebAssembly::isTee(MI.getOpcode())) { assert(MFI.isVRegStackified(MI.getOperand(0).getReg())); assert(!MFI.isVRegStackified(MI.getOperand(1).getReg())); - unsigned OldReg = MI.getOperand(2).getReg(); + Register OldReg = MI.getOperand(2).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(OldReg); // Stackify the input if it isn't stackified yet. if (!MFI.isVRegStackified(OldReg)) { unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg); - unsigned NewReg = MRI.createVirtualRegister(RC); + Register NewReg = MRI.createVirtualRegister(RC); unsigned Opc = getLocalGetOpcode(RC); BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Opc), NewReg) .addImm(LocalId); @@ -270,17 +270,17 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { // we handle at most one def. 
assert(MI.getDesc().getNumDefs() <= 1); if (MI.getDesc().getNumDefs() == 1) { - unsigned OldReg = MI.getOperand(0).getReg(); + Register OldReg = MI.getOperand(0).getReg(); if (!MFI.isVRegStackified(OldReg)) { const TargetRegisterClass *RC = MRI.getRegClass(OldReg); - unsigned NewReg = MRI.createVirtualRegister(RC); + Register NewReg = MRI.createVirtualRegister(RC); auto InsertPt = std::next(MI.getIterator()); if (MI.getOpcode() == WebAssembly::IMPLICIT_DEF) { MI.eraseFromParent(); Changed = true; continue; } - if (UseEmpty[TargetRegisterInfo::virtReg2Index(OldReg)]) { + if (UseEmpty[Register::virtReg2Index(OldReg)]) { unsigned Opc = getDropOpcode(RC); MachineInstr *Drop = BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc)) @@ -310,7 +310,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { if (!MO.isReg()) continue; - unsigned OldReg = MO.getReg(); + Register OldReg = MO.getReg(); // Inline asm may have a def in the middle of the operands. Our contract // with inline asm register operands is to provide local indices as @@ -345,7 +345,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { // Insert a local.get. unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg); const TargetRegisterClass *RC = MRI.getRegClass(OldReg); - unsigned NewReg = MRI.createVirtualRegister(RC); + Register NewReg = MRI.createVirtualRegister(RC); unsigned Opc = getLocalGetOpcode(RC); InsertPt = BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc), NewReg) @@ -369,7 +369,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { // TODO: Sort the locals for better compression. MFI.setNumLocals(CurLocal - MFI.getParams().size()); for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + unsigned Reg = Register::index2VirtReg(I); auto RL = Reg2Local.find(Reg); if (RL == Reg2Local.end() || RL->second < MFI.getParams().size()) continue; diff --git a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index 2552e9150833..c932f985489a 100644 --- a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -1141,14 +1141,14 @@ bool WebAssemblyFastISel::selectBitCast(const Instruction *I) { return true; } - unsigned Reg = fastEmit_ISD_BITCAST_r(VT.getSimpleVT(), RetVT.getSimpleVT(), + Register Reg = fastEmit_ISD_BITCAST_r(VT.getSimpleVT(), RetVT.getSimpleVT(), In, I->getOperand(0)->hasOneUse()); if (!Reg) return false; MachineBasicBlock::iterator Iter = FuncInfo.InsertPt; --Iter; assert(Iter->isBitcast()); - Iter->setPhysRegsDeadExcept(ArrayRef(), TRI); + Iter->setPhysRegsDeadExcept(ArrayRef(), TRI); updateValueMap(I, Reg); return true; } @@ -1302,51 +1302,33 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) { if (Ret->getNumOperands() == 0) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(WebAssembly::RETURN_VOID)); + TII.get(WebAssembly::RETURN)); return true; } + // TODO: support multiple return in FastISel + if (Ret->getNumOperands() > 1) + return false; + Value *RV = Ret->getOperand(0); if (!Subtarget->hasSIMD128() && RV->getType()->isVectorTy()) return false; - unsigned Opc; switch (getSimpleType(RV->getType())) { case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: - Opc = WebAssembly::RETURN_I32; - break; case MVT::i64: - Opc = WebAssembly::RETURN_I64; - break; case MVT::f32: - Opc = WebAssembly::RETURN_F32; - break; case MVT::f64: - Opc = 
WebAssembly::RETURN_F64; - break; case MVT::v16i8: - Opc = WebAssembly::RETURN_v16i8; - break; case MVT::v8i16: - Opc = WebAssembly::RETURN_v8i16; - break; case MVT::v4i32: - Opc = WebAssembly::RETURN_v4i32; - break; case MVT::v2i64: - Opc = WebAssembly::RETURN_v2i64; - break; case MVT::v4f32: - Opc = WebAssembly::RETURN_v4f32; - break; case MVT::v2f64: - Opc = WebAssembly::RETURN_v2f64; - break; case MVT::exnref: - Opc = WebAssembly::RETURN_EXNREF; break; default: return false; @@ -1363,7 +1345,9 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) { if (Reg == 0) return false; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)).addReg(Reg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::RETURN)) + .addReg(Reg); return true; } diff --git a/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp index b7fc65401fc4..6b1bbd7a2b07 100644 --- a/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp @@ -70,6 +70,8 @@ static void findUses(Value *V, Function &F, for (Use &U : V->uses()) { if (auto *BC = dyn_cast(U.getUser())) findUses(BC, F, Uses, ConstantBCs); + else if (auto *A = dyn_cast(U.getUser())) + findUses(A, F, Uses, ConstantBCs); else if (U.get()->getType() != F.getType()) { CallSite CS(U.getUser()); if (!CS) diff --git a/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp index 7d8e86d9b2c0..157ea9d525c9 100644 --- a/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp @@ -56,6 +56,7 @@ #include "WebAssembly.h" #include "WebAssemblySubtarget.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/Debug.h" using namespace llvm; #define DEBUG_TYPE "wasm-fix-irreducible-control-flow" @@ -358,7 +359,7 @@ void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop( // Add the register which will be used to tell the jump table which block to // jump to. 
MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); MIB.addReg(Reg); // Compute the indices in the superheader, one for each bad block, and diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp index 5299068efdd4..71eeebfada4b 100644 --- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -183,14 +183,14 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, bool HasBP = hasBP(MF); if (HasBP) { auto FI = MF.getInfo(); - unsigned BasePtr = MRI.createVirtualRegister(PtrRC); + Register BasePtr = MRI.createVirtualRegister(PtrRC); FI->setBasePointerVreg(BasePtr); BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), BasePtr) .addReg(SPReg); } if (StackSize) { // Subtract the frame size - unsigned OffsetReg = MRI.createVirtualRegister(PtrRC); + Register OffsetReg = MRI.createVirtualRegister(PtrRC); BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) .addImm(StackSize); BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::SUB_I32), @@ -199,7 +199,7 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, .addReg(OffsetReg); } if (HasBP) { - unsigned BitmaskReg = MRI.createVirtualRegister(PtrRC); + Register BitmaskReg = MRI.createVirtualRegister(PtrRC); unsigned Alignment = MFI.getMaxAlignment(); assert((1u << countTrailingZeros(Alignment)) == Alignment && "Alignment must be a power of 2"); @@ -244,7 +244,7 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF, } else if (StackSize) { const TargetRegisterClass *PtrRC = MRI.getTargetRegisterInfo()->getPointerRegClass(MF); - unsigned OffsetReg = MRI.createVirtualRegister(PtrRC); + Register OffsetReg = MRI.createVirtualRegister(PtrRC); BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) .addImm(StackSize); // In the epilog we don't need to write the result back to the SP32 physreg diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h index daddd4ca16ff..fdc0f561dcd9 100644 --- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.h +++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h @@ -29,9 +29,9 @@ public: static const size_t RedZoneSize = 128; WebAssemblyFrameLowering() - : TargetFrameLowering(StackGrowsDown, /*StackAlignment=*/16, + : TargetFrameLowering(StackGrowsDown, /*StackAlignment=*/Align(16), /*LocalAreaOffset=*/0, - /*TransientStackAlignment=*/16, + /*TransientStackAlignment=*/Align(16), /*StackRealignable=*/true) {} MachineBasicBlock::iterator diff --git a/lib/Target/WebAssembly/WebAssemblyISD.def b/lib/Target/WebAssembly/WebAssemblyISD.def index 77217f16a727..13f0476eb4a5 100644 --- a/lib/Target/WebAssembly/WebAssemblyISD.def +++ b/lib/Target/WebAssembly/WebAssemblyISD.def @@ -26,9 +26,11 @@ HANDLE_NODETYPE(WrapperPIC) HANDLE_NODETYPE(BR_IF) HANDLE_NODETYPE(BR_TABLE) HANDLE_NODETYPE(SHUFFLE) +HANDLE_NODETYPE(SWIZZLE) HANDLE_NODETYPE(VEC_SHL) HANDLE_NODETYPE(VEC_SHR_S) HANDLE_NODETYPE(VEC_SHR_U) +HANDLE_NODETYPE(LOAD_SPLAT) HANDLE_NODETYPE(THROW) HANDLE_NODETYPE(MEMORY_COPY) HANDLE_NODETYPE(MEMORY_FILL) diff --git a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp index 26339eaef37d..f83a8a984ae0 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ 
b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -54,6 +54,12 @@ public: ForCodeSize = MF.getFunction().hasOptSize(); Subtarget = &MF.getSubtarget(); + + // Wasm64 is not fully supported right now (and is not specified) + if (Subtarget->hasAddr64()) + report_fatal_error( + "64-bit WebAssembly (wasm64) is not currently supported"); + return SelectionDAGISel::runOnMachineFunction(MF); } @@ -88,88 +94,36 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) { uint64_t SyncScopeID = cast(Node->getOperand(2).getNode())->getZExtValue(); + MachineSDNode *Fence = nullptr; switch (SyncScopeID) { - case SyncScope::SingleThread: { + case SyncScope::SingleThread: // We lower a single-thread fence to a pseudo compiler barrier instruction // preventing instruction reordering. This will not be emitted in final // binary. - MachineSDNode *Fence = - CurDAG->getMachineNode(WebAssembly::COMPILER_FENCE, - DL, // debug loc - MVT::Other, // outchain type - Node->getOperand(0) // inchain - ); - ReplaceNode(Node, Fence); - CurDAG->RemoveDeadNode(Node); - return; - } - - case SyncScope::System: { - // For non-emscripten systems, we have not decided on what we should - // traslate fences to yet. - if (!Subtarget->getTargetTriple().isOSEmscripten()) - report_fatal_error( - "ATOMIC_FENCE is not yet supported in non-emscripten OSes"); - - // Wasm does not have a fence instruction, but because all atomic - // instructions in wasm are sequentially consistent, we translate a - // fence to an idempotent atomic RMW instruction to a linear memory - // address. All atomic instructions in wasm are sequentially consistent, - // but this is to ensure a fence also prevents reordering of non-atomic - // instructions in the VM. Even though LLVM IR's fence instruction does - // not say anything about its relationship with non-atomic instructions, - // we think this is more user-friendly. - // - // While any address can work, here we use a value stored in - // __stack_pointer wasm global because there's high chance that area is - // in cache. - // - // So the selected instructions will be in the form of: - // %addr = get_global $__stack_pointer - // %0 = i32.const 0 - // i32.atomic.rmw.or %addr, %0 - SDValue StackPtrSym = CurDAG->getTargetExternalSymbol( - "__stack_pointer", TLI->getPointerTy(CurDAG->getDataLayout())); - MachineSDNode *GetGlobal = - CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32, // opcode - DL, // debug loc - MVT::i32, // result type - StackPtrSym // __stack_pointer symbol - ); - - SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); - auto *MMO = MF.getMachineMemOperand( - MachinePointerInfo::getUnknownStack(MF), - // FIXME Volatile isn't really correct, but currently all LLVM - // atomic instructions are treated as volatiles in the backend, so - // we should be consistent. 
- MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad | - MachineMemOperand::MOStore, - 4, 4, AAMDNodes(), nullptr, SyncScope::System, - AtomicOrdering::SequentiallyConsistent); - MachineSDNode *Const0 = - CurDAG->getMachineNode(WebAssembly::CONST_I32, DL, MVT::i32, Zero); - MachineSDNode *AtomicRMW = CurDAG->getMachineNode( - WebAssembly::ATOMIC_RMW_OR_I32, // opcode - DL, // debug loc - MVT::i32, // result type - MVT::Other, // outchain type - { - Zero, // alignment - Zero, // offset - SDValue(GetGlobal, 0), // __stack_pointer - SDValue(Const0, 0), // OR with 0 to make it idempotent - Node->getOperand(0) // inchain - }); - - CurDAG->setNodeMemRefs(AtomicRMW, {MMO}); - ReplaceUses(SDValue(Node, 0), SDValue(AtomicRMW, 1)); - CurDAG->RemoveDeadNode(Node); - return; - } + Fence = CurDAG->getMachineNode(WebAssembly::COMPILER_FENCE, + DL, // debug loc + MVT::Other, // outchain type + Node->getOperand(0) // inchain + ); + break; + case SyncScope::System: + // Currently wasm only supports sequentially consistent atomics, so we + // always set the order to 0 (sequentially consistent). + Fence = CurDAG->getMachineNode( + WebAssembly::ATOMIC_FENCE, + DL, // debug loc + MVT::Other, // outchain type + CurDAG->getTargetConstant(0, DL, MVT::i32), // order + Node->getOperand(0) // inchain + ); + break; default: llvm_unreachable("Unknown scope!"); } + + ReplaceNode(Node, Fence); + CurDAG->RemoveDeadNode(Node); + return; } case ISD::GlobalTLSAddress: { @@ -224,6 +178,33 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) { ReplaceNode(Node, TLSSize); return; } + case Intrinsic::wasm_tls_align: { + MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout()); + assert(PtrVT == MVT::i32 && "only wasm32 is supported for now"); + + MachineSDNode *TLSAlign = CurDAG->getMachineNode( + WebAssembly::GLOBAL_GET_I32, DL, PtrVT, + CurDAG->getTargetExternalSymbol("__tls_align", MVT::i32)); + ReplaceNode(Node, TLSAlign); + return; + } + } + break; + } + case ISD::INTRINSIC_W_CHAIN: { + unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); + switch (IntNo) { + case Intrinsic::wasm_tls_base: { + MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout()); + assert(PtrVT == MVT::i32 && "only wasm32 is supported for now"); + + MachineSDNode *TLSBase = CurDAG->getMachineNode( + WebAssembly::GLOBAL_GET_I32, DL, MVT::i32, MVT::Other, + CurDAG->getTargetExternalSymbol("__tls_base", PtrVT), + Node->getOperand(0)); + ReplaceNode(Node, TLSBase); + return; + } } break; } diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 4064a983099c..f06afdbcea9e 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -205,7 +205,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( for (auto T : {MVT::i8, MVT::i16, MVT::i32}) setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action); } - for (auto T : MVT::integer_vector_valuetypes()) + for (auto T : MVT::integer_fixedlen_vector_valuetypes()) setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand); // Dynamic stack allocation: use the default expansion. @@ -228,7 +228,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( // - Floating-point extending loads. // - Floating-point truncating stores. // - i1 extending loads. 
- // - extending/truncating SIMD loads/stores + // - truncating SIMD stores and most extending loads setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); for (auto T : MVT::integer_valuetypes()) @@ -237,7 +237,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( if (Subtarget->hasSIMD128()) { for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64}) { - for (auto MemT : MVT::vector_valuetypes()) { + for (auto MemT : MVT::fixedlen_vector_valuetypes()) { if (MVT(T) != MemT) { setTruncStoreAction(T, MemT, Expand); for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD}) @@ -245,6 +245,14 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( } } } + // But some vector extending loads are legal + if (Subtarget->hasUnimplementedSIMD128()) { + for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) { + setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal); + setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal); + setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal); + } + } } // Don't do anything clever with build_pairs @@ -259,16 +267,6 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setMaxAtomicSizeInBitsSupported(64); - if (Subtarget->hasBulkMemory()) { - // Use memory.copy and friends over multiple loads and stores - MaxStoresPerMemcpy = 1; - MaxStoresPerMemcpyOptSize = 1; - MaxStoresPerMemmove = 1; - MaxStoresPerMemmoveOptSize = 1; - MaxStoresPerMemset = 1; - MaxStoresPerMemsetOptSize = 1; - } - // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is // consistent with the f64 and f128 names. setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2"); @@ -337,8 +335,8 @@ static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL, bool Float64, unsigned LoweredOpcode) { MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - unsigned OutReg = MI.getOperand(0).getReg(); - unsigned InReg = MI.getOperand(1).getReg(); + Register OutReg = MI.getOperand(0).getReg(); + Register InReg = MI.getOperand(1).getReg(); unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32; unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32; @@ -396,9 +394,9 @@ static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL, // For unsigned numbers, we have to do a separate comparison with zero. 
if (IsUnsigned) { Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); - unsigned SecondCmpReg = + Register SecondCmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); - unsigned AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); BuildMI(BB, DL, TII.get(FConst), Tmp1) .addFPImm(cast(ConstantFP::get(Ty, 0.0))); BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1); @@ -550,6 +548,16 @@ bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT, return true; } +bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { + if (!Subtarget->hasUnimplementedSIMD128()) + return false; + MVT ExtT = ExtVal.getSimpleValueType(); + MVT MemT = cast(ExtVal->getOperand(0))->getSimpleValueType(0); + return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) || + (ExtT == MVT::v4i32 && MemT == MVT::v4i16) || + (ExtT == MVT::v2i64 && MemT == MVT::v2i32); +} + EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C, EVT VT) const { @@ -569,7 +577,7 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::i32; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = 4; + Info.align = Align(4); // atomic.notify instruction does not really load the memory specified with // this argument, but MachineMemOperand should either be load or store, so // we set this to a load. @@ -583,7 +591,7 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::i32; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = 4; + Info.align = Align(4); Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; return true; case Intrinsic::wasm_atomic_wait_i64: @@ -591,7 +599,7 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::i64; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = 8; + Info.align = Align(8); Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; return true; default: @@ -623,7 +631,8 @@ static bool callingConvSupported(CallingConv::ID CallConv) { CallConv == CallingConv::Cold || CallConv == CallingConv::PreserveMost || CallConv == CallingConv::PreserveAll || - CallConv == CallingConv::CXX_FAST_TLS; + CallConv == CallingConv::CXX_FAST_TLS || + CallConv == CallingConv::WASM_EmscriptenInvoke; } SDValue @@ -644,13 +653,36 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, if (CLI.IsPatchPoint) fail(DL, DAG, "WebAssembly doesn't support patch point yet"); - // Fail if tail calls are required but not enabled - if (!Subtarget->hasTailCall()) { - if ((CallConv == CallingConv::Fast && CLI.IsTailCall && - MF.getTarget().Options.GuaranteedTailCallOpt) || - (CLI.CS && CLI.CS.isMustTailCall())) - fail(DL, DAG, "WebAssembly 'tail-call' feature not enabled"); - CLI.IsTailCall = false; + if (CLI.IsTailCall) { + bool MustTail = CLI.CS && CLI.CS.isMustTailCall(); + if (Subtarget->hasTailCall() && !CLI.IsVarArg) { + // Do not tail call unless caller and callee return types match + const Function &F = MF.getFunction(); + const TargetMachine &TM = getTargetMachine(); + Type *RetTy = F.getReturnType(); + SmallVector CallerRetTys; + SmallVector CalleeRetTys; + computeLegalValueVTs(F, TM, RetTy, CallerRetTys); + computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys); + bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() && + std::equal(CallerRetTys.begin(), CallerRetTys.end(), + 
CalleeRetTys.begin()); + if (!TypesMatch) { + // musttail in this case would be an LLVM IR validation failure + assert(!MustTail); + CLI.IsTailCall = false; + } + } else { + CLI.IsTailCall = false; + if (MustTail) { + if (CLI.IsVarArg) { + // The return would pop the argument buffer + fail(DL, DAG, "WebAssembly does not support varargs tail calls"); + } else { + fail(DL, DAG, "WebAssembly 'tail-call' feature not enabled"); + } + } + } } SmallVectorImpl &Ins = CLI.Ins; @@ -659,6 +691,16 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &Outs = CLI.Outs; SmallVectorImpl &OutVals = CLI.OutVals; + + // The generic code may have added an sret argument. If we're lowering an + // invoke function, the ABI requires that the function pointer be the first + // argument, so we may have to swap the arguments. + if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 && + Outs[0].Flags.isSRet()) { + std::swap(Outs[0], Outs[1]); + std::swap(OutVals[0], OutVals[1]); + } + unsigned NumFixedArgs = 0; for (unsigned I = 0; I < Outs.size(); ++I) { const ISD::OutputArg &Out = Outs[I]; @@ -810,8 +852,8 @@ bool WebAssemblyTargetLowering::CanLowerReturn( CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/, const SmallVectorImpl &Outs, LLVMContext & /*Context*/) const { - // WebAssembly can't currently handle returning tuples. - return Outs.size() <= 1; + // WebAssembly can only handle returning tuples with multivalue enabled + return Subtarget->hasMultivalue() || Outs.size() <= 1; } SDValue WebAssemblyTargetLowering::LowerReturn( @@ -819,7 +861,8 @@ SDValue WebAssemblyTargetLowering::LowerReturn( const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const { - assert(Outs.size() <= 1 && "WebAssembly can only return up to one value"); + assert((Subtarget->hasMultivalue() || Outs.size() <= 1) && + "MVP WebAssembly can only return up to one value"); if (!callingConvSupported(CallConv)) fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions"); @@ -881,7 +924,7 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments( // the buffer is passed as an argument. 
if (IsVarArg) { MVT PtrVT = getPointerTy(MF.getDataLayout()); - unsigned VarargVreg = + Register VarargVreg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT)); MFI->setVarargBufferVreg(VarargVreg); Chain = DAG.getCopyToReg( @@ -1022,8 +1065,9 @@ SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op, return SDValue(); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); + MakeLibCallOptions CallOptions; return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(), - {DAG.getConstant(Depth, DL, MVT::i32)}, false, DL) + {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL) .first; } @@ -1037,7 +1081,7 @@ SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op, DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); - unsigned FP = + Register FP = Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction()); return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT); } @@ -1249,68 +1293,116 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, const EVT VecT = Op.getValueType(); const EVT LaneT = Op.getOperand(0).getValueType(); const size_t Lanes = Op.getNumOperands(); + bool CanSwizzle = Subtarget->hasUnimplementedSIMD128() && VecT == MVT::v16i8; + + // BUILD_VECTORs are lowered to the instruction that initializes the highest + // possible number of lanes at once followed by a sequence of replace_lane + // instructions to individually initialize any remaining lanes. + + // TODO: Tune this. For example, lanewise swizzling is very expensive, so + // swizzled lanes should be given greater weight. + + // TODO: Investigate building vectors by shuffling together vectors built by + // separately specialized means. + auto IsConstant = [](const SDValue &V) { return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP; }; - // Find the most common operand, which is approximately the best to splat - using Entry = std::pair; - SmallVector ValueCounts; - size_t NumConst = 0, NumDynamic = 0; - for (const SDValue &Lane : Op->op_values()) { - if (Lane.isUndef()) { - continue; - } else if (IsConstant(Lane)) { - NumConst++; - } else { - NumDynamic++; - } - auto CountIt = std::find_if(ValueCounts.begin(), ValueCounts.end(), - [&Lane](Entry A) { return A.first == Lane; }); - if (CountIt == ValueCounts.end()) { - ValueCounts.emplace_back(Lane, 1); + // Returns the source vector and index vector pair if they exist. 
Checks for: + // (extract_vector_elt + // $src, + // (sign_extend_inreg (extract_vector_elt $indices, $i)) + // ) + auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) { + auto Bail = std::make_pair(SDValue(), SDValue()); + if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return Bail; + const SDValue &SwizzleSrc = Lane->getOperand(0); + const SDValue &IndexExt = Lane->getOperand(1); + if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG) + return Bail; + const SDValue &Index = IndexExt->getOperand(0); + if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return Bail; + const SDValue &SwizzleIndices = Index->getOperand(0); + if (SwizzleSrc.getValueType() != MVT::v16i8 || + SwizzleIndices.getValueType() != MVT::v16i8 || + Index->getOperand(1)->getOpcode() != ISD::Constant || + Index->getConstantOperandVal(1) != I) + return Bail; + return std::make_pair(SwizzleSrc, SwizzleIndices); + }; + + using ValueEntry = std::pair; + SmallVector SplatValueCounts; + + using SwizzleEntry = std::pair, size_t>; + SmallVector SwizzleCounts; + + auto AddCount = [](auto &Counts, const auto &Val) { + auto CountIt = std::find_if(Counts.begin(), Counts.end(), + [&Val](auto E) { return E.first == Val; }); + if (CountIt == Counts.end()) { + Counts.emplace_back(Val, 1); } else { CountIt->second++; } + }; + + auto GetMostCommon = [](auto &Counts) { + auto CommonIt = + std::max_element(Counts.begin(), Counts.end(), + [](auto A, auto B) { return A.second < B.second; }); + assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector"); + return *CommonIt; + }; + + size_t NumConstantLanes = 0; + + // Count eligible lanes for each type of vector creation op + for (size_t I = 0; I < Lanes; ++I) { + const SDValue &Lane = Op->getOperand(I); + if (Lane.isUndef()) + continue; + + AddCount(SplatValueCounts, Lane); + + if (IsConstant(Lane)) { + NumConstantLanes++; + } else if (CanSwizzle) { + auto SwizzleSrcs = GetSwizzleSrcs(I, Lane); + if (SwizzleSrcs.first) + AddCount(SwizzleCounts, SwizzleSrcs); + } } - auto CommonIt = - std::max_element(ValueCounts.begin(), ValueCounts.end(), - [](Entry A, Entry B) { return A.second < B.second; }); - assert(CommonIt != ValueCounts.end() && "Unexpected all-undef build_vector"); - SDValue SplatValue = CommonIt->first; - size_t NumCommon = CommonIt->second; - - // If v128.const is available, consider using it instead of a splat + + SDValue SplatValue; + size_t NumSplatLanes; + std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts); + + SDValue SwizzleSrc; + SDValue SwizzleIndices; + size_t NumSwizzleLanes = 0; + if (SwizzleCounts.size()) + std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices), + NumSwizzleLanes) = GetMostCommon(SwizzleCounts); + + // Predicate returning true if the lane is properly initialized by the + // original instruction + std::function IsLaneConstructed; + SDValue Result; if (Subtarget->hasUnimplementedSIMD128()) { - // {i32,i64,f32,f64}.const opcode, and value - const size_t ConstBytes = 1 + std::max(size_t(4), 16 / Lanes); - // SIMD prefix and opcode - const size_t SplatBytes = 2; - const size_t SplatConstBytes = SplatBytes + ConstBytes; - // SIMD prefix, opcode, and lane index - const size_t ReplaceBytes = 3; - const size_t ReplaceConstBytes = ReplaceBytes + ConstBytes; - // SIMD prefix, v128.const opcode, and 128-bit value - const size_t VecConstBytes = 18; - // Initial v128.const and a replace_lane for each non-const operand - const size_t ConstInitBytes = VecConstBytes + NumDynamic * ReplaceBytes; - // Initial splat and all 
necessary replace_lanes - const size_t SplatInitBytes = - IsConstant(SplatValue) - // Initial constant splat - ? (SplatConstBytes + - // Constant replace_lanes - (NumConst - NumCommon) * ReplaceConstBytes + - // Dynamic replace_lanes - (NumDynamic * ReplaceBytes)) - // Initial dynamic splat - : (SplatBytes + - // Constant replace_lanes - (NumConst * ReplaceConstBytes) + - // Dynamic replace_lanes - (NumDynamic - NumCommon) * ReplaceBytes); - if (ConstInitBytes < SplatInitBytes) { - // Create build_vector that will lower to initial v128.const + // Prefer swizzles over vector consts over splats + if (NumSwizzleLanes >= NumSplatLanes && + NumSwizzleLanes >= NumConstantLanes) { + Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc, + SwizzleIndices); + auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices); + IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) { + return Swizzled == GetSwizzleSrcs(I, Lane); + }; + } else if (NumConstantLanes >= NumSplatLanes) { SmallVector ConstLanes; for (const SDValue &Lane : Op->op_values()) { if (IsConstant(Lane)) { @@ -1321,26 +1413,35 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, ConstLanes.push_back(DAG.getConstant(0, DL, LaneT)); } } - SDValue Result = DAG.getBuildVector(VecT, DL, ConstLanes); - // Add replace_lane instructions for non-const lanes - for (size_t I = 0; I < Lanes; ++I) { - const SDValue &Lane = Op->getOperand(I); - if (!Lane.isUndef() && !IsConstant(Lane)) - Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane, - DAG.getConstant(I, DL, MVT::i32)); - } - return Result; + Result = DAG.getBuildVector(VecT, DL, ConstLanes); + IsLaneConstructed = [&](size_t _, const SDValue &Lane) { + return IsConstant(Lane); + }; + } + } + if (!Result) { + // Use a splat, but possibly a load_splat + LoadSDNode *SplattedLoad; + if (Subtarget->hasUnimplementedSIMD128() && + (SplattedLoad = dyn_cast(SplatValue)) && + SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) { + Result = DAG.getNode(WebAssemblyISD::LOAD_SPLAT, DL, VecT, SplatValue); + } else { + Result = DAG.getSplatBuildVector(VecT, DL, SplatValue); } + IsLaneConstructed = [&](size_t _, const SDValue &Lane) { + return Lane == SplatValue; + }; } - // Use a splat for the initial vector - SDValue Result = DAG.getSplatBuildVector(VecT, DL, SplatValue); - // Add replace_lane instructions for other values + + // Add replace_lane instructions for any unhandled values for (size_t I = 0; I < Lanes; ++I) { const SDValue &Lane = Op->getOperand(I); - if (Lane != SplatValue) + if (!Lane.isUndef() && !IsLaneConstructed(I, Lane)) Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane, DAG.getConstant(I, DL, MVT::i32)); } + return Result; } @@ -1415,11 +1516,6 @@ SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op, // Only manually lower vector shifts assert(Op.getSimpleValueType().isVector()); - // Expand all vector shifts until V8 fixes its implementation - // TODO: remove this once V8 is fixed - if (!Subtarget->hasUnimplementedSIMD128()) - return unrollVectorShift(Op, DAG); - // Unroll non-splat vector shifts BuildVectorSDNode *ShiftVec; SDValue SplatVal; diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/lib/Target/WebAssembly/WebAssemblyISelLowering.h index b3c7f3defd5f..a53e24a05542 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -63,7 +63,7 @@ private: MachineMemOperand::Flags Flags, bool *Fast) const override; bool 
isIntDivCheap(EVT VT, AttributeList Attr) const override; - + bool isVectorLoadExtDesirable(SDValue ExtVal) const override; EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, diff --git a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td index e85aa57efc42..a9a99d38f9f1 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td @@ -71,12 +71,6 @@ class NotifyPatImmOff : def : NotifyPatImmOff; def : NotifyPatImmOff; -def NotifyPatGlobalAddr : - Pat<(i32 (int_wasm_atomic_notify (regPlusGA I32:$addr, - (WebAssemblywrapper tglobaladdr:$off)), - I32:$count)), - (ATOMIC_NOTIFY 0, tglobaladdr:$off, I32:$addr, I32:$count)>; - // Select notifys with just a constant offset. def NotifyPatOffsetOnly : Pat<(i32 (int_wasm_atomic_notify imm:$off, I32:$count)), @@ -105,13 +99,6 @@ def : WaitPatImmOff; def : WaitPatImmOff; def : WaitPatImmOff; -class WaitPatGlobalAddr : - Pat<(i32 (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)), - ty:$exp, I64:$timeout)), - (inst 0, tglobaladdr:$off, I32:$addr, ty:$exp, I64:$timeout)>; -def : WaitPatGlobalAddr; -def : WaitPatGlobalAddr; - // Select wait_i32, ATOMIC_WAIT_I32s with just a constant offset. class WaitPatOffsetOnly : Pat<(i32 (kind imm:$off, ty:$exp, I64:$timeout)), @@ -126,6 +113,19 @@ def : WaitPatGlobalAddrOffOnly; def : WaitPatGlobalAddrOffOnly; } // Predicates = [HasAtomics] +//===----------------------------------------------------------------------===// +// Atomic fences +//===----------------------------------------------------------------------===// + +// A compiler fence instruction that prevents reordering of instructions. +let Defs = [ARGUMENTS] in { +let isPseudo = 1, hasSideEffects = 1 in +defm COMPILER_FENCE : ATOMIC_NRI<(outs), (ins), [], "compiler_fence">; +let hasSideEffects = 1 in +defm ATOMIC_FENCE : ATOMIC_NRI<(outs), (ins i8imm:$flags), [], "atomic.fence", + 0x03>; +} // Defs = [ARGUMENTS] + //===----------------------------------------------------------------------===// // Atomic loads //===----------------------------------------------------------------------===// @@ -151,9 +151,6 @@ def : LoadPatImmOff; def : LoadPatImmOff; def : LoadPatImmOff; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; - // Select loads with just a constant offset. def : LoadPatOffsetOnly; def : LoadPatOffsetOnly; @@ -244,16 +241,6 @@ def : LoadPatImmOff; def : LoadPatImmOff; // No 32->64 patterns, just use i32.atomic.load and i64.extend_s/i64 -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; - // Extending loads with just a constant offset def : LoadPatOffsetOnly; def : LoadPatOffsetOnly; @@ -313,13 +300,6 @@ def : AStorePatImmOff; def : AStorePatImmOff; def : AStorePatImmOff; -class AStorePatGlobalAddr : - Pat<(kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)), - ty:$val), - (inst 0, tglobaladdr:$off, I32:$addr, ty:$val)>; -def : AStorePatGlobalAddr; -def : AStorePatGlobalAddr; - // Select stores with just a constant offset. 
class AStorePatOffsetOnly : Pat<(kind imm:$off, ty:$val), (inst 0, imm:$off, (CONST_I32 0), ty:$val)>; @@ -374,12 +354,6 @@ def : AStorePatImmOff; def : AStorePatImmOff; def : AStorePatImmOff; -def : AStorePatGlobalAddr; -def : AStorePatGlobalAddr; -def : AStorePatGlobalAddr; -def : AStorePatGlobalAddr; -def : AStorePatGlobalAddr; - // Truncating stores with just a constant offset def : AStorePatOffsetOnly; def : AStorePatOffsetOnly; @@ -500,11 +474,6 @@ class BinRMWPatImmOff : Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$val)), (inst 0, imm:$off, I32:$addr, ty:$val)>; -class BinRMWPatGlobalAddr : - Pat<(ty (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)), - ty:$val)), - (inst 0, tglobaladdr:$off, I32:$addr, ty:$val)>; - // Select binary RMWs with just a constant offset. class BinRMWPatOffsetOnly : Pat<(ty (kind imm:$off, ty:$val)), @@ -525,9 +494,6 @@ multiclass BinRMWPattern; def : BinRMWPatImmOff; - def : BinRMWPatGlobalAddr; - def : BinRMWPatGlobalAddr; - def : BinRMWPatOffsetOnly; def : BinRMWPatOffsetOnly; @@ -622,17 +588,6 @@ multiclass BinRMWTruncExtPattern< def : BinRMWPatImmOff, or_is_add, inst8_64>; def : BinRMWPatImmOff, or_is_add, inst16_64>; - def : BinRMWPatGlobalAddr, inst8_32>; - def : BinRMWPatGlobalAddr, inst16_32>; - def : BinRMWPatGlobalAddr, inst8_64>; - def : BinRMWPatGlobalAddr, inst16_64>; - def : BinRMWPatGlobalAddr, inst32_64>; - - def : BinRMWPatGlobalAddr, inst8_32>; - def : BinRMWPatGlobalAddr, inst16_32>; - def : BinRMWPatGlobalAddr, inst8_64>; - def : BinRMWPatGlobalAddr, inst16_64>; - // Truncating-extending binary RMWs with just a constant offset def : BinRMWPatOffsetOnly, inst8_32>; def : BinRMWPatOffsetOnly, inst16_32>; @@ -732,11 +687,6 @@ class TerRMWPatImmOff : Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$exp, ty:$new)), (inst 0, imm:$off, I32:$addr, ty:$exp, ty:$new)>; -class TerRMWPatGlobalAddr : - Pat<(ty (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)), - ty:$exp, ty:$new)), - (inst 0, tglobaladdr:$off, I32:$addr, ty:$exp, ty:$new)>; - // Select ternary RMWs with just a constant offset. class TerRMWPatOffsetOnly : Pat<(ty (kind imm:$off, ty:$exp, ty:$new)), @@ -757,9 +707,6 @@ multiclass TerRMWPattern; def : TerRMWPatImmOff; - def : TerRMWPatGlobalAddr; - def : TerRMWPatGlobalAddr; - def : TerRMWPatOffsetOnly; def : TerRMWPatOffsetOnly; @@ -846,17 +793,6 @@ multiclass TerRMWTruncExtPattern< def : TerRMWPatImmOff, or_is_add, inst8_64>; def : TerRMWPatImmOff, or_is_add, inst16_64>; - def : TerRMWPatGlobalAddr, inst8_32>; - def : TerRMWPatGlobalAddr, inst16_32>; - def : TerRMWPatGlobalAddr, inst8_64>; - def : TerRMWPatGlobalAddr, inst16_64>; - def : TerRMWPatGlobalAddr, inst32_64>; - - def : TerRMWPatGlobalAddr, inst8_32>; - def : TerRMWPatGlobalAddr, inst16_32>; - def : TerRMWPatGlobalAddr, inst8_64>; - def : TerRMWPatGlobalAddr, inst16_64>; - // Truncating-extending ternary RMWs with just a constant offset def : TerRMWPatOffsetOnly, inst8_32>; def : TerRMWPatOffsetOnly, inst16_32>; @@ -887,13 +823,3 @@ defm : TerRMWTruncExtPattern< ATOMIC_RMW8_U_CMPXCHG_I32, ATOMIC_RMW16_U_CMPXCHG_I32, ATOMIC_RMW8_U_CMPXCHG_I64, ATOMIC_RMW16_U_CMPXCHG_I64, ATOMIC_RMW32_U_CMPXCHG_I64>; - -//===----------------------------------------------------------------------===// -// Atomic fences -//===----------------------------------------------------------------------===// - -// A compiler fence instruction that prevents reordering of instructions. 
-let Defs = [ARGUMENTS] in { -let isPseudo = 1, hasSideEffects = 1 in -defm COMPILER_FENCE : ATOMIC_NRI<(outs), (ins), [], "compiler_fence">; -} // Defs = [ARGUMENTS] diff --git a/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td index f4352e3d12ec..05735cf6d31f 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td @@ -39,7 +39,7 @@ defm MEMORY_INIT : (ins i32imm_op:$seg, i32imm_op:$idx, I32:$dest, I32:$offset, I32:$size), (outs), (ins i32imm_op:$seg, i32imm_op:$idx), - [(int_wasm_memory_init (i32 imm:$seg), (i32 imm:$idx), I32:$dest, + [(int_wasm_memory_init (i32 timm:$seg), (i32 timm:$idx), I32:$dest, I32:$offset, I32:$size )], "memory.init\t$seg, $idx, $dest, $offset, $size", @@ -48,7 +48,7 @@ defm MEMORY_INIT : let hasSideEffects = 1 in defm DATA_DROP : BULK_I<(outs), (ins i32imm_op:$seg), (outs), (ins i32imm_op:$seg), - [(int_wasm_data_drop (i32 imm:$seg))], + [(int_wasm_data_drop (i32 timm:$seg))], "data.drop\t$seg", "data.drop\t$seg", 0x09>; let mayLoad = 1, mayStore = 1 in diff --git a/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/lib/Target/WebAssembly/WebAssemblyInstrControl.td index 1870c5bc34b0..1afc9a8790dc 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrControl.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrControl.td @@ -84,49 +84,19 @@ let isTerminator = 1, isBarrier = 1 in defm END_FUNCTION : NRI<(outs), (ins), [], "end_function", 0x0b>; } // Uses = [VALUE_STACK], Defs = [VALUE_STACK] -multiclass RETURN { - defm RETURN_#vt : I<(outs), (ins vt:$val), (outs), (ins), - [(WebAssemblyreturn vt:$val)], - "return \t$val", "return", 0x0f>; - // Equivalent to RETURN_#vt, for use at the end of a function when wasm - // semantics return by falling off the end of the block. - let isCodeGenOnly = 1 in - defm FALLTHROUGH_RETURN_#vt : I<(outs), (ins vt:$val), (outs), (ins), []>; -} - -multiclass SIMD_RETURN { - defm RETURN_#vt : I<(outs), (ins V128:$val), (outs), (ins), - [(WebAssemblyreturn (vt V128:$val))], - "return \t$val", "return", 0x0f>, - Requires<[HasSIMD128]>; - // Equivalent to RETURN_#vt, for use at the end of a function when wasm - // semantics return by falling off the end of the block. - let isCodeGenOnly = 1 in - defm FALLTHROUGH_RETURN_#vt : I<(outs), (ins V128:$val), (outs), (ins), - []>, - Requires<[HasSIMD128]>; -} let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { let isReturn = 1 in { - defm "": RETURN; - defm "": RETURN; - defm "": RETURN; - defm "": RETURN; - defm "": RETURN; - defm "": SIMD_RETURN; - defm "": SIMD_RETURN; - defm "": SIMD_RETURN; - defm "": SIMD_RETURN; - defm "": SIMD_RETURN; - defm "": SIMD_RETURN; - - defm RETURN_VOID : NRI<(outs), (ins), [(WebAssemblyreturn)], "return", 0x0f>; - - // This is to RETURN_VOID what FALLTHROUGH_RETURN_#vt is to RETURN_#vt. - let isCodeGenOnly = 1 in - defm FALLTHROUGH_RETURN_VOID : NRI<(outs), (ins), []>; + +defm RETURN : I<(outs), (ins variable_ops), (outs), (ins), + [(WebAssemblyreturn)], + "return", "return", 0x0f>; +// Equivalent to RETURN, for use at the end of a function when wasm +// semantics return by falling off the end of the block. 
+let isCodeGenOnly = 1 in +defm FALLTHROUGH_RETURN : I<(outs), (ins variable_ops), (outs), (ins), []>; + } // isReturn = 1 defm UNREACHABLE : NRI<(outs), (ins), [(trap)], "unreachable", 0x00>; diff --git a/lib/Target/WebAssembly/WebAssemblyInstrConv.td b/lib/Target/WebAssembly/WebAssemblyInstrConv.td index 661fee2715ba..f3d9c5d5032c 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrConv.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrConv.td @@ -171,6 +171,23 @@ defm I64_TRUNC_U_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins), 0xb1>; } // hasSideEffects = 1 +def : Pat<(int_wasm_trunc_signed F32:$src), + (I32_TRUNC_S_F32 F32:$src)>; +def : Pat<(int_wasm_trunc_unsigned F32:$src), + (I32_TRUNC_U_F32 F32:$src)>; +def : Pat<(int_wasm_trunc_signed F64:$src), + (I32_TRUNC_S_F64 F64:$src)>; +def : Pat<(int_wasm_trunc_unsigned F64:$src), + (I32_TRUNC_U_F64 F64:$src)>; +def : Pat<(int_wasm_trunc_signed F32:$src), + (I64_TRUNC_S_F32 F32:$src)>; +def : Pat<(int_wasm_trunc_unsigned F32:$src), + (I64_TRUNC_U_F32 F32:$src)>; +def : Pat<(int_wasm_trunc_signed F64:$src), + (I64_TRUNC_S_F64 F64:$src)>; +def : Pat<(int_wasm_trunc_unsigned F64:$src), + (I64_TRUNC_U_F64 F64:$src)>; + defm F32_CONVERT_S_I32 : I<(outs F32:$dst), (ins I32:$src), (outs), (ins), [(set F32:$dst, (sint_to_fp I32:$src))], "f32.convert_i32_s\t$dst, $src", "f32.convert_i32_s", diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index a86c9af28f0d..8e8126c90e72 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -38,7 +38,7 @@ WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI) RI(STI.getTargetTriple()) {} bool WebAssemblyInstrInfo::isReallyTriviallyReMaterializable( - const MachineInstr &MI, AliasAnalysis *AA) const { + const MachineInstr &MI, AAResults *AA) const { switch (MI.getOpcode()) { case WebAssembly::CONST_I32: case WebAssembly::CONST_I64: @@ -60,7 +60,7 @@ void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB, // exist. However we need to handle both here. auto &MRI = MBB.getParent()->getRegInfo(); const TargetRegisterClass *RC = - TargetRegisterInfo::isVirtualRegister(DestReg) + Register::isVirtualRegister(DestReg) ? 
MRI.getRegClass(DestReg) : MRI.getTargetRegisterInfo()->getMinimalPhysRegClass(DestReg); diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h index df1051b4f42c..fe6211663c31 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h @@ -43,7 +43,7 @@ public: const WebAssemblyRegisterInfo &getRegisterInfo() const { return RI; } bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA) const override; + AAResults *AA) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index 73ddbe85d551..044901481381 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -106,7 +106,8 @@ def WebAssemblybr_table : SDNode<"WebAssemblyISD::BR_TABLE", def WebAssemblyargument : SDNode<"WebAssemblyISD::ARGUMENT", SDT_WebAssemblyArgument>; def WebAssemblyreturn : SDNode<"WebAssemblyISD::RETURN", - SDT_WebAssemblyReturn, [SDNPHasChain]>; + SDT_WebAssemblyReturn, + [SDNPHasChain, SDNPVariadic]>; def WebAssemblywrapper : SDNode<"WebAssemblyISD::Wrapper", SDT_WebAssemblyWrapper>; def WebAssemblywrapperPIC : SDNode<"WebAssemblyISD::WrapperPIC", diff --git a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td index 6916b165f970..eba9b80d3286 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -37,16 +37,6 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ return (~Known0.Zero & ~Known1.Zero) == 0; }]>; -// GlobalAddresses are conceptually unsigned values, so we can also fold them -// into immediate values as long as the add is 'nuw'. -// TODO: We'd like to also match GA offsets but there are cases where the -// register can have a negative value. Find out what more we can do. -def regPlusGA : PatFrag<(ops node:$addr, node:$off), - (add node:$addr, node:$off), - [{ - return N->getFlags().hasNoUnsignedWrap(); -}]>; - // We don't need a regPlusES because external symbols never have constant // offsets folded into them, so we can just use add. @@ -93,15 +83,6 @@ def : LoadPatImmOff; def : LoadPatImmOff; def : LoadPatImmOff; -class LoadPatGlobalAddr : - Pat<(ty (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (inst 0, tglobaladdr:$off, I32:$addr)>, Requires<[IsNotPIC]>; - -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; - // Select loads with just a constant offset. class LoadPatOffsetOnly : Pat<(ty (kind imm:$off)), (inst 0, imm:$off, (CONST_I32 0))>; @@ -167,18 +148,6 @@ def : LoadPatImmOff; def : LoadPatImmOff; def : LoadPatImmOff; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; - -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; - // Select extending loads with just a constant offset. 
def : LoadPatOffsetOnly; def : LoadPatOffsetOnly; @@ -224,11 +193,6 @@ def : LoadPatImmOff; def : LoadPatImmOff; def : LoadPatImmOff; def : LoadPatImmOff; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; // Select "don't care" extending loads with just a constant offset. def : LoadPatOffsetOnly; @@ -282,15 +246,6 @@ def : StorePatImmOff; def : StorePatImmOff; def : StorePatImmOff; -class StorePatGlobalAddr : - Pat<(kind ty:$val, - (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (inst 0, tglobaladdr:$off, I32:$addr, ty:$val)>, Requires<[IsNotPIC]>; -def : StorePatGlobalAddr; -def : StorePatGlobalAddr; -def : StorePatGlobalAddr; -def : StorePatGlobalAddr; - // Select stores with just a constant offset. class StorePatOffsetOnly : Pat<(kind ty:$val, imm:$off), (inst 0, imm:$off, (CONST_I32 0), ty:$val)>; @@ -333,12 +288,6 @@ def : StorePatImmOff; def : StorePatImmOff; def : StorePatImmOff; -def : StorePatGlobalAddr; -def : StorePatGlobalAddr; -def : StorePatGlobalAddr; -def : StorePatGlobalAddr; -def : StorePatGlobalAddr; - // Select truncating stores with just a constant offset. def : StorePatOffsetOnly; def : StorePatOffsetOnly; diff --git a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index dd8930f079b0..fc5d73dac52e 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -40,47 +40,124 @@ def LaneIdx#SIZE : ImmLeaf; //===----------------------------------------------------------------------===// // Load: v128.load -multiclass SIMDLoad { - let mayLoad = 1, UseNamedOperandTable = 1 in - defm LOAD_#vec_t : +let mayLoad = 1, UseNamedOperandTable = 1 in +defm LOAD_V128 : + SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + (outs), (ins P2Align:$p2align, offset32_op:$off), [], + "v128.load\t$dst, ${off}(${addr})$p2align", + "v128.load\t$off$p2align", 0>; + +// Def load and store patterns from WebAssemblyInstrMemory.td for vector types +foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { +def : LoadPatNoOffset; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatOffsetOnly; +def : LoadPatGlobalAddrOffOnly; +} + +// vNxM.load_splat +multiclass SIMDLoadSplat simdop> { + let mayLoad = 1, UseNamedOperandTable = 1, + Predicates = [HasUnimplementedSIMD128] in + defm LOAD_SPLAT_#vec : SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), (outs), (ins P2Align:$p2align, offset32_op:$off), [], - "v128.load\t$dst, ${off}(${addr})$p2align", - "v128.load\t$off$p2align", 0>; + vec#".load_splat\t$dst, ${off}(${addr})$p2align", + vec#".load_splat\t$off$p2align", simdop>; } -foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { -defm "" : SIMDLoad; - -// Def load and store patterns from WebAssemblyInstrMemory.td for vector types -def : LoadPatNoOffset("LOAD_"#vec_t)>; -def : LoadPatImmOff("LOAD_"#vec_t)>; -def : LoadPatImmOff("LOAD_"#vec_t)>; -def : LoadPatGlobalAddr("LOAD_"#vec_t)>; -def : LoadPatOffsetOnly("LOAD_"#vec_t)>; -def : LoadPatGlobalAddrOffOnly("LOAD_"#vec_t)>; +defm "" : SIMDLoadSplat<"v8x16", 194>; +defm "" : SIMDLoadSplat<"v16x8", 195>; +defm "" : SIMDLoadSplat<"v32x4", 196>; +defm "" : SIMDLoadSplat<"v64x2", 197>; + +def wasm_load_splat_t : SDTypeProfile<1, 1, []>; +def wasm_load_splat : SDNode<"WebAssemblyISD::LOAD_SPLAT", wasm_load_splat_t>; + +foreach args = [["v16i8", "i32", "extloadi8"], ["v8i16", "i32", 
"extloadi16"], + ["v4i32", "i32", "load"], ["v2i64", "i64", "load"], + ["v4f32", "f32", "load"], ["v2f64", "f64", "load"]] in +def load_splat_#args[0] : + PatFrag<(ops node:$addr), (wasm_load_splat + (!cast(args[1]) (!cast(args[2]) node:$addr)))>; + +let Predicates = [HasUnimplementedSIMD128] in +foreach args = [["v16i8", "v8x16"], ["v8i16", "v16x8"], ["v4i32", "v32x4"], + ["v2i64", "v64x2"], ["v4f32", "v32x4"], ["v2f64", "v64x2"]] in { +def : LoadPatNoOffset(args[0]), + !cast("load_splat_"#args[0]), + !cast("LOAD_SPLAT_"#args[1])>; +def : LoadPatImmOff(args[0]), + !cast("load_splat_"#args[0]), + regPlusImm, + !cast("LOAD_SPLAT_"#args[1])>; +def : LoadPatImmOff(args[0]), + !cast("load_splat_"#args[0]), + or_is_add, + !cast("LOAD_SPLAT_"#args[1])>; +def : LoadPatOffsetOnly(args[0]), + !cast("load_splat_"#args[0]), + !cast("LOAD_SPLAT_"#args[1])>; +def : LoadPatGlobalAddrOffOnly(args[0]), + !cast("load_splat_"#args[0]), + !cast("LOAD_SPLAT_"#args[1])>; } -// Store: v128.store -multiclass SIMDStore { - let mayStore = 1, UseNamedOperandTable = 1 in - defm STORE_#vec_t : - SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec), +// Load and extend +multiclass SIMDLoadExtend simdop> { + let mayLoad = 1, UseNamedOperandTable = 1, + Predicates = [HasUnimplementedSIMD128] in { + defm LOAD_EXTEND_S_#vec_t : + SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + (outs), (ins P2Align:$p2align, offset32_op:$off), [], + name#"_s\t$dst, ${off}(${addr})$p2align", + name#"_s\t$off$p2align", simdop>; + defm LOAD_EXTEND_U_#vec_t : + SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), (outs), (ins P2Align:$p2align, offset32_op:$off), [], - "v128.store\t${off}(${addr})$p2align, $vec", - "v128.store\t$off$p2align", 1>; + name#"_u\t$dst, ${off}(${addr})$p2align", + name#"_u\t$off$p2align", !add(simdop, 1)>; + } } -foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { -defm "" : SIMDStore; +defm "" : SIMDLoadExtend; +defm "" : SIMDLoadExtend; +defm "" : SIMDLoadExtend; + +let Predicates = [HasUnimplementedSIMD128] in +foreach types = [[v8i16, i8], [v4i32, i16], [v2i64, i32]] in +foreach exts = [["sextloadv", "_S"], + ["zextloadv", "_U"], + ["extloadv", "_U"]] in { +def : LoadPatNoOffset(exts[0]#types[1]), + !cast("LOAD_EXTEND"#exts[1]#"_"#types[0])>; +def : LoadPatImmOff(exts[0]#types[1]), regPlusImm, + !cast("LOAD_EXTEND"#exts[1]#"_"#types[0])>; +def : LoadPatImmOff(exts[0]#types[1]), or_is_add, + !cast("LOAD_EXTEND"#exts[1]#"_"#types[0])>; +def : LoadPatOffsetOnly(exts[0]#types[1]), + !cast("LOAD_EXTEND"#exts[1]#"_"#types[0])>; +def : LoadPatGlobalAddrOffOnly(exts[0]#types[1]), + !cast("LOAD_EXTEND"#exts[1]#"_"#types[0])>; +} + + +// Store: v128.store +let mayStore = 1, UseNamedOperandTable = 1 in +defm STORE_V128 : + SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec), + (outs), (ins P2Align:$p2align, offset32_op:$off), [], + "v128.store\t${off}(${addr})$p2align, $vec", + "v128.store\t$off$p2align", 1>; +foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { // Def load and store patterns from WebAssemblyInstrMemory.td for vector types -def : StorePatNoOffset("STORE_"#vec_t)>; -def : StorePatImmOff("STORE_"#vec_t)>; -def : StorePatImmOff("STORE_"#vec_t)>; -def : StorePatGlobalAddr("STORE_"#vec_t)>; -def : StorePatOffsetOnly("STORE_"#vec_t)>; -def : StorePatGlobalAddrOffOnly("STORE_"#vec_t)>; +def : StorePatNoOffset; +def : StorePatImmOff; +def : StorePatImmOff; +def : StorePatOffsetOnly; 
+def : StorePatGlobalAddrOffOnly; } //===----------------------------------------------------------------------===// @@ -90,7 +167,7 @@ def : StorePatGlobalAddrOffOnly("STORE_"#vec_t)>; // Constant: v128.const multiclass ConstVec { let isMoveImm = 1, isReMaterializable = 1, - Predicates = [HasSIMD128, HasUnimplementedSIMD128] in + Predicates = [HasUnimplementedSIMD128] in defm CONST_V128_#vec_t : SIMD_I<(outs V128:$dst), ops, (outs), ops, [(set V128:$dst, (vec_t pat))], "v128.const\t$dst, "#args, @@ -198,6 +275,19 @@ def : Pat<(vec_t (wasm_shuffle (vec_t V128:$x), (vec_t V128:$y), (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF)))>; } +// Swizzle lanes: v8x16.swizzle +def wasm_swizzle_t : SDTypeProfile<1, 2, []>; +def wasm_swizzle : SDNode<"WebAssemblyISD::SWIZZLE", wasm_swizzle_t>; +let Predicates = [HasUnimplementedSIMD128] in +defm SWIZZLE : + SIMD_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins), + [(set (v16i8 V128:$dst), + (wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))], + "v8x16.swizzle\t$dst, $src, $mask", "v8x16.swizzle", 192>; + +def : Pat<(int_wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)), + (SWIZZLE V128:$src, V128:$mask)>; + // Create vector with identical lanes: splat def splat2 : PatFrag<(ops node:$x), (build_vector node:$x, node:$x)>; def splat4 : PatFrag<(ops node:$x), (build_vector @@ -286,7 +376,7 @@ multiclass ExtractLaneExtended baseInst> { } defm "" : ExtractLaneExtended<"_s", 5>; -let Predicates = [HasSIMD128, HasUnimplementedSIMD128] in +let Predicates = [HasUnimplementedSIMD128] in defm "" : ExtractLaneExtended<"_u", 6>; defm "" : ExtractLane; defm "" : ExtractLane; @@ -472,6 +562,11 @@ defm OR : SIMDBitwise; defm XOR : SIMDBitwise; } // isCommutable = 1 +// Bitwise logic: v128.andnot +def andnot : PatFrag<(ops node:$left, node:$right), (and $left, (vnot $right))>; +let Predicates = [HasUnimplementedSIMD128] in +defm ANDNOT : SIMDBitwise; + // Bitwise select: v128.bitselect foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in defm BITSELECT_#vec_t : @@ -655,7 +750,7 @@ defm ABS : SIMDUnaryFP; defm NEG : SIMDUnaryFP; // Square root: sqrt -let Predicates = [HasSIMD128, HasUnimplementedSIMD128] in +let Predicates = [HasUnimplementedSIMD128] in defm SQRT : SIMDUnaryFP; //===----------------------------------------------------------------------===// @@ -679,7 +774,7 @@ let isCommutable = 1 in defm MUL : SIMDBinaryFP; // Division: div -let Predicates = [HasSIMD128, HasUnimplementedSIMD128] in +let Predicates = [HasUnimplementedSIMD128] in defm DIV : SIMDBinaryFP; // NaN-propagating minimum: min @@ -712,6 +807,42 @@ defm "" : SIMDConvert; defm "" : SIMDConvert; defm "" : SIMDConvert; +// Widening operations +multiclass SIMDWiden baseInst> { + defm "" : SIMDConvert; + defm "" : SIMDConvert; + defm "" : SIMDConvert; + defm "" : SIMDConvert; +} + +defm "" : SIMDWiden; +defm "" : SIMDWiden; + +// Narrowing operations +multiclass SIMDNarrow baseInst> { + defm NARROW_S_#vec_t : + SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins), + [(set (vec_t V128:$dst), (vec_t (int_wasm_narrow_signed + (arg_t V128:$low), (arg_t V128:$high))))], + vec#".narrow_"#arg#"_s\t$dst, $low, $high", vec#".narrow_"#arg#"_s", + baseInst>; + defm NARROW_U_#vec_t : + SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins), + [(set (vec_t V128:$dst), (vec_t (int_wasm_narrow_unsigned + (arg_t V128:$low), (arg_t V128:$high))))], + vec#".narrow_"#arg#"_u\t$dst, $low, $high", vec#".narrow_"#arg#"_u", + !add(baseInst, 1)>; +} + +defm "" : SIMDNarrow; 
+defm "" : SIMDNarrow; + // Lower llvm.wasm.trunc.saturate.* to saturating instructions def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))), (fp_to_sint_v4i32_v4f32 (v4f32 V128:$src))>; @@ -732,3 +863,25 @@ foreach t2 = !foldl( ) ) in def : Pat<(t1 (bitconvert (t2 V128:$v))), (t1 V128:$v)>; + +//===----------------------------------------------------------------------===// +// Quasi-Fused Multiply- Add and Subtract (QFMA/QFMS) +//===----------------------------------------------------------------------===// + +multiclass SIMDQFM baseInst> { + defm QFMA_#vec_t : + SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), + (outs), (ins), + [(set (vec_t V128:$dst), + (int_wasm_qfma (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))], + vec#".qfma\t$dst, $a, $b, $c", vec#".qfma", baseInst>; + defm QFMS_#vec_t : + SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), + (outs), (ins), + [(set (vec_t V128:$dst), + (int_wasm_qfms (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))], + vec#".qfms\t$dst, $a, $b, $c", vec#".qfms", !add(baseInst, 1)>; +} + +defm "" : SIMDQFM; +defm "" : SIMDQFM; diff --git a/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp b/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp index e92b34430272..75d04252cbe9 100644 --- a/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp +++ b/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/WasmEHFuncInfo.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/Debug.h" using namespace llvm; #define DEBUG_TYPE "wasm-late-eh-prepare" @@ -131,7 +132,7 @@ bool WebAssemblyLateEHPrepare::addCatches(MachineFunction &MF) { auto InsertPos = MBB.begin(); if (InsertPos->isEHLabel()) // EH pad starts with an EH label ++InsertPos; - unsigned DstReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass); + Register DstReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass); BuildMI(MBB, InsertPos, MBB.begin()->getDebugLoc(), TII.get(WebAssembly::CATCH), DstReg); } @@ -168,7 +169,7 @@ bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) { if (CatchPos->isEHLabel()) // EH pad starts with an EH label ++CatchPos; MachineInstr *Catch = &*CatchPos; - unsigned ExnReg = Catch->getOperand(0).getReg(); + Register ExnReg = Catch->getOperand(0).getReg(); BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::RETHROW)) .addReg(ExnReg); TI->eraseFromParent(); @@ -233,6 +234,7 @@ bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables( // it. The pseudo instruction will be deleted later. bool WebAssemblyLateEHPrepare::addExceptionExtraction(MachineFunction &MF) { const auto &TII = *MF.getSubtarget().getInstrInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); auto *EHInfo = MF.getWasmEHFuncInfo(); SmallVector ExtractInstrs; SmallVector ToDelete; @@ -292,7 +294,7 @@ bool WebAssemblyLateEHPrepare::addExceptionExtraction(MachineFunction &MF) { // thenbb: // %exn:i32 = extract_exception // ... use exn ... 
- unsigned ExnReg = Catch->getOperand(0).getReg(); + Register ExnReg = Catch->getOperand(0).getReg(); auto *ThenMBB = MF.CreateMachineBasicBlock(); auto *ElseMBB = MF.CreateMachineBasicBlock(); MF.insert(std::next(MachineFunction::iterator(EHPad)), ElseMBB); @@ -339,9 +341,11 @@ bool WebAssemblyLateEHPrepare::addExceptionExtraction(MachineFunction &MF) { WebAssembly::ClangCallTerminateFn); assert(ClangCallTerminateFn && "There is no __clang_call_terminate() function"); + Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + BuildMI(ElseMBB, DL, TII.get(WebAssembly::CONST_I32), Reg).addImm(0); BuildMI(ElseMBB, DL, TII.get(WebAssembly::CALL_VOID)) .addGlobalAddress(ClangCallTerminateFn) - .addImm(0); + .addReg(Reg); BuildMI(ElseMBB, DL, TII.get(WebAssembly::UNREACHABLE)); } else { diff --git a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp index 34a8195ac4b4..4314aa611549 100644 --- a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp +++ b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp @@ -68,7 +68,7 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) { if (MI->getOpcode() != WebAssembly::BR_UNLESS) continue; - unsigned Cond = MI->getOperand(1).getReg(); + Register Cond = MI->getOperand(1).getReg(); bool Inverted = false; // Attempt to invert the condition in place. @@ -188,7 +188,7 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) { // If we weren't able to invert the condition in place. Insert an // instruction to invert it. if (!Inverted) { - unsigned Tmp = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + Register Tmp = MRI.createVirtualRegister(&WebAssembly::I32RegClass); BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::EQZ_I32), Tmp) .addReg(Cond); MFI.stackifyVReg(Tmp); diff --git a/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp index 960d5134f6e9..1cf397dd060b 100644 --- a/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp +++ b/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp @@ -227,15 +227,6 @@ static cl::list namespace { class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass { - static const char *ResumeFName; - static const char *EHTypeIDFName; - static const char *EmLongjmpFName; - static const char *EmLongjmpJmpbufFName; - static const char *SaveSetjmpFName; - static const char *TestSetjmpFName; - static const char *FindMatchingCatchPrefix; - static const char *InvokePrefix; - bool EnableEH; // Enable exception handling bool EnableSjLj; // Enable setjmp/longjmp handling @@ -274,6 +265,7 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass { bool areAllExceptionsAllowed() const { return EHWhitelistSet.empty(); } bool canLongjmp(Module &M, const Value *Callee) const; + bool isEmAsmCall(Module &M, const Value *Callee) const; void rebuildSSA(Function &F); @@ -292,19 +284,6 @@ public: }; } // End anonymous namespace -const char *WebAssemblyLowerEmscriptenEHSjLj::ResumeFName = "__resumeException"; -const char *WebAssemblyLowerEmscriptenEHSjLj::EHTypeIDFName = - "llvm_eh_typeid_for"; -const char *WebAssemblyLowerEmscriptenEHSjLj::EmLongjmpFName = - "emscripten_longjmp"; -const char *WebAssemblyLowerEmscriptenEHSjLj::EmLongjmpJmpbufFName = - "emscripten_longjmp_jmpbuf"; -const char *WebAssemblyLowerEmscriptenEHSjLj::SaveSetjmpFName = "saveSetjmp"; -const char *WebAssemblyLowerEmscriptenEHSjLj::TestSetjmpFName = 
"testSetjmp"; -const char *WebAssemblyLowerEmscriptenEHSjLj::FindMatchingCatchPrefix = - "__cxa_find_matching_catch_"; -const char *WebAssemblyLowerEmscriptenEHSjLj::InvokePrefix = "__invoke_"; - char WebAssemblyLowerEmscriptenEHSjLj::ID = 0; INITIALIZE_PASS(WebAssemblyLowerEmscriptenEHSjLj, DEBUG_TYPE, "WebAssembly Lower Emscripten Exceptions / Setjmp / Longjmp", @@ -335,7 +314,8 @@ static bool canThrow(const Value *V) { static GlobalVariable *getGlobalVariableI32(Module &M, IRBuilder<> &IRB, const char *Name) { - auto* GV = dyn_cast(M.getOrInsertGlobal(Name, IRB.getInt32Ty())); + auto *GV = + dyn_cast(M.getOrInsertGlobal(Name, IRB.getInt32Ty())); if (!GV) report_fatal_error(Twine("unable to create global: ") + Name); @@ -376,9 +356,9 @@ WebAssemblyLowerEmscriptenEHSjLj::getFindMatchingCatch(Module &M, PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext()); SmallVector Args(NumClauses, Int8PtrTy); FunctionType *FTy = FunctionType::get(Int8PtrTy, Args, false); - Function *F = - Function::Create(FTy, GlobalValue::ExternalLinkage, - FindMatchingCatchPrefix + Twine(NumClauses + 2), &M); + Function *F = Function::Create( + FTy, GlobalValue::ExternalLinkage, + "__cxa_find_matching_catch_" + Twine(NumClauses + 2), &M); FindMatchingCatches[NumClauses] = F; return F; } @@ -418,7 +398,7 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) { Args.append(CI->arg_begin(), CI->arg_end()); CallInst *NewCall = IRB.CreateCall(getInvokeWrapper(CI), Args); NewCall->takeName(CI); - NewCall->setCallingConv(CI->getCallingConv()); + NewCall->setCallingConv(CallingConv::WASM_EmscriptenInvoke); NewCall->setDebugLoc(CI->getDebugLoc()); // Because we added the pointer to the callee as first argument, all @@ -432,9 +412,22 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) { for (unsigned I = 0, E = CI->getNumArgOperands(); I < E; ++I) ArgAttributes.push_back(InvokeAL.getParamAttributes(I)); + AttrBuilder FnAttrs(InvokeAL.getFnAttributes()); + if (FnAttrs.contains(Attribute::AllocSize)) { + // The allocsize attribute (if any) referes to parameters by index and needs + // to be adjusted. + unsigned SizeArg; + Optional NEltArg; + std::tie(SizeArg, NEltArg) = FnAttrs.getAllocSizeArgs(); + SizeArg += 1; + if (NEltArg.hasValue()) + NEltArg = NEltArg.getValue() + 1; + FnAttrs.addAllocSizeAttr(SizeArg, NEltArg); + } + // Reconstruct the AttributesList based on the vector we constructed. AttributeList NewCallAL = - AttributeList::get(C, InvokeAL.getFnAttributes(), + AttributeList::get(C, AttributeSet::get(C, FnAttrs), InvokeAL.getRetAttributes(), ArgAttributes); NewCall->setAttributes(NewCallAL); @@ -473,8 +466,8 @@ Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallOrInvoke *CI) { FunctionType *FTy = FunctionType::get(CalleeFTy->getReturnType(), ArgTys, CalleeFTy->isVarArg()); - Function *F = Function::Create(FTy, GlobalValue::ExternalLinkage, - InvokePrefix + Sig, M); + Function *F = + Function::Create(FTy, GlobalValue::ExternalLinkage, "__invoke_" + Sig, M); InvokeWrappers[Sig] = F; return F; } @@ -491,39 +484,44 @@ bool WebAssemblyLowerEmscriptenEHSjLj::canLongjmp(Module &M, // and can't be passed by pointer. The result is a crash with illegal IR. if (isa(Callee)) return false; + StringRef CalleeName = Callee->getName(); // The reason we include malloc/free here is to exclude the malloc/free // calls generated in setjmp prep / cleanup routines. 
-  Function *SetjmpF = M.getFunction("setjmp");
-  Function *MallocF = M.getFunction("malloc");
-  Function *FreeF = M.getFunction("free");
-  if (Callee == SetjmpF || Callee == MallocF || Callee == FreeF)
+  if (CalleeName == "setjmp" || CalleeName == "malloc" || CalleeName == "free")
     return false;
   // There are functions in JS glue code
-  if (Callee == ResumeF || Callee == EHTypeIDF || Callee == SaveSetjmpF ||
-      Callee == TestSetjmpF)
+  if (CalleeName == "__resumeException" || CalleeName == "llvm_eh_typeid_for" ||
+      CalleeName == "saveSetjmp" || CalleeName == "testSetjmp" ||
+      CalleeName == "getTempRet0" || CalleeName == "setTempRet0")
     return false;
   // __cxa_find_matching_catch_N functions cannot longjmp
-  if (Callee->getName().startswith(FindMatchingCatchPrefix))
+  if (Callee->getName().startswith("__cxa_find_matching_catch_"))
     return false;
   // Exception-catching related functions
-  Function *BeginCatchF = M.getFunction("__cxa_begin_catch");
-  Function *EndCatchF = M.getFunction("__cxa_end_catch");
-  Function *AllocExceptionF = M.getFunction("__cxa_allocate_exception");
-  Function *ThrowF = M.getFunction("__cxa_throw");
-  Function *TerminateF = M.getFunction("__clang_call_terminate");
-  if (Callee == BeginCatchF || Callee == EndCatchF ||
-      Callee == AllocExceptionF || Callee == ThrowF || Callee == TerminateF ||
-      Callee == GetTempRet0Func || Callee == SetTempRet0Func)
+  if (CalleeName == "__cxa_begin_catch" || CalleeName == "__cxa_end_catch" ||
+      CalleeName == "__cxa_allocate_exception" || CalleeName == "__cxa_throw" ||
+      CalleeName == "__clang_call_terminate")
     return false;
   // Otherwise we don't know
   return true;
 }
+bool WebAssemblyLowerEmscriptenEHSjLj::isEmAsmCall(Module &M,
+                                                   const Value *Callee) const {
+  StringRef CalleeName = Callee->getName();
+  // This is an exhaustive list from Emscripten's <emscripten/em_asm.h>.
+  return CalleeName == "emscripten_asm_const_int" ||
+         CalleeName == "emscripten_asm_const_double" ||
+         CalleeName == "emscripten_asm_const_int_sync_on_main_thread" ||
+         CalleeName == "emscripten_asm_const_double_sync_on_main_thread" ||
+         CalleeName == "emscripten_asm_const_async_on_main_thread";
+}
+
 // Generate testSetjmp function call sequence with preamble and postamble.
// The code this generates is equivalent to the following JavaScript code: // if (%__THREW__.val != 0 & threwValue != 0) { @@ -605,15 +603,12 @@ void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) { SSAUpdater SSA; for (BasicBlock &BB : F) { for (Instruction &I : BB) { + SSA.Initialize(I.getType(), I.getName()); + SSA.AddAvailableValue(&BB, &I); for (auto UI = I.use_begin(), UE = I.use_end(); UI != UE;) { Use &U = *UI; ++UI; - SSA.Initialize(I.getType(), I.getName()); - SSA.AddAvailableValue(&BB, &I); auto *User = cast(U.getUser()); - if (User->getParent() == &BB) - continue; - if (auto *UserPN = dyn_cast(User)) if (UserPN->getIncomingBlock(U) == &BB) continue; @@ -660,13 +655,13 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { FunctionType *ResumeFTy = FunctionType::get(IRB.getVoidTy(), IRB.getInt8PtrTy(), false); ResumeF = Function::Create(ResumeFTy, GlobalValue::ExternalLinkage, - ResumeFName, &M); + "__resumeException", &M); // Register llvm_eh_typeid_for function FunctionType *EHTypeIDTy = FunctionType::get(IRB.getInt32Ty(), IRB.getInt8PtrTy(), false); EHTypeIDF = Function::Create(EHTypeIDTy, GlobalValue::ExternalLinkage, - EHTypeIDFName, &M); + "llvm_eh_typeid_for", &M); for (Function &F : M) { if (F.isDeclaration()) @@ -684,7 +679,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { // defined in JS code EmLongjmpJmpbufF = Function::Create(LongjmpF->getFunctionType(), GlobalValue::ExternalLinkage, - EmLongjmpJmpbufFName, &M); + "emscripten_longjmp_jmpbuf", &M); LongjmpF->replaceAllUsesWith(EmLongjmpJmpbufF); } @@ -697,19 +692,19 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { IRB.getInt32Ty()}; FunctionType *FTy = FunctionType::get(Type::getInt32PtrTy(C), Params, false); - SaveSetjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage, - SaveSetjmpFName, &M); + SaveSetjmpF = + Function::Create(FTy, GlobalValue::ExternalLinkage, "saveSetjmp", &M); // Register testSetjmp function Params = {IRB.getInt32Ty(), Type::getInt32PtrTy(C), IRB.getInt32Ty()}; FTy = FunctionType::get(IRB.getInt32Ty(), Params, false); - TestSetjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage, - TestSetjmpFName, &M); + TestSetjmpF = + Function::Create(FTy, GlobalValue::ExternalLinkage, "testSetjmp", &M); FTy = FunctionType::get(IRB.getVoidTy(), {IRB.getInt32Ty(), IRB.getInt32Ty()}, false); EmLongjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage, - EmLongjmpFName, &M); + "emscripten_longjmp", &M); // Only traverse functions that uses setjmp in order not to insert // unnecessary prep / cleanup code in every function @@ -970,10 +965,16 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) { const Value *Callee = CI->getCalledValue(); if (!canLongjmp(M, Callee)) continue; + if (isEmAsmCall(M, Callee)) + report_fatal_error("Cannot use EM_ASM* alongside setjmp/longjmp in " + + F.getName() + + ". 
Please consider using EM_JS, or move the " + "EM_ASM into another function.", + false); Value *Threw = nullptr; BasicBlock *Tail; - if (Callee->getName().startswith(InvokePrefix)) { + if (Callee->getName().startswith("__invoke_")) { // If invoke wrapper has already been generated for this call in // previous EH phase, search for the load instruction // %__THREW__.val = __THREW__; diff --git a/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp b/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp index 494d3fadbc8c..750b2233e67a 100644 --- a/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp +++ b/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp @@ -94,7 +94,7 @@ bool LowerGlobalDtors::runOnModule(Module &M) { break; // Found a null terminator, skip the rest. Constant *Associated = CS->getOperand(2); - Associated = cast(Associated->stripPointerCastsNoFollowAliases()); + Associated = cast(Associated->stripPointerCasts()); DtorFuncs[PriorityValue][Associated].push_back(DtorFunc); } diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp index 288b991ae2c5..59c10243c545 100644 --- a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp +++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -79,7 +79,7 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol( // Clang-provided symbols. if (strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0 || strcmp(Name, "__memory_base") == 0 || strcmp(Name, "__table_base") == 0 || - strcmp(Name, "__tls_size") == 0) { + strcmp(Name, "__tls_size") == 0 || strcmp(Name, "__tls_align") == 0) { bool Mutable = strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0; WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); @@ -115,7 +115,7 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol( getLibcallSignature(Subtarget, Name, Returns, Params); } auto Signature = - make_unique(std::move(Returns), std::move(Params)); + std::make_unique(std::move(Returns), std::move(Params)); WasmSym->setSignature(Signature.get()); Printer.addSignature(std::move(Signature)); @@ -163,6 +163,21 @@ MCOperand WebAssemblyMCInstLower::lowerSymbolOperand(const MachineOperand &MO, return MCOperand::createExpr(Expr); } +MCOperand WebAssemblyMCInstLower::lowerTypeIndexOperand( + SmallVector &&Returns, + SmallVector &&Params) const { + auto Signature = std::make_unique(std::move(Returns), + std::move(Params)); + MCSymbol *Sym = Printer.createTempSymbol("typeindex"); + auto *WasmSym = cast(Sym); + WasmSym->setSignature(Signature.get()); + Printer.addSignature(std::move(Signature)); + WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); + const MCExpr *Expr = + MCSymbolRefExpr::create(WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, Ctx); + return MCOperand::createExpr(Expr); +} + // Return the WebAssembly type associated with the given register class. 
static wasm::ValType getType(const TargetRegisterClass *RC) { if (RC == &WebAssembly::I32RegClass) @@ -178,6 +193,16 @@ static wasm::ValType getType(const TargetRegisterClass *RC) { llvm_unreachable("Unexpected register class"); } +static void getFunctionReturns(const MachineInstr *MI, + SmallVectorImpl &Returns) { + const Function &F = MI->getMF()->getFunction(); + const TargetMachine &TM = MI->getMF()->getTarget(); + Type *RetTy = F.getReturnType(); + SmallVector CallerRetTys; + computeLegalValueVTs(F, TM, RetTy, CallerRetTys); + valTypesFromMVTs(CallerRetTys, Returns); +} + void WebAssemblyMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); @@ -208,8 +233,6 @@ void WebAssemblyMCInstLower::lower(const MachineInstr *MI, if (I < Desc.NumOperands) { const MCOperandInfo &Info = Desc.OpInfo[I]; if (Info.OperandType == WebAssembly::OPERAND_TYPEINDEX) { - MCSymbol *Sym = Printer.createTempSymbol("typeindex"); - SmallVector Returns; SmallVector Params; @@ -226,17 +249,23 @@ void WebAssemblyMCInstLower::lower(const MachineInstr *MI, if (WebAssembly::isCallIndirect(MI->getOpcode())) Params.pop_back(); - auto *WasmSym = cast(Sym); - auto Signature = make_unique(std::move(Returns), - std::move(Params)); - WasmSym->setSignature(Signature.get()); - Printer.addSignature(std::move(Signature)); - WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); + // return_call_indirect instructions have the return type of the + // caller + if (MI->getOpcode() == WebAssembly::RET_CALL_INDIRECT) + getFunctionReturns(MI, Returns); - const MCExpr *Expr = MCSymbolRefExpr::create( - WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, Ctx); - MCOp = MCOperand::createExpr(Expr); + MCOp = lowerTypeIndexOperand(std::move(Returns), std::move(Params)); break; + } else if (Info.OperandType == WebAssembly::OPERAND_SIGNATURE) { + auto BT = static_cast(MO.getImm()); + assert(BT != WebAssembly::BlockType::Invalid); + if (BT == WebAssembly::BlockType::Multivalue) { + SmallVector Returns; + getFunctionReturns(MI, Returns); + MCOp = lowerTypeIndexOperand(std::move(Returns), + SmallVector()); + break; + } } } MCOp = MCOperand::createImm(MO.getImm()); diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.h b/lib/Target/WebAssembly/WebAssemblyMCInstLower.h index 2c375a01a7f5..d79c54097eb7 100644 --- a/lib/Target/WebAssembly/WebAssemblyMCInstLower.h +++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.h @@ -15,6 +15,7 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMCINSTLOWER_H #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMCINSTLOWER_H +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/Compiler.h" @@ -33,6 +34,8 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyMCInstLower { MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const; MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; + MCOperand lowerTypeIndexOperand(SmallVector &&, + SmallVector &&) const; public: WebAssemblyMCInstLower(MCContext &ctx, WebAssemblyAsmPrinter &printer) diff --git a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp index d31c1226bfdb..e4cc2389147b 100644 --- a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp @@ -49,10 +49,12 @@ void llvm::computeSignatureVTs(const FunctionType *Ty, const Function &F, computeLegalValueVTs(F, TM, 
                               Ty->getReturnType(), Results);
   MVT PtrVT = MVT::getIntegerVT(TM.createDataLayout().getPointerSizeInBits());
-  if (Results.size() > 1) {
-    // WebAssembly currently can't lower returns of multiple values without
-    // demoting to sret (see WebAssemblyTargetLowering::CanLowerReturn). So
-    // replace multiple return values with a pointer parameter.
+  if (Results.size() > 1 &&
+      !TM.getSubtarget<WebAssemblySubtarget>(F).hasMultivalue()) {
+    // WebAssembly can't lower returns of multiple values without demoting to
+    // sret unless multivalue is enabled (see
+    // WebAssemblyTargetLowering::CanLowerReturn). So replace multiple return
+    // values with a pointer parameter.
     Results.clear();
     Params.push_back(PtrVT);
   }
@@ -72,7 +74,7 @@ void llvm::valTypesFromMVTs(const ArrayRef<MVT> &In,
 std::unique_ptr<wasm::WasmSignature>
 llvm::signatureFromMVTs(const SmallVectorImpl<MVT> &Results,
                         const SmallVectorImpl<MVT> &Params) {
-  auto Sig = make_unique<wasm::WasmSignature>();
+  auto Sig = std::make_unique<wasm::WasmSignature>();
   valTypesFromMVTs(Results, Sig->Returns);
   valTypesFromMVTs(Params, Sig->Params);
   return Sig;
diff --git a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
index 4b9ba491dee6..16e2f4392984 100644
--- a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
@@ -96,13 +96,18 @@ public:
   void stackifyVReg(unsigned VReg) {
     assert(MF.getRegInfo().getUniqueVRegDef(VReg));
-    auto I = TargetRegisterInfo::virtReg2Index(VReg);
+    auto I = Register::virtReg2Index(VReg);
     if (I >= VRegStackified.size())
       VRegStackified.resize(I + 1);
     VRegStackified.set(I);
   }
+  void unstackifyVReg(unsigned VReg) {
+    auto I = Register::virtReg2Index(VReg);
+    if (I < VRegStackified.size())
+      VRegStackified.reset(I);
+  }
   bool isVRegStackified(unsigned VReg) const {
-    auto I = TargetRegisterInfo::virtReg2Index(VReg);
+    auto I = Register::virtReg2Index(VReg);
     if (I >= VRegStackified.size())
       return false;
     return VRegStackified.test(I);
@@ -111,12 +116,12 @@ public:
   void initWARegs();
   void setWAReg(unsigned VReg, unsigned WAReg) {
     assert(WAReg != UnusedReg);
-    auto I = TargetRegisterInfo::virtReg2Index(VReg);
+    auto I = Register::virtReg2Index(VReg);
     assert(I < WARegs.size());
     WARegs[I] = WAReg;
   }
   unsigned getWAReg(unsigned VReg) const {
-    auto I = TargetRegisterInfo::virtReg2Index(VReg);
+    auto I = Register::virtReg2Index(VReg);
     assert(I < WARegs.size());
     return WARegs[I];
   }
diff --git a/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp b/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
index 7ac0511c28b0..ac428fcc826a 100644
--- a/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
@@ -166,8 +166,8 @@ static bool optimizeCall(MachineBasicBlock &MBB, MachineInstr &MI,
   if (!LibInfo.getLibFunc(Name, Func))
     return false;
-  unsigned FromReg = MI.getOperand(2).getReg();
-  unsigned ToReg = MI.getOperand(0).getReg();
+  Register FromReg = MI.getOperand(2).getReg();
+  Register ToReg = MI.getOperand(0).getReg();
   if (MRI.getRegClass(FromReg) != MRI.getRegClass(ToReg))
     report_fatal_error("Memory Intrinsic results: call to builtin function "
                        "with wrong signature, from/to mismatch");
@@ -184,7 +184,8 @@ bool WebAssemblyMemIntrinsicResults::runOnMachineFunction(MachineFunction &MF) {
   auto &MDT = getAnalysis<MachineDominatorTree>();
   const WebAssemblyTargetLowering &TLI =
       *MF.getSubtarget<WebAssemblySubtarget>().getTargetLowering();
-  const auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+  const auto &LibInfo =
+      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(MF.getFunction());
   auto &LIS = getAnalysis<LiveIntervals>();
bool Changed = false; diff --git a/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp index 8c7c3305c201..0bd30791e57c 100644 --- a/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp +++ b/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp @@ -81,7 +81,7 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction( // Split multiple-VN LiveIntervals into multiple LiveIntervals. SmallVector SplitLIs; for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + unsigned Reg = Register::index2VirtReg(I); if (MRI.reg_nodbg_empty(Reg)) continue; diff --git a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp index d20352259e07..9b60596e42b4 100644 --- a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp +++ b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp @@ -64,11 +64,8 @@ void OptimizeReturned::visitCallSite(CallSite CS) { if (isa(Arg)) continue; // Like replaceDominatedUsesWith but using Instruction/Use dominance. - for (auto UI = Arg->use_begin(), UE = Arg->use_end(); UI != UE;) { - Use &U = *UI++; - if (DT->dominates(Inst, U)) - U.set(Inst); - } + Arg->replaceUsesWithIf(Inst, + [&](Use &U) { return DT->dominates(Inst, U); }); } } diff --git a/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp index e11cdeaa0e79..ea6cd09a604c 100644 --- a/lib/Target/WebAssembly/WebAssemblyPeephole.cpp +++ b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp @@ -63,7 +63,7 @@ static bool maybeRewriteToDrop(unsigned OldReg, unsigned NewReg, bool Changed = false; if (OldReg == NewReg) { Changed = true; - unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); + Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); MO.setReg(NewReg); MO.setIsDead(); MFI.stackifyVReg(NewReg); @@ -75,9 +75,7 @@ static bool maybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB, const MachineFunction &MF, WebAssemblyFunctionInfo &MFI, MachineRegisterInfo &MRI, - const WebAssemblyInstrInfo &TII, - unsigned FallthroughOpc, - unsigned CopyLocalOpc) { + const WebAssemblyInstrInfo &TII) { if (DisableWebAssemblyFallthroughReturnOpt) return false; if (&MBB != &MF.back()) @@ -90,13 +88,36 @@ static bool maybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB, if (&MI != &*End) return false; - if (FallthroughOpc != WebAssembly::FALLTHROUGH_RETURN_VOID) { - // If the operand isn't stackified, insert a COPY to read the operand and - // stackify it. - MachineOperand &MO = MI.getOperand(0); - unsigned Reg = MO.getReg(); + for (auto &MO : MI.explicit_operands()) { + // If the operand isn't stackified, insert a COPY to read the operands and + // stackify them. 
+ Register Reg = MO.getReg(); if (!MFI.isVRegStackified(Reg)) { - unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg)); + unsigned CopyLocalOpc; + const TargetRegisterClass *RegClass = MRI.getRegClass(Reg); + switch (RegClass->getID()) { + case WebAssembly::I32RegClassID: + CopyLocalOpc = WebAssembly::COPY_I32; + break; + case WebAssembly::I64RegClassID: + CopyLocalOpc = WebAssembly::COPY_I64; + break; + case WebAssembly::F32RegClassID: + CopyLocalOpc = WebAssembly::COPY_F32; + break; + case WebAssembly::F64RegClassID: + CopyLocalOpc = WebAssembly::COPY_F64; + break; + case WebAssembly::V128RegClassID: + CopyLocalOpc = WebAssembly::COPY_V128; + break; + case WebAssembly::EXNREFRegClassID: + CopyLocalOpc = WebAssembly::COPY_EXNREF; + break; + default: + llvm_unreachable("Unexpected register class for return operand"); + } + Register NewReg = MRI.createVirtualRegister(RegClass); BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(CopyLocalOpc), NewReg) .addReg(Reg); MO.setReg(NewReg); @@ -104,8 +125,7 @@ static bool maybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB, } } - // Rewrite the return. - MI.setDesc(TII.get(FallthroughOpc)); + MI.setDesc(TII.get(WebAssembly::FALLTHROUGH_RETURN)); return true; } @@ -120,7 +140,8 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) { const auto &TII = *MF.getSubtarget().getInstrInfo(); const WebAssemblyTargetLowering &TLI = *MF.getSubtarget().getTargetLowering(); - auto &LibInfo = getAnalysis().getTLI(); + auto &LibInfo = + getAnalysis().getTLI(MF.getFunction()); bool Changed = false; for (auto &MBB : MF) @@ -143,8 +164,8 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) { report_fatal_error("Peephole: call to builtin function with " "wrong signature, not consuming reg"); MachineOperand &MO = MI.getOperand(0); - unsigned OldReg = MO.getReg(); - unsigned NewReg = Op2.getReg(); + Register OldReg = MO.getReg(); + Register NewReg = Op2.getReg(); if (MRI.getRegClass(NewReg) != MRI.getRegClass(OldReg)) report_fatal_error("Peephole: call to builtin function with " @@ -156,60 +177,8 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) { break; } // Optimize away an explicit void return at the end of the function. 
- case WebAssembly::RETURN_I32: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_I32, - WebAssembly::COPY_I32); - break; - case WebAssembly::RETURN_I64: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_I64, - WebAssembly::COPY_I64); - break; - case WebAssembly::RETURN_F32: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_F32, - WebAssembly::COPY_F32); - break; - case WebAssembly::RETURN_F64: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_F64, - WebAssembly::COPY_F64); - break; - case WebAssembly::RETURN_v16i8: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v16i8, - WebAssembly::COPY_V128); - break; - case WebAssembly::RETURN_v8i16: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v8i16, - WebAssembly::COPY_V128); - break; - case WebAssembly::RETURN_v4i32: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v4i32, - WebAssembly::COPY_V128); - break; - case WebAssembly::RETURN_v2i64: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v2i64, - WebAssembly::COPY_V128); - break; - case WebAssembly::RETURN_v4f32: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v4f32, - WebAssembly::COPY_V128); - break; - case WebAssembly::RETURN_v2f64: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v2f64, - WebAssembly::COPY_V128); - break; - case WebAssembly::RETURN_VOID: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_VOID, - WebAssembly::INSTRUCTION_LIST_END); + case WebAssembly::RETURN: + Changed |= maybeRewriteToFallthrough(MI, MBB, MF, MFI, MRI, TII); break; } diff --git a/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp index 3bfbf607344d..799b9388097c 100644 --- a/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp +++ b/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp @@ -95,7 +95,7 @@ bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction( // TODO: This is fairly heavy-handed; find a better approach. // for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + unsigned Reg = Register::index2VirtReg(I); // Skip unused registers. if (MRI.use_nodbg_empty(Reg)) diff --git a/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp index 6f09c45b6642..043b6f1b7d18 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp @@ -98,7 +98,7 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "Interesting register intervals:\n"); for (unsigned I = 0; I < NumVRegs; ++I) { - unsigned VReg = TargetRegisterInfo::index2VirtReg(I); + unsigned VReg = Register::index2VirtReg(I); if (MFI.isVRegStackified(VReg)) continue; // Skip unused registers, which can use $drop. 
@@ -157,9 +157,8 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) { Changed |= Old != New; UsedColors.set(Color); Assignments[Color].push_back(LI); - LLVM_DEBUG( - dbgs() << "Assigning vreg" << TargetRegisterInfo::virtReg2Index(LI->reg) - << " to vreg" << TargetRegisterInfo::virtReg2Index(New) << "\n"); + LLVM_DEBUG(dbgs() << "Assigning vreg" << Register::virtReg2Index(LI->reg) + << " to vreg" << Register::virtReg2Index(New) << "\n"); } if (!Changed) return false; diff --git a/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp index cdca23f55b29..72e7a7cf5042 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp @@ -89,7 +89,7 @@ bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) { // Start the numbering for locals after the arg regs unsigned CurReg = MFI.getParams().size(); for (unsigned VRegIdx = 0; VRegIdx < NumVRegs; ++VRegIdx) { - unsigned VReg = TargetRegisterInfo::index2VirtReg(VRegIdx); + unsigned VReg = Register::index2VirtReg(VRegIdx); // Skip unused registers. if (MRI.use_empty(VReg)) continue; diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index a120a6471014..421d353a89e8 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -120,7 +120,7 @@ static void convertImplicitDefToConstZero(MachineInstr *MI, Type::getDoubleTy(MF.getFunction().getContext()))); MI->addOperand(MachineOperand::CreateFPImm(Val)); } else if (RegClass == &WebAssembly::V128RegClass) { - unsigned TempReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + Register TempReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); MI->setDesc(TII->get(WebAssembly::SPLAT_v4i32)); MI->addOperand(MachineOperand::CreateReg(TempReg, false)); MachineInstr *Const = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), @@ -334,14 +334,14 @@ static bool isSafeToMove(const MachineInstr *Def, const MachineInstr *Insert, for (const MachineOperand &MO : Def->operands()) { if (!MO.isReg() || MO.isUndef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // If the register is dead here and at Insert, ignore it. if (MO.isDead() && Insert->definesRegister(Reg) && !Insert->readsRegister(Reg)) continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { // Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions // from moving down, and we've already checked for that. if (Reg == WebAssembly::ARGUMENTS) @@ -436,8 +436,8 @@ static bool oneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse, const MachineOperand &MO = UseInst->getOperand(0); if (!MO.isReg()) return false; - unsigned DefReg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DefReg) || + Register DefReg = MO.getReg(); + if (!Register::isVirtualRegister(DefReg) || !MFI.isVRegStackified(DefReg)) return false; assert(MRI.hasOneNonDBGUse(DefReg)); @@ -499,7 +499,7 @@ static MachineInstr *moveForSingleUse(unsigned Reg, MachineOperand &Op, } else { // The register may have unrelated uses or defs; create a new register for // just our one def and use so that we can stackify it. 
- unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg)); + Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg)); Def->getOperand(0).setReg(NewReg); Op.setReg(NewReg); @@ -535,7 +535,7 @@ static MachineInstr *rematerializeCheapDef( WebAssemblyDebugValueManager DefDIs(&Def); - unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg)); + Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg)); TII->reMaterialize(MBB, Insert, NewReg, 0, Def, *TRI); Op.setReg(NewReg); MachineInstr *Clone = &*std::prev(Insert); @@ -607,8 +607,8 @@ static MachineInstr *moveAndTeeForMultiUse( // Create the Tee and attach the registers. const auto *RegClass = MRI.getRegClass(Reg); - unsigned TeeReg = MRI.createVirtualRegister(RegClass); - unsigned DefReg = MRI.createVirtualRegister(RegClass); + Register TeeReg = MRI.createVirtualRegister(RegClass); + Register DefReg = MRI.createVirtualRegister(RegClass); MachineOperand &DefMO = Def->getOperand(0); MachineInstr *Tee = BuildMI(MBB, Insert, Insert->getDebugLoc(), TII->get(getTeeOpcode(RegClass)), TeeReg) @@ -807,11 +807,11 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { if (!Op.isReg()) continue; - unsigned Reg = Op.getReg(); + Register Reg = Op.getReg(); assert(Op.isUse() && "explicit_uses() should only iterate over uses"); assert(!Op.isImplicit() && "explicit_uses() should only iterate over explicit operands"); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) continue; // Identify the definition for this register at this point. @@ -915,7 +915,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { for (MachineOperand &MO : reverse(MI.explicit_operands())) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (MFI.isVRegStackified(Reg)) { if (MO.isDef()) diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp index ea9cfc00adfd..789a025794ea 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -91,8 +91,8 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex( if (MI.getOpcode() == WebAssembly::ADD_I32) { MachineOperand &OtherMO = MI.getOperand(3 - FIOperandNum); if (OtherMO.isReg()) { - unsigned OtherMOReg = OtherMO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(OtherMOReg)) { + Register OtherMOReg = OtherMO.getReg(); + if (Register::isVirtualRegister(OtherMOReg)) { MachineInstr *Def = MF.getRegInfo().getUniqueVRegDef(OtherMOReg); // TODO: For now we just opportunistically do this in the case where // the CONST_I32 happens to have exactly one def and one use. We @@ -117,7 +117,7 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex( // Create i32.add SP, offset and make it the operand. 
const TargetRegisterClass *PtrRC = MRI.getTargetRegisterInfo()->getPointerRegClass(MF); - unsigned OffsetOp = MRI.createVirtualRegister(PtrRC); + Register OffsetOp = MRI.createVirtualRegister(PtrRC); BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::CONST_I32), OffsetOp) .addImm(FrameOffset); diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 7e65368e671a..bdf5fe2620a4 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -140,7 +140,7 @@ WebAssemblyTargetMachine::getSubtargetImpl(std::string CPU, std::string FS) const { auto &I = SubtargetMap[CPU + FS]; if (!I) { - I = llvm::make_unique(TargetTriple, CPU, FS, *this); + I = std::make_unique(TargetTriple, CPU, FS, *this); } return I.get(); } diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index 46ef765ce0f4..1c53e90daea7 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -25,10 +25,11 @@ WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const { return TargetTransformInfo::PSK_FastHardware; } -unsigned WebAssemblyTTIImpl::getNumberOfRegisters(bool Vector) { - unsigned Result = BaseT::getNumberOfRegisters(Vector); +unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const { + unsigned Result = BaseT::getNumberOfRegisters(ClassID); // For SIMD, use at least 16 registers, as a rough guess. + bool Vector = (ClassID == 1); if (Vector) Result = std::max(Result, 16u); diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index 1b11b4b631eb..f0ecc73e91de 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -53,7 +53,7 @@ public: /// \name Vector TTI Implementations /// @{ - unsigned getNumberOfRegisters(bool Vector); + unsigned getNumberOfRegisters(unsigned ClassID) const; unsigned getRegisterBitWidth(bool Vector) const; unsigned getArithmeticInstrCost( unsigned Opcode, Type *Ty, diff --git a/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/lib/Target/WebAssembly/WebAssemblyUtilities.cpp index e9d88d4818a5..a237da8154ab 100644 --- a/lib/Target/WebAssembly/WebAssemblyUtilities.cpp +++ b/lib/Target/WebAssembly/WebAssemblyUtilities.cpp @@ -32,9 +32,8 @@ bool WebAssembly::isChild(const MachineInstr &MI, const MachineOperand &MO = MI.getOperand(0); if (!MO.isReg() || MO.isImplicit() || !MO.isDef()) return false; - unsigned Reg = MO.getReg(); - return TargetRegisterInfo::isVirtualRegister(Reg) && - MFI.isVRegStackified(Reg); + Register Reg = MO.getReg(); + return Register::isVirtualRegister(Reg) && MFI.isVRegStackified(Reg); } bool WebAssembly::mayThrow(const MachineInstr &MI) { @@ -51,7 +50,21 @@ bool WebAssembly::mayThrow(const MachineInstr &MI) { return false; const MachineOperand &MO = MI.getOperand(getCalleeOpNo(MI.getOpcode())); - assert(MO.isGlobal()); + assert(MO.isGlobal() || MO.isSymbol()); + + if (MO.isSymbol()) { + // Some intrinsics are lowered to calls to external symbols, which are then + // lowered to calls to library functions. Most of libcalls don't throw, but + // we only list some of them here now. + // TODO Consider adding 'nounwind' info in TargetLowering::CallLoweringInfo + // instead for more accurate info. 
+ const char *Name = MO.getSymbolName(); + if (strcmp(Name, "memcpy") == 0 || strcmp(Name, "memmove") == 0 || + strcmp(Name, "memset") == 0) + return false; + return true; + } + const auto *F = dyn_cast(MO.getGlobal()); if (!F) return true; diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 95cbf46d37ed..25be79ec2b1e 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -870,6 +870,14 @@ private: bool parseDirectiveFPOEndProc(SMLoc L); bool parseDirectiveFPOData(SMLoc L); + /// SEH directives. + bool parseSEHRegisterNumber(unsigned RegClassID, unsigned &RegNo); + bool parseDirectiveSEHPushReg(SMLoc); + bool parseDirectiveSEHSetFrame(SMLoc); + bool parseDirectiveSEHSaveReg(SMLoc); + bool parseDirectiveSEHSaveXMM(SMLoc); + bool parseDirectiveSEHPushFrame(SMLoc); + unsigned checkTargetMatchPredicate(MCInst &Inst) override; bool validateInstruction(MCInst &Inst, const OperandVector &Ops); @@ -955,6 +963,8 @@ private: public: enum X86MatchResultTy { Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY, +#define GET_OPERAND_DIAGNOSTIC_TYPES +#include "X86GenAsmMatcher.inc" }; X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser, @@ -3173,6 +3183,13 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, EmitInstruction(Inst, Operands, Out); Opcode = Inst.getOpcode(); return false; + case Match_InvalidImmUnsignedi4: { + SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + return Error(ErrorLoc, "immediate must be an integer in range [0, 15]", + EmptyRange, MatchingInlineAsm); + } case Match_MissingFeature: return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm); case Match_InvalidOperand: @@ -3520,6 +3537,15 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, MatchingInlineAsm); } + if (std::count(std::begin(Match), std::end(Match), + Match_InvalidImmUnsignedi4) == 1) { + SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + return Error(ErrorLoc, "immediate must be an integer in range [0, 15]", + EmptyRange, MatchingInlineAsm); + } + // If all of these were an outright failure, report it in a useless way. return Error(IDLoc, "unknown instruction mnemonic", EmptyRange, MatchingInlineAsm); @@ -3572,6 +3598,16 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { return parseDirectiveFPOEndPrologue(DirectiveID.getLoc()); else if (IDVal == ".cv_fpo_endproc") return parseDirectiveFPOEndProc(DirectiveID.getLoc()); + else if (IDVal == ".seh_pushreg") + return parseDirectiveSEHPushReg(DirectiveID.getLoc()); + else if (IDVal == ".seh_setframe") + return parseDirectiveSEHSetFrame(DirectiveID.getLoc()); + else if (IDVal == ".seh_savereg") + return parseDirectiveSEHSaveReg(DirectiveID.getLoc()); + else if (IDVal == ".seh_savexmm") + return parseDirectiveSEHSaveXMM(DirectiveID.getLoc()); + else if (IDVal == ".seh_pushframe") + return parseDirectiveSEHPushFrame(DirectiveID.getLoc()); return true; } @@ -3708,6 +3744,140 @@ bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) { return getTargetStreamer().emitFPOEndProc(L); } +bool X86AsmParser::parseSEHRegisterNumber(unsigned RegClassID, + unsigned &RegNo) { + SMLoc startLoc = getLexer().getLoc(); + const MCRegisterInfo *MRI = getContext().getRegisterInfo(); + + // Try parsing the argument as a register first. 
+ if (getLexer().getTok().isNot(AsmToken::Integer)) { + SMLoc endLoc; + if (ParseRegister(RegNo, startLoc, endLoc)) + return true; + + if (!X86MCRegisterClasses[RegClassID].contains(RegNo)) { + return Error(startLoc, + "register is not supported for use with this directive"); + } + } else { + // Otherwise, an integer number matching the encoding of the desired + // register may appear. + int64_t EncodedReg; + if (getParser().parseAbsoluteExpression(EncodedReg)) + return true; + + // The SEH register number is the same as the encoding register number. Map + // from the encoding back to the LLVM register number. + RegNo = 0; + for (MCPhysReg Reg : X86MCRegisterClasses[RegClassID]) { + if (MRI->getEncodingValue(Reg) == EncodedReg) { + RegNo = Reg; + break; + } + } + if (RegNo == 0) { + return Error(startLoc, + "incorrect register number for use with this directive"); + } + } + + return false; +} + +bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc) { + unsigned Reg = 0; + if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + getParser().Lex(); + getStreamer().EmitWinCFIPushReg(Reg, Loc); + return false; +} + +bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc) { + unsigned Reg = 0; + int64_t Off; + if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg)) + return true; + if (getLexer().isNot(AsmToken::Comma)) + return TokError("you must specify a stack pointer offset"); + + getParser().Lex(); + if (getParser().parseAbsoluteExpression(Off)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + getParser().Lex(); + getStreamer().EmitWinCFISetFrame(Reg, Off, Loc); + return false; +} + +bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc) { + unsigned Reg = 0; + int64_t Off; + if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg)) + return true; + if (getLexer().isNot(AsmToken::Comma)) + return TokError("you must specify an offset on the stack"); + + getParser().Lex(); + if (getParser().parseAbsoluteExpression(Off)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + getParser().Lex(); + getStreamer().EmitWinCFISaveReg(Reg, Off, Loc); + return false; +} + +bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc) { + unsigned Reg = 0; + int64_t Off; + if (parseSEHRegisterNumber(X86::VR128XRegClassID, Reg)) + return true; + if (getLexer().isNot(AsmToken::Comma)) + return TokError("you must specify an offset on the stack"); + + getParser().Lex(); + if (getParser().parseAbsoluteExpression(Off)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + getParser().Lex(); + getStreamer().EmitWinCFISaveXMM(Reg, Off, Loc); + return false; +} + +bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) { + bool Code = false; + StringRef CodeID; + if (getLexer().is(AsmToken::At)) { + SMLoc startLoc = getLexer().getLoc(); + getParser().Lex(); + if (!getParser().parseIdentifier(CodeID)) { + if (CodeID != "code") + return Error(startLoc, "expected @code"); + Code = true; + } + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + getParser().Lex(); + getStreamer().EmitWinCFIPushFrame(Code, Loc); + return false; +} + // Force static initialization. 
extern "C" void LLVMInitializeX86AsmParser() { RegisterMCAsmParser X(getTheX86_32Target()); diff --git a/lib/Target/X86/AsmParser/X86AsmParserCommon.h b/lib/Target/X86/AsmParser/X86AsmParserCommon.h index 5bc979d1f18c..e9be28ca77b0 100644 --- a/lib/Target/X86/AsmParser/X86AsmParserCommon.h +++ b/lib/Target/X86/AsmParser/X86AsmParserCommon.h @@ -35,6 +35,10 @@ inline bool isImmUnsignedi8Value(uint64_t Value) { return isUInt<8>(Value) || isInt<8>(Value); } +inline bool isImmUnsignedi4Value(uint64_t Value) { + return isUInt<4>(Value); +} + } // End of namespace llvm #endif diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h index a771ba366318..3a76d023e640 100644 --- a/lib/Target/X86/AsmParser/X86Operand.h +++ b/lib/Target/X86/AsmParser/X86Operand.h @@ -260,6 +260,15 @@ struct X86Operand final : public MCParsedAsmOperand { return isImmSExti64i32Value(CE->getValue()); } + bool isImmUnsignedi4() const { + if (!isImm()) return false; + // If this isn't a constant expr, reject it. The immediate byte is shared + // with a register encoding. We can't have it affected by a relocation. + const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) return false; + return isImmUnsignedi4Value(CE->getValue()); + } + bool isImmUnsignedi8() const { if (!isImm()) return false; // If this isn't a constant expr, just assume it fits and let relaxation @@ -491,7 +500,7 @@ struct X86Operand final : public MCParsedAsmOperand { void addGR32orGR64Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - unsigned RegNo = getReg(); + MCRegister RegNo = getReg(); if (X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo)) RegNo = getX86SubSuperRegister(RegNo, 32); Inst.addOperand(MCOperand::createReg(RegNo)); @@ -572,7 +581,7 @@ struct X86Operand final : public MCParsedAsmOperand { static std::unique_ptr CreateToken(StringRef Str, SMLoc Loc) { SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size()); - auto Res = llvm::make_unique(Token, Loc, EndLoc); + auto Res = std::make_unique(Token, Loc, EndLoc); Res->Tok.Data = Str.data(); Res->Tok.Length = Str.size(); return Res; @@ -582,7 +591,7 @@ struct X86Operand final : public MCParsedAsmOperand { CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc, bool AddressOf = false, SMLoc OffsetOfLoc = SMLoc(), StringRef SymName = StringRef(), void *OpDecl = nullptr) { - auto Res = llvm::make_unique(Register, StartLoc, EndLoc); + auto Res = std::make_unique(Register, StartLoc, EndLoc); Res->Reg.RegNo = RegNo; Res->AddressOf = AddressOf; Res->OffsetOfLoc = OffsetOfLoc; @@ -593,19 +602,19 @@ struct X86Operand final : public MCParsedAsmOperand { static std::unique_ptr CreateDXReg(SMLoc StartLoc, SMLoc EndLoc) { - return llvm::make_unique(DXRegister, StartLoc, EndLoc); + return std::make_unique(DXRegister, StartLoc, EndLoc); } static std::unique_ptr CreatePrefix(unsigned Prefixes, SMLoc StartLoc, SMLoc EndLoc) { - auto Res = llvm::make_unique(Prefix, StartLoc, EndLoc); + auto Res = std::make_unique(Prefix, StartLoc, EndLoc); Res->Pref.Prefixes = Prefixes; return Res; } static std::unique_ptr CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc) { - auto Res = llvm::make_unique(Immediate, StartLoc, EndLoc); + auto Res = std::make_unique(Immediate, StartLoc, EndLoc); Res->Imm.Val = Val; return Res; } @@ -615,7 +624,7 @@ struct X86Operand final : public MCParsedAsmOperand { CreateMem(unsigned ModeSize, const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, unsigned Size = 0, StringRef SymName = 
StringRef(), void *OpDecl = nullptr, unsigned FrontendSize = 0) { - auto Res = llvm::make_unique<X86Operand>(Memory, StartLoc, EndLoc); + auto Res = std::make_unique<X86Operand>(Memory, StartLoc, EndLoc); Res->Mem.SegReg = 0; Res->Mem.Disp = Disp; Res->Mem.BaseReg = 0; @@ -643,7 +652,7 @@ struct X86Operand final : public MCParsedAsmOperand { // The scale should always be one of {1,2,4,8}. assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) && "Invalid scale!"); - auto Res = llvm::make_unique<X86Operand>(Memory, StartLoc, EndLoc); + auto Res = std::make_unique<X86Operand>(Memory, StartLoc, EndLoc); Res->Mem.SegReg = SegReg; Res->Mem.Disp = Disp; Res->Mem.BaseReg = BaseReg; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp index a241362a271d..e287f6625115 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -12,13 +12,14 @@ // //===----------------------------------------------------------------------===// +#include "X86DisassemblerDecoder.h" +#include "llvm/ADT/StringRef.h" + #include <cstdarg> /* for va_*() */ #include <cstdio> /* for vsnprintf() */ #include <cstdlib> /* for exit() */ #include <cstring> /* for memset() */ -#include "X86DisassemblerDecoder.h" - using namespace llvm::X86Disassembler; /// Specifies whether a ModR/M byte is needed and (if so) which diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 54413fa1a02f..f08fcb575bf0 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -287,7 +287,7 @@ bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const { // Relax if the value is too big for a (signed) i8. - return int64_t(Value) != int64_t(int8_t(Value)); + return !isInt<8>(Value); } // FIXME: Can tblgen help at all here to verify there aren't other instructions @@ -557,7 +557,7 @@ protected: // If the frame pointer is other than esp/rsp, we do not have a way to // generate a compact unwinding representation, so bail out. - if (MRI.getLLVMRegNum(Inst.getRegister(), true) != + if (*MRI.getLLVMRegNum(Inst.getRegister(), true) != (Is64Bit ? X86::RBP : X86::EBP)) return 0; @@ -605,7 +605,7 @@ protected: // unwind encoding.
return CU::UNWIND_MODE_DWARF; - unsigned Reg = MRI.getLLVMRegNum(Inst.getRegister(), true); + unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true); SavedRegs[SavedRegIdx++] = Reg; StackAdjust += OffsetSize; InstrOffset += PushInstrSize(Reg); diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp index 232a06593238..bd009da60851 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -46,10 +46,10 @@ X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI, enum X86_64RelType { RT64_NONE, RT64_64, RT64_32, RT64_32S, RT64_16, RT64_8 }; -static X86_64RelType getType64(unsigned Kind, +static X86_64RelType getType64(MCFixupKind Kind, MCSymbolRefExpr::VariantKind &Modifier, bool &IsPCRel) { - switch (Kind) { + switch (unsigned(Kind)) { default: llvm_unreachable("Unimplemented"); case FK_NONE: @@ -97,7 +97,7 @@ static void checkIs32(MCContext &Ctx, SMLoc Loc, X86_64RelType Type) { static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc, MCSymbolRefExpr::VariantKind Modifier, X86_64RelType Type, bool IsPCRel, - unsigned Kind) { + MCFixupKind Kind) { switch (Modifier) { default: llvm_unreachable("Unimplemented"); @@ -202,7 +202,7 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc, // and we want to keep back-compatibility. if (!Ctx.getAsmInfo()->canRelaxRelocations()) return ELF::R_X86_64_GOTPCREL; - switch (Kind) { + switch (unsigned(Kind)) { default: return ELF::R_X86_64_GOTPCREL; case X86::reloc_riprel_4byte_relax: @@ -237,7 +237,7 @@ static X86_32RelType getType32(X86_64RelType T) { static unsigned getRelocType32(MCContext &Ctx, MCSymbolRefExpr::VariantKind Modifier, X86_32RelType Type, bool IsPCRel, - unsigned Kind) { + MCFixupKind Kind) { switch (Modifier) { default: llvm_unreachable("Unimplemented"); @@ -265,8 +265,9 @@ static unsigned getRelocType32(MCContext &Ctx, if (!Ctx.getAsmInfo()->canRelaxRelocations()) return ELF::R_386_GOT32; - return Kind == X86::reloc_signed_4byte_relax ? ELF::R_386_GOT32X - : ELF::R_386_GOT32; + return Kind == MCFixupKind(X86::reloc_signed_4byte_relax) + ? 
ELF::R_386_GOT32X + : ELF::R_386_GOT32; case MCSymbolRefExpr::VK_GOTOFF: assert(Type == RT32_32); assert(!IsPCRel); return ELF::R_386_GOTOFF; @@ -317,7 +318,7 @@ unsigned X86ELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant(); - unsigned Kind = Fixup.getKind(); + MCFixupKind Kind = Fixup.getKind(); X86_64RelType Type = getType64(Kind, Modifier, IsPCRel); if (getEMachine() == ELF::EM_X86_64) return getRelocType64(Ctx, Fixup.getLoc(), Modifier, Type, IsPCRel, Kind); @@ -329,5 +330,5 @@ unsigned X86ELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, std::unique_ptr<MCObjectTargetWriter> llvm::createX86ELFObjectWriter(bool IsELF64, uint8_t OSABI, uint16_t EMachine) { - return llvm::make_unique<X86ELFObjectWriter>(IsELF64, OSABI, EMachine); + return std::make_unique<X86ELFObjectWriter>(IsELF64, OSABI, EMachine); } diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index e1125c176b25..d986c829d98e 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -163,5 +163,7 @@ X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) { TextAlignFillValue = 0x90; + AllowAtInName = true; + UseIntegratedAssembler = true; } diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 31d26d08a63f..ac36bf3a12fa 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -862,6 +862,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, VEX_B = ~(BaseRegEnc >> 3) & 1; unsigned IndexRegEnc = getX86RegEncoding(MI, MemOperand+X86::AddrIndexReg); VEX_X = ~(IndexRegEnc >> 3) & 1; + if (!HasVEX_4V) // Only needed with VSIB which don't use VVVV. + EVEX_V2 = ~(IndexRegEnc >> 4) & 1; + break; } case X86II::MRMSrcReg: { diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index ce05ad974507..ced9eacc8b97 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -70,6 +70,10 @@ unsigned X86_MC::getDwarfRegFlavour(const Triple &TT, bool isEH) { return DWARFFlavour::X86_32_Generic; } +bool X86_MC::hasLockPrefix(const MCInst &MI) { + return MI.getFlags() & X86::IP_HAS_LOCK; +} + void X86_MC::initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI) { // FIXME: TableGen these.
for (unsigned Reg = X86::NoRegister + 1; Reg < X86::NUM_TARGET_REGS; ++Reg) { @@ -399,6 +403,9 @@ public: findPltEntries(uint64_t PltSectionVA, ArrayRef PltContents, uint64_t GotSectionVA, const Triple &TargetTriple) const override; + Optional evaluateMemoryOperandAddress(const MCInst &Inst, + uint64_t Addr, + uint64_t Size) const override; }; #define GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS @@ -511,7 +518,31 @@ std::vector> X86MCInstrAnalysis::findPltEntries( return findX86_64PltEntries(PltSectionVA, PltContents); default: return {}; - } + } +} + +Optional X86MCInstrAnalysis::evaluateMemoryOperandAddress( + const MCInst &Inst, uint64_t Addr, uint64_t Size) const { + const MCInstrDesc &MCID = Info->get(Inst.getOpcode()); + int MemOpStart = X86II::getMemoryOperandNo(MCID.TSFlags); + if (MemOpStart == -1) + return None; + MemOpStart += X86II::getOperandBias(MCID); + + const MCOperand &SegReg = Inst.getOperand(MemOpStart + X86::AddrSegmentReg); + const MCOperand &BaseReg = Inst.getOperand(MemOpStart + X86::AddrBaseReg); + const MCOperand &IndexReg = Inst.getOperand(MemOpStart + X86::AddrIndexReg); + const MCOperand &ScaleAmt = Inst.getOperand(MemOpStart + X86::AddrScaleAmt); + const MCOperand &Disp = Inst.getOperand(MemOpStart + X86::AddrDisp); + if (SegReg.getReg() != 0 || IndexReg.getReg() != 0 || ScaleAmt.getImm() != 1 || + !Disp.isImm()) + return None; + + // RIP-relative addressing. + if (BaseReg.getReg() == X86::RIP) + return Addr + Size + Disp.getImm(); + + return None; } } // end of namespace X86_MC @@ -567,13 +598,13 @@ extern "C" void LLVMInitializeX86TargetMC() { createX86_64AsmBackend); } -unsigned llvm::getX86SubSuperRegisterOrZero(unsigned Reg, unsigned Size, - bool High) { +MCRegister llvm::getX86SubSuperRegisterOrZero(MCRegister Reg, unsigned Size, + bool High) { switch (Size) { - default: return 0; + default: return X86::NoRegister; case 8: if (High) { - switch (Reg) { + switch (Reg.id()) { default: return getX86SubSuperRegisterOrZero(Reg, 64); case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: return X86::SI; @@ -593,8 +624,8 @@ unsigned llvm::getX86SubSuperRegisterOrZero(unsigned Reg, unsigned Size, return X86::BH; } } else { - switch (Reg) { - default: return 0; + switch (Reg.id()) { + default: return X86::NoRegister; case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: return X86::AL; case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: @@ -630,8 +661,8 @@ unsigned llvm::getX86SubSuperRegisterOrZero(unsigned Reg, unsigned Size, } } case 16: - switch (Reg) { - default: return 0; + switch (Reg.id()) { + default: return X86::NoRegister; case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: return X86::AX; case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: @@ -666,8 +697,8 @@ unsigned llvm::getX86SubSuperRegisterOrZero(unsigned Reg, unsigned Size, return X86::R15W; } case 32: - switch (Reg) { - default: return 0; + switch (Reg.id()) { + default: return X86::NoRegister; case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: return X86::EAX; case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: @@ -702,7 +733,7 @@ unsigned llvm::getX86SubSuperRegisterOrZero(unsigned Reg, unsigned Size, return X86::R15D; } case 64: - switch (Reg) { + switch (Reg.id()) { default: return 0; case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: return X86::RAX; @@ -740,9 +771,9 @@ unsigned llvm::getX86SubSuperRegisterOrZero(unsigned Reg, unsigned Size, } } -unsigned 
llvm::getX86SubSuperRegister(unsigned Reg, unsigned Size, bool High) { - unsigned Res = getX86SubSuperRegisterOrZero(Reg, Size, High); - assert(Res != 0 && "Unexpected register or VT"); +MCRegister llvm::getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High) { + MCRegister Res = getX86SubSuperRegisterOrZero(Reg, Size, High); + assert(Res != X86::NoRegister && "Unexpected register or VT"); return Res; } diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index 00dd5908cbf5..0c789061f0e1 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -13,6 +13,7 @@ #ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCTARGETDESC_H #define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCTARGETDESC_H +#include "llvm/MC/MCRegister.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/DataTypes.h" #include @@ -57,6 +58,10 @@ unsigned getDwarfRegFlavour(const Triple &TT, bool isEH); void initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI); + +/// Returns true if this instruction has a LOCK prefix. +bool hasLockPrefix(const MCInst &MI); + /// Create a X86 MCSubtargetInfo instance. This is exposed so Asm parser, etc. /// do not need to go through TargetRegistry. MCSubtargetInfo *createX86MCSubtargetInfo(const Triple &TT, StringRef CPU, @@ -111,12 +116,12 @@ createX86WinCOFFObjectWriter(bool Is64Bit); /// Returns the sub or super register of a specific X86 register. /// e.g. getX86SubSuperRegister(X86::EAX, 16) returns X86::AX. /// Aborts on error. -unsigned getX86SubSuperRegister(unsigned, unsigned, bool High=false); +MCRegister getX86SubSuperRegister(MCRegister, unsigned, bool High=false); /// Returns the sub or super register of a specific X86 register. /// Like getX86SubSuperRegister() but returns 0 on error. -unsigned getX86SubSuperRegisterOrZero(unsigned, unsigned, - bool High = false); +MCRegister getX86SubSuperRegisterOrZero(MCRegister, unsigned, + bool High = false); } // End llvm namespace diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index fc7e99f61e5e..b67a7508fe72 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -276,7 +276,7 @@ void X86MachObjectWriter::RecordX86_64Relocation( // x86_64 distinguishes movq foo@GOTPCREL so that the linker can // rewrite the movq to an leaq at link time if the symbol ends up in // the same linkage unit. 
- if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load) + if (Fixup.getTargetKind() == X86::reloc_riprel_4byte_movq_load) Type = MachO::X86_64_RELOC_GOT_LOAD; else Type = MachO::X86_64_RELOC_GOT; @@ -339,8 +339,7 @@ void X86MachObjectWriter::RecordX86_64Relocation( return; } else { Type = MachO::X86_64_RELOC_UNSIGNED; - unsigned Kind = Fixup.getKind(); - if (Kind == X86::reloc_signed_4byte) { + if (Fixup.getTargetKind() == X86::reloc_signed_4byte) { Asm.getContext().reportError( Fixup.getLoc(), "32-bit absolute addressing is not supported in 64-bit mode"); @@ -600,5 +599,5 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, std::unique_ptr llvm::createX86MachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) { - return llvm::make_unique(Is64Bit, CPUType, CPUSubtype); + return std::make_unique(Is64Bit, CPUType, CPUSubtype); } diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp index 3baab9da1c41..760239f76505 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp @@ -109,5 +109,5 @@ unsigned X86WinCOFFObjectWriter::getRelocType(MCContext &Ctx, std::unique_ptr llvm::createX86WinCOFFObjectWriter(bool Is64Bit) { - return llvm::make_unique(Is64Bit); + return std::make_unique(Is64Bit); } diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp index 796a27a17255..db624378d517 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp @@ -35,8 +35,9 @@ void X86WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) { MCStreamer::EmitWinEHHandlerData(Loc); // We have to emit the unwind info now, because this directive - // actually switches to the .xdata section! - EHStreamer.EmitUnwindInfo(*this, getCurrentWinFrameInfo()); + // actually switches to the .xdata section. + if (WinEH::FrameInfo *CurFrame = getCurrentWinFrameInfo()) + EHStreamer.EmitUnwindInfo(*this, CurFrame); } void X86WinCOFFStreamer::EmitWindowsUnwindTables() { diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp index e9987d1f62bd..d5494ef12370 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp @@ -170,7 +170,7 @@ bool X86WinCOFFTargetStreamer::emitFPOProc(const MCSymbol *ProcSym, L, "opening new .cv_fpo_proc before closing previous frame"); return true; } - CurFPOData = llvm::make_unique(); + CurFPOData = std::make_unique(); CurFPOData->Function = ProcSym; CurFPOData->Begin = emitFPOLabel(); CurFPOData->ParamsSize = ParamsSize; diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index a95f68434d12..6840fc12751d 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -81,6 +81,12 @@ FunctionPass *createX86FlagsCopyLoweringPass(); /// Return a pass that expands WinAlloca pseudo-instructions. FunctionPass *createX86WinAllocaExpander(); +/// Return a pass that inserts int3 at the end of the function if it ends with a +/// CALL instruction. The pass does the same for each funclet as well. This +/// ensures that the open interval of function start and end PCs contains all +/// return addresses for the benefit of the Windows x64 unwinder. +FunctionPass *createX86AvoidTrailingCallPass(); + /// Return a pass that optimizes the code-size of x86 call sequences. 
This is /// done by replacing esp-relative movs with pushes. FunctionPass *createX86CallFrameOptimization(); @@ -137,13 +143,13 @@ void initializeWinEHStatePassPass(PassRegistry &); void initializeX86AvoidSFBPassPass(PassRegistry &); void initializeX86CallFrameOptimizationPass(PassRegistry &); void initializeX86CmovConverterPassPass(PassRegistry &); -void initializeX86ExpandPseudoPass(PassRegistry&); void initializeX86CondBrFoldingPassPass(PassRegistry &); void initializeX86DomainReassignmentPass(PassRegistry &); void initializeX86ExecutionDomainFixPass(PassRegistry &); +void initializeX86ExpandPseudoPass(PassRegistry &); void initializeX86FlagsCopyLoweringPassPass(PassRegistry &); +void initializeX86OptimizeLEAPassPass(PassRegistry &); void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &); - } // End llvm namespace #endif diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 3112f00c91f2..d8631aca2734 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -95,7 +95,8 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA", def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true", "Support 64-bit instructions">; def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true", - "64-bit with cmpxchg16b">; + "64-bit with cmpxchg16b", + [FeatureCMPXCHG8B]>; def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true", "SHLD instruction is slow">; def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true", @@ -240,8 +241,11 @@ def FeatureCLDEMOTE : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true", "Enable Cache Demote">; def FeaturePTWRITE : SubtargetFeature<"ptwrite", "HasPTWRITE", "true", "Support ptwrite instruction">; -def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true", - "Support MPX instructions">; +// FIXME: This feature is deprecated in 10.0 and should not be used for +// anything, but removing it would break IR files that may contain it in a +// target-feature attribute. +def FeatureDeprecatedMPX : SubtargetFeature<"mpx", "DeprecatedHasMPX", "false", + "Deprecated. Support MPX instructions">; def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb", @@ -374,6 +378,10 @@ def FeatureHasFastGather : SubtargetFeature<"fast-gather", "HasFastGather", "true", "Indicates if gather is reasonably fast">; +def FeaturePrefer128Bit + : SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true", + "Prefer 128-bit AVX instructions">; + def FeaturePrefer256Bit : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true", "Prefer 256-bit AVX instructions">; @@ -449,6 +457,10 @@ def FeatureMergeToThreeWayBranch : SubtargetFeature<"merge-to-threeway-branch", "Merge branches to a three-way " "conditional branch">; +// Enable use of alias analysis during code generation. 
+def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true", + "Use alias analysis during codegen">; + // Bonnell def ProcIntelAtom : SubtargetFeature<"", "X86ProcFamily", "IntelAtom", "">; // Silvermont @@ -579,7 +591,6 @@ def ProcessorFeatures { // Skylake list SKLAdditionalFeatures = [FeatureAES, - FeatureMPX, FeatureXSAVEC, FeatureXSAVES, FeatureCLFLUSHOPT, @@ -594,6 +605,7 @@ def ProcessorFeatures { // Skylake-AVX512 list SKXAdditionalFeatures = [FeatureAVX512, + FeaturePrefer256Bit, FeatureCDI, FeatureDQI, FeatureBWI, @@ -627,6 +639,7 @@ def ProcessorFeatures { // Cannonlake list CNLAdditionalFeatures = [FeatureAVX512, + FeaturePrefer256Bit, FeatureCDI, FeatureDQI, FeatureBWI, @@ -665,6 +678,17 @@ def ProcessorFeatures { list ICXFeatures = !listconcat(ICLInheritableFeatures, ICXSpecificFeatures); + //Tigerlake + list TGLAdditionalFeatures = [FeatureVP2INTERSECT, + FeatureMOVDIRI, + FeatureMOVDIR64B, + FeatureSHSTK]; + list TGLSpecificFeatures = [FeatureHasFastGather]; + list TGLInheritableFeatures = + !listconcat(TGLAdditionalFeatures ,TGLSpecificFeatures); + list TGLFeatures = + !listconcat(ICLFeatures, TGLInheritableFeatures ); + // Atom list AtomInheritableFeatures = [FeatureX87, FeatureCMPXCHG8B, @@ -707,7 +731,6 @@ def ProcessorFeatures { // Goldmont list GLMAdditionalFeatures = [FeatureAES, - FeatureMPX, FeatureSHA, FeatureRDSEED, FeatureXSAVE, @@ -786,6 +809,22 @@ def ProcessorFeatures { list KNMFeatures = !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]); + // Barcelona + list BarcelonaInheritableFeatures = [FeatureX87, + FeatureCMPXCHG8B, + FeatureSSE4A, + Feature3DNowA, + FeatureFXSR, + FeatureNOPL, + FeatureCMPXCHG16B, + FeatureLZCNT, + FeaturePOPCNT, + FeatureSlowSHLD, + FeatureLAHFSAHF, + FeatureCMOV, + Feature64Bit, + FeatureFastScalarShiftMasks]; + list BarcelonaFeatures = BarcelonaInheritableFeatures; // Bobcat list BtVer1InheritableFeatures = [FeatureX87, @@ -1093,6 +1132,8 @@ def : ProcessorModel<"icelake-client", SkylakeServerModel, ProcessorFeatures.ICLFeatures>; def : ProcessorModel<"icelake-server", SkylakeServerModel, ProcessorFeatures.ICXFeatures>; +def : ProcessorModel<"tigerlake", SkylakeServerModel, + ProcessorFeatures.TGLFeatures>; // AMD CPUs. @@ -1129,10 +1170,7 @@ foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in { } foreach P = ["amdfam10", "barcelona"] in { - def : Proc; + def : Proc; } // Bobcat diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 80120722e0e6..8d27be30a277 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -242,7 +242,7 @@ void X86AsmPrinter::PrintModifiedOperand(const MachineInstr *MI, unsigned OpNo, return PrintOperand(MI, OpNo, O); if (MI->getInlineAsmDialect() == InlineAsm::AD_ATT) O << '%'; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (strncmp(Modifier, "subreg", strlen("subreg")) == 0) { unsigned Size = (strcmp(Modifier+6,"64") == 0) ? 64 : (strcmp(Modifier+6,"32") == 0) ? 32 : @@ -388,7 +388,7 @@ void X86AsmPrinter::PrintIntelMemReference(const MachineInstr *MI, static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO, char Mode, raw_ostream &O) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); bool EmitPercent = true; if (!X86::GR8RegClass.contains(Reg) && @@ -575,7 +575,7 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) { // Emitting note header. int WordSize = TT.isArch64Bit() ? 8 : 4; - EmitAlignment(WordSize == 4 ? 2 : 3); + EmitAlignment(WordSize == 4 ? 
Align(4) : Align(8)); OutStreamer->EmitIntValue(4, 4 /*size*/); // data size for "GNU\0" OutStreamer->EmitIntValue(8 + WordSize, 4 /*size*/); // Elf_Prop size OutStreamer->EmitIntValue(ELF::NT_GNU_PROPERTY_TYPE_0, 4 /*size*/); @@ -585,7 +585,7 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) { OutStreamer->EmitIntValue(ELF::GNU_PROPERTY_X86_FEATURE_1_AND, 4); OutStreamer->EmitIntValue(4, 4); // data size OutStreamer->EmitIntValue(FeatureFlagsAnd, 4); // data - EmitAlignment(WordSize == 4 ? 2 : 3); // padding + EmitAlignment(WordSize == 4 ? Align(4) : Align(8)); // padding OutStreamer->endSection(Nt); OutStreamer->SwitchSection(Cur); diff --git a/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp index 3dcc1015dc7c..69c6b3356cbb 100644 --- a/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp +++ b/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp @@ -35,6 +35,7 @@ #include "X86InstrInfo.h" #include "X86Subtarget.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -390,7 +391,7 @@ void X86AvoidSFBPass::buildCopy(MachineInstr *LoadInst, unsigned NLoadOpcode, MachineMemOperand *LMMO = *LoadInst->memoperands_begin(); MachineMemOperand *SMMO = *StoreInst->memoperands_begin(); - unsigned Reg1 = MRI->createVirtualRegister( + Register Reg1 = MRI->createVirtualRegister( TII->getRegClass(TII->get(NLoadOpcode), 0, TRI, *(MBB->getParent()))); MachineInstr *NewLoad = BuildMI(*MBB, LoadInst, LoadInst->getDebugLoc(), TII->get(NLoadOpcode), diff --git a/lib/Target/X86/X86AvoidTrailingCall.cpp b/lib/Target/X86/X86AvoidTrailingCall.cpp new file mode 100644 index 000000000000..fb4f9e2901dc --- /dev/null +++ b/lib/Target/X86/X86AvoidTrailingCall.cpp @@ -0,0 +1,108 @@ +//===----- X86AvoidTrailingCall.cpp - Insert int3 after trailing calls ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The Windows x64 unwinder has trouble unwinding the stack when a return +// address points to the end of the function. This pass maintains the invariant +// that every return address is inside the bounds of its parent function or +// funclet by inserting int3 if the last instruction would otherwise be a call. +// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86Subtarget.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" + +#define DEBUG_TYPE "x86-avoid-trailing-call" + +using namespace llvm; + +namespace { + +class X86AvoidTrailingCallPass : public MachineFunctionPass { +public: + X86AvoidTrailingCallPass() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + +private: + StringRef getPassName() const override { + return "X86 avoid trailing call pass"; + } + static char ID; +}; + +char X86AvoidTrailingCallPass::ID = 0; + +} // end anonymous namespace + +FunctionPass *llvm::createX86AvoidTrailingCallPass() { + return new X86AvoidTrailingCallPass(); +} + +// A real instruction is a non-meta, non-pseudo instruction. Some pseudos +// expand to nothing, and some expand to code. This logic conservatively assumes +// they might expand to nothing. 
+static bool isRealInstruction(MachineInstr &MI) { + return !MI.isPseudo() && !MI.isMetaInstruction(); +} + +// Return true if this is a call instruction, but not a tail call. +static bool isCallInstruction(const MachineInstr &MI) { + return MI.isCall() && !MI.isReturn(); +} + +bool X86AvoidTrailingCallPass::runOnMachineFunction(MachineFunction &MF) { + const X86Subtarget &STI = MF.getSubtarget(); + const X86InstrInfo &TII = *STI.getInstrInfo(); + assert(STI.isTargetWin64() && "pass only runs on Win64"); + + // FIXME: Perhaps this pass should also replace SEH_Epilogue by inserting nops + // before epilogues. + + bool Changed = false; + for (MachineBasicBlock &MBB : MF) { + // Look for basic blocks that precede funclet entries or are at the end of + // the function. + MachineBasicBlock *NextMBB = MBB.getNextNode(); + if (NextMBB && !NextMBB->isEHFuncletEntry()) + continue; + + // Find the last real instruction in this block, or previous blocks if this + // block is empty. + MachineBasicBlock::reverse_iterator LastRealInstr; + for (MachineBasicBlock &RMBB : + make_range(MBB.getReverseIterator(), MF.rend())) { + LastRealInstr = llvm::find_if(reverse(RMBB), isRealInstruction); + if (LastRealInstr != RMBB.rend()) + break; + } + + // Do nothing if this function or funclet has no instructions. + if (LastRealInstr == MF.begin()->rend()) + continue; + + // If this is a call instruction, insert int3 right after it with the same + // DebugLoc. Convert back to a forward iterator and advance the insertion + // position once. + if (isCallInstruction(*LastRealInstr)) { + LLVM_DEBUG({ + dbgs() << "inserting int3 after trailing call instruction:\n"; + LastRealInstr->dump(); + dbgs() << '\n'; + }); + + MachineBasicBlock::iterator MBBI = std::next(LastRealInstr.getReverse()); + BuildMI(*LastRealInstr->getParent(), MBBI, LastRealInstr->getDebugLoc(), + TII.get(X86::INT3)); + Changed = true; + } + } + + return Changed; +} diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp index 4df849a2e14c..ad7e32b4efc8 100644 --- a/lib/Target/X86/X86CallFrameOptimization.cpp +++ b/lib/Target/X86/X86CallFrameOptimization.cpp @@ -155,12 +155,22 @@ bool X86CallFrameOptimization::isLegal(MachineFunction &MF) { // This is bad, and breaks SP adjustment. // So, check that all of the frames in the function are closed inside // the same block, and, for good measure, that there are no nested frames. + // + // If any call allocates more argument stack memory than the stack + // probe size, don't do this optimization. Otherwise, this pass + // would need to synthesize additional stack probe calls to allocate + // memory for arguments. 
unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode(); unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); + bool UseStackProbe = + !STI->getTargetLowering()->getStackProbeSymbolName(MF).empty(); + unsigned StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF); for (MachineBasicBlock &BB : MF) { bool InsideFrameSequence = false; for (MachineInstr &MI : BB) { if (MI.getOpcode() == FrameSetupOpcode) { + if (TII->getFrameSize(MI) >= StackProbeSize && UseStackProbe) + return false; if (InsideFrameSequence) return false; InsideFrameSequence = true; @@ -325,8 +335,8 @@ X86CallFrameOptimization::classifyInstruction( for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg()) continue; - unsigned int Reg = MO.getReg(); - if (!RegInfo.isPhysicalRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; if (RegInfo.regsOverlap(Reg, RegInfo.getStackRegister())) return Exit; @@ -370,7 +380,7 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF, while (I->getOpcode() == X86::LEA32r || I->isDebugInstr()) ++I; - unsigned StackPtr = RegInfo.getStackRegister(); + Register StackPtr = RegInfo.getStackRegister(); auto StackPtrCopyInst = MBB.end(); // SelectionDAG (but not FastISel) inserts a copy of ESP into a virtual // register. If it's there, use that virtual register as stack pointer @@ -443,8 +453,8 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF, for (const MachineOperand &MO : I->uses()) { if (!MO.isReg()) continue; - unsigned int Reg = MO.getReg(); - if (RegInfo.isPhysicalRegister(Reg)) + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) UsedRegs.insert(Reg); } } @@ -524,12 +534,12 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF, break; case X86::MOV32mr: case X86::MOV64mr: { - unsigned int Reg = PushOp.getReg(); + Register Reg = PushOp.getReg(); // If storing a 32-bit vreg on 64-bit targets, extend to a 64-bit vreg // in preparation for the PUSH64. The upper 32 bits can be undef. if (Is64Bit && Store->getOpcode() == X86::MOV32mr) { - unsigned UndefReg = MRI->createVirtualRegister(&X86::GR64RegClass); + Register UndefReg = MRI->createVirtualRegister(&X86::GR64RegClass); Reg = MRI->createVirtualRegister(&X86::GR64RegClass); BuildMI(MBB, Context.Call, DL, TII->get(X86::IMPLICIT_DEF), UndefReg); BuildMI(MBB, Context.Call, DL, TII->get(X86::INSERT_SUBREG), Reg) @@ -598,7 +608,7 @@ MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush( // movl %eax, (%esp) // call // Get rid of those with prejudice. - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return nullptr; // Make sure this is the only use of Reg. 
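The X86CallFrameOptimization hunks above, like most of this patch, mechanically replace raw unsigned register numbers with the Register wrapper and move the virtual/physical checks from TargetRegisterInfo statics to Register::isVirtualRegister and Register::isPhysicalRegister. A minimal self-contained sketch of the idea follows; the names Register, isVirtualRegister, isPhysicalRegister, id() and index2VirtReg() appear in the hunks themselves, but the high-bit tag chosen here to separate virtual from physical numbers is an illustrative assumption, not code taken from LLVM.

#include <cassert>

// Illustrative stand-in for the Register wrapper used throughout these hunks.
// Physical registers keep small integer numbers; virtual registers are tagged
// in the most significant bit (the exact tag bit is an assumption made for
// this sketch).
class Register {
  unsigned Reg = 0;

public:
  static constexpr unsigned VirtualRegFlag = 1u << 31;

  constexpr Register(unsigned R = 0) : Reg(R) {}

  static bool isVirtualRegister(unsigned R) { return (R & VirtualRegFlag) != 0; }
  static bool isPhysicalRegister(unsigned R) {
    return R != 0 && !isVirtualRegister(R);
  }
  static Register index2VirtReg(unsigned Index) {
    return Register(Index | VirtualRegFlag);
  }

  bool isVirtual() const { return isVirtualRegister(Reg); }
  bool isPhysical() const { return isPhysicalRegister(Reg); }
  unsigned id() const { return Reg; }

  // Implicit conversion back to unsigned keeps existing interfaces compiling,
  // so the edits in this patch can stay purely mechanical.
  operator unsigned() const { return Reg; }
};

int main() {
  Register Phys(42);                          // hypothetical physical register number
  Register Virt = Register::index2VirtReg(0); // first virtual register

  assert(Phys.isPhysical() && !Phys.isVirtual());
  assert(Virt.isVirtual() && Register::isVirtualRegister(Virt));
  assert(Register().id() == 0);               // default-constructed: "no register"
  return 0;
}

Because the wrapper converts implicitly back to unsigned, calls such as MFI.isVRegStackified(Reg) and MRI.getRegClass(Reg) in the surrounding hunks keep compiling unchanged, which is why these replacements can be applied one operand at a time.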
diff --git a/lib/Target/X86/X86CallLowering.cpp b/lib/Target/X86/X86CallLowering.cpp index b16b3839c85a..7ee637cfd523 100644 --- a/lib/Target/X86/X86CallLowering.cpp +++ b/lib/Target/X86/X86CallLowering.cpp @@ -102,6 +102,8 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler { DL(MIRBuilder.getMF().getDataLayout()), STI(MIRBuilder.getMF().getSubtarget()) {} + bool isIncomingArgumentHandler() const override { return false; } + Register getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { LLT p0 = LLT::pointer(0, DL.getPointerSizeInBits(0)); @@ -155,8 +157,9 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler { bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, - const CallLowering::ArgInfo &Info, CCState &State) override { - bool Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State); + const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags, + CCState &State) override { + bool Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State); StackSize = State.getNextStackOffset(); static const MCPhysReg XMMArgRegs[] = {X86::XMM0, X86::XMM1, X86::XMM2, @@ -229,7 +232,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler { : ValueHandler(MIRBuilder, MRI, AssignFn), DL(MIRBuilder.getMF().getDataLayout()) {} - bool isArgumentHandler() const override { return true; } + bool isIncomingArgumentHandler() const override { return true; } Register getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { @@ -237,7 +240,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler { int FI = MFI.CreateFixedObject(Size, Offset, true); MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI); - unsigned AddrReg = MRI.createGenericVirtualRegister( + Register AddrReg = MRI.createGenericVirtualRegister( LLT::pointer(0, DL.getPointerSizeInBits(0))); MIRBuilder.buildFrameIndex(AddrReg, FI); return AddrReg; @@ -301,6 +304,7 @@ struct FormalArgHandler : public IncomingValueHandler { : IncomingValueHandler(MIRBuilder, MRI, AssignFn) {} void markPhysRegUsed(unsigned PhysReg) override { + MIRBuilder.getMRI()->addLiveIn(PhysReg); MIRBuilder.getMBB().addLiveIn(PhysReg); } }; @@ -372,10 +376,7 @@ bool X86CallLowering::lowerFormalArguments( } bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, - CallingConv::ID CallConv, - const MachineOperand &Callee, - const ArgInfo &OrigRet, - ArrayRef OrigArgs) const { + CallLoweringInfo &Info) const { MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -385,8 +386,8 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, auto TRI = STI.getRegisterInfo(); // Handle only Linux C, X86_64_SysV calling conventions for now. - if (!STI.isTargetLinux() || - !(CallConv == CallingConv::C || CallConv == CallingConv::X86_64_SysV)) + if (!STI.isTargetLinux() || !(Info.CallConv == CallingConv::C || + Info.CallConv == CallingConv::X86_64_SysV)) return false; unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); @@ -395,18 +396,19 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // Create a temporarily-floating call instruction so we can add the implicit // uses of arg registers. bool Is64Bit = STI.is64Bit(); - unsigned CallOpc = Callee.isReg() + unsigned CallOpc = Info.Callee.isReg() ? (Is64Bit ? X86::CALL64r : X86::CALL32r) : (Is64Bit ? 
X86::CALL64pcrel32 : X86::CALLpcrel32); - auto MIB = MIRBuilder.buildInstrNoInsert(CallOpc).add(Callee).addRegMask( - TRI->getCallPreservedMask(MF, CallConv)); + auto MIB = MIRBuilder.buildInstrNoInsert(CallOpc) + .add(Info.Callee) + .addRegMask(TRI->getCallPreservedMask(MF, Info.CallConv)); SmallVector SplitArgs; - for (const auto &OrigArg : OrigArgs) { + for (const auto &OrigArg : Info.OrigArgs) { // TODO: handle not simple cases. - if (OrigArg.Flags.isByVal()) + if (OrigArg.Flags[0].isByVal()) return false; if (OrigArg.Regs.size() > 1) @@ -423,8 +425,8 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, if (!handleAssignments(MIRBuilder, SplitArgs, Handler)) return false; - bool IsFixed = OrigArgs.empty() ? true : OrigArgs.back().IsFixed; - if (STI.is64Bit() && !IsFixed && !STI.isCallingConvWin64(CallConv)) { + bool IsFixed = Info.OrigArgs.empty() ? true : Info.OrigArgs.back().IsFixed; + if (STI.is64Bit() && !IsFixed && !STI.isCallingConvWin64(Info.CallConv)) { // From AMD64 ABI document: // For calls that may call functions that use varargs or stdargs // (prototype-less calls or calls to functions containing ellipsis (...) in @@ -445,23 +447,24 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // If Callee is a reg, since it is used by a target specific // instruction, it must have a register class matching the // constraint of that instruction. - if (Callee.isReg()) + if (Info.Callee.isReg()) MIB->getOperand(0).setReg(constrainOperandRegClass( MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(), - *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Callee, 0)); + *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Info.Callee, + 0)); // Finally we can copy the returned value back into its virtual-register. In // symmetry with the arguments, the physical register must be an // implicit-define of the call instruction. - if (!OrigRet.Ty->isVoidTy()) { - if (OrigRet.Regs.size() > 1) + if (!Info.OrigRet.Ty->isVoidTy()) { + if (Info.OrigRet.Regs.size() > 1) return false; SplitArgs.clear(); SmallVector NewRegs; - if (!splitToValueTypes(OrigRet, SplitArgs, DL, MRI, + if (!splitToValueTypes(Info.OrigRet, SplitArgs, DL, MRI, [&](ArrayRef Regs) { NewRegs.assign(Regs.begin(), Regs.end()); })) @@ -472,7 +475,7 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, return false; if (!NewRegs.empty()) - MIRBuilder.buildMerge(OrigRet.Regs[0], NewRegs); + MIRBuilder.buildMerge(Info.OrigRet.Regs[0], NewRegs); } CallSeqStart.addImm(Handler.getStackSize()) diff --git a/lib/Target/X86/X86CallLowering.h b/lib/Target/X86/X86CallLowering.h index 0445331bc3ff..444a0c7d0122 100644 --- a/lib/Target/X86/X86CallLowering.h +++ b/lib/Target/X86/X86CallLowering.h @@ -34,9 +34,8 @@ public: bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef> VRegs) const override; - bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, - const MachineOperand &Callee, const ArgInfo &OrigRet, - ArrayRef OrigArgs) const override; + bool lowerCall(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info) const override; private: /// A function of this type is used to perform value split action. diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 1c3034a5116a..4c49d68bec99 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -433,6 +433,7 @@ defm X86_SysV64_RegCall : def RetCC_X86_32 : CallingConv<[ // If FastCC, use RetCC_X86_32_Fast. 
CCIfCC<"CallingConv::Fast", CCDelegateTo>, + CCIfCC<"CallingConv::Tail", CCDelegateTo>, // If HiPE, use RetCC_X86_32_HiPE. CCIfCC<"CallingConv::HiPE", CCDelegateTo>, CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo>, @@ -1000,6 +1001,7 @@ def CC_X86_32 : CallingConv<[ CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo>, CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo>, CCIfCC<"CallingConv::Fast", CCDelegateTo>, + CCIfCC<"CallingConv::Tail", CCDelegateTo>, CCIfCC<"CallingConv::GHC", CCDelegateTo>, CCIfCC<"CallingConv::HiPE", CCDelegateTo>, CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo>, diff --git a/lib/Target/X86/X86CmovConversion.cpp b/lib/Target/X86/X86CmovConversion.cpp index a61fa3246f09..5123853f5455 100644 --- a/lib/Target/X86/X86CmovConversion.cpp +++ b/lib/Target/X86/X86CmovConversion.cpp @@ -436,8 +436,8 @@ bool X86CmovConverterPass::checkForProfitableCmovCandidates( // Checks for "isUse()" as "uses()" returns also implicit definitions. if (!MO.isReg() || !MO.isUse()) continue; - unsigned Reg = MO.getReg(); - auto &RDM = RegDefMaps[TargetRegisterInfo::isVirtualRegister(Reg)]; + Register Reg = MO.getReg(); + auto &RDM = RegDefMaps[Register::isVirtualRegister(Reg)]; if (MachineInstr *DefMI = RDM.lookup(Reg)) { OperandToDefMap[&MO] = DefMI; DepthInfo Info = DepthMap.lookup(DefMI); @@ -456,8 +456,8 @@ bool X86CmovConverterPass::checkForProfitableCmovCandidates( for (auto &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - RegDefMaps[TargetRegisterInfo::isVirtualRegister(Reg)][Reg] = &MI; + Register Reg = MO.getReg(); + RegDefMaps[Register::isVirtualRegister(Reg)][Reg] = &MI; } unsigned Latency = TSchedModel.computeInstrLatency(&MI); @@ -710,7 +710,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches( // Skip any CMOVs in this group which don't load from memory. if (!MI.mayLoad()) { // Remember the false-side register input. - unsigned FalseReg = + Register FalseReg = MI.getOperand(X86::getCondFromCMov(MI) == CC ? 1 : 2).getReg(); // Walk back through any intermediate cmovs referenced. while (true) { @@ -753,7 +753,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches( // Get a fresh register to use as the destination of the MOV. 
const TargetRegisterClass *RC = MRI->getRegClass(MI.getOperand(0).getReg()); - unsigned TmpReg = MRI->createVirtualRegister(RC); + Register TmpReg = MRI->createVirtualRegister(RC); SmallVector NewMIs; bool Unfolded = TII->unfoldMemoryOperand(*MBB->getParent(), MI, TmpReg, @@ -810,9 +810,9 @@ void X86CmovConverterPass::convertCmovInstsToBranches( DenseMap> RegRewriteTable; for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) { - unsigned DestReg = MIIt->getOperand(0).getReg(); - unsigned Op1Reg = MIIt->getOperand(1).getReg(); - unsigned Op2Reg = MIIt->getOperand(2).getReg(); + Register DestReg = MIIt->getOperand(0).getReg(); + Register Op1Reg = MIIt->getOperand(1).getReg(); + Register Op2Reg = MIIt->getOperand(2).getReg(); // If this CMOV we are processing is the opposite condition from the jump we // generated, then we have to swap the operands for the PHI that is going to diff --git a/lib/Target/X86/X86CondBrFolding.cpp b/lib/Target/X86/X86CondBrFolding.cpp index 9dea94f1368d..1bf2d5ba7b8f 100644 --- a/lib/Target/X86/X86CondBrFolding.cpp +++ b/lib/Target/X86/X86CondBrFolding.cpp @@ -564,7 +564,7 @@ X86CondBrFolding::analyzeMBB(MachineBasicBlock &MBB) { Modified = false; break; } - return llvm::make_unique(TargetMBBInfo{ + return std::make_unique(TargetMBBInfo{ TBB, FBB, BrInstr, CmpInstr, CC, SrcReg, CmpValue, Modified, CmpBrOnly}); } diff --git a/lib/Target/X86/X86DomainReassignment.cpp b/lib/Target/X86/X86DomainReassignment.cpp index 18bbfa32e11b..b4cf5cafbc6e 100644 --- a/lib/Target/X86/X86DomainReassignment.cpp +++ b/lib/Target/X86/X86DomainReassignment.cpp @@ -182,7 +182,7 @@ public: MachineBasicBlock *MBB = MI->getParent(); auto &DL = MI->getDebugLoc(); - unsigned Reg = MRI->createVirtualRegister( + Register Reg = MRI->createVirtualRegister( TII->getRegClass(TII->get(DstOpcode), 0, MRI->getTargetRegisterInfo(), *MBB->getParent())); MachineInstrBuilder Bld = BuildMI(*MBB, MI, DL, TII->get(DstOpcode), Reg); @@ -219,13 +219,13 @@ public: // Don't allow copies to/flow GR8/GR16 physical registers. // FIXME: Is there some better way to support this? - unsigned DstReg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(DstReg) && + Register DstReg = MI->getOperand(0).getReg(); + if (Register::isPhysicalRegister(DstReg) && (X86::GR8RegClass.contains(DstReg) || X86::GR16RegClass.contains(DstReg))) return false; - unsigned SrcReg = MI->getOperand(1).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && + Register SrcReg = MI->getOperand(1).getReg(); + if (Register::isPhysicalRegister(SrcReg) && (X86::GR8RegClass.contains(SrcReg) || X86::GR16RegClass.contains(SrcReg))) return false; @@ -241,7 +241,7 @@ public: // Physical registers will not be converted. Assume that converting the // COPY to the destination domain will eventually result in a actual // instruction. 
- if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + if (Register::isPhysicalRegister(MO.getReg())) return 1; RegDomain OpDomain = getDomain(MRI->getRegClass(MO.getReg()), @@ -436,7 +436,7 @@ void X86DomainReassignment::visitRegister(Closure &C, unsigned Reg, if (EnclosedEdges.count(Reg)) return; - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return; if (!MRI->hasOneDef(Reg)) @@ -593,8 +593,8 @@ void X86DomainReassignment::buildClosure(Closure &C, unsigned Reg) { if (!DefOp.isReg()) continue; - unsigned DefReg = DefOp.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DefReg)) { + Register DefReg = DefOp.getReg(); + if (!Register::isVirtualRegister(DefReg)) { C.setAllIllegal(); continue; } @@ -751,7 +751,7 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { // Go over all virtual registers and calculate a closure. unsigned ClosureID = 0; for (unsigned Idx = 0; Idx < MRI->getNumVirtRegs(); ++Idx) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(Idx); + unsigned Reg = Register::index2VirtReg(Idx); // GPR only current source domain supported. if (!isGPR(MRI->getRegClass(Reg))) diff --git a/lib/Target/X86/X86EvexToVex.cpp b/lib/Target/X86/X86EvexToVex.cpp index 58680f1815bb..24c8e6d6f6eb 100755 --- a/lib/Target/X86/X86EvexToVex.cpp +++ b/lib/Target/X86/X86EvexToVex.cpp @@ -131,7 +131,7 @@ static bool usesExtendedRegister(const MachineInstr &MI) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); assert(!(Reg >= X86::ZMM0 && Reg <= X86::ZMM31) && "ZMM instructions should not be in the EVEX->VEX tables"); diff --git a/lib/Target/X86/X86ExpandPseudo.cpp b/lib/Target/X86/X86ExpandPseudo.cpp index b8624b40f2f7..9126a1fbea52 100644 --- a/lib/Target/X86/X86ExpandPseudo.cpp +++ b/lib/Target/X86/X86ExpandPseudo.cpp @@ -194,7 +194,8 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case X86::TCRETURNmi64: { bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64; MachineOperand &JumpTarget = MBBI->getOperand(0); - MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1); + MachineOperand &StackAdjust = MBBI->getOperand(isMem ? X86::AddrNumOperands + : 1); assert(StackAdjust.isImm() && "Expecting immediate value."); // Adjust stack pointer. @@ -259,7 +260,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, ? X86::TAILJMPm : (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op)); - for (unsigned i = 0; i != 5; ++i) + for (unsigned i = 0; i != X86::AddrNumOperands; ++i) MIB.add(MBBI->getOperand(i)); } else if (Opcode == X86::TCRETURNri64) { JumpTarget.setIsKill(); @@ -274,7 +275,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MachineInstr &NewMI = *std::prev(MBBI); NewMI.copyImplicitOps(*MBBI->getParent()->getParent(), *MBBI); - MBB.getParent()->updateCallSiteInfo(&*MBBI, &NewMI); + MBB.getParent()->moveCallSiteInfo(&*MBBI, &NewMI); // Delete the pseudo instruction TCRETURN. MBB.erase(MBBI); @@ -287,7 +288,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, assert(DestAddr.isReg() && "Offset should be in register!"); const bool Uses64BitFramePtr = STI->isTarget64BitLP64() || STI->isTargetNaCl64(); - unsigned StackPtr = TRI->getStackRegister(); + Register StackPtr = TRI->getStackRegister(); BuildMI(MBB, MBBI, DL, TII->get(Uses64BitFramePtr ? 
X86::MOV64rr : X86::MOV32rr), StackPtr) .addReg(DestAddr.getReg()); @@ -347,7 +348,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, // actualcmpxchg Addr // [E|R]BX = SaveRbx const MachineOperand &InArg = MBBI->getOperand(6); - unsigned SaveRbx = MBBI->getOperand(7).getReg(); + Register SaveRbx = MBBI->getOperand(7).getReg(); unsigned ActualInArg = Opcode == X86::LCMPXCHG8B_SAVE_EBX ? X86::EBX : X86::RBX; diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 7b9ce0271205..e5e089d07d55 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1160,6 +1160,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { CallingConv::ID CC = F.getCallingConv(); if (CC != CallingConv::C && CC != CallingConv::Fast && + CC != CallingConv::Tail && CC != CallingConv::X86_FastCall && CC != CallingConv::X86_StdCall && CC != CallingConv::X86_ThisCall && @@ -1173,7 +1174,8 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { // fastcc with -tailcallopt is intended to provide a guaranteed // tail call optimization. Fastisel doesn't know how to do that. - if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) + if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) || + CC == CallingConv::Tail) return false; // Let SDISel handle vararg functions. @@ -1241,7 +1243,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { } // Make the copy. - unsigned DstReg = VA.getLocReg(); + Register DstReg = VA.getLocReg(); const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); // Avoid a cross-class copy. This is very unlikely. if (!SrcRC->contains(DstReg)) @@ -3157,7 +3159,7 @@ static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget, if (Subtarget->getTargetTriple().isOSMSVCRT()) return 0; if (CC == CallingConv::Fast || CC == CallingConv::GHC || - CC == CallingConv::HiPE) + CC == CallingConv::HiPE || CC == CallingConv::Tail) return 0; if (CS) @@ -3208,6 +3210,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { default: return false; case CallingConv::C: case CallingConv::Fast: + case CallingConv::Tail: case CallingConv::WebKit_JS: case CallingConv::Swift: case CallingConv::X86_FastCall: @@ -3224,7 +3227,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { // fastcc with -tailcallopt is intended to provide a guaranteed // tail call optimization. Fastisel doesn't know how to do that. - if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) + if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) || + CC == CallingConv::Tail) return false; // Don't know how to handle Win64 varargs yet. 
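The TCRETURN hunk above replaces the magic operand count 5 with X86::AddrNumOperands, and later LEA hunks index operands as 1 + X86::AddrBaseReg and so on. The sketch below spells out the assumed five-operand layout of an x86 memory reference; the enumerator values are a model of that convention, not the target's actual header.

// x86_addr_operands_sketch.cpp -- illustrative model of the five-operand
// x86 memory reference (base, scale, index, displacement, segment).
#include <cassert>

namespace sketch {
enum {
  AddrBaseReg = 0,    // register holding the base
  AddrScaleAmt = 1,   // immediate scale: 1, 2, 4 or 8
  AddrIndexReg = 2,   // register holding the index
  AddrDisp = 3,       // immediate displacement
  AddrSegmentReg = 4, // segment register (usually none)
  AddrNumOperands = 5 // total operands a memory reference occupies
};
} // namespace sketch

int main() {
  // A memory-form tail call carries its address first, so the stack-adjust
  // immediate sits right after the five address operands.
  assert(sketch::AddrNumOperands == 5);
  // For an LEA the destination register is operand 0, so its address parts
  // start at 1 + AddrBaseReg, 1 + AddrScaleAmt, ... as in later hunks.
  int LeaBaseIdx = 1 + sketch::AddrBaseReg;
  assert(LeaBaseIdx == 1);
  return 0;
}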
Nothing special needed for @@ -3387,6 +3391,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { case CCValAssign::SExtUpper: case CCValAssign::ZExtUpper: case CCValAssign::FPExt: + case CCValAssign::Trunc: llvm_unreachable("Unexpected loc info!"); case CCValAssign::Indirect: // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully @@ -3547,7 +3552,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { CCValAssign &VA = RVLocs[i]; EVT CopyVT = VA.getValVT(); unsigned CopyReg = ResultReg + i; - unsigned SrcReg = VA.getLocReg(); + Register SrcReg = VA.getLocReg(); // If this is x86-64, and we disabled SSE, we can't return FP values if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) && diff --git a/lib/Target/X86/X86FixupBWInsts.cpp b/lib/Target/X86/X86FixupBWInsts.cpp index bf541d933790..9f7c4afde760 100644 --- a/lib/Target/X86/X86FixupBWInsts.cpp +++ b/lib/Target/X86/X86FixupBWInsts.cpp @@ -80,7 +80,7 @@ class FixupBWInstPass : public MachineFunctionPass { /// destination register of the MachineInstr passed in. It returns true if /// that super register is dead just prior to \p OrigMI, and false if not. bool getSuperRegDestIfDead(MachineInstr *OrigMI, - unsigned &SuperDestReg) const; + Register &SuperDestReg) const; /// Change the MachineInstr \p MI into the equivalent extending load to 32 bit /// register if it is safe to do so. Return the replacement instruction if @@ -92,6 +92,12 @@ class FixupBWInstPass : public MachineFunctionPass { /// nullptr. MachineInstr *tryReplaceCopy(MachineInstr *MI) const; + /// Change the MachineInstr \p MI into the equivalent extend to 32 bit + /// register if it is safe to do so. Return the replacement instruction if + /// OK, otherwise return nullptr. + MachineInstr *tryReplaceExtend(unsigned New32BitOpcode, + MachineInstr *MI) const; + // Change the MachineInstr \p MI into an eqivalent 32 bit instruction if // possible. Return the replacement instruction if OK, return nullptr // otherwise. @@ -169,10 +175,10 @@ bool FixupBWInstPass::runOnMachineFunction(MachineFunction &MF) { /// /// If so, return that super register in \p SuperDestReg. bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI, - unsigned &SuperDestReg) const { + Register &SuperDestReg) const { auto *TRI = &TII->getRegisterInfo(); - unsigned OrigDestReg = OrigMI->getOperand(0).getReg(); + Register OrigDestReg = OrigMI->getOperand(0).getReg(); SuperDestReg = getX86SubSuperRegister(OrigDestReg, 32); const auto SubRegIdx = TRI->getSubRegIndex(SuperDestReg, OrigDestReg); @@ -232,12 +238,12 @@ bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI, // %ax = KILL %ax, implicit killed %eax // RET 0, %ax unsigned Opc = OrigMI->getOpcode(); (void)Opc; - // These are the opcodes currently handled by the pass, if something - // else will be added we need to ensure that new opcode has the same - // properties. - assert((Opc == X86::MOV8rm || Opc == X86::MOV16rm || Opc == X86::MOV8rr || - Opc == X86::MOV16rr) && - "Unexpected opcode."); + // These are the opcodes currently known to work with the code below, if + // something // else will be added we need to ensure that new opcode has the + // same properties. 
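getSuperRegDestIfDead asks whether the 32-bit super register is dead so the pass may rewrite an 8/16-bit MOV (and, further down, MOVSX/MOVZX) into its 32-bit form. A small standalone check of the safety argument: when only the low bits are read afterwards, the widened zero-extension yields the same value in those bits, while on x86-64 the 32-bit form also clears the upper half and avoids a partial-register merge.

// widen_zext_sketch.cpp -- widening a zero-extension is value-preserving for
// the bits that remain live.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned v = 0; v <= 0xFF; ++v) {
    uint8_t Src = static_cast<uint8_t>(v);

    uint16_t Narrow = static_cast<uint16_t>(Src); // MOVZX16rr8-style result
    uint32_t Wide = static_cast<uint32_t>(Src);   // MOVZX32rr8-style result

    // If no later instruction reads bits 16..31 of the destination (the
    // "super register is dead" check), the two forms are indistinguishable.
    assert(static_cast<uint16_t>(Wide) == Narrow);
  }
  return 0;
}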
+ if (Opc != X86::MOV8rm && Opc != X86::MOV16rm && Opc != X86::MOV8rr && + Opc != X86::MOV16rr) + return false; bool IsDefined = false; for (auto &MO: OrigMI->implicit_operands()) { @@ -247,7 +253,7 @@ bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI, assert((MO.isDef() || MO.isUse()) && "Expected Def or Use only!"); if (MO.isDef() && TRI->isSuperRegisterEq(OrigDestReg, MO.getReg())) - IsDefined = true; + IsDefined = true; // If MO is a use of any part of the destination register but is not equal // to OrigDestReg or one of its subregisters, we cannot use SuperDestReg. @@ -268,7 +274,7 @@ bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI, MachineInstr *FixupBWInstPass::tryReplaceLoad(unsigned New32BitOpcode, MachineInstr *MI) const { - unsigned NewDestReg; + Register NewDestReg; // We are going to try to rewrite this load to a larger zero-extending // load. This is safe if all portions of the 32 bit super-register @@ -295,11 +301,11 @@ MachineInstr *FixupBWInstPass::tryReplaceCopy(MachineInstr *MI) const { auto &OldDest = MI->getOperand(0); auto &OldSrc = MI->getOperand(1); - unsigned NewDestReg; + Register NewDestReg; if (!getSuperRegDestIfDead(MI, NewDestReg)) return nullptr; - unsigned NewSrcReg = getX86SubSuperRegister(OldSrc.getReg(), 32); + Register NewSrcReg = getX86SubSuperRegister(OldSrc.getReg(), 32); // This is only correct if we access the same subregister index: otherwise, // we could try to replace "movb %ah, %al" with "movl %eax, %eax". @@ -326,6 +332,33 @@ MachineInstr *FixupBWInstPass::tryReplaceCopy(MachineInstr *MI) const { return MIB; } +MachineInstr *FixupBWInstPass::tryReplaceExtend(unsigned New32BitOpcode, + MachineInstr *MI) const { + Register NewDestReg; + if (!getSuperRegDestIfDead(MI, NewDestReg)) + return nullptr; + + // Don't interfere with formation of CBW instructions which should be a + // shorter encoding than even the MOVSX32rr8. It's also immunte to partial + // merge issues on Intel CPUs. + if (MI->getOpcode() == X86::MOVSX16rr8 && + MI->getOperand(0).getReg() == X86::AX && + MI->getOperand(1).getReg() == X86::AL) + return nullptr; + + // Safe to change the instruction. + MachineInstrBuilder MIB = + BuildMI(*MF, MI->getDebugLoc(), TII->get(New32BitOpcode), NewDestReg); + + unsigned NumArgs = MI->getNumOperands(); + for (unsigned i = 1; i < NumArgs; ++i) + MIB.add(MI->getOperand(i)); + + MIB.setMemRefs(MI->memoperands()); + + return MIB; +} + MachineInstr *FixupBWInstPass::tryReplaceInstr(MachineInstr *MI, MachineBasicBlock &MBB) const { // See if this is an instruction of the type we are currently looking for. @@ -355,6 +388,15 @@ MachineInstr *FixupBWInstPass::tryReplaceInstr(MachineInstr *MI, // of the register. return tryReplaceCopy(MI); + case X86::MOVSX16rr8: + return tryReplaceExtend(X86::MOVSX32rr8, MI); + case X86::MOVSX16rm8: + return tryReplaceExtend(X86::MOVSX32rm8, MI); + case X86::MOVZX16rr8: + return tryReplaceExtend(X86::MOVZX32rr8, MI); + case X86::MOVZX16rm8: + return tryReplaceExtend(X86::MOVZX32rm8, MI); + default: // nothing to do here. break; diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp index 041529a0be68..543dc8b00fa0 100644 --- a/lib/Target/X86/X86FixupLEAs.cpp +++ b/lib/Target/X86/X86FixupLEAs.cpp @@ -67,8 +67,8 @@ class FixupLEAPass : public MachineFunctionPass { /// - LEA that uses RIP relative addressing mode /// - LEA that uses 16-bit addressing mode " /// This function currently handles the first 2 cases only. 
- MachineInstr *processInstrForSlow3OpLEA(MachineInstr &MI, - MachineBasicBlock &MBB); + void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I, + MachineBasicBlock &MBB, bool OptIncDec); /// Look for LEAs that are really two address LEAs that we might be able to /// turn into regular ADD instructions. @@ -216,14 +216,10 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) { if (optTwoAddrLEA(I, MBB, OptIncDec, UseLEAForSP)) continue; - if (IsSlowLEA) { + if (IsSlowLEA) processInstructionForSlowLEA(I, MBB); - } else if (IsSlow3OpsLEA) { - if (auto *NewMI = processInstrForSlow3OpLEA(*I, MBB)) { - MBB.erase(I); - I = NewMI; - } - } + else if (IsSlow3OpsLEA) + processInstrForSlow3OpLEA(I, MBB, OptIncDec); } // Second pass for creating LEAs. This may reverse some of the @@ -301,18 +297,14 @@ static inline bool isInefficientLEAReg(unsigned Reg) { Reg == X86::R13D || Reg == X86::R13; } -static inline bool isRegOperand(const MachineOperand &Op) { - return Op.isReg() && Op.getReg() != X86::NoRegister; -} - /// Returns true if this LEA uses base an index registers, and the base register /// is known to be inefficient for the subtarget. // TODO: use a variant scheduling class to model the latency profile // of LEA instructions, and implement this logic as a scheduling predicate. static inline bool hasInefficientLEABaseReg(const MachineOperand &Base, const MachineOperand &Index) { - return Base.isReg() && isInefficientLEAReg(Base.getReg()) && - isRegOperand(Index); + return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() && + Index.getReg() != X86::NoRegister; } static inline bool hasLEAOffset(const MachineOperand &Offset) { @@ -372,9 +364,9 @@ bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I, !TII->isSafeToClobberEFLAGS(MBB, I)) return false; - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned BaseReg = Base.getReg(); - unsigned IndexReg = Index.getReg(); + Register DestReg = MI.getOperand(0).getReg(); + Register BaseReg = Base.getReg(); + Register IndexReg = Index.getReg(); // Don't change stack adjustment LEAs. 
if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP)) @@ -500,9 +492,9 @@ void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I, if (Segment.getReg() != 0 || !Offset.isImm() || !TII->isSafeToClobberEFLAGS(MBB, I)) return; - const unsigned DstR = Dst.getReg(); - const unsigned SrcR1 = Base.getReg(); - const unsigned SrcR2 = Index.getReg(); + const Register DstR = Dst.getReg(); + const Register SrcR1 = Base.getReg(); + const Register SrcR2 = Index.getReg(); if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR)) return; if (Scale.getImm() > 1) @@ -534,111 +526,150 @@ void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I, } } -MachineInstr * -FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI, - MachineBasicBlock &MBB) { +void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I, + MachineBasicBlock &MBB, + bool OptIncDec) { + MachineInstr &MI = *I; const unsigned LEAOpcode = MI.getOpcode(); - const MachineOperand &Dst = MI.getOperand(0); + const MachineOperand &Dest = MI.getOperand(0); const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg); const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt); const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg); const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp); const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg); - if (!(TII->isThreeOperandsLEA(MI) || - hasInefficientLEABaseReg(Base, Index)) || + if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) || !TII->isSafeToClobberEFLAGS(MBB, MI) || Segment.getReg() != X86::NoRegister) - return nullptr; + return; + + Register DestReg = Dest.getReg(); + Register BaseReg = Base.getReg(); + Register IndexReg = Index.getReg(); + + if (MI.getOpcode() == X86::LEA64_32r) { + if (BaseReg != 0) + BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit); + if (IndexReg != 0) + IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit); + } - unsigned DstR = Dst.getReg(); - unsigned BaseR = Base.getReg(); - unsigned IndexR = Index.getReg(); - unsigned SSDstR = - (LEAOpcode == X86::LEA64_32r) ? getX86SubSuperRegister(DstR, 64) : DstR; bool IsScale1 = Scale.getImm() == 1; - bool IsInefficientBase = isInefficientLEAReg(BaseR); - bool IsInefficientIndex = isInefficientLEAReg(IndexR); + bool IsInefficientBase = isInefficientLEAReg(BaseReg); + bool IsInefficientIndex = isInefficientLEAReg(IndexReg); // Skip these cases since it takes more than 2 instructions // to replace the LEA instruction. - if (IsInefficientBase && SSDstR == BaseR && !IsScale1) - return nullptr; - if (LEAOpcode == X86::LEA64_32r && IsInefficientBase && - (IsInefficientIndex || !IsScale1)) - return nullptr; - - const DebugLoc DL = MI.getDebugLoc(); - const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(LEAOpcode)); - const MCInstrDesc &ADDri = TII->get(getADDriFromLEA(LEAOpcode, Offset)); + if (IsInefficientBase && DestReg == BaseReg && !IsScale1) + return; LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump();); LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";); + MachineInstr *NewMI = nullptr; + // First try to replace LEA with one or two (for the 3-op LEA case) // add instructions: // 1.lea (%base,%index,1), %base => add %index,%base // 2.lea (%base,%index,1), %index => add %base,%index - if (IsScale1 && (DstR == BaseR || DstR == IndexR)) { - const MachineOperand &Src = DstR == BaseR ? 
Index : Base; - MachineInstr *NewMI = - BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Src); - LLVM_DEBUG(NewMI->dump();); - // Create ADD instruction for the Offset in case of 3-Ops LEA. - if (hasLEAOffset(Offset)) { - NewMI = BuildMI(MBB, MI, DL, ADDri, DstR).addReg(DstR).add(Offset); - LLVM_DEBUG(NewMI->dump();); + if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) { + unsigned NewOpc = getADDrrFromLEA(MI.getOpcode()); + if (DestReg != BaseReg) + std::swap(BaseReg, IndexReg); + + if (MI.getOpcode() == X86::LEA64_32r) { + // TODO: Do we need the super register implicit use? + NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg) + .addReg(BaseReg) + .addReg(IndexReg) + .addReg(Base.getReg(), RegState::Implicit) + .addReg(Index.getReg(), RegState::Implicit); + } else { + NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg) + .addReg(BaseReg) + .addReg(IndexReg); } - return NewMI; - } - // If the base is inefficient try switching the index and base operands, - // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction: - // lea offset(%base,%index,scale),%dst => - // lea (%base,%index,scale); add offset,%dst - if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) { - MachineInstr *NewMI = BuildMI(MBB, MI, DL, TII->get(LEAOpcode)) - .add(Dst) - .add(IsInefficientBase ? Index : Base) - .add(Scale) - .add(IsInefficientBase ? Base : Index) - .addImm(0) - .add(Segment); + } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) { + // If the base is inefficient try switching the index and base operands, + // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction: + // lea offset(%base,%index,scale),%dst => + // lea (%base,%index,scale); add offset,%dst + NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode)) + .add(Dest) + .add(IsInefficientBase ? Index : Base) + .add(Scale) + .add(IsInefficientBase ? Base : Index) + .addImm(0) + .add(Segment); LLVM_DEBUG(NewMI->dump();); + } + + // If either replacement succeeded above, add the offset if needed, then + // replace the instruction. + if (NewMI) { // Create ADD instruction for the Offset in case of 3-Ops LEA. if (hasLEAOffset(Offset)) { - NewMI = BuildMI(MBB, MI, DL, ADDri, DstR).addReg(DstR).add(Offset); - LLVM_DEBUG(NewMI->dump();); + if (OptIncDec && Offset.isImm() && + (Offset.getImm() == 1 || Offset.getImm() == -1)) { + unsigned NewOpc = + getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1); + NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg) + .addReg(DestReg); + LLVM_DEBUG(NewMI->dump();); + } else { + unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset); + NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg) + .addReg(DestReg) + .add(Offset); + LLVM_DEBUG(NewMI->dump();); + } } - return NewMI; + + MBB.erase(I); + I = NewMI; + return; } + // Handle the rest of the cases with inefficient base register: - assert(SSDstR != BaseR && "SSDstR == BaseR should be handled already!"); + assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!"); assert(IsInefficientBase && "efficient base should be handled already!"); + // FIXME: Handle LEA64_32r. 
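processInstrForSlow3OpLEA splits a three-operand LEA into cheaper pieces: one or two ADDs (or an INC/DEC for a +/-1 displacement), or a two-operand LEA followed by an ADD of the displacement. A plain-integer sketch of the arithmetic being preserved; the variable names mirror the base/index/scale/disp fields and are not LLVM API.

// slow_lea_split_sketch.cpp -- the rewrites used for "slow LEA" targets
// compute the same address as the original 3-operand LEA.
#include <cassert>
#include <cstdint>

static uint64_t lea(uint64_t Base, uint64_t Index, uint64_t Scale, int64_t Disp) {
  return Base + Index * Scale + Disp;
}

int main() {
  uint64_t Base = 0x1000, Index = 0x20;
  int64_t Disp = 8;

  // Case 1: scale == 1 and the destination equals the base register:
  //   lea disp(%base,%index,1), %base  =>  add %index,%base ; add $disp,%base
  uint64_t Dst = Base;
  Dst += Index;                               // ADDrr
  Dst += Disp;                                // ADDri (or INC/DEC when disp is +/-1)
  assert(Dst == lea(Base, Index, 1, Disp));

  // Case 2: keep a 2-operand LEA and peel the displacement off:
  //   lea disp(%base,%index,scale), %dst  =>  lea (%base,%index,scale), %dst ; add $disp,%dst
  uint64_t Scale = 4;
  uint64_t Dst2 = lea(Base, Index, Scale, 0); // LEA without displacement
  Dst2 += Disp;                               // ADDri
  assert(Dst2 == lea(Base, Index, Scale, Disp));
  return 0;
}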
+ if (LEAOpcode == X86::LEA64_32r) + return; + // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst if (IsScale1 && !hasLEAOffset(Offset)) { - bool BIK = Base.isKill() && BaseR != IndexR; - TII->copyPhysReg(MBB, MI, DL, DstR, BaseR, BIK); + bool BIK = Base.isKill() && BaseReg != IndexReg; + TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK); LLVM_DEBUG(MI.getPrevNode()->dump();); - MachineInstr *NewMI = - BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Index); + unsigned NewOpc = getADDrrFromLEA(MI.getOpcode()); + NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg) + .addReg(DestReg) + .add(Index); LLVM_DEBUG(NewMI->dump();); - return NewMI; + return; } + // lea offset(%base,%index,scale), %dst => // lea offset( ,%index,scale), %dst; add %base,%dst - MachineInstr *NewMI = BuildMI(MBB, MI, DL, TII->get(LEAOpcode)) - .add(Dst) - .addReg(0) - .add(Scale) - .add(Index) - .add(Offset) - .add(Segment); + NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode)) + .add(Dest) + .addReg(0) + .add(Scale) + .add(Index) + .add(Offset) + .add(Segment); LLVM_DEBUG(NewMI->dump();); - NewMI = BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Base); + unsigned NewOpc = getADDrrFromLEA(MI.getOpcode()); + NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg) + .addReg(DestReg) + .add(Base); LLVM_DEBUG(NewMI->dump();); - return NewMI; + + MBB.erase(I); + I = NewMI; } diff --git a/lib/Target/X86/X86FixupSetCC.cpp b/lib/Target/X86/X86FixupSetCC.cpp index e2d4d1ede6f3..cbde280aa280 100644 --- a/lib/Target/X86/X86FixupSetCC.cpp +++ b/lib/Target/X86/X86FixupSetCC.cpp @@ -136,8 +136,8 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) { const TargetRegisterClass *RC = MF.getSubtarget().is64Bit() ? &X86::GR32RegClass : &X86::GR32_ABCDRegClass; - unsigned ZeroReg = MRI->createVirtualRegister(RC); - unsigned InsertReg = MRI->createVirtualRegister(RC); + Register ZeroReg = MRI->createVirtualRegister(RC); + Register InsertReg = MRI->createVirtualRegister(RC); // Initialize a register with 0. 
This must go before the eflags def BuildMI(MBB, FlagsDefMI, MI.getDebugLoc(), TII->get(X86::MOV32r0), diff --git a/lib/Target/X86/X86FlagsCopyLowering.cpp b/lib/Target/X86/X86FlagsCopyLowering.cpp index 5ce3255ea96a..cfba06fb6533 100644 --- a/lib/Target/X86/X86FlagsCopyLowering.cpp +++ b/lib/Target/X86/X86FlagsCopyLowering.cpp @@ -721,8 +721,9 @@ CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs( for (MachineInstr &MI : llvm::reverse(llvm::make_range(MBB.begin(), TestPos))) { X86::CondCode Cond = X86::getCondFromSETCC(MI); - if (Cond != X86::COND_INVALID && !MI.mayStore() && MI.getOperand(0).isReg() && - TRI->isVirtualRegister(MI.getOperand(0).getReg())) { + if (Cond != X86::COND_INVALID && !MI.mayStore() && + MI.getOperand(0).isReg() && + Register::isVirtualRegister(MI.getOperand(0).getReg())) { assert(MI.getOperand(0).isDef() && "A non-storing SETcc should always define a register!"); CondRegs[Cond] = MI.getOperand(0).getReg(); @@ -739,7 +740,7 @@ CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs( unsigned X86FlagsCopyLoweringPass::promoteCondToReg( MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, X86::CondCode Cond) { - unsigned Reg = MRI->createVirtualRegister(PromoteRC); + Register Reg = MRI->createVirtualRegister(PromoteRC); auto SetI = BuildMI(TestMBB, TestPos, TestLoc, TII->get(X86::SETCCr), Reg).addImm(Cond); (void)SetI; @@ -813,7 +814,7 @@ void X86FlagsCopyLoweringPass::rewriteArithmetic( MachineBasicBlock &MBB = *MI.getParent(); // Insert an instruction that will set the flag back to the desired value. - unsigned TmpReg = MRI->createVirtualRegister(PromoteRC); + Register TmpReg = MRI->createVirtualRegister(PromoteRC); auto AddI = BuildMI(MBB, MI.getIterator(), MI.getDebugLoc(), TII->get(X86::ADD8ri)) .addDef(TmpReg, RegState::Dead) @@ -974,7 +975,7 @@ void X86FlagsCopyLoweringPass::rewriteSetCarryExtended( // Now we need to turn this into a bitmask. We do this by subtracting it from // zero. - unsigned ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass); + Register ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass); BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg); ZeroReg = AdjustReg(ZeroReg); @@ -999,7 +1000,7 @@ void X86FlagsCopyLoweringPass::rewriteSetCarryExtended( default: llvm_unreachable("Invalid SETB_C* opcode!"); } - unsigned ResultReg = MRI->createVirtualRegister(&SetBRC); + Register ResultReg = MRI->createVirtualRegister(&SetBRC); BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg) .addReg(ZeroReg) .addReg(ExtCondReg); diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 074cf21d03f5..fcfb5bc91314 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -288,8 +288,8 @@ namespace { // Check if a COPY instruction is using FP registers. static bool isFPCopy(MachineInstr &MI) { - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); return X86::RFP80RegClass.contains(DstReg) || X86::RFP80RegClass.contains(SrcReg); @@ -313,7 +313,7 @@ FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); } /// For example, this returns 3 for X86::FP3. 
static unsigned getFPReg(const MachineOperand &MO) { assert(MO.isReg() && "Expected an FP register!"); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!"); return Reg - X86::FP0; } diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index e310fe069117..1b469a814adc 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -35,8 +35,8 @@ using namespace llvm; X86FrameLowering::X86FrameLowering(const X86Subtarget &STI, - unsigned StackAlignOverride) - : TargetFrameLowering(StackGrowsDown, StackAlignOverride, + MaybeAlign StackAlignOverride) + : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(), STI.is64Bit() ? -8 : -4), STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) { // Cache a bunch of frame-related predicates for this subtarget. @@ -176,7 +176,7 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, MachineOperand &MO = MBBI->getOperand(i); if (!MO.isReg() || MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) @@ -216,7 +216,7 @@ flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) { for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg != X86::EFLAGS) continue; @@ -995,11 +995,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO; bool NeedsDwarfCFI = !IsWin64Prologue && (MMI.hasDebugInfo() || Fn.needsUnwindTableEntry()); - unsigned FramePtr = TRI->getFrameRegister(MF); - const unsigned MachineFramePtr = + Register FramePtr = TRI->getFrameRegister(MF); + const Register MachineFramePtr = STI.isTarget64BitILP32() - ? getX86SubSuperRegister(FramePtr, 64) : FramePtr; - unsigned BasePtr = TRI->getBaseRegister(); + ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr; + Register BasePtr = TRI->getBaseRegister(); bool HasWinCFI = false; // Debug location must be unknown since the first debug location is used @@ -1016,14 +1016,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); bool UseStackProbe = !STI.getTargetLowering()->getStackProbeSymbolName(MF).empty(); - - // The default stack probe size is 4096 if the function has no stackprobesize - // attribute. - unsigned StackProbeSize = 4096; - if (Fn.hasFnAttribute("stack-probe-size")) - Fn.getFnAttribute("stack-probe-size") - .getValueAsString() - .getAsInteger(0, StackProbeSize); + unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF); // Re-align the stack on 64-bit if the x86-interrupt calling convention is // used and an error code was pushed, since the x86-64 ABI requires a 16-byte @@ -1081,7 +1074,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, int stackGrowth = -SlotSize; // Find the funclet establisher parameter - unsigned Establisher = X86::NoRegister; + Register Establisher = X86::NoRegister; if (IsClrFunclet) Establisher = Uses64BitFramePtr ? 
X86::RCX : X86::ECX; else if (IsFunclet) @@ -1192,7 +1185,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, (MBBI->getOpcode() == X86::PUSH32r || MBBI->getOpcode() == X86::PUSH64r)) { PushedRegs = true; - unsigned Reg = MBBI->getOperand(0).getReg(); + Register Reg = MBBI->getOperand(0).getReg(); ++MBBI; if (!HasFP && NeedsDwarfCFI) { @@ -1396,9 +1389,13 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, int FI; if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) { if (X86::FR64RegClass.contains(Reg)) { + int Offset; unsigned IgnoredFrameReg; - int Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg); - Offset += SEHFrameOffset; + if (IsWin64Prologue && IsFunclet) + Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg); + else + Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg) + + SEHFrameOffset; HasWinCFI = true; assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data"); @@ -1554,9 +1551,13 @@ X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const { unsigned X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const { + const X86MachineFunctionInfo *X86FI = MF.getInfo(); // This is the size of the pushed CSRs. - unsigned CSSize = - MF.getInfo()->getCalleeSavedFrameSize(); + unsigned CSSize = X86FI->getCalleeSavedFrameSize(); + // This is the size of callee saved XMMs. + const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); + unsigned XMMSize = WinEHXMMSlotInfo.size() * + TRI->getSpillSize(X86::VR128RegClass); // This is the amount of stack a funclet needs to allocate. unsigned UsedSize; EHPersonality Personality = @@ -1576,7 +1577,7 @@ X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const { unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlignment()); // Subtract out the size of the callee saved registers. This is how much stack // each funclet will allocate. - return FrameSizeMinusRBP - CSSize; + return FrameSizeMinusRBP + XMMSize - CSSize; } static bool isTailCallOpcode(unsigned Opc) { @@ -1597,9 +1598,9 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, DL = MBBI->getDebugLoc(); // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. const bool Is64BitILP32 = STI.isTarget64BitILP32(); - unsigned FramePtr = TRI->getFrameRegister(MF); + Register FramePtr = TRI->getFrameRegister(MF); unsigned MachineFramePtr = - Is64BitILP32 ? getX86SubSuperRegister(FramePtr, 64) : FramePtr; + Is64BitILP32 ? 
Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr; bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); bool NeedsWin64CFI = @@ -1850,6 +1851,20 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, return Offset + FPDelta; } +int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, + int FI, unsigned &FrameReg) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const X86MachineFunctionInfo *X86FI = MF.getInfo(); + const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); + const auto it = WinEHXMMSlotInfo.find(FI); + + if (it == WinEHXMMSlotInfo.end()) + return getFrameIndexReference(MF, FI, FrameReg); + + FrameReg = TRI->getStackRegister(); + return alignTo(MFI.getMaxCallFrameSize(), getStackAlignment()) + it->second; +} + int X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI, unsigned &FrameReg, int Adjustment) const { @@ -1948,6 +1963,8 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots( X86MachineFunctionInfo *X86FI = MF.getInfo(); unsigned CalleeSavedFrameSize = 0; + unsigned XMMCalleeSavedFrameSize = 0; + auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta(); int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); @@ -1984,7 +2001,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots( // Since emitPrologue and emitEpilogue will handle spilling and restoring of // the frame register, we can delete it from CSI list and not have to worry // about avoiding it later. - unsigned FPReg = TRI->getFrameRegister(MF); + Register FPReg = TRI->getFrameRegister(MF); for (unsigned i = 0; i < CSI.size(); ++i) { if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) { CSI.erase(CSI.begin() + i); @@ -2025,12 +2042,20 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots( unsigned Size = TRI->getSpillSize(*RC); unsigned Align = TRI->getSpillAlignment(*RC); // ensure alignment - SpillSlotOffset -= std::abs(SpillSlotOffset) % Align; + assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86"); + SpillSlotOffset = -alignTo(-SpillSlotOffset, Align); + // spill into slot SpillSlotOffset -= Size; int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset); CSI[i - 1].setFrameIdx(SlotIndex); MFI.ensureMaxAlignment(Align); + + // Save the start offset and size of XMM in stack frame for funclets. + if (X86::VR128RegClass.contains(Reg)) { + WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize; + XMMCalleeSavedFrameSize += Size; + } } return true; @@ -2200,7 +2225,7 @@ void X86FrameLowering::determineCalleeSaves(MachineFunction &MF, // Spill the BasePtr if it's used. 
if (TRI->hasBasePointer(MF)){ - unsigned BasePtr = TRI->getBaseRegister(); + Register BasePtr = TRI->getBaseRegister(); if (STI.isTarget64BitILP32()) BasePtr = getX86SubSuperRegister(BasePtr, 64); SavedRegs.set(BasePtr); @@ -2212,7 +2237,7 @@ HasNestArgument(const MachineFunction *MF) { const Function &F = MF->getFunction(); for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; I++) { - if (I->hasNestAttr()) + if (I->hasNestAttr() && !I->use_empty()) return true; } return false; @@ -2244,7 +2269,8 @@ GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Pr bool IsNested = HasNestArgument(&MF); if (CallingConvention == CallingConv::X86_FastCall || - CallingConvention == CallingConv::Fast) { + CallingConvention == CallingConv::Fast || + CallingConvention == CallingConv::Tail) { if (IsNested) report_fatal_error("Segmented stacks does not support fastcall with " "nested function."); @@ -2525,6 +2551,18 @@ static unsigned getHiPELiteral( + " required but not provided"); } +// Return true if there are no non-ehpad successors to MBB and there are no +// non-meta instructions between MBBI and MBB.end(). +static bool blockEndIsUnreachable(const MachineBasicBlock &MBB, + MachineBasicBlock::const_iterator MBBI) { + return std::all_of( + MBB.succ_begin(), MBB.succ_end(), + [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) && + std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) { + return MI.isMetaInstruction(); + }); +} + /// Erlang programs may need a special prologue to handle the stack size they /// might need at runtime. That is because Erlang/OTP does not implement a C /// stack but uses a custom implementation of hybrid stack/heap architecture. @@ -2758,7 +2796,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, unsigned Opcode = I->getOpcode(); bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode(); DebugLoc DL = I->getDebugLoc(); - uint64_t Amount = !reserveCallFrame ? TII.getFrameSize(*I) : 0; + uint64_t Amount = TII.getFrameSize(*I); uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0; I = MBB.erase(I); auto InsertPos = skipDebugInstructionsForward(I, MBB.end()); @@ -2847,7 +2885,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, return I; } - if (isDestroy && InternalAmt) { + if (isDestroy && InternalAmt && !blockEndIsUnreachable(MBB, I)) { // If we are performing frame pointer elimination and if the callee pops // something off the stack pointer, add it back. We do this until we have // more advanced stack pointer tracking ability. 
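Both the new spill-slot rounding (SpillSlotOffset = -alignTo(-SpillSlotOffset, Align)) and getWin64EHFrameIndexRef (alignTo(MaxCallFrameSize, StackAlignment) + slot offset) reduce to rounding a magnitude up to an alignment boundary. A minimal alignTo written for illustration, assuming power-of-two alignments, rather than LLVM's own helper:

// align_to_sketch.cpp -- round a value up to a multiple of Align and apply it
// the way the frame-lowering hunks above do.
#include <cassert>
#include <cstdint>

static uint64_t alignToSketch(uint64_t Value, uint64_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 && "power-of-two alignment");
  return (Value + Align - 1) & ~(Align - 1);
}

int main() {
  // Negative, downward-growing spill offsets: align the magnitude, keep the sign.
  int64_t SpillSlotOffset = -20;
  SpillSlotOffset = -static_cast<int64_t>(alignToSketch(-SpillSlotOffset, 16));
  assert(SpillSlotOffset == -32);

  // Win64 EH XMM slots: measured from the stack pointer past the (aligned)
  // outgoing-call area, plus the per-slot offset recorded at spill time.
  uint64_t MaxCallFrameSize = 40, StackAlign = 16, SlotOffset = 16;
  uint64_t FrameOffset = alignToSketch(MaxCallFrameSize, StackAlign) + SlotOffset;
  assert(FrameOffset == 64);
  return 0;
}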
@@ -2912,8 +2950,8 @@ MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers( "restoring EBP/ESI on non-32-bit target"); MachineFunction &MF = *MBB.getParent(); - unsigned FramePtr = TRI->getFrameRegister(MF); - unsigned BasePtr = TRI->getBaseRegister(); + Register FramePtr = TRI->getFrameRegister(MF); + Register BasePtr = TRI->getBaseRegister(); WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo(); X86MachineFunctionInfo *X86FI = MF.getInfo(); MachineFrameInfo &MFI = MF.getFrameInfo(); diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index d32746e3a36e..2103d6471ead 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -25,7 +25,7 @@ class X86RegisterInfo; class X86FrameLowering : public TargetFrameLowering { public: - X86FrameLowering(const X86Subtarget &STI, unsigned StackAlignOverride); + X86FrameLowering(const X86Subtarget &STI, MaybeAlign StackAlignOverride); // Cached subtarget predicates. @@ -99,6 +99,8 @@ public: int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override; + int getWin64EHFrameIndexRef(const MachineFunction &MF, + int FI, unsigned &SPReg) const; int getFrameIndexReferenceSP(const MachineFunction &MF, int FI, unsigned &SPReg, int Adjustment) const; int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 95d31e62cafc..5b546d42d98a 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -253,6 +253,11 @@ namespace { return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment); } + bool tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N, + SDValue &Base, SDValue &Scale, + SDValue &Index, SDValue &Disp, + SDValue &Segment); + /// Implement addressing mode selection for inline asm expressions. bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, @@ -362,6 +367,11 @@ namespace { if (User->getNumOperands() != 2) continue; + // If this can match to INC/DEC, don't count it as a use. + if (User->getOpcode() == ISD::ADD && + (isOneConstant(SDValue(N, 0)) || isAllOnesConstant(SDValue(N, 0)))) + continue; + // Immediates that are used for offsets as part of stack // manipulation should be left alone. 
These are typically // used to indicate SP offsets for argument passing and @@ -502,8 +512,10 @@ namespace { bool shrinkAndImmediate(SDNode *N); bool isMaskZeroExtended(SDNode *N) const; bool tryShiftAmountMod(SDNode *N); + bool combineIncDecVector(SDNode *Node); bool tryShrinkShlLogicImm(SDNode *N); bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask); + bool tryMatchBitSelect(SDNode *N); MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad, const SDLoc &dl, MVT VT, SDNode *Node); @@ -746,7 +758,7 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { return false; LoadSDNode *LD = dyn_cast(Callee.getNode()); if (!LD || - LD->isVolatile() || + !LD->isSimple() || LD->getAddressingMode() != ISD::UNINDEXED || LD->getExtensionType() != ISD::NON_EXTLOAD) return false; @@ -873,10 +885,9 @@ void X86DAGToDAGISel::PreprocessISelDAG() { case ISD::FRINT: Imm = 0x4; break; } SDLoc dl(N); - SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, - N->getValueType(0), - N->getOperand(0), - CurDAG->getConstant(Imm, dl, MVT::i8)); + SDValue Res = CurDAG->getNode( + X86ISD::VRNDSCALE, dl, N->getValueType(0), N->getOperand(0), + CurDAG->getTargetConstant(Imm, dl, MVT::i8)); --I; CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); ++I; @@ -2305,10 +2316,10 @@ bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDNode *Parent, return false; // We can allow a full vector load here since narrowing a load is ok unless - // it's volatile. + // it's volatile or atomic. if (ISD::isNON_EXTLoad(N.getNode())) { LoadSDNode *LD = cast(N); - if (!LD->isVolatile() && + if (LD->isSimple() && IsProfitableToFold(N, LD, Root) && IsLegalToFold(N, Parent, Root, OptLevel)) { PatternNodeWithChain = N; @@ -2464,6 +2475,37 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N, Complexity += 2; } + // Heuristic: try harder to form an LEA from ADD if the operands set flags. + // Unlike ADD, LEA does not affect flags, so we will be less likely to require + // duplicating flag-producing instructions later in the pipeline. + if (N.getOpcode() == ISD::ADD) { + auto isMathWithFlags = [](SDValue V) { + switch (V.getOpcode()) { + case X86ISD::ADD: + case X86ISD::SUB: + case X86ISD::ADC: + case X86ISD::SBB: + /* TODO: These opcodes can be added safely, but we may want to justify + their inclusion for different reasons (better for reg-alloc). + case X86ISD::SMUL: + case X86ISD::UMUL: + case X86ISD::OR: + case X86ISD::XOR: + case X86ISD::AND: + */ + // Value 1 is the flag output of the node - verify it's not dead. + return !SDValue(V.getNode(), 1).use_empty(); + default: + return false; + } + }; + // TODO: This could be an 'or' rather than 'and' to make the transform more + // likely to happen. We might want to factor in whether there's a + // load folding opportunity for the math op that disappears with LEA. 
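The complexity bump above prefers forming an LEA over an ADD when either addend already produces live EFLAGS, because an ADD would overwrite those flags while LEA leaves them alone. A toy model of that reasoning, with an explicit flag slot standing in for EFLAGS and no LLVM types involved:

// lea_flags_sketch.cpp -- toy machine state showing why an LEA keeps earlier
// flags usable while an ADD would clobber them.
#include <cassert>
#include <cstdint>

struct ToyState {
  bool ZeroFlag = false; // stands in for EFLAGS.ZF
};

// ADD-style op: computes a sum and overwrites the flags.
static uint64_t addWithFlags(ToyState &S, uint64_t A, uint64_t B) {
  uint64_t R = A + B;
  S.ZeroFlag = (R == 0);
  return R;
}

// LEA-style op: computes the same sum but leaves the flags untouched.
static uint64_t leaNoFlags(const ToyState &, uint64_t A, uint64_t B) {
  return A + B;
}

int main() {
  ToyState S;
  uint64_t X = addWithFlags(S, 5, static_cast<uint64_t>(-5)); // sets ZF; flags are live
  bool FlagsBefore = S.ZeroFlag;

  uint64_t Sum = leaNoFlags(S, X, 7);     // LEA: ZF from the earlier ADD survives
  assert(S.ZeroFlag == FlagsBefore);      // a later JE could still use it

  uint64_t Sum2 = addWithFlags(S, X, 7);  // ADD: ZF now reflects this result instead
  assert(Sum == Sum2 && S.ZeroFlag == false);
  return 0;
}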
+ if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1))) + Complexity++; + } + if (AM.Disp) Complexity++; @@ -2544,6 +2586,7 @@ bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { + assert(Root && P && "Unknown root/parent nodes"); if (!ISD::isNON_EXTLoad(N.getNode()) || !IsProfitableToFold(N, P, Root) || !IsLegalToFold(N, P, Root, OptLevel)) @@ -2553,6 +2596,20 @@ bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N, N.getOperand(1), Base, Scale, Index, Disp, Segment); } +bool X86DAGToDAGISel::tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N, + SDValue &Base, SDValue &Scale, + SDValue &Index, SDValue &Disp, + SDValue &Segment) { + assert(Root && P && "Unknown root/parent nodes"); + if (N->getOpcode() != X86ISD::VBROADCAST_LOAD || + !IsProfitableToFold(N, P, Root) || + !IsLegalToFold(N, P, Root, OptLevel)) + return false; + + return selectAddr(N.getNode(), + N.getOperand(1), Base, Scale, Index, Disp, Segment); +} + /// Return an SDNode that returns the value of the global base register. /// Output instructions required to initialize the global base register, /// if necessary. @@ -3302,8 +3359,12 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) { SDValue ImplDef = SDValue( CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0); insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef); - NBits = CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, MVT::i32, ImplDef, - NBits); + + SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32); + insertDAGNode(*CurDAG, SDValue(Node, 0), SRIdxVal); + NBits = SDValue( + CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::i32, ImplDef, + NBits, SRIdxVal), 0); insertDAGNode(*CurDAG, SDValue(Node, 0), NBits); if (Subtarget->hasBMI2()) { @@ -3400,8 +3461,9 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) { // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM // hoisting the move immediate would make it worthwhile with a less optimal // BEXTR? - if (!Subtarget->hasTBM() && - !(Subtarget->hasBMI() && Subtarget->hasFastBEXTR())) + bool PreferBEXTR = + Subtarget->hasTBM() || (Subtarget->hasBMI() && Subtarget->hasFastBEXTR()); + if (!PreferBEXTR && !Subtarget->hasBMI2()) return nullptr; // Must have a shift right. @@ -3440,23 +3502,50 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) { if (Shift + MaskSize > NVT.getSizeInBits()) return nullptr; - SDValue New = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT); - unsigned ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri; - unsigned MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi; + // BZHI, if available, is always fast, unlike BEXTR. But even if we decide + // that we can't use BEXTR, it is only worthwhile using BZHI if the mask + // does not fit into 32 bits. Load folding is not a sufficient reason. + if (!PreferBEXTR && MaskSize <= 32) + return nullptr; - // BMI requires the immediate to placed in a register. - if (!Subtarget->hasTBM()) { - ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr; - MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm; + SDValue Control; + unsigned ROpc, MOpc; + + if (!PreferBEXTR) { + assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then."); + // If we can't make use of BEXTR then we can't fuse shift+mask stages. + // Let's perform the mask first, and apply shift later. 
Note that we need to + // widen the mask to account for the fact that we'll apply shift afterwards! + Control = CurDAG->getTargetConstant(Shift + MaskSize, dl, NVT); + ROpc = NVT == MVT::i64 ? X86::BZHI64rr : X86::BZHI32rr; + MOpc = NVT == MVT::i64 ? X86::BZHI64rm : X86::BZHI32rm; unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri; - New = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, New), 0); + Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0); + } else { + // The 'control' of BEXTR has the pattern of: + // [15...8 bit][ 7...0 bit] location + // [ bit count][ shift] name + // I.e. 0b000000011'00000001 means (x >> 0b1) & 0b11 + Control = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT); + if (Subtarget->hasTBM()) { + ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri; + MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi; + } else { + assert(Subtarget->hasBMI() && "We must have BMI1's BEXTR then."); + // BMI requires the immediate to placed in a register. + ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr; + MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm; + unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri; + Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0); + } } MachineSDNode *NewNode; SDValue Input = N0->getOperand(0); SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { - SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, New, Input.getOperand(0) }; + SDValue Ops[] = { + Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Control, Input.getOperand(0)}; SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other); NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); // Update the chain. @@ -3464,7 +3553,15 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) { // Record the mem-refs CurDAG->setNodeMemRefs(NewNode, {cast(Input)->getMemOperand()}); } else { - NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, New); + NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, Control); + } + + if (!PreferBEXTR) { + // We still need to apply the shift. + SDValue ShAmt = CurDAG->getTargetConstant(Shift, dl, NVT); + unsigned NewOpc = NVT == MVT::i64 ? X86::SHR64ri : X86::SHR32ri; + NewNode = + CurDAG->getMachineNode(NewOpc, dl, NVT, SDValue(NewNode, 0), ShAmt); } return NewNode; @@ -3735,6 +3832,52 @@ bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) { return true; } +/// Convert vector increment or decrement to sub/add with an all-ones constant: +/// add X, <1, 1...> --> sub X, <-1, -1...> +/// sub X, <1, 1...> --> add X, <-1, -1...> +/// The all-ones vector constant can be materialized using a pcmpeq instruction +/// that is commonly recognized as an idiom (has no register dependency), so +/// that's better/smaller than loading a splat 1 constant. 
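The rewritten matchBEXTRFromAndImm chooses between one BEXTR (control byte: shift in bits 7:0, field length in bits 15:8) and, when BEXTR is not worthwhile, a BZHI with a widened index followed by a shift. A standalone software model of both sequences, asserting they extract the same field; the helper names are invented for the sketch.

// bextr_vs_bzhi_sketch.cpp -- software models of the two sequences chosen
// above; both must yield (x >> Shift) with the low MaskSize bits kept.
#include <cassert>
#include <cstdint>

// BEXTR model: control[7:0] = start bit, control[15:8] = length.
static uint64_t bextrModel(uint64_t X, uint16_t Control) {
  unsigned Start = Control & 0xFF;
  unsigned Len = (Control >> 8) & 0xFF;
  if (Len >= 64)
    return X >> Start;
  return (X >> Start) & ((1ULL << Len) - 1);
}

// BZHI model: zero all bits at positions >= Index.
static uint64_t bzhiModel(uint64_t X, unsigned Index) {
  if (Index >= 64)
    return X;
  return X & ((1ULL << Index) - 1);
}

int main() {
  uint64_t X = 0x123456789abcdef0ULL;
  unsigned Shift = 12, MaskSize = 20;

  // Sequence 1 (PreferBEXTR): one BEXTR with the packed control word.
  uint16_t Control = static_cast<uint16_t>(Shift | (MaskSize << 8));
  uint64_t ViaBextr = bextrModel(X, Control);

  // Sequence 2 (BZHI path): mask first with a widened index, shift afterwards.
  uint64_t ViaBzhi = bzhiModel(X, Shift + MaskSize) >> Shift;

  assert(ViaBextr == ((X >> Shift) & ((1ULL << MaskSize) - 1)));
  assert(ViaBextr == ViaBzhi);
  return 0;
}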
+bool X86DAGToDAGISel::combineIncDecVector(SDNode *Node) { + assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB) && + "Unexpected opcode for increment/decrement transform"); + + EVT VT = Node->getValueType(0); + assert(VT.isVector() && "Should only be called for vectors."); + + SDValue X = Node->getOperand(0); + SDValue OneVec = Node->getOperand(1); + + APInt SplatVal; + if (!X86::isConstantSplat(OneVec, SplatVal) || !SplatVal.isOneValue()) + return false; + + SDLoc DL(Node); + SDValue OneConstant, AllOnesVec; + + APInt Ones = APInt::getAllOnesValue(32); + assert(VT.getSizeInBits() % 32 == 0 && + "Expected bit count to be a multiple of 32"); + OneConstant = CurDAG->getConstant(Ones, DL, MVT::i32); + insertDAGNode(*CurDAG, X, OneConstant); + + unsigned NumElts = VT.getSizeInBits() / 32; + assert(NumElts > 0 && "Expected to get non-empty vector."); + AllOnesVec = CurDAG->getSplatBuildVector(MVT::getVectorVT(MVT::i32, NumElts), + DL, OneConstant); + insertDAGNode(*CurDAG, X, AllOnesVec); + + AllOnesVec = CurDAG->getBitcast(VT, AllOnesVec); + insertDAGNode(*CurDAG, X, AllOnesVec); + + unsigned NewOpcode = Node->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD; + SDValue NewNode = CurDAG->getNode(NewOpcode, DL, VT, X, AllOnesVec); + + ReplaceNode(Node, NewNode.getNode()); + SelectCode(NewNode.getNode()); + return true; +} + /// If the high bits of an 'and' operand are known zero, try setting the /// high bits of an 'and' constant operand to produce a smaller encoding by /// creating a small, sign-extended negative immediate rather than a large @@ -3975,12 +4118,18 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc, if (CC != ISD::SETEQ && CC != ISD::SETNE) return false; - // See if we're comparing against zero. This should have been canonicalized - // to RHS during lowering. - if (!ISD::isBuildVectorAllZeros(Setcc.getOperand(1).getNode())) + SDValue SetccOp0 = Setcc.getOperand(0); + SDValue SetccOp1 = Setcc.getOperand(1); + + // Canonicalize the all zero vector to the RHS. + if (ISD::isBuildVectorAllZeros(SetccOp0.getNode())) + std::swap(SetccOp0, SetccOp1); + + // See if we're comparing against zero. + if (!ISD::isBuildVectorAllZeros(SetccOp1.getNode())) return false; - SDValue N0 = Setcc.getOperand(0); + SDValue N0 = SetccOp0; MVT CmpVT = N0.getSimpleValueType(); MVT CmpSVT = CmpVT.getVectorElementType(); @@ -4027,13 +4176,14 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc, auto findBroadcastedOp = [](SDValue Src, MVT CmpSVT, SDNode *&Parent) { // Look through single use bitcasts. 
- if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse()) - Src = Src.getOperand(0); - - if (Src.getOpcode() == X86ISD::VBROADCAST && Src.hasOneUse()) { + if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse()) { Parent = Src.getNode(); Src = Src.getOperand(0); - if (Src.getSimpleValueType() == CmpSVT) + } + + if (Src.getOpcode() == X86ISD::VBROADCAST_LOAD && Src.hasOneUse()) { + auto *MemIntr = cast(Src); + if (MemIntr->getMemoryVT().getSizeInBits() == CmpSVT.getSizeInBits()) return Src; } @@ -4045,17 +4195,18 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc, bool FoldedBCast = false; if (!FoldedLoad && CanFoldLoads && (CmpSVT == MVT::i32 || CmpSVT == MVT::i64)) { - SDNode *ParentNode = nullptr; + SDNode *ParentNode = N0.getNode(); if ((Load = findBroadcastedOp(Src1, CmpSVT, ParentNode))) { - FoldedBCast = tryFoldLoad(Root, ParentNode, Load, Tmp0, - Tmp1, Tmp2, Tmp3, Tmp4); + FoldedBCast = tryFoldBroadcast(Root, ParentNode, Load, Tmp0, + Tmp1, Tmp2, Tmp3, Tmp4); } // Try the other operand. if (!FoldedBCast) { + SDNode *ParentNode = N0.getNode(); if ((Load = findBroadcastedOp(Src0, CmpSVT, ParentNode))) { - FoldedBCast = tryFoldLoad(Root, ParentNode, Load, Tmp0, - Tmp1, Tmp2, Tmp3, Tmp4); + FoldedBCast = tryFoldBroadcast(Root, ParentNode, Load, Tmp0, + Tmp1, Tmp2, Tmp3, Tmp4); if (FoldedBCast) std::swap(Src0, Src1); } @@ -4125,7 +4276,7 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc, // Update the chain. ReplaceUses(Load.getValue(1), SDValue(CNode, 1)); // Record the mem-refs - CurDAG->setNodeMemRefs(CNode, {cast(Load)->getMemOperand()}); + CurDAG->setNodeMemRefs(CNode, {cast(Load)->getMemOperand()}); } else { if (IsMasked) CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1); @@ -4146,6 +4297,55 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc, return true; } +// Try to match the bitselect pattern (or (and A, B), (andn A, C)). Turn it +// into vpternlog. +bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) { + assert(N->getOpcode() == ISD::OR && "Unexpected opcode!"); + + MVT NVT = N->getSimpleValueType(0); + + // Make sure we support VPTERNLOG. + if (!NVT.isVector() || !Subtarget->hasAVX512()) + return false; + + // We need VLX for 128/256-bit. + if (!(Subtarget->hasVLX() || NVT.is512BitVector())) + return false; + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // Canonicalize AND to LHS. + if (N1.getOpcode() == ISD::AND) + std::swap(N0, N1); + + if (N0.getOpcode() != ISD::AND || + N1.getOpcode() != X86ISD::ANDNP || + !N0.hasOneUse() || !N1.hasOneUse()) + return false; + + // ANDN is not commutable, use it to pick down A and C. + SDValue A = N1.getOperand(0); + SDValue C = N1.getOperand(1); + + // AND is commutable, if one operand matches A, the other operand is B. + // Otherwise this isn't a match. 
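tryMatchBitSelect turns (A & B) | (~A & C) into a VPTERNLOG whose immediate, used just below, is 0xCA: the truth table of the bitwise select "A ? B : C", with A, B, C read as bits 2, 1, 0 of the table index (the assumed operand-to-index convention). The sketch derives the constant instead of taking it on faith.

// vpternlog_imm_sketch.cpp -- derive the ternary-logic immediate for the
// bit-select pattern (A & B) | (~A & C) and check it is 0xCA.
#include <cassert>
#include <cstdint>

int main() {
  uint8_t Imm = 0;
  for (unsigned Idx = 0; Idx < 8; ++Idx) {
    bool A = (Idx >> 2) & 1; // first source selects bit 2 of the table index
    bool B = (Idx >> 1) & 1; // second source selects bit 1
    bool C = (Idx >> 0) & 1; // third source selects bit 0
    bool Result = (A && B) || (!A && C); // per-bit select: A ? B : C
    if (Result)
      Imm |= (1u << Idx);
  }
  assert(Imm == 0xCA);

  // The same table applied bitwise reproduces the original expression.
  uint32_t VA = 0xF0F0F0F0, VB = 0x12345678, VC = 0x9abcdef0;
  uint32_t Expected = (VA & VB) | (~VA & VC);
  uint32_t FromTable = 0;
  for (unsigned Bit = 0; Bit < 32; ++Bit) {
    unsigned Idx = (((VA >> Bit) & 1) << 2) | (((VB >> Bit) & 1) << 1) | ((VC >> Bit) & 1);
    FromTable |= ((Imm >> Idx) & 1u) << Bit;
  }
  assert(FromTable == Expected);
  return 0;
}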
+ SDValue B; + if (N0.getOperand(0) == A) + B = N0.getOperand(1); + else if (N0.getOperand(1) == A) + B = N0.getOperand(0); + else + return false; + + SDLoc dl(N); + SDValue Imm = CurDAG->getTargetConstant(0xCA, dl, MVT::i8); + SDValue Ternlog = CurDAG->getNode(X86ISD::VPTERNLOG, dl, NVT, A, B, C, Imm); + ReplaceNode(N, Ternlog.getNode()); + SelectCode(Ternlog.getNode()); + return true; +} + void X86DAGToDAGISel::Select(SDNode *Node) { MVT NVT = Node->getSimpleValueType(0); unsigned Opcode = Node->getOpcode(); @@ -4170,6 +4370,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { unsigned Opc = 0; switch (IntNo) { + default: llvm_unreachable("Unexpected intrinsic!"); case Intrinsic::x86_sse3_monitor: if (!Subtarget->hasSSE3()) break; @@ -4303,9 +4504,16 @@ void X86DAGToDAGISel::Select(SDNode *Node) { if (tryShrinkShlLogicImm(Node)) return; + if (Opcode == ISD::OR && tryMatchBitSelect(Node)) + return; + LLVM_FALLTHROUGH; case ISD::ADD: case ISD::SUB: { + if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && NVT.isVector() && + combineIncDecVector(Node)) + return; + // Try to avoid folding immediates with multiple uses for optsize. // This code tries to select to register form directly to avoid going // through the isel table which might fold the immediate. We can't change @@ -4333,6 +4541,10 @@ void X86DAGToDAGISel::Select(SDNode *Node) { if (!isInt<8>(Val) && !isInt<32>(Val)) break; + // If this can match to INC/DEC, let it go. + if (Opcode == ISD::ADD && (Val == 1 || Val == -1)) + break; + // Check if we should avoid folding this immediate. if (!shouldAvoidImmediateInstFormsForSize(N1.getNode())) break; @@ -4610,7 +4822,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: LoReg = X86::AL; ClrReg = HiReg = X86::AH; - SExtOpcode = X86::CBW; + SExtOpcode = 0; // Not used. break; case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; @@ -4632,24 +4844,27 @@ void X86DAGToDAGISel::Select(SDNode *Node) { bool signBitIsZero = CurDAG->SignBitIsZero(N0); SDValue InFlag; - if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) { + if (NVT == MVT::i8) { // Special case for div8, just use a move with zero extension to AX to // clear the upper 8 bits (AH). SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain; MachineSDNode *Move; if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; - Move = CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32, - MVT::Other, Ops); + unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rm8 + : X86::MOVZX16rm8; + Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, MVT::Other, Ops); Chain = SDValue(Move, 1); ReplaceUses(N0.getValue(1), Chain); // Record the mem-refs CurDAG->setNodeMemRefs(Move, {cast(N0)->getMemOperand()}); } else { - Move = CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0); + unsigned Opc = (isSigned && !signBitIsZero) ? 
X86::MOVSX16rr8 + : X86::MOVZX16rr8; + Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, N0); Chain = CurDAG->getEntryNode(); } - Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, SDValue(Move, 0), + Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, SDValue(Move, 0), SDValue()); InFlag = Chain.getValue(1); } else { @@ -4996,10 +5211,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) { case ISD::FRINT: Imm = 0x4; break; } SDLoc dl(Node); - SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, - Node->getValueType(0), + SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, Node->getValueType(0), Node->getOperand(0), - CurDAG->getConstant(Imm, dl, MVT::i8)); + CurDAG->getTargetConstant(Imm, dl, MVT::i8)); ReplaceNode(Node, Res.getNode()); SelectCode(Res.getNode()); return; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0b4bf687e6cf..ed975e9248a8 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -65,17 +65,19 @@ using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); -static cl::opt ExperimentalVectorWideningLegalization( - "x86-experimental-vector-widening-legalization", cl::init(false), - cl::desc("Enable an experimental vector type legalization through widening " - "rather than promotion."), - cl::Hidden); - static cl::opt ExperimentalPrefLoopAlignment( "x86-experimental-pref-loop-alignment", cl::init(4), - cl::desc("Sets the preferable loop alignment for experiments " - "(the last x86-experimental-pref-loop-alignment bits" - " of the loop header PC will be 0)."), + cl::desc( + "Sets the preferable loop alignment for experiments (as log2 bytes)" + "(the last x86-experimental-pref-loop-alignment bits" + " of the loop header PC will be 0)."), + cl::Hidden); + +// Added in 10.0. +static cl::opt EnableOldKNLABI( + "x86-enable-old-knl-abi", cl::init(false), + cl::desc("Enables passing v32i16 and v64i8 in 2 YMM registers instead of " + "one ZMM register on AVX512F, but not AVX512BW targets."), cl::Hidden); static cl::opt MulConstantOptimization( @@ -84,6 +86,13 @@ static cl::opt MulConstantOptimization( "SHIFT, LEA, etc."), cl::Hidden); +static cl::opt ExperimentalUnorderedISEL( + "x86-experimental-unordered-atomic-isel", cl::init(false), + cl::desc("Use LoadSDNode and StoreSDNode instead of " + "AtomicSDNode for unordered atomic loads and " + "stores respectively."), + cl::Hidden); + /// Call this when the user attempts to do something unsupported, like /// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike /// report_fatal_error, so calling code should attempt to recover without @@ -196,7 +205,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // Integer absolute. if (Subtarget.hasCMov()) { setOperationAction(ISD::ABS , MVT::i16 , Custom); - setOperationAction(ISD::ABS , MVT::i32 , Custom); + setOperationAction(ISD::ABS , MVT::i32 , Custom); } setOperationAction(ISD::ABS , MVT::i64 , Custom); @@ -214,14 +223,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote); setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote); - if (Subtarget.is64Bit()) { - if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) - // f32/f64 are legal, f80 is custom. 
- setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); - else - setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); - setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); - } else if (!Subtarget.useSoftFloat()) { + if (!Subtarget.useSoftFloat()) { // We have an algorithm for SSE2->double, and we turn this into a // 64-bit FILD followed by conditional FADD for other targets. setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); @@ -277,29 +279,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote); setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote); - if (Subtarget.is64Bit()) { - if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) { - // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80. - setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom); - setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom); - } else { - setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); - setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand); - } - } else if (!Subtarget.useSoftFloat()) { - // Since AVX is a superset of SSE3, only check for SSE here. - if (Subtarget.hasSSE1() && !Subtarget.hasSSE3()) - // Expand FP_TO_UINT into a select. - // FIXME: We would like to use a Custom expander here eventually to do - // the optimal thing for SSE vs. the default expansion in the legalizer. - setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand); - else - // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom. - // With SSE3 we can use fisttpll to convert to a signed i64; without - // SSE, we're stuck with a fistpll. - setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom); - - setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom); + if (!Subtarget.useSoftFloat()) { + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); } // TODO: when we have SSE, these could be more efficient, by using movd/movq. @@ -345,11 +327,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); - setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); setOperationAction(ISD::FREM , MVT::f32 , Expand); setOperationAction(ISD::FREM , MVT::f64 , Expand); setOperationAction(ISD::FREM , MVT::f80 , Expand); + setOperationAction(ISD::FREM , MVT::f128 , Expand); setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); // Promote the i8 variants and force them on up to i32 which has a shorter @@ -396,15 +378,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // There's never any support for operations beyond MVT::f32. 
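// (The f128 entries added in this hunk simply mirror the existing f64/f80
//  ones: with FP16_TO_FP/FP_TO_FP16 marked Expand for f128, a half <-> fp128
//  conversion is presumably legalized through f32 first rather than selected
//  directly, matching the comment above.)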
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand); setTruncStoreAction(MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f80, MVT::f16, Expand); + setTruncStoreAction(MVT::f128, MVT::f16, Expand); if (Subtarget.hasPOPCNT()) { setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32); @@ -638,17 +624,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FMA, MVT::f64, Expand); setOperationAction(ISD::FMA, MVT::f32, Expand); - // Long double always uses X87, except f128 in MMX. + // f80 always uses X87. if (UseX87) { - if (Subtarget.is64Bit() && Subtarget.hasMMX()) { - addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass - : &X86::VR128RegClass); - ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat); - setOperationAction(ISD::FABS , MVT::f128, Custom); - setOperationAction(ISD::FNEG , MVT::f128, Custom); - setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom); - } - addRegisterClass(MVT::f80, &X86::RFP80RegClass); setOperationAction(ISD::UNDEF, MVT::f80, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); @@ -684,10 +661,60 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::LLRINT, MVT::f80, Expand); } + // f128 uses xmm registers, but most operations require libcalls. + if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) { + addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass + : &X86::VR128RegClass); + + addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps + + setOperationAction(ISD::FADD, MVT::f128, Custom); + setOperationAction(ISD::FSUB, MVT::f128, Custom); + setOperationAction(ISD::FDIV, MVT::f128, Custom); + setOperationAction(ISD::FMUL, MVT::f128, Custom); + setOperationAction(ISD::FMA, MVT::f128, Expand); + + setOperationAction(ISD::FABS, MVT::f128, Custom); + setOperationAction(ISD::FNEG, MVT::f128, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom); + + setOperationAction(ISD::FSIN, MVT::f128, Expand); + setOperationAction(ISD::FCOS, MVT::f128, Expand); + setOperationAction(ISD::FSINCOS, MVT::f128, Expand); + setOperationAction(ISD::FSQRT, MVT::f128, Expand); + + setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); + // We need to custom handle any FP_ROUND with an f128 input, but + // LegalizeDAG uses the result type to know when to run a custom handler. + // So we have to list all legal floating point result types here. 
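// (A sketch of what the result-type keying means in practice: an
//  (fp_round f128 -> f64) node only reaches the custom hook because of the
//  MVT::f64 entry below, and the f128 operand is presumably lowered to the
//  usual __trunctfdf2-style libcall, since no x86 instruction narrows fp128.)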
+ if (isTypeLegal(MVT::f32)) { + setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom); + } + if (isTypeLegal(MVT::f64)) { + setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom); + } + if (isTypeLegal(MVT::f80)) { + setOperationAction(ISD::FP_ROUND, MVT::f80, Custom); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom); + } + + setOperationAction(ISD::SETCC, MVT::f128, Custom); + + setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand); + setTruncStoreAction(MVT::f128, MVT::f32, Expand); + setTruncStoreAction(MVT::f128, MVT::f64, Expand); + setTruncStoreAction(MVT::f128, MVT::f80, Expand); + } + // Always use a library call for pow. setOperationAction(ISD::FPOW , MVT::f32 , Expand); setOperationAction(ISD::FPOW , MVT::f64 , Expand); setOperationAction(ISD::FPOW , MVT::f80 , Expand); + setOperationAction(ISD::FPOW , MVT::f128 , Expand); setOperationAction(ISD::FLOG, MVT::f80, Expand); setOperationAction(ISD::FLOG2, MVT::f80, Expand); @@ -716,7 +743,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // First set operation action for all vector types to either promote // (for widening) or expand (for scalarization). Then we will selectively // turn on ones that can be effectively codegen'd. - for (MVT VT : MVT::vector_valuetypes()) { + for (MVT VT : MVT::fixedlen_vector_valuetypes()) { setOperationAction(ISD::SDIV, VT, Expand); setOperationAction(ISD::UDIV, VT, Expand); setOperationAction(ISD::SREM, VT, Expand); @@ -754,7 +781,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::ZERO_EXTEND, VT, Expand); setOperationAction(ISD::ANY_EXTEND, VT, Expand); setOperationAction(ISD::SELECT_CC, VT, Expand); - for (MVT InnerVT : MVT::vector_valuetypes()) { + for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { setTruncStoreAction(InnerVT, VT, Expand); setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand); @@ -797,6 +824,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::LOAD, MVT::v2f32, Custom); setOperationAction(ISD::STORE, MVT::v2f32, Custom); + + setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Custom); } if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) { @@ -823,10 +852,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } setOperationAction(ISD::MUL, MVT::v2i8, Custom); - setOperationAction(ISD::MUL, MVT::v2i16, Custom); - setOperationAction(ISD::MUL, MVT::v2i32, Custom); setOperationAction(ISD::MUL, MVT::v4i8, Custom); - setOperationAction(ISD::MUL, MVT::v4i16, Custom); setOperationAction(ISD::MUL, MVT::v8i8, Custom); setOperationAction(ISD::MUL, MVT::v16i8, Custom); @@ -863,28 +889,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom); setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom); - if (!ExperimentalVectorWideningLegalization) { - // Use widening instead of promotion. 
- for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8, - MVT::v4i16, MVT::v2i16 }) { - setOperationAction(ISD::UADDSAT, VT, Custom); - setOperationAction(ISD::SADDSAT, VT, Custom); - setOperationAction(ISD::USUBSAT, VT, Custom); - setOperationAction(ISD::SSUBSAT, VT, Custom); - } - } - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); - // Provide custom widening for v2f32 setcc. This is really for VLX when - // setcc result type returns v2i1/v4i1 vector for v2f32/v4f32 leading to - // type legalization changing the result type to v4i1 during widening. - // It works fine for SSE2 and is probably faster so no need to qualify with - // VLX support. - setOperationAction(ISD::SETCC, MVT::v2i32, Custom); - for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { setOperationAction(ISD::SETCC, VT, Custom); setOperationAction(ISD::CTPOP, VT, Custom); @@ -904,19 +912,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); } - // We support custom legalizing of sext and anyext loads for specific - // memory vector types which we can load as a scalar (or sequence of - // scalars) and extend in-register to a legal 128-bit vector type. For sext - // loads these must work with a single scalar load. - for (MVT VT : MVT::integer_vector_valuetypes()) { - setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom); - } - for (auto VT : { MVT::v2f64, MVT::v2i64 }) { setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); @@ -938,7 +933,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom); // Custom legalize these to avoid over promotion or custom promotion. 
setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom); @@ -991,18 +985,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom); setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom); - if (ExperimentalVectorWideningLegalization) { - setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom); - } else { - setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom); - } + setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom); // In the customized shift lowering, the legal v4i32/v2i64 cases // in AVX2 will be recognized. @@ -1069,22 +1059,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal); } - if (!ExperimentalVectorWideningLegalization) { - // Avoid narrow result types when widening. The legal types are listed - // in the next loop. - for (MVT VT : MVT::integer_vector_valuetypes()) { - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom); - } - } - // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) { setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal); setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal); - if (!ExperimentalVectorWideningLegalization) - setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal); setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal); setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal); setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal); @@ -1145,6 +1123,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Custom); + if (!Subtarget.hasAVX512()) setOperationAction(ISD::BITCAST, MVT::v32i1, Custom); @@ -1292,10 +1272,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STORE, VT, Custom); } - if (HasInt256) - setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); - if (HasInt256) { + setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); + // Custom legalize 2x32 to get a little better code. 
setOperationAction(ISD::MGATHER, MVT::v2f32, Custom); setOperationAction(ISD::MGATHER, MVT::v2i32, Custom); @@ -1407,6 +1386,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f32, Custom); + setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal); setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal); setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal); @@ -1433,12 +1414,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); - if (ExperimentalVectorWideningLegalization) { - // Need to custom widen this if we don't have AVX512BW. - setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom); - } + // Need to custom widen this if we don't have AVX512BW. + setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom); for (auto VT : { MVT::v16f32, MVT::v8f64 }) { setOperationAction(ISD::FFLOOR, VT, Legal); @@ -1529,10 +1508,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MGATHER, VT, Custom); setOperationAction(ISD::MSCATTER, VT, Custom); } - // Need to custom split v32i16/v64i8 bitcasts. if (!Subtarget.hasBWI()) { + // Need to custom split v32i16/v64i8 bitcasts. setOperationAction(ISD::BITCAST, MVT::v32i16, Custom); setOperationAction(ISD::BITCAST, MVT::v64i8, Custom); + + // Better to split these into two 256-bit ops. + setOperationAction(ISD::BITREVERSE, MVT::v8i64, Custom); + setOperationAction(ISD::BITREVERSE, MVT::v16i32, Custom); } if (Subtarget.hasVBMI2()) { @@ -1777,6 +1760,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FSHR, VT, Custom); } } + + setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom); } // We want to custom lower some of our intrinsics. @@ -1905,13 +1892,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, MaxLoadsPerMemcmpOptSize = 2; // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4). - setPrefLoopAlignment(ExperimentalPrefLoopAlignment); + setPrefLoopAlignment(Align(1ULL << ExperimentalPrefLoopAlignment)); // An out-of-order CPU can speculatively execute past a predictable branch, // but a conditional move could be stalled by an expensive earlier operation. PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder(); EnableExtLdPromotion = true; - setPrefFunctionAlignment(4); // 2^4 bytes. 
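// (Both alignment hooks now take an llvm::Align in bytes rather than a raw
//  log2 exponent: the loop alignment above becomes Align(1ULL << N), and the
//  old function alignment of 4 -- i.e. 2^4 bytes -- becomes Align(16) below.)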
+ setPrefFunctionAlignment(Align(16)); verifyIntrinsicTables(); } @@ -1939,8 +1926,7 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const { if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) return TypeSplitVector; - if (ExperimentalVectorWideningLegalization && - VT.getVectorNumElements() != 1 && + if (VT.getVectorNumElements() != 1 && VT.getVectorElementType() != MVT::i1) return TypeWidenVector; @@ -1950,19 +1936,62 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const { MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const { + // v32i1 vectors should be promoted to v32i8 to match avx2. if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) return MVT::v32i8; + // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. + if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && + Subtarget.hasAVX512() && + (!isPowerOf2_32(VT.getVectorNumElements()) || + (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) || + (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) + return MVT::i8; + // FIXME: Should we just make these types legal and custom split operations? + if ((VT == MVT::v32i16 || VT == MVT::v64i8) && + Subtarget.hasAVX512() && !Subtarget.hasBWI() && !EnableOldKNLABI) + return MVT::v16i32; return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); } unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const { + // v32i1 vectors should be promoted to v32i8 to match avx2. if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) return 1; + // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. + if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && + Subtarget.hasAVX512() && + (!isPowerOf2_32(VT.getVectorNumElements()) || + (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) || + (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) + return VT.getVectorNumElements(); + // FIXME: Should we just make these types legal and custom split operations? + if ((VT == MVT::v32i16 || VT == MVT::v64i8) && + Subtarget.hasAVX512() && !Subtarget.hasBWI() && !EnableOldKNLABI) + return 1; return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); } +unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv( + LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, + unsigned &NumIntermediates, MVT &RegisterVT) const { + // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. + if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && + Subtarget.hasAVX512() && + (!isPowerOf2_32(VT.getVectorNumElements()) || + (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) || + (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) { + RegisterVT = MVT::i8; + IntermediateVT = MVT::i1; + NumIntermediates = VT.getVectorNumElements(); + return NumIntermediates; + } + + return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT, + NumIntermediates, RegisterVT); +} + EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext& Context, EVT VT) const { @@ -2060,6 +2089,11 @@ EVT X86TargetLowering::getOptimalMemOpType( if (Size >= 16 && (!Subtarget.isUnalignedMem16Slow() || ((DstAlign == 0 || DstAlign >= 16) && (SrcAlign == 0 || SrcAlign >= 16)))) { + // FIXME: Check if unaligned 64-byte accesses are slow. 
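    // (v64i8/v32i16 are only legal with AVX512BW, so AVX512F-only targets
    //  fall back to v16i32, the widest legal 512-bit integer type; either
    //  way a 64-byte memcpy/memset can then become a single ZMM load/store
    //  when 512-bit vectors are preferred.)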
+ if (Size >= 64 && Subtarget.hasAVX512() && + (Subtarget.getPreferVectorWidth() >= 512)) { + return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32; + } // FIXME: Check if unaligned 32-byte accesses are slow. if (Size >= 32 && Subtarget.hasAVX() && (Subtarget.getPreferVectorWidth() >= 256)) { @@ -2403,8 +2437,8 @@ static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc, /// Breaks v64i1 value into two registers and adds the new node to the DAG static void Passv64i1ArgInRegs( - const SDLoc &Dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg, - SmallVector, 8> &RegsToPass, CCValAssign &VA, + const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg, + SmallVectorImpl> &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, const X86Subtarget &Subtarget) { assert(Subtarget.hasBWI() && "Expected AVX512BW target!"); assert(Subtarget.is32Bit() && "Expecting 32 bit target"); @@ -2537,7 +2571,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, assert(VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"); - Passv64i1ArgInRegs(dl, DAG, Chain, ValToCopy, RegsToPass, VA, RVLocs[++I], + Passv64i1ArgInRegs(dl, DAG, ValToCopy, RegsToPass, VA, RVLocs[++I], Subtarget); assert(2 == RegsToPass.size() && @@ -2816,6 +2850,10 @@ SDValue X86TargetLowering::LowerCallResult( ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) { errorUnsupported(DAG, dl, "SSE register return with SSE disabled"); VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts. + } else if (CopyVT == MVT::f64 && + (Is64Bit && !Subtarget.hasSSE2())) { + errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled"); + VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts. } // If we prefer to use the value in xmm registers, copy it out as f80 and @@ -2925,7 +2963,7 @@ static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, static bool canGuaranteeTCO(CallingConv::ID CC) { return (CC == CallingConv::Fast || CC == CallingConv::GHC || CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE || - CC == CallingConv::HHVM); + CC == CallingConv::HHVM || CC == CallingConv::Tail); } /// Return true if we might ever do TCO for calls with this calling convention. @@ -2951,7 +2989,7 @@ static bool mayTailCallThisCC(CallingConv::ID CC) { /// Return true if the function is being made into a tailcall target by /// changing its ABI. static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) { - return GuaranteedTailCallOpt && canGuaranteeTCO(CC); + return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || CC == CallingConv::Tail; } bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { @@ -3405,7 +3443,7 @@ SDValue X86TargetLowering::LowerFormalArguments( // Find the largest legal vector type. MVT VecVT = MVT::Other; // FIXME: Only some x86_32 calling conventions support AVX512. 
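  // (useAVX512Regs() is presumably stricter than hasAVX512(): it also honors
  //  the required/preferred vector width, so v16f32 is not chosen here when
  //  512-bit registers are disabled, e.g. under prefer-vector-width=256.)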
- if (Subtarget.hasAVX512() && + if (Subtarget.useAVX512Regs() && (Is64Bit || (CallConv == CallingConv::X86_VectorCall || CallConv == CallingConv::Intel_OCL_BI))) VecVT = MVT::v16f32; @@ -3577,6 +3615,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool IsWin64 = Subtarget.isCallingConvWin64(CallConv); StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU()); bool IsSibcall = false; + bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt || + CallConv == CallingConv::Tail; X86MachineFunctionInfo *X86Info = MF.getInfo(); auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls"); const auto *CI = dyn_cast_or_null(CLI.CS.getInstruction()); @@ -3597,8 +3637,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (Attr.getValueAsString() == "true") isTailCall = false; - if (Subtarget.isPICStyleGOT() && - !MF.getTarget().Options.GuaranteedTailCallOpt) { + if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO) { // If we are using a GOT, disable tail calls to external symbols with // default visibility. Tail calling such a symbol requires using a GOT // relocation, which forces early binding of the symbol. This breaks code @@ -3625,7 +3664,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Sibcalls are automatically detected tailcalls which do not require // ABI changes. - if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall) + if (!IsGuaranteeTCO && isTailCall) IsSibcall = true; if (isTailCall) @@ -3657,8 +3696,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // This is a sibcall. The memory operands are available in caller's // own caller's stack. NumBytes = 0; - else if (MF.getTarget().Options.GuaranteedTailCallOpt && - canGuaranteeTCO(CallConv)) + else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv)) NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); int FPDiff = 0; @@ -3782,8 +3820,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, assert(VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"); // Split v64i1 value into two registers - Passv64i1ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++I], - Subtarget); + Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget); } else if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); const TargetOptions &Options = DAG.getTarget().Options; @@ -4069,6 +4106,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, InFlag = Chain.getValue(1); DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); + // Save heapallocsite metadata. + if (CLI.CS) + if (MDNode *HeapAlloc = CLI.CS->getMetadata("heapallocsite")) + DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc); + // Create the CALLSEQ_END node. 
unsigned NumBytesForCalleeToPop; if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, @@ -4190,7 +4232,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, int FI = INT_MAX; if (Arg.getOpcode() == ISD::CopyFromReg) { unsigned VR = cast(Arg.getOperand(1))->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(VR)) + if (!Register::isVirtualRegister(VR)) return false; MachineInstr *Def = MRI->getVRegDef(VR); if (!Def) @@ -4279,6 +4321,8 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization( bool CCMatch = CallerCC == CalleeCC; bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC); bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC); + bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt || + CalleeCC == CallingConv::Tail; // Win64 functions have extra shadow space for argument homing. Don't do the // sibcall if the caller and callee have mismatched expectations for this @@ -4286,7 +4330,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization( if (IsCalleeWin64 != IsCallerWin64) return false; - if (DAG.getTarget().Options.GuaranteedTailCallOpt) { + if (IsGuaranteeTCO) { if (canGuaranteeTCO(CalleeCC) && CCMatch) return true; return false; @@ -4413,7 +4457,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization( CCValAssign &VA = ArgLocs[i]; if (!VA.isRegLoc()) continue; - unsigned Reg = VA.getLocReg(); + Register Reg = VA.getLocReg(); switch (Reg) { default: break; case X86::EAX: case X86::EDX: case X86::ECX: @@ -4652,7 +4696,11 @@ static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL, // X < 0 -> X == 0, jump on sign. return X86::COND_S; } - if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) { + if (SetCCOpcode == ISD::SETGE && RHSC->isNullValue()) { + // X >= 0 -> X == 0, jump on !sign. + return X86::COND_NS; + } + if (SetCCOpcode == ISD::SETLT && RHSC->getAPIntValue() == 1) { // X < 1 -> X <= 0 RHS = DAG.getConstant(0, DL, RHS.getValueType()); return X86::COND_LE; @@ -4760,7 +4808,7 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, ScalarVT = MVT::i32; Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements()); - Info.align = 1; + Info.align = Align::None(); Info.flags |= MachineMemOperand::MOStore; break; } @@ -4773,7 +4821,7 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, unsigned NumElts = std::min(DataVT.getVectorNumElements(), IndexVT.getVectorNumElements()); Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts); - Info.align = 1; + Info.align = Align::None(); Info.flags |= MachineMemOperand::MOLoad; break; } @@ -4785,7 +4833,7 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, unsigned NumElts = std::min(DataVT.getVectorNumElements(), IndexVT.getVectorNumElements()); Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts); - Info.align = 1; + Info.align = Align::None(); Info.flags |= MachineMemOperand::MOStore; break; } @@ -4811,6 +4859,8 @@ bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const { + assert(cast(Load)->isSimple() && "illegal to narrow"); + // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF // relocation target a movq or addq instruction: don't let the load shrink. 
SDValue BasePtr = cast(Load)->getBasePtr(); @@ -4852,11 +4902,12 @@ bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return true; } -bool X86TargetLowering::reduceSelectOfFPConstantLoads(bool IsFPSetCC) const { +bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const { // If we are using XMM registers in the ABI and the condition of the select is // a floating-point compare and we have blendv or conditional move, then it is // cheaper to select instead of doing a cross-register move and creating a // load that depends on the compare result. + bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128; return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX(); } @@ -4869,15 +4920,25 @@ bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const { return true; } -bool X86TargetLowering::decomposeMulByConstant(EVT VT, SDValue C) const { +bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, + SDValue C) const { // TODO: We handle scalars using custom code, but generic combining could make // that unnecessary. APInt MulC; if (!ISD::isConstantSplatVector(C.getNode(), MulC)) return false; + // Find the type this will be legalized too. Otherwise we might prematurely + // convert this to shl+add/sub and then still have to type legalize those ops. + // Another choice would be to defer the decision for illegal types until + // after type legalization. But constant splat vectors of i64 can't make it + // through type legalization on 32-bit targets so we would need to special + // case vXi64. + while (getTypeAction(Context, VT) != TypeLegal) + VT = getTypeToTransformTo(Context, VT); + // If vector multiply is legal, assume that's faster than shl + add/sub. - // TODO: Multiply is a complex op with higher latency and lower througput in + // TODO: Multiply is a complex op with higher latency and lower throughput in // most implementations, so this check could be loosened based on type // and/or a CPU attribute. if (isOperationLegal(ISD::MUL, VT)) @@ -5022,6 +5083,33 @@ bool X86TargetLowering::hasAndNot(SDValue Y) const { return Subtarget.hasSSE2(); } +bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const { + return X.getValueType().isScalarInteger(); // 'bt' +} + +bool X86TargetLowering:: + shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, + unsigned OldShiftOpcode, unsigned NewShiftOpcode, + SelectionDAG &DAG) const { + // Does baseline recommend not to perform the fold by default? + if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG)) + return false; + // For scalars this transform is always beneficial. + if (X.getValueType().isScalarInteger()) + return true; + // If all the shift amounts are identical, then transform is beneficial even + // with rudimentary SSE2 shifts. + if (DAG.isSplatValue(Y, /*AllowUndefs=*/true)) + return true; + // If we have AVX2 with it's powerful shift operations, then it's also good. + if (Subtarget.hasAVX2()) + return true; + // Pre-AVX2 vector codegen for this pattern is best for variant with 'shl'. 
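  // (Roughly, the generic fold gated here rewrites
  //    (X & (C << Y)) ==/!= 0   into   ((X l>> Y) & C) ==/!= 0
  //  and the mirrored l>>/<< form; NewShiftOpcode is the shift that ends up
  //  on X, so returning true only for ISD::SHL keeps just the variant whose
  //  pre-AVX2 vector lowering needs a left shift of X.)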
+ return NewShiftOpcode == ISD::SHL; +} + bool X86TargetLowering::shouldFoldConstantShiftPairToMask( const SDNode *N, CombineLevel Level) const { assert(((N->getOpcode() == ISD::SHL && @@ -5054,6 +5142,14 @@ bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const { return true; } +bool X86TargetLowering::shouldExpandShift(SelectionDAG &DAG, + SDNode *N) const { + if (DAG.getMachineFunction().getFunction().hasMinSize() && + !Subtarget.isOSWindows()) + return false; + return true; +} + bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const { // Any legal vector type can be splatted more efficiently than // loading/spilling from memory. @@ -5093,10 +5189,8 @@ static bool isUndefOrZero(int Val) { /// Return true if every element in Mask, beginning from position Pos and ending /// in Pos+Size is the undef sentinel value. static bool isUndefInRange(ArrayRef Mask, unsigned Pos, unsigned Size) { - for (unsigned i = Pos, e = Pos + Size; i != e; ++i) - if (Mask[i] != SM_SentinelUndef) - return false; - return true; + return llvm::all_of(Mask.slice(Pos, Size), + [](int M) { return M == SM_SentinelUndef; }); } /// Return true if the mask creates a vector whose lower half is undefined. @@ -5119,10 +5213,7 @@ static bool isInRange(int Val, int Low, int Hi) { /// Return true if the value of any element in Mask falls within the specified /// range (L, H]. static bool isAnyInRange(ArrayRef Mask, int Low, int Hi) { - for (int M : Mask) - if (isInRange(M, Low, Hi)) - return true; - return false; + return llvm::any_of(Mask, [Low, Hi](int M) { return isInRange(M, Low, Hi); }); } /// Return true if Val is undef or if its value falls within the @@ -5133,12 +5224,9 @@ static bool isUndefOrInRange(int Val, int Low, int Hi) { /// Return true if every element in Mask is undef or if its value /// falls within the specified range (L, H]. -static bool isUndefOrInRange(ArrayRef Mask, - int Low, int Hi) { - for (int M : Mask) - if (!isUndefOrInRange(M, Low, Hi)) - return false; - return true; +static bool isUndefOrInRange(ArrayRef Mask, int Low, int Hi) { + return llvm::all_of( + Mask, [Low, Hi](int M) { return isUndefOrInRange(M, Low, Hi); }); } /// Return true if Val is undef, zero or if its value falls within the @@ -5150,10 +5238,8 @@ static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) { /// Return true if every element in Mask is undef, zero or if its value /// falls within the specified range (L, H]. static bool isUndefOrZeroOrInRange(ArrayRef Mask, int Low, int Hi) { - for (int M : Mask) - if (!isUndefOrZeroOrInRange(M, Low, Hi)) - return false; - return true; + return llvm::all_of( + Mask, [Low, Hi](int M) { return isUndefOrZeroOrInRange(M, Low, Hi); }); } /// Return true if every element in Mask, beginning @@ -5171,8 +5257,9 @@ static bool isSequentialOrUndefInRange(ArrayRef Mask, unsigned Pos, /// from position Pos and ending in Pos+Size, falls within the specified /// sequential range (Low, Low+Size], or is undef or is zero. static bool isSequentialOrUndefOrZeroInRange(ArrayRef Mask, unsigned Pos, - unsigned Size, int Low) { - for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low) + unsigned Size, int Low, + int Step = 1) { + for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step) if (!isUndefOrZero(Mask[i]) && Mask[i] != Low) return false; return true; @@ -5182,10 +5269,8 @@ static bool isSequentialOrUndefOrZeroInRange(ArrayRef Mask, unsigned Pos, /// from position Pos and ending in Pos+Size is undef or is zero. 
static bool isUndefOrZeroInRange(ArrayRef Mask, unsigned Pos, unsigned Size) { - for (unsigned i = Pos, e = Pos + Size; i != e; ++i) - if (!isUndefOrZero(Mask[i])) - return false; - return true; + return llvm::all_of(Mask.slice(Pos, Size), + [](int M) { return isUndefOrZero(M); }); } /// Helper function to test whether a shuffle mask could be @@ -5357,6 +5442,8 @@ static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget, SDValue Vec; if (!Subtarget.hasSSE2() && VT.is128BitVector()) { Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32); + } else if (VT.isFloatingPoint()) { + Vec = DAG.getConstantFP(+0.0, dl, VT); } else if (VT.getVectorElementType() == MVT::i1) { assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && "Unexpected vector type"); @@ -5500,6 +5587,7 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl &Ops) { if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) && Idx == (VT.getVectorNumElements() / 2) && Src.getOpcode() == ISD::INSERT_SUBVECTOR && + Src.getOperand(1).getValueType() == SubVT && isNullConstant(Src.getOperand(2))) { Ops.push_back(Src.getOperand(1)); Ops.push_back(Sub); @@ -5593,7 +5681,7 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) { // May need to promote to a legal type. Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, - getZeroVector(WideOpVT, Subtarget, DAG, dl), + DAG.getConstant(0, dl, WideOpVT), SubVec, Idx); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx); } @@ -5609,14 +5697,14 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, if (IdxVal == 0) { // Zero lower bits of the Vec - SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8); + SDValue ShiftBits = DAG.getTargetConstant(SubVecNumElems, dl, MVT::i8); Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx); Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits); Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits); // Merge them together, SubVec should be zero extended. 
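      // (On these vXi1 mask types DAG.getConstant(0, dl, WideOpVT) is an
      //  all-zeros splat, so the change below is presumably just a simpler
      //  spelling of getZeroVector that isel still recognizes as a
      //  zero-extending insert_subvector.)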
SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, - getZeroVector(WideOpVT, Subtarget, DAG, dl), + DAG.getConstant(0, dl, WideOpVT), SubVec, ZeroIdx); Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx); @@ -5628,7 +5716,7 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, if (Vec.isUndef()) { assert(IdxVal != 0 && "Unexpected index"); SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, - DAG.getConstant(IdxVal, dl, MVT::i8)); + DAG.getTargetConstant(IdxVal, dl, MVT::i8)); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx); } @@ -5638,30 +5726,30 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, unsigned ShiftLeft = NumElems - SubVecNumElems; unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal; SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, - DAG.getConstant(ShiftLeft, dl, MVT::i8)); + DAG.getTargetConstant(ShiftLeft, dl, MVT::i8)); if (ShiftRight != 0) SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec, - DAG.getConstant(ShiftRight, dl, MVT::i8)); + DAG.getTargetConstant(ShiftRight, dl, MVT::i8)); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx); } // Simple case when we put subvector in the upper part if (IdxVal + SubVecNumElems == NumElems) { SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, - DAG.getConstant(IdxVal, dl, MVT::i8)); + DAG.getTargetConstant(IdxVal, dl, MVT::i8)); if (SubVecNumElems * 2 == NumElems) { // Special case, use legal zero extending insert_subvector. This allows // isel to opimitize when bits are known zero. Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx); Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, - getZeroVector(WideOpVT, Subtarget, DAG, dl), + DAG.getConstant(0, dl, WideOpVT), Vec, ZeroIdx); } else { // Otherwise use explicit shifts to zero the bits. Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx); NumElems = WideOpVT.getVectorNumElements(); - SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8); + SDValue ShiftBits = DAG.getTargetConstant(NumElems - IdxVal, dl, MVT::i8); Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits); Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits); } @@ -5675,30 +5763,47 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, // Widen the vector if needed. Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx); - // Move the current value of the bit to be replace to the lsbs. - Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, - DAG.getConstant(IdxVal, dl, MVT::i8)); - // Xor with the new bit. - Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Op, SubVec); - // Shift to MSB, filling bottom bits with 0. + + // Clear the upper bits of the subvector and move it to its insert position. unsigned ShiftLeft = NumElems - SubVecNumElems; - Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Op, - DAG.getConstant(ShiftLeft, dl, MVT::i8)); - // Shift to the final position, filling upper bits with 0. + SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, + DAG.getTargetConstant(ShiftLeft, dl, MVT::i8)); unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal; - Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op, - DAG.getConstant(ShiftRight, dl, MVT::i8)); - // Xor with original vector leaving the new value. 
- Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Vec, Op); + SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec, + DAG.getTargetConstant(ShiftRight, dl, MVT::i8)); + + // Isolate the bits below the insertion point. + unsigned LowShift = NumElems - IdxVal; + SDValue Low = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, + DAG.getTargetConstant(LowShift, dl, MVT::i8)); + Low = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Low, + DAG.getTargetConstant(LowShift, dl, MVT::i8)); + + // Isolate the bits after the last inserted bit. + unsigned HighShift = IdxVal + SubVecNumElems; + SDValue High = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, + DAG.getTargetConstant(HighShift, dl, MVT::i8)); + High = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, High, + DAG.getTargetConstant(HighShift, dl, MVT::i8)); + + // Now OR all 3 pieces together. + Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Low, High); + SubVec = DAG.getNode(ISD::OR, dl, WideOpVT, SubVec, Vec); + // Reduce to original width if needed. - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx); } -static SDValue concatSubVectors(SDValue V1, SDValue V2, EVT VT, - unsigned NumElems, SelectionDAG &DAG, - const SDLoc &dl, unsigned VectorWidth) { - SDValue V = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, dl, VectorWidth); - return insertSubVector(V, V2, NumElems / 2, DAG, dl, VectorWidth); +static SDValue concatSubVectors(SDValue V1, SDValue V2, SelectionDAG &DAG, + const SDLoc &dl) { + assert(V1.getValueType() == V2.getValueType() && "subvector type mismatch"); + EVT SubVT = V1.getValueType(); + EVT SubSVT = SubVT.getScalarType(); + unsigned SubNumElts = SubVT.getVectorNumElements(); + unsigned SubVectorWidth = SubVT.getSizeInBits(); + EVT VT = EVT::getVectorVT(*DAG.getContext(), SubSVT, 2 * SubNumElts); + SDValue V = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, dl, SubVectorWidth); + return insertSubVector(V, V2, SubNumElts, DAG, dl, SubVectorWidth); } /// Returns a vector of specified type with all bits set. @@ -5755,6 +5860,34 @@ static SDValue getExtendInVec(unsigned Opcode, const SDLoc &DL, EVT VT, return DAG.getNode(Opcode, DL, VT, In); } +// Match (xor X, -1) -> X. +// Match extract_subvector(xor X, -1) -> extract_subvector(X). +// Match concat_vectors(xor X, -1, xor Y, -1) -> concat_vectors(X, Y). +static SDValue IsNOT(SDValue V, SelectionDAG &DAG) { + V = peekThroughBitcasts(V); + if (V.getOpcode() == ISD::XOR && + ISD::isBuildVectorAllOnes(V.getOperand(1).getNode())) + return V.getOperand(0); + if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR && + (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) { + if (SDValue Not = IsNOT(V.getOperand(0), DAG)) { + Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), V.getValueType(), + Not, V.getOperand(1)); + } + } + SmallVector CatOps; + if (collectConcatOps(V.getNode(), CatOps)) { + for (SDValue &CatOp : CatOps) { + SDValue NotCat = IsNOT(CatOp, DAG); + if (!NotCat) return SDValue(); + CatOp = DAG.getBitcast(CatOp.getValueType(), NotCat); + } + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(V), V.getValueType(), CatOps); + } + return SDValue(); +} + /// Returns a vector_shuffle node for an unpackl operation. 
static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1, SDValue V2) { @@ -6003,6 +6136,37 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, } } + if (Op.getOpcode() == X86ISD::VBROADCAST_LOAD && + EltSizeInBits <= VT.getScalarSizeInBits()) { + auto *MemIntr = cast(Op); + if (MemIntr->getMemoryVT().getScalarSizeInBits() != VT.getScalarSizeInBits()) + return false; + + SDValue Ptr = MemIntr->getBasePtr(); + if (Ptr->getOpcode() == X86ISD::Wrapper || + Ptr->getOpcode() == X86ISD::WrapperRIP) + Ptr = Ptr->getOperand(0); + + auto *CNode = dyn_cast(Ptr); + if (!CNode || CNode->isMachineConstantPoolEntry() || + CNode->getOffset() != 0) + return false; + + if (const Constant *C = CNode->getConstVal()) { + unsigned SrcEltSizeInBits = C->getType()->getScalarSizeInBits(); + unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; + + APInt UndefSrcElts(NumSrcElts, 0); + SmallVector SrcEltBits(1, APInt(SrcEltSizeInBits, 0)); + if (CollectConstantBits(C, SrcEltBits[0], UndefSrcElts, 0)) { + if (UndefSrcElts[0]) + UndefSrcElts.setBits(0, NumSrcElts); + SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]); + return CastBitData(UndefSrcElts, SrcEltBits); + } + } + } + // Extract constant bits from a subvector broadcast. if (Op.getOpcode() == X86ISD::SUBV_BROADCAST) { SmallVector SubEltBits; @@ -6123,7 +6287,9 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, return false; } -static bool isConstantSplat(SDValue Op, APInt &SplatVal) { +namespace llvm { +namespace X86 { +bool isConstantSplat(SDValue Op, APInt &SplatVal) { APInt UndefElts; SmallVector EltBits; if (getTargetConstantBitsFromNode(Op, Op.getScalarValueSizeInBits(), @@ -6146,6 +6312,8 @@ static bool isConstantSplat(SDValue Op, APInt &SplatVal) { return false; } +} // namespace X86 +} // namespace llvm static bool getTargetShuffleMaskIndices(SDValue MaskNode, unsigned MaskEltSizeInBits, @@ -6551,13 +6719,12 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, return true; } -/// Check a target shuffle mask's inputs to see if we can set any values to -/// SM_SentinelZero - this is for elements that are known to be zero -/// (not just zeroable) from their inputs. +/// Decode a target shuffle mask and inputs and see if any values are +/// known to be undef or zero from their inputs. /// Returns true if the target shuffle mask was decoded. -static bool setTargetShuffleZeroElements(SDValue N, - SmallVectorImpl &Mask, - SmallVectorImpl &Ops) { +static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl &Mask, + SmallVectorImpl &Ops, + APInt &KnownUndef, APInt &KnownZero) { bool IsUnary; if (!isTargetShuffle(N.getOpcode())) return false; @@ -6566,15 +6733,17 @@ static bool setTargetShuffleZeroElements(SDValue N, if (!getTargetShuffleMask(N.getNode(), VT, true, Ops, Mask, IsUnary)) return false; + int Size = Mask.size(); SDValue V1 = Ops[0]; SDValue V2 = IsUnary ? V1 : Ops[1]; + KnownUndef = KnownZero = APInt::getNullValue(Size); V1 = peekThroughBitcasts(V1); V2 = peekThroughBitcasts(V2); assert((VT.getSizeInBits() % Mask.size()) == 0 && "Illegal split of shuffle value type"); - unsigned EltSizeInBits = VT.getSizeInBits() / Mask.size(); + unsigned EltSizeInBits = VT.getSizeInBits() / Size; // Extract known constant input data. 
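  // (Per shuffle input this records which lanes are undef and, for constant
  //  operands, the literal bits of each lane; the loop below now folds that
  //  information into the KnownUndef/KnownZero bit masks instead of
  //  rewriting Mask[] in place as setTargetShuffleZeroElements used to.)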
APInt UndefSrcElts[2]; @@ -6585,12 +6754,18 @@ static bool setTargetShuffleZeroElements(SDValue N, getTargetConstantBitsFromNode(V2, EltSizeInBits, UndefSrcElts[1], SrcEltBits[1], true, false)}; - for (int i = 0, Size = Mask.size(); i < Size; ++i) { + for (int i = 0; i < Size; ++i) { int M = Mask[i]; // Already decoded as SM_SentinelZero / SM_SentinelUndef. - if (M < 0) + if (M < 0) { + assert(isUndefOrZero(M) && "Unknown shuffle sentinel value!"); + if (SM_SentinelUndef == M) + KnownUndef.setBit(i); + if (SM_SentinelZero == M) + KnownZero.setBit(i); continue; + } // Determine shuffle input and normalize the mask. unsigned SrcIdx = M / Size; @@ -6599,7 +6774,7 @@ static bool setTargetShuffleZeroElements(SDValue N, // We are referencing an UNDEF input. if (V.isUndef()) { - Mask[i] = SM_SentinelUndef; + KnownUndef.setBit(i); continue; } @@ -6612,31 +6787,64 @@ static bool setTargetShuffleZeroElements(SDValue N, int Scale = Size / V.getValueType().getVectorNumElements(); int Idx = M / Scale; if (Idx != 0 && !VT.isFloatingPoint()) - Mask[i] = SM_SentinelUndef; + KnownUndef.setBit(i); else if (Idx == 0 && X86::isZeroNode(V.getOperand(0))) - Mask[i] = SM_SentinelZero; + KnownZero.setBit(i); continue; } // Attempt to extract from the source's constant bits. if (IsSrcConstant[SrcIdx]) { if (UndefSrcElts[SrcIdx][M]) - Mask[i] = SM_SentinelUndef; + KnownUndef.setBit(i); else if (SrcEltBits[SrcIdx][M] == 0) - Mask[i] = SM_SentinelZero; + KnownZero.setBit(i); } } - assert(VT.getVectorNumElements() == Mask.size() && + assert(VT.getVectorNumElements() == (unsigned)Size && "Different mask size from vector size!"); return true; } +// Replace target shuffle mask elements with known undef/zero sentinels. +static void resolveTargetShuffleFromZeroables(SmallVectorImpl &Mask, + const APInt &KnownUndef, + const APInt &KnownZero) { + unsigned NumElts = Mask.size(); + assert(KnownUndef.getBitWidth() == NumElts && + KnownZero.getBitWidth() == NumElts && "Shuffle mask size mismatch"); + + for (unsigned i = 0; i != NumElts; ++i) { + if (KnownUndef[i]) + Mask[i] = SM_SentinelUndef; + else if (KnownZero[i]) + Mask[i] = SM_SentinelZero; + } +} + +// Extract target shuffle mask sentinel elements to known undef/zero bitmasks. +static void resolveZeroablesFromTargetShuffle(const SmallVectorImpl &Mask, + APInt &KnownUndef, + APInt &KnownZero) { + unsigned NumElts = Mask.size(); + KnownUndef = KnownZero = APInt::getNullValue(NumElts); + + for (unsigned i = 0; i != NumElts; ++i) { + int M = Mask[i]; + if (SM_SentinelUndef == M) + KnownUndef.setBit(i); + if (SM_SentinelZero == M) + KnownZero.setBit(i); + } +} + // Forward declaration (for getFauxShuffleMask recursive check). -static bool resolveTargetShuffleInputs(SDValue Op, - SmallVectorImpl &Inputs, - SmallVectorImpl &Mask, - SelectionDAG &DAG); +// TODO: Use DemandedElts variant. +static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl &Inputs, + SmallVectorImpl &Mask, + SelectionDAG &DAG, unsigned Depth, + bool ResolveKnownElts); // Attempt to decode ops that could be represented as a shuffle mask. 
// The decoded shuffle mask may contain a different number of elements to the @@ -6644,7 +6852,8 @@ static bool resolveTargetShuffleInputs(SDValue Op, static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, SmallVectorImpl &Mask, SmallVectorImpl &Ops, - SelectionDAG &DAG) { + SelectionDAG &DAG, unsigned Depth, + bool ResolveKnownElts) { Mask.clear(); Ops.clear(); @@ -6685,7 +6894,7 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, Mask.push_back(SM_SentinelUndef); continue; } - uint64_t ByteBits = EltBits[i].getZExtValue(); + const APInt &ByteBits = EltBits[i]; if (ByteBits != 0 && ByteBits != 255) return false; Mask.push_back(ByteBits == ZeroMask ? SM_SentinelZero : i); @@ -6696,8 +6905,10 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, case ISD::OR: { // Inspect each operand at the byte level. We can merge these into a // blend shuffle mask if for each byte at least one is masked out (zero). - KnownBits Known0 = DAG.computeKnownBits(N.getOperand(0), DemandedElts); - KnownBits Known1 = DAG.computeKnownBits(N.getOperand(1), DemandedElts); + KnownBits Known0 = + DAG.computeKnownBits(N.getOperand(0), DemandedElts, Depth + 1); + KnownBits Known1 = + DAG.computeKnownBits(N.getOperand(1), DemandedElts, Depth + 1); if (Known0.One.isNullValue() && Known1.One.isNullValue()) { bool IsByteMask = true; unsigned NumSizeInBytes = NumSizeInBits / 8; @@ -6736,14 +6947,16 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, return false; SmallVector SrcMask0, SrcMask1; SmallVector SrcInputs0, SrcInputs1; - if (!resolveTargetShuffleInputs(N0, SrcInputs0, SrcMask0, DAG) || - !resolveTargetShuffleInputs(N1, SrcInputs1, SrcMask1, DAG)) + if (!getTargetShuffleInputs(N0, SrcInputs0, SrcMask0, DAG, Depth + 1, + true) || + !getTargetShuffleInputs(N1, SrcInputs1, SrcMask1, DAG, Depth + 1, + true)) return false; - int MaskSize = std::max(SrcMask0.size(), SrcMask1.size()); + size_t MaskSize = std::max(SrcMask0.size(), SrcMask1.size()); SmallVector Mask0, Mask1; scaleShuffleMask(MaskSize / SrcMask0.size(), SrcMask0, Mask0); scaleShuffleMask(MaskSize / SrcMask1.size(), SrcMask1, Mask1); - for (int i = 0; i != MaskSize; ++i) { + for (size_t i = 0; i != MaskSize; ++i) { if (Mask0[i] == SM_SentinelUndef && Mask1[i] == SM_SentinelUndef) Mask.push_back(SM_SentinelUndef); else if (Mask0[i] == SM_SentinelZero && Mask1[i] == SM_SentinelZero) @@ -6751,14 +6964,12 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, else if (Mask1[i] == SM_SentinelZero) Mask.push_back(Mask0[i]); else if (Mask0[i] == SM_SentinelZero) - Mask.push_back(Mask1[i] + (MaskSize * SrcInputs0.size())); + Mask.push_back(Mask1[i] + (int)(MaskSize * SrcInputs0.size())); else return false; } - for (SDValue &Op : SrcInputs0) - Ops.push_back(Op); - for (SDValue &Op : SrcInputs1) - Ops.push_back(Op); + Ops.append(SrcInputs0.begin(), SrcInputs0.end()); + Ops.append(SrcInputs1.begin(), SrcInputs1.end()); return true; } case ISD::INSERT_SUBVECTOR: { @@ -6786,8 +6997,8 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, // Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(SRC1)). 
SmallVector SubMask; SmallVector SubInputs; - if (!resolveTargetShuffleInputs(peekThroughOneUseBitcasts(Sub), SubInputs, - SubMask, DAG)) + if (!getTargetShuffleInputs(peekThroughOneUseBitcasts(Sub), SubInputs, + SubMask, DAG, Depth + 1, ResolveKnownElts)) return false; if (SubMask.size() != NumSubElts) { assert(((SubMask.size() % NumSubElts) == 0 || @@ -6911,14 +7122,16 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, // as a truncation shuffle. if (Opcode == X86ISD::PACKSS) { if ((!N0.isUndef() && - DAG.ComputeNumSignBits(N0, EltsLHS) <= NumBitsPerElt) || + DAG.ComputeNumSignBits(N0, EltsLHS, Depth + 1) <= NumBitsPerElt) || (!N1.isUndef() && - DAG.ComputeNumSignBits(N1, EltsRHS) <= NumBitsPerElt)) + DAG.ComputeNumSignBits(N1, EltsRHS, Depth + 1) <= NumBitsPerElt)) return false; } else { APInt ZeroMask = APInt::getHighBitsSet(2 * NumBitsPerElt, NumBitsPerElt); - if ((!N0.isUndef() && !DAG.MaskedValueIsZero(N0, ZeroMask, EltsLHS)) || - (!N1.isUndef() && !DAG.MaskedValueIsZero(N1, ZeroMask, EltsRHS))) + if ((!N0.isUndef() && + !DAG.MaskedValueIsZero(N0, ZeroMask, EltsLHS, Depth + 1)) || + (!N1.isUndef() && + !DAG.MaskedValueIsZero(N1, ZeroMask, EltsRHS, Depth + 1))) return false; } @@ -7061,23 +7274,45 @@ static void resolveTargetShuffleInputsAndMask(SmallVectorImpl &Inputs, Inputs = UsedInputs; } -/// Calls setTargetShuffleZeroElements to resolve a target shuffle mask's inputs -/// and set the SM_SentinelUndef and SM_SentinelZero values. Then check the -/// remaining input indices in case we now have a unary shuffle and adjust the -/// inputs accordingly. +/// Calls getTargetShuffleAndZeroables to resolve a target shuffle mask's inputs +/// and then sets the SM_SentinelUndef and SM_SentinelZero values. /// Returns true if the target shuffle mask was decoded. 
-static bool resolveTargetShuffleInputs(SDValue Op, - SmallVectorImpl &Inputs, - SmallVectorImpl &Mask, - SelectionDAG &DAG) { +static bool getTargetShuffleInputs(SDValue Op, const APInt &DemandedElts, + SmallVectorImpl &Inputs, + SmallVectorImpl &Mask, + APInt &KnownUndef, APInt &KnownZero, + SelectionDAG &DAG, unsigned Depth, + bool ResolveKnownElts) { + EVT VT = Op.getValueType(); + if (!VT.isSimple() || !VT.isVector()) + return false; + + if (getTargetShuffleAndZeroables(Op, Mask, Inputs, KnownUndef, KnownZero)) { + if (ResolveKnownElts) + resolveTargetShuffleFromZeroables(Mask, KnownUndef, KnownZero); + return true; + } + if (getFauxShuffleMask(Op, DemandedElts, Mask, Inputs, DAG, Depth, + ResolveKnownElts)) { + resolveZeroablesFromTargetShuffle(Mask, KnownUndef, KnownZero); + return true; + } + return false; +} + +static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl &Inputs, + SmallVectorImpl &Mask, + SelectionDAG &DAG, unsigned Depth = 0, + bool ResolveKnownElts = true) { + EVT VT = Op.getValueType(); + if (!VT.isSimple() || !VT.isVector()) + return false; + + APInt KnownUndef, KnownZero; unsigned NumElts = Op.getValueType().getVectorNumElements(); APInt DemandedElts = APInt::getAllOnesValue(NumElts); - if (!setTargetShuffleZeroElements(Op, Mask, Inputs)) - if (!getFauxShuffleMask(Op, DemandedElts, Mask, Inputs, DAG)) - return false; - - resolveTargetShuffleInputsAndMask(Inputs, Mask); - return true; + return getTargetShuffleInputs(Op, DemandedElts, Inputs, Mask, KnownUndef, + KnownZero, DAG, Depth, ResolveKnownElts); } /// Returns the scalar element that will make up the ith @@ -7414,7 +7649,7 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!"); SDLoc DL(Op); SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2, - DAG.getIntPtrConstant(InsertPSMask, DL)); + DAG.getIntPtrConstant(InsertPSMask, DL, true)); return DAG.getBitcast(VT, Result); } @@ -7427,7 +7662,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits, unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ; SrcOp = DAG.getBitcast(ShVT, SrcOp); assert(NumBits % 8 == 0 && "Only support byte sized shifts"); - SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, MVT::i8); + SDValue ShiftVal = DAG.getTargetConstant(NumBits / 8, dl, MVT::i8); return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal)); } @@ -7439,7 +7674,7 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl, // the shuffle mask. if (LoadSDNode *LD = dyn_cast(SrcOp)) { SDValue Ptr = LD->getBasePtr(); - if (!ISD::isNormalLoad(LD) || LD->isVolatile()) + if (!ISD::isNormalLoad(LD) || !LD->isSimple()) return SDValue(); EVT PVT = LD->getValueType(0); if (PVT != MVT::i32 && PVT != MVT::f32) @@ -7504,6 +7739,49 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl, return SDValue(); } +// Recurse to find a LoadSDNode source and the accumulated ByteOffest. 
+static bool findEltLoadSrc(SDValue Elt, LoadSDNode *&Ld, int64_t &ByteOffset) {
+  if (ISD::isNON_EXTLoad(Elt.getNode())) {
+    auto *BaseLd = cast<LoadSDNode>(Elt);
+    if (!BaseLd->isSimple())
+      return false;
+    Ld = BaseLd;
+    ByteOffset = 0;
+    return true;
+  }
+
+  switch (Elt.getOpcode()) {
+  case ISD::BITCAST:
+  case ISD::TRUNCATE:
+  case ISD::SCALAR_TO_VECTOR:
+    return findEltLoadSrc(Elt.getOperand(0), Ld, ByteOffset);
+  case ISD::SRL:
+    if (isa<ConstantSDNode>(Elt.getOperand(1))) {
+      uint64_t Idx = Elt.getConstantOperandVal(1);
+      if ((Idx % 8) == 0 && findEltLoadSrc(Elt.getOperand(0), Ld, ByteOffset)) {
+        ByteOffset += Idx / 8;
+        return true;
+      }
+    }
+    break;
+  case ISD::EXTRACT_VECTOR_ELT:
+    if (isa<ConstantSDNode>(Elt.getOperand(1))) {
+      SDValue Src = Elt.getOperand(0);
+      unsigned SrcSizeInBits = Src.getScalarValueSizeInBits();
+      unsigned DstSizeInBits = Elt.getScalarValueSizeInBits();
+      if (DstSizeInBits == SrcSizeInBits && (SrcSizeInBits % 8) == 0 &&
+          findEltLoadSrc(Src, Ld, ByteOffset)) {
+        uint64_t Idx = Elt.getConstantOperandVal(1);
+        ByteOffset += Idx * (SrcSizeInBits / 8);
+        return true;
+      }
+    }
+    break;
+  }
+
+  return false;
+}
+
 /// Given the initializing elements 'Elts' of a vector of type 'VT', see if the
 /// elements can be replaced by a single large load which has the same value as
 /// a build_vector or insert_subvector whose loaded operands are 'Elts'.
@@ -7513,6 +7791,9 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
                                         const SDLoc &DL, SelectionDAG &DAG,
                                         const X86Subtarget &Subtarget,
                                         bool isAfterLegalize) {
+  if ((VT.getScalarSizeInBits() % 8) != 0)
+    return SDValue();
+
   unsigned NumElems = Elts.size();
 
   int LastLoadedElt = -1;
@@ -7521,6 +7802,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
   APInt UndefMask = APInt::getNullValue(NumElems);
 
   SmallVector<LoadSDNode*, 8> Loads(NumElems, nullptr);
+  SmallVector<int64_t, 8> ByteOffsets(NumElems, 0);
 
   // For each element in the initializer, see if we've found a load, zero or an
   // undef.
@@ -7539,13 +7821,16 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
 
     // Each loaded element must be the correct fractional portion of the
     // requested vector load.
-    if ((NumElems * Elt.getValueSizeInBits()) != VT.getSizeInBits())
+    unsigned EltSizeInBits = Elt.getValueSizeInBits();
+    if ((NumElems * EltSizeInBits) != VT.getSizeInBits())
       return SDValue();
 
-    if (!ISD::isNON_EXTLoad(Elt.getNode()))
+    if (!findEltLoadSrc(Elt, Loads[i], ByteOffsets[i]) || ByteOffsets[i] < 0)
+      return SDValue();
+    unsigned LoadSizeInBits = Loads[i]->getValueSizeInBits(0);
+    if (((ByteOffsets[i] * 8) + EltSizeInBits) > LoadSizeInBits)
       return SDValue();
 
-    Loads[i] = cast<LoadSDNode>(Elt);
     LoadMask.setBit(i);
     LastLoadedElt = i;
   }
@@ -7575,6 +7860,24 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
   int LoadSizeInBits = (1 + LastLoadedElt - FirstLoadedElt) * BaseSizeInBits;
   assert((BaseSizeInBits % 8) == 0 && "Sub-byte element loads detected");
 
+  // TODO: Support offsetting the base load.
+  if (ByteOffsets[FirstLoadedElt] != 0)
+    return SDValue();
+
+  // Check to see if the element's load is consecutive to the base load
+  // or offset from a previous (already checked) load.
+ auto CheckConsecutiveLoad = [&](LoadSDNode *Base, int EltIdx) { + LoadSDNode *Ld = Loads[EltIdx]; + int64_t ByteOffset = ByteOffsets[EltIdx]; + if (ByteOffset && (ByteOffset % BaseSizeInBytes) == 0) { + int64_t BaseIdx = EltIdx - (ByteOffset / BaseSizeInBytes); + return (0 <= BaseIdx && BaseIdx < (int)NumElems && LoadMask[BaseIdx] && + Loads[BaseIdx] == Ld && ByteOffsets[BaseIdx] == 0); + } + return DAG.areNonVolatileConsecutiveLoads(Ld, Base, BaseSizeInBytes, + EltIdx - FirstLoadedElt); + }; + // Consecutive loads can contain UNDEFS but not ZERO elements. // Consecutive loads with UNDEFs and ZEROs elements require a // an additional shuffle stage to clear the ZERO elements. @@ -7582,8 +7885,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, bool IsConsecutiveLoadWithZeros = true; for (int i = FirstLoadedElt + 1; i <= LastLoadedElt; ++i) { if (LoadMask[i]) { - if (!DAG.areNonVolatileConsecutiveLoads(Loads[i], LDBase, BaseSizeInBytes, - i - FirstLoadedElt)) { + if (!CheckConsecutiveLoad(LDBase, i)) { IsConsecutiveLoad = false; IsConsecutiveLoadWithZeros = false; break; @@ -7595,8 +7897,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) { auto MMOFlags = LDBase->getMemOperand()->getFlags(); - assert(!(MMOFlags & MachineMemOperand::MOVolatile) && - "Cannot merge volatile loads."); + assert(LDBase->isSimple() && + "Cannot merge volatile or atomic loads."); SDValue NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), LDBase->getPointerInfo(), LDBase->getAlignment(), MMOFlags); @@ -7636,17 +7938,22 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, // IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded // vector and a zero vector to clear out the zero elements. if (!isAfterLegalize && VT.isVector()) { - SmallVector ClearMask(NumElems, -1); - for (unsigned i = 0; i < NumElems; ++i) { - if (ZeroMask[i]) - ClearMask[i] = i + NumElems; - else if (LoadMask[i]) - ClearMask[i] = i; + unsigned NumMaskElts = VT.getVectorNumElements(); + if ((NumMaskElts % NumElems) == 0) { + unsigned Scale = NumMaskElts / NumElems; + SmallVector ClearMask(NumMaskElts, -1); + for (unsigned i = 0; i < NumElems; ++i) { + if (UndefMask[i]) + continue; + int Offset = ZeroMask[i] ? NumMaskElts : 0; + for (unsigned j = 0; j != Scale; ++j) + ClearMask[(i * Scale) + j] = (i * Scale) + j + Offset; + } + SDValue V = CreateLoad(VT, LDBase); + SDValue Z = VT.isInteger() ? DAG.getConstant(0, DL, VT) + : DAG.getConstantFP(0.0, DL, VT); + return DAG.getVectorShuffle(VT, DL, V, Z, ClearMask); } - SDValue V = CreateLoad(VT, LDBase); - SDValue Z = VT.isInteger() ? DAG.getConstant(0, DL, VT) - : DAG.getConstantFP(0.0, DL, VT); - return DAG.getVectorShuffle(VT, DL, V, Z, ClearMask); } } @@ -8194,34 +8501,10 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG, "Unexpected type in LowerBUILD_VECTORvXi1!"); SDLoc dl(Op); - if (ISD::isBuildVectorAllZeros(Op.getNode())) - return Op; - - if (ISD::isBuildVectorAllOnes(Op.getNode())) + if (ISD::isBuildVectorAllZeros(Op.getNode()) || + ISD::isBuildVectorAllOnes(Op.getNode())) return Op; - if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { - if (VT == MVT::v64i1 && !Subtarget.is64Bit()) { - // Split the pieces. 
- SDValue Lower = - DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(0, 32)); - SDValue Upper = - DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(32, 32)); - // We have to manually lower both halves so getNode doesn't try to - // reassemble the build_vector. - Lower = LowerBUILD_VECTORvXi1(Lower, DAG, Subtarget); - Upper = LowerBUILD_VECTORvXi1(Upper, DAG, Subtarget); - return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lower, Upper); - } - SDValue Imm = ConvertI1VectorToInteger(Op, DAG); - if (Imm.getValueSizeInBits() == VT.getSizeInBits()) - return DAG.getBitcast(VT, Imm); - SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec, - DAG.getIntPtrConstant(0, dl)); - } - - // Vector has one or more non-const elements uint64_t Immediate = 0; SmallVector NonConstIdx; bool IsSplat = true; @@ -8244,29 +8527,40 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG, } // for splat use " (select i1 splat_elt, all-ones, all-zeroes)" - if (IsSplat) - return DAG.getSelect(dl, VT, Op.getOperand(SplatIdx), + if (IsSplat) { + // The build_vector allows the scalar element to be larger than the vector + // element type. We need to mask it to use as a condition unless we know + // the upper bits are zero. + // FIXME: Use computeKnownBits instead of checking specific opcode? + SDValue Cond = Op.getOperand(SplatIdx); + assert(Cond.getValueType() == MVT::i8 && "Unexpected VT!"); + if (Cond.getOpcode() != ISD::SETCC) + Cond = DAG.getNode(ISD::AND, dl, MVT::i8, Cond, + DAG.getConstant(1, dl, MVT::i8)); + return DAG.getSelect(dl, VT, Cond, DAG.getConstant(1, dl, VT), DAG.getConstant(0, dl, VT)); + } // insert elements one by one SDValue DstVec; - SDValue Imm; - if (Immediate) { - MVT ImmVT = MVT::getIntegerVT(std::max((int)VT.getSizeInBits(), 8)); - Imm = DAG.getConstant(Immediate, dl, ImmVT); - } - else if (HasConstElts) - Imm = DAG.getConstant(0, dl, VT); - else - Imm = DAG.getUNDEF(VT); - if (Imm.getValueSizeInBits() == VT.getSizeInBits()) - DstVec = DAG.getBitcast(VT, Imm); - else { - SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm); - DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec, - DAG.getIntPtrConstant(0, dl)); - } + if (HasConstElts) { + if (VT == MVT::v64i1 && !Subtarget.is64Bit()) { + SDValue ImmL = DAG.getConstant(Lo_32(Immediate), dl, MVT::i32); + SDValue ImmH = DAG.getConstant(Hi_32(Immediate), dl, MVT::i32); + ImmL = DAG.getBitcast(MVT::v32i1, ImmL); + ImmH = DAG.getBitcast(MVT::v32i1, ImmH); + DstVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, ImmL, ImmH); + } else { + MVT ImmVT = MVT::getIntegerVT(std::max(VT.getSizeInBits(), 8U)); + SDValue Imm = DAG.getConstant(Immediate, dl, ImmVT); + MVT VecVT = VT.getSizeInBits() >= 8 ? VT : MVT::v8i1; + DstVec = DAG.getBitcast(VecVT, Imm); + DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, DstVec, + DAG.getIntPtrConstant(0, dl)); + } + } else + DstVec = DAG.getUNDEF(VT); for (unsigned i = 0, e = NonConstIdx.size(); i != e; ++i) { unsigned InsertIdx = NonConstIdx[i]; @@ -8757,7 +9051,7 @@ static SDValue getHopForBuildVector(const BuildVectorSDNode *BV, // If we don't need the upper xmm, then perform as a xmm hop. 
unsigned HalfNumElts = NumElts / 2; if (VT.is256BitVector() && DemandedElts.lshr(HalfNumElts) == 0) { - MVT HalfVT = MVT::getVectorVT(VT.getScalarType(), HalfNumElts); + MVT HalfVT = VT.getHalfNumVectorElementsVT(); V0 = extractSubVector(V0, 0, DAG, SDLoc(BV), 128); V1 = extractSubVector(V1, 0, DAG, SDLoc(BV), 128); SDValue Half = DAG.getNode(HOpcode, SDLoc(BV), HalfVT, V0, V1); @@ -8965,21 +9259,14 @@ static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG, MVT VT = Op.getSimpleValueType(); // Vectors containing all zeros can be matched by pxor and xorps. - if (ISD::isBuildVectorAllZeros(Op.getNode())) { - // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd - // and 2) ensure that i64 scalars are eliminated on x86-32 hosts. - if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) - return Op; - - return getZeroVector(VT, Subtarget, DAG, DL); - } + if (ISD::isBuildVectorAllZeros(Op.getNode())) + return Op; // Vectors containing all ones can be matched by pcmpeqd on 128-bit width // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use // vpcmpeqd on 256-bit vectors. if (Subtarget.hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) { - if (VT == MVT::v4i32 || VT == MVT::v16i32 || - (VT == MVT::v8i32 && Subtarget.hasInt256())) + if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) return Op; return getOnesVector(VT, DAG, DL); @@ -9150,9 +9437,9 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec, SDValue HiHi = DAG.getVectorShuffle(MVT::v8f32, DL, SrcVec, SrcVec, {4, 5, 6, 7, 4, 5, 6, 7}); if (Subtarget.hasXOP()) - return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v8f32, - LoLo, HiHi, IndicesVec, - DAG.getConstant(0, DL, MVT::i8))); + return DAG.getBitcast( + VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v8f32, LoLo, HiHi, + IndicesVec, DAG.getTargetConstant(0, DL, MVT::i8))); // Permute Lo and Hi and then select based on index range. // This works as VPERMILPS only uses index bits[0:1] to permute elements. SDValue Res = DAG.getSelectCC( @@ -9186,9 +9473,9 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec, // VPERMIL2PD selects with bit#1 of the index vector, so scale IndicesVec. IndicesVec = DAG.getNode(ISD::ADD, DL, IndicesVT, IndicesVec, IndicesVec); if (Subtarget.hasXOP()) - return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v4f64, - LoLo, HiHi, IndicesVec, - DAG.getConstant(0, DL, MVT::i8))); + return DAG.getBitcast( + VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v4f64, LoLo, HiHi, + IndicesVec, DAG.getTargetConstant(0, DL, MVT::i8))); // Permute Lo and Hi and then select based on index range. // This works as VPERMILPD only uses index bit[1] to permute elements. SDValue Res = DAG.getSelectCC( @@ -9283,7 +9570,7 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG, return SDValue(); auto *PermIdx = dyn_cast(ExtractedIndex.getOperand(1)); - if (!PermIdx || PermIdx->getZExtValue() != Idx) + if (!PermIdx || PermIdx->getAPIntValue() != Idx) return SDValue(); } @@ -9434,23 +9721,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // it to i32 first. 
if (EltVT == MVT::i16 || EltVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); - if (VT.getSizeInBits() >= 256) { - MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32); - if (Subtarget.hasAVX()) { - Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShufVT, Item); - Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); - } else { - // Without AVX, we need to extend to a 128-bit vector and then - // insert into the 256-bit vector. - Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); - SDValue ZeroVec = getZeroVector(ShufVT, Subtarget, DAG, dl); - Item = insert128BitVector(ZeroVec, Item, 0, DAG, dl); - } - } else { - assert(VT.is128BitVector() && "Expected an SSE value type!"); - Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); - Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); - } + MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32); + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShufVT, Item); + Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); return DAG.getBitcast(VT, Item); } } @@ -9549,8 +9822,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { HVT, dl, Op->ops().slice(NumElems / 2, NumElems /2)); // Recreate the wider vector with the lower and upper part. - return concatSubVectors(Lower, Upper, VT, NumElems, DAG, dl, - VT.getSizeInBits() / 2); + return concatSubVectors(Lower, Upper, DAG, dl); } // Let legalizer expand 2-wide build_vectors. @@ -9703,8 +9975,7 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG, // If we have more than 2 non-zeros, build each half separately. if (NumNonZero > 2) { - MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(), - ResVT.getVectorNumElements()/2); + MVT HalfVT = ResVT.getHalfNumVectorElementsVT(); ArrayRef Ops = Op->ops(); SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops.slice(0, NumOperands/2)); @@ -9745,30 +10016,47 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, assert(NumOperands > 1 && isPowerOf2_32(NumOperands) && "Unexpected number of operands in CONCAT_VECTORS"); - unsigned NumZero = 0; - unsigned NumNonZero = 0; + uint64_t Zeros = 0; uint64_t NonZeros = 0; for (unsigned i = 0; i != NumOperands; ++i) { SDValue SubVec = Op.getOperand(i); if (SubVec.isUndef()) continue; + assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range. if (ISD::isBuildVectorAllZeros(SubVec.getNode())) - ++NumZero; - else { - assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range. + Zeros |= (uint64_t)1 << i; + else NonZeros |= (uint64_t)1 << i; - ++NumNonZero; - } } + unsigned NumElems = ResVT.getVectorNumElements(); + + // If we are inserting non-zero vector and there are zeros in LSBs and undef + // in the MSBs we need to emit a KSHIFTL. The generic lowering to + // insert_subvector will give us two kshifts. + if (isPowerOf2_64(NonZeros) && Zeros != 0 && NonZeros > Zeros && + Log2_64(NonZeros) != NumOperands - 1) { + MVT ShiftVT = ResVT; + if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) + ShiftVT = Subtarget.hasDQI() ? 
MVT::v8i1 : MVT::v16i1; + unsigned Idx = Log2_64(NonZeros); + SDValue SubVec = Op.getOperand(Idx); + unsigned SubVecNumElts = SubVec.getSimpleValueType().getVectorNumElements(); + SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ShiftVT, + DAG.getUNDEF(ShiftVT), SubVec, + DAG.getIntPtrConstant(0, dl)); + Op = DAG.getNode(X86ISD::KSHIFTL, dl, ShiftVT, SubVec, + DAG.getTargetConstant(Idx * SubVecNumElts, dl, MVT::i8)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, Op, + DAG.getIntPtrConstant(0, dl)); + } // If there are zero or one non-zeros we can handle this very simply. - if (NumNonZero <= 1) { - SDValue Vec = NumZero ? getZeroVector(ResVT, Subtarget, DAG, dl) - : DAG.getUNDEF(ResVT); - if (!NumNonZero) + if (NonZeros == 0 || isPowerOf2_64(NonZeros)) { + SDValue Vec = Zeros ? DAG.getConstant(0, dl, ResVT) : DAG.getUNDEF(ResVT); + if (!NonZeros) return Vec; - unsigned Idx = countTrailingZeros(NonZeros); + unsigned Idx = Log2_64(NonZeros); SDValue SubVec = Op.getOperand(Idx); unsigned SubVecNumElts = SubVec.getSimpleValueType().getVectorNumElements(); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, SubVec, @@ -9776,8 +10064,7 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, } if (NumOperands > 2) { - MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(), - ResVT.getVectorNumElements()/2); + MVT HalfVT = ResVT.getHalfNumVectorElementsVT(); ArrayRef Ops = Op->ops(); SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops.slice(0, NumOperands/2)); @@ -9786,7 +10073,7 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); } - assert(NumNonZero == 2 && "Simple cases not handled?"); + assert(countPopulation(NonZeros) == 2 && "Simple cases not handled?"); if (ResVT.getVectorNumElements() >= 16) return Op; // The operation is legal with KUNPCK @@ -9794,7 +10081,6 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, SDValue Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, DAG.getUNDEF(ResVT), Op.getOperand(0), DAG.getIntPtrConstant(0, dl)); - unsigned NumElems = ResVT.getVectorNumElements(); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Op.getOperand(1), DAG.getIntPtrConstant(NumElems/2, dl)); } @@ -9997,42 +10283,44 @@ static bool isShuffleEquivalent(SDValue V1, SDValue V2, ArrayRef Mask, /// If an element in Mask matches SM_SentinelUndef (-1) then the corresponding /// value in ExpectedMask is always accepted. Otherwise the indices must match. /// -/// SM_SentinelZero is accepted as a valid negative index but must match in both. +/// SM_SentinelZero is accepted as a valid negative index but must match in +/// both. static bool isTargetShuffleEquivalent(ArrayRef Mask, - ArrayRef ExpectedMask) { + ArrayRef ExpectedMask, + SDValue V1 = SDValue(), + SDValue V2 = SDValue()) { int Size = Mask.size(); if (Size != (int)ExpectedMask.size()) return false; assert(isUndefOrZeroOrInRange(ExpectedMask, 0, 2 * Size) && "Illegal target shuffle mask"); - for (int i = 0; i < Size; ++i) - if (Mask[i] == SM_SentinelUndef) - continue; - else if (Mask[i] < 0 && Mask[i] != SM_SentinelZero) - return false; - else if (Mask[i] != ExpectedMask[i]) - return false; - - return true; -} + // Check for out-of-range target shuffle mask indices. + if (!isUndefOrZeroOrInRange(Mask, 0, 2 * Size)) + return false; -// Merges a general DAG shuffle mask and zeroable bit mask into a target shuffle -// mask. 
-static SmallVector createTargetShuffleMask(ArrayRef Mask, - const APInt &Zeroable) { - int NumElts = Mask.size(); - assert(NumElts == (int)Zeroable.getBitWidth() && "Mismatch mask sizes"); + // If the values are build vectors, we can look through them to find + // equivalent inputs that make the shuffles equivalent. + auto *BV1 = dyn_cast_or_null(V1); + auto *BV2 = dyn_cast_or_null(V2); + BV1 = ((BV1 && Size != (int)BV1->getNumOperands()) ? nullptr : BV1); + BV2 = ((BV2 && Size != (int)BV2->getNumOperands()) ? nullptr : BV2); - SmallVector TargetMask(NumElts, SM_SentinelUndef); - for (int i = 0; i != NumElts; ++i) { - int M = Mask[i]; - if (M == SM_SentinelUndef) + for (int i = 0; i < Size; ++i) { + if (Mask[i] == SM_SentinelUndef || Mask[i] == ExpectedMask[i]) continue; - assert(0 <= M && M < (2 * NumElts) && "Out of range shuffle index"); - TargetMask[i] = (Zeroable[i] ? SM_SentinelZero : M); + if (0 <= Mask[i] && 0 <= ExpectedMask[i]) { + auto *MaskBV = Mask[i] < Size ? BV1 : BV2; + auto *ExpectedBV = ExpectedMask[i] < Size ? BV1 : BV2; + if (MaskBV && ExpectedBV && + MaskBV->getOperand(Mask[i] % Size) == + ExpectedBV->getOperand(ExpectedMask[i] % Size)) + continue; + } + // TODO - handle SM_Sentinel equivalences. + return false; } - return TargetMask; + return true; } // Attempt to create a shuffle mask from a VSELECT condition mask. @@ -10133,7 +10421,7 @@ static unsigned getV4X86ShuffleImm(ArrayRef Mask) { static SDValue getV4X86ShuffleImm8ForMask(ArrayRef Mask, const SDLoc &DL, SelectionDAG &DAG) { - return DAG.getConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8); + return DAG.getTargetConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8); } /// Compute whether each element of a shuffle is zeroable. @@ -10573,14 +10861,14 @@ static bool matchVectorShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, // Try binary shuffle. SmallVector BinaryMask; createPackShuffleMask(VT, BinaryMask, false); - if (isTargetShuffleEquivalent(TargetMask, BinaryMask)) + if (isTargetShuffleEquivalent(TargetMask, BinaryMask, V1, V2)) if (MatchPACK(V1, V2)) return true; // Try unary shuffle. SmallVector UnaryMask; createPackShuffleMask(VT, UnaryMask, true); - if (isTargetShuffleEquivalent(TargetMask, UnaryMask)) + if (isTargetShuffleEquivalent(TargetMask, UnaryMask, V1)) if (MatchPACK(V1, V1)) return true; @@ -10685,9 +10973,9 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, SelectionDAG &DAG); static bool matchVectorShuffleAsBlend(SDValue V1, SDValue V2, - MutableArrayRef TargetMask, - bool &ForceV1Zero, bool &ForceV2Zero, - uint64_t &BlendMask) { + MutableArrayRef Mask, + const APInt &Zeroable, bool &ForceV1Zero, + bool &ForceV2Zero, uint64_t &BlendMask) { bool V1IsZeroOrUndef = V1.isUndef() || ISD::isBuildVectorAllZeros(V1.getNode()); bool V2IsZeroOrUndef = @@ -10695,13 +10983,12 @@ static bool matchVectorShuffleAsBlend(SDValue V1, SDValue V2, BlendMask = 0; ForceV1Zero = false, ForceV2Zero = false; - assert(TargetMask.size() <= 64 && "Shuffle mask too big for blend mask"); + assert(Mask.size() <= 64 && "Shuffle mask too big for blend mask"); // Attempt to generate the binary blend mask. If an input is zero then // we can use any lane. - // TODO: generalize the zero matching to any scalar like isShuffleEquivalent. 
- for (int i = 0, Size = TargetMask.size(); i < Size; ++i) { - int M = TargetMask[i]; + for (int i = 0, Size = Mask.size(); i < Size; ++i) { + int M = Mask[i]; if (M == SM_SentinelUndef) continue; if (M == i) @@ -10710,16 +10997,16 @@ static bool matchVectorShuffleAsBlend(SDValue V1, SDValue V2, BlendMask |= 1ull << i; continue; } - if (M == SM_SentinelZero) { + if (Zeroable[i]) { if (V1IsZeroOrUndef) { ForceV1Zero = true; - TargetMask[i] = i; + Mask[i] = i; continue; } if (V2IsZeroOrUndef) { ForceV2Zero = true; BlendMask |= 1ull << i; - TargetMask[i] = i + Size; + Mask[i] = i + Size; continue; } } @@ -10748,11 +11035,10 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - SmallVector Mask = createTargetShuffleMask(Original, Zeroable); - uint64_t BlendMask = 0; bool ForceV1Zero = false, ForceV2Zero = false; - if (!matchVectorShuffleAsBlend(V1, V2, Mask, ForceV1Zero, ForceV2Zero, + SmallVector Mask(Original.begin(), Original.end()); + if (!matchVectorShuffleAsBlend(V1, V2, Mask, Zeroable, ForceV1Zero, ForceV2Zero, BlendMask)) return SDValue(); @@ -10778,7 +11064,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, case MVT::v8i16: assert(Subtarget.hasSSE41() && "128-bit blends require SSE41!"); return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2, - DAG.getConstant(BlendMask, DL, MVT::i8)); + DAG.getTargetConstant(BlendMask, DL, MVT::i8)); case MVT::v16i16: { assert(Subtarget.hasAVX2() && "v16i16 blends require AVX2!"); SmallVector RepeatedMask; @@ -10790,7 +11076,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, if (RepeatedMask[i] >= 8) BlendMask |= 1ull << i; return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2, - DAG.getConstant(BlendMask, DL, MVT::i8)); + DAG.getTargetConstant(BlendMask, DL, MVT::i8)); } // Use PBLENDW for lower/upper lanes and then blend lanes. 
// TODO - we should allow 2 PBLENDW here and leave shuffle combine to @@ -10799,9 +11085,9 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, uint64_t HiMask = (BlendMask >> 8) & 0xFF; if (LoMask == 0 || LoMask == 255 || HiMask == 0 || HiMask == 255) { SDValue Lo = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2, - DAG.getConstant(LoMask, DL, MVT::i8)); + DAG.getTargetConstant(LoMask, DL, MVT::i8)); SDValue Hi = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2, - DAG.getConstant(HiMask, DL, MVT::i8)); + DAG.getTargetConstant(HiMask, DL, MVT::i8)); return DAG.getVectorShuffle( MVT::v16i16, DL, Lo, Hi, {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}); @@ -11061,7 +11347,7 @@ static SDValue lowerShuffleAsByteRotateAndPermute( SDValue Rotate = DAG.getBitcast( VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, DAG.getBitcast(ByteVT, Hi), DAG.getBitcast(ByteVT, Lo), - DAG.getConstant(Scale * RotAmt, DL, MVT::i8))); + DAG.getTargetConstant(Scale * RotAmt, DL, MVT::i8))); SmallVector PermMask(NumElts, SM_SentinelUndef); for (int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) { for (int Elt = 0; Elt != NumEltsPerLane; ++Elt) { @@ -11268,7 +11554,7 @@ static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1, "512-bit PALIGNR requires BWI instructions"); return DAG.getBitcast( VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, Lo, Hi, - DAG.getConstant(ByteRotation, DL, MVT::i8))); + DAG.getTargetConstant(ByteRotation, DL, MVT::i8))); } assert(VT.is128BitVector() && @@ -11282,10 +11568,12 @@ static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1, int LoByteShift = 16 - ByteRotation; int HiByteShift = ByteRotation; - SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Lo, - DAG.getConstant(LoByteShift, DL, MVT::i8)); - SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Hi, - DAG.getConstant(HiByteShift, DL, MVT::i8)); + SDValue LoShift = + DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Lo, + DAG.getTargetConstant(LoByteShift, DL, MVT::i8)); + SDValue HiShift = + DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Hi, + DAG.getTargetConstant(HiByteShift, DL, MVT::i8)); return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, MVT::v16i8, LoShift, HiShift)); } @@ -11317,7 +11605,7 @@ static SDValue lowerShuffleAsRotate(const SDLoc &DL, MVT VT, SDValue V1, return SDValue(); return DAG.getNode(X86ISD::VALIGN, DL, VT, Lo, Hi, - DAG.getConstant(Rotation, DL, MVT::i8)); + DAG.getTargetConstant(Rotation, DL, MVT::i8)); } /// Try to lower a vector shuffle as a byte shift sequence. 
@@ -11356,27 +11644,27 @@ static SDValue lowerVectorShuffleAsByteShiftMask( if (ZeroLo == 0) { unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts); Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res, - DAG.getConstant(Scale * Shift, DL, MVT::i8)); + DAG.getTargetConstant(Scale * Shift, DL, MVT::i8)); Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res, - DAG.getConstant(Scale * ZeroHi, DL, MVT::i8)); + DAG.getTargetConstant(Scale * ZeroHi, DL, MVT::i8)); } else if (ZeroHi == 0) { unsigned Shift = Mask[ZeroLo] % NumElts; Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res, - DAG.getConstant(Scale * Shift, DL, MVT::i8)); + DAG.getTargetConstant(Scale * Shift, DL, MVT::i8)); Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res, - DAG.getConstant(Scale * ZeroLo, DL, MVT::i8)); + DAG.getTargetConstant(Scale * ZeroLo, DL, MVT::i8)); } else if (!Subtarget.hasSSSE3()) { // If we don't have PSHUFB then its worth avoiding an AND constant mask // by performing 3 byte shifts. Shuffle combining can kick in above that. // TODO: There may be some cases where VSH{LR}DQ+PAND is still better. unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts); Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res, - DAG.getConstant(Scale * Shift, DL, MVT::i8)); + DAG.getTargetConstant(Scale * Shift, DL, MVT::i8)); Shift += Mask[ZeroLo] % NumElts; Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res, - DAG.getConstant(Scale * Shift, DL, MVT::i8)); + DAG.getTargetConstant(Scale * Shift, DL, MVT::i8)); Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res, - DAG.getConstant(Scale * ZeroLo, DL, MVT::i8)); + DAG.getTargetConstant(Scale * ZeroLo, DL, MVT::i8)); } else return SDValue(); @@ -11498,7 +11786,7 @@ static SDValue lowerShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1, "Illegal integer vector type"); V = DAG.getBitcast(ShiftVT, V); V = DAG.getNode(Opcode, DL, ShiftVT, V, - DAG.getConstant(ShiftAmt, DL, MVT::i8)); + DAG.getTargetConstant(ShiftAmt, DL, MVT::i8)); return DAG.getBitcast(VT, V); } @@ -11632,14 +11920,14 @@ static SDValue lowerShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1, uint64_t BitLen, BitIdx; if (matchShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable)) return DAG.getNode(X86ISD::EXTRQI, DL, VT, V1, - DAG.getConstant(BitLen, DL, MVT::i8), - DAG.getConstant(BitIdx, DL, MVT::i8)); + DAG.getTargetConstant(BitLen, DL, MVT::i8), + DAG.getTargetConstant(BitIdx, DL, MVT::i8)); if (matchShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx)) return DAG.getNode(X86ISD::INSERTQI, DL, VT, V1 ? V1 : DAG.getUNDEF(VT), V2 ? V2 : DAG.getUNDEF(VT), - DAG.getConstant(BitLen, DL, MVT::i8), - DAG.getConstant(BitIdx, DL, MVT::i8)); + DAG.getTargetConstant(BitLen, DL, MVT::i8), + DAG.getTargetConstant(BitIdx, DL, MVT::i8)); return SDValue(); } @@ -11686,9 +11974,8 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend( return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), ShMask); }; - // Found a valid zext mask! Try various lowering strategies based on the + // Found a valid a/zext mask! Try various lowering strategies based on the // input type and available ISA extensions. - // TODO: Add AnyExt support. if (Subtarget.hasSSE41()) { // Not worth offsetting 128-bit vectors if scale == 2, a pattern using // PUNPCK will catch this in a later shuffle match. 
@@ -11697,7 +11984,8 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend( MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale), NumElements / Scale); InputV = ShuffleOffset(InputV); - InputV = getExtendInVec(ISD::ZERO_EXTEND, DL, ExtVT, InputV, DAG); + InputV = getExtendInVec(AnyExt ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND, DL, + ExtVT, InputV, DAG); return DAG.getBitcast(VT, InputV); } @@ -11736,8 +12024,8 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend( int LoIdx = Offset * EltBits; SDValue Lo = DAG.getBitcast( MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV, - DAG.getConstant(EltBits, DL, MVT::i8), - DAG.getConstant(LoIdx, DL, MVT::i8))); + DAG.getTargetConstant(EltBits, DL, MVT::i8), + DAG.getTargetConstant(LoIdx, DL, MVT::i8))); if (isUndefUpperHalf(Mask) || !SafeOffset(Offset + 1)) return DAG.getBitcast(VT, Lo); @@ -11745,8 +12033,8 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend( int HiIdx = (Offset + 1) * EltBits; SDValue Hi = DAG.getBitcast( MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV, - DAG.getConstant(EltBits, DL, MVT::i8), - DAG.getConstant(HiIdx, DL, MVT::i8))); + DAG.getTargetConstant(EltBits, DL, MVT::i8), + DAG.getTargetConstant(HiIdx, DL, MVT::i8))); return DAG.getBitcast(VT, DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, Lo, Hi)); } @@ -11759,8 +12047,12 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend( SDValue PSHUFBMask[16]; for (int i = 0; i < 16; ++i) { int Idx = Offset + (i / Scale); - PSHUFBMask[i] = DAG.getConstant( - (i % Scale == 0 && SafeOffset(Idx)) ? Idx : 0x80, DL, MVT::i8); + if ((i % Scale == 0 && SafeOffset(Idx))) { + PSHUFBMask[i] = DAG.getConstant(Idx, DL, MVT::i8); + continue; + } + PSHUFBMask[i] = + AnyExt ? DAG.getUNDEF(MVT::i8) : DAG.getConstant(0x80, DL, MVT::i8); } InputV = DAG.getBitcast(MVT::v16i8, InputV); return DAG.getBitcast( @@ -12052,9 +12344,9 @@ static SDValue lowerShuffleAsElementInsertion( V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle); } else { V2 = DAG.getBitcast(MVT::v16i8, V2); - V2 = DAG.getNode( - X86ISD::VSHLDQ, DL, MVT::v16i8, V2, - DAG.getConstant(V2Index * EltVT.getSizeInBits() / 8, DL, MVT::i8)); + V2 = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, V2, + DAG.getTargetConstant( + V2Index * EltVT.getSizeInBits() / 8, DL, MVT::i8)); V2 = DAG.getBitcast(VT, V2); } } @@ -12294,7 +12586,7 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1, // If we can't broadcast from a register, check that the input is a load. if (!BroadcastFromReg && !isShuffleFoldableLoad(V)) return SDValue(); - } else if (MayFoldLoad(V) && !cast(V)->isVolatile()) { + } else if (MayFoldLoad(V) && cast(V)->isSimple()) { // 32-bit targets need to load i64 as a f64 and then bitcast the result. if (!Subtarget.is64Bit() && VT.getScalarType() == MVT::i64) { BroadcastVT = MVT::getVectorVT(MVT::f64, VT.getVectorNumElements()); @@ -12486,7 +12778,7 @@ static SDValue lowerShuffleAsInsertPS(const SDLoc &DL, SDValue V1, SDValue V2, // Insert the V2 element into the desired position. return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2, - DAG.getConstant(InsertPSMask, DL, MVT::i8)); + DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); } /// Try to lower a shuffle as a permute of the inputs followed by an @@ -12635,14 +12927,14 @@ static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef Mask, // If we have AVX, we can use VPERMILPS which will allow folding a load // into the shuffle. 
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v2f64, V1, - DAG.getConstant(SHUFPDMask, DL, MVT::i8)); + DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8)); } return DAG.getNode( X86ISD::SHUFP, DL, MVT::v2f64, Mask[0] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1, Mask[1] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1, - DAG.getConstant(SHUFPDMask, DL, MVT::i8)); + DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8)); } assert(Mask[0] >= 0 && "No undef lanes in multi-input v2 shuffles!"); assert(Mask[1] >= 0 && "No undef lanes in multi-input v2 shuffles!"); @@ -12688,7 +12980,7 @@ static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef Mask, unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1); return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V2, - DAG.getConstant(SHUFPDMask, DL, MVT::i8)); + DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8)); } /// Handle lowering of 2-lane 64-bit integer shuffles. @@ -12996,10 +13288,12 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef Mask, int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; }); if (NumV2Elements == 0) { - // Check for being able to broadcast a single element. - if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i32, V1, V2, - Mask, Subtarget, DAG)) - return Broadcast; + // Try to use broadcast unless the mask only has one non-undef element. + if (count_if(Mask, [](int M) { return M >= 0 && M < 4; }) > 1) { + if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i32, V1, V2, + Mask, Subtarget, DAG)) + return Broadcast; + } // Straight shuffle of a single input vector. For everything from SSE2 // onward this has a single fast instruction with no scary immediates. @@ -13680,16 +13974,16 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef Mask, int NumV2Inputs = count_if(Mask, [](int M) { return M >= 8; }); if (NumV2Inputs == 0) { - // Check for being able to broadcast a single element. - if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i16, V1, V2, - Mask, Subtarget, DAG)) - return Broadcast; - // Try to use shift instructions. if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i16, V1, V1, Mask, Zeroable, Subtarget, DAG)) return Shift; + // Check for being able to broadcast a single element. + if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i16, V1, V2, + Mask, Subtarget, DAG)) + return Broadcast; + // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG)) return V; @@ -13984,8 +14278,16 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef Mask, DAG.getUNDEF(MVT::v8i16), PreDupI16Shuffle)); // Unpack the bytes to form the i16s that will be shuffled into place. + bool EvenInUse = false, OddInUse = false; + for (int i = 0; i < 16; i += 2) { + EvenInUse |= (Mask[i + 0] >= 0); + OddInUse |= (Mask[i + 1] >= 0); + if (EvenInUse && OddInUse) + break; + } V1 = DAG.getNode(TargetLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL, - MVT::v16i8, V1, V1); + MVT::v16i8, EvenInUse ? V1 : DAG.getUNDEF(MVT::v16i8), + OddInUse ? V1 : DAG.getUNDEF(MVT::v16i8)); int PostDupI16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; for (int i = 0; i < 16; ++i) @@ -14100,11 +14402,10 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef Mask, // First we need to zero all the dropped bytes. assert(NumEvenDrops <= 3 && "No support for dropping even elements more than 3 times."); - // We use the mask type to pick which bytes are preserved based on how many - // elements are dropped. 
- MVT MaskVTs[] = { MVT::v8i16, MVT::v4i32, MVT::v2i64 }; - SDValue ByteClearMask = DAG.getBitcast( - MVT::v16i8, DAG.getConstant(0xFF, DL, MaskVTs[NumEvenDrops - 1])); + SmallVector ByteClearOps(16, DAG.getConstant(0, DL, MVT::i8)); + for (unsigned i = 0; i != 16; i += 1 << NumEvenDrops) + ByteClearOps[i] = DAG.getConstant(0xFF, DL, MVT::i8); + SDValue ByteClearMask = DAG.getBuildVector(MVT::v16i8, DL, ByteClearOps); V1 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V1, ByteClearMask); if (!IsSingleInput) V2 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V2, ByteClearMask); @@ -14448,16 +14749,14 @@ static SDValue lowerShuffleAsLanePermuteAndPermute( return DAG.getVectorShuffle(VT, DL, LanePermute, DAG.getUNDEF(VT), PermMask); } -/// Lower a vector shuffle crossing multiple 128-bit lanes as -/// a permutation and blend of those lanes. +/// Lower a vector shuffle crossing multiple 128-bit lanes by shuffling one +/// source with a lane permutation. /// -/// This essentially blends the out-of-lane inputs to each lane into the lane -/// from a permuted copy of the vector. This lowering strategy results in four -/// instructions in the worst case for a single-input cross lane shuffle which -/// is lower than any other fully general cross-lane shuffle strategy I'm aware -/// of. Special cases for each particular shuffle pattern should be handled -/// prior to trying this lowering. -static SDValue lowerShuffleAsLanePermuteAndBlend( +/// This lowering strategy results in four instructions in the worst case for a +/// single-input cross lane shuffle which is lower than any other fully general +/// cross-lane shuffle strategy I'm aware of. Special cases for each particular +/// shuffle pattern should be handled prior to trying this lowering. +static SDValue lowerShuffleAsLanePermuteAndShuffle( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG, const X86Subtarget &Subtarget) { // FIXME: This should probably be generalized for 512-bit vectors as well. @@ -14484,24 +14783,28 @@ static SDValue lowerShuffleAsLanePermuteAndBlend( return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG); } + // TODO - we could support shuffling V2 in the Flipped input. assert(V2.isUndef() && "This last part of this routine only works on single input shuffles"); - SmallVector FlippedBlendMask(Size); - for (int i = 0; i < Size; ++i) - FlippedBlendMask[i] = - Mask[i] < 0 ? -1 : (((Mask[i] % Size) / LaneSize == i / LaneSize) - ? Mask[i] - : Mask[i] % LaneSize + - (i / LaneSize) * LaneSize + Size); + SmallVector InLaneMask(Mask.begin(), Mask.end()); + for (int i = 0; i < Size; ++i) { + int &M = InLaneMask[i]; + if (M < 0) + continue; + if (((M % Size) / LaneSize) != (i / LaneSize)) + M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size; + } + assert(!is128BitLaneCrossingShuffleMask(VT, InLaneMask) && + "In-lane shuffle mask expected"); - // Flip the vector, and blend the results which should now be in-lane. + // Flip the lanes, and shuffle the results which should now be in-lane. MVT PVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64; SDValue Flipped = DAG.getBitcast(PVT, V1); - Flipped = DAG.getVectorShuffle(PVT, DL, Flipped, DAG.getUNDEF(PVT), - { 2, 3, 0, 1 }); + Flipped = + DAG.getVectorShuffle(PVT, DL, Flipped, DAG.getUNDEF(PVT), {2, 3, 0, 1}); Flipped = DAG.getBitcast(VT, Flipped); - return DAG.getVectorShuffle(VT, DL, V1, Flipped, FlippedBlendMask); + return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask); } /// Handle lowering 2-lane 128-bit shuffles. 
@@ -14565,8 +14868,8 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1, if (WidenedMask[0] < 2 && WidenedMask[1] >= 2) { unsigned PermMask = ((WidenedMask[0] % 2) << 0) | ((WidenedMask[1] % 2) << 1); - return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2, - DAG.getConstant(PermMask, DL, MVT::i8)); + return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2, + DAG.getTargetConstant(PermMask, DL, MVT::i8)); } } } @@ -14598,7 +14901,7 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1, V2 = DAG.getUNDEF(VT); return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2, - DAG.getConstant(PermMask, DL, MVT::i8)); + DAG.getTargetConstant(PermMask, DL, MVT::i8)); } /// Lower a vector shuffle by first fixing the 128-bit lanes and then @@ -14616,26 +14919,26 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask( if (is128BitLaneRepeatedShuffleMask(VT, Mask)) return SDValue(); - int Size = Mask.size(); + int NumElts = Mask.size(); int NumLanes = VT.getSizeInBits() / 128; - int LaneSize = 128 / VT.getScalarSizeInBits(); - SmallVector RepeatMask(LaneSize, -1); + int NumLaneElts = 128 / VT.getScalarSizeInBits(); + SmallVector RepeatMask(NumLaneElts, -1); SmallVector, 2> LaneSrcs(NumLanes, {{-1, -1}}); // First pass will try to fill in the RepeatMask from lanes that need two // sources. for (int Lane = 0; Lane != NumLanes; ++Lane) { - int Srcs[2] = { -1, -1 }; - SmallVector InLaneMask(LaneSize, -1); - for (int i = 0; i != LaneSize; ++i) { - int M = Mask[(Lane * LaneSize) + i]; + int Srcs[2] = {-1, -1}; + SmallVector InLaneMask(NumLaneElts, -1); + for (int i = 0; i != NumLaneElts; ++i) { + int M = Mask[(Lane * NumLaneElts) + i]; if (M < 0) continue; // Determine which of the possible input lanes (NumLanes from each source) // this element comes from. Assign that as one of the sources for this // lane. We can assign up to 2 sources for this lane. If we run out // sources we can't do anything. - int LaneSrc = M / LaneSize; + int LaneSrc = M / NumLaneElts; int Src; if (Srcs[0] < 0 || Srcs[0] == LaneSrc) Src = 0; @@ -14645,7 +14948,7 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask( return SDValue(); Srcs[Src] = LaneSrc; - InLaneMask[i] = (M % LaneSize) + Src * Size; + InLaneMask[i] = (M % NumLaneElts) + Src * NumElts; } // If this lane has two sources, see if it fits with the repeat mask so far. @@ -14701,23 +15004,23 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask( if (LaneSrcs[Lane][0] >= 0) continue; - for (int i = 0; i != LaneSize; ++i) { - int M = Mask[(Lane * LaneSize) + i]; + for (int i = 0; i != NumLaneElts; ++i) { + int M = Mask[(Lane * NumLaneElts) + i]; if (M < 0) continue; // If RepeatMask isn't defined yet we can define it ourself. 
if (RepeatMask[i] < 0) - RepeatMask[i] = M % LaneSize; + RepeatMask[i] = M % NumLaneElts; - if (RepeatMask[i] < Size) { - if (RepeatMask[i] != M % LaneSize) + if (RepeatMask[i] < NumElts) { + if (RepeatMask[i] != M % NumLaneElts) return SDValue(); - LaneSrcs[Lane][0] = M / LaneSize; + LaneSrcs[Lane][0] = M / NumLaneElts; } else { - if (RepeatMask[i] != ((M % LaneSize) + Size)) + if (RepeatMask[i] != ((M % NumLaneElts) + NumElts)) return SDValue(); - LaneSrcs[Lane][1] = M / LaneSize; + LaneSrcs[Lane][1] = M / NumLaneElts; } } @@ -14725,14 +15028,14 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask( return SDValue(); } - SmallVector NewMask(Size, -1); + SmallVector NewMask(NumElts, -1); for (int Lane = 0; Lane != NumLanes; ++Lane) { int Src = LaneSrcs[Lane][0]; - for (int i = 0; i != LaneSize; ++i) { + for (int i = 0; i != NumLaneElts; ++i) { int M = -1; if (Src >= 0) - M = Src * LaneSize + i; - NewMask[Lane * LaneSize + i] = M; + M = Src * NumLaneElts + i; + NewMask[Lane * NumLaneElts + i] = M; } } SDValue NewV1 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask); @@ -14745,11 +15048,11 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask( for (int Lane = 0; Lane != NumLanes; ++Lane) { int Src = LaneSrcs[Lane][1]; - for (int i = 0; i != LaneSize; ++i) { + for (int i = 0; i != NumLaneElts; ++i) { int M = -1; if (Src >= 0) - M = Src * LaneSize + i; - NewMask[Lane * LaneSize + i] = M; + M = Src * NumLaneElts + i; + NewMask[Lane * NumLaneElts + i] = M; } } SDValue NewV2 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask); @@ -14760,12 +15063,12 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask( cast(NewV2)->getMask() == Mask) return SDValue(); - for (int i = 0; i != Size; ++i) { - NewMask[i] = RepeatMask[i % LaneSize]; + for (int i = 0; i != NumElts; ++i) { + NewMask[i] = RepeatMask[i % NumLaneElts]; if (NewMask[i] < 0) continue; - NewMask[i] += (i / LaneSize) * LaneSize; + NewMask[i] += (i / NumLaneElts) * NumLaneElts; } return DAG.getVectorShuffle(VT, DL, NewV1, NewV2, NewMask); } @@ -14831,14 +15134,13 @@ getHalfShuffleMask(ArrayRef Mask, MutableArrayRef HalfMask, static SDValue getShuffleHalfVectors(const SDLoc &DL, SDValue V1, SDValue V2, ArrayRef HalfMask, int HalfIdx1, int HalfIdx2, bool UndefLower, - SelectionDAG &DAG) { + SelectionDAG &DAG, bool UseConcat = false) { assert(V1.getValueType() == V2.getValueType() && "Different sized vectors?"); assert(V1.getValueType().isSimple() && "Expecting only simple types"); MVT VT = V1.getSimpleValueType(); - unsigned NumElts = VT.getVectorNumElements(); - unsigned HalfNumElts = NumElts / 2; - MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElts); + MVT HalfVT = VT.getHalfNumVectorElementsVT(); + unsigned HalfNumElts = HalfVT.getVectorNumElements(); auto getHalfVector = [&](int HalfIdx) { if (HalfIdx < 0) @@ -14853,6 +15155,14 @@ static SDValue getShuffleHalfVectors(const SDLoc &DL, SDValue V1, SDValue V2, SDValue Half1 = getHalfVector(HalfIdx1); SDValue Half2 = getHalfVector(HalfIdx2); SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask); + if (UseConcat) { + SDValue Op0 = V; + SDValue Op1 = DAG.getUNDEF(HalfVT); + if (UndefLower) + std::swap(Op0, Op1); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Op0, Op1); + } + unsigned Offset = UndefLower ? 
HalfNumElts : 0; return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, DAG.getIntPtrConstant(Offset, DL)); @@ -14877,9 +15187,8 @@ static SDValue lowerShuffleWithUndefHalf(const SDLoc &DL, MVT VT, SDValue V1, // Upper half is undef and lower half is whole upper subvector. // e.g. vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u> - unsigned NumElts = VT.getVectorNumElements(); - unsigned HalfNumElts = NumElts / 2; - MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElts); + MVT HalfVT = VT.getHalfNumVectorElementsVT(); + unsigned HalfNumElts = HalfVT.getVectorNumElements(); if (!UndefLower && isSequentialOrUndefInRange(Mask, 0, HalfNumElts, HalfNumElts)) { SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1, @@ -15155,11 +15464,19 @@ static SDValue lowerShuffleAsRepeatedMaskAndLanePermute( } static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2, - unsigned &ShuffleImm, ArrayRef Mask) { + bool &ForceV1Zero, bool &ForceV2Zero, + unsigned &ShuffleImm, ArrayRef Mask, + const APInt &Zeroable) { int NumElts = VT.getVectorNumElements(); assert(VT.getScalarSizeInBits() == 64 && (NumElts == 2 || NumElts == 4 || NumElts == 8) && "Unexpected data type for VSHUFPD"); + assert(isUndefOrZeroOrInRange(Mask, 0, 2 * NumElts) && + "Illegal shuffle mask"); + + bool ZeroLane[2] = { true, true }; + for (int i = 0; i < NumElts; ++i) + ZeroLane[i & 1] &= Zeroable[i]; // Mask for V8F64: 0/1, 8/9, 2/3, 10/11, 4/5, .. // Mask for V4F64; 0/1, 4/5, 2/3, 6/7.. @@ -15167,7 +15484,7 @@ static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2, bool ShufpdMask = true; bool CommutableMask = true; for (int i = 0; i < NumElts; ++i) { - if (Mask[i] == SM_SentinelUndef) + if (Mask[i] == SM_SentinelUndef || ZeroLane[i & 1]) continue; if (Mask[i] < 0) return false; @@ -15180,30 +15497,77 @@ static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2, ShuffleImm |= (Mask[i] % 2) << i; } - if (ShufpdMask) - return true; - if (CommutableMask) { + if (!ShufpdMask && !CommutableMask) + return false; + + if (!ShufpdMask && CommutableMask) std::swap(V1, V2); - return true; - } - return false; + ForceV1Zero = ZeroLane[0]; + ForceV2Zero = ZeroLane[1]; + return true; } -static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT, - ArrayRef Mask, SDValue V1, - SDValue V2, SelectionDAG &DAG) { - assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64)&& +static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef Mask, + const APInt &Zeroable, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { + assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64) && "Unexpected data type for VSHUFPD"); unsigned Immediate = 0; - if (!matchShuffleWithSHUFPD(VT, V1, V2, Immediate, Mask)) + bool ForceV1Zero = false, ForceV2Zero = false; + if (!matchShuffleWithSHUFPD(VT, V1, V2, ForceV1Zero, ForceV2Zero, Immediate, + Mask, Zeroable)) return SDValue(); + // Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs. + if (ForceV1Zero) + V1 = getZeroVector(VT, Subtarget, DAG, DL); + if (ForceV2Zero) + V2 = getZeroVector(VT, Subtarget, DAG, DL); + return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2, - DAG.getConstant(Immediate, DL, MVT::i8)); + DAG.getTargetConstant(Immediate, DL, MVT::i8)); } +// Look for {0, 8, 16, 24, 32, 40, 48, 56 } in the first 8 elements. Followed +// by zeroable elements in the remaining 24 elements. Turn this into two +// vmovqb instructions shuffled together. 
+static SDValue lowerShuffleAsVTRUNCAndUnpack(const SDLoc &DL, MVT VT, + SDValue V1, SDValue V2, + ArrayRef Mask, + const APInt &Zeroable, + SelectionDAG &DAG) { + assert(VT == MVT::v32i8 && "Unexpected type!"); + + // The first 8 indices should be every 8th element. + if (!isSequentialOrUndefInRange(Mask, 0, 8, 0, 8)) + return SDValue(); + + // Remaining elements need to be zeroable. + if (Zeroable.countLeadingOnes() < (Mask.size() - 8)) + return SDValue(); + + V1 = DAG.getBitcast(MVT::v4i64, V1); + V2 = DAG.getBitcast(MVT::v4i64, V2); + + V1 = DAG.getNode(X86ISD::VTRUNC, DL, MVT::v16i8, V1); + V2 = DAG.getNode(X86ISD::VTRUNC, DL, MVT::v16i8, V2); + + // The VTRUNCs will put 0s in the upper 12 bytes. Use them to put zeroes in + // the upper bits of the result using an unpckldq. + SDValue Unpack = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, + { 0, 1, 2, 3, 16, 17, 18, 19, + 4, 5, 6, 7, 20, 21, 22, 23 }); + // Insert the unpckldq into a zero vector to widen to v32i8. + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v32i8, + DAG.getConstant(0, DL, MVT::v32i8), Unpack, + DAG.getIntPtrConstant(0, DL)); +} + + /// Handle lowering of 4-lane 64-bit floating point shuffles. /// /// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2 @@ -15236,7 +15600,7 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef Mask, unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) | ((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3); return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f64, V1, - DAG.getConstant(VPERMILPMask, DL, MVT::i8)); + DAG.getTargetConstant(VPERMILPMask, DL, MVT::i8)); } // With AVX2 we have direct support for this permutation. @@ -15256,8 +15620,8 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef Mask, return V; // Otherwise, fall back. - return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v4f64, V1, V2, Mask, DAG, - Subtarget); + return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v4f64, V1, V2, Mask, + DAG, Subtarget); } // Use dedicated unpack instructions for masks that match their pattern. @@ -15269,7 +15633,8 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef Mask, return Blend; // Check if the blend happens to exactly fit that of SHUFPD. - if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v4f64, Mask, V1, V2, DAG)) + if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v4f64, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Op; // If we have one input in place, then we can permute the other input and @@ -15473,8 +15838,8 @@ static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef Mask, return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32, VPermMask, V1); // Otherwise, fall back. 
- return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask, - DAG, Subtarget); + return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v8f32, V1, V2, Mask, + DAG, Subtarget); } // Try to simplify this by merging 128-bit lanes to enable a lane-based @@ -15681,8 +16046,8 @@ static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef Mask, DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget)) return V; - return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2, Mask, - DAG, Subtarget); + return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v16i16, V1, V2, Mask, + DAG, Subtarget); } SmallVector RepeatedMask; @@ -15780,8 +16145,8 @@ static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef Mask, DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget)) return V; - return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask, DAG, - Subtarget); + return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v32i8, V1, V2, Mask, + DAG, Subtarget); } if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i8, Mask, V1, V2, @@ -15803,6 +16168,14 @@ static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef Mask, DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget)) return V; + // Look for {0, 8, 16, 24, 32, 40, 48, 56 } in the first 8 elements. Followed + // by zeroable elements in the remaining 24 elements. Turn this into two + // vmovqb instructions shuffled together. + if (Subtarget.hasVLX()) + if (SDValue V = lowerShuffleAsVTRUNCAndUnpack(DL, MVT::v32i8, V1, V2, + Mask, Zeroable, DAG)) + return V; + // Otherwise fall back on generic lowering. return lowerShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG); @@ -15974,7 +16347,7 @@ static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef Mask, } return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1], - DAG.getConstant(PermMask, DL, MVT::i8)); + DAG.getTargetConstant(PermMask, DL, MVT::i8)); } /// Handle lowering of 8-lane 64-bit floating point shuffles. @@ -15999,7 +16372,7 @@ static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef Mask, ((Mask[4] == 5) << 4) | ((Mask[5] == 5) << 5) | ((Mask[6] == 7) << 6) | ((Mask[7] == 7) << 7); return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f64, V1, - DAG.getConstant(VPERMILPMask, DL, MVT::i8)); + DAG.getTargetConstant(VPERMILPMask, DL, MVT::i8)); } SmallVector RepeatedMask; @@ -16016,7 +16389,8 @@ static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef Mask, return Unpck; // Check if the blend happens to exactly fit that of SHUFPD. - if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v8f64, Mask, V1, V2, DAG)) + if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v8f64, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Op; if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8f64, Zeroable, Mask, V1, V2, @@ -16389,6 +16763,49 @@ static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef Mask, } } +static SDValue lower1BitShuffleAsKSHIFTR(const SDLoc &DL, ArrayRef Mask, + MVT VT, SDValue V1, SDValue V2, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { + // Shuffle should be unary. + if (!V2.isUndef()) + return SDValue(); + + int ShiftAmt = -1; + int NumElts = Mask.size(); + for (int i = 0; i != NumElts; ++i) { + int M = Mask[i]; + assert((M == SM_SentinelUndef || (0 <= M && M < NumElts)) && + "Unexpected mask index."); + if (M < 0) + continue; + + // The first non-undef element determines our shift amount. + if (ShiftAmt < 0) { + ShiftAmt = M - i; + // Need to be shifting right. + if (ShiftAmt <= 0) + return SDValue(); + } + // All non-undef elements must shift by the same amount. 
+ if (ShiftAmt != M - i) + return SDValue(); + } + assert(ShiftAmt >= 0 && "All undef?"); + + // Great we found a shift right. + MVT WideVT = VT; + if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8) + WideVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; + SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, + DAG.getUNDEF(WideVT), V1, + DAG.getIntPtrConstant(0, DL)); + Res = DAG.getNode(X86ISD::KSHIFTR, DL, WideVT, Res, + DAG.getTargetConstant(ShiftAmt, DL, MVT::i8)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, + DAG.getIntPtrConstant(0, DL)); +} + // Determine if this shuffle can be implemented with a KSHIFT instruction. // Returns the shift amount if possible or -1 if not. This is a simplified // version of matchShuffleAsShift. @@ -16434,13 +16851,20 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef Mask, assert(Subtarget.hasAVX512() && "Cannot lower 512-bit vectors w/o basic ISA!"); - unsigned NumElts = Mask.size(); + int NumElts = Mask.size(); // Try to recognize shuffles that are just padding a subvector with zeros. - unsigned SubvecElts = 0; - for (int i = 0; i != (int)NumElts; ++i) { - if (Mask[i] >= 0 && Mask[i] != i) - break; + int SubvecElts = 0; + int Src = -1; + for (int i = 0; i != NumElts; ++i) { + if (Mask[i] >= 0) { + // Grab the source from the first valid mask. All subsequent elements need + // to use this same source. + if (Src < 0) + Src = Mask[i] / NumElts; + if (Src != (Mask[i] / NumElts) || (Mask[i] % NumElts) != i) + break; + } ++SubvecElts; } @@ -16451,30 +16875,54 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef Mask, // Make sure the number of zeroable bits in the top at least covers the bits // not covered by the subvector. - if (Zeroable.countLeadingOnes() >= (NumElts - SubvecElts)) { + if ((int)Zeroable.countLeadingOnes() >= (NumElts - SubvecElts)) { + assert(Src >= 0 && "Expected a source!"); MVT ExtractVT = MVT::getVectorVT(MVT::i1, SubvecElts); SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, - V1, DAG.getIntPtrConstant(0, DL)); + Src == 0 ? V1 : V2, + DAG.getIntPtrConstant(0, DL)); return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, - getZeroVector(VT, Subtarget, DAG, DL), + DAG.getConstant(0, DL, VT), Extract, DAG.getIntPtrConstant(0, DL)); } + // Try a simple shift right with undef elements. Later we'll try with zeros. + if (SDValue Shift = lower1BitShuffleAsKSHIFTR(DL, Mask, VT, V1, V2, Subtarget, + DAG)) + return Shift; + // Try to match KSHIFTs. - // TODO: Support narrower than legal shifts by widening and extracting. - if (NumElts >= 16 || (Subtarget.hasDQI() && NumElts == 8)) { - unsigned Offset = 0; - for (SDValue V : { V1, V2 }) { - unsigned Opcode; - int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable); - if (ShiftAmt >= 0) - return DAG.getNode(Opcode, DL, VT, V, - DAG.getConstant(ShiftAmt, DL, MVT::i8)); - Offset += NumElts; // Increment for next iteration. + unsigned Offset = 0; + for (SDValue V : { V1, V2 }) { + unsigned Opcode; + int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable); + if (ShiftAmt >= 0) { + MVT WideVT = VT; + if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8) + WideVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; + SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, + DAG.getUNDEF(WideVT), V, + DAG.getIntPtrConstant(0, DL)); + // Widened right shifts need two shifts to ensure we shift in zeroes. 
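For readers following the new lower1BitShuffleAsKSHIFTR helper above, here is a minimal standalone sketch of the same mask check, independent of the LLVM APIs; std::vector<int> stands in for ArrayRef<int> and -1 marks an undef lane:

  #include <cassert>
  #include <vector>

  // Return the uniform right-shift amount implied by Mask, or -1 if the mask
  // is not a right shift. An entry of -1 marks an undef lane.
  static int matchMaskAsShiftRight(const std::vector<int> &Mask) {
    int NumElts = (int)Mask.size();
    int ShiftAmt = -1;
    for (int i = 0; i != NumElts; ++i) {
      int M = Mask[i];
      if (M < 0)
        continue;                   // Undef lanes may take any value.
      if (ShiftAmt < 0) {
        ShiftAmt = M - i;           // First defined lane fixes the amount.
        if (ShiftAmt <= 0)
          return -1;                // Must move elements toward lane 0.
      } else if (ShiftAmt != M - i) {
        return -1;                  // Every defined lane must agree.
      }
    }
    return ShiftAmt;                // Stays -1 for an all-undef mask.
  }

  int main() {
    assert(matchMaskAsShiftRight({1, 2, 3, 4, 5, 6, 7, -1}) == 1);
    assert(matchMaskAsShiftRight({2, 3, -1, -1, 6, 7, -1, -1}) == 2);
    assert(matchMaskAsShiftRight({0, 1, 2, 3}) == -1);   // identity, not a shift
    assert(matchMaskAsShiftRight({1, 3, -1, -1}) == -1); // lanes disagree
    return 0;
  }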
+ if (Opcode == X86ISD::KSHIFTR && WideVT != VT) { + int WideElts = WideVT.getVectorNumElements(); + // Shift left to put the original vector in the MSBs of the new size. + Res = DAG.getNode(X86ISD::KSHIFTL, DL, WideVT, Res, + DAG.getTargetConstant(WideElts - NumElts, DL, MVT::i8)); + // Increase the shift amount to account for the left shift. + ShiftAmt += WideElts - NumElts; + } + + Res = DAG.getNode(Opcode, DL, WideVT, Res, + DAG.getTargetConstant(ShiftAmt, DL, MVT::i8)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, + DAG.getIntPtrConstant(0, DL)); } + Offset += NumElts; // Increment for next iteration. } + MVT ExtVT; switch (VT.SimpleTy) { default: @@ -16594,7 +17042,7 @@ static bool canonicalizeShuffleMaskWithCommute(ArrayRef Mask) { static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast(Op); - ArrayRef Mask = SVOp->getMask(); + ArrayRef OrigMask = SVOp->getMask(); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); MVT VT = Op.getSimpleValueType(); @@ -16620,8 +17068,8 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget, // undef as well. This makes it easier to match the shuffle based solely on // the mask. if (V2IsUndef && - any_of(Mask, [NumElements](int M) { return M >= NumElements; })) { - SmallVector NewMask(Mask.begin(), Mask.end()); + any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) { + SmallVector NewMask(OrigMask.begin(), OrigMask.end()); for (int &M : NewMask) if (M >= NumElements) M = -1; @@ -16629,15 +17077,16 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget, } // Check for illegal shuffle mask element index values. - int MaskUpperLimit = Mask.size() * (V2IsUndef ? 1 : 2); (void)MaskUpperLimit; - assert(llvm::all_of(Mask, + int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2); + (void)MaskUpperLimit; + assert(llvm::all_of(OrigMask, [&](int M) { return -1 <= M && M < MaskUpperLimit; }) && "Out of bounds shuffle index"); // We actually see shuffles that are entirely re-arrangements of a set of // zero inputs. This mostly happens while decomposing complex shuffles into // simple ones. Directly lower these as a buildvector of zeros. - APInt Zeroable = computeZeroableShuffleElements(Mask, V1, V2); + APInt Zeroable = computeZeroableShuffleElements(OrigMask, V1, V2); if (Zeroable.isAllOnesValue()) return getZeroVector(VT, Subtarget, DAG, DL); @@ -16645,11 +17094,11 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget, // Create an alternative mask with info about zeroable elements. // Here we do not set undef elements as zeroable. - SmallVector ZeroableMask(Mask.begin(), Mask.end()); + SmallVector ZeroableMask(OrigMask.begin(), OrigMask.end()); if (V2IsZero) { assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!"); for (int i = 0; i != NumElements; ++i) - if (Mask[i] != SM_SentinelUndef && Zeroable[i]) + if (OrigMask[i] != SM_SentinelUndef && Zeroable[i]) ZeroableMask[i] = SM_SentinelZero; } @@ -16664,7 +17113,7 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget, // by obfuscating the operands with bitcasts. // TODO: Avoid lowering directly from this top-level function: make this // a query (canLowerAsBroadcast) and defer lowering to the type-based calls. 
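The two-shift sequence above, which widens a narrow mask register and then shifts zeros in from the top when the shuffle needs zeros rather than undef lanes, can be modeled with plain integer shifts. A small sketch, treating a 16-bit integer as a stand-in for a k-register that holds a v8i1 value in its low bits with garbage above:

  #include <cassert>
  #include <cstdint>

  // Emulate "shift an 8-bit mask right by Amt, filling with zeros" using only
  // 16-bit register shifts, mirroring the KSHIFTL + widened KSHIFTR sequence.
  static uint8_t shiftMask8ViaWide16(uint16_t WideReg, unsigned Amt) {
    WideReg <<= 8;           // KSHIFTL: move the real bits to the MSBs, drop garbage.
    WideReg >>= (8 + Amt);   // KSHIFTR: original amount plus the widening offset.
    return (uint8_t)WideReg; // Extract the low v8i1 result.
  }

  int main() {
    uint16_t Reg = 0xAB00 | 0b10110101; // unknown upper bits, real 8-bit mask below
    assert(shiftMask8ViaWide16(Reg, 2) == 0b00101101);
    assert(shiftMask8ViaWide16(Reg, 7) == 0b00000001);
    return 0;
  }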
- if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask, + if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, OrigMask, Subtarget, DAG)) return Broadcast; @@ -16700,8 +17149,11 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget, } // Commute the shuffle if it will improve canonicalization. - if (canonicalizeShuffleMaskWithCommute(Mask)) - return DAG.getCommutedVectorShuffle(*SVOp); + SmallVector Mask(OrigMask.begin(), OrigMask.end()); + if (canonicalizeShuffleMaskWithCommute(Mask)) { + ShuffleVectorSDNode::commuteMask(Mask); + std::swap(V1, V2); + } if (SDValue V = lowerShuffleWithVPMOV(DL, Mask, VT, V1, V2, DAG, Subtarget)) return V; @@ -16910,7 +17362,7 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG, // Use kshiftr instruction to move to the lower element. Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec, - DAG.getConstant(IdxVal, dl, MVT::i8)); + DAG.getTargetConstant(IdxVal, dl, MVT::i8)); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec, DAG.getIntPtrConstant(0, dl)); @@ -17137,8 +17589,8 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, if ((Subtarget.hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) || (Subtarget.hasAVX2() && EltVT == MVT::i32)) { SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1); - N2 = DAG.getIntPtrConstant(1, dl); - return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec, N2); + return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec, + DAG.getTargetConstant(1, dl, MVT::i8)); } } @@ -17207,14 +17659,14 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, // But if optimizing for size and there's a load folding opportunity, // generate insertps because blendps does not have a 32-bit memory // operand form. - N2 = DAG.getIntPtrConstant(1, dl); N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1); - return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1, N2); + return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1, + DAG.getTargetConstant(1, dl, MVT::i8)); } - N2 = DAG.getIntPtrConstant(IdxVal << 4, dl); // Create this as a scalar to vector.. N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1); - return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2); + return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, + DAG.getTargetConstant(IdxVal << 4, dl, MVT::i8)); } // PINSR* works with constant index. @@ -17300,7 +17752,7 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, // Shift to the LSB. Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec, - DAG.getConstant(IdxVal, dl, MVT::i8)); + DAG.getTargetConstant(IdxVal, dl, MVT::i8)); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, Op.getValueType(), Vec, DAG.getIntPtrConstant(0, dl)); @@ -17841,10 +18293,10 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, std::swap(Op0, Op1); APInt APIntShiftAmt; - if (isConstantSplat(Amt, APIntShiftAmt)) { + if (X86::isConstantSplat(Amt, APIntShiftAmt)) { uint64_t ShiftAmt = APIntShiftAmt.urem(VT.getScalarSizeInBits()); - return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, - Op0, Op1, DAG.getConstant(ShiftAmt, DL, MVT::i8)); + return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, Op0, + Op1, DAG.getTargetConstant(ShiftAmt, DL, MVT::i8)); } return DAG.getNode(IsFSHR ? 
X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT, @@ -17970,6 +18422,9 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); + if (VT == MVT::f128) + return LowerF128Call(Op, DAG, RTLIB::getSINTTOFP(SrcVT, VT)); + if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget)) return Extract; @@ -18072,6 +18527,16 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, return Result; } +/// Horizontal vector math instructions may be slower than normal math with +/// shuffles. Limit horizontal op codegen based on size/speed trade-offs, uarch +/// implementation, and likely shuffle complexity of the alternate sequence. +static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + bool IsOptimizingSize = DAG.getMachineFunction().getFunction().hasOptSize(); + bool HasFastHOps = Subtarget.hasFastHorizontalOps(); + return !IsSingleSource || IsOptimizingSize || HasFastHOps; +} + /// 64-bit unsigned integer to double expansion. static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { @@ -18126,8 +18591,7 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG, SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); SDValue Result; - if (Subtarget.hasSSE3()) { - // FIXME: The 'haddpd' instruction may be slower than 'shuffle + addsd'. + if (Subtarget.hasSSE3() && shouldUseHorizontalOp(true, DAG, Subtarget)) { Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub); } else { SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1}); @@ -18273,7 +18737,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, // Low will be bitcasted right away, so do not bother bitcasting back to its // original type. Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast, - VecCstLowBitcast, DAG.getConstant(0xaa, DL, MVT::i32)); + VecCstLowBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8)); // uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16), // (uint4) 0x53000000, 0xaa); SDValue VecCstHighBitcast = DAG.getBitcast(VecI16VT, VecCstHigh); @@ -18281,7 +18745,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, // High will be bitcasted right away, so do not bother bitcasting back to // its original type. 
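The surrounding lowerUINT_TO_FP_vXi32 hunk assembles the classic magic-constant conversion: the low and high 16-bit halves of each lane are blended into floats whose exponents encode 2^23 and 2^39, and ordinary FP arithmetic then reassembles the value. A scalar sketch of that arithmetic, assuming IEEE-754 binary32 and round-to-nearest; the bias constant 0x53000080 used below encodes 2^39 + 2^23:

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  static float bitsToFloat(uint32_t Bits) {
    float F;
    std::memcpy(&F, &Bits, sizeof(F)); // bit-cast, no numeric conversion
    return F;
  }

  // Build two floats whose mantissas hold the low and high halves of V, then
  // let FP arithmetic reassemble (and correctly round) the value.
  static float uint32ToFloatViaMagic(uint32_t V) {
    float Lo = bitsToFloat((V & 0xFFFFu) | 0x4B000000u); // 2^23 + low16
    float Hi = bitsToFloat((V >> 16)     | 0x53000000u); // 2^39 + high16 * 2^16
    float Bias = bitsToFloat(0x53000080u);               // 2^39 + 2^23
    return (Hi - Bias) + Lo;                             // only the last add rounds
  }

  int main() {
    for (uint32_t V : {0u, 1u, 0xFFFFu, 0x12345678u, 0x80000000u, 0xFFFFFFFFu})
      assert(uint32ToFloatViaMagic(V) == (float)V);
    return 0;
  }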
High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast, - VecCstHighBitcast, DAG.getConstant(0xaa, DL, MVT::i32)); + VecCstHighBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8)); } else { SDValue VecCstMask = DAG.getConstant(0xffff, DL, VecIntVT); // uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000; @@ -18329,16 +18793,18 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SDValue N0 = Op.getOperand(0); SDLoc dl(Op); auto PtrVT = getPointerTy(DAG.getDataLayout()); + MVT SrcVT = N0.getSimpleValueType(); + MVT DstVT = Op.getSimpleValueType(); + + if (DstVT == MVT::f128) + return LowerF128Call(Op, DAG, RTLIB::getUINTTOFP(SrcVT, DstVT)); - if (Op.getSimpleValueType().isVector()) + if (DstVT.isVector()) return lowerUINT_TO_FP_vec(Op, DAG, Subtarget); if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget)) return Extract; - MVT SrcVT = N0.getSimpleValueType(); - MVT DstVT = Op.getSimpleValueType(); - if (Subtarget.hasAVX512() && isScalarFPTypeInSSEReg(DstVT) && (SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget.is64Bit()))) { // Conversions from unsigned i32 to f32/f64 are legal, @@ -18346,6 +18812,12 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, return Op; } + // Promote i32 to i64 and use a signed conversion on 64-bit targets. + if (SrcVT == MVT::i32 && Subtarget.is64Bit()) { + N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, N0); + return DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, N0); + } + if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget)) return V; @@ -18579,7 +19051,7 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, // Custom legalize v8i8->v8i64 on CPUs without avx512bw. if (InVT == MVT::v8i8) { - if (!ExperimentalVectorWideningLegalization || VT != MVT::v8i64) + if (VT != MVT::v8i64) return SDValue(); In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), @@ -18602,10 +19074,7 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, // Use vpunpckhdq for 4 upper elements v4i32 -> v2i64. // Concat upper and lower parts. // - - MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), - VT.getVectorNumElements() / 2); - + MVT HalfVT = VT.getHalfNumVectorElementsVT(); SDValue OpLo = DAG.getNode(ExtendInVecOpc, dl, HalfVT, In); // Short-circuit if we can determine that each 128-bit half is the same value. @@ -18903,9 +19372,29 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { assert(VT.getVectorNumElements() == InVT.getVectorNumElements() && "Invalid TRUNCATE operation"); - // If called by the legalizer just return. - if (!DAG.getTargetLoweringInfo().isTypeLegal(InVT)) + // If we're called by the type legalizer, handle a few cases. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isTypeLegal(InVT)) { + if ((InVT == MVT::v8i64 || InVT == MVT::v16i32 || InVT == MVT::v16i64) && + VT.is128BitVector()) { + assert(Subtarget.hasVLX() && "Unexpected subtarget!"); + // The default behavior is to truncate one step, concatenate, and then + // truncate the remainder. We'd rather produce two 64-bit results and + // concatenate those. + SDValue Lo, Hi; + std::tie(Lo, Hi) = DAG.SplitVector(In, DL); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); + + Lo = DAG.getNode(ISD::TRUNCATE, DL, LoVT, Lo); + Hi = DAG.getNode(ISD::TRUNCATE, DL, HiVT, Hi); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); + } + + // Otherwise let default legalization handle it. 
return SDValue(); + } if (VT.getVectorElementType() == MVT::i1) return LowerTruncateVecI1(Op, DAG, Subtarget); @@ -18940,6 +19429,9 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget)) return V; + // Handle truncation of V256 to V128 using shuffles. + assert(VT.is128BitVector() && InVT.is256BitVector() && "Unexpected types!"); + if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) { // On AVX2, v4i64 -> v4i32 becomes VPERMD. if (Subtarget.hasInt256()) { @@ -19016,22 +19508,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(X86ISD::PACKUS, DL, VT, InLo, InHi); } - // Handle truncation of V256 to V128 using shuffles. - assert(VT.is128BitVector() && InVT.is256BitVector() && "Unexpected types!"); - - assert(Subtarget.hasAVX() && "256-bit vector without AVX!"); - - unsigned NumElems = VT.getVectorNumElements(); - MVT NVT = MVT::getVectorVT(VT.getVectorElementType(), NumElems * 2); - - SmallVector MaskVec(NumElems * 2, -1); - // Prepare truncation shuffle mask - for (unsigned i = 0; i != NumElems; ++i) - MaskVec[i] = i * 2; - In = DAG.getBitcast(NVT, In); - SDValue V = DAG.getVectorShuffle(NVT, DL, In, In, MaskVec); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, - DAG.getIntPtrConstant(0, DL)); + llvm_unreachable("All 256->128 cases should have been handled above!"); } SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { @@ -19041,6 +19518,17 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { MVT SrcVT = Src.getSimpleValueType(); SDLoc dl(Op); + if (SrcVT == MVT::f128) { + RTLIB::Libcall LC; + if (Op.getOpcode() == ISD::FP_TO_SINT) + LC = RTLIB::getFPTOSINT(SrcVT, VT); + else + LC = RTLIB::getFPTOUINT(SrcVT, VT); + + MakeLibCallOptions CallOptions; + return makeLibCall(DAG, LC, VT, Src, CallOptions, SDLoc(Op)).first; + } + if (VT.isVector()) { if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) { MVT ResVT = MVT::v4i32; @@ -19075,14 +19563,27 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { bool UseSSEReg = isScalarFPTypeInSSEReg(SrcVT); - if (!IsSigned && Subtarget.hasAVX512()) { - // Conversions from f32/f64 should be legal. - if (UseSSEReg) + if (!IsSigned && UseSSEReg) { + // Conversions from f32/f64 with AVX512 should be legal. + if (Subtarget.hasAVX512()) return Op; - // Use default expansion. + // Use default expansion for i64. if (VT == MVT::i64) return SDValue(); + + assert(VT == MVT::i32 && "Unexpected VT!"); + + // Promote i32 to i64 and use a signed operation on 64-bit targets. + if (Subtarget.is64Bit()) { + SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i64, Src); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Res); + } + + // Use default expansion for SSE1/2 targets without SSE3. With SSE3 we can + // use fisttp which will be handled later. + if (!Subtarget.hasSSE3()) + return SDValue(); } // Promote i16 to i32 if we can use a SSE operation. 
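Both i32 promotions above lean on the same observation: every uint32 value fits in a signed 64-bit integer, so a 64-bit target can reuse the signed conversions. A scalar sketch of the two directions:

  #include <cassert>
  #include <cstdint>

  // FP_TO_UINT i32: use the signed 64-bit conversion, then truncate.
  static uint32_t floatToUint32ViaSigned64(float F) {
    return (uint32_t)(int64_t)F;         // cvttss2si to a 64-bit register + truncation
  }

  // UINT_TO_FP i32: zero-extend to 64 bits, then use the signed conversion.
  static double uint32ToDoubleViaSigned64(uint32_t V) {
    return (double)(int64_t)(uint64_t)V; // cvtsi2sd on the zero-extended value
  }

  int main() {
    assert(floatToUint32ViaSigned64(3000000000.0f) == 3000000000u); // > INT32_MAX
    assert(floatToUint32ViaSigned64(4294967040.0f) == 4294967040u); // largest float < 2^32
    assert(uint32ToDoubleViaSigned64(0xFFFFFFFFu) == 4294967295.0);
    assert(uint32ToDoubleViaSigned64(0x80000000u) == 2147483648.0);
    return 0;
  }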
@@ -19103,12 +19604,17 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("Expected FP_TO_INTHelper to handle all remaining cases."); } -static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); MVT VT = Op.getSimpleValueType(); SDValue In = Op.getOperand(0); MVT SVT = In.getSimpleValueType(); + if (VT == MVT::f128) { + RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, VT); + return LowerF128Call(Op, DAG, LC); + } + assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!"); return DAG.getNode(X86ISD::VFPEXT, DL, VT, @@ -19116,14 +19622,31 @@ static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) { In, DAG.getUNDEF(SVT))); } -/// Horizontal vector math instructions may be slower than normal math with -/// shuffles. Limit horizontal op codegen based on size/speed trade-offs, uarch -/// implementation, and likely shuffle complexity of the alternate sequence. -static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { - bool IsOptimizingSize = DAG.getMachineFunction().getFunction().hasOptSize(); - bool HasFastHOps = Subtarget.hasFastHorizontalOps(); - return !IsSingleSource || IsOptimizingSize || HasFastHOps; +SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { + MVT VT = Op.getSimpleValueType(); + SDValue In = Op.getOperand(0); + MVT SVT = In.getSimpleValueType(); + + // It's legal except when f128 is involved + if (SVT != MVT::f128) + return Op; + + RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, VT); + + // FP_ROUND node has a second operand indicating whether it is known to be + // precise. That doesn't take part in the LibCall so we can't directly use + // LowerF128Call. + MakeLibCallOptions CallOptions; + return makeLibCall(DAG, LC, VT, In, CallOptions, SDLoc(Op)).first; +} + +// FIXME: This is a hack to allow FP_ROUND to be marked Custom without breaking +// the default expansion of STRICT_FP_ROUND. +static SDValue LowerSTRICT_FP_ROUND(SDValue Op, SelectionDAG &DAG) { + // FIXME: Need to form a libcall with an input chain for f128. + assert(Op.getOperand(0).getValueType() != MVT::f128 && + "Don't know how to handle f128 yet!"); + return Op; } /// Depending on uarch and/or optimizing for size, we might prefer to use a @@ -19200,8 +19723,13 @@ static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG, /// Depending on uarch and/or optimizing for size, we might prefer to use a /// vector operation in place of the typical scalar operation. -static SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { +SDValue X86TargetLowering::lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const { + if (Op.getValueType() == MVT::f128) { + RTLIB::Libcall LC = Op.getOpcode() == ISD::FADD ? 
RTLIB::ADD_F128 + : RTLIB::SUB_F128; + return LowerF128Call(Op, DAG, LC); + } + assert((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) && "Only expecting float/double"); return lowerAddSubToHorizontalOp(Op, DAG, Subtarget); @@ -19358,13 +19886,13 @@ static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) { static SDValue getSETCC(X86::CondCode Cond, SDValue EFLAGS, const SDLoc &dl, SelectionDAG &DAG) { return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(Cond, dl, MVT::i8), EFLAGS); + DAG.getTargetConstant(Cond, dl, MVT::i8), EFLAGS); } /// Helper for matching OR(EXTRACTELT(X,0),OR(EXTRACTELT(X,1),...)) /// style scalarized (associative) reduction patterns. -static bool matchBitOpReduction(SDValue Op, ISD::NodeType BinOp, - SmallVectorImpl &SrcOps) { +static bool matchScalarReduction(SDValue Op, ISD::NodeType BinOp, + SmallVectorImpl &SrcOps) { SmallVector Opnds; DenseMap SrcOpMap; EVT VT = MVT::Other; @@ -19437,7 +19965,7 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, ISD::CondCode CC, return SDValue(); SmallVector VecIns; - if (!matchBitOpReduction(Op, ISD::OR, VecIns)) + if (!matchScalarReduction(Op, ISD::OR, VecIns)) return SDValue(); // Quit if not 128/256-bit vector. @@ -19461,8 +19989,8 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, ISD::CondCode CC, VecIns.push_back(DAG.getNode(ISD::OR, DL, TestVT, LHS, RHS)); } - X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE, DL, - MVT::i8); + X86CC = DAG.getTargetConstant(CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE, + DL, MVT::i8); return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, VecIns.back(), VecIns.back()); } @@ -19576,6 +20104,13 @@ static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl, case X86ISD::XOR: case X86ISD::AND: return SDValue(Op.getNode(), 1); + case ISD::SSUBO: + case ISD::USUBO: { + // /USUBO/SSUBO will become a X86ISD::SUB and we can use its Z flag. + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + return DAG.getNode(X86ISD::SUB, dl, VTs, Op->getOperand(0), + Op->getOperand(1)).getValue(1); + } default: default_case: break; @@ -19766,6 +20301,63 @@ unsigned X86TargetLowering::combineRepeatedFPDivisors() const { return 2; } +SDValue +X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, + SelectionDAG &DAG, + SmallVectorImpl &Created) const { + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); + if (isIntDivCheap(N->getValueType(0), Attr)) + return SDValue(N,0); // Lower SDIV as SDIV + + assert((Divisor.isPowerOf2() || (-Divisor).isPowerOf2()) && + "Unexpected divisor!"); + + // Only perform this transform if CMOV is supported otherwise the select + // below will become a branch. + if (!Subtarget.hasCMov()) + return SDValue(); + + // fold (sdiv X, pow2) + EVT VT = N->getValueType(0); + // FIXME: Support i8. + if (VT != MVT::i16 && VT != MVT::i32 && + !(Subtarget.is64Bit() && VT == MVT::i64)) + return SDValue(); + + unsigned Lg2 = Divisor.countTrailingZeros(); + + // If the divisor is 2 or -2, the default expansion is better. + if (Lg2 == 1) + return SDValue(); + + SDLoc DL(N); + SDValue N0 = N->getOperand(0); + SDValue Zero = DAG.getConstant(0, DL, VT); + APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2); + SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT); + + // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right. 
+ SDValue Cmp = DAG.getSetCC(DL, MVT::i8, N0, Zero, ISD::SETLT); + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne); + SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0); + + Created.push_back(Cmp.getNode()); + Created.push_back(Add.getNode()); + Created.push_back(CMov.getNode()); + + // Divide by pow2. + SDValue SRA = + DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i64)); + + // If we're dividing by a positive value, we're done. Otherwise, we must + // negate the result. + if (Divisor.isNonNegative()) + return SRA; + + Created.push_back(SRA.getNode()); + return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA); +} + /// Result of 'and' is compared against zero. Change to a BT node if possible. /// Returns the BT node and the condition code needed to use it. static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, @@ -19842,8 +20434,8 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, if (Src.getValueType() != BitNo.getValueType()) BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo); - X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B, - dl, MVT::i8); + X86CC = DAG.getTargetConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B, + dl, MVT::i8); return DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo); } @@ -19935,13 +20527,6 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) { ISD::CondCode SetCCOpcode = cast(CC)->get(); - // If this is a seteq make sure any build vectors of all zeros are on the RHS. - // This helps with vptestm matching. - // TODO: Should we just canonicalize the setcc during DAG combine? - if ((SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE) && - ISD::isBuildVectorAllZeros(Op0.getNode())) - std::swap(Op0, Op1); - // Prefer SETGT over SETLT. if (SetCCOpcode == ISD::SETLT) { SetCCOpcode = ISD::getSetCCSwappedOperands(SetCCOpcode); @@ -20007,7 +20592,7 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT, // Only do this pre-AVX since vpcmp* is no longer destructive. if (Subtarget.hasAVX()) return SDValue(); - SDValue ULEOp1 = incDecVectorConstant(Op1, DAG, false); + SDValue ULEOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/false); if (!ULEOp1) return SDValue(); Op1 = ULEOp1; @@ -20018,7 +20603,7 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT, // This is beneficial because materializing a constant 0 for the PCMPEQ is // probably cheaper than XOR+PCMPGT using 2 different vector constants: // cmpgt (xor X, SignMaskC) CmpC --> cmpeq (usubsat (CmpC+1), X), 0 - SDValue UGEOp1 = incDecVectorConstant(Op1, DAG, true); + SDValue UGEOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/true); if (!UGEOp1) return SDValue(); Op1 = Op0; @@ -20086,14 +20671,14 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, } SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1, - DAG.getConstant(CC0, dl, MVT::i8)); + DAG.getTargetConstant(CC0, dl, MVT::i8)); SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1, - DAG.getConstant(CC1, dl, MVT::i8)); + DAG.getTargetConstant(CC1, dl, MVT::i8)); Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1); } else { // Handle all other FP comparisons here. 
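The BuildSDIVPow2 hunk above emits SETL + CMOV + SAR to divide by plus or minus a power of two without a branch. A scalar sketch of the computation, assuming an arithmetic right shift on signed int (what the emitted SAR provides; guaranteed in C++ only from C++20 onward):

  #include <cassert>

  // Signed division by +/-2^Lg2: add (2^Lg2 - 1) when the dividend is
  // negative, shift right arithmetically, negate for a negative divisor.
  static int sdivByPow2(int N, int Lg2, bool NegativeDivisor) {
    int Bias = (1 << Lg2) - 1;
    int Adjusted = N < 0 ? N + Bias : N;  // the CMOV in the lowered sequence
    int Quotient = Adjusted >> Lg2;       // SAR; the bias makes this round toward zero
    return NegativeDivisor ? -Quotient : Quotient;
  }

  int main() {
    for (int N : {-17, -16, -1, 0, 1, 15, 16, 17}) {
      assert(sdivByPow2(N, 4, false) == N / 16);
      assert(sdivByPow2(N, 4, true) == N / -16);
    }
    return 0;
  }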
Cmp = DAG.getNode(Opc, dl, VT, Op0, Op1, - DAG.getConstant(SSECC, dl, MVT::i8)); + DAG.getTargetConstant(SSECC, dl, MVT::i8)); } // If this is SSE/AVX CMPP, bitcast the result back to integer to match the @@ -20106,16 +20691,12 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, } MVT VTOp0 = Op0.getSimpleValueType(); + (void)VTOp0; assert(VTOp0 == Op1.getSimpleValueType() && "Expected operands with same type!"); assert(VT.getVectorNumElements() == VTOp0.getVectorNumElements() && "Invalid number of packed elements for source and destination!"); - // This is being called by type legalization because v2i32 is marked custom - // for result type legalization for v2f32. - if (VTOp0 == MVT::v2i32) - return SDValue(); - // The non-AVX512 code below works under the assumption that source and // destination types are the same. assert((Subtarget.hasAVX512() || (VT == VTOp0)) && @@ -20153,7 +20734,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, ISD::isUnsignedIntSetCC(Cond) ? X86ISD::VPCOMU : X86ISD::VPCOM; return DAG.getNode(Opc, dl, VT, Op0, Op1, - DAG.getConstant(CmpMode, dl, MVT::i8)); + DAG.getTargetConstant(CmpMode, dl, MVT::i8)); } // (X & Y) != 0 --> (X & Y) == Y iff Y is power-of-2. @@ -20222,21 +20803,19 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, TLI.isOperationLegal(ISD::UMIN, VT)) { // If we have a constant operand, increment/decrement it and change the // condition to avoid an invert. - if (Cond == ISD::SETUGT && - ISD::matchUnaryPredicate(Op1, [](ConstantSDNode *C) { - return !C->getAPIntValue().isMaxValue(); - })) { + if (Cond == ISD::SETUGT) { // X > C --> X >= (C+1) --> X == umax(X, C+1) - Op1 = DAG.getNode(ISD::ADD, dl, VT, Op1, DAG.getConstant(1, dl, VT)); - Cond = ISD::SETUGE; + if (SDValue UGTOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/true)) { + Op1 = UGTOp1; + Cond = ISD::SETUGE; + } } - if (Cond == ISD::SETULT && - ISD::matchUnaryPredicate(Op1, [](ConstantSDNode *C) { - return !C->getAPIntValue().isNullValue(); - })) { + if (Cond == ISD::SETULT) { // X < C --> X <= (C-1) --> X == umin(X, C-1) - Op1 = DAG.getNode(ISD::SUB, dl, VT, Op1, DAG.getConstant(1, dl, VT)); - Cond = ISD::SETULE; + if (SDValue ULTOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/false)) { + Op1 = ULTOp1; + Cond = ISD::SETULE; + } } bool Invert = false; unsigned Opc; @@ -20360,11 +20939,11 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, return Result; } -// Try to select this as a KORTEST+SETCC if possible. -static SDValue EmitKORTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC, - const SDLoc &dl, SelectionDAG &DAG, - const X86Subtarget &Subtarget, - SDValue &X86CC) { +// Try to select this as a KORTEST+SETCC or KTEST+SETCC if possible. +static SDValue EmitAVX512Test(SDValue Op0, SDValue Op1, ISD::CondCode CC, + const SDLoc &dl, SelectionDAG &DAG, + const X86Subtarget &Subtarget, + SDValue &X86CC) { // Only support equality comparisons. if (CC != ISD::SETEQ && CC != ISD::SETNE) return SDValue(); @@ -20389,6 +20968,21 @@ static SDValue EmitKORTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC, } else return SDValue(); + // If the input is an AND, we can combine it's operands into the KTEST. 
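The LowerVSETCC rewrite above turns unsigned compares against a constant into an equality against a UMIN/UMAX result, bumping the constant by one so no inversion is needed. A scalar sketch over unsigned bytes, assuming the constant does not wrap (the case incDecVectorConstant guards against):

  #include <algorithm>
  #include <cassert>
  #include <cstdint>

  // x u>  c  <=>  x u>= c+1  <=>  max(x, c+1) == x   (requires c < 255)
  static bool ugtViaUmax(uint8_t X, uint8_t C) {
    return std::max<uint8_t>(X, (uint8_t)(C + 1)) == X;
  }

  // x u<  c  <=>  x u<= c-1  <=>  min(x, c-1) == x   (requires c > 0)
  static bool ultViaUmin(uint8_t X, uint8_t C) {
    return std::min<uint8_t>(X, (uint8_t)(C - 1)) == X;
  }

  int main() {
    for (unsigned X = 0; X < 256; ++X) {
      for (unsigned C : {1u, 7u, 128u, 254u}) {   // constants where no wrap occurs
        assert(ugtViaUmax((uint8_t)X, (uint8_t)C) == (X > C));
        assert(ultViaUmin((uint8_t)X, (uint8_t)C) == (X < C));
      }
    }
    return 0;
  }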
+ bool KTestable = false; + if (Subtarget.hasDQI() && (VT == MVT::v8i1 || VT == MVT::v16i1)) + KTestable = true; + if (Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1)) + KTestable = true; + if (!isNullConstant(Op1)) + KTestable = false; + if (KTestable && Op0.getOpcode() == ISD::AND && Op0.hasOneUse()) { + SDValue LHS = Op0.getOperand(0); + SDValue RHS = Op0.getOperand(1); + X86CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8); + return DAG.getNode(X86ISD::KTEST, dl, MVT::i32, LHS, RHS); + } + // If the input is an OR, we can combine it's operands into the KORTEST. SDValue LHS = Op0; SDValue RHS = Op0; @@ -20397,7 +20991,7 @@ static SDValue EmitKORTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC, RHS = Op0.getOperand(1); } - X86CC = DAG.getConstant(X86Cond, dl, MVT::i8); + X86CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8); return DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS); } @@ -20425,9 +21019,9 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1, return PTEST; } - // Try to lower using KORTEST. - if (SDValue KORTEST = EmitKORTEST(Op0, Op1, CC, dl, DAG, Subtarget, X86CC)) - return KORTEST; + // Try to lower using KORTEST or KTEST. + if (SDValue Test = EmitAVX512Test(Op0, Op1, CC, dl, DAG, Subtarget, X86CC)) + return Test; // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of // these. @@ -20442,7 +21036,7 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1, if (Invert) { X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0); CCode = X86::GetOppositeBranchCondition(CCode); - X86CC = DAG.getConstant(CCode, dl, MVT::i8); + X86CC = DAG.getTargetConstant(CCode, dl, MVT::i8); } return Op0.getOperand(1); @@ -20456,7 +21050,7 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1, SDValue EFLAGS = EmitCmp(Op0, Op1, CondCode, dl, DAG); EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG); - X86CC = DAG.getConstant(CondCode, dl, MVT::i8); + X86CC = DAG.getTargetConstant(CondCode, dl, MVT::i8); return EFLAGS; } @@ -20472,6 +21066,19 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); ISD::CondCode CC = cast(Op.getOperand(2))->get(); + // Handle f128 first, since one possible outcome is a normal integer + // comparison which gets handled by emitFlagsForSetcc. + if (Op0.getValueType() == MVT::f128) { + softenSetCCOperands(DAG, MVT::f128, Op0, Op1, CC, dl, Op0, Op1); + + // If softenSetCCOperands returned a scalar, use it. 
+ if (!Op1.getNode()) { + assert(Op0.getValueType() == Op.getValueType() && + "Unexpected setcc expansion!"); + return Op0; + } + } + SDValue X86CC; SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC); if (!EFLAGS) @@ -20612,15 +21219,16 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { cast(Cond.getOperand(2))->get(), CondOp0, CondOp1); if (Subtarget.hasAVX512()) { - SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0, - CondOp1, DAG.getConstant(SSECC, DL, MVT::i8)); + SDValue Cmp = + DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0, CondOp1, + DAG.getTargetConstant(SSECC, DL, MVT::i8)); assert(!VT.isVector() && "Not a scalar type?"); return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2); } if (SSECC < 8 || Subtarget.hasAVX()) { SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1, - DAG.getConstant(SSECC, DL, MVT::i8)); + DAG.getTargetConstant(SSECC, DL, MVT::i8)); // If we have AVX, we can use a variable vector select (VBLENDV) instead // of 3 logic instructions for size savings and potentially speed. @@ -20718,8 +21326,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { Cond.getOperand(1).getOpcode() == X86ISD::CMP && isNullConstant(Cond.getOperand(1).getOperand(1))) { SDValue Cmp = Cond.getOperand(1); - unsigned CondCode = - cast(Cond.getOperand(0))->getZExtValue(); + unsigned CondCode = Cond.getConstantOperandVal(0); if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && (CondCode == X86::COND_E || CondCode == X86::COND_NE)) { @@ -20807,8 +21414,6 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); - MVT VT = Op.getSimpleValueType(); - bool IllegalFPCMov = false; if (VT.isFloatingPoint() && !VT.isVector() && !isScalarFPTypeInSSEReg(VT)) // FPStack? @@ -20826,7 +21431,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { X86::CondCode X86Cond; std::tie(Value, Cond) = getX86XALUOOp(X86Cond, Cond.getValue(0), DAG); - CC = DAG.getConstant(X86Cond, DL, MVT::i8); + CC = DAG.getTargetConstant(X86Cond, DL, MVT::i8); AddTest = false; } @@ -20848,7 +21453,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } if (AddTest) { - CC = DAG.getConstant(X86::COND_NE, DL, MVT::i8); + CC = DAG.getTargetConstant(X86::COND_NE, DL, MVT::i8); Cond = EmitCmp(Cond, DAG.getConstant(0, DL, Cond.getValueType()), X86::COND_NE, DL, DAG); } @@ -20864,9 +21469,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) && (isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && (isNullConstant(Op1) || isNullConstant(Op2))) { - SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), - DAG.getConstant(X86::COND_B, DL, MVT::i8), - Cond); + SDValue Res = + DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), + DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), Cond); if (isAllOnesConstant(Op1) != (CondCode == X86::COND_B)) return DAG.getNOT(DL, Res, Res.getValueType()); return Res; @@ -21037,8 +21642,8 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op, // pre-AVX2 256-bit extensions need to be split into 128-bit instructions. 
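The X86ISD::SETCC_CARRY + COND_B pattern used in the LowerSELECT hunk above models the x86 idiom "sbb r, r", which broadcasts the carry flag to every bit. A scalar sketch of why that covers select(cond, -1, 0) and, with a NOT, select(cond, 0, -1):

  #include <cassert>
  #include <cstdint>

  // "sbb r, r" computes r - r - CF, i.e. 0 or all-ones, without a branch.
  static uint32_t allOnesIfCarry(bool Carry) {
    return (uint32_t)0 - (uint32_t)Carry;   // 0x00000000 or 0xFFFFFFFF
  }

  int main() {
    assert(allOnesIfCarry(true)  == 0xFFFFFFFFu);
    assert(allOnesIfCarry(false) == 0u);
    assert(~allOnesIfCarry(true) == 0u);    // the inverted-select case handled via getNOT
    return 0;
  }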
if (Subtarget.hasAVX()) { assert(VT.is256BitVector() && "256-bit vector expected"); - int HalfNumElts = NumElts / 2; - MVT HalfVT = MVT::getVectorVT(SVT, HalfNumElts); + MVT HalfVT = VT.getHalfNumVectorElementsVT(); + int HalfNumElts = HalfVT.getVectorNumElements(); unsigned NumSrcElts = InVT.getVectorNumElements(); SmallVector HiMask(NumSrcElts, SM_SentinelUndef); @@ -21081,7 +21686,7 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op, unsigned SignExtShift = DestWidth - InSVT.getSizeInBits(); SignExt = DAG.getNode(X86ISD::VSRAI, dl, DestVT, Curr, - DAG.getConstant(SignExtShift, dl, MVT::i8)); + DAG.getTargetConstant(SignExtShift, dl, MVT::i8)); } if (VT == MVT::v2i64) { @@ -21119,7 +21724,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget, // Custom legalize v8i8->v8i64 on CPUs without avx512bw. if (InVT == MVT::v8i8) { - if (!ExperimentalVectorWideningLegalization || VT != MVT::v8i64) + if (VT != MVT::v8i64) return SDValue(); In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), @@ -21138,10 +21743,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget, // for v4i32 the high shuffle mask will be {2, 3, -1, -1} // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32 // concat the vectors to original VT - - MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), - VT.getVectorNumElements() / 2); - + MVT HalfVT = VT.getHalfNumVectorElementsVT(); SDValue OpLo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, In); unsigned NumElems = InVT.getVectorNumElements(); @@ -21165,7 +21767,7 @@ static SDValue splitVectorStore(StoreSDNode *Store, SelectionDAG &DAG) { // Splitting volatile memory ops is not allowed unless the operation was not // legal to begin with. We are assuming the input op is legal (this transform // is only used for targets with AVX). - if (Store->isVolatile()) + if (!Store->isSimple()) return SDValue(); MVT StoreVT = StoredVal.getSimpleValueType(); @@ -21201,7 +21803,7 @@ static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, // Splitting volatile memory ops is not allowed unless the operation was not // legal to begin with. We are assuming the input op is legal (this transform // is only used for targets with AVX). - if (Store->isVolatile()) + if (!Store->isSimple()) return SDValue(); MVT StoreSVT = StoreVT.getScalarType(); @@ -21266,14 +21868,13 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget, return SDValue(); } + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); assert(StoreVT.isVector() && StoreVT.getSizeInBits() == 64 && "Unexpected VT"); - if (DAG.getTargetLoweringInfo().getTypeAction(*DAG.getContext(), StoreVT) != - TargetLowering::TypeWidenVector) - return SDValue(); + assert(TLI.getTypeAction(*DAG.getContext(), StoreVT) == + TargetLowering::TypeWidenVector && "Unexpected type action!"); - MVT WideVT = MVT::getVectorVT(StoreVT.getVectorElementType(), - StoreVT.getVectorNumElements() * 2); + EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StoreVT); StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, StoredVal, DAG.getUNDEF(StoreVT)); @@ -21313,11 +21914,10 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget, LoadSDNode *Ld = cast(Op.getNode()); SDLoc dl(Ld); - EVT MemVT = Ld->getMemoryVT(); // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 loads. 
if (RegVT.getVectorElementType() == MVT::i1) { - assert(EVT(RegVT) == MemVT && "Expected non-extending load"); + assert(EVT(RegVT) == Ld->getMemoryVT() && "Expected non-extending load"); assert(RegVT.getVectorNumElements() <= 8 && "Unexpected VT"); assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() && "Expected AVX512F without AVX512DQI"); @@ -21336,176 +21936,7 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget, return DAG.getMergeValues({Val, NewLd.getValue(1)}, dl); } - // Nothing useful we can do without SSE2 shuffles. - assert(Subtarget.hasSSE2() && "We only custom lower sext loads with SSE2."); - - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - unsigned RegSz = RegVT.getSizeInBits(); - - ISD::LoadExtType Ext = Ld->getExtensionType(); - - assert((Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD) - && "Only anyext and sext are currently implemented."); - assert(MemVT != RegVT && "Cannot extend to the same type"); - assert(MemVT.isVector() && "Must load a vector from memory"); - - unsigned NumElems = RegVT.getVectorNumElements(); - unsigned MemSz = MemVT.getSizeInBits(); - assert(RegSz > MemSz && "Register size must be greater than the mem size"); - - if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget.hasInt256()) { - // The only way in which we have a legal 256-bit vector result but not the - // integer 256-bit operations needed to directly lower a sextload is if we - // have AVX1 but not AVX2. In that case, we can always emit a sextload to - // a 128-bit vector and a normal sign_extend to 256-bits that should get - // correctly legalized. We do this late to allow the canonical form of - // sextload to persist throughout the rest of the DAG combiner -- it wants - // to fold together any extensions it can, and so will fuse a sign_extend - // of an sextload into a sextload targeting a wider value. - SDValue Load; - if (MemSz == 128) { - // Just switch this to a normal load. - assert(TLI.isTypeLegal(MemVT) && "If the memory type is a 128-bit type, " - "it must be a legal 128-bit vector " - "type!"); - Load = DAG.getLoad(MemVT, dl, Ld->getChain(), Ld->getBasePtr(), - Ld->getPointerInfo(), Ld->getAlignment(), - Ld->getMemOperand()->getFlags()); - } else { - assert(MemSz < 128 && - "Can't extend a type wider than 128 bits to a 256 bit vector!"); - // Do an sext load to a 128-bit vector type. We want to use the same - // number of elements, but elements half as wide. This will end up being - // recursively lowered by this routine, but will succeed as we definitely - // have all the necessary features if we're using AVX1. - EVT HalfEltVT = - EVT::getIntegerVT(*DAG.getContext(), RegVT.getScalarSizeInBits() / 2); - EVT HalfVecVT = EVT::getVectorVT(*DAG.getContext(), HalfEltVT, NumElems); - Load = - DAG.getExtLoad(Ext, dl, HalfVecVT, Ld->getChain(), Ld->getBasePtr(), - Ld->getPointerInfo(), MemVT, Ld->getAlignment(), - Ld->getMemOperand()->getFlags()); - } - - // Replace chain users with the new chain. - assert(Load->getNumValues() == 2 && "Loads must carry a chain!"); - - // Finally, do a normal sign-extend to the desired register. - SDValue SExt = DAG.getSExtOrTrunc(Load, dl, RegVT); - return DAG.getMergeValues({SExt, Load.getValue(1)}, dl); - } - - // All sizes must be a power of two. - assert(isPowerOf2_32(RegSz * MemSz * NumElems) && - "Non-power-of-two elements are not custom lowered!"); - - // Attempt to load the original value using scalar loads. - // Find the largest scalar type that divides the total loaded size. 
- MVT SclrLoadTy = MVT::i8; - for (MVT Tp : MVT::integer_valuetypes()) { - if (TLI.isTypeLegal(Tp) && ((MemSz % Tp.getSizeInBits()) == 0)) { - SclrLoadTy = Tp; - } - } - - // On 32bit systems, we can't save 64bit integers. Try bitcasting to F64. - if (TLI.isTypeLegal(MVT::f64) && SclrLoadTy.getSizeInBits() < 64 && - (64 <= MemSz)) - SclrLoadTy = MVT::f64; - - // Calculate the number of scalar loads that we need to perform - // in order to load our vector from memory. - unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits(); - - assert((Ext != ISD::SEXTLOAD || NumLoads == 1) && - "Can only lower sext loads with a single scalar load!"); - - unsigned loadRegSize = RegSz; - if (Ext == ISD::SEXTLOAD && RegSz >= 256) - loadRegSize = 128; - - // If we don't have BWI we won't be able to create the shuffle needed for - // v8i8->v8i64. - if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 && - MemVT == MVT::v8i8) - loadRegSize = 128; - - // Represent our vector as a sequence of elements which are the - // largest scalar that we can load. - EVT LoadUnitVecVT = EVT::getVectorVT( - *DAG.getContext(), SclrLoadTy, loadRegSize / SclrLoadTy.getSizeInBits()); - - // Represent the data using the same element type that is stored in - // memory. In practice, we ''widen'' MemVT. - EVT WideVecVT = - EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), - loadRegSize / MemVT.getScalarSizeInBits()); - - assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() && - "Invalid vector type"); - - // We can't shuffle using an illegal type. - assert(TLI.isTypeLegal(WideVecVT) && - "We only lower types that form legal widened vector types"); - - SmallVector Chains; - SDValue Ptr = Ld->getBasePtr(); - unsigned OffsetInc = SclrLoadTy.getSizeInBits() / 8; - SDValue Increment = DAG.getConstant(OffsetInc, dl, - TLI.getPointerTy(DAG.getDataLayout())); - SDValue Res = DAG.getUNDEF(LoadUnitVecVT); - - unsigned Offset = 0; - for (unsigned i = 0; i < NumLoads; ++i) { - unsigned NewAlign = MinAlign(Ld->getAlignment(), Offset); - - // Perform a single load. - SDValue ScalarLoad = - DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), Ptr, - Ld->getPointerInfo().getWithOffset(Offset), - NewAlign, Ld->getMemOperand()->getFlags()); - Chains.push_back(ScalarLoad.getValue(1)); - // Create the first element type using SCALAR_TO_VECTOR in order to avoid - // another round of DAGCombining. - if (i == 0) - Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoadUnitVecVT, ScalarLoad); - else - Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, LoadUnitVecVT, Res, - ScalarLoad, DAG.getIntPtrConstant(i, dl)); - - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); - Offset += OffsetInc; - } - - SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); - - // Bitcast the loaded value to a vector of the original element type, in - // the size of the target vector type. - SDValue SlicedVec = DAG.getBitcast(WideVecVT, Res); - unsigned SizeRatio = RegSz / MemSz; - - if (Ext == ISD::SEXTLOAD) { - SDValue Sext = getExtendInVec(ISD::SIGN_EXTEND, dl, RegVT, SlicedVec, DAG); - return DAG.getMergeValues({Sext, TF}, dl); - } - - if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 && - MemVT == MVT::v8i8) { - SDValue Sext = getExtendInVec(ISD::ZERO_EXTEND, dl, RegVT, SlicedVec, DAG); - return DAG.getMergeValues({Sext, TF}, dl); - } - - // Redistribute the loaded elements into the different locations. 
- SmallVector ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i != NumElems; ++i) - ShuffleVec[i * SizeRatio] = i; - - SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec, - DAG.getUNDEF(WideVecVT), ShuffleVec); - - // Bitcast to the requested type. - Shuff = DAG.getBitcast(RegVT, Shuff); - return DAG.getMergeValues({Shuff, TF}, dl); + return SDValue(); } /// Return true if node is an ISD::AND or ISD::OR of two X86ISD::SETCC nodes @@ -21610,7 +22041,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { if (Inverted) X86Cond = X86::GetOppositeBranchCondition(X86Cond); - CC = DAG.getConstant(X86Cond, dl, MVT::i8); + CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8); addTest = false; } else { unsigned CondOpc; @@ -21638,10 +22069,10 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { if (Cmp == Cond.getOperand(1).getOperand(1) && isX86LogicalCmp(Cmp) && Op.getNode()->hasOneUse()) { - X86::CondCode CCode = - (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); - CCode = X86::GetOppositeBranchCondition(CCode); - CC = DAG.getConstant(CCode, dl, MVT::i8); + X86::CondCode CCode0 = + (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); + CCode0 = X86::GetOppositeBranchCondition(CCode0); + CC = DAG.getTargetConstant(CCode0, dl, MVT::i8); SDNode *User = *Op.getNode()->use_begin(); // Look for an unconditional branch following this conditional branch. // We need this because we need to reverse the successors in order @@ -21654,12 +22085,12 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { (void)NewBR; Dest = FalseBB; - Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), - Chain, Dest, CC, Cmp); - X86::CondCode CCode = - (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0); - CCode = X86::GetOppositeBranchCondition(CCode); - CC = DAG.getConstant(CCode, dl, MVT::i8); + Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, + Dest, CC, Cmp); + X86::CondCode CCode1 = + (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0); + CCode1 = X86::GetOppositeBranchCondition(CCode1); + CC = DAG.getTargetConstant(CCode1, dl, MVT::i8); Cond = Cmp; addTest = false; } @@ -21672,7 +22103,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { X86::CondCode CCode = (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); CCode = X86::GetOppositeBranchCondition(CCode); - CC = DAG.getConstant(CCode, dl, MVT::i8); + CC = DAG.getTargetConstant(CCode, dl, MVT::i8); Cond = Cond.getOperand(0).getOperand(1); addTest = false; } else if (Cond.getOpcode() == ISD::SETCC && @@ -21698,10 +22129,10 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, Cond.getOperand(0), Cond.getOperand(1)); Cmp = ConvertCmpIfNecessary(Cmp, DAG); - CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8); + CC = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8); Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cmp); - CC = DAG.getConstant(X86::COND_P, dl, MVT::i8); + CC = DAG.getTargetConstant(X86::COND_P, dl, MVT::i8); Cond = Cmp; addTest = false; } @@ -21714,10 +22145,10 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, Cond.getOperand(0), Cond.getOperand(1)); Cmp = ConvertCmpIfNecessary(Cmp, DAG); - CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8); + CC = DAG.getTargetConstant(X86::COND_NE, 
dl, MVT::i8); Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cmp); - CC = DAG.getConstant(X86::COND_P, dl, MVT::i8); + CC = DAG.getTargetConstant(X86::COND_P, dl, MVT::i8); Cond = Cmp; addTest = false; } @@ -21742,7 +22173,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { if (addTest) { X86::CondCode X86Cond = Inverted ? X86::COND_E : X86::COND_NE; - CC = DAG.getConstant(X86Cond, dl, MVT::i8); + CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8); Cond = EmitCmp(Cond, DAG.getConstant(0, dl, Cond.getValueType()), X86Cond, dl, DAG); } @@ -21770,7 +22201,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDNode *Node = Op.getNode(); SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); - unsigned Align = cast(Op.getOperand(2))->getZExtValue(); + unsigned Align = Op.getConstantOperandVal(2); EVT VT = Node->getValueType(0); // Chain the dynamic stack allocation so that it doesn't modify the stack @@ -21811,7 +22242,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, } const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy); - unsigned Vreg = MRI.createVirtualRegister(AddrRegClass); + Register Vreg = MRI.createVirtualRegister(AddrRegClass); Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size); Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain, DAG.getRegister(Vreg, SPTy)); @@ -21821,7 +22252,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, MF.getInfo()->setHasWinAlloca(true); const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); - unsigned SPReg = RegInfo->getStackRegister(); + Register SPReg = RegInfo->getStackRegister(); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy); Chain = SP.getValue(1); @@ -22076,7 +22507,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT, } return DAG.getNode(Opc, dl, VT, SrcOp, - DAG.getConstant(ShiftAmt, dl, MVT::i8)); + DAG.getTargetConstant(ShiftAmt, dl, MVT::i8)); } /// Handle vector element shifts where the shift amount may or may not be a @@ -22121,7 +22552,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT, ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt), MVT::v2i64, ShAmt); else { - SDValue ByteShift = DAG.getConstant( + SDValue ByteShift = DAG.getTargetConstant( (128 - AmtTy.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8); ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt); ShAmt = DAG.getNode(X86ISD::VSHLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt, @@ -22308,13 +22739,21 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, // Helper to detect if the operand is CUR_DIRECTION rounding mode. auto isRoundModeCurDirection = [](SDValue Rnd) { if (auto *C = dyn_cast(Rnd)) - return C->getZExtValue() == X86::STATIC_ROUNDING::CUR_DIRECTION; + return C->getAPIntValue() == X86::STATIC_ROUNDING::CUR_DIRECTION; return false; }; auto isRoundModeSAE = [](SDValue Rnd) { - if (auto *C = dyn_cast(Rnd)) - return C->getZExtValue() == X86::STATIC_ROUNDING::NO_EXC; + if (auto *C = dyn_cast(Rnd)) { + unsigned RC = C->getZExtValue(); + if (RC & X86::STATIC_ROUNDING::NO_EXC) { + // Clear the NO_EXC bit and check remaining bits. + RC ^= X86::STATIC_ROUNDING::NO_EXC; + // As a convenience we allow no other bits or explicitly + // current direction. 
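The relaxed isRoundModeSAE check above accepts an immediate whose NO_EXC bit is set as long as nothing else, or only CUR_DIRECTION, accompanies it. A standalone sketch of that bit test; the numeric values of the STATIC_ROUNDING constants are assumed here purely for illustration:

  #include <cassert>

  enum : unsigned { CUR_DIRECTION = 4, NO_EXC = 8 }; // assumed encoding

  // SAE is accepted when NO_EXC is set and the remaining bits are either
  // empty or exactly CUR_DIRECTION.
  static bool isRoundModeSAEOnly(unsigned RC) {
    if (RC & NO_EXC) {
      RC ^= NO_EXC;                       // strip the SAE bit
      return RC == 0 || RC == CUR_DIRECTION;
    }
    return false;
  }

  int main() {
    assert(isRoundModeSAEOnly(NO_EXC));
    assert(isRoundModeSAEOnly(NO_EXC | CUR_DIRECTION));
    assert(!isRoundModeSAEOnly(CUR_DIRECTION)); // SAE bit missing
    assert(!isRoundModeSAEOnly(NO_EXC | 1));    // embedded rounding, not plain SAE
    return 0;
  }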
+ return RC == 0 || RC == X86::STATIC_ROUNDING::CUR_DIRECTION; + } + } return false; }; @@ -22335,7 +22774,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, }; SDLoc dl(Op); - unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); + unsigned IntNo = Op.getConstantOperandVal(0); MVT VT = Op.getSimpleValueType(); const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo); if (IntrData) { @@ -22411,9 +22850,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SDValue Src2 = Op.getOperand(2); SDValue Src3 = Op.getOperand(3); - if (IntrData->Type == INTR_TYPE_3OP_IMM8) - Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3); - // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand. @@ -22666,7 +23102,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case CMP_MASK_CC: { MVT MaskVT = Op.getSimpleValueType(); SDValue CC = Op.getOperand(3); - CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CC); // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand. @@ -22685,7 +23120,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case CMP_MASK_SCALAR_CC: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); - SDValue CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(3)); + SDValue CC = Op.getOperand(3); SDValue Mask = Op.getOperand(4); SDValue Cmp; @@ -22750,16 +23185,16 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case COMI_RM: { // Comparison intrinsics with Sae SDValue LHS = Op.getOperand(1); SDValue RHS = Op.getOperand(2); - unsigned CondVal = cast(Op.getOperand(3))->getZExtValue(); + unsigned CondVal = Op.getConstantOperandVal(3); SDValue Sae = Op.getOperand(4); SDValue FCmp; if (isRoundModeCurDirection(Sae)) FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS, - DAG.getConstant(CondVal, dl, MVT::i8)); + DAG.getTargetConstant(CondVal, dl, MVT::i8)); else if (isRoundModeSAE(Sae)) FCmp = DAG.getNode(X86ISD::FSETCCM_SAE, dl, MVT::v1i1, LHS, RHS, - DAG.getConstant(CondVal, dl, MVT::i8), Sae); + DAG.getTargetConstant(CondVal, dl, MVT::i8), Sae); else return SDValue(); // Need to fill with zeros to ensure the bitcast will produce zeroes @@ -22819,9 +23254,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, assert(IntrData->Opc0 == X86ISD::VRNDSCALE && "Unexpected opcode"); // Clear the upper bits of the rounding immediate so that the legacy // intrinsic can't trigger the scaling behavior of VRNDSCALE. - SDValue RoundingMode = DAG.getNode(ISD::AND, dl, MVT::i32, - Op.getOperand(2), - DAG.getConstant(0xf, dl, MVT::i32)); + auto Round = cast(Op.getOperand(2)); + SDValue RoundingMode = + DAG.getTargetConstant(Round->getZExtValue() & 0xf, dl, MVT::i32); return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), RoundingMode); } @@ -22829,12 +23264,22 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, assert(IntrData->Opc0 == X86ISD::VRNDSCALES && "Unexpected opcode"); // Clear the upper bits of the rounding immediate so that the legacy // intrinsic can't trigger the scaling behavior of VRNDSCALE. 
- SDValue RoundingMode = DAG.getNode(ISD::AND, dl, MVT::i32, - Op.getOperand(3), - DAG.getConstant(0xf, dl, MVT::i32)); + auto Round = cast(Op.getOperand(3)); + SDValue RoundingMode = + DAG.getTargetConstant(Round->getZExtValue() & 0xf, dl, MVT::i32); return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), RoundingMode); } + case BEXTRI: { + assert(IntrData->Opc0 == X86ISD::BEXTR && "Unexpected opcode"); + + // The control is a TargetConstant, but we need to convert it to a + // ConstantSDNode. + uint64_t Imm = Op.getConstantOperandVal(2); + SDValue Control = DAG.getConstant(Imm, dl, Op.getValueType()); + return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), + Op.getOperand(1), Control); + } // ADC/ADCX/SBB case ADX: { SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32); @@ -23165,6 +23610,61 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, MaskVT, Operation); return DAG.getMergeValues({Result0, Result1}, DL); } + case Intrinsic::x86_mmx_pslli_w: + case Intrinsic::x86_mmx_pslli_d: + case Intrinsic::x86_mmx_pslli_q: + case Intrinsic::x86_mmx_psrli_w: + case Intrinsic::x86_mmx_psrli_d: + case Intrinsic::x86_mmx_psrli_q: + case Intrinsic::x86_mmx_psrai_w: + case Intrinsic::x86_mmx_psrai_d: { + SDLoc DL(Op); + SDValue ShAmt = Op.getOperand(2); + // If the argument is a constant, convert it to a target constant. + if (auto *C = dyn_cast(ShAmt)) { + ShAmt = DAG.getTargetConstant(C->getZExtValue(), DL, MVT::i32); + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(), + Op.getOperand(0), Op.getOperand(1), ShAmt); + } + + unsigned NewIntrinsic; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::x86_mmx_pslli_w: + NewIntrinsic = Intrinsic::x86_mmx_psll_w; + break; + case Intrinsic::x86_mmx_pslli_d: + NewIntrinsic = Intrinsic::x86_mmx_psll_d; + break; + case Intrinsic::x86_mmx_pslli_q: + NewIntrinsic = Intrinsic::x86_mmx_psll_q; + break; + case Intrinsic::x86_mmx_psrli_w: + NewIntrinsic = Intrinsic::x86_mmx_psrl_w; + break; + case Intrinsic::x86_mmx_psrli_d: + NewIntrinsic = Intrinsic::x86_mmx_psrl_d; + break; + case Intrinsic::x86_mmx_psrli_q: + NewIntrinsic = Intrinsic::x86_mmx_psrl_q; + break; + case Intrinsic::x86_mmx_psrai_w: + NewIntrinsic = Intrinsic::x86_mmx_psra_w; + break; + case Intrinsic::x86_mmx_psrai_d: + NewIntrinsic = Intrinsic::x86_mmx_psra_d; + break; + } + + // The vector shift intrinsics with scalars uses 32b shift amounts but + // the sse2/mmx shift instructions reads 64 bits. Copy the 32 bits to an + // MMX register. + ShAmt = DAG.getNode(X86ISD::MMX_MOVW2D, DL, MVT::x86mmx, ShAmt); + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(), + DAG.getConstant(NewIntrinsic, DL, MVT::i32), + Op.getOperand(1), ShAmt); + + } } } @@ -23177,7 +23677,9 @@ static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, // Scale must be constant. if (!C) return SDValue(); - SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, + TLI.getPointerTy(DAG.getDataLayout())); EVT MaskVT = Mask.getValueType().changeVectorElementTypeToInteger(); SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other); // If source is undef or we know it won't be used, use a zero vector @@ -23204,7 +23706,9 @@ static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG, // Scale must be constant. 
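For reference on the BEXTRI case above: the change only repackages the target-constant immediate as a plain constant, the control word keeps the usual BEXTR layout. A small standalone model of that semantics, written from the instruction's documented behaviour rather than from this patch (start index in bits 7:0, length in bits 15:8, out-of-range values clamp to zero or the register width):

#include <cstdint>

// Hypothetical helper modelling 64-bit BEXTR: extract Len bits of Src
// starting at bit Start, both taken from the low two bytes of Control.
uint64_t bextr64(uint64_t Src, uint64_t Control) {
  unsigned Start = Control & 0xff;
  unsigned Len = (Control >> 8) & 0xff;
  if (Start >= 64 || Len == 0)
    return 0;                                // nothing selected
  uint64_t Field = Src >> Start;
  if (Len >= 64)
    return Field;                            // length saturates at 64 bits
  return Field & ((uint64_t(1) << Len) - 1); // keep only the low Len bits
}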
if (!C) return SDValue(); - SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, + TLI.getPointerTy(DAG.getDataLayout())); unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(), VT.getVectorNumElements()); MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts); @@ -23238,7 +23742,9 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, // Scale must be constant. if (!C) return SDValue(); - SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, + TLI.getPointerTy(DAG.getDataLayout())); unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(), Src.getSimpleValueType().getVectorNumElements()); MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts); @@ -23266,7 +23772,9 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, // Scale must be constant. if (!C) return SDValue(); - SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, + TLI.getPointerTy(DAG.getDataLayout())); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); MVT MaskVT = @@ -23435,8 +23943,7 @@ EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - unsigned IntNo = cast(Op.getOperand(1))->getZExtValue(); - + unsigned IntNo = Op.getConstantOperandVal(1); const IntrinsicData *IntrData = getIntrinsicWithChain(IntNo); if (!IntrData) { switch (IntNo) { @@ -23538,10 +24045,10 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, // If the value returned by RDRAND/RDSEED was valid (CF=1), return 1. // Otherwise return the value from Rand, which is always 0, casted to i32. - SDValue Ops[] = { DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)), - DAG.getConstant(1, dl, Op->getValueType(1)), - DAG.getConstant(X86::COND_B, dl, MVT::i8), - SDValue(Result.getNode(), 1) }; + SDValue Ops[] = {DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)), + DAG.getConstant(1, dl, Op->getValueType(1)), + DAG.getTargetConstant(X86::COND_B, dl, MVT::i8), + SDValue(Result.getNode(), 1)}; SDValue isValid = DAG.getNode(X86ISD::CMOV, dl, Op->getValueType(1), Ops); // Return { result, isValid, chain }. @@ -23581,8 +24088,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, Scale, Chain, Subtarget); } case PREFETCH: { - SDValue Hint = Op.getOperand(6); - unsigned HintVal = cast(Hint)->getZExtValue(); + const APInt &HintVal = Op.getConstantOperandAPInt(6); assert((HintVal == 2 || HintVal == 3) && "Wrong prefetch hint in intrinsic: should be 2 or 3"); unsigned Opcode = (HintVal == 2 ? 
IntrData->Opc1 : IntrData->Opc0); @@ -23678,7 +24184,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, if (verifyReturnAddressArgumentIsConstant(Op, DAG)) return SDValue(); - unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); SDLoc dl(Op); EVT PtrVT = getPointerTy(DAG.getDataLayout()); @@ -23730,7 +24236,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction()); SDLoc dl(Op); // FIXME probably not meaningful - unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); assert(((FrameReg == X86::RBP && VT == MVT::i64) || (FrameReg == X86::EBP && VT == MVT::i32)) && "Invalid Frame Register!"); @@ -23743,12 +24249,11 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. -unsigned X86TargetLowering::getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const { +Register X86TargetLowering::getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const { const TargetFrameLowering &TFI = *Subtarget.getFrameLowering(); - const MachineFunction &MF = DAG.getMachineFunction(); - unsigned Reg = StringSwitch(RegName) + Register Reg = StringSwitch(RegName) .Case("esp", X86::ESP) .Case("rsp", X86::RSP) .Case("ebp", X86::EBP) @@ -23762,8 +24267,7 @@ unsigned X86TargetLowering::getRegisterByName(const char* RegName, EVT VT, #ifndef NDEBUG else { const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); - unsigned FrameReg = - RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction()); + Register FrameReg = RegInfo->getPtrSizedFrameRegister(MF); assert((FrameReg == X86::EBP || FrameReg == X86::RBP) && "Invalid Frame Register!"); } @@ -23809,7 +24313,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(DAG.getDataLayout()); const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); - unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction()); + Register FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction()); assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) || (FrameReg == X86::EBP && PtrVT == MVT::i32)) && "Invalid Frame Register!"); @@ -23967,6 +24471,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, case CallingConv::X86_FastCall: case CallingConv::X86_ThisCall: case CallingConv::Fast: + case CallingConv::Tail: // Pass 'nest' parameter in EAX. // Must be kept in sync with X86CallingConv.td NestReg = X86::EAX; @@ -24279,12 +24784,9 @@ static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget, if (Opc == ISD::CTLZ) { // If src is zero (i.e. bsr sets ZF), returns NumBits. - SDValue Ops[] = { - Op, - DAG.getConstant(NumBits + NumBits - 1, dl, OpVT), - DAG.getConstant(X86::COND_E, dl, MVT::i8), - Op.getValue(1) - }; + SDValue Ops[] = {Op, DAG.getConstant(NumBits + NumBits - 1, dl, OpVT), + DAG.getTargetConstant(X86::COND_E, dl, MVT::i8), + Op.getValue(1)}; Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops); } @@ -24312,12 +24814,9 @@ static SDValue LowerCTTZ(SDValue Op, const X86Subtarget &Subtarget, Op = DAG.getNode(X86ISD::BSF, dl, VTs, N0); // If src is zero (i.e. bsf sets ZF), returns NumBits. 
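The CTLZ lowering above now materialises the condition code with getTargetConstant, but the BSR+CMOV shape is unchanged. A standalone 32-bit model of that shape; the final XOR with NumBits-1 happens in surrounding code outside this hunk, so its presence is inferred from context.

#include <cstdint>

// BSR yields the index of the highest set bit and sets ZF for a zero input;
// the CMOV substitutes 2*NumBits-1 in that case so the XOR with NumBits-1
// produces NumBits, i.e. ctlz(0) == 32.
unsigned ctlz32(uint32_t X) {
  const unsigned NumBits = 32;
  unsigned BSR = 0;
  bool ZF = (X == 0);
  if (!ZF)
    for (BSR = 31; ((X >> BSR) & 1) == 0; --BSR)
      ;                                              // index of the MSB
  unsigned Sel = ZF ? (NumBits + NumBits - 1) : BSR; // the X86ISD::CMOV on COND_E
  return Sel ^ (NumBits - 1);                        // 31 - BSR, or 32 for X == 0
}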
- SDValue Ops[] = { - Op, - DAG.getConstant(NumBits, dl, VT), - DAG.getConstant(X86::COND_E, dl, MVT::i8), - Op.getValue(1) - }; + SDValue Ops[] = {Op, DAG.getConstant(NumBits, dl, VT), + DAG.getTargetConstant(X86::COND_E, dl, MVT::i8), + Op.getValue(1)}; return DAG.getNode(X86ISD::CMOV, dl, VT, Ops); } @@ -24453,7 +24952,7 @@ static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget, SDValue N0 = Op.getOperand(0); SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32), DAG.getConstant(0, DL, VT), N0); - SDValue Ops[] = {N0, Neg, DAG.getConstant(X86::COND_GE, DL, MVT::i8), + SDValue Ops[] = {N0, Neg, DAG.getTargetConstant(X86::COND_GE, DL, MVT::i8), SDValue(Neg.getNode(), 1)}; return DAG.getNode(X86ISD::CMOV, DL, VT, Ops); } @@ -25033,7 +25532,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, // Optimize shl/srl/sra with constant shift amount. APInt APIntShiftAmt; - if (!isConstantSplat(Amt, APIntShiftAmt)) + if (!X86::isConstantSplat(Amt, APIntShiftAmt)) return SDValue(); // If the shift amount is out of range, return undef. @@ -25220,7 +25719,7 @@ static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl, } ConstantSDNode *ND = cast(Op); - APInt C(SVTBits, ND->getAPIntValue().getZExtValue()); + APInt C(SVTBits, ND->getZExtValue()); uint64_t ShAmt = C.getZExtValue(); if (ShAmt >= SVTBits) { Elts.push_back(DAG.getUNDEF(SVT)); @@ -25502,7 +26001,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, (VT == MVT::v32i8 && Subtarget.hasInt256())) && !Subtarget.hasXOP()) { int NumElts = VT.getVectorNumElements(); - SDValue Cst8 = DAG.getConstant(8, dl, MVT::i8); + SDValue Cst8 = DAG.getTargetConstant(8, dl, MVT::i8); // Extend constant shift amount to vXi16 (it doesn't matter if the type // isn't legal). @@ -25774,7 +26273,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, unsigned Op = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI); uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits); return DAG.getNode(Op, DL, VT, R, - DAG.getConstant(RotateAmt, DL, MVT::i8)); + DAG.getTargetConstant(RotateAmt, DL, MVT::i8)); } // Else, fall-back on VPROLV/VPRORV. @@ -25795,7 +26294,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, if (0 <= CstSplatIndex) { uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits); return DAG.getNode(X86ISD::VROTLI, DL, VT, R, - DAG.getConstant(RotateAmt, DL, MVT::i8)); + DAG.getTargetConstant(RotateAmt, DL, MVT::i8)); } // Use general rotate by variable (per-element). @@ -26032,7 +26531,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { // If this is a canonical idempotent atomicrmw w/no uses, we have a better // lowering available in lowerAtomicArith. - // TODO: push more cases through this path. + // TODO: push more cases through this path. 
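LowerABS above keeps the SUB+CMOV idiom and only switches the condition code to a target constant. A minimal standalone model of the idiom, with the flag test written as the signed comparison it encodes:

#include <cstdint>

// Neg = 0 - X sets the flags; COND_GE on those flags is the signed test
// 0 >= X, and the CMOV picks Neg when it holds, otherwise the original value.
int32_t absViaCmov(int32_t X) {
  int32_t Neg = int32_t(0u - uint32_t(X)); // X86ISD::SUB 0, X (wrap kept defined)
  bool CondGE = (X <= 0);                  // COND_GE evaluated on the SUB's flags
  return CondGE ? Neg : X;                 // X86ISD::CMOV
}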
if (auto *C = dyn_cast(AI->getValOperand())) if (AI->getOperation() == AtomicRMWInst::Or && C->isZero() && AI->use_empty()) @@ -26087,10 +26586,22 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { return Loaded; } +bool X86TargetLowering::lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const { + if (!SI.isUnordered()) + return false; + return ExperimentalUnorderedISEL; +} +bool X86TargetLowering::lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const { + if (!LI.isUnordered()) + return false; + return ExperimentalUnorderedISEL; +} + + /// Emit a locked operation on a stack location which does not change any /// memory location, but does involve a lock prefix. Location is chosen to be /// a) very likely accessed only by a single thread to minimize cache traffic, -/// and b) definitely dereferenceable. Returns the new Chain result. +/// and b) definitely dereferenceable. Returns the new Chain result. static SDValue emitLockedStackOp(SelectionDAG &DAG, const X86Subtarget &Subtarget, SDValue Chain, SDLoc DL) { @@ -26099,22 +26610,22 @@ static SDValue emitLockedStackOp(SelectionDAG &DAG, // operations issued by the current processor. As such, the location // referenced is not relevant for the ordering properties of the instruction. // See: Intel® 64 and IA-32 ArchitecturesSoftware Developer’s Manual, - // 8.2.3.9 Loads and Stores Are Not Reordered with Locked Instructions + // 8.2.3.9 Loads and Stores Are Not Reordered with Locked Instructions // 2) Using an immediate operand appears to be the best encoding choice // here since it doesn't require an extra register. // 3) OR appears to be very slightly faster than ADD. (Though, the difference // is small enough it might just be measurement noise.) // 4) When choosing offsets, there are several contributing factors: // a) If there's no redzone, we default to TOS. (We could allocate a cache - // line aligned stack object to improve this case.) + // line aligned stack object to improve this case.) // b) To minimize our chances of introducing a false dependence, we prefer - // to offset the stack usage from TOS slightly. + // to offset the stack usage from TOS slightly. // c) To minimize concerns about cross thread stack usage - in particular, // the idiomatic MyThreadPool.run([&StackVars]() {...}) pattern which // captures state in the TOS frame and accesses it from many threads - // we want to use an offset such that the offset is in a distinct cache // line from the TOS frame. - // + // // For a general discussion of the tradeoffs and benchmark results, see: // https://shipilev.net/blog/2014/on-the-fence-with-dependencies/ @@ -26155,10 +26666,10 @@ static SDValue emitLockedStackOp(SelectionDAG &DAG, static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc dl(Op); - AtomicOrdering FenceOrdering = static_cast( - cast(Op.getOperand(1))->getZExtValue()); - SyncScope::ID FenceSSID = static_cast( - cast(Op.getOperand(2))->getZExtValue()); + AtomicOrdering FenceOrdering = + static_cast(Op.getConstantOperandVal(1)); + SyncScope::ID FenceSSID = + static_cast(Op.getConstantOperandVal(2)); // The only fence that needs an instruction is a sequentially-consistent // cross-thread fence. 
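The hunks above (the unordered-atomic hooks and the comment cleanup in emitLockedStackOp) sit next to the fence lowering itself. As a rough, hedged summary of the decision that code makes, assuming the usual LLVM AtomicOrdering/SyncScope semantics; the returned strings are purely illustrative:

enum class Ordering { Monotonic, Acquire, Release, AcquireRelease, SeqCst };
enum class Scope { SingleThread, System };

// Only a sequentially-consistent, cross-thread fence needs a machine
// instruction: MFENCE when available, otherwise the cheaper locked RMW on a
// stack slot produced by emitLockedStackOp. Everything else stays a
// compiler-only barrier.
const char *fenceLowering(Ordering O, Scope S, bool HasMFence) {
  if (O == Ordering::SeqCst && S == Scope::System)
    return HasMFence ? "mfence" : "lock or on a stack slot (emitLockedStackOp)";
  return "no instruction (ordering handled at the IR/DAG level)";
}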
@@ -26167,7 +26678,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget, if (Subtarget.hasMFence()) return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); - SDValue Chain = Op.getOperand(0); + SDValue Chain = Op.getOperand(0); return emitLockedStackOp(DAG, Subtarget, Chain, dl); } @@ -26218,6 +26729,17 @@ static SDValue getPMOVMSKB(const SDLoc &DL, SDValue V, SelectionDAG &DAG, const X86Subtarget &Subtarget) { MVT InVT = V.getSimpleValueType(); + if (InVT == MVT::v64i8) { + SDValue Lo, Hi; + std::tie(Lo, Hi) = DAG.SplitVector(V, DL); + Lo = getPMOVMSKB(DL, Lo, DAG, Subtarget); + Hi = getPMOVMSKB(DL, Hi, DAG, Subtarget); + Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Lo); + Hi = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Hi); + Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi, + DAG.getConstant(32, DL, MVT::i8)); + return DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi); + } if (InVT == MVT::v32i8 && !Subtarget.hasInt256()) { SDValue Lo, Hi; std::tie(Lo, Hi) = DAG.SplitVector(V, DL); @@ -26258,8 +26780,7 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget, SDLoc dl(Op); SDValue Lo, Hi; std::tie(Lo, Hi) = DAG.SplitVector(Op.getOperand(0), dl); - EVT CastVT = MVT::getVectorVT(DstVT.getVectorElementType(), - DstVT.getVectorNumElements() / 2); + MVT CastVT = DstVT.getHalfNumVectorElementsVT(); Lo = DAG.getBitcast(CastVT, Lo); Hi = DAG.getBitcast(CastVT, Hi); return DAG.getNode(ISD::CONCAT_VECTORS, dl, DstVT, Lo, Hi); @@ -26275,53 +26796,37 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget, return DAG.getZExtOrTrunc(V, DL, DstVT); } - if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 || - SrcVT == MVT::i64) { - assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); - if (DstVT != MVT::f64 && DstVT != MVT::i64 && - !(DstVT == MVT::x86mmx && SrcVT.isVector())) - // This conversion needs to be expanded. - return SDValue(); + assert((SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 || + SrcVT == MVT::i64) && "Unexpected VT!"); - SDLoc dl(Op); - if (SrcVT.isVector()) { - // Widen the vector in input in the case of MVT::v2i32. - // Example: from MVT::v2i32 to MVT::v4i32. - MVT NewVT = MVT::getVectorVT(SrcVT.getVectorElementType(), - SrcVT.getVectorNumElements() * 2); - Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewVT, Src, - DAG.getUNDEF(SrcVT)); - } else { - assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() && - "Unexpected source type in LowerBITCAST"); - Src = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Src); - } + assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); + if (!(DstVT == MVT::f64 && SrcVT == MVT::i64) && + !(DstVT == MVT::x86mmx && SrcVT.isVector())) + // This conversion needs to be expanded. + return SDValue(); - MVT V2X64VT = DstVT == MVT::f64 ? MVT::v2f64 : MVT::v2i64; - Src = DAG.getNode(ISD::BITCAST, dl, V2X64VT, Src); + SDLoc dl(Op); + if (SrcVT.isVector()) { + // Widen the vector in input in the case of MVT::v2i32. + // Example: from MVT::v2i32 to MVT::v4i32. + MVT NewVT = MVT::getVectorVT(SrcVT.getVectorElementType(), + SrcVT.getVectorNumElements() * 2); + Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewVT, Src, + DAG.getUNDEF(SrcVT)); + } else { + assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() && + "Unexpected source type in LowerBITCAST"); + Src = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Src); + } - if (DstVT == MVT::x86mmx) - return DAG.getNode(X86ISD::MOVDQ2Q, dl, DstVT, Src); + MVT V2X64VT = DstVT == MVT::f64 ? 
MVT::v2f64 : MVT::v2i64; + Src = DAG.getNode(ISD::BITCAST, dl, V2X64VT, Src); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, DstVT, Src, - DAG.getIntPtrConstant(0, dl)); - } + if (DstVT == MVT::x86mmx) + return DAG.getNode(X86ISD::MOVDQ2Q, dl, DstVT, Src); - assert(Subtarget.is64Bit() && !Subtarget.hasSSE2() && - Subtarget.hasMMX() && "Unexpected custom BITCAST"); - assert((DstVT == MVT::i64 || - (DstVT.isVector() && DstVT.getSizeInBits()==64)) && - "Unexpected custom BITCAST"); - // i64 <=> MMX conversions are Legal. - if (SrcVT==MVT::i64 && DstVT.isVector()) - return Op; - if (DstVT==MVT::i64 && SrcVT.isVector()) - return Op; - // MMX <=> MMX conversions are Legal. - if (SrcVT.isVector() && DstVT.isVector()) - return Op; - // All other conversions need to be expanded. - return SDValue(); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, DstVT, Src, + DAG.getIntPtrConstant(0, dl)); } /// Compute the horizontal sum of bytes in V for the elements of VT. @@ -26549,6 +27054,13 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget, SDValue In = Op.getOperand(0); SDLoc DL(Op); + // Split v8i64/v16i32 without BWI so that we can still use the PSHUFB + // lowering. + if (VT == MVT::v8i64 || VT == MVT::v16i32) { + assert(!Subtarget.hasBWI() && "BWI should Expand BITREVERSE"); + return Lower512IntUnary(Op, DAG); + } + unsigned NumElts = VT.getVectorNumElements(); assert(VT.getScalarType() == MVT::i8 && "Only byte vector BITREVERSE supported"); @@ -26656,12 +27168,12 @@ static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG, // seq_cst which isn't SingleThread, everything just needs to be preserved // during codegen and then dropped. Note that we expect (but don't assume), // that orderings other than seq_cst and acq_rel have been canonicalized to - // a store or load. + // a store or load. if (AN->getOrdering() == AtomicOrdering::SequentiallyConsistent && AN->getSyncScopeID() == SyncScope::System) { // Prefer a locked operation against a stack location to minimize cache // traffic. This assumes that stack locations are very likely to be - // accessed only by the owning thread. + // accessed only by the owning thread. SDValue NewChain = emitLockedStackOp(DAG, Subtarget, Chain, DL); assert(!N->hasAnyUseOfValue(0)); // NOTE: The getUNDEF is needed to give something for the unused result 0. @@ -26886,12 +27398,13 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget, SDValue Chain = N->getChain(); SDValue BasePtr = N->getBasePtr(); - if (VT == MVT::v2f32) { + if (VT == MVT::v2f32 || VT == MVT::v2i32) { assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type"); // If the index is v2i64 and we have VLX we can use xmm for data and index. 
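The new v64i8 path in getPMOVMSKB above splits the vector, takes a 32-bit PMOVMSKB mask of each half, and merges the halves with a zero-extend, a shift by 32 and an OR. A standalone scalar model of that combine:

#include <cstdint>

// Each PMOVMSKB collects the sign bit of every byte in one 32-byte half; the
// low half lands in bits 0-31 and the high half in bits 32-63, matching the
// ZERO_EXTEND / SHL 32 / OR sequence in the hunk.
uint64_t movmsk_v64i8(const uint8_t Bytes[64]) {
  uint64_t Lo = 0, Hi = 0;
  for (int I = 0; I < 32; ++I) {
    Lo |= uint64_t(Bytes[I] >> 7) << I;
    Hi |= uint64_t(Bytes[32 + I] >> 7) << I;
  }
  return Lo | (Hi << 32);
}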
if (Index.getValueType() == MVT::v2i64 && Subtarget.hasVLX()) { - Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, - DAG.getUNDEF(MVT::v2f32)); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Src, DAG.getUNDEF(VT)); SDVTList VTs = DAG.getVTList(MVT::v2i1, MVT::Other); SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale}; SDValue NewScatter = DAG.getTargetMemSDNode( @@ -26901,30 +27414,6 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget, return SDValue(); } - if (VT == MVT::v2i32) { - assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type"); - Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src, - DAG.getUNDEF(MVT::v2i32)); - // If the index is v2i64 and we have VLX we can use xmm for data and index. - if (Index.getValueType() == MVT::v2i64 && Subtarget.hasVLX()) { - SDVTList VTs = DAG.getVTList(MVT::v2i1, MVT::Other); - SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale}; - SDValue NewScatter = DAG.getTargetMemSDNode( - VTs, Ops, dl, N->getMemoryVT(), N->getMemOperand()); - return SDValue(NewScatter.getNode(), 1); - } - // Custom widen all the operands to avoid promotion. - EVT NewIndexVT = EVT::getVectorVT( - *DAG.getContext(), Index.getValueType().getVectorElementType(), 4); - Index = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewIndexVT, Index, - DAG.getUNDEF(Index.getValueType())); - Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask, - DAG.getConstant(0, dl, MVT::v2i1)); - SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale}; - return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), N->getMemoryVT(), dl, - Ops, N->getMemOperand()); - } - MVT IndexVT = Index.getSimpleValueType(); MVT MaskVT = Mask.getSimpleValueType(); @@ -27160,6 +27649,13 @@ SDValue X86TargetLowering::LowerGC_TRANSITION_END(SDValue Op, return NOOP; } +SDValue X86TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG, + RTLIB::Libcall Call) const { + SmallVector Ops(Op->op_begin(), Op->op_end()); + MakeLibCallOptions CallOptions; + return makeLibCall(DAG, Call, MVT::f128, Ops, CallOptions, SDLoc(Op)).first; +} + /// Provide custom lowering hooks for some operations. 
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -27206,10 +27702,14 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); + case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); + case ISD::STRICT_FP_ROUND: return LowerSTRICT_FP_ROUND(Op, DAG); case ISD::LOAD: return LowerLoad(Op, Subtarget, DAG); case ISD::STORE: return LowerStore(Op, Subtarget, DAG); case ISD::FADD: - case ISD::FSUB: return lowerFaddFsub(Op, DAG, Subtarget); + case ISD::FSUB: return lowerFaddFsub(Op, DAG); + case ISD::FMUL: return LowerF128Call(Op, DAG, RTLIB::MUL_F128); + case ISD::FDIV: return LowerF128Call(Op, DAG, RTLIB::DIV_F128); case ISD::FABS: case ISD::FNEG: return LowerFABSorFNEG(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); @@ -27347,37 +27847,22 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, } case ISD::MUL: { EVT VT = N->getValueType(0); - assert(VT.isVector() && "Unexpected VT"); - if (getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger && - VT.getVectorNumElements() == 2) { - // Promote to a pattern that will be turned into PMULUDQ. - SDValue N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64, - N->getOperand(0)); - SDValue N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64, - N->getOperand(1)); - SDValue Mul = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, N0, N1); - Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, VT, Mul)); - } else if (getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && - VT.getVectorElementType() == MVT::i8) { - // Pre-promote these to vXi16 to avoid op legalization thinking all 16 - // elements are needed. - MVT MulVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements()); - SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(0)); - SDValue Op1 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(1)); - SDValue Res = DAG.getNode(ISD::MUL, dl, MulVT, Op0, Op1); - Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); - unsigned NumConcats = 16 / VT.getVectorNumElements(); - SmallVector ConcatOps(NumConcats, DAG.getUNDEF(VT)); - ConcatOps[0] = Res; - Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, ConcatOps); - Results.push_back(Res); - } + assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && + VT.getVectorElementType() == MVT::i8 && "Unexpected VT!"); + // Pre-promote these to vXi16 to avoid op legalization thinking all 16 + // elements are needed. 
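LowerF128Call above funnels f128 FMUL/FDIV into runtime library calls. As a hedged illustration of what those RTLIB entries usually resolve to on x86-64 (the __multf3/__divtf3 names are the customary compiler-rt/libgcc soft-float symbols, not something this patch defines):

// Compiled for x86-64 with GCC or Clang, where __float128 is the IEEE
// binary128 type, these operators become calls such as __multf3 and __divtf3
// because the hardware has no native f128 arithmetic instructions.
__float128 mulF128(__float128 A, __float128 B) { return A * B; }
__float128 divF128(__float128 A, __float128 B) { return A / B; }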
+ MVT MulVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements()); + SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(0)); + SDValue Op1 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(1)); + SDValue Res = DAG.getNode(ISD::MUL, dl, MulVT, Op0, Op1); + Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); + unsigned NumConcats = 16 / VT.getVectorNumElements(); + SmallVector ConcatOps(NumConcats, DAG.getUNDEF(VT)); + ConcatOps[0] = Res; + Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, ConcatOps); + Results.push_back(Res); return; } - case ISD::UADDSAT: - case ISD::SADDSAT: - case ISD::USUBSAT: - case ISD::SSUBSAT: case X86ISD::VPMADDWD: case X86ISD::AVG: { // Legalize types for ISD::UADDSAT/SADDSAT/USUBSAT/SSUBSAT and @@ -27388,6 +27873,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, EVT InVT = N->getOperand(0).getValueType(); assert(VT.getSizeInBits() < 128 && 128 % VT.getSizeInBits() == 0 && "Expected a VT that divides into 128 bits."); + assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && + "Unexpected type action!"); unsigned NumConcat = 128 / InVT.getSizeInBits(); EVT InWideVT = EVT::getVectorVT(*DAG.getContext(), @@ -27404,9 +27891,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops); SDValue Res = DAG.getNode(N->getOpcode(), dl, WideVT, InVec0, InVec1); - if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) - Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res, - DAG.getIntPtrConstant(0, dl)); Results.push_back(Res); return; } @@ -27435,26 +27919,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(Hi); return; } - case ISD::SETCC: { - // Widen v2i32 (setcc v2f32). This is really needed for AVX512VL when - // setCC result type is v2i1 because type legalzation will end up with - // a v4i1 setcc plus an extend. - assert(N->getValueType(0) == MVT::v2i32 && "Unexpected type"); - if (N->getOperand(0).getValueType() != MVT::v2f32 || - getTypeAction(*DAG.getContext(), MVT::v2i32) == TypeWidenVector) - return; - SDValue UNDEF = DAG.getUNDEF(MVT::v2f32); - SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, - N->getOperand(0), UNDEF); - SDValue RHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, - N->getOperand(1), UNDEF); - SDValue Res = DAG.getNode(ISD::SETCC, dl, MVT::v4i32, LHS, RHS, - N->getOperand(2)); - Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, - DAG.getIntPtrConstant(0, dl)); - Results.push_back(Res); - return; - } // We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32. case X86ISD::FMINC: case X86ISD::FMIN: @@ -27475,7 +27939,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, case ISD::SREM: case ISD::UREM: { EVT VT = N->getValueType(0); - if (getTypeAction(*DAG.getContext(), VT) == TypeWidenVector) { + if (VT.isVector()) { + assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && + "Unexpected type action!"); // If this RHS is a constant splat vector we can widen this and let // division/remainder by constant optimize it. // TODO: Can we do something for non-splat? @@ -27493,17 +27959,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } - if (VT == MVT::v2i32) { - // Legalize v2i32 div/rem by unrolling. Otherwise we promote to the - // v2i64 and unroll later. But then we create i64 scalar ops which - // might be slow in 64-bit mode or require a libcall in 32-bit mode. 
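The consolidated ISD::MUL path above promotes the i8 elements to i16, multiplies, and truncates; that is sound because a product truncated to N bits depends only on the low N bits of each operand. A standalone element-wise model:

#include <cstdint>

// Model of the widened multiply for one v8i8 value: ANY_EXTEND to i16,
// multiply in vXi16, then TRUNCATE back to i8. The low 8 bits of the 16-bit
// product equal the wrapped i8*i8 product.
void mul_v8i8(const uint8_t A[8], const uint8_t B[8], uint8_t Out[8]) {
  for (int I = 0; I < 8; ++I) {
    uint16_t Wide = uint16_t(A[I]) * uint16_t(B[I]);
    Out[I] = uint8_t(Wide);
  }
}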
- Results.push_back(DAG.UnrollVectorOp(N)); - return; - } - - if (VT.isVector()) - return; - LLVM_FALLTHROUGH; } case ISD::SDIVREM: @@ -27561,58 +28016,40 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } } - return; - } - case ISD::SIGN_EXTEND_VECTOR_INREG: { - if (ExperimentalVectorWideningLegalization) - return; - - EVT VT = N->getValueType(0); - SDValue In = N->getOperand(0); - EVT InVT = In.getValueType(); - if (!Subtarget.hasSSE41() && VT == MVT::v4i64 && - (InVT == MVT::v16i16 || InVT == MVT::v32i8)) { - // Custom split this so we can extend i8/i16->i32 invec. This is better - // since sign_extend_inreg i8/i16->i64 requires an extend to i32 using - // sra. Then extending from i32 to i64 using pcmpgt. By custom splitting - // we allow the sra from the extend to i32 to be shared by the split. - EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(), - InVT.getVectorElementType(), - InVT.getVectorNumElements() / 2); - MVT ExtendVT = MVT::getVectorVT(MVT::i32, - VT.getVectorNumElements()); - In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ExtractVT, - In, DAG.getIntPtrConstant(0, dl)); - In = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, MVT::v4i32, In); - - // Fill a vector with sign bits for each element. - SDValue Zero = DAG.getConstant(0, dl, ExtendVT); - SDValue SignBits = DAG.getSetCC(dl, ExtendVT, Zero, In, ISD::SETGT); - - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - - // Create an unpackl and unpackh to interleave the sign bits then bitcast - // to vXi64. - SDValue Lo = getUnpackl(DAG, dl, ExtendVT, In, SignBits); - Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); - SDValue Hi = getUnpackh(DAG, dl, ExtendVT, In, SignBits); - Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); + if (Subtarget.hasVLX() && InVT == MVT::v8i64 && VT == MVT::v8i8 && + getTypeAction(*DAG.getContext(), InVT) == TypeSplitVector && + isTypeLegal(MVT::v4i64)) { + // Input needs to be split and output needs to widened. Let's use two + // VTRUNCs, and shuffle their results together into the wider type. + SDValue Lo, Hi; + std::tie(Lo, Hi) = DAG.SplitVector(In, dl); - SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); + Lo = DAG.getNode(X86ISD::VTRUNC, dl, MVT::v16i8, Lo); + Hi = DAG.getNode(X86ISD::VTRUNC, dl, MVT::v16i8, Hi); + SDValue Res = DAG.getVectorShuffle(MVT::v16i8, dl, Lo, Hi, + { 0, 1, 2, 3, 16, 17, 18, 19, + -1, -1, -1, -1, -1, -1, -1, -1 }); Results.push_back(Res); return; } + return; } + case ISD::ANY_EXTEND: + // Right now, only MVT::v8i8 has Custom action for an illegal type. + // It's intended to custom handle the input type. + assert(N->getValueType(0) == MVT::v8i8 && + "Do not know how to legalize this Node"); + return; case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: { EVT VT = N->getValueType(0); SDValue In = N->getOperand(0); EVT InVT = In.getValueType(); if (!Subtarget.hasSSE41() && VT == MVT::v4i64 && - (InVT == MVT::v4i16 || InVT == MVT::v4i8) && - getTypeAction(*DAG.getContext(), InVT) == TypeWidenVector) { + (InVT == MVT::v4i16 || InVT == MVT::v4i8)){ + assert(getTypeAction(*DAG.getContext(), InVT) == TypeWidenVector && + "Unexpected type action!"); assert(N->getOpcode() == ISD::SIGN_EXTEND && "Unexpected opcode"); // Custom split this so we can extend i8/i16->i32 invec. 
This is better // since sign_extend_inreg i8/i16->i64 requires an extend to i32 using @@ -27683,27 +28120,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, SDValue Src = N->getOperand(0); EVT SrcVT = Src.getValueType(); - // Promote these manually to avoid over promotion to v2i64. Type - // legalization will revisit the v2i32 operation for more cleanup. - if ((VT == MVT::v2i8 || VT == MVT::v2i16) && - getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger) { - // AVX512DQ provides instructions that produce a v2i64 result. - if (Subtarget.hasDQI()) - return; - - SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v2i32, Src); - Res = DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext - : ISD::AssertSext, - dl, MVT::v2i32, Res, - DAG.getValueType(VT.getVectorElementType())); - Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); - Results.push_back(Res); - return; - } - if (VT.isVector() && VT.getScalarSizeInBits() < 32) { - if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) - return; + assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && + "Unexpected type action!"); // Try to create a 128 bit vector, but don't exceed a 32 bit element. unsigned NewEltWidth = std::min(128 / VT.getVectorNumElements(), 32U); @@ -27738,35 +28157,18 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, assert((IsSigned || Subtarget.hasAVX512()) && "Can only handle signed conversion without AVX512"); assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); - bool Widenv2i32 = - getTypeAction(*DAG.getContext(), MVT::v2i32) == TypeWidenVector; + assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && + "Unexpected type action!"); if (Src.getValueType() == MVT::v2f64) { - unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; if (!IsSigned && !Subtarget.hasVLX()) { - // If v2i32 is widened, we can defer to the generic legalizer. - if (Widenv2i32) - return; - // Custom widen by doubling to a legal vector with. Isel will - // further widen to v8f64. - Opc = ISD::FP_TO_UINT; - Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f64, - Src, DAG.getUNDEF(MVT::v2f64)); + // If we have VLX we can emit a target specific FP_TO_UINT node, + // otherwise we can defer to the generic legalizer which will widen + // the input as well. This will be further widened during op + // legalization to v8i32<-v8f64. + return; } + unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; SDValue Res = DAG.getNode(Opc, dl, MVT::v4i32, Src); - if (!Widenv2i32) - Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, - DAG.getIntPtrConstant(0, dl)); - Results.push_back(Res); - return; - } - if (SrcVT == MVT::v2f32 && - getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) { - SDValue Idx = DAG.getIntPtrConstant(0, dl); - SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, - DAG.getUNDEF(MVT::v2f32)); - Res = DAG.getNode(IsSigned ? 
ISD::FP_TO_SINT - : ISD::FP_TO_UINT, dl, MVT::v4i32, Res); - Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx); Results.push_back(Res); return; } @@ -27776,6 +28178,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } + assert(!VT.isVector() && "Vectors should have been handled above!"); + if (Subtarget.hasDQI() && VT == MVT::i64 && (SrcVT == MVT::f32 || SrcVT == MVT::f64)) { assert(!Subtarget.is64Bit() && "i64 should be legal"); @@ -27847,7 +28251,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::INTRINSIC_W_CHAIN: { - unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); + unsigned IntNo = N->getConstantOperandVal(1); switch (IntNo) { default : llvm_unreachable("Do not know how to custom type " "legalize this intrinsic operation!"); @@ -27905,7 +28309,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); SDValue Result; SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); - unsigned BasePtr = TRI->getBaseRegister(); + Register BasePtr = TRI->getBaseRegister(); MachineMemOperand *MMO = cast(N)->getMemOperand(); if (TRI->hasBasePointer(DAG.getMachineFunction()) && (BasePtr == X86::RBX || BasePtr == X86::EBX)) { @@ -28060,34 +28464,33 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } - if (SrcVT != MVT::f64 || - (DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8) || - getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector) + if (DstVT.isVector() && SrcVT == MVT::x86mmx) { + assert(getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector && + "Unexpected type action!"); + EVT WideVT = getTypeToTransformTo(*DAG.getContext(), DstVT); + SDValue Res = DAG.getNode(X86ISD::MOVQ2DQ, dl, WideVT, N->getOperand(0)); + Results.push_back(Res); return; + } - unsigned NumElts = DstVT.getVectorNumElements(); - EVT SVT = DstVT.getVectorElementType(); - EVT WiderVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2); - SDValue Res; - Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, N->getOperand(0)); - Res = DAG.getBitcast(WiderVT, Res); - Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, Res, - DAG.getIntPtrConstant(0, dl)); - Results.push_back(Res); return; } case ISD::MGATHER: { EVT VT = N->getValueType(0); - if (VT == MVT::v2f32 && (Subtarget.hasVLX() || !Subtarget.hasAVX512())) { + if ((VT == MVT::v2f32 || VT == MVT::v2i32) && + (Subtarget.hasVLX() || !Subtarget.hasAVX512())) { auto *Gather = cast(N); SDValue Index = Gather->getIndex(); if (Index.getValueType() != MVT::v2i64) return; + assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && + "Unexpected type action!"); + EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT); SDValue Mask = Gather->getMask(); assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type"); - SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, + SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Gather->getPassThru(), - DAG.getUNDEF(MVT::v2f32)); + DAG.getUNDEF(VT)); if (!Subtarget.hasVLX()) { // We need to widen the mask, but the instruction will only use 2 // of its elements. So we can use undef. 
@@ -28098,66 +28501,12 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, SDValue Ops[] = { Gather->getChain(), PassThru, Mask, Gather->getBasePtr(), Index, Gather->getScale() }; SDValue Res = DAG.getTargetMemSDNode( - DAG.getVTList(MVT::v4f32, Mask.getValueType(), MVT::Other), Ops, dl, + DAG.getVTList(WideVT, Mask.getValueType(), MVT::Other), Ops, dl, Gather->getMemoryVT(), Gather->getMemOperand()); Results.push_back(Res); Results.push_back(Res.getValue(2)); return; } - if (VT == MVT::v2i32) { - auto *Gather = cast(N); - SDValue Index = Gather->getIndex(); - SDValue Mask = Gather->getMask(); - assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type"); - SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, - Gather->getPassThru(), - DAG.getUNDEF(MVT::v2i32)); - // If the index is v2i64 we can use it directly. - if (Index.getValueType() == MVT::v2i64 && - (Subtarget.hasVLX() || !Subtarget.hasAVX512())) { - if (!Subtarget.hasVLX()) { - // We need to widen the mask, but the instruction will only use 2 - // of its elements. So we can use undef. - Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask, - DAG.getUNDEF(MVT::v2i1)); - Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask); - } - SDValue Ops[] = { Gather->getChain(), PassThru, Mask, - Gather->getBasePtr(), Index, Gather->getScale() }; - SDValue Res = DAG.getTargetMemSDNode( - DAG.getVTList(MVT::v4i32, Mask.getValueType(), MVT::Other), Ops, dl, - Gather->getMemoryVT(), Gather->getMemOperand()); - SDValue Chain = Res.getValue(2); - if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) - Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, - DAG.getIntPtrConstant(0, dl)); - Results.push_back(Res); - Results.push_back(Chain); - return; - } - if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) { - EVT IndexVT = Index.getValueType(); - EVT NewIndexVT = EVT::getVectorVT(*DAG.getContext(), - IndexVT.getScalarType(), 4); - // Otherwise we need to custom widen everything to avoid promotion. - Index = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewIndexVT, Index, - DAG.getUNDEF(IndexVT)); - Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask, - DAG.getConstant(0, dl, MVT::v2i1)); - SDValue Ops[] = { Gather->getChain(), PassThru, Mask, - Gather->getBasePtr(), Index, Gather->getScale() }; - SDValue Res = DAG.getMaskedGather(DAG.getVTList(MVT::v4i32, MVT::Other), - Gather->getMemoryVT(), dl, Ops, - Gather->getMemOperand()); - SDValue Chain = Res.getValue(1); - if (getTypeAction(*DAG.getContext(), MVT::v2i32) != TypeWidenVector) - Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, - DAG.getIntPtrConstant(0, dl)); - Results.push_back(Res); - Results.push_back(Chain); - return; - } - } return; } case ISD::LOAD: { @@ -28166,8 +28515,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, // cast since type legalization will try to use an i64 load. 
MVT VT = N->getSimpleValueType(0); assert(VT.isVector() && VT.getSizeInBits() == 64 && "Unexpected VT"); - if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) - return; + assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && + "Unexpected type action!"); if (!ISD::isNON_EXTLoad(N)) return; auto *Ld = cast(N); @@ -28177,11 +28526,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Ld->getPointerInfo(), Ld->getAlignment(), Ld->getMemOperand()->getFlags()); SDValue Chain = Res.getValue(1); - MVT WideVT = MVT::getVectorVT(LdVT, 2); - Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, WideVT, Res); - MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(), - VT.getVectorNumElements() * 2); - Res = DAG.getBitcast(CastVT, Res); + MVT VecVT = MVT::getVectorVT(LdVT, 2); + Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Res); + EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT); + Res = DAG.getBitcast(WideVT, Res); Results.push_back(Res); Results.push_back(Chain); return; @@ -28236,6 +28584,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; case X86ISD::Wrapper: return "X86ISD::Wrapper"; case X86ISD::WrapperRIP: return "X86ISD::WrapperRIP"; + case X86ISD::MOVQ2DQ: return "X86ISD::MOVQ2DQ"; case X86ISD::MOVDQ2Q: return "X86ISD::MOVDQ2Q"; case X86ISD::MMX_MOVD2W: return "X86ISD::MMX_MOVD2W"; case X86ISD::MMX_MOVW2D: return "X86ISD::MMX_MOVW2D"; @@ -28373,6 +28722,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::UNPCKL: return "X86ISD::UNPCKL"; case X86ISD::UNPCKH: return "X86ISD::UNPCKH"; case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; + case X86ISD::VBROADCAST_LOAD: return "X86ISD::VBROADCAST_LOAD"; case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM"; case X86ISD::SUBV_BROADCAST: return "X86ISD::SUBV_BROADCAST"; case X86ISD::VPERMILPV: return "X86ISD::VPERMILPV"; @@ -28737,6 +29087,9 @@ bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { } bool X86TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { + if (isa(ExtVal.getOperand(0))) + return false; + EVT SrcVT = ExtVal.getOperand(0).getValueType(); // There is no extending load for vXi1. @@ -28856,10 +29209,10 @@ static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB, sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); - unsigned mainDstReg = MRI.createVirtualRegister(RC); - unsigned fallDstReg = MRI.createVirtualRegister(RC); + Register mainDstReg = MRI.createVirtualRegister(RC); + Register fallDstReg = MRI.createVirtualRegister(RC); // thisMBB: // xbegin fallMBB @@ -28913,7 +29266,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI, static_assert(X86::AddrNumOperands == 5, "VAARG_64 assumes 5 address operands"); - unsigned DestReg = MI.getOperand(0).getReg(); + Register DestReg = MI.getOperand(0).getReg(); MachineOperand &Base = MI.getOperand(1); MachineOperand &Scale = MI.getOperand(2); MachineOperand &Index = MI.getOperand(3); @@ -29049,7 +29402,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI, assert(OffsetReg != 0); // Read the reg_save_area address. 
- unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass); + Register RegSaveReg = MRI.createVirtualRegister(AddrRegClass); BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg) .add(Base) .add(Scale) @@ -29059,8 +29412,8 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI, .setMemRefs(LoadOnlyMMO); // Zero-extend the offset - unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass); - BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64) + Register OffsetReg64 = MRI.createVirtualRegister(AddrRegClass); + BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64) .addImm(0) .addReg(OffsetReg) .addImm(X86::sub_32bit); @@ -29071,7 +29424,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI, .addReg(RegSaveReg); // Compute the offset for the next argument - unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass); + Register NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass); BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg) .addReg(OffsetReg) .addImm(UseFPOffset ? 16 : 8); @@ -29096,7 +29449,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI, // // Load the overflow_area address into a register. - unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass); + Register OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass); BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg) .add(Base) .add(Scale) @@ -29110,7 +29463,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI, if (NeedsAlign) { // Align the overflow address assert(isPowerOf2_32(Align) && "Alignment must be a power of 2"); - unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass); + Register TmpReg = MRI.createVirtualRegister(AddrRegClass); // aligned_addr = (addr + (align-1)) & ~(align-1) BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg) @@ -29127,7 +29480,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI, // Compute the next overflow address after this argument. 
// (the overflow address should be kept 8-byte aligned) - unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass); + Register NextAddrReg = MRI.createVirtualRegister(AddrRegClass); BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg) .addReg(OverflowDestReg) .addImm(ArgSizeA8); @@ -29191,7 +29544,7 @@ MachineBasicBlock *X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned CountReg = MI.getOperand(0).getReg(); + Register CountReg = MI.getOperand(0).getReg(); int64_t RegSaveFrameIndex = MI.getOperand(1).getImm(); int64_t VarArgsFPOffset = MI.getOperand(2).getImm(); @@ -29273,7 +29626,9 @@ static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr, static bool isCMOVPseudo(MachineInstr &MI) { switch (MI.getOpcode()) { case X86::CMOV_FR32: + case X86::CMOV_FR32X: case X86::CMOV_FR64: + case X86::CMOV_FR64X: case X86::CMOV_GR8: case X86::CMOV_GR16: case X86::CMOV_GR32: @@ -29326,9 +29681,9 @@ static MachineInstrBuilder createPHIsForCMOVsInSinkBB( MachineInstrBuilder MIB; for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) { - unsigned DestReg = MIIt->getOperand(0).getReg(); - unsigned Op1Reg = MIIt->getOperand(1).getReg(); - unsigned Op2Reg = MIIt->getOperand(2).getReg(); + Register DestReg = MIIt->getOperand(0).getReg(); + Register Op1Reg = MIIt->getOperand(1).getReg(); + Register Op2Reg = MIIt->getOperand(2).getReg(); // If this CMOV we are generating is the opposite condition from // the jump we generated, then we have to swap the operands for the @@ -29486,9 +29841,9 @@ X86TargetLowering::EmitLoweredCascadedSelect(MachineInstr &FirstCMOV, // SinkMBB: // %Result = phi [ %FalseValue, SecondInsertedMBB ], [ %TrueValue, ThisMBB ] - unsigned DestReg = FirstCMOV.getOperand(0).getReg(); - unsigned Op1Reg = FirstCMOV.getOperand(1).getReg(); - unsigned Op2Reg = FirstCMOV.getOperand(2).getReg(); + Register DestReg = FirstCMOV.getOperand(0).getReg(); + Register Op1Reg = FirstCMOV.getOperand(1).getReg(); + Register Op2Reg = FirstCMOV.getOperand(2).getReg(); MachineInstrBuilder MIB = BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(X86::PHI), DestReg) .addReg(Op1Reg) @@ -30006,7 +30361,7 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI, // call the retpoline thunk. DebugLoc DL = MI.getDebugLoc(); const X86InstrInfo *TII = Subtarget.getInstrInfo(); - unsigned CalleeVReg = MI.getOperand(0).getReg(); + Register CalleeVReg = MI.getOperand(0).getReg(); unsigned Opc = getOpcodeForRetpoline(MI.getOpcode()); // Find an available scratch register to hold the callee. On 64-bit, we can @@ -30079,7 +30434,7 @@ void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI, // Initialize a register with zero. MVT PVT = getPointerTy(MF->getDataLayout()); const TargetRegisterClass *PtrRC = getRegClassFor(PVT); - unsigned ZReg = MRI.createVirtualRegister(PtrRC); + Register ZReg = MRI.createVirtualRegister(PtrRC); unsigned XorRROpc = (PVT == MVT::i64) ? X86::XOR64rr : X86::XOR32rr; BuildMI(*MBB, MI, DL, TII->get(XorRROpc)) .addDef(ZReg) @@ -30087,7 +30442,7 @@ void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI, .addReg(ZReg, RegState::Undef); // Read the current SSP Register value to the zeroed register. - unsigned SSPCopyReg = MRI.createVirtualRegister(PtrRC); + Register SSPCopyReg = MRI.createVirtualRegister(PtrRC); unsigned RdsspOpc = (PVT == MVT::i64) ? 
X86::RDSSPQ : X86::RDSSPD; BuildMI(*MBB, MI, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg); @@ -30131,8 +30486,8 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, const TargetRegisterClass *RC = MRI.getRegClass(DstReg); assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); (void)TRI; - unsigned mainDstReg = MRI.createVirtualRegister(RC); - unsigned restoreDstReg = MRI.createVirtualRegister(RC); + Register mainDstReg = MRI.createVirtualRegister(RC); + Register restoreDstReg = MRI.createVirtualRegister(RC); MemOpndSlot = CurOp; @@ -30246,8 +30601,8 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64(); X86MachineFunctionInfo *X86FI = MF->getInfo(); X86FI->setRestoreBasePointer(MF); - unsigned FramePtr = RegInfo->getFrameRegister(*MF); - unsigned BasePtr = RegInfo->getBaseRegister(); + Register FramePtr = RegInfo->getFrameRegister(*MF); + Register BasePtr = RegInfo->getBaseRegister(); unsigned Opm = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm; addRegOffset(BuildMI(restoreMBB, DL, TII->get(Opm), BasePtr), FramePtr, true, X86FI->getRestoreBasePointerOffset()) @@ -30329,7 +30684,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI, MBB->addSuccessor(checkSspMBB); // Initialize a register with zero. - unsigned ZReg = MRI.createVirtualRegister(PtrRC); + Register ZReg = MRI.createVirtualRegister(PtrRC); unsigned XorRROpc = (PVT == MVT::i64) ? X86::XOR64rr : X86::XOR32rr; BuildMI(checkSspMBB, DL, TII->get(XorRROpc)) .addDef(ZReg) @@ -30337,7 +30692,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI, .addReg(ZReg, RegState::Undef); // Read the current SSP Register value to the zeroed register. - unsigned SSPCopyReg = MRI.createVirtualRegister(PtrRC); + Register SSPCopyReg = MRI.createVirtualRegister(PtrRC); unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD; BuildMI(checkSspMBB, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg); @@ -30352,7 +30707,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI, checkSspMBB->addSuccessor(fallMBB); // Reload the previously saved SSP register value. - unsigned PrevSSPReg = MRI.createVirtualRegister(PtrRC); + Register PrevSSPReg = MRI.createVirtualRegister(PtrRC); unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm; const int64_t SPPOffset = 3 * PVT.getStoreSize(); MachineInstrBuilder MIB = @@ -30370,7 +30725,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI, MIB.setMemRefs(MMOs); // Subtract the current SSP from the previous SSP. - unsigned SspSubReg = MRI.createVirtualRegister(PtrRC); + Register SspSubReg = MRI.createVirtualRegister(PtrRC); unsigned SubRROpc = (PVT == MVT::i64) ? X86::SUB64rr : X86::SUB32rr; BuildMI(fallMBB, DL, TII->get(SubRROpc), SspSubReg) .addReg(PrevSSPReg) @@ -30384,7 +30739,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI, // Shift right by 2/3 for 32/64 because incssp multiplies the argument by 4/8. unsigned ShrRIOpc = (PVT == MVT::i64) ? X86::SHR64ri : X86::SHR32ri; unsigned Offset = (PVT == MVT::i64) ? 3 : 2; - unsigned SspFirstShrReg = MRI.createVirtualRegister(PtrRC); + Register SspFirstShrReg = MRI.createVirtualRegister(PtrRC); BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspFirstShrReg) .addReg(SspSubReg) .addImm(Offset); @@ -30394,7 +30749,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI, BuildMI(fixShadowMBB, DL, TII->get(IncsspOpc)).addReg(SspFirstShrReg); // Reset the lower 8 bits. 
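The arithmetic in the shadow-stack fix above converts the byte difference between the saved and current SSP into an INCSSP argument. A minimal standalone model of that step, using the 4-/8-byte slot size stated in the comment:

#include <cstdint>

// PrevSSP - CurSSP is a byte count; each shadow-stack slot is 4 bytes (32-bit)
// or 8 bytes (64-bit), hence the shift by 2 or 3 before feeding INCSSP.
uint64_t shadowStackSlotsToPop(uint64_t PrevSSP, uint64_t CurSSP, bool Is64Bit) {
  uint64_t ByteDiff = PrevSSP - CurSSP;  // SspSubReg in the hunk
  return ByteDiff >> (Is64Bit ? 3 : 2);  // SspFirstShrReg in the hunk
}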
- unsigned SspSecondShrReg = MRI.createVirtualRegister(PtrRC); + Register SspSecondShrReg = MRI.createVirtualRegister(PtrRC); BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspSecondShrReg) .addReg(SspFirstShrReg) .addImm(8); @@ -30406,12 +30761,12 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI, // Do a single shift left. unsigned ShlR1Opc = (PVT == MVT::i64) ? X86::SHL64r1 : X86::SHL32r1; - unsigned SspAfterShlReg = MRI.createVirtualRegister(PtrRC); + Register SspAfterShlReg = MRI.createVirtualRegister(PtrRC); BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(ShlR1Opc), SspAfterShlReg) .addReg(SspSecondShrReg); // Save the value 128 to a register (will be used next with incssp). - unsigned Value128InReg = MRI.createVirtualRegister(PtrRC); + Register Value128InReg = MRI.createVirtualRegister(PtrRC); unsigned MovRIOpc = (PVT == MVT::i64) ? X86::MOV64ri32 : X86::MOV32ri; BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(MovRIOpc), Value128InReg) .addImm(128); @@ -30419,8 +30774,8 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI, // Since incssp only looks at the lower 8 bits, we might need to do several // iterations of incssp until we finish fixing the shadow stack. - unsigned DecReg = MRI.createVirtualRegister(PtrRC); - unsigned CounterReg = MRI.createVirtualRegister(PtrRC); + Register DecReg = MRI.createVirtualRegister(PtrRC); + Register CounterReg = MRI.createVirtualRegister(PtrRC); BuildMI(fixShadowLoopMBB, DL, TII->get(X86::PHI), CounterReg) .addReg(SspAfterShlReg) .addMBB(fixShadowLoopPrepareMBB) @@ -30460,11 +30815,11 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, const TargetRegisterClass *RC = (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass; - unsigned Tmp = MRI.createVirtualRegister(RC); + Register Tmp = MRI.createVirtualRegister(RC); // Since FP is only updated here but NOT referenced, it's treated as GPR. const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); unsigned FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP; - unsigned SP = RegInfo->getStackRegister(); + Register SP = RegInfo->getStackRegister(); MachineInstrBuilder MIB; @@ -30662,8 +31017,8 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, X86MachineFunctionInfo *MFI = MF->getInfo(); MFI->setRestoreBasePointer(MF); - unsigned FP = RI.getFrameRegister(*MF); - unsigned BP = RI.getBaseRegister(); + Register FP = RI.getFrameRegister(*MF); + Register BP = RI.getBaseRegister(); unsigned Op = FPIs64Bit ? X86::MOV64rm : X86::MOV32rm; addRegOffset(BuildMI(DispatchBB, DL, TII->get(Op), BP), FP, true, MFI->getRestoreBasePointerOffset()) @@ -30674,7 +31029,7 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, } // IReg is used as an index in a memory operand and therefore can't be SP - unsigned IReg = MRI->createVirtualRegister(&X86::GR32_NOSPRegClass); + Register IReg = MRI->createVirtualRegister(&X86::GR32_NOSPRegClass); addFrameReference(BuildMI(DispatchBB, DL, TII->get(X86::MOV32rm), IReg), FI, Subtarget.is64Bit() ? 
8 : 4); BuildMI(DispatchBB, DL, TII->get(X86::CMP32ri)) @@ -30683,8 +31038,8 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, BuildMI(DispatchBB, DL, TII->get(X86::JCC_1)).addMBB(TrapBB).addImm(X86::COND_AE); if (Subtarget.is64Bit()) { - unsigned BReg = MRI->createVirtualRegister(&X86::GR64RegClass); - unsigned IReg64 = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass); + Register BReg = MRI->createVirtualRegister(&X86::GR64RegClass); + Register IReg64 = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass); // leaq .LJTI0_0(%rip), BReg BuildMI(DispContBB, DL, TII->get(X86::LEA64r), BReg) @@ -30710,9 +31065,9 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addReg(0); break; case MachineJumpTableInfo::EK_LabelDifference32: { - unsigned OReg = MRI->createVirtualRegister(&X86::GR32RegClass); - unsigned OReg64 = MRI->createVirtualRegister(&X86::GR64RegClass); - unsigned TReg = MRI->createVirtualRegister(&X86::GR64RegClass); + Register OReg = MRI->createVirtualRegister(&X86::GR32RegClass); + Register OReg64 = MRI->createVirtualRegister(&X86::GR64RegClass); + Register TReg = MRI->createVirtualRegister(&X86::GR64RegClass); // movl (BReg,IReg64,4), OReg BuildMI(DispContBB, DL, TII->get(X86::MOV32rm), OReg) @@ -30783,8 +31138,8 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, DefRegs[MOp.getReg()] = true; MachineInstrBuilder MIB(*MF, &II); - for (unsigned RI = 0; SavedRegs[RI]; ++RI) { - unsigned Reg = SavedRegs[RI]; + for (unsigned RegIdx = 0; SavedRegs[RegIdx]; ++RegIdx) { + unsigned Reg = SavedRegs[RegIdx]; if (!DefRegs[Reg]) MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead); } @@ -30906,20 +31261,18 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, TII->get(X86::FNSTCW16m)), OrigCWFrameIdx); // Load the old value of the control word... - unsigned OldCW = - MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass); + Register OldCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass); addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOVZX32rm16), OldCW), OrigCWFrameIdx); // OR 0b11 into bit 10 and 11. 0b11 is the encoding for round toward zero. - unsigned NewCW = - MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass); + Register NewCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass); BuildMI(*BB, MI, DL, TII->get(X86::OR32ri), NewCW) .addReg(OldCW, RegState::Kill).addImm(0xC00); // Extract to 16 bits. - unsigned NewCW16 = - MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass); + Register NewCW16 = + MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass); BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), NewCW16) .addReg(NewCW, RegState::Kill, X86::sub_16bit); @@ -31023,7 +31376,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineRegisterInfo &MRI = MF->getRegInfo(); MVT SPTy = getPointerTy(MF->getDataLayout()); const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy); - unsigned computedAddrVReg = MRI.createVirtualRegister(AddrRegClass); + Register computedAddrVReg = MRI.createVirtualRegister(AddrRegClass); X86AddressMode AM = getAddressFromInstr(&MI, 0); // Regalloc does not need any help when the memory operand of CMPXCHG8B @@ -31034,10 +31387,14 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // After X86TargetLowering::ReplaceNodeResults CMPXCHG8B is glued to its // four operand definitions that are E[ABCD] registers. We skip them and // then insert the LEA. 
- MachineBasicBlock::iterator MBBI(MI); - while (MBBI->definesRegister(X86::EAX) || MBBI->definesRegister(X86::EBX) || - MBBI->definesRegister(X86::ECX) || MBBI->definesRegister(X86::EDX)) - --MBBI; + MachineBasicBlock::reverse_iterator RMBBI(MI.getReverseIterator()); + while (RMBBI != BB->rend() && (RMBBI->definesRegister(X86::EAX) || + RMBBI->definesRegister(X86::EBX) || + RMBBI->definesRegister(X86::ECX) || + RMBBI->definesRegister(X86::EDX))) { + ++RMBBI; + } + MachineBasicBlock::iterator MBBI(RMBBI); addFullAddress( BuildMI(*BB, *MBBI, DL, TII->get(X86::LEA32r), computedAddrVReg), AM); @@ -31232,12 +31589,21 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Known.One |= Known2.One; break; } + case X86ISD::PSADBW: { + assert(VT.getScalarType() == MVT::i64 && + Op.getOperand(0).getValueType().getScalarType() == MVT::i8 && + "Unexpected PSADBW types"); + + // PSADBW - fills low 16 bits and zeros upper 48 bits of each i64 result. + Known.Zero.setBitsFrom(16); + break; + } case X86ISD::CMOV: { - Known = DAG.computeKnownBits(Op.getOperand(1), Depth+1); + Known = DAG.computeKnownBits(Op.getOperand(1), Depth + 1); // If we don't know any bits, early out. if (Known.isUnknown()) break; - KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth+1); + KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); // Only known if known in both the LHS and RHS. Known.One &= Known2.One; @@ -31650,8 +32016,8 @@ static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef Mask, if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16) { SmallVector RepeatedMask; if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) { - ArrayRef LoMask(Mask.data() + 0, 4); - ArrayRef HiMask(Mask.data() + 4, 4); + ArrayRef LoMask(RepeatedMask.data() + 0, 4); + ArrayRef HiMask(RepeatedMask.data() + 4, 4); // PSHUFLW: permute lower 4 elements only. if (isUndefOrInRange(LoMask, 0, 4) && @@ -31789,8 +32155,8 @@ static bool matchBinaryPermuteShuffle( uint64_t BlendMask = 0; bool ForceV1Zero = false, ForceV2Zero = false; SmallVector TargetMask(Mask.begin(), Mask.end()); - if (matchVectorShuffleAsBlend(V1, V2, TargetMask, ForceV1Zero, ForceV2Zero, - BlendMask)) { + if (matchVectorShuffleAsBlend(V1, V2, TargetMask, Zeroable, ForceV1Zero, + ForceV2Zero, BlendMask)) { if (MaskVT == MVT::v16i16) { // We can only use v16i16 PBLENDW if the lanes are repeated. SmallVector RepeatedMask; @@ -31819,15 +32185,15 @@ static bool matchBinaryPermuteShuffle( } } - // Attempt to combine to INSERTPS. + // Attempt to combine to INSERTPS, but only if it has elements that need to + // be set to zero. if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() && - MaskVT.is128BitVector()) { - if (Zeroable.getBoolValue() && - matchShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) { - Shuffle = X86ISD::INSERTPS; - ShuffleVT = MVT::v4f32; - return true; - } + MaskVT.is128BitVector() && + llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; }) && + matchShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) { + Shuffle = X86ISD::INSERTPS; + ShuffleVT = MVT::v4f32; + return true; } // Attempt to combine to SHUFPD. 
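// An illustrative aside on the PSADBW known-bits case added above: each
// 64-bit lane of a PSADBW result is the sum of eight absolute byte
// differences, so it can never exceed 8 * 255 = 2040 and always fits in the
// low 16 bits, which is what Known.Zero.setBitsFrom(16) records. A minimal
// standalone sketch of that bound (plain C++, independent of LLVM's
// KnownBits machinery; the helper name is made up for illustration):
#include <cassert>
#include <cstdint>
#include <cstdlib>

static uint64_t psadbwLane(const uint8_t A[8], const uint8_t B[8]) {
  // Reference model of one 64-bit lane of (V)PSADBW: sum of absolute
  // differences of eight unsigned bytes.
  uint64_t Sum = 0;
  for (int I = 0; I < 8; ++I)
    Sum += static_cast<uint64_t>(std::abs(int(A[I]) - int(B[I])));
  // The sum is bounded by 8 * 255, so bits [16, 63] of the lane are zero.
  assert(Sum <= 8 * 255 && "PSADBW lane result exceeds 16 bits");
  return Sum;
}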
@@ -31835,7 +32201,11 @@ static bool matchBinaryPermuteShuffle( ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) || (MaskVT.is256BitVector() && Subtarget.hasAVX()) || (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) { - if (matchShuffleWithSHUFPD(MaskVT, V1, V2, PermuteImm, Mask)) { + bool ForceV1Zero = false, ForceV2Zero = false; + if (matchShuffleWithSHUFPD(MaskVT, V1, V2, ForceV1Zero, ForceV2Zero, + PermuteImm, Mask, Zeroable)) { + V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1; + V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2; Shuffle = X86ISD::SHUFP; ShuffleVT = MVT::getVectorVT(MVT::f64, MaskVT.getSizeInBits() / 64); return true; @@ -31889,6 +32259,15 @@ static bool matchBinaryPermuteShuffle( } } + // Attempt to combine to INSERTPS more generally if X86ISD::SHUFP failed. + if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() && + MaskVT.is128BitVector() && + matchShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) { + Shuffle = X86ISD::INSERTPS; + ShuffleVT = MVT::v4f32; + return true; + } + return false; } @@ -31942,7 +32321,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, unsigned NumRootElts = RootVT.getVectorNumElements(); unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts; bool FloatDomain = VT1.isFloatingPoint() || VT2.isFloatingPoint() || - (RootVT.isFloatingPoint() && Depth >= 2) || + (RootVT.isFloatingPoint() && Depth >= 1) || (RootVT.is256BitVector() && !Subtarget.hasAVX2()); // Don't combine if we are a AVX512/EVEX target and the mask element size @@ -31981,7 +32360,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, if (UnaryShuffle && RootVT.is256BitVector() && NumBaseMaskElts == 2 && !(Subtarget.hasAVX2() && BaseMask[0] >= -1 && BaseMask[1] >= -1) && !isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0)) { - if (Depth == 1 && Root.getOpcode() == X86ISD::VPERM2X128) + if (Depth == 0 && Root.getOpcode() == X86ISD::VPERM2X128) return SDValue(); // Nothing to do! MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64); unsigned PermMask = 0; @@ -31991,7 +32370,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, Res = DAG.getBitcast(ShuffleVT, V1); Res = DAG.getNode(X86ISD::VPERM2X128, DL, ShuffleVT, Res, DAG.getUNDEF(ShuffleVT), - DAG.getConstant(PermMask, DL, MVT::i8)); + DAG.getTargetConstant(PermMask, DL, MVT::i8)); return DAG.getBitcast(RootVT, Res); } @@ -32026,8 +32405,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, // Which shuffle domains are permitted? // Permit domain crossing at higher combine depths. // TODO: Should we indicate which domain is preferred if both are allowed? - bool AllowFloatDomain = FloatDomain || (Depth > 3); - bool AllowIntDomain = (!FloatDomain || (Depth > 3)) && Subtarget.hasSSE2() && + bool AllowFloatDomain = FloatDomain || (Depth >= 3); + bool AllowIntDomain = (!FloatDomain || (Depth >= 3)) && Subtarget.hasSSE2() && (!MaskVT.is256BitVector() || Subtarget.hasAVX2()); // Determine zeroable mask elements. @@ -32062,14 +32441,14 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, if (V1.getValueType() == MaskVT && V1.getOpcode() == ISD::SCALAR_TO_VECTOR && MayFoldLoad(V1.getOperand(0))) { - if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST) + if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST) return SDValue(); // Nothing to do! 
Res = V1.getOperand(0); Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res); return DAG.getBitcast(RootVT, Res); } if (Subtarget.hasAVX2()) { - if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST) + if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST) return SDValue(); // Nothing to do! Res = DAG.getBitcast(MaskVT, V1); Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res); @@ -32083,7 +32462,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { - if (Depth == 1 && Root.getOpcode() == Shuffle) + if (Depth == 0 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! Res = DAG.getBitcast(ShuffleSrcVT, NewV1); Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res); @@ -32094,11 +32473,11 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, AllowIntDomain, Subtarget, Shuffle, ShuffleVT, PermuteImm) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { - if (Depth == 1 && Root.getOpcode() == Shuffle) + if (Depth == 0 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! Res = DAG.getBitcast(ShuffleVT, V1); Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res, - DAG.getConstant(PermuteImm, DL, MVT::i8)); + DAG.getTargetConstant(PermuteImm, DL, MVT::i8)); return DAG.getBitcast(RootVT, Res); } } @@ -32109,7 +32488,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, NewV2, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT, UnaryShuffle) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { - if (Depth == 1 && Root.getOpcode() == Shuffle) + if (Depth == 0 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1); NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2); @@ -32123,12 +32502,12 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1, NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { - if (Depth == 1 && Root.getOpcode() == Shuffle) + if (Depth == 0 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! NewV1 = DAG.getBitcast(ShuffleVT, NewV1); NewV2 = DAG.getBitcast(ShuffleVT, NewV2); Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2, - DAG.getConstant(PermuteImm, DL, MVT::i8)); + DAG.getTargetConstant(PermuteImm, DL, MVT::i8)); return DAG.getBitcast(RootVT, Res); } @@ -32141,34 +32520,34 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, uint64_t BitLen, BitIdx; if (matchShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx, Zeroable)) { - if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI) + if (Depth == 0 && Root.getOpcode() == X86ISD::EXTRQI) return SDValue(); // Nothing to do! V1 = DAG.getBitcast(IntMaskVT, V1); Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1, - DAG.getConstant(BitLen, DL, MVT::i8), - DAG.getConstant(BitIdx, DL, MVT::i8)); + DAG.getTargetConstant(BitLen, DL, MVT::i8), + DAG.getTargetConstant(BitIdx, DL, MVT::i8)); return DAG.getBitcast(RootVT, Res); } if (matchShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) { - if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI) + if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTQI) return SDValue(); // Nothing to do! 
V1 = DAG.getBitcast(IntMaskVT, V1); V2 = DAG.getBitcast(IntMaskVT, V2); Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2, - DAG.getConstant(BitLen, DL, MVT::i8), - DAG.getConstant(BitIdx, DL, MVT::i8)); + DAG.getTargetConstant(BitLen, DL, MVT::i8), + DAG.getTargetConstant(BitIdx, DL, MVT::i8)); return DAG.getBitcast(RootVT, Res); } } // Don't try to re-form single instruction chains under any circumstances now // that we've done encoding canonicalization for them. - if (Depth < 2) + if (Depth < 1) return SDValue(); // Depth threshold above which we can efficiently use variable mask shuffles. - int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 2 : 3; + int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 1 : 2; AllowVariableMask &= (Depth >= VariableShuffleDepth) || HasVariableMask; bool MaskContainsZeros = @@ -32321,7 +32700,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, V2 = DAG.getBitcast(MaskVT, V2); SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, IntMaskVT, DAG, DL, true); Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp, - DAG.getConstant(M2ZImm, DL, MVT::i8)); + DAG.getTargetConstant(M2ZImm, DL, MVT::i8)); return DAG.getBitcast(RootVT, Res); } @@ -32650,7 +33029,7 @@ static SDValue combineX86ShufflesRecursively( // Bound the depth of our recursive combine because this is ultimately // quadratic in nature. const unsigned MaxRecursionDepth = 8; - if (Depth > MaxRecursionDepth) + if (Depth >= MaxRecursionDepth) return SDValue(); // Directly rip through bitcasts to find the underlying operand. @@ -32667,11 +33046,18 @@ static SDValue combineX86ShufflesRecursively( "Can only combine shuffles of the same vector register size."); // Extract target shuffle mask and resolve sentinels and inputs. + // TODO - determine Op's demanded elts from RootMask. SmallVector OpMask; SmallVector OpInputs; - if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask, DAG)) + APInt OpUndef, OpZero; + APInt OpDemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); + bool IsOpVariableMask = isTargetShuffleVariableMask(Op.getOpcode()); + if (!getTargetShuffleInputs(Op, OpDemandedElts, OpInputs, OpMask, OpUndef, + OpZero, DAG, Depth, false)) return SDValue(); + resolveTargetShuffleFromZeroables(OpMask, OpUndef, OpZero); + // Add the inputs to the Ops list, avoiding duplicates. SmallVector Ops(SrcOps.begin(), SrcOps.end()); @@ -32772,6 +33158,9 @@ static SDValue combineX86ShufflesRecursively( Mask[i] = OpMaskedIdx; } + // Remove unused/repeated shuffle source ops. + resolveTargetShuffleInputsAndMask(Ops, Mask); + // Handle the all undef/zero cases early. if (all_of(Mask, [](int Idx) { return Idx == SM_SentinelUndef; })) return DAG.getUNDEF(Root.getValueType()); @@ -32783,11 +33172,8 @@ static SDValue combineX86ShufflesRecursively( return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG, SDLoc(Root)); - // Remove unused/repeated shuffle source ops. - resolveTargetShuffleInputsAndMask(Ops, Mask); assert(!Ops.empty() && "Shuffle with no inputs detected"); - - HasVariableMask |= isTargetShuffleVariableMask(Op.getOpcode()); + HasVariableMask |= IsOpVariableMask; // Update the list of shuffle nodes that have been combined so far. SmallVector CombinedNodes(SrcNodes.begin(), @@ -32853,7 +33239,7 @@ static SDValue combineX86ShufflesRecursively( /// Helper entry wrapper to combineX86ShufflesRecursively. 
static SDValue combineX86ShufflesRecursively(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { - return combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1, + return combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 0, /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget); } @@ -33088,7 +33474,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, for (unsigned i = 0; i != Scale; ++i) DemandedMask[i] = i; if (SDValue Res = combineX86ShufflesRecursively( - {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 1, + {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 0, /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return DAG.getNode(X86ISD::VBROADCAST, DL, VT, DAG.getBitcast(SrcVT, Res)); @@ -33120,6 +33506,30 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, VT.getSizeInBits()); } + // vbroadcast(scalarload X) -> vbroadcast_load X + // For float loads, extract other uses of the scalar from the broadcast. + if (!SrcVT.isVector() && (Src.hasOneUse() || VT.isFloatingPoint()) && + ISD::isNormalLoad(Src.getNode())) { + LoadSDNode *LN = cast(Src); + SDVTList Tys = DAG.getVTList(VT, MVT::Other); + SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; + SDValue BcastLd = + DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, + LN->getMemoryVT(), LN->getMemOperand()); + // If the load value is used only by N, replace it via CombineTo N. + bool NoReplaceExtract = Src.hasOneUse(); + DCI.CombineTo(N.getNode(), BcastLd); + if (NoReplaceExtract) { + DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1)); + DCI.recursivelyDeleteUnusedNodes(LN); + } else { + SDValue Scl = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcVT, BcastLd, + DAG.getIntPtrConstant(0, DL)); + DCI.CombineTo(LN, Scl, BcastLd.getValue(1)); + } + return N; // Return N so it doesn't get rechecked! + } + return SDValue(); } case X86ISD::BLENDI: { @@ -33133,14 +33543,14 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, MVT SrcVT = N0.getOperand(0).getSimpleValueType(); if ((VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 && SrcVT.getScalarSizeInBits() >= 32) { - unsigned Mask = N.getConstantOperandVal(2); + unsigned BlendMask = N.getConstantOperandVal(2); unsigned Size = VT.getVectorNumElements(); unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits(); - unsigned ScaleMask = scaleVectorShuffleBlendMask(Mask, Size, Scale); + BlendMask = scaleVectorShuffleBlendMask(BlendMask, Size, Scale); return DAG.getBitcast( VT, DAG.getNode(X86ISD::BLENDI, DL, SrcVT, N0.getOperand(0), N1.getOperand(0), - DAG.getConstant(ScaleMask, DL, MVT::i8))); + DAG.getTargetConstant(BlendMask, DL, MVT::i8))); } } return SDValue(); @@ -33208,76 +33618,97 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, // If we zero out all elements from Op0 then we don't need to reference it. if (((ZeroMask | (1u << DstIdx)) == 0xF) && !Op0.isUndef()) return DAG.getNode(X86ISD::INSERTPS, DL, VT, DAG.getUNDEF(VT), Op1, - DAG.getConstant(InsertPSMask, DL, MVT::i8)); + DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); // If we zero out the element from Op1 then we don't need to reference it. if ((ZeroMask & (1u << DstIdx)) && !Op1.isUndef()) return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT), - DAG.getConstant(InsertPSMask, DL, MVT::i8)); + DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); // Attempt to merge insertps Op1 with an inner target shuffle node. 
SmallVector TargetMask1; SmallVector Ops1; - if (setTargetShuffleZeroElements(Op1, TargetMask1, Ops1)) { - int M = TargetMask1[SrcIdx]; - if (isUndefOrZero(M)) { + APInt KnownUndef1, KnownZero1; + if (getTargetShuffleAndZeroables(Op1, TargetMask1, Ops1, KnownUndef1, + KnownZero1)) { + if (KnownUndef1[SrcIdx] || KnownZero1[SrcIdx]) { // Zero/UNDEF insertion - zero out element and remove dependency. InsertPSMask |= (1u << DstIdx); return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT), - DAG.getConstant(InsertPSMask, DL, MVT::i8)); + DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); } // Update insertps mask srcidx and reference the source input directly. + int M = TargetMask1[SrcIdx]; assert(0 <= M && M < 8 && "Shuffle index out of range"); InsertPSMask = (InsertPSMask & 0x3f) | ((M & 0x3) << 6); Op1 = Ops1[M < 4 ? 0 : 1]; return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1, - DAG.getConstant(InsertPSMask, DL, MVT::i8)); + DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); } // Attempt to merge insertps Op0 with an inner target shuffle node. SmallVector TargetMask0; SmallVector Ops0; - if (!setTargetShuffleZeroElements(Op0, TargetMask0, Ops0)) - return SDValue(); + APInt KnownUndef0, KnownZero0; + if (getTargetShuffleAndZeroables(Op0, TargetMask0, Ops0, KnownUndef0, + KnownZero0)) { + bool Updated = false; + bool UseInput00 = false; + bool UseInput01 = false; + for (int i = 0; i != 4; ++i) { + if ((InsertPSMask & (1u << i)) || (i == (int)DstIdx)) { + // No change if element is already zero or the inserted element. + continue; + } else if (KnownUndef0[i] || KnownZero0[i]) { + // If the target mask is undef/zero then we must zero the element. + InsertPSMask |= (1u << i); + Updated = true; + continue; + } - bool Updated = false; - bool UseInput00 = false; - bool UseInput01 = false; - for (int i = 0; i != 4; ++i) { - int M = TargetMask0[i]; - if ((InsertPSMask & (1u << i)) || (i == (int)DstIdx)) { - // No change if element is already zero or the inserted element. - continue; - } else if (isUndefOrZero(M)) { - // If the target mask is undef/zero then we must zero the element. - InsertPSMask |= (1u << i); - Updated = true; - continue; + // The input vector element must be inline. + int M = TargetMask0[i]; + if (M != i && M != (i + 4)) + return SDValue(); + + // Determine which inputs of the target shuffle we're using. + UseInput00 |= (0 <= M && M < 4); + UseInput01 |= (4 <= M); } - // The input vector element must be inline. - if (M != i && M != (i + 4)) - return SDValue(); + // If we're not using both inputs of the target shuffle then use the + // referenced input directly. + if (UseInput00 && !UseInput01) { + Updated = true; + Op0 = Ops0[0]; + } else if (!UseInput00 && UseInput01) { + Updated = true; + Op0 = Ops0[1]; + } - // Determine which inputs of the target shuffle we're using. - UseInput00 |= (0 <= M && M < 4); - UseInput01 |= (4 <= M); + if (Updated) + return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1, + DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); } - // If we're not using both inputs of the target shuffle then use the - // referenced input directly. - if (UseInput00 && !UseInput01) { - Updated = true; - Op0 = Ops0[0]; - } else if (!UseInput00 && UseInput01) { - Updated = true; - Op0 = Ops0[1]; + // If we're inserting an element from a vbroadcast load, fold the + // load into the X86insertps instruction. We need to convert the scalar + // load to a vector and clear the source lane of the INSERTPS control. 
+ if (Op1.getOpcode() == X86ISD::VBROADCAST_LOAD && Op1.hasOneUse()) { + auto *MemIntr = cast(Op1); + if (MemIntr->getMemoryVT().getScalarSizeInBits() == 32) { + SDValue Load = DAG.getLoad(MVT::f32, DL, MemIntr->getChain(), + MemIntr->getBasePtr(), + MemIntr->getMemOperand()); + SDValue Insert = DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, + DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, + Load), + DAG.getTargetConstant(InsertPSMask & 0x3f, DL, MVT::i8)); + DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), Load.getValue(1)); + return Insert; + } } - if (Updated) - return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1, - DAG.getConstant(InsertPSMask, DL, MVT::i8)); - return SDValue(); } default: @@ -33580,7 +34011,7 @@ static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG, } /// Eliminate a redundant shuffle of a horizontal math op. -static SDValue foldShuffleOfHorizOp(SDNode *N) { +static SDValue foldShuffleOfHorizOp(SDNode *N, SelectionDAG &DAG) { unsigned Opcode = N->getOpcode(); if (Opcode != X86ISD::MOVDDUP && Opcode != X86ISD::VBROADCAST) if (Opcode != ISD::VECTOR_SHUFFLE || !N->getOperand(1).isUndef()) @@ -33611,17 +34042,36 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) { HOp.getOperand(0) != HOp.getOperand(1)) return SDValue(); + // The shuffle that we are eliminating may have allowed the horizontal op to + // have an undemanded (undefined) operand. Duplicate the other (defined) + // operand to ensure that the results are defined across all lanes without the + // shuffle. + auto updateHOp = [](SDValue HorizOp, SelectionDAG &DAG) { + SDValue X; + if (HorizOp.getOperand(0).isUndef()) { + assert(!HorizOp.getOperand(1).isUndef() && "Not expecting foldable h-op"); + X = HorizOp.getOperand(1); + } else if (HorizOp.getOperand(1).isUndef()) { + assert(!HorizOp.getOperand(0).isUndef() && "Not expecting foldable h-op"); + X = HorizOp.getOperand(0); + } else { + return HorizOp; + } + return DAG.getNode(HorizOp.getOpcode(), SDLoc(HorizOp), + HorizOp.getValueType(), X, X); + }; + // When the operands of a horizontal math op are identical, the low half of // the result is the same as the high half. If a target shuffle is also - // replicating low and high halves, we don't need the shuffle. + // replicating low and high halves (and without changing the type/length of + // the vector), we don't need the shuffle. 
if (Opcode == X86ISD::MOVDDUP || Opcode == X86ISD::VBROADCAST) { - if (HOp.getScalarValueSizeInBits() == 64) { + if (HOp.getScalarValueSizeInBits() == 64 && HOp.getValueType() == VT) { // movddup (hadd X, X) --> hadd X, X // broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X assert((HOp.getValueType() == MVT::v2f64 || - HOp.getValueType() == MVT::v4f64) && HOp.getValueType() == VT && - "Unexpected type for h-op"); - return HOp; + HOp.getValueType() == MVT::v4f64) && "Unexpected type for h-op"); + return updateHOp(HOp, DAG); } return SDValue(); } @@ -33635,14 +34085,14 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) { (isTargetShuffleEquivalent(Mask, {0, 0}) || isTargetShuffleEquivalent(Mask, {0, 1, 0, 1}) || isTargetShuffleEquivalent(Mask, {0, 1, 2, 3, 0, 1, 2, 3}))) - return HOp; + return updateHOp(HOp, DAG); if (HOp.getValueSizeInBits() == 256 && (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2}) || isTargetShuffleEquivalent(Mask, {0, 1, 0, 1, 4, 5, 4, 5}) || isTargetShuffleEquivalent( Mask, {0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 8, 9, 10, 11}))) - return HOp; + return updateHOp(HOp, DAG); return SDValue(); } @@ -33677,7 +34127,7 @@ static SDValue narrowShuffle(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) { // the wide shuffle that we started with. return getShuffleHalfVectors(SDLoc(Shuf), Shuf->getOperand(0), Shuf->getOperand(1), HalfMask, HalfIdx1, - HalfIdx2, false, DAG); + HalfIdx2, false, DAG, /*UseConcat*/true); } static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, @@ -33696,70 +34146,10 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG)) return AddSub; - if (SDValue HAddSub = foldShuffleOfHorizOp(N)) + if (SDValue HAddSub = foldShuffleOfHorizOp(N, DAG)) return HAddSub; } - // During Type Legalization, when promoting illegal vector types, - // the backend might introduce new shuffle dag nodes and bitcasts. - // - // This code performs the following transformation: - // fold: (shuffle (bitcast (BINOP A, B)), Undef, ) -> - // (shuffle (BINOP (bitcast A), (bitcast B)), Undef, ) - // - // We do this only if both the bitcast and the BINOP dag nodes have - // one use. Also, perform this transformation only if the new binary - // operation is legal. This is to avoid introducing dag nodes that - // potentially need to be further expanded (or custom lowered) into a - // less optimal sequence of dag nodes. - if (!DCI.isBeforeLegalize() && DCI.isBeforeLegalizeOps() && - N->getOpcode() == ISD::VECTOR_SHUFFLE && - N->getOperand(0).getOpcode() == ISD::BITCAST && - N->getOperand(1).isUndef() && N->getOperand(0).hasOneUse()) { - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - - SDValue BC0 = N0.getOperand(0); - EVT SVT = BC0.getValueType(); - unsigned Opcode = BC0.getOpcode(); - unsigned NumElts = VT.getVectorNumElements(); - - if (BC0.hasOneUse() && SVT.isVector() && - SVT.getVectorNumElements() * 2 == NumElts && - TLI.isOperationLegal(Opcode, VT)) { - bool CanFold = false; - switch (Opcode) { - default : break; - case ISD::ADD: - case ISD::SUB: - case ISD::MUL: - // isOperationLegal lies for integer ops on floating point types. - CanFold = VT.isInteger(); - break; - case ISD::FADD: - case ISD::FSUB: - case ISD::FMUL: - // isOperationLegal lies for floating point ops on integer types. 
- CanFold = VT.isFloatingPoint(); - break; - } - - unsigned SVTNumElts = SVT.getVectorNumElements(); - ShuffleVectorSDNode *SVOp = cast(N); - for (unsigned i = 0, e = SVTNumElts; i != e && CanFold; ++i) - CanFold = SVOp->getMaskElt(i) == (int)(i * 2); - for (unsigned i = SVTNumElts, e = NumElts; i != e && CanFold; ++i) - CanFold = SVOp->getMaskElt(i) < 0; - - if (CanFold) { - SDValue BC00 = DAG.getBitcast(VT, BC0.getOperand(0)); - SDValue BC01 = DAG.getBitcast(VT, BC0.getOperand(1)); - SDValue NewBinOp = DAG.getNode(BC0.getOpcode(), dl, VT, BC00, BC01); - return DAG.getVectorShuffle(VT, dl, NewBinOp, N1, SVOp->getMask()); - } - } - } - // Attempt to combine into a vector load/broadcast. if (SDValue LD = combineToConsecutiveLoads(VT, N, dl, DAG, Subtarget, true)) return LD; @@ -33841,7 +34231,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, if (N->getOpcode() == X86ISD::VZEXT_MOVL && N->getOperand(0).hasOneUse() && ISD::isNormalLoad(N->getOperand(0).getNode())) { LoadSDNode *LN = cast(N->getOperand(0)); - if (!LN->isVolatile()) { + if (LN->isSimple()) { SDVTList Tys = DAG.getVTList(VT, MVT::Other); SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; SDValue VZLoad = @@ -33855,53 +34245,6 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, } } - - // Look for a truncating shuffle to v2i32 of a PMULUDQ where one of the - // operands is an extend from v2i32 to v2i64. Turn it into a pmulld. - // FIXME: This can probably go away once we default to widening legalization. - if (Subtarget.hasSSE41() && VT == MVT::v4i32 && - N->getOpcode() == ISD::VECTOR_SHUFFLE && - N->getOperand(0).getOpcode() == ISD::BITCAST && - N->getOperand(0).getOperand(0).getOpcode() == X86ISD::PMULUDQ) { - SDValue BC = N->getOperand(0); - SDValue MULUDQ = BC.getOperand(0); - ShuffleVectorSDNode *SVOp = cast(N); - ArrayRef Mask = SVOp->getMask(); - if (BC.hasOneUse() && MULUDQ.hasOneUse() && - Mask[0] == 0 && Mask[1] == 2 && Mask[2] == -1 && Mask[3] == -1) { - SDValue Op0 = MULUDQ.getOperand(0); - SDValue Op1 = MULUDQ.getOperand(1); - if (Op0.getOpcode() == ISD::BITCAST && - Op0.getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE && - Op0.getOperand(0).getValueType() == MVT::v4i32) { - ShuffleVectorSDNode *SVOp0 = - cast(Op0.getOperand(0)); - ArrayRef Mask2 = SVOp0->getMask(); - if (Mask2[0] == 0 && Mask2[1] == -1 && - Mask2[2] == 1 && Mask2[3] == -1) { - Op0 = SVOp0->getOperand(0); - Op1 = DAG.getBitcast(MVT::v4i32, Op1); - Op1 = DAG.getVectorShuffle(MVT::v4i32, dl, Op1, Op1, Mask); - return DAG.getNode(ISD::MUL, dl, MVT::v4i32, Op0, Op1); - } - } - if (Op1.getOpcode() == ISD::BITCAST && - Op1.getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE && - Op1.getOperand(0).getValueType() == MVT::v4i32) { - ShuffleVectorSDNode *SVOp1 = - cast(Op1.getOperand(0)); - ArrayRef Mask2 = SVOp1->getMask(); - if (Mask2[0] == 0 && Mask2[1] == -1 && - Mask2[2] == 1 && Mask2[3] == -1) { - Op0 = DAG.getBitcast(MVT::v4i32, Op0); - Op0 = DAG.getVectorShuffle(MVT::v4i32, dl, Op0, Op0, Mask); - Op1 = SVOp1->getOperand(0); - return DAG.getNode(ISD::MUL, dl, MVT::v4i32, Op0, Op1); - } - } - } - } - return SDValue(); } @@ -33966,6 +34309,84 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( // TODO convert SrcUndef to KnownUndef. 
break; } + case X86ISD::KSHIFTL: { + SDValue Src = Op.getOperand(0); + auto *Amt = cast(Op.getOperand(1)); + assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount"); + unsigned ShiftAmt = Amt->getZExtValue(); + + if (ShiftAmt == 0) + return TLO.CombineTo(Op, Src); + + // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a + // single shift. We can do this if the bottom bits (which are shifted + // out) are never demanded. + if (Src.getOpcode() == X86ISD::KSHIFTR) { + if (!DemandedElts.intersects(APInt::getLowBitsSet(NumElts, ShiftAmt))) { + unsigned C1 = Src.getConstantOperandVal(1); + unsigned NewOpc = X86ISD::KSHIFTL; + int Diff = ShiftAmt - C1; + if (Diff < 0) { + Diff = -Diff; + NewOpc = X86ISD::KSHIFTR; + } + + SDLoc dl(Op); + SDValue NewSA = TLO.DAG.getTargetConstant(Diff, dl, MVT::i8); + return TLO.CombineTo( + Op, TLO.DAG.getNode(NewOpc, dl, VT, Src.getOperand(0), NewSA)); + } + } + + APInt DemandedSrc = DemandedElts.lshr(ShiftAmt); + if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO, + Depth + 1)) + return true; + + KnownUndef <<= ShiftAmt; + KnownZero <<= ShiftAmt; + KnownZero.setLowBits(ShiftAmt); + break; + } + case X86ISD::KSHIFTR: { + SDValue Src = Op.getOperand(0); + auto *Amt = cast(Op.getOperand(1)); + assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount"); + unsigned ShiftAmt = Amt->getZExtValue(); + + if (ShiftAmt == 0) + return TLO.CombineTo(Op, Src); + + // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a + // single shift. We can do this if the top bits (which are shifted + // out) are never demanded. + if (Src.getOpcode() == X86ISD::KSHIFTL) { + if (!DemandedElts.intersects(APInt::getHighBitsSet(NumElts, ShiftAmt))) { + unsigned C1 = Src.getConstantOperandVal(1); + unsigned NewOpc = X86ISD::KSHIFTR; + int Diff = ShiftAmt - C1; + if (Diff < 0) { + Diff = -Diff; + NewOpc = X86ISD::KSHIFTL; + } + + SDLoc dl(Op); + SDValue NewSA = TLO.DAG.getTargetConstant(Diff, dl, MVT::i8); + return TLO.CombineTo( + Op, TLO.DAG.getNode(NewOpc, dl, VT, Src.getOperand(0), NewSA)); + } + } + + APInt DemandedSrc = DemandedElts.shl(ShiftAmt); + if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO, + Depth + 1)) + return true; + + KnownUndef.lshrInPlace(ShiftAmt); + KnownZero.lshrInPlace(ShiftAmt); + KnownZero.setHighBits(ShiftAmt); + break; + } case X86ISD::CVTSI2P: case X86ISD::CVTUI2P: { SDValue Src = Op.getOperand(0); @@ -33979,16 +34400,36 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( } case X86ISD::PACKSS: case X86ISD::PACKUS: { + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + APInt DemandedLHS, DemandedRHS; getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS); APInt SrcUndef, SrcZero; - if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, SrcUndef, - SrcZero, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(N0, DemandedLHS, SrcUndef, SrcZero, TLO, + Depth + 1)) return true; - if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, SrcUndef, - SrcZero, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(N1, DemandedRHS, SrcUndef, SrcZero, TLO, + Depth + 1)) return true; + + // Aggressively peek through ops to get at the demanded elts. + // TODO - we should do this for all target/faux shuffles ops. 
+ if (!DemandedElts.isAllOnesValue()) { + APInt DemandedSrcBits = + APInt::getAllOnesValue(N0.getScalarValueSizeInBits()); + SDValue NewN0 = SimplifyMultipleUseDemandedBits( + N0, DemandedSrcBits, DemandedLHS, TLO.DAG, Depth + 1); + SDValue NewN1 = SimplifyMultipleUseDemandedBits( + N1, DemandedSrcBits, DemandedRHS, TLO.DAG, Depth + 1); + if (NewN0 || NewN1) { + NewN0 = NewN0 ? NewN0 : N0; + NewN1 = NewN1 ? NewN1 : N1; + return TLO.CombineTo(Op, + TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewN0, NewN1)); + } + } break; } case X86ISD::HADD: @@ -34062,25 +34503,6 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( return true; break; } - case X86ISD::SUBV_BROADCAST: { - // Reduce size of broadcast if we don't need the upper half. - unsigned HalfElts = NumElts / 2; - if (DemandedElts.extractBits(HalfElts, HalfElts).isNullValue()) { - SDValue Src = Op.getOperand(0); - MVT SrcVT = Src.getSimpleValueType(); - - SDValue Half = Src; - if (SrcVT.getVectorNumElements() != HalfElts) { - MVT HalfVT = MVT::getVectorVT(SrcVT.getScalarType(), HalfElts); - Half = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, SDLoc(Op), HalfVT, Src); - } - - return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Half, 0, - TLO.DAG, SDLoc(Op), - Half.getValueSizeInBits())); - } - break; - } case X86ISD::VPERMV: { SDValue Mask = Op.getOperand(0); APInt MaskUndef, MaskZero; @@ -34134,6 +34556,21 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( SDValue Insert = insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits); return TLO.CombineTo(Op, Insert); + } + // Subvector broadcast. + case X86ISD::SUBV_BROADCAST: { + SDLoc DL(Op); + SDValue Src = Op.getOperand(0); + if (Src.getValueSizeInBits() > ExtSizeInBits) + Src = extractSubVector(Src, 0, TLO.DAG, DL, ExtSizeInBits); + else if (Src.getValueSizeInBits() < ExtSizeInBits) { + MVT SrcSVT = Src.getSimpleValueType().getScalarType(); + MVT SrcVT = + MVT::getVectorVT(SrcSVT, ExtSizeInBits / SrcSVT.getSizeInBits()); + Src = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, DL, SrcVT, Src); + } + return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Src, 0, + TLO.DAG, DL, ExtSizeInBits)); } // Byte shifts by immediate. case X86ISD::VSHLDQ: @@ -34201,36 +34638,30 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( } } - // Simplify target shuffles. - if (!isTargetShuffle(Opc) || !VT.isSimple()) - return false; - - // Get target shuffle mask. - bool IsUnary; + // Get target/faux shuffle mask. + APInt OpUndef, OpZero; SmallVector OpMask; SmallVector OpInputs; - if (!getTargetShuffleMask(Op.getNode(), VT.getSimpleVT(), true, OpInputs, - OpMask, IsUnary)) + if (!getTargetShuffleInputs(Op, DemandedElts, OpInputs, OpMask, OpUndef, + OpZero, TLO.DAG, Depth, false)) return false; - // Shuffle inputs must be the same type as the result. - if (llvm::any_of(OpInputs, - [VT](SDValue V) { return VT != V.getValueType(); })) + // Shuffle inputs must be the same size as the result. + if (OpMask.size() != (unsigned)NumElts || + llvm::any_of(OpInputs, [VT](SDValue V) { + return VT.getSizeInBits() != V.getValueSizeInBits() || + !V.getValueType().isVector(); + })) return false; - // Clear known elts that might have been set above. - KnownZero.clearAllBits(); - KnownUndef.clearAllBits(); + KnownZero = OpZero; + KnownUndef = OpUndef; // Check if shuffle mask can be simplified to undef/zero/identity. 
int NumSrcs = OpInputs.size(); - for (int i = 0; i != NumElts; ++i) { - int &M = OpMask[i]; + for (int i = 0; i != NumElts; ++i) if (!DemandedElts[i]) - M = SM_SentinelUndef; - else if (0 <= M && OpInputs[M / NumElts].isUndef()) - M = SM_SentinelUndef; - } + OpMask[i] = SM_SentinelUndef; if (isUndefInRange(OpMask, 0, NumElts)) { KnownUndef.setAllBits(); @@ -34243,10 +34674,14 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( } for (int Src = 0; Src != NumSrcs; ++Src) if (isSequentialOrUndefInRange(OpMask, 0, NumElts, Src * NumElts)) - return TLO.CombineTo(Op, OpInputs[Src]); + return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, OpInputs[Src])); // Attempt to simplify inputs. for (int Src = 0; Src != NumSrcs; ++Src) { + // TODO: Support inputs of different types. + if (OpInputs[Src].getValueType() != VT) + continue; + int Lo = Src * NumElts; APInt SrcElts = APInt::getNullValue(NumElts); for (int i = 0; i != NumElts; ++i) @@ -34256,21 +34691,13 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( SrcElts.setBit(M); } + // TODO - Propagate input undef/zero elts. APInt SrcUndef, SrcZero; if (SimplifyDemandedVectorElts(OpInputs[Src], SrcElts, SrcUndef, SrcZero, TLO, Depth + 1)) return true; } - // Extract known zero/undef elements. - // TODO - Propagate input undef/zero elts. - for (int i = 0; i != NumElts; ++i) { - if (OpMask[i] == SM_SentinelUndef) - KnownUndef.setBit(i); - if (OpMask[i] == SM_SentinelZero) - KnownZero.setBit(i); - } - return false; } @@ -34296,6 +34723,18 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( if (SimplifyDemandedBits(RHS, DemandedMask, OriginalDemandedElts, KnownOp, TLO, Depth + 1)) return true; + + // Aggressively peek through ops to get at the demanded low bits. + SDValue DemandedLHS = SimplifyMultipleUseDemandedBits( + LHS, DemandedMask, OriginalDemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedRHS = SimplifyMultipleUseDemandedBits( + RHS, DemandedMask, OriginalDemandedElts, TLO.DAG, Depth + 1); + if (DemandedLHS || DemandedRHS) { + DemandedLHS = DemandedLHS ? DemandedLHS : LHS; + DemandedRHS = DemandedRHS ? DemandedRHS : RHS; + return TLO.CombineTo( + Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, DemandedLHS, DemandedRHS)); + } break; } case X86ISD::VSHLI: { @@ -34323,7 +34762,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( unsigned NewOpc = Diff < 0 ? 
X86ISD::VSRLI : X86ISD::VSHLI; SDValue NewShift = TLO.DAG.getNode( NewOpc, SDLoc(Op), VT, Op0.getOperand(0), - TLO.DAG.getConstant(std::abs(Diff), SDLoc(Op), MVT::i8)); + TLO.DAG.getTargetConstant(std::abs(Diff), SDLoc(Op), MVT::i8)); return TLO.CombineTo(Op, NewShift); } } @@ -34441,6 +34880,11 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( KnownVec, TLO, Depth + 1)) return true; + if (SDValue V = SimplifyMultipleUseDemandedBits( + Vec, DemandedVecBits, DemandedVecElts, TLO.DAG, Depth + 1)) + return TLO.CombineTo( + Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, V, Op.getOperand(1))); + Known = KnownVec.zext(BitWidth, true); return false; } @@ -34542,12 +34986,80 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth); } +SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode( + SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, + SelectionDAG &DAG, unsigned Depth) const { + int NumElts = DemandedElts.getBitWidth(); + unsigned Opc = Op.getOpcode(); + EVT VT = Op.getValueType(); + + switch (Opc) { + case X86ISD::PINSRB: + case X86ISD::PINSRW: { + // If we don't demand the inserted element, return the base vector. + SDValue Vec = Op.getOperand(0); + auto *CIdx = dyn_cast(Op.getOperand(2)); + MVT VecVT = Vec.getSimpleValueType(); + if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) && + !DemandedElts[CIdx->getZExtValue()]) + return Vec; + break; + } + } + + APInt ShuffleUndef, ShuffleZero; + SmallVector ShuffleMask; + SmallVector ShuffleOps; + if (getTargetShuffleInputs(Op, DemandedElts, ShuffleOps, ShuffleMask, + ShuffleUndef, ShuffleZero, DAG, Depth, false)) { + // If all the demanded elts are from one operand and are inline, + // then we can use the operand directly. + int NumOps = ShuffleOps.size(); + if (ShuffleMask.size() == (unsigned)NumElts && + llvm::all_of(ShuffleOps, [VT](SDValue V) { + return VT.getSizeInBits() == V.getValueSizeInBits(); + })) { + + if (DemandedElts.isSubsetOf(ShuffleUndef)) + return DAG.getUNDEF(VT); + if (DemandedElts.isSubsetOf(ShuffleUndef | ShuffleZero)) + return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(Op)); + + // Bitmask that indicates which ops have only been accessed 'inline'. + APInt IdentityOp = APInt::getAllOnesValue(NumOps); + for (int i = 0; i != NumElts; ++i) { + int M = ShuffleMask[i]; + if (!DemandedElts[i] || ShuffleUndef[i]) + continue; + int Op = M / NumElts; + int Index = M % NumElts; + if (M < 0 || Index != i) { + IdentityOp.clearAllBits(); + break; + } + IdentityOp &= APInt::getOneBitSet(NumOps, Op); + if (IdentityOp == 0) + break; + } + assert((IdentityOp == 0 || IdentityOp.countPopulation() == 1) && + "Multiple identity shuffles detected"); + + if (IdentityOp != 0) + return DAG.getBitcast(VT, ShuffleOps[IdentityOp.countTrailingZeros()]); + } + } + + return TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode( + Op, DemandedBits, DemandedElts, DAG, Depth); +} + /// Check if a vector extract from a target-specific shuffle of a load can be /// folded into a single element load. /// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but /// shuffles have been custom lowered so we need to handle those here. 
-static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI) { +static SDValue +XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -34559,13 +35071,17 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, return SDValue(); EVT OriginalVT = InVec.getValueType(); + unsigned NumOriginalElts = OriginalVT.getVectorNumElements(); // Peek through bitcasts, don't duplicate a load with other uses. InVec = peekThroughOneUseBitcasts(InVec); EVT CurrentVT = InVec.getValueType(); - if (!CurrentVT.isVector() || - CurrentVT.getVectorNumElements() != OriginalVT.getVectorNumElements()) + if (!CurrentVT.isVector()) + return SDValue(); + + unsigned NumCurrentElts = CurrentVT.getVectorNumElements(); + if ((NumOriginalElts % NumCurrentElts) != 0) return SDValue(); if (!isTargetShuffle(InVec.getOpcode())) @@ -34582,10 +35098,17 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, ShuffleOps, ShuffleMask, UnaryShuffle)) return SDValue(); + unsigned Scale = NumOriginalElts / NumCurrentElts; + if (Scale > 1) { + SmallVector ScaledMask; + scaleShuffleMask(Scale, ShuffleMask, ScaledMask); + ShuffleMask = std::move(ScaledMask); + } + assert(ShuffleMask.size() == NumOriginalElts && "Shuffle mask size mismatch"); + // Select the input vector, guarding against out of range extract vector. - unsigned NumElems = CurrentVT.getVectorNumElements(); int Elt = cast(EltNo)->getZExtValue(); - int Idx = (Elt > (int)NumElems) ? SM_SentinelUndef : ShuffleMask[Elt]; + int Idx = (Elt > (int)NumOriginalElts) ? SM_SentinelUndef : ShuffleMask[Elt]; if (Idx == SM_SentinelZero) return EltVT.isInteger() ? DAG.getConstant(0, SDLoc(N), EltVT) @@ -34598,8 +35121,9 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, if (llvm::any_of(ShuffleMask, [](int M) { return M == SM_SentinelZero; })) return SDValue(); - assert(0 <= Idx && Idx < (int)(2 * NumElems) && "Shuffle index out of range"); - SDValue LdNode = (Idx < (int)NumElems) ? ShuffleOps[0] : ShuffleOps[1]; + assert(0 <= Idx && Idx < (int)(2 * NumOriginalElts) && + "Shuffle index out of range"); + SDValue LdNode = (Idx < (int)NumOriginalElts) ? ShuffleOps[0] : ShuffleOps[1]; // If inputs to shuffle are the same for both ops, then allow 2 uses unsigned AllowedUses = @@ -34619,7 +35143,7 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, LoadSDNode *LN0 = cast(LdNode); - if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile()) + if (!LN0 || !LN0->hasNUsesOfValue(AllowedUses, 0) || !LN0->isSimple()) return SDValue(); // If there's a bitcast before the shuffle, check if the load type and @@ -34637,10 +35161,11 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, SDLoc dl(N); // Create shuffle node taking into account the case that its a unary shuffle - SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(CurrentVT) : ShuffleOps[1]; - Shuffle = DAG.getVectorShuffle(CurrentVT, dl, ShuffleOps[0], Shuffle, - ShuffleMask); - Shuffle = DAG.getBitcast(OriginalVT, Shuffle); + SDValue Shuffle = UnaryShuffle ? 
DAG.getUNDEF(OriginalVT) + : DAG.getBitcast(OriginalVT, ShuffleOps[1]); + Shuffle = DAG.getVectorShuffle(OriginalVT, dl, + DAG.getBitcast(OriginalVT, ShuffleOps[0]), + Shuffle, ShuffleMask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle, EltNo); } @@ -34660,6 +35185,23 @@ static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size) { return false; } +// Helper to push sign extension of vXi1 SETCC result through bitops. +static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, + SDValue Src, const SDLoc &DL) { + switch (Src.getOpcode()) { + case ISD::SETCC: + return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src); + case ISD::AND: + case ISD::XOR: + case ISD::OR: + return DAG.getNode( + Src.getOpcode(), DL, SExtVT, + signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL), + signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL)); + } + llvm_unreachable("Unexpected node type for vXi1 sign extension"); +} + // Try to match patterns such as // (i16 bitcast (v16i1 x)) // -> @@ -34698,6 +35240,7 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src, // For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as: // (v16i8 shuffle <0,2,4,6,8,10,12,14,u,u,...,u> (v16i8 bitcast t0), undef) MVT SExtVT; + bool PropagateSExt = false; switch (SrcVT.getSimpleVT().SimpleTy) { default: return SDValue(); @@ -34708,8 +35251,10 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src, SExtVT = MVT::v4i32; // For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2)) // sign-extend to a 256-bit operation to avoid truncation. - if (Subtarget.hasAVX() && checkBitcastSrcVectorSize(Src, 256)) + if (Subtarget.hasAVX() && checkBitcastSrcVectorSize(Src, 256)) { SExtVT = MVT::v4i64; + PropagateSExt = true; + } break; case MVT::v8i1: SExtVT = MVT::v8i16; @@ -34718,11 +35263,10 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src, // If the setcc operand is 128-bit, prefer sign-extending to 128-bit over // 256-bit because the shuffle is cheaper than sign extending the result of // the compare. - // TODO : use checkBitcastSrcVectorSize - if (Src.getOpcode() == ISD::SETCC && Subtarget.hasAVX() && - (Src.getOperand(0).getValueType().is256BitVector() || - Src.getOperand(0).getValueType().is512BitVector())) { + if (Subtarget.hasAVX() && (checkBitcastSrcVectorSize(Src, 256) || + checkBitcastSrcVectorSize(Src, 512))) { SExtVT = MVT::v8i32; + PropagateSExt = true; } break; case MVT::v16i1: @@ -34745,19 +35289,10 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src, return SDValue(); }; - SDValue V = DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src); + SDValue V = PropagateSExt ? 
signExtendBitcastSrcVector(DAG, SExtVT, Src, DL) + : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src); - if (SExtVT == MVT::v64i8) { - SDValue Lo, Hi; - std::tie(Lo, Hi) = DAG.SplitVector(V, DL); - Lo = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Lo); - Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Lo); - Hi = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Hi); - Hi = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Hi); - Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi, - DAG.getConstant(32, DL, MVT::i8)); - V = DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi); - } else if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8) { + if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8 || SExtVT == MVT::v64i8) { V = getPMOVMSKB(DL, V, DAG, Subtarget); } else { if (SExtVT == MVT::v8i16) @@ -34891,8 +35426,8 @@ static SDValue createMMXBuildVector(BuildVectorSDNode *BV, SelectionDAG &DAG, unsigned ShufMask = (NumElts > 2 ? 0 : 0x44); return DAG.getNode( ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx, - DAG.getConstant(Intrinsic::x86_sse_pshuf_w, DL, MVT::i32), Splat, - DAG.getConstant(ShufMask, DL, MVT::i8)); + DAG.getTargetConstant(Intrinsic::x86_sse_pshuf_w, DL, MVT::i32), + Splat, DAG.getTargetConstant(ShufMask, DL, MVT::i8)); } Ops.append(NumElts, Splat); } else { @@ -34935,6 +35470,24 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineBitcastvxi1(DAG, VT, N0, dl, Subtarget)) return V; + // Recognize the IR pattern for the movmsk intrinsic under SSE1 befoer type + // legalization destroys the v4i32 type. + if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && SrcVT == MVT::v4i1 && + VT.isScalarInteger() && N0.getOpcode() == ISD::SETCC && + N0.getOperand(0).getValueType() == MVT::v4i32 && + ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode()) && + cast(N0.getOperand(2))->get() == ISD::SETLT) { + SDValue N00 = N0.getOperand(0); + // Only do this if we can avoid scalarizing the input. + if (ISD::isNormalLoad(N00.getNode()) || + (N00.getOpcode() == ISD::BITCAST && + N00.getOperand(0).getValueType() == MVT::v4f32)) { + SDValue V = DAG.getNode(X86ISD::MOVMSK, dl, MVT::i32, + DAG.getBitcast(MVT::v4f32, N00)); + return DAG.getZExtOrTrunc(V, dl, VT); + } + } + // If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer // type, widen both sides to avoid a trip through memory. if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() && @@ -34949,6 +35502,26 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG, // type, widen both sides to avoid a trip through memory. if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.isScalarInteger() && Subtarget.hasAVX512()) { + // Use zeros for the widening if we already have some zeroes. This can + // allow SimplifyDemandedBits to remove scalar ANDs that may be down + // stream of this. + // FIXME: It might make sense to detect a concat_vectors with a mix of + // zeroes and undef and turn it into insert_subvector for i1 vectors as + // a separate combine. What we can't do is canonicalize the operands of + // such a concat or we'll get into a loop with SimplifyDemandedBits. 
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS) { + SDValue LastOp = N0.getOperand(N0.getNumOperands() - 1); + if (ISD::isBuildVectorAllZeros(LastOp.getNode())) { + SrcVT = LastOp.getValueType(); + unsigned NumConcats = 8 / SrcVT.getVectorNumElements(); + SmallVector Ops(N0->op_begin(), N0->op_end()); + Ops.resize(NumConcats, DAG.getConstant(0, dl, SrcVT)); + N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops); + N0 = DAG.getBitcast(MVT::i8, N0); + return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); + } + } + unsigned NumConcats = 8 / SrcVT.getVectorNumElements(); SmallVector Ops(NumConcats, DAG.getUNDEF(SrcVT)); Ops[0] = N0; @@ -34958,6 +35531,33 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG, } } + // Look for (i8 (bitcast (v8i1 (extract_subvector (v16i1 X), 0)))) and + // replace with (i8 (trunc (i16 (bitcast (v16i1 X))))). This can occur + // due to insert_subvector legalization on KNL. By promoting the copy to i16 + // we can help with known bits propagation from the vXi1 domain to the + // scalar domain. + if (VT == MVT::i8 && SrcVT == MVT::v8i1 && Subtarget.hasAVX512() && + !Subtarget.hasDQI() && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && + N0.getOperand(0).getValueType() == MVT::v16i1 && + isNullConstant(N0.getOperand(1))) + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, + DAG.getBitcast(MVT::i16, N0.getOperand(0))); + + // Combine (bitcast (vbroadcast_load)) -> (vbroadcast_load). The memory VT + // determines // the number of bits loaded. Remaining bits are zero. + if (N0.getOpcode() == X86ISD::VBROADCAST_LOAD && N0.hasOneUse() && + VT.getScalarSizeInBits() == SrcVT.getScalarSizeInBits()) { + auto *BCast = cast(N0); + SDVTList Tys = DAG.getVTList(VT, MVT::Other); + SDValue Ops[] = { BCast->getChain(), BCast->getBasePtr() }; + SDValue ResNode = + DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, SDLoc(N), Tys, Ops, + VT.getVectorElementType(), + BCast->getMemOperand()); + DAG.ReplaceAllUsesOfValueWith(SDValue(BCast, 1), ResNode.getValue(1)); + return ResNode; + } + // Since MMX types are special and don't usually play with other vector types, // it's better to handle them early to be sure we emit efficient code by // avoiding store-load conversions. @@ -35152,7 +35752,7 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG, // Check for SMAX/SMIN/UMAX/UMIN horizontal reduction patterns. ISD::NodeType BinOp; SDValue Src = DAG.matchBinOpReduction( - Extract, BinOp, {ISD::SMAX, ISD::SMIN, ISD::UMAX, ISD::UMIN}); + Extract, BinOp, {ISD::SMAX, ISD::SMIN, ISD::UMAX, ISD::UMIN}, true); if (!Src) return SDValue(); @@ -35246,29 +35846,31 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract, SDLoc DL(Extract); EVT MatchVT = Match.getValueType(); unsigned NumElts = MatchVT.getVectorNumElements(); + unsigned MaxElts = Subtarget.hasInt256() ? 32 : 16; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (ExtractVT == MVT::i1) { // Special case for (pre-legalization) vXi1 reductions. - if (NumElts > 32) + if (NumElts > 64 || !isPowerOf2_32(NumElts)) return SDValue(); - if (DAG.getTargetLoweringInfo().isTypeLegal(MatchVT)) { + if (TLI.isTypeLegal(MatchVT)) { // If this is a legal AVX512 predicate type then we can just bitcast. EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts); Movmsk = DAG.getBitcast(MovmskVT, Match); } else { // Use combineBitcastvxi1 to create the MOVMSK. 
- if (NumElts == 32 && !Subtarget.hasInt256()) { + while (NumElts > MaxElts) { SDValue Lo, Hi; std::tie(Lo, Hi) = DAG.SplitVector(Match, DL); Match = DAG.getNode(BinOp, DL, Lo.getValueType(), Lo, Hi); - NumElts = 16; + NumElts /= 2; } EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts); Movmsk = combineBitcastvxi1(DAG, MovmskVT, Match, DL, Subtarget); } if (!Movmsk) return SDValue(); - Movmsk = DAG.getZExtOrTrunc(Movmsk, DL, MVT::i32); + Movmsk = DAG.getZExtOrTrunc(Movmsk, DL, NumElts > 32 ? MVT::i64 : MVT::i32); } else { // Bail with AVX512VL (which uses predicate registers). if (Subtarget.hasVLX()) @@ -35309,13 +35911,15 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract, Movmsk = getPMOVMSKB(DL, BitcastLogicOp, DAG, Subtarget); NumElts = MaskSrcVT.getVectorNumElements(); } - assert(NumElts <= 32 && "Not expecting more than 32 elements"); + assert((NumElts <= 32 || NumElts == 64) && + "Not expecting more than 64 elements"); + MVT CmpVT = NumElts == 64 ? MVT::i64 : MVT::i32; if (BinOp == ISD::XOR) { // parity -> (AND (CTPOP(MOVMSK X)), 1) - SDValue Mask = DAG.getConstant(1, DL, MVT::i32); - SDValue Result = DAG.getNode(ISD::CTPOP, DL, MVT::i32, Movmsk); - Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result, Mask); + SDValue Mask = DAG.getConstant(1, DL, CmpVT); + SDValue Result = DAG.getNode(ISD::CTPOP, DL, CmpVT, Movmsk); + Result = DAG.getNode(ISD::AND, DL, CmpVT, Result, Mask); return DAG.getZExtOrTrunc(Result, DL, ExtractVT); } @@ -35323,19 +35927,19 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract, ISD::CondCode CondCode; if (BinOp == ISD::OR) { // any_of -> MOVMSK != 0 - CmpC = DAG.getConstant(0, DL, MVT::i32); + CmpC = DAG.getConstant(0, DL, CmpVT); CondCode = ISD::CondCode::SETNE; } else { // all_of -> MOVMSK == ((1 << NumElts) - 1) - CmpC = DAG.getConstant((1ULL << NumElts) - 1, DL, MVT::i32); + CmpC = DAG.getConstant(APInt::getLowBitsSet(CmpVT.getSizeInBits(), NumElts), + DL, CmpVT); CondCode = ISD::CondCode::SETEQ; } // The setcc produces an i8 of 0/1, so extend that to the result width and // negate to get the final 0/-1 mask value. - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT SetccVT = - TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32); + TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT); SDValue Setcc = DAG.getSetCC(DL, SetccVT, Movmsk, CmpC, CondCode); SDValue Zext = DAG.getZExtOrTrunc(Setcc, DL, ExtractVT); SDValue Zero = DAG.getConstant(0, DL, ExtractVT); @@ -35431,6 +36035,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, if (DCI.isBeforeLegalizeOps()) return SDValue(); + SDLoc dl(N); SDValue Src = N->getOperand(0); SDValue Idx = N->getOperand(1); @@ -35452,10 +36057,37 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, return DAG.getBitcast(VT, SrcOp); } + // If we're extracting a single element from a broadcast load and there are + // no other users, just create a single load. 
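The vXi1 reductions rewritten above all funnel through a MOVMSK of the compare mask. A minimal standalone sketch of the equivalent scalar logic using SSE2 intrinsics (helper names are invented for illustration; a 16 x i8 compare mask of 0x00/0xFF lanes is assumed as input):

#include <emmintrin.h>   // SSE2: _mm_movemask_epi8

// any_of  -> MOVMSK != 0
static inline bool AnyOf16(__m128i Mask)  { return _mm_movemask_epi8(Mask) != 0; }
// all_of  -> MOVMSK == (1 << NumElts) - 1
static inline bool AllOf16(__m128i Mask)  { return _mm_movemask_epi8(Mask) == 0xFFFF; }
// parity  -> CTPOP(MOVMSK) & 1
static inline bool Parity16(__m128i Mask) { return __builtin_popcount(_mm_movemask_epi8(Mask)) & 1; }

The SSE1-only path added to combineBitcast earlier plays the same trick with MOVMSKPS after reinterpreting the v4i32 data as v4f32, which is safe because only the sign bits are read.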
+ if (SrcBC.getOpcode() == X86ISD::VBROADCAST_LOAD && SrcBC.hasOneUse()) { + auto *MemIntr = cast(SrcBC); + unsigned SrcBCWidth = SrcBC.getScalarValueSizeInBits(); + if (MemIntr->getMemoryVT().getSizeInBits() == SrcBCWidth && + VT.getSizeInBits() == SrcBCWidth) { + SDValue Load = DAG.getLoad(VT, dl, MemIntr->getChain(), + MemIntr->getBasePtr(), + MemIntr->getPointerInfo(), + MemIntr->getAlignment(), + MemIntr->getMemOperand()->getFlags()); + DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), Load.getValue(1)); + return Load; + } + } + + // Handle extract(truncate(x)) for 0'th index. + // TODO: Treat this as a faux shuffle? + // TODO: When can we use this for general indices? + if (ISD::TRUNCATE == Src.getOpcode() && SrcVT.is128BitVector() && + isNullConstant(Idx)) { + Src = extract128BitVector(Src.getOperand(0), 0, DAG, dl); + Src = DAG.getBitcast(SrcVT, Src); + return DAG.getNode(N->getOpcode(), dl, VT, Src, Idx); + } + // Resolve the target shuffle inputs and mask. SmallVector Mask; SmallVector Ops; - if (!resolveTargetShuffleInputs(SrcBC, Ops, Mask, DAG)) + if (!getTargetShuffleInputs(SrcBC, Ops, Mask, DAG)) return SDValue(); // Attempt to narrow/widen the shuffle mask to the correct size. @@ -35489,7 +36121,6 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, return SDValue(); int SrcIdx = Mask[N->getConstantOperandVal(1)]; - SDLoc dl(N); // If the shuffle source element is undef/zero then we can just accept it. if (SrcIdx == SM_SentinelUndef) @@ -35584,7 +36215,7 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) { } // TODO: This switch could include FNEG and the x86-specific FP logic ops - // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid + // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid // missed load folding and fma+fneg combining. switch (Vec.getOpcode()) { case ISD::FMA: // Begin 3 operands @@ -35631,27 +36262,84 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) { static SDValue combineReductionToHorizontal(SDNode *ExtElt, SelectionDAG &DAG, const X86Subtarget &Subtarget) { assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unexpected caller"); - bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize(); - if (!Subtarget.hasFastHorizontalOps() && !OptForSize) - return SDValue(); - SDValue Index = ExtElt->getOperand(1); - if (!isNullConstant(Index)) - return SDValue(); - // TODO: Allow FADD with reduction and/or reassociation and no-signed-zeros. ISD::NodeType Opc; - SDValue Rdx = DAG.matchBinOpReduction(ExtElt, Opc, {ISD::ADD}); + SDValue Rdx = + DAG.matchBinOpReduction(ExtElt, Opc, {ISD::ADD, ISD::FADD}, true); if (!Rdx) return SDValue(); + SDValue Index = ExtElt->getOperand(1); + assert(isNullConstant(Index) && + "Reduction doesn't end in an extract from index 0"); + EVT VT = ExtElt->getValueType(0); - EVT VecVT = ExtElt->getOperand(0).getValueType(); + EVT VecVT = Rdx.getValueType(); if (VecVT.getScalarType() != VT) return SDValue(); - unsigned HorizOpcode = Opc == ISD::ADD ? X86ISD::HADD : X86ISD::FHADD; SDLoc DL(ExtElt); + // vXi8 reduction - sub 128-bit vector. + if (VecVT == MVT::v4i8 || VecVT == MVT::v8i8) { + if (VecVT == MVT::v4i8) { + // Pad with zero. 
+ if (Subtarget.hasSSE41()) { + Rdx = DAG.getBitcast(MVT::i32, Rdx); + Rdx = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32, + DAG.getConstant(0, DL, MVT::v4i32), Rdx, + DAG.getIntPtrConstant(0, DL)); + Rdx = DAG.getBitcast(MVT::v16i8, Rdx); + } else { + Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i8, Rdx, + DAG.getConstant(0, DL, VecVT)); + } + } + if (Rdx.getValueType() == MVT::v8i8) { + // Pad with undef. + Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Rdx, + DAG.getUNDEF(MVT::v8i8)); + } + Rdx = DAG.getNode(X86ISD::PSADBW, DL, MVT::v2i64, Rdx, + DAG.getConstant(0, DL, MVT::v16i8)); + Rdx = DAG.getBitcast(MVT::v16i8, Rdx); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index); + } + + // Must be a >=128-bit vector with pow2 elements. + if ((VecVT.getSizeInBits() % 128) != 0 || + !isPowerOf2_32(VecVT.getVectorNumElements())) + return SDValue(); + + // vXi8 reduction - sum lo/hi halves then use PSADBW. + if (VT == MVT::i8) { + while (Rdx.getValueSizeInBits() > 128) { + unsigned HalfSize = VecVT.getSizeInBits() / 2; + unsigned HalfElts = VecVT.getVectorNumElements() / 2; + SDValue Lo = extractSubVector(Rdx, 0, DAG, DL, HalfSize); + SDValue Hi = extractSubVector(Rdx, HalfElts, DAG, DL, HalfSize); + Rdx = DAG.getNode(ISD::ADD, DL, Lo.getValueType(), Lo, Hi); + VecVT = Rdx.getValueType(); + } + assert(VecVT == MVT::v16i8 && "v16i8 reduction expected"); + + SDValue Hi = DAG.getVectorShuffle( + MVT::v16i8, DL, Rdx, Rdx, + {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}); + Rdx = DAG.getNode(ISD::ADD, DL, MVT::v16i8, Rdx, Hi); + Rdx = DAG.getNode(X86ISD::PSADBW, DL, MVT::v2i64, Rdx, + getZeroVector(MVT::v16i8, Subtarget, DAG, DL)); + Rdx = DAG.getBitcast(MVT::v16i8, Rdx); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index); + } + + // Only use (F)HADD opcodes if they aren't microcoded or minimizes codesize. + bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize(); + if (!Subtarget.hasFastHorizontalOps() && !OptForSize) + return SDValue(); + + unsigned HorizOpcode = Opc == ISD::ADD ? X86ISD::HADD : X86ISD::FHADD; + // 256-bit horizontal instructions operate on 128-bit chunks rather than // across the whole vector, so we need an extract + hop preliminary stage. // This is the only step where the operands of the hop are not the same value. 
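The vXi8 reduction path above ends in a PSADBW against zero. A small sketch of why that computes a byte sum (illustrative helper only, SSE2 intrinsics); the combine itself then truncates the result back to i8:

#include <emmintrin.h>   // SSE2: _mm_sad_epu8
#include <cstdint>

// psadbw with a zero vector sums each group of eight bytes into a 16-bit
// value (i16 lanes 0 and 4 of the result); adding the two partial sums gives
// the full 16-byte total, which is then truncated to i8 by the caller.
static inline uint8_t SumBytes16(__m128i V) {
  __m128i Sad = _mm_sad_epu8(V, _mm_setzero_si128());
  return (uint8_t)(_mm_extract_epi16(Sad, 0) + _mm_extract_epi16(Sad, 4));
}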
@@ -35661,15 +36349,14 @@ static SDValue combineReductionToHorizontal(SDNode *ExtElt, SelectionDAG &DAG, unsigned NumElts = VecVT.getVectorNumElements(); SDValue Hi = extract128BitVector(Rdx, NumElts / 2, DAG, DL); SDValue Lo = extract128BitVector(Rdx, 0, DAG, DL); - VecVT = EVT::getVectorVT(*DAG.getContext(), VT, NumElts / 2); - Rdx = DAG.getNode(HorizOpcode, DL, VecVT, Hi, Lo); + Rdx = DAG.getNode(HorizOpcode, DL, Lo.getValueType(), Hi, Lo); + VecVT = Rdx.getValueType(); } if (!((VecVT == MVT::v8i16 || VecVT == MVT::v4i32) && Subtarget.hasSSSE3()) && !((VecVT == MVT::v4f32 || VecVT == MVT::v2f64) && Subtarget.hasSSE3())) return SDValue(); // extract (add (shuf X), X), 0 --> extract (hadd X, X), 0 - assert(Rdx.getValueType() == VecVT && "Unexpected reduction match"); unsigned ReductionSteps = Log2_32(VecVT.getVectorNumElements()); for (unsigned i = 0; i != ReductionSteps; ++i) Rdx = DAG.getNode(HorizOpcode, DL, VecVT, Rdx, Rdx); @@ -35714,15 +36401,26 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, } } - // TODO - Remove this once we can handle the implicit zero-extension of - // X86ISD::PEXTRW/X86ISD::PEXTRB in: - // XFormVExtractWithShuffleIntoLoad, combineHorizontalPredicateResult and - // combineBasicSADPattern. if (IsPextr) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.SimplifyDemandedBits( SDValue(N, 0), APInt::getAllOnesValue(VT.getSizeInBits()), DCI)) return SDValue(N, 0); + + // PEXTR*(PINSR*(v, s, c), c) -> s (with implicit zext handling). + if ((InputVector.getOpcode() == X86ISD::PINSRB || + InputVector.getOpcode() == X86ISD::PINSRW) && + InputVector.getOperand(2) == EltIdx) { + assert(SrcVT == InputVector.getOperand(0).getValueType() && + "Vector type mismatch"); + SDValue Scl = InputVector.getOperand(1); + Scl = DAG.getNode(ISD::TRUNCATE, dl, SrcVT.getScalarType(), Scl); + return DAG.getZExtOrTrunc(Scl, dl, VT); + } + + // TODO - Remove this once we can handle the implicit zero-extension of + // X86ISD::PEXTRW/X86ISD::PEXTRB in XFormVExtractWithShuffleIntoLoad, + // combineHorizontalPredicateResult and combineBasicSADPattern. return SDValue(); } @@ -35832,6 +36530,15 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG, // get simplified at node creation time)? bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode()); bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode()); + + // If both inputs are 0/undef, create a complete zero vector. + // FIXME: As noted above this should be handled by DAGCombiner/getNode. + if (TValIsAllZeros && FValIsAllZeros) { + if (VT.isFloatingPoint()) + return DAG.getConstantFP(0.0, DL, VT); + return DAG.getConstant(0, DL, VT); + } + if (TValIsAllZeros && !FValIsAllZeros && Subtarget.hasAVX512() && Cond.hasOneUse() && CondVT.getVectorElementType() == MVT::i1) { // Invert the cond to not(cond) : xor(op,allones)=not(op) @@ -36295,8 +37002,6 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // Since SKX these selects have a proper lowering. 
if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && CondVT.isVector() && CondVT.getVectorElementType() == MVT::i1 && - (ExperimentalVectorWideningLegalization || - VT.getVectorNumElements() > 4) && (VT.getVectorElementType() == MVT::i8 || VT.getVectorElementType() == MVT::i16)) { Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); @@ -36358,6 +37063,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // subl %esi, $edi // cmovsl %eax, %edi if (N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC && + Cond.hasOneUse() && DAG.isEqualTo(LHS, Cond.getOperand(0)) && DAG.isEqualTo(RHS, Cond.getOperand(1))) { ISD::CondCode CC = cast(Cond.getOperand(2))->get(); @@ -36508,6 +37214,12 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, if (SDValue V = narrowVectorSelect(N, DAG, Subtarget)) return V; + // select(~Cond, X, Y) -> select(Cond, Y, X) + if (CondVT.getScalarType() != MVT::i1) + if (SDValue CondNot = IsNOT(Cond, DAG)) + return DAG.getNode(N->getOpcode(), DL, VT, + DAG.getBitcast(CondVT, CondNot), RHS, LHS); + // Custom action for SELECT MMX if (VT == MVT::x86mmx) { LHS = DAG.getBitcast(MVT::i64, LHS); @@ -36873,8 +37585,8 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, // We can't always do this as FCMOV only supports a subset of X86 cond. if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG, Subtarget)) { if (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC)) { - SDValue Ops[] = {FalseOp, TrueOp, DAG.getConstant(CC, DL, MVT::i8), - Flags}; + SDValue Ops[] = {FalseOp, TrueOp, DAG.getTargetConstant(CC, DL, MVT::i8), + Flags}; return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); } } @@ -36923,12 +37635,13 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, // Optimize cases that will turn into an LEA instruction. This requires // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9). if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) { - uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue(); - if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff; + APInt Diff = TrueC->getAPIntValue() - FalseC->getAPIntValue(); + assert(Diff.getBitWidth() == N->getValueType(0).getSizeInBits() && + "Implicit constant truncation"); bool isFastMultiplier = false; - if (Diff < 10) { - switch ((unsigned char)Diff) { + if (Diff.ult(10)) { + switch (Diff.getZExtValue()) { default: break; case 1: // result = add base, cond case 2: // result = lea base( , cond*2) @@ -36943,7 +37656,6 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, } if (isFastMultiplier) { - APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue(); Cond = getSETCC(CC, Cond, DL ,DAG); // Zero extend the condition if needed. 
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), @@ -36994,8 +37706,8 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, if (CC == X86::COND_E && CmpAgainst == dyn_cast(TrueOp)) { - SDValue Ops[] = { FalseOp, Cond.getOperand(0), - DAG.getConstant(CC, DL, MVT::i8), Cond }; + SDValue Ops[] = {FalseOp, Cond.getOperand(0), + DAG.getTargetConstant(CC, DL, MVT::i8), Cond}; return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); } } @@ -37029,10 +37741,11 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, CC1 = X86::GetOppositeBranchCondition(CC1); } - SDValue LOps[] = {FalseOp, TrueOp, DAG.getConstant(CC0, DL, MVT::i8), - Flags}; + SDValue LOps[] = {FalseOp, TrueOp, + DAG.getTargetConstant(CC0, DL, MVT::i8), Flags}; SDValue LCMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), LOps); - SDValue Ops[] = {LCMOV, TrueOp, DAG.getConstant(CC1, DL, MVT::i8), Flags}; + SDValue Ops[] = {LCMOV, TrueOp, DAG.getTargetConstant(CC1, DL, MVT::i8), + Flags}; SDValue CMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); return CMOV; } @@ -37064,9 +37777,9 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, EVT VT = N->getValueType(0); // This should constant fold. SDValue Diff = DAG.getNode(ISD::SUB, DL, VT, Const, Add.getOperand(1)); - SDValue CMov = DAG.getNode(X86ISD::CMOV, DL, VT, Diff, Add.getOperand(0), - DAG.getConstant(X86::COND_NE, DL, MVT::i8), - Cond); + SDValue CMov = + DAG.getNode(X86ISD::CMOV, DL, VT, Diff, Add.getOperand(0), + DAG.getTargetConstant(X86::COND_NE, DL, MVT::i8), Cond); return DAG.getNode(ISD::ADD, DL, VT, CMov, Add.getOperand(1)); } } @@ -37166,98 +37879,45 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG, if ((NumElts % 2) != 0) return SDValue(); - unsigned RegSize = 128; - MVT OpsVT = MVT::getVectorVT(MVT::i16, RegSize / 16); EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts); // Shrink the operands of mul. SDValue NewN0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N0); SDValue NewN1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N1); - if (ExperimentalVectorWideningLegalization || - NumElts >= OpsVT.getVectorNumElements()) { - // Generate the lower part of mul: pmullw. For MULU8/MULS8, only the - // lower part is needed. - SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1); - if (Mode == MULU8 || Mode == MULS8) - return DAG.getNode((Mode == MULU8) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND, - DL, VT, MulLo); - - MVT ResVT = MVT::getVectorVT(MVT::i32, NumElts / 2); - // Generate the higher part of mul: pmulhw/pmulhuw. For MULU16/MULS16, - // the higher part is also needed. - SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL, - ReducedVT, NewN0, NewN1); - - // Repack the lower part and higher part result of mul into a wider - // result. - // Generate shuffle functioning as punpcklwd. - SmallVector ShuffleMask(NumElts); - for (unsigned i = 0, e = NumElts / 2; i < e; i++) { - ShuffleMask[2 * i] = i; - ShuffleMask[2 * i + 1] = i + NumElts; - } - SDValue ResLo = - DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask); - ResLo = DAG.getBitcast(ResVT, ResLo); - // Generate shuffle functioning as punpckhwd. 
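For the select-of-constants path in combineCMov above, when the difference between the two constants is an LEA-friendly multiplier the cmov is replaced by a setcc plus an add/lea. A scalar model of that transform (illustrative only, helper name invented):

#include <cstdint>

// cond ? TrueC : FalseC  ==>  FalseC + zext(cond) * (TrueC - FalseC),
// a single ADD or LEA when the difference is 1, 2, 3, 4, 5, 8 or 9.
static inline int64_t SelectConst(bool Cond, int64_t TrueC, int64_t FalseC) {
  int64_t Diff = TrueC - FalseC;           // assumed to be a "fast multiplier"
  return FalseC + (int64_t)Cond * Diff;    // matches the switch cases above
}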
- for (unsigned i = 0, e = NumElts / 2; i < e; i++) { - ShuffleMask[2 * i] = i + NumElts / 2; - ShuffleMask[2 * i + 1] = i + NumElts * 3 / 2; - } - SDValue ResHi = - DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask); - ResHi = DAG.getBitcast(ResVT, ResHi); - return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi); - } - - // When VT.getVectorNumElements() < OpsVT.getVectorNumElements(), we want - // to legalize the mul explicitly because implicit legalization for type - // <4 x i16> to <4 x i32> sometimes involves unnecessary unpack - // instructions which will not exist when we explicitly legalize it by - // extending <4 x i16> to <8 x i16> (concatenating the <4 x i16> val with - // <4 x i16> undef). - // - // Legalize the operands of mul. - // FIXME: We may be able to handle non-concatenated vectors by insertion. - unsigned ReducedSizeInBits = ReducedVT.getSizeInBits(); - if ((RegSize % ReducedSizeInBits) != 0) - return SDValue(); - - SmallVector Ops(RegSize / ReducedSizeInBits, - DAG.getUNDEF(ReducedVT)); - Ops[0] = NewN0; - NewN0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops); - Ops[0] = NewN1; - NewN1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops); - - if (Mode == MULU8 || Mode == MULS8) { - // Generate lower part of mul: pmullw. For MULU8/MULS8, only the lower - // part is needed. - SDValue Mul = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1); - - // convert the type of mul result to VT. - MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32); - SDValue Res = DAG.getNode(Mode == MULU8 ? ISD::ZERO_EXTEND_VECTOR_INREG - : ISD::SIGN_EXTEND_VECTOR_INREG, - DL, ResVT, Mul); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, - DAG.getIntPtrConstant(0, DL)); - } + // Generate the lower part of mul: pmullw. For MULU8/MULS8, only the + // lower part is needed. + SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1); + if (Mode == MULU8 || Mode == MULS8) + return DAG.getNode((Mode == MULU8) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND, + DL, VT, MulLo); - // Generate the lower and higher part of mul: pmulhw/pmulhuw. For - // MULU16/MULS16, both parts are needed. - SDValue MulLo = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1); + MVT ResVT = MVT::getVectorVT(MVT::i32, NumElts / 2); + // Generate the higher part of mul: pmulhw/pmulhuw. For MULU16/MULS16, + // the higher part is also needed. SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL, - OpsVT, NewN0, NewN1); + ReducedVT, NewN0, NewN1); // Repack the lower part and higher part result of mul into a wider - // result. Make sure the type of mul result is VT. - MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32); - SDValue Res = getUnpackl(DAG, DL, OpsVT, MulLo, MulHi); - Res = DAG.getBitcast(ResVT, Res); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, - DAG.getIntPtrConstant(0, DL)); + // result. + // Generate shuffle functioning as punpcklwd. + SmallVector ShuffleMask(NumElts); + for (unsigned i = 0, e = NumElts / 2; i < e; i++) { + ShuffleMask[2 * i] = i; + ShuffleMask[2 * i + 1] = i + NumElts; + } + SDValue ResLo = + DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask); + ResLo = DAG.getBitcast(ResVT, ResLo); + // Generate shuffle functioning as punpckhwd. 
+ for (unsigned i = 0, e = NumElts / 2; i < e; i++) { + ShuffleMask[2 * i] = i + NumElts / 2; + ShuffleMask[2 * i + 1] = i + NumElts * 3 / 2; + } + SDValue ResHi = + DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask); + ResHi = DAG.getBitcast(ResVT, ResHi); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi); } static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG, @@ -37365,8 +38025,7 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG, // Make sure the vXi16 type is legal. This covers the AVX512 without BWI case. // Also allow v2i32 if it will be widened. MVT WVT = MVT::getVectorVT(MVT::i16, 2 * VT.getVectorNumElements()); - if (!((ExperimentalVectorWideningLegalization && VT == MVT::v2i32) || - DAG.getTargetLoweringInfo().isTypeLegal(WVT))) + if (VT != MVT::v2i32 && !DAG.getTargetLoweringInfo().isTypeLegal(WVT)) return SDValue(); SDValue N0 = N->getOperand(0); @@ -37919,7 +38578,7 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG, if (NewShiftVal >= NumBitsPerElt) NewShiftVal = NumBitsPerElt - 1; return DAG.getNode(X86ISD::VSRAI, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(NewShiftVal, SDLoc(N), MVT::i8)); + DAG.getTargetConstant(NewShiftVal, SDLoc(N), MVT::i8)); } // We can decode 'whole byte' logical bit shifts as shuffles. @@ -38039,7 +38698,7 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG, if (Subtarget.hasAVX512()) { SDValue FSetCC = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01, - DAG.getConstant(x86cc, DL, MVT::i8)); + DAG.getTargetConstant(x86cc, DL, MVT::i8)); // Need to fill with zeros to ensure the bitcast will produce zeroes // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that. SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v16i1, @@ -38048,10 +38707,9 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG, return DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Ins), DL, N->getSimpleValueType(0)); } - SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL, - CMP00.getValueType(), CMP00, CMP01, - DAG.getConstant(x86cc, DL, - MVT::i8)); + SDValue OnesOrZeroesF = + DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00, + CMP01, DAG.getTargetConstant(x86cc, DL, MVT::i8)); bool is64BitFP = (CMP00.getValueType() == MVT::f64); MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32; @@ -38083,34 +38741,6 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG, return SDValue(); } -// Match (xor X, -1) -> X. -// Match extract_subvector(xor X, -1) -> extract_subvector(X). -// Match concat_vectors(xor X, -1, xor Y, -1) -> concat_vectors(X, Y). 
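The PMULLW/PMULHW plus unpack repacking that reduceVMULWidth now always uses (above) can be seen directly with intrinsics. A minimal sketch for the signed MULS16 case, with an invented helper name:

#include <emmintrin.h>   // SSE2

// Interleaving the low and high 16-bit halves of each product with
// punpcklwd/punpckhwd reconstructs the eight full 32-bit products.
static inline void Mul16To32(__m128i A, __m128i B, __m128i &Lo, __m128i &Hi) {
  __m128i MulLo = _mm_mullo_epi16(A, B);   // low 16 bits of each product
  __m128i MulHi = _mm_mulhi_epi16(A, B);   // high 16 bits (signed)
  Lo = _mm_unpacklo_epi16(MulLo, MulHi);   // 32-bit products of lanes 0..3
  Hi = _mm_unpackhi_epi16(MulLo, MulHi);   // 32-bit products of lanes 4..7
}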
-static SDValue IsNOT(SDValue V, SelectionDAG &DAG) { - V = peekThroughBitcasts(V); - if (V.getOpcode() == ISD::XOR && - ISD::isBuildVectorAllOnes(V.getOperand(1).getNode())) - return V.getOperand(0); - if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR && - (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) { - if (SDValue Not = IsNOT(V.getOperand(0), DAG)) { - Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), V.getValueType(), - Not, V.getOperand(1)); - } - } - SmallVector CatOps; - if (collectConcatOps(V.getNode(), CatOps)) { - for (SDValue &CatOp : CatOps) { - SDValue NotCat = IsNOT(CatOp, DAG); - if (!NotCat) return SDValue(); - CatOp = DAG.getBitcast(CatOp.getValueType(), NotCat); - } - return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(V), V.getValueType(), CatOps); - } - return SDValue(); -} - /// Try to fold: (and (xor X, -1), Y) -> (andnp X, Y). static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) { assert(N->getOpcode() == ISD::AND); @@ -38273,7 +38903,7 @@ static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG, SDLoc DL(N); unsigned ShiftVal = SplatVal.countTrailingOnes(); - SDValue ShAmt = DAG.getConstant(EltBitWidth - ShiftVal, DL, MVT::i8); + SDValue ShAmt = DAG.getTargetConstant(EltBitWidth - ShiftVal, DL, MVT::i8); SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT0, Op0, ShAmt); return DAG.getBitcast(N->getValueType(0), Shift); } @@ -38499,7 +39129,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, // TODO: Support multiple SrcOps. if (VT == MVT::i1) { SmallVector SrcOps; - if (matchBitOpReduction(SDValue(N, 0), ISD::AND, SrcOps) && + if (matchScalarReduction(SDValue(N, 0), ISD::AND, SrcOps) && SrcOps.size() == 1) { SDLoc dl(N); unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements(); @@ -38570,7 +39200,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, } if (SDValue Shuffle = combineX86ShufflesRecursively( - {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 2, + {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 1, /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), VT, Shuffle, N->getOperand(0).getOperand(1)); @@ -38585,7 +39215,7 @@ static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { assert(N->getOpcode() == ISD::OR && "Unexpected Opcode"); - EVT VT = N->getValueType(0); + MVT VT = N->getSimpleValueType(0); if (!VT.isVector() || (VT.getScalarSizeInBits() % 8) != 0) return SDValue(); @@ -38594,10 +39224,12 @@ static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG, if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND) return SDValue(); - // On XOP we'll lower to PCMOV so accept one use, otherwise only - // do this if either mask has multiple uses already. - if (!(Subtarget.hasXOP() || !N0.getOperand(1).hasOneUse() || - !N1.getOperand(1).hasOneUse())) + // On XOP we'll lower to PCMOV so accept one use. With AVX512, we can use + // VPTERNLOG. Otherwise only do this if either mask has multiple uses already. + bool UseVPTERNLOG = (Subtarget.hasAVX512() && VT.is512BitVector()) || + Subtarget.hasVLX(); + if (!(Subtarget.hasXOP() || UseVPTERNLOG || + !N0.getOperand(1).hasOneUse() || !N1.getOperand(1).hasOneUse())) return SDValue(); // Attempt to extract constant byte masks. 
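canonicalizeBitSelect above now also accepts single-use masks whenever XOP or AVX512 can lower the whole pattern in one instruction (VPCMOV / VPTERNLOG). The pattern itself is the classic bit select, sketched here with plain SSE2 intrinsics and an invented helper name:

#include <emmintrin.h>   // SSE2

// (A & M) | (B & ~M): bits of A where the mask is set, bits of B elsewhere.
static inline __m128i BitSelect(__m128i M, __m128i A, __m128i B) {
  return _mm_or_si128(_mm_and_si128(M, A), _mm_andnot_si128(M, B));
}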
@@ -38895,6 +39527,24 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG, DAG.getBitcast(MVT::v4f32, N1))); } + // Match any-of bool scalar reductions into a bitcast/movmsk + cmp. + // TODO: Support multiple SrcOps. + if (VT == MVT::i1) { + SmallVector SrcOps; + if (matchScalarReduction(SDValue(N, 0), ISD::OR, SrcOps) && + SrcOps.size() == 1) { + SDLoc dl(N); + unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements(); + EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts); + SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget); + if (Mask) { + APInt AllBits = APInt::getNullValue(NumElts); + return DAG.getSetCC(dl, MVT::i1, Mask, + DAG.getConstant(AllBits, dl, MaskVT), ISD::SETNE); + } + } + } + if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -39136,26 +39786,6 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones); } -/// Check if truncation with saturation form type \p SrcVT to \p DstVT -/// is valid for the given \p Subtarget. -static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT, - const X86Subtarget &Subtarget) { - if (!Subtarget.hasAVX512()) - return false; - - // FIXME: Scalar type may be supported if we move it to vector register. - if (!SrcVT.isVector()) - return false; - - EVT SrcElVT = SrcVT.getScalarType(); - EVT DstElVT = DstVT.getScalarType(); - if (DstElVT != MVT::i8 && DstElVT != MVT::i16 && DstElVT != MVT::i32) - return false; - if (SrcVT.is512BitVector() || Subtarget.hasVLX()) - return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI(); - return false; -} - /// Detect patterns of truncation with unsigned saturation: /// /// 1. (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type). @@ -39253,64 +39883,61 @@ static SDValue detectSSatPattern(SDValue In, EVT VT, bool MatchPackUS = false) { return SDValue(); } -/// Detect a pattern of truncation with signed saturation. -/// The types should allow to use VPMOVSS* instruction on AVX512. -/// Return the source value to be truncated or SDValue() if the pattern was not -/// matched. -static SDValue detectAVX512SSatPattern(SDValue In, EVT VT, - const X86Subtarget &Subtarget, - const TargetLowering &TLI) { - if (!TLI.isTypeLegal(In.getValueType())) - return SDValue(); - if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget)) - return SDValue(); - return detectSSatPattern(In, VT); -} - -/// Detect a pattern of truncation with saturation: -/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type). -/// The types should allow to use VPMOVUS* instruction on AVX512. -/// Return the source value to be truncated or SDValue() if the pattern was not -/// matched. 
-static SDValue detectAVX512USatPattern(SDValue In, EVT VT, SelectionDAG &DAG, - const SDLoc &DL, - const X86Subtarget &Subtarget, - const TargetLowering &TLI) { - if (!TLI.isTypeLegal(In.getValueType())) - return SDValue(); - if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget)) - return SDValue(); - return detectUSatPattern(In, VT, DAG, DL); -} - static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget) { - EVT SVT = VT.getScalarType(); + if (!Subtarget.hasSSE2() || !VT.isVector()) + return SDValue(); + + EVT SVT = VT.getVectorElementType(); EVT InVT = In.getValueType(); - EVT InSVT = InVT.getScalarType(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLI.isTypeLegal(InVT) && TLI.isTypeLegal(VT) && - isSATValidOnAVX512Subtarget(InVT, VT, Subtarget)) { - if (auto SSatVal = detectSSatPattern(In, VT)) - return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal); - if (auto USatVal = detectUSatPattern(In, VT, DAG, DL)) - return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal); - } - if (VT.isVector() && isPowerOf2_32(VT.getVectorNumElements()) && - !Subtarget.hasAVX512() && + EVT InSVT = InVT.getVectorElementType(); + + // If we're clamping a signed 32-bit vector to 0-255 and the 32-bit vector is + // split across two registers. We can use a packusdw+perm to clamp to 0-65535 + // and concatenate at the same time. Then we can use a final vpmovuswb to + // clip to 0-255. + if (Subtarget.hasBWI() && !Subtarget.useAVX512Regs() && + InVT == MVT::v16i32 && VT == MVT::v16i8) { + if (auto USatVal = detectSSatPattern(In, VT, true)) { + // Emit a VPACKUSDW+VPERMQ followed by a VPMOVUSWB. + SDValue Mid = truncateVectorWithPACK(X86ISD::PACKUS, MVT::v16i16, USatVal, + DL, DAG, Subtarget); + assert(Mid && "Failed to pack!"); + return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, Mid); + } + } + + // vXi32 truncate instructions are available with AVX512F. + // vXi16 truncate instructions are only available with AVX512BW. + // For 256-bit or smaller vectors, we require VLX. + // FIXME: We could widen truncates to 512 to remove the VLX restriction. + // If the result type is 256-bits or larger and we have disable 512-bit + // registers, we should go ahead and use the pack instructions if possible. + bool PreferAVX512 = ((Subtarget.hasAVX512() && InSVT == MVT::i32) || + (Subtarget.hasBWI() && InSVT == MVT::i16)) && + (InVT.getSizeInBits() > 128) && + (Subtarget.hasVLX() || InVT.getSizeInBits() > 256) && + !(!Subtarget.useAVX512Regs() && VT.getSizeInBits() >= 256); + + if (isPowerOf2_32(VT.getVectorNumElements()) && !PreferAVX512 && + VT.getSizeInBits() >= 64 && (SVT == MVT::i8 || SVT == MVT::i16) && (InSVT == MVT::i16 || InSVT == MVT::i32)) { if (auto USatVal = detectSSatPattern(In, VT, true)) { // vXi32 -> vXi8 must be performed as PACKUSWB(PACKSSDW,PACKSSDW). + // Only do this when the result is at least 64 bits or we'll leaving + // dangling PACKSSDW nodes. 
if (SVT == MVT::i8 && InSVT == MVT::i32) { EVT MidVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, VT.getVectorNumElements()); SDValue Mid = truncateVectorWithPACK(X86ISD::PACKSS, MidVT, USatVal, DL, DAG, Subtarget); - if (Mid) - return truncateVectorWithPACK(X86ISD::PACKUS, VT, Mid, DL, DAG, - Subtarget); + assert(Mid && "Failed to pack!"); + SDValue V = truncateVectorWithPACK(X86ISD::PACKUS, VT, Mid, DL, DAG, + Subtarget); + assert(V && "Failed to pack!"); + return V; } else if (SVT == MVT::i8 || Subtarget.hasSSE41()) return truncateVectorWithPACK(X86ISD::PACKUS, VT, USatVal, DL, DAG, Subtarget); @@ -39319,6 +39946,42 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL, return truncateVectorWithPACK(X86ISD::PACKSS, VT, SSatVal, DL, DAG, Subtarget); } + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.isTypeLegal(InVT) && InVT.isVector() && SVT != MVT::i1 && + Subtarget.hasAVX512() && (InSVT != MVT::i16 || Subtarget.hasBWI())) { + unsigned TruncOpc; + SDValue SatVal; + if (auto SSatVal = detectSSatPattern(In, VT)) { + SatVal = SSatVal; + TruncOpc = X86ISD::VTRUNCS; + } else if (auto USatVal = detectUSatPattern(In, VT, DAG, DL)) { + SatVal = USatVal; + TruncOpc = X86ISD::VTRUNCUS; + } + if (SatVal) { + unsigned ResElts = VT.getVectorNumElements(); + // If the input type is less than 512 bits and we don't have VLX, we need + // to widen to 512 bits. + if (!Subtarget.hasVLX() && !InVT.is512BitVector()) { + unsigned NumConcats = 512 / InVT.getSizeInBits(); + ResElts *= NumConcats; + SmallVector ConcatOps(NumConcats, DAG.getUNDEF(InVT)); + ConcatOps[0] = SatVal; + InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, + NumConcats * InVT.getVectorNumElements()); + SatVal = DAG.getNode(ISD::CONCAT_VECTORS, DL, InVT, ConcatOps); + } + // Widen the result if its narrower than 128 bits. + if (ResElts * SVT.getSizeInBits() < 128) + ResElts = 128 / SVT.getSizeInBits(); + EVT TruncVT = EVT::getVectorVT(*DAG.getContext(), SVT, ResElts); + SDValue Res = DAG.getNode(TruncOpc, DL, TruncVT, SatVal); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, + DAG.getIntPtrConstant(0, DL)); + } + } + return SDValue(); } @@ -39377,7 +40040,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG, return true; }; - // Check if each element of the vector is left-shifted by one. + // Check if each element of the vector is right-shifted by one. auto LHS = In.getOperand(0); auto RHS = In.getOperand(1); if (!IsConstVectorInRange(RHS, 1, 1)) @@ -39679,90 +40342,7 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG, return Blend; } - if (Mld->getExtensionType() != ISD::EXTLOAD) - return SDValue(); - - // Resolve extending loads. - EVT VT = Mld->getValueType(0); - unsigned NumElems = VT.getVectorNumElements(); - EVT LdVT = Mld->getMemoryVT(); - SDLoc dl(Mld); - - assert(LdVT != VT && "Cannot extend to the same type"); - unsigned ToSz = VT.getScalarSizeInBits(); - unsigned FromSz = LdVT.getScalarSizeInBits(); - // From/To sizes and ElemCount must be pow of two. - assert (isPowerOf2_32(NumElems * FromSz * ToSz) && - "Unexpected size for extending masked load"); - - unsigned SizeRatio = ToSz / FromSz; - assert(SizeRatio * NumElems * FromSz == VT.getSizeInBits()); - - // Create a type on which we perform the shuffle. - EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), - LdVT.getScalarType(), NumElems*SizeRatio); - assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); - - // Convert PassThru value. 
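The saturation detection used by combineTruncateWithSat above matches min/max clamps feeding a truncate. The scalar shape of the two patterns, which PACKSS/PACKUS (and the AVX512 saturating VPMOV* truncates) implement per lane, is roughly the following sketch (helper names invented):

#include <algorithm>
#include <cstdint>

// Signed saturation: clamp to [-128, 127], then truncate (packsswb behaviour).
static inline int8_t TruncSSat(int16_t X) {
  return (int8_t)std::min<int16_t>(std::max<int16_t>(X, -128), 127);
}
// Unsigned saturation of a signed input: clamp to [0, 255] (packuswb behaviour).
static inline uint8_t TruncUSat(int16_t X) {
  return (uint8_t)std::min<int16_t>(std::max<int16_t>(X, 0), 255);
}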
- SDValue WidePassThru = DAG.getBitcast(WideVecVT, Mld->getPassThru()); - if (!Mld->getPassThru().isUndef()) { - SmallVector ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i != NumElems; ++i) - ShuffleVec[i] = i * SizeRatio; - - // Can't shuffle using an illegal type. - assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) && - "WideVecVT should be legal"); - WidePassThru = DAG.getVectorShuffle(WideVecVT, dl, WidePassThru, - DAG.getUNDEF(WideVecVT), ShuffleVec); - } - - // Prepare the new mask. - SDValue NewMask; - SDValue Mask = Mld->getMask(); - if (Mask.getValueType() == VT) { - // Mask and original value have the same type. - NewMask = DAG.getBitcast(WideVecVT, Mask); - SmallVector ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i != NumElems; ++i) - ShuffleVec[i] = i * SizeRatio; - for (unsigned i = NumElems; i != NumElems * SizeRatio; ++i) - ShuffleVec[i] = NumElems * SizeRatio; - NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask, - DAG.getConstant(0, dl, WideVecVT), - ShuffleVec); - } else { - assert(Mask.getValueType().getVectorElementType() == MVT::i1); - unsigned WidenNumElts = NumElems*SizeRatio; - unsigned MaskNumElts = VT.getVectorNumElements(); - EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - WidenNumElts); - - unsigned NumConcat = WidenNumElts / MaskNumElts; - SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType()); - SmallVector Ops(NumConcat, ZeroVal); - Ops[0] = Mask; - NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops); - } - - SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(), - Mld->getBasePtr(), NewMask, WidePassThru, - Mld->getMemoryVT(), Mld->getMemOperand(), - ISD::NON_EXTLOAD); - - SDValue SlicedVec = DAG.getBitcast(WideVecVT, WideLd); - SmallVector ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i != NumElems; ++i) - ShuffleVec[i * SizeRatio] = i; - - // Can't shuffle using an illegal type. - assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) && - "WideVecVT should be legal"); - SlicedVec = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec, - DAG.getUNDEF(WideVecVT), ShuffleVec); - SlicedVec = DAG.getBitcast(VT, SlicedVec); - - return DCI.CombineTo(N, SlicedVec, WideLd.getValue(1), true); + return SDValue(); } /// If exactly one element of the mask is set for a non-truncating masked store, @@ -39800,123 +40380,45 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG, return SDValue(); EVT VT = Mst->getValue().getValueType(); - EVT StVT = Mst->getMemoryVT(); SDLoc dl(Mst); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!Mst->isTruncatingStore()) { - if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG)) - return ScalarStore; - - // If the mask value has been legalized to a non-boolean vector, try to - // simplify ops leading up to it. We only demand the MSB of each lane. - SDValue Mask = Mst->getMask(); - if (Mask.getScalarValueSizeInBits() != 1) { - APInt DemandedMask(APInt::getSignMask(VT.getScalarSizeInBits())); - if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI)) - return SDValue(N, 0); - } - - // TODO: AVX512 targets should also be able to simplify something like the - // pattern above, but that pattern will be different. It will either need to - // match setcc more generally or match PCMPGTM later (in tablegen?). 
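The SimplifyDemandedBits call on the mask (above, and again in the rewritten combineMaskedStore below) demands only the sign bit of each lane because the legacy masked-move and variable-blend instructions consult nothing else. A small, hedged analogue using BLENDVB (SSE4.1, helper name invented):

#include <smmintrin.h>   // SSE4.1: _mm_blendv_epi8

// Per-byte select driven purely by the top bit of each mask byte; any mask
// value with the same sign bits (e.g. 0x80 vs 0xFF) selects identically.
static inline __m128i SelectBySignBit(__m128i IfClear, __m128i IfSet, __m128i Mask) {
  return _mm_blendv_epi8(IfClear, IfSet, Mask);
}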
- - SDValue Value = Mst->getValue(); - if (Value.getOpcode() == ISD::TRUNCATE && Value.getNode()->hasOneUse() && - TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), - Mst->getMemoryVT())) { - return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0), - Mst->getBasePtr(), Mask, - Mst->getMemoryVT(), Mst->getMemOperand(), true); - } - - return SDValue(); - } - - // Resolve truncating stores. - unsigned NumElems = VT.getVectorNumElements(); - - assert(StVT != VT && "Cannot truncate to the same type"); - unsigned FromSz = VT.getScalarSizeInBits(); - unsigned ToSz = StVT.getScalarSizeInBits(); - - // The truncating store is legal in some cases. For example - // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw - // are designated for truncate store. - // In this case we don't need any further transformations. - if (TLI.isTruncStoreLegal(VT, StVT)) + if (Mst->isTruncatingStore()) return SDValue(); - // From/To sizes and ElemCount must be pow of two. - assert (isPowerOf2_32(NumElems * FromSz * ToSz) && - "Unexpected size for truncating masked store"); - // We are going to use the original vector elt for storing. - // Accumulated smaller vector elements must be a multiple of the store size. - assert (((NumElems * FromSz) % ToSz) == 0 && - "Unexpected ratio for truncating masked store"); - - unsigned SizeRatio = FromSz / ToSz; - assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits()); - - // Create a type on which we perform the shuffle. - EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), - StVT.getScalarType(), NumElems*SizeRatio); - - assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); - - SDValue WideVec = DAG.getBitcast(WideVecVT, Mst->getValue()); - SmallVector ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i != NumElems; ++i) - ShuffleVec[i] = i * SizeRatio; + if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG)) + return ScalarStore; - // Can't shuffle using an illegal type. - assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) && - "WideVecVT should be legal"); - - SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec, - DAG.getUNDEF(WideVecVT), - ShuffleVec); - - SDValue NewMask; + // If the mask value has been legalized to a non-boolean vector, try to + // simplify ops leading up to it. We only demand the MSB of each lane. SDValue Mask = Mst->getMask(); - if (Mask.getValueType() == VT) { - // Mask and original value have the same type. 
- NewMask = DAG.getBitcast(WideVecVT, Mask); - for (unsigned i = 0; i != NumElems; ++i) - ShuffleVec[i] = i * SizeRatio; - for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i) - ShuffleVec[i] = NumElems*SizeRatio; - NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask, - DAG.getConstant(0, dl, WideVecVT), - ShuffleVec); - } else { - assert(Mask.getValueType().getVectorElementType() == MVT::i1); - unsigned WidenNumElts = NumElems*SizeRatio; - unsigned MaskNumElts = VT.getVectorNumElements(); - EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - WidenNumElts); + if (Mask.getScalarValueSizeInBits() != 1) { + APInt DemandedMask(APInt::getSignMask(VT.getScalarSizeInBits())); + if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI)) + return SDValue(N, 0); + } - unsigned NumConcat = WidenNumElts / MaskNumElts; - SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType()); - SmallVector Ops(NumConcat, ZeroVal); - Ops[0] = Mask; - NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops); + SDValue Value = Mst->getValue(); + if (Value.getOpcode() == ISD::TRUNCATE && Value.getNode()->hasOneUse() && + TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), + Mst->getMemoryVT())) { + return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0), + Mst->getBasePtr(), Mask, + Mst->getMemoryVT(), Mst->getMemOperand(), true); } - return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal, - Mst->getBasePtr(), NewMask, StVT, - Mst->getMemOperand(), false); + return SDValue(); } static SDValue combineStore(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { StoreSDNode *St = cast(N); - EVT VT = St->getValue().getValueType(); EVT StVT = St->getMemoryVT(); SDLoc dl(St); unsigned Alignment = St->getAlignment(); - SDValue StoredVal = St->getOperand(1); + SDValue StoredVal = St->getValue(); + EVT VT = StoredVal.getValueType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Convert a store of vXi1 into a store of iX and a bitcast. @@ -39986,8 +40488,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, St->getMemOperand()->getFlags()); } - // If we are saving a concatenation of two XMM registers and 32-byte stores - // are slow, such as on Sandy Bridge, perform two 16-byte stores. + // If we are saving a 32-byte vector and 32-byte stores are slow, such as on + // Sandy Bridge, perform two 16-byte stores. bool Fast; if (VT.is256BitVector() && StVT == VT && TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, @@ -40026,13 +40528,24 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, if (!St->isTruncatingStore() && VT == MVT::v16i8 && !Subtarget.hasBWI() && St->getValue().getOpcode() == ISD::TRUNCATE && St->getValue().getOperand(0).getValueType() == MVT::v16i16 && - TLI.isTruncStoreLegalOrCustom(MVT::v16i32, MVT::v16i8) && - !DCI.isBeforeLegalizeOps()) { + TLI.isTruncStoreLegal(MVT::v16i32, MVT::v16i8) && + St->getValue().hasOneUse() && !DCI.isBeforeLegalizeOps()) { SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32, St->getValue()); return DAG.getTruncStore(St->getChain(), dl, Ext, St->getBasePtr(), MVT::v16i8, St->getMemOperand()); } + // Try to fold a VTRUNCUS or VTRUNCS into a truncating store. 
+ if (!St->isTruncatingStore() && StoredVal.hasOneUse() && + (StoredVal.getOpcode() == X86ISD::VTRUNCUS || + StoredVal.getOpcode() == X86ISD::VTRUNCS) && + TLI.isTruncStoreLegal(StoredVal.getOperand(0).getValueType(), VT)) { + bool IsSigned = StoredVal.getOpcode() == X86ISD::VTRUNCS; + return EmitTruncSStore(IsSigned, St->getChain(), + dl, StoredVal.getOperand(0), St->getBasePtr(), + VT, St->getMemOperand(), DAG); + } + // Optimize trunc store (of multiple scalars) to shuffle and store. // First, pack all of the elements in one place. Next, store to memory // in fewer chunks. @@ -40040,100 +40553,26 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, // Check if we can detect an AVG pattern from the truncation. If yes, // replace the trunc store by a normal store with the result of X86ISD::AVG // instruction. - if (SDValue Avg = detectAVGPattern(St->getValue(), St->getMemoryVT(), DAG, - Subtarget, dl)) - return DAG.getStore(St->getChain(), dl, Avg, St->getBasePtr(), - St->getPointerInfo(), St->getAlignment(), - St->getMemOperand()->getFlags()); - - if (SDValue Val = - detectAVX512SSatPattern(St->getValue(), St->getMemoryVT(), Subtarget, - TLI)) - return EmitTruncSStore(true /* Signed saturation */, St->getChain(), - dl, Val, St->getBasePtr(), - St->getMemoryVT(), St->getMemOperand(), DAG); - if (SDValue Val = detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), - DAG, dl, Subtarget, TLI)) - return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(), - dl, Val, St->getBasePtr(), - St->getMemoryVT(), St->getMemOperand(), DAG); - - unsigned NumElems = VT.getVectorNumElements(); - assert(StVT != VT && "Cannot truncate to the same type"); - unsigned FromSz = VT.getScalarSizeInBits(); - unsigned ToSz = StVT.getScalarSizeInBits(); - - // The truncating store is legal in some cases. For example - // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw - // are designated for truncate store. - // In this case we don't need any further transformations. - if (TLI.isTruncStoreLegalOrCustom(VT, StVT)) - return SDValue(); + if (DCI.isBeforeLegalize() || TLI.isTypeLegal(St->getMemoryVT())) + if (SDValue Avg = detectAVGPattern(St->getValue(), St->getMemoryVT(), DAG, + Subtarget, dl)) + return DAG.getStore(St->getChain(), dl, Avg, St->getBasePtr(), + St->getPointerInfo(), St->getAlignment(), + St->getMemOperand()->getFlags()); - // From, To sizes and ElemCount must be pow of two - if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue(); - // We are going to use the original vector elt for storing. - // Accumulated smaller vector elements must be a multiple of the store size. - if (0 != (NumElems * FromSz) % ToSz) return SDValue(); - - unsigned SizeRatio = FromSz / ToSz; - - assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits()); - - // Create a type on which we perform the shuffle - EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), - StVT.getScalarType(), NumElems*SizeRatio); - - assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); - - SDValue WideVec = DAG.getBitcast(WideVecVT, St->getValue()); - SmallVector ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i != NumElems; ++i) - ShuffleVec[i] = i * SizeRatio; - - // Can't shuffle using an illegal type. - if (!TLI.isTypeLegal(WideVecVT)) - return SDValue(); - - SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec, - DAG.getUNDEF(WideVecVT), - ShuffleVec); - // At this point all of the data is stored at the bottom of the - // register. We now need to save it to mem. 
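detectAVGPattern, used just above for truncating stores, matches a rounding average computed in a wider type. The scalar form it replaces with X86ISD::AVG (PAVGB) is simply the following sketch:

#include <cstdint>

// trunc((zext(a) + zext(b) + 1) >> 1)  ->  pavgb a, b
static inline uint8_t RoundingAvg(uint8_t A, uint8_t B) {
  return (uint8_t)(((unsigned)A + (unsigned)B + 1) >> 1);
}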
- - // Find the largest store unit - MVT StoreType = MVT::i8; - for (MVT Tp : MVT::integer_valuetypes()) { - if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToSz) - StoreType = Tp; + if (TLI.isTruncStoreLegal(VT, StVT)) { + if (SDValue Val = detectSSatPattern(St->getValue(), St->getMemoryVT())) + return EmitTruncSStore(true /* Signed saturation */, St->getChain(), + dl, Val, St->getBasePtr(), + St->getMemoryVT(), St->getMemOperand(), DAG); + if (SDValue Val = detectUSatPattern(St->getValue(), St->getMemoryVT(), + DAG, dl)) + return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(), + dl, Val, St->getBasePtr(), + St->getMemoryVT(), St->getMemOperand(), DAG); } - // On 32bit systems, we can't save 64bit integers. Try bitcasting to F64. - if (TLI.isTypeLegal(MVT::f64) && StoreType.getSizeInBits() < 64 && - (64 <= NumElems * ToSz)) - StoreType = MVT::f64; - - // Bitcast the original vector into a vector of store-size units - EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), - StoreType, VT.getSizeInBits()/StoreType.getSizeInBits()); - assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); - SDValue ShuffWide = DAG.getBitcast(StoreVecVT, Shuff); - SmallVector Chains; - SDValue Ptr = St->getBasePtr(); - - // Perform one or more big stores into memory. - for (unsigned i=0, e=(ToSz*NumElems)/StoreType.getSizeInBits(); i!=e; ++i) { - SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - StoreType, ShuffWide, - DAG.getIntPtrConstant(i, dl)); - SDValue Ch = - DAG.getStore(St->getChain(), dl, SubVec, Ptr, St->getPointerInfo(), - St->getAlignment(), St->getMemOperand()->getFlags()); - Ptr = DAG.getMemBasePlusOffset(Ptr, StoreType.getStoreSize(), dl); - Chains.push_back(Ch); - } - - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); + return SDValue(); } // Turn load->store of MMX types into GPR load/stores. This avoids clobbering @@ -40149,11 +40588,10 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat); bool F64IsLegal = !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2(); - if (((VT.isVector() && !VT.isFloatingPoint()) || - (VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit())) && + if ((VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit()) && isa(St->getValue()) && - !cast(St->getValue())->isVolatile() && - St->getChain().hasOneUse() && !St->isVolatile()) { + cast(St->getValue())->isSimple() && + St->getChain().hasOneUse() && St->isSimple()) { LoadSDNode *Ld = cast(St->getValue().getNode()); SmallVector Ops; @@ -40595,8 +41033,8 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG, static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget) { - // Requires SSE2 but AVX512 has fast truncate. - if (!Subtarget.hasSSE2() || Subtarget.hasAVX512()) + // Requires SSE2. + if (!Subtarget.hasSSE2()) return SDValue(); if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple()) @@ -40620,6 +41058,13 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL, if (InSVT != MVT::i16 && InSVT != MVT::i32 && InSVT != MVT::i64) return SDValue(); + // AVX512 has fast truncate, but if the input is already going to be split, + // there's no harm in trying pack. 
+ if (Subtarget.hasAVX512() && + !(!Subtarget.useAVX512Regs() && VT.is256BitVector() && + InVT.is512BitVector())) + return SDValue(); + unsigned NumPackedSignBits = std::min(SVT.getSizeInBits(), 16); unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8; @@ -40658,9 +41103,7 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL, // Only handle vXi16 types that are at least 128-bits unless they will be // widened. - if (!VT.isVector() || VT.getVectorElementType() != MVT::i16 || - (!ExperimentalVectorWideningLegalization && - VT.getVectorNumElements() < 8)) + if (!VT.isVector() || VT.getVectorElementType() != MVT::i16) return SDValue(); // Input type should be vXi32. @@ -40874,6 +41317,19 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, return combineVectorTruncation(N, DAG, Subtarget); } +static SDValue combineVTRUNC(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + SDValue In = N->getOperand(0); + SDLoc DL(N); + + if (auto SSatVal = detectSSatPattern(In, VT)) + return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal); + if (auto USatVal = detectUSatPattern(In, VT, DAG, DL)) + return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal); + + return SDValue(); +} + /// Returns the negated value if the node \p N flips sign of FP value. /// /// FP-negation node may have different forms: FNEG(x), FXOR (x, 0x80000000) @@ -40883,10 +41339,14 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, /// In this case we go though all bitcasts. /// This also recognizes splat of a negated value and returns the splat of that /// value. -static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) { +static SDValue isFNEG(SelectionDAG &DAG, SDNode *N, unsigned Depth = 0) { if (N->getOpcode() == ISD::FNEG) return N->getOperand(0); + // Don't recurse exponentially. + if (Depth > SelectionDAG::MaxRecursionDepth) + return SDValue(); + unsigned ScalarSize = N->getValueType(0).getScalarSizeInBits(); SDValue Op = peekThroughBitcasts(SDValue(N, 0)); @@ -40900,7 +41360,7 @@ static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) { // of this is VECTOR_SHUFFLE(-VEC1, UNDEF). The mask can be anything here. if (!SVOp->getOperand(1).isUndef()) return SDValue(); - if (SDValue NegOp0 = isFNEG(DAG, SVOp->getOperand(0).getNode())) + if (SDValue NegOp0 = isFNEG(DAG, SVOp->getOperand(0).getNode(), Depth + 1)) if (NegOp0.getValueType() == VT) // FIXME: Can we do better? 
return DAG.getVectorShuffle(VT, SDLoc(SVOp), NegOp0, DAG.getUNDEF(VT), SVOp->getMask()); @@ -40914,7 +41374,7 @@ static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) { SDValue InsVal = Op.getOperand(1); if (!InsVector.isUndef()) return SDValue(); - if (SDValue NegInsVal = isFNEG(DAG, InsVal.getNode())) + if (SDValue NegInsVal = isFNEG(DAG, InsVal.getNode(), Depth + 1)) if (NegInsVal.getValueType() == VT.getVectorElementType()) // FIXME return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector, NegInsVal, Op.getOperand(2)); @@ -40951,6 +41411,57 @@ static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) { return SDValue(); } +static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc, + bool NegRes) { + if (NegMul) { + switch (Opcode) { + default: llvm_unreachable("Unexpected opcode"); + case ISD::FMA: Opcode = X86ISD::FNMADD; break; + case X86ISD::FMADD_RND: Opcode = X86ISD::FNMADD_RND; break; + case X86ISD::FMSUB: Opcode = X86ISD::FNMSUB; break; + case X86ISD::FMSUB_RND: Opcode = X86ISD::FNMSUB_RND; break; + case X86ISD::FNMADD: Opcode = ISD::FMA; break; + case X86ISD::FNMADD_RND: Opcode = X86ISD::FMADD_RND; break; + case X86ISD::FNMSUB: Opcode = X86ISD::FMSUB; break; + case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMSUB_RND; break; + } + } + + if (NegAcc) { + switch (Opcode) { + default: llvm_unreachable("Unexpected opcode"); + case ISD::FMA: Opcode = X86ISD::FMSUB; break; + case X86ISD::FMADD_RND: Opcode = X86ISD::FMSUB_RND; break; + case X86ISD::FMSUB: Opcode = ISD::FMA; break; + case X86ISD::FMSUB_RND: Opcode = X86ISD::FMADD_RND; break; + case X86ISD::FNMADD: Opcode = X86ISD::FNMSUB; break; + case X86ISD::FNMADD_RND: Opcode = X86ISD::FNMSUB_RND; break; + case X86ISD::FNMSUB: Opcode = X86ISD::FNMADD; break; + case X86ISD::FNMSUB_RND: Opcode = X86ISD::FNMADD_RND; break; + case X86ISD::FMADDSUB: Opcode = X86ISD::FMSUBADD; break; + case X86ISD::FMADDSUB_RND: Opcode = X86ISD::FMSUBADD_RND; break; + case X86ISD::FMSUBADD: Opcode = X86ISD::FMADDSUB; break; + case X86ISD::FMSUBADD_RND: Opcode = X86ISD::FMADDSUB_RND; break; + } + } + + if (NegRes) { + switch (Opcode) { + default: llvm_unreachable("Unexpected opcode"); + case ISD::FMA: Opcode = X86ISD::FNMSUB; break; + case X86ISD::FMADD_RND: Opcode = X86ISD::FNMSUB_RND; break; + case X86ISD::FMSUB: Opcode = X86ISD::FNMADD; break; + case X86ISD::FMSUB_RND: Opcode = X86ISD::FNMADD_RND; break; + case X86ISD::FNMADD: Opcode = X86ISD::FMSUB; break; + case X86ISD::FNMADD_RND: Opcode = X86ISD::FMSUB_RND; break; + case X86ISD::FNMSUB: Opcode = ISD::FMA; break; + case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMADD_RND; break; + } + } + + return Opcode; +} + /// Do target-specific dag combines on floating point negations. static SDValue combineFneg(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { @@ -40980,29 +41491,123 @@ static SDValue combineFneg(SDNode *N, SelectionDAG &DAG, // If we're negating an FMA node, then we can adjust the // instruction to include the extra negation. 
- unsigned NewOpcode = 0; if (Arg.hasOneUse() && Subtarget.hasAnyFMA()) { switch (Arg.getOpcode()) { - case ISD::FMA: NewOpcode = X86ISD::FNMSUB; break; - case X86ISD::FMSUB: NewOpcode = X86ISD::FNMADD; break; - case X86ISD::FNMADD: NewOpcode = X86ISD::FMSUB; break; - case X86ISD::FNMSUB: NewOpcode = ISD::FMA; break; - case X86ISD::FMADD_RND: NewOpcode = X86ISD::FNMSUB_RND; break; - case X86ISD::FMSUB_RND: NewOpcode = X86ISD::FNMADD_RND; break; - case X86ISD::FNMADD_RND: NewOpcode = X86ISD::FMSUB_RND; break; - case X86ISD::FNMSUB_RND: NewOpcode = X86ISD::FMADD_RND; break; - // We can't handle scalar intrinsic node here because it would only - // invert one element and not the whole vector. But we could try to handle - // a negation of the lower element only. - } - } - if (NewOpcode) - return DAG.getBitcast(OrigVT, DAG.getNode(NewOpcode, DL, VT, - Arg.getNode()->ops())); + case ISD::FMA: + case X86ISD::FMSUB: + case X86ISD::FNMADD: + case X86ISD::FNMSUB: + case X86ISD::FMADD_RND: + case X86ISD::FMSUB_RND: + case X86ISD::FNMADD_RND: + case X86ISD::FNMSUB_RND: { + // We can't handle scalar intrinsic node here because it would only + // invert one element and not the whole vector. But we could try to handle + // a negation of the lower element only. + unsigned NewOpcode = negateFMAOpcode(Arg.getOpcode(), false, false, true); + return DAG.getBitcast(OrigVT, DAG.getNode(NewOpcode, DL, VT, Arg->ops())); + } + } + } return SDValue(); } +char X86TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, + bool ForCodeSize, + unsigned Depth) const { + // fneg patterns are removable even if they have multiple uses. + if (isFNEG(DAG, Op.getNode(), Depth)) + return 2; + + // Don't recurse exponentially. + if (Depth > SelectionDAG::MaxRecursionDepth) + return 0; + + EVT VT = Op.getValueType(); + EVT SVT = VT.getScalarType(); + switch (Op.getOpcode()) { + case ISD::FMA: + case X86ISD::FMSUB: + case X86ISD::FNMADD: + case X86ISD::FNMSUB: + case X86ISD::FMADD_RND: + case X86ISD::FMSUB_RND: + case X86ISD::FNMADD_RND: + case X86ISD::FNMSUB_RND: { + if (!Op.hasOneUse() || !Subtarget.hasAnyFMA() || !isTypeLegal(VT) || + !(SVT == MVT::f32 || SVT == MVT::f64) || !LegalOperations) + break; + + // This is always negatible for free but we might be able to remove some + // extra operand negations as well. + for (int i = 0; i != 3; ++i) { + char V = isNegatibleForFree(Op.getOperand(i), DAG, LegalOperations, + ForCodeSize, Depth + 1); + if (V == 2) + return V; + } + return 1; + } + } + + return TargetLowering::isNegatibleForFree(Op, DAG, LegalOperations, + ForCodeSize, Depth); +} + +SDValue X86TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, + bool ForCodeSize, + unsigned Depth) const { + // fneg patterns are removable even if they have multiple uses. + if (SDValue Arg = isFNEG(DAG, Op.getNode(), Depth)) + return DAG.getBitcast(Op.getValueType(), Arg); + + EVT VT = Op.getValueType(); + EVT SVT = VT.getScalarType(); + unsigned Opc = Op.getOpcode(); + switch (Opc) { + case ISD::FMA: + case X86ISD::FMSUB: + case X86ISD::FNMADD: + case X86ISD::FNMSUB: + case X86ISD::FMADD_RND: + case X86ISD::FMSUB_RND: + case X86ISD::FNMADD_RND: + case X86ISD::FNMSUB_RND: { + if (!Op.hasOneUse() || !Subtarget.hasAnyFMA() || !isTypeLegal(VT) || + !(SVT == MVT::f32 || SVT == MVT::f64) || !LegalOperations) + break; + + // This is always negatible for free but we might be able to remove some + // extra operand negations as well. 
+ SmallVector NewOps(Op.getNumOperands(), SDValue()); + for (int i = 0; i != 3; ++i) { + char V = isNegatibleForFree(Op.getOperand(i), DAG, LegalOperations, + ForCodeSize, Depth + 1); + if (V == 2) + NewOps[i] = getNegatedExpression(Op.getOperand(i), DAG, LegalOperations, + ForCodeSize, Depth + 1); + } + + bool NegA = !!NewOps[0]; + bool NegB = !!NewOps[1]; + bool NegC = !!NewOps[2]; + unsigned NewOpc = negateFMAOpcode(Opc, NegA != NegB, NegC, true); + + // Fill in the non-negated ops with the original values. + for (int i = 0, e = Op.getNumOperands(); i != e; ++i) + if (!NewOps[i]) + NewOps[i] = Op.getOperand(i); + return DAG.getNode(NewOpc, SDLoc(Op), VT, NewOps); + } + } + + return TargetLowering::getNegatedExpression(Op, DAG, LegalOperations, + ForCodeSize, Depth); +} + static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { MVT VT = N->getSimpleValueType(0); @@ -41312,8 +41917,8 @@ static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG, ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) { assert(InVT.is128BitVector() && "Expected 128-bit input vector"); LoadSDNode *LN = cast(N->getOperand(0)); - // Unless the load is volatile. - if (!LN->isVolatile()) { + // Unless the load is volatile or atomic. + if (LN->isSimple()) { SDLoc dl(N); unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements(); MVT MemVT = MVT::getIntegerVT(NumBits); @@ -41347,8 +41952,8 @@ static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG, ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) { assert(InVT.is128BitVector() && "Expected 128-bit input vector"); LoadSDNode *LN = cast(N->getOperand(0)); - // Unless the load is volatile. - if (!LN->isVolatile()) { + // Unless the load is volatile or atomic. + if (LN->isSimple()) { SDLoc dl(N); unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements(); MVT MemVT = MVT::getFloatingPointVT(NumBits); @@ -41724,127 +42329,6 @@ combineToExtendBoolVectorInReg(SDNode *N, SelectionDAG &DAG, DAG.getConstant(EltSizeInBits - 1, DL, VT)); } -/// Convert a SEXT or ZEXT of a vector to a SIGN_EXTEND_VECTOR_INREG or -/// ZERO_EXTEND_VECTOR_INREG, this requires the splitting (or concatenating -/// with UNDEFs) of the input to vectors of the same size as the target type -/// which then extends the lowest elements. -static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget &Subtarget) { - if (ExperimentalVectorWideningLegalization) - return SDValue(); - - unsigned Opcode = N->getOpcode(); - // TODO - add ANY_EXTEND support. - if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND) - return SDValue(); - if (!DCI.isBeforeLegalizeOps()) - return SDValue(); - if (!Subtarget.hasSSE2()) - return SDValue(); - - SDValue N0 = N->getOperand(0); - EVT VT = N->getValueType(0); - EVT SVT = VT.getScalarType(); - EVT InVT = N0.getValueType(); - EVT InSVT = InVT.getScalarType(); - - // FIXME: Generic DAGCombiner previously had a bug that would cause a - // sign_extend of setcc to sometimes return the original node and tricked it - // into thinking CombineTo was used which prevented the target combines from - // running. 
- // Earlying out here to avoid regressions like this - // (v4i32 (sext (v4i1 (setcc (v4i16))))) - // Becomes - // (v4i32 (sext_invec (v8i16 (concat (v4i16 (setcc (v4i16))), undef)))) - // Type legalized to - // (v4i32 (sext_invec (v8i16 (trunc_invec (v4i32 (setcc (v4i32))))))) - // Leading to a packssdw+pmovsxwd - // We could write a DAG combine to fix this, but really we shouldn't be - // creating sext_invec that's forcing v8i16 into the DAG. - if (N0.getOpcode() == ISD::SETCC) - return SDValue(); - - // Input type must be a vector and we must be extending legal integer types. - if (!VT.isVector() || VT.getVectorNumElements() < 2) - return SDValue(); - if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16) - return SDValue(); - if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8) - return SDValue(); - - // If the input/output types are both legal then we have at least AVX1 and - // we will be able to use SIGN_EXTEND/ZERO_EXTEND directly. - if (DAG.getTargetLoweringInfo().isTypeLegal(VT) && - DAG.getTargetLoweringInfo().isTypeLegal(InVT)) - return SDValue(); - - SDLoc DL(N); - - auto ExtendVecSize = [&DAG](const SDLoc &DL, SDValue N, unsigned Size) { - EVT SrcVT = N.getValueType(); - EVT DstVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), - Size / SrcVT.getScalarSizeInBits()); - SmallVector Opnds(Size / SrcVT.getSizeInBits(), - DAG.getUNDEF(SrcVT)); - Opnds[0] = N; - return DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Opnds); - }; - - // If target-size is less than 128-bits, extend to a type that would extend - // to 128 bits, extend that and extract the original target vector. - if (VT.getSizeInBits() < 128 && !(128 % VT.getSizeInBits())) { - unsigned Scale = 128 / VT.getSizeInBits(); - EVT ExVT = - EVT::getVectorVT(*DAG.getContext(), SVT, 128 / SVT.getSizeInBits()); - SDValue Ex = ExtendVecSize(DL, N0, Scale * InVT.getSizeInBits()); - SDValue SExt = DAG.getNode(Opcode, DL, ExVT, Ex); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SExt, - DAG.getIntPtrConstant(0, DL)); - } - - // If target-size is 128-bits (or 256-bits on AVX target), then convert to - // ISD::*_EXTEND_VECTOR_INREG which ensures lowering to X86ISD::V*EXT. - // Also use this if we don't have SSE41 to allow the legalizer do its job. - if (!Subtarget.hasSSE41() || VT.is128BitVector() || - (VT.is256BitVector() && Subtarget.hasAVX()) || - (VT.is512BitVector() && Subtarget.useAVX512Regs())) { - SDValue ExOp = ExtendVecSize(DL, N0, VT.getSizeInBits()); - Opcode = getOpcode_EXTEND_VECTOR_INREG(Opcode); - return DAG.getNode(Opcode, DL, VT, ExOp); - } - - auto SplitAndExtendInReg = [&](unsigned SplitSize) { - unsigned NumVecs = VT.getSizeInBits() / SplitSize; - unsigned NumSubElts = SplitSize / SVT.getSizeInBits(); - EVT SubVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumSubElts); - EVT InSubVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubElts); - - unsigned IROpc = getOpcode_EXTEND_VECTOR_INREG(Opcode); - SmallVector Opnds; - for (unsigned i = 0, Offset = 0; i != NumVecs; ++i, Offset += NumSubElts) { - SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0, - DAG.getIntPtrConstant(Offset, DL)); - SrcVec = ExtendVecSize(DL, SrcVec, SplitSize); - SrcVec = DAG.getNode(IROpc, DL, SubVT, SrcVec); - Opnds.push_back(SrcVec); - } - return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds); - }; - - // On pre-AVX targets, split into 128-bit nodes of - // ISD::*_EXTEND_VECTOR_INREG. 
- if (!Subtarget.hasAVX() && !(VT.getSizeInBits() % 128)) - return SplitAndExtendInReg(128); - - // On pre-AVX512 targets, split into 256-bit nodes of - // ISD::*_EXTEND_VECTOR_INREG. - if (!Subtarget.useAVX512Regs() && !(VT.getSizeInBits() % 256)) - return SplitAndExtendInReg(256); - - return SDValue(); -} - // Attempt to combine a (sext/zext (setcc)) to a setcc with a xmm/ymm/zmm // result type. static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG, @@ -41915,9 +42399,6 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::SUB, DL, VT, Zext, DAG.getConstant(1, DL, VT)); } - if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget)) - return V; - if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget)) return V; @@ -41931,45 +42412,15 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) { - if (NegMul) { - switch (Opcode) { - default: llvm_unreachable("Unexpected opcode"); - case ISD::FMA: Opcode = X86ISD::FNMADD; break; - case X86ISD::FMADD_RND: Opcode = X86ISD::FNMADD_RND; break; - case X86ISD::FMSUB: Opcode = X86ISD::FNMSUB; break; - case X86ISD::FMSUB_RND: Opcode = X86ISD::FNMSUB_RND; break; - case X86ISD::FNMADD: Opcode = ISD::FMA; break; - case X86ISD::FNMADD_RND: Opcode = X86ISD::FMADD_RND; break; - case X86ISD::FNMSUB: Opcode = X86ISD::FMSUB; break; - case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMSUB_RND; break; - } - } - - if (NegAcc) { - switch (Opcode) { - default: llvm_unreachable("Unexpected opcode"); - case ISD::FMA: Opcode = X86ISD::FMSUB; break; - case X86ISD::FMADD_RND: Opcode = X86ISD::FMSUB_RND; break; - case X86ISD::FMSUB: Opcode = ISD::FMA; break; - case X86ISD::FMSUB_RND: Opcode = X86ISD::FMADD_RND; break; - case X86ISD::FNMADD: Opcode = X86ISD::FNMSUB; break; - case X86ISD::FNMADD_RND: Opcode = X86ISD::FNMSUB_RND; break; - case X86ISD::FNMSUB: Opcode = X86ISD::FNMADD; break; - case X86ISD::FNMSUB_RND: Opcode = X86ISD::FNMADD_RND; break; - } - } - - return Opcode; -} - static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { SDLoc dl(N); EVT VT = N->getValueType(0); // Let legalize expand this if it isn't a legal type yet. - if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isTypeLegal(VT)) return SDValue(); EVT ScalarVT = VT.getScalarType(); @@ -41980,17 +42431,21 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, SDValue B = N->getOperand(1); SDValue C = N->getOperand(2); - auto invertIfNegative = [&DAG](SDValue &V) { - if (SDValue NegVal = isFNEG(DAG, V.getNode())) { - V = DAG.getBitcast(V.getValueType(), NegVal); + auto invertIfNegative = [&DAG, &TLI, &DCI](SDValue &V) { + bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); + bool LegalOperations = !DCI.isBeforeLegalizeOps(); + if (TLI.isNegatibleForFree(V, DAG, LegalOperations, CodeSize) == 2) { + V = TLI.getNegatedExpression(V, DAG, LegalOperations, CodeSize); return true; } // Look through extract_vector_elts. If it comes from an FNEG, create a // new extract from the FNEG input. 
if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT && isNullConstant(V.getOperand(1))) { - if (SDValue NegVal = isFNEG(DAG, V.getOperand(0).getNode())) { - NegVal = DAG.getBitcast(V.getOperand(0).getValueType(), NegVal); + SDValue Vec = V.getOperand(0); + if (TLI.isNegatibleForFree(Vec, DAG, LegalOperations, CodeSize) == 2) { + SDValue NegVal = + TLI.getNegatedExpression(Vec, DAG, LegalOperations, CodeSize); V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), V.getValueType(), NegVal, V.getOperand(1)); return true; @@ -42009,7 +42464,8 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, if (!NegA && !NegB && !NegC) return SDValue(); - unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC); + unsigned NewOpcode = + negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC, false); if (N->getNumOperands() == 4) return DAG.getNode(NewOpcode, dl, VT, A, B, C, N->getOperand(3)); @@ -42017,33 +42473,27 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, } // Combine FMADDSUB(A, B, FNEG(C)) -> FMSUBADD(A, B, C) +// Combine FMSUBADD(A, B, FNEG(C)) -> FMADDSUB(A, B, C) static SDValue combineFMADDSUB(SDNode *N, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { + TargetLowering::DAGCombinerInfo &DCI) { SDLoc dl(N); EVT VT = N->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); + bool LegalOperations = !DCI.isBeforeLegalizeOps(); - SDValue NegVal = isFNEG(DAG, N->getOperand(2).getNode()); - if (!NegVal) + SDValue N2 = N->getOperand(2); + if (TLI.isNegatibleForFree(N2, DAG, LegalOperations, CodeSize) != 2) return SDValue(); - // FIXME: Should we bitcast instead? - if (NegVal.getValueType() != VT) - return SDValue(); - - unsigned NewOpcode; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected opcode!"); - case X86ISD::FMADDSUB: NewOpcode = X86ISD::FMSUBADD; break; - case X86ISD::FMADDSUB_RND: NewOpcode = X86ISD::FMSUBADD_RND; break; - case X86ISD::FMSUBADD: NewOpcode = X86ISD::FMADDSUB; break; - case X86ISD::FMSUBADD_RND: NewOpcode = X86ISD::FMADDSUB_RND; break; - } + SDValue NegN2 = TLI.getNegatedExpression(N2, DAG, LegalOperations, CodeSize); + unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), false, true, false); if (N->getNumOperands() == 4) return DAG.getNode(NewOpcode, dl, VT, N->getOperand(0), N->getOperand(1), - NegVal, N->getOperand(3)); + NegN2, N->getOperand(3)); return DAG.getNode(NewOpcode, dl, VT, N->getOperand(0), N->getOperand(1), - NegVal); + NegN2); } static SDValue combineZext(SDNode *N, SelectionDAG &DAG, @@ -42090,9 +42540,6 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineExtSetcc(N, DAG, Subtarget)) return V; - if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget)) - return V; - if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget)) return V; @@ -42111,12 +42558,11 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG, VT.getScalarSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits()) { SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); - unsigned NumSrcElts = N00.getValueType().getVectorNumElements(); unsigned NumSrcEltBits = N00.getScalarValueSizeInBits(); APInt ZeroMask = APInt::getHighBitsSet(NumSrcEltBits, NumSrcEltBits / 2); if ((N00.isUndef() || DAG.MaskedValueIsZero(N00, ZeroMask)) && (N01.isUndef() || DAG.MaskedValueIsZero(N01, ZeroMask))) { - return concatSubVectors(N00, N01, VT, NumSrcElts * 2, DAG, dl, 128); + return concatSubVectors(N00, N01, DAG, 
dl); } } @@ -42159,16 +42605,30 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG, !IsOrXorXorCCZero) return SDValue(); - // TODO: Use PXOR + PTEST for SSE4.1 or later? EVT VT = SetCC->getValueType(0); SDLoc DL(SetCC); + bool HasAVX = Subtarget.hasAVX(); + + // Use XOR (plus OR) and PTEST after SSE4.1 and before AVX512. + // Otherwise use PCMPEQ (plus AND) and mask testing. if ((OpSize == 128 && Subtarget.hasSSE2()) || - (OpSize == 256 && Subtarget.hasAVX2()) || + (OpSize == 256 && HasAVX) || (OpSize == 512 && Subtarget.useAVX512Regs())) { - EVT VecVT = OpSize == 512 ? MVT::v16i32 : - OpSize == 256 ? MVT::v32i8 : - MVT::v16i8; - EVT CmpVT = OpSize == 512 ? MVT::v16i1 : VecVT; + bool HasPT = Subtarget.hasSSE41(); + EVT VecVT = MVT::v16i8; + EVT CmpVT = MVT::v16i8; + if (OpSize == 256) + VecVT = CmpVT = MVT::v32i8; + if (OpSize == 512) { + if (Subtarget.hasBWI()) { + VecVT = MVT::v64i8; + CmpVT = MVT::v64i1; + } else { + VecVT = MVT::v16i32; + CmpVT = MVT::v16i1; + } + } + SDValue Cmp; if (IsOrXorXorCCZero) { // This is a bitwise-combined equality comparison of 2 pairs of vectors: @@ -42179,18 +42639,38 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG, SDValue B = DAG.getBitcast(VecVT, X.getOperand(0).getOperand(1)); SDValue C = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(0)); SDValue D = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(1)); - SDValue Cmp1 = DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ); - SDValue Cmp2 = DAG.getSetCC(DL, CmpVT, C, D, ISD::SETEQ); - Cmp = DAG.getNode(ISD::AND, DL, CmpVT, Cmp1, Cmp2); + if (VecVT == CmpVT && HasPT) { + SDValue Cmp1 = DAG.getNode(ISD::XOR, DL, VecVT, A, B); + SDValue Cmp2 = DAG.getNode(ISD::XOR, DL, VecVT, C, D); + Cmp = DAG.getNode(ISD::OR, DL, VecVT, Cmp1, Cmp2); + } else { + SDValue Cmp1 = DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ); + SDValue Cmp2 = DAG.getSetCC(DL, CmpVT, C, D, ISD::SETEQ); + Cmp = DAG.getNode(ISD::AND, DL, CmpVT, Cmp1, Cmp2); + } } else { SDValue VecX = DAG.getBitcast(VecVT, X); SDValue VecY = DAG.getBitcast(VecVT, Y); - Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ); + if (VecVT == CmpVT && HasPT) { + Cmp = DAG.getNode(ISD::XOR, DL, VecVT, VecX, VecY); + } else { + Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ); + } } // For 512-bits we want to emit a setcc that will lower to kortest. - if (OpSize == 512) - return DAG.getSetCC(DL, VT, DAG.getBitcast(MVT::i16, Cmp), - DAG.getConstant(0xFFFF, DL, MVT::i16), CC); + if (VecVT != CmpVT) { + EVT KRegVT = CmpVT == MVT::v64i1 ? MVT::i64 : MVT::i16; + SDValue Mask = DAG.getAllOnesConstant(DL, KRegVT); + return DAG.getSetCC(DL, VT, DAG.getBitcast(KRegVT, Cmp), Mask, CC); + } + if (HasPT) { + SDValue BCCmp = DAG.getBitcast(OpSize == 256 ? MVT::v4i64 : MVT::v2i64, + Cmp); + SDValue PT = DAG.getNode(X86ISD::PTEST, DL, MVT::i32, BCCmp, BCCmp); + X86::CondCode X86CC = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE; + SDValue SetCC = getSETCC(X86CC, PT, DL, DAG); + return DAG.getNode(ISD::TRUNCATE, DL, VT, SetCC.getValue(0)); + } // If all bytes match (bitmask is 0x(FFFF)FFFF), that's equality. // setcc i128 X, Y, eq --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, eq // setcc i128 X, Y, ne --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, ne @@ -42270,8 +42750,6 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, // go through type promotion to a 128-bit vector. 
if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() && VT.getVectorElementType() == MVT::i1 && - (ExperimentalVectorWideningLegalization || - VT.getVectorNumElements() > 4) && (OpVT.getVectorElementType() == MVT::i8 || OpVT.getVectorElementType() == MVT::i16)) { SDValue Setcc = DAG.getNode(ISD::SETCC, DL, OpVT, LHS, RHS, @@ -42289,7 +42767,8 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, } static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { SDValue Src = N->getOperand(0); MVT SrcVT = Src.getSimpleValueType(); MVT VT = N->getSimpleValueType(0); @@ -42310,7 +42789,7 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG, // Look through int->fp bitcasts that don't change the element width. unsigned EltWidth = SrcVT.getScalarSizeInBits(); - if (Src.getOpcode() == ISD::BITCAST && + if (Subtarget.hasSSE2() && Src.getOpcode() == ISD::BITCAST && Src.getOperand(0).getScalarValueSizeInBits() == EltWidth) return DAG.getNode(X86ISD::MOVMSK, SDLoc(N), VT, Src.getOperand(0)); @@ -42334,71 +42813,123 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue combineX86GatherScatter(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + // With vector masks we only demand the upper bit of the mask. + SDValue Mask = cast<X86MaskedGatherScatterSDNode>(N)->getMask(); + if (Mask.getScalarValueSizeInBits() != 1) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits())); + if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI)) + return SDValue(N, 0); + } + + return SDValue(); + } + static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget &Subtarget) { + TargetLowering::DAGCombinerInfo &DCI) { SDLoc DL(N); + auto *GorS = cast<MaskedGatherScatterSDNode>(N); + SDValue Chain = GorS->getChain(); + SDValue Index = GorS->getIndex(); + SDValue Mask = GorS->getMask(); + SDValue Base = GorS->getBasePtr(); + SDValue Scale = GorS->getScale(); - if (DCI.isBeforeLegalizeOps()) { - SDValue Index = N->getOperand(4); - // Remove any sign extends from 32 or smaller to larger than 32. - // Only do this before LegalizeOps in case we need the sign extend for - // legalization. - if (Index.getOpcode() == ISD::SIGN_EXTEND) { - if (Index.getScalarValueSizeInBits() > 32 && - Index.getOperand(0).getScalarValueSizeInBits() <= 32) { - SmallVector NewOps(N->op_begin(), N->op_end()); - NewOps[4] = Index.getOperand(0); - SDNode *Res = DAG.UpdateNodeOperands(N, NewOps); - if (Res == N) { - // The original sign extend has less users, add back to worklist in - // case it needs to be removed - DCI.AddToWorklist(Index.getNode()); - DCI.AddToWorklist(N); + if (DCI.isBeforeLegalize()) { + unsigned IndexWidth = Index.getScalarValueSizeInBits(); + + // Shrink constant indices if they are larger than 32-bits. + // Only do this before legalize types since v2i64 could become v2i32. + // FIXME: We could check that the type is legal if we're after legalize + // types, but then we would need to construct test cases where that happens. + // FIXME: We could support more than just constant vectors, but we need to + // careful with costing. A truncate that can be optimized out would be fine. + // Otherwise we might only want to create a truncate if it avoids a split.
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(Index)) { + if (BV->isConstant() && IndexWidth > 32 && + DAG.ComputeNumSignBits(Index) > (IndexWidth - 32)) { + unsigned NumElts = Index.getValueType().getVectorNumElements(); + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); + Index = DAG.getNode(ISD::TRUNCATE, DL, NewVT, Index); + if (auto *Gather = dyn_cast<MaskedGatherSDNode>(GorS)) { + SDValue Ops[] = { Chain, Gather->getPassThru(), + Mask, Base, Index, Scale } ; + return DAG.getMaskedGather(Gather->getVTList(), + Gather->getMemoryVT(), DL, Ops, + Gather->getMemOperand(), + Gather->getIndexType()); } - return SDValue(Res, 0); - } + auto *Scatter = cast<MaskedScatterSDNode>(GorS); + SDValue Ops[] = { Chain, Scatter->getValue(), + Mask, Base, Index, Scale }; + return DAG.getMaskedScatter(Scatter->getVTList(), + Scatter->getMemoryVT(), DL, + Ops, Scatter->getMemOperand(), + Scatter->getIndexType()); + } + } + + // Shrink any sign/zero extends from 32 or smaller to larger than 32 if + // there are sufficient sign bits. Only do this before legalize types to + // avoid creating illegal types in truncate. + if ((Index.getOpcode() == ISD::SIGN_EXTEND || + Index.getOpcode() == ISD::ZERO_EXTEND) && + IndexWidth > 32 && + Index.getOperand(0).getScalarValueSizeInBits() <= 32 && + DAG.ComputeNumSignBits(Index) > (IndexWidth - 32)) { + unsigned NumElts = Index.getValueType().getVectorNumElements(); + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); + Index = DAG.getNode(ISD::TRUNCATE, DL, NewVT, Index); + if (auto *Gather = dyn_cast<MaskedGatherSDNode>(GorS)) { + SDValue Ops[] = { Chain, Gather->getPassThru(), + Mask, Base, Index, Scale } ; + return DAG.getMaskedGather(Gather->getVTList(), + Gather->getMemoryVT(), DL, Ops, + Gather->getMemOperand(), + Gather->getIndexType()); + } + auto *Scatter = cast<MaskedScatterSDNode>(GorS); + SDValue Ops[] = { Chain, Scatter->getValue(), + Mask, Base, Index, Scale }; + return DAG.getMaskedScatter(Scatter->getVTList(), + Scatter->getMemoryVT(), DL, + Ops, Scatter->getMemOperand(), + Scatter->getIndexType()); } + } + + if (DCI.isBeforeLegalizeOps()) { + unsigned IndexWidth = Index.getScalarValueSizeInBits(); // Make sure the index is either i32 or i64 - unsigned ScalarSize = Index.getScalarValueSizeInBits(); - if (ScalarSize != 32 && ScalarSize != 64) { - MVT EltVT = ScalarSize > 32 ? MVT::i64 : MVT::i32; + if (IndexWidth != 32 && IndexWidth != 64) { + MVT EltVT = IndexWidth > 32 ? MVT::i64 : MVT::i32; EVT IndexVT = EVT::getVectorVT(*DAG.getContext(), EltVT, Index.getValueType().getVectorNumElements()); Index = DAG.getSExtOrTrunc(Index, DL, IndexVT); - SmallVector NewOps(N->op_begin(), N->op_end()); - NewOps[4] = Index; - SDNode *Res = DAG.UpdateNodeOperands(N, NewOps); - if (Res == N) - DCI.AddToWorklist(N); - return SDValue(Res, 0); - } - - // Try to remove zero extends from 32->64 if we know the sign bit of - // the input is zero. - if (Index.getOpcode() == ISD::ZERO_EXTEND && - Index.getScalarValueSizeInBits() == 64 && - Index.getOperand(0).getScalarValueSizeInBits() == 32) { - if (DAG.SignBitIsZero(Index.getOperand(0))) { - SmallVector NewOps(N->op_begin(), N->op_end()); - NewOps[4] = Index.getOperand(0); - SDNode *Res = DAG.UpdateNodeOperands(N, NewOps); - if (Res == N) { - // The original sign extend has less users, add back to worklist in - // case it needs to be removed - DCI.AddToWorklist(Index.getNode()); - DCI.AddToWorklist(N); - } - return SDValue(Res, 0); - } - } - } - - // With AVX2 we only demand the upper bit of the mask.
- if (!Subtarget.hasAVX512()) { + if (auto *Gather = dyn_cast(GorS)) { + SDValue Ops[] = { Chain, Gather->getPassThru(), + Mask, Base, Index, Scale } ; + return DAG.getMaskedGather(Gather->getVTList(), + Gather->getMemoryVT(), DL, Ops, + Gather->getMemOperand(), + Gather->getIndexType()); + } + auto *Scatter = cast(GorS); + SDValue Ops[] = { Chain, Scatter->getValue(), + Mask, Base, Index, Scale }; + return DAG.getMaskedScatter(Scatter->getVTList(), + Scatter->getMemoryVT(), DL, + Ops, Scatter->getMemOperand(), + Scatter->getIndexType()); + } + } + + // With vector masks we only demand the upper bit of the mask. + if (Mask.getScalarValueSizeInBits() != 1) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDValue Mask = N->getOperand(2); APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits())); if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI)) return SDValue(N, 0); @@ -42432,7 +42963,7 @@ static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG, // Make sure to not keep references to operands, as combineSetCCEFLAGS can // RAUW them under us. if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG, Subtarget)) { - SDValue Cond = DAG.getConstant(CC, DL, MVT::i8); + SDValue Cond = DAG.getTargetConstant(CC, DL, MVT::i8); return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), N->getOperand(0), N->getOperand(1), Cond, Flags); } @@ -42549,6 +43080,7 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG, } static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { // First try to optimize away the conversion entirely when it's // conditionally from a constant. Vectors only. @@ -42578,13 +43110,22 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, unsigned BitWidth = InVT.getScalarSizeInBits(); unsigned NumSignBits = DAG.ComputeNumSignBits(Op0); if (NumSignBits >= (BitWidth - 31)) { - EVT TruncVT = EVT::getIntegerVT(*DAG.getContext(), 32); + EVT TruncVT = MVT::i32; if (InVT.isVector()) TruncVT = EVT::getVectorVT(*DAG.getContext(), TruncVT, InVT.getVectorNumElements()); SDLoc dl(N); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0); - return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc); + if (DCI.isBeforeLegalize() || TruncVT != MVT::v2i32) { + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0); + return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc); + } + // If we're after legalize and the type is v2i32 we need to shuffle and + // use CVTSI2P. 
+ assert(InVT == MVT::v2i64 && "Unexpected VT!"); + SDValue Cast = DAG.getBitcast(MVT::v4i32, Op0); + SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Cast, Cast, + { 0, 2, -1, -1 }); + return DAG.getNode(X86ISD::CVTSI2P, dl, VT, Shuf); } } @@ -42604,7 +43145,7 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, if (Subtarget.hasDQI() && VT != MVT::f80) return SDValue(); - if (!Ld->isVolatile() && !VT.isVector() && + if (Ld->isSimple() && !VT.isVector() && ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() && !Subtarget.is64Bit() && LdVT == MVT::i64) { SDValue FILDChain = Subtarget.getTargetLowering()->BuildFILD( @@ -42841,12 +43382,12 @@ static SDValue combineADC(SDNode *N, SelectionDAG &DAG, SDLoc DL(N); EVT VT = N->getValueType(0); SDValue CarryOut = DAG.getConstant(0, DL, N->getValueType(1)); - SDValue Res1 = DAG.getNode(ISD::AND, DL, VT, - DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, - DAG.getConstant(X86::COND_B, DL, - MVT::i8), - N->getOperand(2)), - DAG.getConstant(1, DL, VT)); + SDValue Res1 = + DAG.getNode(ISD::AND, DL, VT, + DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, + DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), + N->getOperand(2)), + DAG.getConstant(1, DL, VT)); return DCI.CombineTo(N, Res1, CarryOut); } @@ -42906,7 +43447,7 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { // -1 + SETAE --> -1 + (!CF) --> CF ? -1 : 0 --> SBB %eax, %eax // 0 - SETB --> 0 - (CF) --> CF ? -1 : 0 --> SBB %eax, %eax return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, - DAG.getConstant(X86::COND_B, DL, MVT::i8), + DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), Y.getOperand(1)); } @@ -42924,7 +43465,7 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { EFLAGS.getOperand(1), EFLAGS.getOperand(0)); SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo()); return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, - DAG.getConstant(X86::COND_B, DL, MVT::i8), + DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), NewEFLAGS); } } @@ -42984,7 +43525,7 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32); SDValue Neg = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Zero, Z); return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, - DAG.getConstant(X86::COND_B, DL, MVT::i8), + DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), SDValue(Neg.getNode(), 1)); } @@ -42997,7 +43538,7 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { SDValue One = DAG.getConstant(1, DL, ZVT); SDValue Cmp1 = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z, One); return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, - DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp1); + DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), Cmp1); } } @@ -43025,9 +43566,6 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG, if (!Subtarget.hasSSE2()) return SDValue(); - SDValue Op0 = N->getOperand(0); - SDValue Op1 = N->getOperand(1); - EVT VT = N->getValueType(0); // If the vector size is less than 128, or greater than the supported RegSize, @@ -43035,14 +43573,27 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG, if (!VT.isVector() || VT.getVectorNumElements() < 8) return SDValue(); - if (Op0.getOpcode() != ISD::MUL) - std::swap(Op0, Op1); - if (Op0.getOpcode() != ISD::MUL) - return SDValue(); + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); - ShrinkMode Mode; - if (!canReduceVMulWidth(Op0.getNode(), DAG, Mode) || Mode == MULU16) - return SDValue(); + auto UsePMADDWD = 
[&](SDValue Op) { + ShrinkMode Mode; + return Op.getOpcode() == ISD::MUL && + canReduceVMulWidth(Op.getNode(), DAG, Mode) && Mode != MULU16 && + (!Subtarget.hasSSE41() || + (Op->isOnlyUserOf(Op.getOperand(0).getNode()) && + Op->isOnlyUserOf(Op.getOperand(1).getNode()))); + }; + + SDValue MulOp, OtherOp; + if (UsePMADDWD(Op0)) { + MulOp = Op0; + OtherOp = Op1; + } else if (UsePMADDWD(Op1)) { + MulOp = Op1; + OtherOp = Op0; + } else + return SDValue(); SDLoc DL(N); EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, @@ -43050,34 +43601,27 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG, EVT MAddVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, VT.getVectorNumElements() / 2); + // Shrink the operands of mul. + SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, MulOp->getOperand(0)); + SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, MulOp->getOperand(1)); + // Madd vector size is half of the original vector size auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL, ArrayRef Ops) { MVT OpVT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32); return DAG.getNode(X86ISD::VPMADDWD, DL, OpVT, Ops); }; - - auto BuildPMADDWD = [&](SDValue Mul) { - // Shrink the operands of mul. - SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, Mul.getOperand(0)); - SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, Mul.getOperand(1)); - - SDValue Madd = SplitOpsAndApply(DAG, Subtarget, DL, MAddVT, { N0, N1 }, - PMADDWDBuilder); - // Fill the rest of the output with 0 - return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Madd, - DAG.getConstant(0, DL, MAddVT)); - }; - - Op0 = BuildPMADDWD(Op0); - - // It's possible that Op1 is also a mul we can reduce. - if (Op1.getOpcode() == ISD::MUL && - canReduceVMulWidth(Op1.getNode(), DAG, Mode) && Mode != MULU16) { - Op1 = BuildPMADDWD(Op1); - } - - return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1); + SDValue Madd = SplitOpsAndApply(DAG, Subtarget, DL, MAddVT, { N0, N1 }, + PMADDWDBuilder); + // Fill the rest of the output with 0 + SDValue Zero = DAG.getConstant(0, DL, Madd.getSimpleValueType()); + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Madd, Zero); + + // Preserve the reduction flag on the ADD. We may need to revisit for the + // other operand. + SDNodeFlags Flags; + Flags.setVectorReduction(true); + return DAG.getNode(ISD::ADD, DL, VT, Concat, OtherOp, Flags); } static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG, @@ -43087,8 +43631,6 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG, SDLoc DL(N); EVT VT = N->getValueType(0); - SDValue Op0 = N->getOperand(0); - SDValue Op1 = N->getOperand(1); // TODO: There's nothing special about i32, any integer type above i16 should // work just as well. @@ -43108,80 +43650,53 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG, if (VT.getSizeInBits() / 4 > RegSize) return SDValue(); - // We know N is a reduction add, which means one of its operands is a phi. - // To match SAD, we need the other operand to be a ABS. - if (Op0.getOpcode() != ISD::ABS) - std::swap(Op0, Op1); - if (Op0.getOpcode() != ISD::ABS) - return SDValue(); - - auto BuildPSADBW = [&](SDValue Op0, SDValue Op1) { - // SAD pattern detected. Now build a SAD instruction and an addition for - // reduction. Note that the number of elements of the result of SAD is less - // than the number of elements of its input. Therefore, we could only update - // part of elements in the reduction vector. 
- SDValue Sad = createPSADBW(DAG, Op0, Op1, DL, Subtarget); - - // The output of PSADBW is a vector of i64. - // We need to turn the vector of i64 into a vector of i32. - // If the reduction vector is at least as wide as the psadbw result, just - // bitcast. If it's narrower, truncate - the high i32 of each i64 is zero - // anyway. - MVT ResVT = MVT::getVectorVT(MVT::i32, Sad.getValueSizeInBits() / 32); - if (VT.getSizeInBits() >= ResVT.getSizeInBits()) - Sad = DAG.getNode(ISD::BITCAST, DL, ResVT, Sad); - else - Sad = DAG.getNode(ISD::TRUNCATE, DL, VT, Sad); - - if (VT.getSizeInBits() > ResVT.getSizeInBits()) { - // Fill the upper elements with zero to match the add width. - SDValue Zero = DAG.getConstant(0, DL, VT); - Sad = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Zero, Sad, - DAG.getIntPtrConstant(0, DL)); - } - - return Sad; - }; + // We know N is a reduction add. To match SAD, we need one of the operands to + // be an ABS. + SDValue AbsOp = N->getOperand(0); + SDValue OtherOp = N->getOperand(1); + if (AbsOp.getOpcode() != ISD::ABS) + std::swap(AbsOp, OtherOp); + if (AbsOp.getOpcode() != ISD::ABS) + return SDValue(); // Check whether we have an abs-diff pattern feeding into the select. SDValue SadOp0, SadOp1; - if (!detectZextAbsDiff(Op0, SadOp0, SadOp1)) - return SDValue(); - - Op0 = BuildPSADBW(SadOp0, SadOp1); - - // It's possible we have a sad on the other side too. - if (Op1.getOpcode() == ISD::ABS && - detectZextAbsDiff(Op1, SadOp0, SadOp1)) { - Op1 = BuildPSADBW(SadOp0, SadOp1); - } - - return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1); -} - -/// Convert vector increment or decrement to sub/add with an all-ones constant: -/// add X, <1, 1...> --> sub X, <-1, -1...> -/// sub X, <1, 1...> --> add X, <-1, -1...> -/// The all-ones vector constant can be materialized using a pcmpeq instruction -/// that is commonly recognized as an idiom (has no register dependency), so -/// that's better/smaller than loading a splat 1 constant. -static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG) { - assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && - "Unexpected opcode for increment/decrement transform"); - - // Pseudo-legality check: getOnesVector() expects one of these types, so bail - // out and wait for legalization if we have an unsupported vector length. - EVT VT = N->getValueType(0); - if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector()) - return SDValue(); - - APInt SplatVal; - if (!isConstantSplat(N->getOperand(1), SplatVal) || !SplatVal.isOneValue()) - return SDValue(); - - SDValue AllOnesVec = getOnesVector(VT, DAG, SDLoc(N)); - unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD; - return DAG.getNode(NewOpcode, SDLoc(N), VT, N->getOperand(0), AllOnesVec); + if(!detectZextAbsDiff(AbsOp, SadOp0, SadOp1)) + return SDValue(); + + // SAD pattern detected. Now build a SAD instruction and an addition for + // reduction. Note that the number of elements of the result of SAD is less + // than the number of elements of its input. Therefore, we could only update + // part of elements in the reduction vector. + SDValue Sad = createPSADBW(DAG, SadOp0, SadOp1, DL, Subtarget); + + // The output of PSADBW is a vector of i64. + // We need to turn the vector of i64 into a vector of i32. + // If the reduction vector is at least as wide as the psadbw result, just + // bitcast. If it's narrower which can only occur for v2i32, bits 127:16 of + // the PSADBW will be zero. 
If we promote/narrow vectors, truncate the v2i64 + result to v2i32 which will be removed by type legalization. If we widen + narrow vectors then we bitcast to v4i32 and extract v2i32. + MVT ResVT = MVT::getVectorVT(MVT::i32, Sad.getValueSizeInBits() / 32); + Sad = DAG.getNode(ISD::BITCAST, DL, ResVT, Sad); + + if (VT.getSizeInBits() > ResVT.getSizeInBits()) { + // Fill the upper elements with zero to match the add width. + assert(VT.getSizeInBits() % ResVT.getSizeInBits() == 0 && "Unexpected VTs"); + unsigned NumConcats = VT.getSizeInBits() / ResVT.getSizeInBits(); + SmallVector Ops(NumConcats, DAG.getConstant(0, DL, ResVT)); + Ops[0] = Sad; + Sad = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Ops); + } else if (VT.getSizeInBits() < ResVT.getSizeInBits()) { + Sad = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Sad, + DAG.getIntPtrConstant(0, DL)); + } + + // Preserve the reduction flag on the ADD. We may need to revisit for the + // other operand. + SDNodeFlags Flags; + Flags.setVectorReduction(true); + return DAG.getNode(ISD::ADD, DL, VT, Sad, OtherOp, Flags); } static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1, @@ -43294,8 +43809,8 @@ static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1, } // Attempt to turn this pattern into PMADDWD. -// (mul (add (zext (build_vector)), (zext (build_vector))), -// (add (zext (build_vector)), (zext (build_vector))) +// (mul (add (sext (build_vector)), (sext (build_vector))), +// (add (sext (build_vector)), (sext (build_vector))) static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1, const SDLoc &DL, EVT VT, const X86Subtarget &Subtarget) { @@ -43415,6 +43930,7 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1, } static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { const SDNodeFlags Flags = N->getFlags(); if (Flags.hasVectorReduction()) { @@ -43445,8 +43961,29 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, HADDBuilder); } - if (SDValue V = combineIncDecVector(N, DAG)) - return V; + // If vectors of i1 are legal, turn (add (zext (vXi1 X)), Y) into + // (sub Y, (sext (vXi1 X))). + // FIXME: We have the (sub Y, (zext (vXi1 X))) -> (add (sext (vXi1 X)), Y) in + // generic DAG combine without a legal type check, but adding this there + // caused regressions. + if (VT.isVector()) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (Op0.getOpcode() == ISD::ZERO_EXTEND && + Op0.getOperand(0).getValueType().getVectorElementType() == MVT::i1 && + TLI.isTypeLegal(Op0.getOperand(0).getValueType())) { + SDLoc DL(N); + SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op0.getOperand(0)); + return DAG.getNode(ISD::SUB, DL, VT, Op1, SExt); + } + + if (Op1.getOpcode() == ISD::ZERO_EXTEND && + Op1.getOperand(0).getValueType().getVectorElementType() == MVT::i1 && + TLI.isTypeLegal(Op1.getOperand(0).getValueType())) { + SDLoc DL(N); + SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op1.getOperand(0)); + return DAG.getNode(ISD::SUB, DL, VT, Op0, SExt); + } + } return combineAddOrSubToADCOrSBB(N, DAG); } @@ -43457,13 +43994,15 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG, SDValue Op1 = N->getOperand(1); EVT VT = N->getValueType(0); + if (!VT.isVector()) + return SDValue(); + // PSUBUS is supported, starting from SSE2, but truncation for v8i32 // is only worth it with SSSE3 (PSHUFB).
- if (!(Subtarget.hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) && + EVT EltVT = VT.getVectorElementType(); + if (!(Subtarget.hasSSE2() && (EltVT == MVT::i8 || EltVT == MVT::i16)) && !(Subtarget.hasSSSE3() && (VT == MVT::v8i32 || VT == MVT::v8i64)) && - !(Subtarget.hasAVX() && (VT == MVT::v32i8 || VT == MVT::v16i16)) && - !(Subtarget.useBWIRegs() && (VT == MVT::v64i8 || VT == MVT::v32i16 || - VT == MVT::v16i32 || VT == MVT::v8i64))) + !(Subtarget.useBWIRegs() && (VT == MVT::v16i32))) return SDValue(); SDValue SubusLHS, SubusRHS; @@ -43493,16 +44032,13 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG, } else return SDValue(); - auto USUBSATBuilder = [](SelectionDAG &DAG, const SDLoc &DL, - ArrayRef Ops) { - return DAG.getNode(ISD::USUBSAT, DL, Ops[0].getValueType(), Ops); - }; - // PSUBUS doesn't support v8i32/v8i64/v16i32, but it can be enabled with // special preprocessing in some cases. - if (VT != MVT::v8i32 && VT != MVT::v16i32 && VT != MVT::v8i64) - return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, - { SubusLHS, SubusRHS }, USUBSATBuilder); + if (EltVT == MVT::i8 || EltVT == MVT::i16) + return DAG.getNode(ISD::USUBSAT, SDLoc(N), VT, SubusLHS, SubusRHS); + + assert((VT == MVT::v8i32 || VT == MVT::v16i32 || VT == MVT::v8i64) && + "Unexpected VT!"); // Special preprocessing case can be only applied // if the value was zero extended from 16 bit, @@ -43531,15 +44067,16 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG, SDValue NewSubusLHS = DAG.getZExtOrTrunc(SubusLHS, SDLoc(SubusLHS), ShrinkedType); SDValue NewSubusRHS = DAG.getZExtOrTrunc(UMin, SDLoc(SubusRHS), ShrinkedType); - SDValue Psubus = - SplitOpsAndApply(DAG, Subtarget, SDLoc(N), ShrinkedType, - { NewSubusLHS, NewSubusRHS }, USUBSATBuilder); + SDValue Psubus = DAG.getNode(ISD::USUBSAT, SDLoc(N), ShrinkedType, + NewSubusLHS, NewSubusRHS); + // Zero extend the result, it may be used somewhere as 32 bit, // if not zext and following trunc will shrink. return DAG.getZExtOrTrunc(Psubus, SDLoc(N), ExtType); } static SDValue combineSub(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); @@ -43576,9 +44113,6 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG, HSUBBuilder); } - if (SDValue V = combineIncDecVector(N, DAG)) - return V; - // Try to create PSUBUS if SUB's argument is max/min if (SDValue V = combineSubToSubus(N, DAG, Subtarget)) return V; @@ -43712,14 +44246,6 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, } } - // If we're inserting all zeros into the upper half, change this to - // an insert into an all zeros vector. We will match this to a move - // with implicit upper bit zeroing during isel. - if (Ops.size() == 2 && ISD::isBuildVectorAllZeros(Ops[1].getNode())) - return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, - getZeroVector(VT, Subtarget, DAG, DL), Ops[0], - DAG.getIntPtrConstant(0, DL)); - return SDValue(); } @@ -43786,10 +44312,10 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, // least as large as the original insertion. Just insert the original // subvector into a zero vector. 
if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && IdxVal == 0 && - SubVec.getConstantOperandAPInt(1) == 0 && + isNullConstant(SubVec.getOperand(1)) && SubVec.getOperand(0).getOpcode() == ISD::INSERT_SUBVECTOR) { SDValue Ins = SubVec.getOperand(0); - if (Ins.getConstantOperandAPInt(2) == 0 && + if (isNullConstant(Ins.getOperand(2)) && ISD::isBuildVectorAllZeros(Ins.getOperand(0).getNode()) && Ins.getOperand(1).getValueSizeInBits() <= SubVecVT.getSizeInBits()) return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, @@ -43825,31 +44351,42 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, // Match concat_vector style patterns. SmallVector SubVectorOps; - if (collectConcatOps(N, SubVectorOps)) + if (collectConcatOps(N, SubVectorOps)) { if (SDValue Fold = combineConcatVectorOps(dl, OpVT, SubVectorOps, DAG, DCI, Subtarget)) return Fold; - // If we are inserting into both halves of the vector, the starting vector - // should be undef. If it isn't, make it so. Only do this if the early insert - // has no other uses. - // TODO: Should this be a generic DAG combine? - // TODO: Why doesn't SimplifyDemandedVectorElts catch this? - if ((IdxVal == OpVT.getVectorNumElements() / 2) && - Vec.getOpcode() == ISD::INSERT_SUBVECTOR && - OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2 && - isNullConstant(Vec.getOperand(2)) && !Vec.getOperand(0).isUndef() && - Vec.hasOneUse()) { - Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, DAG.getUNDEF(OpVT), - Vec.getOperand(1), Vec.getOperand(2)); - return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec, SubVec, - N->getOperand(2)); + // If we're inserting all zeros into the upper half, change this to + // a concat with zero. We will match this to a move + // with implicit upper bit zeroing during isel. + // We do this here because we don't want combineConcatVectorOps to + // create INSERT_SUBVECTOR from CONCAT_VECTORS. + if (SubVectorOps.size() == 2 && + ISD::isBuildVectorAllZeros(SubVectorOps[1].getNode())) + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, + getZeroVector(OpVT, Subtarget, DAG, dl), + SubVectorOps[0], DAG.getIntPtrConstant(0, dl)); } // If this is a broadcast insert into an upper undef, use a larger broadcast. if (Vec.isUndef() && IdxVal != 0 && SubVec.getOpcode() == X86ISD::VBROADCAST) return DAG.getNode(X86ISD::VBROADCAST, dl, OpVT, SubVec.getOperand(0)); + // If this is a broadcast load inserted into an upper undef, use a larger + // broadcast load. 
+ if (Vec.isUndef() && IdxVal != 0 && SubVec.hasOneUse() && + SubVec.getOpcode() == X86ISD::VBROADCAST_LOAD) { + auto *MemIntr = cast(SubVec); + SDVTList Tys = DAG.getVTList(OpVT, MVT::Other); + SDValue Ops[] = { MemIntr->getChain(), MemIntr->getBasePtr() }; + SDValue BcastLd = + DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops, + MemIntr->getMemoryVT(), + MemIntr->getMemOperand()); + DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1)); + return BcastLd; + } + return SDValue(); } @@ -43928,12 +44465,15 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, return SDValue(); MVT VT = N->getSimpleValueType(0); - EVT WideVecVT = N->getOperand(0).getValueType(); - SDValue WideVec = peekThroughBitcasts(N->getOperand(0)); + SDValue InVec = N->getOperand(0); + SDValue InVecBC = peekThroughBitcasts(InVec); + EVT InVecVT = InVec.getValueType(); + EVT InVecBCVT = InVecBC.getValueType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (Subtarget.hasAVX() && !Subtarget.hasAVX2() && - TLI.isTypeLegal(WideVecVT) && - WideVecVT.getSizeInBits() == 256 && WideVec.getOpcode() == ISD::AND) { + TLI.isTypeLegal(InVecVT) && + InVecVT.getSizeInBits() == 256 && InVecBC.getOpcode() == ISD::AND) { auto isConcatenatedNot = [] (SDValue V) { V = peekThroughBitcasts(V); if (!isBitwiseNot(V)) @@ -43941,12 +44481,12 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, SDValue NotOp = V->getOperand(0); return peekThroughBitcasts(NotOp).getOpcode() == ISD::CONCAT_VECTORS; }; - if (isConcatenatedNot(WideVec.getOperand(0)) || - isConcatenatedNot(WideVec.getOperand(1))) { + if (isConcatenatedNot(InVecBC.getOperand(0)) || + isConcatenatedNot(InVecBC.getOperand(1))) { // extract (and v4i64 X, (not (concat Y1, Y2))), n -> andnp v2i64 X(n), Y1 - SDValue Concat = split256IntArith(WideVec, DAG); + SDValue Concat = split256IntArith(InVecBC, DAG); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, - DAG.getBitcast(WideVecVT, Concat), N->getOperand(1)); + DAG.getBitcast(InVecVT, Concat), N->getOperand(1)); } } @@ -43956,7 +44496,6 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, if (SDValue V = narrowExtractedVectorSelect(N, DAG)) return V; - SDValue InVec = N->getOperand(0); unsigned IdxVal = cast(N->getOperand(1))->getZExtValue(); if (ISD::isBuildVectorAllZeros(InVec.getNode())) @@ -43976,31 +44515,42 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, // Try to move vector bitcast after extract_subv by scaling extraction index: // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index') // TODO: Move this to DAGCombiner::visitEXTRACT_SUBVECTOR - if (InVec.getOpcode() == ISD::BITCAST && - InVec.getOperand(0).getValueType().isVector()) { - SDValue SrcOp = InVec.getOperand(0); - EVT SrcVT = SrcOp.getValueType(); - unsigned SrcNumElts = SrcVT.getVectorNumElements(); - unsigned DestNumElts = InVec.getValueType().getVectorNumElements(); + if (InVec != InVecBC && InVecBCVT.isVector()) { + unsigned SrcNumElts = InVecBCVT.getVectorNumElements(); + unsigned DestNumElts = InVecVT.getVectorNumElements(); if ((DestNumElts % SrcNumElts) == 0) { unsigned DestSrcRatio = DestNumElts / SrcNumElts; if ((VT.getVectorNumElements() % DestSrcRatio) == 0) { unsigned NewExtNumElts = VT.getVectorNumElements() / DestSrcRatio; EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), - SrcVT.getScalarType(), NewExtNumElts); + InVecBCVT.getScalarType(), NewExtNumElts); if ((N->getConstantOperandVal(1) % DestSrcRatio) == 0 && 
TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) { unsigned IndexValScaled = N->getConstantOperandVal(1) / DestSrcRatio; SDLoc DL(N); SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL); SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT, - SrcOp, NewIndex); + InVecBC, NewIndex); return DAG.getBitcast(VT, NewExtract); } } } } + // If we are extracting from an insert into a zero vector, replace with a + // smaller insert into zero if we don't access less than the original + // subvector. Don't do this for i1 vectors. + if (VT.getVectorElementType() != MVT::i1 && + InVec.getOpcode() == ISD::INSERT_SUBVECTOR && IdxVal == 0 && + InVec.hasOneUse() && isNullConstant(InVec.getOperand(2)) && + ISD::isBuildVectorAllZeros(InVec.getOperand(0).getNode()) && + InVec.getOperand(1).getValueSizeInBits() <= VT.getSizeInBits()) { + SDLoc DL(N); + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, + getZeroVector(VT, Subtarget, DAG, DL), + InVec.getOperand(1), InVec.getOperand(2)); + } + // If we're extracting from a broadcast then we're better off just // broadcasting to the smaller type directly, assuming this is the only use. // As its a broadcast we don't care about the extraction index. @@ -44008,11 +44558,25 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, InVec.getOperand(0).getValueSizeInBits() <= VT.getSizeInBits()) return DAG.getNode(X86ISD::VBROADCAST, SDLoc(N), VT, InVec.getOperand(0)); + if (InVec.getOpcode() == X86ISD::VBROADCAST_LOAD && InVec.hasOneUse()) { + auto *MemIntr = cast(InVec); + if (MemIntr->getMemoryVT().getSizeInBits() <= VT.getSizeInBits()) { + SDVTList Tys = DAG.getVTList(VT, MVT::Other); + SDValue Ops[] = { MemIntr->getChain(), MemIntr->getBasePtr() }; + SDValue BcastLd = + DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, SDLoc(N), Tys, Ops, + MemIntr->getMemoryVT(), + MemIntr->getMemOperand()); + DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1)); + return BcastLd; + } + } + // If we're extracting the lowest subvector and we're the only user, // we may be able to perform this with a smaller vector width. if (IdxVal == 0 && InVec.hasOneUse()) { unsigned InOpcode = InVec.getOpcode(); - if (VT == MVT::v2f64 && InVec.getValueType() == MVT::v4f64) { + if (VT == MVT::v2f64 && InVecVT == MVT::v4f64) { // v2f64 CVTDQ2PD(v4i32). if (InOpcode == ISD::SINT_TO_FP && InVec.getOperand(0).getValueType() == MVT::v4i32) { @@ -44093,7 +44657,8 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) { // Simplify PMULDQ and PMULUDQ operations. static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); @@ -44103,23 +44668,43 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG, return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), RHS, LHS); // Multiply by zero. + // Don't return RHS as it may contain UNDEFs. if (ISD::isBuildVectorAllZeros(RHS.getNode())) - return RHS; - - // Aggressively peek through ops to get at the demanded low bits. - APInt DemandedMask = APInt::getLowBitsSet(64, 32); - SDValue DemandedLHS = DAG.GetDemandedBits(LHS, DemandedMask); - SDValue DemandedRHS = DAG.GetDemandedBits(RHS, DemandedMask); - if (DemandedLHS || DemandedRHS) - return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), - DemandedLHS ? DemandedLHS : LHS, - DemandedRHS ? 
DemandedRHS : RHS); + return DAG.getConstant(0, SDLoc(N), N->getValueType(0)); // PMULDQ/PMULUDQ only uses lower 32 bits from each vector element. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnesValue(64), DCI)) return SDValue(N, 0); + // If the input is an extend_invec and the SimplifyDemandedBits call didn't + // convert it to any_extend_invec, due to the LegalOperations check, do the + // conversion directly to a vector shuffle manually. This exposes combine + // opportunities missed by combineExtInVec not calling + // combineX86ShufflesRecursively on SSE4.1 targets. + // FIXME: This is basically a hack around several other issues related to + // ANY_EXTEND_VECTOR_INREG. + if (N->getValueType(0) == MVT::v2i64 && LHS.hasOneUse() && + (LHS.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG || + LHS.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG) && + LHS.getOperand(0).getValueType() == MVT::v4i32) { + SDLoc dl(N); + LHS = DAG.getVectorShuffle(MVT::v4i32, dl, LHS.getOperand(0), + LHS.getOperand(0), { 0, -1, 1, -1 }); + LHS = DAG.getBitcast(MVT::v2i64, LHS); + return DAG.getNode(N->getOpcode(), dl, MVT::v2i64, LHS, RHS); + } + if (N->getValueType(0) == MVT::v2i64 && RHS.hasOneUse() && + (RHS.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG || + RHS.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG) && + RHS.getOperand(0).getValueType() == MVT::v4i32) { + SDLoc dl(N); + RHS = DAG.getVectorShuffle(MVT::v4i32, dl, RHS.getOperand(0), + RHS.getOperand(0), { 0, -1, 1, -1 }); + RHS = DAG.getBitcast(MVT::v2i64, RHS); + return DAG.getNode(N->getOpcode(), dl, MVT::v2i64, LHS, RHS); + } + return SDValue(); } @@ -44134,7 +44719,7 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG, if (!DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) { auto *Ld = cast(In); - if (!Ld->isVolatile()) { + if (Ld->isSimple()) { MVT SVT = In.getSimpleValueType().getVectorElementType(); ISD::LoadExtType Ext = N->getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ? ISD::SEXTLOAD : ISD::ZEXTLOAD; EVT MemVT = EVT::getVectorVT(*DAG.getContext(), SVT, @@ -44150,17 +44735,6 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG, } } - // Disabling for widening legalization for now. We can enable if we find a - // case that needs it. Otherwise it can be deleted when we switch to - // widening legalization. - if (ExperimentalVectorWideningLegalization) - return SDValue(); - - // Combine (ext_invec (ext_invec X)) -> (ext_invec X) - if (In.getOpcode() == N->getOpcode() && - TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getOperand(0).getValueType())) - return DAG.getNode(N->getOpcode(), SDLoc(N), VT, In.getOperand(0)); - // Attempt to combine as a shuffle. 
// TODO: SSE41 support if (Subtarget.hasAVX() && N->getOpcode() != ISD::SIGN_EXTEND_VECTOR_INREG) { @@ -44173,6 +44747,20 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue combineKSHIFT(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + EVT VT = N->getValueType(0); + + APInt KnownUndef, KnownZero; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); + if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef, + KnownZero, DCI)) + return SDValue(N, 0); + + return SDValue(); +} + SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -44196,8 +44784,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget); case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget); case X86ISD::CMP: return combineCMP(N, DAG); - case ISD::ADD: return combineAdd(N, DAG, Subtarget); - case ISD::SUB: return combineSub(N, DAG, Subtarget); + case ISD::ADD: return combineAdd(N, DAG, DCI, Subtarget); + case ISD::SUB: return combineSub(N, DAG, DCI, Subtarget); case X86ISD::ADD: case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI); case X86ISD::SBB: return combineSBB(N, DAG); @@ -44214,12 +44802,13 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget); case ISD::STORE: return combineStore(N, DAG, DCI, Subtarget); case ISD::MSTORE: return combineMaskedStore(N, DAG, DCI, Subtarget); - case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, Subtarget); + case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, DCI, Subtarget); case ISD::UINT_TO_FP: return combineUIntToFP(N, DAG, Subtarget); case ISD::FADD: case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget); case ISD::FNEG: return combineFneg(N, DAG, Subtarget); case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget); + case X86ISD::VTRUNC: return combineVTRUNC(N, DAG); case X86ISD::ANDNP: return combineAndnp(N, DAG, DCI, Subtarget); case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget); case X86ISD::FANDN: return combineFAndn(N, DAG, Subtarget); @@ -44299,20 +44888,22 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::FNMADD_RND: case X86ISD::FNMSUB: case X86ISD::FNMSUB_RND: - case ISD::FMA: return combineFMA(N, DAG, Subtarget); + case ISD::FMA: return combineFMA(N, DAG, DCI, Subtarget); case X86ISD::FMADDSUB_RND: case X86ISD::FMSUBADD_RND: case X86ISD::FMADDSUB: - case X86ISD::FMSUBADD: return combineFMADDSUB(N, DAG, Subtarget); - case X86ISD::MOVMSK: return combineMOVMSK(N, DAG, DCI); + case X86ISD::FMSUBADD: return combineFMADDSUB(N, DAG, DCI); + case X86ISD::MOVMSK: return combineMOVMSK(N, DAG, DCI, Subtarget); case X86ISD::MGATHER: - case X86ISD::MSCATTER: + case X86ISD::MSCATTER: return combineX86GatherScatter(N, DAG, DCI); case ISD::MGATHER: - case ISD::MSCATTER: return combineGatherScatter(N, DAG, DCI, Subtarget); + case ISD::MSCATTER: return combineGatherScatter(N, DAG, DCI); case X86ISD::PCMPEQ: case X86ISD::PCMPGT: return combineVectorCompare(N, DAG, Subtarget); case X86ISD::PMULDQ: - case X86ISD::PMULUDQ: return combinePMULDQ(N, DAG, DCI); + case X86ISD::PMULUDQ: return combinePMULDQ(N, DAG, DCI, Subtarget); + case X86ISD::KSHIFTL: + case X86ISD::KSHIFTR: return combineKSHIFT(N, DAG, DCI); } return SDValue(); @@ -44660,10 +45251,11 @@ 
X86TargetLowering::getConstraintType(StringRef Constraint) const { case 'I': case 'J': case 'K': - case 'L': - case 'M': case 'N': case 'G': + case 'L': + case 'M': + return C_Immediate; case 'C': case 'e': case 'Z': @@ -45175,8 +45767,9 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, if (VConstraint && Subtarget.hasVLX()) return std::make_pair(0U, &X86::FR64XRegClass); return std::make_pair(0U, &X86::FR64RegClass); - // TODO: Handle f128 and i128 in FR128RegClass after it is tested well. - // Vector types. + // TODO: Handle i128 in FR128RegClass after it is tested well. + // Vector types and fp128. + case MVT::f128: case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: @@ -45469,7 +46062,7 @@ void X86TargetLowering::insertCopiesSplitCSR( else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); - unsigned NewVR = MRI->createVirtualRegister(RC); + Register NewVR = MRI->createVirtualRegister(RC); // Create copy from CSR to a virtual register. // FIXME: this currently does not emit CFI pseudo-instructions, it works // fine for CXX_FAST_TLS since the C++-style TLS access functions should be @@ -45514,3 +46107,16 @@ X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const { return Subtarget.isTargetCygMing() ? "___chkstk_ms" : "__chkstk"; return Subtarget.isTargetCygMing() ? "_alloca" : "_chkstk"; } + +unsigned +X86TargetLowering::getStackProbeSize(MachineFunction &MF) const { + // The default stack probe size is 4096 if the function has no stackprobesize + // attribute. + unsigned StackProbeSize = 4096; + const Function &Fn = MF.getFunction(); + if (Fn.hasFnAttribute("stack-probe-size")) + Fn.getFnAttribute("stack-probe-size") + .getValueAsString() + .getAsInteger(0, StackProbeSize); + return StackProbeSize; +} diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index e0be03bc3f9d..6f7e90008de4 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -17,7 +17,6 @@ #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLowering.h" -#include "llvm/Target/TargetOptions.h" namespace llvm { class X86Subtarget; @@ -144,6 +143,10 @@ namespace llvm { /// relative displacements. WrapperRIP, + /// Copies a 64-bit value from an MMX vector to the low word + /// of an XMM vector, with the high word zero filled. + MOVQ2DQ, + /// Copies a 64-bit value from the low word of an XMM vector /// to an MMX vector. MOVDQ2Q, @@ -422,7 +425,8 @@ namespace llvm { // Tests Types Of a FP Values for scalar types. VFPCLASSS, - // Broadcast scalar to vector. + // Broadcast (splat) scalar or element 0 of a vector. If the operand is + // a vector, this node may change the vector length as part of the splat. VBROADCAST, // Broadcast mask to vector. VBROADCASTM, @@ -611,6 +615,9 @@ namespace llvm { // extract_vector_elt, store. VEXTRACT_STORE, + // scalar broadcast from memory + VBROADCAST_LOAD, + // Store FP control world into i16 memory. FNSTCW16m, @@ -680,6 +687,9 @@ namespace llvm { bool isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, bool GuaranteeTCO); + /// If Op is a constant whose elements are all the same constant or + /// undefined, return true and return the constant value in \p SplatVal. + bool isConstantSplat(SDValue Op, APInt &SplatVal); } // end namespace X86 //===--------------------------------------------------------------------===// @@ -792,6 +802,17 @@ namespace llvm { /// and some i16 instructions are slow. 
bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override; + /// Return 1 if we can compute the negated form of the specified expression + /// for the same cost as the expression itself, or 2 if we can compute the + /// negated form more cheaply than the expression itself. Else return 0. + char isNegatibleForFree(SDValue Op, SelectionDAG &DAG, bool LegalOperations, + bool ForCodeSize, unsigned Depth) const override; + + /// If isNegatibleForFree returns true, return the newly negated expression. + SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, bool ForCodeSize, + unsigned Depth) const override; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override; @@ -840,6 +861,13 @@ namespace llvm { bool hasAndNot(SDValue Y) const override; + bool hasBitTest(SDValue X, SDValue Y) const override; + + bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, + unsigned OldShiftOpcode, unsigned NewShiftOpcode, + SelectionDAG &DAG) const override; + bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override; @@ -863,11 +891,7 @@ namespace llvm { return VTIsOk(XVT) && VTIsOk(KeptBitsVT); } - bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override { - if (DAG.getMachineFunction().getFunction().hasMinSize()) - return false; - return true; - } + bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override; bool shouldSplatInsEltVarIndex(EVT VT) const override; @@ -913,6 +937,10 @@ namespace llvm { TargetLoweringOpt &TLO, unsigned Depth) const override; + SDValue SimplifyMultipleUseDemandedBitsForTargetNode( + SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, + SelectionDAG &DAG, unsigned Depth) const override; + const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override; SDValue unwrapAddress(SDValue N) const override; @@ -1090,11 +1118,12 @@ namespace llvm { bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override; - bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const override; + bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override; bool convertSelectOfConstantsToMath(EVT VT) const override; - bool decomposeMulByConstant(EVT VT, SDValue C) const override; + bool decomposeMulByConstant(LLVMContext &Context, EVT VT, + SDValue C) const override; bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const override; @@ -1136,8 +1165,8 @@ namespace llvm { return nullptr; // nothing to do, move along. } - unsigned getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const override; + Register getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const override; /// If a physical register, this returns the register that receives the /// exception address on entry to an EH pad. 
@@ -1189,12 +1218,18 @@ namespace llvm { CallingConv::ID CC, EVT VT) const override; + unsigned getVectorTypeBreakdownForCallingConv( + LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, + unsigned &NumIntermediates, MVT &RegisterVT) const override; + bool isIntDivCheap(EVT VT, AttributeList Attr) const override; bool supportSwiftError() const override; StringRef getStackProbeSymbolName(MachineFunction &MF) const override; + unsigned getStackProbeSize(MachineFunction &MF) const; + bool hasVectorBlend() const override { return true; } unsigned getMaxSupportedInterleaveFactor() const override { return 4; } @@ -1326,6 +1361,12 @@ namespace llvm { SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG, + RTLIB::Libcall Call) const; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -1372,6 +1413,9 @@ namespace llvm { LoadInst * lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override; + bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override; + bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override; + bool needsCmpXchgNb(Type *MemType) const; void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB, @@ -1462,6 +1506,9 @@ namespace llvm { /// Reassociate floating point divisions into multiply by reciprocal. unsigned combineRepeatedFPDivisors() const override; + + SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, + SmallVectorImpl &Created) const override; }; namespace X86 { @@ -1625,24 +1672,24 @@ namespace llvm { /// mask. This is the reverse process to canWidenShuffleElements, but can /// always succeed. template - void scaleShuffleMask(int Scale, ArrayRef Mask, + void scaleShuffleMask(size_t Scale, ArrayRef Mask, SmallVectorImpl &ScaledMask) { assert(0 < Scale && "Unexpected scaling factor"); size_t NumElts = Mask.size(); ScaledMask.assign(NumElts * Scale, -1); - for (int i = 0; i != (int)NumElts; ++i) { + for (size_t i = 0; i != NumElts; ++i) { int M = Mask[i]; // Repeat sentinel values in every mask element. if (M < 0) { - for (int s = 0; s != Scale; ++s) + for (size_t s = 0; s != Scale; ++s) ScaledMask[(Scale * i) + s] = M; continue; } // Scale mask element and increment across each mask element. 
- for (int s = 0; s != Scale; ++s) + for (size_t s = 0; s != Scale; ++s) ScaledMask[(Scale * i) + s] = (Scale * M) + s; } } diff --git a/lib/Target/X86/X86IndirectBranchTracking.cpp b/lib/Target/X86/X86IndirectBranchTracking.cpp index 04e8b2231fec..cc0f59ab329d 100644 --- a/lib/Target/X86/X86IndirectBranchTracking.cpp +++ b/lib/Target/X86/X86IndirectBranchTracking.cpp @@ -84,7 +84,7 @@ bool X86IndirectBranchTrackingPass::addENDBR( return false; } -bool IsCallReturnTwice(llvm::MachineOperand &MOp) { +static bool IsCallReturnTwice(llvm::MachineOperand &MOp) { if (!MOp.isGlobal()) return false; auto *CalleeFn = dyn_cast(MOp.getGlobal()); diff --git a/lib/Target/X86/X86InsertPrefetch.cpp b/lib/Target/X86/X86InsertPrefetch.cpp index 02ae73706a34..2b1e3f23efd7 100644 --- a/lib/Target/X86/X86InsertPrefetch.cpp +++ b/lib/Target/X86/X86InsertPrefetch.cpp @@ -79,8 +79,8 @@ ErrorOr getPrefetchHints(const FunctionSamples *TopSamples, // The prefetch instruction can't take memory operands involving vector // registers. bool IsMemOpCompatibleWithPrefetch(const MachineInstr &MI, int Op) { - unsigned BaseReg = MI.getOperand(Op + X86::AddrBaseReg).getReg(); - unsigned IndexReg = MI.getOperand(Op + X86::AddrIndexReg).getReg(); + Register BaseReg = MI.getOperand(Op + X86::AddrBaseReg).getReg(); + Register IndexReg = MI.getOperand(Op + X86::AddrIndexReg).getReg(); return (BaseReg == 0 || X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) || X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg)) && @@ -108,7 +108,7 @@ bool X86InsertPrefetch::findPrefetchInfo(const FunctionSamples *TopSamples, Prefetches &Prefetches) const { assert(Prefetches.empty() && "Expected caller passed empty PrefetchInfo vector."); - static const std::pair HintTypes[] = { + static constexpr std::pair HintTypes[] = { {"_nta_", X86::PREFETCHNTA}, {"_t0_", X86::PREFETCHT0}, {"_t1_", X86::PREFETCHT1}, @@ -173,7 +173,7 @@ bool X86InsertPrefetch::doInitialization(Module &M) { void X86InsertPrefetch::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired(); + AU.addRequired(); } bool X86InsertPrefetch::runOnMachineFunction(MachineFunction &MF) { diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 54eddeacaa17..9b5de59430a5 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -74,6 +74,7 @@ class X86VectorVTInfo("alignedload" # VTName); PatFrag ScalarLdFrag = !cast("load" # EltVT); + PatFrag BroadcastLdFrag = !cast("X86VBroadcastld" # EltSizeName); ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"), !cast("sse_load_f32"), @@ -412,6 +413,14 @@ def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "", [(set VR512:$dst, (v16i32 immAllOnesV))]>; } +let Predicates = [HasAVX512] in { +def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>; +def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>; +def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>; +def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>; +def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>; +} + // Alias instructions that allow VPTERNLOG to be used with a mask to create // a mix of all ones and all zeros elements. This is done this way to force // the same register to be used as input for all three sources. 
@@ -436,6 +445,19 @@ def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "", [(set VR256X:$dst, (v8i32 immAllZerosV))]>; } +let Predicates = [HasAVX512] in { +def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>; +def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>; +def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>; +def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>; +def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>; +def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>; +def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>; +def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>; +def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>; +def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>; +} + // Alias instructions that map fld0 to xorps for sse or vxorps for avx. // This is expanded by ExpandPostRAPseudos. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, @@ -443,7 +465,9 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "", [(set FR32X:$dst, fp32imm0)]>; def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "", - [(set FR64X:$dst, fpimm0)]>; + [(set FR64X:$dst, fp64imm0)]>; + def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "", + [(set VR128X:$dst, fp128imm0)]>; } //===----------------------------------------------------------------------===// @@ -730,14 +754,14 @@ let isCommutable = 1 in def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, u8imm:$src3), "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>, + [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>; def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), (ins VR128X:$src1, f32mem:$src2, u8imm:$src3), "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR128X:$dst, (X86insertps VR128X:$src1, (v4f32 (scalar_to_vector (loadf32 addr:$src2))), - imm:$src3))]>, + timm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>, Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>; } @@ -1100,75 +1124,104 @@ multiclass avx512_broadcast_rm_split opc, string OpcodeStr, X86VectorVTInfo MaskInfo, X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo, - SDPatternOperator UnmaskedOp = X86VBroadcast> { - let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in { - defm r : AVX512_maskable_split, - T8PD, EVEX, Sched<[SchedRR]>; - let mayLoad = 1 in - defm m : AVX512_maskable_split, - T8PD, EVEX, EVEX_CD8, - Sched<[SchedRM]>; - } - - def : Pat<(MaskInfo.VT - (bitconvert - (DestInfo.VT (UnmaskedOp - (SrcInfo.VT (scalar_to_vector - (SrcInfo.ScalarLdFrag addr:$src))))))), - (!cast(Name#MaskInfo.ZSuffix#m) addr:$src)>; - def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask, + bit IsConvertibleToThreeAddress, + SDPatternOperator UnmaskedOp = X86VBroadcast, + SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> { + let hasSideEffects = 0 in + def r : AVX512PI, T8PD, EVEX, Sched<[SchedRR]>; + def rkz : AVX512PI(Name#DestInfo.ZSuffix#mk) - MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>; - def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask, + (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))), + MaskInfo.ImmAllZerosV))], + DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>; + let Constraints = "$src0 = $dst" in + def rk : AVX512PI, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>; + + 
let hasSideEffects = 0, mayLoad = 1 in + def m : AVX512PI, T8PD, EVEX, + EVEX_CD8, Sched<[SchedRM]>; + + def mkz : AVX512PI(Name#MaskInfo.ZSuffix#mkz) - MaskInfo.KRCWM:$mask, addr:$src)>; + (SrcInfo.BroadcastLdFrag addr:$src)))), + MaskInfo.ImmAllZerosV))], + DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, + EVEX_CD8, Sched<[SchedRM]>; + + let Constraints = "$src0 = $dst", + isConvertibleToThreeAddress = IsConvertibleToThreeAddress in + def mk : AVX512PI, T8PD, EVEX, EVEX_K, + EVEX_CD8, Sched<[SchedRM]>; } // Helper class to force mask and broadcast result to same type. multiclass avx512_broadcast_rm opc, string OpcodeStr, string Name, SchedWrite SchedRR, SchedWrite SchedRM, X86VectorVTInfo DestInfo, - X86VectorVTInfo SrcInfo> : + X86VectorVTInfo SrcInfo, + bit IsConvertibleToThreeAddress> : avx512_broadcast_rm_split; + DestInfo, DestInfo, SrcInfo, + IsConvertibleToThreeAddress>; multiclass avx512_fp_broadcast_sd opc, string OpcodeStr, AVX512VLVectorVTInfo _> { let Predicates = [HasAVX512] in { defm Z : avx512_broadcast_rm, + WriteFShuffle256Ld, _.info512, _.info128, 1>, avx512_broadcast_scalar, EVEX_V512; @@ -1176,7 +1229,7 @@ multiclass avx512_fp_broadcast_sd opc, string OpcodeStr, let Predicates = [HasVLX] in { defm Z256 : avx512_broadcast_rm, + WriteFShuffle256Ld, _.info256, _.info128, 1>, avx512_broadcast_scalar, EVEX_V256; @@ -1187,7 +1240,7 @@ multiclass avx512_fp_broadcast_ss opc, string OpcodeStr, AVX512VLVectorVTInfo _> { let Predicates = [HasAVX512] in { defm Z : avx512_broadcast_rm, + WriteFShuffle256Ld, _.info512, _.info128, 1>, avx512_broadcast_scalar, EVEX_V512; @@ -1195,12 +1248,12 @@ multiclass avx512_fp_broadcast_ss opc, string OpcodeStr, let Predicates = [HasVLX] in { defm Z256 : avx512_broadcast_rm, + WriteFShuffle256Ld, _.info256, _.info128, 1>, avx512_broadcast_scalar, EVEX_V256; defm Z128 : avx512_broadcast_rm, + WriteFShuffle256Ld, _.info128, _.info128, 1>, avx512_broadcast_scalar, EVEX_V128; @@ -1284,46 +1337,35 @@ defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info, defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info, X86VBroadcast, GR64, HasAVX512>, VEX_W; -// Provide aliases for broadcast from the same register class that -// automatically does the extract. -multiclass avx512_int_broadcast_rm_lowering { - def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))), - (!cast(Name#DestInfo.ZSuffix#"r") - (ExtInfo.VT (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm)))>; -} - multiclass avx512_int_broadcast_rm_vl opc, string OpcodeStr, - AVX512VLVectorVTInfo _, Predicate prd> { + AVX512VLVectorVTInfo _, Predicate prd, + bit IsConvertibleToThreeAddress> { let Predicates = [prd] in { defm Z : avx512_broadcast_rm, - avx512_int_broadcast_rm_lowering, + WriteShuffle256Ld, _.info512, _.info128, + IsConvertibleToThreeAddress>, EVEX_V512; - // Defined separately to avoid redefinition. 
- defm Z_Alt : avx512_int_broadcast_rm_lowering; } let Predicates = [prd, HasVLX] in { defm Z256 : avx512_broadcast_rm, - avx512_int_broadcast_rm_lowering, + WriteShuffle256Ld, _.info256, _.info128, + IsConvertibleToThreeAddress>, EVEX_V256; defm Z128 : avx512_broadcast_rm, + WriteShuffleXLd, _.info128, _.info128, + IsConvertibleToThreeAddress>, EVEX_V128; } } defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb", - avx512vl_i8_info, HasBWI>; + avx512vl_i8_info, HasBWI, 0>; defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw", - avx512vl_i16_info, HasBWI>; + avx512vl_i16_info, HasBWI, 0>; defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd", - avx512vl_i32_info, HasAVX512>; + avx512vl_i32_info, HasAVX512, 1>; defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq", - avx512vl_i64_info, HasAVX512>, VEX_W1X; + avx512vl_i64_info, HasAVX512, 1>, VEX_W1X; multiclass avx512_subvec_broadcast_rm opc, string OpcodeStr, X86VectorVTInfo _Dst, X86VectorVTInfo _Src> { @@ -1354,6 +1396,10 @@ let Predicates = [HasAVX512] in { // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD. def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), (VPBROADCASTQZm addr:$src)>; + + // FIXME this is to handle aligned extloads from i8. + def : Pat<(v16i32 (X86VBroadcast (loadi32 addr:$src))), + (VPBROADCASTDZm addr:$src)>; } let Predicates = [HasVLX] in { @@ -1362,6 +1408,12 @@ let Predicates = [HasVLX] in { (VPBROADCASTQZ128m addr:$src)>; def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), (VPBROADCASTQZ256m addr:$src)>; + + // FIXME this is to handle aligned extloads from i8. + def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), + (VPBROADCASTDZ128m addr:$src)>; + def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), + (VPBROADCASTDZ256m addr:$src)>; } let Predicates = [HasVLX, HasBWI] in { // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. @@ -1382,6 +1434,12 @@ let Predicates = [HasVLX, HasBWI] in { def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (zextloadi16 addr:$src)))))), (VPBROADCASTWZ256m addr:$src)>; + + // FIXME this is to handle aligned extloads from i8. + def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))), + (VPBROADCASTWZ128m addr:$src)>; + def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))), + (VPBROADCASTWZ256m addr:$src)>; } let Predicates = [HasBWI] in { // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. @@ -1394,6 +1452,10 @@ let Predicates = [HasBWI] in { def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (zextloadi16 addr:$src)))))), (VPBROADCASTWZm addr:$src)>; + + // FIXME this is to handle aligned extloads from i8. 
+ def : Pat<(v32i16 (X86VBroadcast (loadi16 addr:$src))), + (VPBROADCASTWZm addr:$src)>; } //===----------------------------------------------------------------------===// @@ -1629,12 +1691,12 @@ multiclass avx512_common_broadcast_32x2 opc, string OpcodeStr, let Predicates = [HasDQI] in defm Z : avx512_broadcast_rm_split, + _Src.info512, _Src.info128, 0, null_frag, null_frag>, EVEX_V512; let Predicates = [HasDQI, HasVLX] in defm Z256 : avx512_broadcast_rm_split, + _Src.info256, _Src.info128, 0, null_frag, null_frag>, EVEX_V256; } @@ -1645,7 +1707,7 @@ multiclass avx512_common_broadcast_i32x2 opc, string OpcodeStr, let Predicates = [HasDQI, HasVLX] in defm Z128 : avx512_broadcast_rm_split, + _Src.info128, _Src.info128, 0, null_frag, null_frag>, EVEX_V128; } @@ -1654,23 +1716,6 @@ defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2", defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2", avx512vl_f32_info, avx512vl_f64_info>; -let Predicates = [HasVLX] in { -def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))), - (VBROADCASTSSZ256r (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>; -def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))), - (VBROADCASTSDZ256r (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>; -} - -def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))), - (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))>; -def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))), - (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>; - -def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))), - (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))>; -def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))), - (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>; - //===----------------------------------------------------------------------===// // AVX-512 BROADCAST MASK TO VECTOR REGISTER //--- @@ -1730,7 +1775,7 @@ multiclass avx512_perm_i_mb opc, string OpcodeStr, OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ), (_.VT (X86VPermt2 _.RC:$src2, - IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>, + IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>, AVX5128IBase, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -1807,7 +1852,7 @@ multiclass avx512_perm_i_lowering(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3)>; @@ -1846,7 +1891,7 @@ multiclass avx512_perm_t_mb opc, string OpcodeStr, OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ), (_.VT (X86VPermt2 _.RC:$src1, - IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>, + IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>, AVX5128IBase, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -1947,7 +1992,7 @@ multiclass WriteFVarBlendask opc, string OpcodeStr, } multiclass WriteFVarBlendask_rmb opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _> { - let mayLoad = 1, hasSideEffects = 0 in { + let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in { def rmbk : AVX5128I, EVEX_4V, VEX_LIG, Sched<[sched]>; + timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>; let mayLoad = 1 in defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), @@ -2041,9 +2086,9 @@ multiclass avx512_cmp_scalar, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, + timm:$cc)>, EVEX_4V, VEX_LIG, 
EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, @@ -2052,9 +2097,9 @@ multiclass avx512_cmp_scalar, + timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>; let isCodeGenOnly = 1 in { @@ -2065,7 +2110,7 @@ multiclass avx512_cmp_scalar, + timm:$cc))]>, EVEX_4V, VEX_LIG, Sched<[sched]>; def rm : AVX512Ii8<0xC2, MRMSrcMem, (outs _.KRC:$dst), @@ -2074,7 +2119,7 @@ multiclass avx512_cmp_scalar, + timm:$cc))]>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -2100,94 +2145,82 @@ let Predicates = [HasAVX512] in { SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W; } -multiclass avx512_icmp_packed opc, string OpcodeStr, PatFrag OpNode, - PatFrag OpNode_su, X86FoldableSchedWrite sched, +multiclass avx512_icmp_packed opc, string OpcodeStr, + X86FoldableSchedWrite sched, X86VectorVTInfo _, bit IsCommutable> { - let isCommutable = IsCommutable in + let isCommutable = IsCommutable, hasSideEffects = 0 in def rr : AVX512BI, - EVEX_4V, Sched<[sched]>; + []>, EVEX_4V, Sched<[sched]>; + let mayLoad = 1, hasSideEffects = 0 in def rm : AVX512BI, - EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; - let isCommutable = IsCommutable in + []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; + let isCommutable = IsCommutable, hasSideEffects = 0 in def rrk : AVX512BI, - EVEX_4V, EVEX_K, Sched<[sched]>; + []>, EVEX_4V, EVEX_K, Sched<[sched]>; + let mayLoad = 1, hasSideEffects = 0 in def rmk : AVX512BI, - EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; + []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; } -multiclass avx512_icmp_packed_rmb opc, string OpcodeStr, PatFrag OpNode, - PatFrag OpNode_su, +multiclass avx512_icmp_packed_rmb opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _, bit IsCommutable> : - avx512_icmp_packed { + avx512_icmp_packed { + let mayLoad = 1, hasSideEffects = 0 in { def rmb : AVX512BI, - EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; + []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmbk : AVX512BI, - EVEX_4V, EVEX_K, EVEX_B, + []>, EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; + } } -multiclass avx512_icmp_packed_vl opc, string OpcodeStr, PatFrag OpNode, - PatFrag OpNode_su, X86SchedWriteWidths sched, +multiclass avx512_icmp_packed_vl opc, string OpcodeStr, + X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo, Predicate prd, bit IsCommutable = 0> { let Predicates = [prd] in - defm Z : avx512_icmp_packed, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_icmp_packed, EVEX_V256; - defm Z128 : avx512_icmp_packed, EVEX_V128; } } multiclass avx512_icmp_packed_rmb_vl opc, string OpcodeStr, - PatFrag OpNode, PatFrag OpNode_su, X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo, Predicate prd, bit IsCommutable = 0> { let Predicates = [prd] in - defm Z : avx512_icmp_packed_rmb, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_icmp_packed_rmb, EVEX_V256; - defm Z128 : avx512_icmp_packed_rmb, EVEX_V128; } } @@ -2195,53 +2228,42 @@ multiclass avx512_icmp_packed_rmb_vl opc, string OpcodeStr, // This fragment treats X86cmpm as commutable to help match loads in both // operands for PCMPEQ. 
def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>; -def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2), - (X86setcc_commute node:$src1, node:$src2, SETEQ)>; def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2), (setcc node:$src1, node:$src2, SETGT)>; -def X86pcmpeqm_c_su : PatFrag<(ops node:$src1, node:$src2), - (X86pcmpeqm_c node:$src1, node:$src2), [{ - return N->hasOneUse(); -}]>; -def X86pcmpgtm_su : PatFrag<(ops node:$src1, node:$src2), - (X86pcmpgtm node:$src1, node:$src2), [{ - return N->hasOneUse(); -}]>; - // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't // increase the pattern complexity the way an immediate would. let AddedComplexity = 2 in { // FIXME: Is there a better scheduler class for VPCMP? -defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c, X86pcmpeqm_c_su, +defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>, EVEX_CD8<8, CD8VF>, VEX_WIG; -defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c, X86pcmpeqm_c_su, +defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>, EVEX_CD8<16, CD8VF>, VEX_WIG; -defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c, X86pcmpeqm_c_su, +defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>, EVEX_CD8<32, CD8VF>; -defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c, X86pcmpeqm_c_su, +defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>, T8PD, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm, X86pcmpgtm_su, +defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", SchedWriteVecALU, avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>, VEX_WIG; -defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm, X86pcmpgtm_su, +defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", SchedWriteVecALU, avx512vl_i16_info, HasBWI>, EVEX_CD8<16, CD8VF>, VEX_WIG; -defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm, X86pcmpgtm_su, +defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", SchedWriteVecALU, avx512vl_i32_info, HasAVX512>, EVEX_CD8<32, CD8VF>; -defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm, X86pcmpgtm_su, +defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", SchedWriteVecALU, avx512vl_i64_info, HasAVX512>, T8PD, VEX_W, EVEX_CD8<64, CD8VF>; } @@ -2322,8 +2344,7 @@ multiclass avx512_icmp_cc_rmb opc, string Suffix, PatFrag Frag, "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1), - (X86VBroadcast - (_.ScalarLdFrag addr:$src2)), + (_.BroadcastLdFrag addr:$src2), cond)))]>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmibk : AVX512AIi8 opc, string Suffix, PatFrag Frag, [(set _.KRC:$dst, (and _.KRCWM:$mask, (_.KVT (Frag_su:$cc (_.VT _.RC:$src1), - (X86VBroadcast - (_.ScalarLdFrag addr:$src2)), + (_.BroadcastLdFrag addr:$src2), cond))))]>, EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; - def : Pat<(_.KVT (CommFrag:$cc (X86VBroadcast (_.ScalarLdFrag addr:$src2)), + def : Pat<(_.KVT (CommFrag:$cc (_.BroadcastLdFrag addr:$src2), (_.VT _.RC:$src1), cond)), (!cast(Name#_.ZSuffix#"rmib") _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>; def : Pat<(and _.KRCWM:$mask, - (_.KVT (CommFrag_su:$cc (X86VBroadcast - 
(_.ScalarLdFrag addr:$src2)), + (_.KVT (CommFrag_su:$cc (_.BroadcastLdFrag addr:$src2), (_.VT _.RC:$src1), cond))), (!cast(Name#_.ZSuffix#"rmibk") _.KRCWM:$mask, _.RC:$src1, addr:$src2, - (CommFrag.OperandTransform $cc))>; + (CommFrag_su.OperandTransform $cc))>; } multiclass avx512_icmp_cc_vl opc, string Suffix, PatFrag Frag, @@ -2496,14 +2515,19 @@ def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc), return N->hasOneUse(); }]>; +def X86cmpm_imm_commute : SDNodeXFormgetZExtValue() & 0x1f); + return getI8Imm(Imm, SDLoc(N)); +}]>; + multiclass avx512_vcmp_common { defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc), "vcmp"#_.Suffix, "$cc, $src2, $src1", "$src1, $src2, $cc", - (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc), - (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc), + (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), + (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), 1>, Sched<[sched]>; defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, @@ -2511,9 +2535,9 @@ multiclass avx512_vcmp_common, + timm:$cc)>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, @@ -2523,38 +2547,37 @@ multiclass avx512_vcmp_common, + (_.VT (_.BroadcastLdFrag addr:$src2)), + timm:$cc)>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; // Patterns for selecting with loads in other operand. def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1), - CommutableCMPCC:$cc), + timm:$cc), (!cast(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, - imm:$cc)>; + (X86cmpm_imm_commute timm:$cc))>; def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2), (_.VT _.RC:$src1), - CommutableCMPCC:$cc)), + timm:$cc)), (!cast(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1, addr:$src2, - imm:$cc)>; + (X86cmpm_imm_commute timm:$cc))>; - def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)), - (_.VT _.RC:$src1), CommutableCMPCC:$cc), + def : Pat<(X86cmpm (_.BroadcastLdFrag addr:$src2), + (_.VT _.RC:$src1), timm:$cc), (!cast(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, - imm:$cc)>; + (X86cmpm_imm_commute timm:$cc))>; - def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (X86VBroadcast - (_.ScalarLdFrag addr:$src2)), + def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2), (_.VT _.RC:$src1), - CommutableCMPCC:$cc)), + timm:$cc)), (!cast(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1, addr:$src2, - imm:$cc)>; + (X86cmpm_imm_commute timm:$cc))>; } multiclass avx512_vcmp_sae { @@ -2564,9 +2587,9 @@ multiclass avx512_vcmp_sae { "vcmp"#_.Suffix, "$cc, {sae}, $src2, $src1", "$src1, $src2, {sae}, $cc", - (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc), + (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), - imm:$cc)>, + timm:$cc)>, EVEX_B, Sched<[sched]>; } @@ -2590,12 +2613,12 @@ defm VCMPPS : avx512_vcmp, // Patterns to select fp compares with load as first operand. 
let Predicates = [HasAVX512] in { def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, - CommutableCMPCC:$cc)), - (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>; + timm:$cc)), + (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>; def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, - CommutableCMPCC:$cc)), - (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>; + timm:$cc)), + (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>; } // ---------------------------------------------------------------- @@ -2621,7 +2644,7 @@ multiclass avx512_scalar_fpclass opc, string OpcodeStr, (ins _.RC:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1), - (i32 imm:$src2)))]>, + (i32 timm:$src2)))]>, Sched<[sched]>; def rrk : AVX512 opc, string OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclasss_su (_.VT _.RC:$src1), - (i32 imm:$src2))))]>, + (i32 timm:$src2))))]>, EVEX_K, Sched<[sched]>; def rm : AVX512 opc, string OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst, (X86Vfpclasss _.ScalarIntMemCPat:$src1, - (i32 imm:$src2)))]>, + (i32 timm:$src2)))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmk : AVX512 opc, string OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclasss_su _.ScalarIntMemCPat:$src1, - (i32 imm:$src2))))]>, + (i32 timm:$src2))))]>, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -2661,7 +2684,7 @@ multiclass avx512_vector_fpclass opc, string OpcodeStr, (ins _.RC:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1), - (i32 imm:$src2)))]>, + (i32 timm:$src2)))]>, Sched<[sched]>; def rrk : AVX512 opc, string OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su (_.VT _.RC:$src1), - (i32 imm:$src2))))]>, + (i32 timm:$src2))))]>, EVEX_K, Sched<[sched]>; def rm : AVX512 opc, string OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst,(X86Vfpclass (_.VT (_.LdFrag addr:$src1)), - (i32 imm:$src2)))]>, + (i32 timm:$src2)))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmk : AVX512 opc, string OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su (_.VT (_.LdFrag addr:$src1)), - (i32 imm:$src2))))]>, + (i32 timm:$src2))))]>, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmb : AVX512 opc, string OpcodeStr, _.BroadcastStr##", $dst|$dst, ${src1}" ##_.BroadcastStr##", $src2}", [(set _.KRC:$dst,(X86Vfpclass - (_.VT (X86VBroadcast - (_.ScalarLdFrag addr:$src1))), - (i32 imm:$src2)))]>, + (_.VT (_.BroadcastLdFrag addr:$src1)), + (i32 timm:$src2)))]>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmbk : AVX512 opc, string OpcodeStr, _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"## _.BroadcastStr##", $src2}", [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su - (_.VT (X86VBroadcast - (_.ScalarLdFrag addr:$src1))), - (i32 imm:$src2))))]>, + (_.VT (_.BroadcastLdFrag addr:$src1)), + (i32 timm:$src2))))]>, EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -2836,13 +2857,21 @@ def : Pat<(i8 (bitconvert (v8i1 VK8:$src))), def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))), (KMOVWrk VK16:$src)>; 
+def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))), + (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>; def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))), (COPY_TO_REGCLASS VK16:$src, GR32)>; +def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))), + (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>; def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))), (KMOVBrk VK8:$src)>, Requires<[HasDQI]>; +def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))), + (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>; def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))), (COPY_TO_REGCLASS VK8:$src, GR32)>; +def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))), + (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>; def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), (COPY_TO_REGCLASS GR32:$src, VK32)>; @@ -3075,7 +3104,7 @@ multiclass avx512_mask_shiftop opc, string OpcodeStr, RegisterClass KRC, def ri : Ii8, + [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>, Sched<[sched]>; } @@ -3097,30 +3126,6 @@ multiclass avx512_mask_shiftop_w opc1, bits<8> opc2, string OpcodeStr, defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>; defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>; -// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction. -multiclass axv512_icmp_packed_no_vlx_lowering { - def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1), - (Narrow.VT Narrow.RC:$src2))), - (COPY_TO_REGCLASS - (!cast(InstStr#"Zrr") - (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), - (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))), - Narrow.KRC)>; - - def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, - (Frag_su (Narrow.VT Narrow.RC:$src1), - (Narrow.VT Narrow.RC:$src2)))), - (COPY_TO_REGCLASS - (!cast(InstStr#"Zrrk") - (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), - (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), - (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))), - Narrow.KRC)>; -} - // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction. multiclass axv512_icmp_packed_cc_no_vlx_lowering(InstStr##Zrri) + (!cast(InstStr#"Zrri") (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), (Frag.OperandTransform $cc)), Narrow.KRC)>; @@ -3138,53 +3143,111 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1), (Narrow.VT Narrow.RC:$src2), cond)))), - (COPY_TO_REGCLASS (!cast(InstStr##Zrrik) + (COPY_TO_REGCLASS (!cast(InstStr#"Zrrik") (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), - (Frag.OperandTransform $cc)), Narrow.KRC)>; + (Frag_su.OperandTransform $cc)), Narrow.KRC)>; +} + +multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering { +// Broadcast load. 
+def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1), + (Narrow.BroadcastLdFrag addr:$src2), cond)), + (COPY_TO_REGCLASS + (!cast(InstStr#"Zrmib") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (Frag.OperandTransform $cc)), Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (Narrow.KVT + (Frag_su:$cc (Narrow.VT Narrow.RC:$src1), + (Narrow.BroadcastLdFrag addr:$src2), + cond)))), + (COPY_TO_REGCLASS (!cast(InstStr#"Zrmibk") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (Frag_su.OperandTransform $cc)), Narrow.KRC)>; + +// Commuted with broadcast load. +def : Pat<(Narrow.KVT (CommFrag:$cc (Narrow.BroadcastLdFrag addr:$src2), + (Narrow.VT Narrow.RC:$src1), + cond)), + (COPY_TO_REGCLASS + (!cast(InstStr#"Zrmib") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (CommFrag.OperandTransform $cc)), Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (Narrow.KVT + (CommFrag_su:$cc (Narrow.BroadcastLdFrag addr:$src2), + (Narrow.VT Narrow.RC:$src1), + cond)))), + (COPY_TO_REGCLASS (!cast(InstStr#"Zrmibk") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (CommFrag_su.OperandTransform $cc)), Narrow.KRC)>; } // Same as above, but for fp types which don't use PatFrags. -multiclass axv512_cmp_packed_cc_no_vlx_lowering { -def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1), - (Narrow.VT Narrow.RC:$src2), imm:$cc)), +def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2), timm:$cc)), (COPY_TO_REGCLASS - (!cast(InstStr##Zrri) + (!cast(InstStr#"Zrri") (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), - imm:$cc), Narrow.KRC)>; + timm:$cc), Narrow.KRC)>; def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, - (OpNode_su (Narrow.VT Narrow.RC:$src1), - (Narrow.VT Narrow.RC:$src2), imm:$cc))), - (COPY_TO_REGCLASS (!cast(InstStr##Zrrik) + (X86cmpm_su (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2), timm:$cc))), + (COPY_TO_REGCLASS (!cast(InstStr#"Zrrik") (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), - imm:$cc), Narrow.KRC)>; -} + timm:$cc), Narrow.KRC)>; -let Predicates = [HasAVX512, NoVLX] in { - // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't - // increase the pattern complexity the way an immediate would. - let AddedComplexity = 2 in { - defm : axv512_icmp_packed_no_vlx_lowering; - defm : axv512_icmp_packed_no_vlx_lowering; +// Broadcast load. 
+def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1), + (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)), + (COPY_TO_REGCLASS + (!cast(InstStr#"Zrmbi") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, timm:$cc), Narrow.KRC)>; - defm : axv512_icmp_packed_no_vlx_lowering; - defm : axv512_icmp_packed_no_vlx_lowering; +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (X86cmpm_su (Narrow.VT Narrow.RC:$src1), + (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))), + (COPY_TO_REGCLASS (!cast(InstStr#"Zrmbik") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, timm:$cc), Narrow.KRC)>; - defm : axv512_icmp_packed_no_vlx_lowering; - defm : axv512_icmp_packed_no_vlx_lowering; +// Commuted with broadcast load. +def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), + (Narrow.VT Narrow.RC:$src1), timm:$cc)), + (COPY_TO_REGCLASS + (!cast(InstStr#"Zrmbi") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>; - defm : axv512_icmp_packed_no_vlx_lowering; - defm : axv512_icmp_packed_no_vlx_lowering; - } +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), + (Narrow.VT Narrow.RC:$src1), timm:$cc))), + (COPY_TO_REGCLASS (!cast(InstStr#"Zrmbik") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>; +} +let Predicates = [HasAVX512, NoVLX] in { defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; @@ -3197,29 +3260,25 @@ let Predicates = [HasAVX512, NoVLX] in { defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; - defm : axv512_cmp_packed_cc_no_vlx_lowering; - defm : axv512_cmp_packed_cc_no_vlx_lowering; - defm : axv512_cmp_packed_cc_no_vlx_lowering; - defm : axv512_cmp_packed_cc_no_vlx_lowering; -} + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; -let Predicates = [HasBWI, NoVLX] in { - // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't - // increase the pattern complexity the way an immediate would. 
- let AddedComplexity = 2 in { - defm : axv512_icmp_packed_no_vlx_lowering; - defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; - defm : axv512_icmp_packed_no_vlx_lowering; - defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; - defm : axv512_icmp_packed_no_vlx_lowering; - defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; - defm : axv512_icmp_packed_no_vlx_lowering; - defm : axv512_icmp_packed_no_vlx_lowering; - } + defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>; + defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>; + defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>; + defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>; +} +let Predicates = [HasBWI, NoVLX] in { defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; @@ -4186,16 +4245,32 @@ def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)), (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>; +def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))), + (COPY_TO_REGCLASS + (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)), + VK1WM:$mask, addr:$src)), + FR32X)>; +def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)), + (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>; + def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))), (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>; -def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fpimm0)), +def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)), (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>; +def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))), + (COPY_TO_REGCLASS + (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)), + VK1WM:$mask, addr:$src)), + FR64X)>; +def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)), + (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>; + let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in { def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), @@ -4537,8 +4612,7 @@ multiclass avx512_binop_rmb opc, string OpcodeStr, SDNode OpNode, "${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr, (_.VT (OpNode _.RC:$src1, - (X86VBroadcast - (_.ScalarLdFrag addr:$src2))))>, + (_.BroadcastLdFrag addr:$src2)))>, AVX512BIBase, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -4664,8 +4738,7 @@ multiclass avx512_binop_rm2 opc, string OpcodeStr, "${src2}"##_Brdct.BroadcastStr##", $src1", "$src1, ${src2}"##_Brdct.BroadcastStr, (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert - (_Brdct.VT (X86VBroadcast - (_Brdct.ScalarLdFrag addr:$src2))))))>, + (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>, AVX512BIBase, EVEX_4V, EVEX_B, 
Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -4737,8 +4810,7 @@ multiclass avx512_packs_rmb opc, string OpcodeStr, SDNode OpNode, "${src2}"##_Src.BroadcastStr##", $src1", "$src1, ${src2}"##_Src.BroadcastStr, (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert - (_Src.VT (X86VBroadcast - (_Src.ScalarLdFrag addr:$src2))))))>, + (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>, EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -4874,22 +4946,11 @@ let Predicates = [HasDQI, NoVLX] in { (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), sub_ymm)>; - - def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), - (EXTRACT_SUBREG - (VPMULLQZrr - (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), - (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), - sub_xmm)>; -} - -// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX. -let Predicates = [HasDQI, NoVLX] in { - def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), + def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))), (EXTRACT_SUBREG - (VPMULLQZrr + (VPMULLQZrmb (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), - (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), + addr:$src2), sub_ymm)>; def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), @@ -4898,29 +4959,47 @@ let Predicates = [HasDQI, NoVLX] in { (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), sub_xmm)>; + def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))), + (EXTRACT_SUBREG + (VPMULLQZrmb + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), + addr:$src2), + sub_xmm)>; } -multiclass avx512_min_max_lowering { +multiclass avx512_min_max_lowering { def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)), (EXTRACT_SUBREG - (Instr + (!cast(Instr#"rr") (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), sub_ymm)>; + def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))), + (EXTRACT_SUBREG + (!cast(Instr#"rmb") + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), + addr:$src2), + sub_ymm)>; def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)), (EXTRACT_SUBREG - (Instr + (!cast(Instr#"rr") (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), sub_xmm)>; + def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))), + (EXTRACT_SUBREG + (!cast(Instr#"rmb") + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), + addr:$src2), + sub_xmm)>; } let Predicates = [HasAVX512, NoVLX] in { - defm : avx512_min_max_lowering; - defm : avx512_min_max_lowering; - defm : avx512_min_max_lowering; - defm : avx512_min_max_lowering; + defm : avx512_min_max_lowering<"VPMAXUQZ", umax>; + defm : avx512_min_max_lowering<"VPMINUQZ", umin>; + defm : avx512_min_max_lowering<"VPMAXSQZ", smax>; + defm : avx512_min_max_lowering<"VPMINSQZ", smin>; } //===----------------------------------------------------------------------===// @@ -4977,32 +5056,6 @@ let Predicates = [HasVLX] in { def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)), (VPANDNQZ128rm VR128X:$src1, addr:$src2)>; - def : Pat<(and VR128X:$src1, - (bc_v4i32 
(v4f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPANDDZ128rmb VR128X:$src1, addr:$src2)>; - def : Pat<(or VR128X:$src1, - (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPORDZ128rmb VR128X:$src1, addr:$src2)>; - def : Pat<(xor VR128X:$src1, - (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPXORDZ128rmb VR128X:$src1, addr:$src2)>; - def : Pat<(X86andnp VR128X:$src1, - (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPANDNDZ128rmb VR128X:$src1, addr:$src2)>; - - def : Pat<(and VR128X:$src1, - (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPANDQZ128rmb VR128X:$src1, addr:$src2)>; - def : Pat<(or VR128X:$src1, - (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPORQZ128rmb VR128X:$src1, addr:$src2)>; - def : Pat<(xor VR128X:$src1, - (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPXORQZ128rmb VR128X:$src1, addr:$src2)>; - def : Pat<(X86andnp VR128X:$src1, - (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPANDNQZ128rmb VR128X:$src1, addr:$src2)>; - def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)), (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>; def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)), @@ -5042,32 +5095,6 @@ let Predicates = [HasVLX] in { (VPANDNQZ256rm VR256X:$src1, addr:$src2)>; def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)), (VPANDNQZ256rm VR256X:$src1, addr:$src2)>; - - def : Pat<(and VR256X:$src1, - (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPANDDZ256rmb VR256X:$src1, addr:$src2)>; - def : Pat<(or VR256X:$src1, - (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPORDZ256rmb VR256X:$src1, addr:$src2)>; - def : Pat<(xor VR256X:$src1, - (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPXORDZ256rmb VR256X:$src1, addr:$src2)>; - def : Pat<(X86andnp VR256X:$src1, - (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPANDNDZ256rmb VR256X:$src1, addr:$src2)>; - - def : Pat<(and VR256X:$src1, - (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPANDQZ256rmb VR256X:$src1, addr:$src2)>; - def : Pat<(or VR256X:$src1, - (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPORQZ256rmb VR256X:$src1, addr:$src2)>; - def : Pat<(xor VR256X:$src1, - (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPXORQZ256rmb VR256X:$src1, addr:$src2)>; - def : Pat<(X86andnp VR256X:$src1, - (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPANDNQZ256rmb VR256X:$src1, addr:$src2)>; } let Predicates = [HasAVX512] in { @@ -5110,32 +5137,6 @@ let Predicates = [HasAVX512] in { (VPANDNQZrm VR512:$src1, addr:$src2)>; def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)), (VPANDNQZrm VR512:$src1, addr:$src2)>; - - def : Pat<(and VR512:$src1, - (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPANDDZrmb VR512:$src1, addr:$src2)>; - def : Pat<(or VR512:$src1, - (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPORDZrmb VR512:$src1, addr:$src2)>; - def : Pat<(xor VR512:$src1, - (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPXORDZrmb VR512:$src1, addr:$src2)>; - def : Pat<(X86andnp VR512:$src1, - (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPANDNDZrmb VR512:$src1, addr:$src2)>; - - def : Pat<(and VR512:$src1, - (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPANDQZrmb VR512:$src1, addr:$src2)>; - def : Pat<(or VR512:$src1, - (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPORQZrmb VR512:$src1, addr:$src2)>; - def : Pat<(xor 
VR512:$src1, - (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPXORQZrmb VR512:$src1, addr:$src2)>; - def : Pat<(X86andnp VR512:$src1, - (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPANDNQZrmb VR512:$src1, addr:$src2)>; } // Patterns to catch vselect with different type than logic op. @@ -5174,25 +5175,17 @@ multiclass avx512_logical_lowering_bcast { // Register-broadcast logical operations. - def : Pat<(IntInfo.VT (OpNode _.RC:$src1, - (bitconvert (_.VT (X86VBroadcast - (_.ScalarLdFrag addr:$src2)))))), - (!cast(InstrStr#rmb) _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (bitconvert (IntInfo.VT (OpNode _.RC:$src1, - (bitconvert (_.VT - (X86VBroadcast - (_.ScalarLdFrag addr:$src2))))))), + (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))), _.RC:$src0)), (!cast(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (bitconvert (IntInfo.VT (OpNode _.RC:$src1, - (bitconvert (_.VT - (X86VBroadcast - (_.ScalarLdFrag addr:$src2))))))), + (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))), _.ImmAllZerosV)), (!cast(InstrStr#rmbkz) _.KRCWM:$mask, _.RC:$src1, addr:$src2)>; @@ -5329,7 +5322,8 @@ multiclass avx512_fp_scalar_round opc, string OpcodeStr,X86VectorVTInfo } multiclass avx512_fp_scalar_sae opc, string OpcodeStr,X86VectorVTInfo _, SDNode OpNode, SDNode VecNode, SDNode SaeNode, - X86FoldableSchedWrite sched, bit IsCommutable> { + X86FoldableSchedWrite sched, bit IsCommutable, + string EVEX2VexOvrd> { let ExeDomain = _.ExeDomain in { defm rr_Int : AVX512_maskable_scalar opc, string OpcodeStr,X86VectorVTInfo _, (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, - Sched<[sched]> { + Sched<[sched]>, + EVEX2VEXOverride { let isCommutable = IsCommutable; } def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), @@ -5357,7 +5352,8 @@ multiclass avx512_fp_scalar_sae opc, string OpcodeStr,X86VectorVTInfo _, OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2)))]>, - Sched<[sched.Folded, sched.ReadAfterFold]>; + Sched<[sched.Folded, sched.ReadAfterFold]>, + EVEX2VEXOverride; } defm rrb_Int : AVX512_maskable_scalar opc, string OpcodeStr, SDNode OpNode, SDNode VecNode, SDNode SaeNode, X86SchedWriteSizes sched, bit IsCommutable> { defm SSZ : avx512_fp_scalar_sae, + VecNode, SaeNode, sched.PS.Scl, IsCommutable, + NAME#"SS">, XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm SDZ : avx512_fp_scalar_sae, + VecNode, SaeNode, sched.PD.Scl, IsCommutable, + NAME#"SD">, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; } defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86fadds, X86faddRnds, @@ -5410,13 +5408,14 @@ defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs, // X86fminc and X86fmaxc instead of X86fmin and X86fmax multiclass avx512_comutable_binop_s opc, string OpcodeStr, X86VectorVTInfo _, SDNode OpNode, - X86FoldableSchedWrite sched> { + X86FoldableSchedWrite sched, + string EVEX2VEXOvrd> { let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, - Sched<[sched]> { + Sched<[sched]>, EVEX2VEXOverride { let isCommutable = 1; } def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), @@ -5424,24 +5423,27 @@ multiclass 
avx512_comutable_binop_s opc, string OpcodeStr, OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2)))]>, - Sched<[sched.Folded, sched.ReadAfterFold]>; + Sched<[sched.Folded, sched.ReadAfterFold]>, + EVEX2VEXOverride; } } defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc, - SchedWriteFCmp.Scl>, XS, EVEX_4V, - VEX_LIG, EVEX_CD8<32, CD8VT1>; + SchedWriteFCmp.Scl, "VMINCSS">, XS, + EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc, - SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V, - VEX_LIG, EVEX_CD8<64, CD8VT1>; + SchedWriteFCmp.Scl, "VMINCSD">, XD, + VEX_W, EVEX_4V, VEX_LIG, + EVEX_CD8<64, CD8VT1>; defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc, - SchedWriteFCmp.Scl>, XS, EVEX_4V, - VEX_LIG, EVEX_CD8<32, CD8VT1>; + SchedWriteFCmp.Scl, "VMAXCSS">, XS, + EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc, - SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V, - VEX_LIG, EVEX_CD8<64, CD8VT1>; + SchedWriteFCmp.Scl, "VMAXCSD">, XD, + VEX_W, EVEX_4V, VEX_LIG, + EVEX_CD8<64, CD8VT1>; multiclass avx512_fp_packed opc, string OpcodeStr, SDPatternOperator OpNode, X86VectorVTInfo _, X86FoldableSchedWrite sched, @@ -5464,8 +5466,7 @@ multiclass avx512_fp_packed opc, string OpcodeStr, SDPatternOperator OpN (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix, "${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr, - (OpNode _.RC:$src1, (_.VT (X86VBroadcast - (_.ScalarLdFrag addr:$src2))))>, + (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -5595,8 +5596,7 @@ multiclass avx512_fp_scalef_p opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix, "${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr, - (OpNode _.RC:$src1, (_.VT (X86VBroadcast - (_.ScalarLdFrag addr:$src2))))>, + (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -5751,13 +5751,13 @@ multiclass avx512_shift_rmi opc, Format ImmFormR, Format ImmFormM, defm ri : AVX512_maskable, + (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>, Sched<[sched]>; defm mi : AVX512_maskable, + (i8 timm:$src2)))>, Sched<[sched.Folded]>; } } @@ -5769,7 +5769,7 @@ multiclass avx512_shift_rmbi opc, Format ImmFormM, defm mbi : AVX512_maskable, + (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>, EVEX_B, Sched<[sched.Folded]>; } @@ -5911,17 +5911,17 @@ let Predicates = [HasAVX512, NoVLX] in { (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), VR128X:$src2)), sub_xmm)>; - def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))), + def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v8i64 (VPSRAQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - imm:$src2)), sub_ymm)>; + timm:$src2)), sub_ymm)>; - def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))), + def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v8i64 (VPSRAQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), - imm:$src2)), sub_xmm)>; + timm:$src2)), sub_xmm)>; } //===-------------------------------------------------------------------===// @@ -5953,8 +5953,7 @@ multiclass avx512_var_shift_mb opc, string 
OpcodeStr, SDNode OpNode, (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, "${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr, - (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast - (_.ScalarLdFrag addr:$src2)))))>, + (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>, AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -6062,27 +6061,27 @@ let Predicates = [HasAVX512, NoVLX] in { (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), sub_ymm)>; - def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))), + def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v8i64 (VPROLQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), - imm:$src2)), sub_xmm)>; - def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))), + timm:$src2)), sub_xmm)>; + def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v8i64 (VPROLQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - imm:$src2)), sub_ymm)>; + timm:$src2)), sub_ymm)>; - def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))), + def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v16i32 (VPROLDZri (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), - imm:$src2)), sub_xmm)>; - def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))), + timm:$src2)), sub_xmm)>; + def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v16i32 (VPROLDZri (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - imm:$src2)), sub_ymm)>; + timm:$src2)), sub_ymm)>; } // Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. 
@@ -6113,27 +6112,27 @@ let Predicates = [HasAVX512, NoVLX] in { (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), sub_ymm)>; - def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))), + def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v8i64 (VPRORQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), - imm:$src2)), sub_xmm)>; - def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))), + timm:$src2)), sub_xmm)>; + def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v8i64 (VPRORQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - imm:$src2)), sub_ymm)>; + timm:$src2)), sub_ymm)>; - def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))), + def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v16i32 (VPRORDZri (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), - imm:$src2)), sub_xmm)>; - def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))), + timm:$src2)), sub_xmm)>; + def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v16i32 (VPRORDZri (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - imm:$src2)), sub_ymm)>; + timm:$src2)), sub_ymm)>; } //===-------------------------------------------------------------------===// @@ -6228,8 +6227,7 @@ multiclass avx512_permil_vec OpcVar, string OpcodeStr, SDNode OpNode, "$src1, ${src2}"##_.BroadcastStr, (_.VT (OpNode _.RC:$src1, - (Ctrl.VT (X86VBroadcast - (Ctrl.ScalarLdFrag addr:$src2)))))>, + (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>, T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -6419,7 +6417,7 @@ multiclass avx512_fma3p_213_rm opc, string OpcodeStr, SDNode OpNode, OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ), (OpNode _.RC:$src2, - _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>, + _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>, AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -6493,7 +6491,7 @@ multiclass avx512_fma3p_231_rm opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "${src3}"##_.BroadcastStr##", $src2", "$src2, ${src3}"##_.BroadcastStr, (_.VT (OpNode _.RC:$src2, - (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), + (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -6571,7 +6569,7 @@ multiclass avx512_fma3p_132_rm opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src2, _.ScalarMemOp:$src3), OpcodeStr, "${src3}"##_.BroadcastStr##", $src2", "$src2, ${src3}"##_.BroadcastStr, - (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), + (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1, _.RC:$src2)), 1, 0>, AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -6964,7 +6962,7 @@ multiclass avx512_pmadd52_rm opc, string OpcodeStr, SDNode OpNode, OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ), (OpNode _.RC:$src2, - (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))), + (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1)>, AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -7504,14 +7502,13 @@ multiclass avx512_vcvt_fp opc, string OpcodeStr, X86VectorVTInfo _, OpcodeStr, "${src}"##Broadcast, "${src}"##Broadcast, (_.VT (OpNode 
(_Src.VT - (X86VBroadcast (_Src.ScalarLdFrag addr:$src))) + (_Src.BroadcastLdFrag addr:$src)) )), (vselect MaskRC:$mask, (_.VT (OpNode (_Src.VT - (X86VBroadcast - (_Src.ScalarLdFrag addr:$src))))), + (_Src.BroadcastLdFrag addr:$src)))), _.RC:$src0), vselect, "$src0 = $dst">, EVEX, EVEX_B, Sched<[sched.Folded]>; @@ -7646,14 +7643,14 @@ let Predicates = [HasAVX512] in { v8f32x_info.ImmAllZerosV), (VCVTPD2PSZrmkz VK8WM:$mask, addr:$src)>; - def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src))))), + def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcastld64 addr:$src)))), (VCVTPD2PSZrmb addr:$src)>; def : Pat<(vselect VK8WM:$mask, - (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))), + (fpround (v8f64 (X86VBroadcastld64 addr:$src))), (v8f32 VR256X:$src0)), (VCVTPD2PSZrmbk VR256X:$src0, VK8WM:$mask, addr:$src)>; def : Pat<(vselect VK8WM:$mask, - (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))), + (fpround (v8f64 (X86VBroadcastld64 addr:$src))), v8f32x_info.ImmAllZerosV), (VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>; } @@ -7677,14 +7674,14 @@ let Predicates = [HasVLX] in { v4f32x_info.ImmAllZerosV), (VCVTPD2PSZ256rmkz VK4WM:$mask, addr:$src)>; - def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))), + def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))), (VCVTPD2PSZ256rmb addr:$src)>; def : Pat<(vselect VK4WM:$mask, - (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))), + (v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))), VR128X:$src0), (VCVTPD2PSZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>; def : Pat<(vselect VK4WM:$mask, - (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))), + (v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))), v4f32x_info.ImmAllZerosV), (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>; @@ -7708,12 +7705,12 @@ let Predicates = [HasVLX] in { VK2WM:$mask), (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(X86vfpround (v2f64 (X86VBroadcast (loadf64 addr:$src)))), + def : Pat<(X86vfpround (v2f64 (X86VBroadcastld64 addr:$src))), (VCVTPD2PSZ128rmb addr:$src)>; - def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)), (v4f32 VR128X:$src0), VK2WM:$mask), (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)), v4f32x_info.ImmAllZerosV, VK2WM:$mask), (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>; } @@ -8194,12 +8191,12 @@ let Predicates = [HasVLX] in { VK2WM:$mask), (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))))), + def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))), (VCVTPD2DQZ128rmb addr:$src)>; - def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)), (v4i32 VR128X:$src0), VK2WM:$mask), (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)), v4i32x_info.ImmAllZerosV, VK2WM:$mask), (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>; @@ -8223,12 +8220,12 @@ let Predicates = [HasVLX] in { VK2WM:$mask), (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v4i32 (X86cvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))))), + def : Pat<(v4i32 (X86cvttp2si (v2f64 (X86VBroadcastld64 
addr:$src)))), (VCVTTPD2DQZ128rmb addr:$src)>; - def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)), (v4i32 VR128X:$src0), VK2WM:$mask), (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)), v4i32x_info.ImmAllZerosV, VK2WM:$mask), (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>; @@ -8252,12 +8249,12 @@ let Predicates = [HasVLX] in { VK2WM:$mask), (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))))), + def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))), (VCVTPD2UDQZ128rmb addr:$src)>; - def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)), (v4i32 VR128X:$src0), VK2WM:$mask), (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)), v4i32x_info.ImmAllZerosV, VK2WM:$mask), (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>; @@ -8281,12 +8278,12 @@ let Predicates = [HasVLX] in { VK2WM:$mask), (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))))), + def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))), (VCVTTPD2UDQZ128rmb addr:$src)>; - def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)), (v4i32 VR128X:$src0), VK2WM:$mask), (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)), v4i32x_info.ImmAllZerosV, VK2WM:$mask), (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>; } @@ -8419,12 +8416,12 @@ let Predicates = [HasDQI, HasVLX] in { VK2WM:$mask), (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v4f32 (X86VSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))), + def : Pat<(v4f32 (X86VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))), (VCVTQQ2PSZ128rmb addr:$src)>; - def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))), + def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)), (v4f32 VR128X:$src0), VK2WM:$mask), (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))), + def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)), v4f32x_info.ImmAllZerosV, VK2WM:$mask), (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>; @@ -8448,12 +8445,12 @@ let Predicates = [HasDQI, HasVLX] in { VK2WM:$mask), (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v4f32 (X86VUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))), + def : Pat<(v4f32 (X86VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))), (VCVTUQQ2PSZ128rmb addr:$src)>; - def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))), + def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)), (v4f32 VR128X:$src0), VK2WM:$mask), (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))), + def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)), v4f32x_info.ImmAllZerosV, VK2WM:$mask), (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>; 
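Note on the conversion patterns above (VCVTPD2PS, VCVT(T)PD2(U)DQ, VCVT(U)QQ2PS): they now key on the dedicated X86VBroadcastld64 node rather than a vector broadcast wrapped around a separate scalar load, so a converted broadcast source selects the {1toN} rmb memory form directly. A small C++ sketch using AVX-512F intrinsics; the exact assembly in the comment is an assumption about how the fold typically comes out:

    #include <immintrin.h>
    // Sketch: converting a broadcast double can fold into the rmb form,
    // roughly "vcvtpd2ps (%rdi){1to8}, %ymm0".
    __m256 cvt_broadcast(const double *p) {
      return _mm512_cvtpd_ps(_mm512_set1_pd(*p));
    }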
} @@ -8576,21 +8573,21 @@ let ExeDomain = GenericDomain in { (ins _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _dest.RC:$dst, - (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)))]>, + (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>, Sched<[RR]>; let Constraints = "$src0 = $dst" in def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _dest.RC:$dst, - (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2), + (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), _dest.RC:$src0, _src.KRCWM:$mask))]>, Sched<[RR]>, EVEX_K; def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}", [(set _dest.RC:$dst, - (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2), + (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), _dest.ImmAllZerosV, _src.KRCWM:$mask))]>, Sched<[RR]>, EVEX_KZ; let hasSideEffects = 0, mayStore = 1 in { @@ -8631,17 +8628,17 @@ let Predicates = [HasAVX512] in { } def : Pat<(store (f64 (extractelt - (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))), + (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))), (iPTR 0))), addr:$dst), - (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>; + (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>; def : Pat<(store (i64 (extractelt - (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))), + (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))), (iPTR 0))), addr:$dst), - (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>; - def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst), - (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>; - def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst), - (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>; + (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>; + def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst), + (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>; + def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, timm:$src2)), addr:$dst), + (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>; } // Patterns for matching conversions from float to half-float and vice versa. 
@@ -8765,7 +8762,7 @@ multiclass avx512_fp14_p opc, string OpcodeStr, SDNode OpNode, (ins _.ScalarMemOp:$src), OpcodeStr, "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, (OpNode (_.VT - (X86VBroadcast (_.ScalarLdFrag addr:$src))))>, + (_.BroadcastLdFrag addr:$src)))>, EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -8859,7 +8856,7 @@ multiclass avx512_fp28_p opc, string OpcodeStr, X86VectorVTInfo _, (ins _.ScalarMemOp:$src), OpcodeStr, "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, (OpNode (_.VT - (X86VBroadcast (_.ScalarLdFrag addr:$src))))>, + (_.BroadcastLdFrag addr:$src)))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -8940,7 +8937,7 @@ multiclass avx512_sqrt_packed opc, string OpcodeStr, (ins _.ScalarMemOp:$src), OpcodeStr, "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, (fsqrt (_.VT - (X86VBroadcast (_.ScalarLdFrag addr:$src))))>, + (_.BroadcastLdFrag addr:$src)))>, EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -9049,14 +9046,14 @@ multiclass avx512_rndscale_scalar opc, string OpcodeStr, (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 imm:$src3)))>, + (i32 timm:$src3)))>, Sched<[sched]>; defm rb_Int : AVX512_maskable_scalar, EVEX_B, + (i32 timm:$src3)))>, EVEX_B, Sched<[sched]>; defm m_Int : AVX512_maskable_scalar opc, string OpcodeStr, OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (_.VT (X86RndScales _.RC:$src1, - _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>, + _.ScalarIntMemCPat:$src2, (i32 timm:$src3)))>, Sched<[sched.Folded, sched.ReadAfterFold]>; let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in { @@ -9082,15 +9079,15 @@ multiclass avx512_rndscale_scalar opc, string OpcodeStr, } let Predicates = [HasAVX512] in { - def : Pat<(X86VRndScale _.FRC:$src1, imm:$src2), + def : Pat<(X86VRndScale _.FRC:$src1, timm:$src2), (_.EltVT (!cast(NAME##r) (_.EltVT (IMPLICIT_DEF)), - _.FRC:$src1, imm:$src2))>; + _.FRC:$src1, timm:$src2))>; } let Predicates = [HasAVX512, OptForSize] in { - def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), imm:$src2), + def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2), (_.EltVT (!cast(NAME##m) (_.EltVT (IMPLICIT_DEF)), - addr:$src1, imm:$src2))>; + addr:$src1, timm:$src2))>; } } @@ -10109,19 +10106,19 @@ multiclass avx512_unary_fp_packed_imm opc, string OpcodeStr, SDNode OpNo (ins _.RC:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", (OpNode (_.VT _.RC:$src1), - (i32 imm:$src2))>, Sched<[sched]>; + (i32 timm:$src2))>, Sched<[sched]>; defm rmi : AVX512_maskable, + (i32 timm:$src2))>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable, EVEX_B, + (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)), + (i32 timm:$src2))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10136,7 +10133,7 @@ multiclass avx512_unary_fp_sae_packed_imm opc, string OpcodeStr, OpcodeStr##_.Suffix, "$src2, {sae}, $src1", "$src1, {sae}, $src2", (OpNode (_.VT _.RC:$src1), - (i32 imm:$src2))>, + (i32 timm:$src2))>, EVEX_B, Sched<[sched]>; } @@ -10169,22 +10166,22 @@ multiclass avx512_fp_packed_imm opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 imm:$src3))>, + (i32 timm:$src3))>, Sched<[sched]>; defm rmi : AVX512_maskable, + (i32 timm:$src3))>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : 
AVX512_maskable, EVEX_B, + (_.VT (_.BroadcastLdFrag addr:$src2)), + (i32 timm:$src3))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10200,7 +10197,7 @@ multiclass avx512_3Op_rm_imm8 opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1), (SrcInfo.VT SrcInfo.RC:$src2), - (i8 imm:$src3)))>, + (i8 timm:$src3)))>, Sched<[sched]>; defm rmi : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1), (SrcInfo.VT (bitconvert (SrcInfo.LdFrag addr:$src2))), - (i8 imm:$src3)))>, + (i8 timm:$src3)))>, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10226,8 +10223,8 @@ multiclass avx512_3Op_imm8 opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr##", $src3", (OpNode (_.VT _.RC:$src1), - (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), - (i8 imm:$src3))>, EVEX_B, + (_.VT (_.BroadcastLdFrag addr:$src2)), + (i8 timm:$src3))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -10241,15 +10238,14 @@ multiclass avx512_fp_scalar_imm opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 imm:$src3))>, + (i32 timm:$src3))>, Sched<[sched]>; defm rmi : AVX512_maskable_scalar, + (_.VT _.ScalarIntMemCPat:$src2), + (i32 timm:$src3))>, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10265,7 +10261,7 @@ multiclass avx512_fp_sae_packed_imm opc, string OpcodeStr, "$src1, $src2, {sae}, $src3", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 imm:$src3))>, + (i32 timm:$src3))>, EVEX_B, Sched<[sched]>; } @@ -10279,7 +10275,7 @@ multiclass avx512_fp_sae_scalar_imm opc, string OpcodeStr, SDNode OpNode "$src1, $src2, {sae}, $src3", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 imm:$src3))>, + (i32 timm:$src3))>, EVEX_B, Sched<[sched]>; } @@ -10401,7 +10397,7 @@ multiclass avx512_shuff_packed_128_common opc, string OpcodeStr, OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (_.VT (bitconvert (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2, - (i8 imm:$src3)))))>, + (i8 timm:$src3)))))>, Sched<[sched]>, EVEX2VEXOverride; defm rmi : AVX512_maskable opc, string OpcodeStr, (bitconvert (CastInfo.VT (X86Shuf128 _.RC:$src1, (CastInfo.LdFrag addr:$src2), - (i8 imm:$src3)))))>, + (i8 timm:$src3)))))>, Sched<[sched.Folded, sched.ReadAfterFold]>, EVEX2VEXOverride; defm rmbi : AVX512_maskable opc, string OpcodeStr, (bitconvert (CastInfo.VT (X86Shuf128 _.RC:$src1, - (X86VBroadcast (_.ScalarLdFrag addr:$src2)), - (i8 imm:$src3)))))>, EVEX_B, + (_.BroadcastLdFrag addr:$src2), + (i8 timm:$src3)))))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10491,14 +10487,14 @@ multiclass avx512_valign opc, string OpcodeStr, defm rri : AVX512_maskable, + (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>, Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">; defm rmi : AVX512_maskable, + (i8 timm:$src3)))>, Sched<[sched.Folded, sched.ReadAfterFold]>, EVEX2VEXOverride<"VPALIGNRrmi">; @@ -10507,8 +10503,8 @@ multiclass avx512_valign opc, string OpcodeStr, OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr##", $src3", (X86VAlign _.RC:$src1, - (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), - (i8 imm:$src3))>, EVEX_B, + (_.VT (_.BroadcastLdFrag addr:$src2)), + (i8 timm:$src3))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10541,13 
+10537,13 @@ defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr", // Fragments to help convert valignq into masked valignd. Or valignq/valignd // into vpalignr. -def ValignqImm32XForm : SDNodeXFormgetZExtValue() * 2, SDLoc(N)); }]>; -def ValignqImm8XForm : SDNodeXFormgetZExtValue() * 8, SDLoc(N)); }]>; -def ValigndImm8XForm : SDNodeXFormgetZExtValue() * 4, SDLoc(N)); }]>; @@ -10557,40 +10553,40 @@ multiclass avx512_vpalign_mask_lowering(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, To.RC:$src2, - (ImmXForm imm:$src3))>; + (ImmXForm timm:$src3))>; def : Pat<(To.VT (vselect To.KRCWM:$mask, (bitconvert (From.VT (OpNode From.RC:$src1, From.RC:$src2, - imm:$src3))), + timm:$src3))), To.ImmAllZerosV)), (!cast(OpcodeStr#"rrikz") To.KRCWM:$mask, To.RC:$src1, To.RC:$src2, - (ImmXForm imm:$src3))>; + (ImmXForm timm:$src3))>; def : Pat<(To.VT (vselect To.KRCWM:$mask, (bitconvert (From.VT (OpNode From.RC:$src1, (From.LdFrag addr:$src2), - imm:$src3))), + timm:$src3))), To.RC:$src0)), (!cast(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, addr:$src2, - (ImmXForm imm:$src3))>; + (ImmXForm timm:$src3))>; def : Pat<(To.VT (vselect To.KRCWM:$mask, (bitconvert (From.VT (OpNode From.RC:$src1, (From.LdFrag addr:$src2), - imm:$src3))), + timm:$src3))), To.ImmAllZerosV)), (!cast(OpcodeStr#"rmikz") To.KRCWM:$mask, To.RC:$src1, addr:$src2, - (ImmXForm imm:$src3))>; + (ImmXForm timm:$src3))>; } multiclass avx512_vpalign_mask_lowering_mb : avx512_vpalign_mask_lowering { def : Pat<(From.VT (OpNode From.RC:$src1, - (bitconvert (To.VT (X86VBroadcast - (To.ScalarLdFrag addr:$src2)))), - imm:$src3)), + (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))), + timm:$src3)), (!cast(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2, - (ImmXForm imm:$src3))>; + (ImmXForm timm:$src3))>; def : Pat<(To.VT (vselect To.KRCWM:$mask, (bitconvert (From.VT (OpNode From.RC:$src1, (bitconvert - (To.VT (X86VBroadcast - (To.ScalarLdFrag addr:$src2)))), - imm:$src3))), + (To.VT (To.BroadcastLdFrag addr:$src2))), + timm:$src3))), To.RC:$src0)), (!cast(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, addr:$src2, - (ImmXForm imm:$src3))>; + (ImmXForm timm:$src3))>; def : Pat<(To.VT (vselect To.KRCWM:$mask, (bitconvert (From.VT (OpNode From.RC:$src1, (bitconvert - (To.VT (X86VBroadcast - (To.ScalarLdFrag addr:$src2)))), - imm:$src3))), + (To.VT (To.BroadcastLdFrag addr:$src2))), + timm:$src3))), To.ImmAllZerosV)), (!cast(OpcodeStr#"rmbikz") To.KRCWM:$mask, To.RC:$src1, addr:$src2, - (ImmXForm imm:$src3))>; + (ImmXForm timm:$src3))>; } let Predicates = [HasAVX512] in { @@ -10666,13 +10659,13 @@ multiclass avx512_unary_rm opc, string OpcodeStr, SDNode OpNode, defm rr : AVX512_maskable, EVEX, AVX5128IBase, + (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase, Sched<[sched]>; defm rm : AVX512_maskable, + (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>, EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded]>; } @@ -10685,8 +10678,7 @@ multiclass avx512_unary_rmb opc, string OpcodeStr, SDNode OpNode, (ins _.ScalarMemOp:$src1), OpcodeStr, "${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr, - (_.VT (OpNode (X86VBroadcast - (_.ScalarLdFrag addr:$src1))))>, + (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>, EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded]>; } @@ -10770,7 +10762,7 @@ let Predicates = [HasAVX512, NoVLX] in { multiclass avx512_unary_lowering { let Predicates = [prd, NoVLX] in { - def : Pat<(_.info256.VT(OpNode 
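Note on the Valignq/Valignd immediate fragments above: when a masked valignq is re-expressed as valignd, or either of them as vpalignr, the shift count has to be rescaled to the new element size, which is exactly the *2, *8 and *4 factors in the fragments. A minimal C++ sketch of that rescaling:

    #include <stdint.h>
    // Sketch of the immediate rewrites used by the valign mask-lowering patterns.
    static uint8_t ValignqToValigndImm(uint8_t Imm) { return Imm * 2; } // qwords -> dwords
    static uint8_t ValignqToPalignrImm(uint8_t Imm) { return Imm * 8; } // qwords -> bytes
    static uint8_t ValigndToPalignrImm(uint8_t Imm) { return Imm * 4; } // dwords -> bytes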
_.info256.RC:$src1)), + def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))), (EXTRACT_SUBREG (!cast(InstrStr # "Zrr") (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), @@ -10778,7 +10770,7 @@ multiclass avx512_unary_lowering; - def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)), + def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))), (EXTRACT_SUBREG (!cast(InstrStr # "Zrr") (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), @@ -10829,17 +10821,16 @@ defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup, // AVX-512 - MOVDDUP //===----------------------------------------------------------------------===// -multiclass avx512_movddup_128 opc, string OpcodeStr, SDNode OpNode, +multiclass avx512_movddup_128 opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable, EVEX, + (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX, Sched<[sched]>; defm rm : AVX512_maskable, + (_.VT (_.BroadcastLdFrag addr:$src))>, EVEX, EVEX_CD8<_.EltSize, CD8VH>, Sched<[sched.Folded]>; } @@ -10853,7 +10844,7 @@ multiclass avx512_movddup_common opc, string OpcodeStr, SDNode OpNode, let Predicates = [HasAVX512, HasVLX] in { defm Z256 : avx512_unary_rm, EVEX_V256; - defm Z128 : avx512_movddup_128, EVEX_V128; } } @@ -10867,11 +10858,9 @@ multiclass avx512_movddup opc, string OpcodeStr, SDNode OpNode, defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>; let Predicates = [HasVLX] in { -def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))), - (VMOVDDUPZ128rm addr:$src)>; def : Pat<(v2f64 (X86VBroadcast f64:$src)), (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>; -def : Pat<(v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))), +def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))), (VMOVDDUPZ128rm addr:$src)>; def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))), (VMOVDDUPZ128rm addr:$src)>; @@ -10884,17 +10873,17 @@ def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)), immAllZerosV), (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>; -def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))), +def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)), (v2f64 VR128X:$src0)), (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; -def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))), +def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)), immAllZerosV), (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>; -def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))), +def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))), (v2f64 VR128X:$src0)), (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; -def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))), +def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))), immAllZerosV), (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>; } @@ -11070,14 +11059,14 @@ multiclass avx512_shift_packed opc, SDNode OpNode, Format MRMr, def rr : AVX512, + [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>, Sched<[sched]>; def rm : AVX512, + (i8 timm:$src2))))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -11104,6 +11093,7 @@ defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq", multiclass 
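Note on the VMOVDDUP changes above: the 128-bit movddup patterns now go through X86VBroadcast/X86VBroadcastld64 and the simple_load predicate, so both a register splat of an f64 and a broadcast from memory (masked or not) select movddup. A tiny C++ sketch, assuming AVX-512VL codegen; the intrinsic is just one convenient way to produce the splat:

    #include <immintrin.h>
    // Sketch: duplicating one double across a 128-bit vector, which the
    // patterns above lower to vmovddup (%rdi), %xmm0 (optionally masked).
    __m128d dup_from_memory(const double *p) {
      return _mm_loaddup_pd(p);
    }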
avx512_psadbw_packed opc, SDNode OpNode, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _dst, X86VectorVTInfo _src> { + let isCommutable = 1 in def rr : AVX512BIgetZExtValue(); // Swap bits 1/4 and 3/6. @@ -11151,7 +11141,7 @@ def VPTERNLOG321_imm8 : SDNodeXForm; -def VPTERNLOG213_imm8 : SDNodeXFormgetZExtValue(); // Swap bits 2/4 and 3/5. @@ -11162,7 +11152,7 @@ def VPTERNLOG213_imm8 : SDNodeXForm; -def VPTERNLOG132_imm8 : SDNodeXFormgetZExtValue(); // Swap bits 1/2 and 5/6. @@ -11173,7 +11163,7 @@ def VPTERNLOG132_imm8 : SDNodeXForm; -def VPTERNLOG231_imm8 : SDNodeXFormgetZExtValue(); // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5 @@ -11186,7 +11176,7 @@ def VPTERNLOG231_imm8 : SDNodeXForm; -def VPTERNLOG312_imm8 : SDNodeXFormgetZExtValue(); // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3 @@ -11210,7 +11200,7 @@ multiclass avx512_ternlog opc, string OpcodeStr, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_.VT _.RC:$src3), - (i8 imm:$src4)), 1, 1>, + (i8 timm:$src4)), 1, 1>, AVX512AIi8Base, EVEX_4V, Sched<[sched]>; defm rmi : AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_.VT (bitconvert (_.LdFrag addr:$src3))), - (i8 imm:$src4)), 1, 0>, + (i8 timm:$src4)), 1, 0>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, "$src2, ${src3}"##_.BroadcastStr##", $src4", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), - (i8 imm:$src4)), 1, 0>, EVEX_B, + (_.VT (_.BroadcastLdFrag addr:$src3)), + (i8 timm:$src4)), 1, 0>, EVEX_B, AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; }// Constraints = "$src1 = $dst" // Additional patterns for matching passthru operand in other positions. def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), + (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>; + _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)), + (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>; + _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>; // Additional patterns for matching loads in other positions. def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src2, _.RC:$src1, (i8 imm:$src4))), + _.RC:$src2, _.RC:$src1, (i8 timm:$src4))), (!cast(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2, - addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; + addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; def : Pat<(_.VT (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src2, (i8 imm:$src4))), + _.RC:$src2, (i8 timm:$src4))), (!cast(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2, - addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; + addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; // Additional patterns for matching zero masking with loads in other // positions. 
def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), + _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.ImmAllZerosV)), (!cast(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src2, (i8 imm:$src4)), + _.RC:$src2, (i8 timm:$src4)), _.ImmAllZerosV)), (!cast(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; // Additional patterns for matching masked loads with different // operand orders. def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src2, (i8 imm:$src4)), + _.RC:$src2, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), + _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src2, _.RC:$src1, - (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)), + (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src1, (i8 imm:$src4)), + _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src1, _.RC:$src2, (i8 imm:$src4)), + _.RC:$src1, _.RC:$src2, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>; // Additional patterns for matching broadcasts in other positions. - def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src2, _.RC:$src1, (i8 imm:$src4))), + def : Pat<(_.VT (OpNode (_.BroadcastLdFrag addr:$src3), + _.RC:$src2, _.RC:$src1, (i8 timm:$src4))), (!cast(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2, - addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; + addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; def : Pat<(_.VT (OpNode _.RC:$src1, - (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src2, (i8 imm:$src4))), + (_.BroadcastLdFrag addr:$src3), + _.RC:$src2, (i8 timm:$src4))), (!cast(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2, - addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; + addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; // Additional patterns for matching zero masking with broadcasts in other // positions. 
def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), + (OpNode (_.BroadcastLdFrag addr:$src3), + _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.ImmAllZerosV)), (!cast(Name#_.ZSuffix#rmbikz) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, - (VPTERNLOG321_imm8 imm:$src4))>; + (VPTERNLOG321_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src1, - (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src2, (i8 imm:$src4)), + (_.BroadcastLdFrag addr:$src3), + _.RC:$src2, (i8 timm:$src4)), _.ImmAllZerosV)), (!cast(Name#_.ZSuffix#rmbikz) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, - (VPTERNLOG132_imm8 imm:$src4))>; + (VPTERNLOG132_imm8 timm:$src4))>; // Additional patterns for matching masked broadcasts with different // operand orders. def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode _.RC:$src1, - (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src2, (i8 imm:$src4)), + (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3), + _.RC:$src2, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), + (OpNode (_.BroadcastLdFrag addr:$src3), + _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src2, _.RC:$src1, - (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - (i8 imm:$src4)), _.RC:$src1)), + (_.BroadcastLdFrag addr:$src3), + (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src2, - (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src1, (i8 imm:$src4)), + (_.BroadcastLdFrag addr:$src3), + _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src1, _.RC:$src2, (i8 imm:$src4)), + (OpNode (_.BroadcastLdFrag addr:$src3), + _.RC:$src1, _.RC:$src2, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>; } multiclass avx512_common_ternlog, VEX_W; +// Patterns to use VPTERNLOG for vXi16/vXi8 vectors. 
+let Predicates = [HasVLX] in { + def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3, + (i8 timm:$src4))), + (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, + timm:$src4)>; + def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, + (loadv16i8 addr:$src3), (i8 timm:$src4))), + (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v16i8 (X86vpternlog (loadv16i8 addr:$src3), VR128X:$src2, + VR128X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v16i8 (X86vpternlog VR128X:$src1, (loadv16i8 addr:$src3), + VR128X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3, + (i8 timm:$src4))), + (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, + timm:$src4)>; + def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, + (loadv8i16 addr:$src3), (i8 timm:$src4))), + (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v8i16 (X86vpternlog (loadv8i16 addr:$src3), VR128X:$src2, + VR128X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v8i16 (X86vpternlog VR128X:$src1, (loadv8i16 addr:$src3), + VR128X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3, + (i8 timm:$src4))), + (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, + timm:$src4)>; + def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, + (loadv32i8 addr:$src3), (i8 timm:$src4))), + (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v32i8 (X86vpternlog (loadv32i8 addr:$src3), VR256X:$src2, + VR256X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v32i8 (X86vpternlog VR256X:$src1, (loadv32i8 addr:$src3), + VR256X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3, + (i8 timm:$src4))), + (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, + timm:$src4)>; + def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, + (loadv16i16 addr:$src3), (i8 timm:$src4))), + (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v16i16 (X86vpternlog (loadv16i16 addr:$src3), VR256X:$src2, + VR256X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v16i16 (X86vpternlog VR256X:$src1, (loadv16i16 addr:$src3), + VR256X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; +} + +let Predicates = [HasAVX512] in { + def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3, + (i8 timm:$src4))), + (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, + timm:$src4)>; + def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, + (loadv64i8 addr:$src3), (i8 timm:$src4))), + (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v64i8 (X86vpternlog (loadv64i8 addr:$src3), VR512:$src2, + 
VR512:$src1, (i8 timm:$src4))), + (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v64i8 (X86vpternlog VR512:$src1, (loadv64i8 addr:$src3), + VR512:$src2, (i8 timm:$src4))), + (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3, + (i8 timm:$src4))), + (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, + timm:$src4)>; + def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, + (loadv32i16 addr:$src3), (i8 timm:$src4))), + (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v32i16 (X86vpternlog (loadv32i16 addr:$src3), VR512:$src2, + VR512:$src1, (i8 timm:$src4))), + (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3), + VR512:$src2, (i8 timm:$src4))), + (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; +} + // Patterns to implement vnot using vpternlog instead of creating all ones // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen // so that the result is only dependent on src0. But we use the same source @@ -11498,14 +11594,14 @@ multiclass avx512_fixupimm_packed opc, string OpcodeStr, (X86VFixupimm (_.VT _.RC:$src1), (_.VT _.RC:$src2), (TblVT.VT _.RC:$src3), - (i32 imm:$src4))>, Sched<[sched]>; + (i32 timm:$src4))>, Sched<[sched]>; defm rmi : AVX512_maskable_3src, + (i32 timm:$src4))>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable_3src opc, string OpcodeStr, "$src2, ${src3}"##_.BroadcastStr##", $src4", (X86VFixupimm (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))), - (i32 imm:$src4))>, + (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)), + (i32 timm:$src4))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } // Constraints = "$src1 = $dst" } @@ -11531,7 +11627,7 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { (X86VFixupimmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), (TblVT.VT _.RC:$src3), - (i32 imm:$src4))>, + (i32 timm:$src4))>, EVEX_B, Sched<[sched]>; } } @@ -11547,7 +11643,7 @@ multiclass avx512_fixupimm_scalar opc, string OpcodeStr, (X86VFixupimms (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_src3VT.VT _src3VT.RC:$src3), - (i32 imm:$src4))>, Sched<[sched]>; + (i32 timm:$src4))>, Sched<[sched]>; defm rrib : AVX512_maskable_3src_scalar opc, string OpcodeStr, (X86VFixupimmSAEs (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_src3VT.VT _src3VT.RC:$src3), - (i32 imm:$src4))>, + (i32 timm:$src4))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmi : AVX512_maskable_3src_scalar opc, string OpcodeStr, (_.VT _.RC:$src2), (_src3VT.VT (scalar_to_vector (_src3VT.ScalarLdFrag addr:$src3))), - (i32 imm:$src4))>, + (i32 timm:$src4))>, Sched<[sched.Folded, sched.ReadAfterFold]>; } } multiclass avx512_fixupimm_packed_all { let Predicates = [HasAVX512] in defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM, @@ -11804,7 +11900,7 @@ multiclass VBMI2_shift_var_rmb Op, string OpStr, SDNode OpNode, "${src3}"##VTI.BroadcastStr##", $src2", "$src2, ${src3}"##VTI.BroadcastStr, (OpNode VTI.RC:$src1, VTI.RC:$src2, - (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>, + (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>, AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -11880,12 +11976,14 @@ defm VPEXPANDW : 
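Note on the VPTERNLOG*_imm8 transforms used throughout the ternlog patterns above: they permute the 8-bit truth-table immediate whenever the instruction operands are reordered. For the "321" order (operands 0 and 2 swapped) the table entries at indices 1/4 and 3/6 are exchanged, as the in-tree comment says. A minimal C++ sketch of that remapping, assuming the usual mapping of operand 0 to the high index bit:

    #include <stdint.h>
    // Sketch: rewrite a ternary-logic immediate after swapping operands 0 and 2.
    // Index bits 2 and 0 swap, so table entries 1<->4 and 3<->6 are exchanged.
    static uint8_t SwapTernlogOps0And2(uint8_t Imm) {
      uint8_t New = Imm & 0xA5;            // entries 0, 2, 5, 7 are unaffected
      if (Imm & (1 << 1)) New |= 1 << 4;
      if (Imm & (1 << 4)) New |= 1 << 1;
      if (Imm & (1 << 3)) New |= 1 << 6;
      if (Imm & (1 << 6)) New |= 1 << 3;
      return New;
    }

Under the same encoding, immediate 15 (0x0F) reads as "NOT of operand 0", which is why the vnot-via-vpternlog patterns mentioned above can use a value that depends only on src0.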
expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256, let Constraints = "$src1 = $dst" in multiclass VNNI_rmb Op, string OpStr, SDNode OpNode, - X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { + X86FoldableSchedWrite sched, X86VectorVTInfo VTI, + bit IsCommutable> { defm r : AVX512_maskable_3src, + VTI.RC:$src2, VTI.RC:$src3)), + IsCommutable, IsCommutable>, EVEX_4V, T8PD, Sched<[sched]>; defm m : AVX512_maskable_3src Op, string OpStr, SDNode OpNode, OpStr, "${src3}"##VTI.BroadcastStr##", $src2", "$src2, ${src3}"##VTI.BroadcastStr, (OpNode VTI.RC:$src1, VTI.RC:$src2, - (VTI.VT (X86VBroadcast - (VTI.ScalarLdFrag addr:$src3))))>, + (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>, EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B, T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass VNNI_common Op, string OpStr, SDNode OpNode, - X86SchedWriteWidths sched> { + X86SchedWriteWidths sched, bit IsCommutable> { let Predicates = [HasVNNI] in - defm Z : VNNI_rmb, EVEX_V512; + defm Z : VNNI_rmb, EVEX_V512; let Predicates = [HasVNNI, HasVLX] in { - defm Z256 : VNNI_rmb, EVEX_V256; - defm Z128 : VNNI_rmb, EVEX_V128; + defm Z256 : VNNI_rmb, EVEX_V256; + defm Z128 : VNNI_rmb, EVEX_V128; } } // FIXME: Is there a better scheduler class for VPDP? -defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul>; -defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul>; -defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul>; -defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul>; +defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>; +defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>; +defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>; +defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>; + +def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs), + (X86vpmaddwd node:$lhs, node:$rhs), [{ + return N->hasOneUse(); +}]>; + +// Patterns to match VPDPWSSD from existing instructions/intrinsics. 
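Note on the VPDPWSSD patterns introduced by the comment above (the patterns themselves follow below): with AVX512_VNNI, an integer accumulate of a vpmaddwd result is matched back into a single vpdpwssd, but only when the vpmaddwd has no other users, which is what the hasOneUse check in X86vpmaddwd_su enforces. A C++ sketch using AVX-512BW intrinsics; whether the fusion fires for this exact source is an assumption:

    #include <immintrin.h>
    // Sketch: vpmaddwd followed by vpaddd on the same (single-use) product can
    // be combined into one vpdpwssd by the new patterns.
    __m512i dot_accumulate(__m512i acc, __m512i a, __m512i b) {
      __m512i prod = _mm512_madd_epi16(a, b);  // vpmaddwd
      return _mm512_add_epi32(acc, prod);      // vpaddd -> vpdpwssd
    }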
+let Predicates = [HasVNNI] in { + def : Pat<(v16i32 (add VR512:$src1, + (X86vpmaddwd_su VR512:$src2, VR512:$src3))), + (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>; + def : Pat<(v16i32 (add VR512:$src1, + (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))), + (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>; +} +let Predicates = [HasVNNI,HasVLX] in { + def : Pat<(v8i32 (add VR256X:$src1, + (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))), + (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>; + def : Pat<(v8i32 (add VR256X:$src1, + (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))), + (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>; + def : Pat<(v4i32 (add VR128X:$src1, + (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))), + (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>; + def : Pat<(v4i32 (add VR128X:$src1, + (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))), + (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>; +} //===----------------------------------------------------------------------===// // Bit Algorithms @@ -12004,8 +12133,8 @@ multiclass GF2P8AFFINE_avx512_rmb_imm Op, string OpStr, SDNode OpNode, OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1", "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3", (OpNode (VTI.VT VTI.RC:$src1), - (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))), - (i8 imm:$src3))>, EVEX_B, + (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))), + (i8 timm:$src3))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -12116,7 +12245,7 @@ multiclass avx512_vp2intersect_modes { !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr, ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"), [(set _.KRPC:$dst, (X86vp2intersect - _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2)))))]>, + _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>, EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>; } @@ -12217,12 +12346,12 @@ let Predicates = [HasBF16, HasVLX] in { (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>; def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 - (X86VBroadcast (loadf32 addr:$src))))), + (X86VBroadcastld32 addr:$src)))), (VCVTNEPS2BF16Z128rmb addr:$src)>; - def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))), + def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)), (v8i16 VR128X:$src0), VK4WM:$mask), (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>; - def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))), + def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)), v8i16x_info.ImmAllZerosV, VK4WM:$mask), (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>; } @@ -12249,7 +12378,7 @@ multiclass avx512_dpbf16ps_rm opc, string OpcodeStr, SDNode OpNode, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr), (_.VT (OpNode _.RC:$src1, _.RC:$src2, - (src_v.VT (X86VBroadcast(src_v.ScalarLdFrag addr:$src3)))))>, + (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>, EVEX_B, EVEX_4V; } diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index e52635f8d48b..1e399a894490 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -1271,22 +1271,22 @@ let isCompare = 1 in { // ANDN Instruction // multiclass bmi_andn { + PatFrag ld_frag, X86FoldableSchedWrite sched> { def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, 
EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>, - Sched<[WriteALU]>; + Sched<[sched]>; def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>, - Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } // Complexity is reduced to give and with immediate a chance to match first. let Predicates = [HasBMI], Defs = [EFLAGS], AddedComplexity = -6 in { - defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32>, T8PS, VEX_4V; - defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64>, T8PS, VEX_4V, VEX_W; + defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32, WriteALU>, T8PS, VEX_4V; + defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64, WriteALU>, T8PS, VEX_4V, VEX_W; } let Predicates = [HasBMI], AddedComplexity = -6 in { diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index 50aed98112c3..aa45e9b191c1 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -131,11 +131,11 @@ addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) { /// reference. static inline void setDirectAddressInInstr(MachineInstr *MI, unsigned Operand, unsigned Reg) { - // Direct memory address is in a form of: Reg, 1 (Scale), NoReg, 0, NoReg. - MI->getOperand(Operand).setReg(Reg); + // Direct memory address is in a form of: Reg/FI, 1 (Scale), NoReg, 0, NoReg. + MI->getOperand(Operand).ChangeToRegister(Reg, /*isDef=*/false); MI->getOperand(Operand + 1).setImm(1); MI->getOperand(Operand + 2).setReg(0); - MI->getOperand(Operand + 3).setImm(0); + MI->getOperand(Operand + 3).ChangeToImmediate(0); MI->getOperand(Operand + 4).setReg(0); } diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td index 099f6aa8d8bb..330b8c7a8a43 100644 --- a/lib/Target/X86/X86InstrCMovSetCC.td +++ b/lib/Target/X86/X86InstrCMovSetCC.td @@ -20,19 +20,19 @@ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", : I<0x40, MRMSrcRegCC, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, ccode:$cond), "cmov${cond}{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, - (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>, + (X86cmov GR16:$src1, GR16:$src2, timm:$cond, EFLAGS))]>, TB, OpSize16; def CMOV32rr : I<0x40, MRMSrcRegCC, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, ccode:$cond), "cmov${cond}{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, - (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>, + (X86cmov GR32:$src1, GR32:$src2, timm:$cond, EFLAGS))]>, TB, OpSize32; def CMOV64rr :RI<0x40, MRMSrcRegCC, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, ccode:$cond), "cmov${cond}{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, - (X86cmov GR64:$src1, GR64:$src2, imm:$cond, EFLAGS))]>, TB; + (X86cmov GR64:$src1, GR64:$src2, timm:$cond, EFLAGS))]>, TB; } let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", @@ -41,29 +41,46 @@ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", : I<0x40, MRMSrcMemCC, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2, ccode:$cond), "cmov${cond}{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - imm:$cond, EFLAGS))]>, TB, OpSize16; + timm:$cond, EFLAGS))]>, TB, OpSize16; def CMOV32rm : I<0x40, MRMSrcMemCC, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2, ccode:$cond), "cmov${cond}{l}\t{$src2, $dst|$dst, $src2}", 
[(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - imm:$cond, EFLAGS))]>, TB, OpSize32; + timm:$cond, EFLAGS))]>, TB, OpSize32; def CMOV64rm :RI<0x40, MRMSrcMemCC, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2, ccode:$cond), "cmov${cond}{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), - imm:$cond, EFLAGS))]>, TB; + timm:$cond, EFLAGS))]>, TB; } // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" } // isCodeGenOnly = 1, ForceDisassemble = 1 +def inv_cond_XFORM : SDNodeXForm(N->getZExtValue()); + return CurDAG->getTargetConstant(X86::GetOppositeBranchCondition(CC), + SDLoc(N), MVT::i8); +}]>; + +// Conditional moves with folded loads with operands swapped and conditions +// inverted. +let Predicates = [HasCMov] in { + def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, timm:$cond, EFLAGS), + (CMOV16rm GR16:$src2, addr:$src1, (inv_cond_XFORM timm:$cond))>; + def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, timm:$cond, EFLAGS), + (CMOV32rm GR32:$src2, addr:$src1, (inv_cond_XFORM timm:$cond))>; + def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, timm:$cond, EFLAGS), + (CMOV64rm GR64:$src2, addr:$src1, (inv_cond_XFORM timm:$cond))>; +} + // SetCC instructions. let Uses = [EFLAGS], isCodeGenOnly = 1, ForceDisassemble = 1 in { def SETCCr : I<0x90, MRMXrCC, (outs GR8:$dst), (ins ccode:$cond), "set${cond}\t$dst", - [(set GR8:$dst, (X86setcc imm:$cond, EFLAGS))]>, + [(set GR8:$dst, (X86setcc timm:$cond, EFLAGS))]>, TB, Sched<[WriteSETCC]>; def SETCCm : I<0x90, MRMXmCC, (outs), (ins i8mem:$dst, ccode:$cond), "set${cond}\t$dst", - [(store (X86setcc imm:$cond, EFLAGS), addr:$dst)]>, + [(store (X86setcc timm:$cond, EFLAGS), addr:$dst)]>, TB, Sched<[WriteSETCCStore]>; } // Uses = [EFLAGS] diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index efaccdc9ee96..78d8dd3c0d03 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -542,7 +542,7 @@ multiclass CMOVrr_PSEUDO { def CMOV#NAME : I<0, Pseudo, (outs RC:$dst), (ins RC:$t, RC:$f, i8imm:$cond), "#CMOV_"#NAME#" PSEUDO!", - [(set RC:$dst, (VT (X86cmov RC:$t, RC:$f, imm:$cond, + [(set RC:$dst, (VT (X86cmov RC:$t, RC:$f, timm:$cond, EFLAGS)))]>; } @@ -593,66 +593,66 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] in { defm _VK64 : CMOVrr_PSEUDO; } // usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] -def : Pat<(f128 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)), - (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>; +def : Pat<(f128 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)), + (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>; let Predicates = [NoVLX] in { - def : Pat<(v16i8 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)), - (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>; - def : Pat<(v8i16 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)), - (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>; - def : Pat<(v4i32 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)), - (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>; - def : Pat<(v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)), - (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>; - def : Pat<(v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)), - (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>; - - def : Pat<(v32i8 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)), - (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>; - def : Pat<(v16i16 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)), - (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>; - def : 
Pat<(v8i32 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)), - (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>; - def : Pat<(v8f32 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)), - (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>; - def : Pat<(v4f64 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)), - (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>; + def : Pat<(v16i8 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)), + (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>; + def : Pat<(v8i16 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)), + (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>; + def : Pat<(v4i32 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)), + (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>; + def : Pat<(v4f32 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)), + (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>; + def : Pat<(v2f64 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)), + (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>; + + def : Pat<(v32i8 (X86cmov VR256:$t, VR256:$f, timm:$cond, EFLAGS)), + (CMOV_VR256 VR256:$t, VR256:$f, timm:$cond)>; + def : Pat<(v16i16 (X86cmov VR256:$t, VR256:$f, timm:$cond, EFLAGS)), + (CMOV_VR256 VR256:$t, VR256:$f, timm:$cond)>; + def : Pat<(v8i32 (X86cmov VR256:$t, VR256:$f, timm:$cond, EFLAGS)), + (CMOV_VR256 VR256:$t, VR256:$f, timm:$cond)>; + def : Pat<(v8f32 (X86cmov VR256:$t, VR256:$f, timm:$cond, EFLAGS)), + (CMOV_VR256 VR256:$t, VR256:$f, timm:$cond)>; + def : Pat<(v4f64 (X86cmov VR256:$t, VR256:$f, timm:$cond, EFLAGS)), + (CMOV_VR256 VR256:$t, VR256:$f, timm:$cond)>; } let Predicates = [HasVLX] in { - def : Pat<(v16i8 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)), - (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>; - def : Pat<(v8i16 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)), - (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>; - def : Pat<(v4i32 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)), - (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>; - def : Pat<(v4f32 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)), - (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>; - def : Pat<(v2f64 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)), - (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>; - - def : Pat<(v32i8 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)), - (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>; - def : Pat<(v16i16 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)), - (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>; - def : Pat<(v8i32 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)), - (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>; - def : Pat<(v8f32 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)), - (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>; - def : Pat<(v4f64 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)), - (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>; + def : Pat<(v16i8 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)), + (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>; + def : Pat<(v8i16 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)), + (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>; + def : Pat<(v4i32 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)), + (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>; + def : Pat<(v4f32 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)), + (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>; + def : Pat<(v2f64 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)), + (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>; + + def : Pat<(v32i8 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)), + (CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>; + def : Pat<(v16i16 (X86cmov VR256X:$t, 
VR256X:$f, timm:$cond, EFLAGS)), + (CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>; + def : Pat<(v8i32 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)), + (CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>; + def : Pat<(v8f32 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)), + (CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>; + def : Pat<(v4f64 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)), + (CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>; } -def : Pat<(v64i8 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)), - (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>; -def : Pat<(v32i16 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)), - (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>; -def : Pat<(v16i32 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)), - (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>; -def : Pat<(v16f32 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)), - (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>; -def : Pat<(v8f64 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)), - (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>; +def : Pat<(v64i8 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)), + (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>; +def : Pat<(v32i16 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)), + (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>; +def : Pat<(v16i32 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)), + (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>; +def : Pat<(v16f32 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)), + (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>; +def : Pat<(v8f64 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)), + (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>; //===----------------------------------------------------------------------===// // Normal-Instructions-With-Lock-Prefix Pseudo Instructions @@ -1126,12 +1126,12 @@ def : Pat<(f64 (bitconvert (i64 (atomic_load_64 addr:$src)))), // binary size compared to a regular MOV, but it introduces an unnecessary // load, so is not suitable for regular or optsize functions. let Predicates = [OptForMinSize] in { -def : Pat<(nonvolatile_store (i16 0), addr:$dst), (AND16mi8 addr:$dst, 0)>; -def : Pat<(nonvolatile_store (i32 0), addr:$dst), (AND32mi8 addr:$dst, 0)>; -def : Pat<(nonvolatile_store (i64 0), addr:$dst), (AND64mi8 addr:$dst, 0)>; -def : Pat<(nonvolatile_store (i16 -1), addr:$dst), (OR16mi8 addr:$dst, -1)>; -def : Pat<(nonvolatile_store (i32 -1), addr:$dst), (OR32mi8 addr:$dst, -1)>; -def : Pat<(nonvolatile_store (i64 -1), addr:$dst), (OR64mi8 addr:$dst, -1)>; +def : Pat<(simple_store (i16 0), addr:$dst), (AND16mi8 addr:$dst, 0)>; +def : Pat<(simple_store (i32 0), addr:$dst), (AND32mi8 addr:$dst, 0)>; +def : Pat<(simple_store (i64 0), addr:$dst), (AND64mi8 addr:$dst, 0)>; +def : Pat<(simple_store (i16 -1), addr:$dst), (OR16mi8 addr:$dst, -1)>; +def : Pat<(simple_store (i32 -1), addr:$dst), (OR32mi8 addr:$dst, -1)>; +def : Pat<(simple_store (i64 -1), addr:$dst), (OR64mi8 addr:$dst, -1)>; } // In kernel code model, we can get the address of a label @@ -1276,23 +1276,6 @@ def : Pat<(X86cmp GR32:$src1, 0), def : Pat<(X86cmp GR64:$src1, 0), (TEST64rr GR64:$src1, GR64:$src1)>; -def inv_cond_XFORM : SDNodeXForm(N->getZExtValue()); - return CurDAG->getTargetConstant(X86::GetOppositeBranchCondition(CC), - SDLoc(N), MVT::i8); -}]>; - -// Conditional moves with folded loads with operands swapped and conditions -// inverted. 
-let Predicates = [HasCMov] in { - def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, imm:$cond, EFLAGS), - (CMOV16rm GR16:$src2, addr:$src1, (inv_cond_XFORM imm:$cond))>; - def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, imm:$cond, EFLAGS), - (CMOV32rm GR32:$src2, addr:$src1, (inv_cond_XFORM imm:$cond))>; - def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, imm:$cond, EFLAGS), - (CMOV64rm GR64:$src2, addr:$src1, (inv_cond_XFORM imm:$cond))>; -} - // zextload bool -> zextload byte // i1 stored in one byte in zero-extended form. // Upper bits cleanup should be executed before Store. diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index f82e80965b7c..e1e6eea59884 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -75,7 +75,7 @@ let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump], def JCC_1 : Ii8PCRel <0x70, AddCCFrm, (outs), (ins brtarget8:$dst, ccode:$cond), "j${cond}\t$dst", - [(X86brcond bb:$dst, imm:$cond, EFLAGS)]>; + [(X86brcond bb:$dst, timm:$cond, EFLAGS)]>; let hasSideEffects = 0 in { def JCC_2 : Ii16PCRel<0x80, AddCCFrm, (outs), (ins brtarget16:$dst, ccode:$cond), @@ -145,6 +145,17 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>, Sched<[WriteJumpLd]>; + // Win64 wants indirect jumps leaving the function to have a REX_W prefix. + // These are switched from TAILJMPr/m64_REX in MCInstLower. + let isCodeGenOnly = 1, hasREX_WPrefix = 1 in { + def JMP64r_REX : I<0xFF, MRM4r, (outs), (ins GR64:$dst), + "rex64 jmp{q}\t{*}$dst", []>, Sched<[WriteJump]>; + let mayLoad = 1 in + def JMP64m_REX : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), + "rex64 jmp{q}\t{*}$dst", []>, Sched<[WriteJumpLd]>; + + } + // Non-tracking jumps for IBT, use with caution. let isCodeGenOnly = 1 in { def JMP16r_NT : I<0xFF, MRM4r, (outs), (ins GR16 : $dst), "jmp{w}\t{*}$dst", @@ -273,39 +284,35 @@ let isCall = 1 in // Tail call stuff. let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, - isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in - let Uses = [ESP, SSP] in { - def TCRETURNdi : PseudoI<(outs), - (ins i32imm_pcrel:$dst, i32imm:$offset), []>, NotMemoryFoldable; - def TCRETURNri : PseudoI<(outs), - (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>, NotMemoryFoldable; + isCodeGenOnly = 1, Uses = [ESP, SSP] in { + def TCRETURNdi : PseudoI<(outs), (ins i32imm_pcrel:$dst, i32imm:$offset), + []>, Sched<[WriteJump]>, NotMemoryFoldable; + def TCRETURNri : PseudoI<(outs), (ins ptr_rc_tailcall:$dst, i32imm:$offset), + []>, Sched<[WriteJump]>, NotMemoryFoldable; let mayLoad = 1 in - def TCRETURNmi : PseudoI<(outs), - (ins i32mem_TC:$dst, i32imm:$offset), []>; + def TCRETURNmi : PseudoI<(outs), (ins i32mem_TC:$dst, i32imm:$offset), + []>, Sched<[WriteJumpLd]>; - // FIXME: The should be pseudo instructions that are lowered when going to - // mcinst. - def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs), - (ins i32imm_pcrel:$dst), "jmp\t$dst", []>; + def TAILJMPd : PseudoI<(outs), (ins i32imm_pcrel:$dst), + []>, Sched<[WriteJump]>; - def TAILJMPr : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst), - "", []>; // FIXME: Remove encoding when JIT is dead. 
+ def TAILJMPr : PseudoI<(outs), (ins ptr_rc_tailcall:$dst), + []>, Sched<[WriteJump]>; let mayLoad = 1 in - def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst), - "jmp{l}\t{*}$dst", []>; + def TAILJMPm : PseudoI<(outs), (ins i32mem_TC:$dst), + []>, Sched<[WriteJumpLd]>; } // Conditional tail calls are similar to the above, but they are branches // rather than barriers, and they use EFLAGS. let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1, - isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in + isCodeGenOnly = 1, SchedRW = [WriteJump] in let Uses = [ESP, EFLAGS, SSP] in { def TCRETURNdicc : PseudoI<(outs), (ins i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>; // This gets substituted to a conditional jump instruction in MC lowering. - def TAILJMPd_CC : Ii32PCRel<0x80, RawFrm, (outs), - (ins i32imm_pcrel:$dst, i32imm:$cond), "", []>; + def TAILJMPd_CC : PseudoI<(outs), (ins i32imm_pcrel:$dst, i32imm:$cond), []>; } @@ -348,34 +355,36 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in { } let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, - isCodeGenOnly = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in { + isCodeGenOnly = 1, Uses = [RSP, SSP] in { def TCRETURNdi64 : PseudoI<(outs), - (ins i64i32imm_pcrel:$dst, i32imm:$offset), - []>; + (ins i64i32imm_pcrel:$dst, i32imm:$offset), + []>, Sched<[WriteJump]>; def TCRETURNri64 : PseudoI<(outs), - (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>, NotMemoryFoldable; + (ins ptr_rc_tailcall:$dst, i32imm:$offset), + []>, Sched<[WriteJump]>, NotMemoryFoldable; let mayLoad = 1 in def TCRETURNmi64 : PseudoI<(outs), - (ins i64mem_TC:$dst, i32imm:$offset), []>, NotMemoryFoldable; + (ins i64mem_TC:$dst, i32imm:$offset), + []>, Sched<[WriteJumpLd]>, NotMemoryFoldable; - def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs), (ins i64i32imm_pcrel:$dst), - "jmp\t$dst", []>; + def TAILJMPd64 : PseudoI<(outs), (ins i64i32imm_pcrel:$dst), + []>, Sched<[WriteJump]>; - def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst), - "jmp{q}\t{*}$dst", []>; + def TAILJMPr64 : PseudoI<(outs), (ins ptr_rc_tailcall:$dst), + []>, Sched<[WriteJump]>; let mayLoad = 1 in - def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst), - "jmp{q}\t{*}$dst", []>; + def TAILJMPm64 : PseudoI<(outs), (ins i64mem_TC:$dst), + []>, Sched<[WriteJumpLd]>; // Win64 wants indirect jumps leaving the function to have a REX_W prefix. let hasREX_WPrefix = 1 in { - def TAILJMPr64_REX : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst), - "rex64 jmp{q}\t{*}$dst", []>; + def TAILJMPr64_REX : PseudoI<(outs), (ins ptr_rc_tailcall:$dst), + []>, Sched<[WriteJump]>; let mayLoad = 1 in - def TAILJMPm64_REX : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst), - "rex64 jmp{q}\t{*}$dst", []>; + def TAILJMPm64_REX : PseudoI<(outs), (ins i64mem_TC:$dst), + []>, Sched<[WriteJumpLd]>; } } @@ -403,13 +412,13 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1, // Conditional tail calls are similar to the above, but they are branches // rather than barriers, and they use EFLAGS. let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1, - isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in + isCodeGenOnly = 1, SchedRW = [WriteJump] in let Uses = [RSP, EFLAGS, SSP] in { def TCRETURNdi64cc : PseudoI<(outs), (ins i64i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>; // This gets substituted to a conditional jump instruction in MC lowering. 
- def TAILJMPd64_CC : Ii32PCRel<0x80, RawFrm, (outs), - (ins i64i32imm_pcrel:$dst, i32imm:$cond), "", []>; + def TAILJMPd64_CC : PseudoI<(outs), + (ins i64i32imm_pcrel:$dst, i32imm:$cond), []>; } diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td index 06e605fe5db2..7a4eb138ec34 100644 --- a/lib/Target/X86/X86InstrExtension.td +++ b/lib/Target/X86/X86InstrExtension.td @@ -17,19 +17,18 @@ let hasSideEffects = 0 in { let Defs = [EAX], Uses = [AX] in // EAX = signext(AX) def CWDE : I<0x98, RawFrm, (outs), (ins), "{cwtl|cwde}", []>, OpSize32, Sched<[WriteALU]>; + let Defs = [RAX], Uses = [EAX] in // RAX = signext(EAX) + def CDQE : RI<0x98, RawFrm, (outs), (ins), + "{cltq|cdqe}", []>, Sched<[WriteALU]>, Requires<[In64BitMode]>; + // FIXME: CWD/CDQ/CQO shouldn't Def the A register, but the fast register + // allocator crashes if you remove it. let Defs = [AX,DX], Uses = [AX] in // DX:AX = signext(AX) def CWD : I<0x99, RawFrm, (outs), (ins), "{cwtd|cwd}", []>, OpSize16, Sched<[WriteALU]>; let Defs = [EAX,EDX], Uses = [EAX] in // EDX:EAX = signext(EAX) def CDQ : I<0x99, RawFrm, (outs), (ins), "{cltd|cdq}", []>, OpSize32, Sched<[WriteALU]>; - - - let Defs = [RAX], Uses = [EAX] in // RAX = signext(EAX) - def CDQE : RI<0x98, RawFrm, (outs), (ins), - "{cltq|cdqe}", []>, Sched<[WriteALU]>, Requires<[In64BitMode]>; - let Defs = [RAX,RDX], Uses = [RAX] in // RDX:RAX = signext(RAX) def CQO : RI<0x99, RawFrm, (outs), (ins), "{cqto|cqo}", []>, Sched<[WriteALU]>, Requires<[In64BitMode]>; diff --git a/lib/Target/X86/X86InstrFoldTables.cpp b/lib/Target/X86/X86InstrFoldTables.cpp index d42fec3770c7..f3b286e0375c 100644 --- a/lib/Target/X86/X86InstrFoldTables.cpp +++ b/lib/Target/X86/X86InstrFoldTables.cpp @@ -292,6 +292,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable0[] = { { X86::JMP32r_NT, X86::JMP32m_NT, TB_FOLDED_LOAD }, { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD }, { X86::JMP64r_NT, X86::JMP64m_NT, TB_FOLDED_LOAD }, + { X86::MMX_MOVD64from64rr, X86::MMX_MOVD64from64rm, TB_FOLDED_STORE | TB_NO_REVERSE }, + { X86::MMX_MOVD64grr, X86::MMX_MOVD64mr, TB_FOLDED_STORE | TB_NO_REVERSE }, { X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE }, { X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE }, { X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE }, @@ -5245,6 +5247,270 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = { { X86::VXORPSZrrk, X86::VXORPSZrmk, 0 }, }; +static const X86MemoryFoldTableEntry BroadcastFoldTable2[] = { + { X86::VADDPDZ128rr, X86::VADDPDZ128rmb, TB_BCAST_SD }, + { X86::VADDPDZ256rr, X86::VADDPDZ256rmb, TB_BCAST_SD }, + { X86::VADDPDZrr, X86::VADDPDZrmb, TB_BCAST_SD }, + { X86::VADDPSZ128rr, X86::VADDPSZ128rmb, TB_BCAST_SS }, + { X86::VADDPSZ256rr, X86::VADDPSZ256rmb, TB_BCAST_SS }, + { X86::VADDPSZrr, X86::VADDPSZrmb, TB_BCAST_SS }, + { X86::VCMPPDZ128rri, X86::VCMPPDZ128rmbi, TB_BCAST_SD }, + { X86::VCMPPDZ256rri, X86::VCMPPDZ256rmbi, TB_BCAST_SD }, + { X86::VCMPPDZrri, X86::VCMPPDZrmbi, TB_BCAST_SD }, + { X86::VCMPPSZ128rri, X86::VCMPPSZ128rmbi, TB_BCAST_SS }, + { X86::VCMPPSZ256rri, X86::VCMPPSZ256rmbi, TB_BCAST_SS }, + { X86::VCMPPSZrri, X86::VCMPPSZrmbi, TB_BCAST_SS }, + { X86::VDIVPDZ128rr, X86::VDIVPDZ128rmb, TB_BCAST_SD }, + { X86::VDIVPDZ256rr, X86::VDIVPDZ256rmb, TB_BCAST_SD }, + { X86::VDIVPDZrr, X86::VDIVPDZrmb, TB_BCAST_SD }, + { X86::VDIVPSZ128rr, X86::VDIVPSZ128rmb, TB_BCAST_SS }, + { X86::VDIVPSZ256rr, X86::VDIVPSZ256rmb, TB_BCAST_SS }, + { X86::VDIVPSZrr, X86::VDIVPSZrmb, TB_BCAST_SS }, + { X86::VMAXCPDZ128rr, X86::VMAXCPDZ128rmb, 
TB_BCAST_SD }, + { X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rmb, TB_BCAST_SD }, + { X86::VMAXCPDZrr, X86::VMAXCPDZrmb, TB_BCAST_SD }, + { X86::VMAXCPSZ128rr, X86::VMAXCPSZ128rmb, TB_BCAST_SS }, + { X86::VMAXCPSZ256rr, X86::VMAXCPSZ256rmb, TB_BCAST_SS }, + { X86::VMAXCPSZrr, X86::VMAXCPSZrmb, TB_BCAST_SS }, + { X86::VMAXPDZ128rr, X86::VMAXPDZ128rmb, TB_BCAST_SD }, + { X86::VMAXPDZ256rr, X86::VMAXPDZ256rmb, TB_BCAST_SD }, + { X86::VMAXPDZrr, X86::VMAXPDZrmb, TB_BCAST_SD }, + { X86::VMAXPSZ128rr, X86::VMAXPSZ128rmb, TB_BCAST_SS }, + { X86::VMAXPSZ256rr, X86::VMAXPSZ256rmb, TB_BCAST_SS }, + { X86::VMAXPSZrr, X86::VMAXPSZrmb, TB_BCAST_SS }, + { X86::VMINCPDZ128rr, X86::VMINCPDZ128rmb, TB_BCAST_SD }, + { X86::VMINCPDZ256rr, X86::VMINCPDZ256rmb, TB_BCAST_SD }, + { X86::VMINCPDZrr, X86::VMINCPDZrmb, TB_BCAST_SD }, + { X86::VMINCPSZ128rr, X86::VMINCPSZ128rmb, TB_BCAST_SS }, + { X86::VMINCPSZ256rr, X86::VMINCPSZ256rmb, TB_BCAST_SS }, + { X86::VMINCPSZrr, X86::VMINCPSZrmb, TB_BCAST_SS }, + { X86::VMINPDZ128rr, X86::VMINPDZ128rmb, TB_BCAST_SD }, + { X86::VMINPDZ256rr, X86::VMINPDZ256rmb, TB_BCAST_SD }, + { X86::VMINPDZrr, X86::VMINPDZrmb, TB_BCAST_SD }, + { X86::VMINPSZ128rr, X86::VMINPSZ128rmb, TB_BCAST_SS }, + { X86::VMINPSZ256rr, X86::VMINPSZ256rmb, TB_BCAST_SS }, + { X86::VMINPSZrr, X86::VMINPSZrmb, TB_BCAST_SS }, + { X86::VMULPDZ128rr, X86::VMULPDZ128rmb, TB_BCAST_SD }, + { X86::VMULPDZ256rr, X86::VMULPDZ256rmb, TB_BCAST_SD }, + { X86::VMULPDZrr, X86::VMULPDZrmb, TB_BCAST_SD }, + { X86::VMULPSZ128rr, X86::VMULPSZ128rmb, TB_BCAST_SS }, + { X86::VMULPSZ256rr, X86::VMULPSZ256rmb, TB_BCAST_SS }, + { X86::VMULPSZrr, X86::VMULPSZrmb, TB_BCAST_SS }, + { X86::VPADDDZ128rr, X86::VPADDDZ128rmb, TB_BCAST_D }, + { X86::VPADDDZ256rr, X86::VPADDDZ256rmb, TB_BCAST_D }, + { X86::VPADDDZrr, X86::VPADDDZrmb, TB_BCAST_D }, + { X86::VPADDQZ128rr, X86::VPADDQZ128rmb, TB_BCAST_Q }, + { X86::VPADDQZ256rr, X86::VPADDQZ256rmb, TB_BCAST_Q }, + { X86::VPADDQZrr, X86::VPADDQZrmb, TB_BCAST_Q }, + { X86::VPANDDZ128rr, X86::VPANDDZ128rmb, TB_BCAST_D }, + { X86::VPANDDZ256rr, X86::VPANDDZ256rmb, TB_BCAST_D }, + { X86::VPANDDZrr, X86::VPANDDZrmb, TB_BCAST_D }, + { X86::VPANDNDZ128rr, X86::VPANDNDZ128rmb, TB_BCAST_D }, + { X86::VPANDNDZ256rr, X86::VPANDNDZ256rmb, TB_BCAST_D }, + { X86::VPANDNDZrr, X86::VPANDNDZrmb, TB_BCAST_D }, + { X86::VPANDNQZ128rr, X86::VPANDNQZ128rmb, TB_BCAST_Q }, + { X86::VPANDNQZ256rr, X86::VPANDNQZ256rmb, TB_BCAST_Q }, + { X86::VPANDNQZrr, X86::VPANDNQZrmb, TB_BCAST_Q }, + { X86::VPANDQZ128rr, X86::VPANDQZ128rmb, TB_BCAST_Q }, + { X86::VPANDQZ256rr, X86::VPANDQZ256rmb, TB_BCAST_Q }, + { X86::VPANDQZrr, X86::VPANDQZrmb, TB_BCAST_Q }, + { X86::VPCMPDZ128rri, X86::VPCMPDZ128rmib, TB_BCAST_D }, + { X86::VPCMPDZ256rri, X86::VPCMPDZ256rmib, TB_BCAST_D }, + { X86::VPCMPDZrri, X86::VPCMPDZrmib, TB_BCAST_D }, + { X86::VPCMPEQDZ128rr, X86::VPCMPEQDZ128rmb, TB_BCAST_D }, + { X86::VPCMPEQDZ256rr, X86::VPCMPEQDZ256rmb, TB_BCAST_D }, + { X86::VPCMPEQDZrr, X86::VPCMPEQDZrmb, TB_BCAST_D }, + { X86::VPCMPEQQZ128rr, X86::VPCMPEQQZ128rmb, TB_BCAST_Q }, + { X86::VPCMPEQQZ256rr, X86::VPCMPEQQZ256rmb, TB_BCAST_Q }, + { X86::VPCMPEQQZrr, X86::VPCMPEQQZrmb, TB_BCAST_Q }, + { X86::VPCMPGTDZ128rr, X86::VPCMPGTDZ128rmb, TB_BCAST_D }, + { X86::VPCMPGTDZ256rr, X86::VPCMPGTDZ256rmb, TB_BCAST_D }, + { X86::VPCMPGTDZrr, X86::VPCMPGTDZrmb, TB_BCAST_D }, + { X86::VPCMPGTQZ128rr, X86::VPCMPGTQZ128rmb, TB_BCAST_Q }, + { X86::VPCMPGTQZ256rr, X86::VPCMPGTQZ256rmb, TB_BCAST_Q }, + { X86::VPCMPGTQZrr, X86::VPCMPGTQZrmb, TB_BCAST_Q }, + { 
X86::VPCMPQZ128rri, X86::VPCMPQZ128rmib, TB_BCAST_Q }, + { X86::VPCMPQZ256rri, X86::VPCMPQZ256rmib, TB_BCAST_Q }, + { X86::VPCMPQZrri, X86::VPCMPQZrmib, TB_BCAST_Q }, + { X86::VPCMPUDZ128rri, X86::VPCMPUDZ128rmib, TB_BCAST_D }, + { X86::VPCMPUDZ256rri, X86::VPCMPUDZ256rmib, TB_BCAST_D }, + { X86::VPCMPUDZrri, X86::VPCMPUDZrmib, TB_BCAST_D }, + { X86::VPCMPUQZ128rri, X86::VPCMPUQZ128rmib, TB_BCAST_Q }, + { X86::VPCMPUQZ256rri, X86::VPCMPUQZ256rmib, TB_BCAST_Q }, + { X86::VPCMPUQZrri, X86::VPCMPUQZrmib, TB_BCAST_Q }, + { X86::VPMAXSDZ128rr, X86::VPMAXSDZ128rmb, TB_BCAST_D }, + { X86::VPMAXSDZ256rr, X86::VPMAXSDZ256rmb, TB_BCAST_D }, + { X86::VPMAXSDZrr, X86::VPMAXSDZrmb, TB_BCAST_D }, + { X86::VPMAXSQZ128rr, X86::VPMAXSQZ128rmb, TB_BCAST_Q }, + { X86::VPMAXSQZ256rr, X86::VPMAXSQZ256rmb, TB_BCAST_Q }, + { X86::VPMAXSQZrr, X86::VPMAXSQZrmb, TB_BCAST_Q }, + { X86::VPMAXUDZ128rr, X86::VPMAXUDZ128rmb, TB_BCAST_D }, + { X86::VPMAXUDZ256rr, X86::VPMAXUDZ256rmb, TB_BCAST_D }, + { X86::VPMAXUDZrr, X86::VPMAXUDZrmb, TB_BCAST_D }, + { X86::VPMAXUQZ128rr, X86::VPMAXUQZ128rmb, TB_BCAST_Q }, + { X86::VPMAXUQZ256rr, X86::VPMAXUQZ256rmb, TB_BCAST_Q }, + { X86::VPMAXUQZrr, X86::VPMAXUQZrmb, TB_BCAST_Q }, + { X86::VPMINSDZ128rr, X86::VPMINSDZ128rmb, TB_BCAST_D }, + { X86::VPMINSDZ256rr, X86::VPMINSDZ256rmb, TB_BCAST_D }, + { X86::VPMINSDZrr, X86::VPMINSDZrmb, TB_BCAST_D }, + { X86::VPMINSQZ128rr, X86::VPMINSQZ128rmb, TB_BCAST_Q }, + { X86::VPMINSQZ256rr, X86::VPMINSQZ256rmb, TB_BCAST_Q }, + { X86::VPMINSQZrr, X86::VPMINSQZrmb, TB_BCAST_Q }, + { X86::VPMINUDZ128rr, X86::VPMINUDZ128rmb, TB_BCAST_D }, + { X86::VPMINUDZ256rr, X86::VPMINUDZ256rmb, TB_BCAST_D }, + { X86::VPMINUDZrr, X86::VPMINUDZrmb, TB_BCAST_D }, + { X86::VPMINUQZ128rr, X86::VPMINUQZ128rmb, TB_BCAST_Q }, + { X86::VPMINUQZ256rr, X86::VPMINUQZ256rmb, TB_BCAST_Q }, + { X86::VPMINUQZrr, X86::VPMINUQZrmb, TB_BCAST_Q }, + { X86::VPMULLDZ128rr, X86::VPMULLDZ128rmb, TB_BCAST_D }, + { X86::VPMULLDZ256rr, X86::VPMULLDZ256rmb, TB_BCAST_D }, + { X86::VPMULLDZrr, X86::VPMULLDZrmb, TB_BCAST_D }, + { X86::VPMULLQZ128rr, X86::VPMULLQZ128rmb, TB_BCAST_Q }, + { X86::VPMULLQZ256rr, X86::VPMULLQZ256rmb, TB_BCAST_Q }, + { X86::VPMULLQZrr, X86::VPMULLQZrmb, TB_BCAST_Q }, + { X86::VPORDZ128rr, X86::VPORDZ128rmb, TB_BCAST_D }, + { X86::VPORDZ256rr, X86::VPORDZ256rmb, TB_BCAST_D }, + { X86::VPORDZrr, X86::VPORDZrmb, TB_BCAST_D }, + { X86::VPORQZ128rr, X86::VPORQZ128rmb, TB_BCAST_Q }, + { X86::VPORQZ256rr, X86::VPORQZ256rmb, TB_BCAST_Q }, + { X86::VPORQZrr, X86::VPORQZrmb, TB_BCAST_Q }, + { X86::VPTESTMDZ128rr, X86::VPTESTMDZ128rmb, TB_BCAST_D }, + { X86::VPTESTMDZ256rr, X86::VPTESTMDZ256rmb, TB_BCAST_D }, + { X86::VPTESTMDZrr, X86::VPTESTMDZrmb, TB_BCAST_D }, + { X86::VPTESTMQZ128rr, X86::VPTESTMQZ128rmb, TB_BCAST_Q }, + { X86::VPTESTMQZ256rr, X86::VPTESTMQZ256rmb, TB_BCAST_Q }, + { X86::VPTESTMQZrr, X86::VPTESTMQZrmb, TB_BCAST_Q }, + { X86::VPTESTNMDZ128rr,X86::VPTESTNMDZ128rmb,TB_BCAST_D }, + { X86::VPTESTNMDZ256rr,X86::VPTESTNMDZ256rmb,TB_BCAST_D }, + { X86::VPTESTNMDZrr, X86::VPTESTNMDZrmb, TB_BCAST_D }, + { X86::VPTESTNMQZ128rr,X86::VPTESTNMQZ128rmb,TB_BCAST_Q }, + { X86::VPTESTNMQZ256rr,X86::VPTESTNMQZ256rmb,TB_BCAST_Q }, + { X86::VPTESTNMQZrr, X86::VPTESTNMQZrmb, TB_BCAST_Q }, + { X86::VPXORDZ128rr, X86::VPXORDZ128rmb, TB_BCAST_D }, + { X86::VPXORDZ256rr, X86::VPXORDZ256rmb, TB_BCAST_D }, + { X86::VPXORDZrr, X86::VPXORDZrmb, TB_BCAST_D }, + { X86::VPXORQZ128rr, X86::VPXORQZ128rmb, TB_BCAST_Q }, + { X86::VPXORQZ256rr, X86::VPXORQZ256rmb, TB_BCAST_Q }, + { 
X86::VPXORQZrr, X86::VPXORQZrmb, TB_BCAST_Q }, + { X86::VSUBPDZ128rr, X86::VSUBPDZ128rmb, TB_BCAST_SD }, + { X86::VSUBPDZ256rr, X86::VSUBPDZ256rmb, TB_BCAST_SD }, + { X86::VSUBPDZrr, X86::VSUBPDZrmb, TB_BCAST_SD }, + { X86::VSUBPSZ128rr, X86::VSUBPSZ128rmb, TB_BCAST_SS }, + { X86::VSUBPSZ256rr, X86::VSUBPSZ256rmb, TB_BCAST_SS }, + { X86::VSUBPSZrr, X86::VSUBPSZrmb, TB_BCAST_SS }, +}; + +static const X86MemoryFoldTableEntry BroadcastFoldTable3[] = { + { X86::VFMADD132PDZ128r, X86::VFMADD132PDZ128mb, TB_BCAST_SD }, + { X86::VFMADD132PDZ256r, X86::VFMADD132PDZ256mb, TB_BCAST_SD }, + { X86::VFMADD132PDZr, X86::VFMADD132PDZmb, TB_BCAST_SD }, + { X86::VFMADD132PSZ128r, X86::VFMADD132PSZ128mb, TB_BCAST_SS }, + { X86::VFMADD132PSZ256r, X86::VFMADD132PSZ256mb, TB_BCAST_SS }, + { X86::VFMADD132PSZr, X86::VFMADD132PSZmb, TB_BCAST_SS }, + { X86::VFMADD213PDZ128r, X86::VFMADD213PDZ128mb, TB_BCAST_SD }, + { X86::VFMADD213PDZ256r, X86::VFMADD213PDZ256mb, TB_BCAST_SD }, + { X86::VFMADD213PDZr, X86::VFMADD213PDZmb, TB_BCAST_SD }, + { X86::VFMADD213PSZ128r, X86::VFMADD213PSZ128mb, TB_BCAST_SS }, + { X86::VFMADD213PSZ256r, X86::VFMADD213PSZ256mb, TB_BCAST_SS }, + { X86::VFMADD213PSZr, X86::VFMADD213PSZmb, TB_BCAST_SS }, + { X86::VFMADD231PDZ128r, X86::VFMADD231PDZ128mb, TB_BCAST_SD }, + { X86::VFMADD231PDZ256r, X86::VFMADD231PDZ256mb, TB_BCAST_SD }, + { X86::VFMADD231PDZr, X86::VFMADD231PDZmb, TB_BCAST_SD }, + { X86::VFMADD231PSZ128r, X86::VFMADD231PSZ128mb, TB_BCAST_SS }, + { X86::VFMADD231PSZ256r, X86::VFMADD231PSZ256mb, TB_BCAST_SS }, + { X86::VFMADD231PSZr, X86::VFMADD231PSZmb, TB_BCAST_SS }, + { X86::VFMADDSUB132PDZ128r, X86::VFMADDSUB132PDZ128mb, TB_BCAST_SD }, + { X86::VFMADDSUB132PDZ256r, X86::VFMADDSUB132PDZ256mb, TB_BCAST_SD }, + { X86::VFMADDSUB132PDZr, X86::VFMADDSUB132PDZmb, TB_BCAST_SD }, + { X86::VFMADDSUB132PSZ128r, X86::VFMADDSUB132PSZ128mb, TB_BCAST_SS }, + { X86::VFMADDSUB132PSZ256r, X86::VFMADDSUB132PSZ256mb, TB_BCAST_SS }, + { X86::VFMADDSUB132PSZr, X86::VFMADDSUB132PSZmb, TB_BCAST_SS }, + { X86::VFMADDSUB213PDZ128r, X86::VFMADDSUB213PDZ128mb, TB_BCAST_SD }, + { X86::VFMADDSUB213PDZ256r, X86::VFMADDSUB213PDZ256mb, TB_BCAST_SD }, + { X86::VFMADDSUB213PDZr, X86::VFMADDSUB213PDZmb, TB_BCAST_SD }, + { X86::VFMADDSUB213PSZ128r, X86::VFMADDSUB213PSZ128mb, TB_BCAST_SS }, + { X86::VFMADDSUB213PSZ256r, X86::VFMADDSUB213PSZ256mb, TB_BCAST_SS }, + { X86::VFMADDSUB213PSZr, X86::VFMADDSUB213PSZmb, TB_BCAST_SS }, + { X86::VFMADDSUB231PDZ128r, X86::VFMADDSUB231PDZ128mb, TB_BCAST_SD }, + { X86::VFMADDSUB231PDZ256r, X86::VFMADDSUB231PDZ256mb, TB_BCAST_SD }, + { X86::VFMADDSUB231PDZr, X86::VFMADDSUB231PDZmb, TB_BCAST_SD }, + { X86::VFMADDSUB231PSZ128r, X86::VFMADDSUB231PSZ128mb, TB_BCAST_SS }, + { X86::VFMADDSUB231PSZ256r, X86::VFMADDSUB231PSZ256mb, TB_BCAST_SS }, + { X86::VFMADDSUB231PSZr, X86::VFMADDSUB231PSZmb, TB_BCAST_SS }, + { X86::VFMSUB132PDZ128r, X86::VFMSUB132PDZ128mb, TB_BCAST_SD }, + { X86::VFMSUB132PDZ256r, X86::VFMSUB132PDZ256mb, TB_BCAST_SD }, + { X86::VFMSUB132PDZr, X86::VFMSUB132PDZmb, TB_BCAST_SD }, + { X86::VFMSUB132PSZ128r, X86::VFMSUB132PSZ128mb, TB_BCAST_SS }, + { X86::VFMSUB132PSZ256r, X86::VFMSUB132PSZ256mb, TB_BCAST_SS }, + { X86::VFMSUB132PSZr, X86::VFMSUB132PSZmb, TB_BCAST_SS }, + { X86::VFMSUB213PDZ128r, X86::VFMSUB213PDZ128mb, TB_BCAST_SD }, + { X86::VFMSUB213PDZ256r, X86::VFMSUB213PDZ256mb, TB_BCAST_SD }, + { X86::VFMSUB213PDZr, X86::VFMSUB213PDZmb, TB_BCAST_SD }, + { X86::VFMSUB213PSZ128r, X86::VFMSUB213PSZ128mb, TB_BCAST_SS }, + { X86::VFMSUB213PSZ256r, 
X86::VFMSUB213PSZ256mb, TB_BCAST_SS }, + { X86::VFMSUB213PSZr, X86::VFMSUB213PSZmb, TB_BCAST_SS }, + { X86::VFMSUB231PDZ128r, X86::VFMSUB231PDZ128mb, TB_BCAST_SD }, + { X86::VFMSUB231PDZ256r, X86::VFMSUB231PDZ256mb, TB_BCAST_SD }, + { X86::VFMSUB231PDZr, X86::VFMSUB231PDZmb, TB_BCAST_SD }, + { X86::VFMSUB231PSZ128r, X86::VFMSUB231PSZ128mb, TB_BCAST_SS }, + { X86::VFMSUB231PSZ256r, X86::VFMSUB231PSZ256mb, TB_BCAST_SS }, + { X86::VFMSUB231PSZr, X86::VFMSUB231PSZmb, TB_BCAST_SS }, + { X86::VFMSUBADD132PDZ128r, X86::VFMSUBADD132PDZ128mb, TB_BCAST_SD }, + { X86::VFMSUBADD132PDZ256r, X86::VFMSUBADD132PDZ256mb, TB_BCAST_SD }, + { X86::VFMSUBADD132PDZr, X86::VFMSUBADD132PDZmb, TB_BCAST_SD }, + { X86::VFMSUBADD132PSZ128r, X86::VFMSUBADD132PSZ128mb, TB_BCAST_SS }, + { X86::VFMSUBADD132PSZ256r, X86::VFMSUBADD132PSZ256mb, TB_BCAST_SS }, + { X86::VFMSUBADD132PSZr, X86::VFMSUBADD132PSZmb, TB_BCAST_SS }, + { X86::VFMSUBADD213PDZ128r, X86::VFMSUBADD213PDZ128mb, TB_BCAST_SD }, + { X86::VFMSUBADD213PDZ256r, X86::VFMSUBADD213PDZ256mb, TB_BCAST_SD }, + { X86::VFMSUBADD213PDZr, X86::VFMSUBADD213PDZmb, TB_BCAST_SD }, + { X86::VFMSUBADD213PSZ128r, X86::VFMSUBADD213PSZ128mb, TB_BCAST_SS }, + { X86::VFMSUBADD213PSZ256r, X86::VFMSUBADD213PSZ256mb, TB_BCAST_SS }, + { X86::VFMSUBADD213PSZr, X86::VFMSUBADD213PSZmb, TB_BCAST_SS }, + { X86::VFMSUBADD231PDZ128r, X86::VFMSUBADD231PDZ128mb, TB_BCAST_SD }, + { X86::VFMSUBADD231PDZ256r, X86::VFMSUBADD231PDZ256mb, TB_BCAST_SD }, + { X86::VFMSUBADD231PDZr, X86::VFMSUBADD231PDZmb, TB_BCAST_SD }, + { X86::VFMSUBADD231PSZ128r, X86::VFMSUBADD231PSZ128mb, TB_BCAST_SS }, + { X86::VFMSUBADD231PSZ256r, X86::VFMSUBADD231PSZ256mb, TB_BCAST_SS }, + { X86::VFMSUBADD231PSZr, X86::VFMSUBADD231PSZmb, TB_BCAST_SS }, + { X86::VFNMADD132PDZ128r, X86::VFNMADD132PDZ128mb, TB_BCAST_SD }, + { X86::VFNMADD132PDZ256r, X86::VFNMADD132PDZ256mb, TB_BCAST_SD }, + { X86::VFNMADD132PDZr, X86::VFNMADD132PDZmb, TB_BCAST_SD }, + { X86::VFNMADD132PSZ128r, X86::VFNMADD132PSZ128mb, TB_BCAST_SS }, + { X86::VFNMADD132PSZ256r, X86::VFNMADD132PSZ256mb, TB_BCAST_SS }, + { X86::VFNMADD132PSZr, X86::VFNMADD132PSZmb, TB_BCAST_SS }, + { X86::VFNMADD213PDZ128r, X86::VFNMADD213PDZ128mb, TB_BCAST_SD }, + { X86::VFNMADD213PDZ256r, X86::VFNMADD213PDZ256mb, TB_BCAST_SD }, + { X86::VFNMADD213PDZr, X86::VFNMADD213PDZmb, TB_BCAST_SD }, + { X86::VFNMADD213PSZ128r, X86::VFNMADD213PSZ128mb, TB_BCAST_SS }, + { X86::VFNMADD213PSZ256r, X86::VFNMADD213PSZ256mb, TB_BCAST_SS }, + { X86::VFNMADD213PSZr, X86::VFNMADD213PSZmb, TB_BCAST_SS }, + { X86::VFNMADD231PDZ128r, X86::VFNMADD231PDZ128mb, TB_BCAST_SD }, + { X86::VFNMADD231PDZ256r, X86::VFNMADD231PDZ256mb, TB_BCAST_SD }, + { X86::VFNMADD231PDZr, X86::VFNMADD231PDZmb, TB_BCAST_SD }, + { X86::VFNMADD231PSZ128r, X86::VFNMADD231PSZ128mb, TB_BCAST_SS }, + { X86::VFNMADD231PSZ256r, X86::VFNMADD231PSZ256mb, TB_BCAST_SS }, + { X86::VFNMADD231PSZr, X86::VFNMADD231PSZmb, TB_BCAST_SS }, + { X86::VFNMSUB132PDZ128r, X86::VFNMSUB132PDZ128mb, TB_BCAST_SD }, + { X86::VFNMSUB132PDZ256r, X86::VFNMSUB132PDZ256mb, TB_BCAST_SD }, + { X86::VFNMSUB132PDZr, X86::VFNMSUB132PDZmb, TB_BCAST_SD }, + { X86::VFNMSUB132PSZ128r, X86::VFNMSUB132PSZ128mb, TB_BCAST_SS }, + { X86::VFNMSUB132PSZ256r, X86::VFNMSUB132PSZ256mb, TB_BCAST_SS }, + { X86::VFNMSUB132PSZr, X86::VFNMSUB132PSZmb, TB_BCAST_SS }, + { X86::VFNMSUB213PDZ128r, X86::VFNMSUB213PDZ128mb, TB_BCAST_SD }, + { X86::VFNMSUB213PDZ256r, X86::VFNMSUB213PDZ256mb, TB_BCAST_SD }, + { X86::VFNMSUB213PDZr, X86::VFNMSUB213PDZmb, TB_BCAST_SD }, + { X86::VFNMSUB213PSZ128r, 
X86::VFNMSUB213PSZ128mb, TB_BCAST_SS }, + { X86::VFNMSUB213PSZ256r, X86::VFNMSUB213PSZ256mb, TB_BCAST_SS }, + { X86::VFNMSUB213PSZr, X86::VFNMSUB213PSZmb, TB_BCAST_SS }, + { X86::VFNMSUB231PDZ128r, X86::VFNMSUB231PDZ128mb, TB_BCAST_SD }, + { X86::VFNMSUB231PDZ256r, X86::VFNMSUB231PDZ256mb, TB_BCAST_SD }, + { X86::VFNMSUB231PDZr, X86::VFNMSUB231PDZmb, TB_BCAST_SD }, + { X86::VFNMSUB231PSZ128r, X86::VFNMSUB231PSZ128mb, TB_BCAST_SS }, + { X86::VFNMSUB231PSZ256r, X86::VFNMSUB231PSZ256mb, TB_BCAST_SS }, + { X86::VFNMSUB231PSZr, X86::VFNMSUB231PSZmb, TB_BCAST_SS }, +}; + static const X86MemoryFoldTableEntry * lookupFoldTableImpl(ArrayRef Table, unsigned RegOp) { #ifndef NDEBUG @@ -5287,6 +5553,18 @@ lookupFoldTableImpl(ArrayRef Table, unsigned RegOp) { std::end(MemoryFoldTable4)) == std::end(MemoryFoldTable4) && "MemoryFoldTable4 is not sorted and unique!"); + assert(std::is_sorted(std::begin(BroadcastFoldTable2), + std::end(BroadcastFoldTable2)) && + std::adjacent_find(std::begin(BroadcastFoldTable2), + std::end(BroadcastFoldTable2)) == + std::end(BroadcastFoldTable2) && + "BroadcastFoldTable2 is not sorted and unique!"); + assert(std::is_sorted(std::begin(BroadcastFoldTable3), + std::end(BroadcastFoldTable3)) && + std::adjacent_find(std::begin(BroadcastFoldTable3), + std::end(BroadcastFoldTable3)) == + std::end(BroadcastFoldTable3) && + "BroadcastFoldTable3 is not sorted and unique!"); FoldTablesChecked.store(true, std::memory_order_relaxed); } #endif @@ -5355,6 +5633,15 @@ struct X86MemUnfoldTable { // Index 4, folded load addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD); + // Broadcast tables. + for (const X86MemoryFoldTableEntry &Entry : BroadcastFoldTable2) + // Index 2, folded broadcast + addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD | TB_FOLDED_BCAST); + + for (const X86MemoryFoldTableEntry &Entry : BroadcastFoldTable3) + // Index 2, folded broadcast + addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD | TB_FOLDED_BCAST); + // Sort the memory->reg unfold table. array_pod_sort(Table.begin(), Table.end()); diff --git a/lib/Target/X86/X86InstrFoldTables.h b/lib/Target/X86/X86InstrFoldTables.h index 419baf98f61d..7dc236a0d7e4 100644 --- a/lib/Target/X86/X86InstrFoldTables.h +++ b/lib/Target/X86/X86InstrFoldTables.h @@ -19,35 +19,48 @@ namespace llvm { enum { // Select which memory operand is being unfolded. - // (stored in bits 0 - 3) + // (stored in bits 0 - 2) TB_INDEX_0 = 0, TB_INDEX_1 = 1, TB_INDEX_2 = 2, TB_INDEX_3 = 3, TB_INDEX_4 = 4, - TB_INDEX_MASK = 0xf, + TB_INDEX_MASK = 0x7, // Do not insert the reverse map (MemOp -> RegOp) into the table. // This may be needed because there is a many -> one mapping. - TB_NO_REVERSE = 1 << 4, + TB_NO_REVERSE = 1 << 3, // Do not insert the forward map (RegOp -> MemOp) into the table. // This is needed for Native Client, which prohibits branch // instructions from using a memory operand. - TB_NO_FORWARD = 1 << 5, + TB_NO_FORWARD = 1 << 4, - TB_FOLDED_LOAD = 1 << 6, - TB_FOLDED_STORE = 1 << 7, + TB_FOLDED_LOAD = 1 << 5, + TB_FOLDED_STORE = 1 << 6, + TB_FOLDED_BCAST = 1 << 7, // Minimum alignment required for load/store. - // Used for RegOp->MemOp conversion. - // (stored in bits 8 - 15) + // Used for RegOp->MemOp conversion. Encoded as Log2(Align) + 1 to allow 0 + // to mean align of 0. 
+ // (stored in bits 8 - 11) TB_ALIGN_SHIFT = 8, - TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT, - TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT, - TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT, - TB_ALIGN_64 = 64 << TB_ALIGN_SHIFT, - TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT + TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT, + TB_ALIGN_16 = 5 << TB_ALIGN_SHIFT, + TB_ALIGN_32 = 6 << TB_ALIGN_SHIFT, + TB_ALIGN_64 = 7 << TB_ALIGN_SHIFT, + TB_ALIGN_MASK = 0xf << TB_ALIGN_SHIFT, + + // Broadcast type. + // (stored in bits 12 - 13) + TB_BCAST_TYPE_SHIFT = 12, + TB_BCAST_D = 0 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_Q = 1 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_SS = 2 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_SD = 3 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_MASK = 0x3 << TB_BCAST_TYPE_SHIFT, + + // Unused bits 14-15 }; // This struct is used for both the folding and unfold tables. They KeyOp diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 096cc27861ca..de6f8a81dff6 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -103,6 +103,8 @@ def X86vzld : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def X86vextractst : SDNode<"X86ISD::VEXTRACT_STORE", SDTStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def X86VBroadcastld : SDNode<"X86ISD::VBROADCAST_LOAD", SDTLoad, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def SDTVtrunc : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<0>, SDTCisInt<1>, @@ -954,6 +956,26 @@ def X86vextractstore64 : PatFrag<(ops node:$val, node:$ptr), return cast(N)->getMemoryVT().getStoreSize() == 8; }]>; +def X86VBroadcastld8 : PatFrag<(ops node:$src), + (X86VBroadcastld node:$src), [{ + return cast(N)->getMemoryVT().getStoreSize() == 1; +}]>; + +def X86VBroadcastld16 : PatFrag<(ops node:$src), + (X86VBroadcastld node:$src), [{ + return cast(N)->getMemoryVT().getStoreSize() == 2; +}]>; + +def X86VBroadcastld32 : PatFrag<(ops node:$src), + (X86VBroadcastld node:$src), [{ + return cast(N)->getMemoryVT().getStoreSize() == 4; +}]>; + +def X86VBroadcastld64 : PatFrag<(ops node:$src), + (X86VBroadcastld node:$src), [{ + return cast(N)->getMemoryVT().getStoreSize() == 8; +}]>; + def fp32imm0 : PatLeaf<(f32 fpimm), [{ return N->isExactlyValue(+0.0); @@ -963,6 +985,10 @@ def fp64imm0 : PatLeaf<(f64 fpimm), [{ return N->isExactlyValue(+0.0); }]>; +def fp128imm0 : PatLeaf<(f128 fpimm), [{ + return N->isExactlyValue(+0.0); +}]>; + // EXTRACT_get_vextract128_imm xform function: convert extract_subvector index // to VEXTRACTF128/VEXTRACTI128 imm. def EXTRACT_get_vextract128_imm : SDNodeXFormgetParent(); @@ -675,7 +712,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, RC = Opc != X86::LEA32r ? &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass; } - unsigned SrcReg = Src.getReg(); + Register SrcReg = Src.getReg(); // For both LEA64 and LEA32 the register already has essentially the right // type (32-bit or 64-bit) we may just need to forbid SP. @@ -684,7 +721,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, isKill = Src.isKill(); assert(!Src.isUndef() && "Undef op doesn't need optimization"); - if (TargetRegisterInfo::isVirtualRegister(NewSrc) && + if (Register::isVirtualRegister(NewSrc) && !MF.getRegInfo().constrainRegClass(NewSrc, RC)) return false; @@ -693,7 +730,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, // This is for an LEA64_32r and incoming registers are 32-bit. 
One way or // another we need to add 64-bit registers to the final MI. - if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + if (Register::isPhysicalRegister(SrcReg)) { ImplicitOp = Src; ImplicitOp.setImplicit(); @@ -740,8 +777,8 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA( return nullptr; unsigned Opcode = X86::LEA64_32r; - unsigned InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass); - unsigned OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass); + Register InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass); + Register OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass); // Build and insert into an implicit UNDEF value. This is OK because // we will be shifting and then extracting the lower 8/16-bits. @@ -751,8 +788,8 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA( // But testing has shown this *does* help performance in 64-bit mode (at // least on modern x86 machines). MachineBasicBlock::iterator MBBI = MI.getIterator(); - unsigned Dest = MI.getOperand(0).getReg(); - unsigned Src = MI.getOperand(1).getReg(); + Register Dest = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); bool IsDead = MI.getOperand(0).isDead(); bool IsKill = MI.getOperand(1).isKill(); unsigned SubReg = Is8BitOp ? X86::sub_8bit : X86::sub_16bit; @@ -794,7 +831,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA( case X86::ADD8rr_DB: case X86::ADD16rr: case X86::ADD16rr_DB: { - unsigned Src2 = MI.getOperand(2).getReg(); + Register Src2 = MI.getOperand(2).getReg(); bool IsKill2 = MI.getOperand(2).isKill(); assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need optimization"); unsigned InRegLEA2 = 0; @@ -888,7 +925,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr; // LEA can't handle RSP. - if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) && + if (Register::isVirtualRegister(Src.getReg()) && !MF.getRegInfo().constrainRegClass(Src.getReg(), &X86::GR64_NOSPRegClass)) return nullptr; @@ -911,7 +948,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // LEA can't handle ESP. bool isKill; - unsigned SrcReg; + Register SrcReg; MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill, ImplicitOp, LV)) @@ -947,7 +984,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r : (Is64Bit ? X86::LEA64_32r : X86::LEA32r); bool isKill; - unsigned SrcReg; + Register SrcReg; MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill, ImplicitOp, LV)) @@ -970,7 +1007,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, : (Is64Bit ? X86::LEA64_32r : X86::LEA32r); bool isKill; - unsigned SrcReg; + Register SrcReg; MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill, ImplicitOp, LV)) @@ -1005,7 +1042,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, Opc = Is64Bit ? 
X86::LEA64_32r : X86::LEA32r; bool isKill; - unsigned SrcReg; + Register SrcReg; MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true, SrcReg, isKill, ImplicitOp, LV)) @@ -1013,7 +1050,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, const MachineOperand &Src2 = MI.getOperand(2); bool isKill2; - unsigned SrcReg2; + Register SrcReg2; MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false, SrcReg2, isKill2, ImplicitOp2, LV)) @@ -1054,7 +1091,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r; bool isKill; - unsigned SrcReg; + Register SrcReg; MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true, SrcReg, isKill, ImplicitOp, LV)) @@ -1085,6 +1122,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return nullptr; case X86::SUB32ri8: case X86::SUB32ri: { + if (!MI.getOperand(2).isImm()) + return nullptr; int64_t Imm = MI.getOperand(2).getImm(); if (!isInt<32>(-Imm)) return nullptr; @@ -1093,7 +1132,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r; bool isKill; - unsigned SrcReg; + Register SrcReg; MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true, SrcReg, isKill, ImplicitOp, LV)) @@ -1111,6 +1150,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::SUB64ri8: case X86::SUB64ri32: { + if (!MI.getOperand(2).isImm()) + return nullptr; int64_t Imm = MI.getOperand(2).getImm(); if (!isInt<32>(-Imm)) return nullptr; @@ -1140,40 +1181,62 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::VMOVUPDZrmk: case X86::VMOVAPDZrmk: case X86::VMOVUPSZ128rmk: case X86::VMOVAPSZ128rmk: case X86::VMOVUPSZ256rmk: case X86::VMOVAPSZ256rmk: - case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk: { + case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk: + case X86::VBROADCASTSDZ256mk: + case X86::VBROADCASTSDZmk: + case X86::VBROADCASTSSZ128mk: + case X86::VBROADCASTSSZ256mk: + case X86::VBROADCASTSSZmk: + case X86::VPBROADCASTDZ128mk: + case X86::VPBROADCASTDZ256mk: + case X86::VPBROADCASTDZmk: + case X86::VPBROADCASTQZ128mk: + case X86::VPBROADCASTQZ256mk: + case X86::VPBROADCASTQZmk: { unsigned Opc; switch (MIOpc) { default: llvm_unreachable("Unreachable!"); - case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break; - case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break; - case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break; - case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break; - case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break; - case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break; - case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break; - case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break; - case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break; - case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break; - case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break; - case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break; - case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break; - case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break; - case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break; - case X86::VMOVUPSZ128rmk: Opc = 
X86::VBLENDMPSZ128rmk; break; - case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break; - case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break; - case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break; - case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break; - case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break; - case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break; - case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break; - case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break; - case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break; - case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break; - case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break; - case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break; - case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break; - case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break; + case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break; + case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break; + case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break; + case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break; + case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break; + case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break; + case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break; + case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break; + case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break; + case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break; + case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break; + case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break; + case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break; + case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break; + case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break; + case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break; + case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break; + case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break; + case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break; + case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break; + case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break; + case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break; + case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break; + case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break; + case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break; + case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break; + case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break; + case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break; + case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break; + case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break; + case X86::VBROADCASTSDZ256mk: Opc = X86::VBLENDMPDZ256rmbk; break; + case X86::VBROADCASTSDZmk: Opc = X86::VBLENDMPDZrmbk; break; + case X86::VBROADCASTSSZ128mk: Opc = X86::VBLENDMPSZ128rmbk; break; + case X86::VBROADCASTSSZ256mk: Opc = X86::VBLENDMPSZ256rmbk; break; + case X86::VBROADCASTSSZmk: Opc = X86::VBLENDMPSZrmbk; break; + case X86::VPBROADCASTDZ128mk: Opc = X86::VPBLENDMDZ128rmbk; break; + case X86::VPBROADCASTDZ256mk: Opc = X86::VPBLENDMDZ256rmbk; break; + case X86::VPBROADCASTDZmk: Opc = X86::VPBLENDMDZrmbk; break; + case X86::VPBROADCASTQZ128mk: Opc = X86::VPBLENDMQZ128rmbk; break; + case X86::VPBROADCASTQZ256mk: Opc = X86::VPBLENDMQZ256rmbk; break; + case X86::VPBROADCASTQZmk: Opc = X86::VPBLENDMQZrmbk; break; } NewMI = 
BuildMI(MF, MI.getDebugLoc(), get(Opc)) @@ -1187,6 +1250,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, .add(MI.getOperand(7)); break; } + case X86::VMOVDQU8Z128rrk: case X86::VMOVDQU8Z256rrk: case X86::VMOVDQU8Zrrk: @@ -1683,6 +1747,27 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, OpIdx1, OpIdx2); } + case X86::VCMPSDZrr: + case X86::VCMPSSZrr: + case X86::VCMPPDZrri: + case X86::VCMPPSZrri: + case X86::VCMPPDZ128rri: + case X86::VCMPPSZ128rri: + case X86::VCMPPDZ256rri: + case X86::VCMPPSZ256rri: + case X86::VCMPPDZrrik: + case X86::VCMPPSZrrik: + case X86::VCMPPDZ128rrik: + case X86::VCMPPSZ128rrik: + case X86::VCMPPDZ256rrik: + case X86::VCMPPSZ256rrik: { + unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x1f; + Imm = X86::getSwappedVCMPImm(Imm); + auto &WorkingMI = cloneIfNew(MI); + WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm); + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + OpIdx1, OpIdx2); + } case X86::VPERM2F128rr: case X86::VPERM2I128rr: { // Flip permute source immediate. @@ -1859,7 +1944,7 @@ X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI, // CommutableOpIdx2 is well defined now. Let's choose another commutable // operand and assign its index to CommutableOpIdx1. - unsigned Op2Reg = MI.getOperand(CommutableOpIdx2).getReg(); + Register Op2Reg = MI.getOperand(CommutableOpIdx2).getReg(); unsigned CommutableOpIdx1; for (CommutableOpIdx1 = LastCommutableVecOp; @@ -1889,7 +1974,8 @@ X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI, return true; } -bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, +bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI, + unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { const MCInstrDesc &Desc = MI.getDesc(); if (!Desc.isCommutable()) @@ -1926,17 +2012,23 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, // Ordered/Unordered/Equal/NotEqual tests unsigned Imm = MI.getOperand(3 + OpOffset).getImm() & 0x7; switch (Imm) { + default: + // EVEX versions can be commuted. + if ((Desc.TSFlags & X86II::EncodingMask) == X86II::EVEX) + break; + return false; case 0x00: // EQUAL case 0x03: // UNORDERED case 0x04: // NOT EQUAL case 0x07: // ORDERED - // The indices of the commutable operands are 1 and 2 (or 2 and 3 - // when masked). - // Assign them to the returned operand indices here. - return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1 + OpOffset, - 2 + OpOffset); + break; } - return false; + + // The indices of the commutable operands are 1 and 2 (or 2 and 3 + // when masked). + // Assign them to the returned operand indices here. + return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1 + OpOffset, + 2 + OpOffset); } case X86::MOVSSrr: // X86::MOVSDrr is always commutable. 
MOVSS is only commutable if we can @@ -1990,6 +2082,24 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, case X86::VPTERNLOGQZ256rmbikz: case X86::VPTERNLOGQZrmbikz: return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); + case X86::VPDPWSSDZ128r: + case X86::VPDPWSSDZ128rk: + case X86::VPDPWSSDZ128rkz: + case X86::VPDPWSSDZ256r: + case X86::VPDPWSSDZ256rk: + case X86::VPDPWSSDZ256rkz: + case X86::VPDPWSSDZr: + case X86::VPDPWSSDZrk: + case X86::VPDPWSSDZrkz: + case X86::VPDPWSSDSZ128r: + case X86::VPDPWSSDSZ128rk: + case X86::VPDPWSSDSZ128rkz: + case X86::VPDPWSSDSZ256r: + case X86::VPDPWSSDSZ256rk: + case X86::VPDPWSSDSZ256rkz: + case X86::VPDPWSSDSZr: + case X86::VPDPWSSDSZrk: + case X86::VPDPWSSDSZrkz: case X86::VPMADD52HUQZ128r: case X86::VPMADD52HUQZ128rk: case X86::VPMADD52HUQZ128rkz: @@ -2215,7 +2325,7 @@ unsigned X86::getVPCMPImmForCond(ISD::CondCode CC) { } } -/// Get the VPCMP immediate if the opcodes are swapped. +/// Get the VPCMP immediate if the operands are swapped. unsigned X86::getSwappedVPCMPImm(unsigned Imm) { switch (Imm) { default: llvm_unreachable("Unreachable!"); @@ -2233,7 +2343,7 @@ unsigned X86::getSwappedVPCMPImm(unsigned Imm) { return Imm; } -/// Get the VPCOM immediate if the opcodes are swapped. +/// Get the VPCOM immediate if the operands are swapped. unsigned X86::getSwappedVPCOMImm(unsigned Imm) { switch (Imm) { default: llvm_unreachable("Unreachable!"); @@ -2251,6 +2361,23 @@ unsigned X86::getSwappedVPCOMImm(unsigned Imm) { return Imm; } +/// Get the VCMP immediate if the operands are swapped. +unsigned X86::getSwappedVCMPImm(unsigned Imm) { + // Only need the lower 2 bits to distinguish. + switch (Imm & 0x3) { + default: llvm_unreachable("Unreachable!"); + case 0x00: case 0x03: + // EQ/NE/TRUE/FALSE/ORD/UNORD don't change immediate when commuted. + break; + case 0x01: case 0x02: + // Need to toggle bits 3:0. Bit 4 stays the same.
+ Imm ^= 0xf; + break; + } + + return Imm; +} + bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const { if (!MI.isTerminator()) return false; @@ -3131,25 +3258,6 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addReg(SrcReg, getKillRegState(isKill)); } -void X86InstrInfo::storeRegToAddr( - MachineFunction &MF, unsigned SrcReg, bool isKill, - SmallVectorImpl &Addr, const TargetRegisterClass *RC, - ArrayRef MMOs, - SmallVectorImpl &NewMIs) const { - const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - unsigned Alignment = std::max(TRI.getSpillSize(*RC), 16); - bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment; - unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget); - DebugLoc DL; - MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); - for (unsigned i = 0, e = Addr.size(); i != e; ++i) - MIB.add(Addr[i]); - MIB.addReg(SrcReg, getKillRegState(isKill)); - MIB.setMemRefs(MMOs); - NewMIs.push_back(MIB); -} - - void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIdx, @@ -3164,23 +3272,6 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg), FrameIdx); } -void X86InstrInfo::loadRegFromAddr( - MachineFunction &MF, unsigned DestReg, - SmallVectorImpl &Addr, const TargetRegisterClass *RC, - ArrayRef MMOs, - SmallVectorImpl &NewMIs) const { - const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - unsigned Alignment = std::max(TRI.getSpillSize(*RC), 16); - bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment; - unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget); - DebugLoc DL; - MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); - for (unsigned i = 0, e = Addr.size(); i != e; ++i) - MIB.add(Addr[i]); - MIB.setMemRefs(MMOs); - NewMIs.push_back(MIB); -} - bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, unsigned &SrcReg2, int &CmpMask, int &CmpValue) const { @@ -3599,8 +3690,9 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, if (!IsCmpZero && !Sub) return false; - bool IsSwapped = (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && - Sub->getOperand(2).getReg() == SrcReg); + bool IsSwapped = + (SrcReg2 != 0 && Sub && Sub->getOperand(1).getReg() == SrcReg2 && + Sub->getOperand(2).getReg() == SrcReg); // Scan forward from the instruction after CmpInstr for uses of EFLAGS. // It is safe to remove CmpInstr if EFLAGS is redefined or killed. @@ -3755,7 +3847,7 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI, MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg != FoldAsLoadDefReg) continue; // Do not fold if we have a subreg use or a def. 
@@ -3785,7 +3877,7 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI, static bool Expand2AddrUndef(MachineInstrBuilder &MIB, const MCInstrDesc &Desc) { assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction."); - unsigned Reg = MIB->getOperand(0).getReg(); + Register Reg = MIB->getOperand(0).getReg(); MIB->setDesc(Desc); // MachineInstr::addOperand() will insert explicit operands before any @@ -3815,7 +3907,7 @@ static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII, bool MinusOne) { MachineBasicBlock &MBB = *MIB->getParent(); DebugLoc DL = MIB->getDebugLoc(); - unsigned Reg = MIB->getOperand(0).getReg(); + Register Reg = MIB->getOperand(0).getReg(); // Insert the XOR. BuildMI(MBB, MIB.getInstr(), DL, TII.get(X86::XOR32rr), Reg) @@ -3891,7 +3983,7 @@ static void expandLoadStackGuard(MachineInstrBuilder &MIB, const TargetInstrInfo &TII) { MachineBasicBlock &MBB = *MIB->getParent(); DebugLoc DL = MIB->getDebugLoc(); - unsigned Reg = MIB->getOperand(0).getReg(); + Register Reg = MIB->getOperand(0).getReg(); const GlobalValue *GV = cast((*MIB->memoperands_begin())->getValue()); auto Flags = MachineMemOperand::MOLoad | @@ -3929,7 +4021,7 @@ static bool expandNOVLXLoad(MachineInstrBuilder &MIB, const MCInstrDesc &LoadDesc, const MCInstrDesc &BroadcastDesc, unsigned SubIdx) { - unsigned DestReg = MIB->getOperand(0).getReg(); + Register DestReg = MIB->getOperand(0).getReg(); // Check if DestReg is XMM16-31 or YMM16-31. if (TRI->getEncodingValue(DestReg) < 16) { // We can use a normal VEX encoded load. @@ -3952,7 +4044,7 @@ static bool expandNOVLXStore(MachineInstrBuilder &MIB, const MCInstrDesc &StoreDesc, const MCInstrDesc &ExtractDesc, unsigned SubIdx) { - unsigned SrcReg = MIB->getOperand(X86::AddrNumOperands).getReg(); + Register SrcReg = MIB->getOperand(X86::AddrNumOperands).getReg(); // Check if DestReg is XMM16-31 or YMM16-31. if (TRI->getEncodingValue(SrcReg) < 16) { // We can use a normal VEX encoded store. @@ -4008,12 +4100,13 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case X86::V_SET0: case X86::FsFLD0SS: case X86::FsFLD0SD: + case X86::FsFLD0F128: return Expand2AddrUndef(MIB, get(HasAVX ? 
X86::VXORPSrr : X86::XORPSrr)); case X86::AVX_SET0: { assert(HasAVX && "AVX not supported"); const TargetRegisterInfo *TRI = &getRegisterInfo(); - unsigned SrcReg = MIB->getOperand(0).getReg(); - unsigned XReg = TRI->getSubReg(SrcReg, X86::sub_xmm); + Register SrcReg = MIB->getOperand(0).getReg(); + Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm); MIB->getOperand(0).setReg(XReg); Expand2AddrUndef(MIB, get(X86::VXORPSrr)); MIB.addReg(SrcReg, RegState::ImplicitDefine); @@ -4021,9 +4114,10 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { } case X86::AVX512_128_SET0: case X86::AVX512_FsFLD0SS: - case X86::AVX512_FsFLD0SD: { + case X86::AVX512_FsFLD0SD: + case X86::AVX512_FsFLD0F128: { bool HasVLX = Subtarget.hasVLX(); - unsigned SrcReg = MIB->getOperand(0).getReg(); + Register SrcReg = MIB->getOperand(0).getReg(); const TargetRegisterInfo *TRI = &getRegisterInfo(); if (HasVLX || TRI->getEncodingValue(SrcReg) < 16) return Expand2AddrUndef(MIB, @@ -4037,10 +4131,10 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case X86::AVX512_256_SET0: case X86::AVX512_512_SET0: { bool HasVLX = Subtarget.hasVLX(); - unsigned SrcReg = MIB->getOperand(0).getReg(); + Register SrcReg = MIB->getOperand(0).getReg(); const TargetRegisterInfo *TRI = &getRegisterInfo(); if (HasVLX || TRI->getEncodingValue(SrcReg) < 16) { - unsigned XReg = TRI->getSubReg(SrcReg, X86::sub_xmm); + Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm); MIB->getOperand(0).setReg(XReg); Expand2AddrUndef(MIB, get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr)); @@ -4060,14 +4154,14 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case X86::AVX2_SETALLONES: return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr)); case X86::AVX1_SETALLONES: { - unsigned Reg = MIB->getOperand(0).getReg(); + Register Reg = MIB->getOperand(0).getReg(); // VCMPPSYrri with an immediate 0xf should produce VCMPTRUEPS. MIB->setDesc(get(X86::VCMPPSYrri)); MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xf); return true; } case X86::AVX512_512_SETALLONES: { - unsigned Reg = MIB->getOperand(0).getReg(); + Register Reg = MIB->getOperand(0).getReg(); MIB->setDesc(get(X86::VPTERNLOGDZrri)); // VPTERNLOGD needs 3 register inputs and an immediate. // 0xff will return 1s for any input. @@ -4077,8 +4171,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { } case X86::AVX512_512_SEXT_MASK_32: case X86::AVX512_512_SEXT_MASK_64: { - unsigned Reg = MIB->getOperand(0).getReg(); - unsigned MaskReg = MIB->getOperand(1).getReg(); + Register Reg = MIB->getOperand(0).getReg(); + Register MaskReg = MIB->getOperand(1).getReg(); unsigned MaskState = getRegState(MIB->getOperand(1)); unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64) ? X86::VPTERNLOGQZrrikz : X86::VPTERNLOGDZrrikz; @@ -4115,8 +4209,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr), get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm); case X86::MOV32ri64: { - unsigned Reg = MIB->getOperand(0).getReg(); - unsigned Reg32 = RI.getSubReg(Reg, X86::sub_32bit); + Register Reg = MIB->getOperand(0).getReg(); + Register Reg32 = RI.getSubReg(Reg, X86::sub_32bit); MI.setDesc(get(X86::MOV32ri)); MIB->getOperand(0).setReg(Reg32); MIB.addReg(Reg, RegState::ImplicitDefine); @@ -4251,8 +4345,8 @@ unsigned X86InstrInfo::getPartialRegUpdateClearance( // If MI is marked as reading Reg, the partial register update is wanted. 
const MachineOperand &MO = MI.getOperand(0); - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isVirtualRegister(Reg)) { if (MO.readsReg() || MI.readsVirtualRegister(Reg)) return 0; } else { @@ -4268,7 +4362,10 @@ unsigned X86InstrInfo::getPartialRegUpdateClearance( // Return true for any instruction that copies the high bits of the first source // operand into the unused high bits of the destination operand. -static bool hasUndefRegUpdate(unsigned Opcode, bool ForLoadFold = false) { +static bool hasUndefRegUpdate(unsigned Opcode, unsigned &OpNum, + bool ForLoadFold = false) { + // Set the OpNum parameter to the first source operand. + OpNum = 1; switch (Opcode) { case X86::VCVTSI2SSrr: case X86::VCVTSI2SSrm: @@ -4427,6 +4524,14 @@ static bool hasUndefRegUpdate(unsigned Opcode, bool ForLoadFold = false) { case X86::VSQRTSDZm: case X86::VSQRTSDZm_Int: return true; + case X86::VMOVSSZrrk: + case X86::VMOVSDZrrk: + OpNum = 3; + return true; + case X86::VMOVSSZrrkz: + case X86::VMOVSDZrrkz: + OpNum = 2; + return true; } return false; @@ -4449,14 +4554,11 @@ static bool hasUndefRegUpdate(unsigned Opcode, bool ForLoadFold = false) { unsigned X86InstrInfo::getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum, const TargetRegisterInfo *TRI) const { - if (!hasUndefRegUpdate(MI.getOpcode())) + if (!hasUndefRegUpdate(MI.getOpcode(), OpNum)) return 0; - // Set the OpNum parameter to the first source operand. - OpNum = 1; - const MachineOperand &MO = MI.getOperand(OpNum); - if (MO.isUndef() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + if (MO.isUndef() && Register::isPhysicalRegister(MO.getReg())) { return UndefRegClearance; } return 0; @@ -4464,7 +4566,7 @@ X86InstrInfo::getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum, void X86InstrInfo::breakPartialRegDependency( MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const { - unsigned Reg = MI.getOperand(OpNum).getReg(); + Register Reg = MI.getOperand(OpNum).getReg(); // If MI kills this register, the false dependence is already broken. if (MI.killsRegister(Reg, TRI)) return; @@ -4480,7 +4582,7 @@ void X86InstrInfo::breakPartialRegDependency( } else if (X86::VR256RegClass.contains(Reg)) { // Use vxorps to clear the full ymm register. // It wants to read and write the xmm sub-register. - unsigned XReg = TRI->getSubReg(Reg, X86::sub_xmm); + Register XReg = TRI->getSubReg(Reg, X86::sub_xmm); BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VXORPSrr), XReg) .addReg(XReg, RegState::Undef) .addReg(XReg, RegState::Undef) @@ -4489,7 +4591,7 @@ void X86InstrInfo::breakPartialRegDependency( } else if (X86::GR64RegClass.contains(Reg)) { // Using XOR32rr because it has shorter encoding and zeros up the upper bits // as well. - unsigned XReg = TRI->getSubReg(Reg, X86::sub_32bit); + Register XReg = TRI->getSubReg(Reg, X86::sub_32bit); BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), XReg) .addReg(XReg, RegState::Undef) .addReg(XReg, RegState::Undef) @@ -4538,8 +4640,8 @@ static void updateOperandRegConstraints(MachineFunction &MF, // We only need to update constraints on virtual register operands.
if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TRI.isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; auto *NewRC = MRI.constrainRegClass( @@ -4698,7 +4800,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom( static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF, MachineInstr &MI) { - if (!hasUndefRegUpdate(MI.getOpcode(), /*ForLoadFold*/true) || + unsigned Ignored; + if (!hasUndefRegUpdate(MI.getOpcode(), Ignored, /*ForLoadFold*/true) || !MI.getOperand(1).isReg()) return false; @@ -4788,6 +4891,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( if (I != nullptr) { unsigned Opcode = I->DstOp; unsigned MinAlign = (I->Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT; + MinAlign = MinAlign ? 1 << (MinAlign - 1) : 0; if (Align < MinAlign) return nullptr; bool NarrowToMOV32rm = false; @@ -4821,8 +4925,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( // If this is the special case where we use a MOV32rm to load a 32-bit // value and zero-extend the top bits. Change the destination register // to a 32-bit one. - unsigned DstReg = NewMI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + Register DstReg = NewMI->getOperand(0).getReg(); + if (Register::isPhysicalRegister(DstReg)) NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, X86::sub_32bit)); else NewMI->getOperand(0).setSubReg(X86::sub_32bit); @@ -5133,6 +5237,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( case X86::V_SET0: case X86::V_SETALLONES: case X86::AVX512_128_SET0: + case X86::FsFLD0F128: + case X86::AVX512_FsFLD0F128: Alignment = 16; break; case X86::MMX_SET0: @@ -5182,7 +5288,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( case X86::FsFLD0SD: case X86::AVX512_FsFLD0SD: case X86::FsFLD0SS: - case X86::AVX512_FsFLD0SS: { + case X86::AVX512_FsFLD0SS: + case X86::FsFLD0F128: + case X86::AVX512_FsFLD0F128: { // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure. // Create a constant-pool entry and operands to load from it. 
@@ -5212,6 +5320,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( Ty = Type::getFloatTy(MF.getFunction().getContext()); else if (Opc == X86::FsFLD0SD || Opc == X86::AVX512_FsFLD0SD) Ty = Type::getDoubleTy(MF.getFunction().getContext()); + else if (Opc == X86::FsFLD0F128 || Opc == X86::AVX512_FsFLD0F128) + Ty = Type::getFP128Ty(MF.getFunction().getContext()); else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES) Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),16); else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0 || @@ -5293,6 +5403,51 @@ extractStoreMMOs(ArrayRef MMOs, MachineFunction &MF) { return StoreMMOs; } +static unsigned getBroadcastOpcode(const X86MemoryFoldTableEntry *I, + const TargetRegisterClass *RC, + const X86Subtarget &STI) { + assert(STI.hasAVX512() && "Expected at least AVX512!"); + unsigned SpillSize = STI.getRegisterInfo()->getSpillSize(*RC); + assert((SpillSize == 64 || STI.hasVLX()) && + "Can't broadcast less than 64 bytes without AVX512VL!"); + + switch (I->Flags & TB_BCAST_MASK) { + default: llvm_unreachable("Unexpected broadcast type!"); + case TB_BCAST_D: + switch (SpillSize) { + default: llvm_unreachable("Unknown spill size"); + case 16: return X86::VPBROADCASTDZ128m; + case 32: return X86::VPBROADCASTDZ256m; + case 64: return X86::VPBROADCASTDZm; + } + break; + case TB_BCAST_Q: + switch (SpillSize) { + default: llvm_unreachable("Unknown spill size"); + case 16: return X86::VPBROADCASTQZ128m; + case 32: return X86::VPBROADCASTQZ256m; + case 64: return X86::VPBROADCASTQZm; + } + break; + case TB_BCAST_SS: + switch (SpillSize) { + default: llvm_unreachable("Unknown spill size"); + case 16: return X86::VBROADCASTSSZ128m; + case 32: return X86::VBROADCASTSSZ256m; + case 64: return X86::VBROADCASTSSZm; + } + break; + case TB_BCAST_SD: + switch (SpillSize) { + default: llvm_unreachable("Unknown spill size"); + case 16: return X86::VMOVDDUPZ128rm; + case 32: return X86::VBROADCASTSDZ256m; + case 64: return X86::VBROADCASTSDZm; + } + break; + } +} + bool X86InstrInfo::unfoldMemoryOperand( MachineFunction &MF, MachineInstr &MI, unsigned Reg, bool UnfoldLoad, bool UnfoldStore, SmallVectorImpl &NewMIs) const { @@ -5303,6 +5458,7 @@ bool X86InstrInfo::unfoldMemoryOperand( unsigned Index = I->Flags & TB_INDEX_MASK; bool FoldedLoad = I->Flags & TB_FOLDED_LOAD; bool FoldedStore = I->Flags & TB_FOLDED_STORE; + bool FoldedBCast = I->Flags & TB_FOLDED_BCAST; if (UnfoldLoad && !FoldedLoad) return false; UnfoldLoad &= FoldedLoad; @@ -5311,7 +5467,9 @@ bool X86InstrInfo::unfoldMemoryOperand( UnfoldStore &= FoldedStore; const MCInstrDesc &MCID = get(Opc); + const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); // TODO: Check if 32-byte or greater accesses are slow too? if (!MI.hasOneMemOperand() && RC == &X86::VR128RegClass && Subtarget.isUnalignedMem16Slow()) @@ -5335,10 +5493,26 @@ bool X86InstrInfo::unfoldMemoryOperand( AfterOps.push_back(Op); } - // Emit the load instruction. + // Emit the load or broadcast instruction. 
if (UnfoldLoad) { auto MMOs = extractLoadMMOs(MI.memoperands(), MF); - loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs, NewMIs); + + unsigned Opc; + if (FoldedBCast) { + Opc = getBroadcastOpcode(I, RC, Subtarget); + } else { + unsigned Alignment = std::max(TRI.getSpillSize(*RC), 16); + bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment; + Opc = getLoadRegOpcode(Reg, RC, isAligned, Subtarget); + } + + DebugLoc DL; + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), Reg); + for (unsigned i = 0, e = AddrOps.size(); i != e; ++i) + MIB.add(AddrOps[i]); + MIB.setMemRefs(MMOs); + NewMIs.push_back(MIB); + if (UnfoldStore) { // Address operands cannot be marked isKill. for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) { @@ -5404,7 +5578,16 @@ bool X86InstrInfo::unfoldMemoryOperand( if (UnfoldStore) { const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF); auto MMOs = extractStoreMMOs(MI.memoperands(), MF); - storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs, NewMIs); + unsigned Alignment = std::max(TRI.getSpillSize(*DstRC), 16); + bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment; + unsigned Opc = getStoreRegOpcode(Reg, DstRC, isAligned, Subtarget); + DebugLoc DL; + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); + for (unsigned i = 0, e = AddrOps.size(); i != e; ++i) + MIB.add(AddrOps[i]); + MIB.addReg(Reg, RegState::Kill); + MIB.setMemRefs(MMOs); + NewMIs.push_back(MIB); } return true; @@ -5423,6 +5606,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, unsigned Index = I->Flags & TB_INDEX_MASK; bool FoldedLoad = I->Flags & TB_FOLDED_LOAD; bool FoldedStore = I->Flags & TB_FOLDED_STORE; + bool FoldedBCast = I->Flags & TB_FOLDED_BCAST; const MCInstrDesc &MCID = get(Opc); MachineFunction &MF = DAG.getMachineFunction(); const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); @@ -5456,10 +5640,17 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, return false; // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte // memory access is slow above. - unsigned Alignment = std::max(TRI.getSpillSize(*RC), 16); - bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment; - Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, Subtarget), dl, - VT, MVT::Other, AddrOps); + + unsigned Opc; + if (FoldedBCast) { + Opc = getBroadcastOpcode(I, RC, Subtarget); + } else { + unsigned Alignment = std::max(TRI.getSpillSize(*RC), 16); + bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment; + Opc = getLoadRegOpcode(0, RC, isAligned, Subtarget); + } + + Load = DAG.getMachineNode(Opc, dl, VT, MVT::Other, AddrOps); NewNodes.push_back(Load); // Preserve memory reference information. @@ -7367,6 +7558,96 @@ bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { } } +Optional +X86InstrInfo::describeLoadedValue(const MachineInstr &MI) const { + const MachineOperand *Op = nullptr; + DIExpression *Expr = nullptr; + + switch (MI.getOpcode()) { + case X86::LEA32r: + case X86::LEA64r: + case X86::LEA64_32r: { + // Operand 4 could be a global address. For now we do not support + // such a situation.
+ if (!MI.getOperand(4).isImm() || !MI.getOperand(2).isImm()) + return None; + + const MachineOperand &Op1 = MI.getOperand(1); + const MachineOperand &Op2 = MI.getOperand(3); + const TargetRegisterInfo *TRI = &getRegisterInfo(); + assert(Op2.isReg() && (Op2.getReg() == X86::NoRegister || + Register::isPhysicalRegister(Op2.getReg()))); + + // Omit situations like: + // %rsi = lea %rsi, 4, ... + if ((Op1.isReg() && Op1.getReg() == MI.getOperand(0).getReg()) || + Op2.getReg() == MI.getOperand(0).getReg()) + return None; + else if ((Op1.isReg() && Op1.getReg() != X86::NoRegister && + TRI->regsOverlap(Op1.getReg(), MI.getOperand(0).getReg())) || + (Op2.getReg() != X86::NoRegister && + TRI->regsOverlap(Op2.getReg(), MI.getOperand(0).getReg()))) + return None; + + int64_t Coef = MI.getOperand(2).getImm(); + int64_t Offset = MI.getOperand(4).getImm(); + SmallVector Ops; + + if ((Op1.isReg() && Op1.getReg() != X86::NoRegister)) { + Op = &Op1; + } else if (Op1.isFI()) + Op = &Op1; + + if (Op && Op->isReg() && Op->getReg() == Op2.getReg() && Coef > 0) { + Ops.push_back(dwarf::DW_OP_constu); + Ops.push_back(Coef + 1); + Ops.push_back(dwarf::DW_OP_mul); + } else { + if (Op && Op2.getReg() != X86::NoRegister) { + int dwarfReg = TRI->getDwarfRegNum(Op2.getReg(), false); + if (dwarfReg < 0) + return None; + else if (dwarfReg < 32) { + Ops.push_back(dwarf::DW_OP_breg0 + dwarfReg); + Ops.push_back(0); + } else { + Ops.push_back(dwarf::DW_OP_bregx); + Ops.push_back(dwarfReg); + Ops.push_back(0); + } + } else if (!Op) { + assert(Op2.getReg() != X86::NoRegister); + Op = &Op2; + } + + if (Coef > 1) { + assert(Op2.getReg() != X86::NoRegister); + Ops.push_back(dwarf::DW_OP_constu); + Ops.push_back(Coef); + Ops.push_back(dwarf::DW_OP_mul); + } + + if (((Op1.isReg() && Op1.getReg() != X86::NoRegister) || Op1.isFI()) && + Op2.getReg() != X86::NoRegister) { + Ops.push_back(dwarf::DW_OP_plus); + } + } + + DIExpression::appendOffset(Ops, Offset); + Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), Ops); + + return ParamLoadedValue(*Op, Expr); + } + case X86::XOR32rr: { + if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) + return ParamLoadedValue(MachineOperand::CreateImm(0), Expr); + return None; + } + default: + return TargetInstrInfo::describeLoadedValue(MI); + } +} + /// This is an architecture-specific helper function of reassociateOps. /// Set special operand attributes for new instructions after reassociation. void X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1, @@ -7500,9 +7781,8 @@ namespace { // movq $_GLOBAL_OFFSET_TABLE_ - .LN$pb, %rcx // addq %rcx, %rax // RAX now holds address of _GLOBAL_OFFSET_TABLE_. - unsigned PBReg = RegInfo.createVirtualRegister(&X86::GR64RegClass); - unsigned GOTReg = - RegInfo.createVirtualRegister(&X86::GR64RegClass); + Register PBReg = RegInfo.createVirtualRegister(&X86::GR64RegClass); + Register GOTReg = RegInfo.createVirtualRegister(&X86::GR64RegClass); BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PBReg) .addReg(X86::RIP) .addImm(0) diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 13ca17139494..22b7b1d4cb19 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -67,6 +67,9 @@ unsigned getSwappedVPCMPImm(unsigned Imm); /// Get the VPCOM immediate if the opcodes are swapped. unsigned getSwappedVPCOMImm(unsigned Imm); +/// Get the VCMP immediate if the opcodes are swapped.
+unsigned getSwappedVCMPImm(unsigned Imm); + } // namespace X86 /// isGlobalStubReference - Return true if the specified TargetFlag operand is @@ -203,7 +206,7 @@ public: int &FrameIndex) const override; bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA) const override; + AAResults *AA) const override; void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, const MachineInstr &Orig, @@ -218,7 +221,7 @@ public: /// Reference parameters are set to indicate how caller should add this /// operand to the LEA instruction. bool classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, - unsigned LEAOpcode, bool AllowSP, unsigned &NewSrc, + unsigned LEAOpcode, bool AllowSP, Register &NewSrc, bool &isKill, MachineOperand &ImplicitOp, LiveVariables *LV) const; @@ -251,7 +254,7 @@ public: /// findCommutedOpIndices(MI, Op1, Op2); /// can be interpreted as a query asking to find an operand that would be /// commutable with the operand#1. - bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, + bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override; /// Returns an adjusted FMA opcode that must be used in FMA instruction that @@ -317,23 +320,11 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; - void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - ArrayRef MMOs, - SmallVectorImpl &NewMIs) const; - void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; - void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - ArrayRef MMOs, - SmallVectorImpl &NewMIs) const; - bool expandPostRAPseudo(MachineInstr &MI) const override; /// Check whether the target can fold a load that feeds a subreg operand @@ -527,6 +518,13 @@ public: #define GET_INSTRINFO_HELPER_DECLS #include "X86GenInstrInfo.inc" + static bool hasLockPrefix(const MachineInstr &MI) { + return MI.getDesc().TSFlags & X86II::LOCK; + } + + Optional + describeLoadedValue(const MachineInstr &MI) const override; + protected: /// Commutes the operands in the given instruction by changing the operands /// order and/or changing the instruction's opcode and/or the immediate value diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 8e05dd8ec5c1..e452145f3b65 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -673,6 +673,14 @@ def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass { ImmSExti64i32AsmOperand]; } +// 4-bit immediate used by some XOP instructions +// [0, 0xF] +def ImmUnsignedi4AsmOperand : AsmOperandClass { + let Name = "ImmUnsignedi4"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "InvalidImmUnsignedi4"; +} + // Unsigned immediate used by SSE/AVX instructions // [0, 0xFF] // [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] @@ -705,6 +713,13 @@ def i64i8imm : Operand { let OperandType = "OPERAND_IMMEDIATE"; } +// Unsigned 4-bit immediate used by some XOP instructions. +def u4imm : Operand { + let PrintMethod = "printU8Imm"; + let ParserMatchClass = ImmUnsignedi4AsmOperand; + let OperandType = "OPERAND_IMMEDIATE"; +} + // Unsigned 8-bit immediate used by SSE/AVX instructions. 
def u8imm : Operand { let PrintMethod = "printU8Imm"; @@ -925,7 +940,6 @@ def HasMOVDIR64B : Predicate<"Subtarget->hasMOVDIR64B()">; def HasPTWRITE : Predicate<"Subtarget->hasPTWRITE()">; def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; -def HasMPX : Predicate<"Subtarget->hasMPX()">; def HasSHSTK : Predicate<"Subtarget->hasSHSTK()">; def HasCLFLUSHOPT : Predicate<"Subtarget->hasCLFLUSHOPT()">; def HasCLWB : Predicate<"Subtarget->hasCLWB()">; @@ -1103,7 +1117,7 @@ def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{ if (ExtType == ISD::NON_EXTLOAD) return true; if (ExtType == ISD::EXTLOAD) - return LD->getAlignment() >= 2 && !LD->isVolatile(); + return LD->getAlignment() >= 2 && LD->isSimple(); return false; }]>; @@ -1113,7 +1127,7 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{ if (ExtType == ISD::NON_EXTLOAD) return true; if (ExtType == ISD::EXTLOAD) - return LD->getAlignment() >= 4 && !LD->isVolatile(); + return LD->getAlignment() >= 4 && LD->isSimple(); return false; }]>; @@ -1170,7 +1184,7 @@ def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (unindexedload node:$ptr)), [ if (LD->getMemoryVT() == MVT::i32) return true; - return LD->getAlignment() >= 4 && !LD->isVolatile(); + return LD->getAlignment() >= 4 && LD->isSimple(); }]>; @@ -2404,25 +2418,26 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in { } multiclass bmi_bls { + RegisterClass RC, X86MemOperand x86memop, + X86FoldableSchedWrite sched> { let hasSideEffects = 0 in { def rr : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src), !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>, - T8PS, VEX_4V, Sched<[WriteBLS]>; + T8PS, VEX_4V, Sched<[sched]>; let mayLoad = 1 in def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src), !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>, - T8PS, VEX_4V, Sched<[WriteBLS.Folded]>; + T8PS, VEX_4V, Sched<[sched.Folded]>; } } let Predicates = [HasBMI], Defs = [EFLAGS] in { - defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem>; - defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem>, VEX_W; - defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem>; - defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem>, VEX_W; - defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem>; - defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem>, VEX_W; + defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS>; + defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS>, VEX_W; + defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, WriteBLS>; + defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem, WriteBLS>, VEX_W; + defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem, WriteBLS>; + defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS>, VEX_W; } //===----------------------------------------------------------------------===// @@ -2683,12 +2698,12 @@ def SLWPCB64 : I<0x12, MRM1r, (outs GR64:$dst), (ins), "slwpcb\t$dst", multiclass lwpins_intr { def rri : Ii32<0x12, MRM0r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl), "lwpins\t{$cntl, $src1, $src0|$src0, $src1, $cntl}", - [(set EFLAGS, (X86lwpins RC:$src0, GR32:$src1, imm:$cntl))]>, + [(set EFLAGS, (X86lwpins RC:$src0, GR32:$src1, timm:$cntl))]>, XOP_4V, XOPA; let mayLoad = 1 in def rmi : Ii32<0x12, MRM0m, (outs), (ins RC:$src0, i32mem:$src1, i32imm:$cntl), "lwpins\t{$cntl, $src1, $src0|$src0, $src1, $cntl}", - [(set EFLAGS, (X86lwpins 
RC:$src0, (loadi32 addr:$src1), imm:$cntl))]>, + [(set EFLAGS, (X86lwpins RC:$src0, (loadi32 addr:$src1), timm:$cntl))]>, XOP_4V, XOPA; } @@ -2700,11 +2715,11 @@ let Defs = [EFLAGS] in { multiclass lwpval_intr { def rri : Ii32<0x12, MRM1r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl), "lwpval\t{$cntl, $src1, $src0|$src0, $src1, $cntl}", - [(Int RC:$src0, GR32:$src1, imm:$cntl)]>, XOP_4V, XOPA; + [(Int RC:$src0, GR32:$src1, timm:$cntl)]>, XOP_4V, XOPA; let mayLoad = 1 in def rmi : Ii32<0x12, MRM1m, (outs), (ins RC:$src0, i32mem:$src1, i32imm:$cntl), "lwpval\t{$cntl, $src1, $src0|$src0, $src1, $cntl}", - [(Int RC:$src0, (loadi32 addr:$src1), imm:$cntl)]>, + [(Int RC:$src0, (loadi32 addr:$src1), timm:$cntl)]>, XOP_4V, XOPA; } @@ -3205,13 +3220,13 @@ def : InstAlias<"aam", (AAM8i8 10)>, Requires<[Not64BitMode]>; // Disambiguate the mem/imm form of bt-without-a-suffix as btl. // Likewise for btc/btr/bts. def : InstAlias<"bt\t{$imm, $mem|$mem, $imm}", - (BT32mi8 i32mem:$mem, i32i8imm:$imm), 0, "att">; + (BT32mi8 i32mem:$mem, i32u8imm:$imm), 0, "att">; def : InstAlias<"btc\t{$imm, $mem|$mem, $imm}", - (BTC32mi8 i32mem:$mem, i32i8imm:$imm), 0, "att">; + (BTC32mi8 i32mem:$mem, i32u8imm:$imm), 0, "att">; def : InstAlias<"btr\t{$imm, $mem|$mem, $imm}", - (BTR32mi8 i32mem:$mem, i32i8imm:$imm), 0, "att">; + (BTR32mi8 i32mem:$mem, i32u8imm:$imm), 0, "att">; def : InstAlias<"bts\t{$imm, $mem|$mem, $imm}", - (BTS32mi8 i32mem:$mem, i32i8imm:$imm), 0, "att">; + (BTS32mi8 i32mem:$mem, i32u8imm:$imm), 0, "att">; // clr aliases. def : InstAlias<"clr{b}\t$reg", (XOR8rr GR8 :$reg, GR8 :$reg), 0>; diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 57835b1a256a..cd9a866c91cb 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -30,7 +30,6 @@ def MMX_SET0 : I<0, Pseudo, (outs VR64:$dst), (ins), "", []>; let Constraints = "$src1 = $dst" in { // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic. - // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp. 
multiclass MMXI_binop_rm_int opc, string OpcodeStr, Intrinsic IntId, X86FoldableSchedWrite sched, bit Commutable = 0, X86MemOperand OType = i64mem> { @@ -67,7 +66,7 @@ let Constraints = "$src1 = $dst" in { def ri : MMXIi8, + [(set VR64:$dst, (IntId2 VR64:$src1, timm:$src2))]>, Sched<[schedImm]>; } } @@ -114,13 +113,13 @@ multiclass ssse3_palign_mm, + [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 timm:$src3)))]>, Sched<[sched]>; def rmi : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2, u8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR64:$dst, (IntId VR64:$src1, - (bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>, + (bitconvert (load_mmx addr:$src2)), (i8 timm:$src3)))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -496,14 +495,14 @@ def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, u8imm:$src2), "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR64:$dst, - (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>, + (int_x86_sse_pshuf_w VR64:$src1, timm:$src2))]>, Sched<[SchedWriteShuffle.MMX]>; def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src1, u8imm:$src2), "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR64:$dst, (int_x86_sse_pshuf_w (load_mmx addr:$src1), - imm:$src2))]>, + timm:$src2))]>, Sched<[SchedWriteShuffle.MMX.Folded]>; // -- Conversion Instructions @@ -535,7 +534,7 @@ def MMX_PEXTRWrr: MMXIi8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR64:$src1, i32u8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32orGR64:$dst, (int_x86_mmx_pextr_w VR64:$src1, - imm:$src2))]>, + timm:$src2))]>, Sched<[WriteVecExtract]>; let Constraints = "$src1 = $dst" in { let Predicates = [HasMMX, HasSSE1] in { @@ -544,7 +543,7 @@ let Predicates = [HasMMX, HasSSE1] in { (ins VR64:$src1, GR32orGR64:$src2, i32u8imm:$src3), "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1, - GR32orGR64:$src2, imm:$src3))]>, + GR32orGR64:$src2, timm:$src3))]>, Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>; def MMX_PINSRWrm : MMXIi8<0xC4, MRMSrcMem, @@ -553,7 +552,7 @@ let Predicates = [HasMMX, HasSSE1] in { "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1, (i32 (anyext (loadi16 addr:$src2))), - imm:$src3))]>, + timm:$src3))]>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>; } } @@ -567,6 +566,13 @@ def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (int_x86_mmx_pmovmskb VR64:$src))]>, Sched<[WriteMMXMOVMSK]>; +// MMX to XMM for vector types +def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1, + [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>; + +def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)), + (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; + // Low word of XMM to MMX. def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1, [SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>; @@ -574,9 +580,13 @@ def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1, def : Pat<(x86mmx (MMX_X86movdq2q VR128:$src)), (x86mmx (MMX_MOVDQ2Qrr VR128:$src))>; -def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))), +def : Pat<(x86mmx (MMX_X86movdq2q (v2i64 (simple_load addr:$src)))), (x86mmx (MMX_MOVQ64rm addr:$src))>; +def : Pat<(v2i64 (X86vzmovl (scalar_to_vector + (i64 (bitconvert (x86mmx VR64:$src)))))), + (MMX_MOVQ2DQrr VR64:$src)>; + // Misc. 
let SchedRW = [SchedWriteShuffle.MMX] in { let Uses = [EDI], Predicates = [HasMMX, HasSSE1,Not64BitMode] in @@ -601,9 +611,6 @@ def : Pat<(x86mmx (MMX_X86movdq2q def : Pat<(x86mmx (MMX_X86movdq2q (bc_v2i64 (v4i32 (X86cvttp2si (v4f32 VR128:$src)))))), (MMX_CVTTPS2PIirr VR128:$src)>; -def : Pat<(x86mmx (MMX_X86movdq2q - (bc_v2i64 (v4i32 (fp_to_sint (v4f32 VR128:$src)))))), - (MMX_CVTTPS2PIirr VR128:$src)>; def : Pat<(x86mmx (MMX_X86movdq2q (bc_v2i64 (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))), (MMX_CVTPD2PIirr VR128:$src)>; diff --git a/lib/Target/X86/X86InstrMPX.td b/lib/Target/X86/X86InstrMPX.td index f7d931510fe2..44ba071947c2 100644 --- a/lib/Target/X86/X86InstrMPX.td +++ b/lib/Target/X86/X86InstrMPX.td @@ -12,16 +12,16 @@ // //===----------------------------------------------------------------------===// -// FIXME: Investigate a better scheduler class once MPX is used inside LLVM. +// FIXME: Investigate a better scheduler class if MPX is ever used inside LLVM. let SchedRW = [WriteSystem] in { multiclass mpx_bound_make opc, string OpcodeStr> { def 32rm: I, - Requires<[HasMPX, Not64BitMode]>; + Requires<[Not64BitMode]>; def 64rm: I, - Requires<[HasMPX, In64BitMode]>; + Requires<[In64BitMode]>; } defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS; @@ -29,17 +29,17 @@ defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS; multiclass mpx_bound_check opc, string OpcodeStr> { def 32rm: I, - Requires<[HasMPX, Not64BitMode]>; + Requires<[Not64BitMode]>; def 64rm: I, - Requires<[HasMPX, In64BitMode]>; + Requires<[In64BitMode]>; def 32rr: I, - Requires<[HasMPX, Not64BitMode]>; + Requires<[Not64BitMode]>; def 64rr: I, - Requires<[HasMPX, In64BitMode]>; + Requires<[In64BitMode]>; } defm BNDCL : mpx_bound_check<0x1A, "bndcl">, XS, NotMemoryFoldable; defm BNDCU : mpx_bound_check<0x1A, "bndcu">, XD, NotMemoryFoldable; @@ -47,33 +47,31 @@ defm BNDCN : mpx_bound_check<0x1B, "bndcn">, XD, NotMemoryFoldable; def BNDMOVrr : I<0x1A, MRMSrcReg, (outs BNDR:$dst), (ins BNDR:$src), "bndmov\t{$src, $dst|$dst, $src}", []>, PD, - Requires<[HasMPX]>, NotMemoryFoldable; + NotMemoryFoldable; let mayLoad = 1 in { def BNDMOV32rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src), "bndmov\t{$src, $dst|$dst, $src}", []>, PD, - Requires<[HasMPX, Not64BitMode]>, NotMemoryFoldable; + Requires<[Not64BitMode]>, NotMemoryFoldable; def BNDMOV64rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i128mem:$src), "bndmov\t{$src, $dst|$dst, $src}", []>, PD, - Requires<[HasMPX, In64BitMode]>, NotMemoryFoldable; + Requires<[In64BitMode]>, NotMemoryFoldable; } let isCodeGenOnly = 1, ForceDisassemble = 1 in def BNDMOVrr_REV : I<0x1B, MRMDestReg, (outs BNDR:$dst), (ins BNDR:$src), "bndmov\t{$src, $dst|$dst, $src}", []>, PD, - Requires<[HasMPX]>, NotMemoryFoldable; + NotMemoryFoldable; let mayStore = 1 in { def BNDMOV32mr : I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src), "bndmov\t{$src, $dst|$dst, $src}", []>, PD, - Requires<[HasMPX, Not64BitMode]>, NotMemoryFoldable; + Requires<[Not64BitMode]>, NotMemoryFoldable; def BNDMOV64mr : I<0x1B, MRMDestMem, (outs), (ins i128mem:$dst, BNDR:$src), "bndmov\t{$src, $dst|$dst, $src}", []>, PD, - Requires<[HasMPX, In64BitMode]>, NotMemoryFoldable; + Requires<[In64BitMode]>, NotMemoryFoldable; def BNDSTXmr: I<0x1B, MRMDestMem, (outs), (ins anymem:$dst, BNDR:$src), - "bndstx\t{$src, $dst|$dst, $src}", []>, PS, - Requires<[HasMPX]>; + "bndstx\t{$src, $dst|$dst, $src}", []>, PS; } let mayLoad = 1 in def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src), - "bndldx\t{$src, $dst|$dst, $src}", 
[]>, PS, - Requires<[HasMPX]>; + "bndldx\t{$src, $dst|$dst, $src}", []>, PS; } // SchedRW diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 7d0a5b87baf4..09a04c0338b4 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -115,7 +115,9 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "", [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoAVX512]>; def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "", - [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2, NoAVX512]>; + [(set FR64:$dst, fp64imm0)]>, Requires<[HasSSE2, NoAVX512]>; + def FsFLD0F128 : I<0, Pseudo, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, fp128imm0)]>, Requires<[HasSSE1, NoAVX512]>; } //===----------------------------------------------------------------------===// @@ -128,13 +130,18 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1, SchedRW = [WriteZero] in { + isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in { def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4f32 immAllZerosV))]>; } -let Predicates = [NoAVX512] in +let Predicates = [NoAVX512] in { +def : Pat<(v16i8 immAllZerosV), (V_SET0)>; +def : Pat<(v8i16 immAllZerosV), (V_SET0)>; def : Pat<(v4i32 immAllZerosV), (V_SET0)>; +def : Pat<(v2i64 immAllZerosV), (V_SET0)>; +def : Pat<(v2f64 immAllZerosV), (V_SET0)>; +} // The same as done above but for AVX. The 256-bit AVX1 ISA doesn't support PI, @@ -147,6 +154,14 @@ def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "", [(set VR256:$dst, (v8i32 immAllZerosV))]>; } +let Predicates = [NoAVX512] in { +def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>; +def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>; +def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>; +def : Pat<(v8f32 immAllZerosV), (AVX_SET0)>; +def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>; +} + // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-ones value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, @@ -355,7 +370,7 @@ defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups", SSEPackedSingle, SchedWriteFMoveLS.YMM>, PS, VEX, VEX_L, VEX_WIG; -defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd", +defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd", SSEPackedDouble, SchedWriteFMoveLS.YMM>, PD, VEX, VEX_L, VEX_WIG; } @@ -661,7 +676,7 @@ let Predicates = [UseSSE1] in { // This pattern helps select MOVLPS on SSE1 only targets. With SSE2 we'll // end up with a movsd or blend instead of shufp. // No need for aligned load, we're only loading 64-bits. - def : Pat<(X86Shufp (v4f32 (nonvolatile_load addr:$src2)), VR128:$src1, + def : Pat<(X86Shufp (v4f32 (simple_load addr:$src2)), VR128:$src1, (i8 -28)), (MOVLPSrm VR128:$src1, addr:$src2)>; def : Pat<(X86Shufp (v4f32 (X86vzload64 addr:$src2)), VR128:$src1, (i8 -28)), @@ -727,7 +742,7 @@ let Predicates = [UseSSE1] in { // This pattern helps select MOVHPS on SSE1 only targets. With SSE2 we'll // end up with a movsd or blend instead of shufp. // No need for aligned load, we're only loading 64-bits. 
- def : Pat<(X86Movlhps VR128:$src1, (v4f32 (nonvolatile_load addr:$src2))), + def : Pat<(X86Movlhps VR128:$src1, (v4f32 (simple_load addr:$src2))), (MOVHPSrm VR128:$src1, addr:$src2)>; def : Pat<(X86Movlhps VR128:$src1, (v4f32 (X86vzload64 addr:$src2))), (MOVHPSrm VR128:$src1, addr:$src2)>; @@ -761,7 +776,7 @@ let Predicates = [UseSSE2] in { let Predicates = [UseSSE2, NoSSE41_Or_OptForSize] in { // Use MOVLPD to load into the low bits from a full vector unless we can use // BLENDPD. - def : Pat<(X86Movsd VR128:$src1, (v2f64 (nonvolatile_load addr:$src2))), + def : Pat<(X86Movsd VR128:$src1, (v2f64 (simple_load addr:$src2))), (MOVLPDrm VR128:$src1, addr:$src2)>; } @@ -1713,12 +1728,12 @@ multiclass sse12_cmp_scalar, + [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, timm:$cc))]>, Sched<[sched]>; def rm : SIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm, [(set RC:$dst, (OpNode (VT RC:$src1), - (ld_frag addr:$src2), imm:$cc))]>, + (ld_frag addr:$src2), timm:$cc))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -1751,13 +1766,13 @@ multiclass sse12_cmp_scalar_int, + VR128:$src, timm:$cc))]>, Sched<[sched]>; let mayLoad = 1 in def rm_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, memop:$src, u8imm:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, - mem_cpat:$src, imm:$cc))]>, + mem_cpat:$src, timm:$cc))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -1876,12 +1891,12 @@ multiclass sse12_cmp_packed, + [(set RC:$dst, (VT (X86cmpp RC:$src1, RC:$src2, timm:$cc)))], d>, Sched<[sched]>; def rmi : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm, [(set RC:$dst, - (VT (X86cmpp RC:$src1, (ld_frag addr:$src2), imm:$cc)))], d>, + (VT (X86cmpp RC:$src1, (ld_frag addr:$src2), timm:$cc)))], d>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -1906,7 +1921,7 @@ let Constraints = "$src1 = $dst" in { SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, PD; } -def CommutableCMPCC : PatLeaf<(imm), [{ +def CommutableCMPCC : PatLeaf<(timm), [{ uint64_t Imm = N->getZExtValue() & 0x7; return (Imm == 0x00 || Imm == 0x03 || Imm == 0x04 || Imm == 0x07); }]>; @@ -1915,47 +1930,47 @@ def CommutableCMPCC : PatLeaf<(imm), [{ let Predicates = [HasAVX] in { def : Pat<(v4f64 (X86cmpp (loadv4f64 addr:$src2), VR256:$src1, CommutableCMPCC:$cc)), - (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>; + (VCMPPDYrmi VR256:$src1, addr:$src2, timm:$cc)>; def : Pat<(v8f32 (X86cmpp (loadv8f32 addr:$src2), VR256:$src1, CommutableCMPCC:$cc)), - (VCMPPSYrmi VR256:$src1, addr:$src2, imm:$cc)>; + (VCMPPSYrmi VR256:$src1, addr:$src2, timm:$cc)>; def : Pat<(v2f64 (X86cmpp (loadv2f64 addr:$src2), VR128:$src1, CommutableCMPCC:$cc)), - (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; + (VCMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>; def : Pat<(v4f32 (X86cmpp (loadv4f32 addr:$src2), VR128:$src1, CommutableCMPCC:$cc)), - (VCMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>; + (VCMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>; def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1, CommutableCMPCC:$cc)), - (VCMPSDrm FR64:$src1, addr:$src2, imm:$cc)>; + (VCMPSDrm FR64:$src1, addr:$src2, timm:$cc)>; def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1, CommutableCMPCC:$cc)), - (VCMPSSrm FR32:$src1, addr:$src2, imm:$cc)>; + (VCMPSSrm FR32:$src1, addr:$src2, timm:$cc)>; } let Predicates = [UseSSE2] in { def : Pat<(v2f64 (X86cmpp (memopv2f64 addr:$src2), VR128:$src1, CommutableCMPCC:$cc)), - (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; + (CMPPDrmi VR128:$src1, 
addr:$src2, timm:$cc)>; def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1, CommutableCMPCC:$cc)), - (CMPSDrm FR64:$src1, addr:$src2, imm:$cc)>; + (CMPSDrm FR64:$src1, addr:$src2, timm:$cc)>; } let Predicates = [UseSSE1] in { def : Pat<(v4f32 (X86cmpp (memopv4f32 addr:$src2), VR128:$src1, CommutableCMPCC:$cc)), - (CMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>; + (CMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>; def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1, CommutableCMPCC:$cc)), - (CMPSSrm FR32:$src1, addr:$src2, imm:$cc)>; + (CMPSSrm FR32:$src1, addr:$src2, timm:$cc)>; } //===----------------------------------------------------------------------===// @@ -1970,13 +1985,13 @@ multiclass sse12_shuffle, + (i8 timm:$src3))))], d>, Sched<[sched.Folded, sched.ReadAfterFold]>; let isCommutable = IsCommutable in def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$src3), asm, [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, - (i8 imm:$src3))))], d>, + (i8 timm:$src3))))], d>, Sched<[sched]>; } @@ -2097,7 +2112,7 @@ let Predicates = [HasAVX1Only] in { let Predicates = [UseSSE2] in { // Use MOVHPD if the load isn't aligned enough for UNPCKLPD. def : Pat<(v2f64 (X86Unpckl VR128:$src1, - (v2f64 (nonvolatile_load addr:$src2)))), + (v2f64 (simple_load addr:$src2)))), (MOVHPDrm VR128:$src1, addr:$src2)>; } @@ -2721,7 +2736,7 @@ defm : scalar_math_patterns; defm : scalar_math_patterns; defm : scalar_math_patterns; - + /// Unop Arithmetic /// In addition, we also have a special variant of the scalar form here to /// represent the associated intrinsic operation. This form is unlike the @@ -3482,7 +3497,7 @@ multiclass PDI_binop_rmi opc, bits<8> opc2, Format ImmForm, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 imm:$src2))))]>, + [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 timm:$src2))))]>, Sched<[schedImm]>; } @@ -3514,7 +3529,7 @@ multiclass PDI_binop_ri opc, Format ImmForm, string OpcodeStr, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (VT (OpNode RC:$src1, (i8 imm:$src2))))]>, + [(set RC:$dst, (VT (OpNode RC:$src1, (i8 timm:$src2))))]>, Sched<[sched]>; } @@ -3597,7 +3612,7 @@ let Predicates = [HasAVX, prd] in { !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, - (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))]>, + (vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>, VEX, Sched<[sched.XMM]>, VEX_WIG; def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2), @@ -3605,7 +3620,7 @@ let Predicates = [HasAVX, prd] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (load addr:$src1), - (i8 imm:$src2))))]>, VEX, + (i8 timm:$src2))))]>, VEX, Sched<[sched.XMM.Folded]>, VEX_WIG; } @@ -3615,7 +3630,7 @@ let Predicates = [HasAVX2, prd] in { !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, - (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))]>, + (vt256 (OpNode VR256:$src1, (i8 timm:$src2))))]>, VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG; def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src1, u8imm:$src2), @@ -3623,7 +3638,7 @@ let Predicates = [HasAVX2, prd] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode (load addr:$src1), - (i8 imm:$src2))))]>, 
VEX, VEX_L, + (i8 timm:$src2))))]>, VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG; } @@ -3633,7 +3648,7 @@ let Predicates = [UseSSE2] in { !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, - (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))]>, + (vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>, Sched<[sched.XMM]>; def mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2), @@ -3641,7 +3656,7 @@ let Predicates = [UseSSE2] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (memop addr:$src1), - (i8 imm:$src2))))]>, + (i8 timm:$src2))))]>, Sched<[sched.XMM.Folded]>; } } @@ -4380,7 +4395,7 @@ defm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>; let Predicates = [HasAVX, NoVLX] in { - def : Pat<(X86Movddup (v2f64 (nonvolatile_load addr:$src))), + def : Pat<(X86Movddup (v2f64 (simple_load addr:$src))), (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))), (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; @@ -4388,7 +4403,7 @@ let Predicates = [HasAVX, NoVLX] in { let Predicates = [UseSSE3] in { // No need for aligned memory as this only loads 64-bits. - def : Pat<(X86Movddup (v2f64 (nonvolatile_load addr:$src))), + def : Pat<(X86Movddup (v2f64 (simple_load addr:$src))), (MOVDDUPrm addr:$src)>; def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))), (MOVDDUPrm addr:$src)>; @@ -4812,7 +4827,7 @@ multiclass ssse3_palignr, + [(set RC:$dst, (VT (X86PAlignr RC:$src1, RC:$src2, (i8 timm:$src3))))]>, Sched<[sched]>; let mayLoad = 1 in def rmi : SS3AI<0x0F, MRMSrcMem, (outs RC:$dst), @@ -4823,7 +4838,7 @@ multiclass ssse3_palignr, + (i8 timm:$src3))))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -5300,7 +5315,7 @@ multiclass SS41I_insertf32 opc, string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, - (X86insertps VR128:$src1, VR128:$src2, imm:$src3))]>, + (X86insertps VR128:$src1, VR128:$src2, timm:$src3))]>, Sched<[SchedWriteFShuffle.XMM]>; def rm : SS4AIi8 opc, string asm, bit Is2Addr = 1> { [(set VR128:$dst, (X86insertps VR128:$src1, (v4f32 (scalar_to_vector (loadf32 addr:$src2))), - imm:$src3))]>, + timm:$src3))]>, Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>; } @@ -5323,17 +5338,6 @@ let ExeDomain = SSEPackedSingle in { defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1>; } -let Predicates = [UseAVX] in { - // If we're inserting an element from a vbroadcast of a load, fold the - // load into the X86insertps instruction. 
- def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1), - (X86VBroadcast (loadf32 addr:$src2)), imm:$src3)), - (VINSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>; - def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1), - (X86VBroadcast (loadv4f32 addr:$src2)), imm:$src3)), - (VINSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>; -} - //===----------------------------------------------------------------------===// // SSE4.1 - Round Instructions //===----------------------------------------------------------------------===// @@ -5348,7 +5352,7 @@ multiclass sse41_fp_unop_p opc, string OpcodeStr, (outs RC:$dst), (ins RC:$src1, i32u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, (VT (OpNode RC:$src1, imm:$src2)))]>, + [(set RC:$dst, (VT (OpNode RC:$src1, timm:$src2)))]>, Sched<[sched]>; // Vector intrinsic operation, mem @@ -5357,13 +5361,13 @@ multiclass sse41_fp_unop_p opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, - (VT (OpNode (mem_frag addr:$src1),imm:$src2)))]>, + (VT (OpNode (mem_frag addr:$src1), timm:$src2)))]>, Sched<[sched.Folded]>; } multiclass avx_fp_unop_rm opcss, bits<8> opcsd, string OpcodeStr, X86FoldableSchedWrite sched> { -let ExeDomain = SSEPackedSingle, hasSideEffects = 0 in { +let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in { def SSr : SS4AIi8, Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedSingle, hasSideEffects = 0 -let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in { +let ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in { def SDr : SS4AIi8 opcss, bits<8> opcsd, string OpcodeStr, X86FoldableSchedWrite sched> { -let ExeDomain = SSEPackedSingle, hasSideEffects = 0 in { +let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in { def SSr : SS4AIi8, Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedSingle, hasSideEffects = 0 -let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in { +let ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in { def SDr : SS4AIi8 opcss, bits<8> opcsd, string OpcodeStr, X86FoldableSchedWrite sched, ValueType VT32, ValueType VT64, SDNode OpNode, bit Is2Addr = 1> { -let ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 in { +let ExeDomain = SSEPackedSingle in { def SSr_Int : SS4AIi8, + [(set VR128:$dst, (VT32 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>, Sched<[sched]>; def SSm_Int : SS4AIi8, + (OpNode VR128:$src1, sse_load_f32:$src2, timm:$src3))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 -let ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in { +let ExeDomain = SSEPackedDouble in { def SDr_Int : SS4AIi8, + [(set VR128:$dst, (VT64 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>, Sched<[sched]>; def SDm_Int : SS4AIi8, + (OpNode VR128:$src1, sse_load_f64:$src2, timm:$src3))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 } @@ -5508,17 +5512,17 @@ let Predicates = [UseAVX] in { } let Predicates = [UseAVX] in { - def : Pat<(X86VRndScale FR32:$src1, imm:$src2), - (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, imm:$src2)>; - def : Pat<(X86VRndScale FR64:$src1, imm:$src2), - (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, imm:$src2)>; + def : Pat<(X86VRndScale FR32:$src1, timm:$src2), + (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, timm:$src2)>; + def : Pat<(X86VRndScale FR64:$src1, timm:$src2), + (VROUNDSDr (f64 (IMPLICIT_DEF)), 
FR64:$src1, timm:$src2)>; } let Predicates = [UseAVX, OptForSize] in { - def : Pat<(X86VRndScale (loadf32 addr:$src1), imm:$src2), - (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, imm:$src2)>; - def : Pat<(X86VRndScale (loadf64 addr:$src1), imm:$src2), - (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, imm:$src2)>; + def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2), + (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>; + def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2), + (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>; } let ExeDomain = SSEPackedSingle in @@ -5535,17 +5539,17 @@ defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl, v4f32, v2f64, X86RndScales>; let Predicates = [UseSSE41] in { - def : Pat<(X86VRndScale FR32:$src1, imm:$src2), - (ROUNDSSr FR32:$src1, imm:$src2)>; - def : Pat<(X86VRndScale FR64:$src1, imm:$src2), - (ROUNDSDr FR64:$src1, imm:$src2)>; + def : Pat<(X86VRndScale FR32:$src1, timm:$src2), + (ROUNDSSr FR32:$src1, timm:$src2)>; + def : Pat<(X86VRndScale FR64:$src1, timm:$src2), + (ROUNDSDr FR64:$src1, timm:$src2)>; } let Predicates = [UseSSE41, OptForSize] in { - def : Pat<(X86VRndScale (loadf32 addr:$src1), imm:$src2), - (ROUNDSSm addr:$src1, imm:$src2)>; - def : Pat<(X86VRndScale (loadf64 addr:$src1), imm:$src2), - (ROUNDSDm addr:$src1, imm:$src2)>; + def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2), + (ROUNDSSm addr:$src1, timm:$src2)>; + def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2), + (ROUNDSDm addr:$src1, timm:$src2)>; } //===----------------------------------------------------------------------===// @@ -5826,7 +5830,7 @@ multiclass SS41I_binop_rmi_int opc, string OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>, + [(set RC:$dst, (IntId RC:$src1, RC:$src2, timm:$src3))]>, Sched<[sched]>; def rmi : SS4AIi8 opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set RC:$dst, - (IntId RC:$src1, (memop_frag addr:$src2), imm:$src3))]>, + (IntId RC:$src1, (memop_frag addr:$src2), timm:$src3))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -5853,7 +5857,7 @@ multiclass SS41I_binop_rmi opc, string OpcodeStr, SDNode OpNode, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>, + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>, Sched<[sched]>; def rmi : SS4AIi8 opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set RC:$dst, - (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), imm:$src3)))]>, + (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } -def BlendCommuteImm2 : SDNodeXFormgetZExtValue() & 0x03; return getI8Imm(Imm ^ 0x03, SDLoc(N)); }]>; -def BlendCommuteImm4 : SDNodeXFormgetZExtValue() & 0x0f; return getI8Imm(Imm ^ 0x0f, SDLoc(N)); }]>; -def BlendCommuteImm8 : SDNodeXFormgetZExtValue() & 0xff; return getI8Imm(Imm ^ 0xff, SDLoc(N)); }]>; // Turn a 4-bit blendi immediate to 8-bit for use with pblendw. 
-def BlendScaleImm4 : SDNodeXFormgetZExtValue(); uint8_t NewImm = 0; for (unsigned i = 0; i != 4; ++i) { @@ -5894,7 +5898,7 @@ def BlendScaleImm4 : SDNodeXForm; // Turn a 2-bit blendi immediate to 8-bit for use with pblendw. -def BlendScaleImm2 : SDNodeXFormgetZExtValue(); uint8_t NewImm = 0; for (unsigned i = 0; i != 2; ++i) { @@ -5905,7 +5909,7 @@ def BlendScaleImm2 : SDNodeXForm; // Turn a 2-bit blendi immediate to 4-bit for use with pblendd. -def BlendScaleImm2to4 : SDNodeXFormgetZExtValue(); uint8_t NewImm = 0; for (unsigned i = 0; i != 2; ++i) { @@ -5916,7 +5920,7 @@ def BlendScaleImm2to4 : SDNodeXForm; // Turn a 4-bit blendi immediate to 8-bit for use with pblendw and invert it. -def BlendScaleCommuteImm4 : SDNodeXFormgetZExtValue(); uint8_t NewImm = 0; for (unsigned i = 0; i != 4; ++i) { @@ -5927,7 +5931,7 @@ def BlendScaleCommuteImm4 : SDNodeXForm; // Turn a 2-bit blendi immediate to 8-bit for use with pblendw and invert it. -def BlendScaleCommuteImm2 : SDNodeXFormgetZExtValue(); uint8_t NewImm = 0; for (unsigned i = 0; i != 2; ++i) { @@ -5938,7 +5942,7 @@ def BlendScaleCommuteImm2 : SDNodeXForm; // Turn a 2-bit blendi immediate to 4-bit for use with pblendd and invert it. -def BlendScaleCommuteImm2to4 : SDNodeXFormgetZExtValue(); uint8_t NewImm = 0; for (unsigned i = 0; i != 2; ++i) { @@ -6008,7 +6012,7 @@ let ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in { "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>, + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>, Sched<[sched]>; def rmi : SS4AIi8, + (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } // Pattern to commute if load is in first source. - def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, imm:$src3)), + def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, timm:$src3)), (!cast(NAME#"rmi") RC:$src1, addr:$src2, - (commuteXForm imm:$src3))>; + (commuteXForm timm:$src3))>; } let Predicates = [HasAVX] in { @@ -6061,37 +6065,37 @@ let Predicates = [HasAVX2] in { // Emulate vXi32/vXi64 blends with vXf32/vXf64 or pblendw. // ExecutionDomainFixPass will cleanup domains later on. let Predicates = [HasAVX1Only] in { -def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3), - (VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$src3)>; -def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3), - (VBLENDPDYrmi VR256:$src1, addr:$src2, imm:$src3)>; -def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3), - (VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 imm:$src3))>; +def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3), + (VBLENDPDYrri VR256:$src1, VR256:$src2, timm:$src3)>; +def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3), + (VBLENDPDYrmi VR256:$src1, addr:$src2, timm:$src3)>; +def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3), + (VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 timm:$src3))>; // Use pblendw for 128-bit integer to keep it in the integer domain and prevent // it from becoming movsd via commuting under optsize. 
-def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3), - (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 imm:$src3))>; -def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3), - (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 imm:$src3))>; -def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3), - (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 imm:$src3))>; - -def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), imm:$src3), - (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$src3)>; -def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), imm:$src3), - (VBLENDPSYrmi VR256:$src1, addr:$src2, imm:$src3)>; -def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, imm:$src3), - (VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 imm:$src3))>; +def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3), + (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 timm:$src3))>; +def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3), + (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 timm:$src3))>; +def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3), + (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 timm:$src3))>; + +def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), timm:$src3), + (VBLENDPSYrri VR256:$src1, VR256:$src2, timm:$src3)>; +def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), timm:$src3), + (VBLENDPSYrmi VR256:$src1, addr:$src2, timm:$src3)>; +def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, timm:$src3), + (VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 timm:$src3))>; // Use pblendw for 128-bit integer to keep it in the integer domain and prevent // it from becoming movss via commuting under optsize. -def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3), - (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 imm:$src3))>; -def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), imm:$src3), - (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>; -def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, imm:$src3), - (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>; +def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3), + (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 timm:$src3))>; +def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), timm:$src3), + (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>; +def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, timm:$src3), + (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>; } defm BLENDPS : SS41I_blend_rmi<0x0C, "blendps", X86Blendi, v4f32, @@ -6107,19 +6111,19 @@ defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16, let Predicates = [UseSSE41] in { // Use pblendw for 128-bit integer to keep it in the integer domain and prevent // it from becoming movss via commuting under optsize. 
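The BlendScaleImm*/BlendCommuteImm* SDNodeXForms above only rewrite the 8-bit blend control at selection time: scaling widens a per-element mask to the per-word mask pblendw (or per-dword mask pblendd) expects, and commuting XORs the mask with all ones for that element count so the two sources can be swapped. A minimal standalone sketch of that arithmetic follows; the helper names and the checks in main() are illustrative assumptions, not LLVM code.

#include <cassert>
#include <cstdint>

// Illustrative helpers mirroring the arithmetic of the BlendScale*/
// BlendCommute* transforms above (names are mine, not LLVM's).

// Widen a NumElts-bit blendi mask so each selected element covers BitsPerElt
// bits of the wider control byte, e.g. a 4-bit v4i32 mask becomes the 8-bit
// per-word mask pblendw expects.
static uint8_t scaleBlendImm(uint8_t Imm, unsigned NumElts, unsigned BitsPerElt) {
  uint8_t NewImm = 0;
  for (unsigned i = 0; i != NumElts; ++i)
    if (Imm & (1u << i))
      NewImm |= ((1u << BitsPerElt) - 1) << (i * BitsPerElt);
  return NewImm;
}

// Commuting the two blend sources selects the opposite source in every lane,
// i.e. XOR with the all-ones mask for that element count.
static uint8_t commuteBlendImm(uint8_t Imm, unsigned NumElts) {
  return (Imm ^ 0xFFu) & ((1u << NumElts) - 1);
}

int main() {
  // BlendScaleImm4: 4-bit v4i32 mask 0b0101 -> 8-bit pblendw mask 0x33.
  assert(scaleBlendImm(0x5, 4, 2) == 0x33);
  // BlendScaleImm2: 2-bit v2i64 mask 0b10 -> 8-bit pblendw mask 0xF0.
  assert(scaleBlendImm(0x2, 2, 4) == 0xF0);
  // BlendScaleImm2to4: the same 2-bit mask widened to 4 bits for pblendd.
  assert(scaleBlendImm(0x2, 2, 2) == 0xC);
  // BlendCommuteImm4: swapping the sources of a v4f32 blend flips every lane.
  assert(commuteBlendImm(0x5, 4) == 0xA);
  // BlendScaleCommuteImm2: scale, then invert, for the commuted memory form.
  assert((scaleBlendImm(0x2, 2, 4) ^ 0xFF) == 0x0F);
  return 0;
}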
-def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3), - (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 imm:$src3))>; -def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), imm:$src3), - (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 imm:$src3))>; -def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, imm:$src3), - (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 imm:$src3))>; +def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3), + (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 timm:$src3))>; +def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), timm:$src3), + (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 timm:$src3))>; +def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, timm:$src3), + (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 timm:$src3))>; -def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3), - (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 imm:$src3))>; -def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), imm:$src3), - (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>; -def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, imm:$src3), - (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>; +def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3), + (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 timm:$src3))>; +def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), timm:$src3), + (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>; +def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, timm:$src3), + (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>; } // For insertion into the zero index (low half) of a 256-bit vector, it is @@ -6592,7 +6596,7 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in { "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, (int_x86_sha1rnds4 VR128:$src1, VR128:$src2, - (i8 imm:$src3)))]>, TA, + (i8 timm:$src3)))]>, TA, Sched<[SchedWriteVecIMul.XMM]>; def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, u8imm:$src3), @@ -6600,7 +6604,7 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in { [(set VR128:$dst, (int_x86_sha1rnds4 VR128:$src1, (memop addr:$src2), - (i8 imm:$src3)))]>, TA, + (i8 timm:$src3)))]>, TA, Sched<[SchedWriteVecIMul.XMM.Folded, SchedWriteVecIMul.XMM.ReadAfterFold]>; @@ -6718,26 +6722,26 @@ let Predicates = [HasAVX, HasAES] in { (ins VR128:$src1, u8imm:$src2), "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, + (int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>, Sched<[WriteAESKeyGen]>, VEX, VEX_WIG; def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2), "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (int_x86_aesni_aeskeygenassist (load addr:$src1), imm:$src2))]>, + (int_x86_aesni_aeskeygenassist (load addr:$src1), timm:$src2))]>, Sched<[WriteAESKeyGen.Folded]>, VEX, VEX_WIG; } def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2), "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, + (int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>, Sched<[WriteAESKeyGen]>; def 
AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2), "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (int_x86_aesni_aeskeygenassist (memop addr:$src1), imm:$src2))]>, + (int_x86_aesni_aeskeygenassist (memop addr:$src1), timm:$src2))]>, Sched<[WriteAESKeyGen.Folded]>; //===----------------------------------------------------------------------===// @@ -6745,7 +6749,7 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), //===----------------------------------------------------------------------===// // Immediate transform to help with commuting. -def PCLMULCommuteImm : SDNodeXFormgetZExtValue(); return getI8Imm((uint8_t)((Imm >> 4) | (Imm << 4)), SDLoc(N)); }]>; @@ -6758,7 +6762,7 @@ let Predicates = [NoAVX, HasPCLMUL] in { (ins VR128:$src1, VR128:$src2, u8imm:$src3), "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, - (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>, + (int_x86_pclmulqdq VR128:$src1, VR128:$src2, timm:$src3))]>, Sched<[WriteCLMul]>; def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), @@ -6766,14 +6770,14 @@ let Predicates = [NoAVX, HasPCLMUL] in { "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1, (memop addr:$src2), - imm:$src3))]>, + timm:$src3))]>, Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>; } // Constraints = "$src1 = $dst" def : Pat<(int_x86_pclmulqdq (memop addr:$src2), VR128:$src1, - (i8 imm:$src3)), + (i8 timm:$src3)), (PCLMULQDQrm VR128:$src1, addr:$src2, - (PCLMULCommuteImm imm:$src3))>; + (PCLMULCommuteImm timm:$src3))>; } // Predicates = [NoAVX, HasPCLMUL] // SSE aliases @@ -6795,21 +6799,21 @@ multiclass vpclmulqdq, + (IntId RC:$src1, RC:$src2, timm:$src3))]>, Sched<[WriteCLMul]>; def rm : PCLMULIi8<0x44, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, MemOp:$src2, u8imm:$src3), "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set RC:$dst, - (IntId RC:$src1, (LdFrag addr:$src2), imm:$src3))]>, + (IntId RC:$src1, (LdFrag addr:$src2), timm:$src3))]>, Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>; // We can commute a load in the first operand by swapping the sources and // rotating the immediate. 
- def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 imm:$src3)), + def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 timm:$src3)), (!cast(NAME#"rm") RC:$src1, addr:$src2, - (PCLMULCommuteImm imm:$src3))>; + (PCLMULCommuteImm timm:$src3))>; } let Predicates = [HasAVX, NoVLX_Or_NoVPCLMULQDQ, HasPCLMUL] in @@ -6853,8 +6857,8 @@ let Constraints = "$src = $dst" in { def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst), (ins VR128:$src, u8imm:$len, u8imm:$idx), "extrq\t{$idx, $len, $src|$src, $len, $idx}", - [(set VR128:$dst, (X86extrqi VR128:$src, imm:$len, - imm:$idx))]>, + [(set VR128:$dst, (X86extrqi VR128:$src, timm:$len, + timm:$idx))]>, PD, Sched<[SchedWriteVecALU.XMM]>; def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$mask), @@ -6867,7 +6871,7 @@ def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx), "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}", [(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2, - imm:$len, imm:$idx))]>, + timm:$len, timm:$idx))]>, XD, Sched<[SchedWriteVecALU.XMM]>; def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$mask), @@ -6907,10 +6911,10 @@ def : Pat<(nontemporalstore FR64:$src, addr:$dst), // class avx_broadcast_rm opc, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop, ValueType VT, - PatFrag ld_frag, SchedWrite Sched> : + PatFrag bcast_frag, SchedWrite Sched> : AVX8I, + [(set RC:$dst, (VT (bcast_frag addr:$src)))]>, Sched<[Sched]>, VEX; // AVX2 adds register forms @@ -6923,15 +6927,15 @@ class avx2_broadcast_rr opc, string OpcodeStr, RegisterClass RC, let ExeDomain = SSEPackedSingle, Predicates = [HasAVX, NoVLX] in { def VBROADCASTSSrm : avx_broadcast_rm<0x18, "vbroadcastss", VR128, - f32mem, v4f32, loadf32, + f32mem, v4f32, X86VBroadcastld32, SchedWriteFShuffle.XMM.Folded>; def VBROADCASTSSYrm : avx_broadcast_rm<0x18, "vbroadcastss", VR256, - f32mem, v8f32, loadf32, + f32mem, v8f32, X86VBroadcastld32, SchedWriteFShuffle.XMM.Folded>, VEX_L; } let ExeDomain = SSEPackedDouble, Predicates = [HasAVX, NoVLX] in def VBROADCASTSDYrm : avx_broadcast_rm<0x19, "vbroadcastsd", VR256, f64mem, - v4f64, loadf64, + v4f64, X86VBroadcastld64, SchedWriteFShuffle.XMM.Folded>, VEX_L; let ExeDomain = SSEPackedSingle, Predicates = [HasAVX2, NoVLX] in { @@ -6944,15 +6948,6 @@ let ExeDomain = SSEPackedDouble, Predicates = [HasAVX2, NoVLX] in def VBROADCASTSDYrr : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256, v4f64, v2f64, WriteFShuffle256>, VEX_L; -let Predicates = [HasAVX, NoVLX] in { - def : Pat<(v4f32 (X86VBroadcast (v4f32 (scalar_to_vector (loadf32 addr:$src))))), - (VBROADCASTSSrm addr:$src)>; - def : Pat<(v8f32 (X86VBroadcast (v4f32 (scalar_to_vector (loadf32 addr:$src))))), - (VBROADCASTSSYrm addr:$src)>; - def : Pat<(v4f64 (X86VBroadcast (v2f64 (scalar_to_vector (loadf64 addr:$src))))), - (VBROADCASTSDYrm addr:$src)>; -} - //===----------------------------------------------------------------------===// // VBROADCAST*128 - Load from memory and broadcast 128-bit vector to both // halves of a 256-bit vector. 
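The PCLMULCommuteImm transform used above relies on the pclmulqdq immediate encoding: bit 0 picks the quadword of the first source and bit 4 the quadword of the second, so swapping the sources to fold a load from the first operand reduces to swapping the two nibbles. A minimal standalone sketch; the function name and the checks in main() are illustrative, not part of the patch.

#include <cassert>
#include <cstdint>

// Sketch of the PCLMULCommuteImm rewrite: rotating the immediate by a nibble
// exchanges bit 0 and bit 4, the only bits pclmulqdq consumes.
static uint8_t commutePclmulImm(uint8_t Imm) {
  return (uint8_t)((Imm >> 4) | (Imm << 4));
}

int main() {
  assert(commutePclmulImm(0x00) == 0x00); // low x low stays low x low
  assert(commutePclmulImm(0x01) == 0x10); // high(src1) x low(src2) swaps roles
  assert(commutePclmulImm(0x11) == 0x11); // high x high is symmetric
  return 0;
}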
@@ -7081,27 +7076,29 @@ let Predicates = [HasAVX1Only] in { // multiclass avx_movmask_rm opc_rm, bits<8> opc_mr, string OpcodeStr, Intrinsic IntLd, Intrinsic IntLd256, - Intrinsic IntSt, Intrinsic IntSt256> { + Intrinsic IntSt, Intrinsic IntSt256, + X86SchedWriteMaskMove schedX, + X86SchedWriteMaskMove schedY> { def rm : AVX8I, - VEX_4V, Sched<[WriteFMaskedLoad]>; + VEX_4V, Sched<[schedX.RM]>; def Yrm : AVX8I, - VEX_4V, VEX_L, Sched<[WriteFMaskedLoadY]>; + VEX_4V, VEX_L, Sched<[schedY.RM]>; def mr : AVX8I, - VEX_4V, Sched<[WriteFMaskedStore]>; + VEX_4V, Sched<[schedX.MR]>; def Ymr : AVX8I, - VEX_4V, VEX_L, Sched<[WriteFMaskedStoreY]>; + VEX_4V, VEX_L, Sched<[schedY.MR]>; } let ExeDomain = SSEPackedSingle in @@ -7109,13 +7106,15 @@ defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps", int_x86_avx_maskload_ps, int_x86_avx_maskload_ps_256, int_x86_avx_maskstore_ps, - int_x86_avx_maskstore_ps_256>; + int_x86_avx_maskstore_ps_256, + WriteFMaskMove32, WriteFMaskMove32Y>; let ExeDomain = SSEPackedDouble in defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd", int_x86_avx_maskload_pd, int_x86_avx_maskload_pd_256, int_x86_avx_maskstore_pd, - int_x86_avx_maskstore_pd_256>; + int_x86_avx_maskstore_pd_256, + WriteFMaskMove64, WriteFMaskMove64Y>; //===----------------------------------------------------------------------===// // VPERMIL - Permute Single and Double Floating-Point Values @@ -7143,13 +7142,13 @@ multiclass avx_permil opc_rm, bits<8> opc_rmi, string OpcodeStr, def ri : AVXAIi8, VEX, + [(set RC:$dst, (f_vt (X86VPermilpi RC:$src1, (i8 timm:$src2))))]>, VEX, Sched<[sched]>; def mi : AVXAIi8, VEX, + (f_vt (X86VPermilpi (load addr:$src1), (i8 timm:$src2))))]>, VEX, Sched<[sched.Folded]>; }// Predicates = [HasAVX, NoVLX] } @@ -7181,38 +7180,38 @@ def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, u8imm:$src3), "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR256:$dst, (v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, - (i8 imm:$src3))))]>, VEX_4V, VEX_L, + (i8 timm:$src3))))]>, VEX_4V, VEX_L, Sched<[WriteFShuffle256]>; def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, u8imm:$src3), "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4f64 addr:$src2), - (i8 imm:$src3)))]>, VEX_4V, VEX_L, + (i8 timm:$src3)))]>, VEX_4V, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>; } // Immediate transform to help with commuting. -def Perm2XCommuteImm : SDNodeXFormgetZExtValue() ^ 0x22, SDLoc(N)); }]>; let Predicates = [HasAVX] in { // Pattern with load in other operand. def : Pat<(v4f64 (X86VPerm2x128 (loadv4f64 addr:$src2), - VR256:$src1, (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>; + VR256:$src1, (i8 timm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm timm:$imm))>; } let Predicates = [HasAVX1Only] in { -def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, timm:$imm)>; def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, - (loadv4i64 addr:$src2), (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; + (loadv4i64 addr:$src2), (i8 timm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, timm:$imm)>; // Pattern with load in other operand. 
def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2), - VR256:$src1, (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>; + VR256:$src1, (i8 timm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm timm:$imm))>; } //===----------------------------------------------------------------------===// @@ -7257,7 +7256,7 @@ multiclass f16c_ps2ph, + [(set VR128:$dst, (X86cvtps2ph RC:$src1, timm:$src2))]>, TAPD, VEX, Sched<[RR]>; let hasSideEffects = 0, mayStore = 1 in def mr : Ii8<0x1D, MRMDestMem, (outs), @@ -7282,15 +7281,15 @@ let Predicates = [HasF16C, NoVLX] in { (VCVTPH2PSrm addr:$src)>; def : Pat<(store (f64 (extractelt - (bc_v2f64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))), + (bc_v2f64 (v8i16 (X86cvtps2ph VR128:$src1, timm:$src2))), (iPTR 0))), addr:$dst), - (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>; + (VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>; def : Pat<(store (i64 (extractelt - (bc_v2i64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))), + (bc_v2i64 (v8i16 (X86cvtps2ph VR128:$src1, timm:$src2))), (iPTR 0))), addr:$dst), - (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>; - def : Pat<(store (v8i16 (X86cvtps2ph VR256:$src1, i32:$src2)), addr:$dst), - (VCVTPS2PHYmr addr:$dst, VR256:$src1, imm:$src2)>; + (VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>; + def : Pat<(store (v8i16 (X86cvtps2ph VR256:$src1, timm:$src2)), addr:$dst), + (VCVTPS2PHYmr addr:$dst, VR256:$src1, timm:$src2)>; } // Patterns for matching conversions from float to half-float and vice versa. @@ -7327,20 +7326,20 @@ multiclass AVX2_blend_rmi opc, string OpcodeStr, SDNode OpNode, (ins RC:$src1, RC:$src2, u8imm:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>, + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>, Sched<[sched]>, VEX_4V; def rmi : AVX2AIi8, + (OpVT (OpNode RC:$src1, (load addr:$src2), timm:$src3)))]>, Sched<[sched.Folded, sched.ReadAfterFold]>, VEX_4V; // Pattern to commute if load is in first source. 
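Perm2XCommuteImm, defined above for these vperm2f128/vperm2i128 patterns, follows the same idea: imm[1:0] selects the 128-bit half written to the low destination lane and imm[5:4] the half written to the high lane, with bit 1 and bit 5 naming the source operand, so commuting the sources is an XOR with 0x22. A standalone sketch under those (roughly stated) encoding assumptions; the name and checks are illustrative only.

#include <cassert>
#include <cstdint>

// Sketch of Perm2XCommuteImm: flip the operand-select bits of each lane
// selector when the two 256-bit sources are swapped.
static uint8_t commutePerm2x128Imm(uint8_t Imm) {
  return Imm ^ 0x22;
}

int main() {
  // 0x20 takes src1.lo for the low lane and src2.lo for the high lane;
  // with the operands swapped the same shuffle is encoded as 0x02.
  assert(commutePerm2x128Imm(0x20) == 0x02);
  // 0x31 (src1.hi, src2.hi) becomes 0x13 (src2.hi, src1.hi).
  assert(commutePerm2x128Imm(0x31) == 0x13);
  return 0;
}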
- def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, imm:$src3)), + def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, timm:$src3)), (!cast(NAME#"rmi") RC:$src1, addr:$src2, - (commuteXForm imm:$src3))>; + (commuteXForm timm:$src3))>; } let Predicates = [HasAVX2] in { @@ -7351,19 +7350,19 @@ defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32, SchedWriteBlend.YMM, VR256, i256mem, BlendCommuteImm8>, VEX_L; -def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3), - (VPBLENDDYrri VR256:$src1, VR256:$src2, (BlendScaleImm4 imm:$src3))>; -def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3), - (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>; -def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3), - (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>; +def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3), + (VPBLENDDYrri VR256:$src1, VR256:$src2, (BlendScaleImm4 timm:$src3))>; +def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3), + (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>; +def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3), + (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>; -def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3), - (VPBLENDDrri VR128:$src1, VR128:$src2, (BlendScaleImm2to4 imm:$src3))>; -def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3), - (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleImm2to4 imm:$src3))>; -def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3), - (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2to4 imm:$src3))>; +def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3), + (VPBLENDDrri VR128:$src1, VR128:$src2, (BlendScaleImm2to4 timm:$src3))>; +def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3), + (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleImm2to4 timm:$src3))>; +def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3), + (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2to4 timm:$src3))>; } // For insertion into the zero index (low half) of a 256-bit vector, it is @@ -7407,7 +7406,7 @@ def : Pat<(insert_subvector (loadv32i8 addr:$src2), (v16i8 VR128:$src1), (iPTR 0 // destination operand // multiclass avx2_broadcast opc, string OpcodeStr, - X86MemOperand x86memop, PatFrag ld_frag, + X86MemOperand x86memop, PatFrag bcast_frag, ValueType OpVT128, ValueType OpVT256, Predicate prd> { let Predicates = [HasAVX2, prd] in { def rr : AVX28I opc, string OpcodeStr, def rm : AVX28I, + (OpVT128 (bcast_frag addr:$src)))]>, Sched<[SchedWriteShuffle.XMM.Folded]>, VEX; def Yrr : AVX28I opc, string OpcodeStr, def Yrm : AVX28I, + (OpVT256 (bcast_frag addr:$src)))]>, Sched<[SchedWriteShuffle.XMM.Folded]>, VEX, VEX_L; // Provide aliases for broadcast from the same register class that @@ -7439,13 +7438,13 @@ multiclass avx2_broadcast opc, string OpcodeStr, } } -defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8, +defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, X86VBroadcastld8, v16i8, v32i8, NoVLX_Or_NoBWI>; -defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16, +defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, X86VBroadcastld16, v8i16, v16i16, NoVLX_Or_NoBWI>; -defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32, +defm VPBROADCASTD : 
avx2_broadcast<0x58, "vpbroadcastd", i32mem, X86VBroadcastld32, v4i32, v8i32, NoVLX>; -defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64, +defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, X86VBroadcastld64, v2i64, v4i64, NoVLX>; let Predicates = [HasAVX2, NoVLX] in { @@ -7455,14 +7454,11 @@ let Predicates = [HasAVX2, NoVLX] in { def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), (VPBROADCASTQYrm addr:$src)>; - def : Pat<(v4i32 (X86VBroadcast (v4i32 (scalar_to_vector (loadi32 addr:$src))))), + // FIXME this is to handle aligned extloads from i8/i16. + def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), (VPBROADCASTDrm addr:$src)>; - def : Pat<(v8i32 (X86VBroadcast (v4i32 (scalar_to_vector (loadi32 addr:$src))))), + def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), (VPBROADCASTDYrm addr:$src)>; - def : Pat<(v2i64 (X86VBroadcast (v2i64 (scalar_to_vector (loadi64 addr:$src))))), - (VPBROADCASTQrm addr:$src)>; - def : Pat<(v4i64 (X86VBroadcast (v2i64 (scalar_to_vector (loadi64 addr:$src))))), - (VPBROADCASTQYrm addr:$src)>; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. @@ -7483,17 +7479,12 @@ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (zextloadi16 addr:$src)))))), (VPBROADCASTWYrm addr:$src)>; -} -let Predicates = [HasAVX2, NoVLX] in { - // Provide aliases for broadcast from the same register class that - // automatically does the extract. - def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256:$src))), - (VBROADCASTSSYrr (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), - sub_xmm)))>; - def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256:$src))), - (VBROADCASTSDYrr (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), - sub_xmm)))>; + // FIXME this is to handle aligned extloads from i8. 
+ def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))), + (VPBROADCASTWrm addr:$src)>; + def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))), + (VPBROADCASTWYrm addr:$src)>; } let Predicates = [HasAVX2, NoVLX] in { @@ -7509,45 +7500,41 @@ let Predicates = [HasAVX2, NoVLX] in { let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { def : Pat<(v16i8 (X86VBroadcast GR8:$src)), - (VPBROADCASTBrr (v16i8 (COPY_TO_REGCLASS + (VPBROADCASTBrr (VMOVDI2PDIrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), - GR8:$src, sub_8bit)), - VR128)))>; + GR8:$src, sub_8bit))))>; def : Pat<(v32i8 (X86VBroadcast GR8:$src)), - (VPBROADCASTBYrr (v16i8 (COPY_TO_REGCLASS + (VPBROADCASTBYrr (VMOVDI2PDIrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), - GR8:$src, sub_8bit)), - VR128)))>; + GR8:$src, sub_8bit))))>; def : Pat<(v8i16 (X86VBroadcast GR16:$src)), - (VPBROADCASTWrr (v8i16 (COPY_TO_REGCLASS + (VPBROADCASTWrr (VMOVDI2PDIrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), - GR16:$src, sub_16bit)), - VR128)))>; + GR16:$src, sub_16bit))))>; def : Pat<(v16i16 (X86VBroadcast GR16:$src)), - (VPBROADCASTWYrr (v8i16 (COPY_TO_REGCLASS + (VPBROADCASTWYrr (VMOVDI2PDIrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), - GR16:$src, sub_16bit)), - VR128)))>; + GR16:$src, sub_16bit))))>; } let Predicates = [HasAVX2, NoVLX] in { def : Pat<(v4i32 (X86VBroadcast GR32:$src)), - (VPBROADCASTDrr (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)))>; + (VPBROADCASTDrr (VMOVDI2PDIrr GR32:$src))>; def : Pat<(v8i32 (X86VBroadcast GR32:$src)), - (VPBROADCASTDYrr (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)))>; + (VPBROADCASTDYrr (VMOVDI2PDIrr GR32:$src))>; def : Pat<(v2i64 (X86VBroadcast GR64:$src)), - (VPBROADCASTQrr (v2i64 (COPY_TO_REGCLASS GR64:$src, VR128)))>; + (VPBROADCASTQrr (VMOV64toPQIrr GR64:$src))>; def : Pat<(v4i64 (X86VBroadcast GR64:$src)), - (VPBROADCASTQYrr (v2i64 (COPY_TO_REGCLASS GR64:$src, VR128)))>; + (VPBROADCASTQYrr (VMOV64toPQIrr GR64:$src))>; } // AVX1 broadcast patterns let Predicates = [HasAVX1Only] in { -def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), +def : Pat<(v8i32 (X86VBroadcastld32 addr:$src)), (VBROADCASTSSYrm addr:$src)>; -def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), +def : Pat<(v4i64 (X86VBroadcastld64 addr:$src)), (VBROADCASTSDYrm addr:$src)>; -def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), +def : Pat<(v4i32 (X86VBroadcastld32 addr:$src)), (VBROADCASTSSrm addr:$src)>; } @@ -7557,12 +7544,12 @@ let Predicates = [HasAVX, NoVLX] in { // 128bit broadcasts: def : Pat<(v2f64 (X86VBroadcast f64:$src)), (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>; - def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(v2f64 (X86VBroadcastld64 addr:$src)), (VMOVDDUPrm addr:$src)>; def : Pat<(v2f64 (X86VBroadcast v2f64:$src)), (VMOVDDUPrr VR128:$src)>; - def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))), + def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))), (VMOVDDUPrm addr:$src)>; def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))), (VMOVDDUPrm addr:$src)>; @@ -7581,19 +7568,19 @@ let Predicates = [HasAVX1Only] in { (v2f64 (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))), 1)>; def : Pat<(v4i32 (X86VBroadcast GR32:$src)), - (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)), 0)>; + (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)>; def : Pat<(v8i32 (X86VBroadcast GR32:$src)), (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), - (v4i32 (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)), 0)), sub_xmm), - (v4i32 (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)), 
0)), 1)>; + (v4i32 (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)), sub_xmm), + (v4i32 (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)), 1)>; def : Pat<(v4i64 (X86VBroadcast GR64:$src)), (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), - (v4i32 (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR64:$src, VR128)), 0x44)), sub_xmm), - (v4i32 (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR64:$src, VR128)), 0x44)), 1)>; + (v4i32 (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)), sub_xmm), + (v4i32 (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)), 1)>; def : Pat<(v2i64 (X86VBroadcast i64:$src)), - (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR64:$src, VR128)), 0x44)>; - def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))), + (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)>; + def : Pat<(v2i64 (X86VBroadcastld64 addr:$src)), (VMOVDDUPrm addr:$src)>; } @@ -7636,7 +7623,7 @@ multiclass avx2_perm_imm opc, string OpcodeStr, PatFrag mem_frag, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, - (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>, + (OpVT (X86VPermi VR256:$src1, (i8 timm:$src2))))]>, Sched<[Sched]>, VEX, VEX_L; def Ymi : AVX2AIi8 opc, string OpcodeStr, PatFrag mem_frag, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (OpVT (X86VPermi (mem_frag addr:$src1), - (i8 imm:$src2))))]>, + (i8 timm:$src2))))]>, Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VEX_L; } } @@ -7663,19 +7650,19 @@ def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, u8imm:$src3), "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, - (i8 imm:$src3))))]>, Sched<[WriteShuffle256]>, + (i8 timm:$src3))))]>, Sched<[WriteShuffle256]>, VEX_4V, VEX_L; def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, u8imm:$src3), "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2), - (i8 imm:$src3)))]>, + (i8 timm:$src3)))]>, Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L; let Predicates = [HasAVX2] in def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2), - VR256:$src1, (i8 imm:$imm))), - (VPERM2I128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>; + VR256:$src1, (i8 timm:$imm))), + (VPERM2I128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm timm:$imm))>; //===----------------------------------------------------------------------===// @@ -7760,7 +7747,7 @@ defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq", int_x86_avx2_maskstore_q_256>, VEX_W; multiclass maskmov_lowering { + ValueType MaskVT> { // masked store def: Pat<(masked_store (VT RC:$src), addr:$ptr, (MaskVT RC:$mask)), (!cast(InstrStr#"mr") addr:$ptr, RC:$mask, RC:$src)>; @@ -7772,23 +7759,23 @@ multiclass maskmov_lowering(InstrStr#"rm") RC:$mask, addr:$ptr)>; } let Predicates = [HasAVX] in { - defm : maskmov_lowering<"VMASKMOVPS", VR128, v4f32, v4i32, "VBLENDVPS", v4i32>; - defm : maskmov_lowering<"VMASKMOVPD", VR128, v2f64, v2i64, "VBLENDVPD", v4i32>; - defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8f32, v8i32, "VBLENDVPSY", v8i32>; - defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4f64, v4i64, "VBLENDVPDY", v8i32>; + defm : maskmov_lowering<"VMASKMOVPS", VR128, v4f32, v4i32>; + defm : maskmov_lowering<"VMASKMOVPD", VR128, v2f64, v2i64>; + defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8f32, v8i32>; + defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4f64, v4i64>; } let Predicates = [HasAVX1Only] in 
{ // load/store i32/i64 not supported use ps/pd version - defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8i32, v8i32, "VBLENDVPSY", v8i32>; - defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4i64, v4i64, "VBLENDVPDY", v8i32>; - defm : maskmov_lowering<"VMASKMOVPS", VR128, v4i32, v4i32, "VBLENDVPS", v4i32>; - defm : maskmov_lowering<"VMASKMOVPD", VR128, v2i64, v2i64, "VBLENDVPD", v4i32>; + defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8i32, v8i32>; + defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4i64, v4i64>; + defm : maskmov_lowering<"VMASKMOVPS", VR128, v4i32, v4i32>; + defm : maskmov_lowering<"VMASKMOVPD", VR128, v2i64, v2i64>; } let Predicates = [HasAVX2] in { - defm : maskmov_lowering<"VPMASKMOVDY", VR256, v8i32, v8i32, "VBLENDVPSY", v8i32>; - defm : maskmov_lowering<"VPMASKMOVQY", VR256, v4i64, v4i64, "VBLENDVPDY", v8i32>; - defm : maskmov_lowering<"VPMASKMOVD", VR128, v4i32, v4i32, "VBLENDVPS", v4i32>; - defm : maskmov_lowering<"VPMASKMOVQ", VR128, v2i64, v2i64, "VBLENDVPD", v4i32>; + defm : maskmov_lowering<"VPMASKMOVDY", VR256, v8i32, v8i32>; + defm : maskmov_lowering<"VPMASKMOVQY", VR256, v4i64, v4i64>; + defm : maskmov_lowering<"VPMASKMOVD", VR128, v4i32, v4i32>; + defm : maskmov_lowering<"VPMASKMOVQ", VR128, v2i64, v2i64>; } //===----------------------------------------------------------------------===// @@ -7956,13 +7943,13 @@ multiclass GF2P8AFFINE_rmi Op, string OpStr, ValueType OpVT, OpStr##"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}") in { def rri : Ii8, Sched<[SchedWriteVecALU.XMM]>; def rmi : Ii8, + timm:$src3)))], SSEPackedInt>, Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>; } } diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index 7050e1917494..7f41feb6c0d9 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -43,7 +43,7 @@ def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", [(int_x86_int (i8 3))]>; let SchedRW = [WriteSystem] in { def INT : Ii8<0xcd, RawFrm, (outs), (ins u8imm:$trap), "int\t$trap", - [(int_x86_int imm:$trap)]>; + [(int_x86_int timm:$trap)]>; def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB; diff --git a/lib/Target/X86/X86InstrTSX.td b/lib/Target/X86/X86InstrTSX.td index fc0da845299f..3a1212342a13 100644 --- a/lib/Target/X86/X86InstrTSX.td +++ b/lib/Target/X86/X86InstrTSX.td @@ -45,7 +45,7 @@ def XTEST : I<0x01, MRM_D6, (outs), (ins), def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm), "xabort\t$imm", - [(int_x86_xabort imm:$imm)]>, Requires<[HasRTM]>; + [(int_x86_xabort timm:$imm)]>, Requires<[HasRTM]>; } // SchedRW // HLE prefixes diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td index 66ca78556b82..229af366d940 100644 --- a/lib/Target/X86/X86InstrXOP.td +++ b/lib/Target/X86/X86InstrXOP.td @@ -143,13 +143,13 @@ multiclass xop3opimm opc, string OpcodeStr, SDNode OpNode, (ins VR128:$src1, u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, - (vt128 (OpNode (vt128 VR128:$src1), imm:$src2)))]>, + (vt128 (OpNode (vt128 VR128:$src1), timm:$src2)))]>, XOP, Sched<[sched]>; def mi : IXOPi8, + (vt128 (OpNode (vt128 (load addr:$src1)), timm:$src2)))]>, XOP, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -251,7 +251,7 @@ multiclass xopvpcom opc, string Suffix, SDNode OpNode, ValueType vt128, "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [(set VR128:$dst, (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2), - imm:$cc)))]>, + timm:$cc)))]>, XOP_4V, 
Sched<[sched]>; def mi : IXOPi8 opc, string Suffix, SDNode OpNode, ValueType vt128, [(set VR128:$dst, (vt128 (OpNode (vt128 VR128:$src1), (vt128 (load addr:$src2)), - imm:$cc)))]>, + timm:$cc)))]>, XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; } def : Pat<(OpNode (load addr:$src2), - (vt128 VR128:$src1), imm:$cc), + (vt128 VR128:$src1), timm:$cc), (!cast(NAME#"mi") VR128:$src1, addr:$src2, - (CommuteVPCOMCC imm:$cc))>; + (CommuteVPCOMCC timm:$cc))>; } defm VPCOMB : xopvpcom<0xCC, "b", X86vpcom, v16i8, SchedWriteVecALU.XMM>; @@ -418,27 +418,27 @@ multiclass xop_vpermil2 Opc, string OpcodeStr, RegisterClass RC, ValueType VT, PatFrag FPLdFrag, PatFrag IntLdFrag, X86FoldableSchedWrite sched> { def rr : IXOP5, + (VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 timm:$src4))))]>, Sched<[sched]>; def rm : IXOP5, VEX_W, + (i8 timm:$src4))))]>, VEX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; def mr : IXOP5, + RC:$src3, (i8 timm:$src4))))]>, Sched<[sched.Folded, sched.ReadAfterFold, // fpmemop:$src2 ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, @@ -447,7 +447,7 @@ multiclass xop_vpermil2 Opc, string OpcodeStr, RegisterClass RC, // For disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in def rr_REV : IXOP5, VEX_W, Sched<[sched]>, FoldGenData; diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp index 892a083f4d1a..01620b7b64c9 100644 --- a/lib/Target/X86/X86InstructionSelector.cpp +++ b/lib/Target/X86/X86InstructionSelector.cpp @@ -60,7 +60,7 @@ public: X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI, const X86RegisterBankInfo &RBI); - bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override; + bool select(MachineInstr &I) override; static const char *getName() { return DEBUG_TYPE; } private: @@ -94,11 +94,9 @@ private: MachineFunction &MF) const; bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI, - MachineFunction &MF, - CodeGenCoverage &CoverageInfo) const; + MachineFunction &MF); bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI, - MachineFunction &MF, - CodeGenCoverage &CoverageInfo) const; + MachineFunction &MF); bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; bool selectExtract(MachineInstr &I, MachineRegisterInfo &MRI, @@ -217,7 +215,7 @@ static unsigned getSubRegIndex(const TargetRegisterClass *RC) { } static const TargetRegisterClass *getRegClassFromGRPhysReg(unsigned Reg) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + assert(Register::isPhysicalRegister(Reg)); if (X86::GR64RegClass.contains(Reg)) return &X86::GR64RegClass; if (X86::GR32RegClass.contains(Reg)) @@ -233,15 +231,15 @@ static const TargetRegisterClass *getRegClassFromGRPhysReg(unsigned Reg) { // Set X86 Opcode and constrain DestReg. 
bool X86InstructionSelector::selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const { - unsigned DstReg = I.getOperand(0).getReg(); + Register DstReg = I.getOperand(0).getReg(); const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI); const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); - unsigned SrcReg = I.getOperand(1).getReg(); + Register SrcReg = I.getOperand(1).getReg(); const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { + if (Register::isPhysicalRegister(DstReg)) { assert(I.isCopy() && "Generic operators do not allow physical registers"); if (DstSize > SrcSize && SrcRegBank.getID() == X86::GPRRegBankID && @@ -253,7 +251,7 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I, if (SrcRC != DstRC) { // This case can be generated by ABI lowering, performe anyext - unsigned ExtSrc = MRI.createVirtualRegister(DstRC); + Register ExtSrc = MRI.createVirtualRegister(DstRC); BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::SUBREG_TO_REG)) .addDef(ExtSrc) @@ -268,12 +266,12 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I, return true; } - assert((!TargetRegisterInfo::isPhysicalRegister(SrcReg) || I.isCopy()) && + assert((!Register::isPhysicalRegister(SrcReg) || I.isCopy()) && "No phys reg on generic operators"); assert((DstSize == SrcSize || // Copies are a mean to setup initial types, the number of // bits may not exactly match. - (TargetRegisterInfo::isPhysicalRegister(SrcReg) && + (Register::isPhysicalRegister(SrcReg) && DstSize <= RBI.getSizeInBits(SrcReg, MRI, TRI))) && "Copy with different width?!"); @@ -282,7 +280,7 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I, if (SrcRegBank.getID() == X86::GPRRegBankID && DstRegBank.getID() == X86::GPRRegBankID && SrcSize > DstSize && - TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + Register::isPhysicalRegister(SrcReg)) { // Change the physical register to performe truncate. 
const TargetRegisterClass *SrcRC = getRegClassFromGRPhysReg(SrcReg); @@ -308,8 +306,7 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I, return true; } -bool X86InstructionSelector::select(MachineInstr &I, - CodeGenCoverage &CoverageInfo) const { +bool X86InstructionSelector::select(MachineInstr &I) { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -333,7 +330,7 @@ bool X86InstructionSelector::select(MachineInstr &I, assert(I.getNumOperands() == I.getNumExplicitOperands() && "Generic instruction has unexpected implicit operands\n"); - if (selectImpl(I, CoverageInfo)) + if (selectImpl(I, *CoverageInfo)) return true; LLVM_DEBUG(dbgs() << " C++ instruction selection: "; I.print(dbgs())); @@ -370,10 +367,10 @@ bool X86InstructionSelector::select(MachineInstr &I, case TargetOpcode::G_UADDE: return selectUadde(I, MRI, MF); case TargetOpcode::G_UNMERGE_VALUES: - return selectUnmergeValues(I, MRI, MF, CoverageInfo); + return selectUnmergeValues(I, MRI, MF); case TargetOpcode::G_MERGE_VALUES: case TargetOpcode::G_CONCAT_VECTORS: - return selectMergeValues(I, MRI, MF, CoverageInfo); + return selectMergeValues(I, MRI, MF); case TargetOpcode::G_EXTRACT: return selectExtract(I, MRI, MF); case TargetOpcode::G_INSERT: @@ -512,7 +509,7 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I, assert((Opc == TargetOpcode::G_STORE || Opc == TargetOpcode::G_LOAD) && "unexpected instruction"); - const unsigned DefReg = I.getOperand(0).getReg(); + const Register DefReg = I.getOperand(0).getReg(); LLT Ty = MRI.getType(DefReg); const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); @@ -572,7 +569,7 @@ bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I, assert((Opc == TargetOpcode::G_FRAME_INDEX || Opc == TargetOpcode::G_GEP) && "unexpected instruction"); - const unsigned DefReg = I.getOperand(0).getReg(); + const Register DefReg = I.getOperand(0).getReg(); LLT Ty = MRI.getType(DefReg); // Use LEA to calculate frame index and GEP @@ -625,7 +622,7 @@ bool X86InstructionSelector::selectGlobalValue(MachineInstr &I, AM.Base.Reg = X86::RIP; } - const unsigned DefReg = I.getOperand(0).getReg(); + const Register DefReg = I.getOperand(0).getReg(); LLT Ty = MRI.getType(DefReg); unsigned NewOpc = getLeaOP(Ty, STI); @@ -644,7 +641,7 @@ bool X86InstructionSelector::selectConstant(MachineInstr &I, assert((I.getOpcode() == TargetOpcode::G_CONSTANT) && "unexpected instruction"); - const unsigned DefReg = I.getOperand(0).getReg(); + const Register DefReg = I.getOperand(0).getReg(); LLT Ty = MRI.getType(DefReg); if (RBI.getRegBank(DefReg, MRI, TRI)->getID() != X86::GPRRegBankID) @@ -717,8 +714,8 @@ bool X86InstructionSelector::selectTruncOrPtrToInt(MachineInstr &I, I.getOpcode() == TargetOpcode::G_PTRTOINT) && "unexpected instruction"); - const unsigned DstReg = I.getOperand(0).getReg(); - const unsigned SrcReg = I.getOperand(1).getReg(); + const Register DstReg = I.getOperand(0).getReg(); + const Register SrcReg = I.getOperand(1).getReg(); const LLT DstTy = MRI.getType(DstReg); const LLT SrcTy = MRI.getType(SrcReg); @@ -781,8 +778,8 @@ bool X86InstructionSelector::selectZext(MachineInstr &I, MachineFunction &MF) const { assert((I.getOpcode() == TargetOpcode::G_ZEXT) && "unexpected instruction"); - const unsigned DstReg = I.getOperand(0).getReg(); - const unsigned SrcReg = I.getOperand(1).getReg(); + const Register DstReg = I.getOperand(0).getReg(); + const Register SrcReg = 
I.getOperand(1).getReg(); const LLT DstTy = MRI.getType(DstReg); const LLT SrcTy = MRI.getType(SrcReg); @@ -892,8 +889,8 @@ bool X86InstructionSelector::selectAnyext(MachineInstr &I, MachineFunction &MF) const { assert((I.getOpcode() == TargetOpcode::G_ANYEXT) && "unexpected instruction"); - const unsigned DstReg = I.getOperand(0).getReg(); - const unsigned SrcReg = I.getOperand(1).getReg(); + const Register DstReg = I.getOperand(0).getReg(); + const Register SrcReg = I.getOperand(1).getReg(); const LLT DstTy = MRI.getType(DstReg); const LLT SrcTy = MRI.getType(SrcReg); @@ -952,8 +949,8 @@ bool X86InstructionSelector::selectCmp(MachineInstr &I, std::tie(CC, SwapArgs) = X86::getX86ConditionCode( (CmpInst::Predicate)I.getOperand(1).getPredicate()); - unsigned LHS = I.getOperand(2).getReg(); - unsigned RHS = I.getOperand(3).getReg(); + Register LHS = I.getOperand(2).getReg(); + Register RHS = I.getOperand(3).getReg(); if (SwapArgs) std::swap(LHS, RHS); @@ -998,8 +995,8 @@ bool X86InstructionSelector::selectFCmp(MachineInstr &I, MachineFunction &MF) const { assert((I.getOpcode() == TargetOpcode::G_FCMP) && "unexpected instruction"); - unsigned LhsReg = I.getOperand(2).getReg(); - unsigned RhsReg = I.getOperand(3).getReg(); + Register LhsReg = I.getOperand(2).getReg(); + Register RhsReg = I.getOperand(3).getReg(); CmpInst::Predicate Predicate = (CmpInst::Predicate)I.getOperand(1).getPredicate(); @@ -1033,7 +1030,7 @@ bool X86InstructionSelector::selectFCmp(MachineInstr &I, break; } - unsigned ResultReg = I.getOperand(0).getReg(); + Register ResultReg = I.getOperand(0).getReg(); RBI.constrainGenericRegister( ResultReg, *getRegClass(LLT::scalar(8), *RBI.getRegBank(ResultReg, MRI, TRI)), MRI); @@ -1043,8 +1040,8 @@ bool X86InstructionSelector::selectFCmp(MachineInstr &I, .addReg(LhsReg) .addReg(RhsReg); - unsigned FlagReg1 = MRI.createVirtualRegister(&X86::GR8RegClass); - unsigned FlagReg2 = MRI.createVirtualRegister(&X86::GR8RegClass); + Register FlagReg1 = MRI.createVirtualRegister(&X86::GR8RegClass); + Register FlagReg2 = MRI.createVirtualRegister(&X86::GR8RegClass); MachineInstr &Set1 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr), FlagReg1).addImm(SETFOpc[0]); MachineInstr &Set2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), @@ -1089,11 +1086,11 @@ bool X86InstructionSelector::selectUadde(MachineInstr &I, MachineFunction &MF) const { assert((I.getOpcode() == TargetOpcode::G_UADDE) && "unexpected instruction"); - const unsigned DstReg = I.getOperand(0).getReg(); - const unsigned CarryOutReg = I.getOperand(1).getReg(); - const unsigned Op0Reg = I.getOperand(2).getReg(); - const unsigned Op1Reg = I.getOperand(3).getReg(); - unsigned CarryInReg = I.getOperand(4).getReg(); + const Register DstReg = I.getOperand(0).getReg(); + const Register CarryOutReg = I.getOperand(1).getReg(); + const Register Op0Reg = I.getOperand(2).getReg(); + const Register Op1Reg = I.getOperand(3).getReg(); + Register CarryInReg = I.getOperand(4).getReg(); const LLT DstTy = MRI.getType(DstReg); @@ -1149,8 +1146,8 @@ bool X86InstructionSelector::selectExtract(MachineInstr &I, assert((I.getOpcode() == TargetOpcode::G_EXTRACT) && "unexpected instruction"); - const unsigned DstReg = I.getOperand(0).getReg(); - const unsigned SrcReg = I.getOperand(1).getReg(); + const Register DstReg = I.getOperand(0).getReg(); + const Register SrcReg = I.getOperand(1).getReg(); int64_t Index = I.getOperand(2).getImm(); const LLT DstTy = MRI.getType(DstReg); @@ -1281,9 +1278,9 @@ bool 
X86InstructionSelector::selectInsert(MachineInstr &I, MachineFunction &MF) const { assert((I.getOpcode() == TargetOpcode::G_INSERT) && "unexpected instruction"); - const unsigned DstReg = I.getOperand(0).getReg(); - const unsigned SrcReg = I.getOperand(1).getReg(); - const unsigned InsertReg = I.getOperand(2).getReg(); + const Register DstReg = I.getOperand(0).getReg(); + const Register SrcReg = I.getOperand(1).getReg(); + const Register InsertReg = I.getOperand(2).getReg(); int64_t Index = I.getOperand(3).getImm(); const LLT DstTy = MRI.getType(DstReg); @@ -1335,14 +1332,13 @@ bool X86InstructionSelector::selectInsert(MachineInstr &I, } bool X86InstructionSelector::selectUnmergeValues( - MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF, - CodeGenCoverage &CoverageInfo) const { + MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) { assert((I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES) && "unexpected instruction"); // Split to extracts. unsigned NumDefs = I.getNumOperands() - 1; - unsigned SrcReg = I.getOperand(NumDefs).getReg(); + Register SrcReg = I.getOperand(NumDefs).getReg(); unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits(); for (unsigned Idx = 0; Idx < NumDefs; ++Idx) { @@ -1352,7 +1348,7 @@ bool X86InstructionSelector::selectUnmergeValues( .addReg(SrcReg) .addImm(Idx * DefSize); - if (!select(ExtrInst, CoverageInfo)) + if (!select(ExtrInst)) return false; } @@ -1361,15 +1357,14 @@ bool X86InstructionSelector::selectUnmergeValues( } bool X86InstructionSelector::selectMergeValues( - MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF, - CodeGenCoverage &CoverageInfo) const { + MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) { assert((I.getOpcode() == TargetOpcode::G_MERGE_VALUES || I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS) && "unexpected instruction"); // Split to inserts. - unsigned DstReg = I.getOperand(0).getReg(); - unsigned SrcReg0 = I.getOperand(1).getReg(); + Register DstReg = I.getOperand(0).getReg(); + Register SrcReg0 = I.getOperand(1).getReg(); const LLT DstTy = MRI.getType(DstReg); const LLT SrcTy = MRI.getType(SrcReg0); @@ -1378,13 +1373,13 @@ bool X86InstructionSelector::selectMergeValues( const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI); // For the first src use insertSubReg. 
- unsigned DefReg = MRI.createGenericVirtualRegister(DstTy); + Register DefReg = MRI.createGenericVirtualRegister(DstTy); MRI.setRegBank(DefReg, RegBank); if (!emitInsertSubreg(DefReg, I.getOperand(1).getReg(), I, MRI, MF)) return false; for (unsigned Idx = 2; Idx < I.getNumOperands(); ++Idx) { - unsigned Tmp = MRI.createGenericVirtualRegister(DstTy); + Register Tmp = MRI.createGenericVirtualRegister(DstTy); MRI.setRegBank(Tmp, RegBank); MachineInstr &InsertInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(), @@ -1395,7 +1390,7 @@ bool X86InstructionSelector::selectMergeValues( DefReg = Tmp; - if (!select(InsertInst, CoverageInfo)) + if (!select(InsertInst)) return false; } @@ -1403,7 +1398,7 @@ bool X86InstructionSelector::selectMergeValues( TII.get(TargetOpcode::COPY), DstReg) .addReg(DefReg); - if (!select(CopyInst, CoverageInfo)) + if (!select(CopyInst)) return false; I.eraseFromParent(); @@ -1415,7 +1410,7 @@ bool X86InstructionSelector::selectCondBranch(MachineInstr &I, MachineFunction &MF) const { assert((I.getOpcode() == TargetOpcode::G_BRCOND) && "unexpected instruction"); - const unsigned CondReg = I.getOperand(0).getReg(); + const Register CondReg = I.getOperand(0).getReg(); MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); MachineInstr &TestInst = @@ -1442,7 +1437,7 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I, if (CM != CodeModel::Small && CM != CodeModel::Large) return false; - const unsigned DstReg = I.getOperand(0).getReg(); + const Register DstReg = I.getOperand(0).getReg(); const LLT DstTy = MRI.getType(DstReg); const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI); unsigned Align = DstTy.getSizeInBits(); @@ -1460,7 +1455,7 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I, // Under X86-64 non-small code model, GV (and friends) are 64-bits, so // they cannot be folded into immediate fields. 
- unsigned AddrReg = MRI.createVirtualRegister(&X86::GR64RegClass); + Register AddrReg = MRI.createVirtualRegister(&X86::GR64RegClass); BuildMI(*I.getParent(), I, DbgLoc, TII.get(X86::MOV64ri), AddrReg) .addConstantPoolIndex(CPI, 0, OpFlag); @@ -1503,7 +1498,7 @@ bool X86InstructionSelector::selectImplicitDefOrPHI( I.getOpcode() == TargetOpcode::G_PHI) && "unexpected instruction"); - unsigned DstReg = I.getOperand(0).getReg(); + Register DstReg = I.getOperand(0).getReg(); if (!MRI.getRegClassOrNull(DstReg)) { const LLT DstTy = MRI.getType(DstReg); @@ -1537,7 +1532,7 @@ bool X86InstructionSelector::selectShift(MachineInstr &I, I.getOpcode() == TargetOpcode::G_LSHR) && "unexpected instruction"); - unsigned DstReg = I.getOperand(0).getReg(); + Register DstReg = I.getOperand(0).getReg(); const LLT DstTy = MRI.getType(DstReg); const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); @@ -1578,8 +1573,8 @@ bool X86InstructionSelector::selectShift(MachineInstr &I, return false; } - unsigned Op0Reg = I.getOperand(1).getReg(); - unsigned Op1Reg = I.getOperand(2).getReg(); + Register Op0Reg = I.getOperand(1).getReg(); + Register Op1Reg = I.getOperand(2).getReg(); assert(MRI.getType(Op1Reg).getSizeInBits() == 8); @@ -1606,9 +1601,9 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I, I.getOpcode() == TargetOpcode::G_UREM) && "unexpected instruction"); - const unsigned DstReg = I.getOperand(0).getReg(); - const unsigned Op1Reg = I.getOperand(1).getReg(); - const unsigned Op2Reg = I.getOperand(2).getReg(); + const Register DstReg = I.getOperand(0).getReg(); + const Register Op1Reg = I.getOperand(1).getReg(); + const Register Op2Reg = I.getOperand(2).getReg(); const LLT RegTy = MRI.getType(DstReg); assert(RegTy == MRI.getType(Op1Reg) && RegTy == MRI.getType(Op2Reg) && @@ -1732,7 +1727,7 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I, BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpSignExtend)); else { - unsigned Zero32 = MRI.createVirtualRegister(&X86::GR32RegClass); + Register Zero32 = MRI.createVirtualRegister(&X86::GR32RegClass); BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::MOV32r0), Zero32); @@ -1770,8 +1765,8 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I, if ((I.getOpcode() == Instruction::SRem || I.getOpcode() == Instruction::URem) && OpEntry.DivRemResultReg == X86::AH && STI.is64Bit()) { - unsigned SourceSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass); - unsigned ResultSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass); + Register SourceSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass); + Register ResultSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass); BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), SourceSuperReg) .addReg(X86::AX); diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 40141d894629..1d7adbaa9e99 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -23,7 +23,7 @@ enum IntrinsicType : uint16_t { GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, XGETBV, ADX, FPCLASSS, INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP, INTR_TYPE_3OP_IMM8, - CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM, BLENDV, + CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM, BLENDV, BEXTRI, CVTPD2PS_MASK, INTR_TYPE_1OP_SAE, INTR_TYPE_2OP_SAE, INTR_TYPE_1OP_MASK_SAE, INTR_TYPE_2OP_MASK_SAE, INTR_TYPE_3OP_MASK_SAE, @@ -1101,8 +1101,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { 
  X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
  X86_INTRINSIC_DATA(subborrow_32, ADX, X86ISD::SBB, X86ISD::SUB),
  X86_INTRINSIC_DATA(subborrow_64, ADX, X86ISD::SBB, X86ISD::SUB),
-  X86_INTRINSIC_DATA(tbm_bextri_u32, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
-  X86_INTRINSIC_DATA(tbm_bextri_u64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
+  X86_INTRINSIC_DATA(tbm_bextri_u32, BEXTRI, X86ISD::BEXTR, 0),
+  X86_INTRINSIC_DATA(tbm_bextri_u64, BEXTRI, X86ISD::BEXTR, 0),
  X86_INTRINSIC_DATA(vcvtph2ps_128, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
  X86_INTRINSIC_DATA(vcvtph2ps_256, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
  X86_INTRINSIC_DATA(vcvtps2ph_128, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0),
diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp
index 00fb1b573858..04121f863c89 100644
--- a/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/lib/Target/X86/X86LegalizerInfo.cpp
@@ -13,6 +13,7 @@
 #include "X86LegalizerInfo.h"
 #include "X86Subtarget.h"
 #include "X86TargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -84,6 +85,24 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
   verify(*STI.getInstrInfo());
 }
 
+bool X86LegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
+                                         MachineRegisterInfo &MRI,
+                                         MachineIRBuilder &MIRBuilder) const {
+  switch (MI.getIntrinsicID()) {
+  case Intrinsic::memcpy:
+  case Intrinsic::memset:
+  case Intrinsic::memmove:
+    if (createMemLibcall(MIRBuilder, MRI, MI) ==
+        LegalizerHelper::UnableToLegalize)
+      return false;
+    MI.eraseFromParent();
+    return true;
+  default:
+    break;
+  }
+  return true;
+}
+
 void X86LegalizerInfo::setLegalizerInfo32bit() {
 
   const LLT p0 = LLT::pointer(0, TM.getPointerSizeInBits(0));
@@ -158,6 +177,7 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
     setAction({G_ANYEXT, Ty}, Legal);
   }
   setAction({G_ANYEXT, s128}, Legal);
+  getActionDefinitionsBuilder(G_SEXT_INREG).lower();
 
   // Comparison
   setAction({G_ICMP, s1}, Legal);
diff --git a/lib/Target/X86/X86LegalizerInfo.h b/lib/Target/X86/X86LegalizerInfo.h
index d21707b9ab9b..7a0f13fb5ae6 100644
--- a/lib/Target/X86/X86LegalizerInfo.h
+++ b/lib/Target/X86/X86LegalizerInfo.h
@@ -32,6 +32,9 @@ private:
 public:
   X86LegalizerInfo(const X86Subtarget &STI, const X86TargetMachine &TM);
 
+  bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
+                         MachineIRBuilder &MIRBuilder) const override;
+
 private:
   void setLegalizerInfo32bit();
   void setLegalizerInfo64bit();
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index b1fefaa84be4..78098fd6262f 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -427,6 +427,41 @@ X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
   }
 }
 
+// Replace TAILJMP opcodes with their equivalent opcodes that have encoding
+// information.
+static unsigned convertTailJumpOpcode(unsigned Opcode) { + switch (Opcode) { + case X86::TAILJMPr: + Opcode = X86::JMP32r; + break; + case X86::TAILJMPm: + Opcode = X86::JMP32m; + break; + case X86::TAILJMPr64: + Opcode = X86::JMP64r; + break; + case X86::TAILJMPm64: + Opcode = X86::JMP64m; + break; + case X86::TAILJMPr64_REX: + Opcode = X86::JMP64r_REX; + break; + case X86::TAILJMPm64_REX: + Opcode = X86::JMP64m_REX; + break; + case X86::TAILJMPd: + case X86::TAILJMPd64: + Opcode = X86::JMP_1; + break; + case X86::TAILJMPd_CC: + case X86::TAILJMPd64_CC: + Opcode = X86::JCC_1; + break; + } + + return Opcode; +} + void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); @@ -500,21 +535,190 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { break; } - // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have register - // inputs modeled as normal uses instead of implicit uses. As such, truncate - // off all but the first operand (the callee). FIXME: Change isel. - case X86::TAILJMPr64: - case X86::TAILJMPr64_REX: - case X86::CALL64r: - case X86::CALL64pcrel32: { - unsigned Opcode = OutMI.getOpcode(); - MCOperand Saved = OutMI.getOperand(0); - OutMI = MCInst(); - OutMI.setOpcode(Opcode); - OutMI.addOperand(Saved); + case X86::VPCMPBZ128rmi: case X86::VPCMPBZ128rmik: + case X86::VPCMPBZ128rri: case X86::VPCMPBZ128rrik: + case X86::VPCMPBZ256rmi: case X86::VPCMPBZ256rmik: + case X86::VPCMPBZ256rri: case X86::VPCMPBZ256rrik: + case X86::VPCMPBZrmi: case X86::VPCMPBZrmik: + case X86::VPCMPBZrri: case X86::VPCMPBZrrik: + case X86::VPCMPDZ128rmi: case X86::VPCMPDZ128rmik: + case X86::VPCMPDZ128rmib: case X86::VPCMPDZ128rmibk: + case X86::VPCMPDZ128rri: case X86::VPCMPDZ128rrik: + case X86::VPCMPDZ256rmi: case X86::VPCMPDZ256rmik: + case X86::VPCMPDZ256rmib: case X86::VPCMPDZ256rmibk: + case X86::VPCMPDZ256rri: case X86::VPCMPDZ256rrik: + case X86::VPCMPDZrmi: case X86::VPCMPDZrmik: + case X86::VPCMPDZrmib: case X86::VPCMPDZrmibk: + case X86::VPCMPDZrri: case X86::VPCMPDZrrik: + case X86::VPCMPQZ128rmi: case X86::VPCMPQZ128rmik: + case X86::VPCMPQZ128rmib: case X86::VPCMPQZ128rmibk: + case X86::VPCMPQZ128rri: case X86::VPCMPQZ128rrik: + case X86::VPCMPQZ256rmi: case X86::VPCMPQZ256rmik: + case X86::VPCMPQZ256rmib: case X86::VPCMPQZ256rmibk: + case X86::VPCMPQZ256rri: case X86::VPCMPQZ256rrik: + case X86::VPCMPQZrmi: case X86::VPCMPQZrmik: + case X86::VPCMPQZrmib: case X86::VPCMPQZrmibk: + case X86::VPCMPQZrri: case X86::VPCMPQZrrik: + case X86::VPCMPWZ128rmi: case X86::VPCMPWZ128rmik: + case X86::VPCMPWZ128rri: case X86::VPCMPWZ128rrik: + case X86::VPCMPWZ256rmi: case X86::VPCMPWZ256rmik: + case X86::VPCMPWZ256rri: case X86::VPCMPWZ256rrik: + case X86::VPCMPWZrmi: case X86::VPCMPWZrmik: + case X86::VPCMPWZrri: case X86::VPCMPWZrrik: { + // Turn immediate 0 into the VPCMPEQ instruction. 
+ if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 0) { + unsigned NewOpc; + switch (OutMI.getOpcode()) { + case X86::VPCMPBZ128rmi: NewOpc = X86::VPCMPEQBZ128rm; break; + case X86::VPCMPBZ128rmik: NewOpc = X86::VPCMPEQBZ128rmk; break; + case X86::VPCMPBZ128rri: NewOpc = X86::VPCMPEQBZ128rr; break; + case X86::VPCMPBZ128rrik: NewOpc = X86::VPCMPEQBZ128rrk; break; + case X86::VPCMPBZ256rmi: NewOpc = X86::VPCMPEQBZ256rm; break; + case X86::VPCMPBZ256rmik: NewOpc = X86::VPCMPEQBZ256rmk; break; + case X86::VPCMPBZ256rri: NewOpc = X86::VPCMPEQBZ256rr; break; + case X86::VPCMPBZ256rrik: NewOpc = X86::VPCMPEQBZ256rrk; break; + case X86::VPCMPBZrmi: NewOpc = X86::VPCMPEQBZrm; break; + case X86::VPCMPBZrmik: NewOpc = X86::VPCMPEQBZrmk; break; + case X86::VPCMPBZrri: NewOpc = X86::VPCMPEQBZrr; break; + case X86::VPCMPBZrrik: NewOpc = X86::VPCMPEQBZrrk; break; + case X86::VPCMPDZ128rmi: NewOpc = X86::VPCMPEQDZ128rm; break; + case X86::VPCMPDZ128rmib: NewOpc = X86::VPCMPEQDZ128rmb; break; + case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPEQDZ128rmbk; break; + case X86::VPCMPDZ128rmik: NewOpc = X86::VPCMPEQDZ128rmk; break; + case X86::VPCMPDZ128rri: NewOpc = X86::VPCMPEQDZ128rr; break; + case X86::VPCMPDZ128rrik: NewOpc = X86::VPCMPEQDZ128rrk; break; + case X86::VPCMPDZ256rmi: NewOpc = X86::VPCMPEQDZ256rm; break; + case X86::VPCMPDZ256rmib: NewOpc = X86::VPCMPEQDZ256rmb; break; + case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPEQDZ256rmbk; break; + case X86::VPCMPDZ256rmik: NewOpc = X86::VPCMPEQDZ256rmk; break; + case X86::VPCMPDZ256rri: NewOpc = X86::VPCMPEQDZ256rr; break; + case X86::VPCMPDZ256rrik: NewOpc = X86::VPCMPEQDZ256rrk; break; + case X86::VPCMPDZrmi: NewOpc = X86::VPCMPEQDZrm; break; + case X86::VPCMPDZrmib: NewOpc = X86::VPCMPEQDZrmb; break; + case X86::VPCMPDZrmibk: NewOpc = X86::VPCMPEQDZrmbk; break; + case X86::VPCMPDZrmik: NewOpc = X86::VPCMPEQDZrmk; break; + case X86::VPCMPDZrri: NewOpc = X86::VPCMPEQDZrr; break; + case X86::VPCMPDZrrik: NewOpc = X86::VPCMPEQDZrrk; break; + case X86::VPCMPQZ128rmi: NewOpc = X86::VPCMPEQQZ128rm; break; + case X86::VPCMPQZ128rmib: NewOpc = X86::VPCMPEQQZ128rmb; break; + case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPEQQZ128rmbk; break; + case X86::VPCMPQZ128rmik: NewOpc = X86::VPCMPEQQZ128rmk; break; + case X86::VPCMPQZ128rri: NewOpc = X86::VPCMPEQQZ128rr; break; + case X86::VPCMPQZ128rrik: NewOpc = X86::VPCMPEQQZ128rrk; break; + case X86::VPCMPQZ256rmi: NewOpc = X86::VPCMPEQQZ256rm; break; + case X86::VPCMPQZ256rmib: NewOpc = X86::VPCMPEQQZ256rmb; break; + case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPEQQZ256rmbk; break; + case X86::VPCMPQZ256rmik: NewOpc = X86::VPCMPEQQZ256rmk; break; + case X86::VPCMPQZ256rri: NewOpc = X86::VPCMPEQQZ256rr; break; + case X86::VPCMPQZ256rrik: NewOpc = X86::VPCMPEQQZ256rrk; break; + case X86::VPCMPQZrmi: NewOpc = X86::VPCMPEQQZrm; break; + case X86::VPCMPQZrmib: NewOpc = X86::VPCMPEQQZrmb; break; + case X86::VPCMPQZrmibk: NewOpc = X86::VPCMPEQQZrmbk; break; + case X86::VPCMPQZrmik: NewOpc = X86::VPCMPEQQZrmk; break; + case X86::VPCMPQZrri: NewOpc = X86::VPCMPEQQZrr; break; + case X86::VPCMPQZrrik: NewOpc = X86::VPCMPEQQZrrk; break; + case X86::VPCMPWZ128rmi: NewOpc = X86::VPCMPEQWZ128rm; break; + case X86::VPCMPWZ128rmik: NewOpc = X86::VPCMPEQWZ128rmk; break; + case X86::VPCMPWZ128rri: NewOpc = X86::VPCMPEQWZ128rr; break; + case X86::VPCMPWZ128rrik: NewOpc = X86::VPCMPEQWZ128rrk; break; + case X86::VPCMPWZ256rmi: NewOpc = X86::VPCMPEQWZ256rm; break; + case X86::VPCMPWZ256rmik: NewOpc = X86::VPCMPEQWZ256rmk; 
break; + case X86::VPCMPWZ256rri: NewOpc = X86::VPCMPEQWZ256rr; break; + case X86::VPCMPWZ256rrik: NewOpc = X86::VPCMPEQWZ256rrk; break; + case X86::VPCMPWZrmi: NewOpc = X86::VPCMPEQWZrm; break; + case X86::VPCMPWZrmik: NewOpc = X86::VPCMPEQWZrmk; break; + case X86::VPCMPWZrri: NewOpc = X86::VPCMPEQWZrr; break; + case X86::VPCMPWZrrik: NewOpc = X86::VPCMPEQWZrrk; break; + } + + OutMI.setOpcode(NewOpc); + OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1)); + break; + } + + // Turn immediate 6 into the VPCMPGT instruction. + if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 6) { + unsigned NewOpc; + switch (OutMI.getOpcode()) { + case X86::VPCMPBZ128rmi: NewOpc = X86::VPCMPGTBZ128rm; break; + case X86::VPCMPBZ128rmik: NewOpc = X86::VPCMPGTBZ128rmk; break; + case X86::VPCMPBZ128rri: NewOpc = X86::VPCMPGTBZ128rr; break; + case X86::VPCMPBZ128rrik: NewOpc = X86::VPCMPGTBZ128rrk; break; + case X86::VPCMPBZ256rmi: NewOpc = X86::VPCMPGTBZ256rm; break; + case X86::VPCMPBZ256rmik: NewOpc = X86::VPCMPGTBZ256rmk; break; + case X86::VPCMPBZ256rri: NewOpc = X86::VPCMPGTBZ256rr; break; + case X86::VPCMPBZ256rrik: NewOpc = X86::VPCMPGTBZ256rrk; break; + case X86::VPCMPBZrmi: NewOpc = X86::VPCMPGTBZrm; break; + case X86::VPCMPBZrmik: NewOpc = X86::VPCMPGTBZrmk; break; + case X86::VPCMPBZrri: NewOpc = X86::VPCMPGTBZrr; break; + case X86::VPCMPBZrrik: NewOpc = X86::VPCMPGTBZrrk; break; + case X86::VPCMPDZ128rmi: NewOpc = X86::VPCMPGTDZ128rm; break; + case X86::VPCMPDZ128rmib: NewOpc = X86::VPCMPGTDZ128rmb; break; + case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPGTDZ128rmbk; break; + case X86::VPCMPDZ128rmik: NewOpc = X86::VPCMPGTDZ128rmk; break; + case X86::VPCMPDZ128rri: NewOpc = X86::VPCMPGTDZ128rr; break; + case X86::VPCMPDZ128rrik: NewOpc = X86::VPCMPGTDZ128rrk; break; + case X86::VPCMPDZ256rmi: NewOpc = X86::VPCMPGTDZ256rm; break; + case X86::VPCMPDZ256rmib: NewOpc = X86::VPCMPGTDZ256rmb; break; + case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPGTDZ256rmbk; break; + case X86::VPCMPDZ256rmik: NewOpc = X86::VPCMPGTDZ256rmk; break; + case X86::VPCMPDZ256rri: NewOpc = X86::VPCMPGTDZ256rr; break; + case X86::VPCMPDZ256rrik: NewOpc = X86::VPCMPGTDZ256rrk; break; + case X86::VPCMPDZrmi: NewOpc = X86::VPCMPGTDZrm; break; + case X86::VPCMPDZrmib: NewOpc = X86::VPCMPGTDZrmb; break; + case X86::VPCMPDZrmibk: NewOpc = X86::VPCMPGTDZrmbk; break; + case X86::VPCMPDZrmik: NewOpc = X86::VPCMPGTDZrmk; break; + case X86::VPCMPDZrri: NewOpc = X86::VPCMPGTDZrr; break; + case X86::VPCMPDZrrik: NewOpc = X86::VPCMPGTDZrrk; break; + case X86::VPCMPQZ128rmi: NewOpc = X86::VPCMPGTQZ128rm; break; + case X86::VPCMPQZ128rmib: NewOpc = X86::VPCMPGTQZ128rmb; break; + case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPGTQZ128rmbk; break; + case X86::VPCMPQZ128rmik: NewOpc = X86::VPCMPGTQZ128rmk; break; + case X86::VPCMPQZ128rri: NewOpc = X86::VPCMPGTQZ128rr; break; + case X86::VPCMPQZ128rrik: NewOpc = X86::VPCMPGTQZ128rrk; break; + case X86::VPCMPQZ256rmi: NewOpc = X86::VPCMPGTQZ256rm; break; + case X86::VPCMPQZ256rmib: NewOpc = X86::VPCMPGTQZ256rmb; break; + case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPGTQZ256rmbk; break; + case X86::VPCMPQZ256rmik: NewOpc = X86::VPCMPGTQZ256rmk; break; + case X86::VPCMPQZ256rri: NewOpc = X86::VPCMPGTQZ256rr; break; + case X86::VPCMPQZ256rrik: NewOpc = X86::VPCMPGTQZ256rrk; break; + case X86::VPCMPQZrmi: NewOpc = X86::VPCMPGTQZrm; break; + case X86::VPCMPQZrmib: NewOpc = X86::VPCMPGTQZrmb; break; + case X86::VPCMPQZrmibk: NewOpc = X86::VPCMPGTQZrmbk; break; + case X86::VPCMPQZrmik: NewOpc 
= X86::VPCMPGTQZrmk; break; + case X86::VPCMPQZrri: NewOpc = X86::VPCMPGTQZrr; break; + case X86::VPCMPQZrrik: NewOpc = X86::VPCMPGTQZrrk; break; + case X86::VPCMPWZ128rmi: NewOpc = X86::VPCMPGTWZ128rm; break; + case X86::VPCMPWZ128rmik: NewOpc = X86::VPCMPGTWZ128rmk; break; + case X86::VPCMPWZ128rri: NewOpc = X86::VPCMPGTWZ128rr; break; + case X86::VPCMPWZ128rrik: NewOpc = X86::VPCMPGTWZ128rrk; break; + case X86::VPCMPWZ256rmi: NewOpc = X86::VPCMPGTWZ256rm; break; + case X86::VPCMPWZ256rmik: NewOpc = X86::VPCMPGTWZ256rmk; break; + case X86::VPCMPWZ256rri: NewOpc = X86::VPCMPGTWZ256rr; break; + case X86::VPCMPWZ256rrik: NewOpc = X86::VPCMPGTWZ256rrk; break; + case X86::VPCMPWZrmi: NewOpc = X86::VPCMPGTWZrm; break; + case X86::VPCMPWZrmik: NewOpc = X86::VPCMPGTWZrmk; break; + case X86::VPCMPWZrri: NewOpc = X86::VPCMPGTWZrr; break; + case X86::VPCMPWZrrik: NewOpc = X86::VPCMPGTWZrrk; break; + } + + OutMI.setOpcode(NewOpc); + OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1)); + break; + } + break; } + // CALL64r, CALL64pcrel32 - These instructions used to have + // register inputs modeled as normal uses instead of implicit uses. As such, + // they we used to truncate off all but the first operand (the callee). This + // issue seems to have been fixed at some point. This assert verifies that. + case X86::CALL64r: + case X86::CALL64pcrel32: + assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!"); + break; + case X86::EH_RETURN: case X86::EH_RETURN64: { OutMI = MCInst(); @@ -539,36 +743,30 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { break; } - // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump - // instruction. - { - unsigned Opcode; - case X86::TAILJMPr: - Opcode = X86::JMP32r; - goto SetTailJmpOpcode; - case X86::TAILJMPd: - case X86::TAILJMPd64: - Opcode = X86::JMP_1; - goto SetTailJmpOpcode; - - SetTailJmpOpcode: - MCOperand Saved = OutMI.getOperand(0); - OutMI = MCInst(); - OutMI.setOpcode(Opcode); - OutMI.addOperand(Saved); - break; - } + // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump + // instruction. 
+ case X86::TAILJMPr: + case X86::TAILJMPr64: + case X86::TAILJMPr64_REX: + case X86::TAILJMPd: + case X86::TAILJMPd64: + assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!"); + OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode())); + break; case X86::TAILJMPd_CC: - case X86::TAILJMPd64_CC: { - MCOperand Saved = OutMI.getOperand(0); - MCOperand Saved2 = OutMI.getOperand(1); - OutMI = MCInst(); - OutMI.setOpcode(X86::JCC_1); - OutMI.addOperand(Saved); - OutMI.addOperand(Saved2); + case X86::TAILJMPd64_CC: + assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!"); + OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode())); + break; + + case X86::TAILJMPm: + case X86::TAILJMPm64: + case X86::TAILJMPm64_REX: + assert(OutMI.getNumOperands() == X86::AddrNumOperands && + "Unexpected number of operands!"); + OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode())); break; - } case X86::DEC16r: case X86::DEC32r: @@ -958,7 +1156,7 @@ void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI, // FAULTING_LOAD_OP , , , // , - unsigned DefRegister = FaultingMI.getOperand(0).getReg(); + Register DefRegister = FaultingMI.getOperand(0).getReg(); FaultMaps::FaultKind FK = static_cast(FaultingMI.getOperand(1).getImm()); MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol(); @@ -1079,7 +1277,7 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI, // Emit MOV to materialize the target address and the CALL to target. // This is encoded with 12-13 bytes, depending on which register is used. - unsigned ScratchReg = MI.getOperand(ScratchIdx).getReg(); + Register ScratchReg = MI.getOperand(ScratchIdx).getReg(); if (X86II::isX86_64ExtendedReg(ScratchReg)) EncodedBytes = 13; else @@ -1369,6 +1567,7 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, recordSled(CurSled, MI, SledKind::TAIL_CALL); unsigned OpCode = MI.getOperand(0).getImm(); + OpCode = convertTailJumpOpcode(OpCode); MCInst TC; TC.setOpcode(OpCode); @@ -1538,8 +1737,6 @@ static void printConstant(const Constant *COp, raw_ostream &CS) { void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) { assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); assert(getSubtarget().isOSWindows() && "SEH_ instruction Windows only"); - const X86RegisterInfo *RI = - MF->getSubtarget().getRegisterInfo(); // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86. if (EmitFPOData) { @@ -1577,17 +1774,16 @@ void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) { // Otherwise, use the .seh_ directives for all other Windows platforms. 
  switch (MI->getOpcode()) {
  case X86::SEH_PushReg:
-    OutStreamer->EmitWinCFIPushReg(
-        RI->getSEHRegNum(MI->getOperand(0).getImm()));
+    OutStreamer->EmitWinCFIPushReg(MI->getOperand(0).getImm());
    break;
 
  case X86::SEH_SaveReg:
-    OutStreamer->EmitWinCFISaveReg(RI->getSEHRegNum(MI->getOperand(0).getImm()),
+    OutStreamer->EmitWinCFISaveReg(MI->getOperand(0).getImm(),
                                   MI->getOperand(1).getImm());
    break;
 
  case X86::SEH_SaveXMM:
-    OutStreamer->EmitWinCFISaveXMM(RI->getSEHRegNum(MI->getOperand(0).getImm()),
+    OutStreamer->EmitWinCFISaveXMM(MI->getOperand(0).getImm(),
                                   MI->getOperand(1).getImm());
    break;
 
@@ -1596,9 +1792,8 @@ void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
    break;
 
  case X86::SEH_SetFrame:
-    OutStreamer->EmitWinCFISetFrame(
-        RI->getSEHRegNum(MI->getOperand(0).getImm()),
-        MI->getOperand(1).getImm());
+    OutStreamer->EmitWinCFISetFrame(MI->getOperand(0).getImm(),
+                                    MI->getOperand(1).getImm());
    break;
 
  case X86::SEH_PushFrame:
@@ -1650,7 +1845,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    // Lower these as normal, but add some comments.
-    unsigned Reg = MI->getOperand(0).getReg();
+    Register Reg = MI->getOperand(0).getReg();
    OutStreamer->AddComment(StringRef("eh_return, addr: %") +
                            X86ATTInstPrinter::getRegisterName(Reg));
    break;
@@ -1697,11 +1892,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
  case X86::MASKPAIR16LOAD: {
    int64_t Disp = MI->getOperand(1 + X86::AddrDisp).getImm();
    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
-    const X86RegisterInfo *RI =
-        MF->getSubtarget<X86Subtarget>().getRegisterInfo();
-    unsigned Reg = MI->getOperand(0).getReg();
-    unsigned Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
-    unsigned Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
+    Register Reg = MI->getOperand(0).getReg();
+    Register Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
+    Register Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
 
    // Load the first mask register
    MCInstBuilder MIB = MCInstBuilder(X86::KMOVWkm);
@@ -1730,11 +1923,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
  case X86::MASKPAIR16STORE: {
    int64_t Disp = MI->getOperand(X86::AddrDisp).getImm();
    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
-    const X86RegisterInfo *RI =
-        MF->getSubtarget<X86Subtarget>().getRegisterInfo();
-    unsigned Reg = MI->getOperand(X86::AddrNumOperands).getReg();
-    unsigned Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
-    unsigned Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
+    Register Reg = MI->getOperand(X86::AddrNumOperands).getReg();
+    Register Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
+    Register Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
 
    // Store the first mask register
    MCInstBuilder MIB = MCInstBuilder(X86::KMOVWmk);
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index d7e535598d81..5cb80a082b56 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -36,6 +36,10 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
  /// is stashed.
  signed char RestoreBasePointerOffset = 0;
 
+  /// WinEHXMMSlotInfo - Slot information of XMM registers in the stack frame
+  /// in bytes.
+  DenseMap<int, unsigned> WinEHXMMSlotInfo;
+
  /// CalleeSavedFrameSize - Size of the callee-saved register portion of the
  /// stack frame in bytes.
  unsigned CalleeSavedFrameSize = 0;
@@ -120,6 +124,10 @@ public:
  void setRestoreBasePointer(const MachineFunction *MF);
  int getRestoreBasePointerOffset() const {return RestoreBasePointerOffset; }
 
+  DenseMap<int, unsigned>& getWinEHXMMSlotInfo() { return WinEHXMMSlotInfo; }
+  const DenseMap<int, unsigned>& getWinEHXMMSlotInfo() const {
+    return WinEHXMMSlotInfo; }
+
  unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
  void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
 
diff --git a/lib/Target/X86/X86OptimizeLEAs.cpp b/lib/Target/X86/X86OptimizeLEAs.cpp
index 7f75598b0655..1aee01563c4b 100644
--- a/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -198,8 +198,7 @@ static inline MemOpKey getMemOpKey(const MachineInstr &MI, unsigned N) {
 static inline bool isIdenticalOp(const MachineOperand &MO1,
                                  const MachineOperand &MO2) {
   return MO1.isIdenticalTo(MO2) &&
-         (!MO1.isReg() ||
-          !TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
+         (!MO1.isReg() || !Register::isPhysicalRegister(MO1.getReg()));
 }
 
 #ifndef NDEBUG
@@ -235,9 +234,9 @@ static inline bool isLEA(const MachineInstr &MI) {
 
 namespace {
 
-class OptimizeLEAPass : public MachineFunctionPass {
+class X86OptimizeLEAPass : public MachineFunctionPass {
 public:
-  OptimizeLEAPass() : MachineFunctionPass(ID) {}
+  X86OptimizeLEAPass() : MachineFunctionPass(ID) {}
 
   StringRef getPassName() const override { return "X86 LEA Optimize"; }
 
@@ -246,6 +245,8 @@ public:
   /// been calculated by LEA. Also, remove redundant LEAs.
   bool runOnMachineFunction(MachineFunction &MF) override;
 
+  static char ID;
+
 private:
   using MemOpMap = DenseMap<MemOpKey, SmallVector<MachineInstr *, 16>>;
 
@@ -296,18 +297,18 @@ private:
   MachineRegisterInfo *MRI;
   const X86InstrInfo *TII;
   const X86RegisterInfo *TRI;
-
-  static char ID;
 };
 
 } // end anonymous namespace
 
-char OptimizeLEAPass::ID = 0;
+char X86OptimizeLEAPass::ID = 0;
 
-FunctionPass *llvm::createX86OptimizeLEAs() { return new OptimizeLEAPass(); }
+FunctionPass *llvm::createX86OptimizeLEAs() { return new X86OptimizeLEAPass(); }
+INITIALIZE_PASS(X86OptimizeLEAPass, DEBUG_TYPE, "X86 optimize LEA pass", false,
+                false)
 
-int OptimizeLEAPass::calcInstrDist(const MachineInstr &First,
-                                   const MachineInstr &Last) {
+int X86OptimizeLEAPass::calcInstrDist(const MachineInstr &First,
+                                      const MachineInstr &Last) {
   // Both instructions must be in the same basic block and they must be
   // presented in InstrPos.
   assert(Last.getParent() == First.getParent() &&
@@ -328,10 +329,9 @@ int OptimizeLEAPass::calcInstrDist(const MachineInstr &First,
 // 3) Displacement of the new memory operand should fit in 1 byte if possible.
 // 4) The LEA should be as close to MI as possible, and prior to it if
 //    possible.
-bool OptimizeLEAPass::chooseBestLEA(const SmallVectorImpl<MachineInstr *> &List,
-                                    const MachineInstr &MI,
-                                    MachineInstr *&BestLEA,
-                                    int64_t &AddrDispShift, int &Dist) {
+bool X86OptimizeLEAPass::chooseBestLEA(
+    const SmallVectorImpl<MachineInstr *> &List, const MachineInstr &MI,
+    MachineInstr *&BestLEA, int64_t &AddrDispShift, int &Dist) {
   const MachineFunction *MF = MI.getParent()->getParent();
   const MCInstrDesc &Desc = MI.getDesc();
   int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags) +
@@ -387,9 +387,10 @@ bool OptimizeLEAPass::chooseBestLEA(const SmallVectorImpl<MachineInstr *> &List,
 // Get the difference between the addresses' displacements of the two
 // instructions \p MI1 and \p MI2. The numbers of the first memory operands are
 // passed through \p N1 and \p N2.
-int64_t OptimizeLEAPass::getAddrDispShift(const MachineInstr &MI1, unsigned N1, - const MachineInstr &MI2, - unsigned N2) const { +int64_t X86OptimizeLEAPass::getAddrDispShift(const MachineInstr &MI1, + unsigned N1, + const MachineInstr &MI2, + unsigned N2) const { const MachineOperand &Op1 = MI1.getOperand(N1 + X86::AddrDisp); const MachineOperand &Op2 = MI2.getOperand(N2 + X86::AddrDisp); @@ -411,9 +412,9 @@ int64_t OptimizeLEAPass::getAddrDispShift(const MachineInstr &MI1, unsigned N1, // 2) Def registers of LEAs belong to the same class. // 3) All uses of the Last LEA def register are replaceable, thus the // register is used only as address base. -bool OptimizeLEAPass::isReplaceable(const MachineInstr &First, - const MachineInstr &Last, - int64_t &AddrDispShift) const { +bool X86OptimizeLEAPass::isReplaceable(const MachineInstr &First, + const MachineInstr &Last, + int64_t &AddrDispShift) const { assert(isLEA(First) && isLEA(Last) && "The function works only with LEA instructions"); @@ -467,7 +468,8 @@ bool OptimizeLEAPass::isReplaceable(const MachineInstr &First, return true; } -void OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB, MemOpMap &LEAs) { +void X86OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB, + MemOpMap &LEAs) { unsigned Pos = 0; for (auto &MI : MBB) { // Assign the position number to the instruction. Note that we are going to @@ -485,7 +487,7 @@ void OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB, MemOpMap &LEAs) { // Try to find load and store instructions which recalculate addresses already // calculated by some LEA and replace their memory operands with its def // register. -bool OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) { +bool X86OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) { bool Changed = false; assert(!LEAs.empty()); @@ -564,9 +566,9 @@ bool OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) { return Changed; } -MachineInstr *OptimizeLEAPass::replaceDebugValue(MachineInstr &MI, - unsigned VReg, - int64_t AddrDispShift) { +MachineInstr *X86OptimizeLEAPass::replaceDebugValue(MachineInstr &MI, + unsigned VReg, + int64_t AddrDispShift) { DIExpression *Expr = const_cast(MI.getDebugExpression()); if (AddrDispShift != 0) Expr = DIExpression::prepend(Expr, DIExpression::StackValue, AddrDispShift); @@ -583,7 +585,7 @@ MachineInstr *OptimizeLEAPass::replaceDebugValue(MachineInstr &MI, } // Try to find similar LEAs in the list and replace one with another. -bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) { +bool X86OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) { bool Changed = false; // Loop over all entries in the table. @@ -613,8 +615,8 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) { // Loop over all uses of the Last LEA and update their operands. Note // that the correctness of this has already been checked in the // isReplaceable function. 
- unsigned FirstVReg = First.getOperand(0).getReg(); - unsigned LastVReg = Last.getOperand(0).getReg(); + Register FirstVReg = First.getOperand(0).getReg(); + Register LastVReg = Last.getOperand(0).getReg(); for (auto UI = MRI->use_begin(LastVReg), UE = MRI->use_end(); UI != UE;) { MachineOperand &MO = *UI++; @@ -670,7 +672,7 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) { return Changed; } -bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { +bool X86OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; if (DisableX86LEAOpt || skipFunction(MF.getFunction())) diff --git a/lib/Target/X86/X86RegisterBankInfo.cpp b/lib/Target/X86/X86RegisterBankInfo.cpp index 78fede3dcde2..daddf4231897 100644 --- a/lib/Target/X86/X86RegisterBankInfo.cpp +++ b/lib/Target/X86/X86RegisterBankInfo.cpp @@ -46,7 +46,9 @@ const RegisterBank &X86RegisterBankInfo::getRegBankFromRegClass( if (X86::GR8RegClass.hasSubClassEq(&RC) || X86::GR16RegClass.hasSubClassEq(&RC) || X86::GR32RegClass.hasSubClassEq(&RC) || - X86::GR64RegClass.hasSubClassEq(&RC)) + X86::GR64RegClass.hasSubClassEq(&RC) || + X86::LOW32_ADDR_ACCESSRegClass.hasSubClassEq(&RC) || + X86::LOW32_ADDR_ACCESS_RBPRegClass.hasSubClassEq(&RC)) return getRegBank(X86::GPRRegBankID); if (X86::FR32XRegClass.hasSubClassEq(&RC) || diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 2e2f1f9e438a..ff625325b4c9 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -544,7 +544,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { "Stack realignment in presence of dynamic allocas is not supported with" "this calling convention."); - unsigned BasePtr = getX86SubSuperRegister(getBaseRegister(), 64); + Register BasePtr = getX86SubSuperRegister(getBaseRegister(), 64); for (MCSubRegIterator I(BasePtr, this, /*IncludeSelf=*/true); I.isValid(); ++I) Reserved.set(*I); @@ -677,13 +677,13 @@ static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) { MI.getOperand(4).getImm() != 0 || MI.getOperand(5).getReg() != X86::NoRegister) return false; - unsigned BasePtr = MI.getOperand(1).getReg(); + Register BasePtr = MI.getOperand(1).getReg(); // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will // be replaced with a 32-bit operand MOV which will zero extend the upper // 32-bits of the super register. if (Opc == X86::LEA64_32r) BasePtr = getX86SubSuperRegister(BasePtr, 32); - unsigned NewDestReg = MI.getOperand(0).getReg(); + Register NewDestReg = MI.getOperand(0).getReg(); const X86InstrInfo *TII = MI.getParent()->getParent()->getSubtarget().getInstrInfo(); TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr, @@ -692,12 +692,27 @@ static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) { return true; } +static bool isFuncletReturnInstr(MachineInstr &MI) { + switch (MI.getOpcode()) { + case X86::CATCHRET: + case X86::CLEANUPRET: + return true; + default: + return false; + } + llvm_unreachable("impossible"); +} + void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const { MachineInstr &MI = *II; - MachineFunction &MF = *MI.getParent()->getParent(); + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + bool IsEHFuncletEpilogue = MBBI == MBB.end() ? 
false + : isFuncletReturnInstr(*MBBI); const X86FrameLowering *TFI = getFrameLowering(MF); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); @@ -709,6 +724,8 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) && "Return instruction can only reference SP relative frame objects"); FIOffset = TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0); + } else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) { + FIOffset = TFI->getWin64EHFrameIndexRef(MF, FrameIndex, BasePtr); } else { FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr); } @@ -729,7 +746,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // register as source operand, semantic is the same and destination is // 32-bits. It saves one byte per lea in code since 0x67 prefix is avoided. // Don't change BasePtr since it is used later for stack adjustment. - unsigned MachineBasePtr = BasePtr; + Register MachineBasePtr = BasePtr; if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr)) MachineBasePtr = getX86SubSuperRegister(BasePtr, 64); @@ -773,7 +790,7 @@ Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const { unsigned X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const { const X86Subtarget &Subtarget = MF.getSubtarget(); - unsigned FrameReg = getFrameRegister(MF); + Register FrameReg = getFrameRegister(MF); if (Subtarget.isTarget64BitILP32()) FrameReg = getX86SubSuperRegister(FrameReg, 32); return FrameReg; @@ -782,7 +799,7 @@ X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const { unsigned X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const { const X86Subtarget &Subtarget = MF.getSubtarget(); - unsigned StackReg = getStackRegister(); + Register StackReg = getStackRegister(); if (Subtarget.isTarget64BitILP32()) StackReg = getX86SubSuperRegister(StackReg, 32); return StackReg; diff --git a/lib/Target/X86/X86RetpolineThunks.cpp b/lib/Target/X86/X86RetpolineThunks.cpp index b435b22e8ac7..f8464c7e8298 100644 --- a/lib/Target/X86/X86RetpolineThunks.cpp +++ b/lib/Target/X86/X86RetpolineThunks.cpp @@ -58,8 +58,8 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { MachineFunctionPass::getAnalysisUsage(AU); - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); } private: @@ -97,7 +97,7 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) { TII = STI->getInstrInfo(); Is64Bit = TM->getTargetTriple().getArch() == Triple::x86_64; - MMI = &getAnalysis(); + MMI = &getAnalysis().getMMI(); Module &M = const_cast(*MMI->getModule()); // If this function is not a thunk, check to see if we need to insert @@ -279,7 +279,7 @@ void X86RetpolineThunks::populateThunk(MachineFunction &MF, CallTarget->addLiveIn(Reg); CallTarget->setHasAddressTaken(); - CallTarget->setAlignment(4); + CallTarget->setAlignment(Align(16)); insertRegReturnAddrClobber(*CallTarget, Reg); CallTarget->back().setPreInstrSymbol(MF, TargetSym); BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc)); diff --git a/lib/Target/X86/X86SchedBroadwell.td b/lib/Target/X86/X86SchedBroadwell.td index 7574e4b8f896..9b1fcaa8a13d 100755 --- a/lib/Target/X86/X86SchedBroadwell.td +++ b/lib/Target/X86/X86SchedBroadwell.td @@ -232,8 +232,12 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; + +defm : X86WriteRes; +defm : X86WriteRes; +defm : 
X86WriteRes; +defm : X86WriteRes; + defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td index 284d1567c5c6..06f417501b21 100644 --- a/lib/Target/X86/X86SchedHaswell.td +++ b/lib/Target/X86/X86SchedHaswell.td @@ -231,8 +231,12 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; + +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; + defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/lib/Target/X86/X86SchedPredicates.td b/lib/Target/X86/X86SchedPredicates.td index 41bd776648f7..76001d382a27 100644 --- a/lib/Target/X86/X86SchedPredicates.td +++ b/lib/Target/X86/X86SchedPredicates.td @@ -84,3 +84,60 @@ def IsSETAm_Or_SETBEm : CheckAny<[ CheckImmOperand_s<5, "X86::COND_A">, CheckImmOperand_s<5, "X86::COND_BE"> ]>; + +// A predicate used to check if an instruction has a LOCK prefix. +def CheckLockPrefix : CheckFunctionPredicate< + "X86_MC::hasLockPrefix", + "X86InstrInfo::hasLockPrefix" +>; + +def IsRegRegCompareAndSwap_8 : CheckOpcode<[ CMPXCHG8rr ]>; + +def IsRegMemCompareAndSwap_8 : CheckOpcode<[ + LCMPXCHG8, CMPXCHG8rm +]>; + +def IsRegRegCompareAndSwap_16_32_64 : CheckOpcode<[ + CMPXCHG16rr, CMPXCHG32rr, CMPXCHG64rr +]>; + +def IsRegMemCompareAndSwap_16_32_64 : CheckOpcode<[ + CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm, + LCMPXCHG16, LCMPXCHG32, LCMPXCHG64, + LCMPXCHG8B, LCMPXCHG16B +]>; + +def IsCompareAndSwap8B : CheckOpcode<[ CMPXCHG8B, LCMPXCHG8B ]>; +def IsCompareAndSwap16B : CheckOpcode<[ CMPXCHG16B, LCMPXCHG16B ]>; + +def IsRegMemCompareAndSwap : CheckOpcode< + !listconcat( + IsRegMemCompareAndSwap_8.ValidOpcodes, + IsRegMemCompareAndSwap_16_32_64.ValidOpcodes + )>; + +def IsRegRegCompareAndSwap : CheckOpcode< + !listconcat( + IsRegRegCompareAndSwap_8.ValidOpcodes, + IsRegRegCompareAndSwap_16_32_64.ValidOpcodes + )>; + +def IsAtomicCompareAndSwap_8 : CheckAll<[ + CheckLockPrefix, + IsRegMemCompareAndSwap_8 +]>; + +def IsAtomicCompareAndSwap : CheckAll<[ + CheckLockPrefix, + IsRegMemCompareAndSwap +]>; + +def IsAtomicCompareAndSwap8B : CheckAll<[ + CheckLockPrefix, + IsCompareAndSwap8B +]>; + +def IsAtomicCompareAndSwap16B : CheckAll<[ + CheckLockPrefix, + IsCompareAndSwap16B +]>; diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td index d40bdf728a48..26d4d8fa3549 100644 --- a/lib/Target/X86/X86SchedSandyBridge.td +++ b/lib/Target/X86/X86SchedSandyBridge.td @@ -208,8 +208,12 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; + +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; + defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/lib/Target/X86/X86SchedSkylakeClient.td b/lib/Target/X86/X86SchedSkylakeClient.td index 8f3e4ae62d53..9a511ecc0071 100644 --- a/lib/Target/X86/X86SchedSkylakeClient.td +++ b/lib/Target/X86/X86SchedSkylakeClient.td @@ -226,8 +226,12 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; + +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; + defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/lib/Target/X86/X86SchedSkylakeServer.td b/lib/Target/X86/X86SchedSkylakeServer.td index 58caf1dacfcb..a8c65435ab9b 100755 --- a/lib/Target/X86/X86SchedSkylakeServer.td +++ 
b/lib/Target/X86/X86SchedSkylakeServer.td @@ -226,8 +226,12 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; + +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; + defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index 55ca85ec1e3d..95f710061aeb 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -102,6 +102,12 @@ class X86SchedWriteMoveLS { + SchedWrite RM = LoadRM; + SchedWrite MR = StoreMR; +} + // Multiclass that wraps X86SchedWriteMoveLS for each vector width. class X86SchedWriteMoveLSWidths; +// Conditional SIMD Packed Loads and Stores wrappers. +def WriteFMaskMove32 + : X86SchedWriteMaskMove; +def WriteFMaskMove64 + : X86SchedWriteMaskMove; +def WriteFMaskMove32Y + : X86SchedWriteMaskMove; +def WriteFMaskMove64Y + : X86SchedWriteMaskMove; + // Vector width wrappers. def SchedWriteFAdd : X86SchedWriteWidths; diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index b0334655de7e..78acb1065ec8 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -216,8 +216,10 @@ defm : X86WriteResUnsupported; def : WriteRes; def : WriteRes; defm : X86WriteResUnsupported; -defm : X86WriteResUnsupported; -defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; def : WriteRes; def : WriteRes; diff --git a/lib/Target/X86/X86ScheduleBdVer2.td b/lib/Target/X86/X86ScheduleBdVer2.td index 8cc01c3acece..d7aea3cf4e9d 100644 --- a/lib/Target/X86/X86ScheduleBdVer2.td +++ b/lib/Target/X86/X86ScheduleBdVer2.td @@ -726,8 +726,10 @@ defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; -defm : PdWriteRes; -defm : PdWriteRes; +defm : PdWriteRes; +defm : PdWriteRes; +defm : PdWriteRes; +defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td index 2d26232b4132..d0421d94ee05 100644 --- a/lib/Target/X86/X86ScheduleBtVer2.td +++ b/lib/Target/X86/X86ScheduleBtVer2.td @@ -180,9 +180,11 @@ multiclass JWriteResYMMPair; +// A folded store needs a cycle on the SAGU for the store data, most RMW +// instructions don't need an extra uop. ALU RMW operations don't seem to +// benefit from STLF, and their observed latency is 6cy. That is the reason why +// this write adds two extra cycles (instead of just 1cy for the store). +defm : X86WriteRes; //////////////////////////////////////////////////////////////////////////////// // Arithmetic. 
@@ -191,22 +193,22 @@ defm : X86WriteRes; defm : JWriteResIntPair; defm : JWriteResIntPair; -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; - -defm : JWriteResIntPair; -defm : JWriteResIntPair; -defm : JWriteResIntPair; -defm : JWriteResIntPair; -defm : JWriteResIntPair; -defm : JWriteResIntPair; -defm : JWriteResIntPair; -defm : JWriteResIntPair; -defm : JWriteResIntPair; -defm : JWriteResIntPair; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; + +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; defm : X86WriteRes; defm : JWriteResIntPair; @@ -305,6 +307,192 @@ def : WriteRes; // to '1' to tell the scheduler that the nop uses an ALU slot for a cycle. def : WriteRes { let Latency = 1; } +def JWriteCMPXCHG8rr : SchedWriteRes<[JALU01]> { + let Latency = 3; + let ResourceCycles = [3]; + let NumMicroOps = 3; +} + +def JWriteLOCK_CMPXCHG8rm : SchedWriteRes<[JALU01, JLAGU, JSAGU]> { + let Latency = 16; + let ResourceCycles = [3,16,16]; + let NumMicroOps = 5; +} + +def JWriteLOCK_CMPXCHGrm : SchedWriteRes<[JALU01, JLAGU, JSAGU]> { + let Latency = 17; + let ResourceCycles = [3,17,17]; + let NumMicroOps = 6; +} + +def JWriteCMPXCHG8rm : SchedWriteRes<[JALU01, JLAGU, JSAGU]> { + let Latency = 11; + let ResourceCycles = [3,1,1]; + let NumMicroOps = 5; +} + +def JWriteCMPXCHG8B : SchedWriteRes<[JALU01, JLAGU, JSAGU]> { + let Latency = 11; + let ResourceCycles = [3,1,1]; + let NumMicroOps = 18; +} + +def JWriteCMPXCHG16B : SchedWriteRes<[JALU01, JLAGU, JSAGU]> { + let Latency = 32; + let ResourceCycles = [6,1,1]; + let NumMicroOps = 28; +} + +def JWriteLOCK_CMPXCHG8B : SchedWriteRes<[JALU01, JLAGU, JSAGU]> { + let Latency = 19; + let ResourceCycles = [3,19,19]; + let NumMicroOps = 18; +} + +def JWriteLOCK_CMPXCHG16B : SchedWriteRes<[JALU01, JLAGU, JSAGU]> { + let Latency = 38; + let ResourceCycles = [6,38,38]; + let NumMicroOps = 28; +} + +def JWriteCMPXCHGVariant : SchedWriteVariant<[ + SchedVar, [JWriteLOCK_CMPXCHG8B]>, + SchedVar, [JWriteLOCK_CMPXCHG16B]>, + SchedVar, [JWriteLOCK_CMPXCHG8rm]>, + SchedVar, [JWriteLOCK_CMPXCHGrm]>, + SchedVar, [JWriteCMPXCHG8B]>, + SchedVar, [JWriteCMPXCHG16B]>, + SchedVar, [JWriteCMPXCHG8rm]>, + SchedVar, [WriteCMPXCHGRMW]>, + SchedVar, [JWriteCMPXCHG8rr]>, + SchedVar +]>; + +// The first five reads are contributed by the memory load operand. +// We ignore those reads and set a read-advance for the other input operands +// including the implicit read of RAX. +def : InstRW<[JWriteCMPXCHGVariant, + ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + ReadAfterLd, ReadAfterLd], (instrs LCMPXCHG8, LCMPXCHG16, + LCMPXCHG32, LCMPXCHG64, + CMPXCHG8rm, CMPXCHG16rm, + CMPXCHG32rm, CMPXCHG64rm)>; + +def : InstRW<[JWriteCMPXCHGVariant], (instrs CMPXCHG8rr, CMPXCHG16rr, + CMPXCHG32rr, CMPXCHG64rr)>; + +def : InstRW<[JWriteCMPXCHGVariant, + // Ignore reads contributed by the memory operand. + ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + // Add a read-advance to every implicit register read. 
+ ReadAfterLd, ReadAfterLd, ReadAfterLd, ReadAfterLd], (instrs LCMPXCHG8B, LCMPXCHG16B, + CMPXCHG8B, CMPXCHG16B)>; + +def JWriteLOCK_ALURMW : SchedWriteRes<[JALU01, JLAGU, JSAGU]> { + let Latency = 19; + let ResourceCycles = [1,19,19]; + let NumMicroOps = 1; +} + +def JWriteLOCK_ALURMWVariant : SchedWriteVariant<[ + SchedVar, [JWriteLOCK_ALURMW]>, + SchedVar +]>; +def : InstRW<[JWriteLOCK_ALURMWVariant], (instrs INC8m, INC16m, INC32m, INC64m, + DEC8m, DEC16m, DEC32m, DEC64m, + NOT8m, NOT16m, NOT32m, NOT64m, + NEG8m, NEG16m, NEG32m, NEG64m)>; + +def JWriteXCHG8rr_XADDrr : SchedWriteRes<[JALU01]> { + let Latency = 2; + let ResourceCycles = [3]; + let NumMicroOps = 3; +} +def : InstRW<[JWriteXCHG8rr_XADDrr], (instrs XCHG8rr, XADD8rr, XADD16rr, + XADD32rr, XADD64rr)>; + +// This write defines the latency of the in/out register operand of a non-atomic +// XADDrm. This is the first of a pair of writes that model non-atomic +// XADDrm instructions (the second write definition is JWriteXADDrm_LdSt_Part). +// +// We need two writes because the instruction latency differs from the output +// register operand latency. In particular, the first write describes the first +// (and only) output register operand of the instruction. However, the +// instruction latency is set to the MAX of all the write latencies. That's why +// a second write is needed in this case (see example below). +// +// Example: +// XADD %ecx, (%rsp) ## Instruction latency: 11cy +// ## ECX write Latency: 3cy +// +// Register ECX becomes available in 3 cycles. That is because the value of ECX +// is exchanged with the value read from the stack pointer, and the load-to-use +// latency is assumed to be 3cy. +def JWriteXADDrm_XCHG_Part : SchedWriteRes<[JALU01]> { + let Latency = 3; // load-to-use latency + let ResourceCycles = [3]; + let NumMicroOps = 3; +} + +// This write defines the latency of the in/out register operand of an atomic +// XADDrm. This is the first of a sequence of two writes used to model atomic +// XADD instructions. The second write of the sequence is JWriteXCHGrm_LdSt_Part. +// +// +// Example: +// LOCK XADD %ecx, (%rsp) ## Instruction Latency: 16cy +// ## ECX write Latency: 11cy +// +// The value of ECX becomes available only after 11cy from the start of +// execution. This write is used to specifically set that operand latency. +def JWriteLOCK_XADDrm_XCHG_Part : SchedWriteRes<[JALU01]> { + let Latency = 11; + let ResourceCycles = [3]; + let NumMicroOps = 3; +} + +// This write defines the latency of the in/out register operand of an atomic +// XCHGrm. This write is the first of a sequence of two writes that describe +// atomic XCHG operations. We need two writes because the instruction latency +// differs from the output register write latency. We want to make sure that +// the output register operand becomes visible after 11cy. However, we want to +// set the instruction latency to 16cy. 
+def JWriteXCHGrm_XCHG_Part : SchedWriteRes<[JALU01]> { + let Latency = 11; + let ResourceCycles = [2]; + let NumMicroOps = 2; +} + +def JWriteXADDrm_LdSt_Part : SchedWriteRes<[JLAGU, JSAGU]> { + let Latency = 11; + let ResourceCycles = [1, 1]; + let NumMicroOps = 1; +} + +def JWriteXCHGrm_LdSt_Part : SchedWriteRes<[JLAGU, JSAGU]> { + let Latency = 16; + let ResourceCycles = [16, 16]; + let NumMicroOps = 1; +} + +def JWriteXADDrm_Part1 : SchedWriteVariant<[ + SchedVar, [JWriteLOCK_XADDrm_XCHG_Part]>, + SchedVar +]>; + +def JWriteXADDrm_Part2 : SchedWriteVariant<[ + SchedVar, [JWriteXCHGrm_LdSt_Part]>, + SchedVar +]>; + +def : InstRW<[JWriteXADDrm_Part1, JWriteXADDrm_Part2, ReadAfterLd], + (instrs XADD8rm, XADD16rm, XADD32rm, XADD64rm, + LXADD8, LXADD16, LXADD32, LXADD64)>; + +def : InstRW<[JWriteXCHGrm_XCHG_Part, JWriteXCHGrm_LdSt_Part, ReadAfterLd], + (instrs XCHG8rm, XCHG16rm, XCHG32rm, XCHG64rm)>; + + //////////////////////////////////////////////////////////////////////////////// // Floating point. This covers both scalar and vector operations. //////////////////////////////////////////////////////////////////////////////// @@ -313,19 +501,22 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; + +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; @@ -466,8 +657,8 @@ defm : X86WriteResUnsupported; //////////////////////////////////////////////////////////////////////////////// defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; @@ -475,7 +666,7 @@ defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; @@ -630,6 +821,18 @@ def JWriteJVZEROUPPER: SchedWriteRes<[]> { } def : InstRW<[JWriteJVZEROUPPER], (instrs VZEROUPPER)>; +/////////////////////////////////////////////////////////////////////////////// +// SSE2/AVX Store Selected Bytes of Double Quadword - (V)MASKMOVDQ +/////////////////////////////////////////////////////////////////////////////// + +def JWriteMASKMOVDQU: SchedWriteRes<[JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JALU01]> { + let Latency = 34; + let ResourceCycles = [1, 1, 2, 2, 2, 16, 42]; + let NumMicroOps = 63; +} +def : InstRW<[JWriteMASKMOVDQU], (instrs MASKMOVDQU, MASKMOVDQU64, + VMASKMOVDQU, VMASKMOVDQU64)>; + /////////////////////////////////////////////////////////////////////////////// // SchedWriteVariant definitions. 
/////////////////////////////////////////////////////////////////////////////// diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td index 34c251a5c5bb..8e3ce721f1a1 100644 --- a/lib/Target/X86/X86ScheduleSLM.td +++ b/lib/Target/X86/X86ScheduleSLM.td @@ -186,8 +186,12 @@ def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; -def : WriteRes; -def : WriteRes; + +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + def : WriteRes; def : WriteRes; def : WriteRes; diff --git a/lib/Target/X86/X86ScheduleZnver1.td b/lib/Target/X86/X86ScheduleZnver1.td index 65f6d89df610..06201f4a3a84 100644 --- a/lib/Target/X86/X86ScheduleZnver1.td +++ b/lib/Target/X86/X86ScheduleZnver1.td @@ -268,8 +268,12 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; + +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; + defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index 50690953eef5..1ae8df977f83 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -36,7 +36,7 @@ bool X86SelectionDAGInfo::isBaseRegConflictPossible( const X86RegisterInfo *TRI = static_cast( DAG.getSubtarget().getRegisterInfo()); - unsigned BaseReg = TRI->getBaseRegister(); + Register BaseReg = TRI->getBaseRegister(); for (unsigned R : ClobberSet) if (BaseReg == R) return true; diff --git a/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/lib/Target/X86/X86SpeculativeLoadHardening.cpp index 40f5dbe57e4b..b8980789258e 100644 --- a/lib/Target/X86/X86SpeculativeLoadHardening.cpp +++ b/lib/Target/X86/X86SpeculativeLoadHardening.cpp @@ -477,7 +477,7 @@ bool X86SpeculativeLoadHardeningPass::runOnMachineFunction( // Otherwise, just build the predicate state itself by zeroing a register // as we don't need any initial state. PS->InitialReg = MRI->createVirtualRegister(PS->RC); - unsigned PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass); + Register PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass); auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV32r0), PredStateSubReg); ++NumInstsInserted; @@ -750,7 +750,7 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG( int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8; auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes); - unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC); + Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC); // Note that we intentionally use an empty debug location so that // this picks up the preceding location. auto CMovI = BuildMI(CheckingMBB, InsertPt, DebugLoc(), @@ -907,7 +907,7 @@ void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads( MI.dump(); dbgs() << "\n"); report_fatal_error("Unable to unfold load!"); } - unsigned Reg = MRI->createVirtualRegister(UnfoldedRC); + Register Reg = MRI->createVirtualRegister(UnfoldedRC); SmallVector NewMIs; // If we were able to compute an unfolded reg class, any failure here // is just a programming error so just assert. @@ -1102,7 +1102,7 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches( // synthetic target in the predecessor. We do this at the bottom of the // predecessor. 
auto InsertPt = Pred->getFirstTerminator(); - unsigned TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass); + Register TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass); if (MF.getTarget().getCodeModel() == CodeModel::Small && !Subtarget->isPositionIndependent()) { // Directly materialize it into an immediate. @@ -1153,7 +1153,7 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches( LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n"); } else { // Otherwise compute the address into a register first. - unsigned AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass); + Register AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass); auto AddrI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::LEA64r), AddrReg) .addReg(/*Base*/ X86::RIP) @@ -1175,7 +1175,7 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches( // Now cmov over the predicate if the comparison wasn't equal. int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8; auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes); - unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC); + Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC); auto CMovI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg) .addReg(PS->InitialReg) @@ -1878,7 +1878,7 @@ unsigned X86SpeculativeLoadHardeningPass::saveEFLAGS( DebugLoc Loc) { // FIXME: Hard coding this to a 32-bit register class seems weird, but matches // what instruction selection does. - unsigned Reg = MRI->createVirtualRegister(&X86::GR32RegClass); + Register Reg = MRI->createVirtualRegister(&X86::GR32RegClass); // We directly copy the FLAGS register and rely on later lowering to clean // this up into the appropriate setCC instructions. BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), Reg).addReg(X86::EFLAGS); @@ -1905,7 +1905,7 @@ void X86SpeculativeLoadHardeningPass::restoreEFLAGS( void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc, unsigned PredStateReg) { - unsigned TmpReg = MRI->createVirtualRegister(PS->RC); + Register TmpReg = MRI->createVirtualRegister(PS->RC); // FIXME: This hard codes a shift distance based on the number of bits needed // to stay canonical on 64-bit. We should compute this somehow and support // 32-bit as part of that. @@ -1925,8 +1925,8 @@ void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP( unsigned X86SpeculativeLoadHardeningPass::extractPredStateFromSP( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc) { - unsigned PredStateReg = MRI->createVirtualRegister(PS->RC); - unsigned TmpReg = MRI->createVirtualRegister(PS->RC); + Register PredStateReg = MRI->createVirtualRegister(PS->RC); + Register TmpReg = MRI->createVirtualRegister(PS->RC); // We know that the stack pointer will have any preserved predicate state in // its high bit. We just want to smear this across the other bits. Turns out, @@ -2031,9 +2031,9 @@ void X86SpeculativeLoadHardeningPass::hardenLoadAddr( } for (MachineOperand *Op : HardenOpRegs) { - unsigned OpReg = Op->getReg(); + Register OpReg = Op->getReg(); auto *OpRC = MRI->getRegClass(OpReg); - unsigned TmpReg = MRI->createVirtualRegister(OpRC); + Register TmpReg = MRI->createVirtualRegister(OpRC); // If this is a vector register, we'll need somewhat custom logic to handle // hardening it. @@ -2045,7 +2045,7 @@ void X86SpeculativeLoadHardeningPass::hardenLoadAddr( // Move our state into a vector register. 
// FIXME: We could skip this at the cost of longer encodings with AVX-512 // but that doesn't seem likely worth it. - unsigned VStateReg = MRI->createVirtualRegister(&X86::VR128RegClass); + Register VStateReg = MRI->createVirtualRegister(&X86::VR128RegClass); auto MovI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::VMOV64toPQIrr), VStateReg) .addReg(StateReg); @@ -2054,7 +2054,7 @@ void X86SpeculativeLoadHardeningPass::hardenLoadAddr( LLVM_DEBUG(dbgs() << " Inserting mov: "; MovI->dump(); dbgs() << "\n"); // Broadcast it across the vector register. - unsigned VBStateReg = MRI->createVirtualRegister(OpRC); + Register VBStateReg = MRI->createVirtualRegister(OpRC); auto BroadcastI = BuildMI(MBB, InsertPt, Loc, TII->get(Is128Bit ? X86::VPBROADCASTQrr : X86::VPBROADCASTQYrr), @@ -2084,7 +2084,7 @@ void X86SpeculativeLoadHardeningPass::hardenLoadAddr( assert(Subtarget->hasVLX() && "AVX512VL-specific register classes!"); // Broadcast our state into a vector register. - unsigned VStateReg = MRI->createVirtualRegister(OpRC); + Register VStateReg = MRI->createVirtualRegister(OpRC); unsigned BroadcastOp = Is128Bit ? X86::VPBROADCASTQrZ128r : Is256Bit ? X86::VPBROADCASTQrZ256r : X86::VPBROADCASTQrZr; @@ -2153,7 +2153,7 @@ MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst( // See if we can sink hardening the loaded value. auto SinkCheckToSingleUse = [&](MachineInstr &MI) -> Optional { - unsigned DefReg = MI.getOperand(0).getReg(); + Register DefReg = MI.getOperand(0).getReg(); // We need to find a single use which we can sink the check. We can // primarily do this because many uses may already end up checked on their @@ -2210,8 +2210,8 @@ MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst( // If this register isn't a virtual register we can't walk uses of sanely, // just bail. Also check that its register class is one of the ones we // can harden. - unsigned UseDefReg = UseMI.getOperand(0).getReg(); - if (!TRI->isVirtualRegister(UseDefReg) || + Register UseDefReg = UseMI.getOperand(0).getReg(); + if (!Register::isVirtualRegister(UseDefReg) || !canHardenRegister(UseDefReg)) return {}; @@ -2241,6 +2241,9 @@ bool X86SpeculativeLoadHardeningPass::canHardenRegister(unsigned Reg) { // We don't support post-load hardening of vectors. return false; + unsigned RegIdx = Log2_32(RegBytes); + assert(RegIdx < 4 && "Unsupported register size"); + // If this register class is explicitly constrained to a class that doesn't // require REX prefix, we may not be able to satisfy that constraint when // emitting the hardening instructions, so bail out here. @@ -2251,13 +2254,13 @@ bool X86SpeculativeLoadHardeningPass::canHardenRegister(unsigned Reg) { const TargetRegisterClass *NOREXRegClasses[] = { &X86::GR8_NOREXRegClass, &X86::GR16_NOREXRegClass, &X86::GR32_NOREXRegClass, &X86::GR64_NOREXRegClass}; - if (RC == NOREXRegClasses[Log2_32(RegBytes)]) + if (RC == NOREXRegClasses[RegIdx]) return false; const TargetRegisterClass *GPRRegClasses[] = { &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass, &X86::GR64RegClass}; - return RC->hasSuperClassEq(GPRRegClasses[Log2_32(RegBytes)]); + return RC->hasSuperClassEq(GPRRegClasses[RegIdx]); } /// Harden a value in a register. 
@@ -2278,7 +2281,7 @@ unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister( unsigned Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc) { assert(canHardenRegister(Reg) && "Cannot harden this register!"); - assert(TRI->isVirtualRegister(Reg) && "Cannot harden a physical register!"); + assert(Register::isVirtualRegister(Reg) && "Cannot harden a physical register!"); auto *RC = MRI->getRegClass(Reg); int Bytes = TRI->getRegSizeInBits(*RC) / 8; @@ -2289,7 +2292,7 @@ unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister( if (Bytes != 8) { unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit}; unsigned SubRegImm = SubRegImms[Log2_32(Bytes)]; - unsigned NarrowStateReg = MRI->createVirtualRegister(RC); + Register NarrowStateReg = MRI->createVirtualRegister(RC); BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), NarrowStateReg) .addReg(StateReg, 0, SubRegImm); StateReg = NarrowStateReg; @@ -2299,7 +2302,7 @@ unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister( if (isEFLAGSLive(MBB, InsertPt, *TRI)) FlagsReg = saveEFLAGS(MBB, InsertPt, Loc); - unsigned NewReg = MRI->createVirtualRegister(RC); + Register NewReg = MRI->createVirtualRegister(RC); unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr}; unsigned OrOpCode = OrOpCodes[Log2_32(Bytes)]; auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOpCode), NewReg) @@ -2329,13 +2332,13 @@ unsigned X86SpeculativeLoadHardeningPass::hardenPostLoad(MachineInstr &MI) { DebugLoc Loc = MI.getDebugLoc(); auto &DefOp = MI.getOperand(0); - unsigned OldDefReg = DefOp.getReg(); + Register OldDefReg = DefOp.getReg(); auto *DefRC = MRI->getRegClass(OldDefReg); // Because we want to completely replace the uses of this def'ed value with // the hardened value, create a dedicated new register that will only be used // to communicate the unhardened value to the hardening. - unsigned UnhardenedReg = MRI->createVirtualRegister(DefRC); + Register UnhardenedReg = MRI->createVirtualRegister(DefRC); DefOp.setReg(UnhardenedReg); // Now harden this register's value, getting a hardened reg that is safe to @@ -2537,7 +2540,7 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall( .addReg(ExpectedRetAddrReg, RegState::Kill) .addSym(RetSymbol); } else { - unsigned ActualRetAddrReg = MRI->createVirtualRegister(AddrRC); + Register ActualRetAddrReg = MRI->createVirtualRegister(AddrRC); BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ActualRetAddrReg) .addReg(/*Base*/ X86::RIP) .addImm(/*Scale*/ 1) @@ -2554,7 +2557,7 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall( int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8; auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes); - unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC); + Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC); auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg) .addReg(NewStateReg, RegState::Kill) .addReg(PS->PoisonReg) @@ -2611,7 +2614,7 @@ void X86SpeculativeLoadHardeningPass::hardenIndirectCallOrJumpInstr( // For all of these, the target register is the first operand of the // instruction. auto &TargetOp = MI.getOperand(0); - unsigned OldTargetReg = TargetOp.getReg(); + Register OldTargetReg = TargetOp.getReg(); // Try to lookup a hardened version of this register. 
We retain a reference // here as we want to update the map to track any newly computed hardened diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index d5bb56603df9..f8f78da52cc2 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -146,6 +146,9 @@ unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV, return X86II::MO_DLLIMPORT; return X86II::MO_COFFSTUB; } + // Some JIT users use *-win32-elf triples; these shouldn't use GOT tables. + if (isOSWindows()) + return X86II::MO_NO_FLAG; if (is64Bit()) { // ELF supports a large, truly PIC code model with non-PC relative GOT @@ -285,10 +288,10 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { // Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD and Solaris (both // 32 and 64 bit) and for all 64-bit targets. if (StackAlignOverride) - stackAlignment = StackAlignOverride; + stackAlignment = *StackAlignOverride; else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() || isTargetKFreeBSD() || In64BitMode) - stackAlignment = 16; + stackAlignment = Align(16); // Some CPUs have more overhead for gather. The specified overhead is relative // to the Load operation. "2" is the number provided by Intel architects. This @@ -304,6 +307,8 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { // Consume the vector width attribute or apply any target specific limit. if (PreferVectorWidthOverride) PreferVectorWidth = PreferVectorWidthOverride; + else if (Prefer128Bit) + PreferVectorWidth = 128; else if (Prefer256Bit) PreferVectorWidth = 256; } @@ -316,12 +321,11 @@ X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU, X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, const X86TargetMachine &TM, - unsigned StackAlignOverride, + MaybeAlign StackAlignOverride, unsigned PreferVectorWidthOverride, unsigned RequiredVectorWidth) - : X86GenSubtargetInfo(TT, CPU, FS), - PICStyle(PICStyles::None), TM(TM), TargetTriple(TT), - StackAlignOverride(StackAlignOverride), + : X86GenSubtargetInfo(TT, CPU, FS), PICStyle(PICStyles::None), TM(TM), + TargetTriple(TT), StackAlignOverride(StackAlignOverride), PreferVectorWidthOverride(PreferVectorWidthOverride), RequiredVectorWidth(RequiredVectorWidth), In64BitMode(TargetTriple.getArch() == Triple::x86_64), @@ -355,7 +359,7 @@ const CallLowering *X86Subtarget::getCallLowering() const { return CallLoweringInfo.get(); } -const InstructionSelector *X86Subtarget::getInstructionSelector() const { +InstructionSelector *X86Subtarget::getInstructionSelector() const { return InstSelector.get(); } diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 24ccc9cb7843..e8efe8f2afe5 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -365,8 +365,8 @@ protected: /// Processor has AVX-512 vp2intersect instructions bool HasVP2INTERSECT = false; - /// Processor supports MPX - Memory Protection Extensions - bool HasMPX = false; + /// Deprecated flag for MPX instructions. + bool DeprecatedHasMPX = false; /// Processor supports CET SHSTK - Control-Flow Enforcement Technology /// using Shadow Stack @@ -427,15 +427,21 @@ protected: /// Use software floating point for code generation. bool UseSoftFloat = false; + /// Use alias analysis during code generation. + bool UseAA = false; + /// The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. 
- unsigned stackAlignment = 4; + Align stackAlignment = Align(4); /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops. /// // FIXME: this is a known good value for Yonah. How about others? unsigned MaxInlineSizeThreshold = 128; + /// Indicates target prefers 128 bit instructions. + bool Prefer128Bit = false; + /// Indicates target prefers 256 bit instructions. bool Prefer256Bit = false; @@ -453,7 +459,7 @@ protected: private: /// Override the stack alignment. - unsigned StackAlignOverride; + MaybeAlign StackAlignOverride; /// Preferred vector width from function attribute. unsigned PreferVectorWidthOverride; @@ -490,7 +496,7 @@ public: /// of the specified triple. /// X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, - const X86TargetMachine &TM, unsigned StackAlignOverride, + const X86TargetMachine &TM, MaybeAlign StackAlignOverride, unsigned PreferVectorWidthOverride, unsigned RequiredVectorWidth); @@ -515,7 +521,7 @@ public: /// Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every /// function for this subtarget. - unsigned getStackAlignment() const { return stackAlignment; } + Align getStackAlignment() const { return stackAlignment; } /// Returns the maximum memset / memcpy size /// that still makes it profitable to inline the call. @@ -527,7 +533,7 @@ public: /// Methods used by Global ISel const CallLowering *getCallLowering() const override; - const InstructionSelector *getInstructionSelector() const override; + InstructionSelector *getInstructionSelector() const override; const LegalizerInfo *getLegalizerInfo() const override; const RegisterBankInfo *getRegBankInfo() const override; @@ -684,7 +690,6 @@ public: bool hasBF16() const { return HasBF16; } bool hasVP2INTERSECT() const { return HasVP2INTERSECT; } bool hasBITALG() const { return HasBITALG; } - bool hasMPX() const { return HasMPX; } bool hasSHSTK() const { return HasSHSTK; } bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; } bool hasCLWB() const { return HasCLWB; } @@ -739,6 +744,7 @@ public: X86ProcFamily == IntelTRM; } bool useSoftFloat() const { return UseSoftFloat; } + bool useAA() const override { return UseAA; } /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for /// no-sse2). There isn't any reason to disable it if the target processor @@ -809,6 +815,7 @@ public: // On Win64, all these conventions just use the default convention. 
case CallingConv::C: case CallingConv::Fast: + case CallingConv::Tail: case CallingConv::Swift: case CallingConv::X86_FastCall: case CallingConv::X86_StdCall: diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 0cbf13899a29..c15297134e4d 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -81,27 +81,28 @@ extern "C" void LLVMInitializeX86Target() { initializeX86SpeculativeLoadHardeningPassPass(PR); initializeX86FlagsCopyLoweringPassPass(PR); initializeX86CondBrFoldingPassPass(PR); + initializeX86OptimizeLEAPassPass(PR); } static std::unique_ptr createTLOF(const Triple &TT) { if (TT.isOSBinFormatMachO()) { if (TT.getArch() == Triple::x86_64) - return llvm::make_unique(); - return llvm::make_unique(); + return std::make_unique(); + return std::make_unique(); } if (TT.isOSFreeBSD()) - return llvm::make_unique(); + return std::make_unique(); if (TT.isOSLinux() || TT.isOSNaCl() || TT.isOSIAMCU()) - return llvm::make_unique(); + return std::make_unique(); if (TT.isOSSolaris()) - return llvm::make_unique(); + return std::make_unique(); if (TT.isOSFuchsia()) - return llvm::make_unique(); + return std::make_unique(); if (TT.isOSBinFormatELF()) - return llvm::make_unique(); + return std::make_unique(); if (TT.isOSBinFormatCOFF()) - return llvm::make_unique(); + return std::make_unique(); llvm_unreachable("unknown subtarget type"); } @@ -116,6 +117,9 @@ static std::string computeDataLayout(const Triple &TT) { !TT.isArch64Bit()) Ret += "-p:32:32"; + // Address spaces for 32 bit signed, 32 bit unsigned, and 64 bit pointers. + Ret += "-p270:32:32-p271:32:32-p272:64:64"; + // Some ABIs align 64 bit integers and doubles to 64 bits, others to 32. if (TT.isArch64Bit() || TT.isOSWindows() || TT.isOSNaCl()) Ret += "-i64:64"; @@ -218,17 +222,9 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT, getEffectiveX86CodeModel(CM, JIT, TT.getArch() == Triple::x86_64), OL), TLOF(createTLOF(getTargetTriple())) { - // Windows stack unwinder gets confused when execution flow "falls through" - // after a call to 'noreturn' function. - // To prevent that, we emit a trap for 'unreachable' IR instructions. - // (which on X86, happens to be the 'ud2' instruction) // On PS4, the "return address" of a 'noreturn' call must still be within // the calling function, and TrapUnreachable is an easy way to get that. - // The check here for 64-bit windows is a bit icky, but as we're unlikely - // to ever want to mix 32 and 64-bit windows code in a single module - // this should be fine. - if ((TT.isOSWindows() && TT.getArch() == Triple::x86_64) || TT.isPS4() || - TT.isOSBinFormatMachO()) { + if (TT.isPS4() || TT.isOSBinFormatMachO()) { this->Options.TrapUnreachable = true; this->Options.NoTrapAfterNoreturn = TT.isOSBinFormatMachO(); } @@ -311,10 +307,10 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const { // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. 
resetTargetOptions(F); - I = llvm::make_unique(TargetTriple, CPU, FS, *this, - Options.StackAlignmentOverride, - PreferVectorWidthOverride, - RequiredVectorWidth); + I = std::make_unique( + TargetTriple, CPU, FS, *this, + MaybeAlign(Options.StackAlignmentOverride), PreferVectorWidthOverride, + RequiredVectorWidth); } return I.get(); } @@ -517,12 +513,19 @@ void X86PassConfig::addPreEmitPass() { } void X86PassConfig::addPreEmitPass2() { + const Triple &TT = TM->getTargetTriple(); + const MCAsmInfo *MAI = TM->getMCAsmInfo(); + addPass(createX86RetpolineThunksPass()); + + // Insert extra int3 instructions after trailing call instructions to avoid + // issues in the unwinder. + if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) + addPass(createX86AvoidTrailingCallPass()); + // Verify basic block incoming and outgoing cfa offset and register values and // correct CFA calculation rule where needed by inserting appropriate CFI // instructions. - const Triple &TT = TM->getTargetTriple(); - const MCAsmInfo *MAI = TM->getMCAsmInfo(); if (!TT.isOSDarwin() && (!TT.isOSWindows() || MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI)) diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index b999e2e86af6..ec3db7b1e9e8 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -16,7 +16,6 @@ #include "X86Subtarget.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringMap.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetMachine.h" #include @@ -26,6 +25,7 @@ namespace llvm { class StringRef; class X86Subtarget; class X86RegisterBankInfo; +class TargetTransformInfo; class X86TargetMachine final : public LLVMTargetMachine { std::unique_ptr TLOF; diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index 92e0779c2e74..44185957686b 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -47,8 +47,8 @@ MCSymbol *X86_64MachoTargetObjectFile::getCFIPersonalitySymbol( } const MCExpr *X86_64MachoTargetObjectFile::getIndirectSymViaGOTPCRel( - const MCSymbol *Sym, const MCValue &MV, int64_t Offset, - MachineModuleInfo *MMI, MCStreamer &Streamer) const { + const GlobalValue *GV, const MCSymbol *Sym, const MCValue &MV, + int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const { // On Darwin/X86-64, we need to use foo@GOTPCREL+4 to access the got entry // from a data section. In case there's an additional offset, then use // foo@GOTPCREL+4+. 
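// --- Editorial sketch, not part of the imported patch -----------------------
// The X86TargetMachine.cpp hunk above appends "-p270:32:32-p271:32:32-p272:64:64"
// to computeDataLayout(), declaring address spaces for 32-bit signed, 32-bit
// unsigned, and 64-bit pointers alongside the default pointer.  For a 64-bit
// ELF target the resulting datalayout string is assumed to look roughly like
// the constant below; only the p270/p271/p272 fragment is taken from the
// patch, the remainder of the string is illustrative.
namespace {
const char *const AssumedX86_64DataLayout =
    "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128";
} // end anonymous namespace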
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h index 13d7b4ad70d6..1fd0bbf56b19 100644 --- a/lib/Target/X86/X86TargetObjectFile.h +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -30,7 +30,8 @@ namespace llvm { const TargetMachine &TM, MachineModuleInfo *MMI) const override; - const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym, + const MCExpr *getIndirectSymViaGOTPCRel(const GlobalValue *GV, + const MCSymbol *Sym, const MCValue &MV, int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const override; diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 3dc59aeb263e..70fd857fcf01 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -116,7 +116,8 @@ llvm::Optional X86TTIImpl::getCacheAssociativity( llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); } -unsigned X86TTIImpl::getNumberOfRegisters(bool Vector) { +unsigned X86TTIImpl::getNumberOfRegisters(unsigned ClassID) const { + bool Vector = (ClassID == 1); if (Vector && !ST->hasSSE1()) return 0; @@ -887,7 +888,7 @@ int X86TTIImpl::getArithmeticInstrCost( int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { // 64-bit packed float vectors (v2f32) are widened to type v4f32. - // 64-bit packed integer vectors (v2i32) are promoted to type v2i64. + // 64-bit packed integer vectors (v2i32) are widened to type v4i32. std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); // Treat Transpose as 2-op shuffles - there's no difference in lowering. @@ -911,6 +912,39 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, int NumSubElts = SubLT.second.getVectorNumElements(); if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0) return SubLT.first; + // Handle some cases for widening legalization. For now we only handle + // cases where the original subvector was naturally aligned and evenly + // fit in its legalized subvector type. + // FIXME: Remove some of the alignment restrictions. + // FIXME: We can use permq for 64-bit or larger extracts from 256-bit + // vectors. + int OrigSubElts = SubTp->getVectorNumElements(); + if (NumSubElts > OrigSubElts && + (Index % OrigSubElts) == 0 && (NumSubElts % OrigSubElts) == 0 && + LT.second.getVectorElementType() == + SubLT.second.getVectorElementType() && + LT.second.getVectorElementType().getSizeInBits() == + Tp->getVectorElementType()->getPrimitiveSizeInBits()) { + assert(NumElts >= NumSubElts && NumElts > OrigSubElts && + "Unexpected number of elements!"); + Type *VecTy = VectorType::get(Tp->getVectorElementType(), + LT.second.getVectorNumElements()); + Type *SubTy = VectorType::get(Tp->getVectorElementType(), + SubLT.second.getVectorNumElements()); + int ExtractIndex = alignDown((Index % NumElts), NumSubElts); + int ExtractCost = getShuffleCost(TTI::SK_ExtractSubvector, VecTy, + ExtractIndex, SubTy); + + // If the original size is 32-bits or more, we can use pshufd. Otherwise + // if we have SSSE3 we can use pshufb. 
+ if (SubTp->getPrimitiveSizeInBits() >= 32 || ST->hasSSSE3()) + return ExtractCost + 1; // pshufd or pshufb + + assert(SubTp->getPrimitiveSizeInBits() == 16 && + "Unexpected vector size"); + + return ExtractCost + 2; // worst case pshufhw + pshufd + } } } @@ -1314,8 +1348,10 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 }, { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 }, { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 }, - { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 1 }, { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 1 }, { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 1 }, { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 1 }, @@ -1354,6 +1390,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 5 }, { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 1 }, + { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 1 }, + { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 1 }, { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 }, { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, @@ -1371,14 +1409,14 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 3 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 3 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 3 }, - { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 3 }, - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 1 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 1 }, { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, - { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 1 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 1 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, @@ -1402,13 +1440,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 4 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 4 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 }, - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 7 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 4 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 4 }, { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 4 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 4 }, @@ -1421,7 +1459,10 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 4 }, { ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 4 }, { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 4 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 11 }, + { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 9 }, { ISD::TRUNCATE, 
MVT::v8i32, MVT::v8i64, 9 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 11 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 }, @@ -1507,6 +1548,7 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 }, { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 3 }, { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 }, + { ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1 }, // PSHUFB { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 }, }; @@ -1520,7 +1562,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 2*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2*10 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, @@ -1536,6 +1579,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 3 }, { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 6 }, + { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 4 }, + { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 4 }, { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 }, { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 }, @@ -1562,15 +1607,21 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 3 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 5 }, + { ISD::TRUNCATE, MVT::v2i8, MVT::v2i16, 2 }, // PAND+PACKUSWB { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 4 }, { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 }, { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 }, + { ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 3 }, // PAND+3*PACKUSWB + { ISD::TRUNCATE, MVT::v2i16, MVT::v2i32, 1 }, { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 3 }, { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 3 }, { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 }, { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 7 }, { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 }, { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 10 }, + { ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 4 }, // PAND+3*PACKUSWB + { ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 2 }, // PSHUFD+PSHUFLW + { ISD::TRUNCATE, MVT::v2i32, MVT::v2i64, 1 }, // PSHUFD }; std::pair LTSrc = TLI->getTypeLegalizationCost(DL, Src); @@ -1691,6 +1742,11 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, } } + static const CostTblEntry SLMCostTbl[] = { + // slm pcmpeq/pcmpgt throughput is 2 + { ISD::SETCC, MVT::v2i64, 2 }, + }; + static const CostTblEntry AVX512BWCostTbl[] = { { ISD::SETCC, MVT::v32i16, 1 }, { ISD::SETCC, MVT::v64i8, 1 }, @@ -1777,6 +1833,10 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, { ISD::SELECT, MVT::v4f32, 3 }, // andps + andnps + orps }; + if (ST->isSLM()) + if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy)) + return LT.first * (ExtraCost + Entry->Cost); + if (ST->hasBWI()) if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy)) return LT.first * (ExtraCost + Entry->Cost); @@ -2043,8 +2103,26 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, { ISD::FSQRT, MVT::f32, 28 }, // Pentium III from http://www.agner.org/ { ISD::FSQRT, MVT::v4f32, 56 }, // Pentium III from http://www.agner.org/ }; + static const CostTblEntry LZCNT64CostTbl[] = { // 64-bit targets + { ISD::CTLZ, MVT::i64, 1 }, + }; + static const 
CostTblEntry LZCNT32CostTbl[] = { // 32 or 64-bit targets + { ISD::CTLZ, MVT::i32, 1 }, + { ISD::CTLZ, MVT::i16, 1 }, + { ISD::CTLZ, MVT::i8, 1 }, + }; + static const CostTblEntry POPCNT64CostTbl[] = { // 64-bit targets + { ISD::CTPOP, MVT::i64, 1 }, + }; + static const CostTblEntry POPCNT32CostTbl[] = { // 32 or 64-bit targets + { ISD::CTPOP, MVT::i32, 1 }, + { ISD::CTPOP, MVT::i16, 1 }, + { ISD::CTPOP, MVT::i8, 1 }, + }; static const CostTblEntry X64CostTbl[] = { // 64-bit targets { ISD::BITREVERSE, MVT::i64, 14 }, + { ISD::CTLZ, MVT::i64, 4 }, // BSR+XOR or BSR+XOR+CMOV + { ISD::CTPOP, MVT::i64, 10 }, { ISD::SADDO, MVT::i64, 1 }, { ISD::UADDO, MVT::i64, 1 }, }; @@ -2052,6 +2130,12 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, { ISD::BITREVERSE, MVT::i32, 14 }, { ISD::BITREVERSE, MVT::i16, 14 }, { ISD::BITREVERSE, MVT::i8, 11 }, + { ISD::CTLZ, MVT::i32, 4 }, // BSR+XOR or BSR+XOR+CMOV + { ISD::CTLZ, MVT::i16, 4 }, // BSR+XOR or BSR+XOR+CMOV + { ISD::CTLZ, MVT::i8, 4 }, // BSR+XOR or BSR+XOR+CMOV + { ISD::CTPOP, MVT::i32, 8 }, + { ISD::CTPOP, MVT::i16, 9 }, + { ISD::CTPOP, MVT::i8, 7 }, { ISD::SADDO, MVT::i32, 1 }, { ISD::SADDO, MVT::i16, 1 }, { ISD::SADDO, MVT::i8, 1 }, @@ -2163,6 +2247,26 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy)) return LT.first * Entry->Cost; + if (ST->hasLZCNT()) { + if (ST->is64Bit()) + if (const auto *Entry = CostTableLookup(LZCNT64CostTbl, ISD, MTy)) + return LT.first * Entry->Cost; + + if (const auto *Entry = CostTableLookup(LZCNT32CostTbl, ISD, MTy)) + return LT.first * Entry->Cost; + } + + if (ST->hasPOPCNT()) { + if (ST->is64Bit()) + if (const auto *Entry = CostTableLookup(POPCNT64CostTbl, ISD, MTy)) + return LT.first * Entry->Cost; + + if (const auto *Entry = CostTableLookup(POPCNT32CostTbl, ISD, MTy)) + return LT.first * Entry->Cost; + } + + // TODO - add BMI (TZCNT) scalar handling + if (ST->is64Bit()) if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy)) return LT.first * Entry->Cost; @@ -2357,8 +2461,9 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, unsigned NumElem = SrcVTy->getVectorNumElements(); VectorType *MaskTy = VectorType::get(Type::getInt8Ty(SrcVTy->getContext()), NumElem); - if ((IsLoad && !isLegalMaskedLoad(SrcVTy)) || - (IsStore && !isLegalMaskedStore(SrcVTy)) || !isPowerOf2_32(NumElem)) { + if ((IsLoad && !isLegalMaskedLoad(SrcVTy, MaybeAlign(Alignment))) || + (IsStore && !isLegalMaskedStore(SrcVTy, MaybeAlign(Alignment))) || + !isPowerOf2_32(NumElem)) { // Scalarization int MaskSplitCost = getScalarizationOverhead(MaskTy, false, true); int ScalarCompareCost = getCmpSelInstrCost( @@ -2425,70 +2530,107 @@ int X86TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy, bool IsPairwise) { - - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); - - MVT MTy = LT.second; - - int ISD = TLI->InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput // and make it as the cost. - static const CostTblEntry SSE42CostTblPairWise[] = { + static const CostTblEntry SSE2CostTblPairWise[] = { { ISD::FADD, MVT::v2f64, 2 }, { ISD::FADD, MVT::v4f32, 4 }, { ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6". + { ISD::ADD, MVT::v2i32, 2 }, // FIXME: chosen to be less than v4i32. 
{ ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5". + { ISD::ADD, MVT::v2i16, 3 }, // FIXME: chosen to be less than v4i16 + { ISD::ADD, MVT::v4i16, 4 }, // FIXME: chosen to be less than v8i16 { ISD::ADD, MVT::v8i16, 5 }, + { ISD::ADD, MVT::v2i8, 2 }, + { ISD::ADD, MVT::v4i8, 2 }, + { ISD::ADD, MVT::v8i8, 2 }, + { ISD::ADD, MVT::v16i8, 3 }, }; static const CostTblEntry AVX1CostTblPairWise[] = { - { ISD::FADD, MVT::v4f32, 4 }, { ISD::FADD, MVT::v4f64, 5 }, { ISD::FADD, MVT::v8f32, 7 }, { ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5". - { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5". { ISD::ADD, MVT::v4i64, 5 }, // The data reported by the IACA tool is "4.8". - { ISD::ADD, MVT::v8i16, 5 }, { ISD::ADD, MVT::v8i32, 5 }, + { ISD::ADD, MVT::v16i16, 6 }, + { ISD::ADD, MVT::v32i8, 4 }, }; - static const CostTblEntry SSE42CostTblNoPairWise[] = { + static const CostTblEntry SSE2CostTblNoPairWise[] = { { ISD::FADD, MVT::v2f64, 2 }, { ISD::FADD, MVT::v4f32, 4 }, { ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6". + { ISD::ADD, MVT::v2i32, 2 }, // FIXME: chosen to be less than v4i32 { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.3". + { ISD::ADD, MVT::v2i16, 2 }, // The data reported by the IACA tool is "4.3". + { ISD::ADD, MVT::v4i16, 3 }, // The data reported by the IACA tool is "4.3". { ISD::ADD, MVT::v8i16, 4 }, // The data reported by the IACA tool is "4.3". + { ISD::ADD, MVT::v2i8, 2 }, + { ISD::ADD, MVT::v4i8, 2 }, + { ISD::ADD, MVT::v8i8, 2 }, + { ISD::ADD, MVT::v16i8, 3 }, }; static const CostTblEntry AVX1CostTblNoPairWise[] = { - { ISD::FADD, MVT::v4f32, 3 }, { ISD::FADD, MVT::v4f64, 3 }, + { ISD::FADD, MVT::v4f32, 3 }, { ISD::FADD, MVT::v8f32, 4 }, { ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5". - { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "2.8". { ISD::ADD, MVT::v4i64, 3 }, - { ISD::ADD, MVT::v8i16, 4 }, { ISD::ADD, MVT::v8i32, 5 }, + { ISD::ADD, MVT::v16i16, 5 }, + { ISD::ADD, MVT::v32i8, 4 }, }; + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + // Before legalizing the type, give a chance to look up illegal narrow types + // in the table. + // FIXME: Is there a better way to do this? 
+ EVT VT = TLI->getValueType(DL, ValTy); + if (VT.isSimple()) { + MVT MTy = VT.getSimpleVT(); + if (IsPairwise) { + if (ST->hasAVX()) + if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy)) + return Entry->Cost; + + if (ST->hasSSE2()) + if (const auto *Entry = CostTableLookup(SSE2CostTblPairWise, ISD, MTy)) + return Entry->Cost; + } else { + if (ST->hasAVX()) + if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy)) + return Entry->Cost; + + if (ST->hasSSE2()) + if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy)) + return Entry->Cost; + } + } + + std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + + MVT MTy = LT.second; + if (IsPairwise) { if (ST->hasAVX()) if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy)) return LT.first * Entry->Cost; - if (ST->hasSSE42()) - if (const auto *Entry = CostTableLookup(SSE42CostTblPairWise, ISD, MTy)) + if (ST->hasSSE2()) + if (const auto *Entry = CostTableLookup(SSE2CostTblPairWise, ISD, MTy)) return LT.first * Entry->Cost; } else { if (ST->hasAVX()) if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy)) return LT.first * Entry->Cost; - if (ST->hasSSE42()) - if (const auto *Entry = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy)) + if (ST->hasSSE2()) + if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy)) return LT.first * Entry->Cost; } @@ -3116,7 +3258,7 @@ bool X86TTIImpl::canMacroFuseCmp() { return ST->hasMacroFusion() || ST->hasBranchFusion(); } -bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) { +bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment) { if (!ST->hasAVX()) return false; @@ -3139,11 +3281,11 @@ bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) { ((IntWidth == 8 || IntWidth == 16) && ST->hasBWI()); } -bool X86TTIImpl::isLegalMaskedStore(Type *DataType) { - return isLegalMaskedLoad(DataType); +bool X86TTIImpl::isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) { + return isLegalMaskedLoad(DataType, Alignment); } -bool X86TTIImpl::isLegalNTLoad(Type *DataType, unsigned Alignment) { +bool X86TTIImpl::isLegalNTLoad(Type *DataType, Align Alignment) { unsigned DataSize = DL.getTypeStoreSize(DataType); // The only supported nontemporal loads are for aligned vectors of 16 or 32 // bytes. Note that 32-byte nontemporal vector loads are supported by AVX2 @@ -3154,7 +3296,7 @@ bool X86TTIImpl::isLegalNTLoad(Type *DataType, unsigned Alignment) { return false; } -bool X86TTIImpl::isLegalNTStore(Type *DataType, unsigned Alignment) { +bool X86TTIImpl::isLegalNTStore(Type *DataType, Align Alignment) { unsigned DataSize = DL.getTypeStoreSize(DataType); // SSE4A supports nontemporal stores of float and double at arbitrary @@ -3299,9 +3441,8 @@ X86TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { if (IsZeroCmp) { // Only enable vector loads for equality comparison. Right now the vector // version is not as fast for three way compare (see #33329). - // TODO: enable AVX512 when the DAG is ready. - // if (ST->hasAVX512()) Options.LoadSizes.push_back(64); const unsigned PreferredWidth = ST->getPreferVectorWidth(); + if (PreferredWidth >= 512 && ST->hasAVX512()) Options.LoadSizes.push_back(64); if (PreferredWidth >= 256 && ST->hasAVX2()) Options.LoadSizes.push_back(32); if (PreferredWidth >= 128 && ST->hasSSE2()) Options.LoadSizes.push_back(16); // All GPR and vector loads can be unaligned. 
SIMD compare requires integer diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h index 25d9c33eb16d..7581257f41f8 100644 --- a/lib/Target/X86/X86TargetTransformInfo.h +++ b/lib/Target/X86/X86TargetTransformInfo.h @@ -83,6 +83,7 @@ class X86TTIImpl : public BasicTTIImplBase { X86::FeatureSlowUAMem32, // Based on whether user set the -mprefer-vector-width command line. + X86::FeaturePrefer128Bit, X86::FeaturePrefer256Bit, // CPU name enums. These just follow CPU string. @@ -115,7 +116,7 @@ public: /// \name Vector TTI Implementations /// @{ - unsigned getNumberOfRegisters(bool Vector); + unsigned getNumberOfRegisters(unsigned ClassID) const; unsigned getRegisterBitWidth(bool Vector) const; unsigned getLoadStoreVecRegBitWidth(unsigned AS) const; unsigned getMaxInterleaveFactor(unsigned VF); @@ -184,10 +185,10 @@ public: bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2); bool canMacroFuseCmp(); - bool isLegalMaskedLoad(Type *DataType); - bool isLegalMaskedStore(Type *DataType); - bool isLegalNTLoad(Type *DataType, unsigned Alignment); - bool isLegalNTStore(Type *DataType, unsigned Alignment); + bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment); + bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment); + bool isLegalNTLoad(Type *DataType, Align Alignment); + bool isLegalNTStore(Type *DataType, Align Alignment); bool isLegalMaskedGather(Type *DataType); bool isLegalMaskedScatter(Type *DataType); bool isLegalMaskedExpandLoad(Type *DataType); diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp index a07d2f20acab..9280d030b5d5 100644 --- a/lib/Target/X86/X86VZeroUpper.cpp +++ b/lib/Target/X86/X86VZeroUpper.cpp @@ -292,8 +292,7 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) { // need to insert any VZEROUPPER instructions. This is constant-time, so it // is cheap in the common case of no ymm/zmm use. bool YmmOrZmmUsed = FnHasLiveInYmmOrZmm; - const TargetRegisterClass *RCs[2] = {&X86::VR256RegClass, &X86::VR512RegClass}; - for (auto *RC : RCs) { + for (auto *RC : {&X86::VR256RegClass, &X86::VR512_0_15RegClass}) { if (!YmmOrZmmUsed) { for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); i != e; i++) { @@ -304,9 +303,8 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) { } } } - if (!YmmOrZmmUsed) { + if (!YmmOrZmmUsed) return false; - } assert(BlockStates.empty() && DirtySuccessors.empty() && "X86VZeroUpper state should be clear"); diff --git a/lib/Target/X86/X86WinAllocaExpander.cpp b/lib/Target/X86/X86WinAllocaExpander.cpp index 9e499db1d7ee..ae72c6427588 100644 --- a/lib/Target/X86/X86WinAllocaExpander.cpp +++ b/lib/Target/X86/X86WinAllocaExpander.cpp @@ -81,7 +81,7 @@ static int64_t getWinAllocaAmount(MachineInstr *MI, MachineRegisterInfo *MRI) { MI->getOpcode() == X86::WIN_ALLOCA_64); assert(MI->getOperand(0).isReg()); - unsigned AmountReg = MI->getOperand(0).getReg(); + Register AmountReg = MI->getOperand(0).getReg(); MachineInstr *Def = MRI->getUniqueVRegDef(AmountReg); if (!Def || @@ -261,7 +261,7 @@ void X86WinAllocaExpander::lower(MachineInstr* MI, Lowering L) { break; } - unsigned AmountReg = MI->getOperand(0).getReg(); + Register AmountReg = MI->getOperand(0).getReg(); MI->eraseFromParent(); // Delete the definition of AmountReg. 
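// --- Editorial sketch, not part of the imported patch -----------------------
// The X86TargetTransformInfo.h hunk above switches several TTI hooks to
// explicit alignment types and a numeric register-class id.  Assuming the
// generic TargetTransformInfo interface mirrors these signatures at this
// revision, a caller would now be written roughly as follows (headers omitted;
// TTI, VTy and AlignInBytes are illustrative parameters, not names from the
// patch):
static void ttiHookSketch(const llvm::TargetTransformInfo &TTI,
                          llvm::Type *VTy, unsigned AlignInBytes) {
  // ClassID 1 is treated as "vector" by the X86 implementation in this patch.
  unsigned ScalarRegs = TTI.getNumberOfRegisters(/*ClassID=*/0);
  unsigned VectorRegs = TTI.getNumberOfRegisters(/*ClassID=*/1);
  bool MaskedOK = TTI.isLegalMaskedLoad(VTy, llvm::MaybeAlign(AlignInBytes)) &&
                  TTI.isLegalMaskedStore(VTy, llvm::MaybeAlign(AlignInBytes));
  bool NTOK = TTI.isLegalNTStore(VTy, llvm::Align(16));
  (void)ScalarRegs; (void)VectorRegs; (void)MaskedOK; (void)NTOK;
}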
diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp index f68d17d7256d..d65e1f3ab414 100644 --- a/lib/Target/X86/X86WinEHState.cpp +++ b/lib/Target/X86/X86WinEHState.cpp @@ -339,7 +339,10 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) { if (UseStackGuard) { Value *Val = Builder.CreateLoad(Int32Ty, Cookie); Value *FrameAddr = Builder.CreateCall( - Intrinsic::getDeclaration(TheModule, Intrinsic::frameaddress), + Intrinsic::getDeclaration( + TheModule, Intrinsic::frameaddress, + Builder.getInt8PtrTy( + TheModule->getDataLayout().getAllocaAddrSpace())), Builder.getInt32(0), "frameaddr"); Value *FrameAddrI32 = Builder.CreatePtrToInt(FrameAddr, Int32Ty); FrameAddrI32 = Builder.CreateXor(FrameAddrI32, Val); diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp index 9f615b9e7741..6b3dc27cb886 100644 --- a/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -115,7 +115,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { MCSymbol *GVSym = getSymbol(GV); const Constant *C = GV->getInitializer(); - unsigned Align = (unsigned)DL.getPreferredTypeAlignmentShift(C->getType()); + const Align Alignment(DL.getPrefTypeAlignment(C->getType())); // Mark the start of the global getTargetStreamer().emitCCTopData(GVSym->getName()); @@ -143,7 +143,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { llvm_unreachable("Unknown linkage type!"); } - EmitAlignment(Align > 2 ? Align : 2, GV); + EmitAlignment(std::max(Alignment, Align(4)), GV); if (GV->isThreadLocal()) { report_fatal_error("TLS is not supported by this target!"); diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index 5066407c74aa..fd8b37e26e47 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -211,7 +211,7 @@ static void RestoreSpillList(MachineBasicBlock &MBB, //===----------------------------------------------------------------------===// XCoreFrameLowering::XCoreFrameLowering(const XCoreSubtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0) { + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(4), 0) { // Do nothing } @@ -367,8 +367,8 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF, RestoreSpillList(MBB, MBBI, dl, TII, RemainingAdj, SpillList); // Return to the landing pad. - unsigned EhStackReg = MBBI->getOperand(0).getReg(); - unsigned EhHandlerReg = MBBI->getOperand(1).getReg(); + Register EhStackReg = MBBI->getOperand(0).getReg(); + Register EhHandlerReg = MBBI->getOperand(1).getReg(); BuildMI(MBB, MBBI, dl, TII.get(XCore::SETSP_1r)).addReg(EhStackReg); BuildMI(MBB, MBBI, dl, TII.get(XCore::BAU_1r)).addReg(EhHandlerReg); MBB.erase(MBBI); // Erase the previous return instruction. 
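// --- Editorial sketch, not part of the imported patch -----------------------
// The WinEHStatePass hunk above now spells out the result pointer type when
// declaring llvm.frameaddress, since the intrinsic is overloaded on that type.
// A minimal reconstruction of the same call pattern, assuming M is a Module*
// and Builder an IRBuilder<> positioned at the insertion point (headers
// omitted):
static llvm::Value *frameAddressSketch(llvm::Module *M,
                                       llvm::IRBuilder<> &Builder) {
  llvm::Function *Decl = llvm::Intrinsic::getDeclaration(
      M, llvm::Intrinsic::frameaddress,
      Builder.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
  // llvm.frameaddress(0) returns the address of the current frame.
  return Builder.CreateCall(Decl, Builder.getInt32(0), "frameaddr");
}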
diff --git a/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp b/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp index e433d21c59b7..b5dbdea98eea 100644 --- a/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp +++ b/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp @@ -55,7 +55,7 @@ bool XCoreFTAOElim::runOnMachineFunction(MachineFunction &MF) { MBBI != EE; ++MBBI) { if (MBBI->getOpcode() == XCore::FRAME_TO_ARGS_OFFSET) { MachineInstr &OldInst = *MBBI; - unsigned Reg = OldInst.getOperand(0).getReg(); + Register Reg = OldInst.getOperand(0).getReg(); MBBI = TII.loadImmediate(MBB, MBBI, Reg, StackSize); OldInst.eraseFromParent(); } diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 072278d9fc46..bf006fd673f1 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -171,8 +171,8 @@ XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); - setMinFunctionAlignment(1); - setPrefFunctionAlignment(2); + setMinFunctionAlignment(Align(2)); + setPrefFunctionAlignment(Align(4)); } bool XCoreTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { @@ -414,8 +414,8 @@ SDValue XCoreTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { "Unexpected extension type"); assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT"); - if (allowsMemoryAccess(Context, DAG.getDataLayout(), LD->getMemoryVT(), - *LD->getMemOperand())) + if (allowsMemoryAccessForAlignment(Context, DAG.getDataLayout(), + LD->getMemoryVT(), *LD->getMemOperand())) return SDValue(); SDValue Chain = LD->getChain(); @@ -488,8 +488,8 @@ SDValue XCoreTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { assert(!ST->isTruncatingStore() && "Unexpected store type"); assert(ST->getMemoryVT() == MVT::i32 && "Unexpected store EVT"); - if (allowsMemoryAccess(Context, DAG.getDataLayout(), ST->getMemoryVT(), - *ST->getMemOperand())) + if (allowsMemoryAccessForAlignment(Context, DAG.getDataLayout(), + ST->getMemoryVT(), *ST->getMemOperand())) return SDValue(); SDValue Chain = ST->getChain(); @@ -1309,7 +1309,7 @@ SDValue XCoreTargetLowering::LowerCCCArguments( llvm_unreachable(nullptr); } case MVT::i32: - unsigned VReg = RegInfo.createVirtualRegister(&XCore::GRRegsRegClass); + Register VReg = RegInfo.createVirtualRegister(&XCore::GRRegsRegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); ArgIn = DAG.getCopyFromReg(Chain, dl, VReg, RegVT); CFRegNode.push_back(ArgIn.getValue(ArgIn->getNumValues() - 1)); @@ -1360,7 +1360,7 @@ SDValue XCoreTargetLowering::LowerCCCArguments( offset -= StackSlotSize; SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); // Move argument from phys reg -> virt reg - unsigned VReg = RegInfo.createVirtualRegister(&XCore::GRRegsRegClass); + Register VReg = RegInfo.createVirtualRegister(&XCore::GRRegsRegClass); RegInfo.addLiveIn(ArgRegs[i], VReg); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); CFRegNode.push_back(Val.getValue(Val->getNumValues() - 1)); @@ -1780,8 +1780,9 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, // Replace unaligned store of unaligned load with memmove. 
StoreSDNode *ST = cast(N); if (!DCI.isBeforeLegalize() || - allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), - ST->getMemoryVT(), *ST->getMemOperand()) || + allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), + ST->getMemoryVT(), + *ST->getMemOperand()) || ST->isVolatile() || ST->isIndexed()) { break; } diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index 3752274e2cdf..86ec7f82d4d1 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -301,7 +301,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, << "<--------->\n"); Offset/=4; - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); assert(XCore::GRRegsRegClass.contains(Reg) && "Unexpected register operand"); if (TFI->hasFP(MF)) { diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 2a8cd6b657b7..b5b7445265b7 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -53,7 +53,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, const Triple &TT, T, "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32", TT, CPU, FS, Options, getEffectiveRelocModel(RM), getEffectiveXCoreCodeModel(CM), OL), - TLOF(llvm::make_unique()), + TLOF(std::make_unique()), Subtarget(TT, CPU, FS, *this) { initAsmInfo(); } diff --git a/lib/Target/XCore/XCoreTargetTransformInfo.h b/lib/Target/XCore/XCoreTargetTransformInfo.h index 3fecaaa59722..58df1f290ec9 100644 --- a/lib/Target/XCore/XCoreTargetTransformInfo.h +++ b/lib/Target/XCore/XCoreTargetTransformInfo.h @@ -40,7 +40,8 @@ public: : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {} - unsigned getNumberOfRegisters(bool Vector) { + unsigned getNumberOfRegisters(unsigned ClassID) const { + bool Vector = (ClassID == 1); if (Vector) { return 0; } diff --git a/lib/TextAPI/MachO/Architecture.cpp b/lib/TextAPI/MachO/Architecture.cpp index a66a982fa153..699fb5f4587a 100644 --- a/lib/TextAPI/MachO/Architecture.cpp +++ b/lib/TextAPI/MachO/Architecture.cpp @@ -68,6 +68,10 @@ std::pair getCPUTypeFromArchitecture(Architecture Arch) { return std::make_pair(0, 0); } +Architecture mapToArchitecture(const Triple &Target) { + return getArchitectureFromName(Target.getArchName()); +} + raw_ostream &operator<<(raw_ostream &OS, Architecture Arch) { OS << getArchitectureName(Arch); return OS; diff --git a/lib/TextAPI/MachO/InterfaceFile.cpp b/lib/TextAPI/MachO/InterfaceFile.cpp index 54ba8cc31267..c40a952a6a8b 100644 --- a/lib/TextAPI/MachO/InterfaceFile.cpp +++ b/lib/TextAPI/MachO/InterfaceFile.cpp @@ -27,36 +27,65 @@ typename C::iterator addEntry(C &Container, StringRef InstallName) { return Container.emplace(I, InstallName); } + +template +typename C::iterator addEntry(C &Container, const Target &Target_) { + auto Iter = + lower_bound(Container, Target_, [](const Target &LHS, const Target &RHS) { + return LHS < RHS; + }); + if ((Iter != std::end(Container)) && !(Target_ < *Iter)) + return Iter; + + return Container.insert(Iter, Target_); +} } // end namespace detail. 
-void InterfaceFile::addAllowableClient(StringRef Name, - ArchitectureSet Architectures) { - auto Client = detail::addEntry(AllowableClients, Name); - Client->addArchitectures(Architectures); +void InterfaceFileRef::addTarget(const Target &Target) { + detail::addEntry(Targets, Target); +} + +void InterfaceFile::addAllowableClient(StringRef InstallName, + const Target &Target) { + auto Client = detail::addEntry(AllowableClients, InstallName); + Client->addTarget(Target); } void InterfaceFile::addReexportedLibrary(StringRef InstallName, - ArchitectureSet Architectures) { + const Target &Target) { auto Lib = detail::addEntry(ReexportedLibraries, InstallName); - Lib->addArchitectures(Architectures); + Lib->addTarget(Target); } -void InterfaceFile::addUUID(Architecture Arch, StringRef UUID) { - auto I = partition_point(UUIDs, - [=](const std::pair &O) { - return O.first < Arch; - }); +void InterfaceFile::addParentUmbrella(const Target &Target_, StringRef Parent) { + auto Iter = lower_bound(ParentUmbrellas, Target_, + [](const std::pair &LHS, + Target RHS) { return LHS.first < RHS; }); - if (I != UUIDs.end() && Arch == I->first) { - I->second = UUID; + if ((Iter != ParentUmbrellas.end()) && !(Target_ < Iter->first)) { + Iter->second = Parent; return; } - UUIDs.emplace(I, Arch, UUID); + ParentUmbrellas.emplace(Iter, Target_, Parent); return; } -void InterfaceFile::addUUID(Architecture Arch, uint8_t UUID[16]) { +void InterfaceFile::addUUID(const Target &Target_, StringRef UUID) { + auto Iter = lower_bound(UUIDs, Target_, + [](const std::pair &LHS, + Target RHS) { return LHS.first < RHS; }); + + if ((Iter != UUIDs.end()) && !(Target_ < Iter->first)) { + Iter->second = UUID; + return; + } + + UUIDs.emplace(Iter, Target_, UUID); + return; +} + +void InterfaceFile::addUUID(const Target &Target, uint8_t UUID[16]) { std::stringstream Stream; for (unsigned i = 0; i < 16; ++i) { if (i == 4 || i == 6 || i == 8 || i == 10) @@ -64,17 +93,30 @@ void InterfaceFile::addUUID(Architecture Arch, uint8_t UUID[16]) { Stream << std::setfill('0') << std::setw(2) << std::uppercase << std::hex << static_cast(UUID[i]); } - addUUID(Arch, Stream.str()); + addUUID(Target, Stream.str()); +} + +void InterfaceFile::addTarget(const Target &Target) { + detail::addEntry(Targets, Target); +} + +InterfaceFile::const_filtered_target_range +InterfaceFile::targets(ArchitectureSet Archs) const { + std::function fn = [Archs](const Target &Target_) { + return Archs.has(Target_.Arch); + }; + return make_filter_range(Targets, fn); } void InterfaceFile::addSymbol(SymbolKind Kind, StringRef Name, - ArchitectureSet Archs, SymbolFlags Flags) { + const TargetList &Targets, SymbolFlags Flags) { Name = copyString(Name); auto result = Symbols.try_emplace(SymbolsMapKey{Kind, Name}, nullptr); if (result.second) - result.first->second = new (Allocator) Symbol{Kind, Name, Archs, Flags}; + result.first->second = new (Allocator) Symbol{Kind, Name, Targets, Flags}; else - result.first->second->addArchitectures(Archs); + for (const auto &Target : Targets) + result.first->second->addTarget(Target); } } // end namespace MachO. diff --git a/lib/TextAPI/MachO/Platform.cpp b/lib/TextAPI/MachO/Platform.cpp new file mode 100644 index 000000000000..588ec9a4d83b --- /dev/null +++ b/lib/TextAPI/MachO/Platform.cpp @@ -0,0 +1,91 @@ +//===- llvm/TextAPI/MachO/Platform.cpp - Platform ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementations of Platform Helper functions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/TextAPI/MachO/Platform.h" + +namespace llvm { +namespace MachO { + +PlatformKind mapToPlatformKind(PlatformKind Platform, bool WantSim) { + switch (Platform) { + default: + return Platform; + case PlatformKind::iOS: + return WantSim ? PlatformKind::iOSSimulator : PlatformKind::iOS; + case PlatformKind::tvOS: + return WantSim ? PlatformKind::tvOSSimulator : PlatformKind::tvOS; + case PlatformKind::watchOS: + return WantSim ? PlatformKind::watchOSSimulator : PlatformKind::watchOS; + } + llvm_unreachable("Unknown llvm.MachO.PlatformKind enum"); +} + +PlatformKind mapToPlatformKind(const Triple &Target) { + switch (Target.getOS()) { + default: + return PlatformKind::unknown; + case Triple::MacOSX: + return PlatformKind::macOS; + case Triple::IOS: + if (Target.isSimulatorEnvironment()) + return PlatformKind::iOSSimulator; + if (Target.getEnvironment() == Triple::MacABI) + return PlatformKind::macCatalyst; + return PlatformKind::iOS; + case Triple::TvOS: + return Target.isSimulatorEnvironment() ? PlatformKind::tvOSSimulator + : PlatformKind::tvOS; + case Triple::WatchOS: + return Target.isSimulatorEnvironment() ? PlatformKind::watchOSSimulator + : PlatformKind::watchOS; + // TODO: add bridgeOS once in llvm::Triple + } + llvm_unreachable("Unknown Target Triple"); +} + +PlatformSet mapToPlatformSet(ArrayRef Targets) { + PlatformSet Result; + for (const auto &Target : Targets) + Result.insert(mapToPlatformKind(Target)); + return Result; +} + +StringRef getPlatformName(PlatformKind Platform) { + switch (Platform) { + case PlatformKind::unknown: + return "unknown"; + case PlatformKind::macOS: + return "macOS"; + case PlatformKind::iOS: + return "iOS"; + case PlatformKind::tvOS: + return "tvOS"; + case PlatformKind::watchOS: + return "watchOS"; + case PlatformKind::bridgeOS: + return "bridgeOS"; + case PlatformKind::macCatalyst: + return "macCatalyst"; + case PlatformKind::iOSSimulator: + return "iOS Simulator"; + case PlatformKind::tvOSSimulator: + return "tvOS Simulator"; + case PlatformKind::watchOSSimulator: + return "watchOS Simulator"; + } + llvm_unreachable("Unknown llvm.MachO.PlatformKind enum"); +} + +} // end namespace MachO. +} // end namespace llvm. diff --git a/lib/TextAPI/MachO/Symbol.cpp b/lib/TextAPI/MachO/Symbol.cpp index 731b264f6082..9f2d8172beed 100644 --- a/lib/TextAPI/MachO/Symbol.cpp +++ b/lib/TextAPI/MachO/Symbol.cpp @@ -45,5 +45,14 @@ LLVM_DUMP_METHOD void Symbol::dump(raw_ostream &OS) const { } #endif +Symbol::const_filtered_target_range +Symbol::targets(ArchitectureSet Architectures) const { + std::function FN = + [Architectures](const Target &Target) { + return Architectures.has(Target.Arch); + }; + return make_filter_range(Targets, FN); +} + } // end namespace MachO. } // end namespace llvm. diff --git a/lib/TextAPI/MachO/Target.cpp b/lib/TextAPI/MachO/Target.cpp new file mode 100644 index 000000000000..aee8ef421425 --- /dev/null +++ b/lib/TextAPI/MachO/Target.cpp @@ -0,0 +1,75 @@ +//===- tapi/Core/Target.cpp - Target ----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TextAPI/MachO/Target.h" + +namespace llvm { +namespace MachO { + +Expected Target::create(StringRef TargetValue) { + auto Result = TargetValue.split('-'); + auto ArchitectureStr = Result.first; + auto Architecture = getArchitectureFromName(ArchitectureStr); + auto PlatformStr = Result.second; + PlatformKind Platform; + Platform = StringSwitch(PlatformStr) + .Case("macos", PlatformKind::macOS) + .Case("ios", PlatformKind::iOS) + .Case("tvos", PlatformKind::tvOS) + .Case("watchos", PlatformKind::watchOS) + .Case("bridgeos", PlatformKind::bridgeOS) + .Case("maccatalyst", PlatformKind::macCatalyst) + .Case("ios-simulator", PlatformKind::iOSSimulator) + .Case("tvos-simulator", PlatformKind::tvOSSimulator) + .Case("watchos-simulator", PlatformKind::watchOSSimulator) + .Default(PlatformKind::unknown); + + if (Platform == PlatformKind::unknown) { + if (PlatformStr.startswith("<") && PlatformStr.endswith(">")) { + PlatformStr = PlatformStr.drop_front().drop_back(); + unsigned long long RawValue; + if (!PlatformStr.getAsInteger(10, RawValue)) + Platform = (PlatformKind)RawValue; + } + } + + return Target{Architecture, Platform}; +} + +Target::operator std::string() const { + return (getArchitectureName(Arch) + " (" + getPlatformName(Platform) + ")") + .str(); +} + +raw_ostream &operator<<(raw_ostream &OS, const Target &Target) { + OS << std::string(Target); + return OS; +} + +PlatformSet mapToPlatformSet(ArrayRef Targets) { + PlatformSet Result; + for (const auto &Target : Targets) + Result.insert(Target.Platform); + return Result; +} + +ArchitectureSet mapToArchitectureSet(ArrayRef Targets) { + ArchitectureSet Result; + for (const auto &Target : Targets) + Result.set(Target.Arch); + return Result; +} + +} // end namespace MachO. +} // end namespace llvm. diff --git a/lib/TextAPI/MachO/TextStub.cpp b/lib/TextAPI/MachO/TextStub.cpp index 799ebdc883ab..0584e43d5893 100644 --- a/lib/TextAPI/MachO/TextStub.cpp +++ b/lib/TextAPI/MachO/TextStub.cpp @@ -147,6 +147,58 @@ Each undefineds section is defined as following: objc-ivars: [] # Optional: List of Objective C Instance Variables weak-ref-symbols: [] # Optional: List of weak defined symbols */ + +/* + + YAML Format specification. + +--- !tapi-tbd +tbd-version: 4 # The tbd version for format +targets: [ armv7-ios, x86_64-maccatalyst ] # The list of applicable tapi supported target triples +uuids: # Optional: List of target and UUID pairs. + - target: armv7-ios + value: ... + - target: x86_64-maccatalyst + value: ... +flags: [] # Optional: +install-name: /u/l/libfoo.dylib # +current-version: 1.2.3 # Optional: defaults to 1.0 +compatibility-version: 1.0 # Optional: defaults to 1.0 +swift-abi-version: 0 # Optional: defaults to 0 +parent-umbrella: # Optional: +allowable-clients: + - targets: [ armv7-ios ] # Optional: + clients: [ clientA ] +exports: # List of export sections +... +re-exports: # List of reexport sections +... +undefineds: # List of undefineds sections +... 
+ +Each export and reexport section is defined as following: + +- targets: [ arm64-macos ] # The list of target triples associated with symbols + symbols: [ _symA ] # Optional: List of symbols + objc-classes: [] # Optional: List of Objective-C classes + objc-eh-types: [] # Optional: List of Objective-C classes + # with EH + objc-ivars: [] # Optional: List of Objective C Instance + # Variables + weak-symbols: [] # Optional: List of weak defined symbols + thread-local-symbols: [] # Optional: List of thread local symbols +- targets: [ arm64-macos, x86_64-maccatalyst ] # Optional: Targets for applicable additional symbols + symbols: [ _symB ] # Optional: List of symbols + +Each undefineds section is defined as following: +- targets: [ arm64-macos ] # The list of target triples associated with symbols + symbols: [ _symC ] # Optional: List of symbols + objc-classes: [] # Optional: List of Objective-C classes + objc-eh-types: [] # Optional: List of Objective-C classes + # with EH + objc-ivars: [] # Optional: List of Objective C Instance Variables + weak-symbols: [] # Optional: List of weak defined symbols +*/ // clang-format on using namespace llvm; @@ -175,6 +227,38 @@ struct UndefinedSection { std::vector WeakRefSymbols; }; +// Sections for direct target mapping in TBDv4 +struct SymbolSection { + TargetList Targets; + std::vector Symbols; + std::vector Classes; + std::vector ClassEHs; + std::vector Ivars; + std::vector WeakSymbols; + std::vector TlvSymbols; +}; + +struct MetadataSection { + enum Option { Clients, Libraries }; + std::vector Targets; + std::vector Values; +}; + +struct UmbrellaSection { + std::vector Targets; + std::string Umbrella; +}; + +// UUID's for TBDv4 are mapped to target not arch +struct UUIDv4 { + Target TargetID; + std::string Value; + + UUIDv4() = default; + UUIDv4(const Target &TargetID, const std::string &Value) + : TargetID(TargetID), Value(Value) {} +}; + // clang-format off enum TBDFlags : unsigned { None = 0U, @@ -189,6 +273,12 @@ enum TBDFlags : unsigned { LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(Architecture) LLVM_YAML_IS_SEQUENCE_VECTOR(ExportSection) LLVM_YAML_IS_SEQUENCE_VECTOR(UndefinedSection) +// Specific to TBDv4 +LLVM_YAML_IS_SEQUENCE_VECTOR(SymbolSection) +LLVM_YAML_IS_SEQUENCE_VECTOR(MetadataSection) +LLVM_YAML_IS_SEQUENCE_VECTOR(UmbrellaSection) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(Target) +LLVM_YAML_IS_SEQUENCE_VECTOR(UUIDv4) namespace llvm { namespace yaml { @@ -231,6 +321,49 @@ template <> struct MappingTraits { } }; +template <> struct MappingTraits { + static void mapping(IO &IO, SymbolSection &Section) { + IO.mapRequired("targets", Section.Targets); + IO.mapOptional("symbols", Section.Symbols); + IO.mapOptional("objc-classes", Section.Classes); + IO.mapOptional("objc-eh-types", Section.ClassEHs); + IO.mapOptional("objc-ivars", Section.Ivars); + IO.mapOptional("weak-symbols", Section.WeakSymbols); + IO.mapOptional("thread-local-symbols", Section.TlvSymbols); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, UmbrellaSection &Section) { + IO.mapRequired("targets", Section.Targets); + IO.mapRequired("umbrella", Section.Umbrella); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, UUIDv4 &UUID) { + IO.mapRequired("target", UUID.TargetID); + IO.mapRequired("value", UUID.Value); + } +}; + +template <> +struct MappingContextTraits { + static void mapping(IO &IO, MetadataSection &Section, + MetadataSection::Option &OptionKind) { + IO.mapRequired("targets", Section.Targets); + switch (OptionKind) { + case 
MetadataSection::Option::Clients: + IO.mapRequired("clients", Section.Values); + return; + case MetadataSection::Option::Libraries: + IO.mapRequired("libraries", Section.Values); + return; + } + llvm_unreachable("unexpected option for metadata"); + } +}; + template <> struct ScalarBitSetTraits { static void bitset(IO &IO, TBDFlags &Flags) { IO.bitSetCase(Flags, "flat_namespace", TBDFlags::FlatNamespace); @@ -240,13 +373,67 @@ template <> struct ScalarBitSetTraits { } }; +template <> struct ScalarTraits { + static void output(const Target &Value, void *, raw_ostream &OS) { + OS << Value.Arch << "-"; + switch (Value.Platform) { + default: + OS << "unknown"; + break; + case PlatformKind::macOS: + OS << "macos"; + break; + case PlatformKind::iOS: + OS << "ios"; + break; + case PlatformKind::tvOS: + OS << "tvos"; + break; + case PlatformKind::watchOS: + OS << "watchos"; + break; + case PlatformKind::bridgeOS: + OS << "bridgeos"; + break; + case PlatformKind::macCatalyst: + OS << "maccatalyst"; + break; + case PlatformKind::iOSSimulator: + OS << "ios-simulator"; + break; + case PlatformKind::tvOSSimulator: + OS << "tvos-simulator"; + break; + case PlatformKind::watchOSSimulator: + OS << "watchos-simulator"; + break; + } + } + + static StringRef input(StringRef Scalar, void *, Target &Value) { + auto Result = Target::create(Scalar); + if (!Result) + return toString(Result.takeError()); + + Value = *Result; + if (Value.Arch == AK_unknown) + return "unknown architecture"; + if (Value.Platform == PlatformKind::unknown) + return "unknown platform"; + + return {}; + } + + static QuotingType mustQuote(StringRef) { return QuotingType::None; } +}; + template <> struct MappingTraits { struct NormalizedTBD { explicit NormalizedTBD(IO &IO) {} NormalizedTBD(IO &IO, const InterfaceFile *&File) { Architectures = File->getArchitectures(); UUIDs = File->uuids(); - Platform = File->getPlatform(); + Platforms = File->getPlatforms(); InstallName = File->getInstallName(); CurrentVersion = PackedVersion(File->getCurrentVersion()); CompatibilityVersion = PackedVersion(File->getCompatibilityVersion()); @@ -263,7 +450,10 @@ template <> struct MappingTraits { if (File->isInstallAPI()) Flags |= TBDFlags::InstallAPI; - ParentUmbrella = File->getParentUmbrella(); + for (const auto &Iter : File->umbrellas()) { + ParentUmbrella = Iter.second; + break; + } std::set ArchSet; for (const auto &Library : File->allowableClients()) @@ -396,6 +586,29 @@ template <> struct MappingTraits { } } + // TBD v1 - TBD v3 files only support one platform and several + // architectures. It is possible to have more than one platform for TBD v3 + // files, but the architectures don't apply to all + // platforms, specifically to filter out the i386 slice from + // platform macCatalyst. 
+ TargetList synthesizeTargets(ArchitectureSet Architectures, + const PlatformSet &Platforms) { + TargetList Targets; + + for (auto Platform : Platforms) { + Platform = mapToPlatformKind(Platform, Architectures.hasX86()); + + for (const auto &&Architecture : Architectures) { + if ((Architecture == AK_i386) && + (Platform == PlatformKind::macCatalyst)) + continue; + + Targets.emplace_back(Architecture, Platform); + } + } + return Targets; + } + const InterfaceFile *denormalize(IO &IO) { auto Ctx = reinterpret_cast(IO.getContext()); assert(Ctx); @@ -403,16 +616,16 @@ template <> struct MappingTraits { auto *File = new InterfaceFile; File->setPath(Ctx->Path); File->setFileType(Ctx->FileKind); + File->addTargets(synthesizeTargets(Architectures, Platforms)); for (auto &ID : UUIDs) File->addUUID(ID.first, ID.second); - File->setPlatform(Platform); - File->setArchitectures(Architectures); File->setInstallName(InstallName); File->setCurrentVersion(CurrentVersion); File->setCompatibilityVersion(CompatibilityVersion); File->setSwiftABIVersion(SwiftABIVersion); File->setObjCConstraint(ObjCConstraint); - File->setParentUmbrella(ParentUmbrella); + for (const auto &Target : File->targets()) + File->addParentUmbrella(Target, ParentUmbrella); if (Ctx->FileKind == FileType::TBD_V1) { File->setTwoLevelNamespace(); @@ -425,76 +638,80 @@ template <> struct MappingTraits { } for (const auto &Section : Exports) { - for (const auto &Library : Section.AllowableClients) - File->addAllowableClient(Library, Section.Architectures); - for (const auto &Library : Section.ReexportedLibraries) - File->addReexportedLibrary(Library, Section.Architectures); + const auto Targets = + synthesizeTargets(Section.Architectures, Platforms); + + for (const auto &Lib : Section.AllowableClients) + for (const auto &Target : Targets) + File->addAllowableClient(Lib, Target); + + for (const auto &Lib : Section.ReexportedLibraries) + for (const auto &Target : Targets) + File->addReexportedLibrary(Lib, Target); for (const auto &Symbol : Section.Symbols) { if (Ctx->FileKind != FileType::TBD_V3 && Symbol.value.startswith("_OBJC_EHTYPE_$_")) File->addSymbol(SymbolKind::ObjectiveCClassEHType, - Symbol.value.drop_front(15), Section.Architectures); + Symbol.value.drop_front(15), Targets); else - File->addSymbol(SymbolKind::GlobalSymbol, Symbol, - Section.Architectures); + File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets); } for (auto &Symbol : Section.Classes) { auto Name = Symbol.value; if (Ctx->FileKind != FileType::TBD_V3) Name = Name.drop_front(); - File->addSymbol(SymbolKind::ObjectiveCClass, Name, - Section.Architectures); + File->addSymbol(SymbolKind::ObjectiveCClass, Name, Targets); } for (auto &Symbol : Section.ClassEHs) - File->addSymbol(SymbolKind::ObjectiveCClassEHType, Symbol, - Section.Architectures); + File->addSymbol(SymbolKind::ObjectiveCClassEHType, Symbol, Targets); for (auto &Symbol : Section.IVars) { auto Name = Symbol.value; if (Ctx->FileKind != FileType::TBD_V3) Name = Name.drop_front(); File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, Name, - Section.Architectures); + Targets); } for (auto &Symbol : Section.WeakDefSymbols) - File->addSymbol(SymbolKind::GlobalSymbol, Symbol, - Section.Architectures, SymbolFlags::WeakDefined); + File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets, + SymbolFlags::WeakDefined); for (auto &Symbol : Section.TLVSymbols) - File->addSymbol(SymbolKind::GlobalSymbol, Symbol, - Section.Architectures, SymbolFlags::ThreadLocalValue); + File->addSymbol(SymbolKind::GlobalSymbol, 
Symbol, Targets, + SymbolFlags::ThreadLocalValue); } for (const auto &Section : Undefineds) { + const auto Targets = + synthesizeTargets(Section.Architectures, Platforms); for (auto &Symbol : Section.Symbols) { if (Ctx->FileKind != FileType::TBD_V3 && Symbol.value.startswith("_OBJC_EHTYPE_$_")) File->addSymbol(SymbolKind::ObjectiveCClassEHType, - Symbol.value.drop_front(15), Section.Architectures, + Symbol.value.drop_front(15), Targets, SymbolFlags::Undefined); else - File->addSymbol(SymbolKind::GlobalSymbol, Symbol, - Section.Architectures, SymbolFlags::Undefined); + File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets, + SymbolFlags::Undefined); } for (auto &Symbol : Section.Classes) { auto Name = Symbol.value; if (Ctx->FileKind != FileType::TBD_V3) Name = Name.drop_front(); - File->addSymbol(SymbolKind::ObjectiveCClass, Name, - Section.Architectures, SymbolFlags::Undefined); + File->addSymbol(SymbolKind::ObjectiveCClass, Name, Targets, + SymbolFlags::Undefined); } for (auto &Symbol : Section.ClassEHs) - File->addSymbol(SymbolKind::ObjectiveCClassEHType, Symbol, - Section.Architectures, SymbolFlags::Undefined); + File->addSymbol(SymbolKind::ObjectiveCClassEHType, Symbol, Targets, + SymbolFlags::Undefined); for (auto &Symbol : Section.IVars) { auto Name = Symbol.value; if (Ctx->FileKind != FileType::TBD_V3) Name = Name.drop_front(); - File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, Name, - Section.Architectures, SymbolFlags::Undefined); + File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, Name, Targets, + SymbolFlags::Undefined); } for (auto &Symbol : Section.WeakRefSymbols) - File->addSymbol(SymbolKind::GlobalSymbol, Symbol, - Section.Architectures, + File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets, SymbolFlags::Undefined | SymbolFlags::WeakReferenced); } @@ -513,7 +730,7 @@ template <> struct MappingTraits { std::vector Architectures; std::vector UUIDs; - PlatformKind Platform{PlatformKind::unknown}; + PlatformSet Platforms; StringRef InstallName; PackedVersion CurrentVersion; PackedVersion CompatibilityVersion; @@ -525,71 +742,336 @@ template <> struct MappingTraits { std::vector Undefineds; }; + static void setFileTypeForInput(TextAPIContext *Ctx, IO &IO) { + if (IO.mapTag("!tapi-tbd", false)) + Ctx->FileKind = FileType::TBD_V4; + else if (IO.mapTag("!tapi-tbd-v3", false)) + Ctx->FileKind = FileType::TBD_V3; + else if (IO.mapTag("!tapi-tbd-v2", false)) + Ctx->FileKind = FileType::TBD_V2; + else if (IO.mapTag("!tapi-tbd-v1", false) || + IO.mapTag("tag:yaml.org,2002:map", false)) + Ctx->FileKind = FileType::TBD_V1; + else { + Ctx->FileKind = FileType::Invalid; + return; + } + } + static void mapping(IO &IO, const InterfaceFile *&File) { auto *Ctx = reinterpret_cast(IO.getContext()); assert((!Ctx || !IO.outputting() || (Ctx && Ctx->FileKind != FileType::Invalid)) && "File type is not set in YAML context"); - MappingNormalization Keys(IO, File); - // prope file type when reading. if (!IO.outputting()) { - if (IO.mapTag("!tapi-tbd-v2", false)) - Ctx->FileKind = FileType::TBD_V2; - else if (IO.mapTag("!tapi-tbd-v3", false)) - Ctx->FileKind = FileType::TBD_V2; - else if (IO.mapTag("!tapi-tbd-v1", false) || - IO.mapTag("tag:yaml.org,2002:map", false)) - Ctx->FileKind = FileType::TBD_V1; - else { + setFileTypeForInput(Ctx, IO); + switch (Ctx->FileKind) { + default: + break; + case FileType::TBD_V4: + mapKeysToValuesV4(IO, File); + return; + case FileType::Invalid: IO.setError("unsupported file type"); return; } - } - - // Set file tyoe when writing. 
- if (IO.outputting()) { + } else { + // Set file type when writing. switch (Ctx->FileKind) { default: llvm_unreachable("unexpected file type"); - case FileType::TBD_V1: - // Don't write the tag into the .tbd file for TBD v1. + case FileType::TBD_V4: + mapKeysToValuesV4(IO, File); + return; + case FileType::TBD_V3: + IO.mapTag("!tapi-tbd-v3", true); break; case FileType::TBD_V2: IO.mapTag("!tapi-tbd-v2", true); break; - case FileType::TBD_V3: - IO.mapTag("!tapi-tbd-v3", true); + case FileType::TBD_V1: + // Don't write the tag into the .tbd file for TBD v1 break; } } + mapKeysToValues(Ctx->FileKind, IO, File); + } + + using SectionList = std::vector; + struct NormalizedTBD_V4 { + explicit NormalizedTBD_V4(IO &IO) {} + NormalizedTBD_V4(IO &IO, const InterfaceFile *&File) { + auto Ctx = reinterpret_cast(IO.getContext()); + assert(Ctx); + TBDVersion = Ctx->FileKind >> 1; + Targets.insert(Targets.begin(), File->targets().begin(), + File->targets().end()); + for (const auto &IT : File->uuids()) + UUIDs.emplace_back(IT.first, IT.second); + InstallName = File->getInstallName(); + CurrentVersion = File->getCurrentVersion(); + CompatibilityVersion = File->getCompatibilityVersion(); + SwiftABIVersion = File->getSwiftABIVersion(); + + Flags = TBDFlags::None; + if (!File->isApplicationExtensionSafe()) + Flags |= TBDFlags::NotApplicationExtensionSafe; + + if (!File->isTwoLevelNamespace()) + Flags |= TBDFlags::FlatNamespace; + + if (File->isInstallAPI()) + Flags |= TBDFlags::InstallAPI; + + { + std::map valueToTargetList; + for (const auto &it : File->umbrellas()) + valueToTargetList[it.second].emplace_back(it.first); + + for (const auto &it : valueToTargetList) { + UmbrellaSection CurrentSection; + CurrentSection.Targets.insert(CurrentSection.Targets.begin(), + it.second.begin(), it.second.end()); + CurrentSection.Umbrella = it.first; + ParentUmbrellas.emplace_back(std::move(CurrentSection)); + } + } + + assignTargetsToLibrary(File->allowableClients(), AllowableClients); + assignTargetsToLibrary(File->reexportedLibraries(), ReexportedLibraries); + + auto handleSymbols = + [](SectionList &CurrentSections, + InterfaceFile::const_filtered_symbol_range Symbols, + std::function Pred) { + std::set TargetSet; + std::map SymbolToTargetList; + for (const auto *Symbol : Symbols) { + if (!Pred(Symbol)) + continue; + TargetList Targets(Symbol->targets()); + SymbolToTargetList[Symbol] = Targets; + TargetSet.emplace(std::move(Targets)); + } + for (const auto &TargetIDs : TargetSet) { + SymbolSection CurrentSection; + CurrentSection.Targets.insert(CurrentSection.Targets.begin(), + TargetIDs.begin(), TargetIDs.end()); + + for (const auto &IT : SymbolToTargetList) { + if (IT.second != TargetIDs) + continue; + + const auto *Symbol = IT.first; + switch (Symbol->getKind()) { + case SymbolKind::GlobalSymbol: + if (Symbol->isWeakDefined()) + CurrentSection.WeakSymbols.emplace_back(Symbol->getName()); + else if (Symbol->isThreadLocalValue()) + CurrentSection.TlvSymbols.emplace_back(Symbol->getName()); + else + CurrentSection.Symbols.emplace_back(Symbol->getName()); + break; + case SymbolKind::ObjectiveCClass: + CurrentSection.Classes.emplace_back(Symbol->getName()); + break; + case SymbolKind::ObjectiveCClassEHType: + CurrentSection.ClassEHs.emplace_back(Symbol->getName()); + break; + case SymbolKind::ObjectiveCInstanceVariable: + CurrentSection.Ivars.emplace_back(Symbol->getName()); + break; + } + } + sort(CurrentSection.Symbols); + sort(CurrentSection.Classes); + sort(CurrentSection.ClassEHs); + sort(CurrentSection.Ivars); 
+ sort(CurrentSection.WeakSymbols); + sort(CurrentSection.TlvSymbols); + CurrentSections.emplace_back(std::move(CurrentSection)); + } + }; + + handleSymbols(Exports, File->exports(), [](const Symbol *Symbol) { + return !Symbol->isReexported(); + }); + handleSymbols(Reexports, File->exports(), [](const Symbol *Symbol) { + return Symbol->isReexported(); + }); + handleSymbols(Undefineds, File->undefineds(), + [](const Symbol *Symbol) { return true; }); + } + + const InterfaceFile *denormalize(IO &IO) { + auto Ctx = reinterpret_cast(IO.getContext()); + assert(Ctx); + + auto *File = new InterfaceFile; + File->setPath(Ctx->Path); + File->setFileType(Ctx->FileKind); + for (auto &id : UUIDs) + File->addUUID(id.TargetID, id.Value); + File->addTargets(Targets); + File->setInstallName(InstallName); + File->setCurrentVersion(CurrentVersion); + File->setCompatibilityVersion(CompatibilityVersion); + File->setSwiftABIVersion(SwiftABIVersion); + for (const auto &CurrentSection : ParentUmbrellas) + for (const auto &target : CurrentSection.Targets) + File->addParentUmbrella(target, CurrentSection.Umbrella); + File->setTwoLevelNamespace(!(Flags & TBDFlags::FlatNamespace)); + File->setApplicationExtensionSafe( + !(Flags & TBDFlags::NotApplicationExtensionSafe)); + File->setInstallAPI(Flags & TBDFlags::InstallAPI); + + for (const auto &CurrentSection : AllowableClients) { + for (const auto &lib : CurrentSection.Values) + for (const auto &Target : CurrentSection.Targets) + File->addAllowableClient(lib, Target); + } + + for (const auto &CurrentSection : ReexportedLibraries) { + for (const auto &Lib : CurrentSection.Values) + for (const auto &Target : CurrentSection.Targets) + File->addReexportedLibrary(Lib, Target); + } + + auto handleSymbols = [File](const SectionList &CurrentSections, + SymbolFlags Flag = SymbolFlags::None) { + for (const auto &CurrentSection : CurrentSections) { + for (auto &sym : CurrentSection.Symbols) + File->addSymbol(SymbolKind::GlobalSymbol, sym, + CurrentSection.Targets, Flag); + + for (auto &sym : CurrentSection.Classes) + File->addSymbol(SymbolKind::ObjectiveCClass, sym, + CurrentSection.Targets); + + for (auto &sym : CurrentSection.ClassEHs) + File->addSymbol(SymbolKind::ObjectiveCClassEHType, sym, + CurrentSection.Targets); + + for (auto &sym : CurrentSection.Ivars) + File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, sym, + CurrentSection.Targets); + + for (auto &sym : CurrentSection.WeakSymbols) + File->addSymbol(SymbolKind::GlobalSymbol, sym, + CurrentSection.Targets); + for (auto &sym : CurrentSection.TlvSymbols) + File->addSymbol(SymbolKind::GlobalSymbol, sym, + CurrentSection.Targets, + SymbolFlags::ThreadLocalValue); + } + }; + + handleSymbols(Exports); + handleSymbols(Reexports, SymbolFlags::Rexported); + handleSymbols(Undefineds, SymbolFlags::Undefined); + + return File; + } + + unsigned TBDVersion; + std::vector UUIDs; + TargetList Targets; + StringRef InstallName; + PackedVersion CurrentVersion; + PackedVersion CompatibilityVersion; + SwiftVersion SwiftABIVersion{0}; + std::vector AllowableClients; + std::vector ReexportedLibraries; + TBDFlags Flags{TBDFlags::None}; + std::vector ParentUmbrellas; + SectionList Exports; + SectionList Reexports; + SectionList Undefineds; + + private: + void assignTargetsToLibrary(const std::vector &Libraries, + std::vector &Section) { + std::set targetSet; + std::map valueToTargetList; + for (const auto &library : Libraries) { + TargetList targets(library.targets()); + valueToTargetList[&library] = targets; + 
targetSet.emplace(std::move(targets)); + } + + for (const auto &targets : targetSet) { + MetadataSection CurrentSection; + CurrentSection.Targets.insert(CurrentSection.Targets.begin(), + targets.begin(), targets.end()); + + for (const auto &it : valueToTargetList) { + if (it.second != targets) + continue; + + CurrentSection.Values.emplace_back(it.first->getInstallName()); + } + llvm::sort(CurrentSection.Values); + Section.emplace_back(std::move(CurrentSection)); + } + } + }; + static void mapKeysToValues(FileType FileKind, IO &IO, + const InterfaceFile *&File) { + MappingNormalization Keys(IO, File); IO.mapRequired("archs", Keys->Architectures); - if (Ctx->FileKind != FileType::TBD_V1) + if (FileKind != FileType::TBD_V1) IO.mapOptional("uuids", Keys->UUIDs); - IO.mapRequired("platform", Keys->Platform); - if (Ctx->FileKind != FileType::TBD_V1) + IO.mapRequired("platform", Keys->Platforms); + if (FileKind != FileType::TBD_V1) IO.mapOptional("flags", Keys->Flags, TBDFlags::None); IO.mapRequired("install-name", Keys->InstallName); IO.mapOptional("current-version", Keys->CurrentVersion, PackedVersion(1, 0, 0)); IO.mapOptional("compatibility-version", Keys->CompatibilityVersion, PackedVersion(1, 0, 0)); - if (Ctx->FileKind != FileType::TBD_V3) + if (FileKind != FileType::TBD_V3) IO.mapOptional("swift-version", Keys->SwiftABIVersion, SwiftVersion(0)); else IO.mapOptional("swift-abi-version", Keys->SwiftABIVersion, SwiftVersion(0)); IO.mapOptional("objc-constraint", Keys->ObjCConstraint, - (Ctx->FileKind == FileType::TBD_V1) + (FileKind == FileType::TBD_V1) ? ObjCConstraintType::None : ObjCConstraintType::Retain_Release); - if (Ctx->FileKind != FileType::TBD_V1) + if (FileKind != FileType::TBD_V1) IO.mapOptional("parent-umbrella", Keys->ParentUmbrella, StringRef()); IO.mapOptional("exports", Keys->Exports); - if (Ctx->FileKind != FileType::TBD_V1) + if (FileKind != FileType::TBD_V1) IO.mapOptional("undefineds", Keys->Undefineds); } + + static void mapKeysToValuesV4(IO &IO, const InterfaceFile *&File) { + MappingNormalization Keys(IO, + File); + IO.mapTag("!tapi-tbd", true); + IO.mapRequired("tbd-version", Keys->TBDVersion); + IO.mapRequired("targets", Keys->Targets); + IO.mapOptional("uuids", Keys->UUIDs); + IO.mapOptional("flags", Keys->Flags, TBDFlags::None); + IO.mapRequired("install-name", Keys->InstallName); + IO.mapOptional("current-version", Keys->CurrentVersion, + PackedVersion(1, 0, 0)); + IO.mapOptional("compatibility-version", Keys->CompatibilityVersion, + PackedVersion(1, 0, 0)); + IO.mapOptional("swift-abi-version", Keys->SwiftABIVersion, SwiftVersion(0)); + IO.mapOptional("parent-umbrella", Keys->ParentUmbrellas); + auto OptionKind = MetadataSection::Option::Clients; + IO.mapOptionalWithContext("allowable-clients", Keys->AllowableClients, + OptionKind); + OptionKind = MetadataSection::Option::Libraries; + IO.mapOptionalWithContext("reexported-libraries", Keys->ReexportedLibraries, + OptionKind); + IO.mapOptional("exports", Keys->Exports); + IO.mapOptional("reexports", Keys->Reexports); + IO.mapOptional("undefineds", Keys->Undefineds); + } }; template <> @@ -623,15 +1105,17 @@ static void DiagHandler(const SMDiagnostic &Diag, void *Context) { } Expected> -TextAPIReader::get(std::unique_ptr InputBuffer) { +TextAPIReader::get(MemoryBufferRef InputBuffer) { TextAPIContext Ctx; - Ctx.Path = InputBuffer->getBufferIdentifier(); - yaml::Input YAMLIn(InputBuffer->getBuffer(), &Ctx, DiagHandler, &Ctx); + Ctx.Path = InputBuffer.getBufferIdentifier(); + yaml::Input 
YAMLIn(InputBuffer.getBuffer(), &Ctx, DiagHandler, &Ctx); // Fill vector with interface file objects created by parsing the YAML file. std::vector Files; YAMLIn >> Files; + // YAMLIn dynamically allocates for Interface file and in case of error, + // memory leak will occur unless wrapped around unique_ptr auto File = std::unique_ptr( const_cast(Files.front())); diff --git a/lib/TextAPI/MachO/TextStubCommon.cpp b/lib/TextAPI/MachO/TextStubCommon.cpp index 00382cd24573..183c5d5a93b0 100644 --- a/lib/TextAPI/MachO/TextStubCommon.cpp +++ b/lib/TextAPI/MachO/TextStubCommon.cpp @@ -41,9 +41,21 @@ void ScalarEnumerationTraits::enumeration( IO.enumCase(Constraint, "gc", ObjCConstraintType::GC); } -void ScalarTraits::output(const PlatformKind &Value, void *, - raw_ostream &OS) { - switch (Value) { +void ScalarTraits::output(const PlatformSet &Values, void *IO, + raw_ostream &OS) { + + const auto *Ctx = reinterpret_cast(IO); + assert((!Ctx || Ctx->FileKind != FileType::Invalid) && + "File type is not set in context"); + + if (Ctx && Ctx->FileKind == TBD_V3 && Values.count(PlatformKind::macOS) && + Values.count(PlatformKind::macCatalyst)) { + OS << "zippered"; + return; + } + + assert(Values.size() == 1U); + switch (*Values.begin()) { default: llvm_unreachable("unexpected platform"); break; @@ -64,21 +76,44 @@ void ScalarTraits::output(const PlatformKind &Value, void *, break; } } -StringRef ScalarTraits::input(StringRef Scalar, void *, - PlatformKind &Value) { - Value = StringSwitch(Scalar) - .Case("macosx", PlatformKind::macOS) - .Case("ios", PlatformKind::iOS) - .Case("watchos", PlatformKind::watchOS) - .Case("tvos", PlatformKind::tvOS) - .Case("bridgeos", PlatformKind::bridgeOS) - .Default(PlatformKind::unknown); - - if (Value == PlatformKind::unknown) + +StringRef ScalarTraits::input(StringRef Scalar, void *IO, + PlatformSet &Values) { + const auto *Ctx = reinterpret_cast(IO); + assert((!Ctx || Ctx->FileKind != FileType::Invalid) && + "File type is not set in context"); + + if (Scalar == "zippered") { + if (Ctx && Ctx->FileKind == FileType::TBD_V3) { + Values.insert(PlatformKind::macOS); + Values.insert(PlatformKind::macCatalyst); + return {}; + } + return "invalid platform"; + } + + auto Platform = StringSwitch(Scalar) + .Case("unknown", PlatformKind::unknown) + .Case("macosx", PlatformKind::macOS) + .Case("ios", PlatformKind::iOS) + .Case("watchos", PlatformKind::watchOS) + .Case("tvos", PlatformKind::tvOS) + .Case("bridgeos", PlatformKind::bridgeOS) + .Case("iosmac", PlatformKind::macCatalyst) + .Default(PlatformKind::unknown); + + if (Platform == PlatformKind::macCatalyst) + if (Ctx && Ctx->FileKind != FileType::TBD_V3) + return "invalid platform"; + + if (Platform == PlatformKind::unknown) return "unknown platform"; + + Values.insert(Platform); return {}; } -QuotingType ScalarTraits::mustQuote(StringRef) { + +QuotingType ScalarTraits::mustQuote(StringRef) { return QuotingType::None; } @@ -137,14 +172,25 @@ void ScalarTraits::output(const SwiftVersion &Value, void *, break; } } -StringRef ScalarTraits::input(StringRef Scalar, void *, +StringRef ScalarTraits::input(StringRef Scalar, void *IO, SwiftVersion &Value) { - Value = StringSwitch(Scalar) - .Case("1.0", 1) - .Case("1.1", 2) - .Case("2.0", 3) - .Case("3.0", 4) - .Default(0); + const auto *Ctx = reinterpret_cast(IO); + assert((!Ctx || Ctx->FileKind != FileType::Invalid) && + "File type is not set in context"); + + if (Ctx->FileKind == FileType::TBD_V4) { + if (Scalar.getAsInteger(10, Value)) + return "invalid Swift ABI version."; + 
return {}; + } else { + Value = StringSwitch(Scalar) + .Case("1.0", 1) + .Case("1.1", 2) + .Case("2.0", 3) + .Case("3.0", 4) + .Default(0); + } + if (Value != SwiftVersion(0)) return {}; @@ -166,10 +212,11 @@ StringRef ScalarTraits::input(StringRef Scalar, void *, UUID &Value) { auto UUID = Split.second.trim(); if (UUID.empty()) return "invalid uuid string pair"; - Value.first = getArchitectureFromName(Arch); Value.second = UUID; + Value.first = Target{getArchitectureFromName(Arch), PlatformKind::unknown}; return {}; } + QuotingType ScalarTraits::mustQuote(StringRef) { return QuotingType::Single; } diff --git a/lib/TextAPI/MachO/TextStubCommon.h b/lib/TextAPI/MachO/TextStubCommon.h index c4dd1075b1c8..a558cbcec9fb 100644 --- a/lib/TextAPI/MachO/TextStubCommon.h +++ b/lib/TextAPI/MachO/TextStubCommon.h @@ -21,7 +21,7 @@ #include "llvm/TextAPI/MachO/InterfaceFile.h" #include "llvm/TextAPI/MachO/PackedVersion.h" -using UUID = std::pair; +using UUID = std::pair; LLVM_YAML_STRONG_TYPEDEF(llvm::StringRef, FlowStringRef) LLVM_YAML_STRONG_TYPEDEF(uint8_t, SwiftVersion) @@ -41,9 +41,9 @@ template <> struct ScalarEnumerationTraits { static void enumeration(IO &, MachO::ObjCConstraintType &); }; -template <> struct ScalarTraits { - static void output(const MachO::PlatformKind &, void *, raw_ostream &); - static StringRef input(StringRef, void *, MachO::PlatformKind &); +template <> struct ScalarTraits { + static void output(const MachO::PlatformSet &, void *, raw_ostream &); + static StringRef input(StringRef, void *, MachO::PlatformSet &); static QuotingType mustQuote(StringRef); }; diff --git a/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp index 0b406cc531a4..19f253be7952 100644 --- a/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp +++ b/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp @@ -74,13 +74,6 @@ static MachineTypes getEmulation(StringRef S) { .Default(IMAGE_FILE_MACHINE_UNKNOWN); } -static std::string getImplibPath(StringRef Path) { - SmallString<128> Out = StringRef("lib"); - Out.append(Path); - sys::path::replace_extension(Out, ".a"); - return Out.str(); -} - int llvm::dlltoolDriverMain(llvm::ArrayRef ArgsArr) { DllOptTable Table; unsigned MissingIndex; @@ -149,13 +142,23 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef ArgsArr) { Def->OutputFile = Arg->getValue(); if (Def->OutputFile.empty()) { - llvm::errs() << "no output file specified\n"; + llvm::errs() << "no DLL name specified\n"; return 1; } std::string Path = Args.getLastArgValue(OPT_l); - if (Path.empty()) - Path = getImplibPath(Def->OutputFile); + + // If ExtName is set (if the "ExtName = Name" syntax was used), overwrite + // Name with ExtName and clear ExtName. When only creating an import + // library and not linking, the internal name is irrelevant. This avoids + // cases where writeImportLibrary tries to transplant decoration from + // symbol decoration onto ExtName. 
+ for (COFFShortExport& E : Def->Exports) { + if (!E.ExtName.empty()) { + E.Name = E.ExtName; + E.ExtName.clear(); + } + } if (Machine == IMAGE_FILE_MACHINE_I386 && Args.getLastArg(OPT_k)) { for (COFFShortExport& E : Def->Exports) { @@ -174,7 +177,8 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef ArgsArr) { } } - if (writeImportLibrary(Def->OutputFile, Path, Def->Exports, Machine, true)) + if (!Path.empty() && + writeImportLibrary(Def->OutputFile, Path, Def->Exports, Machine, true)) return 1; return 0; } diff --git a/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/lib/ToolDrivers/llvm-lib/LibDriver.cpp index 18ab6637305e..286191abff20 100644 --- a/lib/ToolDrivers/llvm-lib/LibDriver.cpp +++ b/lib/ToolDrivers/llvm-lib/LibDriver.cpp @@ -13,6 +13,7 @@ #include "llvm/ToolDrivers/llvm-lib/LibDriver.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringSet.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/BinaryFormat/Magic.h" #include "llvm/Bitcode/BitcodeReader.h" @@ -141,6 +142,125 @@ static void doList(opt::InputArgList& Args) { fatalOpenError(std::move(Err), B->getBufferIdentifier()); } +static COFF::MachineTypes getCOFFFileMachine(MemoryBufferRef MB) { + std::error_code EC; + object::COFFObjectFile Obj(MB, EC); + if (EC) { + llvm::errs() << MB.getBufferIdentifier() + << ": failed to open: " << EC.message() << '\n'; + exit(1); + } + + uint16_t Machine = Obj.getMachine(); + if (Machine != COFF::IMAGE_FILE_MACHINE_I386 && + Machine != COFF::IMAGE_FILE_MACHINE_AMD64 && + Machine != COFF::IMAGE_FILE_MACHINE_ARMNT && + Machine != COFF::IMAGE_FILE_MACHINE_ARM64) { + llvm::errs() << MB.getBufferIdentifier() << ": unknown machine: " << Machine + << '\n'; + exit(1); + } + + return static_cast(Machine); +} + +static COFF::MachineTypes getBitcodeFileMachine(MemoryBufferRef MB) { + Expected TripleStr = getBitcodeTargetTriple(MB); + if (!TripleStr) { + llvm::errs() << MB.getBufferIdentifier() + << ": failed to get target triple from bitcode\n"; + exit(1); + } + + switch (Triple(*TripleStr).getArch()) { + case Triple::x86: + return COFF::IMAGE_FILE_MACHINE_I386; + case Triple::x86_64: + return COFF::IMAGE_FILE_MACHINE_AMD64; + case Triple::arm: + return COFF::IMAGE_FILE_MACHINE_ARMNT; + case Triple::aarch64: + return COFF::IMAGE_FILE_MACHINE_ARM64; + default: + llvm::errs() << MB.getBufferIdentifier() + << ": unknown arch in target triple " << *TripleStr << '\n'; + exit(1); + } +} + +static void appendFile(std::vector &Members, + COFF::MachineTypes &LibMachine, + std::string &LibMachineSource, MemoryBufferRef MB) { + file_magic Magic = identify_magic(MB.getBuffer()); + + if (Magic != file_magic::coff_object && Magic != file_magic::bitcode && + Magic != file_magic::archive && Magic != file_magic::windows_resource) { + llvm::errs() << MB.getBufferIdentifier() + << ": not a COFF object, bitcode, archive or resource file\n"; + exit(1); + } + + // If a user attempts to add an archive to another archive, llvm-lib doesn't + // handle the first archive file as a single file. Instead, it extracts all + // members from the archive and add them to the second archive. This beahvior + // is for compatibility with Microsoft's lib command. 
+ if (Magic == file_magic::archive) { + Error Err = Error::success(); + object::Archive Archive(MB, Err); + fatalOpenError(std::move(Err), MB.getBufferIdentifier()); + + for (auto &C : Archive.children(Err)) { + Expected ChildMB = C.getMemoryBufferRef(); + if (!ChildMB) { + handleAllErrors(ChildMB.takeError(), [&](const ErrorInfoBase &EIB) { + llvm::errs() << MB.getBufferIdentifier() << ": " << EIB.message() + << "\n"; + }); + exit(1); + } + + appendFile(Members, LibMachine, LibMachineSource, *ChildMB); + } + + fatalOpenError(std::move(Err), MB.getBufferIdentifier()); + return; + } + + // Check that all input files have the same machine type. + // Mixing normal objects and LTO bitcode files is fine as long as they + // have the same machine type. + // Doing this here duplicates the header parsing work that writeArchive() + // below does, but it's not a lot of work and it's a bit awkward to do + // in writeArchive() which needs to support many tools, can't assume the + // input is COFF, and doesn't have a good way to report errors. + if (Magic == file_magic::coff_object || Magic == file_magic::bitcode) { + COFF::MachineTypes FileMachine = (Magic == file_magic::coff_object) + ? getCOFFFileMachine(MB) + : getBitcodeFileMachine(MB); + + // FIXME: Once lld-link rejects multiple resource .obj files: + // Call convertResToCOFF() on .res files and add the resulting + // COFF file to the .lib output instead of adding the .res file, and remove + // this check. See PR42180. + if (FileMachine != COFF::IMAGE_FILE_MACHINE_UNKNOWN) { + if (LibMachine == COFF::IMAGE_FILE_MACHINE_UNKNOWN) { + LibMachine = FileMachine; + LibMachineSource = + (" (inferred from earlier file '" + MB.getBufferIdentifier() + "')") + .str(); + } else if (LibMachine != FileMachine) { + llvm::errs() << MB.getBufferIdentifier() << ": file machine type " + << machineToStr(FileMachine) + << " conflicts with library machine type " + << machineToStr(LibMachine) << LibMachineSource << '\n'; + exit(1); + } + } + } + + Members.emplace_back(MB); +} + int llvm::libDriverMain(ArrayRef ArgsArr) { BumpPtrAllocator Alloc; StringSaver Saver(Alloc); @@ -195,104 +315,40 @@ int llvm::libDriverMain(ArrayRef ArgsArr) { std::string(" (from '/machine:") + Arg->getValue() + "' flag)"; } - // Create a NewArchiveMember for each input file. + std::vector> MBs; + StringSet<> Seen; std::vector Members; + + // Create a NewArchiveMember for each input file. for (auto *Arg : Args.filtered(OPT_INPUT)) { + // Find a file std::string Path = findInputFile(Arg->getValue(), SearchPaths); if (Path.empty()) { llvm::errs() << Arg->getValue() << ": no such file or directory\n"; return 1; } - Expected MOrErr = - NewArchiveMember::getFile(Saver.save(Path), /*Deterministic=*/true); - if (!MOrErr) { - handleAllErrors(MOrErr.takeError(), [&](const ErrorInfoBase &EIB) { - llvm::errs() << Arg->getValue() << ": " << EIB.message() << "\n"; - }); - return 1; - } - - file_magic Magic = identify_magic(MOrErr->Buf->getBuffer()); - if (Magic != file_magic::coff_object && Magic != file_magic::bitcode && - Magic != file_magic::windows_resource) { - llvm::errs() << Arg->getValue() - << ": not a COFF object, bitcode or resource file\n"; - return 1; - } - - // Check that all input files have the same machine type. - // Mixing normal objects and LTO bitcode files is fine as long as they - // have the same machine type. 
- // Doing this here duplicates the header parsing work that writeArchive() - // below does, but it's not a lot of work and it's a bit awkward to do - // in writeArchive() which needs to support many tools, can't assume the - // input is COFF, and doesn't have a good way to report errors. - COFF::MachineTypes FileMachine = COFF::IMAGE_FILE_MACHINE_UNKNOWN; - if (Magic == file_magic::coff_object) { - std::error_code EC; - object::COFFObjectFile Obj(*MOrErr->Buf, EC); - if (EC) { - llvm::errs() << Arg->getValue() << ": failed to open: " << EC.message() - << '\n'; - return 1; - } - uint16_t Machine = Obj.getMachine(); - if (Machine != COFF::IMAGE_FILE_MACHINE_I386 && - Machine != COFF::IMAGE_FILE_MACHINE_AMD64 && - Machine != COFF::IMAGE_FILE_MACHINE_ARMNT && - Machine != COFF::IMAGE_FILE_MACHINE_ARM64) { - llvm::errs() << Arg->getValue() << ": unknown machine: " << Machine - << '\n'; - return 1; - } - FileMachine = static_cast(Machine); - } else if (Magic == file_magic::bitcode) { - Expected TripleStr = getBitcodeTargetTriple(*MOrErr->Buf); - if (!TripleStr) { - llvm::errs() << Arg->getValue() - << ": failed to get target triple from bitcode\n"; - return 1; - } - switch (Triple(*TripleStr).getArch()) { - case Triple::x86: - FileMachine = COFF::IMAGE_FILE_MACHINE_I386; - break; - case Triple::x86_64: - FileMachine = COFF::IMAGE_FILE_MACHINE_AMD64; - break; - case Triple::arm: - FileMachine = COFF::IMAGE_FILE_MACHINE_ARMNT; - break; - case Triple::aarch64: - FileMachine = COFF::IMAGE_FILE_MACHINE_ARM64; - break; - default: - llvm::errs() << Arg->getValue() << ": unknown arch in target triple " - << *TripleStr << '\n'; - return 1; - } - } - - // FIXME: Once lld-link rejects multiple resource .obj files: - // Call convertResToCOFF() on .res files and add the resulting - // COFF file to the .lib output instead of adding the .res file, and remove - // this check. See PR42180. - if (FileMachine != COFF::IMAGE_FILE_MACHINE_UNKNOWN) { - if (LibMachine == COFF::IMAGE_FILE_MACHINE_UNKNOWN) { - LibMachine = FileMachine; - LibMachineSource = std::string(" (inferred from earlier file '") + - Arg->getValue() + "')"; - } else if (LibMachine != FileMachine) { - llvm::errs() << Arg->getValue() << ": file machine type " - << machineToStr(FileMachine) - << " conflicts with library machine type " - << machineToStr(LibMachine) << LibMachineSource << '\n'; - return 1; - } - } - - Members.emplace_back(std::move(*MOrErr)); + // Input files are uniquified by pathname. If you specify the exact same + // path more than once, all but the first one are ignored. + // + // Note that there's a loophole in the rule; you can prepend `.\` or + // something like that to a path to make it look different, and they are + // handled as if they were different files. This behavior is compatible with + // Microsoft lib.exe. + if (!Seen.insert(Path).second) + continue; + + // Open a file. + ErrorOr> MOrErr = + MemoryBuffer::getFile(Path, -1, false); + fatalOpenError(errorCodeToError(MOrErr.getError()), Path); + MemoryBufferRef MBRef = (*MOrErr)->getMemBufferRef(); + + // Append a file. + appendFile(Members, LibMachine, LibMachineSource, MBRef); + + // Take the ownership of the file buffer to keep the file open. + MBs.push_back(std::move(*MOrErr)); } // Create an archive file. 
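The uniquification rule described in libDriverMain() above keys on the literal path string: Seen is a StringSet<>, so a superficially different spelling such as ".\foo.obj" is kept as a separate archive member, which is the lib.exe-compatible behavior the comment calls out. Below is a minimal, self-contained sketch of just that rule; it reuses only the StringSet insert idiom from the patch, and the sample paths and the main() wrapper are invented for illustration, not part of the LLVM change.

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::StringSet<> Seen;
  // Hypothetical inputs: the second entry repeats the first verbatim, the
  // third names the same file with a ".\" prefix.
  const char *Inputs[] = {"foo.obj", "foo.obj", ".\\foo.obj"};
  for (llvm::StringRef Path : Inputs) {
    // Same check as in libDriverMain(): skip a path already seen verbatim.
    if (!Seen.insert(Path).second) {
      llvm::outs() << "ignoring duplicate input: " << Path << "\n";
      continue;
    }
    llvm::outs() << "adding member: " << Path << "\n";
  }
  // Prints "adding member" for foo.obj and .\foo.obj, and "ignoring
  // duplicate input" only for the verbatim repeat.
  return 0;
}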
diff --git a/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 06222d7e7e44..a24de3ca213f 100644 --- a/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -121,14 +121,13 @@ static bool foldGuardedRotateToFunnelShift(Instruction &I) { BasicBlock *GuardBB = Phi.getIncomingBlock(RotSrc == P1); BasicBlock *RotBB = Phi.getIncomingBlock(RotSrc != P1); Instruction *TermI = GuardBB->getTerminator(); - BasicBlock *TrueBB, *FalseBB; ICmpInst::Predicate Pred; - if (!match(TermI, m_Br(m_ICmp(Pred, m_Specific(RotAmt), m_ZeroInt()), TrueBB, - FalseBB))) + BasicBlock *PhiBB = Phi.getParent(); + if (!match(TermI, m_Br(m_ICmp(Pred, m_Specific(RotAmt), m_ZeroInt()), + m_SpecificBB(PhiBB), m_SpecificBB(RotBB)))) return false; - BasicBlock *PhiBB = Phi.getParent(); - if (Pred != CmpInst::ICMP_EQ || TrueBB != PhiBB || FalseBB != RotBB) + if (Pred != CmpInst::ICMP_EQ) return false; // We matched a variation of this IR pattern: @@ -251,6 +250,72 @@ static bool foldAnyOrAllBitsSet(Instruction &I) { return true; } +// Try to recognize below function as popcount intrinsic. +// This is the "best" algorithm from +// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel +// Also used in TargetLowering::expandCTPOP(). +// +// int popcount(unsigned int i) { +// i = i - ((i >> 1) & 0x55555555); +// i = (i & 0x33333333) + ((i >> 2) & 0x33333333); +// i = ((i + (i >> 4)) & 0x0F0F0F0F); +// return (i * 0x01010101) >> 24; +// } +static bool tryToRecognizePopCount(Instruction &I) { + if (I.getOpcode() != Instruction::LShr) + return false; + + Type *Ty = I.getType(); + if (!Ty->isIntOrIntVectorTy()) + return false; + + unsigned Len = Ty->getScalarSizeInBits(); + // FIXME: fix Len == 8 and other irregular type lengths. + if (!(Len <= 128 && Len > 8 && Len % 8 == 0)) + return false; + + APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55)); + APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33)); + APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F)); + APInt Mask01 = APInt::getSplat(Len, APInt(8, 0x01)); + APInt MaskShift = APInt(Len, Len - 8); + + Value *Op0 = I.getOperand(0); + Value *Op1 = I.getOperand(1); + Value *MulOp0; + // Matching "(i * 0x01010101...) >> 24". + if ((match(Op0, m_Mul(m_Value(MulOp0), m_SpecificInt(Mask01)))) && + match(Op1, m_SpecificInt(MaskShift))) { + Value *ShiftOp0; + // Matching "((i + (i >> 4)) & 0x0F0F0F0F...)". + if (match(MulOp0, m_And(m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(4)), + m_Deferred(ShiftOp0)), + m_SpecificInt(Mask0F)))) { + Value *AndOp0; + // Matching "(i & 0x33333333...) + ((i >> 2) & 0x33333333...)". + if (match(ShiftOp0, + m_c_Add(m_And(m_Value(AndOp0), m_SpecificInt(Mask33)), + m_And(m_LShr(m_Deferred(AndOp0), m_SpecificInt(2)), + m_SpecificInt(Mask33))))) { + Value *Root, *SubOp1; + // Matching "i - ((i >> 1) & 0x55555555...)". 
+ if (match(AndOp0, m_Sub(m_Value(Root), m_Value(SubOp1))) && + match(SubOp1, m_And(m_LShr(m_Specific(Root), m_SpecificInt(1)), + m_SpecificInt(Mask55)))) { + LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n"); + IRBuilder<> Builder(&I); + Function *Func = Intrinsic::getDeclaration( + I.getModule(), Intrinsic::ctpop, I.getType()); + I.replaceAllUsesWith(Builder.CreateCall(Func, {Root})); + return true; + } + } + } + } + + return false; +} + /// This is the entry point for folds that could be implemented in regular /// InstCombine, but they are separated because they are not expected to /// occur frequently and/or have more than a constant-length pattern match. @@ -269,6 +334,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) { for (Instruction &I : make_range(BB.rbegin(), BB.rend())) { MadeChange |= foldAnyOrAllBitsSet(I); MadeChange |= foldGuardedRotateToFunnelShift(I); + MadeChange |= tryToRecognizePopCount(I); } } @@ -303,7 +369,7 @@ void AggressiveInstCombinerLegacyPass::getAnalysisUsage( } bool AggressiveInstCombinerLegacyPass::runOnFunction(Function &F) { - auto &TLI = getAnalysis().getTLI(); + auto &TLI = getAnalysis().getTLI(F); auto &DT = getAnalysis().getDomTree(); return runImpl(F, TLI, DT); } diff --git a/lib/Transforms/Coroutines/CoroCleanup.cpp b/lib/Transforms/Coroutines/CoroCleanup.cpp index 1fb0a114d0c7..c3e05577f044 100644 --- a/lib/Transforms/Coroutines/CoroCleanup.cpp +++ b/lib/Transforms/Coroutines/CoroCleanup.cpp @@ -73,6 +73,8 @@ bool Lowerer::lowerRemainingCoroIntrinsics(Function &F) { II->replaceAllUsesWith(ConstantInt::getTrue(Context)); break; case Intrinsic::coro_id: + case Intrinsic::coro_id_retcon: + case Intrinsic::coro_id_retcon_once: II->replaceAllUsesWith(ConstantTokenNone::get(Context)); break; case Intrinsic::coro_subfn_addr: @@ -111,8 +113,9 @@ struct CoroCleanup : FunctionPass { bool doInitialization(Module &M) override { if (coro::declaresIntrinsics(M, {"llvm.coro.alloc", "llvm.coro.begin", "llvm.coro.subfn.addr", "llvm.coro.free", - "llvm.coro.id"})) - L = llvm::make_unique(M); + "llvm.coro.id", "llvm.coro.id.retcon", + "llvm.coro.id.retcon.once"})) + L = std::make_unique(M); return false; } diff --git a/lib/Transforms/Coroutines/CoroEarly.cpp b/lib/Transforms/Coroutines/CoroEarly.cpp index 692697d6f32e..55993d33ee4e 100644 --- a/lib/Transforms/Coroutines/CoroEarly.cpp +++ b/lib/Transforms/Coroutines/CoroEarly.cpp @@ -91,13 +91,14 @@ void Lowerer::lowerCoroDone(IntrinsicInst *II) { Value *Operand = II->getArgOperand(0); // ResumeFnAddr is the first pointer sized element of the coroutine frame. 
+ static_assert(coro::Shape::SwitchFieldIndex::Resume == 0, + "resume function not at offset zero"); auto *FrameTy = Int8Ptr; PointerType *FramePtrTy = FrameTy->getPointerTo(); Builder.SetInsertPoint(II); auto *BCI = Builder.CreateBitCast(Operand, FramePtrTy); - auto *Gep = Builder.CreateConstInBoundsGEP1_32(FrameTy, BCI, 0); - auto *Load = Builder.CreateLoad(FrameTy, Gep); + auto *Load = Builder.CreateLoad(BCI); auto *Cond = Builder.CreateICmpEQ(Load, NullPtr); II->replaceAllUsesWith(Cond); @@ -189,6 +190,10 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) { } } break; + case Intrinsic::coro_id_retcon: + case Intrinsic::coro_id_retcon_once: + F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT); + break; case Intrinsic::coro_resume: lowerResumeOrDestroy(CS, CoroSubFnInst::ResumeIndex); break; @@ -231,11 +236,18 @@ struct CoroEarly : public FunctionPass { // This pass has work to do only if we find intrinsics we are going to lower // in the module. bool doInitialization(Module &M) override { - if (coro::declaresIntrinsics( - M, {"llvm.coro.id", "llvm.coro.destroy", "llvm.coro.done", - "llvm.coro.end", "llvm.coro.noop", "llvm.coro.free", - "llvm.coro.promise", "llvm.coro.resume", "llvm.coro.suspend"})) - L = llvm::make_unique(M); + if (coro::declaresIntrinsics(M, {"llvm.coro.id", + "llvm.coro.id.retcon", + "llvm.coro.id.retcon.once", + "llvm.coro.destroy", + "llvm.coro.done", + "llvm.coro.end", + "llvm.coro.noop", + "llvm.coro.free", + "llvm.coro.promise", + "llvm.coro.resume", + "llvm.coro.suspend"})) + L = std::make_unique(M); return false; } diff --git a/lib/Transforms/Coroutines/CoroElide.cpp b/lib/Transforms/Coroutines/CoroElide.cpp index 6707aa1c827d..aca77119023b 100644 --- a/lib/Transforms/Coroutines/CoroElide.cpp +++ b/lib/Transforms/Coroutines/CoroElide.cpp @@ -286,7 +286,7 @@ struct CoroElide : FunctionPass { bool doInitialization(Module &M) override { if (coro::declaresIntrinsics(M, {"llvm.coro.id"})) - L = llvm::make_unique(M); + L = std::make_unique(M); return false; } diff --git a/lib/Transforms/Coroutines/CoroFrame.cpp b/lib/Transforms/Coroutines/CoroFrame.cpp index 58bf22bee29b..2c42cf8a6d25 100644 --- a/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/lib/Transforms/Coroutines/CoroFrame.cpp @@ -18,6 +18,7 @@ #include "CoroInternal.h" #include "llvm/ADT/BitVector.h" +#include "llvm/Analysis/PtrUseVisitor.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/CFG.h" @@ -28,6 +29,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/circular_raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" using namespace llvm; @@ -120,6 +122,15 @@ struct SuspendCrossingInfo { return false; BasicBlock *UseBB = I->getParent(); + + // As a special case, treat uses by an llvm.coro.suspend.retcon + // as if they were uses in the suspend's single predecessor: the + // uses conceptually occur before the suspend. + if (isa(I)) { + UseBB = UseBB->getSinglePredecessor(); + assert(UseBB && "should have split coro.suspend into its own block"); + } + return hasPathCrossingSuspendPoint(DefBB, UseBB); } @@ -128,7 +139,17 @@ struct SuspendCrossingInfo { } bool isDefinitionAcrossSuspend(Instruction &I, User *U) const { - return isDefinitionAcrossSuspend(I.getParent(), U); + auto *DefBB = I.getParent(); + + // As a special case, treat values produced by an llvm.coro.suspend.* + // as if they were defined in the single successor: the uses + // conceptually occur after the suspend. 
+ if (isa(I)) { + DefBB = DefBB->getSingleSuccessor(); + assert(DefBB && "should have split coro.suspend into its own block"); + } + + return isDefinitionAcrossSuspend(DefBB, U); } }; } // end anonymous namespace @@ -183,9 +204,10 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape) B.Suspend = true; B.Kills |= B.Consumes; }; - for (CoroSuspendInst *CSI : Shape.CoroSuspends) { + for (auto *CSI : Shape.CoroSuspends) { markSuspendBlock(CSI); - markSuspendBlock(CSI->getCoroSave()); + if (auto *Save = CSI->getCoroSave()) + markSuspendBlock(Save); } // Iterate propagating consumes and kills until they stop changing. @@ -261,11 +283,13 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape) // We build up the list of spills for every case where a use is separated // from the definition by a suspend point. +static const unsigned InvalidFieldIndex = ~0U; + namespace { class Spill { Value *Def = nullptr; Instruction *User = nullptr; - unsigned FieldNo = 0; + unsigned FieldNo = InvalidFieldIndex; public: Spill(Value *Def, llvm::User *U) : Def(Def), User(cast(U)) {} @@ -280,11 +304,11 @@ public: // the definition the first time they encounter it. Consider refactoring // SpillInfo into two arrays to normalize the spill representation. unsigned fieldIndex() const { - assert(FieldNo && "Accessing unassigned field"); + assert(FieldNo != InvalidFieldIndex && "Accessing unassigned field"); return FieldNo; } void setFieldIndex(unsigned FieldNumber) { - assert(!FieldNo && "Reassigning field number"); + assert(FieldNo == InvalidFieldIndex && "Reassigning field number"); FieldNo = FieldNumber; } }; @@ -376,18 +400,30 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, SmallString<32> Name(F.getName()); Name.append(".Frame"); StructType *FrameTy = StructType::create(C, Name); - auto *FramePtrTy = FrameTy->getPointerTo(); - auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy, - /*isVarArg=*/false); - auto *FnPtrTy = FnTy->getPointerTo(); - - // Figure out how wide should be an integer type storing the suspend index. - unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size())); - Type *PromiseType = Shape.PromiseAlloca - ? Shape.PromiseAlloca->getType()->getElementType() - : Type::getInt1Ty(C); - SmallVector Types{FnPtrTy, FnPtrTy, PromiseType, - Type::getIntNTy(C, IndexBits)}; + SmallVector Types; + + AllocaInst *PromiseAlloca = Shape.getPromiseAlloca(); + + if (Shape.ABI == coro::ABI::Switch) { + auto *FramePtrTy = FrameTy->getPointerTo(); + auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy, + /*IsVarArg=*/false); + auto *FnPtrTy = FnTy->getPointerTo(); + + // Figure out how wide should be an integer type storing the suspend index. + unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size())); + Type *PromiseType = PromiseAlloca + ? PromiseAlloca->getType()->getElementType() + : Type::getInt1Ty(C); + Type *IndexType = Type::getIntNTy(C, IndexBits); + Types.push_back(FnPtrTy); + Types.push_back(FnPtrTy); + Types.push_back(PromiseType); + Types.push_back(IndexType); + } else { + assert(PromiseAlloca == nullptr && "lowering doesn't support promises"); + } + Value *CurrentDef = nullptr; Padder.addTypes(Types); @@ -399,7 +435,7 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, CurrentDef = S.def(); // PromiseAlloca was already added to Types array earlier. 
- if (CurrentDef == Shape.PromiseAlloca) + if (CurrentDef == PromiseAlloca) continue; uint64_t Count = 1; @@ -430,9 +466,80 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, } FrameTy->setBody(Types); + switch (Shape.ABI) { + case coro::ABI::Switch: + break; + + // Remember whether the frame is inline in the storage. + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: { + auto &Layout = F.getParent()->getDataLayout(); + auto Id = Shape.getRetconCoroId(); + Shape.RetconLowering.IsFrameInlineInStorage + = (Layout.getTypeAllocSize(FrameTy) <= Id->getStorageSize() && + Layout.getABITypeAlignment(FrameTy) <= Id->getStorageAlignment()); + break; + } + } + return FrameTy; } +// We use a pointer use visitor to discover if there are any writes into an +// alloca that dominates CoroBegin. If that is the case, insertSpills will copy +// the value from the alloca into the coroutine frame spill slot corresponding +// to that alloca. +namespace { +struct AllocaUseVisitor : PtrUseVisitor { + using Base = PtrUseVisitor; + AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT, + const CoroBeginInst &CB) + : PtrUseVisitor(DL), DT(DT), CoroBegin(CB) {} + + // We are only interested in uses that dominate coro.begin. + void visit(Instruction &I) { + if (DT.dominates(&I, &CoroBegin)) + Base::visit(I); + } + // We need to provide this overload as PtrUseVisitor uses a pointer based + // visiting function. + void visit(Instruction *I) { return visit(*I); } + + void visitLoadInst(LoadInst &) {} // Good. Nothing to do. + + // If the use is an operand, the pointer escaped and anything can write into + // that memory. If the use is the pointer, we are definitely writing into the + // alloca and therefore we need to copy. + void visitStoreInst(StoreInst &SI) { PI.setAborted(&SI); } + + // Any other instruction that is not filtered out by PtrUseVisitor, will + // result in the copy. + void visitInstruction(Instruction &I) { PI.setAborted(&I); } + +private: + const DominatorTree &DT; + const CoroBeginInst &CoroBegin; +}; +} // namespace +static bool mightWriteIntoAllocaPtr(AllocaInst &A, const DominatorTree &DT, + const CoroBeginInst &CB) { + const DataLayout &DL = A.getModule()->getDataLayout(); + AllocaUseVisitor Visitor(DL, DT, CB); + auto PtrI = Visitor.visitPtr(A); + if (PtrI.isEscaped() || PtrI.isAborted()) { + auto *PointerEscapingInstr = PtrI.getEscapingInst() + ? PtrI.getEscapingInst() + : PtrI.getAbortingInst(); + if (PointerEscapingInstr) { + LLVM_DEBUG( + dbgs() << "AllocaInst copy was triggered by instruction: " + << *PointerEscapingInstr << "\n"); + } + return true; + } + return false; +} + // We need to make room to insert a spill after initial PHIs, but before // catchswitch instruction. Placing it before violates the requirement that // catchswitch, like all other EHPads must be the first nonPHI in a block. 
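The visitor above only has to answer a yes/no question: could anything that runs before coro.begin write through, or escape, the alloca's pointer? A minimal standalone model of that decision follows; the types are invented, and simple program order stands in for the dominance check used by the real code.

#include <cstdio>
#include <vector>

enum class UseKind { Load, Store, PassedToCall };

struct AllocaUse {
  UseKind Kind;
  unsigned Position;              // program order; smaller means earlier
};

bool mightWriteBeforeCoroBegin(const std::vector<AllocaUse> &Uses,
                               unsigned CoroBeginPos) {
  for (const AllocaUse &U : Uses) {
    if (U.Position >= CoroBeginPos)
      continue;                   // only uses that precede coro.begin matter
    if (U.Kind != UseKind::Load)
      return true;                // a store, or an escape, forces the copy
  }
  return false;
}

int main() {
  // A parameter whose address was taken: "store %n, %n.addr" happens before
  // coro.begin, so the alloca's current value must be copied into its frame
  // slot once the frame pointer is established.
  std::vector<AllocaUse> Uses = {{UseKind::Store, 1}, {UseKind::Load, 7}};
  std::printf("copy needed: %d\n", mightWriteBeforeCoroBegin(Uses, 5)); // 1
}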
@@ -476,7 +583,7 @@ static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) { // whatever // // -static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) { +static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) { auto *CB = Shape.CoroBegin; LLVMContext &C = CB->getContext(); IRBuilder<> Builder(CB->getNextNode()); @@ -484,11 +591,14 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) { PointerType *FramePtrTy = FrameTy->getPointerTo(); auto *FramePtr = cast(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr")); + DominatorTree DT(*CB->getFunction()); Value *CurrentValue = nullptr; BasicBlock *CurrentBlock = nullptr; Value *CurrentReload = nullptr; - unsigned Index = 0; // Proper field number will be read from field definition. + + // Proper field number will be read from field definition. + unsigned Index = InvalidFieldIndex; // We need to keep track of any allocas that need "spilling" // since they will live in the coroutine frame now, all access to them @@ -496,9 +606,11 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) { // we remember allocas and their indices to be handled once we processed // all the spills. SmallVector, 4> Allocas; - // Promise alloca (if present) has a fixed field number (Shape::PromiseField) - if (Shape.PromiseAlloca) - Allocas.emplace_back(Shape.PromiseAlloca, coro::Shape::PromiseField); + // Promise alloca (if present) has a fixed field number. + if (auto *PromiseAlloca = Shape.getPromiseAlloca()) { + assert(Shape.ABI == coro::ABI::Switch); + Allocas.emplace_back(PromiseAlloca, coro::Shape::SwitchFieldIndex::Promise); + } // Create a GEP with the given index into the coroutine frame for the original // value Orig. Appends an extra 0 index for array-allocas, preserving the @@ -526,7 +638,7 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) { // Create a load instruction to reload the spilled value from the coroutine // frame. auto CreateReload = [&](Instruction *InsertBefore) { - assert(Index && "accessing unassigned field number"); + assert(Index != InvalidFieldIndex && "accessing unassigned field number"); Builder.SetInsertPoint(InsertBefore); auto *G = GetFramePointer(Index, CurrentValue); @@ -558,29 +670,45 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) { // coroutine frame. Instruction *InsertPt = nullptr; - if (isa(CurrentValue)) { + if (auto Arg = dyn_cast(CurrentValue)) { // For arguments, we will place the store instruction right after // the coroutine frame pointer instruction, i.e. bitcast of // coro.begin from i8* to %f.frame*. InsertPt = FramePtr->getNextNode(); + + // If we're spilling an Argument, make sure we clear 'nocapture' + // from the coroutine function. + Arg->getParent()->removeParamAttr(Arg->getArgNo(), + Attribute::NoCapture); + } else if (auto *II = dyn_cast(CurrentValue)) { // If we are spilling the result of the invoke instruction, split the // normal edge and insert the spill in the new block. auto NewBB = SplitEdge(II->getParent(), II->getNormalDest()); InsertPt = NewBB->getTerminator(); - } else if (dyn_cast(CurrentValue)) { + } else if (isa(CurrentValue)) { // Skip the PHINodes and EH pads instructions. 
BasicBlock *DefBlock = cast(E.def())->getParent(); if (auto *CSI = dyn_cast(DefBlock->getTerminator())) InsertPt = splitBeforeCatchSwitch(CSI); else InsertPt = &*DefBlock->getFirstInsertionPt(); + } else if (auto CSI = dyn_cast(CurrentValue)) { + // Don't spill immediately after a suspend; splitting assumes + // that the suspend will be followed by a branch. + InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHI(); } else { + auto *I = cast(E.def()); + assert(!I->isTerminator() && "unexpected terminator"); // For all other values, the spill is placed immediately after // the definition. - assert(!cast(E.def())->isTerminator() && - "unexpected terminator"); - InsertPt = cast(E.def())->getNextNode(); + if (DT.dominates(CB, I)) { + InsertPt = I->getNextNode(); + } else { + // Unless, it is not dominated by CoroBegin, then it will be + // inserted immediately after CoroFrame is computed. + InsertPt = FramePtr->getNextNode(); + } } Builder.SetInsertPoint(InsertPt); @@ -613,21 +741,53 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) { } BasicBlock *FramePtrBB = FramePtr->getParent(); - Shape.AllocaSpillBlock = - FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB"); - Shape.AllocaSpillBlock->splitBasicBlock(&Shape.AllocaSpillBlock->front(), - "PostSpill"); - Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front()); + auto SpillBlock = + FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB"); + SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill"); + Shape.AllocaSpillBlock = SpillBlock; // If we found any allocas, replace all of their remaining uses with Geps. + // Note: we cannot do it indiscriminately as some of the uses may not be + // dominated by CoroBegin. + bool MightNeedToCopy = false; + Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front()); + SmallVector UsersToUpdate; for (auto &P : Allocas) { - auto *G = GetFramePointer(P.second, P.first); + AllocaInst *const A = P.first; + UsersToUpdate.clear(); + for (User *U : A->users()) { + auto *I = cast(U); + if (DT.dominates(CB, I)) + UsersToUpdate.push_back(I); + else + MightNeedToCopy = true; + } + if (!UsersToUpdate.empty()) { + auto *G = GetFramePointer(P.second, A); + G->takeName(A); + for (Instruction *I : UsersToUpdate) + I->replaceUsesOfWith(A, G); + } + } + // If we discovered such uses not dominated by CoroBegin, see if any of them + // preceed coro begin and have instructions that can modify the + // value of the alloca and therefore would require a copying the value into + // the spill slot in the coroutine frame. + if (MightNeedToCopy) { + Builder.SetInsertPoint(FramePtr->getNextNode()); + + for (auto &P : Allocas) { + AllocaInst *const A = P.first; + if (mightWriteIntoAllocaPtr(*A, DT, *CB)) { + if (A->isArrayAllocation()) + report_fatal_error( + "Coroutines cannot handle copying of array allocas yet"); - // We are not using ReplaceInstWithInst(P.first, cast(G)) here, - // as we are changing location of the instruction. - G->takeName(P.first); - P.first->replaceAllUsesWith(G); - P.first->eraseFromParent(); + auto *G = GetFramePointer(P.second, A); + auto *Value = Builder.CreateLoad(A); + Builder.CreateStore(Value, G); + } + } } return FramePtr; } @@ -829,52 +989,6 @@ static void rewriteMaterializableInstructions(IRBuilder<> &IRB, } } -// Move early uses of spilled variable after CoroBegin. 
-// For example, if a parameter had address taken, we may end up with the code -// like: -// define @f(i32 %n) { -// %n.addr = alloca i32 -// store %n, %n.addr -// ... -// call @coro.begin -// we need to move the store after coro.begin -static void moveSpillUsesAfterCoroBegin(Function &F, SpillInfo const &Spills, - CoroBeginInst *CoroBegin) { - DominatorTree DT(F); - SmallVector NeedsMoving; - - Value *CurrentValue = nullptr; - - for (auto const &E : Spills) { - if (CurrentValue == E.def()) - continue; - - CurrentValue = E.def(); - - for (User *U : CurrentValue->users()) { - Instruction *I = cast(U); - if (!DT.dominates(CoroBegin, I)) { - LLVM_DEBUG(dbgs() << "will move: " << *I << "\n"); - - // TODO: Make this more robust. Currently if we run into a situation - // where simple instruction move won't work we panic and - // report_fatal_error. - for (User *UI : I->users()) { - if (!DT.dominates(CoroBegin, cast(UI))) - report_fatal_error("cannot move instruction since its users are not" - " dominated by CoroBegin"); - } - - NeedsMoving.push_back(I); - } - } - } - - Instruction *InsertPt = CoroBegin->getNextNode(); - for (Instruction *I : NeedsMoving) - I->moveBefore(InsertPt); -} - // Splits the block at a particular instruction unless it is the first // instruction in the block with a single predecessor. static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) { @@ -895,21 +1009,337 @@ static void splitAround(Instruction *I, const Twine &Name) { splitBlockIfNotFirst(I->getNextNode(), "After" + Name); } +static bool isSuspendBlock(BasicBlock *BB) { + return isa(BB->front()); +} + +typedef SmallPtrSet VisitedBlocksSet; + +/// Does control flow starting at the given block ever reach a suspend +/// instruction before reaching a block in VisitedOrFreeBBs? +static bool isSuspendReachableFrom(BasicBlock *From, + VisitedBlocksSet &VisitedOrFreeBBs) { + // Eagerly try to add this block to the visited set. If it's already + // there, stop recursing; this path doesn't reach a suspend before + // either looping or reaching a freeing block. + if (!VisitedOrFreeBBs.insert(From).second) + return false; + + // We assume that we'll already have split suspends into their own blocks. + if (isSuspendBlock(From)) + return true; + + // Recurse on the successors. + for (auto Succ : successors(From)) { + if (isSuspendReachableFrom(Succ, VisitedOrFreeBBs)) + return true; + } + + return false; +} + +/// Is the given alloca "local", i.e. bounded in lifetime to not cross a +/// suspend point? +static bool isLocalAlloca(CoroAllocaAllocInst *AI) { + // Seed the visited set with all the basic blocks containing a free + // so that we won't pass them up. + VisitedBlocksSet VisitedOrFreeBBs; + for (auto User : AI->users()) { + if (auto FI = dyn_cast(User)) + VisitedOrFreeBBs.insert(FI->getParent()); + } + + return !isSuspendReachableFrom(AI->getParent(), VisitedOrFreeBBs); +} + +/// After we split the coroutine, will the given basic block be along +/// an obvious exit path for the resumption function? +static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB, + unsigned depth = 3) { + // If we've bottomed out our depth count, stop searching and assume + // that the path might loop back. + if (depth == 0) return false; + + // If this is a suspend block, we're about to exit the resumption function. + if (isSuspendBlock(BB)) return true; + + // Recurse into the successors. 
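The reachability walk above is what lets a coro.alloca.alloc stay on the stack: the allocation is "local" when every path from it reaches a matching free before any suspend. A self-contained sketch of the same trick on a toy graph (invented types; block numbers stand in for BasicBlocks) shows why pre-seeding the visited set with the free blocks is enough.

#include <cstdio>
#include <set>
#include <vector>

using Graph = std::vector<std::vector<int>>;   // successor lists

bool suspendReachableFrom(int From, const Graph &Succs,
                          const std::set<int> &SuspendBlocks,
                          std::set<int> &VisitedOrFreeBlocks) {
  if (!VisitedOrFreeBlocks.insert(From).second)
    return false;                              // already visited, or a free
  if (SuspendBlocks.count(From))
    return true;
  for (int Succ : Succs[From])
    if (suspendReachableFrom(Succ, Succs, SuspendBlocks, VisitedOrFreeBlocks))
      return true;
  return false;
}

int main() {
  // Blocks: 0 alloc -> 1 -> 2 free -> 3 suspend.  Every path from the
  // allocation hits the free before any suspend, so the alloca is "local".
  Graph Succs = {{1}, {2}, {3}, {}};
  std::set<int> Suspends = {3};
  std::set<int> VisitedOrFree = {2};           // seed with the free block
  bool Local = !suspendReachableFrom(0, Succs, Suspends, VisitedOrFree);
  std::printf("local alloca: %d\n", Local);    // prints 1
}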
+  for (auto Succ : successors(BB)) {
+    if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1))
+      return false;
+  }
+
+  // If none of the successors leads back in a loop, we're on an exit/abort.
+  return true;
+}
+
+static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
+  // Look for a free that isn't sufficiently obviously followed by
+  // either a suspend or a termination, i.e. something that will leave
+  // the coro resumption frame.
+  for (auto U : AI->users()) {
+    auto FI = dyn_cast<CoroAllocaFreeInst>(U);
+    if (!FI) continue;
+
+    if (!willLeaveFunctionImmediatelyAfter(FI->getParent()))
+      return true;
+  }
+
+  // If we never found one, we don't need a stack save.
+  return false;
+}
+
+/// Turn each of the given local allocas into a normal (dynamic) alloca
+/// instruction.
+static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst *> LocalAllocas,
+                              SmallVectorImpl<Instruction *> &DeadInsts) {
+  for (auto AI : LocalAllocas) {
+    auto M = AI->getModule();
+    IRBuilder<> Builder(AI);
+
+    // Save the stack depth.  Try to avoid doing this if the stackrestore
+    // is going to immediately precede a return or something.
+    Value *StackSave = nullptr;
+    if (localAllocaNeedsStackSave(AI))
+      StackSave = Builder.CreateCall(
+          Intrinsic::getDeclaration(M, Intrinsic::stacksave));
+
+    // Allocate memory.
+    auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
+    Alloca->setAlignment(MaybeAlign(AI->getAlignment()));
+
+    for (auto U : AI->users()) {
+      // Replace gets with the allocation.
+      if (isa<CoroAllocaGetInst>(U)) {
+        U->replaceAllUsesWith(Alloca);
+
+      // Replace frees with stackrestores.  This is safe because
+      // alloca.alloc is required to obey a stack discipline, although we
+      // don't enforce that structurally.
+      } else {
+        auto FI = cast<CoroAllocaFreeInst>(U);
+        if (StackSave) {
+          Builder.SetInsertPoint(FI);
+          Builder.CreateCall(
+              Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
+              StackSave);
+        }
+      }
+      DeadInsts.push_back(cast<Instruction>(U));
+    }
+
+    DeadInsts.push_back(AI);
+  }
+}
+
+/// Turn the given coro.alloca.alloc call into a dynamic allocation.
+/// This happens during the all-instructions iteration, so it must not
+/// delete the call.
+static Instruction *lowerNonLocalAlloca(CoroAllocaAllocInst *AI,
+                                        coro::Shape &Shape,
+                                        SmallVectorImpl<Instruction *> &DeadInsts) {
+  IRBuilder<> Builder(AI);
+  auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr);
+
+  for (User *U : AI->users()) {
+    if (isa<CoroAllocaGetInst>(U)) {
+      U->replaceAllUsesWith(Alloc);
+    } else {
+      auto FI = cast<CoroAllocaFreeInst>(U);
+      Builder.SetInsertPoint(FI);
+      Shape.emitDealloc(Builder, Alloc, nullptr);
+    }
+    DeadInsts.push_back(cast<Instruction>(U));
+  }
+
+  // Push this on last so that it gets deleted after all the others.
+  DeadInsts.push_back(AI);
+
+  // Return the new allocation value so that we can check for needed spills.
+  return cast<Instruction>(Alloc);
+}
+
+/// Get the current swifterror value.
+static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
+                                     coro::Shape &Shape) {
+  // Make a fake function pointer as a sort of intrinsic.
+  auto FnTy = FunctionType::get(ValueTy, {}, false);
+  auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
+
+  auto Call = Builder.CreateCall(Fn, {});
+  Shape.SwiftErrorOps.push_back(Call);
+
+  return Call;
+}
+
+/// Set the given value as the current swifterror value.
+///
+/// Returns a slot that can be used as a swifterror slot.
+static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
+                                     coro::Shape &Shape) {
+  // Make a fake function pointer as a sort of intrinsic.
+ auto FnTy = FunctionType::get(V->getType()->getPointerTo(), + {V->getType()}, false); + auto Fn = ConstantPointerNull::get(FnTy->getPointerTo()); + + auto Call = Builder.CreateCall(Fn, { V }); + Shape.SwiftErrorOps.push_back(Call); + + return Call; +} + +/// Set the swifterror value from the given alloca before a call, +/// then put in back in the alloca afterwards. +/// +/// Returns an address that will stand in for the swifterror slot +/// until splitting. +static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call, + AllocaInst *Alloca, + coro::Shape &Shape) { + auto ValueTy = Alloca->getAllocatedType(); + IRBuilder<> Builder(Call); + + // Load the current value from the alloca and set it as the + // swifterror value. + auto ValueBeforeCall = Builder.CreateLoad(ValueTy, Alloca); + auto Addr = emitSetSwiftErrorValue(Builder, ValueBeforeCall, Shape); + + // Move to after the call. Since swifterror only has a guaranteed + // value on normal exits, we can ignore implicit and explicit unwind + // edges. + if (isa(Call)) { + Builder.SetInsertPoint(Call->getNextNode()); + } else { + auto Invoke = cast(Call); + Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg()); + } + + // Get the current swifterror value and store it to the alloca. + auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape); + Builder.CreateStore(ValueAfterCall, Alloca); + + return Addr; +} + +/// Eliminate a formerly-swifterror alloca by inserting the get/set +/// intrinsics and attempting to MemToReg the alloca away. +static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca, + coro::Shape &Shape) { + for (auto UI = Alloca->use_begin(), UE = Alloca->use_end(); UI != UE; ) { + // We're likely changing the use list, so use a mutation-safe + // iteration pattern. + auto &Use = *UI; + ++UI; + + // swifterror values can only be used in very specific ways. + // We take advantage of that here. + auto User = Use.getUser(); + if (isa(User) || isa(User)) + continue; + + assert(isa(User) || isa(User)); + auto Call = cast(User); + + auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape); + + // Use the returned slot address as the call argument. + Use.set(Addr); + } + + // All the uses should be loads and stores now. + assert(isAllocaPromotable(Alloca)); +} + +/// "Eliminate" a swifterror argument by reducing it to the alloca case +/// and then loading and storing in the prologue and epilog. +/// +/// The argument keeps the swifterror flag. +static void eliminateSwiftErrorArgument(Function &F, Argument &Arg, + coro::Shape &Shape, + SmallVectorImpl &AllocasToPromote) { + IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg()); + + auto ArgTy = cast(Arg.getType()); + auto ValueTy = ArgTy->getElementType(); + + // Reduce to the alloca case: + + // Create an alloca and replace all uses of the arg with it. + auto Alloca = Builder.CreateAlloca(ValueTy, ArgTy->getAddressSpace()); + Arg.replaceAllUsesWith(Alloca); + + // Set an initial value in the alloca. swifterror is always null on entry. + auto InitialValue = Constant::getNullValue(ValueTy); + Builder.CreateStore(InitialValue, Alloca); + + // Find all the suspends in the function and save and restore around them. + for (auto Suspend : Shape.CoroSuspends) { + (void) emitSetAndGetSwiftErrorValueAround(Suspend, Alloca, Shape); + } + + // Find all the coro.ends in the function and restore the error value. 
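These helpers only plant placeholder calls (recorded in Shape.SwiftErrorOps) that are later rewritten into real swifterror reads and writes. The runtime effect they stand for is the save-around-call pattern sketched below, where everything (the Error type, CurrentSwiftError, the function names) is invented for illustration; the same set/get pairing is also applied around each suspend point.

#include <cstdio>

struct Error { int Code; };

static Error *CurrentSwiftError = nullptr;       // stands in for the register

void setSwiftErrorValue(Error *E) { CurrentSwiftError = E; }
Error *getSwiftErrorValue() { return CurrentSwiftError; }

void callee() {                                  // may replace the error
  static Error Failed{42};
  CurrentSwiftError = &Failed;
}

int main() {
  Error *Slot = nullptr;                         // the swifterror alloca

  setSwiftErrorValue(Slot);                      // copy the slot in, pre-call
  callee();
  Slot = getSwiftErrorValue();                   // copy the result back out

  std::printf("error code: %d\n", Slot ? Slot->Code : 0);   // prints 42
}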
+ for (auto End : Shape.CoroEnds) { + Builder.SetInsertPoint(End); + auto FinalValue = Builder.CreateLoad(ValueTy, Alloca); + (void) emitSetSwiftErrorValue(Builder, FinalValue, Shape); + } + + // Now we can use the alloca logic. + AllocasToPromote.push_back(Alloca); + eliminateSwiftErrorAlloca(F, Alloca, Shape); +} + +/// Eliminate all problematic uses of swifterror arguments and allocas +/// from the function. We'll fix them up later when splitting the function. +static void eliminateSwiftError(Function &F, coro::Shape &Shape) { + SmallVector AllocasToPromote; + + // Look for a swifterror argument. + for (auto &Arg : F.args()) { + if (!Arg.hasSwiftErrorAttr()) continue; + + eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote); + break; + } + + // Look for swifterror allocas. + for (auto &Inst : F.getEntryBlock()) { + auto Alloca = dyn_cast(&Inst); + if (!Alloca || !Alloca->isSwiftError()) continue; + + // Clear the swifterror flag. + Alloca->setSwiftError(false); + + AllocasToPromote.push_back(Alloca); + eliminateSwiftErrorAlloca(F, Alloca, Shape); + } + + // If we have any allocas to promote, compute a dominator tree and + // promote them en masse. + if (!AllocasToPromote.empty()) { + DominatorTree DT(F); + PromoteMemToReg(AllocasToPromote, DT); + } +} + void coro::buildCoroutineFrame(Function &F, Shape &Shape) { // Lower coro.dbg.declare to coro.dbg.value, since we are going to rewrite // access to local variables. LowerDbgDeclare(F); - Shape.PromiseAlloca = Shape.CoroBegin->getId()->getPromise(); - if (Shape.PromiseAlloca) { - Shape.CoroBegin->getId()->clearPromise(); + eliminateSwiftError(F, Shape); + + if (Shape.ABI == coro::ABI::Switch && + Shape.SwitchLowering.PromiseAlloca) { + Shape.getSwitchCoroId()->clearPromise(); } // Make sure that all coro.save, coro.suspend and the fallthrough coro.end // intrinsics are in their own blocks to simplify the logic of building up // SuspendCrossing data. - for (CoroSuspendInst *CSI : Shape.CoroSuspends) { - splitAround(CSI->getCoroSave(), "CoroSave"); + for (auto *CSI : Shape.CoroSuspends) { + if (auto *Save = CSI->getCoroSave()) + splitAround(Save, "CoroSave"); splitAround(CSI, "CoroSuspend"); } @@ -926,6 +1356,8 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { IRBuilder<> Builder(F.getContext()); SpillInfo Spills; + SmallVector LocalAllocas; + SmallVector DeadInstructions; for (int Repeat = 0; Repeat < 4; ++Repeat) { // See if there are materializable instructions across suspend points. @@ -955,11 +1387,40 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { // of the Coroutine Frame. if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin) continue; + // The Coroutine Promise always included into coroutine frame, no need to // check for suspend crossing. - if (Shape.PromiseAlloca == &I) + if (Shape.ABI == coro::ABI::Switch && + Shape.SwitchLowering.PromiseAlloca == &I) continue; + // Handle alloca.alloc specially here. + if (auto AI = dyn_cast(&I)) { + // Check whether the alloca's lifetime is bounded by suspend points. + if (isLocalAlloca(AI)) { + LocalAllocas.push_back(AI); + continue; + } + + // If not, do a quick rewrite of the alloca and then add spills of + // the rewritten value. The rewrite doesn't invalidate anything in + // Spills because the other alloca intrinsics have no other operands + // besides AI, and it doesn't invalidate the iteration because we delay + // erasing AI. 
+ auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions); + + for (User *U : Alloc->users()) { + if (Checker.isDefinitionAcrossSuspend(*Alloc, U)) + Spills.emplace_back(Alloc, U); + } + continue; + } + + // Ignore alloca.get; we process this as part of coro.alloca.alloc. + if (isa(I)) { + continue; + } + for (User *U : I.users()) if (Checker.isDefinitionAcrossSuspend(I, U)) { // We cannot spill a token. @@ -970,7 +1431,10 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { } } LLVM_DEBUG(dump("Spills", Spills)); - moveSpillUsesAfterCoroBegin(F, Spills, Shape.CoroBegin); Shape.FrameTy = buildFrameType(F, Shape, Spills); Shape.FramePtr = insertSpills(Spills, Shape); + lowerLocalAllocas(LocalAllocas, DeadInstructions); + + for (auto I : DeadInstructions) + I->eraseFromParent(); } diff --git a/lib/Transforms/Coroutines/CoroInstr.h b/lib/Transforms/Coroutines/CoroInstr.h index 5e19d7642e38..de2d2920cb15 100644 --- a/lib/Transforms/Coroutines/CoroInstr.h +++ b/lib/Transforms/Coroutines/CoroInstr.h @@ -27,6 +27,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { @@ -77,10 +78,8 @@ public: } }; -/// This represents the llvm.coro.alloc instruction. -class LLVM_LIBRARY_VISIBILITY CoroIdInst : public IntrinsicInst { - enum { AlignArg, PromiseArg, CoroutineArg, InfoArg }; - +/// This represents a common base class for llvm.coro.id instructions. +class LLVM_LIBRARY_VISIBILITY AnyCoroIdInst : public IntrinsicInst { public: CoroAllocInst *getCoroAlloc() { for (User *U : users()) @@ -97,6 +96,24 @@ public: llvm_unreachable("no coro.begin associated with coro.id"); } + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + auto ID = I->getIntrinsicID(); + return ID == Intrinsic::coro_id || + ID == Intrinsic::coro_id_retcon || + ID == Intrinsic::coro_id_retcon_once; + } + + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This represents the llvm.coro.id instruction. +class LLVM_LIBRARY_VISIBILITY CoroIdInst : public AnyCoroIdInst { + enum { AlignArg, PromiseArg, CoroutineArg, InfoArg }; + +public: AllocaInst *getPromise() const { Value *Arg = getArgOperand(PromiseArg); return isa(Arg) @@ -182,6 +199,80 @@ public: } }; +/// This represents either the llvm.coro.id.retcon or +/// llvm.coro.id.retcon.once instruction. +class LLVM_LIBRARY_VISIBILITY AnyCoroIdRetconInst : public AnyCoroIdInst { + enum { SizeArg, AlignArg, StorageArg, PrototypeArg, AllocArg, DeallocArg }; + +public: + void checkWellFormed() const; + + uint64_t getStorageSize() const { + return cast(getArgOperand(SizeArg))->getZExtValue(); + } + + uint64_t getStorageAlignment() const { + return cast(getArgOperand(AlignArg))->getZExtValue(); + } + + Value *getStorage() const { + return getArgOperand(StorageArg); + } + + /// Return the prototype for the continuation function. The type, + /// attributes, and calling convention of the continuation function(s) + /// are taken from this declaration. + Function *getPrototype() const { + return cast(getArgOperand(PrototypeArg)->stripPointerCasts()); + } + + /// Return the function to use for allocating memory. + Function *getAllocFunction() const { + return cast(getArgOperand(AllocArg)->stripPointerCasts()); + } + + /// Return the function to use for deallocating memory. 
+ Function *getDeallocFunction() const { + return cast(getArgOperand(DeallocArg)->stripPointerCasts()); + } + + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + auto ID = I->getIntrinsicID(); + return ID == Intrinsic::coro_id_retcon + || ID == Intrinsic::coro_id_retcon_once; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This represents the llvm.coro.id.retcon instruction. +class LLVM_LIBRARY_VISIBILITY CoroIdRetconInst + : public AnyCoroIdRetconInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_id_retcon; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This represents the llvm.coro.id.retcon.once instruction. +class LLVM_LIBRARY_VISIBILITY CoroIdRetconOnceInst + : public AnyCoroIdRetconInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_id_retcon_once; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + /// This represents the llvm.coro.frame instruction. class LLVM_LIBRARY_VISIBILITY CoroFrameInst : public IntrinsicInst { public: @@ -215,7 +306,9 @@ class LLVM_LIBRARY_VISIBILITY CoroBeginInst : public IntrinsicInst { enum { IdArg, MemArg }; public: - CoroIdInst *getId() const { return cast(getArgOperand(IdArg)); } + AnyCoroIdInst *getId() const { + return cast(getArgOperand(IdArg)); + } Value *getMem() const { return getArgOperand(MemArg); } @@ -261,8 +354,22 @@ public: } }; +class LLVM_LIBRARY_VISIBILITY AnyCoroSuspendInst : public IntrinsicInst { +public: + CoroSaveInst *getCoroSave() const; + + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_suspend || + I->getIntrinsicID() == Intrinsic::coro_suspend_retcon; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + /// This represents the llvm.coro.suspend instruction. -class LLVM_LIBRARY_VISIBILITY CoroSuspendInst : public IntrinsicInst { +class LLVM_LIBRARY_VISIBILITY CoroSuspendInst : public AnyCoroSuspendInst { enum { SaveArg, FinalArg }; public: @@ -273,6 +380,7 @@ public: assert(isa(Arg)); return nullptr; } + bool isFinal() const { return cast(getArgOperand(FinalArg))->isOneValue(); } @@ -286,6 +394,37 @@ public: } }; +inline CoroSaveInst *AnyCoroSuspendInst::getCoroSave() const { + if (auto Suspend = dyn_cast(this)) + return Suspend->getCoroSave(); + return nullptr; +} + +/// This represents the llvm.coro.suspend.retcon instruction. 
+class LLVM_LIBRARY_VISIBILITY CoroSuspendRetconInst : public AnyCoroSuspendInst { +public: + op_iterator value_begin() { return arg_begin(); } + const_op_iterator value_begin() const { return arg_begin(); } + + op_iterator value_end() { return arg_end(); } + const_op_iterator value_end() const { return arg_end(); } + + iterator_range value_operands() { + return make_range(value_begin(), value_end()); + } + iterator_range value_operands() const { + return make_range(value_begin(), value_end()); + } + + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_suspend_retcon; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + /// This represents the llvm.coro.size instruction. class LLVM_LIBRARY_VISIBILITY CoroSizeInst : public IntrinsicInst { public: @@ -317,6 +456,60 @@ public: } }; +/// This represents the llvm.coro.alloca.alloc instruction. +class LLVM_LIBRARY_VISIBILITY CoroAllocaAllocInst : public IntrinsicInst { + enum { SizeArg, AlignArg }; +public: + Value *getSize() const { + return getArgOperand(SizeArg); + } + unsigned getAlignment() const { + return cast(getArgOperand(AlignArg))->getZExtValue(); + } + + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_alloca_alloc; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This represents the llvm.coro.alloca.get instruction. +class LLVM_LIBRARY_VISIBILITY CoroAllocaGetInst : public IntrinsicInst { + enum { AllocArg }; +public: + CoroAllocaAllocInst *getAlloc() const { + return cast(getArgOperand(AllocArg)); + } + + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_alloca_get; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This represents the llvm.coro.alloca.free instruction. +class LLVM_LIBRARY_VISIBILITY CoroAllocaFreeInst : public IntrinsicInst { + enum { AllocArg }; +public: + CoroAllocaAllocInst *getAlloc() const { + return cast(getArgOperand(AllocArg)); + } + + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_alloca_free; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + } // End namespace llvm. #endif diff --git a/lib/Transforms/Coroutines/CoroInternal.h b/lib/Transforms/Coroutines/CoroInternal.h index 441c8a20f1f3..c151474316f9 100644 --- a/lib/Transforms/Coroutines/CoroInternal.h +++ b/lib/Transforms/Coroutines/CoroInternal.h @@ -12,6 +12,7 @@ #define LLVM_LIB_TRANSFORMS_COROUTINES_COROINTERNAL_H #include "CoroInstr.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/Transforms/Coroutines.h" namespace llvm { @@ -61,37 +62,174 @@ struct LowererBase { Value *makeSubFnCall(Value *Arg, int Index, Instruction *InsertPt); }; +enum class ABI { + /// The "resume-switch" lowering, where there are separate resume and + /// destroy functions that are shared between all suspend points. The + /// coroutine frame implicitly stores the resume and destroy functions, + /// the current index, and any promise value. 
+ Switch, + + /// The "returned-continuation" lowering, where each suspend point creates a + /// single continuation function that is used for both resuming and + /// destroying. Does not support promises. + Retcon, + + /// The "unique returned-continuation" lowering, where each suspend point + /// creates a single continuation function that is used for both resuming + /// and destroying. Does not support promises. The function is known to + /// suspend at most once during its execution, and the return value of + /// the continuation is void. + RetconOnce, +}; + // Holds structural Coroutine Intrinsics for a particular function and other // values used during CoroSplit pass. struct LLVM_LIBRARY_VISIBILITY Shape { CoroBeginInst *CoroBegin; SmallVector CoroEnds; SmallVector CoroSizes; - SmallVector CoroSuspends; - - // Field Indexes for known coroutine frame fields. - enum { - ResumeField, - DestroyField, - PromiseField, - IndexField, + SmallVector CoroSuspends; + SmallVector SwiftErrorOps; + + // Field indexes for special fields in the switch lowering. + struct SwitchFieldIndex { + enum { + Resume, + Destroy, + Promise, + Index, + /// The index of the first spill field. + FirstSpill + }; }; + coro::ABI ABI; + StructType *FrameTy; Instruction *FramePtr; BasicBlock *AllocaSpillBlock; - SwitchInst *ResumeSwitch; - AllocaInst *PromiseAlloca; - bool HasFinalSuspend; + + struct SwitchLoweringStorage { + SwitchInst *ResumeSwitch; + AllocaInst *PromiseAlloca; + BasicBlock *ResumeEntryBlock; + bool HasFinalSuspend; + }; + + struct RetconLoweringStorage { + Function *ResumePrototype; + Function *Alloc; + Function *Dealloc; + BasicBlock *ReturnBlock; + bool IsFrameInlineInStorage; + }; + + union { + SwitchLoweringStorage SwitchLowering; + RetconLoweringStorage RetconLowering; + }; + + CoroIdInst *getSwitchCoroId() const { + assert(ABI == coro::ABI::Switch); + return cast(CoroBegin->getId()); + } + + AnyCoroIdRetconInst *getRetconCoroId() const { + assert(ABI == coro::ABI::Retcon || + ABI == coro::ABI::RetconOnce); + return cast(CoroBegin->getId()); + } IntegerType *getIndexType() const { + assert(ABI == coro::ABI::Switch); assert(FrameTy && "frame type not assigned"); - return cast(FrameTy->getElementType(IndexField)); + return cast(FrameTy->getElementType(SwitchFieldIndex::Index)); } ConstantInt *getIndex(uint64_t Value) const { return ConstantInt::get(getIndexType(), Value); } + PointerType *getSwitchResumePointerType() const { + assert(ABI == coro::ABI::Switch); + assert(FrameTy && "frame type not assigned"); + return cast(FrameTy->getElementType(SwitchFieldIndex::Resume)); + } + + FunctionType *getResumeFunctionType() const { + switch (ABI) { + case coro::ABI::Switch: { + auto *FnPtrTy = getSwitchResumePointerType(); + return cast(FnPtrTy->getPointerElementType()); + } + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + return RetconLowering.ResumePrototype->getFunctionType(); + } + llvm_unreachable("Unknown coro::ABI enum"); + } + + ArrayRef getRetconResultTypes() const { + assert(ABI == coro::ABI::Retcon || + ABI == coro::ABI::RetconOnce); + auto FTy = CoroBegin->getFunction()->getFunctionType(); + + // The safety of all this is checked by checkWFRetconPrototype. + if (auto STy = dyn_cast(FTy->getReturnType())) { + return STy->elements().slice(1); + } else { + return ArrayRef(); + } + } + + ArrayRef getRetconResumeTypes() const { + assert(ABI == coro::ABI::Retcon || + ABI == coro::ABI::RetconOnce); + + // The safety of all this is checked by checkWFRetconPrototype. 
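From the caller's point of view, the non-unique retcon ABI boils down to: every suspend hands back a continuation pointer plus the yielded values, and a null continuation signals completion (compare getRetconResultTypes above, which strips off the leading continuation slot). The following is an invented, self-contained imitation of that shape using ordinary function pointers; it is not code produced by the pass.

#include <cstdio>

struct Buffer { int State; };            // stands in for the opaque storage

struct Result {
  Result (*Continuation)(Buffer *);      // null means the coroutine is done
  int Value;                             // the yielded value
};

Result counterResume(Buffer *B);

Result counterStart(Buffer *B) {         // plays the role of the ramp
  B->State = 0;
  return {counterResume, B->State};
}

Result counterResume(Buffer *B) {        // plays the role of a continuation
  if (++B->State == 3)
    return {nullptr, B->State};          // final value, no continuation
  return {counterResume, B->State};
}

int main() {
  Buffer B;
  for (Result R = counterStart(&B); ; R = R.Continuation(&B)) {
    std::printf("yielded %d\n", R.Value);
    if (!R.Continuation)
      break;                             // coroutine completed
  }
}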
+ auto FTy = RetconLowering.ResumePrototype->getFunctionType(); + return FTy->params().slice(1); + } + + CallingConv::ID getResumeFunctionCC() const { + switch (ABI) { + case coro::ABI::Switch: + return CallingConv::Fast; + + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + return RetconLowering.ResumePrototype->getCallingConv(); + } + llvm_unreachable("Unknown coro::ABI enum"); + } + + unsigned getFirstSpillFieldIndex() const { + switch (ABI) { + case coro::ABI::Switch: + return SwitchFieldIndex::FirstSpill; + + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + return 0; + } + llvm_unreachable("Unknown coro::ABI enum"); + } + + AllocaInst *getPromiseAlloca() const { + if (ABI == coro::ABI::Switch) + return SwitchLowering.PromiseAlloca; + return nullptr; + } + + /// Allocate memory according to the rules of the active lowering. + /// + /// \param CG - if non-null, will be updated for the new call + Value *emitAlloc(IRBuilder<> &Builder, Value *Size, CallGraph *CG) const; + + /// Deallocate memory according to the rules of the active lowering. + /// + /// \param CG - if non-null, will be updated for the new call + void emitDealloc(IRBuilder<> &Builder, Value *Ptr, CallGraph *CG) const; + Shape() = default; explicit Shape(Function &F) { buildFrom(F); } void buildFrom(Function &F); diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp index 5458e70ff16a..04723cbde417 100644 --- a/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/lib/Transforms/Coroutines/CoroSplit.cpp @@ -55,6 +55,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -70,9 +71,197 @@ using namespace llvm; #define DEBUG_TYPE "coro-split" +namespace { + +/// A little helper class for building +class CoroCloner { +public: + enum class Kind { + /// The shared resume function for a switch lowering. + SwitchResume, + + /// The shared unwind function for a switch lowering. + SwitchUnwind, + + /// The shared cleanup function for a switch lowering. + SwitchCleanup, + + /// An individual continuation function. + Continuation, + }; +private: + Function &OrigF; + Function *NewF; + const Twine &Suffix; + coro::Shape &Shape; + Kind FKind; + ValueToValueMapTy VMap; + IRBuilder<> Builder; + Value *NewFramePtr = nullptr; + Value *SwiftErrorSlot = nullptr; + + /// The active suspend instruction; meaningful only for continuation ABIs. + AnyCoroSuspendInst *ActiveSuspend = nullptr; + +public: + /// Create a cloner for a switch lowering. + CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape, + Kind FKind) + : OrigF(OrigF), NewF(nullptr), Suffix(Suffix), Shape(Shape), + FKind(FKind), Builder(OrigF.getContext()) { + assert(Shape.ABI == coro::ABI::Switch); + } + + /// Create a cloner for a continuation lowering. 
+ CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape, + Function *NewF, AnyCoroSuspendInst *ActiveSuspend) + : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape), + FKind(Kind::Continuation), Builder(OrigF.getContext()), + ActiveSuspend(ActiveSuspend) { + assert(Shape.ABI == coro::ABI::Retcon || + Shape.ABI == coro::ABI::RetconOnce); + assert(NewF && "need existing function for continuation"); + assert(ActiveSuspend && "need active suspend point for continuation"); + } + + Function *getFunction() const { + assert(NewF != nullptr && "declaration not yet set"); + return NewF; + } + + void create(); + +private: + bool isSwitchDestroyFunction() { + switch (FKind) { + case Kind::Continuation: + case Kind::SwitchResume: + return false; + case Kind::SwitchUnwind: + case Kind::SwitchCleanup: + return true; + } + llvm_unreachable("Unknown CoroCloner::Kind enum"); + } + + void createDeclaration(); + void replaceEntryBlock(); + Value *deriveNewFramePointer(); + void replaceRetconSuspendUses(); + void replaceCoroSuspends(); + void replaceCoroEnds(); + void replaceSwiftErrorOps(); + void handleFinalSuspend(); + void maybeFreeContinuationStorage(); +}; + +} // end anonymous namespace + +static void maybeFreeRetconStorage(IRBuilder<> &Builder, coro::Shape &Shape, + Value *FramePtr, CallGraph *CG) { + assert(Shape.ABI == coro::ABI::Retcon || + Shape.ABI == coro::ABI::RetconOnce); + if (Shape.RetconLowering.IsFrameInlineInStorage) + return; + + Shape.emitDealloc(Builder, FramePtr, CG); +} + +/// Replace a non-unwind call to llvm.coro.end. +static void replaceFallthroughCoroEnd(CoroEndInst *End, coro::Shape &Shape, + Value *FramePtr, bool InResume, + CallGraph *CG) { + // Start inserting right before the coro.end. + IRBuilder<> Builder(End); + + // Create the return instruction. + switch (Shape.ABI) { + // The cloned functions in switch-lowering always return void. + case coro::ABI::Switch: + // coro.end doesn't immediately end the coroutine in the main function + // in this lowering, because we need to deallocate the coroutine. + if (!InResume) + return; + Builder.CreateRetVoid(); + break; + + // In unique continuation lowering, the continuations always return void. + // But we may have implicitly allocated storage. + case coro::ABI::RetconOnce: + maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); + Builder.CreateRetVoid(); + break; + + // In non-unique continuation lowering, we signal completion by returning + // a null continuation. + case coro::ABI::Retcon: { + maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); + auto RetTy = Shape.getResumeFunctionType()->getReturnType(); + auto RetStructTy = dyn_cast(RetTy); + PointerType *ContinuationTy = + cast(RetStructTy ? RetStructTy->getElementType(0) : RetTy); + + Value *ReturnValue = ConstantPointerNull::get(ContinuationTy); + if (RetStructTy) { + ReturnValue = Builder.CreateInsertValue(UndefValue::get(RetStructTy), + ReturnValue, 0); + } + Builder.CreateRet(ReturnValue); + break; + } + } + + // Remove the rest of the block, by splitting it into an unreachable block. + auto *BB = End->getParent(); + BB->splitBasicBlock(End); + BB->getTerminator()->eraseFromParent(); +} + +/// Replace an unwind call to llvm.coro.end. +static void replaceUnwindCoroEnd(CoroEndInst *End, coro::Shape &Shape, + Value *FramePtr, bool InResume, CallGraph *CG){ + IRBuilder<> Builder(End); + + switch (Shape.ABI) { + // In switch-lowering, this does nothing in the main function. 
+ case coro::ABI::Switch: + if (!InResume) + return; + break; + + // In continuation-lowering, this frees the continuation storage. + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); + break; + } + + // If coro.end has an associated bundle, add cleanupret instruction. + if (auto Bundle = End->getOperandBundle(LLVMContext::OB_funclet)) { + auto *FromPad = cast(Bundle->Inputs[0]); + auto *CleanupRet = Builder.CreateCleanupRet(FromPad, nullptr); + End->getParent()->splitBasicBlock(End); + CleanupRet->getParent()->getTerminator()->eraseFromParent(); + } +} + +static void replaceCoroEnd(CoroEndInst *End, coro::Shape &Shape, + Value *FramePtr, bool InResume, CallGraph *CG) { + if (End->isUnwind()) + replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG); + else + replaceFallthroughCoroEnd(End, Shape, FramePtr, InResume, CG); + + auto &Context = End->getContext(); + End->replaceAllUsesWith(InResume ? ConstantInt::getTrue(Context) + : ConstantInt::getFalse(Context)); + End->eraseFromParent(); +} + // Create an entry block for a resume function with a switch that will jump to // suspend points. -static BasicBlock *createResumeEntryBlock(Function &F, coro::Shape &Shape) { +static void createResumeEntryBlock(Function &F, coro::Shape &Shape) { + assert(Shape.ABI == coro::ABI::Switch); LLVMContext &C = F.getContext(); // resume.entry: @@ -91,15 +280,16 @@ static BasicBlock *createResumeEntryBlock(Function &F, coro::Shape &Shape) { IRBuilder<> Builder(NewEntry); auto *FramePtr = Shape.FramePtr; auto *FrameTy = Shape.FrameTy; - auto *GepIndex = Builder.CreateConstInBoundsGEP2_32( - FrameTy, FramePtr, 0, coro::Shape::IndexField, "index.addr"); + auto *GepIndex = Builder.CreateStructGEP( + FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr"); auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index"); auto *Switch = Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size()); - Shape.ResumeSwitch = Switch; + Shape.SwitchLowering.ResumeSwitch = Switch; size_t SuspendIndex = 0; - for (CoroSuspendInst *S : Shape.CoroSuspends) { + for (auto *AnyS : Shape.CoroSuspends) { + auto *S = cast(AnyS); ConstantInt *IndexVal = Shape.getIndex(SuspendIndex); // Replace CoroSave with a store to Index: @@ -109,14 +299,15 @@ static BasicBlock *createResumeEntryBlock(Function &F, coro::Shape &Shape) { Builder.SetInsertPoint(Save); if (S->isFinal()) { // Final suspend point is represented by storing zero in ResumeFnAddr. 
- auto *GepIndex = Builder.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0, - 0, "ResumeFn.addr"); + auto *GepIndex = Builder.CreateStructGEP(FrameTy, FramePtr, + coro::Shape::SwitchFieldIndex::Resume, + "ResumeFn.addr"); auto *NullPtr = ConstantPointerNull::get(cast( cast(GepIndex->getType())->getElementType())); Builder.CreateStore(NullPtr, GepIndex); } else { - auto *GepIndex = Builder.CreateConstInBoundsGEP2_32( - FrameTy, FramePtr, 0, coro::Shape::IndexField, "index.addr"); + auto *GepIndex = Builder.CreateStructGEP( + FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr"); Builder.CreateStore(IndexVal, GepIndex); } Save->replaceAllUsesWith(ConstantTokenNone::get(C)); @@ -164,48 +355,9 @@ static BasicBlock *createResumeEntryBlock(Function &F, coro::Shape &Shape) { Builder.SetInsertPoint(UnreachBB); Builder.CreateUnreachable(); - return NewEntry; + Shape.SwitchLowering.ResumeEntryBlock = NewEntry; } -// In Resumers, we replace fallthrough coro.end with ret void and delete the -// rest of the block. -static void replaceFallthroughCoroEnd(IntrinsicInst *End, - ValueToValueMapTy &VMap) { - auto *NewE = cast(VMap[End]); - ReturnInst::Create(NewE->getContext(), nullptr, NewE); - - // Remove the rest of the block, by splitting it into an unreachable block. - auto *BB = NewE->getParent(); - BB->splitBasicBlock(NewE); - BB->getTerminator()->eraseFromParent(); -} - -// In Resumers, we replace unwind coro.end with True to force the immediate -// unwind to caller. -static void replaceUnwindCoroEnds(coro::Shape &Shape, ValueToValueMapTy &VMap) { - if (Shape.CoroEnds.empty()) - return; - - LLVMContext &Context = Shape.CoroEnds.front()->getContext(); - auto *True = ConstantInt::getTrue(Context); - for (CoroEndInst *CE : Shape.CoroEnds) { - if (!CE->isUnwind()) - continue; - - auto *NewCE = cast(VMap[CE]); - - // If coro.end has an associated bundle, add cleanupret instruction. - if (auto Bundle = NewCE->getOperandBundle(LLVMContext::OB_funclet)) { - Value *FromPad = Bundle->Inputs[0]; - auto *CleanupRet = CleanupReturnInst::Create(FromPad, nullptr, NewCE); - NewCE->getParent()->splitBasicBlock(NewCE); - CleanupRet->getParent()->getTerminator()->eraseFromParent(); - } - - NewCE->replaceAllUsesWith(True); - NewCE->eraseFromParent(); - } -} // Rewrite final suspend point handling. We do not use suspend index to // represent the final suspend point. Instead we zero-out ResumeFnAddr in the @@ -216,83 +368,364 @@ static void replaceUnwindCoroEnds(coro::Shape &Shape, ValueToValueMapTy &VMap) { // In the destroy function, we add a code sequence to check if ResumeFnAddress // is Null, and if so, jump to the appropriate label to handle cleanup from the // final suspend point. 
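createResumeEntryBlock, together with the null-resume-pointer convention described above, produces dispatch code whose shape is easy to picture in plain C++. The sketch below is invented (Frame, f_resume and f_destroy are stand-ins, not the generated clones) but mirrors the switch on the saved index and the final-suspend check.

#include <cstdio>

struct Frame {
  void (*ResumeFn)(Frame *);   // nulled out once the final suspend is reached
  int Index;                   // which non-final suspend point is active
};

void f_resume(Frame *F) {      // stands in for the generated f.resume clone
  switch (F->Index) {          // the switch built by createResumeEntryBlock
  case 0: std::printf("resumed at suspend 0\n"); break;
  case 1: std::printf("resumed at suspend 1\n"); break;
  default: break;              // default target is an unreachable block
  }
}

void f_destroy(Frame *F) {     // stands in for the generated f.destroy clone
  if (F->ResumeFn == nullptr) {     // the check added for the final suspend
    std::printf("cleanup from the final suspend point\n");
    return;
  }
  // otherwise dispatch on F->Index just like f_resume, running cleanup code
}

int main() {
  Frame F{f_resume, 1};
  F.ResumeFn(&F);              // coro.resume: prints "resumed at suspend 1"
  F.ResumeFn = nullptr;        // the store performed at the final suspend
  f_destroy(&F);               // prints "cleanup from the final suspend point"
}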
-static void handleFinalSuspend(IRBuilder<> &Builder, Value *FramePtr, - coro::Shape &Shape, SwitchInst *Switch, - bool IsDestroy) { - assert(Shape.HasFinalSuspend); +void CoroCloner::handleFinalSuspend() { + assert(Shape.ABI == coro::ABI::Switch && + Shape.SwitchLowering.HasFinalSuspend); + auto *Switch = cast(VMap[Shape.SwitchLowering.ResumeSwitch]); auto FinalCaseIt = std::prev(Switch->case_end()); BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor(); Switch->removeCase(FinalCaseIt); - if (IsDestroy) { + if (isSwitchDestroyFunction()) { BasicBlock *OldSwitchBB = Switch->getParent(); auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch"); Builder.SetInsertPoint(OldSwitchBB->getTerminator()); - auto *GepIndex = Builder.CreateConstInBoundsGEP2_32(Shape.FrameTy, FramePtr, - 0, 0, "ResumeFn.addr"); - auto *Load = Builder.CreateLoad( - Shape.FrameTy->getElementType(coro::Shape::ResumeField), GepIndex); - auto *NullPtr = - ConstantPointerNull::get(cast(Load->getType())); - auto *Cond = Builder.CreateICmpEQ(Load, NullPtr); + auto *GepIndex = Builder.CreateStructGEP(Shape.FrameTy, NewFramePtr, + coro::Shape::SwitchFieldIndex::Resume, + "ResumeFn.addr"); + auto *Load = Builder.CreateLoad(Shape.getSwitchResumePointerType(), + GepIndex); + auto *Cond = Builder.CreateIsNull(Load); Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB); OldSwitchBB->getTerminator()->eraseFromParent(); } } -// Create a resume clone by cloning the body of the original function, setting -// new entry block and replacing coro.suspend an appropriate value to force -// resume or cleanup pass for every suspend point. -static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape, - BasicBlock *ResumeEntry, int8_t FnIndex) { - Module *M = F.getParent(); - auto *FrameTy = Shape.FrameTy; - auto *FnPtrTy = cast(FrameTy->getElementType(0)); - auto *FnTy = cast(FnPtrTy->getElementType()); +static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape, + const Twine &Suffix, + Module::iterator InsertBefore) { + Module *M = OrigF.getParent(); + auto *FnTy = Shape.getResumeFunctionType(); Function *NewF = - Function::Create(FnTy, GlobalValue::LinkageTypes::ExternalLinkage, - F.getName() + Suffix, M); + Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage, + OrigF.getName() + Suffix); NewF->addParamAttr(0, Attribute::NonNull); NewF->addParamAttr(0, Attribute::NoAlias); - ValueToValueMapTy VMap; + M->getFunctionList().insert(InsertBefore, NewF); + + return NewF; +} + +/// Replace uses of the active llvm.coro.suspend.retcon call with the +/// arguments to the continuation function. +/// +/// This assumes that the builder has a meaningful insertion point. +void CoroCloner::replaceRetconSuspendUses() { + assert(Shape.ABI == coro::ABI::Retcon || + Shape.ABI == coro::ABI::RetconOnce); + + auto NewS = VMap[ActiveSuspend]; + if (NewS->use_empty()) return; + + // Copy out all the continuation arguments after the buffer pointer into + // an easily-indexed data structure for convenience. + SmallVector Args; + for (auto I = std::next(NewF->arg_begin()), E = NewF->arg_end(); I != E; ++I) + Args.push_back(&*I); + + // If the suspend returns a single scalar value, we can just do a simple + // replacement. + if (!isa(NewS->getType())) { + assert(Args.size() == 1); + NewS->replaceAllUsesWith(Args.front()); + return; + } + + // Try to peephole extracts of an aggregate return. 
+ for (auto UI = NewS->use_begin(), UE = NewS->use_end(); UI != UE; ) { + auto EVI = dyn_cast((UI++)->getUser()); + if (!EVI || EVI->getNumIndices() != 1) + continue; + + EVI->replaceAllUsesWith(Args[EVI->getIndices().front()]); + EVI->eraseFromParent(); + } + + // If we have no remaining uses, we're done. + if (NewS->use_empty()) return; + + // Otherwise, we need to create an aggregate. + Value *Agg = UndefValue::get(NewS->getType()); + for (size_t I = 0, E = Args.size(); I != E; ++I) + Agg = Builder.CreateInsertValue(Agg, Args[I], I); + + NewS->replaceAllUsesWith(Agg); +} + +void CoroCloner::replaceCoroSuspends() { + Value *SuspendResult; + + switch (Shape.ABI) { + // In switch lowering, replace coro.suspend with the appropriate value + // for the type of function we're extracting. + // Replacing coro.suspend with (0) will result in control flow proceeding to + // a resume label associated with a suspend point, replacing it with (1) will + // result in control flow proceeding to a cleanup label associated with this + // suspend point. + case coro::ABI::Switch: + SuspendResult = Builder.getInt8(isSwitchDestroyFunction() ? 1 : 0); + break; + + // In returned-continuation lowering, the arguments from earlier + // continuations are theoretically arbitrary, and they should have been + // spilled. + case coro::ABI::RetconOnce: + case coro::ABI::Retcon: + return; + } + + for (AnyCoroSuspendInst *CS : Shape.CoroSuspends) { + // The active suspend was handled earlier. + if (CS == ActiveSuspend) continue; + + auto *MappedCS = cast(VMap[CS]); + MappedCS->replaceAllUsesWith(SuspendResult); + MappedCS->eraseFromParent(); + } +} + +void CoroCloner::replaceCoroEnds() { + for (CoroEndInst *CE : Shape.CoroEnds) { + // We use a null call graph because there's no call graph node for + // the cloned function yet. We'll just be rebuilding that later. + auto NewCE = cast(VMap[CE]); + replaceCoroEnd(NewCE, Shape, NewFramePtr, /*in resume*/ true, nullptr); + } +} + +static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape, + ValueToValueMapTy *VMap) { + Value *CachedSlot = nullptr; + auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * { + if (CachedSlot) { + assert(CachedSlot->getType()->getPointerElementType() == ValueTy && + "multiple swifterror slots in function with different types"); + return CachedSlot; + } + + // Check if the function has a swifterror argument. + for (auto &Arg : F.args()) { + if (Arg.isSwiftError()) { + CachedSlot = &Arg; + assert(Arg.getType()->getPointerElementType() == ValueTy && + "swifterror argument does not have expected type"); + return &Arg; + } + } + + // Create a swifterror alloca. + IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg()); + auto Alloca = Builder.CreateAlloca(ValueTy); + Alloca->setSwiftError(true); + + CachedSlot = Alloca; + return Alloca; + }; + + for (CallInst *Op : Shape.SwiftErrorOps) { + auto MappedOp = VMap ? cast((*VMap)[Op]) : Op; + IRBuilder<> Builder(MappedOp); + + // If there are no arguments, this is a 'get' operation. 
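Substituting the constants 0 and 1 in replaceCoroSuspends is what turns one cloned body into distinct resume and cleanup functions: each clone sees a fixed suspend result, and later simplification folds away the other path. A tiny invented analogue, using a template parameter where the cloned IR has a constant-folded value:

#include <cstdio>

template <int SuspendResult>         // 0 in f.resume, 1 in f.destroy/cleanup
void clonedBody() {
  // The conditional that originally tested llvm.coro.suspend now tests a
  // constant, so only one branch survives in each specialization.
  if (SuspendResult == 0)
    std::printf("resume path\n");
  else
    std::printf("cleanup path\n");
}

int main() {
  clonedBody<0>();   // the resume clone
  clonedBody<1>();   // the destroy and cleanup clones
}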
+ Value *MappedResult; + if (Op->getNumArgOperands() == 0) { + auto ValueTy = Op->getType(); + auto Slot = getSwiftErrorSlot(ValueTy); + MappedResult = Builder.CreateLoad(ValueTy, Slot); + } else { + assert(Op->getNumArgOperands() == 1); + auto Value = MappedOp->getArgOperand(0); + auto ValueTy = Value->getType(); + auto Slot = getSwiftErrorSlot(ValueTy); + Builder.CreateStore(Value, Slot); + MappedResult = Slot; + } + + MappedOp->replaceAllUsesWith(MappedResult); + MappedOp->eraseFromParent(); + } + + // If we're updating the original function, we've invalidated SwiftErrorOps. + if (VMap == nullptr) { + Shape.SwiftErrorOps.clear(); + } +} + +void CoroCloner::replaceSwiftErrorOps() { + ::replaceSwiftErrorOps(*NewF, Shape, &VMap); +} + +void CoroCloner::replaceEntryBlock() { + // In the original function, the AllocaSpillBlock is a block immediately + // following the allocation of the frame object which defines GEPs for + // all the allocas that have been moved into the frame, and it ends by + // branching to the original beginning of the coroutine. Make this + // the entry block of the cloned function. + auto *Entry = cast(VMap[Shape.AllocaSpillBlock]); + Entry->setName("entry" + Suffix); + Entry->moveBefore(&NewF->getEntryBlock()); + Entry->getTerminator()->eraseFromParent(); + + // Clear all predecessors of the new entry block. There should be + // exactly one predecessor, which we created when splitting out + // AllocaSpillBlock to begin with. + assert(Entry->hasOneUse()); + auto BranchToEntry = cast(Entry->user_back()); + assert(BranchToEntry->isUnconditional()); + Builder.SetInsertPoint(BranchToEntry); + Builder.CreateUnreachable(); + BranchToEntry->eraseFromParent(); + + // TODO: move any allocas into Entry that weren't moved into the frame. + // (Currently we move all allocas into the frame.) + + // Branch from the entry to the appropriate place. + Builder.SetInsertPoint(Entry); + switch (Shape.ABI) { + case coro::ABI::Switch: { + // In switch-lowering, we built a resume-entry block in the original + // function. Make the entry block branch to this. + auto *SwitchBB = + cast(VMap[Shape.SwitchLowering.ResumeEntryBlock]); + Builder.CreateBr(SwitchBB); + break; + } + + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: { + // In continuation ABIs, we want to branch to immediately after the + // active suspend point. Earlier phases will have put the suspend in its + // own basic block, so just thread our jump directly to its successor. + auto MappedCS = cast(VMap[ActiveSuspend]); + auto Branch = cast(MappedCS->getNextNode()); + assert(Branch->isUnconditional()); + Builder.CreateBr(Branch->getSuccessor(0)); + break; + } + } +} + +/// Derive the value of the new frame pointer. +Value *CoroCloner::deriveNewFramePointer() { + // Builder should be inserting to the front of the new entry block. + + switch (Shape.ABI) { + // In switch-lowering, the argument is the frame pointer. + case coro::ABI::Switch: + return &*NewF->arg_begin(); + + // In continuation-lowering, the argument is the opaque storage. + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: { + Argument *NewStorage = &*NewF->arg_begin(); + auto FramePtrTy = Shape.FrameTy->getPointerTo(); + + // If the storage is inline, just bitcast to the storage to the frame type. + if (Shape.RetconLowering.IsFrameInlineInStorage) + return Builder.CreateBitCast(NewStorage, FramePtrTy); + + // Otherwise, load the real frame from the opaque storage. 
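The two cases above mirror the IsFrameInlineInStorage flag computed in buildFrameType: if the frame fits the caller-provided buffer in both size and alignment, the buffer is simply reinterpreted as the frame; otherwise the buffer holds a pointer to a separately allocated frame. Here is a standalone sketch, with an invented Frame type and with constants standing in for the size and alignment arguments of llvm.coro.id.retcon.

#include <cstddef>
#include <cstdio>
#include <new>

struct Frame { double Spill; int Index; };

constexpr std::size_t StorageSize  = 32;   // size argument of coro.id.retcon
constexpr std::size_t StorageAlign = 8;    // alignment argument

alignas(StorageAlign) unsigned char Storage[StorageSize];

Frame *frameFromStorage(void *Buffer) {
  constexpr bool InlineInStorage =
      sizeof(Frame) <= StorageSize && alignof(Frame) <= StorageAlign;
  if (InlineInStorage)
    return static_cast<Frame *>(Buffer);   // just a bitcast of the buffer
  // Out-of-line case: the buffer only holds a pointer to the real frame.
  return *static_cast<Frame **>(Buffer);
}

int main() {
  new (Storage) Frame{3.14, 1};            // inline case holds for this Frame
  std::printf("index %d\n", frameFromStorage(Storage)->Index);   // prints 1
}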
+ auto FramePtrPtr = + Builder.CreateBitCast(NewStorage, FramePtrTy->getPointerTo()); + return Builder.CreateLoad(FramePtrPtr); + } + } + llvm_unreachable("bad ABI"); +} + +/// Clone the body of the original function into a resume function of +/// some sort. +void CoroCloner::create() { + // Create the new function if we don't already have one. + if (!NewF) { + NewF = createCloneDeclaration(OrigF, Shape, Suffix, + OrigF.getParent()->end()); + } + // Replace all args with undefs. The buildCoroutineFrame algorithm already // rewritten access to the args that occurs after suspend points with loads // and stores to/from the coroutine frame. - for (Argument &A : F.args()) + for (Argument &A : OrigF.args()) VMap[&A] = UndefValue::get(A.getType()); SmallVector Returns; - CloneFunctionInto(NewF, &F, VMap, /*ModuleLevelChanges=*/true, Returns); - NewF->setLinkage(GlobalValue::LinkageTypes::InternalLinkage); + // Ignore attempts to change certain attributes of the function. + // TODO: maybe there should be a way to suppress this during cloning? + auto savedVisibility = NewF->getVisibility(); + auto savedUnnamedAddr = NewF->getUnnamedAddr(); + auto savedDLLStorageClass = NewF->getDLLStorageClass(); + + // NewF's linkage (which CloneFunctionInto does *not* change) might not + // be compatible with the visibility of OrigF (which it *does* change), + // so protect against that. + auto savedLinkage = NewF->getLinkage(); + NewF->setLinkage(llvm::GlobalValue::ExternalLinkage); + + CloneFunctionInto(NewF, &OrigF, VMap, /*ModuleLevelChanges=*/true, Returns); + + NewF->setLinkage(savedLinkage); + NewF->setVisibility(savedVisibility); + NewF->setUnnamedAddr(savedUnnamedAddr); + NewF->setDLLStorageClass(savedDLLStorageClass); + + auto &Context = NewF->getContext(); + + // Replace the attributes of the new function: + auto OrigAttrs = NewF->getAttributes(); + auto NewAttrs = AttributeList(); + + switch (Shape.ABI) { + case coro::ABI::Switch: + // Bootstrap attributes by copying function attributes from the + // original function. This should include optimization settings and so on. + NewAttrs = NewAttrs.addAttributes(Context, AttributeList::FunctionIndex, + OrigAttrs.getFnAttributes()); + break; + + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + // If we have a continuation prototype, just use its attributes, + // full-stop. + NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes(); + break; + } - // Remove old returns. - for (ReturnInst *Return : Returns) - changeToUnreachable(Return, /*UseLLVMTrap=*/false); + // Make the frame parameter nonnull and noalias. + NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NonNull); + NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NoAlias); + + switch (Shape.ABI) { + // In these ABIs, the cloned functions always return 'void', and the + // existing return sites are meaningless. Note that for unique + // continuations, this includes the returns associated with suspends; + // this is fine because we can't suspend twice. + case coro::ABI::Switch: + case coro::ABI::RetconOnce: + // Remove old returns. + for (ReturnInst *Return : Returns) + changeToUnreachable(Return, /*UseLLVMTrap=*/false); + break; + + // With multi-suspend continuations, we'll already have eliminated the + // original returns and inserted returns before all the suspend points, + // so we want to leave any returns in place. + case coro::ABI::Retcon: + break; + } - // Remove old return attributes. 
- NewF->removeAttributes( - AttributeList::ReturnIndex, - AttributeFuncs::typeIncompatible(NewF->getReturnType())); + NewF->setAttributes(NewAttrs); + NewF->setCallingConv(Shape.getResumeFunctionCC()); - // Make AllocaSpillBlock the new entry block. - auto *SwitchBB = cast(VMap[ResumeEntry]); - auto *Entry = cast(VMap[Shape.AllocaSpillBlock]); - Entry->moveBefore(&NewF->getEntryBlock()); - Entry->getTerminator()->eraseFromParent(); - BranchInst::Create(SwitchBB, Entry); - Entry->setName("entry" + Suffix); + // Set up the new entry block. + replaceEntryBlock(); - // Clear all predecessors of the new entry block. - auto *Switch = cast(VMap[Shape.ResumeSwitch]); - Entry->replaceAllUsesWith(Switch->getDefaultDest()); - - IRBuilder<> Builder(&NewF->getEntryBlock().front()); + Builder.SetInsertPoint(&NewF->getEntryBlock().front()); + NewFramePtr = deriveNewFramePointer(); // Remap frame pointer. - Argument *NewFramePtr = &*NewF->arg_begin(); - Value *OldFramePtr = cast(VMap[Shape.FramePtr]); + Value *OldFramePtr = VMap[Shape.FramePtr]; NewFramePtr->takeName(OldFramePtr); OldFramePtr->replaceAllUsesWith(NewFramePtr); @@ -302,50 +735,55 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape, Value *OldVFrame = cast(VMap[Shape.CoroBegin]); OldVFrame->replaceAllUsesWith(NewVFrame); - // Rewrite final suspend handling as it is not done via switch (allows to - // remove final case from the switch, since it is undefined behavior to resume - // the coroutine suspended at the final suspend point. - if (Shape.HasFinalSuspend) { - auto *Switch = cast(VMap[Shape.ResumeSwitch]); - bool IsDestroy = FnIndex != 0; - handleFinalSuspend(Builder, NewFramePtr, Shape, Switch, IsDestroy); + switch (Shape.ABI) { + case coro::ABI::Switch: + // Rewrite final suspend handling as it is not done via switch (allows to + // remove final case from the switch, since it is undefined behavior to + // resume the coroutine suspended at the final suspend point. + if (Shape.SwitchLowering.HasFinalSuspend) + handleFinalSuspend(); + break; + + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + // Replace uses of the active suspend with the corresponding + // continuation-function arguments. + assert(ActiveSuspend != nullptr && + "no active suspend when lowering a continuation-style coroutine"); + replaceRetconSuspendUses(); + break; } - // Replace coro suspend with the appropriate resume index. - // Replacing coro.suspend with (0) will result in control flow proceeding to - // a resume label associated with a suspend point, replacing it with (1) will - // result in control flow proceeding to a cleanup label associated with this - // suspend point. - auto *NewValue = Builder.getInt8(FnIndex ? 1 : 0); - for (CoroSuspendInst *CS : Shape.CoroSuspends) { - auto *MappedCS = cast(VMap[CS]); - MappedCS->replaceAllUsesWith(NewValue); - MappedCS->eraseFromParent(); - } + // Handle suspends. + replaceCoroSuspends(); + + // Handle swifterror. + replaceSwiftErrorOps(); // Remove coro.end intrinsics. - replaceFallthroughCoroEnd(Shape.CoroEnds.front(), VMap); - replaceUnwindCoroEnds(Shape, VMap); + replaceCoroEnds(); + // Eliminate coro.free from the clones, replacing it with 'null' in cleanup, // to suppress deallocation code. 
- coro::replaceCoroFree(cast(VMap[Shape.CoroBegin->getId()]), - /*Elide=*/FnIndex == 2); - - NewF->setCallingConv(CallingConv::Fast); - - return NewF; + if (Shape.ABI == coro::ABI::Switch) + coro::replaceCoroFree(cast(VMap[Shape.CoroBegin->getId()]), + /*Elide=*/ FKind == CoroCloner::Kind::SwitchCleanup); } -static void removeCoroEnds(coro::Shape &Shape) { - if (Shape.CoroEnds.empty()) - return; - - LLVMContext &Context = Shape.CoroEnds.front()->getContext(); - auto *False = ConstantInt::getFalse(Context); +// Create a resume clone by cloning the body of the original function, setting +// new entry block and replacing coro.suspend an appropriate value to force +// resume or cleanup pass for every suspend point. +static Function *createClone(Function &F, const Twine &Suffix, + coro::Shape &Shape, CoroCloner::Kind FKind) { + CoroCloner Cloner(F, Suffix, Shape, FKind); + Cloner.create(); + return Cloner.getFunction(); +} - for (CoroEndInst *CE : Shape.CoroEnds) { - CE->replaceAllUsesWith(False); - CE->eraseFromParent(); +/// Remove calls to llvm.coro.end in the original function. +static void removeCoroEnds(coro::Shape &Shape, CallGraph *CG) { + for (auto End : Shape.CoroEnds) { + replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, CG); } } @@ -377,8 +815,12 @@ static void replaceFrameSize(coro::Shape &Shape) { // i8* bitcast([2 x void(%f.frame*)*] * @f.resumers to i8*)) // // Assumes that all the functions have the same signature. -static void setCoroInfo(Function &F, CoroBeginInst *CoroBegin, - std::initializer_list Fns) { +static void setCoroInfo(Function &F, coro::Shape &Shape, + ArrayRef Fns) { + // This only works under the switch-lowering ABI because coro elision + // only works on the switch-lowering ABI. + assert(Shape.ABI == coro::ABI::Switch); + SmallVector Args(Fns.begin(), Fns.end()); assert(!Args.empty()); Function *Part = *Fns.begin(); @@ -393,38 +835,45 @@ static void setCoroInfo(Function &F, CoroBeginInst *CoroBegin, // Update coro.begin instruction to refer to this constant. LLVMContext &C = F.getContext(); auto *BC = ConstantExpr::getPointerCast(GV, Type::getInt8PtrTy(C)); - CoroBegin->getId()->setInfo(BC); + Shape.getSwitchCoroId()->setInfo(BC); } // Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame. static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn, Function *DestroyFn, Function *CleanupFn) { + assert(Shape.ABI == coro::ABI::Switch); + IRBuilder<> Builder(Shape.FramePtr->getNextNode()); - auto *ResumeAddr = Builder.CreateConstInBoundsGEP2_32( - Shape.FrameTy, Shape.FramePtr, 0, coro::Shape::ResumeField, + auto *ResumeAddr = Builder.CreateStructGEP( + Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Resume, "resume.addr"); Builder.CreateStore(ResumeFn, ResumeAddr); Value *DestroyOrCleanupFn = DestroyFn; - CoroIdInst *CoroId = Shape.CoroBegin->getId(); + CoroIdInst *CoroId = Shape.getSwitchCoroId(); if (CoroAllocInst *CA = CoroId->getCoroAlloc()) { // If there is a CoroAlloc and it returns false (meaning we elide the // allocation, use CleanupFn instead of DestroyFn). 
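    // The select emitted below looks roughly like this (names hypothetical):
    //   %fn = select i1 %coro.alloc, void (%f.frame*)* @f.destroy,
    //                                void (%f.frame*)* @f.cleanup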
DestroyOrCleanupFn = Builder.CreateSelect(CA, DestroyFn, CleanupFn); } - auto *DestroyAddr = Builder.CreateConstInBoundsGEP2_32( - Shape.FrameTy, Shape.FramePtr, 0, coro::Shape::DestroyField, + auto *DestroyAddr = Builder.CreateStructGEP( + Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Destroy, "destroy.addr"); Builder.CreateStore(DestroyOrCleanupFn, DestroyAddr); } static void postSplitCleanup(Function &F) { removeUnreachableBlocks(F); + + // For now, we do a mandatory verification step because we don't + // entirely trust this pass. Note that we don't want to add a verifier + // pass to FPM below because it will also verify all the global data. + verifyFunction(F); + legacy::FunctionPassManager FPM(F.getParent()); - FPM.add(createVerifierPass()); FPM.add(createSCCPPass()); FPM.add(createCFGSimplificationPass()); FPM.add(createEarlyCSEPass()); @@ -520,21 +969,34 @@ static void addMustTailToCoroResumes(Function &F) { // Coroutine has no suspend points. Remove heap allocation for the coroutine // frame if possible. -static void handleNoSuspendCoroutine(CoroBeginInst *CoroBegin, Type *FrameTy) { +static void handleNoSuspendCoroutine(coro::Shape &Shape) { + auto *CoroBegin = Shape.CoroBegin; auto *CoroId = CoroBegin->getId(); auto *AllocInst = CoroId->getCoroAlloc(); - coro::replaceCoroFree(CoroId, /*Elide=*/AllocInst != nullptr); - if (AllocInst) { - IRBuilder<> Builder(AllocInst); - // FIXME: Need to handle overaligned members. - auto *Frame = Builder.CreateAlloca(FrameTy); - auto *VFrame = Builder.CreateBitCast(Frame, Builder.getInt8PtrTy()); - AllocInst->replaceAllUsesWith(Builder.getFalse()); - AllocInst->eraseFromParent(); - CoroBegin->replaceAllUsesWith(VFrame); - } else { - CoroBegin->replaceAllUsesWith(CoroBegin->getMem()); + switch (Shape.ABI) { + case coro::ABI::Switch: { + auto SwitchId = cast(CoroId); + coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr); + if (AllocInst) { + IRBuilder<> Builder(AllocInst); + // FIXME: Need to handle overaligned members. + auto *Frame = Builder.CreateAlloca(Shape.FrameTy); + auto *VFrame = Builder.CreateBitCast(Frame, Builder.getInt8PtrTy()); + AllocInst->replaceAllUsesWith(Builder.getFalse()); + AllocInst->eraseFromParent(); + CoroBegin->replaceAllUsesWith(VFrame); + } else { + CoroBegin->replaceAllUsesWith(CoroBegin->getMem()); + } + break; + } + + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + CoroBegin->replaceAllUsesWith(UndefValue::get(CoroBegin->getType())); + break; } + CoroBegin->eraseFromParent(); } @@ -670,12 +1132,16 @@ static bool simplifySuspendPoint(CoroSuspendInst *Suspend, // Remove suspend points that are simplified. static void simplifySuspendPoints(coro::Shape &Shape) { + // Currently, the only simplification we do is switch-lowering-specific. + if (Shape.ABI != coro::ABI::Switch) + return; + auto &S = Shape.CoroSuspends; size_t I = 0, N = S.size(); if (N == 0) return; while (true) { - if (simplifySuspendPoint(S[I], Shape.CoroBegin)) { + if (simplifySuspendPoint(cast(S[I]), Shape.CoroBegin)) { if (--N == I) break; std::swap(S[I], S[N]); @@ -687,142 +1153,227 @@ static void simplifySuspendPoints(coro::Shape &Shape) { S.resize(N); } -static SmallPtrSet getCoroBeginPredBlocks(CoroBeginInst *CB) { - // Collect all blocks that we need to look for instructions to relocate. 
- SmallPtrSet RelocBlocks; - SmallVector Work; - Work.push_back(CB->getParent()); +static void splitSwitchCoroutine(Function &F, coro::Shape &Shape, + SmallVectorImpl &Clones) { + assert(Shape.ABI == coro::ABI::Switch); - do { - BasicBlock *Current = Work.pop_back_val(); - for (BasicBlock *BB : predecessors(Current)) - if (RelocBlocks.count(BB) == 0) { - RelocBlocks.insert(BB); - Work.push_back(BB); - } - } while (!Work.empty()); - return RelocBlocks; -} - -static SmallPtrSet -getNotRelocatableInstructions(CoroBeginInst *CoroBegin, - SmallPtrSetImpl &RelocBlocks) { - SmallPtrSet DoNotRelocate; - // Collect all instructions that we should not relocate - SmallVector Work; - - // Start with CoroBegin and terminators of all preceding blocks. - Work.push_back(CoroBegin); - BasicBlock *CoroBeginBB = CoroBegin->getParent(); - for (BasicBlock *BB : RelocBlocks) - if (BB != CoroBeginBB) - Work.push_back(BB->getTerminator()); - - // For every instruction in the Work list, place its operands in DoNotRelocate - // set. - do { - Instruction *Current = Work.pop_back_val(); - LLVM_DEBUG(dbgs() << "CoroSplit: Will not relocate: " << *Current << "\n"); - DoNotRelocate.insert(Current); - for (Value *U : Current->operands()) { - auto *I = dyn_cast(U); - if (!I) - continue; + createResumeEntryBlock(F, Shape); + auto ResumeClone = createClone(F, ".resume", Shape, + CoroCloner::Kind::SwitchResume); + auto DestroyClone = createClone(F, ".destroy", Shape, + CoroCloner::Kind::SwitchUnwind); + auto CleanupClone = createClone(F, ".cleanup", Shape, + CoroCloner::Kind::SwitchCleanup); - if (auto *A = dyn_cast(I)) { - // Stores to alloca instructions that occur before the coroutine frame - // is allocated should not be moved; the stored values may be used by - // the coroutine frame allocator. The operands to those stores must also - // remain in place. - for (const auto &User : A->users()) - if (auto *SI = dyn_cast(User)) - if (RelocBlocks.count(SI->getParent()) != 0 && - DoNotRelocate.count(SI) == 0) { - Work.push_back(SI); - DoNotRelocate.insert(SI); - } - continue; - } + postSplitCleanup(*ResumeClone); + postSplitCleanup(*DestroyClone); + postSplitCleanup(*CleanupClone); + + addMustTailToCoroResumes(*ResumeClone); + + // Store addresses resume/destroy/cleanup functions in the coroutine frame. + updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone); + + assert(Clones.empty()); + Clones.push_back(ResumeClone); + Clones.push_back(DestroyClone); + Clones.push_back(CleanupClone); + + // Create a constant array referring to resume/destroy/clone functions pointed + // by the last argument of @llvm.coro.info, so that CoroElide pass can + // determined correct function to call. + setCoroInfo(F, Shape, Clones); +} - if (DoNotRelocate.count(I) == 0) { - Work.push_back(I); - DoNotRelocate.insert(I); +static void splitRetconCoroutine(Function &F, coro::Shape &Shape, + SmallVectorImpl &Clones) { + assert(Shape.ABI == coro::ABI::Retcon || + Shape.ABI == coro::ABI::RetconOnce); + assert(Clones.empty()); + + // Reset various things that the optimizer might have decided it + // "knows" about the coroutine function due to not seeing a return. + F.removeFnAttr(Attribute::NoReturn); + F.removeAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); + F.removeAttribute(AttributeList::ReturnIndex, Attribute::NonNull); + + // Allocate the frame. 
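  // A sketch of the non-inline case handled below, with a hypothetical
  // allocator name:
  //   %frame = call i8* @my_alloc(i64 <frame size>)
  //   %slot  = bitcast i8* %storage to i8**
  //   store i8* %frame, i8** %slot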
+ auto *Id = cast(Shape.CoroBegin->getId()); + Value *RawFramePtr; + if (Shape.RetconLowering.IsFrameInlineInStorage) { + RawFramePtr = Id->getStorage(); + } else { + IRBuilder<> Builder(Id); + + // Determine the size of the frame. + const DataLayout &DL = F.getParent()->getDataLayout(); + auto Size = DL.getTypeAllocSize(Shape.FrameTy); + + // Allocate. We don't need to update the call graph node because we're + // going to recompute it from scratch after splitting. + RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr); + RawFramePtr = + Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType()); + + // Stash the allocated frame pointer in the continuation storage. + auto Dest = Builder.CreateBitCast(Id->getStorage(), + RawFramePtr->getType()->getPointerTo()); + Builder.CreateStore(RawFramePtr, Dest); + } + + // Map all uses of llvm.coro.begin to the allocated frame pointer. + { + // Make sure we don't invalidate Shape.FramePtr. + TrackingVH Handle(Shape.FramePtr); + Shape.CoroBegin->replaceAllUsesWith(RawFramePtr); + Shape.FramePtr = Handle.getValPtr(); + } + + // Create a unique return block. + BasicBlock *ReturnBB = nullptr; + SmallVector ReturnPHIs; + + // Create all the functions in order after the main function. + auto NextF = std::next(F.getIterator()); + + // Create a continuation function for each of the suspend points. + Clones.reserve(Shape.CoroSuspends.size()); + for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) { + auto Suspend = cast(Shape.CoroSuspends[i]); + + // Create the clone declaration. + auto Continuation = + createCloneDeclaration(F, Shape, ".resume." + Twine(i), NextF); + Clones.push_back(Continuation); + + // Insert a branch to the unified return block immediately before + // the suspend point. + auto SuspendBB = Suspend->getParent(); + auto NewSuspendBB = SuspendBB->splitBasicBlock(Suspend); + auto Branch = cast(SuspendBB->getTerminator()); + + // Create the unified return block. + if (!ReturnBB) { + // Place it before the first suspend. + ReturnBB = BasicBlock::Create(F.getContext(), "coro.return", &F, + NewSuspendBB); + Shape.RetconLowering.ReturnBlock = ReturnBB; + + IRBuilder<> Builder(ReturnBB); + + // Create PHIs for all the return values. + assert(ReturnPHIs.empty()); + + // First, the continuation. + ReturnPHIs.push_back(Builder.CreatePHI(Continuation->getType(), + Shape.CoroSuspends.size())); + + // Next, all the directly-yielded values. + for (auto ResultTy : Shape.getRetconResultTypes()) + ReturnPHIs.push_back(Builder.CreatePHI(ResultTy, + Shape.CoroSuspends.size())); + + // Build the return value. + auto RetTy = F.getReturnType(); + + // Cast the continuation value if necessary. + // We can't rely on the types matching up because that type would + // have to be infinite. + auto CastedContinuationTy = + (ReturnPHIs.size() == 1 ? 
RetTy : RetTy->getStructElementType(0)); + auto *CastedContinuation = + Builder.CreateBitCast(ReturnPHIs[0], CastedContinuationTy); + + Value *RetV; + if (ReturnPHIs.size() == 1) { + RetV = CastedContinuation; + } else { + RetV = UndefValue::get(RetTy); + RetV = Builder.CreateInsertValue(RetV, CastedContinuation, 0); + for (size_t I = 1, E = ReturnPHIs.size(); I != E; ++I) + RetV = Builder.CreateInsertValue(RetV, ReturnPHIs[I], I); } + + Builder.CreateRet(RetV); } - } while (!Work.empty()); - return DoNotRelocate; -} -static void relocateInstructionBefore(CoroBeginInst *CoroBegin, Function &F) { - // Analyze which non-alloca instructions are needed for allocation and - // relocate the rest to after coro.begin. We need to do it, since some of the - // targets of those instructions may be placed into coroutine frame memory - // for which becomes available after coro.begin intrinsic. + // Branch to the return block. + Branch->setSuccessor(0, ReturnBB); + ReturnPHIs[0]->addIncoming(Continuation, SuspendBB); + size_t NextPHIIndex = 1; + for (auto &VUse : Suspend->value_operands()) + ReturnPHIs[NextPHIIndex++]->addIncoming(&*VUse, SuspendBB); + assert(NextPHIIndex == ReturnPHIs.size()); + } - auto BlockSet = getCoroBeginPredBlocks(CoroBegin); - auto DoNotRelocateSet = getNotRelocatableInstructions(CoroBegin, BlockSet); + assert(Clones.size() == Shape.CoroSuspends.size()); + for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) { + auto Suspend = Shape.CoroSuspends[i]; + auto Clone = Clones[i]; - Instruction *InsertPt = CoroBegin->getNextNode(); - BasicBlock &BB = F.getEntryBlock(); // TODO: Look at other blocks as well. - for (auto B = BB.begin(), E = BB.end(); B != E;) { - Instruction &I = *B++; - if (isa(&I)) - continue; - if (&I == CoroBegin) - break; - if (DoNotRelocateSet.count(&I)) - continue; - I.moveBefore(InsertPt); + CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend).create(); + } +} + +namespace { + class PrettyStackTraceFunction : public PrettyStackTraceEntry { + Function &F; + public: + PrettyStackTraceFunction(Function &F) : F(F) {} + void print(raw_ostream &OS) const override { + OS << "While splitting coroutine "; + F.printAsOperand(OS, /*print type*/ false, F.getParent()); + OS << "\n"; + } + }; +} + +static void splitCoroutine(Function &F, coro::Shape &Shape, + SmallVectorImpl &Clones) { + switch (Shape.ABI) { + case coro::ABI::Switch: + return splitSwitchCoroutine(F, Shape, Clones); + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + return splitRetconCoroutine(F, Shape, Clones); } + llvm_unreachable("bad ABI kind"); } static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) { - EliminateUnreachableBlocks(F); + PrettyStackTraceFunction prettyStackTrace(F); + + // The suspend-crossing algorithm in buildCoroutineFrame get tripped + // up by uses in unreachable blocks, so remove them as a first pass. + removeUnreachableBlocks(F); coro::Shape Shape(F); if (!Shape.CoroBegin) return; simplifySuspendPoints(Shape); - relocateInstructionBefore(Shape.CoroBegin, F); buildCoroutineFrame(F, Shape); replaceFrameSize(Shape); + SmallVector Clones; + // If there are no suspend points, no split required, just remove // the allocation and deallocation blocks, they are not needed. 
if (Shape.CoroSuspends.empty()) { - handleNoSuspendCoroutine(Shape.CoroBegin, Shape.FrameTy); - removeCoroEnds(Shape); - postSplitCleanup(F); - coro::updateCallGraph(F, {}, CG, SCC); - return; + handleNoSuspendCoroutine(Shape); + } else { + splitCoroutine(F, Shape, Clones); } - auto *ResumeEntry = createResumeEntryBlock(F, Shape); - auto ResumeClone = createClone(F, ".resume", Shape, ResumeEntry, 0); - auto DestroyClone = createClone(F, ".destroy", Shape, ResumeEntry, 1); - auto CleanupClone = createClone(F, ".cleanup", Shape, ResumeEntry, 2); - - // We no longer need coro.end in F. - removeCoroEnds(Shape); + // Replace all the swifterror operations in the original function. + // This invalidates SwiftErrorOps in the Shape. + replaceSwiftErrorOps(F, Shape, nullptr); + removeCoroEnds(Shape, &CG); postSplitCleanup(F); - postSplitCleanup(*ResumeClone); - postSplitCleanup(*DestroyClone); - postSplitCleanup(*CleanupClone); - - addMustTailToCoroResumes(*ResumeClone); - - // Store addresses resume/destroy/cleanup functions in the coroutine frame. - updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone); - - // Create a constant array referring to resume/destroy/clone functions pointed - // by the last argument of @llvm.coro.info, so that CoroElide pass can - // determined correct function to call. - setCoroInfo(F, Shape.CoroBegin, {ResumeClone, DestroyClone, CleanupClone}); // Update call graph and add the functions we created to the SCC. - coro::updateCallGraph(F, {ResumeClone, DestroyClone, CleanupClone}, CG, SCC); + coro::updateCallGraph(F, Clones, CG, SCC); } // When we see the coroutine the first time, we insert an indirect call to a @@ -881,6 +1432,80 @@ static void createDevirtTriggerFunc(CallGraph &CG, CallGraphSCC &SCC) { SCC.initialize(Nodes); } +/// Replace a call to llvm.coro.prepare.retcon. +static void replacePrepare(CallInst *Prepare, CallGraph &CG) { + auto CastFn = Prepare->getArgOperand(0); // as an i8* + auto Fn = CastFn->stripPointerCasts(); // as its original type + + // Find call graph nodes for the preparation. + CallGraphNode *PrepareUserNode = nullptr, *FnNode = nullptr; + if (auto ConcreteFn = dyn_cast(Fn)) { + PrepareUserNode = CG[Prepare->getFunction()]; + FnNode = CG[ConcreteFn]; + } + + // Attempt to peephole this pattern: + // %0 = bitcast [[TYPE]] @some_function to i8* + // %1 = call @llvm.coro.prepare.retcon(i8* %0) + // %2 = bitcast %1 to [[TYPE]] + // ==> + // %2 = @some_function + for (auto UI = Prepare->use_begin(), UE = Prepare->use_end(); + UI != UE; ) { + // Look for bitcasts back to the original function type. + auto *Cast = dyn_cast((UI++)->getUser()); + if (!Cast || Cast->getType() != Fn->getType()) continue; + + // Check whether the replacement will introduce new direct calls. + // If so, we'll need to update the call graph. + if (PrepareUserNode) { + for (auto &Use : Cast->uses()) { + if (auto *CB = dyn_cast(Use.getUser())) { + if (!CB->isCallee(&Use)) + continue; + PrepareUserNode->removeCallEdgeFor(*CB); + PrepareUserNode->addCalledFunction(CB, FnNode); + } + } + } + + // Replace and remove the cast. + Cast->replaceAllUsesWith(Fn); + Cast->eraseFromParent(); + } + + // Replace any remaining uses with the function as an i8*. + // This can never directly be a callee, so we don't need to update CG. + Prepare->replaceAllUsesWith(CastFn); + Prepare->eraseFromParent(); + + // Kill dead bitcasts. 
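  // For illustration, the cast that originally fed the prepare call is
  // typically dead at this point and gets erased:
  //   %0 = bitcast void (i8*)* @some_function to i8*   ; no remaining uses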
+ while (auto *Cast = dyn_cast(CastFn)) { + if (!Cast->use_empty()) break; + CastFn = Cast->getOperand(0); + Cast->eraseFromParent(); + } +} + +/// Remove calls to llvm.coro.prepare.retcon, a barrier meant to prevent +/// IPO from operating on calls to a retcon coroutine before it's been +/// split. This is only safe to do after we've split all retcon +/// coroutines in the module. We can do that this in this pass because +/// this pass does promise to split all retcon coroutines (as opposed to +/// switch coroutines, which are lowered in multiple stages). +static bool replaceAllPrepares(Function *PrepareFn, CallGraph &CG) { + bool Changed = false; + for (auto PI = PrepareFn->use_begin(), PE = PrepareFn->use_end(); + PI != PE; ) { + // Intrinsics can only be used in calls. + auto *Prepare = cast((PI++)->getUser()); + replacePrepare(Prepare, CG); + Changed = true; + } + + return Changed; +} + //===----------------------------------------------------------------------===// // Top Level Driver //===----------------------------------------------------------------------===// @@ -899,7 +1524,9 @@ struct CoroSplit : public CallGraphSCCPass { // A coroutine is identified by the presence of coro.begin intrinsic, if // we don't have any, this pass has nothing to do. bool doInitialization(CallGraph &CG) override { - Run = coro::declaresIntrinsics(CG.getModule(), {"llvm.coro.begin"}); + Run = coro::declaresIntrinsics(CG.getModule(), + {"llvm.coro.begin", + "llvm.coro.prepare.retcon"}); return CallGraphSCCPass::doInitialization(CG); } @@ -907,6 +1534,12 @@ struct CoroSplit : public CallGraphSCCPass { if (!Run) return false; + // Check for uses of llvm.coro.prepare.retcon. + auto PrepareFn = + SCC.getCallGraph().getModule().getFunction("llvm.coro.prepare.retcon"); + if (PrepareFn && PrepareFn->use_empty()) + PrepareFn = nullptr; + // Find coroutines for processing. SmallVector Coroutines; for (CallGraphNode *CGN : SCC) @@ -914,12 +1547,17 @@ struct CoroSplit : public CallGraphSCCPass { if (F->hasFnAttribute(CORO_PRESPLIT_ATTR)) Coroutines.push_back(F); - if (Coroutines.empty()) + if (Coroutines.empty() && !PrepareFn) return false; CallGraph &CG = getAnalysis().getCallGraph(); + + if (Coroutines.empty()) + return replaceAllPrepares(PrepareFn, CG); + createDevirtTriggerFunc(CG, SCC); + // Split all the coroutines. for (Function *F : Coroutines) { Attribute Attr = F->getFnAttribute(CORO_PRESPLIT_ATTR); StringRef Value = Attr.getValueAsString(); @@ -932,6 +1570,10 @@ struct CoroSplit : public CallGraphSCCPass { F->removeFnAttr(CORO_PRESPLIT_ATTR); splitCoroutine(*F, CG, SCC); } + + if (PrepareFn) + replaceAllPrepares(PrepareFn, CG); + return true; } diff --git a/lib/Transforms/Coroutines/Coroutines.cpp b/lib/Transforms/Coroutines/Coroutines.cpp index a581d1d21169..f39483b27518 100644 --- a/lib/Transforms/Coroutines/Coroutines.cpp +++ b/lib/Transforms/Coroutines/Coroutines.cpp @@ -123,12 +123,26 @@ Value *coro::LowererBase::makeSubFnCall(Value *Arg, int Index, static bool isCoroutineIntrinsicName(StringRef Name) { // NOTE: Must be sorted! 
static const char *const CoroIntrinsics[] = { - "llvm.coro.alloc", "llvm.coro.begin", "llvm.coro.destroy", - "llvm.coro.done", "llvm.coro.end", "llvm.coro.frame", - "llvm.coro.free", "llvm.coro.id", "llvm.coro.noop", - "llvm.coro.param", "llvm.coro.promise", "llvm.coro.resume", - "llvm.coro.save", "llvm.coro.size", "llvm.coro.subfn.addr", + "llvm.coro.alloc", + "llvm.coro.begin", + "llvm.coro.destroy", + "llvm.coro.done", + "llvm.coro.end", + "llvm.coro.frame", + "llvm.coro.free", + "llvm.coro.id", + "llvm.coro.id.retcon", + "llvm.coro.id.retcon.once", + "llvm.coro.noop", + "llvm.coro.param", + "llvm.coro.prepare.retcon", + "llvm.coro.promise", + "llvm.coro.resume", + "llvm.coro.save", + "llvm.coro.size", + "llvm.coro.subfn.addr", "llvm.coro.suspend", + "llvm.coro.suspend.retcon", }; return Intrinsic::lookupLLVMIntrinsicByName(CoroIntrinsics, Name) != -1; } @@ -217,9 +231,6 @@ static void clear(coro::Shape &Shape) { Shape.FrameTy = nullptr; Shape.FramePtr = nullptr; Shape.AllocaSpillBlock = nullptr; - Shape.ResumeSwitch = nullptr; - Shape.PromiseAlloca = nullptr; - Shape.HasFinalSuspend = false; } static CoroSaveInst *createCoroSave(CoroBeginInst *CoroBegin, @@ -235,6 +246,7 @@ static CoroSaveInst *createCoroSave(CoroBeginInst *CoroBegin, // Collect "interesting" coroutine intrinsics. void coro::Shape::buildFrom(Function &F) { + bool HasFinalSuspend = false; size_t FinalSuspendIndex = 0; clear(*this); SmallVector CoroFrames; @@ -257,9 +269,15 @@ void coro::Shape::buildFrom(Function &F) { if (II->use_empty()) UnusedCoroSaves.push_back(cast(II)); break; - case Intrinsic::coro_suspend: - CoroSuspends.push_back(cast(II)); - if (CoroSuspends.back()->isFinal()) { + case Intrinsic::coro_suspend_retcon: { + auto Suspend = cast(II); + CoroSuspends.push_back(Suspend); + break; + } + case Intrinsic::coro_suspend: { + auto Suspend = cast(II); + CoroSuspends.push_back(Suspend); + if (Suspend->isFinal()) { if (HasFinalSuspend) report_fatal_error( "Only one suspend point can be marked as final"); @@ -267,18 +285,23 @@ void coro::Shape::buildFrom(Function &F) { FinalSuspendIndex = CoroSuspends.size() - 1; } break; + } case Intrinsic::coro_begin: { auto CB = cast(II); - if (CB->getId()->getInfo().isPreSplit()) { - if (CoroBegin) - report_fatal_error( + + // Ignore coro id's that aren't pre-split. + auto Id = dyn_cast(CB->getId()); + if (Id && !Id->getInfo().isPreSplit()) + break; + + if (CoroBegin) + report_fatal_error( "coroutine should have exactly one defining @llvm.coro.begin"); - CB->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull); - CB->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); - CB->removeAttribute(AttributeList::FunctionIndex, - Attribute::NoDuplicate); - CoroBegin = CB; - } + CB->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull); + CB->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); + CB->removeAttribute(AttributeList::FunctionIndex, + Attribute::NoDuplicate); + CoroBegin = CB; break; } case Intrinsic::coro_end: @@ -310,7 +333,7 @@ void coro::Shape::buildFrom(Function &F) { // Replace all coro.suspend with undef and remove related coro.saves if // present. 
- for (CoroSuspendInst *CS : CoroSuspends) { + for (AnyCoroSuspendInst *CS : CoroSuspends) { CS->replaceAllUsesWith(UndefValue::get(CS->getType())); CS->eraseFromParent(); if (auto *CoroSave = CS->getCoroSave()) @@ -324,19 +347,136 @@ void coro::Shape::buildFrom(Function &F) { return; } + auto Id = CoroBegin->getId(); + switch (auto IdIntrinsic = Id->getIntrinsicID()) { + case Intrinsic::coro_id: { + auto SwitchId = cast(Id); + this->ABI = coro::ABI::Switch; + this->SwitchLowering.HasFinalSuspend = HasFinalSuspend; + this->SwitchLowering.ResumeSwitch = nullptr; + this->SwitchLowering.PromiseAlloca = SwitchId->getPromise(); + this->SwitchLowering.ResumeEntryBlock = nullptr; + + for (auto AnySuspend : CoroSuspends) { + auto Suspend = dyn_cast(AnySuspend); + if (!Suspend) { +#ifndef NDEBUG + AnySuspend->dump(); +#endif + report_fatal_error("coro.id must be paired with coro.suspend"); + } + + if (!Suspend->getCoroSave()) + createCoroSave(CoroBegin, Suspend); + } + break; + } + + case Intrinsic::coro_id_retcon: + case Intrinsic::coro_id_retcon_once: { + auto ContinuationId = cast(Id); + ContinuationId->checkWellFormed(); + this->ABI = (IdIntrinsic == Intrinsic::coro_id_retcon + ? coro::ABI::Retcon + : coro::ABI::RetconOnce); + auto Prototype = ContinuationId->getPrototype(); + this->RetconLowering.ResumePrototype = Prototype; + this->RetconLowering.Alloc = ContinuationId->getAllocFunction(); + this->RetconLowering.Dealloc = ContinuationId->getDeallocFunction(); + this->RetconLowering.ReturnBlock = nullptr; + this->RetconLowering.IsFrameInlineInStorage = false; + + // Determine the result value types, and make sure they match up with + // the values passed to the suspends. + auto ResultTys = getRetconResultTypes(); + auto ResumeTys = getRetconResumeTypes(); + + for (auto AnySuspend : CoroSuspends) { + auto Suspend = dyn_cast(AnySuspend); + if (!Suspend) { +#ifndef NDEBUG + AnySuspend->dump(); +#endif + report_fatal_error("coro.id.retcon.* must be paired with " + "coro.suspend.retcon"); + } + + // Check that the argument types of the suspend match the results. + auto SI = Suspend->value_begin(), SE = Suspend->value_end(); + auto RI = ResultTys.begin(), RE = ResultTys.end(); + for (; SI != SE && RI != RE; ++SI, ++RI) { + auto SrcTy = (*SI)->getType(); + if (SrcTy != *RI) { + // The optimizer likes to eliminate bitcasts leading into variadic + // calls, but that messes with our invariants. Re-insert the + // bitcast and ignore this type mismatch. + if (CastInst::isBitCastable(SrcTy, *RI)) { + auto BCI = new BitCastInst(*SI, *RI, "", Suspend); + SI->set(BCI); + continue; + } + +#ifndef NDEBUG + Suspend->dump(); + Prototype->getFunctionType()->dump(); +#endif + report_fatal_error("argument to coro.suspend.retcon does not " + "match corresponding prototype function result"); + } + } + if (SI != SE || RI != RE) { +#ifndef NDEBUG + Suspend->dump(); + Prototype->getFunctionType()->dump(); +#endif + report_fatal_error("wrong number of arguments to coro.suspend.retcon"); + } + + // Check that the result type of the suspend matches the resume types. 
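  // Illustrative example (prototype type hypothetical): for a prototype of
  // type {i8*, i32} (i8*, i1), the resume types are the prototype parameters
  // after the storage pointer, i.e. {i1}, so each coro.suspend.retcon must
  // produce a single i1.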
+ Type *SResultTy = Suspend->getType(); + ArrayRef SuspendResultTys; + if (SResultTy->isVoidTy()) { + // leave as empty array + } else if (auto SResultStructTy = dyn_cast(SResultTy)) { + SuspendResultTys = SResultStructTy->elements(); + } else { + // forms an ArrayRef using SResultTy, be careful + SuspendResultTys = SResultTy; + } + if (SuspendResultTys.size() != ResumeTys.size()) { +#ifndef NDEBUG + Suspend->dump(); + Prototype->getFunctionType()->dump(); +#endif + report_fatal_error("wrong number of results from coro.suspend.retcon"); + } + for (size_t I = 0, E = ResumeTys.size(); I != E; ++I) { + if (SuspendResultTys[I] != ResumeTys[I]) { +#ifndef NDEBUG + Suspend->dump(); + Prototype->getFunctionType()->dump(); +#endif + report_fatal_error("result from coro.suspend.retcon does not " + "match corresponding prototype function param"); + } + } + } + break; + } + + default: + llvm_unreachable("coro.begin is not dependent on a coro.id call"); + } + // The coro.free intrinsic is always lowered to the result of coro.begin. for (CoroFrameInst *CF : CoroFrames) { CF->replaceAllUsesWith(CoroBegin); CF->eraseFromParent(); } - // Canonicalize coro.suspend by inserting a coro.save if needed. - for (CoroSuspendInst *CS : CoroSuspends) - if (!CS->getCoroSave()) - createCoroSave(CoroBegin, CS); - // Move final suspend to be the last element in the CoroSuspends vector. - if (HasFinalSuspend && + if (ABI == coro::ABI::Switch && + SwitchLowering.HasFinalSuspend && FinalSuspendIndex != CoroSuspends.size() - 1) std::swap(CoroSuspends[FinalSuspendIndex], CoroSuspends.back()); @@ -345,6 +485,154 @@ void coro::Shape::buildFrom(Function &F) { CoroSave->eraseFromParent(); } +static void propagateCallAttrsFromCallee(CallInst *Call, Function *Callee) { + Call->setCallingConv(Callee->getCallingConv()); + // TODO: attributes? 
+} + +static void addCallToCallGraph(CallGraph *CG, CallInst *Call, Function *Callee){ + if (CG) + (*CG)[Call->getFunction()]->addCalledFunction(Call, (*CG)[Callee]); +} + +Value *coro::Shape::emitAlloc(IRBuilder<> &Builder, Value *Size, + CallGraph *CG) const { + switch (ABI) { + case coro::ABI::Switch: + llvm_unreachable("can't allocate memory in coro switch-lowering"); + + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: { + auto Alloc = RetconLowering.Alloc; + Size = Builder.CreateIntCast(Size, + Alloc->getFunctionType()->getParamType(0), + /*is signed*/ false); + auto *Call = Builder.CreateCall(Alloc, Size); + propagateCallAttrsFromCallee(Call, Alloc); + addCallToCallGraph(CG, Call, Alloc); + return Call; + } + } + llvm_unreachable("Unknown coro::ABI enum"); +} + +void coro::Shape::emitDealloc(IRBuilder<> &Builder, Value *Ptr, + CallGraph *CG) const { + switch (ABI) { + case coro::ABI::Switch: + llvm_unreachable("can't allocate memory in coro switch-lowering"); + + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: { + auto Dealloc = RetconLowering.Dealloc; + Ptr = Builder.CreateBitCast(Ptr, + Dealloc->getFunctionType()->getParamType(0)); + auto *Call = Builder.CreateCall(Dealloc, Ptr); + propagateCallAttrsFromCallee(Call, Dealloc); + addCallToCallGraph(CG, Call, Dealloc); + return; + } + } + llvm_unreachable("Unknown coro::ABI enum"); +} + +LLVM_ATTRIBUTE_NORETURN +static void fail(const Instruction *I, const char *Reason, Value *V) { +#ifndef NDEBUG + I->dump(); + if (V) { + errs() << " Value: "; + V->printAsOperand(llvm::errs()); + errs() << '\n'; + } +#endif + report_fatal_error(Reason); +} + +/// Check that the given value is a well-formed prototype for the +/// llvm.coro.id.retcon.* intrinsics. +static void checkWFRetconPrototype(const AnyCoroIdRetconInst *I, Value *V) { + auto F = dyn_cast(V->stripPointerCasts()); + if (!F) + fail(I, "llvm.coro.id.retcon.* prototype not a Function", V); + + auto FT = F->getFunctionType(); + + if (isa(I)) { + bool ResultOkay; + if (FT->getReturnType()->isPointerTy()) { + ResultOkay = true; + } else if (auto SRetTy = dyn_cast(FT->getReturnType())) { + ResultOkay = (!SRetTy->isOpaque() && + SRetTy->getNumElements() > 0 && + SRetTy->getElementType(0)->isPointerTy()); + } else { + ResultOkay = false; + } + if (!ResultOkay) + fail(I, "llvm.coro.id.retcon prototype must return pointer as first " + "result", F); + + if (FT->getReturnType() != + I->getFunction()->getFunctionType()->getReturnType()) + fail(I, "llvm.coro.id.retcon prototype return type must be same as" + "current function return type", F); + } else { + // No meaningful validation to do here for llvm.coro.id.unique.once. + } + + if (FT->getNumParams() == 0 || !FT->getParamType(0)->isPointerTy()) + fail(I, "llvm.coro.id.retcon.* prototype must take pointer as " + "its first parameter", F); +} + +/// Check that the given value is a well-formed allocator. +static void checkWFAlloc(const Instruction *I, Value *V) { + auto F = dyn_cast(V->stripPointerCasts()); + if (!F) + fail(I, "llvm.coro.* allocator not a Function", V); + + auto FT = F->getFunctionType(); + if (!FT->getReturnType()->isPointerTy()) + fail(I, "llvm.coro.* allocator must return a pointer", F); + + if (FT->getNumParams() != 1 || + !FT->getParamType(0)->isIntegerTy()) + fail(I, "llvm.coro.* allocator must take integer as only param", F); +} + +/// Check that the given value is a well-formed deallocator. 
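/// (For illustration, a hypothetical deallocator such as `void @my_free(i8*)`
/// passes the checks below: it returns void and takes one pointer parameter.)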
+static void checkWFDealloc(const Instruction *I, Value *V) { + auto F = dyn_cast(V->stripPointerCasts()); + if (!F) + fail(I, "llvm.coro.* deallocator not a Function", V); + + auto FT = F->getFunctionType(); + if (!FT->getReturnType()->isVoidTy()) + fail(I, "llvm.coro.* deallocator must return void", F); + + if (FT->getNumParams() != 1 || + !FT->getParamType(0)->isPointerTy()) + fail(I, "llvm.coro.* deallocator must take pointer as only param", F); +} + +static void checkConstantInt(const Instruction *I, Value *V, + const char *Reason) { + if (!isa(V)) { + fail(I, Reason, V); + } +} + +void AnyCoroIdRetconInst::checkWellFormed() const { + checkConstantInt(this, getArgOperand(SizeArg), + "size argument to coro.id.retcon.* must be constant"); + checkConstantInt(this, getArgOperand(AlignArg), + "alignment argument to coro.id.retcon.* must be constant"); + checkWFRetconPrototype(this, getArgOperand(PrototypeArg)); + checkWFAlloc(this, getArgOperand(AllocArg)); + checkWFDealloc(this, getArgOperand(DeallocArg)); +} + void LLVMAddCoroEarlyPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createCoroEarlyPass()); } diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 95a9f31cced3..dd9f74a881ee 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -304,7 +304,7 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, // of the previous load. LoadInst *newLoad = IRB.CreateLoad(OrigLoad->getType(), V, V->getName() + ".val"); - newLoad->setAlignment(OrigLoad->getAlignment()); + newLoad->setAlignment(MaybeAlign(OrigLoad->getAlignment())); // Transfer the AA info too. AAMDNodes AAInfo; OrigLoad->getAAMetadata(AAInfo); diff --git a/lib/Transforms/IPO/Attributor.cpp b/lib/Transforms/IPO/Attributor.cpp index 2a52c6b9b4ad..95f47345d8fd 100644 --- a/lib/Transforms/IPO/Attributor.cpp +++ b/lib/Transforms/IPO/Attributor.cpp @@ -16,11 +16,15 @@ #include "llvm/Transforms/IPO/Attributor.h" #include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" @@ -30,6 +34,9 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" + #include using namespace llvm; @@ -46,19 +53,50 @@ STATISTIC(NumAttributesValidFixpoint, "Number of abstract attributes in a valid fixpoint state"); STATISTIC(NumAttributesManifested, "Number of abstract attributes manifested in IR"); -STATISTIC(NumFnNoUnwind, "Number of functions marked nounwind"); - -STATISTIC(NumFnUniqueReturned, "Number of function with unique return"); -STATISTIC(NumFnKnownReturns, "Number of function with known return values"); -STATISTIC(NumFnArgumentReturned, - "Number of function arguments marked returned"); -STATISTIC(NumFnNoSync, "Number of functions marked nosync"); -STATISTIC(NumFnNoFree, "Number of functions marked nofree"); -STATISTIC(NumFnReturnedNonNull, - "Number of function return values marked nonnull"); -STATISTIC(NumFnArgumentNonNull, "Number of function arguments 
marked nonnull"); -STATISTIC(NumCSArgumentNonNull, "Number of call site arguments marked nonnull"); -STATISTIC(NumFnWillReturn, "Number of functions marked willreturn"); + +// Some helper macros to deal with statistics tracking. +// +// Usage: +// For simple IR attribute tracking overload trackStatistics in the abstract +// attribute and choose the right STATS_DECLTRACK_********* macro, +// e.g.,: +// void trackStatistics() const override { +// STATS_DECLTRACK_ARG_ATTR(returned) +// } +// If there is a single "increment" side one can use the macro +// STATS_DECLTRACK with a custom message. If there are multiple increment +// sides, STATS_DECL and STATS_TRACK can also be used separatly. +// +#define BUILD_STAT_MSG_IR_ATTR(TYPE, NAME) \ + ("Number of " #TYPE " marked '" #NAME "'") +#define BUILD_STAT_NAME(NAME, TYPE) NumIR##TYPE##_##NAME +#define STATS_DECL_(NAME, MSG) STATISTIC(NAME, MSG); +#define STATS_DECL(NAME, TYPE, MSG) \ + STATS_DECL_(BUILD_STAT_NAME(NAME, TYPE), MSG); +#define STATS_TRACK(NAME, TYPE) ++(BUILD_STAT_NAME(NAME, TYPE)); +#define STATS_DECLTRACK(NAME, TYPE, MSG) \ + { \ + STATS_DECL(NAME, TYPE, MSG) \ + STATS_TRACK(NAME, TYPE) \ + } +#define STATS_DECLTRACK_ARG_ATTR(NAME) \ + STATS_DECLTRACK(NAME, Arguments, BUILD_STAT_MSG_IR_ATTR(arguments, NAME)) +#define STATS_DECLTRACK_CSARG_ATTR(NAME) \ + STATS_DECLTRACK(NAME, CSArguments, \ + BUILD_STAT_MSG_IR_ATTR(call site arguments, NAME)) +#define STATS_DECLTRACK_FN_ATTR(NAME) \ + STATS_DECLTRACK(NAME, Function, BUILD_STAT_MSG_IR_ATTR(functions, NAME)) +#define STATS_DECLTRACK_CS_ATTR(NAME) \ + STATS_DECLTRACK(NAME, CS, BUILD_STAT_MSG_IR_ATTR(call site, NAME)) +#define STATS_DECLTRACK_FNRET_ATTR(NAME) \ + STATS_DECLTRACK(NAME, FunctionReturn, \ + BUILD_STAT_MSG_IR_ATTR(function returns, NAME)) +#define STATS_DECLTRACK_CSRET_ATTR(NAME) \ + STATS_DECLTRACK(NAME, CSReturn, \ + BUILD_STAT_MSG_IR_ATTR(call site returns, NAME)) +#define STATS_DECLTRACK_FLOATING_ATTR(NAME) \ + STATS_DECLTRACK(NAME, Floating, \ + ("Number of floating values known to be '" #NAME "'")) // TODO: Determine a good default value. // @@ -72,18 +110,32 @@ static cl::opt MaxFixpointIterations("attributor-max-iterations", cl::Hidden, cl::desc("Maximal number of fixpoint iterations."), cl::init(32)); +static cl::opt VerifyMaxFixpointIterations( + "attributor-max-iterations-verify", cl::Hidden, + cl::desc("Verify that max-iterations is a tight bound for a fixpoint"), + cl::init(false)); static cl::opt DisableAttributor( "attributor-disable", cl::Hidden, cl::desc("Disable the attributor inter-procedural deduction pass."), cl::init(true)); -static cl::opt VerifyAttributor( - "attributor-verify", cl::Hidden, - cl::desc("Verify the Attributor deduction and " - "manifestation of attributes -- may issue false-positive errors"), +static cl::opt ManifestInternal( + "attributor-manifest-internal", cl::Hidden, + cl::desc("Manifest Attributor internal string attributes."), cl::init(false)); +static cl::opt DepRecInterval( + "attributor-dependence-recompute-interval", cl::Hidden, + cl::desc("Number of iterations until dependences are recomputed."), + cl::init(4)); + +static cl::opt EnableHeapToStack("enable-heap-to-stack-conversion", + cl::init(true), cl::Hidden); + +static cl::opt MaxHeapToStackSize("max-heap-to-stack-size", cl::init(128), + cl::Hidden); + /// Logic operators for the change status enum class. /// ///{ @@ -95,78 +147,30 @@ ChangeStatus llvm::operator&(ChangeStatus l, ChangeStatus r) { } ///} -/// Helper to adjust the statistics. 
-static void bookkeeping(AbstractAttribute::ManifestPosition MP, - const Attribute &Attr) { - if (!AreStatisticsEnabled()) - return; - - if (!Attr.isEnumAttribute()) - return; - switch (Attr.getKindAsEnum()) { - case Attribute::NoUnwind: - NumFnNoUnwind++; - return; - case Attribute::Returned: - NumFnArgumentReturned++; - return; - case Attribute::NoSync: - NumFnNoSync++; - break; - case Attribute::NoFree: - NumFnNoFree++; - break; - case Attribute::NonNull: - switch (MP) { - case AbstractAttribute::MP_RETURNED: - NumFnReturnedNonNull++; - break; - case AbstractAttribute::MP_ARGUMENT: - NumFnArgumentNonNull++; - break; - case AbstractAttribute::MP_CALL_SITE_ARGUMENT: - NumCSArgumentNonNull++; - break; - default: - break; - } - break; - case Attribute::WillReturn: - NumFnWillReturn++; - break; - default: - return; - } -} - -template -using followValueCB_t = std::function; -template -using visitValueCB_t = std::function; - -/// Recursively visit all values that might become \p InitV at some point. This +/// Recursively visit all values that might become \p IRP at some point. This /// will be done by looking through cast instructions, selects, phis, and calls -/// with the "returned" attribute. The callback \p FollowValueCB is asked before -/// a potential origin value is looked at. If no \p FollowValueCB is passed, a -/// default one is used that will make sure we visit every value only once. Once -/// we cannot look through the value any further, the callback \p VisitValueCB -/// is invoked and passed the current value and the \p State. To limit how much -/// effort is invested, we will never visit more than \p MaxValues values. -template +/// with the "returned" attribute. Once we cannot look through the value any +/// further, the callback \p VisitValueCB is invoked and passed the current +/// value, the \p State, and a flag to indicate if we stripped anything. To +/// limit how much effort is invested, we will never visit more values than +/// specified by \p MaxValues. +template static bool genericValueTraversal( - Value *InitV, StateTy &State, visitValueCB_t &VisitValueCB, - followValueCB_t *FollowValueCB = nullptr, int MaxValues = 8) { - + Attributor &A, IRPosition IRP, const AAType &QueryingAA, StateTy &State, + const function_ref &VisitValueCB, + int MaxValues = 8) { + + const AAIsDead *LivenessAA = nullptr; + if (IRP.getAnchorScope()) + LivenessAA = &A.getAAFor( + QueryingAA, IRPosition::function(*IRP.getAnchorScope()), + /* TrackDependence */ false); + bool AnyDead = false; + + // TODO: Use Positions here to allow context sensitivity in VisitValueCB SmallPtrSet Visited; - followValueCB_t DefaultFollowValueCB = [&](Value *Val, bool &) { - return Visited.insert(Val).second; - }; - - if (!FollowValueCB) - FollowValueCB = &DefaultFollowValueCB; - SmallVector Worklist; - Worklist.push_back(InitV); + Worklist.push_back(&IRP.getAssociatedValue()); int Iteration = 0; do { @@ -174,7 +178,7 @@ static bool genericValueTraversal( // Check if we should process the current value. To prevent endless // recursion keep a record of the values we followed! - if (!(*FollowValueCB)(V, State)) + if (!Visited.insert(V).second) continue; // Make sure we limit the compile time for complex expressions. @@ -183,23 +187,23 @@ static bool genericValueTraversal( // Explicitly look through calls with a "returned" attribute if we do // not have a pointer as stripPointerCasts only works on them. 
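      // Illustrative case (names hypothetical): for a callee declared as
      //   declare i8* @wrapper(i8* returned)
      // a call %r = call i8* @wrapper(i8* %p) lets the traversal continue
      // at %p rather than stop at the call.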
+ Value *NewV = nullptr; if (V->getType()->isPointerTy()) { - V = V->stripPointerCasts(); + NewV = V->stripPointerCasts(); } else { CallSite CS(V); if (CS && CS.getCalledFunction()) { - Value *NewV = nullptr; for (Argument &Arg : CS.getCalledFunction()->args()) if (Arg.hasReturnedAttr()) { NewV = CS.getArgOperand(Arg.getArgNo()); break; } - if (NewV) { - Worklist.push_back(NewV); - continue; - } } } + if (NewV && NewV != V) { + Worklist.push_back(NewV); + continue; + } // Look through select instructions, visit both potential values. if (auto *SI = dyn_cast(V)) { @@ -208,35 +212,34 @@ static bool genericValueTraversal( continue; } - // Look through phi nodes, visit all operands. + // Look through phi nodes, visit all live operands. if (auto *PHI = dyn_cast(V)) { - Worklist.append(PHI->op_begin(), PHI->op_end()); + assert(LivenessAA && + "Expected liveness in the presence of instructions!"); + for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) { + const BasicBlock *IncomingBB = PHI->getIncomingBlock(u); + if (LivenessAA->isAssumedDead(IncomingBB->getTerminator())) { + AnyDead = true; + continue; + } + Worklist.push_back(PHI->getIncomingValue(u)); + } continue; } // Once a leaf is reached we inform the user through the callback. - VisitValueCB(V, State); + if (!VisitValueCB(*V, State, Iteration > 1)) + return false; } while (!Worklist.empty()); + // If we actually used liveness information so we have to record a dependence. + if (AnyDead) + A.recordDependence(*LivenessAA, QueryingAA); + // All values have been visited. return true; } -/// Helper to identify the correct offset into an attribute list. -static unsigned getAttrIndex(AbstractAttribute::ManifestPosition MP, - unsigned ArgNo = 0) { - switch (MP) { - case AbstractAttribute::MP_ARGUMENT: - case AbstractAttribute::MP_CALL_SITE_ARGUMENT: - return ArgNo + AttributeList::FirstArgIndex; - case AbstractAttribute::MP_FUNCTION: - return AttributeList::FunctionIndex; - case AbstractAttribute::MP_RETURNED: - return AttributeList::ReturnIndex; - } - llvm_unreachable("Unknown manifest position!"); -} - /// Return true if \p New is equal or worse than \p Old. static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) { if (!Old.isIntAttribute()) @@ -247,12 +250,9 @@ static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) { /// Return true if the information provided by \p Attr was added to the /// attribute list \p Attrs. This is only the case if it was not already present -/// in \p Attrs at the position describe by \p MP and \p ArgNo. +/// in \p Attrs at the position describe by \p PK and \p AttrIdx. 
static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr, - AttributeList &Attrs, - AbstractAttribute::ManifestPosition MP, - unsigned ArgNo = 0) { - unsigned AttrIdx = getAttrIndex(MP, ArgNo); + AttributeList &Attrs, int AttrIdx) { if (Attr.isEnumAttribute()) { Attribute::AttrKind Kind = Attr.getKindAsEnum(); @@ -270,9 +270,47 @@ static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr, Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr); return true; } + if (Attr.isIntAttribute()) { + Attribute::AttrKind Kind = Attr.getKindAsEnum(); + if (Attrs.hasAttribute(AttrIdx, Kind)) + if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind))) + return false; + Attrs = Attrs.removeAttribute(Ctx, AttrIdx, Kind); + Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr); + return true; + } llvm_unreachable("Expected enum or string attribute!"); } +static const Value *getPointerOperand(const Instruction *I) { + if (auto *LI = dyn_cast(I)) + if (!LI->isVolatile()) + return LI->getPointerOperand(); + + if (auto *SI = dyn_cast(I)) + if (!SI->isVolatile()) + return SI->getPointerOperand(); + + if (auto *CXI = dyn_cast(I)) + if (!CXI->isVolatile()) + return CXI->getPointerOperand(); + + if (auto *RMWI = dyn_cast(I)) + if (!RMWI->isVolatile()) + return RMWI->getPointerOperand(); + + return nullptr; +} +static const Value *getBasePointerOfAccessPointerOperand(const Instruction *I, + int64_t &BytesOffset, + const DataLayout &DL) { + const Value *Ptr = getPointerOperand(I); + if (!Ptr) + return nullptr; + + return GetPointerBaseWithConstantOffset(Ptr, BytesOffset, DL, + /*AllowNonInbounds*/ false); +} ChangeStatus AbstractAttribute::update(Attributor &A) { ChangeStatus HasChanged = ChangeStatus::UNCHANGED; @@ -289,143 +327,527 @@ ChangeStatus AbstractAttribute::update(Attributor &A) { return HasChanged; } -ChangeStatus AbstractAttribute::manifest(Attributor &A) { - assert(getState().isValidState() && - "Attempted to manifest an invalid state!"); - assert(getAssociatedValue() && - "Attempted to manifest an attribute without associated value!"); - - ChangeStatus HasChanged = ChangeStatus::UNCHANGED; - SmallVector DeducedAttrs; - getDeducedAttributes(DeducedAttrs); - - Function &ScopeFn = getAnchorScope(); - LLVMContext &Ctx = ScopeFn.getContext(); - ManifestPosition MP = getManifestPosition(); - - AttributeList Attrs; - SmallVector ArgNos; +ChangeStatus +IRAttributeManifest::manifestAttrs(Attributor &A, IRPosition &IRP, + const ArrayRef &DeducedAttrs) { + Function *ScopeFn = IRP.getAssociatedFunction(); + IRPosition::Kind PK = IRP.getPositionKind(); // In the following some generic code that will manifest attributes in // DeducedAttrs if they improve the current IR. Due to the different // annotation positions we use the underlying AttributeList interface. - // Note that MP_CALL_SITE_ARGUMENT can annotate multiple locations. 
- switch (MP) { - case MP_ARGUMENT: - ArgNos.push_back(cast(getAssociatedValue())->getArgNo()); - Attrs = ScopeFn.getAttributes(); + AttributeList Attrs; + switch (PK) { + case IRPosition::IRP_INVALID: + case IRPosition::IRP_FLOAT: + return ChangeStatus::UNCHANGED; + case IRPosition::IRP_ARGUMENT: + case IRPosition::IRP_FUNCTION: + case IRPosition::IRP_RETURNED: + Attrs = ScopeFn->getAttributes(); break; - case MP_FUNCTION: - case MP_RETURNED: - ArgNos.push_back(0); - Attrs = ScopeFn.getAttributes(); + case IRPosition::IRP_CALL_SITE: + case IRPosition::IRP_CALL_SITE_RETURNED: + case IRPosition::IRP_CALL_SITE_ARGUMENT: + Attrs = ImmutableCallSite(&IRP.getAnchorValue()).getAttributes(); break; - case MP_CALL_SITE_ARGUMENT: { - CallSite CS(&getAnchoredValue()); - for (unsigned u = 0, e = CS.getNumArgOperands(); u != e; u++) - if (CS.getArgOperand(u) == getAssociatedValue()) - ArgNos.push_back(u); - Attrs = CS.getAttributes(); - } } + ChangeStatus HasChanged = ChangeStatus::UNCHANGED; + LLVMContext &Ctx = IRP.getAnchorValue().getContext(); for (const Attribute &Attr : DeducedAttrs) { - for (unsigned ArgNo : ArgNos) { - if (!addIfNotExistent(Ctx, Attr, Attrs, MP, ArgNo)) - continue; + if (!addIfNotExistent(Ctx, Attr, Attrs, IRP.getAttrIdx())) + continue; - HasChanged = ChangeStatus::CHANGED; - bookkeeping(MP, Attr); - } + HasChanged = ChangeStatus::CHANGED; } if (HasChanged == ChangeStatus::UNCHANGED) return HasChanged; - switch (MP) { - case MP_ARGUMENT: - case MP_FUNCTION: - case MP_RETURNED: - ScopeFn.setAttributes(Attrs); + switch (PK) { + case IRPosition::IRP_ARGUMENT: + case IRPosition::IRP_FUNCTION: + case IRPosition::IRP_RETURNED: + ScopeFn->setAttributes(Attrs); + break; + case IRPosition::IRP_CALL_SITE: + case IRPosition::IRP_CALL_SITE_RETURNED: + case IRPosition::IRP_CALL_SITE_ARGUMENT: + CallSite(&IRP.getAnchorValue()).setAttributes(Attrs); + break; + case IRPosition::IRP_INVALID: + case IRPosition::IRP_FLOAT: break; - case MP_CALL_SITE_ARGUMENT: - CallSite(&getAnchoredValue()).setAttributes(Attrs); } return HasChanged; } -Function &AbstractAttribute::getAnchorScope() { - Value &V = getAnchoredValue(); - if (isa(V)) - return cast(V); - if (isa(V)) - return *cast(V).getParent(); - if (isa(V)) - return *cast(V).getFunction(); - llvm_unreachable("No scope for anchored value found!"); +const IRPosition IRPosition::EmptyKey(255); +const IRPosition IRPosition::TombstoneKey(256); + +SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) { + IRPositions.emplace_back(IRP); + + ImmutableCallSite ICS(&IRP.getAnchorValue()); + switch (IRP.getPositionKind()) { + case IRPosition::IRP_INVALID: + case IRPosition::IRP_FLOAT: + case IRPosition::IRP_FUNCTION: + return; + case IRPosition::IRP_ARGUMENT: + case IRPosition::IRP_RETURNED: + IRPositions.emplace_back( + IRPosition::function(*IRP.getAssociatedFunction())); + return; + case IRPosition::IRP_CALL_SITE: + assert(ICS && "Expected call site!"); + // TODO: We need to look at the operand bundles similar to the redirection + // in CallBase. + if (!ICS.hasOperandBundles()) + if (const Function *Callee = ICS.getCalledFunction()) + IRPositions.emplace_back(IRPosition::function(*Callee)); + return; + case IRPosition::IRP_CALL_SITE_RETURNED: + assert(ICS && "Expected call site!"); + // TODO: We need to look at the operand bundles similar to the redirection + // in CallBase. 
+ if (!ICS.hasOperandBundles()) { + if (const Function *Callee = ICS.getCalledFunction()) { + IRPositions.emplace_back(IRPosition::returned(*Callee)); + IRPositions.emplace_back(IRPosition::function(*Callee)); + } + } + IRPositions.emplace_back( + IRPosition::callsite_function(cast(*ICS.getInstruction()))); + return; + case IRPosition::IRP_CALL_SITE_ARGUMENT: { + int ArgNo = IRP.getArgNo(); + assert(ICS && ArgNo >= 0 && "Expected call site!"); + // TODO: We need to look at the operand bundles similar to the redirection + // in CallBase. + if (!ICS.hasOperandBundles()) { + const Function *Callee = ICS.getCalledFunction(); + if (Callee && Callee->arg_size() > unsigned(ArgNo)) + IRPositions.emplace_back(IRPosition::argument(*Callee->getArg(ArgNo))); + if (Callee) + IRPositions.emplace_back(IRPosition::function(*Callee)); + } + IRPositions.emplace_back(IRPosition::value(IRP.getAssociatedValue())); + return; + } + } +} + +bool IRPosition::hasAttr(ArrayRef AKs, + bool IgnoreSubsumingPositions) const { + for (const IRPosition &EquivIRP : SubsumingPositionIterator(*this)) { + for (Attribute::AttrKind AK : AKs) + if (EquivIRP.getAttr(AK).getKindAsEnum() == AK) + return true; + // The first position returned by the SubsumingPositionIterator is + // always the position itself. If we ignore subsuming positions we + // are done after the first iteration. + if (IgnoreSubsumingPositions) + break; + } + return false; } -const Function &AbstractAttribute::getAnchorScope() const { - return const_cast(this)->getAnchorScope(); +void IRPosition::getAttrs(ArrayRef AKs, + SmallVectorImpl &Attrs) const { + for (const IRPosition &EquivIRP : SubsumingPositionIterator(*this)) + for (Attribute::AttrKind AK : AKs) { + const Attribute &Attr = EquivIRP.getAttr(AK); + if (Attr.getKindAsEnum() == AK) + Attrs.push_back(Attr); + } } -/// -----------------------NoUnwind Function Attribute-------------------------- +void IRPosition::verify() { + switch (KindOrArgNo) { + default: + assert(KindOrArgNo >= 0 && "Expected argument or call site argument!"); + assert((isa(AnchorVal) || isa(AnchorVal)) && + "Expected call base or argument for positive attribute index!"); + if (isa(AnchorVal)) { + assert(cast(AnchorVal)->getArgNo() == unsigned(getArgNo()) && + "Argument number mismatch!"); + assert(cast(AnchorVal) == &getAssociatedValue() && + "Associated value mismatch!"); + } else { + assert(cast(*AnchorVal).arg_size() > unsigned(getArgNo()) && + "Call site argument number mismatch!"); + assert(cast(*AnchorVal).getArgOperand(getArgNo()) == + &getAssociatedValue() && + "Associated value mismatch!"); + } + break; + case IRP_INVALID: + assert(!AnchorVal && "Expected no value for an invalid position!"); + break; + case IRP_FLOAT: + assert((!isa(&getAssociatedValue()) && + !isa(&getAssociatedValue())) && + "Expected specialized kind for call base and argument values!"); + break; + case IRP_RETURNED: + assert(isa(AnchorVal) && + "Expected function for a 'returned' position!"); + assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!"); + break; + case IRP_CALL_SITE_RETURNED: + assert((isa(AnchorVal)) && + "Expected call base for 'call site returned' position!"); + assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!"); + break; + case IRP_CALL_SITE: + assert((isa(AnchorVal)) && + "Expected call base for 'call site function' position!"); + assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!"); + break; + case IRP_FUNCTION: + assert(isa(AnchorVal) && + "Expected function for a 
'function' position!"); + assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!"); + break; + } +} -struct AANoUnwindFunction : AANoUnwind, BooleanState { +namespace { +/// Helper functions to clamp a state \p S of type \p StateType with the +/// information in \p R and indicate/return if \p S did change (as-in update is +/// required to be run again). +/// +///{ +template +ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R); + +template <> +ChangeStatus clampStateAndIndicateChange(IntegerState &S, + const IntegerState &R) { + auto Assumed = S.getAssumed(); + S ^= R; + return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED + : ChangeStatus::CHANGED; +} - AANoUnwindFunction(Function &F, InformationCache &InfoCache) - : AANoUnwind(F, InfoCache) {} +template <> +ChangeStatus clampStateAndIndicateChange(BooleanState &S, + const BooleanState &R) { + return clampStateAndIndicateChange(S, R); +} +///} - /// See AbstractAttribute::getState() - /// { - AbstractState &getState() override { return *this; } - const AbstractState &getState() const override { return *this; } - /// } +/// Clamp the information known for all returned values of a function +/// (identified by \p QueryingAA) into \p S. +template +static void clampReturnedValueStates(Attributor &A, const AAType &QueryingAA, + StateType &S) { + LLVM_DEBUG(dbgs() << "[Attributor] Clamp return value states for " + << static_cast(QueryingAA) + << " into " << S << "\n"); + + assert((QueryingAA.getIRPosition().getPositionKind() == + IRPosition::IRP_RETURNED || + QueryingAA.getIRPosition().getPositionKind() == + IRPosition::IRP_CALL_SITE_RETURNED) && + "Can only clamp returned value states for a function returned or call " + "site returned position!"); + + // Use an optional state as there might not be any return values and we want + // to join (IntegerState::operator&) the state of all there are. + Optional T; + + // Callback for each possibly returned value. + auto CheckReturnValue = [&](Value &RV) -> bool { + const IRPosition &RVPos = IRPosition::value(RV); + const AAType &AA = A.getAAFor(QueryingAA, RVPos); + LLVM_DEBUG(dbgs() << "[Attributor] RV: " << RV << " AA: " << AA.getAsStr() + << " @ " << RVPos << "\n"); + const StateType &AAS = static_cast(AA.getState()); + if (T.hasValue()) + *T &= AAS; + else + T = AAS; + LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " RV State: " << T + << "\n"); + return T->isValidState(); + }; + + if (!A.checkForAllReturnedValues(CheckReturnValue, QueryingAA)) + S.indicatePessimisticFixpoint(); + else if (T.hasValue()) + S ^= *T; +} - /// See AbstractAttribute::getManifestPosition(). - ManifestPosition getManifestPosition() const override { return MP_FUNCTION; } +/// Helper class to compose two generic deduction +template class F, template class G> +struct AAComposeTwoGenericDeduction + : public F, StateType> { + AAComposeTwoGenericDeduction(const IRPosition &IRP) + : F, StateType>(IRP) {} - const std::string getAsStr() const override { - return getAssumed() ? "nounwind" : "may-unwind"; + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + ChangeStatus ChangedF = F, StateType>::updateImpl(A); + ChangeStatus ChangedG = G::updateImpl(A); + return ChangedF | ChangedG; } +}; + +/// Helper class for generic deduction: return value -> returned position. 
+template +struct AAReturnedFromReturnedValues : public Base { + AAReturnedFromReturnedValues(const IRPosition &IRP) : Base(IRP) {} /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override; + ChangeStatus updateImpl(Attributor &A) override { + StateType S; + clampReturnedValueStates(A, *this, S); + // TODO: If we know we visited all returned values, thus no are assumed + // dead, we can take the known information from the state T. + return clampStateAndIndicateChange(this->getState(), S); + } +}; + +/// Clamp the information known at all call sites for a given argument +/// (identified by \p QueryingAA) into \p S. +template +static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA, + StateType &S) { + LLVM_DEBUG(dbgs() << "[Attributor] Clamp call site argument states for " + << static_cast(QueryingAA) + << " into " << S << "\n"); + + assert(QueryingAA.getIRPosition().getPositionKind() == + IRPosition::IRP_ARGUMENT && + "Can only clamp call site argument states for an argument position!"); + + // Use an optional state as there might not be any return values and we want + // to join (IntegerState::operator&) the state of all there are. + Optional T; + + // The argument number which is also the call site argument number. + unsigned ArgNo = QueryingAA.getIRPosition().getArgNo(); + + auto CallSiteCheck = [&](AbstractCallSite ACS) { + const IRPosition &ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo); + // Check if a coresponding argument was found or if it is on not associated + // (which can happen for callback calls). + if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID) + return false; + + const AAType &AA = A.getAAFor(QueryingAA, ACSArgPos); + LLVM_DEBUG(dbgs() << "[Attributor] ACS: " << *ACS.getInstruction() + << " AA: " << AA.getAsStr() << " @" << ACSArgPos << "\n"); + const StateType &AAS = static_cast(AA.getState()); + if (T.hasValue()) + *T &= AAS; + else + T = AAS; + LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " CSA State: " << T + << "\n"); + return T->isValidState(); + }; + + if (!A.checkForAllCallSites(CallSiteCheck, QueryingAA, true)) + S.indicatePessimisticFixpoint(); + else if (T.hasValue()) + S ^= *T; +} - /// See AANoUnwind::isAssumedNoUnwind(). - bool isAssumedNoUnwind() const override { return getAssumed(); } +/// Helper class for generic deduction: call site argument -> argument position. +template +struct AAArgumentFromCallSiteArguments : public Base { + AAArgumentFromCallSiteArguments(const IRPosition &IRP) : Base(IRP) {} - /// See AANoUnwind::isKnownNoUnwind(). - bool isKnownNoUnwind() const override { return getKnown(); } + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + StateType S; + clampCallSiteArgumentStates(A, *this, S); + // TODO: If we know we visited all incoming values, thus no are assumed + // dead, we can take the known information from the state T. + return clampStateAndIndicateChange(this->getState(), S); + } }; -ChangeStatus AANoUnwindFunction::updateImpl(Attributor &A) { - Function &F = getAnchorScope(); +/// Helper class for generic replication: function returned -> cs returned. +template +struct AACallSiteReturnedFromReturned : public Base { + AACallSiteReturnedFromReturned(const IRPosition &IRP) : Base(IRP) {} - // The map from instruction opcodes to those instructions in the function. 
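// ---------------------------------------------------------------------------
// clampReturnedValueStates() and clampCallSiteArgumentStates() above share one
// pattern: meet (&) the states of all related positions into an optional
// accumulator and, only if every related position could be enumerated, clamp
// the result into the querying attribute's state. The standalone sketch below
// shows that pattern for a plain boolean "assumed" state; BoolState and
// meetOverAll are illustrative names, not part of this patch.
#include <optional>
#include <vector>

struct BoolState {
  bool Assumed = true; // optimistic until proven otherwise
  void operator&=(const BoolState &R) { Assumed &= R.Assumed; }
  void indicatePessimisticFixpoint() { Assumed = false; }
};

static void meetOverAll(BoolState &S, const std::vector<BoolState> &Related,
                        bool SawAllRelatedPositions) {
  // If some related position could not be visited, the only safe answer is
  // the pessimistic fixpoint.
  if (!SawAllRelatedPositions) {
    S.indicatePessimisticFixpoint();
    return;
  }
  // Start from "no information yet" rather than from a pessimistic value so
  // that, for example, a function without return values stays optimistic.
  std::optional<BoolState> T;
  for (const BoolState &AAS : Related) {
    if (T)
      *T &= AAS;
    else
      T = AAS;
  }
  if (T)
    S &= *T; // clamp the accumulated information into the querying state
}
// ---------------------------------------------------------------------------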
- auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F); - auto Opcodes = { - (unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr, - (unsigned)Instruction::Call, (unsigned)Instruction::CleanupRet, - (unsigned)Instruction::CatchSwitch, (unsigned)Instruction::Resume}; + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + assert(this->getIRPosition().getPositionKind() == + IRPosition::IRP_CALL_SITE_RETURNED && + "Can only wrap function returned positions for call site returned " + "positions!"); + auto &S = this->getState(); + + const Function *AssociatedFunction = + this->getIRPosition().getAssociatedFunction(); + if (!AssociatedFunction) + return S.indicatePessimisticFixpoint(); + + IRPosition FnPos = IRPosition::returned(*AssociatedFunction); + const AAType &AA = A.getAAFor(*this, FnPos); + return clampStateAndIndicateChange( + S, static_cast(AA.getState())); + } +}; - for (unsigned Opcode : Opcodes) { - for (Instruction *I : OpcodeInstMap[Opcode]) { - if (!I->mayThrow()) - continue; +/// Helper class for generic deduction using must-be-executed-context +/// Base class is required to have `followUse` method. - auto *NoUnwindAA = A.getAAFor(*this, *I); +/// bool followUse(Attributor &A, const Use *U, const Instruction *I) +/// U - Underlying use. +/// I - The user of the \p U. +/// `followUse` returns true if the value should be tracked transitively. - if (!NoUnwindAA || !NoUnwindAA->isAssumedNoUnwind()) { - indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; +template +struct AAFromMustBeExecutedContext : public Base { + AAFromMustBeExecutedContext(const IRPosition &IRP) : Base(IRP) {} + + void initialize(Attributor &A) override { + Base::initialize(A); + IRPosition &IRP = this->getIRPosition(); + Instruction *CtxI = IRP.getCtxI(); + + if (!CtxI) + return; + + for (const Use &U : IRP.getAssociatedValue().uses()) + Uses.insert(&U); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + auto BeforeState = this->getState(); + auto &S = this->getState(); + Instruction *CtxI = this->getIRPosition().getCtxI(); + if (!CtxI) + return ChangeStatus::UNCHANGED; + + MustBeExecutedContextExplorer &Explorer = + A.getInfoCache().getMustBeExecutedContextExplorer(); + + SetVector NextUses; + + for (const Use *U : Uses) { + if (const Instruction *UserI = dyn_cast(U->getUser())) { + auto EIt = Explorer.begin(CtxI), EEnd = Explorer.end(CtxI); + bool Found = EIt.count(UserI); + while (!Found && ++EIt != EEnd) + Found = EIt.getCurrentInst() == UserI; + if (Found && Base::followUse(A, U, UserI)) + for (const Use &Us : UserI->uses()) + NextUses.insert(&Us); } } + for (const Use *U : NextUses) + Uses.insert(U); + + return BeforeState == S ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED; } - return ChangeStatus::UNCHANGED; -} + +private: + /// Container for (transitive) uses of the associated value. + SetVector Uses; +}; + +template +using AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext = + AAComposeTwoGenericDeduction; + +template +using AACallSiteReturnedFromReturnedAndMustBeExecutedContext = + AAComposeTwoGenericDeduction; + +/// -----------------------NoUnwind Function Attribute-------------------------- + +struct AANoUnwindImpl : AANoUnwind { + AANoUnwindImpl(const IRPosition &IRP) : AANoUnwind(IRP) {} + + const std::string getAsStr() const override { + return getAssumed() ? "nounwind" : "may-unwind"; + } + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override { + auto Opcodes = { + (unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr, + (unsigned)Instruction::Call, (unsigned)Instruction::CleanupRet, + (unsigned)Instruction::CatchSwitch, (unsigned)Instruction::Resume}; + + auto CheckForNoUnwind = [&](Instruction &I) { + if (!I.mayThrow()) + return true; + + if (ImmutableCallSite ICS = ImmutableCallSite(&I)) { + const auto &NoUnwindAA = + A.getAAFor(*this, IRPosition::callsite_function(ICS)); + return NoUnwindAA.isAssumedNoUnwind(); + } + return false; + }; + + if (!A.checkForAllInstructions(CheckForNoUnwind, *this, Opcodes)) + return indicatePessimisticFixpoint(); + + return ChangeStatus::UNCHANGED; + } +}; + +struct AANoUnwindFunction final : public AANoUnwindImpl { + AANoUnwindFunction(const IRPosition &IRP) : AANoUnwindImpl(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nounwind) } +}; + +/// NoUnwind attribute deduction for a call sites. +struct AANoUnwindCallSite final : AANoUnwindImpl { + AANoUnwindCallSite(const IRPosition &IRP) : AANoUnwindImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoUnwindImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor(*this, FnPos); + return clampStateAndIndicateChange( + getState(), + static_cast(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nounwind); } +}; /// --------------------- Function Return Values ------------------------------- @@ -434,68 +856,48 @@ ChangeStatus AANoUnwindFunction::updateImpl(Attributor &A) { /// /// If there is a unique returned value R, the manifest method will: /// - mark R with the "returned" attribute, if R is an argument. -class AAReturnedValuesImpl final : public AAReturnedValues, AbstractState { +class AAReturnedValuesImpl : public AAReturnedValues, public AbstractState { /// Mapping of values potentially returned by the associated function to the /// return instructions that might return them. - DenseMap> ReturnedValues; + MapVector> ReturnedValues; + + /// Mapping to remember the number of returned values for a call site such + /// that we can avoid updates if nothing changed. + DenseMap NumReturnedValuesPerKnownAA; + + /// Set of unresolved calls returned by the associated function. + SmallSetVector UnresolvedCalls; /// State flags /// ///{ - bool IsFixed; - bool IsValidState; - bool HasOverdefinedReturnedCalls; + bool IsFixed = false; + bool IsValidState = true; ///} - /// Collect values that could become \p V in the set \p Values, each mapped to - /// \p ReturnInsts. 
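// ---------------------------------------------------------------------------
// Both the removed and the new initialize() above fetch a precomputed map from
// instruction opcode to instructions (getOpcodeInstMapForFunction) instead of
// rescanning the function for every attribute. The standalone sketch below
// models that cache; Instr, OpcodeInstMap and buildOpcodeInstMap are
// illustrative stand-ins, not the actual InformationCache interface.
#include <map>
#include <vector>

struct Instr {
  unsigned Opcode = 0; // stand-in for llvm::Instruction::getOpcode()
};

using OpcodeInstMap = std::map<unsigned, std::vector<const Instr *>>;

// Walk the function body once and bucket instructions by opcode; a deduction
// such as AAReturnedValues then only inspects the buckets it cares about
// (e.g. the return instructions).
static OpcodeInstMap buildOpcodeInstMap(const std::vector<Instr> &Body) {
  OpcodeInstMap Map;
  for (const Instr &I : Body)
    Map[I.Opcode].push_back(&I);
  return Map;
}
// ---------------------------------------------------------------------------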
- void collectValuesRecursively( - Attributor &A, Value *V, SmallPtrSetImpl &ReturnInsts, - DenseMap> &Values) { - - visitValueCB_t VisitValueCB = [&](Value *Val, bool &) { - assert(!isa(Val) || - &getAnchorScope() == cast(Val)->getFunction()); - Values[Val].insert(ReturnInsts.begin(), ReturnInsts.end()); - }; - - bool UnusedBool; - bool Success = genericValueTraversal(V, UnusedBool, VisitValueCB); - - // If we did abort the above traversal we haven't see all the values. - // Consequently, we cannot know if the information we would derive is - // accurate so we give up early. - if (!Success) - indicatePessimisticFixpoint(); - } - public: - /// See AbstractAttribute::AbstractAttribute(...). - AAReturnedValuesImpl(Function &F, InformationCache &InfoCache) - : AAReturnedValues(F, InfoCache) { - // We do not have an associated argument yet. - AssociatedVal = nullptr; - } + AAReturnedValuesImpl(const IRPosition &IRP) : AAReturnedValues(IRP) {} /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { // Reset the state. - AssociatedVal = nullptr; IsFixed = false; IsValidState = true; - HasOverdefinedReturnedCalls = false; ReturnedValues.clear(); - Function &F = cast(getAnchoredValue()); + Function *F = getAssociatedFunction(); + if (!F) { + indicatePessimisticFixpoint(); + return; + } // The map from instruction opcodes to those instructions in the function. - auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F); + auto &OpcodeInstMap = A.getInfoCache().getOpcodeInstMapForFunction(*F); // Look through all arguments, if one is marked as returned we are done. - for (Argument &Arg : F.args()) { + for (Argument &Arg : F->args()) { if (Arg.hasReturnedAttr()) { - auto &ReturnInstSet = ReturnedValues[&Arg]; for (Instruction *RI : OpcodeInstMap[Instruction::Ret]) ReturnInstSet.insert(cast(RI)); @@ -505,13 +907,8 @@ public: } } - // If no argument was marked as returned we look at all return instructions - // and collect potentially returned values. - for (Instruction *RI : OpcodeInstMap[Instruction::Ret]) { - SmallPtrSet RISet({cast(RI)}); - collectValuesRecursively(A, cast(RI)->getReturnValue(), RISet, - ReturnedValues); - } + if (!F->hasExactDefinition()) + indicatePessimisticFixpoint(); } /// See AbstractAttribute::manifest(...). @@ -523,25 +920,35 @@ public: /// See AbstractAttribute::getState(...). const AbstractState &getState() const override { return *this; } - /// See AbstractAttribute::getManifestPosition(). - ManifestPosition getManifestPosition() const override { return MP_ARGUMENT; } - /// See AbstractAttribute::updateImpl(Attributor &A). ChangeStatus updateImpl(Attributor &A) override; + llvm::iterator_range returned_values() override { + return llvm::make_range(ReturnedValues.begin(), ReturnedValues.end()); + } + + llvm::iterator_range returned_values() const override { + return llvm::make_range(ReturnedValues.begin(), ReturnedValues.end()); + } + + const SmallSetVector &getUnresolvedCalls() const override { + return UnresolvedCalls; + } + /// Return the number of potential return values, -1 if unknown. - size_t getNumReturnValues() const { + size_t getNumReturnValues() const override { return isValidState() ? ReturnedValues.size() : -1; } /// Return an assumed unique return value if a single candidate is found. If /// there cannot be one, return a nullptr. If it is not clear yet, return the /// Optional::NoneType. 
- Optional getAssumedUniqueReturnValue() const; + Optional getAssumedUniqueReturnValue(Attributor &A) const; - /// See AbstractState::checkForallReturnedValues(...). - bool - checkForallReturnedValues(std::function &Pred) const override; + /// See AbstractState::checkForAllReturnedValues(...). + bool checkForAllReturnedValuesAndReturnInsts( + const function_ref &)> + &Pred) const override; /// Pretty print the attribute similar to the IR representation. const std::string getAsStr() const override; @@ -553,13 +960,15 @@ public: bool isValidState() const override { return IsValidState; } /// See AbstractState::indicateOptimisticFixpoint(...). - void indicateOptimisticFixpoint() override { + ChangeStatus indicateOptimisticFixpoint() override { IsFixed = true; - IsValidState &= true; + return ChangeStatus::UNCHANGED; } - void indicatePessimisticFixpoint() override { + + ChangeStatus indicatePessimisticFixpoint() override { IsFixed = true; IsValidState = false; + return ChangeStatus::CHANGED; } }; @@ -568,21 +977,52 @@ ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) { // Bookkeeping. assert(isValidState()); - NumFnKnownReturns++; + STATS_DECLTRACK(KnownReturnValues, FunctionReturn, + "Number of function with known return values"); // Check if we have an assumed unique return value that we could manifest. - Optional UniqueRV = getAssumedUniqueReturnValue(); + Optional UniqueRV = getAssumedUniqueReturnValue(A); if (!UniqueRV.hasValue() || !UniqueRV.getValue()) return Changed; // Bookkeeping. - NumFnUniqueReturned++; + STATS_DECLTRACK(UniqueReturnValue, FunctionReturn, + "Number of function with unique return"); + + // Callback to replace the uses of CB with the constant C. + auto ReplaceCallSiteUsersWith = [](CallBase &CB, Constant &C) { + if (CB.getNumUses() == 0 || CB.isMustTailCall()) + return ChangeStatus::UNCHANGED; + CB.replaceAllUsesWith(&C); + return ChangeStatus::CHANGED; + }; // If the assumed unique return value is an argument, annotate it. if (auto *UniqueRVArg = dyn_cast(UniqueRV.getValue())) { - AssociatedVal = UniqueRVArg; - Changed = AbstractAttribute::manifest(A) | Changed; + getIRPosition() = IRPosition::argument(*UniqueRVArg); + Changed = IRAttribute::manifest(A); + } else if (auto *RVC = dyn_cast(UniqueRV.getValue())) { + // We can replace the returned value with the unique returned constant. + Value &AnchorValue = getAnchorValue(); + if (Function *F = dyn_cast(&AnchorValue)) { + for (const Use &U : F->uses()) + if (CallBase *CB = dyn_cast(U.getUser())) + if (CB->isCallee(&U)) { + Constant *RVCCast = + ConstantExpr::getTruncOrBitCast(RVC, CB->getType()); + Changed = ReplaceCallSiteUsersWith(*CB, *RVCCast) | Changed; + } + } else { + assert(isa(AnchorValue) && + "Expcected a function or call base anchor!"); + Constant *RVCCast = + ConstantExpr::getTruncOrBitCast(RVC, AnchorValue.getType()); + Changed = ReplaceCallSiteUsersWith(cast(AnchorValue), *RVCCast); + } + if (Changed == ChangeStatus::CHANGED) + STATS_DECLTRACK(UniqueConstantReturnValue, FunctionReturn, + "Number of function returns replaced by constant return"); } return Changed; @@ -590,18 +1030,20 @@ ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) { const std::string AAReturnedValuesImpl::getAsStr() const { return (isAtFixpoint() ? "returns(#" : "may-return(#") + - (isValidState() ? std::to_string(getNumReturnValues()) : "?") + ")"; + (isValidState() ? 
std::to_string(getNumReturnValues()) : "?") + + ")[#UC: " + std::to_string(UnresolvedCalls.size()) + "]"; } -Optional AAReturnedValuesImpl::getAssumedUniqueReturnValue() const { - // If checkForallReturnedValues provides a unique value, ignoring potential +Optional +AAReturnedValuesImpl::getAssumedUniqueReturnValue(Attributor &A) const { + // If checkForAllReturnedValues provides a unique value, ignoring potential // undef values that can also be present, it is assumed to be the actual // return value and forwarded to the caller of this method. If there are // multiple, a nullptr is returned indicating there cannot be a unique // returned value. Optional UniqueRV; - std::function Pred = [&](Value &RV) -> bool { + auto Pred = [&](Value &RV) -> bool { // If we found a second returned value and neither the current nor the saved // one is an undef, there is no unique returned value. Undefs are special // since we can pretend they have any value. @@ -618,14 +1060,15 @@ Optional AAReturnedValuesImpl::getAssumedUniqueReturnValue() const { return true; }; - if (!checkForallReturnedValues(Pred)) + if (!A.checkForAllReturnedValues(Pred, *this)) UniqueRV = nullptr; return UniqueRV; } -bool AAReturnedValuesImpl::checkForallReturnedValues( - std::function &Pred) const { +bool AAReturnedValuesImpl::checkForAllReturnedValuesAndReturnInsts( + const function_ref &)> + &Pred) const { if (!isValidState()) return false; @@ -634,11 +1077,11 @@ bool AAReturnedValuesImpl::checkForallReturnedValues( for (auto &It : ReturnedValues) { Value *RV = It.first; - ImmutableCallSite ICS(RV); - if (ICS && !HasOverdefinedReturnedCalls) + CallBase *CB = dyn_cast(RV); + if (CB && !UnresolvedCalls.count(CB)) continue; - if (!Pred(*RV)) + if (!Pred(*RV, It.second)) return false; } @@ -646,125 +1089,196 @@ bool AAReturnedValuesImpl::checkForallReturnedValues( } ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) { + size_t NumUnresolvedCalls = UnresolvedCalls.size(); + bool Changed = false; + + // State used in the value traversals starting in returned values. + struct RVState { + // The map in which we collect return values -> return instrs. + decltype(ReturnedValues) &RetValsMap; + // The flag to indicate a change. + bool &Changed; + // The return instrs we come from. + SmallSetVector RetInsts; + }; - // Check if we know of any values returned by the associated function, - // if not, we are done. - if (getNumReturnValues() == 0) { - indicateOptimisticFixpoint(); - return ChangeStatus::UNCHANGED; - } + // Callback for a leaf value returned by the associated function. + auto VisitValueCB = [](Value &Val, RVState &RVS, bool) -> bool { + auto Size = RVS.RetValsMap[&Val].size(); + RVS.RetValsMap[&Val].insert(RVS.RetInsts.begin(), RVS.RetInsts.end()); + bool Inserted = RVS.RetValsMap[&Val].size() != Size; + RVS.Changed |= Inserted; + LLVM_DEBUG({ + if (Inserted) + dbgs() << "[AAReturnedValues] 1 Add new returned value " << Val + << " => " << RVS.RetInsts.size() << "\n"; + }); + return true; + }; - // Check if any of the returned values is a call site we can refine. - decltype(ReturnedValues) AddRVs; - bool HasCallSite = false; + // Helper method to invoke the generic value traversal. + auto VisitReturnedValue = [&](Value &RV, RVState &RVS) { + IRPosition RetValPos = IRPosition::value(RV); + return genericValueTraversal(A, RetValPos, *this, + RVS, VisitValueCB); + }; - // Look at all returned call sites. 
- for (auto &It : ReturnedValues) { - SmallPtrSet &ReturnInsts = It.second; - Value *RV = It.first; - LLVM_DEBUG(dbgs() << "[AAReturnedValues] Potentially returned value " << *RV - << "\n"); + // Callback for all "return intructions" live in the associated function. + auto CheckReturnInst = [this, &VisitReturnedValue, &Changed](Instruction &I) { + ReturnInst &Ret = cast(I); + RVState RVS({ReturnedValues, Changed, {}}); + RVS.RetInsts.insert(&Ret); + return VisitReturnedValue(*Ret.getReturnValue(), RVS); + }; - // Only call sites can change during an update, ignore the rest. - CallSite RetCS(RV); - if (!RetCS) + // Start by discovering returned values from all live returned instructions in + // the associated function. + if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret})) + return indicatePessimisticFixpoint(); + + // Once returned values "directly" present in the code are handled we try to + // resolve returned calls. + decltype(ReturnedValues) NewRVsMap; + for (auto &It : ReturnedValues) { + LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned value: " << *It.first + << " by #" << It.second.size() << " RIs\n"); + CallBase *CB = dyn_cast(It.first); + if (!CB || UnresolvedCalls.count(CB)) continue; - // For now, any call site we see will prevent us from directly fixing the - // state. However, if the information on the callees is fixed, the call - // sites will be removed and we will fix the information for this state. - HasCallSite = true; - - // Try to find a assumed unique return value for the called function. - auto *RetCSAA = A.getAAFor(*this, *RV); - if (!RetCSAA) { - HasOverdefinedReturnedCalls = true; - LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned call site (" << *RV - << ") with " << (RetCSAA ? "invalid" : "no") - << " associated state\n"); + if (!CB->getCalledFunction()) { + LLVM_DEBUG(dbgs() << "[AAReturnedValues] Unresolved call: " << *CB + << "\n"); + UnresolvedCalls.insert(CB); continue; } - // Try to find a assumed unique return value for the called function. - Optional AssumedUniqueRV = RetCSAA->getAssumedUniqueReturnValue(); + // TODO: use the function scope once we have call site AAReturnedValues. + const auto &RetValAA = A.getAAFor( + *this, IRPosition::function(*CB->getCalledFunction())); + LLVM_DEBUG(dbgs() << "[AAReturnedValues] Found another AAReturnedValues: " + << static_cast(RetValAA) + << "\n"); - // If no assumed unique return value was found due to the lack of - // candidates, we may need to resolve more calls (through more update - // iterations) or the called function will not return. Either way, we simply - // stick with the call sites as return values. Because there were not - // multiple possibilities, we do not treat it as overdefined. - if (!AssumedUniqueRV.hasValue()) + // Skip dead ends, thus if we do not know anything about the returned + // call we mark it as unresolved and it will stay that way. + if (!RetValAA.getState().isValidState()) { + LLVM_DEBUG(dbgs() << "[AAReturnedValues] Unresolved call: " << *CB + << "\n"); + UnresolvedCalls.insert(CB); continue; + } - // If multiple, non-refinable values were found, there cannot be a unique - // return value for the called function. The returned call is overdefined! - if (!AssumedUniqueRV.getValue()) { - HasOverdefinedReturnedCalls = true; - LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned call site has multiple " - "potentially returned values\n"); + // Do not try to learn partial information. 
If the callee has unresolved + // return values we will treat the call as unresolved/opaque. + auto &RetValAAUnresolvedCalls = RetValAA.getUnresolvedCalls(); + if (!RetValAAUnresolvedCalls.empty()) { + UnresolvedCalls.insert(CB); continue; } - LLVM_DEBUG({ - bool UniqueRVIsKnown = RetCSAA->isAtFixpoint(); - dbgs() << "[AAReturnedValues] Returned call site " - << (UniqueRVIsKnown ? "known" : "assumed") - << " unique return value: " << *AssumedUniqueRV << "\n"; - }); - - // The assumed unique return value. - Value *AssumedRetVal = AssumedUniqueRV.getValue(); - - // If the assumed unique return value is an argument, lookup the matching - // call site operand and recursively collect new returned values. - // If it is not an argument, it is just put into the set of returned values - // as we would have already looked through casts, phis, and similar values. - if (Argument *AssumedRetArg = dyn_cast(AssumedRetVal)) - collectValuesRecursively(A, - RetCS.getArgOperand(AssumedRetArg->getArgNo()), - ReturnInsts, AddRVs); - else - AddRVs[AssumedRetVal].insert(ReturnInsts.begin(), ReturnInsts.end()); - } + // Now check if we can track transitively returned values. If possible, thus + // if all return value can be represented in the current scope, do so. + bool Unresolved = false; + for (auto &RetValAAIt : RetValAA.returned_values()) { + Value *RetVal = RetValAAIt.first; + if (isa(RetVal) || isa(RetVal) || + isa(RetVal)) + continue; + // Anything that did not fit in the above categories cannot be resolved, + // mark the call as unresolved. + LLVM_DEBUG(dbgs() << "[AAReturnedValues] transitively returned value " + "cannot be translated: " + << *RetVal << "\n"); + UnresolvedCalls.insert(CB); + Unresolved = true; + break; + } - // Keep track of any change to trigger updates on dependent attributes. - ChangeStatus Changed = ChangeStatus::UNCHANGED; + if (Unresolved) + continue; - for (auto &It : AddRVs) { - assert(!It.second.empty() && "Entry does not add anything."); - auto &ReturnInsts = ReturnedValues[It.first]; - for (ReturnInst *RI : It.second) - if (ReturnInsts.insert(RI).second) { - LLVM_DEBUG(dbgs() << "[AAReturnedValues] Add new returned value " - << *It.first << " => " << *RI << "\n"); - Changed = ChangeStatus::CHANGED; + // Now track transitively returned values. + unsigned &NumRetAA = NumReturnedValuesPerKnownAA[CB]; + if (NumRetAA == RetValAA.getNumReturnValues()) { + LLVM_DEBUG(dbgs() << "[AAReturnedValues] Skip call as it has not " + "changed since it was seen last\n"); + continue; + } + NumRetAA = RetValAA.getNumReturnValues(); + + for (auto &RetValAAIt : RetValAA.returned_values()) { + Value *RetVal = RetValAAIt.first; + if (Argument *Arg = dyn_cast(RetVal)) { + // Arguments are mapped to call site operands and we begin the traversal + // again. + bool Unused = false; + RVState RVS({NewRVsMap, Unused, RetValAAIt.second}); + VisitReturnedValue(*CB->getArgOperand(Arg->getArgNo()), RVS); + continue; + } else if (isa(RetVal)) { + // Call sites are resolved by the callee attribute over time, no need to + // do anything for us. + continue; + } else if (isa(RetVal)) { + // Constants are valid everywhere, we can simply take them. + NewRVsMap[RetVal].insert(It.second.begin(), It.second.end()); + continue; } + } } - // If there is no call site in the returned values we are done. 
- if (!HasCallSite) { - indicateOptimisticFixpoint(); - return ChangeStatus::CHANGED; + // To avoid modifications to the ReturnedValues map while we iterate over it + // we kept record of potential new entries in a copy map, NewRVsMap. + for (auto &It : NewRVsMap) { + assert(!It.second.empty() && "Entry does not add anything."); + auto &ReturnInsts = ReturnedValues[It.first]; + for (ReturnInst *RI : It.second) + if (ReturnInsts.insert(RI)) { + LLVM_DEBUG(dbgs() << "[AAReturnedValues] Add new returned value " + << *It.first << " => " << *RI << "\n"); + Changed = true; + } } - return Changed; + Changed |= (NumUnresolvedCalls != UnresolvedCalls.size()); + return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; } -/// ------------------------ NoSync Function Attribute ------------------------- +struct AAReturnedValuesFunction final : public AAReturnedValuesImpl { + AAReturnedValuesFunction(const IRPosition &IRP) : AAReturnedValuesImpl(IRP) {} -struct AANoSyncFunction : AANoSync, BooleanState { + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(returned) } +}; - AANoSyncFunction(Function &F, InformationCache &InfoCache) - : AANoSync(F, InfoCache) {} +/// Returned values information for a call sites. +struct AAReturnedValuesCallSite final : AAReturnedValuesImpl { + AAReturnedValuesCallSite(const IRPosition &IRP) : AAReturnedValuesImpl(IRP) {} - /// See AbstractAttribute::getState() - /// { - AbstractState &getState() override { return *this; } - const AbstractState &getState() const override { return *this; } - /// } + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites instead of + // redirecting requests to the callee. + llvm_unreachable("Abstract attributes for returned values are not " + "supported for call sites yet!"); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + return indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override {} +}; - /// See AbstractAttribute::getManifestPosition(). - ManifestPosition getManifestPosition() const override { return MP_FUNCTION; } +/// ------------------------ NoSync Function Attribute ------------------------- + +struct AANoSyncImpl : AANoSync { + AANoSyncImpl(const IRPosition &IRP) : AANoSync(IRP) {} const std::string getAsStr() const override { return getAssumed() ? "nosync" : "may-sync"; @@ -773,12 +1287,6 @@ struct AANoSyncFunction : AANoSync, BooleanState { /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override; - /// See AANoSync::isAssumedNoSync() - bool isAssumedNoSync() const override { return getAssumed(); } - - /// See AANoSync::isKnownNoSync() - bool isKnownNoSync() const override { return getKnown(); } - /// Helper function used to determine whether an instruction is non-relaxed /// atomic. 
In other words, if an atomic instruction does not have unordered /// or monotonic ordering @@ -792,7 +1300,7 @@ struct AANoSyncFunction : AANoSync, BooleanState { static bool isNoSyncIntrinsic(Instruction *I); }; -bool AANoSyncFunction::isNonRelaxedAtomic(Instruction *I) { +bool AANoSyncImpl::isNonRelaxedAtomic(Instruction *I) { if (!I->isAtomic()) return false; @@ -841,7 +1349,7 @@ bool AANoSyncFunction::isNonRelaxedAtomic(Instruction *I) { /// Checks if an intrinsic is nosync. Currently only checks mem* intrinsics. /// FIXME: We should ipmrove the handling of intrinsics. -bool AANoSyncFunction::isNoSyncIntrinsic(Instruction *I) { +bool AANoSyncImpl::isNoSyncIntrinsic(Instruction *I) { if (auto *II = dyn_cast(I)) { switch (II->getIntrinsicID()) { /// Element wise atomic memory intrinsics are can only be unordered, @@ -863,7 +1371,7 @@ bool AANoSyncFunction::isNoSyncIntrinsic(Instruction *I) { return false; } -bool AANoSyncFunction::isVolatile(Instruction *I) { +bool AANoSyncImpl::isVolatile(Instruction *I) { assert(!ImmutableCallSite(I) && !isa(I) && "Calls should not be checked here"); @@ -881,482 +1389,3074 @@ bool AANoSyncFunction::isVolatile(Instruction *I) { } } -ChangeStatus AANoSyncFunction::updateImpl(Attributor &A) { - Function &F = getAnchorScope(); +ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) { - /// We are looking for volatile instructions or Non-Relaxed atomics. - /// FIXME: We should ipmrove the handling of intrinsics. - for (Instruction *I : InfoCache.getReadOrWriteInstsForFunction(F)) { - ImmutableCallSite ICS(I); - auto *NoSyncAA = A.getAAFor(*this, *I); + auto CheckRWInstForNoSync = [&](Instruction &I) { + /// We are looking for volatile instructions or Non-Relaxed atomics. + /// FIXME: We should ipmrove the handling of intrinsics. - if (isa(I) && isNoSyncIntrinsic(I)) - continue; + if (isa(&I) && isNoSyncIntrinsic(&I)) + return true; - if (ICS && (!NoSyncAA || !NoSyncAA->isAssumedNoSync()) && - !ICS.hasFnAttr(Attribute::NoSync)) { - indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; + if (ImmutableCallSite ICS = ImmutableCallSite(&I)) { + if (ICS.hasFnAttr(Attribute::NoSync)) + return true; + + const auto &NoSyncAA = + A.getAAFor(*this, IRPosition::callsite_function(ICS)); + if (NoSyncAA.isAssumedNoSync()) + return true; + return false; } - if (ICS) - continue; + if (!isVolatile(&I) && !isNonRelaxedAtomic(&I)) + return true; - if (!isVolatile(I) && !isNonRelaxedAtomic(I)) - continue; + return false; + }; - indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; - } + auto CheckForNoSync = [&](Instruction &I) { + // At this point we handled all read/write effects and they are all + // nosync, so they can be skipped. + if (I.mayReadOrWriteMemory()) + return true; - auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F); - auto Opcodes = {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr, - (unsigned)Instruction::Call}; + // non-convergent and readnone imply nosync. + return !ImmutableCallSite(&I).isConvergent(); + }; - for (unsigned Opcode : Opcodes) { - for (Instruction *I : OpcodeInstMap[Opcode]) { - // At this point we handled all read/write effects and they are all - // nosync, so they can be skipped. 
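// ---------------------------------------------------------------------------
// As the comment on isNonRelaxedAtomic() explains, the nosync deduction treats
// an atomic access as a potential synchronization point unless its ordering is
// relaxed, i.e. unordered or monotonic. The standalone sketch below restates
// that classification; the Ordering enum and isNonRelaxedAtomicOrdering are
// simplified stand-ins for llvm::AtomicOrdering and the member function above.
enum class Ordering {
  NotAtomic,
  Unordered,
  Monotonic,
  Acquire,
  Release,
  AcquireRelease,
  SequentiallyConsistent
};

// Only orderings stronger than monotonic establish happens-before edges with
// other threads, so only they block the nosync deduction.
static bool isNonRelaxedAtomicOrdering(Ordering O) {
  switch (O) {
  case Ordering::NotAtomic:
  case Ordering::Unordered:
  case Ordering::Monotonic:
    return false;
  default:
    return true;
  }
}
// ---------------------------------------------------------------------------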
- if (I->mayReadOrWriteMemory()) - continue; + if (!A.checkForAllReadWriteInstructions(CheckRWInstForNoSync, *this) || + !A.checkForAllCallLikeInstructions(CheckForNoSync, *this)) + return indicatePessimisticFixpoint(); - ImmutableCallSite ICS(I); + return ChangeStatus::UNCHANGED; +} - // non-convergent and readnone imply nosync. - if (!ICS.isConvergent()) - continue; +struct AANoSyncFunction final : public AANoSyncImpl { + AANoSyncFunction(const IRPosition &IRP) : AANoSyncImpl(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nosync) } +}; + +/// NoSync attribute deduction for a call sites. +struct AANoSyncCallSite final : AANoSyncImpl { + AANoSyncCallSite(const IRPosition &IRP) : AANoSyncImpl(IRP) {} + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoSyncImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; - } } - return ChangeStatus::UNCHANGED; -} + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor(*this, FnPos); + return clampStateAndIndicateChange( + getState(), static_cast(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nosync); } +}; /// ------------------------ No-Free Attributes ---------------------------- -struct AANoFreeFunction : AbstractAttribute, BooleanState { +struct AANoFreeImpl : public AANoFree { + AANoFreeImpl(const IRPosition &IRP) : AANoFree(IRP) {} - /// See AbstractAttribute::AbstractAttribute(...). - AANoFreeFunction(Function &F, InformationCache &InfoCache) - : AbstractAttribute(F, InfoCache) {} + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + auto CheckForNoFree = [&](Instruction &I) { + ImmutableCallSite ICS(&I); + if (ICS.hasFnAttr(Attribute::NoFree)) + return true; - /// See AbstractAttribute::getState() - ///{ - AbstractState &getState() override { return *this; } - const AbstractState &getState() const override { return *this; } - ///} + const auto &NoFreeAA = + A.getAAFor(*this, IRPosition::callsite_function(ICS)); + return NoFreeAA.isAssumedNoFree(); + }; - /// See AbstractAttribute::getManifestPosition(). - ManifestPosition getManifestPosition() const override { return MP_FUNCTION; } + if (!A.checkForAllCallLikeInstructions(CheckForNoFree, *this)) + return indicatePessimisticFixpoint(); + return ChangeStatus::UNCHANGED; + } /// See AbstractAttribute::getAsStr(). const std::string getAsStr() const override { return getAssumed() ? "nofree" : "may-free"; } +}; - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override; +struct AANoFreeFunction final : public AANoFreeImpl { + AANoFreeFunction(const IRPosition &IRP) : AANoFreeImpl(IRP) {} - /// See AbstractAttribute::getAttrKind(). 
- Attribute::AttrKind getAttrKind() const override { return ID; } + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nofree) } +}; - /// Return true if "nofree" is assumed. - bool isAssumedNoFree() const { return getAssumed(); } +/// NoFree attribute deduction for a call sites. +struct AANoFreeCallSite final : AANoFreeImpl { + AANoFreeCallSite(const IRPosition &IRP) : AANoFreeImpl(IRP) {} - /// Return true if "nofree" is known. - bool isKnownNoFree() const { return getKnown(); } + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoFreeImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); + } - /// The identifier used by the Attributor for this class of attributes. - static constexpr Attribute::AttrKind ID = Attribute::NoFree; -}; + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor(*this, FnPos); + return clampStateAndIndicateChange( + getState(), static_cast(FnAA.getState())); + } -ChangeStatus AANoFreeFunction::updateImpl(Attributor &A) { - Function &F = getAnchorScope(); + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nofree); } +}; - // The map from instruction opcodes to those instructions in the function. - auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F); +/// ------------------------ NonNull Argument Attribute ------------------------ +static int64_t getKnownNonNullAndDerefBytesForUse( + Attributor &A, AbstractAttribute &QueryingAA, Value &AssociatedValue, + const Use *U, const Instruction *I, bool &IsNonNull, bool &TrackUse) { + TrackUse = false; + + const Value *UseV = U->get(); + if (!UseV->getType()->isPointerTy()) + return 0; + + Type *PtrTy = UseV->getType(); + const Function *F = I->getFunction(); + bool NullPointerIsDefined = + F ? 
llvm::NullPointerIsDefined(F, PtrTy->getPointerAddressSpace()) : true; + const DataLayout &DL = A.getInfoCache().getDL(); + if (ImmutableCallSite ICS = ImmutableCallSite(I)) { + if (ICS.isBundleOperand(U)) + return 0; + + if (ICS.isCallee(U)) { + IsNonNull |= !NullPointerIsDefined; + return 0; + } - for (unsigned Opcode : - {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr, - (unsigned)Instruction::Call}) { - for (Instruction *I : OpcodeInstMap[Opcode]) { + unsigned ArgNo = ICS.getArgumentNo(U); + IRPosition IRP = IRPosition::callsite_argument(ICS, ArgNo); + auto &DerefAA = A.getAAFor(QueryingAA, IRP); + IsNonNull |= DerefAA.isKnownNonNull(); + return DerefAA.getKnownDereferenceableBytes(); + } - auto ICS = ImmutableCallSite(I); - auto *NoFreeAA = A.getAAFor(*this, *I); + int64_t Offset; + if (const Value *Base = getBasePointerOfAccessPointerOperand(I, Offset, DL)) { + if (Base == &AssociatedValue && getPointerOperand(I) == UseV) { + int64_t DerefBytes = + Offset + (int64_t)DL.getTypeStoreSize(PtrTy->getPointerElementType()); - if ((!NoFreeAA || !NoFreeAA->isAssumedNoFree()) && - !ICS.hasFnAttr(Attribute::NoFree)) { - indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; - } + IsNonNull |= !NullPointerIsDefined; + return DerefBytes; } } - return ChangeStatus::UNCHANGED; -} + if (const Value *Base = + GetPointerBaseWithConstantOffset(UseV, Offset, DL, + /*AllowNonInbounds*/ false)) { + auto &DerefAA = + A.getAAFor(QueryingAA, IRPosition::value(*Base)); + IsNonNull |= (!NullPointerIsDefined && DerefAA.isKnownNonNull()); + IsNonNull |= (!NullPointerIsDefined && (Offset != 0)); + int64_t DerefBytes = DerefAA.getKnownDereferenceableBytes(); + return std::max(int64_t(0), DerefBytes - Offset); + } -/// ------------------------ NonNull Argument Attribute ------------------------ -struct AANonNullImpl : AANonNull, BooleanState { + return 0; +} - AANonNullImpl(Value &V, InformationCache &InfoCache) - : AANonNull(V, InfoCache) {} +struct AANonNullImpl : AANonNull { + AANonNullImpl(const IRPosition &IRP) + : AANonNull(IRP), + NullIsDefined(NullPointerIsDefined( + getAnchorScope(), + getAssociatedValue().getType()->getPointerAddressSpace())) {} - AANonNullImpl(Value *AssociatedVal, Value &AnchoredValue, - InformationCache &InfoCache) - : AANonNull(AssociatedVal, AnchoredValue, InfoCache) {} + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + if (!NullIsDefined && + hasAttr({Attribute::NonNull, Attribute::Dereferenceable})) + indicateOptimisticFixpoint(); + else + AANonNull::initialize(A); + } - /// See AbstractAttribute::getState() - /// { - AbstractState &getState() override { return *this; } - const AbstractState &getState() const override { return *this; } - /// } + /// See AAFromMustBeExecutedContext + bool followUse(Attributor &A, const Use *U, const Instruction *I) { + bool IsNonNull = false; + bool TrackUse = false; + getKnownNonNullAndDerefBytesForUse(A, *this, getAssociatedValue(), U, I, + IsNonNull, TrackUse); + takeKnownMaximum(IsNonNull); + return TrackUse; + } /// See AbstractAttribute::getAsStr(). const std::string getAsStr() const override { return getAssumed() ? "nonnull" : "may-null"; } - /// See AANonNull::isAssumedNonNull(). - bool isAssumedNonNull() const override { return getAssumed(); } + /// Flag to determine if the underlying value can be null and still allow + /// valid accesses. + const bool NullIsDefined; +}; - /// See AANonNull::isKnownNonNull(). 
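// ---------------------------------------------------------------------------
// getKnownNonNullAndDerefBytesForUse() above turns a single memory access into
// facts about the accessed base pointer: if the use is the pointer operand of
// an access at a constant byte offset from the base, at least
// "offset + access size" bytes must be dereferenceable, and the base is also
// known non-null whenever null is not a usable address in that address space.
// The standalone sketch below restates that arithmetic, assuming a
// non-negative offset; AccessFacts and factsFromAccess are illustrative names,
// not part of this patch.
#include <cstdint>

struct AccessFacts {
  int64_t DerefBytes = 0; // bytes known dereferenceable from the base pointer
  bool NonNull = false;   // base pointer known to be non-null
};

static AccessFacts factsFromAccess(int64_t ByteOffset, int64_t AccessSize,
                                   bool NullPointerIsDefined) {
  AccessFacts Facts;
  // The access touches [ByteOffset, ByteOffset + AccessSize), so the base must
  // be dereferenceable at least that far.
  Facts.DerefBytes = ByteOffset + AccessSize;
  // A successful access in an address space where null is not usable also
  // proves the base pointer cannot be null.
  Facts.NonNull = !NullPointerIsDefined;
  return Facts;
}
// ---------------------------------------------------------------------------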
- bool isKnownNonNull() const override { return getKnown(); } +/// NonNull attribute for a floating value. +struct AANonNullFloating + : AAFromMustBeExecutedContext { + using Base = AAFromMustBeExecutedContext; + AANonNullFloating(const IRPosition &IRP) : Base(IRP) {} - /// Generate a predicate that checks if a given value is assumed nonnull. - /// The generated function returns true if a value satisfies any of - /// following conditions. - /// (i) A value is known nonZero(=nonnull). - /// (ii) A value is associated with AANonNull and its isAssumedNonNull() is - /// true. - std::function generatePredicate(Attributor &); -}; + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + Base::initialize(A); -std::function AANonNullImpl::generatePredicate(Attributor &A) { - // FIXME: The `AAReturnedValues` should provide the predicate with the - // `ReturnInst` vector as well such that we can use the control flow sensitive - // version of `isKnownNonZero`. This should fix `test11` in - // `test/Transforms/FunctionAttrs/nonnull.ll` + if (isAtFixpoint()) + return; - std::function Pred = [&](Value &RV) -> bool { - if (isKnownNonZero(&RV, getAnchorScope().getParent()->getDataLayout())) - return true; + const IRPosition &IRP = getIRPosition(); + const Value &V = IRP.getAssociatedValue(); + const DataLayout &DL = A.getDataLayout(); + + // TODO: This context sensitive query should be removed once we can do + // context sensitive queries in the genericValueTraversal below. + if (isKnownNonZero(&V, DL, 0, /* TODO: AC */ nullptr, IRP.getCtxI(), + /* TODO: DT */ nullptr)) + indicateOptimisticFixpoint(); + } - auto *NonNullAA = A.getAAFor(*this, RV); + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + ChangeStatus Change = Base::updateImpl(A); + if (isKnownNonNull()) + return Change; + + if (!NullIsDefined) { + const auto &DerefAA = A.getAAFor(*this, getIRPosition()); + if (DerefAA.getAssumedDereferenceableBytes()) + return Change; + } - ImmutableCallSite ICS(&RV); + const DataLayout &DL = A.getDataLayout(); + + auto VisitValueCB = [&](Value &V, AAAlign::StateType &T, + bool Stripped) -> bool { + const auto &AA = A.getAAFor(*this, IRPosition::value(V)); + if (!Stripped && this == &AA) { + if (!isKnownNonZero(&V, DL, 0, /* TODO: AC */ nullptr, + /* CtxI */ getCtxI(), + /* TODO: DT */ nullptr)) + T.indicatePessimisticFixpoint(); + } else { + // Use abstract attribute information. + const AANonNull::StateType &NS = + static_cast(AA.getState()); + T ^= NS; + } + return T.isValidState(); + }; - if ((!NonNullAA || !NonNullAA->isAssumedNonNull()) && - (!ICS || !ICS.hasRetAttr(Attribute::NonNull))) - return false; + StateType T; + if (!genericValueTraversal(A, getIRPosition(), *this, + T, VisitValueCB)) + return indicatePessimisticFixpoint(); - return true; - }; + return clampStateAndIndicateChange(getState(), T); + } - return Pred; -} + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(nonnull) } +}; /// NonNull attribute for function return value. 
-struct AANonNullReturned : AANonNullImpl { +struct AANonNullReturned final + : AAReturnedFromReturnedValues { + AANonNullReturned(const IRPosition &IRP) + : AAReturnedFromReturnedValues(IRP) {} - AANonNullReturned(Function &F, InformationCache &InfoCache) - : AANonNullImpl(F, InfoCache) {} + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(nonnull) } +}; - /// See AbstractAttribute::getManifestPosition(). - ManifestPosition getManifestPosition() const override { return MP_RETURNED; } +/// NonNull attribute for function argument. +struct AANonNullArgument final + : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext { + AANonNullArgument(const IRPosition &IRP) + : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext( + IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nonnull) } +}; - /// See AbstractAttriubute::initialize(...). - void initialize(Attributor &A) override { - Function &F = getAnchorScope(); +struct AANonNullCallSiteArgument final : AANonNullFloating { + AANonNullCallSiteArgument(const IRPosition &IRP) : AANonNullFloating(IRP) {} - // Already nonnull. - if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex, - Attribute::NonNull)) - indicateOptimisticFixpoint(); + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(nonnull) } +}; + +/// NonNull attribute for a call site return position. +struct AANonNullCallSiteReturned final + : AACallSiteReturnedFromReturnedAndMustBeExecutedContext { + AANonNullCallSiteReturned(const IRPosition &IRP) + : AACallSiteReturnedFromReturnedAndMustBeExecutedContext( + IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nonnull) } +}; + +/// ------------------------ No-Recurse Attributes ---------------------------- + +struct AANoRecurseImpl : public AANoRecurse { + AANoRecurseImpl(const IRPosition &IRP) : AANoRecurse(IRP) {} + + /// See AbstractAttribute::getAsStr() + const std::string getAsStr() const override { + return getAssumed() ? "norecurse" : "may-recurse"; + } +}; + +struct AANoRecurseFunction final : AANoRecurseImpl { + AANoRecurseFunction(const IRPosition &IRP) : AANoRecurseImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoRecurseImpl::initialize(A); + if (const Function *F = getAnchorScope()) + if (A.getInfoCache().getSccSize(*F) == 1) + return; + indicatePessimisticFixpoint(); } /// See AbstractAttribute::updateImpl(...). 
- ChangeStatus updateImpl(Attributor &A) override; + ChangeStatus updateImpl(Attributor &A) override { + + auto CheckForNoRecurse = [&](Instruction &I) { + ImmutableCallSite ICS(&I); + if (ICS.hasFnAttr(Attribute::NoRecurse)) + return true; + + const auto &NoRecurseAA = + A.getAAFor(*this, IRPosition::callsite_function(ICS)); + if (!NoRecurseAA.isAssumedNoRecurse()) + return false; + + // Recursion to the same function + if (ICS.getCalledFunction() == getAnchorScope()) + return false; + + return true; + }; + + if (!A.checkForAllCallLikeInstructions(CheckForNoRecurse, *this)) + return indicatePessimisticFixpoint(); + return ChangeStatus::UNCHANGED; + } + + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(norecurse) } }; -ChangeStatus AANonNullReturned::updateImpl(Attributor &A) { - Function &F = getAnchorScope(); +/// NoRecurse attribute deduction for a call sites. +struct AANoRecurseCallSite final : AANoRecurseImpl { + AANoRecurseCallSite(const IRPosition &IRP) : AANoRecurseImpl(IRP) {} - auto *AARetVal = A.getAAFor(*this, F); - if (!AARetVal) { - indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoRecurseImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); } - std::function Pred = this->generatePredicate(A); - if (!AARetVal->checkForallReturnedValues(Pred)) { - indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor(*this, FnPos); + return clampStateAndIndicateChange( + getState(), + static_cast(FnAA.getState())); } - return ChangeStatus::UNCHANGED; + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(norecurse); } +}; + +/// ------------------------ Will-Return Attributes ---------------------------- + +// Helper function that checks whether a function has any cycle. +// TODO: Replace with more efficent code +static bool containsCycle(Function &F) { + SmallPtrSet Visited; + + // Traverse BB by dfs and check whether successor is already visited. + for (BasicBlock *BB : depth_first(&F)) { + Visited.insert(BB); + for (auto *SuccBB : successors(BB)) { + if (Visited.count(SuccBB)) + return true; + } + } + return false; } -/// NonNull attribute for function argument. -struct AANonNullArgument : AANonNullImpl { +// Helper function that checks the function have a loop which might become an +// endless loop +// FIXME: Any cycle is regarded as endless loop for now. +// We have to allow some patterns. +static bool containsPossiblyEndlessLoop(Function *F) { + return !F || !F->hasExactDefinition() || containsCycle(*F); +} + +struct AAWillReturnImpl : public AAWillReturn { + AAWillReturnImpl(const IRPosition &IRP) : AAWillReturn(IRP) {} + + /// See AbstractAttribute::initialize(...). 
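// ---------------------------------------------------------------------------
// containsCycle() above walks the CFG in depth-first order and reports a
// possible cycle as soon as any successor has already been visited. That is
// deliberately conservative: as the FIXME notes, every hit is treated as a
// potential endless loop, and the check can also fire on acyclic shapes such
// as the join block of an if/else diamond. The standalone sketch below
// reproduces the same visited-set walk on a small adjacency-list graph; Graph
// and mightContainCycle are illustrative names, not part of this patch.
#include <set>
#include <vector>

using Graph = std::vector<std::vector<int>>; // Graph[N] = successors of node N

static bool mightContainCycle(const Graph &G, int Entry) {
  std::set<int> Visited;
  std::vector<int> Worklist{Entry};
  while (!Worklist.empty()) {
    int N = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(N).second)
      continue; // already processed this node
    for (int Succ : G[N]) {
      if (Visited.count(Succ))
        return true; // edge back into visited territory: assume a cycle
      Worklist.push_back(Succ);
    }
  }
  return false;
}
// ---------------------------------------------------------------------------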
+ void initialize(Attributor &A) override { + AAWillReturn::initialize(A); + + Function *F = getAssociatedFunction(); + if (containsPossiblyEndlessLoop(F)) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + auto CheckForWillReturn = [&](Instruction &I) { + IRPosition IPos = IRPosition::callsite_function(ImmutableCallSite(&I)); + const auto &WillReturnAA = A.getAAFor(*this, IPos); + if (WillReturnAA.isKnownWillReturn()) + return true; + if (!WillReturnAA.isAssumedWillReturn()) + return false; + const auto &NoRecurseAA = A.getAAFor(*this, IPos); + return NoRecurseAA.isAssumedNoRecurse(); + }; + + if (!A.checkForAllCallLikeInstructions(CheckForWillReturn, *this)) + return indicatePessimisticFixpoint(); + + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::getAsStr() + const std::string getAsStr() const override { + return getAssumed() ? "willreturn" : "may-noreturn"; + } +}; + +struct AAWillReturnFunction final : AAWillReturnImpl { + AAWillReturnFunction(const IRPosition &IRP) : AAWillReturnImpl(IRP) {} - AANonNullArgument(Argument &A, InformationCache &InfoCache) - : AANonNullImpl(A, InfoCache) {} + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(willreturn) } +}; - /// See AbstractAttribute::getManifestPosition(). - ManifestPosition getManifestPosition() const override { return MP_ARGUMENT; } +/// WillReturn attribute deduction for a call sites. +struct AAWillReturnCallSite final : AAWillReturnImpl { + AAWillReturnCallSite(const IRPosition &IRP) : AAWillReturnImpl(IRP) {} - /// See AbstractAttriubute::initialize(...). + /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { - Argument *Arg = cast(getAssociatedValue()); - if (Arg->hasNonNullAttr()) - indicateOptimisticFixpoint(); + AAWillReturnImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); } /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override; + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor(*this, FnPos); + return clampStateAndIndicateChange( + getState(), + static_cast(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(willreturn); } }; -/// NonNull attribute for a call site argument. -struct AANonNullCallSiteArgument : AANonNullImpl { +/// ------------------------ NoAlias Argument Attribute ------------------------ + +struct AANoAliasImpl : AANoAlias { + AANoAliasImpl(const IRPosition &IRP) : AANoAlias(IRP) {} + + const std::string getAsStr() const override { + return getAssumed() ? "noalias" : "may-alias"; + } +}; - /// See AANonNullImpl::AANonNullImpl(...). - AANonNullCallSiteArgument(CallSite CS, unsigned ArgNo, - InformationCache &InfoCache) - : AANonNullImpl(CS.getArgOperand(ArgNo), *CS.getInstruction(), InfoCache), - ArgNo(ArgNo) {} +/// NoAlias attribute for a floating value. 
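AANoRecurseCallSite and AAWillReturnCallSite above both answer a call-site query the same way:
look up the state computed for the callee's function position, clamp the local state against it,
and report whether that changed anything so the fixpoint driver knows to rerun dependents. The
snippet below is a deliberately simplified model of that role; the real BooleanState/IntegerState
and clampStateAndIndicateChange differ in detail. The AANoAliasFloating struct introduced by the
comment just above follows immediately after this aside.

    // Simplified model of "clamp against another position and report change".
    enum class ChangeStatus { UNCHANGED, CHANGED };

    struct BoolState {
      bool Assumed = true; // optimistic start
      bool Known = false;
    };

    static ChangeStatus clampAgainst(BoolState &S, const BoolState &R) {
      BoolState Before = S;
      S.Assumed = S.Assumed && R.Assumed; // assumptions can only get weaker
      S.Known = S.Known || R.Known;       // facts can only get stronger
      return (S.Assumed == Before.Assumed && S.Known == Before.Known)
                 ? ChangeStatus::UNCHANGED
                 : ChangeStatus::CHANGED;
    }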
+struct AANoAliasFloating final : AANoAliasImpl { + AANoAliasFloating(const IRPosition &IRP) : AANoAliasImpl(IRP) {} /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { - CallSite CS(&getAnchoredValue()); - if (isKnownNonZero(getAssociatedValue(), - getAnchorScope().getParent()->getDataLayout()) || - CS.paramHasAttr(ArgNo, getAttrKind())) + AANoAliasImpl::initialize(A); + Value &Val = getAssociatedValue(); + if (isa(Val)) + indicateOptimisticFixpoint(); + if (isa(Val) && + Val.getType()->getPointerAddressSpace() == 0) indicateOptimisticFixpoint(); } - /// See AbstractAttribute::updateImpl(Attributor &A). - ChangeStatus updateImpl(Attributor &A) override; + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Implement this. + return indicatePessimisticFixpoint(); + } - /// See AbstractAttribute::getManifestPosition(). - ManifestPosition getManifestPosition() const override { - return MP_CALL_SITE_ARGUMENT; - }; + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FLOATING_ATTR(noalias) + } +}; - // Return argument index of associated value. - int getArgNo() const { return ArgNo; } +/// NoAlias attribute for an argument. +struct AANoAliasArgument final + : AAArgumentFromCallSiteArguments { + AANoAliasArgument(const IRPosition &IRP) + : AAArgumentFromCallSiteArguments(IRP) {} -private: - unsigned ArgNo; + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(noalias) } }; -ChangeStatus AANonNullArgument::updateImpl(Attributor &A) { - Function &F = getAnchorScope(); - Argument &Arg = cast(getAnchoredValue()); - unsigned ArgNo = Arg.getArgNo(); +struct AANoAliasCallSiteArgument final : AANoAliasImpl { + AANoAliasCallSiteArgument(const IRPosition &IRP) : AANoAliasImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + // See callsite argument attribute and callee argument attribute. + ImmutableCallSite ICS(&getAnchorValue()); + if (ICS.paramHasAttr(getArgNo(), Attribute::NoAlias)) + indicateOptimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // We can deduce "noalias" if the following conditions hold. + // (i) Associated value is assumed to be noalias in the definition. + // (ii) Associated value is assumed to be no-capture in all the uses + // possibly executed before this callsite. + // (iii) There is no other pointer argument which could alias with the + // value. + + const Value &V = getAssociatedValue(); + const IRPosition IRP = IRPosition::value(V); + + // (i) Check whether noalias holds in the definition. + + auto &NoAliasAA = A.getAAFor(*this, IRP); + + if (!NoAliasAA.isAssumedNoAlias()) + return indicatePessimisticFixpoint(); + + LLVM_DEBUG(dbgs() << "[Attributor][AANoAliasCSArg] " << V + << " is assumed NoAlias in the definition\n"); + + // (ii) Check whether the value is captured in the scope using AANoCapture. + // FIXME: This is conservative though, it is better to look at CFG and + // check only uses possibly executed before this callsite. 
+ + auto &NoCaptureAA = A.getAAFor(*this, IRP); + if (!NoCaptureAA.isAssumedNoCaptureMaybeReturned()) { + LLVM_DEBUG( + dbgs() << "[Attributor][AANoAliasCSArg] " << V + << " cannot be noalias as it is potentially captured\n"); + return indicatePessimisticFixpoint(); + } + + // (iii) Check there is no other pointer argument which could alias with the + // value. + ImmutableCallSite ICS(&getAnchorValue()); + for (unsigned i = 0; i < ICS.getNumArgOperands(); i++) { + if (getArgNo() == (int)i) + continue; + const Value *ArgOp = ICS.getArgOperand(i); + if (!ArgOp->getType()->isPointerTy()) + continue; + + if (const Function *F = getAnchorScope()) { + if (AAResults *AAR = A.getInfoCache().getAAResultsForFunction(*F)) { + bool IsAliasing = AAR->isNoAlias(&getAssociatedValue(), ArgOp); + LLVM_DEBUG(dbgs() + << "[Attributor][NoAliasCSArg] Check alias between " + "callsite arguments " + << AAR->isNoAlias(&getAssociatedValue(), ArgOp) << " " + << getAssociatedValue() << " " << *ArgOp << " => " + << (IsAliasing ? "" : "no-") << "alias \n"); + + if (IsAliasing) + continue; + } + } + return indicatePessimisticFixpoint(); + } + + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(noalias) } +}; + +/// NoAlias attribute for function return value. +struct AANoAliasReturned final : AANoAliasImpl { + AANoAliasReturned(const IRPosition &IRP) : AANoAliasImpl(IRP) {} + + /// See AbstractAttribute::updateImpl(...). + virtual ChangeStatus updateImpl(Attributor &A) override { + + auto CheckReturnValue = [&](Value &RV) -> bool { + if (Constant *C = dyn_cast(&RV)) + if (C->isNullValue() || isa(C)) + return true; + + /// For now, we can only deduce noalias if we have call sites. + /// FIXME: add more support. + ImmutableCallSite ICS(&RV); + if (!ICS) + return false; + + const IRPosition &RVPos = IRPosition::value(RV); + const auto &NoAliasAA = A.getAAFor(*this, RVPos); + if (!NoAliasAA.isAssumedNoAlias()) + return false; + + const auto &NoCaptureAA = A.getAAFor(*this, RVPos); + return NoCaptureAA.isAssumedNoCaptureMaybeReturned(); + }; + + if (!A.checkForAllReturnedValues(CheckReturnValue, *this)) + return indicatePessimisticFixpoint(); + + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(noalias) } +}; + +/// NoAlias attribute deduction for a call site return value. +struct AANoAliasCallSiteReturned final : AANoAliasImpl { + AANoAliasCallSiteReturned(const IRPosition &IRP) : AANoAliasImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoAliasImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. 
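Step (iii) above walks every other pointer operand of the call and asks alias analysis whether it
may alias the candidate argument; a single may-alias answer is enough to give up on noalias.
Purely as an illustration, the sketch below replaces the AAResults query with byte-range overlap
on hypothetical (begin, size) pairs; this is not how the pass models arguments. The body of
AANoAliasCallSiteReturned::updateImpl resumes right below.

    #include <cstdint>
    #include <vector>

    // Stand-in for an alias query: arguments modelled as [Begin, Begin+Size)
    // byte ranges that "may alias" iff the ranges overlap.
    struct ArgRange { uint64_t Begin, Size; };

    static bool mayAlias(const ArgRange &A, const ArgRange &B) {
      return A.Begin < B.Begin + B.Size && B.Begin < A.Begin + A.Size;
    }

    static bool noOtherArgumentAliases(const std::vector<ArgRange> &Args,
                                       size_t CandidateIdx) {
      for (size_t I = 0; I < Args.size(); ++I)
        if (I != CandidateIdx && mayAlias(Args[I], Args[CandidateIdx]))
          return false; // Another argument might alias: give up.
      return true;
    }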
+ Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::returned(*F); + auto &FnAA = A.getAAFor(*this, FnPos); + return clampStateAndIndicateChange( + getState(), static_cast(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noalias); } +}; + +/// -------------------AAIsDead Function Attribute----------------------- + +struct AAIsDeadImpl : public AAIsDead { + AAIsDeadImpl(const IRPosition &IRP) : AAIsDead(IRP) {} + + void initialize(Attributor &A) override { + const Function *F = getAssociatedFunction(); + if (F && !F->isDeclaration()) + exploreFromEntry(A, F); + } + + void exploreFromEntry(Attributor &A, const Function *F) { + ToBeExploredPaths.insert(&(F->getEntryBlock().front())); + + for (size_t i = 0; i < ToBeExploredPaths.size(); ++i) + if (const Instruction *NextNoReturnI = + findNextNoReturn(A, ToBeExploredPaths[i])) + NoReturnCalls.insert(NextNoReturnI); + + // Mark the block live after we looked for no-return instructions. + assumeLive(A, F->getEntryBlock()); + } + + /// Find the next assumed noreturn instruction in the block of \p I starting + /// from, thus including, \p I. + /// + /// The caller is responsible to monitor the ToBeExploredPaths set as new + /// instructions discovered in other basic block will be placed in there. + /// + /// \returns The next assumed noreturn instructions in the block of \p I + /// starting from, thus including, \p I. + const Instruction *findNextNoReturn(Attributor &A, const Instruction *I); + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + return "Live[#BB " + std::to_string(AssumedLiveBlocks.size()) + "/" + + std::to_string(getAssociatedFunction()->size()) + "][#NRI " + + std::to_string(NoReturnCalls.size()) + "]"; + } + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + assert(getState().isValidState() && + "Attempted to manifest an invalid state!"); + + ChangeStatus HasChanged = ChangeStatus::UNCHANGED; + Function &F = *getAssociatedFunction(); + + if (AssumedLiveBlocks.empty()) { + A.deleteAfterManifest(F); + return ChangeStatus::CHANGED; + } + + // Flag to determine if we can change an invoke to a call assuming the + // callee is nounwind. This is not possible if the personality of the + // function allows to catch asynchronous exceptions. + bool Invoke2CallAllowed = !mayCatchAsynchronousExceptions(F); + + for (const Instruction *NRC : NoReturnCalls) { + Instruction *I = const_cast(NRC); + BasicBlock *BB = I->getParent(); + Instruction *SplitPos = I->getNextNode(); + // TODO: mark stuff before unreachable instructions as dead. + + if (auto *II = dyn_cast(I)) { + // If we keep the invoke the split position is at the beginning of the + // normal desitination block (it invokes a noreturn function after all). + BasicBlock *NormalDestBB = II->getNormalDest(); + SplitPos = &NormalDestBB->front(); + + /// Invoke is replaced with a call and unreachable is placed after it if + /// the callee is nounwind and noreturn. Otherwise, we keep the invoke + /// and only place an unreachable in the normal successor. 
+ if (Invoke2CallAllowed) { + if (II->getCalledFunction()) { + const IRPosition &IPos = IRPosition::callsite_function(*II); + const auto &AANoUnw = A.getAAFor(*this, IPos); + if (AANoUnw.isAssumedNoUnwind()) { + LLVM_DEBUG(dbgs() + << "[AAIsDead] Replace invoke with call inst\n"); + // We do not need an invoke (II) but instead want a call followed + // by an unreachable. However, we do not remove II as other + // abstract attributes might have it cached as part of their + // results. Given that we modify the CFG anyway, we simply keep II + // around but in a new dead block. To avoid II being live through + // a different edge we have to ensure the block we place it in is + // only reached from the current block of II and then not reached + // at all when we insert the unreachable. + SplitBlockPredecessors(NormalDestBB, {BB}, ".i2c"); + CallInst *CI = createCallMatchingInvoke(II); + CI->insertBefore(II); + CI->takeName(II); + II->replaceAllUsesWith(CI); + SplitPos = CI->getNextNode(); + } + } + } + + if (SplitPos == &NormalDestBB->front()) { + // If this is an invoke of a noreturn function the edge to the normal + // destination block is dead but not necessarily the block itself. + // TODO: We need to move to an edge based system during deduction and + // also manifest. + assert(!NormalDestBB->isLandingPad() && + "Expected the normal destination not to be a landingpad!"); + if (NormalDestBB->getUniquePredecessor() == BB) { + assumeLive(A, *NormalDestBB); + } else { + BasicBlock *SplitBB = + SplitBlockPredecessors(NormalDestBB, {BB}, ".dead"); + // The split block is live even if it contains only an unreachable + // instruction at the end. + assumeLive(A, *SplitBB); + SplitPos = SplitBB->getTerminator(); + HasChanged = ChangeStatus::CHANGED; + } + } + } + + if (isa_and_nonnull(SplitPos)) + continue; + + BB = SplitPos->getParent(); + SplitBlock(BB, SplitPos); + changeToUnreachable(BB->getTerminator(), /* UseLLVMTrap */ false); + HasChanged = ChangeStatus::CHANGED; + } + + for (BasicBlock &BB : F) + if (!AssumedLiveBlocks.count(&BB)) + A.deleteAfterManifest(BB); + + return HasChanged; + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override; + + /// See AAIsDead::isAssumedDead(BasicBlock *). + bool isAssumedDead(const BasicBlock *BB) const override { + assert(BB->getParent() == getAssociatedFunction() && + "BB must be in the same anchor scope function."); + + if (!getAssumed()) + return false; + return !AssumedLiveBlocks.count(BB); + } + + /// See AAIsDead::isKnownDead(BasicBlock *). + bool isKnownDead(const BasicBlock *BB) const override { + return getKnown() && isAssumedDead(BB); + } + + /// See AAIsDead::isAssumed(Instruction *I). + bool isAssumedDead(const Instruction *I) const override { + assert(I->getParent()->getParent() == getAssociatedFunction() && + "Instruction must be in the same anchor scope function."); + + if (!getAssumed()) + return false; + + // If it is not in AssumedLiveBlocks then it for sure dead. + // Otherwise, it can still be after noreturn call in a live block. + if (!AssumedLiveBlocks.count(I->getParent())) + return true; + + // If it is not after a noreturn call, than it is live. + return isAfterNoReturn(I); + } + + /// See AAIsDead::isKnownDead(Instruction *I). + bool isKnownDead(const Instruction *I) const override { + return getKnown() && isAssumedDead(I); + } + + /// Check if instruction is after noreturn call, in other words, assumed dead. 
+ bool isAfterNoReturn(const Instruction *I) const; + + /// Determine if \p F might catch asynchronous exceptions. + static bool mayCatchAsynchronousExceptions(const Function &F) { + return F.hasPersonalityFn() && !canSimplifyInvokeNoUnwind(&F); + } + + /// Assume \p BB is (partially) live now and indicate to the Attributor \p A + /// that internal function called from \p BB should now be looked at. + void assumeLive(Attributor &A, const BasicBlock &BB) { + if (!AssumedLiveBlocks.insert(&BB).second) + return; + + // We assume that all of BB is (probably) live now and if there are calls to + // internal functions we will assume that those are now live as well. This + // is a performance optimization for blocks with calls to a lot of internal + // functions. It can however cause dead functions to be treated as live. + for (const Instruction &I : BB) + if (ImmutableCallSite ICS = ImmutableCallSite(&I)) + if (const Function *F = ICS.getCalledFunction()) + if (F->hasLocalLinkage()) + A.markLiveInternalFunction(*F); + } + + /// Collection of to be explored paths. + SmallSetVector ToBeExploredPaths; + + /// Collection of all assumed live BasicBlocks. + DenseSet AssumedLiveBlocks; + + /// Collection of calls with noreturn attribute, assumed or knwon. + SmallSetVector NoReturnCalls; +}; + +struct AAIsDeadFunction final : public AAIsDeadImpl { + AAIsDeadFunction(const IRPosition &IRP) : AAIsDeadImpl(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECL(PartiallyDeadBlocks, Function, + "Number of basic blocks classified as partially dead"); + BUILD_STAT_NAME(PartiallyDeadBlocks, Function) += NoReturnCalls.size(); + } +}; + +bool AAIsDeadImpl::isAfterNoReturn(const Instruction *I) const { + const Instruction *PrevI = I->getPrevNode(); + while (PrevI) { + if (NoReturnCalls.count(PrevI)) + return true; + PrevI = PrevI->getPrevNode(); + } + return false; +} + +const Instruction *AAIsDeadImpl::findNextNoReturn(Attributor &A, + const Instruction *I) { + const BasicBlock *BB = I->getParent(); + const Function &F = *BB->getParent(); + + // Flag to determine if we can change an invoke to a call assuming the callee + // is nounwind. This is not possible if the personality of the function allows + // to catch asynchronous exceptions. + bool Invoke2CallAllowed = !mayCatchAsynchronousExceptions(F); + + // TODO: We should have a function that determines if an "edge" is dead. + // Edges could be from an instruction to the next or from a terminator + // to the successor. For now, we need to special case the unwind block + // of InvokeInst below. + + while (I) { + ImmutableCallSite ICS(I); + + if (ICS) { + const IRPosition &IPos = IRPosition::callsite_function(ICS); + // Regarless of the no-return property of an invoke instruction we only + // learn that the regular successor is not reachable through this + // instruction but the unwind block might still be. + if (auto *Invoke = dyn_cast(I)) { + // Use nounwind to justify the unwind block is dead as well. + const auto &AANoUnw = A.getAAFor(*this, IPos); + if (!Invoke2CallAllowed || !AANoUnw.isAssumedNoUnwind()) { + assumeLive(A, *Invoke->getUnwindDest()); + ToBeExploredPaths.insert(&Invoke->getUnwindDest()->front()); + } + } + + const auto &NoReturnAA = A.getAAFor(*this, IPos); + if (NoReturnAA.isAssumedNoReturn()) + return I; + } + + I = I->getNextNode(); + } + + // get new paths (reachable blocks). 
+ for (const BasicBlock *SuccBB : successors(BB)) { + assumeLive(A, *SuccBB); + ToBeExploredPaths.insert(&SuccBB->front()); + } + + // No noreturn instruction found. + return nullptr; +} + +ChangeStatus AAIsDeadImpl::updateImpl(Attributor &A) { + ChangeStatus Status = ChangeStatus::UNCHANGED; + + // Temporary collection to iterate over existing noreturn instructions. This + // will alow easier modification of NoReturnCalls collection + SmallVector NoReturnChanged; + + for (const Instruction *I : NoReturnCalls) + NoReturnChanged.push_back(I); + + for (const Instruction *I : NoReturnChanged) { + size_t Size = ToBeExploredPaths.size(); + + const Instruction *NextNoReturnI = findNextNoReturn(A, I); + if (NextNoReturnI != I) { + Status = ChangeStatus::CHANGED; + NoReturnCalls.remove(I); + if (NextNoReturnI) + NoReturnCalls.insert(NextNoReturnI); + } + + // Explore new paths. + while (Size != ToBeExploredPaths.size()) { + Status = ChangeStatus::CHANGED; + if (const Instruction *NextNoReturnI = + findNextNoReturn(A, ToBeExploredPaths[Size++])) + NoReturnCalls.insert(NextNoReturnI); + } + } + + LLVM_DEBUG(dbgs() << "[AAIsDead] AssumedLiveBlocks: " + << AssumedLiveBlocks.size() << " Total number of blocks: " + << getAssociatedFunction()->size() << "\n"); + + // If we know everything is live there is no need to query for liveness. + if (NoReturnCalls.empty() && + getAssociatedFunction()->size() == AssumedLiveBlocks.size()) { + // Indicating a pessimistic fixpoint will cause the state to be "invalid" + // which will cause the Attributor to not return the AAIsDead on request, + // which will prevent us from querying isAssumedDead(). + indicatePessimisticFixpoint(); + assert(!isValidState() && "Expected an invalid state!"); + Status = ChangeStatus::CHANGED; + } + + return Status; +} + +/// Liveness information for a call sites. +struct AAIsDeadCallSite final : AAIsDeadImpl { + AAIsDeadCallSite(const IRPosition &IRP) : AAIsDeadImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites instead of + // redirecting requests to the callee. + llvm_unreachable("Abstract attributes for liveness are not " + "supported for call sites yet!"); + } + + /// See AbstractAttribute::updateImpl(...). 
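AAIsDeadImpl::updateImpl above keeps re-running findNextNoReturn and exploring any newly reachable
program points, so the set of assumed-live blocks only ever grows across iterations. Below is a
standalone sketch of that core reachability idea, with integer block ids in place of BasicBlocks
and blocks assumed to end in a noreturn call acting as barriers; the names are hypothetical, not
the pass's data structures. AAIsDeadCallSite::updateImpl continues right after this aside.

    #include <set>
    #include <vector>

    // A block is assumed live if it is reachable from the entry without
    // passing beyond an assumed-noreturn call. Retracting such an assumption
    // and rerunning this can only grow the live set, so a fixpoint exists.
    static std::set<int>
    computeAssumedLive(const std::vector<std::vector<int>> &Succs, int Entry,
                       const std::set<int> &EndsInAssumedNoReturn) {
      std::set<int> Live;
      std::vector<int> Worklist{Entry};
      while (!Worklist.empty()) {
        int BB = Worklist.back();
        Worklist.pop_back();
        if (!Live.insert(BB).second)
          continue;
        if (EndsInAssumedNoReturn.count(BB))
          continue; // Successors are not reached through this block.
        for (int S : Succs[BB])
          Worklist.push_back(S);
      }
      return Live;
    }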
+ ChangeStatus updateImpl(Attributor &A) override { + return indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override {} +}; + +/// -------------------- Dereferenceable Argument Attribute -------------------- + +template <> +ChangeStatus clampStateAndIndicateChange(DerefState &S, + const DerefState &R) { + ChangeStatus CS0 = clampStateAndIndicateChange( + S.DerefBytesState, R.DerefBytesState); + ChangeStatus CS1 = + clampStateAndIndicateChange(S.GlobalState, R.GlobalState); + return CS0 | CS1; +} + +struct AADereferenceableImpl : AADereferenceable { + AADereferenceableImpl(const IRPosition &IRP) : AADereferenceable(IRP) {} + using StateType = DerefState; + + void initialize(Attributor &A) override { + SmallVector Attrs; + getAttrs({Attribute::Dereferenceable, Attribute::DereferenceableOrNull}, + Attrs); + for (const Attribute &Attr : Attrs) + takeKnownDerefBytesMaximum(Attr.getValueAsInt()); + + NonNullAA = &A.getAAFor(*this, getIRPosition()); + + const IRPosition &IRP = this->getIRPosition(); + bool IsFnInterface = IRP.isFnInterfaceKind(); + const Function *FnScope = IRP.getAnchorScope(); + if (IsFnInterface && (!FnScope || !FnScope->hasExactDefinition())) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::getState() + /// { + StateType &getState() override { return *this; } + const StateType &getState() const override { return *this; } + /// } + + /// See AAFromMustBeExecutedContext + bool followUse(Attributor &A, const Use *U, const Instruction *I) { + bool IsNonNull = false; + bool TrackUse = false; + int64_t DerefBytes = getKnownNonNullAndDerefBytesForUse( + A, *this, getAssociatedValue(), U, I, IsNonNull, TrackUse); + takeKnownDerefBytesMaximum(DerefBytes); + return TrackUse; + } + + void getDeducedAttributes(LLVMContext &Ctx, + SmallVectorImpl &Attrs) const override { + // TODO: Add *_globally support + if (isAssumedNonNull()) + Attrs.emplace_back(Attribute::getWithDereferenceableBytes( + Ctx, getAssumedDereferenceableBytes())); + else + Attrs.emplace_back(Attribute::getWithDereferenceableOrNullBytes( + Ctx, getAssumedDereferenceableBytes())); + } + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + if (!getAssumedDereferenceableBytes()) + return "unknown-dereferenceable"; + return std::string("dereferenceable") + + (isAssumedNonNull() ? "" : "_or_null") + + (isAssumedGlobal() ? "_globally" : "") + "<" + + std::to_string(getKnownDereferenceableBytes()) + "-" + + std::to_string(getAssumedDereferenceableBytes()) + ">"; + } +}; + +/// Dereferenceable attribute for a floating value. +struct AADereferenceableFloating + : AAFromMustBeExecutedContext { + using Base = + AAFromMustBeExecutedContext; + AADereferenceableFloating(const IRPosition &IRP) : Base(IRP) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + ChangeStatus Change = Base::updateImpl(A); + + const DataLayout &DL = A.getDataLayout(); + + auto VisitValueCB = [&](Value &V, DerefState &T, bool Stripped) -> bool { + unsigned IdxWidth = + DL.getIndexSizeInBits(V.getType()->getPointerAddressSpace()); + APInt Offset(IdxWidth, 0); + const Value *Base = + V.stripAndAccumulateInBoundsConstantOffsets(DL, Offset); + + const auto &AA = + A.getAAFor(*this, IRPosition::value(*Base)); + int64_t DerefBytes = 0; + if (!Stripped && this == &AA) { + // Use IR information if we did not strip anything. + // TODO: track globally. 
+ bool CanBeNull; + DerefBytes = Base->getPointerDereferenceableBytes(DL, CanBeNull); + T.GlobalState.indicatePessimisticFixpoint(); + } else { + const DerefState &DS = static_cast(AA.getState()); + DerefBytes = DS.DerefBytesState.getAssumed(); + T.GlobalState &= DS.GlobalState; + } + + // For now we do not try to "increase" dereferenceability due to negative + // indices as we first have to come up with code to deal with loops and + // for overflows of the dereferenceable bytes. + int64_t OffsetSExt = Offset.getSExtValue(); + if (OffsetSExt < 0) + OffsetSExt = 0; + + T.takeAssumedDerefBytesMinimum( + std::max(int64_t(0), DerefBytes - OffsetSExt)); + + if (this == &AA) { + if (!Stripped) { + // If nothing was stripped IR information is all we got. + T.takeKnownDerefBytesMaximum( + std::max(int64_t(0), DerefBytes - OffsetSExt)); + T.indicatePessimisticFixpoint(); + } else if (OffsetSExt > 0) { + // If something was stripped but there is circular reasoning we look + // for the offset. If it is positive we basically decrease the + // dereferenceable bytes in a circluar loop now, which will simply + // drive them down to the known value in a very slow way which we + // can accelerate. + T.indicatePessimisticFixpoint(); + } + } + + return T.isValidState(); + }; + + DerefState T; + if (!genericValueTraversal( + A, getIRPosition(), *this, T, VisitValueCB)) + return indicatePessimisticFixpoint(); + + return Change | clampStateAndIndicateChange(getState(), T); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FLOATING_ATTR(dereferenceable) + } +}; + +/// Dereferenceable attribute for a return value. +struct AADereferenceableReturned final + : AAReturnedFromReturnedValues { + AADereferenceableReturned(const IRPosition &IRP) + : AAReturnedFromReturnedValues(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FNRET_ATTR(dereferenceable) + } +}; + +/// Dereferenceable attribute for an argument +struct AADereferenceableArgument final + : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext< + AADereferenceable, AADereferenceableImpl, DerefState> { + using Base = AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext< + AADereferenceable, AADereferenceableImpl, DerefState>; + AADereferenceableArgument(const IRPosition &IRP) : Base(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_ARG_ATTR(dereferenceable) + } +}; + +/// Dereferenceable attribute for a call site argument. +struct AADereferenceableCallSiteArgument final : AADereferenceableFloating { + AADereferenceableCallSiteArgument(const IRPosition &IRP) + : AADereferenceableFloating(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CSARG_ATTR(dereferenceable) + } +}; + +/// Dereferenceable attribute deduction for a call site return value. +struct AADereferenceableCallSiteReturned final + : AACallSiteReturnedFromReturnedAndMustBeExecutedContext< + AADereferenceable, AADereferenceableImpl> { + using Base = AACallSiteReturnedFromReturnedAndMustBeExecutedContext< + AADereferenceable, AADereferenceableImpl>; + AADereferenceableCallSiteReturned(const IRPosition &IRP) : Base(IRP) {} + + /// See AbstractAttribute::initialize(...). 
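The floating dereferenceable update above strips in-bounds constant offsets and then adjusts the
byte count: with a base known to be dereferenceable for N bytes and a non-negative accumulated
offset k, the derived pointer is dereferenceable for max(0, N - k) bytes, and negative offsets are
clamped rather than credited. A tiny standalone helper spelling out that arithmetic follows; the
AADereferenceableCallSiteReturned::initialize announced above continues right after it.

    #include <algorithm>
    #include <cstdint>

    // If %base is dereferenceable(N) and %p is an inbounds gep of %base by a
    // constant k, then %p is dereferenceable(max(0, N - k)); negative k is
    // clamped to 0, mirroring the conservative treatment in the code above.
    static int64_t derefBytesThroughOffset(int64_t BaseDerefBytes, int64_t Offset) {
      if (Offset < 0)
        Offset = 0;
      return std::max<int64_t>(0, BaseDerefBytes - Offset);
    }
    // Example: derefBytesThroughOffset(16, 4) == 12; derefBytesThroughOffset(16, 32) == 0.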
+ void initialize(Attributor &A) override { + Base::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + + ChangeStatus Change = Base::updateImpl(A); + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::returned(*F); + auto &FnAA = A.getAAFor(*this, FnPos); + return Change | + clampStateAndIndicateChange( + getState(), static_cast(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CS_ATTR(dereferenceable); + } +}; + +// ------------------------ Align Argument Attribute ------------------------ + +struct AAAlignImpl : AAAlign { + AAAlignImpl(const IRPosition &IRP) : AAAlign(IRP) {} + + // Max alignemnt value allowed in IR + static const unsigned MAX_ALIGN = 1U << 29; + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + takeAssumedMinimum(MAX_ALIGN); + + SmallVector Attrs; + getAttrs({Attribute::Alignment}, Attrs); + for (const Attribute &Attr : Attrs) + takeKnownMaximum(Attr.getValueAsInt()); + + if (getIRPosition().isFnInterfaceKind() && + (!getAssociatedFunction() || + !getAssociatedFunction()->hasExactDefinition())) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + ChangeStatus Changed = ChangeStatus::UNCHANGED; + + // Check for users that allow alignment annotations. + Value &AnchorVal = getIRPosition().getAnchorValue(); + for (const Use &U : AnchorVal.uses()) { + if (auto *SI = dyn_cast(U.getUser())) { + if (SI->getPointerOperand() == &AnchorVal) + if (SI->getAlignment() < getAssumedAlign()) { + STATS_DECLTRACK(AAAlign, Store, + "Number of times alignemnt added to a store"); + SI->setAlignment(Align(getAssumedAlign())); + Changed = ChangeStatus::CHANGED; + } + } else if (auto *LI = dyn_cast(U.getUser())) { + if (LI->getPointerOperand() == &AnchorVal) + if (LI->getAlignment() < getAssumedAlign()) { + LI->setAlignment(Align(getAssumedAlign())); + STATS_DECLTRACK(AAAlign, Load, + "Number of times alignemnt added to a load"); + Changed = ChangeStatus::CHANGED; + } + } + } + + return AAAlign::manifest(A) | Changed; + } + + // TODO: Provide a helper to determine the implied ABI alignment and check in + // the existing manifest method and a new one for AAAlignImpl that value + // to avoid making the alignment explicit if it did not improve. + + /// See AbstractAttribute::getDeducedAttributes + virtual void + getDeducedAttributes(LLVMContext &Ctx, + SmallVectorImpl &Attrs) const override { + if (getAssumedAlign() > 1) + Attrs.emplace_back( + Attribute::getWithAlignment(Ctx, Align(getAssumedAlign()))); + } + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + return getAssumedAlign() ? ("align<" + std::to_string(getKnownAlign()) + + "-" + std::to_string(getAssumedAlign()) + ">") + : "unknown-align"; + } +}; + +/// Align attribute for a floating value. +struct AAAlignFloating : AAAlignImpl { + AAAlignFloating(const IRPosition &IRP) : AAAlignImpl(IRP) {} + + /// See AbstractAttribute::updateImpl(...). 
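AAAlignImpl::initialize above seeds the state with takeAssumedMinimum(MAX_ALIGN) and raises the
known side from existing align attributes; during iteration the assumed alignment can only shrink
toward what is known. The struct below is a simplified standalone model of that two-sided state,
not the real IntegerState used by the pass. AAAlignFloating::updateImpl continues right after this
aside.

    #include <algorithm>
    #include <cstdint>

    // Simplified model: the assumed alignment starts at the optimistic maximum
    // and may only shrink (never below the known value); the known alignment
    // starts at 1 and may only grow; the state is at a fixpoint when they meet.
    struct AlignState {
      static constexpr uint64_t MaxAlign = 1ULL << 29; // IR's maximum alignment
      uint64_t Assumed = MaxAlign;
      uint64_t Known = 1;

      void takeAssumedMinimum(uint64_t V) {
        Assumed = std::max(std::min(Assumed, V), Known);
      }
      void takeKnownMaximum(uint64_t V) {
        Known = std::max(Known, V);
        Assumed = std::max(Assumed, Known);
      }
      bool isAtFixpoint() const { return Assumed == Known; }
    };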
+ ChangeStatus updateImpl(Attributor &A) override { + const DataLayout &DL = A.getDataLayout(); + + auto VisitValueCB = [&](Value &V, AAAlign::StateType &T, + bool Stripped) -> bool { + const auto &AA = A.getAAFor(*this, IRPosition::value(V)); + if (!Stripped && this == &AA) { + // Use only IR information if we did not strip anything. + const MaybeAlign PA = V.getPointerAlignment(DL); + T.takeKnownMaximum(PA ? PA->value() : 0); + T.indicatePessimisticFixpoint(); + } else { + // Use abstract attribute information. + const AAAlign::StateType &DS = + static_cast(AA.getState()); + T ^= DS; + } + return T.isValidState(); + }; + + StateType T; + if (!genericValueTraversal(A, getIRPosition(), *this, T, + VisitValueCB)) + return indicatePessimisticFixpoint(); + + // TODO: If we know we visited all incoming values, thus no are assumed + // dead, we can take the known information from the state T. + return clampStateAndIndicateChange(getState(), T); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FLOATING_ATTR(align) } +}; + +/// Align attribute for function return value. +struct AAAlignReturned final + : AAReturnedFromReturnedValues { + AAAlignReturned(const IRPosition &IRP) + : AAReturnedFromReturnedValues(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(aligned) } +}; + +/// Align attribute for function argument. +struct AAAlignArgument final + : AAArgumentFromCallSiteArguments { + AAAlignArgument(const IRPosition &IRP) + : AAArgumentFromCallSiteArguments(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(aligned) } +}; + +struct AAAlignCallSiteArgument final : AAAlignFloating { + AAAlignCallSiteArgument(const IRPosition &IRP) : AAAlignFloating(IRP) {} + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + return AAAlignImpl::manifest(A); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(aligned) } +}; + +/// Align attribute deduction for a call site return value. +struct AAAlignCallSiteReturned final : AAAlignImpl { + AAAlignCallSiteReturned(const IRPosition &IRP) : AAAlignImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AAAlignImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::returned(*F); + auto &FnAA = A.getAAFor(*this, FnPos); + return clampStateAndIndicateChange( + getState(), static_cast(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(align); } +}; + +/// ------------------ Function No-Return Attribute ---------------------------- +struct AANoReturnImpl : public AANoReturn { + AANoReturnImpl(const IRPosition &IRP) : AANoReturn(IRP) {} + + /// See AbstractAttribute::initialize(...). 
+ void initialize(Attributor &A) override { + AANoReturn::initialize(A); + Function *F = getAssociatedFunction(); + if (!F || F->hasFnAttribute(Attribute::WillReturn)) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + return getAssumed() ? "noreturn" : "may-return"; + } + + /// See AbstractAttribute::updateImpl(Attributor &A). + virtual ChangeStatus updateImpl(Attributor &A) override { + const auto &WillReturnAA = A.getAAFor(*this, getIRPosition()); + if (WillReturnAA.isKnownWillReturn()) + return indicatePessimisticFixpoint(); + auto CheckForNoReturn = [](Instruction &) { return false; }; + if (!A.checkForAllInstructions(CheckForNoReturn, *this, + {(unsigned)Instruction::Ret})) + return indicatePessimisticFixpoint(); + return ChangeStatus::UNCHANGED; + } +}; + +struct AANoReturnFunction final : AANoReturnImpl { + AANoReturnFunction(const IRPosition &IRP) : AANoReturnImpl(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(noreturn) } +}; + +/// NoReturn attribute deduction for a call sites. +struct AANoReturnCallSite final : AANoReturnImpl { + AANoReturnCallSite(const IRPosition &IRP) : AANoReturnImpl(IRP) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor(*this, FnPos); + return clampStateAndIndicateChange( + getState(), + static_cast(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(noreturn); } +}; + +/// ----------------------- Variable Capturing --------------------------------- + +/// A class to hold the state of for no-capture attributes. +struct AANoCaptureImpl : public AANoCapture { + AANoCaptureImpl(const IRPosition &IRP) : AANoCapture(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoCapture::initialize(A); + + // You cannot "capture" null in the default address space. + if (isa(getAssociatedValue()) && + getAssociatedValue().getType()->getPointerAddressSpace() == 0) { + indicateOptimisticFixpoint(); + return; + } + + const IRPosition &IRP = getIRPosition(); + const Function *F = + getArgNo() >= 0 ? IRP.getAssociatedFunction() : IRP.getAnchorScope(); + + // Check what state the associated function can actually capture. + if (F) + determineFunctionCaptureCapabilities(IRP, *F, *this); + else + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override; + + /// see AbstractAttribute::isAssumedNoCaptureMaybeReturned(...). 
+ virtual void + getDeducedAttributes(LLVMContext &Ctx, + SmallVectorImpl &Attrs) const override { + if (!isAssumedNoCaptureMaybeReturned()) + return; + + if (getArgNo() >= 0) { + if (isAssumedNoCapture()) + Attrs.emplace_back(Attribute::get(Ctx, Attribute::NoCapture)); + else if (ManifestInternal) + Attrs.emplace_back(Attribute::get(Ctx, "no-capture-maybe-returned")); + } + } + + /// Set the NOT_CAPTURED_IN_MEM and NOT_CAPTURED_IN_RET bits in \p Known + /// depending on the ability of the function associated with \p IRP to capture + /// state in memory and through "returning/throwing", respectively. + static void determineFunctionCaptureCapabilities(const IRPosition &IRP, + const Function &F, + IntegerState &State) { + // TODO: Once we have memory behavior attributes we should use them here. + + // If we know we cannot communicate or write to memory, we do not care about + // ptr2int anymore. + if (F.onlyReadsMemory() && F.doesNotThrow() && + F.getReturnType()->isVoidTy()) { + State.addKnownBits(NO_CAPTURE); + return; + } + + // A function cannot capture state in memory if it only reads memory, it can + // however return/throw state and the state might be influenced by the + // pointer value, e.g., loading from a returned pointer might reveal a bit. + if (F.onlyReadsMemory()) + State.addKnownBits(NOT_CAPTURED_IN_MEM); + + // A function cannot communicate state back if it does not through + // exceptions and doesn not return values. + if (F.doesNotThrow() && F.getReturnType()->isVoidTy()) + State.addKnownBits(NOT_CAPTURED_IN_RET); + + // Check existing "returned" attributes. + int ArgNo = IRP.getArgNo(); + if (F.doesNotThrow() && ArgNo >= 0) { + for (unsigned u = 0, e = F.arg_size(); u< e; ++u) + if (F.hasParamAttribute(u, Attribute::Returned)) { + if (u == unsigned(ArgNo)) + State.removeAssumedBits(NOT_CAPTURED_IN_RET); + else if (F.onlyReadsMemory()) + State.addKnownBits(NO_CAPTURE); + else + State.addKnownBits(NOT_CAPTURED_IN_RET); + break; + } + } + } + + /// See AbstractState::getAsStr(). + const std::string getAsStr() const override { + if (isKnownNoCapture()) + return "known not-captured"; + if (isAssumedNoCapture()) + return "assumed not-captured"; + if (isKnownNoCaptureMaybeReturned()) + return "known not-captured-maybe-returned"; + if (isAssumedNoCaptureMaybeReturned()) + return "assumed not-captured-maybe-returned"; + return "assumed-captured"; + } +}; + +/// Attributor-aware capture tracker. +struct AACaptureUseTracker final : public CaptureTracker { + + /// Create a capture tracker that can lookup in-flight abstract attributes + /// through the Attributor \p A. + /// + /// If a use leads to a potential capture, \p CapturedInMemory is set and the + /// search is stopped. If a use leads to a return instruction, + /// \p CommunicatedBack is set to true and \p CapturedInMemory is not changed. + /// If a use leads to a ptr2int which may capture the value, + /// \p CapturedInInteger is set. If a use is found that is currently assumed + /// "no-capture-maybe-returned", the user is added to the \p PotentialCopies + /// set. All values in \p PotentialCopies are later tracked as well. For every + /// explored use we decrement \p RemainingUsesToExplore. Once it reaches 0, + /// the search is stopped with \p CapturedInMemory and \p CapturedInInteger + /// conservatively set to true. 
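determineFunctionCaptureCapabilities above derives, from readonly/nothrow/void-return facts alone,
which capture channels a callee could even use. The sketch below mirrors that derivation with
illustrative bit names and plain booleans; it is not the actual AANoCapture encoding. The
AACaptureUseTracker constructor that the comment above documents appears right after this aside.

    // Capture can happen through three channels: storing the pointer to
    // memory, communicating it back (return value or exception), or turning
    // it into an integer. A readonly callee cannot capture in memory, and a
    // nothrow callee returning void cannot communicate the pointer back.
    enum CaptureBits : unsigned {
      NotCapturedInMem = 1u << 0,
      NotCapturedInRet = 1u << 1,
      NotCapturedInInt = 1u << 2,
    };

    static unsigned knownCaptureBits(bool OnlyReadsMemory, bool DoesNotThrow,
                                     bool ReturnsVoid) {
      unsigned Known = 0;
      if (OnlyReadsMemory)
        Known |= NotCapturedInMem;
      if (DoesNotThrow && ReturnsVoid)
        Known |= NotCapturedInRet;
      // If the pointer can neither be stored nor communicated back, a
      // ptr2int inside the callee is unobservable as well.
      if ((Known & NotCapturedInMem) && (Known & NotCapturedInRet))
        Known |= NotCapturedInInt;
      return Known;
    }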
+ AACaptureUseTracker(Attributor &A, AANoCapture &NoCaptureAA, + const AAIsDead &IsDeadAA, IntegerState &State, + SmallVectorImpl &PotentialCopies, + unsigned &RemainingUsesToExplore) + : A(A), NoCaptureAA(NoCaptureAA), IsDeadAA(IsDeadAA), State(State), + PotentialCopies(PotentialCopies), + RemainingUsesToExplore(RemainingUsesToExplore) {} + + /// Determine if \p V maybe captured. *Also updates the state!* + bool valueMayBeCaptured(const Value *V) { + if (V->getType()->isPointerTy()) { + PointerMayBeCaptured(V, this); + } else { + State.indicatePessimisticFixpoint(); + } + return State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED); + } + + /// See CaptureTracker::tooManyUses(). + void tooManyUses() override { + State.removeAssumedBits(AANoCapture::NO_CAPTURE); + } + + bool isDereferenceableOrNull(Value *O, const DataLayout &DL) override { + if (CaptureTracker::isDereferenceableOrNull(O, DL)) + return true; + const auto &DerefAA = + A.getAAFor(NoCaptureAA, IRPosition::value(*O)); + return DerefAA.getAssumedDereferenceableBytes(); + } + + /// See CaptureTracker::captured(...). + bool captured(const Use *U) override { + Instruction *UInst = cast(U->getUser()); + LLVM_DEBUG(dbgs() << "Check use: " << *U->get() << " in " << *UInst + << "\n"); + + // Because we may reuse the tracker multiple times we keep track of the + // number of explored uses ourselves as well. + if (RemainingUsesToExplore-- == 0) { + LLVM_DEBUG(dbgs() << " - too many uses to explore!\n"); + return isCapturedIn(/* Memory */ true, /* Integer */ true, + /* Return */ true); + } + + // Deal with ptr2int by following uses. + if (isa(UInst)) { + LLVM_DEBUG(dbgs() << " - ptr2int assume the worst!\n"); + return valueMayBeCaptured(UInst); + } + + // Explicitly catch return instructions. + if (isa(UInst)) + return isCapturedIn(/* Memory */ false, /* Integer */ false, + /* Return */ true); + + // For now we only use special logic for call sites. However, the tracker + // itself knows about a lot of other non-capturing cases already. + CallSite CS(UInst); + if (!CS || !CS.isArgOperand(U)) + return isCapturedIn(/* Memory */ true, /* Integer */ true, + /* Return */ true); + + unsigned ArgNo = CS.getArgumentNo(U); + const IRPosition &CSArgPos = IRPosition::callsite_argument(CS, ArgNo); + // If we have a abstract no-capture attribute for the argument we can use + // it to justify a non-capture attribute here. This allows recursion! + auto &ArgNoCaptureAA = A.getAAFor(NoCaptureAA, CSArgPos); + if (ArgNoCaptureAA.isAssumedNoCapture()) + return isCapturedIn(/* Memory */ false, /* Integer */ false, + /* Return */ false); + if (ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) { + addPotentialCopy(CS); + return isCapturedIn(/* Memory */ false, /* Integer */ false, + /* Return */ false); + } + + // Lastly, we could not find a reason no-capture can be assumed so we don't. + return isCapturedIn(/* Memory */ true, /* Integer */ true, + /* Return */ true); + } + + /// Register \p CS as potential copy of the value we are checking. + void addPotentialCopy(CallSite CS) { + PotentialCopies.push_back(CS.getInstruction()); + } + + /// See CaptureTracker::shouldExplore(...). + bool shouldExplore(const Use *U) override { + // Check liveness. + return !IsDeadAA.isAssumedDead(cast(U->getUser())); + } + + /// Update the state according to \p CapturedInMem, \p CapturedInInt, and + /// \p CapturedInRet, then return the appropriate value for use in the + /// CaptureTracker::captured() interface. 
+ bool isCapturedIn(bool CapturedInMem, bool CapturedInInt, + bool CapturedInRet) { + LLVM_DEBUG(dbgs() << " - captures [Mem " << CapturedInMem << "|Int " + << CapturedInInt << "|Ret " << CapturedInRet << "]\n"); + if (CapturedInMem) + State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_MEM); + if (CapturedInInt) + State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_INT); + if (CapturedInRet) + State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_RET); + return !State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED); + } + +private: + /// The attributor providing in-flight abstract attributes. + Attributor &A; + + /// The abstract attribute currently updated. + AANoCapture &NoCaptureAA; + + /// The abstract liveness state. + const AAIsDead &IsDeadAA; + + /// The state currently updated. + IntegerState &State; + + /// Set of potential copies of the tracked value. + SmallVectorImpl &PotentialCopies; + + /// Global counter to limit the number of explored uses. + unsigned &RemainingUsesToExplore; +}; + +ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) { + const IRPosition &IRP = getIRPosition(); + const Value *V = + getArgNo() >= 0 ? IRP.getAssociatedArgument() : &IRP.getAssociatedValue(); + if (!V) + return indicatePessimisticFixpoint(); + + const Function *F = + getArgNo() >= 0 ? IRP.getAssociatedFunction() : IRP.getAnchorScope(); + assert(F && "Expected a function!"); + const IRPosition &FnPos = IRPosition::function(*F); + const auto &IsDeadAA = A.getAAFor(*this, FnPos); + + AANoCapture::StateType T; + + // Readonly means we cannot capture through memory. + const auto &FnMemAA = A.getAAFor(*this, FnPos); + if (FnMemAA.isAssumedReadOnly()) { + T.addKnownBits(NOT_CAPTURED_IN_MEM); + if (FnMemAA.isKnownReadOnly()) + addKnownBits(NOT_CAPTURED_IN_MEM); + } + + // Make sure all returned values are different than the underlying value. + // TODO: we could do this in a more sophisticated way inside + // AAReturnedValues, e.g., track all values that escape through returns + // directly somehow. + auto CheckReturnedArgs = [&](const AAReturnedValues &RVAA) { + bool SeenConstant = false; + for (auto &It : RVAA.returned_values()) { + if (isa(It.first)) { + if (SeenConstant) + return false; + SeenConstant = true; + } else if (!isa(It.first) || + It.first == getAssociatedArgument()) + return false; + } + return true; + }; + + const auto &NoUnwindAA = A.getAAFor(*this, FnPos); + if (NoUnwindAA.isAssumedNoUnwind()) { + bool IsVoidTy = F->getReturnType()->isVoidTy(); + const AAReturnedValues *RVAA = + IsVoidTy ? nullptr : &A.getAAFor(*this, FnPos); + if (IsVoidTy || CheckReturnedArgs(*RVAA)) { + T.addKnownBits(NOT_CAPTURED_IN_RET); + if (T.isKnown(NOT_CAPTURED_IN_MEM)) + return ChangeStatus::UNCHANGED; + if (NoUnwindAA.isKnownNoUnwind() && + (IsVoidTy || RVAA->getState().isAtFixpoint())) { + addKnownBits(NOT_CAPTURED_IN_RET); + if (isKnown(NOT_CAPTURED_IN_MEM)) + return indicateOptimisticFixpoint(); + } + } + } + + // Use the CaptureTracker interface and logic with the specialized tracker, + // defined in AACaptureUseTracker, that can look at in-flight abstract + // attributes and directly updates the assumed state. + SmallVector PotentialCopies; + unsigned RemainingUsesToExplore = DefaultMaxUsesToExplore; + AACaptureUseTracker Tracker(A, *this, IsDeadAA, T, PotentialCopies, + RemainingUsesToExplore); + + // Check all potential copies of the associated value until we can assume + // none will be captured or we have to assume at least one might be. 
+ unsigned Idx = 0; + PotentialCopies.push_back(V); + while (T.isAssumed(NO_CAPTURE_MAYBE_RETURNED) && Idx < PotentialCopies.size()) + Tracker.valueMayBeCaptured(PotentialCopies[Idx++]); + + AAAlign::StateType &S = getState(); + auto Assumed = S.getAssumed(); + S.intersectAssumedBits(T.getAssumed()); + return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED + : ChangeStatus::CHANGED; +} + +/// NoCapture attribute for function arguments. +struct AANoCaptureArgument final : AANoCaptureImpl { + AANoCaptureArgument(const IRPosition &IRP) : AANoCaptureImpl(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nocapture) } +}; + +/// NoCapture attribute for call site arguments. +struct AANoCaptureCallSiteArgument final : AANoCaptureImpl { + AANoCaptureCallSiteArgument(const IRPosition &IRP) : AANoCaptureImpl(IRP) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Argument *Arg = getAssociatedArgument(); + if (!Arg) + return indicatePessimisticFixpoint(); + const IRPosition &ArgPos = IRPosition::argument(*Arg); + auto &ArgAA = A.getAAFor(*this, ArgPos); + return clampStateAndIndicateChange( + getState(), + static_cast(ArgAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override{STATS_DECLTRACK_CSARG_ATTR(nocapture)}; +}; + +/// NoCapture attribute for floating values. +struct AANoCaptureFloating final : AANoCaptureImpl { + AANoCaptureFloating(const IRPosition &IRP) : AANoCaptureImpl(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FLOATING_ATTR(nocapture) + } +}; + +/// NoCapture attribute for function return value. +struct AANoCaptureReturned final : AANoCaptureImpl { + AANoCaptureReturned(const IRPosition &IRP) : AANoCaptureImpl(IRP) { + llvm_unreachable("NoCapture is not applicable to function returns!"); + } + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + llvm_unreachable("NoCapture is not applicable to function returns!"); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + llvm_unreachable("NoCapture is not applicable to function returns!"); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override {} +}; + +/// NoCapture attribute deduction for a call site return value. +struct AANoCaptureCallSiteReturned final : AANoCaptureImpl { + AANoCaptureCallSiteReturned(const IRPosition &IRP) : AANoCaptureImpl(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CSRET_ATTR(nocapture) + } +}; + +/// ------------------ Value Simplify Attribute ---------------------------- +struct AAValueSimplifyImpl : AAValueSimplify { + AAValueSimplifyImpl(const IRPosition &IRP) : AAValueSimplify(IRP) {} + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + return getAssumed() ? (getKnown() ? 
"simplified" : "maybe-simple") + : "not-simple"; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override {} + + /// See AAValueSimplify::getAssumedSimplifiedValue() + Optional getAssumedSimplifiedValue(Attributor &A) const override { + if (!getAssumed()) + return const_cast(&getAssociatedValue()); + return SimplifiedAssociatedValue; + } + void initialize(Attributor &A) override {} + + /// Helper function for querying AAValueSimplify and updating candicate. + /// \param QueryingValue Value trying to unify with SimplifiedValue + /// \param AccumulatedSimplifiedValue Current simplification result. + static bool checkAndUpdate(Attributor &A, const AbstractAttribute &QueryingAA, + Value &QueryingValue, + Optional &AccumulatedSimplifiedValue) { + // FIXME: Add a typecast support. + + auto &ValueSimpifyAA = A.getAAFor( + QueryingAA, IRPosition::value(QueryingValue)); + + Optional QueryingValueSimplified = + ValueSimpifyAA.getAssumedSimplifiedValue(A); + + if (!QueryingValueSimplified.hasValue()) + return true; + + if (!QueryingValueSimplified.getValue()) + return false; + + Value &QueryingValueSimplifiedUnwrapped = + *QueryingValueSimplified.getValue(); + + if (isa(QueryingValueSimplifiedUnwrapped)) + return true; + + if (AccumulatedSimplifiedValue.hasValue()) + return AccumulatedSimplifiedValue == QueryingValueSimplified; + + LLVM_DEBUG(dbgs() << "[Attributor][ValueSimplify] " << QueryingValue + << " is assumed to be " + << QueryingValueSimplifiedUnwrapped << "\n"); + + AccumulatedSimplifiedValue = QueryingValueSimplified; + return true; + } + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + ChangeStatus Changed = ChangeStatus::UNCHANGED; + + if (!SimplifiedAssociatedValue.hasValue() || + !SimplifiedAssociatedValue.getValue()) + return Changed; + + if (auto *C = dyn_cast(SimplifiedAssociatedValue.getValue())) { + // We can replace the AssociatedValue with the constant. + Value &V = getAssociatedValue(); + if (!V.user_empty() && &V != C && V.getType() == C->getType()) { + LLVM_DEBUG(dbgs() << "[Attributor][ValueSimplify] " << V << " -> " << *C + << "\n"); + V.replaceAllUsesWith(C); + Changed = ChangeStatus::CHANGED; + } + } + + return Changed | AAValueSimplify::manifest(A); + } + +protected: + // An assumed simplified value. Initially, it is set to Optional::None, which + // means that the value is not clear under current assumption. If in the + // pessimistic state, getAssumedSimplifiedValue doesn't return this value but + // returns orignal associated value. + Optional SimplifiedAssociatedValue; +}; + +struct AAValueSimplifyArgument final : AAValueSimplifyImpl { + AAValueSimplifyArgument(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + bool HasValueBefore = SimplifiedAssociatedValue.hasValue(); + + auto PredForCallSite = [&](AbstractCallSite ACS) { + // Check if we have an associated argument or not (which can happen for + // callback calls). + if (Value *ArgOp = ACS.getCallArgOperand(getArgNo())) + return checkAndUpdate(A, *this, *ArgOp, SimplifiedAssociatedValue); + return false; + }; + + if (!A.checkForAllCallSites(PredForCallSite, *this, true)) + return indicatePessimisticFixpoint(); + + // If a candicate was found in this update, return CHANGED. + return HasValueBefore == SimplifiedAssociatedValue.hasValue() + ? 
ChangeStatus::UNCHANGED + : ChangeStatus ::CHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_ARG_ATTR(value_simplify) + } +}; + +struct AAValueSimplifyReturned : AAValueSimplifyImpl { + AAValueSimplifyReturned(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + bool HasValueBefore = SimplifiedAssociatedValue.hasValue(); + + auto PredForReturned = [&](Value &V) { + return checkAndUpdate(A, *this, V, SimplifiedAssociatedValue); + }; + + if (!A.checkForAllReturnedValues(PredForReturned, *this)) + return indicatePessimisticFixpoint(); + + // If a candicate was found in this update, return CHANGED. + return HasValueBefore == SimplifiedAssociatedValue.hasValue() + ? ChangeStatus::UNCHANGED + : ChangeStatus ::CHANGED; + } + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FNRET_ATTR(value_simplify) + } +}; + +struct AAValueSimplifyFloating : AAValueSimplifyImpl { + AAValueSimplifyFloating(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + Value &V = getAnchorValue(); + + // TODO: add other stuffs + if (isa(V) || isa(V)) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + bool HasValueBefore = SimplifiedAssociatedValue.hasValue(); + + auto VisitValueCB = [&](Value &V, BooleanState, bool Stripped) -> bool { + auto &AA = A.getAAFor(*this, IRPosition::value(V)); + if (!Stripped && this == &AA) { + // TODO: Look the instruction and check recursively. + LLVM_DEBUG( + dbgs() << "[Attributor][ValueSimplify] Can't be stripped more : " + << V << "\n"); + indicatePessimisticFixpoint(); + return false; + } + return checkAndUpdate(A, *this, V, SimplifiedAssociatedValue); + }; + + if (!genericValueTraversal( + A, getIRPosition(), *this, static_cast(*this), + VisitValueCB)) + return indicatePessimisticFixpoint(); + + // If a candicate was found in this update, return CHANGED. + + return HasValueBefore == SimplifiedAssociatedValue.hasValue() + ? ChangeStatus::UNCHANGED + : ChangeStatus ::CHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FLOATING_ATTR(value_simplify) + } +}; + +struct AAValueSimplifyFunction : AAValueSimplifyImpl { + AAValueSimplifyFunction(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + SimplifiedAssociatedValue = &getAnchorValue(); + indicateOptimisticFixpoint(); + } + /// See AbstractAttribute::initialize(...). 
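checkAndUpdate above unifies simplification candidates coming from call sites, returned values, or
stripped values: no candidate yet is compatible with anything, seeing the same candidate again is
fine, and a second distinct candidate ends the simplification. Below is a standalone sketch of
that unification step using std::optional, with hypothetical names. AAValueSimplifyFunction's
updateImpl continues immediately after this aside.

    #include <optional>

    // Fold one more candidate into the accumulated simplified value:
    //  - no candidate yet: adopt the new one,
    //  - same candidate again: still fine,
    //  - a different candidate: a single simplified value is impossible.
    template <typename V>
    static bool unifySimplifiedValue(std::optional<V> &Accumulated, const V &New) {
      if (!Accumulated) {
        Accumulated = New;
        return true;
      }
      return *Accumulated == New;
    }
    // Usage: start with an empty std::optional and feed every incoming value
    // through unifySimplifiedValue; a false result means give up (pessimistic
    // fixpoint), otherwise the optional holds the value all sources agree on.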
+ ChangeStatus updateImpl(Attributor &A) override { + llvm_unreachable( + "AAValueSimplify(Function|CallSite)::updateImpl will not be called"); + } + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FN_ATTR(value_simplify) + } +}; + +struct AAValueSimplifyCallSite : AAValueSimplifyFunction { + AAValueSimplifyCallSite(const IRPosition &IRP) + : AAValueSimplifyFunction(IRP) {} + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CS_ATTR(value_simplify) + } +}; + +struct AAValueSimplifyCallSiteReturned : AAValueSimplifyReturned { + AAValueSimplifyCallSiteReturned(const IRPosition &IRP) + : AAValueSimplifyReturned(IRP) {} + + void trackStatistics() const override { + STATS_DECLTRACK_CSRET_ATTR(value_simplify) + } +}; +struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating { + AAValueSimplifyCallSiteArgument(const IRPosition &IRP) + : AAValueSimplifyFloating(IRP) {} + + void trackStatistics() const override { + STATS_DECLTRACK_CSARG_ATTR(value_simplify) + } +}; + +/// ----------------------- Heap-To-Stack Conversion --------------------------- +struct AAHeapToStackImpl : public AAHeapToStack { + AAHeapToStackImpl(const IRPosition &IRP) : AAHeapToStack(IRP) {} + + const std::string getAsStr() const override { + return "[H2S] Mallocs: " + std::to_string(MallocCalls.size()); + } + + ChangeStatus manifest(Attributor &A) override { + assert(getState().isValidState() && + "Attempted to manifest an invalid state!"); + + ChangeStatus HasChanged = ChangeStatus::UNCHANGED; + Function *F = getAssociatedFunction(); + const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F); + + for (Instruction *MallocCall : MallocCalls) { + // This malloc cannot be replaced. 
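+ // (A call ends up in BadMallocCalls if it is not a malloc/calloc-like call, + // its size is not a known constant below the heap-to-stack threshold, or one + // of its uses may free the pointer or let it escape; see updateImpl below.)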
+ if (BadMallocCalls.count(MallocCall)) + continue; + + for (Instruction *FreeCall : FreesForMalloc[MallocCall]) { + LLVM_DEBUG(dbgs() << "H2S: Removing free call: " << *FreeCall << "\n"); + A.deleteAfterManifest(*FreeCall); + HasChanged = ChangeStatus::CHANGED; + } + + LLVM_DEBUG(dbgs() << "H2S: Removing malloc call: " << *MallocCall + << "\n"); + + Constant *Size; + if (isCallocLikeFn(MallocCall, TLI)) { + auto *Num = cast(MallocCall->getOperand(0)); + auto *SizeT = dyn_cast(MallocCall->getOperand(1)); + APInt TotalSize = SizeT->getValue() * Num->getValue(); + Size = + ConstantInt::get(MallocCall->getOperand(0)->getType(), TotalSize); + } else { + Size = cast(MallocCall->getOperand(0)); + } + + unsigned AS = cast(MallocCall->getType())->getAddressSpace(); + Instruction *AI = new AllocaInst(Type::getInt8Ty(F->getContext()), AS, + Size, "", MallocCall->getNextNode()); + + if (AI->getType() != MallocCall->getType()) + AI = new BitCastInst(AI, MallocCall->getType(), "malloc_bc", + AI->getNextNode()); + + MallocCall->replaceAllUsesWith(AI); + + if (auto *II = dyn_cast(MallocCall)) { + auto *NBB = II->getNormalDest(); + BranchInst::Create(NBB, MallocCall->getParent()); + A.deleteAfterManifest(*MallocCall); + } else { + A.deleteAfterManifest(*MallocCall); + } + + if (isCallocLikeFn(MallocCall, TLI)) { + auto *BI = new BitCastInst(AI, MallocCall->getType(), "calloc_bc", + AI->getNextNode()); + Value *Ops[] = { + BI, ConstantInt::get(F->getContext(), APInt(8, 0, false)), Size, + ConstantInt::get(Type::getInt1Ty(F->getContext()), false)}; + + Type *Tys[] = {BI->getType(), MallocCall->getOperand(0)->getType()}; + Module *M = F->getParent(); + Function *Fn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys); + CallInst::Create(Fn, Ops, "", BI->getNextNode()); + } + HasChanged = ChangeStatus::CHANGED; + } + + return HasChanged; + } + + /// Collection of all malloc calls in a function. + SmallSetVector MallocCalls; + + /// Collection of malloc calls that cannot be converted. + DenseSet BadMallocCalls; + + /// A map for each malloc call to the set of associated free calls. + DenseMap> FreesForMalloc; + + ChangeStatus updateImpl(Attributor &A) override; +}; + +ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) { + const Function *F = getAssociatedFunction(); + const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F); + + auto UsesCheck = [&](Instruction &I) { + SmallPtrSet Visited; + SmallVector Worklist; + + for (Use &U : I.uses()) + Worklist.push_back(&U); + + while (!Worklist.empty()) { + const Use *U = Worklist.pop_back_val(); + if (!Visited.insert(U).second) + continue; + + auto *UserI = U->getUser(); + + if (isa(UserI)) + continue; + if (auto *SI = dyn_cast(UserI)) { + if (SI->getValueOperand() == U->get()) { + LLVM_DEBUG(dbgs() << "[H2S] escaping store to memory: " << *UserI << "\n"); + return false; + } + // A store into the malloc'ed memory is fine. + continue; + } + + // NOTE: Right now, if a function that has malloc pointer as an argument + // frees memory, we assume that the malloc pointer is freed. + + // TODO: Add nofree callsite argument attribute to indicate that pointer + // argument is not freed. + if (auto *CB = dyn_cast(UserI)) { + if (!CB->isArgOperand(U)) + continue; + + if (CB->isLifetimeStartOrEnd()) + continue; + + // Record malloc. 
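+ // A free of the malloc'ed pointer is remembered in FreesForMalloc so it + // can be erased together with the allocation when the call is rewritten to + // an alloca in manifest() above.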
+ if (isFreeCall(UserI, TLI)) { + FreesForMalloc[&I].insert( + cast(const_cast(UserI))); + continue; + } + + // If a function does not free memory we are fine + const auto &NoFreeAA = + A.getAAFor(*this, IRPosition::callsite_function(*CB)); + + unsigned ArgNo = U - CB->arg_begin(); + const auto &NoCaptureAA = A.getAAFor( + *this, IRPosition::callsite_argument(*CB, ArgNo)); + + if (!NoCaptureAA.isAssumedNoCapture() || !NoFreeAA.isAssumedNoFree()) { + LLVM_DEBUG(dbgs() << "[H2S] Bad user: " << *UserI << "\n"); + return false; + } + continue; + } + + if (isa(UserI) || isa(UserI)) { + for (Use &U : UserI->uses()) + Worklist.push_back(&U); + continue; + } + + // Unknown user. + LLVM_DEBUG(dbgs() << "[H2S] Unknown user: " << *UserI << "\n"); + return false; + } + return true; + }; + + auto MallocCallocCheck = [&](Instruction &I) { + if (BadMallocCalls.count(&I)) + return true; + + bool IsMalloc = isMallocLikeFn(&I, TLI); + bool IsCalloc = !IsMalloc && isCallocLikeFn(&I, TLI); + if (!IsMalloc && !IsCalloc) { + BadMallocCalls.insert(&I); + return true; + } + + if (IsMalloc) { + if (auto *Size = dyn_cast(I.getOperand(0))) + if (Size->getValue().sle(MaxHeapToStackSize)) + if (UsesCheck(I)) { + MallocCalls.insert(&I); + return true; + } + } else if (IsCalloc) { + bool Overflow = false; + if (auto *Num = dyn_cast(I.getOperand(0))) + if (auto *Size = dyn_cast(I.getOperand(1))) + if ((Size->getValue().umul_ov(Num->getValue(), Overflow)) + .sle(MaxHeapToStackSize)) + if (!Overflow && UsesCheck(I)) { + MallocCalls.insert(&I); + return true; + } + } + + BadMallocCalls.insert(&I); + return true; + }; + + size_t NumBadMallocs = BadMallocCalls.size(); + + A.checkForAllCallLikeInstructions(MallocCallocCheck, *this); + + if (NumBadMallocs != BadMallocCalls.size()) + return ChangeStatus::CHANGED; + + return ChangeStatus::UNCHANGED; +} + +struct AAHeapToStackFunction final : public AAHeapToStackImpl { + AAHeapToStackFunction(const IRPosition &IRP) : AAHeapToStackImpl(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECL(MallocCalls, Function, + "Number of MallocCalls converted to allocas"); + BUILD_STAT_NAME(MallocCalls, Function) += MallocCalls.size(); + } +}; + +/// -------------------- Memory Behavior Attributes ---------------------------- +/// Includes read-none, read-only, and write-only. +/// ---------------------------------------------------------------------------- +struct AAMemoryBehaviorImpl : public AAMemoryBehavior { + AAMemoryBehaviorImpl(const IRPosition &IRP) : AAMemoryBehavior(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + intersectAssumedBits(BEST_STATE); + getKnownStateFromValue(getIRPosition(), getState()); + IRAttribute::initialize(A); + } + + /// Return the memory behavior information encoded in the IR for \p IRP. 
+ static void getKnownStateFromValue(const IRPosition &IRP, + IntegerState &State) { + SmallVector Attrs; + IRP.getAttrs(AttrKinds, Attrs); + for (const Attribute &Attr : Attrs) { + switch (Attr.getKindAsEnum()) { + case Attribute::ReadNone: + State.addKnownBits(NO_ACCESSES); + break; + case Attribute::ReadOnly: + State.addKnownBits(NO_WRITES); + break; + case Attribute::WriteOnly: + State.addKnownBits(NO_READS); + break; + default: + llvm_unreachable("Unexpcted attribute!"); + } + } + + if (auto *I = dyn_cast(&IRP.getAnchorValue())) { + if (!I->mayReadFromMemory()) + State.addKnownBits(NO_READS); + if (!I->mayWriteToMemory()) + State.addKnownBits(NO_WRITES); + } + } + + /// See AbstractAttribute::getDeducedAttributes(...). + void getDeducedAttributes(LLVMContext &Ctx, + SmallVectorImpl &Attrs) const override { + assert(Attrs.size() == 0); + if (isAssumedReadNone()) + Attrs.push_back(Attribute::get(Ctx, Attribute::ReadNone)); + else if (isAssumedReadOnly()) + Attrs.push_back(Attribute::get(Ctx, Attribute::ReadOnly)); + else if (isAssumedWriteOnly()) + Attrs.push_back(Attribute::get(Ctx, Attribute::WriteOnly)); + assert(Attrs.size() <= 1); + } + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + IRPosition &IRP = getIRPosition(); + + // Check if we would improve the existing attributes first. + SmallVector DeducedAttrs; + getDeducedAttributes(IRP.getAnchorValue().getContext(), DeducedAttrs); + if (llvm::all_of(DeducedAttrs, [&](const Attribute &Attr) { + return IRP.hasAttr(Attr.getKindAsEnum(), + /* IgnoreSubsumingPositions */ true); + })) + return ChangeStatus::UNCHANGED; + + // Clear existing attributes. + IRP.removeAttrs(AttrKinds); + + // Use the generic manifest method. + return IRAttribute::manifest(A); + } + + /// See AbstractState::getAsStr(). + const std::string getAsStr() const override { + if (isAssumedReadNone()) + return "readnone"; + if (isAssumedReadOnly()) + return "readonly"; + if (isAssumedWriteOnly()) + return "writeonly"; + return "may-read/write"; + } + + /// The set of IR attributes AAMemoryBehavior deals with. + static const Attribute::AttrKind AttrKinds[3]; +}; + +const Attribute::AttrKind AAMemoryBehaviorImpl::AttrKinds[] = { + Attribute::ReadNone, Attribute::ReadOnly, Attribute::WriteOnly}; + +/// Memory behavior attribute for a floating value. +struct AAMemoryBehaviorFloating : AAMemoryBehaviorImpl { + AAMemoryBehaviorFloating(const IRPosition &IRP) : AAMemoryBehaviorImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AAMemoryBehaviorImpl::initialize(A); + // Initialize the use vector with all direct uses of the associated value. + for (const Use &U : getAssociatedValue().uses()) + Uses.insert(&U); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override; + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + if (isAssumedReadNone()) + STATS_DECLTRACK_FLOATING_ATTR(readnone) + else if (isAssumedReadOnly()) + STATS_DECLTRACK_FLOATING_ATTR(readonly) + else if (isAssumedWriteOnly()) + STATS_DECLTRACK_FLOATING_ATTR(writeonly) + } + +private: + /// Return true if users of \p UserI might access the underlying + /// variable/location described by \p U and should therefore be analyzed. + bool followUsersOfUseIn(Attributor &A, const Use *U, + const Instruction *UserI); + + /// Update the state according to the effect of use \p U in \p UserI. 
+ void analyzeUseIn(Attributor &A, const Use *U, const Instruction *UserI); + +protected: + /// Container for (transitive) uses of the associated argument. + SetVector Uses; +}; + +/// Memory behavior attribute for function argument. +struct AAMemoryBehaviorArgument : AAMemoryBehaviorFloating { + AAMemoryBehaviorArgument(const IRPosition &IRP) + : AAMemoryBehaviorFloating(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AAMemoryBehaviorFloating::initialize(A); + + // We need an associated argument in a function with an exact definition + // to deduce anything. + Argument *Arg = getAssociatedArgument(); + if (!Arg || !Arg->getParent()->hasExactDefinition()) + indicatePessimisticFixpoint(); + } + + ChangeStatus manifest(Attributor &A) override { + // TODO: From readattrs.ll: "inalloca parameters are always + // considered written" + if (hasAttr({Attribute::InAlloca})) { + removeKnownBits(NO_WRITES); + removeAssumedBits(NO_WRITES); + } + return AAMemoryBehaviorFloating::manifest(A); + } + + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + if (isAssumedReadNone()) + STATS_DECLTRACK_ARG_ATTR(readnone) + else if (isAssumedReadOnly()) + STATS_DECLTRACK_ARG_ATTR(readonly) + else if (isAssumedWriteOnly()) + STATS_DECLTRACK_ARG_ATTR(writeonly) + } +}; + +struct AAMemoryBehaviorCallSiteArgument final : AAMemoryBehaviorArgument { + AAMemoryBehaviorCallSiteArgument(const IRPosition &IRP) + : AAMemoryBehaviorArgument(IRP) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call site arguments instead of + // redirecting requests to the callee argument. + Argument *Arg = getAssociatedArgument(); + const IRPosition &ArgPos = IRPosition::argument(*Arg); + auto &ArgAA = A.getAAFor(*this, ArgPos); + return clampStateAndIndicateChange( + getState(), + static_cast(ArgAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + if (isAssumedReadNone()) + STATS_DECLTRACK_CSARG_ATTR(readnone) + else if (isAssumedReadOnly()) + STATS_DECLTRACK_CSARG_ATTR(readonly) + else if (isAssumedWriteOnly()) + STATS_DECLTRACK_CSARG_ATTR(writeonly) + } +}; + +/// Memory behavior attribute for a call site return position. +struct AAMemoryBehaviorCallSiteReturned final : AAMemoryBehaviorFloating { + AAMemoryBehaviorCallSiteReturned(const IRPosition &IRP) + : AAMemoryBehaviorFloating(IRP) {} + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + // We do not annotate returned values. + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override {} +}; + +/// An AA to represent the memory behavior function attributes. +struct AAMemoryBehaviorFunction final : public AAMemoryBehaviorImpl { + AAMemoryBehaviorFunction(const IRPosition &IRP) : AAMemoryBehaviorImpl(IRP) {} + + /// See AbstractAttribute::updateImpl(Attributor &A). + virtual ChangeStatus updateImpl(Attributor &A) override; + + /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override { + Function &F = cast(getAnchorValue()); + if (isAssumedReadNone()) { + F.removeFnAttr(Attribute::ArgMemOnly); + F.removeFnAttr(Attribute::InaccessibleMemOnly); + F.removeFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + } + return AAMemoryBehaviorImpl::manifest(A); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + if (isAssumedReadNone()) + STATS_DECLTRACK_FN_ATTR(readnone) + else if (isAssumedReadOnly()) + STATS_DECLTRACK_FN_ATTR(readonly) + else if (isAssumedWriteOnly()) + STATS_DECLTRACK_FN_ATTR(writeonly) + } +}; + +/// AAMemoryBehavior attribute for call sites. +struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl { + AAMemoryBehaviorCallSite(const IRPosition &IRP) : AAMemoryBehaviorImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AAMemoryBehaviorImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F || !F->hasExactDefinition()) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites instead of + // redirecting requests to the callee. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor(*this, FnPos); + return clampStateAndIndicateChange( + getState(), static_cast(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + if (isAssumedReadNone()) + STATS_DECLTRACK_CS_ATTR(readnone) + else if (isAssumedReadOnly()) + STATS_DECLTRACK_CS_ATTR(readonly) + else if (isAssumedWriteOnly()) + STATS_DECLTRACK_CS_ATTR(writeonly) + } +}; +} // namespace + +ChangeStatus AAMemoryBehaviorFunction::updateImpl(Attributor &A) { + + // The current assumed state used to determine a change. + auto AssumedState = getAssumed(); + + auto CheckRWInst = [&](Instruction &I) { + // If the instruction has its own memory behavior state, use it to restrict + // the local state. No further analysis is required as the other memory + // state is as optimistic as it gets. + if (ImmutableCallSite ICS = ImmutableCallSite(&I)) { + const auto &MemBehaviorAA = A.getAAFor( + *this, IRPosition::callsite_function(ICS)); + intersectAssumedBits(MemBehaviorAA.getAssumed()); + return !isAtFixpoint(); + } + + // Remove access kind modifiers if necessary. + if (I.mayReadFromMemory()) + removeAssumedBits(NO_READS); + if (I.mayWriteToMemory()) + removeAssumedBits(NO_WRITES); + return !isAtFixpoint(); + }; + + if (!A.checkForAllReadWriteInstructions(CheckRWInst, *this)) + return indicatePessimisticFixpoint(); + + return (AssumedState != getAssumed()) ? ChangeStatus::CHANGED + : ChangeStatus::UNCHANGED; +} + +ChangeStatus AAMemoryBehaviorFloating::updateImpl(Attributor &A) { + + const IRPosition &IRP = getIRPosition(); + const IRPosition &FnPos = IRPosition::function_scope(IRP); + AAMemoryBehavior::StateType &S = getState(); + + // First, check the function scope. We take the known information and we avoid + // work if the assumed information implies the current assumed information for + // this attribute.
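+ // For example, if everything assumed here (say NO_WRITES) is already assumed + // for the whole function, intersecting with the function state cannot remove + // any bits, so we can return early.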
+ const auto &FnMemAA = A.getAAFor(*this, FnPos); + S.addKnownBits(FnMemAA.getKnown()); + if ((S.getAssumed() & FnMemAA.getAssumed()) == S.getAssumed()) + return ChangeStatus::UNCHANGED; + + // Make sure the value is not captured (except through "return"); if + // it is, any information derived would be irrelevant anyway as we cannot + // check the potential aliases introduced by the capture. However, no need + // to fall back to anything less optimistic than the function state. + const auto &ArgNoCaptureAA = A.getAAFor(*this, IRP); + if (!ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) { + S.intersectAssumedBits(FnMemAA.getAssumed()); + return ChangeStatus::CHANGED; + } + + // The current assumed state used to determine a change. + auto AssumedState = S.getAssumed(); + + // Liveness information to exclude dead users. + // TODO: Take the FnPos once we have call site specific liveness information. + const auto &LivenessAA = A.getAAFor( + *this, IRPosition::function(*IRP.getAssociatedFunction())); + + // Visit and expand uses until all are analyzed or a fixpoint is reached. + for (unsigned i = 0; i < Uses.size() && !isAtFixpoint(); i++) { + const Use *U = Uses[i]; + Instruction *UserI = cast(U->getUser()); + LLVM_DEBUG(dbgs() << "[AAMemoryBehavior] Use: " << **U << " in " << *UserI + << " [Dead: " << (LivenessAA.isAssumedDead(UserI)) + << "]\n"); + if (LivenessAA.isAssumedDead(UserI)) + continue; + + // Check if the users of UserI should also be visited. + if (followUsersOfUseIn(A, U, UserI)) + for (const Use &UserIUse : UserI->uses()) + Uses.insert(&UserIUse); + + // If UserI might touch memory we analyze the use in detail. + if (UserI->mayReadOrWriteMemory()) + analyzeUseIn(A, U, UserI); + } + + return (AssumedState != getAssumed()) ? ChangeStatus::CHANGED + : ChangeStatus::UNCHANGED; +} + +bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use *U, + const Instruction *UserI) { + // The loaded value is unrelated to the pointer argument; no need to + // follow the users of the load. + if (isa(UserI)) + return false; + + // By default we follow all uses assuming UserI might leak information on U; + // we have special handling for call site operands though. + ImmutableCallSite ICS(UserI); + if (!ICS || !ICS.isArgOperand(U)) + return true; + + // If the use is a call argument known not to be captured, the users of + // the call do not need to be visited because they have to be unrelated to + // the input. Note that this check is not trivial even though we disallow + // general capturing of the underlying argument. The reason is that the + // call might pass the argument "through return", which we allow and for which we + // need to check call users. + unsigned ArgNo = ICS.getArgumentNo(U); + const auto &ArgNoCaptureAA = + A.getAAFor(*this, IRPosition::callsite_argument(ICS, ArgNo)); + return !ArgNoCaptureAA.isAssumedNoCapture(); +} + +void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use *U, + const Instruction *UserI) { + assert(UserI->mayReadOrWriteMemory()); + + switch (UserI->getOpcode()) { + default: + // TODO: Handle all atomics and other side-effect operations we know of. + break; + case Instruction::Load: + // Loads cause the NO_READS property to disappear. + removeAssumedBits(NO_READS); + return; + + case Instruction::Store: + // Stores cause the NO_WRITES property to disappear if the use is the + // pointer operand. Note that we do assume that capturing was taken care of + // somewhere else.
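+ // E.g. a use as the pointer operand of "store i32 0, i32* %p" clears + // NO_WRITES for %p, while a use as the stored value does not change the + // bits here.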
+ if (cast(UserI)->getPointerOperand() == U->get()) + removeAssumedBits(NO_WRITES); + return; + + case Instruction::Call: + case Instruction::CallBr: + case Instruction::Invoke: { + // For call sites we look at the argument memory behavior attribute (this + // could be recursive!) in order to restrict our own state. + ImmutableCallSite ICS(UserI); + + // Give up on operand bundles. + if (ICS.isBundleOperand(U)) { + indicatePessimisticFixpoint(); + return; + } + + // Calling a function does read the function pointer, maybe write it if the + // function is self-modifying. + if (ICS.isCallee(U)) { + removeAssumedBits(NO_READS); + break; + } - // Callback function - std::function CallSiteCheck = [&](CallSite CS) { - assert(CS && "Sanity check: Call site was not initialized properly!"); + // Adjust the possible access behavior based on the information on the + // argument. + unsigned ArgNo = ICS.getArgumentNo(U); + const IRPosition &ArgPos = IRPosition::callsite_argument(ICS, ArgNo); + const auto &MemBehaviorAA = A.getAAFor(*this, ArgPos); + // "assumed" has at most the same bits as the MemBehaviorAA assumed + // and at least "known". + intersectAssumedBits(MemBehaviorAA.getAssumed()); + return; + } + }; - auto *NonNullAA = A.getAAFor(*this, *CS.getInstruction(), ArgNo); + // Generally, look at the "may-properties" and adjust the assumed state if we + // did not trigger special handling before. + if (UserI->mayReadFromMemory()) + removeAssumedBits(NO_READS); + if (UserI->mayWriteToMemory()) + removeAssumedBits(NO_WRITES); +} - // Check that NonNullAA is AANonNullCallSiteArgument. - if (NonNullAA) { - ImmutableCallSite ICS(&NonNullAA->getAnchoredValue()); - if (ICS && CS.getInstruction() == ICS.getInstruction()) - return NonNullAA->isAssumedNonNull(); - return false; - } +/// ---------------------------------------------------------------------------- +/// Attributor +/// ---------------------------------------------------------------------------- - if (CS.paramHasAttr(ArgNo, Attribute::NonNull)) - return true; +bool Attributor::isAssumedDead(const AbstractAttribute &AA, + const AAIsDead *LivenessAA) { + const Instruction *CtxI = AA.getIRPosition().getCtxI(); + if (!CtxI) + return false; - Value *V = CS.getArgOperand(ArgNo); - if (isKnownNonZero(V, getAnchorScope().getParent()->getDataLayout())) - return true; + if (!LivenessAA) + LivenessAA = + &getAAFor(AA, IRPosition::function(*CtxI->getFunction()), + /* TrackDependence */ false); + // Don't check liveness for AAIsDead. + if (&AA == LivenessAA) return false; - }; - if (!A.checkForAllCallSites(F, CallSiteCheck, true)) { - indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; - } - return ChangeStatus::UNCHANGED; -} -ChangeStatus AANonNullCallSiteArgument::updateImpl(Attributor &A) { - // NOTE: Never look at the argument of the callee in this method. - // If we do this, "nonnull" is always deduced because of the assumption. + if (!LivenessAA->isAssumedDead(CtxI)) + return false; - Value &V = *getAssociatedValue(); + // We actually used liveness information so we have to record a dependence. + recordDependence(*LivenessAA, AA); - auto *NonNullAA = A.getAAFor(*this, V); + return true; +} - if (!NonNullAA || !NonNullAA->isAssumedNonNull()) { - indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; +bool Attributor::checkForAllCallSites( + const function_ref &Pred, + const AbstractAttribute &QueryingAA, bool RequireAllCallSites) { + // We can try to determine information from + // the call sites. 
However, this is only possible all call sites are known, + // hence the function has internal linkage. + const IRPosition &IRP = QueryingAA.getIRPosition(); + const Function *AssociatedFunction = IRP.getAssociatedFunction(); + if (!AssociatedFunction) { + LLVM_DEBUG(dbgs() << "[Attributor] No function associated with " << IRP + << "\n"); + return false; } - return ChangeStatus::UNCHANGED; + return checkForAllCallSites(Pred, *AssociatedFunction, RequireAllCallSites, + &QueryingAA); } -/// ------------------------ Will-Return Attributes ---------------------------- +bool Attributor::checkForAllCallSites( + const function_ref &Pred, const Function &Fn, + bool RequireAllCallSites, const AbstractAttribute *QueryingAA) { + if (RequireAllCallSites && !Fn.hasLocalLinkage()) { + LLVM_DEBUG( + dbgs() + << "[Attributor] Function " << Fn.getName() + << " has no internal linkage, hence not all call sites are known\n"); + return false; + } -struct AAWillReturnImpl : public AAWillReturn, BooleanState { + for (const Use &U : Fn.uses()) { + AbstractCallSite ACS(&U); + if (!ACS) { + LLVM_DEBUG(dbgs() << "[Attributor] Function " + << Fn.getName() + << " has non call site use " << *U.get() << " in " + << *U.getUser() << "\n"); + return false; + } - /// See AbstractAttribute::AbstractAttribute(...). - AAWillReturnImpl(Function &F, InformationCache &InfoCache) - : AAWillReturn(F, InfoCache) {} + Instruction *I = ACS.getInstruction(); + Function *Caller = I->getFunction(); - /// See AAWillReturn::isKnownWillReturn(). - bool isKnownWillReturn() const override { return getKnown(); } + const auto *LivenessAA = + lookupAAFor(IRPosition::function(*Caller), QueryingAA, + /* TrackDependence */ false); - /// See AAWillReturn::isAssumedWillReturn(). - bool isAssumedWillReturn() const override { return getAssumed(); } + // Skip dead calls. + if (LivenessAA && LivenessAA->isAssumedDead(I)) { + // We actually used liveness information so we have to record a + // dependence. + if (QueryingAA) + recordDependence(*LivenessAA, *QueryingAA); + continue; + } - /// See AbstractAttribute::getState(...). - AbstractState &getState() override { return *this; } + const Use *EffectiveUse = + ACS.isCallbackCall() ? &ACS.getCalleeUseForCallback() : &U; + if (!ACS.isCallee(EffectiveUse)) { + if (!RequireAllCallSites) + continue; + LLVM_DEBUG(dbgs() << "[Attributor] User " << EffectiveUse->getUser() + << " is an invalid use of " + << Fn.getName() << "\n"); + return false; + } - /// See AbstractAttribute::getState(...). - const AbstractState &getState() const override { return *this; } + if (Pred(ACS)) + continue; - /// See AbstractAttribute::getAsStr() - const std::string getAsStr() const override { - return getAssumed() ? "willreturn" : "may-noreturn"; + LLVM_DEBUG(dbgs() << "[Attributor] Call site callback failed for " + << *ACS.getInstruction() << "\n"); + return false; } -}; -struct AAWillReturnFunction final : AAWillReturnImpl { - - /// See AbstractAttribute::AbstractAttribute(...). - AAWillReturnFunction(Function &F, InformationCache &InfoCache) - : AAWillReturnImpl(F, InfoCache) {} - - /// See AbstractAttribute::getManifestPosition(). - ManifestPosition getManifestPosition() const override { - return MP_FUNCTION; - } + return true; +} - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override; +bool Attributor::checkForAllReturnedValuesAndReturnInsts( + const function_ref &)> + &Pred, + const AbstractAttribute &QueryingAA) { - /// See AbstractAttribute::updateImpl(...). 
- ChangeStatus updateImpl(Attributor &A) override; -}; + const IRPosition &IRP = QueryingAA.getIRPosition(); + // Since we need to provide return instructions we have to have an exact + // definition. + const Function *AssociatedFunction = IRP.getAssociatedFunction(); + if (!AssociatedFunction) + return false; -// Helper function that checks whether a function has any cycle. -// TODO: Replace with more efficent code -bool containsCycle(Function &F) { - SmallPtrSet Visited; + // If this is a call site query we use the call site specific return values + // and liveness information. + // TODO: use the function scope once we have call site AAReturnedValues. + const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction); + const auto &AARetVal = getAAFor(QueryingAA, QueryIRP); + if (!AARetVal.getState().isValidState()) + return false; - // Traverse BB by dfs and check whether successor is already visited. - for (BasicBlock *BB : depth_first(&F)) { - Visited.insert(BB); - for (auto *SuccBB : successors(BB)) { - if (Visited.count(SuccBB)) - return true; - } - } - return false; + return AARetVal.checkForAllReturnedValuesAndReturnInsts(Pred); } -// Helper function that checks the function have a loop which might become an -// endless loop -// FIXME: Any cycle is regarded as endless loop for now. -// We have to allow some patterns. -bool containsPossiblyEndlessLoop(Function &F) { return containsCycle(F); } +bool Attributor::checkForAllReturnedValues( + const function_ref &Pred, + const AbstractAttribute &QueryingAA) { -void AAWillReturnFunction::initialize(Attributor &A) { - Function &F = getAnchorScope(); - - if (containsPossiblyEndlessLoop(F)) - indicatePessimisticFixpoint(); -} + const IRPosition &IRP = QueryingAA.getIRPosition(); + const Function *AssociatedFunction = IRP.getAssociatedFunction(); + if (!AssociatedFunction) + return false; -ChangeStatus AAWillReturnFunction::updateImpl(Attributor &A) { - Function &F = getAnchorScope(); + // TODO: use the function scope once we have call site AAReturnedValues. + const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction); + const auto &AARetVal = getAAFor(QueryingAA, QueryIRP); + if (!AARetVal.getState().isValidState()) + return false; - // The map from instruction opcodes to those instructions in the function. - auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F); + return AARetVal.checkForAllReturnedValuesAndReturnInsts( + [&](Value &RV, const SmallSetVector &) { + return Pred(RV); + }); +} - for (unsigned Opcode : - {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr, - (unsigned)Instruction::Call}) { +static bool +checkForAllInstructionsImpl(InformationCache::OpcodeInstMapTy &OpcodeInstMap, + const function_ref &Pred, + const AAIsDead *LivenessAA, bool &AnyDead, + const ArrayRef &Opcodes) { + for (unsigned Opcode : Opcodes) { for (Instruction *I : OpcodeInstMap[Opcode]) { - auto ICS = ImmutableCallSite(I); - - if (ICS.hasFnAttr(Attribute::WillReturn)) + // Skip dead instructions. + if (LivenessAA && LivenessAA->isAssumedDead(I)) { + AnyDead = true; continue; - - auto *WillReturnAA = A.getAAFor(*this, *I); - if (!WillReturnAA || !WillReturnAA->isAssumedWillReturn()) { - indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; } - auto *NoRecurseAA = A.getAAFor(*this, *I); - - // FIXME: (i) Prohibit any recursion for now. - // (ii) AANoRecurse isn't implemented yet so currently any call is - // regarded as having recursion. 
- // Code below should be - // if ((!NoRecurseAA || !NoRecurseAA->isAssumedNoRecurse()) && - if (!NoRecurseAA && !ICS.hasFnAttr(Attribute::NoRecurse)) { - indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; - } + if (!Pred(*I)) + return false; } } - - return ChangeStatus::UNCHANGED; + return true; } -/// ---------------------------------------------------------------------------- -/// Attributor -/// ---------------------------------------------------------------------------- +bool Attributor::checkForAllInstructions( + const llvm::function_ref &Pred, + const AbstractAttribute &QueryingAA, const ArrayRef &Opcodes) { -bool Attributor::checkForAllCallSites(Function &F, - std::function &Pred, - bool RequireAllCallSites) { - // We can try to determine information from - // the call sites. However, this is only possible all call sites are known, - // hence the function has internal linkage. - if (RequireAllCallSites && !F.hasInternalLinkage()) { - LLVM_DEBUG( - dbgs() - << "Attributor: Function " << F.getName() - << " has no internal linkage, hence not all call sites are known\n"); + const IRPosition &IRP = QueryingAA.getIRPosition(); + // Since we need to provide instructions we have to have an exact definition. + const Function *AssociatedFunction = IRP.getAssociatedFunction(); + if (!AssociatedFunction) return false; - } - for (const Use &U : F.uses()) { + // TODO: use the function scope once we have call site AAReturnedValues. + const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction); + const auto &LivenessAA = + getAAFor(QueryingAA, QueryIRP, /* TrackDependence */ false); + bool AnyDead = false; - CallSite CS(U.getUser()); - if (!CS || !CS.isCallee(&U) || !CS.getCaller()->hasExactDefinition()) { - if (!RequireAllCallSites) - continue; + auto &OpcodeInstMap = + InfoCache.getOpcodeInstMapForFunction(*AssociatedFunction); + if (!checkForAllInstructionsImpl(OpcodeInstMap, Pred, &LivenessAA, AnyDead, + Opcodes)) + return false; - LLVM_DEBUG(dbgs() << "Attributor: User " << *U.getUser() - << " is an invalid use of " << F.getName() << "\n"); - return false; - } + // If we actually used liveness information so we have to record a dependence. + if (AnyDead) + recordDependence(LivenessAA, QueryingAA); - if (Pred(CS)) - continue; + return true; +} - LLVM_DEBUG(dbgs() << "Attributor: Call site callback failed for " - << *CS.getInstruction() << "\n"); +bool Attributor::checkForAllReadWriteInstructions( + const llvm::function_ref &Pred, + AbstractAttribute &QueryingAA) { + + const Function *AssociatedFunction = + QueryingAA.getIRPosition().getAssociatedFunction(); + if (!AssociatedFunction) return false; + + // TODO: use the function scope once we have call site AAReturnedValues. + const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction); + const auto &LivenessAA = + getAAFor(QueryingAA, QueryIRP, /* TrackDependence */ false); + bool AnyDead = false; + + for (Instruction *I : + InfoCache.getReadOrWriteInstsForFunction(*AssociatedFunction)) { + // Skip dead instructions. + if (LivenessAA.isAssumedDead(I)) { + AnyDead = true; + continue; + } + + if (!Pred(*I)) + return false; } + // If we actually used liveness information so we have to record a dependence. + if (AnyDead) + recordDependence(LivenessAA, QueryingAA); + return true; } -ChangeStatus Attributor::run() { - // Initialize all abstract attributes. 
- for (AbstractAttribute *AA : AllAbstractAttributes) - AA->initialize(*this); - +ChangeStatus Attributor::run(Module &M) { LLVM_DEBUG(dbgs() << "[Attributor] Identified and initialized " << AllAbstractAttributes.size() << " abstract attributes.\n"); @@ -1370,10 +4470,25 @@ ChangeStatus Attributor::run() { SetVector Worklist; Worklist.insert(AllAbstractAttributes.begin(), AllAbstractAttributes.end()); + bool RecomputeDependences = false; + do { + // Remember the size to determine new attributes. + size_t NumAAs = AllAbstractAttributes.size(); LLVM_DEBUG(dbgs() << "\n\n[Attributor] #Iteration: " << IterationCounter << ", Worklist size: " << Worklist.size() << "\n"); + // If dependences (=QueryMap) are recomputed we have to look at all abstract + // attributes again, regardless of what changed in the last iteration. + if (RecomputeDependences) { + LLVM_DEBUG( + dbgs() << "[Attributor] Run all AAs to recompute dependences\n"); + QueryMap.clear(); + ChangedAAs.clear(); + Worklist.insert(AllAbstractAttributes.begin(), + AllAbstractAttributes.end()); + } + // Add all abstract attributes that are potentially dependent on one that // changed to the work list. for (AbstractAttribute *ChangedAA : ChangedAAs) { @@ -1381,27 +4496,42 @@ ChangeStatus Attributor::run() { Worklist.insert(QuerriedAAs.begin(), QuerriedAAs.end()); } + LLVM_DEBUG(dbgs() << "[Attributor] #Iteration: " << IterationCounter + << ", Worklist+Dependent size: " << Worklist.size() + << "\n"); + // Reset the changed set. ChangedAAs.clear(); // Update all abstract attribute in the work list and record the ones that // changed. for (AbstractAttribute *AA : Worklist) - if (AA->update(*this) == ChangeStatus::CHANGED) - ChangedAAs.push_back(AA); + if (!isAssumedDead(*AA, nullptr)) + if (AA->update(*this) == ChangeStatus::CHANGED) + ChangedAAs.push_back(AA); + + // Check if we recompute the dependences in the next iteration. + RecomputeDependences = (DepRecomputeInterval > 0 && + IterationCounter % DepRecomputeInterval == 0); + + // Add attributes to the changed set if they have been created in the last + // iteration. + ChangedAAs.append(AllAbstractAttributes.begin() + NumAAs, + AllAbstractAttributes.end()); // Reset the work list and repopulate with the changed abstract attributes. // Note that dependent ones are added above. Worklist.clear(); Worklist.insert(ChangedAAs.begin(), ChangedAAs.end()); - } while (!Worklist.empty() && ++IterationCounter < MaxFixpointIterations); + } while (!Worklist.empty() && (IterationCounter++ < MaxFixpointIterations || + VerifyMaxFixpointIterations)); LLVM_DEBUG(dbgs() << "\n[Attributor] Fixpoint iteration done after: " << IterationCounter << "/" << MaxFixpointIterations << " iterations\n"); - bool FinishedAtFixpoint = Worklist.empty(); + size_t NumFinalAAs = AllAbstractAttributes.size(); // Reset abstract arguments not settled in a sound fixpoint by now. This // happens when we stopped the fixpoint iteration early. Note that only the @@ -1448,8 +4578,14 @@ ChangeStatus Attributor::run() { if (!State.isValidState()) continue; + // Skip dead code. + if (isAssumedDead(*AA, nullptr)) + continue; // Manifest the state and record if we changed the IR. 
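+ // Statistics are only tracked for attributes that actually changed the IR + // (and only when statistics are enabled).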
ChangeStatus LocalChange = AA->manifest(*this); + if (LocalChange == ChangeStatus::CHANGED && AreStatisticsEnabled()) + AA->trackStatistics(); + ManifestChange = ManifestChange | LocalChange; NumAtFixpoint++; @@ -1462,69 +4598,92 @@ ChangeStatus Attributor::run() { << " arguments while " << NumAtFixpoint << " were in a valid fixpoint state\n"); - // If verification is requested, we finished this run at a fixpoint, and the - // IR was changed, we re-run the whole fixpoint analysis, starting at - // re-initialization of the arguments. This re-run should not result in an IR - // change. Though, the (virtual) state of attributes at the end of the re-run - // might be more optimistic than the known state or the IR state if the better - // state cannot be manifested. - if (VerifyAttributor && FinishedAtFixpoint && - ManifestChange == ChangeStatus::CHANGED) { - VerifyAttributor = false; - ChangeStatus VerifyStatus = run(); - if (VerifyStatus != ChangeStatus::UNCHANGED) - llvm_unreachable( - "Attributor verification failed, re-run did result in an IR change " - "even after a fixpoint was reached in the original run. (False " - "positives possible!)"); - VerifyAttributor = true; - } - NumAttributesManifested += NumManifested; NumAttributesValidFixpoint += NumAtFixpoint; - return ManifestChange; -} - -void Attributor::identifyDefaultAbstractAttributes( - Function &F, InformationCache &InfoCache, - DenseSet *Whitelist) { + (void)NumFinalAAs; + assert( + NumFinalAAs == AllAbstractAttributes.size() && + "Expected the final number of abstract attributes to remain unchanged!"); + + // Delete stuff at the end to avoid invalid references and a nice order. + { + LLVM_DEBUG(dbgs() << "\n[Attributor] Delete at least " + << ToBeDeletedFunctions.size() << " functions and " + << ToBeDeletedBlocks.size() << " blocks and " + << ToBeDeletedInsts.size() << " instructions\n"); + for (Instruction *I : ToBeDeletedInsts) { + if (!I->use_empty()) + I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->eraseFromParent(); + } - // Every function can be nounwind. - registerAA(*new AANoUnwindFunction(F, InfoCache)); + if (unsigned NumDeadBlocks = ToBeDeletedBlocks.size()) { + SmallVector ToBeDeletedBBs; + ToBeDeletedBBs.reserve(NumDeadBlocks); + ToBeDeletedBBs.append(ToBeDeletedBlocks.begin(), ToBeDeletedBlocks.end()); + DeleteDeadBlocks(ToBeDeletedBBs); + STATS_DECLTRACK(AAIsDead, BasicBlock, + "Number of dead basic blocks deleted."); + } - // Every function might be marked "nosync" - registerAA(*new AANoSyncFunction(F, InfoCache)); + STATS_DECL(AAIsDead, Function, "Number of dead functions deleted."); + for (Function *Fn : ToBeDeletedFunctions) { + Fn->replaceAllUsesWith(UndefValue::get(Fn->getType())); + Fn->eraseFromParent(); + STATS_TRACK(AAIsDead, Function); + } - // Every function might be "no-free". - registerAA(*new AANoFreeFunction(F, InfoCache)); + // Identify dead internal functions and delete them. This happens outside + // the other fixpoint analysis as we might treat potentially dead functions + // as live to lower the number of iterations. If they happen to be dead, the + // below fixpoint loop will identify and eliminate them. + SmallVector InternalFns; + for (Function &F : M) + if (F.hasLocalLinkage()) + InternalFns.push_back(&F); + + bool FoundDeadFn = true; + while (FoundDeadFn) { + FoundDeadFn = false; + for (unsigned u = 0, e = InternalFns.size(); u < e; ++u) { + Function *F = InternalFns[u]; + if (!F) + continue; - // Return attributes are only appropriate if the return type is non void. 
- Type *ReturnType = F.getReturnType(); - if (!ReturnType->isVoidTy()) { - // Argument attribute "returned" --- Create only one per function even - // though it is an argument attribute. - if (!Whitelist || Whitelist->count(AAReturnedValues::ID)) - registerAA(*new AAReturnedValuesImpl(F, InfoCache)); + const auto *LivenessAA = + lookupAAFor(IRPosition::function(*F)); + if (LivenessAA && + !checkForAllCallSites([](AbstractCallSite ACS) { return false; }, + *LivenessAA, true)) + continue; - // Every function with pointer return type might be marked nonnull. - if (ReturnType->isPointerTy() && - (!Whitelist || Whitelist->count(AANonNullReturned::ID))) - registerAA(*new AANonNullReturned(F, InfoCache)); + STATS_TRACK(AAIsDead, Function); + F->replaceAllUsesWith(UndefValue::get(F->getType())); + F->eraseFromParent(); + InternalFns[u] = nullptr; + FoundDeadFn = true; + } + } } - // Every argument with pointer type might be marked nonnull. - for (Argument &Arg : F.args()) { - if (Arg.getType()->isPointerTy()) - registerAA(*new AANonNullArgument(Arg, InfoCache)); + if (VerifyMaxFixpointIterations && + IterationCounter != MaxFixpointIterations) { + errs() << "\n[Attributor] Fixpoint iteration done after: " + << IterationCounter << "/" << MaxFixpointIterations + << " iterations\n"; + llvm_unreachable("The fixpoint was not reached with exactly the number of " + "specified iterations!"); } - // Every function might be "will-return". - registerAA(*new AAWillReturnFunction(F, InfoCache)); + return ManifestChange; +} + +void Attributor::initializeInformationCache(Function &F) { - // Walk all instructions to find more attribute opportunities and also - // interesting instructions that might be queried by abstract attributes - // during their initialization or update. + // Walk all instructions to find interesting instructions that might be + // queried by abstract attributes during their initialization or update. + // This has to happen before we create attributes. auto &ReadOrWriteInsts = InfoCache.FuncRWInstsMap[&F]; auto &InstOpcodeMap = InfoCache.FuncInstOpcodeMap[&F]; @@ -1540,8 +4699,12 @@ void Attributor::identifyDefaultAbstractAttributes( default: assert((!ImmutableCallSite(&I)) && (!isa(&I)) && "New call site/base instruction type needs to be known int the " - "attributor."); + "Attributor."); break; + case Instruction::Load: + // The alignment of a pointer is interesting for loads. + case Instruction::Store: + // The alignment of a pointer is interesting for stores. case Instruction::Call: case Instruction::CallBr: case Instruction::Invoke: @@ -1555,18 +4718,154 @@ void Attributor::identifyDefaultAbstractAttributes( InstOpcodeMap[I.getOpcode()].push_back(&I); if (I.mayReadOrWriteMemory()) ReadOrWriteInsts.push_back(&I); + } +} + +void Attributor::identifyDefaultAbstractAttributes(Function &F) { + if (!VisitedFunctions.insert(&F).second) + return; + + IRPosition FPos = IRPosition::function(F); + + // Check for dead BasicBlocks in every function. + // We need dead instruction detection because we do not want to deal with + // broken IR in which SSA rules do not apply. + getOrCreateAAFor(FPos); + + // Every function might be "will-return". + getOrCreateAAFor(FPos); + + // Every function can be nounwind. + getOrCreateAAFor(FPos); + + // Every function might be marked "nosync" + getOrCreateAAFor(FPos); + + // Every function might be "no-free". + getOrCreateAAFor(FPos); + + // Every function might be "no-return". + getOrCreateAAFor(FPos); + + // Every function might be "no-recurse". 
+ getOrCreateAAFor(FPos); + + // Every function might be "readnone/readonly/writeonly/...". + getOrCreateAAFor(FPos); + + // Every function might be applicable for Heap-To-Stack conversion. + if (EnableHeapToStack) + getOrCreateAAFor(FPos); + + // Return attributes are only appropriate if the return type is non void. + Type *ReturnType = F.getReturnType(); + if (!ReturnType->isVoidTy()) { + // Argument attribute "returned" --- Create only one per function even + // though it is an argument attribute. + getOrCreateAAFor(FPos); + + IRPosition RetPos = IRPosition::returned(F); + + // Every function might be simplified. + getOrCreateAAFor(RetPos); + + if (ReturnType->isPointerTy()) { + + // Every function with pointer return type might be marked align. + getOrCreateAAFor(RetPos); + + // Every function with pointer return type might be marked nonnull. + getOrCreateAAFor(RetPos); + + // Every function with pointer return type might be marked noalias. + getOrCreateAAFor(RetPos); + // Every function with pointer return type might be marked + // dereferenceable. + getOrCreateAAFor(RetPos); + } + } + + for (Argument &Arg : F.args()) { + IRPosition ArgPos = IRPosition::argument(Arg); + + // Every argument might be simplified. + getOrCreateAAFor(ArgPos); + + if (Arg.getType()->isPointerTy()) { + // Every argument with pointer type might be marked nonnull. + getOrCreateAAFor(ArgPos); + + // Every argument with pointer type might be marked noalias. + getOrCreateAAFor(ArgPos); + + // Every argument with pointer type might be marked dereferenceable. + getOrCreateAAFor(ArgPos); + + // Every argument with pointer type might be marked align. + getOrCreateAAFor(ArgPos); + + // Every argument with pointer type might be marked nocapture. + getOrCreateAAFor(ArgPos); + + // Every argument with pointer type might be marked + // "readnone/readonly/writeonly/..." + getOrCreateAAFor(ArgPos); + } + } + + auto CallSitePred = [&](Instruction &I) -> bool { CallSite CS(&I); - if (CS && CS.getCalledFunction()) { + if (CS.getCalledFunction()) { for (int i = 0, e = CS.getCalledFunction()->arg_size(); i < e; i++) { + + IRPosition CSArgPos = IRPosition::callsite_argument(CS, i); + + // Call site argument might be simplified. + getOrCreateAAFor(CSArgPos); + if (!CS.getArgument(i)->getType()->isPointerTy()) continue; // Call site argument attribute "non-null". - registerAA(*new AANonNullCallSiteArgument(CS, i, InfoCache), i); + getOrCreateAAFor(CSArgPos); + + // Call site argument attribute "no-alias". + getOrCreateAAFor(CSArgPos); + + // Call site argument attribute "dereferenceable". + getOrCreateAAFor(CSArgPos); + + // Call site argument attribute "align". 
+ getOrCreateAAFor(CSArgPos); } } - } + return true; + }; + + auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F); + bool Success, AnyDead = false; + Success = checkForAllInstructionsImpl( + OpcodeInstMap, CallSitePred, nullptr, AnyDead, + {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr, + (unsigned)Instruction::Call}); + (void)Success; + assert(Success && !AnyDead && "Expected the check call to be successful!"); + + auto LoadStorePred = [&](Instruction &I) -> bool { + if (isa(I)) + getOrCreateAAFor( + IRPosition::value(*cast(I).getPointerOperand())); + else + getOrCreateAAFor( + IRPosition::value(*cast(I).getPointerOperand())); + return true; + }; + Success = checkForAllInstructionsImpl( + OpcodeInstMap, LoadStorePred, nullptr, AnyDead, + {(unsigned)Instruction::Load, (unsigned)Instruction::Store}); + (void)Success; + assert(Success && !AnyDead && "Expected the check call to be successful!"); } /// Helpers to ease debugging through output streams and print calls. @@ -1576,21 +4875,39 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, ChangeStatus S) { return OS << (S == ChangeStatus::CHANGED ? "changed" : "unchanged"); } -raw_ostream &llvm::operator<<(raw_ostream &OS, - AbstractAttribute::ManifestPosition AP) { +raw_ostream &llvm::operator<<(raw_ostream &OS, IRPosition::Kind AP) { switch (AP) { - case AbstractAttribute::MP_ARGUMENT: + case IRPosition::IRP_INVALID: + return OS << "inv"; + case IRPosition::IRP_FLOAT: + return OS << "flt"; + case IRPosition::IRP_RETURNED: + return OS << "fn_ret"; + case IRPosition::IRP_CALL_SITE_RETURNED: + return OS << "cs_ret"; + case IRPosition::IRP_FUNCTION: + return OS << "fn"; + case IRPosition::IRP_CALL_SITE: + return OS << "cs"; + case IRPosition::IRP_ARGUMENT: return OS << "arg"; - case AbstractAttribute::MP_CALL_SITE_ARGUMENT: + case IRPosition::IRP_CALL_SITE_ARGUMENT: return OS << "cs_arg"; - case AbstractAttribute::MP_FUNCTION: - return OS << "fn"; - case AbstractAttribute::MP_RETURNED: - return OS << "fn_ret"; } llvm_unreachable("Unknown attribute position!"); } +raw_ostream &llvm::operator<<(raw_ostream &OS, const IRPosition &Pos) { + const Value &AV = Pos.getAssociatedValue(); + return OS << "{" << Pos.getPositionKind() << ":" << AV.getName() << " [" + << Pos.getAnchorValue().getName() << "@" << Pos.getArgNo() << "]}"; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const IntegerState &S) { + return OS << "(" << S.getKnown() << "-" << S.getAssumed() << ")" + << static_cast(S); +} + raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractState &S) { return OS << (!S.isValidState() ? "top" : (S.isAtFixpoint() ? "fix" : "")); } @@ -1601,8 +4918,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractAttribute &AA) { } void AbstractAttribute::print(raw_ostream &OS) const { - OS << "[" << getManifestPosition() << "][" << getAsStr() << "][" - << AnchoredVal.getName() << "]"; + OS << "[P: " << getIRPosition() << "][" << getAsStr() << "][S: " << getState() + << "]"; } ///} @@ -1610,7 +4927,7 @@ void AbstractAttribute::print(raw_ostream &OS) const { /// Pass (Manager) Boilerplate /// ---------------------------------------------------------------------------- -static bool runAttributorOnModule(Module &M) { +static bool runAttributorOnModule(Module &M, AnalysisGetter &AG) { if (DisableAttributor) return false; @@ -1619,39 +4936,39 @@ static bool runAttributorOnModule(Module &M) { // Create an Attributor and initially empty information cache that is filled // while we identify default attribute opportunities. 
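+ // The information cache is now filled for all functions up front; abstract + // attributes are seeded afterwards, and internal functions whose uses are + // all direct calls are only looked at on demand.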
- Attributor A; - InformationCache InfoCache; + InformationCache InfoCache(M, AG); + Attributor A(InfoCache, DepRecInterval); + + for (Function &F : M) + A.initializeInformationCache(F); for (Function &F : M) { - // TODO: Not all attributes require an exact definition. Find a way to - // enable deduction for some but not all attributes in case the - // definition might be changed at runtime, see also - // http://lists.llvm.org/pipermail/llvm-dev/2018-February/121275.html. - // TODO: We could always determine abstract attributes and if sufficient - // information was found we could duplicate the functions that do not - // have an exact definition. - if (!F.hasExactDefinition()) { + if (F.hasExactDefinition()) + NumFnWithExactDefinition++; + else NumFnWithoutExactDefinition++; - continue; - } - // For now we ignore naked and optnone functions. - if (F.hasFnAttribute(Attribute::Naked) || - F.hasFnAttribute(Attribute::OptimizeNone)) - continue; - - NumFnWithExactDefinition++; + // We look at internal functions only on-demand but if any use is not a + // direct call, we have to do it eagerly. + if (F.hasLocalLinkage()) { + if (llvm::all_of(F.uses(), [](const Use &U) { + return ImmutableCallSite(U.getUser()) && + ImmutableCallSite(U.getUser()).isCallee(&U); + })) + continue; + } // Populate the Attributor with abstract attribute opportunities in the // function and the information cache with IR information. - A.identifyDefaultAbstractAttributes(F, InfoCache); + A.identifyDefaultAbstractAttributes(F); } - return A.run() == ChangeStatus::CHANGED; + return A.run(M) == ChangeStatus::CHANGED; } PreservedAnalyses AttributorPass::run(Module &M, ModuleAnalysisManager &AM) { - if (runAttributorOnModule(M)) { + AnalysisGetter AG(AM); + if (runAttributorOnModule(M, AG)) { // FIXME: Think about passes we will preserve and add them here. return PreservedAnalyses::none(); } @@ -1670,12 +4987,14 @@ struct AttributorLegacyPass : public ModulePass { bool runOnModule(Module &M) override { if (skipModule(M)) return false; - return runAttributorOnModule(M); + + AnalysisGetter AG; + return runAttributorOnModule(M, AG); } void getAnalysisUsage(AnalysisUsage &AU) const override { // FIXME: Think about passes we will preserve and add them here. - AU.setPreservesCFG(); + AU.addRequired(); } }; @@ -1684,7 +5003,147 @@ struct AttributorLegacyPass : public ModulePass { Pass *llvm::createAttributorLegacyPass() { return new AttributorLegacyPass(); } char AttributorLegacyPass::ID = 0; + +const char AAReturnedValues::ID = 0; +const char AANoUnwind::ID = 0; +const char AANoSync::ID = 0; +const char AANoFree::ID = 0; +const char AANonNull::ID = 0; +const char AANoRecurse::ID = 0; +const char AAWillReturn::ID = 0; +const char AANoAlias::ID = 0; +const char AANoReturn::ID = 0; +const char AAIsDead::ID = 0; +const char AADereferenceable::ID = 0; +const char AAAlign::ID = 0; +const char AANoCapture::ID = 0; +const char AAValueSimplify::ID = 0; +const char AAHeapToStack::ID = 0; +const char AAMemoryBehavior::ID = 0; + +// Macro magic to create the static generator function for attributes that +// follow the naming scheme. 
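+// For example, CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUnwind) +// below expands to AANoUnwind::createForPosition, which switches over the +// position kind, instantiates AANoUnwindFunction or AANoUnwindCallSite, and +// hits llvm_unreachable for position kinds a function attribute cannot have.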
+ +#define SWITCH_PK_INV(CLASS, PK, POS_NAME) \ + case IRPosition::PK: \ + llvm_unreachable("Cannot create " #CLASS " for a " POS_NAME " position!"); + +#define SWITCH_PK_CREATE(CLASS, IRP, PK, SUFFIX) \ + case IRPosition::PK: \ + AA = new CLASS##SUFFIX(IRP); \ + break; + +#define CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ + CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ + CLASS *AA = nullptr; \ + switch (IRP.getPositionKind()) { \ + SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ + SWITCH_PK_INV(CLASS, IRP_FLOAT, "floating") \ + SWITCH_PK_INV(CLASS, IRP_ARGUMENT, "argument") \ + SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \ + SWITCH_PK_INV(CLASS, IRP_CALL_SITE_RETURNED, "call site returned") \ + SWITCH_PK_INV(CLASS, IRP_CALL_SITE_ARGUMENT, "call site argument") \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \ + } \ + return *AA; \ + } + +#define CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ + CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ + CLASS *AA = nullptr; \ + switch (IRP.getPositionKind()) { \ + SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ + SWITCH_PK_INV(CLASS, IRP_FUNCTION, "function") \ + SWITCH_PK_INV(CLASS, IRP_CALL_SITE, "call site") \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_RETURNED, Returned) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \ + } \ + return *AA; \ + } + +#define CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ + CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ + CLASS *AA = nullptr; \ + switch (IRP.getPositionKind()) { \ + SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_RETURNED, Returned) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \ + } \ + return *AA; \ + } + +#define CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ + CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ + CLASS *AA = nullptr; \ + switch (IRP.getPositionKind()) { \ + SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ + SWITCH_PK_INV(CLASS, IRP_ARGUMENT, "argument") \ + SWITCH_PK_INV(CLASS, IRP_FLOAT, "floating") \ + SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \ + SWITCH_PK_INV(CLASS, IRP_CALL_SITE_RETURNED, "call site returned") \ + SWITCH_PK_INV(CLASS, IRP_CALL_SITE_ARGUMENT, "call site argument") \ + SWITCH_PK_INV(CLASS, IRP_CALL_SITE, "call site") \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \ + } \ + return *AA; \ + } + +#define CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ + CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ + CLASS *AA = nullptr; \ + switch (IRP.getPositionKind()) { \ + SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ + SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \ + 
SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \ + } \ + return *AA; \ + } + +CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUnwind) +CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoSync) +CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFree) +CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoRecurse) +CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAWillReturn) +CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoReturn) +CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIsDead) +CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReturnedValues) + +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture) + +CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify) + +CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAHeapToStack) + +CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryBehavior) + +#undef CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION +#undef CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION +#undef CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION +#undef CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION +#undef CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION +#undef SWITCH_PK_CREATE +#undef SWITCH_PK_INV + INITIALIZE_PASS_BEGIN(AttributorLegacyPass, "attributor", "Deduce and propagate attributes", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(AttributorLegacyPass, "attributor", "Deduce and propagate attributes", false, false) diff --git a/lib/Transforms/IPO/BlockExtractor.cpp b/lib/Transforms/IPO/BlockExtractor.cpp index 6c365f3f3cbe..de80c88c1591 100644 --- a/lib/Transforms/IPO/BlockExtractor.cpp +++ b/lib/Transforms/IPO/BlockExtractor.cpp @@ -119,6 +119,8 @@ void BlockExtractor::loadFile() { /*KeepEmpty=*/false); if (LineSplit.empty()) continue; + if (LineSplit.size()!=2) + report_fatal_error("Invalid line format, expecting lines like: 'funcname bb1[;bb2..]'"); SmallVector BBNames; LineSplit[1].split(BBNames, ';', /*MaxSplit=*/-1, /*KeepEmpty=*/false); @@ -204,7 +206,8 @@ bool BlockExtractor::runOnModule(Module &M) { ++NumExtracted; Changed = true; } - Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(); + CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent()); + Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(CEAC); if (F) LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName() << "' in: " << F->getName() << '\n'); diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index ad877ae1786c..3cf839e397f8 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -48,7 +48,7 @@ static void FindUsedValues(GlobalVariable *LLVMUsed, ConstantArray *Inits = cast(LLVMUsed->getInitializer()); for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) { - Value *Operand = Inits->getOperand(i)->stripPointerCastsNoFollowAliases(); + Value *Operand = Inits->getOperand(i)->stripPointerCasts(); GlobalValue *GV = cast(Operand); UsedValues.insert(GV); } @@ -120,7 +120,7 @@ static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New) { // Bump the alignment if necessary. 
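Note on the "macro magic" above: each CREATE_*_ABSTRACT_ATTRIBUTE_FOR_POSITION invocation stamps out one createForPosition factory whose switch over IRPosition kinds either constructs the position-specific subclass or rejects the position. As a rough stand-alone illustration of that pattern only (the enum, classes, and macro names below are invented for this sketch and are not the Attributor API):

#include <cassert>
#include <cstdio>

// Invented stand-ins for IRPosition kinds and an abstract attribute family.
enum PositionKind { PK_INVALID, PK_FUNCTION, PK_CALL_SITE };

struct MyAttr {
  virtual ~MyAttr() = default;
  virtual const char *name() const = 0;
};
struct MyAttrFunction : MyAttr {
  const char *name() const override { return "MyAttrFunction"; }
};
struct MyAttrCallSite : MyAttr {
  const char *name() const override { return "MyAttrCallSite"; }
};

// Same trick as SWITCH_PK_INV / SWITCH_PK_CREATE: one macro per case, plus a
// macro that stamps out a whole factory for a given class prefix.
#define SWITCH_PK_INV(PK, NAME)                                               \
  case PK:                                                                    \
    assert(false && "cannot create attribute for a " NAME " position");       \
    break;

#define SWITCH_PK_CREATE(CLASS, PK, SUFFIX)                                   \
  case PK:                                                                    \
    AA = new CLASS##SUFFIX();                                                 \
    break;

#define CREATE_FUNCTION_ATTRIBUTE_FACTORY(CLASS)                              \
  MyAttr *create##CLASS(PositionKind K) {                                     \
    MyAttr *AA = nullptr;                                                     \
    switch (K) {                                                              \
      SWITCH_PK_INV(PK_INVALID, "invalid")                                    \
      SWITCH_PK_CREATE(CLASS, PK_FUNCTION, Function)                          \
      SWITCH_PK_CREATE(CLASS, PK_CALL_SITE, CallSite)                         \
    }                                                                         \
    return AA;                                                                \
  }

CREATE_FUNCTION_ATTRIBUTE_FACTORY(MyAttr)

int main() {
  MyAttr *A = createMyAttr(PK_FUNCTION);
  std::printf("%s\n", A->name()); // prints MyAttrFunction
  delete A;
}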
if (Old->getAlignment() || New->getAlignment()) - New->setAlignment(std::max(getAlignment(Old), getAlignment(New))); + New->setAlignment(Align(std::max(getAlignment(Old), getAlignment(New)))); copyDebugLocMetadata(Old, New); Old->replaceAllUsesWith(NewConstant); diff --git a/lib/Transforms/IPO/CrossDSOCFI.cpp b/lib/Transforms/IPO/CrossDSOCFI.cpp index e30b33aa4872..e20159ba0db5 100644 --- a/lib/Transforms/IPO/CrossDSOCFI.cpp +++ b/lib/Transforms/IPO/CrossDSOCFI.cpp @@ -84,13 +84,9 @@ void CrossDSOCFI::buildCFICheck(Module &M) { for (GlobalObject &GO : M.global_objects()) { Types.clear(); GO.getMetadata(LLVMContext::MD_type, Types); - for (MDNode *Type : Types) { - // Sanity check. GO must not be a function declaration. - assert(!isa(&GO) || !cast(&GO)->isDeclaration()); - + for (MDNode *Type : Types) if (ConstantInt *TypeId = extractNumericTypeId(Type)) TypeIds.insert(TypeId->getZExtValue()); - } } NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions"); @@ -108,11 +104,11 @@ void CrossDSOCFI::buildCFICheck(Module &M) { FunctionCallee C = M.getOrInsertFunction( "__cfi_check", Type::getVoidTy(Ctx), Type::getInt64Ty(Ctx), Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx)); - Function *F = dyn_cast(C.getCallee()); + Function *F = cast(C.getCallee()); // Take over the existing function. The frontend emits a weak stub so that the // linker knows about the symbol; this pass replaces the function body. F->deleteBody(); - F->setAlignment(4096); + F->setAlignment(Align(4096)); Triple T(M.getTargetTriple()); if (T.isARM() || T.isThumb()) diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 5ccd8bc4b0fb..b174c63a577b 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -78,11 +78,8 @@ STATISTIC(NumNoRecurse, "Number of functions marked as norecurse"); STATISTIC(NumNoUnwind, "Number of functions marked as nounwind"); STATISTIC(NumNoFree, "Number of functions marked as nofree"); -// FIXME: This is disabled by default to avoid exposing security vulnerabilities -// in C/C++ code compiled by clang: -// http://lists.llvm.org/pipermail/cfe-dev/2017-January/052066.html static cl::opt EnableNonnullArgPropagation( - "enable-nonnull-arg-prop", cl::Hidden, + "enable-nonnull-arg-prop", cl::init(true), cl::Hidden, cl::desc("Try to propagate nonnull argument attributes from callsites to " "caller functions.")); @@ -664,6 +661,25 @@ static bool addArgumentAttrsFromCallsites(Function &F) { return Changed; } +static bool addReadAttr(Argument *A, Attribute::AttrKind R) { + assert((R == Attribute::ReadOnly || R == Attribute::ReadNone) + && "Must be a Read attribute."); + assert(A && "Argument must not be null."); + + // If the argument already has the attribute, nothing needs to be done. + if (A->hasAttribute(R)) + return false; + + // Otherwise, remove potentially conflicting attribute, add the new one, + // and update statistics. + A->removeAttr(Attribute::WriteOnly); + A->removeAttr(Attribute::ReadOnly); + A->removeAttr(Attribute::ReadNone); + A->addAttr(R); + R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg; + return true; +} + /// Deduce nocapture attributes for the SCC. 
static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { bool Changed = false; @@ -732,11 +748,8 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { SmallPtrSet Self; Self.insert(&*A); Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self); - if (R != Attribute::None) { - A->addAttr(R); - Changed = true; - R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg; - } + if (R != Attribute::None) + Changed = addReadAttr(A, R); } } } @@ -833,12 +846,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { if (ReadAttr != Attribute::None) { for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { Argument *A = ArgumentSCC[i]->Definition; - // Clear out existing readonly/readnone attributes - A->removeAttr(Attribute::ReadOnly); - A->removeAttr(Attribute::ReadNone); - A->addAttr(ReadAttr); - ReadAttr == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg; - Changed = true; + Changed = addReadAttr(A, ReadAttr); } } } diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp index 62c7fbd07223..3f5cc078d75f 100644 --- a/lib/Transforms/IPO/FunctionImport.cpp +++ b/lib/Transforms/IPO/FunctionImport.cpp @@ -450,7 +450,7 @@ static void computeImportForFunction( } else if (PrintImportFailures) { assert(!FailureInfo && "Expected no FailureInfo for newly rejected candidate"); - FailureInfo = llvm::make_unique( + FailureInfo = std::make_unique( VI, Edge.second.getHotness(), Reason, 1); } LLVM_DEBUG( @@ -764,7 +764,7 @@ void llvm::computeDeadSymbols( } // Make value live and add it to the worklist if it was not live before. - auto visit = [&](ValueInfo VI) { + auto visit = [&](ValueInfo VI, bool IsAliasee) { // FIXME: If we knew which edges were created for indirect call profiles, // we could skip them here. Any that are live should be reached via // other edges, e.g. reference edges. Otherwise, using a profile collected @@ -800,12 +800,15 @@ void llvm::computeDeadSymbols( Interposable = true; } - if (!KeepAliveLinkage) - return; + if (!IsAliasee) { + if (!KeepAliveLinkage) + return; - if (Interposable) - report_fatal_error( - "Interposable and available_externally/linkonce_odr/weak_odr symbol"); + if (Interposable) + report_fatal_error( + "Interposable and available_externally/linkonce_odr/weak_odr " + "symbol"); + } } for (auto &S : VI.getSummaryList()) @@ -821,16 +824,16 @@ void llvm::computeDeadSymbols( // If this is an alias, visit the aliasee VI to ensure that all copies // are marked live and it is added to the worklist for further // processing of its references. - visit(AS->getAliaseeVI()); + visit(AS->getAliaseeVI(), true); continue; } Summary->setLive(true); for (auto Ref : Summary->refs()) - visit(Ref); + visit(Ref, false); if (auto *FS = dyn_cast(Summary.get())) for (auto Call : FS->calls()) - visit(Call.first); + visit(Call.first, false); } } Index.setWithGlobalValueDeadStripping(); @@ -892,7 +895,7 @@ std::error_code llvm::EmitImportsFiles( StringRef ModulePath, StringRef OutputFilename, const std::map &ModuleToSummariesForIndex) { std::error_code EC; - raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::F_None); + raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::OF_None); if (EC) return EC; for (auto &ILI : ModuleToSummariesForIndex) @@ -948,23 +951,15 @@ void llvm::thinLTOResolvePrevailingInModule( auto NewLinkage = GS->second->linkage(); if (NewLinkage == GV.getLinkage()) return; - - // Switch the linkage to weakany if asked for, e.g. 
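The addReadAttr helper above centralizes what the two call sites previously did by hand: return early if the attribute is already present, drop any conflicting memory attribute, add the new one, and bump the matching statistic. A minimal stand-alone version of that normalize-then-set pattern (the Attr enum and ArgState struct below are invented for the sketch):

#include <cstdint>
#include <iostream>

// Invented stand-ins: a per-argument mask of the three mutually exclusive
// memory attributes handled above.
enum Attr : uint8_t { ReadNone = 1, ReadOnly = 2, WriteOnly = 4 };

struct ArgState {
  uint8_t Attrs = 0;
  bool has(Attr A) const { return Attrs & A; }
  void remove(Attr A) { Attrs &= static_cast<uint8_t>(~A); }
  void add(Attr A) { Attrs |= A; }
};

static unsigned NumReadNoneArg = 0, NumReadOnlyArg = 0;

// Returns true if the argument actually changed, mirroring the Changed
// bookkeeping in addArgumentAttrs.
static bool addReadAttr(ArgState &A, Attr R) {
  if (A.has(R))
    return false;        // already as strong as requested
  A.remove(WriteOnly);   // clear anything that would conflict
  A.remove(ReadOnly);
  A.remove(ReadNone);
  A.add(R);
  (R == ReadOnly ? NumReadOnlyArg : NumReadNoneArg)++;
  return true;
}

int main() {
  ArgState S;
  S.add(WriteOnly);
  std::cout << addReadAttr(S, ReadOnly) << ' '   // 1: writeonly replaced
            << addReadAttr(S, ReadOnly) << '\n'; // 0: already readonly
}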
we do this for - // linker redefined symbols (via --wrap or --defsym). - // We record that the visibility should be changed here in `addThinLTO` - // as we need access to the resolution vectors for each input file in - // order to find which symbols have been redefined. - // We may consider reorganizing this code and moving the linkage recording - // somewhere else, e.g. in thinLTOResolvePrevailingInIndex. - if (NewLinkage == GlobalValue::WeakAnyLinkage) { - GV.setLinkage(NewLinkage); - return; - } - if (GlobalValue::isLocalLinkage(GV.getLinkage()) || + // Don't internalize anything here, because the code below + // lacks necessary correctness checks. Leave this job to + // LLVM 'internalize' pass. + GlobalValue::isLocalLinkage(NewLinkage) || // In case it was dead and already converted to declaration. GV.isDeclaration()) return; + // Check for a non-prevailing def that has interposable linkage // (e.g. non-odr weak or linkonce). In that case we can't simply // convert to available_externally, since it would lose the diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index 86b7f3e49ee6..f010f7b703a6 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -17,9 +17,11 @@ #include "llvm/Transforms/IPO/GlobalDCE.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/TypeMetadataUtils.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/Pass.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/CtorUtils.h" @@ -29,10 +31,15 @@ using namespace llvm; #define DEBUG_TYPE "globaldce" +static cl::opt + ClEnableVFE("enable-vfe", cl::Hidden, cl::init(true), cl::ZeroOrMore, + cl::desc("Enable virtual function elimination")); + STATISTIC(NumAliases , "Number of global aliases removed"); STATISTIC(NumFunctions, "Number of functions removed"); STATISTIC(NumIFuncs, "Number of indirect functions removed"); STATISTIC(NumVariables, "Number of global variables removed"); +STATISTIC(NumVFuncs, "Number of virtual functions removed"); namespace { class GlobalDCELegacyPass : public ModulePass { @@ -118,6 +125,15 @@ void GlobalDCEPass::UpdateGVDependencies(GlobalValue &GV) { ComputeDependencies(User, Deps); Deps.erase(&GV); // Remove self-reference. for (GlobalValue *GVU : Deps) { + // If this is a dep from a vtable to a virtual function, and we have + // complete information about all virtual call sites which could call + // though this vtable, then skip it, because the call site information will + // be more precise. + if (VFESafeVTables.count(GVU) && isa(&GV)) { + LLVM_DEBUG(dbgs() << "Ignoring dep " << GVU->getName() << " -> " + << GV.getName() << "\n"); + continue; + } GVDependencies[GVU].insert(&GV); } } @@ -132,12 +148,133 @@ void GlobalDCEPass::MarkLive(GlobalValue &GV, if (Updates) Updates->push_back(&GV); if (Comdat *C = GV.getComdat()) { - for (auto &&CM : make_range(ComdatMembers.equal_range(C))) + for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { MarkLive(*CM.second, Updates); // Recursion depth is only two because only // globals in the same comdat are visited. 
+ } + } +} + +void GlobalDCEPass::ScanVTables(Module &M) { + SmallVector Types; + LLVM_DEBUG(dbgs() << "Building type info -> vtable map\n"); + + auto *LTOPostLinkMD = + cast_or_null(M.getModuleFlag("LTOPostLink")); + bool LTOPostLink = + LTOPostLinkMD && + (cast(LTOPostLinkMD->getValue())->getZExtValue() != 0); + + for (GlobalVariable &GV : M.globals()) { + Types.clear(); + GV.getMetadata(LLVMContext::MD_type, Types); + if (GV.isDeclaration() || Types.empty()) + continue; + + // Use the typeid metadata on the vtable to build a mapping from typeids to + // the list of (GV, offset) pairs which are the possible vtables for that + // typeid. + for (MDNode *Type : Types) { + Metadata *TypeID = Type->getOperand(1).get(); + + uint64_t Offset = + cast( + cast(Type->getOperand(0))->getValue()) + ->getZExtValue(); + + TypeIdMap[TypeID].insert(std::make_pair(&GV, Offset)); + } + + // If the type corresponding to the vtable is private to this translation + // unit, we know that we can see all virtual functions which might use it, + // so VFE is safe. + if (auto GO = dyn_cast(&GV)) { + GlobalObject::VCallVisibility TypeVis = GO->getVCallVisibility(); + if (TypeVis == GlobalObject::VCallVisibilityTranslationUnit || + (LTOPostLink && + TypeVis == GlobalObject::VCallVisibilityLinkageUnit)) { + LLVM_DEBUG(dbgs() << GV.getName() << " is safe for VFE\n"); + VFESafeVTables.insert(&GV); + } + } + } +} + +void GlobalDCEPass::ScanVTableLoad(Function *Caller, Metadata *TypeId, + uint64_t CallOffset) { + for (auto &VTableInfo : TypeIdMap[TypeId]) { + GlobalVariable *VTable = VTableInfo.first; + uint64_t VTableOffset = VTableInfo.second; + + Constant *Ptr = + getPointerAtOffset(VTable->getInitializer(), VTableOffset + CallOffset, + *Caller->getParent()); + if (!Ptr) { + LLVM_DEBUG(dbgs() << "can't find pointer in vtable!\n"); + VFESafeVTables.erase(VTable); + return; + } + + auto Callee = dyn_cast(Ptr->stripPointerCasts()); + if (!Callee) { + LLVM_DEBUG(dbgs() << "vtable entry is not function pointer!\n"); + VFESafeVTables.erase(VTable); + return; + } + + LLVM_DEBUG(dbgs() << "vfunc dep " << Caller->getName() << " -> " + << Callee->getName() << "\n"); + GVDependencies[Caller].insert(Callee); } } +void GlobalDCEPass::ScanTypeCheckedLoadIntrinsics(Module &M) { + LLVM_DEBUG(dbgs() << "Scanning type.checked.load intrinsics\n"); + Function *TypeCheckedLoadFunc = + M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load)); + + if (!TypeCheckedLoadFunc) + return; + + for (auto U : TypeCheckedLoadFunc->users()) { + auto CI = dyn_cast(U); + if (!CI) + continue; + + auto *Offset = dyn_cast(CI->getArgOperand(1)); + Value *TypeIdValue = CI->getArgOperand(2); + auto *TypeId = cast(TypeIdValue)->getMetadata(); + + if (Offset) { + ScanVTableLoad(CI->getFunction(), TypeId, Offset->getZExtValue()); + } else { + // type.checked.load with a non-constant offset, so assume every entry in + // every matching vtable is used. 
+ for (auto &VTableInfo : TypeIdMap[TypeId]) { + VFESafeVTables.erase(VTableInfo.first); + } + } + } +} + +void GlobalDCEPass::AddVirtualFunctionDependencies(Module &M) { + if (!ClEnableVFE) + return; + + ScanVTables(M); + + if (VFESafeVTables.empty()) + return; + + ScanTypeCheckedLoadIntrinsics(M); + + LLVM_DEBUG( + dbgs() << "VFE safe vtables:\n"; + for (auto *VTable : VFESafeVTables) + dbgs() << " " << VTable->getName() << "\n"; + ); +} + PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { bool Changed = false; @@ -163,6 +300,10 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { if (Comdat *C = GA.getComdat()) ComdatMembers.insert(std::make_pair(C, &GA)); + // Add dependencies between virtual call sites and the virtual functions they + // might call, if we have that information. + AddVirtualFunctionDependencies(M); + // Loop over the module, adding globals which are obviously necessary. for (GlobalObject &GO : M.global_objects()) { Changed |= RemoveUnusedGlobalValue(GO); @@ -257,8 +398,17 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { }; NumFunctions += DeadFunctions.size(); - for (Function *F : DeadFunctions) + for (Function *F : DeadFunctions) { + if (!F->use_empty()) { + // Virtual functions might still be referenced by one or more vtables, + // but if we've proven them to be unused then it's safe to replace the + // virtual function pointers with null, allowing us to remove the + // function itself. + ++NumVFuncs; + F->replaceNonMetadataUsesWith(ConstantPointerNull::get(F->getType())); + } EraseUnusedGlobalValue(F); + } NumVariables += DeadGlobalVars.size(); for (GlobalVariable *GV : DeadGlobalVars) @@ -277,6 +427,8 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { ConstantDependenciesCache.clear(); GVDependencies.clear(); ComdatMembers.clear(); + TypeIdMap.clear(); + VFESafeVTables.clear(); if (Changed) return PreservedAnalyses::none(); diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index c4fb3ce77f6e..819715b9f8da 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -155,7 +155,8 @@ static bool isLeakCheckerRoot(GlobalVariable *GV) { /// Given a value that is stored to a global but never read, determine whether /// it's safe to remove the store and the chain of computation that feeds the /// store. -static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) { +static bool IsSafeComputationToRemove( + Value *V, function_ref GetTLI) { do { if (isa(V)) return true; @@ -164,7 +165,7 @@ static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) { if (isa(V) || isa(V) || isa(V) || isa(V)) return false; - if (isAllocationFn(V, TLI)) + if (isAllocationFn(V, GetTLI)) return true; Instruction *I = cast(V); @@ -184,8 +185,9 @@ static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) { /// This GV is a pointer root. Loop over all users of the global and clean up /// any that obviously don't assign the global a value that isn't dynamically /// allocated. -static bool CleanupPointerRootUsers(GlobalVariable *GV, - const TargetLibraryInfo *TLI) { +static bool +CleanupPointerRootUsers(GlobalVariable *GV, + function_ref GetTLI) { // A brief explanation of leak checkers. The goal is to find bugs where // pointers are forgotten, causing an accumulating growth in memory // usage over time. 
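The virtual function elimination added to GlobalDCE above rests on two pieces of bookkeeping: a map from type-id metadata to the candidate (vtable, offset) pairs, and a set of vtables whose virtual calls are all visible. Whenever a type.checked.load cannot be resolved to a function pointer at the requested offset, the vtable is conservatively dropped from the safe set; otherwise a caller-to-callee dependency edge is recorded. A simplified model of that logic, using plain containers instead of the LLVM types:

#include <cstdint>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

// Invented model: a vtable is a name plus a table of byte offset -> callee
// name, with an empty string standing in for a slot we cannot resolve.
struct VTable {
  std::string Name;
  std::map<uint64_t, std::string> Slots;
};

using TypeIdMapT =
    std::map<std::string, std::vector<std::pair<VTable *, uint64_t>>>;

// Mirrors ScanVTableLoad: for every candidate vtable of TypeId, either record
// a caller -> callee dependency or give up on VFE for that vtable.
void scanVTableLoad(const std::string &Caller, const std::string &TypeId,
                    uint64_t CallOffset, TypeIdMapT &TypeIdMap,
                    std::set<VTable *> &VFESafeVTables,
                    std::multimap<std::string, std::string> &Deps) {
  for (auto &Cand : TypeIdMap[TypeId]) {
    VTable *VT = Cand.first;
    uint64_t Slot = Cand.second + CallOffset;
    auto It = VT->Slots.find(Slot);
    if (It == VT->Slots.end() || It->second.empty()) {
      VFESafeVTables.erase(VT); // cannot prove anything about this vtable
      return;
    }
    Deps.emplace(Caller, It->second); // virtual call edge caller -> callee
  }
}

int main() {
  VTable VT{"_ZTV1A", {{0, "A::foo"}, {8, "A::bar"}}};
  TypeIdMapT TypeIdMap{{"_ZTS1A", {{&VT, 0}}}};
  std::set<VTable *> Safe{&VT};
  std::multimap<std::string, std::string> Deps;
  scanVTableLoad("caller", "_ZTS1A", 8, TypeIdMap, Safe, Deps);
  std::cout << Deps.count("caller") << ' ' << Safe.count(&VT) << '\n'; // 1 1
}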
The common strategy for leak checkers is to whitelist the @@ -241,18 +243,18 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV, C->destroyConstant(); // This could have invalidated UI, start over from scratch. Dead.clear(); - CleanupPointerRootUsers(GV, TLI); + CleanupPointerRootUsers(GV, GetTLI); return true; } } } for (int i = 0, e = Dead.size(); i != e; ++i) { - if (IsSafeComputationToRemove(Dead[i].first, TLI)) { + if (IsSafeComputationToRemove(Dead[i].first, GetTLI)) { Dead[i].second->eraseFromParent(); Instruction *I = Dead[i].first; do { - if (isAllocationFn(I, TLI)) + if (isAllocationFn(I, GetTLI)) break; Instruction *J = dyn_cast(I->getOperand(0)); if (!J) @@ -270,9 +272,9 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV, /// We just marked GV constant. Loop over all users of the global, cleaning up /// the obvious ones. This is largely just a quick scan over the use list to /// clean up the easy and obvious cruft. This returns true if it made a change. -static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, - const DataLayout &DL, - TargetLibraryInfo *TLI) { +static bool CleanupConstantGlobalUsers( + Value *V, Constant *Init, const DataLayout &DL, + function_ref GetTLI) { bool Changed = false; // Note that we need to use a weak value handle for the worklist items. When // we delete a constant array, we may also be holding pointer to one of its @@ -302,12 +304,12 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, Constant *SubInit = nullptr; if (Init) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); - Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, TLI); + Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, GetTLI); } else if ((CE->getOpcode() == Instruction::BitCast && CE->getType()->isPointerTy()) || CE->getOpcode() == Instruction::AddrSpaceCast) { // Pointer cast, delete any stores and memsets to the global. - Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, TLI); + Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, GetTLI); } if (CE->use_empty()) { @@ -321,7 +323,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, Constant *SubInit = nullptr; if (!isa(GEP->getOperand(0))) { ConstantExpr *CE = dyn_cast_or_null( - ConstantFoldInstruction(GEP, DL, TLI)); + ConstantFoldInstruction(GEP, DL, &GetTLI(*GEP->getFunction()))); if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); @@ -331,7 +333,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, if (Init && isa(Init) && GEP->isInBounds()) SubInit = Constant::getNullValue(GEP->getResultElementType()); } - Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, TLI); + Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, GetTLI); if (GEP->use_empty()) { GEP->eraseFromParent(); @@ -348,7 +350,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, // us, and if they are all dead, nuke them without remorse. if (isSafeToDestroyConstant(C)) { C->destroyConstant(); - CleanupConstantGlobalUsers(V, Init, DL, TLI); + CleanupConstantGlobalUsers(V, Init, DL, GetTLI); return true; } } @@ -495,8 +497,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { // had 256 byte alignment for example, something might depend on that: // propagate info to each field. 
uint64_t FieldOffset = Layout.getElementOffset(i); - unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset); - if (NewAlign > DL.getABITypeAlignment(STy->getElementType(i))) + Align NewAlign(MinAlign(StartAlignment, FieldOffset)); + if (NewAlign > Align(DL.getABITypeAlignment(STy->getElementType(i)))) NGV->setAlignment(NewAlign); // Copy over the debug info for the variable. @@ -511,7 +513,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { NewGlobals.reserve(NumElements); auto ElTy = STy->getElementType(); uint64_t EltSize = DL.getTypeAllocSize(ElTy); - unsigned EltAlign = DL.getABITypeAlignment(ElTy); + Align EltAlign(DL.getABITypeAlignment(ElTy)); uint64_t FragmentSizeInBits = DL.getTypeAllocSizeInBits(ElTy); for (unsigned i = 0, e = NumElements; i != e; ++i) { Constant *In = Init->getAggregateElement(i); @@ -530,7 +532,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { // Calculate the known alignment of the field. If the original aggregate // had 256 byte alignment for example, something might depend on that: // propagate info to each field. - unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i); + Align NewAlign(MinAlign(StartAlignment, EltSize * i)); if (NewAlign > EltAlign) NGV->setAlignment(NewAlign); transferSRADebugInfo(GV, NGV, FragmentSizeInBits * i, FragmentSizeInBits, @@ -745,9 +747,9 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { /// are uses of the loaded value that would trap if the loaded value is /// dynamically null, then we know that they cannot be reachable with a null /// optimize away the load. -static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, - const DataLayout &DL, - TargetLibraryInfo *TLI) { +static bool OptimizeAwayTrappingUsesOfLoads( + GlobalVariable *GV, Constant *LV, const DataLayout &DL, + function_ref GetTLI) { bool Changed = false; // Keep track of whether we are able to remove all the uses of the global @@ -793,10 +795,10 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, // nor is the global. if (AllNonStoreUsesGone) { if (isLeakCheckerRoot(GV)) { - Changed |= CleanupPointerRootUsers(GV, TLI); + Changed |= CleanupPointerRootUsers(GV, GetTLI); } else { Changed = true; - CleanupConstantGlobalUsers(GV, nullptr, DL, TLI); + CleanupConstantGlobalUsers(GV, nullptr, DL, GetTLI); } if (GV->use_empty()) { LLVM_DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n"); @@ -889,8 +891,8 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, while (!GV->use_empty()) { if (StoreInst *SI = dyn_cast(GV->user_back())) { // The global is initialized when the store to it occurs. - new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0, - SI->getOrdering(), SI->getSyncScopeID(), SI); + new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, + None, SI->getOrdering(), SI->getSyncScopeID(), SI); SI->eraseFromParent(); continue; } @@ -907,7 +909,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, // Replace the cmp X, 0 with a use of the bool value. // Sink the load to where the compare was, if atomic rules allow us to. Value *LV = new LoadInst(InitBool->getValueType(), InitBool, - InitBool->getName() + ".val", false, 0, + InitBool->getName() + ".val", false, None, LI->getOrdering(), LI->getSyncScopeID(), LI->isUnordered() ? 
(Instruction *)ICI : LI); InitBoolUsed = true; @@ -1562,10 +1564,10 @@ static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, // Try to optimize globals based on the knowledge that only one value (besides // its initializer) is ever stored to the global. -static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, - AtomicOrdering Ordering, - const DataLayout &DL, - TargetLibraryInfo *TLI) { +static bool +optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, + AtomicOrdering Ordering, const DataLayout &DL, + function_ref GetTLI) { // Ignore no-op GEPs and bitcasts. StoredOnceVal = StoredOnceVal->stripPointerCasts(); @@ -1583,9 +1585,10 @@ static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); // Optimize away any trapping uses of the loaded value. - if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, TLI)) + if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, GetTLI)) return true; - } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) { + } else if (CallInst *CI = extractMallocCall(StoredOnceVal, GetTLI)) { + auto *TLI = &GetTLI(*CI->getFunction()); Type *MallocType = getMallocAllocatedType(CI, TLI); if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, DL, TLI)) @@ -1643,10 +1646,12 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { // instead of a select to synthesize the desired value. bool IsOneZero = false; bool EmitOneOrZero = true; - if (ConstantInt *CI = dyn_cast(OtherVal)){ + auto *CI = dyn_cast(OtherVal); + if (CI && CI->getValue().getActiveBits() <= 64) { IsOneZero = InitVal->isNullValue() && CI->isOne(); - if (ConstantInt *CIInit = dyn_cast(GV->getInitializer())){ + auto *CIInit = dyn_cast(GV->getInitializer()); + if (CIInit && CIInit->getValue().getActiveBits() <= 64) { uint64_t ValInit = CIInit->getZExtValue(); uint64_t ValOther = CI->getZExtValue(); uint64_t ValMinus = ValOther - ValInit; @@ -1711,7 +1716,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { assert(LI->getOperand(0) == GV && "Not a copy!"); // Insert a new load, to preserve the saved value. StoreVal = new LoadInst(NewGV->getValueType(), NewGV, - LI->getName() + ".b", false, 0, + LI->getName() + ".b", false, None, LI->getOrdering(), LI->getSyncScopeID(), LI); } else { assert((isa(StoredVal) || isa(StoredVal)) && @@ -1721,15 +1726,15 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { } } StoreInst *NSI = - new StoreInst(StoreVal, NewGV, false, 0, SI->getOrdering(), + new StoreInst(StoreVal, NewGV, false, None, SI->getOrdering(), SI->getSyncScopeID(), SI); NSI->setDebugLoc(SI->getDebugLoc()); } else { // Change the load into a load of bool then a select. LoadInst *LI = cast(UI); - LoadInst *NLI = - new LoadInst(NewGV->getValueType(), NewGV, LI->getName() + ".b", - false, 0, LI->getOrdering(), LI->getSyncScopeID(), LI); + LoadInst *NLI = new LoadInst(NewGV->getValueType(), NewGV, + LI->getName() + ".b", false, None, + LI->getOrdering(), LI->getSyncScopeID(), LI); Instruction *NSI; if (IsOneZero) NSI = new ZExtInst(NLI, LI->getType(), "", LI); @@ -1914,9 +1919,10 @@ static void makeAllConstantUsesInstructions(Constant *C) { /// Analyze the specified global variable and optimize /// it if possible. If we make a change, return true. 
-static bool processInternalGlobal( - GlobalVariable *GV, const GlobalStatus &GS, TargetLibraryInfo *TLI, - function_ref LookupDomTree) { +static bool +processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS, + function_ref GetTLI, + function_ref LookupDomTree) { auto &DL = GV->getParent()->getDataLayout(); // If this is a first class global and has only one accessing function and // this function is non-recursive, we replace the global with a local alloca @@ -1963,11 +1969,12 @@ static bool processInternalGlobal( bool Changed; if (isLeakCheckerRoot(GV)) { // Delete any constant stores to the global. - Changed = CleanupPointerRootUsers(GV, TLI); + Changed = CleanupPointerRootUsers(GV, GetTLI); } else { // Delete any stores we can find to the global. We may not be able to // make it completely dead though. - Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI); + Changed = + CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); } // If the global is dead now, delete it. @@ -1989,7 +1996,7 @@ static bool processInternalGlobal( GV->setConstant(true); // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI); + CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); // If the global is dead now, just nuke it. if (GV->use_empty()) { @@ -2019,7 +2026,7 @@ static bool processInternalGlobal( GV->setInitializer(SOVConstant); // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI); + CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); if (GV->use_empty()) { LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to " @@ -2033,7 +2040,8 @@ static bool processInternalGlobal( // Try to optimize globals based on the knowledge that only one value // (besides its initializer) is ever stored to the global. - if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, TLI)) + if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, + GetTLI)) return true; // Otherwise, if the global was not a boolean, we can shrink it to be a @@ -2054,7 +2062,8 @@ static bool processInternalGlobal( /// Analyze the specified global variable and optimize it if possible. If we /// make a change, return true. static bool -processGlobal(GlobalValue &GV, TargetLibraryInfo *TLI, +processGlobal(GlobalValue &GV, + function_ref GetTLI, function_ref LookupDomTree) { if (GV.getName().startswith("llvm.")) return false; @@ -2086,7 +2095,7 @@ processGlobal(GlobalValue &GV, TargetLibraryInfo *TLI, if (GVar->isConstant() || !GVar->hasInitializer()) return Changed; - return processInternalGlobal(GVar, GS, TLI, LookupDomTree) || Changed; + return processInternalGlobal(GVar, GS, GetTLI, LookupDomTree) || Changed; } /// Walk all of the direct calls of the specified function, changing them to @@ -2234,7 +2243,8 @@ hasOnlyColdCalls(Function &F, } static bool -OptimizeFunctions(Module &M, TargetLibraryInfo *TLI, +OptimizeFunctions(Module &M, + function_ref GetTLI, function_ref GetTTI, function_ref GetBFI, function_ref LookupDomTree, @@ -2275,17 +2285,13 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI, // So, remove unreachable blocks from the function, because a) there's // no point in analyzing them and b) GlobalOpt should otherwise grow // some more complicated logic to break these cycles. - // Removing unreachable blocks might invalidate the dominator so we - // recalculate it. 
if (!F->isDeclaration()) { - if (removeUnreachableBlocks(*F)) { - auto &DT = LookupDomTree(*F); - DT.recalculate(*F); - Changed = true; - } + auto &DT = LookupDomTree(*F); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); + Changed |= removeUnreachableBlocks(*F, &DTU); } - Changed |= processGlobal(*F, TLI, LookupDomTree); + Changed |= processGlobal(*F, GetTLI, LookupDomTree); if (!F->hasLocalLinkage()) continue; @@ -2342,7 +2348,8 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI, } static bool -OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI, +OptimizeGlobalVars(Module &M, + function_ref GetTLI, function_ref LookupDomTree, SmallPtrSetImpl &NotDiscardableComdats) { bool Changed = false; @@ -2357,7 +2364,10 @@ OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI, if (GV->hasInitializer()) if (auto *C = dyn_cast(GV->getInitializer())) { auto &DL = M.getDataLayout(); - Constant *New = ConstantFoldConstant(C, DL, TLI); + // TLI is not used in the case of a Constant, so use default nullptr + // for that optional parameter, since we don't have a Function to + // provide GetTLI anyway. + Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr); if (New && New != C) GV->setInitializer(New); } @@ -2367,7 +2377,7 @@ OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI, continue; } - Changed |= processGlobal(*GV, TLI, LookupDomTree); + Changed |= processGlobal(*GV, GetTLI, LookupDomTree); } return Changed; } @@ -2581,8 +2591,8 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, } static int compareNames(Constant *const *A, Constant *const *B) { - Value *AStripped = (*A)->stripPointerCastsNoFollowAliases(); - Value *BStripped = (*B)->stripPointerCastsNoFollowAliases(); + Value *AStripped = (*A)->stripPointerCasts(); + Value *BStripped = (*B)->stripPointerCasts(); return AStripped->getName().compare(BStripped->getName()); } @@ -2809,7 +2819,14 @@ OptimizeGlobalAliases(Module &M, return Changed; } -static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) { +static Function * +FindCXAAtExit(Module &M, function_ref GetTLI) { + // Hack to get a default TLI before we have actual Function. + auto FuncIter = M.begin(); + if (FuncIter == M.end()) + return nullptr; + auto *TLI = &GetTLI(*FuncIter); + LibFunc F = LibFunc_cxa_atexit; if (!TLI->has(F)) return nullptr; @@ -2818,6 +2835,9 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) { if (!Fn) return nullptr; + // Now get the actual TLI for Fn. + TLI = &GetTLI(*Fn); + // Make sure that the function has the correct prototype. if (!TLI->getLibFunc(*Fn, F) || F != LibFunc_cxa_atexit) return nullptr; @@ -2889,7 +2909,8 @@ static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { } static bool optimizeGlobalsInModule( - Module &M, const DataLayout &DL, TargetLibraryInfo *TLI, + Module &M, const DataLayout &DL, + function_ref GetTLI, function_ref GetTTI, function_ref GetBFI, function_ref LookupDomTree) { @@ -2914,24 +2935,24 @@ static bool optimizeGlobalsInModule( NotDiscardableComdats.insert(C); // Delete functions that are trivially dead, ccc -> fastcc - LocalChange |= OptimizeFunctions(M, TLI, GetTTI, GetBFI, LookupDomTree, + LocalChange |= OptimizeFunctions(M, GetTLI, GetTTI, GetBFI, LookupDomTree, NotDiscardableComdats); // Optimize global_ctors list. LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) { - return EvaluateStaticConstructor(F, DL, TLI); + return EvaluateStaticConstructor(F, DL, &GetTLI(*F)); }); // Optimize non-address-taken globals. 
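Most of the GlobalOpt churn above is mechanical: instead of threading a single module-level TargetLibraryInfo* through every helper, the pass now passes a GetTLI callback so each query is answered per function, matching the new TargetLibraryInfoWrapperPass::getTLI(F) interface. The shape of that refactor, sketched with std::function and invented Function/LibraryInfo types rather than llvm::function_ref and the real analysis classes:

#include <functional>
#include <iostream>
#include <string>

// Invented stand-ins for Function and TargetLibraryInfo.
struct Function { std::string Name; };
struct LibraryInfo { bool HasMalloc; };

// Before: helpers took one LibraryInfo* computed once for the whole module.
// After: helpers take a getter so the answer can differ per function
// (for example when functions carry different target features).
using GetLibInfoT = std::function<LibraryInfo &(Function &)>;

bool isAllocationFn(Function &Caller, const GetLibInfoT &GetLI) {
  return GetLI(Caller).HasMalloc; // query lazily, for the right function
}

int main() {
  LibraryInfo WithMalloc{true}, WithoutMalloc{false};
  GetLibInfoT GetLI = [&](Function &F) -> LibraryInfo & {
    return F.Name == "freestanding" ? WithoutMalloc : WithMalloc;
  };
  Function A{"hosted"}, B{"freestanding"};
  std::cout << isAllocationFn(A, GetLI) << ' '
            << isAllocationFn(B, GetLI) << '\n'; // prints: 1 0
}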
- LocalChange |= OptimizeGlobalVars(M, TLI, LookupDomTree, - NotDiscardableComdats); + LocalChange |= + OptimizeGlobalVars(M, GetTLI, LookupDomTree, NotDiscardableComdats); // Resolve aliases, when possible. LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats); // Try to remove trivial global destructors if they are not removed // already. - Function *CXAAtExitFn = FindCXAAtExit(M, TLI); + Function *CXAAtExitFn = FindCXAAtExit(M, GetTLI); if (CXAAtExitFn) LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn); @@ -2946,12 +2967,14 @@ static bool optimizeGlobalsInModule( PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) { auto &DL = M.getDataLayout(); - auto &TLI = AM.getResult(M); auto &FAM = AM.getResult(M).getManager(); auto LookupDomTree = [&FAM](Function &F) -> DominatorTree &{ return FAM.getResult(F); }; + auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult(F); + }; auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & { return FAM.getResult(F); }; @@ -2960,7 +2983,7 @@ PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) { return FAM.getResult(F); }; - if (!optimizeGlobalsInModule(M, DL, &TLI, GetTTI, GetBFI, LookupDomTree)) + if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); } @@ -2979,10 +3002,12 @@ struct GlobalOptLegacyPass : public ModulePass { return false; auto &DL = M.getDataLayout(); - auto *TLI = &getAnalysis().getTLI(); auto LookupDomTree = [this](Function &F) -> DominatorTree & { return this->getAnalysis(F).getDomTree(); }; + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis().getTLI(F); + }; auto GetTTI = [this](Function &F) -> TargetTransformInfo & { return this->getAnalysis().getTTI(F); }; @@ -2991,7 +3016,8 @@ struct GlobalOptLegacyPass : public ModulePass { return this->getAnalysis(F).getBFI(); }; - return optimizeGlobalsInModule(M, DL, TLI, GetTTI, GetBFI, LookupDomTree); + return optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, + LookupDomTree); } void getAnalysisUsage(AnalysisUsage &AU) const override { diff --git a/lib/Transforms/IPO/HotColdSplitting.cpp b/lib/Transforms/IPO/HotColdSplitting.cpp index ab1a9a79cad6..cfdcc8db7f50 100644 --- a/lib/Transforms/IPO/HotColdSplitting.cpp +++ b/lib/Transforms/IPO/HotColdSplitting.cpp @@ -85,12 +85,6 @@ static cl::opt "multiple of TCC_Basic)")); namespace { - -/// A sequence of basic blocks. -/// -/// A 0-sized SmallVector is slightly cheaper to move than a std::vector. -using BlockSequence = SmallVector; - // Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify // this function unless you modify the MBB version as well. 
// @@ -169,31 +163,6 @@ static bool markFunctionCold(Function &F, bool UpdateEntryCount = false) { return Changed; } -class HotColdSplitting { -public: - HotColdSplitting(ProfileSummaryInfo *ProfSI, - function_ref GBFI, - function_ref GTTI, - std::function *GORE, - function_ref LAC) - : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE), LookupAC(LAC) {} - bool run(Module &M); - -private: - bool isFunctionCold(const Function &F) const; - bool shouldOutlineFrom(const Function &F) const; - bool outlineColdRegions(Function &F, bool HasProfileSummary); - Function *extractColdRegion(const BlockSequence &Region, DominatorTree &DT, - BlockFrequencyInfo *BFI, TargetTransformInfo &TTI, - OptimizationRemarkEmitter &ORE, - AssumptionCache *AC, unsigned Count); - ProfileSummaryInfo *PSI; - function_ref GetBFI; - function_ref GetTTI; - std::function *GetORE; - function_ref LookupAC; -}; - class HotColdSplittingLegacyPass : public ModulePass { public: static char ID; @@ -321,13 +290,10 @@ static int getOutliningPenalty(ArrayRef Region, return Penalty; } -Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region, - DominatorTree &DT, - BlockFrequencyInfo *BFI, - TargetTransformInfo &TTI, - OptimizationRemarkEmitter &ORE, - AssumptionCache *AC, - unsigned Count) { +Function *HotColdSplitting::extractColdRegion( + const BlockSequence &Region, const CodeExtractorAnalysisCache &CEAC, + DominatorTree &DT, BlockFrequencyInfo *BFI, TargetTransformInfo &TTI, + OptimizationRemarkEmitter &ORE, AssumptionCache *AC, unsigned Count) { assert(!Region.empty()); // TODO: Pass BFI and BPI to update profile information. @@ -349,7 +315,7 @@ Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region, return nullptr; Function *OrigF = Region[0]->getParent(); - if (Function *OutF = CE.extractCodeRegion()) { + if (Function *OutF = CE.extractCodeRegion(CEAC)) { User *U = *OutF->user_begin(); CallInst *CI = cast(U); CallSite CS(CI); @@ -607,9 +573,9 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) { }); if (!DT) - DT = make_unique(F); + DT = std::make_unique(F); if (!PDT) - PDT = make_unique(F); + PDT = std::make_unique(F); auto Regions = OutliningRegion::create(*BB, *DT, *PDT); for (OutliningRegion &Region : Regions) { @@ -637,9 +603,14 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) { } } + if (OutliningWorklist.empty()) + return Changed; + // Outline single-entry cold regions, splitting up larger regions as needed. unsigned OutlinedFunctionID = 1; - while (!OutliningWorklist.empty()) { + // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time. 
+ CodeExtractorAnalysisCache CEAC(F); + do { OutliningRegion Region = OutliningWorklist.pop_back_val(); assert(!Region.empty() && "Empty outlining region in worklist"); do { @@ -650,14 +621,14 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) { BB->dump(); }); - Function *Outlined = extractColdRegion(SubRegion, *DT, BFI, TTI, ORE, AC, - OutlinedFunctionID); + Function *Outlined = extractColdRegion(SubRegion, CEAC, *DT, BFI, TTI, + ORE, AC, OutlinedFunctionID); if (Outlined) { ++OutlinedFunctionID; Changed = true; } } while (!Region.empty()); - } + } while (!OutliningWorklist.empty()); return Changed; } diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp index 34db75dd8b03..bddf75211599 100644 --- a/lib/Transforms/IPO/IPO.cpp +++ b/lib/Transforms/IPO/IPO.cpp @@ -114,6 +114,10 @@ void LLVMAddIPSCCPPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createIPSCCPPass()); } +void LLVMAddMergeFunctionsPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createMergeFunctionsPass()); +} + void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) { auto PreserveMain = [=](const GlobalValue &GV) { return AllButMain && GV.getName() == "main"; @@ -121,6 +125,15 @@ void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) { unwrap(PM)->add(createInternalizePass(PreserveMain)); } +void LLVMAddInternalizePassWithMustPreservePredicate( + LLVMPassManagerRef PM, + void *Context, + LLVMBool (*Pred)(LLVMValueRef, void *)) { + unwrap(PM)->add(createInternalizePass([=](const GlobalValue &GV) { + return Pred(wrap(&GV), Context) == 0 ? false : true; + })); +} + void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createStripDeadPrototypesPass()); } diff --git a/lib/Transforms/IPO/InferFunctionAttrs.cpp b/lib/Transforms/IPO/InferFunctionAttrs.cpp index 7f5511e008e1..d1a68b28bd33 100644 --- a/lib/Transforms/IPO/InferFunctionAttrs.cpp +++ b/lib/Transforms/IPO/InferFunctionAttrs.cpp @@ -18,24 +18,28 @@ using namespace llvm; #define DEBUG_TYPE "inferattrs" -static bool inferAllPrototypeAttributes(Module &M, - const TargetLibraryInfo &TLI) { +static bool inferAllPrototypeAttributes( + Module &M, function_ref GetTLI) { bool Changed = false; for (Function &F : M.functions()) // We only infer things using the prototype and the name; we don't need // definitions. if (F.isDeclaration() && !F.hasOptNone()) - Changed |= inferLibFuncAttributes(F, TLI); + Changed |= inferLibFuncAttributes(F, GetTLI(F)); return Changed; } PreservedAnalyses InferFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) { - auto &TLI = AM.getResult(M); + FunctionAnalysisManager &FAM = + AM.getResult(M).getManager(); + auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult(F); + }; - if (!inferAllPrototypeAttributes(M, TLI)) + if (!inferAllPrototypeAttributes(M, GetTLI)) // If we didn't infer anything, preserve all analyses. 
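The new C API entry point LLVMAddInternalizePassWithMustPreservePredicate above lets bindings supply an arbitrary must-preserve callback instead of the fixed all-but-main behaviour. A hedged usage sketch (it assumes the usual llvm-c headers and an existing module; the runInternalize wrapper and the "exported_" naming convention are invented for illustration, and error handling is omitted):

#include <llvm-c/Core.h>
#include <llvm-c/Transforms/IPO.h>
#include <string.h>

// Keep "main" and anything whose name starts with "exported_"; everything
// else may be internalized. The Context pointer is unused here.
static LLVMBool mustPreserve(LLVMValueRef GV, void *Context) {
  size_t Len = 0;
  const char *Name = LLVMGetValueName2(GV, &Len);
  (void)Context;
  return strcmp(Name, "main") == 0 || strncmp(Name, "exported_", 9) == 0;
}

void runInternalize(LLVMModuleRef M) {
  LLVMPassManagerRef PM = LLVMCreatePassManager();
  LLVMAddInternalizePassWithMustPreservePredicate(PM, /*Context=*/NULL,
                                                  mustPreserve);
  LLVMRunPassManager(PM, M);
  LLVMDisposePassManager(PM);
}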
return PreservedAnalyses::all(); @@ -60,8 +64,10 @@ struct InferFunctionAttrsLegacyPass : public ModulePass { if (skipModule(M)) return false; - auto &TLI = getAnalysis().getTLI(); - return inferAllPrototypeAttributes(M, TLI); + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis().getTLI(F); + }; + return inferAllPrototypeAttributes(M, GetTLI); } }; } diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 945f8affae6e..4b72261131c1 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -239,7 +239,7 @@ static void mergeInlinedArrayAllocas( } if (Align1 > Align2) - AvailableAlloca->setAlignment(AI->getAlignment()); + AvailableAlloca->setAlignment(MaybeAlign(AI->getAlignment())); } AI->eraseFromParent(); @@ -527,7 +527,8 @@ static void setInlineRemark(CallSite &CS, StringRef message) { static bool inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, std::function GetAssumptionCache, - ProfileSummaryInfo *PSI, TargetLibraryInfo &TLI, + ProfileSummaryInfo *PSI, + std::function GetTLI, bool InsertLifetime, function_ref GetInlineCost, function_ref AARGetter, @@ -626,7 +627,8 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, Instruction *Instr = CS.getInstruction(); - bool IsTriviallyDead = isInstructionTriviallyDead(Instr, &TLI); + bool IsTriviallyDead = + isInstructionTriviallyDead(Instr, &GetTLI(*Caller)); int InlineHistoryID; if (!IsTriviallyDead) { @@ -757,13 +759,16 @@ bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis().getCallGraph(); ACT = &getAnalysis(); PSI = &getAnalysis().getPSI(); - auto &TLI = getAnalysis().getTLI(); + auto GetTLI = [&](Function &F) -> TargetLibraryInfo & { + return getAnalysis().getTLI(F); + }; auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); }; - return inlineCallsImpl(SCC, CG, GetAssumptionCache, PSI, TLI, InsertLifetime, - [this](CallSite CS) { return getInlineCost(CS); }, - LegacyAARGetter(*this), ImportedFunctionsStats); + return inlineCallsImpl( + SCC, CG, GetAssumptionCache, PSI, GetTLI, InsertLifetime, + [this](CallSite CS) { return getInlineCost(CS); }, LegacyAARGetter(*this), + ImportedFunctionsStats); } /// Remove now-dead linkonce functions at the end of @@ -879,7 +884,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, if (!ImportedFunctionsStats && InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) { ImportedFunctionsStats = - llvm::make_unique(); + std::make_unique(); ImportedFunctionsStats->setModuleInfo(M); } diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp index 91c7b5f5f135..add2ae053735 100644 --- a/lib/Transforms/IPO/LoopExtractor.cpp +++ b/lib/Transforms/IPO/LoopExtractor.cpp @@ -141,10 +141,12 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { if (NumLoops == 0) return Changed; --NumLoops; AssumptionCache *AC = nullptr; + Function &Func = *L->getHeader()->getParent(); if (auto *ACT = getAnalysisIfAvailable()) - AC = ACT->lookupAssumptionCache(*L->getHeader()->getParent()); + AC = ACT->lookupAssumptionCache(Func); + CodeExtractorAnalysisCache CEAC(Func); CodeExtractor Extractor(DT, *L, false, nullptr, nullptr, AC); - if (Extractor.extractCodeRegion() != nullptr) { + if (Extractor.extractCodeRegion(CEAC) != nullptr) { Changed = true; // After extraction, the loop is replaced by a function call, so // we shouldn't try to run any more loop passes on it. 
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp index f7371284f47e..2dec366d70e2 100644 --- a/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/lib/Transforms/IPO/LowerTypeTests.cpp @@ -230,6 +230,16 @@ void ByteArrayBuilder::allocate(const std::set &Bits, Bytes[AllocByteOffset + B] |= AllocMask; } +bool lowertypetests::isJumpTableCanonical(Function *F) { + if (F->isDeclarationForLinker()) + return false; + auto *CI = mdconst::extract_or_null( + F->getParent()->getModuleFlag("CFI Canonical Jump Tables")); + if (!CI || CI->getZExtValue() != 0) + return true; + return F->hasFnAttribute("cfi-canonical-jump-table"); +} + namespace { struct ByteArrayInfo { @@ -251,9 +261,12 @@ class GlobalTypeMember final : TrailingObjects { GlobalObject *GO; size_t NTypes; - // For functions: true if this is a definition (either in the merged module or - // in one of the thinlto modules). - bool IsDefinition; + // For functions: true if the jump table is canonical. This essentially means + // whether the canonical address (i.e. the symbol table entry) of the function + // is provided by the local jump table. This is normally the same as whether + // the function is defined locally, but if canonical jump tables are disabled + // by the user then the jump table never provides a canonical definition. + bool IsJumpTableCanonical; // For functions: true if this function is either defined or used in a thinlto // module and its jumptable entry needs to be exported to thinlto backends. @@ -263,13 +276,13 @@ class GlobalTypeMember final : TrailingObjects { public: static GlobalTypeMember *create(BumpPtrAllocator &Alloc, GlobalObject *GO, - bool IsDefinition, bool IsExported, + bool IsJumpTableCanonical, bool IsExported, ArrayRef Types) { auto *GTM = static_cast(Alloc.Allocate( totalSizeToAlloc(Types.size()), alignof(GlobalTypeMember))); GTM->GO = GO; GTM->NTypes = Types.size(); - GTM->IsDefinition = IsDefinition; + GTM->IsJumpTableCanonical = IsJumpTableCanonical; GTM->IsExported = IsExported; std::uninitialized_copy(Types.begin(), Types.end(), GTM->getTrailingObjects()); @@ -280,8 +293,8 @@ public: return GO; } - bool isDefinition() const { - return IsDefinition; + bool isJumpTableCanonical() const { + return IsJumpTableCanonical; } bool isExported() const { @@ -320,6 +333,49 @@ private: size_t NTargets; }; +struct ScopedSaveAliaseesAndUsed { + Module &M; + SmallPtrSet Used, CompilerUsed; + std::vector> FunctionAliases; + + ScopedSaveAliaseesAndUsed(Module &M) : M(M) { + // The users of this class want to replace all function references except + // for aliases and llvm.used/llvm.compiler.used with references to a jump + // table. We avoid replacing aliases in order to avoid introducing a double + // indirection (or an alias pointing to a declaration in ThinLTO mode), and + // we avoid replacing llvm.used/llvm.compiler.used because these global + // variables describe properties of the global, not the jump table (besides, + // offseted references to the jump table in llvm.used are invalid). + // Unfortunately, LLVM doesn't have a "RAUW except for these (possibly + // indirect) users", so what we do is save the list of globals referenced by + // llvm.used/llvm.compiler.used and aliases, erase the used lists, let RAUW + // replace the aliasees and then set them back to their original values at + // the end. 
+ if (GlobalVariable *GV = collectUsedGlobalVariables(M, Used, false)) + GV->eraseFromParent(); + if (GlobalVariable *GV = collectUsedGlobalVariables(M, CompilerUsed, true)) + GV->eraseFromParent(); + + for (auto &GIS : concat(M.aliases(), M.ifuncs())) { + // FIXME: This should look past all aliases not just interposable ones, + // see discussion on D65118. + if (auto *F = + dyn_cast(GIS.getIndirectSymbol()->stripPointerCasts())) + FunctionAliases.push_back({&GIS, F}); + } + } + + ~ScopedSaveAliaseesAndUsed() { + appendToUsed(M, std::vector(Used.begin(), Used.end())); + appendToCompilerUsed(M, std::vector(CompilerUsed.begin(), + CompilerUsed.end())); + + for (auto P : FunctionAliases) + P.first->setIndirectSymbol( + ConstantExpr::getBitCast(P.second, P.first->getType())); + } +}; + class LowerTypeTestsModule { Module &M; @@ -387,7 +443,8 @@ class LowerTypeTestsModule { uint8_t *exportTypeId(StringRef TypeId, const TypeIdLowering &TIL); TypeIdLowering importTypeId(StringRef TypeId); void importTypeTest(CallInst *CI); - void importFunction(Function *F, bool isDefinition); + void importFunction(Function *F, bool isJumpTableCanonical, + std::vector &AliasesToErase); BitSetInfo buildBitSet(Metadata *TypeId, @@ -421,7 +478,8 @@ class LowerTypeTestsModule { ArrayRef Globals, ArrayRef ICallBranchFunnels); - void replaceWeakDeclarationWithJumpTablePtr(Function *F, Constant *JT, bool IsDefinition); + void replaceWeakDeclarationWithJumpTablePtr(Function *F, Constant *JT, + bool IsJumpTableCanonical); void moveInitializerToModuleConstructor(GlobalVariable *GV); void findGlobalVariableUsersOf(Constant *C, SmallSetVector &Out); @@ -433,7 +491,7 @@ class LowerTypeTestsModule { /// the block. 'This's use list is expected to have at least one element. /// Unlike replaceAllUsesWith this function skips blockaddr and direct call /// uses. - void replaceCfiUses(Function *Old, Value *New, bool IsDefinition); + void replaceCfiUses(Function *Old, Value *New, bool IsJumpTableCanonical); /// replaceDirectCalls - Go through the uses list for this definition and /// replace each use, which is a direct function call. @@ -759,43 +817,50 @@ void LowerTypeTestsModule::buildBitSetsFromGlobalVariables( // Build a new global with the combined contents of the referenced globals. // This global is a struct whose even-indexed elements contain the original // contents of the referenced globals and whose odd-indexed elements contain - // any padding required to align the next element to the next power of 2. + // any padding required to align the next element to the next power of 2 plus + // any additional padding required to meet its alignment requirements. 
std::vector GlobalInits; const DataLayout &DL = M.getDataLayout(); + DenseMap GlobalLayout; + Align MaxAlign; + uint64_t CurOffset = 0; + uint64_t DesiredPadding = 0; for (GlobalTypeMember *G : Globals) { - GlobalVariable *GV = cast(G->getGlobal()); + auto *GV = cast(G->getGlobal()); + MaybeAlign Alignment(GV->getAlignment()); + if (!Alignment) + Alignment = Align(DL.getABITypeAlignment(GV->getValueType())); + MaxAlign = std::max(MaxAlign, *Alignment); + uint64_t GVOffset = alignTo(CurOffset + DesiredPadding, *Alignment); + GlobalLayout[G] = GVOffset; + if (GVOffset != 0) { + uint64_t Padding = GVOffset - CurOffset; + GlobalInits.push_back( + ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding))); + } + GlobalInits.push_back(GV->getInitializer()); uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType()); + CurOffset = GVOffset + InitSize; - // Compute the amount of padding required. - uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize; + // Compute the amount of padding that we'd like for the next element. + DesiredPadding = NextPowerOf2(InitSize - 1) - InitSize; // Experiments of different caps with Chromium on both x64 and ARM64 // have shown that the 32-byte cap generates the smallest binary on // both platforms while different caps yield similar performance. // (see https://lists.llvm.org/pipermail/llvm-dev/2018-July/124694.html) - if (Padding > 32) - Padding = alignTo(InitSize, 32) - InitSize; - - GlobalInits.push_back( - ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding))); + if (DesiredPadding > 32) + DesiredPadding = alignTo(InitSize, 32) - InitSize; } - if (!GlobalInits.empty()) - GlobalInits.pop_back(); + Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits); auto *CombinedGlobal = new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true, GlobalValue::PrivateLinkage, NewInit); + CombinedGlobal->setAlignment(MaxAlign); StructType *NewTy = cast(NewInit->getType()); - const StructLayout *CombinedGlobalLayout = DL.getStructLayout(NewTy); - - // Compute the offsets of the original globals within the new global. - DenseMap GlobalLayout; - for (unsigned I = 0; I != Globals.size(); ++I) - // Multiply by 2 to account for padding elements. - GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2); - lowerTypeTestCalls(TypeIds, CombinedGlobal, GlobalLayout); // Build aliases pointing to offsets into the combined global for each @@ -975,14 +1040,16 @@ void LowerTypeTestsModule::importTypeTest(CallInst *CI) { } // ThinLTO backend: the function F has a jump table entry; update this module -// accordingly. isDefinition describes the type of the jump table entry. -void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) { +// accordingly. isJumpTableCanonical describes the type of the jump table entry. +void LowerTypeTestsModule::importFunction( + Function *F, bool isJumpTableCanonical, + std::vector &AliasesToErase) { assert(F->getType()->getAddressSpace() == 0); GlobalValue::VisibilityTypes Visibility = F->getVisibility(); std::string Name = F->getName(); - if (F->isDeclarationForLinker() && isDefinition) { + if (F->isDeclarationForLinker() && isJumpTableCanonical) { // Non-dso_local functions may be overriden at run time, // don't short curcuit them if (F->isDSOLocal()) { @@ -997,12 +1064,13 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) { } Function *FDecl; - if (F->isDeclarationForLinker() && !isDefinition) { - // Declaration of an external function. 
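The rewritten layout loop in buildBitSetsFromGlobalVariables above computes each member's offset directly: the next global goes at the current end plus the desired power-of-two padding, rounded up to that global's own alignment, and padding requests above 32 bytes are capped at the next 32-byte boundary. The arithmetic in isolation (alignTo and nextPowerOf2 are reimplemented here; the real code uses the llvm::alignTo and llvm::NextPowerOf2 helpers and the DataLayout ABI alignment):

#include <cstdint>
#include <iostream>
#include <vector>

static uint64_t alignTo(uint64_t X, uint64_t A) { return (X + A - 1) / A * A; }
static uint64_t nextPowerOf2(uint64_t X) {
  uint64_t P = 1;
  while (P <= X)
    P <<= 1;
  return P; // smallest power of two strictly greater than X
}

struct Member { uint64_t Size, Align; };

// Returns the byte offset of every member inside the combined global,
// following the same rules as the loop above.
std::vector<uint64_t> layoutCombinedGlobal(const std::vector<Member> &Members) {
  std::vector<uint64_t> Offsets;
  uint64_t CurOffset = 0, DesiredPadding = 0;
  for (const Member &M : Members) {
    uint64_t Offset = alignTo(CurOffset + DesiredPadding, M.Align);
    Offsets.push_back(Offset);
    CurOffset = Offset + M.Size;
    // Ask for padding up to the next power of two so offsets stay friendly to
    // the type-test encoding, but never request more than a 32-byte boundary.
    DesiredPadding = nextPowerOf2(M.Size - 1) - M.Size;
    if (DesiredPadding > 32)
      DesiredPadding = alignTo(M.Size, 32) - M.Size;
  }
  return Offsets;
}

int main() {
  for (uint64_t O : layoutCombinedGlobal({{24, 8}, {4, 4}, {100, 16}}))
    std::cout << O << ' ';   // prints: 0 32 48
  std::cout << '\n';
}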
+ if (!isJumpTableCanonical) { + // Either a declaration of an external function or a reference to a locally + // defined jump table. FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage, F->getAddressSpace(), Name + ".cfi_jt", &M); FDecl->setVisibility(GlobalValue::HiddenVisibility); - } else if (isDefinition) { + } else { F->setName(Name + ".cfi"); F->setLinkage(GlobalValue::ExternalLinkage); FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage, @@ -1011,8 +1079,8 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) { Visibility = GlobalValue::HiddenVisibility; // Delete aliases pointing to this function, they'll be re-created in the - // merged output - SmallVector ToErase; + // merged output. Don't do it yet though because ScopedSaveAliaseesAndUsed + // will want to reset the aliasees first. for (auto &U : F->uses()) { if (auto *A = dyn_cast(U.getUser())) { Function *AliasDecl = Function::Create( @@ -1020,24 +1088,15 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) { F->getAddressSpace(), "", &M); AliasDecl->takeName(A); A->replaceAllUsesWith(AliasDecl); - ToErase.push_back(A); + AliasesToErase.push_back(A); } } - for (auto *A : ToErase) - A->eraseFromParent(); - } else { - // Function definition without type metadata, where some other translation - // unit contained a declaration with type metadata. This normally happens - // during mixed CFI + non-CFI compilation. We do nothing with the function - // so that it is treated the same way as a function defined outside of the - // LTO unit. - return; } - if (F->isWeakForLinker()) - replaceWeakDeclarationWithJumpTablePtr(F, FDecl, isDefinition); + if (F->hasExternalWeakLinkage()) + replaceWeakDeclarationWithJumpTablePtr(F, FDecl, isJumpTableCanonical); else - replaceCfiUses(F, FDecl, isDefinition); + replaceCfiUses(F, FDecl, isJumpTableCanonical); // Set visibility late because it's used in replaceCfiUses() to determine // whether uses need to to be replaced. @@ -1225,7 +1284,7 @@ void LowerTypeTestsModule::findGlobalVariableUsersOf( // Replace all uses of F with (F ? JT : 0). void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr( - Function *F, Constant *JT, bool IsDefinition) { + Function *F, Constant *JT, bool IsJumpTableCanonical) { // The target expression can not appear in a constant initializer on most // (all?) targets. Switch to a runtime initializer. SmallSetVector GlobalVarUsers; @@ -1239,7 +1298,7 @@ void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr( Function::Create(cast(F->getValueType()), GlobalValue::ExternalWeakLinkage, F->getAddressSpace(), "", &M); - replaceCfiUses(F, PlaceholderFn, IsDefinition); + replaceCfiUses(F, PlaceholderFn, IsJumpTableCanonical); Constant *Target = ConstantExpr::getSelect( ConstantExpr::getICmp(CmpInst::ICMP_NE, F, @@ -1276,8 +1335,9 @@ selectJumpTableArmEncoding(ArrayRef Functions, unsigned ArmCount = 0, ThumbCount = 0; for (const auto GTM : Functions) { - if (!GTM->isDefinition()) { + if (!GTM->isJumpTableCanonical()) { // PLT stubs are always ARM. + // FIXME: This is the wrong heuristic for non-canonical jump tables. ++ArmCount; continue; } @@ -1303,7 +1363,7 @@ void LowerTypeTestsModule::createJumpTable( cast(Functions[I]->getGlobal())); // Align the whole table by entry size. - F->setAlignment(getJumpTableEntrySize()); + F->setAlignment(Align(getJumpTableEntrySize())); // Skip prologue. // Disabled on win32 due to https://llvm.org/bugs/show_bug.cgi?id=28641#c3. 
// Luckily, this function does not get any prologue even without the @@ -1438,47 +1498,53 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( lowerTypeTestCalls(TypeIds, JumpTable, GlobalLayout); - // Build aliases pointing to offsets into the jump table, and replace - // references to the original functions with references to the aliases. - for (unsigned I = 0; I != Functions.size(); ++I) { - Function *F = cast(Functions[I]->getGlobal()); - bool IsDefinition = Functions[I]->isDefinition(); - - Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast( - ConstantExpr::getInBoundsGetElementPtr( - JumpTableType, JumpTable, - ArrayRef{ConstantInt::get(IntPtrTy, 0), - ConstantInt::get(IntPtrTy, I)}), - F->getType()); - if (Functions[I]->isExported()) { - if (IsDefinition) { - ExportSummary->cfiFunctionDefs().insert(F->getName()); + { + ScopedSaveAliaseesAndUsed S(M); + + // Build aliases pointing to offsets into the jump table, and replace + // references to the original functions with references to the aliases. + for (unsigned I = 0; I != Functions.size(); ++I) { + Function *F = cast(Functions[I]->getGlobal()); + bool IsJumpTableCanonical = Functions[I]->isJumpTableCanonical(); + + Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast( + ConstantExpr::getInBoundsGetElementPtr( + JumpTableType, JumpTable, + ArrayRef{ConstantInt::get(IntPtrTy, 0), + ConstantInt::get(IntPtrTy, I)}), + F->getType()); + if (Functions[I]->isExported()) { + if (IsJumpTableCanonical) { + ExportSummary->cfiFunctionDefs().insert(F->getName()); + } else { + GlobalAlias *JtAlias = GlobalAlias::create( + F->getValueType(), 0, GlobalValue::ExternalLinkage, + F->getName() + ".cfi_jt", CombinedGlobalElemPtr, &M); + JtAlias->setVisibility(GlobalValue::HiddenVisibility); + ExportSummary->cfiFunctionDecls().insert(F->getName()); + } + } + if (!IsJumpTableCanonical) { + if (F->hasExternalWeakLinkage()) + replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr, + IsJumpTableCanonical); + else + replaceCfiUses(F, CombinedGlobalElemPtr, IsJumpTableCanonical); } else { - GlobalAlias *JtAlias = GlobalAlias::create( - F->getValueType(), 0, GlobalValue::ExternalLinkage, - F->getName() + ".cfi_jt", CombinedGlobalElemPtr, &M); - JtAlias->setVisibility(GlobalValue::HiddenVisibility); - ExportSummary->cfiFunctionDecls().insert(F->getName()); + assert(F->getType()->getAddressSpace() == 0); + + GlobalAlias *FAlias = + GlobalAlias::create(F->getValueType(), 0, F->getLinkage(), "", + CombinedGlobalElemPtr, &M); + FAlias->setVisibility(F->getVisibility()); + FAlias->takeName(F); + if (FAlias->hasName()) + F->setName(FAlias->getName() + ".cfi"); + replaceCfiUses(F, FAlias, IsJumpTableCanonical); + if (!F->hasLocalLinkage()) + F->setVisibility(GlobalVariable::HiddenVisibility); } } - if (!IsDefinition) { - if (F->isWeakForLinker()) - replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr, IsDefinition); - else - replaceCfiUses(F, CombinedGlobalElemPtr, IsDefinition); - } else { - assert(F->getType()->getAddressSpace() == 0); - - GlobalAlias *FAlias = GlobalAlias::create( - F->getValueType(), 0, F->getLinkage(), "", CombinedGlobalElemPtr, &M); - FAlias->setVisibility(F->getVisibility()); - FAlias->takeName(F); - if (FAlias->hasName()) - F->setName(FAlias->getName() + ".cfi"); - replaceCfiUses(F, FAlias, IsDefinition); - if (!F->hasLocalLinkage()) - F->setVisibility(GlobalVariable::HiddenVisibility); - } } createJumpTable(JumpTableFn, Functions); @@ -1623,7 +1689,7 @@ bool LowerTypeTestsModule::runForTesting(Module 
&M) { ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary + ": "); std::error_code EC; - raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text); + raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_Text); ExitOnErr(errorCodeToError(EC)); yaml::Output Out(OS); @@ -1643,7 +1709,8 @@ static bool isDirectCall(Use& U) { return false; } -void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New, bool IsDefinition) { +void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New, + bool IsJumpTableCanonical) { SmallSetVector Constants; auto UI = Old->use_begin(), E = Old->use_end(); for (; UI != E;) { @@ -1655,7 +1722,7 @@ void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New, bool IsDefi continue; // Skip direct calls to externally defined or non-dso_local functions - if (isDirectCall(U) && (Old->isDSOLocal() || !IsDefinition)) + if (isDirectCall(U) && (Old->isDSOLocal() || !IsJumpTableCanonical)) continue; // Must handle Constants specially, we cannot call replaceUsesOfWith on a @@ -1678,16 +1745,7 @@ void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New, bool IsDefi } void LowerTypeTestsModule::replaceDirectCalls(Value *Old, Value *New) { - auto UI = Old->use_begin(), E = Old->use_end(); - for (; UI != E;) { - Use &U = *UI; - ++UI; - - if (!isDirectCall(U)) - continue; - - U.set(New); - } + Old->replaceUsesWithIf(New, [](Use &U) { return isDirectCall(U); }); } bool LowerTypeTestsModule::lower() { @@ -1734,10 +1792,16 @@ bool LowerTypeTestsModule::lower() { Decls.push_back(&F); } - for (auto F : Defs) - importFunction(F, /*isDefinition*/ true); - for (auto F : Decls) - importFunction(F, /*isDefinition*/ false); + std::vector AliasesToErase; + { + ScopedSaveAliaseesAndUsed S(M); + for (auto F : Defs) + importFunction(F, /*isJumpTableCanonical*/ true, AliasesToErase); + for (auto F : Decls) + importFunction(F, /*isJumpTableCanonical*/ false, AliasesToErase); + } + for (GlobalAlias *GA : AliasesToErase) + GA->eraseFromParent(); return true; } @@ -1823,6 +1887,17 @@ bool LowerTypeTestsModule::lower() { CfiFunctionLinkage Linkage = P.second.Linkage; MDNode *FuncMD = P.second.FuncMD; Function *F = M.getFunction(FunctionName); + if (F && F->hasLocalLinkage()) { + // Locally defined function that happens to have the same name as a + // function defined in a ThinLTO module. Rename it to move it out of + // the way of the external reference that we're about to create. + // Note that setName will find a unique name for the function, so even + // if there is an existing function with the suffix there won't be a + // name collision. + F->setName(F->getName() + ".1"); + F = nullptr; + } + if (!F) F = Function::Create( FunctionType::get(Type::getVoidTy(M.getContext()), false), @@ -1871,24 +1946,26 @@ bool LowerTypeTestsModule::lower() { Types.clear(); GO.getMetadata(LLVMContext::MD_type, Types); - bool IsDefinition = !GO.isDeclarationForLinker(); + bool IsJumpTableCanonical = false; bool IsExported = false; if (Function *F = dyn_cast(&GO)) { + IsJumpTableCanonical = isJumpTableCanonical(F); if (ExportedFunctions.count(F->getName())) { - IsDefinition |= ExportedFunctions[F->getName()].Linkage == CFL_Definition; + IsJumpTableCanonical |= + ExportedFunctions[F->getName()].Linkage == CFL_Definition; IsExported = true; // TODO: The logic here checks only that the function is address taken, // not that the address takers are live. 
This can be updated to check // their liveness and emit fewer jumptable entries once monolithic LTO // builds also emit summaries. } else if (!F->hasAddressTaken()) { - if (!CrossDsoCfi || !IsDefinition || F->hasLocalLinkage()) + if (!CrossDsoCfi || !IsJumpTableCanonical || F->hasLocalLinkage()) continue; } } - auto *GTM = - GlobalTypeMember::create(Alloc, &GO, IsDefinition, IsExported, Types); + auto *GTM = GlobalTypeMember::create(Alloc, &GO, IsJumpTableCanonical, + IsExported, Types); GlobalTypeMembers[&GO] = GTM; for (MDNode *Type : Types) { verifyTypeMDNode(&GO, Type); diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 3a08069dcd4a..8b9abaddc84c 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -769,7 +769,7 @@ void MergeFunctions::writeAlias(Function *F, Function *G) { PtrType->getElementType(), PtrType->getAddressSpace(), G->getLinkage(), "", BitcastF, G->getParent()); - F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); + F->setAlignment(MaybeAlign(std::max(F->getAlignment(), G->getAlignment()))); GA->takeName(G); GA->setVisibility(G->getVisibility()); GA->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); @@ -816,7 +816,7 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { removeUsers(F); F->replaceAllUsesWith(NewF); - unsigned MaxAlignment = std::max(G->getAlignment(), NewF->getAlignment()); + MaybeAlign MaxAlignment(std::max(G->getAlignment(), NewF->getAlignment())); writeThunkOrAlias(F, G); writeThunkOrAlias(F, NewF); diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 733782e8764d..e193074884af 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -409,7 +409,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F, return std::unique_ptr(); std::unique_ptr OutliningInfo = - llvm::make_unique(); + std::make_unique(); auto IsSingleEntry = [](SmallVectorImpl &BlockList) { BasicBlock *Dom = BlockList.front(); @@ -589,7 +589,7 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) { }; std::unique_ptr OutliningInfo = - llvm::make_unique(); + std::make_unique(); BasicBlock *CurrEntry = EntryBlock; bool CandidateFound = false; @@ -966,7 +966,7 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner( Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE, function_ref LookupAC) : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) { - ClonedOI = llvm::make_unique(); + ClonedOI = std::make_unique(); // Clone the function, so that we can hack away on it. ValueToValueMapTy VMap; @@ -991,7 +991,7 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner( OptimizationRemarkEmitter &ORE, function_ref LookupAC) : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) { - ClonedOMRI = llvm::make_unique(); + ClonedOMRI = std::make_unique(); // Clone the function, so that we can hack away on it. ValueToValueMapTy VMap; @@ -1122,6 +1122,9 @@ bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() { BranchProbabilityInfo BPI(*ClonedFunc, LI); ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI)); + // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time. 
+ CodeExtractorAnalysisCache CEAC(*ClonedFunc); + SetVector Inputs, Outputs, Sinks; for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo : ClonedOMRI->ORI) { @@ -1148,7 +1151,7 @@ bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() { if (Outputs.size() > 0 && !ForceLiveExit) continue; - Function *OutlinedFunc = CE.extractCodeRegion(); + Function *OutlinedFunc = CE.extractCodeRegion(CEAC); if (OutlinedFunc) { CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc); @@ -1210,11 +1213,12 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() { } // Extract the body of the if. + CodeExtractorAnalysisCache CEAC(*ClonedFunc); Function *OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc), /* AllowVarargs */ true) - .extractCodeRegion(); + .extractCodeRegion(CEAC); if (OutlinedFunc) { BasicBlock *OutliningCallBB = @@ -1264,7 +1268,7 @@ std::pair PartialInlinerImpl::unswitchFunction(Function *F) { if (PSI->isFunctionEntryCold(F)) return {false, nullptr}; - if (empty(F->users())) + if (F->users().empty()) return {false, nullptr}; OptimizationRemarkEmitter ORE(F); @@ -1370,7 +1374,7 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { return false; } - assert(empty(Cloner.OrigFunc->users()) && + assert(Cloner.OrigFunc->users().empty() && "F's users should all be replaced!"); std::vector Users(Cloner.ClonedFunc->user_begin(), diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 3ea77f08fd3c..5314a8219b1e 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -654,6 +654,7 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createGlobalsAAWrapperPass()); MPM.add(createFloat2IntPass()); + MPM.add(createLowerConstantIntrinsicsPass()); addExtensionsToPM(EP_VectorizerStart, MPM); diff --git a/lib/Transforms/IPO/SCCP.cpp b/lib/Transforms/IPO/SCCP.cpp index 7be3608bd2ec..307690729b14 100644 --- a/lib/Transforms/IPO/SCCP.cpp +++ b/lib/Transforms/IPO/SCCP.cpp @@ -9,16 +9,18 @@ using namespace llvm; PreservedAnalyses IPSCCPPass::run(Module &M, ModuleAnalysisManager &AM) { const DataLayout &DL = M.getDataLayout(); - auto &TLI = AM.getResult(M); auto &FAM = AM.getResult(M).getManager(); + auto GetTLI = [&FAM](Function &F) -> const TargetLibraryInfo & { + return FAM.getResult(F); + }; auto getAnalysis = [&FAM](Function &F) -> AnalysisResultsForFn { DominatorTree &DT = FAM.getResult(F); return { - make_unique(F, DT, FAM.getResult(F)), + std::make_unique(F, DT, FAM.getResult(F)), &DT, FAM.getCachedResult(F)}; }; - if (!runIPSCCP(M, DL, &TLI, getAnalysis)) + if (!runIPSCCP(M, DL, GetTLI, getAnalysis)) return PreservedAnalyses::all(); PreservedAnalyses PA; @@ -47,14 +49,14 @@ public: if (skipModule(M)) return false; const DataLayout &DL = M.getDataLayout(); - const TargetLibraryInfo *TLI = - &getAnalysis().getTLI(); - + auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & { + return this->getAnalysis().getTLI(F); + }; auto getAnalysis = [this](Function &F) -> AnalysisResultsForFn { DominatorTree &DT = this->getAnalysis(F).getDomTree(); return { - make_unique( + std::make_unique( F, DT, this->getAnalysis().getAssumptionCache( F)), @@ -62,7 +64,7 @@ public: nullptr}; // manager, so set them to nullptr. 
}; - return runIPSCCP(M, DL, TLI, getAnalysis); + return runIPSCCP(M, DL, GetTLI, getAnalysis); } void getAnalysisUsage(AnalysisUsage &AU) const override { diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp index 877d20e72ffc..6184681db8a2 100644 --- a/lib/Transforms/IPO/SampleProfile.cpp +++ b/lib/Transforms/IPO/SampleProfile.cpp @@ -72,6 +72,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/MisExpect.h" #include #include #include @@ -79,6 +80,7 @@ #include #include #include +#include #include #include #include @@ -128,6 +130,12 @@ static cl::opt ProfileSampleAccurate( "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. ")); +static cl::opt ProfileAccurateForSymsInList( + "profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore, + cl::init(true), + cl::desc("For symbols in profile symbol list, regard their profiles to " + "be accurate. It may be overriden by profile-sample-accurate. ")); + namespace { using BlockWeightMap = DenseMap; @@ -137,9 +145,11 @@ using EdgeWeightMap = DenseMap; using BlockEdgeMap = DenseMap>; +class SampleProfileLoader; + class SampleCoverageTracker { public: - SampleCoverageTracker() = default; + SampleCoverageTracker(SampleProfileLoader &SPL) : SPLoader(SPL){}; bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset, uint32_t Discriminator, uint64_t Samples); @@ -185,6 +195,76 @@ private: /// keyed by FunctionSamples pointers, but these stats are cleared after /// every function, so we just need to keep a single counter. uint64_t TotalUsedSamples = 0; + + SampleProfileLoader &SPLoader; +}; + +class GUIDToFuncNameMapper { +public: + GUIDToFuncNameMapper(Module &M, SampleProfileReader &Reader, + DenseMap &GUIDToFuncNameMap) + : CurrentReader(Reader), CurrentModule(M), + CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) { + if (CurrentReader.getFormat() != SPF_Compact_Binary) + return; + + for (const auto &F : CurrentModule) { + StringRef OrigName = F.getName(); + CurrentGUIDToFuncNameMap.insert( + {Function::getGUID(OrigName), OrigName}); + + // Local to global var promotion used by optimization like thinlto + // will rename the var and add suffix like ".llvm.xxx" to the + // original local name. In sample profile, the suffixes of function + // names are all stripped. Since it is possible that the mapper is + // built in post-thin-link phase and var promotion has been done, + // we need to add the substring of function name without the suffix + // into the GUIDToFuncNameMap. + StringRef CanonName = FunctionSamples::getCanonicalFnName(F); + if (CanonName != OrigName) + CurrentGUIDToFuncNameMap.insert( + {Function::getGUID(CanonName), CanonName}); + } + + // Update GUIDToFuncNameMap for each function including inlinees. + SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap); + } + + ~GUIDToFuncNameMapper() { + if (CurrentReader.getFormat() != SPF_Compact_Binary) + return; + + CurrentGUIDToFuncNameMap.clear(); + + // Reset GUIDToFuncNameMap for of each function as they're no + // longer valid at this point. 
+ SetGUIDToFuncNameMapForAll(nullptr); + } + +private: + void SetGUIDToFuncNameMapForAll(DenseMap *Map) { + std::queue FSToUpdate; + for (auto &IFS : CurrentReader.getProfiles()) { + FSToUpdate.push(&IFS.second); + } + + while (!FSToUpdate.empty()) { + FunctionSamples *FS = FSToUpdate.front(); + FSToUpdate.pop(); + FS->GUIDToFuncNameMap = Map; + for (const auto &ICS : FS->getCallsiteSamples()) { + const FunctionSamplesMap &FSMap = ICS.second; + for (auto &IFS : FSMap) { + FunctionSamples &FS = const_cast(IFS.second); + FSToUpdate.push(&FS); + } + } + } + } + + SampleProfileReader &CurrentReader; + Module &CurrentModule; + DenseMap &CurrentGUIDToFuncNameMap; }; /// Sample profile pass. @@ -199,8 +279,9 @@ public: std::function GetAssumptionCache, std::function GetTargetTransformInfo) : GetAC(std::move(GetAssumptionCache)), - GetTTI(std::move(GetTargetTransformInfo)), Filename(Name), - RemappingFilename(RemapName), IsThinLTOPreLink(IsThinLTOPreLink) {} + GetTTI(std::move(GetTargetTransformInfo)), CoverageTracker(*this), + Filename(Name), RemappingFilename(RemapName), + IsThinLTOPreLink(IsThinLTOPreLink) {} bool doInitialization(Module &M); bool runOnModule(Module &M, ModuleAnalysisManager *AM, @@ -209,6 +290,8 @@ public: void dump() { Reader->dump(); } protected: + friend class SampleCoverageTracker; + bool runOnFunction(Function &F, ModuleAnalysisManager *AM); unsigned getFunctionLoc(Function &F); bool emitAnnotations(Function &F); @@ -237,6 +320,8 @@ protected: bool propagateThroughEdges(Function &F, bool UpdateBlockCount); void computeDominanceAndLoopInfo(Function &F); void clearFunctionData(); + bool callsiteIsHot(const FunctionSamples *CallsiteFS, + ProfileSummaryInfo *PSI); /// Map basic blocks to their computed weights. /// @@ -310,6 +395,10 @@ protected: /// Profile Summary Info computed from sample profile. ProfileSummaryInfo *PSI = nullptr; + /// Profle Symbol list tells whether a function name appears in the binary + /// used to generate the current profile. + std::unique_ptr PSL; + /// Total number of samples collected in this profile. /// /// This is the sum of all the samples collected in all the functions executed @@ -326,6 +415,21 @@ protected: uint64_t entryCount; }; DenseMap notInlinedCallInfo; + + // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for + // all the function symbols defined or declared in current module. + DenseMap GUIDToFuncNameMap; + + // All the Names used in FunctionSamples including outline function + // names, inline instance names and call target names. + StringSet<> NamesInProfile; + + // For symbol in profile symbol list, whether to regard their profiles + // to be accurate. It is mainly decided by existance of profile symbol + // list and -profile-accurate-for-symsinlist flag, but it can be + // overriden by -profile-sample-accurate or profile-sample-accurate + // attribute. + bool ProfAccForSymsInList; }; class SampleProfileLoaderLegacyPass : public ModulePass { @@ -381,14 +485,23 @@ private: /// To decide whether an inlined callsite is hot, we compare the callsite /// sample count with the hot cutoff computed by ProfileSummaryInfo, it is /// regarded as hot if the count is above the cutoff value. 
-static bool callsiteIsHot(const FunctionSamples *CallsiteFS, - ProfileSummaryInfo *PSI) { +/// +/// When ProfileAccurateForSymsInList is enabled and profile symbol list +/// is present, functions in the profile symbol list but without profile will +/// be regarded as cold and much less inlining will happen in CGSCC inlining +/// pass, so we tend to lower the hot criteria here to allow more early +/// inlining to happen for warm callsites and it is helpful for performance. +bool SampleProfileLoader::callsiteIsHot(const FunctionSamples *CallsiteFS, + ProfileSummaryInfo *PSI) { if (!CallsiteFS) return false; // The callsite was not inlined in the original binary. assert(PSI && "PSI is expected to be non null"); uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples(); - return PSI->isHotCount(CallsiteTotalSamples); + if (ProfAccForSymsInList) + return !PSI->isColdCount(CallsiteTotalSamples); + else + return PSI->isHotCount(CallsiteTotalSamples); } /// Mark as used the sample record for the given function samples at @@ -425,7 +538,7 @@ SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS, for (const auto &I : FS->getCallsiteSamples()) for (const auto &J : I.second) { const FunctionSamples *CalleeSamples = &J.second; - if (callsiteIsHot(CalleeSamples, PSI)) + if (SPLoader.callsiteIsHot(CalleeSamples, PSI)) Count += countUsedRecords(CalleeSamples, PSI); } @@ -444,7 +557,7 @@ SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS, for (const auto &I : FS->getCallsiteSamples()) for (const auto &J : I.second) { const FunctionSamples *CalleeSamples = &J.second; - if (callsiteIsHot(CalleeSamples, PSI)) + if (SPLoader.callsiteIsHot(CalleeSamples, PSI)) Count += countBodyRecords(CalleeSamples, PSI); } @@ -465,7 +578,7 @@ SampleCoverageTracker::countBodySamples(const FunctionSamples *FS, for (const auto &I : FS->getCallsiteSamples()) for (const auto &J : I.second) { const FunctionSamples *CalleeSamples = &J.second; - if (callsiteIsHot(CalleeSamples, PSI)) + if (SPLoader.callsiteIsHot(CalleeSamples, PSI)) Total += countBodySamples(CalleeSamples, PSI); } @@ -788,6 +901,14 @@ bool SampleProfileLoader::inlineHotFunctions( Function &F, DenseSet &InlinedGUIDs) { DenseSet PromotedInsns; + // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure + // Profile symbol list is ignored when profile-sample-accurate is on. + assert((!ProfAccForSymsInList || + (!ProfileSampleAccurate && + !F.hasFnAttribute("profile-sample-accurate"))) && + "ProfAccForSymsInList should be false when profile-sample-accurate " + "is enabled"); + DenseMap localNotInlinedCallSites; bool Changed = false; while (true) { @@ -1219,17 +1340,12 @@ void SampleProfileLoader::buildEdges(Function &F) { } /// Returns the sorted CallTargetMap \p M by count in descending order. 
-static SmallVector SortCallTargets( - const SampleRecord::CallTargetMap &M) { +static SmallVector GetSortedValueDataFromCallTargets( + const SampleRecord::CallTargetMap & M) { SmallVector R; - for (auto I = M.begin(); I != M.end(); ++I) - R.push_back({FunctionSamples::getGUID(I->getKey()), I->getValue()}); - llvm::sort(R, [](const InstrProfValueData &L, const InstrProfValueData &R) { - if (L.Count == R.Count) - return L.Value > R.Value; - else - return L.Count > R.Count; - }); + for (const auto &I : SampleRecord::SortCallTargets(M)) { + R.emplace_back(InstrProfValueData{FunctionSamples::getGUID(I.first), I.second}); + } return R; } @@ -1324,7 +1440,7 @@ void SampleProfileLoader::propagateWeights(Function &F) { if (!T || T.get().empty()) continue; SmallVector SortedCallTargets = - SortCallTargets(T.get()); + GetSortedValueDataFromCallTargets(T.get()); uint64_t Sum; findIndirectCallFunctionSamples(I, Sum); annotateValueSite(*I.getParent()->getParent()->getParent(), I, @@ -1374,6 +1490,8 @@ void SampleProfileLoader::propagateWeights(Function &F) { } } + misexpect::verifyMisExpect(TI, Weights, TI->getContext()); + uint64_t TempWeight; // Only set weights if there is at least one non-zero weight. // In any other case, let the analyzer set weights. @@ -1557,30 +1675,29 @@ INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile", bool SampleProfileLoader::doInitialization(Module &M) { auto &Ctx = M.getContext(); - auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx); + + std::unique_ptr RemapReader; + auto ReaderOrErr = + SampleProfileReader::create(Filename, Ctx, RemappingFilename); if (std::error_code EC = ReaderOrErr.getError()) { std::string Msg = "Could not open profile: " + EC.message(); Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); return false; } Reader = std::move(ReaderOrErr.get()); - Reader->collectFuncsToUse(M); + Reader->collectFuncsFrom(M); ProfileIsValid = (Reader->read() == sampleprof_error::success); - - if (!RemappingFilename.empty()) { - // Apply profile remappings to the loaded profile data if requested. - // For now, we only support remapping symbols encoded using the Itanium - // C++ ABI's name mangling scheme. - ReaderOrErr = SampleProfileReaderItaniumRemapper::create( - RemappingFilename, Ctx, std::move(Reader)); - if (std::error_code EC = ReaderOrErr.getError()) { - std::string Msg = "Could not open profile remapping file: " + EC.message(); - Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); - return false; - } - Reader = std::move(ReaderOrErr.get()); - ProfileIsValid = (Reader->read() == sampleprof_error::success); + PSL = Reader->getProfileSymbolList(); + + // While profile-sample-accurate is on, ignore symbol list. 
+ ProfAccForSymsInList = + ProfileAccurateForSymsInList && PSL && !ProfileSampleAccurate; + if (ProfAccForSymsInList) { + NamesInProfile.clear(); + if (auto NameTable = Reader->getNameTable()) + NamesInProfile.insert(NameTable->begin(), NameTable->end()); } + return true; } @@ -1594,7 +1711,7 @@ ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) { bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, ProfileSummaryInfo *_PSI) { - FunctionSamples::GUIDToFuncNameMapper Mapper(M); + GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap); if (!ProfileIsValid) return false; @@ -1651,19 +1768,48 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) { } bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) { - + DILocation2SampleMap.clear(); // By default the entry count is initialized to -1, which will be treated // conservatively by getEntryCount as the same as unknown (None). This is // to avoid newly added code to be treated as cold. If we have samples // this will be overwritten in emitAnnotations. - // If ProfileSampleAccurate is true or F has profile-sample-accurate - // attribute, initialize the entry count to 0 so callsites or functions - // unsampled will be treated as cold. - uint64_t initialEntryCount = - (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) - ? 0 - : -1; + uint64_t initialEntryCount = -1; + + ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL; + if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) { + // initialize all the function entry counts to 0. It means all the + // functions without profile will be regarded as cold. + initialEntryCount = 0; + // profile-sample-accurate is a user assertion which has a higher precedence + // than symbol list. When profile-sample-accurate is on, ignore symbol list. + ProfAccForSymsInList = false; + } + + // PSL -- profile symbol list include all the symbols in sampled binary. + // If ProfileAccurateForSymsInList is enabled, PSL is used to treat + // old functions without samples being cold, without having to worry + // about new and hot functions being mistakenly treated as cold. + if (ProfAccForSymsInList) { + // Initialize the entry count to 0 for functions in the list. + if (PSL->contains(F.getName())) + initialEntryCount = 0; + + // Function in the symbol list but without sample will be regarded as + // cold. To minimize the potential negative performance impact it could + // have, we want to be a little conservative here saying if a function + // shows up in the profile, no matter as outline function, inline instance + // or call targets, treat the function as not being cold. This will handle + // the cases such as most callsites of a function are inlined in sampled + // binary but not inlined in current build (because of source code drift, + // imprecise debug information, or the callsites are all cold individually + // but not cold accumulatively...), so the outline function showing up as + // cold in sampled binary will actually not be cold after current build. 
+ StringRef CanonName = FunctionSamples::getCanonicalFnName(F); + if (NamesInProfile.count(CanonName)) + initialEntryCount = -1; + } + F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real)); std::unique_ptr OwnedORE; if (AM) { @@ -1672,7 +1818,7 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) .getManager(); ORE = &FAM.getResult(F); } else { - OwnedORE = make_unique(&F); + OwnedORE = std::make_unique(&F); ORE = OwnedORE.get(); } Samples = Reader->getSamplesFor(F); diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 24c476376c14..690b5e8bf49e 100644 --- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -24,6 +24,7 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/IPO/FunctionImport.h" +#include "llvm/Transforms/IPO/LowerTypeTests.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; @@ -218,10 +219,18 @@ void splitAndWriteThinLTOBitcode( promoteTypeIds(M, ModuleId); - // Returns whether a global has attached type metadata. Such globals may - // participate in CFI or whole-program devirtualization, so they need to - // appear in the merged module instead of the thin LTO module. + // Returns whether a global or its associated global has attached type + // metadata. The former may participate in CFI or whole-program + // devirtualization, so they need to appear in the merged module instead of + // the thin LTO module. Similarly, globals that are associated with globals + // with type metadata need to appear in the merged module because they will + // reference the global's section directly. auto HasTypeMetadata = [](const GlobalObject *GO) { + if (MDNode *MD = GO->getMetadata(LLVMContext::MD_associated)) + if (auto *AssocVM = dyn_cast_or_null(MD->getOperand(0))) + if (auto *AssocGO = dyn_cast(AssocVM->getValue())) + if (AssocGO->hasMetadata(LLVMContext::MD_type)) + return true; return GO->hasMetadata(LLVMContext::MD_type); }; @@ -315,9 +324,9 @@ void splitAndWriteThinLTOBitcode( SmallVector Elts; Elts.push_back(MDString::get(Ctx, F.getName())); CfiFunctionLinkage Linkage; - if (!F.isDeclarationForLinker()) + if (lowertypetests::isJumpTableCanonical(&F)) Linkage = CFL_Definition; - else if (F.isWeakForLinker()) + else if (F.hasExternalWeakLinkage()) Linkage = CFL_WeakDeclaration; else Linkage = CFL_Declaration; @@ -457,7 +466,7 @@ void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS, // splitAndWriteThinLTOBitcode). Just always build it once via the // buildModuleSummaryIndex when Module(s) are ready. ProfileSummaryInfo PSI(M); - NewIndex = llvm::make_unique( + NewIndex = std::make_unique( buildModuleSummaryIndex(M, nullptr, &PSI)); Index = NewIndex.get(); } diff --git a/lib/Transforms/IPO/WholeProgramDevirt.cpp b/lib/Transforms/IPO/WholeProgramDevirt.cpp index 6b6dd6194e17..f0cf5581ba8a 100644 --- a/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -24,12 +24,14 @@ // returns 0, or a single vtable's function returns 1, replace each virtual // call with a comparison of the vptr against that vtable's address. // -// This pass is intended to be used during the regular and thin LTO pipelines. 
+// This pass is intended to be used during the regular and thin LTO pipelines: +// // During regular LTO, the pass determines the best optimization for each // virtual call and applies the resolutions directly to virtual calls that are // eligible for virtual call optimization (i.e. calls that use either of the -// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics). During -// ThinLTO, the pass operates in two phases: +// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics). +// +// During hybrid Regular/ThinLTO, the pass operates in two phases: // - Export phase: this is run during the thin link over a single merged module // that contains all vtables with !type metadata that participate in the link. // The pass computes a resolution for each virtual call and stores it in the @@ -38,6 +40,14 @@ // modules. The pass applies the resolutions previously computed during the // import phase to each eligible virtual call. // +// During ThinLTO, the pass operates in two phases: +// - Export phase: this is run during the thin link over the index which +// contains a summary of all vtables with !type metadata that participate in +// the link. It computes a resolution for each virtual call and stores it in +// the type identifier summary. Only single implementation devirtualization +// is supported. +// - Import phase: (same as with hybrid case above). +// //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/WholeProgramDevirt.h" @@ -117,6 +127,11 @@ static cl::opt cl::desc("Maximum number of call targets per " "call site to enable branch funnels")); +static cl::opt + PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden, + cl::init(false), cl::ZeroOrMore, + cl::desc("Print index-based devirtualization messages")); + // Find the minimum offset that we may store a value of size Size bits at. If // IsAfter is set, look for an offset before the object, otherwise look for an // offset after the object. @@ -265,6 +280,25 @@ template <> struct DenseMapInfo { } }; +template <> struct DenseMapInfo { + static VTableSlotSummary getEmptyKey() { + return {DenseMapInfo::getEmptyKey(), + DenseMapInfo::getEmptyKey()}; + } + static VTableSlotSummary getTombstoneKey() { + return {DenseMapInfo::getTombstoneKey(), + DenseMapInfo::getTombstoneKey()}; + } + static unsigned getHashValue(const VTableSlotSummary &I) { + return DenseMapInfo::getHashValue(I.TypeID) ^ + DenseMapInfo::getHashValue(I.ByteOffset); + } + static bool isEqual(const VTableSlotSummary &LHS, + const VTableSlotSummary &RHS) { + return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset; + } +}; + } // end namespace llvm namespace { @@ -342,19 +376,21 @@ struct CallSiteInfo { /// pass the vector is non-empty, we will need to add a use of llvm.type.test /// to each of the function summaries in the vector. 
std::vector SummaryTypeCheckedLoadUsers; + std::vector SummaryTypeTestAssumeUsers; bool isExported() const { return SummaryHasTypeTestAssumeUsers || !SummaryTypeCheckedLoadUsers.empty(); } - void markSummaryHasTypeTestAssumeUsers() { - SummaryHasTypeTestAssumeUsers = true; + void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) { + SummaryTypeCheckedLoadUsers.push_back(FS); AllCallSitesDevirted = false; } - void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) { - SummaryTypeCheckedLoadUsers.push_back(FS); + void addSummaryTypeTestAssumeUser(FunctionSummary *FS) { + SummaryTypeTestAssumeUsers.push_back(FS); + SummaryHasTypeTestAssumeUsers = true; AllCallSitesDevirted = false; } @@ -456,7 +492,6 @@ struct DevirtModule { void buildTypeIdentifierMap( std::vector &Bits, DenseMap> &TypeIdMap); - Constant *getPointerAtOffset(Constant *I, uint64_t Offset); bool tryFindVirtualCallTargets(std::vector &TargetsForSlot, const std::set &TypeMemberInfos, @@ -464,7 +499,8 @@ struct DevirtModule { void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn, bool &IsExported); - bool trySingleImplDevirt(MutableArrayRef TargetsForSlot, + bool trySingleImplDevirt(ModuleSummaryIndex *ExportSummary, + MutableArrayRef TargetsForSlot, VTableSlotInfo &SlotInfo, WholeProgramDevirtResolution *Res); @@ -542,6 +578,38 @@ struct DevirtModule { function_ref LookupDomTree); }; +struct DevirtIndex { + ModuleSummaryIndex &ExportSummary; + // The set in which to record GUIDs exported from their module by + // devirtualization, used by client to ensure they are not internalized. + std::set &ExportedGUIDs; + // A map in which to record the information necessary to locate the WPD + // resolution for local targets in case they are exported by cross module + // importing. + std::map> &LocalWPDTargetsMap; + + MapVector CallSlots; + + DevirtIndex( + ModuleSummaryIndex &ExportSummary, + std::set &ExportedGUIDs, + std::map> &LocalWPDTargetsMap) + : ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs), + LocalWPDTargetsMap(LocalWPDTargetsMap) {} + + bool tryFindVirtualCallTargets(std::vector &TargetsForSlot, + const TypeIdCompatibleVtableInfo TIdInfo, + uint64_t ByteOffset); + + bool trySingleImplDevirt(MutableArrayRef TargetsForSlot, + VTableSlotSummary &SlotSummary, + VTableSlotInfo &SlotInfo, + WholeProgramDevirtResolution *Res, + std::set &DevirtTargets); + + void run(); +}; + struct WholeProgramDevirt : public ModulePass { static char ID; @@ -572,7 +640,7 @@ struct WholeProgramDevirt : public ModulePass { // an optimization remark emitter on the fly, when we need it. std::unique_ptr ORE; auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & { - ORE = make_unique(F); + ORE = std::make_unique(F); return *ORE; }; @@ -632,6 +700,41 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M, return PreservedAnalyses::none(); } +namespace llvm { +void runWholeProgramDevirtOnIndex( + ModuleSummaryIndex &Summary, std::set &ExportedGUIDs, + std::map> &LocalWPDTargetsMap) { + DevirtIndex(Summary, ExportedGUIDs, LocalWPDTargetsMap).run(); +} + +void updateIndexWPDForExports( + ModuleSummaryIndex &Summary, + function_ref isExported, + std::map> &LocalWPDTargetsMap) { + for (auto &T : LocalWPDTargetsMap) { + auto &VI = T.first; + // This was enforced earlier during trySingleImplDevirt. 
+ assert(VI.getSummaryList().size() == 1 && + "Devirt of local target has more than one copy"); + auto &S = VI.getSummaryList()[0]; + if (!isExported(S->modulePath(), VI.getGUID())) + continue; + + // It's been exported by a cross module import. + for (auto &SlotSummary : T.second) { + auto *TIdSum = Summary.getTypeIdSummary(SlotSummary.TypeID); + assert(TIdSum); + auto WPDRes = TIdSum->WPDRes.find(SlotSummary.ByteOffset); + assert(WPDRes != TIdSum->WPDRes.end()); + WPDRes->second.SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal( + WPDRes->second.SingleImplName, + Summary.getModuleHash(S->modulePath())); + } + } +} + +} // end namespace llvm + bool DevirtModule::runForTesting( Module &M, function_ref AARGetter, function_ref OREGetter, @@ -662,7 +765,7 @@ bool DevirtModule::runForTesting( ExitOnError ExitOnErr( "-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": "); std::error_code EC; - raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text); + raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_Text); ExitOnErr(errorCodeToError(EC)); yaml::Output Out(OS); @@ -706,38 +809,6 @@ void DevirtModule::buildTypeIdentifierMap( } } -Constant *DevirtModule::getPointerAtOffset(Constant *I, uint64_t Offset) { - if (I->getType()->isPointerTy()) { - if (Offset == 0) - return I; - return nullptr; - } - - const DataLayout &DL = M.getDataLayout(); - - if (auto *C = dyn_cast(I)) { - const StructLayout *SL = DL.getStructLayout(C->getType()); - if (Offset >= SL->getSizeInBytes()) - return nullptr; - - unsigned Op = SL->getElementContainingOffset(Offset); - return getPointerAtOffset(cast(I->getOperand(Op)), - Offset - SL->getElementOffset(Op)); - } - if (auto *C = dyn_cast(I)) { - ArrayType *VTableTy = C->getType(); - uint64_t ElemSize = DL.getTypeAllocSize(VTableTy->getElementType()); - - unsigned Op = Offset / ElemSize; - if (Op >= C->getNumOperands()) - return nullptr; - - return getPointerAtOffset(cast(I->getOperand(Op)), - Offset % ElemSize); - } - return nullptr; -} - bool DevirtModule::tryFindVirtualCallTargets( std::vector &TargetsForSlot, const std::set &TypeMemberInfos, uint64_t ByteOffset) { @@ -746,7 +817,7 @@ bool DevirtModule::tryFindVirtualCallTargets( return false; Constant *Ptr = getPointerAtOffset(TM.Bits->GV->getInitializer(), - TM.Offset + ByteOffset); + TM.Offset + ByteOffset, M); if (!Ptr) return false; @@ -766,6 +837,34 @@ bool DevirtModule::tryFindVirtualCallTargets( return !TargetsForSlot.empty(); } +bool DevirtIndex::tryFindVirtualCallTargets( + std::vector &TargetsForSlot, const TypeIdCompatibleVtableInfo TIdInfo, + uint64_t ByteOffset) { + for (const TypeIdOffsetVtableInfo P : TIdInfo) { + // VTable initializer should have only one summary, or all copies must be + // linkonce/weak ODR. + assert(P.VTableVI.getSummaryList().size() == 1 || + llvm::all_of( + P.VTableVI.getSummaryList(), + [&](const std::unique_ptr &Summary) { + return GlobalValue::isLinkOnceODRLinkage(Summary->linkage()) || + GlobalValue::isWeakODRLinkage(Summary->linkage()); + })); + const auto *VS = cast(P.VTableVI.getSummaryList()[0].get()); + if (!P.VTableVI.getSummaryList()[0]->isLive()) + continue; + for (auto VTP : VS->vTableFuncs()) { + if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset) + continue; + + TargetsForSlot.push_back(VTP.FuncVI); + } + } + + // Give up if we couldn't find any targets. 
+ return !TargetsForSlot.empty(); +} + void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn, bool &IsExported) { auto Apply = [&](CallSiteInfo &CSInfo) { @@ -788,9 +887,38 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo, Apply(P.second); } +static bool AddCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee) { + // We can't add calls if we haven't seen a definition + if (Callee.getSummaryList().empty()) + return false; + + // Insert calls into the summary index so that the devirtualized targets + // are eligible for import. + // FIXME: Annotate type tests with hotness. For now, mark these as hot + // to better ensure we have the opportunity to inline them. + bool IsExported = false; + auto &S = Callee.getSummaryList()[0]; + CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* RelBF = */ 0); + auto AddCalls = [&](CallSiteInfo &CSInfo) { + for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) { + FS->addCall({Callee, CI}); + IsExported |= S->modulePath() != FS->modulePath(); + } + for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) { + FS->addCall({Callee, CI}); + IsExported |= S->modulePath() != FS->modulePath(); + } + }; + AddCalls(SlotInfo.CSInfo); + for (auto &P : SlotInfo.ConstCSInfo) + AddCalls(P.second); + return IsExported; +} + bool DevirtModule::trySingleImplDevirt( - MutableArrayRef TargetsForSlot, - VTableSlotInfo &SlotInfo, WholeProgramDevirtResolution *Res) { + ModuleSummaryIndex *ExportSummary, + MutableArrayRef TargetsForSlot, VTableSlotInfo &SlotInfo, + WholeProgramDevirtResolution *Res) { // See if the program contains a single implementation of this virtual // function. Function *TheFn = TargetsForSlot[0].Fn; @@ -830,6 +958,10 @@ bool DevirtModule::trySingleImplDevirt( TheFn->setVisibility(GlobalValue::HiddenVisibility); TheFn->setName(NewName); } + if (ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFn->getGUID())) + // Any needed promotion of 'TheFn' has already been done during + // LTO unit split, so we can ignore return value of AddCalls. + AddCalls(SlotInfo, TheFnVI); Res->TheKind = WholeProgramDevirtResolution::SingleImpl; Res->SingleImplName = TheFn->getName(); @@ -837,6 +969,63 @@ bool DevirtModule::trySingleImplDevirt( return true; } +bool DevirtIndex::trySingleImplDevirt(MutableArrayRef TargetsForSlot, + VTableSlotSummary &SlotSummary, + VTableSlotInfo &SlotInfo, + WholeProgramDevirtResolution *Res, + std::set &DevirtTargets) { + // See if the program contains a single implementation of this virtual + // function. + auto TheFn = TargetsForSlot[0]; + for (auto &&Target : TargetsForSlot) + if (TheFn != Target) + return false; + + // Don't devirtualize if we don't have target definition. + auto Size = TheFn.getSummaryList().size(); + if (!Size) + return false; + + // If the summary list contains multiple summaries where at least one is + // a local, give up, as we won't know which (possibly promoted) name to use. + for (auto &S : TheFn.getSummaryList()) + if (GlobalValue::isLocalLinkage(S->linkage()) && Size > 1) + return false; + + // Collect functions devirtualized at least for one call site for stats. + if (PrintSummaryDevirt) + DevirtTargets.insert(TheFn); + + auto &S = TheFn.getSummaryList()[0]; + bool IsExported = AddCalls(SlotInfo, TheFn); + if (IsExported) + ExportedGUIDs.insert(TheFn.getGUID()); + + // Record in summary for use in devirtualization during the ThinLTO import + // step. 
+ Res->TheKind = WholeProgramDevirtResolution::SingleImpl; + if (GlobalValue::isLocalLinkage(S->linkage())) { + if (IsExported) + // If target is a local function and we are exporting it by + // devirtualizing a call in another module, we need to record the + // promoted name. + Res->SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal( + TheFn.name(), ExportSummary.getModuleHash(S->modulePath())); + else { + LocalWPDTargetsMap[TheFn].push_back(SlotSummary); + Res->SingleImplName = TheFn.name(); + } + } else + Res->SingleImplName = TheFn.name(); + + // Name will be empty if this thin link driven off of serialized combined + // index (e.g. llvm-lto). However, WPD is not supported/invoked for the + // legacy LTO API anyway. + assert(!Res->SingleImplName.empty()); + + return true; +} + void DevirtModule::tryICallBranchFunnel( MutableArrayRef TargetsForSlot, VTableSlotInfo &SlotInfo, WholeProgramDevirtResolution *Res, VTableSlot Slot) { @@ -1302,10 +1491,13 @@ void DevirtModule::rebuildGlobal(VTableBits &B) { if (B.Before.Bytes.empty() && B.After.Bytes.empty()) return; - // Align each byte array to pointer width. - unsigned PointerSize = M.getDataLayout().getPointerSize(); - B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), PointerSize)); - B.After.Bytes.resize(alignTo(B.After.Bytes.size(), PointerSize)); + // Align the before byte array to the global's minimum alignment so that we + // don't break any alignment requirements on the global. + MaybeAlign Alignment(B.GV->getAlignment()); + if (!Alignment) + Alignment = + Align(M.getDataLayout().getABITypeAlignment(B.GV->getValueType())); + B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), Alignment)); // Before was stored in reverse order; flip it now. for (size_t I = 0, Size = B.Before.Bytes.size(); I != Size / 2; ++I) @@ -1322,6 +1514,7 @@ void DevirtModule::rebuildGlobal(VTableBits &B) { GlobalVariable::PrivateLinkage, NewInit, "", B.GV); NewGV->setSection(B.GV->getSection()); NewGV->setComdat(B.GV->getComdat()); + NewGV->setAlignment(MaybeAlign(B.GV->getAlignment())); // Copy the original vtable's metadata to the anonymous global, adjusting // offsets as required. @@ -1483,8 +1676,11 @@ void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) { } void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) { + auto *TypeId = dyn_cast(Slot.TypeID); + if (!TypeId) + return; const TypeIdSummary *TidSummary = - ImportSummary->getTypeIdSummary(cast(Slot.TypeID)->getString()); + ImportSummary->getTypeIdSummary(TypeId->getString()); if (!TidSummary) return; auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset); @@ -1493,6 +1689,7 @@ void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) { const WholeProgramDevirtResolution &Res = ResI->second; if (Res.TheKind == WholeProgramDevirtResolution::SingleImpl) { + assert(!Res.SingleImplName.empty()); // The type of the function in the declaration is irrelevant because every // call site will cast it to the correct type. Constant *SingleImpl = @@ -1627,8 +1824,7 @@ bool DevirtModule::run() { // FIXME: Only add live functions. 
for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) { for (Metadata *MD : MetadataByGUID[VF.GUID]) { - CallSlots[{MD, VF.Offset}] - .CSInfo.markSummaryHasTypeTestAssumeUsers(); + CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS); } } for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) { @@ -1641,7 +1837,7 @@ bool DevirtModule::run() { for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) { CallSlots[{MD, VC.VFunc.Offset}] .ConstCSInfo[VC.Args] - .markSummaryHasTypeTestAssumeUsers(); + .addSummaryTypeTestAssumeUser(FS); } } for (const FunctionSummary::ConstVCall &VC : @@ -1673,7 +1869,7 @@ bool DevirtModule::run() { cast(S.first.TypeID)->getString()) .WPDRes[S.first.ByteOffset]; - if (!trySingleImplDevirt(TargetsForSlot, S.second, Res)) { + if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) { DidVirtualConstProp |= tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first); @@ -1710,7 +1906,7 @@ bool DevirtModule::run() { using namespace ore; OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F) << "devirtualized " - << NV("FunctionName", F->getName())); + << NV("FunctionName", DT.first)); } } @@ -1722,5 +1918,86 @@ bool DevirtModule::run() { for (VTableBits &B : Bits) rebuildGlobal(B); + // We have lowered or deleted the type checked load intrinsics, so we no + // longer have enough information to reason about the liveness of virtual + // function pointers in GlobalDCE. + for (GlobalVariable &GV : M.globals()) + GV.eraseMetadata(LLVMContext::MD_vcall_visibility); + return true; } + +void DevirtIndex::run() { + if (ExportSummary.typeIdCompatibleVtableMap().empty()) + return; + + DenseMap> NameByGUID; + for (auto &P : ExportSummary.typeIdCompatibleVtableMap()) { + NameByGUID[GlobalValue::getGUID(P.first)].push_back(P.first); + } + + // Collect information from summary about which calls to try to devirtualize. + for (auto &P : ExportSummary) { + for (auto &S : P.second.SummaryList) { + auto *FS = dyn_cast(S.get()); + if (!FS) + continue; + // FIXME: Only add live functions. + for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) { + for (StringRef Name : NameByGUID[VF.GUID]) { + CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS); + } + } + for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) { + for (StringRef Name : NameByGUID[VF.GUID]) { + CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS); + } + } + for (const FunctionSummary::ConstVCall &VC : + FS->type_test_assume_const_vcalls()) { + for (StringRef Name : NameByGUID[VC.VFunc.GUID]) { + CallSlots[{Name, VC.VFunc.Offset}] + .ConstCSInfo[VC.Args] + .addSummaryTypeTestAssumeUser(FS); + } + } + for (const FunctionSummary::ConstVCall &VC : + FS->type_checked_load_const_vcalls()) { + for (StringRef Name : NameByGUID[VC.VFunc.GUID]) { + CallSlots[{Name, VC.VFunc.Offset}] + .ConstCSInfo[VC.Args] + .addSummaryTypeCheckedLoadUser(FS); + } + } + } + } + + std::set DevirtTargets; + // For each (type, offset) pair: + for (auto &S : CallSlots) { + // Search each of the members of the type identifier for the virtual + // function implementation at offset S.first.ByteOffset, and add to + // TargetsForSlot. 
+ std::vector TargetsForSlot; + auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID); + assert(TidSummary); + if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary, + S.first.ByteOffset)) { + WholeProgramDevirtResolution *Res = + &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID) + .WPDRes[S.first.ByteOffset]; + + if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res, + DevirtTargets)) + continue; + } + } + + // Optionally have the thin link print message for each devirtualized + // function. + if (PrintSummaryDevirt) + for (const auto &DT : DevirtTargets) + errs() << "Devirtualized call to " << DT << "\n"; + + return; +} diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index ba15b023f2a3..8bc34825f8a7 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1097,6 +1097,107 @@ static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) { return nullptr; } +Instruction * +InstCombiner::canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract( + BinaryOperator &I) { + assert((I.getOpcode() == Instruction::Add || + I.getOpcode() == Instruction::Or || + I.getOpcode() == Instruction::Sub) && + "Expecting add/or/sub instruction"); + + // We have a subtraction/addition between a (potentially truncated) *logical* + // right-shift of X and a "select". + Value *X, *Select; + Instruction *LowBitsToSkip, *Extract; + if (!match(&I, m_c_BinOp(m_TruncOrSelf(m_CombineAnd( + m_LShr(m_Value(X), m_Instruction(LowBitsToSkip)), + m_Instruction(Extract))), + m_Value(Select)))) + return nullptr; + + // `add`/`or` is commutative; but for `sub`, "select" *must* be on RHS. + if (I.getOpcode() == Instruction::Sub && I.getOperand(1) != Select) + return nullptr; + + Type *XTy = X->getType(); + bool HadTrunc = I.getType() != XTy; + + // If there was a truncation of extracted value, then we'll need to produce + // one extra instruction, so we need to ensure one instruction will go away. + if (HadTrunc && !match(&I, m_c_BinOp(m_OneUse(m_Value()), m_Value()))) + return nullptr; + + // Extraction should extract high NBits bits, with shift amount calculated as: + // low bits to skip = shift bitwidth - high bits to extract + // The shift amount itself may be extended, and we need to look past zero-ext + // when matching NBits, that will matter for matching later. + Constant *C; + Value *NBits; + if (!match( + LowBitsToSkip, + m_ZExtOrSelf(m_Sub(m_Constant(C), m_ZExtOrSelf(m_Value(NBits))))) || + !match(C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ, + APInt(C->getType()->getScalarSizeInBits(), + X->getType()->getScalarSizeInBits())))) + return nullptr; + + // Sign-extending value can be zero-extended if we `sub`tract it, + // or sign-extended otherwise. + auto SkipExtInMagic = [&I](Value *&V) { + if (I.getOpcode() == Instruction::Sub) + match(V, m_ZExtOrSelf(m_Value(V))); + else + match(V, m_SExtOrSelf(m_Value(V))); + }; + + // Now, finally validate the sign-extending magic. + // `select` itself may be appropriately extended, look past that. + SkipExtInMagic(Select); + + ICmpInst::Predicate Pred; + const APInt *Thr; + Value *SignExtendingValue, *Zero; + bool ShouldSignext; + // It must be a select between two values we will later establish to be a + // sign-extending value and a zero constant. The condition guarding the + // sign-extension must be based on a sign bit of the same X we had in `lshr`. 
+ if (!match(Select, m_Select(m_ICmp(Pred, m_Specific(X), m_APInt(Thr)), + m_Value(SignExtendingValue), m_Value(Zero))) || + !isSignBitCheck(Pred, *Thr, ShouldSignext)) + return nullptr; + + // icmp-select pair is commutative. + if (!ShouldSignext) + std::swap(SignExtendingValue, Zero); + + // If we should not perform sign-extension then we must add/or/subtract zero. + if (!match(Zero, m_Zero())) + return nullptr; + // Otherwise, it should be some constant, left-shifted by the same NBits we + // had in `lshr`. Said left-shift can also be appropriately extended. + // Again, we must look past zero-ext when looking for NBits. + SkipExtInMagic(SignExtendingValue); + Constant *SignExtendingValueBaseConstant; + if (!match(SignExtendingValue, + m_Shl(m_Constant(SignExtendingValueBaseConstant), + m_ZExtOrSelf(m_Specific(NBits))))) + return nullptr; + // If we `sub`, then the constant should be one, else it should be all-ones. + if (I.getOpcode() == Instruction::Sub + ? !match(SignExtendingValueBaseConstant, m_One()) + : !match(SignExtendingValueBaseConstant, m_AllOnes())) + return nullptr; + + auto *NewAShr = BinaryOperator::CreateAShr(X, LowBitsToSkip, + Extract->getName() + ".sext"); + NewAShr->copyIRFlags(Extract); // Preserve `exact`-ness. + if (!HadTrunc) + return NewAShr; + + Builder.Insert(NewAShr); + return TruncInst::CreateTruncOrBitCast(NewAShr, I.getType()); +} + Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Value *V = SimplifyAddInst(I.getOperand(0), I.getOperand(1), I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), @@ -1302,12 +1403,32 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Instruction *V = canonicalizeLowbitMask(I, Builder)) return V; + if (Instruction *V = + canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I)) + return V; + if (Instruction *SatAdd = foldToUnsignedSaturatedAdd(I)) return SatAdd; return Changed ? &I : nullptr; } +/// Eliminate an op from a linear interpolation (lerp) pattern. +static Instruction *factorizeLerp(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { + Value *X, *Y, *Z; + if (!match(&I, m_c_FAdd(m_OneUse(m_c_FMul(m_Value(Y), + m_OneUse(m_FSub(m_FPOne(), + m_Value(Z))))), + m_OneUse(m_c_FMul(m_Value(X), m_Deferred(Z)))))) + return nullptr; + + // (Y * (1.0 - Z)) + (X * Z) --> Y + Z * (X - Y) [8 commuted variants] + Value *XY = Builder.CreateFSubFMF(X, Y, &I); + Value *MulZ = Builder.CreateFMulFMF(Z, XY, &I); + return BinaryOperator::CreateFAddFMF(Y, MulZ, &I); +} + /// Factor a common operand out of fadd/fsub of fmul/fdiv. 
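factorizeLerp, added just above, trades the two-multiply interpolation form Y*(1.0 - Z) + X*Z for Y + Z*(X - Y). The two expressions are equal over the reals but can round differently in IEEE arithmetic, which is why the caller only applies the fold under reassoc/nsz fast-math flags. A small numeric sanity check (plain C++; the helper names lerp2mul/lerp1mul are illustrative, and the comparison is tolerance-based for exactly that rounding reason):

#include <cassert>
#include <cmath>

static double lerp2mul(double x, double y, double z) { return y * (1.0 - z) + x * z; }
static double lerp1mul(double x, double y, double z) { return y + z * (x - y); }

int main() {
  const double xs[] = {0.0, 1.0, -3.5, 42.0, 1e9};
  const double zs[] = {0.0, 0.25, 0.5, 0.75, 1.0};
  for (double x : xs)
    for (double y : xs)
      for (double z : zs) {
        double a = lerp2mul(x, y, z), b = lerp1mul(x, y, z);
        // Algebraically identical; allow a small relative slack for rounding.
        assert(std::fabs(a - b) <= 1e-6 * (1.0 + std::fabs(a)));
      }
  return 0;
}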
static Instruction *factorizeFAddFSub(BinaryOperator &I, InstCombiner::BuilderTy &Builder) { @@ -1315,6 +1436,10 @@ static Instruction *factorizeFAddFSub(BinaryOperator &I, I.getOpcode() == Instruction::FSub) && "Expecting fadd/fsub"); assert(I.hasAllowReassoc() && I.hasNoSignedZeros() && "FP factorization requires FMF"); + + if (Instruction *Lerp = factorizeLerp(I, Builder)) + return Lerp; + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); Value *X, *Y, *Z; bool IsFMul; @@ -1362,17 +1487,32 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { if (Instruction *FoldedFAdd = foldBinOpIntoSelectOrPhi(I)) return FoldedFAdd; - Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - Value *X; // (-X) + Y --> Y - X - if (match(LHS, m_FNeg(m_Value(X)))) - return BinaryOperator::CreateFSubFMF(RHS, X, &I); - // Y + (-X) --> Y - X - if (match(RHS, m_FNeg(m_Value(X)))) - return BinaryOperator::CreateFSubFMF(LHS, X, &I); + Value *X, *Y; + if (match(&I, m_c_FAdd(m_FNeg(m_Value(X)), m_Value(Y)))) + return BinaryOperator::CreateFSubFMF(Y, X, &I); + + // Similar to above, but look through fmul/fdiv for the negated term. + // (-X * Y) + Z --> Z - (X * Y) [4 commuted variants] + Value *Z; + if (match(&I, m_c_FAdd(m_OneUse(m_c_FMul(m_FNeg(m_Value(X)), m_Value(Y))), + m_Value(Z)))) { + Value *XY = Builder.CreateFMulFMF(X, Y, &I); + return BinaryOperator::CreateFSubFMF(Z, XY, &I); + } + // (-X / Y) + Z --> Z - (X / Y) [2 commuted variants] + // (X / -Y) + Z --> Z - (X / Y) [2 commuted variants] + if (match(&I, m_c_FAdd(m_OneUse(m_FDiv(m_FNeg(m_Value(X)), m_Value(Y))), + m_Value(Z))) || + match(&I, m_c_FAdd(m_OneUse(m_FDiv(m_Value(X), m_FNeg(m_Value(Y)))), + m_Value(Z)))) { + Value *XY = Builder.CreateFDivFMF(X, Y, &I); + return BinaryOperator::CreateFSubFMF(Z, XY, &I); + } // Check for (fadd double (sitofp x), y), see if we can merge this into an // integer add followed by a promotion. 
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); if (SIToFPInst *LHSConv = dyn_cast(LHS)) { Value *LHSIntVal = LHSConv->getOperand(0); Type *FPType = LHSConv->getType(); @@ -1631,37 +1771,50 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { const APInt *Op0C; if (match(Op0, m_APInt(Op0C))) { - unsigned BitWidth = I.getType()->getScalarSizeInBits(); - // -(X >>u 31) -> (X >>s 31) - // -(X >>s 31) -> (X >>u 31) if (Op0C->isNullValue()) { + Value *Op1Wide; + match(Op1, m_TruncOrSelf(m_Value(Op1Wide))); + bool HadTrunc = Op1Wide != Op1; + bool NoTruncOrTruncIsOneUse = !HadTrunc || Op1->hasOneUse(); + unsigned BitWidth = Op1Wide->getType()->getScalarSizeInBits(); + Value *X; const APInt *ShAmt; - if (match(Op1, m_LShr(m_Value(X), m_APInt(ShAmt))) && + // -(X >>u 31) -> (X >>s 31) + if (NoTruncOrTruncIsOneUse && + match(Op1Wide, m_LShr(m_Value(X), m_APInt(ShAmt))) && *ShAmt == BitWidth - 1) { - Value *ShAmtOp = cast(Op1)->getOperand(1); - return BinaryOperator::CreateAShr(X, ShAmtOp); + Value *ShAmtOp = cast(Op1Wide)->getOperand(1); + Instruction *NewShift = BinaryOperator::CreateAShr(X, ShAmtOp); + NewShift->copyIRFlags(Op1Wide); + if (!HadTrunc) + return NewShift; + Builder.Insert(NewShift); + return TruncInst::CreateTruncOrBitCast(NewShift, Op1->getType()); } - if (match(Op1, m_AShr(m_Value(X), m_APInt(ShAmt))) && + // -(X >>s 31) -> (X >>u 31) + if (NoTruncOrTruncIsOneUse && + match(Op1Wide, m_AShr(m_Value(X), m_APInt(ShAmt))) && *ShAmt == BitWidth - 1) { - Value *ShAmtOp = cast(Op1)->getOperand(1); - return BinaryOperator::CreateLShr(X, ShAmtOp); + Value *ShAmtOp = cast(Op1Wide)->getOperand(1); + Instruction *NewShift = BinaryOperator::CreateLShr(X, ShAmtOp); + NewShift->copyIRFlags(Op1Wide); + if (!HadTrunc) + return NewShift; + Builder.Insert(NewShift); + return TruncInst::CreateTruncOrBitCast(NewShift, Op1->getType()); } - if (Op1->hasOneUse()) { + if (!HadTrunc && Op1->hasOneUse()) { Value *LHS, *RHS; SelectPatternFlavor SPF = matchSelectPattern(Op1, LHS, RHS).Flavor; if (SPF == SPF_ABS || SPF == SPF_NABS) { // This is a negate of an ABS/NABS pattern. Just swap the operands // of the select. - SelectInst *SI = cast(Op1); - Value *TrueVal = SI->getTrueValue(); - Value *FalseVal = SI->getFalseValue(); - SI->setTrueValue(FalseVal); - SI->setFalseValue(TrueVal); + cast(Op1)->swapValues(); // Don't swap prof metadata, we didn't change the branch behavior. 
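Two groups of visitSub folds meet here: the generalization above lets "-(X >>u bitwidth-1) -> (X >>s bitwidth-1)" (and its signed/unsigned dual) look through a truncation of the shifted value, and the hunk just below adds subtractions of bitwise ops such as (A | B) - (A & B) -> A ^ B. Both rest on small bit-level identities: the shift by bitwidth-1 produces only 0/1 (or 0/-1), and A|B carries exactly the bits of A&B plus the disjoint bits of A^B, so the subtractions never borrow across those sets. A standalone exhaustive check at 8 bits (assumes arithmetic right shift for signed values, guaranteed since C++20):

#include <cassert>
#include <cstdint>

int main() {
  // -(X >>u 7) == (X >>s 7) and -(X >>s 7) == (X >>u 7).
  for (unsigned x = 0; x < 256; ++x) {
    uint8_t U = uint8_t(x);
    uint8_t Lshr = uint8_t(U >> 7);              // 0 or 1
    uint8_t Ashr = uint8_t(int8_t(U) >> 7);      // 0 or 0xFF
    assert(uint8_t(-Lshr) == Ashr && uint8_t(-Ashr) == Lshr);
  }
  // Subtraction of bitwise ops (the folds added just below).
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b) {
      uint8_t A = uint8_t(a), B = uint8_t(b);
      assert(uint8_t((A | B) - (A & B)) == uint8_t(A ^ B));
      assert(uint8_t((A & B) - (A | B)) == uint8_t(-(A ^ B)));
      assert(uint8_t((A | B) - (A ^ B)) == uint8_t(A & B));
      assert(uint8_t((A ^ B) - (A | B)) == uint8_t(-(A & B)));
    }
  return 0;
}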
- return replaceInstUsesWith(I, SI); + return replaceInstUsesWith(I, Op1); } } } @@ -1686,6 +1839,23 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return BinaryOperator::CreateNeg(Y); } + // (sub (or A, B) (and A, B)) --> (xor A, B) + { + Value *A, *B; + if (match(Op1, m_And(m_Value(A), m_Value(B))) && + match(Op0, m_c_Or(m_Specific(A), m_Specific(B)))) + return BinaryOperator::CreateXor(A, B); + } + + // (sub (and A, B) (or A, B)) --> neg (xor A, B) + { + Value *A, *B; + if (match(Op0, m_And(m_Value(A), m_Value(B))) && + match(Op1, m_c_Or(m_Specific(A), m_Specific(B))) && + (Op0->hasOneUse() || Op1->hasOneUse())) + return BinaryOperator::CreateNeg(Builder.CreateXor(A, B)); + } + // (sub (or A, B), (xor A, B)) --> (and A, B) { Value *A, *B; @@ -1694,6 +1864,15 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return BinaryOperator::CreateAnd(A, B); } + // (sub (xor A, B) (or A, B)) --> neg (and A, B) + { + Value *A, *B; + if (match(Op0, m_Xor(m_Value(A), m_Value(B))) && + match(Op1, m_c_Or(m_Specific(A), m_Specific(B))) && + (Op0->hasOneUse() || Op1->hasOneUse())) + return BinaryOperator::CreateNeg(Builder.CreateAnd(A, B)); + } + { Value *Y; // ((X | Y) - X) --> (~X & Y) @@ -1778,7 +1957,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { std::swap(LHS, RHS); // LHS is now O above and expected to have at least 2 uses (the min/max) // NotA is epected to have 2 uses from the min/max and 1 from the sub. - if (IsFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) && + if (isFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) && !NotA->hasNUsesOrMore(4)) { // Note: We don't generate the inverse max/min, just create the not of // it and let other folds do the rest. @@ -1826,6 +2005,10 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return SelectInst::Create(Cmp, Neg, A); } + if (Instruction *V = + canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I)) + return V; + if (Instruction *Ext = narrowMathIfNoOverflow(I)) return Ext; @@ -1865,6 +2048,22 @@ static Instruction *foldFNegIntoConstant(Instruction &I) { return nullptr; } +static Instruction *hoistFNegAboveFMulFDiv(Instruction &I, + InstCombiner::BuilderTy &Builder) { + Value *FNeg; + if (!match(&I, m_FNeg(m_Value(FNeg)))) + return nullptr; + + Value *X, *Y; + if (match(FNeg, m_OneUse(m_FMul(m_Value(X), m_Value(Y))))) + return BinaryOperator::CreateFMulFMF(Builder.CreateFNegFMF(X, &I), Y, &I); + + if (match(FNeg, m_OneUse(m_FDiv(m_Value(X), m_Value(Y))))) + return BinaryOperator::CreateFDivFMF(Builder.CreateFNegFMF(X, &I), Y, &I); + + return nullptr; +} + Instruction *InstCombiner::visitFNeg(UnaryOperator &I) { Value *Op = I.getOperand(0); @@ -1882,6 +2081,9 @@ Instruction *InstCombiner::visitFNeg(UnaryOperator &I) { match(Op, m_OneUse(m_FSub(m_Value(X), m_Value(Y))))) return BinaryOperator::CreateFSubFMF(Y, X, &I); + if (Instruction *R = hoistFNegAboveFMulFDiv(I, Builder)) + return R; + return nullptr; } @@ -1903,6 +2105,9 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { if (Instruction *X = foldFNegIntoConstant(I)) return X; + if (Instruction *R = hoistFNegAboveFMulFDiv(I, Builder)) + return R; + Value *X, *Y; Constant *C; @@ -1944,6 +2149,21 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { if (match(Op1, m_OneUse(m_FPExt(m_FNeg(m_Value(Y)))))) return BinaryOperator::CreateFAddFMF(Op0, Builder.CreateFPExt(Y, Ty), &I); + // Similar to above, but look through fmul/fdiv of the negated value: + // Op0 - (-X * Y) --> Op0 + (X * Y) + // Op0 - (Y * -X) --> Op0 + (X * Y) + if 
(match(Op1, m_OneUse(m_c_FMul(m_FNeg(m_Value(X)), m_Value(Y))))) { + Value *FMul = Builder.CreateFMulFMF(X, Y, &I); + return BinaryOperator::CreateFAddFMF(Op0, FMul, &I); + } + // Op0 - (-X / Y) --> Op0 + (X / Y) + // Op0 - (X / -Y) --> Op0 + (X / Y) + if (match(Op1, m_OneUse(m_FDiv(m_FNeg(m_Value(X)), m_Value(Y)))) || + match(Op1, m_OneUse(m_FDiv(m_Value(X), m_FNeg(m_Value(Y)))))) { + Value *FDiv = Builder.CreateFDivFMF(X, Y, &I); + return BinaryOperator::CreateFAddFMF(Op0, FDiv, &I); + } + // Handle special cases for FSub with selects feeding the operation if (Value *V = SimplifySelectsFeedingBinaryOp(I, Op0, Op1)) return replaceInstUsesWith(I, V); diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 2b9859b602f4..4a30b60ca931 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -160,16 +160,14 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op, } /// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise -/// (V < Lo || V >= Hi). This method expects that Lo <= Hi. IsSigned indicates +/// (V < Lo || V >= Hi). This method expects that Lo < Hi. IsSigned indicates /// whether to treat V, Lo, and Hi as signed or not. Value *InstCombiner::insertRangeTest(Value *V, const APInt &Lo, const APInt &Hi, bool isSigned, bool Inside) { - assert((isSigned ? Lo.sle(Hi) : Lo.ule(Hi)) && - "Lo is not <= Hi in range emission code!"); + assert((isSigned ? Lo.slt(Hi) : Lo.ult(Hi)) && + "Lo is not < Hi in range emission code!"); Type *Ty = V->getType(); - if (Lo == Hi) - return Inside ? ConstantInt::getFalse(Ty) : ConstantInt::getTrue(Ty); // V >= Min && V < Hi --> V < Hi // V < Min || V >= Hi --> V >= Hi @@ -1051,9 +1049,103 @@ static Value *foldIsPowerOf2(ICmpInst *Cmp0, ICmpInst *Cmp1, bool JoinedByAnd, return nullptr; } +/// Commuted variants are assumed to be handled by calling this function again +/// with the parameters swapped. +static Value *foldUnsignedUnderflowCheck(ICmpInst *ZeroICmp, + ICmpInst *UnsignedICmp, bool IsAnd, + const SimplifyQuery &Q, + InstCombiner::BuilderTy &Builder) { + Value *ZeroCmpOp; + ICmpInst::Predicate EqPred; + if (!match(ZeroICmp, m_ICmp(EqPred, m_Value(ZeroCmpOp), m_Zero())) || + !ICmpInst::isEquality(EqPred)) + return nullptr; + + auto IsKnownNonZero = [&](Value *V) { + return isKnownNonZero(V, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); + }; + + ICmpInst::Predicate UnsignedPred; + + Value *A, *B; + if (match(UnsignedICmp, + m_c_ICmp(UnsignedPred, m_Specific(ZeroCmpOp), m_Value(A))) && + match(ZeroCmpOp, m_c_Add(m_Specific(A), m_Value(B))) && + (ZeroICmp->hasOneUse() || UnsignedICmp->hasOneUse())) { + if (UnsignedICmp->getOperand(0) != ZeroCmpOp) + UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred); + + auto GetKnownNonZeroAndOther = [&](Value *&NonZero, Value *&Other) { + if (!IsKnownNonZero(NonZero)) + std::swap(NonZero, Other); + return IsKnownNonZero(NonZero); + }; + + // Given ZeroCmpOp = (A + B) + // ZeroCmpOp <= A && ZeroCmpOp != 0 --> (0-B) < A + // ZeroCmpOp > A || ZeroCmpOp == 0 --> (0-B) >= A + // + // ZeroCmpOp < A && ZeroCmpOp != 0 --> (0-X) < Y iff + // ZeroCmpOp >= A || ZeroCmpOp == 0 --> (0-X) >= Y iff + // with X being the value (A/B) that is known to be non-zero, + // and Y being remaining value. 
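The comment above states the add-form rewrites performed by foldUnsignedUnderflowCheck, e.g. "(A+B) u<= A && (A+B) != 0 --> (0-B) u< A" together with its or/== dual. These are plain modular-arithmetic facts and can be verified exhaustively at 8 bits (standalone C++, not part of the pass):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b) {
      uint8_t A = uint8_t(a), B = uint8_t(b);
      uint8_t Sum = uint8_t(A + B);    // (A + B) with unsigned wrap
      uint8_t NegB = uint8_t(-B);      // (0 - B) with unsigned wrap
      // (A+B) u<= A && (A+B) != 0  <=>  (0-B) u< A
      assert(((Sum <= A) && (Sum != 0)) == (NegB < A));
      // (A+B) u> A  || (A+B) == 0  <=>  (0-B) u>= A   (the 'or' dual)
      assert(((Sum > A) || (Sum == 0)) == (NegB >= A));
    }
  return 0;
}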
+ if (UnsignedPred == ICmpInst::ICMP_ULE && EqPred == ICmpInst::ICMP_NE && + IsAnd) + return Builder.CreateICmpULT(Builder.CreateNeg(B), A); + if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE && + IsAnd && GetKnownNonZeroAndOther(B, A)) + return Builder.CreateICmpULT(Builder.CreateNeg(B), A); + if (UnsignedPred == ICmpInst::ICMP_UGT && EqPred == ICmpInst::ICMP_EQ && + !IsAnd) + return Builder.CreateICmpUGE(Builder.CreateNeg(B), A); + if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_EQ && + !IsAnd && GetKnownNonZeroAndOther(B, A)) + return Builder.CreateICmpUGE(Builder.CreateNeg(B), A); + } + + Value *Base, *Offset; + if (!match(ZeroCmpOp, m_Sub(m_Value(Base), m_Value(Offset)))) + return nullptr; + + if (!match(UnsignedICmp, + m_c_ICmp(UnsignedPred, m_Specific(Base), m_Specific(Offset))) || + !ICmpInst::isUnsigned(UnsignedPred)) + return nullptr; + if (UnsignedICmp->getOperand(0) != Base) + UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred); + + // Base >=/> Offset && (Base - Offset) != 0 <--> Base > Offset + // (no overflow and not null) + if ((UnsignedPred == ICmpInst::ICMP_UGE || + UnsignedPred == ICmpInst::ICMP_UGT) && + EqPred == ICmpInst::ICMP_NE && IsAnd) + return Builder.CreateICmpUGT(Base, Offset); + + // Base <=/< Offset || (Base - Offset) == 0 <--> Base <= Offset + // (overflow or null) + if ((UnsignedPred == ICmpInst::ICMP_ULE || + UnsignedPred == ICmpInst::ICMP_ULT) && + EqPred == ICmpInst::ICMP_EQ && !IsAnd) + return Builder.CreateICmpULE(Base, Offset); + + // Base <= Offset && (Base - Offset) != 0 --> Base < Offset + if (UnsignedPred == ICmpInst::ICMP_ULE && EqPred == ICmpInst::ICMP_NE && + IsAnd) + return Builder.CreateICmpULT(Base, Offset); + + // Base > Offset || (Base - Offset) == 0 --> Base >= Offset + if (UnsignedPred == ICmpInst::ICMP_UGT && EqPred == ICmpInst::ICMP_EQ && + !IsAnd) + return Builder.CreateICmpUGE(Base, Offset); + + return nullptr; +} + /// Fold (icmp)&(icmp) if possible. Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &CxtI) { + const SimplifyQuery Q = SQ.getWithInstruction(&CxtI); + // Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2) // if K1 and K2 are a one-bit mask. if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, true, CxtI)) @@ -1096,6 +1188,13 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, if (Value *V = foldIsPowerOf2(LHS, RHS, true /* JoinedByAnd */, Builder)) return V; + if (Value *X = + foldUnsignedUnderflowCheck(LHS, RHS, /*IsAnd=*/true, Q, Builder)) + return X; + if (Value *X = + foldUnsignedUnderflowCheck(RHS, LHS, /*IsAnd=*/true, Q, Builder)) + return X; + // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). 
Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0); ConstantInt *LHSC = dyn_cast(LHS->getOperand(1)); @@ -1196,16 +1295,22 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_ULT: - if (LHSC == SubOne(RHSC)) // (X != 13 & X u< 14) -> X < 13 + // (X != 13 & X u< 14) -> X < 13 + if (LHSC->getValue() == (RHSC->getValue() - 1)) return Builder.CreateICmpULT(LHS0, LHSC); - if (LHSC->isZero()) // (X != 0 & X u< 14) -> X-1 u< 13 + if (LHSC->isZero()) // (X != 0 & X u< C) -> X-1 u< C-1 return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), false, true); break; // (X != 13 & X u< 15) -> no change case ICmpInst::ICMP_SLT: - if (LHSC == SubOne(RHSC)) // (X != 13 & X s< 14) -> X < 13 + // (X != 13 & X s< 14) -> X < 13 + if (LHSC->getValue() == (RHSC->getValue() - 1)) return Builder.CreateICmpSLT(LHS0, LHSC); - break; // (X != 13 & X s< 15) -> no change + // (X != INT_MIN & X s< C) -> X-(INT_MIN+1) u< (C-(INT_MIN+1)) + if (LHSC->isMinValue(true)) + return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), + true, true); + break; // (X != 13 & X s< 15) -> no change case ICmpInst::ICMP_NE: // Potential folds for this case should already be handled. break; @@ -1216,10 +1321,15 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_NE: - if (RHSC == AddOne(LHSC)) // (X u> 13 & X != 14) -> X u> 14 + // (X u> 13 & X != 14) -> X u> 14 + if (RHSC->getValue() == (LHSC->getValue() + 1)) return Builder.CreateICmp(PredL, LHS0, RHSC); + // X u> C & X != UINT_MAX -> (X-(C+1)) u< UINT_MAX-(C+1) + if (RHSC->isMaxValue(false)) + return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), + false, true); break; // (X u> 13 & X != 15) -> no change - case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) 13 & X u< 15) -> (X-14) u< 1 return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), false, true); } @@ -1229,10 +1339,15 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_NE: - if (RHSC == AddOne(LHSC)) // (X s> 13 & X != 14) -> X s> 14 + // (X s> 13 & X != 14) -> X s> 14 + if (RHSC->getValue() == (LHSC->getValue() + 1)) return Builder.CreateICmp(PredL, LHS0, RHSC); + // X s> C & X != INT_MAX -> (X-(C+1)) u< INT_MAX-(C+1) + if (RHSC->isMaxValue(true)) + return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), + true, true); break; // (X s> 13 & X != 15) -> no change - case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1 + case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) u< 1 return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), true, true); } @@ -1352,8 +1467,8 @@ static Instruction *matchDeMorgansLaws(BinaryOperator &I, Value *A, *B; if (match(I.getOperand(0), m_OneUse(m_Not(m_Value(A)))) && match(I.getOperand(1), m_OneUse(m_Not(m_Value(B)))) && - !IsFreeToInvert(A, A->hasOneUse()) && - !IsFreeToInvert(B, B->hasOneUse())) { + !isFreeToInvert(A, A->hasOneUse()) && + !isFreeToInvert(B, B->hasOneUse())) { Value *AndOr = Builder.CreateBinOp(Opcode, A, B, I.getName() + ".demorgan"); return BinaryOperator::CreateNot(AndOr); } @@ -1770,13 +1885,13 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) if (match(Op1, 
m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A)))) - if (Op1->hasOneUse() || IsFreeToInvert(C, C->hasOneUse())) + if (Op1->hasOneUse() || isFreeToInvert(C, C->hasOneUse())) return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(C)); // ((A ^ C) ^ B) & (B ^ A) -> (B ^ A) & ~C if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B)))) if (match(Op1, m_Xor(m_Specific(B), m_Specific(A)))) - if (Op0->hasOneUse() || IsFreeToInvert(C, C->hasOneUse())) + if (Op0->hasOneUse() || isFreeToInvert(C, C->hasOneUse())) return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(C)); // (A | B) & ((~A) ^ B) -> (A & B) @@ -1844,6 +1959,20 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { A->getType()->isIntOrIntVectorTy(1)) return SelectInst::Create(A, Op0, Constant::getNullValue(I.getType())); + // and(ashr(subNSW(Y, X), ScalarSizeInBits(Y)-1), X) --> X s> Y ? X : 0. + { + Value *X, *Y; + const APInt *ShAmt; + Type *Ty = I.getType(); + if (match(&I, m_c_And(m_OneUse(m_AShr(m_NSWSub(m_Value(Y), m_Value(X)), + m_APInt(ShAmt))), + m_Deferred(X))) && + *ShAmt == Ty->getScalarSizeInBits() - 1) { + Value *NewICmpInst = Builder.CreateICmpSGT(X, Y); + return SelectInst::Create(NewICmpInst, X, ConstantInt::getNullValue(Ty)); + } + } + return nullptr; } @@ -2057,6 +2186,8 @@ Value *InstCombiner::matchSelectFromAndOr(Value *A, Value *C, Value *B, /// Fold (icmp)|(icmp) if possible. Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &CxtI) { + const SimplifyQuery Q = SQ.getWithInstruction(&CxtI); + // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2) // if K1 and K2 are a one-bit mask. if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, false, CxtI)) @@ -2182,6 +2313,13 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, if (Value *V = foldIsPowerOf2(LHS, RHS, false /* JoinedByAnd */, Builder)) return V; + if (Value *X = + foldUnsignedUnderflowCheck(LHS, RHS, /*IsAnd=*/false, Q, Builder)) + return X; + if (Value *X = + foldUnsignedUnderflowCheck(RHS, LHS, /*IsAnd=*/false, Q, Builder)) + return X; + // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). if (!LHSC || !RHSC) return nullptr; @@ -2251,8 +2389,19 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, case ICmpInst::ICMP_EQ: // Potential folds for this case should already be handled. 
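The new 'and' fold above, and(ashr(subNSW(Y, X), ScalarSizeInBits(Y)-1), X) --> X s> Y ? X : 0, works because a non-overflowing Y - X is negative exactly when X s> Y, so the arithmetic shift by bitwidth-1 yields an all-ones mask in precisely that case; the sibling 'or' fold added later in this patch selects -1 instead of 0 under the same mask. A check over all non-overflowing 8-bit pairs (standalone C++, assumes arithmetic right shift for signed values):

#include <cassert>
#include <cstdint>

int main() {
  for (int x = -128; x < 128; ++x)
    for (int y = -128; y < 128; ++y) {
      int diff = y - x;
      if (diff < -128 || diff > 127)
        continue;                                   // the fold requires 'sub nsw'
      uint8_t mask = uint8_t(int8_t(diff) >> 7);    // 0xFF iff y - x is negative, i.e. x s> y
      // and(mask, x) --> x s> y ? x : 0
      assert(uint8_t(mask & uint8_t(x)) == uint8_t(x > y ? uint8_t(x) : 0));
      // or(mask, x)  --> x s> y ? -1 : x   (the companion fold)
      assert(uint8_t(mask | uint8_t(x)) == uint8_t(x > y ? 0xFF : uint8_t(x)));
    }
  return 0;
}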
break; - case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change - case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change + case ICmpInst::ICMP_UGT: + // (X == 0 || X u> C) -> (X-1) u>= C + if (LHSC->isMinValue(false)) + return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue() + 1, + false, false); + // (X == 13 | X u> 14) -> no change + break; + case ICmpInst::ICMP_SGT: + // (X == INT_MIN || X s> C) -> (X-(INT_MIN+1)) u>= C-INT_MIN + if (LHSC->isMinValue(true)) + return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue() + 1, + true, false); + // (X == 13 | X s> 14) -> no change break; } break; @@ -2261,6 +2410,10 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change + // (X u< C || X == UINT_MAX) => (X-C) u>= UINT_MAX-C + if (RHSC->isMaxValue(false)) + return insertRangeTest(LHS0, LHSC->getValue(), RHSC->getValue(), + false, false); break; case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2 assert(!RHSC->isMaxValue(false) && "Missed icmp simplification"); @@ -2272,9 +2425,14 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, switch (PredR) { default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change + case ICmpInst::ICMP_EQ: + // (X s< C || X == INT_MAX) => (X-C) u>= INT_MAX-C + if (RHSC->isMaxValue(true)) + return insertRangeTest(LHS0, LHSC->getValue(), RHSC->getValue(), + true, false); + // (X s< 13 | X == 14) -> no change break; - case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2 + case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) u> 2 assert(!RHSC->isMaxValue(true) && "Missed icmp simplification"); return insertRangeTest(LHS0, LHSC->getValue(), RHSC->getValue() + 1, true, false); @@ -2552,6 +2710,25 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } } + // or(ashr(subNSW(Y, X), ScalarSizeInBits(Y)-1), X) --> X s> Y ? -1 : X. + { + Value *X, *Y; + const APInt *ShAmt; + Type *Ty = I.getType(); + if (match(&I, m_c_Or(m_OneUse(m_AShr(m_NSWSub(m_Value(Y), m_Value(X)), + m_APInt(ShAmt))), + m_Deferred(X))) && + *ShAmt == Ty->getScalarSizeInBits() - 1) { + Value *NewICmpInst = Builder.CreateICmpSGT(X, Y); + return SelectInst::Create(NewICmpInst, ConstantInt::getAllOnesValue(Ty), + X); + } + } + + if (Instruction *V = + canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I)) + return V; + return nullptr; } @@ -2617,7 +2794,11 @@ static Instruction *foldXorToXor(BinaryOperator &I, return nullptr; } -Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) { +Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, + BinaryOperator &I) { + assert(I.getOpcode() == Instruction::Xor && I.getOperand(0) == LHS && + I.getOperand(1) == RHS && "Should be 'xor' with these operands"); + if (predicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) { if (LHS->getOperand(0) == RHS->getOperand(1) && LHS->getOperand(1) == RHS->getOperand(0)) @@ -2672,14 +2853,35 @@ Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // TODO: If OrICmp is false, the whole thing is false (InstSimplify?). if (Value *AndICmp = SimplifyBinOp(Instruction::And, LHS, RHS, SQ)) { // TODO: Independently handle cases where the 'and' side is a constant. 
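Several of the new or-of-icmp cases above funnel into insertRangeTest, turning a pair of comparisons into one subtract-and-compare. For instance "(X == 0 || X u> C) -> (X-1) u>= C": subtracting 1 rotates 0 to the top of the unsigned range, so both original cases collapse into a single interval test. The matching and-form from the earlier hunk, "(X != 0 && X u< C) -> (X-1) u< C-1", behaves the same way. Exhaustive 8-bit check (standalone C++):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned c = 0; c < 256; ++c) {
      uint8_t X = uint8_t(x), C = uint8_t(c);
      // (X == 0 || X u> C)  <=>  (X - 1) u>= C
      assert(((X == 0) || (X > C)) == (uint8_t(X - 1) >= C));
      // (X != 0 && X u< C)  <=>  (X - 1) u< C - 1   (for a nonzero constant C)
      if (C != 0)
        assert(((X != 0) && (X < C)) == (uint8_t(X - 1) < uint8_t(C - 1)));
    }
  return 0;
}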
- if (OrICmp == LHS && AndICmp == RHS && RHS->hasOneUse()) { - // (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS - RHS->setPredicate(RHS->getInversePredicate()); - return Builder.CreateAnd(LHS, RHS); + ICmpInst *X = nullptr, *Y = nullptr; + if (OrICmp == LHS && AndICmp == RHS) { + // (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS --> X & !Y + X = LHS; + Y = RHS; } - if (OrICmp == RHS && AndICmp == LHS && LHS->hasOneUse()) { - // !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS - LHS->setPredicate(LHS->getInversePredicate()); + if (OrICmp == RHS && AndICmp == LHS) { + // !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS --> !Y & X + X = RHS; + Y = LHS; + } + if (X && Y && (Y->hasOneUse() || canFreelyInvertAllUsersOf(Y, &I))) { + // Invert the predicate of 'Y', thus inverting its output. + Y->setPredicate(Y->getInversePredicate()); + // So, are there other uses of Y? + if (!Y->hasOneUse()) { + // We need to adapt other uses of Y though. Get a value that matches + // the original value of Y before inversion. While this increases + // immediate instruction count, we have just ensured that all the + // users are freely-invertible, so that 'not' *will* get folded away. + BuilderTy::InsertPointGuard Guard(Builder); + // Set insertion point to right after the Y. + Builder.SetInsertPoint(Y->getParent(), ++(Y->getIterator())); + Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not"); + // Replace all uses of Y (excluding the one in NotY!) with NotY. + Y->replaceUsesWithIf(NotY, + [NotY](Use &U) { return U.getUser() != NotY; }); + } + // All done. return Builder.CreateAnd(LHS, RHS); } } @@ -2747,9 +2949,9 @@ static Instruction *sinkNotIntoXor(BinaryOperator &I, return nullptr; // We only want to do the transform if it is free to do. - if (IsFreeToInvert(X, X->hasOneUse())) { + if (isFreeToInvert(X, X->hasOneUse())) { // Ok, good. - } else if (IsFreeToInvert(Y, Y->hasOneUse())) { + } else if (isFreeToInvert(Y, Y->hasOneUse())) { std::swap(X, Y); } else return nullptr; @@ -2827,9 +3029,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { // Apply DeMorgan's Law when inverts are free: // ~(X & Y) --> (~X | ~Y) // ~(X | Y) --> (~X & ~Y) - if (IsFreeToInvert(NotVal->getOperand(0), + if (isFreeToInvert(NotVal->getOperand(0), NotVal->getOperand(0)->hasOneUse()) && - IsFreeToInvert(NotVal->getOperand(1), + isFreeToInvert(NotVal->getOperand(1), NotVal->getOperand(1)->hasOneUse())) { Value *NotX = Builder.CreateNot(NotVal->getOperand(0), "notlhs"); Value *NotY = Builder.CreateNot(NotVal->getOperand(1), "notrhs"); @@ -3004,7 +3206,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (auto *LHS = dyn_cast(I.getOperand(0))) if (auto *RHS = dyn_cast(I.getOperand(1))) - if (Value *V = foldXorOfICmps(LHS, RHS)) + if (Value *V = foldXorOfICmps(LHS, RHS, I)) return replaceInstUsesWith(I, V); if (Instruction *CastedXor = foldCastedBitwiseLogic(I)) @@ -3052,7 +3254,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (SelectPatternResult::isMinOrMax(SPF)) { // It's possible we get here before the not has been simplified, so make // sure the input to the not isn't freely invertible. 
- if (match(LHS, m_Not(m_Value(X))) && !IsFreeToInvert(X, X->hasOneUse())) { + if (match(LHS, m_Not(m_Value(X))) && !isFreeToInvert(X, X->hasOneUse())) { Value *NotY = Builder.CreateNot(RHS); return SelectInst::Create( Builder.CreateICmp(getInverseMinMaxPred(SPF), X, NotY), X, NotY); @@ -3060,7 +3262,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { // It's possible we get here before the not has been simplified, so make // sure the input to the not isn't freely invertible. - if (match(RHS, m_Not(m_Value(Y))) && !IsFreeToInvert(Y, Y->hasOneUse())) { + if (match(RHS, m_Not(m_Value(Y))) && !isFreeToInvert(Y, Y->hasOneUse())) { Value *NotX = Builder.CreateNot(LHS); return SelectInst::Create( Builder.CreateICmp(getInverseMinMaxPred(SPF), NotX, Y), NotX, Y); @@ -3068,8 +3270,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { // If both sides are freely invertible, then we can get rid of the xor // completely. - if (IsFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) && - IsFreeToInvert(RHS, !RHS->hasNUsesOrMore(3))) { + if (isFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) && + isFreeToInvert(RHS, !RHS->hasNUsesOrMore(3))) { Value *NotLHS = Builder.CreateNot(LHS); Value *NotRHS = Builder.CreateNot(RHS); return SelectInst::Create( diff --git a/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp b/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp index 5f37a00f56cf..825f4b468b0a 100644 --- a/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp +++ b/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp @@ -124,7 +124,7 @@ Instruction *InstCombiner::visitAtomicRMWInst(AtomicRMWInst &RMWI) { auto *SI = new StoreInst(RMWI.getValOperand(), RMWI.getPointerOperand(), &RMWI); SI->setAtomic(Ordering, RMWI.getSyncScopeID()); - SI->setAlignment(DL.getABITypeAlignment(RMWI.getType())); + SI->setAlignment(MaybeAlign(DL.getABITypeAlignment(RMWI.getType()))); return eraseInstFromFunction(RMWI); } @@ -154,6 +154,6 @@ Instruction *InstCombiner::visitAtomicRMWInst(AtomicRMWInst &RMWI) { LoadInst *Load = new LoadInst(RMWI.getType(), RMWI.getPointerOperand()); Load->setAtomic(Ordering, RMWI.getSyncScopeID()); - Load->setAlignment(DL.getABITypeAlignment(RMWI.getType())); + Load->setAlignment(MaybeAlign(DL.getABITypeAlignment(RMWI.getType()))); return Load; } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 4b3333affa72..c650d242cd50 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -185,7 +185,8 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy); LoadInst *L = Builder.CreateLoad(IntType, Src); // Alignment from the mem intrinsic will be better, so use it. - L->setAlignment(CopySrcAlign); + L->setAlignment( + MaybeAlign(CopySrcAlign)); // FIXME: Check if we can use Align instead. if (CopyMD) L->setMetadata(LLVMContext::MD_tbaa, CopyMD); MDNode *LoopMemParallelMD = @@ -198,7 +199,8 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { StoreInst *S = Builder.CreateStore(L, Dest); // Alignment from the mem intrinsic will be better, so use it. - S->setAlignment(CopyDstAlign); + S->setAlignment( + MaybeAlign(CopyDstAlign)); // FIXME: Check if we can use Align instead. 
if (CopyMD) S->setMetadata(LLVMContext::MD_tbaa, CopyMD); if (LoopMemParallelMD) @@ -223,9 +225,10 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { } Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) { - unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT); - if (MI->getDestAlignment() < Alignment) { - MI->setDestAlignment(Alignment); + const unsigned KnownAlignment = + getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT); + if (MI->getDestAlignment() < KnownAlignment) { + MI->setDestAlignment(KnownAlignment); return MI; } @@ -243,13 +246,9 @@ Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) { ConstantInt *FillC = dyn_cast(MI->getValue()); if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8)) return nullptr; - uint64_t Len = LenC->getLimitedValue(); - Alignment = MI->getDestAlignment(); + const uint64_t Len = LenC->getLimitedValue(); assert(Len && "0-sized memory setting should be removed already."); - - // Alignment 0 is identity for alignment 1 for memset, but not store. - if (Alignment == 0) - Alignment = 1; + const Align Alignment = assumeAligned(MI->getDestAlignment()); // If it is an atomic and alignment is less than the size then we will // introduce the unaligned memory access which will be later transformed @@ -1060,9 +1059,9 @@ Value *InstCombiner::simplifyMaskedLoad(IntrinsicInst &II) { // If we can unconditionally load from this address, replace with a // load/select idiom. TODO: use DT for context sensitive query - if (isDereferenceableAndAlignedPointer(LoadPtr, II.getType(), Alignment, - II.getModule()->getDataLayout(), - &II, nullptr)) { + if (isDereferenceableAndAlignedPointer( + LoadPtr, II.getType(), MaybeAlign(Alignment), + II.getModule()->getDataLayout(), &II, nullptr)) { Value *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment, "unmaskedload"); return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3)); @@ -1086,7 +1085,8 @@ Instruction *InstCombiner::simplifyMaskedStore(IntrinsicInst &II) { // If the mask is all ones, this is a plain vector store of the 1st argument. if (ConstMask->isAllOnesValue()) { Value *StorePtr = II.getArgOperand(1); - unsigned Alignment = cast(II.getArgOperand(2))->getZExtValue(); + MaybeAlign Alignment( + cast(II.getArgOperand(2))->getZExtValue()); return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment); } @@ -2234,6 +2234,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return replaceInstUsesWith(*II, Add); } + // Try to simplify the underlying FMul. + if (Value *V = SimplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1), + II->getFastMathFlags(), + SQ.getWithInstruction(II))) { + auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2)); + FAdd->copyFastMathFlags(II); + return FAdd; + } + LLVM_FALLTHROUGH; } case Intrinsic::fma: { @@ -2258,9 +2267,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return II; } - // fma x, 1, z -> fadd x, z - if (match(Src1, m_FPOne())) { - auto *FAdd = BinaryOperator::CreateFAdd(Src0, II->getArgOperand(2)); + // Try to simplify the underlying FMul. We can only apply simplifications + // that do not require rounding. 
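The fmuladd change above, and the fma case that continues immediately below, let the intrinsics first try to simplify their multiply via SimplifyFMulInst / SimplifyFMAFMul instead of only special-casing a constant 1.0 operand; per the comment, only simplifications that introduce no extra rounding are legal. As a concrete instance of why that restriction suffices, x*1.0 is exact, so fma(x, 1.0, z) and x + z perform the same single rounding. A small standalone illustration using std::fma (not the LLVM intrinsic):

#include <cassert>
#include <cmath>

int main() {
  const double vals[] = {0.0, 1.5, -2.25, 1e308, 3.141592653589793};
  for (double x : vals)
    for (double z : vals)
      // x*1.0 needs no rounding, so both sides are round(x + z) and compare bit-equal.
      assert(std::fma(x, 1.0, z) == x + z);
  return 0;
}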
+ if (Value *V = SimplifyFMAFMul(II->getArgOperand(0), II->getArgOperand(1), + II->getFastMathFlags(), + SQ.getWithInstruction(II))) { + auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2)); FAdd->copyFastMathFlags(II); return FAdd; } @@ -2331,7 +2343,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn PPC VSX loads into normal loads. Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); - return new LoadInst(II->getType(), Ptr, Twine(""), false, 1); + return new LoadInst(II->getType(), Ptr, Twine(""), false, Align::None()); } case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: @@ -2349,7 +2361,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn PPC VSX stores into normal stores. Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy); - return new StoreInst(II->getArgOperand(0), Ptr, false, 1); + return new StoreInst(II->getArgOperand(0), Ptr, false, Align::None()); } case Intrinsic::ppc_qpx_qvlfs: // Turn PPC QPX qvlfs -> load if the pointer is known aligned. @@ -3885,6 +3897,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Asan needs to poison memory to detect invalid access which is possible // even for empty lifetime range. if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) || + II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) || II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress)) break; @@ -3950,10 +3963,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } case Intrinsic::experimental_gc_relocate: { + auto &GCR = *cast(II); + + // If we have two copies of the same pointer in the statepoint argument + // list, canonicalize to one. This may let us common gc.relocates. + if (GCR.getBasePtr() == GCR.getDerivedPtr() && + GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) { + auto *OpIntTy = GCR.getOperand(2)->getType(); + II->setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex())); + return II; + } + // Translate facts known about a pointer before relocating into // facts about the relocate value, while being careful to // preserve relocation semantics. - Value *DerivedPtr = cast(II)->getDerivedPtr(); + Value *DerivedPtr = GCR.getDerivedPtr(); // Remove the relocation if unused, note that this check is required // to prevent the cases below from looping forever. @@ -4177,10 +4201,58 @@ static IntrinsicInst *findInitTrampoline(Value *Callee) { return nullptr; } +static void annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI) { + unsigned NumArgs = Call.getNumArgOperands(); + ConstantInt *Op0C = dyn_cast(Call.getOperand(0)); + ConstantInt *Op1C = + (NumArgs == 1) ? nullptr : dyn_cast(Call.getOperand(1)); + // Bail out if the allocation size is zero. 
+ if ((Op0C && Op0C->isNullValue()) || (Op1C && Op1C->isNullValue())) + return; + + if (isMallocLikeFn(&Call, TLI) && Op0C) { + if (isOpNewLikeFn(&Call, TLI)) + Call.addAttribute(AttributeList::ReturnIndex, + Attribute::getWithDereferenceableBytes( + Call.getContext(), Op0C->getZExtValue())); + else + Call.addAttribute(AttributeList::ReturnIndex, + Attribute::getWithDereferenceableOrNullBytes( + Call.getContext(), Op0C->getZExtValue())); + } else if (isReallocLikeFn(&Call, TLI) && Op1C) { + Call.addAttribute(AttributeList::ReturnIndex, + Attribute::getWithDereferenceableOrNullBytes( + Call.getContext(), Op1C->getZExtValue())); + } else if (isCallocLikeFn(&Call, TLI) && Op0C && Op1C) { + bool Overflow; + const APInt &N = Op0C->getValue(); + APInt Size = N.umul_ov(Op1C->getValue(), Overflow); + if (!Overflow) + Call.addAttribute(AttributeList::ReturnIndex, + Attribute::getWithDereferenceableOrNullBytes( + Call.getContext(), Size.getZExtValue())); + } else if (isStrdupLikeFn(&Call, TLI)) { + uint64_t Len = GetStringLength(Call.getOperand(0)); + if (Len) { + // strdup + if (NumArgs == 1) + Call.addAttribute(AttributeList::ReturnIndex, + Attribute::getWithDereferenceableOrNullBytes( + Call.getContext(), Len)); + // strndup + else if (NumArgs == 2 && Op1C) + Call.addAttribute( + AttributeList::ReturnIndex, + Attribute::getWithDereferenceableOrNullBytes( + Call.getContext(), std::min(Len, Op1C->getZExtValue() + 1))); + } + } +} + /// Improvements for call, callbr and invoke instructions. Instruction *InstCombiner::visitCallBase(CallBase &Call) { - if (isAllocLikeFn(&Call, &TLI)) - return visitAllocSite(Call); + if (isAllocationFn(&Call, &TLI)) + annotateAnyAllocSite(Call, &TLI); bool Changed = false; @@ -4312,6 +4384,9 @@ Instruction *InstCombiner::visitCallBase(CallBase &Call) { if (I) return eraseInstFromFunction(*I); } + if (isAllocLikeFn(&Call, &TLI)) + return visitAllocSite(Call); + return Changed ? &Call : nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 2c9ba203fbf3..65aaef28d87a 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -140,7 +140,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, } AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt); - New->setAlignment(AI.getAlignment()); + New->setAlignment(MaybeAlign(AI.getAlignment())); New->takeName(&AI); New->setUsedWithInAlloca(AI.isUsedWithInAlloca()); @@ -1531,16 +1531,16 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) { // what we can and cannot do safely varies from operation to operation, and // is explained below in the various case statements. 
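annotateAnyAllocSite, added above, attaches dereferenceable / dereferenceable_or_null byte counts to recognized allocation calls; for calloc-like calls it multiplies the two size operands with APInt::umul_ov and only annotates when the product does not overflow. The same overflow-guarded multiply in portable C++ (checkedAllocBytes is an illustrative helper, not LLVM code):

#include <cassert>
#include <cstdint>
#include <limits>

// Returns true and sets Bytes when Count * Size fits in 64 bits, mirroring the
// umul_ov guard used before annotating a calloc-like call.
static bool checkedAllocBytes(uint64_t Count, uint64_t Size, uint64_t &Bytes) {
  if (Size != 0 && Count > std::numeric_limits<uint64_t>::max() / Size)
    return false;               // product would overflow: do not annotate
  Bytes = Count * Size;
  return true;
}

int main() {
  uint64_t B = 0;
  assert(checkedAllocBytes(10, 16, B) && B == 160);
  assert(!checkedAllocBytes(uint64_t(1) << 33, uint64_t(1) << 33, B));
  return 0;
}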
Type *Ty = FPT.getType(); - BinaryOperator *OpI = dyn_cast(FPT.getOperand(0)); - if (OpI && OpI->hasOneUse()) { - Type *LHSMinType = getMinimumFPType(OpI->getOperand(0)); - Type *RHSMinType = getMinimumFPType(OpI->getOperand(1)); - unsigned OpWidth = OpI->getType()->getFPMantissaWidth(); + auto *BO = dyn_cast(FPT.getOperand(0)); + if (BO && BO->hasOneUse()) { + Type *LHSMinType = getMinimumFPType(BO->getOperand(0)); + Type *RHSMinType = getMinimumFPType(BO->getOperand(1)); + unsigned OpWidth = BO->getType()->getFPMantissaWidth(); unsigned LHSWidth = LHSMinType->getFPMantissaWidth(); unsigned RHSWidth = RHSMinType->getFPMantissaWidth(); unsigned SrcWidth = std::max(LHSWidth, RHSWidth); unsigned DstWidth = Ty->getFPMantissaWidth(); - switch (OpI->getOpcode()) { + switch (BO->getOpcode()) { default: break; case Instruction::FAdd: case Instruction::FSub: @@ -1563,10 +1563,10 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) { // could be tightened for those cases, but they are rare (the main // case of interest here is (float)((double)float + float)). if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) { - Value *LHS = Builder.CreateFPTrunc(OpI->getOperand(0), Ty); - Value *RHS = Builder.CreateFPTrunc(OpI->getOperand(1), Ty); - Instruction *RI = BinaryOperator::Create(OpI->getOpcode(), LHS, RHS); - RI->copyFastMathFlags(OpI); + Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty); + Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty); + Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS); + RI->copyFastMathFlags(BO); return RI; } break; @@ -1577,9 +1577,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) { // rounding can possibly occur; we can safely perform the operation // in the destination format if it can represent both sources. if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) { - Value *LHS = Builder.CreateFPTrunc(OpI->getOperand(0), Ty); - Value *RHS = Builder.CreateFPTrunc(OpI->getOperand(1), Ty); - return BinaryOperator::CreateFMulFMF(LHS, RHS, OpI); + Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty); + Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty); + return BinaryOperator::CreateFMulFMF(LHS, RHS, BO); } break; case Instruction::FDiv: @@ -1590,9 +1590,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) { // condition used here is a good conservative first pass. // TODO: Tighten bound via rigorous analysis of the unbalanced case. 
if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) { - Value *LHS = Builder.CreateFPTrunc(OpI->getOperand(0), Ty); - Value *RHS = Builder.CreateFPTrunc(OpI->getOperand(1), Ty); - return BinaryOperator::CreateFDivFMF(LHS, RHS, OpI); + Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty); + Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty); + return BinaryOperator::CreateFDivFMF(LHS, RHS, BO); } break; case Instruction::FRem: { @@ -1604,14 +1604,14 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) { break; Value *LHS, *RHS; if (LHSWidth == SrcWidth) { - LHS = Builder.CreateFPTrunc(OpI->getOperand(0), LHSMinType); - RHS = Builder.CreateFPTrunc(OpI->getOperand(1), LHSMinType); + LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType); + RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType); } else { - LHS = Builder.CreateFPTrunc(OpI->getOperand(0), RHSMinType); - RHS = Builder.CreateFPTrunc(OpI->getOperand(1), RHSMinType); + LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType); + RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType); } - Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, OpI); + Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO); return CastInst::CreateFPCast(ExactResult, Ty); } } @@ -2338,8 +2338,23 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If we found a path from the src to dest, create the getelementptr now. if (SrcElTy == DstElTy) { SmallVector Idxs(NumZeros + 1, Builder.getInt32(0)); - return GetElementPtrInst::CreateInBounds(SrcPTy->getElementType(), Src, - Idxs); + GetElementPtrInst *GEP = + GetElementPtrInst::Create(SrcPTy->getElementType(), Src, Idxs); + + // If the source pointer is dereferenceable, then assume it points to an + // allocated object and apply "inbounds" to the GEP. + bool CanBeNull; + if (Src->getPointerDereferenceableBytes(DL, CanBeNull)) { + // In a non-default address space (not 0), a null pointer can not be + // assumed inbounds, so ignore that case (dereferenceable_or_null). + // The reason is that 'null' is not treated differently in these address + // spaces, and we consequently ignore the 'gep inbounds' special case + // for 'null' which allows 'inbounds' on 'null' if the indices are + // zeros. + if (SrcPTy->getAddressSpace() == 0 || !CanBeNull) + GEP->setIsInBounds(); + } + return GEP; } } @@ -2391,28 +2406,47 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } } - if (ShuffleVectorInst *SVI = dyn_cast(Src)) { + if (auto *Shuf = dyn_cast(Src)) { // Okay, we have (bitcast (shuffle ..)). Check to see if this is // a bitcast to a vector with the same # elts. - if (SVI->hasOneUse() && DestTy->isVectorTy() && - DestTy->getVectorNumElements() == SVI->getType()->getNumElements() && - SVI->getType()->getNumElements() == - SVI->getOperand(0)->getType()->getVectorNumElements()) { + Value *ShufOp0 = Shuf->getOperand(0); + Value *ShufOp1 = Shuf->getOperand(1); + unsigned NumShufElts = Shuf->getType()->getVectorNumElements(); + unsigned NumSrcVecElts = ShufOp0->getType()->getVectorNumElements(); + if (Shuf->hasOneUse() && DestTy->isVectorTy() && + DestTy->getVectorNumElements() == NumShufElts && + NumShufElts == NumSrcVecElts) { BitCastInst *Tmp; // If either of the operands is a cast from CI.getType(), then // evaluating the shuffle in the casted destination's type will allow // us to eliminate at least one cast. 
- if (((Tmp = dyn_cast(SVI->getOperand(0))) && + if (((Tmp = dyn_cast(ShufOp0)) && Tmp->getOperand(0)->getType() == DestTy) || - ((Tmp = dyn_cast(SVI->getOperand(1))) && + ((Tmp = dyn_cast(ShufOp1)) && Tmp->getOperand(0)->getType() == DestTy)) { - Value *LHS = Builder.CreateBitCast(SVI->getOperand(0), DestTy); - Value *RHS = Builder.CreateBitCast(SVI->getOperand(1), DestTy); + Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy); + Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy); // Return a new shuffle vector. Use the same element ID's, as we // know the vector types match #elts. - return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2)); + return new ShuffleVectorInst(LHS, RHS, Shuf->getOperand(2)); } } + + // A bitcasted-to-scalar and byte-reversing shuffle is better recognized as + // a byte-swap: + // bitcast (shuf X, undef, ) --> bswap (bitcast X) + // TODO: We should match the related pattern for bitreverse. + if (DestTy->isIntegerTy() && + DL.isLegalInteger(DestTy->getScalarSizeInBits()) && + SrcTy->getScalarSizeInBits() == 8 && NumShufElts % 2 == 0 && + Shuf->hasOneUse() && Shuf->isReverse()) { + assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask"); + assert(isa(ShufOp1) && "Unexpected shuffle op"); + Function *Bswap = + Intrinsic::getDeclaration(CI.getModule(), Intrinsic::bswap, DestTy); + Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy); + return IntrinsicInst::Create(Bswap, { ScalarX }); + } } // Handle the A->B->A cast, and there is an intervening PHI node. diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 3a4283ae5406..a9f64feb600c 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -69,34 +69,6 @@ static bool hasBranchUse(ICmpInst &I) { return false; } -/// Given an exploded icmp instruction, return true if the comparison only -/// checks the sign bit. If it only checks the sign bit, set TrueIfSigned if the -/// result of the comparison is true when the input value is signed. -static bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS, - bool &TrueIfSigned) { - switch (Pred) { - case ICmpInst::ICMP_SLT: // True if LHS s< 0 - TrueIfSigned = true; - return RHS.isNullValue(); - case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1 - TrueIfSigned = true; - return RHS.isAllOnesValue(); - case ICmpInst::ICMP_SGT: // True if LHS s> -1 - TrueIfSigned = false; - return RHS.isAllOnesValue(); - case ICmpInst::ICMP_UGT: - // True if LHS u> RHS and RHS == high-bit-mask - 1 - TrueIfSigned = true; - return RHS.isMaxSignedValue(); - case ICmpInst::ICMP_UGE: - // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) - TrueIfSigned = true; - return RHS.isSignMask(); - default: - return false; - } -} - /// Returns true if the exploded icmp can be expressed as a signed comparison /// to zero and updates the predicate accordingly. /// The signedness of the comparison is preserved. @@ -832,6 +804,10 @@ getAsConstantIndexedAddress(Value *V, const DataLayout &DL) { static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS, ICmpInst::Predicate Cond, const DataLayout &DL) { + // FIXME: Support vector of pointers. 
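One of the bitcast folds above recognizes a byte-reversing vector shuffle whose result is immediately bitcast to a legal scalar integer and replaces it with a bswap of the bitcast source. The underlying equivalence, reversing the bytes in memory and then reinterpreting them equals byte-swapping the reinterpreted original, holds on both little- and big-endian layouts. A quick standalone check (uses the GCC/Clang __builtin_bswap32 builtin):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint8_t bytes[4] = {0x12, 0x34, 0x56, 0x78};

  uint32_t direct = 0;
  std::memcpy(&direct, bytes, 4);                  // bitcast <4 x i8> X to i32

  uint8_t reversed[4];
  std::reverse_copy(bytes, bytes + 4, reversed);   // shufflevector X, undef, <3,2,1,0>
  uint32_t shuffled = 0;
  std::memcpy(&shuffled, reversed, 4);             // bitcast of the shuffle

  assert(shuffled == __builtin_bswap32(direct));   // == bswap(bitcast X)
  return 0;
}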
+ if (GEPLHS->getType()->isVectorTy()) + return nullptr; + if (!GEPLHS->hasAllConstantIndices()) return nullptr; @@ -882,7 +858,9 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, RHS = RHS->stripPointerCasts(); Value *PtrBase = GEPLHS->getOperand(0); - if (PtrBase == RHS && GEPLHS->isInBounds()) { + // FIXME: Support vector pointer GEPs. + if (PtrBase == RHS && GEPLHS->isInBounds() && + !GEPLHS->getType()->isVectorTy()) { // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0). // This transformation (ignoring the base and scales) is valid because we // know pointers can't overflow since the gep is inbounds. See if we can @@ -894,6 +872,37 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, Offset = EmitGEPOffset(GEPLHS); return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset, Constant::getNullValue(Offset->getType())); + } + + if (GEPLHS->isInBounds() && ICmpInst::isEquality(Cond) && + isa(RHS) && cast(RHS)->isNullValue() && + !NullPointerIsDefined(I.getFunction(), + RHS->getType()->getPointerAddressSpace())) { + // For most address spaces, an allocation can't be placed at null, but null + // itself is treated as a 0 size allocation in the in bounds rules. Thus, + // the only valid inbounds address derived from null, is null itself. + // Thus, we have four cases to consider: + // 1) Base == nullptr, Offset == 0 -> inbounds, null + // 2) Base == nullptr, Offset != 0 -> poison as the result is out of bounds + // 3) Base != nullptr, Offset == (-base) -> poison (crossing allocations) + // 4) Base != nullptr, Offset != (-base) -> nonnull (and possibly poison) + // + // (Note if we're indexing a type of size 0, that simply collapses into one + // of the buckets above.) + // + // In general, we're allowed to make values less poison (i.e. remove + // sources of full UB), so in this case, we just select between the two + // non-poison cases (1 and 4 above). + // + // For vectors, we apply the same reasoning on a per-lane basis. + auto *Base = GEPLHS->getPointerOperand(); + if (GEPLHS->getType()->isVectorTy() && Base->getType()->isPointerTy()) { + int NumElts = GEPLHS->getType()->getVectorNumElements(); + Base = Builder.CreateVectorSplat(NumElts, Base); + } + return new ICmpInst(Cond, Base, + ConstantExpr::getPointerBitCastOrAddrSpaceCast( + cast(RHS), Base->getType())); } else if (GEPOperator *GEPRHS = dyn_cast(RHS)) { // If the base pointers are different, but the indices are the same, just // compare the base pointer. @@ -916,11 +925,13 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, // If we're comparing GEPs with two base pointers that only differ in type // and both GEPs have only constant indices or just one use, then fold // the compare with the adjusted indices. + // FIXME: Support vector of pointers. if (GEPLHS->isInBounds() && GEPRHS->isInBounds() && (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) && (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) && PtrBase->stripPointerCasts() == - GEPRHS->getOperand(0)->stripPointerCasts()) { + GEPRHS->getOperand(0)->stripPointerCasts() && + !GEPLHS->getType()->isVectorTy()) { Value *LOffset = EmitGEPOffset(GEPLHS); Value *ROffset = EmitGEPOffset(GEPRHS); @@ -949,12 +960,14 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, } // If one of the GEPs has all zero indices, recurse. - if (GEPLHS->hasAllZeroIndices()) + // FIXME: Handle vector of pointers. 
+ if (!GEPLHS->getType()->isVectorTy() && GEPLHS->hasAllZeroIndices()) return foldGEPICmp(GEPRHS, GEPLHS->getOperand(0), ICmpInst::getSwappedPredicate(Cond), I); // If the other GEP has all zero indices, recurse. - if (GEPRHS->hasAllZeroIndices()) + // FIXME: Handle vector of pointers. + if (!GEPRHS->getType()->isVectorTy() && GEPRHS->hasAllZeroIndices()) return foldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I); bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds(); @@ -964,15 +977,20 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, unsigned DiffOperand = 0; // The operand that differs. for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i) if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) { - if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() != - GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) { + Type *LHSType = GEPLHS->getOperand(i)->getType(); + Type *RHSType = GEPRHS->getOperand(i)->getType(); + // FIXME: Better support for vector of pointers. + if (LHSType->getPrimitiveSizeInBits() != + RHSType->getPrimitiveSizeInBits() || + (GEPLHS->getType()->isVectorTy() && + (!LHSType->isVectorTy() || !RHSType->isVectorTy()))) { // Irreconcilable differences. NumDifferences = 2; break; - } else { - if (NumDifferences++) break; - DiffOperand = i; } + + if (NumDifferences++) break; + DiffOperand = i; } if (NumDifferences == 0) // SAME GEP? @@ -1317,6 +1335,59 @@ static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, return ExtractValueInst::Create(Call, 1, "sadd.overflow"); } +/// If we have: +/// icmp eq/ne (urem/srem %x, %y), 0 +/// iff %y is a power-of-two, we can replace this with a bit test: +/// icmp eq/ne (and %x, (add %y, -1)), 0 +Instruction *InstCombiner::foldIRemByPowerOfTwoToBitTest(ICmpInst &I) { + // This fold is only valid for equality predicates. + if (!I.isEquality()) + return nullptr; + ICmpInst::Predicate Pred; + Value *X, *Y, *Zero; + if (!match(&I, m_ICmp(Pred, m_OneUse(m_IRem(m_Value(X), m_Value(Y))), + m_CombineAnd(m_Zero(), m_Value(Zero))))) + return nullptr; + if (!isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, 0, &I)) + return nullptr; + // This may increase instruction count, we don't enforce that Y is a constant. + Value *Mask = Builder.CreateAdd(Y, Constant::getAllOnesValue(Y->getType())); + Value *Masked = Builder.CreateAnd(X, Mask); + return ICmpInst::Create(Instruction::ICmp, Pred, Masked, Zero); +} + +/// Fold equality-comparison between zero and any (maybe truncated) right-shift +/// by one-less-than-bitwidth into a sign test on the original value. +Instruction *InstCombiner::foldSignBitTest(ICmpInst &I) { + Instruction *Val; + ICmpInst::Predicate Pred; + if (!I.isEquality() || !match(&I, m_ICmp(Pred, m_Instruction(Val), m_Zero()))) + return nullptr; + + Value *X; + Type *XTy; + + Constant *C; + if (match(Val, m_TruncOrSelf(m_Shr(m_Value(X), m_Constant(C))))) { + XTy = X->getType(); + unsigned XBitWidth = XTy->getScalarSizeInBits(); + if (!match(C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ, + APInt(XBitWidth, XBitWidth - 1)))) + return nullptr; + } else if (isa(Val) && + (X = reassociateShiftAmtsOfTwoSameDirectionShifts( + cast(Val), SQ.getWithInstruction(Val), + /*AnalyzeForSignBitExtraction=*/true))) { + XTy = X->getType(); + } else + return nullptr; + + return ICmpInst::Create(Instruction::ICmp, + Pred == ICmpInst::ICMP_EQ ? 
ICmpInst::ICMP_SGE + : ICmpInst::ICMP_SLT, + X, ConstantInt::getNullValue(XTy)); +} + // Handle icmp pred X, 0 Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) { CmpInst::Predicate Pred = Cmp.getPredicate(); @@ -1335,6 +1406,9 @@ Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) { } } + if (Instruction *New = foldIRemByPowerOfTwoToBitTest(Cmp)) + return New; + // Given: // icmp eq/ne (urem %x, %y), 0 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem': @@ -2179,6 +2253,44 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp, return nullptr; } +Instruction *InstCombiner::foldICmpSRemConstant(ICmpInst &Cmp, + BinaryOperator *SRem, + const APInt &C) { + // Match an 'is positive' or 'is negative' comparison of remainder by a + // constant power-of-2 value: + // (X % pow2C) sgt/slt 0 + const ICmpInst::Predicate Pred = Cmp.getPredicate(); + if (Pred != ICmpInst::ICMP_SGT && Pred != ICmpInst::ICMP_SLT) + return nullptr; + + // TODO: The one-use check is standard because we do not typically want to + // create longer instruction sequences, but this might be a special-case + // because srem is not good for analysis or codegen. + if (!SRem->hasOneUse()) + return nullptr; + + const APInt *DivisorC; + if (!C.isNullValue() || !match(SRem->getOperand(1), m_Power2(DivisorC))) + return nullptr; + + // Mask off the sign bit and the modulo bits (low-bits). + Type *Ty = SRem->getType(); + APInt SignMask = APInt::getSignMask(Ty->getScalarSizeInBits()); + Constant *MaskC = ConstantInt::get(Ty, SignMask | (*DivisorC - 1)); + Value *And = Builder.CreateAnd(SRem->getOperand(0), MaskC); + + // For 'is positive?' check that the sign-bit is clear and at least 1 masked + // bit is set. Example: + // (i8 X % 32) s> 0 --> (X & 159) s> 0 + if (Pred == ICmpInst::ICMP_SGT) + return new ICmpInst(ICmpInst::ICMP_SGT, And, ConstantInt::getNullValue(Ty)); + + // For 'is negative?' check that the sign-bit is set and at least 1 masked + // bit is set. Example: + // (i16 X % 4) s< 0 --> (X & 32771) u> 32768 + return new ICmpInst(ICmpInst::ICMP_UGT, And, ConstantInt::get(Ty, SignMask)); +} + /// Fold icmp (udiv X, Y), C. Instruction *InstCombiner::foldICmpUDivConstant(ICmpInst &Cmp, BinaryOperator *UDiv, @@ -2387,6 +2499,11 @@ Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp, const APInt *C2; APInt SubResult; + // icmp eq/ne (sub C, Y), C -> icmp eq/ne Y, 0 + if (match(X, m_APInt(C2)) && *C2 == C && Cmp.isEquality()) + return new ICmpInst(Cmp.getPredicate(), Y, + ConstantInt::get(Y->getType(), 0)); + // (icmp P (sub nuw|nsw C2, Y), C) -> (icmp swap(P) Y, C2-C) if (match(X, m_APInt(C2)) && ((Cmp.isUnsigned() && Sub->hasNoUnsignedWrap()) || @@ -2509,20 +2626,49 @@ bool InstCombiner::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS, // TODO: Generalize this to work with other comparison idioms or ensure // they get canonicalized into this form. - // select i1 (a == b), i32 Equal, i32 (select i1 (a < b), i32 Less, i32 - // Greater), where Equal, Less and Greater are placeholders for any three - // constants. 
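// Illustrative sketch, not part of the upstream diff: the two examples cited in
// foldICmpSRemConstant above can be verified exhaustively for i8. The masks are
// sign-bit | (divisor - 1), e.g. 159 = 0x80 | 31 and 131 = 0x80 | 3; the sketch
// assumes two's-complement int8_t.
#include <cassert>
#include <cstdint>

int main() {
  for (int x = -128; x <= 127; ++x) {
    // (i8 X % 32) s> 0  -->  (X & 159) s> 0
    assert((static_cast<int8_t>(x) % 32 > 0) ==
           (static_cast<int8_t>(x & 159) > 0));
    // (i8 X % 4) s< 0  -->  (X & 131) u> 128
    assert((static_cast<int8_t>(x) % 4 < 0) ==
           ((static_cast<uint8_t>(x) & 131u) > 128u));
  }
  return 0;
}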
- ICmpInst::Predicate PredA, PredB; - if (match(SI->getTrueValue(), m_ConstantInt(Equal)) && - match(SI->getCondition(), m_ICmp(PredA, m_Value(LHS), m_Value(RHS))) && - PredA == ICmpInst::ICMP_EQ && - match(SI->getFalseValue(), - m_Select(m_ICmp(PredB, m_Specific(LHS), m_Specific(RHS)), - m_ConstantInt(Less), m_ConstantInt(Greater))) && - PredB == ICmpInst::ICMP_SLT) { - return true; + // select i1 (a == b), + // i32 Equal, + // i32 (select i1 (a < b), i32 Less, i32 Greater) + // where Equal, Less and Greater are placeholders for any three constants. + ICmpInst::Predicate PredA; + if (!match(SI->getCondition(), m_ICmp(PredA, m_Value(LHS), m_Value(RHS))) || + !ICmpInst::isEquality(PredA)) + return false; + Value *EqualVal = SI->getTrueValue(); + Value *UnequalVal = SI->getFalseValue(); + // We still can get non-canonical predicate here, so canonicalize. + if (PredA == ICmpInst::ICMP_NE) + std::swap(EqualVal, UnequalVal); + if (!match(EqualVal, m_ConstantInt(Equal))) + return false; + ICmpInst::Predicate PredB; + Value *LHS2, *RHS2; + if (!match(UnequalVal, m_Select(m_ICmp(PredB, m_Value(LHS2), m_Value(RHS2)), + m_ConstantInt(Less), m_ConstantInt(Greater)))) + return false; + // We can get predicate mismatch here, so canonicalize if possible: + // First, ensure that 'LHS' match. + if (LHS2 != LHS) { + // x sgt y <--> y slt x + std::swap(LHS2, RHS2); + PredB = ICmpInst::getSwappedPredicate(PredB); + } + if (LHS2 != LHS) + return false; + // We also need to canonicalize 'RHS'. + if (PredB == ICmpInst::ICMP_SGT && isa(RHS2)) { + // x sgt C-1 <--> x sge C <--> not(x slt C) + auto FlippedStrictness = + getFlippedStrictnessPredicateAndConstant(PredB, cast(RHS2)); + if (!FlippedStrictness) + return false; + assert(FlippedStrictness->first == ICmpInst::ICMP_SGE && "Sanity check"); + RHS2 = FlippedStrictness->second; + // And kind-of perform the result swap. + std::swap(Less, Greater); + PredB = ICmpInst::ICMP_SLT; } - return false; + return PredB == ICmpInst::ICMP_SLT && RHS == RHS2; } Instruction *InstCombiner::foldICmpSelectConstant(ICmpInst &Cmp, @@ -2702,6 +2848,10 @@ Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) { if (Instruction *I = foldICmpShrConstant(Cmp, BO, *C)) return I; break; + case Instruction::SRem: + if (Instruction *I = foldICmpSRemConstant(Cmp, BO, *C)) + return I; + break; case Instruction::UDiv: if (Instruction *I = foldICmpUDivConstant(Cmp, BO, *C)) return I; @@ -2926,6 +3076,28 @@ Instruction *InstCombiner::foldICmpEqIntrinsicWithConstant(ICmpInst &Cmp, } break; } + + case Intrinsic::uadd_sat: { + // uadd.sat(a, b) == 0 -> (a | b) == 0 + if (C.isNullValue()) { + Value *Or = Builder.CreateOr(II->getArgOperand(0), II->getArgOperand(1)); + return replaceInstUsesWith(Cmp, Builder.CreateICmp( + Cmp.getPredicate(), Or, Constant::getNullValue(Ty))); + + } + break; + } + + case Intrinsic::usub_sat: { + // usub.sat(a, b) == 0 -> a <= b + if (C.isNullValue()) { + ICmpInst::Predicate NewPred = Cmp.getPredicate() == ICmpInst::ICMP_EQ + ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT; + return ICmpInst::Create(Instruction::ICmp, NewPred, + II->getArgOperand(0), II->getArgOperand(1)); + } + break; + } default: break; } @@ -3275,6 +3447,7 @@ foldICmpWithTruncSignExtendedVal(ICmpInst &I, // we should move shifts to the same hand of 'and', i.e. rewrite as // icmp eq/ne (and (x shift (Q+K)), y), 0 iff (Q+K) u< bitwidth(x) // We are only interested in opposite logical shifts here. +// One of the shifts can be truncated. // If we can, we want to end up creating 'lshr' shift. 
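// Illustrative sketch, not part of the upstream diff: the saturating-intrinsic
// equality folds added above (uadd.sat(a,b) == 0 --> (a|b) == 0 and
// usub.sat(a,b) == 0 --> a u<= b) checked over all i8 pairs with hand-rolled
// saturating helpers (names are local to this sketch).
#include <algorithm>
#include <cassert>
#include <cstdint>

static uint8_t uadd_sat8(uint8_t a, uint8_t b) {
  return static_cast<uint8_t>(std::min(unsigned(a) + unsigned(b), 255u));
}
static uint8_t usub_sat8(uint8_t a, uint8_t b) {
  return a > b ? static_cast<uint8_t>(a - b) : static_cast<uint8_t>(0);
}

int main() {
  for (unsigned a = 0; a <= 255; ++a)
    for (unsigned b = 0; b <= 255; ++b) {
      assert((uadd_sat8(uint8_t(a), uint8_t(b)) == 0) == ((a | b) == 0));
      assert((usub_sat8(uint8_t(a), uint8_t(b)) == 0) == (a <= b));
    }
  return 0;
}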
static Value * foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ, @@ -3284,55 +3457,215 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ, return nullptr; auto m_AnyLogicalShift = m_LogicalShift(m_Value(), m_Value()); - auto m_AnyLShr = m_LShr(m_Value(), m_Value()); - - // Look for an 'and' of two (opposite) logical shifts. - // Pick the single-use shift as XShift. - Value *XShift, *YShift; - if (!match(I.getOperand(0), - m_c_And(m_OneUse(m_CombineAnd(m_AnyLogicalShift, m_Value(XShift))), - m_CombineAnd(m_AnyLogicalShift, m_Value(YShift))))) + + // Look for an 'and' of two logical shifts, one of which may be truncated. + // We use m_TruncOrSelf() on the RHS to correctly handle commutative case. + Instruction *XShift, *MaybeTruncation, *YShift; + if (!match( + I.getOperand(0), + m_c_And(m_CombineAnd(m_AnyLogicalShift, m_Instruction(XShift)), + m_CombineAnd(m_TruncOrSelf(m_CombineAnd( + m_AnyLogicalShift, m_Instruction(YShift))), + m_Instruction(MaybeTruncation))))) return nullptr; - // If YShift is a single-use 'lshr', swap the shifts around. - if (match(YShift, m_OneUse(m_AnyLShr))) + // We potentially looked past 'trunc', but only when matching YShift, + // therefore YShift must have the widest type. + Instruction *WidestShift = YShift; + // Therefore XShift must have the shallowest type. + // Or they both have identical types if there was no truncation. + Instruction *NarrowestShift = XShift; + + Type *WidestTy = WidestShift->getType(); + assert(NarrowestShift->getType() == I.getOperand(0)->getType() && + "We did not look past any shifts while matching XShift though."); + bool HadTrunc = WidestTy != I.getOperand(0)->getType(); + + // If YShift is a 'lshr', swap the shifts around. + if (match(YShift, m_LShr(m_Value(), m_Value()))) std::swap(XShift, YShift); // The shifts must be in opposite directions. - Instruction::BinaryOps XShiftOpcode = - cast(XShift)->getOpcode(); - if (XShiftOpcode == cast(YShift)->getOpcode()) + auto XShiftOpcode = XShift->getOpcode(); + if (XShiftOpcode == YShift->getOpcode()) return nullptr; // Do not care about same-direction shifts here. Value *X, *XShAmt, *Y, *YShAmt; - match(XShift, m_BinOp(m_Value(X), m_Value(XShAmt))); - match(YShift, m_BinOp(m_Value(Y), m_Value(YShAmt))); + match(XShift, m_BinOp(m_Value(X), m_ZExtOrSelf(m_Value(XShAmt)))); + match(YShift, m_BinOp(m_Value(Y), m_ZExtOrSelf(m_Value(YShAmt)))); + + // If one of the values being shifted is a constant, then we will end with + // and+icmp, and [zext+]shift instrs will be constant-folded. If they are not, + // however, we will need to ensure that we won't increase instruction count. + if (!isa(X) && !isa(Y)) { + // At least one of the hands of the 'and' should be one-use shift. + if (!match(I.getOperand(0), + m_c_And(m_OneUse(m_AnyLogicalShift), m_Value()))) + return nullptr; + if (HadTrunc) { + // Due to the 'trunc', we will need to widen X. For that either the old + // 'trunc' or the shift amt in the non-truncated shift should be one-use. + if (!MaybeTruncation->hasOneUse() && + !NarrowestShift->getOperand(1)->hasOneUse()) + return nullptr; + } + } + + // We have two shift amounts from two different shifts. The types of those + // shift amounts may not match. If that's the case let's bailout now. + if (XShAmt->getType() != YShAmt->getType()) + return nullptr; // Can we fold (XShAmt+YShAmt) ? 
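// Illustrative sketch, not part of the upstream diff (the surrounding function
// continues below): the shift-reassociation fold being implemented here relies on
//   ((x << Q) & (y >> K)) == 0  <=>  ((x << (Q+K)) & y) == 0   iff (Q+K) u< bitwidth,
// which a brute-force i8 check confirms.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned q = 0; q < 8; ++q)
    for (unsigned k = 0; q + k < 8; ++k)          // new shift amount must stay below the bit width
      for (unsigned x = 0; x <= 255; ++x)
        for (unsigned y = 0; y <= 255; ++y) {
          bool lhs = (uint8_t(x << q) & uint8_t(y >> k)) == 0;
          bool rhs = (uint8_t(x << (q + k)) & y) == 0;
          assert(lhs == rhs);
        }
  return 0;
}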
- Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, XShAmt, YShAmt, - SQ.getWithInstruction(&I)); + auto *NewShAmt = dyn_cast_or_null( + SimplifyAddInst(XShAmt, YShAmt, /*isNSW=*/false, + /*isNUW=*/false, SQ.getWithInstruction(&I))); if (!NewShAmt) return nullptr; + NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, WidestTy); + unsigned WidestBitWidth = WidestTy->getScalarSizeInBits(); + // Is the new shift amount smaller than the bit width? // FIXME: could also rely on ConstantRange. - unsigned BitWidth = X->getType()->getScalarSizeInBits(); - if (!match(NewShAmt, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT, - APInt(BitWidth, BitWidth)))) + if (!match(NewShAmt, + m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT, + APInt(WidestBitWidth, WidestBitWidth)))) return nullptr; - // All good, we can do this fold. The shift is the same that was for X. + + // An extra legality check is needed if we had trunc-of-lshr. + if (HadTrunc && match(WidestShift, m_LShr(m_Value(), m_Value()))) { + auto CanFold = [NewShAmt, WidestBitWidth, NarrowestShift, SQ, + WidestShift]() { + // It isn't obvious whether it's worth it to analyze non-constants here. + // Also, let's basically give up on non-splat cases, pessimizing vectors. + // If *any* of these preconditions matches we can perform the fold. + Constant *NewShAmtSplat = NewShAmt->getType()->isVectorTy() + ? NewShAmt->getSplatValue() + : NewShAmt; + // If it's edge-case shift (by 0 or by WidestBitWidth-1) we can fold. + if (NewShAmtSplat && + (NewShAmtSplat->isNullValue() || + NewShAmtSplat->getUniqueInteger() == WidestBitWidth - 1)) + return true; + // We consider *min* leading zeros so a single outlier + // blocks the transform as opposed to allowing it. + if (auto *C = dyn_cast(NarrowestShift->getOperand(0))) { + KnownBits Known = computeKnownBits(C, SQ.DL); + unsigned MinLeadZero = Known.countMinLeadingZeros(); + // If the value being shifted has at most lowest bit set we can fold. + unsigned MaxActiveBits = Known.getBitWidth() - MinLeadZero; + if (MaxActiveBits <= 1) + return true; + // Precondition: NewShAmt u<= countLeadingZeros(C) + if (NewShAmtSplat && NewShAmtSplat->getUniqueInteger().ule(MinLeadZero)) + return true; + } + if (auto *C = dyn_cast(WidestShift->getOperand(0))) { + KnownBits Known = computeKnownBits(C, SQ.DL); + unsigned MinLeadZero = Known.countMinLeadingZeros(); + // If the value being shifted has at most lowest bit set we can fold. + unsigned MaxActiveBits = Known.getBitWidth() - MinLeadZero; + if (MaxActiveBits <= 1) + return true; + // Precondition: ((WidestBitWidth-1)-NewShAmt) u<= countLeadingZeros(C) + if (NewShAmtSplat) { + APInt AdjNewShAmt = + (WidestBitWidth - 1) - NewShAmtSplat->getUniqueInteger(); + if (AdjNewShAmt.ule(MinLeadZero)) + return true; + } + } + return false; // Can't tell if it's ok. + }; + if (!CanFold()) + return nullptr; + } + + // All good, we can do this fold. + X = Builder.CreateZExt(X, WidestTy); + Y = Builder.CreateZExt(Y, WidestTy); + // The shift is the same that was for X. Value *T0 = XShiftOpcode == Instruction::BinaryOps::LShr ? 
Builder.CreateLShr(X, NewShAmt) : Builder.CreateShl(X, NewShAmt); Value *T1 = Builder.CreateAnd(T0, Y); return Builder.CreateICmp(I.getPredicate(), T1, - Constant::getNullValue(X->getType())); + Constant::getNullValue(WidestTy)); +} + +/// Fold +/// (-1 u/ x) u< y +/// ((x * y) u/ x) != y +/// to +/// @llvm.umul.with.overflow(x, y) plus extraction of overflow bit +/// Note that the comparison is commutative, while inverted (u>=, ==) predicate +/// will mean that we are looking for the opposite answer. +Value *InstCombiner::foldUnsignedMultiplicationOverflowCheck(ICmpInst &I) { + ICmpInst::Predicate Pred; + Value *X, *Y; + Instruction *Mul; + bool NeedNegation; + // Look for: (-1 u/ x) u= y + if (!I.isEquality() && + match(&I, m_c_ICmp(Pred, m_OneUse(m_UDiv(m_AllOnes(), m_Value(X))), + m_Value(Y)))) { + Mul = nullptr; + // Canonicalize as-if y was on RHS. + if (I.getOperand(1) != Y) + Pred = I.getSwappedPredicate(); + + // Are we checking that overflow does not happen, or does happen? + switch (Pred) { + case ICmpInst::Predicate::ICMP_ULT: + NeedNegation = false; + break; // OK + case ICmpInst::Predicate::ICMP_UGE: + NeedNegation = true; + break; // OK + default: + return nullptr; // Wrong predicate. + } + } else // Look for: ((x * y) u/ x) !=/== y + if (I.isEquality() && + match(&I, m_c_ICmp(Pred, m_Value(Y), + m_OneUse(m_UDiv(m_CombineAnd(m_c_Mul(m_Deferred(Y), + m_Value(X)), + m_Instruction(Mul)), + m_Deferred(X)))))) { + NeedNegation = Pred == ICmpInst::Predicate::ICMP_EQ; + } else + return nullptr; + + BuilderTy::InsertPointGuard Guard(Builder); + // If the pattern included (x * y), we'll want to insert new instructions + // right before that original multiplication so that we can replace it. + bool MulHadOtherUses = Mul && !Mul->hasOneUse(); + if (MulHadOtherUses) + Builder.SetInsertPoint(Mul); + + Function *F = Intrinsic::getDeclaration( + I.getModule(), Intrinsic::umul_with_overflow, X->getType()); + CallInst *Call = Builder.CreateCall(F, {X, Y}, "umul"); + + // If the multiplication was used elsewhere, to ensure that we don't leave + // "duplicate" instructions, replace uses of that original multiplication + // with the multiplication result from the with.overflow intrinsic. + if (MulHadOtherUses) + replaceInstUsesWith(*Mul, Builder.CreateExtractValue(Call, 0, "umul.val")); + + Value *Res = Builder.CreateExtractValue(Call, 1, "umul.ov"); + if (NeedNegation) // This technically increases instruction count. + Res = Builder.CreateNot(Res, "umul.not.ov"); + + return Res; } /// Try to fold icmp (binop), X or icmp X, (binop). /// TODO: A large part of this logic is duplicated in InstSimplify's /// simplifyICmpWithBinOp(). We should be able to share that and avoid the code /// duplication. -Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { +Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I, const SimplifyQuery &SQ) { + const SimplifyQuery Q = SQ.getWithInstruction(&I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); // Special logic for binary operators. @@ -3345,13 +3678,13 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { Value *X; // Convert add-with-unsigned-overflow comparisons into a 'not' with compare. 
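// Illustrative sketch, not part of the upstream diff: the two overflow-check
// idioms recognized by foldUnsignedMultiplicationOverflowCheck above, checked
// exhaustively for i8 (x != 0, since udiv by zero is undefined):
//   (-1 u/ x) u< y        <=>  x * y overflows
//   ((x * y) u/ x) != y   <=>  x * y overflows
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 1; x <= 255; ++x)
    for (unsigned y = 0; y <= 255; ++y) {
      bool overflows = x * y > 255;               // true umul.with.overflow bit for i8
      assert(((255u / x) < y) == overflows);
      unsigned truncatedProduct = uint8_t(x * y); // the 8-bit wrapped product
      assert((truncatedProduct / x != y) == overflows);
    }
  return 0;
}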
- // (Op1 + X) ~Op1 u (Op0 + X) --> X >u ~Op0 + // (Op1 + X) u= Op1 --> ~Op1 u= X if (match(Op0, m_OneUse(m_c_Add(m_Specific(Op1), m_Value(X)))) && - Pred == ICmpInst::ICMP_ULT) + (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE)) return new ICmpInst(Pred, Builder.CreateNot(Op1), X); + // Op0 u>/u<= (Op0 + X) --> X u>/u<= ~Op0 if (match(Op1, m_OneUse(m_c_Add(m_Specific(Op0), m_Value(X)))) && - Pred == ICmpInst::ICMP_UGT) + (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE)) return new ICmpInst(Pred, X, Builder.CreateNot(Op0)); bool NoOp0WrapProblem = false, NoOp1WrapProblem = false; @@ -3378,21 +3711,21 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { D = BO1->getOperand(1); } - // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow. + // icmp (A+B), A -> icmp B, 0 for equalities or if there is no overflow. + // icmp (A+B), B -> icmp A, 0 for equalities or if there is no overflow. if ((A == Op1 || B == Op1) && NoOp0WrapProblem) return new ICmpInst(Pred, A == Op1 ? B : A, Constant::getNullValue(Op1->getType())); - // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow. + // icmp C, (C+D) -> icmp 0, D for equalities or if there is no overflow. + // icmp D, (C+D) -> icmp 0, C for equalities or if there is no overflow. if ((C == Op0 || D == Op0) && NoOp1WrapProblem) return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()), C == Op0 ? D : C); - // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow. + // icmp (A+B), (A+D) -> icmp B, D for equalities or if there is no overflow. if (A && C && (A == C || A == D || B == C || B == D) && NoOp0WrapProblem && - NoOp1WrapProblem && - // Try not to increase register pressure. - BO0->hasOneUse() && BO1->hasOneUse()) { + NoOp1WrapProblem) { // Determine Y and Z in the form icmp (X+Y), (X+Z). 
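// Illustrative sketch, not part of the upstream diff: the widened
// add-with-unsigned-overflow fold above, (a + x) u</u>= a <--> ~a u</u>= x,
// is the classic "did the addition wrap" test; an exhaustive i8 check:
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a <= 255; ++a)
    for (unsigned x = 0; x <= 255; ++x) {
      unsigned sum = uint8_t(a + x);   // 8-bit wrapping add
      unsigned nota = uint8_t(~a);     // ~a in 8 bits, i.e. 255 - a
      assert((sum < a) == (nota < x));     // (a + x) u< a   <->  ~a u< x
      assert((sum >= a) == (nota >= x));   // (a + x) u>= a  <->  ~a u>= x
    }
  return 0;
}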
Value *Y, *Z; if (A == C) { @@ -3416,39 +3749,39 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { return new ICmpInst(Pred, Y, Z); } - // icmp slt (X + -1), Y -> icmp sle X, Y + // icmp slt (A + -1), Op1 -> icmp sle A, Op1 if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT && match(B, m_AllOnes())) return new ICmpInst(CmpInst::ICMP_SLE, A, Op1); - // icmp sge (X + -1), Y -> icmp sgt X, Y + // icmp sge (A + -1), Op1 -> icmp sgt A, Op1 if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE && match(B, m_AllOnes())) return new ICmpInst(CmpInst::ICMP_SGT, A, Op1); - // icmp sle (X + 1), Y -> icmp slt X, Y + // icmp sle (A + 1), Op1 -> icmp slt A, Op1 if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE && match(B, m_One())) return new ICmpInst(CmpInst::ICMP_SLT, A, Op1); - // icmp sgt (X + 1), Y -> icmp sge X, Y + // icmp sgt (A + 1), Op1 -> icmp sge A, Op1 if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT && match(B, m_One())) return new ICmpInst(CmpInst::ICMP_SGE, A, Op1); - // icmp sgt X, (Y + -1) -> icmp sge X, Y + // icmp sgt Op0, (C + -1) -> icmp sge Op0, C if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGT && match(D, m_AllOnes())) return new ICmpInst(CmpInst::ICMP_SGE, Op0, C); - // icmp sle X, (Y + -1) -> icmp slt X, Y + // icmp sle Op0, (C + -1) -> icmp slt Op0, C if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLE && match(D, m_AllOnes())) return new ICmpInst(CmpInst::ICMP_SLT, Op0, C); - // icmp sge X, (Y + 1) -> icmp sgt X, Y + // icmp sge Op0, (C + 1) -> icmp sgt Op0, C if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGE && match(D, m_One())) return new ICmpInst(CmpInst::ICMP_SGT, Op0, C); - // icmp slt X, (Y + 1) -> icmp sle X, Y + // icmp slt Op0, (C + 1) -> icmp sle Op0, C if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLT && match(D, m_One())) return new ICmpInst(CmpInst::ICMP_SLE, Op0, C); @@ -3456,33 +3789,33 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { // canonicalization from (X -nuw 1) to (X + -1) means that the combinations // wouldn't happen even if they were implemented. // - // icmp ult (X - 1), Y -> icmp ule X, Y - // icmp uge (X - 1), Y -> icmp ugt X, Y - // icmp ugt X, (Y - 1) -> icmp uge X, Y - // icmp ule X, (Y - 1) -> icmp ult X, Y + // icmp ult (A - 1), Op1 -> icmp ule A, Op1 + // icmp uge (A - 1), Op1 -> icmp ugt A, Op1 + // icmp ugt Op0, (C - 1) -> icmp uge Op0, C + // icmp ule Op0, (C - 1) -> icmp ult Op0, C - // icmp ule (X + 1), Y -> icmp ult X, Y + // icmp ule (A + 1), Op0 -> icmp ult A, Op1 if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_ULE && match(B, m_One())) return new ICmpInst(CmpInst::ICMP_ULT, A, Op1); - // icmp ugt (X + 1), Y -> icmp uge X, Y + // icmp ugt (A + 1), Op0 -> icmp uge A, Op1 if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_UGT && match(B, m_One())) return new ICmpInst(CmpInst::ICMP_UGE, A, Op1); - // icmp uge X, (Y + 1) -> icmp ugt X, Y + // icmp uge Op0, (C + 1) -> icmp ugt Op0, C if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_UGE && match(D, m_One())) return new ICmpInst(CmpInst::ICMP_UGT, Op0, C); - // icmp ult X, (Y + 1) -> icmp ule X, Y + // icmp ult Op0, (C + 1) -> icmp ule Op0, C if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_ULT && match(D, m_One())) return new ICmpInst(CmpInst::ICMP_ULE, Op0, C); // if C1 has greater magnitude than C2: - // icmp (X + C1), (Y + C2) -> icmp (X + C3), Y + // icmp (A + C1), (C + C2) -> icmp (A + C3), C // s.t. 
C3 = C1 - C2 // // if C2 has greater magnitude than C1: - // icmp (X + C1), (Y + C2) -> icmp X, (Y + C3) + // icmp (A + C1), (C + C2) -> icmp A, (C + C3) // s.t. C3 = C2 - C1 if (A && C && NoOp0WrapProblem && NoOp1WrapProblem && (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned()) @@ -3520,29 +3853,35 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { D = BO1->getOperand(1); } - // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow. + // icmp (A-B), A -> icmp 0, B for equalities or if there is no overflow. if (A == Op1 && NoOp0WrapProblem) return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B); - // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow. + // icmp C, (C-D) -> icmp D, 0 for equalities or if there is no overflow. if (C == Op0 && NoOp1WrapProblem) return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType())); - // (A - B) >u A --> A C icmp Y, Z for equalities or if there is no overflow. - if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem && - // Try not to increase register pressure. - BO0->hasOneUse() && BO1->hasOneUse()) + // Convert sub-with-unsigned-overflow comparisons into a comparison of args. + // (A - B) u>/u<= A --> B u>/u<= A + if (A == Op1 && (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE)) + return new ICmpInst(Pred, B, A); + // C u= (C - D) --> C u= D + if (C == Op0 && (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE)) + return new ICmpInst(Pred, C, D); + // (A - B) u>=/u< A --> B u>/u<= A iff B != 0 + if (A == Op1 && (Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_ULT) && + isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) + return new ICmpInst(CmpInst::getFlippedStrictnessPredicate(Pred), B, A); + // C u<=/u> (C - D) --> C u= D iff B != 0 + if (C == Op0 && (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT) && + isKnownNonZero(D, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) + return new ICmpInst(CmpInst::getFlippedStrictnessPredicate(Pred), C, D); + + // icmp (A-B), (C-B) -> icmp A, C for equalities or if there is no overflow. + if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem) return new ICmpInst(Pred, A, C); - // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow. - if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem && - // Try not to increase register pressure. - BO0->hasOneUse() && BO1->hasOneUse()) + + // icmp (A-B), (A-D) -> icmp D, B for equalities or if there is no overflow. + if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem) return new ICmpInst(Pred, D, B); // icmp (0-X) < cst --> x > -cst @@ -3677,6 +4016,9 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { } } + if (Value *V = foldUnsignedMultiplicationOverflowCheck(I)) + return replaceInstUsesWith(I, V); + if (Value *V = foldICmpWithLowBitMaskedVal(I, Builder)) return replaceInstUsesWith(I, V); @@ -3953,125 +4295,140 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { return nullptr; } -/// Handle icmp (cast x to y), (cast/cst). We only handle extending casts so -/// far. -Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) { - const CastInst *LHSCI = cast(ICmp.getOperand(0)); - Value *LHSCIOp = LHSCI->getOperand(0); - Type *SrcTy = LHSCIOp->getType(); - Type *DestTy = LHSCI->getType(); - - // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the - // integer type is the same size as the pointer type. 
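// Illustrative sketch, not part of the upstream diff: the
// sub-with-unsigned-overflow compares introduced above, checked for i8. The
// strict forms hold unconditionally; the non-strict forms additionally require
// the subtrahend to be known non-zero, exactly as the code comments state.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a <= 255; ++a)
    for (unsigned b = 0; b <= 255; ++b) {
      unsigned diff = uint8_t(a - b);     // 8-bit wrapping subtract
      assert((diff > a) == (b > a));      // (a - b) u> a   <->  b u> a
      assert((diff <= a) == (b <= a));    // (a - b) u<= a  <->  b u<= a
      if (b != 0)
        assert((diff >= a) == (b > a));   // (a - b) u>= a  <->  b u> a   iff b != 0
    }
  return 0;
}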
- const auto& CompatibleSizes = [&](Type* SrcTy, Type* DestTy) -> bool { - if (isa(SrcTy)) { - SrcTy = cast(SrcTy)->getElementType(); - DestTy = cast(DestTy)->getElementType(); - } - return DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth(); - }; - if (LHSCI->getOpcode() == Instruction::PtrToInt && - CompatibleSizes(SrcTy, DestTy)) { - Value *RHSOp = nullptr; - if (auto *RHSC = dyn_cast(ICmp.getOperand(1))) { - Value *RHSCIOp = RHSC->getOperand(0); - if (RHSCIOp->getType()->getPointerAddressSpace() == - LHSCIOp->getType()->getPointerAddressSpace()) { - RHSOp = RHSC->getOperand(0); - // If the pointer types don't match, insert a bitcast. - if (LHSCIOp->getType() != RHSOp->getType()) - RHSOp = Builder.CreateBitCast(RHSOp, LHSCIOp->getType()); - } - } else if (auto *RHSC = dyn_cast(ICmp.getOperand(1))) { - RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy); - } - - if (RHSOp) - return new ICmpInst(ICmp.getPredicate(), LHSCIOp, RHSOp); - } - - // The code below only handles extension cast instructions, so far. - // Enforce this. - if (LHSCI->getOpcode() != Instruction::ZExt && - LHSCI->getOpcode() != Instruction::SExt) +static Instruction *foldICmpWithZextOrSext(ICmpInst &ICmp, + InstCombiner::BuilderTy &Builder) { + assert(isa(ICmp.getOperand(0)) && "Expected cast for operand 0"); + auto *CastOp0 = cast(ICmp.getOperand(0)); + Value *X; + if (!match(CastOp0, m_ZExtOrSExt(m_Value(X)))) return nullptr; - bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt; - bool isSignedCmp = ICmp.isSigned(); - - if (auto *CI = dyn_cast(ICmp.getOperand(1))) { - // Not an extension from the same type? - Value *RHSCIOp = CI->getOperand(0); - if (RHSCIOp->getType() != LHSCIOp->getType()) - return nullptr; - + bool IsSignedExt = CastOp0->getOpcode() == Instruction::SExt; + bool IsSignedCmp = ICmp.isSigned(); + if (auto *CastOp1 = dyn_cast(ICmp.getOperand(1))) { // If the signedness of the two casts doesn't agree (i.e. one is a sext // and the other is a zext), then we can't handle this. - if (CI->getOpcode() != LHSCI->getOpcode()) + // TODO: This is too strict. We can handle some predicates (equality?). + if (CastOp0->getOpcode() != CastOp1->getOpcode()) return nullptr; - // Deal with equality cases early. + // Not an extension from the same type? + Value *Y = CastOp1->getOperand(0); + Type *XTy = X->getType(), *YTy = Y->getType(); + if (XTy != YTy) { + // One of the casts must have one use because we are creating a new cast. + if (!CastOp0->hasOneUse() && !CastOp1->hasOneUse()) + return nullptr; + // Extend the narrower operand to the type of the wider operand. + if (XTy->getScalarSizeInBits() < YTy->getScalarSizeInBits()) + X = Builder.CreateCast(CastOp0->getOpcode(), X, YTy); + else if (YTy->getScalarSizeInBits() < XTy->getScalarSizeInBits()) + Y = Builder.CreateCast(CastOp0->getOpcode(), Y, XTy); + else + return nullptr; + } + + // (zext X) == (zext Y) --> X == Y + // (sext X) == (sext Y) --> X == Y if (ICmp.isEquality()) - return new ICmpInst(ICmp.getPredicate(), LHSCIOp, RHSCIOp); + return new ICmpInst(ICmp.getPredicate(), X, Y); // A signed comparison of sign extended values simplifies into a // signed comparison. - if (isSignedCmp && isSignedExt) - return new ICmpInst(ICmp.getPredicate(), LHSCIOp, RHSCIOp); + if (IsSignedCmp && IsSignedExt) + return new ICmpInst(ICmp.getPredicate(), X, Y); // The other three cases all fold into an unsigned comparison. 
- return new ICmpInst(ICmp.getUnsignedPredicate(), LHSCIOp, RHSCIOp); + return new ICmpInst(ICmp.getUnsignedPredicate(), X, Y); } - // If we aren't dealing with a constant on the RHS, exit early. + // Below here, we are only folding a compare with constant. auto *C = dyn_cast(ICmp.getOperand(1)); if (!C) return nullptr; // Compute the constant that would happen if we truncated to SrcTy then // re-extended to DestTy. + Type *SrcTy = CastOp0->getSrcTy(); + Type *DestTy = CastOp0->getDestTy(); Constant *Res1 = ConstantExpr::getTrunc(C, SrcTy); - Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(), Res1, DestTy); + Constant *Res2 = ConstantExpr::getCast(CastOp0->getOpcode(), Res1, DestTy); // If the re-extended constant didn't change... if (Res2 == C) { - // Deal with equality cases early. if (ICmp.isEquality()) - return new ICmpInst(ICmp.getPredicate(), LHSCIOp, Res1); + return new ICmpInst(ICmp.getPredicate(), X, Res1); // A signed comparison of sign extended values simplifies into a // signed comparison. - if (isSignedExt && isSignedCmp) - return new ICmpInst(ICmp.getPredicate(), LHSCIOp, Res1); + if (IsSignedExt && IsSignedCmp) + return new ICmpInst(ICmp.getPredicate(), X, Res1); // The other three cases all fold into an unsigned comparison. - return new ICmpInst(ICmp.getUnsignedPredicate(), LHSCIOp, Res1); + return new ICmpInst(ICmp.getUnsignedPredicate(), X, Res1); } // The re-extended constant changed, partly changed (in the case of a vector), // or could not be determined to be equal (in the case of a constant // expression), so the constant cannot be represented in the shorter type. - // Consequently, we cannot emit a simple comparison. // All the cases that fold to true or false will have already been handled // by SimplifyICmpInst, so only deal with the tricky case. + if (IsSignedCmp || !IsSignedExt || !isa(C)) + return nullptr; + + // Is source op positive? + // icmp ult (sext X), C --> icmp sgt X, -1 + if (ICmp.getPredicate() == ICmpInst::ICMP_ULT) + return new ICmpInst(CmpInst::ICMP_SGT, X, Constant::getAllOnesValue(SrcTy)); + + // Is source op negative? + // icmp ugt (sext X), C --> icmp slt X, 0 + assert(ICmp.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!"); + return new ICmpInst(CmpInst::ICMP_SLT, X, Constant::getNullValue(SrcTy)); +} - if (isSignedCmp || !isSignedExt || !isa(C)) +/// Handle icmp (cast x), (cast or constant). +Instruction *InstCombiner::foldICmpWithCastOp(ICmpInst &ICmp) { + auto *CastOp0 = dyn_cast(ICmp.getOperand(0)); + if (!CastOp0) + return nullptr; + if (!isa(ICmp.getOperand(1)) && !isa(ICmp.getOperand(1))) return nullptr; - // Evaluate the comparison for LT (we invert for GT below). LE and GE cases - // should have been folded away previously and not enter in here. + Value *Op0Src = CastOp0->getOperand(0); + Type *SrcTy = CastOp0->getSrcTy(); + Type *DestTy = CastOp0->getDestTy(); - // We're performing an unsigned comp with a sign extended value. - // This is true if the input is >= 0. [aka >s -1] - Constant *NegOne = Constant::getAllOnesValue(SrcTy); - Value *Result = Builder.CreateICmpSGT(LHSCIOp, NegOne, ICmp.getName()); + // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the + // integer type is the same size as the pointer type. 
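// Illustrative sketch, not part of the upstream diff: when a sign-extended value
// is compared unsigned against a constant that does not survive the
// trunc-then-sext round trip, the compare collapses to a sign test, as in the
// two folds above. Example with i8 sign-extended to i32 and C = 1000 (which
// truncates to -24 and re-extends to a different value):
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C = 1000;
  for (int x = -128; x <= 127; ++x) {
    uint32_t ext = uint32_t(int32_t(x));   // sext i8 -> i32, viewed as unsigned
    assert((ext < C) == (x > -1));         // icmp ult (sext X), C  -->  icmp sgt X, -1
    assert((ext > C) == (x < 0));          // icmp ugt (sext X), C  -->  icmp slt X, 0
  }
  return 0;
}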
+ auto CompatibleSizes = [&](Type *SrcTy, Type *DestTy) { + if (isa(SrcTy)) { + SrcTy = cast(SrcTy)->getElementType(); + DestTy = cast(DestTy)->getElementType(); + } + return DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth(); + }; + if (CastOp0->getOpcode() == Instruction::PtrToInt && + CompatibleSizes(SrcTy, DestTy)) { + Value *NewOp1 = nullptr; + if (auto *PtrToIntOp1 = dyn_cast(ICmp.getOperand(1))) { + Value *PtrSrc = PtrToIntOp1->getOperand(0); + if (PtrSrc->getType()->getPointerAddressSpace() == + Op0Src->getType()->getPointerAddressSpace()) { + NewOp1 = PtrToIntOp1->getOperand(0); + // If the pointer types don't match, insert a bitcast. + if (Op0Src->getType() != NewOp1->getType()) + NewOp1 = Builder.CreateBitCast(NewOp1, Op0Src->getType()); + } + } else if (auto *RHSC = dyn_cast(ICmp.getOperand(1))) { + NewOp1 = ConstantExpr::getIntToPtr(RHSC, SrcTy); + } - // Finally, return the value computed. - if (ICmp.getPredicate() == ICmpInst::ICMP_ULT) - return replaceInstUsesWith(ICmp, Result); + if (NewOp1) + return new ICmpInst(ICmp.getPredicate(), Op0Src, NewOp1); + } - assert(ICmp.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!"); - return BinaryOperator::CreateNot(Result); + return foldICmpWithZextOrSext(ICmp, Builder); } static bool isNeutralValue(Instruction::BinaryOps BinaryOp, Value *RHS) { @@ -4791,41 +5148,35 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { return nullptr; } -/// If we have an icmp le or icmp ge instruction with a constant operand, turn -/// it into the appropriate icmp lt or icmp gt instruction. This transform -/// allows them to be folded in visitICmpInst. -static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) { - ICmpInst::Predicate Pred = I.getPredicate(); - if (Pred != ICmpInst::ICMP_SLE && Pred != ICmpInst::ICMP_SGE && - Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_UGE) - return nullptr; +llvm::Optional> +llvm::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred, + Constant *C) { + assert(ICmpInst::isRelational(Pred) && ICmpInst::isIntPredicate(Pred) && + "Only for relational integer predicates."); - Value *Op0 = I.getOperand(0); - Value *Op1 = I.getOperand(1); - auto *Op1C = dyn_cast(Op1); - if (!Op1C) - return nullptr; + Type *Type = C->getType(); + bool IsSigned = ICmpInst::isSigned(Pred); + + CmpInst::Predicate UnsignedPred = ICmpInst::getUnsignedPredicate(Pred); + bool WillIncrement = + UnsignedPred == ICmpInst::ICMP_ULE || UnsignedPred == ICmpInst::ICMP_UGT; - // Check if the constant operand can be safely incremented/decremented without - // overflowing/underflowing. For scalars, SimplifyICmpInst has already handled - // the edge cases for us, so we just assert on them. For vectors, we must - // handle the edge cases. - Type *Op1Type = Op1->getType(); - bool IsSigned = I.isSigned(); - bool IsLE = (Pred == ICmpInst::ICMP_SLE || Pred == ICmpInst::ICMP_ULE); - auto *CI = dyn_cast(Op1C); - if (CI) { - // A <= MAX -> TRUE ; A >= MIN -> TRUE - assert(IsLE ? !CI->isMaxValue(IsSigned) : !CI->isMinValue(IsSigned)); - } else if (Op1Type->isVectorTy()) { - // TODO? If the edge cases for vectors were guaranteed to be handled as they - // are for scalar, we could remove the min/max checks. However, to do that, - // we would have to use insertelement/shufflevector to replace edge values. - unsigned NumElts = Op1Type->getVectorNumElements(); + // Check if the constant operand can be safely incremented/decremented + // without overflowing/underflowing. 
+ auto ConstantIsOk = [WillIncrement, IsSigned](ConstantInt *C) { + return WillIncrement ? !C->isMaxValue(IsSigned) : !C->isMinValue(IsSigned); + }; + + if (auto *CI = dyn_cast(C)) { + // Bail out if the constant can't be safely incremented/decremented. + if (!ConstantIsOk(CI)) + return llvm::None; + } else if (Type->isVectorTy()) { + unsigned NumElts = Type->getVectorNumElements(); for (unsigned i = 0; i != NumElts; ++i) { - Constant *Elt = Op1C->getAggregateElement(i); + Constant *Elt = C->getAggregateElement(i); if (!Elt) - return nullptr; + return llvm::None; if (isa(Elt)) continue; @@ -4833,20 +5184,43 @@ static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) { // Bail out if we can't determine if this constant is min/max or if we // know that this constant is min/max. auto *CI = dyn_cast(Elt); - if (!CI || (IsLE ? CI->isMaxValue(IsSigned) : CI->isMinValue(IsSigned))) - return nullptr; + if (!CI || !ConstantIsOk(CI)) + return llvm::None; } } else { // ConstantExpr? - return nullptr; + return llvm::None; } - // Increment or decrement the constant and set the new comparison predicate: - // ULE -> ULT ; UGE -> UGT ; SLE -> SLT ; SGE -> SGT - Constant *OneOrNegOne = ConstantInt::get(Op1Type, IsLE ? 1 : -1, true); - CmpInst::Predicate NewPred = IsLE ? ICmpInst::ICMP_ULT: ICmpInst::ICMP_UGT; - NewPred = IsSigned ? ICmpInst::getSignedPredicate(NewPred) : NewPred; - return new ICmpInst(NewPred, Op0, ConstantExpr::getAdd(Op1C, OneOrNegOne)); + CmpInst::Predicate NewPred = CmpInst::getFlippedStrictnessPredicate(Pred); + + // Increment or decrement the constant. + Constant *OneOrNegOne = ConstantInt::get(Type, WillIncrement ? 1 : -1, true); + Constant *NewC = ConstantExpr::getAdd(C, OneOrNegOne); + + return std::make_pair(NewPred, NewC); +} + +/// If we have an icmp le or icmp ge instruction with a constant operand, turn +/// it into the appropriate icmp lt or icmp gt instruction. This transform +/// allows them to be folded in visitICmpInst. +static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) { + ICmpInst::Predicate Pred = I.getPredicate(); + if (ICmpInst::isEquality(Pred) || !ICmpInst::isIntPredicate(Pred) || + isCanonicalPredicate(Pred)) + return nullptr; + + Value *Op0 = I.getOperand(0); + Value *Op1 = I.getOperand(1); + auto *Op1C = dyn_cast(Op1); + if (!Op1C) + return nullptr; + + auto FlippedStrictness = getFlippedStrictnessPredicateAndConstant(Pred, Op1C); + if (!FlippedStrictness) + return nullptr; + + return new ICmpInst(FlippedStrictness->first, Op0, FlippedStrictness->second); } /// Integer compare with boolean values can always be turned into bitwise ops. 
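// Illustrative sketch, not part of the upstream diff: the strictness flip
// performed by getFlippedStrictnessPredicateAndConstant is just
//   x <=/>= C  <->  x </> (C +/- 1),
// valid whenever incrementing/decrementing C cannot overflow (the
// ConstantIsOk check above). A spot check for i8 with C = 42:
#include <cassert>
#include <cstdint>

int main() {
  const int8_t C = 42;   // any constant away from the min/max edge cases
  for (int x = -128; x <= 127; ++x) {
    assert((int8_t(x) <= C) == (int8_t(x) < int8_t(C + 1)));               // sle C -> slt C+1
    assert((int8_t(x) >= C) == (int8_t(x) > int8_t(C - 1)));               // sge C -> sgt C-1
    assert((uint8_t(x) <= uint8_t(C)) == (uint8_t(x) < uint8_t(C + 1)));   // ule C -> ult C+1
  }
  return 0;
}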
@@ -5002,6 +5376,7 @@ static Instruction *foldVectorCmp(CmpInst &Cmp, Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { bool Changed = false; + const SimplifyQuery Q = SQ.getWithInstruction(&I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); unsigned Op0Cplxity = getComplexity(Op0); unsigned Op1Cplxity = getComplexity(Op1); @@ -5016,8 +5391,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { Changed = true; } - if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, - SQ.getWithInstruction(&I))) + if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, Q)) return replaceInstUsesWith(I, V); // Comparing -val or val with non-zero is the same as just comparing val @@ -5050,6 +5424,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (Instruction *Res = foldICmpWithDominatingICmp(I)) return Res; + if (Instruction *Res = foldICmpBinOp(I, Q)) + return Res; + if (Instruction *Res = foldICmpUsingKnownBits(I)) return Res; @@ -5098,6 +5475,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (Instruction *Res = foldICmpInstWithConstant(I)) return Res; + // Try to match comparison as a sign bit test. Intentionally do this after + // foldICmpInstWithConstant() to potentially let other folds to happen first. + if (Instruction *New = foldSignBitTest(I)) + return New; + if (Instruction *Res = foldICmpInstWithConstantNotInt(I)) return Res; @@ -5124,20 +5506,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (Instruction *Res = foldICmpBitCast(I, Builder)) return Res; - if (isa(Op0)) { - // Handle the special case of: icmp (cast bool to X), - // This comes up when you have code like - // int X = A < B; - // if (X) ... - // For generality, we handle any zero-extension of any operand comparison - // with a constant or another cast from the same type. - if (isa(Op1) || isa(Op1)) - if (Instruction *R = foldICmpWithCastAndCast(I)) - return R; - } - - if (Instruction *Res = foldICmpBinOp(I)) - return Res; + if (Instruction *R = foldICmpWithCastOp(I)) + return R; if (Instruction *Res = foldICmpWithMinMax(I)) return Res; diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index 434b0d591215..1dbc06d92e7a 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -113,6 +113,48 @@ static inline bool isCanonicalPredicate(CmpInst::Predicate Pred) { } } +/// Given an exploded icmp instruction, return true if the comparison only +/// checks the sign bit. If it only checks the sign bit, set TrueIfSigned if the +/// result of the comparison is true when the input value is signed. 
+inline bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS, + bool &TrueIfSigned) { + switch (Pred) { + case ICmpInst::ICMP_SLT: // True if LHS s< 0 + TrueIfSigned = true; + return RHS.isNullValue(); + case ICmpInst::ICMP_SLE: // True if LHS s<= -1 + TrueIfSigned = true; + return RHS.isAllOnesValue(); + case ICmpInst::ICMP_SGT: // True if LHS s> -1 + TrueIfSigned = false; + return RHS.isAllOnesValue(); + case ICmpInst::ICMP_SGE: // True if LHS s>= 0 + TrueIfSigned = false; + return RHS.isNullValue(); + case ICmpInst::ICMP_UGT: + // True if LHS u> RHS and RHS == sign-bit-mask - 1 + TrueIfSigned = true; + return RHS.isMaxSignedValue(); + case ICmpInst::ICMP_UGE: + // True if LHS u>= RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc) + TrueIfSigned = true; + return RHS.isMinSignedValue(); + case ICmpInst::ICMP_ULT: + // True if LHS u< RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc) + TrueIfSigned = false; + return RHS.isMinSignedValue(); + case ICmpInst::ICMP_ULE: + // True if LHS u<= RHS and RHS == sign-bit-mask - 1 + TrueIfSigned = false; + return RHS.isMaxSignedValue(); + default: + return false; + } +} + +llvm::Optional> +getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred, Constant *C); + /// Return the source operand of a potentially bitcasted value while optionally /// checking if it has one use. If there is no bitcast or the one use check is /// not met, return the input value itself. @@ -139,32 +181,17 @@ static inline Constant *SubOne(Constant *C) { /// This happens in cases where the ~ can be eliminated. If WillInvertAllUses /// is true, work under the assumption that the caller intends to remove all /// uses of V and only keep uses of ~V. -static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) { +/// +/// See also: canFreelyInvertAllUsersOf() +static inline bool isFreeToInvert(Value *V, bool WillInvertAllUses) { // ~(~(X)) -> X. if (match(V, m_Not(m_Value()))) return true; // Constants can be considered to be not'ed values. - if (isa(V)) + if (match(V, m_AnyIntegralConstant())) return true; - // A vector of constant integers can be inverted easily. - if (V->getType()->isVectorTy() && isa(V)) { - unsigned NumElts = V->getType()->getVectorNumElements(); - for (unsigned i = 0; i != NumElts; ++i) { - Constant *Elt = cast(V)->getAggregateElement(i); - if (!Elt) - return false; - - if (isa(Elt)) - continue; - - if (!isa(Elt)) - return false; - } - return true; - } - // Compares can be inverted if all of their uses are being modified to use the // ~V. if (isa(V)) @@ -185,6 +212,32 @@ static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) { return false; } +/// Given i1 V, can every user of V be freely adapted if V is changed to !V ? +/// +/// See also: isFreeToInvert() +static inline bool canFreelyInvertAllUsersOf(Value *V, Value *IgnoredUser) { + // Look at every user of V. + for (User *U : V->users()) { + if (U == IgnoredUser) + continue; // Don't consider this user. + + auto *I = cast(U); + switch (I->getOpcode()) { + case Instruction::Select: + case Instruction::Br: + break; // Free to invert by swapping true/false values/destinations. + case Instruction::Xor: // Can invert 'xor' if it's a 'not', by ignoring it. + if (!match(I, m_Not(m_Value()))) + return false; // Not a 'not'. + break; + default: + return false; // Don't know, likely not freely invertible. + } + // So far all users were free to invert... + } + return true; // Can freely invert all users! 
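// Illustrative sketch, not part of the upstream diff: the unsigned rows of the
// isSignBitCheck table above all reduce to "is the value negative when viewed
// as signed". Exhaustive i8 check (sign-bit mask 0x80):
#include <cassert>
#include <cstdint>

int main() {
  for (int v = -128; v <= 127; ++v) {
    bool negative = int8_t(v) < 0;
    uint8_t u = uint8_t(v);
    assert((u > 0x7F) == negative);    // ICMP_UGT, RHS == sign-bit-mask - 1
    assert((u >= 0x80) == negative);   // ICMP_UGE, RHS == sign-bit-mask
    assert((u < 0x80) == !negative);   // ICMP_ULT, RHS == sign-bit-mask
    assert((u <= 0x7F) == !negative);  // ICMP_ULE, RHS == sign-bit-mask - 1
  }
  return 0;
}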
+} + /// Some binary operators require special handling to avoid poison and undefined /// behavior. If a constant vector has undef elements, replace those undefs with /// identity constants if possible because those are always safe to execute. @@ -337,6 +390,13 @@ public: Instruction *visitOr(BinaryOperator &I); Instruction *visitXor(BinaryOperator &I); Instruction *visitShl(BinaryOperator &I); + Value *reassociateShiftAmtsOfTwoSameDirectionShifts( + BinaryOperator *Sh0, const SimplifyQuery &SQ, + bool AnalyzeForSignBitExtraction = false); + Instruction *canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract( + BinaryOperator &I); + Instruction *foldVariableSignZeroExtensionOfVariableHighBitExtract( + BinaryOperator &OldAShr); Instruction *visitAShr(BinaryOperator &I); Instruction *visitLShr(BinaryOperator &I); Instruction *commonShiftTransforms(BinaryOperator &I); @@ -541,6 +601,7 @@ private: Instruction *narrowMathIfNoOverflow(BinaryOperator &I); Instruction *narrowRotate(TruncInst &Trunc); Instruction *optimizeBitCastFromPhi(CastInst &CI, PHINode *PN); + Instruction *matchSAddSubSat(SelectInst &MinMax1); /// Determine if a pair of casts can be replaced by a single cast. /// @@ -557,7 +618,7 @@ private: Value *foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &CxtI); Value *foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &CxtI); - Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS); + Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &I); /// Optimize (fcmp)&(fcmp) or (fcmp)|(fcmp). /// NOTE: Unlike most of instcombine, this returns a Value which should @@ -725,7 +786,7 @@ public: Value *LHS, Value *RHS, Instruction *CxtI) const; /// Maximum size of array considered when transforming. - uint64_t MaxArraySizeForCombine; + uint64_t MaxArraySizeForCombine = 0; private: /// Performs a few simplifications for operators which are associative @@ -798,7 +859,8 @@ private: int DmaskIdx = -1); Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, - APInt &UndefElts, unsigned Depth = 0); + APInt &UndefElts, unsigned Depth = 0, + bool AllowMultipleUsers = false); /// Canonicalize the position of binops relative to shufflevector. 
Instruction *foldVectorBinop(BinaryOperator &Inst); @@ -847,17 +909,21 @@ private: Constant *RHSC); Instruction *foldICmpAddOpConst(Value *X, const APInt &C, ICmpInst::Predicate Pred); - Instruction *foldICmpWithCastAndCast(ICmpInst &ICI); + Instruction *foldICmpWithCastOp(ICmpInst &ICI); Instruction *foldICmpUsingKnownBits(ICmpInst &Cmp); Instruction *foldICmpWithDominatingICmp(ICmpInst &Cmp); Instruction *foldICmpWithConstant(ICmpInst &Cmp); Instruction *foldICmpInstWithConstant(ICmpInst &Cmp); Instruction *foldICmpInstWithConstantNotInt(ICmpInst &Cmp); - Instruction *foldICmpBinOp(ICmpInst &Cmp); + Instruction *foldICmpBinOp(ICmpInst &Cmp, const SimplifyQuery &SQ); Instruction *foldICmpEquality(ICmpInst &Cmp); + Instruction *foldIRemByPowerOfTwoToBitTest(ICmpInst &I); + Instruction *foldSignBitTest(ICmpInst &I); Instruction *foldICmpWithZero(ICmpInst &Cmp); + Value *foldUnsignedMultiplicationOverflowCheck(ICmpInst &Cmp); + Instruction *foldICmpSelectConstant(ICmpInst &Cmp, SelectInst *Select, ConstantInt *C); Instruction *foldICmpTruncConstant(ICmpInst &Cmp, TruncInst *Trunc, @@ -874,6 +940,8 @@ private: const APInt &C); Instruction *foldICmpShrConstant(ICmpInst &Cmp, BinaryOperator *Shr, const APInt &C); + Instruction *foldICmpSRemConstant(ICmpInst &Cmp, BinaryOperator *UDiv, + const APInt &C); Instruction *foldICmpUDivConstant(ICmpInst &Cmp, BinaryOperator *UDiv, const APInt &C); Instruction *foldICmpDivConstant(ICmpInst &Cmp, BinaryOperator *Div, diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 054fb7da09a2..3a0e05832fcb 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -175,7 +175,7 @@ static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI, uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType()); if (!AllocaSize) return false; - return isDereferenceableAndAlignedPointer(V, AI->getAlignment(), + return isDereferenceableAndAlignedPointer(V, Align(AI->getAlignment()), APInt(64, AllocaSize), DL); } @@ -197,7 +197,7 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) { if (C->getValue().getActiveBits() <= 64) { Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName()); - New->setAlignment(AI.getAlignment()); + New->setAlignment(MaybeAlign(AI.getAlignment())); // Scan to the end of the allocation instructions, to skip over a block of // allocas if possible...also skip interleaved debug info @@ -345,7 +345,8 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { if (AI.getAllocatedType()->isSized()) { // If the alignment is 0 (unspecified), assign it the preferred alignment. if (AI.getAlignment() == 0) - AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType())); + AI.setAlignment( + MaybeAlign(DL.getPrefTypeAlignment(AI.getAllocatedType()))); // Move all alloca's of zero byte objects to the entry block and merge them // together. Note that we only do this for alloca's, because malloc should @@ -377,12 +378,12 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // assign it the preferred alignment. 
if (EntryAI->getAlignment() == 0) EntryAI->setAlignment( - DL.getPrefTypeAlignment(EntryAI->getAllocatedType())); + MaybeAlign(DL.getPrefTypeAlignment(EntryAI->getAllocatedType()))); // Replace this zero-sized alloca with the one at the start of the entry // block after ensuring that the address will be aligned enough for both // types. - unsigned MaxAlign = std::max(EntryAI->getAlignment(), - AI.getAlignment()); + const MaybeAlign MaxAlign( + std::max(EntryAI->getAlignment(), AI.getAlignment())); EntryAI->setAlignment(MaxAlign); if (AI.getType() != EntryAI->getType()) return new BitCastInst(EntryAI, AI.getType()); @@ -455,9 +456,6 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT Value *Ptr = LI.getPointerOperand(); unsigned AS = LI.getPointerAddressSpace(); - SmallVector, 8> MD; - LI.getAllMetadata(MD); - Value *NewPtr = nullptr; if (!(match(Ptr, m_BitCast(m_Value(NewPtr))) && NewPtr->getType()->getPointerElementType() == NewTy && @@ -467,48 +465,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT LoadInst *NewLoad = IC.Builder.CreateAlignedLoad( NewTy, NewPtr, LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix); NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); - MDBuilder MDB(NewLoad->getContext()); - for (const auto &MDPair : MD) { - unsigned ID = MDPair.first; - MDNode *N = MDPair.second; - // Note, essentially every kind of metadata should be preserved here! This - // routine is supposed to clone a load instruction changing *only its type*. - // The only metadata it makes sense to drop is metadata which is invalidated - // when the pointer type changes. This should essentially never be the case - // in LLVM, but we explicitly switch over only known metadata to be - // conservatively correct. If you are adding metadata to LLVM which pertains - // to loads, you almost certainly want to add it here. - switch (ID) { - case LLVMContext::MD_dbg: - case LLVMContext::MD_tbaa: - case LLVMContext::MD_prof: - case LLVMContext::MD_fpmath: - case LLVMContext::MD_tbaa_struct: - case LLVMContext::MD_invariant_load: - case LLVMContext::MD_alias_scope: - case LLVMContext::MD_noalias: - case LLVMContext::MD_nontemporal: - case LLVMContext::MD_mem_parallel_loop_access: - case LLVMContext::MD_access_group: - // All of these directly apply. - NewLoad->setMetadata(ID, N); - break; - - case LLVMContext::MD_nonnull: - copyNonnullMetadata(LI, N, *NewLoad); - break; - case LLVMContext::MD_align: - case LLVMContext::MD_dereferenceable: - case LLVMContext::MD_dereferenceable_or_null: - // These only directly apply if the new type is also a pointer. - if (NewTy->isPointerTy()) - NewLoad->setMetadata(ID, N); - break; - case LLVMContext::MD_range: - copyRangeMetadata(IC.getDataLayout(), LI, N, *NewLoad); - break; - } - } + copyMetadataForLoad(*NewLoad, LI); return NewLoad; } @@ -1004,9 +961,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType()); if (KnownAlign > EffectiveLoadAlign) - LI.setAlignment(KnownAlign); + LI.setAlignment(MaybeAlign(KnownAlign)); else if (LoadAlign == 0) - LI.setAlignment(EffectiveLoadAlign); + LI.setAlignment(MaybeAlign(EffectiveLoadAlign)); // Replace GEP indices if possible. if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) { @@ -1063,11 +1020,11 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // if (SelectInst *SI = dyn_cast(Op)) { // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2). 
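// Illustrative sketch, not part of the upstream diff: a C-level analogue of the
// load-of-select fold referenced in the comment above. The IR transform emits
// both loads unconditionally, which is why it is guarded by
// isSafeToLoadUnconditionally on both pointers.
#include <cassert>

int main() {
  int v1 = 10, v2 = 20;
  for (bool cond : {false, true}) {
    int original = *(cond ? &v1 : &v2);   // load (select Cond, &V1, &V2)
    int loaded1 = v1, loaded2 = v2;       // both loads hoisted unconditionally
    int viaSelect = cond ? loaded1 : loaded2;
    assert(original == viaSelect);
  }
  return 0;
}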
- unsigned Align = LI.getAlignment(); - if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(), Align, - DL, SI) && - isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(), Align, - DL, SI)) { + const MaybeAlign Alignment(LI.getAlignment()); + if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(), + Alignment, DL, SI) && + isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(), + Alignment, DL, SI)) { LoadInst *V1 = Builder.CreateLoad(LI.getType(), SI->getOperand(1), SI->getOperand(1)->getName() + ".val"); @@ -1075,9 +1032,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Builder.CreateLoad(LI.getType(), SI->getOperand(2), SI->getOperand(2)->getName() + ".val"); assert(LI.isUnordered() && "implied by above"); - V1->setAlignment(Align); + V1->setAlignment(Alignment); V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); - V2->setAlignment(Align); + V2->setAlignment(Alignment); V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); return SelectInst::Create(SI->getCondition(), V1, V2); } @@ -1399,15 +1356,15 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { return eraseInstFromFunction(SI); // Attempt to improve the alignment. - unsigned KnownAlign = getOrEnforceKnownAlignment( - Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, &AC, &DT); - unsigned StoreAlign = SI.getAlignment(); - unsigned EffectiveStoreAlign = - StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType()); + const Align KnownAlign = Align(getOrEnforceKnownAlignment( + Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, &AC, &DT)); + const MaybeAlign StoreAlign = MaybeAlign(SI.getAlignment()); + const Align EffectiveStoreAlign = + StoreAlign ? *StoreAlign : Align(DL.getABITypeAlignment(Val->getType())); if (KnownAlign > EffectiveStoreAlign) SI.setAlignment(KnownAlign); - else if (StoreAlign == 0) + else if (!StoreAlign) SI.setAlignment(EffectiveStoreAlign); // Try to canonicalize the stored type. @@ -1622,8 +1579,8 @@ bool InstCombiner::mergeStoreIntoSuccessor(StoreInst &SI) { // Advance to a place where it is safe to insert the new store and insert it. BBI = DestBB->getFirstInsertionPt(); - StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1), - SI.isVolatile(), SI.getAlignment(), + StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1), SI.isVolatile(), + MaybeAlign(SI.getAlignment()), SI.getOrdering(), SI.getSyncScopeID()); InsertNewInstBefore(NewSI, *BBI); NewSI->setDebugLoc(MergedLoc); diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index cc753ce05313..0b9128a9f5a1 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -124,6 +124,50 @@ static Constant *getLogBase2(Type *Ty, Constant *C) { return ConstantVector::get(Elts); } +// TODO: This is a specific form of a much more general pattern. +// We could detect a select with any binop identity constant, or we +// could use SimplifyBinOp to see if either arm of the select reduces. +// But that needs to be done carefully and/or while removing potential +// reverse canonicalizations as in InstCombiner::foldSelectIntoOp(). 
+static Value *foldMulSelectToNegate(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { + Value *Cond, *OtherOp; + + // mul (select Cond, 1, -1), OtherOp --> select Cond, OtherOp, -OtherOp + // mul OtherOp, (select Cond, 1, -1) --> select Cond, OtherOp, -OtherOp + if (match(&I, m_c_Mul(m_OneUse(m_Select(m_Value(Cond), m_One(), m_AllOnes())), + m_Value(OtherOp)))) + return Builder.CreateSelect(Cond, OtherOp, Builder.CreateNeg(OtherOp)); + + // mul (select Cond, -1, 1), OtherOp --> select Cond, -OtherOp, OtherOp + // mul OtherOp, (select Cond, -1, 1) --> select Cond, -OtherOp, OtherOp + if (match(&I, m_c_Mul(m_OneUse(m_Select(m_Value(Cond), m_AllOnes(), m_One())), + m_Value(OtherOp)))) + return Builder.CreateSelect(Cond, Builder.CreateNeg(OtherOp), OtherOp); + + // fmul (select Cond, 1.0, -1.0), OtherOp --> select Cond, OtherOp, -OtherOp + // fmul OtherOp, (select Cond, 1.0, -1.0) --> select Cond, OtherOp, -OtherOp + if (match(&I, m_c_FMul(m_OneUse(m_Select(m_Value(Cond), m_SpecificFP(1.0), + m_SpecificFP(-1.0))), + m_Value(OtherOp)))) { + IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); + Builder.setFastMathFlags(I.getFastMathFlags()); + return Builder.CreateSelect(Cond, OtherOp, Builder.CreateFNeg(OtherOp)); + } + + // fmul (select Cond, -1.0, 1.0), OtherOp --> select Cond, -OtherOp, OtherOp + // fmul OtherOp, (select Cond, -1.0, 1.0) --> select Cond, -OtherOp, OtherOp + if (match(&I, m_c_FMul(m_OneUse(m_Select(m_Value(Cond), m_SpecificFP(-1.0), + m_SpecificFP(1.0))), + m_Value(OtherOp)))) { + IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); + Builder.setFastMathFlags(I.getFastMathFlags()); + return Builder.CreateSelect(Cond, Builder.CreateFNeg(OtherOp), OtherOp); + } + + return nullptr; +} + Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (Value *V = SimplifyMulInst(I.getOperand(0), I.getOperand(1), SQ.getWithInstruction(&I))) @@ -213,6 +257,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (Instruction *FoldedMul = foldBinOpIntoSelectOrPhi(I)) return FoldedMul; + if (Value *FoldedMul = foldMulSelectToNegate(I, Builder)) + return replaceInstUsesWith(I, FoldedMul); + // Simplify mul instructions with a constant RHS. if (isa(Op1)) { // Canonicalize (X+C1)*CI -> X*CI+C1*CI. @@ -358,6 +405,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (Instruction *FoldedMul = foldBinOpIntoSelectOrPhi(I)) return FoldedMul; + if (Value *FoldedMul = foldMulSelectToNegate(I, Builder)) + return replaceInstUsesWith(I, FoldedMul); + // X * -1.0 --> -X Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (match(Op1, m_SpecificFP(-1.0))) @@ -373,16 +423,6 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_Constant(C))) return BinaryOperator::CreateFMulFMF(X, ConstantExpr::getFNeg(C), &I); - // Sink negation: -X * Y --> -(X * Y) - // But don't transform constant expressions because there's an inverse fold. - if (match(Op0, m_OneUse(m_FNeg(m_Value(X)))) && !isa(Op0)) - return BinaryOperator::CreateFNegFMF(Builder.CreateFMulFMF(X, Op1, &I), &I); - - // Sink negation: Y * -X --> -(X * Y) - // But don't transform constant expressions because there's an inverse fold. 
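// Illustrative sketch, not part of the upstream diff: the integer cases of
// foldMulSelectToNegate above, i.e. multiplying by a select of 1/-1 becomes a
// select between the other operand and its negation (the fmul 1.0/-1.0 cases
// are the floating-point analogue).
#include <cassert>

int main() {
  for (int other = -5; other <= 5; ++other)
    for (bool cond : {false, true}) {
      int sel = cond ? 1 : -1;
      assert(sel * other == (cond ? other : -other));   // mul (select C, 1, -1), X -> select C, X, -X
      int sel2 = cond ? -1 : 1;
      assert(sel2 * other == (cond ? -other : other));  // mul (select C, -1, 1), X -> select C, -X, X
    }
  return 0;
}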
- if (match(Op1, m_OneUse(m_FNeg(m_Value(X)))) && !isa(Op1)) - return BinaryOperator::CreateFNegFMF(Builder.CreateFMulFMF(X, Op0, &I), &I); - // fabs(X) * fabs(X) -> X * X if (Op0 == Op1 && match(Op0, m_Intrinsic(m_Value(X)))) return BinaryOperator::CreateFMulFMF(X, X, &I); @@ -1211,8 +1251,8 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { !IsTan && match(Op0, m_Intrinsic(m_Value(X))) && match(Op1, m_Intrinsic(m_Specific(X))); - if ((IsTan || IsCot) && hasUnaryFloatFn(&TLI, I.getType(), LibFunc_tan, - LibFunc_tanf, LibFunc_tanl)) { + if ((IsTan || IsCot) && + hasFloatFn(&TLI, I.getType(), LibFunc_tan, LibFunc_tanf, LibFunc_tanl)) { IRBuilder<> B(&I); IRBuilder<>::FastMathFlagGuard FMFGuard(B); B.setFastMathFlags(I.getFastMathFlags()); @@ -1244,6 +1284,17 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { return &I; } + // X / fabs(X) -> copysign(1.0, X) + // fabs(X) / X -> copysign(1.0, X) + if (I.hasNoNaNs() && I.hasNoInfs() && + (match(&I, + m_FDiv(m_Value(X), m_Intrinsic(m_Deferred(X)))) || + match(&I, m_FDiv(m_Intrinsic(m_Value(X)), + m_Deferred(X))))) { + Value *V = Builder.CreateBinaryIntrinsic( + Intrinsic::copysign, ConstantFP::get(I.getType(), 1.0), X, &I); + return replaceInstUsesWith(I, V); + } return nullptr; } @@ -1309,6 +1360,8 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); Type *Ty = I.getType(); if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) { + // This may increase instruction count, we don't enforce that Y is a + // constant. Constant *N1 = Constant::getAllOnesValue(Ty); Value *Add = Builder.CreateAdd(Op1, N1); return BinaryOperator::CreateAnd(Op0, Add); diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 5820ab726637..e0376b7582f3 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -542,7 +542,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { // visitLoadInst will propagate an alignment onto the load when TD is around, // and if TD isn't around, we can't handle the mixed case. bool isVolatile = FirstLI->isVolatile(); - unsigned LoadAlignment = FirstLI->getAlignment(); + MaybeAlign LoadAlignment(FirstLI->getAlignment()); unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace(); // We can't sink the load if the loaded value could be modified between the @@ -574,10 +574,10 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { // If some of the loads have an alignment specified but not all of them, // we can't do the transformation. - if ((LoadAlignment != 0) != (LI->getAlignment() != 0)) + if ((LoadAlignment.hasValue()) != (LI->getAlignment() != 0)) return nullptr; - LoadAlignment = std::min(LoadAlignment, LI->getAlignment()); + LoadAlignment = std::min(LoadAlignment, MaybeAlign(LI->getAlignment())); // If the PHI is of volatile loads and the load block has multiple // successors, sinking it would remove a load of the volatile value from diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index aefaf5af1750..9fc871e49b30 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -785,6 +785,41 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal, return nullptr; } +/// Fold the following code sequence: +/// \code +/// int a = ctlz(x & -x); +// x ? 
31 - a : a; +/// \code +/// +/// into: +/// cttz(x) +static Instruction *foldSelectCtlzToCttz(ICmpInst *ICI, Value *TrueVal, + Value *FalseVal, + InstCombiner::BuilderTy &Builder) { + unsigned BitWidth = TrueVal->getType()->getScalarSizeInBits(); + if (!ICI->isEquality() || !match(ICI->getOperand(1), m_Zero())) + return nullptr; + + if (ICI->getPredicate() == ICmpInst::ICMP_NE) + std::swap(TrueVal, FalseVal); + + if (!match(FalseVal, + m_Xor(m_Deferred(TrueVal), m_SpecificInt(BitWidth - 1)))) + return nullptr; + + if (!match(TrueVal, m_Intrinsic())) + return nullptr; + + Value *X = ICI->getOperand(0); + auto *II = cast(TrueVal); + if (!match(II->getOperand(0), m_c_And(m_Specific(X), m_Neg(m_Specific(X))))) + return nullptr; + + Function *F = Intrinsic::getDeclaration(II->getModule(), Intrinsic::cttz, + II->getType()); + return CallInst::Create(F, {X, II->getArgOperand(1)}); +} + /// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single /// call to cttz/ctlz with flag 'is_zero_undef' cleared. /// @@ -973,8 +1008,7 @@ canonicalizeMinMaxWithConstant(SelectInst &Sel, ICmpInst &Cmp, // If we are swapping the select operands, swap the metadata too. assert(Sel.getTrueValue() == RHS && Sel.getFalseValue() == LHS && "Unexpected results from matchSelectPattern"); - Sel.setTrueValue(LHS); - Sel.setFalseValue(RHS); + Sel.swapValues(); Sel.swapProfMetadata(); return &Sel; } @@ -1056,17 +1090,293 @@ static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp, } // We are swapping the select operands, so swap the metadata too. - Sel.setTrueValue(FVal); - Sel.setFalseValue(TVal); + Sel.swapValues(); Sel.swapProfMetadata(); return &Sel; } +static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *ReplaceOp, + const SimplifyQuery &Q) { + // If this is a binary operator, try to simplify it with the replaced op + // because we know Op and ReplaceOp are equivalant. + // For example: V = X + 1, Op = X, ReplaceOp = 42 + // Simplifies as: add(42, 1) --> 43 + if (auto *BO = dyn_cast(V)) { + if (BO->getOperand(0) == Op) + return SimplifyBinOp(BO->getOpcode(), ReplaceOp, BO->getOperand(1), Q); + if (BO->getOperand(1) == Op) + return SimplifyBinOp(BO->getOpcode(), BO->getOperand(0), ReplaceOp, Q); + } + + return nullptr; +} + +/// If we have a select with an equality comparison, then we know the value in +/// one of the arms of the select. See if substituting this value into an arm +/// and simplifying the result yields the same value as the other arm. +/// +/// To make this transform safe, we must drop poison-generating flags +/// (nsw, etc) if we simplified to a binop because the select may be guarding +/// that poison from propagating. If the existing binop already had no +/// poison-generating flags, then this transform can be done by instsimplify. +/// +/// Consider: +/// %cmp = icmp eq i32 %x, 2147483647 +/// %add = add nsw i32 %x, 1 +/// %sel = select i1 %cmp, i32 -2147483648, i32 %add +/// +/// We can't replace %sel with %add unless we strip away the flags. +/// TODO: Wrapping flags could be preserved in some cases with better analysis. +static Value *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp, + const SimplifyQuery &Q) { + if (!Cmp.isEquality()) + return nullptr; + + // Canonicalize the pattern to ICMP_EQ by swapping the select operands. + Value *TrueVal = Sel.getTrueValue(), *FalseVal = Sel.getFalseValue(); + if (Cmp.getPredicate() == ICmpInst::ICMP_NE) + std::swap(TrueVal, FalseVal); + + // Try each equivalence substitution possibility. 
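A concrete C++ rendering of the INT_MAX example in the comment above shows why the fold has to drop the add's nsw flag: once the select is gone, the add must be allowed to wrap. Standalone sketch, illustrative only; the wrap is modelled with unsigned arithmetic:

#include <cassert>
#include <cstdint>

// %cmp = icmp eq i32 %x, 2147483647
// %add = add nsw i32 %x, 1
// %sel = select i1 %cmp, i32 -2147483648, i32 %add
static int32_t selectForm(int32_t x) {
  return x == INT32_MAX ? INT32_MIN : x + 1; // the add never overflows on this path
}

// After the fold only the add remains, so it must wrap (nsw dropped).
// Unsigned arithmetic models the two's complement wrap (the conversion back
// to int32_t is modular; well-defined since C++20).
static int32_t foldedForm(int32_t x) {
  return static_cast<int32_t>(static_cast<uint32_t>(x) + 1u);
}

int main() {
  for (int32_t x : {0, 41, -7, INT32_MAX, INT32_MIN})
    assert(selectForm(x) == foldedForm(x));
  return 0;
}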
+ // We have an 'EQ' comparison, so the select's false value will propagate. + // Example: + // (X == 42) ? 43 : (X + 1) --> (X == 42) ? (X + 1) : (X + 1) --> X + 1 + // (X == 42) ? (X + 1) : 43 --> (X == 42) ? (42 + 1) : 43 --> 43 + Value *CmpLHS = Cmp.getOperand(0), *CmpRHS = Cmp.getOperand(1); + if (simplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q) == TrueVal || + simplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q) == TrueVal || + simplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q) == FalseVal || + simplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q) == FalseVal) { + if (auto *FalseInst = dyn_cast(FalseVal)) + FalseInst->dropPoisonGeneratingFlags(); + return FalseVal; + } + return nullptr; +} + +// See if this is a pattern like: +// %old_cmp1 = icmp slt i32 %x, C2 +// %old_replacement = select i1 %old_cmp1, i32 %target_low, i32 %target_high +// %old_x_offseted = add i32 %x, C1 +// %old_cmp0 = icmp ult i32 %old_x_offseted, C0 +// %r = select i1 %old_cmp0, i32 %x, i32 %old_replacement +// This can be rewritten as more canonical pattern: +// %new_cmp1 = icmp slt i32 %x, -C1 +// %new_cmp2 = icmp sge i32 %x, C0-C1 +// %new_clamped_low = select i1 %new_cmp1, i32 %target_low, i32 %x +// %r = select i1 %new_cmp2, i32 %target_high, i32 %new_clamped_low +// Iff -C1 s<= C2 s<= C0-C1 +// Also ULT predicate can also be UGT iff C0 != -1 (+invert result) +// SLT predicate can also be SGT iff C2 != INT_MAX (+invert res.) +static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0, + InstCombiner::BuilderTy &Builder) { + Value *X = Sel0.getTrueValue(); + Value *Sel1 = Sel0.getFalseValue(); + + // First match the condition of the outermost select. + // Said condition must be one-use. + if (!Cmp0.hasOneUse()) + return nullptr; + Value *Cmp00 = Cmp0.getOperand(0); + Constant *C0; + if (!match(Cmp0.getOperand(1), + m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C0)))) + return nullptr; + // Canonicalize Cmp0 into the form we expect. + // FIXME: we shouldn't care about lanes that are 'undef' in the end? + switch (Cmp0.getPredicate()) { + case ICmpInst::Predicate::ICMP_ULT: + break; // Great! + case ICmpInst::Predicate::ICMP_ULE: + // We'd have to increment C0 by one, and for that it must not have all-ones + // element, but then it would have been canonicalized to 'ult' before + // we get here. So we can't do anything useful with 'ule'. + return nullptr; + case ICmpInst::Predicate::ICMP_UGT: + // We want to canonicalize it to 'ult', so we'll need to increment C0, + // which again means it must not have any all-ones elements. + if (!match(C0, + m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_NE, + APInt::getAllOnesValue( + C0->getType()->getScalarSizeInBits())))) + return nullptr; // Can't do, have all-ones element[s]. + C0 = AddOne(C0); + std::swap(X, Sel1); + break; + case ICmpInst::Predicate::ICMP_UGE: + // The only way we'd get this predicate if this `icmp` has extra uses, + // but then we won't be able to do this fold. + return nullptr; + default: + return nullptr; // Unknown predicate. + } + + // Now that we've canonicalized the ICmp, we know the X we expect; + // the select in other hand should be one-use. + if (!Sel1->hasOneUse()) + return nullptr; + + // We now can finish matching the condition of the outermost select: + // it should either be the X itself, or an addition of some constant to X. 
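An exhaustive 8-bit check of the clamp-like rewrite described above, for one constant choice that satisfies the precondition -C1 s<= C2 s<= C0-C1 (here C0=32, C1=16, C2=0, clamping to [-16, 15]); standalone sketch, illustrative only:

#include <cassert>
#include <cstdint>

int main() {
  const int32_t C0 = 32, C1 = 16, C2 = 0;
  const int8_t TargetLow = -16, TargetHigh = 15;
  for (int x = -128; x <= 127; ++x) {
    // Original pattern:
    //   %r = (x + C1) u< C0 ? x : (x s< C2 ? TargetLow : TargetHigh)
    uint8_t xOff = static_cast<uint8_t>(x + C1);        // add (wraps)
    int8_t oldRepl = x < C2 ? TargetLow : TargetHigh;   // inner select
    int8_t rOld = xOff < static_cast<uint8_t>(C0) ? int8_t(x) : oldRepl;

    // Canonical form:
    //   %low = x s< -C1 ? TargetLow : x
    //   %r   = x s>= C0-C1 ? TargetHigh : %low
    int8_t clampedLow = x < -C1 ? TargetLow : int8_t(x);
    int8_t rNew = x >= C0 - C1 ? TargetHigh : clampedLow;

    assert(rOld == rNew);
  }
  return 0;
}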
+ Constant *C1; + if (Cmp00 == X) + C1 = ConstantInt::getNullValue(Sel0.getType()); + else if (!match(Cmp00, + m_Add(m_Specific(X), + m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C1))))) + return nullptr; + + Value *Cmp1; + ICmpInst::Predicate Pred1; + Constant *C2; + Value *ReplacementLow, *ReplacementHigh; + if (!match(Sel1, m_Select(m_Value(Cmp1), m_Value(ReplacementLow), + m_Value(ReplacementHigh))) || + !match(Cmp1, + m_ICmp(Pred1, m_Specific(X), + m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C2))))) + return nullptr; + + if (!Cmp1->hasOneUse() && (Cmp00 == X || !Cmp00->hasOneUse())) + return nullptr; // Not enough one-use instructions for the fold. + // FIXME: this restriction could be relaxed if Cmp1 can be reused as one of + // two comparisons we'll need to build. + + // Canonicalize Cmp1 into the form we expect. + // FIXME: we shouldn't care about lanes that are 'undef' in the end? + switch (Pred1) { + case ICmpInst::Predicate::ICMP_SLT: + break; + case ICmpInst::Predicate::ICMP_SLE: + // We'd have to increment C2 by one, and for that it must not have signed + // max element, but then it would have been canonicalized to 'slt' before + // we get here. So we can't do anything useful with 'sle'. + return nullptr; + case ICmpInst::Predicate::ICMP_SGT: + // We want to canonicalize it to 'slt', so we'll need to increment C2, + // which again means it must not have any signed max elements. + if (!match(C2, + m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_NE, + APInt::getSignedMaxValue( + C2->getType()->getScalarSizeInBits())))) + return nullptr; // Can't do, have signed max element[s]. + C2 = AddOne(C2); + LLVM_FALLTHROUGH; + case ICmpInst::Predicate::ICMP_SGE: + // Also non-canonical, but here we don't need to change C2, + // so we don't have any restrictions on C2, so we can just handle it. + std::swap(ReplacementLow, ReplacementHigh); + break; + default: + return nullptr; // Unknown predicate. + } + + // The thresholds of this clamp-like pattern. + auto *ThresholdLowIncl = ConstantExpr::getNeg(C1); + auto *ThresholdHighExcl = ConstantExpr::getSub(C0, C1); + + // The fold has a precondition 1: C2 s>= ThresholdLow + auto *Precond1 = ConstantExpr::getICmp(ICmpInst::Predicate::ICMP_SGE, C2, + ThresholdLowIncl); + if (!match(Precond1, m_One())) + return nullptr; + // The fold has a precondition 2: C2 s<= ThresholdHigh + auto *Precond2 = ConstantExpr::getICmp(ICmpInst::Predicate::ICMP_SLE, C2, + ThresholdHighExcl); + if (!match(Precond2, m_One())) + return nullptr; + + // All good, finally emit the new pattern. + Value *ShouldReplaceLow = Builder.CreateICmpSLT(X, ThresholdLowIncl); + Value *ShouldReplaceHigh = Builder.CreateICmpSGE(X, ThresholdHighExcl); + Value *MaybeReplacedLow = + Builder.CreateSelect(ShouldReplaceLow, ReplacementLow, X); + Instruction *MaybeReplacedHigh = + SelectInst::Create(ShouldReplaceHigh, ReplacementHigh, MaybeReplacedLow); + + return MaybeReplacedHigh; +} + +// If we have +// %cmp = icmp [canonical predicate] i32 %x, C0 +// %r = select i1 %cmp, i32 %y, i32 C1 +// Where C0 != C1 and %x may be different from %y, see if the constant that we +// will have if we flip the strictness of the predicate (i.e. without changing +// the result) is identical to the C1 in select. If it matches we can change +// original comparison to one with swapped predicate, reuse the constant, +// and swap the hands of select. 
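A worked instance of the comment above (standalone C++ sketch, illustrative constants): with %cmp = icmp ult i32 %x, 44 and %r = select i1 %cmp, i32 %y, i32 43, flipping the strictness of the predicate yields ule 43, whose constant already appears in the select, so the compare can be rewritten with the swapped predicate ugt 43 and the select hands swapped:

#include <cassert>
#include <cstdint>

// Before: %r = (x u< 44) ? y : 43
static uint32_t beforeFold(uint32_t x, uint32_t y) { return x < 44u ? y : 43u; }
// After:  %r = (x u> 43) ? 43 : y   (swapped predicate, reused constant)
static uint32_t afterFold(uint32_t x, uint32_t y) { return x > 43u ? 43u : y; }

int main() {
  for (uint32_t x : {0u, 42u, 43u, 44u, 45u, 0xFFFFFFFFu})
    for (uint32_t y : {0u, 43u, 1000u})
      assert(beforeFold(x, y) == afterFold(x, y));
  return 0;
}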
+static Instruction * +tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp, + InstCombiner::BuilderTy &Builder) { + ICmpInst::Predicate Pred; + Value *X; + Constant *C0; + if (!match(&Cmp, m_OneUse(m_ICmp( + Pred, m_Value(X), + m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C0)))))) + return nullptr; + + // If comparison predicate is non-relational, we won't be able to do anything. + if (ICmpInst::isEquality(Pred)) + return nullptr; + + // If comparison predicate is non-canonical, then we certainly won't be able + // to make it canonical; canonicalizeCmpWithConstant() already tried. + if (!isCanonicalPredicate(Pred)) + return nullptr; + + // If the [input] type of comparison and select type are different, lets abort + // for now. We could try to compare constants with trunc/[zs]ext though. + if (C0->getType() != Sel.getType()) + return nullptr; + + // FIXME: are there any magic icmp predicate+constant pairs we must not touch? + + Value *SelVal0, *SelVal1; // We do not care which one is from where. + match(&Sel, m_Select(m_Value(), m_Value(SelVal0), m_Value(SelVal1))); + // At least one of these values we are selecting between must be a constant + // else we'll never succeed. + if (!match(SelVal0, m_AnyIntegralConstant()) && + !match(SelVal1, m_AnyIntegralConstant())) + return nullptr; + + // Does this constant C match any of the `select` values? + auto MatchesSelectValue = [SelVal0, SelVal1](Constant *C) { + return C->isElementWiseEqual(SelVal0) || C->isElementWiseEqual(SelVal1); + }; + + // If C0 *already* matches true/false value of select, we are done. + if (MatchesSelectValue(C0)) + return nullptr; + + // Check the constant we'd have with flipped-strictness predicate. + auto FlippedStrictness = getFlippedStrictnessPredicateAndConstant(Pred, C0); + if (!FlippedStrictness) + return nullptr; + + // If said constant doesn't match either, then there is no hope, + if (!MatchesSelectValue(FlippedStrictness->second)) + return nullptr; + + // It matched! Lets insert the new comparison just before select. + InstCombiner::BuilderTy::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(&Sel); + + Pred = ICmpInst::getSwappedPredicate(Pred); // Yes, swapped. + Value *NewCmp = Builder.CreateICmp(Pred, X, FlippedStrictness->second, + Cmp.getName() + ".inv"); + Sel.setCondition(NewCmp); + Sel.swapValues(); + Sel.swapProfMetadata(); + + return &Sel; +} + /// Visit a SelectInst that has an ICmpInst as its first operand. 
Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI) { - Value *TrueVal = SI.getTrueValue(); - Value *FalseVal = SI.getFalseValue(); + if (Value *V = foldSelectValueEquivalence(SI, *ICI, SQ)) + return replaceInstUsesWith(SI, V); if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, Builder)) return NewSel; @@ -1074,12 +1384,21 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI, if (Instruction *NewAbs = canonicalizeAbsNabs(SI, *ICI, Builder)) return NewAbs; + if (Instruction *NewAbs = canonicalizeClampLike(SI, *ICI, Builder)) + return NewAbs; + + if (Instruction *NewSel = + tryToReuseConstantFromSelectInComparison(SI, *ICI, Builder)) + return NewSel; + bool Changed = adjustMinMax(SI, *ICI); if (Value *V = foldSelectICmpAnd(SI, ICI, Builder)) return replaceInstUsesWith(SI, V); // NOTE: if we wanted to, this is where to detect integer MIN/MAX + Value *TrueVal = SI.getTrueValue(); + Value *FalseVal = SI.getFalseValue(); ICmpInst::Predicate Pred = ICI->getPredicate(); Value *CmpLHS = ICI->getOperand(0); Value *CmpRHS = ICI->getOperand(1); @@ -1149,6 +1468,9 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI, foldSelectICmpAndAnd(SI.getType(), ICI, TrueVal, FalseVal, Builder)) return V; + if (Instruction *V = foldSelectCtlzToCttz(ICI, TrueVal, FalseVal, Builder)) + return V; + if (Value *V = foldSelectICmpAndOr(ICI, TrueVal, FalseVal, Builder)) return replaceInstUsesWith(SI, V); @@ -1253,6 +1575,16 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner, } } + // max(max(A, B), min(A, B)) --> max(A, B) + // min(min(A, B), max(A, B)) --> min(A, B) + // TODO: This could be done in instsimplify. + if (SPF1 == SPF2 && + ((SPF1 == SPF_UMIN && match(C, m_c_UMax(m_Specific(A), m_Specific(B)))) || + (SPF1 == SPF_SMIN && match(C, m_c_SMax(m_Specific(A), m_Specific(B)))) || + (SPF1 == SPF_UMAX && match(C, m_c_UMin(m_Specific(A), m_Specific(B)))) || + (SPF1 == SPF_SMAX && match(C, m_c_SMin(m_Specific(A), m_Specific(B)))))) + return replaceInstUsesWith(Outer, Inner); + // ABS(ABS(X)) -> ABS(X) // NABS(NABS(X)) -> NABS(X) // TODO: This could be done in instsimplify. @@ -1280,7 +1612,7 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner, return true; } - if (IsFreeToInvert(V, !V->hasNUsesOrMore(3))) { + if (isFreeToInvert(V, !V->hasNUsesOrMore(3))) { NotV = nullptr; return true; } @@ -1492,6 +1824,30 @@ static Instruction *canonicalizeSelectToShuffle(SelectInst &SI) { ConstantVector::get(Mask)); } +/// If we have a select of vectors with a scalar condition, try to convert that +/// to a vector select by splatting the condition. A splat may get folded with +/// other operations in IR and having all operands of a select be vector types +/// is likely better for vector codegen. +static Instruction *canonicalizeScalarSelectOfVecs( + SelectInst &Sel, InstCombiner::BuilderTy &Builder) { + Type *Ty = Sel.getType(); + if (!Ty->isVectorTy()) + return nullptr; + + // We can replace a single-use extract with constant index. + Value *Cond = Sel.getCondition(); + if (!match(Cond, m_OneUse(m_ExtractElement(m_Value(), m_ConstantInt())))) + return nullptr; + + // select (extelt V, Index), T, F --> select (splat V, Index), T, F + // Splatting the extracted condition reduces code (we could directly create a + // splat shuffle of the source vector to eliminate the intermediate step). 
+ unsigned NumElts = Ty->getVectorNumElements(); + Value *SplatCond = Builder.CreateVectorSplat(NumElts, Cond); + Sel.setCondition(SplatCond); + return &Sel; +} + /// Reuse bitcasted operands between a compare and select: /// select (cmp (bitcast C), (bitcast D)), (bitcast' C), (bitcast' D) --> /// bitcast (select (cmp (bitcast C), (bitcast D)), (bitcast C), (bitcast D)) @@ -1648,6 +2004,71 @@ static Instruction *moveAddAfterMinMax(SelectPatternFlavor SPF, Value *X, return nullptr; } +/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value. +Instruction *InstCombiner::matchSAddSubSat(SelectInst &MinMax1) { + Type *Ty = MinMax1.getType(); + + // We are looking for a tree of: + // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B)))) + // Where the min and max could be reversed + Instruction *MinMax2; + BinaryOperator *AddSub; + const APInt *MinValue, *MaxValue; + if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) { + if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue)))) + return nullptr; + } else if (match(&MinMax1, + m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) { + if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue)))) + return nullptr; + } else + return nullptr; + + // Check that the constants clamp a saturate, and that the new type would be + // sensible to convert to. + if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1) + return nullptr; + // In what bitwidth can this be treated as saturating arithmetics? + unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1; + // FIXME: This isn't quite right for vectors, but using the scalar type is a + // good first approximation for what should be done there. + if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth)) + return nullptr; + + // Also make sure that the number of uses is as expected. The "3"s are for the + // the two items of min/max (the compare and the select). + if (MinMax2->hasNUsesOrMore(3) || AddSub->hasNUsesOrMore(3)) + return nullptr; + + // Create the new type (which can be a vector type) + Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth); + // Match the two extends from the add/sub + Value *A, *B; + if(!match(AddSub, m_BinOp(m_SExt(m_Value(A)), m_SExt(m_Value(B))))) + return nullptr; + // And check the incoming values are of a type smaller than or equal to the + // size of the saturation. Otherwise the higher bits can cause different + // results. + if (A->getType()->getScalarSizeInBits() > NewBitWidth || + B->getType()->getScalarSizeInBits() > NewBitWidth) + return nullptr; + + Intrinsic::ID IntrinsicID; + if (AddSub->getOpcode() == Instruction::Add) + IntrinsicID = Intrinsic::sadd_sat; + else if (AddSub->getOpcode() == Instruction::Sub) + IntrinsicID = Intrinsic::ssub_sat; + else + return nullptr; + + // Finally create and return the sat intrinsic, truncated to the new type + Function *F = Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy); + Value *AT = Builder.CreateSExt(A, NewTy); + Value *BT = Builder.CreateSExt(B, NewTy); + Value *Sat = Builder.CreateCall(F, {AT, BT}); + return CastInst::Create(Instruction::SExt, Sat, Ty); +} + /// Reduce a sequence of min/max with a common operand. 
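The matchSAddSubSat helper above recognizes a sign-extend, add, and clamp back to the narrow signed range, and turns it into a saturating add (or subtract) on the narrow type. An exhaustive i8 check of the value identity the add case relies on (standalone sketch, illustrative only):

#include <algorithm>
#include <cassert>
#include <cstdint>

// The matched pattern: widen, add, clamp to the narrow range, truncate.
static int8_t clampedAdd(int8_t a, int8_t b) {
  int wide = int(a) + int(b);   // sext + add
  wide = std::min(wide, 127);   // smin with INT8_MAX
  wide = std::max(wide, -128);  // smax with INT8_MIN
  return int8_t(wide);          // trunc
}

// What the combine emits: sadd.sat semantics on i8.
static int8_t saddSat8(int8_t a, int8_t b) {
  int s = int(a) + int(b);
  if (s > INT8_MAX) return INT8_MAX;
  if (s < INT8_MIN) return INT8_MIN;
  return int8_t(s);
}

int main() {
  for (int a = -128; a <= 127; ++a)
    for (int b = -128; b <= 127; ++b)
      assert(clampedAdd(int8_t(a), int8_t(b)) == saddSat8(int8_t(a), int8_t(b)));
  return 0;
}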
static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS, Value *RHS, @@ -1788,6 +2209,9 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (Instruction *I = canonicalizeSelectToShuffle(SI)) return I; + if (Instruction *I = canonicalizeScalarSelectOfVecs(SI, Builder)) + return I; + // Canonicalize a one-use integer compare with a non-canonical predicate by // inverting the predicate and swapping the select operands. This matches a // compare canonicalization for conditional branches. @@ -2013,16 +2437,17 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { (LHS->getType()->isFPOrFPVectorTy() && ((CmpLHS != LHS && CmpLHS != RHS) || (CmpRHS != LHS && CmpRHS != RHS)))) { - CmpInst::Predicate Pred = getMinMaxPred(SPF, SPR.Ordered); + CmpInst::Predicate MinMaxPred = getMinMaxPred(SPF, SPR.Ordered); Value *Cmp; - if (CmpInst::isIntPredicate(Pred)) { - Cmp = Builder.CreateICmp(Pred, LHS, RHS); + if (CmpInst::isIntPredicate(MinMaxPred)) { + Cmp = Builder.CreateICmp(MinMaxPred, LHS, RHS); } else { IRBuilder<>::FastMathFlagGuard FMFG(Builder); - auto FMF = cast(SI.getCondition())->getFastMathFlags(); + auto FMF = + cast(SI.getCondition())->getFastMathFlags(); Builder.setFastMathFlags(FMF); - Cmp = Builder.CreateFCmp(Pred, LHS, RHS); + Cmp = Builder.CreateFCmp(MinMaxPred, LHS, RHS); } Value *NewSI = Builder.CreateSelect(Cmp, LHS, RHS, SI.getName(), &SI); @@ -2040,9 +2465,9 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * { Value *A; if (match(X, m_Not(m_Value(A))) && !X->hasNUsesOrMore(3) && - !IsFreeToInvert(A, A->hasOneUse()) && + !isFreeToInvert(A, A->hasOneUse()) && // Passing false to only consider m_Not and constants. - IsFreeToInvert(Y, false)) { + isFreeToInvert(Y, false)) { Value *B = Builder.CreateNot(Y); Value *NewMinMax = createMinMax(Builder, getInverseMinMaxFlavor(SPF), A, B); @@ -2070,6 +2495,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (Instruction *I = factorizeMinMaxTree(SPF, LHS, RHS, Builder)) return I; + if (Instruction *I = matchSAddSubSat(SI)) + return I; } } diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index c821292400cd..64294838644f 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -25,50 +25,275 @@ using namespace PatternMatch; // we should rewrite it as // x shiftopcode (Q+K) iff (Q+K) u< bitwidth(x) // This is valid for any shift, but they must be identical. -static Instruction * -reassociateShiftAmtsOfTwoSameDirectionShifts(BinaryOperator *Sh0, - const SimplifyQuery &SQ) { - // Look for: (x shiftopcode ShAmt0) shiftopcode ShAmt1 - Value *X, *ShAmt1, *ShAmt0; +// +// AnalyzeForSignBitExtraction indicates that we will only analyze whether this +// pattern has any 2 right-shifts that sum to 1 less than original bit width. +Value *InstCombiner::reassociateShiftAmtsOfTwoSameDirectionShifts( + BinaryOperator *Sh0, const SimplifyQuery &SQ, + bool AnalyzeForSignBitExtraction) { + // Look for a shift of some instruction, ignore zext of shift amount if any. + Instruction *Sh0Op0; + Value *ShAmt0; + if (!match(Sh0, + m_Shift(m_Instruction(Sh0Op0), m_ZExtOrSelf(m_Value(ShAmt0))))) + return nullptr; + + // If there is a truncation between the two shifts, we must make note of it + // and look through it. The truncation imposes additional constraints on the + // transform. 
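Leaving the new trunc handling and sign-bit-extraction analysis aside, the core identity behind reassociateShiftAmtsOfTwoSameDirectionShifts is that two same-direction shifts add up, provided the summed amount stays below the bit width. A quick standalone check (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x : {0u, 1u, 0xDEADBEEFu, 0xFFFFFFFFu})
    for (unsigned a = 0; a < 32; ++a)
      for (unsigned b = 0; a + b < 32; ++b) {
        assert(((x >> a) >> b) == (x >> (a + b))); // lshr of lshr
        assert(((x << a) << b) == (x << (a + b))); // shl of shl
      }
  return 0;
}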
Instruction *Sh1; - if (!match(Sh0, m_Shift(m_CombineAnd(m_Shift(m_Value(X), m_Value(ShAmt1)), - m_Instruction(Sh1)), - m_Value(ShAmt0)))) + Value *Trunc = nullptr; + match(Sh0Op0, + m_CombineOr(m_CombineAnd(m_Trunc(m_Instruction(Sh1)), m_Value(Trunc)), + m_Instruction(Sh1))); + + // Inner shift: (x shiftopcode ShAmt1) + // Like with other shift, ignore zext of shift amount if any. + Value *X, *ShAmt1; + if (!match(Sh1, m_Shift(m_Value(X), m_ZExtOrSelf(m_Value(ShAmt1))))) + return nullptr; + + // We have two shift amounts from two different shifts. The types of those + // shift amounts may not match. If that's the case let's bailout now.. + if (ShAmt0->getType() != ShAmt1->getType()) + return nullptr; + + // We are only looking for signbit extraction if we have two right shifts. + bool HadTwoRightShifts = match(Sh0, m_Shr(m_Value(), m_Value())) && + match(Sh1, m_Shr(m_Value(), m_Value())); + // ... and if it's not two right-shifts, we know the answer already. + if (AnalyzeForSignBitExtraction && !HadTwoRightShifts) return nullptr; - // The shift opcodes must be identical. + // The shift opcodes must be identical, unless we are just checking whether + // this pattern can be interpreted as a sign-bit-extraction. Instruction::BinaryOps ShiftOpcode = Sh0->getOpcode(); - if (ShiftOpcode != Sh1->getOpcode()) + bool IdenticalShOpcodes = Sh0->getOpcode() == Sh1->getOpcode(); + if (!IdenticalShOpcodes && !AnalyzeForSignBitExtraction) return nullptr; + + // If we saw truncation, we'll need to produce extra instruction, + // and for that one of the operands of the shift must be one-use, + // unless of course we don't actually plan to produce any instructions here. + if (Trunc && !AnalyzeForSignBitExtraction && + !match(Sh0, m_c_BinOp(m_OneUse(m_Value()), m_Value()))) + return nullptr; + // Can we fold (ShAmt0+ShAmt1) ? - Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, ShAmt0, ShAmt1, - SQ.getWithInstruction(Sh0)); + auto *NewShAmt = dyn_cast_or_null( + SimplifyAddInst(ShAmt0, ShAmt1, /*isNSW=*/false, /*isNUW=*/false, + SQ.getWithInstruction(Sh0))); if (!NewShAmt) return nullptr; // Did not simplify. - // Is the new shift amount smaller than the bit width? - // FIXME: could also rely on ConstantRange. - unsigned BitWidth = X->getType()->getScalarSizeInBits(); + unsigned NewShAmtBitWidth = NewShAmt->getType()->getScalarSizeInBits(); + unsigned XBitWidth = X->getType()->getScalarSizeInBits(); + // Is the new shift amount smaller than the bit width of inner/new shift? if (!match(NewShAmt, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT, - APInt(BitWidth, BitWidth)))) - return nullptr; + APInt(NewShAmtBitWidth, XBitWidth)))) + return nullptr; // FIXME: could perform constant-folding. + + // If there was a truncation, and we have a right-shift, we can only fold if + // we are left with the original sign bit. Likewise, if we were just checking + // that this is a sighbit extraction, this is the place to check it. + // FIXME: zero shift amount is also legal here, but we can't *easily* check + // more than one predicate so it's not really worth it. + if (HadTwoRightShifts && (Trunc || AnalyzeForSignBitExtraction)) { + // If it's not a sign bit extraction, then we're done. + if (!match(NewShAmt, + m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ, + APInt(NewShAmtBitWidth, XBitWidth - 1)))) + return nullptr; + // If it is, and that was the question, return the base value. 
+ if (AnalyzeForSignBitExtraction) + return X; + } + + assert(IdenticalShOpcodes && "Should not get here with different shifts."); + // All good, we can do this fold. + NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, X->getType()); + BinaryOperator *NewShift = BinaryOperator::Create(ShiftOpcode, X, NewShAmt); - // If both of the original shifts had the same flag set, preserve the flag. - if (ShiftOpcode == Instruction::BinaryOps::Shl) { - NewShift->setHasNoUnsignedWrap(Sh0->hasNoUnsignedWrap() && - Sh1->hasNoUnsignedWrap()); - NewShift->setHasNoSignedWrap(Sh0->hasNoSignedWrap() && - Sh1->hasNoSignedWrap()); - } else { - NewShift->setIsExact(Sh0->isExact() && Sh1->isExact()); + + // The flags can only be propagated if there wasn't a trunc. + if (!Trunc) { + // If the pattern did not involve trunc, and both of the original shifts + // had the same flag set, preserve the flag. + if (ShiftOpcode == Instruction::BinaryOps::Shl) { + NewShift->setHasNoUnsignedWrap(Sh0->hasNoUnsignedWrap() && + Sh1->hasNoUnsignedWrap()); + NewShift->setHasNoSignedWrap(Sh0->hasNoSignedWrap() && + Sh1->hasNoSignedWrap()); + } else { + NewShift->setIsExact(Sh0->isExact() && Sh1->isExact()); + } + } + + Instruction *Ret = NewShift; + if (Trunc) { + Builder.Insert(NewShift); + Ret = CastInst::Create(Instruction::Trunc, NewShift, Sh0->getType()); + } + + return Ret; +} + +// Try to replace `undef` constants in C with Replacement. +static Constant *replaceUndefsWith(Constant *C, Constant *Replacement) { + if (C && match(C, m_Undef())) + return Replacement; + + if (auto *CV = dyn_cast(C)) { + llvm::SmallVector NewOps(CV->getNumOperands()); + for (unsigned i = 0, NumElts = NewOps.size(); i != NumElts; ++i) { + Constant *EltC = CV->getOperand(i); + NewOps[i] = EltC && match(EltC, m_Undef()) ? Replacement : EltC; + } + return ConstantVector::get(NewOps); + } + + // Don't know how to deal with this constant. + return C; +} + +// If we have some pattern that leaves only some low bits set, and then performs +// left-shift of those bits, if none of the bits that are left after the final +// shift are modified by the mask, we can omit the mask. +// +// There are many variants to this pattern: +// a) (x & ((1 << MaskShAmt) - 1)) << ShiftShAmt +// b) (x & (~(-1 << MaskShAmt))) << ShiftShAmt +// c) (x & (-1 >> MaskShAmt)) << ShiftShAmt +// d) (x & ((-1 << MaskShAmt) >> MaskShAmt)) << ShiftShAmt +// e) ((x << MaskShAmt) l>> MaskShAmt) << ShiftShAmt +// f) ((x << MaskShAmt) a>> MaskShAmt) << ShiftShAmt +// All these patterns can be simplified to just: +// x << ShiftShAmt +// iff: +// a,b) (MaskShAmt+ShiftShAmt) u>= bitwidth(x) +// c,d,e,f) (ShiftShAmt-MaskShAmt) s>= 0 (i.e. ShiftShAmt u>= MaskShAmt) +static Instruction * +dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift, + const SimplifyQuery &Q, + InstCombiner::BuilderTy &Builder) { + assert(OuterShift->getOpcode() == Instruction::BinaryOps::Shl && + "The input must be 'shl'!"); + + Value *Masked, *ShiftShAmt; + match(OuterShift, m_Shift(m_Value(Masked), m_Value(ShiftShAmt))); + + Type *NarrowestTy = OuterShift->getType(); + Type *WidestTy = Masked->getType(); + // The mask must be computed in a type twice as wide to ensure + // that no bits are lost if the sum-of-shifts is wider than the base type. 
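For variants a) and b) listed above, the mask can be dropped exactly when MaskShAmt + ShiftShAmt u>= bitwidth(x): every bit cleared by the mask is shifted out anyway. A standalone spot check at 32 bits with MaskShAmt = 20 and ShiftShAmt = 16 (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  const unsigned MaskShAmt = 20, ShiftShAmt = 16; // 20 + 16 >= 32
  for (uint32_t x : {0u, 1u, 0x12345678u, 0xDEADBEEFu, 0xFFFFFFFFu}) {
    uint32_t masked = (x & ((1u << MaskShAmt) - 1)) << ShiftShAmt; // variant a)
    uint32_t plain  = x << ShiftShAmt;                             // simplified form
    assert(masked == plain);
  }
  return 0;
}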
+ Type *ExtendedTy = WidestTy->getExtendedType(); + + Value *MaskShAmt; + + // ((1 << MaskShAmt) - 1) + auto MaskA = m_Add(m_Shl(m_One(), m_Value(MaskShAmt)), m_AllOnes()); + // (~(-1 << maskNbits)) + auto MaskB = m_Xor(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_AllOnes()); + // (-1 >> MaskShAmt) + auto MaskC = m_Shr(m_AllOnes(), m_Value(MaskShAmt)); + // ((-1 << MaskShAmt) >> MaskShAmt) + auto MaskD = + m_Shr(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_Deferred(MaskShAmt)); + + Value *X; + Constant *NewMask; + + if (match(Masked, m_c_And(m_CombineOr(MaskA, MaskB), m_Value(X)))) { + // Can we simplify (MaskShAmt+ShiftShAmt) ? + auto *SumOfShAmts = dyn_cast_or_null(SimplifyAddInst( + MaskShAmt, ShiftShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q)); + if (!SumOfShAmts) + return nullptr; // Did not simplify. + // In this pattern SumOfShAmts correlates with the number of low bits + // that shall remain in the root value (OuterShift). + + // An extend of an undef value becomes zero because the high bits are never + // completely unknown. Replace the the `undef` shift amounts with final + // shift bitwidth to ensure that the value remains undef when creating the + // subsequent shift op. + SumOfShAmts = replaceUndefsWith( + SumOfShAmts, ConstantInt::get(SumOfShAmts->getType()->getScalarType(), + ExtendedTy->getScalarSizeInBits())); + auto *ExtendedSumOfShAmts = ConstantExpr::getZExt(SumOfShAmts, ExtendedTy); + // And compute the mask as usual: ~(-1 << (SumOfShAmts)) + auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy); + auto *ExtendedInvertedMask = + ConstantExpr::getShl(ExtendedAllOnes, ExtendedSumOfShAmts); + NewMask = ConstantExpr::getNot(ExtendedInvertedMask); + } else if (match(Masked, m_c_And(m_CombineOr(MaskC, MaskD), m_Value(X))) || + match(Masked, m_Shr(m_Shl(m_Value(X), m_Value(MaskShAmt)), + m_Deferred(MaskShAmt)))) { + // Can we simplify (ShiftShAmt-MaskShAmt) ? + auto *ShAmtsDiff = dyn_cast_or_null(SimplifySubInst( + ShiftShAmt, MaskShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q)); + if (!ShAmtsDiff) + return nullptr; // Did not simplify. + // In this pattern ShAmtsDiff correlates with the number of high bits that + // shall be unset in the root value (OuterShift). + + // An extend of an undef value becomes zero because the high bits are never + // completely unknown. Replace the the `undef` shift amounts with negated + // bitwidth of innermost shift to ensure that the value remains undef when + // creating the subsequent shift op. + unsigned WidestTyBitWidth = WidestTy->getScalarSizeInBits(); + ShAmtsDiff = replaceUndefsWith( + ShAmtsDiff, ConstantInt::get(ShAmtsDiff->getType()->getScalarType(), + -WidestTyBitWidth)); + auto *ExtendedNumHighBitsToClear = ConstantExpr::getZExt( + ConstantExpr::getSub(ConstantInt::get(ShAmtsDiff->getType(), + WidestTyBitWidth, + /*isSigned=*/false), + ShAmtsDiff), + ExtendedTy); + // And compute the mask as usual: (-1 l>> (NumHighBitsToClear)) + auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy); + NewMask = + ConstantExpr::getLShr(ExtendedAllOnes, ExtendedNumHighBitsToClear); + } else + return nullptr; // Don't know anything about this pattern. + + NewMask = ConstantExpr::getTrunc(NewMask, NarrowestTy); + + // Does this mask has any unset bits? If not then we can just not apply it. + bool NeedMask = !match(NewMask, m_AllOnes()); + + // If we need to apply a mask, there are several more restrictions we have. + if (NeedMask) { + // The old masking instruction must go away. 
+ if (!Masked->hasOneUse()) + return nullptr; + // The original "masking" instruction must not have been`ashr`. + if (match(Masked, m_AShr(m_Value(), m_Value()))) + return nullptr; } - return NewShift; + + // No 'NUW'/'NSW'! We no longer know that we won't shift-out non-0 bits. + auto *NewShift = BinaryOperator::Create(OuterShift->getOpcode(), X, + OuterShift->getOperand(1)); + + if (!NeedMask) + return NewShift; + + Builder.Insert(NewShift); + return BinaryOperator::Create(Instruction::And, NewShift, NewMask); } Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); assert(Op0->getType() == Op1->getType()); + // If the shift amount is a one-use `sext`, we can demote it to `zext`. + Value *Y; + if (match(Op1, m_OneUse(m_SExt(m_Value(Y))))) { + Value *NewExt = Builder.CreateZExt(Y, I.getType(), Op1->getName()); + return BinaryOperator::Create(I.getOpcode(), Op0, NewExt); + } + // See if we can fold away this shift. if (SimplifyDemandedInstructionBits(I)) return &I; @@ -83,8 +308,8 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I)) return Res; - if (Instruction *NewShift = - reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ)) + if (auto *NewShift = cast_or_null( + reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ))) return NewShift; // (C1 shift (A add C2)) -> (C1 shift C2) shift A) @@ -618,9 +843,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, } Instruction *InstCombiner::visitShl(BinaryOperator &I) { + const SimplifyQuery Q = SQ.getWithInstruction(&I); + if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1), - I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), - SQ.getWithInstruction(&I))) + I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), Q)) return replaceInstUsesWith(I, V); if (Instruction *X = foldVectorBinop(I)) @@ -629,6 +855,9 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { if (Instruction *V = commonShiftTransforms(I)) return V; + if (Instruction *V = dropRedundantMaskingOfLeftShiftInput(&I, Q, Builder)) + return V; + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); Type *Ty = I.getType(); unsigned BitWidth = Ty->getScalarSizeInBits(); @@ -636,12 +865,11 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { const APInt *ShAmtAPInt; if (match(Op1, m_APInt(ShAmtAPInt))) { unsigned ShAmt = ShAmtAPInt->getZExtValue(); - unsigned BitWidth = Ty->getScalarSizeInBits(); // shl (zext X), ShAmt --> zext (shl X, ShAmt) // This is only valid if X would have zeros shifted out. 
Value *X; - if (match(Op0, m_ZExt(m_Value(X)))) { + if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) { unsigned SrcWidth = X->getType()->getScalarSizeInBits(); if (ShAmt < SrcWidth && MaskedValueIsZero(X, APInt::getHighBitsSet(SrcWidth, ShAmt), 0, &I)) @@ -719,6 +947,12 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { // (X * C2) << C1 --> X * (C2 << C1) if (match(Op0, m_Mul(m_Value(X), m_Constant(C2)))) return BinaryOperator::CreateMul(X, ConstantExpr::getShl(C2, C1)); + + // shl (zext i1 X), C1 --> select (X, 1 << C1, 0) + if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) { + auto *NewC = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C1); + return SelectInst::Create(X, NewC, ConstantInt::getNullValue(Ty)); + } } // (1 << (C - x)) -> ((1 << C) >> x) if C is bitwidth - 1 @@ -859,6 +1093,75 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { return nullptr; } +Instruction * +InstCombiner::foldVariableSignZeroExtensionOfVariableHighBitExtract( + BinaryOperator &OldAShr) { + assert(OldAShr.getOpcode() == Instruction::AShr && + "Must be called with arithmetic right-shift instruction only."); + + // Check that constant C is a splat of the element-wise bitwidth of V. + auto BitWidthSplat = [](Constant *C, Value *V) { + return match( + C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ, + APInt(C->getType()->getScalarSizeInBits(), + V->getType()->getScalarSizeInBits()))); + }; + + // It should look like variable-length sign-extension on the outside: + // (Val << (bitwidth(Val)-Nbits)) a>> (bitwidth(Val)-Nbits) + Value *NBits; + Instruction *MaybeTrunc; + Constant *C1, *C2; + if (!match(&OldAShr, + m_AShr(m_Shl(m_Instruction(MaybeTrunc), + m_ZExtOrSelf(m_Sub(m_Constant(C1), + m_ZExtOrSelf(m_Value(NBits))))), + m_ZExtOrSelf(m_Sub(m_Constant(C2), + m_ZExtOrSelf(m_Deferred(NBits)))))) || + !BitWidthSplat(C1, &OldAShr) || !BitWidthSplat(C2, &OldAShr)) + return nullptr; + + // There may or may not be a truncation after outer two shifts. + Instruction *HighBitExtract; + match(MaybeTrunc, m_TruncOrSelf(m_Instruction(HighBitExtract))); + bool HadTrunc = MaybeTrunc != HighBitExtract; + + // And finally, the innermost part of the pattern must be a right-shift. + Value *X, *NumLowBitsToSkip; + if (!match(HighBitExtract, m_Shr(m_Value(X), m_Value(NumLowBitsToSkip)))) + return nullptr; + + // Said right-shift must extract high NBits bits - C0 must be it's bitwidth. + Constant *C0; + if (!match(NumLowBitsToSkip, + m_ZExtOrSelf( + m_Sub(m_Constant(C0), m_ZExtOrSelf(m_Specific(NBits))))) || + !BitWidthSplat(C0, HighBitExtract)) + return nullptr; + + // Since the NBits is identical for all shifts, if the outermost and + // innermost shifts are identical, then outermost shifts are redundant. + // If we had truncation, do keep it though. + if (HighBitExtract->getOpcode() == OldAShr.getOpcode()) + return replaceInstUsesWith(OldAShr, MaybeTrunc); + + // Else, if there was a truncation, then we need to ensure that one + // instruction will go away. + if (HadTrunc && !match(&OldAShr, m_c_BinOp(m_OneUse(m_Value()), m_Value()))) + return nullptr; + + // Finally, bypass two innermost shifts, and perform the outermost shift on + // the operands of the innermost shift. + Instruction *NewAShr = + BinaryOperator::Create(OldAShr.getOpcode(), X, NumLowBitsToSkip); + NewAShr->copyIRFlags(HighBitExtract); // We can preserve 'exact'-ness. 
+ if (!HadTrunc) + return NewAShr; + + Builder.Insert(NewAShr); + return TruncInst::CreateTruncOrBitCast(NewAShr, OldAShr.getType()); +} + Instruction *InstCombiner::visitAShr(BinaryOperator &I) { if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1), I.isExact(), SQ.getWithInstruction(&I))) @@ -933,6 +1236,9 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { } } + if (Instruction *R = foldVariableSignZeroExtensionOfVariableHighBitExtract(I)) + return R; + // See if we can turn a signed shr into an unsigned shr. if (MaskedValueIsZero(Op0, APInt::getSignMask(BitWidth), 0, &I)) return BinaryOperator::CreateLShr(Op0, Op1); diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index e0d85c4b49ae..d30ab8001897 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -971,6 +971,13 @@ InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1, Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II, APInt DemandedElts, int DMaskIdx) { + + // FIXME: Allow v3i16/v3f16 in buffer intrinsics when the types are fully supported. + if (DMaskIdx < 0 && + II->getType()->getScalarSizeInBits() != 32 && + DemandedElts.getActiveBits() == 3) + return nullptr; + unsigned VWidth = II->getType()->getVectorNumElements(); if (VWidth == 1) return nullptr; @@ -1067,16 +1074,22 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II, } /// The specified value produces a vector with any number of elements. +/// This method analyzes which elements of the operand are undef and returns +/// that information in UndefElts. +/// /// DemandedElts contains the set of elements that are actually used by the -/// caller. This method analyzes which elements of the operand are undef and -/// returns that information in UndefElts. +/// caller, and by default (AllowMultipleUsers equals false) the value is +/// simplified only if it has a single caller. If AllowMultipleUsers is set +/// to true, DemandedElts refers to the union of sets of elements that are +/// used by all callers. /// /// If the information about demanded elements can be used to simplify the /// operation, the operation is simplified, then the resultant value is /// returned. This returns null if no change was made. Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &UndefElts, - unsigned Depth) { + unsigned Depth, + bool AllowMultipleUsers) { unsigned VWidth = V->getType()->getVectorNumElements(); APInt EltMask(APInt::getAllOnesValue(VWidth)); assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!"); @@ -1130,19 +1143,21 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, if (Depth == 10) return nullptr; - // If multiple users are using the root value, proceed with - // simplification conservatively assuming that all elements - // are needed. - if (!V->hasOneUse()) { - // Quit if we find multiple users of a non-root value though. - // They'll be handled when it's their turn to be visited by - // the main instcombine process. - if (Depth != 0) - // TODO: Just compute the UndefElts information recursively. - return nullptr; + if (!AllowMultipleUsers) { + // If multiple users are using the root value, proceed with + // simplification conservatively assuming that all elements + // are needed. 
+ if (!V->hasOneUse()) { + // Quit if we find multiple users of a non-root value though. + // They'll be handled when it's their turn to be visited by + // the main instcombine process. + if (Depth != 0) + // TODO: Just compute the UndefElts information recursively. + return nullptr; - // Conservatively assume that all elements are needed. - DemandedElts = EltMask; + // Conservatively assume that all elements are needed. + DemandedElts = EltMask; + } } Instruction *I = dyn_cast(V); @@ -1674,8 +1689,11 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, case Intrinsic::amdgcn_buffer_load_format: case Intrinsic::amdgcn_raw_buffer_load: case Intrinsic::amdgcn_raw_buffer_load_format: + case Intrinsic::amdgcn_raw_tbuffer_load: case Intrinsic::amdgcn_struct_buffer_load: case Intrinsic::amdgcn_struct_buffer_load_format: + case Intrinsic::amdgcn_struct_tbuffer_load: + case Intrinsic::amdgcn_tbuffer_load: return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts); default: { if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID())) diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index dc9abdd7f47a..9c890748e5ab 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -253,6 +253,69 @@ static Instruction *foldBitcastExtElt(ExtractElementInst &Ext, return nullptr; } +/// Find elements of V demanded by UserInstr. +static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) { + unsigned VWidth = V->getType()->getVectorNumElements(); + + // Conservatively assume that all elements are needed. + APInt UsedElts(APInt::getAllOnesValue(VWidth)); + + switch (UserInstr->getOpcode()) { + case Instruction::ExtractElement: { + ExtractElementInst *EEI = cast(UserInstr); + assert(EEI->getVectorOperand() == V); + ConstantInt *EEIIndexC = dyn_cast(EEI->getIndexOperand()); + if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) { + UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue()); + } + break; + } + case Instruction::ShuffleVector: { + ShuffleVectorInst *Shuffle = cast(UserInstr); + unsigned MaskNumElts = UserInstr->getType()->getVectorNumElements(); + + UsedElts = APInt(VWidth, 0); + for (unsigned i = 0; i < MaskNumElts; i++) { + unsigned MaskVal = Shuffle->getMaskValue(i); + if (MaskVal == -1u || MaskVal >= 2 * VWidth) + continue; + if (Shuffle->getOperand(0) == V && (MaskVal < VWidth)) + UsedElts.setBit(MaskVal); + if (Shuffle->getOperand(1) == V && + ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth))) + UsedElts.setBit(MaskVal - VWidth); + } + break; + } + default: + break; + } + return UsedElts; +} + +/// Find union of elements of V demanded by all its users. +/// If it is known by querying findDemandedEltsBySingleUser that +/// no user demands an element of V, then the corresponding bit +/// remains unset in the returned value. 
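A simplified model of the union computed by findDemandedEltsByAllUsers (defined just below): every extractelement user with a known in-range constant index contributes one lane, and any user that cannot be analyzed forces all lanes to be treated as demanded. Standalone sketch with a plain bitmask standing in for APInt (illustrative only):

#include <cstdint>
#include <vector>

// A user either extracts one known lane or is something we cannot analyze.
struct VecUse {
  bool IsConstIdxExtract;
  unsigned Index; // only meaningful when IsConstIdxExtract is true
};

static uint32_t demandedEltsUnion(const std::vector<VecUse> &Uses,
                                  unsigned VWidth) {
  const uint32_t AllOnes = VWidth >= 32 ? ~0u : (1u << VWidth) - 1;
  uint32_t Union = 0;
  for (const VecUse &U : Uses) {
    if (!U.IsConstIdxExtract || U.Index >= VWidth)
      return AllOnes; // conservatively demand every element
    Union |= 1u << U.Index;
    if (Union == AllOnes)
      break;          // already as wide as it can get
  }
  return Union;
}

int main() {
  // Two extracts of lanes 0 and 2 from a 4-wide vector demand only those lanes.
  return demandedEltsUnion({{true, 0}, {true, 2}}, 4) == 0b0101u ? 0 : 1;
}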
+static APInt findDemandedEltsByAllUsers(Value *V) { + unsigned VWidth = V->getType()->getVectorNumElements(); + + APInt UnionUsedElts(VWidth, 0); + for (const Use &U : V->uses()) { + if (Instruction *I = dyn_cast(U.getUser())) { + UnionUsedElts |= findDemandedEltsBySingleUser(V, I); + } else { + UnionUsedElts = APInt::getAllOnesValue(VWidth); + break; + } + + if (UnionUsedElts.isAllOnesValue()) + break; + } + + return UnionUsedElts; +} + Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { Value *SrcVec = EI.getVectorOperand(); Value *Index = EI.getIndexOperand(); @@ -271,19 +334,35 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { return nullptr; // This instruction only demands the single element from the input vector. - // If the input vector has a single use, simplify it based on this use - // property. - if (SrcVec->hasOneUse() && NumElts != 1) { - APInt UndefElts(NumElts, 0); - APInt DemandedElts(NumElts, 0); - DemandedElts.setBit(IndexC->getZExtValue()); - if (Value *V = SimplifyDemandedVectorElts(SrcVec, DemandedElts, - UndefElts)) { - EI.setOperand(0, V); - return &EI; + if (NumElts != 1) { + // If the input vector has a single use, simplify it based on this use + // property. + if (SrcVec->hasOneUse()) { + APInt UndefElts(NumElts, 0); + APInt DemandedElts(NumElts, 0); + DemandedElts.setBit(IndexC->getZExtValue()); + if (Value *V = + SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts)) { + EI.setOperand(0, V); + return &EI; + } + } else { + // If the input vector has multiple uses, simplify it based on a union + // of all elements used. + APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec); + if (!DemandedElts.isAllOnesValue()) { + APInt UndefElts(NumElts, 0); + if (Value *V = SimplifyDemandedVectorElts( + SrcVec, DemandedElts, UndefElts, 0 /* Depth */, + true /* AllowMultipleUsers */)) { + if (V != SrcVec) { + SrcVec->replaceAllUsesWith(V); + return &EI; + } + } + } } } - if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian())) return I; @@ -766,6 +845,55 @@ static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) { return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask); } +/// Try to fold an extract+insert element into an existing identity shuffle by +/// changing the shuffle's mask to include the index of this insert element. +static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) { + // Check if the vector operand of this insert is an identity shuffle. + auto *Shuf = dyn_cast(InsElt.getOperand(0)); + if (!Shuf || !isa(Shuf->getOperand(1)) || + !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding())) + return nullptr; + + // Check for a constant insertion index. + uint64_t IdxC; + if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC))) + return nullptr; + + // Check if this insert's scalar op is extracted from the identity shuffle's + // input vector. + Value *Scalar = InsElt.getOperand(1); + Value *X = Shuf->getOperand(0); + if (!match(Scalar, m_ExtractElement(m_Specific(X), m_SpecificInt(IdxC)))) + return nullptr; + + // Replace the shuffle mask element at the index of this extract+insert with + // that same index value. 
+ // For example: + // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask' + unsigned NumMaskElts = Shuf->getType()->getVectorNumElements(); + SmallVector NewMaskVec(NumMaskElts); + Type *I32Ty = IntegerType::getInt32Ty(Shuf->getContext()); + Constant *NewMaskEltC = ConstantInt::get(I32Ty, IdxC); + Constant *OldMask = Shuf->getMask(); + for (unsigned i = 0; i != NumMaskElts; ++i) { + if (i != IdxC) { + // All mask elements besides the inserted element remain the same. + NewMaskVec[i] = OldMask->getAggregateElement(i); + } else if (OldMask->getAggregateElement(i) == NewMaskEltC) { + // If the mask element was already set, there's nothing to do + // (demanded elements analysis may unset it later). + return nullptr; + } else { + assert(isa(OldMask->getAggregateElement(i)) && + "Unexpected shuffle mask element for identity shuffle"); + NewMaskVec[i] = NewMaskEltC; + } + } + + Constant *NewMask = ConstantVector::get(NewMaskVec); + return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask); +} + /// If we have an insertelement instruction feeding into another insertelement /// and the 2nd is inserting a constant into the vector, canonicalize that /// constant insertion before the insertion of a variable: @@ -987,6 +1115,9 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { if (Instruction *Splat = foldInsEltIntoSplat(IE)) return Splat; + if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE)) + return IdentityShuf; + return nullptr; } @@ -1009,17 +1140,23 @@ static bool canEvaluateShuffled(Value *V, ArrayRef Mask, if (Depth == 0) return false; switch (I->getOpcode()) { + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + // Propagating an undefined shuffle mask element to integer div/rem is not + // allowed because those opcodes can create immediate undefined behavior + // from an undefined element in an operand. + if (llvm::any_of(Mask, [](int M){ return M == -1; })) + return false; + LLVM_FALLTHROUGH; case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: case Instruction::FSub: case Instruction::Mul: case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: case Instruction::FRem: case Instruction::Shl: case Instruction::LShr: @@ -1040,9 +1177,7 @@ static bool canEvaluateShuffled(Value *V, ArrayRef Mask, case Instruction::FPExt: case Instruction::GetElementPtr: { // Bail out if we would create longer vector ops. We could allow creating - // longer vector ops, but that may result in more expensive codegen. We - // would also need to limit the transform to avoid undefined behavior for - // integer div/rem. + // longer vector ops, but that may result in more expensive codegen. Type *ITy = I->getType(); if (ITy->isVectorTy() && Mask.size() > ITy->getVectorNumElements()) return false; diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 385f4926b845..ecb486c544e0 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -200,8 +200,8 @@ bool InstCombiner::shouldChangeType(Type *From, Type *To) const { // where both B and C should be ConstantInts, results in a constant that does // not overflow. This function only handles the Add and Sub opcodes. For // all other opcodes, the function conservatively returns false. 
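The maintainNoSignedWrap helper being renamed just below keeps nsw through the reassociation only when folding the two constants cannot itself overflow. A standalone illustration of that condition using the GCC/Clang overflow builtin (illustrative only):

#include <cassert>
#include <climits>

// Reassociating "add nsw (add nsw X, B), C" into "add nsw X, (B + C)" is only
// safe if the folded constant B + C does not overflow; otherwise nsw is dropped.
static bool constantFoldKeepsNSW(int B, int C) {
  int Folded;
  return !__builtin_add_overflow(B, C, &Folded);
}

int main() {
  assert(constantFoldKeepsNSW(100, 23));     // 123: nsw can be kept
  assert(!constantFoldKeepsNSW(INT_MAX, 1)); // would overflow: drop nsw
  return 0;
}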
-static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { - OverflowingBinaryOperator *OBO = dyn_cast(&I); +static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { + auto *OBO = dyn_cast(&I); if (!OBO || !OBO->hasNoSignedWrap()) return false; @@ -224,10 +224,15 @@ static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { } static bool hasNoUnsignedWrap(BinaryOperator &I) { - OverflowingBinaryOperator *OBO = dyn_cast(&I); + auto *OBO = dyn_cast(&I); return OBO && OBO->hasNoUnsignedWrap(); } +static bool hasNoSignedWrap(BinaryOperator &I) { + auto *OBO = dyn_cast(&I); + return OBO && OBO->hasNoSignedWrap(); +} + /// Conservatively clears subclassOptionalData after a reassociation or /// commutation. We preserve fast-math flags when applicable as they can be /// preserved. @@ -332,22 +337,21 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { // It simplifies to V. Form "A op V". I.setOperand(0, A); I.setOperand(1, V); - // Conservatively clear the optional flags, since they may not be - // preserved by the reassociation. bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0); - bool IsNSW = MaintainNoSignedWrap(I, B, C); + bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0); + // Conservatively clear all optional flags since they may not be + // preserved by the reassociation. Reset nsw/nuw based on the above + // analysis. ClearSubclassDataAfterReassociation(I); + // Note: this is only valid because SimplifyBinOp doesn't look at + // the operands to Op0. if (IsNUW) I.setHasNoUnsignedWrap(true); - if (IsNSW && - (!Op0 || (isa(Op0) && Op0->hasNoSignedWrap()))) { - // Note: this is only valid because SimplifyBinOp doesn't look at - // the operands to Op0. + if (IsNSW) I.setHasNoSignedWrap(true); - } Changed = true; ++NumReassoc; @@ -610,7 +614,6 @@ Value *InstCombiner::tryFactorization(BinaryOperator &I, HasNUW &= ROBO->hasNoUnsignedWrap(); } - const APInt *CInt; if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) { // We can propagate 'nsw' if we know that @@ -620,6 +623,7 @@ Value *InstCombiner::tryFactorization(BinaryOperator &I, // %Z = mul nsw i16 %X, C+1 // // iff C+1 isn't INT_MIN + const APInt *CInt; if (match(V, m_APInt(CInt))) { if (!CInt->isMinSignedValue()) BO->setHasNoSignedWrap(HasNSW); @@ -763,12 +767,16 @@ Value *InstCombiner::SimplifySelectsFeedingBinaryOp(BinaryOperator &I, if (match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C))) && match(RHS, m_Select(m_Specific(A), m_Value(D), m_Value(E)))) { bool SelectsHaveOneUse = LHS->hasOneUse() && RHS->hasOneUse(); + + FastMathFlags FMF; BuilderTy::FastMathFlagGuard Guard(Builder); - if (isa(&I)) - Builder.setFastMathFlags(I.getFastMathFlags()); + if (isa(&I)) { + FMF = I.getFastMathFlags(); + Builder.setFastMathFlags(FMF); + } - Value *V1 = SimplifyBinOp(Opcode, C, E, SQ.getWithInstruction(&I)); - Value *V2 = SimplifyBinOp(Opcode, B, D, SQ.getWithInstruction(&I)); + Value *V1 = SimplifyBinOp(Opcode, C, E, FMF, SQ.getWithInstruction(&I)); + Value *V2 = SimplifyBinOp(Opcode, B, D, FMF, SQ.getWithInstruction(&I)); if (V1 && V2) SI = Builder.CreateSelect(A, V2, V1); else if (V2 && SelectsHaveOneUse) @@ -1659,7 +1667,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // to an index of zero, so replace it with zero if it is not zero already. 
Type *EltTy = GTI.getIndexedType(); if (EltTy->isSized() && DL.getTypeAllocSize(EltTy) == 0) - if (!isa(*I) || !cast(*I)->isNullValue()) { + if (!isa(*I) || !match(I->get(), m_Zero())) { *I = Constant::getNullValue(NewIndexType); MadeChange = true; } @@ -2549,9 +2557,7 @@ Instruction *InstCombiner::visitReturnInst(ReturnInst &RI) { Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { // Change br (not X), label True, label False to: br X, label False, True Value *X = nullptr; - BasicBlock *TrueDest; - BasicBlock *FalseDest; - if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) && + if (match(&BI, m_Br(m_Not(m_Value(X)), m_BasicBlock(), m_BasicBlock())) && !isa(X)) { // Swap Destinations and condition... BI.setCondition(X); @@ -2569,8 +2575,8 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { // Canonicalize, for example, icmp_ne -> icmp_eq or fcmp_one -> fcmp_oeq. CmpInst::Predicate Pred; - if (match(&BI, m_Br(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), TrueDest, - FalseDest)) && + if (match(&BI, m_Br(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), + m_BasicBlock(), m_BasicBlock())) && !isCanonicalPredicate(Pred)) { // Swap destinations and condition. CmpInst *Cond = cast(BI.getCondition()); @@ -3156,6 +3162,21 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { findDbgUsers(DbgUsers, I); for (auto *DII : reverse(DbgUsers)) { if (DII->getParent() == SrcBlock) { + if (isa(DII)) { + // A dbg.declare instruction should not be cloned, since there can only be + // one per variable fragment. It should be left in the original place since + // sunk instruction is not an alloca(otherwise we could not be here). + // But we need to update arguments of dbg.declare instruction, so that it + // would not point into sunk instruction. + if (!isa(I)) + continue; // dbg.declare points at something it shouldn't + + DII->setOperand( + 0, MetadataAsValue::get(I->getContext(), + ValueAsMetadata::get(I->getOperand(0)))); + continue; + } + // dbg.value is in the same basic block as the sunk inst, see if we can // salvage it. Clone a new copy of the instruction: on success we need // both salvaged and unsalvaged copies. @@ -3580,7 +3601,7 @@ bool InstructionCombiningPass::runOnFunction(Function &F) { // Required analyses. auto AA = &getAnalysis().getAAResults(); auto &AC = getAnalysis().getAssumptionCache(F); - auto &TLI = getAnalysis().getTLI(); + auto &TLI = getAnalysis().getTLI(F); auto &DT = getAnalysis().getDomTree(); auto &ORE = getAnalysis().getORE(); diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 6821e214e921..d92ee11c2e1a 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -129,6 +129,8 @@ static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E; static const char *const kAsanModuleCtorName = "asan.module_ctor"; static const char *const kAsanModuleDtorName = "asan.module_dtor"; static const uint64_t kAsanCtorAndDtorPriority = 1; +// On Emscripten, the system needs more than one priorities for constructors. 
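The visitBranchInst hunk above keeps the existing br (not X) canonicalization but matches the destinations with m_BasicBlock() since they are no longer needed as captures; the swap itself is unchanged. A tiny standalone model of that rewrite (BranchModel is illustrative, not an LLVM type):

#include <utility>

struct BranchModel {
  bool CondIsNegated; // models the "br (not X), T, F" form
  int TrueDest, FalseDest;
};

static void foldNegatedCondition(BranchModel &BI) {
  if (!BI.CondIsNegated)
    return;
  BI.CondIsNegated = false;             // branch on X directly instead of !X
  std::swap(BI.TrueDest, BI.FalseDest); // swap successors to compensate
}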
+static const uint64_t kAsanEmscriptenCtorAndDtorPriority = 50; static const char *const kAsanReportErrorTemplate = "__asan_report_"; static const char *const kAsanRegisterGlobalsName = "__asan_register_globals"; static const char *const kAsanUnregisterGlobalsName = @@ -191,6 +193,11 @@ static cl::opt ClRecover( cl::desc("Enable recovery mode (continue-after-error)."), cl::Hidden, cl::init(false)); +static cl::opt ClInsertVersionCheck( + "asan-guard-against-version-mismatch", + cl::desc("Guard against compiler/runtime version mismatch."), + cl::Hidden, cl::init(true)); + // This flag may need to be replaced with -f[no-]asan-reads. static cl::opt ClInstrumentReads("asan-instrument-reads", cl::desc("instrument read instructions"), @@ -530,6 +537,14 @@ static size_t RedzoneSizeForScale(int MappingScale) { return std::max(32U, 1U << MappingScale); } +static uint64_t GetCtorAndDtorPriority(Triple &TargetTriple) { + if (TargetTriple.isOSEmscripten()) { + return kAsanEmscriptenCtorAndDtorPriority; + } else { + return kAsanCtorAndDtorPriority; + } +} + namespace { /// Module analysis for getting various metadata about the module. @@ -565,10 +580,10 @@ char ASanGlobalsMetadataWrapperPass::ID = 0; /// AddressSanitizer: instrument the code in module to find memory bugs. struct AddressSanitizer { - AddressSanitizer(Module &M, GlobalsMetadata &GlobalsMD, + AddressSanitizer(Module &M, const GlobalsMetadata *GlobalsMD, bool CompileKernel = false, bool Recover = false, bool UseAfterScope = false) - : UseAfterScope(UseAfterScope || ClUseAfterScope), GlobalsMD(GlobalsMD) { + : UseAfterScope(UseAfterScope || ClUseAfterScope), GlobalsMD(*GlobalsMD) { this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover; this->CompileKernel = ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan : CompileKernel; @@ -677,7 +692,7 @@ private: FunctionCallee AsanMemmove, AsanMemcpy, AsanMemset; InlineAsm *EmptyAsm; Value *LocalDynamicShadow = nullptr; - GlobalsMetadata GlobalsMD; + const GlobalsMetadata &GlobalsMD; DenseMap ProcessedAllocas; }; @@ -706,8 +721,8 @@ public: GlobalsMetadata &GlobalsMD = getAnalysis().getGlobalsMD(); const TargetLibraryInfo *TLI = - &getAnalysis().getTLI(); - AddressSanitizer ASan(*F.getParent(), GlobalsMD, CompileKernel, Recover, + &getAnalysis().getTLI(F); + AddressSanitizer ASan(*F.getParent(), &GlobalsMD, CompileKernel, Recover, UseAfterScope); return ASan.instrumentFunction(F, TLI); } @@ -720,10 +735,10 @@ private: class ModuleAddressSanitizer { public: - ModuleAddressSanitizer(Module &M, GlobalsMetadata &GlobalsMD, + ModuleAddressSanitizer(Module &M, const GlobalsMetadata *GlobalsMD, bool CompileKernel = false, bool Recover = false, bool UseGlobalsGC = true, bool UseOdrIndicator = false) - : GlobalsMD(GlobalsMD), UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC), + : GlobalsMD(*GlobalsMD), UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC), // Enable aliases as they should have no downside with ODR indicators. 
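A standalone sketch of the priority selection that the new kAsanEmscriptenCtorAndDtorPriority constant and the GetCtorAndDtorPriority() helper (defined further down in this hunk) implement: Emscripten needs the sanitizer ctor/dtor registered at priority 50 rather than the default 1, so the priority becomes a per-target query instead of a single constant. The OSKind enum is an illustrative stand-in for querying llvm::Triple::isOSEmscripten():

#include <cstdint>

enum class OSKind { Emscripten, Other };

static constexpr uint64_t kDefaultCtorDtorPriority = 1;
static constexpr uint64_t kEmscriptenCtorDtorPriority = 50;

static uint64_t ctorAndDtorPriority(OSKind OS) {
  return OS == OSKind::Emscripten ? kEmscriptenCtorDtorPriority
                                  : kDefaultCtorDtorPriority;
}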
UsePrivateAlias(UseOdrIndicator || ClUsePrivateAlias), UseOdrIndicator(UseOdrIndicator || ClUseOdrIndicator), @@ -783,7 +798,7 @@ private: } int GetAsanVersion(const Module &M) const; - GlobalsMetadata GlobalsMD; + const GlobalsMetadata &GlobalsMD; bool CompileKernel; bool Recover; bool UseGlobalsGC; @@ -830,7 +845,7 @@ public: bool runOnModule(Module &M) override { GlobalsMetadata &GlobalsMD = getAnalysis().getGlobalsMD(); - ModuleAddressSanitizer ASanModule(M, GlobalsMD, CompileKernel, Recover, + ModuleAddressSanitizer ASanModule(M, &GlobalsMD, CompileKernel, Recover, UseGlobalGC, UseOdrIndicator); return ASanModule.instrumentModule(M); } @@ -1033,7 +1048,7 @@ struct FunctionStackPoisoner : public InstVisitor { if (!II.isLifetimeStartOrEnd()) return; // Found lifetime intrinsic, add ASan instrumentation if necessary. - ConstantInt *Size = dyn_cast(II.getArgOperand(0)); + auto *Size = cast(II.getArgOperand(0)); // If size argument is undefined, don't do anything. if (Size->isMinusOne()) return; // Check that size doesn't saturate uint64_t and can @@ -1156,7 +1171,7 @@ PreservedAnalyses AddressSanitizerPass::run(Function &F, Module &M = *F.getParent(); if (auto *R = MAM.getCachedResult(M)) { const TargetLibraryInfo *TLI = &AM.getResult(F); - AddressSanitizer Sanitizer(M, *R, CompileKernel, Recover, UseAfterScope); + AddressSanitizer Sanitizer(M, R, CompileKernel, Recover, UseAfterScope); if (Sanitizer.instrumentFunction(F, TLI)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); @@ -1178,7 +1193,7 @@ ModuleAddressSanitizerPass::ModuleAddressSanitizerPass(bool CompileKernel, PreservedAnalyses ModuleAddressSanitizerPass::run(Module &M, AnalysisManager &AM) { GlobalsMetadata &GlobalsMD = AM.getResult(M); - ModuleAddressSanitizer Sanitizer(M, GlobalsMD, CompileKernel, Recover, + ModuleAddressSanitizer Sanitizer(M, &GlobalsMD, CompileKernel, Recover, UseGlobalGC, UseOdrIndicator); if (Sanitizer.instrumentModule(M)) return PreservedAnalyses::none(); @@ -1331,7 +1346,7 @@ Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I, unsigned *Alignment, Value **MaybeMask) { // Skip memory accesses inserted by another instrumentation. - if (I->getMetadata("nosanitize")) return nullptr; + if (I->hasMetadata("nosanitize")) return nullptr; // Do not instrument the load fetching the dynamic shadow address. if (LocalDynamicShadow == I) @@ -1775,9 +1790,10 @@ void ModuleAddressSanitizer::createInitializerPoisonCalls( // Must have a function or null ptr. if (Function *F = dyn_cast(CS->getOperand(1))) { if (F->getName() == kAsanModuleCtorName) continue; - ConstantInt *Priority = dyn_cast(CS->getOperand(0)); + auto *Priority = cast(CS->getOperand(0)); // Don't instrument CTORs that will run before asan.module_ctor. 
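Several hunks above switch AddressSanitizer and ModuleAddressSanitizer from taking GlobalsMetadata by value to taking a pointer and storing a const reference, so a per-function sanitizer instance no longer copies the analysis result. A standalone sketch of that ownership shape; GlobalsInfo and SanitizerModel are illustrative stand-ins, not the LLVM classes:

#include <cstddef>
#include <map>
#include <string>

struct GlobalsInfo {
  std::map<std::string, int> Entries; // stands in for the per-global metadata
};

class SanitizerModel {
  const GlobalsInfo &GlobalsMD; // borrowed from the analysis, not copied

public:
  explicit SanitizerModel(const GlobalsInfo *MD) : GlobalsMD(*MD) {}
  std::size_t numKnownGlobals() const { return GlobalsMD.Entries.size(); }
};

The analysis (legacy wrapper pass or new-pass-manager result) keeps owning the data; the sanitizer only borrows it for the duration of instrumentation.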
- if (Priority->getLimitedValue() <= kAsanCtorAndDtorPriority) continue; + if (Priority->getLimitedValue() <= GetCtorAndDtorPriority(TargetTriple)) + continue; poisonOneInitializer(*F, ModuleName); } } @@ -1919,7 +1935,12 @@ StringRef ModuleAddressSanitizer::getGlobalMetadataSection() const { case Triple::COFF: return ".ASAN$GL"; case Triple::ELF: return "asan_globals"; case Triple::MachO: return "__DATA,__asan_globals,regular"; - default: break; + case Triple::Wasm: + case Triple::XCOFF: + report_fatal_error( + "ModuleAddressSanitizer not implemented for object file format."); + case Triple::UnknownObjectFormat: + break; } llvm_unreachable("unsupported object format"); } @@ -2033,7 +2054,7 @@ void ModuleAddressSanitizer::InstrumentGlobalsCOFF( unsigned SizeOfGlobalStruct = DL.getTypeAllocSize(Initializer->getType()); assert(isPowerOf2_32(SizeOfGlobalStruct) && "global metadata will not be padded appropriately"); - Metadata->setAlignment(SizeOfGlobalStruct); + Metadata->setAlignment(assumeAligned(SizeOfGlobalStruct)); SetComdatForGlobalMetadata(G, Metadata, ""); } @@ -2170,7 +2191,7 @@ void ModuleAddressSanitizer::InstrumentGlobalsWithMetadataArray( M, ArrayOfGlobalStructTy, false, GlobalVariable::InternalLinkage, ConstantArray::get(ArrayOfGlobalStructTy, MetadataInitializers), ""); if (Mapping.Scale > 3) - AllGlobals->setAlignment(1ULL << Mapping.Scale); + AllGlobals->setAlignment(Align(1ULL << Mapping.Scale)); IRB.CreateCall(AsanRegisterGlobals, {IRB.CreatePointerCast(AllGlobals, IntptrTy), @@ -2270,7 +2291,7 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M, "", G, G->getThreadLocalMode()); NewGlobal->copyAttributesFrom(G); NewGlobal->setComdat(G->getComdat()); - NewGlobal->setAlignment(MinRZ); + NewGlobal->setAlignment(MaybeAlign(MinRZ)); // Don't fold globals with redzones. ODR violation detector and redzone // poisoning implicitly creates a dependence on the global's address, so it // is no longer valid for it to be marked unnamed_addr. @@ -2338,7 +2359,7 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M, // Set meaningful attributes for indicator symbol. ODRIndicatorSym->setVisibility(NewGlobal->getVisibility()); ODRIndicatorSym->setDLLStorageClass(NewGlobal->getDLLStorageClass()); - ODRIndicatorSym->setAlignment(1); + ODRIndicatorSym->setAlignment(Align::None()); ODRIndicator = ODRIndicatorSym; } @@ -2410,39 +2431,39 @@ bool ModuleAddressSanitizer::instrumentModule(Module &M) { // Create a module constructor. A destructor is created lazily because not all // platforms, and not all modules need it. + std::string AsanVersion = std::to_string(GetAsanVersion(M)); std::string VersionCheckName = - kAsanVersionCheckNamePrefix + std::to_string(GetAsanVersion(M)); + ClInsertVersionCheck ? (kAsanVersionCheckNamePrefix + AsanVersion) : ""; std::tie(AsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions( M, kAsanModuleCtorName, kAsanInitName, /*InitArgTypes=*/{}, /*InitArgs=*/{}, VersionCheckName); bool CtorComdat = true; - bool Changed = false; // TODO(glider): temporarily disabled globals instrumentation for KASan. if (ClGlobals) { IRBuilder<> IRB(AsanCtorFunction->getEntryBlock().getTerminator()); - Changed |= InstrumentGlobals(IRB, M, &CtorComdat); + InstrumentGlobals(IRB, M, &CtorComdat); } + const uint64_t Priority = GetCtorAndDtorPriority(TargetTriple); + // Put the constructor and destructor in comdat if both // (1) global instrumentation is not TU-specific // (2) target is ELF. 
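The setAlignment() changes above are part of the migration away from raw integers for alignments: an Align always holds a known power of two (Align::None() being an alignment of one, replacing explicit setAlignment(1)), while a MaybeAlign may be unset, replacing the old convention of 0 meaning "unspecified". A rough standalone model of the distinction using std::optional; these types are stand-ins, not the ones from llvm/Support/Alignment.h:

#include <cassert>
#include <cstdint>
#include <optional>

struct Align {
  uint64_t Value;
  explicit Align(uint64_t V) : Value(V) {
    assert(V != 0 && (V & (V - 1)) == 0 && "alignment must be a power of two");
  }
};

using MaybeAlign = std::optional<Align>;

// The old convention encoded "unknown" as 0; the optional makes that case
// explicit instead of overloading the integer.
static MaybeAlign fromLegacy(uint64_t OldValue) {
  return OldValue == 0 ? MaybeAlign() : MaybeAlign(Align(OldValue));
}

Call sites that know the value is a non-zero power of two construct an Align directly; call sites that may still see 0 wrap the value as shown.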
if (UseCtorComdat && TargetTriple.isOSBinFormatELF() && CtorComdat) { AsanCtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleCtorName)); - appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority, - AsanCtorFunction); + appendToGlobalCtors(M, AsanCtorFunction, Priority, AsanCtorFunction); if (AsanDtorFunction) { AsanDtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleDtorName)); - appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority, - AsanDtorFunction); + appendToGlobalDtors(M, AsanDtorFunction, Priority, AsanDtorFunction); } } else { - appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority); + appendToGlobalCtors(M, AsanCtorFunction, Priority); if (AsanDtorFunction) - appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority); + appendToGlobalDtors(M, AsanDtorFunction, Priority); } - return Changed; + return true; } void AddressSanitizer::initializeCallbacks(Module &M) { @@ -2664,7 +2685,7 @@ bool AddressSanitizer::instrumentFunction(Function &F, if (CS) { // A call inside BB. TempsToInstrument.clear(); - if (CS.doesNotReturn() && !CS->getMetadata("nosanitize")) + if (CS.doesNotReturn() && !CS->hasMetadata("nosanitize")) NoReturnCalls.push_back(CS.getInstruction()); } if (CallInst *CI = dyn_cast(&Inst)) @@ -2877,18 +2898,19 @@ void FunctionStackPoisoner::copyArgsPassedByValToAllocas() { for (Argument &Arg : F.args()) { if (Arg.hasByValAttr()) { Type *Ty = Arg.getType()->getPointerElementType(); - unsigned Align = Arg.getParamAlignment(); - if (Align == 0) Align = DL.getABITypeAlignment(Ty); + unsigned Alignment = Arg.getParamAlignment(); + if (Alignment == 0) + Alignment = DL.getABITypeAlignment(Ty); AllocaInst *AI = IRB.CreateAlloca( Ty, nullptr, (Arg.hasName() ? Arg.getName() : "Arg" + Twine(Arg.getArgNo())) + ".byval"); - AI->setAlignment(Align); + AI->setAlignment(Align(Alignment)); Arg.replaceAllUsesWith(AI); uint64_t AllocSize = DL.getTypeAllocSize(Ty); - IRB.CreateMemCpy(AI, Align, &Arg, Align, AllocSize); + IRB.CreateMemCpy(AI, Alignment, &Arg, Alignment, AllocSize); } } } @@ -2919,7 +2941,7 @@ Value *FunctionStackPoisoner::createAllocaForLayout( } assert((ClRealignStack & (ClRealignStack - 1)) == 0); size_t FrameAlignment = std::max(L.FrameAlignment, (size_t)ClRealignStack); - Alloca->setAlignment(FrameAlignment); + Alloca->setAlignment(MaybeAlign(FrameAlignment)); return IRB.CreatePointerCast(Alloca, IntptrTy); } @@ -2928,7 +2950,7 @@ void FunctionStackPoisoner::createDynamicAllocasInitStorage() { IRBuilder<> IRB(dyn_cast(FirstBB.begin())); DynamicAllocaLayout = IRB.CreateAlloca(IntptrTy, nullptr); IRB.CreateStore(Constant::getNullValue(IntptrTy), DynamicAllocaLayout); - DynamicAllocaLayout->setAlignment(32); + DynamicAllocaLayout->setAlignment(Align(32)); } void FunctionStackPoisoner::processDynamicAllocas() { @@ -3275,7 +3297,7 @@ void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) { // Insert new alloca with new NewSize and Align params. 
AllocaInst *NewAlloca = IRB.CreateAlloca(IRB.getInt8Ty(), NewSize); - NewAlloca->setAlignment(Align); + NewAlloca->setAlignment(MaybeAlign(Align)); // NewAddress = Address + Align Value *NewAddress = IRB.CreateAdd(IRB.CreatePtrToInt(NewAlloca, IntptrTy), diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp index 4dc9b611c156..ae34be986537 100644 --- a/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -224,7 +224,7 @@ struct BoundsCheckingLegacyPass : public FunctionPass { } bool runOnFunction(Function &F) override { - auto &TLI = getAnalysis().getTLI(); + auto &TLI = getAnalysis().getTLI(F); auto &SE = getAnalysis().getSE(); return addBoundsChecking(F, TLI, SE); } diff --git a/lib/Transforms/Instrumentation/CFGMST.h b/lib/Transforms/Instrumentation/CFGMST.h index 971e00041762..8bb6f47c4846 100644 --- a/lib/Transforms/Instrumentation/CFGMST.h +++ b/lib/Transforms/Instrumentation/CFGMST.h @@ -257,13 +257,13 @@ public: std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Src, nullptr)); if (Inserted) { // Newly inserted, update the real info. - Iter->second = std::move(llvm::make_unique(Index)); + Iter->second = std::move(std::make_unique(Index)); Index++; } std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Dest, nullptr)); if (Inserted) // Newly inserted, update the real info. - Iter->second = std::move(llvm::make_unique(Index)); + Iter->second = std::move(std::make_unique(Index)); AllEdges.emplace_back(new Edge(Src, Dest, W)); return *AllEdges.back(); } diff --git a/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/lib/Transforms/Instrumentation/ControlHeightReduction.cpp index 3f4f9bc7145d..55c64fa4b727 100644 --- a/lib/Transforms/Instrumentation/ControlHeightReduction.cpp +++ b/lib/Transforms/Instrumentation/ControlHeightReduction.cpp @@ -512,30 +512,38 @@ static bool isHoistable(Instruction *I, DominatorTree &DT) { // first-region entry block) or the (hoistable or unhoistable) base values that // are defined outside (including the first-region entry block) of the // scope. The returned set doesn't include constants. -static std::set getBaseValues(Value *V, - DominatorTree &DT) { +static std::set getBaseValues( + Value *V, DominatorTree &DT, + DenseMap> &Visited) { + if (Visited.count(V)) { + return Visited[V]; + } std::set Result; if (auto *I = dyn_cast(V)) { // We don't stop at a block that's not in the Scope because we would miss some // instructions that are based on the same base values if we stop there. if (!isHoistable(I, DT)) { Result.insert(I); + Visited.insert(std::make_pair(V, Result)); return Result; } // I is hoistable above the Scope. for (Value *Op : I->operands()) { - std::set OpResult = getBaseValues(Op, DT); + std::set OpResult = getBaseValues(Op, DT, Visited); Result.insert(OpResult.begin(), OpResult.end()); } + Visited.insert(std::make_pair(V, Result)); return Result; } if (isa(V)) { Result.insert(V); + Visited.insert(std::make_pair(V, Result)); return Result; } // We don't include others like constants because those won't lead to any // chance of folding of conditions (eg two bit checks merged into one check) // after CHR. + Visited.insert(std::make_pair(V, Result)); return Result; // empty } @@ -1078,12 +1086,13 @@ static bool shouldSplit(Instruction *InsertPoint, if (!PrevConditionValues.empty() && !ConditionValues.empty()) { // Use std::set as DenseSet doesn't work with set_intersection. 
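The getBaseValues() change above threads a Visited cache through the recursion so shared subexpressions are computed once instead of being re-walked from every user. A simplified standalone model of that memoization over an expression DAG; Node stands in for llvm::Value, and leaves play the role of unhoistable instructions and arguments:

#include <map>
#include <set>
#include <vector>

struct Node {
  bool IsLeaf = false;
  std::vector<const Node *> Operands;
};

static std::set<const Node *>
baseValues(const Node *N,
           std::map<const Node *, std::set<const Node *>> &Visited) {
  auto It = Visited.find(N);
  if (It != Visited.end())
    return It->second;                 // reuse the cached result
  std::set<const Node *> Result;
  if (N->IsLeaf) {
    Result.insert(N);
  } else {
    for (const Node *Op : N->Operands) {
      std::set<const Node *> OpResult = baseValues(Op, Visited);
      Result.insert(OpResult.begin(), OpResult.end());
    }
  }
  Visited[N] = Result;                 // cache before returning
  return Result;
}

Without the cache, a walk like this can become exponential on heavily shared DAGs, which is the compile-time problem the added Visited map guards against.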
std::set PrevBases, Bases; + DenseMap> Visited; for (Value *V : PrevConditionValues) { - std::set BaseValues = getBaseValues(V, DT); + std::set BaseValues = getBaseValues(V, DT, Visited); PrevBases.insert(BaseValues.begin(), BaseValues.end()); } for (Value *V : ConditionValues) { - std::set BaseValues = getBaseValues(V, DT); + std::set BaseValues = getBaseValues(V, DT, Visited); Bases.insert(BaseValues.begin(), BaseValues.end()); } CHR_DEBUG( @@ -1538,10 +1547,7 @@ static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, } if (auto *SI = dyn_cast(U)) { // Swap operands - Value *TrueValue = SI->getTrueValue(); - Value *FalseValue = SI->getFalseValue(); - SI->setTrueValue(FalseValue); - SI->setFalseValue(TrueValue); + SI->swapValues(); SI->swapProfMetadata(); if (Scope->TrueBiasedSelects.count(SI)) { assert(Scope->FalseBiasedSelects.count(SI) == 0 && @@ -2073,7 +2079,7 @@ bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) { getAnalysis().getPSI(); RegionInfo &RI = getAnalysis().getRegionInfo(); std::unique_ptr OwnedORE = - llvm::make_unique(&F); + std::make_unique(&F); return CHR(F, BFI, DT, PSI, RI, *OwnedORE.get()).run(); } diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 2279c1bcb6a8..c0353cba0b2f 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -1212,7 +1212,7 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, return DFS.ZeroShadow; case 1: { LoadInst *LI = new LoadInst(DFS.ShadowTy, ShadowAddr, "", Pos); - LI->setAlignment(ShadowAlign); + LI->setAlignment(MaybeAlign(ShadowAlign)); return LI; } case 2: { diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 59950ffc4e9a..ac6082441eae 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -86,7 +86,9 @@ public: ReversedVersion[3] = Options.Version[0]; ReversedVersion[4] = '\0'; } - bool runOnModule(Module &M, const TargetLibraryInfo &TLI); + bool + runOnModule(Module &M, + std::function GetTLI); private: // Create the .gcno files for the Module based on DebugInfo. @@ -102,9 +104,9 @@ private: std::vector &Regexes); // Get pointers to the functions in the runtime library. 
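From here on, several passes (GCOVProfiler, InstrProfiling, and the legacy sanitizer wrappers) stop holding one module-wide TargetLibraryInfo and instead carry a callback that produces the per-function analysis on demand. A standalone sketch of that shape; Analysis, Fn, and ProfilerModel are illustrative stand-ins, not LLVM classes:

#include <functional>
#include <string>
#include <utility>

struct Analysis { /* per-function target library info */ };
struct Fn { std::string Name; };

class ProfilerModel {
  std::function<Analysis &(Fn &)> GetAnalysis; // queried lazily, per function

public:
  explicit ProfilerModel(std::function<Analysis &(Fn &)> Getter)
      : GetAnalysis(std::move(Getter)) {}

  void visit(Fn &F) {
    Analysis &A = GetAnalysis(F); // no module-wide analysis object is kept
    (void)A;
  }
};

A legacy pass builds the getter from getAnalysis on the wrapper pass, while the new pass manager builds it from the FunctionAnalysisManager obtained through the module proxy, which is exactly what the GCOV and InstrProfiling hunks below do.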
- FunctionCallee getStartFileFunc(); - FunctionCallee getEmitFunctionFunc(); - FunctionCallee getEmitArcsFunc(); + FunctionCallee getStartFileFunc(const TargetLibraryInfo *TLI); + FunctionCallee getEmitFunctionFunc(const TargetLibraryInfo *TLI); + FunctionCallee getEmitArcsFunc(const TargetLibraryInfo *TLI); FunctionCallee getSummaryInfoFunc(); FunctionCallee getEndFileFunc(); @@ -127,7 +129,7 @@ private: SmallVector FileChecksums; Module *M; - const TargetLibraryInfo *TLI; + std::function GetTLI; LLVMContext *Ctx; SmallVector, 16> Funcs; std::vector FilterRe; @@ -147,8 +149,9 @@ public: StringRef getPassName() const override { return "GCOV Profiler"; } bool runOnModule(Module &M) override { - auto &TLI = getAnalysis().getTLI(); - return Profiler.runOnModule(M, TLI); + return Profiler.runOnModule(M, [this](Function &F) -> TargetLibraryInfo & { + return getAnalysis().getTLI(F); + }); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -555,9 +558,10 @@ std::string GCOVProfiler::mangleName(const DICompileUnit *CU, return CurPath.str(); } -bool GCOVProfiler::runOnModule(Module &M, const TargetLibraryInfo &TLI) { +bool GCOVProfiler::runOnModule( + Module &M, std::function GetTLI) { this->M = &M; - this->TLI = &TLI; + this->GetTLI = std::move(GetTLI); Ctx = &M.getContext(); AddFlushBeforeForkAndExec(); @@ -574,9 +578,12 @@ PreservedAnalyses GCOVProfilerPass::run(Module &M, ModuleAnalysisManager &AM) { GCOVProfiler Profiler(GCOVOpts); + FunctionAnalysisManager &FAM = + AM.getResult(M).getManager(); - auto &TLI = AM.getResult(M); - if (!Profiler.runOnModule(M, TLI)) + if (!Profiler.runOnModule(M, [&](Function &F) -> TargetLibraryInfo & { + return FAM.getResult(F); + })) return PreservedAnalyses::all(); return PreservedAnalyses::none(); @@ -624,6 +631,7 @@ static bool shouldKeepInEntry(BasicBlock::iterator It) { void GCOVProfiler::AddFlushBeforeForkAndExec() { SmallVector ForkAndExecs; for (auto &F : M->functions()) { + auto *TLI = &GetTLI(F); for (auto &I : instructions(F)) { if (CallInst *CI = dyn_cast(&I)) { if (Function *Callee = CI->getCalledFunction()) { @@ -669,7 +677,8 @@ void GCOVProfiler::emitProfileNotes() { continue; std::error_code EC; - raw_fd_ostream out(mangleName(CU, GCovFileType::GCNO), EC, sys::fs::F_None); + raw_fd_ostream out(mangleName(CU, GCovFileType::GCNO), EC, + sys::fs::OF_None); if (EC) { Ctx->emitError(Twine("failed to open coverage notes file for writing: ") + EC.message()); @@ -695,7 +704,7 @@ void GCOVProfiler::emitProfileNotes() { ++It; EntryBlock.splitBasicBlock(It); - Funcs.push_back(make_unique(SP, &F, &out, FunctionIdent++, + Funcs.push_back(std::make_unique(SP, &F, &out, FunctionIdent++, Options.UseCfgChecksum, Options.ExitBlockBeforeBody)); GCOVFunction &Func = *Funcs.back(); @@ -873,7 +882,7 @@ bool GCOVProfiler::emitProfileArcs() { return Result; } -FunctionCallee GCOVProfiler::getStartFileFunc() { +FunctionCallee GCOVProfiler::getStartFileFunc(const TargetLibraryInfo *TLI) { Type *Args[] = { Type::getInt8PtrTy(*Ctx), // const char *orig_filename Type::getInt8PtrTy(*Ctx), // const char version[4] @@ -887,7 +896,7 @@ FunctionCallee GCOVProfiler::getStartFileFunc() { return Res; } -FunctionCallee GCOVProfiler::getEmitFunctionFunc() { +FunctionCallee GCOVProfiler::getEmitFunctionFunc(const TargetLibraryInfo *TLI) { Type *Args[] = { Type::getInt32Ty(*Ctx), // uint32_t ident Type::getInt8PtrTy(*Ctx), // const char *function_name @@ -906,7 +915,7 @@ FunctionCallee GCOVProfiler::getEmitFunctionFunc() { return 
M->getOrInsertFunction("llvm_gcda_emit_function", FTy); } -FunctionCallee GCOVProfiler::getEmitArcsFunc() { +FunctionCallee GCOVProfiler::getEmitArcsFunc(const TargetLibraryInfo *TLI) { Type *Args[] = { Type::getInt32Ty(*Ctx), // uint32_t num_counters Type::getInt64PtrTy(*Ctx), // uint64_t *counters @@ -943,9 +952,11 @@ Function *GCOVProfiler::insertCounterWriteout( BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF); IRBuilder<> Builder(BB); - FunctionCallee StartFile = getStartFileFunc(); - FunctionCallee EmitFunction = getEmitFunctionFunc(); - FunctionCallee EmitArcs = getEmitArcsFunc(); + auto *TLI = &GetTLI(*WriteoutF); + + FunctionCallee StartFile = getStartFileFunc(TLI); + FunctionCallee EmitFunction = getEmitFunctionFunc(TLI); + FunctionCallee EmitArcs = getEmitArcsFunc(TLI); FunctionCallee SummaryInfo = getSummaryInfoFunc(); FunctionCallee EndFile = getEndFileFunc(); diff --git a/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 90a9f4955a4b..f87132ee4758 100644 --- a/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -12,10 +12,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" @@ -52,7 +54,10 @@ using namespace llvm; #define DEBUG_TYPE "hwasan" static const char *const kHwasanModuleCtorName = "hwasan.module_ctor"; +static const char *const kHwasanNoteName = "hwasan.note"; static const char *const kHwasanInitName = "__hwasan_init"; +static const char *const kHwasanPersonalityThunkName = + "__hwasan_personality_thunk"; static const char *const kHwasanShadowMemoryDynamicAddress = "__hwasan_shadow_memory_dynamic_address"; @@ -112,6 +117,9 @@ static cl::opt ClGenerateTagsWithCalls( cl::desc("generate new tags with runtime library calls"), cl::Hidden, cl::init(false)); +static cl::opt ClGlobals("hwasan-globals", cl::desc("Instrument globals"), + cl::Hidden, cl::init(false)); + static cl::opt ClMatchAllTag( "hwasan-match-all-tag", cl::desc("don't report bad accesses via pointers with this tag"), @@ -155,8 +163,18 @@ static cl::opt static cl::opt ClInstrumentLandingPads("hwasan-instrument-landing-pads", - cl::desc("instrument landing pads"), cl::Hidden, - cl::init(true)); + cl::desc("instrument landing pads"), cl::Hidden, + cl::init(false), cl::ZeroOrMore); + +static cl::opt ClUseShortGranules( + "hwasan-use-short-granules", + cl::desc("use short granules in allocas and outlined checks"), cl::Hidden, + cl::init(false), cl::ZeroOrMore); + +static cl::opt ClInstrumentPersonalityFunctions( + "hwasan-instrument-personality-functions", + cl::desc("instrument personality functions"), cl::Hidden, cl::init(false), + cl::ZeroOrMore); static cl::opt ClInlineAllChecks("hwasan-inline-all-checks", cl::desc("inline all checks"), @@ -169,16 +187,16 @@ namespace { class HWAddressSanitizer { public: explicit HWAddressSanitizer(Module &M, bool CompileKernel = false, - bool Recover = false) { + bool Recover = false) : M(M) { this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover; this->CompileKernel = ClEnableKhwasan.getNumOccurrences() > 0 ? 
ClEnableKhwasan : CompileKernel; - initializeModule(M); + initializeModule(); } bool sanitizeFunction(Function &F); - void initializeModule(Module &M); + void initializeModule(); void initializeCallbacks(Module &M); @@ -216,9 +234,14 @@ public: Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty); void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord); + void instrumentGlobal(GlobalVariable *GV, uint8_t Tag); + void instrumentGlobals(); + + void instrumentPersonalityFunctions(); + private: LLVMContext *C; - std::string CurModuleUniqueId; + Module &M; Triple TargetTriple; FunctionCallee HWAsanMemmove, HWAsanMemcpy, HWAsanMemset; FunctionCallee HWAsanHandleVfork; @@ -238,17 +261,21 @@ private: bool InTls; void init(Triple &TargetTriple); - unsigned getAllocaAlignment() const { return 1U << Scale; } + unsigned getObjectAlignment() const { return 1U << Scale; } }; ShadowMapping Mapping; + Type *VoidTy = Type::getVoidTy(M.getContext()); Type *IntptrTy; Type *Int8PtrTy; Type *Int8Ty; Type *Int32Ty; + Type *Int64Ty = Type::getInt64Ty(M.getContext()); bool CompileKernel; bool Recover; + bool UseShortGranules; + bool InstrumentLandingPads; Function *HwasanCtorFunction; @@ -278,7 +305,7 @@ public: StringRef getPassName() const override { return "HWAddressSanitizer"; } bool doInitialization(Module &M) override { - HWASan = llvm::make_unique(M, CompileKernel, Recover); + HWASan = std::make_unique(M, CompileKernel, Recover); return true; } @@ -333,7 +360,7 @@ PreservedAnalyses HWAddressSanitizerPass::run(Module &M, /// Module-level initialization. /// /// inserts a call to __hwasan_init to the module's constructor list. -void HWAddressSanitizer::initializeModule(Module &M) { +void HWAddressSanitizer::initializeModule() { LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n"); auto &DL = M.getDataLayout(); @@ -342,7 +369,6 @@ void HWAddressSanitizer::initializeModule(Module &M) { Mapping.init(TargetTriple); C = &(M.getContext()); - CurModuleUniqueId = getUniqueModuleId(&M); IRBuilder<> IRB(*C); IntptrTy = IRB.getIntPtrTy(DL); Int8PtrTy = IRB.getInt8PtrTy(); @@ -350,6 +376,21 @@ void HWAddressSanitizer::initializeModule(Module &M) { Int32Ty = IRB.getInt32Ty(); HwasanCtorFunction = nullptr; + + // Older versions of Android do not have the required runtime support for + // short granules, global or personality function instrumentation. On other + // platforms we currently require using the latest version of the runtime. + bool NewRuntime = + !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30); + + UseShortGranules = + ClUseShortGranules.getNumOccurrences() ? ClUseShortGranules : NewRuntime; + + // If we don't have personality function support, fall back to landing pads. + InstrumentLandingPads = ClInstrumentLandingPads.getNumOccurrences() + ? ClInstrumentLandingPads + : !NewRuntime; + if (!CompileKernel) { std::tie(HwasanCtorFunction, std::ignore) = getOrCreateSanitizerCtorAndInitFunctions( @@ -363,6 +404,18 @@ void HWAddressSanitizer::initializeModule(Module &M) { Ctor->setComdat(CtorComdat); appendToGlobalCtors(M, Ctor, 0, Ctor); }); + + bool InstrumentGlobals = + ClGlobals.getNumOccurrences() ? ClGlobals : NewRuntime; + if (InstrumentGlobals) + instrumentGlobals(); + + bool InstrumentPersonalityFunctions = + ClInstrumentPersonalityFunctions.getNumOccurrences() + ? 
ClInstrumentPersonalityFunctions + : NewRuntime; + if (InstrumentPersonalityFunctions) + instrumentPersonalityFunctions(); } if (!TargetTriple.isAndroid()) { @@ -456,7 +509,7 @@ Value *HWAddressSanitizer::isInterestingMemoryAccess(Instruction *I, unsigned *Alignment, Value **MaybeMask) { // Skip memory accesses inserted by another instrumentation. - if (I->getMetadata("nosanitize")) return nullptr; + if (I->hasMetadata("nosanitize")) return nullptr; // Do not instrument the load fetching the dynamic shadow address. if (LocalDynamicShadow == I) @@ -564,9 +617,11 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite, TargetTriple.isOSBinFormatELF() && !Recover) { Module *M = IRB.GetInsertBlock()->getParent()->getParent(); Ptr = IRB.CreateBitCast(Ptr, Int8PtrTy); - IRB.CreateCall( - Intrinsic::getDeclaration(M, Intrinsic::hwasan_check_memaccess), - {shadowBase(), Ptr, ConstantInt::get(Int32Ty, AccessInfo)}); + IRB.CreateCall(Intrinsic::getDeclaration( + M, UseShortGranules + ? Intrinsic::hwasan_check_memaccess_shortgranules + : Intrinsic::hwasan_check_memaccess), + {shadowBase(), Ptr, ConstantInt::get(Int32Ty, AccessInfo)}); return; } @@ -718,7 +773,9 @@ static uint64_t getAllocaSizeInBytes(const AllocaInst &AI) { bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size) { - size_t AlignedSize = alignTo(Size, Mapping.getAllocaAlignment()); + size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment()); + if (!UseShortGranules) + Size = AlignedSize; Value *JustTag = IRB.CreateTrunc(Tag, IRB.getInt8Ty()); if (ClInstrumentWithCalls) { @@ -738,7 +795,7 @@ bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, /*Align=*/1); if (Size != AlignedSize) { IRB.CreateStore( - ConstantInt::get(Int8Ty, Size % Mapping.getAllocaAlignment()), + ConstantInt::get(Int8Ty, Size % Mapping.getObjectAlignment()), IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize)); IRB.CreateStore(JustTag, IRB.CreateConstGEP1_32( Int8Ty, IRB.CreateBitCast(AI, Int8PtrTy), @@ -778,8 +835,9 @@ Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) { // FIXME: use addressofreturnaddress (but implement it in aarch64 backend // first). Module *M = IRB.GetInsertBlock()->getParent()->getParent(); - auto GetStackPointerFn = - Intrinsic::getDeclaration(M, Intrinsic::frameaddress); + auto GetStackPointerFn = Intrinsic::getDeclaration( + M, Intrinsic::frameaddress, + IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace())); Value *StackPointer = IRB.CreateCall( GetStackPointerFn, {Constant::getNullValue(IRB.getInt32Ty())}); @@ -912,8 +970,10 @@ void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) { PC = readRegister(IRB, "pc"); else PC = IRB.CreatePtrToInt(F, IntptrTy); - auto GetStackPointerFn = - Intrinsic::getDeclaration(F->getParent(), Intrinsic::frameaddress); + Module *M = F->getParent(); + auto GetStackPointerFn = Intrinsic::getDeclaration( + M, Intrinsic::frameaddress, + IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace())); Value *SP = IRB.CreatePtrToInt( IRB.CreateCall(GetStackPointerFn, {Constant::getNullValue(IRB.getInt32Ty())}), @@ -999,11 +1059,8 @@ bool HWAddressSanitizer::instrumentStack( AI->hasName() ? AI->getName().str() : "alloca." 
+ itostr(N); Replacement->setName(Name + ".hwasan"); - for (auto UI = AI->use_begin(), UE = AI->use_end(); UI != UE;) { - Use &U = *UI++; - if (U.getUser() != AILong) - U.set(Replacement); - } + AI->replaceUsesWithIf(Replacement, + [AILong](Use &U) { return U.getUser() != AILong; }); for (auto *DDI : AllocaDeclareMap.lookup(AI)) { DIExpression *OldExpr = DDI->getExpression(); @@ -1020,7 +1077,7 @@ bool HWAddressSanitizer::instrumentStack( // Re-tag alloca memory with the special UAR tag. Value *Tag = getUARTag(IRB, StackTag); - tagAlloca(IRB, AI, Tag, alignTo(Size, Mapping.getAllocaAlignment())); + tagAlloca(IRB, AI, Tag, alignTo(Size, Mapping.getObjectAlignment())); } } @@ -1074,7 +1131,7 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { if (auto *Alloca = dyn_cast_or_null(DDI->getAddress())) AllocaDeclareMap[Alloca].push_back(DDI); - if (ClInstrumentLandingPads && isa(Inst)) + if (InstrumentLandingPads && isa(Inst)) LandingPadVec.push_back(&Inst); Value *MaybeMask = nullptr; @@ -1093,6 +1150,13 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { if (!LandingPadVec.empty()) instrumentLandingPads(LandingPadVec); + if (AllocasToInstrument.empty() && F.hasPersonalityFn() && + F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) { + // __hwasan_personality_thunk is a no-op for functions without an + // instrumented stack, so we can drop it. + F.setPersonalityFn(nullptr); + } + if (AllocasToInstrument.empty() && ToInstrument.empty()) return false; @@ -1118,8 +1182,9 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { DenseMap AllocaToPaddedAllocaMap; for (AllocaInst *AI : AllocasToInstrument) { uint64_t Size = getAllocaSizeInBytes(*AI); - uint64_t AlignedSize = alignTo(Size, Mapping.getAllocaAlignment()); - AI->setAlignment(std::max(AI->getAlignment(), 16u)); + uint64_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment()); + AI->setAlignment( + MaybeAlign(std::max(AI->getAlignment(), Mapping.getObjectAlignment()))); if (Size != AlignedSize) { Type *AllocatedType = AI->getAllocatedType(); if (AI->isArrayAllocation()) { @@ -1132,7 +1197,7 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { auto *NewAI = new AllocaInst( TypeWithPadding, AI->getType()->getAddressSpace(), nullptr, "", AI); NewAI->takeName(AI); - NewAI->setAlignment(AI->getAlignment()); + NewAI->setAlignment(MaybeAlign(AI->getAlignment())); NewAI->setUsedWithInAlloca(AI->isUsedWithInAlloca()); NewAI->setSwiftError(AI->isSwiftError()); NewAI->copyMetadata(*AI); @@ -1179,6 +1244,257 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { return Changed; } +void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) { + Constant *Initializer = GV->getInitializer(); + uint64_t SizeInBytes = + M.getDataLayout().getTypeAllocSize(Initializer->getType()); + uint64_t NewSize = alignTo(SizeInBytes, Mapping.getObjectAlignment()); + if (SizeInBytes != NewSize) { + // Pad the initializer out to the next multiple of 16 bytes and add the + // required short granule tag. 
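instrumentGlobal (continued just below) pads each tagged global out to a whole number of 16-byte granules and, when the final granule is only partially used, writes the tag into the last padding byte. This matches the short-granule convention used for stack objects: the shadow byte holds the used size, and the granule's last in-memory byte holds the real tag. A standalone sketch of the padding step; the byte vector stands in for the global's initializer:

#include <cstdint>
#include <vector>

static const uint64_t kGranule = 16;

static std::vector<uint8_t> padInitializer(std::vector<uint8_t> Data,
                                           uint8_t Tag) {
  uint64_t Size = Data.size();
  uint64_t NewSize = (Size + kGranule - 1) / kGranule * kGranule;
  if (NewSize != Size) {
    Data.resize(NewSize, 0); // zero padding up to the granule boundary
    Data.back() = Tag;       // short granule: last byte carries the real tag
  }
  return Data;
}

Globals whose size is already a multiple of the granule are left unpadded, mirroring the SizeInBytes != NewSize check in the hunk below.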
+ std::vector Init(NewSize - SizeInBytes, 0); + Init.back() = Tag; + Constant *Padding = ConstantDataArray::get(*C, Init); + Initializer = ConstantStruct::getAnon({Initializer, Padding}); + } + + auto *NewGV = new GlobalVariable(M, Initializer->getType(), GV->isConstant(), + GlobalValue::ExternalLinkage, Initializer, + GV->getName() + ".hwasan"); + NewGV->copyAttributesFrom(GV); + NewGV->setLinkage(GlobalValue::PrivateLinkage); + NewGV->copyMetadata(GV, 0); + NewGV->setAlignment( + MaybeAlign(std::max(GV->getAlignment(), Mapping.getObjectAlignment()))); + + // It is invalid to ICF two globals that have different tags. In the case + // where the size of the global is a multiple of the tag granularity the + // contents of the globals may be the same but the tags (i.e. symbol values) + // may be different, and the symbols are not considered during ICF. In the + // case where the size is not a multiple of the granularity, the short granule + // tags would discriminate two globals with different tags, but there would + // otherwise be nothing stopping such a global from being incorrectly ICF'd + // with an uninstrumented (i.e. tag 0) global that happened to have the short + // granule tag in the last byte. + NewGV->setUnnamedAddr(GlobalValue::UnnamedAddr::None); + + // Descriptor format (assuming little-endian): + // bytes 0-3: relative address of global + // bytes 4-6: size of global (16MB ought to be enough for anyone, but in case + // it isn't, we create multiple descriptors) + // byte 7: tag + auto *DescriptorTy = StructType::get(Int32Ty, Int32Ty); + const uint64_t MaxDescriptorSize = 0xfffff0; + for (uint64_t DescriptorPos = 0; DescriptorPos < SizeInBytes; + DescriptorPos += MaxDescriptorSize) { + auto *Descriptor = + new GlobalVariable(M, DescriptorTy, true, GlobalValue::PrivateLinkage, + nullptr, GV->getName() + ".hwasan.descriptor"); + auto *GVRelPtr = ConstantExpr::getTrunc( + ConstantExpr::getAdd( + ConstantExpr::getSub( + ConstantExpr::getPtrToInt(NewGV, Int64Ty), + ConstantExpr::getPtrToInt(Descriptor, Int64Ty)), + ConstantInt::get(Int64Ty, DescriptorPos)), + Int32Ty); + uint32_t Size = std::min(SizeInBytes - DescriptorPos, MaxDescriptorSize); + auto *SizeAndTag = ConstantInt::get(Int32Ty, Size | (uint32_t(Tag) << 24)); + Descriptor->setComdat(NewGV->getComdat()); + Descriptor->setInitializer(ConstantStruct::getAnon({GVRelPtr, SizeAndTag})); + Descriptor->setSection("hwasan_globals"); + Descriptor->setMetadata(LLVMContext::MD_associated, + MDNode::get(*C, ValueAsMetadata::get(NewGV))); + appendToCompilerUsed(M, Descriptor); + } + + Constant *Aliasee = ConstantExpr::getIntToPtr( + ConstantExpr::getAdd( + ConstantExpr::getPtrToInt(NewGV, Int64Ty), + ConstantInt::get(Int64Ty, uint64_t(Tag) << kPointerTagShift)), + GV->getType()); + auto *Alias = GlobalAlias::create(GV->getValueType(), GV->getAddressSpace(), + GV->getLinkage(), "", Aliasee, &M); + Alias->setVisibility(GV->getVisibility()); + Alias->takeName(GV); + GV->replaceAllUsesWith(Alias); + GV->eraseFromParent(); +} + +void HWAddressSanitizer::instrumentGlobals() { + // Start by creating a note that contains pointers to the list of global + // descriptors. Adding a note to the output file will cause the linker to + // create a PT_NOTE program header pointing to the note that we can use to + // find the descriptor list starting from the program headers. A function + // provided by the runtime initializes the shadow memory for the globals by + // accessing the descriptor list via the note. 
The dynamic loader needs to + // call this function whenever a library is loaded. + // + // The reason why we use a note for this instead of a more conventional + // approach of having a global constructor pass a descriptor list pointer to + // the runtime is because of an order of initialization problem. With + // constructors we can encounter the following problematic scenario: + // + // 1) library A depends on library B and also interposes one of B's symbols + // 2) B's constructors are called before A's (as required for correctness) + // 3) during construction, B accesses one of its "own" globals (actually + // interposed by A) and triggers a HWASAN failure due to the initialization + // for A not having happened yet + // + // Even without interposition it is possible to run into similar situations in + // cases where two libraries mutually depend on each other. + // + // We only need one note per binary, so put everything for the note in a + // comdat. + Comdat *NoteComdat = M.getOrInsertComdat(kHwasanNoteName); + + Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0); + auto Start = + new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage, + nullptr, "__start_hwasan_globals"); + Start->setVisibility(GlobalValue::HiddenVisibility); + Start->setDSOLocal(true); + auto Stop = + new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage, + nullptr, "__stop_hwasan_globals"); + Stop->setVisibility(GlobalValue::HiddenVisibility); + Stop->setDSOLocal(true); + + // Null-terminated so actually 8 bytes, which are required in order to align + // the note properly. + auto *Name = ConstantDataArray::get(*C, "LLVM\0\0\0"); + + auto *NoteTy = StructType::get(Int32Ty, Int32Ty, Int32Ty, Name->getType(), + Int32Ty, Int32Ty); + auto *Note = + new GlobalVariable(M, NoteTy, /*isConstantGlobal=*/true, + GlobalValue::PrivateLinkage, nullptr, kHwasanNoteName); + Note->setSection(".note.hwasan.globals"); + Note->setComdat(NoteComdat); + Note->setAlignment(Align(4)); + Note->setDSOLocal(true); + + // The pointers in the note need to be relative so that the note ends up being + // placed in rodata, which is the standard location for notes. + auto CreateRelPtr = [&](Constant *Ptr) { + return ConstantExpr::getTrunc( + ConstantExpr::getSub(ConstantExpr::getPtrToInt(Ptr, Int64Ty), + ConstantExpr::getPtrToInt(Note, Int64Ty)), + Int32Ty); + }; + Note->setInitializer(ConstantStruct::getAnon( + {ConstantInt::get(Int32Ty, 8), // n_namesz + ConstantInt::get(Int32Ty, 8), // n_descsz + ConstantInt::get(Int32Ty, ELF::NT_LLVM_HWASAN_GLOBALS), // n_type + Name, CreateRelPtr(Start), CreateRelPtr(Stop)})); + appendToCompilerUsed(M, Note); + + // Create a zero-length global in hwasan_globals so that the linker will + // always create start and stop symbols. + auto Dummy = new GlobalVariable( + M, Int8Arr0Ty, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage, + Constant::getNullValue(Int8Arr0Ty), "hwasan.dummy.global"); + Dummy->setSection("hwasan_globals"); + Dummy->setComdat(NoteComdat); + Dummy->setMetadata(LLVMContext::MD_associated, + MDNode::get(*C, ValueAsMetadata::get(Note))); + appendToCompilerUsed(M, Dummy); + + std::vector Globals; + for (GlobalVariable &GV : M.globals()) { + if (GV.isDeclarationForLinker() || GV.getName().startswith("llvm.") || + GV.isThreadLocal()) + continue; + + // Common symbols can't have aliases point to them, so they can't be tagged. 
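The note built above has a fixed layout that the runtime (and the dynamic-loader hook it installs) parses to locate the hwasan_globals section bounds. An illustrative C++ rendering of that layout, mirroring the initializer constructed above; this struct is documentation only, not a type that exists in the runtime:

#include <cstdint>

struct HwasanGlobalsNote {
  uint32_t n_namesz;  // 8: "LLVM" padded with NULs to 8 bytes
  uint32_t n_descsz;  // 8: the two relative pointers below
  uint32_t n_type;    // ELF::NT_LLVM_HWASAN_GLOBALS
  char name[8];       // "LLVM\0\0\0\0"
  int32_t begin_rel;  // __start_hwasan_globals, relative to this note
  int32_t end_rel;    // __stop_hwasan_globals, relative to this note
};

Keeping the two pointers relative to the note itself lets the note live in read-only data, as the comment above explains.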
+ if (GV.hasCommonLinkage()) + continue; + + // Globals with custom sections may be used in __start_/__stop_ enumeration, + // which would be broken both by adding tags and potentially by the extra + // padding/alignment that we insert. + if (GV.hasSection()) + continue; + + Globals.push_back(&GV); + } + + MD5 Hasher; + Hasher.update(M.getSourceFileName()); + MD5::MD5Result Hash; + Hasher.final(Hash); + uint8_t Tag = Hash[0]; + + for (GlobalVariable *GV : Globals) { + // Skip tag 0 in order to avoid collisions with untagged memory. + if (Tag == 0) + Tag = 1; + instrumentGlobal(GV, Tag++); + } +} + +void HWAddressSanitizer::instrumentPersonalityFunctions() { + // We need to untag stack frames as we unwind past them. That is the job of + // the personality function wrapper, which either wraps an existing + // personality function or acts as a personality function on its own. Each + // function that has a personality function or that can be unwound past has + // its personality function changed to a thunk that calls the personality + // function wrapper in the runtime. + MapVector> PersonalityFns; + for (Function &F : M) { + if (F.isDeclaration() || !F.hasFnAttribute(Attribute::SanitizeHWAddress)) + continue; + + if (F.hasPersonalityFn()) { + PersonalityFns[F.getPersonalityFn()->stripPointerCasts()].push_back(&F); + } else if (!F.hasFnAttribute(Attribute::NoUnwind)) { + PersonalityFns[nullptr].push_back(&F); + } + } + + if (PersonalityFns.empty()) + return; + + FunctionCallee HwasanPersonalityWrapper = M.getOrInsertFunction( + "__hwasan_personality_wrapper", Int32Ty, Int32Ty, Int32Ty, Int64Ty, + Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy); + FunctionCallee UnwindGetGR = M.getOrInsertFunction("_Unwind_GetGR", VoidTy); + FunctionCallee UnwindGetCFA = M.getOrInsertFunction("_Unwind_GetCFA", VoidTy); + + for (auto &P : PersonalityFns) { + std::string ThunkName = kHwasanPersonalityThunkName; + if (P.first) + ThunkName += ("." + P.first->getName()).str(); + FunctionType *ThunkFnTy = FunctionType::get( + Int32Ty, {Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int8PtrTy}, false); + bool IsLocal = P.first && (!isa(P.first) || + cast(P.first)->hasLocalLinkage()); + auto *ThunkFn = Function::Create(ThunkFnTy, + IsLocal ? GlobalValue::InternalLinkage + : GlobalValue::LinkOnceODRLinkage, + ThunkName, &M); + if (!IsLocal) { + ThunkFn->setVisibility(GlobalValue::HiddenVisibility); + ThunkFn->setComdat(M.getOrInsertComdat(ThunkName)); + } + + auto *BB = BasicBlock::Create(*C, "entry", ThunkFn); + IRBuilder<> IRB(BB); + CallInst *WrapperCall = IRB.CreateCall( + HwasanPersonalityWrapper, + {ThunkFn->getArg(0), ThunkFn->getArg(1), ThunkFn->getArg(2), + ThunkFn->getArg(3), ThunkFn->getArg(4), + P.first ? 
IRB.CreateBitCast(P.first, Int8PtrTy) + : Constant::getNullValue(Int8PtrTy), + IRB.CreateBitCast(UnwindGetGR.getCallee(), Int8PtrTy), + IRB.CreateBitCast(UnwindGetCFA.getCallee(), Int8PtrTy)}); + WrapperCall->setTailCall(); + IRB.CreateRet(WrapperCall); + + for (Function *F : P.second) + F->setPersonalityFn(ThunkFn); + } +} + void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple) { Scale = kDefaultShadowScale; if (ClMappingOffset.getNumOccurrences() > 0) { diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index c7371f567ff3..74d6e76eceb6 100644 --- a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -403,7 +403,7 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, AM->getResult(M).getManager(); ORE = &FAM.getResult(F); } else { - OwnedORE = llvm::make_unique(&F); + OwnedORE = std::make_unique(&F); ORE = OwnedORE.get(); } diff --git a/lib/Transforms/Instrumentation/InstrOrderFile.cpp b/lib/Transforms/Instrumentation/InstrOrderFile.cpp index a2c1ddfd279e..93d3a8a14d5c 100644 --- a/lib/Transforms/Instrumentation/InstrOrderFile.cpp +++ b/lib/Transforms/Instrumentation/InstrOrderFile.cpp @@ -100,7 +100,8 @@ public: if (!ClOrderFileWriteMapping.empty()) { std::lock_guard LogLock(MappingMutex); std::error_code EC; - llvm::raw_fd_ostream OS(ClOrderFileWriteMapping, EC, llvm::sys::fs::F_Append); + llvm::raw_fd_ostream OS(ClOrderFileWriteMapping, EC, + llvm::sys::fs::OF_Append); if (EC) { report_fatal_error(Twine("Failed to open ") + ClOrderFileWriteMapping + " to save mapping file for order file instrumentation\n"); diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp index 63c2b8078967..1f092a5f3103 100644 --- a/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -157,7 +157,10 @@ public: } bool runOnModule(Module &M) override { - return InstrProf.run(M, getAnalysis().getTLI()); + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis().getTLI(F); + }; + return InstrProf.run(M, GetTLI); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -370,8 +373,12 @@ private: } // end anonymous namespace PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) { - auto &TLI = AM.getResult(M); - if (!run(M, TLI)) + FunctionAnalysisManager &FAM = + AM.getResult(M).getManager(); + auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult(F); + }; + if (!run(M, GetTLI)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); @@ -441,7 +448,7 @@ void InstrProfiling::promoteCounterLoadStores(Function *F) { std::unique_ptr BFI; if (Options.UseBFIInPromotion) { std::unique_ptr BPI; - BPI.reset(new BranchProbabilityInfo(*F, LI, TLI)); + BPI.reset(new BranchProbabilityInfo(*F, LI, &GetTLI(*F))); BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI)); } @@ -482,9 +489,10 @@ static bool containsProfilingIntrinsics(Module &M) { return false; } -bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) { +bool InstrProfiling::run( + Module &M, std::function GetTLI) { this->M = &M; - this->TLI = &TLI; + this->GetTLI = std::move(GetTLI); NamesVar = nullptr; NamesSize = 0; ProfileDataMap.clear(); @@ -601,6 +609,7 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { bool IsRange = 
(Ind->getValueKind()->getZExtValue() == llvm::InstrProfValueKind::IPVK_MemOPSize); CallInst *Call = nullptr; + auto *TLI = &GetTLI(*Ind->getFunction()); if (!IsRange) { Value *Args[3] = {Ind->getTargetValue(), Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()), @@ -731,9 +740,8 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { PD = It->second; } - // Match the linkage and visibility of the name global, except on COFF, where - // the linkage must be local and consequentially the visibility must be - // default. + // Match the linkage and visibility of the name global. COFF supports using + // comdats with internal symbols, so do that if we can. Function *Fn = Inc->getParent()->getParent(); GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage(); GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility(); @@ -749,19 +757,21 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { // new comdat group for the counters and profiling data. If we use the comdat // of the parent function, that will result in relocations against discarded // sections. - Comdat *Cmdt = nullptr; - GlobalValue::LinkageTypes CounterLinkage = Linkage; - if (needsComdatForCounter(*Fn, *M)) { - StringRef CmdtPrefix = getInstrProfComdatPrefix(); + bool NeedComdat = needsComdatForCounter(*Fn, *M); + if (NeedComdat) { if (TT.isOSBinFormatCOFF()) { - // For COFF, the comdat group name must be the name of a symbol in the - // group. Use the counter variable name, and upgrade its linkage to - // something externally visible, like linkonce_odr. - CmdtPrefix = getInstrProfCountersVarPrefix(); - CounterLinkage = GlobalValue::LinkOnceODRLinkage; + // For COFF, put the counters, data, and values each into their own + // comdats. We can't use a group because the Visual C++ linker will + // report duplicate symbol errors if there are multiple external symbols + // with the same name marked IMAGE_COMDAT_SELECT_ASSOCIATIVE. 
+ Linkage = GlobalValue::LinkOnceODRLinkage; + Visibility = GlobalValue::HiddenVisibility; } - Cmdt = M->getOrInsertComdat(getVarName(Inc, CmdtPrefix)); } + auto MaybeSetComdat = [=](GlobalVariable *GV) { + if (NeedComdat) + GV->setComdat(M->getOrInsertComdat(GV->getName())); + }; uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); LLVMContext &Ctx = M->getContext(); @@ -775,9 +785,9 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { CounterPtr->setVisibility(Visibility); CounterPtr->setSection( getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat())); - CounterPtr->setAlignment(8); - CounterPtr->setComdat(Cmdt); - CounterPtr->setLinkage(CounterLinkage); + CounterPtr->setAlignment(Align(8)); + MaybeSetComdat(CounterPtr); + CounterPtr->setLinkage(Linkage); auto *Int8PtrTy = Type::getInt8PtrTy(Ctx); // Allocate statically the array of pointers to value profile nodes for @@ -797,8 +807,8 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { ValuesVar->setVisibility(Visibility); ValuesVar->setSection( getInstrProfSectionName(IPSK_vals, TT.getObjectFormat())); - ValuesVar->setAlignment(8); - ValuesVar->setComdat(Cmdt); + ValuesVar->setAlignment(Align(8)); + MaybeSetComdat(ValuesVar); ValuesPtrExpr = ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx)); } @@ -830,8 +840,9 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { getVarName(Inc, getInstrProfDataVarPrefix())); Data->setVisibility(Visibility); Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat())); - Data->setAlignment(INSTR_PROF_DATA_ALIGNMENT); - Data->setComdat(Cmdt); + Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT)); + MaybeSetComdat(Data); + Data->setLinkage(Linkage); PD.RegionCounters = CounterPtr; PD.DataVar = Data; @@ -920,7 +931,7 @@ void InstrProfiling::emitNameData() { // On COFF, it's important to reduce the alignment down to 1 to prevent the // linker from inserting padding before the start of the names section or // between names entries. - NamesVar->setAlignment(1); + NamesVar->setAlignment(Align::None()); UsedVars.push_back(NamesVar); for (auto *NamePtr : ReferencedNames) diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp index f56a1bd91b89..a6c2c9b464b6 100644 --- a/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -68,7 +68,8 @@ GlobalVariable *llvm::createPrivateGlobalForString(Module &M, StringRef Str, GlobalValue::PrivateLinkage, StrConst, NamePrefix); if (AllowMerging) GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); - GV->setAlignment(1); // Strings may not be merged w/o setting align 1. + GV->setAlignment(Align::None()); // Strings may not be merged w/o setting + // alignment explicitly. 
return GV; } @@ -116,7 +117,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) { initializeMemorySanitizerLegacyPassPass(Registry); initializeHWAddressSanitizerLegacyPassPass(Registry); initializeThreadSanitizerLegacyPassPass(Registry); - initializeSanitizerCoverageModulePass(Registry); + initializeModuleSanitizerCoverageLegacyPassPass(Registry); initializeDataFlowSanitizerPass(Registry); } diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index b25cbed1bb02..69c9020e060b 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -462,16 +462,9 @@ namespace { /// the module. class MemorySanitizer { public: - MemorySanitizer(Module &M, MemorySanitizerOptions Options) { - this->CompileKernel = - ClEnableKmsan.getNumOccurrences() > 0 ? ClEnableKmsan : Options.Kernel; - if (ClTrackOrigins.getNumOccurrences() > 0) - this->TrackOrigins = ClTrackOrigins; - else - this->TrackOrigins = this->CompileKernel ? 2 : Options.TrackOrigins; - this->Recover = ClKeepGoing.getNumOccurrences() > 0 - ? ClKeepGoing - : (this->CompileKernel | Options.Recover); + MemorySanitizer(Module &M, MemorySanitizerOptions Options) + : CompileKernel(Options.Kernel), TrackOrigins(Options.TrackOrigins), + Recover(Options.Recover) { initializeModule(M); } @@ -594,10 +587,26 @@ private: /// An empty volatile inline asm that prevents callback merge. InlineAsm *EmptyAsm; - - Function *MsanCtorFunction; }; +void insertModuleCtor(Module &M) { + getOrCreateSanitizerCtorAndInitFunctions( + M, kMsanModuleCtorName, kMsanInitName, + /*InitArgTypes=*/{}, + /*InitArgs=*/{}, + // This callback is invoked when the functions are created the first + // time. Hook them into the global ctors list in that case: + [&](Function *Ctor, FunctionCallee) { + if (!ClWithComdat) { + appendToGlobalCtors(M, Ctor, 0); + return; + } + Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName); + Ctor->setComdat(MsanCtorComdat); + appendToGlobalCtors(M, Ctor, 0, Ctor); + }); +} + /// A legacy function pass for msan instrumentation. /// /// Instruments functions to detect unitialized reads. @@ -615,7 +624,7 @@ struct MemorySanitizerLegacyPass : public FunctionPass { bool runOnFunction(Function &F) override { return MSan->sanitizeFunction( - F, getAnalysis().getTLI()); + F, getAnalysis().getTLI(F)); } bool doInitialization(Module &M) override; @@ -623,8 +632,17 @@ struct MemorySanitizerLegacyPass : public FunctionPass { MemorySanitizerOptions Options; }; +template T getOptOrDefault(const cl::opt &Opt, T Default) { + return (Opt.getNumOccurrences() > 0) ? Opt : Default; +} + } // end anonymous namespace +MemorySanitizerOptions::MemorySanitizerOptions(int TO, bool R, bool K) + : Kernel(getOptOrDefault(ClEnableKmsan, K)), + TrackOrigins(getOptOrDefault(ClTrackOrigins, Kernel ? 
2 : TO)), + Recover(getOptOrDefault(ClKeepGoing, Kernel || R)) {} + PreservedAnalyses MemorySanitizerPass::run(Function &F, FunctionAnalysisManager &FAM) { MemorySanitizer Msan(*F.getParent(), Options); @@ -633,6 +651,14 @@ PreservedAnalyses MemorySanitizerPass::run(Function &F, return PreservedAnalyses::all(); } +PreservedAnalyses MemorySanitizerPass::run(Module &M, + ModuleAnalysisManager &AM) { + if (Options.Kernel) + return PreservedAnalyses::all(); + insertModuleCtor(M); + return PreservedAnalyses::none(); +} + char MemorySanitizerLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(MemorySanitizerLegacyPass, "msan", @@ -918,23 +944,6 @@ void MemorySanitizer::initializeModule(Module &M) { OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000); if (!CompileKernel) { - std::tie(MsanCtorFunction, std::ignore) = - getOrCreateSanitizerCtorAndInitFunctions( - M, kMsanModuleCtorName, kMsanInitName, - /*InitArgTypes=*/{}, - /*InitArgs=*/{}, - // This callback is invoked when the functions are created the first - // time. Hook them into the global ctors list in that case: - [&](Function *Ctor, FunctionCallee) { - if (!ClWithComdat) { - appendToGlobalCtors(M, Ctor, 0); - return; - } - Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName); - Ctor->setComdat(MsanCtorComdat); - appendToGlobalCtors(M, Ctor, 0, Ctor); - }); - if (TrackOrigins) M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] { return new GlobalVariable( @@ -952,6 +961,8 @@ void MemorySanitizer::initializeModule(Module &M) { } bool MemorySanitizerLegacyPass::doInitialization(Module &M) { + if (!Options.Kernel) + insertModuleCtor(M); MSan.emplace(M, Options); return true; } @@ -2562,6 +2573,11 @@ struct MemorySanitizerVisitor : public InstVisitor { return false; } + void handleInvariantGroup(IntrinsicInst &I) { + setShadow(&I, getShadow(&I, 0)); + setOrigin(&I, getOrigin(&I, 0)); + } + void handleLifetimeStart(IntrinsicInst &I) { if (!PoisonStack) return; @@ -2993,6 +3009,10 @@ struct MemorySanitizerVisitor : public InstVisitor { case Intrinsic::lifetime_start: handleLifetimeStart(I); break; + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: + handleInvariantGroup(I); + break; case Intrinsic::bswap: handleBswap(I); break; @@ -3627,10 +3647,10 @@ struct MemorySanitizerVisitor : public InstVisitor { int getNumOutputArgs(InlineAsm *IA, CallBase *CB) { int NumRetOutputs = 0; int NumOutputs = 0; - Type *RetTy = dyn_cast(CB)->getType(); + Type *RetTy = cast(CB)->getType(); if (!RetTy->isVoidTy()) { // Register outputs are returned via the CallInst return value. - StructType *ST = dyn_cast_or_null(RetTy); + auto *ST = dyn_cast(RetTy); if (ST) NumRetOutputs = ST->getNumElements(); else @@ -3667,7 +3687,7 @@ struct MemorySanitizerVisitor : public InstVisitor { // corresponding CallInst has nO+nI+1 operands (the last operand is the // function to be called). const DataLayout &DL = F.getParent()->getDataLayout(); - CallBase *CB = dyn_cast(&I); + CallBase *CB = cast(&I); IRBuilder<> IRB(&I); InlineAsm *IA = cast(CB->getCalledValue()); int OutputArgs = getNumOutputArgs(IA, CB); @@ -4567,8 +4587,9 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, } bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) { - if (!CompileKernel && (&F == MsanCtorFunction)) + if (!CompileKernel && F.getName() == kMsanModuleCtorName) return false; + MemorySanitizerVisitor Visitor(F, *this, TLI); // Clear out readonly/readnone attributes. 
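The MemorySanitizer changes above fold the "command line beats constructor argument" logic into the MemorySanitizerOptions constructor through a small getOptOrDefault helper. The sketch below shows that pattern in isolation; the flag name example-track-things and the ExampleOptions struct are invented stand-ins for the ClTrackOrigins/ClKeepGoing options and MemorySanitizerOptions.

#include "llvm/Support/CommandLine.h"

using namespace llvm;

// Hypothetical flag, standing in for the cl::opt declarations in the pass.
static cl::opt<bool> ClTrackThings("example-track-things",
                                   cl::desc("illustrative flag"),
                                   cl::init(false), cl::Hidden);

template <class T>
static T getOptOrDefault(const cl::opt<T> &Opt, T Default) {
  // A flag that occurs on the command line wins; otherwise use the value the
  // pass builder (or legacy pass) supplied programmatically.
  return (Opt.getNumOccurrences() > 0) ? Opt : Default;
}

struct ExampleOptions {
  bool TrackThings;
  explicit ExampleOptions(bool Track)
      : TrackThings(getOptOrDefault(ClTrackThings, Track)) {}
};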
diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 6fec3c9c79ee..ca1bb62389e9 100644 --- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -48,6 +48,7 @@ //===----------------------------------------------------------------------===// #include "CFGMST.h" +#include "ValueProfileCollector.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" @@ -61,7 +62,6 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/IndirectCallVisitor.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" @@ -96,6 +96,7 @@ #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/Support/BranchProbability.h" +#include "llvm/Support/CRC.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/DOTGraphTraits.h" @@ -103,11 +104,11 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" -#include "llvm/Support/JamCRC.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/MisExpect.h" #include #include #include @@ -120,6 +121,7 @@ using namespace llvm; using ProfileCount = Function::ProfileCount; +using VPCandidateInfo = ValueProfileCollector::CandidateInfo; #define DEBUG_TYPE "pgo-instrumentation" @@ -286,6 +288,11 @@ static std::string getBranchCondString(Instruction *TI) { return result; } +static const char *ValueProfKindDescr[] = { +#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, +#include "llvm/ProfileData/InstrProfData.inc" +}; + namespace { /// The select instruction visitor plays three roles specified @@ -348,50 +355,6 @@ struct SelectInstVisitor : public InstVisitor { unsigned getNumOfSelectInsts() const { return NSIs; } }; -/// Instruction Visitor class to visit memory intrinsic calls. -struct MemIntrinsicVisitor : public InstVisitor { - Function &F; - unsigned NMemIs = 0; // Number of memIntrinsics instrumented. - VisitMode Mode = VM_counting; // Visiting mode. - unsigned CurCtrId = 0; // Current counter index. - unsigned TotalNumCtrs = 0; // Total number of counters - GlobalVariable *FuncNameVar = nullptr; - uint64_t FuncHash = 0; - PGOUseFunc *UseFunc = nullptr; - std::vector Candidates; - - MemIntrinsicVisitor(Function &Func) : F(Func) {} - - void countMemIntrinsics(Function &Func) { - NMemIs = 0; - Mode = VM_counting; - visit(Func); - } - - void instrumentMemIntrinsics(Function &Func, unsigned TotalNC, - GlobalVariable *FNV, uint64_t FHash) { - Mode = VM_instrument; - TotalNumCtrs = TotalNC; - FuncHash = FHash; - FuncNameVar = FNV; - visit(Func); - } - - std::vector findMemIntrinsics(Function &Func) { - Candidates.clear(); - Mode = VM_annotate; - visit(Func); - return Candidates; - } - - // Visit the IR stream and annotate all mem intrinsic call instructions. - void instrumentOneMemIntrinsic(MemIntrinsic &MI); - - // Visit \p MI instruction and perform tasks according to visit mode. 
- void visitMemIntrinsic(MemIntrinsic &SI); - - unsigned getNumOfMemIntrinsics() const { return NMemIs; } -}; class PGOInstrumentationGenLegacyPass : public ModulePass { public: @@ -563,13 +526,14 @@ private: // A map that stores the Comdat group in function F. std::unordered_multimap &ComdatMembers; + ValueProfileCollector VPC; + void computeCFGHash(); void renameComdatFunction(); public: - std::vector> ValueSites; + std::vector> ValueSites; SelectInstVisitor SIVisitor; - MemIntrinsicVisitor MIVisitor; std::string FuncName; GlobalVariable *FuncNameVar; @@ -604,23 +568,21 @@ public: std::unordered_multimap &ComdatMembers, bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr, bool IsCS = false) - : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), - ValueSites(IPVK_Last + 1), SIVisitor(Func), MIVisitor(Func), - MST(F, BPI, BFI) { + : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func), + ValueSites(IPVK_Last + 1), SIVisitor(Func), MST(F, BPI, BFI) { // This should be done before CFG hash computation. SIVisitor.countSelects(Func); - MIVisitor.countMemIntrinsics(Func); + ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); if (!IsCS) { NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); - NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics(); + NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); NumOfPGOBB += MST.BBInfos.size(); - ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func); + ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); } else { NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); - NumOfCSPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics(); + NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); NumOfCSPGOBB += MST.BBInfos.size(); } - ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func); FuncName = getPGOFuncName(F); computeCFGHash(); @@ -647,7 +609,7 @@ public: // value of each BB in the CFG. The higher 32 bits record the number of edges. template void FuncPGOInstrumentation::computeCFGHash() { - std::vector Indexes; + std::vector Indexes; JamCRC JC; for (auto &BB : F) { const Instruction *TI = BB.getTerminator(); @@ -658,7 +620,7 @@ void FuncPGOInstrumentation::computeCFGHash() { continue; uint32_t Index = BI->Index; for (int J = 0; J < 4; J++) - Indexes.push_back((char)(Index >> (J * 8))); + Indexes.push_back((uint8_t)(Index >> (J * 8))); } } JC.update(Indexes); @@ -874,28 +836,36 @@ static void instrumentOneFunc( if (DisableValueProfiling) return; - unsigned NumIndirectCalls = 0; - for (auto &I : FuncInfo.ValueSites[IPVK_IndirectCallTarget]) { - CallSite CS(I); - Value *Callee = CS.getCalledValue(); - LLVM_DEBUG(dbgs() << "Instrument one indirect call: CallSite Index = " - << NumIndirectCalls << "\n"); - IRBuilder<> Builder(I); - assert(Builder.GetInsertPoint() != I->getParent()->end() && - "Cannot get the Instrumentation point"); - Builder.CreateCall( - Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), - {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), - Builder.getInt64(FuncInfo.FunctionHash), - Builder.CreatePtrToInt(Callee, Builder.getInt64Ty()), - Builder.getInt32(IPVK_IndirectCallTarget), - Builder.getInt32(NumIndirectCalls++)}); - } - NumOfPGOICall += NumIndirectCalls; + NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); - // Now instrument memop intrinsic calls. 
- FuncInfo.MIVisitor.instrumentMemIntrinsics( - F, NumCounters, FuncInfo.FuncNameVar, FuncInfo.FunctionHash); + // For each VP Kind, walk the VP candidates and instrument each one. + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { + unsigned SiteIndex = 0; + if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) + continue; + + for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { + LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] + << " site: CallSite Index = " << SiteIndex << "\n"); + + IRBuilder<> Builder(Cand.InsertPt); + assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && + "Cannot get the Instrumentation point"); + + Value *ToProfile = nullptr; + if (Cand.V->getType()->isIntegerTy()) + ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty()); + else if (Cand.V->getType()->isPointerTy()) + ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); + assert(ToProfile && "value profiling Value is of unexpected type"); + + Builder.CreateCall( + Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), + {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), + Builder.getInt64(FuncInfo.FunctionHash), ToProfile, + Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}); + } + } // IPVK_First <= Kind <= IPVK_Last } namespace { @@ -984,9 +954,9 @@ class PGOUseFunc { public: PGOUseFunc(Function &Func, Module *Modu, std::unordered_multimap &ComdatMembers, - BranchProbabilityInfo *BPI = nullptr, - BlockFrequencyInfo *BFIin = nullptr, bool IsCS = false) - : F(Func), M(Modu), BFI(BFIin), + BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, + ProfileSummaryInfo *PSI, bool IsCS) + : F(Func), M(Modu), BFI(BFIin), PSI(PSI), FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS), FreqAttr(FFA_Normal), IsCS(IsCS) {} @@ -1041,6 +1011,7 @@ private: Function &F; Module *M; BlockFrequencyInfo *BFI; + ProfileSummaryInfo *PSI; // This member stores the shared information with class PGOGenFunc. FuncPGOInstrumentation FuncInfo; @@ -1078,15 +1049,9 @@ private: // FIXME: This function should be removed once the functionality in // the inliner is implemented. void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { - if (ProgramMaxCount == 0) - return; - // Threshold of the hot functions. - const BranchProbability HotFunctionThreshold(1, 100); - // Threshold of the cold functions. 
- const BranchProbability ColdFunctionThreshold(2, 10000); - if (EntryCount >= HotFunctionThreshold.scale(ProgramMaxCount)) + if (PSI->isHotCount(EntryCount)) FreqAttr = FFA_Hot; - else if (MaxCount <= ColdFunctionThreshold.scale(ProgramMaxCount)) + else if (PSI->isColdCount(MaxCount)) FreqAttr = FFA_Cold; } }; @@ -1433,43 +1398,6 @@ void SelectInstVisitor::visitSelectInst(SelectInst &SI) { llvm_unreachable("Unknown visiting mode"); } -void MemIntrinsicVisitor::instrumentOneMemIntrinsic(MemIntrinsic &MI) { - Module *M = F.getParent(); - IRBuilder<> Builder(&MI); - Type *Int64Ty = Builder.getInt64Ty(); - Type *I8PtrTy = Builder.getInt8PtrTy(); - Value *Length = MI.getLength(); - assert(!isa(Length)); - Builder.CreateCall( - Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), - {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), - Builder.getInt64(FuncHash), Builder.CreateZExtOrTrunc(Length, Int64Ty), - Builder.getInt32(IPVK_MemOPSize), Builder.getInt32(CurCtrId)}); - ++CurCtrId; -} - -void MemIntrinsicVisitor::visitMemIntrinsic(MemIntrinsic &MI) { - if (!PGOInstrMemOP) - return; - Value *Length = MI.getLength(); - // Not instrument constant length calls. - if (dyn_cast(Length)) - return; - - switch (Mode) { - case VM_counting: - NMemIs++; - return; - case VM_instrument: - instrumentOneMemIntrinsic(MI); - return; - case VM_annotate: - Candidates.push_back(&MI); - return; - } - llvm_unreachable("Unknown visiting mode"); -} - // Traverse all valuesites and annotate the instructions for all value kind. void PGOUseFunc::annotateValueSites() { if (DisableValueProfiling) @@ -1482,11 +1410,6 @@ void PGOUseFunc::annotateValueSites() { annotateValueSites(Kind); } -static const char *ValueProfKindDescr[] = { -#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, -#include "llvm/ProfileData/InstrProfData.inc" -}; - // Annotate the instructions for a specific value kind. void PGOUseFunc::annotateValueSites(uint32_t Kind) { assert(Kind <= IPVK_Last); @@ -1505,11 +1428,11 @@ void PGOUseFunc::annotateValueSites(uint32_t Kind) { return; } - for (auto &I : ValueSites) { + for (VPCandidateInfo &I : ValueSites) { LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind << "): Index = " << ValueSiteIndex << " out of " << NumValueSites << "\n"); - annotateValueSite(*M, *I, ProfileRecord, + annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, static_cast(Kind), ValueSiteIndex, Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations : MaxNumAnnotations); @@ -1595,7 +1518,8 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M, static bool annotateAllFunctions( Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, function_ref LookupBPI, - function_ref LookupBFI, bool IsCS) { + function_ref LookupBFI, + ProfileSummaryInfo *PSI, bool IsCS) { LLVM_DEBUG(dbgs() << "Read in profile counters: "); auto &Ctx = M.getContext(); // Read the counter array from file. @@ -1626,6 +1550,13 @@ static bool annotateAllFunctions( return false; } + // Add the profile summary (read from the header of the indexed summary) here + // so that we can use it below when reading counters (which checks if the + // function should be marked with a cold or inlinehint attribute). + M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), + IsCS ? 
ProfileSummary::PSK_CSInstr + : ProfileSummary::PSK_Instr); + std::unordered_multimap ComdatMembers; collectComdatMembers(M, ComdatMembers); std::vector HotFunctions; @@ -1638,7 +1569,7 @@ static bool annotateAllFunctions( // Split indirectbr critical edges here before computing the MST rather than // later in getInstrBB() to avoid invalidating it. SplitIndirectBrCriticalEdges(F, BPI, BFI); - PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, IsCS); + PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, PSI, IsCS); bool AllZeros = false; if (!Func.readCounters(PGOReader.get(), AllZeros)) continue; @@ -1662,9 +1593,9 @@ static bool annotateAllFunctions( F.getName().equals(ViewBlockFreqFuncName))) { LoopInfo LI{DominatorTree(F)}; std::unique_ptr NewBPI = - llvm::make_unique(F, LI); + std::make_unique(F, LI); std::unique_ptr NewBFI = - llvm::make_unique(F, *NewBPI, LI); + std::make_unique(F, *NewBPI, LI); if (PGOViewCounts == PGOVCT_Graph) NewBFI->view(); else if (PGOViewCounts == PGOVCT_Text) { @@ -1686,9 +1617,6 @@ static bool annotateAllFunctions( } } } - M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), - IsCS ? ProfileSummary::PSK_CSInstr - : ProfileSummary::PSK_Instr); // Set function hotness attribute from the profile. // We have to apply these attributes at the end because their presence @@ -1730,8 +1658,10 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M, return &FAM.getResult(F); }; + auto *PSI = &AM.getResult(M); + if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, - LookupBPI, LookupBFI, IsCS)) + LookupBPI, LookupBFI, PSI, IsCS)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); @@ -1748,7 +1678,8 @@ bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { return &this->getAnalysis(F).getBFI(); }; - return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI, + auto *PSI = &getAnalysis().getPSI(); + return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI, PSI, IsCS); } @@ -1776,6 +1707,9 @@ void llvm::setProfMetadata(Module *M, Instruction *TI, : Weights) { dbgs() << W << " "; } dbgs() << "\n";); + + misexpect::verifyMisExpect(TI, Weights, TI->getContext()); + TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); if (EmitBranchProbability) { std::string BrCondStr = getBranchCondString(TI); diff --git a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp index 188f95b4676b..9f81bb16d0a7 100644 --- a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp +++ b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp @@ -138,7 +138,7 @@ public: OptimizationRemarkEmitter &ORE, DominatorTree *DT) : Func(Func), BFI(BFI), ORE(ORE), DT(DT), Changed(false) { ValueDataArray = - llvm::make_unique(MemOPMaxVersion + 2); + std::make_unique(MemOPMaxVersion + 2); // Get the MemOPSize range information from option MemOPSizeRange, getMemOPSizeRangeFromOption(MemOPSizeRange, PreciseRangeStart, PreciseRangeLast); @@ -374,8 +374,8 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB); Instruction *NewInst = MI->clone(); // Fix the argument. 
- MemIntrinsic * MemI = dyn_cast(NewInst); - IntegerType *SizeType = dyn_cast(MemI->getLength()->getType()); + auto *MemI = cast(NewInst); + auto *SizeType = dyn_cast(MemI->getLength()->getType()); assert(SizeType && "Expected integer type size argument."); ConstantInt *CaseSizeId = ConstantInt::get(SizeType, SizeId); MemI->setLength(CaseSizeId); diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index ca0cb4bdbe84..f8fa9cad03b8 100644 --- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/EHPersonalities.h" @@ -176,24 +177,21 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) { return Options; } -class SanitizerCoverageModule : public ModulePass { +using DomTreeCallback = function_ref; +using PostDomTreeCallback = + function_ref; + +class ModuleSanitizerCoverage { public: - SanitizerCoverageModule( + ModuleSanitizerCoverage( const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()) - : ModulePass(ID), Options(OverrideFromCL(Options)) { - initializeSanitizerCoverageModulePass(*PassRegistry::getPassRegistry()); - } - bool runOnModule(Module &M) override; - bool runOnFunction(Function &F); - static char ID; // Pass identification, replacement for typeid - StringRef getPassName() const override { return "SanitizerCoverageModule"; } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - } + : Options(OverrideFromCL(Options)) {} + bool instrumentModule(Module &M, DomTreeCallback DTCallback, + PostDomTreeCallback PDTCallback); private: + void instrumentFunction(Function &F, DomTreeCallback DTCallback, + PostDomTreeCallback PDTCallback); void InjectCoverageForIndirectCalls(Function &F, ArrayRef IndirCalls); void InjectTraceForCmp(Function &F, ArrayRef CmpTraceTargets); @@ -252,10 +250,57 @@ private: SanitizerCoverageOptions Options; }; +class ModuleSanitizerCoverageLegacyPass : public ModulePass { +public: + ModuleSanitizerCoverageLegacyPass( + const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()) + : ModulePass(ID), Options(Options) { + initializeModuleSanitizerCoverageLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + bool runOnModule(Module &M) override { + ModuleSanitizerCoverage ModuleSancov(Options); + auto DTCallback = [this](Function &F) -> const DominatorTree * { + return &this->getAnalysis(F).getDomTree(); + }; + auto PDTCallback = [this](Function &F) -> const PostDominatorTree * { + return &this->getAnalysis(F) + .getPostDomTree(); + }; + return ModuleSancov.instrumentModule(M, DTCallback, PDTCallback); + } + + static char ID; // Pass identification, replacement for typeid + StringRef getPassName() const override { return "ModuleSanitizerCoverage"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + } + +private: + SanitizerCoverageOptions Options; +}; + } // namespace +PreservedAnalyses ModuleSanitizerCoveragePass::run(Module &M, + ModuleAnalysisManager &MAM) { + ModuleSanitizerCoverage ModuleSancov(Options); + auto &FAM = MAM.getResult(M).getManager(); + auto DTCallback = [&FAM](Function &F) -> const DominatorTree * { + return 
&FAM.getResult(F); + }; + auto PDTCallback = [&FAM](Function &F) -> const PostDominatorTree * { + return &FAM.getResult(F); + }; + if (ModuleSancov.instrumentModule(M, DTCallback, PDTCallback)) + return PreservedAnalyses::none(); + return PreservedAnalyses::all(); +} + std::pair -SanitizerCoverageModule::CreateSecStartEnd(Module &M, const char *Section, +ModuleSanitizerCoverage::CreateSecStartEnd(Module &M, const char *Section, Type *Ty) { GlobalVariable *SecStart = new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, nullptr, @@ -278,7 +323,7 @@ SanitizerCoverageModule::CreateSecStartEnd(Module &M, const char *Section, return std::make_pair(IRB.CreatePointerCast(GEP, Ty), SecEndPtr); } -Function *SanitizerCoverageModule::CreateInitCallsForSections( +Function *ModuleSanitizerCoverage::CreateInitCallsForSections( Module &M, const char *CtorName, const char *InitFunctionName, Type *Ty, const char *Section) { auto SecStartEnd = CreateSecStartEnd(M, Section, Ty); @@ -310,7 +355,8 @@ Function *SanitizerCoverageModule::CreateInitCallsForSections( return CtorFunc; } -bool SanitizerCoverageModule::runOnModule(Module &M) { +bool ModuleSanitizerCoverage::instrumentModule( + Module &M, DomTreeCallback DTCallback, PostDomTreeCallback PDTCallback) { if (Options.CoverageType == SanitizerCoverageOptions::SCK_None) return false; C = &(M.getContext()); @@ -403,7 +449,7 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { M.getOrInsertFunction(SanCovTracePCGuardName, VoidTy, Int32PtrTy); for (auto &F : M) - runOnFunction(F); + instrumentFunction(F, DTCallback, PDTCallback); Function *Ctor = nullptr; @@ -518,29 +564,30 @@ static bool IsInterestingCmp(ICmpInst *CMP, const DominatorTree *DT, return true; } -bool SanitizerCoverageModule::runOnFunction(Function &F) { +void ModuleSanitizerCoverage::instrumentFunction( + Function &F, DomTreeCallback DTCallback, PostDomTreeCallback PDTCallback) { if (F.empty()) - return false; + return; if (F.getName().find(".module_ctor") != std::string::npos) - return false; // Should not instrument sanitizer init functions. + return; // Should not instrument sanitizer init functions. if (F.getName().startswith("__sanitizer_")) - return false; // Don't instrument __sanitizer_* callbacks. + return; // Don't instrument __sanitizer_* callbacks. // Don't touch available_externally functions, their actual body is elewhere. if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) - return false; + return; // Don't instrument MSVC CRT configuration helpers. They may run before normal // initialization. if (F.getName() == "__local_stdio_printf_options" || F.getName() == "__local_stdio_scanf_options") - return false; + return; if (isa(F.getEntryBlock().getTerminator())) - return false; + return; // Don't instrument functions using SEH for now. Splitting basic blocks like // we do for coverage breaks WinEHPrepare. // FIXME: Remove this when SEH no longer uses landingpad pattern matching. 
if (F.hasPersonalityFn() && isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) - return false; + return; if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge) SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions().setIgnoreUnreachableDests()); SmallVector IndirCalls; @@ -550,10 +597,8 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) { SmallVector DivTraceTargets; SmallVector GepTraceTargets; - const DominatorTree *DT = - &getAnalysis(F).getDomTree(); - const PostDominatorTree *PDT = - &getAnalysis(F).getPostDomTree(); + const DominatorTree *DT = DTCallback(F); + const PostDominatorTree *PDT = PDTCallback(F); bool IsLeafFunc = true; for (auto &BB : F) { @@ -593,10 +638,9 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) { InjectTraceForSwitch(F, SwitchTraceTargets); InjectTraceForDiv(F, DivTraceTargets); InjectTraceForGep(F, GepTraceTargets); - return true; } -GlobalVariable *SanitizerCoverageModule::CreateFunctionLocalArrayInSection( +GlobalVariable *ModuleSanitizerCoverage::CreateFunctionLocalArrayInSection( size_t NumElements, Function &F, Type *Ty, const char *Section) { ArrayType *ArrayTy = ArrayType::get(Ty, NumElements); auto Array = new GlobalVariable( @@ -608,8 +652,9 @@ GlobalVariable *SanitizerCoverageModule::CreateFunctionLocalArrayInSection( GetOrCreateFunctionComdat(F, TargetTriple, CurModuleUniqueId)) Array->setComdat(Comdat); Array->setSection(getSectionName(Section)); - Array->setAlignment(Ty->isPointerTy() ? DL->getPointerSize() - : Ty->getPrimitiveSizeInBits() / 8); + Array->setAlignment(Align(Ty->isPointerTy() + ? DL->getPointerSize() + : Ty->getPrimitiveSizeInBits() / 8)); GlobalsToAppendToUsed.push_back(Array); GlobalsToAppendToCompilerUsed.push_back(Array); MDNode *MD = MDNode::get(F.getContext(), ValueAsMetadata::get(&F)); @@ -619,7 +664,7 @@ GlobalVariable *SanitizerCoverageModule::CreateFunctionLocalArrayInSection( } GlobalVariable * -SanitizerCoverageModule::CreatePCArray(Function &F, +ModuleSanitizerCoverage::CreatePCArray(Function &F, ArrayRef AllBlocks) { size_t N = AllBlocks.size(); assert(N); @@ -646,7 +691,7 @@ SanitizerCoverageModule::CreatePCArray(Function &F, return PCArray; } -void SanitizerCoverageModule::CreateFunctionLocalArrays( +void ModuleSanitizerCoverage::CreateFunctionLocalArrays( Function &F, ArrayRef AllBlocks) { if (Options.TracePCGuard) FunctionGuardArray = CreateFunctionLocalArrayInSection( @@ -660,7 +705,7 @@ void SanitizerCoverageModule::CreateFunctionLocalArrays( FunctionPCsArray = CreatePCArray(F, AllBlocks); } -bool SanitizerCoverageModule::InjectCoverage(Function &F, +bool ModuleSanitizerCoverage::InjectCoverage(Function &F, ArrayRef AllBlocks, bool IsLeafFunc) { if (AllBlocks.empty()) return false; @@ -677,7 +722,7 @@ bool SanitizerCoverageModule::InjectCoverage(Function &F, // The cache is used to speed up recording the caller-callee pairs. // The address of the caller is passed implicitly via caller PC. // CacheSize is encoded in the name of the run-time function. -void SanitizerCoverageModule::InjectCoverageForIndirectCalls( +void ModuleSanitizerCoverage::InjectCoverageForIndirectCalls( Function &F, ArrayRef IndirCalls) { if (IndirCalls.empty()) return; @@ -696,7 +741,7 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls( // __sanitizer_cov_trace_switch(CondValue, // {NumCases, ValueSizeInBits, Case0Value, Case1Value, Case2Value, ... 
}) -void SanitizerCoverageModule::InjectTraceForSwitch( +void ModuleSanitizerCoverage::InjectTraceForSwitch( Function &, ArrayRef SwitchTraceTargets) { for (auto I : SwitchTraceTargets) { if (SwitchInst *SI = dyn_cast(I)) { @@ -735,7 +780,7 @@ void SanitizerCoverageModule::InjectTraceForSwitch( } } -void SanitizerCoverageModule::InjectTraceForDiv( +void ModuleSanitizerCoverage::InjectTraceForDiv( Function &, ArrayRef DivTraceTargets) { for (auto BO : DivTraceTargets) { IRBuilder<> IRB(BO); @@ -753,7 +798,7 @@ void SanitizerCoverageModule::InjectTraceForDiv( } } -void SanitizerCoverageModule::InjectTraceForGep( +void ModuleSanitizerCoverage::InjectTraceForGep( Function &, ArrayRef GepTraceTargets) { for (auto GEP : GepTraceTargets) { IRBuilder<> IRB(GEP); @@ -764,7 +809,7 @@ void SanitizerCoverageModule::InjectTraceForGep( } } -void SanitizerCoverageModule::InjectTraceForCmp( +void ModuleSanitizerCoverage::InjectTraceForCmp( Function &, ArrayRef CmpTraceTargets) { for (auto I : CmpTraceTargets) { if (ICmpInst *ICMP = dyn_cast(I)) { @@ -799,7 +844,7 @@ void SanitizerCoverageModule::InjectTraceForCmp( } } -void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, +void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx, bool IsLeafFunc) { BasicBlock::iterator IP = BB.getFirstInsertionPt(); @@ -842,8 +887,10 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, } if (Options.StackDepth && IsEntryBB && !IsLeafFunc) { // Check stack depth. If it's the deepest so far, record it. - Function *GetFrameAddr = - Intrinsic::getDeclaration(F.getParent(), Intrinsic::frameaddress); + Module *M = F.getParent(); + Function *GetFrameAddr = Intrinsic::getDeclaration( + M, Intrinsic::frameaddress, + IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace())); auto FrameAddrPtr = IRB.CreateCall(GetFrameAddr, {Constant::getNullValue(Int32Ty)}); auto FrameAddrInt = IRB.CreatePtrToInt(FrameAddrPtr, IntptrTy); @@ -858,7 +905,7 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, } std::string -SanitizerCoverageModule::getSectionName(const std::string &Section) const { +ModuleSanitizerCoverage::getSectionName(const std::string &Section) const { if (TargetTriple.isOSBinFormatCOFF()) { if (Section == SanCovCountersSectionName) return ".SCOV$CM"; @@ -872,32 +919,29 @@ SanitizerCoverageModule::getSectionName(const std::string &Section) const { } std::string -SanitizerCoverageModule::getSectionStart(const std::string &Section) const { +ModuleSanitizerCoverage::getSectionStart(const std::string &Section) const { if (TargetTriple.isOSBinFormatMachO()) return "\1section$start$__DATA$__" + Section; return "__start___" + Section; } std::string -SanitizerCoverageModule::getSectionEnd(const std::string &Section) const { +ModuleSanitizerCoverage::getSectionEnd(const std::string &Section) const { if (TargetTriple.isOSBinFormatMachO()) return "\1section$end$__DATA$__" + Section; return "__stop___" + Section; } - -char SanitizerCoverageModule::ID = 0; -INITIALIZE_PASS_BEGIN(SanitizerCoverageModule, "sancov", - "SanitizerCoverage: TODO." 
- "ModulePass", - false, false) +char ModuleSanitizerCoverageLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(ModuleSanitizerCoverageLegacyPass, "sancov", + "Pass for instrumenting coverage on functions", false, + false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) -INITIALIZE_PASS_END(SanitizerCoverageModule, "sancov", - "SanitizerCoverage: TODO." - "ModulePass", - false, false) -ModulePass *llvm::createSanitizerCoverageModulePass( +INITIALIZE_PASS_END(ModuleSanitizerCoverageLegacyPass, "sancov", + "Pass for instrumenting coverage on functions", false, + false) +ModulePass *llvm::createModuleSanitizerCoverageLegacyPassPass( const SanitizerCoverageOptions &Options) { - return new SanitizerCoverageModule(Options); + return new ModuleSanitizerCoverageLegacyPass(Options); } diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 5be13fa745cb..ac274a155a80 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -92,11 +92,10 @@ namespace { /// ensures the __tsan_init function is in the list of global constructors for /// the module. struct ThreadSanitizer { - ThreadSanitizer(Module &M); bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI); private: - void initializeCallbacks(Module &M); + void initialize(Module &M); bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL); bool instrumentAtomic(Instruction *I, const DataLayout &DL); bool instrumentMemIntrinsic(Instruction *I); @@ -108,8 +107,6 @@ private: void InsertRuntimeIgnores(Function &F); Type *IntptrTy; - IntegerType *OrdTy; - // Callbacks to run-time library are computed in doInitialization. FunctionCallee TsanFuncEntry; FunctionCallee TsanFuncExit; FunctionCallee TsanIgnoreBegin; @@ -130,7 +127,6 @@ private: FunctionCallee TsanVptrUpdate; FunctionCallee TsanVptrLoad; FunctionCallee MemmoveFn, MemcpyFn, MemsetFn; - Function *TsanCtorFunction; }; struct ThreadSanitizerLegacyPass : FunctionPass { @@ -143,16 +139,32 @@ struct ThreadSanitizerLegacyPass : FunctionPass { private: Optional TSan; }; + +void insertModuleCtor(Module &M) { + getOrCreateSanitizerCtorAndInitFunctions( + M, kTsanModuleCtorName, kTsanInitName, /*InitArgTypes=*/{}, + /*InitArgs=*/{}, + // This callback is invoked when the functions are created the first + // time. 
Hook them into the global ctors list in that case: + [&](Function *Ctor, FunctionCallee) { appendToGlobalCtors(M, Ctor, 0); }); +} + } // namespace PreservedAnalyses ThreadSanitizerPass::run(Function &F, FunctionAnalysisManager &FAM) { - ThreadSanitizer TSan(*F.getParent()); + ThreadSanitizer TSan; if (TSan.sanitizeFunction(F, FAM.getResult(F))) return PreservedAnalyses::none(); return PreservedAnalyses::all(); } +PreservedAnalyses ThreadSanitizerPass::run(Module &M, + ModuleAnalysisManager &MAM) { + insertModuleCtor(M); + return PreservedAnalyses::none(); +} + char ThreadSanitizerLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(ThreadSanitizerLegacyPass, "tsan", "ThreadSanitizer: detects data races.", false, false) @@ -169,12 +181,13 @@ void ThreadSanitizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { } bool ThreadSanitizerLegacyPass::doInitialization(Module &M) { - TSan.emplace(M); + insertModuleCtor(M); + TSan.emplace(); return true; } bool ThreadSanitizerLegacyPass::runOnFunction(Function &F) { - auto &TLI = getAnalysis().getTLI(); + auto &TLI = getAnalysis().getTLI(F); TSan->sanitizeFunction(F, TLI); return true; } @@ -183,7 +196,10 @@ FunctionPass *llvm::createThreadSanitizerLegacyPassPass() { return new ThreadSanitizerLegacyPass(); } -void ThreadSanitizer::initializeCallbacks(Module &M) { +void ThreadSanitizer::initialize(Module &M) { + const DataLayout &DL = M.getDataLayout(); + IntptrTy = DL.getIntPtrType(M.getContext()); + IRBuilder<> IRB(M.getContext()); AttributeList Attr; Attr = Attr.addAttribute(M.getContext(), AttributeList::FunctionIndex, @@ -197,7 +213,7 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { IRB.getVoidTy()); TsanIgnoreEnd = M.getOrInsertFunction("__tsan_ignore_thread_end", Attr, IRB.getVoidTy()); - OrdTy = IRB.getInt32Ty(); + IntegerType *OrdTy = IRB.getInt32Ty(); for (size_t i = 0; i < kNumberOfAccessSizes; ++i) { const unsigned ByteSize = 1U << i; const unsigned BitSize = ByteSize * 8; @@ -280,20 +296,6 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy); } -ThreadSanitizer::ThreadSanitizer(Module &M) { - const DataLayout &DL = M.getDataLayout(); - IntptrTy = DL.getIntPtrType(M.getContext()); - std::tie(TsanCtorFunction, std::ignore) = - getOrCreateSanitizerCtorAndInitFunctions( - M, kTsanModuleCtorName, kTsanInitName, /*InitArgTypes=*/{}, - /*InitArgs=*/{}, - // This callback is invoked when the functions are created the first - // time. Hook them into the global ctors list in that case: - [&](Function *Ctor, FunctionCallee) { - appendToGlobalCtors(M, Ctor, 0); - }); -} - static bool isVtableAccess(Instruction *I) { if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa)) return Tag->isTBAAVtableAccess(); @@ -436,9 +438,9 @@ bool ThreadSanitizer::sanitizeFunction(Function &F, const TargetLibraryInfo &TLI) { // This is required to prevent instrumenting call to __tsan_init from within // the module constructor. 
- if (&F == TsanCtorFunction) + if (F.getName() == kTsanModuleCtorName) return false; - initializeCallbacks(*F.getParent()); + initialize(*F.getParent()); SmallVector AllLoadsAndStores; SmallVector LocalLoadsAndStores; SmallVector AtomicAccesses; diff --git a/lib/Transforms/Instrumentation/ValueProfileCollector.cpp b/lib/Transforms/Instrumentation/ValueProfileCollector.cpp new file mode 100644 index 000000000000..604726d4f40f --- /dev/null +++ b/lib/Transforms/Instrumentation/ValueProfileCollector.cpp @@ -0,0 +1,78 @@ +//===- ValueProfileCollector.cpp - determine what to value profile --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The implementation of the ValueProfileCollector via ValueProfileCollectorImpl +// +//===----------------------------------------------------------------------===// + +#include "ValueProfilePlugins.inc" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/InitializePasses.h" + +#include + +using namespace llvm; + +namespace { + +/// A plugin-based class that takes an arbitrary number of Plugin types. +/// Each plugin type must satisfy the following API: +/// 1) the constructor must take a `Function &f`. Typically, the plugin would +/// scan the function looking for candidates. +/// 2) contain a member function with the following signature and name: +/// void run(std::vector &Candidates); +/// such that the plugin would append its result into the vector parameter. +/// +/// Plugins are defined in ValueProfilePlugins.inc +template class PluginChain; + +/// The type PluginChainFinal is the final chain of plugins that will be used by +/// ValueProfileCollectorImpl. +using PluginChainFinal = PluginChain; + +template <> class PluginChain<> { +public: + PluginChain(Function &F) {} + void get(InstrProfValueKind K, std::vector &Candidates) {} +}; + +template +class PluginChain : public PluginChain { + PluginT Plugin; + using Base = PluginChain; + +public: + PluginChain(Function &F) : PluginChain(F), Plugin(F) {} + + void get(InstrProfValueKind K, std::vector &Candidates) { + if (K == PluginT::Kind) + Plugin.run(Candidates); + Base::get(K, Candidates); + } +}; + +} // end anonymous namespace + +/// ValueProfileCollectorImpl inherits the API of PluginChainFinal. +class ValueProfileCollector::ValueProfileCollectorImpl : public PluginChainFinal { +public: + using PluginChainFinal::PluginChainFinal; +}; + +ValueProfileCollector::ValueProfileCollector(Function &F) + : PImpl(new ValueProfileCollectorImpl(F)) {} + +ValueProfileCollector::~ValueProfileCollector() = default; + +std::vector +ValueProfileCollector::get(InstrProfValueKind Kind) const { + std::vector Result; + PImpl->get(Kind, Result); + return Result; +} diff --git a/lib/Transforms/Instrumentation/ValueProfileCollector.h b/lib/Transforms/Instrumentation/ValueProfileCollector.h new file mode 100644 index 000000000000..ff883c8d0c77 --- /dev/null +++ b/lib/Transforms/Instrumentation/ValueProfileCollector.h @@ -0,0 +1,79 @@ +//===- ValueProfileCollector.h - determine what to value profile ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a utility class, ValueProfileCollector, that is used to +// determine what kind of llvm::Value's are worth value-profiling, at which +// point in the program, and which instruction holds the Value Profile metadata. +// Currently, the only users of this utility is the PGOInstrumentation[Gen|Use] +// passes. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_PROFILE_GEN_ANALYSIS_H +#define LLVM_ANALYSIS_PROFILE_GEN_ANALYSIS_H + +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/ProfileData/InstrProf.h" + +namespace llvm { + +/// Utility analysis that determines what values are worth profiling. +/// The actual logic is inside the ValueProfileCollectorImpl, whose job is to +/// populate the Candidates vector. +/// +/// Value profiling an expression means to track the values that this expression +/// takes at runtime and the frequency of each value. +/// It is important to distinguish between two sets of value profiles for a +/// particular expression: +/// 1) The set of values at the point of evaluation. +/// 2) The set of values at the point of use. +/// In some cases, the two sets are identical, but it's not unusual for the two +/// to differ. +/// +/// To elaborate more, consider this C code, and focus on the expression `nn`: +/// void foo(int nn, bool b) { +/// if (b) memcpy(x, y, nn); +/// } +/// The point of evaluation can be as early as the start of the function, and +/// let's say the value profile for `nn` is: +/// total=100; (value,freq) set = {(8,10), (32,50)} +/// The point of use is right before we call memcpy, and since we execute the +/// memcpy conditionally, the value profile of `nn` can be: +/// total=15; (value,freq) set = {(8,10), (4,5)} +/// +/// For this reason, a plugin is responsible for computing the insertion point +/// for each value to be profiled. The `CandidateInfo` structure encapsulates +/// all the information needed for each value profile site. +class ValueProfileCollector { +public: + struct CandidateInfo { + Value *V; // The value to profile. + Instruction *InsertPt; // Insert the VP lib call before this instr. + Instruction *AnnotatedInst; // Where metadata is attached. + }; + + ValueProfileCollector(Function &Fn); + ValueProfileCollector(ValueProfileCollector &&) = delete; + ValueProfileCollector &operator=(ValueProfileCollector &&) = delete; + + ValueProfileCollector(const ValueProfileCollector &) = delete; + ValueProfileCollector &operator=(const ValueProfileCollector &) = delete; + ~ValueProfileCollector(); + + /// returns a list of value profiling candidates of the given kind + std::vector get(InstrProfValueKind Kind) const; + +private: + class ValueProfileCollectorImpl; + std::unique_ptr PImpl; +}; + +} // namespace llvm + +#endif diff --git a/lib/Transforms/Instrumentation/ValueProfilePlugins.inc b/lib/Transforms/Instrumentation/ValueProfilePlugins.inc new file mode 100644 index 000000000000..4cc4c6c848c3 --- /dev/null +++ b/lib/Transforms/Instrumentation/ValueProfilePlugins.inc @@ -0,0 +1,75 @@ +//=== ValueProfilePlugins.inc - set of plugins used by ValueProfileCollector =// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a set of plugin classes used in ValueProfileCollectorImpl. +// Each plugin is responsible for collecting Value Profiling candidates for a +// particular optimization. +// Each plugin must satisfy the interface described in ValueProfileCollector.cpp +// +//===----------------------------------------------------------------------===// + +#include "ValueProfileCollector.h" +#include "llvm/Analysis/IndirectCallVisitor.h" +#include "llvm/IR/InstVisitor.h" + +using namespace llvm; +using CandidateInfo = ValueProfileCollector::CandidateInfo; + +///--------------------------- MemIntrinsicPlugin ------------------------------ +class MemIntrinsicPlugin : public InstVisitor { + Function &F; + std::vector *Candidates; + +public: + static constexpr InstrProfValueKind Kind = IPVK_MemOPSize; + + MemIntrinsicPlugin(Function &Fn) : F(Fn), Candidates(nullptr) {} + + void run(std::vector &Cs) { + Candidates = &Cs; + visit(F); + Candidates = nullptr; + } + void visitMemIntrinsic(MemIntrinsic &MI) { + Value *Length = MI.getLength(); + // Not instrument constant length calls. + if (dyn_cast(Length)) + return; + + Instruction *InsertPt = &MI; + Instruction *AnnotatedInst = &MI; + Candidates->emplace_back(CandidateInfo{Length, InsertPt, AnnotatedInst}); + } +}; + +///------------------------ IndirectCallPromotionPlugin ------------------------ +class IndirectCallPromotionPlugin { + Function &F; + +public: + static constexpr InstrProfValueKind Kind = IPVK_IndirectCallTarget; + + IndirectCallPromotionPlugin(Function &Fn) : F(Fn) {} + + void run(std::vector &Candidates) { + std::vector Result = findIndirectCalls(F); + for (Instruction *I : Result) { + Value *Callee = CallSite(I).getCalledValue(); + Instruction *InsertPt = I; + Instruction *AnnotatedInst = I; + Candidates.emplace_back(CandidateInfo{Callee, InsertPt, AnnotatedInst}); + } + } +}; + +///----------------------- Registration of the plugins ------------------------- +/// For now, registering a plugin with the ValueProfileCollector is done by +/// adding the plugin type to the VP_PLUGIN_LIST macro. 
+#define VP_PLUGIN_LIST \ + MemIntrinsicPlugin, \ + IndirectCallPromotionPlugin diff --git a/lib/Transforms/ObjCARC/PtrState.cpp b/lib/Transforms/ObjCARC/PtrState.cpp index 3243481dee0d..26dd416d6184 100644 --- a/lib/Transforms/ObjCARC/PtrState.cpp +++ b/lib/Transforms/ObjCARC/PtrState.cpp @@ -275,6 +275,10 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst, } else { InsertAfter = std::next(Inst->getIterator()); } + + if (InsertAfter != BB->end()) + InsertAfter = skipDebugIntrinsics(InsertAfter); + InsertReverseInsertPt(&*InsertAfter); }; diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index de9a62e88c27..0e9f03a06061 100644 --- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -93,9 +93,7 @@ static unsigned getNewAlignmentDiff(const SCEV *DiffSCEV, const SCEV *AlignSCEV, ScalarEvolution *SE) { // DiffUnits = Diff % int64_t(Alignment) - const SCEV *DiffAlignDiv = SE->getUDivExpr(DiffSCEV, AlignSCEV); - const SCEV *DiffAlign = SE->getMulExpr(DiffAlignDiv, AlignSCEV); - const SCEV *DiffUnitsSCEV = SE->getMinusSCEV(DiffAlign, DiffSCEV); + const SCEV *DiffUnitsSCEV = SE->getURemExpr(DiffSCEV, AlignSCEV); LLVM_DEBUG(dbgs() << "\talignment relative to " << *AlignSCEV << " is " << *DiffUnitsSCEV << " (diff: " << *DiffSCEV << ")\n"); @@ -323,7 +321,7 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) { LI->getPointerOperand(), SE); if (NewAlignment > LI->getAlignment()) { - LI->setAlignment(NewAlignment); + LI->setAlignment(MaybeAlign(NewAlignment)); ++NumLoadAlignChanged; } } else if (StoreInst *SI = dyn_cast(J)) { @@ -331,7 +329,7 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) { SI->getPointerOperand(), SE); if (NewAlignment > SI->getAlignment()) { - SI->setAlignment(NewAlignment); + SI->setAlignment(MaybeAlign(NewAlignment)); ++NumStoreAlignChanged; } } else if (MemIntrinsic *MI = dyn_cast(J)) { diff --git a/lib/Transforms/Scalar/CallSiteSplitting.cpp b/lib/Transforms/Scalar/CallSiteSplitting.cpp index 3519b000a33f..c3fba923104f 100644 --- a/lib/Transforms/Scalar/CallSiteSplitting.cpp +++ b/lib/Transforms/Scalar/CallSiteSplitting.cpp @@ -562,7 +562,7 @@ struct CallSiteSplittingLegacyPass : public FunctionPass { if (skipFunction(F)) return false; - auto &TLI = getAnalysis().getTLI(); + auto &TLI = getAnalysis().getTLI(F); auto &TTI = getAnalysis().getTTI(F); auto &DT = getAnalysis().getDomTree(); return doCallSiteSplitting(F, TLI, TTI, DT); diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp index 98243a23f1ef..9f340afbf7c2 100644 --- a/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -204,7 +204,7 @@ Instruction *ConstantHoistingPass::findMatInsertPt(Instruction *Inst, /// set found in \p BBs. static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, BasicBlock *Entry, - SmallPtrSet &BBs) { + SetVector &BBs) { assert(!BBs.count(Entry) && "Assume Entry is not in BBs"); // Nodes on the current path to the root. SmallPtrSet Path; @@ -257,7 +257,7 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, // Visit Orders in bottom-up order. using InsertPtsCostPair = - std::pair, BlockFrequency>; + std::pair, BlockFrequency>; // InsertPtsMap is a map from a BB to the best insertion points for the // subtree of BB (subtree not including the BB itself). 
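The ConstantHoisting hunks above swap SmallPtrSet for SetVector when collecting candidate blocks and insertion points. Besides enabling the pop_back_val() calls used further down, SetVector keeps a deterministic, insertion-ordered view of its elements; the stand-alone snippet below (not from the patch) shows that behavior.

#include "llvm/ADT/SetVector.h"
#include <cstdio>

int main() {
  llvm::SetVector<int> Blocks;
  for (int V : {3, 1, 3, 2})
    Blocks.insert(V);                 // duplicates are silently ignored
  int Last = Blocks.pop_back_val();   // removes and returns the last element (2)
  std::printf("popped %d, %zu left:", Last, Blocks.size());
  for (int V : Blocks)
    std::printf(" %d", V);            // prints 3 1, in insertion order
  std::printf("\n");
  return 0;
}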
@@ -266,7 +266,7 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, for (auto RIt = Orders.rbegin(); RIt != Orders.rend(); RIt++) { BasicBlock *Node = *RIt; bool NodeInBBs = BBs.count(Node); - SmallPtrSet &InsertPts = InsertPtsMap[Node].first; + auto &InsertPts = InsertPtsMap[Node].first; BlockFrequency &InsertPtsFreq = InsertPtsMap[Node].second; // Return the optimal insert points in BBs. @@ -283,7 +283,7 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, BasicBlock *Parent = DT.getNode(Node)->getIDom()->getBlock(); // Initially, ParentInsertPts is empty and ParentPtsFreq is 0. Every child // will update its parent's ParentInsertPts and ParentPtsFreq. - SmallPtrSet &ParentInsertPts = InsertPtsMap[Parent].first; + auto &ParentInsertPts = InsertPtsMap[Parent].first; BlockFrequency &ParentPtsFreq = InsertPtsMap[Parent].second; // Choose to insert in Node or in subtree of Node. // Don't hoist to EHPad because we may not find a proper place to insert @@ -305,12 +305,12 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, } /// Find an insertion point that dominates all uses. -SmallPtrSet ConstantHoistingPass::findConstantInsertionPoint( +SetVector ConstantHoistingPass::findConstantInsertionPoint( const ConstantInfo &ConstInfo) const { assert(!ConstInfo.RebasedConstants.empty() && "Invalid constant info entry."); // Collect all basic blocks. - SmallPtrSet BBs; - SmallPtrSet InsertPts; + SetVector BBs; + SetVector InsertPts; for (auto const &RCI : ConstInfo.RebasedConstants) for (auto const &U : RCI.Uses) BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent()); @@ -333,15 +333,13 @@ SmallPtrSet ConstantHoistingPass::findConstantInsertionPoint( while (BBs.size() >= 2) { BasicBlock *BB, *BB1, *BB2; - BB1 = *BBs.begin(); - BB2 = *std::next(BBs.begin()); + BB1 = BBs.pop_back_val(); + BB2 = BBs.pop_back_val(); BB = DT->findNearestCommonDominator(BB1, BB2); if (BB == Entry) { InsertPts.insert(&Entry->front()); return InsertPts; } - BBs.erase(BB1); - BBs.erase(BB2); BBs.insert(BB); } assert((BBs.size() == 1) && "Expected only one element."); @@ -403,7 +401,7 @@ void ConstantHoistingPass::collectConstantCandidates( return; // Get offset from the base GV. - PointerType *GVPtrTy = dyn_cast(BaseGV->getType()); + PointerType *GVPtrTy = cast(BaseGV->getType()); IntegerType *PtrIntTy = DL->getIntPtrType(*Ctx, GVPtrTy->getAddressSpace()); APInt Offset(DL->getTypeSizeInBits(PtrIntTy), /*val*/0, /*isSigned*/true); auto *GEPO = cast(ConstExpr); @@ -830,7 +828,7 @@ bool ConstantHoistingPass::emitBaseConstants(GlobalVariable *BaseGV) { SmallVectorImpl &ConstInfoVec = BaseGV ? ConstGEPInfoMap[BaseGV] : ConstIntInfoVec; for (auto const &ConstInfo : ConstInfoVec) { - SmallPtrSet IPSet = findConstantInsertionPoint(ConstInfo); + SetVector IPSet = findConstantInsertionPoint(ConstInfo); // We can have an empty set if the function contains unreachable blocks. 
if (IPSet.empty()) continue; diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index 770321c740a0..e9e6afe3fdd4 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -82,7 +82,7 @@ bool ConstantPropagation::runOnFunction(Function &F) { bool Changed = false; const DataLayout &DL = F.getParent()->getDataLayout(); TargetLibraryInfo *TLI = - &getAnalysis().getTLI(); + &getAnalysis().getTLI(F); while (!WorkList.empty()) { SmallVector NewWorkListVec; diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 89497177524f..2ef85268df48 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -62,6 +62,23 @@ STATISTIC(NumSDivs, "Number of sdiv converted to udiv"); STATISTIC(NumUDivs, "Number of udivs whose width was decreased"); STATISTIC(NumAShrs, "Number of ashr converted to lshr"); STATISTIC(NumSRems, "Number of srem converted to urem"); +STATISTIC(NumSExt, "Number of sext converted to zext"); +STATISTIC(NumAnd, "Number of ands removed"); +STATISTIC(NumNW, "Number of no-wrap deductions"); +STATISTIC(NumNSW, "Number of no-signed-wrap deductions"); +STATISTIC(NumNUW, "Number of no-unsigned-wrap deductions"); +STATISTIC(NumAddNW, "Number of no-wrap deductions for add"); +STATISTIC(NumAddNSW, "Number of no-signed-wrap deductions for add"); +STATISTIC(NumAddNUW, "Number of no-unsigned-wrap deductions for add"); +STATISTIC(NumSubNW, "Number of no-wrap deductions for sub"); +STATISTIC(NumSubNSW, "Number of no-signed-wrap deductions for sub"); +STATISTIC(NumSubNUW, "Number of no-unsigned-wrap deductions for sub"); +STATISTIC(NumMulNW, "Number of no-wrap deductions for mul"); +STATISTIC(NumMulNSW, "Number of no-signed-wrap deductions for mul"); +STATISTIC(NumMulNUW, "Number of no-unsigned-wrap deductions for mul"); +STATISTIC(NumShlNW, "Number of no-wrap deductions for shl"); +STATISTIC(NumShlNSW, "Number of no-signed-wrap deductions for shl"); +STATISTIC(NumShlNUW, "Number of no-unsigned-wrap deductions for shl"); STATISTIC(NumOverflows, "Number of overflow checks removed"); STATISTIC(NumSaturating, "Number of saturating arithmetics converted to normal arithmetics"); @@ -85,6 +102,7 @@ namespace { AU.addRequired(); AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); } }; @@ -416,37 +434,96 @@ static bool willNotOverflow(BinaryOpIntrinsic *BO, LazyValueInfo *LVI) { return NWRegion.contains(LRange); } -static void processOverflowIntrinsic(WithOverflowInst *WO) { - IRBuilder<> B(WO); - Value *NewOp = B.CreateBinOp( - WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), WO->getName()); - // Constant-folding could have happened. 
- if (auto *Inst = dyn_cast(NewOp)) { - if (WO->isSigned()) +static void setDeducedOverflowingFlags(Value *V, Instruction::BinaryOps Opcode, + bool NewNSW, bool NewNUW) { + Statistic *OpcNW, *OpcNSW, *OpcNUW; + switch (Opcode) { + case Instruction::Add: + OpcNW = &NumAddNW; + OpcNSW = &NumAddNSW; + OpcNUW = &NumAddNUW; + break; + case Instruction::Sub: + OpcNW = &NumSubNW; + OpcNSW = &NumSubNSW; + OpcNUW = &NumSubNUW; + break; + case Instruction::Mul: + OpcNW = &NumMulNW; + OpcNSW = &NumMulNSW; + OpcNUW = &NumMulNUW; + break; + case Instruction::Shl: + OpcNW = &NumShlNW; + OpcNSW = &NumShlNSW; + OpcNUW = &NumShlNUW; + break; + default: + llvm_unreachable("Will not be called with other binops"); + } + + auto *Inst = dyn_cast(V); + if (NewNSW) { + ++NumNW; + ++*OpcNW; + ++NumNSW; + ++*OpcNSW; + if (Inst) Inst->setHasNoSignedWrap(); - else + } + if (NewNUW) { + ++NumNW; + ++*OpcNW; + ++NumNUW; + ++*OpcNUW; + if (Inst) Inst->setHasNoUnsignedWrap(); } +} - Value *NewI = B.CreateInsertValue(UndefValue::get(WO->getType()), NewOp, 0); - NewI = B.CreateInsertValue(NewI, ConstantInt::getFalse(WO->getContext()), 1); +static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI); + +// Rewrite this with.overflow intrinsic as non-overflowing. +static void processOverflowIntrinsic(WithOverflowInst *WO, LazyValueInfo *LVI) { + IRBuilder<> B(WO); + Instruction::BinaryOps Opcode = WO->getBinaryOp(); + bool NSW = WO->isSigned(); + bool NUW = !WO->isSigned(); + + Value *NewOp = + B.CreateBinOp(Opcode, WO->getLHS(), WO->getRHS(), WO->getName()); + setDeducedOverflowingFlags(NewOp, Opcode, NSW, NUW); + + StructType *ST = cast(WO->getType()); + Constant *Struct = ConstantStruct::get(ST, + { UndefValue::get(ST->getElementType(0)), + ConstantInt::getFalse(ST->getElementType(1)) }); + Value *NewI = B.CreateInsertValue(Struct, NewOp, 0); WO->replaceAllUsesWith(NewI); WO->eraseFromParent(); ++NumOverflows; + + // See if we can infer the other no-wrap too. + if (auto *BO = dyn_cast(NewOp)) + processBinOp(BO, LVI); } -static void processSaturatingInst(SaturatingInst *SI) { +static void processSaturatingInst(SaturatingInst *SI, LazyValueInfo *LVI) { + Instruction::BinaryOps Opcode = SI->getBinaryOp(); + bool NSW = SI->isSigned(); + bool NUW = !SI->isSigned(); BinaryOperator *BinOp = BinaryOperator::Create( - SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI); + Opcode, SI->getLHS(), SI->getRHS(), SI->getName(), SI); BinOp->setDebugLoc(SI->getDebugLoc()); - if (SI->isSigned()) - BinOp->setHasNoSignedWrap(); - else - BinOp->setHasNoUnsignedWrap(); + setDeducedOverflowingFlags(BinOp, Opcode, NSW, NUW); SI->replaceAllUsesWith(BinOp); SI->eraseFromParent(); ++NumSaturating; + + // See if we can infer the other no-wrap too. + if (auto *BO = dyn_cast(BinOp)) + processBinOp(BO, LVI); } /// Infer nonnull attributes for the arguments at the specified callsite. 
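The no-wrap deductions above, like the processBinOp logic that follows, rest on ConstantRange::makeGuaranteedNoWrapRegion: when the range of one operand lies entirely inside the region guaranteed not to wrap for the other operand's range, the nsw/nuw flag can be added. The following is a minimal, self-contained sketch of that check using made-up ranges in place of LazyValueInfo results.

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Operator.h"
#include <cstdio>

int main() {
  using namespace llvm;
  using OBO = OverflowingBinaryOperator;
  ConstantRange LRange(APInt(32, 0), APInt(32, 100)); // LHS known in [0, 100)
  ConstantRange RRange(APInt(32, 1), APInt(32, 10));  // RHS known in [1, 10)
  ConstantRange NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
      Instruction::Add, RRange, OBO::NoSignedWrap);
  // If every possible LHS value sits inside the no-wrap region, the add can
  // safely be marked nsw.
  std::printf("add is provably nsw: %d\n", NSWRegion.contains(LRange) ? 1 : 0);
  return 0;
}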
@@ -456,14 +533,14 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) { if (auto *WO = dyn_cast(CS.getInstruction())) { if (WO->getLHS()->getType()->isIntegerTy() && willNotOverflow(WO, LVI)) { - processOverflowIntrinsic(WO); + processOverflowIntrinsic(WO, LVI); return true; } } if (auto *SI = dyn_cast(CS.getInstruction())) { if (SI->getType()->isIntegerTy() && willNotOverflow(SI, LVI)) { - processSaturatingInst(SI); + processSaturatingInst(SI, LVI); return true; } } @@ -632,6 +709,27 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) { return true; } +static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) { + if (SDI->getType()->isVectorTy()) + return false; + + Value *Base = SDI->getOperand(0); + + Constant *Zero = ConstantInt::get(Base->getType(), 0); + if (LVI->getPredicateAt(ICmpInst::ICMP_SGE, Base, Zero, SDI) != + LazyValueInfo::True) + return false; + + ++NumSExt; + auto *ZExt = + CastInst::CreateZExtOrBitCast(Base, SDI->getType(), SDI->getName(), SDI); + ZExt->setDebugLoc(SDI->getDebugLoc()); + SDI->replaceAllUsesWith(ZExt); + SDI->eraseFromParent(); + + return true; +} + static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) { using OBO = OverflowingBinaryOperator; @@ -648,6 +746,7 @@ static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) { BasicBlock *BB = BinOp->getParent(); + Instruction::BinaryOps Opcode = BinOp->getOpcode(); Value *LHS = BinOp->getOperand(0); Value *RHS = BinOp->getOperand(1); @@ -655,24 +754,48 @@ static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) { ConstantRange RRange = LVI->getConstantRange(RHS, BB, BinOp); bool Changed = false; + bool NewNUW = false, NewNSW = false; if (!NUW) { ConstantRange NUWRange = ConstantRange::makeGuaranteedNoWrapRegion( - BinOp->getOpcode(), RRange, OBO::NoUnsignedWrap); - bool NewNUW = NUWRange.contains(LRange); - BinOp->setHasNoUnsignedWrap(NewNUW); + Opcode, RRange, OBO::NoUnsignedWrap); + NewNUW = NUWRange.contains(LRange); Changed |= NewNUW; } if (!NSW) { ConstantRange NSWRange = ConstantRange::makeGuaranteedNoWrapRegion( - BinOp->getOpcode(), RRange, OBO::NoSignedWrap); - bool NewNSW = NSWRange.contains(LRange); - BinOp->setHasNoSignedWrap(NewNSW); + Opcode, RRange, OBO::NoSignedWrap); + NewNSW = NSWRange.contains(LRange); Changed |= NewNSW; } + setDeducedOverflowingFlags(BinOp, Opcode, NewNSW, NewNUW); + return Changed; } +static bool processAnd(BinaryOperator *BinOp, LazyValueInfo *LVI) { + if (BinOp->getType()->isVectorTy()) + return false; + + // Pattern match (and lhs, C) where C includes a superset of bits which might + // be set in lhs. This is a common truncation idiom created by instcombine. 
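A standalone C++ sketch of the check processAnd performs: when the right-hand side is a low-bit mask and the known unsigned maximum of the left-hand side already fits under it, the `and` is a no-op and can be removed (the helper name and the knownUMax parameter are illustrative stand-ins for the LVI query):

#include <cassert>
#include <cstdint>

// If `mask` has the form 2^k - 1 and x's unsigned maximum is provably at
// most `mask`, then `x & mask` is just `x`, so the and can be dropped.
bool andIsRedundant(uint64_t knownUMax, uint64_t mask) {
  bool isLowBitMask = mask != 0 && ((mask & (mask + 1)) == 0);
  return isLowBitMask && knownUMax <= mask;
}

int main() {
  // x known to be < 256, masked with 0xFF: the and changes nothing.
  assert(andIsRedundant(/*knownUMax=*/255, /*mask=*/0xFF));
  // x may be as large as 300: the and really does truncate.
  assert(!andIsRedundant(/*knownUMax=*/300, /*mask=*/0xFF));
  return 0;
}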
+ BasicBlock *BB = BinOp->getParent(); + Value *LHS = BinOp->getOperand(0); + ConstantInt *RHS = dyn_cast(BinOp->getOperand(1)); + if (!RHS || !RHS->getValue().isMask()) + return false; + + ConstantRange LRange = LVI->getConstantRange(LHS, BB, BinOp); + if (!LRange.getUnsignedMax().ule(RHS->getValue())) + return false; + + BinOp->replaceAllUsesWith(LHS); + BinOp->eraseFromParent(); + NumAnd++; + return true; +} + + static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) { if (Constant *C = LVI->getConstant(V, At->getParent(), At)) return C; @@ -740,10 +863,18 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT, case Instruction::AShr: BBChanged |= processAShr(cast(II), LVI); break; + case Instruction::SExt: + BBChanged |= processSExt(cast(II), LVI); + break; case Instruction::Add: case Instruction::Sub: + case Instruction::Mul: + case Instruction::Shl: BBChanged |= processBinOp(cast(II), LVI); break; + case Instruction::And: + BBChanged |= processAnd(cast(II), LVI); + break; } } @@ -796,5 +927,6 @@ CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) { PreservedAnalyses PA; PA.preserve(); PA.preserve(); + PA.preserve(); return PA; } diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index 479e0ed74074..a79d775aa7f3 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -38,17 +38,19 @@ namespace { //===--------------------------------------------------------------------===// // DeadInstElimination pass implementation // - struct DeadInstElimination : public BasicBlockPass { - static char ID; // Pass identification, replacement for typeid - DeadInstElimination() : BasicBlockPass(ID) { - initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry()); - } - bool runOnBasicBlock(BasicBlock &BB) override { - if (skipBasicBlock(BB)) - return false; - auto *TLIP = getAnalysisIfAvailable(); - TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr; - bool Changed = false; +struct DeadInstElimination : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + DeadInstElimination() : FunctionPass(ID) { + initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + auto *TLIP = getAnalysisIfAvailable(); + TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI(F) : nullptr; + + bool Changed = false; + for (auto &BB : F) { for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) { Instruction *Inst = &*DI++; if (isInstructionTriviallyDead(Inst, TLI)) { @@ -60,13 +62,14 @@ namespace { ++DIEEliminated; } } - return Changed; } + return Changed; + } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); } - }; +}; } char DeadInstElimination::ID = 0; @@ -154,7 +157,7 @@ struct DCELegacyPass : public FunctionPass { return false; auto *TLIP = getAnalysisIfAvailable(); - TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr; + TargetLibraryInfo *TLI = TLIP ? 
&TLIP->getTLI(F) : nullptr; return eliminateDeadCode(F, TLI); } diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index a81645745b48..685de82810ed 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1254,8 +1254,9 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, auto *SI = new StoreInst( ConstantInt::get(Earlier->getValueOperand()->getType(), Merged), - Earlier->getPointerOperand(), false, Earlier->getAlignment(), - Earlier->getOrdering(), Earlier->getSyncScopeID(), DepWrite); + Earlier->getPointerOperand(), false, + MaybeAlign(Earlier->getAlignment()), Earlier->getOrdering(), + Earlier->getSyncScopeID(), DepWrite); unsigned MDToKeep[] = {LLVMContext::MD_dbg, LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, @@ -1361,7 +1362,7 @@ public: MemoryDependenceResults *MD = &getAnalysis().getMemDep(); const TargetLibraryInfo *TLI = - &getAnalysis().getTLI(); + &getAnalysis().getTLI(F); return eliminateDeadStores(F, AA, MD, DT, TLI); } diff --git a/lib/Transforms/Scalar/DivRemPairs.cpp b/lib/Transforms/Scalar/DivRemPairs.cpp index 876681b4f9de..934853507478 100644 --- a/lib/Transforms/Scalar/DivRemPairs.cpp +++ b/lib/Transforms/Scalar/DivRemPairs.cpp @@ -1,4 +1,4 @@ -//===- DivRemPairs.cpp - Hoist/decompose division and remainder -*- C++ -*-===// +//===- DivRemPairs.cpp - Hoist/[dr]ecompose division and remainder --------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This pass hoists and/or decomposes integer division and remainder +// This pass hoists and/or decomposes/recomposes integer division and remainder // instructions to enable CFG improvements and better codegen. // //===----------------------------------------------------------------------===// @@ -19,37 +19,105 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Pass.h" #include "llvm/Support/DebugCounter.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BypassSlowDivision.h" + using namespace llvm; +using namespace llvm::PatternMatch; #define DEBUG_TYPE "div-rem-pairs" STATISTIC(NumPairs, "Number of div/rem pairs"); +STATISTIC(NumRecomposed, "Number of instructions recomposed"); STATISTIC(NumHoisted, "Number of instructions hoisted"); STATISTIC(NumDecomposed, "Number of instructions decomposed"); DEBUG_COUNTER(DRPCounter, "div-rem-pairs-transform", "Controls transformations in div-rem-pairs pass"); -/// Find matching pairs of integer div/rem ops (they have the same numerator, -/// denominator, and signedness). If they exist in different basic blocks, bring -/// them together by hoisting or replace the common division operation that is -/// implicit in the remainder: -/// X % Y <--> X - ((X / Y) * Y). -/// -/// We can largely ignore the normal safety and cost constraints on speculation -/// of these ops when we find a matching pair. This is because we are already -/// guaranteed that any exceptions and most cost are already incurred by the -/// first member of the pair. 
-/// -/// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or -/// SimplifyCFG, but it's split off on its own because it's different enough -/// that it doesn't quite match the stated objectives of those passes. -static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, - const DominatorTree &DT) { - bool Changed = false; +namespace { +struct ExpandedMatch { + DivRemMapKey Key; + Instruction *Value; +}; +} // namespace + +/// See if we can match: (which is the form we expand into) +/// X - ((X ?/ Y) * Y) +/// which is equivalent to: +/// X ?% Y +static llvm::Optional matchExpandedRem(Instruction &I) { + Value *Dividend, *XroundedDownToMultipleOfY; + if (!match(&I, m_Sub(m_Value(Dividend), m_Value(XroundedDownToMultipleOfY)))) + return llvm::None; + + Value *Divisor; + Instruction *Div; + // Look for ((X / Y) * Y) + if (!match( + XroundedDownToMultipleOfY, + m_c_Mul(m_CombineAnd(m_IDiv(m_Specific(Dividend), m_Value(Divisor)), + m_Instruction(Div)), + m_Deferred(Divisor)))) + return llvm::None; + + ExpandedMatch M; + M.Key.SignedOp = Div->getOpcode() == Instruction::SDiv; + M.Key.Dividend = Dividend; + M.Key.Divisor = Divisor; + M.Value = &I; + return M; +} + +/// A thin wrapper to store two values that we matched as div-rem pair. +/// We want this extra indirection to avoid dealing with RAUW'ing the map keys. +struct DivRemPairWorklistEntry { + /// The actual udiv/sdiv instruction. Source of truth. + AssertingVH DivInst; + + /// The instruction that we have matched as a remainder instruction. + /// Should only be used as Value, don't introspect it. + AssertingVH RemInst; + + DivRemPairWorklistEntry(Instruction *DivInst_, Instruction *RemInst_) + : DivInst(DivInst_), RemInst(RemInst_) { + assert((DivInst->getOpcode() == Instruction::UDiv || + DivInst->getOpcode() == Instruction::SDiv) && + "Not a division."); + assert(DivInst->getType() == RemInst->getType() && "Types should match."); + // We can't check anything else about remainder instruction, + // it's not strictly required to be a urem/srem. + } + /// The type for this pair, identical for both the div and rem. + Type *getType() const { return DivInst->getType(); } + + /// Is this pair signed or unsigned? + bool isSigned() const { return DivInst->getOpcode() == Instruction::SDiv; } + + /// In this pair, what are the divident and divisor? + Value *getDividend() const { return DivInst->getOperand(0); } + Value *getDivisor() const { return DivInst->getOperand(1); } + + bool isRemExpanded() const { + switch (RemInst->getOpcode()) { + case Instruction::SRem: + case Instruction::URem: + return false; // single 'rem' instruction - unexpanded form. + default: + return true; // anything else means we have remainder in expanded form. + } + } +}; +using DivRemWorklistTy = SmallVector; + +/// Find matching pairs of integer div/rem ops (they have the same numerator, +/// denominator, and signedness). Place those pairs into a worklist for further +/// processing. This indirection is needed because we have to use TrackingVH<> +/// because we will be doing RAUW, and if one of the rem instructions we change +/// happens to be an input to another div/rem in the maps, we'd have problems. +static DivRemWorklistTy getWorklist(Function &F) { // Insert all divide and remainder instructions into maps keyed by their // operands and opcode (signed or unsigned). 
DenseMap DivMap; @@ -66,9 +134,14 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, RemMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I; else if (I.getOpcode() == Instruction::URem) RemMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I; + else if (auto Match = matchExpandedRem(I)) + RemMap[Match->Key] = Match->Value; } } + // We'll accumulate the matching pairs of div-rem instructions here. + DivRemWorklistTy Worklist; + // We can iterate over either map because we are only looking for matched // pairs. Choose remainders for efficiency because they are usually even more // rare than division. @@ -78,12 +151,77 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, if (!DivInst) continue; - // We have a matching pair of div/rem instructions. If one dominates the - // other, hoist and/or replace one. + // We have a matching pair of div/rem instructions. NumPairs++; Instruction *RemInst = RemPair.second; - bool IsSigned = DivInst->getOpcode() == Instruction::SDiv; - bool HasDivRemOp = TTI.hasDivRemOp(DivInst->getType(), IsSigned); + + // Place it in the worklist. + Worklist.emplace_back(DivInst, RemInst); + } + + return Worklist; +} + +/// Find matching pairs of integer div/rem ops (they have the same numerator, +/// denominator, and signedness). If they exist in different basic blocks, bring +/// them together by hoisting or replace the common division operation that is +/// implicit in the remainder: +/// X % Y <--> X - ((X / Y) * Y). +/// +/// We can largely ignore the normal safety and cost constraints on speculation +/// of these ops when we find a matching pair. This is because we are already +/// guaranteed that any exceptions and most cost are already incurred by the +/// first member of the pair. +/// +/// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or +/// SimplifyCFG, but it's split off on its own because it's different enough +/// that it doesn't quite match the stated objectives of those passes. +static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, + const DominatorTree &DT) { + bool Changed = false; + + // Get the matching pairs of div-rem instructions. We want this extra + // indirection to avoid dealing with having to RAUW the keys of the maps. + DivRemWorklistTy Worklist = getWorklist(F); + + // Process each entry in the worklist. + for (DivRemPairWorklistEntry &E : Worklist) { + if (!DebugCounter::shouldExecute(DRPCounter)) + continue; + + bool HasDivRemOp = TTI.hasDivRemOp(E.getType(), E.isSigned()); + + auto &DivInst = E.DivInst; + auto &RemInst = E.RemInst; + + const bool RemOriginallyWasInExpandedForm = E.isRemExpanded(); + (void)RemOriginallyWasInExpandedForm; // suppress unused variable warning + + if (HasDivRemOp && E.isRemExpanded()) { + // The target supports div+rem but the rem is expanded. + // We should recompose it first. + Value *X = E.getDividend(); + Value *Y = E.getDivisor(); + Instruction *RealRem = E.isSigned() ? BinaryOperator::CreateSRem(X, Y) + : BinaryOperator::CreateURem(X, Y); + // Note that we place it right next to the original expanded instruction, + // and letting further handling to move it if needed. + RealRem->setName(RemInst->getName() + ".recomposed"); + RealRem->insertAfter(RemInst); + Instruction *OrigRemInst = RemInst; + // Update AssertingVH<> with new instruction so it doesn't assert. + RemInst = RealRem; + // And replace the original instruction with the new one. 
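The recomposition relies on the identity X - ((X / Y) * Y) == X % Y; a quick standalone C++ check of that identity for both signednesses (assuming a non-zero divisor and, for the signed case, excluding the INT_MIN / -1 corner):

#include <cassert>
#include <cstdint>

// X - ((X / Y) * Y) is exactly X % Y, which is what lets the pass recompose
// the expanded form back into a single rem when the target has a combined
// div/rem operation, or expand a rem when it does not.
int32_t expandedSRem(int32_t x, int32_t y) { return x - (x / y) * y; }
uint32_t expandedURem(uint32_t x, uint32_t y) { return x - (x / y) * y; }

int main() {
  assert(expandedSRem(7, 3) == 7 % 3);
  assert(expandedSRem(-7, 3) == -7 % 3); // rounds toward zero, like srem
  assert(expandedURem(7u, 3u) == 7u % 3u);
  return 0;
}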
+ OrigRemInst->replaceAllUsesWith(RealRem); + OrigRemInst->eraseFromParent(); + NumRecomposed++; + // Note that we have left ((X / Y) * Y) around. + // If it had other uses we could rewrite it as X - X % Y + } + + assert((!E.isRemExpanded() || !HasDivRemOp) && + "*If* the target supports div-rem, then by now the RemInst *is* " + "Instruction::[US]Rem."); // If the target supports div+rem and the instructions are in the same block // already, there's nothing to do. The backend should handle this. If the @@ -92,10 +230,16 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, continue; bool DivDominates = DT.dominates(DivInst, RemInst); - if (!DivDominates && !DT.dominates(RemInst, DivInst)) + if (!DivDominates && !DT.dominates(RemInst, DivInst)) { + // We have matching div-rem pair, but they are in two different blocks, + // neither of which dominates one another. + // FIXME: We could hoist both ops to the common predecessor block? continue; + } - if (!DebugCounter::shouldExecute(DRPCounter)) + // The target does not have a single div/rem operation, + // and the rem is already in expanded form. Nothing to do. + if (!HasDivRemOp && E.isRemExpanded()) continue; if (HasDivRemOp) { @@ -107,11 +251,17 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, DivInst->moveAfter(RemInst); NumHoisted++; } else { - // The target does not have a single div/rem operation. Decompose the - // remainder calculation as: + // The target does not have a single div/rem operation, + // and the rem is *not* in a already-expanded form. + // Decompose the remainder calculation as: // X % Y --> X - ((X / Y) * Y). - Value *X = RemInst->getOperand(0); - Value *Y = RemInst->getOperand(1); + + assert(!RemOriginallyWasInExpandedForm && + "We should not be expanding if the rem was in expanded form to " + "begin with."); + + Value *X = E.getDividend(); + Value *Y = E.getDivisor(); Instruction *Mul = BinaryOperator::CreateMul(DivInst, Y); Instruction *Sub = BinaryOperator::CreateSub(X, Mul); @@ -152,8 +302,13 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, // Now kill the explicit remainder. We have replaced it with: // (sub X, (mul (div X, Y), Y) - RemInst->replaceAllUsesWith(Sub); - RemInst->eraseFromParent(); + Sub->setName(RemInst->getName() + ".decomposed"); + Instruction *OrigRemInst = RemInst; + // Update AssertingVH<> with new instruction so it doesn't assert. + RemInst = Sub; + // And replace the original instruction with the new one. + OrigRemInst->replaceAllUsesWith(Sub); + OrigRemInst->eraseFromParent(); NumDecomposed++; } Changed = true; @@ -188,7 +343,7 @@ struct DivRemPairsLegacyPass : public FunctionPass { return optimizeDivRem(F, TTI, DT); } }; -} +} // namespace char DivRemPairsLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(DivRemPairsLegacyPass, "div-rem-pairs", diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index f1f075257020..ce540683dae2 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -108,11 +108,12 @@ struct SimpleValue { // This can only handle non-void readnone functions. 
if (CallInst *CI = dyn_cast(Inst)) return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy(); - return isa(Inst) || isa(Inst) || - isa(Inst) || isa(Inst) || - isa(Inst) || isa(Inst) || - isa(Inst) || isa(Inst) || - isa(Inst) || isa(Inst); + return isa(Inst) || isa(Inst) || + isa(Inst) || isa(Inst) || + isa(Inst) || isa(Inst) || + isa(Inst) || isa(Inst) || + isa(Inst) || isa(Inst) || + isa(Inst); } }; @@ -240,7 +241,7 @@ static unsigned getHashValueImpl(SimpleValue Val) { assert((isa(Inst) || isa(Inst) || isa(Inst) || isa(Inst) || - isa(Inst)) && + isa(Inst) || isa(Inst)) && "Invalid/unknown instruction"); // Mix in the opcode. @@ -526,7 +527,7 @@ public: const TargetTransformInfo &TTI, DominatorTree &DT, AssumptionCache &AC, MemorySSA *MSSA) : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA), - MSSAUpdater(llvm::make_unique(MSSA)) {} + MSSAUpdater(std::make_unique(MSSA)) {} bool run(); @@ -651,7 +652,7 @@ private: bool isInvariantLoad() const { if (auto *LI = dyn_cast(Inst)) - return LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr; + return LI->hasMetadata(LLVMContext::MD_invariant_load); return false; } @@ -790,7 +791,7 @@ bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) { // A location loaded from with an invariant_load is assumed to *never* change // within the visible scope of the compilation. if (auto *LI = dyn_cast(I)) - if (LI->getMetadata(LLVMContext::MD_invariant_load)) + if (LI->hasMetadata(LLVMContext::MD_invariant_load)) return true; auto MemLocOpt = MemoryLocation::getOrNone(I); @@ -1359,7 +1360,7 @@ public: if (skipFunction(F)) return false; - auto &TLI = getAnalysis().getTLI(); + auto &TLI = getAnalysis().getTLI(F); auto &TTI = getAnalysis().getTTI(F); auto &DT = getAnalysis().getDomTree(); auto &AC = getAnalysis().getAssumptionCache(F); @@ -1381,6 +1382,7 @@ public: AU.addPreserved(); } AU.addPreserved(); + AU.addPreserved(); AU.setPreservesCFG(); } }; diff --git a/lib/Transforms/Scalar/FlattenCFGPass.cpp b/lib/Transforms/Scalar/FlattenCFGPass.cpp index 31670b1464e4..e6abf1ceb026 100644 --- a/lib/Transforms/Scalar/FlattenCFGPass.cpp +++ b/lib/Transforms/Scalar/FlattenCFGPass.cpp @@ -11,10 +11,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" + using namespace llvm; #define DEBUG_TYPE "flattencfg" @@ -52,15 +54,23 @@ FunctionPass *llvm::createFlattenCFGPass() { return new FlattenCFGPass(); } static bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) { bool Changed = false; bool LocalChange = true; + + // Use block handles instead of iterating over function blocks directly + // to avoid using iterators invalidated by erasing blocks. + std::vector Blocks; + Blocks.reserve(F.size()); + for (auto &BB : F) + Blocks.push_back(&BB); + while (LocalChange) { LocalChange = false; - // Loop over all of the basic blocks and remove them if they are unneeded... - // - for (Function::iterator BBIt = F.begin(); BBIt != F.end();) { - if (FlattenCFG(&*BBIt++, AA)) { - LocalChange = true; - } + // Loop over all of the basic blocks and try to flatten them. + for (WeakVH &BlockHandle : Blocks) { + // Skip blocks erased by FlattenCFG. 
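The snapshot-and-skip pattern used above can be shown in isolation; a standalone C++ analogy in which a raw-pointer vector plus explicit nulling stands in for WeakVH (which nulls itself when the underlying block is deleted):

#include <algorithm>
#include <list>
#include <vector>

struct Block { int id; };

int main() {
  std::list<Block> func = {{0}, {1}, {2}, {3}};

  // Snapshot the blocks up front and iterate the snapshot, so erasing
  // blocks mid-walk cannot invalidate the iteration itself.
  std::vector<Block *> handles;
  for (Block &b : func)
    handles.push_back(&b);

  // Stand-in for WeakVH: when a block is erased, its handle is nulled.
  auto eraseBlock = [&](Block *b) {
    *std::find(handles.begin(), handles.end(), b) = nullptr;
    func.remove_if([b](const Block &x) { return &x == b; });
  };

  for (Block *h : handles) {
    if (!h)
      continue; // skip blocks a previous step already erased
    if (h->id == 1) {
      // A step that erases a *different* block than the current one.
      Block *victim = nullptr;
      for (Block &b : func)
        if (b.id == 2)
          victim = &b;
      if (victim)
        eraseBlock(victim);
    }
  }
  return 0;
}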
+ if (auto *BB = cast_or_null(BlockHandle)) + if (FlattenCFG(BB, AA)) + LocalChange = true; } Changed |= LocalChange; } diff --git a/lib/Transforms/Scalar/Float2Int.cpp b/lib/Transforms/Scalar/Float2Int.cpp index 4f83e869b303..4d2eac0451df 100644 --- a/lib/Transforms/Scalar/Float2Int.cpp +++ b/lib/Transforms/Scalar/Float2Int.cpp @@ -60,11 +60,13 @@ namespace { if (skipFunction(F)) return false; - return Impl.runImpl(F); + const DominatorTree &DT = getAnalysis().getDomTree(); + return Impl.runImpl(F, DT); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + AU.addRequired(); AU.addPreserved(); } @@ -116,21 +118,29 @@ static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) { // Find the roots - instructions that convert from the FP domain to // integer domain. -void Float2IntPass::findRoots(Function &F, SmallPtrSet &Roots) { - for (auto &I : instructions(F)) { - if (isa(I.getType())) +void Float2IntPass::findRoots(Function &F, const DominatorTree &DT, + SmallPtrSet &Roots) { + for (BasicBlock &BB : F) { + // Unreachable code can take on strange forms that we are not prepared to + // handle. For example, an instruction may have itself as an operand. + if (!DT.isReachableFromEntry(&BB)) continue; - switch (I.getOpcode()) { - default: break; - case Instruction::FPToUI: - case Instruction::FPToSI: - Roots.insert(&I); - break; - case Instruction::FCmp: - if (mapFCmpPred(cast(&I)->getPredicate()) != - CmpInst::BAD_ICMP_PREDICATE) + + for (Instruction &I : BB) { + if (isa(I.getType())) + continue; + switch (I.getOpcode()) { + default: break; + case Instruction::FPToUI: + case Instruction::FPToSI: Roots.insert(&I); - break; + break; + case Instruction::FCmp: + if (mapFCmpPred(cast(&I)->getPredicate()) != + CmpInst::BAD_ICMP_PREDICATE) + Roots.insert(&I); + break; + } } } } @@ -503,7 +513,7 @@ void Float2IntPass::cleanup() { I.first->eraseFromParent(); } -bool Float2IntPass::runImpl(Function &F) { +bool Float2IntPass::runImpl(Function &F, const DominatorTree &DT) { LLVM_DEBUG(dbgs() << "F2I: Looking at function " << F.getName() << "\n"); // Clear out all state. 
ECs = EquivalenceClasses(); @@ -513,7 +523,7 @@ bool Float2IntPass::runImpl(Function &F) { Ctx = &F.getParent()->getContext(); - findRoots(F, Roots); + findRoots(F, DT, Roots); walkBackwards(Roots); walkForwards(); @@ -527,8 +537,9 @@ bool Float2IntPass::runImpl(Function &F) { namespace llvm { FunctionPass *createFloat2IntPass() { return new Float2IntLegacyPass(); } -PreservedAnalyses Float2IntPass::run(Function &F, FunctionAnalysisManager &) { - if (!runImpl(F)) +PreservedAnalyses Float2IntPass::run(Function &F, FunctionAnalysisManager &AM) { + const DominatorTree &DT = AM.getResult(F); + if (!runImpl(F, DT)) return PreservedAnalyses::all(); PreservedAnalyses PA; diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 1a02e9d33f49..743353eaea22 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -70,6 +70,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -626,6 +627,8 @@ PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) { PA.preserve(); PA.preserve(); PA.preserve(); + if (LI) + PA.preserve(); return PA; } @@ -1161,15 +1164,30 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // Do PHI translation to get its value in the predecessor if necessary. The // returned pointer (if non-null) is guaranteed to dominate UnavailablePred. + // We do the translation for each edge we skipped by going from LI's block + // to LoadBB, otherwise we might miss pieces needing translation. // If all preds have a single successor, then we know it is safe to insert // the load on the pred (?!?), so we can insert code to materialize the // pointer if it is not available. - PHITransAddr Address(LI->getPointerOperand(), DL, AC); - Value *LoadPtr = nullptr; - LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, - *DT, NewInsts); + Value *LoadPtr = LI->getPointerOperand(); + BasicBlock *Cur = LI->getParent(); + while (Cur != LoadBB) { + PHITransAddr Address(LoadPtr, DL, AC); + LoadPtr = Address.PHITranslateWithInsertion( + Cur, Cur->getSinglePredecessor(), *DT, NewInsts); + if (!LoadPtr) { + CanDoPRE = false; + break; + } + Cur = Cur->getSinglePredecessor(); + } + if (LoadPtr) { + PHITransAddr Address(LoadPtr, DL, AC); + LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, *DT, + NewInsts); + } // If we couldn't find or insert a computation of this phi translated value, // we fail PRE. if (!LoadPtr) { @@ -1184,8 +1202,12 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, if (!CanDoPRE) { while (!NewInsts.empty()) { - Instruction *I = NewInsts.pop_back_val(); - markInstructionForDeletion(I); + // Erase instructions generated by the failed PHI translation before + // trying to number them. PHI translation might insert instructions + // in basic blocks other than the current one, and we delete them + // directly, as markInstructionForDeletion only allows removing from the + // current basic block. + NewInsts.pop_back_val()->eraseFromParent(); } // HINT: Don't revert the edge-splitting as following transformation may // also need to split these critical edges. 
@@ -1219,10 +1241,10 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, BasicBlock *UnavailablePred = PredLoad.first; Value *LoadPtr = PredLoad.second; - auto *NewLoad = - new LoadInst(LI->getType(), LoadPtr, LI->getName() + ".pre", - LI->isVolatile(), LI->getAlignment(), LI->getOrdering(), - LI->getSyncScopeID(), UnavailablePred->getTerminator()); + auto *NewLoad = new LoadInst( + LI->getType(), LoadPtr, LI->getName() + ".pre", LI->isVolatile(), + MaybeAlign(LI->getAlignment()), LI->getOrdering(), LI->getSyncScopeID(), + UnavailablePred->getTerminator()); NewLoad->setDebugLoc(LI->getDebugLoc()); // Transfer the old load's AA tags to the new load. @@ -1365,6 +1387,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks); } +static bool hasUsersIn(Value *V, BasicBlock *BB) { + for (User *U : V->users()) + if (isa(U) && + cast(U)->getParent() == BB) + return true; + return false; +} + bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) { assert(IntrinsicI->getIntrinsicID() == Intrinsic::assume && "This function can only be called with llvm.assume intrinsic"); @@ -1403,12 +1433,23 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) { // We can replace assume value with true, which covers cases like this: // call void @llvm.assume(i1 %cmp) // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true - ReplaceWithConstMap[V] = True; - - // If one of *cmp *eq operand is const, adding it to map will cover this: + ReplaceOperandsWithMap[V] = True; + + // If we find an equality fact, canonicalize all dominated uses in this block + // to one of the two values. We heuristically choice the "oldest" of the + // two where age is determined by value number. (Note that propagateEquality + // above handles the cross block case.) + // + // Key case to cover are: + // 1) // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen // call void @llvm.assume(i1 %cmp) // ret float %0 ; will change it to ret float 3.000000e+00 + // 2) + // %load = load float, float* %addr + // %cmp = fcmp oeq float %load, %0 + // call void @llvm.assume(i1 %cmp) + // ret float %load ; will change it to ret float %0 if (auto *CmpI = dyn_cast(V)) { if (CmpI->getPredicate() == CmpInst::Predicate::ICMP_EQ || CmpI->getPredicate() == CmpInst::Predicate::FCMP_OEQ || @@ -1416,13 +1457,50 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) { CmpI->getFastMathFlags().noNaNs())) { Value *CmpLHS = CmpI->getOperand(0); Value *CmpRHS = CmpI->getOperand(1); - if (isa(CmpLHS)) + // Heuristically pick the better replacement -- the choice of heuristic + // isn't terribly important here, but the fact we canonicalize on some + // replacement is for exposing other simplifications. + // TODO: pull this out as a helper function and reuse w/existing + // (slightly different) logic. + if (isa(CmpLHS) && !isa(CmpRHS)) std::swap(CmpLHS, CmpRHS); - auto *RHSConst = dyn_cast(CmpRHS); + if (!isa(CmpLHS) && isa(CmpRHS)) + std::swap(CmpLHS, CmpRHS); + if ((isa(CmpLHS) && isa(CmpRHS)) || + (isa(CmpLHS) && isa(CmpRHS))) { + // Move the 'oldest' value to the right-hand side, using the value + // number as a proxy for age. + uint32_t LVN = VN.lookupOrAdd(CmpLHS); + uint32_t RVN = VN.lookupOrAdd(CmpRHS); + if (LVN < RVN) + std::swap(CmpLHS, CmpRHS); + } - // If only one operand is constant. 
- if (RHSConst != nullptr && !isa(CmpLHS)) - ReplaceWithConstMap[CmpLHS] = RHSConst; + // Handle degenerate case where we either haven't pruned a dead path or a + // removed a trivial assume yet. + if (isa(CmpLHS) && isa(CmpRHS)) + return Changed; + + // +0.0 and -0.0 compare equal, but do not imply equivalence. Unless we + // can prove equivalence, bail. + if (CmpRHS->getType()->isFloatTy() && + (!isa(CmpRHS) || cast(CmpRHS)->isZero())) + return Changed; + + LLVM_DEBUG(dbgs() << "Replacing dominated uses of " + << *CmpLHS << " with " + << *CmpRHS << " in block " + << IntrinsicI->getParent()->getName() << "\n"); + + + // Setup the replacement map - this handles uses within the same block + if (hasUsersIn(CmpLHS, IntrinsicI->getParent())) + ReplaceOperandsWithMap[CmpLHS] = CmpRHS; + + // NOTE: The non-block local cases are handled by the call to + // propagateEquality above; this block is just about handling the block + // local cases. TODO: There's a bunch of logic in propagateEqualiy which + // isn't duplicated for the block local case, can we share it somehow? } } return Changed; @@ -1522,6 +1600,41 @@ uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred, return NewNum; } +// Return true if the value number \p Num and NewNum have equal value. +// Return false if the result is unknown. +bool GVN::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum, + const BasicBlock *Pred, + const BasicBlock *PhiBlock, GVN &Gvn) { + CallInst *Call = nullptr; + LeaderTableEntry *Vals = &Gvn.LeaderTable[Num]; + while (Vals) { + Call = dyn_cast(Vals->Val); + if (Call && Call->getParent() == PhiBlock) + break; + Vals = Vals->Next; + } + + if (AA->doesNotAccessMemory(Call)) + return true; + + if (!MD || !AA->onlyReadsMemory(Call)) + return false; + + MemDepResult local_dep = MD->getDependency(Call); + if (!local_dep.isNonLocal()) + return false; + + const MemoryDependenceResults::NonLocalDepInfo &deps = + MD->getNonLocalCallDependency(Call); + + // Check to see if the Call has no function local clobber. + for (unsigned i = 0; i < deps.size(); i++) { + if (deps[i].getResult().isNonFuncLocal()) + return true; + } + return false; +} + /// Translate value number \p Num using phis, so that it has the values of /// the phis in BB. uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred, @@ -1568,8 +1681,11 @@ uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred, } } - if (uint32_t NewNum = expressionNumbering[Exp]) + if (uint32_t NewNum = expressionNumbering[Exp]) { + if (Exp.opcode == Instruction::Call && NewNum != Num) + return areCallValsEqual(Num, NewNum, Pred, PhiBlock, Gvn) ? NewNum : Num; return NewNum; + } return Num; } @@ -1637,16 +1753,12 @@ void GVN::assignBlockRPONumber(Function &F) { InvalidBlockRPONumbers = false; } -// Tries to replace instruction with const, using information from -// ReplaceWithConstMap. 
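A standalone C++ sketch of the canonicalization heuristic described above: assign value numbers in first-seen order, treat the smaller number as "older", and rewrite in-block uses of the younger value to the older one (the string-keyed table is an illustrative stand-in for GVN's ValueTable):

#include <cassert>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Assign value numbers in first-seen order; a smaller number means "older".
struct ValueNumbering {
  std::map<std::string, unsigned> vn;
  unsigned lookupOrAdd(const std::string &v) {
    auto it = vn.find(v);
    if (it != vn.end())
      return it->second;
    unsigned n = static_cast<unsigned>(vn.size());
    vn[v] = n;
    return n;
  }
};

int main() {
  ValueNumbering VN;
  VN.lookupOrAdd("%load"); // value number 0 ("older")
  VN.lookupOrAdd("%x");    // value number 1 ("younger")

  // An assume established %x == %load inside this block. Move the older
  // value to the right-hand side and rewrite in-block uses of the younger
  // one, mirroring ReplaceOperandsWithMap[CmpLHS] = CmpRHS above.
  std::string lhs = "%x", rhs = "%load";
  if (VN.lookupOrAdd(lhs) < VN.lookupOrAdd(rhs))
    std::swap(lhs, rhs);

  std::vector<std::string> blockUses = {"%x", "%y", "%x"};
  for (auto &use : blockUses)
    if (use == lhs)
      use = rhs;
  assert(blockUses[0] == "%load" && blockUses[1] == "%y" &&
         blockUses[2] == "%load");
  return 0;
}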
-bool GVN::replaceOperandsWithConsts(Instruction *Instr) const { +bool GVN::replaceOperandsForInBlockEquality(Instruction *Instr) const { bool Changed = false; for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) { - Value *Operand = Instr->getOperand(OpNum); - auto it = ReplaceWithConstMap.find(Operand); - if (it != ReplaceWithConstMap.end()) { - assert(!isa(Operand) && - "Replacing constants with constants is invalid"); + Value *Operand = Instr->getOperand(OpNum); + auto it = ReplaceOperandsWithMap.find(Operand); + if (it != ReplaceOperandsWithMap.end()) { LLVM_DEBUG(dbgs() << "GVN replacing: " << *Operand << " with " << *it->second << " in instruction " << *Instr << '\n'); Instr->setOperand(OpNum, it->second); @@ -1976,6 +2088,7 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, MD = RunMD; ImplicitControlFlowTracking ImplicitCFT(DT); ICF = &ImplicitCFT; + this->LI = LI; VN.setMemDep(MD); ORE = RunORE; InvalidBlockRPONumbers = true; @@ -2037,13 +2150,13 @@ bool GVN::processBlock(BasicBlock *BB) { return false; // Clearing map before every BB because it can be used only for single BB. - ReplaceWithConstMap.clear(); + ReplaceOperandsWithMap.clear(); bool ChangedFunction = false; for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - if (!ReplaceWithConstMap.empty()) - ChangedFunction |= replaceOperandsWithConsts(&*BI); + if (!ReplaceOperandsWithMap.empty()) + ChangedFunction |= replaceOperandsForInBlockEquality(&*BI); ChangedFunction |= processInstruction(&*BI); if (InstrsToErase.empty()) { @@ -2335,7 +2448,7 @@ bool GVN::performPRE(Function &F) { /// the block inserted to the critical edge. BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) { BasicBlock *BB = - SplitCriticalEdge(Pred, Succ, CriticalEdgeSplittingOptions(DT)); + SplitCriticalEdge(Pred, Succ, CriticalEdgeSplittingOptions(DT, LI)); if (MD) MD->invalidateCachedPredecessors(); InvalidBlockRPONumbers = true; @@ -2350,7 +2463,7 @@ bool GVN::splitCriticalEdges() { do { std::pair Edge = toSplit.pop_back_val(); SplitCriticalEdge(Edge.first, Edge.second, - CriticalEdgeSplittingOptions(DT)); + CriticalEdgeSplittingOptions(DT, LI)); } while (!toSplit.empty()); if (MD) MD->invalidateCachedPredecessors(); InvalidBlockRPONumbers = true; @@ -2456,18 +2569,26 @@ void GVN::addDeadBlock(BasicBlock *BB) { if (DeadBlocks.count(B)) continue; + // First, split the critical edges. This might also create additional blocks + // to preserve LoopSimplify form and adjust edges accordingly. SmallVector Preds(pred_begin(B), pred_end(B)); for (BasicBlock *P : Preds) { if (!DeadBlocks.count(P)) continue; - if (isCriticalEdge(P->getTerminator(), GetSuccessorNumber(P, B))) { + if (llvm::any_of(successors(P), + [B](BasicBlock *Succ) { return Succ == B; }) && + isCriticalEdge(P->getTerminator(), B)) { if (BasicBlock *S = splitCriticalEdges(P, B)) DeadBlocks.insert(P = S); } + } - for (BasicBlock::iterator II = B->begin(); isa(II); ++II) { - PHINode &Phi = cast(*II); + // Now undef the incoming values from the dead predecessors. + for (BasicBlock *P : predecessors(B)) { + if (!DeadBlocks.count(P)) + continue; + for (PHINode &Phi : B->phis()) { Phi.setIncomingValueForBlock(P, UndefValue::get(Phi.getType())); if (MD) MD->invalidateCachedPointerInfo(&Phi); @@ -2544,10 +2665,11 @@ public: return Impl.runImpl( F, getAnalysis().getAssumptionCache(F), getAnalysis().getDomTree(), - getAnalysis().getTLI(), + getAnalysis().getTLI(F), getAnalysis().getAAResults(), - NoMemDepAnalysis ? 
nullptr - : &getAnalysis().getMemDep(), + NoMemDepAnalysis + ? nullptr + : &getAnalysis().getMemDep(), LIWP ? &LIWP->getLoopInfo() : nullptr, &getAnalysis().getORE()); } @@ -2556,6 +2678,7 @@ public: AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); if (!NoMemDepAnalysis) AU.addRequired(); AU.addRequired(); @@ -2563,6 +2686,8 @@ public: AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); + AU.addPreservedID(LoopSimplifyID); AU.addRequired(); } diff --git a/lib/Transforms/Scalar/GVNHoist.cpp b/lib/Transforms/Scalar/GVNHoist.cpp index 7614599653c4..c87e41484b13 100644 --- a/lib/Transforms/Scalar/GVNHoist.cpp +++ b/lib/Transforms/Scalar/GVNHoist.cpp @@ -257,7 +257,7 @@ public: GVNHoist(DominatorTree *DT, PostDominatorTree *PDT, AliasAnalysis *AA, MemoryDependenceResults *MD, MemorySSA *MSSA) : DT(DT), PDT(PDT), AA(AA), MD(MD), MSSA(MSSA), - MSSAUpdater(llvm::make_unique(MSSA)) {} + MSSAUpdater(std::make_unique(MSSA)) {} bool run(Function &F) { NumFuncArgs = F.arg_size(); @@ -539,7 +539,7 @@ private: // Check for unsafe hoistings due to side effects. if (K == InsKind::Store) { - if (hasEHOrLoadsOnPath(NewPt, dyn_cast(U), NBBsOnAllPaths)) + if (hasEHOrLoadsOnPath(NewPt, cast(U), NBBsOnAllPaths)) return false; } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths)) return false; @@ -889,19 +889,18 @@ private: void updateAlignment(Instruction *I, Instruction *Repl) { if (auto *ReplacementLoad = dyn_cast(Repl)) { - ReplacementLoad->setAlignment( - std::min(ReplacementLoad->getAlignment(), - cast(I)->getAlignment())); + ReplacementLoad->setAlignment(MaybeAlign(std::min( + ReplacementLoad->getAlignment(), cast(I)->getAlignment()))); ++NumLoadsRemoved; } else if (auto *ReplacementStore = dyn_cast(Repl)) { ReplacementStore->setAlignment( - std::min(ReplacementStore->getAlignment(), - cast(I)->getAlignment())); + MaybeAlign(std::min(ReplacementStore->getAlignment(), + cast(I)->getAlignment()))); ++NumStoresRemoved; } else if (auto *ReplacementAlloca = dyn_cast(Repl)) { ReplacementAlloca->setAlignment( - std::max(ReplacementAlloca->getAlignment(), - cast(I)->getAlignment())); + MaybeAlign(std::max(ReplacementAlloca->getAlignment(), + cast(I)->getAlignment()))); } else if (isa(Repl)) { ++NumCallsRemoved; } diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp index e14f44bb7069..2697d7809568 100644 --- a/lib/Transforms/Scalar/GuardWidening.cpp +++ b/lib/Transforms/Scalar/GuardWidening.cpp @@ -591,7 +591,7 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1, else Result = RC.getCheckInst(); } - + assert(Result && "Failed to find result value"); Result->setName("wide.chk"); } return true; diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index f9fc698a4a9b..5519a00c12c9 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -124,6 +124,11 @@ static cl::opt DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization")); +static cl::opt +LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(false), + cl::desc("Predicate conditions in read only loops")); + + namespace { struct RewritePhi; @@ -144,7 +149,11 @@ class IndVarSimplify { bool rewriteNonIntegerIVs(Loop *L); bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); - bool optimizeLoopExits(Loop *L); + /// Try to eliminate loop exits based on analyzeable exit counts + bool 
optimizeLoopExits(Loop *L, SCEVExpander &Rewriter); + /// Try to form loop invariant tests for loop exits by changing how many + /// iterations of the loop run when that is unobservable. + bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter); bool canLoopBeDeleted(Loop *L, SmallVector &RewritePhiSet); bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); @@ -628,12 +637,30 @@ bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { // Okay, this instruction has a user outside of the current loop // and varies predictably *inside* the loop. Evaluate the value it - // contains when the loop exits, if possible. + // contains when the loop exits, if possible. We prefer to start with + // expressions which are true for all exits (so as to maximize + // expression reuse by the SCEVExpander), but resort to per-exit + // evaluation if that fails. const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); - if (!SE->isLoopInvariant(ExitValue, L) || - !isSafeToExpand(ExitValue, *SE)) - continue; - + if (isa(ExitValue) || + !SE->isLoopInvariant(ExitValue, L) || + !isSafeToExpand(ExitValue, *SE)) { + // TODO: This should probably be sunk into SCEV in some way; maybe a + // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for + // most SCEV expressions and other recurrence types (e.g. shift + // recurrences). Is there existing code we can reuse? + const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i)); + if (isa(ExitCount)) + continue; + if (auto *AddRec = dyn_cast(SE->getSCEV(Inst))) + if (AddRec->getLoop() == L) + ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE); + if (isa(ExitValue) || + !SE->isLoopInvariant(ExitValue, L) || + !isSafeToExpand(ExitValue, *SE)) + continue; + } + // Computing the value outside of the loop brings no benefit if it is // definitely used inside the loop in a way which can not be optimized // away. Avoid doing so unless we know we have a value which computes @@ -804,7 +831,7 @@ bool IndVarSimplify::canLoopBeDeleted( L->getExitingBlocks(ExitingBlocks); SmallVector ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); - if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1) + if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1) return false; BasicBlock *ExitBlock = ExitBlocks[0]; @@ -1654,6 +1681,10 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { return nullptr; } + // if we reached this point then we are going to replace + // DU.NarrowUse with WideUse. Reattach DbgValue then. + replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT); + ExtendKindMap[DU.NarrowUse] = WideAddRec.second; // Returning WideUse pushes it on the worklist. return WideUse; @@ -1779,14 +1810,9 @@ PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { DeadInsts.emplace_back(DU.NarrowDef); } - // Attach any debug information to the new PHI. Since OrigPhi and WidePHI - // evaluate the same recurrence, we can just copy the debug info over. - SmallVector DbgValues; - llvm::findDbgValues(DbgValues, OrigPhi); - auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(), - ValueAsMetadata::get(WidePhi)); - for (auto &DbgValue : DbgValues) - DbgValue->setOperand(0, MDPhi); + // Attach any debug information to the new PHI. 
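The exit-value rewrite earlier in this hunk falls back to evaluating an add-recurrence at a specific exit count; for a first-order (affine) recurrence {start,+,step} that evaluation is just start + step * n. A standalone C++ check of that closed form (SCEV's evaluateAtIteration also handles higher-order recurrences, which this sketch does not):

#include <cassert>
#include <cstdint>

// Value of the affine recurrence {start,+,step} at iteration n.
uint64_t affineAt(uint64_t start, uint64_t step, uint64_t n) {
  return start + step * n;
}

int main() {
  // Cross-check against literally running the recurrence {7,+,3}.
  uint64_t iv = 7;
  for (uint64_t i = 0; i < 10; ++i) {
    assert(affineAt(7, 3, i) == iv);
    iv += 3;
  }
  return 0;
}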
+ replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT); + return WidePhi; } @@ -1817,8 +1843,8 @@ void WidenIV::calculatePostIncRange(Instruction *NarrowDef, auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); auto CmpConstrainedLHSRange = ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); - auto NarrowDefRange = - CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS); + auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( + *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); }; @@ -2242,8 +2268,8 @@ static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB, if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy()) continue; - const auto *AR = dyn_cast(SE->getSCEV(Phi)); - + const auto *AR = cast(SE->getSCEV(Phi)); + // AR may be a pointer type, while BECount is an integer type. // AR may be wider than BECount. With eq/ne tests overflow is immaterial. // AR may not be a narrower type, or we may never exit. @@ -2624,74 +2650,125 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { return MadeAnyChanges; } -bool IndVarSimplify::optimizeLoopExits(Loop *L) { +/// Return a symbolic upper bound for the backedge taken count of the loop. +/// This is more general than getConstantMaxBackedgeTakenCount as it returns +/// an arbitrary expression as opposed to only constants. +/// TODO: Move into the ScalarEvolution class. +static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE, + DominatorTree &DT, Loop *L) { SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); // Form an expression for the maximum exit count possible for this loop. We // merge the max and exact information to approximate a version of - // getMaxBackedgeTakenInfo which isn't restricted to just constants. - // TODO: factor this out as a version of getMaxBackedgeTakenCount which - // isn't guaranteed to return a constant. + // getConstantMaxBackedgeTakenCount which isn't restricted to just constants. SmallVector ExitCounts; - const SCEV *MaxConstEC = SE->getMaxBackedgeTakenCount(L); + const SCEV *MaxConstEC = SE.getConstantMaxBackedgeTakenCount(L); if (!isa(MaxConstEC)) ExitCounts.push_back(MaxConstEC); for (BasicBlock *ExitingBB : ExitingBlocks) { - const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); + const SCEV *ExitCount = SE.getExitCount(L, ExitingBB); if (!isa(ExitCount)) { - assert(DT->dominates(ExitingBB, L->getLoopLatch()) && + assert(DT.dominates(ExitingBB, L->getLoopLatch()) && "We should only have known counts for exiting blocks that " "dominate latch!"); ExitCounts.push_back(ExitCount); } } if (ExitCounts.empty()) - return false; - const SCEV *MaxExitCount = SE->getUMinFromMismatchedTypes(ExitCounts); + return SE.getCouldNotCompute(); + return SE.getUMinFromMismatchedTypes(ExitCounts); +} - bool Changed = false; - for (BasicBlock *ExitingBB : ExitingBlocks) { +bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + // Remove all exits which aren't both rewriteable and analyzeable. + auto NewEnd = llvm::remove_if(ExitingBlocks, + [&](BasicBlock *ExitingBB) { // If our exitting block exits multiple loops, we can only rewrite the // innermost one. Otherwise, we're changing how many times the innermost // loop runs before it exits. if (LI->getLoopFor(ExitingBB) != L) - continue; + return true; // Can't rewrite non-branch yet. 
BranchInst *BI = dyn_cast(ExitingBB->getTerminator()); if (!BI) - continue; + return true; // If already constant, nothing to do. if (isa(BI->getCondition())) - continue; + return true; const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); if (isa(ExitCount)) - continue; + return true; + return false; + }); + ExitingBlocks.erase(NewEnd, ExitingBlocks.end()); + + if (ExitingBlocks.empty()) + return false; + + // Get a symbolic upper bound on the loop backedge taken count. + const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L); + if (isa(MaxExitCount)) + return false; + + // Visit our exit blocks in order of dominance. We know from the fact that + // all exits (left) are analyzeable that the must be a total dominance order + // between them as each must dominate the latch. The visit order only + // matters for the provably equal case. + llvm::sort(ExitingBlocks, + [&](BasicBlock *A, BasicBlock *B) { + // std::sort sorts in ascending order, so we want the inverse of + // the normal dominance relation. + if (DT->properlyDominates(A, B)) return true; + if (DT->properlyDominates(B, A)) return false; + llvm_unreachable("expected total dominance order!"); + }); +#ifdef ASSERT + for (unsigned i = 1; i < ExitingBlocks.size(); i++) { + assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i])); + } +#endif + + auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) { + BranchInst *BI = cast(ExitingBB->getTerminator()); + bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); + auto *OldCond = BI->getCondition(); + auto *NewCond = ConstantInt::get(OldCond->getType(), + IsTaken ? ExitIfTrue : !ExitIfTrue); + BI->setCondition(NewCond); + if (OldCond->use_empty()) + DeadInsts.push_back(OldCond); + }; + bool Changed = false; + SmallSet DominatingExitCounts; + for (BasicBlock *ExitingBB : ExitingBlocks) { + const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); + assert(!isa(ExitCount) && "checked above"); + // If we know we'd exit on the first iteration, rewrite the exit to // reflect this. This does not imply the loop must exit through this // exit; there may be an earlier one taken on the first iteration. // TODO: Given we know the backedge can't be taken, we should go ahead // and break it. Or at least, kill all the header phis and simplify. if (ExitCount->isZero()) { - bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); - auto *OldCond = BI->getCondition(); - auto *NewCond = ExitIfTrue ? ConstantInt::getTrue(OldCond->getType()) : - ConstantInt::getFalse(OldCond->getType()); - BI->setCondition(NewCond); - if (OldCond->use_empty()) - DeadInsts.push_back(OldCond); + FoldExit(ExitingBB, true); Changed = true; continue; } - // If we end up with a pointer exit count, bail. + // If we end up with a pointer exit count, bail. Note that we can end up + // with a pointer exit count for one exiting block, and not for another in + // the same loop. if (!ExitCount->getType()->isIntegerTy() || !MaxExitCount->getType()->isIntegerTy()) - return false; + continue; Type *WiderType = SE->getWiderType(MaxExitCount->getType(), ExitCount->getType()); @@ -2700,35 +2777,198 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L) { assert(MaxExitCount->getType() == ExitCount->getType()); // Can we prove that some other exit must be taken strictly before this - // one? TODO: handle cases where ule is known, and equality is covered - // by a dominating exit + // one? 
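A standalone source-level C++ sketch of the folding this enables: when another exit must be taken strictly earlier, the later exit's branch condition can be replaced by a constant, exactly as FoldExit does above (the loop and counts are made up for illustration):

#include <cassert>

// Two analyzable exits; the second can only trigger on iteration 100, but
// the first always fires on iteration 5, so the second exit is dead and its
// condition can be folded to "never exit".
int original(void) {
  int sum = 0;
  for (int i = 0;; ++i) {
    sum += i;
    if (i == 5)
      return sum;   // exit count 5
    if (i == 100)
      return -1;    // exit count 100 > max backedge-taken count (5)
  }
}

int folded(void) {
  int sum = 0;
  for (int i = 0;; ++i) {
    sum += i;
    if (i == 5)
      return sum;
    if (false)      // branch condition folded: this exit is provably dead
      return -1;
  }
}

int main() {
  assert(original() == folded());
  return 0;
}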
if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxExitCount, ExitCount)) { - bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); - auto *OldCond = BI->getCondition(); - auto *NewCond = ExitIfTrue ? ConstantInt::getFalse(OldCond->getType()) : - ConstantInt::getTrue(OldCond->getType()); - BI->setCondition(NewCond); - if (OldCond->use_empty()) - DeadInsts.push_back(OldCond); + FoldExit(ExitingBB, false); Changed = true; continue; } - // TODO: If we can prove that the exiting iteration is equal to the exit - // count for this exit and that no previous exit oppurtunities exist within - // the loop, then we can discharge all other exits. (May fall out of - // previous TODO.) - - // TODO: If we can't prove any relation between our exit count and the - // loops exit count, but taking this exit doesn't require actually running - // the loop (i.e. no side effects, no computed values used in exit), then - // we can replace the exit test with a loop invariant test which exits on - // the first iteration. + // As we run, keep track of which exit counts we've encountered. If we + // find a duplicate, we've found an exit which would have exited on the + // exiting iteration, but (from the visit order) strictly follows another + // which does the same and is thus dead. + if (!DominatingExitCounts.insert(ExitCount).second) { + FoldExit(ExitingBB, false); + Changed = true; + continue; + } + + // TODO: There might be another oppurtunity to leverage SCEV's reasoning + // here. If we kept track of the min of dominanting exits so far, we could + // discharge exits with EC >= MDEC. This is less powerful than the existing + // transform (since later exits aren't considered), but potentially more + // powerful for any case where SCEV can prove a >=u b, but neither a == b + // or a >u b. Such a case is not currently known. } return Changed; } +bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + bool Changed = false; + + // Finally, see if we can rewrite our exit conditions into a loop invariant + // form. If we have a read-only loop, and we can tell that we must exit down + // a path which does not need any of the values computed within the loop, we + // can rewrite the loop to exit on the first iteration. Note that this + // doesn't either a) tell us the loop exits on the first iteration (unless + // *all* exits are predicateable) or b) tell us *which* exit might be taken. + // This transformation looks a lot like a restricted form of dead loop + // elimination, but restricted to read-only loops and without neccesssarily + // needing to kill the loop entirely. + if (!LoopPredication) + return Changed; + + if (!SE->hasLoopInvariantBackedgeTakenCount(L)) + return Changed; + + // Note: ExactBTC is the exact backedge taken count *iff* the loop exits + // through *explicit* control flow. We have to eliminate the possibility of + // implicit exits (see below) before we know it's truly exact. + const SCEV *ExactBTC = SE->getBackedgeTakenCount(L); + if (isa(ExactBTC) || + !SE->isLoopInvariant(ExactBTC, L) || + !isSafeToExpand(ExactBTC, *SE)) + return Changed; + + auto BadExit = [&](BasicBlock *ExitingBB) { + // If our exiting block exits multiple loops, we can only rewrite the + // innermost one. Otherwise, we're changing how many times the innermost + // loop runs before it exits. + if (LI->getLoopFor(ExitingBB) != L) + return true; + + // Can't rewrite non-branch yet. 
+ BranchInst *BI = dyn_cast(ExitingBB->getTerminator()); + if (!BI) + return true; + + // If already constant, nothing to do. + if (isa(BI->getCondition())) + return true; + + // If the exit block has phis, we need to be able to compute the values + // within the loop which contains them. This assumes trivially lcssa phis + // have already been removed; TODO: generalize + BasicBlock *ExitBlock = + BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0); + if (!ExitBlock->phis().empty()) + return true; + + const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); + assert(!isa(ExactBTC) && "implied by having exact trip count"); + if (!SE->isLoopInvariant(ExitCount, L) || + !isSafeToExpand(ExitCount, *SE)) + return true; + + return false; + }; + + // If we have any exits which can't be predicated themselves, than we can't + // predicate any exit which isn't guaranteed to execute before it. Consider + // two exits (a) and (b) which would both exit on the same iteration. If we + // can predicate (b), but not (a), and (a) preceeds (b) along some path, then + // we could convert a loop from exiting through (a) to one exiting through + // (b). Note that this problem exists only for exits with the same exit + // count, and we could be more aggressive when exit counts are known inequal. + llvm::sort(ExitingBlocks, + [&](BasicBlock *A, BasicBlock *B) { + // std::sort sorts in ascending order, so we want the inverse of + // the normal dominance relation, plus a tie breaker for blocks + // unordered by dominance. + if (DT->properlyDominates(A, B)) return true; + if (DT->properlyDominates(B, A)) return false; + return A->getName() < B->getName(); + }); + // Check to see if our exit blocks are a total order (i.e. a linear chain of + // exits before the backedge). If they aren't, reasoning about reachability + // is complicated and we choose not to for now. + for (unsigned i = 1; i < ExitingBlocks.size(); i++) + if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i])) + return Changed; + + // Given our sorted total order, we know that exit[j] must be evaluated + // after all exit[i] such j > i. + for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++) + if (BadExit(ExitingBlocks[i])) { + ExitingBlocks.resize(i); + break; + } + + if (ExitingBlocks.empty()) + return Changed; + + // We rely on not being able to reach an exiting block on a later iteration + // then it's statically compute exit count. The implementaton of + // getExitCount currently has this invariant, but assert it here so that + // breakage is obvious if this ever changes.. + assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) { + return DT->dominates(ExitingBB, L->getLoopLatch()); + })); + + // At this point, ExitingBlocks consists of only those blocks which are + // predicatable. Given that, we know we have at least one exit we can + // predicate if the loop is doesn't have side effects and doesn't have any + // implicit exits (because then our exact BTC isn't actually exact). + // @Reviewers - As structured, this is O(I^2) for loop nests. Any + // suggestions on how to improve this? I can obviously bail out for outer + // loops, but that seems less than ideal. MemorySSA can find memory writes, + // is that enough for *all* side effects? + for (BasicBlock *BB : L->blocks()) + for (auto &I : *BB) + // TODO:isGuaranteedToTransfer + if (I.mayHaveSideEffects() || I.mayThrow()) + return Changed; + + // Finally, do the actual predication for all predicatable blocks. 
A couple + // of notes here: + // 1) We don't bother to constant fold dominated exits with identical exit + // counts; that's simply a form of CSE/equality propagation and we leave + // it for dedicated passes. + // 2) We insert the comparison at the branch. Hoisting introduces additional + // legality constraints and we leave that to dedicated logic. We want to + // predicate even if we can't insert a loop invariant expression as + // peeling or unrolling will likely reduce the cost of the otherwise loop + // varying check. + Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator()); + IRBuilder<> B(L->getLoopPreheader()->getTerminator()); + Value *ExactBTCV = nullptr; //lazy generated if needed + for (BasicBlock *ExitingBB : ExitingBlocks) { + const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); + + auto *BI = cast(ExitingBB->getTerminator()); + Value *NewCond; + if (ExitCount == ExactBTC) { + NewCond = L->contains(BI->getSuccessor(0)) ? + B.getFalse() : B.getTrue(); + } else { + Value *ECV = Rewriter.expandCodeFor(ExitCount); + if (!ExactBTCV) + ExactBTCV = Rewriter.expandCodeFor(ExactBTC); + Value *RHS = ExactBTCV; + if (ECV->getType() != RHS->getType()) { + Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType()); + ECV = B.CreateZExt(ECV, WiderTy); + RHS = B.CreateZExt(RHS, WiderTy); + } + auto Pred = L->contains(BI->getSuccessor(0)) ? + ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; + NewCond = B.CreateICmp(Pred, ECV, RHS); + } + Value *OldCond = BI->getCondition(); + BI->setCondition(NewCond); + if (OldCond->use_empty()) + DeadInsts.push_back(OldCond); + Changed = true; + } + + return Changed; +} + //===----------------------------------------------------------------------===// // IndVarSimplify driver. Manage several subpasses of IV simplification. //===----------------------------------------------------------------------===// @@ -2755,7 +2995,10 @@ bool IndVarSimplify::run(Loop *L) { // transform them to use integer recurrences. Changed |= rewriteNonIntegerIVs(L); +#ifndef NDEBUG + // Used below for a consistency check only const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); +#endif // Create a rewriter object which we'll use to transform the code with. SCEVExpander Rewriter(*SE, DL, "indvars"); @@ -2772,20 +3015,22 @@ bool IndVarSimplify::run(Loop *L) { Rewriter.disableCanonicalMode(); Changed |= simplifyAndExtend(L, Rewriter, LI); - // Check to see if this loop has a computable loop-invariant execution count. - // If so, this means that we can compute the final value of any expressions + // Check to see if we can compute the final value of any expressions // that are recurrent in the loop, and substitute the exit values from the - // loop into any instructions outside of the loop that use the final values of - // the current expressions. - // - if (ReplaceExitValue != NeverRepl && - !isa(BackedgeTakenCount)) + // loop into any instructions outside of the loop that use the final values + // of the current expressions. + if (ReplaceExitValue != NeverRepl) Changed |= rewriteLoopExitValues(L, Rewriter); // Eliminate redundant IV cycles. NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); - Changed |= optimizeLoopExits(L); + // Try to eliminate loop exits based on analyzeable exit counts + Changed |= optimizeLoopExits(L, Rewriter); + + // Try to form loop invariant tests for loop exits by changing how many + // iterations of the loop run when that is unobservable. 
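The expansion above turns each remaining exit test into a loop-invariant compare of that exit's count against the exact backedge-taken count, widening with zext when the two counts have different types. A stylized source-level sketch of the effect follows (contains_before, contains_after, and first_match are hypothetical; first_match stands in for whatever expression the SCEV expander would materialize for the early exit's count). Because the loop is read-only and the exit paths use none of its values, running fewer iterations is unobservable as long as control still leaves through the same exit.

    #include <cstddef>

    // Before: a read-only search loop with two exits.
    bool contains_before(const int *a, size_t n, int key) {
      for (size_t i = 0; i < n; ++i)   // exit count: n
        if (a[i] == key)               // exit count: index of first match
          return true;
      return false;
    }

    // After (conceptual shape): each exit condition is a loop-invariant
    // compare against the exact backedge-taken count, so whichever exit
    // "owns" that count is taken on the very first iteration.
    bool contains_after(size_t n, size_t first_match) {
      size_t exact_btc = first_match < n ? first_match : n;
      for (;;) {
        if (n == exact_btc)            // was: i < n        (bounds exit)
          return false;
        if (first_match == exact_btc)  // was: a[i] == key  (match exit)
          return true;
      }
    }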
+ Changed |= predicateLoopExits(L, Rewriter); // If we have a trip count expression, rewrite the loop's exit condition // using it. @@ -2825,7 +3070,7 @@ bool IndVarSimplify::run(Loop *L) { // that our definition of "high cost" is not exactly principled. if (Rewriter.isHighCostExpansion(ExitCount, L)) continue; - + // Check preconditions for proper SCEVExpander operation. SCEV does not // express SCEVExpander's dependencies, such as LoopSimplify. Instead // any pass that uses the SCEVExpander must do it. This does not work @@ -2924,7 +3169,7 @@ struct IndVarSimplifyLegacyPass : public LoopPass { auto *SE = &getAnalysis().getSE(); auto *DT = &getAnalysis().getDomTree(); auto *TLIP = getAnalysisIfAvailable(); - auto *TLI = TLIP ? &TLIP->getTLI() : nullptr; + auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr; auto *TTIP = getAnalysisIfAvailable(); auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr; const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp index 5f0e2001c73d..e7e73a132fbe 100644 --- a/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -141,6 +141,8 @@ using ValueToAddrSpaceMapTy = DenseMap; /// InferAddressSpaces class InferAddressSpaces : public FunctionPass { + const TargetTransformInfo *TTI; + /// Target specific address space which uses of should be replaced if /// possible. unsigned FlatAddrSpace; @@ -264,17 +266,6 @@ bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II, Module *M = II->getParent()->getParent()->getParent(); switch (II->getIntrinsicID()) { - case Intrinsic::amdgcn_atomic_inc: - case Intrinsic::amdgcn_atomic_dec: - case Intrinsic::amdgcn_ds_fadd: - case Intrinsic::amdgcn_ds_fmin: - case Intrinsic::amdgcn_ds_fmax: { - const ConstantInt *IsVolatile = dyn_cast(II->getArgOperand(4)); - if (!IsVolatile || !IsVolatile->isZero()) - return false; - - LLVM_FALLTHROUGH; - } case Intrinsic::objectsize: { Type *DestTy = II->getType(); Type *SrcTy = NewV->getType(); @@ -285,25 +276,27 @@ bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II, return true; } default: - return false; + return TTI->rewriteIntrinsicWithAddressSpace(II, OldV, NewV); } } -// TODO: Move logic to TTI? 
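With the guard relaxed above, rewriteLoopExitValues runs whenever exit-value replacement is enabled rather than only when the whole loop's backedge-taken count is known. The transform it performs can be shown with a hypothetical pair of functions (not from this patch): a value that lives out of the loop is replaced by a closed-form expression, which often leaves the loop dead.

    // Before: the final value of i is used after the loop.
    int count_before(int n) {
      int i = 0;
      for (; i < n; ++i) { /* no other uses of i outside the loop */ }
      return i;
    }

    // After exit-value rewriting (conceptually): the out-of-loop use is fed
    // by a closed form of the trip count instead of the loop-carried value.
    int count_after(int n) {
      for (int i = 0; i < n; ++i) { }
      return n > 0 ? n : 0;   // SCEV-computed exit value of i
    }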
void InferAddressSpaces::collectRewritableIntrinsicOperands( IntrinsicInst *II, std::vector> &PostorderStack, DenseSet &Visited) const { - switch (II->getIntrinsicID()) { + auto IID = II->getIntrinsicID(); + switch (IID) { case Intrinsic::objectsize: - case Intrinsic::amdgcn_atomic_inc: - case Intrinsic::amdgcn_atomic_dec: - case Intrinsic::amdgcn_ds_fadd: - case Intrinsic::amdgcn_ds_fmin: - case Intrinsic::amdgcn_ds_fmax: appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0), PostorderStack, Visited); break; default: + SmallVector OpIndexes; + if (TTI->collectFlatAddressOperands(OpIndexes, IID)) { + for (int Idx : OpIndexes) { + appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(Idx), + PostorderStack, Visited); + } + } break; } } @@ -631,11 +624,10 @@ bool InferAddressSpaces::runOnFunction(Function &F) { if (skipFunction(F)) return false; - const TargetTransformInfo &TTI = - getAnalysis().getTTI(F); + TTI = &getAnalysis().getTTI(F); if (FlatAddrSpace == UninitializedAddressSpace) { - FlatAddrSpace = TTI.getFlatAddressSpace(); + FlatAddrSpace = TTI->getFlatAddressSpace(); if (FlatAddrSpace == UninitializedAddressSpace) return false; } @@ -650,7 +642,7 @@ bool InferAddressSpaces::runOnFunction(Function &F) { // Changes the address spaces of the flat address expressions who are inferred // to point to a specific address space. - return rewriteWithNewAddressSpaces(TTI, Postorder, InferredAddrSpace, &F); + return rewriteWithNewAddressSpaces(*TTI, Postorder, InferredAddrSpace, &F); } // Constants need to be tracked through RAUW to handle cases with nested diff --git a/lib/Transforms/Scalar/InstSimplifyPass.cpp b/lib/Transforms/Scalar/InstSimplifyPass.cpp index 6616364ab203..ec28f790f252 100644 --- a/lib/Transforms/Scalar/InstSimplifyPass.cpp +++ b/lib/Transforms/Scalar/InstSimplifyPass.cpp @@ -33,37 +33,39 @@ static bool runImpl(Function &F, const SimplifyQuery &SQ, bool Changed = false; do { - for (BasicBlock *BB : depth_first(&F.getEntryBlock())) { - // Here be subtlety: the iterator must be incremented before the loop - // body (not sure why), so a range-for loop won't work here. - for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - Instruction *I = &*BI++; - // The first time through the loop ToSimplify is empty and we try to - // simplify all instructions. On later iterations ToSimplify is not + for (BasicBlock &BB : F) { + // Unreachable code can take on strange forms that we are not prepared to + // handle. For example, an instruction may have itself as an operand. + if (!SQ.DT->isReachableFromEntry(&BB)) + continue; + + SmallVector DeadInstsInBB; + for (Instruction &I : BB) { + // The first time through the loop, ToSimplify is empty and we try to + // simplify all instructions. On later iterations, ToSimplify is not // empty and we only bother simplifying instructions that are in it. - if (!ToSimplify->empty() && !ToSimplify->count(I)) + if (!ToSimplify->empty() && !ToSimplify->count(&I)) continue; - // Don't waste time simplifying unused instructions. - if (!I->use_empty()) { - if (Value *V = SimplifyInstruction(I, SQ, ORE)) { + // Don't waste time simplifying dead/unused instructions. + if (isInstructionTriviallyDead(&I)) { + DeadInstsInBB.push_back(&I); + Changed = true; + } else if (!I.use_empty()) { + if (Value *V = SimplifyInstruction(&I, SQ, ORE)) { // Mark all uses for resimplification next time round the loop. 
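The hunk above removes the hard-coded AMDGPU intrinsics and instead asks TTI which operands of an unknown intrinsic are flat pointers (collectFlatAddressOperands) and whether the target can rewrite it (rewriteIntrinsicWithAddressSpace). A minimal sketch of that delegation shape, using toy types rather than the real pass or TTI classes (illustration only):

    #include <vector>

    struct ToyIntrinsic { int ID; std::vector<int> PtrOperands; };

    // Target hook interface: the generic pass no longer lists any target's
    // intrinsics; the target reports which operands to walk and performs
    // the rewrite itself when it can.
    struct ToyTTI {
      virtual ~ToyTTI() = default;
      virtual bool collectFlatAddressOperands(std::vector<int> &OpIndexes,
                                              int IID) const = 0;
      virtual bool rewriteIntrinsicWithAddressSpace(ToyIntrinsic &I) const = 0;
    };

    bool rewriteIntrinsicOperands(ToyIntrinsic &I, const ToyTTI &TTI) {
      switch (I.ID) {
      case 1: // e.g. objectsize, still handled generically
        return true;
      default:
        return TTI.rewriteIntrinsicWithAddressSpace(I); // target decides
      }
    }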
- for (User *U : I->users()) + for (User *U : I.users()) Next->insert(cast(U)); - I->replaceAllUsesWith(V); + I.replaceAllUsesWith(V); ++NumSimplified; Changed = true; + // A call can get simplified, but it may not be trivially dead. + if (isInstructionTriviallyDead(&I)) + DeadInstsInBB.push_back(&I); } } - if (RecursivelyDeleteTriviallyDeadInstructions(I, SQ.TLI)) { - // RecursivelyDeleteTriviallyDeadInstruction can remove more than one - // instruction, so simply incrementing the iterator does not work. - // When instructions get deleted re-iterate instead. - BI = BB->begin(); - BE = BB->end(); - Changed = true; - } } + RecursivelyDeleteTriviallyDeadInstructions(DeadInstsInBB, SQ.TLI); } // Place the list of instructions to simplify on the next loop iteration @@ -90,7 +92,7 @@ struct InstSimplifyLegacyPass : public FunctionPass { AU.addRequired(); } - /// runOnFunction - Remove instructions that simplify. + /// Remove instructions that simplify. bool runOnFunction(Function &F) override { if (skipFunction(F)) return false; @@ -98,7 +100,7 @@ struct InstSimplifyLegacyPass : public FunctionPass { const DominatorTree *DT = &getAnalysis().getDomTree(); const TargetLibraryInfo *TLI = - &getAnalysis().getTLI(); + &getAnalysis().getTLI(F); AssumptionCache *AC = &getAnalysis().getAssumptionCache(F); OptimizationRemarkEmitter *ORE = diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index b86bf2fefbe5..0cf00baaa24a 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -224,13 +224,21 @@ static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) { BasicBlock *PhiBB) -> std::pair { auto *PredBB = IncomingBB; auto *SuccBB = PhiBB; + SmallPtrSet Visited; while (true) { BranchInst *PredBr = dyn_cast(PredBB->getTerminator()); if (PredBr && PredBr->isConditional()) return {PredBB, SuccBB}; + Visited.insert(PredBB); auto *SinglePredBB = PredBB->getSinglePredecessor(); if (!SinglePredBB) return {nullptr, nullptr}; + + // Stop searching when SinglePredBB has been visited. It means we see + // an unreachable loop. + if (Visited.count(SinglePredBB)) + return {nullptr, nullptr}; + SuccBB = PredBB; PredBB = SinglePredBB; } @@ -253,7 +261,9 @@ static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) { return; BasicBlock *PredBB = PredOutEdge.first; - BranchInst *PredBr = cast(PredBB->getTerminator()); + BranchInst *PredBr = dyn_cast(PredBB->getTerminator()); + if (!PredBr) + return; uint64_t PredTrueWeight, PredFalseWeight; // FIXME: We currently only set the profile data when it is missing. @@ -286,7 +296,7 @@ static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) { bool JumpThreading::runOnFunction(Function &F) { if (skipFunction(F)) return false; - auto TLI = &getAnalysis().getTLI(); + auto TLI = &getAnalysis().getTLI(F); // Get DT analysis before LVI. When LVI is initialized it conditionally adds // DT if it's available. 
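The InstSimplify rewrite above replaces the old restart-the-block-iterator dance with a single pass that remembers dead instructions and erases them afterwards. The same collect-then-erase shape, over a plain std::list (a toy example, not the pass itself):

    #include <list>
    #include <vector>

    void simplify(std::list<int> &Block) {
      std::vector<std::list<int>::iterator> Dead;
      for (auto It = Block.begin(); It != Block.end(); ++It)
        if (*It == 0)          // stand-in for "trivially dead instruction"
          Dead.push_back(It);  // defer erasure; the walk stays simple and O(n)
      for (auto It : Dead)
        Block.erase(It);       // safe: erase only invalidates the erased node
    }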
auto DT = &getAnalysis().getDomTree(); @@ -1461,7 +1471,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) { "Can't handle critical edge here!"); LoadInst *NewVal = new LoadInst( LoadI->getType(), LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred), - LoadI->getName() + ".pr", false, LoadI->getAlignment(), + LoadI->getName() + ".pr", false, MaybeAlign(LoadI->getAlignment()), LoadI->getOrdering(), LoadI->getSyncScopeID(), UnavailablePred->getTerminator()); NewVal->setDebugLoc(LoadI->getDebugLoc()); @@ -2423,7 +2433,7 @@ void JumpThreadingPass::UnfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, // |----- // v // BB - BranchInst *PredTerm = dyn_cast(Pred->getTerminator()); + BranchInst *PredTerm = cast(Pred->getTerminator()); BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold", BB->getParent(), BB); // Move the unconditional branch to NewBB. diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index d9dda4cef2d2..6ce4831a7359 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -220,7 +220,8 @@ struct LegacyLICMPass : public LoopPass { &getAnalysis().getAAResults(), &getAnalysis().getLoopInfo(), &getAnalysis().getDomTree(), - &getAnalysis().getTLI(), + &getAnalysis().getTLI( + *L->getHeader()->getParent()), &getAnalysis().getTTI( *L->getHeader()->getParent()), SE ? &SE->getSE() : nullptr, MSSA, &ORE, false); @@ -294,7 +295,7 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM, PA.preserve(); PA.preserve(); - if (EnableMSSALoopDependency) + if (AR.MSSA) PA.preserve(); return PA; @@ -330,6 +331,12 @@ bool LoopInvariantCodeMotion::runOnLoop( assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); + // If this loop has metadata indicating that LICM is not to be performed then + // just exit. + if (hasDisableLICMTransformsHint(L)) { + return false; + } + std::unique_ptr CurAST; std::unique_ptr MSSAU; bool NoOfMemAccTooLarge = false; @@ -340,7 +347,7 @@ bool LoopInvariantCodeMotion::runOnLoop( CurAST = collectAliasInfoForLoop(L, LI, AA); } else { LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n"); - MSSAU = make_unique(MSSA); + MSSAU = std::make_unique(MSSA); unsigned AccessCapCount = 0; for (auto *BB : L->getBlocks()) { @@ -956,7 +963,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, // Now that we've finished hoisting make sure that LI and DT are still // valid. -#ifndef NDEBUG +#ifdef EXPENSIVE_CHECKS if (Changed) { assert(DT->verify(DominatorTree::VerificationLevel::Fast) && "Dominator tree verification failed"); @@ -1026,7 +1033,8 @@ namespace { bool isHoistableAndSinkableInst(Instruction &I) { // Only these instructions are hoistable/sinkable. return (isa(I) || isa(I) || isa(I) || - isa(I) || isa(I) || isa(I) || + isa(I) || isa(I) || + isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || @@ -1092,7 +1100,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, // in the same alias set as something that ends up being modified. if (AA->pointsToConstantMemory(LI->getOperand(0))) return true; - if (LI->getMetadata(LLVMContext::MD_invariant_load)) + if (LI->hasMetadata(LLVMContext::MD_invariant_load)) return true; if (LI->isAtomic() && !TargetExecutesOncePerLoop) @@ -1240,12 +1248,22 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, // FIXME: More precise: no Uses that alias SI. 
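The JumpThreading fix above guards the walk up single-predecessor chains with a Visited set so that an unreachable loop cannot spin forever. A standalone sketch of the same guard on a toy node type (not LLVM API):

    #include <unordered_set>

    struct Node { Node *SinglePred = nullptr; bool HasCondBranch = false; };

    Node *findConditionalAncestor(Node *N) {
      std::unordered_set<Node *> Visited;
      while (N) {
        if (N->HasCondBranch)
          return N;                 // found the predecessor we wanted
        if (!Visited.insert(N).second)
          return nullptr;           // revisited a node: unreachable cycle
        N = N->SinglePred;          // keep climbing the single-pred chain
      }
      return nullptr;               // chain ended without a conditional branch
    }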
if (!Flags->IsSink && !MSSA->dominates(SIMD, MU)) return false; - } else if (const auto *MD = dyn_cast(&MA)) + } else if (const auto *MD = dyn_cast(&MA)) { if (auto *LI = dyn_cast(MD->getMemoryInst())) { (void)LI; // Silence warning. assert(!LI->isUnordered() && "Expected unordered load"); return false; } + // Any call, while it may not be clobbering SI, it may be a use. + if (auto *CI = dyn_cast(MD->getMemoryInst())) { + // Check if the call may read from the memory locattion written + // to by SI. Check CI's attributes and arguments; the number of + // such checks performed is limited above by NoOfMemAccTooLarge. + ModRefInfo MRI = AA->getModRefInfo(CI, MemoryLocation::get(SI)); + if (isModOrRefSet(MRI)) + return false; + } + } } auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI); @@ -1375,8 +1393,7 @@ static Instruction *CloneInstructionInExitBlock( if (!I.getName().empty()) New->setName(I.getName() + ".le"); - MemoryAccess *OldMemAcc; - if (MSSAU && (OldMemAcc = MSSAU->getMemorySSA()->getMemoryAccess(&I))) { + if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) { // Create a new MemoryAccess and let MemorySSA set its defining access. MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB( New, nullptr, New->getParent(), MemorySSA::Beginning); @@ -1385,7 +1402,7 @@ static Instruction *CloneInstructionInExitBlock( MSSAU->insertDef(MemDef, /*RenameUses=*/true); else { auto *MemUse = cast(NewMemAcc); - MSSAU->insertUse(MemUse); + MSSAU->insertUse(MemUse, /*RenameUses=*/true); } } } @@ -1783,7 +1800,7 @@ public: StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos); if (UnorderedAtomic) NewSI->setOrdering(AtomicOrdering::Unordered); - NewSI->setAlignment(Alignment); + NewSI->setAlignment(MaybeAlign(Alignment)); NewSI->setDebugLoc(DL); if (AATags) NewSI->setAAMetadata(AATags); @@ -2016,7 +2033,8 @@ bool llvm::promoteLoopAccessesToScalars( if (!DereferenceableInPH) { DereferenceableInPH = isDereferenceableAndAlignedPointer( Store->getPointerOperand(), Store->getValueOperand()->getType(), - Store->getAlignment(), MDL, Preheader->getTerminator(), DT); + MaybeAlign(Store->getAlignment()), MDL, + Preheader->getTerminator(), DT); } } else return false; // Not a load or store. @@ -2101,20 +2119,21 @@ bool llvm::promoteLoopAccessesToScalars( SomePtr->getName() + ".promoted", Preheader->getTerminator()); if (SawUnorderedAtomic) PreheaderLoad->setOrdering(AtomicOrdering::Unordered); - PreheaderLoad->setAlignment(Alignment); + PreheaderLoad->setAlignment(MaybeAlign(Alignment)); PreheaderLoad->setDebugLoc(DL); if (AATags) PreheaderLoad->setAAMetadata(AATags); SSA.AddAvailableValue(Preheader, PreheaderLoad); - MemoryAccess *PreheaderLoadMemoryAccess; if (MSSAU) { - PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB( + MemoryAccess *PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB( PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End); MemoryUse *NewMemUse = cast(PreheaderLoadMemoryAccess); - MSSAU->insertUse(NewMemUse); + MSSAU->insertUse(NewMemUse, /*RenameUses=*/true); } + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); // Rewrite all the loads in the loop and remember all the definitions from // stores in the loop. Promoter.run(LoopUses); @@ -2161,7 +2180,7 @@ LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI, LoopToAliasSetMap.erase(MapI); } if (!CurAST) - CurAST = make_unique(*AA); + CurAST = std::make_unique(*AA); // Add everything from the sub loops that are no longer directly available. 
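The extra MemoryUse handling above asks alias analysis whether a call might read the location a store writes; if it may, the store cannot be moved past it. A hypothetical C++ illustration of why (logger and p are made up for the example, not taken from the patch):

    void logger();          // unknown external call; may read globals
    extern int *p;

    void f(int n) {
      for (int i = 0; i < n; ++i) {
        *p = i;             // candidate for sinking/promotion by LICM
        logger();           // may read *p, so moving the store would change
                            // what logger() observes; LICM must give up
      }
    }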
for (Loop *InnerL : RecomputeLoops) @@ -2180,7 +2199,7 @@ std::unique_ptr LoopInvariantCodeMotion::collectAliasInfoForLoopWithMSSA( Loop *L, AliasAnalysis *AA, MemorySSAUpdater *MSSAU) { auto *MSSA = MSSAU->getMemorySSA(); - auto CurAST = make_unique(*AA, MSSA, L); + auto CurAST = std::make_unique(*AA, MSSA, L); CurAST->addAllInstructionsInLoopUsingMSSA(); return CurAST; } diff --git a/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/lib/Transforms/Scalar/LoopDataPrefetch.cpp index 1fcf1315a177..a972d6fa2fcd 100644 --- a/lib/Transforms/Scalar/LoopDataPrefetch.cpp +++ b/lib/Transforms/Scalar/LoopDataPrefetch.cpp @@ -312,8 +312,8 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) { IRBuilder<> Builder(MemI); Module *M = BB->getParent()->getParent(); Type *I32 = Type::getInt32Ty(BB->getContext()); - Function *PrefetchFunc = - Intrinsic::getDeclaration(M, Intrinsic::prefetch); + Function *PrefetchFunc = Intrinsic::getDeclaration( + M, Intrinsic::prefetch, PrefPtrValue->getType()); Builder.CreateCall( PrefetchFunc, {PrefPtrValue, diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 8371367e24e7..cee197cf8354 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -191,7 +191,7 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT, // Don't remove loops for which we can't solve the trip count. // They could be infinite, in which case we'd be changing program behavior. - const SCEV *S = SE.getMaxBackedgeTakenCount(L); + const SCEV *S = SE.getConstantMaxBackedgeTakenCount(L); if (isa(S)) { LLVM_DEBUG(dbgs() << "Could not compute SCEV MaxBackedgeTakenCount.\n"); return Changed ? LoopDeletionResult::Modified diff --git a/lib/Transforms/Scalar/LoopFuse.cpp b/lib/Transforms/Scalar/LoopFuse.cpp index 0bc2bcff2ae1..9f93c68e6128 100644 --- a/lib/Transforms/Scalar/LoopFuse.cpp +++ b/lib/Transforms/Scalar/LoopFuse.cpp @@ -66,7 +66,7 @@ using namespace llvm; #define DEBUG_TYPE "loop-fusion" -STATISTIC(FuseCounter, "Count number of loop fusions performed"); +STATISTIC(FuseCounter, "Loops fused"); STATISTIC(NumFusionCandidates, "Number of candidates for loop fusion"); STATISTIC(InvalidPreheader, "Loop has invalid preheader"); STATISTIC(InvalidHeader, "Loop has invalid header"); @@ -79,12 +79,15 @@ STATISTIC(MayThrowException, "Loop may throw an exception"); STATISTIC(ContainsVolatileAccess, "Loop contains a volatile access"); STATISTIC(NotSimplifiedForm, "Loop is not in simplified form"); STATISTIC(InvalidDependencies, "Dependencies prevent fusion"); -STATISTIC(InvalidTripCount, - "Loop does not have invariant backedge taken count"); +STATISTIC(UnknownTripCount, "Loop has unknown trip count"); STATISTIC(UncomputableTripCount, "SCEV cannot compute trip count of loop"); -STATISTIC(NonEqualTripCount, "Candidate trip counts are not the same"); -STATISTIC(NonAdjacent, "Candidates are not adjacent"); -STATISTIC(NonEmptyPreheader, "Candidate has a non-empty preheader"); +STATISTIC(NonEqualTripCount, "Loop trip counts are not the same"); +STATISTIC(NonAdjacent, "Loops are not adjacent"); +STATISTIC(NonEmptyPreheader, "Loop has a non-empty preheader"); +STATISTIC(FusionNotBeneficial, "Fusion is not beneficial"); +STATISTIC(NonIdenticalGuards, "Candidates have different guards"); +STATISTIC(NonEmptyExitBlock, "Candidate has a non-empty exit block"); +STATISTIC(NonEmptyGuardBlock, "Candidate has a non-empty guard block"); enum FusionDependenceAnalysisChoice { FUSION_DEPENDENCE_ANALYSIS_SCEV, @@ -110,6 +113,7 @@ static 
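The LoopDeletion change above switches to the renamed getConstantMaxBackedgeTakenCount, keeping the rule that a loop with no computable bound must not be deleted even if its results are unused, because removing it could turn a non-terminating program into a terminating one. A hypothetical pair of loops showing the distinction (not from this patch):

    // SCEV cannot bound this loop, so it stays even though nothing uses it.
    void collatz_like(unsigned x) {
      while (x != 1)
        x = (x % 2) ? 3 * x + 1 : x / 2;   // unknown max backedge-taken count
    }

    // Constant max trip count and no side effects: deletable once dead.
    void provably_finite() {
      for (int i = 0; i < 100; ++i) { }
    }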
cl::opt cl::Hidden, cl::init(false), cl::ZeroOrMore); #endif +namespace { /// This class is used to represent a candidate for loop fusion. When it is /// constructed, it checks the conditions for loop fusion to ensure that it /// represents a valid candidate. It caches several parts of a loop that are @@ -143,6 +147,8 @@ struct FusionCandidate { SmallVector MemWrites; /// Are all of the members of this fusion candidate still valid bool Valid; + /// Guard branch of the loop, if it exists + BranchInst *GuardBranch; /// Dominator and PostDominator trees are needed for the /// FusionCandidateCompare function, required by FusionCandidateSet to @@ -151,11 +157,20 @@ struct FusionCandidate { const DominatorTree *DT; const PostDominatorTree *PDT; + OptimizationRemarkEmitter &ORE; + FusionCandidate(Loop *L, const DominatorTree *DT, - const PostDominatorTree *PDT) + const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE) : Preheader(L->getLoopPreheader()), Header(L->getHeader()), ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()), - Latch(L->getLoopLatch()), L(L), Valid(true), DT(DT), PDT(PDT) { + Latch(L->getLoopLatch()), L(L), Valid(true), GuardBranch(nullptr), + DT(DT), PDT(PDT), ORE(ORE) { + + // TODO: This is temporary while we fuse both rotated and non-rotated + // loops. Once we switch to only fusing rotated loops, the initialization of + // GuardBranch can be moved into the initialization list above. + if (isRotated()) + GuardBranch = L->getLoopGuardBranch(); // Walk over all blocks in the loop and check for conditions that may // prevent fusion. For each block, walk over all instructions and collect @@ -163,28 +178,28 @@ struct FusionCandidate { // found, invalidate this object and return. for (BasicBlock *BB : L->blocks()) { if (BB->hasAddressTaken()) { - AddressTakenBB++; invalidate(); + reportInvalidCandidate(AddressTakenBB); return; } for (Instruction &I : *BB) { if (I.mayThrow()) { - MayThrowException++; invalidate(); + reportInvalidCandidate(MayThrowException); return; } if (StoreInst *SI = dyn_cast(&I)) { if (SI->isVolatile()) { - ContainsVolatileAccess++; invalidate(); + reportInvalidCandidate(ContainsVolatileAccess); return; } } if (LoadInst *LI = dyn_cast(&I)) { if (LI->isVolatile()) { - ContainsVolatileAccess++; invalidate(); + reportInvalidCandidate(ContainsVolatileAccess); return; } } @@ -214,19 +229,96 @@ struct FusionCandidate { assert(Latch == L->getLoopLatch() && "Latch is out of sync"); } + /// Get the entry block for this fusion candidate. + /// + /// If this fusion candidate represents a guarded loop, the entry block is the + /// loop guard block. If it represents an unguarded loop, the entry block is + /// the preheader of the loop. + BasicBlock *getEntryBlock() const { + if (GuardBranch) + return GuardBranch->getParent(); + else + return Preheader; + } + + /// Given a guarded loop, get the successor of the guard that is not in the + /// loop. + /// + /// This method returns the successor of the loop guard that is not located + /// within the loop (i.e., the successor of the guard that is not the + /// preheader). + /// This method is only valid for guarded loops. + BasicBlock *getNonLoopBlock() const { + assert(GuardBranch && "Only valid on guarded loops."); + assert(GuardBranch->isConditional() && + "Expecting guard to be a conditional branch."); + return (GuardBranch->getSuccessor(0) == Preheader) + ? 
GuardBranch->getSuccessor(1) + : GuardBranch->getSuccessor(0); + } + + bool isRotated() const { + assert(L && "Expecting loop to be valid."); + assert(Latch && "Expecting latch to be valid."); + return L->isLoopExiting(Latch); + } + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void dump() const { - dbgs() << "\tPreheader: " << (Preheader ? Preheader->getName() : "nullptr") + dbgs() << "\tGuardBranch: " + << (GuardBranch ? GuardBranch->getName() : "nullptr") << "\n" + << "\tPreheader: " << (Preheader ? Preheader->getName() : "nullptr") << "\n" << "\tHeader: " << (Header ? Header->getName() : "nullptr") << "\n" << "\tExitingBB: " << (ExitingBlock ? ExitingBlock->getName() : "nullptr") << "\n" << "\tExitBB: " << (ExitBlock ? ExitBlock->getName() : "nullptr") << "\n" - << "\tLatch: " << (Latch ? Latch->getName() : "nullptr") << "\n"; + << "\tLatch: " << (Latch ? Latch->getName() : "nullptr") << "\n" + << "\tEntryBlock: " + << (getEntryBlock() ? getEntryBlock()->getName() : "nullptr") + << "\n"; } #endif + /// Determine if a fusion candidate (representing a loop) is eligible for + /// fusion. Note that this only checks whether a single loop can be fused - it + /// does not check whether it is *legal* to fuse two loops together. + bool isEligibleForFusion(ScalarEvolution &SE) const { + if (!isValid()) { + LLVM_DEBUG(dbgs() << "FC has invalid CFG requirements!\n"); + if (!Preheader) + ++InvalidPreheader; + if (!Header) + ++InvalidHeader; + if (!ExitingBlock) + ++InvalidExitingBlock; + if (!ExitBlock) + ++InvalidExitBlock; + if (!Latch) + ++InvalidLatch; + if (L->isInvalid()) + ++InvalidLoop; + + return false; + } + + // Require ScalarEvolution to be able to determine a trip count. + if (!SE.hasLoopInvariantBackedgeTakenCount(L)) { + LLVM_DEBUG(dbgs() << "Loop " << L->getName() + << " trip count not computable!\n"); + return reportInvalidCandidate(UnknownTripCount); + } + + if (!L->isLoopSimplifyForm()) { + LLVM_DEBUG(dbgs() << "Loop " << L->getName() + << " is not in simplified form!\n"); + return reportInvalidCandidate(NotSimplifiedForm); + } + + return true; + } + private: // This is only used internally for now, to clear the MemWrites and MemReads // list and setting Valid to false. I can't envision other uses of this right @@ -239,17 +331,18 @@ private: MemReads.clear(); Valid = false; } -}; -inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, - const FusionCandidate &FC) { - if (FC.isValid()) - OS << FC.Preheader->getName(); - else - OS << ""; - - return OS; -} + bool reportInvalidCandidate(llvm::Statistic &Stat) const { + using namespace ore; + assert(L && Preheader && "Fusion candidate not initialized properly!"); + ++Stat; + ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, Stat.getName(), + L->getStartLoc(), Preheader) + << "[" << Preheader->getParent()->getName() << "]: " + << "Loop is not a candidate for fusion: " << Stat.getDesc()); + return false; + } +}; struct FusionCandidateCompare { /// Comparison functor to sort two Control Flow Equivalent fusion candidates @@ -260,21 +353,24 @@ struct FusionCandidateCompare { const FusionCandidate &RHS) const { const DominatorTree *DT = LHS.DT; + BasicBlock *LHSEntryBlock = LHS.getEntryBlock(); + BasicBlock *RHSEntryBlock = RHS.getEntryBlock(); + // Do not save PDT to local variable as it is only used in asserts and thus // will trigger an unused variable warning if building without asserts. assert(DT && LHS.PDT && "Expecting valid dominator tree"); // Do this compare first so if LHS == RHS, function returns false. 
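The new GuardBranch, getEntryBlock, and getNonLoopBlock members map onto the usual shape of a guarded, rotated loop. A hypothetical source-level view of which blocks the fusion candidate caches (illustration only):

    void guarded(int *a, int n) {
      if (n > 0) {               // guard branch: GuardBranch, getEntryBlock()
        int i = 0;               // preheader
        do {                     // header
          a[i] = i;
          ++i;
        } while (i < n);         // latch == exiting block (rotated loop)
      }                          // guard's other successor: getNonLoopBlock()
      // exit block falls through to here
    }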
- if (DT->dominates(RHS.Preheader, LHS.Preheader)) { + if (DT->dominates(RHSEntryBlock, LHSEntryBlock)) { // RHS dominates LHS // Verify LHS post-dominates RHS - assert(LHS.PDT->dominates(LHS.Preheader, RHS.Preheader)); + assert(LHS.PDT->dominates(LHSEntryBlock, RHSEntryBlock)); return false; } - if (DT->dominates(LHS.Preheader, RHS.Preheader)) { + if (DT->dominates(LHSEntryBlock, RHSEntryBlock)) { // Verify RHS Postdominates LHS - assert(LHS.PDT->dominates(RHS.Preheader, LHS.Preheader)); + assert(LHS.PDT->dominates(RHSEntryBlock, LHSEntryBlock)); return true; } @@ -286,7 +382,6 @@ struct FusionCandidateCompare { } }; -namespace { using LoopVector = SmallVector; // Set of Control Flow Equivalent (CFE) Fusion Candidates, sorted in dominance @@ -301,17 +396,26 @@ using LoopVector = SmallVector; // keeps the FusionCandidateSet sorted will also simplify the implementation. using FusionCandidateSet = std::set; using FusionCandidateCollection = SmallVector; -} // namespace -inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, +#if !defined(NDEBUG) +static llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const FusionCandidate &FC) { + if (FC.isValid()) + OS << FC.Preheader->getName(); + else + OS << ""; + + return OS; +} + +static llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const FusionCandidateSet &CandSet) { - for (auto IT : CandSet) - OS << IT << "\n"; + for (const FusionCandidate &FC : CandSet) + OS << FC << '\n'; return OS; } -#if !defined(NDEBUG) static void printFusionCandidates(const FusionCandidateCollection &FusionCandidates) { dbgs() << "Fusion Candidates: \n"; @@ -391,16 +495,6 @@ static void printLoopVector(const LoopVector &LV) { } #endif -static void reportLoopFusion(const FusionCandidate &FC0, - const FusionCandidate &FC1, - OptimizationRemarkEmitter &ORE) { - using namespace ore; - ORE.emit( - OptimizationRemark(DEBUG_TYPE, "LoopFusion", FC0.Preheader->getParent()) - << "Fused " << NV("Cand1", StringRef(FC0.Preheader->getName())) - << " with " << NV("Cand2", StringRef(FC1.Preheader->getName()))); -} - struct LoopFuser { private: // Sets of control flow equivalent fusion candidates for a given nest level. @@ -497,53 +591,16 @@ private: const FusionCandidate &FC1) const { assert(FC0.Preheader && FC1.Preheader && "Expecting valid preheaders"); - if (DT.dominates(FC0.Preheader, FC1.Preheader)) - return PDT.dominates(FC1.Preheader, FC0.Preheader); + BasicBlock *FC0EntryBlock = FC0.getEntryBlock(); + BasicBlock *FC1EntryBlock = FC1.getEntryBlock(); - if (DT.dominates(FC1.Preheader, FC0.Preheader)) - return PDT.dominates(FC0.Preheader, FC1.Preheader); + if (DT.dominates(FC0EntryBlock, FC1EntryBlock)) + return PDT.dominates(FC1EntryBlock, FC0EntryBlock); - return false; - } - - /// Determine if a fusion candidate (representing a loop) is eligible for - /// fusion. Note that this only checks whether a single loop can be fused - it - /// does not check whether it is *legal* to fuse two loops together. 
- bool eligibleForFusion(const FusionCandidate &FC) const { - if (!FC.isValid()) { - LLVM_DEBUG(dbgs() << "FC " << FC << " has invalid CFG requirements!\n"); - if (!FC.Preheader) - InvalidPreheader++; - if (!FC.Header) - InvalidHeader++; - if (!FC.ExitingBlock) - InvalidExitingBlock++; - if (!FC.ExitBlock) - InvalidExitBlock++; - if (!FC.Latch) - InvalidLatch++; - if (FC.L->isInvalid()) - InvalidLoop++; + if (DT.dominates(FC1EntryBlock, FC0EntryBlock)) + return PDT.dominates(FC0EntryBlock, FC1EntryBlock); - return false; - } - - // Require ScalarEvolution to be able to determine a trip count. - if (!SE.hasLoopInvariantBackedgeTakenCount(FC.L)) { - LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName() - << " trip count not computable!\n"); - InvalidTripCount++; - return false; - } - - if (!FC.L->isLoopSimplifyForm()) { - LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName() - << " is not in simplified form!\n"); - NotSimplifiedForm++; - return false; - } - - return true; + return false; } /// Iterate over all loops in the given loop set and identify the loops that @@ -551,8 +608,8 @@ private: /// Flow Equivalent sets, sorted by dominance. void collectFusionCandidates(const LoopVector &LV) { for (Loop *L : LV) { - FusionCandidate CurrCand(L, &DT, &PDT); - if (!eligibleForFusion(CurrCand)) + FusionCandidate CurrCand(L, &DT, &PDT, ORE); + if (!CurrCand.isEligibleForFusion(SE)) continue; // Go through each list in FusionCandidates and determine if L is control @@ -664,31 +721,64 @@ private: if (!identicalTripCounts(*FC0, *FC1)) { LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip " "counts. Not fusing.\n"); - NonEqualTripCount++; + reportLoopFusion(*FC0, *FC1, + NonEqualTripCount); continue; } if (!isAdjacent(*FC0, *FC1)) { LLVM_DEBUG(dbgs() << "Fusion candidates are not adjacent. Not fusing.\n"); - NonAdjacent++; + reportLoopFusion(*FC0, *FC1, NonAdjacent); continue; } - // For now we skip fusing if the second candidate has any instructions - // in the preheader. This is done because we currently do not have the - // safety checks to determine if it is save to move the preheader of - // the second candidate past the body of the first candidate. Once - // these checks are added, this condition can be removed. + // Ensure that FC0 and FC1 have identical guards. + // If one (or both) are not guarded, this check is not necessary. + if (FC0->GuardBranch && FC1->GuardBranch && + !haveIdenticalGuards(*FC0, *FC1)) { + LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical " + "guards. Not Fusing.\n"); + reportLoopFusion(*FC0, *FC1, + NonIdenticalGuards); + continue; + } + + // The following three checks look for empty blocks in FC0 and FC1. If + // any of these blocks are non-empty, we do not fuse. This is done + // because we currently do not have the safety checks to determine if + // it is safe to move the blocks past other blocks in the loop. Once + // these checks are added, these conditions can be relaxed. if (!isEmptyPreheader(*FC1)) { LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty " "preheader. Not fusing.\n"); - NonEmptyPreheader++; + reportLoopFusion(*FC0, *FC1, + NonEmptyPreheader); + continue; + } + + if (FC0->GuardBranch && !isEmptyExitBlock(*FC0)) { + LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty exit " + "block. Not fusing.\n"); + reportLoopFusion(*FC0, *FC1, + NonEmptyExitBlock); + continue; + } + + if (FC1->GuardBranch && !isEmptyGuardBlock(*FC1)) { + LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty guard " + "block. 
Not fusing.\n"); + reportLoopFusion(*FC0, *FC1, + NonEmptyGuardBlock); continue; } + // Check the dependencies across the loops and do not fuse if it would + // violate them. if (!dependencesAllowFusion(*FC0, *FC1)) { LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n"); + reportLoopFusion(*FC0, *FC1, + InvalidDependencies); continue; } @@ -696,9 +786,11 @@ private: LLVM_DEBUG(dbgs() << "\tFusion appears to be " << (BeneficialToFuse ? "" : "un") << "profitable!\n"); - if (!BeneficialToFuse) + if (!BeneficialToFuse) { + reportLoopFusion(*FC0, *FC1, + FusionNotBeneficial); continue; - + } // All analysis has completed and has determined that fusion is legal // and profitable. At this point, start transforming the code and // perform fusion. @@ -710,15 +802,14 @@ private: // Note this needs to be done *before* performFusion because // performFusion will change the original loops, making it not // possible to identify them after fusion is complete. - reportLoopFusion(*FC0, *FC1, ORE); + reportLoopFusion(*FC0, *FC1, FuseCounter); - FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT); + FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT, ORE); FusedCand.verify(); - assert(eligibleForFusion(FusedCand) && + assert(FusedCand.isEligibleForFusion(SE) && "Fused candidate should be eligible for fusion!"); // Notify the loop-depth-tree that these loops are not valid objects - // anymore. LDT.removeLoop(FC1->L); CandidateSet.erase(FC0); @@ -889,7 +980,7 @@ private: LLVM_DEBUG(dbgs() << "Check if " << FC0 << " can be fused with " << FC1 << "\n"); assert(FC0.L->getLoopDepth() == FC1.L->getLoopDepth()); - assert(DT.dominates(FC0.Preheader, FC1.Preheader)); + assert(DT.dominates(FC0.getEntryBlock(), FC1.getEntryBlock())); for (Instruction *WriteL0 : FC0.MemWrites) { for (Instruction *WriteL1 : FC1.MemWrites) @@ -939,18 +1030,89 @@ private: return true; } - /// Determine if the exit block of \p FC0 is the preheader of \p FC1. In this - /// case, there is no code in between the two fusion candidates, thus making - /// them adjacent. + /// Determine if two fusion candidates are adjacent in the CFG. + /// + /// This method will determine if there are additional basic blocks in the CFG + /// between the exit of \p FC0 and the entry of \p FC1. + /// If the two candidates are guarded loops, then it checks whether the + /// non-loop successor of the \p FC0 guard branch is the entry block of \p + /// FC1. If not, then the loops are not adjacent. If the two candidates are + /// not guarded loops, then it checks whether the exit block of \p FC0 is the + /// preheader of \p FC1. bool isAdjacent(const FusionCandidate &FC0, const FusionCandidate &FC1) const { - return FC0.ExitBlock == FC1.Preheader; + // If the successor of the guard branch is FC1, then the loops are adjacent + if (FC0.GuardBranch) + return FC0.getNonLoopBlock() == FC1.getEntryBlock(); + else + return FC0.ExitBlock == FC1.getEntryBlock(); + } + + /// Determine if two fusion candidates have identical guards + /// + /// This method will determine if two fusion candidates have the same guards. + /// The guards are considered the same if: + /// 1. The instructions to compute the condition used in the compare are + /// identical. + /// 2. The successors of the guard have the same flow into/around the loop. + /// If the compare instructions are identical, then the first successor of the + /// guard must go to the same place (either the preheader of the loop or the + /// NonLoopBlock). 
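Taken together, the checks above (identical trip counts, adjacency, identical guards, empty preheader/exit/guard blocks, and legal dependences) establish when two guarded loops can be fused. A hypothetical before/after of the overall effect (not from the patch):

    void before(int *a, int *b, int n) {
      if (n > 0)                                 // FC0 guard
        for (int i = 0; i < n; ++i) a[i] = i;    // FC0
      if (n > 0)                                 // FC1 guard, identical
        for (int i = 0; i < n; ++i) b[i] = a[i]; // FC1, adjacent to FC0
    }

    void after(int *a, int *b, int n) {
      if (n > 0)                                 // FC0's guard now guards both
        for (int i = 0; i < n; ++i) {
          a[i] = i;                              // FC0 body
          b[i] = a[i];                           // FC1 body, fused in
        }
    }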
In other words, the the first successor of both loops must + /// both go into the loop (i.e., the preheader) or go around the loop (i.e., + /// the NonLoopBlock). The same must be true for the second successor. + bool haveIdenticalGuards(const FusionCandidate &FC0, + const FusionCandidate &FC1) const { + assert(FC0.GuardBranch && FC1.GuardBranch && + "Expecting FC0 and FC1 to be guarded loops."); + + if (auto FC0CmpInst = + dyn_cast(FC0.GuardBranch->getCondition())) + if (auto FC1CmpInst = + dyn_cast(FC1.GuardBranch->getCondition())) + if (!FC0CmpInst->isIdenticalTo(FC1CmpInst)) + return false; + + // The compare instructions are identical. + // Now make sure the successor of the guards have the same flow into/around + // the loop + if (FC0.GuardBranch->getSuccessor(0) == FC0.Preheader) + return (FC1.GuardBranch->getSuccessor(0) == FC1.Preheader); + else + return (FC1.GuardBranch->getSuccessor(1) == FC1.Preheader); + } + + /// Check that the guard for \p FC *only* contains the cmp/branch for the + /// guard. + /// Once we are able to handle intervening code, any code in the guard block + /// for FC1 will need to be treated as intervening code and checked whether + /// it can safely move around the loops. + bool isEmptyGuardBlock(const FusionCandidate &FC) const { + assert(FC.GuardBranch && "Expecting a fusion candidate with guard branch."); + if (auto *CmpInst = dyn_cast(FC.GuardBranch->getCondition())) { + auto *GuardBlock = FC.GuardBranch->getParent(); + // If the generation of the cmp value is in GuardBlock, then the size of + // the guard block should be 2 (cmp + branch). If the generation of the + // cmp value is in a different block, then the size of the guard block + // should only be 1. + if (CmpInst->getParent() == GuardBlock) + return GuardBlock->size() == 2; + else + return GuardBlock->size() == 1; + } + + return false; } bool isEmptyPreheader(const FusionCandidate &FC) const { + assert(FC.Preheader && "Expecting a valid preheader"); return FC.Preheader->size() == 1; } + bool isEmptyExitBlock(const FusionCandidate &FC) const { + assert(FC.ExitBlock && "Expecting a valid exit block"); + return FC.ExitBlock->size() == 1; + } + /// Fuse two fusion candidates, creating a new fused loop. /// /// This method contains the mechanics of fusing two loops, represented by \p @@ -987,6 +1149,12 @@ private: LLVM_DEBUG(dbgs() << "Fusion Candidate 0: \n"; FC0.dump(); dbgs() << "Fusion Candidate 1: \n"; FC1.dump();); + // Fusing guarded loops is handled slightly differently than non-guarded + // loops and has been broken out into a separate method instead of trying to + // intersperse the logic within a single method. + if (FC0.GuardBranch) + return fuseGuardedLoops(FC0, FC1); + assert(FC1.Preheader == FC0.ExitBlock); assert(FC1.Preheader->size() == 1 && FC1.Preheader->getSingleSuccessor() == FC1.Header); @@ -1131,7 +1299,258 @@ private: SE.verify(); #endif - FuseCounter++; + LLVM_DEBUG(dbgs() << "Fusion done:\n"); + + return FC0.L; + } + + /// Report details on loop fusion opportunities. + /// + /// This template function can be used to report both successful and missed + /// loop fusion opportunities, based on the RemarkKind. The RemarkKind should + /// be one of: + /// - OptimizationRemarkMissed to report when loop fusion is unsuccessful + /// given two valid fusion candidates. + /// - OptimizationRemark to report successful fusion of two fusion + /// candidates. 
+ /// The remarks will be printed using the form: + /// ::: []: + /// and : + template + void reportLoopFusion(const FusionCandidate &FC0, const FusionCandidate &FC1, + llvm::Statistic &Stat) { + assert(FC0.Preheader && FC1.Preheader && + "Expecting valid fusion candidates"); + using namespace ore; + ++Stat; + ORE.emit(RemarkKind(DEBUG_TYPE, Stat.getName(), FC0.L->getStartLoc(), + FC0.Preheader) + << "[" << FC0.Preheader->getParent()->getName() + << "]: " << NV("Cand1", StringRef(FC0.Preheader->getName())) + << " and " << NV("Cand2", StringRef(FC1.Preheader->getName())) + << ": " << Stat.getDesc()); + } + + /// Fuse two guarded fusion candidates, creating a new fused loop. + /// + /// Fusing guarded loops is handled much the same way as fusing non-guarded + /// loops. The rewiring of the CFG is slightly different though, because of + /// the presence of the guards around the loops and the exit blocks after the + /// loop body. As such, the new loop is rewired as follows: + /// 1. Keep the guard branch from FC0 and use the non-loop block target + /// from the FC1 guard branch. + /// 2. Remove the exit block from FC0 (this exit block should be empty + /// right now). + /// 3. Remove the guard branch for FC1 + /// 4. Remove the preheader for FC1. + /// The exit block successor for the latch of FC0 is updated to be the header + /// of FC1 and the non-exit block successor of the latch of FC1 is updated to + /// be the header of FC0, thus creating the fused loop. + Loop *fuseGuardedLoops(const FusionCandidate &FC0, + const FusionCandidate &FC1) { + assert(FC0.GuardBranch && FC1.GuardBranch && "Expecting guarded loops"); + + BasicBlock *FC0GuardBlock = FC0.GuardBranch->getParent(); + BasicBlock *FC1GuardBlock = FC1.GuardBranch->getParent(); + BasicBlock *FC0NonLoopBlock = FC0.getNonLoopBlock(); + BasicBlock *FC1NonLoopBlock = FC1.getNonLoopBlock(); + + assert(FC0NonLoopBlock == FC1GuardBlock && "Loops are not adjacent"); + + SmallVector TreeUpdates; + + //////////////////////////////////////////////////////////////////////////// + // Update the Loop Guard + //////////////////////////////////////////////////////////////////////////// + // The guard for FC0 is updated to guard both FC0 and FC1. This is done by + // changing the NonLoopGuardBlock for FC0 to the NonLoopGuardBlock for FC1. + // Thus, one path from the guard goes to the preheader for FC0 (and thus + // executes the new fused loop) and the other path goes to the NonLoopBlock + // for FC1 (where FC1 guard would have gone if FC1 was not executed). + FC0.GuardBranch->replaceUsesOfWith(FC0NonLoopBlock, FC1NonLoopBlock); + FC0.ExitBlock->getTerminator()->replaceUsesOfWith(FC1GuardBlock, + FC1.Header); + + // The guard of FC1 is not necessary anymore. 
+ FC1.GuardBranch->eraseFromParent(); + new UnreachableInst(FC1GuardBlock->getContext(), FC1GuardBlock); + + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Delete, FC1GuardBlock, FC1.Preheader)); + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Delete, FC1GuardBlock, FC1NonLoopBlock)); + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Delete, FC0GuardBlock, FC1GuardBlock)); + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Insert, FC0GuardBlock, FC1NonLoopBlock)); + + assert(pred_begin(FC1GuardBlock) == pred_end(FC1GuardBlock) && + "Expecting guard block to have no predecessors"); + assert(succ_begin(FC1GuardBlock) == succ_end(FC1GuardBlock) && + "Expecting guard block to have no successors"); + + // Remember the phi nodes originally in the header of FC0 in order to rewire + // them later. However, this is only necessary if the new loop carried + // values might not dominate the exiting branch. While we do not generally + // test if this is the case but simply insert intermediate phi nodes, we + // need to make sure these intermediate phi nodes have different + // predecessors. To this end, we filter the special case where the exiting + // block is the latch block of the first loop. Nothing needs to be done + // anyway as all loop carried values dominate the latch and thereby also the + // exiting branch. + // KB: This is no longer necessary because FC0.ExitingBlock == FC0.Latch + // (because the loops are rotated. Thus, nothing will ever be added to + // OriginalFC0PHIs. + SmallVector OriginalFC0PHIs; + if (FC0.ExitingBlock != FC0.Latch) + for (PHINode &PHI : FC0.Header->phis()) + OriginalFC0PHIs.push_back(&PHI); + + assert(OriginalFC0PHIs.empty() && "Expecting OriginalFC0PHIs to be empty!"); + + // Replace incoming blocks for header PHIs first. + FC1.Preheader->replaceSuccessorsPhiUsesWith(FC0.Preheader); + FC0.Latch->replaceSuccessorsPhiUsesWith(FC1.Latch); + + // The old exiting block of the first loop (FC0) has to jump to the header + // of the second as we need to execute the code in the second header block + // regardless of the trip count. That is, if the trip count is 0, so the + // back edge is never taken, we still have to execute both loop headers, + // especially (but not only!) if the second is a do-while style loop. + // However, doing so might invalidate the phi nodes of the first loop as + // the new values do only need to dominate their latch and not the exiting + // predicate. To remedy this potential problem we always introduce phi + // nodes in the header of the second loop later that select the loop carried + // value, if the second header was reached through an old latch of the + // first, or undef otherwise. This is sound as exiting the first implies the + // second will exit too, __without__ taking the back-edge (their + // trip-counts are equal after all). + FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC0.ExitBlock, + FC1.Header); + + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Delete, FC0.ExitingBlock, FC0.ExitBlock)); + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Insert, FC0.ExitingBlock, FC1.Header)); + + // Remove FC0 Exit Block + // The exit block for FC0 is no longer needed since control will flow + // directly to the header of FC1. Since it is an empty block, it can be + // removed at this point. 
+ // TODO: In the future, we can handle non-empty exit blocks my merging any + // instructions from FC0 exit block into FC1 exit block prior to removing + // the block. + assert(pred_begin(FC0.ExitBlock) == pred_end(FC0.ExitBlock) && + "Expecting exit block to be empty"); + FC0.ExitBlock->getTerminator()->eraseFromParent(); + new UnreachableInst(FC0.ExitBlock->getContext(), FC0.ExitBlock); + + // Remove FC1 Preheader + // The pre-header of L1 is not necessary anymore. + assert(pred_begin(FC1.Preheader) == pred_end(FC1.Preheader)); + FC1.Preheader->getTerminator()->eraseFromParent(); + new UnreachableInst(FC1.Preheader->getContext(), FC1.Preheader); + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Delete, FC1.Preheader, FC1.Header)); + + // Moves the phi nodes from the second to the first loops header block. + while (PHINode *PHI = dyn_cast(&FC1.Header->front())) { + if (SE.isSCEVable(PHI->getType())) + SE.forgetValue(PHI); + if (PHI->hasNUsesOrMore(1)) + PHI->moveBefore(&*FC0.Header->getFirstInsertionPt()); + else + PHI->eraseFromParent(); + } + + // Introduce new phi nodes in the second loop header to ensure + // exiting the first and jumping to the header of the second does not break + // the SSA property of the phis originally in the first loop. See also the + // comment above. + Instruction *L1HeaderIP = &FC1.Header->front(); + for (PHINode *LCPHI : OriginalFC0PHIs) { + int L1LatchBBIdx = LCPHI->getBasicBlockIndex(FC1.Latch); + assert(L1LatchBBIdx >= 0 && + "Expected loop carried value to be rewired at this point!"); + + Value *LCV = LCPHI->getIncomingValue(L1LatchBBIdx); + + PHINode *L1HeaderPHI = PHINode::Create( + LCV->getType(), 2, LCPHI->getName() + ".afterFC0", L1HeaderIP); + L1HeaderPHI->addIncoming(LCV, FC0.Latch); + L1HeaderPHI->addIncoming(UndefValue::get(LCV->getType()), + FC0.ExitingBlock); + + LCPHI->setIncomingValue(L1LatchBBIdx, L1HeaderPHI); + } + + // Update the latches + + // Replace latch terminator destinations. + FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header); + FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header); + + // If FC0.Latch and FC0.ExitingBlock are the same then we have already + // performed the updates above. + if (FC0.Latch != FC0.ExitingBlock) + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Insert, FC0.Latch, FC1.Header)); + + TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete, + FC0.Latch, FC0.Header)); + TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Insert, + FC1.Latch, FC0.Header)); + TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete, + FC1.Latch, FC1.Header)); + + // All done + // Apply the updates to the Dominator Tree and cleanup. + + assert(succ_begin(FC1GuardBlock) == succ_end(FC1GuardBlock) && + "FC1GuardBlock has successors!!"); + assert(pred_begin(FC1GuardBlock) == pred_end(FC1GuardBlock) && + "FC1GuardBlock has predecessors!!"); + + // Update DT/PDT + DTU.applyUpdates(TreeUpdates); + + LI.removeBlock(FC1.Preheader); + DTU.deleteBB(FC1.Preheader); + DTU.deleteBB(FC0.ExitBlock); + DTU.flush(); + + // Is there a way to keep SE up-to-date so we don't need to forget the loops + // and rebuild the information in subsequent passes of fusion? + SE.forgetLoop(FC1.L); + SE.forgetLoop(FC0.L); + + // Merge the loops. 
+ SmallVector Blocks(FC1.L->block_begin(), + FC1.L->block_end()); + for (BasicBlock *BB : Blocks) { + FC0.L->addBlockEntry(BB); + FC1.L->removeBlockFromLoop(BB); + if (LI.getLoopFor(BB) != FC1.L) + continue; + LI.changeLoopFor(BB, FC0.L); + } + while (!FC1.L->empty()) { + const auto &ChildLoopIt = FC1.L->begin(); + Loop *ChildLoop = *ChildLoopIt; + FC1.L->removeChildLoop(ChildLoopIt); + FC0.L->addChildLoop(ChildLoop); + } + + // Delete the now empty loop L1. + LI.erase(FC1.L); + +#ifndef NDEBUG + assert(!verifyFunction(*FC0.Header->getParent(), &errs())); + assert(DT.verify(DominatorTree::VerificationLevel::Fast)); + assert(PDT.verify()); + LI.verify(DT); + SE.verify(); +#endif LLVM_DEBUG(dbgs() << "Fusion done:\n"); @@ -1177,6 +1596,7 @@ struct LoopFuseLegacy : public FunctionPass { return LF.fuseLoops(F); } }; +} // namespace PreservedAnalyses LoopFusePass::run(Function &F, FunctionAnalysisManager &AM) { auto &LI = AM.getResult(F); diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index e561494f19cf..dd477e800693 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -41,6 +41,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -77,16 +78,20 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/IR/Verifier.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -102,6 +107,7 @@ using namespace llvm; STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); +STATISTIC(NumBCmp, "Number of memcmp's formed from loop 2xload+eq-compare"); static cl::opt UseLIRCodeSizeHeurs( "use-lir-code-size-heurs", @@ -111,6 +117,26 @@ static cl::opt UseLIRCodeSizeHeurs( namespace { +// FIXME: reinventing the wheel much? Is there a cleaner solution? 
+struct PMAbstraction { + virtual void markLoopAsDeleted(Loop *L) = 0; + virtual ~PMAbstraction() = default; +}; +struct LegacyPMAbstraction : PMAbstraction { + LPPassManager &LPM; + LegacyPMAbstraction(LPPassManager &LPM) : LPM(LPM) {} + virtual ~LegacyPMAbstraction() = default; + void markLoopAsDeleted(Loop *L) override { LPM.markLoopAsDeleted(*L); } +}; +struct NewPMAbstraction : PMAbstraction { + LPMUpdater &Updater; + NewPMAbstraction(LPMUpdater &Updater) : Updater(Updater) {} + virtual ~NewPMAbstraction() = default; + void markLoopAsDeleted(Loop *L) override { + Updater.markLoopAsDeleted(*L, L->getName()); + } +}; + class LoopIdiomRecognize { Loop *CurLoop = nullptr; AliasAnalysis *AA; @@ -120,6 +146,7 @@ class LoopIdiomRecognize { TargetLibraryInfo *TLI; const TargetTransformInfo *TTI; const DataLayout *DL; + PMAbstraction &LoopDeleter; OptimizationRemarkEmitter &ORE; bool ApplyCodeSizeHeuristics; @@ -128,9 +155,10 @@ public: LoopInfo *LI, ScalarEvolution *SE, TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, - const DataLayout *DL, + const DataLayout *DL, PMAbstraction &LoopDeleter, OptimizationRemarkEmitter &ORE) - : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {} + : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), + LoopDeleter(LoopDeleter), ORE(ORE) {} bool runOnLoop(Loop *L); @@ -144,6 +172,8 @@ private: bool HasMemset; bool HasMemsetPattern; bool HasMemcpy; + bool HasMemCmp; + bool HasBCmp; /// Return code for isLegalStore() enum LegalStoreKind { @@ -186,6 +216,32 @@ private: bool runOnNoncountableLoop(); + struct CmpLoopStructure { + Value *BCmpValue, *LatchCmpValue; + BasicBlock *HeaderBrEqualBB, *HeaderBrUnequalBB; + BasicBlock *LatchBrFinishBB, *LatchBrContinueBB; + }; + bool matchBCmpLoopStructure(CmpLoopStructure &CmpLoop) const; + struct CmpOfLoads { + ICmpInst::Predicate BCmpPred; + Value *LoadSrcA, *LoadSrcB; + Value *LoadA, *LoadB; + }; + bool matchBCmpOfLoads(Value *BCmpValue, CmpOfLoads &CmpOfLoads) const; + bool recognizeBCmpLoopControlFlow(const CmpOfLoads &CmpOfLoads, + CmpLoopStructure &CmpLoop) const; + bool recognizeBCmpLoopSCEV(uint64_t BCmpTyBytes, CmpOfLoads &CmpOfLoads, + const SCEV *&SrcA, const SCEV *&SrcB, + const SCEV *&Iterations) const; + bool detectBCmpIdiom(ICmpInst *&BCmpInst, CmpInst *&LatchCmpInst, + LoadInst *&LoadA, LoadInst *&LoadB, const SCEV *&SrcA, + const SCEV *&SrcB, const SCEV *&NBytes) const; + BasicBlock *transformBCmpControlFlow(ICmpInst *ComparedEqual); + void transformLoopToBCmp(ICmpInst *BCmpInst, CmpInst *LatchCmpInst, + LoadInst *LoadA, LoadInst *LoadB, const SCEV *SrcA, + const SCEV *SrcB, const SCEV *NBytes); + bool recognizeBCmp(); + bool recognizePopcount(); void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst, PHINode *CntPhi, Value *Var); @@ -217,18 +273,20 @@ public: LoopInfo *LI = &getAnalysis().getLoopInfo(); ScalarEvolution *SE = &getAnalysis().getSE(); TargetLibraryInfo *TLI = - &getAnalysis().getTLI(); + &getAnalysis().getTLI( + *L->getHeader()->getParent()); const TargetTransformInfo *TTI = &getAnalysis().getTTI( *L->getHeader()->getParent()); const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout(); + LegacyPMAbstraction LoopDeleter(LPM); // For the old PM, we can't use OptimizationRemarkEmitter as an analysis // pass. Function analyses need to be preserved across loop transformations // but ORE cannot be preserved (see comment before the pass definition). 
OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); - LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, ORE); + LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, LoopDeleter, ORE); return LIR.runOnLoop(L); } @@ -247,7 +305,7 @@ char LoopIdiomRecognizeLegacyPass::ID = 0; PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, - LPMUpdater &) { + LPMUpdater &Updater) { const auto *DL = &L.getHeader()->getModule()->getDataLayout(); const auto &FAM = @@ -261,8 +319,9 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM, "LoopIdiomRecognizePass: OptimizationRemarkEmitterAnalysis not cached " "at a higher level"); + NewPMAbstraction LoopDeleter(Updater); LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL, - *ORE); + LoopDeleter, *ORE); if (!LIR.runOnLoop(&L)) return PreservedAnalyses::all(); @@ -299,7 +358,8 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) { // Disable loop idiom recognition if the function's name is a common idiom. StringRef Name = L->getHeader()->getParent()->getName(); - if (Name == "memset" || Name == "memcpy") + if (Name == "memset" || Name == "memcpy" || Name == "memcmp" || + Name == "bcmp") return false; // Determine if code size heuristics need to be applied. @@ -309,8 +369,10 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) { HasMemset = TLI->has(LibFunc_memset); HasMemsetPattern = TLI->has(LibFunc_memset_pattern16); HasMemcpy = TLI->has(LibFunc_memcpy); + HasMemCmp = TLI->has(LibFunc_memcmp); + HasBCmp = TLI->has(LibFunc_bcmp); - if (HasMemset || HasMemsetPattern || HasMemcpy) + if (HasMemset || HasMemsetPattern || HasMemcpy || HasMemCmp || HasBCmp) if (SE->hasLoopInvariantBackedgeTakenCount(L)) return runOnCountableLoop(); @@ -961,7 +1023,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( GlobalValue::PrivateLinkage, PatternValue, ".memset_pattern"); GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these. - GV->setAlignment(16); + GV->setAlignment(Align(16)); Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy); NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes}); } @@ -1149,7 +1211,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() { << "] Noncountable Loop %" << CurLoop->getHeader()->getName() << "\n"); - return recognizePopcount() || recognizeAndInsertFFS(); + return recognizeBCmp() || recognizePopcount() || recognizeAndInsertFFS(); } /// Check if the given conditional branch is based on the comparison between @@ -1823,3 +1885,811 @@ void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB, // loop. The loop would otherwise not be deleted even if it becomes empty. SE->forgetLoop(CurLoop); } + +bool LoopIdiomRecognize::matchBCmpLoopStructure( + CmpLoopStructure &CmpLoop) const { + ICmpInst::Predicate BCmpPred; + + // We are looking for the following basic layout: + // PreheaderBB: ; preds = ??? + // <...> + // br label %LoopHeaderBB + // LoopHeaderBB: ; preds = %PreheaderBB,%LoopLatchBB + // <...> + // %BCmpValue = icmp <...> + // br i1 %BCmpValue, label %LoopLatchBB, label %Successor0 + // LoopLatchBB: ; preds = %LoopHeaderBB + // <...> + // %LatchCmpValue = + // br i1 %LatchCmpValue, label %Successor1, label %LoopHeaderBB + // Successor0: ; preds = %LoopHeaderBB + // <...> + // Successor1: ; preds = %LoopLatchBB + // <...> + // + // Successor0 and Successor1 may or may not be the same basic block. + + // Match basic frame-work of this supposedly-comparison loop. 
+ using namespace PatternMatch; + if (!match(CurLoop->getHeader()->getTerminator(), + m_Br(m_CombineAnd(m_ICmp(BCmpPred, m_Value(), m_Value()), + m_Value(CmpLoop.BCmpValue)), + CmpLoop.HeaderBrEqualBB, CmpLoop.HeaderBrUnequalBB)) || + !match(CurLoop->getLoopLatch()->getTerminator(), + m_Br(m_CombineAnd(m_Cmp(), m_Value(CmpLoop.LatchCmpValue)), + CmpLoop.LatchBrFinishBB, CmpLoop.LatchBrContinueBB))) { + LLVM_DEBUG(dbgs() << "Basic control-flow layout unrecognized.\n"); + return false; + } + LLVM_DEBUG(dbgs() << "Recognized basic control-flow layout.\n"); + return true; +} + +bool LoopIdiomRecognize::matchBCmpOfLoads(Value *BCmpValue, + CmpOfLoads &CmpOfLoads) const { + using namespace PatternMatch; + LLVM_DEBUG(dbgs() << "Analyzing header icmp " << *BCmpValue + << " as bcmp pattern.\n"); + + // Match bcmp-style loop header cmp. It must be an eq-icmp of loads. Example: + // %v0 = load <...>, <...>* %LoadSrcA + // %v1 = load <...>, <...>* %LoadSrcB + // %CmpLoop.BCmpValue = icmp eq <...> %v0, %v1 + // There won't be any no-op bitcasts between load and icmp, + // they would have been transformed into a load of bitcast. + // FIXME: {b,mem}cmp() calls have the same semantics as icmp. Match them too. + if (!match(BCmpValue, + m_ICmp(CmpOfLoads.BCmpPred, + m_CombineAnd(m_Load(m_Value(CmpOfLoads.LoadSrcA)), + m_Value(CmpOfLoads.LoadA)), + m_CombineAnd(m_Load(m_Value(CmpOfLoads.LoadSrcB)), + m_Value(CmpOfLoads.LoadB)))) || + !ICmpInst::isEquality(CmpOfLoads.BCmpPred)) { + LLVM_DEBUG(dbgs() << "Loop header icmp did not match bcmp pattern.\n"); + return false; + } + LLVM_DEBUG(dbgs() << "Recognized header icmp as bcmp pattern with loads:\n\t" + << *CmpOfLoads.LoadA << "\n\t" << *CmpOfLoads.LoadB + << "\n"); + // FIXME: handle memcmp pattern? + return true; +} + +bool LoopIdiomRecognize::recognizeBCmpLoopControlFlow( + const CmpOfLoads &CmpOfLoads, CmpLoopStructure &CmpLoop) const { + BasicBlock *LoopHeaderBB = CurLoop->getHeader(); + BasicBlock *LoopLatchBB = CurLoop->getLoopLatch(); + + // Be wary, comparisons can be inverted, canonicalize order. + // If this 'element' comparison passed, we expect to proceed to the next elt. + if (CmpOfLoads.BCmpPred != ICmpInst::Predicate::ICMP_EQ) + std::swap(CmpLoop.HeaderBrEqualBB, CmpLoop.HeaderBrUnequalBB); + // The predicate on loop latch does not matter, just canonicalize some order. + if (CmpLoop.LatchBrContinueBB != LoopHeaderBB) + std::swap(CmpLoop.LatchBrFinishBB, CmpLoop.LatchBrContinueBB); + + SmallVector ExitBlocks; + + CurLoop->getUniqueExitBlocks(ExitBlocks); + assert(ExitBlocks.size() <= 2U && "Can't have more than two exit blocks."); + + // Check that control-flow between blocks is as expected. + if (CmpLoop.HeaderBrEqualBB != LoopLatchBB || + CmpLoop.LatchBrContinueBB != LoopHeaderBB || + !is_contained(ExitBlocks, CmpLoop.HeaderBrUnequalBB) || + !is_contained(ExitBlocks, CmpLoop.LatchBrFinishBB)) { + LLVM_DEBUG(dbgs() << "Loop control-flow not recognized.\n"); + return false; + } + + assert(!is_contained(ExitBlocks, CmpLoop.HeaderBrEqualBB) && + !is_contained(ExitBlocks, CmpLoop.LatchBrContinueBB) && + "Unexpected exit edges."); + + LLVM_DEBUG(dbgs() << "Recognized loop control-flow.\n"); + + LLVM_DEBUG(dbgs() << "Performing side-effect analysis on the loop.\n"); + assert(CurLoop->isLCSSAForm(*DT) && "Should only get LCSSA-form loops here."); + // No loop instructions must be used outside of the loop. 
Since we are in + // LCSSA form, we only need to check successor block's PHI nodes's incoming + // values for incoming blocks that are the loop basic blocks. + for (const BasicBlock *ExitBB : ExitBlocks) { + for (const PHINode &PHI : ExitBB->phis()) { + for (const BasicBlock *LoopBB : + make_filter_range(PHI.blocks(), [this](BasicBlock *PredecessorBB) { + return CurLoop->contains(PredecessorBB); + })) { + const auto *I = + dyn_cast(PHI.getIncomingValueForBlock(LoopBB)); + if (I && CurLoop->contains(I)) { + LLVM_DEBUG(dbgs() + << "Loop contains instruction " << *I + << " which is used outside of the loop in basic block " + << ExitBB->getName() << " in phi node " << PHI << "\n"); + return false; + } + } + } + } + // Similarly, the loop should not have any other observable side-effects + // other than the final comparison result. + for (BasicBlock *LoopBB : CurLoop->blocks()) { + for (Instruction &I : *LoopBB) { + if (isa(I)) // Ignore dbginfo. + continue; // FIXME: anything else? lifetime info? + if ((I.mayHaveSideEffects() || I.isAtomic() || I.isFenceLike()) && + &I != CmpOfLoads.LoadA && &I != CmpOfLoads.LoadB) { + LLVM_DEBUG( + dbgs() << "Loop contains instruction with potential side-effects: " + << I << "\n"); + return false; + } + } + } + LLVM_DEBUG(dbgs() << "No loop instructions deemed to have side-effects.\n"); + return true; +} + +bool LoopIdiomRecognize::recognizeBCmpLoopSCEV(uint64_t BCmpTyBytes, + CmpOfLoads &CmpOfLoads, + const SCEV *&SrcA, + const SCEV *&SrcB, + const SCEV *&Iterations) const { + // Try to compute SCEV of the loads, for this loop's scope. + const auto *ScevForSrcA = dyn_cast( + SE->getSCEVAtScope(CmpOfLoads.LoadSrcA, CurLoop)); + const auto *ScevForSrcB = dyn_cast( + SE->getSCEVAtScope(CmpOfLoads.LoadSrcB, CurLoop)); + if (!ScevForSrcA || !ScevForSrcB) { + LLVM_DEBUG(dbgs() << "Failed to get SCEV expressions for load sources.\n"); + return false; + } + + LLVM_DEBUG(dbgs() << "Got SCEV expressions (at loop scope) for loads:\n\t" + << *ScevForSrcA << "\n\t" << *ScevForSrcB << "\n"); + + // Loads must have folloving SCEV exprs: {%ptr,+,BCmpTyBytes}<%LoopHeaderBB> + const SCEV *RecStepForA = ScevForSrcA->getStepRecurrence(*SE); + const SCEV *RecStepForB = ScevForSrcB->getStepRecurrence(*SE); + if (!ScevForSrcA->isAffine() || !ScevForSrcB->isAffine() || + ScevForSrcA->getLoop() != CurLoop || ScevForSrcB->getLoop() != CurLoop || + RecStepForA != RecStepForB || !isa(RecStepForA) || + cast(RecStepForA)->getAPInt() != BCmpTyBytes) { + LLVM_DEBUG(dbgs() << "Unsupported SCEV expressions for loads. Only support " + "affine SCEV expressions originating in the loop we " + "are analysing with identical constant positive step, " + "equal to the count of bytes compared. Got:\n\t" + << *RecStepForA << "\n\t" << *RecStepForB << "\n"); + return false; + // FIXME: can support BCmpTyBytes > Step. + // But will need to account for the extra bytes compared at the end. + } + + SrcA = ScevForSrcA->getStart(); + SrcB = ScevForSrcB->getStart(); + LLVM_DEBUG(dbgs() << "Got SCEV expressions for load sources:\n\t" << *SrcA + << "\n\t" << *SrcB << "\n"); + + // The load sources must be loop-invants that dominate the loop header. 
+ if (SrcA == SE->getCouldNotCompute() || SrcB == SE->getCouldNotCompute() || + !SE->isAvailableAtLoopEntry(SrcA, CurLoop) || + !SE->isAvailableAtLoopEntry(SrcB, CurLoop)) { + LLVM_DEBUG(dbgs() << "Unsupported SCEV expressions for loads, unavaliable " + "prior to loop header.\n"); + return false; + } + + LLVM_DEBUG(dbgs() << "SCEV expressions for loads are acceptable.\n"); + + // bcmp / memcmp take length argument as size_t, so let's conservatively + // assume that the iteration count should be not wider than that. + Type *CmpFuncSizeTy = DL->getIntPtrType(SE->getContext()); + + // For how many iterations is loop guaranteed not to exit via LoopLatch? + // This is one less than the maximal number of comparisons,and is: n + -1 + const SCEV *LoopExitCount = + SE->getExitCount(CurLoop, CurLoop->getLoopLatch()); + LLVM_DEBUG(dbgs() << "Got SCEV expression for loop latch exit count: " + << *LoopExitCount << "\n"); + // Exit count, similarly, must be loop-invant that dominates the loop header. + if (LoopExitCount == SE->getCouldNotCompute() || + !LoopExitCount->getType()->isIntOrPtrTy() || + LoopExitCount->getType()->getScalarSizeInBits() > + CmpFuncSizeTy->getScalarSizeInBits() || + !SE->isAvailableAtLoopEntry(LoopExitCount, CurLoop)) { + LLVM_DEBUG(dbgs() << "Unsupported SCEV expression for loop latch exit.\n"); + return false; + } + + // LoopExitCount is always one less than the actual count of iterations. + // Do this before cast, else we will be stuck with 1 + zext(-1 + n) + Iterations = SE->getAddExpr( + LoopExitCount, SE->getOne(LoopExitCount->getType()), SCEV::FlagNUW); + assert(Iterations != SE->getCouldNotCompute() && + "Shouldn't fail to increment by one."); + + LLVM_DEBUG(dbgs() << "Computed iteration count: " << *Iterations << "\n"); + return true; +} + +/// Return true iff the bcmp idiom is detected in the loop. +/// +/// Additionally: +/// 1) \p BCmpInst is set to the root byte-comparison instruction. +/// 2) \p LatchCmpInst is set to the comparison that controls the latch. +/// 3) \p LoadA is set to the first LoadInst. +/// 4) \p LoadB is set to the second LoadInst. +/// 5) \p SrcA is set to the first source location that is being compared. +/// 6) \p SrcB is set to the second source location that is being compared. +/// 7) \p NBytes is set to the number of bytes to compare. +bool LoopIdiomRecognize::detectBCmpIdiom(ICmpInst *&BCmpInst, + CmpInst *&LatchCmpInst, + LoadInst *&LoadA, LoadInst *&LoadB, + const SCEV *&SrcA, const SCEV *&SrcB, + const SCEV *&NBytes) const { + LLVM_DEBUG(dbgs() << "Recognizing bcmp idiom\n"); + + // Give up if the loop is not in normal form, or has more than 2 blocks. + if (!CurLoop->isLoopSimplifyForm() || CurLoop->getNumBlocks() > 2) { + LLVM_DEBUG(dbgs() << "Basic loop structure unrecognized.\n"); + return false; + } + LLVM_DEBUG(dbgs() << "Recognized basic loop structure.\n"); + + CmpLoopStructure CmpLoop; + if (!matchBCmpLoopStructure(CmpLoop)) + return false; + + CmpOfLoads CmpOfLoads; + if (!matchBCmpOfLoads(CmpLoop.BCmpValue, CmpOfLoads)) + return false; + + if (!recognizeBCmpLoopControlFlow(CmpOfLoads, CmpLoop)) + return false; + + BCmpInst = cast(CmpLoop.BCmpValue); // FIXME: is there no + LatchCmpInst = cast(CmpLoop.LatchCmpValue); // way to combine + LoadA = cast(CmpOfLoads.LoadA); // these cast with + LoadB = cast(CmpOfLoads.LoadB); // m_Value() matcher? 
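(Editorial aside; not part of the upstream patch.) For orientation, a source-level loop of the shape the recognizer above is matching, and the intended net effect of the rewrite, might look roughly like the sketch below. The function and parameter names are hypothetical; the arithmetic mirrors the checks above for 32-bit elements: the SCEV step is 4 bytes, the latch exit count is N - 1, the iteration count is N, and the total byte count computed below is 4 * N.

    // Editorial sketch with hypothetical names; illustrates the idiom only.
    #include <cstring>

    // "Before": an element-wise equality loop of roughly the matched shape:
    // an eq-compare of two loads in the header, a counting compare in the
    // latch, and two exits ("unequal" vs. "all elements compared equal").
    static bool equalWords(const int *A, const int *B, unsigned long N) {
      for (unsigned long I = 0; I != N; ++I) // latch compare; exit count N - 1
        if (A[I] != B[I])                    // header icmp eq of two loads
          return false;                      // "unequal" exit
      return true;                           // "all equal" exit
    }

    // "After": the intended net effect once the loop is replaced by a single
    // call emitted in the preheader (bcmp() when available, else memcmp()),
    // compared against zero.
    static bool equalWordsRewritten(const int *A, const int *B,
                                    unsigned long N) {
      return std::memcmp(A, B, N * sizeof(int)) == 0;
    }
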
+ + Type *BCmpValTy = BCmpInst->getOperand(0)->getType(); + LLVMContext &Context = BCmpValTy->getContext(); + uint64_t BCmpTyBits = DL->getTypeSizeInBits(BCmpValTy); + static constexpr uint64_t ByteTyBits = 8; + + LLVM_DEBUG(dbgs() << "Got comparison between values of type " << *BCmpValTy + << " of size " << BCmpTyBits + << " bits (while byte = " << ByteTyBits << " bits).\n"); + // bcmp()/memcmp() minimal unit of work is a byte. Therefore we must check + // that we are dealing with a multiple of a byte here. + if (BCmpTyBits % ByteTyBits != 0) { + LLVM_DEBUG(dbgs() << "Value size is not a multiple of byte.\n"); + return false; + // FIXME: could still be done under a run-time check that the total bit + // count is a multiple of a byte i guess? Or handle remainder separately? + } + + // Each comparison is done on this many bytes. + uint64_t BCmpTyBytes = BCmpTyBits / ByteTyBits; + LLVM_DEBUG(dbgs() << "Size is exactly " << BCmpTyBytes + << " bytes, eligible for bcmp conversion.\n"); + + const SCEV *Iterations; + if (!recognizeBCmpLoopSCEV(BCmpTyBytes, CmpOfLoads, SrcA, SrcB, Iterations)) + return false; + + // bcmp / memcmp take length argument as size_t, do promotion now. + Type *CmpFuncSizeTy = DL->getIntPtrType(Context); + Iterations = SE->getNoopOrZeroExtend(Iterations, CmpFuncSizeTy); + assert(Iterations != SE->getCouldNotCompute() && "Promotion failed."); + // Note that it didn't do ptrtoint cast, we will need to do it manually. + + // We will be comparing *bytes*, not BCmpTy, we need to recalculate size. + // It's a multiplication, and it *could* overflow. But for it to overflow + // we'd want to compare more bytes than could be represented by size_t, But + // allocation functions also take size_t. So how'd you produce such buffer? + // FIXME: we likely need to actually check that we know this won't overflow, + // via llvm::computeOverflowForUnsignedMul(). + NBytes = SE->getMulExpr( + Iterations, SE->getConstant(CmpFuncSizeTy, BCmpTyBytes), SCEV::FlagNUW); + assert(NBytes != SE->getCouldNotCompute() && + "Shouldn't fail to increment by one."); + + LLVM_DEBUG(dbgs() << "Computed total byte count: " << *NBytes << "\n"); + + if (LoadA->getPointerAddressSpace() != LoadB->getPointerAddressSpace() || + LoadA->getPointerAddressSpace() != 0 || !LoadA->isSimple() || + !LoadB->isSimple()) { + StringLiteral L("Unsupported loads in idiom - only support identical, " + "simple loads from address space 0.\n"); + LLVM_DEBUG(dbgs() << L); + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "BCmpIdiomUnsupportedLoads", + BCmpInst->getDebugLoc(), + CurLoop->getHeader()) + << L; + }); + return false; // FIXME: support non-simple loads. + } + + LLVM_DEBUG(dbgs() << "Recognized bcmp idiom\n"); + ORE.emit([&]() { + return OptimizationRemarkAnalysis(DEBUG_TYPE, "RecognizedBCmpIdiom", + CurLoop->getStartLoc(), + CurLoop->getHeader()) + << "Loop recognized as a bcmp idiom"; + }); + + return true; +} + +BasicBlock * +LoopIdiomRecognize::transformBCmpControlFlow(ICmpInst *ComparedEqual) { + LLVM_DEBUG(dbgs() << "Transforming control-flow.\n"); + SmallVector DTUpdates; + + BasicBlock *PreheaderBB = CurLoop->getLoopPreheader(); + BasicBlock *HeaderBB = CurLoop->getHeader(); + BasicBlock *LoopLatchBB = CurLoop->getLoopLatch(); + SmallString<32> LoopName = CurLoop->getName(); + Function *Func = PreheaderBB->getParent(); + LLVMContext &Context = Func->getContext(); + + // Before doing anything, drop SCEV info. + SE->forgetLoop(CurLoop); + + // Here we start with: (0/6) + // PreheaderBB: ; preds = ??? 
+ // <...> + // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes) + // %ComparedEqual = icmp eq <...> %memcmp, 0 + // br label %LoopHeaderBB + // LoopHeaderBB: ; preds = %PreheaderBB,%LoopLatchBB + // <...> + // br i1 %<...>, label %LoopLatchBB, label %Successor0BB + // LoopLatchBB: ; preds = %LoopHeaderBB + // <...> + // br i1 %<...>, label %Successor1BB, label %LoopHeaderBB + // Successor0BB: ; preds = %LoopHeaderBB + // %S0PHI = phi <...> [ <...>, %LoopHeaderBB ] + // <...> + // Successor1BB: ; preds = %LoopLatchBB + // %S1PHI = phi <...> [ <...>, %LoopLatchBB ] + // <...> + // + // Successor0 and Successor1 may or may not be the same basic block. + + // Decouple the edge between loop preheader basic block and loop header basic + // block. Thus the loop has become unreachable. + assert(cast(PreheaderBB->getTerminator())->isUnconditional() && + PreheaderBB->getTerminator()->getSuccessor(0) == HeaderBB && + "Preheader bb must end with an unconditional branch to header bb."); + PreheaderBB->getTerminator()->eraseFromParent(); + DTUpdates.push_back({DominatorTree::Delete, PreheaderBB, HeaderBB}); + + // Create a new preheader basic block before loop header basic block. + auto *PhonyPreheaderBB = BasicBlock::Create( + Context, LoopName + ".phonypreheaderbb", Func, HeaderBB); + // And insert an unconditional branch from phony preheader basic block to + // loop header basic block. + IRBuilder<>(PhonyPreheaderBB).CreateBr(HeaderBB); + DTUpdates.push_back({DominatorTree::Insert, PhonyPreheaderBB, HeaderBB}); + + // Create a *single* new empty block that we will substitute as a + // successor basic block for the loop's exits. This one is temporary. + // Much like phony preheader basic block, it is not connected. + auto *PhonySuccessorBB = + BasicBlock::Create(Context, LoopName + ".phonysuccessorbb", Func, + LoopLatchBB->getNextNode()); + // That block must have *some* non-PHI instruction, or else deleteDeadLoop() + // will mess up cleanup of dbginfo, and verifier will complain. + IRBuilder<>(PhonySuccessorBB).CreateUnreachable(); + + // Create two new empty blocks that we will use to preserve the original + // loop exit control-flow, and preserve the incoming values in the PHI nodes + // in loop's successor exit blocks. These will live one. + auto *ComparedUnequalBB = + BasicBlock::Create(Context, ComparedEqual->getName() + ".unequalbb", Func, + PhonySuccessorBB->getNextNode()); + auto *ComparedEqualBB = + BasicBlock::Create(Context, ComparedEqual->getName() + ".equalbb", Func, + PhonySuccessorBB->getNextNode()); + + // By now we have: (1/6) + // PreheaderBB: ; preds = ??? + // <...> + // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes) + // %ComparedEqual = icmp eq <...> %memcmp, 0 + // [no terminator instruction!] + // PhonyPreheaderBB: ; No preds, UNREACHABLE! + // br label %LoopHeaderBB + // LoopHeaderBB: ; preds = %PhonyPreheaderBB, %LoopLatchBB + // <...> + // br i1 %<...>, label %LoopLatchBB, label %Successor0BB + // LoopLatchBB: ; preds = %LoopHeaderBB + // <...> + // br i1 %<...>, label %Successor1BB, label %LoopHeaderBB + // PhonySuccessorBB: ; No preds, UNREACHABLE! + // unreachable + // EqualBB: ; No preds, UNREACHABLE! + // [no terminator instruction!] + // UnequalBB: ; No preds, UNREACHABLE! + // [no terminator instruction!] 
+ // Successor0BB: ; preds = %LoopHeaderBB + // %S0PHI = phi <...> [ <...>, %LoopHeaderBB ] + // <...> + // Successor1BB: ; preds = %LoopLatchBB + // %S1PHI = phi <...> [ <...>, %LoopLatchBB ] + // <...> + + // What is the mapping/replacement basic block for exiting out of the loop + // from either of old's loop basic blocks? + auto GetReplacementBB = [this, ComparedEqualBB, + ComparedUnequalBB](const BasicBlock *OldBB) { + assert(CurLoop->contains(OldBB) && "Only for loop's basic blocks."); + if (OldBB == CurLoop->getLoopLatch()) // "all elements compared equal". + return ComparedEqualBB; + if (OldBB == CurLoop->getHeader()) // "element compared unequal". + return ComparedUnequalBB; + llvm_unreachable("Only had two basic blocks in loop."); + }; + + // What are the exits out of this loop? + SmallVector LoopExitEdges; + CurLoop->getExitEdges(LoopExitEdges); + assert(LoopExitEdges.size() == 2 && "Should have only to two exit edges."); + + // Populate new basic blocks, update the exiting control-flow, PHI nodes. + for (const Loop::Edge &Edge : LoopExitEdges) { + auto *OldLoopBB = const_cast(Edge.first); + auto *SuccessorBB = const_cast(Edge.second); + assert(CurLoop->contains(OldLoopBB) && !CurLoop->contains(SuccessorBB) && + "Unexpected edge."); + + // If we would exit the loop from this loop's basic block, + // what semantically would that mean? Did comparison succeed or fail? + BasicBlock *NewBB = GetReplacementBB(OldLoopBB); + assert(NewBB->empty() && "Should not get same new basic block here twice."); + IRBuilder<> Builder(NewBB); + Builder.SetCurrentDebugLocation(OldLoopBB->getTerminator()->getDebugLoc()); + Builder.CreateBr(SuccessorBB); + DTUpdates.push_back({DominatorTree::Insert, NewBB, SuccessorBB}); + // Also, be *REALLY* careful with PHI nodes in successor basic block, + // update them to recieve the same input value, but not from current loop's + // basic block, but from new basic block instead. + SuccessorBB->replacePhiUsesWith(OldLoopBB, NewBB); + // Also, change loop control-flow. This loop's basic block shall no longer + // exit from the loop to it's original successor basic block, but to our new + // phony successor basic block. Note that new successor will be unique exit. + OldLoopBB->getTerminator()->replaceSuccessorWith(SuccessorBB, + PhonySuccessorBB); + DTUpdates.push_back({DominatorTree::Delete, OldLoopBB, SuccessorBB}); + DTUpdates.push_back({DominatorTree::Insert, OldLoopBB, PhonySuccessorBB}); + } + + // Inform DomTree about edge changes. Note that LoopInfo is still out-of-date. + assert(DTUpdates.size() == 8 && "Update count prediction failed."); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); + DTU.applyUpdates(DTUpdates); + DTUpdates.clear(); + + // By now we have: (2/6) + // PreheaderBB: ; preds = ??? + // <...> + // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes) + // %ComparedEqual = icmp eq <...> %memcmp, 0 + // [no terminator instruction!] + // PhonyPreheaderBB: ; No preds, UNREACHABLE! + // br label %LoopHeaderBB + // LoopHeaderBB: ; preds = %PhonyPreheaderBB, %LoopLatchBB + // <...> + // br i1 %<...>, label %LoopLatchBB, label %PhonySuccessorBB + // LoopLatchBB: ; preds = %LoopHeaderBB + // <...> + // br i1 %<...>, label %PhonySuccessorBB, label %LoopHeaderBB + // PhonySuccessorBB: ; preds = %LoopHeaderBB, %LoopLatchBB + // unreachable + // EqualBB: ; No preds, UNREACHABLE! + // br label %Successor1BB + // UnequalBB: ; No preds, UNREACHABLE! 
+ // br label %Successor0BB + // Successor0BB: ; preds = %UnequalBB + // %S0PHI = phi <...> [ <...>, %UnequalBB ] + // <...> + // Successor1BB: ; preds = %EqualBB + // %S0PHI = phi <...> [ <...>, %EqualBB ] + // <...> + + // *Finally*, zap the original loop. Record it's parent loop though. + Loop *ParentLoop = CurLoop->getParentLoop(); + LLVM_DEBUG(dbgs() << "Deleting old loop.\n"); + LoopDeleter.markLoopAsDeleted(CurLoop); // Mark as deleted *BEFORE* deleting! + deleteDeadLoop(CurLoop, DT, SE, LI); // And actually delete the loop. + CurLoop = nullptr; + + // By now we have: (3/6) + // PreheaderBB: ; preds = ??? + // <...> + // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes) + // %ComparedEqual = icmp eq <...> %memcmp, 0 + // [no terminator instruction!] + // PhonyPreheaderBB: ; No preds, UNREACHABLE! + // br label %PhonySuccessorBB + // PhonySuccessorBB: ; preds = %PhonyPreheaderBB + // unreachable + // EqualBB: ; No preds, UNREACHABLE! + // br label %Successor1BB + // UnequalBB: ; No preds, UNREACHABLE! + // br label %Successor0BB + // Successor0BB: ; preds = %UnequalBB + // %S0PHI = phi <...> [ <...>, %UnequalBB ] + // <...> + // Successor1BB: ; preds = %EqualBB + // %S0PHI = phi <...> [ <...>, %EqualBB ] + // <...> + + // Now, actually restore the CFG. + + // Insert an unconditional branch from an actual preheader basic block to + // phony preheader basic block. + IRBuilder<>(PreheaderBB).CreateBr(PhonyPreheaderBB); + DTUpdates.push_back({DominatorTree::Insert, PhonyPreheaderBB, HeaderBB}); + // Insert proper conditional branch from phony successor basic block to the + // "dispatch" basic blocks, which were used to preserve incoming values in + // original loop's successor basic blocks. + assert(isa(PhonySuccessorBB->getTerminator()) && + "Yep, that's the one we created to keep deleteDeadLoop() happy."); + PhonySuccessorBB->getTerminator()->eraseFromParent(); + { + IRBuilder<> Builder(PhonySuccessorBB); + Builder.SetCurrentDebugLocation(ComparedEqual->getDebugLoc()); + Builder.CreateCondBr(ComparedEqual, ComparedEqualBB, ComparedUnequalBB); + } + DTUpdates.push_back( + {DominatorTree::Insert, PhonySuccessorBB, ComparedEqualBB}); + DTUpdates.push_back( + {DominatorTree::Insert, PhonySuccessorBB, ComparedUnequalBB}); + + BasicBlock *DispatchBB = PhonySuccessorBB; + DispatchBB->setName(LoopName + ".bcmpdispatchbb"); + + assert(DTUpdates.size() == 3 && "Update count prediction failed."); + DTU.applyUpdates(DTUpdates); + DTUpdates.clear(); + + // By now we have: (4/6) + // PreheaderBB: ; preds = ??? + // <...> + // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes) + // %ComparedEqual = icmp eq <...> %memcmp, 0 + // br label %PhonyPreheaderBB + // PhonyPreheaderBB: ; preds = %PreheaderBB + // br label %DispatchBB + // DispatchBB: ; preds = %PhonyPreheaderBB + // br i1 %ComparedEqual, label %EqualBB, label %UnequalBB + // EqualBB: ; preds = %DispatchBB + // br label %Successor1BB + // UnequalBB: ; preds = %DispatchBB + // br label %Successor0BB + // Successor0BB: ; preds = %UnequalBB + // %S0PHI = phi <...> [ <...>, %UnequalBB ] + // <...> + // Successor1BB: ; preds = %EqualBB + // %S0PHI = phi <...> [ <...>, %EqualBB ] + // <...> + + // The basic CFG has been restored! Now let's merge redundant basic blocks. + + // Merge phony successor basic block into it's only predecessor, + // phony preheader basic block. It is fully pointlessly redundant. 
+ MergeBasicBlockIntoOnlyPred(DispatchBB, &DTU); + + // By now we have: (5/6) + // PreheaderBB: ; preds = ??? + // <...> + // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes) + // %ComparedEqual = icmp eq <...> %memcmp, 0 + // br label %DispatchBB + // DispatchBB: ; preds = %PreheaderBB + // br i1 %ComparedEqual, label %EqualBB, label %UnequalBB + // EqualBB: ; preds = %DispatchBB + // br label %Successor1BB + // UnequalBB: ; preds = %DispatchBB + // br label %Successor0BB + // Successor0BB: ; preds = %UnequalBB + // %S0PHI = phi <...> [ <...>, %UnequalBB ] + // <...> + // Successor1BB: ; preds = %EqualBB + // %S0PHI = phi <...> [ <...>, %EqualBB ] + // <...> + + // Was this loop nested? + if (!ParentLoop) { + // If the loop was *NOT* nested, then let's also merge phony successor + // basic block into it's only predecessor, preheader basic block. + // Also, here we need to update LoopInfo. + LI->removeBlock(PreheaderBB); + MergeBasicBlockIntoOnlyPred(DispatchBB, &DTU); + + // By now we have: (6/6) + // DispatchBB: ; preds = ??? + // <...> + // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes) + // %ComparedEqual = icmp eq <...> %memcmp, 0 + // br i1 %ComparedEqual, label %EqualBB, label %UnequalBB + // EqualBB: ; preds = %DispatchBB + // br label %Successor1BB + // UnequalBB: ; preds = %DispatchBB + // br label %Successor0BB + // Successor0BB: ; preds = %UnequalBB + // %S0PHI = phi <...> [ <...>, %UnequalBB ] + // <...> + // Successor1BB: ; preds = %EqualBB + // %S0PHI = phi <...> [ <...>, %EqualBB ] + // <...> + + return DispatchBB; + } + + // Otherwise, we need to "preserve" the LoopSimplify form of the deleted loop. + // To achieve that, we shall keep the preheader basic block (mainly so that + // the loop header block will be guaranteed to have a predecessor outside of + // the loop), and create a phony loop with all these new three basic blocks. + Loop *PhonyLoop = LI->AllocateLoop(); + ParentLoop->addChildLoop(PhonyLoop); + PhonyLoop->addBasicBlockToLoop(DispatchBB, *LI); + PhonyLoop->addBasicBlockToLoop(ComparedEqualBB, *LI); + PhonyLoop->addBasicBlockToLoop(ComparedUnequalBB, *LI); + + // But we only have a preheader basic block, a header basic block block and + // two exiting basic blocks. For a proper loop we also need a backedge from + // non-header basic block to header bb. + // Let's just add a never-taken branch from both of the exiting basic blocks. + for (BasicBlock *BB : {ComparedEqualBB, ComparedUnequalBB}) { + BranchInst *OldTerminator = cast(BB->getTerminator()); + assert(OldTerminator->isUnconditional() && "That's the one we created."); + BasicBlock *SuccessorBB = OldTerminator->getSuccessor(0); + + IRBuilder<> Builder(OldTerminator); + Builder.SetCurrentDebugLocation(OldTerminator->getDebugLoc()); + Builder.CreateCondBr(ConstantInt::getTrue(Context), SuccessorBB, + DispatchBB); + OldTerminator->eraseFromParent(); + // Yes, the backedge will never be taken. The control-flow is redundant. + // If it can be simplified further, other passes will take care. + DTUpdates.push_back({DominatorTree::Delete, BB, SuccessorBB}); + DTUpdates.push_back({DominatorTree::Insert, BB, SuccessorBB}); + DTUpdates.push_back({DominatorTree::Insert, BB, DispatchBB}); + } + assert(DTUpdates.size() == 6 && "Update count prediction failed."); + DTU.applyUpdates(DTUpdates); + DTUpdates.clear(); + + // By now we have: (6/6) + // PreheaderBB: ; preds = ??? 
+ // <...> + // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes) + // %ComparedEqual = icmp eq <...> %memcmp, 0 + // br label %BCmpDispatchBB + // BCmpDispatchBB:
; preds = %PreheaderBB + // br i1 %ComparedEqual, label %EqualBB, label %UnequalBB + // EqualBB: ; preds = %BCmpDispatchBB + // br i1 %true, label %Successor1BB, label %BCmpDispatchBB + // UnequalBB: ; preds = %BCmpDispatchBB + // br i1 %true, label %Successor0BB, label %BCmpDispatchBB + // Successor0BB: ; preds = %UnequalBB + // %S0PHI = phi <...> [ <...>, %UnequalBB ] + // <...> + // Successor1BB: ; preds = %EqualBB + // %S0PHI = phi <...> [ <...>, %EqualBB ] + // <...> + + // Finally fully DONE! + return DispatchBB; +} + +void LoopIdiomRecognize::transformLoopToBCmp(ICmpInst *BCmpInst, + CmpInst *LatchCmpInst, + LoadInst *LoadA, LoadInst *LoadB, + const SCEV *SrcA, const SCEV *SrcB, + const SCEV *NBytes) { + // We will be inserting before the terminator instruction of preheader block. + IRBuilder<> Builder(CurLoop->getLoopPreheader()->getTerminator()); + + LLVM_DEBUG(dbgs() << "Transforming bcmp loop idiom into a call.\n"); + LLVM_DEBUG(dbgs() << "Emitting new instructions.\n"); + + // Expand the SCEV expressions for both sources to compare, and produce value + // for the byte len (beware of Iterations potentially being a pointer, and + // account for element size being BCmpTyBytes bytes, which may be not 1 byte) + Value *PtrA, *PtrB, *Len; + { + SCEVExpander SExp(*SE, *DL, "LoopToBCmp"); + SExp.setInsertPoint(&*Builder.GetInsertPoint()); + + auto HandlePtr = [&SExp](LoadInst *Load, const SCEV *Src) { + SExp.SetCurrentDebugLocation(DebugLoc()); + // If the pointer operand of original load had dbgloc - use it. + if (const auto *I = dyn_cast(Load->getPointerOperand())) + SExp.SetCurrentDebugLocation(I->getDebugLoc()); + return SExp.expandCodeFor(Src); + }; + PtrA = HandlePtr(LoadA, SrcA); + PtrB = HandlePtr(LoadB, SrcB); + + // For len calculation let's use dbgloc for the loop's latch condition. + Builder.SetCurrentDebugLocation(LatchCmpInst->getDebugLoc()); + SExp.SetCurrentDebugLocation(LatchCmpInst->getDebugLoc()); + Len = SExp.expandCodeFor(NBytes); + + Type *CmpFuncSizeTy = DL->getIntPtrType(Builder.getContext()); + assert(SE->getTypeSizeInBits(Len->getType()) == + DL->getTypeSizeInBits(CmpFuncSizeTy) && + "Len should already have the correct size."); + + // Make sure that iteration count is a number, insert ptrtoint cast if not. + if (Len->getType()->isPointerTy()) + Len = Builder.CreatePtrToInt(Len, CmpFuncSizeTy); + assert(Len->getType() == CmpFuncSizeTy && "Should have correct type now."); + + Len->setName(Len->getName() + ".bytecount"); + + // There is no legality check needed. We want to compare that the memory + // regions [PtrA, PtrA+Len) and [PtrB, PtrB+Len) are fully identical, equal. + // For them to be fully equal, they must match bit-by-bit. And likewise, + // for them to *NOT* be fully equal, they have to differ just by one bit. + // The step of comparison (bits compared at once) simply does not matter. + } + + // For the rest of new instructions, dbgloc should point at the value cmp. + Builder.SetCurrentDebugLocation(BCmpInst->getDebugLoc()); + + // Emit the comparison itself. + auto *CmpCall = + cast(HasBCmp ? emitBCmp(PtrA, PtrB, Len, Builder, *DL, TLI) + : emitMemCmp(PtrA, PtrB, Len, Builder, *DL, TLI)); + // FIXME: add {B,Mem}CmpInst with MemoryCompareInst + // (based on MemIntrinsicBase) as base? + // FIXME: propagate metadata from loads? (alignments, AS, TBAA, ...) + + // {b,mem}cmp returned 0 if they were equal, or non-zero if not equal. 
+ auto *ComparedEqual = cast(Builder.CreateICmpEQ( + CmpCall, ConstantInt::get(CmpCall->getType(), 0), + PtrA->getName() + ".vs." + PtrB->getName() + ".eqcmp")); + + BasicBlock *BB = transformBCmpControlFlow(ComparedEqual); + Builder.ClearInsertionPoint(); + + // We're done. + LLVM_DEBUG(dbgs() << "Transformed loop bcmp idiom into a call.\n"); + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "TransformedBCmpIdiomToCall", + CmpCall->getDebugLoc(), BB) + << "Transformed bcmp idiom into a call to " + << ore::NV("NewFunction", CmpCall->getCalledFunction()) + << "() function"; + }); + ++NumBCmp; +} + +/// Recognizes a bcmp idiom in a non-countable loop. +/// +/// If detected, transforms the relevant code to issue the bcmp (or memcmp) +/// intrinsic function call, and returns true; otherwise, returns false. +bool LoopIdiomRecognize::recognizeBCmp() { + if (!HasMemCmp && !HasBCmp) + return false; + + ICmpInst *BCmpInst; + CmpInst *LatchCmpInst; + LoadInst *LoadA, *LoadB; + const SCEV *SrcA, *SrcB, *NBytes; + if (!detectBCmpIdiom(BCmpInst, LatchCmpInst, LoadA, LoadB, SrcA, SrcB, + NBytes)) { + LLVM_DEBUG(dbgs() << "bcmp idiom recognition failed.\n"); + return false; + } + + transformLoopToBCmp(BCmpInst, LatchCmpInst, LoadA, LoadB, SrcA, SrcB, NBytes); + return true; +} diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index 31191b52895c..368b9d4e8df1 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -192,7 +192,8 @@ public: getAnalysis().getAssumptionCache( *L->getHeader()->getParent()); const TargetLibraryInfo &TLI = - getAnalysis().getTLI(); + getAnalysis().getTLI( + *L->getHeader()->getParent()); MemorySSA *MSSA = nullptr; Optional MSSAU; if (EnableMSSALoopDependency) { @@ -233,7 +234,7 @@ PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, auto PA = getLoopPassPreservedAnalyses(); PA.preserveSet(); - if (EnableMSSALoopDependency) + if (AR.MSSA) PA.preserve(); return PA; } diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp index 9a42365adc1b..1af4b21b432e 100644 --- a/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/lib/Transforms/Scalar/LoopInterchange.cpp @@ -410,8 +410,6 @@ public: void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop); private: - void splitInnerLoopLatch(Instruction *); - void splitInnerLoopHeader(); bool adjustLoopLinks(); void adjustLoopPreheaders(); bool adjustLoopBranches(); @@ -1226,7 +1224,7 @@ bool LoopInterchangeTransform::transform() { if (InnerLoop->getSubLoops().empty()) { BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); - LLVM_DEBUG(dbgs() << "Calling Split Inner Loop\n"); + LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n"); PHINode *InductionPHI = getInductionVariable(InnerLoop, SE); if (!InductionPHI) { LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n"); @@ -1242,11 +1240,55 @@ bool LoopInterchangeTransform::transform() { if (&InductionPHI->getParent()->front() != InductionPHI) InductionPHI->moveBefore(&InductionPHI->getParent()->front()); - // Split at the place were the induction variable is - // incremented/decremented. - // TODO: This splitting logic may not work always. Fix this. - splitInnerLoopLatch(InnerIndexVar); - LLVM_DEBUG(dbgs() << "splitInnerLoopLatch done\n"); + // Create a new latch block for the inner loop. 
We split at the + // current latch's terminator and then move the condition and all + // operands that are not either loop-invariant or the induction PHI into the + // new latch block. + BasicBlock *NewLatch = + SplitBlock(InnerLoop->getLoopLatch(), + InnerLoop->getLoopLatch()->getTerminator(), DT, LI); + + SmallSetVector WorkList; + unsigned i = 0; + auto MoveInstructions = [&i, &WorkList, this, InductionPHI, NewLatch]() { + for (; i < WorkList.size(); i++) { + // Duplicate instruction and move it the new latch. Update uses that + // have been moved. + Instruction *NewI = WorkList[i]->clone(); + NewI->insertBefore(NewLatch->getFirstNonPHI()); + assert(!NewI->mayHaveSideEffects() && + "Moving instructions with side-effects may change behavior of " + "the loop nest!"); + for (auto UI = WorkList[i]->use_begin(), UE = WorkList[i]->use_end(); + UI != UE;) { + Use &U = *UI++; + Instruction *UserI = cast(U.getUser()); + if (!InnerLoop->contains(UserI->getParent()) || + UserI->getParent() == NewLatch || UserI == InductionPHI) + U.set(NewI); + } + // Add operands of moved instruction to the worklist, except if they are + // outside the inner loop or are the induction PHI. + for (Value *Op : WorkList[i]->operands()) { + Instruction *OpI = dyn_cast(Op); + if (!OpI || + this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop || + OpI == InductionPHI) + continue; + WorkList.insert(OpI); + } + } + }; + + // FIXME: Should we interchange when we have a constant condition? + Instruction *CondI = dyn_cast( + cast(InnerLoop->getLoopLatch()->getTerminator()) + ->getCondition()); + if (CondI) + WorkList.insert(CondI); + MoveInstructions(); + WorkList.insert(cast(InnerIndexVar)); + MoveInstructions(); // Splits the inner loops phi nodes out into a separate basic block. 
BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); @@ -1263,10 +1305,6 @@ bool LoopInterchangeTransform::transform() { return true; } -void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) { - SplitBlock(InnerLoop->getLoopLatch(), Inc, DT, LI); -} - /// \brief Move all instructions except the terminator from FromBB right before /// InsertBefore static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) { diff --git a/lib/Transforms/Scalar/LoopLoadElimination.cpp b/lib/Transforms/Scalar/LoopLoadElimination.cpp index 2b3d5e0ce9b7..e8dc879a184b 100644 --- a/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -435,7 +435,8 @@ public: PH->getTerminator()); Value *Initial = new LoadInst( Cand.Load->getType(), InitialPtr, "load_initial", - /* isVolatile */ false, Cand.Load->getAlignment(), PH->getTerminator()); + /* isVolatile */ false, MaybeAlign(Cand.Load->getAlignment()), + PH->getTerminator()); PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded", &L->getHeader()->front()); diff --git a/lib/Transforms/Scalar/LoopPredication.cpp b/lib/Transforms/Scalar/LoopPredication.cpp index 507a1e251ca6..885c0e8f4b8b 100644 --- a/lib/Transforms/Scalar/LoopPredication.cpp +++ b/lib/Transforms/Scalar/LoopPredication.cpp @@ -543,7 +543,7 @@ bool LoopPredication::isLoopInvariantValue(const SCEV* S) { if (const auto *LI = dyn_cast(U->getValue())) if (LI->isUnordered() && L->hasLoopInvariantOperands(LI)) if (AA->pointsToConstantMemory(LI->getOperand(0)) || - LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr) + LI->hasMetadata(LLVMContext::MD_invariant_load)) return true; return false; } diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp index 166b57f20b43..96e2c2a3ac6b 100644 --- a/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -1644,7 +1644,8 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) { AA = &getAnalysis().getAAResults(); LI = &getAnalysis().getLoopInfo(); SE = &getAnalysis().getSE(); - TLI = &getAnalysis().getTLI(); + TLI = &getAnalysis().getTLI( + *L->getHeader()->getParent()); DT = &getAnalysis().getDomTree(); PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index e009947690af..94517996df39 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -55,7 +55,7 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM, AR.MSSA->verifyMemorySSA(); auto PA = getLoopPassPreservedAnalyses(); - if (EnableMSSALoopDependency) + if (AR.MSSA) PA.preserve(); return PA; } @@ -94,17 +94,15 @@ public: auto *LI = &getAnalysis().getLoopInfo(); const auto *TTI = &getAnalysis().getTTI(F); auto *AC = &getAnalysis().getAssumptionCache(F); - auto *DTWP = getAnalysisIfAvailable(); - auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; - auto *SEWP = getAnalysisIfAvailable(); - auto *SE = SEWP ? &SEWP->getSE() : nullptr; + auto &DT = getAnalysis().getDomTree(); + auto &SE = getAnalysis().getSE(); const SimplifyQuery SQ = getBestSimplifyQuery(*this, F); Optional MSSAU; if (EnableMSSALoopDependency) { MemorySSA *MSSA = &getAnalysis().getMSSA(); MSSAU = MemorySSAUpdater(MSSA); } - return LoopRotation(L, LI, TTI, AC, DT, SE, + return LoopRotation(L, LI, TTI, AC, &DT, &SE, MSSAU.hasValue() ? 
MSSAU.getPointer() : nullptr, SQ, false, MaxHeaderSize, false); } diff --git a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp index 046f4c8af492..299f3fc5fb19 100644 --- a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -690,7 +690,7 @@ PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &LPMU) { Optional MSSAU; - if (EnableMSSALoopDependency && AR.MSSA) + if (AR.MSSA) MSSAU = MemorySSAUpdater(AR.MSSA); bool DeleteCurrentLoop = false; if (!simplifyLoopCFG(L, AR.DT, AR.LI, AR.SE, @@ -702,7 +702,7 @@ PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, LoopAnalysisManager &AM, LPMU.markLoopAsDeleted(L, "loop-simplifycfg"); auto PA = getLoopPassPreservedAnalyses(); - if (EnableMSSALoopDependency) + if (AR.MSSA) PA.preserve(); return PA; } diff --git a/lib/Transforms/Scalar/LoopSink.cpp b/lib/Transforms/Scalar/LoopSink.cpp index 975452e13f09..65e0dee0225a 100644 --- a/lib/Transforms/Scalar/LoopSink.cpp +++ b/lib/Transforms/Scalar/LoopSink.cpp @@ -230,12 +230,9 @@ static bool sinkInstruction(Loop &L, Instruction &I, IC->setName(I.getName()); IC->insertBefore(&*N->getFirstInsertionPt()); // Replaces uses of I with IC in N - for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;) { - Use &U = *UI++; - auto *I = cast(U.getUser()); - if (I->getParent() == N) - U.set(IC); - } + I.replaceUsesWithIf(IC, [N](Use &U) { + return cast(U.getUser())->getParent() == N; + }); // Replaces uses of I with IC in blocks dominated by N replaceDominatedUsesWith(&I, IC, DT, N); LLVM_DEBUG(dbgs() << "Sinking a clone of " << I << " To: " << N->getName() diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 59a387a186b8..7f119175c4a8 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1386,7 +1386,9 @@ void Cost::RateFormula(const Formula &F, // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as // additional instruction (at least fill). - unsigned TTIRegNum = TTI->getNumberOfRegisters(false) - 1; + // TODO: Need distinguish register class? + unsigned TTIRegNum = TTI->getNumberOfRegisters( + TTI->getRegisterClassForType(false, F.getType())) - 1; if (C.NumRegs > TTIRegNum) { // Cost already exceeded TTIRegNum, then only newly added register can add // new instructions. @@ -3165,6 +3167,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n"); return; } + assert(IVSrc && "Failed to find IV chain source"); LLVM_DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n"); Type *IVTy = IVSrc->getType(); @@ -3265,12 +3268,12 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { // requirements for both N and i at the same time. Limiting this code to // equality icmps is not a problem because all interesting loops use // equality icmps, thanks to IndVarSimplify. - if (ICmpInst *CI = dyn_cast(UserInst)) + if (ICmpInst *CI = dyn_cast(UserInst)) { + // If CI can be saved in some target, like replaced inside hardware loop + // in PowerPC, no need to generate initial formulae for it. + if (SaveCmp && CI == dyn_cast(ExitBranch->getCondition())) + continue; if (CI->isEquality()) { - // If CI can be saved in some target, like replaced inside hardware loop - // in PowerPC, no need to generate initial formulae for it. 
- if (SaveCmp && CI == dyn_cast(ExitBranch->getCondition())) - continue; // Swap the operands if needed to put the OperandValToReplace on the // left, for consistency. Value *NV = CI->getOperand(1); @@ -3298,6 +3301,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { Factors.insert(-(uint64_t)Factors[i]); Factors.insert(-1); } + } // Get or create an LSRUse. std::pair P = getUse(S, Kind, AccessTy); @@ -4834,6 +4838,7 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() { } } } + assert(Best && "Failed to find best LSRUse candidate"); LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best << " will yield profitable reuse.\n"); @@ -5740,7 +5745,8 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { *L->getHeader()->getParent()); auto &AC = getAnalysis().getAssumptionCache( *L->getHeader()->getParent()); - auto &LibInfo = getAnalysis().getTLI(); + auto &LibInfo = getAnalysis().getTLI( + *L->getHeader()->getParent()); return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, LibInfo); } diff --git a/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp index 86891eb451bb..8d88be420314 100644 --- a/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp @@ -166,7 +166,7 @@ static bool computeUnrollAndJamCount( bool UseUpperBound = false; bool ExplicitUnroll = computeUnrollCount( L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount, - OuterTripMultiple, OuterLoopSize, UP, UseUpperBound); + /*MaxOrZero*/ false, OuterTripMultiple, OuterLoopSize, UP, UseUpperBound); if (ExplicitUnroll || UseUpperBound) { // If the user explicitly set the loop as unrolled, dont UnJ it. Leave it // for the unroller instead. @@ -293,9 +293,9 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, if (Latch != Exit || SubLoopLatch != SubLoopExit) return LoopUnrollResult::Unmodified; - TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( - L, SE, TTI, nullptr, nullptr, OptLevel, - None, None, None, None, None, None); + TargetTransformInfo::UnrollingPreferences UP = + gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, OptLevel, None, + None, None, None, None, None, None, None); if (AllowUnrollAndJam.getNumOccurrences() > 0) UP.UnrollAndJam = AllowUnrollAndJam; if (UnrollAndJamThreshold.getNumOccurrences() > 0) diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 2fa7436213dd..a6d4164c3645 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -178,7 +178,9 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel, Optional UserThreshold, Optional UserCount, Optional UserAllowPartial, Optional UserRuntime, - Optional UserUpperBound, Optional UserAllowPeeling) { + Optional UserUpperBound, Optional UserAllowPeeling, + Optional UserAllowProfileBasedPeeling, + Optional UserFullUnrollMaxCount) { TargetTransformInfo::UnrollingPreferences UP; // Set up the defaults @@ -202,6 +204,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( UP.UpperBound = false; UP.AllowPeeling = true; UP.UnrollAndJam = false; + UP.PeelProfiledIterations = true; UP.UnrollAndJamInnerLoopThreshold = 60; // Override with any target specific settings @@ -257,6 +260,10 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( UP.UpperBound = 
*UserUpperBound; if (UserAllowPeeling.hasValue()) UP.AllowPeeling = *UserAllowPeeling; + if (UserAllowProfileBasedPeeling.hasValue()) + UP.PeelProfiledIterations = *UserAllowProfileBasedPeeling; + if (UserFullUnrollMaxCount.hasValue()) + UP.FullUnrollMaxCount = *UserFullUnrollMaxCount; return UP; } @@ -730,7 +737,7 @@ bool llvm::computeUnrollCount( Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl &EphValues, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, - unsigned &TripMultiple, unsigned LoopSize, + bool MaxOrZero, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) { // Check for explicit Count. @@ -781,18 +788,34 @@ bool llvm::computeUnrollCount( // Also we need to check if we exceed FullUnrollMaxCount. // If using the upper bound to unroll, TripMultiple should be set to 1 because // we do not know when loop may exit. - // MaxTripCount and ExactTripCount cannot both be non zero since we only + + // We can unroll by the upper bound amount if it's generally allowed or if + // we know that the loop is executed either the upper bound or zero times. + // (MaxOrZero unrolling keeps only the first loop test, so the number of + // loop tests remains the same compared to the non-unrolled version, whereas + // the generic upper bound unrolling keeps all but the last loop test so the + // number of loop tests goes up which may end up being worse on targets with + // constrained branch predictor resources so is controlled by an option.) + // In addition we only unroll small upper bounds. + unsigned FullUnrollMaxTripCount = MaxTripCount; + if (!(UP.UpperBound || MaxOrZero) || + FullUnrollMaxTripCount > UnrollMaxUpperBound) + FullUnrollMaxTripCount = 0; + + // UnrollByMaxCount and ExactTripCount cannot both be non zero since we only // compute the former when the latter is zero. unsigned ExactTripCount = TripCount; - assert((ExactTripCount == 0 || MaxTripCount == 0) && - "ExtractTripCount and MaxTripCount cannot both be non zero."); - unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount; + assert((ExactTripCount == 0 || FullUnrollMaxTripCount == 0) && + "ExtractTripCount and UnrollByMaxCount cannot both be non zero."); + + unsigned FullUnrollTripCount = + ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount; UP.Count = FullUnrollTripCount; if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) { // When computing the unrolled size, note that BEInsns are not replicated // like the rest of the loop body. if (getUnrolledLoopSize(LoopSize, UP) < UP.Threshold) { - UseUpperBound = (MaxTripCount == FullUnrollTripCount); + UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount); TripCount = FullUnrollTripCount; TripMultiple = UP.UpperBound ? 1 : TripMultiple; return ExplicitUnroll; @@ -806,7 +829,7 @@ bool llvm::computeUnrollCount( unsigned Boost = getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost); if (Cost->UnrolledCost < UP.Threshold * Boost / 100) { - UseUpperBound = (MaxTripCount == FullUnrollTripCount); + UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount); TripCount = FullUnrollTripCount; TripMultiple = UP.UpperBound ? 
1 : TripMultiple; return ExplicitUnroll; @@ -882,6 +905,8 @@ bool llvm::computeUnrollCount( "because " "unrolled size is too large."; }); + LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count + << "\n"); return ExplicitUnroll; } assert(TripCount == 0 && @@ -903,6 +928,12 @@ bool llvm::computeUnrollCount( return false; } + // Don't unroll a small upper bound loop unless user or TTI asked to do so. + if (MaxTripCount && !UP.Force && MaxTripCount < UnrollMaxUpperBound) { + UP.Count = 0; + return false; + } + // Check if the runtime trip count is too small when profile is available. if (L->getHeader()->getParent()->hasProfileData()) { if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) { @@ -966,7 +997,11 @@ bool llvm::computeUnrollCount( if (UP.Count > UP.MaxCount) UP.Count = UP.MaxCount; - LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count + + if (MaxTripCount && UP.Count > MaxTripCount) + UP.Count = MaxTripCount; + + LLVM_DEBUG(dbgs() << " runtime unrolling with count: " << UP.Count << "\n"); if (UP.Count < 2) UP.Count = 0; @@ -976,13 +1011,14 @@ bool llvm::computeUnrollCount( static LoopUnrollResult tryToUnrollLoop( Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, - OptimizationRemarkEmitter &ORE, - BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, - bool PreserveLCSSA, int OptLevel, + OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, + ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel, bool OnlyWhenForced, bool ForgetAllSCEV, Optional ProvidedCount, Optional ProvidedThreshold, Optional ProvidedAllowPartial, Optional ProvidedRuntime, Optional ProvidedUpperBound, - Optional ProvidedAllowPeeling) { + Optional ProvidedAllowPeeling, + Optional ProvidedAllowProfileBasedPeeling, + Optional ProvidedFullUnrollMaxCount) { LLVM_DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName() << "] Loop %" << L->getHeader()->getName() << "\n"); @@ -1007,7 +1043,8 @@ static LoopUnrollResult tryToUnrollLoop( TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, - ProvidedAllowPeeling); + ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, + ProvidedFullUnrollMaxCount); // Exit early if unrolling is disabled. For OptForSize, we pick the loop size // as threshold later on. @@ -1028,10 +1065,10 @@ static LoopUnrollResult tryToUnrollLoop( return LoopUnrollResult::Unmodified; } - // When optimizing for size, use LoopSize as threshold, to (fully) unroll - // loops, if it does not increase code size. + // When optimizing for size, use LoopSize + 1 as threshold (we use < Threshold + // later), to (fully) unroll loops, if it does not increase code size. if (OptForSize) - UP.Threshold = std::max(UP.Threshold, LoopSize); + UP.Threshold = std::max(UP.Threshold, LoopSize + 1); if (NumInlineCandidates != 0) { LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n"); @@ -1040,7 +1077,6 @@ static LoopUnrollResult tryToUnrollLoop( // Find trip count and trip multiple if count is not available unsigned TripCount = 0; - unsigned MaxTripCount = 0; unsigned TripMultiple = 1; // If there are multiple exiting blocks but one of them is the latch, use the // latch for the trip count estimation. 
Otherwise insist on a single exiting @@ -1070,28 +1106,18 @@ static LoopUnrollResult tryToUnrollLoop( // Try to find the trip count upper bound if we cannot find the exact trip // count. + unsigned MaxTripCount = 0; bool MaxOrZero = false; if (!TripCount) { MaxTripCount = SE.getSmallConstantMaxTripCount(L); MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L); - // We can unroll by the upper bound amount if it's generally allowed or if - // we know that the loop is executed either the upper bound or zero times. - // (MaxOrZero unrolling keeps only the first loop test, so the number of - // loop tests remains the same compared to the non-unrolled version, whereas - // the generic upper bound unrolling keeps all but the last loop test so the - // number of loop tests goes up which may end up being worse on targets with - // constrained branch predictor resources so is controlled by an option.) - // In addition we only unroll small upper bounds. - if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) { - MaxTripCount = 0; - } } // computeUnrollCount() decides whether it is beneficial to use upper bound to // fully unroll the loop. bool UseUpperBound = false; bool IsCountSetExplicitly = computeUnrollCount( - L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount, + L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero, TripMultiple, LoopSize, UP, UseUpperBound); if (!UP.Count) return LoopUnrollResult::Unmodified; @@ -1139,7 +1165,7 @@ static LoopUnrollResult tryToUnrollLoop( // If the loop was peeled, we already "used up" the profile information // we had, so we don't want to unroll or peel again. if (UnrollResult != LoopUnrollResult::FullyUnrolled && - (IsCountSetExplicitly || UP.PeelCount)) + (IsCountSetExplicitly || (UP.PeelProfiledIterations && UP.PeelCount))) L->setLoopAlreadyUnrolled(); return UnrollResult; @@ -1169,18 +1195,24 @@ public: Optional ProvidedRuntime; Optional ProvidedUpperBound; Optional ProvidedAllowPeeling; + Optional ProvidedAllowProfileBasedPeeling; + Optional ProvidedFullUnrollMaxCount; LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false, bool ForgetAllSCEV = false, Optional Threshold = None, Optional Count = None, Optional AllowPartial = None, Optional Runtime = None, Optional UpperBound = None, - Optional AllowPeeling = None) + Optional AllowPeeling = None, + Optional AllowProfileBasedPeeling = None, + Optional ProvidedFullUnrollMaxCount = None) : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced), ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)), ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound), - ProvidedAllowPeeling(AllowPeeling) { + ProvidedAllowPeeling(AllowPeeling), + ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling), + ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) { initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); } @@ -1203,10 +1235,11 @@ public: bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); LoopUnrollResult Result = tryToUnrollLoop( - L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, - PreserveLCSSA, OptLevel, OnlyWhenForced, - ForgetAllSCEV, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, - ProvidedRuntime, ProvidedUpperBound, ProvidedAllowPeeling); + L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel, + OnlyWhenForced, ForgetAllSCEV, ProvidedCount, ProvidedThreshold, + ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, + ProvidedAllowPeeling, 
ProvidedAllowProfileBasedPeeling, + ProvidedFullUnrollMaxCount); if (Result == LoopUnrollResult::FullyUnrolled) LPM.markLoopAsDeleted(*L); @@ -1283,14 +1316,16 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, std::string LoopName = L.getName(); - bool Changed = - tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE, - /*BFI*/ nullptr, /*PSI*/ nullptr, - /*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced, - ForgetSCEV, /*Count*/ None, - /*Threshold*/ None, /*AllowPartial*/ false, - /*Runtime*/ false, /*UpperBound*/ false, - /*AllowPeeling*/ false) != LoopUnrollResult::Unmodified; + bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE, + /*BFI*/ nullptr, /*PSI*/ nullptr, + /*PreserveLCSSA*/ true, OptLevel, + OnlyWhenForced, ForgetSCEV, /*Count*/ None, + /*Threshold*/ None, /*AllowPartial*/ false, + /*Runtime*/ false, /*UpperBound*/ false, + /*AllowPeeling*/ false, + /*AllowProfileBasedPeeling*/ false, + /*FullUnrollMaxCount*/ None) != + LoopUnrollResult::Unmodified; if (!Changed) return PreservedAnalyses::all(); @@ -1430,7 +1465,8 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced, UnrollOpts.ForgetSCEV, /*Count*/ None, /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime, - UnrollOpts.AllowUpperBound, LocalAllowPeeling); + UnrollOpts.AllowUpperBound, LocalAllowPeeling, + UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount); Changed |= Result != LoopUnrollResult::Unmodified; // The parent must not be damaged by unrolling! diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index b5b8e720069c..b410df0c5f68 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -420,7 +420,8 @@ enum OperatorChain { /// cost of creating an entirely new loop. static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed, OperatorChain &ParentChain, - DenseMap &Cache) { + DenseMap &Cache, + MemorySSAUpdater *MSSAU) { auto CacheIt = Cache.find(Cond); if (CacheIt != Cache.end()) return CacheIt->second; @@ -438,7 +439,7 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed, // TODO: Handle: br (VARIANT|INVARIANT). // Hoist simple values out. - if (L->makeLoopInvariant(Cond, Changed)) { + if (L->makeLoopInvariant(Cond, Changed, nullptr, MSSAU)) { Cache[Cond] = Cond; return Cond; } @@ -478,7 +479,7 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed, // which will cause the branch to go away in one loop and the condition to // simplify in the other one. if (Value *LHS = FindLIVLoopCondition(BO->getOperand(0), L, Changed, - ParentChain, Cache)) { + ParentChain, Cache, MSSAU)) { Cache[Cond] = LHS; return LHS; } @@ -486,7 +487,7 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed, // operand(1). ParentChain = NewChain; if (Value *RHS = FindLIVLoopCondition(BO->getOperand(1), L, Changed, - ParentChain, Cache)) { + ParentChain, Cache, MSSAU)) { Cache[Cond] = RHS; return RHS; } @@ -500,12 +501,12 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed, /// Cond is a condition that occurs in L. If it is invariant in the loop, or has /// an invariant piece, return the invariant along with the operator chain type. /// Otherwise, return null. 
-static std::pair FindLIVLoopCondition(Value *Cond, - Loop *L, - bool &Changed) { +static std::pair +FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed, + MemorySSAUpdater *MSSAU) { DenseMap Cache; OperatorChain OpChain = OC_OpChainNone; - Value *FCond = FindLIVLoopCondition(Cond, L, Changed, OpChain, Cache); + Value *FCond = FindLIVLoopCondition(Cond, L, Changed, OpChain, Cache, MSSAU); // In case we do find a LIV, it can not be obtained by walking up a mixed // operator chain. @@ -525,7 +526,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { DT = &getAnalysis().getDomTree(); if (EnableMSSALoopDependency) { MSSA = &getAnalysis().getMSSA(); - MSSAU = make_unique(MSSA); + MSSAU = std::make_unique(MSSA); assert(DT && "Cannot update MemorySSA without a valid DomTree."); } currentLoop = L; @@ -694,8 +695,9 @@ bool LoopUnswitch::processCurrentLoop() { } for (IntrinsicInst *Guard : Guards) { - Value *LoopCond = - FindLIVLoopCondition(Guard->getOperand(0), currentLoop, Changed).first; + Value *LoopCond = FindLIVLoopCondition(Guard->getOperand(0), currentLoop, + Changed, MSSAU.get()) + .first; if (LoopCond && UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) { // NB! Unswitching (if successful) could have erased some of the @@ -735,8 +737,9 @@ bool LoopUnswitch::processCurrentLoop() { if (BI->isConditional()) { // See if this, or some part of it, is loop invariant. If so, we can // unswitch on it if we desire. - Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), - currentLoop, Changed).first; + Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), currentLoop, + Changed, MSSAU.get()) + .first; if (LoopCond && !EqualityPropUnSafe(*LoopCond) && UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) { ++NumBranches; @@ -748,7 +751,7 @@ bool LoopUnswitch::processCurrentLoop() { Value *LoopCond; OperatorChain OpChain; std::tie(LoopCond, OpChain) = - FindLIVLoopCondition(SC, currentLoop, Changed); + FindLIVLoopCondition(SC, currentLoop, Changed, MSSAU.get()); unsigned NumCases = SI->getNumCases(); if (LoopCond && NumCases) { @@ -808,8 +811,9 @@ bool LoopUnswitch::processCurrentLoop() { for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end(); BBI != E; ++BBI) if (SelectInst *SI = dyn_cast(BBI)) { - Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), - currentLoop, Changed).first; + Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), currentLoop, + Changed, MSSAU.get()) + .first; if (LoopCond && UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) { ++NumSelects; @@ -1123,8 +1127,9 @@ bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) { if (!BI->isConditional()) return false; - Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), - currentLoop, Changed).first; + Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), currentLoop, + Changed, MSSAU.get()) + .first; // Unswitch only if the trivial condition itself is an LIV (not // partial LIV which could occur in and/or) @@ -1157,8 +1162,9 @@ bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) { return true; } else if (SwitchInst *SI = dyn_cast(CurrentTerm)) { // If this isn't switching on an invariant condition, we can't unswitch it. 
- Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), - currentLoop, Changed).first; + Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), currentLoop, + Changed, MSSAU.get()) + .first; // Unswitch only if the trivial condition itself is an LIV (not // partial LIV which could occur in and/or) @@ -1240,6 +1246,9 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, LoopBlocks.clear(); NewBlocks.clear(); + if (MSSAU && VerifyMemorySSA) + MSSA->verifyMemorySSA(); + // First step, split the preheader and exit blocks, and add these blocks to // the LoopBlocks list. BasicBlock *NewPreheader = @@ -1607,36 +1616,30 @@ void LoopUnswitch::SimplifyCode(std::vector &Worklist, Loop *L) { // If BI's parent is the only pred of the successor, fold the two blocks // together. BasicBlock *Pred = BI->getParent(); + (void)Pred; BasicBlock *Succ = BI->getSuccessor(0); BasicBlock *SinglePred = Succ->getSinglePredecessor(); if (!SinglePred) continue; // Nothing to do. assert(SinglePred == Pred && "CFG broken"); - LLVM_DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- " - << Succ->getName() << "\n"); - - // Resolve any single entry PHI nodes in Succ. - while (PHINode *PN = dyn_cast(Succ->begin())) - ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM, - MSSAU.get()); - - // If Succ has any successors with PHI nodes, update them to have - // entries coming from Pred instead of Succ. - Succ->replaceAllUsesWith(Pred); - - // Move all of the successor contents from Succ to Pred. - Pred->getInstList().splice(BI->getIterator(), Succ->getInstList(), - Succ->begin(), Succ->end()); - if (MSSAU) - MSSAU->moveAllAfterMergeBlocks(Succ, Pred, BI); + // Make the LPM and Worklist updates specific to LoopUnswitch. LPM->deleteSimpleAnalysisValue(BI, L); RemoveFromWorklist(BI, Worklist); - BI->eraseFromParent(); - - // Remove Succ from the loop tree. - LI->removeBlock(Succ); LPM->deleteSimpleAnalysisValue(Succ, L); - Succ->eraseFromParent(); + auto SuccIt = Succ->begin(); + while (PHINode *PN = dyn_cast(SuccIt++)) { + for (unsigned It = 0, E = PN->getNumOperands(); It != E; ++It) + if (Instruction *Use = dyn_cast(PN->getOperand(It))) + Worklist.push_back(Use); + for (User *U : PN->users()) + Worklist.push_back(cast(U)); + LPM->deleteSimpleAnalysisValue(PN, L); + RemoveFromWorklist(PN, Worklist); + ++NumSimplify; + } + // Merge the block and make the remaining analyses updates. + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); + MergeBlockIntoPredecessor(Succ, &DTU, LI, MSSAU.get()); ++NumSimplify; continue; } diff --git a/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/lib/Transforms/Scalar/LoopVersioningLICM.cpp index 896dd8bcb922..2ccb7cae3079 100644 --- a/lib/Transforms/Scalar/LoopVersioningLICM.cpp +++ b/lib/Transforms/Scalar/LoopVersioningLICM.cpp @@ -112,37 +112,6 @@ static cl::opt LVLoopDepthThreshold( "LoopVersioningLICM's threshold for maximum allowed loop nest/depth"), cl::init(2), cl::Hidden); -/// Create MDNode for input string. -static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) { - LLVMContext &Context = TheLoop->getHeader()->getContext(); - Metadata *MDs[] = { - MDString::get(Context, Name), - ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))}; - return MDNode::get(Context, MDs); -} - -/// Set input string into loop metadata by keeping other values intact. 
-void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *MDString, - unsigned V) { - SmallVector MDs(1); - // If the loop already has metadata, retain it. - MDNode *LoopID = TheLoop->getLoopID(); - if (LoopID) { - for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { - MDNode *Node = cast(LoopID->getOperand(i)); - MDs.push_back(Node); - } - } - // Add new metadata. - MDs.push_back(createStringMetadata(TheLoop, MDString, V)); - // Replace current metadata node with new one. - LLVMContext &Context = TheLoop->getHeader()->getContext(); - MDNode *NewLoopID = MDNode::get(Context, MDs); - // Set operand 0 to refer to the loop id itself. - NewLoopID->replaceOperandWith(0, NewLoopID); - TheLoop->setLoopID(NewLoopID); -} - namespace { struct LoopVersioningLICM : public LoopPass { diff --git a/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp b/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp new file mode 100644 index 000000000000..d0fcf38b5a7b --- /dev/null +++ b/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp @@ -0,0 +1,170 @@ +//===- LowerConstantIntrinsics.cpp - Lower constant intrinsic calls -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass lowers all remaining 'objectsize' 'is.constant' intrinsic calls +// and provides constant propagation and basic CFG cleanup on the result. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" + +using namespace llvm; +using namespace llvm::PatternMatch; + +#define DEBUG_TYPE "lower-is-constant-intrinsic" + +STATISTIC(IsConstantIntrinsicsHandled, + "Number of 'is.constant' intrinsic calls handled"); +STATISTIC(ObjectSizeIntrinsicsHandled, + "Number of 'objectsize' intrinsic calls handled"); + +static Value *lowerIsConstantIntrinsic(IntrinsicInst *II) { + Value *Op = II->getOperand(0); + + return isa(Op) ? 
ConstantInt::getTrue(II->getType()) + : ConstantInt::getFalse(II->getType()); +} + +static bool replaceConditionalBranchesOnConstant(Instruction *II, + Value *NewValue) { + bool HasDeadBlocks = false; + SmallSetVector Worklist; + replaceAndRecursivelySimplify(II, NewValue, nullptr, nullptr, nullptr, + &Worklist); + for (auto I : Worklist) { + BranchInst *BI = dyn_cast(I); + if (!BI) + continue; + if (BI->isUnconditional()) + continue; + + BasicBlock *Target, *Other; + if (match(BI->getOperand(0), m_Zero())) { + Target = BI->getSuccessor(1); + Other = BI->getSuccessor(0); + } else if (match(BI->getOperand(0), m_One())) { + Target = BI->getSuccessor(0); + Other = BI->getSuccessor(1); + } else { + Target = nullptr; + Other = nullptr; + } + if (Target && Target != Other) { + BasicBlock *Source = BI->getParent(); + Other->removePredecessor(Source); + BI->eraseFromParent(); + BranchInst::Create(Target, Source); + if (pred_begin(Other) == pred_end(Other)) + HasDeadBlocks = true; + } + } + return HasDeadBlocks; +} + +static bool lowerConstantIntrinsics(Function &F, const TargetLibraryInfo *TLI) { + bool HasDeadBlocks = false; + const auto &DL = F.getParent()->getDataLayout(); + SmallVector Worklist; + + ReversePostOrderTraversal RPOT(&F); + for (BasicBlock *BB : RPOT) { + for (Instruction &I: *BB) { + IntrinsicInst *II = dyn_cast(&I); + if (!II) + continue; + switch (II->getIntrinsicID()) { + default: + break; + case Intrinsic::is_constant: + case Intrinsic::objectsize: + Worklist.push_back(WeakTrackingVH(&I)); + break; + } + } + } + for (WeakTrackingVH &VH: Worklist) { + // Items on the worklist can be mutated by earlier recursive replaces. + // This can remove the intrinsic as dead (VH == null), but also replace + // the intrinsic in place. + if (!VH) + continue; + IntrinsicInst *II = dyn_cast(&*VH); + if (!II) + continue; + Value *NewValue; + switch (II->getIntrinsicID()) { + default: + continue; + case Intrinsic::is_constant: + NewValue = lowerIsConstantIntrinsic(II); + IsConstantIntrinsicsHandled++; + break; + case Intrinsic::objectsize: + NewValue = lowerObjectSizeCall(II, DL, TLI, true); + ObjectSizeIntrinsicsHandled++; + break; + } + HasDeadBlocks |= replaceConditionalBranchesOnConstant(II, NewValue); + } + if (HasDeadBlocks) + removeUnreachableBlocks(F); + return !Worklist.empty(); +} + +PreservedAnalyses +LowerConstantIntrinsicsPass::run(Function &F, FunctionAnalysisManager &AM) { + if (lowerConstantIntrinsics(F, AM.getCachedResult(F))) + return PreservedAnalyses::none(); + + return PreservedAnalyses::all(); +} + +namespace { +/// Legacy pass for lowering is.constant intrinsics out of the IR. +/// +/// When this pass is run over a function it converts is.constant intrinsics +/// into 'true' or 'false'. This is completements the normal constand folding +/// to 'true' as part of Instruction Simplify passes. +class LowerConstantIntrinsics : public FunctionPass { +public: + static char ID; + LowerConstantIntrinsics() : FunctionPass(ID) { + initializeLowerConstantIntrinsicsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + auto *TLIP = getAnalysisIfAvailable(); + const TargetLibraryInfo *TLI = TLIP ? 
&TLIP->getTLI(F) : nullptr; + return lowerConstantIntrinsics(F, TLI); + } +}; +} // namespace + +char LowerConstantIntrinsics::ID = 0; +INITIALIZE_PASS(LowerConstantIntrinsics, "lower-constant-intrinsics", + "Lower constant intrinsics", false, false) + +FunctionPass *llvm::createLowerConstantIntrinsicsPass() { + return new LowerConstantIntrinsics(); +} diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp index 0d67c0d740ec..d85f20b3f80c 100644 --- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp +++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/MisExpect.h" using namespace llvm; @@ -71,15 +72,20 @@ static bool handleSwitchExpect(SwitchInst &SI) { unsigned n = SI.getNumCases(); // +1 for default case. SmallVector Weights(n + 1, UnlikelyBranchWeight); - if (Case == *SI.case_default()) - Weights[0] = LikelyBranchWeight; - else - Weights[Case.getCaseIndex() + 1] = LikelyBranchWeight; + uint64_t Index = (Case == *SI.case_default()) ? 0 : Case.getCaseIndex() + 1; + Weights[Index] = LikelyBranchWeight; + + SI.setMetadata( + LLVMContext::MD_misexpect, + MDBuilder(CI->getContext()) + .createMisExpect(Index, LikelyBranchWeight, UnlikelyBranchWeight)); + + SI.setCondition(ArgValue); + misexpect::checkFrontendInstrumentation(SI); SI.setMetadata(LLVMContext::MD_prof, MDBuilder(CI->getContext()).createBranchWeights(Weights)); - SI.setCondition(ArgValue); return true; } @@ -155,7 +161,7 @@ static void handlePhiDef(CallInst *Expect) { return Result; }; - auto *PhiDef = dyn_cast(V); + auto *PhiDef = cast(V); // Get the first dominating conditional branch of the operand // i's incoming block. @@ -280,19 +286,28 @@ template static bool handleBrSelExpect(BrSelInst &BSI) { MDBuilder MDB(CI->getContext()); MDNode *Node; + MDNode *ExpNode; if ((ExpectedValue->getZExtValue() == ValueComparedTo) == - (Predicate == CmpInst::ICMP_EQ)) + (Predicate == CmpInst::ICMP_EQ)) { Node = MDB.createBranchWeights(LikelyBranchWeight, UnlikelyBranchWeight); - else + ExpNode = MDB.createMisExpect(0, LikelyBranchWeight, UnlikelyBranchWeight); + } else { Node = MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight); + ExpNode = MDB.createMisExpect(1, LikelyBranchWeight, UnlikelyBranchWeight); + } - BSI.setMetadata(LLVMContext::MD_prof, Node); + BSI.setMetadata(LLVMContext::MD_misexpect, ExpNode); if (CmpI) CmpI->setOperand(0, ArgValue); else BSI.setCondition(ArgValue); + + misexpect::checkFrontendInstrumentation(BSI); + + BSI.setMetadata(LLVMContext::MD_prof, Node); + return true; } diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 5a055139be4f..2364748efb05 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -69,90 +69,6 @@ STATISTIC(NumMemSetInfer, "Number of memsets inferred"); STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); STATISTIC(NumCpyToSet, "Number of memcpys converted to memset"); -static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, - bool &VariableIdxFound, - const DataLayout &DL) { - // Skip over the first indices. - gep_type_iterator GTI = gep_type_begin(GEP); - for (unsigned i = 1; i != Idx; ++i, ++GTI) - /*skip along*/; - - // Compute the offset implied by the rest of the indices. 
- int64_t Offset = 0; - for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { - ConstantInt *OpC = dyn_cast(GEP->getOperand(i)); - if (!OpC) - return VariableIdxFound = true; - if (OpC->isZero()) continue; // No offset. - - // Handle struct indices, which add their field offset to the pointer. - if (StructType *STy = GTI.getStructTypeOrNull()) { - Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); - continue; - } - - // Otherwise, we have a sequential type like an array or vector. Multiply - // the index by the ElementSize. - uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()); - Offset += Size*OpC->getSExtValue(); - } - - return Offset; -} - -/// Return true if Ptr1 is provably equal to Ptr2 plus a constant offset, and -/// return that constant offset. For example, Ptr1 might be &A[42], and Ptr2 -/// might be &A[40]. In this case offset would be -8. -static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, - const DataLayout &DL) { - Ptr1 = Ptr1->stripPointerCasts(); - Ptr2 = Ptr2->stripPointerCasts(); - - // Handle the trivial case first. - if (Ptr1 == Ptr2) { - Offset = 0; - return true; - } - - GEPOperator *GEP1 = dyn_cast(Ptr1); - GEPOperator *GEP2 = dyn_cast(Ptr2); - - bool VariableIdxFound = false; - - // If one pointer is a GEP and the other isn't, then see if the GEP is a - // constant offset from the base, as in "P" and "gep P, 1". - if (GEP1 && !GEP2 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) { - Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, DL); - return !VariableIdxFound; - } - - if (GEP2 && !GEP1 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) { - Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, DL); - return !VariableIdxFound; - } - - // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical - // base. After that base, they may have some number of common (and - // potentially variable) indices. After that they handle some constant - // offset, which determines their offset from each other. At this point, we - // handle no other case. - if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0)) - return false; - - // Skip any common indices and track the GEP types. - unsigned Idx = 1; - for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx) - if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx)) - break; - - int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, DL); - int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, DL); - if (VariableIdxFound) return false; - - Offset = Offset2-Offset1; - return true; -} - namespace { /// Represents a range of memset'd bytes with the ByteVal value. @@ -419,12 +335,12 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, break; // Check to see if this store is to a constant offset from the start ptr. - int64_t Offset; - if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, - DL)) + Optional Offset = + isPointerOffset(StartPtr, NextStore->getPointerOperand(), DL); + if (!Offset) break; - Ranges.addStore(Offset, NextStore); + Ranges.addStore(*Offset, NextStore); } else { MemSetInst *MSI = cast(BI); @@ -433,11 +349,11 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, break; // Check to see if this store is to a constant offset from the start ptr. 
- int64_t Offset; - if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, DL)) + Optional Offset = isPointerOffset(StartPtr, MSI->getDest(), DL); + if (!Offset) break; - Ranges.addMemSet(Offset, MSI); + Ranges.addMemSet(*Offset, MSI); } } @@ -597,9 +513,13 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P, ToLift.push_back(C); for (unsigned k = 0, e = C->getNumOperands(); k != e; ++k) - if (auto *A = dyn_cast(C->getOperand(k))) - if (A->getParent() == SI->getParent()) + if (auto *A = dyn_cast(C->getOperand(k))) { + if (A->getParent() == SI->getParent()) { + // Cannot hoist user of P above P + if(A == P) return false; Args.insert(A); + } + } } // We made it, we need to lift @@ -979,7 +899,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest, // If the destination wasn't sufficiently aligned then increase its alignment. if (!isDestSufficientlyAligned) { assert(isa(cpyDest) && "Can only increase alloca alignment!"); - cast(cpyDest)->setAlignment(srcAlign); + cast(cpyDest)->setAlignment(MaybeAlign(srcAlign)); } // Drop any cached information about the call, because we may have changed @@ -1516,7 +1436,7 @@ bool MemCpyOptLegacyPass::runOnFunction(Function &F) { return false; auto *MD = &getAnalysis().getMemDep(); - auto *TLI = &getAnalysis().getTLI(); + auto *TLI = &getAnalysis().getTLI(F); auto LookupAliasAnalysis = [this]() -> AliasAnalysis & { return getAnalysis().getAAResults(); diff --git a/lib/Transforms/Scalar/MergeICmps.cpp b/lib/Transforms/Scalar/MergeICmps.cpp index 3d047a193267..98a45b391319 100644 --- a/lib/Transforms/Scalar/MergeICmps.cpp +++ b/lib/Transforms/Scalar/MergeICmps.cpp @@ -897,7 +897,7 @@ public: bool runOnFunction(Function &F) override { if (skipFunction(F)) return false; - const auto &TLI = getAnalysis().getTLI(); + const auto &TLI = getAnalysis().getTLI(F); const auto &TTI = getAnalysis().getTTI(F); // MergeICmps does not need the DominatorTree, but we update it if it's // already available. diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp index 30645f4400e3..9799ea7960ec 100644 --- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp +++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp @@ -14,9 +14,11 @@ // diamond (hammock) and merges them into a single load in the header. Similar // it sinks and merges two stores to the tail block (footer). The algorithm // iterates over the instructions of one side of the diamond and attempts to -// find a matching load/store on the other side. It hoists / sinks when it -// thinks it safe to do so. This optimization helps with eg. hiding load -// latencies, triggering if-conversion, and reducing static code size. +// find a matching load/store on the other side. New tail/footer block may be +// insterted if the tail/footer block has more predecessors (not only the two +// predecessors that are forming the diamond). It hoists / sinks when it thinks +// it safe to do so. This optimization helps with eg. hiding load latencies, +// triggering if-conversion, and reducing static code size. // // NOTE: This code no longer performs load hoisting, it is subsumed by GVNHoist. // @@ -103,7 +105,9 @@ class MergedLoadStoreMotion { // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl. 
const int MagicCompileTimeControl = 250; + const bool SplitFooterBB; public: + MergedLoadStoreMotion(bool SplitFooterBB) : SplitFooterBB(SplitFooterBB) {} bool run(Function &F, AliasAnalysis &AA); private: @@ -114,7 +118,9 @@ private: PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1); bool isStoreSinkBarrierInRange(const Instruction &Start, const Instruction &End, MemoryLocation Loc); - bool sinkStore(BasicBlock *BB, StoreInst *SinkCand, StoreInst *ElseInst); + bool canSinkStoresAndGEPs(StoreInst *S0, StoreInst *S1) const; + void sinkStoresAndGEPs(BasicBlock *BB, StoreInst *SinkCand, + StoreInst *ElseInst); bool mergeStores(BasicBlock *BB); }; } // end anonymous namespace @@ -216,75 +222,83 @@ PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0, return NewPN; } +/// +/// Check if 2 stores can be sunk together with corresponding GEPs +/// +bool MergedLoadStoreMotion::canSinkStoresAndGEPs(StoreInst *S0, + StoreInst *S1) const { + auto *A0 = dyn_cast(S0->getPointerOperand()); + auto *A1 = dyn_cast(S1->getPointerOperand()); + return A0 && A1 && A0->isIdenticalTo(A1) && A0->hasOneUse() && + (A0->getParent() == S0->getParent()) && A1->hasOneUse() && + (A1->getParent() == S1->getParent()) && isa(A0); +} + /// /// Merge two stores to same address and sink into \p BB /// /// Also sinks GEP instruction computing the store address /// -bool MergedLoadStoreMotion::sinkStore(BasicBlock *BB, StoreInst *S0, - StoreInst *S1) { +void MergedLoadStoreMotion::sinkStoresAndGEPs(BasicBlock *BB, StoreInst *S0, + StoreInst *S1) { // Only one definition? auto *A0 = dyn_cast(S0->getPointerOperand()); auto *A1 = dyn_cast(S1->getPointerOperand()); - if (A0 && A1 && A0->isIdenticalTo(A1) && A0->hasOneUse() && - (A0->getParent() == S0->getParent()) && A1->hasOneUse() && - (A1->getParent() == S1->getParent()) && isa(A0)) { - LLVM_DEBUG(dbgs() << "Sink Instruction into BB \n"; BB->dump(); - dbgs() << "Instruction Left\n"; S0->dump(); dbgs() << "\n"; - dbgs() << "Instruction Right\n"; S1->dump(); dbgs() << "\n"); - // Hoist the instruction. - BasicBlock::iterator InsertPt = BB->getFirstInsertionPt(); - // Intersect optional metadata. - S0->andIRFlags(S1); - S0->dropUnknownNonDebugMetadata(); - - // Create the new store to be inserted at the join point. - StoreInst *SNew = cast(S0->clone()); - Instruction *ANew = A0->clone(); - SNew->insertBefore(&*InsertPt); - ANew->insertBefore(SNew); - - assert(S0->getParent() == A0->getParent()); - assert(S1->getParent() == A1->getParent()); - - // New PHI operand? Use it. - if (PHINode *NewPN = getPHIOperand(BB, S0, S1)) - SNew->setOperand(0, NewPN); - S0->eraseFromParent(); - S1->eraseFromParent(); - A0->replaceAllUsesWith(ANew); - A0->eraseFromParent(); - A1->replaceAllUsesWith(ANew); - A1->eraseFromParent(); - return true; - } - return false; + LLVM_DEBUG(dbgs() << "Sink Instruction into BB \n"; BB->dump(); + dbgs() << "Instruction Left\n"; S0->dump(); dbgs() << "\n"; + dbgs() << "Instruction Right\n"; S1->dump(); dbgs() << "\n"); + // Hoist the instruction. + BasicBlock::iterator InsertPt = BB->getFirstInsertionPt(); + // Intersect optional metadata. + S0->andIRFlags(S1); + S0->dropUnknownNonDebugMetadata(); + + // Create the new store to be inserted at the join point. + StoreInst *SNew = cast(S0->clone()); + Instruction *ANew = A0->clone(); + SNew->insertBefore(&*InsertPt); + ANew->insertBefore(SNew); + + assert(S0->getParent() == A0->getParent()); + assert(S1->getParent() == A1->getParent()); + + // New PHI operand? Use it. 
+ if (PHINode *NewPN = getPHIOperand(BB, S0, S1)) + SNew->setOperand(0, NewPN); + S0->eraseFromParent(); + S1->eraseFromParent(); + A0->replaceAllUsesWith(ANew); + A0->eraseFromParent(); + A1->replaceAllUsesWith(ANew); + A1->eraseFromParent(); } /// /// True when two stores are equivalent and can sink into the footer /// -/// Starting from a diamond tail block, iterate over the instructions in one -/// predecessor block and try to match a store in the second predecessor. +/// Starting from a diamond head block, iterate over the instructions in one +/// successor block and try to match a store in the second successor. /// -bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) { +bool MergedLoadStoreMotion::mergeStores(BasicBlock *HeadBB) { bool MergedStores = false; - assert(T && "Footer of a diamond cannot be empty"); - - pred_iterator PI = pred_begin(T), E = pred_end(T); - assert(PI != E); - BasicBlock *Pred0 = *PI; - ++PI; - BasicBlock *Pred1 = *PI; - ++PI; + BasicBlock *TailBB = getDiamondTail(HeadBB); + BasicBlock *SinkBB = TailBB; + assert(SinkBB && "Footer of a diamond cannot be empty"); + + succ_iterator SI = succ_begin(HeadBB); + assert(SI != succ_end(HeadBB) && "Diamond head cannot have zero successors"); + BasicBlock *Pred0 = *SI; + ++SI; + assert(SI != succ_end(HeadBB) && "Diamond head cannot have single successor"); + BasicBlock *Pred1 = *SI; // tail block of a diamond/hammock? if (Pred0 == Pred1) return false; // No. - if (PI != E) - return false; // No. More than 2 predecessors. - - // #Instructions in Succ1 for Compile Time Control + // bail out early if we can not merge into the footer BB + if (!SplitFooterBB && TailBB->hasNPredecessorsOrMore(3)) + return false; + // #Instructions in Pred1 for Compile Time Control auto InstsNoDbg = Pred1->instructionsWithoutDebug(); int Size1 = std::distance(InstsNoDbg.begin(), InstsNoDbg.end()); int NStores = 0; @@ -304,14 +318,23 @@ bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) { if (NStores * Size1 >= MagicCompileTimeControl) break; if (StoreInst *S1 = canSinkFromBlock(Pred1, S0)) { - bool Res = sinkStore(T, S0, S1); - MergedStores |= Res; - // Don't attempt to sink below stores that had to stick around - // But after removal of a store and some of its feeding - // instruction search again from the beginning since the iterator - // is likely stale at this point. - if (!Res) + if (!canSinkStoresAndGEPs(S0, S1)) + // Don't attempt to sink below stores that had to stick around + // But after removal of a store and some of its feeding + // instruction search again from the beginning since the iterator + // is likely stale at this point. break; + + if (SinkBB == TailBB && TailBB->hasNPredecessorsOrMore(3)) { + // We have more than 2 predecessors. Insert a new block + // postdominating 2 predecessors we're going to sink from. + SinkBB = SplitBlockPredecessors(TailBB, {Pred0, Pred1}, ".sink.split"); + if (!SinkBB) + break; + } + + MergedStores = true; + sinkStoresAndGEPs(SinkBB, S0, S1); RBI = Pred0->rbegin(); RBE = Pred0->rend(); LLVM_DEBUG(dbgs() << "Search again\n"; Instruction *I = &*RBI; I->dump()); @@ -328,13 +351,15 @@ bool MergedLoadStoreMotion::run(Function &F, AliasAnalysis &AA) { // Merge unconditional branches, allowing PRE to catch more // optimization opportunities. + // This loop doesn't care about newly inserted/split blocks + // since they never will be diamond heads. 
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) { BasicBlock *BB = &*FI++; // Hoist equivalent loads and sink stores // outside diamonds when possible if (isDiamondHead(BB)) { - Changed |= mergeStores(getDiamondTail(BB)); + Changed |= mergeStores(BB); } } return Changed; @@ -342,9 +367,11 @@ bool MergedLoadStoreMotion::run(Function &F, AliasAnalysis &AA) { namespace { class MergedLoadStoreMotionLegacyPass : public FunctionPass { + const bool SplitFooterBB; public: static char ID; // Pass identification, replacement for typeid - MergedLoadStoreMotionLegacyPass() : FunctionPass(ID) { + MergedLoadStoreMotionLegacyPass(bool SplitFooterBB = false) + : FunctionPass(ID), SplitFooterBB(SplitFooterBB) { initializeMergedLoadStoreMotionLegacyPassPass( *PassRegistry::getPassRegistry()); } @@ -355,13 +382,14 @@ public: bool runOnFunction(Function &F) override { if (skipFunction(F)) return false; - MergedLoadStoreMotion Impl; + MergedLoadStoreMotion Impl(SplitFooterBB); return Impl.run(F, getAnalysis().getAAResults()); } private: void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); + if (!SplitFooterBB) + AU.setPreservesCFG(); AU.addRequired(); AU.addPreserved(); } @@ -373,8 +401,8 @@ char MergedLoadStoreMotionLegacyPass::ID = 0; /// /// createMergedLoadStoreMotionPass - The public interface to this file. /// -FunctionPass *llvm::createMergedLoadStoreMotionPass() { - return new MergedLoadStoreMotionLegacyPass(); +FunctionPass *llvm::createMergedLoadStoreMotionPass(bool SplitFooterBB) { + return new MergedLoadStoreMotionLegacyPass(SplitFooterBB); } INITIALIZE_PASS_BEGIN(MergedLoadStoreMotionLegacyPass, "mldst-motion", @@ -385,13 +413,14 @@ INITIALIZE_PASS_END(MergedLoadStoreMotionLegacyPass, "mldst-motion", PreservedAnalyses MergedLoadStoreMotionPass::run(Function &F, FunctionAnalysisManager &AM) { - MergedLoadStoreMotion Impl; + MergedLoadStoreMotion Impl(Options.SplitFooterBB); auto &AA = AM.getResult(F); if (!Impl.run(F, AA)) return PreservedAnalyses::all(); PreservedAnalyses PA; - PA.preserveSet(); + if (!Options.SplitFooterBB) + PA.preserveSet(); PA.preserve(); return PA; } diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp index 94436b55752a..1260bd39cdee 100644 --- a/lib/Transforms/Scalar/NaryReassociate.cpp +++ b/lib/Transforms/Scalar/NaryReassociate.cpp @@ -170,7 +170,7 @@ bool NaryReassociateLegacyPass::runOnFunction(Function &F) { auto *AC = &getAnalysis().getAssumptionCache(F); auto *DT = &getAnalysis().getDomTree(); auto *SE = &getAnalysis().getSE(); - auto *TLI = &getAnalysis().getTLI(); + auto *TLI = &getAnalysis().getTLI(F); auto *TTI = &getAnalysis().getTTI(F); return Impl.runImpl(F, AC, DT, SE, TLI, TTI); diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index 08ac2b666fce..b213264de557 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -89,6 +89,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" @@ -122,6 +123,7 @@ using namespace llvm; using namespace llvm::GVNExpression; using namespace llvm::VNCoercion; +using namespace llvm::PatternMatch; #define DEBUG_TYPE "newgvn" @@ -656,7 +658,7 @@ public: TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA, const DataLayout &DL) : F(F), DT(DT), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL), - PredInfo(make_unique(F, *DT, 
*AC)), + PredInfo(std::make_unique(F, *DT, *AC)), SQ(DL, TLI, DT, AC, /*CtxI=*/nullptr, /*UseInstrInfo=*/false) {} bool runGVN(); @@ -1332,7 +1334,7 @@ LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp, E->setOpcode(0); E->op_push_back(PointerOp); if (LI) - E->setAlignment(LI->getAlignment()); + E->setAlignment(MaybeAlign(LI->getAlignment())); // TODO: Value number heap versions. We may be able to discover // things alias analysis can't on it's own (IE that a store and a @@ -1637,8 +1639,11 @@ const Expression *NewGVN::performSymbolicCallEvaluation(Instruction *I) const { if (AA->doesNotAccessMemory(CI)) { return createCallExpression(CI, TOPClass->getMemoryLeader()); } else if (AA->onlyReadsMemory(CI)) { - MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(CI); - return createCallExpression(CI, DefiningAccess); + if (auto *MA = MSSA->getMemoryAccess(CI)) { + auto *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(MA); + return createCallExpression(CI, DefiningAccess); + } else // MSSA determined that CI does not access memory. + return createCallExpression(CI, TOPClass->getMemoryLeader()); } return nullptr; } @@ -1754,7 +1759,7 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef PHIOps, return true; }); // If we are left with no operands, it's dead. - if (empty(Filtered)) { + if (Filtered.empty()) { // If it has undef at this point, it means there are no-non-undef arguments, // and thus, the value of the phi node must be undef. if (HasUndef) { @@ -2464,9 +2469,9 @@ Value *NewGVN::findConditionEquivalence(Value *Cond) const { // Process the outgoing edges of a block for reachability. void NewGVN::processOutgoingEdges(Instruction *TI, BasicBlock *B) { // Evaluate reachability of terminator instruction. - BranchInst *BR; - if ((BR = dyn_cast(TI)) && BR->isConditional()) { - Value *Cond = BR->getCondition(); + Value *Cond; + BasicBlock *TrueSucc, *FalseSucc; + if (match(TI, m_Br(m_Value(Cond), TrueSucc, FalseSucc))) { Value *CondEvaluated = findConditionEquivalence(Cond); if (!CondEvaluated) { if (auto *I = dyn_cast(Cond)) { @@ -2479,8 +2484,6 @@ void NewGVN::processOutgoingEdges(Instruction *TI, BasicBlock *B) { } } ConstantInt *CI; - BasicBlock *TrueSucc = BR->getSuccessor(0); - BasicBlock *FalseSucc = BR->getSuccessor(1); if (CondEvaluated && (CI = dyn_cast(CondEvaluated))) { if (CI->isOne()) { LLVM_DEBUG(dbgs() << "Condition for Terminator " << *TI @@ -4196,7 +4199,7 @@ bool NewGVNLegacyPass::runOnFunction(Function &F) { return false; return NewGVN(F, &getAnalysis().getDomTree(), &getAnalysis().getAssumptionCache(F), - &getAnalysis().getTLI(), + &getAnalysis().getTLI(F), &getAnalysis().getAAResults(), &getAnalysis().getMSSA(), F.getParent()->getDataLayout()) diff --git a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp index 039123218544..68a0f5151ad5 100644 --- a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp +++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp @@ -161,7 +161,7 @@ public: return false; TargetLibraryInfo *TLI = - &getAnalysis().getTLI(); + &getAnalysis().getTLI(F); const TargetTransformInfo *TTI = &getAnalysis().getTTI(F); return runPartiallyInlineLibCalls(F, TLI, TTI); diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp index b544f0a39ea8..beb299272ed8 100644 --- a/lib/Transforms/Scalar/PlaceSafepoints.cpp +++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp @@ -131,7 +131,7 @@ struct PlaceBackedgeSafepointsImpl : public 
FunctionPass { SE = &getAnalysis().getSE(); DT = &getAnalysis().getDomTree(); LI = &getAnalysis().getLoopInfo(); - TLI = &getAnalysis().getTLI(); + TLI = &getAnalysis().getTLI(F); for (Loop *I : *LI) { runOnLoopAndSubLoops(I); } @@ -240,7 +240,7 @@ static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header, static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE, BasicBlock *Pred) { // A conservative bound on the loop as a whole. - const SCEV *MaxTrips = SE->getMaxBackedgeTakenCount(L); + const SCEV *MaxTrips = SE->getConstantMaxBackedgeTakenCount(L); if (MaxTrips != SE->getCouldNotCompute() && SE->getUnsignedRange(MaxTrips).getUnsignedMax().isIntN( CountedLoopTripWidth)) @@ -478,7 +478,7 @@ bool PlaceSafepoints::runOnFunction(Function &F) { return false; const TargetLibraryInfo &TLI = - getAnalysis().getTLI(); + getAnalysis().getTLI(F); bool Modified = false; diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index fa8c9e2a5fe4..124f625ef7b6 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -861,7 +861,7 @@ static Value *NegateValue(Value *V, Instruction *BI, // this use. We do this by moving it to the entry block (if it is a // non-instruction value) or right after the definition. These negates will // be zapped by reassociate later, so we don't need much finesse here. - BinaryOperator *TheNeg = cast(U); + Instruction *TheNeg = cast(U); // Verify that the negate is in this function, V might be a constant expr. if (TheNeg->getParent()->getParent() != BI->getParent()->getParent()) @@ -1938,88 +1938,132 @@ void ReassociatePass::EraseInst(Instruction *I) { MadeChange = true; } -// Canonicalize expressions of the following form: -// x + (-Constant * y) -> x - (Constant * y) -// x - (-Constant * y) -> x + (Constant * y) -Instruction *ReassociatePass::canonicalizeNegConstExpr(Instruction *I) { - if (!I->hasOneUse() || I->getType()->isVectorTy()) - return nullptr; - - // Must be a fmul or fdiv instruction. - unsigned Opcode = I->getOpcode(); - if (Opcode != Instruction::FMul && Opcode != Instruction::FDiv) - return nullptr; - - auto *C0 = dyn_cast(I->getOperand(0)); - auto *C1 = dyn_cast(I->getOperand(1)); - - // Both operands are constant, let it get constant folded away. - if (C0 && C1) - return nullptr; - - ConstantFP *CF = C0 ? C0 : C1; - - // Must have one constant operand. - if (!CF) - return nullptr; +/// Recursively analyze an expression to build a list of instructions that have +/// negative floating-point constant operands. The caller can then transform +/// the list to create positive constants for better reassociation and CSE. +static void getNegatibleInsts(Value *V, + SmallVectorImpl &Candidates) { + // Handle only one-use instructions. Combining negations does not justify + // replicating instructions. + Instruction *I; + if (!match(V, m_OneUse(m_Instruction(I)))) + return; - // Must be a negative ConstantFP. - if (!CF->isNegative()) - return nullptr; + // Handle expressions of multiplications and divisions. + // TODO: This could look through floating-point casts. + const APFloat *C; + switch (I->getOpcode()) { + case Instruction::FMul: + // Not expecting non-canonical code here. Bail out and wait. + if (match(I->getOperand(0), m_Constant())) + break; - // User must be a binary operator with one or more uses. 
- Instruction *User = I->user_back(); - if (!isa(User) || User->use_empty()) - return nullptr; + if (match(I->getOperand(1), m_APFloat(C)) && C->isNegative()) { + Candidates.push_back(I); + LLVM_DEBUG(dbgs() << "FMul with negative constant: " << *I << '\n'); + } + getNegatibleInsts(I->getOperand(0), Candidates); + getNegatibleInsts(I->getOperand(1), Candidates); + break; + case Instruction::FDiv: + // Not expecting non-canonical code here. Bail out and wait. + if (match(I->getOperand(0), m_Constant()) && + match(I->getOperand(1), m_Constant())) + break; - unsigned UserOpcode = User->getOpcode(); - if (UserOpcode != Instruction::FAdd && UserOpcode != Instruction::FSub) - return nullptr; + if ((match(I->getOperand(0), m_APFloat(C)) && C->isNegative()) || + (match(I->getOperand(1), m_APFloat(C)) && C->isNegative())) { + Candidates.push_back(I); + LLVM_DEBUG(dbgs() << "FDiv with negative constant: " << *I << '\n'); + } + getNegatibleInsts(I->getOperand(0), Candidates); + getNegatibleInsts(I->getOperand(1), Candidates); + break; + default: + break; + } +} - // Subtraction is not commutative. Explicitly, the following transform is - // not valid: (-Constant * y) - x -> x + (Constant * y) - if (!User->isCommutative() && User->getOperand(1) != I) +/// Given an fadd/fsub with an operand that is a one-use instruction +/// (the fadd/fsub), try to change negative floating-point constants into +/// positive constants to increase potential for reassociation and CSE. +Instruction *ReassociatePass::canonicalizeNegFPConstantsForOp(Instruction *I, + Instruction *Op, + Value *OtherOp) { + assert((I->getOpcode() == Instruction::FAdd || + I->getOpcode() == Instruction::FSub) && "Expected fadd/fsub"); + + // Collect instructions with negative FP constants from the subtree that ends + // in Op. + SmallVector Candidates; + getNegatibleInsts(Op, Candidates); + if (Candidates.empty()) return nullptr; // Don't canonicalize x + (-Constant * y) -> x - (Constant * y), if the // resulting subtract will be broken up later. This can get us into an // infinite loop during reassociation. - if (UserOpcode == Instruction::FAdd && ShouldBreakUpSubtract(User)) + bool IsFSub = I->getOpcode() == Instruction::FSub; + bool NeedsSubtract = !IsFSub && Candidates.size() % 2 == 1; + if (NeedsSubtract && ShouldBreakUpSubtract(I)) return nullptr; - // Change the sign of the constant. - APFloat Val = CF->getValueAPF(); - Val.changeSign(); - I->setOperand(C0 ? 0 : 1, ConstantFP::get(CF->getContext(), Val)); - - // Canonicalize I to RHS to simplify the next bit of logic. E.g., - // ((-Const*y) + x) -> (x + (-Const*y)). 
- if (User->getOperand(0) == I && User->isCommutative()) - cast(User)->swapOperands(); - - Value *Op0 = User->getOperand(0); - Value *Op1 = User->getOperand(1); - BinaryOperator *NI; - switch (UserOpcode) { - default: - llvm_unreachable("Unexpected Opcode!"); - case Instruction::FAdd: - NI = BinaryOperator::CreateFSub(Op0, Op1); - NI->setFastMathFlags(cast(User)->getFastMathFlags()); - break; - case Instruction::FSub: - NI = BinaryOperator::CreateFAdd(Op0, Op1); - NI->setFastMathFlags(cast(User)->getFastMathFlags()); - break; + for (Instruction *Negatible : Candidates) { + const APFloat *C; + if (match(Negatible->getOperand(0), m_APFloat(C))) { + assert(!match(Negatible->getOperand(1), m_Constant()) && + "Expecting only 1 constant operand"); + assert(C->isNegative() && "Expected negative FP constant"); + Negatible->setOperand(0, ConstantFP::get(Negatible->getType(), abs(*C))); + MadeChange = true; + } + if (match(Negatible->getOperand(1), m_APFloat(C))) { + assert(!match(Negatible->getOperand(0), m_Constant()) && + "Expecting only 1 constant operand"); + assert(C->isNegative() && "Expected negative FP constant"); + Negatible->setOperand(1, ConstantFP::get(Negatible->getType(), abs(*C))); + MadeChange = true; + } } + assert(MadeChange == true && "Negative constant candidate was not changed"); - NI->insertBefore(User); - NI->setName(User->getName()); - User->replaceAllUsesWith(NI); - NI->setDebugLoc(I->getDebugLoc()); + // Negations cancelled out. + if (Candidates.size() % 2 == 0) + return I; + + // Negate the final operand in the expression by flipping the opcode of this + // fadd/fsub. + assert(Candidates.size() % 2 == 1 && "Expected odd number"); + IRBuilder<> Builder(I); + Value *NewInst = IsFSub ? Builder.CreateFAddFMF(OtherOp, Op, I) + : Builder.CreateFSubFMF(OtherOp, Op, I); + I->replaceAllUsesWith(NewInst); RedoInsts.insert(I); - MadeChange = true; - return NI; + return dyn_cast(NewInst); +} + +/// Canonicalize expressions that contain a negative floating-point constant +/// of the following form: +/// OtherOp + (subtree) -> OtherOp {+/-} (canonical subtree) +/// (subtree) + OtherOp -> OtherOp {+/-} (canonical subtree) +/// OtherOp - (subtree) -> OtherOp {+/-} (canonical subtree) +/// +/// The fadd/fsub opcode may be switched to allow folding a negation into the +/// input instruction. +Instruction *ReassociatePass::canonicalizeNegFPConstants(Instruction *I) { + LLVM_DEBUG(dbgs() << "Combine negations for: " << *I << '\n'); + Value *X; + Instruction *Op; + if (match(I, m_FAdd(m_Value(X), m_OneUse(m_Instruction(Op))))) + if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X)) + I = R; + if (match(I, m_FAdd(m_OneUse(m_Instruction(Op)), m_Value(X)))) + if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X)) + I = R; + if (match(I, m_FSub(m_Value(X), m_OneUse(m_Instruction(Op))))) + if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X)) + I = R; + return I; } /// Inspect and optimize the given instruction. Note that erasing @@ -2042,16 +2086,16 @@ void ReassociatePass::OptimizeInst(Instruction *I) { I = NI; } - // Canonicalize negative constants out of expressions. - if (Instruction *Res = canonicalizeNegConstExpr(I)) - I = Res; - // Commute binary operators, to canonicalize the order of their operands. // This can potentially expose more CSE opportunities, and makes writing other // transformations simpler. if (I->isCommutative()) canonicalizeOperands(I); + // Canonicalize negative constants out of expressions. 
+ if (Instruction *Res = canonicalizeNegFPConstants(I)) + I = Res; + // Don't optimize floating-point instructions unless they are 'fast'. if (I->getType()->isFPOrFPVectorTy() && !I->isFast()) return; diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index c358258d24cf..48bbdd8d1b33 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -172,8 +172,6 @@ public: bool runOnModule(Module &M) override { bool Changed = false; - const TargetLibraryInfo &TLI = - getAnalysis().getTLI(); for (Function &F : M) { // Nothing to do for declarations. if (F.isDeclaration() || F.empty()) @@ -186,6 +184,8 @@ public: TargetTransformInfo &TTI = getAnalysis().getTTI(F); + const TargetLibraryInfo &TLI = + getAnalysis().getTLI(F); auto &DT = getAnalysis(F).getDomTree(); Changed |= Impl.runOnFunction(F, DT, TTI, TLI); @@ -2530,7 +2530,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT, // statepoints surviving this pass. This makes testing easier and the // resulting IR less confusing to human readers. DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); - bool MadeChange = removeUnreachableBlocks(F, nullptr, &DTU); + bool MadeChange = removeUnreachableBlocks(F, &DTU); // Flush the Dominator Tree. DTU.getDomTree(); diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 4093e50ce899..10fbdc8aacd2 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -191,7 +191,7 @@ public: /// class SCCPSolver : public InstVisitor { const DataLayout &DL; - const TargetLibraryInfo *TLI; + std::function GetTLI; SmallPtrSet BBExecutable; // The BBs that are executable. DenseMap ValueState; // The state each value is in. // The state each parameter is in. @@ -268,8 +268,9 @@ public: return {A->second.DT, A->second.PDT, DomTreeUpdater::UpdateStrategy::Lazy}; } - SCCPSolver(const DataLayout &DL, const TargetLibraryInfo *tli) - : DL(DL), TLI(tli) {} + SCCPSolver(const DataLayout &DL, + std::function GetTLI) + : DL(DL), GetTLI(std::move(GetTLI)) {} /// MarkBlockExecutable - This method can be used by clients to mark all of /// the blocks that are known to be intrinsically live in the processed unit. @@ -1290,7 +1291,7 @@ CallOverdefined: // If we can constant fold this, mark the result of the call as a // constant. if (Constant *C = ConstantFoldCall(cast(CS.getInstruction()), F, - Operands, TLI)) { + Operands, &GetTLI(*F))) { // call -> undef. if (isa(C)) return; @@ -1465,7 +1466,24 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { } LatticeVal &LV = getValueState(&I); - if (!LV.isUnknown()) continue; + if (!LV.isUnknown()) + continue; + + // There are two reasons a call can have an undef result + // 1. It could be tracked. + // 2. It could be constant-foldable. + // Because of the way we solve return values, tracked calls must + // never be marked overdefined in ResolvedUndefsIn. + if (CallSite CS = CallSite(&I)) { + if (Function *F = CS.getCalledFunction()) + if (TrackedRetVals.count(F)) + continue; + + // If the call is constant-foldable, we mark it overdefined because + // we do not know what return values are valid. + markOverdefined(&I); + return true; + } // extractvalue is safe; check here because the argument is a struct. 
if (isa(I)) @@ -1638,19 +1656,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { case Instruction::Call: case Instruction::Invoke: case Instruction::CallBr: - // There are two reasons a call can have an undef result - // 1. It could be tracked. - // 2. It could be constant-foldable. - // Because of the way we solve return values, tracked calls must - // never be marked overdefined in ResolvedUndefsIn. - if (Function *F = CallSite(&I).getCalledFunction()) - if (TrackedRetVals.count(F)) - break; - - // If the call is constant-foldable, we mark it overdefined because - // we do not know what return values are valid. - markOverdefined(&I); - return true; + llvm_unreachable("Call-like instructions should have be handled early"); default: // If we don't know what should happen here, conservatively mark it // overdefined. @@ -1751,7 +1757,7 @@ static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) { [](const LatticeVal &LV) { return LV.isOverdefined(); })) return false; std::vector ConstVals; - auto *ST = dyn_cast(V->getType()); + auto *ST = cast(V->getType()); for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { LatticeVal V = IVs[i]; ConstVals.push_back(V.isConstant() @@ -1796,7 +1802,8 @@ static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) { static bool runSCCP(Function &F, const DataLayout &DL, const TargetLibraryInfo *TLI) { LLVM_DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n"); - SCCPSolver Solver(DL, TLI); + SCCPSolver Solver( + DL, [TLI](Function &F) -> const TargetLibraryInfo & { return *TLI; }); // Mark the first block of the function as being executable. Solver.MarkBlockExecutable(&F.front()); @@ -1891,7 +1898,7 @@ public: return false; const DataLayout &DL = F.getParent()->getDataLayout(); const TargetLibraryInfo *TLI = - &getAnalysis().getTLI(); + &getAnalysis().getTLI(F); return runSCCP(F, DL, TLI); } }; @@ -1924,6 +1931,27 @@ static void findReturnsToZap(Function &F, return; } + assert( + all_of(F.users(), + [&Solver](User *U) { + if (isa(U) && + !Solver.isBlockExecutable(cast(U)->getParent())) + return true; + // Non-callsite uses are not impacted by zapping. Also, constant + // uses (like blockaddresses) could stuck around, without being + // used in the underlying IR, meaning we do not have lattice + // values for them. + if (!CallSite(U)) + return true; + if (U->getType()->isStructTy()) { + return all_of( + Solver.getStructLatticeValueFor(U), + [](const LatticeVal &LV) { return !LV.isOverdefined(); }); + } + return !Solver.getLatticeValueFor(U).isOverdefined(); + }) && + "We can only zap functions where all live users have a concrete value"); + for (BasicBlock &BB : F) { if (CallInst *CI = BB.getTerminatingMustTailCall()) { LLVM_DEBUG(dbgs() << "Can't zap return of the block due to present " @@ -1974,9 +2002,10 @@ static void forceIndeterminateEdge(Instruction* I, SCCPSolver &Solver) { } bool llvm::runIPSCCP( - Module &M, const DataLayout &DL, const TargetLibraryInfo *TLI, + Module &M, const DataLayout &DL, + std::function GetTLI, function_ref getAnalysis) { - SCCPSolver Solver(DL, TLI); + SCCPSolver Solver(DL, GetTLI); // Loop over all functions, marking arguments to those with their addresses // taken or that are external as overdefined. 
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 33f90d0b01e4..74b8ff913050 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -959,14 +959,16 @@ private: std::tie(UsedI, I) = Uses.pop_back_val(); if (LoadInst *LI = dyn_cast(I)) { - Size = std::max(Size, DL.getTypeStoreSize(LI->getType())); + Size = std::max(Size, + DL.getTypeStoreSize(LI->getType()).getFixedSize()); continue; } if (StoreInst *SI = dyn_cast(I)) { Value *Op = SI->getOperand(0); if (Op == UsedI) return SI; - Size = std::max(Size, DL.getTypeStoreSize(Op->getType())); + Size = std::max(Size, + DL.getTypeStoreSize(Op->getType()).getFixedSize()); continue; } @@ -1197,7 +1199,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) { // TODO: Allow recursive phi users. // TODO: Allow stores. BasicBlock *BB = PN.getParent(); - unsigned MaxAlign = 0; + MaybeAlign MaxAlign; uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType()); APInt MaxSize(APWidth, 0); bool HaveLoad = false; @@ -1218,8 +1220,8 @@ static bool isSafePHIToSpeculate(PHINode &PN) { if (BBI->mayWriteToMemory()) return false; - uint64_t Size = DL.getTypeStoreSizeInBits(LI->getType()); - MaxAlign = std::max(MaxAlign, LI->getAlignment()); + uint64_t Size = DL.getTypeStoreSize(LI->getType()); + MaxAlign = std::max(MaxAlign, MaybeAlign(LI->getAlignment())); MaxSize = MaxSize.ult(Size) ? APInt(APWidth, Size) : MaxSize; HaveLoad = true; } @@ -1266,11 +1268,11 @@ static void speculatePHINodeLoads(PHINode &PN) { PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(), PN.getName() + ".sroa.speculated"); - // Get the AA tags and alignment to use from one of the loads. It doesn't + // Get the AA tags and alignment to use from one of the loads. It does not // matter which one we get and if any differ. AAMDNodes AATags; SomeLoad->getAAMetadata(AATags); - unsigned Align = SomeLoad->getAlignment(); + const MaybeAlign Align = MaybeAlign(SomeLoad->getAlignment()); // Rewrite all loads of the PN to use the new PHI. while (!PN.use_empty()) { @@ -1338,11 +1340,11 @@ static bool isSafeSelectToSpeculate(SelectInst &SI) { // Both operands to the select need to be dereferenceable, either // absolutely (e.g. allocas) or at this point because we can see other // accesses to it. - if (!isSafeToLoadUnconditionally(TValue, LI->getType(), LI->getAlignment(), - DL, LI)) + if (!isSafeToLoadUnconditionally(TValue, LI->getType(), + MaybeAlign(LI->getAlignment()), DL, LI)) return false; - if (!isSafeToLoadUnconditionally(FValue, LI->getType(), LI->getAlignment(), - DL, LI)) + if (!isSafeToLoadUnconditionally(FValue, LI->getType(), + MaybeAlign(LI->getAlignment()), DL, LI)) return false; } @@ -1368,8 +1370,8 @@ static void speculateSelectInstLoads(SelectInst &SI) { NumLoadsSpeculated += 2; // Transfer alignment and AA info if present. - TL->setAlignment(LI->getAlignment()); - FL->setAlignment(LI->getAlignment()); + TL->setAlignment(MaybeAlign(LI->getAlignment())); + FL->setAlignment(MaybeAlign(LI->getAlignment())); AAMDNodes Tags; LI->getAAMetadata(Tags); @@ -1888,6 +1890,14 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { bool HaveCommonEltTy = true; auto CheckCandidateType = [&](Type *Ty) { if (auto *VTy = dyn_cast(Ty)) { + // Return if bitcast to vectors is different for total size in bits. 
+ if (!CandidateTys.empty()) { + VectorType *V = CandidateTys[0]; + if (DL.getTypeSizeInBits(VTy) != DL.getTypeSizeInBits(V)) { + CandidateTys.clear(); + return; + } + } CandidateTys.push_back(VTy); if (!CommonEltTy) CommonEltTy = VTy->getElementType(); @@ -3110,7 +3120,7 @@ private: unsigned LoadAlign = LI->getAlignment(); if (!LoadAlign) LoadAlign = DL.getABITypeAlignment(LI->getType()); - LI->setAlignment(std::min(LoadAlign, getSliceAlign())); + LI->setAlignment(MaybeAlign(std::min(LoadAlign, getSliceAlign()))); continue; } if (StoreInst *SI = dyn_cast(I)) { @@ -3119,7 +3129,7 @@ private: Value *Op = SI->getOperand(0); StoreAlign = DL.getABITypeAlignment(Op->getType()); } - SI->setAlignment(std::min(StoreAlign, getSliceAlign())); + SI->setAlignment(MaybeAlign(std::min(StoreAlign, getSliceAlign()))); continue; } diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 869cf00e0a89..1d2e40bf62be 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -79,6 +79,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLoopVersioningLICMPass(Registry); initializeLoopIdiomRecognizeLegacyPassPass(Registry); initializeLowerAtomicLegacyPassPass(Registry); + initializeLowerConstantIntrinsicsPass(Registry); initializeLowerExpectIntrinsicPass(Registry); initializeLowerGuardIntrinsicLegacyPassPass(Registry); initializeLowerWidenableConditionLegacyPassPass(Registry); @@ -123,6 +124,10 @@ void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createAggressiveDCEPass()); } +void LLVMAddDCEPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createDeadCodeEliminationPass()); +} + void LLVMAddBitTrackingDCEPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createBitTrackingDCEPass()); } @@ -280,6 +285,10 @@ void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createBasicAAWrapperPass()); } +void LLVMAddLowerConstantIntrinsicsPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLowerConstantIntrinsicsPass()); +} + void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createLowerExpectIntrinsicPass()); } diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index f6a12fb13142..41554fccdf08 100644 --- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -1121,7 +1121,7 @@ bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) { DT = &getAnalysis().getDomTree(); SE = &getAnalysis().getSE(); LI = &getAnalysis().getLoopInfo(); - TLI = &getAnalysis().getTLI(); + TLI = &getAnalysis().getTLI(F); bool Changed = false; for (BasicBlock &B : F) { for (BasicBlock::iterator I = B.begin(), IE = B.end(); I != IE;) diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index aeac6f548b32..ac832b9b4567 100644 --- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -1909,7 +1909,7 @@ static void unswitchNontrivialInvariants( // We can only unswitch switches, conditional branches with an invariant // condition, or combining invariant conditions with an instruction. 
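Note on the new guard in isVectorPromotionViable: every candidate vector type must have the same total size in bits, otherwise a bitcast between the partition's uses would be ill-typed. A condensed sketch of that check; the helper name is illustrative.

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/DerivedTypes.h"

    using namespace llvm;

    static void addCandidateType(SmallVectorImpl<VectorType *> &CandidateTys,
                                 VectorType *VTy, const DataLayout &DL) {
      if (!CandidateTys.empty() &&
          DL.getTypeSizeInBits(VTy) != DL.getTypeSizeInBits(CandidateTys[0])) {
        // Mixed total sizes: vector promotion of this partition is abandoned.
        CandidateTys.clear();
        return;
      }
      CandidateTys.push_back(VTy);
    }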
- assert((SI || BI->isConditional()) && + assert((SI || (BI && BI->isConditional())) && "Can only unswitch switches and conditional branch!"); bool FullUnswitch = SI || BI->getCondition() == Invariants[0]; if (FullUnswitch) @@ -2141,17 +2141,21 @@ static void unswitchNontrivialInvariants( buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction, *ClonedPH, *LoopPH); DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); + + if (MSSAU) { + DT.applyUpdates(DTUpdates); + DTUpdates.clear(); + + // Perform MSSA cloning updates. + for (auto &VMap : VMaps) + MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap, + /*IgnoreIncomingWithNoClones=*/true); + MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT); + } } // Apply the updates accumulated above to get an up-to-date dominator tree. DT.applyUpdates(DTUpdates); - if (!FullUnswitch && MSSAU) { - // Update MSSA for partial unswitch, after DT update. - SmallVector Updates; - Updates.push_back( - {cfg::UpdateKind::Insert, SplitBB, ClonedPHs.begin()->second}); - MSSAU->applyInsertUpdates(Updates, DT); - } // Now that we have an accurate dominator tree, first delete the dead cloned // blocks so that we can accurately build any cloned loops. It is important to @@ -2720,7 +2724,7 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, return Cost * (SuccessorsCount - 1); }; Instruction *BestUnswitchTI = nullptr; - int BestUnswitchCost; + int BestUnswitchCost = 0; ArrayRef BestUnswitchInvariants; for (auto &TerminatorAndInvariants : UnswitchCandidates) { Instruction &TI = *TerminatorAndInvariants.first; @@ -2752,6 +2756,7 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, BestUnswitchInvariants = Invariants; } } + assert(BestUnswitchTI && "Failed to find loop unswitch candidate"); if (BestUnswitchCost >= UnswitchThreshold) { LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " @@ -2880,7 +2885,7 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast)); auto PA = getLoopPassPreservedAnalyses(); - if (EnableMSSALoopDependency) + if (AR.MSSA) PA.preserve(); return PA; } diff --git a/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp b/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp index c13fb3e04516..e6db11f47ead 100644 --- a/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp +++ b/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp @@ -777,8 +777,10 @@ static bool tryToSpeculatePHIs(SmallVectorImpl &PNs, // speculation if the predecessor is an invoke. This doesn't seem // fundamental and we should probably be splitting critical edges // differently. - if (isa(PredBB->getTerminator()) || - isa(PredBB->getTerminator())) { + const auto *TermInst = PredBB->getTerminator(); + if (isa(TermInst) || + isa(TermInst) || + isa(TermInst)) { LLVM_DEBUG(dbgs() << " Invalid: predecessor terminator: " << PredBB->getName() << "\n"); return false; diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index e5400676c7e8..9791cf41f621 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -65,7 +65,7 @@ static cl::opt ForceSkipUniformRegions( static cl::opt RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden, cl::desc("Allow relaxed uniform region checks"), - cl::init(false)); + cl::init(true)); // Definition of the complex types used in this pass. 
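Note on the SpeculateAroundPHIs hunk: the list of predecessor terminators that block speculation grows to include callbr alongside invoke and indirectbr, since edges leaving those terminators cannot be split here. A hedged sketch of that predicate in isolation; the function name is illustrative.

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    static bool predecessorAllowsSpeculation(const BasicBlock &Pred) {
      const Instruction *TI = Pred.getTerminator();
      // Edges leaving these terminators cannot be split for speculation.
      return !isa<IndirectBrInst>(TI) && !isa<InvokeInst>(TI) &&
             !isa<CallBrInst>(TI);
    }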
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index f0b79079d817..b27a36b67d62 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -341,7 +341,7 @@ static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) { const DataLayout &DL = L->getModule()->getDataLayout(); if (isModSet(AA->getModRefInfo(CI, MemoryLocation::get(L))) || !isSafeToLoadUnconditionally(L->getPointerOperand(), L->getType(), - L->getAlignment(), DL, L)) + MaybeAlign(L->getAlignment()), DL, L)) return false; } } diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 5fa371377c85..d85cc40c372a 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -170,7 +170,8 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) { bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, LoopInfo *LI, MemorySSAUpdater *MSSAU, - MemoryDependenceResults *MemDep) { + MemoryDependenceResults *MemDep, + bool PredecessorWithTwoSuccessors) { if (BB->hasAddressTaken()) return false; @@ -185,9 +186,24 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, return false; // Can't merge if there are multiple distinct successors. - if (PredBB->getUniqueSuccessor() != BB) + if (!PredecessorWithTwoSuccessors && PredBB->getUniqueSuccessor() != BB) return false; + // Currently only allow PredBB to have two predecessors, one being BB. + // Update BI to branch to BB's only successor instead of BB. + BranchInst *PredBB_BI; + BasicBlock *NewSucc = nullptr; + unsigned FallThruPath; + if (PredecessorWithTwoSuccessors) { + if (!(PredBB_BI = dyn_cast(PredBB->getTerminator()))) + return false; + BranchInst *BB_JmpI = dyn_cast(BB->getTerminator()); + if (!BB_JmpI || !BB_JmpI->isUnconditional()) + return false; + NewSucc = BB_JmpI->getSuccessor(0); + FallThruPath = PredBB_BI->getSuccessor(0) == BB ? 0 : 1; + } + // Can't merge if there is PHI loop. for (PHINode &PN : BB->phis()) for (Value *IncValue : PN.incoming_values()) @@ -227,18 +243,39 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, Updates.push_back({DominatorTree::Delete, PredBB, BB}); } - if (MSSAU) - MSSAU->moveAllAfterMergeBlocks(BB, PredBB, &*(BB->begin())); + Instruction *PTI = PredBB->getTerminator(); + Instruction *STI = BB->getTerminator(); + Instruction *Start = &*BB->begin(); + // If there's nothing to move, mark the starting instruction as the last + // instruction in the block. + if (Start == STI) + Start = PTI; + + // Move all definitions in the successor to the predecessor... + PredBB->getInstList().splice(PTI->getIterator(), BB->getInstList(), + BB->begin(), STI->getIterator()); - // Delete the unconditional branch from the predecessor... - PredBB->getInstList().pop_back(); + if (MSSAU) + MSSAU->moveAllAfterMergeBlocks(BB, PredBB, Start); // Make all PHI nodes that referred to BB now refer to Pred as their // source... BB->replaceAllUsesWith(PredBB); - // Move all definitions in the successor to the predecessor... - PredBB->getInstList().splice(PredBB->end(), BB->getInstList()); + if (PredecessorWithTwoSuccessors) { + // Delete the unconditional branch from BB. + BB->getInstList().pop_back(); + + // Update branch in the predecessor. + PredBB_BI->setSuccessor(FallThruPath, NewSucc); + } else { + // Delete the unconditional branch from the predecessor. 
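Note on the new PredecessorWithTwoSuccessors mode of MergeBlockIntoPredecessor: it handles the shape where the predecessor ends in a conditional branch with two successors, one of them being BB, and BB itself ends in an unconditional branch; the merge retargets the fall-through edge at BB's single successor. A sketch of a caller, mirroring the loop-rotation use later in this patch; the wrapper name is illustrative and the header paths are assumed for this revision.

    #include "llvm/Analysis/DomTreeUpdater.h"
    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Analysis/MemorySSAUpdater.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/Transforms/Utils/BasicBlockUtils.h"

    using namespace llvm;

    static bool foldLatchIntoExit(BasicBlock *Latch, DominatorTree *DT,
                                  LoopInfo *LI, MemorySSAUpdater *MSSAU) {
      DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
      // Latch's predecessor keeps its conditional branch; the branch operand
      // that pointed at Latch is rewritten to Latch's lone successor.
      return MergeBlockIntoPredecessor(Latch, &DTU, LI, MSSAU,
                                       /*MemDep=*/nullptr,
                                       /*PredecessorWithTwoSuccessors=*/true);
    }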
+ PredBB->getInstList().pop_back(); + + // Move terminator instruction. + PredBB->getInstList().splice(PredBB->end(), BB->getInstList()); + } + // Add unreachable to now empty BB. new UnreachableInst(BB->getContext(), BB); // Eliminate duplicate dbg.values describing the entry PHI node post-splice. @@ -274,11 +311,10 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, "applying corresponding DTU updates."); DTU->applyUpdatesPermissive(Updates); DTU->deleteBB(BB); - } - - else { + } else { BB->eraseFromParent(); // Nuke BB if DTU is nullptr. } + return true; } @@ -365,11 +401,13 @@ llvm::SplitAllCriticalEdges(Function &F, BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI, - MemorySSAUpdater *MSSAU) { + MemorySSAUpdater *MSSAU, const Twine &BBName) { BasicBlock::iterator SplitIt = SplitPt->getIterator(); while (isa(SplitIt) || SplitIt->isEHPad()) ++SplitIt; - BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split"); + std::string Name = BBName.str(); + BasicBlock *New = Old->splitBasicBlock( + SplitIt, Name.empty() ? Old->getName() + ".split" : Name); // The new block lives in whichever loop the old one did. This preserves // LCSSA as well, because we force the split point to be after any PHI nodes. diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 27f110e24f9c..71316ce8f758 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -88,6 +88,14 @@ static bool setDoesNotCapture(Function &F, unsigned ArgNo) { return true; } +static bool setDoesNotAlias(Function &F, unsigned ArgNo) { + if (F.hasParamAttribute(ArgNo, Attribute::NoAlias)) + return false; + F.addParamAttr(ArgNo, Attribute::NoAlias); + ++NumNoAlias; + return true; +} + static bool setOnlyReadsMemory(Function &F, unsigned ArgNo) { if (F.hasParamAttribute(ArgNo, Attribute::ReadOnly)) return false; @@ -175,6 +183,9 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_strcpy: case LibFunc_strncpy: + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotAlias(F, 1); + LLVM_FALLTHROUGH; case LibFunc_strcat: case LibFunc_strncat: Changed |= setReturnedArg(F, 0); @@ -249,12 +260,14 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_sprintf: Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotAlias(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_snprintf: Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotAlias(F, 0); Changed |= setDoesNotCapture(F, 2); Changed |= setOnlyReadsMemory(F, 2); return Changed; @@ -291,11 +304,23 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_memcpy: + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotAlias(F, 1); + Changed |= setReturnedArg(F, 0); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; case LibFunc_memmove: Changed |= setReturnedArg(F, 0); - LLVM_FALLTHROUGH; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; case LibFunc_mempcpy: case LibFunc_memccpy: + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotAlias(F, 1); 
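Note on the BuildLibCalls hunks: setDoesNotAlias follows the same idempotent pattern as the other attribute helpers, reporting a change only when the attribute was actually added; the noalias markings on strcpy/strncpy, sprintf, snprintf and the mem* copy routines rely on the C standard's requirement that these argument ranges not overlap. A compressed sketch of the helper, without the statistics counter.

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Function.h"

    using namespace llvm;

    static bool setDoesNotAliasArg(Function &F, unsigned ArgNo) {
      if (F.hasParamAttribute(ArgNo, Attribute::NoAlias))
        return false;                      // already inferred, nothing changed
      F.addParamAttr(ArgNo, Attribute::NoAlias);
      return true;                         // caller ORs this into Changed
    }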
Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); @@ -760,9 +785,8 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { } } -bool llvm::hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, - LibFunc DoubleFn, LibFunc FloatFn, - LibFunc LongDoubleFn) { +bool llvm::hasFloatFn(const TargetLibraryInfo *TLI, Type *Ty, + LibFunc DoubleFn, LibFunc FloatFn, LibFunc LongDoubleFn) { switch (Ty->getTypeID()) { case Type::HalfTyID: return false; @@ -775,10 +799,10 @@ bool llvm::hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, } } -StringRef llvm::getUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, - LibFunc DoubleFn, LibFunc FloatFn, - LibFunc LongDoubleFn) { - assert(hasUnaryFloatFn(TLI, Ty, DoubleFn, FloatFn, LongDoubleFn) && +StringRef llvm::getFloatFnName(const TargetLibraryInfo *TLI, Type *Ty, + LibFunc DoubleFn, LibFunc FloatFn, + LibFunc LongDoubleFn) { + assert(hasFloatFn(TLI, Ty, DoubleFn, FloatFn, LongDoubleFn) && "Cannot get name for unavailable function!"); switch (Ty->getTypeID()) { @@ -827,6 +851,12 @@ Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL, B.getInt8PtrTy(), castToCStr(Ptr, B), B, TLI); } +Value *llvm::emitStrDup(Value *Ptr, IRBuilder<> &B, + const TargetLibraryInfo *TLI) { + return emitLibCall(LibFunc_strdup, B.getInt8PtrTy(), B.getInt8PtrTy(), + castToCStr(Ptr, B), B, TLI); +} + Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B, const TargetLibraryInfo *TLI) { Type *I8Ptr = B.getInt8PtrTy(); @@ -1045,24 +1075,28 @@ Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI, LibFunc LongDoubleFn, IRBuilder<> &B, const AttributeList &Attrs) { // Get the name of the function according to TLI. - StringRef Name = getUnaryFloatFn(TLI, Op->getType(), - DoubleFn, FloatFn, LongDoubleFn); + StringRef Name = getFloatFnName(TLI, Op->getType(), + DoubleFn, FloatFn, LongDoubleFn); return emitUnaryFloatFnCallHelper(Op, Name, B, Attrs); } -Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, - IRBuilder<> &B, const AttributeList &Attrs) { +static Value *emitBinaryFloatFnCallHelper(Value *Op1, Value *Op2, + StringRef Name, IRBuilder<> &B, + const AttributeList &Attrs) { assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall"); - SmallString<20> NameBuffer; - appendTypeSuffix(Op1, Name, NameBuffer); - Module *M = B.GetInsertBlock()->getModule(); - FunctionCallee Callee = M->getOrInsertFunction( - Name, Op1->getType(), Op1->getType(), Op2->getType()); - CallInst *CI = B.CreateCall(Callee, {Op1, Op2}, Name); - CI->setAttributes(Attrs); + FunctionCallee Callee = M->getOrInsertFunction(Name, Op1->getType(), + Op1->getType(), Op2->getType()); + CallInst *CI = B.CreateCall(Callee, { Op1, Op2 }, Name); + + // The incoming attribute set may have come from a speculatable intrinsic, but + // is being replaced with a library call which is not allowed to be + // speculatable. 
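Note on the renames above: hasUnaryFloatFn/getUnaryFloatFn become hasFloatFn/getFloatFnName so the same pair can also serve binary libm routines. A hedged usage sketch that picks the exp variant matching a value's type and emits the call; the wrapper and the choice of exp are illustrative, the signatures are those shown in the hunks.

    #include "llvm/Analysis/TargetLibraryInfo.h"
    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/Transforms/Utils/BuildLibCalls.h"

    using namespace llvm;

    static Value *emitExpForType(Value *Op, const TargetLibraryInfo *TLI,
                                 IRBuilder<> &B) {
      if (!hasFloatFn(TLI, Op->getType(), LibFunc_exp, LibFunc_expf,
                      LibFunc_expl))
        return nullptr;   // no suitable exp/expf/expl for this FP type
      return emitUnaryFloatFnCall(Op, TLI, LibFunc_exp, LibFunc_expf,
                                  LibFunc_expl, B, AttributeList());
    }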
+ CI->setAttributes(Attrs.removeAttribute(B.getContext(), + AttributeList::FunctionIndex, + Attribute::Speculatable)); if (const Function *F = dyn_cast(Callee.getCallee()->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -1070,6 +1104,28 @@ Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, return CI; } +Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, + IRBuilder<> &B, const AttributeList &Attrs) { + assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall"); + + SmallString<20> NameBuffer; + appendTypeSuffix(Op1, Name, NameBuffer); + + return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs); +} + +Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, + const TargetLibraryInfo *TLI, + LibFunc DoubleFn, LibFunc FloatFn, + LibFunc LongDoubleFn, IRBuilder<> &B, + const AttributeList &Attrs) { + // Get the name of the function according to TLI. + StringRef Name = getFloatFnName(TLI, Op1->getType(), + DoubleFn, FloatFn, LongDoubleFn); + + return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs); +} + Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc_putchar)) diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index df299f673f65..9a6761040bd8 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -448,13 +448,17 @@ bool llvm::bypassSlowDivision(BasicBlock *BB, DivCacheTy PerBBDivCache; bool MadeChange = false; - Instruction* Next = &*BB->begin(); + Instruction *Next = &*BB->begin(); while (Next != nullptr) { // We may add instructions immediately after I, but we want to skip over // them. - Instruction* I = Next; + Instruction *I = Next; Next = Next->getNextNode(); + // Ignore dead code to save time and avoid bugs. + if (I->hasNUses(0)) + continue; + FastDivInsertionTask Task(I, BypassWidths); if (Value *Replacement = Task.getReplacement(PerBBDivCache)) { I->replaceAllUsesWith(Replacement); diff --git a/lib/Transforms/Utils/CanonicalizeAliases.cpp b/lib/Transforms/Utils/CanonicalizeAliases.cpp index 455fcbb1cf98..3c7c8d872595 100644 --- a/lib/Transforms/Utils/CanonicalizeAliases.cpp +++ b/lib/Transforms/Utils/CanonicalizeAliases.cpp @@ -33,6 +33,7 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" using namespace llvm; diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 1026c9d37038..75e8963303c2 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -210,6 +210,21 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, RemapInstruction(&II, VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, TypeMapper, Materializer); + + // Register all DICompileUnits of the old parent module in the new parent module + auto* OldModule = OldFunc->getParent(); + auto* NewModule = NewFunc->getParent(); + if (OldModule && NewModule && OldModule != NewModule && DIFinder.compile_unit_count()) { + auto* NMD = NewModule->getOrInsertNamedMetadata("llvm.dbg.cu"); + // Avoid multiple insertions of the same DICompileUnit to NMD. 
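Note on the attribute scrub above: when an attribute set inherited from a speculatable intrinsic is transplanted onto a libcall, speculatable must be dropped, because a call to an external library function may not carry it. The scrub in isolation; names are illustrative.

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    static void transferIntrinsicAttrs(CallInst *CI, const AttributeList &Attrs) {
      CI->setAttributes(Attrs.removeAttribute(CI->getContext(),
                                              AttributeList::FunctionIndex,
                                              Attribute::Speculatable));
    }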
+ SmallPtrSet Visited; + for (auto* Operand : NMD->operands()) + Visited.insert(Operand); + for (auto* Unit : DIFinder.compile_units()) + // VMap.MD()[Unit] == Unit + if (Visited.insert(Unit).second) + NMD->addOperand(Unit); + } } /// Return a copy of the specified function and add it to that function's diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index 7ddf59becba9..2c8c3abb2922 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -48,7 +48,7 @@ std::unique_ptr llvm::CloneModule( function_ref ShouldCloneDefinition) { // First off, we need to create the new module. std::unique_ptr New = - llvm::make_unique(M.getModuleIdentifier(), M.getContext()); + std::make_unique(M.getModuleIdentifier(), M.getContext()); New->setSourceFileName(M.getSourceFileName()); New->setDataLayout(M.getDataLayout()); New->setTargetTriple(M.getTargetTriple()); @@ -181,13 +181,25 @@ std::unique_ptr llvm::CloneModule( } // And named metadata.... + const auto* LLVM_DBG_CU = M.getNamedMetadata("llvm.dbg.cu"); for (Module::const_named_metadata_iterator I = M.named_metadata_begin(), E = M.named_metadata_end(); I != E; ++I) { const NamedMDNode &NMD = *I; NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName()); - for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) - NewNMD->addOperand(MapMetadata(NMD.getOperand(i), VMap)); + if (&NMD == LLVM_DBG_CU) { + // Do not insert duplicate operands. + SmallPtrSet Visited; + for (const auto* Operand : NewNMD->operands()) + Visited.insert(Operand); + for (const auto* Operand : NMD.operands()) { + auto* MappedOperand = MapMetadata(Operand, VMap); + if (Visited.insert(MappedOperand).second) + NewNMD->addOperand(MappedOperand); + } + } else + for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) + NewNMD->addOperand(MapMetadata(NMD.getOperand(i), VMap)); } return New; diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index fa6d3f8ae873..0298ff9a395f 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -293,10 +293,8 @@ static BasicBlock *getCommonExitBlock(const SetVector &Blocks) { CommonExitBlock = Succ; continue; } - if (CommonExitBlock == Succ) - continue; - - return true; + if (CommonExitBlock != Succ) + return true; } return false; }; @@ -307,52 +305,79 @@ static BasicBlock *getCommonExitBlock(const SetVector &Blocks) { return CommonExitBlock; } -bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers( - Instruction *Addr) const { - AllocaInst *AI = cast(Addr->stripInBoundsConstantOffsets()); - Function *Func = (*Blocks.begin())->getParent(); - for (BasicBlock &BB : *Func) { - if (Blocks.count(&BB)) - continue; - for (Instruction &II : BB) { - if (isa(II)) - continue; +CodeExtractorAnalysisCache::CodeExtractorAnalysisCache(Function &F) { + for (BasicBlock &BB : F) { + for (Instruction &II : BB.instructionsWithoutDebug()) + if (auto *AI = dyn_cast(&II)) + Allocas.push_back(AI); - unsigned Opcode = II.getOpcode(); - Value *MemAddr = nullptr; - switch (Opcode) { - case Instruction::Store: - case Instruction::Load: { - if (Opcode == Instruction::Store) { - StoreInst *SI = cast(&II); - MemAddr = SI->getPointerOperand(); - } else { - LoadInst *LI = cast(&II); - MemAddr = LI->getPointerOperand(); - } - // Global variable can not be aliased with locals. 
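Note on the CloneFunctionInto and CloneModule hunks: both guard llvm.dbg.cu against duplicate compile-unit operands by remembering what is already attached before appending. A stripped-down sketch of that dedup step; the function name is illustrative.

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    static void addCompileUnitOnce(Module &M, MDNode *Unit) {
      NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu");
      SmallPtrSet<const MDNode *, 8> Seen;
      for (const MDNode *Op : NMD->operands())
        Seen.insert(Op);
      if (Seen.insert(Unit).second)   // second == true: not seen before
        NMD->addOperand(Unit);
    }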
- if (dyn_cast(MemAddr)) - break; - Value *Base = MemAddr->stripInBoundsConstantOffsets(); - if (!isa(Base) || Base == AI) - return false; + findSideEffectInfoForBlock(BB); + } +} + +void CodeExtractorAnalysisCache::findSideEffectInfoForBlock(BasicBlock &BB) { + for (Instruction &II : BB.instructionsWithoutDebug()) { + unsigned Opcode = II.getOpcode(); + Value *MemAddr = nullptr; + switch (Opcode) { + case Instruction::Store: + case Instruction::Load: { + if (Opcode == Instruction::Store) { + StoreInst *SI = cast(&II); + MemAddr = SI->getPointerOperand(); + } else { + LoadInst *LI = cast(&II); + MemAddr = LI->getPointerOperand(); + } + // Global variable can not be aliased with locals. + if (dyn_cast(MemAddr)) break; + Value *Base = MemAddr->stripInBoundsConstantOffsets(); + if (!isa(Base)) { + SideEffectingBlocks.insert(&BB); + return; } - default: { - IntrinsicInst *IntrInst = dyn_cast(&II); - if (IntrInst) { - if (IntrInst->isLifetimeStartOrEnd()) - break; - return false; - } - // Treat all the other cases conservatively if it has side effects. - if (II.mayHaveSideEffects()) - return false; + BaseMemAddrs[&BB].insert(Base); + break; + } + default: { + IntrinsicInst *IntrInst = dyn_cast(&II); + if (IntrInst) { + if (IntrInst->isLifetimeStartOrEnd()) + break; + SideEffectingBlocks.insert(&BB); + return; } + // Treat all the other cases conservatively if it has side effects. + if (II.mayHaveSideEffects()) { + SideEffectingBlocks.insert(&BB); + return; } } + } } +} +bool CodeExtractorAnalysisCache::doesBlockContainClobberOfAddr( + BasicBlock &BB, AllocaInst *Addr) const { + if (SideEffectingBlocks.count(&BB)) + return true; + auto It = BaseMemAddrs.find(&BB); + if (It != BaseMemAddrs.end()) + return It->second.count(Addr); + return false; +} + +bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers( + const CodeExtractorAnalysisCache &CEAC, Instruction *Addr) const { + AllocaInst *AI = cast(Addr->stripInBoundsConstantOffsets()); + Function *Func = (*Blocks.begin())->getParent(); + for (BasicBlock &BB : *Func) { + if (Blocks.count(&BB)) + continue; + if (CEAC.doesBlockContainClobberOfAddr(BB, AI)) + return false; + } return true; } @@ -415,7 +440,8 @@ CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) { // outline region. If there are not other untracked uses of the address, return // the pair of markers if found; otherwise return a pair of nullptr. CodeExtractor::LifetimeMarkerInfo -CodeExtractor::getLifetimeMarkers(Instruction *Addr, +CodeExtractor::getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC, + Instruction *Addr, BasicBlock *ExitBlock) const { LifetimeMarkerInfo Info; @@ -447,7 +473,7 @@ CodeExtractor::getLifetimeMarkers(Instruction *Addr, Info.HoistLifeEnd = !definedInRegion(Blocks, Info.LifeEnd); // Do legality check. if ((Info.SinkLifeStart || Info.HoistLifeEnd) && - !isLegalToShrinkwrapLifetimeMarkers(Addr)) + !isLegalToShrinkwrapLifetimeMarkers(CEAC, Addr)) return {}; // Check to see if we have a place to do hoisting, if not, bail. 
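Note on the rewritten shrink-wrap legality check: it now consults a per-function cache instead of re-scanning every block for each alloca; a block is either flagged as having arbitrary side effects, or it carries the set of alloca base addresses it may store through. A data-structure sketch of that lookup; the struct and member names are illustrative, the logic mirrors doesBlockContainClobberOfAddr.

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    struct BlockClobberInfo {
      SmallPtrSet<const BasicBlock *, 4> SideEffectingBlocks;
      DenseMap<const BasicBlock *, SmallPtrSet<const Value *, 4>> BaseMemAddrs;

      bool mayClobber(const BasicBlock &BB, const AllocaInst *Addr) const {
        if (SideEffectingBlocks.count(&BB))
          return true;               // unknown side effects: stay conservative
        auto It = BaseMemAddrs.find(&BB);
        return It != BaseMemAddrs.end() && It->second.count(Addr);
      }
    };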
@@ -457,7 +483,8 @@ CodeExtractor::getLifetimeMarkers(Instruction *Addr, return Info; } -void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands, +void CodeExtractor::findAllocas(const CodeExtractorAnalysisCache &CEAC, + ValueSet &SinkCands, ValueSet &HoistCands, BasicBlock *&ExitBlock) const { Function *Func = (*Blocks.begin())->getParent(); ExitBlock = getCommonExitBlock(Blocks); @@ -478,74 +505,104 @@ void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands, return true; }; - for (BasicBlock &BB : *Func) { - if (Blocks.count(&BB)) + // Look up allocas in the original function in CodeExtractorAnalysisCache, as + // this is much faster than walking all the instructions. + for (AllocaInst *AI : CEAC.getAllocas()) { + BasicBlock *BB = AI->getParent(); + if (Blocks.count(BB)) continue; - for (Instruction &II : BB) { - auto *AI = dyn_cast(&II); - if (!AI) - continue; - LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(AI, ExitBlock); - bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo); - if (Moved) { - LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n"); - SinkCands.insert(AI); - continue; - } + // As a prior call to extractCodeRegion() may have shrinkwrapped the alloca, + // check whether it is actually still in the original function. + Function *AIFunc = BB->getParent(); + if (AIFunc != Func) + continue; - // Follow any bitcasts. - SmallVector Bitcasts; - SmallVector BitcastLifetimeInfo; - for (User *U : AI->users()) { - if (U->stripInBoundsConstantOffsets() == AI) { - Instruction *Bitcast = cast(U); - LifetimeMarkerInfo LMI = getLifetimeMarkers(Bitcast, ExitBlock); - if (LMI.LifeStart) { - Bitcasts.push_back(Bitcast); - BitcastLifetimeInfo.push_back(LMI); - continue; - } - } + LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(CEAC, AI, ExitBlock); + bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo); + if (Moved) { + LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n"); + SinkCands.insert(AI); + continue; + } - // Found unknown use of AI. - if (!definedInRegion(Blocks, U)) { - Bitcasts.clear(); - break; + // Follow any bitcasts. + SmallVector Bitcasts; + SmallVector BitcastLifetimeInfo; + for (User *U : AI->users()) { + if (U->stripInBoundsConstantOffsets() == AI) { + Instruction *Bitcast = cast(U); + LifetimeMarkerInfo LMI = getLifetimeMarkers(CEAC, Bitcast, ExitBlock); + if (LMI.LifeStart) { + Bitcasts.push_back(Bitcast); + BitcastLifetimeInfo.push_back(LMI); + continue; } } - // Either no bitcasts reference the alloca or there are unknown uses. - if (Bitcasts.empty()) - continue; + // Found unknown use of AI. + if (!definedInRegion(Blocks, U)) { + Bitcasts.clear(); + break; + } + } - LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n"); - SinkCands.insert(AI); - for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) { - Instruction *BitcastAddr = Bitcasts[I]; - const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I]; - assert(LMI.LifeStart && - "Unsafe to sink bitcast without lifetime markers"); - moveOrIgnoreLifetimeMarkers(LMI); - if (!definedInRegion(Blocks, BitcastAddr)) { - LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr - << "\n"); - SinkCands.insert(BitcastAddr); - } + // Either no bitcasts reference the alloca or there are unknown uses. 
+ if (Bitcasts.empty()) + continue; + + LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n"); + SinkCands.insert(AI); + for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) { + Instruction *BitcastAddr = Bitcasts[I]; + const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I]; + assert(LMI.LifeStart && + "Unsafe to sink bitcast without lifetime markers"); + moveOrIgnoreLifetimeMarkers(LMI); + if (!definedInRegion(Blocks, BitcastAddr)) { + LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr + << "\n"); + SinkCands.insert(BitcastAddr); } } } } +bool CodeExtractor::isEligible() const { + if (Blocks.empty()) + return false; + BasicBlock *Header = *Blocks.begin(); + Function *F = Header->getParent(); + + // For functions with varargs, check that varargs handling is only done in the + // outlined function, i.e vastart and vaend are only used in outlined blocks. + if (AllowVarArgs && F->getFunctionType()->isVarArg()) { + auto containsVarArgIntrinsic = [](const Instruction &I) { + if (const CallInst *CI = dyn_cast(&I)) + if (const Function *Callee = CI->getCalledFunction()) + return Callee->getIntrinsicID() == Intrinsic::vastart || + Callee->getIntrinsicID() == Intrinsic::vaend; + return false; + }; + + for (auto &BB : *F) { + if (Blocks.count(&BB)) + continue; + if (llvm::any_of(BB, containsVarArgIntrinsic)) + return false; + } + } + return true; +} + void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, const ValueSet &SinkCands) const { for (BasicBlock *BB : Blocks) { // If a used value is defined outside the region, it's an input. If an // instruction is used outside the region, it's an output. for (Instruction &II : *BB) { - for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE; - ++OI) { - Value *V = *OI; + for (auto &OI : II.operands()) { + Value *V = OI; if (!SinkCands.count(V) && definedInCaller(Blocks, V)) Inputs.insert(V); } @@ -904,12 +961,12 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, // within the new function. This must be done before we lose track of which // blocks were originally in the code region. std::vector Users(header->user_begin(), header->user_end()); - for (unsigned i = 0, e = Users.size(); i != e; ++i) + for (auto &U : Users) // The BasicBlock which contains the branch is not in the region // modify the branch target to a new block - if (Instruction *I = dyn_cast(Users[i])) - if (I->isTerminator() && !Blocks.count(I->getParent()) && - I->getParent()->getParent() == oldFunction) + if (Instruction *I = dyn_cast(U)) + if (I->isTerminator() && I->getFunction() == oldFunction && + !Blocks.count(I->getParent())) I->replaceUsesOfWith(header, newHeader); return newFunction; @@ -1277,13 +1334,6 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) { // Insert this basic block into the new function newBlocks.push_back(Block); - - // Remove @llvm.assume calls that were moved to the new function from the - // old function's assumption cache. 
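Note on isEligible above: it now also owns the varargs rule that used to live in extractCodeRegion; extracting from a varargs function is only legal if va_start/va_end occur exclusively inside the outlined blocks. The intrinsic test, pulled out as a standalone sketch (name illustrative).

    #include "llvm/IR/Function.h"
    #include "llvm/IR/IntrinsicInst.h"

    using namespace llvm;

    static bool isVAHandlingCall(const Instruction &I) {
      if (const auto *CI = dyn_cast<CallInst>(&I))
        if (const Function *Callee = CI->getCalledFunction())
          return Callee->getIntrinsicID() == Intrinsic::vastart ||
                 Callee->getIntrinsicID() == Intrinsic::vaend;
      return false;
    }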
- if (AC) - for (auto &I : *Block) - if (match(&I, m_Intrinsic())) - AC->unregisterAssumption(cast(&I)); } } @@ -1332,7 +1382,8 @@ void CodeExtractor::calculateNewCallTerminatorWeights( MDBuilder(TI->getContext()).createBranchWeights(BranchWeights)); } -Function *CodeExtractor::extractCodeRegion() { +Function * +CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) { if (!isEligible()) return nullptr; @@ -1341,27 +1392,6 @@ Function *CodeExtractor::extractCodeRegion() { BasicBlock *header = *Blocks.begin(); Function *oldFunction = header->getParent(); - // For functions with varargs, check that varargs handling is only done in the - // outlined function, i.e vastart and vaend are only used in outlined blocks. - if (AllowVarArgs && oldFunction->getFunctionType()->isVarArg()) { - auto containsVarArgIntrinsic = [](Instruction &I) { - if (const CallInst *CI = dyn_cast(&I)) - if (const Function *F = CI->getCalledFunction()) - return F->getIntrinsicID() == Intrinsic::vastart || - F->getIntrinsicID() == Intrinsic::vaend; - return false; - }; - - for (auto &BB : *oldFunction) { - if (Blocks.count(&BB)) - continue; - if (llvm::any_of(BB, containsVarArgIntrinsic)) - return nullptr; - } - } - ValueSet inputs, outputs, SinkingCands, HoistingCands; - BasicBlock *CommonExit = nullptr; - // Calculate the entry frequency of the new function before we change the root // block. BlockFrequency EntryFreq; @@ -1375,6 +1405,15 @@ Function *CodeExtractor::extractCodeRegion() { } } + if (AC) { + // Remove @llvm.assume calls that were moved to the new function from the + // old function's assumption cache. + for (BasicBlock *Block : Blocks) + for (auto &I : *Block) + if (match(&I, m_Intrinsic())) + AC->unregisterAssumption(cast(&I)); + } + // If we have any return instructions in the region, split those blocks so // that the return is not in the region. splitReturnBlocks(); @@ -1428,7 +1467,9 @@ Function *CodeExtractor::extractCodeRegion() { } newFuncRoot->getInstList().push_back(BranchI); - findAllocas(SinkingCands, HoistingCands, CommonExit); + ValueSet inputs, outputs, SinkingCands, HoistingCands; + BasicBlock *CommonExit = nullptr; + findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); assert(HoistingCands.empty() || CommonExit); // Find inputs to, outputs from the code region. 
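Note on the signature change above: with extractCodeRegion taking the analysis cache as a parameter, the intended pattern is to build one CodeExtractorAnalysisCache per function and reuse it for every region extracted from that function. A hedged sketch of such a driver; the function, the region container, and the defaulted CodeExtractor arguments are illustrative.

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Transforms/Utils/CodeExtractor.h"

    using namespace llvm;

    static void extractAllRegions(Function &F,
                                  ArrayRef<SmallVector<BasicBlock *, 8>> Regions) {
      CodeExtractorAnalysisCache CEAC(F);   // whole-function scan, done once
      for (const auto &Blocks : Regions) {
        CodeExtractor CE(Blocks);
        if (CE.isEligible())
          CE.extractCodeRegion(CEAC);
      }
    }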
@@ -1563,5 +1604,17 @@ Function *CodeExtractor::extractCodeRegion() { }); LLVM_DEBUG(if (verifyFunction(*oldFunction)) report_fatal_error("verification of oldFunction failed!")); + LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, AC)) + report_fatal_error("Stale Asumption cache for old Function!")); return newFunction; } + +bool CodeExtractor::verifyAssumptionCache(const Function& F, + AssumptionCache *AC) { + for (auto AssumeVH : AC->assumptions()) { + CallInst *I = cast(AssumeVH); + if (I->getFunction() != &F) + return true; + } + return false; +} diff --git a/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/lib/Transforms/Utils/EntryExitInstrumenter.cpp index 4aa40eeadda4..57e2ff0251a9 100644 --- a/lib/Transforms/Utils/EntryExitInstrumenter.cpp +++ b/lib/Transforms/Utils/EntryExitInstrumenter.cpp @@ -24,7 +24,7 @@ static void insertCall(Function &CurFn, StringRef Func, if (Func == "mcount" || Func == ".mcount" || - Func == "\01__gnu_mcount_nc" || + Func == "llvm.arm.gnu.eabi.mcount" || Func == "\01_mcount" || Func == "\01mcount" || Func == "__mcount" || diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp index 0e203f4e075d..ad36790b8c6a 100644 --- a/lib/Transforms/Utils/Evaluator.cpp +++ b/lib/Transforms/Utils/Evaluator.cpp @@ -469,7 +469,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, return false; // Cannot handle array allocs. } Type *Ty = AI->getAllocatedType(); - AllocaTmps.push_back(llvm::make_unique( + AllocaTmps.push_back(std::make_unique( Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty), AI->getName(), /*TLMode=*/GlobalValue::NotThreadLocal, AI->getType()->getPointerAddressSpace())); diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp index 0c52e6f3703b..893f23eb6048 100644 --- a/lib/Transforms/Utils/FlattenCFG.cpp +++ b/lib/Transforms/Utils/FlattenCFG.cpp @@ -67,7 +67,7 @@ public: /// Before: /// ...... /// %cmp10 = fcmp une float %tmp1, %tmp2 -/// br i1 %cmp1, label %if.then, label %lor.rhs +/// br i1 %cmp10, label %if.then, label %lor.rhs /// /// lor.rhs: /// ...... @@ -251,8 +251,8 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) { bool EverChanged = false; for (; CurrBlock != FirstCondBlock; CurrBlock = CurrBlock->getSinglePredecessor()) { - BranchInst *BI = dyn_cast(CurrBlock->getTerminator()); - CmpInst *CI = dyn_cast(BI->getCondition()); + auto *BI = cast(CurrBlock->getTerminator()); + auto *CI = dyn_cast(BI->getCondition()); if (!CI) continue; @@ -278,7 +278,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) { // Do the transformation. 
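Note on the new verifyAssumptionCache debug check: it pairs with the earlier move of unregisterAssumption into extractCodeRegion; any @llvm.assume that migrated into the outlined function but is still registered against the old function marks the cache as stale. Essentially the same walk, shown standalone (name illustrative).

    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    static bool assumptionCacheIsStale(const Function &F, AssumptionCache &AC) {
      for (auto AssumeVH : AC.assumptions())
        if (cast<CallInst>(AssumeVH)->getFunction() != &F)
          return true;   // an assume left F without being unregistered
      return false;
    }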
BasicBlock *CB; - BranchInst *PBI = dyn_cast(FirstCondBlock->getTerminator()); + BranchInst *PBI = cast(FirstCondBlock->getTerminator()); bool Iteration = true; IRBuilder<>::InsertPointGuard Guard(Builder); Value *PC = PBI->getCondition(); @@ -444,7 +444,7 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) { FirstEntryBlock->getInstList().pop_back(); FirstEntryBlock->getInstList() .splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList()); - BranchInst *PBI = dyn_cast(FirstEntryBlock->getTerminator()); + BranchInst *PBI = cast(FirstEntryBlock->getTerminator()); Value *CC = PBI->getCondition(); BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); @@ -453,6 +453,16 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) { PBI->replaceUsesOfWith(CC, NC); Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); + // Handle PHI node to replace its predecessors to FirstEntryBlock. + for (BasicBlock *Succ : successors(PBI)) { + for (PHINode &Phi : Succ->phis()) { + for (unsigned i = 0, e = Phi.getNumIncomingValues(); i != e; ++i) { + if (Phi.getIncomingBlock(i) == SecondEntryBlock) + Phi.setIncomingBlock(i, FirstEntryBlock); + } + } + } + // Remove IfTrue1 if (IfTrue1 != FirstEntryBlock) { IfTrue1->dropAllReferences(); diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp index c9cc0990f237..76b4635ad501 100644 --- a/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -210,7 +210,7 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) { if (Function *F = dyn_cast(&GV)) { if (!F->isDeclaration()) { for (auto &S : VI.getSummaryList()) { - FunctionSummary *FS = dyn_cast(S->getBaseObject()); + auto *FS = cast(S->getBaseObject()); if (FS->modulePath() == M.getModuleIdentifier()) { F->setEntryCount(Function::ProfileCount(FS->entryCount(), Function::PCT_Synthetic)); diff --git a/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp index 8041e66e6c4c..ea93f99d69e3 100644 --- a/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp +++ b/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp @@ -25,8 +25,8 @@ ImportedFunctionsInliningStatistics::createInlineGraphNode(const Function &F) { auto &ValueLookup = NodesMap[F.getName()]; if (!ValueLookup) { - ValueLookup = llvm::make_unique(); - ValueLookup->Imported = F.getMetadata("thinlto_src_module") != nullptr; + ValueLookup = std::make_unique(); + ValueLookup->Imported = F.hasMetadata("thinlto_src_module"); } return *ValueLookup; } @@ -64,7 +64,7 @@ void ImportedFunctionsInliningStatistics::setModuleInfo(const Module &M) { if (F.isDeclaration()) continue; AllFunctions++; - ImportedFunctions += int(F.getMetadata("thinlto_src_module") != nullptr); + ImportedFunctions += int(F.hasMetadata("thinlto_src_module")); } } static std::string getStatString(const char *Msg, int32_t Fraction, int32_t All, diff --git a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp index 8c67d1dc6eb3..ed28fffc22b5 100644 --- a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp +++ b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp @@ -533,7 +533,7 @@ static bool runImpl(Function &F, const TargetLibraryInfo &TLI, } bool LibCallsShrinkWrapLegacyPass::runOnFunction(Function &F) { - auto &TLI = getAnalysis().getTLI(); + auto &TLI = 
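Note on the MergeIfRegion fix: PHI incoming blocks are retargeted after the second entry block is folded into the first, otherwise successors of the merged branch would still name the deleted block. A sketch of that fixup on its own; the helper name is illustrative, the traversal matches the hunk.

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/CFG.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    static void retargetPHIIncomingBlocks(Instruction *MergedTerm,
                                          BasicBlock *OldPred,
                                          BasicBlock *NewPred) {
      for (BasicBlock *Succ : successors(MergedTerm))
        for (PHINode &Phi : Succ->phis())
          for (unsigned i = 0, e = Phi.getNumIncomingValues(); i != e; ++i)
            if (Phi.getIncomingBlock(i) == OldPred)
              Phi.setIncomingBlock(i, NewPred);
    }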
getAnalysis().getTLI(F); auto *DTWP = getAnalysisIfAvailable(); auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; return runImpl(F, TLI, DT); diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 39b6b889f91c..5bcd05757ec1 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -324,8 +324,14 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, Value *Address = IBI->getAddress(); IBI->eraseFromParent(); if (DeleteDeadConditions) + // Delete pointer cast instructions. RecursivelyDeleteTriviallyDeadInstructions(Address, TLI); + // Also zap the blockaddress constant if there are no users remaining, + // otherwise the destination is still marked as having its address taken. + if (BA->use_empty()) + BA->destroyConstant(); + // If we didn't find our destination in the IBI successor list, then we // have undefined behavior. Replace the unconditional branch with an // 'unreachable' instruction. @@ -633,17 +639,6 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, // Control Flow Graph Restructuring. // -/// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this -/// method is called when we're about to delete Pred as a predecessor of BB. If -/// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred. -/// -/// Unlike the removePredecessor method, this attempts to simplify uses of PHI -/// nodes that collapse into identity values. For example, if we have: -/// x = phi(1, 0, 0, 0) -/// y = and x, z -/// -/// .. and delete the predecessor corresponding to the '1', this will attempt to -/// recursively fold the and to 0. void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, DomTreeUpdater *DTU) { // This only adjusts blocks with PHI nodes. @@ -672,10 +667,6 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, DTU->applyUpdatesPermissive({{DominatorTree::Delete, Pred, BB}}); } -/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its -/// predecessor is known to have one successor (DestBB!). Eliminate the edge -/// between them, moving the instructions in the predecessor into DestBB and -/// deleting the predecessor block. void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DomTreeUpdater *DTU) { @@ -755,15 +746,14 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, } } -/// CanMergeValues - Return true if we can choose one of these values to use -/// in place of the other. Note that we will always choose the non-undef -/// value to keep. +/// Return true if we can choose one of these values to use in place of the +/// other. Note that we will always choose the non-undef value to keep. static bool CanMergeValues(Value *First, Value *Second) { return First == Second || isa(First) || isa(Second); } -/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an -/// almost-empty BB ending in an unconditional branch to Succ, into Succ. +/// Return true if we can fold BB, an almost-empty BB ending in an unconditional +/// branch to Succ, into Succ. /// /// Assumption: Succ is the single successor for BB. 
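Two notes on the Local.cpp hunks above: the local Align variables are renamed Alignment so they no longer shadow the new llvm::Align type, and ConstantFoldTerminator now destroys a blockaddress constant once the folded indirectbr was its last user, so the target block stops being reported as address-taken. The latter as a minimal sketch; the caller is assumed to have already erased the indirectbr.

    #include "llvm/IR/Constants.h"

    using namespace llvm;

    static void dropDeadBlockAddress(BlockAddress *BA) {
      // With no remaining users, the constant itself keeps the block marked
      // as having its address taken; destroying it clears that state.
      if (BA->use_empty())
        BA->destroyConstant();
    }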
static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { @@ -956,11 +946,6 @@ static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB, replaceUndefValuesInPhi(PN, IncomingValues); } -/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an -/// unconditional branch, and contains no instructions other than PHI nodes, -/// potential side-effect free intrinsics and the branch. If possible, -/// eliminate BB by rewriting all the predecessors to branch to the successor -/// block and return true. If we can't transform, return false. bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU) { assert(BB != &BB->getParent()->getEntryBlock() && @@ -1088,10 +1073,6 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, return true; } -/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI -/// nodes in this block. This doesn't try to be clever about PHI nodes -/// which differ only in the order of the incoming values, but instcombine -/// orders them so it usually won't matter. bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { // This implementation doesn't currently consider undef operands // specially. Theoretically, two phis which are identical except for @@ -1151,10 +1132,10 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { /// often possible though. If alignment is important, a more reliable approach /// is to simply align all global variables and allocation instructions to /// their preferred alignment from the beginning. -static unsigned enforceKnownAlignment(Value *V, unsigned Align, +static unsigned enforceKnownAlignment(Value *V, unsigned Alignment, unsigned PrefAlign, const DataLayout &DL) { - assert(PrefAlign > Align); + assert(PrefAlign > Alignment); V = V->stripPointerCasts(); @@ -1165,36 +1146,36 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align, // stripPointerCasts recurses through infinite layers of bitcasts, // while computeKnownBits is not allowed to traverse more than 6 // levels. - Align = std::max(AI->getAlignment(), Align); - if (PrefAlign <= Align) - return Align; + Alignment = std::max(AI->getAlignment(), Alignment); + if (PrefAlign <= Alignment) + return Alignment; // If the preferred alignment is greater than the natural stack alignment // then don't round up. This avoids dynamic stack realignment. - if (DL.exceedsNaturalStackAlignment(PrefAlign)) - return Align; - AI->setAlignment(PrefAlign); + if (DL.exceedsNaturalStackAlignment(Align(PrefAlign))) + return Alignment; + AI->setAlignment(MaybeAlign(PrefAlign)); return PrefAlign; } if (auto *GO = dyn_cast(V)) { // TODO: as above, this shouldn't be necessary. - Align = std::max(GO->getAlignment(), Align); - if (PrefAlign <= Align) - return Align; + Alignment = std::max(GO->getAlignment(), Alignment); + if (PrefAlign <= Alignment) + return Alignment; // If there is a large requested alignment and we can, bump up the alignment // of the global. If the memory we set aside for the global may not be the // memory used by the final program then it is impossible for us to reliably // enforce the preferred alignment. 
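Note on replaceOneDbgValueForAlloca: the offset expression is now built with DIExpression::prepend rather than spliced by hand, and, per the updated comment, the offset is placed before the leading DW_OP_deref. Reduced to a sketch; the helper name is illustrative, the call shape is taken from the hunk.

    #include "llvm/IR/DebugInfoMetadata.h"

    using namespace llvm;

    static DIExpression *addConstantOffset(DIExpression *Expr, int64_t Offset) {
      // Flags = 0: no extra deref or stack-value marker, just the offset.
      return Offset ? DIExpression::prepend(Expr, /*Flags=*/0, Offset) : Expr;
    }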
if (!GO->canIncreaseAlignment()) - return Align; + return Alignment; - GO->setAlignment(PrefAlign); + GO->setAlignment(MaybeAlign(PrefAlign)); return PrefAlign; } - return Align; + return Alignment; } unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, @@ -1397,7 +1378,12 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, /// Determine whether this alloca is either a VLA or an array. static bool isArray(AllocaInst *AI) { return AI->isArrayAllocation() || - AI->getType()->getElementType()->isArrayTy(); + (AI->getAllocatedType() && AI->getAllocatedType()->isArrayTy()); +} + +/// Determine whether this alloca is a structure. +static bool isStructure(AllocaInst *AI) { + return AI->getAllocatedType() && AI->getAllocatedType()->isStructTy(); } /// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set @@ -1422,7 +1408,7 @@ bool llvm::LowerDbgDeclare(Function &F) { // stored on the stack, while the dbg.declare can only describe // the stack slot (and at a lexical-scope granularity). Later // passes will attempt to elide the stack slot. - if (!AI || isArray(AI)) + if (!AI || isArray(AI) || isStructure(AI)) continue; // A volatile load/store means that the alloca can't be elided anyway. @@ -1591,15 +1577,10 @@ static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress, DIExpr->getElement(0) != dwarf::DW_OP_deref) return; - // Insert the offset immediately after the first deref. + // Insert the offset before the first deref. // We could just change the offset argument of dbg.value, but it's unsigned... - if (Offset) { - SmallVector Ops; - Ops.push_back(dwarf::DW_OP_deref); - DIExpression::appendOffset(Ops, Offset); - Ops.append(DIExpr->elements_begin() + 1, DIExpr->elements_end()); - DIExpr = Builder.createExpression(Ops); - } + if (Offset) + DIExpr = DIExpression::prepend(DIExpr, 0, Offset); Builder.insertDbgValueIntrinsic(NewAddress, DIVar, DIExpr, Loc, DVI); DVI->eraseFromParent(); @@ -1957,18 +1938,24 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap, return NumInstrsRemoved; } -/// changeToCall - Convert the specified invoke into a normal call. -static void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr) { - SmallVector Args(II->arg_begin(), II->arg_end()); +CallInst *llvm::createCallMatchingInvoke(InvokeInst *II) { + SmallVector Args(II->arg_begin(), II->arg_end()); SmallVector OpBundles; II->getOperandBundlesAsDefs(OpBundles); - CallInst *NewCall = CallInst::Create( - II->getFunctionType(), II->getCalledValue(), Args, OpBundles, "", II); - NewCall->takeName(II); + CallInst *NewCall = CallInst::Create(II->getFunctionType(), + II->getCalledValue(), Args, OpBundles); NewCall->setCallingConv(II->getCallingConv()); NewCall->setAttributes(II->getAttributes()); NewCall->setDebugLoc(II->getDebugLoc()); NewCall->copyMetadata(*II); + return NewCall; +} + +/// changeToCall - Convert the specified invoke into a normal call. +void llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) { + CallInst *NewCall = createCallMatchingInvoke(II); + NewCall->takeName(II); + NewCall->insertBefore(II); II->replaceAllUsesWith(NewCall); // Follow the call by a branch to the normal destination. @@ -2223,12 +2210,10 @@ void llvm::removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU) { /// removeUnreachableBlocks - Remove blocks that are not reachable, even /// if they are in a dead cycle. Return true if a change was made, false -/// otherwise. 
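Note on the invoke-to-call split above: changeToCall is now built on a separately exposed createCallMatchingInvoke, which clones the invoke's callee, arguments, operand bundles, attributes and debug location into an unplaced call, letting a caller stage the replacement before deciding how to rewrite the CFG. A small hedged sketch, assuming the declaration lives alongside changeToCall in Transforms/Utils/Local.h.

    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/Local.h"

    using namespace llvm;

    static CallInst *stageCallForInvoke(InvokeInst *II) {
      // The returned call is not inserted anywhere yet; the caller picks the
      // insertion point and is responsible for retiring the invoke, e.g. by
      // letting changeToCall(II) perform the full rewrite instead.
      return createCallMatchingInvoke(II);
    }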
If `LVI` is passed, this function preserves LazyValueInfo -/// after modifying the CFG. -bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI, - DomTreeUpdater *DTU, +/// otherwise. +bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU) { - SmallPtrSet Reachable; + SmallPtrSet Reachable; bool Changed = markAliveBlocks(F, Reachable, DTU); // If there are unreachable blocks in the CFG... @@ -2236,21 +2221,21 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI, return Changed; assert(Reachable.size() < F.size()); - NumRemoved += F.size()-Reachable.size(); + NumRemoved += F.size() - Reachable.size(); SmallSetVector DeadBlockSet; - for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ++I) { - auto *BB = &*I; - if (Reachable.count(BB)) + for (BasicBlock &BB : F) { + // Skip reachable basic blocks + if (Reachable.find(&BB) != Reachable.end()) continue; - DeadBlockSet.insert(BB); + DeadBlockSet.insert(&BB); } if (MSSAU) MSSAU->removeBlocks(DeadBlockSet); // Loop over all of the basic blocks that are not reachable, dropping all of - // their internal references. Update DTU and LVI if available. + // their internal references. Update DTU if available. std::vector Updates; for (auto *BB : DeadBlockSet) { for (BasicBlock *Successor : successors(BB)) { @@ -2259,26 +2244,18 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI, if (DTU) Updates.push_back({DominatorTree::Delete, BB, Successor}); } - if (LVI) - LVI->eraseBlock(BB); BB->dropAllReferences(); - } - for (Function::iterator I = ++F.begin(); I != F.end();) { - auto *BB = &*I; - if (Reachable.count(BB)) { - ++I; - continue; - } if (DTU) { - // Remove the terminator of BB to clear the successor list of BB. - if (BB->getTerminator()) - BB->getInstList().pop_back(); + Instruction *TI = BB->getTerminator(); + assert(TI && "Basic block should have a terminator"); + // Terminators like invoke can have users. We have to replace their users, + // before removing them. + if (!TI->use_empty()) + TI->replaceAllUsesWith(UndefValue::get(TI->getType())); + TI->eraseFromParent(); new UnreachableInst(BB->getContext(), BB); assert(succ_empty(BB) && "The successor list of BB isn't empty before " "applying corresponding DTU updates."); - ++I; - } else { - I = F.getBasicBlockList().erase(I); } } @@ -2294,7 +2271,11 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI, } if (!Deleted) return false; + } else { + for (auto *BB : DeadBlockSet) + BB->eraseFromParent(); } + return true; } @@ -2363,6 +2344,9 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, K->setMetadata(Kind, MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); break; + case LLVMContext::MD_preserve_access_index: + // Preserve !preserve.access.index in K. + break; } } // Set !invariant.group from J if J has it. 
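Note on removeUnreachableBlocks: it no longer takes a LazyValueInfo parameter (callers handle LVI invalidation themselves) and tears dead blocks down in a single pass. One subtlety the rewrite makes explicit: a terminator such as invoke can itself have users, so it is replaced with undef before being erased. That teardown step in isolation (name illustrative).

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    static void detachDeadBlock(BasicBlock *BB) {
      Instruction *TI = BB->getTerminator();
      if (!TI->use_empty())
        TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
      TI->eraseFromParent();
      // Leave a well-formed, successor-free block behind for later deletion.
      new UnreachableInst(BB->getContext(), BB);
    }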
If both instructions have it @@ -2385,10 +2369,61 @@ void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J, LLVMContext::MD_invariant_group, LLVMContext::MD_align, LLVMContext::MD_dereferenceable, LLVMContext::MD_dereferenceable_or_null, - LLVMContext::MD_access_group}; + LLVMContext::MD_access_group, LLVMContext::MD_preserve_access_index}; combineMetadata(K, J, KnownIDs, KDominatesJ); } +void llvm::copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source) { + SmallVector, 8> MD; + Source.getAllMetadata(MD); + MDBuilder MDB(Dest.getContext()); + Type *NewType = Dest.getType(); + const DataLayout &DL = Source.getModule()->getDataLayout(); + for (const auto &MDPair : MD) { + unsigned ID = MDPair.first; + MDNode *N = MDPair.second; + // Note, essentially every kind of metadata should be preserved here! This + // routine is supposed to clone a load instruction changing *only its type*. + // The only metadata it makes sense to drop is metadata which is invalidated + // when the pointer type changes. This should essentially never be the case + // in LLVM, but we explicitly switch over only known metadata to be + // conservatively correct. If you are adding metadata to LLVM which pertains + // to loads, you almost certainly want to add it here. + switch (ID) { + case LLVMContext::MD_dbg: + case LLVMContext::MD_tbaa: + case LLVMContext::MD_prof: + case LLVMContext::MD_fpmath: + case LLVMContext::MD_tbaa_struct: + case LLVMContext::MD_invariant_load: + case LLVMContext::MD_alias_scope: + case LLVMContext::MD_noalias: + case LLVMContext::MD_nontemporal: + case LLVMContext::MD_mem_parallel_loop_access: + case LLVMContext::MD_access_group: + // All of these directly apply. + Dest.setMetadata(ID, N); + break; + + case LLVMContext::MD_nonnull: + copyNonnullMetadata(Source, N, Dest); + break; + + case LLVMContext::MD_align: + case LLVMContext::MD_dereferenceable: + case LLVMContext::MD_dereferenceable_or_null: + // These only directly apply if the new type is also a pointer. + if (NewType->isPointerTy()) + Dest.setMetadata(ID, N); + break; + + case LLVMContext::MD_range: + copyRangeMetadata(DL, Source, N, Dest); + break; + } + } +} + void llvm::patchReplacementInstruction(Instruction *I, Value *Repl) { auto *ReplInst = dyn_cast(Repl); if (!ReplInst) @@ -2417,7 +2452,7 @@ void llvm::patchReplacementInstruction(Instruction *I, Value *Repl) { LLVMContext::MD_noalias, LLVMContext::MD_range, LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, LLVMContext::MD_invariant_group, LLVMContext::MD_nonnull, - LLVMContext::MD_access_group}; + LLVMContext::MD_access_group, LLVMContext::MD_preserve_access_index}; combineMetadata(ReplInst, I, KnownIDs, false); } diff --git a/lib/Transforms/Utils/LoopRotationUtils.cpp b/lib/Transforms/Utils/LoopRotationUtils.cpp index 37389a695b45..889ea5ca9970 100644 --- a/lib/Transforms/Utils/LoopRotationUtils.cpp +++ b/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -615,30 +615,9 @@ bool LoopRotate::simplifyLoopLatch(Loop *L) { LLVM_DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into " << LastExit->getName() << "\n"); - // Hoist the instructions from Latch into LastExit. - Instruction *FirstLatchInst = &*(Latch->begin()); - LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(), - Latch->begin(), Jmp->getIterator()); - - // Update MemorySSA - if (MSSAU) - MSSAU->moveAllAfterMergeBlocks(Latch, LastExit, FirstLatchInst); - - unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 
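Note on the new copyMetadataForLoad: it centralizes which metadata kinds survive when a load is re-emitted at a different type; most kinds transfer directly, !nonnull and !range go through their dedicated copy helpers, and the dereferenceability and alignment kinds transfer only if the new type is still a pointer. A hedged usage sketch; the bitcast-based re-load below is illustrative and copies the original alignment explicitly.

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Support/Alignment.h"
    #include "llvm/Transforms/Utils/Local.h"

    using namespace llvm;

    static LoadInst *reloadAtType(LoadInst &OldLoad, Type *NewTy, IRBuilder<> &B) {
      Value *Ptr = B.CreateBitCast(
          OldLoad.getPointerOperand(),
          NewTy->getPointerTo(OldLoad.getPointerAddressSpace()));
      LoadInst *NewLoad = B.CreateLoad(NewTy, Ptr, OldLoad.getName() + ".cast");
      NewLoad->setAlignment(MaybeAlign(OldLoad.getAlignment()));
      copyMetadataForLoad(*NewLoad, OldLoad);   // type-aware metadata transfer
      return NewLoad;
    }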
0 : 1; - BasicBlock *Header = Jmp->getSuccessor(0); - assert(Header == L->getHeader() && "expected a backward branch"); - - // Remove Latch from the CFG so that LastExit becomes the new Latch. - BI->setSuccessor(FallThruPath, Header); - Latch->replaceSuccessorsPhiUsesWith(LastExit); - Jmp->eraseFromParent(); - - // Nuke the Latch block. - assert(Latch->empty() && "unable to evacuate Latch"); - LI->removeBlock(Latch); - if (DT) - DT->eraseNode(Latch); - Latch->eraseFromParent(); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); + MergeBlockIntoPredecessor(Latch, &DTU, LI, MSSAU, nullptr, + /*PredecessorWithTwoSuccessors=*/true); if (MSSAU && VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 7e6da02d5707..d0f89dc54bfb 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -808,7 +808,7 @@ bool LoopSimplify::runOnFunction(Function &F) { auto *MSSAAnalysis = getAnalysisIfAvailable(); if (MSSAAnalysis) { MSSA = &MSSAAnalysis->getMSSA(); - MSSAU = make_unique(MSSA); + MSSAU = std::make_unique(MSSA); } } @@ -835,12 +835,19 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F, DominatorTree *DT = &AM.getResult(F); ScalarEvolution *SE = AM.getCachedResult(F); AssumptionCache *AC = &AM.getResult(F); + auto *MSSAAnalysis = AM.getCachedResult(F); + std::unique_ptr MSSAU; + if (MSSAAnalysis) { + auto *MSSA = &MSSAAnalysis->getMSSA(); + MSSAU = std::make_unique(MSSA); + } + // Note that we don't preserve LCSSA in the new PM, if you need it run LCSSA - // after simplifying the loops. MemorySSA is not preserved either. + // after simplifying the loops. MemorySSA is preserved if it exists. for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) Changed |= - simplifyLoop(*I, DT, LI, SE, AC, nullptr, /*PreserveLCSSA*/ false); + simplifyLoop(*I, DT, LI, SE, AC, MSSAU.get(), /*PreserveLCSSA*/ false); if (!Changed) return PreservedAnalyses::all(); @@ -853,6 +860,8 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F, PA.preserve(); PA.preserve(); PA.preserve(); + if (MSSAAnalysis) + PA.preserve(); // BPI maps conditional terminators to probabilities, LoopSimplify can insert // blocks, but it does so only by splitting existing blocks and edges. This // results in the interesting property that all new terminators inserted are diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index e39ade523714..a7590fc32545 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -711,7 +711,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, auto setDest = [LoopExit, ContinueOnTrue](BasicBlock *Src, BasicBlock *Dest, ArrayRef NextBlocks, - BasicBlock *CurrentHeader, + BasicBlock *BlockInLoop, bool NeedConditional) { auto *Term = cast(Src->getTerminator()); if (NeedConditional) { @@ -723,7 +723,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, if (Dest != LoopExit) { BasicBlock *BB = Src; for (BasicBlock *Succ : successors(BB)) { - if (Succ == CurrentHeader) + // Preserve the incoming value from BB if we are jumping to the block + // in the current loop. + if (Succ == BlockInLoop) continue; for (PHINode &Phi : Succ->phis()) Phi.removeIncomingValue(BB, false); @@ -794,7 +796,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // unconditional branch for some iterations. 
NeedConditional = false; - setDest(Headers[i], Dest, Headers, Headers[i], NeedConditional); + setDest(Headers[i], Dest, Headers, HeaderSucc[i], NeedConditional); } // Set up latches to branch to the new header in the unrolled iterations or @@ -868,7 +870,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, assert(!DT || !UnrollVerifyDomtree || DT->verify(DominatorTree::VerificationLevel::Fast)); - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); // Merge adjacent basic blocks, if possible. for (BasicBlock *Latch : Latches) { BranchInst *Term = dyn_cast(Latch->getTerminator()); @@ -888,6 +890,8 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, } } } + // Apply updates to the DomTree. + DT = &DTU.getDomTree(); // At this point, the code is well formed. We now simplify the unrolled loop, // doing constant propagation and dead code elimination as we go. diff --git a/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/lib/Transforms/Utils/LoopUnrollAndJam.cpp index ff49d83f25c5..bf2e87b0d49f 100644 --- a/lib/Transforms/Utils/LoopUnrollAndJam.cpp +++ b/lib/Transforms/Utils/LoopUnrollAndJam.cpp @@ -517,6 +517,7 @@ LoopUnrollResult llvm::UnrollAndJamLoop( movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]); } + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); // Dominator Tree. Remove the old links between Fore, Sub and Aft, adding the // new ones required. if (Count != 1) { @@ -530,7 +531,7 @@ LoopUnrollResult llvm::UnrollAndJamLoop( ForeBlocksLast.back(), SubLoopBlocksFirst[0]); DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert, SubLoopBlocksLast.back(), AftBlocksFirst[0]); - DT->applyUpdates(DTUpdates); + DTU.applyUpdatesPermissive(DTUpdates); } // Merge adjacent basic blocks, if possible. @@ -538,7 +539,6 @@ LoopUnrollResult llvm::UnrollAndJamLoop( MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end()); MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end()); MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end()); - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); while (!MergeBlocks.empty()) { BasicBlock *BB = *MergeBlocks.begin(); BranchInst *Term = dyn_cast(BB->getTerminator()); @@ -555,6 +555,8 @@ LoopUnrollResult llvm::UnrollAndJamLoop( } else MergeBlocks.erase(BB); } + // Apply updates to the DomTree. + DT = &DTU.getDomTree(); // At this point, the code is well formed. We now do a quick sweep over the // inserted code, doing constant propagation and dead code elimination as we diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp index 005306cf1898..58e42074f963 100644 --- a/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -62,9 +62,11 @@ static cl::opt UnrollForcePeelCount( cl::desc("Force a peel count regardless of profiling information.")); static cl::opt UnrollPeelMultiDeoptExit( - "unroll-peel-multi-deopt-exit", cl::init(false), cl::Hidden, + "unroll-peel-multi-deopt-exit", cl::init(true), cl::Hidden, cl::desc("Allow peeling of loops with multiple deopt exits.")); +static const char *PeeledCountMetaData = "llvm.loop.peeled.count"; + // Designates that a Phi is estimated to become invariant after an "infinite" // number of loop iterations (i.e. only may become an invariant if the loop is // fully unrolled). 
@@ -275,6 +277,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount << " iterations.\n"); UP.PeelCount = UnrollForcePeelCount; + UP.PeelProfiledIterations = true; return; }
@@ -282,6 +285,13 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, if (!UP.AllowPeeling) return; + unsigned AlreadyPeeled = 0; + if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData)) + AlreadyPeeled = *Peeled; + // Stop if we already peeled off the maximum number of iterations. + if (AlreadyPeeled >= UnrollPeelMaxCount) + return; + // Here we try to get rid of Phis which become invariants after 1, 2, ..., N // iterations of the loop. For this we compute the number of iterations after // which every Phi is guaranteed to become an invariant, and try to peel the
@@ -317,11 +327,14 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount); // Consider max peel count limitation. assert(DesiredPeelCount > 0 && "Wrong loop size estimation?"); - LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount - << " iteration(s) to turn" - << " some Phis into invariants.\n"); - UP.PeelCount = DesiredPeelCount; - return; + if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) { + LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount + << " iteration(s) to turn" + << " some Phis into invariants.\n"); + UP.PeelCount = DesiredPeelCount; + UP.PeelProfiledIterations = false; + return; + } } }
@@ -330,6 +343,9 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, if (TripCount) return; + // Do not apply profile-based peeling if it is disabled. + if (!UP.PeelProfiledIterations) + return; // If we don't know the trip count, but have reason to believe the average // trip count is low, peeling should be beneficial, since we will usually // hit the peeled section.
@@ -344,7 +360,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, << "\n"); if (*PeelCount) { - if ((*PeelCount <= UnrollPeelMaxCount) && + if ((*PeelCount + AlreadyPeeled <= UnrollPeelMaxCount) && (LoopSize * (*PeelCount + 1) <= UP.Threshold)) { LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount << " iterations.\n");
@@ -352,6 +368,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, return; } LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n"); + LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n"); LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n"); LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) << "\n");
@@ -364,88 +381,77 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, /// iteration. /// This sets the branch weights for the latch of the recently peeled off loop /// iteration correctly. -/// Our goal is to make sure that: -/// a) The total weight of all the copies of the loop body is preserved. -/// b) The total weight of the loop exit is preserved. -/// c) The body weight is reasonably distributed between the peeled iterations. +/// Let F be the weight of the edge from the latch to the header. +/// Let E be the weight of the edge from the latch to the exit. +/// F/(F+E) is the probability of staying in the loop and E/(F+E) is the +/// probability of exiting. +/// Then the estimated TripCount = F / E. +/// For the I-th (counting from 0) peeled-off iteration we set the weights for +/// the peeled latch to (TC - I, 1). This gives a reasonable distribution: +/// the probability of exiting, 1/(TC - I), increases.
At the same time, +/// the estimated trip count of the remaining loop decreases by I. +/// To avoid dealing with division rounding, we can simply multiply both parts +/// of the weights by E and use the weights (F - I * E, E). /// /// \param Header The copy of the header block that belongs to the next iteration. /// \param LatchBR The copy of the latch branch that belongs to this iteration. -/// \param IterNumber The serial number of the iteration that was just -/// peeled off. -/// \param AvgIters The average number of iterations we expect the loop to have. -/// \param[in,out] PeeledHeaderWeight The total number of dynamic loop -/// iterations that are unaccounted for. As an input, it represents the number -/// of times we expect to enter the header of the iteration currently being -/// peeled off. The output is the number of times we expect to enter the -/// header of the next iteration. +/// \param[in,out] FallThroughWeight The weight of the edge from latch to +/// header before peeling (in) and after peeling off one iteration (out). static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR, - unsigned IterNumber, unsigned AvgIters, - uint64_t &PeeledHeaderWeight) { - if (!PeeledHeaderWeight) + uint64_t ExitWeight, + uint64_t &FallThroughWeight) { + // A FallThroughWeight of 0 means that there are no branch weights on the + // original latch block or the estimated trip count is zero. + if (!FallThroughWeight) return; - // FIXME: Pick a more realistic distribution. - // Currently the proportion of weight we assign to the fall-through - // side of the branch drops linearly with the iteration number, and we use - // a 0.9 fudge factor to make the drop-off less sharp... - uint64_t FallThruWeight = - PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9); - uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight; - PeeledHeaderWeight -= ExitWeight; unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1); MDBuilder MDB(LatchBR->getContext()); MDNode *WeightNode = - HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight) - : MDB.createBranchWeights(FallThruWeight, ExitWeight) + HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight) + : MDB.createBranchWeights(FallThroughWeight, ExitWeight); LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); + FallThroughWeight = + FallThroughWeight > ExitWeight ? FallThroughWeight - ExitWeight : 1; } /// Initialize the weights. /// /// \param Header The header block. /// \param LatchBR The latch branch. -/// \param AvgIters The average number of iterations we expect the loop to have. -/// \param[out] ExitWeight The # of times the edge from Latch to Exit is taken. -/// \param[out] CurHeaderWeight The # of times the header is executed. +/// \param[out] ExitWeight The weight of the edge from Latch to Exit. +/// \param[out] FallThroughWeight The weight of the edge from Latch to Header. static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR, - unsigned AvgIters, uint64_t &ExitWeight, - uint64_t &CurHeaderWeight) { + uint64_t &ExitWeight, + uint64_t &FallThroughWeight) { uint64_t TrueWeight, FalseWeight; if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) return; unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1; ExitWeight = HeaderIdx ? TrueWeight : FalseWeight; - // The # of times the loop body executes is the sum of the exit block - // is taken and the # of times the backedges are taken. - CurHeaderWeight = TrueWeight + FalseWeight; + FallThroughWeight = HeaderIdx ?
FalseWeight : TrueWeight; } /// Update the weights of the original Latch block after peeling off all iterations. /// /// \param Header The header block. /// \param LatchBR The latch branch. -/// \param ExitWeight The weight of the edge from Latch to Exit block. -/// \param CurHeaderWeight The # of time the header is executed. +/// \param ExitWeight The weight of the edge from Latch to Exit. +/// \param FallThroughWeight The weight of the edge from Latch to Header. static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR, - uint64_t ExitWeight, uint64_t CurHeaderWeight) { - // Adjust the branch weights on the loop exit. - if (!ExitWeight) + uint64_t ExitWeight, + uint64_t FallThroughWeight) { + // A FallThroughWeight of 0 means that there are no branch weights on the + // original latch block or the estimated trip count is zero. + if (!FallThroughWeight) return; - // The backedge count is the difference of current header weight and - // current loop exit weight. If the current header weight is smaller than - // the current loop exit weight, we mark the loop backedge weight as 1. - uint64_t BackEdgeWeight = 0; - if (ExitWeight < CurHeaderWeight) - BackEdgeWeight = CurHeaderWeight - ExitWeight; - else - BackEdgeWeight = 1; + // Set the branch weights on the loop exit. MDBuilder MDB(LatchBR->getContext()); unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1; MDNode *WeightNode = - HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight) - : MDB.createBranchWeights(BackEdgeWeight, ExitWeight) + HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight) + : MDB.createBranchWeights(FallThroughWeight, ExitWeight); LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); }
@@ -586,11 +592,30 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, DenseMap ExitIDom; if (DT) { + // We'd like to determine the idom of the exit block after peeling one + // iteration. + // Let Exit be the exit block. + // Let ExitingSet be the set of predecessors of Exit; these are the exiting + // blocks. + // Let Latch' and ExitingSet' be their copies after peeling. + // We'd like to find idom'(Exit), the idom of Exit after peeling. + // It is evident that idom'(Exit) will be the nearest common dominator + // of ExitingSet and ExitingSet'. + // idom(Exit) is the nearest common dominator of ExitingSet. + // idom(Exit)' is the nearest common dominator of ExitingSet'. + // Taking into account that we have a single Latch, Latch' will dominate + // Header and idom(Exit). + // So idom'(Exit) is the nearest common dominator of idom(Exit)' and Latch'. + // All these basic blocks are in the same loop, so what we find is + // (the nearest common dominator of idom(Exit) and Latch)'. + // In the loop below we remember the nearest common dominator of idom(Exit) + // and Latch to update the idom of Exit later. assert(L->hasDedicatedExits() && "No dedicated exits?"); for (auto Edge : ExitEdges) { if (ExitIDom.count(Edge.second)) continue; - BasicBlock *BB = DT->getNode(Edge.second)->getIDom()->getBlock(); + BasicBlock *BB = DT->findNearestCommonDominator( + DT->getNode(Edge.second)->getIDom()->getBlock(), Latch); assert(L->contains(BB) && "IDom is not in a loop"); ExitIDom[Edge.second] = BB; }
@@ -659,23 +684,14 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, // newly created branches.
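// Illustrative example (editorial sketch, not part of the upstream patch):
// suppose the original latch has profile weights F = 100 to the header and
// E = 1 to the exit, i.e. an estimated trip count of F / E = 100. With the
// scheme described above, the first peeled latch copy gets weights (100, 1);
// each call to updateBranchWeights then subtracts E from FallThroughWeight,
// so the second copy gets (99, 1), the third (98, 1), and after peeling
// PeelCount iterations fixupBranchWeights leaves the remaining latch with
// (100 - PeelCount, 1).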
BranchInst *LatchBR = cast(cast(Latch)->getTerminator()); - uint64_t ExitWeight = 0, CurHeaderWeight = 0; - initBranchWeights(Header, LatchBR, PeelCount, ExitWeight, CurHeaderWeight); + uint64_t ExitWeight = 0, FallThroughWeight = 0; + initBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight); // For each peeled-off iteration, make a copy of the loop. for (unsigned Iter = 0; Iter < PeelCount; ++Iter) { SmallVector NewBlocks; ValueToValueMapTy VMap; - // Subtract the exit weight from the current header weight -- the exit - // weight is exactly the weight of the previous iteration's header. - // FIXME: due to the way the distribution is constructed, we need a - // guard here to make sure we don't end up with non-positive weights. - if (ExitWeight < CurHeaderWeight) - CurHeaderWeight -= ExitWeight; - else - CurHeaderWeight = 1; - cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks, LoopBlocks, VMap, LVMap, DT, LI); @@ -697,8 +713,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, } auto *LatchBRCopy = cast(VMap[LatchBR]); - updateBranchWeights(InsertBot, LatchBRCopy, Iter, - PeelCount, ExitWeight); + updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight); // Remove Loop metadata from the latch branch instruction // because it is not the Loop's latch branch anymore. LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr); @@ -724,7 +739,13 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, PHI->setIncomingValueForBlock(NewPreHeader, NewVal); } - fixupBranchWeights(Header, LatchBR, ExitWeight, CurHeaderWeight); + fixupBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight); + + // Update Metadata for count of peeled off iterations. + unsigned AlreadyPeeled = 0; + if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData)) + AlreadyPeeled = *Peeled; + addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount); if (Loop *ParentLoop = L->getParentLoop()) L = ParentLoop; diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp index ec226e65f650..b4d7f35d2d9a 100644 --- a/lib/Transforms/Utils/LoopUtils.cpp +++ b/lib/Transforms/Utils/LoopUtils.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -45,6 +46,7 @@ using namespace llvm::PatternMatch; #define DEBUG_TYPE "loop-utils" static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced"; +static const char *LLVMLoopDisableLICM = "llvm.licm.disable"; bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, @@ -169,6 +171,8 @@ void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); + // FIXME: When all loop passes preserve MemorySSA, it can be required and + // preserved here instead of the individual handling in each pass. } /// Manually defined generic "LoopPass" dependency initialization. This is used @@ -189,6 +193,54 @@ void llvm::initializeLoopPassPass(PassRegistry &Registry) { INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) + INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) +} + +/// Create MDNode for input string. 
+static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) { + LLVMContext &Context = TheLoop->getHeader()->getContext(); + Metadata *MDs[] = { + MDString::get(Context, Name), + ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))}; + return MDNode::get(Context, MDs); +} + +/// Set input string into loop metadata by keeping other values intact. +/// If the string is already in loop metadata update value if it is +/// different. +void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD, + unsigned V) { + SmallVector MDs(1); + // If the loop already has metadata, retain it. + MDNode *LoopID = TheLoop->getLoopID(); + if (LoopID) { + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + MDNode *Node = cast(LoopID->getOperand(i)); + // If it is of form key = value, try to parse it. + if (Node->getNumOperands() == 2) { + MDString *S = dyn_cast(Node->getOperand(0)); + if (S && S->getString().equals(StringMD)) { + ConstantInt *IntMD = + mdconst::extract_or_null(Node->getOperand(1)); + if (IntMD && IntMD->getSExtValue() == V) + // It is already in place. Do nothing. + return; + // We need to update the value, so just skip it here and it will + // be added after copying other existed nodes. + continue; + } + } + MDs.push_back(Node); + } + } + // Add new metadata. + MDs.push_back(createStringMetadata(TheLoop, StringMD, V)); + // Replace current metadata node with new one. + LLVMContext &Context = TheLoop->getHeader()->getContext(); + MDNode *NewLoopID = MDNode::get(Context, MDs); + // Set operand 0 to refer to the loop id itself. + NewLoopID->replaceOperandWith(0, NewLoopID); + TheLoop->setLoopID(NewLoopID); } /// Find string metadata for loop @@ -332,6 +384,10 @@ bool llvm::hasDisableAllTransformsHint(const Loop *L) { return getBooleanLoopAttribute(L, LLVMLoopDisableNonforced); } +bool llvm::hasDisableLICMTransformsHint(const Loop *L) { + return getBooleanLoopAttribute(L, LLVMLoopDisableLICM); +} + TransformationMode llvm::hasUnrollTransformation(Loop *L) { if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable")) return TM_SuppressedByUser; diff --git a/lib/Transforms/Utils/LoopVersioning.cpp b/lib/Transforms/Utils/LoopVersioning.cpp index a9a480a4b7f9..5d7759056c7d 100644 --- a/lib/Transforms/Utils/LoopVersioning.cpp +++ b/lib/Transforms/Utils/LoopVersioning.cpp @@ -92,8 +92,8 @@ void LoopVersioning::versionLoop( // Create empty preheader for the loop (and after cloning for the // non-versioned loop). BasicBlock *PH = - SplitBlock(RuntimeCheckBB, RuntimeCheckBB->getTerminator(), DT, LI); - PH->setName(VersionedLoop->getHeader()->getName() + ".ph"); + SplitBlock(RuntimeCheckBB, RuntimeCheckBB->getTerminator(), DT, LI, + nullptr, VersionedLoop->getHeader()->getName() + ".ph"); // Clone the loop including the preheader. // diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp index c0b7edc547fd..60bb2775a194 100644 --- a/lib/Transforms/Utils/MetaRenamer.cpp +++ b/lib/Transforms/Utils/MetaRenamer.cpp @@ -121,15 +121,14 @@ namespace { } // Rename all functions - const TargetLibraryInfo &TLI = - getAnalysis().getTLI(); for (auto &F : M) { StringRef Name = F.getName(); LibFunc Tmp; // Leave library functions alone because their presence or absence could // affect the behavior of other passes. if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) || - TLI.getLibFunc(F, Tmp)) + getAnalysis().getTLI(F).getLibFunc( + F, Tmp)) continue; // Leave @main alone. 
The output of -metarenamer might be passed to 
diff --git a/lib/Transforms/Utils/MisExpect.cpp b/lib/Transforms/Utils/MisExpect.cpp new file mode 100644 index 000000000000..26d3402bd279 --- /dev/null +++ b/lib/Transforms/Utils/MisExpect.cpp
@@ -0,0 +1,177 @@ +//===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit warnings for potentially incorrect usage of the +// llvm.expect intrinsic. This utility extracts the threshold values from +// metadata associated with the instrumented Branch or Switch instruction. The +// threshold values are then used to determine if a warning should be emitted. +// +// MisExpect metadata is generated when llvm.expect intrinsics are lowered; see +// LowerExpectIntrinsic.cpp. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/MisExpect.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormatVariadic.h" +#include +#include +#include + +#define DEBUG_TYPE "misexpect" + +using namespace llvm; +using namespace misexpect; + +namespace llvm { + +// Command line option to enable/disable the warning when profile data suggests +// a mismatch with the use of the llvm.expect intrinsic. +static cl::opt PGOWarnMisExpect( + "pgo-warn-misexpect", cl::init(false), cl::Hidden, + cl::desc("Use this option to turn on/off " + "warnings about incorrect usage of llvm.expect intrinsics.")); + +} // namespace llvm + +namespace { + +Instruction *getOprndOrInst(Instruction *I) { + assert(I != nullptr && "MisExpect target Instruction cannot be nullptr"); + Instruction *Ret = nullptr; + if (auto *B = dyn_cast(I)) { + Ret = dyn_cast(B->getCondition()); + } + // TODO: Find a way to resolve condition location for switches + // Using the condition of the switch seems to often resolve to an earlier + // point in the program, i.e. the calculation of the switch condition, rather + // than the switch's location in the source code. Thus, we should use the + // instruction to get source code locations rather than the condition to + // improve diagnostic output, such as the caret. If the same problem exists + // for branch instructions, then we should remove this function and directly + // use the instruction. + // + // else if (auto S = dyn_cast(I)) { + // Ret = I; + //} + return Ret ?
Ret : I; +} + +void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx, + uint64_t ProfCount, uint64_t TotalCount) { + double PercentageCorrect = (double)ProfCount / TotalCount; + auto PerString = + formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount); + auto RemStr = formatv( + "Potential performance regression from use of the llvm.expect intrinsic: " + "Annotation was correct on {0} of profiled executions.", + PerString); + Twine Msg(PerString); + Instruction *Cond = getOprndOrInst(I); + if (PGOWarnMisExpect) + Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Msg)); + OptimizationRemarkEmitter ORE(I->getParent()->getParent()); + ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str()); +} + +} // namespace + +namespace llvm { +namespace misexpect { + +void verifyMisExpect(Instruction *I, const SmallVector &Weights, + LLVMContext &Ctx) { + if (auto *MisExpectData = I->getMetadata(LLVMContext::MD_misexpect)) { + auto *MisExpectDataName = dyn_cast(MisExpectData->getOperand(0)); + if (MisExpectDataName && + MisExpectDataName->getString().equals("misexpect")) { + LLVM_DEBUG(llvm::dbgs() << "------------------\n"); + LLVM_DEBUG(llvm::dbgs() + << "Function: " << I->getFunction()->getName() << "\n"); + LLVM_DEBUG(llvm::dbgs() << "Instruction: " << *I << ":\n"); + LLVM_DEBUG(for (int Idx = 0, Size = Weights.size(); Idx < Size; ++Idx) { + llvm::dbgs() << "Weights[" << Idx << "] = " << Weights[Idx] << "\n"; + }); + + // extract values from misexpect metadata + const auto *IndexCint = + mdconst::dyn_extract(MisExpectData->getOperand(1)); + const auto *LikelyCInt = + mdconst::dyn_extract(MisExpectData->getOperand(2)); + const auto *UnlikelyCInt = + mdconst::dyn_extract(MisExpectData->getOperand(3)); + + if (!IndexCint || !LikelyCInt || !UnlikelyCInt) + return; + + const uint64_t Index = IndexCint->getZExtValue(); + const uint64_t LikelyBranchWeight = LikelyCInt->getZExtValue(); + const uint64_t UnlikelyBranchWeight = UnlikelyCInt->getZExtValue(); + const uint64_t ProfileCount = Weights[Index]; + const uint64_t CaseTotal = std::accumulate( + Weights.begin(), Weights.end(), (uint64_t)0, std::plus()); + const uint64_t NumUnlikelyTargets = Weights.size() - 1; + + const uint64_t TotalBranchWeight = + LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets); + + const llvm::BranchProbability LikelyThreshold(LikelyBranchWeight, + TotalBranchWeight); + uint64_t ScaledThreshold = LikelyThreshold.scale(CaseTotal); + + LLVM_DEBUG(llvm::dbgs() + << "Unlikely Targets: " << NumUnlikelyTargets << ":\n"); + LLVM_DEBUG(llvm::dbgs() << "Profile Count: " << ProfileCount << ":\n"); + LLVM_DEBUG(llvm::dbgs() + << "Scaled Threshold: " << ScaledThreshold << ":\n"); + LLVM_DEBUG(llvm::dbgs() << "------------------\n"); + if (ProfileCount < ScaledThreshold) + emitMisexpectDiagnostic(I, Ctx, ProfileCount, CaseTotal); + } + } +} + +void checkFrontendInstrumentation(Instruction &I) { + if (auto *MD = I.getMetadata(LLVMContext::MD_prof)) { + unsigned NOps = MD->getNumOperands(); + + // Only emit misexpect diagnostics if at least 2 branch weights are present. + // Less than 2 branch weights means that the profiling metadata is: + // 1) incorrect/corrupted + // 2) not branch weight metadata + // 3) completely deterministic + // In these cases we should not emit any diagnostic related to misexpect. 
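// Illustrative example (editorial sketch, not part of the upstream patch):
// a branch annotated by llvm.expect lowering typically carries metadata like
//   !prof !{!"branch_weights", i32 2000, i32 1}
// That node has NOps == 3 (the "branch_weights" tag plus two weights), so it
// passes the check below, and RealWeights becomes {2000, 1} before being
// handed to verifyMisExpect() along with the instruction's context.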
+ if (NOps < 3) + return; + + // Operand 0 is a string tag "branch_weights" + if (MDString *Tag = cast(MD->getOperand(0))) { + if (Tag->getString().equals("branch_weights")) { + SmallVector RealWeights(NOps - 1); + for (unsigned i = 1; i < NOps; i++) { + ConstantInt *Value = + mdconst::dyn_extract(MD->getOperand(i)); + RealWeights[i - 1] = Value->getZExtValue(); + } + verifyMisExpect(&I, RealWeights, I.getContext()); + } + } + } +} + +} // namespace misexpect +} // namespace llvm +#undef DEBUG_TYPE diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp index c84beceee191..1ef3757017a8 100644 --- a/lib/Transforms/Utils/ModuleUtils.cpp +++ b/lib/Transforms/Utils/ModuleUtils.cpp @@ -73,7 +73,7 @@ static void appendToUsedList(Module &M, StringRef Name, ArrayRef SmallPtrSet InitAsSet; SmallVector Init; if (GV) { - ConstantArray *CA = dyn_cast(GV->getInitializer()); + auto *CA = cast(GV->getInitializer()); for (auto &Op : CA->operands()) { Constant *C = cast_or_null(Op); if (InitAsSet.insert(C).second) diff --git a/lib/Transforms/Utils/PredicateInfo.cpp b/lib/Transforms/Utils/PredicateInfo.cpp index bdf24d80bd17..44859eafb9c1 100644 --- a/lib/Transforms/Utils/PredicateInfo.cpp +++ b/lib/Transforms/Utils/PredicateInfo.cpp @@ -125,8 +125,10 @@ static bool valueComesBefore(OrderedInstructions &OI, const Value *A, // necessary to compare uses/defs in the same block. Doing so allows us to walk // the minimum number of instructions necessary to compute our def/use ordering. struct ValueDFS_Compare { + DominatorTree &DT; OrderedInstructions &OI; - ValueDFS_Compare(OrderedInstructions &OI) : OI(OI) {} + ValueDFS_Compare(DominatorTree &DT, OrderedInstructions &OI) + : DT(DT), OI(OI) {} bool operator()(const ValueDFS &A, const ValueDFS &B) const { if (&A == &B) @@ -136,7 +138,9 @@ struct ValueDFS_Compare { // comesbefore to see what the real ordering is, because they are in the // same basic block. - bool SameBlock = std::tie(A.DFSIn, A.DFSOut) == std::tie(B.DFSIn, B.DFSOut); + assert((A.DFSIn != B.DFSIn || A.DFSOut == B.DFSOut) && + "Equal DFS-in numbers imply equal out numbers"); + bool SameBlock = A.DFSIn == B.DFSIn; // We want to put the def that will get used for a given set of phi uses, // before those phi uses. @@ -145,9 +149,11 @@ struct ValueDFS_Compare { if (SameBlock && A.LocalNum == LN_Last && B.LocalNum == LN_Last) return comparePHIRelated(A, B); + bool isADef = A.Def; + bool isBDef = B.Def; if (!SameBlock || A.LocalNum != LN_Middle || B.LocalNum != LN_Middle) - return std::tie(A.DFSIn, A.DFSOut, A.LocalNum, A.Def, A.U) < - std::tie(B.DFSIn, B.DFSOut, B.LocalNum, B.Def, B.U); + return std::tie(A.DFSIn, A.LocalNum, isADef) < + std::tie(B.DFSIn, B.LocalNum, isBDef); return localComesBefore(A, B); } @@ -164,10 +170,35 @@ struct ValueDFS_Compare { // For two phi related values, return the ordering. bool comparePHIRelated(const ValueDFS &A, const ValueDFS &B) const { - auto &ABlockEdge = getBlockEdge(A); - auto &BBlockEdge = getBlockEdge(B); - // Now sort by block edge and then defs before uses. - return std::tie(ABlockEdge, A.Def, A.U) < std::tie(BBlockEdge, B.Def, B.U); + BasicBlock *ASrc, *ADest, *BSrc, *BDest; + std::tie(ASrc, ADest) = getBlockEdge(A); + std::tie(BSrc, BDest) = getBlockEdge(B); + +#ifndef NDEBUG + // This function should only be used for values in the same BB, check that. 
+ DomTreeNode *DomASrc = DT.getNode(ASrc); + DomTreeNode *DomBSrc = DT.getNode(BSrc); + assert(DomASrc->getDFSNumIn() == (unsigned)A.DFSIn && + "DFS numbers for A should match the ones of the source block"); + assert(DomBSrc->getDFSNumIn() == (unsigned)B.DFSIn && + "DFS numbers for B should match the ones of the source block"); + assert(A.DFSIn == B.DFSIn && "Values must be in the same block"); +#endif + (void)ASrc; + (void)BSrc; + + // Use DFS numbers to compare destination blocks, to guarantee a + // deterministic order. + DomTreeNode *DomADest = DT.getNode(ADest); + DomTreeNode *DomBDest = DT.getNode(BDest); + unsigned AIn = DomADest->getDFSNumIn(); + unsigned BIn = DomBDest->getDFSNumIn(); + bool isADef = A.Def; + bool isBDef = B.Def; + assert((!A.Def || !A.U) && (!B.Def || !B.U) && + "Def and U cannot be set at the same time"); + // Now sort by edge destination and then defs before uses. + return std::tie(AIn, isADef) < std::tie(BIn, isBDef); } // Get the definition of an instruction that occurs in the middle of a block. @@ -306,10 +337,11 @@ void collectCmpOps(CmpInst *Comparison, SmallVectorImpl &CmpOperands) { } // Add Op, PB to the list of value infos for Op, and mark Op to be renamed. -void PredicateInfo::addInfoFor(SmallPtrSetImpl &OpsToRename, Value *Op, +void PredicateInfo::addInfoFor(SmallVectorImpl &OpsToRename, Value *Op, PredicateBase *PB) { - OpsToRename.insert(Op); auto &OperandInfo = getOrCreateValueInfo(Op); + if (OperandInfo.Infos.empty()) + OpsToRename.push_back(Op); AllInfos.push_back(PB); OperandInfo.Infos.push_back(PB); } @@ -317,7 +349,7 @@ void PredicateInfo::addInfoFor(SmallPtrSetImpl &OpsToRename, Value *Op, // Process an assume instruction and place relevant operations we want to rename // into OpsToRename. void PredicateInfo::processAssume(IntrinsicInst *II, BasicBlock *AssumeBB, - SmallPtrSetImpl &OpsToRename) { + SmallVectorImpl &OpsToRename) { // See if we have a comparison we support SmallVector CmpOperands; SmallVector ConditionsToProcess; @@ -357,7 +389,7 @@ void PredicateInfo::processAssume(IntrinsicInst *II, BasicBlock *AssumeBB, // Process a block terminating branch, and place relevant operations to be // renamed into OpsToRename. void PredicateInfo::processBranch(BranchInst *BI, BasicBlock *BranchBB, - SmallPtrSetImpl &OpsToRename) { + SmallVectorImpl &OpsToRename) { BasicBlock *FirstBB = BI->getSuccessor(0); BasicBlock *SecondBB = BI->getSuccessor(1); SmallVector SuccsToProcess; @@ -427,7 +459,7 @@ void PredicateInfo::processBranch(BranchInst *BI, BasicBlock *BranchBB, // Process a block terminating switch, and place relevant operations to be // renamed into OpsToRename. void PredicateInfo::processSwitch(SwitchInst *SI, BasicBlock *BranchBB, - SmallPtrSetImpl &OpsToRename) { + SmallVectorImpl &OpsToRename) { Value *Op = SI->getCondition(); if ((!isa(Op) && !isa(Op)) || Op->hasOneUse()) return; @@ -457,7 +489,7 @@ void PredicateInfo::buildPredicateInfo() { DT.updateDFSNumbers(); // Collect operands to rename from all conditional branch terminators, as well // as assume statements. 
- SmallPtrSet OpsToRename; + SmallVector OpsToRename; for (auto DTN : depth_first(DT.getRootNode())) { BasicBlock *BranchBB = DTN->getBlock(); if (auto *BI = dyn_cast(BranchBB->getTerminator())) { @@ -524,7 +556,7 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter, if (isa(ValInfo)) { IRBuilder<> B(getBranchTerminator(ValInfo)); Function *IF = getCopyDeclaration(F.getParent(), Op->getType()); - if (empty(IF->users())) + if (IF->users().empty()) CreatedDeclarations.insert(IF); CallInst *PIC = B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++)); @@ -536,7 +568,7 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter, "Should not have gotten here without it being an assume"); IRBuilder<> B(PAssume->AssumeInst); Function *IF = getCopyDeclaration(F.getParent(), Op->getType()); - if (empty(IF->users())) + if (IF->users().empty()) CreatedDeclarations.insert(IF); CallInst *PIC = B.CreateCall(IF, Op); PredicateMap.insert({PIC, ValInfo}); @@ -565,14 +597,8 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter, // // TODO: Use this algorithm to perform fast single-variable renaming in // promotememtoreg and memoryssa. -void PredicateInfo::renameUses(SmallPtrSetImpl &OpSet) { - // Sort OpsToRename since we are going to iterate it. - SmallVector OpsToRename(OpSet.begin(), OpSet.end()); - auto Comparator = [&](const Value *A, const Value *B) { - return valueComesBefore(OI, A, B); - }; - llvm::sort(OpsToRename, Comparator); - ValueDFS_Compare Compare(OI); +void PredicateInfo::renameUses(SmallVectorImpl &OpsToRename) { + ValueDFS_Compare Compare(DT, OI); // Compute liveness, and rename in O(uses) per Op. for (auto *Op : OpsToRename) { LLVM_DEBUG(dbgs() << "Visiting " << *Op << "\n"); @@ -772,7 +798,7 @@ static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) { bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) { auto &DT = getAnalysis().getDomTree(); auto &AC = getAnalysis().getAssumptionCache(F); - auto PredInfo = make_unique(F, DT, AC); + auto PredInfo = std::make_unique(F, DT, AC); PredInfo->print(dbgs()); if (VerifyPredicateInfo) PredInfo->verifyPredicateInfo(); @@ -786,7 +812,7 @@ PreservedAnalyses PredicateInfoPrinterPass::run(Function &F, auto &DT = AM.getResult(F); auto &AC = AM.getResult(F); OS << "PredicateInfo for function: " << F.getName() << "\n"; - auto PredInfo = make_unique(F, DT, AC); + auto PredInfo = std::make_unique(F, DT, AC); PredInfo->print(OS); replaceCreatedSSACopys(*PredInfo, F); @@ -845,7 +871,7 @@ PreservedAnalyses PredicateInfoVerifierPass::run(Function &F, FunctionAnalysisManager &AM) { auto &DT = AM.getResult(F); auto &AC = AM.getResult(F); - make_unique(F, DT, AC)->verifyPredicateInfo(); + std::make_unique(F, DT, AC)->verifyPredicateInfo(); return PreservedAnalyses::all(); } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 11651d040dc0..3a5e3293ed4f 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -94,6 +94,12 @@ static cl::opt PHINodeFoldingThreshold( cl::desc( "Control the amount of phi node folding to perform (default = 2)")); +static cl::opt TwoEntryPHINodeFoldingThreshold( + "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), + cl::desc("Control the maximal total instruction cost that we are willing " + "to speculatively execute to fold a 2-entry PHI node into a " + "select (default = 4)")); + static cl::opt DupRet( "simplifycfg-dup-ret", cl::Hidden, cl::init(false), cl::desc("Duplicate return 
instructions into unconditional branches")); @@ -332,7 +338,7 @@ static unsigned ComputeSpeculationCost(const User *I, /// CostRemaining, false is returned and CostRemaining is undefined. static bool DominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSetImpl &AggressiveInsts, - unsigned &CostRemaining, + int &BudgetRemaining, const TargetTransformInfo &TTI, unsigned Depth = 0) { // It is possible to hit a zero-cost cycle (phi/gep instructions for example), @@ -375,7 +381,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, if (!isSafeToSpeculativelyExecute(I)) return false; - unsigned Cost = ComputeSpeculationCost(I, TTI); + BudgetRemaining -= ComputeSpeculationCost(I, TTI); // Allow exactly one instruction to be speculated regardless of its cost // (as long as it is safe to do so). @@ -383,17 +389,14 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // or other expensive operation. The speculation of an expensive instruction // is expected to be undone in CodeGenPrepare if the speculation has not // enabled further IR optimizations. - if (Cost > CostRemaining && + if (BudgetRemaining < 0 && (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0)) return false; - // Avoid unsigned wrap. - CostRemaining = (Cost > CostRemaining) ? 0 : CostRemaining - Cost; - // Okay, we can only really hoist these out if their operands do // not take us over the cost threshold. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) - if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI, + if (!DominatesMergePoint(*i, BB, AggressiveInsts, BudgetRemaining, TTI, Depth + 1)) return false; // Okay, it's safe to do this! Remember this instruction. @@ -629,8 +632,7 @@ private: /// vector. /// One "Extra" case is allowed to differ from the other. void gather(Value *V) { - Instruction *I = dyn_cast(V); - bool isEQ = (I->getOpcode() == Instruction::Or); + bool isEQ = (cast(V)->getOpcode() == Instruction::Or); // Keep a stack (SmallVector for efficiency) for depth-first traversal SmallVector DFT; @@ -1313,7 +1315,8 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, LLVMContext::MD_dereferenceable, LLVMContext::MD_dereferenceable_or_null, LLVMContext::MD_mem_parallel_loop_access, - LLVMContext::MD_access_group}; + LLVMContext::MD_access_group, + LLVMContext::MD_preserve_access_index}; combineMetadata(I1, I2, KnownIDs, true); // I1 and I2 are being combined into a single instruction. Its debug @@ -1420,6 +1423,20 @@ HoistTerminator: return true; } +// Check lifetime markers. +static bool isLifeTimeMarker(const Instruction *I) { + if (auto II = dyn_cast(I)) { + switch (II->getIntrinsicID()) { + default: + break; + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return true; + } + } + return false; +} + // All instructions in Insts belong to different blocks that all unconditionally // branch to a common successor. Analyze each instruction and return true if it // would be possible to sink them into their successor, creating one common @@ -1474,20 +1491,25 @@ static bool canSinkInstructions( return false; } - // Because SROA can't handle speculating stores of selects, try not - // to sink loads or stores of allocas when we'd have to create a PHI for - // the address operand. Also, because it is likely that loads or stores - // of allocas will disappear when Mem2Reg/SROA is run, don't sink them. 
+ // Because SROA can't handle speculating stores of selects, try not to sink + // loads, stores or lifetime markers of allocas when we'd have to create a + // PHI for the address operand. Also, because it is likely that loads or + // stores of allocas will disappear when Mem2Reg/SROA is run, don't sink + // them. // This can cause code churn which can have unintended consequences down // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244. // FIXME: This is a workaround for a deficiency in SROA - see // https://llvm.org/bugs/show_bug.cgi?id=30188 if (isa(I0) && any_of(Insts, [](const Instruction *I) { - return isa(I->getOperand(1)); + return isa(I->getOperand(1)->stripPointerCasts()); })) return false; if (isa(I0) && any_of(Insts, [](const Instruction *I) { - return isa(I->getOperand(0)); + return isa(I->getOperand(0)->stripPointerCasts()); + })) + return false; + if (isLifeTimeMarker(I0) && any_of(Insts, [](const Instruction *I) { + return isa(I->getOperand(1)->stripPointerCasts()); })) return false; @@ -1959,7 +1981,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, SmallVector SpeculatedDbgIntrinsics; - unsigned SpeculationCost = 0; + unsigned SpeculatedInstructions = 0; Value *SpeculatedStoreValue = nullptr; StoreInst *SpeculatedStore = nullptr; for (BasicBlock::iterator BBI = ThenBB->begin(), @@ -1974,8 +1996,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // Only speculatively execute a single instruction (not counting the // terminator) for now. - ++SpeculationCost; - if (SpeculationCost > 1) + ++SpeculatedInstructions; + if (SpeculatedInstructions > 1) return false; // Don't hoist the instruction if it's unsafe or expensive. @@ -2012,8 +2034,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, E = SinkCandidateUseCounts.end(); I != E; ++I) if (I->first->hasNUses(I->second)) { - ++SpeculationCost; - if (SpeculationCost > 1) + ++SpeculatedInstructions; + if (SpeculatedInstructions > 1) return false; } @@ -2053,8 +2075,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // getting expanded into Instructions. // FIXME: This doesn't account for how many operations are combined in the // constant expression. - ++SpeculationCost; - if (SpeculationCost > 1) + ++SpeculatedInstructions; + if (SpeculatedInstructions > 1) return false; } @@ -2302,10 +2324,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, // instructions. While we are at it, keep track of the instructions // that need to be moved to the dominating block. SmallPtrSet AggressiveInsts; - unsigned MaxCostVal0 = PHINodeFoldingThreshold, - MaxCostVal1 = PHINodeFoldingThreshold; - MaxCostVal0 *= TargetTransformInfo::TCC_Basic; - MaxCostVal1 *= TargetTransformInfo::TCC_Basic; + int BudgetRemaining = + TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; for (BasicBlock::iterator II = BB->begin(); isa(II);) { PHINode *PN = cast(II++); @@ -2316,9 +2336,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, } if (!DominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts, - MaxCostVal0, TTI) || + BudgetRemaining, TTI) || !DominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts, - MaxCostVal1, TTI)) + BudgetRemaining, TTI)) return false; } @@ -2328,12 +2348,24 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, if (!PN) return true; - // Don't fold i1 branches on PHIs which contain binary operators. 
These can - // often be turned into switches and other things. + // Return true if at least one of these is a 'not', and another is either + // a 'not' too, or a constant. + auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) { + if (!match(V0, m_Not(m_Value()))) + std::swap(V0, V1); + auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant()); + return match(V0, m_Not(m_Value())) && match(V1, Invertible); + }; + + // Don't fold i1 branches on PHIs which contain binary operators, unless one + // of the incoming values is an 'not' and another one is freely invertible. + // These can often be turned into switches and other things. if (PN->getType()->isIntegerTy(1) && (isa(PN->getIncomingValue(0)) || isa(PN->getIncomingValue(1)) || - isa(IfCond))) + isa(IfCond)) && + !CanHoistNotFromBothValues(PN->getIncomingValue(0), + PN->getIncomingValue(1))) return false; // If all PHI nodes are promotable, check to make sure that all instructions @@ -2368,6 +2400,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, return false; } } + assert(DomBlock && "Failed to find root DomBlock"); LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: " << IfTrue->getName() @@ -2913,42 +2946,8 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, - const DataLayout &DL) { - auto IsaBitcastOfPointerType = [](const Instruction &I) { - return Operator::getOpcode(&I) == Instruction::BitCast && - I.getType()->isPointerTy(); - }; - - // If we're not in aggressive mode, we only optimize if we have some - // confidence that by optimizing we'll allow P and/or Q to be if-converted. - auto IsWorthwhile = [&](BasicBlock *BB) { - if (!BB) - return true; - // Heuristic: if the block can be if-converted/phi-folded and the - // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to - // thread this store. - unsigned N = 0; - for (auto &I : BB->instructionsWithoutDebug()) { - // Cheap instructions viable for folding. - if (isa(I) || isa(I) || - isa(I)) - ++N; - // Free instructions. - else if (I.isTerminator() || IsaBitcastOfPointerType(I)) - continue; - else - return false; - } - // The store we want to merge is counted in N, so add 1 to make sure - // we're counting the instructions that would be left. - return N <= (PHINodeFoldingThreshold + 1); - }; - - if (!MergeCondStoresAggressively && - (!IsWorthwhile(PTB) || !IsWorthwhile(PFB) || !IsWorthwhile(QTB) || - !IsWorthwhile(QFB))) - return false; - + const DataLayout &DL, + const TargetTransformInfo &TTI) { // For every pointer, there must be exactly two stores, one coming from // PTB or PFB, and the other from QTB or QFB. We don't support more than one // store (to any address) in PTB,PFB or QTB,QFB. @@ -2989,6 +2988,46 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, if (&*I != PStore && I->mayReadOrWriteMemory()) return false; + // If we're not in aggressive mode, we only optimize if we have some + // confidence that by optimizing we'll allow P and/or Q to be if-converted. + auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef FreeStores) { + if (!BB) + return true; + // Heuristic: if the block can be if-converted/phi-folded and the + // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to + // thread this store. 
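// Worked example (editorial sketch, not part of the upstream patch; it assumes
// the default PHINodeFoldingThreshold of 2 and TCC_Basic == 1): the starting
// budget below is 2 * TCC_Basic == 2. The terminator and the two stores being
// merged (PStore/QStore) are treated as free; every other allowed instruction
// (arithmetic or GEP) typically costs TCC_Basic, so a block containing the
// store plus two adds ends the loop with BudgetRemaining == 0 and is still
// considered worthwhile, while one additional non-free instruction drives the
// budget negative and the fold is refused.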
+ int BudgetRemaining = + PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; + for (auto &I : BB->instructionsWithoutDebug()) { + // Consider the terminator instruction to be free. + if (I.isTerminator()) + continue; + // If this is one of the stores that we want to speculate out of this BB, + // then don't count its cost; consider it to be free. + if (auto *S = dyn_cast(&I)) + if (llvm::find(FreeStores, S) != FreeStores.end()) + continue; + // Else, we have a white-list of instructions that we are okay speculating. + if (!isa(I) && !isa(I)) + return false; // Not in white-list - not worthwhile folding. + // And finally, if this is a non-free instruction that we are okay + // speculating, ensure that we consider the speculation budget. + BudgetRemaining -= TTI.getUserCost(&I); + if (BudgetRemaining < 0) + return false; // Eagerly refuse to fold as soon as we're out of budget. + } + assert(BudgetRemaining >= 0 && + "When we run out of budget we will eagerly return from within the " + "per-instruction loop."); + return true; + }; + + const SmallVector FreeStores = {PStore, QStore}; + if (!MergeCondStoresAggressively && + (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) || + !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores))) + return false; + // If PostBB has more than two predecessors, we need to split it so we can // sink the store. if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
@@ -3048,15 +3087,15 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, // store that doesn't execute. if (MinAlignment != 0) { // Choose the minimum of all non-zero alignments. - SI->setAlignment(MinAlignment); + SI->setAlignment(Align(MinAlignment)); } else if (MaxAlignment != 0) { // Choose the minimal alignment between the non-zero alignment and the ABI // default alignment for the type of the stored value. - SI->setAlignment(std::min(MaxAlignment, TypeAlignment)); + SI->setAlignment(Align(std::min(MaxAlignment, TypeAlignment))); } else { // If both alignments are zero, use ABI default alignment for the type of // the stored value. - SI->setAlignment(TypeAlignment); + SI->setAlignment(Align(TypeAlignment)); } QStore->eraseFromParent();
@@ -3066,7 +3105,8 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, } static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, - const DataLayout &DL) { + const DataLayout &DL, + const TargetTransformInfo &TTI) { // The intention here is to find diamonds or triangles (see below) where each // conditional block contains a store to the same address. Both of these // stores are conditional, so they can't be unconditionally sunk. But it may
@@ -3168,7 +3208,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, bool Changed = false; for (auto *Address : CommonAddresses) Changed |= mergeConditionalStoreToAddress( - PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DL); + PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DL, TTI); return Changed; }
@@ -3177,7 +3217,8 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, /// that PBI and BI are both conditional branches, and BI is in one of the /// successor blocks of PBI - PBI branches to BI.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, - const DataLayout &DL) { + const DataLayout &DL, + const TargetTransformInfo &TTI) { assert(PBI->isConditional() && BI->isConditional()); BasicBlock *BB = BI->getParent(); @@ -3233,7 +3274,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // If both branches are conditional and both contain stores to the same // address, remove the stores from the conditionals and create a conditional // merged store at the end. - if (MergeCondStores && mergeConditionalStores(PBI, BI, DL)) + if (MergeCondStores && mergeConditionalStores(PBI, BI, DL, TTI)) return true; // If this is a conditional branch in an empty block, and if any @@ -3697,12 +3738,17 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, BasicBlock *BB = BI->getParent(); + // MSAN does not like undefs as branch condition which can be introduced + // with "explicit branch". + if (ExtraCase && BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory)) + return false; + LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size() << " cases into SWITCH. BB is:\n" << *BB); // If there are any extra values that couldn't be folded into the switch - // then we evaluate them with an explicit branch first. Split the block + // then we evaluate them with an explicit branch first. Split the block // right before the condbr to handle it. if (ExtraCase) { BasicBlock *NewBB = @@ -3851,7 +3897,7 @@ bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) { // Simplify resume that is only used by a single (non-phi) landing pad. bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) { BasicBlock *BB = RI->getParent(); - LandingPadInst *LPInst = dyn_cast(BB->getFirstNonPHI()); + auto *LPInst = cast(BB->getFirstNonPHI()); assert(RI->getValue() == LPInst && "Resume must unwind the exception that caused control to here"); @@ -4178,23 +4224,22 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { IRBuilder<> Builder(TI); if (auto *BI = dyn_cast(TI)) { if (BI->isUnconditional()) { - if (BI->getSuccessor(0) == BB) { - new UnreachableInst(TI->getContext(), TI); - TI->eraseFromParent(); - Changed = true; - } + assert(BI->getSuccessor(0) == BB && "Incorrect CFG"); + new UnreachableInst(TI->getContext(), TI); + TI->eraseFromParent(); + Changed = true; } else { Value* Cond = BI->getCondition(); if (BI->getSuccessor(0) == BB) { Builder.CreateAssumption(Builder.CreateNot(Cond)); Builder.CreateBr(BI->getSuccessor(1)); - EraseTerminatorAndDCECond(BI); - } else if (BI->getSuccessor(1) == BB) { + } else { + assert(BI->getSuccessor(1) == BB && "Incorrect CFG"); Builder.CreateAssumption(Cond); Builder.CreateBr(BI->getSuccessor(0)); - EraseTerminatorAndDCECond(BI); - Changed = true; } + EraseTerminatorAndDCECond(BI); + Changed = true; } } else if (auto *SI = dyn_cast(TI)) { SwitchInstProfUpdateWrapper SU(*SI); @@ -4276,6 +4321,17 @@ static bool CasesAreContiguous(SmallVectorImpl &Cases) { return true; } +static void createUnreachableSwitchDefault(SwitchInst *Switch) { + LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); + BasicBlock *NewDefaultBlock = + SplitBlockPredecessors(Switch->getDefaultDest(), Switch->getParent(), ""); + Switch->setDefaultDest(&*NewDefaultBlock); + SplitBlock(&*NewDefaultBlock, &NewDefaultBlock->front()); + auto *NewTerminator = NewDefaultBlock->getTerminator(); + new UnreachableInst(Switch->getContext(), NewTerminator); + EraseTerminatorAndDCECond(NewTerminator); +} + /// Turn a 
switch with two reachable destinations into an integer range /// comparison and branch. static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { @@ -4384,6 +4440,11 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { cast(BBI)->removeIncomingValue(SI->getParent()); } + // Clean up the default block - it may have phis or other instructions before + // the unreachable terminator. + if (!HasDefault) + createUnreachableSwitchDefault(SI); + // Drop the switch. SI->eraseFromParent(); @@ -4428,14 +4489,7 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, if (HasDefault && DeadCases.empty() && NumUnknownBits < 64 /* avoid overflow */ && SI->getNumCases() == (1ULL << NumUnknownBits)) { - LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); - BasicBlock *NewDefault = - SplitBlockPredecessors(SI->getDefaultDest(), SI->getParent(), ""); - SI->setDefaultDest(&*NewDefault); - SplitBlock(&*NewDefault, &NewDefault->front()); - auto *OldTI = NewDefault->getTerminator(); - new UnreachableInst(SI->getContext(), OldTI); - EraseTerminatorAndDCECond(OldTI); + createUnreachableSwitchDefault(SI); return true; } @@ -5031,7 +5085,7 @@ SwitchLookupTable::SwitchLookupTable( Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Set the alignment to that of an array items. We will be only loading one // value out of it. - Array->setAlignment(DL.getPrefTypeAlignment(ValueType)); + Array->setAlignment(Align(DL.getPrefTypeAlignment(ValueType))); Kind = ArrayKind; } @@ -5260,7 +5314,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // Figure out the corresponding result for each case value and phi node in the // common destination, as well as the min and max case values. - assert(!empty(SI->cases())); + assert(!SI->cases().empty()); SwitchInst::CaseIt CI = SI->case_begin(); ConstantInt *MinCaseVal = CI->getCaseValue(); ConstantInt *MaxCaseVal = CI->getCaseValue(); @@ -5892,7 +5946,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast((*PI)->getTerminator())) if (PBI != BI && PBI->isConditional()) - if (SimplifyCondBranchToCondBranch(PBI, BI, DL)) + if (SimplifyCondBranchToCondBranch(PBI, BI, DL, TTI)) return requestResimplify(); // Look for diamond patterns. 
@@ -5900,7 +5954,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB)) if (BranchInst *PBI = dyn_cast(PrevBB->getTerminator())) if (PBI != BI && PBI->isConditional()) - if (mergeConditionalStores(PBI, BI, DL)) + if (mergeConditionalStores(PBI, BI, DL, TTI)) return requestResimplify(); return false; diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index e0def81d5eee..0324993a8203 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/KnownBits.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/SizeOpts.h" @@ -47,7 +48,6 @@ static cl::opt cl::desc("Enable unsafe double to float " "shrinking for math lib calls")); - //===----------------------------------------------------------------------===// // Helper Functions //===----------------------------------------------------------------------===// @@ -177,7 +177,8 @@ static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len, if (!isOnlyUsedInComparisonWithZero(CI)) return false; - if (!isDereferenceableAndAlignedPointer(Str, 1, APInt(64, Len), DL)) + if (!isDereferenceableAndAlignedPointer(Str, Align::None(), APInt(64, Len), + DL)) return false; if (CI->getFunction()->hasFnAttribute(Attribute::SanitizeMemory)) @@ -186,6 +187,67 @@ static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len, return true; } +static void annotateDereferenceableBytes(CallInst *CI, + ArrayRef ArgNos, + uint64_t DereferenceableBytes) { + const Function *F = CI->getCaller(); + if (!F) + return; + for (unsigned ArgNo : ArgNos) { + uint64_t DerefBytes = DereferenceableBytes; + unsigned AS = CI->getArgOperand(ArgNo)->getType()->getPointerAddressSpace(); + if (!llvm::NullPointerIsDefined(F, AS) || + CI->paramHasAttr(ArgNo, Attribute::NonNull)) + DerefBytes = std::max(CI->getDereferenceableOrNullBytes( + ArgNo + AttributeList::FirstArgIndex), + DereferenceableBytes); + + if (CI->getDereferenceableBytes(ArgNo + AttributeList::FirstArgIndex) < + DerefBytes) { + CI->removeParamAttr(ArgNo, Attribute::Dereferenceable); + if (!llvm::NullPointerIsDefined(F, AS) || + CI->paramHasAttr(ArgNo, Attribute::NonNull)) + CI->removeParamAttr(ArgNo, Attribute::DereferenceableOrNull); + CI->addParamAttr(ArgNo, Attribute::getWithDereferenceableBytes( + CI->getContext(), DerefBytes)); + } + } +} + +static void annotateNonNullBasedOnAccess(CallInst *CI, + ArrayRef ArgNos) { + Function *F = CI->getCaller(); + if (!F) + return; + + for (unsigned ArgNo : ArgNos) { + if (CI->paramHasAttr(ArgNo, Attribute::NonNull)) + continue; + unsigned AS = CI->getArgOperand(ArgNo)->getType()->getPointerAddressSpace(); + if (llvm::NullPointerIsDefined(F, AS)) + continue; + + CI->addParamAttr(ArgNo, Attribute::NonNull); + annotateDereferenceableBytes(CI, ArgNo, 1); + } +} + +static void annotateNonNullAndDereferenceable(CallInst *CI, ArrayRef ArgNos, + Value *Size, const DataLayout &DL) { + if (ConstantInt *LenC = dyn_cast(Size)) { + annotateNonNullBasedOnAccess(CI, ArgNos); + annotateDereferenceableBytes(CI, ArgNos, LenC->getZExtValue()); + } else if (isKnownNonZero(Size, DL)) { + annotateNonNullBasedOnAccess(CI, ArgNos); + const APInt *X, *Y; + uint64_t DerefMin = 1; + if (match(Size, m_Select(m_Value(), m_APInt(X), 
m_APInt(Y)))) { + DerefMin = std::min(X->getZExtValue(), Y->getZExtValue()); + annotateDereferenceableBytes(CI, ArgNos, DerefMin); + } + } +} + //===----------------------------------------------------------------------===// // String and Memory Library Call Optimizations //===----------------------------------------------------------------------===// @@ -194,10 +256,13 @@ Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilder<> &B) { // Extract some information from the instruction Value *Dst = CI->getArgOperand(0); Value *Src = CI->getArgOperand(1); + annotateNonNullBasedOnAccess(CI, {0, 1}); // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); - if (Len == 0) + if (Len) + annotateDereferenceableBytes(CI, 1, Len); + else return nullptr; --Len; // Unbias length. @@ -232,24 +297,34 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) { // Extract some information from the instruction. Value *Dst = CI->getArgOperand(0); Value *Src = CI->getArgOperand(1); + Value *Size = CI->getArgOperand(2); uint64_t Len; + annotateNonNullBasedOnAccess(CI, 0); + if (isKnownNonZero(Size, DL)) + annotateNonNullBasedOnAccess(CI, 1); // We don't do anything if length is not constant. - if (ConstantInt *LengthArg = dyn_cast(CI->getArgOperand(2))) + ConstantInt *LengthArg = dyn_cast(Size); + if (LengthArg) { Len = LengthArg->getZExtValue(); - else + // strncat(x, c, 0) -> x + if (!Len) + return Dst; + } else { return nullptr; + } // See if we can get the length of the input string. uint64_t SrcLen = GetStringLength(Src); - if (SrcLen == 0) + if (SrcLen) { + annotateDereferenceableBytes(CI, 1, SrcLen); + --SrcLen; // Unbias length. + } else { return nullptr; - --SrcLen; // Unbias length. + } - // Handle the simple, do-nothing cases: // strncat(x, "", c) -> x - // strncat(x, c, 0) -> x - if (SrcLen == 0 || Len == 0) + if (SrcLen == 0) return Dst; // We don't optimize this case. @@ -265,13 +340,18 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); FunctionType *FT = Callee->getFunctionType(); Value *SrcStr = CI->getArgOperand(0); + annotateNonNullBasedOnAccess(CI, 0); // If the second operand is non-constant, see if we can compute the length // of the input string and turn this into memchr. ConstantInt *CharC = dyn_cast(CI->getArgOperand(1)); if (!CharC) { uint64_t Len = GetStringLength(SrcStr); - if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32. + if (Len) + annotateDereferenceableBytes(CI, 0, Len); + else + return nullptr; + if (!FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32. return nullptr; return emitMemChr(SrcStr, CI->getArgOperand(1), // include nul. @@ -304,6 +384,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) { Value *SrcStr = CI->getArgOperand(0); ConstantInt *CharC = dyn_cast(CI->getArgOperand(1)); + annotateNonNullBasedOnAccess(CI, 0); // Cannot fold anything if we're not looking for a constant. 
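// A minimal standalone sketch of the rule implemented by the
// annotateNonNullBasedOnAccess / annotateDereferenceableBytes helpers above
// (simplified model, not the LLVM API): an argument that a libcall
// unconditionally accesses for a known number of bytes may be marked nonnull
// and dereferenceable(bytes), unless null is a valid address in that
// argument's address space.

#include <algorithm>
#include <cstdint>

struct ArgFacts {
  bool NonNull = false;
  uint64_t DerefBytes = 0; // bytes known to be dereferenceable
};

// NullIsDefined models llvm::NullPointerIsDefined for the address space.
inline void noteAccess(ArgFacts &A, uint64_t AccessedBytes, bool NullIsDefined) {
  if (!NullIsDefined)
    A.NonNull = true;                                   // an access implies nonnull
  A.DerefBytes = std::max(A.DerefBytes, AccessedBytes); // keep the larger bound
}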
if (!CharC) @@ -351,7 +432,12 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) { // strcmp(P, "x") -> memcmp(P, "x", 2) uint64_t Len1 = GetStringLength(Str1P); + if (Len1) + annotateDereferenceableBytes(CI, 0, Len1); uint64_t Len2 = GetStringLength(Str2P); + if (Len2) + annotateDereferenceableBytes(CI, 1, Len2); + if (Len1 && Len2) { return emitMemCmp(Str1P, Str2P, ConstantInt::get(DL.getIntPtrType(CI->getContext()), @@ -374,17 +460,22 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) { TLI); } + annotateNonNullBasedOnAccess(CI, {0, 1}); return nullptr; } Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) { - Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); + Value *Str1P = CI->getArgOperand(0); + Value *Str2P = CI->getArgOperand(1); + Value *Size = CI->getArgOperand(2); if (Str1P == Str2P) // strncmp(x,x,n) -> 0 return ConstantInt::get(CI->getType(), 0); + if (isKnownNonZero(Size, DL)) + annotateNonNullBasedOnAccess(CI, {0, 1}); // Get the length argument if it is constant. uint64_t Length; - if (ConstantInt *LengthArg = dyn_cast(CI->getArgOperand(2))) + if (ConstantInt *LengthArg = dyn_cast(Size)) Length = LengthArg->getZExtValue(); else return nullptr; @@ -393,7 +484,7 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) { return ConstantInt::get(CI->getType(), 0); if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) - return emitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI); + return emitMemCmp(Str1P, Str2P, Size, B, DL, TLI); StringRef Str1, Str2; bool HasStr1 = getConstantStringInfo(Str1P, Str1); @@ -415,7 +506,11 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) { CI->getType()); uint64_t Len1 = GetStringLength(Str1P); + if (Len1) + annotateDereferenceableBytes(CI, 0, Len1); uint64_t Len2 = GetStringLength(Str2P); + if (Len2) + annotateDereferenceableBytes(CI, 1, Len2); // strncmp to memcmp if (!HasStr1 && HasStr2) { @@ -437,20 +532,38 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) { return nullptr; } +Value *LibCallSimplifier::optimizeStrNDup(CallInst *CI, IRBuilder<> &B) { + Value *Src = CI->getArgOperand(0); + ConstantInt *Size = dyn_cast(CI->getArgOperand(1)); + uint64_t SrcLen = GetStringLength(Src); + if (SrcLen && Size) { + annotateDereferenceableBytes(CI, 0, SrcLen); + if (SrcLen <= Size->getZExtValue() + 1) + return emitStrDup(Src, B, TLI); + } + + return nullptr; +} + Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) { Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) // strcpy(x,x) -> x return Src; - + + annotateNonNullBasedOnAccess(CI, {0, 1}); // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); - if (Len == 0) + if (Len) + annotateDereferenceableBytes(CI, 1, Len); + else return nullptr; // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. - B.CreateMemCpy(Dst, 1, Src, 1, - ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len)); + CallInst *NewCI = + B.CreateMemCpy(Dst, 1, Src, 1, + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len)); + NewCI->setAttributes(CI->getAttributes()); return Dst; } @@ -464,7 +577,9 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { // See if we can get the length of the input string. 
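// Worked example of the strcpy fold above, assuming the source length is a
// compile-time constant: the call is rewritten as a fixed-size memcpy that
// also copies the terminating nul byte.

#include <cstring>

void copyKnown(char *dst /* assumed >= 4 bytes */) {
  // strcpy(dst, "abc");        // GetStringLength() == 4 (includes the nul)
  std::memcpy(dst, "abc", 4);   // equivalent lowering, length = strlen + 1
}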
uint64_t Len = GetStringLength(Src); - if (Len == 0) + if (Len) + annotateDereferenceableBytes(CI, 1, Len); + else return nullptr; Type *PT = Callee->getFunctionType()->getParamType(0); @@ -474,7 +589,8 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. - B.CreateMemCpy(Dst, 1, Src, 1, LenV); + CallInst *NewCI = B.CreateMemCpy(Dst, 1, Src, 1, LenV); + NewCI->setAttributes(CI->getAttributes()); return DstEnd; } @@ -482,37 +598,47 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); Value *Dst = CI->getArgOperand(0); Value *Src = CI->getArgOperand(1); - Value *LenOp = CI->getArgOperand(2); + Value *Size = CI->getArgOperand(2); + annotateNonNullBasedOnAccess(CI, 0); + if (isKnownNonZero(Size, DL)) + annotateNonNullBasedOnAccess(CI, 1); + + uint64_t Len; + if (ConstantInt *LengthArg = dyn_cast(Size)) + Len = LengthArg->getZExtValue(); + else + return nullptr; + + // strncpy(x, y, 0) -> x + if (Len == 0) + return Dst; // See if we can get the length of the input string. uint64_t SrcLen = GetStringLength(Src); - if (SrcLen == 0) + if (SrcLen) { + annotateDereferenceableBytes(CI, 1, SrcLen); + --SrcLen; // Unbias length. + } else { return nullptr; - --SrcLen; + } if (SrcLen == 0) { // strncpy(x, "", y) -> memset(align 1 x, '\0', y) - B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1); + CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, 1); + AttrBuilder ArgAttrs(CI->getAttributes().getParamAttributes(0)); + NewCI->setAttributes(NewCI->getAttributes().addParamAttributes( + CI->getContext(), 0, ArgAttrs)); return Dst; } - uint64_t Len; - if (ConstantInt *LengthArg = dyn_cast(LenOp)) - Len = LengthArg->getZExtValue(); - else - return nullptr; - - if (Len == 0) - return Dst; // strncpy(x, y, 0) -> x - // Let strncpy handle the zero padding if (Len > SrcLen + 1) return nullptr; Type *PT = Callee->getFunctionType()->getParamType(0); // strncpy(x, s, c) -> memcpy(align 1 x, align 1 s, c) [s and c are constant] - B.CreateMemCpy(Dst, 1, Src, 1, ConstantInt::get(DL.getIntPtrType(PT), Len)); - + CallInst *NewCI = B.CreateMemCpy(Dst, 1, Src, 1, ConstantInt::get(DL.getIntPtrType(PT), Len)); + NewCI->setAttributes(CI->getAttributes()); return Dst; } @@ -608,7 +734,10 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B, } Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { - return optimizeStringLength(CI, B, 8); + if (Value *V = optimizeStringLength(CI, B, 8)) + return V; + annotateNonNullBasedOnAccess(CI, 0); + return nullptr; } Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilder<> &B) { @@ -756,21 +885,35 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) { Value *StrChr = emitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI); return StrChr ? 
B.CreateBitCast(StrChr, CI->getType()) : nullptr; } + + annotateNonNullBasedOnAccess(CI, {0, 1}); + return nullptr; +} + +Value *LibCallSimplifier::optimizeMemRChr(CallInst *CI, IRBuilder<> &B) { + if (isKnownNonZero(CI->getOperand(2), DL)) + annotateNonNullBasedOnAccess(CI, 0); return nullptr; } Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) { Value *SrcStr = CI->getArgOperand(0); + Value *Size = CI->getArgOperand(2); + annotateNonNullAndDereferenceable(CI, 0, Size, DL); ConstantInt *CharC = dyn_cast(CI->getArgOperand(1)); - ConstantInt *LenC = dyn_cast(CI->getArgOperand(2)); + ConstantInt *LenC = dyn_cast(Size); // memchr(x, y, 0) -> null - if (LenC && LenC->isZero()) - return Constant::getNullValue(CI->getType()); + if (LenC) { + if (LenC->isZero()) + return Constant::getNullValue(CI->getType()); + } else { + // From now on we need at least constant length and string. + return nullptr; + } - // From now on we need at least constant length and string. StringRef Str; - if (!LenC || !getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false)) + if (!getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false)) return nullptr; // Truncate the string to LenC. If Str is smaller than LenC we will still only @@ -913,6 +1056,7 @@ static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS, Ret = 1; return ConstantInt::get(CI->getType(), Ret); } + return nullptr; } @@ -925,12 +1069,19 @@ Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI, if (LHS == RHS) // memcmp(s,s,x) -> 0 return Constant::getNullValue(CI->getType()); + annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL); // Handle constant lengths. - if (ConstantInt *LenC = dyn_cast(Size)) - if (Value *Res = optimizeMemCmpConstantSize(CI, LHS, RHS, - LenC->getZExtValue(), B, DL)) - return Res; + ConstantInt *LenC = dyn_cast(Size); + if (!LenC) + return nullptr; + // memcmp(d,s,0) -> 0 + if (LenC->getZExtValue() == 0) + return Constant::getNullValue(CI->getType()); + + if (Value *Res = + optimizeMemCmpConstantSize(CI, LHS, RHS, LenC->getZExtValue(), B, DL)) + return Res; return nullptr; } @@ -939,9 +1090,9 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { return V; // memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0 - // `bcmp` can be more efficient than memcmp because it only has to know that - // there is a difference, not where it is. - if (isOnlyUsedInZeroEqualityComparison(CI) && TLI->has(LibFunc_bcmp)) { + // bcmp can be more efficient than memcmp because it only has to know that + // there is a difference, not how different one is to the other. 
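// Example of the memcmp -> bcmp rewrite described above. It is only valid
// because the result is used purely in an equality test against zero;
// bcmp's nonzero return value carries no ordering, unlike memcmp's.

#include <cstring>
#include <strings.h> // bcmp (POSIX)

bool equalViaMemcmp(const void *a, const void *b, std::size_t n) {
  return std::memcmp(a, b, n) == 0;
}

bool equalViaBcmp(const void *a, const void *b, std::size_t n) {
  return bcmp(a, b, n) == 0; // same truth value, potentially cheaper
}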
+ if (TLI->has(LibFunc_bcmp) && isOnlyUsedInZeroEqualityComparison(CI)) { Value *LHS = CI->getArgOperand(0); Value *RHS = CI->getArgOperand(1); Value *Size = CI->getArgOperand(2); @@ -956,16 +1107,37 @@ Value *LibCallSimplifier::optimizeBCmp(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) { + Value *Size = CI->getArgOperand(2); + annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL); + if (isa(CI)) + return nullptr; + // memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n) - B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, - CI->getArgOperand(2)); + CallInst *NewCI = + B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, Size); + NewCI->setAttributes(CI->getAttributes()); return CI->getArgOperand(0); } +Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilder<> &B) { + Value *Dst = CI->getArgOperand(0); + Value *N = CI->getArgOperand(2); + // mempcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n), x + n + CallInst *NewCI = B.CreateMemCpy(Dst, 1, CI->getArgOperand(1), 1, N); + NewCI->setAttributes(CI->getAttributes()); + return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N); +} + Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) { + Value *Size = CI->getArgOperand(2); + annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL); + if (isa(CI)) + return nullptr; + // memmove(x, y, n) -> llvm.memmove(align 1 x, align 1 y, n) - B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, - CI->getArgOperand(2)); + CallInst *NewCI = + B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, Size); + NewCI->setAttributes(CI->getAttributes()); return CI->getArgOperand(0); } @@ -1003,25 +1175,29 @@ Value *LibCallSimplifier::foldMallocMemset(CallInst *Memset, IRBuilder<> &B) { B.SetInsertPoint(Malloc->getParent(), ++Malloc->getIterator()); const DataLayout &DL = Malloc->getModule()->getDataLayout(); IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext()); - Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1), - Malloc->getArgOperand(0), Malloc->getAttributes(), - B, *TLI); - if (!Calloc) - return nullptr; - - Malloc->replaceAllUsesWith(Calloc); - eraseFromParent(Malloc); + if (Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1), + Malloc->getArgOperand(0), + Malloc->getAttributes(), B, *TLI)) { + substituteInParent(Malloc, Calloc); + return Calloc; + } - return Calloc; + return nullptr; } Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) { + Value *Size = CI->getArgOperand(2); + annotateNonNullAndDereferenceable(CI, 0, Size, DL); + if (isa(CI)) + return nullptr; + if (auto *Calloc = foldMallocMemset(CI, B)) return Calloc; // memset(p, v, n) -> llvm.memset(align 1 p, v, n) Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); - B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); + CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, Size, 1); + NewCI->setAttributes(CI->getAttributes()); return CI->getArgOperand(0); } @@ -1096,21 +1272,18 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B, if (!V[0] || (isBinary && !V[1])) return nullptr; - StringRef CalleeNm = CalleeFn->getName(); - AttributeList CalleeAt = CalleeFn->getAttributes(); - bool CalleeIn = CalleeFn->isIntrinsic(); - // If call isn't an intrinsic, check that it isn't within a function with the // same name as the float version of this call, otherwise the result is an // infinite loop. 
For example, from MinGW-w64: // // float expf(float val) { return (float) exp((double) val); } - if (!CalleeIn) { - const Function *Fn = CI->getFunction(); - StringRef FnName = Fn->getName(); - if (FnName.back() == 'f' && - FnName.size() == (CalleeNm.size() + 1) && - FnName.startswith(CalleeNm)) + StringRef CalleeName = CalleeFn->getName(); + bool IsIntrinsic = CalleeFn->isIntrinsic(); + if (!IsIntrinsic) { + StringRef CallerName = CI->getFunction()->getName(); + if (!CallerName.empty() && CallerName.back() == 'f' && + CallerName.size() == (CalleeName.size() + 1) && + CallerName.startswith(CalleeName)) return nullptr; } @@ -1120,16 +1293,16 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B, // g((double) float) -> (double) gf(float) Value *R; - if (CalleeIn) { + if (IsIntrinsic) { Module *M = CI->getModule(); Intrinsic::ID IID = CalleeFn->getIntrinsicID(); Function *Fn = Intrinsic::getDeclaration(M, IID, B.getFloatTy()); R = isBinary ? B.CreateCall(Fn, V) : B.CreateCall(Fn, V[0]); + } else { + AttributeList CalleeAttrs = CalleeFn->getAttributes(); + R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], CalleeName, B, CalleeAttrs) + : emitUnaryFloatFnCall(V[0], CalleeName, B, CalleeAttrs); } - else - R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], CalleeNm, B, CalleeAt) - : emitUnaryFloatFnCall(V[0], CalleeNm, B, CalleeAt); - return B.CreateFPExt(R, B.getDoubleTy()); } @@ -1234,9 +1407,25 @@ static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) { return InnerChain[Exp]; } +// Return a properly extended 32-bit integer if the operation is an itofp. +static Value *getIntToFPVal(Value *I2F, IRBuilder<> &B) { + if (isa(I2F) || isa(I2F)) { + Value *Op = cast(I2F)->getOperand(0); + // Make sure that the exponent fits inside an int32_t, + // thus avoiding any range issues that FP has not. + unsigned BitWidth = Op->getType()->getPrimitiveSizeInBits(); + if (BitWidth < 32 || + (BitWidth == 32 && isa(I2F))) + return isa(I2F) ? B.CreateSExt(Op, B.getInt32Ty()) + : B.CreateZExt(Op, B.getInt32Ty()); + } + + return nullptr; +} + /// Use exp{,2}(x * y) for pow(exp{,2}(x), y); -/// exp2(n * x) for pow(2.0 ** n, x); exp10(x) for pow(10.0, x); -/// exp2(log2(n) * x) for pow(n, x). +/// ldexp(1.0, x) for pow(2.0, itofp(x)); exp2(n * x) for pow(2.0 ** n, x); +/// exp10(x) for pow(10.0, x); exp2(log2(n) * x) for pow(n, x). Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) { Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); AttributeList Attrs = Pow->getCalledFunction()->getAttributes(); @@ -1269,9 +1458,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) { StringRef ExpName; Intrinsic::ID ID; Value *ExpFn; - LibFunc LibFnFloat; - LibFunc LibFnDouble; - LibFunc LibFnLongDouble; + LibFunc LibFnFloat, LibFnDouble, LibFnLongDouble; switch (LibFn) { default: @@ -1305,9 +1492,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) { // elimination cannot be trusted to remove it, since it may have side // effects (e.g., errno). When the only consumer for the original // exp{,2}() is pow(), then it has to be explicitly erased. 
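// Quick numerical illustration of the identity this block exploits (both
// calls must be fast-math; bitwise-exact equality is not guaranteed under
// strict FP semantics):
//   pow(exp(x), y)  ->  exp(x * y)
//   pow(exp2(x), y) ->  exp2(x * y)

#include <cmath>
#include <cstdio>

int main() {
  double x = 1.7, y = 2.5;
  std::printf("%.17g\n%.17g\n", std::pow(std::exp(x), y), std::exp(x * y));
  return 0;
}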
- BaseFn->replaceAllUsesWith(ExpFn); - eraseFromParent(BaseFn); - + substituteInParent(BaseFn, ExpFn); return ExpFn; } } @@ -1318,8 +1503,18 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) { if (!match(Pow->getArgOperand(0), m_APFloat(BaseF))) return nullptr; + // pow(2.0, itofp(x)) -> ldexp(1.0, x) + if (match(Base, m_SpecificFP(2.0)) && + (isa(Expo) || isa(Expo)) && + hasFloatFn(TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) { + if (Value *ExpoI = getIntToFPVal(Expo, B)) + return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), ExpoI, TLI, + LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl, + B, Attrs); + } + // pow(2.0 ** n, x) -> exp2(n * x) - if (hasUnaryFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) { + if (hasFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) { APFloat BaseR = APFloat(1.0); BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored); BaseR = BaseR / *BaseF; @@ -1344,7 +1539,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) { // pow(10.0, x) -> exp10(x) // TODO: There is no exp10() intrinsic yet, but some day there shall be one. if (match(Base, m_SpecificFP(10.0)) && - hasUnaryFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l)) + hasFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l)) return emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l, B, Attrs); @@ -1359,17 +1554,15 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) { if (Log) { Value *FMul = B.CreateFMul(Log, Expo, "mul"); - if (Pow->doesNotAccessMemory()) { + if (Pow->doesNotAccessMemory()) return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty), FMul, "exp2"); - } else { - if (hasUnaryFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, - LibFunc_exp2l)) - return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f, - LibFunc_exp2l, B, Attrs); - } + else if (hasFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) + return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f, + LibFunc_exp2l, B, Attrs); } } + return nullptr; } @@ -1384,8 +1577,7 @@ static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno, } // Otherwise, use the libcall for sqrt(). - if (hasUnaryFloatFn(TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf, - LibFunc_sqrtl)) + if (hasFloatFn(TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) // TODO: We also should check that the target can in fact lower the sqrt() // libcall. We currently have no way to ask this question, so we ask if // the target has a sqrt() libcall, which is not exactly the same. @@ -1452,7 +1644,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { bool Ignored; // Bail out if simplifying libcalls to pow() is disabled. - if (!hasUnaryFloatFn(TLI, Ty, LibFunc_pow, LibFunc_powf, LibFunc_powl)) + if (!hasFloatFn(TLI, Ty, LibFunc_pow, LibFunc_powf, LibFunc_powl)) return nullptr; // Propagate the math semantics from the call to any created instructions. 
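// Worked example of the new pow -> ldexp rewrite introduced in this hunk
// (the integer exponent must fit in 32 bits, matching getIntToFPVal above):

#include <cmath>

double pow2Before(int n) { return std::pow(2.0, static_cast<double>(n)); }
double pow2After(int n)  { return std::ldexp(1.0, n); } // 1.0 * 2^n, same result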
@@ -1480,8 +1672,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { if (match(Expo, m_SpecificFP(-1.0))) return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal"); - // pow(x, 0.0) -> 1.0 - if (match(Expo, m_SpecificFP(0.0))) + // pow(x, +/-0.0) -> 1.0 + if (match(Expo, m_AnyZeroFP())) return ConstantFP::get(Ty, 1.0); // pow(x, 1.0) -> x @@ -1558,16 +1750,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { // powf(x, itofp(y)) -> powi(x, y) if (AllowApprox && (isa(Expo) || isa(Expo))) { - Value *IntExpo = cast(Expo)->getOperand(0); - Value *NewExpo = nullptr; - unsigned BitWidth = IntExpo->getType()->getPrimitiveSizeInBits(); - if (isa(Expo) && BitWidth == 32) - NewExpo = IntExpo; - else if (BitWidth < 32) - NewExpo = isa(Expo) ? B.CreateSExt(IntExpo, B.getInt32Ty()) - : B.CreateZExt(IntExpo, B.getInt32Ty()); - if (NewExpo) - return createPowWithIntegerExponent(Base, NewExpo, M, B); + if (Value *ExpoI = getIntToFPVal(Expo, B)) + return createPowWithIntegerExponent(Base, ExpoI, M, B); } return Shrunk; @@ -1575,45 +1759,25 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - Value *Ret = nullptr; StringRef Name = Callee->getName(); - if (UnsafeFPShrink && Name == "exp2" && hasFloatVersion(Name)) + Value *Ret = nullptr; + if (UnsafeFPShrink && Name == TLI->getName(LibFunc_exp2) && + hasFloatVersion(Name)) Ret = optimizeUnaryDoubleFP(CI, B, true); + Type *Ty = CI->getType(); Value *Op = CI->getArgOperand(0); + // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32 - LibFunc LdExp = LibFunc_ldexpl; - if (Op->getType()->isFloatTy()) - LdExp = LibFunc_ldexpf; - else if (Op->getType()->isDoubleTy()) - LdExp = LibFunc_ldexp; - - if (TLI->has(LdExp)) { - Value *LdExpArg = nullptr; - if (SIToFPInst *OpC = dyn_cast(Op)) { - if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) - LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty()); - } else if (UIToFPInst *OpC = dyn_cast(Op)) { - if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32) - LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty()); - } - - if (LdExpArg) { - Constant *One = ConstantFP::get(CI->getContext(), APFloat(1.0f)); - if (!Op->getType()->isFloatTy()) - One = ConstantExpr::getFPExtend(One, Op->getType()); - - Module *M = CI->getModule(); - FunctionCallee NewCallee = M->getOrInsertFunction( - TLI->getName(LdExp), Op->getType(), Op->getType(), B.getInt32Ty()); - CallInst *CI = B.CreateCall(NewCallee, {One, LdExpArg}); - if (const Function *F = dyn_cast(Callee->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; - } + if ((isa(Op) || isa(Op)) && + hasFloatFn(TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) { + if (Value *Exp = getIntToFPVal(Op, B)) + return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), Exp, TLI, + LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl, + B, CI->getCalledFunction()->getAttributes()); } + return Ret; } @@ -1644,48 +1808,155 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { return B.CreateCall(F, { CI->getArgOperand(0), CI->getArgOperand(1) }); } -Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); +Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilder<> &B) { + Function *LogFn = 
Log->getCalledFunction(); + AttributeList Attrs = LogFn->getAttributes(); + StringRef LogNm = LogFn->getName(); + Intrinsic::ID LogID = LogFn->getIntrinsicID(); + Module *Mod = Log->getModule(); + Type *Ty = Log->getType(); Value *Ret = nullptr; - StringRef Name = Callee->getName(); - if (UnsafeFPShrink && hasFloatVersion(Name)) - Ret = optimizeUnaryDoubleFP(CI, B, true); - if (!CI->isFast()) - return Ret; - Value *Op1 = CI->getArgOperand(0); - auto *OpC = dyn_cast(Op1); + if (UnsafeFPShrink && hasFloatVersion(LogNm)) + Ret = optimizeUnaryDoubleFP(Log, B, true); // The earlier call must also be 'fast' in order to do these transforms. - if (!OpC || !OpC->isFast()) + CallInst *Arg = dyn_cast(Log->getArgOperand(0)); + if (!Log->isFast() || !Arg || !Arg->isFast() || !Arg->hasOneUse()) return Ret; - // log(pow(x,y)) -> y*log(x) - // This is only applicable to log, log2, log10. - if (Name != "log" && Name != "log2" && Name != "log10") + LibFunc LogLb, ExpLb, Exp2Lb, Exp10Lb, PowLb; + + // This is only applicable to log(), log2(), log10(). + if (TLI->getLibFunc(LogNm, LogLb)) + switch (LogLb) { + case LibFunc_logf: + LogID = Intrinsic::log; + ExpLb = LibFunc_expf; + Exp2Lb = LibFunc_exp2f; + Exp10Lb = LibFunc_exp10f; + PowLb = LibFunc_powf; + break; + case LibFunc_log: + LogID = Intrinsic::log; + ExpLb = LibFunc_exp; + Exp2Lb = LibFunc_exp2; + Exp10Lb = LibFunc_exp10; + PowLb = LibFunc_pow; + break; + case LibFunc_logl: + LogID = Intrinsic::log; + ExpLb = LibFunc_expl; + Exp2Lb = LibFunc_exp2l; + Exp10Lb = LibFunc_exp10l; + PowLb = LibFunc_powl; + break; + case LibFunc_log2f: + LogID = Intrinsic::log2; + ExpLb = LibFunc_expf; + Exp2Lb = LibFunc_exp2f; + Exp10Lb = LibFunc_exp10f; + PowLb = LibFunc_powf; + break; + case LibFunc_log2: + LogID = Intrinsic::log2; + ExpLb = LibFunc_exp; + Exp2Lb = LibFunc_exp2; + Exp10Lb = LibFunc_exp10; + PowLb = LibFunc_pow; + break; + case LibFunc_log2l: + LogID = Intrinsic::log2; + ExpLb = LibFunc_expl; + Exp2Lb = LibFunc_exp2l; + Exp10Lb = LibFunc_exp10l; + PowLb = LibFunc_powl; + break; + case LibFunc_log10f: + LogID = Intrinsic::log10; + ExpLb = LibFunc_expf; + Exp2Lb = LibFunc_exp2f; + Exp10Lb = LibFunc_exp10f; + PowLb = LibFunc_powf; + break; + case LibFunc_log10: + LogID = Intrinsic::log10; + ExpLb = LibFunc_exp; + Exp2Lb = LibFunc_exp2; + Exp10Lb = LibFunc_exp10; + PowLb = LibFunc_pow; + break; + case LibFunc_log10l: + LogID = Intrinsic::log10; + ExpLb = LibFunc_expl; + Exp2Lb = LibFunc_exp2l; + Exp10Lb = LibFunc_exp10l; + PowLb = LibFunc_powl; + break; + default: + return Ret; + } + else if (LogID == Intrinsic::log || LogID == Intrinsic::log2 || + LogID == Intrinsic::log10) { + if (Ty->getScalarType()->isFloatTy()) { + ExpLb = LibFunc_expf; + Exp2Lb = LibFunc_exp2f; + Exp10Lb = LibFunc_exp10f; + PowLb = LibFunc_powf; + } else if (Ty->getScalarType()->isDoubleTy()) { + ExpLb = LibFunc_exp; + Exp2Lb = LibFunc_exp2; + Exp10Lb = LibFunc_exp10; + PowLb = LibFunc_pow; + } else + return Ret; + } else return Ret; IRBuilder<>::FastMathFlagGuard Guard(B); - FastMathFlags FMF; - FMF.setFast(); - B.setFastMathFlags(FMF); + B.setFastMathFlags(FastMathFlags::getFast()); + + Intrinsic::ID ArgID = Arg->getIntrinsicID(); + LibFunc ArgLb = NotLibFunc; + TLI->getLibFunc(Arg, ArgLb); + + // log(pow(x,y)) -> y*log(x) + if (ArgLb == PowLb || ArgID == Intrinsic::pow) { + Value *LogX = + Log->doesNotAccessMemory() + ? 
B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty), + Arg->getOperand(0), "log") + : emitUnaryFloatFnCall(Arg->getOperand(0), LogNm, B, Attrs); + Value *MulY = B.CreateFMul(Arg->getArgOperand(1), LogX, "mul"); + // Since pow() may have side effects, e.g. errno, + // dead code elimination may not be trusted to remove it. + substituteInParent(Arg, MulY); + return MulY; + } + + // log(exp{,2,10}(y)) -> y*log({e,2,10}) + // TODO: There is no exp10() intrinsic yet. + if (ArgLb == ExpLb || ArgLb == Exp2Lb || ArgLb == Exp10Lb || + ArgID == Intrinsic::exp || ArgID == Intrinsic::exp2) { + Constant *Eul; + if (ArgLb == ExpLb || ArgID == Intrinsic::exp) + // FIXME: Add more precise value of e for long double. + Eul = ConstantFP::get(Log->getType(), numbers::e); + else if (ArgLb == Exp2Lb || ArgID == Intrinsic::exp2) + Eul = ConstantFP::get(Log->getType(), 2.0); + else + Eul = ConstantFP::get(Log->getType(), 10.0); + Value *LogE = Log->doesNotAccessMemory() + ? B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty), + Eul, "log") + : emitUnaryFloatFnCall(Eul, LogNm, B, Attrs); + Value *MulY = B.CreateFMul(Arg->getArgOperand(0), LogE, "mul"); + // Since exp() may have side effects, e.g. errno, + // dead code elimination may not be trusted to remove it. + substituteInParent(Arg, MulY); + return MulY; + } - LibFunc Func; - Function *F = OpC->getCalledFunction(); - if (F && ((TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) && - Func == LibFunc_pow) || F->getIntrinsicID() == Intrinsic::pow)) - return B.CreateFMul(OpC->getArgOperand(1), - emitUnaryFloatFnCall(OpC->getOperand(0), Callee->getName(), B, - Callee->getAttributes()), "mul"); - - // log(exp2(y)) -> y*log(2) - if (F && Name == "log" && TLI->getLibFunc(F->getName(), Func) && - TLI->has(Func) && Func == LibFunc_exp2) - return B.CreateFMul( - OpC->getArgOperand(0), - emitUnaryFloatFnCall(ConstantFP::get(CI->getType(), 2.0), - Callee->getName(), B, Callee->getAttributes()), - "logmul"); return Ret; } @@ -2137,6 +2408,7 @@ Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilder<> &B) { return New; } + annotateNonNullBasedOnAccess(CI, 0); return nullptr; } @@ -2231,21 +2503,21 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) { return New; } + annotateNonNullBasedOnAccess(CI, {0, 1}); return nullptr; } Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilder<> &B) { - // Check for a fixed format string. - StringRef FormatStr; - if (!getConstantStringInfo(CI->getArgOperand(2), FormatStr)) - return nullptr; - // Check for size ConstantInt *Size = dyn_cast(CI->getArgOperand(1)); if (!Size) return nullptr; uint64_t N = Size->getZExtValue(); + // Check for a fixed format string. + StringRef FormatStr; + if (!getConstantStringInfo(CI->getArgOperand(2), FormatStr)) + return nullptr; // If we just have a format string (nothing else crazy) transform it. 
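// Illustrative example of the snprintf fold performed below, assuming the
// format string is a known literal with no '%' conversions and the size is
// a known constant large enough to hold it (see the checks that follow):

#include <cstring>

int formatKnown(char *buf /* assumed >= 6 bytes */) {
  // snprintf(buf, 32, "hello");
  std::memcpy(buf, "hello", 6); // copy the literal plus its nul terminator
  return 5;                     // snprintf's return value: strlen("hello")
}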
if (CI->getNumArgOperands() == 3) { @@ -2318,6 +2590,8 @@ Value *LibCallSimplifier::optimizeSnPrintF(CallInst *CI, IRBuilder<> &B) { return V; } + if (isKnownNonZero(CI->getOperand(1), DL)) + annotateNonNullBasedOnAccess(CI, 0); return nullptr; } @@ -2503,6 +2777,7 @@ Value *LibCallSimplifier::optimizeFRead(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) { + annotateNonNullBasedOnAccess(CI, 0); if (!CI->use_empty()) return nullptr; @@ -2515,6 +2790,12 @@ Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) { return nullptr; } +Value *LibCallSimplifier::optimizeBCopy(CallInst *CI, IRBuilder<> &B) { + // bcopy(src, dst, n) -> llvm.memmove(dst, src, n) + return B.CreateMemMove(CI->getArgOperand(1), 1, CI->getArgOperand(0), 1, + CI->getArgOperand(2)); +} + bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) { LibFunc Func; SmallString<20> FloatFuncName = FuncName; @@ -2557,6 +2838,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, return optimizeStrLen(CI, Builder); case LibFunc_strpbrk: return optimizeStrPBrk(CI, Builder); + case LibFunc_strndup: + return optimizeStrNDup(CI, Builder); case LibFunc_strtol: case LibFunc_strtod: case LibFunc_strtof: @@ -2573,12 +2856,16 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, return optimizeStrStr(CI, Builder); case LibFunc_memchr: return optimizeMemChr(CI, Builder); + case LibFunc_memrchr: + return optimizeMemRChr(CI, Builder); case LibFunc_bcmp: return optimizeBCmp(CI, Builder); case LibFunc_memcmp: return optimizeMemCmp(CI, Builder); case LibFunc_memcpy: return optimizeMemCpy(CI, Builder); + case LibFunc_mempcpy: + return optimizeMemPCpy(CI, Builder); case LibFunc_memmove: return optimizeMemMove(CI, Builder); case LibFunc_memset: @@ -2587,6 +2874,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, return optimizeRealloc(CI, Builder); case LibFunc_wcslen: return optimizeWcslen(CI, Builder); + case LibFunc_bcopy: + return optimizeBCopy(CI, Builder); default: break; } @@ -2626,11 +2915,21 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI, case LibFunc_sqrt: case LibFunc_sqrtl: return optimizeSqrt(CI, Builder); + case LibFunc_logf: case LibFunc_log: + case LibFunc_logl: + case LibFunc_log10f: case LibFunc_log10: + case LibFunc_log10l: + case LibFunc_log1pf: case LibFunc_log1p: + case LibFunc_log1pl: + case LibFunc_log2f: case LibFunc_log2: + case LibFunc_log2l: + case LibFunc_logbf: case LibFunc_logb: + case LibFunc_logbl: return optimizeLog(CI, Builder); case LibFunc_tan: case LibFunc_tanf: @@ -2721,10 +3020,18 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { case Intrinsic::exp2: return optimizeExp2(CI, Builder); case Intrinsic::log: + case Intrinsic::log2: + case Intrinsic::log10: return optimizeLog(CI, Builder); case Intrinsic::sqrt: return optimizeSqrt(CI, Builder); // TODO: Use foldMallocMemset() with memset intrinsic. + case Intrinsic::memset: + return optimizeMemSet(CI, Builder); + case Intrinsic::memcpy: + return optimizeMemCpy(CI, Builder); + case Intrinsic::memmove: + return optimizeMemMove(CI, Builder); default: return nullptr; } @@ -2740,8 +3047,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { IRBuilder<> TmpBuilder(SimplifiedCI); if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, TmpBuilder)) { // If we were able to further simplify, remove the now redundant call. 
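// Example of the mempcpy handling registered above (mempcpy is a GNU
// extension also provided by other libcs): it copies like memcpy but
// returns a pointer one past the last byte written, which is what the
// x + n GEP built in optimizeMemPCpy produces.

#include <cstring>

void *mempcpyEquivalent(void *dst, const void *src, std::size_t n) {
  std::memcpy(dst, src, n);
  return static_cast<char *>(dst) + n; // == mempcpy(dst, src, n)
}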
- SimplifiedCI->replaceAllUsesWith(V); - eraseFromParent(SimplifiedCI); + substituteInParent(SimplifiedCI, V); return V; } } @@ -2898,7 +3204,9 @@ FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI, uint64_t Len = GetStringLength(CI->getArgOperand(*StrOp)); // If the length is 0 we don't know how long it is and so we can't // remove the check. - if (Len == 0) + if (Len) + annotateDereferenceableBytes(CI, *StrOp, Len); + else return false; return ObjSizeCI->getZExtValue() >= Len; } @@ -2915,8 +3223,9 @@ FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI, Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &B) { if (isFortifiedCallFoldable(CI, 3, 2)) { - B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, - CI->getArgOperand(2)); + CallInst *NewCI = B.CreateMemCpy( + CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, CI->getArgOperand(2)); + NewCI->setAttributes(CI->getAttributes()); return CI->getArgOperand(0); } return nullptr; @@ -2925,8 +3234,9 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B) { if (isFortifiedCallFoldable(CI, 3, 2)) { - B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, - CI->getArgOperand(2)); + CallInst *NewCI = B.CreateMemMove( + CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, CI->getArgOperand(2)); + NewCI->setAttributes(CI->getAttributes()); return CI->getArgOperand(0); } return nullptr; @@ -2938,7 +3248,9 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, if (isFortifiedCallFoldable(CI, 3, 2)) { Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); - B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); + CallInst *NewCI = + B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); + NewCI->setAttributes(CI->getAttributes()); return CI->getArgOperand(0); } return nullptr; @@ -2974,7 +3286,9 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, // Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk. 
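// Sketch of the fortified-call fold above: when the recorded object size is
// a known constant no smaller than the copy length, the runtime check can
// never fail and __memcpy_chk degenerates to a plain memcpy (the new code
// also copies the call-site attributes onto the generated intrinsic call).

#include <cstring>

void foldedChk(void *dst /* assumed >= 64 bytes */, const void *src) {
  // __memcpy_chk(dst, src, 16, 64);  // object size 64 >= length 16
  std::memcpy(dst, src, 16);          // equivalent, check elided
}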
uint64_t Len = GetStringLength(Src); - if (Len == 0) + if (Len) + annotateDereferenceableBytes(CI, 1, Len); + else return nullptr; Type *SizeTTy = DL.getIntPtrType(CI->getContext()); diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp index 456724779b43..5d380dcf231c 100644 --- a/lib/Transforms/Utils/SymbolRewriter.cpp +++ b/lib/Transforms/Utils/SymbolRewriter.cpp @@ -380,11 +380,11 @@ parseRewriteFunctionDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, // TODO see if there is a more elegant solution to selecting the rewrite // descriptor type if (!Target.empty()) - DL->push_back(llvm::make_unique( + DL->push_back(std::make_unique( Source, Target, Naked)); else DL->push_back( - llvm::make_unique(Source, Transform)); + std::make_unique(Source, Transform)); return true; } @@ -442,11 +442,11 @@ parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, } if (!Target.empty()) - DL->push_back(llvm::make_unique( + DL->push_back(std::make_unique( Source, Target, /*Naked*/ false)); else - DL->push_back(llvm::make_unique( + DL->push_back(std::make_unique( Source, Transform)); return true; @@ -505,11 +505,11 @@ parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, } if (!Target.empty()) - DL->push_back(llvm::make_unique( + DL->push_back(std::make_unique( Source, Target, /*Naked*/ false)); else - DL->push_back(llvm::make_unique( + DL->push_back(std::make_unique( Source, Transform)); return true; diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp index a77bf50fe10b..591e1fd2dbee 100644 --- a/lib/Transforms/Utils/VNCoercion.cpp +++ b/lib/Transforms/Utils/VNCoercion.cpp @@ -431,7 +431,7 @@ Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy, PtrVal = Builder.CreateBitCast(PtrVal, DestPTy); LoadInst *NewLoad = Builder.CreateLoad(DestTy, PtrVal); NewLoad->takeName(SrcVal); - NewLoad->setAlignment(SrcVal->getAlignment()); + NewLoad->setAlignment(MaybeAlign(SrcVal->getAlignment())); LLVM_DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n"); LLVM_DEBUG(dbgs() << "TO: " << *NewLoad << "\n"); diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index fbc3407c301f..da68d3713b40 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -27,8 +27,8 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalIndirectSymbol.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instruction.h" @@ -66,7 +66,7 @@ struct WorklistEntry { enum EntryKind { MapGlobalInit, MapAppendingVar, - MapGlobalAliasee, + MapGlobalIndirectSymbol, RemapFunction }; struct GVInitTy { @@ -77,9 +77,9 @@ struct WorklistEntry { GlobalVariable *GV; Constant *InitPrefix; }; - struct GlobalAliaseeTy { - GlobalAlias *GA; - Constant *Aliasee; + struct GlobalIndirectSymbolTy { + GlobalIndirectSymbol *GIS; + Constant *Target; }; unsigned Kind : 2; @@ -89,7 +89,7 @@ struct WorklistEntry { union { GVInitTy GVInit; AppendingGVTy AppendingGV; - GlobalAliaseeTy GlobalAliasee; + GlobalIndirectSymbolTy GlobalIndirectSymbol; Function *RemapF; } Data; }; @@ -161,8 +161,8 @@ public: bool IsOldCtorDtor, ArrayRef NewMembers, unsigned MCID); - void scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee, - unsigned MCID); + void 
scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS, Constant &Target, + unsigned MCID); void scheduleRemapFunction(Function &F, unsigned MCID); void flush(); @@ -172,7 +172,7 @@ private: void mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix, bool IsOldCtorDtor, ArrayRef NewMembers); - void mapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee); + void mapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS, Constant &Target); void remapFunction(Function &F, ValueToValueMapTy &VM); ValueToValueMapTy &getVM() { return *MCs[CurrentMCID].VM; } @@ -774,20 +774,6 @@ Metadata *MDNodeMapper::mapTopLevelUniquedNode(const MDNode &FirstN) { return *getMappedOp(&FirstN); } -namespace { - -struct MapMetadataDisabler { - ValueToValueMapTy &VM; - - MapMetadataDisabler(ValueToValueMapTy &VM) : VM(VM) { - VM.disableMapMetadata(); - } - - ~MapMetadataDisabler() { VM.enableMapMetadata(); } -}; - -} // end anonymous namespace - Optional Mapper::mapSimpleMetadata(const Metadata *MD) { // If the value already exists in the map, use it. if (Optional NewMD = getVM().getMappedMD(MD)) @@ -802,9 +788,6 @@ Optional Mapper::mapSimpleMetadata(const Metadata *MD) { return const_cast(MD); if (auto *CMD = dyn_cast(MD)) { - // Disallow recursion into metadata mapping through mapValue. - MapMetadataDisabler MMD(getVM()); - // Don't memoize ConstantAsMetadata. Instead of lasting until the // LLVMContext is destroyed, they can be deleted when the GlobalValue they // reference is destructed. These aren't super common, so the extra @@ -846,9 +829,9 @@ void Mapper::flush() { AppendingInits.resize(PrefixSize); break; } - case WorklistEntry::MapGlobalAliasee: - E.Data.GlobalAliasee.GA->setAliasee( - mapConstant(E.Data.GlobalAliasee.Aliasee)); + case WorklistEntry::MapGlobalIndirectSymbol: + E.Data.GlobalIndirectSymbol.GIS->setIndirectSymbol( + mapConstant(E.Data.GlobalIndirectSymbol.Target)); break; case WorklistEntry::RemapFunction: remapFunction(*E.Data.RemapF); @@ -1041,16 +1024,16 @@ void Mapper::scheduleMapAppendingVariable(GlobalVariable &GV, AppendingInits.append(NewMembers.begin(), NewMembers.end()); } -void Mapper::scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee, - unsigned MCID) { - assert(AlreadyScheduled.insert(&GA).second && "Should not reschedule"); +void Mapper::scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS, + Constant &Target, unsigned MCID) { + assert(AlreadyScheduled.insert(&GIS).second && "Should not reschedule"); assert(MCID < MCs.size() && "Invalid mapping context"); WorklistEntry WE; - WE.Kind = WorklistEntry::MapGlobalAliasee; + WE.Kind = WorklistEntry::MapGlobalIndirectSymbol; WE.MCID = MCID; - WE.Data.GlobalAliasee.GA = &GA; - WE.Data.GlobalAliasee.Aliasee = &Aliasee; + WE.Data.GlobalIndirectSymbol.GIS = &GIS; + WE.Data.GlobalIndirectSymbol.Target = &Target; Worklist.push_back(WE); } @@ -1147,9 +1130,10 @@ void ValueMapper::scheduleMapAppendingVariable(GlobalVariable &GV, GV, InitPrefix, IsOldCtorDtor, NewMembers, MCID); } -void ValueMapper::scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee, - unsigned MCID) { - getAsMapper(pImpl)->scheduleMapGlobalAliasee(GA, Aliasee, MCID); +void ValueMapper::scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS, + Constant &Target, + unsigned MCID) { + getAsMapper(pImpl)->scheduleMapGlobalIndirectSymbol(GIS, Target, MCID); } void ValueMapper::scheduleRemapFunction(Function &F, unsigned MCID) { diff --git a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp index 
4273080ddd91..f44976c723ec 100644 --- a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -147,7 +147,7 @@ private: static const unsigned MaxDepth = 3; bool isConsecutiveAccess(Value *A, Value *B); - bool areConsecutivePointers(Value *PtrA, Value *PtrB, const APInt &PtrDelta, + bool areConsecutivePointers(Value *PtrA, Value *PtrB, APInt PtrDelta, unsigned Depth = 0) const; bool lookThroughComplexAddresses(Value *PtrA, Value *PtrB, APInt PtrDelta, unsigned Depth) const; @@ -336,14 +336,29 @@ bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) { } bool Vectorizer::areConsecutivePointers(Value *PtrA, Value *PtrB, - const APInt &PtrDelta, - unsigned Depth) const { + APInt PtrDelta, unsigned Depth) const { unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(PtrA->getType()); APInt OffsetA(PtrBitWidth, 0); APInt OffsetB(PtrBitWidth, 0); PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); + unsigned NewPtrBitWidth = DL.getTypeStoreSizeInBits(PtrA->getType()); + + if (NewPtrBitWidth != DL.getTypeStoreSizeInBits(PtrB->getType())) + return false; + + // In case if we have to shrink the pointer + // stripAndAccumulateInBoundsConstantOffsets should properly handle a + // possible overflow and the value should fit into a smallest data type + // used in the cast/gep chain. + assert(OffsetA.getMinSignedBits() <= NewPtrBitWidth && + OffsetB.getMinSignedBits() <= NewPtrBitWidth); + + OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth); + OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth); + PtrDelta = PtrDelta.sextOrTrunc(NewPtrBitWidth); + APInt OffsetDelta = OffsetB - OffsetA; // Check if they are based on the same pointer. That makes the offsets @@ -650,7 +665,7 @@ Vectorizer::getVectorizablePrefix(ArrayRef Chain) { // We can ignore the alias if the we have a load store pair and the load // is known to be invariant. The load cannot be clobbered by the store. auto IsInvariantLoad = [](const LoadInst *LI) -> bool { - return LI->getMetadata(LLVMContext::MD_invariant_load); + return LI->hasMetadata(LLVMContext::MD_invariant_load); }; // We can ignore the alias as long as the load comes before the store, @@ -1077,7 +1092,7 @@ bool Vectorizer::vectorizeLoadChain( LoadInst *L0 = cast(Chain[0]); // If the vector has an int element, default to int for the whole load. - Type *LoadTy; + Type *LoadTy = nullptr; for (const auto &V : Chain) { LoadTy = cast(V)->getType(); if (LoadTy->isIntOrIntVectorTy()) @@ -1089,6 +1104,7 @@ bool Vectorizer::vectorizeLoadChain( break; } } + assert(LoadTy && "Can't determine LoadInst type from chain"); unsigned Sz = DL.getTypeSizeInBits(LoadTy); unsigned AS = L0->getPointerAddressSpace(); diff --git a/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 6ef8dc2d3cd7..f43842be5357 100644 --- a/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -13,7 +13,10 @@ // pass. It should be easy to create an analysis pass around it if there // is a need (but D45420 needs to happen first). 
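// A minimal standalone sketch of the consecutiveness test the vectorizer
// changes above are concerned with (simplified model, not the LLVM routine):
// after constant GEP offsets are stripped and normalized to the same bit
// width, two accesses are consecutive when they share a base pointer and
// their offsets differ by exactly the access size.

#include <cstdint>

struct StrippedPointer {
  const void *Base;
  int64_t Offset; // accumulated constant offset, in bytes
};

inline bool areConsecutive(const StrippedPointer &A, const StrippedPointer &B,
                           int64_t AccessSizeInBytes) {
  return A.Base == B.Base && B.Offset - A.Offset == AccessSizeInBytes;
}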
// +#include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/IntrinsicInst.h" @@ -47,38 +50,6 @@ static const unsigned MaxInterleaveFactor = 16; namespace llvm { -#ifndef NDEBUG -static void debugVectorizationFailure(const StringRef DebugMsg, - Instruction *I) { - dbgs() << "LV: Not vectorizing: " << DebugMsg; - if (I != nullptr) - dbgs() << " " << *I; - else - dbgs() << '.'; - dbgs() << '\n'; -} -#endif - -OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName, - StringRef RemarkName, - Loop *TheLoop, - Instruction *I) { - Value *CodeRegion = TheLoop->getHeader(); - DebugLoc DL = TheLoop->getStartLoc(); - - if (I) { - CodeRegion = I->getParent(); - // If there is no debug location attached to the instruction, revert back to - // using the loop's. - if (I->getDebugLoc()) - DL = I->getDebugLoc(); - } - - OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion); - R << "loop not vectorized: "; - return R; -} - bool LoopVectorizeHints::Hint::validate(unsigned Val) { switch (Kind) { case HK_WIDTH: @@ -88,6 +59,7 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) { case HK_FORCE: return (Val <= 1); case HK_ISVECTORIZED: + case HK_PREDICATE: return (Val == 0 || Val == 1); } return false; @@ -99,7 +71,9 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L, : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH), Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL), Force("vectorize.enable", FK_Undefined, HK_FORCE), - IsVectorized("isvectorized", 0, HK_ISVECTORIZED), TheLoop(L), ORE(ORE) { + IsVectorized("isvectorized", 0, HK_ISVECTORIZED), + Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), TheLoop(L), + ORE(ORE) { // Populate values with existing loop metadata. getHintsFromMetadata(); @@ -250,7 +224,7 @@ void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) { return; unsigned Val = C->getZExtValue(); - Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized}; + Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate}; for (auto H : Hints) { if (Name == H->Name) { if (H->validate(Val)) @@ -435,7 +409,8 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { const ValueToValueMap &Strides = getSymbolicStrides() ? 
*getSymbolicStrides() : ValueToValueMap(); - int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false); + bool CanAddPredicate = !TheLoop->getHeader()->getParent()->hasOptSize(); + int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false); if (Stride == 1 || Stride == -1) return Stride; return 0; @@ -445,14 +420,6 @@ bool LoopVectorizationLegality::isUniform(Value *V) { return LAI->isUniform(V); } -void LoopVectorizationLegality::reportVectorizationFailure( - const StringRef DebugMsg, const StringRef OREMsg, - const StringRef ORETag, Instruction *I) const { - LLVM_DEBUG(debugVectorizationFailure(DebugMsg, I)); - ORE->emit(createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(), - ORETag, TheLoop, I) << OREMsg); -} - bool LoopVectorizationLegality::canVectorizeOuterLoop() { assert(!TheLoop->empty() && "We are not vectorizing an outer loop."); // Store the result and return it at the end instead of exiting early, in case @@ -467,7 +434,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() { if (!Br) { reportVectorizationFailure("Unsupported basic block terminator", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood"); + "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -486,7 +453,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() { !LI->isLoopHeader(Br->getSuccessor(1))) { reportVectorizationFailure("Unsupported conditional branch", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood"); + "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -500,7 +467,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() { TheLoop /*context outer loop*/)) { reportVectorizationFailure("Outer loop contains divergent loops", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood"); + "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -511,7 +478,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() { if (!setupOuterLoopInductions()) { reportVectorizationFailure("Unsupported outer loop Phi(s)", "Unsupported outer loop Phi(s)", - "UnsupportedPhi"); + "UnsupportedPhi", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -618,7 +585,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { !PhiTy->isPointerTy()) { reportVectorizationFailure("Found a non-int non-pointer PHI", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood"); + "CFGNotUnderstood", ORE, TheLoop); return false; } @@ -631,6 +598,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Unsafe cyclic dependencies with header phis are identified during // legalization for reduction, induction and first order // recurrences. 
+ AllowedExit.insert(&I); continue; } @@ -638,7 +606,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (Phi->getNumIncomingValues() != 2) { reportVectorizationFailure("Found an invalid PHI", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood", Phi); + "CFGNotUnderstood", ORE, TheLoop, Phi); return false; } @@ -690,7 +658,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { reportVectorizationFailure("Found an unidentified PHI", "value that could not be identified as " "reduction is used outside the loop", - "NonReductionValueUsedOutsideLoop", Phi); + "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi); return false; } // end of PHI handling @@ -721,11 +689,11 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { "library call cannot be vectorized. " "Try compiling with -fno-math-errno, -ffast-math, " "or similar flags", - "CantVectorizeLibcall", CI); + "CantVectorizeLibcall", ORE, TheLoop, CI); } else { reportVectorizationFailure("Found a non-intrinsic callsite", "call instruction cannot be vectorized", - "CantVectorizeLibcall", CI); + "CantVectorizeLibcall", ORE, TheLoop, CI); } return false; } @@ -740,7 +708,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) { reportVectorizationFailure("Found unvectorizable intrinsic", "intrinsic instruction cannot be vectorized", - "CantVectorizeIntrinsic", CI); + "CantVectorizeIntrinsic", ORE, TheLoop, CI); return false; } } @@ -753,7 +721,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { isa(I)) { reportVectorizationFailure("Found unvectorizable type", "instruction return type cannot be vectorized", - "CantVectorizeInstructionReturnType", &I); + "CantVectorizeInstructionReturnType", ORE, TheLoop, &I); return false; } @@ -763,7 +731,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (!VectorType::isValidElementType(T)) { reportVectorizationFailure("Store instruction cannot be vectorized", "store instruction cannot be vectorized", - "CantVectorizeStore", ST); + "CantVectorizeStore", ORE, TheLoop, ST); return false; } @@ -773,12 +741,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Arbitrarily try a vector of 2 elements. Type *VecTy = VectorType::get(T, /*NumElements=*/2); assert(VecTy && "did not find vectorized version of stored type"); - unsigned Alignment = getLoadStoreAlignment(ST); - if (!TTI->isLegalNTStore(VecTy, Alignment)) { + const MaybeAlign Alignment = getLoadStoreAlignment(ST); + assert(Alignment && "Alignment should be set"); + if (!TTI->isLegalNTStore(VecTy, *Alignment)) { reportVectorizationFailure( "nontemporal store instruction cannot be vectorized", "nontemporal store instruction cannot be vectorized", - "CantVectorizeNontemporalStore", ST); + "CantVectorizeNontemporalStore", ORE, TheLoop, ST); return false; } } @@ -789,12 +758,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // supported on the target (arbitrarily try a vector of 2 elements). 
Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2); assert(VecTy && "did not find vectorized version of load type"); - unsigned Alignment = getLoadStoreAlignment(LD); - if (!TTI->isLegalNTLoad(VecTy, Alignment)) { + const MaybeAlign Alignment = getLoadStoreAlignment(LD); + assert(Alignment && "Alignment should be set"); + if (!TTI->isLegalNTLoad(VecTy, *Alignment)) { reportVectorizationFailure( "nontemporal load instruction cannot be vectorized", "nontemporal load instruction cannot be vectorized", - "CantVectorizeNontemporalLoad", LD); + "CantVectorizeNontemporalLoad", ORE, TheLoop, LD); return false; } } @@ -823,7 +793,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { } reportVectorizationFailure("Value cannot be used outside the loop", "value cannot be used outside the loop", - "ValueUsedOutsideLoop", &I); + "ValueUsedOutsideLoop", ORE, TheLoop, &I); return false; } } // next instr. @@ -833,12 +803,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (Inductions.empty()) { reportVectorizationFailure("Did not find one integer induction var", "loop induction variable could not be identified", - "NoInductionVariable"); + "NoInductionVariable", ORE, TheLoop); return false; } else if (!WidestIndTy) { reportVectorizationFailure("Did not find one integer induction var", "integer loop induction variable could not be identified", - "NoIntegerInductionVariable"); + "NoIntegerInductionVariable", ORE, TheLoop); return false; } else { LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n"); @@ -869,7 +839,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() { if (LAI->hasDependenceInvolvingLoopInvariantAddress()) { reportVectorizationFailure("Stores to a uniform address", "write to a loop invariant address could not be vectorized", - "CantVectorizeStoreToLoopInvariantAddress"); + "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop); return false; } Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks()); @@ -905,7 +875,7 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { } bool LoopVectorizationLegality::blockCanBePredicated( - BasicBlock *BB, SmallPtrSetImpl &SafePtrs) { + BasicBlock *BB, SmallPtrSetImpl &SafePtrs, bool PreserveGuards) { const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); for (Instruction &I : *BB) { @@ -924,7 +894,7 @@ bool LoopVectorizationLegality::blockCanBePredicated( // !llvm.mem.parallel_loop_access implies if-conversion safety. // Otherwise, record that the load needs (real or emulated) masking // and let the cost model decide. - if (!IsAnnotatedParallel) + if (!IsAnnotatedParallel || PreserveGuards) MaskedOp.insert(LI); continue; } @@ -953,23 +923,41 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { if (!EnableIfConversion) { reportVectorizationFailure("If-conversion is disabled", "if-conversion is disabled", - "IfConversionDisabled"); + "IfConversionDisabled", + ORE, TheLoop); return false; } assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable"); - // A list of pointers that we can safely read and write to. + // A list of pointers which are known to be dereferenceable within scope of + // the loop body for each iteration of the loop which executes. That is, + // the memory pointed to can be dereferenced (with the access size implied by + // the value's type) unconditionally within the loop header without + // introducing a new fault. SmallPtrSet SafePointes; // Collect safe addresses. 
for (BasicBlock *BB : TheLoop->blocks()) { - if (blockNeedsPredication(BB)) + if (!blockNeedsPredication(BB)) { + for (Instruction &I : *BB) + if (auto *Ptr = getLoadStorePointerOperand(&I)) + SafePointes.insert(Ptr); continue; + } - for (Instruction &I : *BB) - if (auto *Ptr = getLoadStorePointerOperand(&I)) - SafePointes.insert(Ptr); + // For a block which requires predication, a address may be safe to access + // in the loop w/o predication if we can prove dereferenceability facts + // sufficient to ensure it'll never fault within the loop. For the moment, + // we restrict this to loads; stores are more complicated due to + // concurrency restrictions. + ScalarEvolution &SE = *PSE.getSE(); + for (Instruction &I : *BB) { + LoadInst *LI = dyn_cast(&I); + if (LI && !mustSuppressSpeculation(*LI) && + isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT)) + SafePointes.insert(LI->getPointerOperand()); + } } // Collect the blocks that need predication. @@ -979,7 +967,8 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { if (!isa(BB->getTerminator())) { reportVectorizationFailure("Loop contains a switch statement", "loop contains a switch statement", - "LoopContainsSwitch", BB->getTerminator()); + "LoopContainsSwitch", ORE, TheLoop, + BB->getTerminator()); return false; } @@ -989,14 +978,16 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { reportVectorizationFailure( "Control flow cannot be substituted for a select", "control flow cannot be substituted for a select", - "NoCFGForSelect", BB->getTerminator()); + "NoCFGForSelect", ORE, TheLoop, + BB->getTerminator()); return false; } } else if (BB != Header && !canIfConvertPHINodes(BB)) { reportVectorizationFailure( "Control flow cannot be substituted for a select", "control flow cannot be substituted for a select", - "NoCFGForSelect", BB->getTerminator()); + "NoCFGForSelect", ORE, TheLoop, + BB->getTerminator()); return false; } } @@ -1026,7 +1017,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, if (!Lp->getLoopPreheader()) { reportVectorizationFailure("Loop doesn't have a legal pre-header", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood"); + "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -1037,7 +1028,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, if (Lp->getNumBackEdges() != 1) { reportVectorizationFailure("The loop must have a single backedge", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood"); + "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -1048,7 +1039,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, if (!Lp->getExitingBlock()) { reportVectorizationFailure("The loop must have an exiting block", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood"); + "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -1061,7 +1052,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, if (Lp->getExitingBlock() != Lp->getLoopLatch()) { reportVectorizationFailure("The exiting block is not the loop latch", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood"); + "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -1124,7 +1115,8 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { if (!canVectorizeOuterLoop()) { reportVectorizationFailure("Unsupported outer loop", "unsupported outer loop", - "UnsupportedOuterLoop"); + 
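Restated as a minimal standalone sketch of the safe-pointer collection added above, with plain structs and booleans standing in for blockNeedsPredication(), mustSuppressSpeculation() and isDereferenceableAndAlignedInLoop(): unconditionally executed blocks contribute every load/store pointer, while predicated blocks only contribute loads that are provably dereferenceable on every iteration.

#include <set>
#include <string>
#include <vector>

// Stand-in model of the collection loop above; the flags are assumptions
// replacing the LLVM analyses named in the lead-in.
struct MemAccess {
  std::string Ptr;
  bool IsLoad;
  bool SpeculationSuppressed;   // e.g. access that must not be speculated
  bool DereferenceableInLoop;   // proven safe for every loop iteration
};

struct Block {
  bool NeedsPredication;
  std::vector<MemAccess> Accesses;
};

std::set<std::string> collectSafePointers(const std::vector<Block> &Loop) {
  std::set<std::string> Safe;
  for (const Block &BB : Loop) {
    if (!BB.NeedsPredication) {
      // Executed on every iteration: all accessed pointers are safe.
      for (const MemAccess &A : BB.Accesses)
        Safe.insert(A.Ptr);
      continue;
    }
    // Predicated block: only loads with proven dereferenceability are safe;
    // stores are excluded because of concurrency constraints.
    for (const MemAccess &A : BB.Accesses)
      if (A.IsLoad && !A.SpeculationSuppressed && A.DereferenceableInLoop)
        Safe.insert(A.Ptr);
  }
  return Safe;
}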
"UnsupportedOuterLoop", + ORE, TheLoop); // TODO: Implement DoExtraAnalysis when subsequent legal checks support // outer loops. return false; @@ -1176,7 +1168,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) { reportVectorizationFailure("Too many SCEV checks needed", "Too many SCEV assumptions need to be made and checked at runtime", - "TooManySCEVRunTimeChecks"); + "TooManySCEVRunTimeChecks", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -1190,7 +1182,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { return Result; } -bool LoopVectorizationLegality::canFoldTailByMasking() { +bool LoopVectorizationLegality::prepareToFoldTailByMasking() { LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n"); @@ -1199,22 +1191,21 @@ bool LoopVectorizationLegality::canFoldTailByMasking() { "No primary induction, cannot fold tail by masking", "Missing a primary induction variable in the loop, which is " "needed in order to fold tail by masking as required.", - "NoPrimaryInduction"); + "NoPrimaryInduction", ORE, TheLoop); return false; } - // TODO: handle reductions when tail is folded by masking. - if (!Reductions.empty()) { - reportVectorizationFailure( - "Loop has reductions, cannot fold tail by masking", - "Cannot fold tail by masking in the presence of reductions.", - "ReductionFoldingTailByMasking"); - return false; - } + SmallPtrSet ReductionLiveOuts; - // TODO: handle outside users when tail is folded by masking. + for (auto &Reduction : *getReductionVars()) + ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr()); + + // TODO: handle non-reduction outside users when tail is folded by masking. for (auto *AE : AllowedExit) { - // Check that all users of allowed exit values are inside the loop. + // Check that all users of allowed exit values are inside the loop or + // are the live-out of a reduction. + if (ReductionLiveOuts.count(AE)) + continue; for (User *U : AE->users()) { Instruction *UI = cast(U); if (TheLoop->contains(UI)) @@ -1222,7 +1213,7 @@ bool LoopVectorizationLegality::canFoldTailByMasking() { reportVectorizationFailure( "Cannot fold tail by masking, loop has an outside user for", "Cannot fold tail by masking in the presence of live outs.", - "LiveOutFoldingTailByMasking", UI); + "LiveOutFoldingTailByMasking", ORE, TheLoop, UI); return false; } } @@ -1233,11 +1224,12 @@ bool LoopVectorizationLegality::canFoldTailByMasking() { // Check and mark all blocks for predication, including those that ordinarily // do not need predication such as the header block. for (BasicBlock *BB : TheLoop->blocks()) { - if (!blockCanBePredicated(BB, SafePointers)) { + if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) { reportVectorizationFailure( "Cannot fold tail by masking as required", "control flow cannot be substituted for a select", - "NoCFGForSelect", BB->getTerminator()); + "NoCFGForSelect", ORE, TheLoop, + BB->getTerminator()); return false; } } diff --git a/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 97077cce83e3..a5e85f27fabf 100644 --- a/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -228,11 +228,11 @@ public: /// Plan how to best vectorize, return the best VF and its cost, or None if /// vectorization and interleaving should be avoided up front. 
- Optional plan(bool OptForSize, unsigned UserVF); + Optional plan(unsigned UserVF); /// Use the VPlan-native path to plan how to best vectorize, return the best /// VF and its cost. - VectorizationFactor planInVPlanNativePath(bool OptForSize, unsigned UserVF); + VectorizationFactor planInVPlanNativePath(unsigned UserVF); /// Finalize the best decision and dispose of all other VPlans. void setBestPlan(unsigned VF, unsigned UF); diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 46265e3f3e13..8f0bf70f873c 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -177,6 +177,14 @@ static cl::opt TinyTripCountVectorThreshold( "value are vectorized only if no scalar iteration overheads " "are incurred.")); +// Indicates that an epilogue is undesired, predication is preferred. +// This means that the vectorizer will try to fold the loop-tail (epilogue) +// into the loop and predicate the loop body accordingly. +static cl::opt PreferPredicateOverEpilog( + "prefer-predicate-over-epilog", cl::init(false), cl::Hidden, + cl::desc("Indicate that an epilogue is undesired, predication should be " + "used instead.")); + static cl::opt MaximizeBandwidth( "vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which " @@ -347,6 +355,29 @@ static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) { : ConstantFP::get(Ty, C); } +/// Returns "best known" trip count for the specified loop \p L as defined by +/// the following procedure: +/// 1) Returns exact trip count if it is known. +/// 2) Returns expected trip count according to profile data if any. +/// 3) Returns upper bound estimate if it is known. +/// 4) Returns None if all of the above failed. +static Optional getSmallBestKnownTC(ScalarEvolution &SE, Loop *L) { + // Check if exact trip count is known. + if (unsigned ExpectedTC = SE.getSmallConstantTripCount(L)) + return ExpectedTC; + + // Check if there is an expected trip count available from profile data. + if (LoopVectorizeWithBlockFrequency) + if (auto EstimatedTC = getLoopEstimatedTripCount(L)) + return EstimatedTC; + + // Check if upper bound estimate is known. + if (unsigned ExpectedTC = SE.getSmallConstantMaxTripCount(L)) + return ExpectedTC; + + return None; +} + namespace llvm { /// InnerLoopVectorizer vectorizes loops which contain only one basic @@ -795,6 +826,59 @@ void InnerLoopVectorizer::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) B.SetCurrentDebugLocation(DebugLoc()); } +/// Write a record \p DebugMsg about vectorization failure to the debug +/// output stream. If \p I is passed, it is an instruction that prevents +/// vectorization. +#ifndef NDEBUG +static void debugVectorizationFailure(const StringRef DebugMsg, + Instruction *I) { + dbgs() << "LV: Not vectorizing: " << DebugMsg; + if (I != nullptr) + dbgs() << " " << *I; + else + dbgs() << '.'; + dbgs() << '\n'; +} +#endif + +/// Create an analysis remark that explains why vectorization failed +/// +/// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p +/// RemarkName is the identifier for the remark. If \p I is passed it is an +/// instruction that prevents vectorization. Otherwise \p TheLoop is used for +/// the location of the remark. \return the remark object that can be +/// streamed to. 
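The selection order of the new getSmallBestKnownTC helper above, shown as a self-contained sketch: std::optional stands in for llvm::Optional, and the three callbacks are assumptions modelling SE.getSmallConstantTripCount, getLoopEstimatedTripCount and SE.getSmallConstantMaxTripCount (0 or nullopt meaning "unknown").

#include <functional>
#include <optional>

// Sketch of the "best known" trip-count procedure: exact count first, then
// the profile-based estimate, then the constant upper bound, then nothing.
std::optional<unsigned>
smallBestKnownTC(const std::function<unsigned()> &ExactTC,
                 const std::function<std::optional<unsigned>()> &ProfileTC,
                 const std::function<unsigned()> &MaxTC,
                 bool UseProfileData = true) {
  if (unsigned TC = ExactTC())
    return TC;                 // 1) exact trip count is known
  if (UseProfileData)
    if (auto TC = ProfileTC())
      return TC;               // 2) expected trip count from profile data
  if (unsigned TC = MaxTC())
    return TC;                 // 3) constant upper-bound estimate
  return std::nullopt;         // 4) nothing known
}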
+static OptimizationRemarkAnalysis createLVAnalysis(const char *PassName, + StringRef RemarkName, Loop *TheLoop, Instruction *I) { + Value *CodeRegion = TheLoop->getHeader(); + DebugLoc DL = TheLoop->getStartLoc(); + + if (I) { + CodeRegion = I->getParent(); + // If there is no debug location attached to the instruction, revert back to + // using the loop's. + if (I->getDebugLoc()) + DL = I->getDebugLoc(); + } + + OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion); + R << "loop not vectorized: "; + return R; +} + +namespace llvm { + +void reportVectorizationFailure(const StringRef DebugMsg, + const StringRef OREMsg, const StringRef ORETag, + OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I) { + LLVM_DEBUG(debugVectorizationFailure(DebugMsg, I)); + LoopVectorizeHints Hints(TheLoop, true /* doesn't matter */, *ORE); + ORE->emit(createLVAnalysis(Hints.vectorizeAnalysisPassName(), + ORETag, TheLoop, I) << OREMsg); +} + +} // end namespace llvm + #ifndef NDEBUG /// \return string containing a file name and a line # for the given loop. static std::string getDebugLocString(const Loop *L) { @@ -836,6 +920,26 @@ void InnerLoopVectorizer::addMetadata(ArrayRef To, namespace llvm { +// Loop vectorization cost-model hints how the scalar epilogue loop should be +// lowered. +enum ScalarEpilogueLowering { + + // The default: allowing scalar epilogues. + CM_ScalarEpilogueAllowed, + + // Vectorization with OptForSize: don't allow epilogues. + CM_ScalarEpilogueNotAllowedOptSize, + + // A special case of vectorisation with OptForSize: loops with a very small + // trip count are considered for vectorization under OptForSize, thereby + // making sure the cost of their loop body is dominant, free of runtime + // guards and scalar iteration overheads. + CM_ScalarEpilogueNotAllowedLowTripLoop, + + // Loop hint predicate indicating an epilogue is undesired. + CM_ScalarEpilogueNotNeededUsePredicate +}; + /// LoopVectorizationCostModel - estimates the expected speedups due to /// vectorization. /// In many cases vectorization is not profitable. This can happen because of @@ -845,20 +949,26 @@ namespace llvm { /// different operations. class LoopVectorizationCostModel { public: - LoopVectorizationCostModel(Loop *L, PredicatedScalarEvolution &PSE, - LoopInfo *LI, LoopVectorizationLegality *Legal, + LoopVectorizationCostModel(ScalarEpilogueLowering SEL, Loop *L, + PredicatedScalarEvolution &PSE, LoopInfo *LI, + LoopVectorizationLegality *Legal, const TargetTransformInfo &TTI, const TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, const Function *F, const LoopVectorizeHints *Hints, InterleavedAccessInfo &IAI) - : TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB), - AC(AC), ORE(ORE), TheFunction(F), Hints(Hints), InterleaveInfo(IAI) {} + : ScalarEpilogueStatus(SEL), TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), + TTI(TTI), TLI(TLI), DB(DB), AC(AC), ORE(ORE), TheFunction(F), + Hints(Hints), InterleaveInfo(IAI) {} /// \return An upper bound for the vectorization factor, or None if /// vectorization and interleaving should be avoided up front. - Optional computeMaxVF(bool OptForSize); + Optional computeMaxVF(); + + /// \return True if runtime checks are required for vectorization, and false + /// otherwise. + bool runtimeChecksRequired(); /// \return The most profitable vectorization factor and the cost of that VF. /// This method checks every power of two up to MaxVF. 
If UserVF is not ZERO @@ -881,8 +991,7 @@ public: /// If interleave count has been specified by metadata it will be returned. /// Otherwise, the interleave count is computed and returned. VF and LoopCost /// are the selected vectorization factor and the cost of the selected VF. - unsigned selectInterleaveCount(bool OptForSize, unsigned VF, - unsigned LoopCost); + unsigned selectInterleaveCount(unsigned VF, unsigned LoopCost); /// Memory access instruction may be vectorized in more than one way. /// Form of instruction after vectorization depends on cost. @@ -897,10 +1006,11 @@ public: /// of a loop. struct RegisterUsage { /// Holds the number of loop invariant values that are used in the loop. - unsigned LoopInvariantRegs; - + /// The key is ClassID of target-provided register class. + SmallMapVector LoopInvariantRegs; /// Holds the maximum number of concurrent live intervals in the loop. - unsigned MaxLocalUsers; + /// The key is ClassID of target-provided register class. + SmallMapVector MaxLocalUsers; }; /// \return Returns information about the register usages of the loop for the @@ -1080,14 +1190,16 @@ public: /// Returns true if the target machine supports masked store operation /// for the given \p DataType and kind of access to \p Ptr. - bool isLegalMaskedStore(Type *DataType, Value *Ptr) { - return Legal->isConsecutivePtr(Ptr) && TTI.isLegalMaskedStore(DataType); + bool isLegalMaskedStore(Type *DataType, Value *Ptr, MaybeAlign Alignment) { + return Legal->isConsecutivePtr(Ptr) && + TTI.isLegalMaskedStore(DataType, Alignment); } /// Returns true if the target machine supports masked load operation /// for the given \p DataType and kind of access to \p Ptr. - bool isLegalMaskedLoad(Type *DataType, Value *Ptr) { - return Legal->isConsecutivePtr(Ptr) && TTI.isLegalMaskedLoad(DataType); + bool isLegalMaskedLoad(Type *DataType, Value *Ptr, MaybeAlign Alignment) { + return Legal->isConsecutivePtr(Ptr) && + TTI.isLegalMaskedLoad(DataType, Alignment); } /// Returns true if the target machine supports masked scatter operation @@ -1157,11 +1269,14 @@ public: /// to handle accesses with gaps, and there is nothing preventing us from /// creating a scalar epilogue. bool requiresScalarEpilogue() const { - return IsScalarEpilogueAllowed && InterleaveInfo.requiresScalarEpilogue(); + return isScalarEpilogueAllowed() && InterleaveInfo.requiresScalarEpilogue(); } - /// Returns true if a scalar epilogue is not allowed due to optsize. - bool isScalarEpilogueAllowed() const { return IsScalarEpilogueAllowed; } + /// Returns true if a scalar epilogue is not allowed due to optsize or a + /// loop hint annotation. + bool isScalarEpilogueAllowed() const { + return ScalarEpilogueStatus == CM_ScalarEpilogueAllowed; + } /// Returns true if all loop blocks should be masked to fold tail loop. bool foldTailByMasking() const { return FoldTailByMasking; } @@ -1187,7 +1302,7 @@ private: /// \return An upper bound for the vectorization factor, larger than zero. /// One is returned if vectorization should best be avoided due to cost. - unsigned computeFeasibleMaxVF(bool OptForSize, unsigned ConstTripCount); + unsigned computeFeasibleMaxVF(unsigned ConstTripCount); /// The vectorization cost is a combination of the cost itself and a boolean /// indicating whether any of the contributing operations will actually @@ -1246,15 +1361,6 @@ private: /// should be used. 
bool useEmulatedMaskMemRefHack(Instruction *I); - /// Create an analysis remark that explains why vectorization failed - /// - /// \p RemarkName is the identifier for the remark. \return the remark object - /// that can be streamed to. - OptimizationRemarkAnalysis createMissedAnalysis(StringRef RemarkName) { - return createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(), - RemarkName, TheLoop); - } - /// Map of scalar integer values to the smallest bitwidth they can be legally /// represented as. The vector equivalents of these values should be truncated /// to this type. @@ -1270,13 +1376,13 @@ private: SmallPtrSet PredicatedBBsAfterVectorization; /// Records whether it is allowed to have the original scalar loop execute at - /// least once. This may be needed as a fallback loop in case runtime + /// least once. This may be needed as a fallback loop in case runtime /// aliasing/dependence checks fail, or to handle the tail/remainder /// iterations when the trip count is unknown or doesn't divide by the VF, /// or as a peel-loop to handle gaps in interleave-groups. /// Under optsize and when the trip count is very small we don't allow any /// iterations to execute in the scalar loop. - bool IsScalarEpilogueAllowed = true; + ScalarEpilogueLowering ScalarEpilogueStatus = CM_ScalarEpilogueAllowed; /// All blocks of loop are to be masked to fold tail of scalar iterations. bool FoldTailByMasking = false; @@ -1496,7 +1602,7 @@ struct LoopVectorize : public FunctionPass { auto *DT = &getAnalysis().getDomTree(); auto *BFI = &getAnalysis().getBFI(); auto *TLIP = getAnalysisIfAvailable(); - auto *TLI = TLIP ? &TLIP->getTLI() : nullptr; + auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr; auto *AA = &getAnalysis().getAAResults(); auto *AC = &getAnalysis().getAssumptionCache(F); auto *LAA = &getAnalysis(); @@ -2253,12 +2359,11 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, Type *ScalarDataTy = getMemInstValueType(Instr); Type *DataTy = VectorType::get(ScalarDataTy, VF); Value *Ptr = getLoadStorePointerOperand(Instr); - unsigned Alignment = getLoadStoreAlignment(Instr); // An alignment of 0 means target abi alignment. We need to use the scalar's // target abi alignment in such a case. const DataLayout &DL = Instr->getModule()->getDataLayout(); - if (!Alignment) - Alignment = DL.getABITypeAlignment(ScalarDataTy); + const Align Alignment = + DL.getValueOrABITypeAlignment(getLoadStoreAlignment(Instr), ScalarDataTy); unsigned AddressSpace = getLoadStoreAddressSpace(Instr); // Determine if the pointer operand of the access is either consecutive or @@ -2322,8 +2427,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, if (CreateGatherScatter) { Value *MaskPart = isMaskRequired ? 
Mask[Part] : nullptr; Value *VectorGep = getOrCreateVectorValue(Ptr, Part); - NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment, - MaskPart); + NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, + Alignment.value(), MaskPart); } else { if (Reverse) { // If we store to reverse consecutive memory locations, then we need @@ -2334,10 +2439,11 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, } auto *VecPtr = CreateVecPtr(Part, Ptr); if (isMaskRequired) - NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment, - Mask[Part]); + NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, + Alignment.value(), Mask[Part]); else - NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment); + NewSI = + Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment.value()); } addMetadata(NewSI, SI); } @@ -2352,18 +2458,18 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, if (CreateGatherScatter) { Value *MaskPart = isMaskRequired ? Mask[Part] : nullptr; Value *VectorGep = getOrCreateVectorValue(Ptr, Part); - NewLI = Builder.CreateMaskedGather(VectorGep, Alignment, MaskPart, + NewLI = Builder.CreateMaskedGather(VectorGep, Alignment.value(), MaskPart, nullptr, "wide.masked.gather"); addMetadata(NewLI, LI); } else { auto *VecPtr = CreateVecPtr(Part, Ptr); if (isMaskRequired) - NewLI = Builder.CreateMaskedLoad(VecPtr, Alignment, Mask[Part], + NewLI = Builder.CreateMaskedLoad(VecPtr, Alignment.value(), Mask[Part], UndefValue::get(DataTy), "wide.masked.load"); else - NewLI = - Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load"); + NewLI = Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment.value(), + "wide.load"); // Add metadata to the load, but setVectorValue to the reverse shuffle. addMetadata(NewLI, LI); @@ -2615,8 +2721,9 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) { if (C->isZero()) return; - assert(!Cost->foldTailByMasking() && - "Cannot SCEV check stride or overflow when folding tail"); + assert(!BB->getParent()->hasOptSize() && + "Cannot SCEV check stride or overflow when optimizing for size"); + // Create a new block containing the stride check. BB->setName("vector.scevcheck"); auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph"); @@ -2649,7 +2756,20 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) { if (!MemRuntimeCheck) return; - assert(!Cost->foldTailByMasking() && "Cannot check memory when folding tail"); + if (BB->getParent()->hasOptSize()) { + assert(Cost->Hints->getForce() == LoopVectorizeHints::FK_Enabled && + "Cannot emit memory checks when optimizing for size, unless forced " + "to vectorize."); + ORE->emit([&]() { + return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationCodeSize", + L->getStartLoc(), L->getHeader()) + << "Code-size may be reduced by not forcing " + "vectorization, or by source-code modifications " + "eliminating the need for runtime checks " + "(e.g., adding 'restrict')."; + }); + } + // Create a new block containing the memory check. BB->setName("vector.memcheck"); auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph"); @@ -2666,7 +2786,7 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) { // We currently don't use LoopVersioning for the actual loop cloning but we // still use it to add the noalias metadata. 
- LVer = llvm::make_unique(*Legal->getLAI(), OrigLoop, LI, DT, + LVer = std::make_unique(*Legal->getLAI(), OrigLoop, LI, DT, PSE.getSE()); LVer->prepareNoAliasMetadata(); } @@ -3598,6 +3718,26 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) { setDebugLocFromInst(Builder, LoopExitInst); + // If tail is folded by masking, the vector value to leave the loop should be + // a Select choosing between the vectorized LoopExitInst and vectorized Phi, + // instead of the former. + if (Cost->foldTailByMasking()) { + for (unsigned Part = 0; Part < UF; ++Part) { + Value *VecLoopExitInst = + VectorLoopValueMap.getVectorValue(LoopExitInst, Part); + Value *Sel = nullptr; + for (User *U : VecLoopExitInst->users()) { + if (isa(U)) { + assert(!Sel && "Reduction exit feeding two selects"); + Sel = U; + } else + assert(isa(U) && "Reduction exit must feed Phi's or select"); + } + assert(Sel && "Reduction exit feeds no select"); + VectorLoopValueMap.resetVectorValue(LoopExitInst, Part, Sel); + } + } + // If the vector reduction can be performed in a smaller type, we truncate // then extend the loop exit value to enable InstCombine to evaluate the // entire expression in the smaller type. @@ -4064,7 +4204,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) { case Instruction::FCmp: { // Widen compares. Generate vector compares. bool FCmp = (I.getOpcode() == Instruction::FCmp); - auto *Cmp = dyn_cast(&I); + auto *Cmp = cast(&I); setDebugLocFromInst(Builder, Cmp); for (unsigned Part = 0; Part < UF; ++Part) { Value *A = getOrCreateVectorValue(Cmp->getOperand(0), Part); @@ -4097,7 +4237,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) { case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: { - auto *CI = dyn_cast(&I); + auto *CI = cast(&I); setDebugLocFromInst(Builder, CI); /// Vectorize casts. @@ -4421,9 +4561,10 @@ bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I, unsigne "Widening decision should be ready at this moment"); return WideningDecision == CM_Scalarize; } + const MaybeAlign Alignment = getLoadStoreAlignment(I); return isa(I) ? - !(isLegalMaskedLoad(Ty, Ptr) || isLegalMaskedGather(Ty)) - : !(isLegalMaskedStore(Ty, Ptr) || isLegalMaskedScatter(Ty)); + !(isLegalMaskedLoad(Ty, Ptr, Alignment) || isLegalMaskedGather(Ty)) + : !(isLegalMaskedStore(Ty, Ptr, Alignment) || isLegalMaskedScatter(Ty)); } case Instruction::UDiv: case Instruction::SDiv: @@ -4452,10 +4593,10 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(Instruction *I, // Check if masking is required. // A Group may need masking for one of two reasons: it resides in a block that // needs predication, or it was decided to use masking to deal with gaps. - bool PredicatedAccessRequiresMasking = + bool PredicatedAccessRequiresMasking = Legal->blockNeedsPredication(I->getParent()) && Legal->isMaskRequired(I); - bool AccessWithGapsRequiresMasking = - Group->requiresScalarEpilogue() && !IsScalarEpilogueAllowed; + bool AccessWithGapsRequiresMasking = + Group->requiresScalarEpilogue() && !isScalarEpilogueAllowed(); if (!PredicatedAccessRequiresMasking && !AccessWithGapsRequiresMasking) return true; @@ -4466,8 +4607,9 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(Instruction *I, "Masked interleave-groups for predicated accesses are not enabled."); auto *Ty = getMemInstValueType(I); - return isa(I) ? TTI.isLegalMaskedLoad(Ty) - : TTI.isLegalMaskedStore(Ty); + const MaybeAlign Alignment = getLoadStoreAlignment(I); + return isa(I) ? 
TTI.isLegalMaskedLoad(Ty, Alignment) + : TTI.isLegalMaskedStore(Ty, Alignment); } bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(Instruction *I, @@ -4675,82 +4817,96 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { Uniforms[VF].insert(Worklist.begin(), Worklist.end()); } -Optional LoopVectorizationCostModel::computeMaxVF(bool OptForSize) { - if (Legal->getRuntimePointerChecking()->Need && TTI.hasBranchDivergence()) { - // TODO: It may by useful to do since it's still likely to be dynamically - // uniform if the target can skip. - LLVM_DEBUG( - dbgs() << "LV: Not inserting runtime ptr check for divergent target"); - - ORE->emit( - createMissedAnalysis("CantVersionLoopWithDivergentTarget") - << "runtime pointer checks needed. Not enabled for divergent target"); - - return None; - } - - unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop); - if (!OptForSize) // Remaining checks deal with scalar loop when OptForSize. - return computeFeasibleMaxVF(OptForSize, TC); +bool LoopVectorizationCostModel::runtimeChecksRequired() { + LLVM_DEBUG(dbgs() << "LV: Performing code size checks.\n"); if (Legal->getRuntimePointerChecking()->Need) { - ORE->emit(createMissedAnalysis("CantVersionLoopWithOptForSize") - << "runtime pointer checks needed. Enable vectorization of this " - "loop with '#pragma clang loop vectorize(enable)' when " - "compiling with -Os/-Oz"); - LLVM_DEBUG( - dbgs() - << "LV: Aborting. Runtime ptr check is required with -Os/-Oz.\n"); - return None; + reportVectorizationFailure("Runtime ptr check is required with -Os/-Oz", + "runtime pointer checks needed. Enable vectorization of this " + "loop with '#pragma clang loop vectorize(enable)' when " + "compiling with -Os/-Oz", + "CantVersionLoopWithOptForSize", ORE, TheLoop); + return true; } if (!PSE.getUnionPredicate().getPredicates().empty()) { - ORE->emit(createMissedAnalysis("CantVersionLoopWithOptForSize") - << "runtime SCEV checks needed. Enable vectorization of this " - "loop with '#pragma clang loop vectorize(enable)' when " - "compiling with -Os/-Oz"); - LLVM_DEBUG( - dbgs() - << "LV: Aborting. Runtime SCEV check is required with -Os/-Oz.\n"); - return None; + reportVectorizationFailure("Runtime SCEV check is required with -Os/-Oz", + "runtime SCEV checks needed. Enable vectorization of this " + "loop with '#pragma clang loop vectorize(enable)' when " + "compiling with -Os/-Oz", + "CantVersionLoopWithOptForSize", ORE, TheLoop); + return true; } // FIXME: Avoid specializing for stride==1 instead of bailing out. if (!Legal->getLAI()->getSymbolicStrides().empty()) { - ORE->emit(createMissedAnalysis("CantVersionLoopWithOptForSize") - << "runtime stride == 1 checks needed. Enable vectorization of " - "this loop with '#pragma clang loop vectorize(enable)' when " - "compiling with -Os/-Oz"); - LLVM_DEBUG( - dbgs() - << "LV: Aborting. Runtime stride check is required with -Os/-Oz.\n"); + reportVectorizationFailure("Runtime stride check is required with -Os/-Oz", + "runtime stride == 1 checks needed. Enable vectorization of " + "this loop with '#pragma clang loop vectorize(enable)' when " + "compiling with -Os/-Oz", + "CantVersionLoopWithOptForSize", ORE, TheLoop); + return true; + } + + return false; +} + +Optional LoopVectorizationCostModel::computeMaxVF() { + if (Legal->getRuntimePointerChecking()->Need && TTI.hasBranchDivergence()) { + // TODO: It may by useful to do since it's still likely to be dynamically + // uniform if the target can skip. 
+ reportVectorizationFailure( + "Not inserting runtime ptr check for divergent target", + "runtime pointer checks needed. Not enabled for divergent target", + "CantVersionLoopWithDivergentTarget", ORE, TheLoop); return None; } - // If we optimize the program for size, avoid creating the tail loop. + unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop); LLVM_DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n'); - if (TC == 1) { - ORE->emit(createMissedAnalysis("SingleIterationLoop") - << "loop trip count is one, irrelevant for vectorization"); - LLVM_DEBUG(dbgs() << "LV: Aborting, single iteration (non) loop.\n"); + reportVectorizationFailure("Single iteration (non) loop", + "loop trip count is one, irrelevant for vectorization", + "SingleIterationLoop", ORE, TheLoop); return None; } - // Record that scalar epilogue is not allowed. - LLVM_DEBUG(dbgs() << "LV: Not allowing scalar epilogue due to -Os/-Oz.\n"); + switch (ScalarEpilogueStatus) { + case CM_ScalarEpilogueAllowed: + return computeFeasibleMaxVF(TC); + case CM_ScalarEpilogueNotNeededUsePredicate: + LLVM_DEBUG( + dbgs() << "LV: vector predicate hint/switch found.\n" + << "LV: Not allowing scalar epilogue, creating predicated " + << "vector loop.\n"); + break; + case CM_ScalarEpilogueNotAllowedLowTripLoop: + // fallthrough as a special case of OptForSize + case CM_ScalarEpilogueNotAllowedOptSize: + if (ScalarEpilogueStatus == CM_ScalarEpilogueNotAllowedOptSize) + LLVM_DEBUG( + dbgs() << "LV: Not allowing scalar epilogue due to -Os/-Oz.\n"); + else + LLVM_DEBUG(dbgs() << "LV: Not allowing scalar epilogue due to low trip " + << "count.\n"); + + // Bail if runtime checks are required, which are not good when optimising + // for size. + if (runtimeChecksRequired()) + return None; + break; + } - IsScalarEpilogueAllowed = !OptForSize; + // Now try the tail folding - // We don't create an epilogue when optimizing for size. // Invalidate interleave groups that require an epilogue if we can't mask // the interleave-group. - if (!useMaskedInterleavedAccesses(TTI)) + if (!useMaskedInterleavedAccesses(TTI)) InterleaveInfo.invalidateGroupsRequiringScalarEpilogue(); - unsigned MaxVF = computeFeasibleMaxVF(OptForSize, TC); - + unsigned MaxVF = computeFeasibleMaxVF(TC); if (TC > 0 && TC % MaxVF == 0) { + // Accept MaxVF if we do not have a tail. LLVM_DEBUG(dbgs() << "LV: No tail will remain for any chosen VF.\n"); return MaxVF; } @@ -4759,28 +4915,30 @@ Optional LoopVectorizationCostModel::computeMaxVF(bool OptForSize) { // found modulo the vectorization factor is not zero, try to fold the tail // by masking. // FIXME: look for a smaller MaxVF that does divide TC rather than masking. - if (Legal->canFoldTailByMasking()) { + if (Legal->prepareToFoldTailByMasking()) { FoldTailByMasking = true; return MaxVF; } if (TC == 0) { - ORE->emit( - createMissedAnalysis("UnknownLoopCountComplexCFG") - << "unable to calculate the loop count due to complex control flow"); + reportVectorizationFailure( + "Unable to calculate the loop count due to complex control flow", + "unable to calculate the loop count due to complex control flow", + "UnknownLoopCountComplexCFG", ORE, TheLoop); return None; } - ORE->emit(createMissedAnalysis("NoTailLoopWithOptForSize") - << "cannot optimize for size and vectorize at the same time. 
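The reorganised computeMaxVF flow above, condensed into a standalone decision sketch; the enum values and parameter names are illustrative stand-ins for the cost-model state, not the LLVM types.

#include <optional>

enum class EpilogueMode { Allowed, NotAllowedOptSize, NotAllowedLowTrip, UsePredicate };

// Condensed sketch: with a scalar epilogue allowed, the feasible maximum VF
// is returned directly; otherwise the tail must either be absent (TC divides
// evenly), foldable by masking, or vectorization is abandoned.
std::optional<unsigned> computeMaxVFSketch(EpilogueMode Mode, unsigned TC,
                                           unsigned FeasibleMaxVF,
                                           bool NeedRuntimeChecks,
                                           bool CanFoldTailByMasking,
                                           bool &FoldTail) {
  FoldTail = false;
  if (Mode == EpilogueMode::Allowed)
    return FeasibleMaxVF;
  // The OptForSize and low-trip-count paths refuse runtime guards up front.
  if (Mode != EpilogueMode::UsePredicate && NeedRuntimeChecks)
    return std::nullopt;
  if (TC > 0 && TC % FeasibleMaxVF == 0)
    return FeasibleMaxVF;        // no tail remains for this VF
  if (CanFoldTailByMasking) {
    FoldTail = true;             // predicate the body instead of an epilogue
    return FeasibleMaxVF;
  }
  return std::nullopt;           // unknown or awkward TC and no masking: bail
}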
" - "Enable vectorization of this loop with '#pragma clang loop " - "vectorize(enable)' when compiling with -Os/-Oz"); + reportVectorizationFailure( + "Cannot optimize for size and vectorize at the same time.", + "cannot optimize for size and vectorize at the same time. " + "Enable vectorization of this loop with '#pragma clang loop " + "vectorize(enable)' when compiling with -Os/-Oz", + "NoTailLoopWithOptForSize", ORE, TheLoop); return None; } unsigned -LoopVectorizationCostModel::computeFeasibleMaxVF(bool OptForSize, - unsigned ConstTripCount) { +LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount) { MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI); unsigned SmallestType, WidestType; std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes(); @@ -4818,8 +4976,8 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(bool OptForSize, } unsigned MaxVF = MaxVectorSize; - if (TTI.shouldMaximizeVectorBandwidth(OptForSize) || - (MaximizeBandwidth && !OptForSize)) { + if (TTI.shouldMaximizeVectorBandwidth(!isScalarEpilogueAllowed()) || + (MaximizeBandwidth && isScalarEpilogueAllowed())) { // Collect all viable vectorization factors larger than the default MaxVF // (i.e. MaxVectorSize). SmallVector VFs; @@ -4832,9 +4990,14 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(bool OptForSize, // Select the largest VF which doesn't require more registers than existing // ones. - unsigned TargetNumRegisters = TTI.getNumberOfRegisters(true); for (int i = RUs.size() - 1; i >= 0; --i) { - if (RUs[i].MaxLocalUsers <= TargetNumRegisters) { + bool Selected = true; + for (auto& pair : RUs[i].MaxLocalUsers) { + unsigned TargetNumRegisters = TTI.getNumberOfRegisters(pair.first); + if (pair.second > TargetNumRegisters) + Selected = false; + } + if (Selected) { MaxVF = VFs[i]; break; } @@ -4886,10 +5049,9 @@ LoopVectorizationCostModel::selectVectorizationFactor(unsigned MaxVF) { } if (!EnableCondStoresVectorization && NumPredStores) { - ORE->emit(createMissedAnalysis("ConditionalStore") - << "store that is conditionally executed prevents vectorization"); - LLVM_DEBUG( - dbgs() << "LV: No vectorization. There are conditional stores.\n"); + reportVectorizationFailure("There are conditional stores.", + "store that is conditionally executed prevents vectorization", + "ConditionalStore", ORE, TheLoop); Width = 1; Cost = ScalarCost; } @@ -4958,8 +5120,7 @@ LoopVectorizationCostModel::getSmallestAndWidestTypes() { return {MinWidth, MaxWidth}; } -unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, - unsigned VF, +unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF, unsigned LoopCost) { // -- The interleave heuristics -- // We interleave the loop in order to expose ILP and reduce the loop overhead. @@ -4975,8 +5136,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, // 3. We don't interleave if we think that we will spill registers to memory // due to the increased register pressure. - // When we optimize for size, we don't interleave. - if (OptForSize) + if (!isScalarEpilogueAllowed()) return 1; // We used the distance for the interleave count. 
@@ -4988,22 +5148,12 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, if (TC > 1 && TC < TinyTripCountInterleaveThreshold) return 1; - unsigned TargetNumRegisters = TTI.getNumberOfRegisters(VF > 1); - LLVM_DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters - << " registers\n"); - - if (VF == 1) { - if (ForceTargetNumScalarRegs.getNumOccurrences() > 0) - TargetNumRegisters = ForceTargetNumScalarRegs; - } else { - if (ForceTargetNumVectorRegs.getNumOccurrences() > 0) - TargetNumRegisters = ForceTargetNumVectorRegs; - } - RegisterUsage R = calculateRegisterUsage({VF})[0]; // We divide by these constants so assume that we have at least one // instruction that uses at least one register. - R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U); + for (auto& pair : R.MaxLocalUsers) { + pair.second = std::max(pair.second, 1U); + } // We calculate the interleave count using the following formula. // Subtract the number of loop invariants from the number of available @@ -5016,13 +5166,35 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, // We also want power of two interleave counts to ensure that the induction // variable of the vector loop wraps to zero, when tail is folded by masking; // this currently happens when OptForSize, in which case IC is set to 1 above. - unsigned IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs) / - R.MaxLocalUsers); + unsigned IC = UINT_MAX; - // Don't count the induction variable as interleaved. - if (EnableIndVarRegisterHeur) - IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs - 1) / - std::max(1U, (R.MaxLocalUsers - 1))); + for (auto& pair : R.MaxLocalUsers) { + unsigned TargetNumRegisters = TTI.getNumberOfRegisters(pair.first); + LLVM_DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters + << " registers of " + << TTI.getRegisterClassName(pair.first) << " register class\n"); + if (VF == 1) { + if (ForceTargetNumScalarRegs.getNumOccurrences() > 0) + TargetNumRegisters = ForceTargetNumScalarRegs; + } else { + if (ForceTargetNumVectorRegs.getNumOccurrences() > 0) + TargetNumRegisters = ForceTargetNumVectorRegs; + } + unsigned MaxLocalUsers = pair.second; + unsigned LoopInvariantRegs = 0; + if (R.LoopInvariantRegs.find(pair.first) != R.LoopInvariantRegs.end()) + LoopInvariantRegs = R.LoopInvariantRegs[pair.first]; + + unsigned TmpIC = PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs) / MaxLocalUsers); + // Don't count the induction variable as interleaved. + if (EnableIndVarRegisterHeur) { + TmpIC = + PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs - 1) / + std::max(1U, (MaxLocalUsers - 1))); + } + + IC = std::min(IC, TmpIC); + } // Clamp the interleave ranges to reasonable counts. unsigned MaxInterleaveCount = TTI.getMaxInterleaveFactor(VF); @@ -5036,6 +5208,14 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, MaxInterleaveCount = ForceTargetMaxVectorInterleaveFactor; } + // If the trip count is constant, limit the interleave count to be less than + // the trip count divided by VF. + if (TC > 0) { + assert(TC >= VF && "VF exceeds trip count?"); + if ((TC / VF) < MaxInterleaveCount) + MaxInterleaveCount = (TC / VF); + } + // If we did not calculate the cost for VF (because the user selected the VF) // then we calculate the cost of VF here. 
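The core of the per-register-class interleave-count formula above, as a worked self-contained sketch: PowerOf2Floor is replaced by a small helper, the SmallMapVector by std::map, and the most constrained register class wins. For example, 32 vector registers, 2 loop invariants and 5 simultaneously live values give floor((32-2)/5) = 6, rounded down to the power of two 4.

#include <algorithm>
#include <climits>
#include <map>

// Largest power of two <= X (for X > 0), mirroring llvm::PowerOf2Floor.
static unsigned powerOf2Floor(unsigned X) {
  unsigned P = 1;
  while (P * 2 != 0 && P * 2 <= X)
    P *= 2;
  return P;
}

// Sketch: per register class, (available - loop invariants) / max live
// values, optionally reserving one register for the induction variable,
// keeping the minimum across classes and clamping by the trip count.
unsigned selectICSketch(const std::map<unsigned, unsigned> &MaxLocalUsers,
                        const std::map<unsigned, unsigned> &LoopInvariantRegs,
                        const std::map<unsigned, unsigned> &TargetNumRegs,
                        bool ReserveIndVarReg, unsigned MaxInterleaveCount,
                        unsigned TC, unsigned VF) {
  unsigned IC = UINT_MAX;
  for (const auto &KV : MaxLocalUsers) {
    unsigned Avail = TargetNumRegs.at(KV.first);
    unsigned Invariant = 0;
    auto It = LoopInvariantRegs.find(KV.first);
    if (It != LoopInvariantRegs.end())
      Invariant = It->second;
    unsigned Users = std::max(KV.second, 1u);
    unsigned TmpIC = powerOf2Floor((Avail - Invariant) / Users);
    if (ReserveIndVarReg)
      TmpIC = powerOf2Floor((Avail - Invariant - 1) / std::max(1u, Users - 1));
    IC = std::min(IC, TmpIC);
  }
  // A constant trip count additionally caps the count at TC / VF
  // (the real code asserts TC >= VF here).
  if (TC > 0)
    MaxInterleaveCount = std::min(MaxInterleaveCount, TC / VF);
  return std::max(1u, std::min(IC, MaxInterleaveCount));
}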
if (LoopCost == 0) @@ -5044,7 +5224,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, assert(LoopCost && "Non-zero loop cost expected"); // Clamp the calculated IC to be between the 1 and the max interleave count - // that the target allows. + // that the target and trip count allows. if (IC > MaxInterleaveCount) IC = MaxInterleaveCount; else if (IC < 1) @@ -5196,7 +5376,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef VFs) { const DataLayout &DL = TheFunction->getParent()->getDataLayout(); SmallVector RUs(VFs.size()); - SmallVector MaxUsages(VFs.size(), 0); + SmallVector, 8> MaxUsages(VFs.size()); LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n"); @@ -5226,21 +5406,45 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef VFs) { // For each VF find the maximum usage of registers. for (unsigned j = 0, e = VFs.size(); j < e; ++j) { + // Count the number of live intervals. + SmallMapVector RegUsage; + if (VFs[j] == 1) { - MaxUsages[j] = std::max(MaxUsages[j], OpenIntervals.size()); - continue; + for (auto Inst : OpenIntervals) { + unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType()); + if (RegUsage.find(ClassID) == RegUsage.end()) + RegUsage[ClassID] = 1; + else + RegUsage[ClassID] += 1; + } + } else { + collectUniformsAndScalars(VFs[j]); + for (auto Inst : OpenIntervals) { + // Skip ignored values for VF > 1. + if (VecValuesToIgnore.find(Inst) != VecValuesToIgnore.end()) + continue; + if (isScalarAfterVectorization(Inst, VFs[j])) { + unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType()); + if (RegUsage.find(ClassID) == RegUsage.end()) + RegUsage[ClassID] = 1; + else + RegUsage[ClassID] += 1; + } else { + unsigned ClassID = TTI.getRegisterClassForType(true, Inst->getType()); + if (RegUsage.find(ClassID) == RegUsage.end()) + RegUsage[ClassID] = GetRegUsage(Inst->getType(), VFs[j]); + else + RegUsage[ClassID] += GetRegUsage(Inst->getType(), VFs[j]); + } + } } - collectUniformsAndScalars(VFs[j]); - // Count the number of live intervals. - unsigned RegUsage = 0; - for (auto Inst : OpenIntervals) { - // Skip ignored values for VF > 1. - if (VecValuesToIgnore.find(Inst) != VecValuesToIgnore.end() || - isScalarAfterVectorization(Inst, VFs[j])) - continue; - RegUsage += GetRegUsage(Inst->getType(), VFs[j]); + + for (auto& pair : RegUsage) { + if (MaxUsages[j].find(pair.first) != MaxUsages[j].end()) + MaxUsages[j][pair.first] = std::max(MaxUsages[j][pair.first], pair.second); + else + MaxUsages[j][pair.first] = pair.second; } - MaxUsages[j] = std::max(MaxUsages[j], RegUsage); } LLVM_DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # " @@ -5251,18 +5455,34 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef VFs) { } for (unsigned i = 0, e = VFs.size(); i < e; ++i) { - unsigned Invariant = 0; - if (VFs[i] == 1) - Invariant = LoopInvariants.size(); - else { - for (auto Inst : LoopInvariants) - Invariant += GetRegUsage(Inst->getType(), VFs[i]); + SmallMapVector Invariant; + + for (auto Inst : LoopInvariants) { + unsigned Usage = VFs[i] == 1 ? 
1 : GetRegUsage(Inst->getType(), VFs[i]); + unsigned ClassID = TTI.getRegisterClassForType(VFs[i] > 1, Inst->getType()); + if (Invariant.find(ClassID) == Invariant.end()) + Invariant[ClassID] = Usage; + else + Invariant[ClassID] += Usage; } - LLVM_DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n'); - LLVM_DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n'); - LLVM_DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant - << '\n'); + LLVM_DEBUG({ + dbgs() << "LV(REG): VF = " << VFs[i] << '\n'; + dbgs() << "LV(REG): Found max usage: " << MaxUsages[i].size() + << " item\n"; + for (const auto &pair : MaxUsages[i]) { + dbgs() << "LV(REG): RegisterClass: " + << TTI.getRegisterClassName(pair.first) << ", " << pair.second + << " registers\n"; + } + dbgs() << "LV(REG): Found invariant usage: " << Invariant.size() + << " item\n"; + for (const auto &pair : Invariant) { + dbgs() << "LV(REG): RegisterClass: " + << TTI.getRegisterClassName(pair.first) << ", " << pair.second + << " registers\n"; + } + }); RU.LoopInvariantRegs = Invariant; RU.MaxLocalUsers = MaxUsages[i]; @@ -5511,7 +5731,6 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I, Type *ValTy = getMemInstValueType(I); auto SE = PSE.getSE(); - unsigned Alignment = getLoadStoreAlignment(I); unsigned AS = getLoadStoreAddressSpace(I); Value *Ptr = getLoadStorePointerOperand(I); Type *PtrTy = ToVectorTy(Ptr->getType(), VF); @@ -5525,9 +5744,9 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I, // Don't pass *I here, since it is scalar but will actually be part of a // vectorized loop where the user of it is a vectorized instruction. - Cost += VF * - TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), Alignment, - AS); + const MaybeAlign Alignment = getLoadStoreAlignment(I); + Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), + Alignment ? Alignment->value() : 0, AS); // Get the overhead of the extractelement and insertelement instructions // we might create due to scalarization. @@ -5552,18 +5771,20 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I, unsigned VF) { Type *ValTy = getMemInstValueType(I); Type *VectorTy = ToVectorTy(ValTy, VF); - unsigned Alignment = getLoadStoreAlignment(I); Value *Ptr = getLoadStorePointerOperand(I); unsigned AS = getLoadStoreAddressSpace(I); int ConsecutiveStride = Legal->isConsecutivePtr(Ptr); assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) && "Stride should be 1 or -1 for consecutive memory access"); + const MaybeAlign Alignment = getLoadStoreAlignment(I); unsigned Cost = 0; if (Legal->isMaskRequired(I)) - Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS); + Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, + Alignment ? Alignment->value() : 0, AS); else - Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS, I); + Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, + Alignment ? 
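The switch above from a single scalar register count to per-register-class maps, sketched with std::map; the class ID and per-value register count are assumptions standing in for TTI.getRegisterClassForType and the GetRegUsage lambda.

#include <algorithm>
#include <map>
#include <vector>

// Stand-in for one value live at a program point.
struct LiveValue {
  unsigned RegClassID;   // assumed result of the register-class query
  unsigned RegsNeeded;   // 1 at VF == 1, the widened usage otherwise
};

// Accumulate register pressure per register class and fold it into the
// running maximum, mirroring the RegUsage / MaxUsages bookkeeping above.
void recordUsage(const std::vector<LiveValue> &OpenIntervals,
                 std::map<unsigned, unsigned> &MaxUsage) {
  std::map<unsigned, unsigned> Usage;
  for (const LiveValue &V : OpenIntervals)
    Usage[V.RegClassID] += V.RegsNeeded;   // operator[] default-initializes to 0
  for (const auto &KV : Usage)
    MaxUsage[KV.first] = std::max(MaxUsage[KV.first], KV.second);
}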
Alignment->value() : 0, AS, I); bool Reverse = ConsecutiveStride < 0; if (Reverse) @@ -5575,33 +5796,37 @@ unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I, unsigned VF) { Type *ValTy = getMemInstValueType(I); Type *VectorTy = ToVectorTy(ValTy, VF); - unsigned Alignment = getLoadStoreAlignment(I); + const MaybeAlign Alignment = getLoadStoreAlignment(I); unsigned AS = getLoadStoreAddressSpace(I); if (isa(I)) { return TTI.getAddressComputationCost(ValTy) + - TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS) + + TTI.getMemoryOpCost(Instruction::Load, ValTy, + Alignment ? Alignment->value() : 0, AS) + TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy); } StoreInst *SI = cast(I); bool isLoopInvariantStoreValue = Legal->isUniform(SI->getValueOperand()); return TTI.getAddressComputationCost(ValTy) + - TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS) + - (isLoopInvariantStoreValue ? 0 : TTI.getVectorInstrCost( - Instruction::ExtractElement, - VectorTy, VF - 1)); + TTI.getMemoryOpCost(Instruction::Store, ValTy, + Alignment ? Alignment->value() : 0, AS) + + (isLoopInvariantStoreValue + ? 0 + : TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy, + VF - 1)); } unsigned LoopVectorizationCostModel::getGatherScatterCost(Instruction *I, unsigned VF) { Type *ValTy = getMemInstValueType(I); Type *VectorTy = ToVectorTy(ValTy, VF); - unsigned Alignment = getLoadStoreAlignment(I); + const MaybeAlign Alignment = getLoadStoreAlignment(I); Value *Ptr = getLoadStorePointerOperand(I); return TTI.getAddressComputationCost(VectorTy) + TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr, - Legal->isMaskRequired(I), Alignment); + Legal->isMaskRequired(I), + Alignment ? Alignment->value() : 0); } unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I, @@ -5626,8 +5851,8 @@ unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I, } // Calculate the cost of the whole interleaved group. - bool UseMaskForGaps = - Group->requiresScalarEpilogue() && !IsScalarEpilogueAllowed; + bool UseMaskForGaps = + Group->requiresScalarEpilogue() && !isScalarEpilogueAllowed(); unsigned Cost = TTI.getInterleavedMemoryOpCost( I->getOpcode(), WideVecTy, Group->getFactor(), Indices, Group->getAlignment(), AS, Legal->isMaskRequired(I), UseMaskForGaps); @@ -5648,11 +5873,12 @@ unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I, // moment. if (VF == 1) { Type *ValTy = getMemInstValueType(I); - unsigned Alignment = getLoadStoreAlignment(I); + const MaybeAlign Alignment = getLoadStoreAlignment(I); unsigned AS = getLoadStoreAddressSpace(I); return TTI.getAddressComputationCost(ValTy) + - TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, I); + TTI.getMemoryOpCost(I->getOpcode(), ValTy, + Alignment ? Alignment->value() : 0, AS, I); } return getWideningCost(I, VF); } @@ -6167,8 +6393,7 @@ static unsigned determineVPlanVF(const unsigned WidestVectorRegBits, } VectorizationFactor -LoopVectorizationPlanner::planInVPlanNativePath(bool OptForSize, - unsigned UserVF) { +LoopVectorizationPlanner::planInVPlanNativePath(unsigned UserVF) { unsigned VF = UserVF; // Outer loop handling: They may require CFG and instruction level // transformations before even evaluating whether vectorization is profitable. 
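The recurring "Alignment ? Alignment->value() : 0" pattern above converts the new MaybeAlign result back to the unsigned convention of the cost interfaces, where 0 still means "unknown, fall back to the ABI alignment". A minimal illustration with std::optional standing in for MaybeAlign:

#include <cstdint>
#include <optional>

// Stand-in for llvm::MaybeAlign: nullopt == no explicit alignment recorded.
using MaybeAlignment = std::optional<uint64_t>;

// Cost-model style interfaces in this patch still take a plain unsigned,
// with 0 as the sentinel for "unknown / ABI alignment".
uint64_t toCostModelAlignment(MaybeAlignment A) {
  return A ? *A : 0;
}

// Mirrors the DL.getValueOrABITypeAlignment(...) use earlier in the patch:
// fall back to the ABI alignment of the accessed type when none is set.
uint64_t resolveAlignment(MaybeAlignment A, uint64_t ABITypeAlignment) {
  return A ? *A : ABITypeAlignment;
}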
@@ -6207,10 +6432,9 @@ LoopVectorizationPlanner::planInVPlanNativePath(bool OptForSize, return VectorizationFactor::Disabled(); } -Optional LoopVectorizationPlanner::plan(bool OptForSize, - unsigned UserVF) { +Optional LoopVectorizationPlanner::plan(unsigned UserVF) { assert(OrigLoop->empty() && "Inner loop expected."); - Optional MaybeMaxVF = CM.computeMaxVF(OptForSize); + Optional MaybeMaxVF = CM.computeMaxVF(); if (!MaybeMaxVF) // Cases that should not to be vectorized nor interleaved. return None; @@ -6840,8 +7064,15 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF, // If the tail is to be folded by masking, the primary induction variable // needs to be represented in VPlan for it to model early-exit masking. - if (CM.foldTailByMasking()) + // Also, both the Phi and the live-out instruction of each reduction are + // required in order to introduce a select between them in VPlan. + if (CM.foldTailByMasking()) { NeedDef.insert(Legal->getPrimaryInduction()); + for (auto &Reduction : *Legal->getReductionVars()) { + NeedDef.insert(Reduction.first); + NeedDef.insert(Reduction.second.getLoopExitInstr()); + } + } // Collect instructions from the original loop that will become trivially dead // in the vectorized loop. We don't need to vectorize these instructions. For @@ -6873,7 +7104,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( // Create a dummy pre-entry VPBasicBlock to start building the VPlan. VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry"); - auto Plan = llvm::make_unique(VPBB); + auto Plan = std::make_unique(VPBB); VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder); // Represent values that will have defs inside VPlan. @@ -6968,6 +7199,18 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( VPBlockUtils::disconnectBlocks(PreEntry, Entry); delete PreEntry; + // Finally, if tail is folded by masking, introduce selects between the phi + // and the live-out instruction of each reduction, at the end of the latch. 
+ if (CM.foldTailByMasking()) { + Builder.setInsertPoint(VPBB); + auto *Cond = RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), Plan); + for (auto &Reduction : *Legal->getReductionVars()) { + VPValue *Phi = Plan->getVPValue(Reduction.first); + VPValue *Red = Plan->getVPValue(Reduction.second.getLoopExitInstr()); + Builder.createNaryOp(Instruction::Select, {Cond, Red, Phi}); + } + } + std::string PlanName; raw_string_ostream RSO(PlanName); unsigned VF = Range.Start; @@ -6993,7 +7236,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { assert(EnableVPlanNativePath && "VPlan-native path is not enabled."); // Create new empty VPlan - auto Plan = llvm::make_unique(); + auto Plan = std::make_unique(); // Build hierarchical CFG VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); @@ -7199,6 +7442,20 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues); } +static ScalarEpilogueLowering +getScalarEpilogueLowering(Function *F, Loop *L, LoopVectorizeHints &Hints, + ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { + ScalarEpilogueLowering SEL = CM_ScalarEpilogueAllowed; + if (Hints.getForce() != LoopVectorizeHints::FK_Enabled && + (F->hasOptSize() || + llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI))) + SEL = CM_ScalarEpilogueNotAllowedOptSize; + else if (PreferPredicateOverEpilog || Hints.getPredicate()) + SEL = CM_ScalarEpilogueNotNeededUsePredicate; + + return SEL; +} + // Process the loop in the VPlan-native vectorization path. This path builds // VPlan upfront in the vectorization pipeline, which allows to apply // VPlan-to-VPlan transformations from the very beginning without modifying the @@ -7213,7 +7470,9 @@ static bool processLoopInVPlanNativePath( assert(EnableVPlanNativePath && "VPlan-native path is disabled."); Function *F = L->getHeader()->getParent(); InterleavedAccessInfo IAI(PSE, L, DT, LI, LVL->getLAI()); - LoopVectorizationCostModel CM(L, PSE, LI, LVL, *TTI, TLI, DB, AC, ORE, F, + ScalarEpilogueLowering SEL = getScalarEpilogueLowering(F, L, Hints, PSI, BFI); + + LoopVectorizationCostModel CM(SEL, L, PSE, LI, LVL, *TTI, TLI, DB, AC, ORE, F, &Hints, IAI); // Use the planner for outer loop vectorization. // TODO: CM is not used at this point inside the planner. Turn CM into an @@ -7223,15 +7482,8 @@ static bool processLoopInVPlanNativePath( // Get user vectorization factor. const unsigned UserVF = Hints.getWidth(); - // Check the function attributes and profiles to find out if this function - // should be optimized for size. - bool OptForSize = - Hints.getForce() != LoopVectorizeHints::FK_Enabled && - (F->hasOptSize() || - llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI)); - // Plan how to best vectorize, return the best VF and its cost. - const VectorizationFactor VF = LVP.planInVPlanNativePath(OptForSize, UserVF); + const VectorizationFactor VF = LVP.planInVPlanNativePath(UserVF); // If we are stress testing VPlan builds, do not attempt to generate vector // code. Masked vector code generation support will follow soon. @@ -7310,10 +7562,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { // Check the function attributes and profiles to find out if this function // should be optimized for size. 
- bool OptForSize = - Hints.getForce() != LoopVectorizeHints::FK_Enabled && - (F->hasOptSize() || - llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI)); + ScalarEpilogueLowering SEL = getScalarEpilogueLowering(F, L, Hints, PSI, BFI); // Entrance to the VPlan-native vectorization path. Outer loops are processed // here. They may require CFG and instruction level transformations before @@ -7325,36 +7574,11 @@ bool LoopVectorizePass::processLoop(Loop *L) { ORE, BFI, PSI, Hints); assert(L->empty() && "Inner loop expected."); + // Check the loop for a trip count threshold: vectorize loops with a tiny trip // count by optimizing for size, to minimize overheads. - // Prefer constant trip counts over profile data, over upper bound estimate. - unsigned ExpectedTC = 0; - bool HasExpectedTC = false; - if (const SCEVConstant *ConstExits = - dyn_cast(SE->getBackedgeTakenCount(L))) { - const APInt &ExitsCount = ConstExits->getAPInt(); - // We are interested in small values for ExpectedTC. Skip over those that - // can't fit an unsigned. - if (ExitsCount.ult(std::numeric_limits::max())) { - ExpectedTC = static_cast(ExitsCount.getZExtValue()) + 1; - HasExpectedTC = true; - } - } - // ExpectedTC may be large because it's bound by a variable. Check - // profiling information to validate we should vectorize. - if (!HasExpectedTC && LoopVectorizeWithBlockFrequency) { - auto EstimatedTC = getLoopEstimatedTripCount(L); - if (EstimatedTC) { - ExpectedTC = *EstimatedTC; - HasExpectedTC = true; - } - } - if (!HasExpectedTC) { - ExpectedTC = SE->getSmallConstantMaxTripCount(L); - HasExpectedTC = (ExpectedTC > 0); - } - - if (HasExpectedTC && ExpectedTC < TinyTripCountVectorThreshold) { + auto ExpectedTC = getSmallBestKnownTC(*SE, L); + if (ExpectedTC && *ExpectedTC < TinyTripCountVectorThreshold) { LLVM_DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " << "This loop is worth vectorizing only if no scalar " << "iteration overheads are incurred."); @@ -7362,10 +7586,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { LLVM_DEBUG(dbgs() << " But vectorizing was explicitly forced.\n"); else { LLVM_DEBUG(dbgs() << "\n"); - // Loops with a very small trip count are considered for vectorization - // under OptForSize, thereby making sure the cost of their loop body is - // dominant, free of runtime guards and scalar iteration overheads. - OptForSize = true; + SEL = CM_ScalarEpilogueNotAllowedLowTripLoop; } } @@ -7374,11 +7595,10 @@ bool LoopVectorizePass::processLoop(Loop *L) { // an integer loop and the vector instructions selected are purely integer // vector instructions? if (F->hasFnAttribute(Attribute::NoImplicitFloat)) { - LLVM_DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat" - "attribute is used.\n"); - ORE->emit(createLVMissedAnalysis(Hints.vectorizeAnalysisPassName(), - "NoImplicitFloat", L) - << "loop not vectorized due to NoImplicitFloat attribute"); + reportVectorizationFailure( + "Can't vectorize when the NoImplicitFloat attribute is used", + "loop not vectorized due to NoImplicitFloat attribute", + "NoImplicitFloat", ORE, L); Hints.emitRemarkWithHints(); return false; } @@ -7389,11 +7609,10 @@ bool LoopVectorizePass::processLoop(Loop *L) { // additional fp-math flags can help. 
if (Hints.isPotentiallyUnsafe() && TTI->isFPVectorizationPotentiallyUnsafe()) { - LLVM_DEBUG( - dbgs() << "LV: Potentially unsafe FP op prevents vectorization.\n"); - ORE->emit( - createLVMissedAnalysis(Hints.vectorizeAnalysisPassName(), "UnsafeFP", L) - << "loop not vectorized due to unsafe FP support."); + reportVectorizationFailure( + "Potentially unsafe FP op prevents vectorization", + "loop not vectorized due to unsafe FP support.", + "UnsafeFP", ORE, L); Hints.emitRemarkWithHints(); return false; } @@ -7411,8 +7630,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { } // Use the cost model. - LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE, F, - &Hints, IAI); + LoopVectorizationCostModel CM(SEL, L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE, + F, &Hints, IAI); CM.collectValuesToIgnore(); // Use the planner for vectorization. @@ -7422,7 +7641,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { unsigned UserVF = Hints.getWidth(); // Plan how to best vectorize, return the best VF and its cost. - Optional MaybeVF = LVP.plan(OptForSize, UserVF); + Optional MaybeVF = LVP.plan(UserVF); VectorizationFactor VF = VectorizationFactor::Disabled(); unsigned IC = 1; @@ -7431,7 +7650,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { if (MaybeVF) { VF = *MaybeVF; // Select the interleave count. - IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost); + IC = CM.selectInterleaveCount(VF.Width, VF.Cost); } // Identify the diagnostic messages that should be produced. @@ -7609,7 +7828,8 @@ bool LoopVectorizePass::runImpl( // The second condition is necessary because, even if the target has no // vector registers, loop vectorization may still enable scalar // interleaving. - if (!TTI->getNumberOfRegisters(true) && TTI->getMaxInterleaveFactor(1) < 2) + if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true)) && + TTI->getMaxInterleaveFactor(1) < 2) return false; bool Changed = false; diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 27a86c0bca91..974eff9974d9 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -194,10 +194,13 @@ static bool allSameBlock(ArrayRef VL) { return true; } -/// \returns True if all of the values in \p VL are constants. +/// \returns True if all of the values in \p VL are constants (but not +/// globals/constant expressions). static bool allConstant(ArrayRef VL) { + // Constant expressions and globals can't be vectorized like normal integer/FP + // constants. for (Value *i : VL) - if (!isa(i)) + if (!isa(i) || isa(i) || isa(i)) return false; return true; } @@ -486,6 +489,7 @@ namespace slpvectorizer { /// Bottom Up SLP Vectorizer. class BoUpSLP { struct TreeEntry; + struct ScheduleData; public: using ValueList = SmallVector; @@ -614,6 +618,15 @@ public: /// vectorizable. We do not vectorize such trees. bool isTreeTinyAndNotFullyVectorizable() const; + /// Assume that a legal-sized 'or'-reduction of shifted/zexted loaded values + /// can be load combined in the backend. Load combining may not be allowed in + /// the IR optimizer, so we do not want to alter the pattern. For example, + /// partially transforming a scalar bswap() pattern into vector code is + /// effectively impossible for the backend to undo. + /// TODO: If load combining is allowed in the IR optimizer, this analysis + /// may not be necessary. 
+ bool isLoadCombineReductionCandidate(unsigned ReductionOpcode) const; + OptimizationRemarkEmitter *getORE() { return ORE; } /// This structure holds any data we need about the edges being traversed @@ -1117,6 +1130,14 @@ public: #endif }; + /// Checks if the instruction is marked for deletion. + bool isDeleted(Instruction *I) const { return DeletedInstructions.count(I); } + + /// Marks values operands for later deletion by replacing them with Undefs. + void eraseInstructions(ArrayRef AV); + + ~BoUpSLP(); + private: /// Checks if all users of \p I are the part of the vectorization tree. bool areAllUsersVectorized(Instruction *I) const; @@ -1153,8 +1174,7 @@ private: /// Set the Builder insert point to one after the last instruction in /// the bundle - void setInsertPointAfterBundle(ArrayRef VL, - const InstructionsState &S); + void setInsertPointAfterBundle(TreeEntry *E); /// \returns a vector from a collection of scalars in \p VL. Value *Gather(ArrayRef VL, VectorType *Ty); @@ -1220,27 +1240,37 @@ private: /// reordering of operands during buildTree_rec() and vectorizeTree(). SmallVector Operands; + /// The main/alternate instruction. + Instruction *MainOp = nullptr; + Instruction *AltOp = nullptr; + public: /// Set this bundle's \p OpIdx'th operand to \p OpVL. - void setOperand(unsigned OpIdx, ArrayRef OpVL, - ArrayRef ReuseShuffleIndices) { + void setOperand(unsigned OpIdx, ArrayRef OpVL) { if (Operands.size() < OpIdx + 1) Operands.resize(OpIdx + 1); assert(Operands[OpIdx].size() == 0 && "Already resized?"); Operands[OpIdx].resize(Scalars.size()); for (unsigned Lane = 0, E = Scalars.size(); Lane != E; ++Lane) - Operands[OpIdx][Lane] = (!ReuseShuffleIndices.empty()) - ? OpVL[ReuseShuffleIndices[Lane]] - : OpVL[Lane]; - } - - /// If there is a user TreeEntry, then set its operand. - void trySetUserTEOperand(const EdgeInfo &UserTreeIdx, - ArrayRef OpVL, - ArrayRef ReuseShuffleIndices) { - if (UserTreeIdx.UserTE) - UserTreeIdx.UserTE->setOperand(UserTreeIdx.EdgeIdx, OpVL, - ReuseShuffleIndices); + Operands[OpIdx][Lane] = OpVL[Lane]; + } + + /// Set the operands of this bundle in their original order. + void setOperandsInOrder() { + assert(Operands.empty() && "Already initialized?"); + auto *I0 = cast(Scalars[0]); + Operands.resize(I0->getNumOperands()); + unsigned NumLanes = Scalars.size(); + for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands(); + OpIdx != NumOperands; ++OpIdx) { + Operands[OpIdx].resize(NumLanes); + for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { + auto *I = cast(Scalars[Lane]); + assert(I->getNumOperands() == NumOperands && + "Expected same number of operands"); + Operands[OpIdx][Lane] = I->getOperand(OpIdx); + } + } } /// \returns the \p OpIdx operand of this TreeEntry. @@ -1249,6 +1279,9 @@ private: return Operands[OpIdx]; } + /// \returns the number of operands. + unsigned getNumOperands() const { return Operands.size(); } + /// \return the single \p OpIdx operand. Value *getSingleOperand(unsigned OpIdx) const { assert(OpIdx < Operands.size() && "Off bounds"); @@ -1256,6 +1289,58 @@ private: return Operands[OpIdx][0]; } + /// Some of the instructions in the list have alternate opcodes. + bool isAltShuffle() const { + return getOpcode() != getAltOpcode(); + } + + bool isOpcodeOrAlt(Instruction *I) const { + unsigned CheckedOpcode = I->getOpcode(); + return (getOpcode() == CheckedOpcode || + getAltOpcode() == CheckedOpcode); + } + + /// Chooses the correct key for scheduling data. 
If \p Op has the same (or + /// alternate) opcode as \p OpValue, the key is \p Op. Otherwise the key is + /// \p OpValue. + Value *isOneOf(Value *Op) const { + auto *I = dyn_cast(Op); + if (I && isOpcodeOrAlt(I)) + return Op; + return MainOp; + } + + void setOperations(const InstructionsState &S) { + MainOp = S.MainOp; + AltOp = S.AltOp; + } + + Instruction *getMainOp() const { + return MainOp; + } + + Instruction *getAltOp() const { + return AltOp; + } + + /// The main/alternate opcodes for the list of instructions. + unsigned getOpcode() const { + return MainOp ? MainOp->getOpcode() : 0; + } + + unsigned getAltOpcode() const { + return AltOp ? AltOp->getOpcode() : 0; + } + + /// Update operations state of this entry if reorder occurred. + bool updateStateIfReorder() { + if (ReorderIndices.empty()) + return false; + InstructionsState S = getSameOpcode(Scalars, ReorderIndices.front()); + setOperations(S); + return true; + } + #ifndef NDEBUG /// Debug printer. LLVM_DUMP_METHOD void dump() const { @@ -1269,6 +1354,8 @@ private: for (Value *V : Scalars) dbgs().indent(2) << *V << "\n"; dbgs() << "NeedToGather: " << NeedToGather << "\n"; + dbgs() << "MainOp: " << *MainOp << "\n"; + dbgs() << "AltOp: " << *AltOp << "\n"; dbgs() << "VectorizedValue: "; if (VectorizedValue) dbgs() << *VectorizedValue; @@ -1279,12 +1366,12 @@ private: if (ReuseShuffleIndices.empty()) dbgs() << "Emtpy"; else - for (unsigned Idx : ReuseShuffleIndices) - dbgs() << Idx << ", "; + for (unsigned ReuseIdx : ReuseShuffleIndices) + dbgs() << ReuseIdx << ", "; dbgs() << "\n"; dbgs() << "ReorderIndices: "; - for (unsigned Idx : ReorderIndices) - dbgs() << Idx << ", "; + for (unsigned ReorderIdx : ReorderIndices) + dbgs() << ReorderIdx << ", "; dbgs() << "\n"; dbgs() << "UserTreeIndices: "; for (const auto &EInfo : UserTreeIndices) @@ -1295,11 +1382,13 @@ private: }; /// Create a new VectorizableTree entry. - TreeEntry *newTreeEntry(ArrayRef VL, bool Vectorized, + TreeEntry *newTreeEntry(ArrayRef VL, Optional Bundle, + const InstructionsState &S, const EdgeInfo &UserTreeIdx, ArrayRef ReuseShuffleIndices = None, ArrayRef ReorderIndices = None) { - VectorizableTree.push_back(llvm::make_unique(VectorizableTree)); + bool Vectorized = (bool)Bundle; + VectorizableTree.push_back(std::make_unique(VectorizableTree)); TreeEntry *Last = VectorizableTree.back().get(); Last->Idx = VectorizableTree.size() - 1; Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end()); @@ -1307,11 +1396,22 @@ private: Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(), ReuseShuffleIndices.end()); Last->ReorderIndices = ReorderIndices; + Last->setOperations(S); if (Vectorized) { for (int i = 0, e = VL.size(); i != e; ++i) { assert(!getTreeEntry(VL[i]) && "Scalar already in tree!"); - ScalarToTreeEntry[VL[i]] = Last->Idx; - } + ScalarToTreeEntry[VL[i]] = Last; + } + // Update the scheduler bundle to point to this TreeEntry. 
+ unsigned Lane = 0; + for (ScheduleData *BundleMember = Bundle.getValue(); BundleMember; + BundleMember = BundleMember->NextInBundle) { + BundleMember->TE = Last; + BundleMember->Lane = Lane; + ++Lane; + } + assert((!Bundle.getValue() || Lane == VL.size()) && + "Bundle and VL out of sync"); } else { MustGather.insert(VL.begin(), VL.end()); } @@ -1319,7 +1419,6 @@ private: if (UserTreeIdx.UserTE) Last->UserTreeIndices.push_back(UserTreeIdx); - Last->trySetUserTEOperand(UserTreeIdx, VL, ReuseShuffleIndices); return Last; } @@ -1340,19 +1439,19 @@ private: TreeEntry *getTreeEntry(Value *V) { auto I = ScalarToTreeEntry.find(V); if (I != ScalarToTreeEntry.end()) - return VectorizableTree[I->second].get(); + return I->second; return nullptr; } const TreeEntry *getTreeEntry(Value *V) const { auto I = ScalarToTreeEntry.find(V); if (I != ScalarToTreeEntry.end()) - return VectorizableTree[I->second].get(); + return I->second; return nullptr; } /// Maps a specific scalar to its tree entry. - SmallDenseMap ScalarToTreeEntry; + SmallDenseMap ScalarToTreeEntry; /// A list of scalars that we found that we need to keep as scalars. ValueSet MustGather; @@ -1408,15 +1507,14 @@ private: /// This is required to ensure that there are no incorrect collisions in the /// AliasCache, which can happen if a new instruction is allocated at the /// same address as a previously deleted instruction. - void eraseInstruction(Instruction *I) { - I->removeFromParent(); - I->dropAllReferences(); - DeletedInstructions.emplace_back(I); + void eraseInstruction(Instruction *I, bool ReplaceOpsWithUndef = false) { + auto It = DeletedInstructions.try_emplace(I, ReplaceOpsWithUndef).first; + It->getSecond() = It->getSecond() && ReplaceOpsWithUndef; } /// Temporary store for deleted instructions. Instructions will be deleted /// eventually when the BoUpSLP is destructed. - SmallVector DeletedInstructions; + DenseMap DeletedInstructions; /// A list of values that need to extracted out of the tree. /// This list holds pairs of (Internal Scalar : External User). External User @@ -1453,6 +1551,8 @@ private: UnscheduledDepsInBundle = UnscheduledDeps; clearDependencies(); OpValue = OpVal; + TE = nullptr; + Lane = -1; } /// Returns true if the dependency information has been calculated. @@ -1559,6 +1659,12 @@ private: /// Opcode of the current instruction in the schedule data. Value *OpValue = nullptr; + + /// The TreeEntry that this instruction corresponds to. + TreeEntry *TE = nullptr; + + /// The lane of this node in the TreeEntry. + int Lane = -1; }; #ifndef NDEBUG @@ -1633,10 +1739,9 @@ private: continue; } // Handle the def-use chain dependencies. - for (Use &U : BundleMember->Inst->operands()) { - auto *I = dyn_cast(U.get()); - if (!I) - continue; + + // Decrement the unscheduled counter and insert to ready list if ready. + auto &&DecrUnsched = [this, &ReadyList](Instruction *I) { doForAllOpcodes(I, [&ReadyList](ScheduleData *OpDef) { if (OpDef && OpDef->hasValidDependencies() && OpDef->incrementUnscheduledDeps(-1) == 0) { @@ -1651,6 +1756,24 @@ private: << "SLP: gets ready (def): " << *DepBundle << "\n"); } }); + }; + + // If BundleMember is a vector bundle, its operands may have been + // reordered duiring buildTree(). We therefore need to get its operands + // through the TreeEntry. 
+ if (TreeEntry *TE = BundleMember->TE) { + int Lane = BundleMember->Lane; + assert(Lane >= 0 && "Lane not set"); + for (unsigned OpIdx = 0, NumOperands = TE->getNumOperands(); + OpIdx != NumOperands; ++OpIdx) + if (auto *I = dyn_cast(TE->getOperand(OpIdx)[Lane])) + DecrUnsched(I); + } else { + // If BundleMember is a stand-alone instruction, no operand reordering + // has taken place, so we directly access its operands. + for (Use &U : BundleMember->Inst->operands()) + if (auto *I = dyn_cast(U.get())) + DecrUnsched(I); } // Handle the memory dependencies. for (ScheduleData *MemoryDepSD : BundleMember->MemoryDependencies) { @@ -1697,8 +1820,11 @@ private: /// Checks if a bundle of instructions can be scheduled, i.e. has no /// cyclic dependencies. This is only a dry-run, no instructions are /// actually moved at this stage. - bool tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, - const InstructionsState &S); + /// \returns the scheduling bundle. The returned Optional value is non-None + /// if \p VL is allowed to be scheduled. + Optional + tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, + const InstructionsState &S); /// Un-bundles a group of instructions. void cancelScheduling(ArrayRef VL, Value *OpValue); @@ -1945,6 +2071,30 @@ template <> struct DOTGraphTraits : public DefaultDOTGraphTraits { } // end namespace llvm +BoUpSLP::~BoUpSLP() { + for (const auto &Pair : DeletedInstructions) { + // Replace operands of ignored instructions with Undefs in case if they were + // marked for deletion. + if (Pair.getSecond()) { + Value *Undef = UndefValue::get(Pair.getFirst()->getType()); + Pair.getFirst()->replaceAllUsesWith(Undef); + } + Pair.getFirst()->dropAllReferences(); + } + for (const auto &Pair : DeletedInstructions) { + assert(Pair.getFirst()->use_empty() && + "trying to erase instruction with users."); + Pair.getFirst()->eraseFromParent(); + } +} + +void BoUpSLP::eraseInstructions(ArrayRef AV) { + for (auto *V : AV) { + if (auto *I = dyn_cast(V)) + eraseInstruction(I, /*ReplaceWithUndef=*/true); + }; +} + void BoUpSLP::buildTree(ArrayRef Roots, ArrayRef UserIgnoreLst) { ExtraValueToDebugLocsMap ExternallyUsedValues; @@ -2026,28 +2176,28 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, InstructionsState S = getSameOpcode(VL); if (Depth == RecursionMaxDepth) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; } // Don't handle vectors. if (S.OpValue->getType()->isVectorTy()) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; } if (StoreInst *SI = dyn_cast(S.OpValue)) if (SI->getValueOperand()->getType()->isVectorTy()) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; } // If all of the operands are identical or constant we have a simple solution. if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.getOpcode()) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; } @@ -2055,11 +2205,11 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // the same block. // Don't vectorize ephemeral values. 
- for (unsigned i = 0, e = VL.size(); i != e; ++i) { - if (EphValues.count(VL[i])) { - LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] + for (Value *V : VL) { + if (EphValues.count(V)) { + LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V << ") is ephemeral.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; } } @@ -2069,7 +2219,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n"); if (!E->isSame(VL)) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; } // Record the reuse of the tree node. FIXME, currently this is only used to @@ -2077,19 +2227,18 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, E->UserTreeIndices.push_back(UserTreeIdx); LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue << ".\n"); - E->trySetUserTEOperand(UserTreeIdx, VL, None); return; } // Check that none of the instructions in the bundle are already in the tree. - for (unsigned i = 0, e = VL.size(); i != e; ++i) { - auto *I = dyn_cast(VL[i]); + for (Value *V : VL) { + auto *I = dyn_cast(V); if (!I) continue; if (getTreeEntry(I)) { - LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] + LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V << ") is already in tree.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; } } @@ -2097,10 +2246,10 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // If any of the scalars is marked as a value that needs to stay scalar, then // we need to gather the scalars. // The reduction nodes (stored in UserIgnoreList) also should stay scalar. - for (unsigned i = 0, e = VL.size(); i != e; ++i) { - if (MustGather.count(VL[i]) || is_contained(UserIgnoreList, VL[i])) { + for (Value *V : VL) { + if (MustGather.count(V) || is_contained(UserIgnoreList, V)) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; } } @@ -2114,7 +2263,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // Don't go into unreachable blocks. They may contain instructions with // dependency cycles which confuse the final scheduling. 
LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; } @@ -2128,13 +2277,15 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (Res.second) UniqueValues.emplace_back(V); } - if (UniqueValues.size() == VL.size()) { + size_t NumUniqueScalarValues = UniqueValues.size(); + if (NumUniqueScalarValues == VL.size()) { ReuseShuffleIndicies.clear(); } else { LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n"); - if (UniqueValues.size() <= 1 || !llvm::isPowerOf2_32(UniqueValues.size())) { + if (NumUniqueScalarValues <= 1 || + !llvm::isPowerOf2_32(NumUniqueScalarValues)) { LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; } VL = UniqueValues; @@ -2142,16 +2293,18 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, auto &BSRef = BlocksSchedules[BB]; if (!BSRef) - BSRef = llvm::make_unique(BB); + BSRef = std::make_unique(BB); BlockScheduling &BS = *BSRef.get(); - if (!BS.tryScheduleBundle(VL, this, S)) { + Optional Bundle = BS.tryScheduleBundle(VL, this, S); + if (!Bundle) { LLVM_DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n"); assert((!BS.getScheduleData(VL0) || !BS.getScheduleData(VL0)->isPartOfBundle()) && "tryScheduleBundle should cancelScheduling on failure"); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); return; } LLVM_DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n"); @@ -2160,7 +2313,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, (unsigned) Instruction::ShuffleVector : S.getOpcode(); switch (ShuffleOrOp) { case Instruction::PHI: { - PHINode *PH = dyn_cast(VL0); + auto *PH = cast(VL0); // Check for terminator values (e.g. invoke). for (unsigned j = 0; j < VL.size(); ++j) @@ -2172,23 +2325,29 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, LLVM_DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (terminator use).\n"); BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); return; } } - auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + TreeEntry *TE = + newTreeEntry(VL, Bundle, S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n"); + // Keeps the reordered operands to avoid code duplication. + SmallVector OperandsVec; for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) { ValueList Operands; // Prepare the operand vector. 
for (Value *j : VL) Operands.push_back(cast(j)->getIncomingValueForBlock( PH->getIncomingBlock(i))); - - buildTree_rec(Operands, Depth + 1, {TE, i}); + TE->setOperand(i, Operands); + OperandsVec.push_back(Operands); } + for (unsigned OpIdx = 0, OpE = OperandsVec.size(); OpIdx != OpE; ++OpIdx) + buildTree_rec(OperandsVec[OpIdx], Depth + 1, {TE, OpIdx}); return; } case Instruction::ExtractValue: @@ -2198,13 +2357,13 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (Reuse) { LLVM_DEBUG(dbgs() << "SLP: Reusing or shuffling extract sequence.\n"); ++NumOpsWantToKeepOriginalOrder; - newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, + newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); // This is a special case, as it does not gather, but at the same time // we are not extending buildTree_rec() towards the operands. ValueList Op0; Op0.assign(VL.size(), VL0->getOperand(0)); - VectorizableTree.back()->setOperand(0, Op0, ReuseShuffleIndicies); + VectorizableTree.back()->setOperand(0, Op0); return; } if (!CurrentOrder.empty()) { @@ -2220,17 +2379,19 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, auto StoredCurrentOrderAndNum = NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first; ++StoredCurrentOrderAndNum->getSecond(); - newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, ReuseShuffleIndicies, + newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies, StoredCurrentOrderAndNum->getFirst()); // This is a special case, as it does not gather, but at the same time // we are not extending buildTree_rec() towards the operands. ValueList Op0; Op0.assign(VL.size(), VL0->getOperand(0)); - VectorizableTree.back()->setOperand(0, Op0, ReuseShuffleIndicies); + VectorizableTree.back()->setOperand(0, Op0); return; } LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n"); - newTreeEntry(VL, /*Vectorized=*/false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); BS.cancelScheduling(VL, VL0); return; } @@ -2246,7 +2407,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (DL->getTypeSizeInBits(ScalarTy) != DL->getTypeAllocSizeInBits(ScalarTy)) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n"); return; } @@ -2259,7 +2421,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, auto *L = cast(V); if (!L->isSimple()) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n"); return; } @@ -2289,15 +2452,18 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (CurrentOrder.empty()) { // Original loads are consecutive and does not require reordering. ++NumOpsWantToKeepOriginalOrder; - newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, - ReuseShuffleIndicies); + TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, + UserTreeIdx, ReuseShuffleIndicies); + TE->setOperandsInOrder(); LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n"); } else { // Need to reorder. 
auto I = NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first; ++I->getSecond(); - newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, - ReuseShuffleIndicies, I->getFirst()); + TreeEntry *TE = + newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies, I->getFirst()); + TE->setOperandsInOrder(); LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n"); } return; @@ -2306,7 +2472,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n"); BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); return; } case Instruction::ZExt: @@ -2322,24 +2489,27 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, case Instruction::FPTrunc: case Instruction::BitCast: { Type *SrcTy = VL0->getOperand(0)->getType(); - for (unsigned i = 0; i < VL.size(); ++i) { - Type *Ty = cast(VL[i])->getOperand(0)->getType(); + for (Value *V : VL) { + Type *Ty = cast(V)->getOperand(0)->getType(); if (Ty != SrcTy || !isValidElementType(Ty)) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n"); return; } } - auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n"); + TE->setOperandsInOrder(); for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { ValueList Operands; // Prepare the operand vector. - for (Value *j : VL) - Operands.push_back(cast(j)->getOperand(i)); + for (Value *V : VL) + Operands.push_back(cast(V)->getOperand(i)); buildTree_rec(Operands, Depth + 1, {TE, i}); } @@ -2351,19 +2521,21 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, CmpInst::Predicate P0 = cast(VL0)->getPredicate(); CmpInst::Predicate SwapP0 = CmpInst::getSwappedPredicate(P0); Type *ComparedTy = VL0->getOperand(0)->getType(); - for (unsigned i = 1, e = VL.size(); i < e; ++i) { - CmpInst *Cmp = cast(VL[i]); + for (Value *V : VL) { + CmpInst *Cmp = cast(V); if ((Cmp->getPredicate() != P0 && Cmp->getPredicate() != SwapP0) || Cmp->getOperand(0)->getType() != ComparedTy) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n"); return; } } - auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n"); ValueList Left, Right; @@ -2384,7 +2556,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, Right.push_back(RHS); } } - + TE->setOperand(0, Left); + TE->setOperand(1, Right); buildTree_rec(Left, Depth + 1, {TE, 0}); buildTree_rec(Right, Depth + 1, {TE, 1}); return; @@ -2409,7 +2582,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, case Instruction::And: case Instruction::Or: case Instruction::Xor: { - auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); 
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n"); // Sort operands of the instructions so that each side is more likely to @@ -2417,11 +2591,14 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (isa(VL0) && VL0->isCommutative()) { ValueList Left, Right; reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE); + TE->setOperand(0, Left); + TE->setOperand(1, Right); buildTree_rec(Left, Depth + 1, {TE, 0}); buildTree_rec(Right, Depth + 1, {TE, 1}); return; } + TE->setOperandsInOrder(); for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { ValueList Operands; // Prepare the operand vector. @@ -2434,11 +2611,12 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, } case Instruction::GetElementPtr: { // We don't combine GEPs with complicated (nested) indexing. - for (unsigned j = 0; j < VL.size(); ++j) { - if (cast(VL[j])->getNumOperands() != 2) { + for (Value *V : VL) { + if (cast(V)->getNumOperands() != 2) { LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n"); BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); return; } } @@ -2446,58 +2624,64 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // We can't combine several GEPs into one vector if they operate on // different types. Type *Ty0 = VL0->getOperand(0)->getType(); - for (unsigned j = 0; j < VL.size(); ++j) { - Type *CurTy = cast(VL[j])->getOperand(0)->getType(); + for (Value *V : VL) { + Type *CurTy = cast(V)->getOperand(0)->getType(); if (Ty0 != CurTy) { LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n"); BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); return; } } // We don't combine GEPs with non-constant indexes. - for (unsigned j = 0; j < VL.size(); ++j) { - auto Op = cast(VL[j])->getOperand(1); + for (Value *V : VL) { + auto Op = cast(V)->getOperand(1); if (!isa(Op)) { LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n"); BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); return; } } - auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n"); + TE->setOperandsInOrder(); for (unsigned i = 0, e = 2; i < e; ++i) { ValueList Operands; // Prepare the operand vector. - for (Value *j : VL) - Operands.push_back(cast(j)->getOperand(i)); + for (Value *V : VL) + Operands.push_back(cast(V)->getOperand(i)); buildTree_rec(Operands, Depth + 1, {TE, i}); } return; } case Instruction::Store: { - // Check if the stores are consecutive or of we need to swizzle them. + // Check if the stores are consecutive or if we need to swizzle them. 
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Non-consecutive store.\n"); return; } - auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n"); ValueList Operands; - for (Value *j : VL) - Operands.push_back(cast(j)->getOperand(0)); - + for (Value *V : VL) + Operands.push_back(cast(V)->getOperand(0)); + TE->setOperandsInOrder(); buildTree_rec(Operands, Depth + 1, {TE, 0}); return; } @@ -2509,7 +2693,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); if (!isTriviallyVectorizable(ID)) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Non-vectorizable call.\n"); return; } @@ -2519,14 +2704,15 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, for (unsigned j = 0; j != NumArgs; ++j) if (hasVectorInstrinsicScalarOpd(ID, j)) ScalarArgs[j] = CI->getArgOperand(j); - for (unsigned i = 1, e = VL.size(); i != e; ++i) { - CallInst *CI2 = dyn_cast(VL[i]); + for (Value *V : VL) { + CallInst *CI2 = dyn_cast(V); if (!CI2 || CI2->getCalledFunction() != Int || getVectorIntrinsicIDForCall(CI2, TLI) != ID || !CI->hasIdenticalOperandBundleSchema(*CI2)) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); - LLVM_DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i] + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); + LLVM_DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *V << "\n"); return; } @@ -2537,7 +2723,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, Value *A1J = CI2->getArgOperand(j); if (ScalarArgs[j] != A1J) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI << " argument " << ScalarArgs[j] << "!=" << A1J << "\n"); @@ -2551,19 +2738,22 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, CI->op_begin() + CI->getBundleOperandsEndIndex(), CI2->op_begin() + CI2->getBundleOperandsStartIndex())) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:" - << *CI << "!=" << *VL[i] << '\n'); + << *CI << "!=" << *V << '\n'); return; } } - auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); + TE->setOperandsInOrder(); for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) { ValueList Operands; // Prepare the operand vector. 
- for (Value *j : VL) { - CallInst *CI2 = dyn_cast(j); + for (Value *V : VL) { + auto *CI2 = cast(V); Operands.push_back(CI2->getArgOperand(i)); } buildTree_rec(Operands, Depth + 1, {TE, i}); @@ -2575,27 +2765,32 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // then do not vectorize this instruction. if (!S.isAltShuffle()) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n"); return; } - auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n"); // Reorder operands if reordering would enable vectorization. if (isa(VL0)) { ValueList Left, Right; reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE); + TE->setOperand(0, Left); + TE->setOperand(1, Right); buildTree_rec(Left, Depth + 1, {TE, 0}); buildTree_rec(Right, Depth + 1, {TE, 1}); return; } + TE->setOperandsInOrder(); for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { ValueList Operands; // Prepare the operand vector. - for (Value *j : VL) - Operands.push_back(cast(j)->getOperand(i)); + for (Value *V : VL) + Operands.push_back(cast(V)->getOperand(i)); buildTree_rec(Operands, Depth + 1, {TE, i}); } @@ -2603,7 +2798,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, } default: BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n"); return; } @@ -2738,7 +2934,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { return ReuseShuffleCost + TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0); } - if (getSameOpcode(VL).getOpcode() == Instruction::ExtractElement && + if (E->getOpcode() == Instruction::ExtractElement && allSameType(VL) && allSameBlock(VL)) { Optional ShuffleKind = isShuffle(VL); if (ShuffleKind.hasValue()) { @@ -2761,11 +2957,10 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { } return ReuseShuffleCost + getGatherCost(VL); } - InstructionsState S = getSameOpcode(VL); - assert(S.getOpcode() && allSameType(VL) && allSameBlock(VL) && "Invalid VL"); - Instruction *VL0 = cast(S.OpValue); - unsigned ShuffleOrOp = S.isAltShuffle() ? - (unsigned) Instruction::ShuffleVector : S.getOpcode(); + assert(E->getOpcode() && allSameType(VL) && allSameBlock(VL) && "Invalid VL"); + Instruction *VL0 = E->getMainOp(); + unsigned ShuffleOrOp = + E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode(); switch (ShuffleOrOp) { case Instruction::PHI: return 0; @@ -2851,7 +3046,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { case Instruction::BitCast: { Type *SrcTy = VL0->getOperand(0)->getType(); int ScalarEltCost = - TTI->getCastInstrCost(S.getOpcode(), ScalarTy, SrcTy, VL0); + TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy, VL0); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } @@ -2864,7 +3059,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { // Check if the values are candidates to demote. 
if (!MinBWs.count(VL0) || VecTy != SrcVecTy) { VecCost = ReuseShuffleCost + - TTI->getCastInstrCost(S.getOpcode(), VecTy, SrcVecTy, VL0); + TTI->getCastInstrCost(E->getOpcode(), VecTy, SrcVecTy, VL0); } return VecCost - ScalarCost; } @@ -2872,14 +3067,14 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { case Instruction::ICmp: case Instruction::Select: { // Calculate the cost of this instruction. - int ScalarEltCost = TTI->getCmpSelInstrCost(S.getOpcode(), ScalarTy, + int ScalarEltCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, Builder.getInt1Ty(), VL0); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size()); int ScalarCost = VecTy->getNumElements() * ScalarEltCost; - int VecCost = TTI->getCmpSelInstrCost(S.getOpcode(), VecTy, MaskTy, VL0); + int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, VL0); return ReuseShuffleCost + VecCost - ScalarCost; } case Instruction::FNeg: @@ -2940,12 +3135,12 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { SmallVector Operands(VL0->operand_values()); int ScalarEltCost = TTI->getArithmeticInstrCost( - S.getOpcode(), ScalarTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands); + E->getOpcode(), ScalarTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } int ScalarCost = VecTy->getNumElements() * ScalarEltCost; - int VecCost = TTI->getArithmeticInstrCost(S.getOpcode(), VecTy, Op1VK, + int VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands); return ReuseShuffleCost + VecCost - ScalarCost; } @@ -3027,11 +3222,11 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { return ReuseShuffleCost + VecCallCost - ScalarCallCost; } case Instruction::ShuffleVector: { - assert(S.isAltShuffle() && - ((Instruction::isBinaryOp(S.getOpcode()) && - Instruction::isBinaryOp(S.getAltOpcode())) || - (Instruction::isCast(S.getOpcode()) && - Instruction::isCast(S.getAltOpcode()))) && + assert(E->isAltShuffle() && + ((Instruction::isBinaryOp(E->getOpcode()) && + Instruction::isBinaryOp(E->getAltOpcode())) || + (Instruction::isCast(E->getOpcode()) && + Instruction::isCast(E->getAltOpcode()))) && "Invalid Shuffle Vector Operand"); int ScalarCost = 0; if (NeedToShuffleReuses) { @@ -3046,25 +3241,25 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { I, TargetTransformInfo::TCK_RecipThroughput); } } - for (Value *i : VL) { - Instruction *I = cast(i); - assert(S.isOpcodeOrAlt(I) && "Unexpected main/alternate opcode"); + for (Value *V : VL) { + Instruction *I = cast(V); + assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode"); ScalarCost += TTI->getInstructionCost( I, TargetTransformInfo::TCK_RecipThroughput); } // VecCost is equal to sum of the cost of creating 2 vectors // and the cost of creating shuffle. 
int VecCost = 0; - if (Instruction::isBinaryOp(S.getOpcode())) { - VecCost = TTI->getArithmeticInstrCost(S.getOpcode(), VecTy); - VecCost += TTI->getArithmeticInstrCost(S.getAltOpcode(), VecTy); + if (Instruction::isBinaryOp(E->getOpcode())) { + VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy); + VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy); } else { - Type *Src0SclTy = S.MainOp->getOperand(0)->getType(); - Type *Src1SclTy = S.AltOp->getOperand(0)->getType(); + Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType(); + Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType(); VectorType *Src0Ty = VectorType::get(Src0SclTy, VL.size()); VectorType *Src1Ty = VectorType::get(Src1SclTy, VL.size()); - VecCost = TTI->getCastInstrCost(S.getOpcode(), VecTy, Src0Ty); - VecCost += TTI->getCastInstrCost(S.getAltOpcode(), VecTy, Src1Ty); + VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty); + VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty); } VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_Select, VecTy, 0); return ReuseShuffleCost + VecCost - ScalarCost; @@ -3098,6 +3293,43 @@ bool BoUpSLP::isFullyVectorizableTinyTree() const { return true; } +bool BoUpSLP::isLoadCombineReductionCandidate(unsigned RdxOpcode) const { + if (RdxOpcode != Instruction::Or) + return false; + + unsigned NumElts = VectorizableTree[0]->Scalars.size(); + Value *FirstReduced = VectorizableTree[0]->Scalars[0]; + + // Look past the reduction to find a source value. Arbitrarily follow the + // path through operand 0 of any 'or'. Also, peek through optional + // shift-left-by-constant. + Value *ZextLoad = FirstReduced; + while (match(ZextLoad, m_Or(m_Value(), m_Value())) || + match(ZextLoad, m_Shl(m_Value(), m_Constant()))) + ZextLoad = cast(ZextLoad)->getOperand(0); + + // Check if the input to the reduction is an extended load. + Value *LoadPtr; + if (!match(ZextLoad, m_ZExt(m_Load(m_Value(LoadPtr))))) + return false; + + // Require that the total load bit width is a legal integer type. + // For example, <8 x i8> --> i64 is a legal integer on a 64-bit target. + // But <16 x i8> --> i128 is not, so the backend probably can't reduce it. + Type *SrcTy = LoadPtr->getType()->getPointerElementType(); + unsigned LoadBitWidth = SrcTy->getIntegerBitWidth() * NumElts; + LLVMContext &Context = FirstReduced->getContext(); + if (!TTI->isTypeLegal(IntegerType::get(Context, LoadBitWidth))) + return false; + + // Everything matched - assume that we can fold the whole sequence using + // load combining. + LLVM_DEBUG(dbgs() << "SLP: Assume load combining for scalar reduction of " + << *(cast(FirstReduced)) << "\n"); + + return true; +} + bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() const { // We can vectorize the tree if its size is greater than or equal to the // minimum size specified by the MinTreeSize command line option. @@ -3319,16 +3551,16 @@ void BoUpSLP::reorderInputsAccordingToOpcode( Right = Ops.getVL(1); } -void BoUpSLP::setInsertPointAfterBundle(ArrayRef VL, - const InstructionsState &S) { +void BoUpSLP::setInsertPointAfterBundle(TreeEntry *E) { // Get the basic block this bundle is in. All instructions in the bundle // should be in this block. 
- auto *Front = cast(S.OpValue); + auto *Front = E->getMainOp(); auto *BB = Front->getParent(); - assert(llvm::all_of(make_range(VL.begin(), VL.end()), [=](Value *V) -> bool { - auto *I = cast(V); - return !S.isOpcodeOrAlt(I) || I->getParent() == BB; - })); + assert(llvm::all_of(make_range(E->Scalars.begin(), E->Scalars.end()), + [=](Value *V) -> bool { + auto *I = cast(V); + return !E->isOpcodeOrAlt(I) || I->getParent() == BB; + })); // The last instruction in the bundle in program order. Instruction *LastInst = nullptr; @@ -3339,7 +3571,7 @@ void BoUpSLP::setInsertPointAfterBundle(ArrayRef VL, // bundle. The end of the bundle is marked by null ScheduleData. if (BlocksSchedules.count(BB)) { auto *Bundle = - BlocksSchedules[BB]->getScheduleData(isOneOf(S, VL.back())); + BlocksSchedules[BB]->getScheduleData(E->isOneOf(E->Scalars.back())); if (Bundle && Bundle->isPartOfBundle()) for (; Bundle; Bundle = Bundle->NextInBundle) if (Bundle->OpValue == Bundle->Inst) @@ -3365,14 +3597,15 @@ void BoUpSLP::setInsertPointAfterBundle(ArrayRef VL, // we both exit early from buildTree_rec and that the bundle be out-of-order // (causing us to iterate all the way to the end of the block). if (!LastInst) { - SmallPtrSet Bundle(VL.begin(), VL.end()); + SmallPtrSet Bundle(E->Scalars.begin(), E->Scalars.end()); for (auto &I : make_range(BasicBlock::iterator(Front), BB->end())) { - if (Bundle.erase(&I) && S.isOpcodeOrAlt(&I)) + if (Bundle.erase(&I) && E->isOpcodeOrAlt(&I)) LastInst = &I; if (Bundle.empty()) break; } } + assert(LastInst && "Failed to find last instruction in bundle"); // Set the insertion point after the last instruction in the bundle. Set the // debug location to Front. @@ -3385,7 +3618,7 @@ Value *BoUpSLP::Gather(ArrayRef VL, VectorType *Ty) { // Generate the 'InsertElement' instruction. for (unsigned i = 0; i < Ty->getNumElements(); ++i) { Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i)); - if (Instruction *Insrt = dyn_cast(Vec)) { + if (auto *Insrt = dyn_cast(Vec)) { GatherSeq.insert(Insrt); CSEBlocks.insert(Insrt->getParent()); @@ -3494,8 +3727,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { return E->VectorizedValue; } - InstructionsState S = getSameOpcode(E->Scalars); - Instruction *VL0 = cast(S.OpValue); + Instruction *VL0 = E->getMainOp(); Type *ScalarTy = VL0->getType(); if (StoreInst *SI = dyn_cast(VL0)) ScalarTy = SI->getValueOperand()->getType(); @@ -3504,7 +3736,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty(); if (E->NeedToGather) { - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); auto *V = Gather(E->Scalars, VecTy); if (NeedToShuffleReuses) { V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), @@ -3518,11 +3750,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { return V; } - unsigned ShuffleOrOp = S.isAltShuffle() ? - (unsigned) Instruction::ShuffleVector : S.getOpcode(); + unsigned ShuffleOrOp = + E->isAltShuffle() ? 
(unsigned)Instruction::ShuffleVector : E->getOpcode(); switch (ShuffleOrOp) { case Instruction::PHI: { - PHINode *PH = dyn_cast(VL0); + auto *PH = cast(VL0); Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI()); Builder.SetCurrentDebugLocation(PH->getDebugLoc()); PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues()); @@ -3577,7 +3809,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { E->VectorizedValue = V; return V; } - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); auto *V = Gather(E->Scalars, VecTy); if (NeedToShuffleReuses) { V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), @@ -3612,7 +3844,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { E->VectorizedValue = NewV; return NewV; } - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); auto *V = Gather(E->Scalars, VecTy); if (NeedToShuffleReuses) { V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), @@ -3637,7 +3869,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: { - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); Value *InVec = vectorizeTree(E->getOperand(0)); @@ -3646,7 +3878,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { return E->VectorizedValue; } - CastInst *CI = dyn_cast(VL0); + auto *CI = cast(VL0); Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy); if (NeedToShuffleReuses) { V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), @@ -3658,7 +3890,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } case Instruction::FCmp: case Instruction::ICmp: { - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); Value *L = vectorizeTree(E->getOperand(0)); Value *R = vectorizeTree(E->getOperand(1)); @@ -3670,7 +3902,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { CmpInst::Predicate P0 = cast(VL0)->getPredicate(); Value *V; - if (S.getOpcode() == Instruction::FCmp) + if (E->getOpcode() == Instruction::FCmp) V = Builder.CreateFCmp(P0, L, R); else V = Builder.CreateICmp(P0, L, R); @@ -3685,7 +3917,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { return V; } case Instruction::Select: { - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); Value *Cond = vectorizeTree(E->getOperand(0)); Value *True = vectorizeTree(E->getOperand(1)); @@ -3706,7 +3938,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { return V; } case Instruction::FNeg: { - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); Value *Op = vectorizeTree(E->getOperand(0)); @@ -3716,7 +3948,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } Value *V = Builder.CreateUnOp( - static_cast(S.getOpcode()), Op); + static_cast(E->getOpcode()), Op); propagateIRFlags(V, E->Scalars, VL0); if (auto *I = dyn_cast(V)) V = propagateMetadata(I, E->Scalars); @@ -3748,7 +3980,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::And: case Instruction::Or: case Instruction::Xor: { - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); Value *LHS = vectorizeTree(E->getOperand(0)); Value *RHS = vectorizeTree(E->getOperand(1)); @@ -3759,7 +3991,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } Value *V = Builder.CreateBinOp( - static_cast(S.getOpcode()), LHS, RHS); + static_cast(E->getOpcode()), LHS, + RHS); propagateIRFlags(V, E->Scalars, VL0); if (auto *I = dyn_cast(V)) V = propagateMetadata(I, E->Scalars); @@ -3776,12 +4009,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case 
Instruction::Load: { // Loads are inserted at the head of the tree because we don't want to // sink them all the way down past store instructions. - bool IsReorder = !E->ReorderIndices.empty(); - if (IsReorder) { - S = getSameOpcode(E->Scalars, E->ReorderIndices.front()); - VL0 = cast(S.OpValue); - } - setInsertPointAfterBundle(E->Scalars, S); + bool IsReorder = E->updateStateIfReorder(); + if (IsReorder) + VL0 = E->getMainOp(); + setInsertPointAfterBundle(E); LoadInst *LI = cast(VL0); Type *ScalarLoadTy = LI->getType(); @@ -3797,11 +4028,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { if (getTreeEntry(PO)) ExternalUses.push_back(ExternalUser(PO, cast(VecPtr), 0)); - unsigned Alignment = LI->getAlignment(); + MaybeAlign Alignment = MaybeAlign(LI->getAlignment()); LI = Builder.CreateLoad(VecTy, VecPtr); - if (!Alignment) { - Alignment = DL->getABITypeAlignment(ScalarLoadTy); - } + if (!Alignment) + Alignment = MaybeAlign(DL->getABITypeAlignment(ScalarLoadTy)); LI->setAlignment(Alignment); Value *V = propagateMetadata(LI, E->Scalars); if (IsReorder) { @@ -3824,7 +4054,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { unsigned Alignment = SI->getAlignment(); unsigned AS = SI->getPointerAddressSpace(); - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); Value *VecValue = vectorizeTree(E->getOperand(0)); Value *ScalarPtr = SI->getPointerOperand(); @@ -3840,7 +4070,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { if (!Alignment) Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType()); - ST->setAlignment(Alignment); + ST->setAlignment(Align(Alignment)); Value *V = propagateMetadata(ST, E->Scalars); if (NeedToShuffleReuses) { V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), @@ -3851,7 +4081,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { return V; } case Instruction::GetElementPtr: { - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); Value *Op0 = vectorizeTree(E->getOperand(0)); @@ -3878,13 +4108,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } case Instruction::Call: { CallInst *CI = cast(VL0); - setInsertPointAfterBundle(E->Scalars, S); - Function *FI; + setInsertPointAfterBundle(E); + Intrinsic::ID IID = Intrinsic::not_intrinsic; - Value *ScalarArg = nullptr; - if (CI && (FI = CI->getCalledFunction())) { + if (Function *FI = CI->getCalledFunction()) IID = FI->getIntrinsicID(); - } + + Value *ScalarArg = nullptr; std::vector OpVecs; for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) { ValueList OpVL; @@ -3926,20 +4156,20 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { return V; } case Instruction::ShuffleVector: { - assert(S.isAltShuffle() && - ((Instruction::isBinaryOp(S.getOpcode()) && - Instruction::isBinaryOp(S.getAltOpcode())) || - (Instruction::isCast(S.getOpcode()) && - Instruction::isCast(S.getAltOpcode()))) && + assert(E->isAltShuffle() && + ((Instruction::isBinaryOp(E->getOpcode()) && + Instruction::isBinaryOp(E->getAltOpcode())) || + (Instruction::isCast(E->getOpcode()) && + Instruction::isCast(E->getAltOpcode()))) && "Invalid Shuffle Vector Operand"); - Value *LHS, *RHS; - if (Instruction::isBinaryOp(S.getOpcode())) { - setInsertPointAfterBundle(E->Scalars, S); + Value *LHS = nullptr, *RHS = nullptr; + if (Instruction::isBinaryOp(E->getOpcode())) { + setInsertPointAfterBundle(E); LHS = vectorizeTree(E->getOperand(0)); RHS = vectorizeTree(E->getOperand(1)); } else { - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); LHS = vectorizeTree(E->getOperand(0)); } @@ 
-3949,16 +4179,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } Value *V0, *V1; - if (Instruction::isBinaryOp(S.getOpcode())) { + if (Instruction::isBinaryOp(E->getOpcode())) { V0 = Builder.CreateBinOp( - static_cast(S.getOpcode()), LHS, RHS); + static_cast(E->getOpcode()), LHS, RHS); V1 = Builder.CreateBinOp( - static_cast(S.getAltOpcode()), LHS, RHS); + static_cast(E->getAltOpcode()), LHS, RHS); } else { V0 = Builder.CreateCast( - static_cast(S.getOpcode()), LHS, VecTy); + static_cast(E->getOpcode()), LHS, VecTy); V1 = Builder.CreateCast( - static_cast(S.getAltOpcode()), LHS, VecTy); + static_cast(E->getAltOpcode()), LHS, VecTy); } // Create shuffle to take alternate operations from the vector. @@ -3969,8 +4199,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { SmallVector Mask(e); for (unsigned i = 0; i < e; ++i) { auto *OpInst = cast(E->Scalars[i]); - assert(S.isOpcodeOrAlt(OpInst) && "Unexpected main/alternate opcode"); - if (OpInst->getOpcode() == S.getAltOpcode()) { + assert(E->isOpcodeOrAlt(OpInst) && "Unexpected main/alternate opcode"); + if (OpInst->getOpcode() == E->getAltOpcode()) { Mask[i] = Builder.getInt32(e + i); AltScalars.push_back(E->Scalars[i]); } else { @@ -4136,20 +4366,18 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) { Value *Scalar = Entry->Scalars[Lane]; +#ifndef NDEBUG Type *Ty = Scalar->getType(); if (!Ty->isVoidTy()) { -#ifndef NDEBUG for (User *U : Scalar->users()) { LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n"); - // It is legal to replace users in the ignorelist by undef. + // It is legal to delete users in the ignorelist. assert((getTreeEntry(U) || is_contained(UserIgnoreList, U)) && - "Replacing out-of-tree value with undef"); + "Deleting out-of-tree value"); } -#endif - Value *Undef = UndefValue::get(Ty); - Scalar->replaceAllUsesWith(Undef); } +#endif LLVM_DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n"); eraseInstruction(cast(Scalar)); } @@ -4165,7 +4393,7 @@ void BoUpSLP::optimizeGatherSequence() { << " gather sequences instructions.\n"); // LICM InsertElementInst sequences. for (Instruction *I : GatherSeq) { - if (!isa(I) && !isa(I)) + if (isDeleted(I)) continue; // Check if this block is inside a loop. @@ -4219,6 +4447,8 @@ void BoUpSLP::optimizeGatherSequence() { // For all instructions in blocks containing gather sequences: for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) { Instruction *In = &*it++; + if (isDeleted(In)) + continue; if (!isa(In) && !isa(In)) continue; @@ -4245,11 +4475,11 @@ void BoUpSLP::optimizeGatherSequence() { // Groups the instructions to a bundle (which is then a single scheduling entity) // and schedules instructions until the bundle gets ready. -bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, - BoUpSLP *SLP, - const InstructionsState &S) { +Optional +BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, + const InstructionsState &S) { if (isa(S.OpValue)) - return true; + return nullptr; // Initialize the instruction bundle. Instruction *OldScheduleEnd = ScheduleEnd; @@ -4262,7 +4492,7 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, // instructions of the bundle. 
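// The hunk above changes tryScheduleBundle() from returning bool to
// returning Optional<ScheduleData *>, so callers can tell three outcomes
// apart: "nothing needed scheduling" (a non-error nullptr, e.g. for PHIs),
// "scheduling failed" (None), and "here is the scheduled bundle" (a real
// pointer).  A minimal, self-contained sketch of that tri-state contract,
// using std::optional as a stand-in for llvm::Optional and a hypothetical
// ScheduleItem instead of the real ScheduleData:
#include <cstdio>
#include <optional>
#include <vector>

struct ScheduleItem { int FirstValue; };

static std::optional<ScheduleItem *>
trySchedule(const std::vector<int> &Bundle, ScheduleItem &Storage) {
  if (Bundle.empty())
    return nullptr;        // Nothing to schedule; not a failure.
  if (Bundle.front() < 0)
    return std::nullopt;   // Scheduling failed; caller must give up on VL.
  Storage.FirstValue = Bundle.front();
  return &Storage;         // Success: hand the bundle back.
}

int main() {
  ScheduleItem Storage{0};
  std::vector<std::vector<int>> Tests = {{}, {-1, 2}, {3, 4}};
  for (const std::vector<int> &B : Tests) {
    std::optional<ScheduleItem *> Res = trySchedule(B, Storage);
    if (!Res)
      std::puts("scheduling failed");
    else if (*Res == nullptr)
      std::puts("nothing to schedule");
    else
      std::printf("bundle starts at %d\n", (*Res)->FirstValue);
  }
  return 0;
}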
for (Value *V : VL) { if (!extendSchedulingRegion(V, S)) - return false; + return None; } for (Value *V : VL) { @@ -4308,6 +4538,7 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, resetSchedule(); initialFillReadyList(ReadyInsts); } + assert(Bundle && "Failed to find schedule bundle"); LLVM_DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle << " in block " << BB->getName() << "\n"); @@ -4329,9 +4560,9 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, } if (!Bundle->isReady()) { cancelScheduling(VL, S.OpValue); - return false; + return None; } - return true; + return Bundle; } void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef VL, @@ -4364,7 +4595,7 @@ void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef VL, BoUpSLP::ScheduleData *BoUpSLP::BlockScheduling::allocateScheduleDataChunks() { // Allocate a new ScheduleData for the instruction. if (ChunkPos >= ChunkSize) { - ScheduleDataChunks.push_back(llvm::make_unique(ChunkSize)); + ScheduleDataChunks.push_back(std::make_unique(ChunkSize)); ChunkPos = 0; } return &(ScheduleDataChunks.back()[ChunkPos++]); @@ -4977,7 +5208,7 @@ struct SLPVectorizer : public FunctionPass { auto *SE = &getAnalysis().getSE(); auto *TTI = &getAnalysis().getTTI(F); auto *TLIP = getAnalysisIfAvailable(); - auto *TLI = TLIP ? &TLIP->getTLI() : nullptr; + auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr; auto *AA = &getAnalysis().getAAResults(); auto *LI = &getAnalysis().getLoopInfo(); auto *DT = &getAnalysis().getDomTree(); @@ -5052,7 +5283,7 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, // If the target claims to have no vector registers don't attempt // vectorization. - if (!TTI->getNumberOfRegisters(true)) + if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true))) return false; // Don't vectorize when the attribute NoImplicitFloat is used. @@ -5100,19 +5331,6 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, return Changed; } -/// Check that the Values in the slice in VL array are still existent in -/// the WeakTrackingVH array. -/// Vectorization of part of the VL array may cause later values in the VL array -/// to become invalid. We track when this has happened in the WeakTrackingVH -/// array. -static bool hasValueBeenRAUWed(ArrayRef VL, - ArrayRef VH, unsigned SliceBegin, - unsigned SliceSize) { - VL = VL.slice(SliceBegin, SliceSize); - VH = VH.slice(SliceBegin, SliceSize); - return !std::equal(VL.begin(), VL.end(), VH.begin()); -} - bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R, unsigned VecRegSize) { const unsigned ChainLen = Chain.size(); @@ -5124,20 +5342,20 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R, if (!isPowerOf2_32(Sz) || VF < 2) return false; - // Keep track of values that were deleted by vectorizing in the loop below. - const SmallVector TrackValues(Chain.begin(), Chain.end()); - bool Changed = false; // Look for profitable vectorizable trees at all offsets, starting at zero. for (unsigned i = 0, e = ChainLen; i + VF <= e; ++i) { + ArrayRef Operands = Chain.slice(i, VF); // Check that a previous iteration of this loop did not delete the Value. 
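// The hunks above and below retire the WeakTrackingVH-based
// hasValueBeenRAUWed() helper: instead of watching for handles that went
// null, the pass now simply asks whether any instruction in the candidate
// slice has already been erased by an earlier vectorization
// (R.isDeleted(I)) and skips the slice if so.  A self-contained sketch of
// that predicate -- DeletedTracker/sliceHasDeleted are illustrative names,
// not the real BoUpSLP interface:
#include <algorithm>
#include <cstdio>
#include <unordered_set>
#include <vector>

struct Instr { int Id; };

struct DeletedTracker {
  std::unordered_set<const Instr *> Erased;
  bool isDeleted(const Instr *I) const { return Erased.count(I) != 0; }
};

// True if any element of Chain[Begin, Begin + Len) was already erased.
static bool sliceHasDeleted(const std::vector<Instr *> &Chain, size_t Begin,
                            size_t Len, const DeletedTracker &R) {
  return std::any_of(Chain.begin() + Begin, Chain.begin() + Begin + Len,
                     [&R](const Instr *I) { return R.isDeleted(I); });
}

int main() {
  Instr A{0}, B{1}, C{2}, D{3};
  std::vector<Instr *> Chain = {&A, &B, &C, &D};
  DeletedTracker R;
  R.Erased.insert(&C);   // Pretend C was consumed by a previous tree.
  std::printf("slice at 0 (len 2): %s\n",
              sliceHasDeleted(Chain, 0, 2, R) ? "skip" : "analyze");
  std::printf("slice at 2 (len 2): %s\n",
              sliceHasDeleted(Chain, 2, 2, R) ? "skip" : "analyze");
  return 0;
}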
- if (hasValueBeenRAUWed(Chain, TrackValues, i, VF)) + if (llvm::any_of(Operands, [&R](Value *V) { + auto *I = dyn_cast(V); + return I && R.isDeleted(I); + })) continue; LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i << "\n"); - ArrayRef Operands = Chain.slice(i, VF); R.buildTree(Operands); if (R.isTreeTinyAndNotFullyVectorizable()) @@ -5329,12 +5547,8 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, bool CandidateFound = false; int MinCost = SLPCostThreshold; - // Keep track of values that were deleted by vectorizing in the loop below. - SmallVector TrackValues(VL.begin(), VL.end()); - unsigned NextInst = 0, MaxInst = VL.size(); - for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; - VF /= 2) { + for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; VF /= 2) { // No actual vectorization should happen, if number of parts is the same as // provided vectorization factor (i.e. the scalar type is used for vector // code during codegen). @@ -5352,13 +5566,16 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2) break; + ArrayRef Ops = VL.slice(I, OpsWidth); // Check that a previous iteration of this loop did not delete the Value. - if (hasValueBeenRAUWed(VL, TrackValues, I, OpsWidth)) + if (llvm::any_of(Ops, [&R](Value *V) { + auto *I = dyn_cast(V); + return I && R.isDeleted(I); + })) continue; LLVM_DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations " << "\n"); - ArrayRef Ops = VL.slice(I, OpsWidth); R.buildTree(Ops); Optional> Order = R.bestOrder(); @@ -5571,7 +5788,7 @@ class HorizontalReduction { Value *createOp(IRBuilder<> &Builder, const Twine &Name) const { assert(isVectorizable() && "Expected add|fadd or min/max reduction operation."); - Value *Cmp; + Value *Cmp = nullptr; switch (Kind) { case RK_Arithmetic: return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, LHS, RHS, @@ -5579,23 +5796,23 @@ class HorizontalReduction { case RK_Min: Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSLT(LHS, RHS) : Builder.CreateFCmpOLT(LHS, RHS); - break; + return Builder.CreateSelect(Cmp, LHS, RHS, Name); case RK_Max: Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSGT(LHS, RHS) : Builder.CreateFCmpOGT(LHS, RHS); - break; + return Builder.CreateSelect(Cmp, LHS, RHS, Name); case RK_UMin: assert(Opcode == Instruction::ICmp && "Expected integer types."); Cmp = Builder.CreateICmpULT(LHS, RHS); - break; + return Builder.CreateSelect(Cmp, LHS, RHS, Name); case RK_UMax: assert(Opcode == Instruction::ICmp && "Expected integer types."); Cmp = Builder.CreateICmpUGT(LHS, RHS); - break; + return Builder.CreateSelect(Cmp, LHS, RHS, Name); case RK_None: - llvm_unreachable("Unknown reduction operation."); + break; } - return Builder.CreateSelect(Cmp, LHS, RHS, Name); + llvm_unreachable("Unknown reduction operation."); } public: @@ -6203,6 +6420,8 @@ public: } if (V.isTreeTinyAndNotFullyVectorizable()) break; + if (V.isLoadCombineReductionCandidate(ReductionData.getOpcode())) + break; V.computeMinimumValueSizes(); @@ -6275,6 +6494,9 @@ public: } // Update users. ReductionRoot->replaceAllUsesWith(VectorizedTree); + // Mark all scalar reduction ops for deletion, they are replaced by the + // vector reductions. + V.eraseInstructions(IgnoreList); } return VectorizedTree != nullptr; } @@ -6323,7 +6545,7 @@ private: IsPairwiseReduction = PairwiseRdxCost < SplittingRdxCost; int VecReduxCost = IsPairwiseReduction ? 
PairwiseRdxCost : SplittingRdxCost; - int ScalarReduxCost; + int ScalarReduxCost = 0; switch (ReductionData.getKind()) { case RK_Arithmetic: ScalarReduxCost = @@ -6429,10 +6651,9 @@ static bool findBuildVector(InsertElementInst *LastInsertElem, /// \return true if it matches. static bool findBuildAggregate(InsertValueInst *IV, SmallVectorImpl &BuildVectorOpds) { - Value *V; do { BuildVectorOpds.push_back(IV->getInsertedValueOperand()); - V = IV->getAggregateOperand(); + Value *V = IV->getAggregateOperand(); if (isa(V)) break; IV = dyn_cast(V); @@ -6530,18 +6751,13 @@ static bool tryToVectorizeHorReductionOrInstOperands( // horizontal reduction. // Interrupt the process if the Root instruction itself was vectorized or all // sub-trees not higher that RecursionMaxDepth were analyzed/vectorized. - SmallVector, 8> Stack(1, {Root, 0}); + SmallVector, 8> Stack(1, {Root, 0}); SmallPtrSet VisitedInstrs; bool Res = false; while (!Stack.empty()) { - Value *V; + Instruction *Inst; unsigned Level; - std::tie(V, Level) = Stack.pop_back_val(); - if (!V) - continue; - auto *Inst = dyn_cast(V); - if (!Inst) - continue; + std::tie(Inst, Level) = Stack.pop_back_val(); auto *BI = dyn_cast(Inst); auto *SI = dyn_cast(Inst); if (BI || SI) { @@ -6582,8 +6798,8 @@ static bool tryToVectorizeHorReductionOrInstOperands( for (auto *Op : Inst->operand_values()) if (VisitedInstrs.insert(Op).second) if (auto *I = dyn_cast(Op)) - if (!isa(I) && I->getParent() == BB) - Stack.emplace_back(Op, Level); + if (!isa(I) && !R.isDeleted(I) && I->getParent() == BB) + Stack.emplace_back(I, Level); } return Res; } @@ -6652,11 +6868,10 @@ bool SLPVectorizerPass::vectorizeCmpInst(CmpInst *CI, BasicBlock *BB, } bool SLPVectorizerPass::vectorizeSimpleInstructions( - SmallVectorImpl &Instructions, BasicBlock *BB, BoUpSLP &R) { + SmallVectorImpl &Instructions, BasicBlock *BB, BoUpSLP &R) { bool OpsChanged = false; - for (auto &VH : reverse(Instructions)) { - auto *I = dyn_cast_or_null(VH); - if (!I) + for (auto *I : reverse(Instructions)) { + if (R.isDeleted(I)) continue; if (auto *LastInsertValue = dyn_cast(I)) OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R); @@ -6685,7 +6900,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { if (!P) break; - if (!VisitedInstrs.count(P)) + if (!VisitedInstrs.count(P) && !R.isDeleted(P)) Incoming.push_back(P); } @@ -6729,9 +6944,12 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { VisitedInstrs.clear(); - SmallVector PostProcessInstructions; + SmallVector PostProcessInstructions; SmallDenseSet KeyNodes; for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + // Skip instructions marked for the deletion. + if (R.isDeleted(&*it)) + continue; // We may go through BB multiple times so skip the one we have checked. if (!VisitedInstrs.insert(&*it).second) { if (it->use_empty() && KeyNodes.count(&*it) > 0 && @@ -6811,10 +7029,16 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { LLVM_DEBUG(dbgs() << "SLP: Analyzing a getelementptr list of length " << Entry.second.size() << ".\n"); - // We process the getelementptr list in chunks of 16 (like we do for - // stores) to minimize compile-time. - for (unsigned BI = 0, BE = Entry.second.size(); BI < BE; BI += 16) { - auto Len = std::min(BE - BI, 16); + // Process the GEP list in chunks suitable for the target's supported + // vector size. If a vector register can't hold 1 element, we are done. 
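// The replacement code right below sizes the per-iteration GEP chunk from
// the target's widest vector register instead of the old hard-coded 16:
// bail out if even one element does not fit, otherwise process
// MaxVecRegSize / EltSize candidates at a time.  A small sketch of just
// that arithmetic and slicing, with example bit widths standing in for
// what R.getMaxVecRegSize()/R.getVectorElementSize() would report:
#include <algorithm>
#include <cstdio>
#include <vector>

static void processInChunks(const std::vector<int> &Entries,
                            unsigned MaxVecRegSizeBits, unsigned EltSizeBits) {
  if (MaxVecRegSizeBits < EltSizeBits)
    return;                                   // Register can't hold 1 element.
  unsigned MaxElts = MaxVecRegSizeBits / EltSizeBits;
  for (size_t BI = 0, BE = Entries.size(); BI < BE; BI += MaxElts) {
    size_t Len = std::min<size_t>(BE - BI, MaxElts);
    std::printf("chunk at %zu with %zu candidates\n", BI, Len);
    // ... seed the candidate set from Entries[BI .. BI + Len) here ...
  }
}

int main() {
  std::vector<int> GEPs(10, 0);                 // 10 candidate getelementptrs.
  processInChunks(GEPs, /*MaxVecRegSizeBits=*/128, /*EltSizeBits=*/64);
  return 0;
}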
+ unsigned MaxVecRegSize = R.getMaxVecRegSize(); + unsigned EltSize = R.getVectorElementSize(Entry.second[0]); + if (MaxVecRegSize < EltSize) + continue; + + unsigned MaxElts = MaxVecRegSize / EltSize; + for (unsigned BI = 0, BE = Entry.second.size(); BI < BE; BI += MaxElts) { + auto Len = std::min(BE - BI, MaxElts); auto GEPList = makeArrayRef(&Entry.second[BI], Len); // Initialize a set a candidate getelementptrs. Note that we use a @@ -6824,10 +7048,10 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { SetVector Candidates(GEPList.begin(), GEPList.end()); // Some of the candidates may have already been vectorized after we - // initially collected them. If so, the WeakTrackingVHs will have - // nullified the - // values, so remove them from the set of candidates. - Candidates.remove(nullptr); + // initially collected them. If so, they are marked as deleted, so remove + // them from the set of candidates. + Candidates.remove_if( + [&R](Value *I) { return R.isDeleted(cast(I)); }); // Remove from the set of candidates all pairs of getelementptrs with // constant differences. Such getelementptrs are likely not good @@ -6835,18 +7059,18 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { // computed from the other. We also ensure all candidate getelementptr // indices are unique. for (int I = 0, E = GEPList.size(); I < E && Candidates.size() > 1; ++I) { - auto *GEPI = cast(GEPList[I]); + auto *GEPI = GEPList[I]; if (!Candidates.count(GEPI)) continue; auto *SCEVI = SE->getSCEV(GEPList[I]); for (int J = I + 1; J < E && Candidates.size() > 1; ++J) { - auto *GEPJ = cast(GEPList[J]); + auto *GEPJ = GEPList[J]; auto *SCEVJ = SE->getSCEV(GEPList[J]); if (isa(SE->getMinusSCEV(SCEVI, SCEVJ))) { - Candidates.remove(GEPList[I]); - Candidates.remove(GEPList[J]); + Candidates.remove(GEPI); + Candidates.remove(GEPJ); } else if (GEPI->idx_begin()->get() == GEPJ->idx_begin()->get()) { - Candidates.remove(GEPList[J]); + Candidates.remove(GEPJ); } } } diff --git a/lib/Transforms/Vectorize/VPlan.cpp b/lib/Transforms/Vectorize/VPlan.cpp index 517d759d7bfc..4b80d1fb20aa 100644 --- a/lib/Transforms/Vectorize/VPlan.cpp +++ b/lib/Transforms/Vectorize/VPlan.cpp @@ -283,6 +283,12 @@ iplist::iterator VPRecipeBase::eraseFromParent() { return getParent()->getRecipeList().erase(getIterator()); } +void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) { + InsertPos->getParent()->getRecipeList().splice( + std::next(InsertPos->getIterator()), getParent()->getRecipeList(), + getIterator()); +} + void VPInstruction::generateInstruction(VPTransformState &State, unsigned Part) { IRBuilder<> &Builder = State.Builder; @@ -309,6 +315,14 @@ void VPInstruction::generateInstruction(VPTransformState &State, State.set(this, V, Part); break; } + case Instruction::Select: { + Value *Cond = State.get(getOperand(0), Part); + Value *Op1 = State.get(getOperand(1), Part); + Value *Op2 = State.get(getOperand(2), Part); + Value *V = Builder.CreateSelect(Cond, Op1, Op2); + State.set(this, V, Part); + break; + } default: llvm_unreachable("Unsupported opcode for instruction"); } @@ -728,7 +742,7 @@ void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New, auto NewIGIter = Old2New.find(IG); if (NewIGIter == Old2New.end()) Old2New[IG] = new InterleaveGroup( - IG->getFactor(), IG->isReverse(), IG->getAlignment()); + IG->getFactor(), IG->isReverse(), Align(IG->getAlignment())); if (Inst == IG->getInsertPos()) Old2New[IG]->setInsertPos(VPInst); @@ -736,7 +750,8 @@ void 
VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New, InterleaveGroupMap[VPInst] = Old2New[IG]; InterleaveGroupMap[VPInst]->insertMember( VPInst, IG->getIndex(Inst), - IG->isReverse() ? (-1) * int(IG->getFactor()) : IG->getFactor()); + Align(IG->isReverse() ? (-1) * int(IG->getFactor()) + : IG->getFactor())); } } else if (VPRegionBlock *Region = dyn_cast(Block)) visitRegion(Region, Old2New, IAI); diff --git a/lib/Transforms/Vectorize/VPlan.h b/lib/Transforms/Vectorize/VPlan.h index 8a06412ad590..44d8a198f27e 100644 --- a/lib/Transforms/Vectorize/VPlan.h +++ b/lib/Transforms/Vectorize/VPlan.h @@ -615,6 +615,10 @@ public: /// the specified recipe. void insertBefore(VPRecipeBase *InsertPos); + /// Unlink this recipe from its current VPBasicBlock and insert it into + /// the VPBasicBlock that MovePos lives in, right after MovePos. + void moveAfter(VPRecipeBase *MovePos); + /// This method unlinks 'this' from the containing basic block and deletes it. /// /// \returns an iterator pointing to the element after the erased one diff --git a/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp b/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp index 7ed7d21b6caa..b22d3190d654 100644 --- a/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp +++ b/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp @@ -21,7 +21,7 @@ void VPlanHCFGTransforms::VPInstructionsToVPRecipes( LoopVectorizationLegality::InductionList *Inductions, SmallPtrSetImpl &DeadInstructions) { - VPRegionBlock *TopRegion = dyn_cast(Plan->getEntry()); + auto *TopRegion = cast(Plan->getEntry()); ReversePostOrderTraversal RPOT(TopRegion->getEntry()); // Condition bit VPValues get deleted during transformation to VPRecipes. diff --git a/lib/Transforms/Vectorize/VPlanSLP.cpp b/lib/Transforms/Vectorize/VPlanSLP.cpp index e5ab24e52df6..9019ed15ec5f 100644 --- a/lib/Transforms/Vectorize/VPlanSLP.cpp +++ b/lib/Transforms/Vectorize/VPlanSLP.cpp @@ -346,11 +346,14 @@ SmallVector VPlanSlp::reorderMultiNodeOps() { void VPlanSlp::dumpBundle(ArrayRef Values) { dbgs() << " Ops: "; - for (auto Op : Values) - if (auto *Instr = cast_or_null(Op)->getUnderlyingInstr()) - dbgs() << *Instr << " | "; - else - dbgs() << " nullptr | "; + for (auto Op : Values) { + if (auto *VPInstr = cast_or_null(Op)) + if (auto *Instr = VPInstr->getUnderlyingInstr()) { + dbgs() << *Instr << " | "; + continue; + } + dbgs() << " nullptr | "; + } dbgs() << "\n"; } diff --git a/lib/WindowsManifest/WindowsManifestMerger.cpp b/lib/WindowsManifest/WindowsManifestMerger.cpp index d092ab493c9b..031a963cd3b0 100644 --- a/lib/WindowsManifest/WindowsManifestMerger.cpp +++ b/lib/WindowsManifest/WindowsManifestMerger.cpp @@ -58,7 +58,7 @@ private: #if LLVM_LIBXML2_ENABLED -static const std::pair MtNsHrefsPrefixes[] = { +static constexpr std::pair MtNsHrefsPrefixes[] = { {"urn:schemas-microsoft-com:asm.v1", "ms_asmv1"}, {"urn:schemas-microsoft-com:asm.v2", "ms_asmv2"}, {"urn:schemas-microsoft-com:asm.v3", "ms_asmv3"}, @@ -704,7 +704,7 @@ bool windows_manifest::isAvailable() { return false; } #endif WindowsManifestMerger::WindowsManifestMerger() - : Impl(make_unique()) {} + : Impl(std::make_unique()) {} WindowsManifestMerger::~WindowsManifestMerger() {} diff --git a/lib/XRay/FDRRecordProducer.cpp b/lib/XRay/FDRRecordProducer.cpp index 452bc6c55fb8..479b710444be 100644 --- a/lib/XRay/FDRRecordProducer.cpp +++ b/lib/XRay/FDRRecordProducer.cpp @@ -40,32 +40,32 @@ metadataRecordType(const XRayFileHeader &Header, uint8_t T) { "Invalid metadata record type: %d", T); switch (T) { case 
MetadataRecordKinds::NewBufferKind: - return make_unique(); + return std::make_unique(); case MetadataRecordKinds::EndOfBufferKind: if (Header.Version >= 2) return createStringError( std::make_error_code(std::errc::executable_format_error), "End of buffer records are no longer supported starting version " "2 of the log."); - return make_unique(); + return std::make_unique(); case MetadataRecordKinds::NewCPUIdKind: - return make_unique(); + return std::make_unique(); case MetadataRecordKinds::TSCWrapKind: - return make_unique(); + return std::make_unique(); case MetadataRecordKinds::WalltimeMarkerKind: - return make_unique(); + return std::make_unique(); case MetadataRecordKinds::CustomEventMarkerKind: if (Header.Version >= 5) - return make_unique(); - return make_unique(); + return std::make_unique(); + return std::make_unique(); case MetadataRecordKinds::CallArgumentKind: - return make_unique(); + return std::make_unique(); case MetadataRecordKinds::BufferExtentsKind: - return make_unique(); + return std::make_unique(); case MetadataRecordKinds::TypedEventMarkerKind: - return make_unique(); + return std::make_unique(); case MetadataRecordKinds::PidKind: - return make_unique(); + return std::make_unique(); case MetadataRecordKinds::EnumEndMarker: llvm_unreachable("Invalid MetadataRecordKind"); } @@ -89,7 +89,7 @@ FileBasedRecordProducer::findNextBufferExtent() { if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading one byte from offset %d.", OffsetPtr); + "Failed reading one byte from offset %" PRId64 ".", OffsetPtr); if (isMetadataIntroducer(FirstByte)) { auto LoadedType = FirstByte >> 1; @@ -130,7 +130,7 @@ Expected> FileBasedRecordProducer::produce() { R = std::move(BufferExtentsOrError.get()); assert(R != nullptr); assert(isa(R.get())); - auto BE = dyn_cast(R.get()); + auto BE = cast(R.get()); CurrentBufferBytes = BE->size(); return std::move(R); } @@ -151,7 +151,7 @@ Expected> FileBasedRecordProducer::produce() { if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading one byte from offset %d.", OffsetPtr); + "Failed reading one byte from offset %" PRId64 ".", OffsetPtr); // For metadata records, handle especially here. 
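// Throughout these XRay hunks the reader offsets widen to 64 bits, so
// every "%d" that used to print an offset becomes the <cinttypes> PRId64
// macro, spliced into the literal via string concatenation so the
// conversion always matches a 64-bit argument regardless of platform.
// A tiny stand-alone illustration of that idiom (the message text is just
// an example, not one of the patch's diagnostics):
#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
  int64_t OffsetPtr = 4096;
  // The preprocessor joins the pieces into a single format string whose
  // length modifier is correct for this target's 64-bit integer type.
  std::printf("Failed reading one byte from offset %" PRId64 ".\n", OffsetPtr);
  return 0;
}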
if (isMetadataIntroducer(FirstByte)) { @@ -162,11 +162,12 @@ Expected> FileBasedRecordProducer::produce() { MetadataRecordOrErr.takeError(), createStringError( std::make_error_code(std::errc::executable_format_error), - "Encountered an unsupported metadata record (%d) at offset %d.", + "Encountered an unsupported metadata record (%d) " + "at offset %" PRId64 ".", LoadedType, PreReadOffset)); R = std::move(MetadataRecordOrErr.get()); } else { - R = llvm::make_unique(); + R = std::make_unique(); } RecordInitializer RI(E, OffsetPtr); @@ -182,8 +183,8 @@ Expected> FileBasedRecordProducer::produce() { if (OffsetPtr - PreReadOffset > CurrentBufferBytes) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Buffer over-read at offset %d (over-read by %d bytes); Record Type " - "= %s.", + "Buffer over-read at offset %" PRId64 " (over-read by %" PRId64 + " bytes); Record Type = %s.", OffsetPtr, (OffsetPtr - PreReadOffset) - CurrentBufferBytes, Record::kindToString(R->getRecordType()).data()); diff --git a/lib/XRay/FileHeaderReader.cpp b/lib/XRay/FileHeaderReader.cpp index 3fb021906a6f..6b6daf9deba5 100644 --- a/lib/XRay/FileHeaderReader.cpp +++ b/lib/XRay/FileHeaderReader.cpp @@ -12,7 +12,7 @@ namespace xray { // Populates the FileHeader reference by reading the first 32 bytes of the file. Expected readBinaryFormatHeader(DataExtractor &HeaderExtractor, - uint32_t &OffsetPtr) { + uint64_t &OffsetPtr) { // FIXME: Maybe deduce whether the data is little or big-endian using some // magic bytes in the beginning of the file? @@ -30,21 +30,24 @@ Expected readBinaryFormatHeader(DataExtractor &HeaderExtractor, if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Failed reading version from file header at offset %d.", OffsetPtr); + "Failed reading version from file header at offset %" PRId64 ".", + OffsetPtr); PreReadOffset = OffsetPtr; FileHeader.Type = HeaderExtractor.getU16(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Failed reading file type from file header at offset %d.", OffsetPtr); + "Failed reading file type from file header at offset %" PRId64 ".", + OffsetPtr); PreReadOffset = OffsetPtr; uint32_t Bitfield = HeaderExtractor.getU32(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Failed reading flag bits from file header at offset %d.", OffsetPtr); + "Failed reading flag bits from file header at offset %" PRId64 ".", + OffsetPtr); FileHeader.ConstantTSC = Bitfield & 1uL; FileHeader.NonstopTSC = Bitfield & 1uL << 1; @@ -53,7 +56,8 @@ Expected readBinaryFormatHeader(DataExtractor &HeaderExtractor, if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Failed reading cycle frequency from file header at offset %d.", + "Failed reading cycle frequency from file header at offset %" PRId64 + ".", OffsetPtr); std::memcpy(&FileHeader.FreeFormData, diff --git a/lib/XRay/InstrumentationMap.cpp b/lib/XRay/InstrumentationMap.cpp index fe5e941f7ea6..7453613c7038 100644 --- a/lib/XRay/InstrumentationMap.cpp +++ b/lib/XRay/InstrumentationMap.cpp @@ -67,10 +67,11 @@ loadObj(StringRef Filename, object::OwningBinary &ObjFile, StringRef Contents = ""; const auto &Sections = ObjFile.getBinary()->sections(); auto I = llvm::find_if(Sections, [&](object::SectionRef Section) { - StringRef Name = ""; - if 
(Section.getName(Name)) - return false; - return Name == "xray_instr_map"; + Expected NameOrErr = Section.getName(); + if (NameOrErr) + return *NameOrErr == "xray_instr_map"; + consumeError(NameOrErr.takeError()); + return false; }); if (I == Sections.end()) @@ -118,7 +119,7 @@ loadObj(StringRef Filename, object::OwningBinary &ObjFile, "an XRay sled entry in ELF64."), std::make_error_code(std::errc::executable_format_error)); - auto RelocateOrElse = [&](uint32_t Offset, uint64_t Address) { + auto RelocateOrElse = [&](uint64_t Offset, uint64_t Address) { if (!Address) { uint64_t A = I->getAddress() + C - Contents.bytes_begin() + Offset; RelocMap::const_iterator R = Relocs.find(A); @@ -136,10 +137,10 @@ loadObj(StringRef Filename, object::OwningBinary &ObjFile, 8); Sleds.push_back({}); auto &Entry = Sleds.back(); - uint32_t OffsetPtr = 0; - uint32_t AddrOff = OffsetPtr; + uint64_t OffsetPtr = 0; + uint64_t AddrOff = OffsetPtr; Entry.Address = RelocateOrElse(AddrOff, Extractor.getU64(&OffsetPtr)); - uint32_t FuncOff = OffsetPtr; + uint64_t FuncOff = OffsetPtr; Entry.Function = RelocateOrElse(FuncOff, Extractor.getU64(&OffsetPtr)); auto Kind = Extractor.getU8(&OffsetPtr); static constexpr SledEntry::FunctionKinds Kinds[] = { diff --git a/lib/XRay/Profile.cpp b/lib/XRay/Profile.cpp index e34b182f2e02..c1a43632b600 100644 --- a/lib/XRay/Profile.cpp +++ b/lib/XRay/Profile.cpp @@ -49,9 +49,9 @@ struct BlockHeader { }; static Expected readBlockHeader(DataExtractor &Extractor, - uint32_t &Offset) { + uint64_t &Offset) { BlockHeader H; - uint32_t CurrentOffset = Offset; + uint64_t CurrentOffset = Offset; H.Size = Extractor.getU32(&Offset); if (Offset == CurrentOffset) return make_error( @@ -76,7 +76,7 @@ static Expected readBlockHeader(DataExtractor &Extractor, } static Expected> readPath(DataExtractor &Extractor, - uint32_t &Offset) { + uint64_t &Offset) { // We're reading a sequence of int32_t's until we find a 0. 
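// readPath() above pulls 32-bit function ids off the buffer until it sees
// a 0 sentinel, and treats "the offset did not advance" as a malformed
// profile.  A self-contained sketch of the same loop over a plain byte
// buffer -- the hand-rolled readI32() is a simplified stand-in for
// llvm::DataExtractor, not the real API:
#include <cstdint>
#include <cstdio>
#include <optional>
#include <vector>

// Reads a little-endian int32_t; refuses to advance when fewer than four
// bytes remain, which is the error condition the real code checks for.
static bool readI32(const std::vector<uint8_t> &Buf, uint64_t &Offset,
                    int32_t &Out) {
  if (Offset + 4 > Buf.size())
    return false;
  uint32_t V = (uint32_t)Buf[Offset] | ((uint32_t)Buf[Offset + 1] << 8) |
               ((uint32_t)Buf[Offset + 2] << 16) |
               ((uint32_t)Buf[Offset + 3] << 24);
  Out = (int32_t)V;
  Offset += 4;
  return true;
}

static std::optional<std::vector<int32_t>>
readPath(const std::vector<uint8_t> &Buf, uint64_t &Offset) {
  std::vector<int32_t> Path;
  for (;;) {
    int32_t FuncId = 0;
    if (!readI32(Buf, Offset, FuncId))
      return std::nullopt;        // Ran out of bytes: malformed block.
    if (FuncId == 0)
      return Path;                // Sentinel terminates the path.
    Path.push_back(FuncId);
  }
}

int main() {
  // Encodes the path {7, 3} followed by the 0 terminator.
  std::vector<uint8_t> Buf = {7, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0};
  uint64_t Offset = 0;
  if (std::optional<std::vector<int32_t>> Path = readPath(Buf, Offset))
    std::printf("read %zu ids, offset now %llu\n", Path->size(),
                (unsigned long long)Offset);
  return 0;
}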
std::vector Path; auto CurrentOffset = Offset; @@ -94,7 +94,7 @@ static Expected> readPath(DataExtractor &Extractor, } static Expected readData(DataExtractor &Extractor, - uint32_t &Offset) { + uint64_t &Offset) { // We expect a certain number of elements for Data: // - A 64-bit CallCount // - A 64-bit CumulativeLocalTime counter @@ -280,7 +280,7 @@ Expected loadProfile(StringRef Filename) { StringRef Data(MappedFile.data(), MappedFile.size()); Profile P; - uint32_t Offset = 0; + uint64_t Offset = 0; DataExtractor Extractor(Data, true, 8); // For each block we get from the file: diff --git a/lib/XRay/RecordInitializer.cpp b/lib/XRay/RecordInitializer.cpp index 78163031a8cc..68ab3db06208 100644 --- a/lib/XRay/RecordInitializer.cpp +++ b/lib/XRay/RecordInitializer.cpp @@ -12,15 +12,15 @@ namespace xray { Error RecordInitializer::visit(BufferExtents &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, sizeof(uint64_t))) - return createStringError(std::make_error_code(std::errc::bad_address), - "Invalid offset for a buffer extent (%d).", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Invalid offset for a buffer extent (%" PRId64 ").", OffsetPtr); auto PreReadOffset = OffsetPtr; R.Size = E.getU64(&OffsetPtr); if (PreReadOffset == OffsetPtr) return createStringError(std::make_error_code(std::errc::invalid_argument), - "Cannot read buffer extent at offset %d.", + "Cannot read buffer extent at offset %" PRId64 ".", OffsetPtr); OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset); @@ -30,23 +30,25 @@ Error RecordInitializer::visit(BufferExtents &R) { Error RecordInitializer::visit(WallclockRecord &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, MetadataRecord::kMetadataBodySize)) - return createStringError(std::make_error_code(std::errc::bad_address), - "Invalid offset for a wallclock record (%d).", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Invalid offset for a wallclock record (%" PRId64 ").", OffsetPtr); auto BeginOffset = OffsetPtr; auto PreReadOffset = OffsetPtr; R.Seconds = E.getU64(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Cannot read wall clock 'seconds' field at offset %d.", OffsetPtr); + "Cannot read wall clock 'seconds' field at offset %" PRId64 ".", + OffsetPtr); PreReadOffset = OffsetPtr; R.Nanos = E.getU32(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Cannot read wall clock 'nanos' field at offset %d.", OffsetPtr); + "Cannot read wall clock 'nanos' field at offset %" PRId64 ".", + OffsetPtr); // Align to metadata record size boundary. 
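// Every FDR metadata record above owns a fixed-size body
// (MetadataRecord::kMetadataBodySize); after decoding the fields a given
// record actually uses, the visitor pads OffsetPtr forward to the end of
// that body so the next record starts on the expected boundary.  A minimal
// sketch of that bookkeeping with an assumed 16-byte body -- the constant
// and the field sizes are illustrative, not the real layout:
#include <cassert>
#include <cstdint>
#include <cstdio>

static constexpr uint64_t kBodySize = 16;

// Assume an 8-byte and a 4-byte field were decoded starting at BeginOffset.
static uint64_t skipToRecordEnd(uint64_t BeginOffset, uint64_t OffsetPtr) {
  assert(OffsetPtr - BeginOffset <= kBodySize &&
         "Decoded past the fixed-size record body");
  return OffsetPtr + (kBodySize - (OffsetPtr - BeginOffset));
}

int main() {
  uint64_t BeginOffset = 32;                  // Record body starts here.
  uint64_t OffsetPtr = BeginOffset + 8 + 4;   // 12 of 16 body bytes consumed.
  OffsetPtr = skipToRecordEnd(BeginOffset, OffsetPtr);
  std::printf("next record at offset %llu\n",
              (unsigned long long)OffsetPtr);  // Prints 48.
  return 0;
}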
assert(OffsetPtr - BeginOffset <= MetadataRecord::kMetadataBodySize); @@ -57,21 +59,23 @@ Error RecordInitializer::visit(WallclockRecord &R) { Error RecordInitializer::visit(NewCPUIDRecord &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, MetadataRecord::kMetadataBodySize)) - return createStringError(std::make_error_code(std::errc::bad_address), - "Invalid offset for a new cpu id record (%d).", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Invalid offset for a new cpu id record (%" PRId64 ").", OffsetPtr); auto BeginOffset = OffsetPtr; auto PreReadOffset = OffsetPtr; R.CPUId = E.getU16(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError(std::make_error_code(std::errc::invalid_argument), - "Cannot read CPU id at offset %d.", OffsetPtr); + "Cannot read CPU id at offset %" PRId64 ".", + OffsetPtr); PreReadOffset = OffsetPtr; R.TSC = E.getU64(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError(std::make_error_code(std::errc::invalid_argument), - "Cannot read CPU TSC at offset %d.", OffsetPtr); + "Cannot read CPU TSC at offset %" PRId64 ".", + OffsetPtr); OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - BeginOffset); return Error::success(); @@ -80,16 +84,16 @@ Error RecordInitializer::visit(NewCPUIDRecord &R) { Error RecordInitializer::visit(TSCWrapRecord &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, MetadataRecord::kMetadataBodySize)) - return createStringError(std::make_error_code(std::errc::bad_address), - "Invalid offset for a new TSC wrap record (%d).", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Invalid offset for a new TSC wrap record (%" PRId64 ").", OffsetPtr); auto PreReadOffset = OffsetPtr; R.BaseTSC = E.getU64(&OffsetPtr); if (PreReadOffset == OffsetPtr) - return createStringError(std::make_error_code(std::errc::invalid_argument), - "Cannot read TSC wrap record at offset %d.", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::invalid_argument), + "Cannot read TSC wrap record at offset %" PRId64 ".", OffsetPtr); OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset); return Error::success(); @@ -98,9 +102,9 @@ Error RecordInitializer::visit(TSCWrapRecord &R) { Error RecordInitializer::visit(CustomEventRecord &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, MetadataRecord::kMetadataBodySize)) - return createStringError(std::make_error_code(std::errc::bad_address), - "Invalid offset for a custom event record (%d).", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Invalid offset for a custom event record (%" PRId64 ").", OffsetPtr); auto BeginOffset = OffsetPtr; auto PreReadOffset = OffsetPtr; @@ -108,20 +112,22 @@ Error RecordInitializer::visit(CustomEventRecord &R) { if (PreReadOffset == OffsetPtr) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Cannot read a custom event record size field offset %d.", OffsetPtr); + "Cannot read a custom event record size field offset %" PRId64 ".", + OffsetPtr); if (R.Size <= 0) return createStringError( std::make_error_code(std::errc::bad_address), - "Invalid size for custom event (size = %d) at offset %d.", R.Size, - OffsetPtr); + "Invalid size for custom event (size = %d) at offset %" PRId64 ".", + R.Size, OffsetPtr); PreReadOffset = OffsetPtr; R.TSC = E.getU64(&OffsetPtr); if (PreReadOffset == OffsetPtr) return createStringError( 
std::make_error_code(std::errc::invalid_argument), - "Cannot read a custom event TSC field at offset %d.", OffsetPtr); + "Cannot read a custom event TSC field at offset %" PRId64 ".", + OffsetPtr); // For version 4 onwards, of the FDR log, we want to also capture the CPU ID // of the custom event. @@ -131,7 +137,7 @@ Error RecordInitializer::visit(CustomEventRecord &R) { if (PreReadOffset == OffsetPtr) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Missing CPU field at offset %d", OffsetPtr); + "Missing CPU field at offset %" PRId64 ".", OffsetPtr); } assert(OffsetPtr > BeginOffset && @@ -142,8 +148,8 @@ Error RecordInitializer::visit(CustomEventRecord &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, R.Size)) return createStringError( std::make_error_code(std::errc::bad_address), - "Cannot read %d bytes of custom event data from offset %d.", R.Size, - OffsetPtr); + "Cannot read %d bytes of custom event data from offset %" PRId64 ".", + R.Size, OffsetPtr); std::vector Buffer; Buffer.resize(R.Size); @@ -151,15 +157,15 @@ Error RecordInitializer::visit(CustomEventRecord &R) { if (E.getU8(&OffsetPtr, Buffer.data(), R.Size) != Buffer.data()) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Failed reading data into buffer of size %d at offset %d.", R.Size, - OffsetPtr); + "Failed reading data into buffer of size %d at offset %" PRId64 ".", + R.Size, OffsetPtr); assert(OffsetPtr >= PreReadOffset); if (OffsetPtr - PreReadOffset != static_cast(R.Size)) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Failed reading enough bytes for the custom event payload -- read %d " - "expecting %d bytes at offset %d.", + "Failed reading enough bytes for the custom event payload -- read " + "%" PRId64 " expecting %d bytes at offset %" PRId64 ".", OffsetPtr - PreReadOffset, R.Size, PreReadOffset); R.Data.assign(Buffer.begin(), Buffer.end()); @@ -169,9 +175,9 @@ Error RecordInitializer::visit(CustomEventRecord &R) { Error RecordInitializer::visit(CustomEventRecordV5 &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, MetadataRecord::kMetadataBodySize)) - return createStringError(std::make_error_code(std::errc::bad_address), - "Invalid offset for a custom event record (%d).", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Invalid offset for a custom event record (%" PRId64 ").", OffsetPtr); auto BeginOffset = OffsetPtr; auto PreReadOffset = OffsetPtr; @@ -180,20 +186,22 @@ Error RecordInitializer::visit(CustomEventRecordV5 &R) { if (PreReadOffset == OffsetPtr) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Cannot read a custom event record size field offset %d.", OffsetPtr); + "Cannot read a custom event record size field offset %" PRId64 ".", + OffsetPtr); if (R.Size <= 0) return createStringError( std::make_error_code(std::errc::bad_address), - "Invalid size for custom event (size = %d) at offset %d.", R.Size, - OffsetPtr); + "Invalid size for custom event (size = %d) at offset %" PRId64 ".", + R.Size, OffsetPtr); PreReadOffset = OffsetPtr; R.Delta = E.getSigned(&OffsetPtr, sizeof(int32_t)); if (PreReadOffset == OffsetPtr) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Cannot read a custom event record TSC delta field at offset %d.", + "Cannot read a custom event record TSC delta field at offset " + "%" PRId64 ".", OffsetPtr); assert(OffsetPtr > BeginOffset && @@ -204,8 +212,8 @@ Error 
RecordInitializer::visit(CustomEventRecordV5 &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, R.Size)) return createStringError( std::make_error_code(std::errc::bad_address), - "Cannot read %d bytes of custom event data from offset %d.", R.Size, - OffsetPtr); + "Cannot read %d bytes of custom event data from offset %" PRId64 ".", + R.Size, OffsetPtr); std::vector Buffer; Buffer.resize(R.Size); @@ -213,15 +221,15 @@ Error RecordInitializer::visit(CustomEventRecordV5 &R) { if (E.getU8(&OffsetPtr, Buffer.data(), R.Size) != Buffer.data()) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Failed reading data into buffer of size %d at offset %d.", R.Size, - OffsetPtr); + "Failed reading data into buffer of size %d at offset %" PRId64 ".", + R.Size, OffsetPtr); assert(OffsetPtr >= PreReadOffset); if (OffsetPtr - PreReadOffset != static_cast(R.Size)) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Failed reading enough bytes for the custom event payload -- read %d " - "expecting %d bytes at offset %d.", + "Failed reading enough bytes for the custom event payload -- read " + "%" PRId64 " expecting %d bytes at offset %" PRId64 ".", OffsetPtr - PreReadOffset, R.Size, PreReadOffset); R.Data.assign(Buffer.begin(), Buffer.end()); @@ -231,9 +239,9 @@ Error RecordInitializer::visit(CustomEventRecordV5 &R) { Error RecordInitializer::visit(TypedEventRecord &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, MetadataRecord::kMetadataBodySize)) - return createStringError(std::make_error_code(std::errc::bad_address), - "Invalid offset for a typed event record (%d).", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Invalid offset for a typed event record (%" PRId64 ").", OffsetPtr); auto BeginOffset = OffsetPtr; auto PreReadOffset = OffsetPtr; @@ -242,20 +250,22 @@ Error RecordInitializer::visit(TypedEventRecord &R) { if (PreReadOffset == OffsetPtr) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Cannot read a typed event record size field offset %d.", OffsetPtr); + "Cannot read a typed event record size field offset %" PRId64 ".", + OffsetPtr); if (R.Size <= 0) return createStringError( std::make_error_code(std::errc::bad_address), - "Invalid size for typed event (size = %d) at offset %d.", R.Size, - OffsetPtr); + "Invalid size for typed event (size = %d) at offset %" PRId64 ".", + R.Size, OffsetPtr); PreReadOffset = OffsetPtr; R.Delta = E.getSigned(&OffsetPtr, sizeof(int32_t)); if (PreReadOffset == OffsetPtr) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Cannot read a typed event record TSC delta field at offset %d.", + "Cannot read a typed event record TSC delta field at offset " + "%" PRId64 ".", OffsetPtr); PreReadOffset = OffsetPtr; @@ -263,7 +273,8 @@ Error RecordInitializer::visit(TypedEventRecord &R) { if (PreReadOffset == OffsetPtr) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Cannot read a typed event record type field at offset %d.", OffsetPtr); + "Cannot read a typed event record type field at offset %" PRId64 ".", + OffsetPtr); assert(OffsetPtr > BeginOffset && OffsetPtr - BeginOffset <= MetadataRecord::kMetadataBodySize); @@ -273,8 +284,8 @@ Error RecordInitializer::visit(TypedEventRecord &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, R.Size)) return createStringError( std::make_error_code(std::errc::bad_address), - "Cannot read %d bytes of custom event data from offset %d.", R.Size, - 
OffsetPtr); + "Cannot read %d bytes of custom event data from offset %" PRId64 ".", + R.Size, OffsetPtr); std::vector Buffer; Buffer.resize(R.Size); @@ -282,15 +293,15 @@ Error RecordInitializer::visit(TypedEventRecord &R) { if (E.getU8(&OffsetPtr, Buffer.data(), R.Size) != Buffer.data()) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Failed reading data into buffer of size %d at offset %d.", R.Size, - OffsetPtr); + "Failed reading data into buffer of size %d at offset %" PRId64 ".", + R.Size, OffsetPtr); assert(OffsetPtr >= PreReadOffset); if (OffsetPtr - PreReadOffset != static_cast(R.Size)) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Failed reading enough bytes for the typed event payload -- read %d " - "expecting %d bytes at offset %d.", + "Failed reading enough bytes for the typed event payload -- read " + "%" PRId64 " expecting %d bytes at offset %" PRId64 ".", OffsetPtr - PreReadOffset, R.Size, PreReadOffset); R.Data.assign(Buffer.begin(), Buffer.end()); @@ -300,16 +311,17 @@ Error RecordInitializer::visit(TypedEventRecord &R) { Error RecordInitializer::visit(CallArgRecord &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, MetadataRecord::kMetadataBodySize)) - return createStringError(std::make_error_code(std::errc::bad_address), - "Invalid offset for a call argument record (%d).", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Invalid offset for a call argument record (%" PRId64 ").", + OffsetPtr); auto PreReadOffset = OffsetPtr; R.Arg = E.getU64(&OffsetPtr); if (PreReadOffset == OffsetPtr) - return createStringError(std::make_error_code(std::errc::invalid_argument), - "Cannot read a call arg record at offset %d.", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::invalid_argument), + "Cannot read a call arg record at offset %" PRId64 ".", OffsetPtr); OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset); return Error::success(); @@ -318,16 +330,16 @@ Error RecordInitializer::visit(CallArgRecord &R) { Error RecordInitializer::visit(PIDRecord &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, MetadataRecord::kMetadataBodySize)) - return createStringError(std::make_error_code(std::errc::bad_address), - "Invalid offset for a process ID record (%d).", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Invalid offset for a process ID record (%" PRId64 ").", OffsetPtr); auto PreReadOffset = OffsetPtr; R.PID = E.getSigned(&OffsetPtr, 4); if (PreReadOffset == OffsetPtr) - return createStringError(std::make_error_code(std::errc::invalid_argument), - "Cannot read a process ID record at offset %d.", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::invalid_argument), + "Cannot read a process ID record at offset %" PRId64 ".", OffsetPtr); OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset); return Error::success(); @@ -336,16 +348,16 @@ Error RecordInitializer::visit(PIDRecord &R) { Error RecordInitializer::visit(NewBufferRecord &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, MetadataRecord::kMetadataBodySize)) - return createStringError(std::make_error_code(std::errc::bad_address), - "Invalid offset for a new buffer record (%d).", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Invalid offset for a new buffer record (%" PRId64 ").", OffsetPtr); auto PreReadOffset = OffsetPtr; R.TID = 
E.getSigned(&OffsetPtr, sizeof(int32_t)); if (PreReadOffset == OffsetPtr) - return createStringError(std::make_error_code(std::errc::invalid_argument), - "Cannot read a new buffer record at offset %d.", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::invalid_argument), + "Cannot read a new buffer record at offset %" PRId64 ".", OffsetPtr); OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset); return Error::success(); @@ -354,9 +366,10 @@ Error RecordInitializer::visit(NewBufferRecord &R) { Error RecordInitializer::visit(EndBufferRecord &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, MetadataRecord::kMetadataBodySize)) - return createStringError(std::make_error_code(std::errc::bad_address), - "Invalid offset for an end-of-buffer record (%d).", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Invalid offset for an end-of-buffer record (%" PRId64 ").", + OffsetPtr); OffsetPtr += MetadataRecord::kMetadataBodySize; return Error::success(); @@ -373,17 +386,17 @@ Error RecordInitializer::visit(FunctionRecord &R) { // if (OffsetPtr == 0 || !E.isValidOffsetForDataOfSize( --OffsetPtr, FunctionRecord::kFunctionRecordSize)) - return createStringError(std::make_error_code(std::errc::bad_address), - "Invalid offset for a function record (%d).", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Invalid offset for a function record (%" PRId64 ").", OffsetPtr); auto BeginOffset = OffsetPtr; auto PreReadOffset = BeginOffset; uint32_t Buffer = E.getU32(&OffsetPtr); if (PreReadOffset == OffsetPtr) - return createStringError(std::make_error_code(std::errc::bad_address), - "Cannot read function id field from offset %d.", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Cannot read function id field from offset %" PRId64 ".", OffsetPtr); // To get the function record type, we shift the buffer one to the right // (truncating the function record indicator) then take the three bits @@ -397,18 +410,19 @@ Error RecordInitializer::visit(FunctionRecord &R) { R.Kind = static_cast(FunctionType); break; default: - return createStringError(std::make_error_code(std::errc::invalid_argument), - "Unknown function record type '%d' at offset %d.", - FunctionType, BeginOffset); + return createStringError( + std::make_error_code(std::errc::invalid_argument), + "Unknown function record type '%d' at offset %" PRId64 ".", + FunctionType, BeginOffset); } R.FuncId = Buffer >> 4; PreReadOffset = OffsetPtr; R.Delta = E.getU32(&OffsetPtr); if (OffsetPtr == PreReadOffset) - return createStringError(std::make_error_code(std::errc::invalid_argument), - "Failed reading TSC delta from offset %d.", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::invalid_argument), + "Failed reading TSC delta from offset %" PRId64 ".", OffsetPtr); assert(FunctionRecord::kFunctionRecordSize == (OffsetPtr - BeginOffset)); return Error::success(); } diff --git a/lib/XRay/Trace.cpp b/lib/XRay/Trace.cpp index b9b67c561c66..4f107e1059cc 100644 --- a/lib/XRay/Trace.cpp +++ b/lib/XRay/Trace.cpp @@ -47,7 +47,7 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, std::make_error_code(std::errc::invalid_argument)); DataExtractor Reader(Data, IsLittleEndian, 8); - uint32_t OffsetPtr = 0; + uint64_t OffsetPtr = 0; auto FileHeaderOrError = readBinaryFormatHeader(Reader, OffsetPtr); if (!FileHeaderOrError) return FileHeaderOrError.takeError(); @@ -67,13 
+67,14 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, if (!Reader.isValidOffsetForDataOfSize(OffsetPtr, 32)) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Not enough bytes to read a full record at offset %d.", OffsetPtr); + "Not enough bytes to read a full record at offset %" PRId64 ".", + OffsetPtr); auto PreReadOffset = OffsetPtr; auto RecordType = Reader.getU16(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading record type at offset %d.", OffsetPtr); + "Failed reading record type at offset %" PRId64 ".", OffsetPtr); switch (RecordType) { case 0: { // Normal records. @@ -86,14 +87,15 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading CPU field at offset %d.", OffsetPtr); + "Failed reading CPU field at offset %" PRId64 ".", OffsetPtr); PreReadOffset = OffsetPtr; auto Type = Reader.getU8(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading record type field at offset %d.", OffsetPtr); + "Failed reading record type field at offset %" PRId64 ".", + OffsetPtr); switch (Type) { case 0: @@ -111,7 +113,7 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, default: return createStringError( std::make_error_code(std::errc::executable_format_error), - "Unknown record type '%d' at offset %d.", Type, OffsetPtr); + "Unknown record type '%d' at offset %" PRId64 ".", Type, OffsetPtr); } PreReadOffset = OffsetPtr; @@ -119,28 +121,29 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading function id field at offset %d.", OffsetPtr); + "Failed reading function id field at offset %" PRId64 ".", + OffsetPtr); PreReadOffset = OffsetPtr; Record.TSC = Reader.getU64(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading TSC field at offset %d.", OffsetPtr); + "Failed reading TSC field at offset %" PRId64 ".", OffsetPtr); PreReadOffset = OffsetPtr; Record.TId = Reader.getU32(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading thread id field at offset %d.", OffsetPtr); + "Failed reading thread id field at offset %" PRId64 ".", OffsetPtr); PreReadOffset = OffsetPtr; Record.PId = Reader.getU32(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading process id at offset %d.", OffsetPtr); + "Failed reading process id at offset %" PRId64 ".", OffsetPtr); break; } @@ -155,21 +158,23 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading function id field at offset %d.", OffsetPtr); + "Failed reading function id field at offset %" PRId64 ".", + OffsetPtr); PreReadOffset = OffsetPtr; auto TId = Reader.getU32(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading thread id 
field at offset %d.", OffsetPtr); + "Failed reading thread id field at offset %" PRId64 ".", OffsetPtr); PreReadOffset = OffsetPtr; auto PId = Reader.getU32(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading process id field at offset %d.", OffsetPtr); + "Failed reading process id field at offset %" PRId64 ".", + OffsetPtr); // Make a check for versions above 3 for the Pid field if (Record.FuncId != FuncId || Record.TId != TId || @@ -178,7 +183,7 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, std::make_error_code(std::errc::executable_format_error), "Corrupted log, found arg payload following non-matching " "function+thread record. Record for function %d != %d at offset " - "%d", + "%" PRId64 ".", Record.FuncId, FuncId, OffsetPtr); PreReadOffset = OffsetPtr; @@ -186,7 +191,8 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading argument payload at offset %d.", OffsetPtr); + "Failed reading argument payload at offset %" PRId64 ".", + OffsetPtr); Record.CallArgs.push_back(Arg); break; @@ -194,7 +200,8 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, default: return createStringError( std::make_error_code(std::errc::executable_format_error), - "Unknown record type '%d' at offset %d.", RecordType, OffsetPtr); + "Unknown record type '%d' at offset %" PRId64 ".", RecordType, + OffsetPtr); } // Advance the offset pointer enough bytes to align to 32-byte records for // basic mode logs. @@ -265,7 +272,7 @@ Error loadFDRLog(StringRef Data, bool IsLittleEndian, "Not enough bytes for an XRay FDR log."); DataExtractor DE(Data, IsLittleEndian, 8); - uint32_t OffsetPtr = 0; + uint64_t OffsetPtr = 0; auto FileHeaderOrError = readBinaryFormatHeader(DE, OffsetPtr); if (!FileHeaderOrError) return FileHeaderOrError.takeError(); @@ -424,7 +431,7 @@ Expected llvm::xray::loadTrace(const DataExtractor &DE, bool Sort) { // Only if we can't load either the binary or the YAML format will we yield an // error. DataExtractor HeaderExtractor(DE.getData(), DE.isLittleEndian(), 8); - uint32_t OffsetPtr = 0; + uint64_t OffsetPtr = 0; uint16_t Version = HeaderExtractor.getU16(&OffsetPtr); uint16_t Type = HeaderExtractor.getU16(&OffsetPtr); diff --git a/tools/bugpoint/BugDriver.h b/tools/bugpoint/BugDriver.h index 75f166b21b2c..fe5201eb2e6c 100644 --- a/tools/bugpoint/BugDriver.h +++ b/tools/bugpoint/BugDriver.h @@ -217,8 +217,7 @@ public: /// returning the transformed module on success, or a null pointer on failure. std::unique_ptr runPassesOn(Module *M, const std::vector &Passes, - unsigned NumExtraArgs = 0, - const char *const *ExtraArgs = nullptr); + ArrayRef ExtraArgs = {}); /// runPasses - Run the specified passes on Program, outputting a bitcode /// file and writting the filename into OutputFile if successful. 
If the @@ -231,8 +230,8 @@ public: /// bool runPasses(Module &Program, const std::vector &PassesToRun, std::string &OutputFilename, bool DeleteOutput = false, - bool Quiet = false, unsigned NumExtraArgs = 0, - const char *const *ExtraArgs = nullptr) const; + bool Quiet = false, + ArrayRef ExtraArgs = {}) const; /// runPasses - Just like the method above, but this just returns true or /// false indicating whether or not the optimizer crashed on the specified diff --git a/tools/bugpoint/ExtractFunction.cpp b/tools/bugpoint/ExtractFunction.cpp index 105702de3f1d..d9047acd30e1 100644 --- a/tools/bugpoint/ExtractFunction.cpp +++ b/tools/bugpoint/ExtractFunction.cpp @@ -407,11 +407,10 @@ BugDriver::extractMappedBlocksFromModule(const std::vector &BBs, std::string uniqueFN = "--extract-blocks-file="; uniqueFN += Temp->TmpName; - const char *ExtraArg = uniqueFN.c_str(); std::vector PI; PI.push_back("extract-blocks"); - std::unique_ptr Ret = runPassesOn(M, PI, 1, &ExtraArg); + std::unique_ptr Ret = runPassesOn(M, PI, {uniqueFN}); if (!Ret) { outs() << "*** Basic Block extraction failed, please report a bug!\n"; diff --git a/tools/bugpoint/OptimizerDriver.cpp b/tools/bugpoint/OptimizerDriver.cpp index 562de7952388..64af81fcc8a1 100644 --- a/tools/bugpoint/OptimizerDriver.cpp +++ b/tools/bugpoint/OptimizerDriver.cpp @@ -79,7 +79,7 @@ bool BugDriver::writeProgramToFile(int FD, const Module &M) const { bool BugDriver::writeProgramToFile(const std::string &Filename, const Module &M) const { std::error_code EC; - ToolOutputFile Out(Filename, EC, sys::fs::F_None); + ToolOutputFile Out(Filename, EC, sys::fs::OF_None); if (!EC) return writeProgramToFileAux(Out, M); return true; @@ -130,8 +130,7 @@ static cl::list OptArgs("opt-args", cl::Positional, bool BugDriver::runPasses(Module &Program, const std::vector &Passes, std::string &OutputFilename, bool DeleteOutput, - bool Quiet, unsigned NumExtraArgs, - const char *const *ExtraArgs) const { + bool Quiet, ArrayRef ExtraArgs) const { // setup the output file name outs().flush(); SmallString<128> UniqueFilename; @@ -223,8 +222,7 @@ bool BugDriver::runPasses(Module &Program, I != E; ++I) Args.push_back(I->c_str()); Args.push_back(Temp->TmpName.c_str()); - for (unsigned i = 0; i < NumExtraArgs; ++i) - Args.push_back(*ExtraArgs); + Args.append(ExtraArgs.begin(), ExtraArgs.end()); LLVM_DEBUG(errs() << "\nAbout to run:\t"; for (unsigned i = 0, e = Args.size() - 1; i != e; ++i) errs() @@ -268,10 +266,10 @@ bool BugDriver::runPasses(Module &Program, std::unique_ptr BugDriver::runPassesOn(Module *M, const std::vector &Passes, - unsigned NumExtraArgs, const char *const *ExtraArgs) { + ArrayRef ExtraArgs) { std::string BitcodeResult; if (runPasses(*M, Passes, BitcodeResult, false /*delete*/, true /*quiet*/, - NumExtraArgs, ExtraArgs)) { + ExtraArgs)) { return nullptr; } diff --git a/tools/bugpoint/ToolRunner.cpp b/tools/bugpoint/ToolRunner.cpp index da4244345e3b..19b2ea2c0181 100644 --- a/tools/bugpoint/ToolRunner.cpp +++ b/tools/bugpoint/ToolRunner.cpp @@ -170,7 +170,7 @@ Expected LLI::ExecuteProgram(const std::string &Bitcode, const std::vector &SharedLibs, unsigned Timeout, unsigned MemoryLimit) { std::vector LLIArgs; - LLIArgs.push_back(LLIPath.c_str()); + LLIArgs.push_back(LLIPath); LLIArgs.push_back("-force-interpreter=true"); for (std::vector::const_iterator i = SharedLibs.begin(), @@ -266,15 +266,15 @@ Error CustomCompiler::compileProgram(const std::string &Bitcode, unsigned Timeout, unsigned MemoryLimit) { std::vector ProgramArgs; - 
ProgramArgs.push_back(CompilerCommand.c_str()); + ProgramArgs.push_back(CompilerCommand); - for (std::size_t i = 0; i < CompilerArgs.size(); ++i) - ProgramArgs.push_back(CompilerArgs.at(i).c_str()); + for (const auto &Arg : CompilerArgs) + ProgramArgs.push_back(Arg); ProgramArgs.push_back(Bitcode); // Add optional parameters to the running program from Argv - for (unsigned i = 0, e = CompilerArgs.size(); i != e; ++i) - ProgramArgs.push_back(CompilerArgs[i].c_str()); + for (const auto &Arg : CompilerArgs) + ProgramArgs.push_back(Arg); if (RunProgramWithTimeout(CompilerCommand, ProgramArgs, "", "", "", Timeout, MemoryLimit)) @@ -559,7 +559,7 @@ Expected JIT::ExecuteProgram(const std::string &Bitcode, unsigned Timeout, unsigned MemoryLimit) { // Construct a vector of parameters, incorporating those from the command-line std::vector JITArgs; - JITArgs.push_back(LLIPath.c_str()); + JITArgs.push_back(LLIPath); JITArgs.push_back("-force-interpreter=false"); // Add any extra LLI args. @@ -570,7 +570,7 @@ Expected JIT::ExecuteProgram(const std::string &Bitcode, JITArgs.push_back("-load"); JITArgs.push_back(SharedLibs[i]); } - JITArgs.push_back(Bitcode.c_str()); + JITArgs.push_back(Bitcode); // Add optional parameters to the running program from Argv for (unsigned i = 0, e = Args.size(); i != e; ++i) JITArgs.push_back(Args[i]); diff --git a/tools/bugpoint/bugpoint.cpp b/tools/bugpoint/bugpoint.cpp index 2d5322a351ad..c7644e75ae4b 100644 --- a/tools/bugpoint/bugpoint.cpp +++ b/tools/bugpoint/bugpoint.cpp @@ -80,6 +80,10 @@ static cl::opt OptLevelOs( cl::desc( "Like -O2 with extra optimizations for size. Similar to clang -Os")); +static cl::opt +OptLevelOz("Oz", + cl::desc("Like -Os but reduces code size further. Similar to clang -Oz")); + static cl::opt OptLevelO3("O3", cl::desc("Optimization level 3. Identical to 'opt -O3'")); @@ -109,6 +113,26 @@ public: }; } +// This routine adds optimization passes based on selected optimization level, +// OptLevel. +// +// OptLevel - Optimization Level +static void AddOptimizationPasses(legacy::FunctionPassManager &FPM, + unsigned OptLevel, + unsigned SizeLevel) { + PassManagerBuilder Builder; + Builder.OptLevel = OptLevel; + Builder.SizeLevel = SizeLevel; + + if (OptLevel > 1) + Builder.Inliner = createFunctionInliningPass(OptLevel, SizeLevel, false); + else + Builder.Inliner = createAlwaysInlinerLegacyPass(); + + Builder.populateFunctionPassManager(FPM); + Builder.populateModulePassManager(FPM); +} + #ifdef LINK_POLLY_INTO_TOOLS namespace polly { void initializePollyPasses(llvm::PassRegistry &Registry); @@ -189,18 +213,16 @@ int main(int argc, char **argv) { Builder.populateLTOPassManager(PM); } - if (OptLevelO1 || OptLevelO2 || OptLevelO3) { - PassManagerBuilder Builder; - if (OptLevelO1) - Builder.Inliner = createAlwaysInlinerLegacyPass(); - else if (OptLevelOs || OptLevelO2) - Builder.Inliner = createFunctionInliningPass( - 2, OptLevelOs ? 
1 : 0, false); - else - Builder.Inliner = createFunctionInliningPass(275); - Builder.populateFunctionPassManager(PM); - Builder.populateModulePassManager(PM); - } + if (OptLevelO1) + AddOptimizationPasses(PM, 1, 0); + else if (OptLevelO2) + AddOptimizationPasses(PM, 2, 0); + else if (OptLevelO3) + AddOptimizationPasses(PM, 3, 0); + else if (OptLevelOs) + AddOptimizationPasses(PM, 2, 1); + else if (OptLevelOz) + AddOptimizationPasses(PM, 2, 2); for (const PassInfo *PI : PassList) D.addPass(PI->getPassArgument()); diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp index 76da843f065e..574b15b399c3 100644 --- a/tools/llc/llc.cpp +++ b/tools/llc/llc.cpp @@ -239,10 +239,10 @@ static std::unique_ptr GetOutputStream(const char *TargetName, // Open the file. std::error_code EC; - sys::fs::OpenFlags OpenFlags = sys::fs::F_None; + sys::fs::OpenFlags OpenFlags = sys::fs::OF_None; if (!Binary) - OpenFlags |= sys::fs::F_Text; - auto FDOut = llvm::make_unique(OutputFilename, EC, OpenFlags); + OpenFlags |= sys::fs::OF_Text; + auto FDOut = std::make_unique(OutputFilename, EC, OpenFlags); if (EC) { WithColor::error() << EC.message() << '\n'; return nullptr; @@ -329,7 +329,7 @@ int main(int argc, char **argv) { // Set a diagnostic handler that doesn't exit on the first error bool HasError = false; Context.setDiagnosticHandler( - llvm::make_unique(&HasError)); + std::make_unique(&HasError)); Context.setInlineAsmDiagnosticHandler(InlineAsmDiagHandler, &HasError); Expected> RemarksFileOrErr = @@ -479,8 +479,8 @@ static int compileModule(char **argv, LLVMContext &Context) { std::unique_ptr DwoOut; if (!SplitDwarfOutputFile.empty()) { std::error_code EC; - DwoOut = llvm::make_unique(SplitDwarfOutputFile, EC, - sys::fs::F_None); + DwoOut = std::make_unique(SplitDwarfOutputFile, EC, + sys::fs::OF_None); if (EC) { WithColor::error(errs(), argv[0]) << EC.message() << '\n'; return 1; @@ -533,13 +533,14 @@ static int compileModule(char **argv, LLVMContext &Context) { if ((FileType != TargetMachine::CGFT_AssemblyFile && !Out->os().supportsSeeking()) || CompileTwice) { - BOS = make_unique(Buffer); + BOS = std::make_unique(Buffer); OS = BOS.get(); } const char *argv0 = argv[0]; - LLVMTargetMachine &LLVMTM = static_cast(*Target); - MachineModuleInfo *MMI = new MachineModuleInfo(&LLVMTM); + LLVMTargetMachine &LLVMTM = static_cast(*Target); + MachineModuleInfoWrapperPass *MMIWP = + new MachineModuleInfoWrapperPass(&LLVMTM); // Construct a custom pass pipeline that starts after instruction // selection. @@ -559,7 +560,7 @@ static int compileModule(char **argv, LLVMContext &Context) { TPC.setDisableVerify(NoVerify); PM.add(&TPC); - PM.add(MMI); + PM.add(MMIWP); TPC.printAndVerify(""); for (const std::string &RunPassName : *RunPassNames) { if (addPass(PM, argv0, RunPassName, TPC)) @@ -570,7 +571,7 @@ static int compileModule(char **argv, LLVMContext &Context) { PM.add(createFreeMachineFunctionPass()); } else if (Target->addPassesToEmitFile(PM, *OS, DwoOut ? 
&DwoOut->os() : nullptr, - FileType, NoVerify, MMI)) { + FileType, NoVerify, MMIWP)) { WithColor::warning(errs(), argv[0]) << "target does not support generation of this" << " file type!\n"; @@ -578,8 +579,8 @@ static int compileModule(char **argv, LLVMContext &Context) { } if (MIR) { - assert(MMI && "Forgot to create MMI?"); - if (MIR->parseMachineFunctions(*M, *MMI)) + assert(MMIWP && "Forgot to create MMIWP?"); + if (MIR->parseMachineFunctions(*M, MMIWP->getMMI())) return 1; } diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp index 8c8cd88c9711..ccad06721414 100644 --- a/tools/lli/lli.cpp +++ b/tools/lli/lli.cpp @@ -251,7 +251,7 @@ public: sys::fs::create_directories(Twine(dir)); } std::error_code EC; - raw_fd_ostream outfile(CacheName, EC, sys::fs::F_None); + raw_fd_ostream outfile(CacheName, EC, sys::fs::OF_None); outfile.write(Obj.getBufferStart(), Obj.getBufferSize()); outfile.close(); } @@ -308,7 +308,7 @@ static void addCygMingExtraModule(ExecutionEngine &EE, LLVMContext &Context, Triple TargetTriple(TargetTripleStr); // Create a new module. - std::unique_ptr M = make_unique("CygMingHelper", Context); + std::unique_ptr M = std::make_unique("CygMingHelper", Context); M->setTargetTriple(TargetTripleStr); // Create an empty function named "__main". @@ -695,18 +695,16 @@ int main(int argc, char **argv, char * const *envp) { return Result; } -static orc::IRTransformLayer::TransformFunction createDebugDumper() { +static std::function createDebugDumper() { switch (OrcDumpKind) { case DumpKind::NoDump: - return [](orc::ThreadSafeModule TSM, - const orc::MaterializationResponsibility &R) { return TSM; }; + return [](Module &M) {}; case DumpKind::DumpFuncsToStdOut: - return [](orc::ThreadSafeModule TSM, - const orc::MaterializationResponsibility &R) { + return [](Module &M) { printf("[ "); - for (const auto &F : *TSM.getModule()) { + for (const auto &F : M) { if (F.isDeclaration()) continue; @@ -718,31 +716,23 @@ static orc::IRTransformLayer::TransformFunction createDebugDumper() { } printf("]\n"); - return TSM; }; case DumpKind::DumpModsToStdOut: - return [](orc::ThreadSafeModule TSM, - const orc::MaterializationResponsibility &R) { - outs() << "----- Module Start -----\n" - << *TSM.getModule() << "----- Module End -----\n"; - - return TSM; + return [](Module &M) { + outs() << "----- Module Start -----\n" << M << "----- Module End -----\n"; }; case DumpKind::DumpModsToDisk: - return [](orc::ThreadSafeModule TSM, - const orc::MaterializationResponsibility &R) { + return [](Module &M) { std::error_code EC; - raw_fd_ostream Out(TSM.getModule()->getModuleIdentifier() + ".ll", EC, - sys::fs::F_Text); + raw_fd_ostream Out(M.getModuleIdentifier() + ".ll", EC, sys::fs::OF_Text); if (EC) { - errs() << "Couldn't open " << TSM.getModule()->getModuleIdentifier() + errs() << "Couldn't open " << M.getModuleIdentifier() << " for dumping.\nError:" << EC.message() << "\n"; exit(1); } - Out << *TSM.getModule(); - return TSM; + Out << M; }; } llvm_unreachable("Unknown DumpKind"); @@ -754,14 +744,13 @@ int runOrcLazyJIT(const char *ProgName) { // Start setting up the JIT environment. // Parse the main module. 
- orc::ThreadSafeContext TSCtx(llvm::make_unique()); + orc::ThreadSafeContext TSCtx(std::make_unique()); SMDiagnostic Err; - auto MainModule = orc::ThreadSafeModule( - parseIRFile(InputFile, Err, *TSCtx.getContext()), TSCtx); + auto MainModule = parseIRFile(InputFile, Err, *TSCtx.getContext()); if (!MainModule) reportError(Err, ProgName); - const auto &TT = MainModule.getModule()->getTargetTriple(); + const auto &TT = MainModule->getTargetTriple(); orc::LLLazyJITBuilder Builder; Builder.setJITTargetMachineBuilder( @@ -794,13 +783,16 @@ int runOrcLazyJIT(const char *ProgName) { J->setLazyCompileTransform([&](orc::ThreadSafeModule TSM, const orc::MaterializationResponsibility &R) { - if (verifyModule(*TSM.getModule(), &dbgs())) { - dbgs() << "Bad module: " << *TSM.getModule() << "\n"; - exit(1); - } - return Dump(std::move(TSM), R); + TSM.withModuleDo([&](Module &M) { + if (verifyModule(M, &dbgs())) { + dbgs() << "Bad module: " << &M << "\n"; + exit(1); + } + Dump(M); + }); + return TSM; }); - J->getMainJITDylib().setGenerator( + J->getMainJITDylib().addGenerator( ExitOnErr(orc::DynamicLibrarySearchGenerator::GetForCurrentProcess( J->getDataLayout().getGlobalPrefix()))); @@ -809,7 +801,8 @@ int runOrcLazyJIT(const char *ProgName) { ExitOnErr(CXXRuntimeOverrides.enable(J->getMainJITDylib(), Mangle)); // Add the main module. - ExitOnErr(J->addLazyIRModule(std::move(MainModule))); + ExitOnErr( + J->addLazyIRModule(orc::ThreadSafeModule(std::move(MainModule), TSCtx))); // Create JITDylibs and add any extra modules. { @@ -839,6 +832,16 @@ int runOrcLazyJIT(const char *ProgName) { ExitOnErr( J->addLazyIRModule(JD, orc::ThreadSafeModule(std::move(M), TSCtx))); } + + for (auto EAItr = ExtraArchives.begin(), EAEnd = ExtraArchives.end(); + EAItr != EAEnd; ++EAItr) { + auto EAIdx = ExtraArchives.getPosition(EAItr - ExtraArchives.begin()); + assert(EAIdx != 0 && "ExtraArchive should have index > 0"); + auto JDItr = std::prev(IdxToDylib.lower_bound(EAIdx)); + auto &JD = *JDItr->second; + JD.addGenerator(ExitOnErr(orc::StaticLibraryDefinitionGenerator::Load( + J->getObjLinkingLayer(), EAItr->c_str()))); + } } // Add the objects. @@ -959,6 +962,6 @@ std::unique_ptr launchRemote() { close(PipeFD[1][1]); // Return an RPC channel connected to our end of the pipes. - return llvm::make_unique(PipeFD[1][0], PipeFD[0][1]); + return std::make_unique(PipeFD[1][0], PipeFD[0][1]); #endif } diff --git a/tools/llvm-ar/llvm-ar.cpp b/tools/llvm-ar/llvm-ar.cpp index 91746d0fab37..c9cf217f7688 100644 --- a/tools/llvm-ar/llvm-ar.cpp +++ b/tools/llvm-ar/llvm-ar.cpp @@ -43,6 +43,11 @@ #include #endif +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#endif + using namespace llvm; // The name this program was invoked as. 
@@ -70,14 +75,14 @@ USAGE: llvm-ar [options] [-][modifiers] [relpos] [count] [f llvm-ar -M [ - Ignored for compatibility - --help - Display available options - --version - Display the version of this program + --plugin= - ignored for compatibility + -h --help - display this help and exit + --version - print the version and exit @ - read options from OPERATIONS: @@ -95,11 +100,13 @@ MODIFIERS: [b] - put [files] before [relpos] (same as [i]) [c] - do not warn if archive had to be created [D] - use zero for timestamps and uids/gids (default) + [h] - display this help and exit [i] - put [files] before [relpos] (same as [b]) [l] - ignored for compatibility [L] - add archive's contents [N] - use instance [count] of name [o] - preserve original dates + [O] - display member offsets [P] - use full names when matching (implied for thin archives) [s] - create an archive index (cf. ranlib) [S] - do not build a symbol table @@ -107,6 +114,7 @@ MODIFIERS: [u] - update only [files] newer than archive contents [U] - use actual timestamps and uids/gids [v] - be verbose about actions taken + [V] - display the version and exit )"; void printHelpMessage() { @@ -116,10 +124,19 @@ void printHelpMessage() { outs() << ArHelp; } +static unsigned MRILineNumber; +static bool ParsingMRIScript; + // Show the error message and exit. LLVM_ATTRIBUTE_NORETURN static void fail(Twine Error) { - WithColor::error(errs(), ToolName) << Error << ".\n"; - printHelpMessage(); + if (ParsingMRIScript) { + WithColor::error(errs(), ToolName) + << "script line " << MRILineNumber << ": " << Error << "\n"; + } else { + WithColor::error(errs(), ToolName) << Error << "\n"; + printHelpMessage(); + } + exit(1); } @@ -171,17 +188,18 @@ enum ArchiveOperation { }; // Modifiers to follow operation to vary behavior -static bool AddAfter = false; ///< 'a' modifier -static bool AddBefore = false; ///< 'b' modifier -static bool Create = false; ///< 'c' modifier -static bool OriginalDates = false; ///< 'o' modifier -static bool CompareFullPath = false; ///< 'P' modifier -static bool OnlyUpdate = false; ///< 'u' modifier -static bool Verbose = false; ///< 'v' modifier -static bool Symtab = true; ///< 's' modifier -static bool Deterministic = true; ///< 'D' and 'U' modifiers -static bool Thin = false; ///< 'T' modifier -static bool AddLibrary = false; ///< 'L' modifier +static bool AddAfter = false; ///< 'a' modifier +static bool AddBefore = false; ///< 'b' modifier +static bool Create = false; ///< 'c' modifier +static bool OriginalDates = false; ///< 'o' modifier +static bool DisplayMemberOffsets = false; ///< 'O' modifier +static bool CompareFullPath = false; ///< 'P' modifier +static bool OnlyUpdate = false; ///< 'u' modifier +static bool Verbose = false; ///< 'v' modifier +static bool Symtab = true; ///< 's' modifier +static bool Deterministic = true; ///< 'D' and 'U' modifiers +static bool Thin = false; ///< 'T' modifier +static bool AddLibrary = false; ///< 'L' modifier // Relative Positional Argument (for insert/move). This variable holds // the name of the archive member to which the 'a', 'b' or 'i' modifier @@ -198,6 +216,9 @@ static int CountParam = 0; // command line. static std::string ArchiveName; +static std::vector> ArchiveBuffers; +static std::vector> Archives; + // This variable holds the list of member files to proecess, as given // on the command line. 
static std::vector Members; @@ -209,7 +230,7 @@ static BumpPtrAllocator Alloc; // associated with a, b, and i modifiers static void getRelPos() { if (PositionalArgs.empty()) - fail("Expected [relpos] for a, b, or i modifier"); + fail("expected [relpos] for 'a', 'b', or 'i' modifier"); RelPos = PositionalArgs[0]; PositionalArgs.erase(PositionalArgs.begin()); } @@ -218,40 +239,31 @@ static void getRelPos() { // associated with the N modifier static void getCountParam() { if (PositionalArgs.empty()) - fail("Expected [count] for N modifier"); + fail("expected [count] for 'N' modifier"); auto CountParamArg = StringRef(PositionalArgs[0]); if (CountParamArg.getAsInteger(10, CountParam)) - fail("Value for [count] must be numeric, got: " + CountParamArg); + fail("value for [count] must be numeric, got: " + CountParamArg); if (CountParam < 1) - fail("Value for [count] must be positive, got: " + CountParamArg); + fail("value for [count] must be positive, got: " + CountParamArg); PositionalArgs.erase(PositionalArgs.begin()); } // Get the archive file name from the command line static void getArchive() { if (PositionalArgs.empty()) - fail("An archive name must be specified"); + fail("an archive name must be specified"); ArchiveName = PositionalArgs[0]; PositionalArgs.erase(PositionalArgs.begin()); } -// Copy over remaining items in PositionalArgs to our Members vector -static void getMembers() { - for (auto &Arg : PositionalArgs) - Members.push_back(Arg); -} - -std::vector> ArchiveBuffers; -std::vector> Archives; - static object::Archive &readLibrary(const Twine &Library) { auto BufOrErr = MemoryBuffer::getFile(Library, -1, false); - failIfError(BufOrErr.getError(), "Could not open library " + Library); + failIfError(BufOrErr.getError(), "could not open library " + Library); ArchiveBuffers.push_back(std::move(*BufOrErr)); auto LibOrErr = object::Archive::create(ArchiveBuffers.back()->getMemBufferRef()); failIfError(errorToErrorCode(LibOrErr.takeError()), - "Could not parse library"); + "could not parse library"); Archives.push_back(std::move(*LibOrErr)); return *Archives.back(); } @@ -264,7 +276,7 @@ static void runMRIScript(); static ArchiveOperation parseCommandLine() { if (MRI) { if (!PositionalArgs.empty() || !Options.empty()) - fail("Cannot mix -M and other options"); + fail("cannot mix -M and other options"); runMRIScript(); } @@ -319,6 +331,9 @@ static ArchiveOperation parseCommandLine() { case 'o': OriginalDates = true; break; + case 'O': + DisplayMemberOffsets = true; + break; case 'P': CompareFullPath = true; break; @@ -367,6 +382,12 @@ static ArchiveOperation parseCommandLine() { case 'L': AddLibrary = true; break; + case 'V': + cl::PrintVersionMessage(); + exit(0); + case 'h': + printHelpMessage(); + exit(0); default: fail(std::string("unknown option ") + Options[i]); } @@ -377,37 +398,37 @@ static ArchiveOperation parseCommandLine() { getArchive(); // Everything on the command line at this point is a member. - getMembers(); + Members.assign(PositionalArgs.begin(), PositionalArgs.end()); if (NumOperations == 0 && MaybeJustCreateSymTab) { NumOperations = 1; Operation = CreateSymTab; if (!Members.empty()) - fail("The s operation takes only an archive as argument"); + fail("the 's' operation takes only an archive as argument"); } // Perform various checks on the operation/modifier specification // to make sure we are dealing with a legal request. 
if (NumOperations == 0) - fail("You must specify at least one of the operations"); + fail("you must specify at least one of the operations"); if (NumOperations > 1) - fail("Only one operation may be specified"); + fail("only one operation may be specified"); if (NumPositional > 1) - fail("You may only specify one of a, b, and i modifiers"); + fail("you may only specify one of 'a', 'b', and 'i' modifiers"); if (AddAfter || AddBefore) if (Operation != Move && Operation != ReplaceOrInsert) - fail("The 'a', 'b' and 'i' modifiers can only be specified with " + fail("the 'a', 'b' and 'i' modifiers can only be specified with " "the 'm' or 'r' operations"); if (CountParam) if (Operation != Extract && Operation != Delete) - fail("The 'N' modifier can only be specified with the 'x' or 'd' " + fail("the 'N' modifier can only be specified with the 'x' or 'd' " "operations"); if (OriginalDates && Operation != Extract) - fail("The 'o' modifier is only applicable to the 'x' operation"); + fail("the 'o' modifier is only applicable to the 'x' operation"); if (OnlyUpdate && Operation != ReplaceOrInsert) - fail("The 'u' modifier is only applicable to the 'r' operation"); + fail("the 'u' modifier is only applicable to the 'r' operation"); if (AddLibrary && Operation != QuickAppend) - fail("The 'L' modifier is only applicable to the 'q' operation"); + fail("the 'L' modifier is only applicable to the 'q' operation"); // Return the parsed operation to the caller return Operation; @@ -470,12 +491,35 @@ static void doDisplayTable(StringRef Name, const object::Archive::Child &C) { if (!ParentDir.empty()) outs() << sys::path::convert_to_slash(ParentDir) << '/'; } + outs() << Name; + } else { + outs() << Name; + if (DisplayMemberOffsets) + outs() << " 0x" << utohexstr(C.getDataOffset(), true); } - outs() << Name << "\n"; + outs() << '\n'; } -static StringRef normalizePath(StringRef Path) { - return CompareFullPath ? Path : sys::path::filename(Path); +static std::string normalizePath(StringRef Path) { + return CompareFullPath ? sys::path::convert_to_slash(Path) + : std::string(sys::path::filename(Path)); +} + +static bool comparePaths(StringRef Path1, StringRef Path2) { +// When on Windows this function calls CompareStringOrdinal +// as Windows file paths are case-insensitive. +// CompareStringOrdinal compares two Unicode strings for +// binary equivalence and allows for case insensitivity. +#ifdef _WIN32 + SmallVector WPath1, WPath2; + failIfError(sys::path::widenPath(normalizePath(Path1), WPath1)); + failIfError(sys::path::widenPath(normalizePath(Path2), WPath2)); + + return CompareStringOrdinal(WPath1.data(), WPath1.size(), WPath2.data(), + WPath2.size(), true) == CSTR_EQUAL; +#else + return normalizePath(Path1) == normalizePath(Path2); +#endif } // Implement the 'x' operation. 
This function extracts files back to the file @@ -489,7 +533,7 @@ static void doExtract(StringRef Name, const object::Archive::Child &C) { int FD; failIfError(sys::fs::openFileForWrite(sys::path::filename(Name), FD, sys::fs::CD_CreateAlways, - sys::fs::F_None, Mode), + sys::fs::OF_None, Mode), Name); { @@ -551,7 +595,7 @@ static void performReadOperation(ArchiveOperation Operation, if (Filter) { auto I = find_if(Members, [Name](StringRef Path) { - return Name == normalizePath(Path); + return comparePaths(Name, Path); }); if (I == Members.end()) continue; @@ -588,7 +632,7 @@ static void addChildMember(std::vector &Members, const object::Archive::Child &M, bool FlattenArchive = false) { if (Thin && !M.getParent()->isThin()) - fail("Cannot convert a regular archive to a thin one"); + fail("cannot convert a regular archive to a thin one"); Expected NMOrErr = NewArchiveMember::getOldMember(M, Deterministic); failIfError(NMOrErr.takeError()); @@ -681,7 +725,7 @@ static InsertAction computeInsertAction(ArchiveOperation Operation, if (Operation == QuickAppend || Members.empty()) return IA_AddOldMember; auto MI = find_if( - Members, [Name](StringRef Path) { return Name == normalizePath(Path); }); + Members, [Name](StringRef Path) { return comparePaths(Name, Path); }); if (MI == Members.end()) return IA_AddOldMember; @@ -698,9 +742,8 @@ static InsertAction computeInsertAction(ArchiveOperation Operation, return IA_MoveOldMember; if (Operation == ReplaceOrInsert) { - StringRef PosName = normalizePath(RelPos); if (!OnlyUpdate) { - if (PosName.empty()) + if (RelPos.empty()) return IA_AddNewMember; return IA_MoveNewMember; } @@ -712,12 +755,12 @@ static InsertAction computeInsertAction(ArchiveOperation Operation, auto ModTimeOrErr = Member.getLastModified(); failIfError(ModTimeOrErr.takeError()); if (Status.getLastModificationTime() < ModTimeOrErr.get()) { - if (PosName.empty()) + if (RelPos.empty()) return IA_AddOldMember; return IA_MoveOldMember; } - if (PosName.empty()) + if (RelPos.empty()) return IA_AddNewMember; return IA_MoveNewMember; } @@ -732,7 +775,6 @@ computeNewArchiveMembers(ArchiveOperation Operation, std::vector Ret; std::vector Moved; int InsertPos = -1; - StringRef PosName = normalizePath(RelPos); if (OldArchive) { Error Err = Error::success(); StringMap MemberCount; @@ -740,8 +782,8 @@ computeNewArchiveMembers(ArchiveOperation Operation, int Pos = Ret.size(); Expected NameOrErr = Child.getName(); failIfError(NameOrErr.takeError()); - StringRef Name = NameOrErr.get(); - if (Name == PosName) { + std::string Name = NameOrErr.get(); + if (comparePaths(Name, RelPos)) { assert(AddAfter || AddBefore); if (AddBefore) InsertPos = Pos; @@ -783,7 +825,7 @@ computeNewArchiveMembers(ArchiveOperation Operation, return Ret; if (!RelPos.empty() && InsertPos == -1) - fail("Insertion point not found"); + fail("insertion point not found"); if (RelPos.empty()) InsertPos = Ret.size(); @@ -859,12 +901,12 @@ static void performWriteOperation(ArchiveOperation Operation, break; case BSD: if (Thin) - fail("Only the gnu format has a thin mode"); + fail("only the gnu format has a thin mode"); Kind = object::Archive::K_BSD; break; case DARWIN: if (Thin) - fail("Only the gnu format has a thin mode"); + fail("only the gnu format has a thin mode"); Kind = object::Archive::K_DARWIN; break; case Unknown: @@ -922,14 +964,12 @@ static int performOperation(ArchiveOperation Operation, MemoryBuffer::getFile(ArchiveName, -1, false); std::error_code EC = Buf.getError(); if (EC && EC != errc::no_such_file_or_directory) - 
fail("error opening '" + ArchiveName + "': " + EC.message() + "!"); + fail("error opening '" + ArchiveName + "': " + EC.message()); if (!EC) { Error Err = Error::success(); object::Archive Archive(Buf.get()->getMemBufferRef(), Err); - EC = errorToErrorCode(std::move(Err)); - failIfError(EC, - "error loading '" + ArchiveName + "': " + EC.message() + "!"); + failIfError(std::move(Err), "unable to load '" + ArchiveName + "'"); if (Archive.isThin()) CompareFullPath = true; performOperation(Operation, &Archive, std::move(Buf.get()), NewMembers); @@ -960,8 +1000,10 @@ static void runMRIScript() { const MemoryBuffer &Ref = *Buf.get(); bool Saved = false; std::vector NewMembers; + ParsingMRIScript = true; for (line_iterator I(Ref, /*SkipBlanks*/ false), E; I != E; ++I) { + ++MRILineNumber; StringRef Line = *I; Line = Line.split(';').first; Line = Line.split('*').first; @@ -1003,15 +1045,15 @@ static void runMRIScript() { case MRICommand::Create: Create = true; if (!ArchiveName.empty()) - fail("Editing multiple archives not supported"); + fail("editing multiple archives not supported"); if (Saved) - fail("File already saved"); + fail("file already saved"); ArchiveName = Rest; break; case MRICommand::Delete: { - StringRef Name = normalizePath(Rest); - llvm::erase_if(NewMembers, - [=](NewArchiveMember &M) { return M.MemberName == Name; }); + llvm::erase_if(NewMembers, [=](NewArchiveMember &M) { + return comparePaths(M.MemberName, Rest); + }); break; } case MRICommand::Save: @@ -1020,10 +1062,12 @@ static void runMRIScript() { case MRICommand::End: break; case MRICommand::Invalid: - fail("Unknown command: " + CommandStr); + fail("unknown command: " + CommandStr); } } - + + ParsingMRIScript = false; + // Nothing to do if not saved. if (Saved) performOperation(ReplaceOrInsert, &NewMembers); @@ -1108,7 +1152,7 @@ static int ranlib_main(int argc, char **argv) { return 0; } else { if (ArchiveSpecified) - fail("Exactly one archive should be specified"); + fail("exactly one archive should be specified"); ArchiveSpecified = true; ArchiveName = argv[i]; } @@ -1136,5 +1180,5 @@ int main(int argc, char **argv) { if (Stem.contains_lower("ar")) return ar_main(argc, argv); - fail("Not ranlib, ar, lib or dlltool!"); + fail("not ranlib, ar, lib or dlltool"); } diff --git a/tools/llvm-as/llvm-as.cpp b/tools/llvm-as/llvm-as.cpp index 234fef907a38..c9f50e38fc61 100644 --- a/tools/llvm-as/llvm-as.cpp +++ b/tools/llvm-as/llvm-as.cpp @@ -82,7 +82,7 @@ static void WriteOutputFile(const Module *M, const ModuleSummaryIndex *Index) { std::error_code EC; std::unique_ptr Out( - new ToolOutputFile(OutputFilename, EC, sys::fs::F_None)); + new ToolOutputFile(OutputFilename, EC, sys::fs::OF_None)); if (EC) { errs() << EC.message() << '\n'; exit(1); diff --git a/tools/llvm-cov/CodeCoverage.cpp b/tools/llvm-cov/CodeCoverage.cpp index f707e3c7ab53..7151cfb032f3 100644 --- a/tools/llvm-cov/CodeCoverage.cpp +++ b/tools/llvm-cov/CodeCoverage.cpp @@ -712,15 +712,15 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) { // Create the function filters if (!NameFilters.empty() || NameWhitelist || !NameRegexFilters.empty()) { - auto NameFilterer = llvm::make_unique(); + auto NameFilterer = std::make_unique(); for (const auto &Name : NameFilters) - NameFilterer->push_back(llvm::make_unique(Name)); + NameFilterer->push_back(std::make_unique(Name)); if (NameWhitelist) NameFilterer->push_back( - llvm::make_unique(*NameWhitelist)); + std::make_unique(*NameWhitelist)); for (const auto &Regex : NameRegexFilters) 
NameFilterer->push_back( - llvm::make_unique(Regex)); + std::make_unique(Regex)); Filters.push_back(std::move(NameFilterer)); } @@ -728,18 +728,18 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) { RegionCoverageGtFilter.getNumOccurrences() || LineCoverageLtFilter.getNumOccurrences() || LineCoverageGtFilter.getNumOccurrences()) { - auto StatFilterer = llvm::make_unique(); + auto StatFilterer = std::make_unique(); if (RegionCoverageLtFilter.getNumOccurrences()) - StatFilterer->push_back(llvm::make_unique( + StatFilterer->push_back(std::make_unique( RegionCoverageFilter::LessThan, RegionCoverageLtFilter)); if (RegionCoverageGtFilter.getNumOccurrences()) - StatFilterer->push_back(llvm::make_unique( + StatFilterer->push_back(std::make_unique( RegionCoverageFilter::GreaterThan, RegionCoverageGtFilter)); if (LineCoverageLtFilter.getNumOccurrences()) - StatFilterer->push_back(llvm::make_unique( + StatFilterer->push_back(std::make_unique( LineCoverageFilter::LessThan, LineCoverageLtFilter)); if (LineCoverageGtFilter.getNumOccurrences()) - StatFilterer->push_back(llvm::make_unique( + StatFilterer->push_back(std::make_unique( RegionCoverageFilter::GreaterThan, LineCoverageGtFilter)); Filters.push_back(std::move(StatFilterer)); } @@ -747,7 +747,7 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) { // Create the ignore filename filters. for (const auto &RE : IgnoreFilenameRegexFilters) IgnoreFilenameFilters.push_back( - llvm::make_unique(RE)); + std::make_unique(RE)); if (!Arches.empty()) { for (const std::string &Arch : Arches) { @@ -1040,7 +1040,7 @@ int CodeCoverageTool::doExport(int argc, const char **argv, switch (ViewOpts.Format) { case CoverageViewOptions::OutputFormat::Text: - Exporter = llvm::make_unique(*Coverage.get(), + Exporter = std::make_unique(*Coverage.get(), ViewOpts, outs()); break; case CoverageViewOptions::OutputFormat::HTML: @@ -1048,7 +1048,7 @@ int CodeCoverageTool::doExport(int argc, const char **argv, // above. llvm_unreachable("Export in HTML is not supported!"); case CoverageViewOptions::OutputFormat::Lcov: - Exporter = llvm::make_unique(*Coverage.get(), + Exporter = std::make_unique(*Coverage.get(), ViewOpts, outs()); break; } diff --git a/tools/llvm-cov/SourceCoverageView.cpp b/tools/llvm-cov/SourceCoverageView.cpp index 616f667e2c84..0e20ea63cd6f 100644 --- a/tools/llvm-cov/SourceCoverageView.cpp +++ b/tools/llvm-cov/SourceCoverageView.cpp @@ -76,9 +76,9 @@ std::unique_ptr CoveragePrinter::create(const CoverageViewOptions &Opts) { switch (Opts.Format) { case CoverageViewOptions::OutputFormat::Text: - return llvm::make_unique(Opts); + return std::make_unique(Opts); case CoverageViewOptions::OutputFormat::HTML: - return llvm::make_unique(Opts); + return std::make_unique(Opts); case CoverageViewOptions::OutputFormat::Lcov: // Unreachable because CodeCoverage.cpp should terminate with an error // before we get here. 
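The CoveragePrinter::create and SourceCoverageView::create hunks above follow a shape that recurs throughout this patch: a factory keyed on an output-format enum that returns an owning std::unique_ptr, now built with the C++14 std::make_unique instead of the llvm::make_unique helper. A minimal standalone sketch of that shape, using hypothetical Printer/TextPrinter/HtmlPrinter names rather than the real llvm-cov classes:

  #include <iostream>
  #include <memory>

  // Hypothetical stand-ins for the printer hierarchy in the hunks above.
  struct Printer {
    virtual ~Printer() = default;
    virtual void print() = 0;
  };
  struct TextPrinter : Printer { void print() override { std::cout << "text\n"; } };
  struct HtmlPrinter : Printer { void print() override { std::cout << "html\n"; } };

  enum class OutputFormat { Text, HTML };

  // Factory keyed on the format enum; std::make_unique (C++14) is used
  // directly, as in the migrated call sites above.
  std::unique_ptr<Printer> createPrinter(OutputFormat F) {
    switch (F) {
    case OutputFormat::Text:
      return std::make_unique<TextPrinter>();
    case OutputFormat::HTML:
      return std::make_unique<HtmlPrinter>();
    }
    return nullptr; // not reached for valid enum values
  }

  int main() { createPrinter(OutputFormat::Text)->print(); }

The only difference at the call sites above is the namespace of make_unique; the ownership semantics of the returned pointer are unchanged.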
@@ -141,10 +141,10 @@ SourceCoverageView::create(StringRef SourceName, const MemoryBuffer &File, CoverageData &&CoverageInfo) { switch (Options.Format) { case CoverageViewOptions::OutputFormat::Text: - return llvm::make_unique( + return std::make_unique( SourceName, File, Options, std::move(CoverageInfo)); case CoverageViewOptions::OutputFormat::HTML: - return llvm::make_unique( + return std::make_unique( SourceName, File, Options, std::move(CoverageInfo)); case CoverageViewOptions::OutputFormat::Lcov: // Unreachable because CodeCoverage.cpp should terminate with an error diff --git a/tools/llvm-cov/TestingSupport.cpp b/tools/llvm-cov/TestingSupport.cpp index 3ee318c9c640..b99bd83157d0 100644 --- a/tools/llvm-cov/TestingSupport.cpp +++ b/tools/llvm-cov/TestingSupport.cpp @@ -8,6 +8,7 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/raw_ostream.h" @@ -50,8 +51,13 @@ int convertForTestingMain(int argc, const char *argv[]) { auto ObjFormat = OF->getTripleObjectFormat(); for (const auto &Section : OF->sections()) { StringRef Name; - if (Section.getName(Name)) + if (Expected NameOrErr = Section.getName()) { + Name = *NameOrErr; + } else { + consumeError(NameOrErr.takeError()); return 1; + } + if (Name == llvm::getInstrProfSectionName(IPSK_name, ObjFormat, /*AddSegmentInfo=*/false)) { ProfileNames = Section; @@ -94,7 +100,7 @@ int convertForTestingMain(int argc, const char *argv[]) { encodeULEB128(ProfileNamesAddress, OS); OS << ProfileNamesData; // Coverage mapping data is expected to have an alignment of 8. - for (unsigned Pad = OffsetToAlignment(OS.tell(), 8); Pad; --Pad) + for (unsigned Pad = offsetToAlignment(OS.tell(), Align(8)); Pad; --Pad) OS.write(uint8_t(0)); OS << CoverageMappingData; diff --git a/tools/llvm-cxxdump/llvm-cxxdump.cpp b/tools/llvm-cxxdump/llvm-cxxdump.cpp index 833312655788..03e1bab9417e 100644 --- a/tools/llvm-cxxdump/llvm-cxxdump.cpp +++ b/tools/llvm-cxxdump/llvm-cxxdump.cpp @@ -174,7 +174,11 @@ static void dumpCXXData(const ObjectFile *Obj) { SectionRelocMap.clear(); for (const SectionRef &Section : Obj->sections()) { - section_iterator Sec2 = Section.getRelocatedSection(); + Expected ErrOrSec = Section.getRelocatedSection(); + if (!ErrOrSec) + error(ErrOrSec.takeError()); + + section_iterator Sec2 = *ErrOrSec; if (Sec2 != Obj->section_end()) SectionRelocMap[*Sec2].push_back(Section); } diff --git a/tools/llvm-cxxmap/llvm-cxxmap.cpp b/tools/llvm-cxxmap/llvm-cxxmap.cpp index 87d4d06bbc96..b53a6364c89e 100644 --- a/tools/llvm-cxxmap/llvm-cxxmap.cpp +++ b/tools/llvm-cxxmap/llvm-cxxmap.cpp @@ -145,7 +145,7 @@ int main(int argc, const char *argv[]) { exitWithErrorCode(RemappingBufOrError.getError(), RemappingFile); std::error_code EC; - raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::F_Text); + raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_Text); if (EC) exitWithErrorCode(EC, OutputFilename); diff --git a/tools/llvm-dis/llvm-dis.cpp b/tools/llvm-dis/llvm-dis.cpp index 3f337b874b16..d66299cbf767 100644 --- a/tools/llvm-dis/llvm-dis.cpp +++ b/tools/llvm-dis/llvm-dis.cpp @@ -153,7 +153,7 @@ int main(int argc, char **argv) { LLVMContext Context; Context.setDiagnosticHandler( - llvm::make_unique(argv[0])); + std::make_unique(argv[0])); cl::ParseCommandLineOptions(argc, argv, "llvm .bc -> .ll disassembler\n"); std::unique_ptr MB = @@ -186,7 +186,7 @@ int main(int argc, char **argv) { std::error_code EC; 
std::unique_ptr Out( - new ToolOutputFile(OutputFilename, EC, sys::fs::F_None)); + new ToolOutputFile(OutputFilename, EC, sys::fs::OF_Text)); if (EC) { errs() << EC.message() << '\n'; return 1; diff --git a/tools/llvm-dwarfdump/Statistics.cpp b/tools/llvm-dwarfdump/Statistics.cpp index f26369b935cb..c29ad783a9e6 100644 --- a/tools/llvm-dwarfdump/Statistics.cpp +++ b/tools/llvm-dwarfdump/Statistics.cpp @@ -5,11 +5,18 @@ #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Support/JSON.h" #define DEBUG_TYPE "dwarfdump" using namespace llvm; using namespace object; +/// This represents the number of categories of debug location coverage being +/// calculated. The first category is the number of variables with 0% location +/// coverage, but the last category is the number of variables with 100% +/// location coverage. +constexpr int NumOfCoverageCategories = 12; + /// Holds statistics for one function (or other entity that has a PC range and /// contains variables, such as a compile unit). struct PerFunctionStats { @@ -43,9 +50,9 @@ struct PerFunctionStats { unsigned NumVars = 0; /// Number of variables with source location. unsigned NumVarSourceLocations = 0; - /// Number of variables wtih type. + /// Number of variables with type. unsigned NumVarTypes = 0; - /// Number of variables wtih DW_AT_location. + /// Number of variables with DW_AT_location. unsigned NumVarLocations = 0; }; @@ -56,16 +63,74 @@ struct GlobalStats { /// Total number of PC range bytes in each variable's enclosing scope, /// starting from the first definition of the variable. unsigned ScopeBytesFromFirstDefinition = 0; - /// Total number of call site entries (DW_TAG_call_site) or - /// (DW_AT_call_file & DW_AT_call_line). + /// Total number of PC range bytes covered by DW_AT_locations with + /// the debug entry values (DW_OP_entry_value). + unsigned ScopeEntryValueBytesCovered = 0; + /// Total number of PC range bytes covered by DW_AT_locations of + /// formal parameters. + unsigned ParamScopeBytesCovered = 0; + /// Total number of PC range bytes in each variable's enclosing scope, + /// starting from the first definition of the variable (only for parameters). + unsigned ParamScopeBytesFromFirstDefinition = 0; + /// Total number of PC range bytes covered by DW_AT_locations with + /// the debug entry values (DW_OP_entry_value) (only for parameters). + unsigned ParamScopeEntryValueBytesCovered = 0; + /// Total number of PC range bytes covered by DW_AT_locations (only for local + /// variables). + unsigned VarScopeBytesCovered = 0; + /// Total number of PC range bytes in each variable's enclosing scope, + /// starting from the first definition of the variable (only for local + /// variables). + unsigned VarScopeBytesFromFirstDefinition = 0; + /// Total number of PC range bytes covered by DW_AT_locations with + /// the debug entry values (DW_OP_entry_value) (only for local variables). + unsigned VarScopeEntryValueBytesCovered = 0; + /// Total number of call site entries (DW_AT_call_file & DW_AT_call_line). unsigned CallSiteEntries = 0; + /// Total number of call site DIEs (DW_TAG_call_site). + unsigned CallSiteDIEs = 0; + /// Total number of call site parameter DIEs (DW_TAG_call_site_parameter). + unsigned CallSiteParamDIEs = 0; /// Total byte size of concrete functions. This byte size includes /// inline functions contained in the concrete functions. 
- uint64_t FunctionSize = 0; + unsigned FunctionSize = 0; /// Total byte size of inlined functions. This is the total number of bytes /// for the top inline functions within concrete functions. This can help /// tune the inline settings when compiling to match user expectations. - uint64_t InlineFunctionSize = 0; + unsigned InlineFunctionSize = 0; +}; + +/// Holds accumulated debug location statistics about local variables and +/// formal parameters. +struct LocationStats { + /// Map the scope coverage decile to the number of variables in the decile. + /// The first element of the array (at the index zero) represents the number + /// of variables with the no debug location at all, but the last element + /// in the vector represents the number of fully covered variables within + /// its scope. + std::vector VarParamLocStats{ + std::vector(NumOfCoverageCategories, 0)}; + /// Map non debug entry values coverage. + std::vector VarParamNonEntryValLocStats{ + std::vector(NumOfCoverageCategories, 0)}; + /// The debug location statistics for formal parameters. + std::vector ParamLocStats{ + std::vector(NumOfCoverageCategories, 0)}; + /// Map non debug entry values coverage for formal parameters. + std::vector ParamNonEntryValLocStats{ + std::vector(NumOfCoverageCategories, 0)}; + /// The debug location statistics for local variables. + std::vector VarLocStats{ + std::vector(NumOfCoverageCategories, 0)}; + /// Map non debug entry values coverage for local variables. + std::vector VarNonEntryValLocStats{ + std::vector(NumOfCoverageCategories, 0)}; + /// Total number of local variables and function parameters processed. + unsigned NumVarParam = 0; + /// Total number of formal parameters processed. + unsigned NumParam = 0; + /// Total number of local variables processed. + unsigned NumVar = 0; }; /// Extract the low pc from a Die. @@ -81,27 +146,66 @@ static uint64_t getLowPC(DWARFDie Die) { return dwarf::toAddress(Die.find(dwarf::DW_AT_low_pc), 0); } +/// Collect debug location statistics for one DIE. +static void collectLocStats(uint64_t BytesCovered, uint64_t BytesInScope, + std::vector &VarParamLocStats, + std::vector &ParamLocStats, + std::vector &VarLocStats, bool IsParam, + bool IsLocalVar) { + auto getCoverageBucket = [BytesCovered, BytesInScope]() -> unsigned { + unsigned LocBucket = 100 * (double)BytesCovered / BytesInScope; + if (LocBucket == 0) { + // No debug location at all for the variable. + return 0; + } else if (LocBucket == 100 || BytesCovered > BytesInScope) { + // Fully covered variable within its scope. + return NumOfCoverageCategories - 1; + } else { + // Get covered range (e.g. 20%-29%). + LocBucket /= 10; + return LocBucket + 1; + } + }; + + unsigned CoverageBucket = getCoverageBucket(); + VarParamLocStats[CoverageBucket]++; + if (IsParam) + ParamLocStats[CoverageBucket]++; + else if (IsLocalVar) + VarLocStats[CoverageBucket]++; +} + /// Collect debug info quality metrics for one DIE. 
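The LocationStats fields and the getCoverageBucket lambda above implement a fixed bucketing of debug-location coverage: twelve categories, where the first bucket holds variables whose coverage rounds down to 0%, the last holds variables that are fully covered (or whose location ranges extend past the scope), and the buckets in between hold 1-9%, 10-19%, ..., 90-99%. A standalone restatement of that arithmetic with a few worked cases; the function name coverageBucket is illustrative, not from the patch:

  #include <cassert>
  #include <cstdint>

  // Mirrors the getCoverageBucket lambda above. Callers in the patch only
  // invoke it when BytesInScope is non-zero, so no division-by-zero guard.
  constexpr int NumOfCoverageCategories = 12;

  unsigned coverageBucket(uint64_t BytesCovered, uint64_t BytesInScope) {
    unsigned Pct = 100 * (double)BytesCovered / BytesInScope;
    if (Pct == 0)
      return 0;                            // coverage rounds down to 0%
    if (Pct == 100 || BytesCovered > BytesInScope)
      return NumOfCoverageCategories - 1;  // fully covered, or over-covered
    return Pct / 10 + 1;                   // e.g. 20-29% lands in bucket 3
  }

  int main() {
    assert(coverageBucket(0, 64) == 0);
    assert(coverageBucket(5, 100) == 1);   // 1-9%
    assert(coverageBucket(25, 100) == 3);  // 20-29%
    assert(coverageBucket(80, 64) == 11);  // range extends past the scope
    return 0;
  }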
-static void collectStatsForDie(DWARFDie Die, std::string FnPrefix, +static void collectStatsForDie(DWARFDie Die, uint64_t UnitLowPC, std::string FnPrefix, std::string VarPrefix, uint64_t ScopeLowPC, uint64_t BytesInScope, uint32_t InlineDepth, StringMap &FnStatMap, - GlobalStats &GlobalStats) { + GlobalStats &GlobalStats, + LocationStats &LocStats) { bool HasLoc = false; bool HasSrcLoc = false; bool HasType = false; bool IsArtificial = false; uint64_t BytesCovered = 0; + uint64_t BytesEntryValuesCovered = 0; uint64_t OffsetToFirstDefinition = 0; + auto &FnStats = FnStatMap[FnPrefix]; + bool IsParam = Die.getTag() == dwarf::DW_TAG_formal_parameter; + bool IsLocalVar = Die.getTag() == dwarf::DW_TAG_variable; + + if (Die.getTag() == dwarf::DW_TAG_call_site || + Die.getTag() == dwarf::DW_TAG_GNU_call_site) { + GlobalStats.CallSiteDIEs++; + return; + } - if (Die.getTag() == dwarf::DW_TAG_call_site) { - GlobalStats.CallSiteEntries++; + if (Die.getTag() == dwarf::DW_TAG_call_site_parameter || + Die.getTag() == dwarf::DW_TAG_GNU_call_site_parameter) { + GlobalStats.CallSiteParamDIEs++; return; } - if (Die.getTag() != dwarf::DW_TAG_formal_parameter && - Die.getTag() != dwarf::DW_TAG_variable && - Die.getTag() != dwarf::DW_TAG_member) { + if (!IsParam && !IsLocalVar && Die.getTag() != dwarf::DW_TAG_member) { // Not a variable or constant member. return; } @@ -116,6 +220,19 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix, if (Die.find(dwarf::DW_AT_artificial)) IsArtificial = true; + auto IsEntryValue = [&](ArrayRef D) -> bool { + DWARFUnit *U = Die.getDwarfUnit(); + DataExtractor Data(toStringRef(D), + Die.getDwarfUnit()->getContext().isLittleEndian(), 0); + DWARFExpression Expression(Data, U->getVersion(), U->getAddressByteSize()); + // Consider the expression containing the DW_OP_entry_value as + // an entry value. + return llvm::any_of(Expression, [](DWARFExpression::Operation &Op) { + return Op.getCode() == dwarf::DW_OP_entry_value || + Op.getCode() == dwarf::DW_OP_GNU_entry_value; + }); + }; + if (Die.find(dwarf::DW_AT_const_value)) { // This catches constant members *and* variables. HasLoc = true; @@ -133,11 +250,15 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix, if (auto DebugLocOffset = FormValue->getAsSectionOffset()) { auto *DebugLoc = Die.getDwarfUnit()->getContext().getDebugLoc(); if (auto List = DebugLoc->getLocationListAtOffset(*DebugLocOffset)) { - for (auto Entry : List->Entries) - BytesCovered += Entry.End - Entry.Begin; + for (auto Entry : List->Entries) { + uint64_t BytesEntryCovered = Entry.End - Entry.Begin; + BytesCovered += BytesEntryCovered; + if (IsEntryValue(Entry.Loc)) + BytesEntryValuesCovered += BytesEntryCovered; + } if (List->Entries.size()) { uint64_t FirstDef = List->Entries[0].Begin; - uint64_t UnitOfs = getLowPC(Die.getDwarfUnit()->getUnitDIE()); + uint64_t UnitOfs = UnitLowPC; // Ranges sometimes start before the lexical scope. if (UnitOfs + FirstDef >= ScopeLowPC) OffsetToFirstDefinition = UnitOfs + FirstDef - ScopeLowPC; @@ -154,8 +275,25 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix, } } + // Calculate the debug location statistics. + if (BytesInScope) { + LocStats.NumVarParam++; + if (IsParam) + LocStats.NumParam++; + else if (IsLocalVar) + LocStats.NumVar++; + + collectLocStats(BytesCovered, BytesInScope, LocStats.VarParamLocStats, + LocStats.ParamLocStats, LocStats.VarLocStats, IsParam, + IsLocalVar); + // Non debug entry values coverage statistics. 
+ collectLocStats(BytesCovered - BytesEntryValuesCovered, BytesInScope, + LocStats.VarParamNonEntryValLocStats, + LocStats.ParamNonEntryValLocStats, + LocStats.VarNonEntryValLocStats, IsParam, IsLocalVar); + } + // Collect PC range coverage data. - auto &FnStats = FnStatMap[FnPrefix]; if (DWARFDie D = Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) Die = D; @@ -171,6 +309,17 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix, // Turns out we have a lot of ranges that extend past the lexical scope. GlobalStats.ScopeBytesCovered += std::min(BytesInScope, BytesCovered); GlobalStats.ScopeBytesFromFirstDefinition += BytesInScope; + GlobalStats.ScopeEntryValueBytesCovered += BytesEntryValuesCovered; + if (IsParam) { + GlobalStats.ParamScopeBytesCovered += + std::min(BytesInScope, BytesCovered); + GlobalStats.ParamScopeBytesFromFirstDefinition += BytesInScope; + GlobalStats.ParamScopeEntryValueBytesCovered += BytesEntryValuesCovered; + } else if (IsLocalVar) { + GlobalStats.VarScopeBytesCovered += std::min(BytesInScope, BytesCovered); + GlobalStats.VarScopeBytesFromFirstDefinition += BytesInScope; + GlobalStats.VarScopeEntryValueBytesCovered += BytesEntryValuesCovered; + } assert(GlobalStats.ScopeBytesCovered <= GlobalStats.ScopeBytesFromFirstDefinition); } else if (Die.getTag() == dwarf::DW_TAG_member) { @@ -179,7 +328,7 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix, FnStats.TotalVarWithLoc += (unsigned)HasLoc; } if (!IsArtificial) { - if (Die.getTag() == dwarf::DW_TAG_formal_parameter) { + if (IsParam) { FnStats.NumParams++; if (HasType) FnStats.NumParamTypes++; @@ -187,7 +336,7 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix, FnStats.NumParamSourceLocations++; if (HasLoc) FnStats.NumParamLocations++; - } else if (Die.getTag() == dwarf::DW_TAG_variable) { + } else if (IsLocalVar) { FnStats.NumVars++; if (HasType) FnStats.NumVarTypes++; @@ -200,11 +349,12 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix, } /// Recursively collect debug info quality metrics. -static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix, +static void collectStatsRecursive(DWARFDie Die, uint64_t UnitLowPC, std::string FnPrefix, std::string VarPrefix, uint64_t ScopeLowPC, uint64_t BytesInScope, uint32_t InlineDepth, StringMap &FnStatMap, - GlobalStats &GlobalStats) { + GlobalStats &GlobalStats, + LocationStats &LocStats) { // Handle any kind of lexical scope. const dwarf::Tag Tag = Die.getTag(); const bool IsFunction = Tag == dwarf::DW_TAG_subprogram; @@ -272,8 +422,8 @@ static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix, } } else { // Not a scope, visit the Die itself. It could be a variable. 
- collectStatsForDie(Die, FnPrefix, VarPrefix, ScopeLowPC, BytesInScope, - InlineDepth, FnStatMap, GlobalStats); + collectStatsForDie(Die, UnitLowPC, FnPrefix, VarPrefix, ScopeLowPC, BytesInScope, + InlineDepth, FnStatMap, GlobalStats, LocStats); } // Set InlineDepth correctly for child recursion @@ -290,8 +440,9 @@ static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix, if (Child.getTag() == dwarf::DW_TAG_lexical_block) ChildVarPrefix += toHex(LexicalBlockIndex++) + '.'; - collectStatsRecursive(Child, FnPrefix, ChildVarPrefix, ScopeLowPC, - BytesInScope, InlineDepth, FnStatMap, GlobalStats); + collectStatsRecursive(Child, UnitLowPC, FnPrefix, ChildVarPrefix, ScopeLowPC, + BytesInScope, InlineDepth, FnStatMap, GlobalStats, + LocStats); Child = Child.getSibling(); } } @@ -299,14 +450,33 @@ static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix, /// Print machine-readable output. /// The machine-readable format is single-line JSON output. /// \{ -static void printDatum(raw_ostream &OS, const char *Key, StringRef Value) { - OS << ",\"" << Key << "\":\"" << Value << '"'; - LLVM_DEBUG(llvm::dbgs() << Key << ": " << Value << '\n'); -} -static void printDatum(raw_ostream &OS, const char *Key, uint64_t Value) { +static void printDatum(raw_ostream &OS, const char *Key, json::Value Value) { OS << ",\"" << Key << "\":" << Value; LLVM_DEBUG(llvm::dbgs() << Key << ": " << Value << '\n'); } +static void printLocationStats(raw_ostream &OS, + const char *Key, + std::vector &LocationStats) { + OS << ",\"" << Key << " with 0% of its scope covered\":" + << LocationStats[0]; + LLVM_DEBUG(llvm::dbgs() << Key << " with 0% of its scope covered: " + << LocationStats[0] << '\n'); + OS << ",\"" << Key << " with 1-9% of its scope covered\":" + << LocationStats[1]; + LLVM_DEBUG(llvm::dbgs() << Key << " with 1-9% of its scope covered: " + << LocationStats[1] << '\n'); + for (unsigned i = 2; i < NumOfCoverageCategories - 1; ++i) { + OS << ",\"" << Key << " with " << (i - 1) * 10 << "-" << i * 10 - 1 + << "% of its scope covered\":" << LocationStats[i]; + LLVM_DEBUG(llvm::dbgs() + << Key << " with " << (i - 1) * 10 << "-" << i * 10 - 1 + << "% of its scope covered: " << LocationStats[i]); + } + OS << ",\"" << Key << " with 100% of its scope covered\":" + << LocationStats[NumOfCoverageCategories - 1]; + LLVM_DEBUG(llvm::dbgs() << Key << " with 100% of its scope covered: " + << LocationStats[NumOfCoverageCategories - 1]); +} /// \} /// Collect debug info quality metrics for an entire DIContext. @@ -321,10 +491,12 @@ bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx, Twine Filename, raw_ostream &OS) { StringRef FormatName = Obj.getFileFormatName(); GlobalStats GlobalStats; + LocationStats LocStats; StringMap Statistics; for (const auto &CU : static_cast(&DICtx)->compile_units()) if (DWARFDie CUDie = CU->getNonSkeletonUnitDIE(false)) - collectStatsRecursive(CUDie, "/", "g", 0, 0, 0, Statistics, GlobalStats); + collectStatsRecursive(CUDie, getLowPC(CUDie), "/", "g", 0, 0, 0, + Statistics, GlobalStats, LocStats); /// The version number should be increased every time the algorithm is changed /// (including bug fixes). 
New metrics may be added without increasing the @@ -387,9 +559,24 @@ bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx, printDatum(OS, "source variables", VarParamTotal); printDatum(OS, "variables with location", VarParamWithLoc); printDatum(OS, "call site entries", GlobalStats.CallSiteEntries); + printDatum(OS, "call site DIEs", GlobalStats.CallSiteDIEs); + printDatum(OS, "call site parameter DIEs", GlobalStats.CallSiteParamDIEs); printDatum(OS, "scope bytes total", GlobalStats.ScopeBytesFromFirstDefinition); printDatum(OS, "scope bytes covered", GlobalStats.ScopeBytesCovered); + printDatum(OS, "entry value scope bytes covered", + GlobalStats.ScopeEntryValueBytesCovered); + printDatum(OS, "formal params scope bytes total", + GlobalStats.ParamScopeBytesFromFirstDefinition); + printDatum(OS, "formal params scope bytes covered", + GlobalStats.ParamScopeBytesCovered); + printDatum(OS, "formal params entry value scope bytes covered", + GlobalStats.ParamScopeEntryValueBytesCovered); + printDatum(OS, "vars scope bytes total", + GlobalStats.VarScopeBytesFromFirstDefinition); + printDatum(OS, "vars scope bytes covered", GlobalStats.VarScopeBytesCovered); + printDatum(OS, "vars entry value scope bytes covered", + GlobalStats.VarScopeEntryValueBytesCovered); printDatum(OS, "total function size", GlobalStats.FunctionSize); printDatum(OS, "total inlined function size", GlobalStats.InlineFunctionSize); printDatum(OS, "total formal params", ParamTotal); @@ -400,6 +587,20 @@ bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx, printDatum(OS, "vars with source location", VarWithSrcLoc); printDatum(OS, "vars with type", VarWithType); printDatum(OS, "vars with binary location", VarWithLoc); + printDatum(OS, "total variables procesed by location statistics", + LocStats.NumVarParam); + printLocationStats(OS, "variables", LocStats.VarParamLocStats); + printLocationStats(OS, "variables (excluding the debug entry values)", + LocStats.VarParamNonEntryValLocStats); + printDatum(OS, "total params procesed by location statistics", + LocStats.NumParam); + printLocationStats(OS, "params", LocStats.ParamLocStats); + printLocationStats(OS, "params (excluding the debug entry values)", + LocStats.ParamNonEntryValLocStats); + printDatum(OS, "total vars procesed by location statistics", LocStats.NumVar); + printLocationStats(OS, "vars", LocStats.VarLocStats); + printLocationStats(OS, "vars (excluding the debug entry values)", + LocStats.VarNonEntryValLocStats); OS << "}\n"; LLVM_DEBUG( llvm::dbgs() << "Total Availability: " diff --git a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp index 05a7aef67ece..e20f6041f98d 100644 --- a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp +++ b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp @@ -584,7 +584,7 @@ int main(int argc, char **argv) { } std::error_code EC; - ToolOutputFile OutputFile(OutputFilename, EC, sys::fs::OF_None); + ToolOutputFile OutputFile(OutputFilename, EC, sys::fs::OF_Text); error("Unable to open output file" + OutputFilename, EC); // Don't remove output file if we exit with an error. OutputFile.keep(); diff --git a/tools/llvm-extract/llvm-extract.cpp b/tools/llvm-extract/llvm-extract.cpp index 300bc0b4bd52..dddc0d9baa08 100644 --- a/tools/llvm-extract/llvm-extract.cpp +++ b/tools/llvm-extract/llvm-extract.cpp @@ -74,8 +74,18 @@ static cl::list // ExtractBlocks - The blocks to extract from the module. 
static cl::list ExtractBlocks( - "bb", cl::desc("Specify pairs to extract"), - cl::ZeroOrMore, cl::value_desc("function:bb"), cl::cat(ExtractCat)); + "bb", + cl::desc( + "Specify pairs to extract.\n" + "Each pair will create a function.\n" + "If multiple basic blocks are specified in one pair,\n" + "the first block in the sequence should dominate the rest.\n" + "eg:\n" + " --bb=f:bb1;bb2 will extract one function with both bb1 and bb2;\n" + " --bb=f:bb1 --bb=f:bb2 will extract two functions, one with bb1, one " + "with bb2."), + cl::ZeroOrMore, cl::value_desc("function:bb1[;bb2...]"), + cl::cat(ExtractCat)); // ExtractAlias - The alias to extract from the module. static cl::list @@ -350,7 +360,7 @@ int main(int argc, char **argv) { Passes.add(createStripDeadPrototypesPass()); // Remove dead func decls std::error_code EC; - ToolOutputFile Out(OutputFilename, EC, sys::fs::F_None); + ToolOutputFile Out(OutputFilename, EC, sys::fs::OF_None); if (EC) { errs() << EC.message() << '\n'; return 1; diff --git a/tools/llvm-ifs/CMakeLists.txt b/tools/llvm-ifs/CMakeLists.txt new file mode 100644 index 000000000000..544b0e41a5ed --- /dev/null +++ b/tools/llvm-ifs/CMakeLists.txt @@ -0,0 +1,10 @@ +set(LLVM_LINK_COMPONENTS + Object + Support + TextAPI + ObjectYAML + ) + +add_llvm_tool(llvm-ifs + llvm-ifs.cpp + ) diff --git a/tools/llvm-ifs/LLVMBuild.txt b/tools/llvm-ifs/LLVMBuild.txt new file mode 100644 index 000000000000..10dc6bd8f550 --- /dev/null +++ b/tools/llvm-ifs/LLVMBuild.txt @@ -0,0 +1,21 @@ +;===- ./tools/llvm-ifs/LLVMBuild.txt ---------------------------*- Conf -*--===; +; +; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Tool +name = llvm-ifs +parent = Tools +required_libraries = Object Support TextAPI diff --git a/tools/llvm-ifs/llvm-ifs.cpp b/tools/llvm-ifs/llvm-ifs.cpp new file mode 100644 index 000000000000..f329b4633632 --- /dev/null +++ b/tools/llvm-ifs/llvm-ifs.cpp @@ -0,0 +1,532 @@ +//===- llvm-ifs.cpp -------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
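The extended --bb help text above describes specifications of the form function:bb1[;bb2...]. A rough sketch of splitting such a specification with the StringRef helpers llvm-extract already links against (parseBlockSpec is hypothetical and not the tool's actual parser):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringRef.h"

    // Split "f:bb1;bb2" into the function name and the list of basic block
    // names; per the help text, the first block should dominate the rest.
    static void parseBlockSpec(llvm::StringRef Spec, llvm::StringRef &Func,
                               llvm::SmallVectorImpl<llvm::StringRef> &Blocks) {
      auto FuncAndBlocks = Spec.split(':');
      Func = FuncAndBlocks.first;
      FuncAndBlocks.second.split(Blocks, ';', /*MaxSplit=*/-1,
                                 /*KeepEmpty=*/false);
    }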
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------------===/ + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ObjectYAML/yaml2obj.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/VersionTuple.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TextAPI/MachO/InterfaceFile.h" +#include "llvm/TextAPI/MachO/TextAPIReader.h" +#include "llvm/TextAPI/MachO/TextAPIWriter.h" +#include +#include + +using namespace llvm; +using namespace llvm::yaml; +using namespace llvm::MachO; + +#define DEBUG_TYPE "llvm-ifs" + +namespace { +const VersionTuple IFSVersionCurrent(1, 2); +} + +static cl::opt Action("action", cl::desc(""), + cl::value_desc("write-ifs | write-bin"), + cl::init("write-ifs")); + +static cl::opt ForceFormat("force-format", + cl::desc(""), + cl::value_desc("ELF | TBD"), + cl::init("")); + +static cl::list InputFilenames(cl::Positional, + cl::desc(""), + cl::ZeroOrMore); + +static cl::opt OutputFilename("o", cl::desc(""), + cl::value_desc("path")); + +enum class IFSSymbolType { + NoType = 0, + Object, + Func, + // Type information is 4 bits, so 16 is safely out of range. + Unknown = 16, +}; + +std::string getTypeName(IFSSymbolType Type) { + switch (Type) { + case IFSSymbolType::NoType: + return "NoType"; + case IFSSymbolType::Func: + return "Func"; + case IFSSymbolType::Object: + return "Object"; + case IFSSymbolType::Unknown: + return "Unknown"; + } + llvm_unreachable("Unexpected ifs symbol type."); +} + +struct IFSSymbol { + IFSSymbol(std::string SymbolName) : Name(SymbolName) {} + std::string Name; + uint64_t Size; + IFSSymbolType Type; + bool Weak; + Optional Warning; + bool operator<(const IFSSymbol &RHS) const { return Name < RHS.Name; } +}; + +namespace llvm { +namespace yaml { +/// YAML traits for IFSSymbolType. +template <> struct ScalarEnumerationTraits { + static void enumeration(IO &IO, IFSSymbolType &SymbolType) { + IO.enumCase(SymbolType, "NoType", IFSSymbolType::NoType); + IO.enumCase(SymbolType, "Func", IFSSymbolType::Func); + IO.enumCase(SymbolType, "Object", IFSSymbolType::Object); + IO.enumCase(SymbolType, "Unknown", IFSSymbolType::Unknown); + // Treat other symbol types as noise, and map to Unknown. + if (!IO.outputting() && IO.matchEnumFallback()) + SymbolType = IFSSymbolType::Unknown; + } +}; + +template <> struct ScalarTraits { + static void output(const VersionTuple &Value, void *, + llvm::raw_ostream &Out) { + Out << Value.getAsString(); + } + + static StringRef input(StringRef Scalar, void *, VersionTuple &Value) { + if (Value.tryParse(Scalar)) + return StringRef("Can't parse version: invalid version format."); + + if (Value > IFSVersionCurrent) + return StringRef("Unsupported IFS version."); + + // Returning empty StringRef indicates successful parse. + return StringRef(); + } + + // Don't place quotation marks around version value. + static QuotingType mustQuote(StringRef) { return QuotingType::None; } +}; + +/// YAML traits for IFSSymbol. 
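The ScalarTraits<VersionTuple> specialization below leans on VersionTuple::tryParse, which returns true on failure. A small stand-alone sketch of the same check the trait performs (isAcceptableIfsVersion is hypothetical and the literals are only examples):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/VersionTuple.h"

    // Mirror the validation done in ScalarTraits<VersionTuple>::input: reject
    // malformed strings and versions newer than the tool supports.
    static bool isAcceptableIfsVersion(llvm::StringRef Text,
                                       const llvm::VersionTuple &Current) {
      llvm::VersionTuple V;
      if (V.tryParse(Text)) // true means the string did not parse
        return false;
      return V <= Current;  // e.g. "1.0" and "1.2" pass against (1, 2)
    }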
+template <> struct MappingTraits { + static void mapping(IO &IO, IFSSymbol &Symbol) { + IO.mapRequired("Type", Symbol.Type); + // The need for symbol size depends on the symbol type. + if (Symbol.Type == IFSSymbolType::NoType) + IO.mapOptional("Size", Symbol.Size, (uint64_t)0); + else if (Symbol.Type == IFSSymbolType::Func) + Symbol.Size = 0; + else + IO.mapRequired("Size", Symbol.Size); + IO.mapOptional("Weak", Symbol.Weak, false); + IO.mapOptional("Warning", Symbol.Warning); + } + + // Compacts symbol information into a single line. + static const bool flow = true; +}; + +/// YAML traits for set of IFSSymbols. +template <> struct CustomMappingTraits> { + static void inputOne(IO &IO, StringRef Key, std::set &Set) { + std::string Name = Key.str(); + IFSSymbol Sym(Name); + IO.mapRequired(Name.c_str(), Sym); + Set.insert(Sym); + } + + static void output(IO &IO, std::set &Set) { + for (auto &Sym : Set) + IO.mapRequired(Sym.Name.c_str(), const_cast(Sym)); + } +}; +} // namespace yaml +} // namespace llvm + +// A cumulative representation of ELF stubs. +// Both textual and binary stubs will read into and write from this object. +class IFSStub { + // TODO: Add support for symbol versioning. +public: + VersionTuple IfsVersion; + std::string Triple; + std::string ObjectFileFormat; + Optional SOName; + std::vector NeededLibs; + std::set Symbols; + + IFSStub() = default; + IFSStub(const IFSStub &Stub) + : IfsVersion(Stub.IfsVersion), Triple(Stub.Triple), + ObjectFileFormat(Stub.ObjectFileFormat), SOName(Stub.SOName), + NeededLibs(Stub.NeededLibs), Symbols(Stub.Symbols) {} + IFSStub(IFSStub &&Stub) + : IfsVersion(std::move(Stub.IfsVersion)), Triple(std::move(Stub.Triple)), + ObjectFileFormat(std::move(Stub.ObjectFileFormat)), + SOName(std::move(Stub.SOName)), NeededLibs(std::move(Stub.NeededLibs)), + Symbols(std::move(Stub.Symbols)) {} +}; + +namespace llvm { +namespace yaml { +/// YAML traits for IFSStub objects. +template <> struct MappingTraits { + static void mapping(IO &IO, IFSStub &Stub) { + if (!IO.mapTag("!experimental-ifs-v1", true)) + IO.setError("Not a .ifs YAML file."); + IO.mapRequired("IfsVersion", Stub.IfsVersion); + IO.mapOptional("Triple", Stub.Triple); + IO.mapOptional("ObjectFileFormat", Stub.ObjectFileFormat); + IO.mapOptional("SOName", Stub.SOName); + IO.mapOptional("NeededLibs", Stub.NeededLibs); + IO.mapRequired("Symbols", Stub.Symbols); + } +}; +} // namespace yaml +} // namespace llvm + +static Expected> readInputFile(StringRef FilePath) { + // Read in file. + ErrorOr> BufOrError = + MemoryBuffer::getFileOrSTDIN(FilePath); + if (!BufOrError) + return createStringError(BufOrError.getError(), "Could not open `%s`", + FilePath.data()); + + std::unique_ptr FileReadBuffer = std::move(*BufOrError); + yaml::Input YamlIn(FileReadBuffer->getBuffer()); + std::unique_ptr Stub(new IFSStub()); + YamlIn >> *Stub; + + if (std::error_code Err = YamlIn.error()) + return createStringError(Err, "Failed reading Interface Stub File."); + + return std::move(Stub); +} + +int writeTbdStub(const llvm::Triple &T, const std::set &Symbols, + const StringRef Format, raw_ostream &Out) { + + auto PlatformKindOrError = + [](const llvm::Triple &T) -> llvm::Expected { + if (T.isMacOSX()) + return llvm::MachO::PlatformKind::macOS; + if (T.isTvOS()) + return llvm::MachO::PlatformKind::tvOS; + if (T.isWatchOS()) + return llvm::MachO::PlatformKind::watchOS; + // Note: put isiOS last because tvOS and watchOS are also iOS according + // to the Triple. 
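To make the traits above concrete, here is a minimal, hedged driver that feeds an !experimental-ifs-v1 document (the same textual layout documented in the comment before main() further down) through yaml::Input. The triple and symbol name are invented for illustration, and the function assumes the IFSStub definition and YAML traits from this file:

    #include "llvm/Support/YAMLTraits.h"
    #include "llvm/Support/raw_ostream.h"

    // Parse a tiny in-memory interface stub using the traits defined above.
    static void parseExampleStub() {
      static const char Doc[] = "--- !experimental-ifs-v1\n"
                                "IfsVersion: 1.0\n"
                                "Triple: x86_64-unknown-linux-gnu\n"
                                "ObjectFileFormat: ELF\n"
                                "Symbols:\n"
                                "  foo: { Type: Func }\n"
                                "...\n";
      IFSStub Stub;
      llvm::yaml::Input YamlIn(Doc);
      YamlIn >> Stub;
      if (YamlIn.error())
        llvm::errs() << "failed to parse the example stub\n";
    }

Note that a Func symbol needs no Size key, since MappingTraits<IFSSymbol> forces Size to zero for functions.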
+ if (T.isiOS()) + return llvm::MachO::PlatformKind::iOS; + + // TODO: Add an option for ForceTriple, but keep ForceFormat for now. + if (ForceFormat == "TBD") + return llvm::MachO::PlatformKind::macOS; + + return createStringError(errc::not_supported, "Invalid Platform.\n"); + }(T); + + if (!PlatformKindOrError) + return -1; + + PlatformKind Plat = PlatformKindOrError.get(); + TargetList Targets({Target(llvm::MachO::mapToArchitecture(T), Plat)}); + + InterfaceFile File; + File.setFileType(FileType::TBD_V3); // Only supporting v3 for now. + File.addTargets(Targets); + + for (const auto &Symbol : Symbols) { + auto Name = Symbol.Name; + auto Kind = SymbolKind::GlobalSymbol; + switch (Symbol.Type) { + default: + case IFSSymbolType::NoType: + Kind = SymbolKind::GlobalSymbol; + break; + case IFSSymbolType::Object: + Kind = SymbolKind::GlobalSymbol; + break; + case IFSSymbolType::Func: + Kind = SymbolKind::GlobalSymbol; + break; + } + if (Symbol.Weak) + File.addSymbol(Kind, Name, Targets, SymbolFlags::WeakDefined); + else + File.addSymbol(Kind, Name, Targets); + } + + SmallString<4096> Buffer; + raw_svector_ostream OS(Buffer); + if (Error Result = TextAPIWriter::writeToStream(OS, File)) + return -1; + Out << OS.str(); + return 0; +} + +int writeElfStub(const llvm::Triple &T, const std::set &Symbols, + const StringRef Format, raw_ostream &Out) { + SmallString<0> Storage; + Storage.clear(); + raw_svector_ostream OS(Storage); + + OS << "--- !ELF\n"; + OS << "FileHeader:\n"; + OS << " Class: ELFCLASS"; + OS << (T.isArch64Bit() ? "64" : "32"); + OS << "\n"; + OS << " Data: ELFDATA2"; + OS << (T.isLittleEndian() ? "LSB" : "MSB"); + OS << "\n"; + OS << " Type: ET_DYN\n"; + OS << " Machine: " + << llvm::StringSwitch(T.getArchName()) + .Case("x86_64", "EM_X86_64") + .Case("i386", "EM_386") + .Case("i686", "EM_386") + .Case("aarch64", "EM_AARCH64") + .Case("amdgcn", "EM_AMDGPU") + .Case("r600", "EM_AMDGPU") + .Case("arm", "EM_ARM") + .Case("thumb", "EM_ARM") + .Case("avr", "EM_AVR") + .Case("mips", "EM_MIPS") + .Case("mipsel", "EM_MIPS") + .Case("mips64", "EM_MIPS") + .Case("mips64el", "EM_MIPS") + .Case("msp430", "EM_MSP430") + .Case("ppc", "EM_PPC") + .Case("ppc64", "EM_PPC64") + .Case("ppc64le", "EM_PPC64") + .Case("x86", T.isOSIAMCU() ? "EM_IAMCU" : "EM_386") + .Case("x86_64", "EM_X86_64") + .Default("EM_NONE") + << "\nSections:" + << "\n - Name: .text" + << "\n Type: SHT_PROGBITS" + << "\n - Name: .data" + << "\n Type: SHT_PROGBITS" + << "\n - Name: .rodata" + << "\n Type: SHT_PROGBITS" + << "\nSymbols:\n"; + for (const auto &Symbol : Symbols) { + OS << " - Name: " << Symbol.Name << "\n" + << " Type: STT_"; + switch (Symbol.Type) { + default: + case IFSSymbolType::NoType: + OS << "NOTYPE"; + break; + case IFSSymbolType::Object: + OS << "OBJECT"; + break; + case IFSSymbolType::Func: + OS << "FUNC"; + break; + } + OS << "\n Section: .text" + << "\n Binding: STB_" << (Symbol.Weak ? "WEAK" : "GLOBAL") + << "\n"; + } + OS << "...\n"; + + std::string YamlStr = OS.str(); + + // Only or debugging. Not an offical format. + LLVM_DEBUG({ + if (ForceFormat == "ELFOBJYAML") { + Out << YamlStr; + return 0; + } + }); + + yaml::Input YIn(YamlStr); + auto ErrHandler = [](const Twine &Msg) { + WithColor::error(errs(), "llvm-ifs") << Msg << "\n"; + }; + return convertYAML(YIn, Out, ErrHandler) ? 
0 : 1; +} + +int writeIfso(const IFSStub &Stub, bool IsWriteIfs, raw_ostream &Out) { + if (IsWriteIfs) { + yaml::Output YamlOut(Out, NULL, /*WrapColumn =*/0); + YamlOut << const_cast(Stub); + return 0; + } + + std::string ObjectFileFormat = + ForceFormat.empty() ? Stub.ObjectFileFormat : ForceFormat; + + if (ObjectFileFormat == "ELF" || ForceFormat == "ELFOBJYAML") + return writeElfStub(llvm::Triple(Stub.Triple), Stub.Symbols, + Stub.ObjectFileFormat, Out); + if (ObjectFileFormat == "TBD") + return writeTbdStub(llvm::Triple(Stub.Triple), Stub.Symbols, + Stub.ObjectFileFormat, Out); + + WithColor::error() + << "Invalid ObjectFileFormat: Only ELF and TBD are supported.\n"; + return -1; +} + +// New Interface Stubs Yaml Format: +// --- !experimental-ifs-v1 +// IfsVersion: 1.0 +// Triple: +// ObjectFileFormat: +// Symbols: +// _ZSymbolName: { Type: } +// ... + +int main(int argc, char *argv[]) { + // Parse arguments. + cl::ParseCommandLineOptions(argc, argv); + + if (InputFilenames.empty()) + InputFilenames.push_back("-"); + + IFSStub Stub; + std::map SymbolMap; + + std::string PreviousInputFilePath = ""; + for (const std::string &InputFilePath : InputFilenames) { + Expected> StubOrErr = readInputFile(InputFilePath); + if (!StubOrErr) { + WithColor::error() << StubOrErr.takeError() << "\n"; + return -1; + } + std::unique_ptr TargetStub = std::move(StubOrErr.get()); + + if (Stub.Triple.empty()) { + PreviousInputFilePath = InputFilePath; + Stub.IfsVersion = TargetStub->IfsVersion; + Stub.Triple = TargetStub->Triple; + Stub.ObjectFileFormat = TargetStub->ObjectFileFormat; + Stub.SOName = TargetStub->SOName; + Stub.NeededLibs = TargetStub->NeededLibs; + } else { + if (Stub.IfsVersion != TargetStub->IfsVersion) { + if (Stub.IfsVersion.getMajor() != IFSVersionCurrent.getMajor()) { + WithColor::error() + << "Interface Stub: IfsVersion Mismatch." + << "\nFilenames: " << PreviousInputFilePath << " " + << InputFilePath << "\nIfsVersion Values: " << Stub.IfsVersion + << " " << TargetStub->IfsVersion << "\n"; + return -1; + } + if (TargetStub->IfsVersion > Stub.IfsVersion) + Stub.IfsVersion = TargetStub->IfsVersion; + } + if (Stub.ObjectFileFormat != TargetStub->ObjectFileFormat) { + WithColor::error() << "Interface Stub: ObjectFileFormat Mismatch." + << "\nFilenames: " << PreviousInputFilePath << " " + << InputFilePath << "\nObjectFileFormat Values: " + << Stub.ObjectFileFormat << " " + << TargetStub->ObjectFileFormat << "\n"; + return -1; + } + if (Stub.Triple != TargetStub->Triple) { + WithColor::error() << "Interface Stub: Triple Mismatch." + << "\nFilenames: " << PreviousInputFilePath << " " + << InputFilePath + << "\nTriple Values: " << Stub.Triple << " " + << TargetStub->Triple << "\n"; + return -1; + } + if (Stub.SOName != TargetStub->SOName) { + WithColor::error() << "Interface Stub: SOName Mismatch." + << "\nFilenames: " << PreviousInputFilePath << " " + << InputFilePath + << "\nSOName Values: " << Stub.SOName << " " + << TargetStub->SOName << "\n"; + return -1; + } + if (Stub.NeededLibs != TargetStub->NeededLibs) { + WithColor::error() << "Interface Stub: NeededLibs Mismatch." 
+ << "\nFilenames: " << PreviousInputFilePath << " " + << InputFilePath << "\n"; + return -1; + } + } + + for (auto Symbol : TargetStub->Symbols) { + auto SI = SymbolMap.find(Symbol.Name); + if (SI == SymbolMap.end()) { + SymbolMap.insert( + std::pair(Symbol.Name, Symbol)); + continue; + } + + assert(Symbol.Name == SI->second.Name && "Symbol Names Must Match."); + + // Check conflicts: + if (Symbol.Type != SI->second.Type) { + WithColor::error() << "Interface Stub: Type Mismatch for " + << Symbol.Name << ".\nFilename: " << InputFilePath + << "\nType Values: " << getTypeName(SI->second.Type) + << " " << getTypeName(Symbol.Type) << "\n"; + + return -1; + } + if (Symbol.Size != SI->second.Size) { + WithColor::error() << "Interface Stub: Size Mismatch for " + << Symbol.Name << ".\nFilename: " << InputFilePath + << "\nSize Values: " << SI->second.Size << " " + << Symbol.Size << "\n"; + + return -1; + } + if (Symbol.Weak != SI->second.Weak) { + // TODO: Add conflict resolution for Weak vs non-Weak. + WithColor::error() << "Interface Stub: Weak Mismatch for " + << Symbol.Name << ".\nFilename: " << InputFilePath + << "\nWeak Values: " << SI->second.Weak << " " + << Symbol.Weak << "\n"; + + return -1; + } + // TODO: Not checking Warning. Will be dropped. + } + + PreviousInputFilePath = InputFilePath; + } + + if (Stub.IfsVersion != IFSVersionCurrent) + if (Stub.IfsVersion.getMajor() != IFSVersionCurrent.getMajor()) { + WithColor::error() << "Interface Stub: Bad IfsVersion: " + << Stub.IfsVersion << ", llvm-ifs supported version: " + << IFSVersionCurrent << ".\n"; + return -1; + } + + for (auto &Entry : SymbolMap) + Stub.Symbols.insert(Entry.second); + + std::error_code SysErr; + + // Open file for writing. + raw_fd_ostream Out(OutputFilename, SysErr); + if (SysErr) { + WithColor::error() << "Couldn't open " << OutputFilename + << " for writing.\n"; + return -1; + } + + return writeIfso(Stub, (Action == "write-ifs"), Out); +} diff --git a/tools/llvm-link/llvm-link.cpp b/tools/llvm-link/llvm-link.cpp index 50ba57178d02..fa36e083b6f8 100644 --- a/tools/llvm-link/llvm-link.cpp +++ b/tools/llvm-link/llvm-link.cpp @@ -351,13 +351,13 @@ int main(int argc, char **argv) { LLVMContext Context; Context.setDiagnosticHandler( - llvm::make_unique(), true); + std::make_unique(), true); cl::ParseCommandLineOptions(argc, argv, "llvm linker\n"); if (!DisableDITypeMap) Context.enableDebugTypeODRUniquing(); - auto Composite = make_unique("llvm-link", Context); + auto Composite = std::make_unique("llvm-link", Context); Linker L(*Composite); unsigned Flags = Linker::Flags::None; @@ -381,7 +381,7 @@ int main(int argc, char **argv) { errs() << "Here's the assembly:\n" << *Composite; std::error_code EC; - ToolOutputFile Out(OutputFilename, EC, sys::fs::F_None); + ToolOutputFile Out(OutputFilename, EC, sys::fs::OF_None); if (EC) { WithColor::error() << EC.message() << '\n'; return 1; diff --git a/tools/llvm-lto/llvm-lto.cpp b/tools/llvm-lto/llvm-lto.cpp index 585207b25185..b47e68e82850 100644 --- a/tools/llvm-lto/llvm-lto.cpp +++ b/tools/llvm-lto/llvm-lto.cpp @@ -315,8 +315,8 @@ getLocalLTOModule(StringRef Path, std::unique_ptr &Buffer, error(BufferOrErr, "error loading file '" + Path + "'"); Buffer = std::move(BufferOrErr.get()); CurrentActivity = ("loading file '" + Path + "'").str(); - std::unique_ptr Context = llvm::make_unique(); - Context->setDiagnosticHandler(llvm::make_unique(), + std::unique_ptr Context = std::make_unique(); + Context->setDiagnosticHandler(std::make_unique(), true); ErrorOr> Ret = 
LTOModule::createInLocalContext( std::move(Context), Buffer->getBufferStart(), Buffer->getBufferSize(), @@ -420,7 +420,7 @@ static void createCombinedModuleSummaryIndex() { std::error_code EC; assert(!OutputFilename.empty()); raw_fd_ostream OS(OutputFilename + ".thinlto.bc", EC, - sys::fs::OpenFlags::F_None); + sys::fs::OpenFlags::OF_None); error(EC, "error opening the file '" + OutputFilename + ".thinlto.bc'"); WriteIndexToFile(CombinedIndex, OS); OS.close(); @@ -510,7 +510,7 @@ static std::unique_ptr loadModuleFromInput(lto::InputFile &File, static void writeModuleToFile(Module &TheModule, StringRef Filename) { std::error_code EC; - raw_fd_ostream OS(Filename, EC, sys::fs::OpenFlags::F_None); + raw_fd_ostream OS(Filename, EC, sys::fs::OpenFlags::OF_None); error(EC, "error opening the file '" + Filename + "'"); maybeVerifyModule(TheModule); WriteBitcodeToFile(TheModule, OS, /* ShouldPreserveUseListOrder */ true); @@ -581,7 +581,7 @@ private: if (!CombinedIndex) report_fatal_error("ThinLink didn't create an index"); std::error_code EC; - raw_fd_ostream OS(OutputFilename, EC, sys::fs::OpenFlags::F_None); + raw_fd_ostream OS(OutputFilename, EC, sys::fs::OpenFlags::OF_None); error(EC, "error opening the file '" + OutputFilename + "'"); WriteIndexToFile(*CombinedIndex, OS); } @@ -619,7 +619,7 @@ private: } OutputName = getThinLTOOutputFile(OutputName, OldPrefix, NewPrefix); std::error_code EC; - raw_fd_ostream OS(OutputName, EC, sys::fs::OpenFlags::F_None); + raw_fd_ostream OS(OutputName, EC, sys::fs::OpenFlags::OF_None); error(EC, "error opening the file '" + OutputName + "'"); WriteIndexToFile(*Index, OS, &ModuleToSummariesForIndex); } @@ -802,7 +802,7 @@ private: } std::error_code EC; - raw_fd_ostream OS(OutputName, EC, sys::fs::OpenFlags::F_None); + raw_fd_ostream OS(OutputName, EC, sys::fs::OpenFlags::OF_None); error(EC, "error opening the file '" + OutputName + "'"); OS << std::get<0>(BinName)->getBuffer(); } @@ -848,7 +848,7 @@ private: for (unsigned BufID = 0; BufID < Binaries.size(); ++BufID) { auto OutputName = InputFilenames[BufID] + ".thinlto.o"; std::error_code EC; - raw_fd_ostream OS(OutputName, EC, sys::fs::OpenFlags::F_None); + raw_fd_ostream OS(OutputName, EC, sys::fs::OpenFlags::OF_None); error(EC, "error opening the file '" + OutputName + "'"); OS << Binaries[BufID]->getBuffer(); } @@ -921,7 +921,7 @@ int main(int argc, char **argv) { unsigned BaseArg = 0; LLVMContext Context; - Context.setDiagnosticHandler(llvm::make_unique(), + Context.setDiagnosticHandler(std::make_unique(), true); LTOCodeGenerator CodeGen(Context); @@ -1020,7 +1020,7 @@ int main(int argc, char **argv) { if (Parallelism != 1) PartFilename += "." + utostr(I); std::error_code EC; - OSs.emplace_back(PartFilename, EC, sys::fs::F_None); + OSs.emplace_back(PartFilename, EC, sys::fs::OF_None); if (EC) error("error opening the file '" + PartFilename + "': " + EC.message()); OSPtrs.push_back(&OSs.back().os()); diff --git a/tools/llvm-lto2/llvm-lto2.cpp b/tools/llvm-lto2/llvm-lto2.cpp index 0bd9289dc938..5e3b3dcb6c31 100644 --- a/tools/llvm-lto2/llvm-lto2.cpp +++ b/tools/llvm-lto2/llvm-lto2.cpp @@ -291,6 +291,14 @@ static int run(int argc, char **argv) { std::vector Res; for (const InputFile::Symbol &Sym : Input->symbols()) { auto I = CommandLineResolutions.find({F, Sym.getName()}); + // If it isn't found, look for "$", which would have been added + // (followed by a hash) when the symbol was promoted during module + // splitting if it was defined in one part and used in the other. 
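The llvm-lto2 fallback introduced here strips a promotion suffix before retrying the resolution lookup. A hypothetical helper showing the same StringRef::rsplit behaviour in isolation (the example names are made up):

    #include "llvm/ADT/StringRef.h"

    // "foo$0123abcd" -> "foo". Names without '$' come back unchanged, because
    // rsplit leaves the whole string in .first when the separator is absent.
    static llvm::StringRef baseSymbolName(llvm::StringRef PromotedName) {
      return PromotedName.rsplit("$").first;
    }

CommandLineResolutions would then be probed with the stripped name, exactly as the code does after the first lookup misses.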
+ // Try looking up the symbol name before the "$". + if (I == CommandLineResolutions.end()) { + auto SplitName = Sym.getName().rsplit("$"); + I = CommandLineResolutions.find({F, SplitName.first}); + } if (I == CommandLineResolutions.end()) { llvm::errs() << argv[0] << ": missing symbol resolution for " << F << ',' << Sym.getName() << '\n'; @@ -325,9 +333,9 @@ static int run(int argc, char **argv) { std::string Path = OutputFilename + "." + utostr(Task); std::error_code EC; - auto S = llvm::make_unique(Path, EC, sys::fs::F_None); + auto S = std::make_unique(Path, EC, sys::fs::OF_None); check(EC, Path); - return llvm::make_unique(std::move(S)); + return std::make_unique(std::move(S)); }; auto AddBuffer = [&](size_t Task, std::unique_ptr MB) { diff --git a/tools/llvm-mc/Disassembler.cpp b/tools/llvm-mc/Disassembler.cpp index e2af2e7f2e32..1ddbddfa1846 100644 --- a/tools/llvm-mc/Disassembler.cpp +++ b/tools/llvm-mc/Disassembler.cpp @@ -17,6 +17,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -129,13 +130,10 @@ static bool ByteArrayFromString(ByteArrayTy &ByteArray, return false; } -int Disassembler::disassemble(const Target &T, - const std::string &Triple, - MCSubtargetInfo &STI, - MCStreamer &Streamer, - MemoryBuffer &Buffer, - SourceMgr &SM, - raw_ostream &Out) { +int Disassembler::disassemble(const Target &T, const std::string &Triple, + MCSubtargetInfo &STI, MCStreamer &Streamer, + MemoryBuffer &Buffer, SourceMgr &SM, + MCContext &Ctx, raw_ostream &Out) { std::unique_ptr MRI(T.createMCRegInfo(Triple)); if (!MRI) { @@ -149,9 +147,6 @@ int Disassembler::disassemble(const Target &T, return -1; } - // Set up the MCContext for creating symbols and MCExpr's. 
- MCContext Ctx(MAI.get(), MRI.get(), nullptr); - std::unique_ptr DisAsm( T.createMCDisassembler(STI, Ctx)); if (!DisAsm) { diff --git a/tools/llvm-mc/Disassembler.h b/tools/llvm-mc/Disassembler.h index 11b685233abc..dcd8c279c91a 100644 --- a/tools/llvm-mc/Disassembler.h +++ b/tools/llvm-mc/Disassembler.h @@ -22,17 +22,15 @@ class MemoryBuffer; class Target; class raw_ostream; class SourceMgr; +class MCContext; class MCSubtargetInfo; class MCStreamer; class Disassembler { public: - static int disassemble(const Target &T, - const std::string &Triple, - MCSubtargetInfo &STI, - MCStreamer &Streamer, - MemoryBuffer &Buffer, - SourceMgr &SM, + static int disassemble(const Target &T, const std::string &Triple, + MCSubtargetInfo &STI, MCStreamer &Streamer, + MemoryBuffer &Buffer, SourceMgr &SM, MCContext &Ctx, raw_ostream &Out); }; diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp index ec189c297860..c23740a3094d 100644 --- a/tools/llvm-mc/llvm-mc.cpp +++ b/tools/llvm-mc/llvm-mc.cpp @@ -209,9 +209,10 @@ static const Target *GetTarget(const char *ProgName) { return TheTarget; } -static std::unique_ptr GetOutputStream(StringRef Path) { +static std::unique_ptr GetOutputStream(StringRef Path, + sys::fs::OpenFlags Flags) { std::error_code EC; - auto Out = llvm::make_unique(Path, EC, sys::fs::F_None); + auto Out = std::make_unique(Path, EC, Flags); if (EC) { WithColor::error() << EC.message() << '\n'; return nullptr; @@ -279,7 +280,7 @@ static int fillCommandLineSymbols(MCAsmParser &Parser) { static int AssembleInput(const char *ProgName, const Target *TheTarget, SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str, MCAsmInfo &MAI, MCSubtargetInfo &STI, - MCInstrInfo &MCII, MCTargetOptions &MCOptions) { + MCInstrInfo &MCII, MCTargetOptions const &MCOptions) { std::unique_ptr Parser( createMCAsmParser(SrcMgr, Ctx, Str, MAI)); std::unique_ptr TAP( @@ -316,7 +317,7 @@ int main(int argc, char **argv) { cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion); cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n"); - MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); + const MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); setDwarfDebugFlags(argc, argv); setDwarfDebugProducer(); @@ -368,7 +369,7 @@ int main(int argc, char **argv) { // FIXME: This is not pretty. MCContext has a ptr to MCObjectFileInfo and // MCObjectFileInfo needs a MCContext reference in order to initialize itself. MCObjectFileInfo MOFI; - MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr); + MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr, &MCOptions); MOFI.InitMCObjectFileInfo(TheTriple, PIC, Ctx, LargeCodeModel); if (SaveTempLabels) @@ -413,7 +414,9 @@ int main(int argc, char **argv) { FeaturesStr = Features.getString(); } - std::unique_ptr Out = GetOutputStream(OutputFilename); + sys::fs::OpenFlags Flags = (FileType == OFT_AssemblyFile) ? 
sys::fs::OF_Text + : sys::fs::OF_None; + std::unique_ptr Out = GetOutputStream(OutputFilename, Flags); if (!Out) return 1; @@ -423,7 +426,7 @@ int main(int argc, char **argv) { WithColor::error() << "dwo output only supported with object files\n"; return 1; } - DwoOut = GetOutputStream(SplitDwarfFile); + DwoOut = GetOutputStream(SplitDwarfFile, sys::fs::OF_None); if (!DwoOut) return 1; } @@ -459,7 +462,7 @@ int main(int argc, char **argv) { std::unique_ptr MAB( TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions)); - auto FOut = llvm::make_unique(*OS); + auto FOut = std::make_unique(*OS); Str.reset( TheTarget->createAsmStreamer(Ctx, std::move(FOut), /*asmverbose*/ true, /*useDwarfDirectory*/ true, IP, @@ -474,7 +477,7 @@ int main(int argc, char **argv) { Ctx.setUseNamesOnTempLabels(false); if (!Out->os().supportsSeeking()) { - BOS = make_unique(Out->os()); + BOS = std::make_unique(Out->os()); OS = BOS.get(); } @@ -506,7 +509,7 @@ int main(int argc, char **argv) { break; case AC_MDisassemble: assert(IP && "Expected assembly output"); - IP->setUseMarkup(1); + IP->setUseMarkup(true); disassemble = true; break; case AC_Disassemble: @@ -514,8 +517,8 @@ int main(int argc, char **argv) { break; } if (disassemble) - Res = Disassembler::disassemble(*TheTarget, TripleName, *STI, *Str, - *Buffer, SrcMgr, Out->os()); + Res = Disassembler::disassemble(*TheTarget, TripleName, *STI, *Str, *Buffer, + SrcMgr, Ctx, Out->os()); // Keep output if no errors. if (Res == 0) { diff --git a/tools/llvm-mca/CodeRegion.cpp b/tools/llvm-mca/CodeRegion.cpp index bf592f67245e..e05517c1ac95 100644 --- a/tools/llvm-mca/CodeRegion.cpp +++ b/tools/llvm-mca/CodeRegion.cpp @@ -18,7 +18,7 @@ namespace mca { CodeRegions::CodeRegions(llvm::SourceMgr &S) : SM(S), FoundErrors(false) { // Create a default region for the input code sequence. - Regions.emplace_back(make_unique("", SMLoc())); + Regions.emplace_back(std::make_unique("", SMLoc())); } bool CodeRegion::isLocInRange(SMLoc Loc) const { @@ -36,7 +36,7 @@ void CodeRegions::beginRegion(StringRef Description, SMLoc Loc) { if (Regions.size() == 1 && !Regions[0]->startLoc().isValid() && !Regions[0]->endLoc().isValid()) { ActiveRegions[Description] = 0; - Regions[0] = make_unique(Description, Loc); + Regions[0] = std::make_unique(Description, Loc); return; } } else { @@ -62,7 +62,7 @@ void CodeRegions::beginRegion(StringRef Description, SMLoc Loc) { } ActiveRegions[Description] = Regions.size(); - Regions.emplace_back(make_unique(Description, Loc)); + Regions.emplace_back(std::make_unique(Description, Loc)); return; } diff --git a/tools/llvm-mca/CodeRegionGenerator.cpp b/tools/llvm-mca/CodeRegionGenerator.cpp index c793169e64e0..8ddcd2f4abe2 100644 --- a/tools/llvm-mca/CodeRegionGenerator.cpp +++ b/tools/llvm-mca/CodeRegionGenerator.cpp @@ -118,6 +118,8 @@ Expected AsmCodeRegionGenerator::parseCodeRegions() { MCAsmLexer &Lexer = Parser->getLexer(); MCACommentConsumer CC(Regions); Lexer.setCommentConsumer(&CC); + // Enable support for MASM literal numbers (example: 05h, 101b). 
+ Lexer.setLexMasmIntegers(true); std::unique_ptr TAP( TheTarget.createMCAsmParser(STI, *Parser, MCII, Opts)); diff --git a/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/tools/llvm-mca/Views/BottleneckAnalysis.cpp index 560c6c6e8a33..feff0cd6d524 100644 --- a/tools/llvm-mca/Views/BottleneckAnalysis.cpp +++ b/tools/llvm-mca/Views/BottleneckAnalysis.cpp @@ -165,10 +165,33 @@ void DependencyGraph::dumpDependencyEdge(raw_ostream &OS, "Unsupported dependency type!"); OS << " - RESOURCE MASK: " << DE.ResourceOrRegID; } - OS << " - CYCLES: " << DE.Cost << '\n'; + OS << " - COST: " << DE.Cost << '\n'; } #endif // NDEBUG +void DependencyGraph::pruneEdges(unsigned Iterations) { + for (DGNode &N : Nodes) { + unsigned NumPruned = 0; + const unsigned Size = N.OutgoingEdges.size(); + // Use a cut-off threshold to prune edges with a low frequency. + for (unsigned I = 0, E = Size; I < E; ++I) { + DependencyEdge &Edge = N.OutgoingEdges[I]; + if (Edge.Frequency == Iterations) + continue; + double Factor = (double)Edge.Frequency / Iterations; + if (0.10 < Factor) + continue; + Nodes[Edge.ToIID].NumPredecessors--; + std::swap(Edge, N.OutgoingEdges[E - 1]); + --E; + ++NumPruned; + } + + if (NumPruned) + N.OutgoingEdges.resize(Size - NumPruned); + } +} + void DependencyGraph::initializeRootSet( SmallVectorImpl &RootSet) const { for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { @@ -179,7 +202,7 @@ void DependencyGraph::initializeRootSet( } void DependencyGraph::propagateThroughEdges( - SmallVectorImpl &RootSet) { + SmallVectorImpl &RootSet, unsigned Iterations) { SmallVector ToVisit; // A critical sequence is computed as the longest path from a node of the @@ -189,6 +212,10 @@ void DependencyGraph::propagateThroughEdges( // Each node of the graph starts with an initial default cost of zero. The // cost of a node is a measure of criticality: the higher the cost, the bigger // is the performance impact. + // For register and memory dependencies, the cost is a function of the write + // latency as well as the actual delay (in cycles) caused to users. + // For processor resource dependencies, the cost is a function of the resource + // pressure. Resource interferences with low frequency values are ignored. // // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of // the inner loop selects (i.e. visits) a node N from a set of `unvisited @@ -277,6 +304,10 @@ static void printInstruction(formatted_raw_ostream &FOS, } void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const { + // Early exit if no bottlenecks were found during the simulation. 
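Restating the cut-off that the new pruneEdges applies, as a stand-alone predicate (keepEdge is hypothetical; the 10% threshold and the every-iteration exception are taken from the loop above):

    // An edge survives pruning if it occurs on every simulated iteration, or
    // on strictly more than 10% of them. Iterations is assumed non-zero, as
    // in the caller.
    static bool keepEdge(unsigned EdgeFrequency, unsigned Iterations) {
      if (EdgeFrequency == Iterations)
        return true;
      return (double)EdgeFrequency / Iterations > 0.10;
    }

This pruning replaces the earlier `Cost *= Iterations / 2` scaling of loop-carried edges, which the hunks below remove.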
+ if (!SeenStallCycles || !BPI.PressureIncreaseCycles) + return; + SmallVector Seq; DG.getCriticalSequence(Seq); if (Seq.empty()) @@ -432,7 +463,6 @@ void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To, bool IsLoopCarried = From >= To; unsigned SourceSize = Source.size(); if (IsLoopCarried) { - Cost *= Iterations / 2; DG.addRegisterDep(From, To + SourceSize, RegID, Cost); DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost); return; @@ -445,7 +475,6 @@ void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To, bool IsLoopCarried = From >= To; unsigned SourceSize = Source.size(); if (IsLoopCarried) { - Cost *= Iterations / 2; DG.addMemoryDep(From, To + SourceSize, Cost); DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost); return; @@ -458,7 +487,6 @@ void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To, bool IsLoopCarried = From >= To; unsigned SourceSize = Source.size(); if (IsLoopCarried) { - Cost *= Iterations / 2; DG.addResourceDep(From, To + SourceSize, Mask, Cost); DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost); return; @@ -514,7 +542,7 @@ void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) { // Check if this is the last simulated instruction. if (IID == ((Iterations * Source.size()) - 1)) - DG.finalizeGraph(); + DG.finalizeGraph(Iterations); } void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) { diff --git a/tools/llvm-mca/Views/BottleneckAnalysis.h b/tools/llvm-mca/Views/BottleneckAnalysis.h index 7564b1a48206..9e3bd5978f09 100644 --- a/tools/llvm-mca/Views/BottleneckAnalysis.h +++ b/tools/llvm-mca/Views/BottleneckAnalysis.h @@ -236,8 +236,9 @@ class DependencyGraph { void addDependency(unsigned From, unsigned To, DependencyEdge::Dependency &&DE); + void pruneEdges(unsigned Iterations); void initializeRootSet(SmallVectorImpl &RootSet) const; - void propagateThroughEdges(SmallVectorImpl &RootSet); + void propagateThroughEdges(SmallVectorImpl &RootSet, unsigned Iterations); #ifndef NDEBUG void dumpDependencyEdge(raw_ostream &OS, const DependencyEdge &DE, @@ -263,10 +264,11 @@ public: // Called by the bottleneck analysis at the end of simulation to propagate // costs through the edges of the graph, and compute a critical path. 
- void finalizeGraph() { + void finalizeGraph(unsigned Iterations) { SmallVector RootSet; + pruneEdges(Iterations); initializeRootSet(RootSet); - propagateThroughEdges(RootSet); + propagateThroughEdges(RootSet, Iterations); } // Returns a sequence of edges representing the critical sequence based on the diff --git a/tools/llvm-mca/Views/InstructionInfoView.cpp b/tools/llvm-mca/Views/InstructionInfoView.cpp index 1fbffa3e5b69..a6f9153b4945 100644 --- a/tools/llvm-mca/Views/InstructionInfoView.cpp +++ b/tools/llvm-mca/Views/InstructionInfoView.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "Views/InstructionInfoView.h" +#include "llvm/Support/FormattedStream.h" namespace llvm { namespace mca { @@ -26,10 +27,17 @@ void InstructionInfoView::printView(raw_ostream &OS) const { TempStream << "\n\nInstruction Info:\n"; TempStream << "[1]: #uOps\n[2]: Latency\n[3]: RThroughput\n" - << "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n\n"; + << "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n"; + if (PrintEncodings) { + TempStream << "[7]: Encoding Size\n"; + TempStream << "\n[1] [2] [3] [4] [5] [6] [7] " + << "Encodings: Instructions:\n"; + } else { + TempStream << "\n[1] [2] [3] [4] [5] [6] Instructions:\n"; + } - TempStream << "[1] [2] [3] [4] [5] [6] Instructions:\n"; - for (const MCInst &Inst : Source) { + for (unsigned I = 0, E = Source.size(); I < E; ++I) { + const MCInst &Inst = Source[I]; const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode()); // Obtain the scheduling class information from the instruction. @@ -72,7 +80,20 @@ void InstructionInfoView::printView(raw_ostream &OS) const { } TempStream << (MCDesc.mayLoad() ? " * " : " "); TempStream << (MCDesc.mayStore() ? " * " : " "); - TempStream << (MCDesc.hasUnmodeledSideEffects() ? " U " : " "); + TempStream << (MCDesc.hasUnmodeledSideEffects() ? " U " : " "); + + if (PrintEncodings) { + StringRef Encoding(CE.getEncoding(I)); + unsigned EncodingSize = Encoding.size(); + TempStream << " " << EncodingSize + << (EncodingSize < 10 ? " " : " "); + TempStream.flush(); + formatted_raw_ostream FOS(TempStream); + for (unsigned i = 0, e = Encoding.size(); i != e; ++i) + FOS << format("%02x ", (uint8_t)Encoding[i]); + FOS.PadToColumn(30); + FOS.flush(); + } MCIP.printInst(&Inst, InstrStream, "", STI); InstrStream.flush(); @@ -80,7 +101,7 @@ void InstructionInfoView::printView(raw_ostream &OS) const { // Consume any tabs or spaces at the beginning of the string. 
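The encoding column added to InstructionInfoView boils down to hex-formatting each byte and padding before the mnemonic. A reduced sketch using the same libSupport facilities (printEncodingColumn is hypothetical; the column width of 30 matches the view above):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/Format.h"
    #include "llvm/Support/FormattedStream.h"

    // Print every byte of an instruction encoding as two hex digits, then pad
    // the column so the disassembly text starts at a fixed offset.
    static void printEncodingColumn(llvm::formatted_raw_ostream &FOS,
                                    llvm::StringRef Encoding) {
      for (unsigned I = 0, E = Encoding.size(); I != E; ++I)
        FOS << llvm::format("%02x ", (uint8_t)Encoding[I]);
      FOS.PadToColumn(30);
    }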
StringRef Str(Instruction); Str = Str.ltrim(); - TempStream << " " << Str << '\n'; + TempStream << Str << '\n'; Instruction = ""; } diff --git a/tools/llvm-mca/Views/InstructionInfoView.h b/tools/llvm-mca/Views/InstructionInfoView.h index 640d87383436..0e948304119f 100644 --- a/tools/llvm-mca/Views/InstructionInfoView.h +++ b/tools/llvm-mca/Views/InstructionInfoView.h @@ -40,6 +40,7 @@ #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MCA/CodeEmitter.h" #include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "llvm-mca" @@ -51,14 +52,18 @@ namespace mca { class InstructionInfoView : public View { const llvm::MCSubtargetInfo &STI; const llvm::MCInstrInfo &MCII; + CodeEmitter &CE; + bool PrintEncodings; llvm::ArrayRef Source; llvm::MCInstPrinter &MCIP; public: - InstructionInfoView(const llvm::MCSubtargetInfo &sti, - const llvm::MCInstrInfo &mcii, - llvm::ArrayRef S, llvm::MCInstPrinter &IP) - : STI(sti), MCII(mcii), Source(S), MCIP(IP) {} + InstructionInfoView(const llvm::MCSubtargetInfo &ST, + const llvm::MCInstrInfo &II, CodeEmitter &C, + bool ShouldPrintEncodings, llvm::ArrayRef S, + llvm::MCInstPrinter &IP) + : STI(ST), MCII(II), CE(C), PrintEncodings(ShouldPrintEncodings), + Source(S), MCIP(IP) {} void printView(llvm::raw_ostream &OS) const override; }; diff --git a/tools/llvm-mca/Views/TimelineView.cpp b/tools/llvm-mca/Views/TimelineView.cpp index fe3f16ba344c..1e7caa297ac6 100644 --- a/tools/llvm-mca/Views/TimelineView.cpp +++ b/tools/llvm-mca/Views/TimelineView.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "Views/TimelineView.h" +#include namespace llvm { namespace mca { @@ -132,25 +133,38 @@ void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS, const WaitTimeEntry &Entry, unsigned SourceIndex, unsigned Executions) const { - OS << SourceIndex << '.'; + bool PrintingTotals = SourceIndex == Source.size(); + unsigned CumulativeExecutions = PrintingTotals ? Timeline.size() : Executions; + + if (!PrintingTotals) + OS << SourceIndex << '.'; + OS.PadToColumn(7); double AverageTime1, AverageTime2, AverageTime3; - AverageTime1 = (double)Entry.CyclesSpentInSchedulerQueue / Executions; - AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / Executions; - AverageTime3 = (double)Entry.CyclesSpentAfterWBAndBeforeRetire / Executions; + AverageTime1 = + (double)Entry.CyclesSpentInSchedulerQueue / CumulativeExecutions; + AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / CumulativeExecutions; + AverageTime3 = + (double)Entry.CyclesSpentAfterWBAndBeforeRetire / CumulativeExecutions; OS << Executions; OS.PadToColumn(13); - int BufferSize = UsedBuffer[SourceIndex].second; - tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, Executions, BufferSize); + + int BufferSize = PrintingTotals ? 
0 : UsedBuffer[SourceIndex].second; + if (!PrintingTotals) + tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, CumulativeExecutions, + BufferSize); OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10); OS.PadToColumn(20); - tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, Executions, BufferSize); + if (!PrintingTotals) + tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, CumulativeExecutions, + BufferSize); OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10); OS.PadToColumn(27); - tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire, Executions, - STI.getSchedModel().MicroOpBufferSize); + if (!PrintingTotals) + tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire, + CumulativeExecutions, STI.getSchedModel().MicroOpBufferSize); OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10); if (OS.has_colors()) @@ -190,6 +204,24 @@ void TimelineView::printAverageWaitTimes(raw_ostream &OS) const { ++IID; } + + // If the timeline contains more than one instruction, + // let's also print global averages. + if (Source.size() != 1) { + WaitTimeEntry TotalWaitTime = std::accumulate( + WaitTime.begin(), WaitTime.end(), WaitTimeEntry{0, 0, 0}, + [](const WaitTimeEntry &A, const WaitTimeEntry &B) { + return WaitTimeEntry{ + A.CyclesSpentInSchedulerQueue + B.CyclesSpentInSchedulerQueue, + A.CyclesSpentInSQWhileReady + B.CyclesSpentInSQWhileReady, + A.CyclesSpentAfterWBAndBeforeRetire + + B.CyclesSpentAfterWBAndBeforeRetire}; + }); + printWaitTimeEntry(FOS, TotalWaitTime, IID, Executions); + FOS << " " + << "" << '\n'; + InstrStream.flush(); + } } void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS, diff --git a/tools/llvm-mca/Views/TimelineView.h b/tools/llvm-mca/Views/TimelineView.h index b63b234293cd..9bec3b87db45 100644 --- a/tools/llvm-mca/Views/TimelineView.h +++ b/tools/llvm-mca/Views/TimelineView.h @@ -84,6 +84,7 @@ /// 3. 2 1.5 0.5 1.0 vaddss %xmm1, %xmm0, %xmm3 /// 4. 2 3.5 0.0 0.0 vaddss %xmm3, %xmm2, %xmm4 /// 5. 2 6.5 0.0 0.0 vaddss %xmm4, %xmm5, %xmm6 +/// 2 2.4 0.6 1.6 /// /// By comparing column [2] with column [1], we get an idea about how many /// cycles were spent in the scheduler's queue due to data dependencies. 
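The new global-average row in the timeline view is an element-wise sum over the per-instruction wait times, divided by the number of timeline entries and rounded to one decimal when printed. A simplified, self-contained sketch (the Waits struct is a stand-in for WaitTimeEntry, not part of the patch):

    #include <cmath>
    #include <numeric>
    #include <vector>

    struct Waits { unsigned InQueue, ReadyInQueue, AfterWB; };

    // Accumulate the per-instruction wait times, as the totals row does.
    static Waits totalWaits(const std::vector<Waits> &PerInstr) {
      return std::accumulate(PerInstr.begin(), PerInstr.end(), Waits{0, 0, 0},
                             [](const Waits &A, const Waits &B) {
                               return Waits{A.InQueue + B.InQueue,
                                            A.ReadyInQueue + B.ReadyInQueue,
                                            A.AfterWB + B.AfterWB};
                             });
    }

    // One-decimal rounding applied when the averages are printed.
    static double roundOneDecimal(double X) {
      return std::floor(X * 10 + 0.5) / 10;
    }

Dividing each summed field by the timeline size then yields the "<total>" row shown in the updated TimelineView.h example.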
diff --git a/tools/llvm-mca/llvm-mca.cpp b/tools/llvm-mca/llvm-mca.cpp index b3590b5910ec..99c45eebdd88 100644 --- a/tools/llvm-mca/llvm-mca.cpp +++ b/tools/llvm-mca/llvm-mca.cpp @@ -32,11 +32,17 @@ #include "Views/SchedulerStatistics.h" #include "Views/SummaryView.h" #include "Views/TimelineView.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetOptionsCommandFlags.inc" +#include "llvm/MCA/CodeEmitter.h" #include "llvm/MCA/Context.h" +#include "llvm/MCA/InstrBuilder.h" #include "llvm/MCA/Pipeline.h" #include "llvm/MCA/Stages/EntryStage.h" #include "llvm/MCA/Stages/InstructionTables.h" @@ -83,11 +89,20 @@ static cl::opt cl::desc("Target a specific cpu type (-mcpu=help for details)"), cl::value_desc("cpu-name"), cl::cat(ToolOptions), cl::init("native")); +static cl::opt + MATTR("mattr", + cl::desc("Additional target features."), + cl::cat(ToolOptions)); + static cl::opt OutputAsmVariant("output-asm-variant", cl::desc("Syntax variant to use for output printing"), cl::cat(ToolOptions), cl::init(-1)); +static cl::opt + PrintImmHex("print-imm-hex", cl::cat(ToolOptions), cl::init(false), + cl::desc("Prefer hex format when printing immediate values")); + static cl::opt Iterations("iterations", cl::desc("Number of iterations to run"), cl::cat(ToolOptions), cl::init(0)); @@ -193,6 +208,11 @@ static cl::opt EnableBottleneckAnalysis( cl::desc("Enable bottleneck analysis (disabled by default)"), cl::cat(ViewOptions), cl::init(false)); +static cl::opt ShowEncoding( + "show-encoding", + cl::desc("Print encoding information in the instruction info view"), + cl::cat(ViewOptions), cl::init(false)); + namespace { const Target *getTarget(const char *ProgName) { @@ -218,7 +238,7 @@ ErrorOr> getOutputStream() { OutputFilename = "-"; std::error_code EC; auto Out = - llvm::make_unique(OutputFilename, EC, sys::fs::F_None); + std::make_unique(OutputFilename, EC, sys::fs::OF_Text); if (!EC) return std::move(Out); return EC; @@ -303,33 +323,11 @@ int main(int argc, char **argv) { // Apply overrides to llvm-mca specific options. processViewOptions(); - SourceMgr SrcMgr; - - // Tell SrcMgr about this buffer, which is what the parser will pick up. 
- SrcMgr.AddNewSourceBuffer(std::move(*BufferPtr), SMLoc()); - - std::unique_ptr MRI(TheTarget->createMCRegInfo(TripleName)); - assert(MRI && "Unable to create target register info!"); - - std::unique_ptr MAI(TheTarget->createMCAsmInfo(*MRI, TripleName)); - assert(MAI && "Unable to create target asm info!"); - - MCObjectFileInfo MOFI; - MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr); - MOFI.InitMCObjectFileInfo(TheTriple, /* PIC= */ false, Ctx); - - std::unique_ptr BOS; - - std::unique_ptr MCII(TheTarget->createMCInstrInfo()); - - std::unique_ptr MCIA( - TheTarget->createMCInstrAnalysis(MCII.get())); - if (!MCPU.compare("native")) MCPU = llvm::sys::getHostCPUName(); std::unique_ptr STI( - TheTarget->createMCSubtargetInfo(TripleName, MCPU, /* FeaturesStr */ "")); + TheTarget->createMCSubtargetInfo(TripleName, MCPU, MATTR)); if (!STI->isCPUStringValid(MCPU)) return 1; @@ -352,6 +350,29 @@ int main(int argc, char **argv) { return 1; } + std::unique_ptr MRI(TheTarget->createMCRegInfo(TripleName)); + assert(MRI && "Unable to create target register info!"); + + std::unique_ptr MAI(TheTarget->createMCAsmInfo(*MRI, TripleName)); + assert(MAI && "Unable to create target asm info!"); + + MCObjectFileInfo MOFI; + SourceMgr SrcMgr; + + // Tell SrcMgr about this buffer, which is what the parser will pick up. + SrcMgr.AddNewSourceBuffer(std::move(*BufferPtr), SMLoc()); + + MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr); + + MOFI.InitMCObjectFileInfo(TheTriple, /* PIC= */ false, Ctx); + + std::unique_ptr BOS; + + std::unique_ptr MCII(TheTarget->createMCInstrInfo()); + + std::unique_ptr MCIA( + TheTarget->createMCInstrAnalysis(MCII.get())); + // Parse the input and create CodeRegions that llvm-mca can analyze. mca::AsmCodeRegionGenerator CRG(*TheTarget, SrcMgr, Ctx, *MAI, *STI, *MCII); Expected RegionsOrErr = CRG.parseCodeRegions(); @@ -396,6 +417,9 @@ int main(int argc, char **argv) { return 1; } + // Set the display preference for hex vs. decimal immediates. + IP->setPrintImmHex(PrintImmHex); + std::unique_ptr TOF = std::move(*OF); const MCSchedModel &SM = STI->getSchedModel(); @@ -413,6 +437,12 @@ int main(int argc, char **argv) { // Number each region in the sequence. unsigned RegionIdx = 0; + std::unique_ptr MCE( + TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx)); + + std::unique_ptr MAB(TheTarget->createMCAsmBackend( + *STI, *MRI, InitMCTargetOptionsFromFlags())); + for (const std::unique_ptr &Region : Regions) { // Skip empty code regions. if (Region->empty()) @@ -430,6 +460,7 @@ int main(int argc, char **argv) { // Lower the MCInst sequence into an mca::Instruction sequence. ArrayRef Insts = Region->getInstructions(); + mca::CodeEmitter CE(*STI, *MAB, *MCE, Insts); std::vector> LoweredSequence; for (const MCInst &MCI : Insts) { Expected> Inst = @@ -459,18 +490,18 @@ int main(int argc, char **argv) { if (PrintInstructionTables) { // Create a pipeline, stages, and a printer. - auto P = llvm::make_unique(); - P->appendStage(llvm::make_unique(S)); - P->appendStage(llvm::make_unique(SM)); + auto P = std::make_unique(); + P->appendStage(std::make_unique(S)); + P->appendStage(std::make_unique(SM)); mca::PipelinePrinter Printer(*P); // Create the views for this pipeline, execute, and emit a report. 
if (PrintInstructionInfoView) { - Printer.addView(llvm::make_unique( - *STI, *MCII, Insts, *IP)); + Printer.addView(std::make_unique( + *STI, *MCII, CE, ShowEncoding, Insts, *IP)); } Printer.addView( - llvm::make_unique(*STI, *IP, Insts)); + std::make_unique(*STI, *IP, Insts)); if (!runPipeline(*P)) return 1; @@ -480,42 +511,42 @@ int main(int argc, char **argv) { } // Create a basic pipeline simulating an out-of-order backend. - auto P = MCA.createDefaultPipeline(PO, IB, S); + auto P = MCA.createDefaultPipeline(PO, S); mca::PipelinePrinter Printer(*P); if (PrintSummaryView) Printer.addView( - llvm::make_unique(SM, Insts, DispatchWidth)); + std::make_unique(SM, Insts, DispatchWidth)); if (EnableBottleneckAnalysis) { - Printer.addView(llvm::make_unique( + Printer.addView(std::make_unique( *STI, *IP, Insts, S.getNumIterations())); } if (PrintInstructionInfoView) - Printer.addView( - llvm::make_unique(*STI, *MCII, Insts, *IP)); + Printer.addView(std::make_unique( + *STI, *MCII, CE, ShowEncoding, Insts, *IP)); if (PrintDispatchStats) - Printer.addView(llvm::make_unique()); + Printer.addView(std::make_unique()); if (PrintSchedulerStats) - Printer.addView(llvm::make_unique(*STI)); + Printer.addView(std::make_unique(*STI)); if (PrintRetireStats) - Printer.addView(llvm::make_unique(SM)); + Printer.addView(std::make_unique(SM)); if (PrintRegisterFileStats) - Printer.addView(llvm::make_unique(*STI)); + Printer.addView(std::make_unique(*STI)); if (PrintResourcePressureView) Printer.addView( - llvm::make_unique(*STI, *IP, Insts)); + std::make_unique(*STI, *IP, Insts)); if (PrintTimelineView) { unsigned TimelineIterations = TimelineMaxIterations ? TimelineMaxIterations : 10; - Printer.addView(llvm::make_unique( + Printer.addView(std::make_unique( *STI, *IP, Insts, std::min(TimelineIterations, S.getNumIterations()), TimelineMaxCycles)); } diff --git a/tools/llvm-modextract/llvm-modextract.cpp b/tools/llvm-modextract/llvm-modextract.cpp index 3adefc5f0d3e..7c4099625842 100644 --- a/tools/llvm-modextract/llvm-modextract.cpp +++ b/tools/llvm-modextract/llvm-modextract.cpp @@ -54,7 +54,7 @@ int main(int argc, char **argv) { std::error_code EC; std::unique_ptr Out( - new ToolOutputFile(OutputFilename, EC, sys::fs::F_None)); + new ToolOutputFile(OutputFilename, EC, sys::fs::OF_None)); ExitOnErr(errorCodeToError(EC)); if (BinaryExtract) { diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp index aa62e6f0209b..ee55722dc139 100644 --- a/tools/llvm-nm/llvm-nm.cpp +++ b/tools/llvm-nm/llvm-nm.cpp @@ -711,17 +711,21 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName, const std::string &ArchiveName, const std::string &ArchitectureName) { if (!NoSort) { - std::function Cmp; + using Comparator = bool (*)(const NMSymbol &, const NMSymbol &); + Comparator Cmp; if (NumericSort) - Cmp = compareSymbolAddress; + Cmp = &compareSymbolAddress; else if (SizeSort) - Cmp = compareSymbolSize; + Cmp = &compareSymbolSize; else - Cmp = compareSymbolName; + Cmp = &compareSymbolName; if (ReverseSort) - Cmp = [=](const NMSymbol &A, const NMSymbol &B) { return Cmp(B, A); }; - llvm::sort(SymbolList, Cmp); + llvm::sort(SymbolList, [=](const NMSymbol &A, const NMSymbol &B) -> bool { + return Cmp(B, A); + }); + else + llvm::sort(SymbolList, Cmp); } if (!PrintFileName) { @@ -913,10 +917,12 @@ static char getSymbolNMTypeChar(ELFObjectFileBase &Obj, if (Flags & ELF::SHF_ALLOC) return Flags & ELF::SHF_WRITE ? 
'd' : 'r'; - StringRef SecName; - if (SecI->getName(SecName)) + auto NameOrErr = SecI->getName(); + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); return '?'; - if (SecName.startswith(".debug")) + } + if ((*NameOrErr).startswith(".debug")) return 'N'; if (!(Flags & ELF::SHF_WRITE)) return 'n'; @@ -1076,7 +1082,7 @@ static StringRef getNMTypeName(SymbolicFile &Obj, basic_symbol_iterator I) { static char getNMSectionTagAndName(SymbolicFile &Obj, basic_symbol_iterator I, StringRef &SecName) { uint32_t Symflags = I->getFlags(); - if (isa(&Obj)) { + if (ELFObjectFileBase *ELFObj = dyn_cast(&Obj)) { if (Symflags & object::SymbolRef::SF_Absolute) SecName = "*ABS*"; else if (Symflags & object::SymbolRef::SF_Common) @@ -1090,8 +1096,16 @@ static char getNMSectionTagAndName(SymbolicFile &Obj, basic_symbol_iterator I, consumeError(SecIOrErr.takeError()); return '?'; } - elf_section_iterator secT = *SecIOrErr; - secT->getName(SecName); + + if (*SecIOrErr == ELFObj->section_end()) + return '?'; + + Expected NameOrErr = (*SecIOrErr)->getName(); + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); + return '?'; + } + SecName = *NameOrErr; } } @@ -1347,7 +1361,12 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName, StringRef SectionName = StringRef(); for (const SectionRef &Section : MachO->sections()) { S.NSect++; - Section.getName(SectionName); + + if (Expected NameOrErr = Section.getName()) + SectionName = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + SegmentName = MachO->getSectionFinalSegmentName( Section.getRawDataRefImpl()); if (S.Address >= Section.getAddress() && @@ -1667,7 +1686,11 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName, StringRef SegmentName = StringRef(); StringRef SectionName = StringRef(); for (const SectionRef &Section : MachO->sections()) { - Section.getName(SectionName); + if (Expected NameOrErr = Section.getName()) + SectionName = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + SegmentName = MachO->getSectionFinalSegmentName( Section.getRawDataRefImpl()); F.NSect++; diff --git a/tools/llvm-objcopy/COFF/COFFObjcopy.cpp b/tools/llvm-objcopy/COFF/COFFObjcopy.cpp index 4ae46851a66f..2a8d816e6f3c 100644 --- a/tools/llvm-objcopy/COFF/COFFObjcopy.cpp +++ b/tools/llvm-objcopy/COFF/COFFObjcopy.cpp @@ -16,8 +16,8 @@ #include "llvm/Object/Binary.h" #include "llvm/Object/COFF.h" +#include "llvm/Support/CRC.h" #include "llvm/Support/Errc.h" -#include "llvm/Support/JamCRC.h" #include "llvm/Support/Path.h" #include @@ -40,22 +40,13 @@ static uint64_t getNextRVA(const Object &Obj) { Obj.IsPE ? Obj.PeHeader.SectionAlignment : 1); } -static uint32_t getCRC32(StringRef Data) { - JamCRC CRC; - CRC.update(ArrayRef(Data.data(), Data.size())); - // The CRC32 value needs to be complemented because the JamCRC dosn't - // finalize the CRC32 value. It also dosn't negate the initial CRC32 value - // but it starts by default at 0xFFFFFFFF which is the complement of zero. 
- return ~CRC.getCRC(); -} - static std::vector createGnuDebugLinkSectionContents(StringRef File) { ErrorOr> LinkTargetOrErr = MemoryBuffer::getFile(File); if (!LinkTargetOrErr) error("'" + File + "': " + LinkTargetOrErr.getError().message()); auto LinkTarget = std::move(*LinkTargetOrErr); - uint32_t CRC32 = getCRC32(LinkTarget->getBuffer()); + uint32_t CRC32 = llvm::crc32(arrayRefFromStringRef(LinkTarget->getBuffer())); StringRef FileName = sys::path::filename(File); size_t CRCPos = alignTo(FileName.size() + 1, 4); @@ -65,26 +56,37 @@ static std::vector createGnuDebugLinkSectionContents(StringRef File) { return Data; } -static void addGnuDebugLink(Object &Obj, StringRef DebugLinkFile) { - uint32_t StartRVA = getNextRVA(Obj); +// Adds named section with given contents to the object. +static void addSection(Object &Obj, StringRef Name, ArrayRef Contents, + uint32_t Characteristics) { + bool NeedVA = Characteristics & (IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_WRITE); - std::vector
Sections; Section Sec; - Sec.setOwnedContents(createGnuDebugLinkSectionContents(DebugLinkFile)); - Sec.Name = ".gnu_debuglink"; - Sec.Header.VirtualSize = Sec.getContents().size(); - Sec.Header.VirtualAddress = StartRVA; - Sec.Header.SizeOfRawData = alignTo(Sec.Header.VirtualSize, - Obj.IsPE ? Obj.PeHeader.FileAlignment : 1); + Sec.setOwnedContents(Contents); + Sec.Name = Name; + Sec.Header.VirtualSize = NeedVA ? Sec.getContents().size() : 0u; + Sec.Header.VirtualAddress = NeedVA ? getNextRVA(Obj) : 0u; + Sec.Header.SizeOfRawData = + NeedVA ? alignTo(Sec.Header.VirtualSize, + Obj.IsPE ? Obj.PeHeader.FileAlignment : 1) + : Sec.getContents().size(); // Sec.Header.PointerToRawData is filled in by the writer. Sec.Header.PointerToRelocations = 0; Sec.Header.PointerToLinenumbers = 0; // Sec.Header.NumberOfRelocations is filled in by the writer. Sec.Header.NumberOfLinenumbers = 0; - Sec.Header.Characteristics = IMAGE_SCN_CNT_INITIALIZED_DATA | - IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_DISCARDABLE; - Sections.push_back(Sec); - Obj.addSections(Sections); + Sec.Header.Characteristics = Characteristics; + + Obj.addSections(Sec); +} + +static void addGnuDebugLink(Object &Obj, StringRef DebugLinkFile) { + std::vector Contents = + createGnuDebugLinkSectionContents(DebugLinkFile); + addSection(Obj, ".gnu_debuglink", Contents, + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_DISCARDABLE); } static Error handleArgs(const CopyConfig &Config, Object &Obj) { @@ -92,8 +94,7 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) { Obj.removeSections([&Config](const Section &Sec) { // Contrary to --only-keep-debug, --only-section fully removes sections that // aren't mentioned. - if (!Config.OnlySection.empty() && - !is_contained(Config.OnlySection, Sec.Name)) + if (!Config.OnlySection.empty() && !Config.OnlySection.matches(Sec.Name)) return true; if (Config.StripDebug || Config.StripAll || Config.StripAllGNU || @@ -103,7 +104,7 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) { return true; } - if (is_contained(Config.ToRemove, Sec.Name)) + if (Config.ToRemove.matches(Sec.Name)) return true; return false; @@ -137,7 +138,7 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) { if (Config.StripAll || Config.StripAllGNU) return true; - if (is_contained(Config.SymbolsToRemove, Sym.Name)) { + if (Config.SymbolsToRemove.matches(Sym.Name)) { // Explicitly removing a referenced symbol is an error. 
if (Sym.Referenced) reportError(Config.OutputFilename, @@ -156,7 +157,7 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) { if (Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC || Sym.Sym.SectionNumber == 0) if (Config.StripUnneeded || - is_contained(Config.UnneededSymbolsToRemove, Sym.Name)) + Config.UnneededSymbolsToRemove.matches(Sym.Name)) return true; // GNU objcopy keeps referenced local symbols and external symbols @@ -171,21 +172,38 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) { return false; }); + for (const auto &Flag : Config.AddSection) { + StringRef SecName, FileName; + std::tie(SecName, FileName) = Flag.split("="); + + auto BufOrErr = MemoryBuffer::getFile(FileName); + if (!BufOrErr) + return createFileError(FileName, errorCodeToError(BufOrErr.getError())); + auto Buf = std::move(*BufOrErr); + + addSection( + Obj, SecName, + makeArrayRef(reinterpret_cast(Buf->getBufferStart()), + Buf->getBufferSize()), + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_ALIGN_1BYTES); + } + if (!Config.AddGnuDebugLink.empty()) addGnuDebugLink(Obj, Config.AddGnuDebugLink); if (Config.AllowBrokenLinks || !Config.BuildIdLinkDir.empty() || Config.BuildIdLinkInput || Config.BuildIdLinkOutput || !Config.SplitDWO.empty() || !Config.SymbolsPrefix.empty() || - !Config.AllocSectionsPrefix.empty() || !Config.AddSection.empty() || - !Config.DumpSection.empty() || !Config.KeepSection.empty() || + !Config.AllocSectionsPrefix.empty() || !Config.DumpSection.empty() || + !Config.KeepSection.empty() || Config.NewSymbolVisibility || !Config.SymbolsToGlobalize.empty() || !Config.SymbolsToKeep.empty() || !Config.SymbolsToLocalize.empty() || !Config.SymbolsToWeaken.empty() || !Config.SymbolsToKeepGlobal.empty() || !Config.SectionsToRename.empty() || - !Config.SetSectionFlags.empty() || !Config.SymbolsToRename.empty() || - Config.ExtractDWO || Config.KeepFileSymbols || Config.LocalizeHidden || - Config.PreserveDates || Config.StripDWO || Config.StripNonAlloc || - Config.StripSections || Config.Weaken || Config.DecompressDebugSections || + !Config.SetSectionAlignment.empty() || !Config.SetSectionFlags.empty() || + !Config.SymbolsToRename.empty() || Config.ExtractDWO || + Config.KeepFileSymbols || Config.LocalizeHidden || Config.PreserveDates || + Config.StripDWO || Config.StripNonAlloc || Config.StripSections || + Config.Weaken || Config.DecompressDebugSections || Config.DiscardMode == DiscardType::Locals || !Config.SymbolsToAdd.empty() || Config.EntryExpr) { return createStringError(llvm::errc::invalid_argument, diff --git a/tools/llvm-objcopy/COFF/Reader.cpp b/tools/llvm-objcopy/COFF/Reader.cpp index 1f0ec9fa9691..2fcec0057c03 100644 --- a/tools/llvm-objcopy/COFF/Reader.cpp +++ b/tools/llvm-objcopy/COFF/Reader.cpp @@ -36,14 +36,9 @@ Error COFFReader::readExecutableHeaders(Object &Obj) const { DH->AddressOfNewExeHeader - sizeof(*DH)); if (COFFObj.is64()) { - const pe32plus_header *PE32Plus = nullptr; - if (auto EC = COFFObj.getPE32PlusHeader(PE32Plus)) - return errorCodeToError(EC); - Obj.PeHeader = *PE32Plus; + Obj.PeHeader = *COFFObj.getPE32PlusHeader(); } else { - const pe32_header *PE32 = nullptr; - if (auto EC = COFFObj.getPE32Header(PE32)) - return errorCodeToError(EC); + const pe32_header *PE32 = COFFObj.getPE32Header(); copyPeHeader(Obj.PeHeader, *PE32); // The pe32plus_header (stored in Object) lacks the BaseOfData field. 
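The Reader.cpp hunk above relies on getPE32Header() and getPE32PlusHeader() now returning the header pointer directly instead of filling an out-parameter and returning a std::error_code. A hedged sketch of using those accessors (the helper itself is illustrative, not from the patch):

#include "llvm/Object/COFF.h"

// Image base of a PE/COFF image, or 0 if there is no optional header.
static uint64_t getImageBase(const llvm::object::COFFObjectFile &Obj) {
  if (const llvm::object::pe32plus_header *PE32Plus = Obj.getPE32PlusHeader())
    return PE32Plus->ImageBase;
  if (const llvm::object::pe32_header *PE32 = Obj.getPE32Header())
    return PE32->ImageBase;
  return 0;
}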
Obj.BaseOfData = PE32->BaseOfData; @@ -196,16 +191,13 @@ Error COFFReader::setSymbolTargets(Object &Obj) const { } Expected> COFFReader::create() const { - auto Obj = llvm::make_unique(); + auto Obj = std::make_unique(); - const coff_file_header *CFH = nullptr; - const coff_bigobj_file_header *CBFH = nullptr; - COFFObj.getCOFFHeader(CFH); - COFFObj.getCOFFBigObjHeader(CBFH); bool IsBigObj = false; - if (CFH) { + if (const coff_file_header *CFH = COFFObj.getCOFFHeader()) { Obj->CoffFileHeader = *CFH; } else { + const coff_bigobj_file_header *CBFH = COFFObj.getCOFFBigObjHeader(); if (!CBFH) return createStringError(object_error::parse_failed, "no COFF file header returned"); diff --git a/tools/llvm-objcopy/COFF/Writer.cpp b/tools/llvm-objcopy/COFF/Writer.cpp index f3bb1ce331f2..6db37435fd96 100644 --- a/tools/llvm-objcopy/COFF/Writer.cpp +++ b/tools/llvm-objcopy/COFF/Writer.cpp @@ -120,12 +120,12 @@ size_t COFFWriter::finalizeStringTable() { StrTabBuilder.finalize(); for (auto &S : Obj.getMutableSections()) { + memset(S.Header.Name, 0, sizeof(S.Header.Name)); if (S.Name.size() > COFF::NameSize) { - memset(S.Header.Name, 0, sizeof(S.Header.Name)); snprintf(S.Header.Name, sizeof(S.Header.Name), "/%d", (int)StrTabBuilder.getOffset(S.Name)); } else { - strncpy(S.Header.Name, S.Name.data(), COFF::NameSize); + memcpy(S.Header.Name, S.Name.data(), S.Name.size()); } } for (auto &S : Obj.getMutableSymbols()) { diff --git a/tools/llvm-objcopy/CommonOpts.td b/tools/llvm-objcopy/CommonOpts.td new file mode 100644 index 000000000000..e8c092b44431 --- /dev/null +++ b/tools/llvm-objcopy/CommonOpts.td @@ -0,0 +1,123 @@ +include "llvm/Option/OptParser.td" + +multiclass Eq { + def NAME : Separate<["--"], name>; + def NAME #_eq : Joined<["--"], name #"=">, + Alias(NAME)>, + HelpText; +} + +def help : Flag<["--"], "help">; +def h : Flag<["-"], "h">, Alias; + +def allow_broken_links + : Flag<["--"], "allow-broken-links">, + HelpText<"Allow the tool to remove sections even if it would leave " + "invalid section references. The appropriate sh_link fields " + "will be set to zero.">; + +def enable_deterministic_archives + : Flag<["--"], "enable-deterministic-archives">, + HelpText<"Enable deterministic mode when operating on archives (use " + "zero for UIDs, GIDs, and timestamps).">; +def D : Flag<["-"], "D">, + Alias, + HelpText<"Alias for --enable-deterministic-archives">; + +def disable_deterministic_archives + : Flag<["--"], "disable-deterministic-archives">, + HelpText<"Disable deterministic mode when operating on archives (use " + "real values for UIDs, GIDs, and timestamps).">; +def U : Flag<["-"], "U">, + Alias, + HelpText<"Alias for --disable-deterministic-archives">; + +def preserve_dates : Flag<["--"], "preserve-dates">, + HelpText<"Preserve access and modification timestamps">; +def p : Flag<["-"], "p">, + Alias, + HelpText<"Alias for --preserve-dates">; + +def strip_all : Flag<["--"], "strip-all">, + HelpText<"Remove non-allocated sections outside segments. " + ".gnu.warning* sections are not removed">; + +def strip_all_gnu + : Flag<["--"], "strip-all-gnu">, + HelpText<"Compatible with GNU's --strip-all">; + +def strip_debug : Flag<["--"], "strip-debug">, + HelpText<"Remove all debug sections">; +def g : Flag<["-"], "g">, + Alias, + HelpText<"Alias for --strip-debug">; + +def strip_unneeded : Flag<["--"], "strip-unneeded">, + HelpText<"Remove all symbols not needed by relocations">; + +defm remove_section : Eq<"remove-section", "Remove
">, + MetaVarName<"section">; +def R : JoinedOrSeparate<["-"], "R">, + Alias, + HelpText<"Alias for --remove-section">; + +def strip_sections + : Flag<["--"], "strip-sections">, + HelpText<"Remove all section headers and all sections not in segments">; + +defm strip_symbol : Eq<"strip-symbol", "Strip ">, + MetaVarName<"symbol">; +def N : JoinedOrSeparate<["-"], "N">, + Alias, + HelpText<"Alias for --strip-symbol">; + +defm keep_section : Eq<"keep-section", "Keep
">, + MetaVarName<"section">; + +defm keep_symbol : Eq<"keep-symbol", "Do not remove symbol ">, + MetaVarName<"symbol">; +def K : JoinedOrSeparate<["-"], "K">, + Alias, + HelpText<"Alias for --keep-symbol">; + +def keep_file_symbols : Flag<["--"], "keep-file-symbols">, + HelpText<"Do not remove file symbols">; + +def only_keep_debug + : Flag<["--"], "only-keep-debug">, + HelpText<"Clear sections that would not be stripped by --strip-debug. " + "Currently only implemented for COFF.">; + +def discard_locals : Flag<["--"], "discard-locals">, + HelpText<"Remove compiler-generated local symbols, (e.g. " + "symbols starting with .L)">; +def X : Flag<["-"], "X">, + Alias, + HelpText<"Alias for --discard-locals">; + +def discard_all + : Flag<["--"], "discard-all">, + HelpText<"Remove all local symbols except file and section symbols">; +def x : Flag<["-"], "x">, + Alias, + HelpText<"Alias for --discard-all">; + +def regex + : Flag<["--"], "regex">, + HelpText<"Permit regular expressions in name comparison">; + +def version : Flag<["--"], "version">, + HelpText<"Print the version and exit.">; +def V : Flag<["-"], "V">, + Alias, + HelpText<"Alias for --version">; + +def wildcard + : Flag<["--"], "wildcard">, + HelpText<"Allow wildcard syntax for symbol-related flags. Incompatible " + "with --regex. Allows using '*' to match any number of " + "characters, '?' to match any single character, '\' to escape " + "special characters, and '[]' to define character classes. " + "Wildcards beginning with '!' will prevent a match, for example " + "\"-N '*' -N '!x'\" will strip all symbols except for \"x\".">; +def w : Flag<["-"], "w">, Alias, HelpText<"Alias for --wildcard">; diff --git a/tools/llvm-objcopy/CopyConfig.cpp b/tools/llvm-objcopy/CopyConfig.cpp index 8d6431b3044f..d707bec20c49 100644 --- a/tools/llvm-objcopy/CopyConfig.cpp +++ b/tools/llvm-objcopy/CopyConfig.cpp @@ -14,10 +14,10 @@ #include "llvm/ADT/StringSet.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" +#include "llvm/Support/CRC.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compression.h" #include "llvm/Support/Errc.h" -#include "llvm/Support/JamCRC.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/StringSaver.h" #include @@ -155,6 +155,25 @@ static Expected parseRenameSectionValue(StringRef FlagValue) { return SR; } +static Expected> +parseSetSectionAlignment(StringRef FlagValue) { + if (!FlagValue.contains('=')) + return createStringError( + errc::invalid_argument, + "bad format for --set-section-alignment: missing '='"); + auto Split = StringRef(FlagValue).split('='); + if (Split.first.empty()) + return createStringError( + errc::invalid_argument, + "bad format for --set-section-alignment: missing section name"); + uint64_t NewAlign; + if (Split.second.getAsInteger(0, NewAlign)) + return createStringError(errc::invalid_argument, + "invalid alignment for --set-section-alignment: '%s'", + Split.second.str().c_str()); + return std::make_pair(Split.first, NewAlign); +} + static Expected parseSetSectionFlagValue(StringRef FlagValue) { if (!StringRef(FlagValue).contains('=')) @@ -177,106 +196,6 @@ parseSetSectionFlagValue(StringRef FlagValue) { return SFU; } -static Expected parseNewSymbolInfo(StringRef FlagValue) { - // Parse value given with --add-symbol option and create the - // new symbol if possible. The value format for --add-symbol is: - // - // =[
<section>:]<value>[,<flags>] - // - // where: - // <name> - symbol name, can be empty string - // <section>
- optional section name. If not given ABS symbol is created - // - symbol value, can be decimal or hexadecimal number prefixed - // with 0x. - // - optional flags affecting symbol type, binding or visibility: - // The following are currently supported: - // - // global, local, weak, default, hidden, file, section, object, - // indirect-function. - // - // The following flags are ignored and provided for GNU - // compatibility only: - // - // warning, debug, constructor, indirect, synthetic, - // unique-object, before=. - NewSymbolInfo SI; - StringRef Value; - std::tie(SI.SymbolName, Value) = FlagValue.split('='); - if (Value.empty()) - return createStringError( - errc::invalid_argument, - "bad format for --add-symbol, missing '=' after '%s'", - SI.SymbolName.str().c_str()); - - if (Value.contains(':')) { - std::tie(SI.SectionName, Value) = Value.split(':'); - if (SI.SectionName.empty() || Value.empty()) - return createStringError( - errc::invalid_argument, - "bad format for --add-symbol, missing section name or symbol value"); - } - - SmallVector Flags; - Value.split(Flags, ','); - if (Flags[0].getAsInteger(0, SI.Value)) - return createStringError(errc::invalid_argument, "bad symbol value: '%s'", - Flags[0].str().c_str()); - - using Functor = std::function; - SmallVector UnsupportedFlags; - for (size_t I = 1, NumFlags = Flags.size(); I < NumFlags; ++I) - static_cast( - StringSwitch(Flags[I]) - .CaseLower("global", [&SI] { SI.Bind = ELF::STB_GLOBAL; }) - .CaseLower("local", [&SI] { SI.Bind = ELF::STB_LOCAL; }) - .CaseLower("weak", [&SI] { SI.Bind = ELF::STB_WEAK; }) - .CaseLower("default", [&SI] { SI.Visibility = ELF::STV_DEFAULT; }) - .CaseLower("hidden", [&SI] { SI.Visibility = ELF::STV_HIDDEN; }) - .CaseLower("file", [&SI] { SI.Type = ELF::STT_FILE; }) - .CaseLower("section", [&SI] { SI.Type = ELF::STT_SECTION; }) - .CaseLower("object", [&SI] { SI.Type = ELF::STT_OBJECT; }) - .CaseLower("function", [&SI] { SI.Type = ELF::STT_FUNC; }) - .CaseLower("indirect-function", - [&SI] { SI.Type = ELF::STT_GNU_IFUNC; }) - .CaseLower("debug", [] {}) - .CaseLower("constructor", [] {}) - .CaseLower("warning", [] {}) - .CaseLower("indirect", [] {}) - .CaseLower("synthetic", [] {}) - .CaseLower("unique-object", [] {}) - .StartsWithLower("before", [] {}) - .Default([&] { UnsupportedFlags.push_back(Flags[I]); }))(); - if (!UnsupportedFlags.empty()) - return createStringError(errc::invalid_argument, - "unsupported flag%s for --add-symbol: '%s'", - UnsupportedFlags.size() > 1 ? 
"s" : "", - join(UnsupportedFlags, "', '").c_str()); - return SI; -} - -static const StringMap ArchMap{ - // Name, {EMachine, 64bit, LittleEndian} - {"aarch64", {ELF::EM_AARCH64, true, true}}, - {"arm", {ELF::EM_ARM, false, true}}, - {"i386", {ELF::EM_386, false, true}}, - {"i386:x86-64", {ELF::EM_X86_64, true, true}}, - {"mips", {ELF::EM_MIPS, false, false}}, - {"powerpc:common64", {ELF::EM_PPC64, true, true}}, - {"riscv:rv32", {ELF::EM_RISCV, false, true}}, - {"riscv:rv64", {ELF::EM_RISCV, true, true}}, - {"sparc", {ELF::EM_SPARC, false, false}}, - {"sparcel", {ELF::EM_SPARC, false, true}}, - {"x86-64", {ELF::EM_X86_64, true, true}}, -}; - -static Expected getMachineInfo(StringRef Arch) { - auto Iter = ArchMap.find(Arch); - if (Iter == std::end(ArchMap)) - return createStringError(errc::invalid_argument, - "invalid architecture: '%s'", Arch.str().c_str()); - return Iter->getValue(); -} - struct TargetInfo { FileFormat Format; MachineInfo Machine; @@ -341,9 +260,10 @@ getOutputTargetInfoByTargetName(StringRef TargetName) { return {TargetInfo{Format, MI}}; } -static Error addSymbolsFromFile(std::vector &Symbols, - BumpPtrAllocator &Alloc, StringRef Filename, - bool UseRegex) { +static Error +addSymbolsFromFile(NameMatcher &Symbols, BumpPtrAllocator &Alloc, + StringRef Filename, MatchStyle MS, + llvm::function_ref ErrorCallback) { StringSaver Saver(Alloc); SmallVector Lines; auto BufOrErr = MemoryBuffer::getFile(Filename); @@ -356,21 +276,47 @@ static Error addSymbolsFromFile(std::vector &Symbols, // it's not empty. auto TrimmedLine = Line.split('#').first.trim(); if (!TrimmedLine.empty()) - Symbols.emplace_back(Saver.save(TrimmedLine), UseRegex); + if (Error E = Symbols.addMatcher(NameOrPattern::create( + Saver.save(TrimmedLine), MS, ErrorCallback))) + return E; } return Error::success(); } -NameOrRegex::NameOrRegex(StringRef Pattern, bool IsRegex) { - if (!IsRegex) { - Name = Pattern; - return; - } +Expected +NameOrPattern::create(StringRef Pattern, MatchStyle MS, + llvm::function_ref ErrorCallback) { + switch (MS) { + case MatchStyle::Literal: + return NameOrPattern(Pattern); + case MatchStyle::Wildcard: { + SmallVector Data; + bool IsPositiveMatch = true; + if (Pattern[0] == '!') { + IsPositiveMatch = false; + Pattern = Pattern.drop_front(); + } + Expected GlobOrErr = GlobPattern::create(Pattern); + + // If we couldn't create it as a glob, report the error, but try again with + // a literal if the error reporting is non-fatal. + if (!GlobOrErr) { + if (Error E = ErrorCallback(GlobOrErr.takeError())) + return std::move(E); + return create(Pattern, MatchStyle::Literal, ErrorCallback); + } - SmallVector Data; - R = std::make_shared( - ("^" + Pattern.ltrim('^').rtrim('$') + "$").toStringRef(Data)); + return NameOrPattern(std::make_shared(*GlobOrErr), + IsPositiveMatch); + } + case MatchStyle::Regex: { + SmallVector Data; + return NameOrPattern(std::make_shared( + ("^" + Pattern.ltrim('^').rtrim('$') + "$").toStringRef(Data))); + } + } + llvm_unreachable("Unhandled llvm.objcopy.MatchStyle enum"); } static Error addSymbolsToRenameFromFile(StringMap &SymbolsToRename, @@ -407,10 +353,22 @@ template static ErrorOr getAsInteger(StringRef Val) { return Result; } +static void printHelp(const opt::OptTable &OptTable, raw_ostream &OS, + StringRef ToolName) { + OptTable.PrintHelp(OS, (ToolName + " input [output]").str().c_str(), + (ToolName + " tool").str().c_str()); + // TODO: Replace this with libOption call once it adds extrahelp support. 
+ // The CommandLine library has a cl::extrahelp class to support this, + // but libOption does not have that yet. + OS << "\nPass @FILE as argument to read options from FILE.\n"; +} + // ParseObjcopyOptions returns the config and sets the input arguments. If a // help flag is set then ParseObjcopyOptions will print the help messege and // exit. -Expected parseObjcopyOptions(ArrayRef ArgsArr) { +Expected +parseObjcopyOptions(ArrayRef ArgsArr, + llvm::function_ref ErrorCallback) { DriverConfig DC; ObjcopyOptTable T; unsigned MissingArgumentIndex, MissingArgumentCount; @@ -418,12 +376,12 @@ Expected parseObjcopyOptions(ArrayRef ArgsArr) { T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount); if (InputArgs.size() == 0) { - T.PrintHelp(errs(), "llvm-objcopy input [output]", "objcopy tool"); + printHelp(T, errs(), "llvm-objcopy"); exit(1); } if (InputArgs.hasArg(OBJCOPY_help)) { - T.PrintHelp(outs(), "llvm-objcopy input [output]", "objcopy tool"); + printHelp(T, outs(), "llvm-objcopy"); exit(0); } @@ -459,7 +417,18 @@ Expected parseObjcopyOptions(ArrayRef ArgsArr) { errc::invalid_argument, "--target cannot be used with --input-target or --output-target"); - bool UseRegex = InputArgs.hasArg(OBJCOPY_regex); + if (InputArgs.hasArg(OBJCOPY_regex) && InputArgs.hasArg(OBJCOPY_wildcard)) + return createStringError(errc::invalid_argument, + "--regex and --wildcard are incompatible"); + + MatchStyle SectionMatchStyle = InputArgs.hasArg(OBJCOPY_regex) + ? MatchStyle::Regex + : MatchStyle::Wildcard; + MatchStyle SymbolMatchStyle = InputArgs.hasArg(OBJCOPY_regex) + ? MatchStyle::Regex + : InputArgs.hasArg(OBJCOPY_wildcard) + ? MatchStyle::Wildcard + : MatchStyle::Literal; StringRef InputFormat, OutputFormat; if (InputArgs.hasArg(OBJCOPY_target)) { InputFormat = InputArgs.getLastArgValue(OBJCOPY_target); @@ -476,28 +445,26 @@ Expected parseObjcopyOptions(ArrayRef ArgsArr) { .Case("binary", FileFormat::Binary) .Case("ihex", FileFormat::IHex) .Default(FileFormat::Unspecified); - if (Config.InputFormat == FileFormat::Binary) { - auto BinaryArch = InputArgs.getLastArgValue(OBJCOPY_binary_architecture); - if (BinaryArch.empty()) - return createStringError( - errc::invalid_argument, - "specified binary input without specifiying an architecture"); - Expected MI = getMachineInfo(BinaryArch); - if (!MI) - return MI.takeError(); - Config.BinaryArch = *MI; - } + + if (InputArgs.hasArg(OBJCOPY_new_symbol_visibility)) + Config.NewSymbolVisibility = + InputArgs.getLastArgValue(OBJCOPY_new_symbol_visibility); Config.OutputFormat = StringSwitch(OutputFormat) .Case("binary", FileFormat::Binary) .Case("ihex", FileFormat::IHex) .Default(FileFormat::Unspecified); - if (Config.OutputFormat == FileFormat::Unspecified && !OutputFormat.empty()) { - Expected Target = getOutputTargetInfoByTargetName(OutputFormat); - if (!Target) - return Target.takeError(); - Config.OutputFormat = Target->Format; - Config.OutputArch = Target->Machine; + if (Config.OutputFormat == FileFormat::Unspecified) { + if (OutputFormat.empty()) { + Config.OutputFormat = Config.InputFormat; + } else { + Expected Target = + getOutputTargetInfoByTargetName(OutputFormat); + if (!Target) + return Target.takeError(); + Config.OutputFormat = Target->Format; + Config.OutputArch = Target->Machine; + } } if (auto Arg = InputArgs.getLastArg(OBJCOPY_compress_debug_sections, @@ -535,12 +502,8 @@ Expected parseObjcopyOptions(ArrayRef ArgsArr) { if (!DebugOrErr) return createFileError(Config.AddGnuDebugLink, DebugOrErr.getError()); auto Debug = 
std::move(*DebugOrErr); - JamCRC CRC; - CRC.update( - ArrayRef(Debug->getBuffer().data(), Debug->getBuffer().size())); - // The CRC32 value needs to be complemented because the JamCRC doesn't - // finalize the CRC32 value. - Config.GnuDebugLinkCRC32 = ~CRC.getCRC(); + Config.GnuDebugLinkCRC32 = + llvm::crc32(arrayRefFromStringRef(Debug->getBuffer())); } Config.BuildIdLinkDir = InputArgs.getLastArgValue(OBJCOPY_build_id_link_dir); if (InputArgs.hasArg(OBJCOPY_build_id_link_input)) @@ -582,6 +545,13 @@ Expected parseObjcopyOptions(ArrayRef ArgsArr) { "multiple renames of section '%s'", SR->OriginalName.str().c_str()); } + for (auto Arg : InputArgs.filtered(OBJCOPY_set_section_alignment)) { + Expected> NameAndAlign = + parseSetSectionAlignment(Arg->getValue()); + if (!NameAndAlign) + return NameAndAlign.takeError(); + Config.SetSectionAlignment[NameAndAlign->first] = NameAndAlign->second; + } for (auto Arg : InputArgs.filtered(OBJCOPY_set_section_flags)) { Expected SFU = parseSetSectionFlagValue(Arg->getValue()); @@ -612,13 +582,28 @@ Expected parseObjcopyOptions(ArrayRef ArgsArr) { } for (auto Arg : InputArgs.filtered(OBJCOPY_remove_section)) - Config.ToRemove.emplace_back(Arg->getValue(), UseRegex); + if (Error E = Config.ToRemove.addMatcher(NameOrPattern::create( + Arg->getValue(), SectionMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_keep_section)) - Config.KeepSection.emplace_back(Arg->getValue(), UseRegex); + if (Error E = Config.KeepSection.addMatcher(NameOrPattern::create( + Arg->getValue(), SectionMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_only_section)) - Config.OnlySection.emplace_back(Arg->getValue(), UseRegex); - for (auto Arg : InputArgs.filtered(OBJCOPY_add_section)) - Config.AddSection.push_back(Arg->getValue()); + if (Error E = Config.OnlySection.addMatcher(NameOrPattern::create( + Arg->getValue(), SectionMatchStyle, ErrorCallback))) + return std::move(E); + for (auto Arg : InputArgs.filtered(OBJCOPY_add_section)) { + StringRef ArgValue(Arg->getValue()); + if (!ArgValue.contains('=')) + return createStringError(errc::invalid_argument, + "bad format for --add-section: missing '='"); + if (ArgValue.split("=").second.empty()) + return createStringError( + errc::invalid_argument, + "bad format for --add-section: missing file name"); + Config.AddSection.push_back(ArgValue); + } for (auto Arg : InputArgs.filtered(OBJCOPY_dump_section)) Config.DumpSection.push_back(Arg->getValue()); Config.StripAll = InputArgs.hasArg(OBJCOPY_strip_all); @@ -645,53 +630,71 @@ Expected parseObjcopyOptions(ArrayRef ArgsArr) { if (Config.DiscardMode == DiscardType::All) Config.StripDebug = true; for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbol)) - Config.SymbolsToLocalize.emplace_back(Arg->getValue(), UseRegex); + if (Error E = Config.SymbolsToLocalize.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToLocalize, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle, + ErrorCallback)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_keep_global_symbol)) - Config.SymbolsToKeepGlobal.emplace_back(Arg->getValue(), UseRegex); + if (Error E = Config.SymbolsToKeepGlobal.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : 
InputArgs.filtered(OBJCOPY_keep_global_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToKeepGlobal, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle, + ErrorCallback)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_globalize_symbol)) - Config.SymbolsToGlobalize.emplace_back(Arg->getValue(), UseRegex); + if (Error E = Config.SymbolsToGlobalize.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_globalize_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToGlobalize, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle, + ErrorCallback)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_weaken_symbol)) - Config.SymbolsToWeaken.emplace_back(Arg->getValue(), UseRegex); + if (Error E = Config.SymbolsToWeaken.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_weaken_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToWeaken, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle, + ErrorCallback)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_strip_symbol)) - Config.SymbolsToRemove.emplace_back(Arg->getValue(), UseRegex); + if (Error E = Config.SymbolsToRemove.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_strip_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToRemove, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle, + ErrorCallback)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_strip_unneeded_symbol)) - Config.UnneededSymbolsToRemove.emplace_back(Arg->getValue(), UseRegex); + if (Error E = + Config.UnneededSymbolsToRemove.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_strip_unneeded_symbols)) if (Error E = addSymbolsFromFile(Config.UnneededSymbolsToRemove, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle, + ErrorCallback)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_keep_symbol)) - Config.SymbolsToKeep.emplace_back(Arg->getValue(), UseRegex); + if (Error E = Config.SymbolsToKeep.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_keep_symbols)) - if (Error E = addSymbolsFromFile(Config.SymbolsToKeep, DC.Alloc, - Arg->getValue(), UseRegex)) + if (Error E = + addSymbolsFromFile(Config.SymbolsToKeep, DC.Alloc, Arg->getValue(), + SymbolMatchStyle, ErrorCallback)) return std::move(E); - for (auto Arg : InputArgs.filtered(OBJCOPY_add_symbol)) { - Expected NSI = parseNewSymbolInfo(Arg->getValue()); - if (!NSI) - return NSI.takeError(); - Config.SymbolsToAdd.push_back(*NSI); - } + for (auto Arg : InputArgs.filtered(OBJCOPY_add_symbol)) + Config.SymbolsToAdd.push_back(Arg->getValue()); Config.AllowBrokenLinks = InputArgs.hasArg(OBJCOPY_allow_broken_links); @@ -754,19 +757,19 @@ Expected parseObjcopyOptions(ArrayRef ArgsArr) { // exit. 
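The --set-section-alignment handling a few hunks above splits the flag on '=' and parses the value with StringRef::getAsInteger(0, ...), which auto-detects decimal, octal, and 0x-prefixed hex and returns true on failure. A standalone sketch of that parse (names are illustrative):

#include "llvm/ADT/StringRef.h"
#include <cstdint>

// Parse "<section>=<align>"; returns false on a malformed flag.
static bool parseNameAndAlignment(llvm::StringRef Flag, llvm::StringRef &Name,
                                  uint64_t &Align) {
  std::pair<llvm::StringRef, llvm::StringRef> Split = Flag.split('=');
  Name = Split.first;
  return Flag.contains('=') && !Name.empty() &&
         !Split.second.getAsInteger(0, Align);
}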
Expected parseStripOptions(ArrayRef ArgsArr, - std::function ErrorCallback) { + llvm::function_ref ErrorCallback) { StripOptTable T; unsigned MissingArgumentIndex, MissingArgumentCount; llvm::opt::InputArgList InputArgs = T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount); if (InputArgs.size() == 0) { - T.PrintHelp(errs(), "llvm-strip [options] file...", "strip tool"); + printHelp(T, errs(), "llvm-strip"); exit(1); } if (InputArgs.hasArg(STRIP_help)) { - T.PrintHelp(outs(), "llvm-strip [options] file...", "strip tool"); + printHelp(T, outs(), "llvm-strip"); exit(0); } @@ -792,7 +795,17 @@ parseStripOptions(ArrayRef ArgsArr, "multiple input files cannot be used in combination with -o"); CopyConfig Config; - bool UseRegexp = InputArgs.hasArg(STRIP_regex); + + if (InputArgs.hasArg(STRIP_regex) && InputArgs.hasArg(STRIP_wildcard)) + return createStringError(errc::invalid_argument, + "--regex and --wildcard are incompatible"); + MatchStyle SectionMatchStyle = + InputArgs.hasArg(STRIP_regex) ? MatchStyle::Regex : MatchStyle::Wildcard; + MatchStyle SymbolMatchStyle = InputArgs.hasArg(STRIP_regex) + ? MatchStyle::Regex + : InputArgs.hasArg(STRIP_wildcard) + ? MatchStyle::Wildcard + : MatchStyle::Literal; Config.AllowBrokenLinks = InputArgs.hasArg(STRIP_allow_broken_links); Config.StripDebug = InputArgs.hasArg(STRIP_strip_debug); @@ -801,6 +814,7 @@ parseStripOptions(ArrayRef ArgsArr, InputArgs.hasFlag(STRIP_discard_all, STRIP_discard_locals) ? DiscardType::All : DiscardType::Locals; + Config.StripSections = InputArgs.hasArg(STRIP_strip_sections); Config.StripUnneeded = InputArgs.hasArg(STRIP_strip_unneeded); if (auto Arg = InputArgs.getLastArg(STRIP_strip_all, STRIP_no_strip_all)) Config.StripAll = Arg->getOption().getID() == STRIP_strip_all; @@ -809,16 +823,24 @@ parseStripOptions(ArrayRef ArgsArr, Config.KeepFileSymbols = InputArgs.hasArg(STRIP_keep_file_symbols); for (auto Arg : InputArgs.filtered(STRIP_keep_section)) - Config.KeepSection.emplace_back(Arg->getValue(), UseRegexp); + if (Error E = Config.KeepSection.addMatcher(NameOrPattern::create( + Arg->getValue(), SectionMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(STRIP_remove_section)) - Config.ToRemove.emplace_back(Arg->getValue(), UseRegexp); + if (Error E = Config.ToRemove.addMatcher(NameOrPattern::create( + Arg->getValue(), SectionMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(STRIP_strip_symbol)) - Config.SymbolsToRemove.emplace_back(Arg->getValue(), UseRegexp); + if (Error E = Config.SymbolsToRemove.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(STRIP_keep_symbol)) - Config.SymbolsToKeep.emplace_back(Arg->getValue(), UseRegexp); + if (Error E = Config.SymbolsToKeep.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); if (!InputArgs.hasArg(STRIP_no_strip_all) && !Config.StripDebug && !Config.StripUnneeded && Config.DiscardMode == DiscardType::None && diff --git a/tools/llvm-objcopy/CopyConfig.h b/tools/llvm-objcopy/CopyConfig.h index aff3631a487c..55a55d3a2bc2 100644 --- a/tools/llvm-objcopy/CopyConfig.h +++ b/tools/llvm-objcopy/CopyConfig.h @@ -9,6 +9,7 @@ #ifndef LLVM_TOOLS_LLVM_OBJCOPY_COPY_CONFIG_H #define LLVM_TOOLS_LLVM_OBJCOPY_COPY_CONFIG_H +#include "ELF/ELFConfig.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/Optional.h" @@ -18,6 +19,7 @@ 
#include "llvm/Object/ELFTypes.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" +#include "llvm/Support/GlobPattern.h" #include "llvm/Support/Regex.h" // Necessary for llvm::DebugCompressionType::None #include "llvm/Target/TargetOptions.h" @@ -87,36 +89,71 @@ enum class DiscardType { Locals, // --discard-locals (-X) }; -class NameOrRegex { +enum class MatchStyle { + Literal, // Default for symbols. + Wildcard, // Default for sections, or enabled with --wildcard (-w). + Regex, // Enabled with --regex. +}; + +class NameOrPattern { StringRef Name; // Regex is shared between multiple CopyConfig instances. std::shared_ptr R; + std::shared_ptr G; + bool IsPositiveMatch = true; + + NameOrPattern(StringRef N) : Name(N) {} + NameOrPattern(std::shared_ptr R) : R(R) {} + NameOrPattern(std::shared_ptr G, bool IsPositiveMatch) + : G(G), IsPositiveMatch(IsPositiveMatch) {} public: - NameOrRegex(StringRef Pattern, bool IsRegex); - bool operator==(StringRef S) const { return R ? R->match(S) : Name == S; } + // ErrorCallback is used to handle recoverable errors. An Error returned + // by the callback aborts the parsing and is then returned by this function. + static Expected + create(StringRef Pattern, MatchStyle MS, + llvm::function_ref ErrorCallback); + + bool isPositiveMatch() const { return IsPositiveMatch; } + bool operator==(StringRef S) const { + return R ? R->match(S) : G ? G->match(S) : Name == S; + } bool operator!=(StringRef S) const { return !operator==(S); } }; -struct NewSymbolInfo { - StringRef SymbolName; - StringRef SectionName; - uint64_t Value = 0; - uint8_t Type = ELF::STT_NOTYPE; - uint8_t Bind = ELF::STB_GLOBAL; - uint8_t Visibility = ELF::STV_DEFAULT; +// Matcher that checks symbol or section names against the command line flags +// provided for that option. +class NameMatcher { + std::vector PosMatchers; + std::vector NegMatchers; + +public: + Error addMatcher(Expected Matcher) { + if (!Matcher) + return Matcher.takeError(); + if (Matcher->isPositiveMatch()) + PosMatchers.push_back(std::move(*Matcher)); + else + NegMatchers.push_back(std::move(*Matcher)); + return Error::success(); + } + bool matches(StringRef S) const { + return is_contained(PosMatchers, S) && !is_contained(NegMatchers, S); + } + bool empty() const { return PosMatchers.empty() && NegMatchers.empty(); } }; // Configuration for copying/stripping a single file. struct CopyConfig { + // Format-specific options to be initialized lazily when needed. + Optional ELF; + // Main input/output options StringRef InputFilename; FileFormat InputFormat; StringRef OutputFilename; FileFormat OutputFormat; - // Only applicable for --input-format=binary - MachineInfo BinaryArch; // Only applicable when --output-format!=binary (e.g. elf64-x86-64). 
Optional OutputArch; @@ -132,24 +169,30 @@ struct CopyConfig { StringRef SymbolsPrefix; StringRef AllocSectionsPrefix; DiscardType DiscardMode = DiscardType::None; + Optional NewSymbolVisibility; // Repeated options std::vector AddSection; std::vector DumpSection; - std::vector SymbolsToAdd; - std::vector KeepSection; - std::vector OnlySection; - std::vector SymbolsToGlobalize; - std::vector SymbolsToKeep; - std::vector SymbolsToLocalize; - std::vector SymbolsToRemove; - std::vector UnneededSymbolsToRemove; - std::vector SymbolsToWeaken; - std::vector ToRemove; - std::vector SymbolsToKeepGlobal; + std::vector SymbolsToAdd; + + // Section matchers + NameMatcher KeepSection; + NameMatcher OnlySection; + NameMatcher ToRemove; + + // Symbol matchers + NameMatcher SymbolsToGlobalize; + NameMatcher SymbolsToKeep; + NameMatcher SymbolsToLocalize; + NameMatcher SymbolsToRemove; + NameMatcher UnneededSymbolsToRemove; + NameMatcher SymbolsToWeaken; + NameMatcher SymbolsToKeepGlobal; // Map options StringMap SectionsToRename; + StringMap SetSectionAlignment; StringMap SetSectionFlags; StringMap SymbolsToRename; @@ -178,6 +221,18 @@ struct CopyConfig { bool Weaken = false; bool DecompressDebugSections = false; DebugCompressionType CompressionType = DebugCompressionType::None; + + // parseELFConfig performs ELF-specific command-line parsing. Fills `ELF` on + // success or returns an Error otherwise. + Error parseELFConfig() { + if (!ELF) { + Expected ELFConfig = elf::parseConfig(*this); + if (!ELFConfig) + return ELFConfig.takeError(); + ELF = *ELFConfig; + } + return Error::success(); + } }; // Configuration for the overall invocation of this tool. When invoked as @@ -190,8 +245,11 @@ struct DriverConfig { // ParseObjcopyOptions returns the config and sets the input arguments. If a // help flag is set then ParseObjcopyOptions will print the help messege and -// exit. -Expected parseObjcopyOptions(ArrayRef ArgsArr); +// exit. ErrorCallback is used to handle recoverable errors. An Error returned +// by the callback aborts the parsing and is then returned by this function. +Expected +parseObjcopyOptions(ArrayRef ArgsArr, + llvm::function_ref ErrorCallback); // ParseStripOptions returns the config and sets the input arguments. If a // help flag is set then ParseStripOptions will print the help messege and @@ -199,7 +257,7 @@ Expected parseObjcopyOptions(ArrayRef ArgsArr); // by the callback aborts the parsing and is then returned by this function. Expected parseStripOptions(ArrayRef ArgsArr, - std::function ErrorCallback); + llvm::function_ref ErrorCallback); } // namespace objcopy } // namespace llvm diff --git a/tools/llvm-objcopy/ELF/ELFConfig.cpp b/tools/llvm-objcopy/ELF/ELFConfig.cpp new file mode 100644 index 000000000000..40993760add7 --- /dev/null +++ b/tools/llvm-objcopy/ELF/ELFConfig.cpp @@ -0,0 +1,133 @@ +//===- ELFConfig.cpp ------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CopyConfig.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace objcopy { +namespace elf { + +static Expected parseNewSymbolInfo(StringRef FlagValue, + uint8_t DefaultVisibility) { + // Parse value given with --add-symbol option and create the + // new symbol if possible. The value format for --add-symbol is: + // + // =[
<section>:]<value>[,<flags>] + // + // where: + // <name> - symbol name, can be empty string + // <section>
- optional section name. If not given ABS symbol is created + // - symbol value, can be decimal or hexadecimal number prefixed + // with 0x. + // - optional flags affecting symbol type, binding or visibility: + // The following are currently supported: + // + // global, local, weak, default, hidden, file, section, object, + // indirect-function. + // + // The following flags are ignored and provided for GNU + // compatibility only: + // + // warning, debug, constructor, indirect, synthetic, + // unique-object, before=. + NewSymbolInfo SI; + StringRef Value; + std::tie(SI.SymbolName, Value) = FlagValue.split('='); + if (Value.empty()) + return createStringError( + errc::invalid_argument, + "bad format for --add-symbol, missing '=' after '%s'", + SI.SymbolName.str().c_str()); + + if (Value.contains(':')) { + std::tie(SI.SectionName, Value) = Value.split(':'); + if (SI.SectionName.empty() || Value.empty()) + return createStringError( + errc::invalid_argument, + "bad format for --add-symbol, missing section name or symbol value"); + } + + SmallVector Flags; + Value.split(Flags, ','); + if (Flags[0].getAsInteger(0, SI.Value)) + return createStringError(errc::invalid_argument, "bad symbol value: '%s'", + Flags[0].str().c_str()); + + SI.Visibility = DefaultVisibility; + + using Functor = std::function; + SmallVector UnsupportedFlags; + for (size_t I = 1, NumFlags = Flags.size(); I < NumFlags; ++I) + static_cast( + StringSwitch(Flags[I]) + .CaseLower("global", [&SI] { SI.Bind = ELF::STB_GLOBAL; }) + .CaseLower("local", [&SI] { SI.Bind = ELF::STB_LOCAL; }) + .CaseLower("weak", [&SI] { SI.Bind = ELF::STB_WEAK; }) + .CaseLower("default", [&SI] { SI.Visibility = ELF::STV_DEFAULT; }) + .CaseLower("hidden", [&SI] { SI.Visibility = ELF::STV_HIDDEN; }) + .CaseLower("protected", + [&SI] { SI.Visibility = ELF::STV_PROTECTED; }) + .CaseLower("file", [&SI] { SI.Type = ELF::STT_FILE; }) + .CaseLower("section", [&SI] { SI.Type = ELF::STT_SECTION; }) + .CaseLower("object", [&SI] { SI.Type = ELF::STT_OBJECT; }) + .CaseLower("function", [&SI] { SI.Type = ELF::STT_FUNC; }) + .CaseLower("indirect-function", + [&SI] { SI.Type = ELF::STT_GNU_IFUNC; }) + .CaseLower("debug", [] {}) + .CaseLower("constructor", [] {}) + .CaseLower("warning", [] {}) + .CaseLower("indirect", [] {}) + .CaseLower("synthetic", [] {}) + .CaseLower("unique-object", [] {}) + .StartsWithLower("before", [] {}) + .Default([&] { UnsupportedFlags.push_back(Flags[I]); }))(); + if (!UnsupportedFlags.empty()) + return createStringError(errc::invalid_argument, + "unsupported flag%s for --add-symbol: '%s'", + UnsupportedFlags.size() > 1 ? 
"s" : "", + join(UnsupportedFlags, "', '").c_str()); + return SI; +} + +Expected parseConfig(const CopyConfig &Config) { + ELFCopyConfig ELFConfig; + if (Config.NewSymbolVisibility) { + const uint8_t Invalid = 0xff; + ELFConfig.NewSymbolVisibility = + StringSwitch(*Config.NewSymbolVisibility) + .Case("default", ELF::STV_DEFAULT) + .Case("hidden", ELF::STV_HIDDEN) + .Case("internal", ELF::STV_INTERNAL) + .Case("protected", ELF::STV_PROTECTED) + .Default(Invalid); + + if (ELFConfig.NewSymbolVisibility == Invalid) + return createStringError(errc::invalid_argument, + "'%s' is not a valid symbol visibility", + Config.NewSymbolVisibility->str().c_str()); + } + + for (StringRef Arg : Config.SymbolsToAdd) { + Expected NSI = parseNewSymbolInfo( + Arg, + ELFConfig.NewSymbolVisibility.getValueOr((uint8_t)ELF::STV_DEFAULT)); + if (!NSI) + return NSI.takeError(); + ELFConfig.SymbolsToAdd.push_back(*NSI); + } + + return ELFConfig; +} + +} // end namespace elf +} // end namespace objcopy +} // end namespace llvm diff --git a/tools/llvm-objcopy/ELF/ELFConfig.h b/tools/llvm-objcopy/ELF/ELFConfig.h new file mode 100644 index 000000000000..977efbc4166f --- /dev/null +++ b/tools/llvm-objcopy/ELF/ELFConfig.h @@ -0,0 +1,44 @@ +//===- ELFConfig.h ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_OBJCOPY_ELFCONFIG_H +#define LLVM_TOOLS_OBJCOPY_ELFCONFIG_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/ELFTypes.h" +#include "llvm/Support/Error.h" +#include + +namespace llvm { +namespace objcopy { +struct CopyConfig; + +namespace elf { + +struct NewSymbolInfo { + StringRef SymbolName; + StringRef SectionName; + uint64_t Value = 0; + uint8_t Type = ELF::STT_NOTYPE; + uint8_t Bind = ELF::STB_GLOBAL; + uint8_t Visibility = ELF::STV_DEFAULT; +}; + +struct ELFCopyConfig { + Optional NewSymbolVisibility; + std::vector SymbolsToAdd; +}; + +Expected parseConfig(const CopyConfig &Config); + +} // namespace elf +} // namespace objcopy +} // namespace llvm + +#endif diff --git a/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/tools/llvm-objcopy/ELF/ELFObjcopy.cpp index b366c6e55987..8bf7e0f88010 100644 --- a/tools/llvm-objcopy/ELF/ELFObjcopy.cpp +++ b/tools/llvm-objcopy/ELF/ELFObjcopy.cpp @@ -136,16 +136,16 @@ static std::unique_ptr createELFWriter(const CopyConfig &Config, // Depending on the initial ELFT and OutputFormat we need a different Writer. 
switch (OutputElfType) { case ELFT_ELF32LE: - return llvm::make_unique>(Obj, Buf, + return std::make_unique>(Obj, Buf, !Config.StripSections); case ELFT_ELF64LE: - return llvm::make_unique>(Obj, Buf, + return std::make_unique>(Obj, Buf, !Config.StripSections); case ELFT_ELF32BE: - return llvm::make_unique>(Obj, Buf, + return std::make_unique>(Obj, Buf, !Config.StripSections); case ELFT_ELF64BE: - return llvm::make_unique>(Obj, Buf, + return std::make_unique>(Obj, Buf, !Config.StripSections); } llvm_unreachable("Invalid output format"); @@ -156,9 +156,9 @@ static std::unique_ptr createWriter(const CopyConfig &Config, ElfType OutputElfType) { switch (Config.OutputFormat) { case FileFormat::Binary: - return llvm::make_unique(Obj, Buf); + return std::make_unique(Obj, Buf); case FileFormat::IHex: - return llvm::make_unique(Obj, Buf); + return std::make_unique(Obj, Buf); default: return createELFWriter(Config, Obj, Buf, OutputElfType); } @@ -263,7 +263,7 @@ static Error linkToBuildIdDir(const CopyConfig &Config, StringRef ToLink, static Error splitDWOToFile(const CopyConfig &Config, const Reader &Reader, StringRef File, ElfType OutputElfType) { - auto DWOFile = Reader.create(); + auto DWOFile = Reader.create(false); auto OnlyKeepDWOPred = [&DWOFile](const SectionBase &Sec) { return onlyKeepDWOPred(*DWOFile, Sec); }; @@ -305,9 +305,9 @@ static Error dumpSectionToFile(StringRef SecName, StringRef Filename, SecName.str().c_str()); } -static bool isCompressable(const SectionBase &Section) { - return !(Section.Flags & ELF::SHF_COMPRESSED) && - StringRef(Section.Name).startswith(".debug"); +static bool isCompressable(const SectionBase &Sec) { + return !(Sec.Flags & ELF::SHF_COMPRESSED) && + StringRef(Sec.Name).startswith(".debug"); } static void replaceDebugSections( @@ -356,7 +356,7 @@ static Error updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) { if (!Sym.isCommon() && Sym.getShndx() != SHN_UNDEF && ((Config.LocalizeHidden && (Sym.Visibility == STV_HIDDEN || Sym.Visibility == STV_INTERNAL)) || - is_contained(Config.SymbolsToLocalize, Sym.Name))) + Config.SymbolsToLocalize.matches(Sym.Name))) Sym.Binding = STB_LOCAL; // Note: these two globalize flags have very similar names but different @@ -370,16 +370,15 @@ static Error updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) { // --keep-global-symbol. Because of that, make sure to check // --globalize-symbol second. if (!Config.SymbolsToKeepGlobal.empty() && - !is_contained(Config.SymbolsToKeepGlobal, Sym.Name) && + !Config.SymbolsToKeepGlobal.matches(Sym.Name) && Sym.getShndx() != SHN_UNDEF) Sym.Binding = STB_LOCAL; - if (is_contained(Config.SymbolsToGlobalize, Sym.Name) && + if (Config.SymbolsToGlobalize.matches(Sym.Name) && Sym.getShndx() != SHN_UNDEF) Sym.Binding = STB_GLOBAL; - if (is_contained(Config.SymbolsToWeaken, Sym.Name) && - Sym.Binding == STB_GLOBAL) + if (Config.SymbolsToWeaken.matches(Sym.Name) && Sym.Binding == STB_GLOBAL) Sym.Binding = STB_WEAK; if (Config.Weaken && Sym.Binding == STB_GLOBAL && @@ -399,12 +398,12 @@ static Error updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) { // symbols are still 'needed' and which are not. 
if (Config.StripUnneeded || !Config.UnneededSymbolsToRemove.empty() || !Config.OnlySection.empty()) { - for (auto &Section : Obj.sections()) - Section.markSymbols(); + for (SectionBase &Sec : Obj.sections()) + Sec.markSymbols(); } auto RemoveSymbolsPred = [&](const Symbol &Sym) { - if (is_contained(Config.SymbolsToKeep, Sym.Name) || + if (Config.SymbolsToKeep.matches(Sym.Name) || (Config.KeepFileSymbols && Sym.Type == STT_FILE)) return false; @@ -418,12 +417,12 @@ static Error updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) { if (Config.StripAll || Config.StripAllGNU) return true; - if (is_contained(Config.SymbolsToRemove, Sym.Name)) + if (Config.SymbolsToRemove.matches(Sym.Name)) return true; if ((Config.StripUnneeded || - is_contained(Config.UnneededSymbolsToRemove, Sym.Name)) && - isUnneededSymbol(Sym)) + Config.UnneededSymbolsToRemove.matches(Sym.Name)) && + (!Obj.isRelocatable() || isUnneededSymbol(Sym))) return true; // We want to remove undefined symbols if all references have been stripped. @@ -443,7 +442,7 @@ static Error replaceAndRemoveSections(const CopyConfig &Config, Object &Obj) { // Removes: if (!Config.ToRemove.empty()) { RemovePred = [&Config](const SectionBase &Sec) { - return is_contained(Config.ToRemove, Sec.Name); + return Config.ToRemove.matches(Sec.Name); }; } @@ -481,7 +480,7 @@ static Error replaceAndRemoveSections(const CopyConfig &Config, Object &Obj) { }; } - if (Config.StripDebug) { + if (Config.StripDebug || Config.StripUnneeded) { RemovePred = [RemovePred](const SectionBase &Sec) { return RemovePred(Sec) || isDebugSection(Sec); }; @@ -523,7 +522,7 @@ static Error replaceAndRemoveSections(const CopyConfig &Config, Object &Obj) { if (!Config.OnlySection.empty()) { RemovePred = [&Config, RemovePred, &Obj](const SectionBase &Sec) { // Explicitly keep these sections regardless of previous removes. - if (is_contained(Config.OnlySection, Sec.Name)) + if (Config.OnlySection.matches(Sec.Name)) return false; // Allow all implicit removes. @@ -545,7 +544,7 @@ static Error replaceAndRemoveSections(const CopyConfig &Config, Object &Obj) { if (!Config.KeepSection.empty()) { RemovePred = [&Config, RemovePred](const SectionBase &Sec) { // Explicitly keep these sections regardless of previous removes. - if (is_contained(Config.KeepSection, Sec.Name)) + if (Config.KeepSection.matches(Sec.Name)) return false; // Otherwise defer to RemovePred. return RemovePred(Sec); @@ -614,9 +613,8 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj, if (Error E = updateAndRemoveSymbols(Config, Obj)) return E; - if (!Config.SectionsToRename.empty() || !Config.AllocSectionsPrefix.empty()) { - DenseSet PrefixedSections; - for (auto &Sec : Obj.sections()) { + if (!Config.SectionsToRename.empty()) { + for (SectionBase &Sec : Obj.sections()) { const auto Iter = Config.SectionsToRename.find(Sec.Name); if (Iter != Config.SectionsToRename.end()) { const SectionRename &SR = Iter->second; @@ -624,63 +622,62 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj, if (SR.NewFlags.hasValue()) setSectionFlagsAndType(Sec, SR.NewFlags.getValue()); } + } + } - // Add a prefix to allocated sections and their relocation sections. This - // should be done after renaming the section by Config.SectionToRename to - // imitate the GNU objcopy behavior. 
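The replaceAndRemoveSections logic above grows the removal condition by capturing the previous predicate inside a new lambda rather than rebuilding one large condition. A reduced, illustrative sketch of that chaining pattern (not the patch's own types):

#include <functional>
#include <string>

using SectionPred = std::function<bool(const std::string &)>;

// Wrap an existing predicate so that .debug* sections are also removed.
static SectionPred alsoRemoveDebugSections(SectionPred Prev) {
  return [Prev](const std::string &Name) {
    return Prev(Name) || Name.compare(0, 6, ".debug") == 0;
  };
}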
- if (!Config.AllocSectionsPrefix.empty()) { - if (Sec.Flags & SHF_ALLOC) { - Sec.Name = (Config.AllocSectionsPrefix + Sec.Name).str(); - PrefixedSections.insert(&Sec); - - // Rename relocation sections associated to the allocated sections. - // For example, if we rename .text to .prefix.text, we also rename - // .rel.text to .rel.prefix.text. - // - // Dynamic relocation sections (SHT_REL[A] with SHF_ALLOC) are handled - // above, e.g., .rela.plt is renamed to .prefix.rela.plt, not - // .rela.prefix.plt since GNU objcopy does so. - } else if (auto *RelocSec = dyn_cast(&Sec)) { - auto *TargetSec = RelocSec->getSection(); - if (TargetSec && (TargetSec->Flags & SHF_ALLOC)) { - StringRef prefix; - switch (Sec.Type) { - case SHT_REL: - prefix = ".rel"; - break; - case SHT_RELA: - prefix = ".rela"; - break; - default: - continue; - } - - // If the relocation section comes *after* the target section, we - // don't add Config.AllocSectionsPrefix because we've already added - // the prefix to TargetSec->Name. Otherwise, if the relocation - // section comes *before* the target section, we add the prefix. - if (PrefixedSections.count(TargetSec)) { - Sec.Name = (prefix + TargetSec->Name).str(); - } else { - const auto Iter = Config.SectionsToRename.find(TargetSec->Name); - if (Iter != Config.SectionsToRename.end()) { - // Both `--rename-section` and `--prefix-alloc-sections` are - // given but the target section is not yet renamed. - Sec.Name = - (prefix + Config.AllocSectionsPrefix + Iter->second.NewName) - .str(); - } else { - Sec.Name = - (prefix + Config.AllocSectionsPrefix + TargetSec->Name) - .str(); - } - } + // Add a prefix to allocated sections and their relocation sections. This + // should be done after renaming the section by Config.SectionToRename to + // imitate the GNU objcopy behavior. + if (!Config.AllocSectionsPrefix.empty()) { + DenseSet PrefixedSections; + for (SectionBase &Sec : Obj.sections()) { + if (Sec.Flags & SHF_ALLOC) { + Sec.Name = (Config.AllocSectionsPrefix + Sec.Name).str(); + PrefixedSections.insert(&Sec); + } else if (auto *RelocSec = dyn_cast(&Sec)) { + // Rename relocation sections associated to the allocated sections. + // For example, if we rename .text to .prefix.text, we also rename + // .rel.text to .rel.prefix.text. + // + // Dynamic relocation sections (SHT_REL[A] with SHF_ALLOC) are handled + // above, e.g., .rela.plt is renamed to .prefix.rela.plt, not + // .rela.prefix.plt since GNU objcopy does so. + const SectionBase *TargetSec = RelocSec->getSection(); + if (TargetSec && (TargetSec->Flags & SHF_ALLOC)) { + StringRef prefix; + switch (Sec.Type) { + case SHT_REL: + prefix = ".rel"; + break; + case SHT_RELA: + prefix = ".rela"; + break; + default: + llvm_unreachable("not a relocation section"); } + + // If the relocation section comes *after* the target section, we + // don't add Config.AllocSectionsPrefix because we've already added + // the prefix to TargetSec->Name. Otherwise, if the relocation + // section comes *before* the target section, we add the prefix. 
+ if (PrefixedSections.count(TargetSec)) + Sec.Name = (prefix + TargetSec->Name).str(); + else + Sec.Name = + (prefix + Config.AllocSectionsPrefix + TargetSec->Name).str(); } } } } + if (!Config.SetSectionAlignment.empty()) { + for (SectionBase &Sec : Obj.sections()) { + auto I = Config.SetSectionAlignment.find(Sec.Name); + if (I != Config.SetSectionAlignment.end()) + Sec.Align = I->second; + } + } + if (!Config.SetSectionFlags.empty()) { for (auto &Sec : Obj.sections()) { const auto Iter = Config.SetSectionFlags.find(Sec.Name); @@ -721,7 +718,7 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj, Obj.addSection(Config.AddGnuDebugLink, Config.GnuDebugLinkCRC32); - for (const NewSymbolInfo &SI : Config.SymbolsToAdd) { + for (const NewSymbolInfo &SI : Config.ELF->SymbolsToAdd) { SectionBase *Sec = Obj.findSection(SI.SectionName); uint64_t Value = Sec ? Sec->Addr + SI.Value : SI.Value; Obj.SymbolTable->addSymbol( @@ -746,9 +743,9 @@ static Error writeOutput(const CopyConfig &Config, Object &Obj, Buffer &Out, Error executeObjcopyOnIHex(const CopyConfig &Config, MemoryBuffer &In, Buffer &Out) { IHexReader Reader(&In); - std::unique_ptr Obj = Reader.create(); + std::unique_ptr Obj = Reader.create(true); const ElfType OutputElfType = - getOutputElfType(Config.OutputArch.getValueOr(Config.BinaryArch)); + getOutputElfType(Config.OutputArch.getValueOr(MachineInfo())); if (Error E = handleArgs(Config, *Obj, Reader, OutputElfType)) return E; return writeOutput(Config, *Obj, Out, OutputElfType); @@ -756,13 +753,15 @@ Error executeObjcopyOnIHex(const CopyConfig &Config, MemoryBuffer &In, Error executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In, Buffer &Out) { - BinaryReader Reader(Config.BinaryArch, &In); - std::unique_ptr Obj = Reader.create(); + uint8_t NewSymbolVisibility = + Config.ELF->NewSymbolVisibility.getValueOr((uint8_t)ELF::STV_DEFAULT); + BinaryReader Reader(&In, NewSymbolVisibility); + std::unique_ptr Obj = Reader.create(true); // Prefer OutputArch (-O) if set, otherwise fallback to BinaryArch // (-B). const ElfType OutputElfType = - getOutputElfType(Config.OutputArch.getValueOr(Config.BinaryArch)); + getOutputElfType(Config.OutputArch.getValueOr(MachineInfo())); if (Error E = handleArgs(Config, *Obj, Reader, OutputElfType)) return E; return writeOutput(Config, *Obj, Out, OutputElfType); @@ -771,7 +770,7 @@ Error executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In, Error executeObjcopyOnBinary(const CopyConfig &Config, object::ELFObjectFileBase &In, Buffer &Out) { ELFReader Reader(&In, Config.ExtractPartition); - std::unique_ptr Obj = Reader.create(); + std::unique_ptr Obj = Reader.create(!Config.SymbolsToAdd.empty()); // Prefer OutputArch (-O) if set, otherwise infer it from the input. const ElfType OutputElfType = Config.OutputArch ? 
getOutputElfType(Config.OutputArch.getValue()) diff --git a/tools/llvm-objcopy/ELF/Object.cpp b/tools/llvm-objcopy/ELF/Object.cpp index fa696380e17c..74145dad6e6b 100644 --- a/tools/llvm-objcopy/ELF/Object.cpp +++ b/tools/llvm-objcopy/ELF/Object.cpp @@ -397,7 +397,7 @@ void SectionWriter::visit(const OwnedDataSection &Sec) { llvm::copy(Sec.Data, Out.getBufferStart() + Sec.Offset); } -static const std::vector ZlibGnuMagic = {'Z', 'L', 'I', 'B'}; +static constexpr std::array ZlibGnuMagic = {{'Z', 'L', 'I', 'B'}}; static bool isDataGnuCompressed(ArrayRef Data) { return Data.size() > ZlibGnuMagic.size() && @@ -665,7 +665,7 @@ void SymbolTableSection::addSymbol(Twine Name, uint8_t Bind, uint8_t Type, Sym.Visibility = Visibility; Sym.Size = SymbolSize; Sym.Index = Symbols.size(); - Symbols.emplace_back(llvm::make_unique(Sym)); + Symbols.emplace_back(std::make_unique(Sym)); Size += this->EntrySize; } @@ -1055,29 +1055,28 @@ void GroupSection::accept(MutableSectionVisitor &Visitor) { } // Returns true IFF a section is wholly inside the range of a segment -static bool sectionWithinSegment(const SectionBase &Section, - const Segment &Segment) { +static bool sectionWithinSegment(const SectionBase &Sec, const Segment &Seg) { // If a section is empty it should be treated like it has a size of 1. This is // to clarify the case when an empty section lies on a boundary between two // segments and ensures that the section "belongs" to the second segment and // not the first. - uint64_t SecSize = Section.Size ? Section.Size : 1; + uint64_t SecSize = Sec.Size ? Sec.Size : 1; - if (Section.Type == SHT_NOBITS) { - if (!(Section.Flags & SHF_ALLOC)) + if (Sec.Type == SHT_NOBITS) { + if (!(Sec.Flags & SHF_ALLOC)) return false; - bool SectionIsTLS = Section.Flags & SHF_TLS; - bool SegmentIsTLS = Segment.Type == PT_TLS; + bool SectionIsTLS = Sec.Flags & SHF_TLS; + bool SegmentIsTLS = Seg.Type == PT_TLS; if (SectionIsTLS != SegmentIsTLS) return false; - return Segment.VAddr <= Section.Addr && - Segment.VAddr + Segment.MemSize >= Section.Addr + SecSize; + return Seg.VAddr <= Sec.Addr && + Seg.VAddr + Seg.MemSize >= Sec.Addr + SecSize; } - return Segment.Offset <= Section.OriginalOffset && - Segment.Offset + Segment.FileSize >= Section.OriginalOffset + SecSize; + return Seg.Offset <= Sec.OriginalOffset && + Seg.Offset + Seg.FileSize >= Sec.OriginalOffset + SecSize; } // Returns true IFF a segment's original offset is inside of another segment's @@ -1113,7 +1112,7 @@ void BasicELFBuilder::initFileHeader() { Obj->OSABI = ELFOSABI_NONE; Obj->ABIVersion = 0; Obj->Entry = 0x0; - Obj->Machine = EMachine; + Obj->Machine = EM_NONE; Obj->Version = 1; } @@ -1141,8 +1140,8 @@ SymbolTableSection *BasicELFBuilder::addSymTab(StringTableSection *StrTab) { } void BasicELFBuilder::initSections() { - for (auto &Section : Obj->sections()) - Section.initialize(Obj->sections()); + for (SectionBase &Sec : Obj->sections()) + Sec.initialize(Obj->sections()); } void BinaryELFBuilder::addData(SymbolTableSection *SymTab) { @@ -1161,11 +1160,12 @@ void BinaryELFBuilder::addData(SymbolTableSection *SymTab) { Twine Prefix = Twine("_binary_") + SanitizedFilename; SymTab->addSymbol(Prefix + "_start", STB_GLOBAL, STT_NOTYPE, &DataSection, - /*Value=*/0, STV_DEFAULT, 0, 0); + /*Value=*/0, NewSymbolVisibility, 0, 0); SymTab->addSymbol(Prefix + "_end", STB_GLOBAL, STT_NOTYPE, &DataSection, - /*Value=*/DataSection.Size, STV_DEFAULT, 0, 0); + /*Value=*/DataSection.Size, NewSymbolVisibility, 0, 0); SymTab->addSymbol(Prefix + "_size", STB_GLOBAL, 
STT_NOTYPE, nullptr, - /*Value=*/DataSection.Size, STV_DEFAULT, SHN_ABS, 0); + /*Value=*/DataSection.Size, NewSymbolVisibility, SHN_ABS, + 0); } std::unique_ptr BinaryELFBuilder::build() { @@ -1255,10 +1255,9 @@ template void ELFBuilder::findEhdrOffset() { if (!ExtractPartition) return; - for (const SectionBase &Section : Obj.sections()) { - if (Section.Type == SHT_LLVM_PART_EHDR && - Section.Name == *ExtractPartition) { - EhdrOffset = Section.Offset; + for (const SectionBase &Sec : Obj.sections()) { + if (Sec.Type == SHT_LLVM_PART_EHDR && Sec.Name == *ExtractPartition) { + EhdrOffset = Sec.Offset; return; } } @@ -1287,15 +1286,12 @@ void ELFBuilder::readProgramHeaders(const ELFFile &HeadersFile) { Seg.MemSize = Phdr.p_memsz; Seg.Align = Phdr.p_align; Seg.Index = Index++; - for (SectionBase &Section : Obj.sections()) { - if (sectionWithinSegment(Section, Seg)) { - Seg.addSection(&Section); - if (!Section.ParentSegment || - Section.ParentSegment->Offset > Seg.Offset) { - Section.ParentSegment = &Seg; - } + for (SectionBase &Sec : Obj.sections()) + if (sectionWithinSegment(Sec, Seg)) { + Seg.addSection(&Sec); + if (!Sec.ParentSegment || Sec.ParentSegment->Offset > Seg.Offset) + Sec.ParentSegment = &Seg; } - } } auto &ElfHdr = Obj.ElfHdrSegment; @@ -1531,7 +1527,7 @@ template void ELFBuilder::readSectionHeaders() { } } -template void ELFBuilder::readSections() { +template void ELFBuilder::readSections(bool EnsureSymtab) { // If a section index table exists we'll need to initialize it before we // initialize the symbol table because the symbol table might need to // reference it. @@ -1544,16 +1540,37 @@ template void ELFBuilder::readSections() { if (Obj.SymbolTable) { Obj.SymbolTable->initialize(Obj.sections()); initSymbolTable(Obj.SymbolTable); + } else if (EnsureSymtab) { + // Reuse the existing SHT_STRTAB section if exists. + StringTableSection *StrTab = nullptr; + for (auto &Sec : Obj.sections()) { + if (Sec.Type == ELF::SHT_STRTAB && !(Sec.Flags & SHF_ALLOC)) { + StrTab = static_cast(&Sec); + + // Prefer .strtab to .shstrtab. + if (Obj.SectionNames != &Sec) + break; + } + } + if (!StrTab) + StrTab = &Obj.addSection(); + + SymbolTableSection &SymTab = Obj.addSection(); + SymTab.Name = ".symtab"; + SymTab.Link = StrTab->Index; + SymTab.initialize(Obj.sections()); + SymTab.addSymbol("", 0, 0, nullptr, 0, 0, 0, 0); + Obj.SymbolTable = &SymTab; } // Now that all sections and symbols have been added we can add // relocations that reference symbols and set the link and info fields for // relocation sections. 
- for (auto &Section : Obj.sections()) { - if (&Section == Obj.SymbolTable) + for (auto &Sec : Obj.sections()) { + if (&Sec == Obj.SymbolTable) continue; - Section.initialize(Obj.sections()); - if (auto RelSec = dyn_cast(&Section)) { + Sec.initialize(Obj.sections()); + if (auto RelSec = dyn_cast(&Sec)) { auto Shdr = unwrapOrError(ElfFile.sections()).begin() + RelSec->Index; if (RelSec->Type == SHT_REL) initRelocations(RelSec, Obj.SymbolTable, @@ -1561,7 +1578,7 @@ template void ELFBuilder::readSections() { else initRelocations(RelSec, Obj.SymbolTable, unwrapOrError(ElfFile.relas(Shdr))); - } else if (auto GroupSec = dyn_cast(&Section)) { + } else if (auto GroupSec = dyn_cast(&Sec)) { initGroupSection(GroupSec); } } @@ -1582,7 +1599,7 @@ template void ELFBuilder::readSections() { " is not a string table"); } -template void ELFBuilder::build() { +template void ELFBuilder::build(bool EnsureSymtab) { readSectionHeaders(); findEhdrOffset(); @@ -1601,7 +1618,7 @@ template void ELFBuilder::build() { Obj.Entry = Ehdr.e_entry; Obj.Flags = Ehdr.e_flags; - readSections(); + readSections(EnsureSymtab); readProgramHeaders(HeadersFile); } @@ -1609,8 +1626,8 @@ Writer::~Writer() {} Reader::~Reader() {} -std::unique_ptr BinaryReader::create() const { - return BinaryELFBuilder(MInfo.EMachine, MemBuf).build(); +std::unique_ptr BinaryReader::create(bool /*EnsureSymtab*/) const { + return BinaryELFBuilder(MemBuf, NewSymbolVisibility).build(); } Expected> IHexReader::parse() const { @@ -1639,28 +1656,28 @@ Expected> IHexReader::parse() const { return std::move(Records); } -std::unique_ptr IHexReader::create() const { +std::unique_ptr IHexReader::create(bool /*EnsureSymtab*/) const { std::vector Records = unwrapOrError(parse()); return IHexELFBuilder(Records).build(); } -std::unique_ptr ELFReader::create() const { - auto Obj = llvm::make_unique(); +std::unique_ptr ELFReader::create(bool EnsureSymtab) const { + auto Obj = std::make_unique(); if (auto *O = dyn_cast>(Bin)) { ELFBuilder Builder(*O, *Obj, ExtractPartition); - Builder.build(); + Builder.build(EnsureSymtab); return Obj; } else if (auto *O = dyn_cast>(Bin)) { ELFBuilder Builder(*O, *Obj, ExtractPartition); - Builder.build(); + Builder.build(EnsureSymtab); return Obj; } else if (auto *O = dyn_cast>(Bin)) { ELFBuilder Builder(*O, *Obj, ExtractPartition); - Builder.build(); + Builder.build(EnsureSymtab); return Obj; } else if (auto *O = dyn_cast>(Bin)) { ELFBuilder Builder(*O, *Obj, ExtractPartition); - Builder.build(); + Builder.build(EnsureSymtab); return Obj; } error("invalid file type"); @@ -1693,7 +1710,7 @@ template void ELFWriter::writeEhdr() { Ehdr.e_ehsize = sizeof(Elf_Ehdr); if (WriteSectionHeaders && Obj.sections().size() != 0) { Ehdr.e_shentsize = sizeof(Elf_Shdr); - Ehdr.e_shoff = Obj.SHOffset; + Ehdr.e_shoff = Obj.SHOff; // """ // If the number of sections is greater than or equal to // SHN_LORESERVE (0xff00), this member has the value zero and the actual @@ -1732,7 +1749,7 @@ template void ELFWriter::writeShdrs() { // This reference serves to write the dummy section header at the begining // of the file. It is not used for anything else Elf_Shdr &Shdr = - *reinterpret_cast(Buf.getBufferStart() + Obj.SHOffset); + *reinterpret_cast(Buf.getBufferStart() + Obj.SHOff); Shdr.sh_name = 0; Shdr.sh_type = SHT_NULL; Shdr.sh_flags = 0; @@ -1862,26 +1879,13 @@ void Object::sortSections() { }); } -static uint64_t alignToAddr(uint64_t Offset, uint64_t Addr, uint64_t Align) { - // Calculate Diff such that (Offset + Diff) & -Align == Addr & -Align. 
- if (Align == 0) - Align = 1; - auto Diff = - static_cast(Addr % Align) - static_cast(Offset % Align); - // We only want to add to Offset, however, so if Diff < 0 we can add Align and - // (Offset + Diff) & -Align == Addr & -Align will still hold. - if (Diff < 0) - Diff += Align; - return Offset + Diff; -} - // Orders segments such that if x = y->ParentSegment then y comes before x. static void orderSegments(std::vector &Segments) { llvm::stable_sort(Segments, compareSegmentsByOffset); } // This function finds a consistent layout for a list of segments starting from -// an Offset. It assumes that Segments have been sorted by OrderSegments and +// an Offset. It assumes that Segments have been sorted by orderSegments and // returns an Offset one past the end of the last segment. static uint64_t layoutSegments(std::vector &Segments, uint64_t Offset) { @@ -1902,8 +1906,8 @@ static uint64_t layoutSegments(std::vector &Segments, Seg->Offset = Parent->Offset + Seg->OriginalOffset - Parent->OriginalOffset; } else { - Offset = alignToAddr(Offset, Seg->VAddr, Seg->Align); - Seg->Offset = Offset; + Seg->Offset = + alignTo(Offset, std::max(Seg->Align, 1), Seg->VAddr); } Offset = std::max(Offset, Seg->Offset + Seg->FileSize); } @@ -1925,17 +1929,17 @@ static uint64_t layoutSections(Range Sections, uint64_t Offset) { // of the segment we can assign a new offset to the section. For sections not // covered by segments we can just bump Offset to the next valid location. uint32_t Index = 1; - for (auto &Section : Sections) { - Section.Index = Index++; - if (Section.ParentSegment != nullptr) { - auto Segment = *Section.ParentSegment; - Section.Offset = - Segment.Offset + (Section.OriginalOffset - Segment.OriginalOffset); + for (auto &Sec : Sections) { + Sec.Index = Index++; + if (Sec.ParentSegment != nullptr) { + auto Segment = *Sec.ParentSegment; + Sec.Offset = + Segment.Offset + (Sec.OriginalOffset - Segment.OriginalOffset); } else { - Offset = alignTo(Offset, Section.Align == 0 ? 1 : Section.Align); - Section.Offset = Offset; - if (Section.Type != SHT_NOBITS) - Offset += Section.Size; + Offset = alignTo(Offset, Sec.Align == 0 ? 1 : Sec.Align); + Sec.Offset = Offset; + if (Sec.Type != SHT_NOBITS) + Offset += Sec.Size; } } return Offset; @@ -1971,16 +1975,16 @@ template void ELFWriter::assignOffsets() { // Offset so that SHOffset is valid. if (WriteSectionHeaders) Offset = alignTo(Offset, sizeof(Elf_Addr)); - Obj.SHOffset = Offset; + Obj.SHOff = Offset; } template size_t ELFWriter::totalSize() const { // We already have the section header offset so we can calculate the total // size by just adding up the size of each section header. if (!WriteSectionHeaders) - return Obj.SHOffset; + return Obj.SHOff; size_t ShdrCount = Obj.sections().size() + 1; // Includes null shdr. - return Obj.SHOffset + ShdrCount * sizeof(Elf_Shdr); + return Obj.SHOff + ShdrCount * sizeof(Elf_Shdr); } template Error ELFWriter::write() { @@ -1995,6 +1999,25 @@ template Error ELFWriter::write() { return Buf.commit(); } +static Error removeUnneededSections(Object &Obj) { + // We can remove an empty symbol table from non-relocatable objects. + // Relocatable objects typically have relocation sections whose + // sh_link field points to .symtab, so we can't remove .symtab + // even if it is empty. + if (Obj.isRelocatable() || Obj.SymbolTable == nullptr || + !Obj.SymbolTable->empty()) + return Error::success(); + + // .strtab can be used for section names. In such a case we shouldn't + // remove it. 
+ auto *StrTab = Obj.SymbolTable->getStrTab() == Obj.SectionNames + ? nullptr + : Obj.SymbolTable->getStrTab(); + return Obj.removeSections(false, [&](const SectionBase &Sec) { + return &Sec == Obj.SymbolTable || &Sec == StrTab; + }); +} + template Error ELFWriter::finalize() { // It could happen that SectionNames has been removed and yet the user wants // a section header table output. We need to throw an error if a user tries @@ -2004,6 +2027,8 @@ template Error ELFWriter::finalize() { "cannot write section header table because " "section header string table was removed"); + if (Error E = removeUnneededSections(Obj)) + return E; Obj.sortSections(); // We need to assign indexes before we perform layout because we need to know @@ -2045,9 +2070,8 @@ template Error ELFWriter::finalize() { // Make sure we add the names of all the sections. Importantly this must be // done after we decide to add or remove SectionIndexes. if (Obj.SectionNames != nullptr) - for (const auto &Section : Obj.sections()) { - Obj.SectionNames->addString(Section.Name); - } + for (const SectionBase &Sec : Obj.sections()) + Obj.SectionNames->addString(Sec.Name); initEhdrSegment(); @@ -2055,8 +2079,8 @@ template Error ELFWriter::finalize() { // Also, the output arch may not be the same as the input arch, so fix up // size-related fields before doing layout calculations. uint64_t Index = 0; - auto SecSizer = llvm::make_unique>(); - for (auto &Sec : Obj.sections()) { + auto SecSizer = std::make_unique>(); + for (SectionBase &Sec : Obj.sections()) { Sec.Index = Index++; Sec.accept(*SecSizer); } @@ -2082,40 +2106,36 @@ template Error ELFWriter::finalize() { // Finally now that all offsets and indexes have been set we can finalize any // remaining issues. - uint64_t Offset = Obj.SHOffset + sizeof(Elf_Shdr); - for (SectionBase &Section : Obj.sections()) { - Section.HeaderOffset = Offset; + uint64_t Offset = Obj.SHOff + sizeof(Elf_Shdr); + for (SectionBase &Sec : Obj.sections()) { + Sec.HeaderOffset = Offset; Offset += sizeof(Elf_Shdr); if (WriteSectionHeaders) - Section.NameIndex = Obj.SectionNames->findIndex(Section.Name); - Section.finalize(); + Sec.NameIndex = Obj.SectionNames->findIndex(Sec.Name); + Sec.finalize(); } if (Error E = Buf.allocate(totalSize())) return E; - SecWriter = llvm::make_unique>(Buf); + SecWriter = std::make_unique>(Buf); return Error::success(); } Error BinaryWriter::write() { - for (auto &Section : Obj.sections()) - if (Section.Flags & SHF_ALLOC) - Section.accept(*SecWriter); + for (const SectionBase &Sec : Obj.allocSections()) + Sec.accept(*SecWriter); return Buf.commit(); } Error BinaryWriter::finalize() { - // TODO: Create a filter range to construct OrderedSegments from so that this - // code can be deduped with assignOffsets above. This should also solve the - // todo below for LayoutSections. // We need a temporary list of segments that has a special order to it // so that we know that anytime ->ParentSegment is set that segment has // already had it's offset properly set. We only want to consider the segments // that will affect layout of allocated sections so we only add those. 
std::vector OrderedSegments; - for (SectionBase &Section : Obj.sections()) - if ((Section.Flags & SHF_ALLOC) != 0 && Section.ParentSegment != nullptr) - OrderedSegments.push_back(Section.ParentSegment); + for (const SectionBase &Sec : Obj.allocSections()) + if (Sec.ParentSegment != nullptr) + OrderedSegments.push_back(Sec.ParentSegment); // For binary output, we're going to use physical addresses instead of // virtual addresses, since a binary output is used for cases like ROM @@ -2130,7 +2150,7 @@ Error BinaryWriter::finalize() { llvm::stable_sort(OrderedSegments, compareSegmentsByPAddr); // Because we add a ParentSegment for each section we might have duplicate - // segments in OrderedSegments. If there were duplicates then LayoutSegments + // segments in OrderedSegments. If there were duplicates then layoutSegments // would do very strange things. auto End = std::unique(std::begin(OrderedSegments), std::end(OrderedSegments)); @@ -2158,28 +2178,20 @@ Error BinaryWriter::finalize() { } } - // TODO: generalize LayoutSections to take a range. Pass a special range - // constructed from an iterator that skips values for which a predicate does - // not hold. Then pass such a range to LayoutSections instead of constructing - // AllocatedSections here. - std::vector AllocatedSections; - for (SectionBase &Section : Obj.sections()) - if (Section.Flags & SHF_ALLOC) - AllocatedSections.push_back(&Section); - layoutSections(make_pointee_range(AllocatedSections), Offset); + layoutSections(Obj.allocSections(), Offset); // Now that every section has been laid out we just need to compute the total // file size. This might not be the same as the offset returned by - // LayoutSections, because we want to truncate the last segment to the end of + // layoutSections, because we want to truncate the last segment to the end of // its last section, to match GNU objcopy's behaviour. TotalSize = 0; - for (SectionBase *Section : AllocatedSections) - if (Section->Type != SHT_NOBITS) - TotalSize = std::max(TotalSize, Section->Offset + Section->Size); + for (const SectionBase &Sec : Obj.allocSections()) + if (Sec.Type != SHT_NOBITS) + TotalSize = std::max(TotalSize, Sec.Offset + Sec.Size); if (Error E = Buf.allocate(TotalSize)) return E; - SecWriter = llvm::make_unique(Buf); + SecWriter = std::make_unique(Buf); return Error::success(); } @@ -2259,17 +2271,17 @@ Error IHexWriter::finalize() { // If any section we're to write has segment then we // switch to using physical addresses. Otherwise we // use section virtual address. 
- for (auto &Section : Obj.sections()) - if (ShouldWrite(Section) && IsInPtLoad(Section)) { + for (const SectionBase &Sec : Obj.sections()) + if (ShouldWrite(Sec) && IsInPtLoad(Sec)) { UseSegments = true; break; } - for (auto &Section : Obj.sections()) - if (ShouldWrite(Section) && (!UseSegments || IsInPtLoad(Section))) { - if (Error E = checkSection(Section)) + for (const SectionBase &Sec : Obj.sections()) + if (ShouldWrite(Sec) && (!UseSegments || IsInPtLoad(Sec))) { + if (Error E = checkSection(Sec)) return E; - Sections.insert(&Section); + Sections.insert(&Sec); } IHexSectionWriterBase LengthCalc(Buf); diff --git a/tools/llvm-objcopy/ELF/Object.h b/tools/llvm-objcopy/ELF/Object.h index f3df93b9662f..eeacb014e4dc 100644 --- a/tools/llvm-objcopy/ELF/Object.h +++ b/tools/llvm-objcopy/ELF/Object.h @@ -57,8 +57,8 @@ public: : Sections(Secs) {} SectionTableRef(const SectionTableRef &) = default; - iterator begin() { return iterator(Sections.data()); } - iterator end() { return iterator(Sections.data() + Sections.size()); } + iterator begin() const { return iterator(Sections.data()); } + iterator end() const { return iterator(Sections.data() + Sections.size()); } size_t size() const { return Sections.size(); } SectionBase *getSection(uint32_t Index, Twine ErrMsg); @@ -863,7 +863,7 @@ public: class Reader { public: virtual ~Reader(); - virtual std::unique_ptr create() const = 0; + virtual std::unique_ptr create(bool EnsureSymtab) const = 0; }; using object::Binary; @@ -873,7 +873,6 @@ using object::OwningBinary; class BasicELFBuilder { protected: - uint16_t EMachine; std::unique_ptr Obj; void initFileHeader(); @@ -883,17 +882,18 @@ protected: void initSections(); public: - BasicELFBuilder(uint16_t EM) - : EMachine(EM), Obj(llvm::make_unique()) {} + BasicELFBuilder() : Obj(std::make_unique()) {} }; class BinaryELFBuilder : public BasicELFBuilder { MemoryBuffer *MemBuf; + uint8_t NewSymbolVisibility; void addData(SymbolTableSection *SymTab); public: - BinaryELFBuilder(uint16_t EM, MemoryBuffer *MB) - : BasicELFBuilder(EM), MemBuf(MB) {} + BinaryELFBuilder(MemoryBuffer *MB, uint8_t NewSymbolVisibility) + : BasicELFBuilder(), MemBuf(MB), + NewSymbolVisibility(NewSymbolVisibility) {} std::unique_ptr build(); }; @@ -905,7 +905,7 @@ class IHexELFBuilder : public BasicELFBuilder { public: IHexELFBuilder(const std::vector &Records) - : BasicELFBuilder(ELF::EM_386), Records(Records) {} + : BasicELFBuilder(), Records(Records) {} std::unique_ptr build(); }; @@ -926,7 +926,7 @@ private: void initGroupSection(GroupSection *GroupSec); void initSymbolTable(SymbolTableSection *SymTab); void readSectionHeaders(); - void readSections(); + void readSections(bool EnsureSymtab); void findEhdrOffset(); SectionBase &makeSection(const Elf_Shdr &Shdr); @@ -936,17 +936,17 @@ public: : ElfFile(*ElfObj.getELFFile()), Obj(Obj), ExtractPartition(ExtractPartition) {} - void build(); + void build(bool EnsureSymtab); }; class BinaryReader : public Reader { - const MachineInfo &MInfo; MemoryBuffer *MemBuf; + uint8_t NewSymbolVisibility; public: - BinaryReader(const MachineInfo &MI, MemoryBuffer *MB) - : MInfo(MI), MemBuf(MB) {} - std::unique_ptr create() const override; + BinaryReader(MemoryBuffer *MB, const uint8_t NewSymbolVisibility) + : MemBuf(MB), NewSymbolVisibility(NewSymbolVisibility) {} + std::unique_ptr create(bool EnsureSymtab) const override; }; class IHexReader : public Reader { @@ -968,7 +968,7 @@ class IHexReader : public Reader { public: IHexReader(MemoryBuffer *MB) : MemBuf(MB) {} - std::unique_ptr create() 
const override; + std::unique_ptr create(bool EnsureSymtab) const override; }; class ELFReader : public Reader { @@ -976,7 +976,7 @@ class ELFReader : public Reader { Optional ExtractPartition; public: - std::unique_ptr create() const override; + std::unique_ptr create(bool EnsureSymtab) const override; explicit ELFReader(Binary *B, Optional ExtractPartition) : Bin(B), ExtractPartition(ExtractPartition) {} }; @@ -990,6 +990,10 @@ private: std::vector Segments; std::vector RemovedSections; + static bool sectionIsAlloc(const SectionBase &Sec) { + return Sec.Flags & ELF::SHF_ALLOC; + }; + public: template using Range = iterator_range< @@ -1011,13 +1015,14 @@ public: uint8_t OSABI; uint8_t ABIVersion; uint64_t Entry; - uint64_t SHOffset; + uint64_t SHOff; uint32_t Type; uint32_t Machine; uint32_t Version; uint32_t Flags; bool HadShdrs = true; + bool MustBeRelocatable = false; StringTableSection *SectionNames = nullptr; SymbolTableSection *SymbolTable = nullptr; SectionIndexSection *SectionIndexTable = nullptr; @@ -1027,6 +1032,13 @@ public: ConstRange sections() const { return make_pointee_range(Sections); } + iterator_range< + filter_iterator::const_iterator>, + decltype(§ionIsAlloc)>> + allocSections() const { + return make_filter_range(make_pointee_range(Sections), sectionIsAlloc); + } + SectionBase *findSection(StringRef Name) { auto SecIt = find_if(Sections, [&](const SecPtr &Sec) { return Sec->Name == Name; }); @@ -1041,16 +1053,20 @@ public: std::function ToRemove); Error removeSymbols(function_ref ToRemove); template T &addSection(Ts &&... Args) { - auto Sec = llvm::make_unique(std::forward(Args)...); + auto Sec = std::make_unique(std::forward(Args)...); auto Ptr = Sec.get(); + MustBeRelocatable |= isa(*Ptr); Sections.emplace_back(std::move(Sec)); Ptr->Index = Sections.size(); return *Ptr; } Segment &addSegment(ArrayRef Data) { - Segments.emplace_back(llvm::make_unique(Data)); + Segments.emplace_back(std::make_unique(Data)); return *Segments.back(); } + bool isRelocatable() const { + return (Type != ELF::ET_DYN && Type != ELF::ET_EXEC) || MustBeRelocatable; + } }; } // end namespace elf diff --git a/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp b/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp new file mode 100644 index 000000000000..f621f3aa09cf --- /dev/null +++ b/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp @@ -0,0 +1,350 @@ +//===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MachOLayoutBuilder.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { +namespace objcopy { +namespace macho { + +uint32_t MachOLayoutBuilder::computeSizeOfCmds() const { + uint32_t Size = 0; + for (const auto &LC : O.LoadCommands) { + const MachO::macho_load_command &MLC = LC.MachOLoadCommand; + auto cmd = MLC.load_command_data.cmd; + switch (cmd) { + case MachO::LC_SEGMENT: + Size += sizeof(MachO::segment_command) + + sizeof(MachO::section) * LC.Sections.size(); + continue; + case MachO::LC_SEGMENT_64: + Size += sizeof(MachO::segment_command_64) + + sizeof(MachO::section_64) * LC.Sections.size(); + continue; + } + + switch (cmd) { +#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ + case MachO::LCName: \ + Size += sizeof(MachO::LCStruct) + LC.Payload.size(); \ + break; +#include "llvm/BinaryFormat/MachO.def" +#undef HANDLE_LOAD_COMMAND + } + } + + return Size; +} + +void MachOLayoutBuilder::constructStringTable() { + for (std::unique_ptr &Sym : O.SymTable.Symbols) + StrTableBuilder.add(Sym->Name); + StrTableBuilder.finalize(); +} + +void MachOLayoutBuilder::updateSymbolIndexes() { + uint32_t Index = 0; + for (auto &Symbol : O.SymTable.Symbols) + Symbol->Index = Index++; +} + +// Updates the index and the number of local/external/undefined symbols. +void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) { + assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB); + // Make sure that nlist entries in the symbol table are sorted by the those + // types. The order is: local < defined external < undefined external. + assert(std::is_sorted(O.SymTable.Symbols.begin(), O.SymTable.Symbols.end(), + [](const std::unique_ptr &A, + const std::unique_ptr &B) { + return (A->isLocalSymbol() && !B->isLocalSymbol()) || + (!A->isUndefinedSymbol() && + B->isUndefinedSymbol()); + }) && + "Symbols are not sorted by their types."); + + uint32_t NumLocalSymbols = 0; + auto Iter = O.SymTable.Symbols.begin(); + auto End = O.SymTable.Symbols.end(); + for (; Iter != End; ++Iter) { + if ((*Iter)->isExternalSymbol()) + break; + + ++NumLocalSymbols; + } + + uint32_t NumExtDefSymbols = 0; + for (; Iter != End; ++Iter) { + if ((*Iter)->isUndefinedSymbol()) + break; + + ++NumExtDefSymbols; + } + + MLC.dysymtab_command_data.ilocalsym = 0; + MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols; + MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols; + MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols; + MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols; + MLC.dysymtab_command_data.nundefsym = + O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols); +} + +// Recomputes and updates offset and size fields in load commands and sections +// since they could be modified. +uint64_t MachOLayoutBuilder::layoutSegments() { + auto HeaderSize = + Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); + const bool IsObjectFile = + O.Header.FileType == MachO::HeaderFileType::MH_OBJECT; + uint64_t Offset = IsObjectFile ? 
(HeaderSize + O.Header.SizeOfCmds) : 0; + for (auto &LC : O.LoadCommands) { + auto &MLC = LC.MachOLoadCommand; + StringRef Segname; + uint64_t SegmentVmAddr; + uint64_t SegmentVmSize; + switch (MLC.load_command_data.cmd) { + case MachO::LC_SEGMENT: + SegmentVmAddr = MLC.segment_command_data.vmaddr; + SegmentVmSize = MLC.segment_command_data.vmsize; + Segname = StringRef(MLC.segment_command_data.segname, + strnlen(MLC.segment_command_data.segname, + sizeof(MLC.segment_command_data.segname))); + break; + case MachO::LC_SEGMENT_64: + SegmentVmAddr = MLC.segment_command_64_data.vmaddr; + SegmentVmSize = MLC.segment_command_64_data.vmsize; + Segname = StringRef(MLC.segment_command_64_data.segname, + strnlen(MLC.segment_command_64_data.segname, + sizeof(MLC.segment_command_64_data.segname))); + break; + default: + continue; + } + + if (Segname == "__LINKEDIT") { + // We update the __LINKEDIT segment later (in layoutTail). + assert(LC.Sections.empty() && "__LINKEDIT segment has sections"); + LinkEditLoadCommand = &MLC; + continue; + } + + // Update file offsets and sizes of sections. + uint64_t SegOffset = Offset; + uint64_t SegFileSize = 0; + uint64_t VMSize = 0; + for (auto &Sec : LC.Sections) { + if (IsObjectFile) { + if (Sec.isVirtualSection()) { + Sec.Offset = 0; + } else { + uint64_t PaddingSize = + offsetToAlignment(SegFileSize, Align(1ull << Sec.Align)); + Sec.Offset = SegOffset + SegFileSize + PaddingSize; + Sec.Size = Sec.Content.size(); + SegFileSize += PaddingSize + Sec.Size; + } + VMSize = std::max(VMSize, Sec.Addr + Sec.Size); + } else { + if (Sec.isVirtualSection()) { + Sec.Offset = 0; + VMSize += Sec.Size; + } else { + uint32_t SectOffset = Sec.Addr - SegmentVmAddr; + Sec.Offset = SegOffset + SectOffset; + Sec.Size = Sec.Content.size(); + SegFileSize = std::max(SegFileSize, SectOffset + Sec.Size); + VMSize = std::max(VMSize, SegFileSize); + } + } + } + + if (IsObjectFile) { + Offset += SegFileSize; + } else { + Offset = alignTo(Offset + SegFileSize, PageSize); + SegFileSize = alignTo(SegFileSize, PageSize); + // Use the original vmsize if the segment is __PAGEZERO. + VMSize = + Segname == "__PAGEZERO" ? SegmentVmSize : alignTo(VMSize, PageSize); + } + + switch (MLC.load_command_data.cmd) { + case MachO::LC_SEGMENT: + MLC.segment_command_data.cmdsize = + sizeof(MachO::segment_command) + + sizeof(MachO::section) * LC.Sections.size(); + MLC.segment_command_data.nsects = LC.Sections.size(); + MLC.segment_command_data.fileoff = SegOffset; + MLC.segment_command_data.vmsize = VMSize; + MLC.segment_command_data.filesize = SegFileSize; + break; + case MachO::LC_SEGMENT_64: + MLC.segment_command_64_data.cmdsize = + sizeof(MachO::segment_command_64) + + sizeof(MachO::section_64) * LC.Sections.size(); + MLC.segment_command_64_data.nsects = LC.Sections.size(); + MLC.segment_command_64_data.fileoff = SegOffset; + MLC.segment_command_64_data.vmsize = VMSize; + MLC.segment_command_64_data.filesize = SegFileSize; + break; + } + } + + return Offset; +} + +uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) { + for (auto &LC : O.LoadCommands) + for (auto &Sec : LC.Sections) { + Sec.RelOff = Sec.Relocations.empty() ? 
0 : Offset; + Sec.NReloc = Sec.Relocations.size(); + Offset += sizeof(MachO::any_relocation_info) * Sec.NReloc; + } + + return Offset; +} + +Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { + // The order of LINKEDIT elements is as follows: + // rebase info, binding info, weak binding info, lazy binding info, export + // trie, data-in-code, symbol table, indirect symbol table, symbol table + // strings. + uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist); + uint64_t StartOfLinkEdit = Offset; + uint64_t StartOfRebaseInfo = StartOfLinkEdit; + uint64_t StartOfBindingInfo = StartOfRebaseInfo + O.Rebases.Opcodes.size(); + uint64_t StartOfWeakBindingInfo = StartOfBindingInfo + O.Binds.Opcodes.size(); + uint64_t StartOfLazyBindingInfo = + StartOfWeakBindingInfo + O.WeakBinds.Opcodes.size(); + uint64_t StartOfExportTrie = + StartOfLazyBindingInfo + O.LazyBinds.Opcodes.size(); + uint64_t StartOfFunctionStarts = StartOfExportTrie + O.Exports.Trie.size(); + uint64_t StartOfDataInCode = + StartOfFunctionStarts + O.FunctionStarts.Data.size(); + uint64_t StartOfSymbols = StartOfDataInCode + O.DataInCode.Data.size(); + uint64_t StartOfIndirectSymbols = + StartOfSymbols + NListSize * O.SymTable.Symbols.size(); + uint64_t StartOfSymbolStrings = + StartOfIndirectSymbols + + sizeof(uint32_t) * O.IndirectSymTable.Symbols.size(); + uint64_t LinkEditSize = + (StartOfSymbolStrings + StrTableBuilder.getSize()) - StartOfLinkEdit; + + // Now we have determined the layout of the contents of the __LINKEDIT + // segment. Update its load command. + if (LinkEditLoadCommand) { + MachO::macho_load_command *MLC = LinkEditLoadCommand; + switch (LinkEditLoadCommand->load_command_data.cmd) { + case MachO::LC_SEGMENT: + MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command); + MLC->segment_command_data.fileoff = StartOfLinkEdit; + MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize); + MLC->segment_command_data.filesize = LinkEditSize; + break; + case MachO::LC_SEGMENT_64: + MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64); + MLC->segment_command_64_data.fileoff = StartOfLinkEdit; + MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize); + MLC->segment_command_64_data.filesize = LinkEditSize; + break; + } + } + + for (auto &LC : O.LoadCommands) { + auto &MLC = LC.MachOLoadCommand; + auto cmd = MLC.load_command_data.cmd; + switch (cmd) { + case MachO::LC_SYMTAB: + MLC.symtab_command_data.symoff = StartOfSymbols; + MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size(); + MLC.symtab_command_data.stroff = StartOfSymbolStrings; + MLC.symtab_command_data.strsize = StrTableBuilder.getSize(); + break; + case MachO::LC_DYSYMTAB: { + if (MLC.dysymtab_command_data.ntoc != 0 || + MLC.dysymtab_command_data.nmodtab != 0 || + MLC.dysymtab_command_data.nextrefsyms != 0 || + MLC.dysymtab_command_data.nlocrel != 0 || + MLC.dysymtab_command_data.nextrel != 0) + return createStringError(llvm::errc::not_supported, + "shared library is not yet supported"); + + if (!O.IndirectSymTable.Symbols.empty()) { + MLC.dysymtab_command_data.indirectsymoff = StartOfIndirectSymbols; + MLC.dysymtab_command_data.nindirectsyms = + O.IndirectSymTable.Symbols.size(); + } + + updateDySymTab(MLC); + break; + } + case MachO::LC_DATA_IN_CODE: + MLC.linkedit_data_command_data.dataoff = StartOfDataInCode; + MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size(); + break; + case MachO::LC_FUNCTION_STARTS: + MLC.linkedit_data_command_data.dataoff = 
StartOfFunctionStarts; + MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size(); + break; + case MachO::LC_DYLD_INFO: + case MachO::LC_DYLD_INFO_ONLY: + MLC.dyld_info_command_data.rebase_off = + O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo; + MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size(); + MLC.dyld_info_command_data.bind_off = + O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo; + MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size(); + MLC.dyld_info_command_data.weak_bind_off = + O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo; + MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size(); + MLC.dyld_info_command_data.lazy_bind_off = + O.LazyBinds.Opcodes.empty() ? 0 : StartOfLazyBindingInfo; + MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size(); + MLC.dyld_info_command_data.export_off = + O.Exports.Trie.empty() ? 0 : StartOfExportTrie; + MLC.dyld_info_command_data.export_size = O.Exports.Trie.size(); + break; + case MachO::LC_LOAD_DYLINKER: + case MachO::LC_MAIN: + case MachO::LC_RPATH: + case MachO::LC_SEGMENT: + case MachO::LC_SEGMENT_64: + case MachO::LC_VERSION_MIN_MACOSX: + case MachO::LC_BUILD_VERSION: + case MachO::LC_ID_DYLIB: + case MachO::LC_LOAD_DYLIB: + case MachO::LC_UUID: + case MachO::LC_SOURCE_VERSION: + // Nothing to update. + break; + default: + // Abort if it's unsupported in order to prevent corrupting the object. + return createStringError(llvm::errc::not_supported, + "unsupported load command (cmd=0x%x)", cmd); + } + } + + return Error::success(); +} + +Error MachOLayoutBuilder::layout() { + O.Header.NCmds = O.LoadCommands.size(); + O.Header.SizeOfCmds = computeSizeOfCmds(); + constructStringTable(); + updateSymbolIndexes(); + uint64_t Offset = layoutSegments(); + Offset = layoutRelocations(Offset); + return layoutTail(Offset); +} + +} // end namespace macho +} // end namespace objcopy +} // end namespace llvm diff --git a/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h b/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h new file mode 100644 index 000000000000..21cbe56605de --- /dev/null +++ b/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h @@ -0,0 +1,50 @@ +//===- MachOLayoutBuilder.h -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H +#define LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H + +#include "MachOObjcopy.h" +#include "Object.h" + +namespace llvm { +namespace objcopy { +namespace macho { + +class MachOLayoutBuilder { + Object &O; + bool Is64Bit; + uint64_t PageSize; + + // Points to the __LINKEDIT segment if it exists. + MachO::macho_load_command *LinkEditLoadCommand = nullptr; + StringTableBuilder StrTableBuilder{StringTableBuilder::MachO}; + + uint32_t computeSizeOfCmds() const; + void constructStringTable(); + void updateSymbolIndexes(); + void updateDySymTab(MachO::macho_load_command &MLC); + uint64_t layoutSegments(); + uint64_t layoutRelocations(uint64_t Offset); + Error layoutTail(uint64_t Offset); + +public: + MachOLayoutBuilder(Object &O, bool Is64Bit, uint64_t PageSize) + : O(O), Is64Bit(Is64Bit), PageSize(PageSize) {} + + // Recomputes and updates fields in the given object such as file offsets. 
+ Error layout(); + + StringTableBuilder &getStringTableBuilder() { return StrTableBuilder; } +}; + +} // end namespace macho +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H diff --git a/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/tools/llvm-objcopy/MachO/MachOObjcopy.cpp index 19343b65dd1e..6d586e7d73f1 100644 --- a/tools/llvm-objcopy/MachO/MachOObjcopy.cpp +++ b/tools/llvm-objcopy/MachO/MachOObjcopy.cpp @@ -25,18 +25,20 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) { !Config.SplitDWO.empty() || !Config.SymbolsPrefix.empty() || !Config.AllocSectionsPrefix.empty() || !Config.AddSection.empty() || !Config.DumpSection.empty() || !Config.KeepSection.empty() || - !Config.OnlySection.empty() || !Config.SymbolsToGlobalize.empty() || - !Config.SymbolsToKeep.empty() || !Config.SymbolsToLocalize.empty() || - !Config.SymbolsToWeaken.empty() || !Config.SymbolsToKeepGlobal.empty() || - !Config.SectionsToRename.empty() || !Config.SymbolsToRename.empty() || + Config.NewSymbolVisibility || !Config.OnlySection.empty() || + !Config.SymbolsToGlobalize.empty() || !Config.SymbolsToKeep.empty() || + !Config.SymbolsToLocalize.empty() || !Config.SymbolsToWeaken.empty() || + !Config.SymbolsToKeepGlobal.empty() || !Config.SectionsToRename.empty() || + !Config.SymbolsToRename.empty() || !Config.UnneededSymbolsToRemove.empty() || - !Config.SetSectionFlags.empty() || !Config.ToRemove.empty() || - Config.ExtractDWO || Config.KeepFileSymbols || Config.LocalizeHidden || - Config.PreserveDates || Config.StripDWO || Config.StripNonAlloc || - Config.StripSections || Config.Weaken || Config.DecompressDebugSections || - Config.StripDebug || Config.StripNonAlloc || Config.StripSections || - Config.StripUnneeded || Config.DiscardMode != DiscardType::None || - !Config.SymbolsToAdd.empty() || Config.EntryExpr) { + !Config.SetSectionAlignment.empty() || !Config.SetSectionFlags.empty() || + !Config.ToRemove.empty() || Config.ExtractDWO || Config.KeepFileSymbols || + Config.LocalizeHidden || Config.PreserveDates || Config.StripDWO || + Config.StripNonAlloc || Config.StripSections || Config.Weaken || + Config.DecompressDebugSections || Config.StripDebug || + Config.StripNonAlloc || Config.StripSections || Config.StripUnneeded || + Config.DiscardMode != DiscardType::None || !Config.SymbolsToAdd.empty() || + Config.EntryExpr) { return createStringError(llvm::errc::invalid_argument, "option not supported by llvm-objcopy for MachO"); } @@ -57,7 +59,11 @@ Error executeObjcopyOnBinary(const CopyConfig &Config, if (Error E = handleArgs(Config, *O)) return createFileError(Config.InputFilename, std::move(E)); - MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), Out); + // TODO: Support 16KB pages which are employed in iOS arm64 binaries: + // https://github.com/llvm/llvm-project/commit/1bebb2832ee312d3b0316dacff457a7a29435edb + const uint64_t PageSize = 4096; + + MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), PageSize, Out); if (auto E = Writer.finalize()) return E; return Writer.write(); diff --git a/tools/llvm-objcopy/MachO/MachOReader.cpp b/tools/llvm-objcopy/MachO/MachOReader.cpp index d31293034608..b48a0d8952d0 100644 --- a/tools/llvm-objcopy/MachO/MachOReader.cpp +++ b/tools/llvm-objcopy/MachO/MachOReader.cpp @@ -129,10 +129,19 @@ void MachOReader::readLoadCommands(Object &O) const { case MachO::LC_SYMTAB: O.SymTabCommandIndex = O.LoadCommands.size(); break; + case MachO::LC_DYSYMTAB: + O.DySymTabCommandIndex = O.LoadCommands.size(); + 
break; case MachO::LC_DYLD_INFO: case MachO::LC_DYLD_INFO_ONLY: O.DyLdInfoCommandIndex = O.LoadCommands.size(); break; + case MachO::LC_DATA_IN_CODE: + O.DataInCodeCommandIndex = O.LoadCommands.size(); + break; + case MachO::LC_FUNCTION_STARTS: + O.FunctionStartsCommandIndex = O.LoadCommands.size(); + break; } #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ case MachO::LCName: \ @@ -188,7 +197,7 @@ void MachOReader::readSymbolTable(Object &O) const { StrTable, MachOObj.getSymbolTableEntry(Symbol.getRawDataRefImpl()))); - O.SymTable.Symbols.push_back(llvm::make_unique(SE)); + O.SymTable.Symbols.push_back(std::make_unique(SE)); } } @@ -222,8 +231,37 @@ void MachOReader::readExportInfo(Object &O) const { O.Exports.Trie = MachOObj.getDyldInfoExportsTrie(); } +void MachOReader::readDataInCodeData(Object &O) const { + if (!O.DataInCodeCommandIndex) + return; + const MachO::linkedit_data_command &LDC = + O.LoadCommands[*O.DataInCodeCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + + O.DataInCode.Data = arrayRefFromStringRef( + MachOObj.getData().substr(LDC.dataoff, LDC.datasize)); +} + +void MachOReader::readFunctionStartsData(Object &O) const { + if (!O.FunctionStartsCommandIndex) + return; + const MachO::linkedit_data_command &LDC = + O.LoadCommands[*O.FunctionStartsCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + + O.FunctionStarts.Data = arrayRefFromStringRef( + MachOObj.getData().substr(LDC.dataoff, LDC.datasize)); +} + +void MachOReader::readIndirectSymbolTable(Object &O) const { + MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand(); + for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) + O.IndirectSymTable.Symbols.push_back( + MachOObj.getIndirectSymbolTableEntry(DySymTab, i)); +} + std::unique_ptr MachOReader::create() const { - auto Obj = llvm::make_unique(); + auto Obj = std::make_unique(); readHeader(*Obj); readLoadCommands(*Obj); readSymbolTable(*Obj); @@ -233,6 +271,9 @@ std::unique_ptr MachOReader::create() const { readWeakBindInfo(*Obj); readLazyBindInfo(*Obj); readExportInfo(*Obj); + readDataInCodeData(*Obj); + readFunctionStartsData(*Obj); + readIndirectSymbolTable(*Obj); return Obj; } diff --git a/tools/llvm-objcopy/MachO/MachOReader.h b/tools/llvm-objcopy/MachO/MachOReader.h index 795e5cc2363d..00c8f0d55f61 100644 --- a/tools/llvm-objcopy/MachO/MachOReader.h +++ b/tools/llvm-objcopy/MachO/MachOReader.h @@ -36,6 +36,9 @@ class MachOReader : public Reader { void readWeakBindInfo(Object &O) const; void readLazyBindInfo(Object &O) const; void readExportInfo(Object &O) const; + void readDataInCodeData(Object &O) const; + void readFunctionStartsData(Object &O) const; + void readIndirectSymbolTable(Object &O) const; public: explicit MachOReader(const object::MachOObjectFile &Obj) : MachOObj(Obj) {} diff --git a/tools/llvm-objcopy/MachO/MachOWriter.cpp b/tools/llvm-objcopy/MachO/MachOWriter.cpp index 74200c5aa62a..4ec91cc9eb7a 100644 --- a/tools/llvm-objcopy/MachO/MachOWriter.cpp +++ b/tools/llvm-objcopy/MachO/MachOWriter.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "MachOWriter.h" +#include "MachOLayoutBuilder.h" #include "Object.h" #include "llvm/ADT/STLExtras.h" #include "llvm/BinaryFormat/MachO.h" @@ -40,16 +41,10 @@ size_t MachOWriter::totalSize() const { const MachO::symtab_command &SymTabCommand = O.LoadCommands[*O.SymTabCommandIndex] .MachOLoadCommand.symtab_command_data; - if (SymTabCommand.symoff) { - assert((SymTabCommand.nsyms == 
O.SymTable.Symbols.size()) && - "Incorrect number of symbols"); + if (SymTabCommand.symoff) Ends.push_back(SymTabCommand.symoff + symTableSize()); - } - if (SymTabCommand.stroff) { - assert((SymTabCommand.strsize == StrTableBuilder.getSize()) && - "Incorrect string table size"); + if (SymTabCommand.stroff) Ends.push_back(SymTabCommand.stroff + SymTabCommand.strsize); - } } if (O.DyLdInfoCommandIndex) { const MachO::dyld_info_command &DyLdInfoCommand = @@ -84,6 +79,36 @@ size_t MachOWriter::totalSize() const { } } + if (O.DySymTabCommandIndex) { + const MachO::dysymtab_command &DySymTabCommand = + O.LoadCommands[*O.DySymTabCommandIndex] + .MachOLoadCommand.dysymtab_command_data; + + if (DySymTabCommand.indirectsymoff) + Ends.push_back(DySymTabCommand.indirectsymoff + + sizeof(uint32_t) * O.IndirectSymTable.Symbols.size()); + } + + if (O.DataInCodeCommandIndex) { + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.DataInCodeCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + + if (LinkEditDataCommand.dataoff) + Ends.push_back(LinkEditDataCommand.dataoff + + LinkEditDataCommand.datasize); + } + + if (O.FunctionStartsCommandIndex) { + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.FunctionStartsCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + + if (LinkEditDataCommand.dataoff) + Ends.push_back(LinkEditDataCommand.dataoff + + LinkEditDataCommand.datasize); + } + // Otherwise, use the last section / reloction. for (const auto &LC : O.LoadCommands) for (const auto &S : LC.Sections) { @@ -120,14 +145,6 @@ void MachOWriter::writeHeader() { memcpy(B.getBufferStart(), &Header, HeaderSize); } -void MachOWriter::updateSymbolIndexes() { - uint32_t Index = 0; - for (auto &Symbol : O.SymTable.Symbols) { - Symbol->Index = Index; - Index++; - } -} - void MachOWriter::writeLoadCommands() { uint8_t *Begin = B.getBufferStart() + headerSize(); for (const auto &LC : O.LoadCommands) { @@ -253,7 +270,7 @@ void writeNListEntry(const SymbolEntry &SE, bool IsLittleEndian, char *&Out, Out += sizeof(NListType); } -void MachOWriter::writeSymbolTable() { +void MachOWriter::writeStringTable() { if (!O.SymTabCommandIndex) return; const MachO::symtab_command &SymTabCommand = @@ -261,10 +278,10 @@ void MachOWriter::writeSymbolTable() { .MachOLoadCommand.symtab_command_data; uint8_t *StrTable = (uint8_t *)B.getBufferStart() + SymTabCommand.stroff; - StrTableBuilder.write(StrTable); + LayoutBuilder.getStringTableBuilder().write(StrTable); } -void MachOWriter::writeStringTable() { +void MachOWriter::writeSymbolTable() { if (!O.SymTabCommandIndex) return; const MachO::symtab_command &SymTabCommand = @@ -275,7 +292,7 @@ void MachOWriter::writeStringTable() { for (auto Iter = O.SymTable.Symbols.begin(), End = O.SymTable.Symbols.end(); Iter != End; Iter++) { SymbolEntry *Sym = Iter->get(); - auto Nstrx = StrTableBuilder.getOffset(Sym->Name); + uint32_t Nstrx = LayoutBuilder.getStringTableBuilder().getOffset(Sym->Name); if (Is64Bit) writeNListEntry(*Sym, IsLittleEndian, SymTable, Nstrx); @@ -344,6 +361,45 @@ void MachOWriter::writeExportInfo() { memcpy(Out, O.Exports.Trie.data(), O.Exports.Trie.size()); } +void MachOWriter::writeIndirectSymbolTable() { + if (!O.DySymTabCommandIndex) + return; + + const MachO::dysymtab_command &DySymTabCommand = + O.LoadCommands[*O.DySymTabCommandIndex] + .MachOLoadCommand.dysymtab_command_data; + + char *Out = (char *)B.getBufferStart() + DySymTabCommand.indirectsymoff; + assert((DySymTabCommand.nindirectsyms == 
O.IndirectSymTable.Symbols.size()) && + "Incorrect indirect symbol table size"); + memcpy(Out, O.IndirectSymTable.Symbols.data(), + sizeof(uint32_t) * O.IndirectSymTable.Symbols.size()); +} + +void MachOWriter::writeDataInCodeData() { + if (!O.DataInCodeCommandIndex) + return; + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.DataInCodeCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff; + assert((LinkEditDataCommand.datasize == O.DataInCode.Data.size()) && + "Incorrect data in code data size"); + memcpy(Out, O.DataInCode.Data.data(), O.DataInCode.Data.size()); +} + +void MachOWriter::writeFunctionStartsData() { + if (!O.FunctionStartsCommandIndex) + return; + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.FunctionStartsCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff; + assert((LinkEditDataCommand.datasize == O.FunctionStarts.Data.size()) && + "Incorrect function starts data size"); + memcpy(Out, O.FunctionStarts.Data.data(), O.FunctionStarts.Data.size()); +} + void MachOWriter::writeTail() { typedef void (MachOWriter::*WriteHandlerType)(void); typedef std::pair WriteOperation; @@ -379,206 +435,51 @@ void MachOWriter::writeTail() { {DyLdInfoCommand.export_off, &MachOWriter::writeExportInfo}); } - llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) { - return LHS.first < RHS.first; - }); - - for (auto WriteOp : Queue) - (this->*WriteOp.second)(); -} - -void MachOWriter::updateSizeOfCmds() { - auto Size = 0; - for (const auto &LC : O.LoadCommands) { - auto &MLC = LC.MachOLoadCommand; - auto cmd = MLC.load_command_data.cmd; - - switch (cmd) { - case MachO::LC_SEGMENT: - Size += sizeof(MachO::segment_command) + - sizeof(MachO::section) * LC.Sections.size(); - continue; - case MachO::LC_SEGMENT_64: - Size += sizeof(MachO::segment_command_64) + - sizeof(MachO::section_64) * LC.Sections.size(); - continue; - } - - switch (cmd) { -#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ - case MachO::LCName: \ - Size += sizeof(MachO::LCStruct); \ - break; -#include "llvm/BinaryFormat/MachO.def" -#undef HANDLE_LOAD_COMMAND - } - } - - O.Header.SizeOfCmds = Size; -} - -// Updates the index and the number of local/external/undefined symbols. Here we -// assume that MLC is a LC_DYSYMTAB and the nlist entries in the symbol table -// are already sorted by the those types. 
-void MachOWriter::updateDySymTab(MachO::macho_load_command &MLC) { - uint32_t NumLocalSymbols = 0; - auto Iter = O.SymTable.Symbols.begin(); - auto End = O.SymTable.Symbols.end(); - for (; Iter != End; Iter++) { - if ((*Iter)->n_type & (MachO::N_EXT | MachO::N_PEXT)) - break; + if (O.DySymTabCommandIndex) { + const MachO::dysymtab_command &DySymTabCommand = + O.LoadCommands[*O.DySymTabCommandIndex] + .MachOLoadCommand.dysymtab_command_data; - NumLocalSymbols++; + if (DySymTabCommand.indirectsymoff) + Queue.emplace_back(DySymTabCommand.indirectsymoff, + &MachOWriter::writeIndirectSymbolTable); } - uint32_t NumExtDefSymbols = 0; - for (; Iter != End; Iter++) { - if (((*Iter)->n_type & MachO::N_TYPE) == MachO::N_UNDF) - break; - - NumExtDefSymbols++; - } - - MLC.dysymtab_command_data.ilocalsym = 0; - MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols; - MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols; - MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols; - MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols; - MLC.dysymtab_command_data.nundefsym = - O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols); -} - -// Recomputes and updates offset and size fields in load commands and sections -// since they could be modified. -Error MachOWriter::layout() { - auto SizeOfCmds = loadCommandsSize(); - auto Offset = headerSize() + SizeOfCmds; - O.Header.NCmds = O.LoadCommands.size(); - O.Header.SizeOfCmds = SizeOfCmds; - - // Lay out sections. - for (auto &LC : O.LoadCommands) { - uint64_t FileOff = Offset; - uint64_t VMSize = 0; - uint64_t FileOffsetInSegment = 0; - for (auto &Sec : LC.Sections) { - if (!Sec.isVirtualSection()) { - auto FilePaddingSize = - OffsetToAlignment(FileOffsetInSegment, 1ull << Sec.Align); - Sec.Offset = Offset + FileOffsetInSegment + FilePaddingSize; - Sec.Size = Sec.Content.size(); - FileOffsetInSegment += FilePaddingSize + Sec.Size; - } - - VMSize = std::max(VMSize, Sec.Addr + Sec.Size); - } - - // TODO: Handle the __PAGEZERO segment. - auto &MLC = LC.MachOLoadCommand; - switch (MLC.load_command_data.cmd) { - case MachO::LC_SEGMENT: - MLC.segment_command_data.cmdsize = - sizeof(MachO::segment_command) + - sizeof(MachO::section) * LC.Sections.size(); - MLC.segment_command_data.nsects = LC.Sections.size(); - MLC.segment_command_data.fileoff = FileOff; - MLC.segment_command_data.vmsize = VMSize; - MLC.segment_command_data.filesize = FileOffsetInSegment; - break; - case MachO::LC_SEGMENT_64: - MLC.segment_command_64_data.cmdsize = - sizeof(MachO::segment_command_64) + - sizeof(MachO::section_64) * LC.Sections.size(); - MLC.segment_command_64_data.nsects = LC.Sections.size(); - MLC.segment_command_64_data.fileoff = FileOff; - MLC.segment_command_64_data.vmsize = VMSize; - MLC.segment_command_64_data.filesize = FileOffsetInSegment; - break; - } + if (O.DataInCodeCommandIndex) { + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.DataInCodeCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; - Offset += FileOffsetInSegment; + if (LinkEditDataCommand.dataoff) + Queue.emplace_back(LinkEditDataCommand.dataoff, + &MachOWriter::writeDataInCodeData); } - // Lay out relocations. - for (auto &LC : O.LoadCommands) - for (auto &Sec : LC.Sections) { - Sec.RelOff = Sec.Relocations.empty() ? 
0 : Offset; - Sec.NReloc = Sec.Relocations.size(); - Offset += sizeof(MachO::any_relocation_info) * Sec.NReloc; - } + if (O.FunctionStartsCommandIndex) { + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.FunctionStartsCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; - // Lay out tail stuff. - auto NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist); - for (auto &LC : O.LoadCommands) { - auto &MLC = LC.MachOLoadCommand; - auto cmd = MLC.load_command_data.cmd; - switch (cmd) { - case MachO::LC_SYMTAB: - MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size(); - MLC.symtab_command_data.strsize = StrTableBuilder.getSize(); - MLC.symtab_command_data.symoff = Offset; - Offset += NListSize * MLC.symtab_command_data.nsyms; - MLC.symtab_command_data.stroff = Offset; - Offset += MLC.symtab_command_data.strsize; - break; - case MachO::LC_DYSYMTAB: { - if (MLC.dysymtab_command_data.ntoc != 0 || - MLC.dysymtab_command_data.nmodtab != 0 || - MLC.dysymtab_command_data.nextrefsyms != 0 || - MLC.dysymtab_command_data.nlocrel != 0 || - MLC.dysymtab_command_data.nextrel != 0) - return createStringError(llvm::errc::not_supported, - "shared library is not yet supported"); - - if (MLC.dysymtab_command_data.nindirectsyms != 0) - return createStringError(llvm::errc::not_supported, - "indirect symbol table is not yet supported"); - - updateDySymTab(MLC); - break; - } - case MachO::LC_SEGMENT: - case MachO::LC_SEGMENT_64: - case MachO::LC_VERSION_MIN_MACOSX: - case MachO::LC_BUILD_VERSION: - case MachO::LC_ID_DYLIB: - case MachO::LC_LOAD_DYLIB: - case MachO::LC_UUID: - case MachO::LC_SOURCE_VERSION: - // Nothing to update. - break; - default: - // Abort if it's unsupported in order to prevent corrupting the object. 
- return createStringError(llvm::errc::not_supported, - "unsupported load command (cmd=0x%x)", cmd); - } + if (LinkEditDataCommand.dataoff) + Queue.emplace_back(LinkEditDataCommand.dataoff, + &MachOWriter::writeFunctionStartsData); } - return Error::success(); -} + llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) { + return LHS.first < RHS.first; + }); -void MachOWriter::constructStringTable() { - for (std::unique_ptr &Sym : O.SymTable.Symbols) - StrTableBuilder.add(Sym->Name); - StrTableBuilder.finalize(); + for (auto WriteOp : Queue) + (this->*WriteOp.second)(); } -Error MachOWriter::finalize() { - updateSizeOfCmds(); - constructStringTable(); - - if (auto E = layout()) - return E; - - return Error::success(); -} +Error MachOWriter::finalize() { return LayoutBuilder.layout(); } Error MachOWriter::write() { if (Error E = B.allocate(totalSize())) return E; memset(B.getBufferStart(), 0, totalSize()); writeHeader(); - updateSymbolIndexes(); writeLoadCommands(); writeSections(); writeTail(); diff --git a/tools/llvm-objcopy/MachO/MachOWriter.h b/tools/llvm-objcopy/MachO/MachOWriter.h index ecf12d62de2c..22abbad56f41 100644 --- a/tools/llvm-objcopy/MachO/MachOWriter.h +++ b/tools/llvm-objcopy/MachO/MachOWriter.h @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "../Buffer.h" +#include "MachOLayoutBuilder.h" #include "MachOObjcopy.h" #include "Object.h" #include "llvm/BinaryFormat/MachO.h" @@ -22,20 +23,15 @@ class MachOWriter { Object &O; bool Is64Bit; bool IsLittleEndian; + uint64_t PageSize; Buffer &B; - StringTableBuilder StrTableBuilder{StringTableBuilder::MachO}; + MachOLayoutBuilder LayoutBuilder; size_t headerSize() const; size_t loadCommandsSize() const; size_t symTableSize() const; size_t strTableSize() const; - void updateDySymTab(MachO::macho_load_command &MLC); - void updateSizeOfCmds(); - void updateSymbolIndexes(); - void constructStringTable(); - Error layout(); - void writeHeader(); void writeLoadCommands(); template @@ -48,11 +44,16 @@ class MachOWriter { void writeWeakBindInfo(); void writeLazyBindInfo(); void writeExportInfo(); + void writeIndirectSymbolTable(); + void writeDataInCodeData(); + void writeFunctionStartsData(); void writeTail(); public: - MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian, Buffer &B) - : O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian), B(B) {} + MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian, uint64_t PageSize, + Buffer &B) + : O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian), + PageSize(PageSize), B(B), LayoutBuilder(O, Is64Bit, PageSize) {} size_t totalSize() const; Error finalize(); diff --git a/tools/llvm-objcopy/MachO/Object.h b/tools/llvm-objcopy/MachO/Object.h index ed85fcbc47f7..1cebf8253d19 100644 --- a/tools/llvm-objcopy/MachO/Object.h +++ b/tools/llvm-objcopy/MachO/Object.h @@ -90,6 +90,16 @@ struct SymbolEntry { uint8_t n_sect; uint16_t n_desc; uint64_t n_value; + + bool isExternalSymbol() const { + return n_type & ((MachO::N_EXT | MachO::N_PEXT)); + } + + bool isLocalSymbol() const { return !isExternalSymbol(); } + + bool isUndefinedSymbol() const { + return (n_type & MachO::N_TYPE) == MachO::N_UNDF; + } }; /// The location of the symbol table inside the binary is described by LC_SYMTAB @@ -100,6 +110,10 @@ struct SymbolTable { const SymbolEntry *getSymbolByIndex(uint32_t Index) const; }; +struct IndirectSymbolTable { + std::vector Symbols; +}; + /// The location of the string table inside the binary is described by 
LC_SYMTAB /// load command. struct StringTable { @@ -206,6 +220,10 @@ struct ExportInfo { ArrayRef Trie; }; +struct LinkData { + ArrayRef Data; +}; + struct Object { MachHeader Header; std::vector LoadCommands; @@ -218,11 +236,20 @@ struct Object { WeakBindInfo WeakBinds; LazyBindInfo LazyBinds; ExportInfo Exports; + IndirectSymbolTable IndirectSymTable; + LinkData DataInCode; + LinkData FunctionStarts; /// The index of LC_SYMTAB load command if present. Optional SymTabCommandIndex; /// The index of LC_DYLD_INFO or LC_DYLD_INFO_ONLY load command if present. Optional DyLdInfoCommandIndex; + /// The index LC_DYSYMTAB load comamnd if present. + Optional DySymTabCommandIndex; + /// The index LC_DATA_IN_CODE load comamnd if present. + Optional DataInCodeCommandIndex; + /// The index LC_FUNCTION_STARTS load comamnd if present. + Optional FunctionStartsCommandIndex; }; } // end namespace macho diff --git a/tools/llvm-objcopy/ObjcopyOpts.td b/tools/llvm-objcopy/ObjcopyOpts.td index 5fce4fbde539..9e6b6f0005cd 100644 --- a/tools/llvm-objcopy/ObjcopyOpts.td +++ b/tools/llvm-objcopy/ObjcopyOpts.td @@ -1,37 +1,33 @@ -include "llvm/Option/OptParser.td" - -multiclass Eq { - def NAME : Separate<["--"], name>; - def NAME #_eq : Joined<["--"], name #"=">, - Alias(NAME)>, - HelpText; -} - -def help : Flag<["--"], "help">; -def h : Flag<["-"], "h">, Alias; - -def allow_broken_links - : Flag<["--"], "allow-broken-links">, - HelpText<"Allow llvm-objcopy to remove sections even if it would leave " - "invalid section references. The appropriate sh_link fields " - "will be set to zero.">; +include "CommonOpts.td" defm binary_architecture - : Eq<"binary-architecture", "Used when transforming an architecture-less " - "format (such as binary) to another format">; -def B : JoinedOrSeparate<["-"], "B">, Alias; + : Eq<"binary-architecture", "Ignored for compatibility">; +def B : JoinedOrSeparate<["-"], "B">, + Alias, + HelpText<"Alias for --binary-architecture">; defm target : Eq<"target", "Format of the input and output file">, Values<"binary">; -def F : JoinedOrSeparate<["-"], "F">, Alias; +def F : JoinedOrSeparate<["-"], "F">, + Alias, + HelpText<"Alias for --target">; defm input_target : Eq<"input-target", "Format of the input file">, Values<"binary">; -def I : JoinedOrSeparate<["-"], "I">, Alias; +def I : JoinedOrSeparate<["-"], "I">, + Alias, + HelpText<"Alias for --input-target">; defm output_target : Eq<"output-target", "Format of the output file">, Values<"binary">; -def O : JoinedOrSeparate<["-"], "O">, Alias; +def O : JoinedOrSeparate<["-"], "O">, + Alias, + HelpText<"Alias for --output-target">; + +defm new_symbol_visibility : Eq<"new-symbol-visibility", "Visibility of " + "symbols generated for binary input or added" + " with --add-symbol unless otherwise" + " specified. 
The default value is 'default'.">; def compress_debug_sections : Flag<["--"], "compress-debug-sections">; def compress_debug_sections_eq @@ -46,34 +42,10 @@ defm split_dwo ", then strip-dwo on the input file">, MetaVarName<"dwo-file">; -def enable_deterministic_archives - : Flag<["--"], "enable-deterministic-archives">, - HelpText<"Enable deterministic mode when copying archives (use zero for " - "UIDs, GIDs, and timestamps).">; -def D : Flag<["-"], "D">, - Alias, - HelpText<"Alias for --enable-deterministic-archives">; - -def disable_deterministic_archives - : Flag<["--"], "disable-deterministic-archives">, - HelpText<"Disable deterministic mode when copying archives (use real " - "values for UIDs, GIDs, and timestamps).">; -def U : Flag<["-"], "U">, - Alias, - HelpText<"Alias for --disable-deterministic-archives">; - -def preserve_dates : Flag<["--"], "preserve-dates">, - HelpText<"Preserve access and modification timestamps">; -def p : Flag<["-"], "p">, Alias; - defm add_gnu_debuglink : Eq<"add-gnu-debuglink", "Add a .gnu_debuglink for ">, MetaVarName<"debug-file">; -defm remove_section : Eq<"remove-section", "Remove
">, - MetaVarName<"section">; -def R : JoinedOrSeparate<["-"], "R">, Alias; - defm rename_section : Eq<"rename-section", "Renames a section from old to new, optionally with specified flags. " @@ -93,16 +65,20 @@ defm redefine_symbols "symbols from many files.">, MetaVarName<"filename">; -defm keep_section : Eq<"keep-section", "Keep
">, - MetaVarName<"section">; defm only_section : Eq<"only-section", "Remove all but
">, MetaVarName<"section">; -def j : JoinedOrSeparate<["-"], "j">, Alias; +def j : JoinedOrSeparate<["-"], "j">, + Alias, + HelpText<"Alias for --only-section">; defm add_section : Eq<"add-section", "Make a section named
with the contents of .">, MetaVarName<"section=file">; +defm set_section_alignment + : Eq<"set-section-alignment", "Set alignment for a given section.">, + MetaVarName<"section=align">; + defm set_section_flags : Eq<"set-section-flags", "Set section flags for a given section. Flags supported for GNU " @@ -110,26 +86,14 @@ defm set_section_flags "rom, share, contents, merge, strings.">, MetaVarName<"section=flag1[,flag2,...]">; -def strip_all : Flag<["--"], "strip-all">, - HelpText<"Remove non-allocated sections outside segments. " - ".gnu.warning* sections are not removed">; -def S : Flag<["-"], "S">, Alias; -def strip_all_gnu : Flag<["--"], "strip-all-gnu">, - HelpText<"Compatible with GNU objcopy's --strip-all">; -def strip_debug : Flag<["--"], "strip-debug">, - HelpText<"Remove all debug information">; -def g : Flag<["-"], "g">, Alias, - HelpText<"Alias for --strip-debug">; +def S : Flag<["-"], "S">, + Alias, + HelpText<"Alias for --strip-all">; def strip_dwo : Flag<["--"], "strip-dwo">, HelpText<"Remove all DWARF .dwo sections from file">; -def strip_sections - : Flag<["--"], "strip-sections">, - HelpText<"Remove all section headers and all sections not in segments">; def strip_non_alloc : Flag<["--"], "strip-non-alloc">, HelpText<"Remove all non-allocated sections outside segments">; -def strip_unneeded : Flag<["--"], "strip-unneeded">, - HelpText<"Remove all symbols not needed by relocations">; defm strip_unneeded_symbol : Eq<"strip-unneeded-symbol", "Remove symbol if it is not needed by relocations">, @@ -163,7 +127,9 @@ defm localize_symbols "Reads a list of symbols from and marks them local.">, MetaVarName<"filename">; -def L : JoinedOrSeparate<["-"], "L">, Alias; +def L : JoinedOrSeparate<["-"], "L">, + Alias, + HelpText<"Alias for --localize-symbol">; defm globalize_symbol : Eq<"globalize-symbol", "Mark as global">, MetaVarName<"symbol">; @@ -178,7 +144,9 @@ defm keep_global_symbol "Convert all symbols except to local. May be repeated to " "convert all except a set of symbols to local.">, MetaVarName<"symbol">; -def G : JoinedOrSeparate<["-"], "G">, Alias; +def G : JoinedOrSeparate<["-"], "G">, + Alias, + HelpText<"Alias for --keep-global-symbol">; defm keep_global_symbols : Eq<"keep-global-symbols", @@ -196,31 +164,17 @@ defm weaken_symbols "Reads a list of symbols from and marks them weak.">, MetaVarName<"filename">; -def W : JoinedOrSeparate<["-"], "W">, Alias; +def W : JoinedOrSeparate<["-"], "W">, + Alias, + HelpText<"Alias for --weaken-symbol">; def weaken : Flag<["--"], "weaken">, HelpText<"Mark all global symbols as weak">; -def discard_locals : Flag<["--"], "discard-locals">, - HelpText<"Remove compiler-generated local symbols, (e.g. 
" - "symbols starting with .L)">; -def X : Flag<["-"], "X">, Alias; - -def discard_all - : Flag<["--"], "discard-all">, - HelpText<"Remove all local symbols except file and section symbols">; -def x : Flag<["-"], "x">, Alias; -defm strip_symbol : Eq<"strip-symbol", "Remove symbol ">, - MetaVarName<"symbol">; defm strip_symbols : Eq<"strip-symbols", "Reads a list of symbols from and removes them.">, MetaVarName<"filename">; -def N : JoinedOrSeparate<["-"], "N">, Alias; -defm keep_symbol : Eq<"keep-symbol", "Do not remove symbol ">, - MetaVarName<"symbol">; -def K : JoinedOrSeparate<["-"], "K">, Alias; - defm keep_symbols : Eq<"keep-symbols", "Reads a list of symbols from and runs as if " @@ -230,13 +184,6 @@ defm keep_symbols "be repeated to read symbols from many files.">, MetaVarName<"filename">; -def only_keep_debug - : Flag<["--"], "only-keep-debug">, - HelpText<"Clear sections that would not be stripped by --strip-debug. " - "Currently only implemented for COFF.">; - -def keep_file_symbols : Flag<["--"], "keep-file-symbols">, - HelpText<"Do not remove file symbols">; defm dump_section : Eq<"dump-section", "Dump contents of section named
into file ">, @@ -249,9 +196,6 @@ defm prefix_alloc_sections : Eq<"prefix-alloc-sections", "Add to the start of every allocated section name">, MetaVarName<"prefix">; -def version : Flag<["--"], "version">, - HelpText<"Print the version and exit.">; -def V : Flag<["-"], "V">, Alias; defm build_id_link_dir : Eq<"build-id-link-dir", "Set directory for --build-id-link-input and " "--build-id-link-output to ">, @@ -265,10 +209,6 @@ defm build_id_link_output "name derived from hex build ID">, MetaVarName<"suffix">; -def regex - : Flag<["--"], "regex">, - HelpText<"Permit regular expressions in name comparison">; - defm set_start : Eq<"set-start", "Set the start address to . Overrides " "any previous --change-start or --adjust-start values.">, MetaVarName<"addr">; @@ -277,11 +217,12 @@ defm change_start : Eq<"change-start", "Add to the start address. Can be "cumulatively.">, MetaVarName<"incr">; def adjust_start : JoinedOrSeparate<["--"], "adjust-start">, - Alias; + Alias, + HelpText<"Alias for --change-start">; defm add_symbol : Eq<"add-symbol", "Add new symbol to .symtab. Accepted flags: " - "global, local, weak, default, hidden, file, section, object, " + "global, local, weak, default, hidden, protected, file, section, object, " "function, indirect-function. Accepted but ignored for " "compatibility: debug, constructor, warning, indirect, synthetic, " "unique-object, before.">, diff --git a/tools/llvm-objcopy/StripOpts.td b/tools/llvm-objcopy/StripOpts.td index 1d06bb3dfb38..cd02cffae673 100644 --- a/tools/llvm-objcopy/StripOpts.td +++ b/tools/llvm-objcopy/StripOpts.td @@ -1,96 +1,17 @@ -include "llvm/Option/OptParser.td" +include "CommonOpts.td" -multiclass Eq { - def NAME : Separate<["--"], name>; - def NAME #_eq : Joined<["--"], name #"=">, - Alias(NAME)>, - HelpText; -} +def output : JoinedOrSeparate<["-"], "o">, HelpText<"Write output to ">, + MetaVarName<"">; -def help : Flag<["--"], "help">; -def h : Flag<["-"], "h">, Alias; - -def allow_broken_links - : Flag<["--"], "allow-broken-links">, - HelpText<"Allow llvm-strip to remove sections even if it would leave " - "invalid section references. The appropriate sh_link fields " - "will be set to zero.">; - -def enable_deterministic_archives - : Flag<["--"], "enable-deterministic-archives">, - HelpText<"Enable deterministic mode when stripping archives (use zero " - "for UIDs, GIDs, and timestamps).">; -def D : Flag<["-"], "D">, - Alias, - HelpText<"Alias for --enable-deterministic-archives">; - -def disable_deterministic_archives - : Flag<["--"], "disable-deterministic-archives">, - HelpText<"Disable deterministic mode when stripping archives (use real " - "values for UIDs, GIDs, and timestamps).">; -def U : Flag<["-"], "U">, - Alias, - HelpText<"Alias for --disable-deterministic-archives">; - -def output : JoinedOrSeparate<["-"], "o">, HelpText<"Write output to ">; - -def preserve_dates : Flag<["--"], "preserve-dates">, - HelpText<"Preserve access and modification timestamps">; -def p : Flag<["-"], "p">, Alias; - -def strip_all : Flag<["--"], "strip-all">, - HelpText<"Remove non-allocated sections outside segments. 
" - ".gnu.warning* sections are not removed">; -def s : Flag<["-"], "s">, Alias; +def s : Flag<["-"], "s">, + Alias, + HelpText<"Alias for --strip-all">; def no_strip_all : Flag<["--"], "no-strip-all">, HelpText<"Disable --strip-all">; -def strip_all_gnu : Flag<["--"], "strip-all-gnu">, - HelpText<"Compatible with GNU strip's --strip-all">; -def strip_debug : Flag<["--"], "strip-debug">, - HelpText<"Remove debugging symbols only">; -def d : Flag<["-"], "d">, Alias; -def g : Flag<["-"], "g">, Alias; -def S : Flag<["-"], "S">, Alias; -def strip_unneeded : Flag<["--"], "strip-unneeded">, - HelpText<"Remove all symbols not needed by relocations">; - -defm remove_section : Eq<"remove-section", "Remove
">, - MetaVarName<"section">; -def R : JoinedOrSeparate<["-"], "R">, Alias; - -defm strip_symbol : Eq<"strip-symbol", "Strip ">, - MetaVarName<"symbol">; -def N : JoinedOrSeparate<["-"], "N">, Alias; - -defm keep_section : Eq<"keep-section", "Keep
">, - MetaVarName<"section">; -defm keep_symbol : Eq<"keep-symbol", "Do not remove symbol ">, - MetaVarName<"symbol">; -def keep_file_symbols : Flag<["--"], "keep-file-symbols">, - HelpText<"Do not remove file symbols">; - -def K : JoinedOrSeparate<["-"], "K">, Alias; - -def only_keep_debug - : Flag<["--"], "only-keep-debug">, - HelpText<"Clear sections that would not be stripped by --strip-debug. " - "Currently only implemented for COFF.">; - -def discard_locals : Flag<["--"], "discard-locals">, - HelpText<"Remove compiler-generated local symbols, (e.g. " - "symbols starting with .L)">; -def X : Flag<["-"], "X">, Alias; - -def discard_all - : Flag<["--"], "discard-all">, - HelpText<"Remove all local symbols except file and section symbols">; -def x : Flag<["-"], "x">, Alias; - -def regex - : Flag<["--"], "regex">, - HelpText<"Permit regular expressions in name comparison">; - -def version : Flag<["--"], "version">, - HelpText<"Print the version and exit.">; -def V : Flag<["-"], "V">, Alias; +def d : Flag<["-"], "d">, + Alias, + HelpText<"Alias for --strip-debug">; +def S : Flag<["-"], "S">, + Alias, + HelpText<"Alias for --strip-debug">; diff --git a/tools/llvm-objcopy/llvm-objcopy.cpp b/tools/llvm-objcopy/llvm-objcopy.cpp index e9372176e43b..a68210f3fdd3 100644 --- a/tools/llvm-objcopy/llvm-objcopy.cpp +++ b/tools/llvm-objcopy/llvm-objcopy.cpp @@ -29,6 +29,7 @@ #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" @@ -36,6 +37,7 @@ #include "llvm/Support/Memory.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" +#include "llvm/Support/StringSaver.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" #include @@ -84,7 +86,7 @@ LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, Error E) { ErrorSuccess reportWarning(Error E) { assert(E); - WithColor::warning(errs(), ToolName) << toString(std::move(E)); + WithColor::warning(errs(), ToolName) << toString(std::move(E)) << '\n'; return Error::success(); } @@ -130,16 +132,18 @@ static Error deepWriteArchive(StringRef ArcName, /// The function executeObjcopyOnIHex does the dispatch based on the format /// of the output specified by the command line options. -static Error executeObjcopyOnIHex(const CopyConfig &Config, MemoryBuffer &In, +static Error executeObjcopyOnIHex(CopyConfig &Config, MemoryBuffer &In, Buffer &Out) { // TODO: support output formats other than ELF. + if (Error E = Config.parseELFConfig()) + return E; return elf::executeObjcopyOnIHex(Config, In, Out); } /// The function executeObjcopyOnRawBinary does the dispatch based on the format /// of the output specified by the command line options. 
-static Error executeObjcopyOnRawBinary(const CopyConfig &Config, - MemoryBuffer &In, Buffer &Out) { +static Error executeObjcopyOnRawBinary(CopyConfig &Config, MemoryBuffer &In, + Buffer &Out) { switch (Config.OutputFormat) { case FileFormat::ELF: // FIXME: Currently, we call elf::executeObjcopyOnRawBinary even if the @@ -148,6 +152,8 @@ static Error executeObjcopyOnRawBinary(const CopyConfig &Config, case FileFormat::Binary: case FileFormat::IHex: case FileFormat::Unspecified: + if (Error E = Config.parseELFConfig()) + return E; return elf::executeObjcopyOnRawBinary(Config, In, Out); } @@ -156,11 +162,13 @@ static Error executeObjcopyOnRawBinary(const CopyConfig &Config, /// The function executeObjcopyOnBinary does the dispatch based on the format /// of the input binary (ELF, MachO or COFF). -static Error executeObjcopyOnBinary(const CopyConfig &Config, - object::Binary &In, Buffer &Out) { - if (auto *ELFBinary = dyn_cast(&In)) +static Error executeObjcopyOnBinary(CopyConfig &Config, object::Binary &In, + Buffer &Out) { + if (auto *ELFBinary = dyn_cast(&In)) { + if (Error E = Config.parseELFConfig()) + return E; return elf::executeObjcopyOnBinary(Config, *ELFBinary, Out); - else if (auto *COFFBinary = dyn_cast(&In)) + } else if (auto *COFFBinary = dyn_cast(&In)) return coff::executeObjcopyOnBinary(Config, *COFFBinary, Out); else if (auto *MachOBinary = dyn_cast(&In)) return macho::executeObjcopyOnBinary(Config, *MachOBinary, Out); @@ -169,8 +177,7 @@ static Error executeObjcopyOnBinary(const CopyConfig &Config, "unsupported object file format"); } -static Error executeObjcopyOnArchive(const CopyConfig &Config, - const Archive &Ar) { +static Error executeObjcopyOnArchive(CopyConfig &Config, const Archive &Ar) { std::vector NewArchiveMembers; Error Err = Error::success(); for (const Archive::Child &Child : Ar.children(Err)) { @@ -246,7 +253,7 @@ static Error restoreStatOnFile(StringRef Filename, /// The function executeObjcopy does the higher level dispatch based on the type /// of input (raw binary, archive or single object file) and takes care of the /// format-agnostic modifications, i.e. preserving dates. -static Error executeObjcopy(const CopyConfig &Config) { +static Error executeObjcopy(CopyConfig &Config) { sys::fs::file_status Stat; if (Config.InputFilename != "-") { if (auto EC = sys::fs::status(Config.InputFilename, Stat)) @@ -255,7 +262,7 @@ static Error executeObjcopy(const CopyConfig &Config) { Stat.permissions(static_cast(0777)); } - typedef Error (*ProcessRawFn)(const CopyConfig &, MemoryBuffer &, Buffer &); + using ProcessRawFn = Error (*)(CopyConfig &, MemoryBuffer &, Buffer &); ProcessRawFn ProcessRaw; switch (Config.InputFormat) { case FileFormat::Binary: @@ -310,15 +317,31 @@ int main(int argc, char **argv) { InitLLVM X(argc, argv); ToolName = argv[0]; bool IsStrip = sys::path::stem(ToolName).contains("strip"); + + // Expand response files. + // TODO: Move these lines, which are copied from lib/Support/CommandLine.cpp, + // into a separate function in the CommandLine library and call that function + // here. This is duplicated code. + SmallVector NewArgv(argv, argv + argc); + BumpPtrAllocator A; + StringSaver Saver(A); + cl::ExpandResponseFiles(Saver, + Triple(sys::getProcessTriple()).isOSWindows() + ? cl::TokenizeWindowsCommandLine + : cl::TokenizeGNUCommandLine, + NewArgv); + + auto Args = makeArrayRef(NewArgv).drop_front(); + Expected DriverConfig = - IsStrip ? 
parseStripOptions(makeArrayRef(argv + 1, argc), reportWarning) - : parseObjcopyOptions(makeArrayRef(argv + 1, argc)); + IsStrip ? parseStripOptions(Args, reportWarning) + : parseObjcopyOptions(Args, reportWarning); if (!DriverConfig) { logAllUnhandledErrors(DriverConfig.takeError(), WithColor::error(errs(), ToolName)); return 1; } - for (const CopyConfig &CopyConfig : DriverConfig->CopyConfigs) { + for (CopyConfig &CopyConfig : DriverConfig->CopyConfigs) { if (Error E = executeObjcopy(CopyConfig)) { logAllUnhandledErrors(std::move(E), WithColor::error(errs(), ToolName)); return 1; diff --git a/tools/llvm-objdump/COFFDump.cpp b/tools/llvm-objdump/COFFDump.cpp index 1ba0a68902c9..60b0f5a3cbd1 100644 --- a/tools/llvm-objdump/COFFDump.cpp +++ b/tools/llvm-objdump/COFFDump.cpp @@ -234,15 +234,14 @@ printSEHTable(const COFFObjectFile *Obj, uint32_t TableVA, int Count) { if (Count == 0) return; - const pe32_header *PE32Header; - error(Obj->getPE32Header(PE32Header)); - uint32_t ImageBase = PE32Header->ImageBase; uintptr_t IntPtr = 0; - error(Obj->getVaPtr(TableVA, IntPtr)); + if (std::error_code EC = Obj->getVaPtr(TableVA, IntPtr)) + reportError(errorCodeToError(EC), Obj->getFileName()); + const support::ulittle32_t *P = (const support::ulittle32_t *)IntPtr; outs() << "SEH Table:"; for (int I = 0; I < Count; ++I) - outs() << format(" 0x%x", P[I] + ImageBase); + outs() << format(" 0x%x", P[I] + Obj->getPE32Header()->ImageBase); outs() << "\n\n"; } @@ -268,22 +267,24 @@ static void printTLSDirectoryT(const coff_tls_directory *TLSDir) { } static void printTLSDirectory(const COFFObjectFile *Obj) { - const pe32_header *PE32Header; - error(Obj->getPE32Header(PE32Header)); - - const pe32plus_header *PE32PlusHeader; - error(Obj->getPE32PlusHeader(PE32PlusHeader)); + const pe32_header *PE32Header = Obj->getPE32Header(); + const pe32plus_header *PE32PlusHeader = Obj->getPE32PlusHeader(); // Skip if it's not executable. if (!PE32Header && !PE32PlusHeader) return; const data_directory *DataDir; - error(Obj->getDataDirectory(COFF::TLS_TABLE, DataDir)); - uintptr_t IntPtr = 0; + if (std::error_code EC = Obj->getDataDirectory(COFF::TLS_TABLE, DataDir)) + reportError(errorCodeToError(EC), Obj->getFileName()); + if (DataDir->RelativeVirtualAddress == 0) return; - error(Obj->getRvaPtr(DataDir->RelativeVirtualAddress, IntPtr)); + + uintptr_t IntPtr = 0; + if (std::error_code EC = + Obj->getRvaPtr(DataDir->RelativeVirtualAddress, IntPtr)) + reportError(errorCodeToError(EC), Obj->getFileName()); if (PE32Header) { auto *TLSDir = reinterpret_cast(IntPtr); @@ -298,9 +299,7 @@ static void printTLSDirectory(const COFFObjectFile *Obj) { static void printLoadConfiguration(const COFFObjectFile *Obj) { // Skip if it's not executable. 
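The response-file expansion added to llvm-objcopy's main() just above amounts to three pieces: a BumpPtrAllocator-backed StringSaver that owns the expanded strings, a tokenizer chosen by host OS, and cl::ExpandResponseFiles rewriting the argument vector in place. A standalone sketch; the inline SmallVector size and the surrounding main() are illustrative only:

// Illustrative framing of the expansion step; the ExpandResponseFiles call
// and tokenizer choice mirror the patch.
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/StringSaver.h"

using namespace llvm;

int main(int argc, char **argv) {
  // @file arguments are expanded in place before option parsing.
  SmallVector<const char *, 256> NewArgv(argv, argv + argc);
  BumpPtrAllocator A;
  StringSaver Saver(A); // keeps the expanded strings alive
  cl::ExpandResponseFiles(Saver,
                          Triple(sys::getProcessTriple()).isOSWindows()
                              ? cl::TokenizeWindowsCommandLine
                              : cl::TokenizeGNUCommandLine,
                          NewArgv);
  // makeArrayRef(NewArgv).drop_front() is then handed to the option parser.
  return 0;
}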
- const pe32_header *PE32Header; - error(Obj->getPE32Header(PE32Header)); - if (!PE32Header) + if (!Obj->getPE32Header()) return; // Currently only x86 is supported @@ -308,11 +307,18 @@ static void printLoadConfiguration(const COFFObjectFile *Obj) { return; const data_directory *DataDir; - error(Obj->getDataDirectory(COFF::LOAD_CONFIG_TABLE, DataDir)); + + if (std::error_code EC = + Obj->getDataDirectory(COFF::LOAD_CONFIG_TABLE, DataDir)) + reportError(errorCodeToError(EC), Obj->getFileName()); + uintptr_t IntPtr = 0; if (DataDir->RelativeVirtualAddress == 0) return; - error(Obj->getRvaPtr(DataDir->RelativeVirtualAddress, IntPtr)); + + if (std::error_code EC = + Obj->getRvaPtr(DataDir->RelativeVirtualAddress, IntPtr)) + reportError(errorCodeToError(EC), Obj->getFileName()); auto *LoadConf = reinterpret_cast(IntPtr); outs() << "Load configuration:" @@ -442,8 +448,7 @@ static bool getPDataSection(const COFFObjectFile *Obj, std::vector &Rels, const RuntimeFunction *&RFStart, int &NumRFs) { for (const SectionRef &Section : Obj->sections()) { - StringRef Name; - error(Section.getName(Name)); + StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName()); if (Name != ".pdata") continue; @@ -455,7 +460,9 @@ static bool getPDataSection(const COFFObjectFile *Obj, llvm::sort(Rels, isRelocAddressLess); ArrayRef Contents; - error(Obj->getSectionContents(Pdata, Contents)); + if (Error E = Obj->getSectionContents(Pdata, Contents)) + reportError(std::move(E), Obj->getFileName()); + if (Contents.empty()) continue; @@ -571,10 +578,12 @@ static void printRuntimeFunctionRels(const COFFObjectFile *Obj, ArrayRef XContents; uint64_t UnwindInfoOffset = 0; - error(getSectionContents( - Obj, Rels, SectionOffset + - /*offsetof(RuntimeFunction, UnwindInfoOffset)*/ 8, - XContents, UnwindInfoOffset)); + if (Error E = getSectionContents( + Obj, Rels, + SectionOffset + + /*offsetof(RuntimeFunction, UnwindInfoOffset)*/ 8, + XContents, UnwindInfoOffset)) + reportError(std::move(E), Obj->getFileName()); if (XContents.empty()) return; @@ -650,9 +659,12 @@ void printCOFFSymbolTable(const object::COFFImportFile *i) { void printCOFFSymbolTable(const COFFObjectFile *coff) { for (unsigned SI = 0, SE = coff->getNumberOfSymbols(); SI != SE; ++SI) { Expected Symbol = coff->getSymbol(SI); + if (!Symbol) + reportError(Symbol.takeError(), coff->getFileName()); + StringRef Name; - error(Symbol.takeError()); - error(coff->getSymbolName(*Symbol, Name)); + if (std::error_code EC = coff->getSymbolName(*Symbol, Name)) + reportError(errorCodeToError(EC), coff->getFileName()); outs() << "[" << format("%2d", SI) << "]" << "(sec " << format("%2d", int(Symbol->getSectionNumber())) << ")" @@ -682,7 +694,9 @@ void printCOFFSymbolTable(const COFFObjectFile *coff) { for (unsigned AI = 0, AE = Symbol->getNumberOfAuxSymbols(); AI < AE; ++AI, ++SI) { if (Symbol->isSectionDefinition()) { const coff_aux_section_definition *asd; - error(coff->getAuxSymbol(SI + 1, asd)); + if (std::error_code EC = + coff->getAuxSymbol(SI + 1, asd)) + reportError(errorCodeToError(EC), coff->getFileName()); int32_t AuxNumber = asd->getNumber(Symbol->isBigObj()); @@ -697,7 +711,8 @@ void printCOFFSymbolTable(const COFFObjectFile *coff) { , unsigned(asd->Selection)); } else if (Symbol->isFileRecord()) { const char *FileName; - error(coff->getAuxSymbol(SI + 1, FileName)); + if (std::error_code EC = coff->getAuxSymbol(SI + 1, FileName)) + reportError(errorCodeToError(EC), coff->getFileName()); StringRef Name(FileName, Symbol->getNumberOfAuxSymbols() * 
coff->getSymbolTableEntrySize()); @@ -707,7 +722,9 @@ void printCOFFSymbolTable(const COFFObjectFile *coff) { break; } else if (Symbol->isWeakExternal()) { const coff_aux_weak_external *awe; - error(coff->getAuxSymbol(SI + 1, awe)); + if (std::error_code EC = + coff->getAuxSymbol(SI + 1, awe)) + reportError(errorCodeToError(EC), coff->getFileName()); outs() << "AUX " << format("indx %d srch %d\n", static_cast(awe->TagIndex), diff --git a/tools/llvm-objdump/ELFDump.cpp b/tools/llvm-objdump/ELFDump.cpp index 9c4d67d0f1bd..93d070eee16c 100644 --- a/tools/llvm-objdump/ELFDump.cpp +++ b/tools/llvm-objdump/ELFDump.cpp @@ -178,7 +178,7 @@ void printDynamicSection(const ELFFile *Elf, StringRef Filename) { outs() << (Data + Dyn.d_un.d_val) << "\n"; continue; } - warn(toString(StrTabOrErr.takeError())); + reportWarning(toString(StrTabOrErr.takeError()), Filename); consumeError(StrTabOrErr.takeError()); } outs() << format(Fmt, (uint64_t)Dyn.d_un.d_val); diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp index 58ff7be4543c..e4684d0f1601 100644 --- a/tools/llvm-objdump/MachODump.cpp +++ b/tools/llvm-objdump/MachODump.cpp @@ -236,11 +236,11 @@ struct SymbolSorter { bool operator()(const SymbolRef &A, const SymbolRef &B) { Expected ATypeOrErr = A.getType(); if (!ATypeOrErr) - report_error(ATypeOrErr.takeError(), A.getObject()->getFileName()); + reportError(ATypeOrErr.takeError(), A.getObject()->getFileName()); SymbolRef::Type AType = *ATypeOrErr; Expected BTypeOrErr = B.getType(); if (!BTypeOrErr) - report_error(BTypeOrErr.takeError(), B.getObject()->getFileName()); + reportError(BTypeOrErr.takeError(), B.getObject()->getFileName()); SymbolRef::Type BType = *BTypeOrErr; uint64_t AAddr = (AType != SymbolRef::ST_Function) ? 0 : A.getValue(); uint64_t BAddr = (BType != SymbolRef::ST_Function) ? 0 : B.getValue(); @@ -371,11 +371,8 @@ static void getSectionsAndSymbols(MachOObjectFile *MachOObj, Symbols.push_back(Symbol); } - for (const SectionRef &Section : MachOObj->sections()) { - StringRef SectName; - Section.getName(SectName); + for (const SectionRef &Section : MachOObj->sections()) Sections.push_back(Section); - } bool BaseSegmentAddressSet = false; for (const auto &Command : MachOObj->load_commands()) { @@ -393,10 +390,40 @@ static void getSectionsAndSymbols(MachOObjectFile *MachOObj, BaseSegmentAddressSet = true; BaseSegmentAddress = SLC.vmaddr; } + } else if (Command.C.cmd == MachO::LC_SEGMENT_64) { + MachO::segment_command_64 SLC = MachOObj->getSegment64LoadCommand(Command); + StringRef SegName = SLC.segname; + if (!BaseSegmentAddressSet && SegName != "__PAGEZERO") { + BaseSegmentAddressSet = true; + BaseSegmentAddress = SLC.vmaddr; + } } } } +static bool DumpAndSkipDataInCode(uint64_t PC, const uint8_t *bytes, + DiceTable &Dices, uint64_t &InstSize) { + // Check the data in code table here to see if this is data not an + // instruction to be disassembled. 
+ DiceTable Dice; + Dice.push_back(std::make_pair(PC, DiceRef())); + dice_table_iterator DTI = + std::search(Dices.begin(), Dices.end(), Dice.begin(), Dice.end(), + compareDiceTableEntries); + if (DTI != Dices.end()) { + uint16_t Length; + DTI->second.getLength(Length); + uint16_t Kind; + DTI->second.getKind(Kind); + InstSize = DumpDataInCode(bytes, Length, Kind); + if ((Kind == MachO::DICE_KIND_JUMP_TABLE8) && + (PC == (DTI->first + Length - 1)) && (Length & 1)) + InstSize++; + return true; + } + return false; +} + static void printRelocationTargetName(const MachOObjectFile *O, const MachO::any_relocation_info &RE, raw_string_ostream &Fmt) { @@ -419,13 +446,11 @@ static void printRelocationTargetName(const MachOObjectFile *O, // If we couldn't find a symbol that this relocation refers to, try // to find a section beginning instead. for (const SectionRef &Section : ToolSectionFilter(*O)) { - StringRef Name; uint64_t Addr = Section.getAddress(); if (Addr != Val) continue; - if (std::error_code EC = Section.getName(Name)) - report_error(errorCodeToError(EC), O->getFileName()); - Fmt << Name; + StringRef NameOrErr = unwrapOrError(Section.getName(), O->getFileName()); + Fmt << NameOrErr; return; } @@ -458,10 +483,14 @@ static void printRelocationTargetName(const MachOObjectFile *O, --I; advance(SI, 1); } - if (SI == O->section_end()) + if (SI == O->section_end()) { Fmt << Val << " (?,?)"; - else - SI->getName(S); + } else { + if (Expected NameOrErr = SI->getName()) + S = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + } } Fmt << S; @@ -504,8 +533,8 @@ Error getMachORelocationValueString(const MachOObjectFile *Obj, // NOTE: Scattered relocations don't exist on x86_64. unsigned RType = Obj->getAnyRelocationType(RENext); if (RType != MachO::X86_64_RELOC_UNSIGNED) - report_error(Obj->getFileName(), "Expected X86_64_RELOC_UNSIGNED after " - "X86_64_RELOC_SUBTRACTOR."); + reportError(Obj->getFileName(), "Expected X86_64_RELOC_UNSIGNED after " + "X86_64_RELOC_SUBTRACTOR."); // The X86_64_RELOC_UNSIGNED contains the minuend symbol; // X86_64_RELOC_SUBTRACTOR contains the subtrahend. @@ -553,8 +582,8 @@ Error getMachORelocationValueString(const MachOObjectFile *Obj, unsigned RType = Obj->getAnyRelocationType(RENext); if (RType != MachO::GENERIC_RELOC_PAIR) - report_error(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after " - "GENERIC_RELOC_SECTDIFF."); + reportError(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after " + "GENERIC_RELOC_SECTDIFF."); printRelocationTargetName(Obj, RE, Fmt); Fmt << "-"; @@ -574,8 +603,8 @@ Error getMachORelocationValueString(const MachOObjectFile *Obj, // GENERIC_RELOC_PAIR. unsigned RType = Obj->getAnyRelocationType(RENext); if (RType != MachO::GENERIC_RELOC_PAIR) - report_error(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after " - "GENERIC_RELOC_LOCAL_SECTDIFF."); + reportError(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after " + "GENERIC_RELOC_LOCAL_SECTDIFF."); printRelocationTargetName(Obj, RE, Fmt); Fmt << "-"; @@ -614,8 +643,8 @@ Error getMachORelocationValueString(const MachOObjectFile *Obj, // ARM_RELOC_PAIR. 
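Much of the llvm-objdump churn in this patch follows from SectionRef::getName() now returning Expected<StringRef>. The two consumption patterns used throughout, tolerant fall-back versus report-and-exit in the style of unwrapOrError, look roughly like this (the tool-name prefix and the exit behaviour are assumptions of the sketch):

// Sketch of the two Expected<StringRef> handling styles used above.
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>

using namespace llvm;
using namespace llvm::object;

// Tolerant form: fall back to an empty name and swallow the error.
static StringRef sectionNameOrEmpty(const SectionRef &Sec) {
  Expected<StringRef> NameOrErr = Sec.getName();
  if (NameOrErr)
    return *NameOrErr;
  consumeError(NameOrErr.takeError()); // Expected errors must be consumed
  return StringRef();
}

// Strict form: report and bail out, in the spirit of unwrapOrError(...).
static StringRef sectionNameOrDie(const SectionRef &Sec, StringRef FileName) {
  Expected<StringRef> NameOrErr = Sec.getName();
  if (!NameOrErr) {
    WithColor::error(errs(), "llvm-objdump")
        << "'" << FileName << "': " << toString(NameOrErr.takeError()) << "\n";
    exit(1);
  }
  return *NameOrErr;
}

The returned StringRef points into the object file's memory, so both helpers stay valid after the Expected goes out of scope.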
unsigned RType = Obj->getAnyRelocationType(RENext); if (RType != MachO::ARM_RELOC_PAIR) - report_error(Obj->getFileName(), "Expected ARM_RELOC_PAIR after " - "ARM_RELOC_HALF"); + reportError(Obj->getFileName(), "Expected ARM_RELOC_PAIR after " + "ARM_RELOC_HALF"); // NOTE: The half of the target virtual address is stashed in the // address field of the secondary relocation, but we can't reverse @@ -1501,7 +1530,12 @@ static void DumpLiteralPointerSection(MachOObjectFile *O, uint64_t SectSize = Sect->getSize(); StringRef SectName; - Sect->getName(SectName); + Expected SectNameOrErr = Sect->getName(); + if (SectNameOrErr) + SectName = *SectNameOrErr; + else + consumeError(SectNameOrErr.takeError()); + DataRefImpl Ref = Sect->getRawDataRefImpl(); StringRef SegmentName = O->getSectionFinalSegmentName(Ref); outs() << SegmentName << ":" << SectName << ":"; @@ -1713,7 +1747,12 @@ static void DumpSectionContents(StringRef Filename, MachOObjectFile *O, } for (const SectionRef &Section : O->sections()) { StringRef SectName; - Section.getName(SectName); + Expected SecNameOrErr = Section.getName(); + if (SecNameOrErr) + SectName = *SecNameOrErr; + else + consumeError(SecNameOrErr.takeError()); + DataRefImpl Ref = Section.getRawDataRefImpl(); StringRef SegName = O->getSectionFinalSegmentName(Ref); if ((DumpSegName.empty() || SegName == DumpSegName) && @@ -1809,7 +1848,12 @@ static void DumpInfoPlistSectionContents(StringRef Filename, MachOObjectFile *O) { for (const SectionRef &Section : O->sections()) { StringRef SectName; - Section.getName(SectName); + Expected SecNameOrErr = Section.getName(); + if (SecNameOrErr) + SectName = *SecNameOrErr; + else + consumeError(SecNameOrErr.takeError()); + DataRefImpl Ref = Section.getRawDataRefImpl(); StringRef SegName = O->getSectionFinalSegmentName(Ref); if (SegName == "__TEXT" && SectName == "__info_plist") { @@ -1901,12 +1945,16 @@ static void ProcessMachO(StringRef Name, MachOObjectFile *MachOOF, // the error message. 
if (Disassemble || IndirectSymbols || !FilterSections.empty() || UnwindInfo) if (Error Err = MachOOF->checkSymbolTable()) - report_error(std::move(Err), ArchiveName, FileName, ArchitectureName); + reportError(std::move(Err), FileName, ArchiveName, ArchitectureName); if (DisassembleAll) { for (const SectionRef &Section : MachOOF->sections()) { StringRef SectName; - Section.getName(SectName); + if (Expected NameOrErr = Section.getName()) + SectName = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + if (SectName.equals("__text")) { DataRefImpl Ref = Section.getRawDataRefImpl(); StringRef SegName = MachOOF->getSectionFinalSegmentName(Ref); @@ -2151,7 +2199,7 @@ static void printMachOUniversalHeaders(const object::MachOUniversalBinary *UB, outs() << " offset " << OFA.getOffset(); if (OFA.getOffset() > size) outs() << " (past end of file)"; - if (OFA.getOffset() % (1 << OFA.getAlign()) != 0) + if (OFA.getOffset() % (1ull << OFA.getAlign()) != 0) outs() << " (not aligned on it's alignment (2^" << OFA.getAlign() << ")"; outs() << "\n"; outs() << " size " << OFA.getSize(); @@ -2165,12 +2213,14 @@ static void printMachOUniversalHeaders(const object::MachOUniversalBinary *UB, } static void printArchiveChild(StringRef Filename, const Archive::Child &C, - bool verbose, bool print_offset, + size_t ChildIndex, bool verbose, + bool print_offset, StringRef ArchitectureName = StringRef()) { if (print_offset) outs() << C.getChildOffset() << "\t"; sys::fs::perms Mode = - unwrapOrError(C.getAccessMode(), Filename, C, ArchitectureName); + unwrapOrError(C.getAccessMode(), getFileNameForError(C, ChildIndex), + Filename, ArchitectureName); if (verbose) { // FIXME: this first dash, "-", is for (Mode & S_IFMT) == S_IFREG. // But there is nothing in sys::fs::perms for S_IFMT or S_IFREG. 
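printArchiveChild and the archive loops now carry an explicit child index so that a member whose name cannot be read can still be identified by position in diagnostics. A sketch of that idiom, modeled on the patch's getFileNameForError; the exact fallback string is illustrative:

// Indexed archive-child iteration with a positional fallback name.
#include "llvm/Object/Archive.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

using namespace llvm;
using namespace llvm::object;

// Modeled on getFileNameForError() from the patch.
static std::string memberNameForError(const Archive::Child &C,
                                      unsigned Index) {
  Expected<StringRef> NameOrErr = C.getName();
  if (NameOrErr)
    return NameOrErr->str();
  // Already in an error path, so the name error itself is dropped.
  consumeError(NameOrErr.takeError());
  return "<member " + std::to_string(Index) + ">"; // illustrative fallback
}

static Error listMembers(const Archive &A) {
  Error Err = Error::success();
  unsigned Index = 0;
  for (const Archive::Child &C : A.children(Err))
    outs() << memberNameForError(C, Index++) << "\n";
  // children() reports iteration failures through Err after the loop.
  return Err;
}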
@@ -2188,11 +2238,14 @@ static void printArchiveChild(StringRef Filename, const Archive::Child &C, outs() << format("0%o ", Mode); } - outs() << format( - "%3d/%-3d %5" PRId64 " ", - unwrapOrError(C.getUID(), Filename, C, ArchitectureName), - unwrapOrError(C.getGID(), Filename, C, ArchitectureName), - unwrapOrError(C.getRawSize(), Filename, C, ArchitectureName)); + outs() << format("%3d/%-3d %5" PRId64 " ", + unwrapOrError(C.getUID(), getFileNameForError(C, ChildIndex), + Filename, ArchitectureName), + unwrapOrError(C.getGID(), getFileNameForError(C, ChildIndex), + Filename, ArchitectureName), + unwrapOrError(C.getRawSize(), + getFileNameForError(C, ChildIndex), Filename, + ArchitectureName)); StringRef RawLastModified = C.getRawLastModified(); if (verbose) { @@ -2215,14 +2268,17 @@ static void printArchiveChild(StringRef Filename, const Archive::Child &C, Expected NameOrErr = C.getName(); if (!NameOrErr) { consumeError(NameOrErr.takeError()); - outs() << unwrapOrError(C.getRawName(), Filename, C, ArchitectureName) + outs() << unwrapOrError(C.getRawName(), + getFileNameForError(C, ChildIndex), Filename, + ArchitectureName) << "\n"; } else { StringRef Name = NameOrErr.get(); outs() << Name << "\n"; } } else { - outs() << unwrapOrError(C.getRawName(), Filename, C, ArchitectureName) + outs() << unwrapOrError(C.getRawName(), getFileNameForError(C, ChildIndex), + Filename, ArchitectureName) << "\n"; } } @@ -2231,11 +2287,13 @@ static void printArchiveHeaders(StringRef Filename, Archive *A, bool verbose, bool print_offset, StringRef ArchitectureName = StringRef()) { Error Err = Error::success(); + size_t I = 0; for (const auto &C : A->children(Err, false)) - printArchiveChild(Filename, C, verbose, print_offset, ArchitectureName); + printArchiveChild(Filename, C, I++, verbose, print_offset, + ArchitectureName); if (Err) - report_error(std::move(Err), StringRef(), Filename, ArchitectureName); + reportError(std::move(Err), Filename, "", ArchitectureName); } static bool ValidateArchFlags() { @@ -2267,7 +2325,7 @@ void parseInputMachO(StringRef Filename) { Expected> BinaryOrErr = createBinary(Filename); if (!BinaryOrErr) { if (Error E = isNotObjectErrorInvalidFileType(BinaryOrErr.takeError())) - report_error(std::move(E), Filename); + reportError(std::move(E), Filename); else outs() << Filename << ": is not an object file\n"; return; @@ -2280,11 +2338,13 @@ void parseInputMachO(StringRef Filename) { printArchiveHeaders(Filename, A, !NonVerbose, ArchiveMemberOffsets); Error Err = Error::success(); + unsigned I = -1; for (auto &C : A->children(Err)) { + ++I; Expected> ChildOrErr = C.getAsBinary(); if (!ChildOrErr) { if (Error E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) - report_error(std::move(E), Filename, C); + reportError(std::move(E), getFileNameForError(C, I), Filename); continue; } if (MachOObjectFile *O = dyn_cast(&*ChildOrErr.get())) { @@ -2294,7 +2354,7 @@ void parseInputMachO(StringRef Filename) { } } if (Err) - report_error(std::move(Err), Filename); + reportError(std::move(Err), Filename); return; } if (MachOUniversalBinary *UB = dyn_cast(&Bin)) { @@ -2346,7 +2406,7 @@ void parseInputMachO(MachOUniversalBinary *UB) { ProcessMachO(Filename, MachOOF, "", ArchitectureName); } else if (Error E = isNotObjectErrorInvalidFileType( ObjOrErr.takeError())) { - report_error(std::move(E), Filename, StringRef(), ArchitectureName); + reportError(std::move(E), "", Filename, ArchitectureName); continue; } else if (Expected> AOrErr = I->getAsArchive()) { @@ -2359,11 +2419,15 @@ void 
parseInputMachO(MachOUniversalBinary *UB) { printArchiveHeaders(Filename, A.get(), !NonVerbose, ArchiveMemberOffsets, ArchitectureName); Error Err = Error::success(); + unsigned I = -1; for (auto &C : A->children(Err)) { + ++I; Expected> ChildOrErr = C.getAsBinary(); if (!ChildOrErr) { - if (Error E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) - report_error(std::move(E), Filename, C, ArchitectureName); + if (Error E = + isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) + reportError(std::move(E), getFileNameForError(C, I), Filename, + ArchitectureName); continue; } if (MachOObjectFile *O = @@ -2371,12 +2435,13 @@ void parseInputMachO(MachOUniversalBinary *UB) { ProcessMachO(Filename, O, O->getFileName(), ArchitectureName); } if (Err) - report_error(std::move(Err), Filename); + reportError(std::move(Err), Filename); } else { consumeError(AOrErr.takeError()); - error("Mach-O universal file: " + Filename + " for " + - "architecture " + StringRef(I->getArchFlagName()) + - " is not a Mach-O file or an archive file"); + reportError(Filename, + "Mach-O universal file for architecture " + + StringRef(I->getArchFlagName()) + + " is not a Mach-O file or an archive file"); } } } @@ -2406,7 +2471,7 @@ void parseInputMachO(MachOUniversalBinary *UB) { ProcessMachO(Filename, MachOOF); } else if (Error E = isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) { - report_error(std::move(E), Filename); + reportError(std::move(E), Filename); } else if (Expected> AOrErr = I->getAsArchive()) { std::unique_ptr &A = *AOrErr; @@ -2415,12 +2480,14 @@ void parseInputMachO(MachOUniversalBinary *UB) { printArchiveHeaders(Filename, A.get(), !NonVerbose, ArchiveMemberOffsets); Error Err = Error::success(); + unsigned I = -1; for (auto &C : A->children(Err)) { + ++I; Expected> ChildOrErr = C.getAsBinary(); if (!ChildOrErr) { if (Error E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) - report_error(std::move(E), Filename, C); + reportError(std::move(E), getFileNameForError(C, I), Filename); continue; } if (MachOObjectFile *O = @@ -2428,12 +2495,12 @@ void parseInputMachO(MachOUniversalBinary *UB) { ProcessMachO(Filename, O, O->getFileName()); } if (Err) - report_error(std::move(Err), Filename); + reportError(std::move(Err), Filename); } else { consumeError(AOrErr.takeError()); - error("Mach-O universal file: " + Filename + " for architecture " + - StringRef(I->getArchFlagName()) + - " is not a Mach-O file or an archive file"); + reportError(Filename, "Mach-O universal file for architecture " + + StringRef(I->getArchFlagName()) + + " is not a Mach-O file or an archive file"); } return; } @@ -2455,7 +2522,7 @@ void parseInputMachO(MachOUniversalBinary *UB) { ProcessMachO(Filename, MachOOF, "", ArchitectureName); } else if (Error E = isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) { - report_error(std::move(E), StringRef(), Filename, ArchitectureName); + reportError(std::move(E), Filename, "", ArchitectureName); } else if (Expected> AOrErr = I->getAsArchive()) { std::unique_ptr &A = *AOrErr; outs() << "Archive : " << Filename; @@ -2466,11 +2533,14 @@ void parseInputMachO(MachOUniversalBinary *UB) { printArchiveHeaders(Filename, A.get(), !NonVerbose, ArchiveMemberOffsets, ArchitectureName); Error Err = Error::success(); + unsigned I = -1; for (auto &C : A->children(Err)) { + ++I; Expected> ChildOrErr = C.getAsBinary(); if (!ChildOrErr) { if (Error E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) - report_error(std::move(E), Filename, C, ArchitectureName); + 
reportError(std::move(E), getFileNameForError(C, I), Filename, + ArchitectureName); continue; } if (MachOObjectFile *O = @@ -2481,12 +2551,12 @@ void parseInputMachO(MachOUniversalBinary *UB) { } } if (Err) - report_error(std::move(Err), Filename); + reportError(std::move(Err), Filename); } else { consumeError(AOrErr.takeError()); - error("Mach-O universal file: " + Filename + " for architecture " + - StringRef(I->getArchFlagName()) + - " is not a Mach-O file or an archive file"); + reportError(Filename, "Mach-O universal file for architecture " + + StringRef(I->getArchFlagName()) + + " is not a Mach-O file or an archive file"); } } } @@ -3083,7 +3153,7 @@ static void method_reference(struct DisassembleInfo *info, if (strcmp(*ReferenceName, "_objc_msgSend") == 0) { if (info->selector_name != nullptr) { if (info->class_name != nullptr) { - info->method = llvm::make_unique( + info->method = std::make_unique( 5 + strlen(info->class_name) + strlen(info->selector_name)); char *method = info->method.get(); if (method != nullptr) { @@ -3097,7 +3167,7 @@ static void method_reference(struct DisassembleInfo *info, } } else { info->method = - llvm::make_unique(9 + strlen(info->selector_name)); + std::make_unique(9 + strlen(info->selector_name)); char *method = info->method.get(); if (method != nullptr) { if (Arch == Triple::x86_64) @@ -3117,7 +3187,7 @@ static void method_reference(struct DisassembleInfo *info, } else if (strcmp(*ReferenceName, "_objc_msgSendSuper2") == 0) { if (info->selector_name != nullptr) { info->method = - llvm::make_unique(17 + strlen(info->selector_name)); + std::make_unique(17 + strlen(info->selector_name)); char *method = info->method.get(); if (method != nullptr) { if (Arch == Triple::x86_64) @@ -3217,7 +3287,13 @@ static const char *get_pointer_64(uint64_t Address, uint32_t &offset, continue; if (objc_only) { StringRef SectName; - ((*(info->Sections))[SectIdx]).getName(SectName); + Expected SecNameOrErr = + ((*(info->Sections))[SectIdx]).getName(); + if (SecNameOrErr) + SectName = *SecNameOrErr; + else + consumeError(SecNameOrErr.takeError()); + DataRefImpl Ref = ((*(info->Sections))[SectIdx]).getRawDataRefImpl(); StringRef SegName = info->O->getSectionFinalSegmentName(Ref); if (SegName != "__OBJC" && SectName != "__cstring") @@ -4009,7 +4085,12 @@ static const SectionRef get_section(MachOObjectFile *O, const char *segname, const char *sectname) { for (const SectionRef &Section : O->sections()) { StringRef SectName; - Section.getName(SectName); + Expected SecNameOrErr = Section.getName(); + if (SecNameOrErr) + SectName = *SecNameOrErr; + else + consumeError(SecNameOrErr.takeError()); + DataRefImpl Ref = Section.getRawDataRefImpl(); StringRef SegName = O->getSectionFinalSegmentName(Ref); if (SegName == segname && SectName == sectname) @@ -4026,7 +4107,12 @@ walk_pointer_list_64(const char *listname, const SectionRef S, return; StringRef SectName; - S.getName(SectName); + Expected SecNameOrErr = S.getName(); + if (SecNameOrErr) + SectName = *SecNameOrErr; + else + consumeError(SecNameOrErr.takeError()); + DataRefImpl Ref = S.getRawDataRefImpl(); StringRef SegName = O->getSectionFinalSegmentName(Ref); outs() << "Contents of (" << SegName << "," << SectName << ") section\n"; @@ -4075,8 +4161,7 @@ walk_pointer_list_32(const char *listname, const SectionRef S, if (S == SectionRef()) return; - StringRef SectName; - S.getName(SectName); + StringRef SectName = unwrapOrError(S.getName(), O->getFileName()); DataRefImpl Ref = S.getRawDataRefImpl(); StringRef SegName = 
O->getSectionFinalSegmentName(Ref); outs() << "Contents of (" << SegName << "," << SectName << ") section\n"; @@ -5750,7 +5835,12 @@ static void print_message_refs64(SectionRef S, struct DisassembleInfo *info) { return; StringRef SectName; - S.getName(SectName); + Expected SecNameOrErr = S.getName(); + if (SecNameOrErr) + SectName = *SecNameOrErr; + else + consumeError(SecNameOrErr.takeError()); + DataRefImpl Ref = S.getRawDataRefImpl(); StringRef SegName = info->O->getSectionFinalSegmentName(Ref); outs() << "Contents of (" << SegName << "," << SectName << ") section\n"; @@ -5813,7 +5903,12 @@ static void print_message_refs32(SectionRef S, struct DisassembleInfo *info) { return; StringRef SectName; - S.getName(SectName); + Expected SecNameOrErr = S.getName(); + if (SecNameOrErr) + SectName = *SecNameOrErr; + else + consumeError(SecNameOrErr.takeError()); + DataRefImpl Ref = S.getRawDataRefImpl(); StringRef SegName = info->O->getSectionFinalSegmentName(Ref); outs() << "Contents of (" << SegName << "," << SectName << ") section\n"; @@ -5859,7 +5954,12 @@ static void print_image_info64(SectionRef S, struct DisassembleInfo *info) { return; StringRef SectName; - S.getName(SectName); + Expected SecNameOrErr = S.getName(); + if (SecNameOrErr) + SectName = *SecNameOrErr; + else + consumeError(SecNameOrErr.takeError()); + DataRefImpl Ref = S.getRawDataRefImpl(); StringRef SegName = info->O->getSectionFinalSegmentName(Ref); outs() << "Contents of (" << SegName << "," << SectName << ") section\n"; @@ -5916,7 +6016,12 @@ static void print_image_info32(SectionRef S, struct DisassembleInfo *info) { return; StringRef SectName; - S.getName(SectName); + Expected SecNameOrErr = S.getName(); + if (SecNameOrErr) + SectName = *SecNameOrErr; + else + consumeError(SecNameOrErr.takeError()); + DataRefImpl Ref = S.getRawDataRefImpl(); StringRef SegName = info->O->getSectionFinalSegmentName(Ref); outs() << "Contents of (" << SegName << "," << SectName << ") section\n"; @@ -5966,7 +6071,12 @@ static void print_image_info(SectionRef S, struct DisassembleInfo *info) { const char *r; StringRef SectName; - S.getName(SectName); + Expected SecNameOrErr = S.getName(); + if (SecNameOrErr) + SectName = *SecNameOrErr; + else + consumeError(SecNameOrErr.takeError()); + DataRefImpl Ref = S.getRawDataRefImpl(); StringRef SegName = info->O->getSectionFinalSegmentName(Ref); outs() << "Contents of (" << SegName << "," << SectName << ") section\n"; @@ -6001,11 +6111,8 @@ static void printObjc2_64bit_MetaData(MachOObjectFile *O, bool verbose) { CreateSymbolAddressMap(O, &AddrMap); std::vector Sections; - for (const SectionRef &Section : O->sections()) { - StringRef SectName; - Section.getName(SectName); + for (const SectionRef &Section : O->sections()) Sections.push_back(Section); - } struct DisassembleInfo info(O, &AddrMap, &Sections, verbose); @@ -6086,11 +6193,8 @@ static void printObjc2_32bit_MetaData(MachOObjectFile *O, bool verbose) { CreateSymbolAddressMap(O, &AddrMap); std::vector Sections; - for (const SectionRef &Section : O->sections()) { - StringRef SectName; - Section.getName(SectName); + for (const SectionRef &Section : O->sections()) Sections.push_back(Section); - } struct DisassembleInfo info(O, &AddrMap, &Sections, verbose); @@ -6184,11 +6288,8 @@ static bool printObjc1_32bit_MetaData(MachOObjectFile *O, bool verbose) { CreateSymbolAddressMap(O, &AddrMap); std::vector Sections; - for (const SectionRef &Section : O->sections()) { - StringRef SectName; - Section.getName(SectName); + for (const SectionRef &Section : 
O->sections()) Sections.push_back(Section); - } struct DisassembleInfo info(O, &AddrMap, &Sections, verbose); @@ -6345,11 +6446,8 @@ static void DumpProtocolSection(MachOObjectFile *O, const char *sect, CreateSymbolAddressMap(O, &AddrMap); std::vector Sections; - for (const SectionRef &Section : O->sections()) { - StringRef SectName; - Section.getName(SectName); + for (const SectionRef &Section : O->sections()) Sections.push_back(Section); - } struct DisassembleInfo info(O, &AddrMap, &Sections, true); @@ -7203,7 +7301,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, std::vector Sections; std::vector Symbols; SmallVector FoundFns; - uint64_t BaseSegmentAddress; + uint64_t BaseSegmentAddress = 0; getSectionsAndSymbols(MachOOF, Sections, Symbols, FoundFns, BaseSegmentAddress); @@ -7242,10 +7340,24 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, // A separate DSym file path was specified, parse it as a macho file, // get the sections and supply it to the section name parsing machinery. if (!DSYMFile.empty()) { + std::string DSYMPath(DSYMFile); + + // If DSYMPath is a .dSYM directory, append the Mach-O file. + if (llvm::sys::fs::is_directory(DSYMPath) && + llvm::sys::path::extension(DSYMPath) == ".dSYM") { + SmallString<128> ShortName(llvm::sys::path::filename(DSYMPath)); + llvm::sys::path::replace_extension(ShortName, ""); + SmallString<1024> FullPath(DSYMPath); + llvm::sys::path::append(FullPath, "Contents", "Resources", "DWARF", + ShortName); + DSYMPath = FullPath.str(); + } + + // Load the file. ErrorOr> BufOrErr = - MemoryBuffer::getFileOrSTDIN(DSYMFile); + MemoryBuffer::getFileOrSTDIN(DSYMPath); if (std::error_code EC = BufOrErr.getError()) { - report_error(errorCodeToError(EC), DSYMFile); + reportError(errorCodeToError(EC), DSYMPath); return; } @@ -7255,13 +7367,12 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, Expected> BinaryOrErr = createBinary(DSYMBuf.get()->getMemBufferRef()); if (!BinaryOrErr) { - report_error(BinaryOrErr.takeError(), DSYMFile); + reportError(BinaryOrErr.takeError(), DSYMPath); return; } - // We need to keep the Binary elive with the buffer + // We need to keep the Binary alive with the buffer DSYMBinary = std::move(BinaryOrErr.get()); - if (ObjectFile *O = dyn_cast(DSYMBinary.get())) { // this is a Mach-O object file, use it if (MachOObjectFile *MachDSYM = dyn_cast(&*O)) { @@ -7269,7 +7380,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, } else { WithColor::error(errs(), "llvm-objdump") - << DSYMFile << " is not a Mach-O file type.\n"; + << DSYMPath << " is not a Mach-O file type.\n"; return; } } @@ -7289,19 +7400,19 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, Triple T = MachOObjectFile::getArchTriple(CPUType, CPUSubType, nullptr, &ArchFlag); Expected> MachDSYM = - UB->getObjectForArch(ArchFlag); + UB->getMachOObjectForArch(ArchFlag); if (!MachDSYM) { - report_error(MachDSYM.takeError(), DSYMFile); + reportError(MachDSYM.takeError(), DSYMPath); return; } - // We need to keep the Binary elive with the buffer + // We need to keep the Binary alive with the buffer DbgObj = &*MachDSYM.get(); DSYMBinary = std::move(*MachDSYM); } else { WithColor::error(errs(), "llvm-objdump") - << DSYMFile << " is not a Mach-O or Universal file type.\n"; + << DSYMPath << " is not a Mach-O or Universal file type.\n"; return; } } @@ -7314,8 +7425,12 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, outs() 
<< "(" << DisSegName << "," << DisSectName << ") section\n"; for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) { - StringRef SectName; - if (Sections[SectIdx].getName(SectName) || SectName != DisSectName) + Expected SecNameOrErr = Sections[SectIdx].getName(); + if (!SecNameOrErr) { + consumeError(SecNameOrErr.takeError()); + continue; + } + if (*SecNameOrErr != DisSectName) continue; DataRefImpl DR = Sections[SectIdx].getRawDataRefImpl(); @@ -7496,24 +7611,8 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, if (!NoShowRawInsn || Arch == Triple::arm) outs() << "\t"; - // Check the data in code table here to see if this is data not an - // instruction to be disassembled. - DiceTable Dice; - Dice.push_back(std::make_pair(PC, DiceRef())); - dice_table_iterator DTI = - std::search(Dices.begin(), Dices.end(), Dice.begin(), Dice.end(), - compareDiceTableEntries); - if (DTI != Dices.end()) { - uint16_t Length; - DTI->second.getLength(Length); - uint16_t Kind; - DTI->second.getKind(Kind); - Size = DumpDataInCode(Bytes.data() + Index, Length, Kind); - if ((Kind == MachO::DICE_KIND_JUMP_TABLE8) && - (PC == (DTI->first + Length - 1)) && (Length & 1)) - Size++; + if (DumpAndSkipDataInCode(PC, Bytes.data() + Index, Dices, Size)) continue; - } SmallVector AnnotationsBytes; raw_svector_ostream Annotations(AnnotationsBytes); @@ -7588,6 +7687,10 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, MCInst Inst; uint64_t PC = SectAddress + Index; + + if (DumpAndSkipDataInCode(PC, Bytes.data() + Index, Dices, InstSize)) + continue; + SmallVector AnnotationsBytes; raw_svector_ostream Annotations(AnnotationsBytes); if (DisAsm->getInstruction(Inst, InstSize, Bytes.slice(Index), PC, @@ -7724,8 +7827,12 @@ static void findUnwindRelocNameAddend(const MachOObjectFile *Obj, auto Sym = Symbols.upper_bound(Addr); if (Sym == Symbols.begin()) { // The first symbol in the object is after this reference, the best we can - // do is section-relative notation. - RelocSection.getName(Name); + // do is section-relative notation. + if (Expected NameOrErr = RelocSection.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + Addend = Addr - SectionAddr; return; } @@ -7744,7 +7851,11 @@ static void findUnwindRelocNameAddend(const MachOObjectFile *Obj, // There is a symbol before this reference, but it's in a different // section. Probably not helpful to mention it, so use the section name. 
- RelocSection.getName(Name); + if (Expected NameOrErr = RelocSection.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + Addend = Addr - SectionAddr; } @@ -8109,7 +8220,11 @@ void printMachOUnwindInfo(const MachOObjectFile *Obj) { for (const SectionRef &Section : Obj->sections()) { StringRef SectName; - Section.getName(SectName); + if (Expected NameOrErr = Section.getName()) + SectName = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + if (SectName == "__compact_unwind") printMachOCompactUnwindSection(Obj, Symbols, Section); else if (SectName == "__unwind_info") @@ -10191,7 +10306,7 @@ void printMachOExportsTrie(const object::MachOObjectFile *Obj) { outs() << "\n"; } if (Err) - report_error(std::move(Err), Obj->getFileName()); + reportError(std::move(Err), Obj->getFileName()); } //===----------------------------------------------------------------------===// @@ -10212,7 +10327,7 @@ void printMachORebaseTable(object::MachOObjectFile *Obj) { Address, Entry.typeName().str().c_str()); } if (Err) - report_error(std::move(Err), Obj->getFileName()); + reportError(std::move(Err), Obj->getFileName()); } static StringRef ordinalName(const object::MachOObjectFile *Obj, int Ordinal) { @@ -10264,7 +10379,7 @@ void printMachOBindTable(object::MachOObjectFile *Obj) { << Entry.symbolName() << Attr << "\n"; } if (Err) - report_error(std::move(Err), Obj->getFileName()); + reportError(std::move(Err), Obj->getFileName()); } //===----------------------------------------------------------------------===// @@ -10289,7 +10404,7 @@ void printMachOLazyBindTable(object::MachOObjectFile *Obj) { << Entry.symbolName() << "\n"; } if (Err) - report_error(std::move(Err), Obj->getFileName()); + reportError(std::move(Err), Obj->getFileName()); } //===----------------------------------------------------------------------===// @@ -10321,7 +10436,7 @@ void printMachOWeakBindTable(object::MachOObjectFile *Obj) { << "\n"; } if (Err) - report_error(std::move(Err), Obj->getFileName()); + reportError(std::move(Err), Obj->getFileName()); } // get_dyld_bind_info_symbolname() is used for disassembly and passed an @@ -10331,7 +10446,7 @@ void printMachOWeakBindTable(object::MachOObjectFile *Obj) { static const char *get_dyld_bind_info_symbolname(uint64_t ReferenceValue, struct DisassembleInfo *info) { if (info->bindtable == nullptr) { - info->bindtable = llvm::make_unique(); + info->bindtable = std::make_unique(); Error Err = Error::success(); for (const object::MachOBindEntry &Entry : info->O->bindTable(Err)) { uint64_t Address = Entry.address(); @@ -10340,7 +10455,7 @@ static const char *get_dyld_bind_info_symbolname(uint64_t ReferenceValue, (*info->bindtable)[Address] = name; } if (Err) - report_error(std::move(Err), info->O->getFileName()); + reportError(std::move(Err), info->O->getFileName()); } auto name = info->bindtable->lookup(ReferenceValue); return !name.empty() ? 
name.data() : nullptr; diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp index 58981203c59e..34a44b3b7fa9 100644 --- a/tools/llvm-objdump/llvm-objdump.cpp +++ b/tools/llvm-objdump/llvm-objdump.cpp @@ -51,6 +51,7 @@ #include "llvm/Support/Errc.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Format.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/Host.h" #include "llvm/Support/InitLLVM.h" @@ -341,78 +342,84 @@ static StringRef ToolName; typedef std::vector> SectionSymbolsTy; -static bool shouldKeep(object::SectionRef S) { +namespace { +struct FilterResult { + // True if the section should not be skipped. + bool Keep; + + // True if the index counter should be incremented, even if the section should + // be skipped. For example, sections may be skipped if they are not included + // in the --section flag, but we still want those to count toward the section + // count. + bool IncrementIndex; +}; +} // namespace + +static FilterResult checkSectionFilter(object::SectionRef S) { if (FilterSections.empty()) - return true; - StringRef SecName; - std::error_code error = S.getName(SecName); - if (error) - return false; + return {/*Keep=*/true, /*IncrementIndex=*/true}; + + Expected SecNameOrErr = S.getName(); + if (!SecNameOrErr) { + consumeError(SecNameOrErr.takeError()); + return {/*Keep=*/false, /*IncrementIndex=*/false}; + } + StringRef SecName = *SecNameOrErr; + // StringSet does not allow empty key so avoid adding sections with // no name (such as the section with index 0) here. if (!SecName.empty()) FoundSectionSet.insert(SecName); - return is_contained(FilterSections, SecName); -} -SectionFilter ToolSectionFilter(object::ObjectFile const &O) { - return SectionFilter([](object::SectionRef S) { return shouldKeep(S); }, O); -} - -void error(std::error_code EC) { - if (!EC) - return; - WithColor::error(errs(), ToolName) - << "reading file: " << EC.message() << ".\n"; - errs().flush(); - exit(1); -} - -void error(Error E) { - if (!E) - return; - WithColor::error(errs(), ToolName) << toString(std::move(E)); - exit(1); + // Only show the section if it's in the FilterSections list, but always + // increment so the indexing is stable. + return {/*Keep=*/is_contained(FilterSections, SecName), + /*IncrementIndex=*/true}; } -LLVM_ATTRIBUTE_NORETURN void error(Twine Message) { - WithColor::error(errs(), ToolName) << Message << ".\n"; - errs().flush(); - exit(1); +SectionFilter ToolSectionFilter(object::ObjectFile const &O, uint64_t *Idx) { + // Start at UINT64_MAX so that the first index returned after an increment is + // zero (after the unsigned wrap). + if (Idx) + *Idx = UINT64_MAX; + return SectionFilter( + [Idx](object::SectionRef S) { + FilterResult Result = checkSectionFilter(S); + if (Idx != nullptr && Result.IncrementIndex) + *Idx += 1; + return Result.Keep; + }, + O); } -void warn(StringRef Message) { - WithColor::warning(errs(), ToolName) << Message << ".\n"; - errs().flush(); +std::string getFileNameForError(const object::Archive::Child &C, + unsigned Index) { + Expected NameOrErr = C.getName(); + if (NameOrErr) + return NameOrErr.get(); + // If we have an error getting the name then we print the index of the archive + // member. Since we are already in an error state, we just ignore this error. 
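ToolSectionFilter gains an optional index counter: the counter starts at UINT64_MAX so the first counted section wraps to index 0, and sections hidden by --section can still advance it so numbering stays stable. A stripped-down sketch with plain placeholders instead of object::SectionRef and the tool's SectionFilter type:

// Counting filter sketch; the predicate and section type are placeholders.
#include <cstdint>
#include <functional>

struct FilterResult {
  bool Keep;           // print this section
  bool IncrementIndex; // still counts toward the section index
};

using SectionPredicate = std::function<FilterResult(unsigned SectionNo)>;

static std::function<bool(unsigned)>
makeCountingFilter(SectionPredicate Check, uint64_t *Idx) {
  if (Idx)
    *Idx = UINT64_MAX; // first increment wraps to 0
  return [Check, Idx](unsigned SectionNo) {
    FilterResult Result = Check(SectionNo);
    if (Idx && Result.IncrementIndex)
      *Idx += 1;
    return Result.Keep;
  };
}

Keeping Keep and IncrementIndex separate is what lets unnamed or filtered-out sections be skipped without disturbing the indices of the sections that follow.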
+ consumeError(NameOrErr.takeError()); + return ""; } -static void warn(Twine Message) { +void reportWarning(Twine Message, StringRef File) { // Output order between errs() and outs() matters especially for archive // files where the output is per member object. outs().flush(); - WithColor::warning(errs(), ToolName) << Message << "\n"; + WithColor::warning(errs(), ToolName) + << "'" << File << "': " << Message << "\n"; errs().flush(); } -LLVM_ATTRIBUTE_NORETURN void report_error(StringRef File, Twine Message) { - WithColor::error(errs(), ToolName) - << "'" << File << "': " << Message << ".\n"; - exit(1); -} - -LLVM_ATTRIBUTE_NORETURN void report_error(Error E, StringRef File) { - assert(E); - std::string Buf; - raw_string_ostream OS(Buf); - logAllUnhandledErrors(std::move(E), OS); - OS.flush(); - WithColor::error(errs(), ToolName) << "'" << File << "': " << Buf; +LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, Twine Message) { + WithColor::error(errs(), ToolName) << "'" << File << "': " << Message << "\n"; exit(1); } -LLVM_ATTRIBUTE_NORETURN void report_error(Error E, StringRef ArchiveName, - StringRef FileName, - StringRef ArchitectureName) { +LLVM_ATTRIBUTE_NORETURN void reportError(Error E, StringRef FileName, + StringRef ArchiveName, + StringRef ArchitectureName) { assert(E); WithColor::error(errs(), ToolName); if (ArchiveName != "") @@ -429,18 +436,13 @@ LLVM_ATTRIBUTE_NORETURN void report_error(Error E, StringRef ArchiveName, exit(1); } -LLVM_ATTRIBUTE_NORETURN void report_error(Error E, StringRef ArchiveName, - const object::Archive::Child &C, - StringRef ArchitectureName) { - Expected NameOrErr = C.getName(); - // TODO: if we have a error getting the name then it would be nice to print - // the index of which archive member this is and or its offset in the - // archive instead of "???" as the name. - if (!NameOrErr) { - consumeError(NameOrErr.takeError()); - report_error(std::move(E), ArchiveName, "???", ArchitectureName); - } else - report_error(std::move(E), ArchiveName, NameOrErr.get(), ArchitectureName); +static void reportCmdLineWarning(Twine Message) { + WithColor::warning(errs(), ToolName) << Message << "\n"; +} + +LLVM_ATTRIBUTE_NORETURN static void reportCmdLineError(Twine Message) { + WithColor::error(errs(), ToolName) << Message << "\n"; + exit(1); } static void warnOnNoMatchForSections() { @@ -455,37 +457,29 @@ static void warnOnNoMatchForSections() { // Warn only if no section in FilterSections is matched. for (StringRef S : MissingSections) - warn("section '" + S + "' mentioned in a -j/--section option, but not " - "found in any input file"); + reportCmdLineWarning("section '" + S + + "' mentioned in a -j/--section option, but not " + "found in any input file"); } -static const Target *getTarget(const ObjectFile *Obj = nullptr) { +static const Target *getTarget(const ObjectFile *Obj) { // Figure out the target triple. Triple TheTriple("unknown-unknown-unknown"); if (TripleName.empty()) { - if (Obj) - TheTriple = Obj->makeTriple(); + TheTriple = Obj->makeTriple(); } else { TheTriple.setTriple(Triple::normalize(TripleName)); - - // Use the triple, but also try to combine with ARM build attributes. - if (Obj) { - auto Arch = Obj->getArch(); - if (Arch == Triple::arm || Arch == Triple::armeb) - Obj->setARMSubArch(TheTriple); - } + auto Arch = Obj->getArch(); + if (Arch == Triple::arm || Arch == Triple::armeb) + Obj->setARMSubArch(TheTriple); } // Get the target specific parser. 
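
The once-per-file warning added to SourcePrinter relies on StringSet::insert reporting whether the key was newly added. A standalone sketch of that idiom follows; the names and message text are illustrative.

  #include "llvm/ADT/StringSet.h"
  #include "llvm/Support/raw_ostream.h"

  static llvm::StringSet<> ReportedMissing; // stand-in for MissingSources

  static void warnMissingSourceOnce(llvm::StringRef File) {
    // insert(...).second is true only the first time a given path is seen.
    if (ReportedMissing.insert(File).second)
      llvm::errs() << "warning: failed to find source " << File << "\n";
  }
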
std::string Error; const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple, Error); - if (!TheTarget) { - if (Obj) - report_error(Obj->getFileName(), "can't find target: " + Error); - else - error("can't find target: " + Error); - } + if (!TheTarget) + reportError(Obj->getFileName(), "can't find target: " + Error); // Update the triple name and return the found target. TripleName = TheTriple.getTriple(); @@ -548,17 +542,22 @@ protected: DILineInfo OldLineInfo; const ObjectFile *Obj = nullptr; std::unique_ptr Symbolizer; - // File name to file contents of source + // File name to file contents of source. std::unordered_map> SourceCache; - // Mark the line endings of the cached source + // Mark the line endings of the cached source. std::unordered_map> LineCache; + // Keep track of missing sources. + StringSet<> MissingSources; + // Only emit 'no debug info' warning once. + bool WarnedNoDebugInfo; private: bool cacheSource(const DILineInfo& LineInfoFile); public: SourcePrinter() = default; - SourcePrinter(const ObjectFile *Obj, StringRef DefaultArch) : Obj(Obj) { + SourcePrinter(const ObjectFile *Obj, StringRef DefaultArch) + : Obj(Obj), WarnedNoDebugInfo(false) { symbolize::LLVMSymbolizer::Options SymbolizerOpts; SymbolizerOpts.PrintFunctions = DILineInfoSpecifier::FunctionNameKind::None; SymbolizerOpts.Demangle = false; @@ -568,6 +567,7 @@ public: virtual ~SourcePrinter() = default; virtual void printSourceLine(raw_ostream &OS, object::SectionedAddress Address, + StringRef ObjectFilename, StringRef Delimiter = "; "); }; @@ -577,8 +577,12 @@ bool SourcePrinter::cacheSource(const DILineInfo &LineInfo) { Buffer = MemoryBuffer::getMemBuffer(*LineInfo.Source); } else { auto BufferOrError = MemoryBuffer::getFile(LineInfo.FileName); - if (!BufferOrError) + if (!BufferOrError) { + if (MissingSources.insert(LineInfo.FileName).second) + reportWarning("failed to find source " + LineInfo.FileName, + Obj->getFileName()); return false; + } Buffer = std::move(*BufferOrError); } // Chomp the file to get lines @@ -599,20 +603,33 @@ bool SourcePrinter::cacheSource(const DILineInfo &LineInfo) { void SourcePrinter::printSourceLine(raw_ostream &OS, object::SectionedAddress Address, + StringRef ObjectFilename, StringRef Delimiter) { if (!Symbolizer) return; DILineInfo LineInfo = DILineInfo(); auto ExpectedLineInfo = Symbolizer->symbolizeCode(*Obj, Address); + std::string ErrorMessage; if (!ExpectedLineInfo) - consumeError(ExpectedLineInfo.takeError()); + ErrorMessage = toString(ExpectedLineInfo.takeError()); else LineInfo = *ExpectedLineInfo; - if ((LineInfo.FileName == "") || LineInfo.Line == 0 || - ((OldLineInfo.Line == LineInfo.Line) && - (OldLineInfo.FileName == LineInfo.FileName))) + if (LineInfo.FileName == DILineInfo::BadString) { + if (!WarnedNoDebugInfo) { + std::string Warning = + "failed to parse debug information for " + ObjectFilename.str(); + if (!ErrorMessage.empty()) + Warning += ": " + ErrorMessage; + reportWarning(Warning, ObjectFilename); + WarnedNoDebugInfo = true; + } + return; + } + + if (LineInfo.Line == 0 || ((OldLineInfo.Line == LineInfo.Line) && + (OldLineInfo.FileName == LineInfo.FileName))) return; if (PrintLines) @@ -623,8 +640,14 @@ void SourcePrinter::printSourceLine(raw_ostream &OS, return; auto LineBuffer = LineCache.find(LineInfo.FileName); if (LineBuffer != LineCache.end()) { - if (LineInfo.Line > LineBuffer->second.size()) + if (LineInfo.Line > LineBuffer->second.size()) { + reportWarning( + formatv( + "debug info line number {0} exceeds the number of lines 
in {1}", + LineInfo.Line, LineInfo.FileName), + ObjectFilename); return; + } // Vector begins at 0, line numbers are non-zero OS << Delimiter << LineBuffer->second[LineInfo.Line - 1] << '\n'; } @@ -646,13 +669,14 @@ static bool hasMappingSymbols(const ObjectFile *Obj) { return isArmElf(Obj) || isAArch64Elf(Obj); } -static void printRelocation(const RelocationRef &Rel, uint64_t Address, - bool Is64Bits) { +static void printRelocation(StringRef FileName, const RelocationRef &Rel, + uint64_t Address, bool Is64Bits) { StringRef Fmt = Is64Bits ? "\t\t%016" PRIx64 ": " : "\t\t\t%08" PRIx64 ": "; SmallString<16> Name; SmallString<32> Val; Rel.getTypeName(Name); - error(getRelocationValueString(Rel, Val)); + if (Error E = getRelocationValueString(Rel, Val)) + reportError(std::move(E), FileName); outs() << format(Fmt.data(), Address) << Name << "\t" << Val << "\n"; } @@ -663,29 +687,25 @@ public: ArrayRef Bytes, object::SectionedAddress Address, raw_ostream &OS, StringRef Annot, MCSubtargetInfo const &STI, - SourcePrinter *SP, + SourcePrinter *SP, StringRef ObjectFilename, std::vector *Rels = nullptr) { if (SP && (PrintSource || PrintLines)) - SP->printSourceLine(OS, Address); + SP->printSourceLine(OS, Address, ObjectFilename); - { - formatted_raw_ostream FOS(OS); - if (!NoLeadingAddr) - FOS << format("%8" PRIx64 ":", Address.Address); - if (!NoShowRawInsn) { - FOS << ' '; - dumpBytes(Bytes, FOS); - } - FOS.flush(); - // The output of printInst starts with a tab. Print some spaces so that - // the tab has 1 column and advances to the target tab stop. - unsigned TabStop = NoShowRawInsn ? 16 : 40; - unsigned Column = FOS.getColumn(); - FOS.indent(Column < TabStop - 1 ? TabStop - 1 - Column : 7 - Column % 8); - - // The dtor calls flush() to ensure the indent comes before printInst(). + size_t Start = OS.tell(); + if (!NoLeadingAddr) + OS << format("%8" PRIx64 ":", Address.Address); + if (!NoShowRawInsn) { + OS << ' '; + dumpBytes(Bytes, OS); } + // The output of printInst starts with a tab. Print some spaces so that + // the tab has 1 column and advances to the target tab stop. + unsigned TabStop = NoShowRawInsn ? 16 : 40; + unsigned Column = OS.tell() - Start; + OS.indent(Column < TabStop - 1 ? 
TabStop - 1 - Column : 7 - Column % 8); + if (MI) IP.printInst(MI, OS, "", STI); else @@ -711,9 +731,10 @@ public: void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef Bytes, object::SectionedAddress Address, raw_ostream &OS, StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP, + StringRef ObjectFilename, std::vector *Rels) override { if (SP && (PrintSource || PrintLines)) - SP->printSourceLine(OS, Address, ""); + SP->printSourceLine(OS, Address, ObjectFilename, ""); if (!MI) { printLead(Bytes, Address.Address, OS); OS << " "; @@ -739,7 +760,7 @@ public: auto PrintReloc = [&]() -> void { while ((RelCur != RelEnd) && (RelCur->getOffset() <= Address.Address)) { if (RelCur->getOffset() == Address.Address) { - printRelocation(*RelCur, Address.Address, false); + printRelocation(ObjectFilename, *RelCur, Address.Address, false); return; } ++RelCur; @@ -750,7 +771,7 @@ public: OS << Separator; Separator = "\n"; if (SP && (PrintSource || PrintLines)) - SP->printSourceLine(OS, Address, ""); + SP->printSourceLine(OS, Address, ObjectFilename, ""); printLead(Bytes, Address.Address, OS); OS << Preamble; Preamble = " "; @@ -780,9 +801,10 @@ public: void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef Bytes, object::SectionedAddress Address, raw_ostream &OS, StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP, + StringRef ObjectFilename, std::vector *Rels) override { if (SP && (PrintSource || PrintLines)) - SP->printSourceLine(OS, Address); + SP->printSourceLine(OS, Address, ObjectFilename); if (MI) { SmallString<40> InstStr; @@ -831,9 +853,10 @@ public: void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef Bytes, object::SectionedAddress Address, raw_ostream &OS, StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP, + StringRef ObjectFilename, std::vector *Rels) override { if (SP && (PrintSource || PrintLines)) - SP->printSourceLine(OS, Address); + SP->printSourceLine(OS, Address, ObjectFilename); if (!NoLeadingAddr) OS << format("%8" PRId64 ":", Address.Address / 8); if (!NoShowRawInsn) { @@ -924,10 +947,12 @@ static void addPltEntries(const ObjectFile *Obj, StringSaver &Saver) { Optional Plt = None; for (const SectionRef &Section : Obj->sections()) { - StringRef Name; - if (Section.getName(Name)) + Expected SecNameOrErr = Section.getName(); + if (!SecNameOrErr) { + consumeError(SecNameOrErr.takeError()); continue; - if (Name == ".plt") + } + if (*SecNameOrErr == ".plt") Plt = Section; } if (!Plt) @@ -968,9 +993,18 @@ static size_t countSkippableZeroBytes(ArrayRef Buf) { static std::map> getRelocsMap(object::ObjectFile const &Obj) { std::map> Ret; + uint64_t I = (uint64_t)-1; for (SectionRef Sec : Obj.sections()) { - section_iterator Relocated = Sec.getRelocatedSection(); - if (Relocated == Obj.section_end() || !shouldKeep(*Relocated)) + ++I; + Expected RelocatedOrErr = Sec.getRelocatedSection(); + if (!RelocatedOrErr) + reportError(Obj.getFileName(), + "section (" + Twine(I) + + "): failed to get a relocated section: " + + toString(RelocatedOrErr.takeError())); + + section_iterator Relocated = *RelocatedOrErr; + if (Relocated == Obj.section_end() || !checkSectionFilter(*Relocated).Keep) continue; std::vector &V = Ret[*Relocated]; for (const RelocationRef &R : Sec.relocations()) @@ -1137,11 +1171,14 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, if (const auto *COFFObj = dyn_cast(Obj)) { for (const auto &ExportEntry : COFFObj->export_directories()) { StringRef Name; - error(ExportEntry.getSymbolName(Name)); + if 
(std::error_code EC = ExportEntry.getSymbolName(Name)) + reportError(errorCodeToError(EC), Obj->getFileName()); if (Name.empty()) continue; + uint32_t RVA; - error(ExportEntry.getExportRVA(RVA)); + if (std::error_code EC = ExportEntry.getExportRVA(RVA)) + reportError(errorCodeToError(EC), Obj->getFileName()); uint64_t VA = COFFObj->getImageBase() + RVA; auto Sec = partition_point( @@ -1210,9 +1247,8 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, DataRefImpl DR = Section.getRawDataRefImpl(); SegmentName = MachO->getSectionFinalSegmentName(DR); } - StringRef SectionName; - error(Section.getName(SectionName)); + StringRef SectionName = unwrapOrError(Section.getName(), Obj->getFileName()); // If the section has no symbol at the start, just insert a dummy one. if (Symbols.empty() || std::get<0>(Symbols[0]) != 0) { Symbols.insert( @@ -1381,10 +1417,10 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, if (Size == 0) Size = 1; - PIP.printInst( - *IP, Disassembled ? &Inst : nullptr, Bytes.slice(Index, Size), - {SectionAddr + Index + VMAAdjustment, Section.getIndex()}, outs(), - "", *STI, &SP, &Rels); + PIP.printInst(*IP, Disassembled ? &Inst : nullptr, + Bytes.slice(Index, Size), + {SectionAddr + Index + VMAAdjustment, Section.getIndex()}, + outs(), "", *STI, &SP, Obj->getFileName(), &Rels); outs() << CommentStream.str(); Comments.clear(); @@ -1470,7 +1506,8 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, Offset += AdjustVMA; } - printRelocation(*RelCur, SectionAddr + Offset, Is64Bits); + printRelocation(Obj->getFileName(), *RelCur, SectionAddr + Offset, + Is64Bits); ++RelCur; } } @@ -1482,7 +1519,8 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, StringSet<> MissingDisasmFuncsSet = set_difference(DisasmFuncsSet, FoundDisasmFuncsSet); for (StringRef MissingDisasmFunc : MissingDisasmFuncsSet.keys()) - warn("failed to disassemble missing function " + MissingDisasmFunc); + reportWarning("failed to disassemble missing function " + MissingDisasmFunc, + FileName); } static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) { @@ -1497,24 +1535,24 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) { std::unique_ptr MRI( TheTarget->createMCRegInfo(TripleName)); if (!MRI) - report_error(Obj->getFileName(), - "no register info for target " + TripleName); + reportError(Obj->getFileName(), + "no register info for target " + TripleName); // Set up disassembler. 
std::unique_ptr AsmInfo( TheTarget->createMCAsmInfo(*MRI, TripleName)); if (!AsmInfo) - report_error(Obj->getFileName(), - "no assembly info for target " + TripleName); + reportError(Obj->getFileName(), + "no assembly info for target " + TripleName); std::unique_ptr STI( TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString())); if (!STI) - report_error(Obj->getFileName(), - "no subtarget info for target " + TripleName); + reportError(Obj->getFileName(), + "no subtarget info for target " + TripleName); std::unique_ptr MII(TheTarget->createMCInstrInfo()); if (!MII) - report_error(Obj->getFileName(), - "no instruction info for target " + TripleName); + reportError(Obj->getFileName(), + "no instruction info for target " + TripleName); MCObjectFileInfo MOFI; MCContext Ctx(AsmInfo.get(), MRI.get(), &MOFI); // FIXME: for now initialize MCObjectFileInfo with default values @@ -1523,8 +1561,7 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) { std::unique_ptr DisAsm( TheTarget->createMCDisassembler(*STI, Ctx)); if (!DisAsm) - report_error(Obj->getFileName(), - "no disassembler for target " + TripleName); + reportError(Obj->getFileName(), "no disassembler for target " + TripleName); // If we have an ARM object file, we need a second disassembler, because // ARM CPUs have two different instruction sets: ARM mode, and Thumb mode. @@ -1549,8 +1586,8 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) { std::unique_ptr IP(TheTarget->createMCInstPrinter( Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI)); if (!IP) - report_error(Obj->getFileName(), - "no instruction printer for target " + TripleName); + reportError(Obj->getFileName(), + "no instruction printer for target " + TripleName); IP->setPrintImmHex(PrintImmHex); PrettyPrinter &PIP = selectPrettyPrinter(Triple(TripleName)); @@ -1558,7 +1595,8 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) { for (StringRef Opt : DisassemblerOptions) if (!IP->applyTargetSpecificCLOption(Opt)) - error("Unrecognized disassembler option: " + Opt); + reportError(Obj->getFileName(), + "Unrecognized disassembler option: " + Opt); disassembleObject(TheTarget, Obj, Ctx, DisAsm.get(), SecondaryDisAsm.get(), MIA.get(), IP.get(), STI.get(), SecondarySTI.get(), PIP, @@ -1577,16 +1615,21 @@ void printRelocations(const ObjectFile *Obj) { // sections. Usually, there is an only one relocation section for // each relocated section. 
MapVector> SecToRelSec; - for (const SectionRef &Section : ToolSectionFilter(*Obj)) { + uint64_t Ndx; + for (const SectionRef &Section : ToolSectionFilter(*Obj, &Ndx)) { if (Section.relocation_begin() == Section.relocation_end()) continue; - const SectionRef TargetSec = *Section.getRelocatedSection(); - SecToRelSec[TargetSec].push_back(Section); + Expected SecOrErr = Section.getRelocatedSection(); + if (!SecOrErr) + reportError(Obj->getFileName(), + "section (" + Twine(Ndx) + + "): unable to get a relocation target: " + + toString(SecOrErr.takeError())); + SecToRelSec[**SecOrErr].push_back(Section); } for (std::pair> &P : SecToRelSec) { - StringRef SecName; - error(P.first.getName(SecName)); + StringRef SecName = unwrapOrError(P.first.getName(), Obj->getFileName()); outs() << "RELOCATION RECORDS FOR [" << SecName << "]:\n"; for (SectionRef Section : P.second) { @@ -1597,7 +1640,9 @@ void printRelocations(const ObjectFile *Obj) { if (Address < StartAddress || Address > StopAddress || getHidden(Reloc)) continue; Reloc.getTypeName(RelocName); - error(getRelocationValueString(Reloc, ValueStr)); + if (Error E = getRelocationValueString(Reloc, ValueStr)) + reportError(std::move(E), Obj->getFileName()); + outs() << format(Fmt.data(), Address) << " " << RelocName << " " << ValueStr << "\n"; } @@ -1613,7 +1658,7 @@ void printDynamicRelocations(const ObjectFile *Obj) { const auto *Elf = dyn_cast(Obj); if (!Elf || Elf->getEType() != ELF::ET_DYN) { - error("not a dynamic object"); + reportError(Obj->getFileName(), "not a dynamic object"); return; } @@ -1629,7 +1674,8 @@ void printDynamicRelocations(const ObjectFile *Obj) { SmallString<32> RelocName; SmallString<32> ValueStr; Reloc.getTypeName(RelocName); - error(getRelocationValueString(Reloc, ValueStr)); + if (Error E = getRelocationValueString(Reloc, ValueStr)) + reportError(std::move(E), Obj->getFileName()); outs() << format(Fmt.data(), Address) << " " << RelocName << " " << ValueStr << "\n"; } @@ -1647,47 +1693,64 @@ static bool shouldDisplayLMA(const ObjectFile *Obj) { return ShowLMA; } +static size_t getMaxSectionNameWidth(const ObjectFile *Obj) { + // Default column width for names is 13 even if no names are that long. 
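
The reworked section-header printing pads columns with left_justify and prints addresses with format_hex_no_prefix. A small standalone sketch of those two formatters, with illustrative widths and values:

  #include "llvm/Support/Format.h"
  #include "llvm/Support/raw_ostream.h"

  int main() {
    using namespace llvm;
    unsigned NameWidth = 13; // at least 13, or the longest section name
    unsigned AddrWidth = 16; // 2 * bytes-in-address, e.g. 16 for ELF64
    outs() << "Idx " << left_justify("Name", NameWidth) << " Size "
           << left_justify("VMA", AddrWidth) << " Type\n";
    outs() << format("%3u %-*s %08x ", 0u, (int)NameWidth, ".text", 0x42u)
           << format_hex_no_prefix(0x400000, AddrWidth) << " TEXT\n";
  }
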
+ size_t MaxWidth = 13; + for (const SectionRef &Section : ToolSectionFilter(*Obj)) { + StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName()); + MaxWidth = std::max(MaxWidth, Name.size()); + } + return MaxWidth; +} + void printSectionHeaders(const ObjectFile *Obj) { + size_t NameWidth = getMaxSectionNameWidth(Obj); + size_t AddressWidth = 2 * Obj->getBytesInAddress(); bool HasLMAColumn = shouldDisplayLMA(Obj); if (HasLMAColumn) outs() << "Sections:\n" - "Idx Name Size VMA LMA " - "Type\n"; + "Idx " + << left_justify("Name", NameWidth) << " Size " + << left_justify("VMA", AddressWidth) << " " + << left_justify("LMA", AddressWidth) << " Type\n"; else outs() << "Sections:\n" - "Idx Name Size VMA Type\n"; + "Idx " + << left_justify("Name", NameWidth) << " Size " + << left_justify("VMA", AddressWidth) << " Type\n"; - for (const SectionRef &Section : ToolSectionFilter(*Obj)) { - StringRef Name; - error(Section.getName(Name)); + uint64_t Idx; + for (const SectionRef &Section : ToolSectionFilter(*Obj, &Idx)) { + StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName()); uint64_t VMA = Section.getAddress(); if (shouldAdjustVA(Section)) VMA += AdjustVMA; uint64_t Size = Section.getSize(); - bool Text = Section.isText(); - bool Data = Section.isData(); - bool BSS = Section.isBSS(); - std::string Type = (std::string(Text ? "TEXT " : "") + - (Data ? "DATA " : "") + (BSS ? "BSS" : "")); + + std::string Type = Section.isText() ? "TEXT" : ""; + if (Section.isData()) + Type += Type.empty() ? "DATA" : " DATA"; + if (Section.isBSS()) + Type += Type.empty() ? "BSS" : " BSS"; if (HasLMAColumn) - outs() << format("%3d %-13s %08" PRIx64 " %016" PRIx64 " %016" PRIx64 - " %s\n", - (unsigned)Section.getIndex(), Name.str().c_str(), Size, - VMA, getELFSectionLMA(Section), Type.c_str()); + outs() << format("%3" PRIu64 " %-*s %08" PRIx64 " ", Idx, NameWidth, + Name.str().c_str(), Size) + << format_hex_no_prefix(VMA, AddressWidth) << " " + << format_hex_no_prefix(getELFSectionLMA(Section), AddressWidth) + << " " << Type << "\n"; else - outs() << format("%3d %-13s %08" PRIx64 " %016" PRIx64 " %s\n", - (unsigned)Section.getIndex(), Name.str().c_str(), Size, - VMA, Type.c_str()); + outs() << format("%3" PRIu64 " %-*s %08" PRIx64 " ", Idx, NameWidth, + Name.str().c_str(), Size) + << format_hex_no_prefix(VMA, AddressWidth) << " " << Type << "\n"; } outs() << "\n"; } void printSectionContents(const ObjectFile *Obj) { for (const SectionRef &Section : ToolSectionFilter(*Obj)) { - StringRef Name; - error(Section.getName(Name)); + StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName()); uint64_t BaseAddr = Section.getAddress(); uint64_t Size = Section.getSize(); if (!Size) @@ -1741,21 +1804,26 @@ void printSymbolTable(const ObjectFile *O, StringRef ArchiveName, const StringRef FileName = O->getFileName(); for (auto I = O->symbol_begin(), E = O->symbol_end(); I != E; ++I) { const SymbolRef &Symbol = *I; - uint64_t Address = unwrapOrError(Symbol.getAddress(), ArchiveName, FileName, + uint64_t Address = unwrapOrError(Symbol.getAddress(), FileName, ArchiveName, ArchitectureName); if ((Address < StartAddress) || (Address > StopAddress)) continue; - SymbolRef::Type Type = unwrapOrError(Symbol.getType(), ArchiveName, - FileName, ArchitectureName); + SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName, + ArchiveName, ArchitectureName); uint32_t Flags = Symbol.getFlags(); - section_iterator Section = unwrapOrError(Symbol.getSection(), ArchiveName, - FileName, ArchitectureName); + 
section_iterator Section = unwrapOrError(Symbol.getSection(), FileName, + ArchiveName, ArchitectureName); StringRef Name; - if (Type == SymbolRef::ST_Debug && Section != O->section_end()) - Section->getName(Name); - else - Name = unwrapOrError(Symbol.getName(), ArchiveName, FileName, + if (Type == SymbolRef::ST_Debug && Section != O->section_end()) { + if (Expected NameOrErr = Section->getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + + } else { + Name = unwrapOrError(Symbol.getName(), FileName, ArchiveName, ArchitectureName); + } bool Global = Flags & SymbolRef::SF_Global; bool Weak = Flags & SymbolRef::SF_Weak; @@ -1801,8 +1869,8 @@ void printSymbolTable(const ObjectFile *O, StringRef ArchiveName, StringRef SegmentName = MachO->getSectionFinalSegmentName(DR); outs() << SegmentName << ","; } - StringRef SectionName; - error(Section->getName(SectionName)); + StringRef SectionName = + unwrapOrError(Section->getName(), O->getFileName()); outs() << SectionName; } @@ -1875,7 +1943,11 @@ void printRawClangAST(const ObjectFile *Obj) { Optional ClangASTSection; for (auto Sec : ToolSectionFilter(*Obj)) { StringRef Name; - Sec.getName(Name); + if (Expected NameOrErr = Sec.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + if (Name == ClangASTSectionName) { ClangASTSection = Sec; break; @@ -1907,7 +1979,11 @@ static void printFaultMaps(const ObjectFile *Obj) { for (auto Sec : ToolSectionFilter(*Obj)) { StringRef Name; - Sec.getName(Name); + if (Expected NameOrErr = Sec.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + if (Name == FaultMapSectionName) { FaultMapSection = Sec; break; @@ -1946,12 +2022,12 @@ static void printPrivateFileHeaders(const ObjectFile *O, bool OnlyFirst) { printMachOLoadCommands(O); return; } - report_error(O->getFileName(), "Invalid/Unsupported object file format"); + reportError(O->getFileName(), "Invalid/Unsupported object file format"); } static void printFileHeaders(const ObjectFile *O) { if (!O->isELF() && !O->isCOFF()) - report_error(O->getFileName(), "Invalid/Unsupported object file format"); + reportError(O->getFileName(), "Invalid/Unsupported object file format"); Triple::ArchType AT = O->getArch(); outs() << "architecture: " << Triple::getArchTypeName(AT) << "\n"; @@ -2010,6 +2086,43 @@ static void printArchiveChild(StringRef Filename, const Archive::Child &C) { outs() << Name << "\n"; } +// For ELF only now. 
+static bool shouldWarnForInvalidStartStopAddress(ObjectFile *Obj) { + if (const auto *Elf = dyn_cast(Obj)) { + if (Elf->getEType() != ELF::ET_REL) + return true; + } + return false; +} + +static void checkForInvalidStartStopAddress(ObjectFile *Obj, + uint64_t Start, uint64_t Stop) { + if (!shouldWarnForInvalidStartStopAddress(Obj)) + return; + + for (const SectionRef &Section : Obj->sections()) + if (ELFSectionRef(Section).getFlags() & ELF::SHF_ALLOC) { + uint64_t BaseAddr = Section.getAddress(); + uint64_t Size = Section.getSize(); + if ((Start < BaseAddr + Size) && Stop > BaseAddr) + return; + } + + if (StartAddress.getNumOccurrences() == 0) + reportWarning("no section has address less than 0x" + + Twine::utohexstr(Stop) + " specified by --stop-address", + Obj->getFileName()); + else if (StopAddress.getNumOccurrences() == 0) + reportWarning("no section has address greater than or equal to 0x" + + Twine::utohexstr(Start) + " specified by --start-address", + Obj->getFileName()); + else + reportWarning("no section overlaps the range [0x" + + Twine::utohexstr(Start) + ",0x" + Twine::utohexstr(Stop) + + ") specified by --start-address/--stop-address", + Obj->getFileName()); +} + static void dumpObject(ObjectFile *O, const Archive *A = nullptr, const Archive::Child *C = nullptr) { // Avoid other output when using a raw option. @@ -2022,27 +2135,40 @@ static void dumpObject(ObjectFile *O, const Archive *A = nullptr, outs() << ":\tfile format " << O->getFileFormatName() << "\n\n"; } + if (StartAddress.getNumOccurrences() || StopAddress.getNumOccurrences()) + checkForInvalidStartStopAddress(O, StartAddress, StopAddress); + + // Note: the order here matches GNU objdump for compatability. StringRef ArchiveName = A ? A->getFileName() : ""; - if (FileHeaders) - printFileHeaders(O); if (ArchiveHeaders && !MachOOpt && C) printArchiveChild(ArchiveName, *C); - if (Disassemble) - disassembleObject(O, Relocations); + if (FileHeaders) + printFileHeaders(O); + if (PrivateHeaders || FirstPrivateHeader) + printPrivateFileHeaders(O, FirstPrivateHeader); + if (SectionHeaders) + printSectionHeaders(O); + if (SymbolTable) + printSymbolTable(O, ArchiveName); + if (DwarfDumpType != DIDT_Null) { + std::unique_ptr DICtx = DWARFContext::create(*O); + // Dump the complete DWARF structure. + DIDumpOptions DumpOpts; + DumpOpts.DumpType = DwarfDumpType; + DICtx->dump(outs(), DumpOpts); + } if (Relocations && !Disassemble) printRelocations(O); if (DynamicRelocations) printDynamicRelocations(O); - if (SectionHeaders) - printSectionHeaders(O); if (SectionContents) printSectionContents(O); - if (SymbolTable) - printSymbolTable(O, ArchiveName); + if (Disassemble) + disassembleObject(O, Relocations); if (UnwindInfo) printUnwindInfo(O); - if (PrivateHeaders || FirstPrivateHeader) - printPrivateFileHeaders(O, FirstPrivateHeader); + + // Mach-O specific options: if (ExportsTrie) printExportsTrie(O); if (Rebase) @@ -2053,17 +2179,12 @@ static void dumpObject(ObjectFile *O, const Archive *A = nullptr, printLazyBindTable(O); if (WeakBind) printWeakBindTable(O); + + // Other special sections: if (RawClangAST) printRawClangAST(O); if (FaultMapSection) printFaultMaps(O); - if (DwarfDumpType != DIDT_Null) { - std::unique_ptr DICtx = DWARFContext::create(*O); - // Dump the complete DWARF structure. 
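
The new start/stop validation treats the requested range as half-open and accepts a section when Start < BaseAddr + Size and Stop > BaseAddr. A tiny sketch of that test with illustrative numbers:

  #include <cstdint>

  // Same half-open overlap test as checkForInvalidStartStopAddress.
  static bool overlapsRange(uint64_t BaseAddr, uint64_t Size, uint64_t Start,
                            uint64_t Stop) {
    return (Start < BaseAddr + Size) && (Stop > BaseAddr);
  }
  // overlapsRange(0x1000, 0x200, 0x11f0, 0x1300) -> true
  // overlapsRange(0x1000, 0x200, 0x1200, 0x1300) -> false: a range beginning
  // exactly at the section end does not count as overlapping.
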
- DIDumpOptions DumpOpts; - DumpOpts.DumpType = DwarfDumpType; - DICtx->dump(outs(), DumpOpts); - } } static void dumpObject(const COFFImportFile *I, const Archive *A, @@ -2086,11 +2207,13 @@ static void dumpObject(const COFFImportFile *I, const Archive *A, /// Dump each object file in \a a; static void dumpArchive(const Archive *A) { Error Err = Error::success(); + unsigned I = -1; for (auto &C : A->children(Err)) { + ++I; Expected> ChildOrErr = C.getAsBinary(); if (!ChildOrErr) { if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) - report_error(std::move(E), A->getFileName(), C); + reportError(std::move(E), getFileNameForError(C, I), A->getFileName()); continue; } if (ObjectFile *O = dyn_cast(&*ChildOrErr.get())) @@ -2098,11 +2221,11 @@ static void dumpArchive(const Archive *A) { else if (COFFImportFile *I = dyn_cast(&*ChildOrErr.get())) dumpObject(I, A, &C); else - report_error(errorCodeToError(object_error::invalid_file_type), - A->getFileName()); + reportError(errorCodeToError(object_error::invalid_file_type), + A->getFileName()); } if (Err) - report_error(std::move(Err), A->getFileName()); + reportError(std::move(Err), A->getFileName()); } /// Open file and figure out how to dump it. @@ -2126,7 +2249,7 @@ static void dumpInput(StringRef file) { else if (MachOUniversalBinary *UB = dyn_cast(&Binary)) parseInputMachO(UB); else - report_error(errorCodeToError(object_error::invalid_file_type), file); + reportError(errorCodeToError(object_error::invalid_file_type), file); } } // namespace llvm @@ -2147,7 +2270,7 @@ int main(int argc, char **argv) { cl::ParseCommandLineOptions(argc, argv, "llvm object file dumper\n"); if (StartAddress >= StopAddress) - error("start address should be less than stop address"); + reportCmdLineError("start address should be less than stop address"); ToolName = argv[0]; diff --git a/tools/llvm-objdump/llvm-objdump.h b/tools/llvm-objdump/llvm-objdump.h index e58d4a05c2e6..43ce02ae0bc2 100644 --- a/tools/llvm-objdump/llvm-objdump.h +++ b/tools/llvm-objdump/llvm-objdump.h @@ -31,6 +31,8 @@ extern cl::opt Demangle; typedef std::function FilterPredicate; +/// A filtered iterator for SectionRefs that skips sections based on some given +/// predicate. class SectionFilterIterator { public: SectionFilterIterator(FilterPredicate P, @@ -60,6 +62,8 @@ private: llvm::object::section_iterator End; }; +/// Creates an iterator range of SectionFilterIterators for a given Object and +/// predicate. class SectionFilter { public: SectionFilter(FilterPredicate P, llvm::object::ObjectFile const &O) @@ -79,7 +83,15 @@ private: }; // Various helper functions. -SectionFilter ToolSectionFilter(llvm::object::ObjectFile const &O); + +/// Creates a SectionFilter with a standard predicate that conditionally skips +/// sections when the --section objdump flag is provided. +/// +/// Idx is an optional output parameter that keeps track of which section index +/// this is. This may be different than the actual section number, as some +/// sections may be filtered (e.g. symbol tables). 
+SectionFilter ToolSectionFilter(llvm::object::ObjectFile const &O, + uint64_t *Idx = nullptr); Error getELFRelocationValueString(const object::ELFObjectFileBase *Obj, const object::RelocationRef &Rel, @@ -96,8 +108,6 @@ Error getMachORelocationValueString(const object::MachOObjectFile *Obj, uint64_t getELFSectionLMA(const object::ELFSectionRef& Sec); -void error(std::error_code ec); -void error(Error E); bool isRelocAddressLess(object::RelocationRef A, object::RelocationRef B); void parseInputMachO(StringRef Filename); void parseInputMachO(object::MachOUniversalBinary *UB); @@ -129,24 +139,22 @@ void printSectionHeaders(const object::ObjectFile *O); void printSectionContents(const object::ObjectFile *O); void printSymbolTable(const object::ObjectFile *O, StringRef ArchiveName, StringRef ArchitectureName = StringRef()); -void warn(StringRef Message); -LLVM_ATTRIBUTE_NORETURN void error(Twine Message); -LLVM_ATTRIBUTE_NORETURN void report_error(StringRef File, Twine Message); -LLVM_ATTRIBUTE_NORETURN void report_error(Error E, StringRef File); -LLVM_ATTRIBUTE_NORETURN void -report_error(Error E, StringRef FileName, StringRef ArchiveName, - StringRef ArchitectureName = StringRef()); -LLVM_ATTRIBUTE_NORETURN void -report_error(Error E, StringRef ArchiveName, const object::Archive::Child &C, - StringRef ArchitectureName = StringRef()); +LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, Twine Message); +LLVM_ATTRIBUTE_NORETURN void reportError(Error E, StringRef FileName, + StringRef ArchiveName = "", + StringRef ArchitectureName = ""); +void reportWarning(Twine Message, StringRef File); template T unwrapOrError(Expected EO, Ts &&... Args) { if (EO) return std::move(*EO); - report_error(EO.takeError(), std::forward(Args)...); + reportError(EO.takeError(), std::forward(Args)...); } +std::string getFileNameForError(const object::Archive::Child &C, + unsigned Index); + } // end namespace llvm #endif diff --git a/tools/llvm-pdbutil/BytesOutputStyle.cpp b/tools/llvm-pdbutil/BytesOutputStyle.cpp index 162d12c120b4..ffc907e09f11 100644 --- a/tools/llvm-pdbutil/BytesOutputStyle.cpp +++ b/tools/llvm-pdbutil/BytesOutputStyle.cpp @@ -457,7 +457,7 @@ BytesOutputStyle::initializeTypes(uint32_t StreamIdx) { uint32_t Count = Tpi->getNumTypeRecords(); auto Offsets = Tpi->getTypeIndexOffsets(); TypeCollection = - llvm::make_unique(Types, Count, Offsets); + std::make_unique(Types, Count, Offsets); return *TypeCollection; } diff --git a/tools/llvm-pdbutil/DumpOutputStyle.cpp b/tools/llvm-pdbutil/DumpOutputStyle.cpp index 962d4cf88a8a..4d82e0fd9174 100644 --- a/tools/llvm-pdbutil/DumpOutputStyle.cpp +++ b/tools/llvm-pdbutil/DumpOutputStyle.cpp @@ -1369,9 +1369,10 @@ Error DumpOutputStyle::dumpTypesFromObjectFile() { LazyRandomTypeCollection Types(100); for (const auto &S : getObj().sections()) { - StringRef SectionName; - if (auto EC = S.getName(SectionName)) - return errorCodeToError(EC); + Expected NameOrErr = S.getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef SectionName = *NameOrErr; // .debug$T is a standard CodeView type section, while .debug$P is the same // format but used for MSVC precompiled header object files. 
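
Most hunks in this patch move callers from the old error_code-based Section.getName(Name) to the Expected-returning overload. A minimal sketch of the recurring handling pattern; the helper name is hypothetical.

  #include "llvm/Object/ObjectFile.h"
  #include "llvm/Support/Error.h"

  // Take the name if available; otherwise consume the error (these callers are
  // on best-effort paths) and fall back to an empty name.
  static llvm::StringRef sectionNameOrEmpty(const llvm::object::SectionRef &Sec) {
    llvm::Expected<llvm::StringRef> NameOrErr = Sec.getName();
    if (!NameOrErr) {
      llvm::consumeError(NameOrErr.takeError());
      return "";
    }
    return *NameOrErr;
  }
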
@@ -1551,7 +1552,7 @@ Error DumpOutputStyle::dumpModuleSymsForObj() { Dumper.setSymbolGroup(&Strings); for (auto Symbol : Symbols) { if (auto EC = Visitor.visitSymbolRecord(Symbol)) { - SymbolError = llvm::make_unique(std::move(EC)); + SymbolError = std::make_unique(std::move(EC)); return; } } diff --git a/tools/llvm-pdbutil/ExplainOutputStyle.cpp b/tools/llvm-pdbutil/ExplainOutputStyle.cpp index 94faa0463981..3d2490509c03 100644 --- a/tools/llvm-pdbutil/ExplainOutputStyle.cpp +++ b/tools/llvm-pdbutil/ExplainOutputStyle.cpp @@ -64,7 +64,7 @@ Error ExplainOutputStyle::explainPdbFile() { Error ExplainOutputStyle::explainBinaryFile() { std::unique_ptr Stream = - llvm::make_unique(File.unknown().getBuffer(), + std::make_unique(File.unknown().getBuffer(), llvm::support::little); switch (opts::explain::InputType) { case opts::explain::InputFileType::DBIStream: { diff --git a/tools/llvm-pdbutil/InputFile.cpp b/tools/llvm-pdbutil/InputFile.cpp index bd23bfdbe31a..b316882de64d 100644 --- a/tools/llvm-pdbutil/InputFile.cpp +++ b/tools/llvm-pdbutil/InputFile.cpp @@ -66,12 +66,13 @@ getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index) { static inline bool isCodeViewDebugSubsection(object::SectionRef Section, StringRef Name, BinaryStreamReader &Reader) { - StringRef SectionName; - if (Section.getName(SectionName)) - return false; - - if (SectionName != Name) + if (Expected NameOrErr = Section.getName()) { + if (*NameOrErr != Name) + return false; + } else { + consumeError(NameOrErr.takeError()); return false; + } Expected ContentsOrErr = Section.getContents(); if (!ContentsOrErr) { @@ -384,7 +385,7 @@ InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) { uint32_t Count = Stream.getNumTypeRecords(); auto Offsets = Stream.getTypeIndexOffsets(); Collection = - llvm::make_unique(Array, Count, Offsets); + std::make_unique(Array, Count, Offsets); return *Collection; } @@ -397,11 +398,11 @@ InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) { if (!isDebugTSection(Section, Records)) continue; - Types = llvm::make_unique(Records, 100); + Types = std::make_unique(Records, 100); return *Types; } - Types = llvm::make_unique(100); + Types = std::make_unique(100); return *Types; } diff --git a/tools/llvm-pdbutil/MinimalSymbolDumper.cpp b/tools/llvm-pdbutil/MinimalSymbolDumper.cpp index e5ae47050678..ebfa50625e76 100644 --- a/tools/llvm-pdbutil/MinimalSymbolDumper.cpp +++ b/tools/llvm-pdbutil/MinimalSymbolDumper.cpp @@ -569,8 +569,9 @@ Error MinimalSymbolDumper::visitKnownRecord( Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, DefRangeFramePointerRelSym &Def) { AutoIndent Indent(P, 7); - P.formatLine("offset = {0}, range = {1}", Def.Offset, formatRange(Def.Range)); - P.formatLine("gaps = {2}", Def.Offset, + P.formatLine("offset = {0}, range = {1}", Def.Hdr.Offset, + formatRange(Def.Range)); + P.formatLine("gaps = {2}", Def.Hdr.Offset, formatGaps(P.getIndentLevel() + 9, Def.Gaps)); return Error::success(); } diff --git a/tools/llvm-pdbutil/PrettyTypeDumper.cpp b/tools/llvm-pdbutil/PrettyTypeDumper.cpp index e8f8e5aa62c9..2f7a39803ca5 100644 --- a/tools/llvm-pdbutil/PrettyTypeDumper.cpp +++ b/tools/llvm-pdbutil/PrettyTypeDumper.cpp @@ -117,7 +117,7 @@ filterAndSortClassDefs(LinePrinter &Printer, Enumerator &E, continue; } - auto Layout = llvm::make_unique(std::move(Class)); + auto Layout = std::make_unique(std::move(Class)); if (Layout->deepPaddingSize() < opts::pretty::PaddingThreshold) { ++Discarded; continue; @@ -259,7 +259,7 @@ void TypeDumper::start(const 
PDBSymbolExe &Exe) { continue; } - auto Layout = llvm::make_unique(std::move(Class)); + auto Layout = std::make_unique(std::move(Class)); if (Layout->deepPaddingSize() < opts::pretty::PaddingThreshold) continue; diff --git a/tools/llvm-pdbutil/llvm-pdbutil.cpp b/tools/llvm-pdbutil/llvm-pdbutil.cpp index 785a98086791..9307300861d4 100644 --- a/tools/llvm-pdbutil/llvm-pdbutil.cpp +++ b/tools/llvm-pdbutil/llvm-pdbutil.cpp @@ -863,8 +863,8 @@ static void pdb2Yaml(StringRef Path) { std::unique_ptr Session; auto &File = loadPDB(Path, Session); - auto O = llvm::make_unique(File); - O = llvm::make_unique(File); + auto O = std::make_unique(File); + O = std::make_unique(File); ExitOnErr(O->dump()); } @@ -872,7 +872,7 @@ static void pdb2Yaml(StringRef Path) { static void dumpRaw(StringRef Path) { InputFile IF = ExitOnErr(InputFile::open(Path)); - auto O = llvm::make_unique(IF); + auto O = std::make_unique(IF); ExitOnErr(O->dump()); } @@ -880,7 +880,7 @@ static void dumpBytes(StringRef Path) { std::unique_ptr Session; auto &File = loadPDB(Path, Session); - auto O = llvm::make_unique(File); + auto O = std::make_unique(File); ExitOnErr(O->dump()); } @@ -1347,7 +1347,7 @@ static void explain() { ExitOnErr(InputFile::open(opts::explain::InputFilename.front(), true)); for (uint64_t Off : opts::explain::Offsets) { - auto O = llvm::make_unique(IF, Off); + auto O = std::make_unique(IF, Off); ExitOnErr(O->dump()); } diff --git a/tools/llvm-profdata/llvm-profdata.cpp b/tools/llvm-profdata/llvm-profdata.cpp index 16d3ebe3fcbc..41e9abb82b1f 100644 --- a/tools/llvm-profdata/llvm-profdata.cpp +++ b/tools/llvm-profdata/llvm-profdata.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/InitLLVM.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" +#include "llvm/Support/Threading.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" @@ -37,6 +38,7 @@ enum ProfileFormat { PF_None = 0, PF_Text, PF_Compact_Binary, + PF_Ext_Binary, PF_GCC, PF_Binary }; @@ -84,6 +86,15 @@ static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") { namespace { enum ProfileKinds { instr, sample }; +enum FailureMode { failIfAnyAreInvalid, failIfAllAreInvalid }; +} + +static void warnOrExitGivenError(FailureMode FailMode, std::error_code EC, + StringRef Whence = "") { + if (FailMode == failIfAnyAreInvalid) + exitWithErrorCode(EC, Whence); + else + warn(EC.message(), Whence); } static void handleMergeWriterError(Error E, StringRef WhenceFile = "", @@ -136,7 +147,7 @@ public: if (!BufOrError) exitWithErrorCode(BufOrError.getError(), InputFile); - auto Remapper = llvm::make_unique(); + auto Remapper = std::make_unique(); Remapper->File = std::move(BufOrError.get()); for (line_iterator LineIt(*Remapper->File, /*SkipBlanks=*/true, '#'); @@ -173,33 +184,16 @@ typedef SmallVector WeightedFileVector; struct WriterContext { std::mutex Lock; InstrProfWriter Writer; - Error Err; - std::string ErrWhence; + std::vector> Errors; std::mutex &ErrLock; SmallSet &WriterErrorCodes; WriterContext(bool IsSparse, std::mutex &ErrLock, SmallSet &WriterErrorCodes) - : Lock(), Writer(IsSparse), Err(Error::success()), ErrWhence(""), - ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) {} + : Lock(), Writer(IsSparse), Errors(), ErrLock(ErrLock), + WriterErrorCodes(WriterErrorCodes) {} }; -/// Determine whether an error is fatal for profile merging. 
-static bool isFatalError(instrprof_error IPE) { - switch (IPE) { - default: - return true; - case instrprof_error::success: - case instrprof_error::eof: - case instrprof_error::unknown_function: - case instrprof_error::hash_mismatch: - case instrprof_error::count_mismatch: - case instrprof_error::counter_overflow: - case instrprof_error::value_site_count_mismatch: - return false; - } -} - /// Computer the overlap b/w profile BaseFilename and TestFileName, /// and store the program level result to Overlap. static void overlapInput(const std::string &BaseFilename, @@ -212,7 +206,7 @@ static void overlapInput(const std::string &BaseFilename, // Skip the empty profiles by returning sliently. instrprof_error IPE = InstrProfError::take(std::move(E)); if (IPE != instrprof_error::empty_raw_profile) - WC->Err = make_error(IPE); + WC->Errors.emplace_back(make_error(IPE), TestFilename); return; } @@ -231,21 +225,17 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, WriterContext *WC) { std::unique_lock CtxGuard{WC->Lock}; - // If there's a pending hard error, don't do more work. - if (WC->Err) - return; - // Copy the filename, because llvm::ThreadPool copied the input "const // WeightedFile &" by value, making a reference to the filename within it // invalid outside of this packaged task. - WC->ErrWhence = Input.Filename; + std::string Filename = Input.Filename; auto ReaderOrErr = InstrProfReader::create(Input.Filename); if (Error E = ReaderOrErr.takeError()) { // Skip the empty profiles by returning sliently. instrprof_error IPE = InstrProfError::take(std::move(E)); if (IPE != instrprof_error::empty_raw_profile) - WC->Err = make_error(IPE); + WC->Errors.emplace_back(make_error(IPE), Filename); return; } @@ -253,9 +243,11 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, bool IsIRProfile = Reader->isIRLevelProfile(); bool HasCSIRProfile = Reader->hasCSIRLevelProfile(); if (WC->Writer.setIsIRLevelProfile(IsIRProfile, HasCSIRProfile)) { - WC->Err = make_error( - "Merge IR generated profile with Clang generated profile.", - std::error_code()); + WC->Errors.emplace_back( + make_error( + "Merge IR generated profile with Clang generated profile.", + std::error_code()), + Filename); return; } @@ -278,30 +270,23 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, FuncName, firstTime); }); } - if (Reader->hasError()) { - if (Error E = Reader->getError()) { - instrprof_error IPE = InstrProfError::take(std::move(E)); - if (isFatalError(IPE)) - WC->Err = make_error(IPE); - } - } + if (Reader->hasError()) + if (Error E = Reader->getError()) + WC->Errors.emplace_back(std::move(E), Filename); } /// Merge the \p Src writer context into \p Dst. static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) { - // If we've already seen a hard error, continuing with the merge would - // clobber it. 
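
The merge path now defers problems as (Error, filename) pairs on each WriterContext instead of keeping a single pending Err. A simplified sketch of recording and draining such a list; the struct is trimmed to the relevant field and the names are illustrative.

  #include "llvm/ADT/StringRef.h"
  #include "llvm/Support/Error.h"
  #include "llvm/Support/raw_ostream.h"
  #include <string>
  #include <utility>
  #include <vector>

  struct MiniWriterContext {
    std::vector<std::pair<llvm::Error, std::string>> Errors;
  };

  static void recordError(MiniWriterContext &WC, llvm::Error E,
                          llvm::StringRef Filename) {
    WC.Errors.emplace_back(std::move(E), Filename.str());
  }

  static unsigned drainAndCount(MiniWriterContext &WC) {
    unsigned NumErrors = 0;
    for (auto &P : WC.Errors) {
      ++NumErrors;
      llvm::errs() << "warning: " << P.second << ": "
                   << llvm::toString(std::move(P.first)) << "\n";
    }
    WC.Errors.clear();
    return NumErrors;
  }
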
- if (Dst->Err || Src->Err) - return; + for (auto &ErrorPair : Src->Errors) + Dst->Errors.push_back(std::move(ErrorPair)); + Src->Errors.clear(); - bool Reported = false; Dst->Writer.mergeRecordsFromWriter(std::move(Src->Writer), [&](Error E) { - if (Reported) { - consumeError(std::move(E)); - return; - } - Reported = true; - Dst->Err = std::move(E); + instrprof_error IPE = InstrProfError::take(std::move(E)); + std::unique_lock ErrGuard{Dst->ErrLock}; + bool firstTime = Dst->WriterErrorCodes.insert(IPE).second; + if (firstTime) + warn(toString(make_error(IPE))); }); } @@ -309,12 +294,12 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, StringRef OutputFilename, ProfileFormat OutputFormat, bool OutputSparse, - unsigned NumThreads) { + unsigned NumThreads, FailureMode FailMode) { if (OutputFilename.compare("-") == 0) exitWithError("Cannot write indexed profdata format to stdout."); if (OutputFormat != PF_Binary && OutputFormat != PF_Compact_Binary && - OutputFormat != PF_Text) + OutputFormat != PF_Ext_Binary && OutputFormat != PF_Text) exitWithError("Unknown format is specified."); std::mutex ErrorLock; @@ -328,7 +313,7 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs, // Initialize the writer contexts. SmallVector, 4> Contexts; for (unsigned I = 0; I < NumThreads; ++I) - Contexts.emplace_back(llvm::make_unique( + Contexts.emplace_back(std::make_unique( OutputSparse, ErrorLock, WriterErrorCodes)); if (NumThreads == 1) { @@ -364,23 +349,21 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs, } while (Mid > 0); } - // Handle deferred hard errors encountered during merging. + // Handle deferred errors encountered during merging. If the number of errors + // is equal to the number of inputs the merge failed. 
+ unsigned NumErrors = 0; for (std::unique_ptr &WC : Contexts) { - if (!WC->Err) - continue; - if (!WC->Err.isA()) - exitWithError(std::move(WC->Err), WC->ErrWhence); - - instrprof_error IPE = InstrProfError::take(std::move(WC->Err)); - if (isFatalError(IPE)) - exitWithError(make_error(IPE), WC->ErrWhence); - else - warn(toString(make_error(IPE)), - WC->ErrWhence); + for (auto &ErrorPair : WC->Errors) { + ++NumErrors; + warn(toString(std::move(ErrorPair.first)), ErrorPair.second); + } } + if (NumErrors == Inputs.size() || + (NumErrors > 0 && FailMode == failIfAnyAreInvalid)) + exitWithError("No profiles could be merged."); std::error_code EC; - raw_fd_ostream Output(OutputFilename.data(), EC, sys::fs::F_None); + raw_fd_ostream Output(OutputFilename.data(), EC, sys::fs::OF_None); if (EC) exitWithErrorCode(EC, OutputFilename); @@ -425,21 +408,78 @@ remapSamples(const sampleprof::FunctionSamples &Samples, } static sampleprof::SampleProfileFormat FormatMap[] = { - sampleprof::SPF_None, sampleprof::SPF_Text, sampleprof::SPF_Compact_Binary, - sampleprof::SPF_GCC, sampleprof::SPF_Binary}; + sampleprof::SPF_None, + sampleprof::SPF_Text, + sampleprof::SPF_Compact_Binary, + sampleprof::SPF_Ext_Binary, + sampleprof::SPF_GCC, + sampleprof::SPF_Binary}; + +static std::unique_ptr +getInputFileBuf(const StringRef &InputFile) { + if (InputFile == "") + return {}; + + auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile); + if (!BufOrError) + exitWithErrorCode(BufOrError.getError(), InputFile); + + return std::move(*BufOrError); +} + +static void populateProfileSymbolList(MemoryBuffer *Buffer, + sampleprof::ProfileSymbolList &PSL) { + if (!Buffer) + return; + + SmallVector SymbolVec; + StringRef Data = Buffer->getBuffer(); + Data.split(SymbolVec, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false); + + for (StringRef symbol : SymbolVec) + PSL.add(symbol); +} + +static void handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer, + ProfileFormat OutputFormat, + MemoryBuffer *Buffer, + sampleprof::ProfileSymbolList &WriterList, + bool CompressAllSections) { + populateProfileSymbolList(Buffer, WriterList); + if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary) + warn("Profile Symbol list is not empty but the output format is not " + "ExtBinary format. The list will be lost in the output. "); + + Writer.setProfileSymbolList(&WriterList); + + if (CompressAllSections) { + if (OutputFormat != PF_Ext_Binary) { + warn("-compress-all-section is ignored. 
Specify -extbinary to enable it"); + } else { + auto ExtBinaryWriter = + static_cast(&Writer); + ExtBinaryWriter->setToCompressAllSections(); + } + } +} static void mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, StringRef OutputFilename, - ProfileFormat OutputFormat) { + ProfileFormat OutputFormat, + StringRef ProfileSymbolListFile, + bool CompressAllSections, FailureMode FailMode) { using namespace sampleprof; StringMap ProfileMap; SmallVector, 5> Readers; LLVMContext Context; + sampleprof::ProfileSymbolList WriterList; for (const auto &Input : Inputs) { auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context); - if (std::error_code EC = ReaderOrErr.getError()) - exitWithErrorCode(EC, Input.Filename); + if (std::error_code EC = ReaderOrErr.getError()) { + warnOrExitGivenError(FailMode, EC, Input.Filename); + continue; + } // We need to keep the readers around until after all the files are // read so that we do not lose the function names stored in each @@ -447,8 +487,11 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs, // merged profile map. Readers.push_back(std::move(ReaderOrErr.get())); const auto Reader = Readers.back().get(); - if (std::error_code EC = Reader->read()) - exitWithErrorCode(EC, Input.Filename); + if (std::error_code EC = Reader->read()) { + warnOrExitGivenError(FailMode, EC, Input.Filename); + Readers.pop_back(); + continue; + } StringMap &Profiles = Reader->getProfiles(); for (StringMap::iterator I = Profiles.begin(), @@ -466,6 +509,11 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs, handleMergeWriterError(errorCodeToError(EC), Input.Filename, FName); } } + + std::unique_ptr ReaderList = + Reader->getProfileSymbolList(); + if (ReaderList) + WriterList.merge(*ReaderList); } auto WriterOrErr = SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]); @@ -473,6 +521,11 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs, exitWithErrorCode(EC, OutputFilename); auto Writer = std::move(WriterOrErr.get()); + // WriterList will have StringRef refering to string in Buffer. + // Make sure Buffer lives as long as WriterList. 
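
populateProfileSymbolList splits the whole buffer on newlines and drops empty entries. A standalone sketch of that StringRef::split call with illustrative data:

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/StringRef.h"

  void splitExample() {
    llvm::StringRef Data = "main\n_Zfoo\n\n_Zbar\n"; // illustrative contents
    llvm::SmallVector<llvm::StringRef, 8> Symbols;
    // KeepEmpty=false drops the blank line; MaxSplit=-1 means split everywhere.
    Data.split(Symbols, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
    // Symbols now holds {"main", "_Zfoo", "_Zbar"}.
  }
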
+ auto Buffer = getInputFileBuf(ProfileSymbolListFile); + handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList, + CompressAllSections); Writer->write(ProfileMap); } @@ -487,18 +540,6 @@ static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) { return {FileName, Weight}; } -static std::unique_ptr -getInputFilenamesFileBuf(const StringRef &InputFilenamesFile) { - if (InputFilenamesFile == "") - return {}; - - auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFilenamesFile); - if (!BufOrError) - exitWithErrorCode(BufOrError.getError(), InputFilenamesFile); - - return std::move(*BufOrError); -} - static void addWeightedInput(WeightedFileVector &WNI, const WeightedFile &WF) { StringRef Filename = WF.Filename; uint64_t Weight = WF.Weight; @@ -583,12 +624,20 @@ static int merge_main(int argc, const char *argv[]) { clEnumVal(sample, "Sample profile"))); cl::opt OutputFormat( cl::desc("Format of output profile"), cl::init(PF_Binary), - cl::values(clEnumValN(PF_Binary, "binary", "Binary encoding (default)"), - clEnumValN(PF_Compact_Binary, "compbinary", - "Compact binary encoding"), - clEnumValN(PF_Text, "text", "Text encoding"), - clEnumValN(PF_GCC, "gcc", - "GCC encoding (only meaningful for -sample)"))); + cl::values( + clEnumValN(PF_Binary, "binary", "Binary encoding (default)"), + clEnumValN(PF_Compact_Binary, "compbinary", + "Compact binary encoding"), + clEnumValN(PF_Ext_Binary, "extbinary", "Extensible binary encoding"), + clEnumValN(PF_Text, "text", "Text encoding"), + clEnumValN(PF_GCC, "gcc", + "GCC encoding (only meaningful for -sample)"))); + cl::opt FailureMode( + "failure-mode", cl::init(failIfAnyAreInvalid), cl::desc("Failure mode:"), + cl::values(clEnumValN(failIfAnyAreInvalid, "any", + "Fail if any profile is invalid."), + clEnumValN(failIfAllAreInvalid, "all", + "Fail only if all profiles are invalid."))); cl::opt OutputSparse("sparse", cl::init(false), cl::desc("Generate a sparse profile (only meaningful for -instr)")); cl::opt NumThreads( @@ -596,6 +645,14 @@ static int merge_main(int argc, const char *argv[]) { cl::desc("Number of merge threads to use (default: autodetect)")); cl::alias NumThreadsA("j", cl::desc("Alias for --num-threads"), cl::aliasopt(NumThreads)); + cl::opt ProfileSymbolListFile( + "prof-sym-list", cl::init(""), + cl::desc("Path to file containing the list of function symbols " + "used to populate profile symbol list")); + cl::opt CompressAllSections( + "compress-all-sections", cl::init(false), cl::Hidden, + cl::desc("Compress all sections when writing the profile (only " + "meaningful for -extbinary)")); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); @@ -607,7 +664,7 @@ static int merge_main(int argc, const char *argv[]) { // Make sure that the file buffer stays alive for the duration of the // weighted input vector's lifetime. 
- auto Buffer = getInputFilenamesFileBuf(InputFilenamesFile); + auto Buffer = getInputFileBuf(InputFilenamesFile); parseInputFilenamesFile(Buffer.get(), WeightedInputs); if (WeightedInputs.empty()) @@ -626,10 +683,11 @@ static int merge_main(int argc, const char *argv[]) { if (ProfileKind == instr) mergeInstrProfile(WeightedInputs, Remapper.get(), OutputFilename, - OutputFormat, OutputSparse, NumThreads); + OutputFormat, OutputSparse, NumThreads, FailureMode); else mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename, - OutputFormat); + OutputFormat, ProfileSymbolListFile, CompressAllSections, + FailureMode); return 0; } @@ -644,7 +702,7 @@ static void overlapInstrProfile(const std::string &BaseFilename, WriterContext Context(false, ErrorLock, WriterErrorCodes); WeightedFile WeightedInput{BaseFilename, 1}; OverlapStats Overlap; - Error E = Overlap.accumuateCounts(BaseFilename, TestFilename, IsCS); + Error E = Overlap.accumulateCounts(BaseFilename, TestFilename, IsCS); if (E) exitWithError(std::move(E), "Error in getting profile count sums"); if (Overlap.Base.CountSum < 1.0f) { @@ -682,7 +740,7 @@ static int overlap_main(int argc, const char *argv[]) { cl::ParseCommandLineOptions(argc, argv, "LLVM profile data overlap tool\n"); std::error_code EC; - raw_fd_ostream OS(Output.data(), EC, sys::fs::F_Text); + raw_fd_ostream OS(Output.data(), EC, sys::fs::OF_Text); if (EC) exitWithErrorCode(EC, Output); @@ -944,10 +1002,21 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts, return 0; } +static void showSectionInfo(sampleprof::SampleProfileReader *Reader, + raw_fd_ostream &OS) { + if (!Reader->dumpSectionInfo(OS)) { + WithColor::warning() << "-show-sec-info-only is only supported for " + << "sample profile in extbinary format and is " + << "ignored for other formats.\n"; + return; + } +} + static int showSampleProfile(const std::string &Filename, bool ShowCounts, bool ShowAllFunctions, const std::string &ShowFunction, - raw_fd_ostream &OS) { + bool ShowProfileSymbolList, + bool ShowSectionInfoOnly, raw_fd_ostream &OS) { using namespace sampleprof; LLVMContext Context; auto ReaderOrErr = SampleProfileReader::create(Filename, Context); @@ -955,6 +1024,12 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts, exitWithErrorCode(EC, Filename); auto Reader = std::move(ReaderOrErr.get()); + + if (ShowSectionInfoOnly) { + showSectionInfo(Reader.get(), OS); + return 0; + } + if (std::error_code EC = Reader->read()) exitWithErrorCode(EC, Filename); @@ -963,6 +1038,12 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts, else Reader->dumpFunctionProfile(ShowFunction, OS); + if (ShowProfileSymbolList) { + std::unique_ptr ReaderList = + Reader->getProfileSymbolList(); + ReaderList->dump(OS); + } + return 0; } @@ -1015,6 +1096,15 @@ static int show_main(int argc, const char *argv[]) { "list-below-cutoff", cl::init(false), cl::desc("Only output names of functions whose max count values are " "below the cutoff value")); + cl::opt ShowProfileSymbolList( + "show-prof-sym-list", cl::init(false), + cl::desc("Show profile symbol list if it exists in the profile. ")); + cl::opt ShowSectionInfoOnly( + "show-sec-info-only", cl::init(false), + cl::desc("Show the information of each section in the sample profile. 
" + "The flag is only usable when the sample profile is in " + "extbinary format")); + cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n"); if (OutputFilename.empty()) @@ -1027,7 +1117,7 @@ static int show_main(int argc, const char *argv[]) { } std::error_code EC; - raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::F_Text); + raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_Text); if (EC) exitWithErrorCode(EC, OutputFilename); @@ -1042,7 +1132,8 @@ static int show_main(int argc, const char *argv[]) { OnlyListBelow, ShowFunction, TextFormat, OS); else return showSampleProfile(Filename, ShowCounts, ShowAllFunctions, - ShowFunction, OS); + ShowFunction, ShowProfileSymbolList, + ShowSectionInfoOnly, OS); } int main(int argc, const char *argv[]) { diff --git a/tools/llvm-readobj/ARMEHABIPrinter.h b/tools/llvm-readobj/ARMEHABIPrinter.h index 11f9d6166a59..2c0912038c31 100644 --- a/tools/llvm-readobj/ARMEHABIPrinter.h +++ b/tools/llvm-readobj/ARMEHABIPrinter.h @@ -329,6 +329,7 @@ class PrinterContext { ScopedPrinter &SW; const object::ELFFile *ELF; + StringRef FileName; const Elf_Shdr *Symtab; ArrayRef ShndxTable; @@ -352,8 +353,8 @@ class PrinterContext { public: PrinterContext(ScopedPrinter &SW, const object::ELFFile *ELF, - const Elf_Shdr *Symtab) - : SW(SW), ELF(ELF), Symtab(Symtab) {} + StringRef FileName, const Elf_Shdr *Symtab) + : SW(SW), ELF(ELF), FileName(FileName), Symtab(Symtab) {} void PrintUnwindInformation() const; }; @@ -369,10 +370,10 @@ PrinterContext::FunctionAtAddress(unsigned Section, return readobj_error::unknown_symbol; auto StrTableOrErr = ELF->getStringTableForSymtab(*Symtab); if (!StrTableOrErr) - error(StrTableOrErr.takeError()); + reportError(StrTableOrErr.takeError(), FileName); StringRef StrTable = *StrTableOrErr; - for (const Elf_Sym &Sym : unwrapOrError(ELF->symbols(Symtab))) + for (const Elf_Sym &Sym : unwrapOrError(FileName, ELF->symbols(Symtab))) if (Sym.st_shndx == Section && Sym.st_value == Address && Sym.getType() == ELF::STT_FUNC) { auto NameOrErr = Sym.getName(StrTable); @@ -398,16 +399,16 @@ PrinterContext::FindExceptionTable(unsigned IndexSectionIndex, /// handling table. Use this symbol to recover the actual exception handling /// table. 
- for (const Elf_Shdr &Sec : unwrapOrError(ELF->sections())) { + for (const Elf_Shdr &Sec : unwrapOrError(FileName, ELF->sections())) { if (Sec.sh_type != ELF::SHT_REL || Sec.sh_info != IndexSectionIndex) continue; auto SymTabOrErr = ELF->getSection(Sec.sh_link); if (!SymTabOrErr) - error(SymTabOrErr.takeError()); + reportError(SymTabOrErr.takeError(), FileName); const Elf_Shdr *SymTab = *SymTabOrErr; - for (const Elf_Rel &R : unwrapOrError(ELF->rels(&Sec))) { + for (const Elf_Rel &R : unwrapOrError(FileName, ELF->rels(&Sec))) { if (R.r_offset != static_cast(IndexTableOffset)) continue; @@ -417,7 +418,7 @@ PrinterContext::FindExceptionTable(unsigned IndexSectionIndex, RelA.r_addend = 0; const Elf_Sym *Symbol = - unwrapOrError(ELF->getRelocationSymbol(&RelA, SymTab)); + unwrapOrError(FileName, ELF->getRelocationSymbol(&RelA, SymTab)); auto Ret = ELF->getSection(Symbol, SymTab, ShndxTable); if (!Ret) @@ -570,7 +571,7 @@ void PrinterContext::PrintUnwindInformation() const { DictScope UI(SW, "UnwindInformation"); int SectionIndex = 0; - for (const Elf_Shdr &Sec : unwrapOrError(ELF->sections())) { + for (const Elf_Shdr &Sec : unwrapOrError(FileName, ELF->sections())) { if (Sec.sh_type == ELF::SHT_ARM_EXIDX) { DictScope UIT(SW, "UnwindIndexTable"); diff --git a/tools/llvm-readobj/ARMWinEHPrinter.cpp b/tools/llvm-readobj/ARMWinEHPrinter.cpp index 4de14e2e78d5..3e026f58871b 100644 --- a/tools/llvm-readobj/ARMWinEHPrinter.cpp +++ b/tools/llvm-readobj/ARMWinEHPrinter.cpp @@ -842,8 +842,10 @@ bool Decoder::dumpXDataRecord(const COFFObjectFile &COFF, if ((int64_t)(Contents.size() - Offset - 4 * HeaderWords(XData) - (XData.E() ? 0 : XData.EpilogueCount() * 4) - - (XData.X() ? 8 : 0)) < (int64_t)ByteCodeLength) + (XData.X() ? 8 : 0)) < (int64_t)ByteCodeLength) { + SW.flush(); report_fatal_error("Malformed unwind data"); + } if (XData.E()) { ArrayRef UC = XData.UnwindByteCode(); @@ -1039,10 +1041,7 @@ bool Decoder::dumpPackedEntry(const object::COFFObjectFile &COFF, } FunctionAddress = *FunctionAddressOrErr; } else { - const pe32_header *PEHeader; - if (COFF.getPE32Header(PEHeader)) - return false; - FunctionAddress = PEHeader->ImageBase + RF.BeginAddress; + FunctionAddress = COFF.getPE32Header()->ImageBase + RF.BeginAddress; } SW.printString("Function", formatSymbol(FunctionName, FunctionAddress)); diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp index 4c2e39dfa3cc..9b2c6adb9d93 100644 --- a/tools/llvm-readobj/COFFDumper.cpp +++ b/tools/llvm-readobj/COFFDumper.cpp @@ -60,6 +60,10 @@ using namespace llvm::codeview; using namespace llvm::support; using namespace llvm::Win64EH; +static inline Error createError(const Twine &Err) { + return make_error(Err, object_error::parse_failed); +} + namespace { struct LoadConfigTables { @@ -167,9 +171,6 @@ private: void printDelayImportedSymbols( const DelayImportDirectoryEntryRef &I, iterator_range Range); - ErrorOr - getResourceDirectoryTableEntry(const coff_resource_dir_table &Table, - uint32_t Index); typedef DenseMap > RelocMapTy; @@ -627,14 +628,10 @@ void COFFDumper::printFileHeaders() { // Print PE header. This header does not exist if this is an object file and // not an executable. 
- const pe32_header *PEHeader = nullptr; - error(Obj->getPE32Header(PEHeader)); - if (PEHeader) + if (const pe32_header *PEHeader = Obj->getPE32Header()) printPEHeader(PEHeader); - const pe32plus_header *PEPlusHeader = nullptr; - error(Obj->getPE32PlusHeader(PEPlusHeader)); - if (PEPlusHeader) + if (const pe32plus_header *PEPlusHeader = Obj->getPE32PlusHeader()) printPEHeader(PEPlusHeader); if (const dos_header *DH = Obj->getDOSHeader()) @@ -728,7 +725,9 @@ void COFFDumper::printCOFFDebugDirectory() { if (D.Type == COFF::IMAGE_DEBUG_TYPE_CODEVIEW) { const codeview::DebugInfo *DebugInfo; StringRef PDBFileName; - error(Obj->getDebugPDBInfo(&D, DebugInfo, PDBFileName)); + if (std::error_code EC = Obj->getDebugPDBInfo(&D, DebugInfo, PDBFileName)) + reportError(errorCodeToError(EC), Obj->getFileName()); + DictScope PDBScope(W, "PDBInfo"); W.printHex("PDBSignature", DebugInfo->Signature.CVSignature); if (DebugInfo->Signature.CVSignature == OMF::Signature::PDB70) { @@ -740,8 +739,9 @@ void COFFDumper::printCOFFDebugDirectory() { // FIXME: Type values of 12 and 13 are commonly observed but are not in // the documented type enum. Figure out what they mean. ArrayRef RawData; - error( - Obj->getRvaAndSizeAsBytes(D.AddressOfRawData, D.SizeOfData, RawData)); + if (std::error_code EC = Obj->getRvaAndSizeAsBytes(D.AddressOfRawData, + D.SizeOfData, RawData)) + reportError(errorCodeToError(EC), Obj->getFileName()); W.printBinaryBlock("RawData", RawData); } } @@ -750,8 +750,11 @@ void COFFDumper::printCOFFDebugDirectory() { void COFFDumper::printRVATable(uint64_t TableVA, uint64_t Count, uint64_t EntrySize, PrintExtraCB PrintExtra) { uintptr_t TableStart, TableEnd; - error(Obj->getVaPtr(TableVA, TableStart)); - error(Obj->getVaPtr(TableVA + Count * EntrySize - 1, TableEnd)); + if (std::error_code EC = Obj->getVaPtr(TableVA, TableStart)) + reportError(errorCodeToError(EC), Obj->getFileName()); + if (std::error_code EC = + Obj->getVaPtr(TableVA + Count * EntrySize - 1, TableEnd)) + reportError(errorCodeToError(EC), Obj->getFileName()); TableEnd++; for (uintptr_t I = TableStart; I < TableEnd; I += EntrySize) { uint32_t RVA = *reinterpret_cast(I); @@ -887,16 +890,14 @@ void COFFDumper::printBaseOfDataField(const pe32plus_header *) {} void COFFDumper::printCodeViewDebugInfo() { // Print types first to build CVUDTNames, then print symbols. for (const SectionRef &S : Obj->sections()) { - StringRef SectionName; - error(S.getName(SectionName)); + StringRef SectionName = unwrapOrError(Obj->getFileName(), S.getName()); // .debug$T is a standard CodeView type section, while .debug$P is the same // format but used for MSVC precompiled header object files. 
if (SectionName == ".debug$T" || SectionName == ".debug$P") printCodeViewTypeSection(SectionName, S); } for (const SectionRef &S : Obj->sections()) { - StringRef SectionName; - error(S.getName(SectionName)); + StringRef SectionName = unwrapOrError(Obj->getFileName(), S.getName()); if (SectionName == ".debug$S") printCodeViewSymbolSection(SectionName, S); } @@ -908,32 +909,40 @@ void COFFDumper::initializeFileAndStringTables(BinaryStreamReader &Reader) { // The section consists of a number of subsection in the following format: // |SubSectionType|SubSectionSize|Contents...| uint32_t SubType, SubSectionSize; - error(Reader.readInteger(SubType)); - error(Reader.readInteger(SubSectionSize)); + + if (Error E = Reader.readInteger(SubType)) + reportError(std::move(E), Obj->getFileName()); + if (Error E = Reader.readInteger(SubSectionSize)) + reportError(std::move(E), Obj->getFileName()); StringRef Contents; - error(Reader.readFixedString(Contents, SubSectionSize)); + if (Error E = Reader.readFixedString(Contents, SubSectionSize)) + reportError(std::move(E), Obj->getFileName()); BinaryStreamRef ST(Contents, support::little); switch (DebugSubsectionKind(SubType)) { case DebugSubsectionKind::FileChecksums: - error(CVFileChecksumTable.initialize(ST)); + if (Error E = CVFileChecksumTable.initialize(ST)) + reportError(std::move(E), Obj->getFileName()); break; case DebugSubsectionKind::StringTable: - error(CVStringTable.initialize(ST)); + if (Error E = CVStringTable.initialize(ST)) + reportError(std::move(E), Obj->getFileName()); break; default: break; } uint32_t PaddedSize = alignTo(SubSectionSize, 4); - error(Reader.skip(PaddedSize - SubSectionSize)); + if (Error E = Reader.skip(PaddedSize - SubSectionSize)) + reportError(std::move(E), Obj->getFileName()); } } void COFFDumper::printCodeViewSymbolSection(StringRef SectionName, const SectionRef &Section) { - StringRef SectionContents = unwrapOrError(Section.getContents()); + StringRef SectionContents = + unwrapOrError(Obj->getFileName(), Section.getContents()); StringRef Data = SectionContents; SmallVector FunctionNames; @@ -944,10 +953,13 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName, W.printNumber("Section", SectionName, Obj->getSectionID(Section)); uint32_t Magic; - error(consume(Data, Magic)); + if (Error E = consume(Data, Magic)) + reportError(std::move(E), Obj->getFileName()); + W.printHex("Magic", Magic); if (Magic != COFF::DEBUG_SECTION_MAGIC) - return error(object_error::parse_failed); + reportError(errorCodeToError(object_error::parse_failed), + Obj->getFileName()); BinaryStreamReader FSReader(Data, support::little); initializeFileAndStringTables(FSReader); @@ -957,8 +969,10 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName, // The section consists of a number of subsection in the following format: // |SubSectionType|SubSectionSize|Contents...| uint32_t SubType, SubSectionSize; - error(consume(Data, SubType)); - error(consume(Data, SubSectionSize)); + if (Error E = consume(Data, SubType)) + reportError(std::move(E), Obj->getFileName()); + if (Error E = consume(Data, SubSectionSize)) + reportError(std::move(E), Obj->getFileName()); ListScope S(W, "Subsection"); // Dump the subsection as normal even if the ignore bit is set. @@ -971,7 +985,8 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName, // Get the contents of the subsection. 
if (SubSectionSize > Data.size()) - return error(object_error::parse_failed); + return reportError(errorCodeToError(object_error::parse_failed), + Obj->getFileName()); StringRef Contents = Data.substr(0, SubSectionSize); // Add SubSectionSize to the current offset and align that offset to find @@ -980,7 +995,8 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName, size_t NextOffset = SectionOffset + SubSectionSize; NextOffset = alignTo(NextOffset, 4); if (NextOffset > SectionContents.size()) - return error(object_error::parse_failed); + return reportError(errorCodeToError(object_error::parse_failed), + Obj->getFileName()); Data = SectionContents.drop_front(NextOffset); // Optionally print the subsection bytes in case our parsing gets confused @@ -1010,17 +1026,21 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName, if (SubSectionSize < 12) { // There should be at least three words to store two function // relocations and size of the code. - error(object_error::parse_failed); + reportError(errorCodeToError(object_error::parse_failed), + Obj->getFileName()); return; } StringRef LinkageName; - error(resolveSymbolName(Obj->getCOFFSection(Section), SectionOffset, - LinkageName)); + if (std::error_code EC = resolveSymbolName(Obj->getCOFFSection(Section), + SectionOffset, LinkageName)) + reportError(errorCodeToError(EC), Obj->getFileName()); + W.printString("LinkageName", LinkageName); if (FunctionLineTables.count(LinkageName) != 0) { // Saw debug info for this function already? - error(object_error::parse_failed); + reportError(errorCodeToError(object_error::parse_failed), + Obj->getFileName()); return; } @@ -1033,17 +1053,21 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName, BinaryStreamReader SR(Contents, llvm::support::little); DebugFrameDataSubsectionRef FrameData; - error(FrameData.initialize(SR)); + if (Error E = FrameData.initialize(SR)) + reportError(std::move(E), Obj->getFileName()); StringRef LinkageName; - error(resolveSymbolName(Obj->getCOFFSection(Section), SectionContents, - FrameData.getRelocPtr(), LinkageName)); + if (std::error_code EC = + resolveSymbolName(Obj->getCOFFSection(Section), SectionContents, + FrameData.getRelocPtr(), LinkageName)) + reportError(errorCodeToError(EC), Obj->getFileName()); W.printString("LinkageName", LinkageName); // To find the active frame description, search this array for the // smallest PC range that includes the current PC. 
for (const auto &FD : FrameData) { - StringRef FrameFunc = error(CVStringTable.getString(FD.FrameFunc)); + StringRef FrameFunc = unwrapOrError( + Obj->getFileName(), CVStringTable.getString(FD.FrameFunc)); DictScope S(W, "FrameData"); W.printHex("RvaStart", FD.RvaStart); @@ -1094,7 +1118,8 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName, BinaryStreamReader Reader(FunctionLineTables[Name], support::little); DebugLinesSubsectionRef LineInfo; - error(LineInfo.initialize(Reader)); + if (Error E = LineInfo.initialize(Reader)) + reportError(std::move(E), Obj->getFileName()); W.printHex("Flags", LineInfo.header()->Flags); W.printHex("CodeSize", LineInfo.header()->CodeSize); @@ -1105,7 +1130,8 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName, uint32_t ColumnIndex = 0; for (const auto &Line : Entry.LineNumbers) { if (Line.Offset >= LineInfo.header()->CodeSize) { - error(object_error::parse_failed); + reportError(errorCodeToError(object_error::parse_failed), + Obj->getFileName()); return; } @@ -1136,21 +1162,20 @@ void COFFDumper::printCodeViewSymbolsSubsection(StringRef Subsection, StringRef SectionContents) { ArrayRef BinaryData(Subsection.bytes_begin(), Subsection.bytes_end()); - auto CODD = llvm::make_unique(*this, Section, Obj, + auto CODD = std::make_unique(*this, Section, Obj, SectionContents); CVSymbolDumper CVSD(W, Types, CodeViewContainer::ObjectFile, std::move(CODD), CompilationCPUType, opts::CodeViewSubsectionBytes); CVSymbolArray Symbols; BinaryStreamReader Reader(BinaryData, llvm::support::little); - if (auto EC = Reader.readArray(Symbols, Reader.getLength())) { - consumeError(std::move(EC)); + if (Error E = Reader.readArray(Symbols, Reader.getLength())) { W.flush(); - error(object_error::parse_failed); + reportError(std::move(E), Obj->getFileName()); } - if (auto EC = CVSD.dump(Symbols)) { + if (Error E = CVSD.dump(Symbols)) { W.flush(); - error(std::move(EC)); + reportError(std::move(E), Obj->getFileName()); } CompilationCPUType = CVSD.getCompilationCPUType(); W.flush(); @@ -1159,12 +1184,14 @@ void COFFDumper::printCodeViewSymbolsSubsection(StringRef Subsection, void COFFDumper::printCodeViewFileChecksums(StringRef Subsection) { BinaryStreamRef Stream(Subsection, llvm::support::little); DebugChecksumsSubsectionRef Checksums; - error(Checksums.initialize(Stream)); + if (Error E = Checksums.initialize(Stream)) + reportError(std::move(E), Obj->getFileName()); for (auto &FC : Checksums) { DictScope S(W, "FileChecksum"); - StringRef Filename = error(CVStringTable.getString(FC.FileNameOffset)); + StringRef Filename = unwrapOrError( + Obj->getFileName(), CVStringTable.getString(FC.FileNameOffset)); W.printHex("Filename", Filename, FC.FileNameOffset); W.printHex("ChecksumSize", FC.Checksum.size()); W.printEnum("ChecksumKind", uint8_t(FC.Kind), @@ -1177,7 +1204,8 @@ void COFFDumper::printCodeViewFileChecksums(StringRef Subsection) { void COFFDumper::printCodeViewInlineeLines(StringRef Subsection) { BinaryStreamReader SR(Subsection, llvm::support::little); DebugInlineeLinesSubsectionRef Lines; - error(Lines.initialize(SR)); + if (Error E = Lines.initialize(SR)) + reportError(std::move(E), Obj->getFileName()); for (auto &Line : Lines) { DictScope S(W, "InlineeSourceLine"); @@ -1198,15 +1226,18 @@ void COFFDumper::printCodeViewInlineeLines(StringRef Subsection) { StringRef COFFDumper::getFileNameForFileOffset(uint32_t FileOffset) { // The file checksum subsection should precede all references to it. 
if (!CVFileChecksumTable.valid() || !CVStringTable.valid()) - error(object_error::parse_failed); + reportError(errorCodeToError(object_error::parse_failed), + Obj->getFileName()); auto Iter = CVFileChecksumTable.getArray().at(FileOffset); // Check if the file checksum table offset is valid. if (Iter == CVFileChecksumTable.end()) - error(object_error::parse_failed); + reportError(errorCodeToError(object_error::parse_failed), + Obj->getFileName()); - return error(CVStringTable.getString(Iter->FileNameOffset)); + return unwrapOrError(Obj->getFileName(), + CVStringTable.getString(Iter->FileNameOffset)); } void COFFDumper::printFileNameForOffset(StringRef Label, uint32_t FileOffset) { @@ -1219,35 +1250,38 @@ void COFFDumper::mergeCodeViewTypes(MergingTypeTableBuilder &CVIDs, GlobalTypeTableBuilder &GlobalCVTypes, bool GHash) { for (const SectionRef &S : Obj->sections()) { - StringRef SectionName; - error(S.getName(SectionName)); + StringRef SectionName = unwrapOrError(Obj->getFileName(), S.getName()); if (SectionName == ".debug$T") { - StringRef Data = unwrapOrError(S.getContents()); + StringRef Data = unwrapOrError(Obj->getFileName(), S.getContents()); uint32_t Magic; - error(consume(Data, Magic)); + if (Error E = consume(Data, Magic)) + reportError(std::move(E), Obj->getFileName()); + if (Magic != 4) - error(object_error::parse_failed); + reportError(errorCodeToError(object_error::parse_failed), + Obj->getFileName()); CVTypeArray Types; BinaryStreamReader Reader(Data, llvm::support::little); if (auto EC = Reader.readArray(Types, Reader.getLength())) { consumeError(std::move(EC)); W.flush(); - error(object_error::parse_failed); + reportError(errorCodeToError(object_error::parse_failed), + Obj->getFileName()); } SmallVector SourceToDest; Optional PCHSignature; if (GHash) { std::vector Hashes = GloballyHashedType::hashTypes(Types); - if (auto EC = + if (Error E = mergeTypeAndIdRecords(GlobalCVIDs, GlobalCVTypes, SourceToDest, Types, Hashes, PCHSignature)) - return error(std::move(EC)); + return reportError(std::move(E), Obj->getFileName()); } else { - if (auto EC = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, Types, + if (Error E = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, Types, PCHSignature)) - return error(std::move(EC)); + return reportError(std::move(E), Obj->getFileName()); } } } @@ -1258,20 +1292,25 @@ void COFFDumper::printCodeViewTypeSection(StringRef SectionName, ListScope D(W, "CodeViewTypes"); W.printNumber("Section", SectionName, Obj->getSectionID(Section)); - StringRef Data = unwrapOrError(Section.getContents()); + StringRef Data = unwrapOrError(Obj->getFileName(), Section.getContents()); if (opts::CodeViewSubsectionBytes) W.printBinaryBlock("Data", Data); uint32_t Magic; - error(consume(Data, Magic)); + if (Error E = consume(Data, Magic)) + reportError(std::move(E), Obj->getFileName()); + W.printHex("Magic", Magic); if (Magic != COFF::DEBUG_SECTION_MAGIC) - return error(object_error::parse_failed); + reportError(errorCodeToError(object_error::parse_failed), + Obj->getFileName()); Types.reset(Data, 100); TypeDumpVisitor TDV(Types, &W, opts::CodeViewSubsectionBytes); - error(codeview::visitTypeStream(Types, TDV)); + if (Error E = codeview::visitTypeStream(Types, TDV)) + reportError(std::move(E), Obj->getFileName()); + W.flush(); } @@ -1282,8 +1321,7 @@ void COFFDumper::printSectionHeaders() { ++SectionNumber; const coff_section *Section = Obj->getCOFFSection(Sec); - StringRef Name; - error(Sec.getName(Name)); + StringRef Name = unwrapOrError(Obj->getFileName(), 
Sec.getName()); DictScope D(W, "Section"); W.printNumber("Number", SectionNumber); @@ -1318,7 +1356,7 @@ void COFFDumper::printSectionHeaders() { if (opts::SectionData && !(Section->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)) { - StringRef Data = unwrapOrError(Sec.getContents()); + StringRef Data = unwrapOrError(Obj->getFileName(), Sec.getContents()); W.printBinaryBlock("SectionData", Data); } } @@ -1330,8 +1368,7 @@ void COFFDumper::printRelocations() { int SectionNumber = 0; for (const SectionRef &Section : Obj->sections()) { ++SectionNumber; - StringRef Name; - error(Section.getName(Name)); + StringRef Name = unwrapOrError(Obj->getFileName(), Section.getName()); bool PrintedGroup = false; for (const RelocationRef &Reloc : Section.relocations()) { @@ -1362,7 +1399,9 @@ void COFFDumper::printRelocation(const SectionRef &Section, int64_t SymbolIndex = -1; if (Symbol != Obj->symbol_end()) { Expected SymbolNameOrErr = Symbol->getName(); - error(errorToErrorCode(SymbolNameOrErr.takeError())); + if (!SymbolNameOrErr) + reportError(SymbolNameOrErr.takeError(), Obj->getFileName()); + SymbolName = *SymbolNameOrErr; SymbolIndex = Obj->getSymbolIndex(Obj->getCOFFSymbol(*Symbol)); } @@ -1439,7 +1478,8 @@ void COFFDumper::printSymbol(const SymbolRef &Sym) { for (uint8_t I = 0; I < Symbol.getNumberOfAuxSymbols(); ++I) { if (Symbol.isFunctionDefinition()) { const coff_aux_function_definition *Aux; - error(getSymbolAuxData(Obj, Symbol, I, Aux)); + if (std::error_code EC = getSymbolAuxData(Obj, Symbol, I, Aux)) + reportError(errorCodeToError(EC), Obj->getFileName()); DictScope AS(W, "AuxFunctionDef"); W.printNumber("TagIndex", Aux->TagIndex); @@ -1449,15 +1489,16 @@ void COFFDumper::printSymbol(const SymbolRef &Sym) { } else if (Symbol.isAnyUndefined()) { const coff_aux_weak_external *Aux; - error(getSymbolAuxData(Obj, Symbol, I, Aux)); + if (std::error_code EC = getSymbolAuxData(Obj, Symbol, I, Aux)) + reportError(errorCodeToError(EC), Obj->getFileName()); Expected Linked = Obj->getSymbol(Aux->TagIndex); + if (!Linked) + reportError(Linked.takeError(), Obj->getFileName()); + StringRef LinkedName; - std::error_code EC = errorToErrorCode(Linked.takeError()); - if (EC || (EC = Obj->getSymbolName(*Linked, LinkedName))) { - LinkedName = ""; - error(EC); - } + if (std::error_code EC = Obj->getSymbolName(*Linked, LinkedName)) + reportError(errorCodeToError(EC), Obj->getFileName()); DictScope AS(W, "AuxWeakExternal"); W.printNumber("Linked", LinkedName, Aux->TagIndex); @@ -1466,8 +1507,8 @@ void COFFDumper::printSymbol(const SymbolRef &Sym) { } else if (Symbol.isFileRecord()) { const char *FileName; - error(getSymbolAuxData(Obj, Symbol, I, FileName)); - + if (std::error_code EC = getSymbolAuxData(Obj, Symbol, I, FileName)) + reportError(errorCodeToError(EC), Obj->getFileName()); DictScope AS(W, "AuxFileRecord"); StringRef Name(FileName, Symbol.getNumberOfAuxSymbols() * @@ -1476,7 +1517,8 @@ void COFFDumper::printSymbol(const SymbolRef &Sym) { break; } else if (Symbol.isSectionDefinition()) { const coff_aux_section_definition *Aux; - error(getSymbolAuxData(Obj, Symbol, I, Aux)); + if (std::error_code EC = getSymbolAuxData(Obj, Symbol, I, Aux)) + reportError(errorCodeToError(EC), Obj->getFileName()); int32_t AuxNumber = Aux->getNumber(Symbol.isBigObj()); @@ -1493,26 +1535,27 @@ void COFFDumper::printSymbol(const SymbolRef &Sym) { const coff_section *Assoc; StringRef AssocName = ""; if (std::error_code EC = Obj->getSection(AuxNumber, Assoc)) - error(EC); + reportError(errorCodeToError(EC), 
Obj->getFileName()); Expected Res = getSectionName(Obj, AuxNumber, Assoc); if (!Res) - error(Res.takeError()); + reportError(Res.takeError(), Obj->getFileName()); AssocName = *Res; W.printNumber("AssocSection", AssocName, AuxNumber); } } else if (Symbol.isCLRToken()) { const coff_aux_clr_token *Aux; - error(getSymbolAuxData(Obj, Symbol, I, Aux)); + if (std::error_code EC = getSymbolAuxData(Obj, Symbol, I, Aux)) + reportError(errorCodeToError(EC), Obj->getFileName()); Expected ReferredSym = Obj->getSymbol(Aux->SymbolTableIndex); + if (!ReferredSym) + reportError(ReferredSym.takeError(), Obj->getFileName()); + StringRef ReferredName; - std::error_code EC = errorToErrorCode(ReferredSym.takeError()); - if (EC || (EC = Obj->getSymbolName(*ReferredSym, ReferredName))) { - ReferredName = ""; - error(EC); - } + if (std::error_code EC = Obj->getSymbolName(*ReferredSym, ReferredName)) + reportError(errorCodeToError(EC), Obj->getFileName()); DictScope AS(W, "AuxCLRToken"); W.printNumber("AuxType", Aux->AuxType); @@ -1578,9 +1621,11 @@ void COFFDumper::printImportedSymbols( iterator_range Range) { for (const ImportedSymbolRef &I : Range) { StringRef Sym; - error(I.getSymbolName(Sym)); + if (std::error_code EC = I.getSymbolName(Sym)) + reportError(errorCodeToError(EC), Obj->getFileName()); uint16_t Ordinal; - error(I.getOrdinal(Ordinal)); + if (std::error_code EC = I.getOrdinal(Ordinal)) + reportError(errorCodeToError(EC), Obj->getFileName()); W.printNumber("Symbol", Sym, Ordinal); } } @@ -1592,12 +1637,17 @@ void COFFDumper::printDelayImportedSymbols( for (const ImportedSymbolRef &S : Range) { DictScope Import(W, "Import"); StringRef Sym; - error(S.getSymbolName(Sym)); + if (std::error_code EC = S.getSymbolName(Sym)) + reportError(errorCodeToError(EC), Obj->getFileName()); + uint16_t Ordinal; - error(S.getOrdinal(Ordinal)); + if (std::error_code EC = S.getOrdinal(Ordinal)) + reportError(errorCodeToError(EC), Obj->getFileName()); W.printNumber("Symbol", Sym, Ordinal); + uint64_t Addr; - error(I.getImportAddress(Index++, Addr)); + if (std::error_code EC = I.getImportAddress(Index++, Addr)) + reportError(errorCodeToError(EC), Obj->getFileName()); W.printHex("Address", Addr); } } @@ -1607,13 +1657,16 @@ void COFFDumper::printCOFFImports() { for (const ImportDirectoryEntryRef &I : Obj->import_directories()) { DictScope Import(W, "Import"); StringRef Name; - error(I.getName(Name)); + if (std::error_code EC = I.getName(Name)) + reportError(errorCodeToError(EC), Obj->getFileName()); W.printString("Name", Name); uint32_t ILTAddr; - error(I.getImportLookupTableRVA(ILTAddr)); + if (std::error_code EC = I.getImportLookupTableRVA(ILTAddr)) + reportError(errorCodeToError(EC), Obj->getFileName()); W.printHex("ImportLookupTableRVA", ILTAddr); uint32_t IATAddr; - error(I.getImportAddressTableRVA(IATAddr)); + if (std::error_code EC = I.getImportAddressTableRVA(IATAddr)) + reportError(errorCodeToError(EC), Obj->getFileName()); W.printHex("ImportAddressTableRVA", IATAddr); // The import lookup table can be missing with certain older linkers, so // fall back to the import address table in that case. 
@@ -1627,10 +1680,12 @@ void COFFDumper::printCOFFImports() { for (const DelayImportDirectoryEntryRef &I : Obj->delay_import_directories()) { DictScope Import(W, "DelayImport"); StringRef Name; - error(I.getName(Name)); + if (std::error_code EC = I.getName(Name)) + reportError(errorCodeToError(EC), Obj->getFileName()); W.printString("Name", Name); const delay_import_directory_table_entry *Table; - error(I.getDelayImportTable(Table)); + if (std::error_code EC = I.getDelayImportTable(Table)) + reportError(errorCodeToError(EC), Obj->getFileName()); W.printHex("Attributes", Table->Attributes); W.printHex("ModuleHandle", Table->ModuleHandle); W.printHex("ImportAddressTable", Table->DelayImportAddressTable); @@ -1648,9 +1703,12 @@ void COFFDumper::printCOFFExports() { StringRef Name; uint32_t Ordinal, RVA; - error(E.getSymbolName(Name)); - error(E.getOrdinal(Ordinal)); - error(E.getExportRVA(RVA)); + if (std::error_code EC = E.getSymbolName(Name)) + reportError(errorCodeToError(EC), Obj->getFileName()); + if (std::error_code EC = E.getOrdinal(Ordinal)) + reportError(errorCodeToError(EC), Obj->getFileName()); + if (std::error_code EC = E.getExportRVA(RVA)) + reportError(errorCodeToError(EC), Obj->getFileName()); W.printNumber("Ordinal", Ordinal); W.printString("Name", Name); @@ -1660,13 +1718,12 @@ void COFFDumper::printCOFFExports() { void COFFDumper::printCOFFDirectives() { for (const SectionRef &Section : Obj->sections()) { - StringRef Name; - - error(Section.getName(Name)); + StringRef Name = unwrapOrError(Obj->getFileName(), Section.getName()); if (Name != ".drectve") continue; - StringRef Contents = unwrapOrError(Section.getContents()); + StringRef Contents = + unwrapOrError(Obj->getFileName(), Section.getContents()); W.printString("Directive(s)", Contents); } } @@ -1689,8 +1746,10 @@ void COFFDumper::printCOFFBaseReloc() { for (const BaseRelocRef &I : Obj->base_relocs()) { uint8_t Type; uint32_t RVA; - error(I.getRVA(RVA)); - error(I.getType(Type)); + if (std::error_code EC = I.getRVA(RVA)) + reportError(errorCodeToError(EC), Obj->getFileName()); + if (std::error_code EC = I.getType(Type)) + reportError(errorCodeToError(EC), Obj->getFileName()); DictScope Import(W, "Entry"); W.printString("Type", getBaseRelocTypeName(Type)); W.printHex("Address", RVA); @@ -1700,16 +1759,18 @@ void COFFDumper::printCOFFBaseReloc() { void COFFDumper::printCOFFResources() { ListScope ResourcesD(W, "Resources"); for (const SectionRef &S : Obj->sections()) { - StringRef Name; - error(S.getName(Name)); + StringRef Name = unwrapOrError(Obj->getFileName(), S.getName()); if (!Name.startswith(".rsrc")) continue; - StringRef Ref = unwrapOrError(S.getContents()); + StringRef Ref = unwrapOrError(Obj->getFileName(), S.getContents()); if ((Name == ".rsrc") || (Name == ".rsrc$01")) { - ResourceSectionRef RSF(Ref); - auto &BaseTable = unwrapOrError(RSF.getBaseTable()); + ResourceSectionRef RSF; + Error E = RSF.load(Obj, S); + if (E) + reportError(std::move(E), Obj->getFileName()); + auto &BaseTable = unwrapOrError(Obj->getFileName(), RSF.getBaseTable()); W.printNumber("Total Number of Resources", countTotalTableEntries(RSF, BaseTable, "Type")); W.printHex("Base Table Address", @@ -1729,14 +1790,15 @@ COFFDumper::countTotalTableEntries(ResourceSectionRef RSF, uint32_t TotalEntries = 0; for (int i = 0; i < Table.NumberOfNameEntries + Table.NumberOfIDEntries; i++) { - auto Entry = unwrapOrError(getResourceDirectoryTableEntry(Table, i)); + auto Entry = unwrapOrError(Obj->getFileName(), RSF.getTableEntry(Table, i)); if 
(Entry.Offset.isSubDir()) { StringRef NextLevel; if (Level == "Name") NextLevel = "Language"; else NextLevel = "Name"; - auto &NextTable = unwrapOrError(RSF.getEntrySubDir(Entry)); + auto &NextTable = + unwrapOrError(Obj->getFileName(), RSF.getEntrySubDir(Entry)); TotalEntries += countTotalTableEntries(RSF, NextTable, NextLevel); } else { TotalEntries += 1; @@ -1755,13 +1817,13 @@ void COFFDumper::printResourceDirectoryTable( // Iterate through level in resource directory tree. for (int i = 0; i < Table.NumberOfNameEntries + Table.NumberOfIDEntries; i++) { - auto Entry = unwrapOrError(getResourceDirectoryTableEntry(Table, i)); + auto Entry = unwrapOrError(Obj->getFileName(), RSF.getTableEntry(Table, i)); StringRef Name; SmallString<20> IDStr; raw_svector_ostream OS(IDStr); if (i < Table.NumberOfNameEntries) { ArrayRef RawEntryNameString = - unwrapOrError(RSF.getEntryNameString(Entry)); + unwrapOrError(Obj->getFileName(), RSF.getEntryNameString(Entry)); std::vector EndianCorrectedNameString; if (llvm::sys::IsBigEndianHost) { EndianCorrectedNameString.resize(RawEntryNameString.size() + 1); @@ -1772,14 +1834,14 @@ void COFFDumper::printResourceDirectoryTable( } std::string EntryNameString; if (!llvm::convertUTF16ToUTF8String(RawEntryNameString, EntryNameString)) - error(object_error::parse_failed); + reportError(errorCodeToError(object_error::parse_failed), + Obj->getFileName()); OS << ": "; OS << EntryNameString; } else { if (Level == "Type") { OS << ": "; printResourceTypeName(Entry.Identifier.ID, OS); - IDStr = IDStr.slice(0, IDStr.find_first_of(")", 0) + 1); } else { OS << ": (ID " << Entry.Identifier.ID << ")"; } @@ -1793,7 +1855,8 @@ void COFFDumper::printResourceDirectoryTable( NextLevel = "Language"; else NextLevel = "Name"; - auto &NextTable = unwrapOrError(RSF.getEntrySubDir(Entry)); + auto &NextTable = + unwrapOrError(Obj->getFileName(), RSF.getEntrySubDir(Entry)); printResourceDirectoryTable(RSF, NextTable, NextLevel); } else { W.printHex("Entry Offset", Entry.Offset.value()); @@ -1804,24 +1867,29 @@ void COFFDumper::printResourceDirectoryTable( W.printNumber("Major Version", Table.MajorVersion); W.printNumber("Minor Version", Table.MinorVersion); W.printNumber("Characteristics", Table.Characteristics); + ListScope DataScope(W, "Data"); + auto &DataEntry = + unwrapOrError(Obj->getFileName(), RSF.getEntryData(Entry)); + W.printHex("DataRVA", DataEntry.DataRVA); + W.printNumber("DataSize", DataEntry.DataSize); + W.printNumber("Codepage", DataEntry.Codepage); + W.printNumber("Reserved", DataEntry.Reserved); + StringRef Contents = + unwrapOrError(Obj->getFileName(), RSF.getContents(DataEntry)); + W.printBinaryBlock("Data", Contents); } } } -ErrorOr -COFFDumper::getResourceDirectoryTableEntry(const coff_resource_dir_table &Table, - uint32_t Index) { - if (Index >= (uint32_t)(Table.NumberOfNameEntries + Table.NumberOfIDEntries)) - return object_error::parse_failed; - auto TablePtr = reinterpret_cast(&Table + 1); - return TablePtr[Index]; -} - void COFFDumper::printStackMap() const { object::SectionRef StackMapSection; for (auto Sec : Obj->sections()) { StringRef Name; - Sec.getName(Name); + if (Expected NameOrErr = Sec.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + if (Name == ".llvm_stackmaps") { StackMapSection = Sec; break; @@ -1831,7 +1899,8 @@ void COFFDumper::printStackMap() const { if (StackMapSection == object::SectionRef()) return; - StringRef StackMapContents = unwrapOrError(StackMapSection.getContents()); + StringRef StackMapContents = + 
unwrapOrError(Obj->getFileName(), StackMapSection.getContents()); ArrayRef StackMapContentsArray = arrayRefFromStringRef(StackMapContents); @@ -1847,7 +1916,11 @@ void COFFDumper::printAddrsig() { object::SectionRef AddrsigSection; for (auto Sec : Obj->sections()) { StringRef Name; - Sec.getName(Name); + if (Expected NameOrErr = Sec.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + if (Name == ".llvm_addrsig") { AddrsigSection = Sec; break; @@ -1857,7 +1930,8 @@ void COFFDumper::printAddrsig() { if (AddrsigSection == object::SectionRef()) return; - StringRef AddrsigContents = unwrapOrError(AddrsigSection.getContents()); + StringRef AddrsigContents = + unwrapOrError(Obj->getFileName(), AddrsigSection.getContents()); ArrayRef AddrsigContentsArray(AddrsigContents.bytes_begin(), AddrsigContents.size()); @@ -1869,15 +1943,15 @@ void COFFDumper::printAddrsig() { const char *Err; uint64_t SymIndex = decodeULEB128(Cur, &Size, End, &Err); if (Err) - reportError(Err); + reportError(createError(Err), Obj->getFileName()); Expected Sym = Obj->getSymbol(SymIndex); + if (!Sym) + reportError(Sym.takeError(), Obj->getFileName()); + StringRef SymName; - std::error_code EC = errorToErrorCode(Sym.takeError()); - if (EC || (EC = Obj->getSymbolName(*Sym, SymName))) { - SymName = ""; - error(EC); - } + if (std::error_code EC = Obj->getSymbolName(*Sym, SymName)) + reportError(errorCodeToError(EC), Obj->getFileName()); W.printNumber("Sym", SymName, SymIndex); Cur += Size; @@ -1891,7 +1965,8 @@ void llvm::dumpCodeViewMergedTypes(ScopedPrinter &Writer, { ListScope S(Writer, "MergedTypeStream"); TypeDumpVisitor TDV(TpiTypes, &Writer, opts::CodeViewSubsectionBytes); - error(codeview::visitTypeStream(TpiTypes, TDV)); + if (Error Err = codeview::visitTypeStream(TpiTypes, TDV)) + reportError(std::move(Err), ""); Writer.flush(); } @@ -1902,7 +1977,8 @@ void llvm::dumpCodeViewMergedTypes(ScopedPrinter &Writer, ListScope S(Writer, "MergedIDStream"); TypeDumpVisitor TDV(TpiTypes, &Writer, opts::CodeViewSubsectionBytes); TDV.setIpiTypes(IpiTypes); - error(codeview::visitTypeStream(IpiTypes, TDV)); + if (Error Err = codeview::visitTypeStream(IpiTypes, TDV)) + reportError(std::move(Err), ""); Writer.flush(); } } diff --git a/tools/llvm-readobj/DwarfCFIEHPrinter.h b/tools/llvm-readobj/DwarfCFIEHPrinter.h index 7055510ef2f2..0a365d4fe72a 100644 --- a/tools/llvm-readobj/DwarfCFIEHPrinter.h +++ b/tools/llvm-readobj/DwarfCFIEHPrinter.h @@ -44,12 +44,12 @@ public: void printUnwindInformation() const; }; -template -static const typename ELFO::Elf_Shdr *findSectionByAddress(const ELFO *Obj, - uint64_t Addr) { - auto Sections = Obj->sections(); +template +static const typename object::ELFObjectFile::Elf_Shdr * +findSectionByAddress(const object::ELFObjectFile *ObjF, uint64_t Addr) { + auto Sections = ObjF->getELFFile()->sections(); if (Error E = Sections.takeError()) - reportError(toString(std::move(E))); + reportError(std::move(E), ObjF->getFileName()); for (const auto &Shdr : *Sections) if (Shdr.sh_addr == Addr) @@ -64,13 +64,15 @@ void PrinterContext::printUnwindInformation() const { auto PHs = Obj->program_headers(); if (Error E = PHs.takeError()) - reportError(toString(std::move(E))); + reportError(std::move(E), ObjF->getFileName()); for (const auto &Phdr : *PHs) { if (Phdr.p_type == ELF::PT_GNU_EH_FRAME) { EHFramePhdr = &Phdr; if (Phdr.p_memsz != Phdr.p_filesz) - reportError("p_memsz does not match p_filesz for GNU_EH_FRAME"); + reportError(object::createError( + "p_memsz does not match p_filesz for 
GNU_EH_FRAME"), + ObjF->getFileName()); break; } } @@ -81,12 +83,12 @@ void PrinterContext::printUnwindInformation() const { auto Sections = Obj->sections(); if (Error E = Sections.takeError()) - reportError(toString(std::move(E))); + reportError(std::move(E), ObjF->getFileName()); for (const auto &Shdr : *Sections) { auto SectionName = Obj->getSectionName(&Shdr); if (Error E = SectionName.takeError()) - reportError(toString(std::move(E))); + reportError(std::move(E), ObjF->getFileName()); if (*SectionName == ".eh_frame") printEHFrame(&Shdr); @@ -97,49 +99,52 @@ template void PrinterContext::printEHFrameHdr(uint64_t EHFrameHdrOffset, uint64_t EHFrameHdrAddress, uint64_t EHFrameHdrSize) const { - ListScope L(W, "EH_FRAME Header"); + DictScope L(W, "EHFrameHeader"); W.startLine() << format("Address: 0x%" PRIx64 "\n", EHFrameHdrAddress); W.startLine() << format("Offset: 0x%" PRIx64 "\n", EHFrameHdrOffset); W.startLine() << format("Size: 0x%" PRIx64 "\n", EHFrameHdrSize); const object::ELFFile *Obj = ObjF->getELFFile(); - const auto *EHFrameHdrShdr = findSectionByAddress(Obj, EHFrameHdrAddress); + const auto *EHFrameHdrShdr = findSectionByAddress(ObjF, EHFrameHdrAddress); if (EHFrameHdrShdr) { auto SectionName = Obj->getSectionName(EHFrameHdrShdr); if (Error E = SectionName.takeError()) - reportError(toString(std::move(E))); + reportError(std::move(E), ObjF->getFileName()); W.printString("Corresponding Section", *SectionName); } - DataExtractor DE( - StringRef(reinterpret_cast(Obj->base()) + EHFrameHdrOffset, - EHFrameHdrSize), - ELFT::TargetEndianness == support::endianness::little, - ELFT::Is64Bits ? 8 : 4); + DataExtractor DE(makeArrayRef(Obj->base() + EHFrameHdrOffset, EHFrameHdrSize), + ELFT::TargetEndianness == support::endianness::little, + ELFT::Is64Bits ? 
8 : 4); DictScope D(W, "Header"); - uint32_t Offset = 0; + uint64_t Offset = 0; auto Version = DE.getU8(&Offset); W.printNumber("version", Version); if (Version != 1) - reportError("only version 1 of .eh_frame_hdr is supported"); + reportError( + object::createError("only version 1 of .eh_frame_hdr is supported"), + ObjF->getFileName()); uint64_t EHFramePtrEnc = DE.getU8(&Offset); W.startLine() << format("eh_frame_ptr_enc: 0x%" PRIx64 "\n", EHFramePtrEnc); if (EHFramePtrEnc != (dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4)) - reportError("unexpected encoding eh_frame_ptr_enc"); + reportError(object::createError("unexpected encoding eh_frame_ptr_enc"), + ObjF->getFileName()); uint64_t FDECountEnc = DE.getU8(&Offset); W.startLine() << format("fde_count_enc: 0x%" PRIx64 "\n", FDECountEnc); if (FDECountEnc != dwarf::DW_EH_PE_udata4) - reportError("unexpected encoding fde_count_enc"); + reportError(object::createError("unexpected encoding fde_count_enc"), + ObjF->getFileName()); uint64_t TableEnc = DE.getU8(&Offset); W.startLine() << format("table_enc: 0x%" PRIx64 "\n", TableEnc); if (TableEnc != (dwarf::DW_EH_PE_datarel | dwarf::DW_EH_PE_sdata4)) - reportError("unexpected encoding table_enc"); + reportError(object::createError("unexpected encoding table_enc"), + ObjF->getFileName()); auto EHFramePtr = DE.getSigned(&Offset, 4) + EHFrameHdrAddress + 4; W.startLine() << format("eh_frame_ptr: 0x%" PRIx64 "\n", EHFramePtr); @@ -158,7 +163,8 @@ void PrinterContext::printEHFrameHdr(uint64_t EHFrameHdrOffset, W.startLine() << format("address: 0x%" PRIx64 "\n", Address); if (InitialPC < PrevPC) - reportError("initial_location is out of order"); + reportError(object::createError("initial_location is out of order"), + ObjF->getFileName()); PrevPC = InitialPC; ++NumEntries; @@ -178,7 +184,7 @@ void PrinterContext::printEHFrame( const object::ELFFile *Obj = ObjF->getELFFile(); auto Result = Obj->getSectionContents(EHFrameShdr); if (Error E = Result.takeError()) - reportError(toString(std::move(E))); + reportError(std::move(E), ObjF->getFileName()); auto Contents = Result.get(); DWARFDataExtractor DE( diff --git a/tools/llvm-readobj/ELFDumper.cpp b/tools/llvm-readobj/ELFDumper.cpp index 4e1cb7d544e7..57144882c4b4 100644 --- a/tools/llvm-readobj/ELFDumper.cpp +++ b/tools/llvm-readobj/ELFDumper.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" @@ -36,6 +37,7 @@ #include "llvm/Object/ELFTypes.h" #include "llvm/Object/Error.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Object/RelocationResolver.h" #include "llvm/Object/StackMapParser.h" #include "llvm/Support/AMDGPUMetadata.h" #include "llvm/Support/ARMAttributeParser.h" @@ -61,6 +63,7 @@ #include #include #include +#include #include using namespace llvm; @@ -119,9 +122,9 @@ template class DumpStyle; /// the size, entity size and virtual address are different entries in arbitrary /// order (DT_REL, DT_RELSZ, DT_RELENT for example). struct DynRegionInfo { - DynRegionInfo() = default; - DynRegionInfo(const void *A, uint64_t S, uint64_t ES) - : Addr(A), Size(S), EntSize(ES) {} + DynRegionInfo(StringRef ObjName) : FileName(ObjName) {} + DynRegionInfo(const void *A, uint64_t S, uint64_t ES, StringRef ObjName) + : Addr(A), Size(S), EntSize(ES), FileName(ObjName) {} /// Address in current address space. 
const void *Addr = nullptr; @@ -130,14 +133,18 @@ struct DynRegionInfo { /// Size of each entity in the region. uint64_t EntSize = 0; + /// Name of the file. Used for error reporting. + StringRef FileName; + template ArrayRef getAsArrayRef() const { const Type *Start = reinterpret_cast(Addr); if (!Start) return {Start, Start}; if (EntSize != sizeof(Type) || Size % EntSize) { // TODO: Add a section index to this warning. - reportWarning("invalid section size (" + Twine(Size) + - ") or entity size (" + Twine(EntSize) + ")"); + reportWarning(createError("invalid section size (" + Twine(Size) + + ") or entity size (" + Twine(EntSize) + ")"), + FileName); return {Start, Start}; } return {Start, Start + (Size / EntSize)}; @@ -166,11 +173,7 @@ public: void printVersionInfo() override; void printGroupSections() override; - void printAttributes() override; - void printMipsPLTGOT() override; - void printMipsABIFlags() override; - void printMipsReginfo() override; - void printMipsOptions() override; + void printArchSpecificInfo() override; void printStackMap() const override; @@ -182,6 +185,7 @@ public: void printNotes() override; void printELFLinkerOptions() override; + void printStackSizes() override; const object::ELFObjectFile *getElfObject() const { return ObjF; }; @@ -195,20 +199,27 @@ private: if (DRI.Addr < Obj->base() || reinterpret_cast(DRI.Addr) + DRI.Size > Obj->base() + Obj->getBufSize()) - error(llvm::object::object_error::parse_failed); + reportError(errorCodeToError(llvm::object::object_error::parse_failed), + ObjF->getFileName()); return DRI; } DynRegionInfo createDRIFrom(const Elf_Phdr *P, uintX_t EntSize) { - return checkDRI( - {ObjF->getELFFile()->base() + P->p_offset, P->p_filesz, EntSize}); + return checkDRI({ObjF->getELFFile()->base() + P->p_offset, P->p_filesz, + EntSize, ObjF->getFileName()}); } DynRegionInfo createDRIFrom(const Elf_Shdr *S) { - return checkDRI( - {ObjF->getELFFile()->base() + S->sh_offset, S->sh_size, S->sh_entsize}); + return checkDRI({ObjF->getELFFile()->base() + S->sh_offset, S->sh_size, + S->sh_entsize, ObjF->getFileName()}); } + void printAttributes(); + void printMipsReginfo(); + void printMipsOptions(); + + std::pair + findDynamic(const ELFFile *Obj); void loadDynamicTable(const ELFFile *Obj); void parseDynamicTable(); @@ -226,7 +237,7 @@ private: DynRegionInfo DynSymRegion; DynRegionInfo DynamicTable; StringRef DynamicStringTable; - StringRef SOName = ""; + std::string SOName = ""; const Elf_Hash *HashTable = nullptr; const Elf_GnuHash *GnuHashTable = nullptr; const Elf_Shdr *DotSymtabSec = nullptr; @@ -291,7 +302,8 @@ public: void getSectionNameIndex(const Elf_Sym *Symbol, const Elf_Sym *FirstSym, StringRef &SectionName, unsigned &SectionIndex) const; - std::string getStaticSymbolName(uint32_t Index) const; + Expected getStaticSymbolName(uint32_t Index) const; + std::string getDynamicString(uint64_t Value) const; StringRef getSymbolVersionByIndex(StringRef StrTab, uint32_t VersionSymbolIndex, bool &IsDefault) const; @@ -328,16 +340,27 @@ void ELFDumper::printSymbolsHelper(bool IsDynamic) const { } else { if (!DotSymtabSec) return; - StrTable = unwrapOrError(Obj->getStringTableForSymtab(*DotSymtabSec)); - Syms = unwrapOrError(Obj->symbols(DotSymtabSec)); - SymtabName = unwrapOrError(Obj->getSectionName(DotSymtabSec)); + StrTable = unwrapOrError(ObjF->getFileName(), + Obj->getStringTableForSymtab(*DotSymtabSec)); + Syms = unwrapOrError(ObjF->getFileName(), Obj->symbols(DotSymtabSec)); + SymtabName = + unwrapOrError(ObjF->getFileName(), 
Obj->getSectionName(DotSymtabSec)); Entries = DotSymtabSec->getEntityCount(); } if (Syms.begin() == Syms.end()) return; - ELFDumperStyle->printSymtabMessage(Obj, SymtabName, Entries); + + // The st_other field has 2 logical parts. The first two bits hold the symbol + // visibility (STV_*) and the remainder hold other platform-specific values. + bool NonVisibilityBitsUsed = llvm::find_if(Syms, [](const Elf_Sym &S) { + return S.st_other & ~0x3; + }) != Syms.end(); + + ELFDumperStyle->printSymtabMessage(Obj, SymtabName, Entries, + NonVisibilityBitsUsed); for (const auto &Sym : Syms) - ELFDumperStyle->printSymbol(Obj, &Sym, Syms.begin(), StrTable, IsDynamic); + ELFDumperStyle->printSymbol(Obj, &Sym, Syms.begin(), StrTable, IsDynamic, + NonVisibilityBitsUsed); } template class MipsGOTParser; @@ -346,8 +369,20 @@ template class DumpStyle { public: using Elf_Shdr = typename ELFT::Shdr; using Elf_Sym = typename ELFT::Sym; + using Elf_Addr = typename ELFT::Addr; + + DumpStyle(ELFDumper *Dumper) : Dumper(Dumper) { + FileName = this->Dumper->getElfObject()->getFileName(); + + // Dumper reports all non-critical errors as warnings. + // It does not print the same warning more than once. + WarningHandler = [this](const Twine &Msg) { + if (Warnings.insert(Msg.str()).second) + reportWarning(createError(Msg), FileName); + return Error::success(); + }; + } - DumpStyle(ELFDumper *Dumper) : Dumper(Dumper) {} virtual ~DumpStyle() = default; virtual void printFileHeaders(const ELFFile *Obj) = 0; @@ -360,10 +395,10 @@ public: virtual void printDynamic(const ELFFile *Obj) {} virtual void printDynamicRelocations(const ELFFile *Obj) = 0; virtual void printSymtabMessage(const ELFFile *Obj, StringRef Name, - size_t Offset) {} + size_t Offset, bool NonVisibilityBitsUsed) {} virtual void printSymbol(const ELFFile *Obj, const Elf_Sym *Symbol, const Elf_Sym *FirstSym, StringRef StrTable, - bool IsDynamic) = 0; + bool IsDynamic, bool NonVisibilityBitsUsed) = 0; virtual void printProgramHeaders(const ELFFile *Obj, bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) = 0; @@ -378,11 +413,31 @@ public: virtual void printAddrsig(const ELFFile *Obj) = 0; virtual void printNotes(const ELFFile *Obj) = 0; virtual void printELFLinkerOptions(const ELFFile *Obj) = 0; + virtual void printStackSizes(const ELFObjectFile *Obj) = 0; + void printNonRelocatableStackSizes(const ELFObjectFile *Obj, + std::function PrintHeader); + void printRelocatableStackSizes(const ELFObjectFile *Obj, + std::function PrintHeader); + void printFunctionStackSize(const ELFObjectFile *Obj, uint64_t SymValue, + SectionRef FunctionSec, + const StringRef SectionName, DataExtractor Data, + uint64_t *Offset); + void printStackSize(const ELFObjectFile *Obj, RelocationRef Rel, + SectionRef FunctionSec, + const StringRef &StackSizeSectionName, + const RelocationResolver &Resolver, DataExtractor Data); + virtual void printStackSizeEntry(uint64_t Size, StringRef FuncName) = 0; virtual void printMipsGOT(const MipsGOTParser &Parser) = 0; virtual void printMipsPLT(const MipsGOTParser &Parser) = 0; + virtual void printMipsABIFlags(const ELFObjectFile *Obj) = 0; const ELFDumper *dumper() const { return Dumper; } +protected: + std::function WarningHandler; + StringRef FileName; + private: + std::unordered_set Warnings; const ELFDumper *Dumper; }; @@ -407,8 +462,8 @@ public: void printHashSymbols(const ELFO *Obj) override; void printDynamic(const ELFFile *Obj) override; void printDynamicRelocations(const ELFO *Obj) override; - void printSymtabMessage(const ELFO 
*Obj, StringRef Name, - size_t Offset) override; + void printSymtabMessage(const ELFO *Obj, StringRef Name, size_t Offset, + bool NonVisibilityBitsUsed) override; void printProgramHeaders(const ELFO *Obj, bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) override; void printVersionSymbolSection(const ELFFile *Obj, @@ -422,8 +477,11 @@ public: void printAddrsig(const ELFFile *Obj) override; void printNotes(const ELFFile *Obj) override; void printELFLinkerOptions(const ELFFile *Obj) override; + void printStackSizes(const ELFObjectFile *Obj) override; + void printStackSizeEntry(uint64_t Size, StringRef FuncName) override; void printMipsGOT(const MipsGOTParser &Parser) override; void printMipsPLT(const MipsGOTParser &Parser) override; + void printMipsABIFlags(const ELFObjectFile *Obj) override; private: struct Field { @@ -484,7 +542,8 @@ private: void printRelocation(const ELFO *Obj, const Elf_Sym *Sym, StringRef SymbolName, const Elf_Rela &R, bool IsRela); void printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *First, - StringRef StrTable, bool IsDynamic) override; + StringRef StrTable, bool IsDynamic, + bool NonVisibilityBitsUsed) override; std::string getSymbolSectionNdx(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *FirstSym); void printDynamicRelocation(const ELFO *Obj, Elf_Rela R, bool IsRela); @@ -525,8 +584,11 @@ public: void printAddrsig(const ELFFile *Obj) override; void printNotes(const ELFFile *Obj) override; void printELFLinkerOptions(const ELFFile *Obj) override; + void printStackSizes(const ELFObjectFile *Obj) override; + void printStackSizeEntry(uint64_t Size, StringRef FuncName) override; void printMipsGOT(const MipsGOTParser &Parser) override; void printMipsPLT(const MipsGOTParser &Parser) override; + void printMipsABIFlags(const ELFObjectFile *Obj) override; private: void printRelocation(const ELFO *Obj, Elf_Rela Rel, const Elf_Shdr *SymTab); @@ -534,7 +596,8 @@ private: void printSymbols(const ELFO *Obj); void printDynamicSymbols(const ELFO *Obj); void printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *First, - StringRef StrTable, bool IsDynamic) override; + StringRef StrTable, bool IsDynamic, + bool /*NonVisibilityBitsUsed*/) override; void printProgramHeaders(const ELFO *Obj); void printSectionMapping(const ELFO *Obj) {} @@ -680,9 +743,9 @@ StringRef ELFDumper::getSymbolVersion(StringRef StrTab, sizeof(Elf_Sym); // Get the corresponding version index entry. 
- const Elf_Versym *Versym = - unwrapOrError(ObjF->getELFFile()->template getEntry( - SymbolVersionSection, EntryIndex)); + const Elf_Versym *Versym = unwrapOrError( + ObjF->getFileName(), ObjF->getELFFile()->template getEntry( + SymbolVersionSection, EntryIndex)); return this->getSymbolVersionByIndex(StrTab, Versym->vs_index, IsDefault); } @@ -691,15 +754,22 @@ static std::string maybeDemangle(StringRef Name) { } template -std::string ELFDumper::getStaticSymbolName(uint32_t Index) const { +Expected +ELFDumper::getStaticSymbolName(uint32_t Index) const { const ELFFile *Obj = ObjF->getELFFile(); - StringRef StrTable = - unwrapOrError(Obj->getStringTableForSymtab(*DotSymtabSec)); - Elf_Sym_Range Syms = unwrapOrError(Obj->symbols(DotSymtabSec)); - if (Index >= Syms.size()) - reportError("Invalid symbol index"); - const Elf_Sym *Sym = &Syms[Index]; - return maybeDemangle(unwrapOrError(Sym->getName(StrTable))); + Expected SymOrErr = + Obj->getSymbol(DotSymtabSec, Index); + if (!SymOrErr) + return SymOrErr.takeError(); + + Expected StrTabOrErr = Obj->getStringTableForSymtab(*DotSymtabSec); + if (!StrTabOrErr) + return StrTabOrErr.takeError(); + + Expected NameOrErr = (*SymOrErr)->getName(*StrTabOrErr); + if (!NameOrErr) + return NameOrErr.takeError(); + return maybeDemangle(*NameOrErr); } template @@ -717,7 +787,7 @@ StringRef ELFDumper::getSymbolVersionByIndex(StringRef StrTab, // Lookup this symbol in the version table. LoadVersionMap(); if (VersionIndex >= VersionMap.size() || VersionMap[VersionIndex].isNull()) - reportError("Invalid version entry"); + reportError(createError("Invalid version entry"), ObjF->getFileName()); const VersionMapEntry &Entry = VersionMap[VersionIndex]; // Get the version name string. @@ -731,7 +801,7 @@ StringRef ELFDumper::getSymbolVersionByIndex(StringRef StrTab, IsDefault = false; } if (NameOffset >= StrTab.size()) - reportError("Invalid string offset"); + reportError(createError("Invalid string offset"), ObjF->getFileName()); return StrTab.data() + NameOffset; } @@ -739,14 +809,14 @@ template std::string ELFDumper::getFullSymbolName(const Elf_Sym *Symbol, StringRef StrTable, bool IsDynamic) const { - std::string SymbolName = - maybeDemangle(unwrapOrError(Symbol->getName(StrTable))); + std::string SymbolName = maybeDemangle( + unwrapOrError(ObjF->getFileName(), Symbol->getName(StrTable))); if (SymbolName.empty() && Symbol->getType() == ELF::STT_SECTION) { unsigned SectionIndex; StringRef SectionName; - Elf_Sym_Range Syms = - unwrapOrError(ObjF->getELFFile()->symbols(DotSymtabSec)); + Elf_Sym_Range Syms = unwrapOrError( + ObjF->getFileName(), ObjF->getELFFile()->symbols(DotSymtabSec)); getSectionNameIndex(Symbol, Syms.begin(), SectionName, SectionIndex); return SectionName; } @@ -783,31 +853,32 @@ void ELFDumper::getSectionNameIndex(const Elf_Sym *Symbol, SectionName = "Reserved"; else { if (SectionIndex == SHN_XINDEX) - SectionIndex = unwrapOrError(object::getExtendedSymbolTableIndex( - Symbol, FirstSym, ShndxTable)); + SectionIndex = unwrapOrError(ObjF->getFileName(), + object::getExtendedSymbolTableIndex( + Symbol, FirstSym, ShndxTable)); const ELFFile *Obj = ObjF->getELFFile(); const typename ELFT::Shdr *Sec = - unwrapOrError(Obj->getSection(SectionIndex)); - SectionName = unwrapOrError(Obj->getSectionName(Sec)); + unwrapOrError(ObjF->getFileName(), Obj->getSection(SectionIndex)); + SectionName = unwrapOrError(ObjF->getFileName(), Obj->getSectionName(Sec)); } } template static const typename ELFO::Elf_Shdr * -findNotEmptySectionByAddress(const ELFO *Obj, 
uint64_t Addr) { - for (const auto &Shdr : unwrapOrError(Obj->sections())) +findNotEmptySectionByAddress(const ELFO *Obj, StringRef FileName, + uint64_t Addr) { + for (const auto &Shdr : unwrapOrError(FileName, Obj->sections())) if (Shdr.sh_addr == Addr && Shdr.sh_size > 0) return &Shdr; return nullptr; } template -static const typename ELFO::Elf_Shdr *findSectionByName(const ELFO &Obj, - StringRef Name) { - for (const auto &Shdr : unwrapOrError(Obj.sections())) { - if (Name == unwrapOrError(Obj.getSectionName(&Shdr))) +static const typename ELFO::Elf_Shdr * +findSectionByName(const ELFO &Obj, StringRef FileName, StringRef Name) { + for (const auto &Shdr : unwrapOrError(FileName, Obj.sections())) + if (Name == unwrapOrError(FileName, Obj.getSectionName(&Shdr))) return &Shdr; - } return nullptr; } @@ -1356,10 +1427,12 @@ static const char *getElfMipsOptionsOdkType(unsigned Odk) { } template -void ELFDumper::loadDynamicTable(const ELFFile *Obj) { +std::pair +ELFDumper::findDynamic(const ELFFile *Obj) { // Try to locate the PT_DYNAMIC header. const Elf_Phdr *DynamicPhdr = nullptr; - for (const Elf_Phdr &Phdr : unwrapOrError(Obj->program_headers())) { + for (const Elf_Phdr &Phdr : + unwrapOrError(ObjF->getFileName(), Obj->program_headers())) { if (Phdr.p_type != ELF::PT_DYNAMIC) continue; DynamicPhdr = &Phdr; @@ -1368,61 +1441,132 @@ void ELFDumper::loadDynamicTable(const ELFFile *Obj) { // Try to locate the .dynamic section in the sections header table. const Elf_Shdr *DynamicSec = nullptr; - for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) { + for (const Elf_Shdr &Sec : + unwrapOrError(ObjF->getFileName(), Obj->sections())) { if (Sec.sh_type != ELF::SHT_DYNAMIC) continue; DynamicSec = &Sec; break; } - // Information in the section header has priority over the information - // in a PT_DYNAMIC header. + if (DynamicPhdr && DynamicPhdr->p_offset + DynamicPhdr->p_filesz > + ObjF->getMemoryBufferRef().getBufferSize()) { + reportWarning( + createError( + "PT_DYNAMIC segment offset + size exceeds the size of the file"), + ObjF->getFileName()); + // Don't use the broken dynamic header. + DynamicPhdr = nullptr; + } + + if (DynamicPhdr && DynamicSec) { + StringRef Name = + unwrapOrError(ObjF->getFileName(), Obj->getSectionName(DynamicSec)); + if (DynamicSec->sh_addr + DynamicSec->sh_size > + DynamicPhdr->p_vaddr + DynamicPhdr->p_memsz || + DynamicSec->sh_addr < DynamicPhdr->p_vaddr) + reportWarning(createError("The SHT_DYNAMIC section '" + Name + + "' is not contained within the " + "PT_DYNAMIC segment"), + ObjF->getFileName()); + + if (DynamicSec->sh_addr != DynamicPhdr->p_vaddr) + reportWarning(createError("The SHT_DYNAMIC section '" + Name + + "' is not at the start of " + "PT_DYNAMIC segment"), + ObjF->getFileName()); + } + + return std::make_pair(DynamicPhdr, DynamicSec); +} + +template +void ELFDumper::loadDynamicTable(const ELFFile *Obj) { + const Elf_Phdr *DynamicPhdr; + const Elf_Shdr *DynamicSec; + std::tie(DynamicPhdr, DynamicSec) = findDynamic(Obj); + if (!DynamicPhdr && !DynamicSec) + return; + + DynRegionInfo FromPhdr(ObjF->getFileName()); + bool IsPhdrTableValid = false; + if (DynamicPhdr) { + FromPhdr = createDRIFrom(DynamicPhdr, sizeof(Elf_Dyn)); + IsPhdrTableValid = !FromPhdr.getAsArrayRef().empty(); + } + + // Locate the dynamic table described in a section header. // Ignore sh_entsize and use the expected value for entry size explicitly. 
- // This allows us to dump the dynamic sections with a broken sh_entsize + // This allows us to dump dynamic sections with a broken sh_entsize // field. + DynRegionInfo FromSec(ObjF->getFileName()); + bool IsSecTableValid = false; if (DynamicSec) { - DynamicTable = checkDRI({ObjF->getELFFile()->base() + DynamicSec->sh_offset, - DynamicSec->sh_size, sizeof(Elf_Dyn)}); - parseDynamicTable(); + FromSec = + checkDRI({ObjF->getELFFile()->base() + DynamicSec->sh_offset, + DynamicSec->sh_size, sizeof(Elf_Dyn), ObjF->getFileName()}); + IsSecTableValid = !FromSec.getAsArrayRef().empty(); } - // If we have a PT_DYNAMIC header, we will either check the found dynamic - // section or take the dynamic table data directly from the header. - if (!DynamicPhdr) + // When we only have information from one of the SHT_DYNAMIC section header or + // PT_DYNAMIC program header, just use that. + if (!DynamicPhdr || !DynamicSec) { + if ((DynamicPhdr && IsPhdrTableValid) || (DynamicSec && IsSecTableValid)) { + DynamicTable = DynamicPhdr ? FromPhdr : FromSec; + parseDynamicTable(); + } else { + reportWarning(createError("no valid dynamic table was found"), + ObjF->getFileName()); + } return; + } - if (DynamicPhdr->p_offset + DynamicPhdr->p_filesz > - ObjF->getMemoryBufferRef().getBufferSize()) - reportError( - "PT_DYNAMIC segment offset + size exceeds the size of the file"); + // At this point we have tables found from the section header and from the + // dynamic segment. Usually they match, but we have to do sanity checks to + // verify that. - if (!DynamicSec) { - DynamicTable = createDRIFrom(DynamicPhdr, sizeof(Elf_Dyn)); - parseDynamicTable(); + if (FromPhdr.Addr != FromSec.Addr) + reportWarning(createError("SHT_DYNAMIC section header and PT_DYNAMIC " + "program header disagree about " + "the location of the dynamic table"), + ObjF->getFileName()); + + if (!IsPhdrTableValid && !IsSecTableValid) { + reportWarning(createError("no valid dynamic table was found"), + ObjF->getFileName()); return; } - StringRef Name = unwrapOrError(Obj->getSectionName(DynamicSec)); - if (DynamicSec->sh_addr + DynamicSec->sh_size > - DynamicPhdr->p_vaddr + DynamicPhdr->p_memsz || - DynamicSec->sh_addr < DynamicPhdr->p_vaddr) - reportWarning("The SHT_DYNAMIC section '" + Name + - "' is not contained within the " - "PT_DYNAMIC segment"); + // Information in the PT_DYNAMIC program header has priority over the information + // in a section header. 
+ if (IsPhdrTableValid) { + if (!IsSecTableValid) + reportWarning( + createError( + "SHT_DYNAMIC dynamic table is invalid: PT_DYNAMIC will be used"), + ObjF->getFileName()); + DynamicTable = FromPhdr; + } else { + reportWarning( + createError( + "PT_DYNAMIC dynamic table is invalid: SHT_DYNAMIC will be used"), + ObjF->getFileName()); + DynamicTable = FromSec; + } - if (DynamicSec->sh_addr != DynamicPhdr->p_vaddr) - reportWarning("The SHT_DYNAMIC section '" + Name + - "' is not at the start of " - "PT_DYNAMIC segment"); + parseDynamicTable(); } template ELFDumper::ELFDumper(const object::ELFObjectFile *ObjF, - ScopedPrinter &Writer) - : ObjDumper(Writer), ObjF(ObjF) { + ScopedPrinter &Writer) + : ObjDumper(Writer), ObjF(ObjF), DynRelRegion(ObjF->getFileName()), + DynRelaRegion(ObjF->getFileName()), DynRelrRegion(ObjF->getFileName()), + DynPLTRelRegion(ObjF->getFileName()), DynSymRegion(ObjF->getFileName()), + DynamicTable(ObjF->getFileName()) { const ELFFile *Obj = ObjF->getELFFile(); - - for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) { + for (const Elf_Shdr &Sec : + unwrapOrError(ObjF->getFileName(), Obj->sections())) { switch (Sec.sh_type) { case ELF::SHT_SYMTAB: if (!DotSymtabSec) @@ -1433,16 +1577,17 @@ ELFDumper::ELFDumper(const object::ELFObjectFile *ObjF, DynSymRegion = createDRIFrom(&Sec); // This is only used (if Elf_Shdr present)for naming section in GNU // style - DynSymtabName = unwrapOrError(Obj->getSectionName(&Sec)); + DynSymtabName = + unwrapOrError(ObjF->getFileName(), Obj->getSectionName(&Sec)); if (Expected E = Obj->getStringTableForSymtab(Sec)) DynamicStringTable = *E; else - warn(E.takeError()); + reportWarning(E.takeError(), ObjF->getFileName()); } break; case ELF::SHT_SYMTAB_SHNDX: - ShndxTable = unwrapOrError(Obj->getSHNDXTable(Sec)); + ShndxTable = unwrapOrError(ObjF->getFileName(), Obj->getSHNDXTable(Sec)); break; case ELF::SHT_GNU_versym: if (!SymbolVersionSection) @@ -1547,10 +1692,13 @@ template void ELFDumper::parseDynamicTable() { auto toMappedAddr = [&](uint64_t Tag, uint64_t VAddr) -> const uint8_t * { auto MappedAddrOrError = ObjF->getELFFile()->toMappedAddr(VAddr); if (!MappedAddrOrError) { - reportWarning("Unable to parse DT_" + - Twine(getTypeString( - ObjF->getELFFile()->getHeader()->e_machine, Tag)) + - ": " + llvm::toString(MappedAddrOrError.takeError())); + Error Err = + createError("Unable to parse DT_" + + Twine(getTypeString( + ObjF->getELFFile()->getHeader()->e_machine, Tag)) + + ": " + llvm::toString(MappedAddrOrError.takeError())); + + reportWarning(std::move(Err), ObjF->getFileName()); return nullptr; } return MappedAddrOrError.get(); @@ -1576,10 +1724,29 @@ template void ELFDumper::parseDynamicTable() { case ELF::DT_STRSZ: StringTableSize = Dyn.getVal(); break; - case ELF::DT_SYMTAB: - DynSymRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr()); - DynSymRegion.EntSize = sizeof(Elf_Sym); + case ELF::DT_SYMTAB: { + // Often we find the information about the dynamic symbol table + // location in the SHT_DYNSYM section header. However, the value in + // DT_SYMTAB has priority, because it is used by dynamic loaders to + // locate .dynsym at runtime. The location we find in the section header + // and the location we find here should match. If we can't map the + // DT_SYMTAB value to an address (e.g. when there are no program headers), we + // ignore its value. + if (const uint8_t *VA = toMappedAddr(Dyn.getTag(), Dyn.getPtr())) { + // EntSize is non-zero if the dynamic symbol table has been found via a + // section header. 
+ if (DynSymRegion.EntSize && VA != DynSymRegion.Addr) + reportWarning( + createError( + "SHT_DYNSYM section header and DT_SYMTAB disagree about " + "the location of the dynamic symbol table"), + ObjF->getFileName()); + + DynSymRegion.Addr = VA; + DynSymRegion.EntSize = sizeof(Elf_Sym); + } break; + } case ELF::DT_RELA: DynRelaRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr()); break; @@ -1619,8 +1786,9 @@ template void ELFDumper::parseDynamicTable() { else if (Dyn.getVal() == DT_RELA) DynPLTRelRegion.EntSize = sizeof(Elf_Rela); else - reportError(Twine("unknown DT_PLTREL value of ") + - Twine((uint64_t)Dyn.getVal())); + reportError(createError(Twine("unknown DT_PLTREL value of ") + + Twine((uint64_t)Dyn.getVal())), + ObjF->getFileName()); break; case ELF::DT_JMPREL: DynPLTRelRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr()); @@ -1632,8 +1800,7 @@ template void ELFDumper::parseDynamicTable() { } if (StringTableBegin) DynamicStringTable = StringRef(StringTableBegin, StringTableSize); - if (SONameOffset && SONameOffset < DynamicStringTable.size()) - SOName = DynamicStringTable.data() + SONameOffset; + SOName = getDynamicString(SONameOffset); } template @@ -1715,6 +1882,10 @@ template void ELFDumper::printELFLinkerOptions() { ELFDumperStyle->printELFLinkerOptions(ObjF->getELFFile()); } +template void ELFDumper::printStackSizes() { + ELFDumperStyle->printStackSizes(ObjF); +} + #define LLVM_READOBJ_DT_FLAG_ENT(prefix, enum) \ { #enum, prefix##_##enum } @@ -1953,13 +2124,7 @@ void ELFDumper::printDynamicEntry(raw_ostream &OS, uint64_t Type, {DT_RPATH, "Library rpath"}, {DT_RUNPATH, "Library runpath"}, }; - OS << TagNames.at(Type) << ": "; - if (DynamicStringTable.empty()) - OS << " "; - else if (Value < DynamicStringTable.size()) - OS << "[" << StringRef(DynamicStringTable.data() + Value) << "]"; - else - OS << ""; + OS << TagNames.at(Type) << ": [" << getDynamicString(Value) << "]"; break; } case DT_FLAGS: @@ -1974,6 +2139,15 @@ void ELFDumper::printDynamicEntry(raw_ostream &OS, uint64_t Type, } } +template +std::string ELFDumper::getDynamicString(uint64_t Value) const { + if (DynamicStringTable.empty()) + return ""; + if (Value < DynamicStringTable.size()) + return DynamicStringTable.data() + Value; + return Twine("").str(); +} + template void ELFDumper::printUnwindInfo() { DwarfCFIEH::PrinterContext Ctx(W, ObjF); Ctx.printUnwindInformation(); @@ -1985,7 +2159,8 @@ template <> void ELFDumper::printUnwindInfo() { const ELFFile *Obj = ObjF->getELFFile(); const unsigned Machine = Obj->getHeader()->e_machine; if (Machine == EM_ARM) { - ARM::EHABI::PrinterContext Ctx(W, Obj, DotSymtabSec); + ARM::EHABI::PrinterContext Ctx(W, Obj, ObjF->getFileName(), + DotSymtabSec); Ctx.PrintUnwindInformation(); } DwarfCFIEH::PrinterContext Ctx(W, ObjF); @@ -2001,17 +2176,10 @@ template void ELFDumper::printDynamicTable() { template void ELFDumper::printNeededLibraries() { ListScope D(W, "NeededLibraries"); - using LibsTy = std::vector; - LibsTy Libs; - + std::vector Libs; for (const auto &Entry : dynamic_table()) - if (Entry.d_tag == ELF::DT_NEEDED) { - uint64_t Value = Entry.d_un.d_val; - if (Value < DynamicStringTable.size()) - Libs.push_back(StringRef(DynamicStringTable.data() + Value)); - else - Libs.push_back(""); - } + if (Entry.d_tag == ELF::DT_NEEDED) + Libs.push_back(getDynamicString(Entry.d_un.d_val)); llvm::stable_sort(Libs); @@ -2042,7 +2210,7 @@ template void ELFDumper::printGnuHashTable() { Elf_Sym_Range Syms = dynamic_symbols(); unsigned NumSyms = std::distance(Syms.begin(), Syms.end()); 
if (!NumSyms) - reportError("No dynamic symbol section"); + reportError(createError("No dynamic symbol section"), ObjF->getFileName()); W.printHexList("Values", GnuHashTable->values(NumSyms)); } @@ -2050,6 +2218,30 @@ template void ELFDumper::printLoadName() { W.printString("LoadName", SOName); } +template void ELFDumper::printArchSpecificInfo() { + const ELFFile *Obj = ObjF->getELFFile(); + switch (Obj->getHeader()->e_machine) { + case EM_ARM: + printAttributes(); + break; + case EM_MIPS: { + ELFDumperStyle->printMipsABIFlags(ObjF); + printMipsOptions(); + printMipsReginfo(); + + MipsGOTParser Parser(Obj, ObjF->getFileName(), dynamic_table(), + dynamic_symbols()); + if (Parser.hasGot()) + ELFDumperStyle->printMipsGOT(Parser); + if (Parser.hasPlt()) + ELFDumperStyle->printMipsPLT(Parser); + break; + } + default: + break; + } +} + template void ELFDumper::printAttributes() { W.startLine() << "Attributes not implemented.\n"; } @@ -2064,11 +2256,13 @@ template <> void ELFDumper::printAttributes() { } DictScope BA(W, "BuildAttributes"); - for (const ELFO::Elf_Shdr &Sec : unwrapOrError(Obj->sections())) { + for (const ELFO::Elf_Shdr &Sec : + unwrapOrError(ObjF->getFileName(), Obj->sections())) { if (Sec.sh_type != ELF::SHT_ARM_ATTRIBUTES) continue; - ArrayRef Contents = unwrapOrError(Obj->getSectionContents(&Sec)); + ArrayRef Contents = + unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(&Sec)); if (Contents[0] != ARMBuildAttrs::Format_Version) { errs() << "unrecognised FormatVersion: 0x" << Twine::utohexstr(Contents[0]) << '\n'; @@ -2092,7 +2286,8 @@ public: const bool IsStatic; const ELFO * const Obj; - MipsGOTParser(const ELFO *Obj, Elf_Dyn_Range DynTable, Elf_Sym_Range DynSyms); + MipsGOTParser(const ELFO *Obj, StringRef FileName, Elf_Dyn_Range DynTable, + Elf_Sym_Range DynSyms); bool hasGot() const { return !GotEntries.empty(); } bool hasPlt() const { return !PltEntries.empty(); } @@ -2126,6 +2321,8 @@ private: const Elf_Shdr *PltSec; const Elf_Shdr *PltRelSec; const Elf_Shdr *PltSymTable; + StringRef FileName; + Elf_Sym_Range GotDynSyms; StringRef PltStrTable; @@ -2136,21 +2333,24 @@ private: } // end anonymous namespace template -MipsGOTParser::MipsGOTParser(const ELFO *Obj, Elf_Dyn_Range DynTable, +MipsGOTParser::MipsGOTParser(const ELFO *Obj, StringRef FileName, + Elf_Dyn_Range DynTable, Elf_Sym_Range DynSyms) : IsStatic(DynTable.empty()), Obj(Obj), GotSec(nullptr), LocalNum(0), - GlobalNum(0), PltSec(nullptr), PltRelSec(nullptr), PltSymTable(nullptr) { + GlobalNum(0), PltSec(nullptr), PltRelSec(nullptr), PltSymTable(nullptr), + FileName(FileName) { // See "Global Offset Table" in Chapter 5 in the following document // for detailed GOT description. // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf // Find static GOT secton. 
if (IsStatic) { - GotSec = findSectionByName(*Obj, ".got"); + GotSec = findSectionByName(*Obj, FileName, ".got"); if (!GotSec) - reportError("Cannot find .got section"); + return; - ArrayRef Content = unwrapOrError(Obj->getSectionContents(GotSec)); + ArrayRef Content = + unwrapOrError(FileName, Obj->getSectionContents(GotSec)); GotEntries = Entries(reinterpret_cast(Content.data()), Content.size() / sizeof(Entry)); LocalNum = GotEntries.size(); @@ -2194,17 +2394,21 @@ MipsGOTParser::MipsGOTParser(const ELFO *Obj, Elf_Dyn_Range DynTable, size_t DynSymTotal = DynSyms.size(); if (*DtGotSym > DynSymTotal) - reportError("MIPS_GOTSYM exceeds a number of dynamic symbols"); + reportError( + createError("MIPS_GOTSYM exceeds a number of dynamic symbols"), + FileName); - GotSec = findNotEmptySectionByAddress(Obj, *DtPltGot); + GotSec = findNotEmptySectionByAddress(Obj, FileName, *DtPltGot); if (!GotSec) - reportError("There is no not empty GOT section at 0x" + - Twine::utohexstr(*DtPltGot)); + reportError(createError("There is no not empty GOT section at 0x" + + Twine::utohexstr(*DtPltGot)), + FileName); LocalNum = *DtLocalGotNum; GlobalNum = DynSymTotal - *DtGotSym; - ArrayRef Content = unwrapOrError(Obj->getSectionContents(GotSec)); + ArrayRef Content = + unwrapOrError(FileName, Obj->getSectionContents(GotSec)); GotEntries = Entries(reinterpret_cast(Content.data()), Content.size() / sizeof(Entry)); GotDynSyms = DynSyms.drop_front(*DtGotSym); @@ -2217,23 +2421,24 @@ MipsGOTParser::MipsGOTParser(const ELFO *Obj, Elf_Dyn_Range DynTable, if (!DtJmpRel) report_fatal_error("Cannot find JMPREL dynamic table tag."); - PltSec = findNotEmptySectionByAddress(Obj, *DtMipsPltGot); + PltSec = findNotEmptySectionByAddress(Obj, FileName, * DtMipsPltGot); if (!PltSec) report_fatal_error("There is no not empty PLTGOT section at 0x " + Twine::utohexstr(*DtMipsPltGot)); - PltRelSec = findNotEmptySectionByAddress(Obj, *DtJmpRel); + PltRelSec = findNotEmptySectionByAddress(Obj, FileName, * DtJmpRel); if (!PltRelSec) report_fatal_error("There is no not empty RELPLT section at 0x" + Twine::utohexstr(*DtJmpRel)); ArrayRef PltContent = - unwrapOrError(Obj->getSectionContents(PltSec)); + unwrapOrError(FileName, Obj->getSectionContents(PltSec)); PltEntries = Entries(reinterpret_cast(PltContent.data()), PltContent.size() / sizeof(Entry)); - PltSymTable = unwrapOrError(Obj->getSection(PltRelSec->sh_link)); - PltStrTable = unwrapOrError(Obj->getStringTableForSymtab(*PltSymTable)); + PltSymTable = unwrapOrError(FileName, Obj->getSection(PltRelSec->sh_link)); + PltStrTable = + unwrapOrError(FileName, Obj->getStringTableForSymtab(*PltSymTable)); } } @@ -2334,26 +2539,16 @@ const typename MipsGOTParser::Elf_Sym * MipsGOTParser::getPltSym(const Entry *E) const { int64_t Offset = std::distance(getPltEntries().data(), E); if (PltRelSec->sh_type == ELF::SHT_REL) { - Elf_Rel_Range Rels = unwrapOrError(Obj->rels(PltRelSec)); - return unwrapOrError(Obj->getRelocationSymbol(&Rels[Offset], PltSymTable)); + Elf_Rel_Range Rels = unwrapOrError(FileName, Obj->rels(PltRelSec)); + return unwrapOrError(FileName, + Obj->getRelocationSymbol(&Rels[Offset], PltSymTable)); } else { - Elf_Rela_Range Rels = unwrapOrError(Obj->relas(PltRelSec)); - return unwrapOrError(Obj->getRelocationSymbol(&Rels[Offset], PltSymTable)); + Elf_Rela_Range Rels = unwrapOrError(FileName, Obj->relas(PltRelSec)); + return unwrapOrError(FileName, + Obj->getRelocationSymbol(&Rels[Offset], PltSymTable)); } } -template void ELFDumper::printMipsPLTGOT() { - const ELFFile *Obj = 
ObjF->getELFFile(); - if (Obj->getHeader()->e_machine != EM_MIPS) - reportError("MIPS PLT GOT is available for MIPS targets only"); - - MipsGOTParser Parser(Obj, dynamic_table(), dynamic_symbols()); - if (Parser.hasGot()) - ELFDumperStyle->printMipsGOT(Parser); - if (Parser.hasPlt()) - ELFDumperStyle->printMipsPLT(Parser); -} - static const EnumEntry ElfMipsISAExtType[] = { {"None", Mips::AFL_EXT_NONE}, {"Broadcom SB-1", Mips::AFL_EXT_SB1}, @@ -2427,41 +2622,6 @@ static int getMipsRegisterSize(uint8_t Flag) { } } -template void ELFDumper::printMipsABIFlags() { - const ELFFile *Obj = ObjF->getELFFile(); - const Elf_Shdr *Shdr = findSectionByName(*Obj, ".MIPS.abiflags"); - if (!Shdr) { - W.startLine() << "There is no .MIPS.abiflags section in the file.\n"; - return; - } - ArrayRef Sec = unwrapOrError(Obj->getSectionContents(Shdr)); - if (Sec.size() != sizeof(Elf_Mips_ABIFlags)) { - W.startLine() << "The .MIPS.abiflags section has a wrong size.\n"; - return; - } - - auto *Flags = reinterpret_cast *>(Sec.data()); - - raw_ostream &OS = W.getOStream(); - DictScope GS(W, "MIPS ABI Flags"); - - W.printNumber("Version", Flags->version); - W.startLine() << "ISA: "; - if (Flags->isa_rev <= 1) - OS << format("MIPS%u", Flags->isa_level); - else - OS << format("MIPS%ur%u", Flags->isa_level, Flags->isa_rev); - OS << "\n"; - W.printEnum("ISA Extension", Flags->isa_ext, makeArrayRef(ElfMipsISAExtType)); - W.printFlags("ASEs", Flags->ases, makeArrayRef(ElfMipsASEFlags)); - W.printEnum("FP ABI", Flags->fp_abi, makeArrayRef(ElfMipsFpABIType)); - W.printNumber("GPR size", getMipsRegisterSize(Flags->gpr_size)); - W.printNumber("CPR1 size", getMipsRegisterSize(Flags->cpr1_size)); - W.printNumber("CPR2 size", getMipsRegisterSize(Flags->cpr2_size)); - W.printFlags("Flags 1", Flags->flags1, makeArrayRef(ElfMipsFlags1)); - W.printHex("Flags 2", Flags->flags2); -} - template static void printMipsReginfoData(ScopedPrinter &W, const Elf_Mips_RegInfo &Reginfo) { @@ -2475,12 +2635,13 @@ static void printMipsReginfoData(ScopedPrinter &W, template void ELFDumper::printMipsReginfo() { const ELFFile *Obj = ObjF->getELFFile(); - const Elf_Shdr *Shdr = findSectionByName(*Obj, ".reginfo"); + const Elf_Shdr *Shdr = findSectionByName(*Obj, ObjF->getFileName(), ".reginfo"); if (!Shdr) { W.startLine() << "There is no .reginfo section in the file.\n"; return; } - ArrayRef Sec = unwrapOrError(Obj->getSectionContents(Shdr)); + ArrayRef Sec = + unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(Shdr)); if (Sec.size() != sizeof(Elf_Mips_RegInfo)) { W.startLine() << "The .reginfo section has a wrong size.\n"; return; @@ -2493,7 +2654,8 @@ template void ELFDumper::printMipsReginfo() { template void ELFDumper::printMipsOptions() { const ELFFile *Obj = ObjF->getELFFile(); - const Elf_Shdr *Shdr = findSectionByName(*Obj, ".MIPS.options"); + const Elf_Shdr *Shdr = + findSectionByName(*Obj, ObjF->getFileName(), ".MIPS.options"); if (!Shdr) { W.startLine() << "There is no .MIPS.options section in the file.\n"; return; @@ -2501,7 +2663,8 @@ template void ELFDumper::printMipsOptions() { DictScope GS(W, "MIPS Options"); - ArrayRef Sec = unwrapOrError(Obj->getSectionContents(Shdr)); + ArrayRef Sec = + unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(Shdr)); while (!Sec.empty()) { if (Sec.size() < sizeof(Elf_Mips_Options)) { W.startLine() << "The .MIPS.options section has a wrong size.\n"; @@ -2524,8 +2687,9 @@ template void ELFDumper::printMipsOptions() { template void ELFDumper::printStackMap() const { const ELFFile *Obj = 
ObjF->getELFFile(); const Elf_Shdr *StackMapSection = nullptr; - for (const auto &Sec : unwrapOrError(Obj->sections())) { - StringRef Name = unwrapOrError(Obj->getSectionName(&Sec)); + for (const auto &Sec : unwrapOrError(ObjF->getFileName(), Obj->sections())) { + StringRef Name = + unwrapOrError(ObjF->getFileName(), Obj->getSectionName(&Sec)); if (Name == ".llvm_stackmaps") { StackMapSection = &Sec; break; @@ -2535,8 +2699,8 @@ template void ELFDumper::printStackMap() const { if (!StackMapSection) return; - ArrayRef StackMapContentsArray = - unwrapOrError(Obj->getSectionContents(StackMapSection)); + ArrayRef StackMapContentsArray = unwrapOrError( + ObjF->getFileName(), Obj->getSectionContents(StackMapSection)); prettyPrintStackMap( W, StackMapParser(StackMapContentsArray)); @@ -2560,24 +2724,26 @@ static inline void printFields(formatted_raw_ostream &OS, StringRef Str1, } template -static std::string getSectionHeadersNumString(const ELFFile *Obj) { +static std::string getSectionHeadersNumString(const ELFFile *Obj, + StringRef FileName) { const typename ELFT::Ehdr *ElfHeader = Obj->getHeader(); if (ElfHeader->e_shnum != 0) return to_string(ElfHeader->e_shnum); - ArrayRef Arr = unwrapOrError(Obj->sections()); + ArrayRef Arr = unwrapOrError(FileName, Obj->sections()); if (Arr.empty()) return "0"; return "0 (" + to_string(Arr[0].sh_size) + ")"; } template -static std::string getSectionHeaderTableIndexString(const ELFFile *Obj) { +static std::string getSectionHeaderTableIndexString(const ELFFile *Obj, + StringRef FileName) { const typename ELFT::Ehdr *ElfHeader = Obj->getHeader(); if (ElfHeader->e_shstrndx != SHN_XINDEX) return to_string(ElfHeader->e_shstrndx); - ArrayRef Arr = unwrapOrError(Obj->sections()); + ArrayRef Arr = unwrapOrError(FileName, Obj->sections()); if (Arr.empty()) return "65535 (corrupt: out of range)"; return to_string(ElfHeader->e_shstrndx) + " (" + to_string(Arr[0].sh_link) + @@ -2639,9 +2805,9 @@ template void GNUStyle::printFileHeaders(const ELFO *Obj) { printFields(OS, "Number of program headers:", Str); Str = to_string(e->e_shentsize) + " (bytes)"; printFields(OS, "Size of section headers:", Str); - Str = getSectionHeadersNumString(Obj); + Str = getSectionHeadersNumString(Obj, this->FileName); printFields(OS, "Number of section headers:", Str); - Str = getSectionHeaderTableIndexString(Obj); + Str = getSectionHeaderTableIndexString(Obj, this->FileName); printFields(OS, "Section header string table index:", Str); } @@ -2663,26 +2829,29 @@ struct GroupSection { }; template -std::vector getGroups(const ELFFile *Obj) { +std::vector getGroups(const ELFFile *Obj, + StringRef FileName) { using Elf_Shdr = typename ELFT::Shdr; using Elf_Sym = typename ELFT::Sym; using Elf_Word = typename ELFT::Word; std::vector Ret; uint64_t I = 0; - for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) { + for (const Elf_Shdr &Sec : unwrapOrError(FileName, Obj->sections())) { ++I; if (Sec.sh_type != ELF::SHT_GROUP) continue; - const Elf_Shdr *Symtab = unwrapOrError(Obj->getSection(Sec.sh_link)); - StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*Symtab)); - const Elf_Sym *Sym = - unwrapOrError(Obj->template getEntry(Symtab, Sec.sh_info)); - auto Data = - unwrapOrError(Obj->template getSectionContentsAsArray(&Sec)); + const Elf_Shdr *Symtab = + unwrapOrError(FileName, Obj->getSection(Sec.sh_link)); + StringRef StrTable = + unwrapOrError(FileName, Obj->getStringTableForSymtab(*Symtab)); + const Elf_Sym *Sym = unwrapOrError( + FileName, Obj->template getEntry(Symtab, 
Sec.sh_info)); + auto Data = unwrapOrError( + FileName, Obj->template getSectionContentsAsArray(&Sec)); - StringRef Name = unwrapOrError(Obj->getSectionName(&Sec)); + StringRef Name = unwrapOrError(FileName, Obj->getSectionName(&Sec)); StringRef Signature = StrTable.data() + Sym->st_name; Ret.push_back({Name, maybeDemangle(Signature), @@ -2695,8 +2864,8 @@ std::vector getGroups(const ELFFile *Obj) { std::vector &GM = Ret.back().Members; for (uint32_t Ndx : Data.slice(1)) { - auto Sec = unwrapOrError(Obj->getSection(Ndx)); - const StringRef Name = unwrapOrError(Obj->getSectionName(Sec)); + auto Sec = unwrapOrError(FileName, Obj->getSection(Ndx)); + const StringRef Name = unwrapOrError(FileName, Obj->getSectionName(Sec)); GM.push_back({Name, Ndx}); } } @@ -2715,7 +2884,7 @@ mapSectionsToGroups(ArrayRef Groups) { } // namespace template void GNUStyle::printGroupSections(const ELFO *Obj) { - std::vector V = getGroups(Obj); + std::vector V = getGroups(Obj, this->FileName); DenseMap Map = mapSectionsToGroups(V); for (const GroupSection &G : V) { OS << "\n" @@ -2745,14 +2914,17 @@ template void GNUStyle::printGroupSections(const ELFO *Obj) { template void GNUStyle::printRelocation(const ELFO *Obj, const Elf_Shdr *SymTab, const Elf_Rela &R, bool IsRela) { - const Elf_Sym *Sym = unwrapOrError(Obj->getRelocationSymbol(&R, SymTab)); + const Elf_Sym *Sym = + unwrapOrError(this->FileName, Obj->getRelocationSymbol(&R, SymTab)); std::string TargetName; if (Sym && Sym->getType() == ELF::STT_SECTION) { const Elf_Shdr *Sec = unwrapOrError( + this->FileName, Obj->getSection(Sym, SymTab, this->dumper()->getShndxTable())); - TargetName = unwrapOrError(Obj->getSectionName(Sec)); + TargetName = unwrapOrError(this->FileName, Obj->getSectionName(Sec)); } else if (Sym) { - StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*SymTab)); + StringRef StrTable = + unwrapOrError(this->FileName, Obj->getStringTableForSymtab(*SymTab)); TargetName = this->dumper()->getFullSymbolName( Sym, StrTable, SymTab->sh_type == SHT_DYNSYM /* IsDynamic */); } @@ -2821,21 +2993,21 @@ template void GNUStyle::printRelocHeader(unsigned SType) { template void GNUStyle::printRelocations(const ELFO *Obj) { bool HasRelocSections = false; - for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) { + for (const Elf_Shdr &Sec : unwrapOrError(this->FileName, Obj->sections())) { if (Sec.sh_type != ELF::SHT_REL && Sec.sh_type != ELF::SHT_RELA && Sec.sh_type != ELF::SHT_RELR && Sec.sh_type != ELF::SHT_ANDROID_REL && Sec.sh_type != ELF::SHT_ANDROID_RELA && Sec.sh_type != ELF::SHT_ANDROID_RELR) continue; HasRelocSections = true; - StringRef Name = unwrapOrError(Obj->getSectionName(&Sec)); + StringRef Name = unwrapOrError(this->FileName, Obj->getSectionName(&Sec)); unsigned Entries = Sec.getEntityCount(); std::vector AndroidRelas; if (Sec.sh_type == ELF::SHT_ANDROID_REL || Sec.sh_type == ELF::SHT_ANDROID_RELA) { // Android's packed relocation section needs to be unpacked first // to get the actual number of entries. - AndroidRelas = unwrapOrError(Obj->android_relas(&Sec)); + AndroidRelas = unwrapOrError(this->FileName, Obj->android_relas(&Sec)); Entries = AndroidRelas.size(); } std::vector RelrRelas; @@ -2843,8 +3015,8 @@ template void GNUStyle::printRelocations(const ELFO *Obj) { Sec.sh_type == ELF::SHT_ANDROID_RELR)) { // .relr.dyn relative relocation section needs to be unpacked first // to get the actual number of entries. 
- Elf_Relr_Range Relrs = unwrapOrError(Obj->relrs(&Sec)); - RelrRelas = unwrapOrError(Obj->decode_relrs(Relrs)); + Elf_Relr_Range Relrs = unwrapOrError(this->FileName, Obj->relrs(&Sec)); + RelrRelas = unwrapOrError(this->FileName, Obj->decode_relrs(Relrs)); Entries = RelrRelas.size(); } uintX_t Offset = Sec.sh_offset; @@ -2852,10 +3024,11 @@ template void GNUStyle::printRelocations(const ELFO *Obj) { << to_hexString(Offset, false) << " contains " << Entries << " entries:\n"; printRelocHeader(Sec.sh_type); - const Elf_Shdr *SymTab = unwrapOrError(Obj->getSection(Sec.sh_link)); + const Elf_Shdr *SymTab = + unwrapOrError(this->FileName, Obj->getSection(Sec.sh_link)); switch (Sec.sh_type) { case ELF::SHT_REL: - for (const auto &R : unwrapOrError(Obj->rels(&Sec))) { + for (const auto &R : unwrapOrError(this->FileName, Obj->rels(&Sec))) { Elf_Rela Rela; Rela.r_offset = R.r_offset; Rela.r_info = R.r_info; @@ -2864,13 +3037,13 @@ template void GNUStyle::printRelocations(const ELFO *Obj) { } break; case ELF::SHT_RELA: - for (const auto &R : unwrapOrError(Obj->relas(&Sec))) + for (const auto &R : unwrapOrError(this->FileName, Obj->relas(&Sec))) printRelocation(Obj, SymTab, R, true); break; case ELF::SHT_RELR: case ELF::SHT_ANDROID_RELR: if (opts::RawRelr) - for (const auto &R : unwrapOrError(Obj->relrs(&Sec))) + for (const auto &R : unwrapOrError(this->FileName, Obj->relrs(&Sec))) OS << to_string(format_hex_no_prefix(R, ELFT::Is64Bits ? 16 : 8)) << "\n"; else @@ -2992,6 +3165,12 @@ static std::string getSectionTypeString(unsigned Arch, unsigned Type) { return "LLVM_ADDRSIG"; case SHT_LLVM_DEPENDENT_LIBRARIES: return "LLVM_DEPENDENT_LIBRARIES"; + case SHT_LLVM_SYMPART: + return "LLVM_SYMPART"; + case SHT_LLVM_PART_EHDR: + return "LLVM_PART_EHDR"; + case SHT_LLVM_PART_PHDR: + return "LLVM_PART_PHDR"; // FIXME: Parse processor specific GNU attributes case SHT_GNU_ATTRIBUTES: return "ATTRIBUTES"; @@ -3009,30 +3188,10 @@ static std::string getSectionTypeString(unsigned Arch, unsigned Type) { return ""; } -template -static StringRef getSectionName(const typename ELFT::Shdr &Sec, - const ELFObjectFile &ElfObj, - ArrayRef Sections) { - const ELFFile &Obj = *ElfObj.getELFFile(); - uint32_t Index = Obj.getHeader()->e_shstrndx; - if (Index == ELF::SHN_XINDEX) - Index = Sections[0].sh_link; - if (!Index) // no section string table. - return ""; - // TODO: Test a case when the sh_link of the section with index 0 is broken. - if (Index >= Sections.size()) - reportError(ElfObj.getFileName(), - createError("section header string table index " + - Twine(Index) + " does not exist")); - StringRef Data = toStringRef(unwrapOrError( - Obj.template getSectionContentsAsArray(&Sections[Index]))); - return unwrapOrError(Obj.getSectionName(&Sec, Data)); -} - template void GNUStyle::printSectionHeaders(const ELFO *Obj) { unsigned Bias = ELFT::Is64Bits ? 
0 : 8; - ArrayRef Sections = unwrapOrError(Obj->sections()); + ArrayRef Sections = unwrapOrError(this->FileName, Obj->sections()); OS << "There are " << to_string(Sections.size()) << " section headers, starting at offset " << "0x" << to_hexString(Obj->getHeader()->e_shoff, false) << ":\n\n"; @@ -3050,7 +3209,8 @@ void GNUStyle::printSectionHeaders(const ELFO *Obj) { size_t SectionIndex = 0; for (const Elf_Shdr &Sec : Sections) { Fields[0].Str = to_string(SectionIndex); - Fields[1].Str = getSectionName(Sec, *ElfObj, Sections); + Fields[1].Str = unwrapOrError( + ElfObj->getFileName(), Obj->getSectionName(&Sec, this->WarningHandler)); Fields[2].Str = getSectionTypeString(Obj->getHeader()->e_machine, Sec.sh_type); Fields[3].Str = @@ -3089,7 +3249,8 @@ void GNUStyle::printSectionHeaders(const ELFO *Obj) { template void GNUStyle::printSymtabMessage(const ELFO *Obj, StringRef Name, - size_t Entries) { + size_t Entries, + bool NonVisibilityBitsUsed) { if (!Name.empty()) OS << "\nSymbol table '" << Name << "' contains " << Entries << " entries:\n"; @@ -3097,9 +3258,13 @@ void GNUStyle::printSymtabMessage(const ELFO *Obj, StringRef Name, OS << "\n Symbol table for image:\n"; if (ELFT::Is64Bits) - OS << " Num: Value Size Type Bind Vis Ndx Name\n"; + OS << " Num: Value Size Type Bind Vis"; else - OS << " Num: Value Size Type Bind Vis Ndx Name\n"; + OS << " Num: Value Size Type Bind Vis"; + + if (NonVisibilityBitsUsed) + OS << " "; + OS << " Ndx Name\n"; } template @@ -3115,10 +3280,11 @@ std::string GNUStyle::getSymbolSectionNdx(const ELFO *Obj, case ELF::SHN_COMMON: return "COM"; case ELF::SHN_XINDEX: - return to_string( - format_decimal(unwrapOrError(object::getExtendedSymbolTableIndex( - Symbol, FirstSym, this->dumper()->getShndxTable())), - 3)); + return to_string(format_decimal( + unwrapOrError(this->FileName, + object::getExtendedSymbolTableIndex( + Symbol, FirstSym, this->dumper()->getShndxTable())), + 3)); default: // Find if: // Processor specific @@ -3142,7 +3308,7 @@ std::string GNUStyle::getSymbolSectionNdx(const ELFO *Obj, template void GNUStyle::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *FirstSym, StringRef StrTable, - bool IsDynamic) { + bool IsDynamic, bool NonVisibilityBitsUsed) { static int Idx = 0; static bool Dynamic = true; @@ -3156,7 +3322,7 @@ void GNUStyle::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, unsigned Bias = ELFT::Is64Bits ? 8 : 0; Field Fields[8] = {0, 8, 17 + Bias, 23 + Bias, - 31 + Bias, 38 + Bias, 47 + Bias, 51 + Bias}; + 31 + Bias, 38 + Bias, 48 + Bias, 51 + Bias}; Fields[0].Str = to_string(format_decimal(Idx++, 6)) + ":"; Fields[1].Str = to_string( format_hex_no_prefix(Symbol->st_value, ELFT::Is64Bits ? 16 : 8)); @@ -3173,7 +3339,13 @@ void GNUStyle::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, printEnum(Symbol->getBinding(), makeArrayRef(ElfSymbolBindings)); Fields[5].Str = printEnum(Symbol->getVisibility(), makeArrayRef(ElfSymbolVisibilities)); + if (Symbol->st_other & ~0x3) + Fields[5].Str += + " [st_other, 2)) + ">]"; + + Fields[6].Column += NonVisibilityBitsUsed ? 13 : 0; Fields[6].Str = getSymbolSectionNdx(Obj, Symbol, FirstSym); + Fields[7].Str = this->dumper()->getFullSymbolName(Symbol, StrTable, IsDynamic); for (auto &Entry : Fields) @@ -3193,7 +3365,7 @@ void GNUStyle::printHashedSymbol(const ELFO *Obj, const Elf_Sym *FirstSym, const auto Symbol = FirstSym + Sym; Fields[2].Str = to_string( - format_hex_no_prefix(Symbol->st_value, ELFT::Is64Bits ? 18 : 8)); + format_hex_no_prefix(Symbol->st_value, ELFT::Is64Bits ? 
16 : 8)); Fields[3].Str = to_string(format_decimal(Symbol->st_size, 5)); unsigned char SymbolType = Symbol->getType(); @@ -3246,10 +3418,21 @@ template void GNUStyle::printHashSymbols(const ELFO *Obj) { for (uint32_t Buc = 0; Buc < SysVHash->nbucket; Buc++) { if (Buckets[Buc] == ELF::STN_UNDEF) continue; + std::vector Visited(SysVHash->nchain); for (uint32_t Ch = Buckets[Buc]; Ch < SysVHash->nchain; Ch = Chains[Ch]) { if (Ch == ELF::STN_UNDEF) break; + + if (Visited[Ch]) { + reportWarning( + createError(".hash section is invalid: bucket " + Twine(Ch) + + ": a cycle was detected in the linked chain"), + this->FileName); + break; + } + printHashedSymbol(Obj, &DynSyms[0], Ch, StringTable, Buc); + Visited[Ch] = true; } } } @@ -3380,7 +3563,8 @@ void GNUStyle::printProgramHeaders(const ELFO *Obj) { unsigned Width = ELFT::Is64Bits ? 18 : 10; unsigned SizeWidth = ELFT::Is64Bits ? 8 : 7; - for (const auto &Phdr : unwrapOrError(Obj->program_headers())) { + for (const auto &Phdr : + unwrapOrError(this->FileName, Obj->program_headers())) { Fields[0].Str = getElfPtType(Header->e_machine, Phdr.p_type); Fields[1].Str = to_string(format_hex(Phdr.p_offset, 8)); Fields[2].Str = to_string(format_hex(Phdr.p_vaddr, Width)); @@ -3404,10 +3588,11 @@ void GNUStyle::printSectionMapping(const ELFO *Obj) { OS << "\n Section to Segment mapping:\n Segment Sections...\n"; DenseSet BelongsToSegment; int Phnum = 0; - for (const Elf_Phdr &Phdr : unwrapOrError(Obj->program_headers())) { + for (const Elf_Phdr &Phdr : + unwrapOrError(this->FileName, Obj->program_headers())) { std::string Sections; OS << format(" %2.2d ", Phnum++); - for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) { + for (const Elf_Shdr &Sec : unwrapOrError(this->FileName, Obj->sections())) { // Check if each section is in a segment and then print mapping. // readelf additionally makes sure it does not print zero sized sections // at end of segments and for PT_DYNAMIC both start and end of section @@ -3418,7 +3603,9 @@ void GNUStyle::printSectionMapping(const ELFO *Obj) { if (!TbssInNonTLS && checkTLSSections(Phdr, Sec) && checkoffsets(Phdr, Sec) && checkVMA(Phdr, Sec) && checkPTDynamic(Phdr, Sec) && (Sec.sh_type != ELF::SHT_NULL)) { - Sections += unwrapOrError(Obj->getSectionName(&Sec)).str() + " "; + Sections += + unwrapOrError(this->FileName, Obj->getSectionName(&Sec)).str() + + " "; BelongsToSegment.insert(&Sec); } } @@ -3428,9 +3615,10 @@ void GNUStyle::printSectionMapping(const ELFO *Obj) { // Display sections that do not belong to a segment. 
std::string Sections; - for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) { + for (const Elf_Shdr &Sec : unwrapOrError(this->FileName, Obj->sections())) { if (BelongsToSegment.find(&Sec) == BelongsToSegment.end()) - Sections += unwrapOrError(Obj->getSectionName(&Sec)).str() + ' '; + Sections += + unwrapOrError(this->FileName, Obj->getSectionName(&Sec)).str() + ' '; } if (!Sections.empty()) { OS << " None " << Sections << '\n'; @@ -3438,14 +3626,40 @@ void GNUStyle::printSectionMapping(const ELFO *Obj) { } } +namespace { +template struct RelSymbol { + const typename ELFT::Sym *Sym; + std::string Name; +}; + +template +RelSymbol getSymbolForReloc(const ELFFile *Obj, StringRef FileName, + const ELFDumper *Dumper, + const typename ELFT::Rela &Reloc) { + uint32_t SymIndex = Reloc.getSymbol(Obj->isMips64EL()); + const typename ELFT::Sym *Sym = Dumper->dynamic_symbols().begin() + SymIndex; + Expected ErrOrName = Sym->getName(Dumper->getDynamicStringTable()); + + std::string Name; + if (ErrOrName) { + Name = maybeDemangle(*ErrOrName); + } else { + reportWarning( + createError("unable to get name of the dynamic symbol with index " + + Twine(SymIndex) + ": " + toString(ErrOrName.takeError())), + FileName); + Name = ""; + } + + return {Sym, std::move(Name)}; +} +} // namespace + template void GNUStyle::printDynamicRelocation(const ELFO *Obj, Elf_Rela R, bool IsRela) { - uint32_t SymIndex = R.getSymbol(Obj->isMips64EL()); - const Elf_Sym *Sym = this->dumper()->dynamic_symbols().begin() + SymIndex; - std::string SymbolName = maybeDemangle( - unwrapOrError(Sym->getName(this->dumper()->getDynamicStringTable()))); - printRelocation(Obj, Sym, SymbolName, R, IsRela); + RelSymbol S = getSymbolForReloc(Obj, this->FileName, this->dumper(), R); + printRelocation(Obj, S.Sym, S.Name, R, IsRela); } template void GNUStyle::printDynamic(const ELFO *Obj) { @@ -3518,7 +3732,8 @@ void GNUStyle::printDynamicRelocations(const ELFO *Obj) { << " contains " << DynRelrRegion.Size << " bytes:\n"; printRelocHeader(ELF::SHT_REL); Elf_Relr_Range Relrs = this->dumper()->dyn_relrs(); - std::vector RelrRelas = unwrapOrError(Obj->decode_relrs(Relrs)); + std::vector RelrRelas = + unwrapOrError(this->FileName, Obj->decode_relrs(Relrs)); for (const Elf_Rela &Rela : RelrRelas) { printDynamicRelocation(Obj, Rela, false); } @@ -3550,14 +3765,15 @@ template static void printGNUVersionSectionProlog(formatted_raw_ostream &OS, const Twine &Name, unsigned EntriesNum, const ELFFile *Obj, - const typename ELFT::Shdr *Sec) { - StringRef SecName = unwrapOrError(Obj->getSectionName(Sec)); + const typename ELFT::Shdr *Sec, + StringRef FileName) { + StringRef SecName = unwrapOrError(FileName, Obj->getSectionName(Sec)); OS << Name << " section '" << SecName << "' " << "contains " << EntriesNum << " entries:\n"; const typename ELFT::Shdr *SymTab = - unwrapOrError(Obj->getSection(Sec->sh_link)); - StringRef SymTabName = unwrapOrError(Obj->getSectionName(SymTab)); + unwrapOrError(FileName, Obj->getSection(Sec->sh_link)); + StringRef SymTabName = unwrapOrError(FileName, Obj->getSectionName(SymTab)); OS << " Addr: " << format_hex_no_prefix(Sec->sh_addr, 16) << " Offset: " << format_hex(Sec->sh_offset, 8) << " Link: " << Sec->sh_link << " (" << SymTabName << ")\n"; @@ -3570,7 +3786,8 @@ void GNUStyle::printVersionSymbolSection(const ELFFile *Obj, return; unsigned Entries = Sec->sh_size / sizeof(Elf_Versym); - printGNUVersionSectionProlog(OS, "Version symbols", Entries, Obj, Sec); + printGNUVersionSectionProlog(OS, "Version symbols", Entries, Obj, 
Sec, + this->FileName); const uint8_t *VersymBuf = reinterpret_cast(Obj->base() + Sec->sh_offset); @@ -3642,14 +3859,17 @@ void GNUStyle::printVersionDefinitionSection(const ELFFile *Obj, return; unsigned VerDefsNum = Sec->sh_info; - printGNUVersionSectionProlog(OS, "Version definition", VerDefsNum, Obj, Sec); + printGNUVersionSectionProlog(OS, "Version definition", VerDefsNum, Obj, Sec, + this->FileName); - const Elf_Shdr *StrTabSec = unwrapOrError(Obj->getSection(Sec->sh_link)); + const Elf_Shdr *StrTabSec = + unwrapOrError(this->FileName, Obj->getSection(Sec->sh_link)); StringRef StringTable( reinterpret_cast(Obj->base() + StrTabSec->sh_offset), (size_t)StrTabSec->sh_size); - const uint8_t *VerdefBuf = unwrapOrError(Obj->getSectionContents(Sec)).data(); + const uint8_t *VerdefBuf = + unwrapOrError(this->FileName, Obj->getSectionContents(Sec)).data(); const uint8_t *Begin = VerdefBuf; while (VerDefsNum--) { @@ -3684,11 +3904,14 @@ void GNUStyle::printVersionDependencySection(const ELFFile *Obj, return; unsigned VerneedNum = Sec->sh_info; - printGNUVersionSectionProlog(OS, "Version needs", VerneedNum, Obj, Sec); + printGNUVersionSectionProlog(OS, "Version needs", VerneedNum, Obj, Sec, + this->FileName); - ArrayRef SecData = unwrapOrError(Obj->getSectionContents(Sec)); + ArrayRef SecData = + unwrapOrError(this->FileName, Obj->getSectionContents(Sec)); - const Elf_Shdr *StrTabSec = unwrapOrError(Obj->getSection(Sec->sh_link)); + const Elf_Shdr *StrTabSec = + unwrapOrError(this->FileName, Obj->getSection(Sec->sh_link)); StringRef StringTable = { reinterpret_cast(Obj->base() + StrTabSec->sh_offset), (size_t)StrTabSec->sh_size}; @@ -3745,9 +3968,21 @@ void GNUStyle::printHashHistogram(const ELFFile *Obj) { // Go over all buckets and and note chain lengths of each bucket (total // unique chain lengths). 
for (size_t B = 0; B < NBucket; B++) { - for (size_t C = Buckets[B]; C > 0 && C < NChain; C = Chains[C]) + std::vector Visited(NChain); + for (size_t C = Buckets[B]; C < NChain; C = Chains[C]) { + if (C == ELF::STN_UNDEF) + break; + if (Visited[C]) { + reportWarning( + createError(".hash section is invalid: bucket " + Twine(C) + + ": a cycle was detected in the linked chain"), + this->FileName); + break; + } + Visited[C] = true; if (MaxChain <= ++ChainLen[B]) MaxChain++; + } TotalSyms += ChainLen[B]; } @@ -3829,7 +4064,7 @@ void GNUStyle::printCGProfile(const ELFFile *Obj) { template void GNUStyle::printAddrsig(const ELFFile *Obj) { - OS << "GNUStyle::printAddrsig not implemented\n"; + reportError(createError("--addrsig: not implemented"), this->FileName); } static StringRef getGenericNoteTypeName(const uint32_t NT) { @@ -3850,6 +4085,86 @@ static StringRef getGenericNoteTypeName(const uint32_t NT) { return ""; } +static StringRef getCoreNoteTypeName(const uint32_t NT) { + static const struct { + uint32_t ID; + const char *Name; + } Notes[] = { + {ELF::NT_PRSTATUS, "NT_PRSTATUS (prstatus structure)"}, + {ELF::NT_FPREGSET, "NT_FPREGSET (floating point registers)"}, + {ELF::NT_PRPSINFO, "NT_PRPSINFO (prpsinfo structure)"}, + {ELF::NT_TASKSTRUCT, "NT_TASKSTRUCT (task structure)"}, + {ELF::NT_AUXV, "NT_AUXV (auxiliary vector)"}, + {ELF::NT_PSTATUS, "NT_PSTATUS (pstatus structure)"}, + {ELF::NT_FPREGS, "NT_FPREGS (floating point registers)"}, + {ELF::NT_PSINFO, "NT_PSINFO (psinfo structure)"}, + {ELF::NT_LWPSTATUS, "NT_LWPSTATUS (lwpstatus_t structure)"}, + {ELF::NT_LWPSINFO, "NT_LWPSINFO (lwpsinfo_t structure)"}, + {ELF::NT_WIN32PSTATUS, "NT_WIN32PSTATUS (win32_pstatus structure)"}, + + {ELF::NT_PPC_VMX, "NT_PPC_VMX (ppc Altivec registers)"}, + {ELF::NT_PPC_VSX, "NT_PPC_VSX (ppc VSX registers)"}, + {ELF::NT_PPC_TAR, "NT_PPC_TAR (ppc TAR register)"}, + {ELF::NT_PPC_PPR, "NT_PPC_PPR (ppc PPR register)"}, + {ELF::NT_PPC_DSCR, "NT_PPC_DSCR (ppc DSCR register)"}, + {ELF::NT_PPC_EBB, "NT_PPC_EBB (ppc EBB registers)"}, + {ELF::NT_PPC_PMU, "NT_PPC_PMU (ppc PMU registers)"}, + {ELF::NT_PPC_TM_CGPR, "NT_PPC_TM_CGPR (ppc checkpointed GPR registers)"}, + {ELF::NT_PPC_TM_CFPR, + "NT_PPC_TM_CFPR (ppc checkpointed floating point registers)"}, + {ELF::NT_PPC_TM_CVMX, + "NT_PPC_TM_CVMX (ppc checkpointed Altivec registers)"}, + {ELF::NT_PPC_TM_CVSX, "NT_PPC_TM_CVSX (ppc checkpointed VSX registers)"}, + {ELF::NT_PPC_TM_SPR, "NT_PPC_TM_SPR (ppc TM special purpose registers)"}, + {ELF::NT_PPC_TM_CTAR, "NT_PPC_TM_CTAR (ppc checkpointed TAR register)"}, + {ELF::NT_PPC_TM_CPPR, "NT_PPC_TM_CPPR (ppc checkpointed PPR register)"}, + {ELF::NT_PPC_TM_CDSCR, + "NT_PPC_TM_CDSCR (ppc checkpointed DSCR register)"}, + + {ELF::NT_386_TLS, "NT_386_TLS (x86 TLS information)"}, + {ELF::NT_386_IOPERM, "NT_386_IOPERM (x86 I/O permissions)"}, + {ELF::NT_X86_XSTATE, "NT_X86_XSTATE (x86 XSAVE extended state)"}, + + {ELF::NT_S390_HIGH_GPRS, + "NT_S390_HIGH_GPRS (s390 upper register halves)"}, + {ELF::NT_S390_TIMER, "NT_S390_TIMER (s390 timer register)"}, + {ELF::NT_S390_TODCMP, "NT_S390_TODCMP (s390 TOD comparator register)"}, + {ELF::NT_S390_TODPREG, + "NT_S390_TODPREG (s390 TOD programmable register)"}, + {ELF::NT_S390_CTRS, "NT_S390_CTRS (s390 control registers)"}, + {ELF::NT_S390_PREFIX, "NT_S390_PREFIX (s390 prefix register)"}, + {ELF::NT_S390_LAST_BREAK, + "NT_S390_LAST_BREAK (s390 last breaking event address)"}, + {ELF::NT_S390_SYSTEM_CALL, + "NT_S390_SYSTEM_CALL (s390 system call restart data)"}, + {ELF::NT_S390_TDB, 
"NT_S390_TDB (s390 transaction diagnostic block)"}, + {ELF::NT_S390_VXRS_LOW, + "NT_S390_VXRS_LOW (s390 vector registers 0-15 upper half)"}, + {ELF::NT_S390_VXRS_HIGH, + "NT_S390_VXRS_HIGH (s390 vector registers 16-31)"}, + {ELF::NT_S390_GS_CB, "NT_S390_GS_CB (s390 guarded-storage registers)"}, + {ELF::NT_S390_GS_BC, + "NT_S390_GS_BC (s390 guarded-storage broadcast control)"}, + + {ELF::NT_ARM_VFP, "NT_ARM_VFP (arm VFP registers)"}, + {ELF::NT_ARM_TLS, "NT_ARM_TLS (AArch TLS registers)"}, + {ELF::NT_ARM_HW_BREAK, + "NT_ARM_HW_BREAK (AArch hardware breakpoint registers)"}, + {ELF::NT_ARM_HW_WATCH, + "NT_ARM_HW_WATCH (AArch hardware watchpoint registers)"}, + + {ELF::NT_FILE, "NT_FILE (mapped files)"}, + {ELF::NT_PRXFPREG, "NT_PRXFPREG (user_xfpregs structure)"}, + {ELF::NT_SIGINFO, "NT_SIGINFO (siginfo_t data)"}, + }; + + for (const auto &Note : Notes) + if (Note.ID == NT) + return Note.Name; + + return ""; +} + static std::string getGNUNoteTypeName(const uint32_t NT) { static const struct { uint32_t ID; @@ -4207,13 +4522,85 @@ static AMDGPUNote getAMDGPUNote(uint32_t NoteType, ArrayRef Desc) { } } +struct CoreFileMapping { + uint64_t Start, End, Offset; + StringRef Filename; +}; + +struct CoreNote { + uint64_t PageSize; + std::vector Mappings; +}; + +static Expected readCoreNote(DataExtractor Desc) { + // Expected format of the NT_FILE note description: + // 1. # of file mappings (call it N) + // 2. Page size + // 3. N (start, end, offset) triples + // 4. N packed filenames (null delimited) + // Each field is an Elf_Addr, except for filenames which are char* strings. + + CoreNote Ret; + const int Bytes = Desc.getAddressSize(); + + if (!Desc.isValidOffsetForAddress(2)) + return createStringError(object_error::parse_failed, + "malformed note: header too short"); + if (Desc.getData().back() != 0) + return createStringError(object_error::parse_failed, + "malformed note: not NUL terminated"); + + uint64_t DescOffset = 0; + uint64_t FileCount = Desc.getAddress(&DescOffset); + Ret.PageSize = Desc.getAddress(&DescOffset); + + if (!Desc.isValidOffsetForAddress(3 * FileCount * Bytes)) + return createStringError(object_error::parse_failed, + "malformed note: too short for number of files"); + + uint64_t FilenamesOffset = 0; + DataExtractor Filenames( + Desc.getData().drop_front(DescOffset + 3 * FileCount * Bytes), + Desc.isLittleEndian(), Desc.getAddressSize()); + + Ret.Mappings.resize(FileCount); + for (CoreFileMapping &Mapping : Ret.Mappings) { + if (!Filenames.isValidOffsetForDataOfSize(FilenamesOffset, 1)) + return createStringError(object_error::parse_failed, + "malformed note: too few filenames"); + Mapping.Start = Desc.getAddress(&DescOffset); + Mapping.End = Desc.getAddress(&DescOffset); + Mapping.Offset = Desc.getAddress(&DescOffset); + Mapping.Filename = Filenames.getCStrRef(&FilenamesOffset); + } + + return Ret; +} + +template +static void printCoreNote(raw_ostream &OS, const CoreNote &Note) { + // Length of "0x
" string. + const int FieldWidth = ELFT::Is64Bits ? 18 : 10; + + OS << " Page size: " << format_decimal(Note.PageSize, 0) << '\n'; + OS << " " << right_justify("Start", FieldWidth) << " " + << right_justify("End", FieldWidth) << " " + << right_justify("Page Offset", FieldWidth) << '\n'; + for (const CoreFileMapping &Mapping : Note.Mappings) { + OS << " " << format_hex(Mapping.Start, FieldWidth) << " " + << format_hex(Mapping.End, FieldWidth) << " " + << format_hex(Mapping.Offset, FieldWidth) << "\n " + << Mapping.Filename << '\n'; + } +} + template void GNUStyle::printNotes(const ELFFile *Obj) { auto PrintHeader = [&](const typename ELFT::Off Offset, const typename ELFT::Addr Size) { OS << "Displaying notes found at file offset " << format_hex(Offset, 10) << " with length " << format_hex(Size, 10) << ":\n" - << " Owner Data size\tDescription\n"; + << " Owner Data size \tDescription\n"; }; auto ProcessNote = [&](const Elf_Note &Note) { @@ -4221,55 +4608,81 @@ void GNUStyle::printNotes(const ELFFile *Obj) { ArrayRef Descriptor = Note.getDesc(); Elf_Word Type = Note.getType(); - OS << " " << Name << std::string(22 - Name.size(), ' ') + // Print the note owner/type. + OS << " " << left_justify(Name, 20) << ' ' << format_hex(Descriptor.size(), 10) << '\t'; - if (Name == "GNU") { OS << getGNUNoteTypeName(Type) << '\n'; - printGNUNote(OS, Type, Descriptor); } else if (Name == "FreeBSD") { OS << getFreeBSDNoteTypeName(Type) << '\n'; } else if (Name == "AMD") { OS << getAMDNoteTypeName(Type) << '\n'; + } else if (Name == "AMDGPU") { + OS << getAMDGPUNoteTypeName(Type) << '\n'; + } else { + StringRef NoteType = Obj->getHeader()->e_type == ELF::ET_CORE + ? getCoreNoteTypeName(Type) + : getGenericNoteTypeName(Type); + if (!NoteType.empty()) + OS << NoteType << '\n'; + else + OS << "Unknown note type: (" << format_hex(Type, 10) << ")\n"; + } + + // Print the description, or fallback to printing raw bytes for unknown + // owners. 
+ if (Name == "GNU") { + printGNUNote(OS, Type, Descriptor); + } else if (Name == "AMD") { const AMDNote N = getAMDNote(Type, Descriptor); if (!N.Type.empty()) OS << " " << N.Type << ":\n " << N.Value << '\n'; } else if (Name == "AMDGPU") { - OS << getAMDGPUNoteTypeName(Type) << '\n'; const AMDGPUNote N = getAMDGPUNote(Type, Descriptor); if (!N.Type.empty()) OS << " " << N.Type << ":\n " << N.Value << '\n'; - } else { - StringRef NoteType = getGenericNoteTypeName(Type); - if (!NoteType.empty()) - OS << NoteType; - else - OS << "Unknown note type: (" << format_hex(Type, 10) << ')'; + } else if (Name == "CORE") { + if (Type == ELF::NT_FILE) { + DataExtractor DescExtractor(Descriptor, + ELFT::TargetEndianness == support::little, + sizeof(Elf_Addr)); + Expected Note = readCoreNote(DescExtractor); + if (Note) + printCoreNote(OS, *Note); + else + reportWarning(Note.takeError(), this->FileName); + } + } else if (!Descriptor.empty()) { + OS << " description data:"; + for (uint8_t B : Descriptor) + OS << " " << format("%02x", B); + OS << '\n'; } - OS << '\n'; }; - if (Obj->getHeader()->e_type == ELF::ET_CORE) { - for (const auto &P : unwrapOrError(Obj->program_headers())) { - if (P.p_type != PT_NOTE) + ArrayRef Sections = unwrapOrError(this->FileName, Obj->sections()); + if (Obj->getHeader()->e_type != ELF::ET_CORE && !Sections.empty()) { + for (const auto &S : Sections) { + if (S.sh_type != SHT_NOTE) continue; - PrintHeader(P.p_offset, P.p_filesz); + PrintHeader(S.sh_offset, S.sh_size); Error Err = Error::success(); - for (const auto &Note : Obj->notes(P, Err)) + for (const auto &Note : Obj->notes(S, Err)) ProcessNote(Note); if (Err) - error(std::move(Err)); + reportError(std::move(Err), this->FileName); } } else { - for (const auto &S : unwrapOrError(Obj->sections())) { - if (S.sh_type != SHT_NOTE) + for (const auto &P : + unwrapOrError(this->FileName, Obj->program_headers())) { + if (P.p_type != PT_NOTE) continue; - PrintHeader(S.sh_offset, S.sh_size); + PrintHeader(P.p_offset, P.p_filesz); Error Err = Error::success(); - for (const auto &Note : Obj->notes(S, Err)) + for (const auto &Note : Obj->notes(P, Err)) ProcessNote(Note); if (Err) - error(std::move(Err)); + reportError(std::move(Err), this->FileName); } } } @@ -4279,6 +4692,294 @@ void GNUStyle::printELFLinkerOptions(const ELFFile *Obj) { OS << "printELFLinkerOptions not implemented!\n"; } +// Used for printing section names in places where possible errors can be +// ignored. +static StringRef getSectionName(const SectionRef &Sec) { + Expected NameOrErr = Sec.getName(); + if (NameOrErr) + return *NameOrErr; + consumeError(NameOrErr.takeError()); + return ""; +} + +// Used for printing symbol names in places where possible errors can be +// ignored. +static std::string getSymbolName(const ELFSymbolRef &Sym) { + Expected NameOrErr = Sym.getName(); + if (NameOrErr) + return maybeDemangle(*NameOrErr); + consumeError(NameOrErr.takeError()); + return ""; +} + +template +void DumpStyle::printFunctionStackSize( + const ELFObjectFile *Obj, uint64_t SymValue, SectionRef FunctionSec, + const StringRef SectionName, DataExtractor Data, uint64_t *Offset) { + // This function ignores potentially erroneous input, unless it is directly + // related to stack size reporting. 
+ SymbolRef FuncSym; + for (const ELFSymbolRef &Symbol : Obj->symbols()) { + Expected SymAddrOrErr = Symbol.getAddress(); + if (!SymAddrOrErr) { + consumeError(SymAddrOrErr.takeError()); + continue; + } + if (Symbol.getELFType() == ELF::STT_FUNC && *SymAddrOrErr == SymValue) { + // Check if the symbol is in the right section. + if (FunctionSec.containsSymbol(Symbol)) { + FuncSym = Symbol; + break; + } + } + } + + std::string FuncName = "?"; + // A valid SymbolRef has a non-null object file pointer. + if (FuncSym.BasicSymbolRef::getObject()) + FuncName = getSymbolName(FuncSym); + else + reportWarning( + createError("could not identify function symbol for stack size entry"), + Obj->getFileName()); + + // Extract the size. The expectation is that Offset is pointing to the right + // place, i.e. past the function address. + uint64_t PrevOffset = *Offset; + uint64_t StackSize = Data.getULEB128(Offset); + // getULEB128() does not advance Offset if it is not able to extract a valid + // integer. + if (*Offset == PrevOffset) + reportError( + createStringError(object_error::parse_failed, + "could not extract a valid stack size in section %s", + SectionName.data()), + Obj->getFileName()); + + printStackSizeEntry(StackSize, FuncName); +} + +template +void GNUStyle::printStackSizeEntry(uint64_t Size, StringRef FuncName) { + OS.PadToColumn(2); + OS << format_decimal(Size, 11); + OS.PadToColumn(18); + OS << FuncName << "\n"; +} + +template +void DumpStyle::printStackSize(const ELFObjectFile *Obj, + RelocationRef Reloc, + SectionRef FunctionSec, + const StringRef &StackSizeSectionName, + const RelocationResolver &Resolver, + DataExtractor Data) { + // This function ignores potentially erroneous input, unless it is directly + // related to stack size reporting. + object::symbol_iterator RelocSym = Reloc.getSymbol(); + uint64_t RelocSymValue = 0; + StringRef FileStr = Obj->getFileName(); + if (RelocSym != Obj->symbol_end()) { + // Ensure that the relocation symbol is in the function section, i.e. the + // section where the functions whose stack sizes we are reporting are + // located. + auto SectionOrErr = RelocSym->getSection(); + if (!SectionOrErr) { + reportWarning( + createError("cannot identify the section for relocation symbol '" + + getSymbolName(*RelocSym) + "'"), + FileStr); + consumeError(SectionOrErr.takeError()); + } else if (*SectionOrErr != FunctionSec) { + reportWarning(createError("relocation symbol '" + + getSymbolName(*RelocSym) + + "' is not in the expected section"), + FileStr); + // Pretend that the symbol is in the correct section and report its + // stack size anyway. 
+ FunctionSec = **SectionOrErr; + } + + Expected RelocSymValueOrErr = RelocSym->getValue(); + if (RelocSymValueOrErr) + RelocSymValue = *RelocSymValueOrErr; + else + consumeError(RelocSymValueOrErr.takeError()); + } + + uint64_t Offset = Reloc.getOffset(); + if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(Elf_Addr) + 1)) + reportError( + createStringError(object_error::parse_failed, + "found invalid relocation offset into section %s " + "while trying to extract a stack size entry", + StackSizeSectionName.data()), + FileStr); + + uint64_t Addend = Data.getAddress(&Offset); + uint64_t SymValue = Resolver(Reloc, RelocSymValue, Addend); + this->printFunctionStackSize(Obj, SymValue, FunctionSec, StackSizeSectionName, + Data, &Offset); +} + +template +void DumpStyle::printNonRelocatableStackSizes( + const ELFObjectFile *Obj, std::function PrintHeader) { + // This function ignores potentially erroneous input, unless it is directly + // related to stack size reporting. + const ELFFile *EF = Obj->getELFFile(); + StringRef FileStr = Obj->getFileName(); + for (const SectionRef &Sec : Obj->sections()) { + StringRef SectionName = getSectionName(Sec); + if (SectionName != ".stack_sizes") + continue; + PrintHeader(); + const Elf_Shdr *ElfSec = Obj->getSection(Sec.getRawDataRefImpl()); + ArrayRef Contents = + unwrapOrError(this->FileName, EF->getSectionContents(ElfSec)); + DataExtractor Data(Contents, Obj->isLittleEndian(), sizeof(Elf_Addr)); + // A .stack_sizes section header's sh_link field is supposed to point + // to the section that contains the functions whose stack sizes are + // described in it. + const Elf_Shdr *FunctionELFSec = + unwrapOrError(this->FileName, EF->getSection(ElfSec->sh_link)); + uint64_t Offset = 0; + while (Offset < Contents.size()) { + // The function address is followed by a ULEB representing the stack + // size. Check for an extra byte before we try to process the entry. + if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(Elf_Addr) + 1)) { + reportError( + createStringError( + object_error::parse_failed, + "section %s ended while trying to extract a stack size entry", + SectionName.data()), + FileStr); + } + uint64_t SymValue = Data.getAddress(&Offset); + printFunctionStackSize(Obj, SymValue, Obj->toSectionRef(FunctionELFSec), + SectionName, Data, &Offset); + } + } +} + +template +void DumpStyle::printRelocatableStackSizes( + const ELFObjectFile *Obj, std::function PrintHeader) { + const ELFFile *EF = Obj->getELFFile(); + + // Build a map between stack size sections and their corresponding relocation + // sections. + llvm::MapVector StackSizeRelocMap; + const SectionRef NullSection{}; + + for (const SectionRef &Sec : Obj->sections()) { + StringRef SectionName; + if (Expected NameOrErr = Sec.getName()) + SectionName = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + + // A stack size section that we haven't encountered yet is mapped to the + // null section until we find its corresponding relocation section. + if (SectionName == ".stack_sizes") + if (StackSizeRelocMap.count(Sec) == 0) { + StackSizeRelocMap[Sec] = NullSection; + continue; + } + + // Check relocation sections if they are relocating contents of a + // stack sizes section. 
+ const Elf_Shdr *ElfSec = Obj->getSection(Sec.getRawDataRefImpl()); + uint32_t SectionType = ElfSec->sh_type; + if (SectionType != ELF::SHT_RELA && SectionType != ELF::SHT_REL) + continue; + + Expected RelSecOrErr = Sec.getRelocatedSection(); + if (!RelSecOrErr) + reportError(createStringError(object_error::parse_failed, + "%s: failed to get a relocated section: %s", + SectionName.data(), + toString(RelSecOrErr.takeError()).c_str()), + Obj->getFileName()); + + const Elf_Shdr *ContentsSec = + Obj->getSection((*RelSecOrErr)->getRawDataRefImpl()); + Expected ContentsSectionNameOrErr = + EF->getSectionName(ContentsSec); + if (!ContentsSectionNameOrErr) { + consumeError(ContentsSectionNameOrErr.takeError()); + continue; + } + if (*ContentsSectionNameOrErr != ".stack_sizes") + continue; + // Insert a mapping from the stack sizes section to its relocation section. + StackSizeRelocMap[Obj->toSectionRef(ContentsSec)] = Sec; + } + + for (const auto &StackSizeMapEntry : StackSizeRelocMap) { + PrintHeader(); + const SectionRef &StackSizesSec = StackSizeMapEntry.first; + const SectionRef &RelocSec = StackSizeMapEntry.second; + + // Warn about stack size sections without a relocation section. + StringRef StackSizeSectionName = getSectionName(StackSizesSec); + if (RelocSec == NullSection) { + reportWarning(createError("section " + StackSizeSectionName + + " does not have a corresponding " + "relocation section"), + Obj->getFileName()); + continue; + } + + // A .stack_sizes section header's sh_link field is supposed to point + // to the section that contains the functions whose stack sizes are + // described in it. + const Elf_Shdr *StackSizesELFSec = + Obj->getSection(StackSizesSec.getRawDataRefImpl()); + const SectionRef FunctionSec = Obj->toSectionRef(unwrapOrError( + this->FileName, EF->getSection(StackSizesELFSec->sh_link))); + + bool (*IsSupportedFn)(uint64_t); + RelocationResolver Resolver; + std::tie(IsSupportedFn, Resolver) = getRelocationResolver(*Obj); + auto Contents = unwrapOrError(this->FileName, StackSizesSec.getContents()); + DataExtractor Data(Contents, Obj->isLittleEndian(), sizeof(Elf_Addr)); + for (const RelocationRef &Reloc : RelocSec.relocations()) { + if (!IsSupportedFn || !IsSupportedFn(Reloc.getType())) + reportError(createStringError( + object_error::parse_failed, + "unsupported relocation type in section %s: %s", + getSectionName(RelocSec).data(), + EF->getRelocationTypeName(Reloc.getType()).data()), + Obj->getFileName()); + this->printStackSize(Obj, Reloc, FunctionSec, StackSizeSectionName, + Resolver, Data); + } + } +} + +template +void GNUStyle::printStackSizes(const ELFObjectFile *Obj) { + bool HeaderHasBeenPrinted = false; + auto PrintHeader = [&]() { + if (HeaderHasBeenPrinted) + return; + OS << "\nStack Sizes:\n"; + OS.PadToColumn(9); + OS << "Size"; + OS.PadToColumn(18); + OS << "Function\n"; + HeaderHasBeenPrinted = true; + }; + + // For non-relocatable objects, look directly for sections whose name starts + // with .stack_sizes and process the contents. + if (Obj->isRelocatableObject()) + this->printRelocatableStackSizes(Obj, PrintHeader); + else + this->printNonRelocatableStackSizes(Obj, PrintHeader); +} + template void GNUStyle::printMipsGOT(const MipsGOTParser &Parser) { size_t Bias = ELFT::Is64Bits ? 
8 : 0; @@ -4402,6 +5103,45 @@ void GNUStyle::printMipsPLT(const MipsGOTParser &Parser) { } } +template +void GNUStyle::printMipsABIFlags(const ELFObjectFile *ObjF) { + const ELFFile *Obj = ObjF->getELFFile(); + const Elf_Shdr *Shdr = + findSectionByName(*Obj, ObjF->getFileName(), ".MIPS.abiflags"); + if (!Shdr) + return; + + ArrayRef Sec = + unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(Shdr)); + if (Sec.size() != sizeof(Elf_Mips_ABIFlags)) + reportError(createError(".MIPS.abiflags section has a wrong size"), + ObjF->getFileName()); + + auto *Flags = reinterpret_cast *>(Sec.data()); + + OS << "MIPS ABI Flags Version: " << Flags->version << "\n\n"; + OS << "ISA: MIPS" << int(Flags->isa_level); + if (Flags->isa_rev > 1) + OS << "r" << int(Flags->isa_rev); + OS << "\n"; + OS << "GPR size: " << getMipsRegisterSize(Flags->gpr_size) << "\n"; + OS << "CPR1 size: " << getMipsRegisterSize(Flags->cpr1_size) << "\n"; + OS << "CPR2 size: " << getMipsRegisterSize(Flags->cpr2_size) << "\n"; + OS << "FP ABI: " << printEnum(Flags->fp_abi, makeArrayRef(ElfMipsFpABIType)) + << "\n"; + OS << "ISA Extension: " + << printEnum(Flags->isa_ext, makeArrayRef(ElfMipsISAExtType)) << "\n"; + if (Flags->ases == 0) + OS << "ASEs: None\n"; + else + // FIXME: Print each flag on a separate line. + OS << "ASEs: " << printFlags(Flags->ases, makeArrayRef(ElfMipsASEFlags)) + << "\n"; + OS << "FLAGS 1: " << format_hex_no_prefix(Flags->flags1, 8, false) << "\n"; + OS << "FLAGS 2: " << format_hex_no_prefix(Flags->flags2, 8, false) << "\n"; + OS << "\n"; +} + template void LLVMStyle::printFileHeaders(const ELFO *Obj) { const Elf_Ehdr *E = Obj->getHeader(); { @@ -4455,16 +5195,17 @@ template void LLVMStyle::printFileHeaders(const ELFO *Obj) { W.printNumber("ProgramHeaderEntrySize", E->e_phentsize); W.printNumber("ProgramHeaderCount", E->e_phnum); W.printNumber("SectionHeaderEntrySize", E->e_shentsize); - W.printString("SectionHeaderCount", getSectionHeadersNumString(Obj)); + W.printString("SectionHeaderCount", + getSectionHeadersNumString(Obj, this->FileName)); W.printString("StringTableSectionIndex", - getSectionHeaderTableIndexString(Obj)); + getSectionHeaderTableIndexString(Obj, this->FileName)); } } template void LLVMStyle::printGroupSections(const ELFO *Obj) { DictScope Lists(W, "Groups"); - std::vector V = getGroups(Obj); + std::vector V = getGroups(Obj, this->FileName); DenseMap Map = mapSectionsToGroups(V); for (const GroupSection &G : V) { DictScope D(W, "Group"); @@ -4499,7 +5240,7 @@ template void LLVMStyle::printRelocations(const ELFO *Obj) { ListScope D(W, "Relocations"); int SectionNumber = -1; - for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) { + for (const Elf_Shdr &Sec : unwrapOrError(this->FileName, Obj->sections())) { ++SectionNumber; if (Sec.sh_type != ELF::SHT_REL && Sec.sh_type != ELF::SHT_RELA && @@ -4508,7 +5249,7 @@ template void LLVMStyle::printRelocations(const ELFO *Obj) { Sec.sh_type != ELF::SHT_ANDROID_RELR) continue; - StringRef Name = unwrapOrError(Obj->getSectionName(&Sec)); + StringRef Name = unwrapOrError(this->FileName, Obj->getSectionName(&Sec)); W.startLine() << "Section (" << SectionNumber << ") " << Name << " {\n"; W.indent(); @@ -4522,11 +5263,12 @@ template void LLVMStyle::printRelocations(const ELFO *Obj) { template void LLVMStyle::printRelocations(const Elf_Shdr *Sec, const ELFO *Obj) { - const Elf_Shdr *SymTab = unwrapOrError(Obj->getSection(Sec->sh_link)); + const Elf_Shdr *SymTab = + unwrapOrError(this->FileName, Obj->getSection(Sec->sh_link)); switch 
(Sec->sh_type) { case ELF::SHT_REL: - for (const Elf_Rel &R : unwrapOrError(Obj->rels(Sec))) { + for (const Elf_Rel &R : unwrapOrError(this->FileName, Obj->rels(Sec))) { Elf_Rela Rela; Rela.r_offset = R.r_offset; Rela.r_info = R.r_info; @@ -4535,17 +5277,18 @@ void LLVMStyle::printRelocations(const Elf_Shdr *Sec, const ELFO *Obj) { } break; case ELF::SHT_RELA: - for (const Elf_Rela &R : unwrapOrError(Obj->relas(Sec))) + for (const Elf_Rela &R : unwrapOrError(this->FileName, Obj->relas(Sec))) printRelocation(Obj, R, SymTab); break; case ELF::SHT_RELR: case ELF::SHT_ANDROID_RELR: { - Elf_Relr_Range Relrs = unwrapOrError(Obj->relrs(Sec)); + Elf_Relr_Range Relrs = unwrapOrError(this->FileName, Obj->relrs(Sec)); if (opts::RawRelr) { for (const Elf_Relr &R : Relrs) W.startLine() << W.hex(R) << "\n"; } else { - std::vector RelrRelas = unwrapOrError(Obj->decode_relrs(Relrs)); + std::vector RelrRelas = + unwrapOrError(this->FileName, Obj->decode_relrs(Relrs)); for (const Elf_Rela &R : RelrRelas) printRelocation(Obj, R, SymTab); } @@ -4553,7 +5296,8 @@ void LLVMStyle::printRelocations(const Elf_Shdr *Sec, const ELFO *Obj) { } case ELF::SHT_ANDROID_REL: case ELF::SHT_ANDROID_RELA: - for (const Elf_Rela &R : unwrapOrError(Obj->android_relas(Sec))) + for (const Elf_Rela &R : + unwrapOrError(this->FileName, Obj->android_relas(Sec))) printRelocation(Obj, R, SymTab); break; } @@ -4565,13 +5309,16 @@ void LLVMStyle::printRelocation(const ELFO *Obj, Elf_Rela Rel, SmallString<32> RelocName; Obj->getRelocationTypeName(Rel.getType(Obj->isMips64EL()), RelocName); std::string TargetName; - const Elf_Sym *Sym = unwrapOrError(Obj->getRelocationSymbol(&Rel, SymTab)); + const Elf_Sym *Sym = + unwrapOrError(this->FileName, Obj->getRelocationSymbol(&Rel, SymTab)); if (Sym && Sym->getType() == ELF::STT_SECTION) { const Elf_Shdr *Sec = unwrapOrError( + this->FileName, Obj->getSection(Sym, SymTab, this->dumper()->getShndxTable())); - TargetName = unwrapOrError(Obj->getSectionName(Sec)); + TargetName = unwrapOrError(this->FileName, Obj->getSectionName(Sec)); } else if (Sym) { - StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*SymTab)); + StringRef StrTable = + unwrapOrError(this->FileName, Obj->getStringTableForSymtab(*SymTab)); TargetName = this->dumper()->getFullSymbolName( Sym, StrTable, SymTab->sh_type == SHT_DYNSYM /* IsDynamic */); } @@ -4596,10 +5343,11 @@ void LLVMStyle::printSectionHeaders(const ELFO *Obj) { ListScope SectionsD(W, "Sections"); int SectionIndex = -1; - ArrayRef Sections = unwrapOrError(Obj->sections()); + ArrayRef Sections = unwrapOrError(this->FileName, Obj->sections()); const ELFObjectFile *ElfObj = this->dumper()->getElfObject(); for (const Elf_Shdr &Sec : Sections) { - StringRef Name = getSectionName(Sec, *ElfObj, Sections); + StringRef Name = unwrapOrError( + ElfObj->getFileName(), Obj->getSectionName(&Sec, this->WarningHandler)); DictScope SectionD(W, "Section"); W.printNumber("Index", ++SectionIndex); W.printNumber("Name", Name, Sec.sh_name); @@ -4652,19 +5400,25 @@ void LLVMStyle::printSectionHeaders(const ELFO *Obj) { if (opts::SectionSymbols) { ListScope D(W, "Symbols"); const Elf_Shdr *Symtab = this->dumper()->getDotSymtabSec(); - StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*Symtab)); + StringRef StrTable = + unwrapOrError(this->FileName, Obj->getStringTableForSymtab(*Symtab)); - for (const Elf_Sym &Sym : unwrapOrError(Obj->symbols(Symtab))) { + for (const Elf_Sym &Sym : + unwrapOrError(this->FileName, Obj->symbols(Symtab))) { const Elf_Shdr *SymSec = 
unwrapOrError( + this->FileName, Obj->getSection(&Sym, Symtab, this->dumper()->getShndxTable())); if (SymSec == &Sec) - printSymbol(Obj, &Sym, unwrapOrError(Obj->symbols(Symtab)).begin(), - StrTable, false); + printSymbol( + Obj, &Sym, + unwrapOrError(this->FileName, Obj->symbols(Symtab)).begin(), + StrTable, false, false); } } if (opts::SectionData && Sec.sh_type != ELF::SHT_NOBITS) { - ArrayRef Data = unwrapOrError(Obj->getSectionContents(&Sec)); + ArrayRef Data = + unwrapOrError(this->FileName, Obj->getSectionContents(&Sec)); W.printBinaryBlock( "SectionData", StringRef(reinterpret_cast(Data.data()), Data.size())); @@ -4675,7 +5429,8 @@ void LLVMStyle::printSectionHeaders(const ELFO *Obj) { template void LLVMStyle::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *First, StringRef StrTable, - bool IsDynamic) { + bool IsDynamic, + bool /*NonVisibilityBitsUsed*/) { unsigned SectionIndex = 0; StringRef SectionName; this->dumper()->getSectionNameIndex(Symbol, First, SectionName, SectionIndex); @@ -4786,7 +5541,8 @@ void LLVMStyle::printDynamicRelocations(const ELFO *Obj) { } if (DynRelrRegion.Size > 0) { Elf_Relr_Range Relrs = this->dumper()->dyn_relrs(); - std::vector RelrRelas = unwrapOrError(Obj->decode_relrs(Relrs)); + std::vector RelrRelas = + unwrapOrError(this->FileName, Obj->decode_relrs(Relrs)); for (const Elf_Rela &Rela : RelrRelas) printDynamicRelocation(Obj, Rela); } @@ -4809,11 +5565,9 @@ template void LLVMStyle::printDynamicRelocation(const ELFO *Obj, Elf_Rela Rel) { SmallString<32> RelocName; Obj->getRelocationTypeName(Rel.getType(Obj->isMips64EL()), RelocName); - std::string SymbolName; - uint32_t SymIndex = Rel.getSymbol(Obj->isMips64EL()); - const Elf_Sym *Sym = this->dumper()->dynamic_symbols().begin() + SymIndex; - SymbolName = maybeDemangle( - unwrapOrError(Sym->getName(this->dumper()->getDynamicStringTable()))); + std::string SymbolName = + getSymbolForReloc(Obj, this->FileName, this->dumper(), Rel).Name; + if (opts::ExpandRelocs) { DictScope Group(W, "Relocation"); W.printHex("Offset", Rel.r_offset); @@ -4842,7 +5596,8 @@ template void LLVMStyle::printProgramHeaders(const ELFO *Obj) { ListScope L(W, "ProgramHeaders"); - for (const Elf_Phdr &Phdr : unwrapOrError(Obj->program_headers())) { + for (const Elf_Phdr &Phdr : + unwrapOrError(this->FileName, Obj->program_headers())) { DictScope P(W, "ProgramHeader"); W.printHex("Type", getElfSegmentType(Obj->getHeader()->e_machine, Phdr.p_type), @@ -4860,23 +5615,16 @@ void LLVMStyle::printProgramHeaders(const ELFO *Obj) { template void LLVMStyle::printVersionSymbolSection(const ELFFile *Obj, const Elf_Shdr *Sec) { - DictScope SS(W, "Version symbols"); + ListScope SS(W, "VersionSymbols"); if (!Sec) return; - StringRef SecName = unwrapOrError(Obj->getSectionName(Sec)); - W.printNumber("Section Name", SecName, Sec->sh_name); - W.printHex("Address", Sec->sh_addr); - W.printHex("Offset", Sec->sh_offset); - W.printNumber("Link", Sec->sh_link); - const uint8_t *VersymBuf = reinterpret_cast(Obj->base() + Sec->sh_offset); const ELFDumper *Dumper = this->dumper(); StringRef StrTable = Dumper->getDynamicStringTable(); // Same number of entries in the dynamic symbol table (DT_SYMTAB). 
- ListScope Syms(W, "Symbols"); for (const Elf_Sym &Sym : Dumper->dynamic_symbols()) { DictScope S(W, "Symbol"); const Elf_Versym *Versym = reinterpret_cast(VersymBuf); @@ -4891,7 +5639,7 @@ void LLVMStyle::printVersionSymbolSection(const ELFFile *Obj, template void LLVMStyle::printVersionDefinitionSection(const ELFFile *Obj, const Elf_Shdr *Sec) { - DictScope SD(W, "SHT_GNU_verdef"); + ListScope SD(W, "VersionDefinitions"); if (!Sec) return; @@ -4899,7 +5647,8 @@ void LLVMStyle::printVersionDefinitionSection(const ELFFile *Obj, reinterpret_cast(Obj->base() + Sec->sh_offset); const uint8_t *SecEndAddress = SecStartAddress + Sec->sh_size; const uint8_t *VerdefBuf = SecStartAddress; - const Elf_Shdr *StrTab = unwrapOrError(Obj->getSection(Sec->sh_link)); + const Elf_Shdr *StrTab = + unwrapOrError(this->FileName, Obj->getSection(Sec->sh_link)); unsigned VerDefsNum = Sec->sh_info; while (VerDefsNum--) { @@ -4938,13 +5687,14 @@ void LLVMStyle::printVersionDefinitionSection(const ELFFile *Obj, template void LLVMStyle::printVersionDependencySection(const ELFFile *Obj, const Elf_Shdr *Sec) { - DictScope SD(W, "SHT_GNU_verneed"); + ListScope SD(W, "VersionRequirements"); if (!Sec) return; const uint8_t *SecData = reinterpret_cast(Obj->base() + Sec->sh_offset); - const Elf_Shdr *StrTab = unwrapOrError(Obj->getSection(Sec->sh_link)); + const Elf_Shdr *StrTab = + unwrapOrError(this->FileName, Obj->getSection(Sec->sh_link)); const uint8_t *VerneedBuf = SecData; unsigned VerneedNum = Sec->sh_info; @@ -4986,37 +5736,62 @@ void LLVMStyle::printCGProfile(const ELFFile *Obj) { ListScope L(W, "CGProfile"); if (!this->dumper()->getDotCGProfileSec()) return; - auto CGProfile = - unwrapOrError(Obj->template getSectionContentsAsArray( - this->dumper()->getDotCGProfileSec())); + auto CGProfile = unwrapOrError( + this->FileName, Obj->template getSectionContentsAsArray( + this->dumper()->getDotCGProfileSec())); for (const Elf_CGProfile &CGPE : CGProfile) { DictScope D(W, "CGProfileEntry"); - W.printNumber("From", this->dumper()->getStaticSymbolName(CGPE.cgp_from), - CGPE.cgp_from); - W.printNumber("To", this->dumper()->getStaticSymbolName(CGPE.cgp_to), - CGPE.cgp_to); + W.printNumber( + "From", + unwrapOrError(this->FileName, + this->dumper()->getStaticSymbolName(CGPE.cgp_from)), + CGPE.cgp_from); + W.printNumber( + "To", + unwrapOrError(this->FileName, + this->dumper()->getStaticSymbolName(CGPE.cgp_to)), + CGPE.cgp_to); W.printNumber("Weight", CGPE.cgp_weight); } } +static Expected> toULEB128Array(ArrayRef Data) { + std::vector Ret; + const uint8_t *Cur = Data.begin(); + const uint8_t *End = Data.end(); + while (Cur != End) { + unsigned Size; + const char *Err; + Ret.push_back(decodeULEB128(Cur, &Size, End, &Err)); + if (Err) + return createError(Err); + Cur += Size; + } + return Ret; +} + template void LLVMStyle::printAddrsig(const ELFFile *Obj) { ListScope L(W, "Addrsig"); if (!this->dumper()->getDotAddrsigSec()) return; ArrayRef Contents = unwrapOrError( + this->FileName, Obj->getSectionContents(this->dumper()->getDotAddrsigSec())); - const uint8_t *Cur = Contents.begin(); - const uint8_t *End = Contents.end(); - while (Cur != End) { - unsigned Size; - const char *Err; - uint64_t SymIndex = decodeULEB128(Cur, &Size, End, &Err); - if (Err) - reportError(Err); - W.printNumber("Sym", this->dumper()->getStaticSymbolName(SymIndex), - SymIndex); - Cur += Size; + Expected> V = toULEB128Array(Contents); + if (!V) { + reportWarning(V.takeError(), this->FileName); + return; + } + + for (uint64_t Sym : *V) { + Expected 
NameOrErr = this->dumper()->getStaticSymbolName(Sym); + if (NameOrErr) { + W.printNumber("Sym", *NameOrErr, Sym); + continue; + } + reportWarning(NameOrErr.takeError(), this->FileName); + W.printNumber("Sym", "", Sym); } } @@ -5051,6 +5826,17 @@ static void printGNUNoteLLVMStyle(uint32_t NoteType, ArrayRef Desc, } } +static void printCoreNoteLLVMStyle(const CoreNote &Note, ScopedPrinter &W) { + W.printNumber("Page Size", Note.PageSize); + for (const CoreFileMapping &Mapping : Note.Mappings) { + ListScope D(W, "Mapping"); + W.printHex("Start", Mapping.Start); + W.printHex("End", Mapping.End); + W.printHex("Offset", Mapping.Offset); + W.printString("Filename", Mapping.Filename); + } +} + template void LLVMStyle::printNotes(const ELFFile *Obj) { ListScope L(W, "Notes"); @@ -5067,56 +5853,81 @@ void LLVMStyle::printNotes(const ELFFile *Obj) { ArrayRef Descriptor = Note.getDesc(); Elf_Word Type = Note.getType(); + // Print the note owner/type. W.printString("Owner", Name); W.printHex("Data size", Descriptor.size()); if (Name == "GNU") { W.printString("Type", getGNUNoteTypeName(Type)); - printGNUNoteLLVMStyle(Type, Descriptor, W); } else if (Name == "FreeBSD") { W.printString("Type", getFreeBSDNoteTypeName(Type)); } else if (Name == "AMD") { W.printString("Type", getAMDNoteTypeName(Type)); - const AMDNote N = getAMDNote(Type, Descriptor); - if (!N.Type.empty()) - W.printString(N.Type, N.Value); } else if (Name == "AMDGPU") { W.printString("Type", getAMDGPUNoteTypeName(Type)); - const AMDGPUNote N = getAMDGPUNote(Type, Descriptor); - if (!N.Type.empty()) - W.printString(N.Type, N.Value); } else { - StringRef NoteType = getGenericNoteTypeName(Type); + StringRef NoteType = Obj->getHeader()->e_type == ELF::ET_CORE + ? getCoreNoteTypeName(Type) + : getGenericNoteTypeName(Type); if (!NoteType.empty()) W.printString("Type", NoteType); else W.printString("Type", "Unknown (" + to_string(format_hex(Type, 10)) + ")"); } + + // Print the description, or fallback to printing raw bytes for unknown + // owners. 
+ if (Name == "GNU") { + printGNUNoteLLVMStyle(Type, Descriptor, W); + } else if (Name == "AMD") { + const AMDNote N = getAMDNote(Type, Descriptor); + if (!N.Type.empty()) + W.printString(N.Type, N.Value); + } else if (Name == "AMDGPU") { + const AMDGPUNote N = getAMDGPUNote(Type, Descriptor); + if (!N.Type.empty()) + W.printString(N.Type, N.Value); + } else if (Name == "CORE") { + if (Type == ELF::NT_FILE) { + DataExtractor DescExtractor(Descriptor, + ELFT::TargetEndianness == support::little, + sizeof(Elf_Addr)); + Expected Note = readCoreNote(DescExtractor); + if (Note) + printCoreNoteLLVMStyle(*Note, W); + else + reportWarning(Note.takeError(), this->FileName); + } + } else if (!Descriptor.empty()) { + W.printBinaryBlock("Description data", Descriptor); + } }; - if (Obj->getHeader()->e_type == ELF::ET_CORE) { - for (const auto &P : unwrapOrError(Obj->program_headers())) { - if (P.p_type != PT_NOTE) + ArrayRef Sections = unwrapOrError(this->FileName, Obj->sections()); + if (Obj->getHeader()->e_type != ELF::ET_CORE && !Sections.empty()) { + for (const auto &S : Sections) { + if (S.sh_type != SHT_NOTE) continue; DictScope D(W, "NoteSection"); - PrintHeader(P.p_offset, P.p_filesz); + PrintHeader(S.sh_offset, S.sh_size); Error Err = Error::success(); - for (const auto &Note : Obj->notes(P, Err)) + for (const auto &Note : Obj->notes(S, Err)) ProcessNote(Note); if (Err) - error(std::move(Err)); + reportError(std::move(Err), this->FileName); } } else { - for (const auto &S : unwrapOrError(Obj->sections())) { - if (S.sh_type != SHT_NOTE) + for (const auto &P : + unwrapOrError(this->FileName, Obj->program_headers())) { + if (P.p_type != PT_NOTE) continue; DictScope D(W, "NoteSection"); - PrintHeader(S.sh_offset, S.sh_size); + PrintHeader(P.p_offset, P.p_filesz); Error Err = Error::success(); - for (const auto &Note : Obj->notes(S, Err)) + for (const auto &Note : Obj->notes(P, Err)) ProcessNote(Note); if (Err) - error(std::move(Err)); + reportError(std::move(Err), this->FileName); } } } @@ -5125,11 +5936,12 @@ template void LLVMStyle::printELFLinkerOptions(const ELFFile *Obj) { ListScope L(W, "LinkerOptions"); - for (const Elf_Shdr &Shdr : unwrapOrError(Obj->sections())) { + for (const Elf_Shdr &Shdr : unwrapOrError(this->FileName, Obj->sections())) { if (Shdr.sh_type != ELF::SHT_LLVM_LINKER_OPTIONS) continue; - ArrayRef Contents = unwrapOrError(Obj->getSectionContents(&Shdr)); + ArrayRef Contents = + unwrapOrError(this->FileName, Obj->getSectionContents(&Shdr)); for (const uint8_t *P = Contents.begin(), *E = Contents.end(); P < E; ) { StringRef Key = StringRef(reinterpret_cast(P)); StringRef Value = @@ -5142,6 +5954,22 @@ void LLVMStyle::printELFLinkerOptions(const ELFFile *Obj) { } } +template +void LLVMStyle::printStackSizes(const ELFObjectFile *Obj) { + ListScope L(W, "StackSizes"); + if (Obj->isRelocatableObject()) + this->printRelocatableStackSizes(Obj, []() {}); + else + this->printNonRelocatableStackSizes(Obj, []() {}); +} + +template +void LLVMStyle::printStackSizeEntry(uint64_t Size, StringRef FuncName) { + DictScope D(W, "Entry"); + W.printString("Function", FuncName); + W.printHex("Size", Size); +} + template void LLVMStyle::printMipsGOT(const MipsGOTParser &Parser) { auto PrintEntry = [&](const Elf_Addr *E) { @@ -5252,3 +6080,41 @@ void LLVMStyle::printMipsPLT(const MipsGOTParser &Parser) { } } } + +template +void LLVMStyle::printMipsABIFlags(const ELFObjectFile *ObjF) { + const ELFFile *Obj = ObjF->getELFFile(); + const Elf_Shdr *Shdr = + findSectionByName(*Obj, ObjF->getFileName(), 
".MIPS.abiflags"); + if (!Shdr) { + W.startLine() << "There is no .MIPS.abiflags section in the file.\n"; + return; + } + ArrayRef Sec = + unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(Shdr)); + if (Sec.size() != sizeof(Elf_Mips_ABIFlags)) { + W.startLine() << "The .MIPS.abiflags section has a wrong size.\n"; + return; + } + + auto *Flags = reinterpret_cast *>(Sec.data()); + + raw_ostream &OS = W.getOStream(); + DictScope GS(W, "MIPS ABI Flags"); + + W.printNumber("Version", Flags->version); + W.startLine() << "ISA: "; + if (Flags->isa_rev <= 1) + OS << format("MIPS%u", Flags->isa_level); + else + OS << format("MIPS%ur%u", Flags->isa_level, Flags->isa_rev); + OS << "\n"; + W.printEnum("ISA Extension", Flags->isa_ext, makeArrayRef(ElfMipsISAExtType)); + W.printFlags("ASEs", Flags->ases, makeArrayRef(ElfMipsASEFlags)); + W.printEnum("FP ABI", Flags->fp_abi, makeArrayRef(ElfMipsFpABIType)); + W.printNumber("GPR size", getMipsRegisterSize(Flags->gpr_size)); + W.printNumber("CPR1 size", getMipsRegisterSize(Flags->cpr1_size)); + W.printNumber("CPR2 size", getMipsRegisterSize(Flags->cpr2_size)); + W.printFlags("Flags 1", Flags->flags1, makeArrayRef(ElfMipsFlags1)); + W.printHex("Flags 2", Flags->flags2); +} diff --git a/tools/llvm-readobj/MachODumper.cpp b/tools/llvm-readobj/MachODumper.cpp index 32a3866eb2f2..20a60b3df699 100644 --- a/tools/llvm-readobj/MachODumper.cpp +++ b/tools/llvm-readobj/MachODumper.cpp @@ -214,6 +214,31 @@ static const EnumEntry MachOHeaderFlags[] = { LLVM_READOBJ_ENUM_ENT(MachO, MH_APP_EXTENSION_SAFE), }; +static const EnumEntry MachOSectionTypes[] = { + { "Regular" , MachO::S_REGULAR }, + { "ZeroFill" , MachO::S_ZEROFILL }, + { "CStringLiterals" , MachO::S_CSTRING_LITERALS }, + { "4ByteLiterals" , MachO::S_4BYTE_LITERALS }, + { "8ByteLiterals" , MachO::S_8BYTE_LITERALS }, + { "LiteralPointers" , MachO::S_LITERAL_POINTERS }, + { "NonLazySymbolPointers" , MachO::S_NON_LAZY_SYMBOL_POINTERS }, + { "LazySymbolPointers" , MachO::S_LAZY_SYMBOL_POINTERS }, + { "SymbolStubs" , MachO::S_SYMBOL_STUBS }, + { "ModInitFuncPointers" , MachO::S_MOD_INIT_FUNC_POINTERS }, + { "ModTermFuncPointers" , MachO::S_MOD_TERM_FUNC_POINTERS }, + { "Coalesced" , MachO::S_COALESCED }, + { "GBZeroFill" , MachO::S_GB_ZEROFILL }, + { "Interposing" , MachO::S_INTERPOSING }, + { "16ByteLiterals" , MachO::S_16BYTE_LITERALS }, + { "DTraceDOF" , MachO::S_DTRACE_DOF }, + { "LazyDylibSymbolPointers" , MachO::S_LAZY_DYLIB_SYMBOL_POINTERS }, + { "ThreadLocalRegular" , MachO::S_THREAD_LOCAL_REGULAR }, + { "ThreadLocalZerofill" , MachO::S_THREAD_LOCAL_ZEROFILL }, + { "ThreadLocalVariables" , MachO::S_THREAD_LOCAL_VARIABLES }, + { "ThreadLocalVariablePointers" , MachO::S_THREAD_LOCAL_VARIABLE_POINTERS }, + { "ThreadLocalInitFunctionPointers", MachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS } +}; + static const EnumEntry MachOSectionAttributes[] = { { "LocReloc" , 1 << 0 /*S_ATTR_LOC_RELOC */ }, { "ExtReloc" , 1 << 1 /*S_ATTR_EXT_RELOC */ }, @@ -440,10 +465,7 @@ void MachODumper::printSectionHeaders(const MachOObjectFile *Obj) { MachOSection MOSection; getSection(Obj, Section.getRawDataRefImpl(), MOSection); DataRefImpl DR = Section.getRawDataRefImpl(); - - StringRef Name; - error(Section.getName(Name)); - + StringRef Name = unwrapOrError(Obj->getFileName(), Section.getName()); ArrayRef RawName = Obj->getSectionRawName(DR); StringRef SegmentName = Obj->getSectionFinalSegmentName(DR); ArrayRef RawSegmentName = Obj->getSectionRawFinalSegmentName(DR); @@ -459,7 +481,7 @@ void 
MachODumper::printSectionHeaders(const MachOObjectFile *Obj) { W.printHex("RelocationOffset", MOSection.RelocationTableOffset); W.printNumber("RelocationCount", MOSection.NumRelocationTableEntries); W.printEnum("Type", MOSection.Flags & 0xFF, - makeArrayRef(MachOSectionAttributes)); + makeArrayRef(MachOSectionTypes)); W.printFlags("Attributes", MOSection.Flags >> 8, makeArrayRef(MachOSectionAttributes)); W.printHex("Reserved1", MOSection.Reserved1); @@ -484,7 +506,8 @@ void MachODumper::printSectionHeaders(const MachOObjectFile *Obj) { } if (opts::SectionData && !Section.isBSS()) - W.printBinaryBlock("SectionData", unwrapOrError(Section.getContents())); + W.printBinaryBlock("SectionData", unwrapOrError(Obj->getFileName(), + Section.getContents())); } } @@ -493,9 +516,7 @@ void MachODumper::printRelocations() { std::error_code EC; for (const SectionRef &Section : Obj->sections()) { - StringRef Name; - error(Section.getName(Name)); - + StringRef Name = unwrapOrError(Obj->getFileName(), Section.getName()); bool PrintedGroup = false; for (const RelocationRef &Reloc : Section.relocations()) { if (!PrintedGroup) { @@ -535,14 +556,13 @@ void MachODumper::printRelocation(const MachOObjectFile *Obj, if (Symbol != Obj->symbol_end()) { Expected TargetNameOrErr = Symbol->getName(); if (!TargetNameOrErr) - error(errorToErrorCode(TargetNameOrErr.takeError())); + reportError(TargetNameOrErr.takeError(), Obj->getFileName()); TargetName = *TargetNameOrErr; } } else if (!IsScattered) { section_iterator SecI = Obj->getRelocationSection(DR); - if (SecI != Obj->section_end()) { - error(SecI->getName(TargetName)); - } + if (SecI != Obj->section_end()) + TargetName = unwrapOrError(Obj->getFileName(), SecI->getName()); } if (TargetName.empty()) TargetName = "-"; @@ -610,10 +630,12 @@ void MachODumper::printSymbol(const SymbolRef &Symbol) { StringRef SectionName = ""; Expected SecIOrErr = Symbol.getSection(); - error(errorToErrorCode(SecIOrErr.takeError())); + if (!SecIOrErr) + reportError(SecIOrErr.takeError(), Obj->getFileName()); + section_iterator SecI = *SecIOrErr; if (SecI != Obj->section_end()) - error(SecI->getName(SectionName)); + SectionName = unwrapOrError(Obj->getFileName(), SecI->getName()); DictScope D(W, "Symbol"); W.printNumber("Name", SymbolName, MOSymbol.StringIndex); @@ -643,7 +665,11 @@ void MachODumper::printStackMap() const { object::SectionRef StackMapSection; for (auto Sec : Obj->sections()) { StringRef Name; - Sec.getName(Name); + if (Expected NameOrErr = Sec.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + if (Name == "__llvm_stackmaps") { StackMapSection = Sec; break; @@ -653,7 +679,8 @@ void MachODumper::printStackMap() const { if (StackMapSection == object::SectionRef()) return; - StringRef StackMapContents = unwrapOrError(StackMapSection.getContents()); + StringRef StackMapContents = + unwrapOrError(Obj->getFileName(), StackMapSection.getContents()); ArrayRef StackMapContentsArray = arrayRefFromStringRef(StackMapContents); diff --git a/tools/llvm-readobj/ObjDumper.cpp b/tools/llvm-readobj/ObjDumper.cpp index 0a9e22c8a71c..9e5ebd99ac37 100644 --- a/tools/llvm-readobj/ObjDumper.cpp +++ b/tools/llvm-readobj/ObjDumper.cpp @@ -23,6 +23,10 @@ namespace llvm { +static inline Error createError(const Twine &Msg) { + return createStringError(object::object_error::parse_failed, Msg); +} + ObjDumper::ObjDumper(ScopedPrinter &Writer) : W(Writer) {} ObjDumper::~ObjDumper() { @@ -49,8 +53,7 @@ getSectionRefsByNameOrIndex(const object::ObjectFile *Obj, SecIndex = 
Obj->isELF() ? 0 : 1; for (object::SectionRef SecRef : Obj->sections()) { - StringRef SecName; - error(SecRef.getName(SecName)); + StringRef SecName = unwrapOrError(Obj->getFileName(), SecRef.getName()); auto NameIt = SecNames.find(SecName); if (NameIt != SecNames.end()) NameIt->second = true; @@ -64,10 +67,15 @@ getSectionRefsByNameOrIndex(const object::ObjectFile *Obj, for (const std::pair &S : SecNames) if (!S.second) - reportWarning(formatv("could not find section '{0}'", S.first).str()); + reportWarning( + createError(formatv("could not find section '{0}'", S.first).str()), + Obj->getFileName()); + for (std::pair S : SecIndices) if (!S.second) - reportWarning(formatv("could not find section {0}", S.first).str()); + reportWarning( + createError(formatv("could not find section {0}", S.first).str()), + Obj->getFileName()); return Ret; } @@ -77,14 +85,16 @@ void ObjDumper::printSectionsAsString(const object::ObjectFile *Obj, bool First = true; for (object::SectionRef Section : getSectionRefsByNameOrIndex(Obj, Sections)) { - StringRef SectionName; - error(Section.getName(SectionName)); + StringRef SectionName = + unwrapOrError(Obj->getFileName(), Section.getName()); + if (!First) W.startLine() << '\n'; First = false; W.startLine() << "String dump of section '" << SectionName << "':\n"; - StringRef SectionContent = unwrapOrError(Section.getContents()); + StringRef SectionContent = + unwrapOrError(Obj->getFileName(), Section.getContents()); const uint8_t *SecContent = SectionContent.bytes_begin(); const uint8_t *CurrentWord = SecContent; @@ -110,14 +120,16 @@ void ObjDumper::printSectionsAsHex(const object::ObjectFile *Obj, bool First = true; for (object::SectionRef Section : getSectionRefsByNameOrIndex(Obj, Sections)) { - StringRef SectionName; - error(Section.getName(SectionName)); + StringRef SectionName = + unwrapOrError(Obj->getFileName(), Section.getName()); + if (!First) W.startLine() << '\n'; First = false; W.startLine() << "Hex dump of section '" << SectionName << "':\n"; - StringRef SectionContent = unwrapOrError(Section.getContents()); + StringRef SectionContent = + unwrapOrError(Obj->getFileName(), Section.getContents()); const uint8_t *SecContent = SectionContent.bytes_begin(); const uint8_t *SecEnd = SecContent + SectionContent.size(); diff --git a/tools/llvm-readobj/ObjDumper.h b/tools/llvm-readobj/ObjDumper.h index aaabfa2ca2e8..2ba441342499 100644 --- a/tools/llvm-readobj/ObjDumper.h +++ b/tools/llvm-readobj/ObjDumper.h @@ -68,15 +68,8 @@ public: virtual void printAddrsig() {} virtual void printNotes() {} virtual void printELFLinkerOptions() {} - - // Only implemented for ARM ELF at this time. - virtual void printAttributes() { } - - // Only implemented for MIPS ELF at this time. - virtual void printMipsPLTGOT() { } - virtual void printMipsABIFlags() { } - virtual void printMipsReginfo() { } - virtual void printMipsOptions() { } + virtual void printStackSizes() {} + virtual void printArchSpecificInfo() { } // Only implemented for PE/COFF. 
virtual void printCOFFImports() { } diff --git a/tools/llvm-readobj/WasmDumper.cpp b/tools/llvm-readobj/WasmDumper.cpp index 041a9a15bdb6..dfab9f40d71b 100644 --- a/tools/llvm-readobj/WasmDumper.cpp +++ b/tools/llvm-readobj/WasmDumper.cpp @@ -51,6 +51,7 @@ static const EnumEntry WasmSymbolFlags[] = { ENUM_ENTRY(UNDEFINED), ENUM_ENTRY(EXPORTED), ENUM_ENTRY(EXPLICIT_NAME), + ENUM_ENTRY(NO_STRIP), #undef ENUM_ENTRY }; @@ -90,7 +91,7 @@ void WasmDumper::printRelocation(const SectionRef &Section, StringRef SymName; symbol_iterator SI = Reloc.getSymbol(); if (SI != Obj->symbol_end()) - SymName = error(SI->getName()); + SymName = unwrapOrError(Obj->getFileName(), SI->getName()); bool HasAddend = false; switch (RelocType) { @@ -133,8 +134,8 @@ void WasmDumper::printRelocations() { int SectionNumber = 0; for (const SectionRef &Section : Obj->sections()) { bool PrintedGroup = false; - StringRef Name; - error(Section.getName(Name)); + StringRef Name = unwrapOrError(Obj->getFileName(), Section.getName()); + ++SectionNumber; for (const RelocationRef &Reloc : Section.relocations()) { diff --git a/tools/llvm-readobj/Win64EHDumper.cpp b/tools/llvm-readobj/Win64EHDumper.cpp index e64b8f157180..fa268ce9d434 100644 --- a/tools/llvm-readobj/Win64EHDumper.cpp +++ b/tools/llvm-readobj/Win64EHDumper.cpp @@ -289,7 +289,9 @@ void Dumper::printRuntimeFunction(const Context &Ctx, resolveRelocation(Ctx, Section, SectionOffset + 8, XData, Offset); ArrayRef Contents; - error(Ctx.COFF.getSectionContents(XData, Contents)); + if (Error E = Ctx.COFF.getSectionContents(XData, Contents)) + reportError(std::move(E), Ctx.COFF.getFileName()); + if (Contents.empty()) return; @@ -304,14 +306,19 @@ void Dumper::printRuntimeFunction(const Context &Ctx, void Dumper::printData(const Context &Ctx) { for (const auto &Section : Ctx.COFF.sections()) { StringRef Name; - Section.getName(Name); + if (Expected NameOrErr = Section.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); if (Name != ".pdata" && !Name.startswith(".pdata$")) continue; const coff_section *PData = Ctx.COFF.getCOFFSection(Section); ArrayRef Contents; - error(Ctx.COFF.getSectionContents(PData, Contents)); + + if (Error E = Ctx.COFF.getSectionContents(PData, Contents)) + reportError(std::move(E), Ctx.COFF.getFileName()); if (Contents.empty()) continue; diff --git a/tools/llvm-readobj/WindowsResourceDumper.cpp b/tools/llvm-readobj/WindowsResourceDumper.cpp index 13989f696d9d..a2fb6aac3f93 100644 --- a/tools/llvm-readobj/WindowsResourceDumper.cpp +++ b/tools/llvm-readobj/WindowsResourceDumper.cpp @@ -56,8 +56,12 @@ void Dumper::printEntry(const ResourceEntryRef &Ref) { if (Ref.checkTypeString()) { auto NarrowStr = stripUTF16(Ref.getTypeString()); SW.printString("Resource type (string)", NarrowStr); - } else - SW.printNumber("Resource type (int)", Ref.getTypeID()); + } else { + SmallString<20> IDStr; + raw_svector_ostream OS(IDStr); + printResourceTypeName(Ref.getTypeID(), OS); + SW.printString("Resource type (int)", IDStr); + } if (Ref.checkNameString()) { auto NarrowStr = stripUTF16(Ref.getNameString()); diff --git a/tools/llvm-readobj/XCOFFDumper.cpp b/tools/llvm-readobj/XCOFFDumper.cpp index 6f260f91537f..fe95b6d1b494 100644 --- a/tools/llvm-readobj/XCOFFDumper.cpp +++ b/tools/llvm-readobj/XCOFFDumper.cpp @@ -22,6 +22,12 @@ using namespace object; namespace { class XCOFFDumper : public ObjDumper { + enum { + SymbolTypeMask = 0x07, + SymbolAlignmentMask = 0xF8, + SymbolAlignmentBitOffset = 3 + }; + public: XCOFFDumper(const XCOFFObjectFile &Obj, 
ScopedPrinter &Writer) : ObjDumper(Writer), Obj(Obj) {} @@ -37,11 +43,21 @@ public: private: template void printSectionHeaders(ArrayRef Sections); - - const XCOFFObjectFile &Obj; + template void printGenericSectionHeader(T &Sec) const; + template void printOverflowSectionHeader(T &Sec) const; + void printFileAuxEnt(const XCOFFFileAuxEnt *AuxEntPtr); + void printCsectAuxEnt32(const XCOFFCsectAuxEnt32 *AuxEntPtr); + void printSectAuxEntForStat(const XCOFFSectAuxEntForStat *AuxEntPtr); + void printSymbol(const SymbolRef &); // Least significant 3 bits are reserved. static constexpr unsigned SectionFlagsReservedMask = 0x7; + + // The low order 16 bits of section flags denotes the section type. + static constexpr unsigned SectionFlagsTypeMask = 0xffffu; + + void printRelocations(ArrayRef Sections); + const XCOFFObjectFile &Obj; }; } // anonymous namespace @@ -100,11 +116,315 @@ void XCOFFDumper::printSectionHeaders() { } void XCOFFDumper::printRelocations() { - llvm_unreachable("Unimplemented functionality for XCOFFDumper"); + if (Obj.is64Bit()) + llvm_unreachable("64-bit relocation output not implemented!"); + else + printRelocations(Obj.sections32()); +} + +static const EnumEntry RelocationTypeNameclass[] = { +#define ECase(X) \ + { #X, XCOFF::X } + ECase(R_POS), ECase(R_RL), ECase(R_RLA), ECase(R_NEG), + ECase(R_REL), ECase(R_TOC), ECase(R_TRL), ECase(R_TRLA), + ECase(R_GL), ECase(R_TCL), ECase(R_REF), ECase(R_BA), + ECase(R_BR), ECase(R_RBA), ECase(R_RBR), ECase(R_TLS), + ECase(R_TLS_IE), ECase(R_TLS_LD), ECase(R_TLS_LE), ECase(R_TLSM), + ECase(R_TLSML), ECase(R_TOCU), ECase(R_TOCL) +#undef ECase +}; + +void XCOFFDumper::printRelocations(ArrayRef Sections) { + if (!opts::ExpandRelocs) + report_fatal_error("Unexpanded relocation output not implemented."); + + ListScope LS(W, "Relocations"); + uint16_t Index = 0; + for (const auto &Sec : Sections) { + ++Index; + // Only the .text, .data, .tdata, and STYP_DWARF sections have relocation. + if (Sec.Flags != XCOFF::STYP_TEXT && Sec.Flags != XCOFF::STYP_DATA && + Sec.Flags != XCOFF::STYP_TDATA && Sec.Flags != XCOFF::STYP_DWARF) + continue; + auto Relocations = unwrapOrError(Obj.getFileName(), Obj.relocations(Sec)); + if (Relocations.empty()) + continue; + + W.startLine() << "Section (index: " << Index << ") " << Sec.getName() + << " {\n"; + for (auto Reloc : Relocations) { + StringRef SymbolName = unwrapOrError( + Obj.getFileName(), Obj.getSymbolNameByIndex(Reloc.SymbolIndex)); + + DictScope RelocScope(W, "Relocation"); + W.printHex("Virtual Address", Reloc.VirtualAddress); + W.printNumber("Symbol", SymbolName, Reloc.SymbolIndex); + W.printString("IsSigned", Reloc.isRelocationSigned() ? "Yes" : "No"); + W.printNumber("FixupBitValue", Reloc.isFixupIndicated() ? 
1 : 0); + W.printNumber("Length", Reloc.getRelocatedLength()); + W.printEnum("Type", (uint8_t)Reloc.Type, + makeArrayRef(RelocationTypeNameclass)); + } + W.unindent(); + W.startLine() << "}\n"; + } +} + +static const EnumEntry FileStringType[] = { +#define ECase(X) \ + { #X, XCOFF::X } + ECase(XFT_FN), ECase(XFT_CT), ECase(XFT_CV), ECase(XFT_CD) +#undef ECase +}; + +void XCOFFDumper::printFileAuxEnt(const XCOFFFileAuxEnt *AuxEntPtr) { + if (Obj.is64Bit()) + report_fatal_error( + "Printing for File Auxiliary Entry in 64-bit is unimplemented."); + StringRef FileName = + unwrapOrError(Obj.getFileName(), Obj.getCFileName(AuxEntPtr)); + DictScope SymDs(W, "File Auxiliary Entry"); + W.printNumber("Index", + Obj.getSymbolIndex(reinterpret_cast(AuxEntPtr))); + W.printString("Name", FileName); + W.printEnum("Type", static_cast(AuxEntPtr->Type), + makeArrayRef(FileStringType)); +} + +static const EnumEntry CsectStorageMappingClass[] = + { +#define ECase(X) \ + { #X, XCOFF::X } + ECase(XMC_PR), ECase(XMC_RO), ECase(XMC_DB), + ECase(XMC_GL), ECase(XMC_XO), ECase(XMC_SV), + ECase(XMC_SV64), ECase(XMC_SV3264), ECase(XMC_TI), + ECase(XMC_TB), ECase(XMC_RW), ECase(XMC_TC0), + ECase(XMC_TC), ECase(XMC_TD), ECase(XMC_DS), + ECase(XMC_UA), ECase(XMC_BS), ECase(XMC_UC), + ECase(XMC_TL), ECase(XMC_TE) +#undef ECase +}; + +static const EnumEntry CsectSymbolTypeClass[] = { +#define ECase(X) \ + { #X, XCOFF::X } + ECase(XTY_ER), ECase(XTY_SD), ECase(XTY_LD), ECase(XTY_CM) +#undef ECase +}; + +void XCOFFDumper::printCsectAuxEnt32(const XCOFFCsectAuxEnt32 *AuxEntPtr) { + assert(!Obj.is64Bit() && "32-bit interface called on 64-bit object file."); + + DictScope SymDs(W, "CSECT Auxiliary Entry"); + W.printNumber("Index", + Obj.getSymbolIndex(reinterpret_cast(AuxEntPtr))); + if ((AuxEntPtr->SymbolAlignmentAndType & SymbolTypeMask) == XCOFF::XTY_LD) + W.printNumber("ContainingCsectSymbolIndex", AuxEntPtr->SectionOrLength); + else + W.printNumber("SectionLen", AuxEntPtr->SectionOrLength); + W.printHex("ParameterHashIndex", AuxEntPtr->ParameterHashIndex); + W.printHex("TypeChkSectNum", AuxEntPtr->TypeChkSectNum); + // Print out symbol alignment and type. + W.printNumber("SymbolAlignmentLog2", + (AuxEntPtr->SymbolAlignmentAndType & SymbolAlignmentMask) >> + SymbolAlignmentBitOffset); + W.printEnum("SymbolType", AuxEntPtr->SymbolAlignmentAndType & SymbolTypeMask, + makeArrayRef(CsectSymbolTypeClass)); + W.printEnum("StorageMappingClass", + static_cast(AuxEntPtr->StorageMappingClass), + makeArrayRef(CsectStorageMappingClass)); + W.printHex("StabInfoIndex", AuxEntPtr->StabInfoIndex); + W.printHex("StabSectNum", AuxEntPtr->StabSectNum); +} + +void XCOFFDumper::printSectAuxEntForStat( + const XCOFFSectAuxEntForStat *AuxEntPtr) { + assert(!Obj.is64Bit() && "32-bit interface called on 64-bit object file."); + + DictScope SymDs(W, "Sect Auxiliary Entry For Stat"); + W.printNumber("Index", + Obj.getSymbolIndex(reinterpret_cast(AuxEntPtr))); + W.printNumber("SectionLength", AuxEntPtr->SectionLength); + + // Unlike the corresponding fields in the section header, NumberOfRelocEnt + // and NumberOfLineNum do not handle values greater than 65535. 
+ W.printNumber("NumberOfRelocEnt", AuxEntPtr->NumberOfRelocEnt); + W.printNumber("NumberOfLineNum", AuxEntPtr->NumberOfLineNum); +} + +static const EnumEntry SymStorageClass[] = { +#define ECase(X) \ + { #X, XCOFF::X } + ECase(C_NULL), ECase(C_AUTO), ECase(C_EXT), ECase(C_STAT), + ECase(C_REG), ECase(C_EXTDEF), ECase(C_LABEL), ECase(C_ULABEL), + ECase(C_MOS), ECase(C_ARG), ECase(C_STRTAG), ECase(C_MOU), + ECase(C_UNTAG), ECase(C_TPDEF), ECase(C_USTATIC), ECase(C_ENTAG), + ECase(C_MOE), ECase(C_REGPARM), ECase(C_FIELD), ECase(C_BLOCK), + ECase(C_FCN), ECase(C_EOS), ECase(C_FILE), ECase(C_LINE), + ECase(C_ALIAS), ECase(C_HIDDEN), ECase(C_HIDEXT), ECase(C_BINCL), + ECase(C_EINCL), ECase(C_INFO), ECase(C_WEAKEXT), ECase(C_DWARF), + ECase(C_GSYM), ECase(C_LSYM), ECase(C_PSYM), ECase(C_RSYM), + ECase(C_RPSYM), ECase(C_STSYM), ECase(C_TCSYM), ECase(C_BCOMM), + ECase(C_ECOML), ECase(C_ECOMM), ECase(C_DECL), ECase(C_ENTRY), + ECase(C_FUN), ECase(C_BSTAT), ECase(C_ESTAT), ECase(C_GTLS), + ECase(C_STTLS), ECase(C_EFCN) +#undef ECase +}; + +static StringRef GetSymbolValueName(XCOFF::StorageClass SC) { + switch (SC) { + case XCOFF::C_EXT: + case XCOFF::C_WEAKEXT: + case XCOFF::C_HIDEXT: + case XCOFF::C_STAT: + return "Value (RelocatableAddress)"; + case XCOFF::C_FILE: + return "Value (SymbolTableIndex)"; + case XCOFF::C_FCN: + case XCOFF::C_BLOCK: + case XCOFF::C_FUN: + case XCOFF::C_STSYM: + case XCOFF::C_BINCL: + case XCOFF::C_EINCL: + case XCOFF::C_INFO: + case XCOFF::C_BSTAT: + case XCOFF::C_LSYM: + case XCOFF::C_PSYM: + case XCOFF::C_RPSYM: + case XCOFF::C_RSYM: + case XCOFF::C_ECOML: + case XCOFF::C_DWARF: + assert(false && "This StorageClass for the symbol is not yet implemented."); + return ""; + default: + return "Value"; + } +} + +static const EnumEntry CFileLangIdClass[] = { +#define ECase(X) \ + { #X, XCOFF::X } + ECase(TB_C), ECase(TB_CPLUSPLUS) +#undef ECase +}; + +static const EnumEntry CFileCpuIdClass[] = { +#define ECase(X) \ + { #X, XCOFF::X } + ECase(TCPU_PPC64), ECase(TCPU_COM), ECase(TCPU_970) +#undef ECase +}; + +void XCOFFDumper::printSymbol(const SymbolRef &S) { + if (Obj.is64Bit()) + report_fatal_error("64-bit support is unimplemented."); + + DataRefImpl SymbolDRI = S.getRawDataRefImpl(); + const XCOFFSymbolEntry *SymbolEntPtr = Obj.toSymbolEntry(SymbolDRI); + + XCOFFSymbolRef XCOFFSymRef(SymbolDRI, &Obj); + uint8_t NumberOfAuxEntries = XCOFFSymRef.getNumberOfAuxEntries(); + + DictScope SymDs(W, "Symbol"); + + StringRef SymbolName = + unwrapOrError(Obj.getFileName(), Obj.getSymbolName(SymbolDRI)); + + W.printNumber("Index", + Obj.getSymbolIndex(reinterpret_cast(SymbolEntPtr))); + W.printString("Name", SymbolName); + W.printHex(GetSymbolValueName(SymbolEntPtr->StorageClass), + SymbolEntPtr->Value); + + StringRef SectionName = + unwrapOrError(Obj.getFileName(), Obj.getSymbolSectionName(SymbolEntPtr)); + + W.printString("Section", SectionName); + if (XCOFFSymRef.getStorageClass() == XCOFF::C_FILE) { + W.printEnum("Source Language ID", + SymbolEntPtr->CFileLanguageIdAndTypeId.LanguageId, + makeArrayRef(CFileLangIdClass)); + W.printEnum("CPU Version ID", + SymbolEntPtr->CFileLanguageIdAndTypeId.CpuTypeId, + makeArrayRef(CFileCpuIdClass)); + } else + W.printHex("Type", SymbolEntPtr->SymbolType); + + W.printEnum("StorageClass", static_cast(SymbolEntPtr->StorageClass), + makeArrayRef(SymStorageClass)); + W.printNumber("NumberOfAuxEntries", SymbolEntPtr->NumberOfAuxEntries); + + if (NumberOfAuxEntries == 0) + return; + + switch (XCOFFSymRef.getStorageClass()) { + case XCOFF::C_FILE: + 
// If the symbol is C_FILE and has auxiliary entries... + for (int i = 1; i <= NumberOfAuxEntries; i++) { + const XCOFFFileAuxEnt *FileAuxEntPtr = + reinterpret_cast(SymbolEntPtr + i); +#ifndef NDEBUG + Obj.checkSymbolEntryPointer(reinterpret_cast(FileAuxEntPtr)); +#endif + printFileAuxEnt(FileAuxEntPtr); + } + break; + case XCOFF::C_EXT: + case XCOFF::C_WEAKEXT: + case XCOFF::C_HIDEXT: + // If the symbol is for a function, and it has more than 1 auxiliary entry, + // then one of them must be function auxiliary entry which we do not + // support yet. + if (XCOFFSymRef.isFunction() && NumberOfAuxEntries >= 2) + report_fatal_error("Function auxiliary entry printing is unimplemented."); + + // If there is more than 1 auxiliary entry, instead of printing out + // error information, print out the raw Auxiliary entry from 1st till + // the last - 1. The last one must be a CSECT Auxiliary Entry. + for (int i = 1; i < NumberOfAuxEntries; i++) { + W.startLine() << "!Unexpected raw auxiliary entry data:\n"; + W.startLine() << format_bytes( + ArrayRef(reinterpret_cast(SymbolEntPtr + i), + XCOFF::SymbolTableEntrySize)); + } + + // The symbol's last auxiliary entry is a CSECT Auxiliary Entry. + printCsectAuxEnt32(XCOFFSymRef.getXCOFFCsectAuxEnt32()); + break; + case XCOFF::C_STAT: + if (NumberOfAuxEntries > 1) + report_fatal_error( + "C_STAT symbol should not have more than 1 auxiliary entry."); + + const XCOFFSectAuxEntForStat *StatAuxEntPtr; + StatAuxEntPtr = + reinterpret_cast(SymbolEntPtr + 1); +#ifndef NDEBUG + Obj.checkSymbolEntryPointer(reinterpret_cast(StatAuxEntPtr)); +#endif + printSectAuxEntForStat(StatAuxEntPtr); + break; + case XCOFF::C_DWARF: + case XCOFF::C_BLOCK: + case XCOFF::C_FCN: + report_fatal_error("Symbol table entry printing for this storage class " + "type is unimplemented."); + break; + default: + for (int i = 1; i <= NumberOfAuxEntries; i++) { + W.startLine() << "!Unexpected raw auxiliary entry data:\n"; + W.startLine() << format_bytes( + ArrayRef(reinterpret_cast(SymbolEntPtr + i), + XCOFF::SymbolTableEntrySize)); + } + break; + } } void XCOFFDumper::printSymbols() { - llvm_unreachable("Unimplemented functionality for XCOFFDumper"); + ListScope Group(W, "Symbols"); + for (const SymbolRef &S : Obj.symbols()) + printSymbol(S); } void XCOFFDumper::printDynamicSymbols() { @@ -134,6 +454,39 @@ static const EnumEntry SectionTypeFlagsNames[] = { #undef ECase }; +template +void XCOFFDumper::printOverflowSectionHeader(T &Sec) const { + if (Obj.is64Bit()) { + reportWarning(make_error("An 64-bit XCOFF object file may not " + "contain an overflow section header.", + object_error::parse_failed), + Obj.getFileName()); + } + + W.printString("Name", Sec.getName()); + W.printNumber("NumberOfRelocations", Sec.PhysicalAddress); + W.printNumber("NumberOfLineNumbers", Sec.VirtualAddress); + W.printHex("Size", Sec.SectionSize); + W.printHex("RawDataOffset", Sec.FileOffsetToRawData); + W.printHex("RelocationPointer", Sec.FileOffsetToRelocationInfo); + W.printHex("LineNumberPointer", Sec.FileOffsetToLineNumberInfo); + W.printNumber("IndexOfSectionOverflowed", Sec.NumberOfRelocations); + W.printNumber("IndexOfSectionOverflowed", Sec.NumberOfLineNumbers); +} + +template +void XCOFFDumper::printGenericSectionHeader(T &Sec) const { + W.printString("Name", Sec.getName()); + W.printHex("PhysicalAddress", Sec.PhysicalAddress); + W.printHex("VirtualAddress", Sec.VirtualAddress); + W.printHex("Size", Sec.SectionSize); + W.printHex("RawDataOffset", Sec.FileOffsetToRawData); + W.printHex("RelocationPointer", 
Sec.FileOffsetToRelocationInfo); + W.printHex("LineNumberPointer", Sec.FileOffsetToLineNumberInfo); + W.printNumber("NumberOfRelocations", Sec.NumberOfRelocations); + W.printNumber("NumberOfLineNumbers", Sec.NumberOfLineNumbers); +} + template void XCOFFDumper::printSectionHeaders(ArrayRef Sections) { ListScope Group(W, "Sections"); @@ -143,27 +496,28 @@ void XCOFFDumper::printSectionHeaders(ArrayRef Sections) { DictScope SecDS(W, "Section"); W.printNumber("Index", Index++); - W.printString("Name", Sec.getName()); - - W.printHex("PhysicalAddress", Sec.PhysicalAddress); - W.printHex("VirtualAddress", Sec.VirtualAddress); - W.printHex("Size", Sec.SectionSize); - W.printHex("RawDataOffset", Sec.FileOffsetToRawData); - W.printHex("RelocationPointer", Sec.FileOffsetToRelocationInfo); - W.printHex("LineNumberPointer", Sec.FileOffsetToLineNumberInfo); - - // TODO Need to add overflow handling when NumberOfX == _OVERFLOW_MARKER - // in 32-bit object files. - W.printNumber("NumberOfRelocations", Sec.NumberOfRelocations); - W.printNumber("NumberOfLineNumbers", Sec.NumberOfLineNumbers); - - // The most significant 16-bits represent the DWARF section subtype. For - // now we just dump the section type flags. - uint16_t Flags = Sec.Flags & 0xffffu; - if (Flags & SectionFlagsReservedMask) - W.printHex("Flags", "Reserved", Flags); + + uint16_t SectionType = Sec.Flags & SectionFlagsTypeMask; + switch (SectionType) { + case XCOFF::STYP_OVRFLO: + printOverflowSectionHeader(Sec); + break; + case XCOFF::STYP_LOADER: + case XCOFF::STYP_EXCEPT: + case XCOFF::STYP_TYPCHK: + // TODO The interpretation of loader, exception and type check section + // headers are different from that of generic section headers. We will + // implement them later. We interpret them as generic section headers for + // now. + default: + printGenericSectionHeader(Sec); + break; + } + // For now we just dump the section type portion of the flags. 
+ if (SectionType & SectionFlagsReservedMask) + W.printHex("Flags", "Reserved", SectionType); else - W.printEnum("Type", Flags, makeArrayRef(SectionTypeFlagsNames)); + W.printEnum("Type", SectionType, makeArrayRef(SectionTypeFlagsNames)); } if (opts::SectionRelocations) diff --git a/tools/llvm-readobj/llvm-readobj.cpp b/tools/llvm-readobj/llvm-readobj.cpp index 1bd5bb74bf29..4db13897879d 100644 --- a/tools/llvm-readobj/llvm-readobj.cpp +++ b/tools/llvm-readobj/llvm-readobj.cpp @@ -231,26 +231,11 @@ namespace opts { "codeview-subsection-bytes", cl::desc("Dump raw contents of codeview debug sections and records")); - // --arm-attributes - cl::opt ARMAttributes("arm-attributes", - cl::desc("Display the ARM attributes section")); - - // --mips-plt-got - cl::opt - MipsPLTGOT("mips-plt-got", - cl::desc("Display the MIPS GOT and PLT GOT sections")); - - // --mips-abi-flags - cl::opt MipsABIFlags("mips-abi-flags", - cl::desc("Display the MIPS.abiflags section")); - - // --mips-reginfo - cl::opt MipsReginfo("mips-reginfo", - cl::desc("Display the MIPS .reginfo section")); - - // --mips-options - cl::opt MipsOptions("mips-options", - cl::desc("Display the MIPS .MIPS.options section")); + // --arch-specific + cl::opt ArchSpecificInfo("arch-specific", + cl::desc("Displays architecture-specific information, if there is any.")); + cl::alias ArchSpecifcInfoShort("A", cl::desc("Alias for --arch-specific"), + cl::aliasopt(ArchSpecificInfo), cl::NotHidden); // --coff-imports cl::opt @@ -324,6 +309,11 @@ namespace opts { PrintStackMap("stackmap", cl::desc("Display contents of stackmap section")); + // --stack-sizes + cl::opt + PrintStackSizes("stack-sizes", + cl::desc("Display contents of all stack sizes sections")); + // --version-info, -V cl::opt VersionInfo("version-info", @@ -368,63 +358,45 @@ namespace opts { HelpResponse("\nPass @FILE as argument to read options from FILE.\n"); } // namespace opts +static StringRef ToolName; + namespace llvm { -LLVM_ATTRIBUTE_NORETURN void reportError(Twine Msg) { +LLVM_ATTRIBUTE_NORETURN static void error(Twine Msg) { + // Flush the standard output to print the error at a + // proper place. fouts().flush(); errs() << "\n"; - WithColor::error(errs()) << Msg << "\n"; + WithColor::error(errs(), ToolName) << Msg << "\n"; exit(1); } -void reportError(StringRef Input, Error Err) { +LLVM_ATTRIBUTE_NORETURN void reportError(Error Err, StringRef Input) { + assert(Err); if (Input == "-") Input = ""; - error(createFileError(Input, std::move(Err))); + handleAllErrors(createFileError(Input, std::move(Err)), + [&](const ErrorInfoBase &EI) { error(EI.message()); }); + llvm_unreachable("error() call should never return"); } -void reportWarning(Twine Msg) { - fouts().flush(); - errs() << "\n"; - WithColor::warning(errs()) << Msg << "\n"; -} - -void warn(Error Err) { - handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EI) { - reportWarning(EI.message()); - }); -} - -void error(Error EC) { - if (!EC) - return; - handleAllErrors(std::move(EC), - [&](const ErrorInfoBase &EI) { reportError(EI.message()); }); -} +void reportWarning(Error Err, StringRef Input) { + assert(Err); + if (Input == "-") + Input = ""; -void error(std::error_code EC) { - if (!EC) - return; - reportError(EC.message()); + // Flush the standard output to print the warning at a + // proper place. 
+ fouts().flush(); + handleAllErrors( + createFileError(Input, std::move(Err)), [&](const ErrorInfoBase &EI) { + errs() << "\n"; + WithColor::warning(errs(), ToolName) << EI.message() << "\n"; + }); } } // namespace llvm -static void reportError(StringRef Input, std::error_code EC) { - reportError(Input, errorCodeToError(EC)); -} - -static bool isMipsArch(unsigned Arch) { - switch (Arch) { - case llvm::Triple::mips: - case llvm::Triple::mipsel: - case llvm::Triple::mips64: - case llvm::Triple::mips64el: - return true; - default: - return false; - } -} namespace { struct ReadObjTypeTableBuilder { ReadObjTypeTableBuilder() @@ -471,19 +443,19 @@ static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer, std::unique_ptr Dumper; if (std::error_code EC = createDumper(Obj, Writer, Dumper)) - reportError(FileStr, EC); + reportError(errorCodeToError(EC), FileStr); - Writer.startLine() << "\n"; - if (opts::Output == opts::LLVM) { + if (opts::Output == opts::LLVM || opts::InputFilenames.size() > 1 || A) { + Writer.startLine() << "\n"; Writer.printString("File", FileStr); + } + if (opts::Output == opts::LLVM) { Writer.printString("Format", Obj->getFileFormatName()); Writer.printString("Arch", Triple::getArchTypeName( (llvm::Triple::ArchType)Obj->getArch())); Writer.printString("AddressSize", formatv("{0}bit", 8 * Obj->getBytesInAddress())); Dumper->printLoadName(); - } else if (opts::Output == opts::GNU && A) { - Writer.printString("File", FileStr); } if (opts::FileHeaders) @@ -519,19 +491,8 @@ static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer, if (Obj->isELF()) { if (opts::ELFLinkerOptions) Dumper->printELFLinkerOptions(); - if (Obj->getArch() == llvm::Triple::arm) - if (opts::ARMAttributes) - Dumper->printAttributes(); - if (isMipsArch(Obj->getArch())) { - if (opts::MipsPLTGOT) - Dumper->printMipsPLTGOT(); - if (opts::MipsABIFlags) - Dumper->printMipsABIFlags(); - if (opts::MipsReginfo) - Dumper->printMipsReginfo(); - if (opts::MipsOptions) - Dumper->printMipsOptions(); - } + if (opts::ArchSpecificInfo) + Dumper->printArchSpecificInfo(); if (opts::SectionGroups) Dumper->printGroupSections(); if (opts::HashHistogram) @@ -583,6 +544,8 @@ static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer, } if (opts::PrintStackMap) Dumper->printStackMap(); + if (opts::PrintStackSizes) + Dumper->printStackSizes(); } /// Dumps each object file in \a Arc; @@ -591,9 +554,8 @@ static void dumpArchive(const Archive *Arc, ScopedPrinter &Writer) { for (auto &Child : Arc->children(Err)) { Expected> ChildOrErr = Child.getAsBinary(); if (!ChildOrErr) { - if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) { - reportError(Arc->getFileName(), std::move(E)); - } + if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) + reportError(std::move(E), Arc->getFileName()); continue; } if (ObjectFile *Obj = dyn_cast(&*ChildOrErr.get())) @@ -601,10 +563,11 @@ static void dumpArchive(const Archive *Arc, ScopedPrinter &Writer) { else if (COFFImportFile *Imp = dyn_cast(&*ChildOrErr.get())) dumpCOFFImportFile(Imp, Writer); else - reportError(Arc->getFileName(), readobj_error::unrecognized_file_format); + reportError(errorCodeToError(readobj_error::unrecognized_file_format), + Arc->getFileName()); } if (Err) - reportError(Arc->getFileName(), std::move(Err)); + reportError(std::move(Err), Arc->getFileName()); } /// Dumps each object file in \a MachO Universal Binary; @@ -614,9 +577,8 @@ static void dumpMachOUniversalBinary(const MachOUniversalBinary *UBinary, Expected> 
ObjOrErr = Obj.getAsObjectFile(); if (ObjOrErr) dumpObject(&*ObjOrErr.get(), Writer); - else if (auto E = isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) { - reportError(UBinary->getFileName(), ObjOrErr.takeError()); - } + else if (auto E = isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) + reportError(ObjOrErr.takeError(), UBinary->getFileName()); else if (Expected> AOrErr = Obj.getAsArchive()) dumpArchive(&*AOrErr.get(), Writer); } @@ -627,7 +589,7 @@ static void dumpWindowsResourceFile(WindowsResource *WinRes, ScopedPrinter &Printer) { WindowsRes::Dumper Dumper(WinRes, Printer); if (auto Err = Dumper.printData()) - reportError(WinRes->getFileName(), std::move(Err)); + reportError(std::move(Err), WinRes->getFileName()); } @@ -636,7 +598,7 @@ static void dumpInput(StringRef File, ScopedPrinter &Writer) { // Attempt to open the binary. Expected> BinaryOrErr = createBinary(File); if (!BinaryOrErr) - reportError(File, BinaryOrErr.takeError()); + reportError(BinaryOrErr.takeError(), File); Binary &Binary = *BinaryOrErr.get().getBinary(); if (Archive *Arc = dyn_cast(&Binary)) @@ -651,7 +613,8 @@ static void dumpInput(StringRef File, ScopedPrinter &Writer) { else if (WindowsResource *WinRes = dyn_cast(&Binary)) dumpWindowsResourceFile(WinRes, Writer); else - reportError(File, readobj_error::unrecognized_file_format); + reportError(errorCodeToError(readobj_error::unrecognized_file_format), + File); CVTypes.Binaries.push_back(std::move(*BinaryOrErr)); } @@ -702,6 +665,7 @@ static void registerReadelfAliases() { int main(int argc, const char *argv[]) { InitLLVM X(argc, argv); + ToolName = argv[0]; // Register the target printer for --version. cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion); @@ -727,6 +691,10 @@ int main(int argc, const char *argv[]) { opts::UnwindInfo = true; opts::SectionGroups = true; opts::HashHistogram = true; + if (opts::Output == opts::LLVM) { + opts::Addrsig = true; + opts::PrintStackSizes = true; + } } if (opts::Headers) { diff --git a/tools/llvm-readobj/llvm-readobj.h b/tools/llvm-readobj/llvm-readobj.h index 0e02da4cb847..d9813f5dea62 100644 --- a/tools/llvm-readobj/llvm-readobj.h +++ b/tools/llvm-readobj/llvm-readobj.h @@ -21,30 +21,13 @@ namespace llvm { } // Various helper functions. 
- LLVM_ATTRIBUTE_NORETURN void reportError(Twine Msg); - void reportError(StringRef Input, Error Err); - void reportWarning(Twine Msg); - void warn(llvm::Error Err); - void error(std::error_code EC); - void error(llvm::Error EC); - template T error(llvm::Expected &&E) { - error(E.takeError()); - return std::move(*E); - } + LLVM_ATTRIBUTE_NORETURN void reportError(Error Err, StringRef Input); + void reportWarning(Error Err, StringRef Input); - template T unwrapOrError(ErrorOr EO) { - if (EO) - return *EO; - reportError(EO.getError().message()); - } - template T unwrapOrError(Expected EO) { + template T unwrapOrError(StringRef Input, Expected EO) { if (EO) return *EO; - std::string Buf; - raw_string_ostream OS(Buf); - logAllUnhandledErrors(EO.takeError(), OS); - OS.flush(); - reportError(Buf); + reportError(EO.takeError(), Input); } } // namespace llvm diff --git a/tools/llvm-reduce/CMakeLists.txt b/tools/llvm-reduce/CMakeLists.txt new file mode 100644 index 000000000000..48de0ffa78a1 --- /dev/null +++ b/tools/llvm-reduce/CMakeLists.txt @@ -0,0 +1,26 @@ +set(LLVM_LINK_COMPONENTS + AllTargetsAsmParsers + AllTargetsCodeGens + AllTargetsDescs + AllTargetsInfos + Core + IRReader + Support + Target + TransformUtils + ) + +add_llvm_tool(llvm-reduce + llvm-reduce.cpp + TestRunner.cpp + deltas/Delta.cpp + deltas/ReduceFunctions.cpp + deltas/ReduceGlobalVars.cpp + deltas/ReduceMetadata.cpp + deltas/ReduceArguments.cpp + deltas/ReduceBasicBlocks.cpp + deltas/ReduceInstructions.cpp + + DEPENDS + intrinsics_gen + ) diff --git a/tools/llvm-reduce/DeltaManager.h b/tools/llvm-reduce/DeltaManager.h new file mode 100644 index 000000000000..2309c3adf4e6 --- /dev/null +++ b/tools/llvm-reduce/DeltaManager.h @@ -0,0 +1,36 @@ +//===- DeltaManager.h - Runs Delta Passes to reduce Input -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file calls each specialized Delta pass in order to reduce the input IR +// file. +// +//===----------------------------------------------------------------------===// + +#include "TestRunner.h" +#include "deltas/Delta.h" +#include "deltas/ReduceArguments.h" +#include "deltas/ReduceBasicBlocks.h" +#include "deltas/ReduceFunctions.h" +#include "deltas/ReduceGlobalVars.h" +#include "deltas/ReduceMetadata.h" +#include "deltas/ReduceInstructions.h" + +namespace llvm { + +// TODO: Add CLI option to run only specified Passes (for unit tests) +inline void runDeltaPasses(TestRunner &Tester) { + reduceFunctionsDeltaPass(Tester); + reduceBasicBlocksDeltaPass(Tester); + reduceGlobalsDeltaPass(Tester); + reduceMetadataDeltaPass(Tester); + reduceArgumentsDeltaPass(Tester); + reduceInstructionsDeltaPass(Tester); + // TODO: Implement the remaining Delta Passes +} + +} // namespace llvm diff --git a/tools/llvm-reduce/LLVMBuild.txt b/tools/llvm-reduce/LLVMBuild.txt new file mode 100644 index 000000000000..7928f0503283 --- /dev/null +++ b/tools/llvm-reduce/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./tools/llvm-reduce/LLVMBuild.txt ------------------------*- Conf -*--===; +; +; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +; See https://llvm.org/LICENSE.txt for license information. 
+; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Tool +name = llvm-reduce +parent = Tools +required_libraries = + BitReader + IRReader + all-targets diff --git a/tools/llvm-reduce/TestRunner.cpp b/tools/llvm-reduce/TestRunner.cpp new file mode 100644 index 000000000000..d0e195d5697c --- /dev/null +++ b/tools/llvm-reduce/TestRunner.cpp @@ -0,0 +1,42 @@ +//===-- TestRunner.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "TestRunner.h" + +using namespace llvm; + +TestRunner::TestRunner(StringRef TestName, const std::vector &TestArgs) + : TestName(TestName), TestArgs(TestArgs) { +} + +/// Runs the interestingness test, passes file to be tested as first argument +/// and other specified test arguments after that. +int TestRunner::run(StringRef Filename) { + std::vector ProgramArgs; + ProgramArgs.push_back(TestName); + + for (const auto &Arg : TestArgs) + ProgramArgs.push_back(Arg); + + ProgramArgs.push_back(Filename); + + std::string ErrMsg; + int Result = sys::ExecuteAndWait( + TestName, ProgramArgs, /*Env=*/None, /*Redirects=*/None, + /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg); + + if (Result < 0) { + Error E = make_error("Error running interesting-ness test: " + + ErrMsg, + inconvertibleErrorCode()); + errs() << toString(std::move(E)); + exit(1); + } + + return !Result; +} diff --git a/tools/llvm-reduce/TestRunner.h b/tools/llvm-reduce/TestRunner.h new file mode 100644 index 000000000000..2270d6bd90b2 --- /dev/null +++ b/tools/llvm-reduce/TestRunner.h @@ -0,0 +1,46 @@ +//===-- tools/llvm-reduce/TestRunner.h ---------------------------*- C++ -*-===/ +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVMREDUCE_TESTRUNNER_H +#define LLVM_TOOLS_LLVMREDUCE_TESTRUNNER_H + +#include "llvm/ADT/SmallString.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" +#include + +namespace llvm { + +// This class contains all the info necessary for running the provided +// interesting-ness test, as well as the most reduced module and its +// respective filename. 
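An aside from the editor, not part of the imported patch: the class described above is easiest to understand from the caller's side. The sketch below shows one plausible way to drive it, mirroring what llvm-reduce.cpp later in this patch does; the script name, file name, and flag are invented for illustration, and the element type of the argument vector is assumed.

// Sketch only: driving TestRunner by hand (hypothetical names throughout).
#include "TestRunner.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Support/SourceMgr.h"
#include <string>
#include <vector>

static bool candidateStillInteresting(llvm::LLVMContext &Ctx) {
  // TestRunner stores a reference to the argument vector, so keep it alive
  // for as long as the runner is used (assumed to be std::vector<std::string>).
  std::vector<std::string> Args = {"--timeout=10"};  // hypothetical flag
  llvm::TestRunner Tester("./interesting.sh", Args); // hypothetical script

  llvm::SMDiagnostic Err;
  Tester.setProgram(llvm::parseIRFile("candidate.ll", Err, Ctx));
  // Delta.cpp (below) treats a non-zero return from run() as "the candidate
  // is still interesting".
  return Tester.run("candidate.ll");
}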
+class TestRunner { +public: + TestRunner(StringRef TestName, const std::vector &TestArgs); + + /// Runs the interesting-ness test for the specified file + /// @returns 0 if test was successful, 1 if otherwise + int run(StringRef Filename); + + /// Returns the most reduced version of the original testcase + Module *getProgram() const { return Program.get(); } + + void setProgram(std::unique_ptr P) { Program = std::move(P); } + +private: + StringRef TestName; + const std::vector &TestArgs; + std::unique_ptr Program; +}; + +} // namespace llvm + +#endif diff --git a/tools/llvm-reduce/deltas/Delta.cpp b/tools/llvm-reduce/deltas/Delta.cpp new file mode 100644 index 000000000000..0642241ddebd --- /dev/null +++ b/tools/llvm-reduce/deltas/Delta.cpp @@ -0,0 +1,162 @@ +//===- Delta.cpp - Delta Debugging Algorithm Implementation ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation for the Delta Debugging Algorithm: +// it splits a given set of Targets (i.e. Functions, Instructions, BBs, etc.) +// into chunks and tries to reduce the number chunks that are interesting. +// +//===----------------------------------------------------------------------===// + +#include "Delta.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include +#include + +using namespace llvm; + +bool IsReduced(Module &M, TestRunner &Test, SmallString<128> &CurrentFilepath) { + // Write Module to tmp file + int FD; + std::error_code EC = + sys::fs::createTemporaryFile("llvm-reduce", "ll", FD, CurrentFilepath); + if (EC) { + errs() << "Error making unique filename: " << EC.message() << "!\n"; + exit(1); + } + + ToolOutputFile Out(CurrentFilepath, FD); + M.print(Out.os(), /*AnnotationWriter=*/nullptr); + Out.os().close(); + if (Out.os().has_error()) { + errs() << "Error emitting bitcode to file '" << CurrentFilepath << "'!\n"; + exit(1); + } + + // Current Chunks aren't interesting + return Test.run(CurrentFilepath); +} + +/// Counts the amount of lines for a given file +static int getLines(StringRef Filepath) { + int Lines = 0; + std::string CurrLine; + std::ifstream FileStream(Filepath); + + while (std::getline(FileStream, CurrLine)) + ++Lines; + + return Lines; +} + +/// Splits Chunks in half and prints them. +/// If unable to split (when chunk size is 1) returns false. +static bool increaseGranularity(std::vector &Chunks) { + errs() << "Increasing granularity..."; + std::vector NewChunks; + bool SplitOne = false; + + for (auto &C : Chunks) { + if (C.end - C.begin == 0) + NewChunks.push_back(C); + else { + int Half = (C.begin + C.end) / 2; + NewChunks.push_back({C.begin, Half}); + NewChunks.push_back({Half + 1, C.end}); + SplitOne = true; + } + } + if (SplitOne) { + Chunks = NewChunks; + errs() << "Success! New Chunks:\n"; + for (auto C : Chunks) { + errs() << '\t'; + C.print(); + errs() << '\n'; + } + } + return SplitOne; +} + +/// Runs the Delta Debugging algorithm, splits the code into chunks and +/// reduces the amount of chunks that are considered interesting by the +/// given test. 
+void llvm::runDeltaPass( + TestRunner &Test, int Targets, + std::function &, Module *)> + ExtractChunksFromModule) { + assert(Targets >= 0); + if (!Targets) { + errs() << "\nNothing to reduce\n"; + return; + } + + if (Module *Program = Test.getProgram()) { + SmallString<128> CurrentFilepath; + if (!IsReduced(*Program, Test, CurrentFilepath)) { + errs() << "\nInput isn't interesting! Verify interesting-ness test\n"; + exit(1); + } + } + + std::vector Chunks = {{1, Targets}}; + std::set UninterestingChunks; + std::unique_ptr ReducedProgram; + + if (!increaseGranularity(Chunks)) { + errs() << "\nAlready at minimum size. Cannot reduce anymore.\n"; + return; + } + + do { + UninterestingChunks = {}; + for (int I = Chunks.size() - 1; I >= 0; --I) { + std::vector CurrentChunks; + + for (auto C : Chunks) + if (!UninterestingChunks.count(C) && C != Chunks[I]) + CurrentChunks.push_back(C); + + if (CurrentChunks.empty()) + continue; + + // Clone module before hacking it up.. + std::unique_ptr Clone = CloneModule(*Test.getProgram()); + // Generate Module with only Targets inside Current Chunks + ExtractChunksFromModule(CurrentChunks, Clone.get()); + + errs() << "Ignoring: "; + Chunks[I].print(); + for (auto C : UninterestingChunks) + C.print(); + + + + SmallString<128> CurrentFilepath; + if (!IsReduced(*Clone, Test, CurrentFilepath)) { + errs() << "\n"; + continue; + } + + UninterestingChunks.insert(Chunks[I]); + ReducedProgram = std::move(Clone); + errs() << " **** SUCCESS | lines: " << getLines(CurrentFilepath) << "\n"; + } + // Delete uninteresting chunks + erase_if(Chunks, [&UninterestingChunks](const Chunk &C) { + return UninterestingChunks.count(C); + }); + + } while (!UninterestingChunks.empty() || increaseGranularity(Chunks)); + + // If we reduced the testcase replace it + if (ReducedProgram) + Test.setProgram(std::move(ReducedProgram)); + errs() << "Couldn't increase anymore.\n"; +} diff --git a/tools/llvm-reduce/deltas/Delta.h b/tools/llvm-reduce/deltas/Delta.h new file mode 100644 index 000000000000..dbb18e4bd07f --- /dev/null +++ b/tools/llvm-reduce/deltas/Delta.h @@ -0,0 +1,76 @@ +//===- Delta.h - Delta Debugging Algorithm Implementation -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation for the Delta Debugging Algorithm: +// it splits a given set of Targets (i.e. Functions, Instructions, BBs, etc.) +// into chunks and tries to reduce the number chunks that are interesting. 
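For illustration only, and again not part of the patch itself: the splitting step described above is easiest to see on concrete numbers. This standalone sketch reproduces the arithmetic of increaseGranularity() for ten hypothetical targets and depends on nothing from LLVM; the local Chunk struct simply mirrors the one declared in Delta.h.

// Sketch only: how chunk granularity increases round by round.
#include <cstdio>
#include <vector>

struct Chunk {
  int begin, end;
};

static std::vector<Chunk> splitChunks(const std::vector<Chunk> &Chunks) {
  std::vector<Chunk> Out;
  for (const Chunk &C : Chunks) {
    if (C.end - C.begin == 0) { // single target: cannot be split further
      Out.push_back(C);
      continue;
    }
    int Half = (C.begin + C.end) / 2;
    Out.push_back({C.begin, Half});
    Out.push_back({Half + 1, C.end});
  }
  return Out;
}

int main() {
  // Ten targets, e.g. ten functions in a module, start as one chunk [1,10].
  std::vector<Chunk> Chunks = {{1, 10}};
  for (int Round = 1; Round <= 3; ++Round) {
    Chunks = splitChunks(Chunks);
    std::printf("round %d:", Round);
    for (const Chunk &C : Chunks)
      std::printf(" [%d,%d]", C.begin, C.end);
    std::printf("\n");
  }
  // Prints:
  // round 1: [1,5] [6,10]
  // round 2: [1,3] [4,5] [6,8] [9,10]
  // round 3: [1,2] [3,3] [4,4] [5,5] [6,7] [8,8] [9,9] [10,10]
}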
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVMREDUCE_LLVMREDUCE_DELTA_H +#define LLVM_TOOLS_LLVMREDUCE_LLVMREDUCE_DELTA_H + +#include "TestRunner.h" +#include +#include +#include + +namespace llvm { + +struct Chunk { + int begin; + int end; + + /// Helper function to verify if a given Target-index is inside the Chunk + bool contains(int Index) const { return Index >= begin && Index <= end; } + + void print() const { + errs() << "[" << begin; + if (end - begin != 0) + errs() << "," << end; + errs() << "]"; + } + + /// Operator when populating CurrentChunks in Generic Delta Pass + friend bool operator!=(const Chunk &C1, const Chunk &C2) { + return C1.begin != C2.begin || C1.end != C2.end; + } + + /// Operator used for sets + friend bool operator<(const Chunk &C1, const Chunk &C2) { + return std::tie(C1.begin, C1.end) < std::tie(C2.begin, C2.end); + } +}; + +/// This function implements the Delta Debugging algorithm, it receives a +/// number of Targets (e.g. Functions, Instructions, Basic Blocks, etc.) and +/// splits them in half; these chunks of targets are then tested while ignoring +/// one chunk, if a chunk is proven to be uninteresting (i.e. fails the test) +/// it is removed from consideration. The algorithm will attempt to split the +/// Chunks in half and start the process again until it can't split chunks +/// anymore. +/// +/// This function is intended to be called by each specialized delta pass (e.g. +/// RemoveFunctions) and receives three key parameters: +/// * Test: The main TestRunner instance which is used to run the provided +/// interesting-ness test, as well as to store and access the reduced Program. +/// * Targets: The amount of Targets that are going to be reduced by the +/// algorithm, for example, the RemoveGlobalVars pass would send the amount of +/// initialized GVs. +/// * ExtractChunksFromModule: A function used to tailor the main program so it +/// only contains Targets that are inside Chunks of the given iteration. +/// Note: This function is implemented by each specialized Delta pass +/// +/// Other implementations of the Delta Debugging algorithm can also be found in +/// the CReduce, Delta, and Lithium projects. +void runDeltaPass(TestRunner &Test, int Targets, + std::function &, Module *)> + ExtractChunksFromModule); +} // namespace llvm + +#endif diff --git a/tools/llvm-reduce/deltas/ReduceArguments.cpp b/tools/llvm-reduce/deltas/ReduceArguments.cpp new file mode 100644 index 000000000000..f5f14b83f42c --- /dev/null +++ b/tools/llvm-reduce/deltas/ReduceArguments.cpp @@ -0,0 +1,125 @@ +//===- ReduceArguments.cpp - Specialized Delta Pass -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce uninteresting Arguments from defined functions. 
+// +//===----------------------------------------------------------------------===// + +#include "ReduceArguments.h" +#include "Delta.h" +#include "llvm/ADT/SmallVector.h" +#include +#include + +using namespace llvm; + +/// Goes over OldF calls and replaces them with a call to NewF +static void replaceFunctionCalls(Function &OldF, Function &NewF, + const std::set &ArgIndexesToKeep) { + const auto &Users = OldF.users(); + for (auto I = Users.begin(), E = Users.end(); I != E; ) + if (auto *CI = dyn_cast(*I++)) { + SmallVector Args; + for (auto ArgI = CI->arg_begin(), E = CI->arg_end(); ArgI != E; ++ArgI) + if (ArgIndexesToKeep.count(ArgI - CI->arg_begin())) + Args.push_back(*ArgI); + + CallInst *NewCI = CallInst::Create(&NewF, Args); + NewCI->setCallingConv(NewF.getCallingConv()); + if (!CI->use_empty()) + CI->replaceAllUsesWith(NewCI); + ReplaceInstWithInst(CI, NewCI); + } +} + +/// Removes out-of-chunk arguments from functions, and modifies their calls +/// accordingly. It also removes allocations of out-of-chunk arguments. +static void extractArgumentsFromModule(std::vector ChunksToKeep, + Module *Program) { + int I = 0, ArgCount = 0; + std::set ArgsToKeep; + std::vector Funcs; + // Get inside-chunk arguments, as well as their parent function + for (auto &F : *Program) + if (!F.isDeclaration()) { + Funcs.push_back(&F); + for (auto &A : F.args()) + if (I < (int)ChunksToKeep.size()) { + if (ChunksToKeep[I].contains(++ArgCount)) + ArgsToKeep.insert(&A); + if (ChunksToKeep[I].end == ArgCount) + ++I; + } + } + + for (auto *F : Funcs) { + ValueToValueMapTy VMap; + std::vector InstToDelete; + for (auto &A : F->args()) + if (!ArgsToKeep.count(&A)) { + // By adding undesired arguments to the VMap, CloneFunction will remove + // them from the resulting Function + VMap[&A] = UndefValue::get(A.getType()); + for (auto *U : A.users()) + if (auto *I = dyn_cast(*&U)) + InstToDelete.push_back(I); + } + // Delete any instruction that uses the argument + for (auto *I : InstToDelete) { + I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->eraseFromParent(); + } + + // No arguments to reduce + if (VMap.empty()) + continue; + + std::set ArgIndexesToKeep; + int ArgI = 0; + for (auto &Arg : F->args()) + if (ArgsToKeep.count(&Arg)) + ArgIndexesToKeep.insert(++ArgI); + + auto *ClonedFunc = CloneFunction(F, VMap); + // In order to preserve function order, we move Clone after old Function + ClonedFunc->removeFromParent(); + Program->getFunctionList().insertAfter(F->getIterator(), ClonedFunc); + + replaceFunctionCalls(*F, *ClonedFunc, ArgIndexesToKeep); + // Rename Cloned Function to Old's name + std::string FName = F->getName(); + F->eraseFromParent(); + ClonedFunc->setName(FName); + } +} + +/// Counts the amount of arguments in non-declaration functions and prints their +/// respective name, index, and parent function name +static int countArguments(Module *Program) { + // TODO: Silence index with --quiet flag + outs() << "----------------------------\n"; + outs() << "Param Index Reference:\n"; + int ArgsCount = 0; + for (auto &F : *Program) + if (!F.isDeclaration() && F.arg_size()) { + outs() << " " << F.getName() << "\n"; + for (auto &A : F.args()) + outs() << "\t" << ++ArgsCount << ": " << A.getName() << "\n"; + + outs() << "----------------------------\n"; + } + + return ArgsCount; +} + +void llvm::reduceArgumentsDeltaPass(TestRunner &Test) { + outs() << "*** Reducing Arguments...\n"; + int ArgCount = countArguments(Test.getProgram()); + runDeltaPass(Test, ArgCount, extractArgumentsFromModule); +} diff 
--git a/tools/llvm-reduce/deltas/ReduceArguments.h b/tools/llvm-reduce/deltas/ReduceArguments.h new file mode 100644 index 000000000000..d9682b44f74d --- /dev/null +++ b/tools/llvm-reduce/deltas/ReduceArguments.h @@ -0,0 +1,21 @@ +//===- ReduceArguments.h - Specialized Delta Pass -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce uninteresting Arguments from defined functions. +// +//===----------------------------------------------------------------------===// + +#include "Delta.h" +#include "llvm/IR/Argument.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" + +namespace llvm { +void reduceArgumentsDeltaPass(TestRunner &Test); +} // namespace llvm diff --git a/tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp b/tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp new file mode 100644 index 000000000000..03c3962d2fd9 --- /dev/null +++ b/tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp @@ -0,0 +1,146 @@ +//===- ReduceArguments.cpp - Specialized Delta Pass -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce uninteresting Arguments from defined functions. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "ReduceBasicBlocks.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+
+using namespace llvm;
+
+/// Replaces BB Terminator with one that only contains Chunk BBs
+static void replaceBranchTerminator(BasicBlock &BB,
+                                    std::set<BasicBlock *> BBsToKeep) {
+  auto Term = BB.getTerminator();
+  std::vector<BasicBlock *> ChunkSuccessors;
+  for (auto Succ : successors(&BB))
+    if (BBsToKeep.count(Succ))
+      ChunkSuccessors.push_back(Succ);
+
+  // BB only references Chunk BBs
+  if (ChunkSuccessors.size() == Term->getNumSuccessors())
+    return;
+
+  bool IsBranch = isa<BranchInst>(Term);
+  Value *Address = nullptr;
+  if (auto IndBI = dyn_cast<IndirectBrInst>(Term))
+    Address = IndBI->getAddress();
+
+  Term->eraseFromParent();
+
+  if (ChunkSuccessors.empty()) {
+    ReturnInst::Create(BB.getContext(), nullptr, &BB);
+    return;
+  }
+
+  if (IsBranch)
+    BranchInst::Create(ChunkSuccessors[0], &BB);
+
+  if (Address) {
+    auto NewIndBI =
+        IndirectBrInst::Create(Address, ChunkSuccessors.size(), &BB);
+    for (auto Dest : ChunkSuccessors)
+      NewIndBI->addDestination(Dest);
+  }
+}
+
+/// Removes uninteresting BBs from a switch; if the default case ends up being
+/// uninteresting, the switch is replaced with a void return (since it has to
+/// be replaced with something).
+static void removeUninterestingBBsFromSwitch(SwitchInst &SwInst,
+                                             std::set<BasicBlock *> BBsToKeep) {
+  if (!BBsToKeep.count(SwInst.getDefaultDest())) {
+    ReturnInst::Create(SwInst.getContext(), nullptr, SwInst.getParent());
+    SwInst.eraseFromParent();
+  } else
+    for (int I = 0, E = SwInst.getNumCases(); I != E; ++I) {
+      auto Case = SwInst.case_begin() + I;
+      if (!BBsToKeep.count(Case->getCaseSuccessor())) {
+        SwInst.removeCase(Case);
+        --I;
+        --E;
+      }
+    }
+}
+
+/// Removes out-of-chunk basic blocks from functions, and updates the
+/// branches, switches, and phi nodes that reference them.
+static void extractBasicBlocksFromModule(std::vector ChunksToKeep, + Module *Program) { + int I = 0, BBCount = 0; + std::set BBsToKeep; + + for (auto &F : *Program) + for (auto &BB : F) + if (I < (int)ChunksToKeep.size()) { + if (ChunksToKeep[I].contains(++BBCount)) + BBsToKeep.insert(&BB); + if (ChunksToKeep[I].end == BBCount) + ++I; + } + + std::vector BBsToDelete; + for (auto &F : *Program) + for (auto &BB : F) { + if (!BBsToKeep.count(&BB)) { + BBsToDelete.push_back(&BB); + // Remove out-of-chunk BB from successor phi nodes + for (auto *Succ : successors(&BB)) + Succ->removePredecessor(&BB); + } + } + + // Replace terminators that reference out-of-chunk BBs + for (auto &F : *Program) + for (auto &BB : F) { + if (auto *SwInst = dyn_cast(BB.getTerminator())) + removeUninterestingBBsFromSwitch(*SwInst, BBsToKeep); + else + replaceBranchTerminator(BB, BBsToKeep); + } + + // Replace out-of-chunk switch uses + for (auto &BB : BBsToDelete) { + // Instructions might be referenced in other BBs + for (auto &I : *BB) + I.replaceAllUsesWith(UndefValue::get(I.getType())); + BB->eraseFromParent(); + } +} + +/// Counts the amount of basic blocks and prints their name & respective index +static int countBasicBlocks(Module *Program) { + // TODO: Silence index with --quiet flag + outs() << "----------------------------\n"; + int BBCount = 0; + for (auto &F : *Program) + for (auto &BB : F) { + if (BB.hasName()) + outs() << "\t" << ++BBCount << ": " << BB.getName() << "\n"; + else + outs() << "\t" << ++BBCount << ": Unnamed\n"; + } + + return BBCount; +} + +void llvm::reduceBasicBlocksDeltaPass(TestRunner &Test) { + outs() << "*** Reducing Basic Blocks...\n"; + int BBCount = countBasicBlocks(Test.getProgram()); + runDeltaPass(Test, BBCount, extractBasicBlocksFromModule); +} diff --git a/tools/llvm-reduce/deltas/ReduceBasicBlocks.h b/tools/llvm-reduce/deltas/ReduceBasicBlocks.h new file mode 100644 index 000000000000..cf76a0abbcd7 --- /dev/null +++ b/tools/llvm-reduce/deltas/ReduceBasicBlocks.h @@ -0,0 +1,20 @@ +//===- ReduceArguments.h - Specialized Delta Pass -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce uninteresting Arguments from defined functions. +// +//===----------------------------------------------------------------------===// + +#include "Delta.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" + +namespace llvm { +void reduceBasicBlocksDeltaPass(TestRunner &Test); +} // namespace llvm diff --git a/tools/llvm-reduce/deltas/ReduceFunctions.cpp b/tools/llvm-reduce/deltas/ReduceFunctions.cpp new file mode 100644 index 000000000000..3382f35a945a --- /dev/null +++ b/tools/llvm-reduce/deltas/ReduceFunctions.cpp @@ -0,0 +1,77 @@ +//===- ReduceFunctions.cpp - Specialized Delta Pass -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce functions (and any instruction that calls it) in the provided +// Module. +// +//===----------------------------------------------------------------------===// + +#include "ReduceFunctions.h" +#include "Delta.h" +#include "llvm/ADT/SetVector.h" +#include + +using namespace llvm; + +/// Removes all the Defined Functions (as well as their calls) +/// that aren't inside any of the desired Chunks. +static void extractFunctionsFromModule(const std::vector &ChunksToKeep, + Module *Program) { + // Get functions inside desired chunks + std::set FuncsToKeep; + int I = 0, FunctionCount = 0; + for (auto &F : *Program) + if (I < (int)ChunksToKeep.size()) { + if (ChunksToKeep[I].contains(++FunctionCount)) + FuncsToKeep.insert(&F); + if (FunctionCount == ChunksToKeep[I].end) + ++I; + } + + // Delete out-of-chunk functions, and replace their calls with undef + std::vector FuncsToRemove; + SetVector CallsToRemove; + for (auto &F : *Program) + if (!FuncsToKeep.count(&F)) { + for (auto U : F.users()) + if (auto *Call = dyn_cast(U)) { + Call->replaceAllUsesWith(UndefValue::get(Call->getType())); + CallsToRemove.insert(Call); + } + F.replaceAllUsesWith(UndefValue::get(F.getType())); + FuncsToRemove.push_back(&F); + } + + for (auto *C : CallsToRemove) + C->eraseFromParent(); + + for (auto *F : FuncsToRemove) + F->eraseFromParent(); +} + +/// Counts the amount of non-declaration functions and prints their +/// respective name & index +static int countFunctions(Module *Program) { + // TODO: Silence index with --quiet flag + errs() << "----------------------------\n"; + errs() << "Function Index Reference:\n"; + int FunctionCount = 0; + for (auto &F : *Program) + errs() << "\t" << ++FunctionCount << ": " << F.getName() << "\n"; + + errs() << "----------------------------\n"; + return FunctionCount; +} + +void llvm::reduceFunctionsDeltaPass(TestRunner &Test) { + errs() << "*** Reducing Functions...\n"; + int Functions = countFunctions(Test.getProgram()); + runDeltaPass(Test, Functions, extractFunctionsFromModule); + errs() << "----------------------------\n"; +} diff --git a/tools/llvm-reduce/deltas/ReduceFunctions.h b/tools/llvm-reduce/deltas/ReduceFunctions.h new file mode 100644 index 000000000000..7c2cd3f33e9f --- /dev/null +++ b/tools/llvm-reduce/deltas/ReduceFunctions.h @@ -0,0 +1,20 @@ +//===- ReduceFunctions.h - Specialized Delta Pass -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce functions (and any instruction that calls it) in the provided +// Module. 
+// +//===----------------------------------------------------------------------===// + +#include "Delta.h" +#include "llvm/Transforms/Utils/Cloning.h" + +namespace llvm { +void reduceFunctionsDeltaPass(TestRunner &Test); +} // namespace llvm diff --git a/tools/llvm-reduce/deltas/ReduceGlobalVars.cpp b/tools/llvm-reduce/deltas/ReduceGlobalVars.cpp new file mode 100644 index 000000000000..5732208ee0a9 --- /dev/null +++ b/tools/llvm-reduce/deltas/ReduceGlobalVars.cpp @@ -0,0 +1,74 @@ +//===- ReduceGlobalVars.cpp - Specialized Delta Pass ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce initialized Global Variables in the provided Module. +// +//===----------------------------------------------------------------------===// + +#include "ReduceGlobalVars.h" +#include + +using namespace llvm; + +/// Removes all the Initialized GVs that aren't inside the desired Chunks. +static void extractGVsFromModule(std::vector ChunksToKeep, + Module *Program) { + // Get GVs inside desired chunks + std::set GVsToKeep; + int I = 0, GVCount = 0; + for (auto &GV : Program->globals()) + if (GV.hasInitializer() && I < (int)ChunksToKeep.size()) { + if (ChunksToKeep[I].contains(++GVCount)) + GVsToKeep.insert(&GV); + if (GVCount == ChunksToKeep[I].end) + ++I; + } + + // Delete out-of-chunk GVs and their uses + std::vector ToRemove; + std::vector InstToRemove; + for (auto &GV : Program->globals()) + if (GV.hasInitializer() && !GVsToKeep.count(&GV)) { + for (auto U : GV.users()) + if (auto *Inst = dyn_cast(U)) + InstToRemove.push_back(Inst); + + GV.replaceAllUsesWith(UndefValue::get(GV.getType())); + ToRemove.push_back(&GV); + } + + // Delete Instruction uses of unwanted GVs + for (auto *Inst : InstToRemove) { + Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); + Inst->eraseFromParent(); + } + + for (auto *GV : ToRemove) + GV->eraseFromParent(); +} + +/// Counts the amount of initialized GVs and displays their +/// respective name & index +static int countGVs(Module *Program) { + // TODO: Silence index with --quiet flag + outs() << "----------------------------\n"; + outs() << "GlobalVariable Index Reference:\n"; + int GVCount = 0; + for (auto &GV : Program->globals()) + if (GV.hasInitializer()) + outs() << "\t" << ++GVCount << ": " << GV.getName() << "\n"; + outs() << "----------------------------\n"; + return GVCount; +} + +void llvm::reduceGlobalsDeltaPass(TestRunner &Test) { + outs() << "*** Reducing GVs...\n"; + int GVCount = countGVs(Test.getProgram()); + runDeltaPass(Test, GVCount, extractGVsFromModule); +} diff --git a/tools/llvm-reduce/deltas/ReduceGlobalVars.h b/tools/llvm-reduce/deltas/ReduceGlobalVars.h new file mode 100644 index 000000000000..d4a870aded58 --- /dev/null +++ b/tools/llvm-reduce/deltas/ReduceGlobalVars.h @@ -0,0 +1,20 @@ +//===- ReduceGlobalVars.h - Specialized Delta Pass ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a function which calls the Generic Delta pass in order
+// to reduce initialized Global Variables in the provided Module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Delta.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+namespace llvm {
+void reduceGlobalsDeltaPass(TestRunner &Test);
+} // namespace llvm
diff --git a/tools/llvm-reduce/deltas/ReduceInstructions.cpp b/tools/llvm-reduce/deltas/ReduceInstructions.cpp
new file mode 100644
index 000000000000..b3497ad2dc02
--- /dev/null
+++ b/tools/llvm-reduce/deltas/ReduceInstructions.cpp
@@ -0,0 +1,65 @@
+//===- ReduceInstructions.cpp - Specialized Delta Pass --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a function which calls the Generic Delta pass in order
+// to reduce uninteresting Instructions from defined functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ReduceInstructions.h"
+
+using namespace llvm;
+
+/// Removes out-of-chunk instructions from the module, replacing any remaining
+/// uses with undef.
+static void extractInstrFromModule(std::vector<Chunk> ChunksToKeep,
+                                   Module *Program) {
+  int I = 0, InstCount = 0;
+  std::set<Instruction *> InstToKeep;
+
+  for (auto &F : *Program)
+    for (auto &BB : F)
+      for (auto &Inst : BB)
+        if (I < (int)ChunksToKeep.size()) {
+          if (ChunksToKeep[I].contains(++InstCount))
+            InstToKeep.insert(&Inst);
+          if (ChunksToKeep[I].end == InstCount)
+            ++I;
+        }
+
+  std::vector<Instruction *> InstToDelete;
+  for (auto &F : *Program)
+    for (auto &BB : F)
+      for (auto &Inst : BB)
+        if (!InstToKeep.count(&Inst)) {
+          Inst.replaceAllUsesWith(UndefValue::get(Inst.getType()));
+          InstToDelete.push_back(&Inst);
+        }
+
+  for (auto &I : InstToDelete)
+    I->eraseFromParent();
+}
+
+/// Counts the number of instructions in the module and prints the total
+static unsigned countInstructions(Module *Program) {
+  // TODO: Silence index with --quiet flag
+  outs() << "----------------------------\n";
+  int InstCount = 0;
+  for (auto &F : *Program)
+    for (auto &BB : F)
+      InstCount += BB.getInstList().size();
+  outs() << "Number of instructions: " << InstCount << "\n";
+
+  return InstCount;
+}
+
+void llvm::reduceInstructionsDeltaPass(TestRunner &Test) {
+  outs() << "*** Reducing Instructions...\n";
+  unsigned InstCount = countInstructions(Test.getProgram());
+  runDeltaPass(Test, InstCount, extractInstrFromModule);
+}
diff --git a/tools/llvm-reduce/deltas/ReduceInstructions.h b/tools/llvm-reduce/deltas/ReduceInstructions.h
new file mode 100644
index 000000000000..a9266acd051a
--- /dev/null
+++ b/tools/llvm-reduce/deltas/ReduceInstructions.h
@@ -0,0 +1,20 @@
+//===- ReduceInstructions.h - Specialized Delta Pass ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce uninteresting Arguments from defined functions. +// +//===----------------------------------------------------------------------===// + +#include "Delta.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" + +namespace llvm { +void reduceInstructionsDeltaPass(TestRunner &Test); +} // namespace llvm diff --git a/tools/llvm-reduce/deltas/ReduceMetadata.cpp b/tools/llvm-reduce/deltas/ReduceMetadata.cpp new file mode 100644 index 000000000000..4ea223546efa --- /dev/null +++ b/tools/llvm-reduce/deltas/ReduceMetadata.cpp @@ -0,0 +1,138 @@ +//===- ReduceMetadata.cpp - Specialized Delta Pass ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements two functions used by the Generic Delta Debugging +// Algorithm, which are used to reduce Metadata nodes. +// +//===----------------------------------------------------------------------===// + +#include "ReduceMetadata.h" +#include "Delta.h" +#include "llvm/ADT/SmallVector.h" +#include +#include + +using namespace llvm; + +/// Adds all Unnamed Metadata Nodes that are inside desired Chunks to set +template +static void getChunkMetadataNodes(T &MDUser, int &I, + const std::vector &ChunksToKeep, + std::set &SeenNodes, + std::set &NodesToKeep) { + SmallVector, 4> MDs; + MDUser.getAllMetadata(MDs); + for (auto &MD : MDs) { + SeenNodes.insert(MD.second); + if (I < (int)ChunksToKeep.size()) { + if (ChunksToKeep[I].contains(SeenNodes.size())) + NodesToKeep.insert(MD.second); + if (ChunksToKeep[I].end == (int)SeenNodes.size()) + ++I; + } + } +} + +/// Erases out-of-chunk unnamed metadata nodes from its user +template +static void eraseMetadataIfOutsideChunk(T &MDUser, + const std::set &NodesToKeep) { + SmallVector, 4> MDs; + MDUser.getAllMetadata(MDs); + for (int I = 0, E = MDs.size(); I != E; ++I) + if (!NodesToKeep.count(MDs[I].second)) + MDUser.setMetadata(I, NULL); +} + +/// Removes all the Named and Unnamed Metadata Nodes, as well as any debug +/// functions that aren't inside the desired Chunks. 
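One more illustrative aside, not part of the patch: the two templated helpers above are thin wrappers around the metadata-attachment API. The sketch below shows that API in isolation on a single instruction; the helper name is invented. Note that setMetadata() takes the metadata kind ID carried in the pair returned by getAllMetadata(), and passing nullptr detaches the node.

// Sketch only: enumerating and detaching metadata attachments.
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Metadata.h"
#include <set>
#include <utility>

static void dropAttachmentsNotIn(llvm::Instruction &I,
                                 const std::set<llvm::MDNode *> &NodesToKeep) {
  llvm::SmallVector<std::pair<unsigned, llvm::MDNode *>, 4> MDs;
  I.getAllMetadata(MDs); // (kind ID, node) pairs attached to this instruction
  for (const auto &KindAndNode : MDs)
    if (!NodesToKeep.count(KindAndNode.second))
      I.setMetadata(KindAndNode.first, nullptr); // detach this attachment
}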
+static void extractMetadataFromModule(const std::vector &ChunksToKeep, + Module *Program) { + std::set SeenNodes; + std::set NodesToKeep; + int I = 0; + + // Add chunk MDNodes used by GVs, Functions, and Instructions to set + for (auto &GV : Program->globals()) + getChunkMetadataNodes(GV, I, ChunksToKeep, SeenNodes, NodesToKeep); + + for (auto &F : *Program) { + getChunkMetadataNodes(F, I, ChunksToKeep, SeenNodes, NodesToKeep); + for (auto &BB : F) + for (auto &Inst : BB) + getChunkMetadataNodes(Inst, I, ChunksToKeep, SeenNodes, NodesToKeep); + } + + // Once more, go over metadata nodes, but deleting the ones outside chunks + for (auto &GV : Program->globals()) + eraseMetadataIfOutsideChunk(GV, NodesToKeep); + + for (auto &F : *Program) { + eraseMetadataIfOutsideChunk(F, NodesToKeep); + for (auto &BB : F) + for (auto &Inst : BB) + eraseMetadataIfOutsideChunk(Inst, NodesToKeep); + } + + + // Get out-of-chunk Named metadata nodes + unsigned MetadataCount = SeenNodes.size(); + std::vector NamedNodesToDelete; + for (auto &MD : Program->named_metadata()) { + if (I < (int)ChunksToKeep.size()) { + if (!ChunksToKeep[I].contains(++MetadataCount)) + NamedNodesToDelete.push_back(&MD); + if (ChunksToKeep[I].end == (int)SeenNodes.size()) + ++I; + } else + NamedNodesToDelete.push_back(&MD); + } + + for (auto *NN : NamedNodesToDelete) { + for (int I = 0, E = NN->getNumOperands(); I != E; ++I) + NN->setOperand(I, NULL); + NN->eraseFromParent(); + } +} + +// Gets unnamed metadata nodes used by a given instruction/GV/function and adds +// them to the set of seen nodes +template +static void addMetadataToSet(T &MDUser, std::set &UnnamedNodes) { + SmallVector, 4> MDs; + MDUser.getAllMetadata(MDs); + for (auto &MD : MDs) + UnnamedNodes.insert(MD.second); +} + +/// Returns the amount of Named and Unnamed Metadata Nodes +static int countMetadataTargets(Module *Program) { + std::set UnnamedNodes; + int NamedMetadataNodes = Program->named_metadata_size(); + + // Get metadata nodes used by globals + for (auto &GV : Program->globals()) + addMetadataToSet(GV, UnnamedNodes); + + // Do the same for nodes used by functions & instructions + for (auto &F : *Program) { + addMetadataToSet(F, UnnamedNodes); + for (auto &BB : F) + for (auto &I : BB) + addMetadataToSet(I, UnnamedNodes); + } + + return UnnamedNodes.size() + NamedMetadataNodes; +} + +void llvm::reduceMetadataDeltaPass(TestRunner &Test) { + outs() << "*** Reducing Metadata...\n"; + int MDCount = countMetadataTargets(Test.getProgram()); + runDeltaPass(Test, MDCount, extractMetadataFromModule); + outs() << "----------------------------\n"; +} diff --git a/tools/llvm-reduce/deltas/ReduceMetadata.h b/tools/llvm-reduce/deltas/ReduceMetadata.h new file mode 100644 index 000000000000..275b44c2aa7d --- /dev/null +++ b/tools/llvm-reduce/deltas/ReduceMetadata.h @@ -0,0 +1,18 @@ +//===- ReduceMetadata.h - Specialized Delta Pass ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements two functions used by the Generic Delta Debugging +// Algorithm, which are used to reduce Metadata nodes. 
+// +//===----------------------------------------------------------------------===// + +#include "TestRunner.h" + +namespace llvm { +void reduceMetadataDeltaPass(TestRunner &Test); +} // namespace llvm diff --git a/tools/llvm-reduce/llvm-reduce.cpp b/tools/llvm-reduce/llvm-reduce.cpp new file mode 100644 index 000000000000..83dcf980a786 --- /dev/null +++ b/tools/llvm-reduce/llvm-reduce.cpp @@ -0,0 +1,114 @@ +//===- llvm-reduce.cpp - The LLVM Delta Reduction utility -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This program tries to reduce an IR test case for a given interesting-ness +// test. It runs multiple delta debugging passes in order to minimize the input +// file. It's worth noting that this is a part of the bugpoint redesign +// proposal, and thus a *temporary* tool that will eventually be integrated +// into the bugpoint tool itself. +// +//===----------------------------------------------------------------------===// + +#include "DeltaManager.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Verifier.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +using namespace llvm; + +static cl::opt Help("h", cl::desc("Alias for -help"), cl::Hidden); +static cl::opt Version("v", cl::desc("Alias for -version"), cl::Hidden); + +static cl::opt InputFilename(cl::Positional, cl::Required, + cl::desc("")); + +static cl::opt + TestFilename("test", cl::Required, + cl::desc("Name of the interesting-ness test to be run")); + +static cl::list + TestArguments("test-arg", cl::ZeroOrMore, + cl::desc("Arguments passed onto the interesting-ness test")); + +static cl::opt + OutputFilename("output", + cl::desc("Specify the output file. 
default: reduced.ll")); +static cl::alias OutputFileAlias("o", cl::desc("Alias for -output"), + cl::aliasopt(OutputFilename)); + +static cl::opt + ReplaceInput("in-place", + cl::desc("WARNING: This option will replace your input file" + "with the reduced version!")); + +// Parses IR into a Module and verifies it +static std::unique_ptr parseInputFile(StringRef Filename, + LLVMContext &Ctxt) { + SMDiagnostic Err; + std::unique_ptr Result = parseIRFile(Filename, Err, Ctxt); + if (!Result) { + Err.print("llvm-reduce", errs()); + return Result; + } + + if (verifyModule(*Result, &errs())) { + errs() << "Error: " << Filename << " - input module is broken!\n"; + return std::unique_ptr(); + } + + return Result; +} + +int main(int argc, char **argv) { + InitLLVM X(argc, argv); + + cl::ParseCommandLineOptions(argc, argv, "LLVM automatic testcase reducer.\n"); + + LLVMContext Context; + std::unique_ptr OriginalProgram = + parseInputFile(InputFilename, Context); + + // Initialize test environment + TestRunner Tester(TestFilename, TestArguments); + Tester.setProgram(std::move(OriginalProgram)); + + // Try to reduce code + runDeltaPasses(Tester); + + if (!Tester.getProgram()) { + errs() << "\nCouldnt reduce input :/\n"; + } else { + // Print reduced file to STDOUT + if (OutputFilename == "-") + Tester.getProgram()->print(outs(), nullptr); + else { + if (ReplaceInput) // In-place + OutputFilename = InputFilename.c_str(); + else if (OutputFilename.empty()) + OutputFilename = "reduced.ll"; + + std::error_code EC; + raw_fd_ostream Out(OutputFilename, EC); + if (EC) { + errs() << "Error opening output file: " << EC.message() << "!\n"; + exit(1); + } + Tester.getProgram()->print(Out, /*AnnotationWriter=*/nullptr); + errs() << "\nDone reducing! Reduced testcase: " << OutputFilename << "\n"; + } + } + + return 0; +} diff --git a/tools/llvm-rtdyld/llvm-rtdyld.cpp b/tools/llvm-rtdyld/llvm-rtdyld.cpp index a7cc1deb8cf6..3a36e7709483 100644 --- a/tools/llvm-rtdyld/llvm-rtdyld.cpp +++ b/tools/llvm-rtdyld/llvm-rtdyld.cpp @@ -27,12 +27,13 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/InitLLVM.h" +#include "llvm/Support/MSVCErrorWorkarounds.h" #include "llvm/Support/Memory.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/MSVCErrorWorkarounds.h" #include "llvm/Support/Path.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" +#include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include @@ -138,8 +139,21 @@ PrintAllocationRequests("print-alloc-requests", "manager by RuntimeDyld"), cl::Hidden); +static cl::opt ShowTimes("show-times", + cl::desc("Show times for llvm-rtdyld phases"), + cl::init(false)); + ExitOnError ExitOnErr; +struct RTDyldTimers { + TimerGroup RTDyldTG{"llvm-rtdyld timers", "timers for llvm-rtdyld phases"}; + Timer LoadObjectsTimer{"load", "time to load/add object files", RTDyldTG}; + Timer LinkTimer{"link", "time to link object files", RTDyldTG}; + Timer RunTimer{"run", "time to execute jitlink'd code", RTDyldTG}; +}; + +std::unique_ptr Timers; + /* *** */ using SectionIDMap = StringMap; @@ -441,8 +455,6 @@ static int printLineInfoForInput(bool LoadObjects, bool UseDebugObj) { continue; } object::section_iterator Sec = *SecOrErr; - StringRef SecName; - Sec->getName(SecName); Address.SectionIndex = Sec->getIndex(); uint64_t SectionLoadAddress = LoadedObjInfo->getSectionLoadAddress(*Sec); @@ -491,35 +503,41 @@ static int executeInput() { // If we don't have any input files, read from 
stdin. if (!InputFileList.size()) InputFileList.push_back("-"); - for (auto &File : InputFileList) { - // Load the input memory buffer. - ErrorOr> InputBuffer = - MemoryBuffer::getFileOrSTDIN(File); - if (std::error_code EC = InputBuffer.getError()) - ErrorAndExit("unable to read input: '" + EC.message() + "'"); - Expected> MaybeObj( - ObjectFile::createObjectFile((*InputBuffer)->getMemBufferRef())); - - if (!MaybeObj) { - std::string Buf; - raw_string_ostream OS(Buf); - logAllUnhandledErrors(MaybeObj.takeError(), OS); - OS.flush(); - ErrorAndExit("unable to create object file: '" + Buf + "'"); - } + { + TimeRegion TR(Timers ? &Timers->LoadObjectsTimer : nullptr); + for (auto &File : InputFileList) { + // Load the input memory buffer. + ErrorOr> InputBuffer = + MemoryBuffer::getFileOrSTDIN(File); + if (std::error_code EC = InputBuffer.getError()) + ErrorAndExit("unable to read input: '" + EC.message() + "'"); + Expected> MaybeObj( + ObjectFile::createObjectFile((*InputBuffer)->getMemBufferRef())); + + if (!MaybeObj) { + std::string Buf; + raw_string_ostream OS(Buf); + logAllUnhandledErrors(MaybeObj.takeError(), OS); + OS.flush(); + ErrorAndExit("unable to create object file: '" + Buf + "'"); + } - ObjectFile &Obj = **MaybeObj; + ObjectFile &Obj = **MaybeObj; - // Load the object file - Dyld.loadObject(Obj); - if (Dyld.hasError()) { - ErrorAndExit(Dyld.getErrorString()); + // Load the object file + Dyld.loadObject(Obj); + if (Dyld.hasError()) { + ErrorAndExit(Dyld.getErrorString()); + } } } - // Resove all the relocations we can. - // FIXME: Error out if there are unresolved relocations. - Dyld.resolveRelocations(); + { + TimeRegion TR(Timers ? &Timers->LinkTimer : nullptr); + // Resove all the relocations we can. + // FIXME: Error out if there are unresolved relocations. + Dyld.resolveRelocations(); + } // Get the address of the entry point (_main by default). void *MainAddress = Dyld.getSymbolLocalAddress(EntryPoint); @@ -551,7 +569,13 @@ static int executeInput() { for (auto &Arg : InputArgv) Argv.push_back(Arg.data()); Argv.push_back(nullptr); - return Main(Argv.size() - 1, Argv.data()); + int Result = 0; + { + TimeRegion TR(Timers ? &Timers->RunTimer : nullptr); + Result = Main(Argv.size() - 1, Argv.data()); + } + + return Result; } static int checkAllExpressions(RuntimeDyldChecker &Checker) { @@ -891,7 +915,7 @@ static int linkAndVerify() { ObjectFile &Obj = **MaybeObj; if (!Checker) - Checker = llvm::make_unique( + Checker = std::make_unique( IsSymbolValid, GetSymbolInfo, GetSectionInfo, GetStubInfo, GetStubInfo, Obj.isLittleEndian() ? support::little : support::big, Disassembler.get(), InstPrinter.get(), dbgs()); @@ -937,16 +961,28 @@ int main(int argc, char **argv) { ExitOnErr.setBanner(std::string(argv[0]) + ": "); + Timers = ShowTimes ? 
std::make_unique() : nullptr; + + int Result; switch (Action) { case AC_Execute: - return executeInput(); + Result = executeInput(); + break; case AC_PrintDebugLineInfo: - return printLineInfoForInput(/* LoadObjects */ true,/* UseDebugObj */ true); + Result = + printLineInfoForInput(/* LoadObjects */ true, /* UseDebugObj */ true); + break; case AC_PrintLineInfo: - return printLineInfoForInput(/* LoadObjects */ true,/* UseDebugObj */false); + Result = + printLineInfoForInput(/* LoadObjects */ true, /* UseDebugObj */ false); + break; case AC_PrintObjectLineInfo: - return printLineInfoForInput(/* LoadObjects */false,/* UseDebugObj */false); + Result = + printLineInfoForInput(/* LoadObjects */ false, /* UseDebugObj */ false); + break; case AC_Verify: - return linkAndVerify(); + Result = linkAndVerify(); + break; } + return Result; } diff --git a/tools/llvm-stress/llvm-stress.cpp b/tools/llvm-stress/llvm-stress.cpp index a455bf13fe7b..5f36a785332b 100644 --- a/tools/llvm-stress/llvm-stress.cpp +++ b/tools/llvm-stress/llvm-stress.cpp @@ -735,7 +735,7 @@ int main(int argc, char **argv) { cl::ParseCommandLineOptions(argc, argv, "llvm codegen stress-tester\n"); llvm_shutdown_obj Y; - auto M = llvm::make_unique("/tmp/autogen.bc", Context); + auto M = std::make_unique("/tmp/autogen.bc", Context); Function *F = GenEmptyFunction(M.get()); // Pick an initial seed value @@ -752,7 +752,7 @@ int main(int argc, char **argv) { OutputFilename = "-"; std::error_code EC; - Out.reset(new ToolOutputFile(OutputFilename, EC, sys::fs::F_None)); + Out.reset(new ToolOutputFile(OutputFilename, EC, sys::fs::OF_None)); if (EC) { errs() << EC.message() << '\n'; return 1; diff --git a/tools/llvm-symbolizer/llvm-symbolizer.cpp b/tools/llvm-symbolizer/llvm-symbolizer.cpp index ea94cf9b69a1..54ce87d47979 100644 --- a/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -55,6 +55,10 @@ static cl::opt cl::desc("Interpret addresses as relative addresses"), cl::ReallyHidden); +static cl::opt ClUntagAddresses( + "untag-addresses", cl::init(true), + cl::desc("Remove memory tags from addresses before symbolization")); + static cl::opt ClPrintInlining("inlining", cl::init(true), cl::desc("Print all inlined frames for a given address")); @@ -274,6 +278,7 @@ int main(int argc, char **argv) { ClDemangle.setInitialValue(false); ClPrintFunctions.setInitialValue(FunctionNameKind::None); ClPrintInlining.setInitialValue(false); + ClUntagAddresses.setInitialValue(false); ClOutputStyle.setInitialValue(DIPrinter::OutputStyle::GNU); } @@ -290,6 +295,7 @@ int main(int argc, char **argv) { Opts.UseSymbolTable = ClUseSymbolTable; Opts.Demangle = ClDemangle; Opts.RelativeAddresses = ClUseRelativeAddress; + Opts.UntagAddresses = ClUntagAddresses; Opts.DefaultArch = ClDefaultArch; Opts.FallbackDebugPath = ClFallbackDebugPath; Opts.DWPName = ClDwpName; diff --git a/tools/llvm-xray/func-id-helper.cpp b/tools/llvm-xray/func-id-helper.cpp index dc821a420c67..afc912a6398e 100644 --- a/tools/llvm-xray/func-id-helper.cpp +++ b/tools/llvm-xray/func-id-helper.cpp @@ -36,7 +36,7 @@ std::string FuncIdConversionHelper::SymbolOrNumber(int32_t FuncId) const { ModuleAddress.SectionIndex = object::SectionedAddress::UndefSection; if (auto ResOrErr = Symbolizer.symbolizeCode(BinaryInstrMap, ModuleAddress)) { auto &DI = *ResOrErr; - if (DI.FunctionName == "") + if (DI.FunctionName == DILineInfo::BadString) F << "@(" << std::hex << It->second << ")"; else F << DI.FunctionName; diff --git a/tools/llvm-xray/xray-account.cpp 
b/tools/llvm-xray/xray-account.cpp index 2b49a311d7e3..e37cd212377a 100644 --- a/tools/llvm-xray/xray-account.cpp +++ b/tools/llvm-xray/xray-account.cpp @@ -421,7 +421,7 @@ static CommandRegistration Unused(&Account, []() -> Error { } std::error_code EC; - raw_fd_ostream OS(AccountOutput, EC, sys::fs::OpenFlags::F_Text); + raw_fd_ostream OS(AccountOutput, EC, sys::fs::OpenFlags::OF_Text); if (EC) return make_error( Twine("Cannot open file '") + AccountOutput + "' for writing.", EC); diff --git a/tools/llvm-xray/xray-converter.cpp b/tools/llvm-xray/xray-converter.cpp index dfc757e0f276..7258245b95cc 100644 --- a/tools/llvm-xray/xray-converter.cpp +++ b/tools/llvm-xray/xray-converter.cpp @@ -387,8 +387,8 @@ static CommandRegistration Unused(&Convert, []() -> Error { std::error_code EC; raw_fd_ostream OS(ConvertOutput, EC, ConvertOutputFormat == ConvertFormats::BINARY - ? sys::fs::OpenFlags::F_None - : sys::fs::OpenFlags::F_Text); + ? sys::fs::OpenFlags::OF_None + : sys::fs::OpenFlags::OF_Text); if (EC) return make_error( Twine("Cannot open file '") + ConvertOutput + "' for writing.", EC); diff --git a/tools/llvm-xray/xray-extract.cpp b/tools/llvm-xray/xray-extract.cpp index 7c7d26b5a389..7800b88d9eeb 100644 --- a/tools/llvm-xray/xray-extract.cpp +++ b/tools/llvm-xray/xray-extract.cpp @@ -80,7 +80,7 @@ static CommandRegistration Unused(&Extract, []() -> Error { InstrumentationMapOrError.takeError()); std::error_code EC; - raw_fd_ostream OS(ExtractOutput, EC, sys::fs::OpenFlags::F_Text); + raw_fd_ostream OS(ExtractOutput, EC, sys::fs::OpenFlags::OF_Text); if (EC) return make_error( Twine("Cannot open file '") + ExtractOutput + "' for writing.", EC); diff --git a/tools/llvm-xray/xray-fdr-dump.cpp b/tools/llvm-xray/xray-fdr-dump.cpp index 81a93cac57c4..295f7a78765f 100644 --- a/tools/llvm-xray/xray-fdr-dump.cpp +++ b/tools/llvm-xray/xray-fdr-dump.cpp @@ -51,7 +51,7 @@ static CommandRegistration Unused(&Dump, []() -> Error { sys::fs::closeFile(*FDOrErr); DataExtractor DE(StringRef(MappedFile.data(), MappedFile.size()), true, 8); - uint32_t OffsetPtr = 0; + uint64_t OffsetPtr = 0; auto FileHeaderOrError = readBinaryFormatHeader(DE, OffsetPtr); if (!FileHeaderOrError) diff --git a/tools/llvm-xray/xray-graph-diff.cpp b/tools/llvm-xray/xray-graph-diff.cpp index a514be97f40b..116aa6869ec1 100644 --- a/tools/llvm-xray/xray-graph-diff.cpp +++ b/tools/llvm-xray/xray-graph-diff.cpp @@ -470,7 +470,7 @@ static CommandRegistration Unused(&GraphDiff, []() -> Error { auto &GDR = *GDROrErr; std::error_code EC; - raw_fd_ostream OS(GraphDiffOutput, EC, sys::fs::OpenFlags::F_Text); + raw_fd_ostream OS(GraphDiffOutput, EC, sys::fs::OpenFlags::OF_Text); if (EC) return make_error( Twine("Cannot open file '") + GraphDiffOutput + "' for writing.", EC); diff --git a/tools/llvm-xray/xray-graph.cpp b/tools/llvm-xray/xray-graph.cpp index c09357fcb502..0be511219c1a 100644 --- a/tools/llvm-xray/xray-graph.cpp +++ b/tools/llvm-xray/xray-graph.cpp @@ -506,7 +506,7 @@ static CommandRegistration Unused(&GraphC, []() -> Error { auto &GR = *GROrError; std::error_code EC; - raw_fd_ostream OS(GraphOutput, EC, sys::fs::OpenFlags::F_Text); + raw_fd_ostream OS(GraphOutput, EC, sys::fs::OpenFlags::OF_Text); if (EC) return make_error( Twine("Cannot open file '") + GraphOutput + "' for writing.", EC); diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index ccf8b073b82b..15495a511d06 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -523,7 +523,6 @@ int main(int argc, char **argv) { initializeDwarfEHPreparePass(Registry); 
initializeSafeStackLegacyPassPass(Registry); initializeSjLjEHPreparePass(Registry); - initializeStackProtectorPass(Registry); initializePreISelIntrinsicLoweringLegacyPassPass(Registry); initializeGlobalMergePass(Registry); initializeIndirectBrExpandPassPass(Registry); @@ -612,7 +611,9 @@ int main(int argc, char **argv) { OutputFilename = "-"; std::error_code EC; - Out.reset(new ToolOutputFile(OutputFilename, EC, sys::fs::F_None)); + sys::fs::OpenFlags Flags = OutputAssembly ? sys::fs::OF_Text + : sys::fs::OF_None; + Out.reset(new ToolOutputFile(OutputFilename, EC, Flags)); if (EC) { errs() << EC.message() << '\n'; return 1; @@ -620,7 +621,7 @@ int main(int argc, char **argv) { if (!ThinLinkBitcodeFile.empty()) { ThinLinkOut.reset( - new ToolOutputFile(ThinLinkBitcodeFile, EC, sys::fs::F_None)); + new ToolOutputFile(ThinLinkBitcodeFile, EC, sys::fs::OF_None)); if (EC) { errs() << EC.message() << '\n'; return 1; @@ -720,8 +721,8 @@ int main(int argc, char **argv) { OutputFilename = "-"; std::error_code EC; - Out = llvm::make_unique(OutputFilename, EC, - sys::fs::F_None); + Out = std::make_unique(OutputFilename, EC, + sys::fs::OF_None); if (EC) { errs() << EC.message() << '\n'; return 1; @@ -867,7 +868,7 @@ int main(int argc, char **argv) { assert(Out); OS = &Out->os(); if (RunTwice) { - BOS = make_unique(Buffer); + BOS = std::make_unique(Buffer); OS = BOS.get(); } if (OutputAssembly) { diff --git a/tools/vfabi-demangle-fuzzer/CMakeLists.txt b/tools/vfabi-demangle-fuzzer/CMakeLists.txt new file mode 100644 index 000000000000..908364690f5e --- /dev/null +++ b/tools/vfabi-demangle-fuzzer/CMakeLists.txt @@ -0,0 +1,7 @@ +set(LLVM_LINK_COMPONENTS + Analysis + Support +) +add_llvm_fuzzer(vfabi-demangler-fuzzer + vfabi-demangler-fuzzer.cpp +) diff --git a/tools/vfabi-demangle-fuzzer/vfabi-demangler-fuzzer.cpp b/tools/vfabi-demangle-fuzzer/vfabi-demangler-fuzzer.cpp new file mode 100644 index 000000000000..13657effbbeb --- /dev/null +++ b/tools/vfabi-demangle-fuzzer/vfabi-demangler-fuzzer.cpp @@ -0,0 +1,26 @@ +//===-- vfabi-demangler-fuzzer.cpp - Fuzzer VFABI using lib/Fuzzer ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Build tool to fuzz the demangler for the vector function ABI names. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/VectorUtils.h" + +using namespace llvm; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + const StringRef MangledName((const char *)Data, Size); + const auto Info = VFABI::tryDemangleForVFABI(MangledName); + + // Do not optimize away the return value. 
Inspired by + // https://github.com/google/benchmark/blob/master/include/benchmark/benchmark.h#L307-L345 + asm volatile("" : : "r,m"(Info) : "memory"); + + return 0; +} diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp index 146d10835b8d..1d39b300091f 100644 --- a/utils/TableGen/AsmMatcherEmitter.cpp +++ b/utils/TableGen/AsmMatcherEmitter.cpp @@ -1111,6 +1111,7 @@ static std::string getEnumNameForToken(StringRef Str) { case '<': Res += "_LT_"; break; case '>': Res += "_GT_"; break; case '-': Res += "_MINUS_"; break; + case '#': Res += "_HASH_"; break; default: if ((*it >= 'A' && *it <= 'Z') || (*it >= 'a' && *it <= 'z') || @@ -1439,7 +1440,7 @@ void AsmMatcherInfo::buildOperandMatchInfo() { /// Map containing a mask with all operands indices that can be found for /// that class inside a instruction. - typedef std::map> OpClassMaskTy; + typedef std::map>> OpClassMaskTy; OpClassMaskTy OpClassMask; for (const auto &MI : Matchables) { @@ -1515,7 +1516,7 @@ void AsmMatcherInfo::buildInfo() { if (!V.empty() && V != Variant.Name) continue; - auto II = llvm::make_unique(*CGI); + auto II = std::make_unique(*CGI); II->initialize(*this, SingletonRegisters, Variant, HasMnemonicFirst); @@ -1532,7 +1533,7 @@ void AsmMatcherInfo::buildInfo() { std::vector AllInstAliases = Records.getAllDerivedDefinitions("InstAlias"); for (unsigned i = 0, e = AllInstAliases.size(); i != e; ++i) { - auto Alias = llvm::make_unique(AllInstAliases[i], + auto Alias = std::make_unique(AllInstAliases[i], Target); // If the tblgen -match-prefix option is specified (for tblgen hackers), @@ -1546,7 +1547,7 @@ void AsmMatcherInfo::buildInfo() { if (!V.empty() && V != Variant.Name) continue; - auto II = llvm::make_unique(std::move(Alias)); + auto II = std::make_unique(std::move(Alias)); II->initialize(*this, SingletonRegisters, Variant, HasMnemonicFirst); @@ -1615,7 +1616,7 @@ void AsmMatcherInfo::buildInfo() { II->TheDef->getValueAsString("TwoOperandAliasConstraint"); if (Constraint != "") { // Start by making a copy of the original matchable. - auto AliasII = llvm::make_unique(*II); + auto AliasII = std::make_unique(*II); // Adjust it to be a two-operand alias. 
AliasII->formTwoOperandAlias(Constraint); @@ -2381,7 +2382,7 @@ static void emitMatchClassEnumeration(CodeGenTarget &Target, OS << " NumMatchClassKinds\n"; OS << "};\n\n"; - OS << "}\n\n"; + OS << "} // end anonymous namespace\n\n"; } /// emitMatchClassDiagStrings - Emit a function to get the diagnostic text to be @@ -2866,7 +2867,7 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target, OS << " }\n"; OS << " };\n"; - OS << "} // end anonymous namespace.\n\n"; + OS << "} // end anonymous namespace\n\n"; OS << "static const OperandMatchEntry OperandMatchTable[" << Info.OperandMatchInfo.size() << "] = {\n"; @@ -3366,7 +3367,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << " " << getNameForFeatureBitset(FeatureBitset) << ",\n"; } OS << "};\n\n" - << "const static FeatureBitset FeatureBitsets[] {\n" + << "static constexpr FeatureBitset FeatureBitsets[] = {\n" << " {}, // AMFBS_None\n"; for (const auto &FeatureBitset : FeatureBitsets) { if (FeatureBitset.empty()) @@ -3422,7 +3423,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << " }\n"; OS << " };\n"; - OS << "} // end anonymous namespace.\n\n"; + OS << "} // end anonymous namespace\n\n"; unsigned VariantCount = Target.getAsmParserVariantCount(); for (unsigned VC = 0; VC != VariantCount; ++VC) { diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp index 05d81f133505..b5c7f35be0e5 100644 --- a/utils/TableGen/AsmWriterEmitter.cpp +++ b/utils/TableGen/AsmWriterEmitter.cpp @@ -784,8 +784,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { continue; // Aliases with priority 0 are never emitted. const DagInit *DI = R->getValueAsDag("ResultInst"); - const DefInit *Op = cast(DI->getOperator()); - AliasMap[getQualifiedName(Op->getDef())].insert( + AliasMap[getQualifiedName(DI->getOperatorAsDef(R->getLoc()))].insert( std::make_pair(CodeGenInstAlias(R, Target), Priority)); } diff --git a/utils/TableGen/CallingConvEmitter.cpp b/utils/TableGen/CallingConvEmitter.cpp index de5044e24d49..9eabb44d9004 100644 --- a/utils/TableGen/CallingConvEmitter.cpp +++ b/utils/TableGen/CallingConvEmitter.cpp @@ -264,6 +264,10 @@ void CallingConvEmitter::EmitAction(Record *Action, Record *DestTy = Action->getValueAsDef("DestTy"); O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy)) <<";\n"; O << IndentStr << "LocInfo = CCValAssign::BCvt;\n"; + } else if (Action->isSubClassOf("CCTruncToType")) { + Record *DestTy = Action->getValueAsDef("DestTy"); + O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy)) <<";\n"; + O << IndentStr << "LocInfo = CCValAssign::Trunc;\n"; } else if (Action->isSubClassOf("CCPassIndirect")) { Record *DestTy = Action->getValueAsDef("DestTy"); O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy)) <<";\n"; diff --git a/utils/TableGen/CodeEmitterGen.cpp b/utils/TableGen/CodeEmitterGen.cpp index da65763905a8..42f69cb253d2 100644 --- a/utils/TableGen/CodeEmitterGen.cpp +++ b/utils/TableGen/CodeEmitterGen.cpp @@ -16,6 +16,7 @@ #include "CodeGenTarget.h" #include "SubtargetFeatureInfo.h" #include "Types.h" +#include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Casting.h" @@ -45,12 +46,19 @@ public: private: int getVariableBit(const std::string &VarName, BitsInit *BI, int bit); std::string getInstructionCase(Record *R, CodeGenTarget &Target); + std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef, + CodeGenTarget &Target); void 
AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName, unsigned &NumberedOp, std::set &NamedOpIndices, std::string &Case, CodeGenTarget &Target); + void emitInstructionBaseValues( + raw_ostream &o, ArrayRef NumberedInstructions, + CodeGenTarget &Target, int HwMode = -1); + unsigned BitWidth; + bool UseAPInt; }; // If the VarBitInit at position 'bit' matches the specified variable then @@ -126,7 +134,10 @@ AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName, std::pair SO = CGI.Operands.getSubOperandNumber(OpIdx); std::string &EncoderMethodName = CGI.Operands[SO.first].EncoderMethodName; - + + if (UseAPInt) + Case += " op.clearAllBits();\n"; + // If the source operand has a custom encoder, use it. This will // get the encoding for all of the suboperands. if (!EncoderMethodName.empty()) { @@ -134,18 +145,54 @@ AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName, // sub-operands, if there are more than one, so only // query the encoder once per source operand. if (SO.second == 0) { - Case += " // op: " + VarName + "\n" + - " op = " + EncoderMethodName + "(MI, " + utostr(OpIdx); - Case += ", Fixups, STI"; - Case += ");\n"; + Case += " // op: " + VarName + "\n"; + if (UseAPInt) { + Case += " " + EncoderMethodName + "(MI, " + utostr(OpIdx); + Case += ", op"; + } else { + Case += " op = " + EncoderMethodName + "(MI, " + utostr(OpIdx); + } + Case += ", Fixups, STI);\n"; } } else { - Case += " // op: " + VarName + "\n" + - " op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")"; - Case += ", Fixups, STI"; + Case += " // op: " + VarName + "\n"; + if (UseAPInt) { + Case += " getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")"; + Case += ", op, Fixups, STI"; + } else { + Case += " op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")"; + Case += ", Fixups, STI"; + } Case += ");\n"; } - + + // Precalculate the number of lits this variable contributes to in the + // operand. If there is a single lit (consecutive range of bits) we can use a + // destructive sequence on APInt that reduces memory allocations. + int numOperandLits = 0; + for (int tmpBit = bit; tmpBit >= 0;) { + int varBit = getVariableBit(VarName, BI, tmpBit); + + // If this bit isn't from a variable, skip it. + if (varBit == -1) { + --tmpBit; + continue; + } + + // Figure out the consecutive range of bits covered by this operand, in + // order to generate better encoding code. 
+ int beginVarBit = varBit; + int N = 1; + for (--tmpBit; tmpBit >= 0;) { + varBit = getVariableBit(VarName, BI, tmpBit); + if (varBit == -1 || varBit != (beginVarBit - N)) + break; + ++N; + --tmpBit; + } + ++numOperandLits; + } + for (; bit >= 0; ) { int varBit = getVariableBit(VarName, BI, bit); @@ -166,20 +213,52 @@ AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName, ++N; --bit; } - - uint64_t opMask = ~(uint64_t)0 >> (64-N); - int opShift = beginVarBit - N + 1; - opMask <<= opShift; - opShift = beginInstBit - beginVarBit; - - if (opShift > 0) { - Case += " Value |= (op & UINT64_C(" + utostr(opMask) + ")) << " + - itostr(opShift) + ";\n"; - } else if (opShift < 0) { - Case += " Value |= (op & UINT64_C(" + utostr(opMask) + ")) >> " + - itostr(-opShift) + ";\n"; + + std::string maskStr; + int opShift; + + unsigned loBit = beginVarBit - N + 1; + unsigned hiBit = loBit + N; + unsigned loInstBit = beginInstBit - N + 1; + if (UseAPInt) { + std::string extractStr; + if (N >= 64) { + extractStr = "op.extractBits(" + itostr(hiBit - loBit) + ", " + + itostr(loBit) + ")"; + Case += " Value.insertBits(" + extractStr + ", " + + itostr(loInstBit) + ");\n"; + } else { + extractStr = "op.extractBitsAsZExtValue(" + itostr(hiBit - loBit) + + ", " + itostr(loBit) + ")"; + Case += " Value.insertBits(" + extractStr + ", " + + itostr(loInstBit) + ", " + itostr(hiBit - loBit) + ");\n"; + } } else { - Case += " Value |= op & UINT64_C(" + utostr(opMask) + ");\n"; + uint64_t opMask = ~(uint64_t)0 >> (64 - N); + opShift = beginVarBit - N + 1; + opMask <<= opShift; + maskStr = "UINT64_C(" + utostr(opMask) + ")"; + opShift = beginInstBit - beginVarBit; + + if (numOperandLits == 1) { + Case += " op &= " + maskStr + ";\n"; + if (opShift > 0) { + Case += " op <<= " + itostr(opShift) + ";\n"; + } else if (opShift < 0) { + Case += " op >>= " + itostr(-opShift) + ";\n"; + } + Case += " Value |= op;\n"; + } else { + if (opShift > 0) { + Case += " Value |= (op & " + maskStr + ") << " + + itostr(opShift) + ";\n"; + } else if (opShift < 0) { + Case += " Value |= (op & " + maskStr + ") >> " + + itostr(-opShift) + ";\n"; + } else { + Case += " Value |= (op & " + maskStr + ");\n"; + } + } } } } @@ -187,7 +266,29 @@ AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName, std::string CodeEmitterGen::getInstructionCase(Record *R, CodeGenTarget &Target) { std::string Case; - BitsInit *BI = R->getValueAsBitsInit("Inst"); + if (const RecordVal *RV = R->getValue("EncodingInfos")) { + if (auto *DI = dyn_cast_or_null(RV->getValue())) { + const CodeGenHwModes &HWM = Target.getHwModes(); + EncodingInfoByHwMode EBM(DI->getDef(), HWM); + Case += " switch (HwMode) {\n"; + Case += " default: llvm_unreachable(\"Unhandled HwMode\");\n"; + for (auto &KV : EBM.Map) { + Case += " case " + itostr(KV.first) + ": {\n"; + Case += getInstructionCaseForEncoding(R, KV.second, Target); + Case += " break;\n"; + Case += " }\n"; + } + Case += " }\n"; + return Case; + } + } + return getInstructionCaseForEncoding(R, R, Target); +} + +std::string CodeEmitterGen::getInstructionCaseForEncoding(Record *R, Record *EncodingDef, + CodeGenTarget &Target) { + std::string Case; + BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst"); unsigned NumberedOp = 0; std::set NamedOpIndices; @@ -207,7 +308,7 @@ std::string CodeEmitterGen::getInstructionCase(Record *R, // Loop over all of the fields in the instruction, determining which are the // operands to the instruction. 
- for (const RecordVal &RV : R->getValues()) { + for (const RecordVal &RV : EncodingDef->getValues()) { // Ignore fixed fields in the record, we're looking for values like: // bits<5> RST = { ?, ?, ?, ?, ? }; if (RV.getPrefix() || RV.getValue()->isComplete()) @@ -237,6 +338,54 @@ getNameForFeatureBitset(const std::vector &FeatureBitset) { return Name; } +static void emitInstBits(raw_ostream &OS, const APInt &Bits) { + for (unsigned I = 0; I < Bits.getNumWords(); ++I) + OS << ((I > 0) ? ", " : "") << "UINT64_C(" << utostr(Bits.getRawData()[I]) + << ")"; +} + +void CodeEmitterGen::emitInstructionBaseValues( + raw_ostream &o, ArrayRef NumberedInstructions, + CodeGenTarget &Target, int HwMode) { + const CodeGenHwModes &HWM = Target.getHwModes(); + if (HwMode == -1) + o << " static const uint64_t InstBits[] = {\n"; + else + o << " static const uint64_t InstBits_" << HWM.getMode(HwMode).Name + << "[] = {\n"; + + for (const CodeGenInstruction *CGI : NumberedInstructions) { + Record *R = CGI->TheDef; + + if (R->getValueAsString("Namespace") == "TargetOpcode" || + R->getValueAsBit("isPseudo")) { + o << " "; emitInstBits(o, APInt(BitWidth, 0)); o << ",\n"; + continue; + } + + Record *EncodingDef = R; + if (const RecordVal *RV = R->getValue("EncodingInfos")) { + if (auto *DI = dyn_cast_or_null(RV->getValue())) { + EncodingInfoByHwMode EBM(DI->getDef(), HWM); + if (EBM.hasMode(HwMode)) + EncodingDef = EBM.get(HwMode); + } + } + BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst"); + + // Start by filling in fixed values. + APInt Value(BitWidth, 0); + for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) { + if (BitInit *B = dyn_cast(BI->getBit(e - i - 1))) + Value |= APInt(BitWidth, (uint64_t)B->getValue()) << (e - i - 1); + } + o << " "; + emitInstBits(o, Value); + o << "," << '\t' << "// " << R->getName() << "\n"; + } + o << " UINT64_C(0)\n };\n"; +} + void CodeEmitterGen::run(raw_ostream &o) { CodeGenTarget Target(Records); std::vector Insts = Records.getAllDerivedDefinitions("Instruction"); @@ -247,34 +396,66 @@ void CodeEmitterGen::run(raw_ostream &o) { ArrayRef NumberedInstructions = Target.getInstructionsByEnumValue(); - // Emit function declaration - o << "uint64_t " << Target.getName(); - o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" - << " SmallVectorImpl &Fixups,\n" - << " const MCSubtargetInfo &STI) const {\n"; - - // Emit instruction base values - o << " static const uint64_t InstBits[] = {\n"; + const CodeGenHwModes &HWM = Target.getHwModes(); + // The set of HwModes used by instruction encodings. 
+ std::set HwModes; + BitWidth = 0; for (const CodeGenInstruction *CGI : NumberedInstructions) { Record *R = CGI->TheDef; - if (R->getValueAsString("Namespace") == "TargetOpcode" || - R->getValueAsBit("isPseudo")) { - o << " UINT64_C(0),\n"; + R->getValueAsBit("isPseudo")) continue; - } + if (const RecordVal *RV = R->getValue("EncodingInfos")) { + if (DefInit *DI = dyn_cast_or_null(RV->getValue())) { + EncodingInfoByHwMode EBM(DI->getDef(), HWM); + for (auto &KV : EBM.Map) { + BitsInit *BI = KV.second->getValueAsBitsInit("Inst"); + BitWidth = std::max(BitWidth, BI->getNumBits()); + HwModes.insert(KV.first); + } + continue; + } + } BitsInit *BI = R->getValueAsBitsInit("Inst"); + BitWidth = std::max(BitWidth, BI->getNumBits()); + } + UseAPInt = BitWidth > 64; + + // Emit function declaration + if (UseAPInt) { + o << "void " << Target.getName() + << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" + << " SmallVectorImpl &Fixups,\n" + << " APInt &Inst,\n" + << " APInt &Scratch,\n" + << " const MCSubtargetInfo &STI) const {\n"; + } else { + o << "uint64_t " << Target.getName(); + o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" + << " SmallVectorImpl &Fixups,\n" + << " const MCSubtargetInfo &STI) const {\n"; + } + + // Emit instruction base values + if (HwModes.empty()) { + emitInstructionBaseValues(o, NumberedInstructions, Target, -1); + } else { + for (unsigned HwMode : HwModes) + emitInstructionBaseValues(o, NumberedInstructions, Target, (int)HwMode); + } - // Start by filling in fixed values. - uint64_t Value = 0; - for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) { - if (BitInit *B = dyn_cast(BI->getBit(e-i-1))) - Value |= (uint64_t)B->getValue() << (e-i-1); + if (!HwModes.empty()) { + o << " const uint64_t *InstBits;\n"; + o << " unsigned HwMode = STI.getHwMode();\n"; + o << " switch (HwMode) {\n"; + o << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n"; + for (unsigned I : HwModes) { + o << " case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name + << "; break;\n"; } - o << " UINT64_C(" << Value << ")," << '\t' << "// " << R->getName() << "\n"; + o << " };\n"; } - o << " UINT64_C(0)\n };\n"; // Map to accumulate all the cases. 
std::map> CaseMap; @@ -294,11 +475,26 @@ void CodeEmitterGen::run(raw_ostream &o) { } // Emit initial function code - o << " const unsigned opcode = MI.getOpcode();\n" - << " uint64_t Value = InstBits[opcode];\n" - << " uint64_t op = 0;\n" - << " (void)op; // suppress warning\n" - << " switch (opcode) {\n"; + if (UseAPInt) { + int NumWords = APInt::getNumWords(BitWidth); + int NumBytes = (BitWidth + 7) / 8; + o << " const unsigned opcode = MI.getOpcode();\n" + << " if (Inst.getBitWidth() != " << BitWidth << ")\n" + << " Inst = Inst.zext(" << BitWidth << ");\n" + << " if (Scratch.getBitWidth() != " << BitWidth << ")\n" + << " Scratch = Scratch.zext(" << BitWidth << ");\n" + << " LoadIntFromMemory(Inst, (uint8_t*)&InstBits[opcode * " << NumWords + << "], " << NumBytes << ");\n" + << " APInt &Value = Inst;\n" + << " APInt &op = Scratch;\n" + << " switch (opcode) {\n"; + } else { + o << " const unsigned opcode = MI.getOpcode();\n" + << " uint64_t Value = InstBits[opcode];\n" + << " uint64_t op = 0;\n" + << " (void)op; // suppress warning\n" + << " switch (opcode) {\n"; + } // Emit each case statement std::map>::iterator IE, EE; @@ -322,9 +518,12 @@ void CodeEmitterGen::run(raw_ostream &o) { << " raw_string_ostream Msg(msg);\n" << " Msg << \"Not supported instr: \" << MI;\n" << " report_fatal_error(Msg.str());\n" - << " }\n" - << " return Value;\n" - << "}\n\n"; + << " }\n"; + if (UseAPInt) + o << " Inst = Value;\n"; + else + o << " return Value;\n"; + o << "}\n\n"; const auto &All = SubtargetFeatureInfo::getAll(Records); std::map SubtargetFeatures; @@ -385,8 +584,8 @@ void CodeEmitterGen::run(raw_ostream &o) { o << " " << getNameForFeatureBitset(FeatureBitset) << ",\n"; } o << "};\n\n" - << "const static FeatureBitset FeatureBitsets[] {\n" - << " {}, // CEFBS_None\n"; + << "static constexpr FeatureBitset FeatureBitsets[] = {\n" + << " {}, // CEFBS_None\n"; for (const auto &FeatureBitset : FeatureBitsets) { if (FeatureBitset.empty()) continue; diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp index c8f710d66a03..46f986ca0176 100644 --- a/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/utils/TableGen/CodeGenDAGPatterns.cpp @@ -769,7 +769,10 @@ void TypeInfer::expandOverloads(TypeSetByHwMode::SetType &Out, for (MVT T : MVT::integer_valuetypes()) if (Legal.count(T)) Out.insert(T); - for (MVT T : MVT::integer_vector_valuetypes()) + for (MVT T : MVT::integer_fixedlen_vector_valuetypes()) + if (Legal.count(T)) + Out.insert(T); + for (MVT T : MVT::integer_scalable_vector_valuetypes()) if (Legal.count(T)) Out.insert(T); return; @@ -777,7 +780,10 @@ void TypeInfer::expandOverloads(TypeSetByHwMode::SetType &Out, for (MVT T : MVT::fp_valuetypes()) if (Legal.count(T)) Out.insert(T); - for (MVT T : MVT::fp_vector_valuetypes()) + for (MVT T : MVT::fp_fixedlen_vector_valuetypes()) + if (Legal.count(T)) + Out.insert(T); + for (MVT T : MVT::fp_scalable_vector_valuetypes()) if (Legal.count(T)) Out.insert(T); return; @@ -883,7 +889,8 @@ std::string TreePredicateFn::getPredCode() const { if (isLoad()) { if (!isUnindexed() && !isNonExtLoad() && !isAnyExtLoad() && !isSignExtLoad() && !isZeroExtLoad() && getMemoryVT() == nullptr && - getScalarMemoryVT() == nullptr) + getScalarMemoryVT() == nullptr && getAddressSpaces() == nullptr && + getMinAlignment() < 1) PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), "IsLoad cannot be used by itself"); } else { @@ -903,7 +910,8 @@ std::string TreePredicateFn::getPredCode() const { if (isStore()) { if (!isUnindexed() && 
!isTruncStore() && !isNonTruncStore() && - getMemoryVT() == nullptr && getScalarMemoryVT() == nullptr) + getMemoryVT() == nullptr && getScalarMemoryVT() == nullptr && + getAddressSpaces() == nullptr && getMinAlignment() < 1) PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), "IsStore cannot be used by itself"); } else { @@ -917,6 +925,7 @@ std::string TreePredicateFn::getPredCode() const { if (isAtomic()) { if (getMemoryVT() == nullptr && !isAtomicOrderingMonotonic() && + getAddressSpaces() == nullptr && !isAtomicOrderingAcquire() && !isAtomicOrderingRelease() && !isAtomicOrderingAcquireRelease() && !isAtomicOrderingSequentiallyConsistent() && @@ -977,6 +986,13 @@ std::string TreePredicateFn::getPredCode() const { Code += ")\nreturn false;\n"; } + int64_t MinAlign = getMinAlignment(); + if (MinAlign > 0) { + Code += "if (cast(N)->getAlignment() < "; + Code += utostr(MinAlign); + Code += ")\nreturn false;\n"; + } + Record *MemoryVT = getMemoryVT(); if (MemoryVT) @@ -1177,6 +1193,13 @@ ListInit *TreePredicateFn::getAddressSpaces() const { return R->getValueAsListInit("AddressSpaces"); } +int64_t TreePredicateFn::getMinAlignment() const { + Record *R = getOrigPatFragRecord()->getRecord(); + if (R->isValueUnset("MinAlignment")) + return 0; + return R->getValueAsInt("MinAlignment"); +} + Record *TreePredicateFn::getScalarMemoryVT() const { Record *R = getOrigPatFragRecord()->getRecord(); if (R->isValueUnset("ScalarMemoryVT")) @@ -1373,9 +1396,11 @@ getPatternComplexity(const CodeGenDAGPatterns &CGP) const { /// std::string PatternToMatch::getPredicateCheck() const { SmallVector PredList; - for (const Predicate &P : Predicates) - PredList.push_back(&P); - llvm::sort(PredList, deref()); + for (const Predicate &P : Predicates) { + if (!P.getCondString().empty()) + PredList.push_back(&P); + } + llvm::sort(PredList, deref>()); std::string Check; for (unsigned i = 0, e = PredList.size(); i != e; ++i) { @@ -2772,6 +2797,7 @@ TreePatternNodePtr TreePattern::ParseTreePattern(Init *TheInit, if (Operator->isSubClassOf("SDNode") && Operator->getName() != "imm" && + Operator->getName() != "timm" && Operator->getName() != "fpimm" && Operator->getName() != "tglobaltlsaddr" && Operator->getName() != "tconstpool" && @@ -3083,7 +3109,7 @@ void CodeGenDAGPatterns::ParsePatternFragments(bool OutFrags) { ListInit *LI = Frag->getValueAsListInit("Fragments"); TreePattern *P = - (PatternFragments[Frag] = llvm::make_unique( + (PatternFragments[Frag] = std::make_unique( Frag, LI, !Frag->isSubClassOf("OutPatFrag"), *this)).get(); diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h index 2b49a64c3f1d..80fc932a7a50 100644 --- a/utils/TableGen/CodeGenDAGPatterns.h +++ b/utils/TableGen/CodeGenDAGPatterns.h @@ -594,6 +594,7 @@ public: Record *getScalarMemoryVT() const; ListInit *getAddressSpaces() const; + int64_t getMinAlignment() const; // If true, indicates that GlobalISel-based C++ code was supplied. bool hasGISelPredicateCode() const; @@ -1075,8 +1076,11 @@ public: std::string C = IsHwMode ? std::string("MF->getSubtarget().checkFeatures(\"" + Features + "\")") : std::string(Def->getValueAsString("CondString")); + if (C.empty()) + return ""; return IfCond ? 
C : "!("+C+')'; } + bool operator==(const Predicate &P) const { return IfCond == P.IfCond && IsHwMode == P.IsHwMode && Def == P.Def; } diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp index 2463824469ab..fde946d06589 100644 --- a/utils/TableGen/CodeGenInstruction.cpp +++ b/utils/TableGen/CodeGenInstruction.cpp @@ -363,6 +363,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R) Namespace = R->getValueAsString("Namespace"); AsmString = R->getValueAsString("AsmString"); + isPreISelOpcode = R->getValueAsBit("isPreISelOpcode"); isReturn = R->getValueAsBit("isReturn"); isEHScopeReturn = R->getValueAsBit("isEHScopeReturn"); isBranch = R->getValueAsBit("isBranch"); diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h index bb5b1369649f..2cb28425df7a 100644 --- a/utils/TableGen/CodeGenInstruction.h +++ b/utils/TableGen/CodeGenInstruction.h @@ -231,6 +231,7 @@ template class ArrayRef; std::vector ImplicitDefs, ImplicitUses; // Various boolean values we track for the instruction. + bool isPreISelOpcode : 1; bool isReturn : 1; bool isEHScopeReturn : 1; bool isBranch : 1; diff --git a/utils/TableGen/CodeGenIntrinsics.h b/utils/TableGen/CodeGenIntrinsics.h index 7b74bb07d6e0..83e780671b43 100644 --- a/utils/TableGen/CodeGenIntrinsics.h +++ b/utils/TableGen/CodeGenIntrinsics.h @@ -141,6 +141,7 @@ struct CodeGenIntrinsic { enum ArgAttribute { NoCapture, + NoAlias, Returned, ReadOnly, WriteOnly, @@ -154,6 +155,13 @@ struct CodeGenIntrinsic { return Properties & (1 << Prop); } + /// Returns true if the parameter at \p ParamIdx is a pointer type. Returns + /// false if the parameter is not a pointer, or \p ParamIdx is greater than + /// the size of \p IS.ParamVTs. + /// + /// Note that this requires that \p IS.ParamVTs is available. + bool isParamAPointer(unsigned ParamIdx) const; + CodeGenIntrinsic(Record *R); }; diff --git a/utils/TableGen/CodeGenMapTable.cpp b/utils/TableGen/CodeGenMapTable.cpp index b1774b01ba8c..793bb61481e7 100644 --- a/utils/TableGen/CodeGenMapTable.cpp +++ b/utils/TableGen/CodeGenMapTable.cpp @@ -132,7 +132,7 @@ public: MapRec->getName() + "' has empty " + "`ValueCols' field!"); for (Init *I : ColValList->getValues()) { - ListInit *ColI = dyn_cast(I); + auto *ColI = cast(I); // Make sure that all the sub-lists in 'ValueCols' have same number of // elements as the fields in 'ColFields'. @@ -168,7 +168,7 @@ public: return ValueCols; } }; -} // End anonymous namespace. +} // end anonymous namespace //===----------------------------------------------------------------------===// @@ -226,7 +226,7 @@ public: void emitMapFuncBody(raw_ostream &OS, unsigned TableSize); }; -} // End anonymous namespace. +} // end anonymous namespace //===----------------------------------------------------------------------===// @@ -521,7 +521,7 @@ static void emitEnums(raw_ostream &OS, RecordKeeper &Records) { unsigned ListSize = List->size(); for (unsigned j = 0; j < ListSize; j++) { - ListInit *ListJ = dyn_cast(List->getElement(j)); + auto *ListJ = cast(List->getElement(j)); if (ListJ->size() != ColFields->size()) PrintFatalError("Record `" + CurMap->getName() + "', field " @@ -604,8 +604,8 @@ void EmitMapTable(RecordKeeper &Records, raw_ostream &OS) { // Emit map tables and the functions to query them. 
IMap.emitTablesWithFunc(OS); } - OS << "} // End " << NameSpace << " namespace\n"; - OS << "} // End llvm namespace\n"; + OS << "} // end namespace " << NameSpace << "\n"; + OS << "} // end namespace llvm\n"; OS << "#endif // GET_INSTRMAP_INFO\n\n"; } diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp index f87c6d6c945a..6153c759b123 100644 --- a/utils/TableGen/CodeGenRegisters.cpp +++ b/utils/TableGen/CodeGenRegisters.cpp @@ -639,7 +639,8 @@ struct TupleExpander : SetTheory::Expander { // Precompute some types. Record *RegisterCl = Def->getRecords().getClass("Register"); RecTy *RegisterRecTy = RecordRecTy::get(RegisterCl); - StringInit *BlankName = StringInit::get(""); + std::vector RegNames = + Def->getValueAsListOfStrings("RegAsmNames"); // Zip them up. for (unsigned n = 0; n != Length; ++n) { @@ -656,11 +657,20 @@ struct TupleExpander : SetTheory::Expander { unsigned(Reg->getValueAsInt("CostPerUse"))); } + StringInit *AsmName = StringInit::get(""); + if (!RegNames.empty()) { + if (RegNames.size() <= n) + PrintFatalError(Def->getLoc(), + "Register tuple definition missing name for '" + + Name + "'."); + AsmName = StringInit::get(RegNames[n]); + } + // Create a new Record representing the synthesized register. This record // is only for consumption by CodeGenRegister, it is not added to the // RecordKeeper. SynthDefs.emplace_back( - llvm::make_unique(Name, Def->getLoc(), Def->getRecords())); + std::make_unique(Name, Def->getLoc(), Def->getRecords())); Record *NewReg = SynthDefs.back().get(); Elts.insert(NewReg); @@ -683,9 +693,8 @@ struct TupleExpander : SetTheory::Expander { if (Field == "SubRegs") RV.setValue(ListInit::get(Tuple, RegisterRecTy)); - // Provide a blank AsmName. MC hacks are required anyway. if (Field == "AsmName") - RV.setValue(BlankName); + RV.setValue(AsmName); // CostPerUse is aggregated from all Tuple members. if (Field == "CostPerUse") @@ -725,8 +734,8 @@ struct TupleExpander : SetTheory::Expander { //===----------------------------------------------------------------------===// static void sortAndUniqueRegisters(CodeGenRegister::Vec &M) { - llvm::sort(M, deref()); - M.erase(std::unique(M.begin(), M.end(), deref()), M.end()); + llvm::sort(M, deref>()); + M.erase(std::unique(M.begin(), M.end(), deref>()), M.end()); } CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R) @@ -851,7 +860,7 @@ void CodeGenRegisterClass::inheritProperties(CodeGenRegBank &RegBank) { bool CodeGenRegisterClass::contains(const CodeGenRegister *Reg) const { return std::binary_search(Members.begin(), Members.end(), Reg, - deref()); + deref>()); } namespace llvm { @@ -887,7 +896,7 @@ static bool testSubClass(const CodeGenRegisterClass *A, return A->RSI.isSubClassOf(B->RSI) && std::includes(A->getMembers().begin(), A->getMembers().end(), B->getMembers().begin(), B->getMembers().end(), - deref()); + deref>()); } /// Sorting predicate for register classes. This provides a topological @@ -1089,7 +1098,7 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records, Sets.addFieldExpander("RegisterClass", "MemberList"); Sets.addFieldExpander("CalleeSavedRegs", "SaveList"); Sets.addExpander("RegisterTuples", - llvm::make_unique(SynthDefs)); + std::make_unique(SynthDefs)); // Read in the user-defined (named) sub-register indices. // More indices will be synthesized later. 
@@ -2131,9 +2140,10 @@ void CodeGenRegBank::inferCommonSubClass(CodeGenRegisterClass *RC) { const CodeGenRegister::Vec &Memb1 = RC1->getMembers(); const CodeGenRegister::Vec &Memb2 = RC2->getMembers(); CodeGenRegister::Vec Intersection; - std::set_intersection( - Memb1.begin(), Memb1.end(), Memb2.begin(), Memb2.end(), - std::inserter(Intersection, Intersection.begin()), deref()); + std::set_intersection(Memb1.begin(), Memb1.end(), Memb2.begin(), + Memb2.end(), + std::inserter(Intersection, Intersection.begin()), + deref>()); // Skip disjoint class pairs. if (Intersection.empty()) @@ -2158,7 +2168,8 @@ void CodeGenRegBank::inferCommonSubClass(CodeGenRegisterClass *RC) { void CodeGenRegBank::inferSubClassWithSubReg(CodeGenRegisterClass *RC) { // Map SubRegIndex to set of registers in RC supporting that SubRegIndex. typedef std::map> SubReg2SetMap; + deref>> + SubReg2SetMap; // Compute the set of registers supporting each SubRegIndex. SubReg2SetMap SRSets; @@ -2357,6 +2368,21 @@ CodeGenRegBank::getRegClassForRegister(Record *R) { return FoundRC; } +const CodeGenRegisterClass * +CodeGenRegBank::getMinimalPhysRegClass(Record *RegRecord, + ValueTypeByHwMode *VT) { + const CodeGenRegister *Reg = getReg(RegRecord); + const CodeGenRegisterClass *BestRC = nullptr; + for (const auto &RC : getRegClasses()) { + if ((!VT || RC.hasType(*VT)) && + RC.contains(Reg) && (!BestRC || BestRC->hasSubClass(&RC))) + BestRC = &RC; + } + + assert(BestRC && "Couldn't find the register class"); + return BestRC; +} + BitVector CodeGenRegBank::computeCoveredRegisters(ArrayRef Regs) { SetVector Set; diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h index f04a90f8fde5..6d933baec2ae 100644 --- a/utils/TableGen/CodeGenRegisters.h +++ b/utils/TableGen/CodeGenRegisters.h @@ -93,7 +93,8 @@ namespace llvm { // Map of composite subreg indices. typedef std::map> CompMap; + deref>> + CompMap; // Returns the subreg index that results from composing this with Idx. // Returns NULL if this and Idx don't compose. @@ -137,15 +138,14 @@ namespace llvm { /// list of subregisters they are composed of (if any). Do this recursively. void computeConcatTransitiveClosure(); + bool operator<(const CodeGenSubRegIndex &RHS) const { + return this->EnumValue < RHS.EnumValue; + } + private: CompMap Composed; }; - inline bool operator<(const CodeGenSubRegIndex &A, - const CodeGenSubRegIndex &B) { - return A.EnumValue < B.EnumValue; - } - /// CodeGenRegister - Represents a register definition. struct CodeGenRegister { Record *TheDef; @@ -156,7 +156,8 @@ namespace llvm { bool Artificial; // Map SubRegIndex -> Register. - typedef std::map> + typedef std::map>> SubRegMap; CodeGenRegister(Record *R, unsigned Enum); @@ -347,6 +348,10 @@ namespace llvm { ArrayRef getValueTypes() const { return VTs; } unsigned getNumValueTypes() const { return VTs.size(); } + bool hasType(const ValueTypeByHwMode &VT) const { + return std::find(VTs.begin(), VTs.end(), VT) != VTs.end(); + } + const ValueTypeByHwMode &getValueTypeNum(unsigned VTNum) const { if (VTNum < VTs.size()) return VTs[VTNum]; @@ -708,6 +713,13 @@ namespace llvm { /// return the superclass. Otherwise return null. const CodeGenRegisterClass* getRegClassForRegister(Record *R); + // Analog of TargetRegisterInfo::getMinimalPhysRegClass. Unlike + // getRegClassForRegister, this tries to find the smallest class containing + // the physical register. 
If \p VT is specified, it will only find classes + // with a matching type + const CodeGenRegisterClass * + getMinimalPhysRegClass(Record *RegRecord, ValueTypeByHwMode *VT = nullptr); + // Get the sum of unit weights. unsigned getRegUnitSetWeight(const std::vector &Units) const { unsigned Weight = 0; diff --git a/utils/TableGen/CodeGenSchedule.cpp b/utils/TableGen/CodeGenSchedule.cpp index fd007044a16e..f12d7d484a8e 100644 --- a/utils/TableGen/CodeGenSchedule.cpp +++ b/utils/TableGen/CodeGenSchedule.cpp @@ -172,8 +172,8 @@ CodeGenSchedModels::CodeGenSchedModels(RecordKeeper &RK, // Allow Set evaluation to recognize the dags used in InstRW records: // (instrs Op1, Op1...) - Sets.addOperator("instrs", llvm::make_unique()); - Sets.addOperator("instregex", llvm::make_unique(Target)); + Sets.addOperator("instrs", std::make_unique()); + Sets.addOperator("instregex", std::make_unique(Target)); // Instantiate a CodeGenProcModel for each SchedMachineModel with the values // that are explicitly referenced in tablegen records. Resources associated @@ -1083,9 +1083,13 @@ void CodeGenSchedModels::createInstRWClass(Record *InstRWDef) { if (RWD->getValueAsDef("SchedModel") == RWModelDef && RWModelDef->getValueAsBit("FullInstRWOverlapCheck")) { for (Record *Inst : InstDefs) { - PrintFatalError(InstRWDef->getLoc(), "Overlapping InstRW def " + - Inst->getName() + " also matches " + - RWD->getValue("Instrs")->getValue()->getAsString()); + PrintFatalError + (InstRWDef->getLoc(), + "Overlapping InstRW definition for \"" + + Inst->getName() + + "\" also matches previous \"" + + RWD->getValue("Instrs")->getValue()->getAsString() + + "\"."); } } } @@ -1115,9 +1119,13 @@ void CodeGenSchedModels::createInstRWClass(Record *InstRWDef) { for (Record *OldRWDef : SchedClasses[OldSCIdx].InstRWs) { if (OldRWDef->getValueAsDef("SchedModel") == RWModelDef) { for (Record *InstDef : InstDefs) { - PrintFatalError(OldRWDef->getLoc(), "Overlapping InstRW def " + - InstDef->getName() + " also matches " + - OldRWDef->getValue("Instrs")->getValue()->getAsString()); + PrintFatalError + (InstRWDef->getLoc(), + "Overlapping InstRW definition for \"" + + InstDef->getName() + + "\" also matches previous \"" + + OldRWDef->getValue("Instrs")->getValue()->getAsString() + + "\"."); } } assert(OldRWDef != InstRWDef && diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp index b65e1b6af791..fa8b842c97f9 100644 --- a/utils/TableGen/CodeGenTarget.cpp +++ b/utils/TableGen/CodeGenTarget.cpp @@ -98,6 +98,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) { case MVT::v256i8: return "MVT::v256i8"; case MVT::v1i16: return "MVT::v1i16"; case MVT::v2i16: return "MVT::v2i16"; + case MVT::v3i16: return "MVT::v3i16"; case MVT::v4i16: return "MVT::v4i16"; case MVT::v8i16: return "MVT::v8i16"; case MVT::v16i16: return "MVT::v16i16"; @@ -126,8 +127,11 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) { case MVT::v32i64: return "MVT::v32i64"; case MVT::v1i128: return "MVT::v1i128"; case MVT::v2f16: return "MVT::v2f16"; + case MVT::v3f16: return "MVT::v3f16"; case MVT::v4f16: return "MVT::v4f16"; case MVT::v8f16: return "MVT::v8f16"; + case MVT::v16f16: return "MVT::v16f16"; + case MVT::v32f16: return "MVT::v32f16"; case MVT::v1f32: return "MVT::v1f32"; case MVT::v2f32: return "MVT::v2f32"; case MVT::v3f32: return "MVT::v3f32"; @@ -289,10 +293,57 @@ Record *CodeGenTarget::getAsmWriter() const { CodeGenRegBank &CodeGenTarget::getRegBank() const { if (!RegBank) - RegBank = llvm::make_unique(Records, getHwModes()); + RegBank 
= std::make_unique(Records, getHwModes()); return *RegBank; } +Optional +CodeGenTarget::getSuperRegForSubReg(const ValueTypeByHwMode &ValueTy, + CodeGenRegBank &RegBank, + const CodeGenSubRegIndex *SubIdx) const { + std::vector Candidates; + auto &RegClasses = RegBank.getRegClasses(); + + // Try to find a register class which supports ValueTy, and also contains + // SubIdx. + for (CodeGenRegisterClass &RC : RegClasses) { + // Is there a subclass of this class which contains this subregister index? + CodeGenRegisterClass *SubClassWithSubReg = RC.getSubClassWithSubReg(SubIdx); + if (!SubClassWithSubReg) + continue; + + // We have a class. Check if it supports this value type. + if (llvm::none_of(SubClassWithSubReg->VTs, + [&ValueTy](const ValueTypeByHwMode &ClassVT) { + return ClassVT == ValueTy; + })) + continue; + + // We have a register class which supports both the value type and + // subregister index. Remember it. + Candidates.push_back(SubClassWithSubReg); + } + + // If we didn't find anything, we're done. + if (Candidates.empty()) + return None; + + // Find and return the largest of our candidate classes. + llvm::stable_sort(Candidates, [&](const CodeGenRegisterClass *A, + const CodeGenRegisterClass *B) { + if (A->getMembers().size() > B->getMembers().size()) + return true; + + if (A->getMembers().size() < B->getMembers().size()) + return false; + + // Order by name as a tie-breaker. + return StringRef(A->getName()) < B->getName(); + }); + + return Candidates[0]; +} + void CodeGenTarget::ReadRegAltNameIndices() const { RegAltNameIndices = Records.getAllDerivedDefinitions("RegAltNameIndex"); llvm::sort(RegAltNameIndices, LessRecord()); @@ -339,7 +390,7 @@ void CodeGenTarget::ReadLegalValueTypes() const { CodeGenSchedModels &CodeGenTarget::getSchedModels() const { if (!SchedModels) - SchedModels = llvm::make_unique(Records, *this); + SchedModels = std::make_unique(Records, *this); return *SchedModels; } @@ -352,7 +403,7 @@ void CodeGenTarget::ReadInstructions() const { // Parse the instructions defined in the .td file. for (unsigned i = 0, e = Insts.size(); i != e; ++i) - Instructions[Insts[i]] = llvm::make_unique(Insts[i]); + Instructions[Insts[i]] = std::make_unique(Insts[i]); } static const CodeGenInstruction * @@ -427,7 +478,8 @@ void CodeGenTarget::reverseBitsForLittleEndianEncoding() { if (!isLittleEndianEncoding()) return; - std::vector Insts = Records.getAllDerivedDefinitions("Instruction"); + std::vector Insts = + Records.getAllDerivedDefinitions("InstructionEncoding"); for (Record *R : Insts) { if (R->getValueAsString("Namespace") == "TargetOpcode" || R->getValueAsBit("isPseudo")) @@ -733,6 +785,9 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) { else if (Property->isSubClassOf("NoCapture")) { unsigned ArgNo = Property->getValueAsInt("ArgNo"); ArgumentAttributes.push_back(std::make_pair(ArgNo, NoCapture)); + } else if (Property->isSubClassOf("NoAlias")) { + unsigned ArgNo = Property->getValueAsInt("ArgNo"); + ArgumentAttributes.push_back(std::make_pair(ArgNo, NoAlias)); } else if (Property->isSubClassOf("Returned")) { unsigned ArgNo = Property->getValueAsInt("ArgNo"); ArgumentAttributes.push_back(std::make_pair(ArgNo, Returned)); @@ -758,3 +813,10 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) { // Sort the argument attributes for later benefit. 
llvm::sort(ArgumentAttributes); } + +bool CodeGenIntrinsic::isParamAPointer(unsigned ParamIdx) const { + if (ParamIdx >= IS.ParamVTs.size()) + return false; + MVT ParamType = MVT(IS.ParamVTs[ParamIdx]); + return ParamType == MVT::iPTR || ParamType == MVT::iPTRAny; +} diff --git a/utils/TableGen/CodeGenTarget.h b/utils/TableGen/CodeGenTarget.h index 1ab2de269c76..d52ffac4ce6c 100644 --- a/utils/TableGen/CodeGenTarget.h +++ b/utils/TableGen/CodeGenTarget.h @@ -103,6 +103,12 @@ public: /// getRegBank - Return the register bank description. CodeGenRegBank &getRegBank() const; + /// Return the largest register class on \p RegBank which supports \p Ty and + /// covers \p SubIdx if it exists. + Optional + getSuperRegForSubReg(const ValueTypeByHwMode &Ty, CodeGenRegBank &RegBank, + const CodeGenSubRegIndex *SubIdx) const; + /// getRegisterByName - If there is a register with the specific AsmName, /// return it. const CodeGenRegister *getRegisterByName(StringRef Name) const; diff --git a/utils/TableGen/DAGISelEmitter.cpp b/utils/TableGen/DAGISelEmitter.cpp index fb0c6faa5295..d8e78ce55c7b 100644 --- a/utils/TableGen/DAGISelEmitter.cpp +++ b/utils/TableGen/DAGISelEmitter.cpp @@ -173,7 +173,7 @@ void DAGISelEmitter::run(raw_ostream &OS) { } std::unique_ptr TheMatcher = - llvm::make_unique(PatternMatchers); + std::make_unique(PatternMatchers); OptimizeMatcher(TheMatcher, CGP); //Matcher->dump(); diff --git a/utils/TableGen/DAGISelMatcher.h b/utils/TableGen/DAGISelMatcher.h index 0a782e84a372..223513fc8d38 100644 --- a/utils/TableGen/DAGISelMatcher.h +++ b/utils/TableGen/DAGISelMatcher.h @@ -932,13 +932,15 @@ private: /// class EmitCopyToRegMatcher : public Matcher { unsigned SrcSlot; // Value to copy into the physreg. - Record *DestPhysReg; + const CodeGenRegister *DestPhysReg; + public: - EmitCopyToRegMatcher(unsigned srcSlot, Record *destPhysReg) + EmitCopyToRegMatcher(unsigned srcSlot, + const CodeGenRegister *destPhysReg) : Matcher(EmitCopyToReg), SrcSlot(srcSlot), DestPhysReg(destPhysReg) {} unsigned getSrcSlot() const { return SrcSlot; } - Record *getDestPhysReg() const { return DestPhysReg; } + const CodeGenRegister *getDestPhysReg() const { return DestPhysReg; } static bool classof(const Matcher *N) { return N->getKind() == EmitCopyToReg; diff --git a/utils/TableGen/DAGISelMatcherEmitter.cpp b/utils/TableGen/DAGISelMatcherEmitter.cpp index cecbc6cccdff..e9f1fb93d516 100644 --- a/utils/TableGen/DAGISelMatcherEmitter.cpp +++ b/utils/TableGen/DAGISelMatcherEmitter.cpp @@ -670,12 +670,22 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, OS << '\n'; return 2+MN->getNumNodes(); } - case Matcher::EmitCopyToReg: - OS << "OPC_EmitCopyToReg, " - << cast(N)->getSrcSlot() << ", " - << getQualifiedName(cast(N)->getDestPhysReg()) - << ",\n"; - return 3; + case Matcher::EmitCopyToReg: { + const auto *C2RMatcher = cast(N); + int Bytes = 3; + const CodeGenRegister *Reg = C2RMatcher->getDestPhysReg(); + if (Reg->EnumValue > 255) { + assert(isUInt<16>(Reg->EnumValue) && "not handled"); + OS << "OPC_EmitCopyToReg2, " << C2RMatcher->getSrcSlot() << ", " + << "TARGET_VAL(" << getQualifiedName(Reg->TheDef) << "),\n"; + ++Bytes; + } else { + OS << "OPC_EmitCopyToReg, " << C2RMatcher->getSrcSlot() << ", " + << getQualifiedName(Reg->TheDef) << ",\n"; + } + + return Bytes; + } case Matcher::EmitNodeXForm: { const EmitNodeXFormMatcher *XF = cast(N); OS << "OPC_EmitNodeXForm, " << getNodeXFormID(XF->getNodeXForm()) << ", " diff --git a/utils/TableGen/DAGISelMatcherGen.cpp 
b/utils/TableGen/DAGISelMatcherGen.cpp index 8f54beeba65b..49c09c7d195e 100644 --- a/utils/TableGen/DAGISelMatcherGen.cpp +++ b/utils/TableGen/DAGISelMatcherGen.cpp @@ -141,7 +141,7 @@ namespace { SmallVectorImpl &ResultOps); }; -} // end anon namespace. +} // end anonymous namespace MatcherGen::MatcherGen(const PatternToMatch &pattern, const CodeGenDAGPatterns &cgp) @@ -867,9 +867,13 @@ EmitResultInstructionAsOperand(const TreePatternNode *N, if (isRoot && !PhysRegInputs.empty()) { // Emit all of the CopyToReg nodes for the input physical registers. These // occur in patterns like (mul:i8 AL:i8, GR8:i8:$src). - for (unsigned i = 0, e = PhysRegInputs.size(); i != e; ++i) + for (unsigned i = 0, e = PhysRegInputs.size(); i != e; ++i) { + const CodeGenRegister *Reg = + CGP.getTargetInfo().getRegBank().getReg(PhysRegInputs[i].first); AddMatcher(new EmitCopyToRegMatcher(PhysRegInputs[i].second, - PhysRegInputs[i].first)); + Reg)); + } + // Even if the node has no other glue inputs, the resultant node must be // glued to the CopyFromReg nodes we just generated. TreeHasInGlue = true; diff --git a/utils/TableGen/DAGISelMatcherOpt.cpp b/utils/TableGen/DAGISelMatcherOpt.cpp index 7d51b0769372..6746fdd676a7 100644 --- a/utils/TableGen/DAGISelMatcherOpt.cpp +++ b/utils/TableGen/DAGISelMatcherOpt.cpp @@ -409,13 +409,14 @@ static void FactorNodes(std::unique_ptr &InputMatcherPtr) { DenseMap TypeEntry; SmallVector, 8> Cases; for (unsigned i = 0, e = NewOptionsToMatch.size(); i != e; ++i) { - CheckTypeMatcher *CTM = - cast_or_null(FindNodeWithKind(NewOptionsToMatch[i], - Matcher::CheckType)); + Matcher* M = FindNodeWithKind(NewOptionsToMatch[i], Matcher::CheckType); + assert(M && isa(M) && "Unknown Matcher type"); + + auto *CTM = cast(M); Matcher *MatcherWithoutCTM = NewOptionsToMatch[i]->unlinkNode(CTM); MVT::SimpleValueType CTMTy = CTM->getType(); delete CTM; - + unsigned &Entry = TypeEntry[CTMTy]; if (Entry != 0) { // If we have unfactored duplicate types, then we should factor them. diff --git a/utils/TableGen/DFAEmitter.cpp b/utils/TableGen/DFAEmitter.cpp new file mode 100644 index 000000000000..dd3db7c150ba --- /dev/null +++ b/utils/TableGen/DFAEmitter.cpp @@ -0,0 +1,394 @@ +//===- DFAEmitter.cpp - Finite state automaton emitter --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class can produce a generic deterministic finite state automaton (DFA), +// given a set of possible states and transitions. +// +// The input transitions can be nondeterministic - this class will produce the +// deterministic equivalent state machine. +// +// The generated code can run the DFA and produce an accepted / not accepted +// state and also produce, given a sequence of transitions that results in an +// accepted state, the sequence of intermediate states. This is useful if the +// initial automaton was nondeterministic - it allows mapping back from the DFA +// to the NFA. 
+// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "dfa-emitter" + +#include "DFAEmitter.h" +#include "CodeGenTarget.h" +#include "SequenceToOffsetTable.h" +#include "TableGenBackends.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/UniqueVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/Record.h" +#include "llvm/TableGen/TableGenBackend.h" +#include +#include +#include +#include +#include +#include + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// DfaEmitter implementation. This is independent of the GenAutomaton backend. +//===----------------------------------------------------------------------===// + +void DfaEmitter::addTransition(state_type From, state_type To, action_type A) { + Actions.insert(A); + NfaStates.insert(From); + NfaStates.insert(To); + NfaTransitions[{From, A}].push_back(To); + ++NumNfaTransitions; +} + +void DfaEmitter::visitDfaState(DfaState DS) { + // For every possible action... + auto FromId = DfaStates.idFor(DS); + for (action_type A : Actions) { + DfaState NewStates; + DfaTransitionInfo TI; + // For every represented state, word pair in the original NFA... + for (state_type &FromState : DS) { + // If this action is possible from this state add the transitioned-to + // states to NewStates. + auto I = NfaTransitions.find({FromState, A}); + if (I == NfaTransitions.end()) + continue; + for (state_type &ToState : I->second) { + NewStates.push_back(ToState); + TI.emplace_back(FromState, ToState); + } + } + if (NewStates.empty()) + continue; + // Sort and unique. + sort(NewStates); + NewStates.erase(std::unique(NewStates.begin(), NewStates.end()), + NewStates.end()); + sort(TI); + TI.erase(std::unique(TI.begin(), TI.end()), TI.end()); + unsigned ToId = DfaStates.insert(NewStates); + DfaTransitions.emplace(std::make_pair(FromId, A), std::make_pair(ToId, TI)); + } +} + +void DfaEmitter::constructDfa() { + DfaState Initial(1, /*NFA initial state=*/0); + DfaStates.insert(Initial); + + // Note that UniqueVector starts indices at 1, not zero. + unsigned DfaStateId = 1; + while (DfaStateId <= DfaStates.size()) + visitDfaState(DfaStates[DfaStateId++]); +} + +void DfaEmitter::emit(StringRef Name, raw_ostream &OS) { + constructDfa(); + + OS << "// Input NFA has " << NfaStates.size() << " states with " + << NumNfaTransitions << " transitions.\n"; + OS << "// Generated DFA has " << DfaStates.size() << " states with " + << DfaTransitions.size() << " transitions.\n\n"; + + // Implementation note: We don't bake a simple std::pair<> here as it requires + // significantly more effort to parse. A simple test with a large array of + // struct-pairs (N=100000) took clang-10 6s to parse. The same array of + // std::pair took 242s. Instead we allow the user to + // define the pair type. + // + // FIXME: It may make sense to emit these as ULEB sequences instead of + // pairs of uint64_t. + OS << "// A zero-terminated sequence of NFA state transitions. Every DFA\n"; + OS << "// transition implies a set of NFA transitions. 
These are referred\n"; + OS << "// to by index in " << Name << "Transitions[].\n"; + + SequenceToOffsetTable Table; + std::map EmittedIndices; + for (auto &T : DfaTransitions) + Table.add(T.second.second); + Table.layout(); + OS << "std::array " << Name + << "TransitionInfo = {{\n"; + Table.emit( + OS, + [](raw_ostream &OS, std::pair P) { + OS << "{" << P.first << ", " << P.second << "}"; + }, + "{0ULL, 0ULL}"); + + OS << "}};\n\n"; + + OS << "// A transition in the generated " << Name << " DFA.\n"; + OS << "struct " << Name << "Transition {\n"; + OS << " unsigned FromDfaState; // The transitioned-from DFA state.\n"; + OS << " "; + printActionType(OS); + OS << " Action; // The input symbol that causes this transition.\n"; + OS << " unsigned ToDfaState; // The transitioned-to DFA state.\n"; + OS << " unsigned InfoIdx; // Start index into " << Name + << "TransitionInfo.\n"; + OS << "};\n\n"; + + OS << "// A table of DFA transitions, ordered by {FromDfaState, Action}.\n"; + OS << "// The initial state is 1, not zero.\n"; + OS << "std::array<" << Name << "Transition, " << DfaTransitions.size() << "> " + << Name << "Transitions = {{\n"; + for (auto &KV : DfaTransitions) { + dfa_state_type From = KV.first.first; + dfa_state_type To = KV.second.first; + action_type A = KV.first.second; + unsigned InfoIdx = Table.get(KV.second.second); + OS << " {" << From << ", "; + printActionValue(A, OS); + OS << ", " << To << ", " << InfoIdx << "},\n"; + } + OS << "\n}};\n\n"; +} + +void DfaEmitter::printActionType(raw_ostream &OS) { OS << "uint64_t"; } + +void DfaEmitter::printActionValue(action_type A, raw_ostream &OS) { OS << A; } + +//===----------------------------------------------------------------------===// +// AutomatonEmitter implementation +//===----------------------------------------------------------------------===// + +namespace { +// FIXME: This entire discriminated union could be removed with c++17: +// using Action = std::variant; +struct Action { + Record *R = nullptr; + unsigned I = 0; + std::string S = nullptr; + + Action() = default; + Action(Record *R, unsigned I, std::string S) : R(R), I(I), S(S) {} + + void print(raw_ostream &OS) const { + if (R) + OS << R->getName(); + else if (!S.empty()) + OS << '"' << S << '"'; + else + OS << I; + } + bool operator<(const Action &Other) const { + return std::make_tuple(R, I, S) < + std::make_tuple(Other.R, Other.I, Other.S); + } +}; + +using ActionTuple = std::vector; +class Automaton; + +class Transition { + uint64_t NewState; + // The tuple of actions that causes this transition. + ActionTuple Actions; + // The types of the actions; this is the same across all transitions. + SmallVector Types; + +public: + Transition(Record *R, Automaton *Parent); + const ActionTuple &getActions() { return Actions; } + SmallVector getTypes() { return Types; } + + bool canTransitionFrom(uint64_t State); + uint64_t transitionFrom(uint64_t State); +}; + +class Automaton { + RecordKeeper &Records; + Record *R; + std::vector Transitions; + /// All possible action tuples, uniqued. + UniqueVector Actions; + /// The fields within each Transition object to find the action symbols. + std::vector ActionSymbolFields; + +public: + Automaton(RecordKeeper &Records, Record *R); + void emit(raw_ostream &OS); + + ArrayRef getActionSymbolFields() { return ActionSymbolFields; } + /// If the type of action A has been overridden (there exists a field + /// "TypeOf_A") return that, otherwise return the empty string. 
+ StringRef getActionSymbolType(StringRef A); +}; + +class AutomatonEmitter { + RecordKeeper &Records; + +public: + AutomatonEmitter(RecordKeeper &R) : Records(R) {} + void run(raw_ostream &OS); +}; + +/// A DfaEmitter implementation that can print our variant action type. +class CustomDfaEmitter : public DfaEmitter { + const UniqueVector &Actions; + std::string TypeName; + +public: + CustomDfaEmitter(const UniqueVector &Actions, StringRef TypeName) + : Actions(Actions), TypeName(TypeName) {} + + void printActionType(raw_ostream &OS) override; + void printActionValue(action_type A, raw_ostream &OS) override; +}; +} // namespace + +void AutomatonEmitter::run(raw_ostream &OS) { + for (Record *R : Records.getAllDerivedDefinitions("GenericAutomaton")) { + Automaton A(Records, R); + OS << "#ifdef GET_" << R->getName() << "_DECL\n"; + A.emit(OS); + OS << "#endif // GET_" << R->getName() << "_DECL\n"; + } +} + +Automaton::Automaton(RecordKeeper &Records, Record *R) + : Records(Records), R(R) { + LLVM_DEBUG(dbgs() << "Emitting automaton for " << R->getName() << "\n"); + ActionSymbolFields = R->getValueAsListOfStrings("SymbolFields"); +} + +void Automaton::emit(raw_ostream &OS) { + StringRef TransitionClass = R->getValueAsString("TransitionClass"); + for (Record *T : Records.getAllDerivedDefinitions(TransitionClass)) { + assert(T->isSubClassOf("Transition")); + Transitions.emplace_back(T, this); + Actions.insert(Transitions.back().getActions()); + } + + LLVM_DEBUG(dbgs() << " Action alphabet cardinality: " << Actions.size() + << "\n"); + LLVM_DEBUG(dbgs() << " Each state has " << Transitions.size() + << " potential transitions.\n"); + + StringRef Name = R->getName(); + + CustomDfaEmitter Emitter(Actions, std::string(Name) + "Action"); + // Starting from the initial state, build up a list of possible states and + // transitions. 
+ std::deque Worklist(1, 0); + std::set SeenStates; + unsigned NumTransitions = 0; + SeenStates.insert(Worklist.front()); + while (!Worklist.empty()) { + uint64_t State = Worklist.front(); + Worklist.pop_front(); + for (Transition &T : Transitions) { + if (!T.canTransitionFrom(State)) + continue; + uint64_t NewState = T.transitionFrom(State); + if (SeenStates.emplace(NewState).second) + Worklist.emplace_back(NewState); + ++NumTransitions; + Emitter.addTransition(State, NewState, Actions.idFor(T.getActions())); + } + } + LLVM_DEBUG(dbgs() << " NFA automaton has " << SeenStates.size() + << " states with " << NumTransitions << " transitions.\n"); + + const auto &ActionTypes = Transitions.back().getTypes(); + OS << "// The type of an action in the " << Name << " automaton.\n"; + if (ActionTypes.size() == 1) { + OS << "using " << Name << "Action = " << ActionTypes[0] << ";\n"; + } else { + OS << "using " << Name << "Action = std::tuple<" << join(ActionTypes, ", ") + << ">;\n"; + } + OS << "\n"; + + Emitter.emit(Name, OS); +} + +StringRef Automaton::getActionSymbolType(StringRef A) { + Twine Ty = "TypeOf_" + A; + if (!R->getValue(Ty.str())) + return ""; + return R->getValueAsString(Ty.str()); +} + +Transition::Transition(Record *R, Automaton *Parent) { + BitsInit *NewStateInit = R->getValueAsBitsInit("NewState"); + NewState = 0; + assert(NewStateInit->getNumBits() <= sizeof(uint64_t) * 8 && + "State cannot be represented in 64 bits!"); + for (unsigned I = 0; I < NewStateInit->getNumBits(); ++I) { + if (auto *Bit = dyn_cast(NewStateInit->getBit(I))) { + if (Bit->getValue()) + NewState |= 1ULL << I; + } + } + + for (StringRef A : Parent->getActionSymbolFields()) { + RecordVal *SymbolV = R->getValue(A); + if (auto *Ty = dyn_cast(SymbolV->getType())) { + Actions.emplace_back(R->getValueAsDef(A), 0, ""); + Types.emplace_back(Ty->getAsString()); + } else if (isa(SymbolV->getType())) { + Actions.emplace_back(nullptr, R->getValueAsInt(A), ""); + Types.emplace_back("unsigned"); + } else if (isa(SymbolV->getType()) || + isa(SymbolV->getType())) { + Actions.emplace_back(nullptr, 0, R->getValueAsString(A)); + Types.emplace_back("std::string"); + } else { + report_fatal_error("Unhandled symbol type!"); + } + + StringRef TypeOverride = Parent->getActionSymbolType(A); + if (!TypeOverride.empty()) + Types.back() = TypeOverride; + } +} + +bool Transition::canTransitionFrom(uint64_t State) { + if ((State & NewState) == 0) + // The bits we want to set are not set; + return true; + return false; +} + +uint64_t Transition::transitionFrom(uint64_t State) { + return State | NewState; +} + +void CustomDfaEmitter::printActionType(raw_ostream &OS) { OS << TypeName; } + +void CustomDfaEmitter::printActionValue(action_type A, raw_ostream &OS) { + const ActionTuple &AT = Actions[A]; + if (AT.size() > 1) + OS << "std::make_tuple("; + bool First = true; + for (const auto &SingleAction : AT) { + if (!First) + OS << ", "; + First = false; + SingleAction.print(OS); + } + if (AT.size() > 1) + OS << ")"; +} + +namespace llvm { + +void EmitAutomata(RecordKeeper &RK, raw_ostream &OS) { + AutomatonEmitter(RK).run(OS); +} + +} // namespace llvm diff --git a/utils/TableGen/DFAEmitter.h b/utils/TableGen/DFAEmitter.h new file mode 100644 index 000000000000..76de8f72cd88 --- /dev/null +++ b/utils/TableGen/DFAEmitter.h @@ -0,0 +1,107 @@ +//===--------------------- DfaEmitter.h -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Defines a generic automaton builder. This takes a set of transitions and +// states that represent a nondeterministic finite state automaton (NFA) and +// emits a determinized DFA in a form that include/llvm/Support/Automaton.h can +// drive. +// +// See file llvm/TableGen/Automaton.td for the TableGen API definition. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_UTILS_TABLEGEN_DFAEMITTER_H +#define LLVM_UTILS_TABLEGEN_DFAEMITTER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/UniqueVector.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/Record.h" +#include +#include + +namespace llvm { + +class raw_ostream; +/// Construct a deterministic finite state automaton from possible +/// nondeterministic state and transition data. +/// +/// The state type is a 64-bit unsigned integer. The generated automaton is +/// invariant to the sparsity of the state representation - its size is only +/// a function of the cardinality of the set of states. +/// +/// The inputs to this emitter are considered to define a nondeterministic +/// finite state automaton (NFA). This is then converted to a DFA during +/// emission. The emitted tables can be used to by +/// include/llvm/Support/Automaton.h. +class DfaEmitter { +public: + // The type of an NFA state. The initial state is always zero. + using state_type = uint64_t; + // The type of an action. + using action_type = uint64_t; + + DfaEmitter() = default; + virtual ~DfaEmitter() = default; + + void addTransition(state_type From, state_type To, action_type A); + void emit(StringRef Name, raw_ostream &OS); + +protected: + /// Emit the C++ type of an action to OS. + virtual void printActionType(raw_ostream &OS); + /// Emit the C++ value of an action A to OS. + virtual void printActionValue(action_type A, raw_ostream &OS); + +private: + /// The state type of deterministic states. These are only used internally to + /// this class. This is an ID into the DfaStates UniqueVector. + using dfa_state_type = unsigned; + + /// The actual representation of a DFA state, which is a union of one or more + /// NFA states. + using DfaState = SmallVector; + + /// A DFA transition consists of a set of NFA states transitioning to a + /// new set of NFA states. The DfaTransitionInfo tracks, for every + /// transitioned-from NFA state, a set of valid transitioned-to states. + /// + /// Emission of this transition relation allows algorithmic determination of + /// the possible candidate NFA paths taken under a given input sequence to + /// reach a given DFA state. + using DfaTransitionInfo = SmallVector, 4>; + + /// The set of all possible actions. + std::set Actions; + + /// The set of nondeterministic transitions. A state-action pair can + /// transition to multiple target states. + std::map, std::vector> + NfaTransitions; + std::set NfaStates; + unsigned NumNfaTransitions = 0; + + /// The set of deterministic states. DfaStates.getId(DfaState) returns an ID, + /// which is dfa_state_type. Note that because UniqueVector reserves state + /// zero, the initial DFA state is always 1. + UniqueVector DfaStates; + /// The set of deterministic transitions. A state-action pair has only a + /// single target state. 
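The members above (NfaTransitions, NfaStates, DfaStates) are the ingredients of a classic subset construction, which constructDfa() and visitDfaState(), declared just below, implement. The following is a rough, self-contained model of that algorithm, not the actual implementation, using standard containers in place of the LLVM ones:

#include <cstdint>
#include <deque>
#include <map>
#include <set>
#include <utility>
#include <vector>

using NfaState = uint64_t;
using Action = uint64_t;
using DfaState = std::vector<NfaState>; // sorted set of NFA states

// Determinize an NFA given as (state, action) -> possible successor states.
std::map<std::pair<DfaState, Action>, DfaState>
determinize(const std::map<std::pair<NfaState, Action>,
                           std::vector<NfaState>> &Nfa) {
  std::map<std::pair<DfaState, Action>, DfaState> Dfa;
  DfaState Initial{0};                 // the NFA initial state is always zero
  std::deque<DfaState> Worklist{Initial};
  std::set<DfaState> Seen{Initial};
  while (!Worklist.empty()) {
    DfaState DS = Worklist.front();
    Worklist.pop_front();
    // Union, per action, of every NFA successor reachable from DS.
    std::map<Action, std::set<NfaState>> ByAction;
    for (NfaState S : DS)
      for (const auto &KV : Nfa)
        if (KV.first.first == S)
          ByAction[KV.first.second].insert(KV.second.begin(), KV.second.end());
    // Each distinct successor set becomes (or reuses) a DFA state.
    for (const auto &KV : ByAction) {
      DfaState Next(KV.second.begin(), KV.second.end());
      Dfa[{DS, KV.first}] = Next;
      if (Seen.insert(Next).second)
        Worklist.push_back(Next);
    }
  }
  return Dfa;
}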
+ std::map, + std::pair> + DfaTransitions; + + /// Visit all NFA states and construct the DFA. + void constructDfa(); + /// Visit a single DFA state and construct all possible transitions to new DFA + /// states. + void visitDfaState(DfaState DS); +}; + +} // namespace llvm + +#endif diff --git a/utils/TableGen/DFAPacketizerEmitter.cpp b/utils/TableGen/DFAPacketizerEmitter.cpp index dabcc8f8ed55..ccb4ef1b9678 100644 --- a/utils/TableGen/DFAPacketizerEmitter.cpp +++ b/utils/TableGen/DFAPacketizerEmitter.cpp @@ -17,6 +17,7 @@ #define DEBUG_TYPE "dfa-emitter" #include "CodeGenTarget.h" +#include "DFAEmitter.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" @@ -29,6 +30,7 @@ #include #include #include +#include #include using namespace llvm; @@ -154,121 +156,13 @@ public: int &maxStages, raw_ostream &OS); - void run(raw_ostream &OS); -}; - -// -// State represents the usage of machine resources if the packet contains -// a set of instruction classes. -// -// Specifically, currentState is a set of bit-masks. -// The nth bit in a bit-mask indicates whether the nth resource is being used -// by this state. The set of bit-masks in a state represent the different -// possible outcomes of transitioning to this state. -// For example: consider a two resource architecture: resource L and resource M -// with three instruction classes: L, M, and L_or_M. -// From the initial state (currentState = 0x00), if we add instruction class -// L_or_M we will transition to a state with currentState = [0x01, 0x10]. This -// represents the possible resource states that can result from adding a L_or_M -// instruction -// -// Another way of thinking about this transition is we are mapping a NDFA with -// two states [0x01] and [0x10] into a DFA with a single state [0x01, 0x10]. -// -// A State instance also contains a collection of transitions from that state: -// a map from inputs to new states. -// -class State { - public: - static int currentStateNum; - // stateNum is the only member used for equality/ordering, all other members - // can be mutated even in const State objects. - const int stateNum; - mutable bool isInitial; - mutable std::set stateInfo; - typedef std::map, const State *> TransitionMap; - mutable TransitionMap Transitions; - - State(); - - bool operator<(const State &s) const { - return stateNum < s.stateNum; - } - - // - // canMaybeAddInsnClass - Quickly verifies if an instruction of type InsnClass - // may be a valid transition from this state i.e., can an instruction of type - // InsnClass be added to the packet represented by this state. - // - // Note that for multiple stages, this quick check does not take into account - // any possible resource competition between the stages themselves. That is - // enforced in AddInsnClassStages which checks the cross product of all - // stages for resource availability (which is a more involved check). - // - bool canMaybeAddInsnClass(std::vector &InsnClass, - std::map &ComboBitToBitsMap) const; - - // - // AddInsnClass - Return all combinations of resource reservation - // which are possible from this state (PossibleStates). - // - // PossibleStates is the set of valid resource states that ensue from valid - // transitions. 
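To make the DfaEmitter interface declared in DFAEmitter.h above concrete, here is a minimal hypothetical user. The WidgetDfaEmitter class, the action values and the "Widget" name are invented for the example; only addTransition(), emit() and the two print hooks come from the header.

#include "DFAEmitter.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Our actions are plain unsigned values in this example.
class WidgetDfaEmitter : public DfaEmitter {
  void printActionType(raw_ostream &OS) override { OS << "unsigned"; }
  void printActionValue(action_type A, raw_ostream &OS) override { OS << A; }
};

void emitWidgetAutomaton(raw_ostream &OS) {
  WidgetDfaEmitter E;
  // An NFA: from state 0, action 1 may land in state 1 or state 2 ...
  E.addTransition(0, 1, /*Action=*/1);
  E.addTransition(0, 2, /*Action=*/1);
  // ... and action 2 joins both back into state 3.
  E.addTransition(1, 3, /*Action=*/2);
  E.addTransition(2, 3, /*Action=*/2);
  // Determinizes and prints WidgetTransitions[] / WidgetTransitionInfo.
  E.emit("Widget", OS);
}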
- // - void AddInsnClass(std::vector &InsnClass, - std::map &ComboBitToBitsMap, - std::set &PossibleStates) const; - - // - // AddInsnClassStages - Return all combinations of resource reservation - // resulting from the cross product of all stages for this InsnClass - // which are possible from this state (PossibleStates). - // - void AddInsnClassStages(std::vector &InsnClass, - std::map &ComboBitToBitsMap, - unsigned chkstage, unsigned numstages, - unsigned prevState, unsigned origState, - DenseSet &VisitedResourceStates, - std::set &PossibleStates) const; - - // - // addTransition - Add a transition from this state given the input InsnClass - // - void addTransition(std::vector InsnClass, const State *To) const; - - // - // hasTransition - Returns true if there is a transition from this state - // given the input InsnClass - // - bool hasTransition(std::vector InsnClass) const; -}; - -// -// class DFA: deterministic finite automaton for processor resource tracking. -// -class DFA { -public: - DFA() = default; - - // Set of states. Need to keep this sorted to emit the transition table. - typedef std::set StateSet; - StateSet states; + // Emit code for a subset of itineraries. + void emitForItineraries(raw_ostream &OS, + std::vector &ProcItinList, + std::string DFAName); - State *currentState = nullptr; - - // - // Modify the DFA. - // - const State &newState(); - - // - // writeTable: Print out a table representing the DFA. - // - void writeTableAndAPI(raw_ostream &OS, const std::string &ClassName, - int numInsnClasses = 0, - int maxResources = 0, int numCombos = 0, int maxStages = 0); + void run(raw_ostream &OS); }; - } // end anonymous namespace #ifndef NDEBUG @@ -288,22 +182,6 @@ void dbgsInsnClass(const std::vector &InsnClass) { LLVM_DEBUG(dbgs() << " (input: 0x" << Twine::utohexstr(InsnInput) << ")"); } -// -// dbgsStateInfo - When debugging, print the set of state info. -// -void dbgsStateInfo(const std::set &stateInfo) { - LLVM_DEBUG(dbgs() << "StateInfo: "); - unsigned i = 0; - for (std::set::iterator SI = stateInfo.begin(); - SI != stateInfo.end(); ++SI, ++i) { - unsigned thisState = *SI; - if (i > 0) { - LLVM_DEBUG(dbgs() << ", "); - } - LLVM_DEBUG(dbgs() << "0x" << Twine::utohexstr(thisState)); - } -} - // // dbgsIndent - When debugging, indent by the specified amount. // @@ -314,335 +192,9 @@ void dbgsIndent(unsigned indent) { } #endif // NDEBUG -// -// Constructors and destructors for State and DFA -// -State::State() : - stateNum(currentStateNum++), isInitial(false) {} - -// -// addTransition - Add a transition from this state given the input InsnClass -// -void State::addTransition(std::vector InsnClass, const State *To) - const { - assert(!Transitions.count(InsnClass) && - "Cannot have multiple transitions for the same input"); - Transitions[InsnClass] = To; -} - -// -// hasTransition - Returns true if there is a transition from this state -// given the input InsnClass -// -bool State::hasTransition(std::vector InsnClass) const { - return Transitions.count(InsnClass) > 0; -} - -// -// AddInsnClass - Return all combinations of resource reservation -// which are possible from this state (PossibleStates). -// -// PossibleStates is the set of valid resource states that ensue from valid -// transitions. -// -void State::AddInsnClass(std::vector &InsnClass, - std::map &ComboBitToBitsMap, - std::set &PossibleStates) const { - // - // Iterate over all resource states in currentState. 
- // - unsigned numstages = InsnClass.size(); - assert((numstages > 0) && "InsnClass has no stages"); - - for (std::set::iterator SI = stateInfo.begin(); - SI != stateInfo.end(); ++SI) { - unsigned thisState = *SI; - - DenseSet VisitedResourceStates; - - LLVM_DEBUG(dbgs() << " thisState: 0x" << Twine::utohexstr(thisState) - << "\n"); - AddInsnClassStages(InsnClass, ComboBitToBitsMap, - numstages - 1, numstages, - thisState, thisState, - VisitedResourceStates, PossibleStates); - } -} - -void State::AddInsnClassStages(std::vector &InsnClass, - std::map &ComboBitToBitsMap, - unsigned chkstage, unsigned numstages, - unsigned prevState, unsigned origState, - DenseSet &VisitedResourceStates, - std::set &PossibleStates) const { - assert((chkstage < numstages) && "AddInsnClassStages: stage out of range"); - unsigned thisStage = InsnClass[chkstage]; - - LLVM_DEBUG({ - dbgsIndent((1 + numstages - chkstage) << 1); - dbgs() << "AddInsnClassStages " << chkstage << " (0x" - << Twine::utohexstr(thisStage) << ") from "; - dbgsInsnClass(InsnClass); - dbgs() << "\n"; - }); - - // - // Iterate over all possible resources used in thisStage. - // For ex: for thisStage = 0x11, all resources = {0x01, 0x10}. - // - for (unsigned int j = 0; j < DFA_MAX_RESOURCES; ++j) { - unsigned resourceMask = (0x1 << j); - if (resourceMask & thisStage) { - unsigned combo = ComboBitToBitsMap[resourceMask]; - if (combo && ((~prevState & combo) != combo)) { - LLVM_DEBUG(dbgs() << "\tSkipped Add 0x" << Twine::utohexstr(prevState) - << " - combo op 0x" << Twine::utohexstr(resourceMask) - << " (0x" << Twine::utohexstr(combo) - << ") cannot be scheduled\n"); - continue; - } - // - // For each possible resource used in thisStage, generate the - // resource state if that resource was used. - // - unsigned ResultingResourceState = prevState | resourceMask | combo; - LLVM_DEBUG({ - dbgsIndent((2 + numstages - chkstage) << 1); - dbgs() << "0x" << Twine::utohexstr(prevState) << " | 0x" - << Twine::utohexstr(resourceMask); - if (combo) - dbgs() << " | 0x" << Twine::utohexstr(combo); - dbgs() << " = 0x" << Twine::utohexstr(ResultingResourceState) << " "; - }); - - // - // If this is the final stage for this class - // - if (chkstage == 0) { - // - // Check if the resulting resource state can be accommodated in this - // packet. - // We compute resource OR prevState (originally started as origState). - // If the result of the OR is different than origState, it implies - // that there is at least one resource that can be used to schedule - // thisStage in the current packet. - // Insert ResultingResourceState into PossibleStates only if we haven't - // processed ResultingResourceState before. - // - if (ResultingResourceState != prevState) { - if (VisitedResourceStates.count(ResultingResourceState) == 0) { - VisitedResourceStates.insert(ResultingResourceState); - PossibleStates.insert(ResultingResourceState); - LLVM_DEBUG(dbgs() - << "\tResultingResourceState: 0x" - << Twine::utohexstr(ResultingResourceState) << "\n"); - } else { - LLVM_DEBUG(dbgs() << "\tSkipped Add - state already seen\n"); - } - } else { - LLVM_DEBUG(dbgs() - << "\tSkipped Add - no final resources available\n"); - } - } else { - // - // If the current resource can be accommodated, check the next - // stage in InsnClass for available resources. 
- // - if (ResultingResourceState != prevState) { - LLVM_DEBUG(dbgs() << "\n"); - AddInsnClassStages(InsnClass, ComboBitToBitsMap, - chkstage - 1, numstages, - ResultingResourceState, origState, - VisitedResourceStates, PossibleStates); - } else { - LLVM_DEBUG(dbgs() << "\tSkipped Add - no resources available\n"); - } - } - } - } -} - -// -// canMaybeAddInsnClass - Quickly verifies if an instruction of type InsnClass -// may be a valid transition from this state i.e., can an instruction of type -// InsnClass be added to the packet represented by this state. -// -// Note that this routine is performing conservative checks that can be -// quickly executed acting as a filter before calling AddInsnClassStages. -// Any cases allowed through here will be caught later in AddInsnClassStages -// which performs the more expensive exact check. -// -bool State::canMaybeAddInsnClass(std::vector &InsnClass, - std::map &ComboBitToBitsMap) const { - for (std::set::const_iterator SI = stateInfo.begin(); - SI != stateInfo.end(); ++SI) { - // Check to see if all required resources are available. - bool available = true; - - // Inspect each stage independently. - // note: This is a conservative check as we aren't checking for - // possible resource competition between the stages themselves - // The full cross product is examined later in AddInsnClass. - for (unsigned i = 0; i < InsnClass.size(); ++i) { - unsigned resources = *SI; - if ((~resources & InsnClass[i]) == 0) { - available = false; - break; - } - // Make sure _all_ resources for a combo function are available. - // note: This is a quick conservative check as it won't catch an - // unscheduleable combo if this stage is an OR expression - // containing a combo. - // These cases are caught later in AddInsnClass. - unsigned combo = ComboBitToBitsMap[InsnClass[i]]; - if (combo && ((~resources & combo) != combo)) { - LLVM_DEBUG(dbgs() << "\tSkipped canMaybeAdd 0x" - << Twine::utohexstr(resources) << " - combo op 0x" - << Twine::utohexstr(InsnClass[i]) << " (0x" - << Twine::utohexstr(combo) - << ") cannot be scheduled\n"); - available = false; - break; - } - } - - if (available) { - return true; - } - } - return false; -} - -const State &DFA::newState() { - auto IterPair = states.insert(State()); - assert(IterPair.second && "State already exists"); - return *IterPair.first; -} - -int State::currentStateNum = 0; - DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R): TargetName(CodeGenTarget(R).getName()), Records(R) {} -// -// writeTableAndAPI - Print out a table representing the DFA and the -// associated API to create a DFA packetizer. -// -// Format: -// DFAStateInputTable[][2] = pairs of for all valid -// transitions. -// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable for -// the ith state. 
-// -// -void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName, - int numInsnClasses, - int maxResources, int numCombos, int maxStages) { - unsigned numStates = states.size(); - - LLVM_DEBUG(dbgs() << "-------------------------------------------------------" - "----------------------\n"); - LLVM_DEBUG(dbgs() << "writeTableAndAPI\n"); - LLVM_DEBUG(dbgs() << "Total states: " << numStates << "\n"); - - OS << "namespace llvm {\n"; - - OS << "\n// Input format:\n"; - OS << "#define DFA_MAX_RESTERMS " << DFA_MAX_RESTERMS - << "\t// maximum AND'ed resource terms\n"; - OS << "#define DFA_MAX_RESOURCES " << DFA_MAX_RESOURCES - << "\t// maximum resource bits in one term\n"; - - OS << "\n// " << TargetName << "DFAStateInputTable[][2] = " - << "pairs of for all valid\n"; - OS << "// transitions.\n"; - OS << "// " << numStates << "\tstates\n"; - OS << "// " << numInsnClasses << "\tinstruction classes\n"; - OS << "// " << maxResources << "\tresources max\n"; - OS << "// " << numCombos << "\tcombo resources\n"; - OS << "// " << maxStages << "\tstages max\n"; - OS << "const " << DFA_TBLTYPE << " " - << TargetName << "DFAStateInputTable[][2] = {\n"; - - // This table provides a map to the beginning of the transitions for State s - // in DFAStateInputTable. - std::vector StateEntry(numStates+1); - static const std::string SentinelEntry = "{-1, -1}"; - - // Tracks the total valid transitions encountered so far. It is used - // to construct the StateEntry table. - int ValidTransitions = 0; - DFA::StateSet::iterator SI = states.begin(); - for (unsigned i = 0; i < numStates; ++i, ++SI) { - assert ((SI->stateNum == (int) i) && "Mismatch in state numbers"); - StateEntry[i] = ValidTransitions; - for (State::TransitionMap::iterator - II = SI->Transitions.begin(), IE = SI->Transitions.end(); - II != IE; ++II) { - OS << "{0x" << Twine::utohexstr(getDFAInsnInput(II->first)) << ", " - << II->second->stateNum << "},\t"; - } - ValidTransitions += SI->Transitions.size(); - - // If there are no valid transitions from this stage, we need a sentinel - // transition. - if (ValidTransitions == StateEntry[i]) { - OS << SentinelEntry << ",\t"; - ++ValidTransitions; - } - - OS << " // state " << i << ": " << StateEntry[i]; - if (StateEntry[i] != (ValidTransitions-1)) { // More than one transition. - OS << "-" << (ValidTransitions-1); - } - OS << "\n"; - } - - // Print out a sentinel entry at the end of the StateInputTable. This is - // needed to iterate over StateInputTable in DFAPacketizer::ReadTable() - OS << SentinelEntry << "\t"; - OS << " // state " << numStates << ": " << ValidTransitions; - OS << "\n"; - - OS << "};\n\n"; - OS << "// " << TargetName << "DFAStateEntryTable[i] = " - << "Index of the first entry in DFAStateInputTable for\n"; - OS << "// " - << "the ith state.\n"; - OS << "// " << numStates << " states\n"; - OS << "const unsigned int " << TargetName << "DFAStateEntryTable[] = {\n"; - - // Multiply i by 2 since each entry in DFAStateInputTable is a set of - // two numbers. - unsigned lastState = 0; - for (unsigned i = 0; i < numStates; ++i) { - if (i && ((i % 10) == 0)) { - lastState = i-1; - OS << " // states " << (i-10) << ":" << lastState << "\n"; - } - OS << StateEntry[i] << ", "; - } - - // Print out the index to the sentinel entry in StateInputTable - OS << ValidTransitions << ", "; - OS << " // states " << (lastState+1) << ":" << numStates << "\n"; - - OS << "};\n"; - OS << "} // namespace\n"; - - // - // Emit DFA Packetizer tables if the target is a VLIW machine. 
- // - std::string SubTargetClassName = TargetName + "GenSubtargetInfo"; - OS << "\n" << "#include \"llvm/CodeGen/DFAPacketizer.h\"\n"; - OS << "namespace llvm {\n"; - OS << "DFAPacketizer *" << SubTargetClassName << "::" - << "createDFAPacketizer(const InstrItineraryData *IID) const {\n" - << " return new DFAPacketizer(IID, " << TargetName - << "DFAStateInputTable, " << TargetName << "DFAStateEntryTable);\n}\n\n"; - OS << "} // End llvm namespace \n"; -} - // // collectAllFuncUnits - Construct a map of function unit names to bits. // @@ -837,10 +389,32 @@ int DFAPacketizerEmitter::collectAllInsnClasses(const std::string &ProcName, // Run the worklist algorithm to generate the DFA. // void DFAPacketizerEmitter::run(raw_ostream &OS) { + OS << "\n" + << "#include \"llvm/CodeGen/DFAPacketizer.h\"\n"; + OS << "namespace llvm {\n"; + + OS << "\n// Input format:\n"; + OS << "#define DFA_MAX_RESTERMS " << DFA_MAX_RESTERMS + << "\t// maximum AND'ed resource terms\n"; + OS << "#define DFA_MAX_RESOURCES " << DFA_MAX_RESOURCES + << "\t// maximum resource bits in one term\n"; + // Collect processor iteraries. std::vector ProcItinList = Records.getAllDerivedDefinitions("ProcessorItineraries"); + std::unordered_map> ItinsByNamespace; + for (Record *R : ProcItinList) + ItinsByNamespace[R->getValueAsString("PacketizerNamespace")].push_back(R); + + for (auto &KV : ItinsByNamespace) + emitForItineraries(OS, KV.second, KV.first); + OS << "} // end namespace llvm\n"; +} + +void DFAPacketizerEmitter::emitForItineraries( + raw_ostream &OS, std::vector &ProcItinList, + std::string DFAName) { // // Collect the Functional units. // @@ -855,8 +429,7 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) { std::map ComboBitToBitsMap; std::vector ComboFuncList = Records.getAllDerivedDefinitions("ComboFuncUnits"); - int numCombos = collectAllComboFuncs(ComboFuncList, - FUNameToBitsMap, ComboBitToBitsMap, OS); + collectAllComboFuncs(ComboFuncList, FUNameToBitsMap, ComboBitToBitsMap, OS); // // Collect the itineraries. @@ -887,103 +460,89 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) { FUNameToBitsMap, ItinDataList, maxStages, OS); } - // - // Run a worklist algorithm to generate the DFA. - // - DFA D; - const State *Initial = &D.newState(); - Initial->isInitial = true; - Initial->stateInfo.insert(0x0); - SmallVector WorkList; - std::map, const State*> Visited; - - WorkList.push_back(Initial); - - // - // Worklist algorithm to create a DFA for processor resource tracking. - // C = {set of InsnClasses} - // Begin with initial node in worklist. 
Initial node does not have - // any consumed resources, - // ResourceState = 0x0 - // Visited = {} - // While worklist != empty - // S = first element of worklist - // For every instruction class C - // if we can accommodate C in S: - // S' = state with resource states = {S Union C} - // Add a new transition: S x C -> S' - // If S' is not in Visited: - // Add S' to worklist - // Add S' to Visited - // - while (!WorkList.empty()) { - const State *current = WorkList.pop_back_val(); - LLVM_DEBUG({ - dbgs() << "---------------------\n"; - dbgs() << "Processing state: " << current->stateNum << " - "; - dbgsStateInfo(current->stateInfo); - dbgs() << "\n"; - }); - for (unsigned i = 0; i < allInsnClasses.size(); i++) { - std::vector InsnClass = allInsnClasses[i]; - LLVM_DEBUG({ - dbgs() << i << " "; - dbgsInsnClass(InsnClass); - dbgs() << "\n"; - }); - - std::set NewStateResources; - // - // If we haven't already created a transition for this input - // and the state can accommodate this InsnClass, create a transition. - // - if (!current->hasTransition(InsnClass) && - current->canMaybeAddInsnClass(InsnClass, ComboBitToBitsMap)) { - const State *NewState = nullptr; - current->AddInsnClass(InsnClass, ComboBitToBitsMap, NewStateResources); - if (NewStateResources.empty()) { - LLVM_DEBUG(dbgs() << " Skipped - no new states generated\n"); - continue; - } - - LLVM_DEBUG({ - dbgs() << "\t"; - dbgsStateInfo(NewStateResources); - dbgs() << "\n"; - }); - - // - // If we have seen this state before, then do not create a new state. - // - auto VI = Visited.find(NewStateResources); - if (VI != Visited.end()) { - NewState = VI->second; - LLVM_DEBUG({ - dbgs() << "\tFound existing state: " << NewState->stateNum - << " - "; - dbgsStateInfo(NewState->stateInfo); - dbgs() << "\n"; - }); - } else { - NewState = &D.newState(); - NewState->stateInfo = NewStateResources; - Visited[NewStateResources] = NewState; - WorkList.push_back(NewState); - LLVM_DEBUG({ - dbgs() << "\tAccepted new state: " << NewState->stateNum << " - "; - dbgsStateInfo(NewState->stateInfo); - dbgs() << "\n"; - }); + // The type of a state in the nondeterministic automaton we're defining. + using NfaStateTy = unsigned; + + // Given a resource state, return all resource states by applying + // InsnClass. + auto applyInsnClass = [&](ArrayRef InsnClass, + NfaStateTy State) -> std::deque { + std::deque V(1, State); + // Apply every stage in the class individually. + for (unsigned Stage : InsnClass) { + // Apply this stage to every existing member of V in turn. + size_t Sz = V.size(); + for (unsigned I = 0; I < Sz; ++I) { + unsigned S = V.front(); + V.pop_front(); + + // For this stage, state combination, try all possible resources. + for (unsigned J = 0; J < DFA_MAX_RESOURCES; ++J) { + unsigned ResourceMask = 1U << J; + if ((ResourceMask & Stage) == 0) + // This resource isn't required by this stage. + continue; + unsigned Combo = ComboBitToBitsMap[ResourceMask]; + if (Combo && ((~S & Combo) != Combo)) + // This combo units bits are not available. + continue; + unsigned ResultingResourceState = S | ResourceMask | Combo; + if (ResultingResourceState == S) + continue; + V.push_back(ResultingResourceState); } - - current->addTransition(InsnClass, NewState); + } + } + return V; + }; + + // Given a resource state, return a quick (conservative) guess as to whether + // InsnClass can be applied. This is a filter for the more heavyweight + // applyInsnClass. 
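The applyInsnClass lambda above does the fan-out that the deleted State::AddInsnClassStages used to do: each stage of an instruction class turns one resource state into one successor per usable resource, folding in any combo units. The quick canApplyInsnClass filter it refers to comes right after this. Below is a self-contained sketch of the expansion, reusing the two-resource example from the deleted comments (L = 0x1, M = 0x2, a one-stage class L_or_M = 0x3); the function and map names are invented and 32 stands in for DFA_MAX_RESOURCES.

#include <cstdint>
#include <deque>
#include <map>
#include <vector>

// Expand State by one instruction class, one stage at a time.
std::deque<unsigned> applyClass(const std::vector<unsigned> &InsnClass,
                                unsigned State,
                                std::map<unsigned, unsigned> &ComboBits) {
  std::deque<unsigned> V(1, State);
  for (unsigned Stage : InsnClass) {
    size_t Sz = V.size();
    for (size_t I = 0; I < Sz; ++I) {
      unsigned S = V.front();
      V.pop_front();
      for (unsigned J = 0; J < 32; ++J) {
        unsigned Mask = 1U << J;
        if ((Mask & Stage) == 0)
          continue; // this resource is not named by the stage
        unsigned Combo = ComboBits[Mask];
        if (Combo && (~S & Combo) != Combo)
          continue; // the combo's underlying units are partly busy
        unsigned Next = S | Mask | Combo;
        if (Next != S)
          V.push_back(Next); // one successor per resource actually consumed
      }
    }
  }
  return V;
}

// With ComboBits empty, applyClass({0x3}, 0, ComboBits) yields {0x1, 0x2}:
// the packet may claim either L or M, exactly the NFA-to-DFA example from
// the deleted State comments.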
+ auto canApplyInsnClass = [](ArrayRef InsnClass, + NfaStateTy State) -> bool { + for (unsigned Resources : InsnClass) { + if ((State | Resources) == State) + return false; + } + return true; + }; + + DfaEmitter Emitter; + std::deque Worklist(1, 0); + std::set SeenStates; + SeenStates.insert(Worklist.front()); + while (!Worklist.empty()) { + NfaStateTy State = Worklist.front(); + Worklist.pop_front(); + for (unsigned i = 0; i < allInsnClasses.size(); i++) { + const std::vector &InsnClass = allInsnClasses[i]; + if (!canApplyInsnClass(InsnClass, State)) + continue; + for (unsigned NewState : applyInsnClass(InsnClass, State)) { + if (SeenStates.emplace(NewState).second) + Worklist.emplace_back(NewState); + Emitter.addTransition(State, NewState, getDFAInsnInput(InsnClass)); } } } - // Print out the table. - D.writeTableAndAPI(OS, TargetName, - numInsnClasses, maxResources, numCombos, maxStages); + OS << "} // end namespace llvm\n\n"; + OS << "namespace {\n"; + std::string TargetAndDFAName = TargetName + DFAName; + Emitter.emit(TargetAndDFAName, OS); + OS << "} // end anonymous namespace\n\n"; + + std::string SubTargetClassName = TargetName + "GenSubtargetInfo"; + OS << "namespace llvm {\n"; + OS << "DFAPacketizer *" << SubTargetClassName << "::" + << "create" << DFAName + << "DFAPacketizer(const InstrItineraryData *IID) const {\n" + << " static Automaton A(ArrayRef<" << TargetAndDFAName + << "Transition>(" << TargetAndDFAName << "Transitions), " + << TargetAndDFAName << "TransitionInfo);\n" + << " return new DFAPacketizer(IID, A);\n" + << "\n}\n\n"; } namespace llvm { diff --git a/utils/TableGen/DisassemblerEmitter.cpp b/utils/TableGen/DisassemblerEmitter.cpp index 9e75c7fba77b..0002b0e14db6 100644 --- a/utils/TableGen/DisassemblerEmitter.cpp +++ b/utils/TableGen/DisassemblerEmitter.cpp @@ -153,4 +153,4 @@ void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) { "MCDisassembler::Success", "MCDisassembler::Fail", ""); } -} // End llvm namespace +} // end namespace llvm diff --git a/utils/TableGen/FixedLenDecoderEmitter.cpp b/utils/TableGen/FixedLenDecoderEmitter.cpp index f5e975d2e5ae..ac69b431607d 100644 --- a/utils/TableGen/FixedLenDecoderEmitter.cpp +++ b/utils/TableGen/FixedLenDecoderEmitter.cpp @@ -13,6 +13,7 @@ #include "CodeGenInstruction.h" #include "CodeGenTarget.h" +#include "InfoByHwMode.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/CachedHashString.h" @@ -64,9 +65,10 @@ struct OperandInfo { std::vector Fields; std::string Decoder; bool HasCompleteDecoder; + uint64_t InitValue; OperandInfo(std::string D, bool HCD) - : Decoder(std::move(D)), HasCompleteDecoder(HCD) {} + : Decoder(std::move(D)), HasCompleteDecoder(HCD), InitValue(0) {} void addField(unsigned Base, unsigned Width, unsigned Offset) { Fields.push_back(EncodingField(Base, Width, Offset)); @@ -96,9 +98,11 @@ struct DecoderTableInfo { struct EncodingAndInst { const Record *EncodingDef; const CodeGenInstruction *Inst; + StringRef HwModeName; - EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst) - : EncodingDef(EncodingDef), Inst(Inst) {} + EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst, + StringRef HwModeName = "") + : EncodingDef(EncodingDef), Inst(Inst), HwModeName(HwModeName) {} }; struct EncodingIDAndOpcode { @@ -599,7 +603,7 @@ void Filter::recurse() { // Delegates to an inferior filter chooser for further processing on this // group of instructions whose segment values are variable. 
FilterChooserMap.insert( - std::make_pair(-1U, llvm::make_unique( + std::make_pair(-1U, std::make_unique( Owner->AllInstructions, VariableInstructions, Owner->Operands, BitValueArray, *Owner))); } @@ -625,7 +629,7 @@ void Filter::recurse() { // Delegates to an inferior filter chooser for further processing on this // category of instructions. FilterChooserMap.insert(std::make_pair( - Inst.first, llvm::make_unique( + Inst.first, std::make_unique( Owner->AllInstructions, Inst.second, Owner->Operands, BitValueArray, *Owner))); } @@ -1103,12 +1107,15 @@ void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation, bool &OpHasCompleteDecoder) const { const std::string &Decoder = OpInfo.Decoder; - if (OpInfo.numFields() != 1) - o.indent(Indentation) << "tmp = 0;\n"; + if (OpInfo.numFields() != 1 || OpInfo.InitValue != 0) { + o.indent(Indentation) << "tmp = 0x"; + o.write_hex(OpInfo.InitValue); + o << ";\n"; + } for (const EncodingField &EF : OpInfo) { o.indent(Indentation) << "tmp "; - if (OpInfo.numFields() != 1) o << '|'; + if (OpInfo.numFields() != 1 || OpInfo.InitValue != 0) o << '|'; o << "= fieldFromInstruction" << "(insn, " << EF.Base << ", " << EF.Width << ')'; if (OpInfo.numFields() != 1 || EF.Offset != 0) @@ -2026,6 +2033,16 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef, HasCompleteDecoderBit->getValue() : true; OperandInfo OpInfo(Decoder, HasCompleteDecoder); + + // Some bits of the operand may be required to be 1 depending on the + // instruction's encoding. Collect those bits. + if (const RecordVal *EncodedValue = EncodingDef.getValue(Op.second)) + if (const BitsInit *OpBits = dyn_cast(EncodedValue->getValue())) + for (unsigned I = 0; I < OpBits->getNumBits(); ++I) + if (const BitInit *OpBit = dyn_cast(OpBits->getBit(I))) + if (OpBit->getValue()) + OpInfo.InitValue |= 1ULL << I; + unsigned Base = ~0U; unsigned Width = 0; unsigned Offset = 0; @@ -2368,12 +2385,50 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) { Target.reverseBitsForLittleEndianEncoding(); // Parameterize the decoders based on namespace and instruction width. + std::set HwModeNames; const auto &NumberedInstructions = Target.getInstructionsByEnumValue(); NumberedEncodings.reserve(NumberedInstructions.size()); DenseMap IndexOfInstruction; + // First, collect all HwModes referenced by the target. for (const auto &NumberedInstruction : NumberedInstructions) { IndexOfInstruction[NumberedInstruction->TheDef] = NumberedEncodings.size(); - NumberedEncodings.emplace_back(NumberedInstruction->TheDef, NumberedInstruction); + + if (const RecordVal *RV = + NumberedInstruction->TheDef->getValue("EncodingInfos")) { + if (auto *DI = dyn_cast_or_null(RV->getValue())) { + const CodeGenHwModes &HWM = Target.getHwModes(); + EncodingInfoByHwMode EBM(DI->getDef(), HWM); + for (auto &KV : EBM.Map) + HwModeNames.insert(HWM.getMode(KV.first).Name); + } + } + } + + // If HwModeNames is empty, add the empty string so we always have one HwMode. 
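The InitValue handling above changes what emitBinaryParser() writes into a generated decoder: encoding bits of an operand that are hardwired to 1 now seed tmp before the variable fields are OR'd in. Below is a self-contained sketch with made-up numbers (bit 2 hardwired to 1, the remaining two bits read from insn[5:4]); fieldFromInstruction() here is only a local stand-in for the helper the generated code calls.

#include <cstdint>

// Local stand-in for the generated helper.
static uint64_t fieldFromInstruction(uint64_t insn, unsigned Base,
                                     unsigned Width) {
  return (insn >> Base) & ((1ULL << Width) - 1);
}

// What emitBinaryParser() now produces for such an operand:
//   tmp = 0x4;
//   tmp |= fieldFromInstruction(insn, 4, 2);
// (previously the constant bit was simply not part of tmp).
uint64_t decodeExampleOperand(uint64_t insn) {
  uint64_t tmp = 0x4;
  tmp |= fieldFromInstruction(insn, 4, 2);
  return tmp;
}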
+ if (HwModeNames.empty()) + HwModeNames.insert(""); + + for (const auto &NumberedInstruction : NumberedInstructions) { + IndexOfInstruction[NumberedInstruction->TheDef] = NumberedEncodings.size(); + + if (const RecordVal *RV = + NumberedInstruction->TheDef->getValue("EncodingInfos")) { + if (DefInit *DI = dyn_cast_or_null(RV->getValue())) { + const CodeGenHwModes &HWM = Target.getHwModes(); + EncodingInfoByHwMode EBM(DI->getDef(), HWM); + for (auto &KV : EBM.Map) { + NumberedEncodings.emplace_back(KV.second, NumberedInstruction, + HWM.getMode(KV.first).Name); + HwModeNames.insert(HWM.getMode(KV.first).Name); + } + continue; + } + } + // This instruction is encoded the same on all HwModes. Emit it for all + // HwModes. + for (StringRef HwModeName : HwModeNames) + NumberedEncodings.emplace_back(NumberedInstruction->TheDef, + NumberedInstruction, HwModeName); } for (const auto &NumberedAlias : RK.getAllDerivedDefinitions("AdditionalEncoding")) NumberedEncodings.emplace_back( @@ -2401,13 +2456,19 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) { NumInstructions++; NumEncodings++; - StringRef DecoderNamespace = EncodingDef->getValueAsString("DecoderNamespace"); + if (!Size) + continue; - if (Size) { - if (populateInstruction(Target, *EncodingDef, *Inst, i, Operands)) { - OpcMap[std::make_pair(DecoderNamespace, Size)].emplace_back(i, IndexOfInstruction.find(Def)->second); - } else - NumEncodingsOmitted++; + if (populateInstruction(Target, *EncodingDef, *Inst, i, Operands)) { + std::string DecoderNamespace = + EncodingDef->getValueAsString("DecoderNamespace"); + if (!NumberedEncodings[i].HwModeName.empty()) + DecoderNamespace += + std::string("_") + NumberedEncodings[i].HwModeName.str(); + OpcMap[std::make_pair(DecoderNamespace, Size)].emplace_back( + i, IndexOfInstruction.find(Def)->second); + } else { + NumEncodingsOmitted++; } } @@ -2451,7 +2512,7 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) { // Emit the main entry point for the decoder, decodeInstruction(). emitDecodeInstruction(OS); - OS << "\n} // End llvm namespace\n"; + OS << "\n} // end namespace llvm\n"; } namespace llvm { diff --git a/utils/TableGen/GICombinerEmitter.cpp b/utils/TableGen/GICombinerEmitter.cpp new file mode 100644 index 000000000000..5dc4d6b07740 --- /dev/null +++ b/utils/TableGen/GICombinerEmitter.cpp @@ -0,0 +1,452 @@ +//===- GlobalCombinerEmitter.cpp - Generate a combiner --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file Generate a combiner implementation for GlobalISel from a declarative +/// syntax +/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Timer.h" +#include "llvm/TableGen/Error.h" +#include "llvm/TableGen/StringMatcher.h" +#include "llvm/TableGen/TableGenBackend.h" +#include "CodeGenTarget.h" +#include "GlobalISel/CodeExpander.h" +#include "GlobalISel/CodeExpansions.h" + +using namespace llvm; + +#define DEBUG_TYPE "gicombiner-emitter" + +// FIXME: Use ALWAYS_ENABLED_STATISTIC once it's available. 
+unsigned NumPatternTotal = 0; +STATISTIC(NumPatternTotalStatistic, "Total number of patterns"); + +cl::OptionCategory + GICombinerEmitterCat("Options for -gen-global-isel-combiner"); +static cl::list + SelectedCombiners("combiners", cl::desc("Emit the specified combiners"), + cl::cat(GICombinerEmitterCat), cl::CommaSeparated); +static cl::opt ShowExpansions( + "gicombiner-show-expansions", + cl::desc("Use C++ comments to indicate occurence of code expansion"), + cl::cat(GICombinerEmitterCat)); + +namespace { +typedef uint64_t RuleID; + +class RootInfo { + StringRef PatternSymbol; + +public: + RootInfo(StringRef PatternSymbol) : PatternSymbol(PatternSymbol) {} + + StringRef getPatternSymbol() const { return PatternSymbol; } +}; + +class CombineRule { +protected: + /// A unique ID for this rule + /// ID's are used for debugging and run-time disabling of rules among other + /// things. + RuleID ID; + + /// The record defining this rule. + const Record &TheDef; + + /// The roots of a match. These are the leaves of the DAG that are closest to + /// the end of the function. I.e. the nodes that are encountered without + /// following any edges of the DAG described by the pattern as we work our way + /// from the bottom of the function to the top. + std::vector Roots; + + /// A block of arbitrary C++ to finish testing the match. + /// FIXME: This is a temporary measure until we have actual pattern matching + const CodeInit *MatchingFixupCode = nullptr; +public: + CombineRule(const CodeGenTarget &Target, RuleID ID, const Record &R) + : ID(ID), TheDef(R) {} + bool parseDefs(); + bool parseMatcher(const CodeGenTarget &Target); + + RuleID getID() const { return ID; } + StringRef getName() const { return TheDef.getName(); } + const Record &getDef() const { return TheDef; } + const CodeInit *getMatchingFixupCode() const { return MatchingFixupCode; } + size_t getNumRoots() const { return Roots.size(); } + + using const_root_iterator = std::vector::const_iterator; + const_root_iterator roots_begin() const { return Roots.begin(); } + const_root_iterator roots_end() const { return Roots.end(); } + iterator_range roots() const { + return llvm::make_range(Roots.begin(), Roots.end()); + } +}; + +/// A convenience function to check that an Init refers to a specific def. This +/// is primarily useful for testing for defs and similar in DagInit's since +/// DagInit's support any type inside them. +static bool isSpecificDef(const Init &N, StringRef Def) { + if (const DefInit *OpI = dyn_cast(&N)) + if (OpI->getDef()->getName() == Def) + return true; + return false; +} + +/// A convenience function to check that an Init refers to a def that is a +/// subclass of the given class and coerce it to a def if it is. This is +/// primarily useful for testing for subclasses of GIMatchKind and similar in +/// DagInit's since DagInit's support any type inside them. 
+static Record *getDefOfSubClass(const Init &N, StringRef Cls) { + if (const DefInit *OpI = dyn_cast(&N)) + if (OpI->getDef()->isSubClassOf(Cls)) + return OpI->getDef(); + return nullptr; +} + +bool CombineRule::parseDefs() { + NamedRegionTimer T("parseDefs", "Time spent parsing the defs", "Rule Parsing", + "Time spent on rule parsing", TimeRegions); + DagInit *Defs = TheDef.getValueAsDag("Defs"); + + if (Defs->getOperatorAsDef(TheDef.getLoc())->getName() != "defs") { + PrintError(TheDef.getLoc(), "Expected defs operator"); + return false; + } + + for (unsigned I = 0, E = Defs->getNumArgs(); I < E; ++I) { + // Roots should be collected into Roots + if (isSpecificDef(*Defs->getArg(I), "root")) { + Roots.emplace_back(Defs->getArgNameStr(I)); + continue; + } + + // Otherwise emit an appropriate error message. + if (getDefOfSubClass(*Defs->getArg(I), "GIDefKind")) + PrintError(TheDef.getLoc(), + "This GIDefKind not implemented in tablegen"); + else if (getDefOfSubClass(*Defs->getArg(I), "GIDefKindWithArgs")) + PrintError(TheDef.getLoc(), + "This GIDefKindWithArgs not implemented in tablegen"); + else + PrintError(TheDef.getLoc(), + "Expected a subclass of GIDefKind or a sub-dag whose " + "operator is of type GIDefKindWithArgs"); + return false; + } + + if (Roots.empty()) { + PrintError(TheDef.getLoc(), "Combine rules must have at least one root"); + return false; + } + return true; +} + +bool CombineRule::parseMatcher(const CodeGenTarget &Target) { + NamedRegionTimer T("parseMatcher", "Time spent parsing the matcher", + "Rule Parsing", "Time spent on rule parsing", TimeRegions); + DagInit *Matchers = TheDef.getValueAsDag("Match"); + + if (Matchers->getOperatorAsDef(TheDef.getLoc())->getName() != "match") { + PrintError(TheDef.getLoc(), "Expected match operator"); + return false; + } + + if (Matchers->getNumArgs() == 0) { + PrintError(TheDef.getLoc(), "Matcher is empty"); + return false; + } + + // The match section consists of a list of matchers and predicates. Parse each + // one and add the equivalent GIMatchDag nodes, predicates, and edges. + for (unsigned I = 0; I < Matchers->getNumArgs(); ++I) { + + // Parse arbitrary C++ code we have in lieu of supporting MIR matching + if (const CodeInit *CodeI = dyn_cast(Matchers->getArg(I))) { + assert(!MatchingFixupCode && + "Only one block of arbitrary code is currently permitted"); + MatchingFixupCode = CodeI; + continue; + } + + PrintError(TheDef.getLoc(), + "Expected a subclass of GIMatchKind or a sub-dag whose " + "operator is either of a GIMatchKindWithArgs or Instruction"); + PrintNote("Pattern was `" + Matchers->getArg(I)->getAsString() + "'"); + return false; + } + return true; +} + +class GICombinerEmitter { + StringRef Name; + const CodeGenTarget &Target; + Record *Combiner; + std::vector> Rules; + std::unique_ptr makeCombineRule(const Record &R); + + void gatherRules(std::vector> &ActiveRules, + const std::vector &&RulesAndGroups); + +public: + explicit GICombinerEmitter(RecordKeeper &RK, const CodeGenTarget &Target, + StringRef Name, Record *Combiner); + ~GICombinerEmitter() {} + + StringRef getClassName() const { + return Combiner->getValueAsString("Classname"); + } + void run(raw_ostream &OS); + + /// Emit the name matcher (guarded by #ifndef NDEBUG) used to disable rules in + /// response to the generated cl::opt. 
+ void emitNameMatcher(raw_ostream &OS) const; + void generateCodeForRule(raw_ostream &OS, const CombineRule *Rule, + StringRef Indent) const; +}; + +GICombinerEmitter::GICombinerEmitter(RecordKeeper &RK, + const CodeGenTarget &Target, + StringRef Name, Record *Combiner) + : Name(Name), Target(Target), Combiner(Combiner) {} + +void GICombinerEmitter::emitNameMatcher(raw_ostream &OS) const { + std::vector> Cases; + Cases.reserve(Rules.size()); + + for (const CombineRule &EnumeratedRule : make_pointee_range(Rules)) { + std::string Code; + raw_string_ostream SS(Code); + SS << "return " << EnumeratedRule.getID() << ";\n"; + Cases.push_back(std::make_pair(EnumeratedRule.getName(), SS.str())); + } + + OS << "static Optional getRuleIdxForIdentifier(StringRef " + "RuleIdentifier) {\n" + << " uint64_t I;\n" + << " // getAtInteger(...) returns false on success\n" + << " bool Parsed = !RuleIdentifier.getAsInteger(0, I);\n" + << " if (Parsed)\n" + << " return I;\n\n" + << "#ifndef NDEBUG\n"; + StringMatcher Matcher("RuleIdentifier", Cases, OS); + Matcher.Emit(); + OS << "#endif // ifndef NDEBUG\n\n" + << " return None;\n" + << "}\n"; +} + +std::unique_ptr +GICombinerEmitter::makeCombineRule(const Record &TheDef) { + std::unique_ptr Rule = + std::make_unique(Target, NumPatternTotal, TheDef); + + if (!Rule->parseDefs()) + return nullptr; + if (!Rule->parseMatcher(Target)) + return nullptr; + // For now, don't support multi-root rules. We'll come back to this later + // once we have the algorithm changes to support it. + if (Rule->getNumRoots() > 1) { + PrintError(TheDef.getLoc(), "Multi-root matches are not supported (yet)"); + return nullptr; + } + return Rule; +} + +/// Recurse into GICombineGroup's and flatten the ruleset into a simple list. +void GICombinerEmitter::gatherRules( + std::vector> &ActiveRules, + const std::vector &&RulesAndGroups) { + for (Record *R : RulesAndGroups) { + if (R->isValueUnset("Rules")) { + std::unique_ptr Rule = makeCombineRule(*R); + if (Rule == nullptr) { + PrintError(R->getLoc(), "Failed to parse rule"); + continue; + } + ActiveRules.emplace_back(std::move(Rule)); + ++NumPatternTotal; + } else + gatherRules(ActiveRules, R->getValueAsListOfDefs("Rules")); + } +} + +void GICombinerEmitter::generateCodeForRule(raw_ostream &OS, + const CombineRule *Rule, + StringRef Indent) const { + { + const Record &RuleDef = Rule->getDef(); + + OS << Indent << "// Rule: " << RuleDef.getName() << "\n" + << Indent << "if (!isRuleDisabled(" << Rule->getID() << ")) {\n"; + + CodeExpansions Expansions; + for (const RootInfo &Root : Rule->roots()) { + Expansions.declare(Root.getPatternSymbol(), "MI"); + } + DagInit *Applyer = RuleDef.getValueAsDag("Apply"); + if (Applyer->getOperatorAsDef(RuleDef.getLoc())->getName() != + "apply") { + PrintError(RuleDef.getLoc(), "Expected apply operator"); + return; + } + + OS << Indent << " if (1\n"; + + if (Rule->getMatchingFixupCode() && + !Rule->getMatchingFixupCode()->getValue().empty()) { + // FIXME: Single-use lambda's like this are a serious compile-time + // performance and memory issue. It's convenient for this early stage to + // defer some work to successive patches but we need to eliminate this + // before the ruleset grows to small-moderate size. Last time, it became + // a big problem for low-mem systems around the 500 rule mark but by the + // time we grow that large we should have merged the ISel match table + // mechanism with the Combiner. 
+ OS << Indent << " && [&]() {\n" + << Indent << " " + << CodeExpander(Rule->getMatchingFixupCode()->getValue(), Expansions, + Rule->getMatchingFixupCode()->getLoc(), ShowExpansions) + << "\n" + << Indent << " return true;\n" + << Indent << " }()"; + } + OS << ") {\n" << Indent << " "; + + if (const CodeInit *Code = dyn_cast(Applyer->getArg(0))) { + OS << CodeExpander(Code->getAsUnquotedString(), Expansions, + Code->getLoc(), ShowExpansions) + << "\n" + << Indent << " return true;\n" + << Indent << " }\n"; + } else { + PrintError(RuleDef.getLoc(), "Expected apply code block"); + return; + } + + OS << Indent << "}\n"; + } +} + +void GICombinerEmitter::run(raw_ostream &OS) { + gatherRules(Rules, Combiner->getValueAsListOfDefs("Rules")); + NamedRegionTimer T("Emit", "Time spent emitting the combiner", + "Code Generation", "Time spent generating code", + TimeRegions); + OS << "#ifdef " << Name.upper() << "_GENCOMBINERHELPER_DEPS\n" + << "#include \"llvm/ADT/SparseBitVector.h\"\n" + << "namespace llvm {\n" + << "extern cl::OptionCategory GICombinerOptionCategory;\n" + << "} // end namespace llvm\n" + << "#endif // ifdef " << Name.upper() << "_GENCOMBINERHELPER_DEPS\n\n"; + + OS << "#ifdef " << Name.upper() << "_GENCOMBINERHELPER_H\n" + << "class " << getClassName() << " {\n" + << " SparseBitVector<> DisabledRules;\n" + << "\n" + << "public:\n" + << " bool parseCommandLineOption();\n" + << " bool isRuleDisabled(unsigned ID) const;\n" + << " bool setRuleDisabled(StringRef RuleIdentifier);\n" + << "\n" + << " bool tryCombineAll(\n" + << " GISelChangeObserver &Observer,\n" + << " MachineInstr &MI,\n" + << " MachineIRBuilder &B) const;\n" + << "};\n\n"; + + emitNameMatcher(OS); + + OS << "bool " << getClassName() + << "::setRuleDisabled(StringRef RuleIdentifier) {\n" + << " std::pair RangePair = " + "RuleIdentifier.split('-');\n" + << " if (!RangePair.second.empty()) {\n" + << " const auto First = getRuleIdxForIdentifier(RangePair.first);\n" + << " const auto Last = getRuleIdxForIdentifier(RangePair.second);\n" + << " if (!First.hasValue() || !Last.hasValue())\n" + << " return false;\n" + << " if (First >= Last)\n" + << " report_fatal_error(\"Beginning of range should be before end of " + "range\");\n" + << " for (auto I = First.getValue(); I < Last.getValue(); ++I)\n" + << " DisabledRules.set(I);\n" + << " return true;\n" + << " } else {\n" + << " const auto I = getRuleIdxForIdentifier(RangePair.first);\n" + << " if (!I.hasValue())\n" + << " return false;\n" + << " DisabledRules.set(I.getValue());\n" + << " return true;\n" + << " }\n" + << " return false;\n" + << "}\n"; + + OS << "bool " << getClassName() + << "::isRuleDisabled(unsigned RuleID) const {\n" + << " return DisabledRules.test(RuleID);\n" + << "}\n"; + OS << "#endif // ifdef " << Name.upper() << "_GENCOMBINERHELPER_H\n\n"; + + OS << "#ifdef " << Name.upper() << "_GENCOMBINERHELPER_CPP\n" + << "\n" + << "cl::list " << Name << "Option(\n" + << " \"" << Name.lower() << "-disable-rule\",\n" + << " cl::desc(\"Disable one or more combiner rules temporarily in " + << "the " << Name << " pass\"),\n" + << " cl::CommaSeparated,\n" + << " cl::Hidden,\n" + << " cl::cat(GICombinerOptionCategory));\n" + << "\n" + << "bool " << getClassName() << "::parseCommandLineOption() {\n" + << " for (const auto &Identifier : " << Name << "Option)\n" + << " if (!setRuleDisabled(Identifier))\n" + << " return false;\n" + << " return true;\n" + << "}\n\n"; + + OS << "bool " << getClassName() << "::tryCombineAll(\n" + << " GISelChangeObserver &Observer,\n" + << " 
MachineInstr &MI,\n" + << " MachineIRBuilder &B) const {\n" + << " CombinerHelper Helper(Observer, B);\n" + << " MachineBasicBlock *MBB = MI.getParent();\n" + << " MachineFunction *MF = MBB->getParent();\n" + << " MachineRegisterInfo &MRI = MF->getRegInfo();\n" + << " (void)MBB; (void)MF; (void)MRI;\n\n"; + + for (const auto &Rule : Rules) + generateCodeForRule(OS, Rule.get(), " "); + OS << "\n return false;\n" + << "}\n" + << "#endif // ifdef " << Name.upper() << "_GENCOMBINERHELPER_CPP\n"; +} + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// + +namespace llvm { +void EmitGICombiner(RecordKeeper &RK, raw_ostream &OS) { + CodeGenTarget Target(RK); + emitSourceFileHeader("Global Combiner", OS); + + if (SelectedCombiners.empty()) + PrintFatalError("No combiners selected with -combiners"); + for (const auto &Combiner : SelectedCombiners) { + Record *CombinerDef = RK.getDef(Combiner); + if (!CombinerDef) + PrintFatalError("Could not find " + Combiner); + GICombinerEmitter(RK, Target, Combiner, CombinerDef).run(OS); + } + NumPatternTotalStatistic = NumPatternTotal; +} + +} // namespace llvm diff --git a/utils/TableGen/GlobalISel/CMakeLists.txt b/utils/TableGen/GlobalISel/CMakeLists.txt new file mode 100644 index 000000000000..2f74d1087bcd --- /dev/null +++ b/utils/TableGen/GlobalISel/CMakeLists.txt @@ -0,0 +1,7 @@ +set(LLVM_LINK_COMPONENTS + Support + ) + +llvm_add_library(LLVMTableGenGlobalISel STATIC DISABLE_LLVM_LINK_LLVM_DYLIB + CodeExpander.cpp + ) diff --git a/utils/TableGen/GlobalISel/CodeExpander.cpp b/utils/TableGen/GlobalISel/CodeExpander.cpp new file mode 100644 index 000000000000..d59a9b8e3b65 --- /dev/null +++ b/utils/TableGen/GlobalISel/CodeExpander.cpp @@ -0,0 +1,93 @@ +//===- CodeExpander.cpp - Expand variables in a string --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file Expand the variables in a string. +// +//===----------------------------------------------------------------------===// + +#include "CodeExpander.h" +#include "CodeExpansions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/Error.h" + +using namespace llvm; + +void CodeExpander::emit(raw_ostream &OS) const { + StringRef Current = Code; + + while (!Current.empty()) { + size_t Pos = Current.find_first_of("$\n\\"); + if (Pos == StringRef::npos) { + OS << Current; + Current = ""; + continue; + } + + OS << Current.substr(0, Pos); + Current = Current.substr(Pos); + + if (Current.startswith("\n")) { + OS << "\n" << Indent; + Current = Current.drop_front(1); + continue; + } + + if (Current.startswith("\\$") || Current.startswith("\\\\")) { + OS << Current[1]; + Current = Current.drop_front(2); + continue; + } + + if (Current.startswith("\\")) { + Current = Current.drop_front(1); + continue; + } + + if (Current.startswith("${")) { + StringRef StartVar = Current; + Current = Current.drop_front(2); + StringRef Var; + std::tie(Var, Current) = Current.split("}"); + + // Warn if we split because no terminator was found. + StringRef EndVar = StartVar.drop_front(2 /* ${ */ + Var.size()); + if (EndVar.empty()) { + size_t LocOffset = StartVar.data() - Code.data(); + PrintWarning( + Loc.size() > 0 && Loc[0].isValid() + ? 
SMLoc::getFromPointer(Loc[0].getPointer() + LocOffset) + : SMLoc(), + "Unterminated expansion"); + } + + auto ValueI = Expansions.find(Var); + if (ValueI == Expansions.end()) { + size_t LocOffset = StartVar.data() - Code.data(); + PrintError(Loc.size() > 0 && Loc[0].isValid() + ? SMLoc::getFromPointer(Loc[0].getPointer() + LocOffset) + : SMLoc(), + "Attempting to expand an undeclared variable " + Var); + } + if (ShowExpansions) + OS << "/*$" << Var << "{*/"; + OS << Expansions.lookup(Var); + if (ShowExpansions) + OS << "/*}*/"; + continue; + } + + size_t LocOffset = Current.data() - Code.data(); + PrintWarning(Loc.size() > 0 && Loc[0].isValid() + ? SMLoc::getFromPointer(Loc[0].getPointer() + LocOffset) + : SMLoc(), + "Assuming missing escape character"); + OS << "$"; + Current = Current.drop_front(1); + } +} diff --git a/utils/TableGen/GlobalISel/CodeExpander.h b/utils/TableGen/GlobalISel/CodeExpander.h new file mode 100644 index 000000000000..bd6946de5925 --- /dev/null +++ b/utils/TableGen/GlobalISel/CodeExpander.h @@ -0,0 +1,55 @@ +//===- CodeExpander.h - Expand variables in a string ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file Expand the variables in a string. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_UTILS_TABLEGEN_CODEEXPANDER_H +#define LLVM_UTILS_TABLEGEN_CODEEXPANDER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/SMLoc.h" + +namespace llvm { +class CodeExpansions; +class raw_ostream; + +/// Emit the given code with all '${foo}' placeholders expanded to their +/// replacements. +/// +/// It's an error to use an undefined expansion and expansion-like output that +/// needs to be emitted verbatim can be escaped as '\${foo}' +/// +/// The emitted code can be given a custom indent to enable both indentation by +/// an arbitrary amount of whitespace and emission of the code as a comment. +class CodeExpander { + StringRef Code; + const CodeExpansions &Expansions; + const ArrayRef &Loc; + bool ShowExpansions; + StringRef Indent; + +public: + CodeExpander(StringRef Code, const CodeExpansions &Expansions, + const ArrayRef &Loc, bool ShowExpansions, + StringRef Indent = " ") + : Code(Code), Expansions(Expansions), Loc(Loc), + ShowExpansions(ShowExpansions), Indent(Indent) {} + + void emit(raw_ostream &OS) const; +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const CodeExpander &Expander) { + Expander.emit(OS); + return OS; +} +} // end namespace llvm + +#endif // ifndef LLVM_UTILS_TABLEGEN_CODEEXPANDER_H diff --git a/utils/TableGen/GlobalISel/CodeExpansions.h b/utils/TableGen/GlobalISel/CodeExpansions.h new file mode 100644 index 000000000000..bb890ec8f57e --- /dev/null +++ b/utils/TableGen/GlobalISel/CodeExpansions.h @@ -0,0 +1,43 @@ +//===- CodeExpansions.h - Record expansions for CodeExpander --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file Record the expansions to use in a CodeExpander. 
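CodeExpansions, whose definition follows, holds the variable bindings that CodeExpander substitutes for '${...}' placeholders according to the escape rules in emit() above. A small hypothetical use, in the spirit of how GICombinerEmitter binds each pattern root to "MI"; the variable names and the snippet being expanded are invented:

#include "GlobalISel/CodeExpander.h"
#include "GlobalISel/CodeExpansions.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void expandExample(raw_ostream &OS) {
  CodeExpansions Vars;
  Vars.declare("root", "MI");
  Vars.declare("builder", "B");
  ArrayRef<SMLoc> NoLoc; // no source location available for diagnostics here
  // Prints "B.setInstr(*MI);" and then "return true;" indented by two spaces.
  CodeExpander E("${builder}.setInstr(*${root});\nreturn true;", Vars, NoLoc,
                 /*ShowExpansions=*/false, /*Indent=*/"  ");
  OS << E;
}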
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/StringMap.h" + +#ifndef LLVM_UTILS_TABLEGEN_CODEEXPANSIONS_H +#define LLVM_UTILS_TABLEGEN_CODEEXPANSIONS_H +namespace llvm { +class CodeExpansions { +public: + using const_iterator = StringMap::const_iterator; + +protected: + StringMap Expansions; + +public: + void declare(StringRef Name, StringRef Expansion) { + bool Inserted = Expansions.try_emplace(Name, Expansion).second; + assert(Inserted && "Declared variable twice"); + (void)Inserted; + } + + std::string lookup(StringRef Variable) const { + return Expansions.lookup(Variable); + } + + const_iterator begin() const { return Expansions.begin(); } + const_iterator end() const { return Expansions.end(); } + const_iterator find(StringRef Variable) const { + return Expansions.find(Variable); + } +}; +} // end namespace llvm +#endif // ifndef LLVM_UTILS_TABLEGEN_CODEEXPANSIONS_H diff --git a/utils/TableGen/GlobalISelEmitter.cpp b/utils/TableGen/GlobalISelEmitter.cpp index f1c02134198b..d8d4c9f4f55c 100644 --- a/utils/TableGen/GlobalISelEmitter.cpp +++ b/utils/TableGen/GlobalISelEmitter.cpp @@ -249,6 +249,10 @@ static std::string explainPredicates(const TreePatternNode *N) { OS << ']'; } + int64_t MinAlign = P.getMinAlignment(); + if (MinAlign > 0) + Explanation += " MinAlign=" + utostr(MinAlign); + if (P.isAtomicOrderingMonotonic()) Explanation += " monotonic"; if (P.isAtomicOrderingAcquire()) @@ -329,6 +333,9 @@ static Error isTrivialOperatorNode(const TreePatternNode *N) { const ListInit *AddrSpaces = Predicate.getAddressSpaces(); if (AddrSpaces && !AddrSpaces->empty()) continue; + + if (Predicate.getMinAlignment() > 0) + continue; } if (Predicate.isAtomic() && Predicate.getMemoryVT()) @@ -822,6 +829,10 @@ protected: /// the renderers. StringMap DefinedOperands; + /// A map of anonymous physical register operands defined by the matchers that + /// may be referenced by the renderers. 
+ DenseMap PhysRegOperands; + /// ID for the next instruction variable defined with implicitlyDefineInsnVar() unsigned NextInsnVarID; @@ -904,6 +915,8 @@ public: void defineOperand(StringRef SymbolicName, OperandMatcher &OM); + void definePhysRegOperand(Record *Reg, OperandMatcher &OM); + Error defineComplexSubOperand(StringRef SymbolicName, Record *ComplexPattern, unsigned RendererID, unsigned SubOperandID) { if (ComplexSubOperands.count(SymbolicName)) @@ -927,6 +940,7 @@ public: InstructionMatcher &getInstructionMatcher(StringRef SymbolicName) const; const OperandMatcher &getOperandMatcher(StringRef Name) const; + const OperandMatcher &getPhysRegOperandMatcher(Record *) const; void optimize() override; void emit(MatchTable &Table) override; @@ -1048,14 +1062,17 @@ public: IPM_Opcode, IPM_NumOperands, IPM_ImmPredicate, + IPM_Imm, IPM_AtomicOrderingMMO, IPM_MemoryLLTSize, IPM_MemoryVsLLTSize, IPM_MemoryAddressSpace, + IPM_MemoryAlignment, IPM_GenericPredicate, OPM_SameOperand, OPM_ComplexPattern, OPM_IntrinsicID, + OPM_CmpPredicate, OPM_Instruction, OPM_Int, OPM_LiteralInt, @@ -1324,6 +1341,23 @@ public: } }; +class ImmOperandMatcher : public OperandPredicateMatcher { +public: + ImmOperandMatcher(unsigned InsnVarID, unsigned OpIdx) + : OperandPredicateMatcher(IPM_Imm, InsnVarID, OpIdx) {} + + static bool classof(const PredicateMatcher *P) { + return P->getKind() == IPM_Imm; + } + + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIM_CheckIsImm") << MatchTable::Comment("MI") + << MatchTable::IntValue(InsnVarID) << MatchTable::Comment("Op") + << MatchTable::IntValue(OpIdx) << MatchTable::LineBreak; + } +}; + /// Generates code to check that an operand is a G_CONSTANT with a particular /// int. class ConstantIntOperandMatcher : public OperandPredicateMatcher { @@ -1381,6 +1415,36 @@ public: } }; +/// Generates code to check that an operand is an CmpInst predicate +class CmpPredicateOperandMatcher : public OperandPredicateMatcher { +protected: + std::string PredName; + +public: + CmpPredicateOperandMatcher(unsigned InsnVarID, unsigned OpIdx, + std::string P) + : OperandPredicateMatcher(OPM_CmpPredicate, InsnVarID, OpIdx), PredName(P) {} + + bool isIdentical(const PredicateMatcher &B) const override { + return OperandPredicateMatcher::isIdentical(B) && + PredName == cast(&B)->PredName; + } + + static bool classof(const PredicateMatcher *P) { + return P->getKind() == OPM_CmpPredicate; + } + + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIM_CheckCmpPredicate") + << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID) + << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx) + << MatchTable::Comment("Predicate") + << MatchTable::NamedValue("CmpInst", PredName) + << MatchTable::LineBreak; + } +}; + /// Generates code to check that an operand is an intrinsic ID. class IntrinsicIDOperandMatcher : public OperandPredicateMatcher { protected: @@ -1442,7 +1506,7 @@ public: Optional addPredicate(Args &&... 
args) { if (isSameAsAnotherOperand()) return None; - Predicates.emplace_back(llvm::make_unique( + Predicates.emplace_back(std::make_unique( getInsnVarID(), getOpIdx(), std::forward(args)...)); return static_cast(Predicates.back().get()); } @@ -1849,6 +1913,40 @@ public: } }; +class MemoryAlignmentPredicateMatcher : public InstructionPredicateMatcher { +protected: + unsigned MMOIdx; + int MinAlign; + +public: + MemoryAlignmentPredicateMatcher(unsigned InsnVarID, unsigned MMOIdx, + int MinAlign) + : InstructionPredicateMatcher(IPM_MemoryAlignment, InsnVarID), + MMOIdx(MMOIdx), MinAlign(MinAlign) { + assert(MinAlign > 0); + } + + static bool classof(const PredicateMatcher *P) { + return P->getKind() == IPM_MemoryAlignment; + } + + bool isIdentical(const PredicateMatcher &B) const override { + if (!InstructionPredicateMatcher::isIdentical(B)) + return false; + auto *Other = cast(&B); + return MMOIdx == Other->MMOIdx && MinAlign == Other->MinAlign; + } + + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIM_CheckMemoryAlignment") + << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID) + << MatchTable::Comment("MMO") << MatchTable::IntValue(MMOIdx) + << MatchTable::Comment("MinAlign") << MatchTable::IntValue(MinAlign) + << MatchTable::LineBreak; + } +}; + /// Generates code to check that the size of an MMO is less-than, equal-to, or /// greater than a given LLT. class MemoryVsLLTSizePredicateMatcher : public InstructionPredicateMatcher { @@ -1945,6 +2043,11 @@ protected: std::string SymbolicName; unsigned InsnVarID; + /// PhysRegInputs - List list has an entry for each explicitly specified + /// physreg input to the pattern. The first elt is the Register node, the + /// second is the recorded slot number the input pattern match saved it in. + SmallVector, 2> PhysRegInputs; + public: InstructionMatcher(RuleMatcher &Rule, StringRef SymbolicName) : Rule(Rule), SymbolicName(SymbolicName) { @@ -1957,7 +2060,7 @@ public: template Optional addPredicate(Args &&... args) { Predicates.emplace_back( - llvm::make_unique(getInsnVarID(), std::forward(args)...)); + std::make_unique(getInsnVarID(), std::forward(args)...)); return static_cast(Predicates.back().get()); } @@ -1986,6 +2089,20 @@ public: llvm_unreachable("Failed to lookup operand"); } + OperandMatcher &addPhysRegInput(Record *Reg, unsigned OpIdx, + unsigned TempOpIdx) { + assert(SymbolicName.empty()); + OperandMatcher *OM = new OperandMatcher(*this, OpIdx, "", TempOpIdx); + Operands.emplace_back(OM); + Rule.definePhysRegOperand(Reg, *OM); + PhysRegInputs.emplace_back(Reg, OpIdx); + return *OM; + } + + ArrayRef> getPhysRegInputs() const { + return PhysRegInputs; + } + StringRef getSymbolicName() const { return SymbolicName; } unsigned getNumOperands() const { return Operands.size(); } OperandVec::iterator operands_begin() { return Operands.begin(); } @@ -2193,9 +2310,11 @@ public: OR_Copy, OR_CopyOrAddZeroReg, OR_CopySubReg, + OR_CopyPhysReg, OR_CopyConstantAsImm, OR_CopyFConstantAsFPImm, OR_Imm, + OR_SubRegIndex, OR_Register, OR_TempRegister, OR_ComplexPattern, @@ -2247,6 +2366,38 @@ public: } }; +/// A CopyRenderer emits code to copy a virtual register to a specific physical +/// register. 
+class CopyPhysRegRenderer : public OperandRenderer { +protected: + unsigned NewInsnID; + Record *PhysReg; + +public: + CopyPhysRegRenderer(unsigned NewInsnID, Record *Reg) + : OperandRenderer(OR_CopyPhysReg), NewInsnID(NewInsnID), + PhysReg(Reg) { + assert(PhysReg); + } + + static bool classof(const OperandRenderer *R) { + return R->getKind() == OR_CopyPhysReg; + } + + Record *getPhysReg() const { return PhysReg; } + + void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { + const OperandMatcher &Operand = Rule.getPhysRegOperandMatcher(PhysReg); + unsigned OldInsnVarID = Rule.getInsnVarID(Operand.getInstructionMatcher()); + Table << MatchTable::Opcode("GIR_Copy") << MatchTable::Comment("NewInsnID") + << MatchTable::IntValue(NewInsnID) << MatchTable::Comment("OldInsnID") + << MatchTable::IntValue(OldInsnVarID) << MatchTable::Comment("OpIdx") + << MatchTable::IntValue(Operand.getOpIdx()) + << MatchTable::Comment(PhysReg->getName()) + << MatchTable::LineBreak; + } +}; + /// A CopyOrAddZeroRegRenderer emits code to copy a single operand from an /// existing instruction to the one being built. If the operand turns out to be /// a 'G_CONSTANT 0' then it replaces the operand with a zero register. @@ -2393,11 +2544,13 @@ class AddRegisterRenderer : public OperandRenderer { protected: unsigned InsnID; const Record *RegisterDef; + bool IsDef; public: - AddRegisterRenderer(unsigned InsnID, const Record *RegisterDef) - : OperandRenderer(OR_Register), InsnID(InsnID), RegisterDef(RegisterDef) { - } + AddRegisterRenderer(unsigned InsnID, const Record *RegisterDef, + bool IsDef = false) + : OperandRenderer(OR_Register), InsnID(InsnID), RegisterDef(RegisterDef), + IsDef(IsDef) {} static bool classof(const OperandRenderer *R) { return R->getKind() == OR_Register; @@ -2411,7 +2564,16 @@ public: ? RegisterDef->getValueAsString("Namespace") : ""), RegisterDef->getName()) - << MatchTable::LineBreak; + << MatchTable::Comment("AddRegisterRegFlags"); + + // TODO: This is encoded as a 64-bit element, but only 16 or 32-bits are + // really needed for a physical register reference. We can pack the + // register and flags in a single field. + if (IsDef) + Table << MatchTable::NamedValue("RegState::Define"); + else + Table << MatchTable::IntValue(0); + Table << MatchTable::LineBreak; } }; @@ -2467,6 +2629,28 @@ public: } }; +/// Adds an enum value for a subreg index to the instruction being built. +class SubRegIndexRenderer : public OperandRenderer { +protected: + unsigned InsnID; + const CodeGenSubRegIndex *SubRegIdx; + +public: + SubRegIndexRenderer(unsigned InsnID, const CodeGenSubRegIndex *SRI) + : OperandRenderer(OR_SubRegIndex), InsnID(InsnID), SubRegIdx(SRI) {} + + static bool classof(const OperandRenderer *R) { + return R->getKind() == OR_SubRegIndex; + } + + void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIR_AddImm") << MatchTable::Comment("InsnID") + << MatchTable::IntValue(InsnID) << MatchTable::Comment("SubRegIndex") + << MatchTable::IntValue(SubRegIdx->EnumValue) + << MatchTable::LineBreak; + } +}; + /// Adds operands by calling a renderer function supplied by the ComplexPattern /// matcher function. class RenderComplexPatternOperand : public OperandRenderer { @@ -2620,7 +2804,7 @@ public: template Kind &addRenderer(Args&&... 
args) { OperandRenderers.emplace_back( - llvm::make_unique(InsnID, std::forward(args)...)); + std::make_unique(InsnID, std::forward(args)...)); return *static_cast(OperandRenderers.back().get()); } @@ -2747,7 +2931,9 @@ private: public: MakeTempRegisterAction(const LLTCodeGen &Ty, unsigned TempRegID) - : Ty(Ty), TempRegID(TempRegID) {} + : Ty(Ty), TempRegID(TempRegID) { + KnownTypes.insert(Ty); + } void emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { Table << MatchTable::Opcode("GIR_MakeTempReg") @@ -2781,7 +2967,7 @@ const std::vector &RuleMatcher::getRequiredFeatures() const { // iterator. template Kind &RuleMatcher::addAction(Args &&... args) { - Actions.emplace_back(llvm::make_unique(std::forward(args)...)); + Actions.emplace_back(std::make_unique(std::forward(args)...)); return *static_cast(Actions.back().get()); } @@ -2796,7 +2982,7 @@ template action_iterator RuleMatcher::insertAction(action_iterator InsertPt, Args &&... args) { return Actions.emplace(InsertPt, - llvm::make_unique(std::forward(args)...)); + std::make_unique(std::forward(args)...)); } unsigned RuleMatcher::implicitlyDefineInsnVar(InstructionMatcher &Matcher) { @@ -2823,6 +3009,13 @@ void RuleMatcher::defineOperand(StringRef SymbolicName, OperandMatcher &OM) { OM.addPredicate(OM.getSymbolicName()); } +void RuleMatcher::definePhysRegOperand(Record *Reg, OperandMatcher &OM) { + if (PhysRegOperands.find(Reg) == PhysRegOperands.end()) { + PhysRegOperands[Reg] = &OM; + return; + } +} + InstructionMatcher & RuleMatcher::getInstructionMatcher(StringRef SymbolicName) const { for (const auto &I : InsnVariableIDs) @@ -2832,6 +3025,18 @@ RuleMatcher::getInstructionMatcher(StringRef SymbolicName) const { ("Failed to lookup instruction " + SymbolicName).str().c_str()); } +const OperandMatcher & +RuleMatcher::getPhysRegOperandMatcher(Record *Reg) const { + const auto &I = PhysRegOperands.find(Reg); + + if (I == PhysRegOperands.end()) { + PrintFatalError(SrcLoc, "Register " + Reg->getName() + + " was not declared in matcher"); + } + + return *I->second; +} + const OperandMatcher & RuleMatcher::getOperandMatcher(StringRef Name) const { const auto &I = DefinedOperands.find(Name); @@ -3079,9 +3284,9 @@ private: bool OperandIsAPointer, unsigned OpIdx, unsigned &TempOpIdx); - Expected - createAndImportInstructionRenderer(RuleMatcher &M, - const TreePatternNode *Dst); + Expected createAndImportInstructionRenderer( + RuleMatcher &M, InstructionMatcher &InsnMatcher, + const TreePatternNode *Src, const TreePatternNode *Dst); Expected createAndImportSubInstructionRenderer( action_iterator InsertPt, RuleMatcher &M, const TreePatternNode *Dst, unsigned TempReg); @@ -3089,6 +3294,7 @@ private: createInstructionRenderer(action_iterator InsertPt, RuleMatcher &M, const TreePatternNode *Dst); void importExplicitDefRenderers(BuildMIAction &DstMIBuilder); + Expected importExplicitUseRenderers(action_iterator InsertPt, RuleMatcher &M, BuildMIAction &DstMIBuilder, @@ -3122,6 +3328,32 @@ private: MatchTable buildMatchTable(MutableArrayRef Rules, bool Optimize, bool WithCoverage); + /// Infer a CodeGenRegisterClass for the type of \p SuperRegNode. The returned + /// CodeGenRegisterClass will support the CodeGenRegisterClass of + /// \p SubRegNode, and the subregister index defined by \p SubRegIdxNode. + /// If no register class is found, return None. 
+ Optional + inferSuperRegisterClassForNode(const TypeSetByHwMode &Ty, + TreePatternNode *SuperRegNode, + TreePatternNode *SubRegIdxNode); + Optional + inferSubRegIndexForNode(TreePatternNode *SubRegIdxNode); + + /// Infer a CodeGenRegisterClass which suppoorts \p Ty and \p SubRegIdxNode. + /// Return None if no such class exists. + Optional + inferSuperRegisterClass(const TypeSetByHwMode &Ty, + TreePatternNode *SubRegIdxNode); + + /// Return the CodeGenRegisterClass associated with \p Leaf if it has one. + Optional + getRegClassFromLeaf(TreePatternNode *Leaf); + + /// Return a CodeGenRegisterClass for \p N if one can be found. Return None + /// otherwise. + Optional + inferRegClassFromPattern(TreePatternNode *N); + public: /// Takes a sequence of \p Rules and group them based on the predicates /// they share. \p MatcherStorage is used as a memory container @@ -3190,6 +3422,13 @@ Record *GlobalISelEmitter::findNodeEquiv(Record *N) const { const CodeGenInstruction * GlobalISelEmitter::getEquivNode(Record &Equiv, const TreePatternNode *N) const { + if (N->getNumChildren() >= 1) { + // setcc operation maps to two different G_* instructions based on the type. + if (!Equiv.isValueUnset("IfFloatingPoint") && + MVT(N->getChild(0)->getSimpleType(0)).isFloatingPoint()) + return &Target.getInstruction(Equiv.getValueAsDef("IfFloatingPoint")); + } + for (const TreePredicateCall &Call : N->getPredicateCalls()) { const TreePredicateFn &Predicate = Call.Fn; if (!Equiv.isValueUnset("IfSignExtend") && Predicate.isLoad() && @@ -3199,6 +3438,7 @@ GlobalISelEmitter::getEquivNode(Record &Equiv, const TreePatternNode *N) const { Predicate.isZeroExtLoad()) return &Target.getInstruction(Equiv.getValueAsDef("IfZeroExtend")); } + return &Target.getInstruction(Equiv.getValueAsDef("I")); } @@ -3212,7 +3452,7 @@ Error GlobalISelEmitter::importRulePredicates(RuleMatcher &M, ArrayRef Predicates) { for (const Predicate &P : Predicates) { - if (!P.Def) + if (!P.Def || P.getCondString().empty()) continue; declareSubtargetFeature(P.Def); M.addRequiredFeature(P.Def); @@ -3287,6 +3527,10 @@ Expected GlobalISelEmitter::createAndImportSelDAGMatcher( 0, ParsedAddrSpaces); } } + + int64_t MinAlign = Predicate.getMinAlignment(); + if (MinAlign > 0) + InsnMatcher.addPredicate(0, MinAlign); } // G_LOAD is used for both non-extending and any-extending loads. @@ -3301,11 +3545,19 @@ Expected GlobalISelEmitter::createAndImportSelDAGMatcher( continue; } - if (Predicate.isStore() && Predicate.isTruncStore()) { - // FIXME: If MemoryVT is set, we end up with 2 checks for the MMO size. - InsnMatcher.addPredicate( - 0, MemoryVsLLTSizePredicateMatcher::LessThan, 0); - continue; + if (Predicate.isStore()) { + if (Predicate.isTruncStore()) { + // FIXME: If MemoryVT is set, we end up with 2 checks for the MMO size. + InsnMatcher.addPredicate( + 0, MemoryVsLLTSizePredicateMatcher::LessThan, 0); + continue; + } + if (Predicate.isNonTruncStore()) { + // We need to check the sizes match here otherwise we could incorrectly + // match truncating stores with non-truncating ones. + InsnMatcher.addPredicate( + 0, MemoryVsLLTSizePredicateMatcher::EqualTo, 0); + } } // No check required. We already did it by swapping the opcode. 
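// ---[ Editorial aside: illustrative sketch, not part of this patch ]---------
// Roughly what the predicates imported above amount to once the generated
// selector runs. The interpreter for GIM_CheckMemoryAlignment and the
// MemoryVsLLTSize checks lives in InstructionSelectorImpl.h, not in this
// patch, so this is a paraphrase; MI, MRI and MinAlign are assumed in scope.
//
//   const MachineMemOperand &MMO = **MI.memoperands_begin();
//   if (MMO.getAlignment() < uint64_t(MinAlign))
//     return false;                  // minimum-alignment predicate
//   if (MMO.getSize() * 8 !=
//       MRI.getType(MI.getOperand(0).getReg()).getSizeInBits())
//     return false;                  // non-truncating store: MMO size must
//                                    // equal the stored value's size
// ----------------------------------------------------------------------------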
@@ -3405,6 +3657,10 @@ Expected GlobalISelEmitter::createAndImportSelDAGMatcher( } if (SrcGIEquivOrNull && SrcGIEquivOrNull->getValueAsBit("CheckMMOIsNonAtomic")) InsnMatcher.addPredicate("NotAtomic"); + else if (SrcGIEquivOrNull && SrcGIEquivOrNull->getValueAsBit("CheckMMOIsAtomic")) { + InsnMatcher.addPredicate( + "Unordered", AtomicOrderingMMOPredicateMatcher::AO_OrStronger); + } if (Src->isLeaf()) { Init *SrcInit = Src->getLeafValue(); @@ -3427,8 +3683,43 @@ Expected GlobalISelEmitter::createAndImportSelDAGMatcher( return InsnMatcher; } + // Special case because the operand order is changed from setcc. The + // predicate operand needs to be swapped from the last operand to the first + // source. + + unsigned NumChildren = Src->getNumChildren(); + bool IsFCmp = SrcGIOrNull->TheDef->getName() == "G_FCMP"; + + if (IsFCmp || SrcGIOrNull->TheDef->getName() == "G_ICMP") { + TreePatternNode *SrcChild = Src->getChild(NumChildren - 1); + if (SrcChild->isLeaf()) { + DefInit *DI = dyn_cast(SrcChild->getLeafValue()); + Record *CCDef = DI ? DI->getDef() : nullptr; + if (!CCDef || !CCDef->isSubClassOf("CondCode")) + return failedImport("Unable to handle CondCode"); + + OperandMatcher &OM = + InsnMatcher.addOperand(OpIdx++, SrcChild->getName(), TempOpIdx); + StringRef PredType = IsFCmp ? CCDef->getValueAsString("FCmpPredicate") : + CCDef->getValueAsString("ICmpPredicate"); + + if (!PredType.empty()) { + OM.addPredicate(PredType); + // Process the other 2 operands normally. + --NumChildren; + } + } + } + // Match the used operands (i.e. the children of the operator). - for (unsigned i = 0, e = Src->getNumChildren(); i != e; ++i) { + bool IsIntrinsic = + SrcGIOrNull->TheDef->getName() == "G_INTRINSIC" || + SrcGIOrNull->TheDef->getName() == "G_INTRINSIC_W_SIDE_EFFECTS"; + const CodeGenIntrinsic *II = Src->getIntrinsicInfo(CGP); + if (IsIntrinsic && !II) + return failedImport("Expected IntInit containing intrinsic ID)"); + + for (unsigned i = 0; i != NumChildren; ++i) { TreePatternNode *SrcChild = Src->getChild(i); // SelectionDAG allows pointers to be represented with iN since it doesn't @@ -3436,19 +3727,21 @@ Expected GlobalISelEmitter::createAndImportSelDAGMatcher( // Coerce integers to pointers to address space 0 if the context indicates a pointer. bool OperandIsAPointer = SrcGIOrNull->isOperandAPointer(i); - // For G_INTRINSIC/G_INTRINSIC_W_SIDE_EFFECTS, the operand immediately - // following the defs is an intrinsic ID. - if ((SrcGIOrNull->TheDef->getName() == "G_INTRINSIC" || - SrcGIOrNull->TheDef->getName() == "G_INTRINSIC_W_SIDE_EFFECTS") && - i == 0) { - if (const CodeGenIntrinsic *II = Src->getIntrinsicInfo(CGP)) { + if (IsIntrinsic) { + // For G_INTRINSIC/G_INTRINSIC_W_SIDE_EFFECTS, the operand immediately + // following the defs is an intrinsic ID. + if (i == 0) { OperandMatcher &OM = InsnMatcher.addOperand(OpIdx++, SrcChild->getName(), TempOpIdx); OM.addPredicate(II); continue; } - return failedImport("Expected IntInit containing instrinsic ID)"); + // We have to check intrinsics for llvm_anyptr_ty parameters. + // + // Note that we have to look at the i-1th parameter, because we don't + // have the intrinsic ID in the intrinsic's parameter list. + OperandIsAPointer |= II->isParamAPointer(i - 1); } if (auto Error = @@ -3473,14 +3766,37 @@ Error GlobalISelEmitter::importComplexPatternOperandMatcher( return Error::success(); } +// Get the name to use for a pattern operand. For an anonymous physical register +// input, this should use the register name. 
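// ---[ Editorial aside: illustrative sketch, not part of this patch ]---------
// Net effect of the physical-register plumbing added in this patch
// (addPhysRegInput, definePhysRegOperand, CopyPhysRegRenderer, and the COPY
// built per PhysRegInputs entry): before the destination instruction is
// emitted, the matched value is copied into the fixed register. Hand-written,
// that is roughly (MyTarget::SPECIAL_REG and MatchedReg are invented names;
// MI and TII are assumed in scope):
//
//   BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
//           TII.get(TargetOpcode::COPY))
//       .addReg(MyTarget::SPECIAL_REG, RegState::Define)  // GIR_AddRegister
//       .addReg(MatchedReg);                              // GIR_Copy
// ----------------------------------------------------------------------------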
+static StringRef getSrcChildName(const TreePatternNode *SrcChild, + Record *&PhysReg) { + StringRef SrcChildName = SrcChild->getName(); + if (SrcChildName.empty() && SrcChild->isLeaf()) { + if (auto *ChildDefInit = dyn_cast(SrcChild->getLeafValue())) { + auto *ChildRec = ChildDefInit->getDef(); + if (ChildRec->isSubClassOf("Register")) { + SrcChildName = ChildRec->getName(); + PhysReg = ChildRec; + } + } + } + + return SrcChildName; +} + Error GlobalISelEmitter::importChildMatcher(RuleMatcher &Rule, InstructionMatcher &InsnMatcher, const TreePatternNode *SrcChild, bool OperandIsAPointer, unsigned OpIdx, unsigned &TempOpIdx) { - OperandMatcher &OM = - InsnMatcher.addOperand(OpIdx, SrcChild->getName(), TempOpIdx); + + Record *PhysReg = nullptr; + StringRef SrcChildName = getSrcChildName(SrcChild, PhysReg); + + OperandMatcher &OM = PhysReg ? + InsnMatcher.addPhysRegInput(PhysReg, OpIdx, TempOpIdx) : + InsnMatcher.addOperand(OpIdx, SrcChildName, TempOpIdx); if (OM.isSameAsAnotherOperand()) return Error::success(); @@ -3496,6 +3812,10 @@ Error GlobalISelEmitter::importChildMatcher(RuleMatcher &Rule, OM.addPredicate(); return Error::success(); } + if (SrcChild->getOperator()->getName() == "timm") { + OM.addPredicate(); + return Error::success(); + } } } @@ -3569,6 +3889,20 @@ Error GlobalISelEmitter::importChildMatcher(RuleMatcher &Rule, return Error::success(); } + if (ChildRec->isSubClassOf("Register")) { + // This just be emitted as a copy to the specific register. + ValueTypeByHwMode VT = ChildTypes.front().getValueTypeByHwMode(); + const CodeGenRegisterClass *RC + = CGRegs.getMinimalPhysRegClass(ChildRec, &VT); + if (!RC) { + return failedImport( + "Could not determine physical register class of pattern source"); + } + + OM.addPredicate(*RC); + return Error::success(); + } + // Check for ValueType. if (ChildRec->isSubClassOf("ValueType")) { // We already added a type check as standard practice so this doesn't need @@ -3631,7 +3965,10 @@ Expected GlobalISelEmitter::importExplicitUseRenderer( // rendered as operands. // FIXME: The target should be able to choose sign-extended when appropriate // (e.g. on Mips). 
- if (DstChild->getOperator()->getName() == "imm") { + if (DstChild->getOperator()->getName() == "timm") { + DstMIBuilder.addRenderer(DstChild->getName()); + return InsertPt; + } else if (DstChild->getOperator()->getName() == "imm") { DstMIBuilder.addRenderer(DstChild->getName()); return InsertPt; } else if (DstChild->getOperator()->getName() == "fpimm") { @@ -3708,6 +4045,12 @@ Expected GlobalISelEmitter::importExplicitUseRenderer( return InsertPt; } + if (ChildRec->isSubClassOf("SubRegIndex")) { + CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(ChildRec); + DstMIBuilder.addRenderer(SubIdx->EnumValue); + return InsertPt; + } + if (ChildRec->isSubClassOf("ComplexPattern")) { const auto &ComplexPattern = ComplexPatternEquivs.find(ChildRec); if (ComplexPattern == ComplexPatternEquivs.end()) @@ -3729,7 +4072,8 @@ Expected GlobalISelEmitter::importExplicitUseRenderer( } Expected GlobalISelEmitter::createAndImportInstructionRenderer( - RuleMatcher &M, const TreePatternNode *Dst) { + RuleMatcher &M, InstructionMatcher &InsnMatcher, const TreePatternNode *Src, + const TreePatternNode *Dst) { auto InsertPtOrError = createInstructionRenderer(M.actions_end(), M, Dst); if (auto Error = InsertPtOrError.takeError()) return std::move(Error); @@ -3737,6 +4081,17 @@ Expected GlobalISelEmitter::createAndImportInstructionRenderer( action_iterator InsertPt = InsertPtOrError.get(); BuildMIAction &DstMIBuilder = *static_cast(InsertPt->get()); + for (auto PhysInput : InsnMatcher.getPhysRegInputs()) { + InsertPt = M.insertAction( + InsertPt, M.allocateOutputInsnID(), + &Target.getInstruction(RK.getDef("COPY"))); + BuildMIAction &CopyToPhysRegMIBuilder = + *static_cast(InsertPt->get()); + CopyToPhysRegMIBuilder.addRenderer(PhysInput.first, + true); + CopyToPhysRegMIBuilder.addRenderer(PhysInput.first); + } + importExplicitDefRenderers(DstMIBuilder); if (auto Error = importExplicitUseRenderers(InsertPt, M, DstMIBuilder, Dst) @@ -3768,6 +4123,78 @@ GlobalISelEmitter::createAndImportSubInstructionRenderer( if (auto Error = InsertPtOrError.takeError()) return std::move(Error); + // We need to make sure that when we import an INSERT_SUBREG as a + // subinstruction that it ends up being constrained to the correct super + // register and subregister classes. + auto OpName = Target.getInstruction(Dst->getOperator()).TheDef->getName(); + if (OpName == "INSERT_SUBREG") { + auto SubClass = inferRegClassFromPattern(Dst->getChild(1)); + if (!SubClass) + return failedImport( + "Cannot infer register class from INSERT_SUBREG operand #1"); + Optional SuperClass = + inferSuperRegisterClassForNode(Dst->getExtType(0), Dst->getChild(0), + Dst->getChild(2)); + if (!SuperClass) + return failedImport( + "Cannot infer register class for INSERT_SUBREG operand #0"); + // The destination and the super register source of an INSERT_SUBREG must + // be the same register class. + M.insertAction( + InsertPt, DstMIBuilder.getInsnID(), 0, **SuperClass); + M.insertAction( + InsertPt, DstMIBuilder.getInsnID(), 1, **SuperClass); + M.insertAction( + InsertPt, DstMIBuilder.getInsnID(), 2, **SubClass); + return InsertPtOrError.get(); + } + + if (OpName == "EXTRACT_SUBREG") { + // EXTRACT_SUBREG selects into a subregister COPY but unlike most + // instructions, the result register class is controlled by the + // subregisters of the operand. As a result, we must constrain the result + // class rather than check that it's already the right one. 
+ auto SuperClass = inferRegClassFromPattern(Dst->getChild(0)); + if (!SuperClass) + return failedImport( + "Cannot infer register class from EXTRACT_SUBREG operand #0"); + + auto SubIdx = inferSubRegIndexForNode(Dst->getChild(1)); + if (!SubIdx) + return failedImport("EXTRACT_SUBREG child #1 is not a subreg index"); + + const auto &SrcRCDstRCPair = + (*SuperClass)->getMatchingSubClassWithSubRegs(CGRegs, *SubIdx); + assert(SrcRCDstRCPair->second && "Couldn't find a matching subclass"); + M.insertAction( + InsertPt, DstMIBuilder.getInsnID(), 0, *SrcRCDstRCPair->second); + M.insertAction( + InsertPt, DstMIBuilder.getInsnID(), 1, *SrcRCDstRCPair->first); + + // We're done with this pattern! It's eligible for GISel emission; return + // it. + return InsertPtOrError.get(); + } + + // Similar to INSERT_SUBREG, we also have to handle SUBREG_TO_REG as a + // subinstruction. + if (OpName == "SUBREG_TO_REG") { + auto SubClass = inferRegClassFromPattern(Dst->getChild(1)); + if (!SubClass) + return failedImport( + "Cannot infer register class from SUBREG_TO_REG child #1"); + auto SuperClass = inferSuperRegisterClass(Dst->getExtType(0), + Dst->getChild(2)); + if (!SuperClass) + return failedImport( + "Cannot infer register class for SUBREG_TO_REG operand #0"); + M.insertAction( + InsertPt, DstMIBuilder.getInsnID(), 0, **SuperClass); + M.insertAction( + InsertPt, DstMIBuilder.getInsnID(), 2, **SubClass); + return InsertPtOrError.get(); + } + M.insertAction(InsertPt, DstMIBuilder.getInsnID()); return InsertPtOrError.get(); @@ -3786,12 +4213,9 @@ Expected GlobalISelEmitter::createInstructionRenderer( // COPY_TO_REGCLASS is just a copy with a ConstrainOperandToRegClassAction // attached. Similarly for EXTRACT_SUBREG except that's a subregister copy. - if (DstI->TheDef->getName() == "COPY_TO_REGCLASS") - DstI = &Target.getInstruction(RK.getDef("COPY")); - else if (DstI->TheDef->getName() == "EXTRACT_SUBREG") + StringRef Name = DstI->TheDef->getName(); + if (Name == "COPY_TO_REGCLASS" || Name == "EXTRACT_SUBREG") DstI = &Target.getInstruction(RK.getDef("COPY")); - else if (DstI->TheDef->getName() == "REG_SEQUENCE") - return failedImport("Unable to emit REG_SEQUENCE"); return M.insertAction(InsertPt, M.allocateOutputInsnID(), DstI); @@ -3812,8 +4236,11 @@ Expected GlobalISelEmitter::importExplicitUseRenderers( const CodeGenInstruction *DstI = DstMIBuilder.getCGI(); CodeGenInstruction *OrigDstI = &Target.getInstruction(Dst->getOperator()); + StringRef Name = OrigDstI->TheDef->getName(); + unsigned ExpectedDstINumUses = Dst->getNumChildren(); + // EXTRACT_SUBREG needs to use a subregister COPY. 
- if (OrigDstI->TheDef->getName() == "EXTRACT_SUBREG") { + if (Name == "EXTRACT_SUBREG") { if (!Dst->getChild(0)->isLeaf()) return failedImport("EXTRACT_SUBREG child #1 is not a leaf"); @@ -3843,10 +4270,41 @@ Expected GlobalISelEmitter::importExplicitUseRenderers( return failedImport("EXTRACT_SUBREG child #1 is not a subreg index"); } + if (Name == "REG_SEQUENCE") { + if (!Dst->getChild(0)->isLeaf()) + return failedImport("REG_SEQUENCE child #0 is not a leaf"); + + Record *RCDef = getInitValueAsRegClass(Dst->getChild(0)->getLeafValue()); + if (!RCDef) + return failedImport("REG_SEQUENCE child #0 could not " + "be coerced to a register class"); + + if ((ExpectedDstINumUses - 1) % 2 != 0) + return failedImport("Malformed REG_SEQUENCE"); + + for (unsigned I = 1; I != ExpectedDstINumUses; I += 2) { + TreePatternNode *ValChild = Dst->getChild(I); + TreePatternNode *SubRegChild = Dst->getChild(I + 1); + + if (DefInit *SubRegInit = + dyn_cast(SubRegChild->getLeafValue())) { + CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(SubRegInit->getDef()); + + auto InsertPtOrError = + importExplicitUseRenderer(InsertPt, M, DstMIBuilder, ValChild); + if (auto Error = InsertPtOrError.takeError()) + return std::move(Error); + InsertPt = InsertPtOrError.get(); + DstMIBuilder.addRenderer(SubIdx); + } + } + + return InsertPt; + } + // Render the explicit uses. unsigned DstINumUses = OrigDstI->Operands.size() - OrigDstI->Operands.NumDefs; - unsigned ExpectedDstINumUses = Dst->getNumChildren(); - if (OrigDstI->TheDef->getName() == "COPY_TO_REGCLASS") { + if (Name == "COPY_TO_REGCLASS") { DstINumUses--; // Ignore the class constraint. ExpectedDstINumUses--; } @@ -3945,6 +4403,126 @@ Error GlobalISelEmitter::importImplicitDefRenderers( return Error::success(); } +Optional +GlobalISelEmitter::getRegClassFromLeaf(TreePatternNode *Leaf) { + assert(Leaf && "Expected node?"); + assert(Leaf->isLeaf() && "Expected leaf?"); + Record *RCRec = getInitValueAsRegClass(Leaf->getLeafValue()); + if (!RCRec) + return None; + CodeGenRegisterClass *RC = CGRegs.getRegClass(RCRec); + if (!RC) + return None; + return RC; +} + +Optional +GlobalISelEmitter::inferRegClassFromPattern(TreePatternNode *N) { + if (!N) + return None; + + if (N->isLeaf()) + return getRegClassFromLeaf(N); + + // We don't have a leaf node, so we have to try and infer something. Check + // that we have an instruction that we an infer something from. + + // Only handle things that produce a single type. + if (N->getNumTypes() != 1) + return None; + Record *OpRec = N->getOperator(); + + // We only want instructions. + if (!OpRec->isSubClassOf("Instruction")) + return None; + + // Don't want to try and infer things when there could potentially be more + // than one candidate register class. + auto &Inst = Target.getInstruction(OpRec); + if (Inst.Operands.NumDefs > 1) + return None; + + // Handle any special-case instructions which we can safely infer register + // classes from. + StringRef InstName = Inst.TheDef->getName(); + bool IsRegSequence = InstName == "REG_SEQUENCE"; + if (IsRegSequence || InstName == "COPY_TO_REGCLASS") { + // If we have a COPY_TO_REGCLASS, then we need to handle it specially. It + // has the desired register class as the first child. + TreePatternNode *RCChild = N->getChild(IsRegSequence ? 0 : 1); + if (!RCChild->isLeaf()) + return None; + return getRegClassFromLeaf(RCChild); + } + + // Handle destination record types that we can safely infer a register class + // from. 
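// ---[ Editorial aside: examples, not part of this patch ]--------------------
// Destination operators the special cases above can infer a class from
// (register-class and subregister-index names are invented, target-style):
//
//   (COPY_TO_REGCLASS GPR32:$val, FPR32)                        // child #1
//   (REG_SEQUENCE DPair, GPR32:$lo, sub_lo, GPR32:$hi, sub_hi)  // child #0
//
// Anything else is resolved from the instruction's first definition operand,
// as handled just below.
// ----------------------------------------------------------------------------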
+ const auto &DstIOperand = Inst.Operands[0]; + Record *DstIOpRec = DstIOperand.Rec; + if (DstIOpRec->isSubClassOf("RegisterOperand")) { + DstIOpRec = DstIOpRec->getValueAsDef("RegClass"); + const CodeGenRegisterClass &RC = Target.getRegisterClass(DstIOpRec); + return &RC; + } + + if (DstIOpRec->isSubClassOf("RegisterClass")) { + const CodeGenRegisterClass &RC = Target.getRegisterClass(DstIOpRec); + return &RC; + } + + return None; +} + +Optional +GlobalISelEmitter::inferSuperRegisterClass(const TypeSetByHwMode &Ty, + TreePatternNode *SubRegIdxNode) { + assert(SubRegIdxNode && "Expected subregister index node!"); + // We need a ValueTypeByHwMode for getSuperRegForSubReg. + if (!Ty.isValueTypeByHwMode(false)) + return None; + if (!SubRegIdxNode->isLeaf()) + return None; + DefInit *SubRegInit = dyn_cast(SubRegIdxNode->getLeafValue()); + if (!SubRegInit) + return None; + CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(SubRegInit->getDef()); + + // Use the information we found above to find a minimal register class which + // supports the subregister and type we want. + auto RC = + Target.getSuperRegForSubReg(Ty.getValueTypeByHwMode(), CGRegs, SubIdx); + if (!RC) + return None; + return *RC; +} + +Optional +GlobalISelEmitter::inferSuperRegisterClassForNode( + const TypeSetByHwMode &Ty, TreePatternNode *SuperRegNode, + TreePatternNode *SubRegIdxNode) { + assert(SuperRegNode && "Expected super register node!"); + // Check if we already have a defined register class for the super register + // node. If we do, then we should preserve that rather than inferring anything + // from the subregister index node. We can assume that whoever wrote the + // pattern in the first place made sure that the super register and + // subregister are compatible. + if (Optional SuperRegisterClass = + inferRegClassFromPattern(SuperRegNode)) + return *SuperRegisterClass; + return inferSuperRegisterClass(Ty, SubRegIdxNode); +} + +Optional +GlobalISelEmitter::inferSubRegIndexForNode(TreePatternNode *SubRegIdxNode) { + if (!SubRegIdxNode->isLeaf()) + return None; + + DefInit *SubRegInit = dyn_cast(SubRegIdxNode->getLeafValue()); + if (!SubRegInit) + return None; + return CGRegs.getSubRegIdx(SubRegInit->getDef()); +} + Expected GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { // Keep track of the matchers and actions to emit. 
int Score = P.getPatternComplexity(CGP); @@ -4035,6 +4613,8 @@ Expected GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { return failedImport("Pattern operator isn't an instruction"); auto &DstI = Target.getInstruction(DstOp); + StringRef DstIName = DstI.TheDef->getName(); + if (DstI.Operands.NumDefs != Src->getExtTypes().size()) return failedImport("Src pattern results and dst MI defs are different (" + to_string(Src->getExtTypes().size()) + " def(s) vs " + @@ -4048,13 +4628,17 @@ Expected GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { const auto &DstIOperand = DstI.Operands[OpIdx]; Record *DstIOpRec = DstIOperand.Rec; - if (DstI.TheDef->getName() == "COPY_TO_REGCLASS") { + if (DstIName == "COPY_TO_REGCLASS") { DstIOpRec = getInitValueAsRegClass(Dst->getChild(1)->getLeafValue()); if (DstIOpRec == nullptr) return failedImport( "COPY_TO_REGCLASS operand #1 isn't a register class"); - } else if (DstI.TheDef->getName() == "EXTRACT_SUBREG") { + } else if (DstIName == "REG_SEQUENCE") { + DstIOpRec = getInitValueAsRegClass(Dst->getChild(0)->getLeafValue()); + if (DstIOpRec == nullptr) + return failedImport("REG_SEQUENCE operand #0 isn't a register class"); + } else if (DstIName == "EXTRACT_SUBREG") { if (!Dst->getChild(0)->isLeaf()) return failedImport("EXTRACT_SUBREG operand #0 isn't a leaf"); @@ -4063,8 +4647,33 @@ Expected GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { DstIOpRec = getInitValueAsRegClass(Dst->getChild(0)->getLeafValue()); if (DstIOpRec == nullptr) + return failedImport("EXTRACT_SUBREG operand #0 isn't a register class"); + } else if (DstIName == "INSERT_SUBREG") { + auto MaybeSuperClass = inferSuperRegisterClassForNode( + VTy, Dst->getChild(0), Dst->getChild(2)); + if (!MaybeSuperClass) return failedImport( - "EXTRACT_SUBREG operand #0 isn't a register class"); + "Cannot infer register class for INSERT_SUBREG operand #0"); + // Move to the next pattern here, because the register class we found + // doesn't necessarily have a record associated with it. So, we can't + // set DstIOpRec using this. + OperandMatcher &OM = InsnMatcher.getOperand(OpIdx); + OM.setSymbolicName(DstIOperand.Name); + M.defineOperand(OM.getSymbolicName(), OM); + OM.addPredicate(**MaybeSuperClass); + ++OpIdx; + continue; + } else if (DstIName == "SUBREG_TO_REG") { + auto MaybeRegClass = inferSuperRegisterClass(VTy, Dst->getChild(2)); + if (!MaybeRegClass) + return failedImport( + "Cannot infer register class for SUBREG_TO_REG operand #0"); + OperandMatcher &OM = InsnMatcher.getOperand(OpIdx); + OM.setSymbolicName(DstIOperand.Name); + M.defineOperand(OM.getSymbolicName(), OM); + OM.addPredicate(**MaybeRegClass); + ++OpIdx; + continue; } else if (DstIOpRec->isSubClassOf("RegisterOperand")) DstIOpRec = DstIOpRec->getValueAsDef("RegClass"); else if (!DstIOpRec->isSubClassOf("RegisterClass")) @@ -4079,7 +4688,8 @@ Expected GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { ++OpIdx; } - auto DstMIBuilderOrError = createAndImportInstructionRenderer(M, Dst); + auto DstMIBuilderOrError = + createAndImportInstructionRenderer(M, InsnMatcher, Src, Dst); if (auto Error = DstMIBuilderOrError.takeError()) return std::move(Error); BuildMIAction &DstMIBuilder = DstMIBuilderOrError.get(); @@ -4093,7 +4703,7 @@ Expected GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { // Constrain the registers to classes. This is normally derived from the // emitted instruction but a few instructions require special handling. 
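// ---[ Editorial aside: illustrative sketch, not part of this patch ]---------
// Each constrain-to-register-class action added below amounts, at selection
// time, to pinning the virtual register behind the given operand to the
// chosen class, roughly (NewMI, OpIdx and RC are placeholders):
//
//   Register R = NewMI.getOperand(OpIdx).getReg();
//   MRI.constrainRegClass(R, &RC);
//
// The generated table expresses this with a GIR_* opcode whose interpreter is
// in InstructionSelectorImpl.h, outside this patch.
// ----------------------------------------------------------------------------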
- if (DstI.TheDef->getName() == "COPY_TO_REGCLASS") { + if (DstIName == "COPY_TO_REGCLASS") { // COPY_TO_REGCLASS does not provide operand constraints itself but the // result is constrained to the class given by the second child. Record *DstIOpRec = @@ -4111,28 +4721,16 @@ Expected GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { return std::move(M); } - if (DstI.TheDef->getName() == "EXTRACT_SUBREG") { - // EXTRACT_SUBREG selects into a subregister COPY but unlike most - // instructions, the result register class is controlled by the - // subregisters of the operand. As a result, we must constrain the result - // class rather than check that it's already the right one. - if (!Dst->getChild(0)->isLeaf()) - return failedImport("EXTRACT_SUBREG child #1 is not a leaf"); + if (DstIName == "EXTRACT_SUBREG") { + auto SuperClass = inferRegClassFromPattern(Dst->getChild(0)); + if (!SuperClass) + return failedImport( + "Cannot infer register class from EXTRACT_SUBREG operand #0"); - DefInit *SubRegInit = dyn_cast(Dst->getChild(1)->getLeafValue()); - if (!SubRegInit) + auto SubIdx = inferSubRegIndexForNode(Dst->getChild(1)); + if (!SubIdx) return failedImport("EXTRACT_SUBREG child #1 is not a subreg index"); - // Constrain the result to the same register bank as the operand. - Record *DstIOpRec = - getInitValueAsRegClass(Dst->getChild(0)->getLeafValue()); - - if (DstIOpRec == nullptr) - return failedImport("EXTRACT_SUBREG operand #1 isn't a register class"); - - CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(SubRegInit->getDef()); - CodeGenRegisterClass *SrcRC = CGRegs.getRegClass(DstIOpRec); - // It would be nice to leave this constraint implicit but we're required // to pick a register class so constrain the result to a register class // that can hold the correct MVT. @@ -4143,7 +4741,7 @@ Expected GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { "Expected Src of EXTRACT_SUBREG to have one result type"); const auto &SrcRCDstRCPair = - SrcRC->getMatchingSubClassWithSubRegs(CGRegs, SubIdx); + (*SuperClass)->getMatchingSubClassWithSubRegs(CGRegs, *SubIdx); assert(SrcRCDstRCPair->second && "Couldn't find a matching subclass"); M.addAction(0, 0, *SrcRCDstRCPair->second); M.addAction(0, 1, *SrcRCDstRCPair->first); @@ -4154,6 +4752,51 @@ Expected GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { return std::move(M); } + if (DstIName == "INSERT_SUBREG") { + assert(Src->getExtTypes().size() == 1 && + "Expected Src of INSERT_SUBREG to have one result type"); + // We need to constrain the destination, a super regsister source, and a + // subregister source. + auto SubClass = inferRegClassFromPattern(Dst->getChild(1)); + if (!SubClass) + return failedImport( + "Cannot infer register class from INSERT_SUBREG operand #1"); + auto SuperClass = inferSuperRegisterClassForNode( + Src->getExtType(0), Dst->getChild(0), Dst->getChild(2)); + if (!SuperClass) + return failedImport( + "Cannot infer register class for INSERT_SUBREG operand #0"); + M.addAction(0, 0, **SuperClass); + M.addAction(0, 1, **SuperClass); + M.addAction(0, 2, **SubClass); + ++NumPatternImported; + return std::move(M); + } + + if (DstIName == "SUBREG_TO_REG") { + // We need to constrain the destination and subregister source. + assert(Src->getExtTypes().size() == 1 && + "Expected Src of SUBREG_TO_REG to have one result type"); + + // Attempt to infer the subregister source from the first child. If it has + // an explicitly given register class, we'll use that. Otherwise, we will + // fail. 
+ auto SubClass = inferRegClassFromPattern(Dst->getChild(1)); + if (!SubClass) + return failedImport( + "Cannot infer register class from SUBREG_TO_REG child #1"); + // We don't have a child to look at that might have a super register node. + auto SuperClass = + inferSuperRegisterClass(Src->getExtType(0), Dst->getChild(2)); + if (!SuperClass) + return failedImport( + "Cannot infer register class for SUBREG_TO_REG operand #0"); + M.addAction(0, 0, **SuperClass); + M.addAction(0, 2, **SubClass); + ++NumPatternImported; + return std::move(M); + } + M.addAction(0); // We're done with this pattern! It's eligible for GISel emission; return it. @@ -4235,7 +4878,7 @@ std::vector GlobalISelEmitter::optimizeRules( std::vector> &MatcherStorage) { std::vector OptRules; - std::unique_ptr CurrentGroup = make_unique(); + std::unique_ptr CurrentGroup = std::make_unique(); assert(CurrentGroup->empty() && "Newly created group isn't empty!"); unsigned NumGroups = 0; @@ -4256,7 +4899,7 @@ std::vector GlobalISelEmitter::optimizeRules( MatcherStorage.emplace_back(std::move(CurrentGroup)); ++NumGroups; } - CurrentGroup = make_unique(); + CurrentGroup = std::make_unique(); }; for (Matcher *Rule : Rules) { // Greedily add as many matchers as possible to the current group: diff --git a/utils/TableGen/InfoByHwMode.cpp b/utils/TableGen/InfoByHwMode.cpp index d9662889a5db..7cd1b0f08132 100644 --- a/utils/TableGen/InfoByHwMode.cpp +++ b/utils/TableGen/InfoByHwMode.cpp @@ -192,6 +192,17 @@ void RegSizeInfoByHwMode::writeToStream(raw_ostream &OS) const { OS << '}'; } +EncodingInfoByHwMode::EncodingInfoByHwMode(Record *R, const CodeGenHwModes &CGH) { + const HwModeSelect &MS = CGH.getHwModeSelect(R); + for (const HwModeSelect::PairType &P : MS.Items) { + assert(P.second && P.second->isSubClassOf("InstructionEncoding") && + "Encoding must subclass InstructionEncoding"); + auto I = Map.insert({P.first, P.second}); + assert(I.second && "Duplicate entry?"); + (void)I; + } +} + namespace llvm { raw_ostream &operator<<(raw_ostream &OS, const ValueTypeByHwMode &T) { T.writeToStream(OS); diff --git a/utils/TableGen/InfoByHwMode.h b/utils/TableGen/InfoByHwMode.h index 9e5cc3d5f2a4..d92e5901a7f3 100644 --- a/utils/TableGen/InfoByHwMode.h +++ b/utils/TableGen/InfoByHwMode.h @@ -184,6 +184,11 @@ raw_ostream &operator<<(raw_ostream &OS, const ValueTypeByHwMode &T); raw_ostream &operator<<(raw_ostream &OS, const RegSizeInfo &T); raw_ostream &operator<<(raw_ostream &OS, const RegSizeInfoByHwMode &T); +struct EncodingInfoByHwMode : public InfoByHwMode { + EncodingInfoByHwMode(Record *R, const CodeGenHwModes &CGH); + EncodingInfoByHwMode() = default; +}; + } // namespace llvm #endif // LLVM_UTILS_TABLEGEN_INFOBYHWMODE_H diff --git a/utils/TableGen/InstrDocsEmitter.cpp b/utils/TableGen/InstrDocsEmitter.cpp index 91c457ba08fd..45fa936b9574 100644 --- a/utils/TableGen/InstrDocsEmitter.cpp +++ b/utils/TableGen/InstrDocsEmitter.cpp @@ -231,4 +231,4 @@ void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) { } } -} // end llvm namespace +} // end namespace llvm diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp index 2d367f538b71..300ba36a7007 100644 --- a/utils/TableGen/InstrInfoEmitter.cpp +++ b/utils/TableGen/InstrInfoEmitter.cpp @@ -332,6 +332,10 @@ void InstrInfoEmitter::emitOperandTypeMappings( StringRef Namespace = Target.getInstNamespace(); std::vector Operands = Records.getAllDerivedDefinitions("Operand"); + std::vector RegisterOperands = + Records.getAllDerivedDefinitions("RegisterOperand"); + 
std::vector RegisterClasses = + Records.getAllDerivedDefinitions("RegisterClass"); OS << "#ifdef GET_INSTRINFO_OPERAND_TYPES_ENUM\n"; OS << "#undef GET_INSTRINFO_OPERAND_TYPES_ENUM\n"; @@ -341,10 +345,13 @@ void InstrInfoEmitter::emitOperandTypeMappings( OS << "enum OperandType {\n"; unsigned EnumVal = 0; - for (const Record *Op : Operands) { - if (!Op->isAnonymous()) - OS << " " << Op->getName() << " = " << EnumVal << ",\n"; - ++EnumVal; + for (const std::vector *RecordsToAdd : + {&Operands, &RegisterOperands, &RegisterClasses}) { + for (const Record *Op : *RecordsToAdd) { + if (!Op->isAnonymous()) + OS << " " << Op->getName() << " = " << EnumVal << ",\n"; + ++EnumVal; + } } OS << " OPERAND_TYPE_LIST_END" << "\n};\n"; @@ -358,7 +365,8 @@ void InstrInfoEmitter::emitOperandTypeMappings( OS << "namespace llvm {\n"; OS << "namespace " << Namespace << " {\n"; OS << "LLVM_READONLY\n"; - OS << "int getOperandType(uint16_t Opcode, uint16_t OpIdx) {\n"; + OS << "static int getOperandType(uint16_t Opcode, uint16_t OpIdx) {\n"; + // TODO: Factor out instructions with same operands to compress the tables. if (!NumberedInstructions.empty()) { std::vector OperandOffsets; std::vector OperandRecords; @@ -399,7 +407,10 @@ void InstrInfoEmitter::emitOperandTypeMappings( OS << "/**/\n "; } Record *OpR = OperandRecords[I]; - if (OpR->isSubClassOf("Operand") && !OpR->isAnonymous()) + if ((OpR->isSubClassOf("Operand") || + OpR->isSubClassOf("RegisterOperand") || + OpR->isSubClassOf("RegisterClass")) && + !OpR->isAnonymous()) OS << "OpTypes::" << OpR->getName(); else OS << -1; @@ -414,7 +425,7 @@ void InstrInfoEmitter::emitOperandTypeMappings( OS << "}\n"; OS << "} // end namespace " << Namespace << "\n"; OS << "} // end namespace llvm\n"; - OS << "#endif //GET_INSTRINFO_OPERAND_TYPE\n\n"; + OS << "#endif // GET_INSTRINFO_OPERAND_TYPE\n\n"; } void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS, @@ -436,8 +447,8 @@ void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS, << "(const MCInst &MI);\n"; } - OS << "\n} // end " << TargetName << "_MC namespace\n"; - OS << "} // end llvm namespace\n\n"; + OS << "\n} // end namespace " << TargetName << "_MC\n"; + OS << "} // end namespace llvm\n\n"; OS << "#endif // GET_INSTRINFO_MC_HELPER_DECLS\n\n"; @@ -459,8 +470,8 @@ void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS, OS << "\n}\n\n"; } - OS << "} // end " << TargetName << "_MC namespace\n"; - OS << "} // end llvm namespace\n\n"; + OS << "} // end namespace " << TargetName << "_MC\n"; + OS << "} // end namespace llvm\n\n"; OS << "#endif // GET_GENISTRINFO_MC_HELPERS\n"; } @@ -576,7 +587,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) { << TargetName << "InstrNameIndices, " << TargetName << "InstrNameData, " << NumberedInstructions.size() << ");\n}\n\n"; - OS << "} // end llvm namespace\n"; + OS << "} // end namespace llvm\n"; OS << "#endif // GET_INSTRINFO_MC_DESC\n\n"; @@ -592,7 +603,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) { << " ~" << ClassName << "() override = default;\n"; - OS << "\n};\n} // end llvm namespace\n"; + OS << "\n};\n} // end namespace llvm\n"; OS << "#endif // GET_INSTRINFO_HEADER\n\n"; @@ -620,7 +631,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) { << " InitMCInstrInfo(" << TargetName << "Insts, " << TargetName << "InstrNameIndices, " << TargetName << "InstrNameData, " << NumberedInstructions.size() << ");\n}\n"; - OS << "} // end llvm namespace\n"; + OS << "} // end namespace llvm\n"; OS << "#endif // GET_INSTRINFO_CTOR_DTOR\n\n"; @@ -651,6 +662,7 @@ void 
InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num, CodeGenTarget &Target = CDP.getTargetInfo(); // Emit all of the target independent flags... + if (Inst.isPreISelOpcode) OS << "|(1ULL<TheDef->getName() << "\t= " << Num++ << ",\n"; OS << " INSTRUCTION_LIST_END = " << Num << "\n"; OS << " };\n\n"; - OS << "} // end " << Namespace << " namespace\n"; - OS << "} // end llvm namespace\n"; + OS << "} // end namespace " << Namespace << "\n"; + OS << "} // end namespace llvm\n"; OS << "#endif // GET_INSTRINFO_ENUM\n\n"; OS << "#ifdef GET_INSTRINFO_SCHED_ENUM\n"; @@ -780,9 +792,9 @@ void InstrInfoEmitter::emitEnums(raw_ostream &OS) { OS << " " << Class.Name << "\t= " << Num++ << ",\n"; OS << " SCHED_LIST_END = " << Num << "\n"; OS << " };\n"; - OS << "} // end Sched namespace\n"; - OS << "} // end " << Namespace << " namespace\n"; - OS << "} // end llvm namespace\n"; + OS << "} // end namespace Sched\n"; + OS << "} // end namespace " << Namespace << "\n"; + OS << "} // end namespace llvm\n"; OS << "#endif // GET_INSTRINFO_SCHED_ENUM\n\n"; } @@ -794,4 +806,4 @@ void EmitInstrInfo(RecordKeeper &RK, raw_ostream &OS) { EmitMapTable(RK, OS); } -} // end llvm namespace +} // end namespace llvm diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp index 979af98f6768..e01f91c20456 100644 --- a/utils/TableGen/IntrinsicEmitter.cpp +++ b/utils/TableGen/IntrinsicEmitter.cpp @@ -220,7 +220,11 @@ enum IIT_Info { IIT_STRUCT7 = 39, IIT_STRUCT8 = 40, IIT_F128 = 41, - IIT_VEC_ELEMENT = 42 + IIT_VEC_ELEMENT = 42, + IIT_SCALABLE_VEC = 43, + IIT_SUBDIVIDE2_ARG = 44, + IIT_SUBDIVIDE4_ARG = 45, + IIT_VEC_OF_BITCASTS_TO_INT = 46 }; static void EncodeFixedValueType(MVT::SimpleValueType VT, @@ -292,6 +296,12 @@ static void EncodeFixedType(Record *R, std::vector &ArgCodes, Sig.push_back(IIT_PTR_TO_ELT); else if (R->isSubClassOf("LLVMVectorElementType")) Sig.push_back(IIT_VEC_ELEMENT); + else if (R->isSubClassOf("LLVMSubdivide2VectorType")) + Sig.push_back(IIT_SUBDIVIDE2_ARG); + else if (R->isSubClassOf("LLVMSubdivide4VectorType")) + Sig.push_back(IIT_SUBDIVIDE4_ARG); + else if (R->isSubClassOf("LLVMVectorOfBitcastsToInt")) + Sig.push_back(IIT_VEC_OF_BITCASTS_TO_INT); else Sig.push_back(IIT_ARG); return Sig.push_back((Number << 3) | 7 /*IITDescriptor::AK_MatchType*/); @@ -339,6 +349,8 @@ static void EncodeFixedType(Record *R, std::vector &ArgCodes, if (MVT(VT).isVector()) { MVT VVT = VT; + if (VVT.isScalableVector()) + Sig.push_back(IIT_SCALABLE_VEC); switch (VVT.getVectorNumElements()) { default: PrintFatalError("unhandled vector type width in intrinsic!"); case 1: Sig.push_back(IIT_V1); break; @@ -647,6 +659,12 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints, OS << "Attribute::NoCapture"; addComma = true; break; + case CodeGenIntrinsic::NoAlias: + if (addComma) + OS << ","; + OS << "Attribute::NoAlias"; + addComma = true; + break; case CodeGenIntrinsic::Returned: if (addComma) OS << ","; diff --git a/utils/TableGen/RISCVCompressInstEmitter.cpp b/utils/TableGen/RISCVCompressInstEmitter.cpp index e62f528ebc2e..2f1d3898f182 100644 --- a/utils/TableGen/RISCVCompressInstEmitter.cpp +++ b/utils/TableGen/RISCVCompressInstEmitter.cpp @@ -411,12 +411,8 @@ void RISCVCompressInstEmitter::evaluateCompressPat(Record *Rec) { assert(SourceDag && "Missing 'Input' in compress pattern!"); LLVM_DEBUG(dbgs() << "Input: " << *SourceDag << "\n"); - DefInit *OpDef = dyn_cast(SourceDag->getOperator()); - if (!OpDef) - PrintFatalError(Rec->getLoc(), - Rec->getName() 
+ " has unexpected operator type!"); // Checking we are transforming from compressed to uncompressed instructions. - Record *Operator = OpDef->getDef(); + Record *Operator = SourceDag->getOperatorAsDef(Rec->getLoc()); if (!Operator->isSubClassOf("RVInst")) PrintFatalError(Rec->getLoc(), "Input instruction '" + Operator->getName() + "' is not a 32 bit wide instruction!"); @@ -428,12 +424,7 @@ void RISCVCompressInstEmitter::evaluateCompressPat(Record *Rec) { assert(DestDag && "Missing 'Output' in compress pattern!"); LLVM_DEBUG(dbgs() << "Output: " << *DestDag << "\n"); - DefInit *DestOpDef = dyn_cast(DestDag->getOperator()); - if (!DestOpDef) - PrintFatalError(Rec->getLoc(), - Rec->getName() + " has unexpected operator type!"); - - Record *DestOperator = DestOpDef->getDef(); + Record *DestOperator = DestDag->getOperatorAsDef(Rec->getLoc()); if (!DestOperator->isSubClassOf("RVInst16")) PrintFatalError(Rec->getLoc(), "Output instruction '" + DestOperator->getName() + diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp index 1b619072c814..513cd14e0fab 100644 --- a/utils/TableGen/RegisterInfoEmitter.cpp +++ b/utils/TableGen/RegisterInfoEmitter.cpp @@ -888,7 +888,7 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target, // Keep track of sub-register names as well. These are not differentially // encoded. typedef SmallVector SubRegIdxVec; - SequenceToOffsetTable> SubRegIdxSeqs; + SequenceToOffsetTable>> SubRegIdxSeqs; SmallVector SubRegIdxLists(Regs.size()); SequenceToOffsetTable RegStrings; @@ -1315,7 +1315,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target, // Compress the sub-reg index lists. typedef std::vector IdxList; SmallVector SuperRegIdxLists(RegisterClasses.size()); - SequenceToOffsetTable> SuperRegIdxSeqs; + SequenceToOffsetTable>> SuperRegIdxSeqs; BitVector MaskBV(RegisterClasses.size()); for (const auto &RC : RegisterClasses) { diff --git a/utils/TableGen/SearchableTableEmitter.cpp b/utils/TableGen/SearchableTableEmitter.cpp index 954b63e7253c..f08f8aa01956 100644 --- a/utils/TableGen/SearchableTableEmitter.cpp +++ b/utils/TableGen/SearchableTableEmitter.cpp @@ -134,7 +134,7 @@ private: CodeGenIntrinsic &getIntrinsic(Init *I) { std::unique_ptr &Intr = Intrinsics[I]; if (!Intr) - Intr = make_unique(cast(I)->getDef()); + Intr = std::make_unique(cast(I)->getDef()); return *Intr; } @@ -496,7 +496,7 @@ void SearchableTableEmitter::emitGenericTable(const GenericTable &Table, emitIfdef((Twine("GET_") + Table.PreprocessorGuard + "_IMPL").str(), OS); // The primary data table contains all the fields defined for this map. 
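// ---[ Editorial aside: shape of the emitted output, not part of this patch ]-
// With the change below the primary table is emitted as a constexpr array,
// e.g. (type name and entries invented):
//
//   constexpr MySysReg MySysRegsList[] = {
//     { "REG_A", 0x001 }, // 0
//     { "REG_B", 0x002 }, // 1
//   };
//
// Compared with const, constexpr additionally lets the table be referenced
// from constant expressions in the including translation unit.
// ----------------------------------------------------------------------------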
- OS << "const " << Table.CppTypeName << " " << Table.Name << "[] = {\n"; + OS << "constexpr " << Table.CppTypeName << " " << Table.Name << "[] = {\n"; for (unsigned i = 0; i < Table.Entries.size(); ++i) { Record *Entry = Table.Entries[i]; OS << " { "; @@ -541,7 +541,7 @@ std::unique_ptr SearchableTableEmitter::parseSearchIndex(GenericTable &Table, StringRef Name, const std::vector &Key, bool EarlyOut) { - auto Index = llvm::make_unique(); + auto Index = std::make_unique(); Index->Name = Name; Index->EarlyOut = EarlyOut; @@ -577,7 +577,7 @@ void SearchableTableEmitter::collectEnumEntries( if (!ValueField.empty()) Value = getInt(EntryRec, ValueField); - Enum.Entries.push_back(llvm::make_unique(Name, Value)); + Enum.Entries.push_back(std::make_unique(Name, Value)); Enum.EntryMap.insert(std::make_pair(EntryRec, Enum.Entries.back().get())); } @@ -647,7 +647,7 @@ void SearchableTableEmitter::run(raw_ostream &OS) { if (!EnumRec->isValueUnset("ValueField")) ValueField = EnumRec->getValueAsString("ValueField"); - auto Enum = llvm::make_unique(); + auto Enum = std::make_unique(); Enum->Name = EnumRec->getName(); Enum->PreprocessorGuard = EnumRec->getName(); @@ -664,7 +664,7 @@ void SearchableTableEmitter::run(raw_ostream &OS) { } for (auto TableRec : Records.getAllDerivedDefinitions("GenericTable")) { - auto Table = llvm::make_unique(); + auto Table = std::make_unique(); Table->Name = TableRec->getName(); Table->PreprocessorGuard = TableRec->getName(); Table->CppTypeName = TableRec->getValueAsString("CppTypeName"); @@ -733,7 +733,7 @@ void SearchableTableEmitter::run(raw_ostream &OS) { if (!Class->isValueUnset("EnumValueField")) ValueField = Class->getValueAsString("EnumValueField"); - auto Enum = llvm::make_unique(); + auto Enum = std::make_unique(); Enum->Name = (Twine(Class->getName()) + "Values").str(); Enum->PreprocessorGuard = Class->getName().upper(); Enum->Class = Class; @@ -743,7 +743,7 @@ void SearchableTableEmitter::run(raw_ostream &OS) { Enums.emplace_back(std::move(Enum)); } - auto Table = llvm::make_unique(); + auto Table = std::make_unique(); Table->Name = (Twine(Class->getName()) + "sList").str(); Table->PreprocessorGuard = Class->getName().upper(); Table->CppTypeName = Class->getName(); diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp index 9ce2b3b275c8..9b094adb7d5c 100644 --- a/utils/TableGen/SubtargetEmitter.cpp +++ b/utils/TableGen/SubtargetEmitter.cpp @@ -1057,6 +1057,7 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel, LLVM_DEBUG(dbgs() << ProcModel.ModelName << " does not have resources for class " << SC.Name << '\n'); + SCDesc.NumMicroOps = MCSchedClassDesc::InvalidNumMicroOps; } } // Sum resources across all operand writes. 
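// ---[ Editorial aside: consumer-side sketch, not part of this patch ]--------
// Marking the class with InvalidNumMicroOps makes it look invalid to clients
// of the generated scheduling model, so a typical consumer guards on it
// (sketch; SchedModel and SchedClass are assumed in scope):
//
//   const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
//   if (!SCDesc->isValid())
//     return;   // no resource data for this opcode on this processor model
// ----------------------------------------------------------------------------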
@@ -1728,7 +1729,7 @@ void SubtargetEmitter::emitGenMCSubtargetInfo(raw_ostream &OS) { << " const MCInst *MI, unsigned CPUID) {\n"; emitSchedModelHelpersImpl(OS, /* OnlyExpandMCPredicates */ true); OS << "}\n"; - OS << "} // end of namespace " << Target << "_MC\n\n"; + OS << "} // end namespace " << Target << "_MC\n\n"; OS << "struct " << Target << "GenMCSubtargetInfo : public MCSubtargetInfo {\n"; @@ -1746,7 +1747,10 @@ void SubtargetEmitter::emitGenMCSubtargetInfo(raw_ostream &OS) { << " return " << Target << "_MC" << "::resolveVariantSchedClassImpl(SchedClass, MI, CPUID); \n"; OS << " }\n"; + if (TGT.getHwModes().getNumModeIds() > 1) + OS << " unsigned getHwMode() const override;\n"; OS << "};\n"; + EmitHwModeCheck(Target + "GenMCSubtargetInfo", OS); } void SubtargetEmitter::EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS) { @@ -1858,7 +1862,7 @@ void SubtargetEmitter::run(raw_ostream &OS) { OS << "namespace " << Target << "_MC {\n" << "unsigned resolveVariantSchedClassImpl(unsigned SchedClass," << " const MCInst *MI, unsigned CPUID);\n" - << "}\n\n"; + << "} // end namespace " << Target << "_MC\n\n"; OS << "struct " << ClassName << " : public TargetSubtargetInfo {\n" << " explicit " << ClassName << "(const Triple &TT, StringRef CPU, " << "StringRef FS);\n" diff --git a/utils/TableGen/SubtargetFeatureInfo.cpp b/utils/TableGen/SubtargetFeatureInfo.cpp index edf0b4a01c6d..5430f73d5e09 100644 --- a/utils/TableGen/SubtargetFeatureInfo.cpp +++ b/utils/TableGen/SubtargetFeatureInfo.cpp @@ -38,6 +38,10 @@ SubtargetFeatureInfo::getAll(const RecordKeeper &Records) { if (Pred->getName().empty()) PrintFatalError(Pred->getLoc(), "Predicate has no name!"); + // Ignore always true predicates. + if (Pred->getValueAsString("CondString").empty()) + continue; + SubtargetFeatures.emplace_back( Pred, SubtargetFeatureInfo(Pred, SubtargetFeatures.size())); } @@ -95,9 +99,11 @@ void SubtargetFeatureInfo::emitComputeAvailableFeatures( OS << " PredicateBitset Features;\n"; for (const auto &SF : SubtargetFeatures) { const SubtargetFeatureInfo &SFI = SF.second; + StringRef CondStr = SFI.TheDef->getValueAsString("CondString"); + assert(!CondStr.empty() && "true predicate should have been filtered"); - OS << " if (" << SFI.TheDef->getValueAsString("CondString") << ")\n"; - OS << " Features[" << SFI.getEnumBitName() << "] = 1;\n"; + OS << " if (" << CondStr << ")\n"; + OS << " Features.set(" << SFI.getEnumBitName() << ");\n"; } OS << " return Features;\n"; OS << "}\n\n"; @@ -142,7 +148,7 @@ void SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures( } while (true); OS << ")\n"; - OS << " Features[" << SFI.getEnumBitName() << "] = 1;\n"; + OS << " Features.set(" << SFI.getEnumBitName() << ");\n"; } OS << " return Features;\n"; OS << "}\n\n"; diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp index c485ed2feb7a..f730d91160ad 100644 --- a/utils/TableGen/TableGen.cpp +++ b/utils/TableGen/TableGen.cpp @@ -49,10 +49,12 @@ enum ActionType { GenAttributes, GenSearchableTables, GenGlobalISel, + GenGICombiner, GenX86EVEX2VEXTables, GenX86FoldTables, GenRegisterBank, GenExegesis, + GenAutomata, }; namespace llvm { @@ -62,75 +64,75 @@ bool TimeRegions = false; } // end namespace llvm namespace { - cl::opt - Action(cl::desc("Action to perform:"), - cl::values(clEnumValN(PrintRecords, "print-records", - "Print all records to stdout (default)"), - clEnumValN(DumpJSON, "dump-json", - "Dump all records as machine-readable JSON"), - clEnumValN(GenEmitter, "gen-emitter", - "Generate machine code 
emitter"), - clEnumValN(GenRegisterInfo, "gen-register-info", - "Generate registers and register classes info"), - clEnumValN(GenInstrInfo, "gen-instr-info", - "Generate instruction descriptions"), - clEnumValN(GenInstrDocs, "gen-instr-docs", - "Generate instruction documentation"), - clEnumValN(GenCallingConv, "gen-callingconv", - "Generate calling convention descriptions"), - clEnumValN(GenAsmWriter, "gen-asm-writer", - "Generate assembly writer"), - clEnumValN(GenDisassembler, "gen-disassembler", - "Generate disassembler"), - clEnumValN(GenPseudoLowering, "gen-pseudo-lowering", - "Generate pseudo instruction lowering"), - clEnumValN(GenCompressInst, "gen-compress-inst-emitter", - "Generate RISCV compressed instructions."), - clEnumValN(GenAsmMatcher, "gen-asm-matcher", - "Generate assembly instruction matcher"), - clEnumValN(GenDAGISel, "gen-dag-isel", - "Generate a DAG instruction selector"), - clEnumValN(GenDFAPacketizer, "gen-dfa-packetizer", - "Generate DFA Packetizer for VLIW targets"), - clEnumValN(GenFastISel, "gen-fast-isel", - "Generate a \"fast\" instruction selector"), - clEnumValN(GenSubtarget, "gen-subtarget", - "Generate subtarget enumerations"), - clEnumValN(GenIntrinsicEnums, "gen-intrinsic-enums", - "Generate intrinsic enums"), - clEnumValN(GenIntrinsicImpl, "gen-intrinsic-impl", - "Generate intrinsic information"), - clEnumValN(GenTgtIntrinsicEnums, "gen-tgt-intrinsic-enums", - "Generate target intrinsic enums"), - clEnumValN(GenTgtIntrinsicImpl, "gen-tgt-intrinsic-impl", - "Generate target intrinsic information"), - clEnumValN(PrintEnums, "print-enums", - "Print enum values for a class"), - clEnumValN(PrintSets, "print-sets", - "Print expanded sets for testing DAG exprs"), - clEnumValN(GenOptParserDefs, "gen-opt-parser-defs", - "Generate option definitions"), - clEnumValN(GenCTags, "gen-ctags", - "Generate ctags-compatible index"), - clEnumValN(GenAttributes, "gen-attrs", - "Generate attributes"), - clEnumValN(GenSearchableTables, "gen-searchable-tables", - "Generate generic binary-searchable table"), - clEnumValN(GenGlobalISel, "gen-global-isel", - "Generate GlobalISel selector"), - clEnumValN(GenX86EVEX2VEXTables, "gen-x86-EVEX2VEX-tables", - "Generate X86 EVEX to VEX compress tables"), - clEnumValN(GenX86FoldTables, "gen-x86-fold-tables", - "Generate X86 fold tables"), - clEnumValN(GenRegisterBank, "gen-register-bank", - "Generate registers bank descriptions"), - clEnumValN(GenExegesis, "gen-exegesis", - "Generate llvm-exegesis tables"))); +cl::opt Action( + cl::desc("Action to perform:"), + cl::values( + clEnumValN(PrintRecords, "print-records", + "Print all records to stdout (default)"), + clEnumValN(DumpJSON, "dump-json", + "Dump all records as machine-readable JSON"), + clEnumValN(GenEmitter, "gen-emitter", "Generate machine code emitter"), + clEnumValN(GenRegisterInfo, "gen-register-info", + "Generate registers and register classes info"), + clEnumValN(GenInstrInfo, "gen-instr-info", + "Generate instruction descriptions"), + clEnumValN(GenInstrDocs, "gen-instr-docs", + "Generate instruction documentation"), + clEnumValN(GenCallingConv, "gen-callingconv", + "Generate calling convention descriptions"), + clEnumValN(GenAsmWriter, "gen-asm-writer", "Generate assembly writer"), + clEnumValN(GenDisassembler, "gen-disassembler", + "Generate disassembler"), + clEnumValN(GenPseudoLowering, "gen-pseudo-lowering", + "Generate pseudo instruction lowering"), + clEnumValN(GenCompressInst, "gen-compress-inst-emitter", + "Generate RISCV compressed instructions."), + 
clEnumValN(GenAsmMatcher, "gen-asm-matcher", + "Generate assembly instruction matcher"), + clEnumValN(GenDAGISel, "gen-dag-isel", + "Generate a DAG instruction selector"), + clEnumValN(GenDFAPacketizer, "gen-dfa-packetizer", + "Generate DFA Packetizer for VLIW targets"), + clEnumValN(GenFastISel, "gen-fast-isel", + "Generate a \"fast\" instruction selector"), + clEnumValN(GenSubtarget, "gen-subtarget", + "Generate subtarget enumerations"), + clEnumValN(GenIntrinsicEnums, "gen-intrinsic-enums", + "Generate intrinsic enums"), + clEnumValN(GenIntrinsicImpl, "gen-intrinsic-impl", + "Generate intrinsic information"), + clEnumValN(GenTgtIntrinsicEnums, "gen-tgt-intrinsic-enums", + "Generate target intrinsic enums"), + clEnumValN(GenTgtIntrinsicImpl, "gen-tgt-intrinsic-impl", + "Generate target intrinsic information"), + clEnumValN(PrintEnums, "print-enums", "Print enum values for a class"), + clEnumValN(PrintSets, "print-sets", + "Print expanded sets for testing DAG exprs"), + clEnumValN(GenOptParserDefs, "gen-opt-parser-defs", + "Generate option definitions"), + clEnumValN(GenCTags, "gen-ctags", "Generate ctags-compatible index"), + clEnumValN(GenAttributes, "gen-attrs", "Generate attributes"), + clEnumValN(GenSearchableTables, "gen-searchable-tables", + "Generate generic binary-searchable table"), + clEnumValN(GenGlobalISel, "gen-global-isel", + "Generate GlobalISel selector"), + clEnumValN(GenGICombiner, "gen-global-isel-combiner", + "Generate GlobalISel combiner"), + clEnumValN(GenX86EVEX2VEXTables, "gen-x86-EVEX2VEX-tables", + "Generate X86 EVEX to VEX compress tables"), + clEnumValN(GenX86FoldTables, "gen-x86-fold-tables", + "Generate X86 fold tables"), + clEnumValN(GenRegisterBank, "gen-register-bank", + "Generate registers bank descriptions"), + clEnumValN(GenExegesis, "gen-exegesis", + "Generate llvm-exegesis tables"), + clEnumValN(GenAutomata, "gen-automata", + "Generate generic automata"))); - cl::OptionCategory PrintEnumsCat("Options for -print-enums"); - cl::opt - Class("class", cl::desc("Print Enum list for this class"), - cl::value_desc("class name"), cl::cat(PrintEnumsCat)); +cl::OptionCategory PrintEnumsCat("Options for -print-enums"); +cl::opt Class("class", cl::desc("Print Enum list for this class"), + cl::value_desc("class name"), + cl::cat(PrintEnumsCat)); cl::opt TimeRegionsOpt("time-regions", @@ -235,6 +237,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenGlobalISel: EmitGlobalISel(Records, OS); break; + case GenGICombiner: + EmitGICombiner(Records, OS); + break; case GenRegisterBank: EmitRegisterBank(Records, OS); break; @@ -247,6 +252,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenExegesis: EmitExegesis(Records, OS); break; + case GenAutomata: + EmitAutomata(Records, OS); + break; } return false; @@ -263,11 +271,16 @@ int main(int argc, char **argv) { return TableGenMain(argv[0], &LLVMTableGenMain); } -#ifdef __has_feature -#if __has_feature(address_sanitizer) +#ifndef __has_feature +#define __has_feature(x) 0 +#endif + +#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) || \ + __has_feature(leak_sanitizer) + #include // Disable LeakSanitizer for this binary as it has too many leaks that are not // very interesting to fix. See compiler-rt/include/sanitizer/lsan_interface.h . 
LLVM_ATTRIBUTE_USED int __lsan_is_turned_off() { return 1; } -#endif // __has_feature(address_sanitizer) -#endif // defined(__has_feature) + +#endif diff --git a/utils/TableGen/TableGenBackends.h b/utils/TableGen/TableGenBackends.h index 135ec65c0f95..8c067dd51b3b 100644 --- a/utils/TableGen/TableGenBackends.h +++ b/utils/TableGen/TableGenBackends.h @@ -85,10 +85,12 @@ void EmitCTags(RecordKeeper &RK, raw_ostream &OS); void EmitAttributes(RecordKeeper &RK, raw_ostream &OS); void EmitSearchableTables(RecordKeeper &RK, raw_ostream &OS); void EmitGlobalISel(RecordKeeper &RK, raw_ostream &OS); +void EmitGICombiner(RecordKeeper &RK, raw_ostream &OS); void EmitX86EVEX2VEXTables(RecordKeeper &RK, raw_ostream &OS); void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS); void EmitRegisterBank(RecordKeeper &RK, raw_ostream &OS); void EmitExegesis(RecordKeeper &RK, raw_ostream &OS); +void EmitAutomata(RecordKeeper &RK, raw_ostream &OS); } // End llvm namespace diff --git a/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp b/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp index 365cba5a60ca..54aa5a8164f2 100644 --- a/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp +++ b/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp @@ -167,7 +167,7 @@ void emitWebAssemblyDisassemblerTables( OS << " },\n"; } OS << " { 0, nullptr }\n};\n\n"; - OS << "} // End llvm namespace\n"; + OS << "} // end namespace llvm\n"; } } // namespace llvm diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp index 8036aecc4f4b..14bce4c29446 100644 --- a/utils/TableGen/X86DisassemblerTables.cpp +++ b/utils/TableGen/X86DisassemblerTables.cpp @@ -651,7 +651,7 @@ static const char* stringForDecisionType(ModRMDecisionType dt) { DisassemblerTables::DisassemblerTables() { for (unsigned i = 0; i < array_lengthof(Tables); i++) - Tables[i] = llvm::make_unique(); + Tables[i] = std::make_unique(); HasConflicts = false; } diff --git a/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp b/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp index 3df14f40e4a9..6dc7e31e0dab 100644 --- a/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp +++ b/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp @@ -98,6 +98,7 @@ public: bool EVEX_W1_VEX_W0 = RecE->getValueAsBit("EVEX_W1_VEX_W0"); if (RecV->getValueAsDef("OpEnc")->getName().str() != "EncVEX" || + RecV->getValueAsBit("isCodeGenOnly") != RecE->getValueAsBit("isCodeGenOnly") || // VEX/EVEX fields RecV->getValueAsDef("OpPrefix") != RecE->getValueAsDef("OpPrefix") || RecV->getValueAsDef("OpMap") != RecE->getValueAsDef("OpMap") || diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp index ab8a8855c478..33dc6f3f9e23 100644 --- a/utils/TableGen/X86RecognizableInstr.cpp +++ b/utils/TableGen/X86RecognizableInstr.cpp @@ -749,7 +749,7 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { case X86Local::RawFrmImm8: case X86Local::RawFrmImm16: case X86Local::AddCCFrm: - filter = llvm::make_unique(); + filter = std::make_unique(); break; case X86Local::MRMDestReg: case X86Local::MRMSrcReg: @@ -758,7 +758,7 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { case X86Local::MRMSrcRegCC: case X86Local::MRMXrCC: case X86Local::MRMXr: - filter = llvm::make_unique(true); + filter = std::make_unique(true); break; case X86Local::MRMDestMem: case X86Local::MRMSrcMem: @@ -767,22 +767,22 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { case X86Local::MRMSrcMemCC: case X86Local::MRMXmCC: case 
X86Local::MRMXm: - filter = llvm::make_unique(false); + filter = std::make_unique(false); break; case X86Local::MRM0r: case X86Local::MRM1r: case X86Local::MRM2r: case X86Local::MRM3r: case X86Local::MRM4r: case X86Local::MRM5r: case X86Local::MRM6r: case X86Local::MRM7r: - filter = llvm::make_unique(true, Form - X86Local::MRM0r); + filter = std::make_unique(true, Form - X86Local::MRM0r); break; case X86Local::MRM0m: case X86Local::MRM1m: case X86Local::MRM2m: case X86Local::MRM3m: case X86Local::MRM4m: case X86Local::MRM5m: case X86Local::MRM6m: case X86Local::MRM7m: - filter = llvm::make_unique(false, Form - X86Local::MRM0m); + filter = std::make_unique(false, Form - X86Local::MRM0m); break; X86_INSTR_MRM_MAPPING - filter = llvm::make_unique(0xC0 + Form - X86Local::MRM_C0); + filter = std::make_unique(0xC0 + Form - X86Local::MRM_C0); break; } // switch (Form) @@ -854,6 +854,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s, TYPE("GR64", TYPE_R64) TYPE("i8mem", TYPE_M) TYPE("i8imm", TYPE_IMM) + TYPE("u4imm", TYPE_UIMM8) TYPE("u8imm", TYPE_UIMM8) TYPE("i16u8imm", TYPE_UIMM8) TYPE("i32u8imm", TYPE_UIMM8) @@ -973,6 +974,7 @@ RecognizableInstr::immediateEncodingFromString(const std::string &s, ENCODING("i64i32imm", ENCODING_ID) ENCODING("i64i8imm", ENCODING_IB) ENCODING("i8imm", ENCODING_IB) + ENCODING("u4imm", ENCODING_IB) ENCODING("u8imm", ENCODING_IB) ENCODING("i16u8imm", ENCODING_IB) ENCODING("i32u8imm", ENCODING_IB) diff --git a/utils/add_argument_names.py b/utils/add_argument_names.py new file mode 100755 index 000000000000..38dde2599794 --- /dev/null +++ b/utils/add_argument_names.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +import re, sys + +def fix_string(s): + TYPE = re.compile('\s*(i[0-9]+|float|double|x86_fp80|fp128|ppc_fp128|\[\[.*?\]\]|\[2 x \[\[[A-Z_0-9]+\]\]\]|<.*?>|{.*?}|\[[0-9]+ x .*?\]|%["a-z:A-Z0-9._]+({{.*?}})?|%{{.*?}}|{{.*?}}|\[\[.*?\]\])(\s*(\*|addrspace\(.*?\)|dereferenceable\(.*?\)|byval\(.*?\)|sret|zeroext|inreg|returned|signext|nocapture|align \d+|swiftself|swifterror|readonly|noalias|inalloca|nocapture))*\s*') + + counter = 0 + if 'i32{{.*}}' in s: + counter = 1 + + at_pos = s.find('@') + if at_pos == -1: + at_pos = 0 + + annoying_pos = s.find('{{[^(]+}}') + if annoying_pos != -1: + at_pos = annoying_pos + 9 + + paren_pos = s.find('(', at_pos) + if paren_pos == -1: + return s + + res = s[:paren_pos+1] + s = s[paren_pos+1:] + + m = TYPE.match(s) + while m: + res += m.group() + s = s[m.end():] + if s.startswith(',') or s.startswith(')'): + res += f' %{counter}' + counter += 1 + + next_arg = s.find(',') + if next_arg == -1: + break + + res += s[:next_arg+1] + s = s[next_arg+1:] + m = TYPE.match(s) + + return res+s + +def process_file(contents): + PREFIX = re.compile(r'check-prefix(es)?(=|\s+)([a-zA-Z0-9,]+)') + check_prefixes = ['CHECK'] + result = '' + for line in contents.split('\n'): + if 'FileCheck' in line: + m = PREFIX.search(line) + if m: + check_prefixes.extend(m.group(3).split(',')) + + found_check = False + for prefix in check_prefixes: + if prefix in line: + found_check = True + break + + if not found_check or 'define' not in line: + result += line + '\n' + continue + + # We have a check for a function definition. Number the args. 
+ line = fix_string(line) + result += line + '\n' + return result + +def main(): + print(f'Processing {sys.argv[1]}') + f = open(sys.argv[1]) + content = f.read() + f.close() + + content = process_file(content) + + f = open(sys.argv[1], 'w') + f.write(content) + f.close() + +if __name__ == '__main__': + main() diff --git a/utils/llvm-locstats/CMakeLists.txt b/utils/llvm-locstats/CMakeLists.txt new file mode 100644 index 000000000000..a919023e141e --- /dev/null +++ b/utils/llvm-locstats/CMakeLists.txt @@ -0,0 +1,12 @@ +if (LLVM_BUILD_UTILS AND LLVM_BUILD_TOOLS) + add_custom_command( + OUTPUT ${LLVM_TOOLS_BINARY_DIR}/llvm-locstats + DEPENDS ${LLVM_MAIN_SRC_DIR}/utils/llvm-locstats/llvm-locstats.py + COMMAND ${CMAKE_COMMAND} -E copy ${LLVM_MAIN_SRC_DIR}/utils/llvm-locstats/llvm-locstats.py ${LLVM_TOOLS_BINARY_DIR}/llvm-locstats + COMMENT "Copying llvm-locstats into ${LLVM_TOOLS_BINARY_DIR}" + ) + add_custom_target(llvm-locstats ALL + DEPENDS ${LLVM_TOOLS_BINARY_DIR}/llvm-locstats + ) + set_target_properties(llvm-locstats PROPERTIES FOLDER "Tools") +endif() diff --git a/utils/llvm-locstats/llvm-locstats.py b/utils/llvm-locstats/llvm-locstats.py new file mode 100755 index 000000000000..4df525ed1a96 --- /dev/null +++ b/utils/llvm-locstats/llvm-locstats.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python +# +# This is a tool that works like debug location coverage calculator. +# It parses the llvm-dwarfdump --statistics output by reporting it +# in a more human readable way. +# + +from __future__ import print_function +import argparse +import os +import sys +from json import loads +from math import ceil +from subprocess import Popen, PIPE + +def coverage_buckets(): + yield '0%' + yield '1-9%' + for start in range(10, 91, 10): + yield '{0}-{1}%'.format(start, start + 9) + yield '100%' + +def locstats_output( + variables_total, + variables_total_locstats, + variables_with_loc, + scope_bytes_covered, + scope_bytes_from_first_def, + variables_coverage_map + ): + + pc_ranges_covered = int(ceil(scope_bytes_covered * 100.0) + / scope_bytes_from_first_def) + variables_coverage_per_map = {} + for cov_bucket in coverage_buckets(): + variables_coverage_per_map[cov_bucket] = \ + int(ceil(variables_coverage_map[cov_bucket] * 100.0) \ + / variables_total_locstats) + + print (' =================================================') + print (' Debug Location Statistics ') + print (' =================================================') + print (' cov% samples percentage(~) ') + print (' -------------------------------------------------') + for cov_bucket in coverage_buckets(): + print (' {0:6} {1:8d} {2:3d}%'. \ + format(cov_bucket, variables_coverage_map[cov_bucket], \ + variables_coverage_per_map[cov_bucket])) + print (' =================================================') + print (' -the number of debug variables processed: ' \ + + str(variables_total_locstats)) + print (' -PC ranges covered: ' + str(pc_ranges_covered) + '%') + + # Only if we are processing all the variables output the total + # availability. 
+ if variables_total and variables_with_loc: + total_availability = int(ceil(variables_with_loc * 100.0) \ + / variables_total) + print (' -------------------------------------------------') + print (' -total availability: ' + str(total_availability) + '%') + print (' =================================================') + +def parse_program_args(parser): + parser.add_argument('-only-variables', action='store_true', + default=False, + help='calculate the location statistics only for ' + 'local variables' + ) + parser.add_argument('-only-formal-parameters', action='store_true', + default=False, + help='calculate the location statistics only for ' + 'formal parameters' + ) + parser.add_argument('-ignore-debug-entry-values', action='store_true', + default=False, + help='ignore the location statistics on locations with ' + 'entry values' + ) + parser.add_argument('file_name', type=str, help='file to process') + return parser.parse_args() + + +def Main(): + parser = argparse.ArgumentParser() + results = parse_program_args(parser) + + if len(sys.argv) < 2: + print ('error: Too few arguments.') + parser.print_help() + sys.exit(1) + + if results.only_variables and results.only_formal_parameters: + print ('error: Please use just one only* option.') + parser.print_help() + sys.exit(1) + + # These will be different due to different options enabled. + variables_total = None + variables_total_locstats = None + variables_with_loc = None + variables_scope_bytes_covered = None + variables_scope_bytes_from_first_def = None + variables_scope_bytes_entry_values = None + variables_coverage_map = {} + binary = results.file_name + + # Get the directory of the LLVM tools. + llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__), \ + "llvm-dwarfdump") + # The statistics llvm-dwarfdump option. + llvm_dwarfdump_stats_opt = "--statistics" + + subproc = Popen([llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary], \ + stdin=PIPE, stdout=PIPE, stderr=PIPE, \ + universal_newlines = True) + cmd_stdout, cmd_stderr = subproc.communicate() + + # Get the JSON and parse it. + json_parsed = None + + try: + json_parsed = loads(cmd_stdout) + except: + print ('error: No valid llvm-dwarfdump statistics found.') + sys.exit(1) + + if results.only_variables: + # Read the JSON only for local variables. + variables_total_locstats = \ + json_parsed['total vars procesed by location statistics'] + variables_scope_bytes_covered = \ + json_parsed['vars scope bytes covered'] + variables_scope_bytes_from_first_def = \ + json_parsed['vars scope bytes total'] + if not results.ignore_debug_entry_values: + for cov_bucket in coverage_buckets(): + cov_category = "vars with {} of its scope covered".format(cov_bucket) + variables_coverage_map[cov_bucket] = json_parsed[cov_category] + else: + variables_scope_bytes_entry_values = \ + json_parsed['vars entry value scope bytes covered'] + variables_scope_bytes_covered = variables_scope_bytes_covered \ + - variables_scope_bytes_entry_values + for cov_bucket in coverage_buckets(): + cov_category = \ + "vars (excluding the debug entry values) " \ + "with {} of its scope covered".format(cov_bucket) + variables_coverage_map[cov_bucket] = json_parsed[cov_category] + elif results.only_formal_parameters: + # Read the JSON only for formal parameters. 
+ variables_total_locstats = \ + json_parsed['total params procesed by location statistics'] + variables_scope_bytes_covered = \ + json_parsed['formal params scope bytes covered'] + variables_scope_bytes_from_first_def = \ + json_parsed['formal params scope bytes total'] + if not results.ignore_debug_entry_values: + for cov_bucket in coverage_buckets(): + cov_category = "params with {} of its scope covered".format(cov_bucket) + variables_coverage_map[cov_bucket] = json_parsed[cov_category] + else: + variables_scope_bytes_entry_values = \ + json_parsed['formal params entry value scope bytes covered'] + variables_scope_bytes_covered = variables_scope_bytes_covered \ + - variables_scope_bytes_entry_values + for cov_bucket in coverage_buckets(): + cov_category = \ + "params (excluding the debug entry values) " \ + "with {} of its scope covered".format(cov_bucket) + variables_coverage_map[cov_bucket] = json_parsed[cov_category] + else: + # Read the JSON for both local variables and formal parameters. + variables_total = \ + json_parsed['source variables'] + variables_with_loc = json_parsed['variables with location'] + variables_total_locstats = \ + json_parsed['total variables procesed by location statistics'] + variables_scope_bytes_covered = \ + json_parsed['scope bytes covered'] + variables_scope_bytes_from_first_def = \ + json_parsed['scope bytes total'] + if not results.ignore_debug_entry_values: + for cov_bucket in coverage_buckets(): + cov_category = "variables with {} of its scope covered". \ + format(cov_bucket) + variables_coverage_map[cov_bucket] = json_parsed[cov_category] + else: + variables_scope_bytes_entry_values = \ + json_parsed['entry value scope bytes covered'] + variables_scope_bytes_covered = variables_scope_bytes_covered \ + - variables_scope_bytes_entry_values + for cov_bucket in coverage_buckets(): + cov_category = "variables (excluding the debug entry values) " \ + "with {} of its scope covered". format(cov_bucket) + variables_coverage_map[cov_bucket] = json_parsed[cov_category] + + # Pretty print collected info. + locstats_output( + variables_total, + variables_total_locstats, + variables_with_loc, + variables_scope_bytes_covered, + variables_scope_bytes_from_first_def, + variables_coverage_map + ) + +if __name__ == '__main__': + Main() + sys.exit(0) -- cgit v1.2.3
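For readers skimming the new llvm-locstats.py above, a minimal standalone sketch of its core arithmetic follows. This is not part of the patch; the helper name and sample counts are illustrative only, but the formula mirrors the one used in locstats_output(), where each coverage bucket's sample count from the llvm-dwarfdump --statistics JSON is scaled against the total number of variables processed.

    #!/usr/bin/env python3
    # Illustrative sketch (assumed, not from the patch): reproduce the bucket
    # percentage arithmetic llvm-locstats.py applies to the per-bucket sample
    # counts parsed from `llvm-dwarfdump --statistics` JSON output.
    from math import ceil

    def bucket_percentages(coverage_map, total_vars):
        # Mirrors locstats_output(): int(ceil(count * 100.0) / total_vars)
        return {bucket: int(ceil(count * 100.0) / total_vars)
                for bucket, count in coverage_map.items()}

    if __name__ == '__main__':
        # Hypothetical sample counts for three of the buckets the tool prints.
        print(bucket_percentages({'0%': 3, '90-99%': 12, '100%': 85}, 120))
        # -> {'0%': 2, '90-99%': 10, '100%': 70}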